From 0793a61d4df8daeac6492dbf8d2f3e5713caae5e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 4 Dec 2008 20:12:29 +0100
Subject: performance counters: core code

Implement the core kernel bits of Performance Counters subsystem.

The Linux Performance Counter subsystem provides an abstraction of
performance counter hardware capabilities. It provides per task and per
CPU counters, and it provides event capabilities on top of those.

Performance counters are accessed via special file descriptors.
There's one file descriptor per virtual counter used.

The special file descriptor is opened via the perf_counter_open()
system call:

 int
 perf_counter_open(u32 hw_event_type,
                   u32 hw_event_period,
                   u32 record_type,
                   pid_t pid,
                   int cpu);

The syscall returns the new fd. The fd can be used via the normal
VFS system calls: read() can be used to read the counter, fcntl()
can be used to set the blocking mode, etc.

Multiple counters can be kept open at a time, and the counters
can be poll()ed.

See more details in Documentation/perf-counters.txt.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index ce0d9da..52146c2 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -25,6 +25,7 @@
 #include <linux/kbd_kern.h>
 #include <linux/proc_fs.h>
 #include <linux/quotaops.h>
+#include <linux/perf_counter.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/suspend.h>
@@ -244,6 +245,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty)
 	struct pt_regs *regs = get_irq_regs();
 	if (regs)
 		show_regs(regs);
+	perf_counter_print_debug();
 }
 static struct sysrq_key_op sysrq_showregs_op = {
 	.handler	= sysrq_handle_showregs,
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
new file mode 100644
index 0000000..22c4469
--- /dev/null
+++ b/include/linux/perf_counter.h
@@ -0,0 +1,171 @@
+/*
+ *  Performance counters:
+ *
+ *   Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de>
+ *   Copyright(C) 2008, Red Hat, Inc., Ingo Molnar
+ *
+ *  Data type definitions, declarations, prototypes.
+ *
+ *  Started by: Thomas Gleixner and Ingo Molnar
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_COUNTER_H
+#define _LINUX_PERF_COUNTER_H
+
+#include <asm/atomic.h>
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+
+struct task_struct;
+
+/*
+ * Generalized hardware event types, used by the hw_event_type parameter
+ * of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+	PERF_COUNT_CYCLES,
+	PERF_COUNT_INSTRUCTIONS,
+	PERF_COUNT_CACHE_REFERENCES,
+	PERF_COUNT_CACHE_MISSES,
+	PERF_COUNT_BRANCH_INSTRUCTIONS,
+	PERF_COUNT_BRANCH_MISSES,
+	/*
+	 * If this bit is set in the type, then trigger NMI sampling:
+	 */
+	PERF_COUNT_NMI			= (1 << 30),
+};
+
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_record_type {
+	PERF_RECORD_SIMPLE,
+	PERF_RECORD_IRQ,
+	PERF_RECORD_GROUP,
+};
+
+/**
+ * struct hw_perf_counter - performance counter hardware details
+ */
+struct hw_perf_counter {
+	u64			config;
+	unsigned long		config_base;
+	unsigned long		counter_base;
+	int			nmi;
+	unsigned int		idx;
+	u64			prev_count;
+	s32			next_count;
+	u64			irq_period;
+};
+
+/*
+ * Hardcoded buffer length limit for now, for IRQ-fed events:
+ */
+#define PERF_DATA_BUFLEN	2048
+
+/**
+ * struct perf_data - performance counter IRQ data sampling ...
+ */
+struct perf_data {
+	int			len;
+	int			rd_idx;
+	int			overrun;
+	u8			data[PERF_DATA_BUFLEN];
+};
+
+/**
+ * struct perf_counter - performance counter kernel representation:
+ */
+struct perf_counter {
+	struct list_head		list;
+	int				active;
+#if BITS_PER_LONG == 64
+	atomic64_t			count;
+#else
+	atomic_t			count32[2];
+#endif
+	u64				__irq_period;
+
+	struct hw_perf_counter		hw;
+
+	struct perf_counter_context	*ctx;
+	struct task_struct		*task;
+
+	/*
+	 * Protect attach/detach:
+	 */
+	struct mutex			mutex;
+
+	int				oncpu;
+	int				cpu;
+
+	s32				hw_event_type;
+	enum perf_record_type		record_type;
+
+	/* read() / irq related data */
+	wait_queue_head_t		waitq;
+	/* optional: for NMIs */
+	int				wakeup_pending;
+	struct perf_data		*irqdata;
+	struct perf_data		*usrdata;
+	struct perf_data		data[2];
+};
+
+/**
+ * struct perf_counter_context - counter context structure
+ *
+ * Used as a container for task counters and CPU counters as well:
+ */
+struct perf_counter_context {
+#ifdef CONFIG_PERF_COUNTERS
+	/*
+	 * Protect the list of counters:
+	 */
+	spinlock_t		lock;
+	struct list_head	counters;
+	int			nr_counters;
+	int			nr_active;
+	struct task_struct	*task;
+#endif
+};
+
+/**
+ * struct perf_counter_cpu_context - per cpu counter context structure
+ */
+struct perf_cpu_context {
+	struct perf_counter_context	ctx;
+	struct perf_counter_context	*task_ctx;
+	int				active_oncpu;
+	int				max_pertask;
+};
+
+/*
+ * Set by architecture code:
+ */
+extern int perf_max_counters;
+
+#ifdef CONFIG_PERF_COUNTERS
+extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
+extern void perf_counter_task_tick(struct task_struct *task, int cpu);
+extern void perf_counter_init_task(struct task_struct *task);
+extern void perf_counter_notify(struct pt_regs *regs);
+extern void perf_counter_print_debug(void);
+#else
+static inline void
+perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
+static inline void
+perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
+static inline void
+perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
+static inline void perf_counter_init_task(struct task_struct *task)	{ }
+static inline void perf_counter_notify(struct pt_regs *regs)		{ }
+static inline void perf_counter_print_debug(void)			{ }
+#endif
+
+#endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55e30d1..4c53027 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,6 +71,7 @@ struct sched_param {
 #include <linux/fs_struct.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
+#include <linux/perf_counter.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
@@ -1326,6 +1327,7 @@ struct task_struct {
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
+	struct perf_counter_context perf_counter_ctx;
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
 	short il_next;
@@ -2285,6 +2287,13 @@ static inline void inc_syscw(struct task_struct *tsk)
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
 
+/*
+ * Call the function if the target task is executing on a CPU right now:
+ */
+extern void task_oncpu_function_call(struct task_struct *p,
+				     void (*func) (void *info), void *info);
+
+
 #ifdef CONFIG_MM_OWNER
 extern void mm_update_next_owner(struct mm_struct *mm);
 extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 04fb47b..6cce728 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -624,4 +624,10 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
+asmlinkage int
+sys_perf_counter_open(u32 hw_event_type,
+		      u32 hw_event_period,
+		      u32 record_type,
+		      pid_t pid,
+		      int cpu);
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index f763762..78bede2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -732,6 +732,35 @@ config AIO
           by some high performance threaded applications. Disabling
           this option saves about 7k.
 
+config HAVE_PERF_COUNTERS
+	bool
+
+menu "Performance Counters"
+
+config PERF_COUNTERS
+	bool "Kernel Performance Counters"
+	depends on HAVE_PERF_COUNTERS
+	default y
+	help
+	  Enable kernel support for performance counter hardware.
+
+	  Performance counters are special hardware registers available
+	  on most modern CPUs. These registers count the number of certain
+	  types of hw events: such as instructions executed, cachemisses
+	  suffered, or branches mis-predicted - without slowing down the
+	  kernel or applications. These registers can also trigger interrupts
+	  when a threshold number of events have passed - and can thus be
+	  used to profile the code that runs on that CPU.
+
+	  The Linux Performance Counter subsystem provides an abstraction of
+	  these hardware capabilities, available via a system call. It
+	  provides per task and per CPU counters, and it provides event
+	  capabilities on top of those.
+
+	  Say Y if unsure.
+
+endmenu
+
 config VM_EVENT_COUNTERS
 	default y
 	bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad00..1f184a1 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -89,6 +89,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
+obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/fork.c b/kernel/fork.c
index 2a372a0..441fadf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -975,6 +975,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	rt_mutex_init_task(p);
+	perf_counter_init_task(p);
 
 #ifdef CONFIG_PROVE_LOCKING
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
new file mode 100644
index 0000000..20508f0
--- /dev/null
+++ b/kernel/perf_counter.c
@@ -0,0 +1,943 @@
+/*
+ * Performance counter core code
+ *
+ *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/fs.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/poll.h>
+#include <linux/sysfs.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/anon_inodes.h>
+#include <linux/perf_counter.h>
+
+/*
+ * Each CPU has a list of per CPU counters:
+ */
+DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
+
+int perf_max_counters __read_mostly;
+static int perf_reserved_percpu __read_mostly;
+static int perf_overcommit __read_mostly = 1;
+
+/*
+ * Mutex for (sysadmin-configurable) counter reservations:
+ */
+static DEFINE_MUTEX(perf_resource_mutex);
+
+/*
+ * Architecture provided APIs - weak aliases:
+ */
+
+int __weak hw_perf_counter_init(struct perf_counter *counter, u32 hw_event_type)
+{
+	return -EINVAL;
+}
+
+void __weak hw_perf_counter_enable(struct perf_counter *counter)	 { }
+void __weak hw_perf_counter_disable(struct perf_counter *counter)	 { }
+void __weak hw_perf_counter_read(struct perf_counter *counter)		 { }
+void __weak hw_perf_disable_all(void) { }
+void __weak hw_perf_enable_all(void) { }
+void __weak hw_perf_counter_setup(void) { }
+
+#if BITS_PER_LONG == 64
+
+/*
+ * Read the cached counter in counter safe against cross CPU / NMI
+ * modifications. 64 bit version - no complications.
+ */
+static inline u64 perf_read_counter_safe(struct perf_counter *counter)
+{
+	return (u64) atomic64_read(&counter->count);
+}
+
+#else
+
+/*
+ * Read the cached counter in counter safe against cross CPU / NMI
+ * modifications. 32 bit version.
+ */
+static u64 perf_read_counter_safe(struct perf_counter *counter)
+{
+	u32 cntl, cnth;
+
+	local_irq_disable();
+	do {
+		cnth = atomic_read(&counter->count32[1]);
+		cntl = atomic_read(&counter->count32[0]);
+	} while (cnth != atomic_read(&counter->count32[1]));
+
+	local_irq_enable();
+
+	return cntl | ((u64) cnth) << 32;
+}
+
+#endif
+
+/*
+ * Cross CPU call to remove a performance counter
+ *
+ * We disable the counter on the hardware level first. After that we
+ * remove it from the context list.
+ */
+static void __perf_remove_from_context(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu. If not it has been
+	 * scheduled out before the smp call arrived.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	spin_lock(&ctx->lock);
+
+	if (counter->active) {
+		hw_perf_counter_disable(counter);
+		counter->active = 0;
+		ctx->nr_active--;
+		cpuctx->active_oncpu--;
+		counter->task = NULL;
+	}
+	ctx->nr_counters--;
+
+	/*
+	 * Protect the list operation against NMI by disabling the
+	 * counters on a global level. NOP for non NMI based counters.
+	 */
+	hw_perf_disable_all();
+	list_del_init(&counter->list);
+	hw_perf_enable_all();
+
+	if (!ctx->task) {
+		/*
+		 * Allow more per task counters with respect to the
+		 * reservation:
+		 */
+		cpuctx->max_pertask =
+			min(perf_max_counters - ctx->nr_counters,
+			    perf_max_counters - perf_reserved_percpu);
+	}
+
+	spin_unlock(&ctx->lock);
+}
+
+
+/*
+ * Remove the counter from a task's (or a CPU's) list of counters.
+ *
+ * Must be called with counter->mutex held.
+ *
+ * CPU counters are removed with a smp call. For task counters we only
+ * call when the task is on a CPU.
+ */
+static void perf_remove_from_context(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Per cpu counters are removed via an smp call and
+		 * the removal is always sucessful.
+		 */
+		smp_call_function_single(counter->cpu,
+					 __perf_remove_from_context,
+					 counter, 1);
+		return;
+	}
+
+retry:
+	task_oncpu_function_call(task, __perf_remove_from_context,
+				 counter);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the context is active we need to retry the smp call.
+	 */
+	if (ctx->nr_active && !list_empty(&counter->list)) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * The lock prevents that this context is scheduled in so we
+	 * can remove the counter safely, if it the call above did not
+	 * succeed.
+	 */
+	if (!list_empty(&counter->list)) {
+		ctx->nr_counters--;
+		list_del_init(&counter->list);
+		counter->task = NULL;
+	}
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Cross CPU call to install and enable a preformance counter
+ */
+static void __perf_install_in_context(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
+	int cpu = smp_processor_id();
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu. If not it has been
+	 * scheduled out before the smp call arrived.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * Protect the list operation against NMI by disabling the
+	 * counters on a global level. NOP for non NMI based counters.
+	 */
+	hw_perf_disable_all();
+	list_add_tail(&counter->list, &ctx->counters);
+	hw_perf_enable_all();
+
+	ctx->nr_counters++;
+
+	if (cpuctx->active_oncpu < perf_max_counters) {
+		hw_perf_counter_enable(counter);
+		counter->active = 1;
+		counter->oncpu = cpu;
+		ctx->nr_active++;
+		cpuctx->active_oncpu++;
+	}
+
+	if (!ctx->task && cpuctx->max_pertask)
+		cpuctx->max_pertask--;
+
+	spin_unlock(&ctx->lock);
+}
+
+/*
+ * Attach a performance counter to a context
+ *
+ * First we add the counter to the list with the hardware enable bit
+ * in counter->hw_config cleared.
+ *
+ * If the counter is attached to a task which is on a CPU we use a smp
+ * call to enable it in the task context. The task might have been
+ * scheduled away, but we check this in the smp call again.
+ */
+static void
+perf_install_in_context(struct perf_counter_context *ctx,
+			struct perf_counter *counter,
+			int cpu)
+{
+	struct task_struct *task = ctx->task;
+
+	counter->ctx = ctx;
+	if (!task) {
+		/*
+		 * Per cpu counters are installed via an smp call and
+		 * the install is always sucessful.
+		 */
+		smp_call_function_single(cpu, __perf_install_in_context,
+					 counter, 1);
+		return;
+	}
+
+	counter->task = task;
+retry:
+	task_oncpu_function_call(task, __perf_install_in_context,
+				 counter);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the context is active and the counter has not been added
+	 * we need to retry the smp call.
+	 */
+	if (ctx->nr_active && list_empty(&counter->list)) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * The lock prevents that this context is scheduled in so we
+	 * can add the counter safely, if it the call above did not
+	 * succeed.
+	 */
+	if (list_empty(&counter->list)) {
+		list_add_tail(&counter->list, &ctx->counters);
+		ctx->nr_counters++;
+	}
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Called from scheduler to remove the counters of the current task,
+ * with interrupts disabled.
+ *
+ * We stop each counter and update the counter value in counter->count.
+ *
+ * This does not protect us against NMI, but hw_perf_counter_disable()
+ * sets the disabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * not restart the counter.
+ */
+void perf_counter_task_sched_out(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct perf_counter *counter;
+
+	if (likely(!cpuctx->task_ctx))
+		return;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counters, list) {
+		if (!ctx->nr_active)
+			break;
+		if (counter->active) {
+			hw_perf_counter_disable(counter);
+			counter->active = 0;
+			counter->oncpu = -1;
+			ctx->nr_active--;
+			cpuctx->active_oncpu--;
+		}
+	}
+	spin_unlock(&ctx->lock);
+	cpuctx->task_ctx = NULL;
+}
+
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but hw_perf_counter_enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct perf_counter *counter;
+
+	if (likely(!ctx->nr_counters))
+		return;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counters, list) {
+		if (ctx->nr_active == cpuctx->max_pertask)
+			break;
+		if (counter->cpu != -1 && counter->cpu != cpu)
+			continue;
+
+		hw_perf_counter_enable(counter);
+		counter->active = 1;
+		counter->oncpu = cpu;
+		ctx->nr_active++;
+		cpuctx->active_oncpu++;
+	}
+	spin_unlock(&ctx->lock);
+	cpuctx->task_ctx = ctx;
+}
+
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter *counter;
+
+	if (likely(!ctx->nr_counters))
+		return;
+
+	perf_counter_task_sched_out(curr, cpu);
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * Rotate the first entry last:
+	 */
+	hw_perf_disable_all();
+	list_for_each_entry(counter, &ctx->counters, list) {
+		list_del(&counter->list);
+		list_add_tail(&counter->list, &ctx->counters);
+		break;
+	}
+	hw_perf_enable_all();
+
+	spin_unlock(&ctx->lock);
+
+	perf_counter_task_sched_in(curr, cpu);
+}
+
+/*
+ * Initialize the perf_counter context in task_struct
+ */
+void perf_counter_init_task(struct task_struct *task)
+{
+	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+
+	spin_lock_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->counters);
+	ctx->nr_counters = 0;
+	ctx->task = task;
+}
+
+/*
+ * Cross CPU call to read the hardware counter
+ */
+static void __hw_perf_counter_read(void *info)
+{
+	hw_perf_counter_read(info);
+}
+
+static u64 perf_read_counter(struct perf_counter *counter)
+{
+	/*
+	 * If counter is enabled and currently active on a CPU, update the
+	 * value in the counter structure:
+	 */
+	if (counter->active) {
+		smp_call_function_single(counter->oncpu,
+					 __hw_perf_counter_read, counter, 1);
+	}
+
+	return perf_read_counter_safe(counter);
+}
+
+/*
+ * Cross CPU call to switch performance data pointers
+ */
+static void __perf_switch_irq_data(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_data *oldirqdata = counter->irqdata;
+
+	/*
+	 * If this is a task context, we need to check whether it is
+	 * the current task context of this cpu. If not it has been
+	 * scheduled out before the smp call arrived.
+	 */
+	if (ctx->task) {
+		if (cpuctx->task_ctx != ctx)
+			return;
+		spin_lock(&ctx->lock);
+	}
+
+	/* Change the pointer NMI safe */
+	atomic_long_set((atomic_long_t *)&counter->irqdata,
+			(unsigned long) counter->usrdata);
+	counter->usrdata = oldirqdata;
+
+	if (ctx->task)
+		spin_unlock(&ctx->lock);
+}
+
+static struct perf_data *perf_switch_irq_data(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_data *oldirqdata = counter->irqdata;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		smp_call_function_single(counter->cpu,
+					 __perf_switch_irq_data,
+					 counter, 1);
+		return counter->usrdata;
+	}
+
+retry:
+	spin_lock_irq(&ctx->lock);
+	if (!counter->active) {
+		counter->irqdata = counter->usrdata;
+		counter->usrdata = oldirqdata;
+		spin_unlock_irq(&ctx->lock);
+		return oldirqdata;
+	}
+	spin_unlock_irq(&ctx->lock);
+	task_oncpu_function_call(task, __perf_switch_irq_data, counter);
+	/* Might have failed, because task was scheduled out */
+	if (counter->irqdata == oldirqdata)
+		goto retry;
+
+	return counter->usrdata;
+}
+
+static void put_context(struct perf_counter_context *ctx)
+{
+	if (ctx->task)
+		put_task_struct(ctx->task);
+}
+
+static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
+	struct task_struct *task;
+
+	/*
+	 * If cpu is not a wildcard then this is a percpu counter:
+	 */
+	if (cpu != -1) {
+		/* Must be root to operate on a CPU counter: */
+		if (!capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-EACCES);
+
+		if (cpu < 0 || cpu > num_possible_cpus())
+			return ERR_PTR(-EINVAL);
+
+		/*
+		 * We could be clever and allow to attach a counter to an
+		 * offline CPU and activate it when the CPU comes up, but
+		 * that's for later.
+		 */
+		if (!cpu_isset(cpu, cpu_online_map))
+			return ERR_PTR(-ENODEV);
+
+		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+
+		WARN_ON_ONCE(ctx->task);
+		return ctx;
+	}
+
+	rcu_read_lock();
+	if (!pid)
+		task = current;
+	else
+		task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+
+	if (!task)
+		return ERR_PTR(-ESRCH);
+
+	ctx = &task->perf_counter_ctx;
+	ctx->task = task;
+
+	/* Reuse ptrace permission checks for now. */
+	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+		put_context(ctx);
+		return ERR_PTR(-EACCES);
+	}
+
+	return ctx;
+}
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static int perf_release(struct inode *inode, struct file *file)
+{
+	struct perf_counter *counter = file->private_data;
+	struct perf_counter_context *ctx = counter->ctx;
+
+	file->private_data = NULL;
+
+	mutex_lock(&counter->mutex);
+
+	perf_remove_from_context(counter);
+	put_context(ctx);
+
+	mutex_unlock(&counter->mutex);
+
+	kfree(counter);
+
+	return 0;
+}
+
+/*
+ * Read the performance counter - simple non blocking version for now
+ */
+static ssize_t
+perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
+{
+	u64 cntval;
+
+	if (count != sizeof(cntval))
+		return -EINVAL;
+
+	mutex_lock(&counter->mutex);
+	cntval = perf_read_counter(counter);
+	mutex_unlock(&counter->mutex);
+
+	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
+}
+
+static ssize_t
+perf_copy_usrdata(struct perf_data *usrdata, char __user *buf, size_t count)
+{
+	if (!usrdata->len)
+		return 0;
+
+	count = min(count, (size_t)usrdata->len);
+	if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count))
+		return -EFAULT;
+
+	/* Adjust the counters */
+	usrdata->len -= count;
+	if (!usrdata->len)
+		usrdata->rd_idx = 0;
+	else
+		usrdata->rd_idx += count;
+
+	return count;
+}
+
+static ssize_t
+perf_read_irq_data(struct perf_counter	*counter,
+		   char __user		*buf,
+		   size_t		count,
+		   int			nonblocking)
+{
+	struct perf_data *irqdata, *usrdata;
+	DECLARE_WAITQUEUE(wait, current);
+	ssize_t res;
+
+	irqdata = counter->irqdata;
+	usrdata = counter->usrdata;
+
+	if (usrdata->len + irqdata->len >= count)
+		goto read_pending;
+
+	if (nonblocking)
+		return -EAGAIN;
+
+	spin_lock_irq(&counter->waitq.lock);
+	__add_wait_queue(&counter->waitq, &wait);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (usrdata->len + irqdata->len >= count)
+			break;
+
+		if (signal_pending(current))
+			break;
+
+		spin_unlock_irq(&counter->waitq.lock);
+		schedule();
+		spin_lock_irq(&counter->waitq.lock);
+	}
+	__remove_wait_queue(&counter->waitq, &wait);
+	__set_current_state(TASK_RUNNING);
+	spin_unlock_irq(&counter->waitq.lock);
+
+	if (usrdata->len + irqdata->len < count)
+		return -ERESTARTSYS;
+read_pending:
+	mutex_lock(&counter->mutex);
+
+	/* Drain pending data first: */
+	res = perf_copy_usrdata(usrdata, buf, count);
+	if (res < 0 || res == count)
+		goto out;
+
+	/* Switch irq buffer: */
+	usrdata = perf_switch_irq_data(counter);
+	if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) {
+		if (!res)
+			res = -EFAULT;
+	} else {
+		res = count;
+	}
+out:
+	mutex_unlock(&counter->mutex);
+
+	return res;
+}
+
+static ssize_t
+perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct perf_counter *counter = file->private_data;
+
+	switch (counter->record_type) {
+	case PERF_RECORD_SIMPLE:
+		return perf_read_hw(counter, buf, count);
+
+	case PERF_RECORD_IRQ:
+	case PERF_RECORD_GROUP:
+		return perf_read_irq_data(counter, buf, count,
+					  file->f_flags & O_NONBLOCK);
+	}
+	return -EINVAL;
+}
+
+static unsigned int perf_poll(struct file *file, poll_table *wait)
+{
+	struct perf_counter *counter = file->private_data;
+	unsigned int events = 0;
+	unsigned long flags;
+
+	poll_wait(file, &counter->waitq, wait);
+
+	spin_lock_irqsave(&counter->waitq.lock, flags);
+	if (counter->usrdata->len || counter->irqdata->len)
+		events |= POLLIN;
+	spin_unlock_irqrestore(&counter->waitq.lock, flags);
+
+	return events;
+}
+
+static const struct file_operations perf_fops = {
+	.release		= perf_release,
+	.read			= perf_read,
+	.poll			= perf_poll,
+};
+
+/*
+ * Allocate and initialize a counter structure
+ */
+static struct perf_counter *
+perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type)
+{
+	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+
+	if (!counter)
+		return NULL;
+
+	mutex_init(&counter->mutex);
+	INIT_LIST_HEAD(&counter->list);
+	init_waitqueue_head(&counter->waitq);
+
+	counter->irqdata	= &counter->data[0];
+	counter->usrdata	= &counter->data[1];
+	counter->cpu		= cpu;
+	counter->record_type	= record_type;
+	counter->__irq_period	= hw_event_period;
+	counter->wakeup_pending = 0;
+
+	return counter;
+}
+
+/**
+ * sys_perf_task_open - open a performance counter associate it to a task
+ * @hw_event_type:	event type for monitoring/sampling...
+ * @pid:		target pid
+ */
+asmlinkage int
+sys_perf_counter_open(u32 hw_event_type,
+		      u32 hw_event_period,
+		      u32 record_type,
+		      pid_t pid,
+		      int cpu)
+{
+	struct perf_counter_context *ctx;
+	struct perf_counter *counter;
+	int ret;
+
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	ret = -ENOMEM;
+	counter = perf_counter_alloc(hw_event_period, cpu, record_type);
+	if (!counter)
+		goto err_put_context;
+
+	ret = hw_perf_counter_init(counter, hw_event_type);
+	if (ret)
+		goto err_free_put_context;
+
+	perf_install_in_context(ctx, counter, cpu);
+
+	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
+	if (ret < 0)
+		goto err_remove_free_put_context;
+
+	return ret;
+
+err_remove_free_put_context:
+	mutex_lock(&counter->mutex);
+	perf_remove_from_context(counter);
+	mutex_unlock(&counter->mutex);
+
+err_free_put_context:
+	kfree(counter);
+
+err_put_context:
+	put_context(ctx);
+
+	return ret;
+}
+
+static void __cpuinit perf_init_cpu(int cpu)
+{
+	struct perf_cpu_context *ctx;
+
+	ctx = &per_cpu(perf_cpu_context, cpu);
+	spin_lock_init(&ctx->ctx.lock);
+	INIT_LIST_HEAD(&ctx->ctx.counters);
+
+	mutex_lock(&perf_resource_mutex);
+	ctx->max_pertask = perf_max_counters - perf_reserved_percpu;
+	mutex_unlock(&perf_resource_mutex);
+	hw_perf_counter_setup();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __perf_exit_cpu(void *info)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+	struct perf_counter *counter, *tmp;
+
+	list_for_each_entry_safe(counter, tmp, &ctx->counters, list)
+		__perf_remove_from_context(counter);
+
+}
+static void perf_exit_cpu(int cpu)
+{
+	smp_call_function_single(cpu, __perf_exit_cpu, NULL, 1);
+}
+#else
+static inline void perf_exit_cpu(int cpu) { }
+#endif
+
+static int __cpuinit
+perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action) {
+
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		perf_init_cpu(cpu);
+		break;
+
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		perf_exit_cpu(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata perf_cpu_nb = {
+	.notifier_call		= perf_cpu_notify,
+};
+
+static int __init perf_counter_init(void)
+{
+	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
+			(void *)(long)smp_processor_id());
+	register_cpu_notifier(&perf_cpu_nb);
+
+	return 0;
+}
+early_initcall(perf_counter_init);
+
+static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", perf_reserved_percpu);
+}
+
+static ssize_t
+perf_set_reserve_percpu(struct sysdev_class *class,
+			const char *buf,
+			size_t count)
+{
+	struct perf_cpu_context *cpuctx;
+	unsigned long val;
+	int err, cpu, mpt;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > perf_max_counters)
+		return -EINVAL;
+
+	mutex_lock(&perf_resource_mutex);
+	perf_reserved_percpu = val;
+	for_each_online_cpu(cpu) {
+		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		spin_lock_irq(&cpuctx->ctx.lock);
+		mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
+			  perf_max_counters - perf_reserved_percpu);
+		cpuctx->max_pertask = mpt;
+		spin_unlock_irq(&cpuctx->ctx.lock);
+	}
+	mutex_unlock(&perf_resource_mutex);
+
+	return count;
+}
+
+static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", perf_overcommit);
+}
+
+static ssize_t
+perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
+{
+	unsigned long val;
+	int err;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val > 1)
+		return -EINVAL;
+
+	mutex_lock(&perf_resource_mutex);
+	perf_overcommit = val;
+	mutex_unlock(&perf_resource_mutex);
+
+	return count;
+}
+
+static SYSDEV_CLASS_ATTR(
+				reserve_percpu,
+				0644,
+				perf_show_reserve_percpu,
+				perf_set_reserve_percpu
+			);
+
+static SYSDEV_CLASS_ATTR(
+				overcommit,
+				0644,
+				perf_show_overcommit,
+				perf_set_overcommit
+			);
+
+static struct attribute *perfclass_attrs[] = {
+	&attr_reserve_percpu.attr,
+	&attr_overcommit.attr,
+	NULL
+};
+
+static struct attribute_group perfclass_attr_group = {
+	.attrs			= perfclass_attrs,
+	.name			= "perf_counters",
+};
+
+static int __init perf_counter_sysfs_init(void)
+{
+	return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
+				  &perfclass_attr_group);
+}
+device_initcall(perf_counter_sysfs_init);
+
diff --git a/kernel/sched.c b/kernel/sched.c
index b7480fb..254d56d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2212,6 +2212,27 @@ static int sched_balance_self(int cpu, int flag)
 
 #endif /* CONFIG_SMP */
 
+/**
+ * task_oncpu_function_call - call a function on the cpu on which a task runs
+ * @p:		the task to evaluate
+ * @func:	the function to be called
+ * @info:	the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ */
+void task_oncpu_function_call(struct task_struct *p,
+			      void (*func) (void *info), void *info)
+{
+	int cpu;
+
+	preempt_disable();
+	cpu = task_cpu(p);
+	if (task_curr(p))
+		smp_call_function_single(cpu, func, info, 1);
+	preempt_enable();
+}
+
 /***
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
@@ -2534,6 +2555,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
 	fire_sched_out_preempt_notifiers(prev, next);
+	perf_counter_task_sched_out(prev, cpu_of(rq));
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -2574,6 +2596,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
+	perf_counter_task_sched_in(current, cpu_of(rq));
 	finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
 	if (current->sched_class->post_schedule)
@@ -4296,6 +4319,7 @@ void scheduler_tick(void)
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
 #endif
+	perf_counter_task_tick(curr, cpu);
 }
 
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e14a232..4be8bbc 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -174,3 +174,6 @@ cond_syscall(compat_sys_timerfd_settime);
 cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
+
+/* performance counters: */
+cond_syscall(sys_perf_counter_open);
-- 
cgit v0.10.2


From e7bc62b6b3aeaa8849f8383e0cfb7ca6c003adc6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Dec 2008 20:13:45 +0100
Subject: performance counters: documentation

Add more documentation about performance counters.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf-counters.txt b/Documentation/perf-counters.txt
new file mode 100644
index 0000000..19033a0
--- /dev/null
+++ b/Documentation/perf-counters.txt
@@ -0,0 +1,104 @@
+
+Performance Counters for Linux
+------------------------------
+
+Performance counters are special hardware registers available on most modern
+CPUs. These registers count the number of certain types of hw events: such
+as instructions executed, cachemisses suffered, or branches mis-predicted -
+without slowing down the kernel or applications. These registers can also
+trigger interrupts when a threshold number of events have passed - and can
+thus be used to profile the code that runs on that CPU.
+
+The Linux Performance Counter subsystem provides an abstraction of these
+hardware capabilities. It provides per task and per CPU counters, and
+it provides event capabilities on top of those.
+
+Performance counters are accessed via special file descriptors.
+There's one file descriptor per virtual counter used.
+
+The special file descriptor is opened via the perf_counter_open()
+system call:
+
+ int
+ perf_counter_open(u32 hw_event_type,
+                   u32 hw_event_period,
+                   u32 record_type,
+                   pid_t pid,
+                   int cpu);
+
+The syscall returns the new fd. The fd can be used via the normal
+VFS system calls: read() can be used to read the counter, fcntl()
+can be used to set the blocking mode, etc.
+
+Multiple counters can be kept open at a time, and the counters
+can be poll()ed.
+
+When creating a new counter fd, 'hw_event_type' is one of:
+
+ enum hw_event_types {
+	PERF_COUNT_CYCLES,
+	PERF_COUNT_INSTRUCTIONS,
+	PERF_COUNT_CACHE_REFERENCES,
+	PERF_COUNT_CACHE_MISSES,
+	PERF_COUNT_BRANCH_INSTRUCTIONS,
+	PERF_COUNT_BRANCH_MISSES,
+ };
+
+These are standardized types of events that work uniformly on all CPUs
+that implements Performance Counters support under Linux. If a CPU is
+not able to count branch-misses, then the system call will return
+-EINVAL.
+
+[ Note: more hw_event_types are supported as well, but they are CPU
+  specific and are enumerated via /sys on a per CPU basis. Raw hw event
+  types can be passed in as negative numbers. For example, to count
+  "External bus cycles while bus lock signal asserted" events on Intel
+  Core CPUs, pass in a -0x4064 event type value. ]
+
+The parameter 'hw_event_period' is the number of events before waking up
+a read() that is blocked on a counter fd. Zero value means a non-blocking
+counter.
+
+'record_type' is the type of data that a read() will provide for the
+counter, and it can be one of:
+
+  enum perf_record_type {
+	PERF_RECORD_SIMPLE,
+	PERF_RECORD_IRQ,
+  };
+
+a "simple" counter is one that counts hardware events and allows
+them to be read out into a u64 count value. (read() returns 8 on
+a successful read of a simple counter.)
+
+An "irq" counter is one that will also provide an IRQ context information:
+the IP of the interrupted context. In this case read() will return
+the 8-byte counter value, plus the Instruction Pointer address of the
+interrupted context.
+
+The 'pid' parameter allows the counter to be specific to a task:
+
+ pid == 0: if the pid parameter is zero, the counter is attached to the
+ current task.
+
+ pid > 0: the counter is attached to a specific task (if the current task
+ has sufficient privilege to do so)
+
+ pid < 0: all tasks are counted (per cpu counters)
+
+The 'cpu' parameter allows a counter to be made specific to a full
+CPU:
+
+ cpu >= 0: the counter is restricted to a specific CPU
+ cpu == -1: the counter counts on all CPUs
+
+Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.
+
+A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
+events of that task and 'follows' that task to whatever CPU the task
+gets schedule to. Per task counters can be created by any user, for
+their own tasks.
+
+A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
+all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+
-- 
cgit v0.10.2


From 241771ef016b5c0c83cd7a4372a74321c973c1e6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Dec 2008 10:39:53 +0100
Subject: performance counters: x86 support

Implement performance counters for x86 Intel CPUs.

It's simplified right now: the PERFMON CPU feature is assumed,
which is available in Core2 and later Intel CPUs.

The design is flexible to be extended to more CPU types as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d4d4cb7..f2fdc18 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -643,6 +643,7 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
+	select HAVE_PERF_COUNTERS
 
 config X86_IO_APIC
 	def_bool y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b..3c14ed0 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -823,7 +823,8 @@ ia32_sys_call_table:
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
 	.quad sys_epoll_create1
-	.quad sys_dup3			/* 330 */
+	.quad sys_dup3				/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
+	.quad sys_perf_counter_open
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index 5ca135e..b3e475d 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -9,6 +9,7 @@ typedef struct {
 	unsigned long idle_timestamp;
 	unsigned int __nmi_count;	/* arch dependent */
 	unsigned int apic_timer_irqs;	/* arch dependent */
+	unsigned int apic_perf_irqs;	/* arch dependent */
 	unsigned int irq0_irqs;
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 8de644b..aa93e53 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -30,6 +30,8 @@
 /* Interrupt handlers registered during init_IRQ */
 extern void apic_timer_interrupt(void);
 extern void error_interrupt(void);
+extern void perf_counter_interrupt(void);
+
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
index fa0fd06..71598a9 100644
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ b/arch/x86/include/asm/intel_arch_perfmon.h
@@ -1,22 +1,24 @@
 #ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
 #define _ASM_X86_INTEL_ARCH_PERFMON_H
 
-#define MSR_ARCH_PERFMON_PERFCTR0		0xc1
-#define MSR_ARCH_PERFMON_PERFCTR1		0xc2
+#define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
 
-#define MSR_ARCH_PERFMON_EVENTSEL0		0x186
-#define MSR_ARCH_PERFMON_EVENTSEL1		0x187
+#define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
 
-#define ARCH_PERFMON_EVENTSEL0_ENABLE	(1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT	(1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS	(1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR	(1 << 16)
+#define ARCH_PERFMON_EVENTSEL0_ENABLE			  (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
 
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL	(0x3c)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK	(0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 		 0
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
-	(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
 
 union cpuid10_eax {
 	struct {
@@ -28,4 +30,12 @@ union cpuid10_eax {
 	unsigned int full;
 };
 
+#ifdef CONFIG_PERF_COUNTERS
+extern void init_hw_perf_counters(void);
+extern void perf_counters_lapic_init(int nmi);
+#else
+static inline void init_hw_perf_counters(void)		{ }
+static inline void perf_counters_lapic_init(int nmi)	{ }
+#endif
+
 #endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb..b8d277f 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -87,6 +87,11 @@
 #define LOCAL_TIMER_VECTOR	0xef
 
 /*
+ * Performance monitoring interrupt vector:
+ */
+#define LOCAL_PERF_VECTOR	0xee
+
+/*
  * First APIC vector available to drivers: (vectors 0x30-0xee) we
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index 6b1add8..ad31e5d 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -25,10 +25,15 @@ BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
  * a much simpler SMP time architecture:
  */
 #ifdef CONFIG_X86_LOCAL_APIC
+
 BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
+#ifdef CONFIG_PERF_COUNTERS
+BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
+#endif
+
 #ifdef CONFIG_X86_MCE_P4THERMAL
 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 #endif
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 2fbfff8..90a8d9d 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -30,6 +30,7 @@ struct x8664_pda {
 	short isidle;
 	struct mm_struct *active_mm;
 	unsigned apic_timer_irqs;
+	unsigned apic_perf_irqs;
 	unsigned irq0_irqs;
 	unsigned irq_resched_count;
 	unsigned irq_call_count;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379..810bf26 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -80,6 +80,7 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
+#define TIF_PERF_COUNTERS	11	/* notify perf counter work */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* 32bit process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -103,6 +104,7 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
+#define _TIF_PERF_COUNTERS	(1 << TIF_PERF_COUNTERS)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
@@ -135,7 +137,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index f2bba78..7e47658 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -338,6 +338,7 @@
 #define __NR_dup3		330
 #define __NR_pipe2		331
 #define __NR_inotify_init1	332
+#define __NR_perf_counter_open	333
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d2e415e..53025fe 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -653,7 +653,8 @@ __SYSCALL(__NR_dup3, sys_dup3)
 __SYSCALL(__NR_pipe2, sys_pipe2)
 #define __NR_inotify_init1			294
 __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
-
+#define __NR_perf_counter_open		295
+__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f9487..8ab8c18 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -31,6 +31,7 @@
 #include <linux/dmi.h>
 #include <linux/dmar.h>
 
+#include <asm/intel_arch_perfmon.h>
 #include <asm/atomic.h>
 #include <asm/smp.h>
 #include <asm/mtrr.h>
@@ -1147,6 +1148,7 @@ void __cpuinit setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 	}
 #endif
+	perf_counters_lapic_init(0);
 
 	preempt_disable();
 
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82ec607..89e5336 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
 #
-# Makefile for x86-compatible CPU details and quirks
+# Makefile for x86-compatible CPU details, features and quirks
 #
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
@@ -16,11 +16,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR_64)	+= centaur_64.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
-obj-$(CONFIG_X86_MCE)	+= mcheck/
-obj-$(CONFIG_MTRR)	+= mtrr/
-obj-$(CONFIG_CPU_FREQ)	+= cpufreq/
+obj-$(CONFIG_PERF_COUNTERS)		+= perf_counter.o
 
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+obj-$(CONFIG_X86_MCE)			+= mcheck/
+obj-$(CONFIG_MTRR)			+= mtrr/
+obj-$(CONFIG_CPU_FREQ)			+= cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0..4461011 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -17,6 +17,7 @@
 #include <asm/mmu_context.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
+#include <asm/intel_arch_perfmon.h>
 #include <asm/pat.h>
 #include <asm/asm.h>
 #include <asm/numa.h>
@@ -750,6 +751,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
+	init_hw_perf_counters();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 0000000..82440cb
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,571 @@
+/*
+ * Performance counter x86 architecture code
+ *
+ *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_counter.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+
+#include <asm/intel_arch_perfmon.h>
+#include <asm/apic.h>
+
+static bool perf_counters_initialized __read_mostly;
+
+/*
+ * Number of (generic) HW counters:
+ */
+static int nr_hw_counters __read_mostly;
+static u32 perf_counter_mask __read_mostly;
+
+/* No support for fixed function counters yet */
+
+#define MAX_HW_COUNTERS		8
+
+struct cpu_hw_counters {
+	struct perf_counter	*counters[MAX_HW_COUNTERS];
+	unsigned long		used[BITS_TO_LONGS(MAX_HW_COUNTERS)];
+	int			enable_all;
+};
+
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
+static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
+
+const int intel_perfmon_event_map[] =
+{
+  [PERF_COUNT_CYCLES]			= 0x003c,
+  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
+  [PERF_COUNT_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
+};
+
+const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
+
+/*
+ * Setup the hardware configuration for a given hw_event_type
+ */
+int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	if (unlikely(!perf_counters_initialized))
+		return -EINVAL;
+
+	/*
+	 * Count user events, and generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * If privileged enough, count OS events too, and allow
+	 * NMI events as well:
+	 */
+	hwc->nmi = 0;
+	if (capable(CAP_SYS_ADMIN)) {
+		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+		if (hw_event_type & PERF_COUNT_NMI)
+			hwc->nmi = 1;
+	}
+
+	hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
+	hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
+
+	hwc->irq_period = counter->__irq_period;
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic counter period:
+	 */
+	if (!hwc->irq_period)
+		hwc->irq_period = 0x7FFFFFFF;
+
+	hwc->next_count = -((s32) hwc->irq_period);
+
+	/*
+	 * Negative event types mean raw encoded event+umask values:
+	 */
+	if (hw_event_type < 0) {
+		counter->hw_event_type = -hw_event_type;
+		counter->hw_event_type &= ~PERF_COUNT_NMI;
+	} else {
+		hw_event_type &= ~PERF_COUNT_NMI;
+		if (hw_event_type >= max_intel_perfmon_events)
+			return -EINVAL;
+		/*
+		 * The generic map:
+		 */
+		counter->hw_event_type = intel_perfmon_event_map[hw_event_type];
+	}
+	hwc->config |= counter->hw_event_type;
+	counter->wakeup_pending = 0;
+
+	return 0;
+}
+
+static void __hw_perf_enable_all(void)
+{
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
+}
+
+void hw_perf_enable_all(void)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+	cpuc->enable_all = 1;
+	__hw_perf_enable_all();
+}
+
+void hw_perf_disable_all(void)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+	cpuc->enable_all = 0;
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+}
+
+static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]);
+
+static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+{
+	per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count;
+
+	wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
+	wrmsr(hwc->config_base + idx, hwc->config, 0);
+}
+
+void hw_perf_counter_enable(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct hw_perf_counter *hwc = &counter->hw;
+	int idx = hwc->idx;
+
+	/* Try to get the previous counter again */
+	if (test_and_set_bit(idx, cpuc->used)) {
+		idx = find_first_zero_bit(cpuc->used, nr_hw_counters);
+		set_bit(idx, cpuc->used);
+		hwc->idx = idx;
+	}
+
+	perf_counters_lapic_init(hwc->nmi);
+
+	wrmsr(hwc->config_base + idx,
+	      hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+
+	cpuc->counters[idx] = counter;
+	counter->hw.config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	__hw_perf_counter_enable(hwc, idx);
+}
+
+#ifdef CONFIG_X86_64
+static inline void atomic64_counter_set(struct perf_counter *counter, u64 val)
+{
+	atomic64_set(&counter->count, val);
+}
+
+static inline u64 atomic64_counter_read(struct perf_counter *counter)
+{
+	return atomic64_read(&counter->count);
+}
+#else
+/*
+ * Todo: add proper atomic64_t support to 32-bit x86:
+ */
+static inline void atomic64_counter_set(struct perf_counter *counter, u64 val64)
+{
+	u32 *val32 = (void *)&val64;
+
+	atomic_set(counter->count32 + 0, *(val32 + 0));
+	atomic_set(counter->count32 + 1, *(val32 + 1));
+}
+
+static inline u64 atomic64_counter_read(struct perf_counter *counter)
+{
+	return atomic_read(counter->count32 + 0) |
+		(u64) atomic_read(counter->count32 + 1) << 32;
+}
+#endif
+
+static void __hw_perf_save_counter(struct perf_counter *counter,
+				   struct hw_perf_counter *hwc, int idx)
+{
+	s64 raw = -1;
+	s64 delta;
+	int err;
+
+	/*
+	 * Get the raw hw counter value:
+	 */
+	err = rdmsrl_safe(hwc->counter_base + idx, &raw);
+	WARN_ON_ONCE(err);
+
+	/*
+	 * Rebase it to zero (it started counting at -irq_period),
+	 * to see the delta since ->prev_count:
+	 */
+	delta = (s64)hwc->irq_period + (s64)(s32)raw;
+
+	atomic64_counter_set(counter, hwc->prev_count + delta);
+
+	/*
+	 * Adjust the ->prev_count offset - if we went beyond
+	 * irq_period of units, then we got an IRQ and the counter
+	 * was set back to -irq_period:
+	 */
+	while (delta >= (s64)hwc->irq_period) {
+		hwc->prev_count += hwc->irq_period;
+		delta -= (s64)hwc->irq_period;
+	}
+
+	/*
+	 * Calculate the next raw counter value we'll write into
+	 * the counter at the next sched-in time:
+	 */
+	delta -= (s64)hwc->irq_period;
+
+	hwc->next_count = (s32)delta;
+}
+
+void perf_counter_print_debug(void)
+{
+	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count;
+	int cpu, err, idx;
+
+	local_irq_disable();
+
+	cpu = smp_processor_id();
+
+	err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_CTRL, &ctrl);
+	WARN_ON_ONCE(err);
+
+	err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_STATUS, &status);
+	WARN_ON_ONCE(err);
+
+	err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_OVF_CTRL, &overflow);
+	WARN_ON_ONCE(err);
+
+	printk(KERN_INFO "\n");
+	printk(KERN_INFO "CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
+	printk(KERN_INFO "CPU#%d: status:     %016llx\n", cpu, status);
+	printk(KERN_INFO "CPU#%d: overflow:   %016llx\n", cpu, overflow);
+
+	for (idx = 0; idx < nr_hw_counters; idx++) {
+		err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl);
+		WARN_ON_ONCE(err);
+
+		err = rdmsrl_safe(MSR_ARCH_PERFMON_PERFCTR0 + idx, &pmc_count);
+		WARN_ON_ONCE(err);
+
+		next_count = per_cpu(prev_next_count[idx], cpu);
+
+		printk(KERN_INFO "CPU#%d: PMC%d ctrl:  %016llx\n",
+			cpu, idx, pmc_ctrl);
+		printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n",
+			cpu, idx, pmc_count);
+		printk(KERN_INFO "CPU#%d: PMC%d next:  %016llx\n",
+			cpu, idx, next_count);
+	}
+	local_irq_enable();
+}
+
+void hw_perf_counter_disable(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct hw_perf_counter *hwc = &counter->hw;
+	unsigned int idx = hwc->idx;
+
+	counter->hw.config &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsr(hwc->config_base + idx, hwc->config, 0);
+
+	clear_bit(idx, cpuc->used);
+	cpuc->counters[idx] = NULL;
+	__hw_perf_save_counter(counter, hwc, idx);
+}
+
+void hw_perf_counter_read(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	unsigned long addr = hwc->counter_base + hwc->idx;
+	s64 offs, val = -1LL;
+	s32 val32;
+	int err;
+
+	/* Careful: NMI might modify the counter offset */
+	do {
+		offs = hwc->prev_count;
+		err = rdmsrl_safe(addr, &val);
+		WARN_ON_ONCE(err);
+	} while (offs != hwc->prev_count);
+
+	val32 = (s32) val;
+	val =  (s64)hwc->irq_period + (s64)val32;
+	atomic64_counter_set(counter, hwc->prev_count + val);
+}
+
+static void perf_store_irq_data(struct perf_counter *counter, u64 data)
+{
+	struct perf_data *irqdata = counter->irqdata;
+
+	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+		irqdata->overrun++;
+	} else {
+		u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+		*p = data;
+		irqdata->len += sizeof(u64);
+	}
+}
+
+static void perf_save_and_restart(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	int idx = hwc->idx;
+
+	wrmsr(hwc->config_base + idx,
+	      hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+
+	if (hwc->config & ARCH_PERFMON_EVENTSEL0_ENABLE) {
+		__hw_perf_save_counter(counter, hwc, idx);
+		__hw_perf_counter_enable(hwc, idx);
+	}
+}
+
+static void
+perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
+{
+	struct perf_counter_context *ctx = leader->ctx;
+	struct perf_counter *counter;
+	int bit;
+
+	list_for_each_entry(counter, &ctx->counters, list) {
+		if (counter->record_type != PERF_RECORD_SIMPLE ||
+		    counter == leader)
+			continue;
+
+		if (counter->active) {
+			/*
+			 * When counter was not in the overflow mask, we have to
+			 * read it from hardware. We read it as well, when it
+			 * has not been read yet and clear the bit in the
+			 * status mask.
+			 */
+			bit = counter->hw.idx;
+			if (!test_bit(bit, (unsigned long *) overflown) ||
+			    test_bit(bit, (unsigned long *) status)) {
+				clear_bit(bit, (unsigned long *) status);
+				perf_save_and_restart(counter);
+			}
+		}
+		perf_store_irq_data(leader, counter->hw_event_type);
+		perf_store_irq_data(leader, atomic64_counter_read(counter));
+	}
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
+{
+	int bit, cpu = smp_processor_id();
+	struct cpu_hw_counters *cpuc;
+	u64 ack, status;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+	if (!status) {
+		ack_APIC_irq();
+		return;
+	}
+
+	/* Disable counters globally */
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+	ack_APIC_irq();
+
+	cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+again:
+	ack = status;
+	for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) {
+		struct perf_counter *counter = cpuc->counters[bit];
+
+		clear_bit(bit, (unsigned long *) &status);
+		if (!counter)
+			continue;
+
+		perf_save_and_restart(counter);
+
+		switch (counter->record_type) {
+		case PERF_RECORD_SIMPLE:
+			continue;
+		case PERF_RECORD_IRQ:
+			perf_store_irq_data(counter, instruction_pointer(regs));
+			break;
+		case PERF_RECORD_GROUP:
+			perf_store_irq_data(counter, counter->hw_event_type);
+			perf_store_irq_data(counter,
+					    atomic64_counter_read(counter));
+			perf_handle_group(counter, &status, &ack);
+			break;
+		}
+		/*
+		 * From NMI context we cannot call into the scheduler to
+		 * do a task wakeup - but we mark these counters as
+		 * wakeup_pending and initate a wakeup callback:
+		 */
+		if (nmi) {
+			counter->wakeup_pending = 1;
+			set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+		} else {
+			wake_up(&counter->waitq);
+		}
+	}
+
+	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack, 0);
+
+	/*
+	 * Repeat if there is more work to be done:
+	 */
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+	if (status)
+		goto again;
+
+	/*
+	 * Do not reenable when global enable is off:
+	 */
+	if (cpuc->enable_all)
+		__hw_perf_enable_all();
+}
+
+void smp_perf_counter_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+#ifdef CONFIG_X86_64
+	add_pda(apic_perf_irqs, 1);
+#else
+	per_cpu(irq_stat, smp_processor_id()).apic_perf_irqs++;
+#endif
+	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
+	__smp_perf_counter_interrupt(regs, 0);
+
+	irq_exit();
+}
+
+/*
+ * This handler is triggered by NMI contexts:
+ */
+void perf_counter_notify(struct pt_regs *regs)
+{
+	struct cpu_hw_counters *cpuc;
+	unsigned long flags;
+	int bit, cpu;
+
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+	for_each_bit(bit, cpuc->used, nr_hw_counters) {
+		struct perf_counter *counter = cpuc->counters[bit];
+
+		if (!counter)
+			continue;
+
+		if (counter->wakeup_pending) {
+			counter->wakeup_pending = 0;
+			wake_up(&counter->waitq);
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+void __cpuinit perf_counters_lapic_init(int nmi)
+{
+	u32 apic_val;
+
+	if (!perf_counters_initialized)
+		return;
+	/*
+	 * Enable the performance counter vector in the APIC LVT:
+	 */
+	apic_val = apic_read(APIC_LVTERR);
+
+	apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
+	if (nmi)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
+	else
+		apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
+	apic_write(APIC_LVTERR, apic_val);
+}
+
+static int __kprobes
+perf_counter_nmi_handler(struct notifier_block *self,
+			 unsigned long cmd, void *__args)
+{
+	struct die_args *args = __args;
+	struct pt_regs *regs;
+
+	if (likely(cmd != DIE_NMI_IPI))
+		return NOTIFY_DONE;
+
+	regs = args->regs;
+
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	__smp_perf_counter_interrupt(regs, 1);
+
+	return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
+	.notifier_call		= perf_counter_nmi_handler
+};
+
+void __init init_hw_perf_counters(void)
+{
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int ebx;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+		return;
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Branch Misses Retired Event or not.
+	 */
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+		return;
+
+	printk(KERN_INFO "Intel Performance Monitoring support detected.\n");
+
+	printk(KERN_INFO "... version:      %d\n", eax.split.version_id);
+	printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters);
+	nr_hw_counters = eax.split.num_counters;
+	if (nr_hw_counters > MAX_HW_COUNTERS) {
+		nr_hw_counters = MAX_HW_COUNTERS;
+		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
+			nr_hw_counters, MAX_HW_COUNTERS);
+	}
+	perf_counter_mask = (1 << nr_hw_counters) - 1;
+	perf_max_counters = nr_hw_counters;
+
+	printk(KERN_INFO "... bit_width:    %d\n", eax.split.bit_width);
+	printk(KERN_INFO "... mask_length:  %d\n", eax.split.mask_length);
+
+	perf_counters_lapic_init(0);
+	register_die_notifier(&perf_counter_nmi_notifier);
+
+	perf_counters_initialized = true;
+}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3194636..fc013cf 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -984,6 +984,11 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
+#ifdef CONFIG_PERF_COUNTERS
+apicinterrupt LOCAL_PERF_VECTOR \
+	perf_counter_interrupt smp_perf_counter_interrupt
+#endif
+
 /*
  * Exception entry points.
  */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc5..d92bc71 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -56,6 +56,10 @@ static int show_other_interrupts(struct seq_file *p)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
 	seq_printf(p, "  Local timer interrupts\n");
+	seq_printf(p, "CNT: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+	seq_printf(p, "  Performance counter interrupts\n");
 #endif
 #ifdef CONFIG_SMP
 	seq_printf(p, "RES: ");
@@ -160,6 +164,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
+	sum += irq_stats(cpu)->apic_perf_irqs;
 #endif
 #ifdef CONFIG_SMP
 	sum += irq_stats(cpu)->irq_resched_count;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 607db63..6a33b5e 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -160,6 +160,9 @@ void __init native_init_IRQ(void)
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+# ifdef CONFIG_PERF_COUNTERS
+	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+# endif
 #endif
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 8670b3c..91d785c 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -138,6 +138,11 @@ static void __init apic_intr_init(void)
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+	/* Performance monitoring interrupt: */
+#ifdef CONFIG_PERF_COUNTERS
+	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+#endif
 }
 
 void __init native_init_IRQ(void)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b1cc6da..dee553c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,7 @@
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
  */
-
+#include <linux/perf_counter.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -891,6 +891,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		tracehook_notify_resume(regs);
 	}
 
+	if (thread_info_flags & _TIF_PERF_COUNTERS) {
+		clear_thread_flag(TIF_PERF_COUNTERS);
+		perf_counter_notify(regs);
+	}
+
 #ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d44395f..496726d 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -332,3 +332,4 @@ ENTRY(sys_call_table)
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
 	.long sys_inotify_init1
+	.long sys_perf_counter_open
-- 
cgit v0.10.2


From 87b9cf4623ad4e5fc009e48c020593dffd5d3793 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 8 Dec 2008 14:20:16 +0100
Subject: x86, perfcounters: read out MSR_CORE_PERF_GLOBAL_STATUS with counters
 disabled

Impact: make perfcounter NMI and IRQ sequence more robust

Make __smp_perf_counter_interrupt() a bit more conservative: first disable
all counters, then read out the status. Most invocations are because there
are real events, so there's no performance impact.

Code flow gets a bit simpler as well this way.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 82440cb..615e953 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -383,18 +383,16 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 	struct cpu_hw_counters *cpuc;
 	u64 ack, status;
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-	if (!status) {
-		ack_APIC_irq();
-		return;
-	}
-
 	/* Disable counters globally */
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
 	ack_APIC_irq();
 
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+	if (!status)
+		goto out;
+
 again:
 	ack = status;
 	for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) {
@@ -440,7 +438,7 @@ again:
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 	if (status)
 		goto again;
-
+out:
 	/*
 	 * Do not reenable when global enable is off:
 	 */
-- 
cgit v0.10.2


From 4c59e4676dc95f6f58a2cff5390b2699fa5b5549 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 8 Dec 2008 19:38:33 +0100
Subject: perfcounters: select ANON_INODES

The perfcounters subsystem depends on CONFIG_ANON_INODES facilities,
so make sure it's selected.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/init/Kconfig b/init/Kconfig
index 78bede2..7d147a3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -741,6 +741,7 @@ config PERF_COUNTERS
 	bool "Kernel Performance Counters"
 	depends on HAVE_PERF_COUNTERS
 	default y
+	select ANON_INODES
 	help
 	  Enable kernel support for performance counter hardware.
 
-- 
cgit v0.10.2


From 7e2ae34749edf19e76e594b9c4b2cdde1066afc5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 9 Dec 2008 11:40:46 +0100
Subject: perfcounters, x86: simplify disable/enable of counters

Impact: fix spurious missed counter wakeups

In the case of NMI events, close a race window that can occur if an NMI
hits counter code that temporarily disables+enables a counter, and the NMI
leaks into the disabled section.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 615e953..7d528ff 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -136,14 +136,25 @@ void hw_perf_disable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
 }
 
+static inline void
+__hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
+{
+	wrmsr(hwc->config_base + idx, hwc->config, 0);
+}
+
 static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]);
 
-static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx)
 {
 	per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count;
 
 	wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
-	wrmsr(hwc->config_base + idx, hwc->config, 0);
+}
+
+static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+{
+	wrmsr(hwc->config_base + idx,
+	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
 void hw_perf_counter_enable(struct perf_counter *counter)
@@ -161,11 +172,11 @@ void hw_perf_counter_enable(struct perf_counter *counter)
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	wrmsr(hwc->config_base + idx,
-	      hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+	__hw_perf_counter_disable(hwc, idx);
 
 	cpuc->counters[idx] = counter;
-	counter->hw.config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	__hw_perf_counter_set_period(hwc, idx);
 	__hw_perf_counter_enable(hwc, idx);
 }
 
@@ -286,8 +297,7 @@ void hw_perf_counter_disable(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	counter->hw.config &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsr(hwc->config_base + idx, hwc->config, 0);
+	__hw_perf_counter_disable(hwc, idx);
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
@@ -328,18 +338,24 @@ static void perf_store_irq_data(struct perf_counter *counter, u64 data)
 	}
 }
 
+/*
+ * NMI-safe enable method:
+ */
 static void perf_save_and_restart(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 	int idx = hwc->idx;
+	u64 pmc_ctrl;
+	int err;
 
-	wrmsr(hwc->config_base + idx,
-	      hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+	err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl);
+	WARN_ON_ONCE(err);
 
-	if (hwc->config & ARCH_PERFMON_EVENTSEL0_ENABLE) {
-		__hw_perf_save_counter(counter, hwc, idx);
+	__hw_perf_save_counter(counter, hwc, idx);
+	__hw_perf_counter_set_period(hwc, idx);
+
+	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
 		__hw_perf_counter_enable(hwc, idx);
-	}
 }
 
 static void
-- 
cgit v0.10.2


From 1e12567678054bc1d4c944ecfad17624b3e49345 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 9 Dec 2008 12:18:18 +0100
Subject: perfcounters, x86: clean up debug code

Impact: cleanup

Get rid of unused debug code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7d528ff..919ec46 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -214,13 +214,11 @@ static void __hw_perf_save_counter(struct perf_counter *counter,
 {
 	s64 raw = -1;
 	s64 delta;
-	int err;
 
 	/*
 	 * Get the raw hw counter value:
 	 */
-	err = rdmsrl_safe(hwc->counter_base + idx, &raw);
-	WARN_ON_ONCE(err);
+	rdmsrl(hwc->counter_base + idx, raw);
 
 	/*
 	 * Rebase it to zero (it started counting at -irq_period),
@@ -252,20 +250,18 @@ static void __hw_perf_save_counter(struct perf_counter *counter,
 void perf_counter_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count;
-	int cpu, err, idx;
+	int cpu, idx;
+
+	if (!nr_hw_counters)
+		return;
 
 	local_irq_disable();
 
 	cpu = smp_processor_id();
 
-	err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_CTRL, &ctrl);
-	WARN_ON_ONCE(err);
-
-	err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_STATUS, &status);
-	WARN_ON_ONCE(err);
-
-	err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_OVF_CTRL, &overflow);
-	WARN_ON_ONCE(err);
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+	rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 
 	printk(KERN_INFO "\n");
 	printk(KERN_INFO "CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
@@ -273,11 +269,8 @@ void perf_counter_print_debug(void)
 	printk(KERN_INFO "CPU#%d: overflow:   %016llx\n", cpu, overflow);
 
 	for (idx = 0; idx < nr_hw_counters; idx++) {
-		err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl);
-		WARN_ON_ONCE(err);
-
-		err = rdmsrl_safe(MSR_ARCH_PERFMON_PERFCTR0 + idx, &pmc_count);
-		WARN_ON_ONCE(err);
+		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
+		rdmsrl(MSR_ARCH_PERFMON_PERFCTR0  + idx, pmc_count);
 
 		next_count = per_cpu(prev_next_count[idx], cpu);
 
@@ -310,13 +303,11 @@ void hw_perf_counter_read(struct perf_counter *counter)
 	unsigned long addr = hwc->counter_base + hwc->idx;
 	s64 offs, val = -1LL;
 	s32 val32;
-	int err;
 
 	/* Careful: NMI might modify the counter offset */
 	do {
 		offs = hwc->prev_count;
-		err = rdmsrl_safe(addr, &val);
-		WARN_ON_ONCE(err);
+		rdmsrl(addr, val);
 	} while (offs != hwc->prev_count);
 
 	val32 = (s32) val;
@@ -346,10 +337,8 @@ static void perf_save_and_restart(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	int idx = hwc->idx;
 	u64 pmc_ctrl;
-	int err;
 
-	err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl);
-	WARN_ON_ONCE(err);
+	rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 
 	__hw_perf_save_counter(counter, hwc, idx);
 	__hw_perf_counter_set_period(hwc, idx);
-- 
cgit v0.10.2


From 43874d238d5f208854a73c3225ca2a22833eec8b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 9 Dec 2008 12:23:59 +0100
Subject: perfcounters: consolidate global-disable codepaths

Impact: cleanup

Simplify global disable handling.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 919ec46..6a93d1f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -33,7 +33,6 @@ static u32 perf_counter_mask __read_mostly;
 struct cpu_hw_counters {
 	struct perf_counter	*counters[MAX_HW_COUNTERS];
 	unsigned long		used[BITS_TO_LONGS(MAX_HW_COUNTERS)];
-	int			enable_all;
 };
 
 /*
@@ -115,24 +114,13 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
 	return 0;
 }
 
-static void __hw_perf_enable_all(void)
-{
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
-}
-
 void hw_perf_enable_all(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	cpuc->enable_all = 1;
-	__hw_perf_enable_all();
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
 void hw_perf_disable_all(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	cpuc->enable_all = 0;
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
 }
 
@@ -385,8 +373,10 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 {
 	int bit, cpu = smp_processor_id();
+	u64 ack, status, saved_global;
 	struct cpu_hw_counters *cpuc;
-	u64 ack, status;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
 
 	/* Disable counters globally */
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
@@ -445,10 +435,9 @@ again:
 		goto again;
 out:
 	/*
-	 * Do not reenable when global enable is off:
+	 * Restore - do not reenable when global enable is off:
 	 */
-	if (cpuc->enable_all)
-		__hw_perf_enable_all();
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, saved_global, 0);
 }
 
 void smp_perf_counter_interrupt(struct pt_regs *regs)
-- 
cgit v0.10.2


From 4ac13294e44664bb7edf4daf52edb71e7c6bbe84 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 9 Dec 2008 21:43:39 +0100
Subject: perf counters: protect them against CSTATE transitions

Impact: fix rare lost events problem

There are CPUs whose performance counters misbehave on CSTATE transitions,
so provide a way to just disable/enable them around deep idle methods.

(hw_perf_enable_all() is cheap on x86.)

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6a93d1f..0a7f3be 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -12,6 +12,7 @@
 #include <linux/notifier.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 
@@ -119,10 +120,21 @@ void hw_perf_enable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
-void hw_perf_disable_all(void)
+void hw_perf_restore_ctrl(u64 ctrl)
 {
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
+}
+EXPORT_SYMBOL_GPL(hw_perf_restore_ctrl);
+
+u64 hw_perf_disable_all(void)
+{
+	u64 ctrl;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+	return ctrl;
 }
+EXPORT_SYMBOL_GPL(hw_perf_disable_all);
 
 static inline void
 __hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 5f8d746..cca804e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -270,8 +270,11 @@ static atomic_t c3_cpu_count;
 /* Common C-state entry for C2, C3, .. */
 static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 {
+	u64 pctrl;
+
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
+	pctrl = hw_perf_disable_all();
 	if (cstate->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cstate);
@@ -284,6 +287,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
+	hw_perf_restore_ctrl(pctrl);
 	start_critical_timings();
 }
 #endif /* !CONFIG_CPU_IDLE */
@@ -1425,8 +1429,11 @@ static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
  */
 static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
+	u64 pctrl;
+
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
+	pctrl = hw_perf_disable_all();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -1441,6 +1448,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
+	hw_perf_restore_ctrl(pctrl);
 	start_critical_timings();
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 22c4469..5031b56 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -156,6 +156,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern void perf_counter_init_task(struct task_struct *task);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
+extern void hw_perf_restore_ctrl(u64 ctrl);
+extern u64 hw_perf_disable_all(void);
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -166,6 +168,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline void perf_counter_init_task(struct task_struct *task)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
+static inline void hw_perf_restore_ctrl(u64 ctrl)			{ }
+static inline u64 hw_perf_disable_all(void)		{ return 0; }
 #endif
 
 #endif /* _LINUX_PERF_COUNTER_H */
-- 
cgit v0.10.2


From eab656ae04b9d3b83265e3db01c0d2c46b748ef7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 8 Dec 2008 19:26:59 +0100
Subject: perf counters: clean up 'raw' type API

Impact: cleanup

Introduce a separate hw_event type.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5031b56..daedd7d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -38,6 +38,7 @@ enum hw_event_types {
 	 * If this bit is set in the type, then trigger NMI sampling:
 	 */
 	PERF_COUNT_NMI			= (1 << 30),
+	PERF_COUNT_RAW			= (1 << 31),
 };
 
 /*
@@ -49,6 +50,12 @@ enum perf_record_type {
 	PERF_RECORD_GROUP,
 };
 
+struct perf_counter_event {
+	u32			hw_event_type;
+	u32			hw_event_period;
+	u64			hw_raw_ctrl;
+};
+
 /**
  * struct hw_perf_counter - performance counter hardware details
  */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 6cce728..3ecd73d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,6 +54,7 @@ struct compat_stat;
 struct compat_timeval;
 struct robust_list_head;
 struct getcpu_cache;
+struct perf_counter_event;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -625,9 +626,6 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 asmlinkage int
-sys_perf_counter_open(u32 hw_event_type,
-		      u32 hw_event_period,
-		      u32 record_type,
-		      pid_t pid,
-		      int cpu);
+sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
+		      pid_t pid, int cpu, int masterfd);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 20508f0..96c333a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -734,26 +734,27 @@ perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type)
  * @pid:		target pid
  */
 asmlinkage int
-sys_perf_counter_open(u32 hw_event_type,
-		      u32 hw_event_period,
-		      u32 record_type,
-		      pid_t pid,
-		      int cpu)
+sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
+		      pid_t pid, int cpu, int masterfd)
 {
 	struct perf_counter_context *ctx;
+	struct perf_counter_event event;
 	struct perf_counter *counter;
 	int ret;
 
+	if (copy_from_user(&event, uevent, sizeof(event)) != 0)
+		return -EFAULT;
+
 	ctx = find_get_context(pid, cpu);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(hw_event_period, cpu, record_type);
+	counter = perf_counter_alloc(event.hw_event_period, cpu, record_type);
 	if (!counter)
 		goto err_put_context;
 
-	ret = hw_perf_counter_init(counter, hw_event_type);
+	ret = hw_perf_counter_init(counter, event.hw_event_type);
 	if (ret)
 		goto err_free_put_context;
 
-- 
cgit v0.10.2


From dfa7c899b401d7dc5d85aca416aee64ac82812f2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 8 Dec 2008 19:35:37 +0100
Subject: perf counters: expand use of counter->event

Impact: change syscall, cleanup

Make use of the new perf_counters event type.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 0a7f3be..30e7ebf 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -56,9 +56,10 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 /*
  * Setup the hardware configuration for a given hw_event_type
  */
-int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
+int hw_perf_counter_init(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
+	u32 hw_event_type = counter->event.hw_event_type;
 
 	if (unlikely(!perf_counters_initialized))
 		return -EINVAL;
@@ -83,7 +84,7 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
 	hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
 	hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
 
-	hwc->irq_period = counter->__irq_period;
+	hwc->irq_period = counter->event.hw_event_period;
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -95,21 +96,19 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
 	hwc->next_count = -((s32) hwc->irq_period);
 
 	/*
-	 * Negative event types mean raw encoded event+umask values:
+	 * Raw event type provide the config in the event structure
 	 */
-	if (hw_event_type < 0) {
-		counter->hw_event_type = -hw_event_type;
-		counter->hw_event_type &= ~PERF_COUNT_NMI;
+	hw_event_type &= ~PERF_COUNT_NMI;
+	if (hw_event_type == PERF_COUNT_RAW) {
+		hwc->config |= counter->event.hw_raw_ctrl;
 	} else {
-		hw_event_type &= ~PERF_COUNT_NMI;
 		if (hw_event_type >= max_intel_perfmon_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		counter->hw_event_type = intel_perfmon_event_map[hw_event_type];
+		hwc->config |= intel_perfmon_event_map[hw_event_type];
 	}
-	hwc->config |= counter->hw_event_type;
 	counter->wakeup_pending = 0;
 
 	return 0;
@@ -373,7 +372,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 				perf_save_and_restart(counter);
 			}
 		}
-		perf_store_irq_data(leader, counter->hw_event_type);
+		perf_store_irq_data(leader, counter->event.hw_event_type);
 		perf_store_irq_data(leader, atomic64_counter_read(counter));
 	}
 }
@@ -418,7 +417,8 @@ again:
 			perf_store_irq_data(counter, instruction_pointer(regs));
 			break;
 		case PERF_RECORD_GROUP:
-			perf_store_irq_data(counter, counter->hw_event_type);
+			perf_store_irq_data(counter,
+					    counter->event.hw_event_type);
 			perf_store_irq_data(counter,
 					    atomic64_counter_read(counter));
 			perf_handle_group(counter, &status, &ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index daedd7d..1f00176 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -96,8 +96,7 @@ struct perf_counter {
 #else
 	atomic_t			count32[2];
 #endif
-	u64				__irq_period;
-
+	struct perf_counter_event	event;
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
@@ -111,7 +110,6 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
-	s32				hw_event_type;
 	enum perf_record_type		record_type;
 
 	/* read() / irq related data */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 96c333a..2557c67 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -37,7 +37,7 @@ static DEFINE_MUTEX(perf_resource_mutex);
  * Architecture provided APIs - weak aliases:
  */
 
-int __weak hw_perf_counter_init(struct perf_counter *counter, u32 hw_event_type)
+int __weak hw_perf_counter_init(struct perf_counter *counter)
 {
 	return -EINVAL;
 }
@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = {
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type)
+perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
 {
 	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 
@@ -722,7 +722,7 @@ perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type)
 	counter->usrdata	= &counter->data[1];
 	counter->cpu		= cpu;
 	counter->record_type	= record_type;
-	counter->__irq_period	= hw_event_period;
+	counter->event		= *event;
 	counter->wakeup_pending = 0;
 
 	return counter;
@@ -750,11 +750,11 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(event.hw_event_period, cpu, record_type);
+	counter = perf_counter_alloc(&event, cpu, record_type);
 	if (!counter)
 		goto err_put_context;
 
-	ret = hw_perf_counter_init(counter, event.hw_event_type);
+	ret = hw_perf_counter_init(counter);
 	if (ret)
 		goto err_free_put_context;
 
-- 
cgit v0.10.2


From 9f66a3810fe0d4100972db84290f3ae4a4d77025 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 10 Dec 2008 12:33:23 +0100
Subject: perf counters: restructure the API

Impact: clean up new API

Thorough cleanup of the new perf counters API, we now get clean separation
of the various concepts:

 - introduce perf_counter_hw_event to separate out the event source details

 - move special type flags into separate attributes: PERF_COUNT_NMI,
   PERF_COUNT_RAW

 - extend the type to u64 and reserve it fully to the architecture in the
   raw type case.

And make use of all these changes in the core and x86 perfcounters code.

Also change the syscall signature to:

  asmlinkage int sys_perf_counter_open(

	struct perf_counter_hw_event	*hw_event_uptr		__user,
	pid_t				pid,
	int				cpu,
	int				group_fd);

( Note that group_fd is unused for now - it's reserved for the counter
  groups abstraction. )

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 30e7ebf..ef1936a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
  */
 int hw_perf_counter_init(struct perf_counter *counter)
 {
+	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	struct hw_perf_counter *hwc = &counter->hw;
-	u32 hw_event_type = counter->event.hw_event_type;
 
 	if (unlikely(!perf_counters_initialized))
 		return -EINVAL;
@@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter)
 	hwc->nmi = 0;
 	if (capable(CAP_SYS_ADMIN)) {
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-		if (hw_event_type & PERF_COUNT_NMI)
+		if (hw_event->nmi)
 			hwc->nmi = 1;
 	}
 
-	hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
-	hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
+	hwc->config_base	= MSR_ARCH_PERFMON_EVENTSEL0;
+	hwc->counter_base	= MSR_ARCH_PERFMON_PERFCTR0;
 
-	hwc->irq_period = counter->event.hw_event_period;
+	hwc->irq_period		= hw_event->irq_period;
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter)
 	if (!hwc->irq_period)
 		hwc->irq_period = 0x7FFFFFFF;
 
-	hwc->next_count = -((s32) hwc->irq_period);
+	hwc->next_count	= -(s32)hwc->irq_period;
 
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	hw_event_type &= ~PERF_COUNT_NMI;
-	if (hw_event_type == PERF_COUNT_RAW) {
-		hwc->config |= counter->event.hw_raw_ctrl;
+	if (hw_event->raw) {
+		hwc->config |= hw_event->type;
 	} else {
-		if (hw_event_type >= max_intel_perfmon_events)
+		if (hw_event->type >= max_intel_perfmon_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= intel_perfmon_event_map[hw_event_type];
+		hwc->config |= intel_perfmon_event_map[hw_event->type];
 	}
 	counter->wakeup_pending = 0;
 
@@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 	int bit;
 
 	list_for_each_entry(counter, &ctx->counters, list) {
-		if (counter->record_type != PERF_RECORD_SIMPLE ||
+		if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
 		    counter == leader)
 			continue;
 
@@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 				perf_save_and_restart(counter);
 			}
 		}
-		perf_store_irq_data(leader, counter->event.hw_event_type);
+		perf_store_irq_data(leader, counter->hw_event.type);
 		perf_store_irq_data(leader, atomic64_counter_read(counter));
 	}
 }
@@ -410,7 +409,7 @@ again:
 
 		perf_save_and_restart(counter);
 
-		switch (counter->record_type) {
+		switch (counter->hw_event.record_type) {
 		case PERF_RECORD_SIMPLE:
 			continue;
 		case PERF_RECORD_IRQ:
@@ -418,7 +417,7 @@ again:
 			break;
 		case PERF_RECORD_GROUP:
 			perf_store_irq_data(counter,
-					    counter->event.hw_event_type);
+					    counter->hw_event.type);
 			perf_store_irq_data(counter,
 					    atomic64_counter_read(counter));
 			perf_handle_group(counter, &status, &ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1f00176..a2b4852 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -24,65 +24,93 @@
 struct task_struct;
 
 /*
- * Generalized hardware event types, used by the hw_event_type parameter
- * of the sys_perf_counter_open() syscall:
+ * User-space ABI bits:
+ */
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
  */
 enum hw_event_types {
-	PERF_COUNT_CYCLES,
-	PERF_COUNT_INSTRUCTIONS,
-	PERF_COUNT_CACHE_REFERENCES,
-	PERF_COUNT_CACHE_MISSES,
-	PERF_COUNT_BRANCH_INSTRUCTIONS,
-	PERF_COUNT_BRANCH_MISSES,
 	/*
-	 * If this bit is set in the type, then trigger NMI sampling:
+	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_NMI			= (1 << 30),
-	PERF_COUNT_RAW			= (1 << 31),
+	PERF_COUNT_CYCLES		=  0,
+	PERF_COUNT_INSTRUCTIONS		=  1,
+	PERF_COUNT_CACHE_REFERENCES	=  2,
+	PERF_COUNT_CACHE_MISSES		=  3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
+	PERF_COUNT_BRANCH_MISSES	=  5,
+
+	/*
+	 * Special "software" counters provided by the kernel, even if
+	 * the hardware does not support performance counters. These
+	 * counters measure various physical and sw events of the
+	 * kernel (and allow the profiling of them as well):
+	 */
+	PERF_COUNT_CPU_CLOCK		= -1,
+	PERF_COUNT_TASK_CLOCK		= -2,
+	PERF_COUNT_PAGE_FAULTS		= -3,
+	PERF_COUNT_CONTEXT_SWITCHES	= -4,
 };
 
 /*
  * IRQ-notification data record type:
  */
-enum perf_record_type {
-	PERF_RECORD_SIMPLE,
-	PERF_RECORD_IRQ,
-	PERF_RECORD_GROUP,
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		=  0,
+	PERF_RECORD_IRQ			=  1,
+	PERF_RECORD_GROUP		=  2,
 };
 
-struct perf_counter_event {
-	u32			hw_event_type;
-	u32			hw_event_period;
-	u64			hw_raw_ctrl;
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+	u64			type;
+
+	u64			irq_period;
+	u32			record_type;
+
+	u32			disabled     :  1, /* off by default */
+				nmi	     :  1, /* NMI sampling   */
+				raw	     :  1, /* raw event type */
+				__reserved_1 : 29;
+
+	u64			__reserved_2;
 };
 
+/*
+ * Kernel-internal data types:
+ */
+
 /**
- * struct hw_perf_counter - performance counter hardware details
+ * struct hw_perf_counter - performance counter hardware details:
  */
 struct hw_perf_counter {
-	u64			config;
-	unsigned long		config_base;
-	unsigned long		counter_base;
-	int			nmi;
-	unsigned int		idx;
-	u64			prev_count;
-	s32			next_count;
-	u64			irq_period;
+	u64				config;
+	unsigned long			config_base;
+	unsigned long			counter_base;
+	int				nmi;
+	unsigned int			idx;
+	u64				prev_count;
+	u64				irq_period;
+	s32				next_count;
 };
 
 /*
  * Hardcoded buffer length limit for now, for IRQ-fed events:
  */
-#define PERF_DATA_BUFLEN	2048
+#define PERF_DATA_BUFLEN		2048
 
 /**
  * struct perf_data - performance counter IRQ data sampling ...
  */
 struct perf_data {
-	int			len;
-	int			rd_idx;
-	int			overrun;
-	u8			data[PERF_DATA_BUFLEN];
+	int				len;
+	int				rd_idx;
+	int				overrun;
+	u8				data[PERF_DATA_BUFLEN];
 };
 
 /**
@@ -96,7 +124,7 @@ struct perf_counter {
 #else
 	atomic_t			count32[2];
 #endif
-	struct perf_counter_event	event;
+	struct perf_counter_hw_event	hw_event;
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
@@ -110,8 +138,6 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
-	enum perf_record_type		record_type;
-
 	/* read() / irq related data */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3ecd73d..a549678 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,7 +54,7 @@ struct compat_stat;
 struct compat_timeval;
 struct robust_list_head;
 struct getcpu_cache;
-struct perf_counter_event;
+struct perf_counter_hw_event;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
-		      pid_t pid, int cpu, int masterfd);
+
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2557c67..0d323ce 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct perf_counter *counter = file->private_data;
 
-	switch (counter->record_type) {
+	switch (counter->hw_event.record_type) {
 	case PERF_RECORD_SIMPLE:
 		return perf_read_hw(counter, buf, count);
 
@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = {
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
+perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
 {
 	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 
@@ -718,31 +718,37 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
 	INIT_LIST_HEAD(&counter->list);
 	init_waitqueue_head(&counter->waitq);
 
-	counter->irqdata	= &counter->data[0];
-	counter->usrdata	= &counter->data[1];
-	counter->cpu		= cpu;
-	counter->record_type	= record_type;
-	counter->event		= *event;
-	counter->wakeup_pending = 0;
+	counter->irqdata		= &counter->data[0];
+	counter->usrdata		= &counter->data[1];
+	counter->cpu			= cpu;
+	counter->hw_event		= *hw_event;
+	counter->wakeup_pending		= 0;
 
 	return counter;
 }
 
 /**
- * sys_perf_task_open - open a performance counter associate it to a task
- * @hw_event_type:	event type for monitoring/sampling...
+ * sys_perf_task_open - open a performance counter, associate it to a task/cpu
+ *
+ * @hw_event_uptr:	event type attributes for monitoring/sampling
  * @pid:		target pid
+ * @cpu:		target cpu
+ * @group_fd:		group leader counter fd
  */
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
-		      pid_t pid, int cpu, int masterfd)
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd)
+
 {
 	struct perf_counter_context *ctx;
-	struct perf_counter_event event;
+	struct perf_counter_hw_event hw_event;
 	struct perf_counter *counter;
 	int ret;
 
-	if (copy_from_user(&event, uevent, sizeof(event)) != 0)
+	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
 		return -EFAULT;
 
 	ctx = find_get_context(pid, cpu);
@@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(&event, cpu, record_type);
+	counter = perf_counter_alloc(&hw_event, cpu);
 	if (!counter)
 		goto err_put_context;
 
-- 
cgit v0.10.2


From 04289bb9891882202d7e961c4c04d2376930e9f9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 08:38:42 +0100
Subject: perf counters: add support for group counters

Impact: add group counters

This patch adds the "counter groups" abstraction.

Groups of counters behave much like normal 'single' counters, with a
few semantic and behavioral extensions on top of that.

A counter group is created by creating a new counter with the open()
syscall's group-leader group_fd file descriptor parameter pointing
to another, already existing counter.

Groups of counters are scheduled in and out in one atomic group, and
they are also roundrobin-scheduled atomically.

Counters that are member of a group can also record events with an
(atomic) extended timestamp that extends to all members of the group,
if the record type is set to PERF_RECORD_GROUP.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ef1936a..54b4ad0 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -346,18 +346,22 @@ static void perf_save_and_restart(struct perf_counter *counter)
 }
 
 static void
-perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
+perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 {
-	struct perf_counter_context *ctx = leader->ctx;
-	struct perf_counter *counter;
+	struct perf_counter *counter, *group_leader = sibling->group_leader;
 	int bit;
 
-	list_for_each_entry(counter, &ctx->counters, list) {
-		if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
-		    counter == leader)
-			continue;
+	/*
+	 * Store the counter's own timestamp first:
+	 */
+	perf_store_irq_data(sibling, sibling->hw_event.type);
+	perf_store_irq_data(sibling, atomic64_counter_read(sibling));
 
-		if (counter->active) {
+	/*
+	 * Then store sibling timestamps (if any):
+	 */
+	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
+		if (!counter->active) {
 			/*
 			 * When counter was not in the overflow mask, we have to
 			 * read it from hardware. We read it as well, when it
@@ -371,8 +375,8 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 				perf_save_and_restart(counter);
 			}
 		}
-		perf_store_irq_data(leader, counter->hw_event.type);
-		perf_store_irq_data(leader, atomic64_counter_read(counter));
+		perf_store_irq_data(sibling, counter->hw_event.type);
+		perf_store_irq_data(sibling, atomic64_counter_read(counter));
 	}
 }
 
@@ -416,10 +420,6 @@ again:
 			perf_store_irq_data(counter, instruction_pointer(regs));
 			break;
 		case PERF_RECORD_GROUP:
-			perf_store_irq_data(counter,
-					    counter->hw_event.type);
-			perf_store_irq_data(counter,
-					    atomic64_counter_read(counter));
 			perf_handle_group(counter, &status, &ack);
 			break;
 		}
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a2b4852..7af7d89 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -117,7 +117,10 @@ struct perf_data {
  * struct perf_counter - performance counter kernel representation:
  */
 struct perf_counter {
-	struct list_head		list;
+	struct list_head		list_entry;
+	struct list_head		sibling_list;
+	struct perf_counter		*group_leader;
+
 	int				active;
 #if BITS_PER_LONG == 64
 	atomic64_t			count;
@@ -158,7 +161,8 @@ struct perf_counter_context {
 	 * Protect the list of counters:
 	 */
 	spinlock_t		lock;
-	struct list_head	counters;
+
+	struct list_head	counter_list;
 	int			nr_counters;
 	int			nr_active;
 	struct task_struct	*task;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0d323ce..fa59fe8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/sysfs.h>
 #include <linux/ptrace.h>
@@ -55,7 +56,7 @@ void __weak hw_perf_counter_setup(void) { }
  * Read the cached counter in counter safe against cross CPU / NMI
  * modifications. 64 bit version - no complications.
  */
-static inline u64 perf_read_counter_safe(struct perf_counter *counter)
+static inline u64 perf_counter_read_safe(struct perf_counter *counter)
 {
 	return (u64) atomic64_read(&counter->count);
 }
@@ -66,7 +67,7 @@ static inline u64 perf_read_counter_safe(struct perf_counter *counter)
  * Read the cached counter in counter safe against cross CPU / NMI
  * modifications. 32 bit version.
  */
-static u64 perf_read_counter_safe(struct perf_counter *counter)
+static u64 perf_counter_read_safe(struct perf_counter *counter)
 {
 	u32 cntl, cnth;
 
@@ -83,13 +84,55 @@ static u64 perf_read_counter_safe(struct perf_counter *counter)
 
 #endif
 
+static void
+list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+	struct perf_counter *group_leader = counter->group_leader;
+
+	/*
+	 * Depending on whether it is a standalone or sibling counter,
+	 * add it straight to the context's counter list, or to the group
+	 * leader's sibling list:
+	 */
+	if (counter->group_leader == counter)
+		list_add_tail(&counter->list_entry, &ctx->counter_list);
+	else
+		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+}
+
+static void
+list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+	struct perf_counter *sibling, *tmp;
+
+	list_del_init(&counter->list_entry);
+
+	if (list_empty(&counter->sibling_list))
+		return;
+
+	/*
+	 * If this was a group counter with sibling counters then
+	 * upgrade the siblings to singleton counters by adding them
+	 * to the context list directly:
+	 */
+	list_for_each_entry_safe(sibling, tmp,
+				 &counter->sibling_list, list_entry) {
+
+		list_del_init(&sibling->list_entry);
+		list_add_tail(&sibling->list_entry, &ctx->counter_list);
+		WARN_ON_ONCE(!sibling->group_leader);
+		WARN_ON_ONCE(sibling->group_leader == sibling);
+		sibling->group_leader = sibling;
+	}
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
  * We disable the counter on the hardware level first. After that we
  * remove it from the context list.
  */
-static void __perf_remove_from_context(void *info)
+static void __perf_counter_remove_from_context(void *info)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
@@ -119,7 +162,7 @@ static void __perf_remove_from_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	hw_perf_disable_all();
-	list_del_init(&counter->list);
+	list_del_counter(counter, ctx);
 	hw_perf_enable_all();
 
 	if (!ctx->task) {
@@ -144,7 +187,7 @@ static void __perf_remove_from_context(void *info)
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
  */
-static void perf_remove_from_context(struct perf_counter *counter)
+static void perf_counter_remove_from_context(struct perf_counter *counter)
 {
 	struct perf_counter_context *ctx = counter->ctx;
 	struct task_struct *task = ctx->task;
@@ -155,32 +198,32 @@ static void perf_remove_from_context(struct perf_counter *counter)
 		 * the removal is always sucessful.
 		 */
 		smp_call_function_single(counter->cpu,
-					 __perf_remove_from_context,
+					 __perf_counter_remove_from_context,
 					 counter, 1);
 		return;
 	}
 
 retry:
-	task_oncpu_function_call(task, __perf_remove_from_context,
+	task_oncpu_function_call(task, __perf_counter_remove_from_context,
 				 counter);
 
 	spin_lock_irq(&ctx->lock);
 	/*
 	 * If the context is active we need to retry the smp call.
 	 */
-	if (ctx->nr_active && !list_empty(&counter->list)) {
+	if (ctx->nr_active && !list_empty(&counter->list_entry)) {
 		spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
 	/*
 	 * The lock prevents that this context is scheduled in so we
-	 * can remove the counter safely, if it the call above did not
+	 * can remove the counter safely, if the call above did not
 	 * succeed.
 	 */
-	if (!list_empty(&counter->list)) {
+	if (!list_empty(&counter->list_entry)) {
 		ctx->nr_counters--;
-		list_del_init(&counter->list);
+		list_del_counter(counter, ctx);
 		counter->task = NULL;
 	}
 	spin_unlock_irq(&ctx->lock);
@@ -211,7 +254,7 @@ static void __perf_install_in_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	hw_perf_disable_all();
-	list_add_tail(&counter->list, &ctx->counters);
+	list_add_counter(counter, ctx);
 	hw_perf_enable_all();
 
 	ctx->nr_counters++;
@@ -268,7 +311,7 @@ retry:
 	 * If the context is active and the counter has not been added
 	 * we need to retry the smp call.
 	 */
-	if (ctx->nr_active && list_empty(&counter->list)) {
+	if (ctx->nr_active && list_empty(&counter->list_entry)) {
 		spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
@@ -278,13 +321,45 @@ retry:
 	 * can add the counter safely, if it the call above did not
 	 * succeed.
 	 */
-	if (list_empty(&counter->list)) {
-		list_add_tail(&counter->list, &ctx->counters);
+	if (list_empty(&counter->list_entry)) {
+		list_add_counter(counter, ctx);
 		ctx->nr_counters++;
 	}
 	spin_unlock_irq(&ctx->lock);
 }
 
+static void
+counter_sched_out(struct perf_counter *counter,
+		  struct perf_cpu_context *cpuctx,
+		  struct perf_counter_context *ctx)
+{
+	if (!counter->active)
+		return;
+
+	hw_perf_counter_disable(counter);
+	counter->active	=  0;
+	counter->oncpu	= -1;
+
+	cpuctx->active_oncpu--;
+	ctx->nr_active--;
+}
+
+static void
+group_sched_out(struct perf_counter *group_counter,
+		struct perf_cpu_context *cpuctx,
+		struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	/*
+	 * Schedule out siblings (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+		counter_sched_out(counter, cpuctx, ctx);
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -306,21 +381,48 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 		return;
 
 	spin_lock(&ctx->lock);
-	list_for_each_entry(counter, &ctx->counters, list) {
-		if (!ctx->nr_active)
-			break;
-		if (counter->active) {
-			hw_perf_counter_disable(counter);
-			counter->active = 0;
-			counter->oncpu = -1;
-			ctx->nr_active--;
-			cpuctx->active_oncpu--;
-		}
+	if (ctx->nr_active) {
+		list_for_each_entry(counter, &ctx->counter_list, list_entry)
+			group_sched_out(counter, cpuctx, ctx);
 	}
 	spin_unlock(&ctx->lock);
 	cpuctx->task_ctx = NULL;
 }
 
+static void
+counter_sched_in(struct perf_counter *counter,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_counter_context *ctx,
+		 int cpu)
+{
+	if (!counter->active)
+		return;
+
+	hw_perf_counter_enable(counter);
+	counter->active = 1;
+	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+
+	cpuctx->active_oncpu++;
+	ctx->nr_active++;
+}
+
+static void
+group_sched_in(struct perf_counter *group_counter,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx,
+	       int cpu)
+{
+	struct perf_counter *counter;
+
+	counter_sched_in(group_counter, cpuctx, ctx, cpu);
+
+	/*
+	 * Schedule in siblings as one group (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+		counter_sched_in(counter, cpuctx, ctx, cpu);
+}
+
 /*
  * Called from scheduler to add the counters of the current task
  * with interrupts disabled.
@@ -342,19 +444,21 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 		return;
 
 	spin_lock(&ctx->lock);
-	list_for_each_entry(counter, &ctx->counters, list) {
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		if (ctx->nr_active == cpuctx->max_pertask)
 			break;
+
+		/*
+		 * Listen to the 'cpu' scheduling filter constraint
+		 * of counters:
+		 */
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		hw_perf_counter_enable(counter);
-		counter->active = 1;
-		counter->oncpu = cpu;
-		ctx->nr_active++;
-		cpuctx->active_oncpu++;
+		group_sched_in(counter, cpuctx, ctx, cpu);
 	}
 	spin_unlock(&ctx->lock);
+
 	cpuctx->task_ctx = ctx;
 }
 
@@ -371,12 +475,12 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	spin_lock(&ctx->lock);
 
 	/*
-	 * Rotate the first entry last:
+	 * Rotate the first entry last (works just fine for group counters too):
 	 */
 	hw_perf_disable_all();
-	list_for_each_entry(counter, &ctx->counters, list) {
-		list_del(&counter->list);
-		list_add_tail(&counter->list, &ctx->counters);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		list_del(&counter->list_entry);
+		list_add_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
 	hw_perf_enable_all();
@@ -387,16 +491,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 }
 
 /*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	spin_lock_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	ctx->nr_counters	= 0;
+	ctx->task		= task;
+}
+/*
  * Initialize the perf_counter context in task_struct
  */
 void perf_counter_init_task(struct task_struct *task)
 {
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
-
-	spin_lock_init(&ctx->lock);
-	INIT_LIST_HEAD(&ctx->counters);
-	ctx->nr_counters = 0;
-	ctx->task = task;
+	__perf_counter_init_context(&task->perf_counter_ctx, task);
 }
 
 /*
@@ -407,7 +518,7 @@ static void __hw_perf_counter_read(void *info)
 	hw_perf_counter_read(info);
 }
 
-static u64 perf_read_counter(struct perf_counter *counter)
+static u64 perf_counter_read(struct perf_counter *counter)
 {
 	/*
 	 * If counter is enabled and currently active on a CPU, update the
@@ -418,7 +529,7 @@ static u64 perf_read_counter(struct perf_counter *counter)
 					 __hw_perf_counter_read, counter, 1);
 	}
 
-	return perf_read_counter_safe(counter);
+	return perf_counter_read_safe(counter);
 }
 
 /*
@@ -555,7 +666,7 @@ static int perf_release(struct inode *inode, struct file *file)
 
 	mutex_lock(&counter->mutex);
 
-	perf_remove_from_context(counter);
+	perf_counter_remove_from_context(counter);
 	put_context(ctx);
 
 	mutex_unlock(&counter->mutex);
@@ -577,7 +688,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 		return -EINVAL;
 
 	mutex_lock(&counter->mutex);
-	cntval = perf_read_counter(counter);
+	cntval = perf_counter_read(counter);
 	mutex_unlock(&counter->mutex);
 
 	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
@@ -707,15 +818,25 @@ static const struct file_operations perf_fops = {
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
+perf_counter_alloc(struct perf_counter_hw_event *hw_event,
+		   int cpu,
+		   struct perf_counter *group_leader)
 {
 	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 
 	if (!counter)
 		return NULL;
 
+	/*
+	 * Single counters are their own group leaders, with an
+	 * empty sibling list:
+	 */
+	if (!group_leader)
+		group_leader = counter;
+
 	mutex_init(&counter->mutex);
-	INIT_LIST_HEAD(&counter->list);
+	INIT_LIST_HEAD(&counter->list_entry);
+	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
 
 	counter->irqdata		= &counter->data[0];
@@ -723,6 +844,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
 	counter->wakeup_pending		= 0;
+	counter->group_leader		= group_leader;
 
 	return counter;
 }
@@ -743,20 +865,45 @@ asmlinkage int sys_perf_counter_open(
 	int				group_fd)
 
 {
-	struct perf_counter_context *ctx;
+	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
-	struct perf_counter *counter;
+	struct perf_counter_context *ctx;
+	struct file *group_file = NULL;
+	int fput_needed = 0;
 	int ret;
 
 	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
 		return -EFAULT;
 
+	/*
+	 * Look up the group leader:
+	 */
+	group_leader = NULL;
+	if (group_fd != -1) {
+		ret = -EINVAL;
+		group_file = fget_light(group_fd, &fput_needed);
+		if (!group_file)
+			goto out_fput;
+		if (group_file->f_op != &perf_fops)
+			goto out_fput;
+
+		group_leader = group_file->private_data;
+		/*
+		 * Do not allow a recursive hierarchy:
+		 */
+		if (group_leader->group_leader)
+			goto out_fput;
+	}
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
 	ctx = find_get_context(pid, cpu);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(&hw_event, cpu);
+	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
 	if (!counter)
 		goto err_put_context;
 
@@ -770,11 +917,14 @@ asmlinkage int sys_perf_counter_open(
 	if (ret < 0)
 		goto err_remove_free_put_context;
 
+out_fput:
+	fput_light(group_file, fput_needed);
+
 	return ret;
 
 err_remove_free_put_context:
 	mutex_lock(&counter->mutex);
-	perf_remove_from_context(counter);
+	perf_counter_remove_from_context(counter);
 	mutex_unlock(&counter->mutex);
 
 err_free_put_context:
@@ -783,40 +933,40 @@ err_free_put_context:
 err_put_context:
 	put_context(ctx);
 
-	return ret;
+	goto out_fput;
 }
 
-static void __cpuinit perf_init_cpu(int cpu)
+static void __cpuinit perf_counter_init_cpu(int cpu)
 {
-	struct perf_cpu_context *ctx;
+	struct perf_cpu_context *cpuctx;
 
-	ctx = &per_cpu(perf_cpu_context, cpu);
-	spin_lock_init(&ctx->ctx.lock);
-	INIT_LIST_HEAD(&ctx->ctx.counters);
+	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	__perf_counter_init_context(&cpuctx->ctx, NULL);
 
 	mutex_lock(&perf_resource_mutex);
-	ctx->max_pertask = perf_max_counters - perf_reserved_percpu;
+	cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
 	mutex_unlock(&perf_resource_mutex);
+
 	hw_perf_counter_setup();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void __perf_exit_cpu(void *info)
+static void __perf_counter_exit_cpu(void *info)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter_context *ctx = &cpuctx->ctx;
 	struct perf_counter *counter, *tmp;
 
-	list_for_each_entry_safe(counter, tmp, &ctx->counters, list)
-		__perf_remove_from_context(counter);
+	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
+		__perf_counter_remove_from_context(counter);
 
 }
-static void perf_exit_cpu(int cpu)
+static void perf_counter_exit_cpu(int cpu)
 {
-	smp_call_function_single(cpu, __perf_exit_cpu, NULL, 1);
+	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
 }
 #else
-static inline void perf_exit_cpu(int cpu) { }
+static inline void perf_counter_exit_cpu(int cpu) { }
 #endif
 
 static int __cpuinit
@@ -828,12 +978,12 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		perf_init_cpu(cpu);
+		perf_counter_init_cpu(cpu);
 		break;
 
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
-		perf_exit_cpu(cpu);
+		perf_counter_exit_cpu(cpu);
 		break;
 
 	default:
-- 
cgit v0.10.2


From ccff286d85098ba5438e22aa2ea807fc1e18cf2f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 11:26:29 +0100
Subject: perf counters: group counter, fixes

Impact: bugfix

Check that a group does not span outside the context of a CPU or a task.

Also, do not allow deep recursive hierarchies.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fa59fe8..278209c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -107,9 +107,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 	list_del_init(&counter->list_entry);
 
-	if (list_empty(&counter->sibling_list))
-		return;
-
 	/*
 	 * If this was a group counter with sibling counters then
 	 * upgrade the siblings to singleton counters by adding them
@@ -395,9 +392,6 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	if (!counter->active)
-		return;
-
 	hw_perf_counter_enable(counter);
 	counter->active = 1;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
@@ -876,32 +870,39 @@ asmlinkage int sys_perf_counter_open(
 		return -EFAULT;
 
 	/*
-	 * Look up the group leader:
+	 * Get the target context (task or percpu):
+	 */
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	/*
+	 * Look up the group leader (we will attach this counter to it):
 	 */
 	group_leader = NULL;
 	if (group_fd != -1) {
 		ret = -EINVAL;
 		group_file = fget_light(group_fd, &fput_needed);
 		if (!group_file)
-			goto out_fput;
+			goto err_put_context;
 		if (group_file->f_op != &perf_fops)
-			goto out_fput;
+			goto err_put_context;
 
 		group_leader = group_file->private_data;
 		/*
-		 * Do not allow a recursive hierarchy:
+		 * Do not allow a recursive hierarchy (this new sibling
+		 * becoming part of another group-sibling):
+		 */
+		if (group_leader->group_leader != group_leader)
+			goto err_put_context;
+		/*
+		 * Do not allow to attach to a group in a different
+		 * task or CPU context:
 		 */
-		if (group_leader->group_leader)
-			goto out_fput;
+		if (group_leader->ctx != ctx)
+			goto err_put_context;
 	}
 
-	/*
-	 * Get the target context (task or percpu):
-	 */
-	ctx = find_get_context(pid, cpu);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
 	ret = -ENOMEM;
 	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
 	if (!counter)
-- 
cgit v0.10.2


From 621a01eac89b5e2f81a4cf576568b31f40a02724 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 12:46:46 +0100
Subject: perf counters: hw driver API

Impact: restructure code, introduce hw_ops driver abstraction

Introduce this abstraction to handle counter details:

 struct hw_perf_counter_ops {
	void (*hw_perf_counter_enable)	(struct perf_counter *counter);
	void (*hw_perf_counter_disable)	(struct perf_counter *counter);
	void (*hw_perf_counter_read)	(struct perf_counter *counter);
 };

This will be useful to support assymetric hw details, and it will also
be useful to implement "software counters". (Counters that count kernel
managed sw events such as pagefaults, context-switches, wall-clock time
or task-local time.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 54b4ad0..718b635 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -56,7 +56,7 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 /*
  * Setup the hardware configuration for a given hw_event_type
  */
-int hw_perf_counter_init(struct perf_counter *counter)
+static int __hw_perf_counter_init(struct perf_counter *counter)
 {
 	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -135,7 +135,7 @@ u64 hw_perf_disable_all(void)
 EXPORT_SYMBOL_GPL(hw_perf_disable_all);
 
 static inline void
-__hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
+__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
 {
 	wrmsr(hwc->config_base + idx, hwc->config, 0);
 }
@@ -149,13 +149,13 @@ static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx)
 	wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
 }
 
-static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
 {
 	wrmsr(hwc->config_base + idx,
 	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
-void hw_perf_counter_enable(struct perf_counter *counter)
+static void x86_perf_counter_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -170,12 +170,12 @@ void hw_perf_counter_enable(struct perf_counter *counter)
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__hw_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(hwc, idx);
 
 	cpuc->counters[idx] = counter;
 
 	__hw_perf_counter_set_period(hwc, idx);
-	__hw_perf_counter_enable(hwc, idx);
+	__x86_perf_counter_enable(hwc, idx);
 }
 
 #ifdef CONFIG_X86_64
@@ -282,20 +282,20 @@ void perf_counter_print_debug(void)
 	local_irq_enable();
 }
 
-void hw_perf_counter_disable(struct perf_counter *counter)
+static void x86_perf_counter_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__hw_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(hwc, idx);
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
 	__hw_perf_save_counter(counter, hwc, idx);
 }
 
-void hw_perf_counter_read(struct perf_counter *counter)
+static void x86_perf_counter_read(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned long addr = hwc->counter_base + hwc->idx;
@@ -342,7 +342,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
 	__hw_perf_counter_set_period(hwc, idx);
 
 	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
-		__hw_perf_counter_enable(hwc, idx);
+		__x86_perf_counter_enable(hwc, idx);
 }
 
 static void
@@ -572,3 +572,20 @@ void __init init_hw_perf_counters(void)
 
 	perf_counters_initialized = true;
 }
+
+static struct hw_perf_counter_ops x86_perf_counter_ops = {
+	.hw_perf_counter_enable		= x86_perf_counter_enable,
+	.hw_perf_counter_disable	= x86_perf_counter_disable,
+	.hw_perf_counter_read		= x86_perf_counter_read,
+};
+
+struct hw_perf_counter_ops *hw_perf_counter_init(struct perf_counter *counter)
+{
+	int err;
+
+	err = __hw_perf_counter_init(counter);
+	if (err)
+		return NULL;
+
+	return &x86_perf_counter_ops;
+}
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7af7d89..2738564 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -113,6 +113,17 @@ struct perf_data {
 	u8				data[PERF_DATA_BUFLEN];
 };
 
+struct perf_counter;
+
+/**
+ * struct hw_perf_counter_ops - performance counter hw ops
+ */
+struct hw_perf_counter_ops {
+	void (*hw_perf_counter_enable)	(struct perf_counter *counter);
+	void (*hw_perf_counter_disable)	(struct perf_counter *counter);
+	void (*hw_perf_counter_read)	(struct perf_counter *counter);
+};
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -120,6 +131,7 @@ struct perf_counter {
 	struct list_head		list_entry;
 	struct list_head		sibling_list;
 	struct perf_counter		*group_leader;
+	struct hw_perf_counter_ops	*hw_ops;
 
 	int				active;
 #if BITS_PER_LONG == 64
@@ -185,6 +197,9 @@ struct perf_cpu_context {
 extern int perf_max_counters;
 
 #ifdef CONFIG_PERF_COUNTERS
+extern struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter);
+
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
 extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 278209c..e6e41ca 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -37,18 +37,15 @@ static DEFINE_MUTEX(perf_resource_mutex);
 /*
  * Architecture provided APIs - weak aliases:
  */
-
-int __weak hw_perf_counter_init(struct perf_counter *counter)
+extern __weak struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter)
 {
-	return -EINVAL;
+	return ERR_PTR(-EINVAL);
 }
 
-void __weak hw_perf_counter_enable(struct perf_counter *counter)	 { }
-void __weak hw_perf_counter_disable(struct perf_counter *counter)	 { }
-void __weak hw_perf_counter_read(struct perf_counter *counter)		 { }
-void __weak hw_perf_disable_all(void) { }
-void __weak hw_perf_enable_all(void) { }
-void __weak hw_perf_counter_setup(void) { }
+void __weak hw_perf_disable_all(void)	 { }
+void __weak hw_perf_enable_all(void)	 { }
+void __weak hw_perf_counter_setup(void)	 { }
 
 #if BITS_PER_LONG == 64
 
@@ -146,7 +143,7 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock(&ctx->lock);
 
 	if (counter->active) {
-		hw_perf_counter_disable(counter);
+		counter->hw_ops->hw_perf_counter_disable(counter);
 		counter->active = 0;
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
@@ -257,7 +254,7 @@ static void __perf_install_in_context(void *info)
 	ctx->nr_counters++;
 
 	if (cpuctx->active_oncpu < perf_max_counters) {
-		hw_perf_counter_enable(counter);
+		counter->hw_ops->hw_perf_counter_enable(counter);
 		counter->active = 1;
 		counter->oncpu = cpu;
 		ctx->nr_active++;
@@ -333,7 +330,7 @@ counter_sched_out(struct perf_counter *counter,
 	if (!counter->active)
 		return;
 
-	hw_perf_counter_disable(counter);
+	counter->hw_ops->hw_perf_counter_disable(counter);
 	counter->active	=  0;
 	counter->oncpu	= -1;
 
@@ -392,7 +389,7 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	hw_perf_counter_enable(counter);
+	counter->hw_ops->hw_perf_counter_enable(counter);
 	counter->active = 1;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
 
@@ -509,7 +506,9 @@ void perf_counter_init_task(struct task_struct *task)
  */
 static void __hw_perf_counter_read(void *info)
 {
-	hw_perf_counter_read(info);
+	struct perf_counter *counter = info;
+
+	counter->hw_ops->hw_perf_counter_read(counter);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -816,8 +815,10 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
 		   struct perf_counter *group_leader)
 {
-	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+	struct hw_perf_counter_ops *hw_ops;
+	struct perf_counter *counter;
 
+	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 	if (!counter)
 		return NULL;
 
@@ -839,6 +840,14 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->hw_event		= *hw_event;
 	counter->wakeup_pending		= 0;
 	counter->group_leader		= group_leader;
+	counter->hw_ops			= NULL;
+
+	hw_ops = hw_perf_counter_init(counter);
+	if (!hw_ops) {
+		kfree(counter);
+		return NULL;
+	}
+	counter->hw_ops = hw_ops;
 
 	return counter;
 }
@@ -908,10 +917,6 @@ asmlinkage int sys_perf_counter_open(
 	if (!counter)
 		goto err_put_context;
 
-	ret = hw_perf_counter_init(counter);
-	if (ret)
-		goto err_free_put_context;
-
 	perf_install_in_context(ctx, counter, cpu);
 
 	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
@@ -927,8 +932,6 @@ err_remove_free_put_context:
 	mutex_lock(&counter->mutex);
 	perf_counter_remove_from_context(counter);
 	mutex_unlock(&counter->mutex);
-
-err_free_put_context:
 	kfree(counter);
 
 err_put_context:
-- 
cgit v0.10.2


From 5c92d12411dfe5f0f3d1b1c1e2f756245e6f7249 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 13:21:10 +0100
Subject: perf counters: implement PERF_COUNT_CPU_CLOCK

Impact: add new perf-counter type

The 'CPU clock' counter counts the amount of CPU clock time that is
elapsing, in nanoseconds. (regardless of how much of it the task is
spending on a CPU executing)

This counter type is a Linux kernel based abstraction, it is available
even if the hardware does not support native hardware performance counters.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 718b635..43c8e9a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -178,35 +178,6 @@ static void x86_perf_counter_enable(struct perf_counter *counter)
 	__x86_perf_counter_enable(hwc, idx);
 }
 
-#ifdef CONFIG_X86_64
-static inline void atomic64_counter_set(struct perf_counter *counter, u64 val)
-{
-	atomic64_set(&counter->count, val);
-}
-
-static inline u64 atomic64_counter_read(struct perf_counter *counter)
-{
-	return atomic64_read(&counter->count);
-}
-#else
-/*
- * Todo: add proper atomic64_t support to 32-bit x86:
- */
-static inline void atomic64_counter_set(struct perf_counter *counter, u64 val64)
-{
-	u32 *val32 = (void *)&val64;
-
-	atomic_set(counter->count32 + 0, *(val32 + 0));
-	atomic_set(counter->count32 + 1, *(val32 + 1));
-}
-
-static inline u64 atomic64_counter_read(struct perf_counter *counter)
-{
-	return atomic_read(counter->count32 + 0) |
-		(u64) atomic_read(counter->count32 + 1) << 32;
-}
-#endif
-
 static void __hw_perf_save_counter(struct perf_counter *counter,
 				   struct hw_perf_counter *hwc, int idx)
 {
@@ -309,7 +280,7 @@ static void x86_perf_counter_read(struct perf_counter *counter)
 	} while (offs != hwc->prev_count);
 
 	val32 = (s32) val;
-	val =  (s64)hwc->irq_period + (s64)val32;
+	val = (s64)hwc->irq_period + (s64)val32;
 	atomic64_counter_set(counter, hwc->prev_count + val);
 }
 
@@ -573,13 +544,14 @@ void __init init_hw_perf_counters(void)
 	perf_counters_initialized = true;
 }
 
-static struct hw_perf_counter_ops x86_perf_counter_ops = {
+static const struct hw_perf_counter_ops x86_perf_counter_ops = {
 	.hw_perf_counter_enable		= x86_perf_counter_enable,
 	.hw_perf_counter_disable	= x86_perf_counter_disable,
 	.hw_perf_counter_read		= x86_perf_counter_read,
 };
 
-struct hw_perf_counter_ops *hw_perf_counter_init(struct perf_counter *counter)
+const struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter)
 {
 	int err;
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2738564..9a1713a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -131,7 +131,7 @@ struct perf_counter {
 	struct list_head		list_entry;
 	struct list_head		sibling_list;
 	struct perf_counter		*group_leader;
-	struct hw_perf_counter_ops	*hw_ops;
+	const struct hw_perf_counter_ops *hw_ops;
 
 	int				active;
 #if BITS_PER_LONG == 64
@@ -197,7 +197,7 @@ struct perf_cpu_context {
 extern int perf_max_counters;
 
 #ifdef CONFIG_PERF_COUNTERS
-extern struct hw_perf_counter_ops *
+extern const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter);
 
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
@@ -208,6 +208,9 @@ extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
 extern void hw_perf_restore_ctrl(u64 ctrl);
 extern u64 hw_perf_disable_all(void);
+extern void atomic64_counter_set(struct perf_counter *counter, u64 val64);
+extern u64 atomic64_counter_read(struct perf_counter *counter);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -219,7 +222,7 @@ static inline void perf_counter_init_task(struct task_struct *task)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void hw_perf_restore_ctrl(u64 ctrl)			{ }
-static inline u64 hw_perf_disable_all(void)		{ return 0; }
+static inline u64 hw_perf_disable_all(void)		      { return 0; }
 #endif
 
 #endif /* _LINUX_PERF_COUNTER_H */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e6e41ca..506286e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -37,15 +37,15 @@ static DEFINE_MUTEX(perf_resource_mutex);
 /*
  * Architecture provided APIs - weak aliases:
  */
-extern __weak struct hw_perf_counter_ops *
+extern __weak const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-void __weak hw_perf_disable_all(void)	 { }
-void __weak hw_perf_enable_all(void)	 { }
-void __weak hw_perf_counter_setup(void)	 { }
+u64 __weak hw_perf_disable_all(void)		{ return 0; }
+void __weak hw_perf_restore_ctrl(u64 ctrl)	{ }
+void __weak hw_perf_counter_setup(void)		{ }
 
 #if BITS_PER_LONG == 64
 
@@ -58,6 +58,16 @@ static inline u64 perf_counter_read_safe(struct perf_counter *counter)
 	return (u64) atomic64_read(&counter->count);
 }
 
+void atomic64_counter_set(struct perf_counter *counter, u64 val)
+{
+	atomic64_set(&counter->count, val);
+}
+
+u64 atomic64_counter_read(struct perf_counter *counter)
+{
+	return atomic64_read(&counter->count);
+}
+
 #else
 
 /*
@@ -79,6 +89,20 @@ static u64 perf_counter_read_safe(struct perf_counter *counter)
 	return cntl | ((u64) cnth) << 32;
 }
 
+void atomic64_counter_set(struct perf_counter *counter, u64 val64)
+{
+	u32 *val32 = (void *)&val64;
+
+	atomic_set(counter->count32 + 0, *(val32 + 0));
+	atomic_set(counter->count32 + 1, *(val32 + 1));
+}
+
+u64 atomic64_counter_read(struct perf_counter *counter)
+{
+	return atomic_read(counter->count32 + 0) |
+		(u64) atomic_read(counter->count32 + 1) << 32;
+}
+
 #endif
 
 static void
@@ -131,6 +155,7 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
+	u64 perf_flags;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -155,9 +180,9 @@ static void __perf_counter_remove_from_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	hw_perf_disable_all();
+	perf_flags = hw_perf_disable_all();
 	list_del_counter(counter, ctx);
-	hw_perf_enable_all();
+	hw_perf_restore_ctrl(perf_flags);
 
 	if (!ctx->task) {
 		/*
@@ -232,6 +257,7 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
 	int cpu = smp_processor_id();
+	u64 perf_flags;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -247,9 +273,9 @@ static void __perf_install_in_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	hw_perf_disable_all();
+	perf_flags = hw_perf_disable_all();
 	list_add_counter(counter, ctx);
-	hw_perf_enable_all();
+	hw_perf_restore_ctrl(perf_flags);
 
 	ctx->nr_counters++;
 
@@ -457,6 +483,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 {
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	u64 perf_flags;
 
 	if (likely(!ctx->nr_counters))
 		return;
@@ -468,13 +495,13 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
-	hw_perf_disable_all();
+	perf_flags = hw_perf_disable_all();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		list_del(&counter->list_entry);
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
-	hw_perf_enable_all();
+	hw_perf_restore_ctrl(perf_flags);
 
 	spin_unlock(&ctx->lock);
 
@@ -807,6 +834,42 @@ static const struct file_operations perf_fops = {
 	.poll			= perf_poll,
 };
 
+static void cpu_clock_perf_counter_enable(struct perf_counter *counter)
+{
+}
+
+static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
+{
+}
+
+static void cpu_clock_perf_counter_read(struct perf_counter *counter)
+{
+	int cpu = raw_smp_processor_id();
+
+	atomic64_counter_set(counter, cpu_clock(cpu));
+}
+
+static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
+	.hw_perf_counter_enable		= cpu_clock_perf_counter_enable,
+	.hw_perf_counter_disable	= cpu_clock_perf_counter_disable,
+	.hw_perf_counter_read		= cpu_clock_perf_counter_read,
+};
+
+static const struct hw_perf_counter_ops *
+sw_perf_counter_init(struct perf_counter *counter)
+{
+	const struct hw_perf_counter_ops *hw_ops = NULL;
+
+	switch (counter->hw_event.type) {
+	case PERF_COUNT_CPU_CLOCK:
+		hw_ops = &perf_ops_cpu_clock;
+		break;
+	default:
+		break;
+	}
+	return hw_ops;
+}
+
 /*
  * Allocate and initialize a counter structure
  */
@@ -815,7 +878,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
 		   struct perf_counter *group_leader)
 {
-	struct hw_perf_counter_ops *hw_ops;
+	const struct hw_perf_counter_ops *hw_ops;
 	struct perf_counter *counter;
 
 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
@@ -842,7 +905,13 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
 
-	hw_ops = hw_perf_counter_init(counter);
+	hw_ops = NULL;
+	if (!hw_event->raw && hw_event->type < 0)
+		hw_ops = sw_perf_counter_init(counter);
+	if (!hw_ops) {
+		hw_ops = hw_perf_counter_init(counter);
+	}
+
 	if (!hw_ops) {
 		kfree(counter);
 		return NULL;
@@ -912,7 +981,7 @@ asmlinkage int sys_perf_counter_open(
 			goto err_put_context;
 	}
 
-	ret = -ENOMEM;
+	ret = -EINVAL;
 	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
 	if (!counter)
 		goto err_put_context;
-- 
cgit v0.10.2


From 01b2838c4298c5e0d30b4993c195ac34dd9df61e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 13:45:51 +0100
Subject: perf counters: consolidate hw_perf save/restore APIs

Impact: cleanup

Rename them to better match up the usual IRQ disable/enable APIs:

 hw_perf_disable_all()  => hw_perf_save_disable()
 hw_perf_restore_ctrl() => hw_perf_restore()

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 43c8e9a..3e1dbeb 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -118,13 +118,13 @@ void hw_perf_enable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
-void hw_perf_restore_ctrl(u64 ctrl)
+void hw_perf_restore(u64 ctrl)
 {
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
 }
-EXPORT_SYMBOL_GPL(hw_perf_restore_ctrl);
+EXPORT_SYMBOL_GPL(hw_perf_restore);
 
-u64 hw_perf_disable_all(void)
+u64 hw_perf_save_disable(void)
 {
 	u64 ctrl;
 
@@ -132,7 +132,7 @@ u64 hw_perf_disable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
 	return ctrl;
 }
-EXPORT_SYMBOL_GPL(hw_perf_disable_all);
+EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
 static inline void
 __x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index cca804e..a3e66a3 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -270,11 +270,11 @@ static atomic_t c3_cpu_count;
 /* Common C-state entry for C2, C3, .. */
 static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 {
-	u64 pctrl;
+	u64 perf_flags;
 
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
-	pctrl = hw_perf_disable_all();
+	perf_flags = hw_perf_save_disable();
 	if (cstate->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cstate);
@@ -287,7 +287,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
-	hw_perf_restore_ctrl(pctrl);
+	hw_perf_restore(perf_flags);
 	start_critical_timings();
 }
 #endif /* !CONFIG_CPU_IDLE */
@@ -1433,7 +1433,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
-	pctrl = hw_perf_disable_all();
+	pctrl = hw_perf_save_disable();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -1448,7 +1448,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
-	hw_perf_restore_ctrl(pctrl);
+	hw_perf_restore(pctrl);
 	start_critical_timings();
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9a1713a..68f6e3a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -67,7 +67,7 @@ enum perf_counter_record_type {
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	u64			type;
+	s64			type;
 
 	u64			irq_period;
 	u32			record_type;
@@ -206,8 +206,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern void perf_counter_init_task(struct task_struct *task);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
-extern void hw_perf_restore_ctrl(u64 ctrl);
-extern u64 hw_perf_disable_all(void);
+extern u64 hw_perf_save_disable(void);
+extern void hw_perf_restore(u64 ctrl);
 extern void atomic64_counter_set(struct perf_counter *counter, u64 val64);
 extern u64 atomic64_counter_read(struct perf_counter *counter);
 
@@ -221,8 +221,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline void perf_counter_init_task(struct task_struct *task)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
-static inline void hw_perf_restore_ctrl(u64 ctrl)			{ }
-static inline u64 hw_perf_disable_all(void)		      { return 0; }
+static inline void hw_perf_restore(u64 ctrl)			{ }
+static inline u64 hw_perf_save_disable(void)		      { return 0; }
 #endif
 
 #endif /* _LINUX_PERF_COUNTER_H */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 506286e..0e93fea 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -43,8 +43,8 @@ hw_perf_counter_init(struct perf_counter *counter)
 	return ERR_PTR(-EINVAL);
 }
 
-u64 __weak hw_perf_disable_all(void)		{ return 0; }
-void __weak hw_perf_restore_ctrl(u64 ctrl)	{ }
+u64 __weak hw_perf_save_disable(void)		{ return 0; }
+void __weak hw_perf_restore(u64 ctrl)	{ }
 void __weak hw_perf_counter_setup(void)		{ }
 
 #if BITS_PER_LONG == 64
@@ -180,9 +180,9 @@ static void __perf_counter_remove_from_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	perf_flags = hw_perf_disable_all();
+	perf_flags = hw_perf_save_disable();
 	list_del_counter(counter, ctx);
-	hw_perf_restore_ctrl(perf_flags);
+	hw_perf_restore(perf_flags);
 
 	if (!ctx->task) {
 		/*
@@ -273,9 +273,9 @@ static void __perf_install_in_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	perf_flags = hw_perf_disable_all();
+	perf_flags = hw_perf_save_disable();
 	list_add_counter(counter, ctx);
-	hw_perf_restore_ctrl(perf_flags);
+	hw_perf_restore(perf_flags);
 
 	ctx->nr_counters++;
 
@@ -495,13 +495,13 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
-	perf_flags = hw_perf_disable_all();
+	perf_flags = hw_perf_save_disable();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		list_del(&counter->list_entry);
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
-	hw_perf_restore_ctrl(perf_flags);
+	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
 
-- 
cgit v0.10.2


From bae43c9945ebeef15e7952e317efb02393d3bfc7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 14:03:20 +0100
Subject: perf counters: implement PERF_COUNT_TASK_CLOCK

Impact: add new perf-counter type

The 'task clock' counter counts the amount of time a task is executing,
in nanoseconds. It stops ticking when a task is scheduled out either due
to it blocking, sleeping or it being preempted.

This counter type is a Linux kernel based abstraction, it is available
even if the hardware does not support native hardware performance counters.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 68f6e3a..30c0ec8 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -50,8 +50,11 @@ enum hw_event_types {
 	 */
 	PERF_COUNT_CPU_CLOCK		= -1,
 	PERF_COUNT_TASK_CLOCK		= -2,
-	PERF_COUNT_PAGE_FAULTS		= -3,
-	PERF_COUNT_CONTEXT_SWITCHES	= -4,
+	/*
+	 * Future software events:
+	 */
+	/* PERF_COUNT_PAGE_FAULTS	= -3,
+	   PERF_COUNT_CONTEXT_SWITCHES	= -4, */
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0e93fea..a0fe847 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -855,6 +855,25 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.hw_perf_counter_read		= cpu_clock_perf_counter_read,
 };
 
+static void task_clock_perf_counter_enable(struct perf_counter *counter)
+{
+}
+
+static void task_clock_perf_counter_disable(struct perf_counter *counter)
+{
+}
+
+static void task_clock_perf_counter_read(struct perf_counter *counter)
+{
+	atomic64_counter_set(counter, current->se.sum_exec_runtime);
+}
+
+static const struct hw_perf_counter_ops perf_ops_task_clock = {
+	.hw_perf_counter_enable		= task_clock_perf_counter_enable,
+	.hw_perf_counter_disable	= task_clock_perf_counter_disable,
+	.hw_perf_counter_read		= task_clock_perf_counter_read,
+};
+
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
@@ -864,6 +883,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_CPU_CLOCK:
 		hw_ops = &perf_ops_cpu_clock;
 		break;
+	case PERF_COUNT_TASK_CLOCK:
+		hw_ops = &perf_ops_task_clock;
+		break;
 	default:
 		break;
 	}
-- 
cgit v0.10.2


From 1d1c7ddbfab358445a542715551301b7fc363e28 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 14:59:31 +0100
Subject: perf counters: add prctl interface to disable/enable counters

Add a way for self-monitoring tasks to disable/enable counters summarily,
via a prctl:

	PR_TASK_PERF_COUNTERS_DISABLE		31
	PR_TASK_PERF_COUNTERS_ENABLE		32

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 30c0ec8..97d86c2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -213,6 +213,8 @@ extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
 extern void atomic64_counter_set(struct perf_counter *counter, u64 val64);
 extern u64 atomic64_counter_read(struct perf_counter *counter);
+extern int perf_counter_task_disable(void);
+extern int perf_counter_task_enable(void);
 
 #else
 static inline void
@@ -226,6 +228,8 @@ static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void hw_perf_restore(u64 ctrl)			{ }
 static inline u64 hw_perf_save_disable(void)		      { return 0; }
+static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
+static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 #endif
 
 #endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 48d887e..b00df4c 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -85,4 +85,7 @@
 #define PR_SET_TIMERSLACK 29
 #define PR_GET_TIMERSLACK 30
 
+#define PR_TASK_PERF_COUNTERS_DISABLE		31
+#define PR_TASK_PERF_COUNTERS_ENABLE		32
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a0fe847..4e679b9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -415,6 +415,9 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
+	if (counter->active == -1)
+		return;
+
 	counter->hw_ops->hw_perf_counter_enable(counter);
 	counter->active = 1;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
@@ -479,6 +482,79 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 	cpuctx->task_ctx = ctx;
 }
 
+int perf_counter_task_disable(void)
+{
+	struct task_struct *curr = current;
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter *counter;
+	u64 perf_flags;
+	int cpu;
+
+	if (likely(!ctx->nr_counters))
+		return 0;
+
+	local_irq_disable();
+	cpu = smp_processor_id();
+
+	perf_counter_task_sched_out(curr, cpu);
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * Disable all the counters:
+	 */
+	perf_flags = hw_perf_save_disable();
+
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		WARN_ON_ONCE(counter->active == 1);
+		counter->active = -1;
+	}
+	hw_perf_restore(perf_flags);
+
+	spin_unlock(&ctx->lock);
+
+	local_irq_enable();
+
+	return 0;
+}
+
+int perf_counter_task_enable(void)
+{
+	struct task_struct *curr = current;
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter *counter;
+	u64 perf_flags;
+	int cpu;
+
+	if (likely(!ctx->nr_counters))
+		return 0;
+
+	local_irq_disable();
+	cpu = smp_processor_id();
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * Disable all the counters:
+	 */
+	perf_flags = hw_perf_save_disable();
+
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->active != -1)
+			continue;
+		counter->active = 0;
+	}
+	hw_perf_restore(perf_flags);
+
+	spin_unlock(&ctx->lock);
+
+	perf_counter_task_sched_in(curr, cpu);
+
+	local_irq_enable();
+
+	return 0;
+}
+
 void perf_counter_task_tick(struct task_struct *curr, int cpu)
 {
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
@@ -951,13 +1027,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
  * @cpu:		target cpu
  * @group_fd:		group leader counter fd
  */
-asmlinkage int sys_perf_counter_open(
-
-	struct perf_counter_hw_event	*hw_event_uptr		__user,
-	pid_t				pid,
-	int				cpu,
-	int				group_fd)
-
+asmlinkage int
+sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
+		      pid_t pid, int cpu, int group_fd)
 {
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
diff --git a/kernel/sys.c b/kernel/sys.c
index 31deba8..0f66633 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -14,6 +14,7 @@
 #include <linux/prctl.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
+#include <linux/perf_counter.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
 #include <linux/kexec.h>
@@ -1716,6 +1717,12 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 		case PR_SET_TSC:
 			error = SET_TSC_CTL(arg2);
 			break;
+		case PR_TASK_PERF_COUNTERS_DISABLE:
+			error = perf_counter_task_disable();
+			break;
+		case PR_TASK_PERF_COUNTERS_ENABLE:
+			error = perf_counter_task_enable();
+			break;
 		case PR_GET_TIMERSLACK:
 			error = current->timer_slack_ns;
 			break;
-- 
cgit v0.10.2


From 6a930700c8b655a9e25e42fc4adc0b225ebbcefc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 15:17:03 +0100
Subject: perf counters: clean up state transitions

Impact: cleanup

Introduce a proper enum for the 3 states of a counter:

	PERF_COUNTER_STATE_OFF		= -1
	PERF_COUNTER_STATE_INACTIVE	=  0
	PERF_COUNTER_STATE_ACTIVE	=  1

and rename counter->active to counter->state and propagate the
changes everywhere.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3e1dbeb..4854cca 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -332,7 +332,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 	 * Then store sibling timestamps (if any):
 	 */
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		if (!counter->active) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
 			/*
 			 * When counter was not in the overflow mask, we have to
 			 * read it from hardware. We read it as well, when it
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 97d86c2..8cb095f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -128,6 +128,15 @@ struct hw_perf_counter_ops {
 };
 
 /**
+ * enum perf_counter_active_state - the states of a counter
+ */
+enum perf_counter_active_state {
+	PERF_COUNTER_STATE_OFF		= -1,
+	PERF_COUNTER_STATE_INACTIVE	=  0,
+	PERF_COUNTER_STATE_ACTIVE	=  1,
+};
+
+/**
  * struct perf_counter - performance counter kernel representation:
  */
 struct perf_counter {
@@ -136,7 +145,7 @@ struct perf_counter {
 	struct perf_counter		*group_leader;
 	const struct hw_perf_counter_ops *hw_ops;
 
-	int				active;
+	enum perf_counter_active_state	state;
 #if BITS_PER_LONG == 64
 	atomic64_t			count;
 #else
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4e679b9..559130b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -167,9 +167,9 @@ static void __perf_counter_remove_from_context(void *info)
 
 	spin_lock(&ctx->lock);
 
-	if (counter->active) {
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->hw_perf_counter_disable(counter);
-		counter->active = 0;
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
 		counter->task = NULL;
@@ -281,7 +281,7 @@ static void __perf_install_in_context(void *info)
 
 	if (cpuctx->active_oncpu < perf_max_counters) {
 		counter->hw_ops->hw_perf_counter_enable(counter);
-		counter->active = 1;
+		counter->state = PERF_COUNTER_STATE_ACTIVE;
 		counter->oncpu = cpu;
 		ctx->nr_active++;
 		cpuctx->active_oncpu++;
@@ -328,7 +328,6 @@ retry:
 
 	spin_lock_irq(&ctx->lock);
 	/*
-	 * If the context is active and the counter has not been added
 	 * we need to retry the smp call.
 	 */
 	if (ctx->nr_active && list_empty(&counter->list_entry)) {
@@ -353,12 +352,12 @@ counter_sched_out(struct perf_counter *counter,
 		  struct perf_cpu_context *cpuctx,
 		  struct perf_counter_context *ctx)
 {
-	if (!counter->active)
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
 	counter->hw_ops->hw_perf_counter_disable(counter);
-	counter->active	=  0;
-	counter->oncpu	= -1;
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->oncpu = -1;
 
 	cpuctx->active_oncpu--;
 	ctx->nr_active--;
@@ -415,11 +414,11 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	if (counter->active == -1)
+	if (counter->state == PERF_COUNTER_STATE_OFF)
 		return;
 
 	counter->hw_ops->hw_perf_counter_enable(counter);
-	counter->active = 1;
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
 
 	cpuctx->active_oncpu++;
@@ -506,8 +505,8 @@ int perf_counter_task_disable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		WARN_ON_ONCE(counter->active == 1);
-		counter->active = -1;
+		WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE);
+		counter->state = PERF_COUNTER_STATE_OFF;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -540,9 +539,9 @@ int perf_counter_task_enable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->active != -1)
+		if (counter->state != PERF_COUNTER_STATE_OFF)
 			continue;
-		counter->active = 0;
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -620,7 +619,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	 * If counter is enabled and currently active on a CPU, update the
 	 * value in the counter structure:
 	 */
-	if (counter->active) {
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		smp_call_function_single(counter->oncpu,
 					 __hw_perf_counter_read, counter, 1);
 	}
@@ -673,7 +672,7 @@ static struct perf_data *perf_switch_irq_data(struct perf_counter *counter)
 
 retry:
 	spin_lock_irq(&ctx->lock);
-	if (!counter->active) {
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
 		counter->irqdata = counter->usrdata;
 		counter->usrdata = oldirqdata;
 		spin_unlock_irq(&ctx->lock);
-- 
cgit v0.10.2


From 447557ac7ce120306b4a31d6003faef39cb1bf14 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 20:40:18 +0100
Subject: perf counters: update docs

Impact: update docs

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf-counters.txt b/Documentation/perf-counters.txt
index 19033a0..fddd321 100644
--- a/Documentation/perf-counters.txt
+++ b/Documentation/perf-counters.txt
@@ -10,8 +10,8 @@ trigger interrupts when a threshold number of events have passed - and can
 thus be used to profile the code that runs on that CPU.
 
 The Linux Performance Counter subsystem provides an abstraction of these
-hardware capabilities. It provides per task and per CPU counters, and
-it provides event capabilities on top of those.
+hardware capabilities. It provides per task and per CPU counters, counter
+groups, and it provides event capabilities on top of those.
 
 Performance counters are accessed via special file descriptors.
 There's one file descriptor per virtual counter used.
@@ -19,12 +19,8 @@ There's one file descriptor per virtual counter used.
 The special file descriptor is opened via the perf_counter_open()
 system call:
 
- int
- perf_counter_open(u32 hw_event_type,
-                   u32 hw_event_period,
-                   u32 record_type,
-                   pid_t pid,
-                   int cpu);
+   int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
+			     pid_t pid, int cpu, int group_fd);
 
 The syscall returns the new fd. The fd can be used via the normal
 VFS system calls: read() can be used to read the counter, fcntl()
@@ -33,39 +29,78 @@ can be used to set the blocking mode, etc.
 Multiple counters can be kept open at a time, and the counters
 can be poll()ed.
 
-When creating a new counter fd, 'hw_event_type' is one of:
-
- enum hw_event_types {
-	PERF_COUNT_CYCLES,
-	PERF_COUNT_INSTRUCTIONS,
-	PERF_COUNT_CACHE_REFERENCES,
-	PERF_COUNT_CACHE_MISSES,
-	PERF_COUNT_BRANCH_INSTRUCTIONS,
-	PERF_COUNT_BRANCH_MISSES,
- };
+When creating a new counter fd, 'perf_counter_hw_event' is:
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+	s64			type;
+
+	u64			irq_period;
+	u32			record_type;
+
+	u32			disabled     :  1, /* off by default */
+				nmi	     :  1, /* NMI sampling   */
+				raw	     :  1, /* raw event type */
+				__reserved_1 : 29;
+
+	u64			__reserved_2;
+};
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_CYCLES		=  0,
+	PERF_COUNT_INSTRUCTIONS		=  1,
+	PERF_COUNT_CACHE_REFERENCES	=  2,
+	PERF_COUNT_CACHE_MISSES		=  3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
+	PERF_COUNT_BRANCH_MISSES	=  5,
+
+	/*
+	 * Special "software" counters provided by the kernel, even if
+	 * the hardware does not support performance counters. These
+	 * counters measure various physical and sw events of the
+	 * kernel (and allow the profiling of them as well):
+	 */
+	PERF_COUNT_CPU_CLOCK		= -1,
+	PERF_COUNT_TASK_CLOCK		= -2,
+	/*
+	 * Future software events:
+	 */
+	/* PERF_COUNT_PAGE_FAULTS	= -3,
+	   PERF_COUNT_CONTEXT_SWITCHES	= -4, */
+};
 
 These are standardized types of events that work uniformly on all CPUs
 that implements Performance Counters support under Linux. If a CPU is
 not able to count branch-misses, then the system call will return
 -EINVAL.
 
-[ Note: more hw_event_types are supported as well, but they are CPU
-  specific and are enumerated via /sys on a per CPU basis. Raw hw event
-  types can be passed in as negative numbers. For example, to count
-  "External bus cycles while bus lock signal asserted" events on Intel
-  Core CPUs, pass in a -0x4064 event type value. ]
-
-The parameter 'hw_event_period' is the number of events before waking up
-a read() that is blocked on a counter fd. Zero value means a non-blocking
-counter.
+More hw_event_types are supported as well, but they are CPU
+specific and are enumerated via /sys on a per CPU basis. Raw hw event
+types can be passed in under hw_event.type if hw_event.raw is 1.
+For example, to count "External bus cycles while bus lock signal asserted"
+events on Intel Core CPUs, pass in a 0x4064 event type value and set
+hw_event.raw to 1.
 
 'record_type' is the type of data that a read() will provide for the
 counter, and it can be one of:
 
-  enum perf_record_type {
-	PERF_RECORD_SIMPLE,
-	PERF_RECORD_IRQ,
-  };
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		=  0,
+	PERF_RECORD_IRQ			=  1,
+	PERF_RECORD_GROUP		=  2,
+};
 
 a "simple" counter is one that counts hardware events and allows
 them to be read out into a u64 count value. (read() returns 8 on
@@ -76,6 +111,10 @@ the IP of the interrupted context. In this case read() will return
 the 8-byte counter value, plus the Instruction Pointer address of the
 interrupted context.
 
+The parameter 'hw_event_period' is the number of events before waking up
+a read() that is blocked on a counter fd. Zero value means a non-blocking
+counter.
+
 The 'pid' parameter allows the counter to be specific to a task:
 
  pid == 0: if the pid parameter is zero, the counter is attached to the
@@ -92,7 +131,7 @@ CPU:
  cpu >= 0: the counter is restricted to a specific CPU
  cpu == -1: the counter counts on all CPUs
 
-Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.
+(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
 
 A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
 events of that task and 'follows' that task to whatever CPU the task
@@ -102,3 +141,7 @@ their own tasks.
 A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
 all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
 
+Group counters are created by passing in a group_fd of another counter.
+Groups are scheduled at once and can be used with PERF_RECORD_GROUP
+to record multi-dimensional timestamps.
+
-- 
cgit v0.10.2


From 9b194e831fb2c322ed81a373e49620f34edc2778 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 20:22:35 +0100
Subject: x86: implement atomic64_t on 32-bit

Impact: new API

Implement the atomic64_t APIs on 32-bit as well. Will be used by
the performance counters code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index ad5b9f6..9927e01 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -255,5 +255,223 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+/* An 64bit atomic type */
+
+typedef struct {
+	unsigned long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val)	{ (val) }
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically reads the value of @v.
+ * Doesn't imply a read memory barrier.
+ */
+#define __atomic64_read(ptr)		((ptr)->counter)
+
+static inline unsigned long long
+cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
+{
+	asm volatile(
+
+		LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
+
+		     :		"=A" (old)
+
+		     : [ptr]	"D" (ptr),
+				"A" (old),
+				"b" (ll_low(new)),
+				"c" (ll_high(new))
+
+		     : "memory");
+
+	return old;
+}
+
+static inline unsigned long long
+atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
+		 unsigned long long new_val)
+{
+	return cmpxchg8b(&ptr->counter, old_val, new_val);
+}
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ * @new_val:  value to assign
+ *
+ * Atomically sets the value of @ptr to @new_val.
+ */
+static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
+{
+	unsigned long long old_val;
+
+	do {
+		old_val = atomic_read(ptr);
+	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+}
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ *
+ * Atomically reads the value of @ptr and returns it.
+ */
+static inline unsigned long long atomic64_read(atomic64_t *ptr)
+{
+	unsigned long long curr_val;
+
+	do {
+		curr_val = __atomic64_read(ptr);
+	} while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
+
+	return curr_val;
+}
+
+/**
+ * atomic64_add_return - add and return
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns @delta + *@ptr
+ */
+static inline unsigned long long
+atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
+{
+	unsigned long long old_val, new_val;
+
+	do {
+		old_val = atomic_read(ptr);
+		new_val = old_val + delta;
+
+	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+	return new_val;
+}
+
+static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
+{
+	return atomic64_add_return(-delta, ptr);
+}
+
+static inline long atomic64_inc_return(atomic64_t *ptr)
+{
+	return atomic64_add_return(1, ptr);
+}
+
+static inline long atomic64_dec_return(atomic64_t *ptr)
+{
+	return atomic64_sub_return(1, ptr);
+}
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr.
+ */
+static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
+{
+	atomic64_add_return(delta, ptr);
+}
+
+/**
+ * atomic64_sub - subtract the atomic64 variable
+ * @delta: integer value to subtract
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr.
+ */
+static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
+{
+	atomic64_add(-delta, ptr);
+}
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @delta: integer value to subtract
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int
+atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
+{
+	unsigned long long old_val = atomic64_sub_return(delta, ptr);
+
+	return old_val == 0;
+}
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1.
+ */
+static inline void atomic64_inc(atomic64_t *ptr)
+{
+	atomic64_add(1, ptr);
+}
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1.
+ */
+static inline void atomic64_dec(atomic64_t *ptr)
+{
+	atomic64_sub(1, ptr);
+}
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic64_dec_and_test(atomic64_t *ptr)
+{
+	return atomic64_sub_and_test(1, ptr);
+}
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_inc_and_test(atomic64_t *ptr)
+{
+	return atomic64_sub_and_test(-1, ptr);
+}
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @delta: integer value to add
+ * @ptr:   pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int
+atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
+{
+	long long old_val = atomic64_add_return(delta, ptr);
+
+	return old_val < 0;
+}
+
 #include <asm-generic/atomic.h>
 #endif /* _ASM_X86_ATOMIC_32_H */
-- 
cgit v0.10.2


From ee06094f8279e1312fc0a31591320cc7b6f0ab1e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 13 Dec 2008 09:00:03 +0100
Subject: perfcounters: restructure x86 counter math

Impact: restructure code

Change counter math from absolute values to clear delta logic.

We try to extract elapsed deltas from the raw hw counter - and put
that into the generic counter.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f2fdc18..fe94490 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -643,7 +643,7 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_COUNTERS if (!M386 && !M486)
 
 config X86_IO_APIC
 	def_bool y
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b903f8d..5afae13 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -54,6 +54,48 @@ const int intel_perfmon_event_map[] =
 const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 
 /*
+ * Propagate counter elapsed time into the generic counter.
+ * Can only be executed on the CPU where the counter is active.
+ * Returns the delta events processed.
+ */
+static void
+x86_perf_counter_update(struct perf_counter *counter,
+			struct hw_perf_counter *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count, delta;
+
+	WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE);
+	/*
+	 * Careful: an NMI might modify the previous counter value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic counter atomically:
+	 */
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	rdmsrl(hwc->counter_base + idx, new_raw_count);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (counter-)time and add that to the generic counter.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count, so we do that by clipping the delta to 32 bits:
+	 */
+	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
+	WARN_ON_ONCE((int)delta < 0);
+
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &hwc->period_left);
+}
+
+/*
  * Setup the hardware configuration for a given hw_event_type
  */
 static int __hw_perf_counter_init(struct perf_counter *counter)
@@ -90,10 +132,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	 * so we install an artificial 1<<31 period regardless of
 	 * the generic counter period:
 	 */
-	if (!hwc->irq_period)
+	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
 		hwc->irq_period = 0x7FFFFFFF;
 
-	hwc->next_count	= -(s32)hwc->irq_period;
+	atomic64_set(&hwc->period_left, hwc->irq_period);
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -118,12 +160,6 @@ void hw_perf_enable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
-void hw_perf_restore(u64 ctrl)
-{
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
-}
-EXPORT_SYMBOL_GPL(hw_perf_restore);
-
 u64 hw_perf_save_disable(void)
 {
 	u64 ctrl;
@@ -134,27 +170,74 @@ u64 hw_perf_save_disable(void)
 }
 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
+void hw_perf_restore(u64 ctrl)
+{
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
+}
+EXPORT_SYMBOL_GPL(hw_perf_restore);
+
 static inline void
-__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
+__x86_perf_counter_disable(struct perf_counter *counter,
+			   struct hw_perf_counter *hwc, unsigned int idx)
 {
-	wrmsr(hwc->config_base + idx, hwc->config, 0);
+	int err;
+
+	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
+	WARN_ON_ONCE(err);
 }
 
-static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]);
+static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]);
 
-static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx)
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the counter disabled in hw:
+ */
+static void
+__hw_perf_counter_set_period(struct perf_counter *counter,
+			     struct hw_perf_counter *hwc, int idx)
 {
-	per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count;
+	s32 left = atomic64_read(&hwc->period_left);
+	s32 period = hwc->irq_period;
+
+	WARN_ON_ONCE(period <= 0);
+
+	/*
+	 * If we are way outside a reasoable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_set(&hwc->period_left, left);
+	}
 
-	wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
+	WARN_ON_ONCE(left <= 0);
+
+	per_cpu(prev_left[idx], smp_processor_id()) = left;
+
+	/*
+	 * The hw counter starts counting from this counter offset,
+	 * mark it to be able to extra future deltas:
+	 */
+	atomic64_set(&hwc->prev_count, (u64)(s64)-left);
+
+	wrmsr(hwc->counter_base + idx, -left, 0);
 }
 
-static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+static void
+__x86_perf_counter_enable(struct perf_counter *counter,
+			  struct hw_perf_counter *hwc, int idx)
 {
 	wrmsr(hwc->config_base + idx,
 	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in counter:
+ */
 static void x86_perf_counter_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -170,55 +253,17 @@ static void x86_perf_counter_enable(struct perf_counter *counter)
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__x86_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(counter, hwc, idx);
 
 	cpuc->counters[idx] = counter;
 
-	__hw_perf_counter_set_period(hwc, idx);
-	__x86_perf_counter_enable(hwc, idx);
-}
-
-static void __hw_perf_save_counter(struct perf_counter *counter,
-				   struct hw_perf_counter *hwc, int idx)
-{
-	s64 raw = -1;
-	s64 delta;
-
-	/*
-	 * Get the raw hw counter value:
-	 */
-	rdmsrl(hwc->counter_base + idx, raw);
-
-	/*
-	 * Rebase it to zero (it started counting at -irq_period),
-	 * to see the delta since ->prev_count:
-	 */
-	delta = (s64)hwc->irq_period + (s64)(s32)raw;
-
-	atomic64_counter_set(counter, hwc->prev_count + delta);
-
-	/*
-	 * Adjust the ->prev_count offset - if we went beyond
-	 * irq_period of units, then we got an IRQ and the counter
-	 * was set back to -irq_period:
-	 */
-	while (delta >= (s64)hwc->irq_period) {
-		hwc->prev_count += hwc->irq_period;
-		delta -= (s64)hwc->irq_period;
-	}
-
-	/*
-	 * Calculate the next raw counter value we'll write into
-	 * the counter at the next sched-in time:
-	 */
-	delta -= (s64)hwc->irq_period;
-
-	hwc->next_count = (s32)delta;
+	__hw_perf_counter_set_period(counter, hwc, idx);
+	__x86_perf_counter_enable(counter, hwc, idx);
 }
 
 void perf_counter_print_debug(void)
 {
-	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count;
+	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left;
 	int cpu, idx;
 
 	if (!nr_hw_counters)
@@ -241,14 +286,14 @@ void perf_counter_print_debug(void)
 		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 		rdmsrl(MSR_ARCH_PERFMON_PERFCTR0  + idx, pmc_count);
 
-		next_count = per_cpu(prev_next_count[idx], cpu);
+		prev_left = per_cpu(prev_left[idx], cpu);
 
 		printk(KERN_INFO "CPU#%d: PMC%d ctrl:  %016llx\n",
 			cpu, idx, pmc_ctrl);
 		printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n",
 			cpu, idx, pmc_count);
-		printk(KERN_INFO "CPU#%d: PMC%d next:  %016llx\n",
-			cpu, idx, next_count);
+		printk(KERN_INFO "CPU#%d: PMC%d left:  %016llx\n",
+			cpu, idx, prev_left);
 	}
 	local_irq_enable();
 }
@@ -259,29 +304,16 @@ static void x86_perf_counter_disable(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__x86_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(counter, hwc, idx);
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
-	__hw_perf_save_counter(counter, hwc, idx);
-}
 
-static void x86_perf_counter_read(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	unsigned long addr = hwc->counter_base + hwc->idx;
-	s64 offs, val = -1LL;
-	s32 val32;
-
-	/* Careful: NMI might modify the counter offset */
-	do {
-		offs = hwc->prev_count;
-		rdmsrl(addr, val);
-	} while (offs != hwc->prev_count);
-
-	val32 = (s32) val;
-	val = (s64)hwc->irq_period + (s64)val32;
-	atomic64_counter_set(counter, hwc->prev_count + val);
+	/*
+	 * Drain the remaining delta count out of a counter
+	 * that we are disabling:
+	 */
+	x86_perf_counter_update(counter, hwc, idx);
 }
 
 static void perf_store_irq_data(struct perf_counter *counter, u64 data)
@@ -299,7 +331,8 @@ static void perf_store_irq_data(struct perf_counter *counter, u64 data)
 }
 
 /*
- * NMI-safe enable method:
+ * Save and restart an expired counter. Called by NMI contexts,
+ * so it has to be careful about preempting normal counter ops:
  */
 static void perf_save_and_restart(struct perf_counter *counter)
 {
@@ -309,45 +342,25 @@ static void perf_save_and_restart(struct perf_counter *counter)
 
 	rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 
-	__hw_perf_save_counter(counter, hwc, idx);
-	__hw_perf_counter_set_period(hwc, idx);
+	x86_perf_counter_update(counter, hwc, idx);
+	__hw_perf_counter_set_period(counter, hwc, idx);
 
 	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
-		__x86_perf_counter_enable(hwc, idx);
+		__x86_perf_counter_enable(counter, hwc, idx);
 }
 
 static void
 perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 {
 	struct perf_counter *counter, *group_leader = sibling->group_leader;
-	int bit;
-
-	/*
-	 * Store the counter's own timestamp first:
-	 */
-	perf_store_irq_data(sibling, sibling->hw_event.type);
-	perf_store_irq_data(sibling, atomic64_counter_read(sibling));
 
 	/*
-	 * Then store sibling timestamps (if any):
+	 * Store sibling timestamps (if any):
 	 */
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
-			/*
-			 * When counter was not in the overflow mask, we have to
-			 * read it from hardware. We read it as well, when it
-			 * has not been read yet and clear the bit in the
-			 * status mask.
-			 */
-			bit = counter->hw.idx;
-			if (!test_bit(bit, (unsigned long *) overflown) ||
-			    test_bit(bit, (unsigned long *) status)) {
-				clear_bit(bit, (unsigned long *) status);
-				perf_save_and_restart(counter);
-			}
-		}
+		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 		perf_store_irq_data(sibling, counter->hw_event.type);
-		perf_store_irq_data(sibling, atomic64_counter_read(counter));
+		perf_store_irq_data(sibling, atomic64_read(&counter->count));
 	}
 }
 
@@ -540,6 +553,11 @@ void __init init_hw_perf_counters(void)
 	perf_counters_initialized = true;
 }
 
+static void x86_perf_counter_read(struct perf_counter *counter)
+{
+	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+}
+
 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
 	.hw_perf_counter_enable		= x86_perf_counter_enable,
 	.hw_perf_counter_disable	= x86_perf_counter_disable,
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8cb095f..7246028 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -91,14 +91,16 @@ struct perf_counter_hw_event {
  * struct hw_perf_counter - performance counter hardware details:
  */
 struct hw_perf_counter {
+#ifdef CONFIG_PERF_COUNTERS
 	u64				config;
 	unsigned long			config_base;
 	unsigned long			counter_base;
 	int				nmi;
 	unsigned int			idx;
-	u64				prev_count;
+	atomic64_t			prev_count;
 	u64				irq_period;
-	s32				next_count;
+	atomic64_t			period_left;
+#endif
 };
 
 /*
@@ -140,17 +142,15 @@ enum perf_counter_active_state {
  * struct perf_counter - performance counter kernel representation:
  */
 struct perf_counter {
+#ifdef CONFIG_PERF_COUNTERS
 	struct list_head		list_entry;
 	struct list_head		sibling_list;
 	struct perf_counter		*group_leader;
 	const struct hw_perf_counter_ops *hw_ops;
 
 	enum perf_counter_active_state	state;
-#if BITS_PER_LONG == 64
 	atomic64_t			count;
-#else
-	atomic_t			count32[2];
-#endif
+
 	struct perf_counter_hw_event	hw_event;
 	struct hw_perf_counter		hw;
 
@@ -172,6 +172,7 @@ struct perf_counter {
 	struct perf_data		*irqdata;
 	struct perf_data		*usrdata;
 	struct perf_data		data[2];
+#endif
 };
 
 /**
@@ -220,8 +221,6 @@ extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
 extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
-extern void atomic64_counter_set(struct perf_counter *counter, u64 val64);
-extern u64 atomic64_counter_read(struct perf_counter *counter);
 extern int perf_counter_task_disable(void);
 extern int perf_counter_task_enable(void);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 559130b..416861c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -44,67 +44,9 @@ hw_perf_counter_init(struct perf_counter *counter)
 }
 
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
-void __weak hw_perf_restore(u64 ctrl)	{ }
+void __weak hw_perf_restore(u64 ctrl)		{ }
 void __weak hw_perf_counter_setup(void)		{ }
 
-#if BITS_PER_LONG == 64
-
-/*
- * Read the cached counter in counter safe against cross CPU / NMI
- * modifications. 64 bit version - no complications.
- */
-static inline u64 perf_counter_read_safe(struct perf_counter *counter)
-{
-	return (u64) atomic64_read(&counter->count);
-}
-
-void atomic64_counter_set(struct perf_counter *counter, u64 val)
-{
-	atomic64_set(&counter->count, val);
-}
-
-u64 atomic64_counter_read(struct perf_counter *counter)
-{
-	return atomic64_read(&counter->count);
-}
-
-#else
-
-/*
- * Read the cached counter in counter safe against cross CPU / NMI
- * modifications. 32 bit version.
- */
-static u64 perf_counter_read_safe(struct perf_counter *counter)
-{
-	u32 cntl, cnth;
-
-	local_irq_disable();
-	do {
-		cnth = atomic_read(&counter->count32[1]);
-		cntl = atomic_read(&counter->count32[0]);
-	} while (cnth != atomic_read(&counter->count32[1]));
-
-	local_irq_enable();
-
-	return cntl | ((u64) cnth) << 32;
-}
-
-void atomic64_counter_set(struct perf_counter *counter, u64 val64)
-{
-	u32 *val32 = (void *)&val64;
-
-	atomic_set(counter->count32 + 0, *(val32 + 0));
-	atomic_set(counter->count32 + 1, *(val32 + 1));
-}
-
-u64 atomic64_counter_read(struct perf_counter *counter)
-{
-	return atomic_read(counter->count32 + 0) |
-		(u64) atomic_read(counter->count32 + 1) << 32;
-}
-
-#endif
-
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -280,11 +222,11 @@ static void __perf_install_in_context(void *info)
 	ctx->nr_counters++;
 
 	if (cpuctx->active_oncpu < perf_max_counters) {
-		counter->hw_ops->hw_perf_counter_enable(counter);
 		counter->state = PERF_COUNTER_STATE_ACTIVE;
 		counter->oncpu = cpu;
 		ctx->nr_active++;
 		cpuctx->active_oncpu++;
+		counter->hw_ops->hw_perf_counter_enable(counter);
 	}
 
 	if (!ctx->task && cpuctx->max_pertask)
@@ -624,7 +566,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
 					 __hw_perf_counter_read, counter, 1);
 	}
 
-	return perf_counter_read_safe(counter);
+	return atomic64_read(&counter->count);
 }
 
 /*
@@ -921,7 +863,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter)
 {
 	int cpu = raw_smp_processor_id();
 
-	atomic64_counter_set(counter, cpu_clock(cpu));
+	atomic64_set(&counter->count, cpu_clock(cpu));
 }
 
 static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
@@ -940,7 +882,7 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter)
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	atomic64_counter_set(counter, current->se.sum_exec_runtime);
+	atomic64_set(&counter->count, current->se.sum_exec_runtime);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
-- 
cgit v0.10.2


From 9b51f66dcb09ac5eb6bc68fc111d5c7a1e0131d6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 13:49:45 +0100
Subject: perfcounters: implement "counter inheritance"

Impact: implement new performance feature

Counter inheritance can be used to run performance counters in a workload,
transparently - and pipe back the counter results to the parent counter.

Inheritance for performance counters works the following way: when creating
a counter it can be marked with the .inherit=1 flag. Such counters are then
'inherited' by all child tasks (be they fork()-ed or clone()-ed). These
counters get inherited through exec() boundaries as well (except through
setuid boundaries).

The counter values get added back to the parent counter(s) when the child
task(s) exit - much like stime/utime statistics are gathered. So inherited
counters are ideal to gather summary statistics about an application's
behavior via shell commands, without having to modify that application.

The timec.c command utilizes counter inheritance:

  http://redhat.com/~mingo/perfcounters/timec.c

Sample output:

   $ ./timec -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null

   Performance counter stats for 'ls':

           163516953 instructions
                2295 cache-misses
             2855182 branch-misses

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7246028..e5d25bf 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -75,10 +75,11 @@ struct perf_counter_hw_event {
 	u64			irq_period;
 	u32			record_type;
 
-	u32			disabled     :  1, /* off by default */
-				nmi	     :  1, /* NMI sampling   */
-				raw	     :  1, /* raw event type */
-				__reserved_1 : 29;
+	u32			disabled     :  1, /* off by default      */
+				nmi	     :  1, /* NMI sampling        */
+				raw	     :  1, /* raw event type      */
+				inherit	     :  1, /* children inherit it */
+				__reserved_1 : 28;
 
 	u64			__reserved_2;
 };
@@ -138,6 +139,8 @@ enum perf_counter_active_state {
 	PERF_COUNTER_STATE_ACTIVE	=  1,
 };
 
+struct file;
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -156,7 +159,10 @@ struct perf_counter {
 
 	struct perf_counter_context	*ctx;
 	struct task_struct		*task;
+	struct file			*filp;
 
+	unsigned int			nr_inherited;
+	struct perf_counter		*parent;
 	/*
 	 * Protect attach/detach:
 	 */
@@ -210,13 +216,16 @@ struct perf_cpu_context {
 extern int perf_max_counters;
 
 #ifdef CONFIG_PERF_COUNTERS
+extern void
+perf_counter_show(struct perf_counter *counter, char *str, int trace);
 extern const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter);
 
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
 extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
-extern void perf_counter_init_task(struct task_struct *task);
+extern void perf_counter_init_task(struct task_struct *child);
+extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
 extern u64 hw_perf_save_disable(void);
@@ -226,12 +235,15 @@ extern int perf_counter_task_enable(void);
 
 #else
 static inline void
+perf_counter_show(struct perf_counter *counter, char *str, int trace)   { }
+static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
 static inline void
 perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
 static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
-static inline void perf_counter_init_task(struct task_struct *task)	{ }
+static inline void perf_counter_init_task(struct task_struct *child)	{ }
+static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void hw_perf_restore(u64 ctrl)			{ }
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d8be7e..d336c90 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1093,11 +1093,12 @@ NORET_TYPE void do_exit(long code)
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
-#ifdef CONFIG_FUTEX
 	/*
-	 * This must happen late, after the PID is not
-	 * hashed anymore:
+	 * These must happen late, after the PID is not
+	 * hashed anymore, but still at a point that may sleep:
 	 */
+	perf_counter_exit_task(tsk);
+#ifdef CONFIG_FUTEX
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
 	if (unlikely(current->pi_state_cache))
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 416861c..f5e81dd 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -80,8 +80,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 		list_del_init(&sibling->list_entry);
 		list_add_tail(&sibling->list_entry, &ctx->counter_list);
-		WARN_ON_ONCE(!sibling->group_leader);
-		WARN_ON_ONCE(sibling->group_leader == sibling);
 		sibling->group_leader = sibling;
 	}
 }
@@ -97,6 +95,7 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -107,7 +106,7 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->hw_perf_counter_disable(counter);
@@ -136,7 +135,7 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 
@@ -199,6 +198,7 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
 	int cpu = smp_processor_id();
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -209,7 +209,7 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +232,7 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -446,10 +446,9 @@ int perf_counter_task_disable(void)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry)
 		counter->state = PERF_COUNTER_STATE_OFF;
-	}
+
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
@@ -526,26 +525,6 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 }
 
 /*
- * Initialize the perf_counter context in a task_struct:
- */
-static void
-__perf_counter_init_context(struct perf_counter_context *ctx,
-			    struct task_struct *task)
-{
-	spin_lock_init(&ctx->lock);
-	INIT_LIST_HEAD(&ctx->counter_list);
-	ctx->nr_counters	= 0;
-	ctx->task		= task;
-}
-/*
- * Initialize the perf_counter context in task_struct
- */
-void perf_counter_init_task(struct task_struct *task)
-{
-	__perf_counter_init_context(&task->perf_counter_ctx, task);
-}
-
-/*
  * Cross CPU call to read the hardware counter
  */
 static void __hw_perf_counter_read(void *info)
@@ -663,7 +642,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 
-		WARN_ON_ONCE(ctx->task);
 		return ctx;
 	}
 
@@ -915,12 +893,13 @@ sw_perf_counter_init(struct perf_counter *counter)
 static struct perf_counter *
 perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
-		   struct perf_counter *group_leader)
+		   struct perf_counter *group_leader,
+		   gfp_t gfpflags)
 {
 	const struct hw_perf_counter_ops *hw_ops;
 	struct perf_counter *counter;
 
-	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+	counter = kzalloc(sizeof(*counter), gfpflags);
 	if (!counter)
 		return NULL;
 
@@ -947,9 +926,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	hw_ops = NULL;
 	if (!hw_event->raw && hw_event->type < 0)
 		hw_ops = sw_perf_counter_init(counter);
-	if (!hw_ops) {
+	if (!hw_ops)
 		hw_ops = hw_perf_counter_init(counter);
-	}
 
 	if (!hw_ops) {
 		kfree(counter);
@@ -975,8 +953,10 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
 	struct perf_counter_context *ctx;
+	struct file *counter_file = NULL;
 	struct file *group_file = NULL;
 	int fput_needed = 0;
+	int fput_needed2 = 0;
 	int ret;
 
 	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
@@ -1017,25 +997,29 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	}
 
 	ret = -EINVAL;
-	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
+	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
 	if (!counter)
 		goto err_put_context;
 
-	perf_install_in_context(ctx, counter, cpu);
-
 	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
 	if (ret < 0)
-		goto err_remove_free_put_context;
+		goto err_free_put_context;
+
+	counter_file = fget_light(ret, &fput_needed2);
+	if (!counter_file)
+		goto err_free_put_context;
+
+	counter->filp = counter_file;
+	perf_install_in_context(ctx, counter, cpu);
+
+	fput_light(counter_file, fput_needed2);
 
 out_fput:
 	fput_light(group_file, fput_needed);
 
 	return ret;
 
-err_remove_free_put_context:
-	mutex_lock(&counter->mutex);
-	perf_counter_remove_from_context(counter);
-	mutex_unlock(&counter->mutex);
+err_free_put_context:
 	kfree(counter);
 
 err_put_context:
@@ -1044,6 +1028,186 @@ err_put_context:
 	goto out_fput;
 }
 
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	ctx->task = task;
+}
+
+/*
+ * inherit a counter from parent task to child task:
+ */
+static int
+inherit_counter(struct perf_counter *parent_counter,
+	      struct task_struct *parent,
+	      struct perf_counter_context *parent_ctx,
+	      struct task_struct *child,
+	      struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *child_counter;
+
+	child_counter = perf_counter_alloc(&parent_counter->hw_event,
+					    parent_counter->cpu, NULL,
+					    GFP_ATOMIC);
+	if (!child_counter)
+		return -ENOMEM;
+
+	/*
+	 * Link it up in the child's context:
+	 */
+	child_counter->ctx = child_ctx;
+	child_counter->task = child;
+	list_add_counter(child_counter, child_ctx);
+	child_ctx->nr_counters++;
+
+	child_counter->parent = parent_counter;
+	parent_counter->nr_inherited++;
+	/*
+	 * inherit into child's child as well:
+	 */
+	child_counter->hw_event.inherit = 1;
+
+	/*
+	 * Get a reference to the parent filp - we will fput it
+	 * when the child counter exits. This is safe to do because
+	 * we are in the parent and we know that the filp still
+	 * exists and has a nonzero count:
+	 */
+	atomic_long_inc(&parent_counter->filp->f_count);
+
+	return 0;
+}
+
+static void
+__perf_counter_exit_task(struct task_struct *child,
+			 struct perf_counter *child_counter,
+			 struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *parent_counter;
+	u64 parent_val, child_val;
+	u64 perf_flags;
+
+	/*
+	 * Disable and unlink this counter.
+	 *
+	 * Be careful about zapping the list - IRQ/NMI context
+	 * could still be processing it:
+	 */
+	local_irq_disable();
+	perf_flags = hw_perf_save_disable();
+
+	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE)
+		child_counter->hw_ops->hw_perf_counter_disable(child_counter);
+	list_del_init(&child_counter->list_entry);
+
+	hw_perf_restore(perf_flags);
+	local_irq_enable();
+
+	parent_counter = child_counter->parent;
+	/*
+	 * It can happen that parent exits first, and has counters
+	 * that are still around due to the child reference. These
+	 * counters need to be zapped - but otherwise linger.
+	 */
+	if (!parent_counter)
+		return;
+
+	parent_val = atomic64_read(&parent_counter->count);
+	child_val = atomic64_read(&child_counter->count);
+
+	/*
+	 * Add back the child's count to the parent's count:
+	 */
+	atomic64_add(child_val, &parent_counter->count);
+
+	fput(parent_counter->filp);
+
+	kfree(child_counter);
+}
+
+/*
+ * When a child task exist, feed back counter values to parent counters.
+ *
+ * Note: we are running in child context, but the PID is not hashed
+ * anymore so new counters will not be added.
+ */
+void perf_counter_exit_task(struct task_struct *child)
+{
+	struct perf_counter *child_counter, *tmp;
+	struct perf_counter_context *child_ctx;
+
+	child_ctx = &child->perf_counter_ctx;
+
+	if (likely(!child_ctx->nr_counters))
+		return;
+
+	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
+				 list_entry)
+		__perf_counter_exit_task(child, child_counter, child_ctx);
+}
+
+/*
+ * Initialize the perf_counter context in task_struct
+ */
+void perf_counter_init_task(struct task_struct *child)
+{
+	struct perf_counter_context *child_ctx, *parent_ctx;
+	struct perf_counter *counter, *parent_counter;
+	struct task_struct *parent = current;
+	unsigned long flags;
+
+	child_ctx  =  &child->perf_counter_ctx;
+	parent_ctx = &parent->perf_counter_ctx;
+
+	__perf_counter_init_context(child_ctx, child);
+
+	/*
+	 * This is executed from the parent task context, so inherit
+	 * counters that have been marked for cloning:
+	 */
+
+	if (likely(!parent_ctx->nr_counters))
+		return;
+
+	/*
+	 * Lock the parent list. No need to lock the child - not PID
+	 * hashed yet and not running, so nobody can access it.
+	 */
+	spin_lock_irqsave(&parent_ctx->lock, flags);
+
+	/*
+	 * We dont have to disable NMIs - we are only looking at
+	 * the list, not manipulating it:
+	 */
+	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
+		if (!counter->hw_event.inherit || counter->group_leader != counter)
+			continue;
+
+		/*
+		 * Instead of creating recursive hierarchies of counters,
+		 * we link inheritd counters back to the original parent,
+		 * which has a filp for sure, which we use as the reference
+		 * count:
+		 */
+		parent_counter = counter;
+		if (counter->parent)
+			parent_counter = counter->parent;
+
+		if (inherit_counter(parent_counter, parent,
+				  parent_ctx, child, child_ctx))
+			break;
+	}
+
+	spin_unlock_irqrestore(&parent_ctx->lock, flags);
+}
+
 static void __cpuinit perf_counter_init_cpu(int cpu)
 {
 	struct perf_cpu_context *cpuctx;
-- 
cgit v0.10.2


From 8cb391e8786c8072367f0aeb90551903fef074ba Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 12:22:31 +0100
Subject: perfcounters: fix task clock counter

Impact: bugfix

Update the task clock counter to the new math.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f5e81dd..1f81cde 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -850,17 +850,36 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.hw_perf_counter_read		= cpu_clock_perf_counter_read,
 };
 
-static void task_clock_perf_counter_enable(struct perf_counter *counter)
+static void task_clock_perf_counter_update(struct perf_counter *counter)
 {
+	u64 prev, now;
+	s64 delta;
+
+	prev = atomic64_read(&counter->hw.prev_count);
+	now = current->se.sum_exec_runtime;
+
+	atomic64_set(&counter->hw.prev_count, now);
+
+	delta = now - prev;
+	if (WARN_ON_ONCE(delta < 0))
+		delta = 0;
+
+	atomic64_add(delta, &counter->count);
 }
 
-static void task_clock_perf_counter_disable(struct perf_counter *counter)
+static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
+	task_clock_perf_counter_update(counter);
 }
 
-static void task_clock_perf_counter_read(struct perf_counter *counter)
+static void task_clock_perf_counter_enable(struct perf_counter *counter)
+{
+	atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime);
+}
+
+static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->count, current->se.sum_exec_runtime);
+	task_clock_perf_counter_update(counter);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
-- 
cgit v0.10.2


From 5d6a27d8a096868ae313f71f563b06074a7e34fe Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 12:28:33 +0100
Subject: perfcounters: add context switch counter

Impact: add new feature, new sw counter

Add a counter that counts the number of context-switches a task
is doing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e5d25bf..d2a1656 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -53,8 +53,8 @@ enum hw_event_types {
 	/*
 	 * Future software events:
 	 */
-	/* PERF_COUNT_PAGE_FAULTS	= -3,
-	   PERF_COUNT_CONTEXT_SWITCHES	= -4, */
+	PERF_COUNT_PAGE_FAULTS		= -3,
+	PERF_COUNT_CONTEXT_SWITCHES	= -4,
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1f81cde..0928709 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -888,6 +888,54 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.hw_perf_counter_read		= task_clock_perf_counter_read,
 };
 
+static u64 get_context_switches(void)
+{
+	struct task_struct *curr = current;
+
+	return curr->nvcsw + curr->nivcsw;
+}
+
+static void context_switches_perf_counter_update(struct perf_counter *counter)
+{
+	u64 prev, now;
+	s64 delta;
+
+	prev = atomic64_read(&counter->hw.prev_count);
+	now = get_context_switches();
+
+	atomic64_set(&counter->hw.prev_count, now);
+
+	delta = now - prev;
+	if (WARN_ON_ONCE(delta < 0))
+		delta = 0;
+
+	atomic64_add(delta, &counter->count);
+}
+
+static void context_switches_perf_counter_read(struct perf_counter *counter)
+{
+	context_switches_perf_counter_update(counter);
+}
+
+static void context_switches_perf_counter_enable(struct perf_counter *counter)
+{
+	/*
+	 * ->nvcsw + curr->nivcsw is a per-task value already,
+	 * so we dont have to clear it on switch-in.
+	 */
+}
+
+static void context_switches_perf_counter_disable(struct perf_counter *counter)
+{
+	context_switches_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_context_switches = {
+	.hw_perf_counter_enable		= context_switches_perf_counter_enable,
+	.hw_perf_counter_disable	= context_switches_perf_counter_disable,
+	.hw_perf_counter_read		= context_switches_perf_counter_read,
+};
+
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
@@ -900,6 +948,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_TASK_CLOCK:
 		hw_ops = &perf_ops_task_clock;
 		break;
+	case PERF_COUNT_CONTEXT_SWITCHES:
+		hw_ops = &perf_ops_context_switches;
+		break;
 	default:
 		break;
 	}
-- 
cgit v0.10.2


From 6c594c21fcb02c662f11c97be4d7d2b73060a205 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 12:34:15 +0100
Subject: perfcounters: add task migrations counter

Impact: add new feature, new sw counter

Add a counter that counts the number of cross-CPU migrations a
task is suffering.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d2a1656..f30486f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -42,6 +42,8 @@ enum hw_event_types {
 	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
 	PERF_COUNT_BRANCH_MISSES	=  5,
 
+	PERF_HW_EVENTS_MAX		=  6,
+
 	/*
 	 * Special "software" counters provided by the kernel, even if
 	 * the hardware does not support performance counters. These
@@ -50,11 +52,11 @@ enum hw_event_types {
 	 */
 	PERF_COUNT_CPU_CLOCK		= -1,
 	PERF_COUNT_TASK_CLOCK		= -2,
-	/*
-	 * Future software events:
-	 */
 	PERF_COUNT_PAGE_FAULTS		= -3,
 	PERF_COUNT_CONTEXT_SWITCHES	= -4,
+	PERF_COUNT_CPU_MIGRATIONS	= -5,
+
+	PERF_SW_EVENTS_MIN		= -6,
 };
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4c53027..2e15be8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1014,6 +1014,8 @@ struct sched_entity {
 	u64			last_wakeup;
 	u64			avg_overlap;
 
+	u64			nr_migrations;
+
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
 	u64			wait_max;
@@ -1029,7 +1031,6 @@ struct sched_entity {
 	u64			exec_max;
 	u64			slice_max;
 
-	u64			nr_migrations;
 	u64			nr_migrations_cold;
 	u64			nr_failed_migrations_affine;
 	u64			nr_failed_migrations_running;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0928709..fb11e35 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -936,6 +936,52 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = {
 	.hw_perf_counter_read		= context_switches_perf_counter_read,
 };
 
+static inline u64 get_cpu_migrations(void)
+{
+	return current->se.nr_migrations;
+}
+
+static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
+{
+	u64 prev, now;
+	s64 delta;
+
+	prev = atomic64_read(&counter->hw.prev_count);
+	now = get_cpu_migrations();
+
+	atomic64_set(&counter->hw.prev_count, now);
+
+	delta = now - prev;
+	if (WARN_ON_ONCE(delta < 0))
+		delta = 0;
+
+	atomic64_add(delta, &counter->count);
+}
+
+static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
+{
+	cpu_migrations_perf_counter_update(counter);
+}
+
+static void cpu_migrations_perf_counter_enable(struct perf_counter *counter)
+{
+	/*
+	 * se.nr_migrations is a per-task value already,
+	 * so we dont have to clear it on switch-in.
+	 */
+}
+
+static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
+{
+	cpu_migrations_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
+	.hw_perf_counter_enable		= cpu_migrations_perf_counter_enable,
+	.hw_perf_counter_disable	= cpu_migrations_perf_counter_disable,
+	.hw_perf_counter_read		= cpu_migrations_perf_counter_read,
+};
+
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
@@ -951,6 +997,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_CONTEXT_SWITCHES:
 		hw_ops = &perf_ops_context_switches;
 		break;
+	case PERF_COUNT_CPU_MIGRATIONS:
+		hw_ops = &perf_ops_cpu_migrations;
+		break;
 	default:
 		break;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index 5c3f410..382cfdb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1852,12 +1852,14 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		p->se.sleep_start -= clock_offset;
 	if (p->se.block_start)
 		p->se.block_start -= clock_offset;
+#endif
 	if (old_cpu != new_cpu) {
-		schedstat_inc(p, se.nr_migrations);
+		p->se.nr_migrations++;
+#ifdef CONFIG_SCHEDSTATS
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
-	}
 #endif
+	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
 					 new_cfsrq->min_vruntime;
 
@@ -2375,6 +2377,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
+	p->se.nr_migrations		= 0;
 	p->se.last_wakeup		= 0;
 	p->se.avg_overlap		= 0;
 
-- 
cgit v0.10.2


From e06c61a879910869aa5bf3f8f634abfee1a7bebc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 14:44:31 +0100
Subject: perfcounters: add nr-of-faults counter

Impact: add new feature, new sw counter

Add a counter that counts the number of pagefaults a task
is experiencing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fb11e35..59c52f9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -888,6 +888,54 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.hw_perf_counter_read		= task_clock_perf_counter_read,
 };
 
+static u64 get_page_faults(void)
+{
+	struct task_struct *curr = current;
+
+	return curr->maj_flt + curr->min_flt;
+}
+
+static void page_faults_perf_counter_update(struct perf_counter *counter)
+{
+	u64 prev, now;
+	s64 delta;
+
+	prev = atomic64_read(&counter->hw.prev_count);
+	now = get_page_faults();
+
+	atomic64_set(&counter->hw.prev_count, now);
+
+	delta = now - prev;
+	if (WARN_ON_ONCE(delta < 0))
+		delta = 0;
+
+	atomic64_add(delta, &counter->count);
+}
+
+static void page_faults_perf_counter_read(struct perf_counter *counter)
+{
+	page_faults_perf_counter_update(counter);
+}
+
+static void page_faults_perf_counter_enable(struct perf_counter *counter)
+{
+	/*
+	 * page-faults is a per-task value already,
+	 * so we dont have to clear it on switch-in.
+	 */
+}
+
+static void page_faults_perf_counter_disable(struct perf_counter *counter)
+{
+	page_faults_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_page_faults = {
+	.hw_perf_counter_enable		= page_faults_perf_counter_enable,
+	.hw_perf_counter_disable	= page_faults_perf_counter_disable,
+	.hw_perf_counter_read		= page_faults_perf_counter_read,
+};
+
 static u64 get_context_switches(void)
 {
 	struct task_struct *curr = current;
@@ -994,6 +1042,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_TASK_CLOCK:
 		hw_ops = &perf_ops_task_clock;
 		break;
+	case PERF_COUNT_PAGE_FAULTS:
+		hw_ops = &perf_ops_page_faults;
+		break;
 	case PERF_COUNT_CONTEXT_SWITCHES:
 		hw_ops = &perf_ops_context_switches;
 		break;
-- 
cgit v0.10.2


From 2b9ff0db19b5e2c77000b7201525f9c3d6e8328d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 18:36:30 +0100
Subject: perfcounters: fix non-intel-perfmon CPUs

Do not write MSR_CORE_PERF_GLOBAL_CTRL on CPUs where it does not exist.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5afae13..6d30f60 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -157,6 +157,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 
 void hw_perf_enable_all(void)
 {
+	if (unlikely(!perf_counters_initialized))
+		return;
+
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
@@ -164,14 +167,21 @@ u64 hw_perf_save_disable(void)
 {
 	u64 ctrl;
 
+	if (unlikely(!perf_counters_initialized))
+		return 0;
+
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+
 	return ctrl;
 }
 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
 void hw_perf_restore(u64 ctrl)
 {
+	if (unlikely(!perf_counters_initialized))
+		return;
+
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
 }
 EXPORT_SYMBOL_GPL(hw_perf_restore);
-- 
cgit v0.10.2


From 088e2852c858159d47f71ee8da38e0fb1b21f806 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 20:21:00 +0100
Subject: perfcounters, x86: fix sw counters on non-PMC CPUs

Make perf_max_counters default to at least 1 - this allows the sw
counters to be used.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 59c52f9..539fa82 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -25,7 +25,7 @@
  */
 DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
 
-int perf_max_counters __read_mostly;
+int perf_max_counters __read_mostly = 1;
 static int perf_reserved_percpu __read_mostly;
 static int perf_overcommit __read_mostly = 1;
 
-- 
cgit v0.10.2


From 75f224cf7700ed6006574dc3f2efa29860727570 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 21:58:46 +0100
Subject: perfcounters: fix lapic initialization

Fix non-working NMI sampling in certain bootup scenarios.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6d30f60..8a154bd 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -557,10 +557,10 @@ void __init init_hw_perf_counters(void)
 	printk(KERN_INFO "... bit_width:    %d\n", eax.split.bit_width);
 	printk(KERN_INFO "... mask_length:  %d\n", eax.split.mask_length);
 
+	perf_counters_initialized = true;
+
 	perf_counters_lapic_init(0);
 	register_die_notifier(&perf_counter_nmi_notifier);
-
-	perf_counters_initialized = true;
 }
 
 static void x86_perf_counter_read(struct perf_counter *counter)
-- 
cgit v0.10.2


From 0cc0c027d4e028632933f1be2dc4cd730358183b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 23:20:36 +0100
Subject: perfcounters: release CPU context when exiting task counters

If counters are exiting via do_exit() not via filp close, then
the CPU context needs to be released - otherwise future percpu
counter creations might fail.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 539fa82..16396e9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1273,8 +1273,19 @@ __perf_counter_exit_task(struct task_struct *child,
 	local_irq_disable();
 	perf_flags = hw_perf_save_disable();
 
-	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE)
+	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+		struct perf_cpu_context *cpuctx;
+
+		cpuctx = &__get_cpu_var(perf_cpu_context);
+
 		child_counter->hw_ops->hw_perf_counter_disable(child_counter);
+		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+		child_counter->oncpu = -1;
+
+		cpuctx->active_oncpu--;
+		child_ctx->nr_active--;
+	}
+
 	list_del_init(&child_counter->list_entry);
 
 	hw_perf_restore(perf_flags);
@@ -1539,4 +1550,3 @@ static int __init perf_counter_sysfs_init(void)
 				  &perfclass_attr_group);
 }
 device_initcall(perf_counter_sysfs_init);
-
-- 
cgit v0.10.2


From f65cb45cba63f249458b669aa67069eabc37b2f5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 16 Dec 2008 13:40:44 +0100
Subject: perfcounters: flush on setuid exec

Pavel Machek pointed out that performance counters should be flushed
when crossing protection domains on setuid execution.

Reported-by: Pavel Machek <pavel@suse.cz>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/fs/exec.c b/fs/exec.c
index ec5df9a..d5165d8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
+#include <linux/perf_counter.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 #include <linux/key.h>
@@ -1017,6 +1018,13 @@ int flush_old_exec(struct linux_binprm * bprm)
 		set_dumpable(current->mm, suid_dumpable);
 	}
 
+	/*
+	 * Flush performance counters when crossing a
+	 * security domain:
+	 */
+	if (!get_dumpable(current->mm))
+		perf_counter_exit_task(current);
+
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
 
-- 
cgit v0.10.2


From a86ed50859d65a08beec9474df97b88438a996df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 00:43:10 +0100
Subject: perfcounters: use hw_event.disable flag

Impact: implement default-off counters

Make sure that counters that are created with counter.hw_event.disabled=1,
get created in disabled state.

They can be enabled via:

        prctl(PR_TASK_PERF_COUNTERS_ENABLE);

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 16396e9..5431e79 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1093,6 +1093,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
 
+	if (hw_event->disabled)
+		counter->state = PERF_COUNTER_STATE_OFF;
+
 	hw_ops = NULL;
 	if (!hw_event->raw && hw_event->type < 0)
 		hw_ops = sw_perf_counter_init(counter);
-- 
cgit v0.10.2


From 94c46572a6d9bb497eda0a14099d9f1360d57d5d Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Fri, 19 Dec 2008 22:37:58 +0530
Subject: x86: perf_counter.c intel_perfmon_event_map and
 max_intel_perfmon_events should be static

Impact: cleanup, avoid sparse warnings, reduce kernel size a bit

Fixes these sparse warnings:
 arch/x86/kernel/cpu/perf_counter.c:44:11: warning: symbol 'intel_perfmon_event_map' was not declared. Should it be static?
 arch/x86/kernel/cpu/perf_counter.c:54:11: warning: symbol 'max_intel_perfmon_events' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8a154bd..bdbdb56 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -41,7 +41,7 @@ struct cpu_hw_counters {
  */
 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
-const int intel_perfmon_event_map[] =
+static const int intel_perfmon_event_map[] =
 {
   [PERF_COUNT_CYCLES]			= 0x003c,
   [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
@@ -51,7 +51,7 @@ const int intel_perfmon_event_map[] =
   [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
 };
 
-const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
+static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 
 /*
  * Propagate counter elapsed time into the generic counter.
-- 
cgit v0.10.2


From 8fb9331391af95ca1f4e5c0a0da8120b13cbae01 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 12:04:16 +0100
Subject: perfcounters: remove warnings

Impact: remove debug checks

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index bdbdb56..89fad5d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -64,7 +64,6 @@ x86_perf_counter_update(struct perf_counter *counter,
 {
 	u64 prev_raw_count, new_raw_count, delta;
 
-	WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE);
 	/*
 	 * Careful: an NMI might modify the previous counter value.
 	 *
@@ -89,7 +88,6 @@ again:
 	 * of the count, so we do that by clipping the delta to 32 bits:
 	 */
 	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
-	WARN_ON_ONCE((int)delta < 0);
 
 	atomic64_add(delta, &counter->count);
 	atomic64_sub(delta, &hwc->period_left);
@@ -193,7 +191,6 @@ __x86_perf_counter_disable(struct perf_counter *counter,
 	int err;
 
 	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
-	WARN_ON_ONCE(err);
 }
 
 static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]);
@@ -209,8 +206,6 @@ __hw_perf_counter_set_period(struct perf_counter *counter,
 	s32 left = atomic64_read(&hwc->period_left);
 	s32 period = hwc->irq_period;
 
-	WARN_ON_ONCE(period <= 0);
-
 	/*
 	 * If we are way outside a reasoable range then just skip forward:
 	 */
@@ -224,8 +219,6 @@ __hw_perf_counter_set_period(struct perf_counter *counter,
 		atomic64_set(&hwc->period_left, left);
 	}
 
-	WARN_ON_ONCE(left <= 0);
-
 	per_cpu(prev_left[idx], smp_processor_id()) = left;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f30486f..d038450 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -218,8 +218,6 @@ struct perf_cpu_context {
 extern int perf_max_counters;
 
 #ifdef CONFIG_PERF_COUNTERS
-extern void
-perf_counter_show(struct perf_counter *counter, char *str, int trace);
 extern const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter);
 
@@ -237,8 +235,6 @@ extern int perf_counter_task_enable(void);
 
 #else
 static inline void
-perf_counter_show(struct perf_counter *counter, char *str, int trace)   { }
-static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
 static inline void
 perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5431e79..aab6c12 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -861,8 +861,6 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)
 	atomic64_set(&counter->hw.prev_count, now);
 
 	delta = now - prev;
-	if (WARN_ON_ONCE(delta < 0))
-		delta = 0;
 
 	atomic64_add(delta, &counter->count);
 }
@@ -906,8 +904,6 @@ static void page_faults_perf_counter_update(struct perf_counter *counter)
 	atomic64_set(&counter->hw.prev_count, now);
 
 	delta = now - prev;
-	if (WARN_ON_ONCE(delta < 0))
-		delta = 0;
 
 	atomic64_add(delta, &counter->count);
 }
@@ -954,8 +950,6 @@ static void context_switches_perf_counter_update(struct perf_counter *counter)
 	atomic64_set(&counter->hw.prev_count, now);
 
 	delta = now - prev;
-	if (WARN_ON_ONCE(delta < 0))
-		delta = 0;
 
 	atomic64_add(delta, &counter->count);
 }
@@ -1000,8 +994,6 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
 	atomic64_set(&counter->hw.prev_count, now);
 
 	delta = now - prev;
-	if (WARN_ON_ONCE(delta < 0))
-		delta = 0;
 
 	atomic64_add(delta, &counter->count);
 }
-- 
cgit v0.10.2


From 7995888fcb0246543ee8027bf2835a250ba8c925 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 08:54:56 +0100
Subject: perfcounters: tweak group scheduling

Impact: schedule in groups atomically

If there are multiple groups in a task, make sure they are scheduled
in and out atomically.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index aab6c12..f8a4d9a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -367,21 +367,26 @@ counter_sched_in(struct perf_counter *counter,
 	ctx->nr_active++;
 }
 
-static void
+static int
 group_sched_in(struct perf_counter *group_counter,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx,
 	       int cpu)
 {
 	struct perf_counter *counter;
+	int was_group = 0;
 
 	counter_sched_in(group_counter, cpuctx, ctx, cpu);
 
 	/*
 	 * Schedule in siblings as one group (if any):
 	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
 		counter_sched_in(counter, cpuctx, ctx, cpu);
+		was_group = 1;
+	}
+
+	return was_group;
 }
 
 /*
@@ -416,7 +421,12 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		group_sched_in(counter, cpuctx, ctx, cpu);
+		/*
+		 * If we scheduled in a group atomically and
+		 * exclusively, break out:
+		 */
+		if (group_sched_in(counter, cpuctx, ctx, cpu))
+			break;
 	}
 	spin_unlock(&ctx->lock);
 
-- 
cgit v0.10.2


From 5c167b8585c8d91206b395d57011ead7711e322f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 09:02:19 +0100
Subject: x86, perfcounters: rename intel_arch_perfmon.h => perf_counter.h

Impact: rename include file

We'll be providing an asm/perf_counter.h to the generic perfcounter code,
so use the already existing x86 file for this purpose and rename it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
deleted file mode 100644
index 71598a9..0000000
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
-#define _ASM_X86_INTEL_ARCH_PERFMON_H
-
-#define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
-#define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
-
-#define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
-#define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
-
-#define ARCH_PERFMON_EVENTSEL0_ENABLE			  (1 << 22)
-#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
-#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
-#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
-
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 		 0
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
-		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
-
-#define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
-
-union cpuid10_eax {
-	struct {
-		unsigned int version_id:8;
-		unsigned int num_counters:8;
-		unsigned int bit_width:8;
-		unsigned int mask_length:8;
-	} split;
-	unsigned int full;
-};
-
-#ifdef CONFIG_PERF_COUNTERS
-extern void init_hw_perf_counters(void);
-extern void perf_counters_lapic_init(int nmi);
-#else
-static inline void init_hw_perf_counters(void)		{ }
-static inline void perf_counters_lapic_init(int nmi)	{ }
-#endif
-
-#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
new file mode 100644
index 0000000..9dadce1
--- /dev/null
+++ b/arch/x86/include/asm/perf_counter.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_X86_PERF_COUNTER_H
+#define _ASM_X86_PERF_COUNTER_H
+
+#define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
+
+#define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
+
+#define ARCH_PERFMON_EVENTSEL0_ENABLE			  (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
+
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 		 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
+		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
+
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
+
+#ifdef CONFIG_PERF_COUNTERS
+extern void init_hw_perf_counters(void);
+extern void perf_counters_lapic_init(int nmi);
+#else
+static inline void init_hw_perf_counters(void)		{ }
+static inline void perf_counters_lapic_init(int nmi)	{ }
+#endif
+
+#endif /* _ASM_X86_PERF_COUNTER_H */
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 0579ec1..4f859ac 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -31,7 +31,7 @@
 #include <linux/dmi.h>
 #include <linux/dmar.h>
 
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_counter.h>
 #include <asm/atomic.h>
 #include <asm/smp.h>
 #include <asm/mtrr.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4461011..ad331b4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -17,7 +17,7 @@
 #include <asm/mmu_context.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_counter.h>
 #include <asm/pat.h>
 #include <asm/asm.h>
 #include <asm/numa.h>
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 89fad5d..a4a3a09 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -16,7 +16,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_counter.h>
 #include <asm/apic.h>
 
 static bool perf_counters_initialized __read_mostly;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9abd48b..d6f5b9f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -20,7 +20,7 @@
 #include <linux/kprobes.h>
 
 #include <asm/apic.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_counter.h>
 
 struct nmi_watchdog_ctlblk {
 	unsigned int cccr_msr;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index e9f80c7..07c9145 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -18,7 +18,7 @@
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
-#include <asm/intel_arch_perfmon.h>
+#include <asm/perf_counter.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
-- 
cgit v0.10.2


From eb2b861810d4ff72454c83996b891df4e0aaff9a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 09:09:13 +0100
Subject: x86, perfcounters: prepare for fixed-mode PMCs

Impact: refactor the x86 code for fixed-mode PMCs

Extend the data structures and rename the existing facilities
to allow for a 'generic' versus 'fixed' counter distinction.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 9dadce1..dd5a4a5 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -1,6 +1,13 @@
 #ifndef _ASM_X86_PERF_COUNTER_H
 #define _ASM_X86_PERF_COUNTER_H
 
+/*
+ * Performance counter hw details:
+ */
+
+#define X86_PMC_MAX_GENERIC					8
+#define X86_PMC_MAX_FIXED					3
+
 #define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
 
@@ -20,6 +27,10 @@
 
 #define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
 
+/*
+ * Intel "Architectural Performance Monitoring" CPUID
+ * detection/enumeration details:
+ */
 union cpuid10_eax {
 	struct {
 		unsigned int version_id:8;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a4a3a09..fc3af86 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -27,13 +27,12 @@ static bool perf_counters_initialized __read_mostly;
 static int nr_hw_counters __read_mostly;
 static u32 perf_counter_mask __read_mostly;
 
-/* No support for fixed function counters yet */
-
-#define MAX_HW_COUNTERS		8
-
 struct cpu_hw_counters {
-	struct perf_counter	*counters[MAX_HW_COUNTERS];
-	unsigned long		used[BITS_TO_LONGS(MAX_HW_COUNTERS)];
+	struct perf_counter	*generic[X86_PMC_MAX_GENERIC];
+	unsigned long		used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)];
+
+	struct perf_counter	*fixed[X86_PMC_MAX_FIXED];
+	unsigned long		used_fixed[BITS_TO_LONGS(X86_PMC_MAX_FIXED)];
 };
 
 /*
@@ -185,7 +184,7 @@ void hw_perf_restore(u64 ctrl)
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
 static inline void
-__x86_perf_counter_disable(struct perf_counter *counter,
+__pmc_generic_disable(struct perf_counter *counter,
 			   struct hw_perf_counter *hwc, unsigned int idx)
 {
 	int err;
@@ -193,7 +192,7 @@ __x86_perf_counter_disable(struct perf_counter *counter,
 	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
 }
 
-static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]);
+static DEFINE_PER_CPU(u64, prev_left[X86_PMC_MAX_GENERIC]);
 
 /*
  * Set the next IRQ period, based on the hwc->period_left value.
@@ -231,7 +230,7 @@ __hw_perf_counter_set_period(struct perf_counter *counter,
 }
 
 static void
-__x86_perf_counter_enable(struct perf_counter *counter,
+__pmc_generic_enable(struct perf_counter *counter,
 			  struct hw_perf_counter *hwc, int idx)
 {
 	wrmsr(hwc->config_base + idx,
@@ -241,7 +240,7 @@ __x86_perf_counter_enable(struct perf_counter *counter,
 /*
  * Find a PMC slot for the freshly enabled / scheduled in counter:
  */
-static void x86_perf_counter_enable(struct perf_counter *counter)
+static void pmc_generic_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -256,12 +255,12 @@ static void x86_perf_counter_enable(struct perf_counter *counter)
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__x86_perf_counter_disable(counter, hwc, idx);
+	__pmc_generic_disable(counter, hwc, idx);
 
-	cpuc->counters[idx] = counter;
+	cpuc->generic[idx] = counter;
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
-	__x86_perf_counter_enable(counter, hwc, idx);
+	__pmc_generic_enable(counter, hwc, idx);
 }
 
 void perf_counter_print_debug(void)
@@ -301,16 +300,16 @@ void perf_counter_print_debug(void)
 	local_irq_enable();
 }
 
-static void x86_perf_counter_disable(struct perf_counter *counter)
+static void pmc_generic_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__x86_perf_counter_disable(counter, hwc, idx);
+	__pmc_generic_disable(counter, hwc, idx);
 
 	clear_bit(idx, cpuc->used);
-	cpuc->counters[idx] = NULL;
+	cpuc->generic[idx] = NULL;
 
 	/*
 	 * Drain the remaining delta count out of a counter
@@ -349,7 +348,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
 	__hw_perf_counter_set_period(counter, hwc, idx);
 
 	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
-		__x86_perf_counter_enable(counter, hwc, idx);
+		__pmc_generic_enable(counter, hwc, idx);
 }
 
 static void
@@ -392,7 +391,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 again:
 	ack = status;
 	for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) {
-		struct perf_counter *counter = cpuc->counters[bit];
+		struct perf_counter *counter = cpuc->generic[bit];
 
 		clear_bit(bit, (unsigned long *) &status);
 		if (!counter)
@@ -412,7 +411,7 @@ again:
 		}
 		/*
 		 * From NMI context we cannot call into the scheduler to
-		 * do a task wakeup - but we mark these counters as
+		 * do a task wakeup - but we mark these generic as
 		 * wakeup_pending and initate a wakeup callback:
 		 */
 		if (nmi) {
@@ -462,7 +461,7 @@ void perf_counter_notify(struct pt_regs *regs)
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
 	for_each_bit(bit, cpuc->used, nr_hw_counters) {
-		struct perf_counter *counter = cpuc->counters[bit];
+		struct perf_counter *counter = cpuc->generic[bit];
 
 		if (!counter)
 			continue;
@@ -539,10 +538,10 @@ void __init init_hw_perf_counters(void)
 	printk(KERN_INFO "... version:      %d\n", eax.split.version_id);
 	printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters);
 	nr_hw_counters = eax.split.num_counters;
-	if (nr_hw_counters > MAX_HW_COUNTERS) {
-		nr_hw_counters = MAX_HW_COUNTERS;
+	if (nr_hw_counters > X86_PMC_MAX_GENERIC) {
+		nr_hw_counters = X86_PMC_MAX_GENERIC;
 		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
-			nr_hw_counters, MAX_HW_COUNTERS);
+			nr_hw_counters, X86_PMC_MAX_GENERIC);
 	}
 	perf_counter_mask = (1 << nr_hw_counters) - 1;
 	perf_max_counters = nr_hw_counters;
@@ -556,15 +555,15 @@ void __init init_hw_perf_counters(void)
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
 
-static void x86_perf_counter_read(struct perf_counter *counter)
+static void pmc_generic_read(struct perf_counter *counter)
 {
 	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 }
 
 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
-	.hw_perf_counter_enable		= x86_perf_counter_enable,
-	.hw_perf_counter_disable	= x86_perf_counter_disable,
-	.hw_perf_counter_read		= x86_perf_counter_read,
+	.hw_perf_counter_enable		= pmc_generic_enable,
+	.hw_perf_counter_disable	= pmc_generic_disable,
+	.hw_perf_counter_read		= pmc_generic_read,
 };
 
 const struct hw_perf_counter_ops *
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d038450..984da54 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -14,6 +14,7 @@
 #define _LINUX_PERF_COUNTER_H
 
 #include <asm/atomic.h>
+#include <asm/perf_counter.h>
 
 #include <linux/list.h>
 #include <linux/mutex.h>
-- 
cgit v0.10.2


From 703e937c83bbad79075a7846e062e447c2fee6a4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 10:51:15 +0100
Subject: perfcounters: add fixed-mode PMC enumeration

Enumerate fixed-mode PMCs based on CPUID, and feed that into the
perfcounter code.

Does not use fixed-mode PMCs yet.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index dd5a4a5..945a315 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -41,6 +41,29 @@ union cpuid10_eax {
 	unsigned int full;
 };
 
+union cpuid10_edx {
+	struct {
+		unsigned int num_counters_fixed:4;
+		unsigned int reserved:28;
+	} split;
+	unsigned int full;
+};
+
+
+/*
+ * Fixed-purpose performance counters:
+ */
+
+/* Instr_Retired.Any: */
+#define MSR_ARCH_PERFMON_FIXED_CTR0			0x309
+
+/* CPU_CLK_Unhalted.Core: */
+#define MSR_ARCH_PERFMON_FIXED_CTR1			0x30a
+
+/* CPU_CLK_Unhalted.Ref: */
+#define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
+
+
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(int nmi);
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index fc3af86..2fca50c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -27,6 +27,8 @@ static bool perf_counters_initialized __read_mostly;
 static int nr_hw_counters __read_mostly;
 static u32 perf_counter_mask __read_mostly;
 
+static int nr_hw_counters_fixed __read_mostly;
+
 struct cpu_hw_counters {
 	struct perf_counter	*generic[X86_PMC_MAX_GENERIC];
 	unsigned long		used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)];
@@ -519,8 +521,9 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 void __init init_hw_perf_counters(void)
 {
 	union cpuid10_eax eax;
-	unsigned int unused;
 	unsigned int ebx;
+	unsigned int unused;
+	union cpuid10_edx edx;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
 		return;
@@ -529,14 +532,14 @@ void __init init_hw_perf_counters(void)
 	 * Check whether the Architectural PerfMon supports
 	 * Branch Misses Retired Event or not.
 	 */
-	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
 	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
 		return;
 
 	printk(KERN_INFO "Intel Performance Monitoring support detected.\n");
 
-	printk(KERN_INFO "... version:      %d\n", eax.split.version_id);
-	printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters);
+	printk(KERN_INFO "... version:         %d\n", eax.split.version_id);
+	printk(KERN_INFO "... num counters:    %d\n", eax.split.num_counters);
 	nr_hw_counters = eax.split.num_counters;
 	if (nr_hw_counters > X86_PMC_MAX_GENERIC) {
 		nr_hw_counters = X86_PMC_MAX_GENERIC;
@@ -546,8 +549,16 @@ void __init init_hw_perf_counters(void)
 	perf_counter_mask = (1 << nr_hw_counters) - 1;
 	perf_max_counters = nr_hw_counters;
 
-	printk(KERN_INFO "... bit_width:    %d\n", eax.split.bit_width);
-	printk(KERN_INFO "... mask_length:  %d\n", eax.split.mask_length);
+	printk(KERN_INFO "... bit width:       %d\n", eax.split.bit_width);
+	printk(KERN_INFO "... mask length:     %d\n", eax.split.mask_length);
+
+	nr_hw_counters_fixed = edx.split.num_counters_fixed;
+	if (nr_hw_counters_fixed > X86_PMC_MAX_FIXED) {
+		nr_hw_counters_fixed = X86_PMC_MAX_FIXED;
+		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
+			nr_hw_counters_fixed, X86_PMC_MAX_FIXED);
+	}
+	printk(KERN_INFO "... fixed counters:  %d\n", nr_hw_counters_fixed);
 
 	perf_counters_initialized = true;
 
-- 
cgit v0.10.2


From 862a1a5f346fe7e9181ea51eaae48cf2cd70f746 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 13:09:20 +0100
Subject: x86, perfcounters: refactor code for fixed-function PMCs

Impact: clean up

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 945a315..13745de 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -8,6 +8,10 @@
 #define X86_PMC_MAX_GENERIC					8
 #define X86_PMC_MAX_FIXED					3
 
+#define X86_PMC_IDX_GENERIC				        0
+#define X86_PMC_IDX_FIXED				       32
+#define X86_PMC_IDX_MAX					       64
+
 #define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
 
@@ -54,6 +58,15 @@ union cpuid10_edx {
  * Fixed-purpose performance counters:
  */
 
+/*
+ * All 3 fixed-mode PMCs are configured via this single MSR:
+ */
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL			0x38d
+
+/*
+ * The counts are available in three separate MSRs:
+ */
+
 /* Instr_Retired.Any: */
 #define MSR_ARCH_PERFMON_FIXED_CTR0			0x309
 
@@ -63,7 +76,6 @@ union cpuid10_edx {
 /* CPU_CLK_Unhalted.Ref: */
 #define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
 
-
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(int nmi);
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2fca50c..358af52 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -24,17 +24,14 @@ static bool perf_counters_initialized __read_mostly;
 /*
  * Number of (generic) HW counters:
  */
-static int nr_hw_counters __read_mostly;
-static u32 perf_counter_mask __read_mostly;
+static int nr_counters_generic __read_mostly;
+static u64 perf_counter_mask __read_mostly;
 
-static int nr_hw_counters_fixed __read_mostly;
+static int nr_counters_fixed __read_mostly;
 
 struct cpu_hw_counters {
-	struct perf_counter	*generic[X86_PMC_MAX_GENERIC];
-	unsigned long		used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)];
-
-	struct perf_counter	*fixed[X86_PMC_MAX_FIXED];
-	unsigned long		used_fixed[BITS_TO_LONGS(X86_PMC_MAX_FIXED)];
+	struct perf_counter	*counters[X86_PMC_IDX_MAX];
+	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 };
 
 /*
@@ -159,7 +156,7 @@ void hw_perf_enable_all(void)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask);
 }
 
 u64 hw_perf_save_disable(void)
@@ -170,7 +167,7 @@ u64 hw_perf_save_disable(void)
 		return 0;
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
 	return ctrl;
 }
@@ -181,7 +178,7 @@ void hw_perf_restore(u64 ctrl)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 }
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
@@ -239,6 +236,11 @@ __pmc_generic_enable(struct perf_counter *counter,
 	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
+static int fixed_mode_idx(struct hw_perf_counter *hwc)
+{
+	return -1;
+}
+
 /*
  * Find a PMC slot for the freshly enabled / scheduled in counter:
  */
@@ -250,7 +252,7 @@ static void pmc_generic_enable(struct perf_counter *counter)
 
 	/* Try to get the previous counter again */
 	if (test_and_set_bit(idx, cpuc->used)) {
-		idx = find_first_zero_bit(cpuc->used, nr_hw_counters);
+		idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
 		set_bit(idx, cpuc->used);
 		hwc->idx = idx;
 	}
@@ -259,7 +261,7 @@ static void pmc_generic_enable(struct perf_counter *counter)
 
 	__pmc_generic_disable(counter, hwc, idx);
 
-	cpuc->generic[idx] = counter;
+	cpuc->counters[idx] = counter;
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
 	__pmc_generic_enable(counter, hwc, idx);
@@ -270,7 +272,7 @@ void perf_counter_print_debug(void)
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left;
 	int cpu, idx;
 
-	if (!nr_hw_counters)
+	if (!nr_counters_generic)
 		return;
 
 	local_irq_disable();
@@ -286,7 +288,7 @@ void perf_counter_print_debug(void)
 	printk(KERN_INFO "CPU#%d: status:     %016llx\n", cpu, status);
 	printk(KERN_INFO "CPU#%d: overflow:   %016llx\n", cpu, overflow);
 
-	for (idx = 0; idx < nr_hw_counters; idx++) {
+	for (idx = 0; idx < nr_counters_generic; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 		rdmsrl(MSR_ARCH_PERFMON_PERFCTR0  + idx, pmc_count);
 
@@ -311,7 +313,7 @@ static void pmc_generic_disable(struct perf_counter *counter)
 	__pmc_generic_disable(counter, hwc, idx);
 
 	clear_bit(idx, cpuc->used);
-	cpuc->generic[idx] = NULL;
+	cpuc->counters[idx] = NULL;
 
 	/*
 	 * Drain the remaining delta count out of a counter
@@ -381,7 +383,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
 
 	/* Disable counters globally */
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 	ack_APIC_irq();
 
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
@@ -392,8 +394,8 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 
 again:
 	ack = status;
-	for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) {
-		struct perf_counter *counter = cpuc->generic[bit];
+	for_each_bit(bit, (unsigned long *) &status, nr_counters_generic) {
+		struct perf_counter *counter = cpuc->counters[bit];
 
 		clear_bit(bit, (unsigned long *) &status);
 		if (!counter)
@@ -424,7 +426,7 @@ again:
 		}
 	}
 
-	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack, 0);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 
 	/*
 	 * Repeat if there is more work to be done:
@@ -436,7 +438,7 @@ out:
 	/*
 	 * Restore - do not reenable when global enable is off:
 	 */
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, saved_global, 0);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
 }
 
 void smp_perf_counter_interrupt(struct pt_regs *regs)
@@ -462,8 +464,8 @@ void perf_counter_notify(struct pt_regs *regs)
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
-	for_each_bit(bit, cpuc->used, nr_hw_counters) {
-		struct perf_counter *counter = cpuc->generic[bit];
+	for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
+		struct perf_counter *counter = cpuc->counters[bit];
 
 		if (!counter)
 			continue;
@@ -540,26 +542,29 @@ void __init init_hw_perf_counters(void)
 
 	printk(KERN_INFO "... version:         %d\n", eax.split.version_id);
 	printk(KERN_INFO "... num counters:    %d\n", eax.split.num_counters);
-	nr_hw_counters = eax.split.num_counters;
-	if (nr_hw_counters > X86_PMC_MAX_GENERIC) {
-		nr_hw_counters = X86_PMC_MAX_GENERIC;
+	nr_counters_generic = eax.split.num_counters;
+	if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
+		nr_counters_generic = X86_PMC_MAX_GENERIC;
 		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
-			nr_hw_counters, X86_PMC_MAX_GENERIC);
+			nr_counters_generic, X86_PMC_MAX_GENERIC);
 	}
-	perf_counter_mask = (1 << nr_hw_counters) - 1;
-	perf_max_counters = nr_hw_counters;
+	perf_counter_mask = (1 << nr_counters_generic) - 1;
+	perf_max_counters = nr_counters_generic;
 
 	printk(KERN_INFO "... bit width:       %d\n", eax.split.bit_width);
 	printk(KERN_INFO "... mask length:     %d\n", eax.split.mask_length);
 
-	nr_hw_counters_fixed = edx.split.num_counters_fixed;
-	if (nr_hw_counters_fixed > X86_PMC_MAX_FIXED) {
-		nr_hw_counters_fixed = X86_PMC_MAX_FIXED;
+	nr_counters_fixed = edx.split.num_counters_fixed;
+	if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
+		nr_counters_fixed = X86_PMC_MAX_FIXED;
 		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
-			nr_hw_counters_fixed, X86_PMC_MAX_FIXED);
+			nr_counters_fixed, X86_PMC_MAX_FIXED);
 	}
-	printk(KERN_INFO "... fixed counters:  %d\n", nr_hw_counters_fixed);
+	printk(KERN_INFO "... fixed counters:  %d\n", nr_counters_fixed);
+
+	perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
+	printk(KERN_INFO "... counter mask:    %016Lx\n", perf_counter_mask);
 	perf_counters_initialized = true;
 
 	perf_counters_lapic_init(0);
-- 
cgit v0.10.2


From 7671581f1666ef4b54a1c1e598c51ac44c060a9b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 14:20:28 +0100
Subject: perfcounters: hw ops rename

Impact: rename field names

Shorten them.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 358af52..b675571 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -577,9 +577,9 @@ static void pmc_generic_read(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
-	.hw_perf_counter_enable		= pmc_generic_enable,
-	.hw_perf_counter_disable	= pmc_generic_disable,
-	.hw_perf_counter_read		= pmc_generic_read,
+	.enable		= pmc_generic_enable,
+	.disable	= pmc_generic_disable,
+	.read		= pmc_generic_read,
 };
 
 const struct hw_perf_counter_ops *
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 984da54..48f76d2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -128,9 +128,9 @@ struct perf_counter;
  * struct hw_perf_counter_ops - performance counter hw ops
  */
 struct hw_perf_counter_ops {
-	void (*hw_perf_counter_enable)	(struct perf_counter *counter);
-	void (*hw_perf_counter_disable)	(struct perf_counter *counter);
-	void (*hw_perf_counter_read)	(struct perf_counter *counter);
+	void (*enable)			(struct perf_counter *counter);
+	void (*disable)			(struct perf_counter *counter);
+	void (*read)			(struct perf_counter *counter);
 };
 
 /**
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f8a4d9a..961d651 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -109,7 +109,7 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock_irqsave(&ctx->lock, flags);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->hw_ops->hw_perf_counter_disable(counter);
+		counter->hw_ops->disable(counter);
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
@@ -226,7 +226,7 @@ static void __perf_install_in_context(void *info)
 		counter->oncpu = cpu;
 		ctx->nr_active++;
 		cpuctx->active_oncpu++;
-		counter->hw_ops->hw_perf_counter_enable(counter);
+		counter->hw_ops->enable(counter);
 	}
 
 	if (!ctx->task && cpuctx->max_pertask)
@@ -297,7 +297,7 @@ counter_sched_out(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
-	counter->hw_ops->hw_perf_counter_disable(counter);
+	counter->hw_ops->disable(counter);
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->oncpu = -1;
 
@@ -327,7 +327,7 @@ group_sched_out(struct perf_counter *group_counter,
  *
  * We stop each counter and update the counter value in counter->count.
  *
- * This does not protect us against NMI, but hw_perf_counter_disable()
+ * This does not protect us against NMI, but disable()
  * sets the disabled bit in the control field of counter _before_
  * accessing the counter control register. If a NMI hits, then it will
  * not restart the counter.
@@ -359,7 +359,7 @@ counter_sched_in(struct perf_counter *counter,
 	if (counter->state == PERF_COUNTER_STATE_OFF)
 		return;
 
-	counter->hw_ops->hw_perf_counter_enable(counter);
+	counter->hw_ops->enable(counter);
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
 
@@ -395,7 +395,7 @@ group_sched_in(struct perf_counter *group_counter,
  *
  * We restore the counter value and then enable it.
  *
- * This does not protect us against NMI, but hw_perf_counter_enable()
+ * This does not protect us against NMI, but enable()
  * sets the enabled bit in the control field of counter _before_
  * accessing the counter control register. If a NMI hits, then it will
  * keep the counter running.
@@ -537,11 +537,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 /*
  * Cross CPU call to read the hardware counter
  */
-static void __hw_perf_counter_read(void *info)
+static void __read(void *info)
 {
 	struct perf_counter *counter = info;
 
-	counter->hw_ops->hw_perf_counter_read(counter);
+	counter->hw_ops->read(counter);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -552,7 +552,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	 */
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		smp_call_function_single(counter->oncpu,
-					 __hw_perf_counter_read, counter, 1);
+					 __read, counter, 1);
 	}
 
 	return atomic64_read(&counter->count);
@@ -855,9 +855,9 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
-	.hw_perf_counter_enable		= cpu_clock_perf_counter_enable,
-	.hw_perf_counter_disable	= cpu_clock_perf_counter_disable,
-	.hw_perf_counter_read		= cpu_clock_perf_counter_read,
+	.enable		= cpu_clock_perf_counter_enable,
+	.disable	= cpu_clock_perf_counter_disable,
+	.read		= cpu_clock_perf_counter_read,
 };
 
 static void task_clock_perf_counter_update(struct perf_counter *counter)
@@ -891,9 +891,9 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
-	.hw_perf_counter_enable		= task_clock_perf_counter_enable,
-	.hw_perf_counter_disable	= task_clock_perf_counter_disable,
-	.hw_perf_counter_read		= task_clock_perf_counter_read,
+	.enable		= task_clock_perf_counter_enable,
+	.disable	= task_clock_perf_counter_disable,
+	.read		= task_clock_perf_counter_read,
 };
 
 static u64 get_page_faults(void)
@@ -937,9 +937,9 @@ static void page_faults_perf_counter_disable(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_page_faults = {
-	.hw_perf_counter_enable		= page_faults_perf_counter_enable,
-	.hw_perf_counter_disable	= page_faults_perf_counter_disable,
-	.hw_perf_counter_read		= page_faults_perf_counter_read,
+	.enable		= page_faults_perf_counter_enable,
+	.disable	= page_faults_perf_counter_disable,
+	.read		= page_faults_perf_counter_read,
 };
 
 static u64 get_context_switches(void)
@@ -983,9 +983,9 @@ static void context_switches_perf_counter_disable(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_context_switches = {
-	.hw_perf_counter_enable		= context_switches_perf_counter_enable,
-	.hw_perf_counter_disable	= context_switches_perf_counter_disable,
-	.hw_perf_counter_read		= context_switches_perf_counter_read,
+	.enable		= context_switches_perf_counter_enable,
+	.disable	= context_switches_perf_counter_disable,
+	.read		= context_switches_perf_counter_read,
 };
 
 static inline u64 get_cpu_migrations(void)
@@ -1027,9 +1027,9 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
-	.hw_perf_counter_enable		= cpu_migrations_perf_counter_enable,
-	.hw_perf_counter_disable	= cpu_migrations_perf_counter_disable,
-	.hw_perf_counter_read		= cpu_migrations_perf_counter_read,
+	.enable		= cpu_migrations_perf_counter_enable,
+	.disable	= cpu_migrations_perf_counter_disable,
+	.read		= cpu_migrations_perf_counter_read,
 };
 
 static const struct hw_perf_counter_ops *
@@ -1283,7 +1283,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		child_counter->hw_ops->hw_perf_counter_disable(child_counter);
+		child_counter->hw_ops->disable(child_counter);
 		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
 		child_counter->oncpu = -1;
 
-- 
cgit v0.10.2


From aa9c4c0f967fdb482ea95e8473ec3d201e6e0781 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 14:10:57 +0100
Subject: perfcounters: fix task clock counter

Impact: fix per task clock counter precision

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4a145ca..1b2e324 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -66,7 +66,15 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 	return sum;
 }
 
+
+/*
+ * Lock/unlock the current runqueue - to extract task statistics:
+ */
+extern void curr_rq_lock_irq_save(unsigned long *flags);
+extern void curr_rq_unlock_irq_restore(unsigned long *flags);
+extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
+
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
diff --git a/kernel/exit.c b/kernel/exit.c
index d336c90..244edfd 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -922,6 +922,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(tsk);
+
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
@@ -1093,11 +1099,6 @@ NORET_TYPE void do_exit(long code)
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
-	/*
-	 * These must happen late, after the PID is not
-	 * hashed anymore, but still at a point that may sleep:
-	 */
-	perf_counter_exit_task(tsk);
 #ifdef CONFIG_FUTEX
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
@@ -1121,6 +1122,12 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	/*
+	 * These must happen late, after the PID is not
+	 * hashed anymore, but still at a point that may sleep:
+	 */
+	perf_counter_exit_task(tsk);
+
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 961d651..f1110ac 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
 
 /*
@@ -106,7 +107,8 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->disable(counter);
@@ -135,7 +137,8 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 
@@ -209,7 +212,8 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +236,8 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 /*
@@ -438,15 +443,19 @@ int perf_counter_task_disable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	perf_counter_task_sched_out(curr, cpu);
 
 	spin_lock(&ctx->lock);
@@ -463,7 +472,7 @@ int perf_counter_task_disable(void)
 
 	spin_unlock(&ctx->lock);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -473,15 +482,19 @@ int perf_counter_task_enable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -493,6 +506,7 @@ int perf_counter_task_enable(void)
 		if (counter->state != PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -500,7 +514,7 @@ int perf_counter_task_enable(void)
 
 	perf_counter_task_sched_in(curr, cpu);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -540,8 +554,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 static void __read(void *info)
 {
 	struct perf_counter *counter = info;
+	unsigned long flags;
 
+	curr_rq_lock_irq_save(&flags);
 	counter->hw_ops->read(counter);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -860,13 +877,27 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.read		= cpu_clock_perf_counter_read,
 };
 
-static void task_clock_perf_counter_update(struct perf_counter *counter)
+/*
+ * Called from within the scheduler:
+ */
+static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 {
-	u64 prev, now;
+	struct task_struct *curr = counter->task;
+	u64 delta;
+
+	WARN_ON_ONCE(counter->task != current);
+
+	delta = __task_delta_exec(curr, update);
+
+	return curr->se.sum_exec_runtime + delta;
+}
+
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+{
+	u64 prev;
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = current->se.sum_exec_runtime;
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -877,17 +908,23 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 1);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static void task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	atomic64_set(&counter->hw.prev_count, now);
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
@@ -1267,6 +1304,7 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -1275,7 +1313,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	 * Be careful about zapping the list - IRQ/NMI context
 	 * could still be processing it:
 	 */
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	perf_flags = hw_perf_save_disable();
 
 	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
@@ -1294,7 +1332,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	list_del_init(&child_counter->list_entry);
 
 	hw_perf_restore(perf_flags);
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	parent_counter = child_counter->parent;
 	/*
diff --git a/kernel/sched.c b/kernel/sched.c
index 382cfdb..4d84ff4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -638,7 +638,7 @@ static inline int cpu_of(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
 {
 	rq->clock = sched_clock_cpu(cpu_of(rq));
 }
@@ -969,6 +969,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
+void curr_rq_lock_irq_save(unsigned long *flags)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	local_irq_save(*flags);
+	rq = cpu_rq(smp_processor_id());
+	spin_lock(&rq->lock);
+}
+
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+	__releases(rq->lock)
+{
+	struct rq *rq;
+
+	rq = cpu_rq(smp_processor_id());
+	spin_unlock(&rq->lock);
+	local_irq_restore(*flags);
+}
+
 void task_rq_unlock_wait(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
@@ -2558,7 +2578,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
 	fire_sched_out_preempt_notifiers(prev, next);
-	perf_counter_task_sched_out(prev, cpu_of(rq));
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -4093,6 +4112,29 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
  */
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+	s64 delta_exec;
+	struct rq *rq;
+
+	rq = task_rq(p);
+	WARN_ON_ONCE(!runqueue_is_locked());
+	WARN_ON_ONCE(!task_current(rq, p));
+
+	if (update)
+		update_rq_clock(rq);
+
+	delta_exec = rq->clock - p->se.exec_start;
+
+	WARN_ON_ONCE(delta_exec < 0);
+
+	return delta_exec;
+}
+
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ */
 unsigned long long task_delta_exec(struct task_struct *p)
 {
 	unsigned long flags;
@@ -4316,13 +4358,13 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
+	perf_counter_task_tick(curr, cpu);
 	spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
 #endif
-	perf_counter_task_tick(curr, cpu);
 }
 
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
@@ -4512,6 +4554,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
+		perf_counter_task_sched_out(prev, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
-- 
cgit v0.10.2


From eef6cbf5844c620d9db9be99e4908cdf92492fb9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 19 Dec 2008 10:20:42 +0100
Subject: perfcounters: pull inherited counters

Change counter inheritance from a 'push' to a 'pull' model: instead of
child tasks pushing their final counts to the parent, reuse the wait4
infrastructure to pull counters as child tasks are exit-processed,
much like how cutime/cstime is collected.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 23fd890..54fa2fa 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -113,6 +113,14 @@ extern struct group_info init_groups;
 # define CAP_INIT_BSET  CAP_INIT_EFF_SET
 #endif
 
+#ifdef CONFIG_PERF_COUNTERS
+# define INIT_PERF_COUNTERS(tsk)					\
+	.perf_counter_ctx.counter_list =				\
+		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),
+#else
+# define INIT_PERF_COUNTERS(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -180,6 +188,7 @@ extern struct group_info init_groups;
 	INIT_IDS							\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
+	INIT_PERF_COUNTERS(tsk)						\
 }
 
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 244edfd..101b7ee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -153,6 +153,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
+#ifdef CONFIG_PERF_COUNTERS
+	WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
+#endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
 }
@@ -922,12 +925,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 */
-	perf_counter_exit_task(tsk);
-
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
@@ -1122,12 +1119,6 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
-	/*
-	 * These must happen late, after the PID is not
-	 * hashed anymore, but still at a point that may sleep:
-	 */
-	perf_counter_exit_task(tsk);
-
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
@@ -1371,6 +1362,12 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	 */
 	read_unlock(&tasklist_lock);
 
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(p);
+
 	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 		? p->signal->group_exit_code : p->exit_code;
-- 
cgit v0.10.2


From 78b6084c907cea15bb40a564b974e072f5163781 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 21 Dec 2008 15:07:49 +0100
Subject: perfcounters: fix init context lock

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 54fa2fa..467cff5 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -116,7 +116,9 @@ extern struct group_info init_groups;
 #ifdef CONFIG_PERF_COUNTERS
 # define INIT_PERF_COUNTERS(tsk)					\
 	.perf_counter_ctx.counter_list =				\
-		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),
+		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),	\
+	.perf_counter_ctx.lock =					\
+		__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
 #else
 # define INIT_PERF_COUNTERS(tsk)
 #endif
-- 
cgit v0.10.2


From 95cdd2e7851cce79ab839cb0b3cbe68d7911d0f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 21 Dec 2008 13:50:42 +0100
Subject: perfcounters: enable lowlevel pmc code to schedule counters

Allow lowlevel ->enable() op to return an error if a counter can not be
added. This can be used to handle counter constraints.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b675571..74090a3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -244,7 +244,7 @@ static int fixed_mode_idx(struct hw_perf_counter *hwc)
 /*
  * Find a PMC slot for the freshly enabled / scheduled in counter:
  */
-static void pmc_generic_enable(struct perf_counter *counter)
+static int pmc_generic_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -253,6 +253,8 @@ static void pmc_generic_enable(struct perf_counter *counter)
 	/* Try to get the previous counter again */
 	if (test_and_set_bit(idx, cpuc->used)) {
 		idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
+		if (idx == nr_counters_generic)
+			return -EAGAIN;
 		set_bit(idx, cpuc->used);
 		hwc->idx = idx;
 	}
@@ -265,6 +267,8 @@ static void pmc_generic_enable(struct perf_counter *counter)
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
 	__pmc_generic_enable(counter, hwc, idx);
+
+	return 0;
 }
 
 void perf_counter_print_debug(void)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 48f76d2..53af11d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -128,7 +128,7 @@ struct perf_counter;
  * struct hw_perf_counter_ops - performance counter hw ops
  */
 struct hw_perf_counter_ops {
-	void (*enable)			(struct perf_counter *counter);
+	int (*enable)			(struct perf_counter *counter);
 	void (*disable)			(struct perf_counter *counter);
 	void (*read)			(struct perf_counter *counter);
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f1110ac..2e73929 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -355,21 +355,25 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 	cpuctx->task_ctx = NULL;
 }
 
-static void
+static int
 counter_sched_in(struct perf_counter *counter,
 		 struct perf_cpu_context *cpuctx,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
 	if (counter->state == PERF_COUNTER_STATE_OFF)
-		return;
+		return 0;
+
+	if (counter->hw_ops->enable(counter))
+		return -EAGAIN;
 
-	counter->hw_ops->enable(counter);
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
 
 	cpuctx->active_oncpu++;
 	ctx->nr_active++;
+
+	return 0;
 }
 
 static int
@@ -378,20 +382,38 @@ group_sched_in(struct perf_counter *group_counter,
 	       struct perf_counter_context *ctx,
 	       int cpu)
 {
-	struct perf_counter *counter;
-	int was_group = 0;
+	struct perf_counter *counter, *partial_group;
+	int ret = 0;
 
-	counter_sched_in(group_counter, cpuctx, ctx, cpu);
+	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
+		return -EAGAIN;
 
 	/*
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
-		counter_sched_in(counter, cpuctx, ctx, cpu);
-		was_group = 1;
+		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
+			partial_group = counter;
+			goto group_error;
+		}
+		ret = -EAGAIN;
 	}
 
-	return was_group;
+	return ret;
+
+group_error:
+	/*
+	 * Groups can be scheduled in as one unit only, so undo any
+	 * partial group before returning:
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		if (counter == partial_group)
+			break;
+		counter_sched_out(counter, cpuctx, ctx);
+	}
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	return -EAGAIN;
 }
 
 /*
@@ -416,9 +438,6 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 
 	spin_lock(&ctx->lock);
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (ctx->nr_active == cpuctx->max_pertask)
-			break;
-
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of counters:
@@ -856,8 +875,9 @@ static const struct file_operations perf_fops = {
 	.poll			= perf_poll,
 };
 
-static void cpu_clock_perf_counter_enable(struct perf_counter *counter)
+static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 {
+	return 0;
 }
 
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
@@ -913,11 +933,13 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 	task_clock_perf_counter_update(counter, now);
 }
 
-static void task_clock_perf_counter_enable(struct perf_counter *counter)
+static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
 	u64 now = task_clock_perf_counter_val(counter, 0);
 
 	atomic64_set(&counter->hw.prev_count, now);
+
+	return 0;
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
@@ -960,12 +982,14 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 	page_faults_perf_counter_update(counter);
 }
 
-static void page_faults_perf_counter_enable(struct perf_counter *counter)
+static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
 	/*
 	 * page-faults is a per-task value already,
 	 * so we dont have to clear it on switch-in.
 	 */
+
+	return 0;
 }
 
 static void page_faults_perf_counter_disable(struct perf_counter *counter)
@@ -1006,12 +1030,14 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 	context_switches_perf_counter_update(counter);
 }
 
-static void context_switches_perf_counter_enable(struct perf_counter *counter)
+static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
 	/*
 	 * ->nvcsw + curr->nivcsw is a per-task value already,
 	 * so we dont have to clear it on switch-in.
 	 */
+
+	return 0;
 }
 
 static void context_switches_perf_counter_disable(struct perf_counter *counter)
@@ -1050,12 +1076,14 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 	cpu_migrations_perf_counter_update(counter);
 }
 
-static void cpu_migrations_perf_counter_enable(struct perf_counter *counter)
+static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
 	/*
 	 * se.nr_migrations is a per-task value already,
 	 * so we dont have to clear it on switch-in.
 	 */
+
+	return 0;
 }
 
 static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
-- 
cgit v0.10.2


From 0dff86aa7b9ec65a6d07167b7afb050b5fc98ddc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 12:28:12 +0100
Subject: x86, perfcounters: print out the ->used bitmask

Impact: extend debug printouts

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 74090a3..f3359c2 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -255,6 +255,7 @@ static int pmc_generic_enable(struct perf_counter *counter)
 		idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
 		if (idx == nr_counters_generic)
 			return -EAGAIN;
+
 		set_bit(idx, cpuc->used);
 		hwc->idx = idx;
 	}
@@ -274,6 +275,7 @@ static int pmc_generic_enable(struct perf_counter *counter)
 void perf_counter_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left;
+	struct cpu_hw_counters *cpuc;
 	int cpu, idx;
 
 	if (!nr_counters_generic)
@@ -282,6 +284,7 @@ void perf_counter_print_debug(void)
 	local_irq_disable();
 
 	cpu = smp_processor_id();
+	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
@@ -291,6 +294,7 @@ void perf_counter_print_debug(void)
 	printk(KERN_INFO "CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 	printk(KERN_INFO "CPU#%d: status:     %016llx\n", cpu, status);
 	printk(KERN_INFO "CPU#%d: overflow:   %016llx\n", cpu, overflow);
+	printk(KERN_INFO "CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
-- 
cgit v0.10.2


From 8fe91e61cdc407c7556d3cd71cf20141a25bbcea Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 12:29:25 +0100
Subject: perfcounters: remove ->nr_inherited

Impact: remove dead code

nr_inherited was not maintained correctly (not decremented) - and also
not used - remove it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 53af11d..1ea08e9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -164,7 +164,6 @@ struct perf_counter {
 	struct task_struct		*task;
 	struct file			*filp;
 
-	unsigned int			nr_inherited;
 	struct perf_counter		*parent;
 	/*
 	 * Protect attach/detach:
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2e73929..48e1dbc 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1308,7 +1308,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	child_ctx->nr_counters++;
 
 	child_counter->parent = parent_counter;
-	parent_counter->nr_inherited++;
 	/*
 	 * inherit into child's child as well:
 	 */
-- 
cgit v0.10.2


From 235c7fc7c500e4fd1700c4ad01b5612bcdc1b449 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 21 Dec 2008 14:43:25 +0100
Subject: perfcounters: generalize the counter scheduler

Impact: clean up and refactor code

refactor the counter scheduler: separate out in/out functions and
introduce a counter-rotation function as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 48e1dbc..d7a79f3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->hw_ops->disable(counter);
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_ops->disable(counter);
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
 		counter->task = NULL;
+		counter->oncpu = -1;
 	}
 	ctx->nr_counters--;
 
@@ -192,8 +193,36 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
+static int
+counter_sched_in(struct perf_counter *counter,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_counter_context *ctx,
+		 int cpu)
+{
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
+	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	/*
+	 * The new state must be visible before we turn it on in the hardware:
+	 */
+	smp_wmb();
+
+	if (counter->hw_ops->enable(counter)) {
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->oncpu = -1;
+		return -EAGAIN;
+	}
+
+	cpuctx->active_oncpu++;
+	ctx->nr_active++;
+
+	return 0;
+}
+
 /*
- * Cross CPU call to install and enable a preformance counter
+ * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
 {
@@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	perf_flags = hw_perf_save_disable();
-	list_add_counter(counter, ctx);
-	hw_perf_restore(perf_flags);
 
+	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	if (cpuctx->active_oncpu < perf_max_counters) {
-		counter->state = PERF_COUNTER_STATE_ACTIVE;
-		counter->oncpu = cpu;
-		ctx->nr_active++;
-		cpuctx->active_oncpu++;
-		counter->hw_ops->enable(counter);
-	}
+	counter_sched_in(counter, cpuctx, ctx, cpu);
 
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
+	hw_perf_restore(perf_flags);
+
 	spin_unlock(&ctx->lock);
 	curr_rq_unlock_irq_restore(&flags);
 }
@@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
-	counter->hw_ops->disable(counter);
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
 	cpuctx->active_oncpu--;
@@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter,
 		counter_sched_out(counter, cpuctx, ctx);
 }
 
+void __perf_counter_sched_out(struct perf_counter_context *ctx,
+			      struct perf_cpu_context *cpuctx)
+{
+	struct perf_counter *counter;
+
+	if (likely(!ctx->nr_counters))
+		return;
+
+	spin_lock(&ctx->lock);
+	if (ctx->nr_active) {
+		list_for_each_entry(counter, &ctx->counter_list, list_entry)
+			group_sched_out(counter, cpuctx, ctx);
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = &task->perf_counter_ctx;
-	struct perf_counter *counter;
 
 	if (likely(!cpuctx->task_ctx))
 		return;
 
-	spin_lock(&ctx->lock);
-	if (ctx->nr_active) {
-		list_for_each_entry(counter, &ctx->counter_list, list_entry)
-			group_sched_out(counter, cpuctx, ctx);
-	}
-	spin_unlock(&ctx->lock);
+	__perf_counter_sched_out(ctx, cpuctx);
+
 	cpuctx->task_ctx = NULL;
 }
 
-static int
-counter_sched_in(struct perf_counter *counter,
-		 struct perf_cpu_context *cpuctx,
-		 struct perf_counter_context *ctx,
-		 int cpu)
+static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
-		return 0;
-
-	if (counter->hw_ops->enable(counter))
-		return -EAGAIN;
-
-	counter->state = PERF_COUNTER_STATE_ACTIVE;
-	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
-
-	cpuctx->active_oncpu++;
-	ctx->nr_active++;
-
-	return 0;
+	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
 }
 
 static int
@@ -416,21 +435,10 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Called from scheduler to add the counters of the current task
- * with interrupts disabled.
- *
- * We restore the counter value and then enable it.
- *
- * This does not protect us against NMI, but enable()
- * sets the enabled bit in the control field of counter _before_
- * accessing the counter control register. If a NMI hits, then it will
- * keep the counter running.
- */
-void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+static void
+__perf_counter_sched_in(struct perf_counter_context *ctx,
+			struct perf_cpu_context *cpuctx, int cpu)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
 	struct perf_counter *counter;
 
 	if (likely(!ctx->nr_counters))
@@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 			break;
 	}
 	spin_unlock(&ctx->lock);
+}
 
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
 
+static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
+}
+
 int perf_counter_task_disable(void)
 {
 	struct task_struct *curr = current;
@@ -514,6 +547,8 @@ int perf_counter_task_enable(void)
 	/* force the update of the task clock: */
 	__task_delta_exec(curr, 1);
 
+	perf_counter_task_sched_out(curr, cpu);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -538,19 +573,18 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
-void perf_counter_task_tick(struct task_struct *curr, int cpu)
+/*
+ * Round-robin a context's counters:
+ */
+static void rotate_ctx(struct perf_counter_context *ctx)
 {
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	u64 perf_flags;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx->nr_counters)
 		return;
 
-	perf_counter_task_sched_out(curr, cpu);
-
 	spin_lock(&ctx->lock);
-
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
@@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
+}
+
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	const int rotate_percpu = 0;
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_out(cpuctx);
+	perf_counter_task_sched_out(curr, cpu);
 
+	if (rotate_percpu)
+		rotate_ctx(&cpuctx->ctx);
+	rotate_ctx(ctx);
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
 }
 
@@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 	struct task_struct *curr = counter->task;
 	u64 delta;
 
-	WARN_ON_ONCE(counter->task != current);
-
 	delta = __task_delta_exec(curr, update);
 
 	return curr->se.sum_exec_runtime + delta;
@@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
 
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
@@ -1331,35 +1381,49 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
-	unsigned long flags;
-	u64 perf_flags;
 
 	/*
-	 * Disable and unlink this counter.
-	 *
-	 * Be careful about zapping the list - IRQ/NMI context
-	 * could still be processing it:
+	 * If we do not self-reap then we have to wait for the
+	 * child task to unschedule (it will happen for sure),
+	 * so that its counter is at its final count. (This
+	 * condition triggers rarely - child tasks usually get
+	 * off their CPU before the parent has a chance to
+	 * get this far into the reaping action)
 	 */
-	curr_rq_lock_irq_save(&flags);
-	perf_flags = hw_perf_save_disable();
-
-	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+	if (child != current) {
+		wait_task_inactive(child, 0);
+		list_del_init(&child_counter->list_entry);
+	} else {
 		struct perf_cpu_context *cpuctx;
+		unsigned long flags;
+		u64 perf_flags;
+
+		/*
+		 * Disable and unlink this counter.
+		 *
+		 * Be careful about zapping the list - IRQ/NMI context
+		 * could still be processing it:
+		 */
+		curr_rq_lock_irq_save(&flags);
+		perf_flags = hw_perf_save_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		child_counter->hw_ops->disable(child_counter);
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-		child_counter->oncpu = -1;
+		if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+			child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+			child_counter->hw_ops->disable(child_counter);
+			cpuctx->active_oncpu--;
+			child_ctx->nr_active--;
+			child_counter->oncpu = -1;
+		}
 
-		cpuctx->active_oncpu--;
-		child_ctx->nr_active--;
-	}
+		list_del_init(&child_counter->list_entry);
 
-	list_del_init(&child_counter->list_entry);
+		child_ctx->nr_counters--;
 
-	hw_perf_restore(perf_flags);
-	curr_rq_unlock_irq_restore(&flags);
+		hw_perf_restore(perf_flags);
+		curr_rq_unlock_irq_restore(&flags);
+	}
 
 	parent_counter = child_counter->parent;
 	/*
-- 
cgit v0.10.2


From f650a672359819454c3d8d4135ecd1558cde0b24 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 12:17:29 +0100
Subject: perfcounters: add PERF_COUNT_BUS_CYCLES

Generalize "bus cycles" hw events - and map them to CPU_CLK_Unhalted.Ref
on x86. (which is a good enough approximation)

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f3359c2..86b2fdd 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -41,12 +41,13 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
 static const int intel_perfmon_event_map[] =
 {
-  [PERF_COUNT_CYCLES]			= 0x003c,
+  [PERF_COUNT_CPU_CYCLES]		= 0x003c,
   [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
   [PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
   [PERF_COUNT_CACHE_MISSES]		= 0x412e,
   [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
   [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_BUS_CYCLES]		= 0x013c,
 };
 
 static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1ea08e9..ec77d16 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -36,14 +36,15 @@ enum hw_event_types {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CYCLES		=  0,
+	PERF_COUNT_CPU_CYCLES		=  0,
 	PERF_COUNT_INSTRUCTIONS		=  1,
 	PERF_COUNT_CACHE_REFERENCES	=  2,
 	PERF_COUNT_CACHE_MISSES		=  3,
 	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
 	PERF_COUNT_BRANCH_MISSES	=  5,
+	PERF_COUNT_BUS_CYCLES		=  6,
 
-	PERF_HW_EVENTS_MAX		=  6,
+	PERF_HW_EVENTS_MAX		=  7,
 
 	/*
 	 * Special "software" counters provided by the kernel, even if
-- 
cgit v0.10.2


From 2f18d1e8d07ae67dd0afce875287756d4bd31a46 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 22 Dec 2008 11:10:42 +0100
Subject: x86, perfcounters: add support for fixed-function pmcs

Impact: extend performance counter support on x86 Intel CPUs

Modern Intel CPUs have 3 "fixed-function" performance counters, which
count these hardware events:

    Instr_Retired.Any
    CPU_CLK_Unhalted.Core
    CPU_CLK_Unhalted.Ref

Add support for them to the performance counters subsystem.

Their use is transparent to user-space: the counter scheduler is
extended to automatically recognize the cases where a fixed-function
PMC can be utilized instead of a generic PMC. In such cases the
generic PMC is kept available for more counters.

The above fixed-function events map to these generic counter hw events:

        PERF_COUNT_INSTRUCTIONS
        PERF_COUNT_CPU_CYCLES
        PERF_COUNT_BUS_CYCLES

(The 'bus' cycles are in reality often CPU-ish cycles, just with a fixed
 frequency.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 13745de..2e08ed7 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -23,6 +23,11 @@
 #define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
 #define ARCH_PERFMON_EVENTSEL_USR			  (1 << 16)
 
+/*
+ * Includes eventsel and unit mask as well:
+ */
+#define ARCH_PERFMON_EVENT_MASK				    0xffff
+
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 		 0
@@ -69,12 +74,15 @@ union cpuid10_edx {
 
 /* Instr_Retired.Any: */
 #define MSR_ARCH_PERFMON_FIXED_CTR0			0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS			(X86_PMC_IDX_FIXED + 0)
 
 /* CPU_CLK_Unhalted.Core: */
 #define MSR_ARCH_PERFMON_FIXED_CTR1			0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES			(X86_PMC_IDX_FIXED + 1)
 
 /* CPU_CLK_Unhalted.Ref: */
 #define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
+#define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
 
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 86b2fdd..da46eca 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -26,6 +26,7 @@ static bool perf_counters_initialized __read_mostly;
  */
 static int nr_counters_generic __read_mostly;
 static u64 perf_counter_mask __read_mostly;
+static u64 counter_value_mask __read_mostly;
 
 static int nr_counters_fixed __read_mostly;
 
@@ -120,9 +121,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 			hwc->nmi = 1;
 	}
 
-	hwc->config_base	= MSR_ARCH_PERFMON_EVENTSEL0;
-	hwc->counter_base	= MSR_ARCH_PERFMON_PERFCTR0;
-
 	hwc->irq_period		= hw_event->irq_period;
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
@@ -184,15 +182,33 @@ void hw_perf_restore(u64 ctrl)
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
 static inline void
+__pmc_fixed_disable(struct perf_counter *counter,
+		    struct hw_perf_counter *hwc, unsigned int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, mask;
+	int err;
+
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
 __pmc_generic_disable(struct perf_counter *counter,
 			   struct hw_perf_counter *hwc, unsigned int idx)
 {
 	int err;
 
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
+		return __pmc_fixed_disable(counter, hwc, idx);
+
 	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
 }
 
-static DEFINE_PER_CPU(u64, prev_left[X86_PMC_MAX_GENERIC]);
+static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
 
 /*
  * Set the next IRQ period, based on the hwc->period_left value.
@@ -202,8 +218,9 @@ static void
 __hw_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
-	s32 left = atomic64_read(&hwc->period_left);
+	s64 left = atomic64_read(&hwc->period_left);
 	s32 period = hwc->irq_period;
+	int err;
 
 	/*
 	 * If we are way outside a reasoable range then just skip forward:
@@ -224,21 +241,64 @@ __hw_perf_counter_set_period(struct perf_counter *counter,
 	 * The hw counter starts counting from this counter offset,
 	 * mark it to be able to extra future deltas:
 	 */
-	atomic64_set(&hwc->prev_count, (u64)(s64)-left);
+	atomic64_set(&hwc->prev_count, (u64)-left);
 
-	wrmsr(hwc->counter_base + idx, -left, 0);
+	err = checking_wrmsrl(hwc->counter_base + idx,
+			     (u64)(-left) & counter_value_mask);
+}
+
+static inline void
+__pmc_fixed_enable(struct perf_counter *counter,
+		   struct hw_perf_counter *hwc, unsigned int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, bits, mask;
+	int err;
+
+	/*
+	 * Enable IRQ generation (0x8) and ring-3 counting (0x2),
+	 * and enable ring-0 counting if allowed:
+	 */
+	bits = 0x8ULL | 0x2ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		bits |= 0x1;
+	bits <<= (idx * 4);
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	ctrl_val |= bits;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
 }
 
 static void
 __pmc_generic_enable(struct perf_counter *counter,
 			  struct hw_perf_counter *hwc, int idx)
 {
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
+		return __pmc_fixed_enable(counter, hwc, idx);
+
 	wrmsr(hwc->config_base + idx,
 	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
-static int fixed_mode_idx(struct hw_perf_counter *hwc)
+static int
+fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 {
+	unsigned int event;
+
+	if (unlikely(hwc->nmi))
+		return -1;
+
+	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+
+	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_INSTRUCTIONS]))
+		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
+	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_CPU_CYCLES]))
+		return X86_PMC_IDX_FIXED_CPU_CYCLES;
+	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_BUS_CYCLES]))
+		return X86_PMC_IDX_FIXED_BUS_CYCLES;
+
 	return -1;
 }
 
@@ -249,16 +309,39 @@ static int pmc_generic_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
-	int idx = hwc->idx;
+	int idx;
 
-	/* Try to get the previous counter again */
-	if (test_and_set_bit(idx, cpuc->used)) {
-		idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
-		if (idx == nr_counters_generic)
-			return -EAGAIN;
+	idx = fixed_mode_idx(counter, hwc);
+	if (idx >= 0) {
+		/*
+		 * Try to get the fixed counter, if that is already taken
+		 * then try to get a generic counter:
+		 */
+		if (test_and_set_bit(idx, cpuc->used))
+			goto try_generic;
 
-		set_bit(idx, cpuc->used);
+		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		/*
+		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
+		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+		 */
+		hwc->counter_base =
+			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
 		hwc->idx = idx;
+	} else {
+		idx = hwc->idx;
+		/* Try to get the previous generic counter again */
+		if (test_and_set_bit(idx, cpuc->used)) {
+try_generic:
+			idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
+			if (idx == nr_counters_generic)
+				return -EAGAIN;
+
+			set_bit(idx, cpuc->used);
+			hwc->idx = idx;
+		}
+		hwc->config_base  = MSR_ARCH_PERFMON_EVENTSEL0;
+		hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
 	}
 
 	perf_counters_lapic_init(hwc->nmi);
@@ -266,6 +349,10 @@ static int pmc_generic_enable(struct perf_counter *counter)
 	__pmc_generic_disable(counter, hwc, idx);
 
 	cpuc->counters[idx] = counter;
+	/*
+	 * Make it visible before enabling the hw:
+	 */
+	smp_wmb();
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
 	__pmc_generic_enable(counter, hwc, idx);
@@ -275,7 +362,7 @@ static int pmc_generic_enable(struct perf_counter *counter)
 
 void perf_counter_print_debug(void)
 {
-	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left;
+	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
 	struct cpu_hw_counters *cpuc;
 	int cpu, idx;
 
@@ -290,11 +377,13 @@ void perf_counter_print_debug(void)
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 	rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+	rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
 
 	printk(KERN_INFO "\n");
 	printk(KERN_INFO "CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 	printk(KERN_INFO "CPU#%d: status:     %016llx\n", cpu, status);
 	printk(KERN_INFO "CPU#%d: overflow:   %016llx\n", cpu, overflow);
+	printk(KERN_INFO "CPU#%d: fixed:      %016llx\n", cpu, fixed);
 	printk(KERN_INFO "CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
@@ -303,13 +392,19 @@ void perf_counter_print_debug(void)
 
 		prev_left = per_cpu(prev_left[idx], cpu);
 
-		printk(KERN_INFO "CPU#%d: PMC%d ctrl:  %016llx\n",
+		printk(KERN_INFO "CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
 			cpu, idx, pmc_ctrl);
-		printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n",
+		printk(KERN_INFO "CPU#%d:   gen-PMC%d count: %016llx\n",
 			cpu, idx, pmc_count);
-		printk(KERN_INFO "CPU#%d: PMC%d left:  %016llx\n",
+		printk(KERN_INFO "CPU#%d:   gen-PMC%d left:  %016llx\n",
 			cpu, idx, prev_left);
 	}
+	for (idx = 0; idx < nr_counters_fixed; idx++) {
+		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+
+		printk(KERN_INFO "CPU#%d: fixed-PMC%d count: %016llx\n",
+			cpu, idx, pmc_count);
+	}
 	local_irq_enable();
 }
 
@@ -323,6 +418,11 @@ static void pmc_generic_disable(struct perf_counter *counter)
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
+	/*
+	 * Make sure the cleared pointer becomes visible before we
+	 * (potentially) free the counter:
+	 */
+	smp_wmb();
 
 	/*
 	 * Drain the remaining delta count out of a counter
@@ -353,14 +453,11 @@ static void perf_save_and_restart(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 	int idx = hwc->idx;
-	u64 pmc_ctrl;
-
-	rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 
 	x86_perf_counter_update(counter, hwc, idx);
 	__hw_perf_counter_set_period(counter, hwc, idx);
 
-	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		__pmc_generic_enable(counter, hwc, idx);
 }
 
@@ -373,6 +470,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 	 * Store sibling timestamps (if any):
 	 */
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
+
 		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 		perf_store_irq_data(sibling, counter->hw_event.type);
 		perf_store_irq_data(sibling, atomic64_read(&counter->count));
@@ -403,7 +501,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 
 again:
 	ack = status;
-	for_each_bit(bit, (unsigned long *) &status, nr_counters_generic) {
+	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_counter *counter = cpuc->counters[bit];
 
 		clear_bit(bit, (unsigned long *) &status);
@@ -561,6 +659,9 @@ void __init init_hw_perf_counters(void)
 	perf_max_counters = nr_counters_generic;
 
 	printk(KERN_INFO "... bit width:       %d\n", eax.split.bit_width);
+	counter_value_mask = (1ULL << eax.split.bit_width) - 1;
+	printk(KERN_INFO "... value mask:      %016Lx\n", counter_value_mask);
+
 	printk(KERN_INFO "... mask length:     %d\n", eax.split.mask_length);
 
 	nr_counters_fixed = edx.split.num_counters_fixed;
-- 
cgit v0.10.2


From e44aef58ecfbe061eb4c53b939bcc35fb1bee82d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 25 Dec 2008 09:02:11 +0100
Subject: perfcounters: include asm/perf_counter.h only if
 CONFIG_PERF_COUNTERS=y

Impact: build fix on ia64

KOSAKI Motohiro reported that -tip doesnt build on ia64 because
asm/perf_counter.h only exists on x86 for now. Fix it.

Reported-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index ec77d16..cc3a75a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -14,7 +14,10 @@
 #define _LINUX_PERF_COUNTER_H
 
 #include <asm/atomic.h>
-#include <asm/perf_counter.h>
+
+#ifdef CONFIG_PERF_COUNTERS
+# include <asm/perf_counter.h>
+#endif
 
 #include <linux/list.h>
 #include <linux/mutex.h>
-- 
cgit v0.10.2


From 01ea1ccaa24dea3552e103be13b7897211607a8b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 26 Dec 2008 21:05:06 -0800
Subject: perf_counter: more barrier in blank weak function

Impact: fix panic possible panic

Some versions of GCC inline the weak global function if it's empty.
Add a barrier() to work it around.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d7a79f3..37f7716 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -45,8 +45,8 @@ hw_perf_counter_init(struct perf_counter *counter)
 }
 
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
-void __weak hw_perf_restore(u64 ctrl)		{ }
-void __weak hw_perf_counter_setup(void)		{ }
+void __weak hw_perf_restore(u64 ctrl)		{ barrier(); }
+void __weak hw_perf_counter_setup(void)		{ barrier(); }
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
-- 
cgit v0.10.2


From 2b583d8bc8d7105b58d7481a4a0ceb718dac49c6 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 27 Dec 2008 19:15:43 +0530
Subject: x86: perf_counter remove unwanted hw_perf_enable_all

Impact: clean, reduce kernel size a bit, avoid sparse warnings

Fixes sparse warnings:

 arch/x86/kernel/cpu/perf_counter.c:153:6: warning: symbol 'hw_perf_enable_all' was not declared. Should it be static?
 arch/x86/kernel/cpu/perf_counter.c:279:3: warning: returning void-valued expression
 arch/x86/kernel/cpu/perf_counter.c:206:3: warning: returning void-valued expression
 arch/x86/kernel/cpu/perf_counter.c:206:3: warning: returning void-valued expression

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index da46eca..9376771 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -150,14 +150,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	return 0;
 }
 
-void hw_perf_enable_all(void)
-{
-	if (unlikely(!perf_counters_initialized))
-		return;
-
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask);
-}
-
 u64 hw_perf_save_disable(void)
 {
 	u64 ctrl;
@@ -200,12 +192,10 @@ static inline void
 __pmc_generic_disable(struct perf_counter *counter,
 			   struct hw_perf_counter *hwc, unsigned int idx)
 {
-	int err;
-
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
-		return __pmc_fixed_disable(counter, hwc, idx);
-
-	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
+		__pmc_fixed_disable(counter, hwc, idx);
+	else
+		wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
 }
 
 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
@@ -276,10 +266,10 @@ __pmc_generic_enable(struct perf_counter *counter,
 			  struct hw_perf_counter *hwc, int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
-		return __pmc_fixed_enable(counter, hwc, idx);
-
-	wrmsr(hwc->config_base + idx,
-	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+		__pmc_fixed_enable(counter, hwc, idx);
+	else
+		wrmsr(hwc->config_base + idx,
+		      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
 static int
-- 
cgit v0.10.2


From ff6f05416ece2caec1a7a1f8180d6598e0ab9272 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 16:19:25 +1100
Subject: perf_counter: Fix return value from dummy hw_perf_counter_init

Impact: fix oops-causing bug

Currently, if you try to use perf_counters on an architecture that has
no hardware support, and you select an event that doesn't map to any of
the defined software counters, you get an oops rather than an error.
This is because the dummy hw_perf_counter_init returns ERR_PTR(-EINVAL)
but the caller (perf_counter_alloc) only tests for NULL.

This makes the dummy hw_perf_counter_init return NULL instead.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 37f7716..4be1a8d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -41,7 +41,7 @@ static DEFINE_MUTEX(perf_resource_mutex);
 extern __weak const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter)
 {
-	return ERR_PTR(-EINVAL);
+	return NULL;
 }
 
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
-- 
cgit v0.10.2


From 9abf8a08bc8f18a3b125f834f00e2e71b49c15d2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 16:26:43 +1100
Subject: perf_counter: Fix the cpu_clock software counter

Impact: bug fix

Currently if you do (e.g.) timec -e -1 ls, it will report 0 for the
value of the cpu_clock counter.  The reason is that the core assumes
that a counter's count field is up-to-date when the counter is inactive,
and doesn't call the counter's read function.  However, the cpu_clock
counter code only updates the count in the read function.

This fixes it by making both the read and disable functions update the
count.  It also makes the counter ignore time passing while the counter
is disabled, by making the enable function update the hw.prev_count field.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4be1a8d..b7a027a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -928,18 +928,32 @@ static const struct file_operations perf_fops = {
 
 static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 {
+	int cpu = raw_smp_processor_id();
+
+	atomic64_set(&counter->hw.prev_count, cpu_clock(cpu));
 	return 0;
 }
 
+static void cpu_clock_perf_counter_update(struct perf_counter *counter)
+{
+	int cpu = raw_smp_processor_id();
+	s64 prev;
+	u64 now;
+
+	now = cpu_clock(cpu);
+	prev = atomic64_read(&counter->hw.prev_count);
+	atomic64_set(&counter->hw.prev_count, now);
+	atomic64_add(now - prev, &counter->count);
+}
+
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
 {
+	cpu_clock_perf_counter_update(counter);
 }
 
 static void cpu_clock_perf_counter_read(struct perf_counter *counter)
 {
-	int cpu = raw_smp_processor_id();
-
-	atomic64_set(&counter->count, cpu_clock(cpu));
+	cpu_clock_perf_counter_update(counter);
 }
 
 static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
-- 
cgit v0.10.2


From 3cbed429a9ccdb7a243f733b1056fe5c39e9004c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 16:43:42 +1100
Subject: perf_counter: Add optional hw_perf_group_sched_in arch function

Impact: extend perf_counter infrastructure

This adds an optional hw_perf_group_sched_in() arch function that enables
a whole group of counters in one go.  It returns 1 if it added the group
successfully, 0 if it did nothing (and therefore the core needs to add
the counters individually), or a negative number if an error occurred.
It should add all the counters and enable any software counters in the
group, or else add none of them and return an error.

There are a couple of related changes/improvements in the group handling
here:

* As an optimization, group_sched_out() and group_sched_in() now check the
  state of the group leader, and do nothing if the leader is not active
  or disabled.

* We now call hw_perf_save_disable/hw_perf_restore around the complete
  set of counter enable/disable calls in __perf_counter_sched_in/out,
  to give the arch code the opportunity to defer updating the hardware
  state until the hw_perf_restore call if it wants.

* We no longer stop adding groups after we get to a group that has more
  than one counter.  We will ultimately add an option for a group to be
  exclusive.  The current code doesn't really implement exclusive groups
  anyway, since a group could end up going on with other counters that
  get added before it.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index cc3a75a..b21d1ea 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -236,6 +236,9 @@ extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
 extern int perf_counter_task_disable(void);
 extern int perf_counter_task_enable(void);
+extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu);
 
 #else
 static inline void
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b7a027a..9ad11e4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -47,6 +47,12 @@ hw_perf_counter_init(struct perf_counter *counter)
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
 void __weak hw_perf_restore(u64 ctrl)		{ barrier(); }
 void __weak hw_perf_counter_setup(void)		{ barrier(); }
+int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu)
+{
+	return 0;
+}
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
@@ -341,6 +347,9 @@ group_sched_out(struct perf_counter *group_counter,
 {
 	struct perf_counter *counter;
 
+	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
 	counter_sched_out(group_counter, cpuctx, ctx);
 
 	/*
@@ -354,15 +363,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 			      struct perf_cpu_context *cpuctx)
 {
 	struct perf_counter *counter;
+	u64 flags;
 
 	if (likely(!ctx->nr_counters))
 		return;
 
 	spin_lock(&ctx->lock);
+	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
 		list_for_each_entry(counter, &ctx->counter_list, list_entry)
 			group_sched_out(counter, cpuctx, ctx);
 	}
+	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
 }
 
@@ -402,7 +414,14 @@ group_sched_in(struct perf_counter *group_counter,
 	       int cpu)
 {
 	struct perf_counter *counter, *partial_group;
-	int ret = 0;
+	int ret;
+
+	if (group_counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
+	if (ret)
+		return ret < 0 ? ret : 0;
 
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
@@ -415,10 +434,9 @@ group_sched_in(struct perf_counter *group_counter,
 			partial_group = counter;
 			goto group_error;
 		}
-		ret = -EAGAIN;
 	}
 
-	return ret;
+	return 0;
 
 group_error:
 	/*
@@ -440,11 +458,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
 {
 	struct perf_counter *counter;
+	u64 flags;
 
 	if (likely(!ctx->nr_counters))
 		return;
 
 	spin_lock(&ctx->lock);
+	flags = hw_perf_save_disable();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
@@ -454,12 +474,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 			continue;
 
 		/*
-		 * If we scheduled in a group atomically and
-		 * exclusively, break out:
+		 * If we scheduled in a group atomically and exclusively,
+		 * or if this group can't go on, break out:
 		 */
 		if (group_sched_in(counter, cpuctx, ctx, cpu))
 			break;
 	}
+	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
 }
 
-- 
cgit v0.10.2


From 4eb96fcfe07b7f2a05577e57533840f8e26bea53 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 17:24:34 +1100
Subject: perf_counter: Add dummy perf_counter_print_debug function

Impact: minimize requirements on architectures

Currently, an architecture just enabling CONFIG_PERF_COUNTERS but not
providing any extra functions will fail to build with
perf_counter_print_debug being undefined, since we don't provide an
empty dummy definition like we do with the hw_perf_* functions.

This provides an empty dummy perf_counter_print_debug() to make it
easier for architectures to turn on CONFIG_PERF_COUNTERS.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 9ad11e4..4c0dccb 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -54,6 +54,8 @@ int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
 	return 0;
 }
 
+void __weak perf_counter_print_debug(void)	{ }
+
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
-- 
cgit v0.10.2


From d662ed26734473d4cb5f3d78cebfec8f9126e97c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 17:01:53 +1100
Subject: powerpc/perf_counter: Add perf_counter system call on powerpc

... with an empty/dummy asm/perf_counter.h so it builds.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
new file mode 100644
index 0000000..59530ae
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -0,0 +1,10 @@
+/*
+ * Performance counter support - PowerPC-specific definitions.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 803def23..da300c4 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,3 +322,4 @@ SYSCALL_SPU(epoll_create1)
 SYSCALL_SPU(dup3)
 SYSCALL_SPU(pipe2)
 SYSCALL(inotify_init1)
+SYSCALL(perf_counter_open)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index e07d0c7..7cef5af 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -341,10 +341,11 @@
 #define __NR_dup3		316
 #define __NR_pipe2		317
 #define __NR_inotify_init1	318
+#define __NR_perf_counter_open	319
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		319
+#define __NR_syscalls		320
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 3d0c776..94dd1fb 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
 config PPC64
 	bool "64-bit kernel"
 	default n
+	select HAVE_PERF_COUNTERS
 	help
 	  This option selects whether a 32-bit or a 64-bit kernel
 	  will be built.
-- 
cgit v0.10.2


From 93a6d3ce6962044fe9badf528fed46b455d58292 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 16:52:19 +1100
Subject: powerpc: Provide a way to defer perf counter work until interrupts
 are enabled

Because 64-bit powerpc uses lazy (soft) interrupt disabling, it is
possible for a performance monitor exception to come in when the
kernel thinks interrupts are disabled (i.e. when they are
soft-disabled but hard-enabled).  In such a situation the performance
monitor exception handler might have some processing to do (such as
process wakeups) which can't be done in what is effectively an NMI
handler.

This provides a way to defer that work until interrupts get enabled,
either in raw_local_irq_restore() or by returning from an interrupt
handler to code that had interrupts enabled.  We have a per-processor
flag that indicates that there is work pending to do when interrupts
subsequently get re-enabled.  This flag is checked in the interrupt
return path and in raw_local_irq_restore(), and if it is set,
perf_counter_do_pending() is called to do the pending work.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index f75a5fc..e10f151 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,5 +131,36 @@ static inline int irqs_disabled_flags(unsigned long flags)
  */
 struct hw_interrupt_type;
 
+#ifdef CONFIG_PERF_COUNTERS
+static inline unsigned long get_perf_counter_pending(void)
+{
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, perf_counter_pending)));
+	return x;
+}
+
+static inline void set_perf_counter_pending(int x)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (x),
+		"i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+extern void perf_counter_do_pending(void);
+
+#else
+
+static inline unsigned long get_perf_counter_pending(void)
+{
+	return 0;
+}
+
+static inline void set_perf_counter_pending(int x) {}
+static inline void perf_counter_do_pending(void) {}
+#endif /* CONFIG_PERF_COUNTERS */
+
 #endif	/* __KERNEL__ */
 #endif	/* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 082b3ae..6ef0557 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -99,6 +99,7 @@ struct paca_struct {
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
+	u8 perf_counter_pending;	/* PM interrupt while soft-disabled */
 
 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 661d07d..cea4629 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -127,6 +127,7 @@ int main(void)
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+	DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
 	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 383ed6e..f30b4e5 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
 
+#ifdef CONFIG_PERF_COUNTERS
+	/* check paca->perf_counter_pending if we're enabling ints */
+	lbz	r3,PACAPERFPEND(r13)
+	and.	r3,r3,r5
+	beq	27f
+	bl	.perf_counter_do_pending
+27:
+#endif /* CONFIG_PERF_COUNTERS */
+
 	/* extract EE bit and use it to restore paca->hard_enabled */
 	ld	r3,_MSR(r1)
 	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac222d0..4efb886 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -104,6 +104,13 @@ static inline notrace void set_soft_enabled(unsigned long enable)
 	: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
 }
 
+#ifdef CONFIG_PERF_COUNTERS
+notrace void __weak perf_counter_do_pending(void)
+{
+	set_perf_counter_pending(0);
+}
+#endif
+
 notrace void raw_local_irq_restore(unsigned long en)
 {
 	/*
@@ -135,6 +142,9 @@ notrace void raw_local_irq_restore(unsigned long en)
 			iseries_handle_interrupts();
 	}
 
+	if (get_perf_counter_pending())
+		perf_counter_do_pending();
+
 	/*
 	 * if (get_paca()->hard_enabled) return;
 	 * But again we need to take care that gcc gets hard_enabled directly
-- 
cgit v0.10.2


From 4574910e5087085a1f330ff8373cee4503f5c77c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 20:21:55 +1100
Subject: powerpc/perf_counter: Add generic support for POWER-family PMU
 hardware

This provides the architecture-specific functions needed to access
PMU hardware on the 64-bit PowerPC processors.  It has been designed
for the IBM POWER family (POWER 4/4+/5/5+/6 and PPC970) but will
hopefully also suit other 64-bit PowerPC machines (although probably
not Cell given how different it is in this area).  This doesn't
include back-ends for any specific processors.

This implements a system which allows back-ends to express the
constraints that their hardware has on what events can be counted
simultaneously.  The constraints are expressed as a 64-bit mask +
64-bit value for each event, and the encoding is capable of
expressing the constraints arising from having a set of multiplexers
feeding an event bus, with some events being available through
multiple multiplexer settings, such as we get on POWER4 and PPC970.
Furthermore, the back-end can supply alternative event codes for
each event, and the constraint checking code will try all possible
combinations of alternative event codes to try to find a combination
that will fit.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index 59530ae..9d7ff6d 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -8,3 +8,65 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <linux/types.h>
+
+#define MAX_HWCOUNTERS		8
+#define MAX_EVENT_ALTERNATIVES	8
+
+/*
+ * This struct provides the constants and functions needed to
+ * describe the PMU on a particular POWER-family CPU.
+ */
+struct power_pmu {
+	int	n_counter;
+	int	max_alternatives;
+	u64	add_fields;
+	u64	test_adder;
+	int	(*compute_mmcr)(unsigned int events[], int n_ev,
+				unsigned int hwc[], u64 mmcr[]);
+	int	(*get_constraint)(unsigned int event, u64 *mskp, u64 *valp);
+	int	(*get_alternatives)(unsigned int event, unsigned int alt[]);
+	void	(*disable_pmc)(unsigned int pmc, u64 mmcr[]);
+	int	n_generic;
+	int	*generic_events;
+};
+
+extern struct power_pmu *ppmu;
+
+/*
+ * The power_pmu.get_constraint function returns a 64-bit value and
+ * a 64-bit mask that express the constraints between this event and
+ * other events.
+ *
+ * The value and mask are divided up into (non-overlapping) bitfields
+ * of three different types:
+ *
+ * Select field: this expresses the constraint that some set of bits
+ * in MMCR* needs to be set to a specific value for this event.  For a
+ * select field, the mask contains 1s in every bit of the field, and
+ * the value contains a unique value for each possible setting of the
+ * MMCR* bits.  The constraint checking code will ensure that two events
+ * that set the same field in their masks have the same value in their
+ * value dwords.
+ *
+ * Add field: this expresses the constraint that there can be at most
+ * N events in a particular class.  A field of k bits can be used for
+ * N <= 2^(k-1) - 1.  The mask has the most significant bit of the field
+ * set (and the other bits 0), and the value has only the least significant
+ * bit of the field set.  In addition, the 'add_fields' and 'test_adder'
+ * in the struct power_pmu for this processor come into play.  The
+ * add_fields value contains 1 in the LSB of the field, and the
+ * test_adder contains 2^(k-1) - 1 - N in the field.
+ *
+ * NAND field: this expresses the constraint that you may not have events
+ * in all of a set of classes.  (For example, on PPC970, you can't select
+ * events from the FPU, ISU and IDU simultaneously, although any two are
+ * possible.)  For N classes, the field is N+1 bits wide, and each class
+ * is assigned one bit from the least-significant N bits.  The mask has
+ * only the most-significant bit set, and the value has only the bit
+ * for the event's class set.  The test_adder has the least significant
+ * bit set in the field.
+ *
+ * If an event is not subject to the constraint expressed by a particular
+ * field, then it will have 0 in both the mask and value for that field.
+ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1308a86..fde190b 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 0000000..c7d4c29
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,754 @@
+/*
+ * Performance counter support - powerpc architecture code
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_counter.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/reg.h>
+#include <asm/pmc.h>
+
+struct cpu_hw_counters {
+	int n_counters;
+	int n_percpu;
+	int disabled;
+	int n_added;
+	struct perf_counter *counter[MAX_HWCOUNTERS];
+	unsigned int events[MAX_HWCOUNTERS];
+	u64 mmcr[3];
+};
+DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
+
+struct power_pmu *ppmu;
+
+void perf_counter_print_debug(void)
+{
+}
+
+/*
+ * Return 1 for a software counter, 0 for a hardware counter
+ */
+static inline int is_software_counter(struct perf_counter *counter)
+{
+	return !counter->hw_event.raw && counter->hw_event.type < 0;
+}
+
+/*
+ * Read one performance monitor counter (PMC).
+ */
+static unsigned long read_pmc(int idx)
+{
+	unsigned long val;
+
+	switch (idx) {
+	case 1:
+		val = mfspr(SPRN_PMC1);
+		break;
+	case 2:
+		val = mfspr(SPRN_PMC2);
+		break;
+	case 3:
+		val = mfspr(SPRN_PMC3);
+		break;
+	case 4:
+		val = mfspr(SPRN_PMC4);
+		break;
+	case 5:
+		val = mfspr(SPRN_PMC5);
+		break;
+	case 6:
+		val = mfspr(SPRN_PMC6);
+		break;
+	case 7:
+		val = mfspr(SPRN_PMC7);
+		break;
+	case 8:
+		val = mfspr(SPRN_PMC8);
+		break;
+	default:
+		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
+		val = 0;
+	}
+	return val;
+}
+
+/*
+ * Write one PMC.
+ */
+static void write_pmc(int idx, unsigned long val)
+{
+	switch (idx) {
+	case 1:
+		mtspr(SPRN_PMC1, val);
+		break;
+	case 2:
+		mtspr(SPRN_PMC2, val);
+		break;
+	case 3:
+		mtspr(SPRN_PMC3, val);
+		break;
+	case 4:
+		mtspr(SPRN_PMC4, val);
+		break;
+	case 5:
+		mtspr(SPRN_PMC5, val);
+		break;
+	case 6:
+		mtspr(SPRN_PMC6, val);
+		break;
+	case 7:
+		mtspr(SPRN_PMC7, val);
+		break;
+	case 8:
+		mtspr(SPRN_PMC8, val);
+		break;
+	default:
+		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
+	}
+}
+
+/*
+ * Check if a set of events can all go on the PMU at once.
+ * If they can't, this will look at alternative codes for the events
+ * and see if any combination of alternative codes is feasible.
+ * The feasible set is returned in event[].
+ */
+static int power_check_constraints(unsigned int event[], int n_ev)
+{
+	u64 mask, value, nv;
+	unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
+	int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
+	int i, j;
+	u64 addf = ppmu->add_fields;
+	u64 tadd = ppmu->test_adder;
+
+	if (n_ev > ppmu->n_counter)
+		return -1;
+
+	/* First see if the events will go on as-is */
+	for (i = 0; i < n_ev; ++i) {
+		alternatives[i][0] = event[i];
+		if (ppmu->get_constraint(event[i], &amasks[i][0],
+					 &avalues[i][0]))
+			return -1;
+		choice[i] = 0;
+	}
+	value = mask = 0;
+	for (i = 0; i < n_ev; ++i) {
+		nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
+		if ((((nv + tadd) ^ value) & mask) != 0 ||
+		    (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
+			break;
+		value = nv;
+		mask |= amasks[i][0];
+	}
+	if (i == n_ev)
+		return 0;	/* all OK */
+
+	/* doesn't work, gather alternatives... */
+	if (!ppmu->get_alternatives)
+		return -1;
+	for (i = 0; i < n_ev; ++i) {
+		n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
+		for (j = 1; j < n_alt[i]; ++j)
+			ppmu->get_constraint(alternatives[i][j],
+					     &amasks[i][j], &avalues[i][j]);
+	}
+
+	/* enumerate all possibilities and see if any will work */
+	i = 0;
+	j = -1;
+	value = mask = nv = 0;
+	while (i < n_ev) {
+		if (j >= 0) {
+			/* we're backtracking, restore context */
+			value = svalues[i];
+			mask = smasks[i];
+			j = choice[i];
+		}
+		/*
+		 * See if any alternative k for event i,
+		 * where k > j, will satisfy the constraints.
+		 */
+		while (++j < n_alt[i]) {
+			nv = (value | avalues[i][j]) +
+				(value & avalues[i][j] & addf);
+			if ((((nv + tadd) ^ value) & mask) == 0 &&
+			    (((nv + tadd) ^ avalues[i][j])
+			     & amasks[i][j]) == 0)
+				break;
+		}
+		if (j >= n_alt[i]) {
+			/*
+			 * No feasible alternative, backtrack
+			 * to event i-1 and continue enumerating its
+			 * alternatives from where we got up to.
+			 */
+			if (--i < 0)
+				return -1;
+		} else {
+			/*
+			 * Found a feasible alternative for event i,
+			 * remember where we got up to with this event,
+			 * go on to the next event, and start with
+			 * the first alternative for it.
+			 */
+			choice[i] = j;
+			svalues[i] = value;
+			smasks[i] = mask;
+			value = nv;
+			mask |= amasks[i][j];
+			++i;
+			j = -1;
+		}
+	}
+
+	/* OK, we have a feasible combination, tell the caller the solution */
+	for (i = 0; i < n_ev; ++i)
+		event[i] = alternatives[i][choice[i]];
+	return 0;
+}
+
+static void power_perf_read(struct perf_counter *counter)
+{
+	long val, delta, prev;
+
+	if (!counter->hw.idx)
+		return;
+	/*
+	 * Performance monitor interrupts come even when interrupts
+	 * are soft-disabled, as long as interrupts are hard-enabled.
+	 * Therefore we treat them like NMIs.
+	 */
+	do {
+		prev = atomic64_read(&counter->hw.prev_count);
+		barrier();
+		val = read_pmc(counter->hw.idx);
+	} while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
+
+	/* The counters are only 32 bits wide */
+	delta = (val - prev) & 0xfffffffful;
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &counter->hw.period_left);
+}
+
+/*
+ * Disable all counters to prevent PMU interrupts and to allow
+ * counters to be added or removed.
+ */
+u64 hw_perf_save_disable(void)
+{
+	struct cpu_hw_counters *cpuhw;
+	unsigned long ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+
+	ret = cpuhw->disabled;
+	if (!ret) {
+		cpuhw->disabled = 1;
+		cpuhw->n_added = 0;
+
+		/*
+		 * Set the 'freeze counters' bit.
+		 * The barrier is to make sure the mtspr has been
+		 * executed and the PMU has frozen the counters
+		 * before we return.
+		 */
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+		mb();
+	}
+	local_irq_restore(flags);
+	return ret;
+}
+
+/*
+ * Re-enable all counters if disable == 0.
+ * If we were previously disabled and counters were added, then
+ * put the new config on the PMU.
+ */
+void hw_perf_restore(u64 disable)
+{
+	struct perf_counter *counter;
+	struct cpu_hw_counters *cpuhw;
+	unsigned long flags;
+	long i;
+	unsigned long val;
+	s64 left;
+	unsigned int hwc_index[MAX_HWCOUNTERS];
+
+	if (disable)
+		return;
+	local_irq_save(flags);
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+	cpuhw->disabled = 0;
+
+	/*
+	 * If we didn't change anything, or only removed counters,
+	 * no need to recalculate MMCR* settings and reset the PMCs.
+	 * Just reenable the PMU with the current MMCR* settings
+	 * (possibly updated for removal of counters).
+	 */
+	if (!cpuhw->n_added) {
+		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+		mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+		goto out;
+	}
+
+	/*
+	 * Compute MMCR* values for the new set of counters
+	 */
+	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
+			       cpuhw->mmcr)) {
+		/* shouldn't ever get here */
+		printk(KERN_ERR "oops compute_mmcr failed\n");
+		goto out;
+	}
+
+	/*
+	 * Write the new configuration to MMCR* with the freeze
+	 * bit set and set the hardware counters to their initial values.
+	 * Then unfreeze the counters.
+	 */
+	mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+				| MMCR0_FC);
+
+	/*
+	 * Read off any pre-existing counters that need to move
+	 * to another PMC.
+	 */
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		counter = cpuhw->counter[i];
+		if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
+			power_perf_read(counter);
+			write_pmc(counter->hw.idx, 0);
+			counter->hw.idx = 0;
+		}
+	}
+
+	/*
+	 * Initialize the PMCs for all the new and moved counters.
+	 */
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		counter = cpuhw->counter[i];
+		if (counter->hw.idx)
+			continue;
+		val = 0;
+		if (counter->hw_event.irq_period) {
+			left = atomic64_read(&counter->hw.period_left);
+			if (left < 0x80000000L)
+				val = 0x80000000L - left;
+		}
+		atomic64_set(&counter->hw.prev_count, val);
+		counter->hw.idx = hwc_index[i] + 1;
+		write_pmc(counter->hw.idx, val);
+	}
+	mb();
+	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+
+ out:
+	local_irq_restore(flags);
+}
+
+static int collect_events(struct perf_counter *group, int max_count,
+			  struct perf_counter *ctrs[], unsigned int *events)
+{
+	int n = 0;
+	struct perf_counter *counter;
+
+	if (!is_software_counter(group)) {
+		if (n >= max_count)
+			return -1;
+		ctrs[n] = group;
+		events[n++] = group->hw.config;
+	}
+	list_for_each_entry(counter, &group->sibling_list, list_entry) {
+		if (!is_software_counter(counter) &&
+		    counter->state != PERF_COUNTER_STATE_OFF) {
+			if (n >= max_count)
+				return -1;
+			ctrs[n] = counter;
+			events[n++] = counter->hw.config;
+		}
+	}
+	return n;
+}
+
+static void counter_sched_in(struct perf_counter *counter, int cpu)
+{
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
+	counter->oncpu = cpu;
+	if (is_software_counter(counter))
+		counter->hw_ops->enable(counter);
+}
+
+/*
+ * Called to enable a whole group of counters.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ */
+int hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu)
+{
+	struct cpu_hw_counters *cpuhw;
+	long i, n, n0;
+	struct perf_counter *sub;
+
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+	n0 = cpuhw->n_counters;
+	n = collect_events(group_leader, ppmu->n_counter - n0,
+			   &cpuhw->counter[n0], &cpuhw->events[n0]);
+	if (n < 0)
+		return -EAGAIN;
+	if (power_check_constraints(cpuhw->events, n + n0))
+		return -EAGAIN;
+	cpuhw->n_counters = n0 + n;
+	cpuhw->n_added += n;
+
+	/*
+	 * OK, this group can go on; update counter states etc.,
+	 * and enable any software counters
+	 */
+	for (i = n0; i < n0 + n; ++i)
+		cpuhw->counter[i]->hw.config = cpuhw->events[i];
+	n = 1;
+	counter_sched_in(group_leader, cpu);
+	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
+		if (sub->state != PERF_COUNTER_STATE_OFF) {
+			counter_sched_in(sub, cpu);
+			++n;
+		}
+	}
+	cpuctx->active_oncpu += n;
+	ctx->nr_active += n;
+
+	return 1;
+}
+
+/*
+ * Add a counter to the PMU.
+ * If all counters are not already frozen, then we disable and
+ * re-enable the PMU in order to get hw_perf_restore to do the
+ * actual work of reconfiguring the PMU.
+ */
+static int power_perf_enable(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuhw;
+	unsigned long flags;
+	u64 pmudis;
+	int n0;
+	int ret = -EAGAIN;
+
+	local_irq_save(flags);
+	pmudis = hw_perf_save_disable();
+
+	/*
+	 * Add the counter to the list (if there is room)
+	 * and check whether the total set is still feasible.
+	 */
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+	n0 = cpuhw->n_counters;
+	if (n0 >= ppmu->n_counter)
+		goto out;
+	cpuhw->counter[n0] = counter;
+	cpuhw->events[n0] = counter->hw.config;
+	if (power_check_constraints(cpuhw->events, n0 + 1))
+		goto out;
+
+	counter->hw.config = cpuhw->events[n0];
+	++cpuhw->n_counters;
+	++cpuhw->n_added;
+
+	ret = 0;
+ out:
+	hw_perf_restore(pmudis);
+	local_irq_restore(flags);
+	return ret;
+}
+
+/*
+ * Remove a counter from the PMU.
+ */
+static void power_perf_disable(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuhw;
+	long i;
+	u64 pmudis;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	pmudis = hw_perf_save_disable();
+
+	power_perf_read(counter);
+
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		if (counter == cpuhw->counter[i]) {
+			while (++i < cpuhw->n_counters)
+				cpuhw->counter[i-1] = cpuhw->counter[i];
+			--cpuhw->n_counters;
+			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
+			write_pmc(counter->hw.idx, 0);
+			counter->hw.idx = 0;
+			break;
+		}
+	}
+	if (cpuhw->n_counters == 0) {
+		/* disable exceptions if no counters are running */
+		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+	}
+
+	hw_perf_restore(pmudis);
+	local_irq_restore(flags);
+}
+
+struct hw_perf_counter_ops power_perf_ops = {
+	.enable = power_perf_enable,
+	.disable = power_perf_disable,
+	.read = power_perf_read
+};
+
+const struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter)
+{
+	unsigned long ev;
+	struct perf_counter *ctrs[MAX_HWCOUNTERS];
+	unsigned int events[MAX_HWCOUNTERS];
+	int n;
+
+	if (!ppmu)
+		return NULL;
+	if ((s64)counter->hw_event.irq_period < 0)
+		return NULL;
+	ev = counter->hw_event.type;
+	if (!counter->hw_event.raw) {
+		if (ev >= ppmu->n_generic ||
+		    ppmu->generic_events[ev] == 0)
+			return NULL;
+		ev = ppmu->generic_events[ev];
+	}
+	counter->hw.config_base = ev;
+	counter->hw.idx = 0;
+
+	/*
+	 * If this is in a group, check if it can go on with all the
+	 * other hardware counters in the group.  We assume the counter
+	 * hasn't been linked into its leader's sibling list at this point.
+	 */
+	n = 0;
+	if (counter->group_leader != counter) {
+		n = collect_events(counter->group_leader, ppmu->n_counter - 1,
+				   ctrs, events);
+		if (n < 0)
+			return NULL;
+	}
+	events[n++] = ev;
+	if (power_check_constraints(events, n))
+		return NULL;
+
+	counter->hw.config = events[n - 1];
+	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+	return &power_perf_ops;
+}
+
+/*
+ * Handle wakeups.
+ */
+void perf_counter_do_pending(void)
+{
+	int i;
+	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
+	struct perf_counter *counter;
+
+	set_perf_counter_pending(0);
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		counter = cpuhw->counter[i];
+		if (counter && counter->wakeup_pending) {
+			counter->wakeup_pending = 0;
+			wake_up(&counter->waitq);
+		}
+	}
+}
+
+/*
+ * Record data for an irq counter.
+ * This function was lifted from the x86 code; maybe it should
+ * go in the core?
+ */
+static void perf_store_irq_data(struct perf_counter *counter, u64 data)
+{
+	struct perf_data *irqdata = counter->irqdata;
+
+	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+		irqdata->overrun++;
+	} else {
+		u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+		*p = data;
+		irqdata->len += sizeof(u64);
+	}
+}
+
+/*
+ * Record all the values of the counters in a group
+ */
+static void perf_handle_group(struct perf_counter *counter)
+{
+	struct perf_counter *leader, *sub;
+
+	leader = counter->group_leader;
+	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+		if (sub != counter)
+			sub->hw_ops->read(sub);
+		perf_store_irq_data(counter, sub->hw_event.type);
+		perf_store_irq_data(counter, atomic64_read(&sub->count));
+	}
+}
+
+/*
+ * A counter has overflowed; update its count and record
+ * things if requested.  Note that interrupts are hard-disabled
+ * here so there is no possibility of being interrupted.
+ */
+static void record_and_restart(struct perf_counter *counter, long val,
+			       struct pt_regs *regs)
+{
+	s64 prev, delta, left;
+	int record = 0;
+
+	/* we don't have to worry about interrupts here */
+	prev = atomic64_read(&counter->hw.prev_count);
+	delta = (val - prev) & 0xfffffffful;
+	atomic64_add(delta, &counter->count);
+
+	/*
+	 * See if the total period for this counter has expired,
+	 * and update for the next period.
+	 */
+	val = 0;
+	left = atomic64_read(&counter->hw.period_left) - delta;
+	if (counter->hw_event.irq_period) {
+		if (left <= 0) {
+			left += counter->hw_event.irq_period;
+			if (left <= 0)
+				left = counter->hw_event.irq_period;
+			record = 1;
+		}
+		if (left < 0x80000000L)
+			val = 0x80000000L - left;
+	}
+	write_pmc(counter->hw.idx, val);
+	atomic64_set(&counter->hw.prev_count, val);
+	atomic64_set(&counter->hw.period_left, left);
+
+	/*
+	 * Finally record data if requested.
+	 */
+	if (record) {
+		switch (counter->hw_event.record_type) {
+		case PERF_RECORD_SIMPLE:
+			break;
+		case PERF_RECORD_IRQ:
+			perf_store_irq_data(counter, instruction_pointer(regs));
+			counter->wakeup_pending = 1;
+			break;
+		case PERF_RECORD_GROUP:
+			perf_handle_group(counter);
+			counter->wakeup_pending = 1;
+			break;
+		}
+	}
+}
+
+/*
+ * Performance monitor interrupt stuff
+ */
+static void perf_counter_interrupt(struct pt_regs *regs)
+{
+	int i;
+	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
+	struct perf_counter *counter;
+	long val;
+	int need_wakeup = 0, found = 0;
+
+	for (i = 0; i < cpuhw->n_counters; ++i) {
+		counter = cpuhw->counter[i];
+		val = read_pmc(counter->hw.idx);
+		if ((int)val < 0) {
+			/* counter has overflowed */
+			found = 1;
+			record_and_restart(counter, val, regs);
+			if (counter->wakeup_pending)
+				need_wakeup = 1;
+		}
+	}
+
+	/*
+	 * In case we didn't find and reset the counter that caused
+	 * the interrupt, scan all counters and reset any that are
+	 * negative, to avoid getting continual interrupts.
+	 * Any that we processed in the previous loop will not be negative.
+	 */
+	if (!found) {
+		for (i = 0; i < ppmu->n_counter; ++i) {
+			val = read_pmc(i + 1);
+			if ((int)val < 0)
+				write_pmc(i + 1, 0);
+		}
+	}
+
+	/*
+	 * Reset MMCR0 to its normal value.  This will set PMXE and
+	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
+	 * and thus allow interrupts to occur again.
+	 * XXX might want to use MSR.PM to keep the counters frozen until
+	 * we get back out of this interrupt.
+	 */
+	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+
+	/*
+	 * If we need a wakeup, check whether interrupts were soft-enabled
+	 * when we took the interrupt.  If they were, we can wake stuff up
+	 * immediately; otherwise we'll have to set a flag and do the
+	 * wakeup when interrupts get soft-enabled.
+	 */
+	if (need_wakeup) {
+		if (regs->softe) {
+			irq_enter();
+			perf_counter_do_pending();
+			irq_exit();
+		} else {
+			set_perf_counter_pending(1);
+		}
+	}
+}
+
+static int init_perf_counters(void)
+{
+	if (reserve_pmc_hardware(perf_counter_interrupt)) {
+		printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+arch_initcall(init_perf_counters);
-- 
cgit v0.10.2


From 16b067993dee3dfde61b20027e0b168dc06201ee Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sat, 10 Jan 2009 16:34:07 +1100
Subject: powerpc/perf_counter: Add support for PPC970 family

This adds the back-end for the PMU on the PPC970 family.

The PPC970 allows events from the ISU to be selected in two different
ways.  Rather than use alternative event codes to express this, we
instead use a single encoding for ISU events and express the
resulting constraint (that you can't select events from all three
of FPU/IFU/VPU, ISU and IDU/STS at the same time, since they all come
in through only 2 multiplexers) using a NAND constraint field, and
work out which multiplexer is used for ISU events at compute_mmcr
time.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index fde190b..45798f6 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
-obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o
+obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o ppc970-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index c7d4c29..5561ecb 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -741,13 +741,26 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	}
 }
 
+extern struct power_pmu ppc970_pmu;
+
 static int init_perf_counters(void)
 {
+	unsigned long pvr;
+
 	if (reserve_pmc_hardware(perf_counter_interrupt)) {
 		printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
 		return -EBUSY;
 	}
 
+	/* XXX should get this from cputable */
+	pvr = mfspr(SPRN_PVR);
+	switch (PVR_VER(pvr)) {
+	case PV_970:
+	case PV_970FX:
+	case PV_970MP:
+		ppmu = &ppc970_pmu;
+		break;
+	}
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
new file mode 100644
index 0000000..c325658
--- /dev/null
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -0,0 +1,375 @@
+/*
+ * Performance counter support for PPC970-family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for PPC970
+ */
+#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_BYTE_SH	4	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	3
+#define PM_PMCSEL_MSK	0xf
+
+/* Values in PM_UNIT field */
+#define PM_NONE		0
+#define PM_FPU		1
+#define PM_VPU		2
+#define PM_ISU		3
+#define PM_IFU		4
+#define PM_IDU		5
+#define PM_STS		6
+#define PM_LSU0		7
+#define PM_LSU1U	8
+#define PM_LSU1L	9
+#define PM_LASTUNIT	9
+
+/*
+ * Bits in MMCR0 for PPC970
+ */
+#define MMCR0_PMC1SEL_SH	8
+#define MMCR0_PMC2SEL_SH	1
+#define MMCR_PMCSEL_MSK		0x1f
+
+/*
+ * Bits in MMCR1 for PPC970
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	59
+#define MMCR1_TTM3SEL_SH	53
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	50
+#define MMCR1_TD_CP_DBG1SEL_SH	48
+#define MMCR1_TD_CP_DBG2SEL_SH	46
+#define MMCR1_TD_CP_DBG3SEL_SH	44
+#define MMCR1_PMC1_ADDER_SEL_SH	39
+#define MMCR1_PMC2_ADDER_SEL_SH	38
+#define MMCR1_PMC6_ADDER_SEL_SH	37
+#define MMCR1_PMC5_ADDER_SEL_SH	36
+#define MMCR1_PMC8_ADDER_SEL_SH	35
+#define MMCR1_PMC7_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC3SEL_SH	27
+#define MMCR1_PMC4SEL_SH	22
+#define MMCR1_PMC5SEL_SH	17
+#define MMCR1_PMC6SEL_SH	12
+#define MMCR1_PMC7SEL_SH	7
+#define MMCR1_PMC8SEL_SH	2
+
+static short mmcr1_adder_bits[8] = {
+	MMCR1_PMC1_ADDER_SEL_SH,
+	MMCR1_PMC2_ADDER_SEL_SH,
+	MMCR1_PMC3_ADDER_SEL_SH,
+	MMCR1_PMC4_ADDER_SEL_SH,
+	MMCR1_PMC5_ADDER_SEL_SH,
+	MMCR1_PMC6_ADDER_SEL_SH,
+	MMCR1_PMC7_ADDER_SEL_SH,
+	MMCR1_PMC8_ADDER_SEL_SH
+};
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *                 <><>[  >[  >[  ><  ><  ><  ><  ><><><><><><><><>
+ *                 T0T1 UC  PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
+ *
+ * T0 - TTM0 constraint
+ *     46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
+ *     43: UC3 error 0x0800_0000_0000
+ *     42: FPU|IFU|VPU events needed 0x0400_0000_0000
+ *     41: ISU events needed 0x0200_0000_0000
+ *     40: IDU|STS events needed 0x0100_0000_0000
+ *
+ * PS1
+ *     39: PS1 error 0x0080_0000_0000
+ *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
+ *
+ * PS2
+ *     35: PS2 error 0x0008_0000_0000
+ *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
+ *
+ * B0
+ *     28-31: Byte 0 event source 0xf000_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
+ *
+ * P1
+ *     15: P1 error 0x8000
+ *     14-15: Count of events needing PMC1
+ *
+ * P2..P8
+ *     0-13: Count of events needing PMC2..PMC8
+ */
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0xc80000000000ull, 0x040000000000ull },
+	[PM_VPU] =   { 0xc80000000000ull, 0xc40000000000ull },
+	[PM_ISU] =   { 0x080000000000ull, 0x020000000000ull },
+	[PM_IFU] =   { 0xc80000000000ull, 0x840000000000ull },
+	[PM_IDU] =   { 0x380000000000ull, 0x010000000000ull },
+	[PM_STS] =   { 0x380000000000ull, 0x310000000000ull },
+};
+
+static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, unit, sh;
+	u64 mask = 0, value = 0;
+	int grp = -1;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 8)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		grp = ((pmc - 1) >> 1) & 1;
+	}
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit) {
+		if (unit > PM_LASTUNIT)
+			return -1;
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		/*
+		 * Bus events on bytes 0 and 2 can be counted
+		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
+		 */
+		if (!pmc)
+			grp = byte & 1;
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (28 - 4 * byte);
+		value |= (u64)unit << (28 - 4 * byte);
+	}
+	if (grp == 0) {
+		/* increment PMC1/2/5/6 field */
+		mask  |= 0x8000000000ull;
+		value |= 0x1000000000ull;
+	} else if (grp == 1) {
+		/* increment PMC3/4/7/8 field */
+		mask  |= 0x800000000ull;
+		value |= 0x100000000ull;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static int p970_get_alternatives(unsigned int event, unsigned int alt[])
+{
+	alt[0] = event;
+
+	/* 2 alternatives for LSU empty */
+	if (event == 0x2002 || event == 0x3002) {
+		alt[1] = event ^ 0x1000;
+		return 2;
+	}
+		
+	return 1;
+}
+
+static int p970_compute_mmcr(unsigned int event[], int n_ev,
+			     unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm, grp;
+	unsigned int pmc_inuse = 0;
+	unsigned int pmc_grp_use[2];
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
+	unsigned char ttmuse[2];
+	unsigned char pmcsel[8];
+	int i;
+
+	if (n_ev > 8)
+		return -1;
+
+	/* First pass to count resource use */
+	pmc_grp_use[0] = pmc_grp_use[1] = 0;
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+			/* count 1/2/5/6 vs 3/4/7/8 use */
+			++pmc_grp_use[((pmc - 1) >> 1) & 1];
+		}
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (unit) {
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (!pmc)
+				++pmc_grp_use[byte & 1];
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
+		return -1;
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
+		unitmap[PM_ISU] = 2 | 4;	/* move ISU to TTM1 */
+	/* Set TTM[01]SEL fields. */
+	ttmuse[0] = ttmuse[1] = 0;
+	for (i = PM_FPU; i <= PM_STS; ++i) {
+		if (!unituse[i])
+			continue;
+		ttm = unitmap[i];
+		++ttmuse[(ttm >> 2) & 1];
+		mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
+	}
+	/* Check only one unit per TTMx */
+	if (ttmuse[0] > 1 || ttmuse[1] > 1)
+		return -1;
+
+	/* Set byte lane select fields and TTM3SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit <= PM_STS)
+			ttm = (unitmap[unit] >> 2) & 1;
+		else if (unit == PM_LSU0)
+			ttm = 2;
+		else {
+			ttm = 3;
+			if (unit == PM_LSU1L && byte >= 2)
+				mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	memset(pmcsel, 0x8, sizeof(pmcsel));	/* 8 means don't count */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			if (unit)
+				psel |= 0x10 | ((byte & 2) << 2);
+			else
+				psel |= 8;
+			for (pmc = 0; pmc < 8; ++pmc) {
+				if (pmc_inuse & (1 << pmc))
+					continue;
+				grp = (pmc >> 1) & 1;
+				if (unit) {
+					if (grp == (byte & 1))
+						break;
+				} else if (pmc_grp_use[grp] < 4) {
+					++pmc_grp_use[grp];
+					break;
+				}
+			}
+			pmc_inuse |= 1 << pmc;
+		} else {
+			/* Direct event */
+			--pmc;
+			if (psel == 0 && (byte & 2))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
+		}
+		pmcsel[pmc] = psel;
+		hwc[i] = pmc;
+	}
+	for (pmc = 0; pmc < 2; ++pmc)
+		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
+	for (; pmc < 8; ++pmc)
+		mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
+	if (pmc_inuse & 1)
+		mmcr0 |= MMCR0_PMC1CE;
+	if (pmc_inuse & 0xfe)
+		mmcr0 |= MMCR0_PMCjCE;
+
+	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */
+
+	/* Return MMCRx values */
+	mmcr[0] = mmcr0;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	int shift, i;
+
+	if (pmc <= 1) {
+		shift = MMCR0_PMC1SEL_SH - 7 * pmc;
+		i = 0;
+	} else {
+		shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
+		i = 1;
+	}
+	/*
+	 * Setting the PMCxSEL field to 0x08 disables PMC x.
+	 */
+	mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
+}
+
+static int ppc970_generic_events[] = {
+	[PERF_COUNT_CPU_CYCLES] = 7,
+	[PERF_COUNT_INSTRUCTIONS] = 1,
+	[PERF_COUNT_CACHE_REFERENCES] = 0x8810,		/* PM_LD_REF_L1 */
+	[PERF_COUNT_CACHE_MISSES] = 0x3810,		/* PM_LD_MISS_L1 */
+	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431,	/* PM_BR_ISSUED */
+	[PERF_COUNT_BRANCH_MISSES] = 0x327,		/* PM_GRP_BR_MPRED */
+};
+
+struct power_pmu ppc970_pmu = {
+	.n_counter = 8,
+	.max_alternatives = 2,
+	.add_fields = 0x001100005555ull,
+	.test_adder = 0x013300000000ull,
+	.compute_mmcr = p970_compute_mmcr,
+	.get_constraint = p970_get_constraint,
+	.get_alternatives = p970_get_alternatives,
+	.disable_pmc = p970_disable_pmc,
+	.n_generic = ARRAY_SIZE(ppc970_generic_events),
+	.generic_events = ppc970_generic_events,
+};
-- 
cgit v0.10.2


From f78628374a13bc150db77c6e02d4f2c0a7f932ef Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 21:05:35 +1100
Subject: powerpc/perf_counter: Add support for POWER6

This adds the back-end for the PMU on the POWER6 processor.
Fortunately, the event selection hardware is somewhat simpler on
POWER6 than on other POWER family processors, so the constraints
fit into only 32 bits.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 45798f6..0ebf4d0 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
-obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o ppc970-pmu.o
+obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o ppc970-pmu.o power6-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5561ecb..df3fe05 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -742,6 +742,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 }
 
 extern struct power_pmu ppc970_pmu;
+extern struct power_pmu power6_pmu;
 
 static int init_perf_counters(void)
 {
@@ -760,6 +761,9 @@ static int init_perf_counters(void)
 	case PV_970MP:
 		ppmu = &ppc970_pmu;
 		break;
+	case 0x3e:
+		ppmu = &power6_pmu;
+		break;
 	}
 	return 0;
 }
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
new file mode 100644
index 0000000..b1f61f3
--- /dev/null
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -0,0 +1,283 @@
+/*
+ * Performance counter support for POWER6 processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER6
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0x7
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	16	/* Unit event comes (TTMxSEL encoding) */
+#define PM_UNIT_MSK	0xf
+#define PM_UNIT_MSKS	(PM_UNIT_MSK << PM_UNIT_SH)
+#define PM_LLAV		0x8000	/* Load lookahead match value */
+#define PM_LLA		0x4000	/* Load lookahead match enable */
+#define PM_BYTE_SH	12	/* Byte of event bus to use */
+#define PM_BYTE_MSK	3
+#define PM_SUBUNIT_SH	8	/* Subunit event comes from (NEST_SEL enc.) */
+#define PM_SUBUNIT_MSK	7
+#define PM_SUBUNIT_MSKS	(PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
+#define PM_PMCSEL_MSK	0xff	/* PMCxSEL value */
+#define PM_BUSEVENT_MSK	0xf3700
+
+/*
+ * Bits in MMCR1 for POWER6
+ */
+#define MMCR1_TTM0SEL_SH	60
+#define MMCR1_TTMSEL_SH(n)	(MMCR1_TTM0SEL_SH - (n) * 4)
+#define MMCR1_TTMSEL_MSK	0xf
+#define MMCR1_TTMSEL(m, n)	(((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
+#define MMCR1_NESTSEL_SH	45
+#define MMCR1_NESTSEL_MSK	0x7
+#define MMCR1_NESTSEL(m)	(((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
+#define MMCR1_PMC1_LLA		((u64)1 << 44)
+#define MMCR1_PMC1_LLA_VALUE	((u64)1 << 39)
+#define MMCR1_PMC1_ADDR_SEL	((u64)1 << 35)
+#define MMCR1_PMC1SEL_SH	24
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0xff
+
+/*
+ * Assign PMC numbers and compute MMCR1 value for a set of events
+ */
+static int p6_compute_mmcr(unsigned int event[], int n_ev,
+			   unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr1 = 0;
+	int i;
+	unsigned int pmc, ev, b, u, s, psel;
+	unsigned int ttmset = 0;
+	unsigned int pmc_inuse = 0;
+
+	if (n_ev > 4)
+		return -1;
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;	/* collision! */
+			pmc_inuse |= 1 << (pmc - 1);
+		}
+	}
+	for (i = 0; i < n_ev; ++i) {
+		ev = event[i];
+		pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			--pmc;
+		} else {
+			/* can go on any PMC; find a free one */
+			for (pmc = 0; pmc < 4; ++pmc)
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			pmc_inuse |= 1 << pmc;
+		}
+		hwc[i] = pmc;
+		psel = ev & PM_PMCSEL_MSK;
+		if (ev & PM_BUSEVENT_MSK) {
+			/* this event uses the event bus */
+			b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
+			u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
+			/* check for conflict on this byte of event bus */
+			if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
+				return -1;
+			mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
+			ttmset |= 1 << b;
+			if (u == 5) {
+				/* Nest events have a further mux */
+				s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+				if ((ttmset & 0x10) &&
+				    MMCR1_NESTSEL(mmcr1) != s)
+					return -1;
+				ttmset |= 0x10;
+				mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
+			}
+			if (0x30 <= psel && psel <= 0x3d) {
+				/* these need the PMCx_ADDR_SEL bits */
+				if (b >= 2)
+					mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
+			}
+			/* bus select values are different for PMC3/4 */
+			if (pmc >= 2 && (psel & 0x90) == 0x80)
+				psel ^= 0x20;
+		}
+		if (ev & PM_LLA) {
+			mmcr1 |= MMCR1_PMC1_LLA >> pmc;
+			if (ev & PM_LLAV)
+				mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
+		}
+		mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
+	}
+	mmcr[0] = 0;
+	if (pmc_inuse & 1)
+		mmcr[0] = MMCR0_PMC1CE;
+	if (pmc_inuse & 0xe)
+		mmcr[0] |= MMCR0_PMCjCE;
+	mmcr[1] = mmcr1;
+	mmcr[2] = 0;
+	return 0;
+}
+
+/*
+ * Layout of constraint bits:
+ *
+ *	0-1	add field: number of uses of PMC1 (max 1)
+ *	2-3, 4-5, 6-7: ditto for PMC2, 3, 4
+ *	8-10	select field: nest (subunit) event selector
+ *	16-19	select field: unit on byte 0 of event bus
+ *	20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
+ */
+static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, sh;
+	unsigned int mask = 0, value = 0;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 4)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		sh = byte * 4;
+		mask |= PM_UNIT_MSKS << sh;
+		value |= (event & PM_UNIT_MSKS) << sh;
+		if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
+			mask |= PM_SUBUNIT_MSKS;
+			value |= event & PM_SUBUNIT_MSKS;
+		}
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+#define MAX_ALT	4	/* at most 4 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x0130e8, 0x2000f6, 0x3000fc },	/* PM_PTEG_RELOAD_VALID */
+	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
+	{ 0x080088, 0x200054, 0x3000f0 },	/* PM_ST_MISS_L1 */
+	{ 0x10000a, 0x2000f4 },			/* PM_RUN_CYC */
+	{ 0x10000b, 0x2000f5 },			/* PM_RUN_COUNT */
+	{ 0x10000e, 0x400010 },			/* PM_PURR */
+	{ 0x100010, 0x4000f8 },			/* PM_FLUSH */
+	{ 0x10001a, 0x200010 },			/* PM_MRK_INST_DISP */
+	{ 0x100026, 0x3000f8 },			/* PM_TB_BIT_TRANS */
+	{ 0x100054, 0x2000f0 },			/* PM_ST_FIN */
+	{ 0x100056, 0x2000fc },			/* PM_L1_ICACHE_MISS */
+	{ 0x1000f0, 0x40000a },			/* PM_INST_IMC_MATCH_CMPL */
+	{ 0x1000f8, 0x200008 },			/* PM_GCT_EMPTY_CYC */
+	{ 0x1000fc, 0x400006 },			/* PM_LSU_DERAT_MISS_CYC */
+	{ 0x20000e, 0x400007 },			/* PM_LSU_DERAT_MISS */
+	{ 0x200012, 0x300012 },			/* PM_INST_DISP */
+	{ 0x2000f2, 0x3000f2 },			/* PM_INST_DISP */
+	{ 0x2000f8, 0x300010 },			/* PM_EXT_INT */
+	{ 0x2000fe, 0x300056 },			/* PM_DATA_FROM_L2MISS */
+	{ 0x2d0030, 0x30001a },			/* PM_MRK_FPU_FIN */
+	{ 0x30000a, 0x400018 },			/* PM_MRK_INST_FIN */
+	{ 0x3000f6, 0x40000e },			/* PM_L1_DCACHE_RELOAD_VALID */
+	{ 0x3000fe, 0x400056 },			/* PM_DATA_FROM_L3MISS */
+};
+
+/*
+ * This could be made more efficient with a binary search on
+ * a presorted list, if necessary
+ */
+static int find_alternatives_list(unsigned int event)
+{
+	int i, j;
+	unsigned int alt;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			return -1;
+		for (j = 0; j < MAX_ALT; ++j) {
+			alt = event_alternatives[i][j];
+			if (!alt || event < alt)
+				break;
+			if (event == alt)
+				return i;
+		}
+	}
+	return -1;
+}
+
+static int p6_get_alternatives(unsigned int event, unsigned int alt[])
+{
+	int i, j;
+	unsigned int aevent, psel, pmc;
+	unsigned int nalt = 1;
+
+	alt[0] = event;
+
+	/* check the alternatives table */
+	i = find_alternatives_list(event);
+	if (i >= 0) {
+		/* copy out alternatives from list */
+		for (j = 0; j < MAX_ALT; ++j) {
+			aevent = event_alternatives[i][j];
+			if (!aevent)
+				break;
+			if (aevent != event)
+				alt[nalt++] = aevent;
+		}
+
+	} else {
+		/* Check for alternative ways of computing sum events */
+		/* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
+		psel = event & (PM_PMCSEL_MSK & ~1);	/* ignore edge bit */
+		pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc && (psel == 0x32 || psel == 0x34))
+			alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
+				((5 - pmc) << PM_PMC_SH);
+
+		/* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
+		if (pmc && (psel == 0x38 || psel == 0x3a))
+			alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
+				((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
+	}
+
+	return nalt;
+}
+
+static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	/* Set PMCxSEL to 0 to disable PMCx */
+	mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power6_generic_events[] = {
+	[PERF_COUNT_CPU_CYCLES] = 0x1e,
+	[PERF_COUNT_INSTRUCTIONS] = 2,
+	[PERF_COUNT_CACHE_REFERENCES] = 0x280030,	/* LD_REF_L1 */
+	[PERF_COUNT_CACHE_MISSES] = 0x30000c,		/* LD_MISS_L1 */
+	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0,	/* BR_PRED */ 
+	[PERF_COUNT_BRANCH_MISSES] = 0x400052,		/* BR_MPRED */
+};
+
+struct power_pmu power6_pmu = {
+	.n_counter = 4,
+	.max_alternatives = MAX_ALT,
+	.add_fields = 0x55,
+	.test_adder = 0,
+	.compute_mmcr = p6_compute_mmcr,
+	.get_constraint = p6_get_constraint,
+	.get_alternatives = p6_get_alternatives,
+	.disable_pmc = p6_disable_pmc,
+	.n_generic = ARRAY_SIZE(power6_generic_events),
+	.generic_events = power6_generic_events,
+};
-- 
cgit v0.10.2


From dd0e6ba22ea21bcc2c420b385a170593c58f4c08 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 12 Jan 2009 15:11:00 +1100
Subject: perf_counter: Always schedule all software counters in

Software counters aren't subject to the limitations imposed by the
fixed number of hardware counter registers, so there is no reason not
to enable them all in __perf_counter_sched_in.  Previously we used to
break out of the loop when we got to a group that wouldn't fit on the
PMU; with this we continue through the list but only schedule in
software counters (or groups containing only software counters) from
there on.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4c0dccb..3aef306 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -455,12 +455,37 @@ group_error:
 	return -EAGAIN;
 }
 
+/*
+ * Return 1 for a software counter, 0 for a hardware counter
+ */
+static inline int is_software_counter(struct perf_counter *counter)
+{
+	return !counter->hw_event.raw && counter->hw_event.type < 0;
+}
+
+/*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	if (!is_software_counter(leader))
+		return 0;
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		if (!is_software_counter(counter))
+			return 0;
+	return 1;
+}
+
 static void
 __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
 {
 	struct perf_counter *counter;
 	u64 flags;
+	int can_add_hw = 1;
 
 	if (likely(!ctx->nr_counters))
 		return;
@@ -477,10 +502,12 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 
 		/*
 		 * If we scheduled in a group atomically and exclusively,
-		 * or if this group can't go on, break out:
+		 * or if this group can't go on, don't add any more
+		 * hardware counters.
 		 */
-		if (group_sched_in(counter, cpuctx, ctx, cpu))
-			break;
+		if (can_add_hw || is_software_only_group(counter))
+			if (group_sched_in(counter, cpuctx, ctx, cpu))
+				can_add_hw = 0;
 	}
 	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
-- 
cgit v0.10.2


From 01d0287f068de2934109ba9b989d8807526cccc2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 14 Jan 2009 13:44:19 +1100
Subject: powerpc/perf_counter: Make sure PMU gets enabled properly

This makes sure that we call the platform-specific ppc_md.enable_pmcs
function on each CPU before we try to use the PMU on that CPU.  If the
CPU goes off-line and then on-line, we need to do the enable_pmcs call
again, so we use the hw_perf_counter_setup hook to ensure that.  It gets
called as each CPU comes online, but it isn't called on the CPU that is
coming up, so this adds the CPU number as an argument to it (there were
no non-empty instances of hw_perf_counter_setup before).

This also arranges to set the pmcregs_in_use field of the lppaca (data
structure shared with the hypervisor) on each CPU when we are using the
PMU and clear it when we are not.  This allows the hypervisor to optimize
partition switches by not saving/restoring the PMU registers when we
aren't using the PMU.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index df3fe05..85ad259 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -15,6 +15,7 @@
 #include <linux/hardirq.h>
 #include <asm/reg.h>
 #include <asm/pmc.h>
+#include <asm/machdep.h>
 
 struct cpu_hw_counters {
 	int n_counters;
@@ -24,6 +25,7 @@ struct cpu_hw_counters {
 	struct perf_counter *counter[MAX_HWCOUNTERS];
 	unsigned int events[MAX_HWCOUNTERS];
 	u64 mmcr[3];
+	u8 pmcs_enabled;
 };
 DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
@@ -262,6 +264,15 @@ u64 hw_perf_save_disable(void)
 		cpuhw->n_added = 0;
 
 		/*
+		 * Check if we ever enabled the PMU on this cpu.
+		 */
+		if (!cpuhw->pmcs_enabled) {
+			if (ppc_md.enable_pmcs)
+				ppc_md.enable_pmcs();
+			cpuhw->pmcs_enabled = 1;
+		}
+
+		/*
 		 * Set the 'freeze counters' bit.
 		 * The barrier is to make sure the mtspr has been
 		 * executed and the PMU has frozen the counters
@@ -305,6 +316,8 @@ void hw_perf_restore(u64 disable)
 		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
 		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 		mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+		if (cpuhw->n_counters == 0)
+			get_lppaca()->pmcregs_in_use = 0;
 		goto out;
 	}
 
@@ -323,6 +336,7 @@ void hw_perf_restore(u64 disable)
 	 * bit set and set the hardware counters to their initial values.
 	 * Then unfreeze the counters.
 	 */
+	get_lppaca()->pmcregs_in_use = 1;
 	mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
 	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
@@ -741,6 +755,14 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	}
 }
 
+void hw_perf_counter_setup(int cpu)
+{
+	struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
+
+	memset(cpuhw, 0, sizeof(*cpuhw));
+	cpuhw->mmcr[0] = MMCR0_FC;
+}
+
 extern struct power_pmu ppc970_pmu;
 extern struct power_pmu power6_pmu;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3aef306..52f2f52 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,7 +46,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
 void __weak hw_perf_restore(u64 ctrl)		{ barrier(); }
-void __weak hw_perf_counter_setup(void)		{ barrier(); }
+void __weak hw_perf_counter_setup(int cpu)	{ barrier(); }
 int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu)
@@ -1598,7 +1598,7 @@ static void __cpuinit perf_counter_init_cpu(int cpu)
 	cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
 	mutex_unlock(&perf_resource_mutex);
 
-	hw_perf_counter_setup();
+	hw_perf_counter_setup(cpu);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-- 
cgit v0.10.2


From 3b6f9e5cb21964b7ce12bf81076f830885563ec8 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 14 Jan 2009 21:00:30 +1100
Subject: perf_counter: Add support for pinned and exclusive counter groups

Impact: New perf_counter features

A pinned counter group is one that the user wants to have on the CPU
whenever possible, i.e. whenever the associated task is running, for
a per-task group, or always for a per-cpu group.  If the system
cannot satisfy that, it puts the group into an error state where
it is not scheduled any more and reads from it return EOF (i.e. 0
bytes read).  The group can be released from error state and made
readable again using prctl(PR_TASK_PERF_COUNTERS_ENABLE).  When we
have finer-grained enable/disable controls on counters we'll be able
to reset the error state on individual groups.

An exclusive group is one that the user wants to be the only group
using the CPU performance monitor hardware whenever it is on.  The
counter group scheduler will not schedule an exclusive group if there
are already other groups on the CPU and will not schedule other groups
onto the CPU if there is an exclusive group scheduled (that statement
does not apply to groups containing only software counters, which can
always go on and which do not prevent an exclusive group from going on).
With an exclusive group, we will be able to let users program PMU
registers at a low level without the concern that those settings will
perturb other measurements.

Along the way this reorganizes things a little:
- is_software_counter() is moved to perf_counter.h.
- cpuctx->active_oncpu now records the number of hardware counters on
  the CPU, i.e. it now excludes software counters.  Nothing was reading
  cpuctx->active_oncpu before, so this change is harmless.
- A new cpuctx->exclusive field records whether we currently have an
  exclusive group on the CPU.
- counter_sched_out moves higher up in perf_counter.c and gets called
  from __perf_counter_remove_from_context and __perf_counter_exit_task,
  where we used to have essentially the same code.
- __perf_counter_sched_in now goes through the counter list twice, doing
  the pinned counters in the first loop and the non-pinned counters in
  the second loop, in order to give the pinned counters the best chance
  to be scheduled in.

Note that only a group leader can be exclusive or pinned, and that
attribute applies to the whole group.  This avoids some awkwardness in
some corner cases (e.g. where a group leader is closed and the other
group members get added to the context list).  If we want to relax that
restriction later, we can, and it is easier to relax a restriction than
to apply a new one.

This doesn't yet handle the case where a pinned counter is inherited
and goes into error state in the child - the error state is not
propagated up to the parent when the child exits, and arguably it
should.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 85ad259..5b02113 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -36,14 +36,6 @@ void perf_counter_print_debug(void)
 }
 
 /*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
-/*
  * Read one performance monitor counter (PMC).
  */
 static unsigned long read_pmc(int idx)
@@ -443,6 +435,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	 */
 	for (i = n0; i < n0 + n; ++i)
 		cpuhw->counter[i]->hw.config = cpuhw->events[i];
+	cpuctx->active_oncpu += n;
 	n = 1;
 	counter_sched_in(group_leader, cpu);
 	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
@@ -451,7 +444,6 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 			++n;
 		}
 	}
-	cpuctx->active_oncpu += n;
 	ctx->nr_active += n;
 
 	return 1;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index b21d1ea..7ab8e5f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -86,7 +86,10 @@ struct perf_counter_hw_event {
 				nmi	     :  1, /* NMI sampling        */
 				raw	     :  1, /* raw event type      */
 				inherit	     :  1, /* children inherit it */
-				__reserved_1 : 28;
+				pinned	     :  1, /* must always be on PMU */
+				exclusive    :  1, /* only counter on PMU */
+
+				__reserved_1 : 26;
 
 	u64			__reserved_2;
 };
@@ -141,6 +144,7 @@ struct hw_perf_counter_ops {
  * enum perf_counter_active_state - the states of a counter
  */
 enum perf_counter_active_state {
+	PERF_COUNTER_STATE_ERROR	= -2,
 	PERF_COUNTER_STATE_OFF		= -1,
 	PERF_COUNTER_STATE_INACTIVE	=  0,
 	PERF_COUNTER_STATE_ACTIVE	=  1,
@@ -214,6 +218,7 @@ struct perf_cpu_context {
 	struct perf_counter_context	*task_ctx;
 	int				active_oncpu;
 	int				max_pertask;
+	int				exclusive;
 };
 
 /*
@@ -240,6 +245,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu);
 
+/*
+ * Return 1 for a software counter, 0 for a hardware counter
+ */
+static inline int is_software_counter(struct perf_counter *counter)
+{
+	return !counter->hw_event.raw && counter->hw_event.type < 0;
+}
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 52f2f52..faf671b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -93,6 +93,25 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	}
 }
 
+static void
+counter_sched_out(struct perf_counter *counter,
+		  struct perf_cpu_context *cpuctx,
+		  struct perf_counter_context *ctx)
+{
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
+	counter->oncpu = -1;
+
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu--;
+	ctx->nr_active--;
+	if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -118,14 +137,9 @@ static void __perf_counter_remove_from_context(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->hw_ops->disable(counter);
-		ctx->nr_active--;
-		cpuctx->active_oncpu--;
-		counter->task = NULL;
-		counter->oncpu = -1;
-	}
+	counter_sched_out(counter, cpuctx, ctx);
+
+	counter->task = NULL;
 	ctx->nr_counters--;
 
 	/*
@@ -207,7 +221,7 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
+	if (counter->state <= PERF_COUNTER_STATE_OFF)
 		return 0;
 
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
@@ -223,13 +237,64 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
-	cpuctx->active_oncpu++;
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu++;
 	ctx->nr_active++;
 
+	if (counter->hw_event.exclusive)
+		cpuctx->exclusive = 1;
+
 	return 0;
 }
 
 /*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	if (!is_software_counter(leader))
+		return 0;
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		if (!is_software_counter(counter))
+			return 0;
+	return 1;
+}
+
+/*
+ * Work out whether we can put this counter group on the CPU now.
+ */
+static int group_can_go_on(struct perf_counter *counter,
+			   struct perf_cpu_context *cpuctx,
+			   int can_add_hw)
+{
+	/*
+	 * Groups consisting entirely of software counters can always go on.
+	 */
+	if (is_software_only_group(counter))
+		return 1;
+	/*
+	 * If an exclusive group is already on, no other hardware
+	 * counters can go on.
+	 */
+	if (cpuctx->exclusive)
+		return 0;
+	/*
+	 * If this group is exclusive and there are already
+	 * counters on the CPU, it can't go on.
+	 */
+	if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+		return 0;
+	/*
+	 * Otherwise, try to add it if all previous groups were able
+	 * to go on.
+	 */
+	return can_add_hw;
+}
+
+/*
  * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
@@ -240,6 +305,7 @@ static void __perf_install_in_context(void *info)
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	u64 perf_flags;
+	int err;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -261,9 +327,21 @@ static void __perf_install_in_context(void *info)
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	counter_sched_in(counter, cpuctx, ctx, cpu);
+	/*
+	 * An exclusive counter can't go on if there are already active
+	 * hardware counters, and no hardware counter can go on if there
+	 * is already an exclusive counter on.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
+	    !group_can_go_on(counter, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = counter_sched_in(counter, cpuctx, ctx, cpu);
+
+	if (err && counter->hw_event.pinned)
+		counter->state = PERF_COUNTER_STATE_ERROR;
 
-	if (!ctx->task && cpuctx->max_pertask)
+	if (!err && !ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
 	hw_perf_restore(perf_flags);
@@ -327,22 +405,6 @@ retry:
 }
 
 static void
-counter_sched_out(struct perf_counter *counter,
-		  struct perf_cpu_context *cpuctx,
-		  struct perf_counter_context *ctx)
-{
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return;
-
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->hw_ops->disable(counter);
-	counter->oncpu = -1;
-
-	cpuctx->active_oncpu--;
-	ctx->nr_active--;
-}
-
-static void
 group_sched_out(struct perf_counter *group_counter,
 		struct perf_cpu_context *cpuctx,
 		struct perf_counter_context *ctx)
@@ -359,6 +421,9 @@ group_sched_out(struct perf_counter *group_counter,
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
 		counter_sched_out(counter, cpuctx, ctx);
+
+	if (group_counter->hw_event.exclusive)
+		cpuctx->exclusive = 0;
 }
 
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
@@ -455,30 +520,6 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
- */
-static int is_software_only_group(struct perf_counter *leader)
-{
-	struct perf_counter *counter;
-
-	if (!is_software_counter(leader))
-		return 0;
-	list_for_each_entry(counter, &leader->sibling_list, list_entry)
-		if (!is_software_counter(counter))
-			return 0;
-	return 1;
-}
-
 static void
 __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
@@ -492,22 +533,49 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 
 	spin_lock(&ctx->lock);
 	flags = hw_perf_save_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    !counter->hw_event.pinned)
+			continue;
+		if (counter->cpu != -1 && counter->cpu != cpu)
+			continue;
+
+		if (group_can_go_on(counter, cpuctx, 1))
+			group_sched_in(counter, cpuctx, ctx, cpu);
+
+		/*
+		 * If this pinned group hasn't been scheduled,
+		 * put it in error state.
+		 */
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+			counter->state = PERF_COUNTER_STATE_ERROR;
+	}
+
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		/*
+		 * Ignore counters in OFF or ERROR state, and
+		 * ignore pinned counters since we did them already.
+		 */
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    counter->hw_event.pinned)
+			continue;
+
+		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of counters:
 		 */
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		/*
-		 * If we scheduled in a group atomically and exclusively,
-		 * or if this group can't go on, don't add any more
-		 * hardware counters.
-		 */
-		if (can_add_hw || is_software_only_group(counter))
+		if (group_can_go_on(counter, cpuctx, can_add_hw)) {
 			if (group_sched_in(counter, cpuctx, ctx, cpu))
 				can_add_hw = 0;
+		}
 	}
 	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
@@ -567,8 +635,10 @@ int perf_counter_task_disable(void)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry)
-		counter->state = PERF_COUNTER_STATE_OFF;
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ERROR)
+			counter->state = PERF_COUNTER_STATE_OFF;
+	}
 
 	hw_perf_restore(perf_flags);
 
@@ -607,7 +677,7 @@ int perf_counter_task_enable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_OFF)
+		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->hw_event.disabled = 0;
@@ -849,6 +919,14 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (count != sizeof(cntval))
 		return -EINVAL;
 
+	/*
+	 * Return end-of-file for a read on a counter that is in
+	 * error state (i.e. because it was pinned but it couldn't be
+	 * scheduled on to the CPU at some point).
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ERROR)
+		return 0;
+
 	mutex_lock(&counter->mutex);
 	cntval = perf_counter_read(counter);
 	mutex_unlock(&counter->mutex);
@@ -884,7 +962,7 @@ perf_read_irq_data(struct perf_counter	*counter,
 {
 	struct perf_data *irqdata, *usrdata;
 	DECLARE_WAITQUEUE(wait, current);
-	ssize_t res;
+	ssize_t res, res2;
 
 	irqdata = counter->irqdata;
 	usrdata = counter->usrdata;
@@ -905,6 +983,9 @@ perf_read_irq_data(struct perf_counter	*counter,
 		if (signal_pending(current))
 			break;
 
+		if (counter->state == PERF_COUNTER_STATE_ERROR)
+			break;
+
 		spin_unlock_irq(&counter->waitq.lock);
 		schedule();
 		spin_lock_irq(&counter->waitq.lock);
@@ -913,7 +994,8 @@ perf_read_irq_data(struct perf_counter	*counter,
 	__set_current_state(TASK_RUNNING);
 	spin_unlock_irq(&counter->waitq.lock);
 
-	if (usrdata->len + irqdata->len < count)
+	if (usrdata->len + irqdata->len < count &&
+	    counter->state != PERF_COUNTER_STATE_ERROR)
 		return -ERESTARTSYS;
 read_pending:
 	mutex_lock(&counter->mutex);
@@ -925,11 +1007,12 @@ read_pending:
 
 	/* Switch irq buffer: */
 	usrdata = perf_switch_irq_data(counter);
-	if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) {
+	res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
+	if (res2 < 0) {
 		if (!res)
 			res = -EFAULT;
 	} else {
-		res = count;
+		res += res2;
 	}
 out:
 	mutex_unlock(&counter->mutex);
@@ -1348,6 +1431,11 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 		 */
 		if (group_leader->ctx != ctx)
 			goto err_put_context;
+		/*
+		 * Only a group leader can be exclusive or pinned
+		 */
+		if (hw_event.exclusive || hw_event.pinned)
+			goto err_put_context;
 	}
 
 	ret = -EINVAL;
@@ -1473,13 +1561,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
-			child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-			child_counter->hw_ops->disable(child_counter);
-			cpuctx->active_oncpu--;
-			child_ctx->nr_active--;
-			child_counter->oncpu = -1;
-		}
+		counter_sched_out(child_counter, cpuctx, child_ctx);
 
 		list_del_init(&child_counter->list_entry);
 
-- 
cgit v0.10.2


From d859e29fe34cb833071b20aef860ee94fbad9bb2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sat, 17 Jan 2009 18:10:22 +1100
Subject: perf_counter: Add counter enable/disable ioctls

Impact: New perf_counter features

This primarily adds a way for perf_counter users to enable and disable
counters and groups.  Enabling or disabling a counter or group also
enables or disables all of the child counters that have been cloned
from it to monitor children of the task monitored by the top-level
counter.  The userspace interface to enable/disable counters is via
ioctl on the counter file descriptor.

Along the way this extends the code that handles child counters to
handle child counter groups properly.  A group with multiple counters
will be cloned to child tasks if and only if the group leader has the
hw_event.inherit bit set - if it is set the whole group is cloned as a
group in the child task.

In order to be able to enable or disable all child counters of a given
top-level counter, we need a way to find them all.  Hence I have added
a child_list field to struct perf_counter, which is the head of the
list of children for a top-level counter, or the link in that list for
a child counter.  That list is protected by the perf_counter.mutex
field.

This also adds a mutex to the perf_counter_context struct.  Previously
the list of counters was protected just by the lock field in the
context, which meant that perf_counter_init_task had to take that lock
and then take whatever lock/mutex protects the top-level counter's
child_list.  But the counter enable/disable functions need to take
that lock in order to traverse the list, then for each counter take
the lock in that counter's context in order to change the counter's
state safely, which will lead to a deadlock.

To solve this, we now have both a mutex and a spinlock in the context,
and taking either is sufficient to ensure the list of counters can't
change - you have to take both before changing the list.  Now
perf_counter_init_task takes the mutex instead of the lock (which
incidentally means that inherit_counter can use GFP_KERNEL instead of
GFP_ATOMIC) and thus avoids the possible deadlock.  Similarly the new
enable/disable functions can take the mutex while traversing the list
of child counters without incurring a possible deadlock when the
counter manipulation code locks the context for a child counter.

We also had an misfeature that the first counter added to a context
would possibly not go on until the next sched-in, because we were
using ctx->nr_active to detect if the context was running on a CPU.
But nr_active is the number of active counters, and if that was zero
(because the context didn't have any counters yet) it would look like
the context wasn't running on a cpu and so the retry code in
__perf_install_in_context wouldn't retry.  So this adds an 'is_active'
field that is set when the context is on a CPU, even if it has no
counters.  The is_active field is only used for task contexts, not for
per-cpu contexts.

If we enable a subsidiary counter in a group that is active on a CPU,
and the arch code can't enable the counter, then we have to pull the
whole group off the CPU.  We do this with group_sched_out, which gets
moved up in the file so it comes before all its callers.  This also
adds similar logic to __perf_install_in_context so that the "all on,
or none" invariant of groups is preserved when adding a new counter to
a group.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7ab8e5f..33ba9fe 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -14,6 +14,7 @@
 #define _LINUX_PERF_COUNTER_H
 
 #include <asm/atomic.h>
+#include <asm/ioctl.h>
 
 #ifdef CONFIG_PERF_COUNTERS
 # include <asm/perf_counter.h>
@@ -95,6 +96,12 @@ struct perf_counter_hw_event {
 };
 
 /*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
+
+/*
  * Kernel-internal data types:
  */
 
@@ -173,8 +180,10 @@ struct perf_counter {
 	struct file			*filp;
 
 	struct perf_counter		*parent;
+	struct list_head		child_list;
+
 	/*
-	 * Protect attach/detach:
+	 * Protect attach/detach and child_list:
 	 */
 	struct mutex			mutex;
 
@@ -199,13 +208,21 @@ struct perf_counter {
 struct perf_counter_context {
 #ifdef CONFIG_PERF_COUNTERS
 	/*
-	 * Protect the list of counters:
+	 * Protect the states of the counters in the list,
+	 * nr_active, and the list:
 	 */
 	spinlock_t		lock;
+	/*
+	 * Protect the list of counters.  Locking either mutex or lock
+	 * is sufficient to ensure the list doesn't change; to change
+	 * the list you need to lock both the mutex and the spinlock.
+	 */
+	struct mutex		mutex;
 
 	struct list_head	counter_list;
 	int			nr_counters;
 	int			nr_active;
+	int			is_active;
 	struct task_struct	*task;
 #endif
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index faf671b..1ac18da 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -112,6 +112,28 @@ counter_sched_out(struct perf_counter *counter,
 		cpuctx->exclusive = 0;
 }
 
+static void
+group_sched_out(struct perf_counter *group_counter,
+		struct perf_cpu_context *cpuctx,
+		struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+
+	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	/*
+	 * Schedule out siblings (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+		counter_sched_out(counter, cpuctx, ctx);
+
+	if (group_counter->hw_event.exclusive)
+		cpuctx->exclusive = 0;
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -168,7 +190,7 @@ static void __perf_counter_remove_from_context(void *info)
 /*
  * Remove the counter from a task's (or a CPU's) list of counters.
  *
- * Must be called with counter->mutex held.
+ * Must be called with counter->mutex and ctx->mutex held.
  *
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
@@ -215,6 +237,99 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
+/*
+ * Cross CPU call to disable a performance counter
+ */
+static void __perf_counter_disable(void *info)
+{
+	struct perf_counter *counter = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long flags;
+
+	/*
+	 * If this is a per-task counter, need to check whether this
+	 * counter's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
+
+	/*
+	 * If the counter is on, turn it off.
+	 * If it is in error state, leave it in error state.
+	 */
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		if (counter == counter->group_leader)
+			group_sched_out(counter, cpuctx, ctx);
+		else
+			counter_sched_out(counter, cpuctx, ctx);
+		counter->state = PERF_COUNTER_STATE_OFF;
+	}
+
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
+}
+
+/*
+ * Disable a counter.
+ */
+static void perf_counter_disable(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Disable the counter on the cpu that it's on
+		 */
+		smp_call_function_single(counter->cpu, __perf_counter_disable,
+					 counter, 1);
+		return;
+	}
+
+ retry:
+	task_oncpu_function_call(task, __perf_counter_disable, counter);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the counter is still active, we need to retry the cross-call.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * Since we have the lock this context can't be scheduled
+	 * in, so we can change the state safely.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+		counter->state = PERF_COUNTER_STATE_OFF;
+
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Disable a counter and all its children.
+ */
+static void perf_counter_disable_family(struct perf_counter *counter)
+{
+	struct perf_counter *child;
+
+	perf_counter_disable(counter);
+
+	/*
+	 * Lock the mutex to protect the list of children
+	 */
+	mutex_lock(&counter->mutex);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		perf_counter_disable(child);
+	mutex_unlock(&counter->mutex);
+}
+
 static int
 counter_sched_in(struct perf_counter *counter,
 		 struct perf_cpu_context *cpuctx,
@@ -302,6 +417,7 @@ static void __perf_install_in_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *leader = counter->group_leader;
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	u64 perf_flags;
@@ -328,22 +444,39 @@ static void __perf_install_in_context(void *info)
 	ctx->nr_counters++;
 
 	/*
+	 * Don't put the counter on if it is disabled or if
+	 * it is in a group and the group isn't on.
+	 */
+	if (counter->state != PERF_COUNTER_STATE_INACTIVE ||
+	    (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
+		goto unlock;
+
+	/*
 	 * An exclusive counter can't go on if there are already active
 	 * hardware counters, and no hardware counter can go on if there
 	 * is already an exclusive counter on.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
-	    !group_can_go_on(counter, cpuctx, 1))
+	if (!group_can_go_on(counter, cpuctx, 1))
 		err = -EEXIST;
 	else
 		err = counter_sched_in(counter, cpuctx, ctx, cpu);
 
-	if (err && counter->hw_event.pinned)
-		counter->state = PERF_COUNTER_STATE_ERROR;
+	if (err) {
+		/*
+		 * This counter couldn't go on.  If it is in a group
+		 * then we have to pull the whole group off.
+		 * If the counter group is pinned then put it in error state.
+		 */
+		if (leader != counter)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->hw_event.pinned)
+			leader->state = PERF_COUNTER_STATE_ERROR;
+	}
 
 	if (!err && !ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
+ unlock:
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
@@ -359,6 +492,8 @@ static void __perf_install_in_context(void *info)
  * If the counter is attached to a task which is on a CPU we use a smp
  * call to enable it in the task context. The task might have been
  * scheduled away, but we check this in the smp call again.
+ *
+ * Must be called with ctx->mutex held.
  */
 static void
 perf_install_in_context(struct perf_counter_context *ctx,
@@ -387,7 +522,7 @@ retry:
 	/*
 	 * we need to retry the smp call.
 	 */
-	if (ctx->nr_active && list_empty(&counter->list_entry)) {
+	if (ctx->is_active && list_empty(&counter->list_entry)) {
 		spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
@@ -404,26 +539,131 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
-static void
-group_sched_out(struct perf_counter *group_counter,
-		struct perf_cpu_context *cpuctx,
-		struct perf_counter_context *ctx)
+/*
+ * Cross CPU call to enable a performance counter
+ */
+static void __perf_counter_enable(void *info)
 {
-	struct perf_counter *counter;
+	struct perf_counter *counter = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *leader = counter->group_leader;
+	unsigned long flags;
+	int err;
 
-	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+	/*
+	 * If this is a per-task counter, need to check whether this
+	 * counter's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	counter_sched_out(group_counter, cpuctx, ctx);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
+
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		goto unlock;
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
 
 	/*
-	 * Schedule out siblings (if any):
+	 * If the counter is in a group and isn't the group leader,
+	 * then don't put it on unless the group is on.
 	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
-		counter_sched_out(counter, cpuctx, ctx);
+	if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
+		goto unlock;
 
-	if (group_counter->hw_event.exclusive)
-		cpuctx->exclusive = 0;
+	if (!group_can_go_on(counter, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = counter_sched_in(counter, cpuctx, ctx,
+				       smp_processor_id());
+
+	if (err) {
+		/*
+		 * If this counter can't go on and it's part of a
+		 * group, then the whole group has to come off.
+		 */
+		if (leader != counter)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->hw_event.pinned)
+			leader->state = PERF_COUNTER_STATE_ERROR;
+	}
+
+ unlock:
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
+}
+
+/*
+ * Enable a counter.
+ */
+static void perf_counter_enable(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Enable the counter on the cpu that it's on
+		 */
+		smp_call_function_single(counter->cpu, __perf_counter_enable,
+					 counter, 1);
+		return;
+	}
+
+	spin_lock_irq(&ctx->lock);
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		goto out;
+
+	/*
+	 * If the counter is in error state, clear that first.
+	 * That way, if we see the counter in error state below, we
+	 * know that it has gone back into error state, as distinct
+	 * from the task having been scheduled away before the
+	 * cross-call arrived.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ERROR)
+		counter->state = PERF_COUNTER_STATE_OFF;
+
+ retry:
+	spin_unlock_irq(&ctx->lock);
+	task_oncpu_function_call(task, __perf_counter_enable, counter);
+
+	spin_lock_irq(&ctx->lock);
+
+	/*
+	 * If the context is active and the counter is still off,
+	 * we need to retry the cross-call.
+	 */
+	if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
+		goto retry;
+
+	/*
+	 * Since we have the lock this context can't be scheduled
+	 * in, so we can change the state safely.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+ out:
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Enable a counter and all its children.
+ */
+static void perf_counter_enable_family(struct perf_counter *counter)
+{
+	struct perf_counter *child;
+
+	perf_counter_enable(counter);
+
+	/*
+	 * Lock the mutex to protect the list of children
+	 */
+	mutex_lock(&counter->mutex);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		perf_counter_enable(child);
+	mutex_unlock(&counter->mutex);
 }
 
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
@@ -432,16 +672,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 	struct perf_counter *counter;
 	u64 flags;
 
+	spin_lock(&ctx->lock);
+	ctx->is_active = 0;
 	if (likely(!ctx->nr_counters))
-		return;
+		goto out;
 
-	spin_lock(&ctx->lock);
 	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
 		list_for_each_entry(counter, &ctx->counter_list, list_entry)
 			group_sched_out(counter, cpuctx, ctx);
 	}
 	hw_perf_restore(flags);
+ out:
 	spin_unlock(&ctx->lock);
 }
 
@@ -528,10 +770,11 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	u64 flags;
 	int can_add_hw = 1;
 
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
 	if (likely(!ctx->nr_counters))
-		return;
+		goto out;
 
-	spin_lock(&ctx->lock);
 	flags = hw_perf_save_disable();
 
 	/*
@@ -578,6 +821,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		}
 	}
 	hw_perf_restore(flags);
+ out:
 	spin_unlock(&ctx->lock);
 }
 
@@ -896,12 +1140,14 @@ static int perf_release(struct inode *inode, struct file *file)
 
 	file->private_data = NULL;
 
+	mutex_lock(&ctx->mutex);
 	mutex_lock(&counter->mutex);
 
 	perf_counter_remove_from_context(counter);
 	put_context(ctx);
 
 	mutex_unlock(&counter->mutex);
+	mutex_unlock(&ctx->mutex);
 
 	kfree(counter);
 
@@ -1053,10 +1299,30 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct perf_counter *counter = file->private_data;
+	int err = 0;
+
+	switch (cmd) {
+	case PERF_COUNTER_IOC_ENABLE:
+		perf_counter_enable_family(counter);
+		break;
+	case PERF_COUNTER_IOC_DISABLE:
+		perf_counter_disable_family(counter);
+		break;
+	default:
+		err = -ENOTTY;
+	}
+	return err;
+}
+
 static const struct file_operations perf_fops = {
 	.release		= perf_release,
 	.read			= perf_read,
 	.poll			= perf_poll,
+	.unlocked_ioctl		= perf_ioctl,
+	.compat_ioctl		= perf_ioctl,
 };
 
 static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
@@ -1348,6 +1614,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
 
+	INIT_LIST_HEAD(&counter->child_list);
+
 	counter->irqdata		= &counter->data[0];
 	counter->usrdata		= &counter->data[1];
 	counter->cpu			= cpu;
@@ -1452,7 +1720,9 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 		goto err_free_put_context;
 
 	counter->filp = counter_file;
+	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, counter, cpu);
+	mutex_unlock(&ctx->mutex);
 
 	fput_light(counter_file, fput_needed2);
 
@@ -1479,6 +1749,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 {
 	memset(ctx, 0, sizeof(*ctx));
 	spin_lock_init(&ctx->lock);
+	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->counter_list);
 	ctx->task = task;
 }
@@ -1486,20 +1757,30 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 /*
  * inherit a counter from parent task to child task:
  */
-static int
+static struct perf_counter *
 inherit_counter(struct perf_counter *parent_counter,
 	      struct task_struct *parent,
 	      struct perf_counter_context *parent_ctx,
 	      struct task_struct *child,
+	      struct perf_counter *group_leader,
 	      struct perf_counter_context *child_ctx)
 {
 	struct perf_counter *child_counter;
 
+	/*
+	 * Instead of creating recursive hierarchies of counters,
+	 * we link inherited counters back to the original parent,
+	 * which has a filp for sure, which we use as the reference
+	 * count:
+	 */
+	if (parent_counter->parent)
+		parent_counter = parent_counter->parent;
+
 	child_counter = perf_counter_alloc(&parent_counter->hw_event,
-					    parent_counter->cpu, NULL,
-					    GFP_ATOMIC);
+					    parent_counter->cpu, group_leader,
+					    GFP_KERNEL);
 	if (!child_counter)
-		return -ENOMEM;
+		return NULL;
 
 	/*
 	 * Link it up in the child's context:
@@ -1523,16 +1804,82 @@ inherit_counter(struct perf_counter *parent_counter,
 	 */
 	atomic_long_inc(&parent_counter->filp->f_count);
 
+	/*
+	 * Link this into the parent counter's child list
+	 */
+	mutex_lock(&parent_counter->mutex);
+	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
+
+	/*
+	 * Make the child state follow the state of the parent counter,
+	 * not its hw_event.disabled bit.  We hold the parent's mutex,
+	 * so we won't race with perf_counter_{en,dis}able_family.
+	 */
+	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+	else
+		child_counter->state = PERF_COUNTER_STATE_OFF;
+
+	mutex_unlock(&parent_counter->mutex);
+
+	return child_counter;
+}
+
+static int inherit_group(struct perf_counter *parent_counter,
+	      struct task_struct *parent,
+	      struct perf_counter_context *parent_ctx,
+	      struct task_struct *child,
+	      struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *leader;
+	struct perf_counter *sub;
+
+	leader = inherit_counter(parent_counter, parent, parent_ctx,
+				 child, NULL, child_ctx);
+	if (!leader)
+		return -ENOMEM;
+	list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
+		if (!inherit_counter(sub, parent, parent_ctx,
+				     child, leader, child_ctx))
+			return -ENOMEM;
+	}
 	return 0;
 }
 
+static void sync_child_counter(struct perf_counter *child_counter,
+			       struct perf_counter *parent_counter)
+{
+	u64 parent_val, child_val;
+
+	parent_val = atomic64_read(&parent_counter->count);
+	child_val = atomic64_read(&child_counter->count);
+
+	/*
+	 * Add back the child's count to the parent's count:
+	 */
+	atomic64_add(child_val, &parent_counter->count);
+
+	/*
+	 * Remove this counter from the parent's list
+	 */
+	mutex_lock(&parent_counter->mutex);
+	list_del_init(&child_counter->child_list);
+	mutex_unlock(&parent_counter->mutex);
+
+	/*
+	 * Release the parent counter, if this was the last
+	 * reference to it.
+	 */
+	fput(parent_counter->filp);
+}
+
 static void
 __perf_counter_exit_task(struct task_struct *child,
 			 struct perf_counter *child_counter,
 			 struct perf_counter_context *child_ctx)
 {
 	struct perf_counter *parent_counter;
-	u64 parent_val, child_val;
+	struct perf_counter *sub, *tmp;
 
 	/*
 	 * If we do not self-reap then we have to wait for the
@@ -1561,7 +1908,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		counter_sched_out(child_counter, cpuctx, child_ctx);
+		group_sched_out(child_counter, cpuctx, child_ctx);
 
 		list_del_init(&child_counter->list_entry);
 
@@ -1577,26 +1924,23 @@ __perf_counter_exit_task(struct task_struct *child,
 	 * that are still around due to the child reference. These
 	 * counters need to be zapped - but otherwise linger.
 	 */
-	if (!parent_counter)
-		return;
-
-	parent_val = atomic64_read(&parent_counter->count);
-	child_val = atomic64_read(&child_counter->count);
-
-	/*
-	 * Add back the child's count to the parent's count:
-	 */
-	atomic64_add(child_val, &parent_counter->count);
-
-	fput(parent_counter->filp);
+	if (parent_counter) {
+		sync_child_counter(child_counter, parent_counter);
+		list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
+					 list_entry) {
+			if (sub->parent)
+				sync_child_counter(sub, sub->parent);
+			kfree(sub);
+		}
+	}
 
 	kfree(child_counter);
 }
 
 /*
- * When a child task exist, feed back counter values to parent counters.
+ * When a child task exits, feed back counter values to parent counters.
  *
- * Note: we are running in child context, but the PID is not hashed
+ * Note: we may be running in child context, but the PID is not hashed
  * anymore so new counters will not be added.
  */
 void perf_counter_exit_task(struct task_struct *child)
@@ -1620,9 +1964,8 @@ void perf_counter_exit_task(struct task_struct *child)
 void perf_counter_init_task(struct task_struct *child)
 {
 	struct perf_counter_context *child_ctx, *parent_ctx;
-	struct perf_counter *counter, *parent_counter;
+	struct perf_counter *counter;
 	struct task_struct *parent = current;
-	unsigned long flags;
 
 	child_ctx  =  &child->perf_counter_ctx;
 	parent_ctx = &parent->perf_counter_ctx;
@@ -1641,32 +1984,22 @@ void perf_counter_init_task(struct task_struct *child)
 	 * Lock the parent list. No need to lock the child - not PID
 	 * hashed yet and not running, so nobody can access it.
 	 */
-	spin_lock_irqsave(&parent_ctx->lock, flags);
+	mutex_lock(&parent_ctx->mutex);
 
 	/*
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
 	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
-		if (!counter->hw_event.inherit || counter->group_leader != counter)
+		if (!counter->hw_event.inherit)
 			continue;
 
-		/*
-		 * Instead of creating recursive hierarchies of counters,
-		 * we link inheritd counters back to the original parent,
-		 * which has a filp for sure, which we use as the reference
-		 * count:
-		 */
-		parent_counter = counter;
-		if (counter->parent)
-			parent_counter = counter->parent;
-
-		if (inherit_counter(parent_counter, parent,
+		if (inherit_group(counter, parent,
 				  parent_ctx, child, child_ctx))
 			break;
 	}
 
-	spin_unlock_irqrestore(&parent_ctx->lock, flags);
+	mutex_unlock(&parent_ctx->mutex);
 }
 
 static void __cpuinit perf_counter_init_cpu(int cpu)
@@ -1692,11 +2025,15 @@ static void __perf_counter_exit_cpu(void *info)
 
 	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
 		__perf_counter_remove_from_context(counter);
-
 }
 static void perf_counter_exit_cpu(int cpu)
 {
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	mutex_lock(&ctx->mutex);
 	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
+	mutex_unlock(&ctx->mutex);
 }
 #else
 static inline void perf_counter_exit_cpu(int cpu) { }
-- 
cgit v0.10.2


From 1b023a96d9b44f50f4d8ff28c15f5b80e354760f Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 23 Jan 2009 10:13:01 +0100
Subject: perfcounters: throttle on too high IRQ rates

Starting kerneltop with only -c 100 seems to be a bad idea, it can
easily lock the system due to perfcounter IRQ overload.

So add throttling: if a new IRQ arrives in a shorter than
PERFMON_MIN_PERIOD_NS time, turn off perfcounters and untrottle them
from the next timer tick.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 7b434e5..849c230 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -781,6 +781,8 @@ static void local_apic_timer_interrupt(void)
 	inc_irq_stat(apic_timer_irqs);
 
 	evt->event_handler(evt);
+
+	perf_counter_unthrottle();
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9376771..1a040b1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -33,6 +33,9 @@ static int nr_counters_fixed __read_mostly;
 struct cpu_hw_counters {
 	struct perf_counter	*counters[X86_PMC_IDX_MAX];
 	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	u64			last_interrupt;
+	u64			global_enable;
+	int			throttled;
 };
 
 /*
@@ -474,16 +477,19 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 {
 	int bit, cpu = smp_processor_id();
-	u64 ack, status, saved_global;
-	struct cpu_hw_counters *cpuc;
+	u64 ack, status, now;
+	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
 
 	/* Disable counters globally */
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 	ack_APIC_irq();
 
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
+	now = sched_clock();
+	if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS)
+		cpuc->throttled = 1;
+	cpuc->last_interrupt = now;
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 	if (!status)
@@ -533,9 +539,29 @@ again:
 		goto again;
 out:
 	/*
-	 * Restore - do not reenable when global enable is off:
+	 * Restore - do not reenable when global enable is off or throttled:
 	 */
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
+	if (!cpuc->throttled)
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+}
+
+void perf_counter_unthrottle(void)
+{
+	struct cpu_hw_counters *cpuc;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+		return;
+
+	if (unlikely(!perf_counters_initialized))
+		return;
+
+	cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
+	if (cpuc->throttled) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n");
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+		cpuc->throttled = 0;
+	}
 }
 
 void smp_perf_counter_interrupt(struct pt_regs *regs)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 33ba9fe..91f1ca4 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -254,6 +254,7 @@ extern void perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
+extern void perf_counter_unthrottle(void);
 extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
 extern int perf_counter_task_disable(void);
@@ -270,6 +271,8 @@ static inline int is_software_counter(struct perf_counter *counter)
 	return !counter->hw_event.raw && counter->hw_event.type < 0;
 }
 
+#define PERFMON_MIN_PERIOD_NS 10000
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -281,6 +284,7 @@ static inline void perf_counter_init_task(struct task_struct *child)	{ }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
+static inline void perf_counter_unthrottle(void)			{ }
 static inline void hw_perf_restore(u64 ctrl)			{ }
 static inline u64 hw_perf_save_disable(void)		      { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
-- 
cgit v0.10.2


From 4b39fd96855254a244f71245b41a91cdecb87d63 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 23 Jan 2009 14:36:16 +0100
Subject: perfcounters: ratelimit performance counter interrupts

Ratelimit performance counter interrupts to 100KHz per CPU.

This replaces the irq-delta-time based method.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 1a040b1..a56d4cf 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -33,9 +33,8 @@ static int nr_counters_fixed __read_mostly;
 struct cpu_hw_counters {
 	struct perf_counter	*counters[X86_PMC_IDX_MAX];
 	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	u64			last_interrupt;
+	unsigned long		interrupts;
 	u64			global_enable;
-	int			throttled;
 };
 
 /*
@@ -471,13 +470,18 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 }
 
 /*
+ * Maximum interrupt frequency of 100KHz per CPU
+ */
+#define PERFMON_MAX_INTERRUPTS 100000/HZ
+
+/*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
  */
 static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 {
 	int bit, cpu = smp_processor_id();
-	u64 ack, status, now;
+	u64 ack, status;
 	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
@@ -486,11 +490,6 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 	ack_APIC_irq();
 
-	now = sched_clock();
-	if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS)
-		cpuc->throttled = 1;
-	cpuc->last_interrupt = now;
-
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 	if (!status)
 		goto out;
@@ -541,13 +540,14 @@ out:
 	/*
 	 * Restore - do not reenable when global enable is off or throttled:
 	 */
-	if (!cpuc->throttled)
+	if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
 		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
 }
 
 void perf_counter_unthrottle(void)
 {
 	struct cpu_hw_counters *cpuc;
+	u64 global_enable;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
 		return;
@@ -556,12 +556,15 @@ void perf_counter_unthrottle(void)
 		return;
 
 	cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
-	if (cpuc->throttled) {
+	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
 		if (printk_ratelimit())
-			printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n");
+			printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
 		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
-		cpuc->throttled = 0;
 	}
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable);
+	if (unlikely(cpuc->global_enable && !global_enable))
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+	cpuc->interrupts = 0;
 }
 
 void smp_perf_counter_interrupt(struct pt_regs *regs)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 91f1ca4..f55381f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -271,8 +271,6 @@ static inline int is_software_counter(struct perf_counter *counter)
 	return !counter->hw_event.raw && counter->hw_event.type < 0;
 }
 
-#define PERFMON_MIN_PERIOD_NS 10000
-
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
-- 
cgit v0.10.2


From 3415dd9146c574bffe8f012c096bfc2bc62b9508 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 23 Jan 2009 14:16:53 +0100
Subject: perfcounters fix section mismatch warning in
 perf_counter.c::perf_counters_lapic_init()

Fix:

WARNING: arch/x86/kernel/built-in.o(.text+0xdd0f): Section mismatch in reference from the function pmc_generic_enable() to the function .cpuinit.text:perf_counters_lapic_init()
The function pmc_generic_enable() references
the function __cpuinit perf_counters_lapic_init().
This is often because pmc_generic_enable lacks a __cpuinit
annotation or the annotation of perf_counters_lapic_init is wrong.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a56d4cf..46c436c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -605,7 +605,7 @@ void perf_counter_notify(struct pt_regs *regs)
 	local_irq_restore(flags);
 }
 
-void __cpuinit perf_counters_lapic_init(int nmi)
+void perf_counters_lapic_init(int nmi)
 {
 	u32 apic_val;
 
-- 
cgit v0.10.2


From bb3f0b59ad005d2d2ecbbe9bd048eab6d1ecbd31 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 25 Jan 2009 02:38:09 -0800
Subject: x86: make irqinit_32.c more like irqinit_64.c, v2

Impact: cleanup

1. add smp_intr_init and apic_intr_init for 32bit, the same as 64bit
2. move the apic_intr_init() call before set gate with interrupt[i]
3. for 64bit, if ia32_emulation is not used, will make per_cpu to use 0x80 vector.

[ v2: should use !test_bit() instead of test_bit() with 32bit ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index c56496f..ddf3eb7 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -120,28 +120,8 @@ int vector_used_by_percpu_irq(unsigned int vector)
 	return 0;
 }
 
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-void __init native_init_IRQ(void)
+static void __init smp_intr_init(void)
 {
-	int i;
-
-	/* all the set up before the call gates are initialised */
-	pre_intr_init_hook();
-
-	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
-	 */
-	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-	}
-
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
@@ -170,8 +150,13 @@ void __init native_init_IRQ(void)
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
+}
 
+static void __init apic_intr_init(void)
+{
 #ifdef CONFIG_X86_LOCAL_APIC
+	smp_intr_init();
+
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -181,12 +166,37 @@ void __init native_init_IRQ(void)
 # ifdef CONFIG_PERF_COUNTERS
 	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
 # endif
-#endif
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
+# ifdef CONFIG_X86_MCE_P4THERMAL
 	/* thermal monitor LVT interrupt */
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+# endif
 #endif
+}
+
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+void __init native_init_IRQ(void)
+{
+	int i;
+
+	/* all the set up before the call gates are initialised */
+	pre_intr_init_hook();
+
+	apic_intr_init();
+
+	/*
+	 * Cover the whole vector space, no vector can escape
+	 * us. (some of these will be overridden and become
+	 * 'special' SMP interrupts)
+	 */
+	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+		int vector = FIRST_EXTERNAL_VECTOR + i;
+		/* SYSCALL_VECTOR was reserved in trap_init. */
+		if (!test_bit(vector, used_vectors))
+			set_intr_gate(vector, interrupt[i]);
+	}
 
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 6a71bfc..16e1fc6 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -162,6 +162,9 @@ void __init native_init_IRQ(void)
 	int i;
 
 	init_ISA_irqs();
+
+	apic_intr_init();
+
 	/*
 	 * Cover the whole vector space, no vector can escape
 	 * us. (some of these will be overridden and become
@@ -169,12 +172,10 @@ void __init native_init_IRQ(void)
 	 */
 	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
 		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (vector != IA32_SYSCALL_VECTOR)
+		if (!test_bit(vector, used_vectors))
 			set_intr_gate(vector, interrupt[i]);
 	}
 
-	apic_intr_init();
-
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ed5aee5..d36a502 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -979,8 +979,13 @@ void __init trap_init(void)
 #endif
 	set_intr_gate(19, &simd_coprocessor_error);
 
+	/* Reserve all the builtin and the syscall vector: */
+	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+		set_bit(i, used_vectors);
+
 #ifdef CONFIG_IA32_EMULATION
 	set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 
 #ifdef CONFIG_X86_32
@@ -997,17 +1002,9 @@ void __init trap_init(void)
 	}
 
 	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
-#endif
-
-	/* Reserve all the builtin and the syscall vector: */
-	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
-		set_bit(i, used_vectors);
-
-#ifdef CONFIG_X86_64
-	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#else
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
+
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
-- 
cgit v0.10.2


From 65d370862f64973611a271ced61864b5f9bb6fc0 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Thu, 29 Jan 2009 14:06:52 +0100
Subject: perfcounters: fix refcounting bug

don't kfree in use counters.

Running...

	while true; do perfstat -e 1 -c true; done

...on all cores for a while doesn't seem to be eating ram, and my oops
is gone.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1ac18da..f27a7e9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1934,7 +1934,8 @@ __perf_counter_exit_task(struct task_struct *child,
 		}
 	}
 
-	kfree(child_counter);
+	if (!child_counter->filp || !atomic_long_read(&child_counter->filp->f_count))
+		kfree(child_counter);
 }
 
 /*
-- 
cgit v0.10.2


From 15081c61362618a0c81cc8d04e45e7427bc1ed71 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sun, 1 Feb 2009 22:07:39 +0530
Subject: x86: irqinit_32.c fix compilation warning

Fix:

  arch/x86/kernel/irqinit_32.c:124: warning: 'smp_intr_init' defined but not used

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index ddf3eb7..520e6c1 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -154,9 +154,9 @@ static void __init smp_intr_init(void)
 
 static void __init apic_intr_init(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	smp_intr_init();
 
+#ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
-- 
cgit v0.10.2


From 5b75af0a02fcf3b8899f38ff6f22164c5d8e2fdd Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 4 Feb 2009 17:11:34 +0100
Subject: perfcounters: fix "perf counters kill oprofile" bug

With oprofile as a module, and unloaded by profiling script,
both oprofile and kerneltop work fine.. unless you leave kerneltop
running when you start profiling, then you may see badness.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 46c436c..8bb2133 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -643,7 +643,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
 }
 
 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
-	.notifier_call		= perf_counter_nmi_handler
+	.notifier_call		= perf_counter_nmi_handler,
+	.next			= NULL,
+	.priority		= 1
 };
 
 void __init init_hw_perf_counters(void)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 202864a..c638685 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self,
 
 	switch (val) {
 	case DIE_NMI:
-		if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
-			ret = NOTIFY_STOP;
+	case DIE_NMI_IPI:
+		model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
+		ret = NOTIFY_STOP;
 		break;
 	default:
 		break;
@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy)
 static struct notifier_block profile_exceptions_nb = {
 	.notifier_call = profile_exceptions_notify,
 	.next = NULL,
-	.priority = 0
+	.priority = 2
 };
 
 static int nmi_setup(void)
-- 
cgit v0.10.2


From 82aa9a1829199233f9bdaf26e2ee271114f4701e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 5 Feb 2009 15:23:08 +0100
Subject: perfcounters: fix "perf counters kills oprofile" bug, v2

Impact: fix kernel crash

Both oprofile and perfcounters register an NMI die handler, but only one
can handle the NMI.  Conveniently, oprofile unregisters it's notifier
when not actively in use, so setting it's notifier priority higher than
perfcounter's allows oprofile to borrow the NMI for the duration of it's
run.  Tested/works both as module and built-in.

While testing, I found that if kerneltop was generating NMIs at very
high frequency, the kernel may panic when oprofile registered it's
handler.  This turned out to be because oprofile registers it's handler
before reset_value has been allocated, so if an NMI comes in while it's
still setting up, kabOom.  Rather than try more invasive changes, I
followed the lead of other places in op_model_ppro.c, and simply
returned in that highly unlikely event.  (debug warnings attached)

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 07c9145..85eb626 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -126,6 +126,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 	u64 val;
 	int i;
 
+	/*
+	 * This can happen if perf counters are in use when
+	 * we steal the die notifier NMI.
+	 */
+	if (unlikely(!reset_value))
+		goto out;
+
 	for (i = 0 ; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
@@ -136,6 +143,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 		}
 	}
 
+out:
 	/* Only P6 based Pentium M need to re-unmask the apic vector but it
 	 * doesn't hurt other P6 variant */
 	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
-- 
cgit v0.10.2


From 23a185ca8abbeef64b6ffc33059b1d630e43ec10 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 9 Feb 2009 22:42:47 +1100
Subject: perf_counters: make software counters work as per-cpu counters

Impact: kernel crash fix

Yanmin Zhang reported that using a PERF_COUNT_TASK_CLOCK software
counter as a per-cpu counter would reliably crash the system, because
it calls __task_delta_exec with a null pointer.  The page fault,
context switch and cpu migration counters also won't function
correctly as per-cpu counters since they reference the current task.

This fixes the problem by redirecting the task_clock counter to the
cpu_clock counter when used as a per-cpu counter, and by implementing
per-cpu page fault, context switch and cpu migration counters.

Along the way, this:

- Initializes counter->ctx earlier, in perf_counter_alloc, so that
  sw_perf_counter_init can use it
- Adds code to kernel/sched.c to count task migrations into each
  cpu, in rq->nr_migrations_in
- Exports the per-cpu context switch and task migration counts
  via new functions added to kernel/sched.c
- Makes sure that if sw_perf_counter_init fails, we don't try to
  initialize the counter as a hardware counter.  Since the user has
  passed a negative, non-raw event type, they clearly don't intend
  for it to be interpreted as a hardware event.

Reported-by: "Zhang Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b85b10a..1e5f700 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,6 +137,8 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern u64 cpu_nr_switches(int cpu);
+extern u64 cpu_nr_migrations(int cpu);
 
 struct seq_file;
 struct cfs_rq;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f27a7e9..544193c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -20,6 +20,8 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
 
 /*
  * Each CPU has a list of per CPU counters:
@@ -502,7 +504,6 @@ perf_install_in_context(struct perf_counter_context *ctx,
 {
 	struct task_struct *task = ctx->task;
 
-	counter->ctx = ctx;
 	if (!task) {
 		/*
 		 * Per cpu counters are installed via an smp call and
@@ -1417,11 +1418,19 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.read		= task_clock_perf_counter_read,
 };
 
-static u64 get_page_faults(void)
+#ifdef CONFIG_VM_EVENT_COUNTERS
+#define cpu_page_faults()	__get_cpu_var(vm_event_states).event[PGFAULT]
+#else
+#define cpu_page_faults()	0
+#endif
+
+static u64 get_page_faults(struct perf_counter *counter)
 {
-	struct task_struct *curr = current;
+	struct task_struct *curr = counter->ctx->task;
 
-	return curr->maj_flt + curr->min_flt;
+	if (curr)
+		return curr->maj_flt + curr->min_flt;
+	return cpu_page_faults();
 }
 
 static void page_faults_perf_counter_update(struct perf_counter *counter)
@@ -1430,7 +1439,7 @@ static void page_faults_perf_counter_update(struct perf_counter *counter)
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_page_faults();
+	now = get_page_faults(counter);
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -1446,11 +1455,7 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 
 static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
-	/*
-	 * page-faults is a per-task value already,
-	 * so we dont have to clear it on switch-in.
-	 */
-
+	atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
 	return 0;
 }
 
@@ -1465,11 +1470,13 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = {
 	.read		= page_faults_perf_counter_read,
 };
 
-static u64 get_context_switches(void)
+static u64 get_context_switches(struct perf_counter *counter)
 {
-	struct task_struct *curr = current;
+	struct task_struct *curr = counter->ctx->task;
 
-	return curr->nvcsw + curr->nivcsw;
+	if (curr)
+		return curr->nvcsw + curr->nivcsw;
+	return cpu_nr_switches(smp_processor_id());
 }
 
 static void context_switches_perf_counter_update(struct perf_counter *counter)
@@ -1478,7 +1485,7 @@ static void context_switches_perf_counter_update(struct perf_counter *counter)
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_context_switches();
+	now = get_context_switches(counter);
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -1494,11 +1501,7 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 
 static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
-	/*
-	 * ->nvcsw + curr->nivcsw is a per-task value already,
-	 * so we dont have to clear it on switch-in.
-	 */
-
+	atomic64_set(&counter->hw.prev_count, get_context_switches(counter));
 	return 0;
 }
 
@@ -1513,9 +1516,13 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = {
 	.read		= context_switches_perf_counter_read,
 };
 
-static inline u64 get_cpu_migrations(void)
+static inline u64 get_cpu_migrations(struct perf_counter *counter)
 {
-	return current->se.nr_migrations;
+	struct task_struct *curr = counter->ctx->task;
+
+	if (curr)
+		return curr->se.nr_migrations;
+	return cpu_nr_migrations(smp_processor_id());
 }
 
 static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
@@ -1524,7 +1531,7 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_cpu_migrations();
+	now = get_cpu_migrations(counter);
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -1540,11 +1547,7 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 
 static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
-	/*
-	 * se.nr_migrations is a per-task value already,
-	 * so we dont have to clear it on switch-in.
-	 */
-
+	atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter));
 	return 0;
 }
 
@@ -1569,7 +1572,14 @@ sw_perf_counter_init(struct perf_counter *counter)
 		hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
-		hw_ops = &perf_ops_task_clock;
+		/*
+		 * If the user instantiates this as a per-cpu counter,
+		 * use the cpu_clock counter instead.
+		 */
+		if (counter->ctx->task)
+			hw_ops = &perf_ops_task_clock;
+		else
+			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 		hw_ops = &perf_ops_page_faults;
@@ -1592,6 +1602,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 static struct perf_counter *
 perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
+		   struct perf_counter_context *ctx,
 		   struct perf_counter *group_leader,
 		   gfp_t gfpflags)
 {
@@ -1623,6 +1634,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->wakeup_pending		= 0;
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
+	counter->ctx			= ctx;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
@@ -1631,7 +1643,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	hw_ops = NULL;
 	if (!hw_event->raw && hw_event->type < 0)
 		hw_ops = sw_perf_counter_init(counter);
-	if (!hw_ops)
+	else
 		hw_ops = hw_perf_counter_init(counter);
 
 	if (!hw_ops) {
@@ -1707,7 +1719,8 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	}
 
 	ret = -EINVAL;
-	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
+	counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
+				     GFP_KERNEL);
 	if (!counter)
 		goto err_put_context;
 
@@ -1777,15 +1790,14 @@ inherit_counter(struct perf_counter *parent_counter,
 		parent_counter = parent_counter->parent;
 
 	child_counter = perf_counter_alloc(&parent_counter->hw_event,
-					    parent_counter->cpu, group_leader,
-					    GFP_KERNEL);
+					   parent_counter->cpu, child_ctx,
+					   group_leader, GFP_KERNEL);
 	if (!child_counter)
 		return NULL;
 
 	/*
 	 * Link it up in the child's context:
 	 */
-	child_counter->ctx = child_ctx;
 	child_counter->task = child;
 	list_add_counter(child_counter, child_ctx);
 	child_ctx->nr_counters++;
diff --git a/kernel/sched.c b/kernel/sched.c
index 8db1a4c..173768f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -558,6 +558,7 @@ struct rq {
 	struct load_weight load;
 	unsigned long nr_load_updates;
 	u64 nr_switches;
+	u64 nr_migrations_in;
 
 	struct cfs_rq cfs;
 	struct rt_rq rt;
@@ -1908,6 +1909,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 #endif
 	if (old_cpu != new_cpu) {
 		p->se.nr_migrations++;
+		new_rq->nr_migrations_in++;
 #ifdef CONFIG_SCHEDSTATS
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
@@ -2811,6 +2813,21 @@ unsigned long nr_active(void)
 }
 
 /*
+ * Externally visible per-cpu scheduler statistics:
+ * cpu_nr_switches(cpu) - number of context switches on that cpu
+ * cpu_nr_migrations(cpu) - number of migrations into that cpu
+ */
+u64 cpu_nr_switches(int cpu)
+{
+	return cpu_rq(cpu)->nr_switches;
+}
+
+u64 cpu_nr_migrations(int cpu)
+{
+	return cpu_rq(cpu)->nr_migrations_in;
+}
+
+/*
  * Update rq->cpu_load[] statistics. This function is usually called every
  * scheduler tick (TICK_NSEC).
  */
-- 
cgit v0.10.2


From d278c48435625cb6b7edcf6a547620768b175709 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Mon, 9 Feb 2009 07:38:50 +0100
Subject: perf_counters: account NMI interrupts

I noticed that kerneltop interrupts were accounted as NMI, but not their
perf counter origin.

Account NMI performance counter interrupts.

Signed-off-by: Mike Galbraith  <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

 arch/x86/kernel/cpu/perf_counter.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8bb2133..9901e46 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -495,6 +495,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 		goto out;
 
 again:
+	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_counter *counter = cpuc->counters[bit];
@@ -570,7 +571,6 @@ void perf_counter_unthrottle(void)
 void smp_perf_counter_interrupt(struct pt_regs *regs)
 {
 	irq_enter();
-	inc_irq_stat(apic_perf_irqs);
 	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
 	__smp_perf_counter_interrupt(regs, 0);
 
-- 
cgit v0.10.2


From 0475f9ea8e2cc030298908949e0d5da9f2fc2cfe Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 11 Feb 2009 14:35:35 +1100
Subject: perf_counters: allow users to count user, kernel and/or hypervisor
 events

Impact: new perf_counter feature

This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.

For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish.  Context switches and CPU migrations
are currently considered to occur in kernel mode.

On x86, this changes the previous policy that only root can count
kernel events.  Now non-root users can count kernel events or exclude
them.  Non-root users still can't use NMI events, though.  On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.

On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU.  Counters being added to a group have to have the same
settings as the other hardware counters in the group.  Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU.  If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5b02113..bd6ba85 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -16,6 +16,7 @@
 #include <asm/reg.h>
 #include <asm/pmc.h>
 #include <asm/machdep.h>
+#include <asm/firmware.h>
 
 struct cpu_hw_counters {
 	int n_counters;
@@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev)
 	return 0;
 }
 
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+{
+	int eu, ek, eh;
+	int i, n;
+	struct perf_counter *counter;
+
+	n = n_prev + n_new;
+	if (n <= 1)
+		return 0;
+
+	eu = ctrs[0]->hw_event.exclude_user;
+	ek = ctrs[0]->hw_event.exclude_kernel;
+	eh = ctrs[0]->hw_event.exclude_hv;
+	if (n_prev == 0)
+		n_prev = 1;
+	for (i = n_prev; i < n; ++i) {
+		counter = ctrs[i];
+		if (counter->hw_event.exclude_user != eu ||
+		    counter->hw_event.exclude_kernel != ek ||
+		    counter->hw_event.exclude_hv != eh)
+			return -EAGAIN;
+	}
+	return 0;
+}
+
 static void power_perf_read(struct perf_counter *counter)
 {
 	long val, delta, prev;
@@ -324,6 +355,20 @@ void hw_perf_restore(u64 disable)
 	}
 
 	/*
+	 * Add in MMCR0 freeze bits corresponding to the
+	 * hw_event.exclude_* bits for the first counter.
+	 * We have already checked that all counters have the
+	 * same values for these bits as the first counter.
+	 */
+	counter = cpuhw->counter[0];
+	if (counter->hw_event.exclude_user)
+		cpuhw->mmcr[0] |= MMCR0_FCP;
+	if (counter->hw_event.exclude_kernel)
+		cpuhw->mmcr[0] |= MMCR0_FCS;
+	if (counter->hw_event.exclude_hv)
+		cpuhw->mmcr[0] |= MMCR0_FCHV;
+
+	/*
 	 * Write the new configuration to MMCR* with the freeze
 	 * bit set and set the hardware counters to their initial values.
 	 * Then unfreeze the counters.
@@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 			   &cpuhw->counter[n0], &cpuhw->events[n0]);
 	if (n < 0)
 		return -EAGAIN;
+	if (check_excludes(cpuhw->counter, n0, n))
+		return -EAGAIN;
 	if (power_check_constraints(cpuhw->events, n + n0))
 		return -EAGAIN;
 	cpuhw->n_counters = n0 + n;
@@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter)
 		goto out;
 	cpuhw->counter[n0] = counter;
 	cpuhw->events[n0] = counter->hw.config;
+	if (check_excludes(cpuhw->counter, n0, 1))
+		goto out;
 	if (power_check_constraints(cpuhw->events, n0 + 1))
 		goto out;
 
@@ -555,6 +604,17 @@ hw_perf_counter_init(struct perf_counter *counter)
 	counter->hw.idx = 0;
 
 	/*
+	 * If we are not running on a hypervisor, force the
+	 * exclude_hv bit to 0 so that we don't care what
+	 * the user set it to.  This also means that we don't
+	 * set the MMCR0_FCHV bit, which unconditionally freezes
+	 * the counters on the PPC970 variants used in Apple G5
+	 * machines (since MSR.HV is always 1 on those machines).
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		counter->hw_event.exclude_hv = 0;
+	
+	/*
 	 * If this is in a group, check if it can go on with all the
 	 * other hardware counters in the group.  We assume the counter
 	 * hasn't been linked into its leader's sibling list at this point.
@@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter)
 		if (n < 0)
 			return NULL;
 	}
-	events[n++] = ev;
-	if (power_check_constraints(events, n))
+	events[n] = ev;
+	if (check_excludes(ctrs, n, 1))
+		return NULL;
+	if (power_check_constraints(events, n + 1))
 		return NULL;
 
-	counter->hw.config = events[n - 1];
+	counter->hw.config = events[n];
 	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
 	return &power_perf_ops;
 }
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9901e46..383d4c6 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -107,21 +107,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		return -EINVAL;
 
 	/*
-	 * Count user events, and generate PMC IRQs:
+	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
 	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;
+	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
 	/*
-	 * If privileged enough, count OS events too, and allow
-	 * NMI events as well:
+	 * Count user and OS events unless requested not to.
 	 */
-	hwc->nmi = 0;
-	if (capable(CAP_SYS_ADMIN)) {
+	if (!hw_event->exclude_user)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!hw_event->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-		if (hw_event->nmi)
-			hwc->nmi = 1;
-	}
+
+	/*
+	 * If privileged enough, allow NMI events:
+	 */
+	hwc->nmi = 0;
+	if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
+		hwc->nmi = 1;
 
 	hwc->irq_period		= hw_event->irq_period;
 	/*
@@ -248,10 +252,13 @@ __pmc_fixed_enable(struct perf_counter *counter,
 	int err;
 
 	/*
-	 * Enable IRQ generation (0x8) and ring-3 counting (0x2),
-	 * and enable ring-0 counting if allowed:
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
 	 */
-	bits = 0x8ULL | 0x2ULL;
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
 	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
 		bits |= 0x1;
 	bits <<= (idx * 4);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f55381f..c83f51d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -83,14 +83,17 @@ struct perf_counter_hw_event {
 	u64			irq_period;
 	u32			record_type;
 
-	u32			disabled     :  1, /* off by default      */
-				nmi	     :  1, /* NMI sampling        */
-				raw	     :  1, /* raw event type      */
-				inherit	     :  1, /* children inherit it */
-				pinned	     :  1, /* must always be on PMU */
-				exclusive    :  1, /* only counter on PMU */
-
-				__reserved_1 : 26;
+	u32			disabled       :  1, /* off by default        */
+				nmi	       :  1, /* NMI sampling          */
+				raw	       :  1, /* raw event type        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+
+				__reserved_1 : 23;
 
 	u64			__reserved_2;
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 544193c..89d5e3f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1567,11 +1567,25 @@ sw_perf_counter_init(struct perf_counter *counter)
 {
 	const struct hw_perf_counter_ops *hw_ops = NULL;
 
+	/*
+	 * Software counters (currently) can't in general distinguish
+	 * between user, kernel and hypervisor events.
+	 * However, context switches and cpu migrations are considered
+	 * to be kernel events, and page faults are never hypervisor
+	 * events.
+	 */
 	switch (counter->hw_event.type) {
 	case PERF_COUNT_CPU_CLOCK:
-		hw_ops = &perf_ops_cpu_clock;
+		if (!(counter->hw_event.exclude_user ||
+		      counter->hw_event.exclude_kernel ||
+		      counter->hw_event.exclude_hv))
+			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
+		if (counter->hw_event.exclude_user ||
+		    counter->hw_event.exclude_kernel ||
+		    counter->hw_event.exclude_hv)
+			break;
 		/*
 		 * If the user instantiates this as a per-cpu counter,
 		 * use the cpu_clock counter instead.
@@ -1582,13 +1596,17 @@ sw_perf_counter_init(struct perf_counter *counter)
 			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
-		hw_ops = &perf_ops_page_faults;
+		if (!(counter->hw_event.exclude_user ||
+		      counter->hw_event.exclude_kernel))
+			hw_ops = &perf_ops_page_faults;
 		break;
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		hw_ops = &perf_ops_context_switches;
+		if (!counter->hw_event.exclude_kernel)
+			hw_ops = &perf_ops_context_switches;
 		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
-		hw_ops = &perf_ops_cpu_migrations;
+		if (!counter->hw_event.exclude_kernel)
+			hw_ops = &perf_ops_cpu_migrations;
 		break;
 	default:
 		break;
-- 
cgit v0.10.2


From 5af759176cc767e7426f89764bde4996ebaaf419 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 11 Feb 2009 10:53:37 +0100
Subject: perfcounters: fix use after free in perf_release()

running...

  while true; do
    foo -d 1 -f 1 -c 100000 & sleep 1
    kerneltop -d 1 -f 1 -e 1 -c 25000 -p `pidof foo`
  done

  while true; do
    killall foo; killall kerneltop; sleep 2
  done

...in two shells with SLUB_DEBUG enabled produces flood of:
BUG task_struct: Poison overwritten.

Fix the use-after-free bug in perf_release().

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 89d5e3f..e0576c3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1145,12 +1145,12 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_lock(&counter->mutex);
 
 	perf_counter_remove_from_context(counter);
-	put_context(ctx);
 
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
 	kfree(counter);
+	put_context(ctx);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 4bcf349a0f90d1e69eb35c6df0fa285c886c1cd6 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 11 Feb 2009 13:53:19 +0100
Subject: perfcounters: fix refcounting bug, take 2

Only free child_counter if it has a parent; if it doesn't, then it
has a file pointing to it and we'll free it in perf_release.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e0576c3..fcefb0a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1958,14 +1958,13 @@ __perf_counter_exit_task(struct task_struct *child,
 		sync_child_counter(child_counter, parent_counter);
 		list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
 					 list_entry) {
-			if (sub->parent)
+			if (sub->parent) {
 				sync_child_counter(sub, sub->parent);
-			kfree(sub);
+				kfree(sub);
+			}
 		}
-	}
-
-	if (!child_counter->filp || !atomic_long_read(&child_counter->filp->f_count))
 		kfree(child_counter);
+	}
 }
 
 /*
-- 
cgit v0.10.2


From c07c99b67233ccaad38a961c17405dc1e1542aa4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 13 Feb 2009 22:10:34 +1100
Subject: perfcounters: make context switch and migration software counters
 work again

Jaswinder Singh Rajput reported that commit 23a185ca8abbeef caused the
context switch and migration software counters to report zero always.
With that commit, the software counters only count events that occur
between sched-in and sched-out for a task.  This is necessary for the
counter enable/disable prctls and ioctls to work.  However, the
context switch and migration counts are incremented after sched-out
for one task and before sched-in for the next.  Since the increment
doesn't occur while a task is scheduled in (as far as the software
counters are concerned) it doesn't count towards any counter.

Thus the context switch and migration counters need to count events
that occur at any time, provided the counter is enabled, not just
those that occur while the task is scheduled in (from the perf_counter
subsystem's point of view).  The problem though is that the software
counter code can't tell the difference between being enabled and being
scheduled in, and between being disabled and being scheduled out,
since we use the one pair of enable/disable entry points for both.
That is, the high-level disable operation simply arranges for the
counter to not be scheduled in any more, and the high-level enable
operation arranges for it to be scheduled in again.

One way to solve this would be to have sched_in/out operations in the
hw_perf_counter_ops struct as well as enable/disable.  However, this
takes a simpler approach: it adds a 'prev_state' field to the
perf_counter struct that allows a counter's enable method to know
whether the counter was previously disabled or just inactive
(scheduled out), and therefore whether the enable method is being
called as a result of a high-level enable or a schedule-in operation.

This then allows the context switch, migration and page fault counters
to reset their hw.prev_count value in their enable functions only if
they are called as a result of a high-level enable operation.
Although page faults would normally only occur while the counter is
scheduled in, this changes the page fault counter code too in case
there are ever circumstances where page faults get counted against a
task while its counters are not scheduled in.

Reported-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c83f51d..32cd1ac 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -173,6 +173,7 @@ struct perf_counter {
 	const struct hw_perf_counter_ops *hw_ops;
 
 	enum perf_counter_active_state	state;
+	enum perf_counter_active_state	prev_state;
 	atomic64_t			count;
 
 	struct perf_counter_hw_event	hw_event;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fcefb0a..ad62965 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -444,6 +444,7 @@ static void __perf_install_in_context(void *info)
 
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
+	counter->prev_state = PERF_COUNTER_STATE_OFF;
 
 	/*
 	 * Don't put the counter on if it is disabled or if
@@ -562,6 +563,7 @@ static void __perf_counter_enable(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
+	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
@@ -733,6 +735,7 @@ group_sched_in(struct perf_counter *group_counter,
 	if (ret)
 		return ret < 0 ? ret : 0;
 
+	group_counter->prev_state = group_counter->state;
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
 
@@ -740,6 +743,7 @@ group_sched_in(struct perf_counter *group_counter,
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		counter->prev_state = counter->state;
 		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
 			partial_group = counter;
 			goto group_error;
@@ -1398,9 +1402,9 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 
 static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	u64 now = task_clock_perf_counter_val(counter, 0);
-
-	atomic64_set(&counter->hw.prev_count, now);
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     task_clock_perf_counter_val(counter, 0));
 
 	return 0;
 }
@@ -1455,7 +1459,8 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 
 static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
 	return 0;
 }
 
@@ -1501,7 +1506,9 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 
 static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_context_switches(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_context_switches(counter));
 	return 0;
 }
 
@@ -1547,7 +1554,9 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 
 static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_cpu_migrations(counter));
 	return 0;
 }
 
-- 
cgit v0.10.2


From 73ca2f8380311115723c7afe811f3ed1f0ba945e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 16 Feb 2009 01:08:17 +0100
Subject: perfcounters: remove duplicate definition of LOCAL_PERF_VECTOR

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index b66b518..b07278c 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -112,11 +112,6 @@
 #define LOCAL_PERF_VECTOR		0xee
 
 /*
- * Performance monitoring interrupt vector:
- */
-#define LOCAL_PERF_VECTOR	0xee
-
-/*
  * First APIC vector available to drivers: (vectors 0x30-0xee) we
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
-- 
cgit v0.10.2


From 37a25424252b6cff4dd4b1937ab6a1dbfcadabcc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 16 Feb 2009 15:32:23 +0100
Subject: perfcounters: fix acpi_idle_do_entry() workaround

Fix merge error in drivers/acpi/processor_idle.c. This
resulted in non-working perfcounters on certain Nehalem
systems.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 259f6e8..08def2f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -826,12 +826,9 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
 	u64 perf_flags;
 
-	u64 pctrl;
-
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
 	perf_flags = hw_perf_save_disable();
-	pctrl = hw_perf_save_disable();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -847,7 +844,6 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
 	hw_perf_restore(perf_flags);
-	hw_perf_restore(pctrl);
 	start_critical_timings();
 }
 
-- 
cgit v0.10.2


From d095cd46dac104e4d2a4967c7c19b55a12f78240 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 23 Feb 2009 23:01:28 +1100
Subject: perfcounters/powerpc: Make exclude_kernel bit work on Apple G5
 processors

Currently, setting hw_event.exclude_kernel does nothing on the PPC970
variants used in Apple G5 machines, because they have the HV (hypervisor)
bit in the MSR forced to 1, so as far as the PMU is concerned, the
kernel runs in hypervisor mode.  Thus we have to use the MMCR0_FCHV
(freeze counters in hypervisor mode) bit rather than the MMCR0_FCS
(freeze counters in supervisor mode) bit.

This checks the MSR.HV bit at startup, and if it is set, we set the
freeze_counters_kernel variable to MMCR0_FCHV (it was initialized to
MMCR0_FCS).  We then use that whenever we need to exclude kernel events.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bd6ba85..6e27913 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -32,6 +32,15 @@ DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
 struct power_pmu *ppmu;
 
+/*
+ * Normally, to ignore kernel events we set the FCS (freeze counters
+ * in supervisor mode) bit in MMCR0, but if the kernel runs with the
+ * hypervisor bit set in the MSR, or if we are running on a processor
+ * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
+ * then we need to use the FCHV bit to ignore kernel events.
+ */
+static unsigned int freeze_counters_kernel = MMCR0_FCS;
+
 void perf_counter_print_debug(void)
 {
 }
@@ -364,7 +373,7 @@ void hw_perf_restore(u64 disable)
 	if (counter->hw_event.exclude_user)
 		cpuhw->mmcr[0] |= MMCR0_FCP;
 	if (counter->hw_event.exclude_kernel)
-		cpuhw->mmcr[0] |= MMCR0_FCS;
+		cpuhw->mmcr[0] |= freeze_counters_kernel;
 	if (counter->hw_event.exclude_hv)
 		cpuhw->mmcr[0] |= MMCR0_FCHV;
 
@@ -606,10 +615,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * If we are not running on a hypervisor, force the
 	 * exclude_hv bit to 0 so that we don't care what
-	 * the user set it to.  This also means that we don't
-	 * set the MMCR0_FCHV bit, which unconditionally freezes
-	 * the counters on the PPC970 variants used in Apple G5
-	 * machines (since MSR.HV is always 1 on those machines).
+	 * the user set it to.
 	 */
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		counter->hw_event.exclude_hv = 0;
@@ -841,6 +847,13 @@ static int init_perf_counters(void)
 		ppmu = &power6_pmu;
 		break;
 	}
+
+	/*
+	 * Use FCHV to ignore kernel events if MSR.HV is set.
+	 */
+	if (mfmsr() & MSR_HV)
+		freeze_counters_kernel = MMCR0_FCHV;
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 742bd95ba96e19b3f7196c3a0834ebc17c8ba006 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 24 Feb 2009 11:33:56 +1100
Subject: perfcounters/powerpc: Add support for POWER5 processors

This adds the back-end for the PMU on the POWER5 processor.  This knows
how to use the fixed-function PMC5 and PMC6 (instructions completed and
run cycles).  Unlike POWER6, PMC5/6 obey the freeze conditions and can
generate interrupts, so their use doesn't impose any extra restrictions.

POWER5+ is different and is not supported by this patch.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7c941ec..b4c6f46 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,7 +94,8 @@ obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
-obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o ppc970-pmu.o power6-pmu.o
+obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o ppc970-pmu.o power5-pmu.o \
+				   power6-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 6e27913..112332d 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -824,6 +824,7 @@ void hw_perf_counter_setup(int cpu)
 }
 
 extern struct power_pmu ppc970_pmu;
+extern struct power_pmu power5_pmu;
 extern struct power_pmu power6_pmu;
 
 static int init_perf_counters(void)
@@ -843,6 +844,9 @@ static int init_perf_counters(void)
 	case PV_970MP:
 		ppmu = &ppc970_pmu;
 		break;
+	case PV_POWER5:
+		ppmu = &power5_pmu;
+		break;
 	case 0x3e:
 		ppmu = &power6_pmu;
 		break;
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
new file mode 100644
index 0000000..379ed10
--- /dev/null
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -0,0 +1,475 @@
+/*
+ * Performance counter support for POWER5 (not POWER5++) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER5 (not POWER5++)
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	16	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_BYTE_SH	12	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	7
+#define PM_GRS_SH	8	/* Storage subsystem mux select */
+#define PM_GRS_MSK	7
+#define PM_BUSEVENT_MSK	0x80	/* Set if event uses event bus */
+#define PM_PMCSEL_MSK	0x7f
+
+/* Values in PM_UNIT field */
+#define PM_FPU		0
+#define PM_ISU0		1
+#define PM_IFU		2
+#define PM_ISU1		3
+#define PM_IDU		4
+#define PM_ISU0_ALT	6
+#define PM_GRS		7
+#define PM_LSU0		8
+#define PM_LSU1		0xc
+#define PM_LASTUNIT	0xc
+
+/*
+ * Bits in MMCR1 for POWER5
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	60
+#define MMCR1_TTM2SEL_SH	58
+#define MMCR1_TTM3SEL_SH	56
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	54
+#define MMCR1_TD_CP_DBG1SEL_SH	52
+#define MMCR1_TD_CP_DBG2SEL_SH	50
+#define MMCR1_TD_CP_DBG3SEL_SH	48
+#define MMCR1_GRS_L2SEL_SH	46
+#define MMCR1_GRS_L2SEL_MSK	3
+#define MMCR1_GRS_L3SEL_SH	44
+#define MMCR1_GRS_L3SEL_MSK	3
+#define MMCR1_GRS_MCSEL_SH	41
+#define MMCR1_GRS_MCSEL_MSK	7
+#define MMCR1_GRS_FABSEL_SH	39
+#define MMCR1_GRS_FABSEL_MSK	3
+#define MMCR1_PMC1_ADDER_SEL_SH	35
+#define MMCR1_PMC2_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC1SEL_SH	25
+#define MMCR1_PMC2SEL_SH	17
+#define MMCR1_PMC3SEL_SH	9
+#define MMCR1_PMC4SEL_SH	1
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0x7f
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *         <><>[  ><><>< ><> [  >[ >[ ><  ><  ><  ><  ><><><><><><>
+ *         T0T1 NC G0G1G2 G3  UC PS1PS2 B0  B1  B2  B3 P6P5P4P3P2P1
+ *
+ * T0 - TTM0 constraint
+ *     54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
+ *
+ * NC - number of counters
+ *     51: NC error 0x0008_0000_0000_0000
+ *     48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ *     46-47: GRS_L2SEL value
+ *     44-45: GRS_L3SEL value
+ *     41-44: GRS_MCSEL value
+ *     39-40: GRS_FABSEL value
+ *	Note that these match up with their bit positions in MMCR1
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ *     37: UC3 error 0x20_0000_0000
+ *     36: FPU|IFU|ISU1 events needed 0x10_0000_0000
+ *     35: ISU0 events needed 0x08_0000_0000
+ *     34: IDU|GRS events needed 0x04_0000_0000
+ *
+ * PS1
+ *     33: PS1 error 0x2_0000_0000
+ *     31-32: count of events needing PMC1/2 0x1_8000_0000
+ *
+ * PS2
+ *     30: PS2 error 0x4000_0000
+ *     28-29: count of events needing PMC3/4 0x3000_0000
+ *
+ * B0
+ *     24-27: Byte 0 event source 0x0f00_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
+ *
+ * P1..P6
+ *     0-11: Count of events needing PMC1..PMC6
+ */
+
+static const int grsel_shift[8] = {
+	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
+	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
+	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
+};
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0xc0002000000000ull, 0x00001000000000ull },
+	[PM_ISU0] =  { 0x00002000000000ull, 0x00000800000000ull },
+	[PM_ISU1] =  { 0xc0002000000000ull, 0xc0001000000000ull },
+	[PM_IFU] =   { 0xc0002000000000ull, 0x80001000000000ull },
+	[PM_IDU] =   { 0x30002000000000ull, 0x00000400000000ull },
+	[PM_GRS] =   { 0x30002000000000ull, 0x30000400000000ull },
+};
+
+static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, unit, sh;
+	int bit, fmask;
+	u64 mask = 0, value = 0;
+	int grp = -1;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 6)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		if (pmc <= 4)
+			grp = (pmc - 1) >> 1;
+		else if (event != 0x500009 && event != 0x600005)
+			return -1;
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+		if (unit > PM_LASTUNIT)
+			return -1;
+		if (unit == PM_ISU0_ALT)
+			unit = PM_ISU0;
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (byte >= 4) {
+			if (unit != PM_LSU1)
+				return -1;
+			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+			++unit;
+			byte &= 3;
+		}
+		if (unit == PM_GRS) {
+			bit = event & 7;
+			fmask = (bit == 6)? 7: 3;
+			sh = grsel_shift[bit];
+			mask |= (u64)fmask << sh;
+			value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+		}
+		/*
+		 * Bus events on bytes 0 and 2 can be counted
+		 * on PMC1/2; bytes 1 and 3 on PMC3/4.
+		 */
+		if (!pmc)
+			grp = byte & 1;
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (24 - 4 * byte);
+		value |= (u64)unit << (24 - 4 * byte);
+	}
+	if (grp == 0) {
+		/* increment PMC1/2 field */
+		mask  |= 0x200000000ull;
+		value |= 0x080000000ull;
+	} else if (grp == 1) {
+		/* increment PMC3/4 field */
+		mask  |= 0x40000000ull;
+		value |= 0x10000000ull;
+	}
+	if (pmc < 5) {
+		/* need a counter from PMC1-4 set */
+		mask  |= 0x8000000000000ull;
+		value |= 0x1000000000000ull;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+#define MAX_ALT	3	/* at most 3 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x120e4,  0x400002 },			/* PM_GRP_DISP_REJECT */
+	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
+	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
+	{ 0x100009, 0x200009, 0x500009 },	/* PM_INST_CMPL */
+	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(unsigned int event)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			break;
+		for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+			if (event == event_alternatives[i][j])
+				return i;
+	}
+	return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {
+	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
+	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
+	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
+	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * Some direct events for decodes of event bus byte 3 have alternative
+ * PMCSEL values on other counters.  This returns the alternative
+ * event code for those that do, or -1 otherwise.
+ */
+static int find_alternative_bdecode(unsigned int event)
+{
+	int pmc, altpmc, pp, j;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc == 0 || pmc > 4)
+		return -1;
+	altpmc = 5 - pmc;	/* 1 <-> 4, 2 <-> 3 */
+	pp = event & PM_PMCSEL_MSK;
+	for (j = 0; j < 4; ++j) {
+		if (bytedecode_alternatives[pmc - 1][j] == pp) {
+			return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+				(altpmc << PM_PMC_SH) |
+				bytedecode_alternatives[altpmc - 1][j];
+		}
+	}
+	return -1;
+}
+
+static int power5_get_alternatives(unsigned int event, unsigned int alt[])
+{
+	int i, j, ae, nalt = 1;
+
+	alt[0] = event;
+	nalt = 1;
+	i = find_alternative(event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (ae && ae != event)
+				alt[nalt++] = ae;
+		}
+	} else {
+		ae = find_alternative_bdecode(event);
+		if (ae > 0)
+			alt[nalt++] = ae;
+	}
+	return nalt;
+}
+
+static int power5_compute_mmcr(unsigned int event[], int n_ev,
+			       unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr1 = 0;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm, grp;
+	int i, isbus, bit, grsel;
+	unsigned int pmc_inuse = 0;
+	unsigned int pmc_grp_use[2];
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	int ttmuse;
+
+	if (n_ev > 6)
+		return -1;
+
+	/* First pass to count resource use */
+	pmc_grp_use[0] = pmc_grp_use[1] = 0;
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc > 6)
+				return -1;
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+			/* count 1/2 vs 3/4 use */
+			if (pmc <= 4)
+				++pmc_grp_use[(pmc - 1) >> 1];
+		}
+		if (event[i] & PM_BUSEVENT_MSK) {
+			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (unit == PM_ISU0_ALT)
+				unit = PM_ISU0;
+			if (byte >= 4) {
+				if (unit != PM_LSU1)
+					return -1;
+				++unit;
+				byte &= 3;
+			}
+			if (!pmc)
+				++pmc_grp_use[byte & 1];
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+	if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
+		return -1;
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU0] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
+		unituse[PM_ISU0] = 0;
+	}
+	/* Set TTM[01]SEL fields. */
+	ttmuse = 0;
+	for (i = PM_FPU; i <= PM_ISU1; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+	}
+	ttmuse = 0;
+	for (; i <= PM_GRS; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+	}
+	if (ttmuse > 1)
+		return -1;
+
+	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+			/* get ISU0 through TTM1 rather than TTM0 */
+			unit = PM_ISU0_ALT;
+		} else if (unit == PM_LSU1 + 1) {
+			/* select lower word of LSU1 for this byte */
+			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		ttm = unit >> 2;
+		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		isbus = event[i] & PM_BUSEVENT_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			for (pmc = 0; pmc < 4; ++pmc) {
+				if (pmc_inuse & (1 << pmc))
+					continue;
+				grp = (pmc >> 1) & 1;
+				if (isbus) {
+					if (grp == (byte & 1))
+						break;
+				} else if (pmc_grp_use[grp] < 2) {
+					++pmc_grp_use[grp];
+					break;
+				}
+			}
+			pmc_inuse |= 1 << pmc;
+		} else if (pmc <= 4) {
+			/* Direct event */
+			--pmc;
+			if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+		} else {
+			/* Instructions or run cycles on PMC5/6 */
+			--pmc;
+		}
+		if (isbus && unit == PM_GRS) {
+			bit = psel & 7;
+			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+			mmcr1 |= (u64)grsel << grsel_shift[bit];
+		}
+		if (pmc <= 3)
+			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+		hwc[i] = pmc;
+	}
+
+	/* Return MMCRx values */
+	mmcr[0] = 0;
+	if (pmc_inuse & 1)
+		mmcr[0] = MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr[0] |= MMCR0_PMCjCE;
+	mmcr[1] = mmcr1;
+	mmcr[2] = 0;
+	return 0;
+}
+
+static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	if (pmc <= 3)
+		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5_generic_events[] = {
+	[PERF_COUNT_CPU_CYCLES] = 0xf,
+	[PERF_COUNT_INSTRUCTIONS] = 0x100009,
+	[PERF_COUNT_CACHE_REFERENCES] = 0x4c1090,	/* LD_REF_L1 */
+	[PERF_COUNT_CACHE_MISSES] = 0x3c1088,		/* LD_MISS_L1 */
+	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4,	/* BR_ISSUED */ 
+	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,		/* BR_MPRED_CR */
+};
+
+struct power_pmu power5_pmu = {
+	.n_counter = 6,
+	.max_alternatives = MAX_ALT,
+	.add_fields = 0x7000090000555ull,
+	.test_adder = 0x3000490000000ull,
+	.compute_mmcr = power5_compute_mmcr,
+	.get_constraint = power5_get_constraint,
+	.get_alternatives = power5_get_alternatives,
+	.disable_pmc = power5_disable_pmc,
+	.n_generic = ARRAY_SIZE(power5_generic_events),
+	.generic_events = power5_generic_events,
+};
-- 
cgit v0.10.2


From f3dfd2656deb81a0addee4f4ceff66b50a387388 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 26 Feb 2009 22:43:46 +1100
Subject: perfcounters: fix a few minor cleanliness issues

This fixes three issues noticed by Arnd Bergmann:

- Add #ifdef __KERNEL__ and move some things around in perf_counter.h
  to make sure only the bits that userspace needs are exported to
  userspace.

- Use __u64, __s64, __u32 types in the structs exported to userspace
  rather than u64, s64, u32.

- Make the sys_perf_counter_open syscall available to the SPUs on
  Cell platforms.

And one issue that I noticed in looking at the code again:

- Wrap the perf_counter_open syscall with SYSCALL_DEFINE4 so we get
  the proper handling of int arguments on ppc64 (and some other 64-bit
  architectures).

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 4c8095f..d312eec 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,4 +322,4 @@ SYSCALL_SPU(epoll_create1)
 SYSCALL_SPU(dup3)
 SYSCALL_SPU(pipe2)
 SYSCALL(inotify_init1)
-SYSCALL(perf_counter_open)
+SYSCALL_SPU(perf_counter_open)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 32cd1ac..186efaf 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -13,20 +13,8 @@
 #ifndef _LINUX_PERF_COUNTER_H
 #define _LINUX_PERF_COUNTER_H
 
-#include <asm/atomic.h>
-#include <asm/ioctl.h>
-
-#ifdef CONFIG_PERF_COUNTERS
-# include <asm/perf_counter.h>
-#endif
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/rculist.h>
-#include <linux/rcupdate.h>
-#include <linux/spinlock.h>
-
-struct task_struct;
+#include <linux/types.h>
+#include <linux/ioctl.h>
 
 /*
  * User-space ABI bits:
@@ -78,12 +66,12 @@ enum perf_counter_record_type {
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	s64			type;
+	__s64			type;
 
-	u64			irq_period;
-	u32			record_type;
+	__u64			irq_period;
+	__u32			record_type;
 
-	u32			disabled       :  1, /* off by default        */
+	__u32			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
 				raw	       :  1, /* raw event type        */
 				inherit	       :  1, /* children inherit it   */
@@ -95,7 +83,7 @@ struct perf_counter_hw_event {
 
 				__reserved_1 : 23;
 
-	u64			__reserved_2;
+	__u64			__reserved_2;
 };
 
 /*
@@ -104,10 +92,24 @@ struct perf_counter_hw_event {
 #define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
 #define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
 
+#ifdef __KERNEL__
 /*
- * Kernel-internal data types:
+ * Kernel-internal data types and definitions:
  */
 
+#ifdef CONFIG_PERF_COUNTERS
+# include <asm/perf_counter.h>
+#endif
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+struct task_struct;
+
 /**
  * struct hw_perf_counter - performance counter hardware details:
  */
@@ -293,4 +295,5 @@ static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 #endif
 
+#endif /* __KERNEL__ */
 #endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 88255d32..28ef2be 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -696,10 +696,7 @@ asmlinkage long sys_pipe(int __user *);
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 
-asmlinkage int sys_perf_counter_open(
-
-	struct perf_counter_hw_event	*hw_event_uptr		__user,
-	pid_t				pid,
-	int				cpu,
-	int				group_fd);
+asmlinkage long sys_perf_counter_open(
+		const struct perf_counter_hw_event __user *hw_event_uptr,
+		pid_t pid, int cpu, int group_fd);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ad62965..16b14ba 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1690,9 +1690,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
  * @cpu:		target cpu
  * @group_fd:		group leader counter fd
  */
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
-		      pid_t pid, int cpu, int group_fd)
+SYSCALL_DEFINE4(perf_counter_open,
+		const struct perf_counter_hw_event __user *, hw_event_uptr,
+		pid_t, pid, int, cpu, int, group_fd)
 {
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
-- 
cgit v0.10.2


From b56a3802dc6df29aa27d2c12edf420258091ad66 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Fri, 27 Feb 2009 18:09:09 +0530
Subject: x86: prepare perf_counter to add more cpus

Introduced  struct pmc_x86_ops to add more cpus.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 383d4c6..a3c8852 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -3,6 +3,7 @@
  *
  *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
+ *  Copyright(C) 2009 Jaswinder Singh Rajput
  *
  *  For licencing details see kernel-base/COPYING
  */
@@ -38,10 +39,24 @@ struct cpu_hw_counters {
 };
 
 /*
- * Intel PerfMon v3. Used on Core2 and later.
+ * struct pmc_x86_ops - performance counter x86 ops
  */
+struct pmc_x86_ops {
+	u64 (*save_disable_all)		(void);
+	void (*restore_all)		(u64 ctrl);
+	unsigned eventsel;
+	unsigned perfctr;
+	int (*event_map)		(int event);
+	int max_events;
+};
+
+static struct pmc_x86_ops *pmc_ops;
+
 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
 static const int intel_perfmon_event_map[] =
 {
   [PERF_COUNT_CPU_CYCLES]		= 0x003c,
@@ -53,7 +68,10 @@ static const int intel_perfmon_event_map[] =
   [PERF_COUNT_BUS_CYCLES]		= 0x013c,
 };
 
-static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
+static int pmc_intel_event_map(int event)
+{
+	return intel_perfmon_event_map[event];
+}
 
 /*
  * Propagate counter elapsed time into the generic counter.
@@ -144,38 +162,48 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (hw_event->raw) {
 		hwc->config |= hw_event->type;
 	} else {
-		if (hw_event->type >= max_intel_perfmon_events)
+		if (hw_event->type >= pmc_ops->max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= intel_perfmon_event_map[hw_event->type];
+		hwc->config |= pmc_ops->event_map(hw_event->type);
 	}
 	counter->wakeup_pending = 0;
 
 	return 0;
 }
 
-u64 hw_perf_save_disable(void)
+static u64 pmc_intel_save_disable_all(void)
 {
 	u64 ctrl;
 
-	if (unlikely(!perf_counters_initialized))
-		return 0;
-
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
 	return ctrl;
 }
+
+u64 hw_perf_save_disable(void)
+{
+	if (unlikely(!perf_counters_initialized))
+		return 0;
+
+	return pmc_ops->save_disable_all();
+}
 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
+static void pmc_intel_restore_all(u64 ctrl)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+}
+
 void hw_perf_restore(u64 ctrl)
 {
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+	pmc_ops->restore_all(ctrl);
 }
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
@@ -291,11 +319,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
-	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_INSTRUCTIONS]))
+	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_CPU_CYCLES]))
+	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
 		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_BUS_CYCLES]))
+	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
 		return X86_PMC_IDX_FIXED_BUS_CYCLES;
 
 	return -1;
@@ -339,8 +367,8 @@ try_generic:
 			set_bit(idx, cpuc->used);
 			hwc->idx = idx;
 		}
-		hwc->config_base  = MSR_ARCH_PERFMON_EVENTSEL0;
-		hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
+		hwc->config_base  = pmc_ops->eventsel;
+		hwc->counter_base = pmc_ops->perfctr;
 	}
 
 	perf_counters_lapic_init(hwc->nmi);
@@ -386,8 +414,8 @@ void perf_counter_print_debug(void)
 	printk(KERN_INFO "CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
-		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
-		rdmsrl(MSR_ARCH_PERFMON_PERFCTR0  + idx, pmc_count);
+		rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
+		rdmsrl(pmc_ops->perfctr  + idx, pmc_count);
 
 		prev_left = per_cpu(prev_left[idx], cpu);
 
@@ -655,29 +683,56 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 	.priority		= 1
 };
 
-void __init init_hw_perf_counters(void)
+static struct pmc_x86_ops pmc_intel_ops = {
+	.save_disable_all	= pmc_intel_save_disable_all,
+	.restore_all		= pmc_intel_restore_all,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= pmc_intel_event_map,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+};
+
+static struct pmc_x86_ops *pmc_intel_init(void)
 {
 	union cpuid10_eax eax;
 	unsigned int ebx;
 	unsigned int unused;
 	union cpuid10_edx edx;
 
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
-		return;
-
 	/*
 	 * Check whether the Architectural PerfMon supports
 	 * Branch Misses Retired Event or not.
 	 */
 	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
 	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
-		return;
+		return NULL;
 
 	printk(KERN_INFO "Intel Performance Monitoring support detected.\n");
-
 	printk(KERN_INFO "... version:         %d\n", eax.split.version_id);
-	printk(KERN_INFO "... num counters:    %d\n", eax.split.num_counters);
+	printk(KERN_INFO "... bit width:       %d\n", eax.split.bit_width);
+	printk(KERN_INFO "... mask length:     %d\n", eax.split.mask_length);
+
 	nr_counters_generic = eax.split.num_counters;
+	nr_counters_fixed = edx.split.num_counters_fixed;
+	counter_value_mask = (1ULL << eax.split.bit_width) - 1;
+
+	return &pmc_intel_ops;
+}
+
+void __init init_hw_perf_counters(void)
+{
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+		return;
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_INTEL:
+		pmc_ops = pmc_intel_init();
+		break;
+	}
+	if (!pmc_ops)
+		return;
+
+	printk(KERN_INFO "... num counters:    %d\n", nr_counters_generic);
 	if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
 		nr_counters_generic = X86_PMC_MAX_GENERIC;
 		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
@@ -686,13 +741,8 @@ void __init init_hw_perf_counters(void)
 	perf_counter_mask = (1 << nr_counters_generic) - 1;
 	perf_max_counters = nr_counters_generic;
 
-	printk(KERN_INFO "... bit width:       %d\n", eax.split.bit_width);
-	counter_value_mask = (1ULL << eax.split.bit_width) - 1;
 	printk(KERN_INFO "... value mask:      %016Lx\n", counter_value_mask);
 
-	printk(KERN_INFO "... mask length:     %d\n", eax.split.mask_length);
-
-	nr_counters_fixed = edx.split.num_counters_fixed;
 	if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
 		nr_counters_fixed = X86_PMC_MAX_FIXED;
 		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
-- 
cgit v0.10.2


From f87ad35d37fa543925210550f7db20a54c83ed70 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Fri, 27 Feb 2009 20:15:14 +0530
Subject: x86: AMD Support for perf_counter

Supported basic performance counter for AMD K7 and later:

$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null

 Performance counter stats for 'ls':

      12.298610  task clock ticks     (msecs)

        3298477  CPU cycles           (events)
        1406354  instructions         (events)
         749035  cache references     (events)
          16939  cache misses         (events)
         100589  branches             (events)
          11159  branch misses        (events)
       7.627540  cpu clock ticks      (msecs)
      12.298610  task clock ticks     (msecs)
            500  pagefaults           (events)
              6  context switches     (events)
              3  CPU migrations       (events)

 Wall-clock time elapsed:     8.672290 msecs

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 25423a5..edcde52 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -368,6 +368,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 >= 6)
 		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
 
+	/* Enable Performance counter for K7 and later */
+	if (c->x86 > 6 && c->x86 <= 0x11)
+		set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+
 	if (!c->x86_model_id[0]) {
 		switch (c->x86) {
 		case 0xf:
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a3c8852..266618a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -74,6 +74,24 @@ static int pmc_intel_event_map(int event)
 }
 
 /*
+ * AMD Performance Monitor K7 and later.
+ */
+static const int amd_perfmon_event_map[] =
+{
+  [PERF_COUNT_CPU_CYCLES]		= 0x0076,
+  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_CACHE_REFERENCES]		= 0x0080,
+  [PERF_COUNT_CACHE_MISSES]		= 0x0081,
+  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
+};
+
+static int pmc_amd_event_map(int event)
+{
+	return amd_perfmon_event_map[event];
+}
+
+/*
  * Propagate counter elapsed time into the generic counter.
  * Can only be executed on the CPU where the counter is active.
  * Returns the delta events processed.
@@ -151,8 +169,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	 * so we install an artificial 1<<31 period regardless of
 	 * the generic counter period:
 	 */
-	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
-		hwc->irq_period = 0x7FFFFFFF;
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
+			hwc->irq_period = 0x7FFFFFFF;
 
 	atomic64_set(&hwc->period_left, hwc->irq_period);
 
@@ -184,6 +203,22 @@ static u64 pmc_intel_save_disable_all(void)
 	return ctrl;
 }
 
+static u64 pmc_amd_save_disable_all(void)
+{
+	int idx;
+	u64 val, ctrl = 0;
+
+	for (idx = 0; idx < nr_counters_generic; idx++) {
+		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			ctrl |= (1 << idx);
+		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+	}
+
+	return ctrl;
+}
+
 u64 hw_perf_save_disable(void)
 {
 	if (unlikely(!perf_counters_initialized))
@@ -198,6 +233,20 @@ static void pmc_intel_restore_all(u64 ctrl)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 }
 
+static void pmc_amd_restore_all(u64 ctrl)
+{
+	u64 val;
+	int idx;
+
+	for (idx = 0; idx < nr_counters_generic; idx++) {
+		if (ctrl & (1 << idx)) {
+			rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+			wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		}
+	}
+}
+
 void hw_perf_restore(u64 ctrl)
 {
 	if (unlikely(!perf_counters_initialized))
@@ -314,6 +363,9 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 {
 	unsigned int event;
 
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return -1;
+
 	if (unlikely(hwc->nmi))
 		return -1;
 
@@ -401,6 +453,7 @@ void perf_counter_print_debug(void)
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 	rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
@@ -411,6 +464,7 @@ void perf_counter_print_debug(void)
 	printk(KERN_INFO "CPU#%d: status:     %016llx\n", cpu, status);
 	printk(KERN_INFO "CPU#%d: overflow:   %016llx\n", cpu, overflow);
 	printk(KERN_INFO "CPU#%d: fixed:      %016llx\n", cpu, fixed);
+	}
 	printk(KERN_INFO "CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
@@ -588,6 +642,9 @@ void perf_counter_unthrottle(void)
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
 		return;
 
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return;
+
 	if (unlikely(!perf_counters_initialized))
 		return;
 
@@ -692,6 +749,15 @@ static struct pmc_x86_ops pmc_intel_ops = {
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 };
 
+static struct pmc_x86_ops pmc_amd_ops = {
+	.save_disable_all	= pmc_amd_save_disable_all,
+	.restore_all		= pmc_amd_restore_all,
+	.eventsel		= MSR_K7_EVNTSEL0,
+	.perfctr		= MSR_K7_PERFCTR0,
+	.event_map		= pmc_amd_event_map,
+	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+};
+
 static struct pmc_x86_ops *pmc_intel_init(void)
 {
 	union cpuid10_eax eax;
@@ -719,6 +785,16 @@ static struct pmc_x86_ops *pmc_intel_init(void)
 	return &pmc_intel_ops;
 }
 
+static struct pmc_x86_ops *pmc_amd_init(void)
+{
+	nr_counters_generic = 4;
+	nr_counters_fixed = 0;
+
+	printk(KERN_INFO "AMD Performance Monitoring support detected.\n");
+
+	return &pmc_amd_ops;
+}
+
 void __init init_hw_perf_counters(void)
 {
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -728,6 +804,9 @@ void __init init_hw_perf_counters(void)
 	case X86_VENDOR_INTEL:
 		pmc_ops = pmc_intel_init();
 		break;
+	case X86_VENDOR_AMD:
+		pmc_ops = pmc_amd_init();
+		break;
 	}
 	if (!pmc_ops)
 		return;
-- 
cgit v0.10.2


From 169e41eb7f5464c077a7e0e129f025759d04cc54 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sat, 28 Feb 2009 18:37:49 +0530
Subject: x86: decent declarations in perf_counter.c

Impact: cleanup

making decent declrations for struct pmc_x86_ops and
fix checkpatch error:
 ERROR: Macros with complex values should be enclosed in parenthesis

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 266618a..a1f3646 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -42,12 +42,12 @@ struct cpu_hw_counters {
  * struct pmc_x86_ops - performance counter x86 ops
  */
 struct pmc_x86_ops {
-	u64 (*save_disable_all)		(void);
-	void (*restore_all)		(u64 ctrl);
-	unsigned eventsel;
-	unsigned perfctr;
-	int (*event_map)		(int event);
-	int max_events;
+	u64		(*save_disable_all)(void);
+	void		(*restore_all)(u64 ctrl);
+	unsigned	eventsel;
+	unsigned	perfctr;
+	int		(*event_map)(int event);
+	int		max_events;
 };
 
 static struct pmc_x86_ops *pmc_ops;
@@ -561,7 +561,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 /*
  * Maximum interrupt frequency of 100KHz per CPU
  */
-#define PERFMON_MAX_INTERRUPTS 100000/HZ
+#define PERFMON_MAX_INTERRUPTS (100000/HZ)
 
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
-- 
cgit v0.10.2


From a1ef58f442542d8b3e3b963339fbc522c36e827c Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sat, 28 Feb 2009 18:45:39 +0530
Subject: x86: use pr_info in perf_counter.c

Impact: cleanup

using pr_info in perf_counter.c fixes various 80 characters warnings and
also indenting for conditional statement

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a1f3646..3b65f19 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -454,18 +454,18 @@ void perf_counter_print_debug(void)
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
-	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-	rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
-	rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
-
-	printk(KERN_INFO "\n");
-	printk(KERN_INFO "CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
-	printk(KERN_INFO "CPU#%d: status:     %016llx\n", cpu, status);
-	printk(KERN_INFO "CPU#%d: overflow:   %016llx\n", cpu, overflow);
-	printk(KERN_INFO "CPU#%d: fixed:      %016llx\n", cpu, fixed);
+		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+
+		pr_info("\n");
+		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
+		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
+		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
+		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 	}
-	printk(KERN_INFO "CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
+	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
 		rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
@@ -473,17 +473,17 @@ void perf_counter_print_debug(void)
 
 		prev_left = per_cpu(prev_left[idx], cpu);
 
-		printk(KERN_INFO "CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
+		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
 			cpu, idx, pmc_ctrl);
-		printk(KERN_INFO "CPU#%d:   gen-PMC%d count: %016llx\n",
+		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
 			cpu, idx, pmc_count);
-		printk(KERN_INFO "CPU#%d:   gen-PMC%d left:  %016llx\n",
+		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
 			cpu, idx, prev_left);
 	}
 	for (idx = 0; idx < nr_counters_fixed; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 
-		printk(KERN_INFO "CPU#%d: fixed-PMC%d count: %016llx\n",
+		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
 			cpu, idx, pmc_count);
 	}
 	local_irq_enable();
@@ -773,10 +773,10 @@ static struct pmc_x86_ops *pmc_intel_init(void)
 	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
 		return NULL;
 
-	printk(KERN_INFO "Intel Performance Monitoring support detected.\n");
-	printk(KERN_INFO "... version:         %d\n", eax.split.version_id);
-	printk(KERN_INFO "... bit width:       %d\n", eax.split.bit_width);
-	printk(KERN_INFO "... mask length:     %d\n", eax.split.mask_length);
+	pr_info("Intel Performance Monitoring support detected.\n");
+	pr_info("... version:         %d\n", eax.split.version_id);
+	pr_info("... bit width:       %d\n", eax.split.bit_width);
+	pr_info("... mask length:     %d\n", eax.split.mask_length);
 
 	nr_counters_generic = eax.split.num_counters;
 	nr_counters_fixed = edx.split.num_counters_fixed;
@@ -790,7 +790,7 @@ static struct pmc_x86_ops *pmc_amd_init(void)
 	nr_counters_generic = 4;
 	nr_counters_fixed = 0;
 
-	printk(KERN_INFO "AMD Performance Monitoring support detected.\n");
+	pr_info("AMD Performance Monitoring support detected.\n");
 
 	return &pmc_amd_ops;
 }
@@ -811,7 +811,7 @@ void __init init_hw_perf_counters(void)
 	if (!pmc_ops)
 		return;
 
-	printk(KERN_INFO "... num counters:    %d\n", nr_counters_generic);
+	pr_info("... num counters:    %d\n", nr_counters_generic);
 	if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
 		nr_counters_generic = X86_PMC_MAX_GENERIC;
 		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
@@ -820,18 +820,18 @@ void __init init_hw_perf_counters(void)
 	perf_counter_mask = (1 << nr_counters_generic) - 1;
 	perf_max_counters = nr_counters_generic;
 
-	printk(KERN_INFO "... value mask:      %016Lx\n", counter_value_mask);
+	pr_info("... value mask:      %016Lx\n", counter_value_mask);
 
 	if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
 		nr_counters_fixed = X86_PMC_MAX_FIXED;
 		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
 			nr_counters_fixed, X86_PMC_MAX_FIXED);
 	}
-	printk(KERN_INFO "... fixed counters:  %d\n", nr_counters_fixed);
+	pr_info("... fixed counters:  %d\n", nr_counters_fixed);
 
 	perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
-	printk(KERN_INFO "... counter mask:    %016Lx\n", perf_counter_mask);
+	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
 	perf_counters_initialized = true;
 
 	perf_counters_lapic_init(0);
-- 
cgit v0.10.2


From 2743a5b0fa6f309da904f2190a9cc25deee34dbd Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 4 Mar 2009 20:36:51 +1100
Subject: perfcounters: provide expansion room in the ABI

Impact: ABI change

This expands several fields in the perf_counter_hw_event struct and adds
a "flags" argument to the perf_counter_open system call, in order that
features can be added in future without ABI changes.

In particular the record_type field is expanded to 64 bits, and the
space for flag bits has been expanded from 32 to 64 bits.

This also adds some new fields:

* read_format (64 bits) is intended to provide a way to specify what
  userspace wants to get back when it does a read() on a simple
  (non-interrupting) counter;

* exclude_idle (1 bit) provides a way for userspace to ask that events
  that occur when the cpu is idle be excluded;

* extra_config_len will provide a way for userspace to supply an
  arbitrary amount of extra machine-specific PMU configuration data
  immediately following the perf_counter_hw_event struct, to allow
  sophisticated users to program things such as instruction matching
  CAMs and address range registers;

* __reserved_3 and __reserved_4 provide space for future expansion.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 186efaf..c42455a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -69,9 +69,10 @@ struct perf_counter_hw_event {
 	__s64			type;
 
 	__u64			irq_period;
-	__u32			record_type;
+	__u64			record_type;
+	__u64			read_format;
 
-	__u32			disabled       :  1, /* off by default        */
+	__u64			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
 				raw	       :  1, /* raw event type        */
 				inherit	       :  1, /* children inherit it   */
@@ -80,10 +81,15 @@ struct perf_counter_hw_event {
 				exclude_user   :  1, /* don't count user      */
 				exclude_kernel :  1, /* ditto kernel          */
 				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
 
-				__reserved_1 : 23;
+				__reserved_1 : 55;
+
+	__u32			extra_config_len;
+	__u32			__reserved_4;
 
 	__u64			__reserved_2;
+	__u64			__reserved_3;
 };
 
 /*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 28ef2be..ab1d772 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -698,5 +698,5 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 asmlinkage long sys_perf_counter_open(
 		const struct perf_counter_hw_event __user *hw_event_uptr,
-		pid_t pid, int cpu, int group_fd);
+		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 16b14ba..b2e8389 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1683,16 +1683,16 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 }
 
 /**
- * sys_perf_task_open - open a performance counter, associate it to a task/cpu
+ * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
  * @hw_event_uptr:	event type attributes for monitoring/sampling
  * @pid:		target pid
  * @cpu:		target cpu
  * @group_fd:		group leader counter fd
  */
-SYSCALL_DEFINE4(perf_counter_open,
+SYSCALL_DEFINE5(perf_counter_open,
 		const struct perf_counter_hw_event __user *, hw_event_uptr,
-		pid_t, pid, int, cpu, int, group_fd)
+		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
@@ -1703,6 +1703,10 @@ SYSCALL_DEFINE4(perf_counter_open,
 	int fput_needed2 = 0;
 	int ret;
 
+	/* for future expandability... */
+	if (flags)
+		return -EINVAL;
+
 	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
 		return -EFAULT;
 
-- 
cgit v0.10.2


From 2485e5184452ccbda34ff83883883d9107800dff Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 5 Mar 2009 12:33:16 +0100
Subject: perfcounters: fix reserved bits sizing

The sum of bits is 65 currently not 64 - so reduce the # of reserved bits
from 55 to 54.

Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c42455a..dde5645 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -83,7 +83,7 @@ struct perf_counter_hw_event {
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
 
-				__reserved_1 : 55;
+				__reserved_1   : 54;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
-- 
cgit v0.10.2


From b0f3f28e0f14eb335f67bfaae33ce8b8d74fd58b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 5 Mar 2009 18:08:27 +0100
Subject: perfcounters: IRQ and NMI support on AMD CPUs

The below completes the K7+ performance counter support:

 - IRQ support
 - NMI support

KernelTop output works now as well.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jaswinder Singh Rajput <jaswinder@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1236273633.5187.286.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3b65f19..6ebe9ab 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -28,6 +28,7 @@ static bool perf_counters_initialized __read_mostly;
 static int nr_counters_generic __read_mostly;
 static u64 perf_counter_mask __read_mostly;
 static u64 counter_value_mask __read_mostly;
+static int counter_value_bits __read_mostly;
 
 static int nr_counters_fixed __read_mostly;
 
@@ -35,7 +36,9 @@ struct cpu_hw_counters {
 	struct perf_counter	*counters[X86_PMC_IDX_MAX];
 	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
-	u64			global_enable;
+	u64			throttle_ctrl;
+	u64			active_mask;
+	int			enabled;
 };
 
 /*
@@ -43,21 +46,28 @@ struct cpu_hw_counters {
  */
 struct pmc_x86_ops {
 	u64		(*save_disable_all)(void);
-	void		(*restore_all)(u64 ctrl);
+	void		(*restore_all)(u64);
+	u64		(*get_status)(u64);
+	void		(*ack_status)(u64);
+	void		(*enable)(int, u64);
+	void		(*disable)(int, u64);
 	unsigned	eventsel;
 	unsigned	perfctr;
-	int		(*event_map)(int event);
+	u64		(*event_map)(int);
+	u64		(*raw_event)(u64);
 	int		max_events;
 };
 
 static struct pmc_x86_ops *pmc_ops;
 
-static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
+static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
+	.enabled = 1,
+};
 
 /*
  * Intel PerfMon v3. Used on Core2 and later.
  */
-static const int intel_perfmon_event_map[] =
+static const u64 intel_perfmon_event_map[] =
 {
   [PERF_COUNT_CPU_CYCLES]		= 0x003c,
   [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
@@ -68,15 +78,29 @@ static const int intel_perfmon_event_map[] =
   [PERF_COUNT_BUS_CYCLES]		= 0x013c,
 };
 
-static int pmc_intel_event_map(int event)
+static u64 pmc_intel_event_map(int event)
 {
 	return intel_perfmon_event_map[event];
 }
 
+static u64 pmc_intel_raw_event(u64 event)
+{
+#define CORE_EVNTSEL_EVENT_MASK		0x000000FF
+#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00
+#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000
+
+#define CORE_EVNTSEL_MASK 		\
+	(CORE_EVNTSEL_EVENT_MASK |	\
+	 CORE_EVNTSEL_UNIT_MASK  |	\
+	 CORE_EVNTSEL_COUNTER_MASK)
+
+	return event & CORE_EVNTSEL_MASK;
+}
+
 /*
  * AMD Performance Monitor K7 and later.
  */
-static const int amd_perfmon_event_map[] =
+static const u64 amd_perfmon_event_map[] =
 {
   [PERF_COUNT_CPU_CYCLES]		= 0x0076,
   [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
@@ -86,11 +110,25 @@ static const int amd_perfmon_event_map[] =
   [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
 };
 
-static int pmc_amd_event_map(int event)
+static u64 pmc_amd_event_map(int event)
 {
 	return amd_perfmon_event_map[event];
 }
 
+static u64 pmc_amd_raw_event(u64 event)
+{
+#define K7_EVNTSEL_EVENT_MASK	0x7000000FF
+#define K7_EVNTSEL_UNIT_MASK	0x00000FF00
+#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000
+
+#define K7_EVNTSEL_MASK			\
+	(K7_EVNTSEL_EVENT_MASK |	\
+	 K7_EVNTSEL_UNIT_MASK  |	\
+	 K7_EVNTSEL_COUNTER_MASK)
+
+	return event & K7_EVNTSEL_MASK;
+}
+
 /*
  * Propagate counter elapsed time into the generic counter.
  * Can only be executed on the CPU where the counter is active.
@@ -179,7 +217,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	 * Raw event type provide the config in the event structure
 	 */
 	if (hw_event->raw) {
-		hwc->config |= hw_event->type;
+		hwc->config |= pmc_ops->raw_event(hw_event->type);
 	} else {
 		if (hw_event->type >= pmc_ops->max_events)
 			return -EINVAL;
@@ -205,18 +243,24 @@ static u64 pmc_intel_save_disable_all(void)
 
 static u64 pmc_amd_save_disable_all(void)
 {
-	int idx;
-	u64 val, ctrl = 0;
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	int enabled, idx;
+
+	enabled = cpuc->enabled;
+	cpuc->enabled = 0;
+	barrier();
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
+		u64 val;
+
 		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
-		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
-			ctrl |= (1 << idx);
-		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
+			val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+			wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		}
 	}
 
-	return ctrl;
+	return enabled;
 }
 
 u64 hw_perf_save_disable(void)
@@ -226,6 +270,9 @@ u64 hw_perf_save_disable(void)
 
 	return pmc_ops->save_disable_all();
 }
+/*
+ * Exported because of ACPI idle
+ */
 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
 static void pmc_intel_restore_all(u64 ctrl)
@@ -235,11 +282,18 @@ static void pmc_intel_restore_all(u64 ctrl)
 
 static void pmc_amd_restore_all(u64 ctrl)
 {
-	u64 val;
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	int idx;
 
+	cpuc->enabled = ctrl;
+	barrier();
+	if (!ctrl)
+		return;
+
 	for (idx = 0; idx < nr_counters_generic; idx++) {
-		if (ctrl & (1 << idx)) {
+		if (test_bit(idx, (unsigned long *)&cpuc->active_mask)) {
+			u64 val;
+
 			rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
 			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 			wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
@@ -254,8 +308,112 @@ void hw_perf_restore(u64 ctrl)
 
 	pmc_ops->restore_all(ctrl);
 }
+/*
+ * Exported because of ACPI idle
+ */
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
+static u64 pmc_intel_get_status(u64 mask)
+{
+	u64 status;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+	return status;
+}
+
+static u64 pmc_amd_get_status(u64 mask)
+{
+	u64 status = 0;
+	int idx;
+
+	for (idx = 0; idx < nr_counters_generic; idx++) {
+		s64 val;
+
+		if (!(mask & (1 << idx)))
+			continue;
+
+		rdmsrl(MSR_K7_PERFCTR0 + idx, val);
+		val <<= (64 - counter_value_bits);
+		if (val >= 0)
+			status |= (1 << idx);
+	}
+
+	return status;
+}
+
+static u64 hw_perf_get_status(u64 mask)
+{
+	if (unlikely(!perf_counters_initialized))
+		return 0;
+
+	return pmc_ops->get_status(mask);
+}
+
+static void pmc_intel_ack_status(u64 ack)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static void pmc_amd_ack_status(u64 ack)
+{
+}
+
+static void hw_perf_ack_status(u64 ack)
+{
+	if (unlikely(!perf_counters_initialized))
+		return;
+
+	pmc_ops->ack_status(ack);
+}
+
+static void pmc_intel_enable(int idx, u64 config)
+{
+	wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
+			config | ARCH_PERFMON_EVENTSEL0_ENABLE);
+}
+
+static void pmc_amd_enable(int idx, u64 config)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+	set_bit(idx, (unsigned long *)&cpuc->active_mask);
+	if (cpuc->enabled)
+		config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
+}
+
+static void hw_perf_enable(int idx, u64 config)
+{
+	if (unlikely(!perf_counters_initialized))
+		return;
+
+	pmc_ops->enable(idx, config);
+}
+
+static void pmc_intel_disable(int idx, u64 config)
+{
+	wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
+}
+
+static void pmc_amd_disable(int idx, u64 config)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+	clear_bit(idx, (unsigned long *)&cpuc->active_mask);
+	wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
+
+}
+
+static void hw_perf_disable(int idx, u64 config)
+{
+	if (unlikely(!perf_counters_initialized))
+		return;
+
+	pmc_ops->disable(idx, config);
+}
+
 static inline void
 __pmc_fixed_disable(struct perf_counter *counter,
 		    struct hw_perf_counter *hwc, unsigned int __idx)
@@ -278,7 +436,7 @@ __pmc_generic_disable(struct perf_counter *counter,
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
 		__pmc_fixed_disable(counter, hwc, idx);
 	else
-		wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
+		hw_perf_disable(idx, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
@@ -354,8 +512,7 @@ __pmc_generic_enable(struct perf_counter *counter,
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
 		__pmc_fixed_enable(counter, hwc, idx);
 	else
-		wrmsr(hwc->config_base + idx,
-		      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+		hw_perf_enable(idx, hwc->config);
 }
 
 static int
@@ -567,22 +724,20 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
  */
-static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
+static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 {
 	int bit, cpu = smp_processor_id();
 	u64 ack, status;
 	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
+	int ret = 0;
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
-
-	/* Disable counters globally */
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-	ack_APIC_irq();
+	cpuc->throttle_ctrl = hw_perf_save_disable();
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+	status = hw_perf_get_status(cpuc->throttle_ctrl);
 	if (!status)
 		goto out;
 
+	ret = 1;
 again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
@@ -618,12 +773,12 @@ again:
 		}
 	}
 
-	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+	hw_perf_ack_status(ack);
 
 	/*
 	 * Repeat if there is more work to be done:
 	 */
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+	status = hw_perf_get_status(cpuc->throttle_ctrl);
 	if (status)
 		goto again;
 out:
@@ -631,32 +786,27 @@ out:
 	 * Restore - do not reenable when global enable is off or throttled:
 	 */
 	if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+		hw_perf_restore(cpuc->throttle_ctrl);
+
+	return ret;
 }
 
 void perf_counter_unthrottle(void)
 {
 	struct cpu_hw_counters *cpuc;
-	u64 global_enable;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
 		return;
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return;
-
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
+	cpuc = &__get_cpu_var(cpu_hw_counters);
 	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
 		if (printk_ratelimit())
 			printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+		hw_perf_restore(cpuc->throttle_ctrl);
 	}
-	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable);
-	if (unlikely(cpuc->global_enable && !global_enable))
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
 	cpuc->interrupts = 0;
 }
 
@@ -664,8 +814,8 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
 {
 	irq_enter();
 	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
+	ack_APIC_irq();
 	__smp_perf_counter_interrupt(regs, 0);
-
 	irq_exit();
 }
 
@@ -722,16 +872,23 @@ perf_counter_nmi_handler(struct notifier_block *self,
 {
 	struct die_args *args = __args;
 	struct pt_regs *regs;
+	int ret;
+
+	switch (cmd) {
+	case DIE_NMI:
+	case DIE_NMI_IPI:
+		break;
 
-	if (likely(cmd != DIE_NMI_IPI))
+	default:
 		return NOTIFY_DONE;
+	}
 
 	regs = args->regs;
 
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	__smp_perf_counter_interrupt(regs, 1);
+	ret = __smp_perf_counter_interrupt(regs, 1);
 
-	return NOTIFY_STOP;
+	return ret ? NOTIFY_STOP : NOTIFY_OK;
 }
 
 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
@@ -743,18 +900,28 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 static struct pmc_x86_ops pmc_intel_ops = {
 	.save_disable_all	= pmc_intel_save_disable_all,
 	.restore_all		= pmc_intel_restore_all,
+	.get_status		= pmc_intel_get_status,
+	.ack_status		= pmc_intel_ack_status,
+	.enable			= pmc_intel_enable,
+	.disable		= pmc_intel_disable,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= pmc_intel_event_map,
+	.raw_event		= pmc_intel_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 };
 
 static struct pmc_x86_ops pmc_amd_ops = {
 	.save_disable_all	= pmc_amd_save_disable_all,
 	.restore_all		= pmc_amd_restore_all,
+	.get_status		= pmc_amd_get_status,
+	.ack_status		= pmc_amd_ack_status,
+	.enable			= pmc_amd_enable,
+	.disable		= pmc_amd_disable,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= pmc_amd_event_map,
+	.raw_event		= pmc_amd_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
 };
 
@@ -787,8 +954,25 @@ static struct pmc_x86_ops *pmc_intel_init(void)
 
 static struct pmc_x86_ops *pmc_amd_init(void)
 {
+	u64 old;
+	int bits;
+
 	nr_counters_generic = 4;
 	nr_counters_fixed = 0;
+	counter_value_mask = ~0ULL;
+
+	rdmsrl(MSR_K7_PERFCTR0, old);
+	wrmsrl(MSR_K7_PERFCTR0, counter_value_mask);
+	/*
+	 * read the truncated mask
+	 */
+	rdmsrl(MSR_K7_PERFCTR0, counter_value_mask);
+	wrmsrl(MSR_K7_PERFCTR0, old);
+
+	bits = 32 + fls(counter_value_mask >> 32);
+	if (bits == 32)
+		bits = fls((u32)counter_value_mask);
+	counter_value_bits = bits;
 
 	pr_info("AMD Performance Monitoring support detected.\n");
 
-- 
cgit v0.10.2


From b5e8acf66ff5db707c7e08df49fdf6b415878442 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 5 Mar 2009 20:34:21 +0100
Subject: perfcounters: IRQ and NMI support on AMD CPUs, fix

The BKGD suggests that counter width on AMD CPUs is 48 for all
existing models (it certainly is for mine).

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6ebe9ab..f585371 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -959,20 +959,8 @@ static struct pmc_x86_ops *pmc_amd_init(void)
 
 	nr_counters_generic = 4;
 	nr_counters_fixed = 0;
-	counter_value_mask = ~0ULL;
-
-	rdmsrl(MSR_K7_PERFCTR0, old);
-	wrmsrl(MSR_K7_PERFCTR0, counter_value_mask);
-	/*
-	 * read the truncated mask
-	 */
-	rdmsrl(MSR_K7_PERFCTR0, counter_value_mask);
-	wrmsrl(MSR_K7_PERFCTR0, old);
-
-	bits = 32 + fls(counter_value_mask >> 32);
-	if (bits == 32)
-		bits = fls((u32)counter_value_mask);
-	counter_value_bits = bits;
+	counter_value_mask = 0x0000FFFFFFFFFFFFULL;
+	counter_value_bits = 48;
 
 	pr_info("AMD Performance Monitoring support detected.\n");
 
-- 
cgit v0.10.2


From 86028598de16538f02519141756ccf4accfc29a6 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 5 Mar 2009 14:05:57 +1100
Subject: perfcounters/powerpc: fix oops with multiple counters in a group

Impact: fix oops-causing bug

This fixes a bug in the powerpc hw_perf_counter_init where the code
didn't initialize ctrs[n] before passing the ctrs array to check_excludes,
leading to possible oopses and other incorrect behaviour.  This fixes it
by initializing ctrs[n] correctly.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 112332d..4fec112 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -633,6 +633,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 			return NULL;
 	}
 	events[n] = ev;
+	ctrs[n] = counter;
 	if (check_excludes(ctrs, n, 1))
 		return NULL;
 	if (power_check_constraints(events, n + 1))
-- 
cgit v0.10.2


From aabbaa6036fd847c583f585c6bae82b5a033e6c7 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 6 Mar 2009 16:27:10 +1100
Subject: perfcounters/powerpc: add support for POWER5+ processors

Impact: more hardware support

This adds the back-end for the PMU on the POWER5+ processors (i.e. GS,
including GS DD3 aka POWER5++).  This doesn't use the fixed-function
PMC5 and PMC6 since they don't respect the freeze conditions and don't
generate interrupts, as on POWER6.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b4c6f46..49851e0 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,8 +94,8 @@ obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
-obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o ppc970-pmu.o power5-pmu.o \
-				   power6-pmu.o
+obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o ppc970-pmu.o \
+				   power5-pmu.o power5+-pmu.o power6-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 4fec112..162f398 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -826,6 +826,7 @@ void hw_perf_counter_setup(int cpu)
 
 extern struct power_pmu ppc970_pmu;
 extern struct power_pmu power5_pmu;
+extern struct power_pmu power5p_pmu;
 extern struct power_pmu power6_pmu;
 
 static int init_perf_counters(void)
@@ -848,6 +849,9 @@ static int init_perf_counters(void)
 	case PV_POWER5:
 		ppmu = &power5_pmu;
 		break;
+	case PV_POWER5p:
+		ppmu = &power5p_pmu;
+		break;
 	case 0x3e:
 		ppmu = &power6_pmu;
 		break;
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
new file mode 100644
index 0000000..cec21ea
--- /dev/null
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -0,0 +1,452 @@
+/*
+ * Performance counter support for POWER5 (not POWER5++) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	16	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_BYTE_SH	12	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	7
+#define PM_GRS_SH	8	/* Storage subsystem mux select */
+#define PM_GRS_MSK	7
+#define PM_BUSEVENT_MSK	0x80	/* Set if event uses event bus */
+#define PM_PMCSEL_MSK	0x7f
+
+/* Values in PM_UNIT field */
+#define PM_FPU		0
+#define PM_ISU0		1
+#define PM_IFU		2
+#define PM_ISU1		3
+#define PM_IDU		4
+#define PM_ISU0_ALT	6
+#define PM_GRS		7
+#define PM_LSU0		8
+#define PM_LSU1		0xc
+#define PM_LASTUNIT	0xc
+
+/*
+ * Bits in MMCR1 for POWER5+
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	60
+#define MMCR1_TTM2SEL_SH	58
+#define MMCR1_TTM3SEL_SH	56
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	54
+#define MMCR1_TD_CP_DBG1SEL_SH	52
+#define MMCR1_TD_CP_DBG2SEL_SH	50
+#define MMCR1_TD_CP_DBG3SEL_SH	48
+#define MMCR1_GRS_L2SEL_SH	46
+#define MMCR1_GRS_L2SEL_MSK	3
+#define MMCR1_GRS_L3SEL_SH	44
+#define MMCR1_GRS_L3SEL_MSK	3
+#define MMCR1_GRS_MCSEL_SH	41
+#define MMCR1_GRS_MCSEL_MSK	7
+#define MMCR1_GRS_FABSEL_SH	39
+#define MMCR1_GRS_FABSEL_MSK	3
+#define MMCR1_PMC1_ADDER_SEL_SH	35
+#define MMCR1_PMC2_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC1SEL_SH	25
+#define MMCR1_PMC2SEL_SH	17
+#define MMCR1_PMC3SEL_SH	9
+#define MMCR1_PMC4SEL_SH	1
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0x7f
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *             [  ><><>< ><> <><>[  >      <  ><  ><  ><  ><><><><>
+ *             NC  G0G1G2 G3 T0T1 UC        B0  B1  B2  B3 P4P3P2P1
+ *
+ * NC - number of counters
+ *     51: NC error 0x0008_0000_0000_0000
+ *     48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ *     46-47: GRS_L2SEL value
+ *     44-45: GRS_L3SEL value
+ *     41-44: GRS_MCSEL value
+ *     39-40: GRS_FABSEL value
+ *	Note that these match up with their bit positions in MMCR1
+ *
+ * T0 - TTM0 constraint
+ *     36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ *     33: UC3 error 0x02_0000_0000
+ *     32: FPU|IFU|ISU1 events needed 0x01_0000_0000
+ *     31: ISU0 events needed 0x01_8000_0000
+ *     30: IDU|GRS events needed 0x00_4000_0000
+ *
+ * B0
+ *     20-23: Byte 0 event source 0x00f0_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     16-19, 12-15, 8-11: Byte 1, 2, 3 event sources
+ *
+ * P4
+ *     7: P1 error 0x80
+ *     6-7: Count of events needing PMC4
+ *
+ * P1..P3
+ *     0-6: Count of events needing PMC1..PMC3
+ */
+
+static const int grsel_shift[8] = {
+	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
+	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
+	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
+};
+
+/* Masks and values for using events from the various units */
+static u64 unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0x3200000000ull, 0x0100000000ull },
+	[PM_ISU0] =  { 0x0200000000ull, 0x0080000000ull },
+	[PM_ISU1] =  { 0x3200000000ull, 0x3100000000ull },
+	[PM_IFU] =   { 0x3200000000ull, 0x2100000000ull },
+	[PM_IDU] =   { 0x0e00000000ull, 0x0040000000ull },
+	[PM_GRS] =   { 0x0e00000000ull, 0x0c40000000ull },
+};
+
+static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, unit, sh;
+	int bit, fmask;
+	u64 mask = 0, value = 0;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 4)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+		if (unit > PM_LASTUNIT)
+			return -1;
+		if (unit == PM_ISU0_ALT)
+			unit = PM_ISU0;
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (byte >= 4) {
+			if (unit != PM_LSU1)
+				return -1;
+			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+			++unit;
+			byte &= 3;
+		}
+		if (unit == PM_GRS) {
+			bit = event & 7;
+			fmask = (bit == 6)? 7: 3;
+			sh = grsel_shift[bit];
+			mask |= (u64)fmask << sh;
+			value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+		}
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (20 - 4 * byte);
+		value |= (u64)unit << (20 - 4 * byte);
+	}
+	mask  |= 0x8000000000000ull;
+	value |= 0x1000000000000ull;
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+#define MAX_ALT	3	/* at most 3 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x100c0,  0x40001f },			/* PM_GCT_FULL_CYC */
+	{ 0x120e4,  0x400002 },			/* PM_GRP_DISP_REJECT */
+	{ 0x230e2,  0x323087 },			/* PM_BR_PRED_CR */
+	{ 0x230e3,  0x223087, 0x3230a0 },	/* PM_BR_PRED_TA */
+	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
+	{ 0x800c4,  0xc20e0 },			/* PM_DTLB_MISS */
+	{ 0xc50c6,  0xc60e0 },			/* PM_MRK_DTLB_MISS */
+	{ 0x100009, 0x200009 },			/* PM_INST_CMPL */
+	{ 0x200015, 0x300015 },			/* PM_LSU_LMQ_SRQ_EMPTY_CYC */
+	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(unsigned int event)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			break;
+		for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+			if (event == event_alternatives[i][j])
+				return i;
+	}
+	return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {
+	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
+	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
+	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
+	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * Some direct events for decodes of event bus byte 3 have alternative
+ * PMCSEL values on other counters.  This returns the alternative
+ * event code for those that do, or -1 otherwise.  This also handles
+ * alternative PCMSEL values for add events.
+ */
+static int find_alternative_bdecode(unsigned int event)
+{
+	int pmc, altpmc, pp, j;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc == 0 || pmc > 4)
+		return -1;
+	altpmc = 5 - pmc;	/* 1 <-> 4, 2 <-> 3 */
+	pp = event & PM_PMCSEL_MSK;
+	for (j = 0; j < 4; ++j) {
+		if (bytedecode_alternatives[pmc - 1][j] == pp) {
+			return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+				(altpmc << PM_PMC_SH) |
+				bytedecode_alternatives[altpmc - 1][j];
+		}
+	}
+
+	/* new decode alternatives for power5+ */
+	if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
+		return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
+	if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
+		return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
+
+	/* alternative add event encodings */
+	if (pp == 0x10 || pp == 0x28)
+		return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
+			(altpmc << PM_PMC_SH);
+
+	return -1;
+}
+
+static int power5p_get_alternatives(unsigned int event, unsigned int alt[])
+{
+	int i, j, ae, nalt = 1;
+
+	alt[0] = event;
+	nalt = 1;
+	i = find_alternative(event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (ae && ae != event)
+				alt[nalt++] = ae;
+		}
+	} else {
+		ae = find_alternative_bdecode(event);
+		if (ae > 0)
+			alt[nalt++] = ae;
+	}
+	return nalt;
+}
+
+static int power5p_compute_mmcr(unsigned int event[], int n_ev,
+				unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr1 = 0;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm;
+	int i, isbus, bit, grsel;
+	unsigned int pmc_inuse = 0;
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	int ttmuse;
+
+	if (n_ev > 4)
+		return -1;
+
+	/* First pass to count resource use */
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc > 4)
+				return -1;
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+		}
+		if (event[i] & PM_BUSEVENT_MSK) {
+			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (unit == PM_ISU0_ALT)
+				unit = PM_ISU0;
+			if (byte >= 4) {
+				if (unit != PM_LSU1)
+					return -1;
+				++unit;
+				byte &= 3;
+			}
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU0] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
+		unituse[PM_ISU0] = 0;
+	}
+	/* Set TTM[01]SEL fields. */
+	ttmuse = 0;
+	for (i = PM_FPU; i <= PM_ISU1; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+	}
+	ttmuse = 0;
+	for (; i <= PM_GRS; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+	}
+	if (ttmuse > 1)
+		return -1;
+
+	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+			/* get ISU0 through TTM1 rather than TTM0 */
+			unit = PM_ISU0_ALT;
+		} else if (unit == PM_LSU1 + 1) {
+			/* select lower word of LSU1 for this byte */
+			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		ttm = unit >> 2;
+		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		isbus = event[i] & PM_BUSEVENT_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			for (pmc = 0; pmc < 4; ++pmc) {
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			}
+			if (pmc >= 4)
+				return -1;
+			pmc_inuse |= 1 << pmc;
+		} else {
+			/* Direct event */
+			--pmc;
+			if (isbus && (byte & 2) &&
+			    (psel == 8 || psel == 0x10 || psel == 0x28))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+		}
+		if (isbus && unit == PM_GRS) {
+			bit = psel & 7;
+			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+			mmcr1 |= (u64)grsel << grsel_shift[bit];
+		}
+		if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
+			/* select alternate byte lane */
+			psel |= 0x10;
+		if (pmc <= 3)
+			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+		hwc[i] = pmc;
+	}
+
+	/* Return MMCRx values */
+	mmcr[0] = 0;
+	if (pmc_inuse & 1)
+		mmcr[0] = MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr[0] |= MMCR0_PMCjCE;
+	mmcr[1] = mmcr1;
+	mmcr[2] = 0;
+	return 0;
+}
+
+static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	if (pmc <= 3)
+		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5p_generic_events[] = {
+	[PERF_COUNT_CPU_CYCLES] = 0xf,
+	[PERF_COUNT_INSTRUCTIONS] = 0x100009,
+	[PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8,	/* LD_REF_L1 */
+	[PERF_COUNT_CACHE_MISSES] = 0x3c1088,		/* LD_MISS_L1 */
+	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4,	/* BR_ISSUED */ 
+	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,		/* BR_MPRED_CR */
+};
+
+struct power_pmu power5p_pmu = {
+	.n_counter = 4,
+	.max_alternatives = MAX_ALT,
+	.add_fields = 0x7000000000055ull,
+	.test_adder = 0x3000040000000ull,
+	.compute_mmcr = power5p_compute_mmcr,
+	.get_constraint = power5p_get_constraint,
+	.get_alternatives = power5p_get_alternatives,
+	.disable_pmc = power5p_disable_pmc,
+	.n_generic = ARRAY_SIZE(power5p_generic_events),
+	.generic_events = power5p_generic_events,
+};
-- 
cgit v0.10.2


From 880860e392d92c457e8116cdee39ec4d109174ee Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 6 Mar 2009 16:30:52 +1100
Subject: perfcounters/powerpc: add support for POWER4 processors

Impact: more hardware support

This adds the back-end for the PMU on the POWER4 and POWER4+ processors
(GP and GQ).  This is quite similar to the PPC970, with 8 PMCs, but has
fewer events than the PPC970.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 49851e0..8e5e2c7 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
-obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o ppc970-pmu.o \
+obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o power4-pmu.o ppc970-pmu.o \
 				   power5-pmu.o power5+-pmu.o power6-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 162f398..0e33d27 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -824,6 +824,7 @@ void hw_perf_counter_setup(int cpu)
 	cpuhw->mmcr[0] = MMCR0_FC;
 }
 
+extern struct power_pmu power4_pmu;
 extern struct power_pmu ppc970_pmu;
 extern struct power_pmu power5_pmu;
 extern struct power_pmu power5p_pmu;
@@ -841,6 +842,10 @@ static int init_perf_counters(void)
 	/* XXX should get this from cputable */
 	pvr = mfspr(SPRN_PVR);
 	switch (PVR_VER(pvr)) {
+	case PV_POWER4:
+	case PV_POWER4p:
+		ppmu = &power4_pmu;
+		break;
 	case PV_970:
 	case PV_970FX:
 	case PV_970MP:
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
new file mode 100644
index 0000000..1407b19
--- /dev/null
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -0,0 +1,557 @@
+/*
+ * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER4
+ */
+#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_LOWER_SH	6
+#define PM_LOWER_MSK	1
+#define PM_LOWER_MSKS	0x40
+#define PM_BYTE_SH	4	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	3
+#define PM_PMCSEL_MSK	7
+
+/*
+ * Unit code values
+ */
+#define PM_FPU		1
+#define PM_ISU1		2
+#define PM_IFU		3
+#define PM_IDU0		4
+#define PM_ISU1_ALT	6
+#define PM_ISU2		7
+#define PM_IFU_ALT	8
+#define PM_LSU0		9
+#define PM_LSU1		0xc
+#define PM_GPS		0xf
+
+/*
+ * Bits in MMCR0 for POWER4
+ */
+#define MMCR0_PMC1SEL_SH	8
+#define MMCR0_PMC2SEL_SH	1
+#define MMCR_PMCSEL_MSK		0x1f
+
+/*
+ * Bits in MMCR1 for POWER4
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTC0SEL_SH	61
+#define MMCR1_TTM1SEL_SH	59
+#define MMCR1_TTC1SEL_SH	58
+#define MMCR1_TTM2SEL_SH	56
+#define MMCR1_TTC2SEL_SH	55
+#define MMCR1_TTM3SEL_SH	53
+#define MMCR1_TTC3SEL_SH	52
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	50
+#define MMCR1_TD_CP_DBG1SEL_SH	48
+#define MMCR1_TD_CP_DBG2SEL_SH	46
+#define MMCR1_TD_CP_DBG3SEL_SH	44
+#define MMCR1_DEBUG0SEL_SH	43
+#define MMCR1_DEBUG1SEL_SH	42
+#define MMCR1_DEBUG2SEL_SH	41
+#define MMCR1_DEBUG3SEL_SH	40
+#define MMCR1_PMC1_ADDER_SEL_SH	39
+#define MMCR1_PMC2_ADDER_SEL_SH	38
+#define MMCR1_PMC6_ADDER_SEL_SH	37
+#define MMCR1_PMC5_ADDER_SEL_SH	36
+#define MMCR1_PMC8_ADDER_SEL_SH	35
+#define MMCR1_PMC7_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC3SEL_SH	27
+#define MMCR1_PMC4SEL_SH	22
+#define MMCR1_PMC5SEL_SH	17
+#define MMCR1_PMC6SEL_SH	12
+#define MMCR1_PMC7SEL_SH	7
+#define MMCR1_PMC8SEL_SH	2	/* note bit 0 is in MMCRA for GP */
+
+static short mmcr1_adder_bits[8] = {
+	MMCR1_PMC1_ADDER_SEL_SH,
+	MMCR1_PMC2_ADDER_SEL_SH,
+	MMCR1_PMC3_ADDER_SEL_SH,
+	MMCR1_PMC4_ADDER_SEL_SH,
+	MMCR1_PMC5_ADDER_SEL_SH,
+	MMCR1_PMC6_ADDER_SEL_SH,
+	MMCR1_PMC7_ADDER_SEL_SH,
+	MMCR1_PMC8_ADDER_SEL_SH
+};
+
+/*
+ * Bits in MMCRA
+ */
+#define MMCRA_PMC8SEL0_SH	17	/* PMC8SEL bit 0 for GP */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *        |[  >[  >[   >|||[  >[  ><  ><  ><  ><  ><><><><><><><><>
+ *        | UC1 UC2 UC3 ||| PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
+ * 	  \SMPL	        ||\TTC3SEL
+ * 		        |\TTC_IFU_SEL
+ * 		        \TTM2SEL0
+ *
+ * SMPL - SAMPLE_ENABLE constraint
+ *     56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
+ *
+ * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
+ *     55: UC1 error 0x0080_0000_0000_0000
+ *     54: FPU events needed 0x0040_0000_0000_0000
+ *     53: ISU1 events needed 0x0020_0000_0000_0000
+ *     52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
+ *
+ * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
+ *     51: UC2 error 0x0008_0000_0000_0000
+ *     50: FPU events needed 0x0004_0000_0000_0000
+ *     49: IFU events needed 0x0002_0000_0000_0000
+ *     48: LSU0 events needed 0x0001_0000_0000_0000
+ *
+ * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
+ *     47: UC3 error 0x8000_0000_0000
+ *     46: LSU0 events needed 0x4000_0000_0000
+ *     45: IFU events needed 0x2000_0000_0000
+ *     44: IDU0|ISU2 events needed 0x1000_0000_0000
+ *     43: ISU1 events needed 0x0800_0000_0000
+ *
+ * TTM2SEL0
+ *     42: 0 = IDU0 events needed
+ *     	   1 = ISU2 events needed 0x0400_0000_0000
+ *
+ * TTC_IFU_SEL
+ *     41: 0 = IFU.U events needed
+ *     	   1 = IFU.L events needed 0x0200_0000_0000
+ *
+ * TTC3SEL
+ *     40: 0 = LSU1.U events needed
+ *     	   1 = LSU1.L events needed 0x0100_0000_0000
+ *
+ * PS1
+ *     39: PS1 error 0x0080_0000_0000
+ *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
+ *
+ * PS2
+ *     35: PS2 error 0x0008_0000_0000
+ *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
+ *
+ * B0
+ *     28-31: Byte 0 event source 0xf000_0000
+ *     	   1 = FPU
+ * 	   2 = ISU1
+ * 	   3 = IFU
+ * 	   4 = IDU0
+ * 	   7 = ISU2
+ * 	   9 = LSU0
+ * 	   c = LSU1
+ * 	   f = GPS
+ *
+ * B1, B2, B3
+ *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
+ *
+ * P8
+ *     15: P8 error 0x8000
+ *     14-15: Count of events needing PMC8
+ *
+ * P1..P7
+ *     0-13: Count of events needing PMC1..PMC7
+ *
+ * Note: this doesn't allow events using IFU.U to be combined with events
+ * using IFU.L, though that is feasible (using TTM0 and TTM2).  However
+ * there are no listed events for IFU.L (they are debug events not
+ * verified for performance monitoring) so this shouldn't cause a
+ * problem.
+ */
+
+static struct unitinfo {
+	u64	value, mask;
+	int	unit;
+	int	lowerbit;
+} p4_unitinfo[16] = {
+	[PM_FPU]  = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
+	[PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
+	[PM_ISU1_ALT] =
+		    { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
+	[PM_IFU]  = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
+	[PM_IFU_ALT] =
+		    { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
+	[PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
+	[PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
+	[PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
+	[PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
+	[PM_GPS]  = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
+};
+
+static unsigned char direct_marked_event[8] = {
+	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
+	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
+	(1<<3),			/* PMC3: PM_MRK_ST_CMPL_INT */
+	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
+	(1<<4) | (1<<5),	/* PMC5: PM_MRK_GRP_TIMEO */
+	(1<<3) | (1<<4) | (1<<5),
+		/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
+	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
+	(1<<4),			/* PMC8: PM_MRK_LSU_FIN */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int p4_marked_instr_event(unsigned int event)
+{
+	int pmc, psel, unit, byte, bit;
+	unsigned int mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc) {
+		if (direct_marked_event[pmc - 1] & (1 << psel))
+			return 1;
+		if (psel == 0)		/* add events */
+			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
+		else if (psel == 6)	/* decode events */
+			bit = 4;
+		else
+			return 0;
+	} else
+		bit = psel;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	mask = 0;
+	switch (unit) {
+	case PM_LSU1:
+		if (event & PM_LOWER_MSKS)
+			mask = 1 << 28;		/* byte 7 bit 4 */
+		else
+			mask = 6 << 24;		/* byte 3 bits 1 and 2 */
+		break;
+	case PM_LSU0:
+		/* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
+		mask = 0x083dff00;
+	}
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int p4_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+{
+	int pmc, byte, unit, lower, sh;
+	u64 mask = 0, value = 0;
+	int grp = -1;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 8)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		grp = ((pmc - 1) >> 1) & 1;
+	}
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	if (unit) {
+		lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
+
+		/*
+		 * Bus events on bytes 0 and 2 can be counted
+		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
+		 */
+		if (!pmc)
+			grp = byte & 1;
+
+		if (!p4_unitinfo[unit].unit)
+			return -1;
+		mask  |= p4_unitinfo[unit].mask;
+		value |= p4_unitinfo[unit].value;
+		sh = p4_unitinfo[unit].lowerbit;
+		if (sh > 1)
+			value |= (u64)lower << sh;
+		else if (lower != sh)
+			return -1;
+		unit = p4_unitinfo[unit].unit;
+
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (28 - 4 * byte);
+		value |= (u64)unit << (28 - 4 * byte);
+	}
+	if (grp == 0) {
+		/* increment PMC1/2/5/6 field */
+		mask  |= 0x8000000000ull;
+		value |= 0x1000000000ull;
+	} else {
+		/* increment PMC3/4/7/8 field */
+		mask  |= 0x800000000ull;
+		value |= 0x100000000ull;
+	}
+
+	/* Marked instruction events need sample_enable set */
+	if (p4_marked_instr_event(event)) {
+		mask  |= 1ull << 56;
+		value |= 1ull << 56;
+	}
+
+	/* PMCSEL=6 decode events on byte 2 need sample_enable clear */
+	if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
+		mask  |= 1ull << 56;
+
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static unsigned int ppc_inst_cmpl[] = {
+	0x1001, 0x4001, 0x6001, 0x7001, 0x8001
+};
+
+static int p4_get_alternatives(unsigned int event, unsigned int alt[])
+{
+	int i, j, na;
+
+	alt[0] = event;
+	na = 1;
+
+	/* 2 possibilities for PM_GRP_DISP_REJECT */
+	if (event == 0x8003 || event == 0x0224) {
+		alt[1] = event ^ (0x8003 ^ 0x0224);
+		return 2;
+	}
+
+	/* 2 possibilities for PM_ST_MISS_L1 */
+	if (event == 0x0c13 || event == 0x0c23) {
+		alt[1] = event ^ (0x0c13 ^ 0x0c23);
+		return 2;
+	}
+
+	/* several possibilities for PM_INST_CMPL */
+	for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
+		if (event == ppc_inst_cmpl[i]) {
+			for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
+				if (j != i)
+					alt[na++] = ppc_inst_cmpl[j];
+			break;
+		}
+	}
+
+	return na;
+}
+
+static int p4_compute_mmcr(unsigned int event[], int n_ev,
+			   unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+	unsigned int pmc, unit, byte, psel, lower;
+	unsigned int ttm, grp;
+	unsigned int pmc_inuse = 0;
+	unsigned int pmc_grp_use[2];
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	unsigned int unitlower = 0;
+	int i;
+
+	if (n_ev > 8)
+		return -1;
+
+	/* First pass to count resource use */
+	pmc_grp_use[0] = pmc_grp_use[1] = 0;
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+			/* count 1/2/5/6 vs 3/4/7/8 use */
+			++pmc_grp_use[((pmc - 1) >> 1) & 1];
+		}
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
+		if (unit) {
+			if (!pmc)
+				++pmc_grp_use[byte & 1];
+			if (unit == 6 || unit == 8)
+				/* map alt ISU1/IFU codes: 6->2, 8->3 */
+				unit = (unit >> 1) - 1;
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			lower <<= unit;
+			if (unituse[unit] && lower != (unitlower & lower))
+				return -1;
+			unituse[unit] = 1;
+			unitlower |= lower;
+		}
+	}
+	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
+		return -1;
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
+	 * Each TTMx can only select one unit, but since
+	 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
+	 * we have some choices.
+	 */
+	if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
+		unituse[6] = 1;		/* Move 2 to 6 */
+		unituse[2] = 0;
+	}
+	if (unituse[3] & (unituse[1] | unituse[2])) {
+		unituse[8] = 1;		/* Move 3 to 8 */
+		unituse[3] = 0;
+		unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
+	}
+	/* Check only one unit per TTMx */
+	if (unituse[1] + unituse[2] + unituse[3] > 1 ||
+	    unituse[4] + unituse[6] + unituse[7] > 1 ||
+	    unituse[8] + unituse[9] > 1 ||
+	    (unituse[5] | unituse[10] | unituse[11] |
+	     unituse[13] | unituse[14]))
+		return -1;
+
+	/* Set TTMxSEL fields.  Note, units 1-3 => TTM0SEL codes 0-2 */
+	mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
+	mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
+	mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
+
+	/* Set TTCxSEL fields. */
+	if (unitlower & 0xe)
+		mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
+	if (unitlower & 0xf0)
+		mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
+	if (unitlower & 0xf00)
+		mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
+	if (unitlower & 0x7000)
+		mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
+
+	/* Set byte lane select fields. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit == 0xf) {
+			/* special case for GPS */
+			mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
+		} else {
+			if (!unituse[unit])
+				ttm = unit - 1;		/* 2->1, 3->2 */
+			else
+				ttm = unit >> 2;
+			mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
+		}
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		if (!pmc) {
+			/* Bus event or 00xxx direct event (off or cycles) */
+			if (unit)
+				psel |= 0x10 | ((byte & 2) << 2);
+			for (pmc = 0; pmc < 8; ++pmc) {
+				if (pmc_inuse & (1 << pmc))
+					continue;
+				grp = (pmc >> 1) & 1;
+				if (unit) {
+					if (grp == (byte & 1))
+						break;
+				} else if (pmc_grp_use[grp] < 4) {
+					++pmc_grp_use[grp];
+					break;
+				}
+			}
+			pmc_inuse |= 1 << pmc;
+		} else {
+			/* Direct event */
+			--pmc;
+			if (psel == 0 && (byte & 2))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
+			else if (psel == 6 && byte == 3)
+				/* seem to need to set sample_enable here */
+				mmcra |= MMCRA_SAMPLE_ENABLE;
+			psel |= 8;
+		}
+		if (pmc <= 1)
+			mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
+		else
+			mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
+		if (pmc == 7)	/* PMC8 */
+			mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
+		hwc[i] = pmc;
+		if (p4_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+	}
+
+	if (pmc_inuse & 1)
+		mmcr0 |= MMCR0_PMC1CE;
+	if (pmc_inuse & 0xfe)
+		mmcr0 |= MMCR0_PMCjCE;
+
+	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */
+
+	/* Return MMCRx values */
+	mmcr[0] = mmcr0;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	/*
+	 * Setting the PMCxSEL field to 0 disables PMC x.
+	 * (Note that pmc is 0-based here, not 1-based.)
+	 */
+	if (pmc <= 1) {
+		mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
+	} else {
+		mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
+		if (pmc == 7)
+			mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
+	}
+}
+
+static int p4_generic_events[] = {
+	[PERF_COUNT_CPU_CYCLES] = 7,
+	[PERF_COUNT_INSTRUCTIONS] = 0x1001,
+	[PERF_COUNT_CACHE_REFERENCES] = 0x8c10,		/* PM_LD_REF_L1 */
+	[PERF_COUNT_CACHE_MISSES] = 0x3c10,		/* PM_LD_MISS_L1 */
+	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330,	/* PM_BR_ISSUED */
+	[PERF_COUNT_BRANCH_MISSES] = 0x331,		/* PM_BR_MPRED_CR */
+};
+
+struct power_pmu power4_pmu = {
+	.n_counter = 8,
+	.max_alternatives = 5,
+	.add_fields = 0x0000001100005555ull,
+	.test_adder = 0x0011083300000000ull,
+	.compute_mmcr = p4_compute_mmcr,
+	.get_constraint = p4_get_constraint,
+	.get_alternatives = p4_get_alternatives,
+	.disable_pmc = p4_disable_pmc,
+	.n_generic = ARRAY_SIZE(p4_generic_events),
+	.generic_events = p4_generic_events,
+};
-- 
cgit v0.10.2


From 184fe4ab1f2e4dfa45584889bb3820031648386b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sun, 8 Mar 2009 11:34:19 +0100
Subject: x86: perf_counter cleanup

Use and actual unsigned long bitmap instead of casting our way around.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jaswinder Singh Rajput <jaswinder@kernel.org>
LKML-Reference: <1236508459.22914.3645.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f585371..1df4210 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -37,7 +37,7 @@ struct cpu_hw_counters {
 	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	u64			throttle_ctrl;
-	u64			active_mask;
+	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	int			enabled;
 };
 
@@ -291,7 +291,7 @@ static void pmc_amd_restore_all(u64 ctrl)
 		return;
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
-		if (test_bit(idx, (unsigned long *)&cpuc->active_mask)) {
+		if (test_bit(idx, cpuc->active_mask)) {
 			u64 val;
 
 			rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
@@ -377,7 +377,7 @@ static void pmc_amd_enable(int idx, u64 config)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 
-	set_bit(idx, (unsigned long *)&cpuc->active_mask);
+	set_bit(idx, cpuc->active_mask);
 	if (cpuc->enabled)
 		config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 
@@ -401,7 +401,7 @@ static void pmc_amd_disable(int idx, u64 config)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 
-	clear_bit(idx, (unsigned long *)&cpuc->active_mask);
+	clear_bit(idx, cpuc->active_mask);
 	wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
 
 }
-- 
cgit v0.10.2


From e255357764f92afcafafbd4879b222b8c752065a Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sun, 8 Mar 2009 17:09:49 +0530
Subject: x86: perf_counter cleanup

Remove unused variables and duplicate header file.

Signed-off-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 1df4210..155bc3c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -17,7 +17,6 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 
-#include <asm/perf_counter.h>
 #include <asm/apic.h>
 
 static bool perf_counters_initialized __read_mostly;
@@ -954,9 +953,6 @@ static struct pmc_x86_ops *pmc_intel_init(void)
 
 static struct pmc_x86_ops *pmc_amd_init(void)
 {
-	u64 old;
-	int bits;
-
 	nr_counters_generic = 4;
 	nr_counters_fixed = 0;
 	counter_value_mask = 0x0000FFFFFFFFFFFFULL;
-- 
cgit v0.10.2


From bc44fb5f7d3e764ed7698c835a1a0f35aba2eb3d Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 13 Mar 2009 10:42:18 +0100
Subject: x86, bts: detect size of DS fields

Impact: more robust DS feature enumeration

Detect the size of the pointer-type fields in the DS area
configuration via the DTES64 features rather than based on
the cpuid.

Rename a variable to denote that size to reflect that it only
covers the pointer-type fields.

Add more boot-time diagnostics giving the detected size and
the sizes of BTS and PEBS records.

Use the size of the BTS/PEBS record to indicate that the
respective feature is not available (if the record size is zero).

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090313104218.A30096@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 87b67e3..6e5ec67 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -39,7 +39,7 @@ struct ds_configuration {
 	/* the size of one pointer-typed field in the DS structure and
 	   in the BTS and PEBS buffers in bytes;
 	   this covers the first 8 DS fields related to buffer management. */
-	unsigned char  sizeof_field;
+	unsigned char  sizeof_ptr_field;
 	/* the size of a BTS/PEBS record in bytes */
 	unsigned char  sizeof_rec[2];
 	/* a series of bit-masks to control various features indexed
@@ -142,14 +142,14 @@ enum ds_qualifier {
 static inline unsigned long ds_get(const unsigned char *base,
 				   enum ds_qualifier qual, enum ds_field field)
 {
-	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
 	return *(unsigned long *)base;
 }
 
 static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 			  enum ds_field field, unsigned long value)
 {
-	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
 	(*(unsigned long *)base) = value;
 }
 
@@ -410,7 +410,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
  * Later architectures use 64bit pointers throughout, whereas earlier
  * architectures use 32bit pointers in 32bit mode.
  *
- * We compute the base address for the first 8 fields based on:
+ * We compute the base address for the fields based on:
  * - the field size stored in the DS configuration
  * - the relative field position
  *
@@ -441,13 +441,13 @@ enum bts_field {
 
 static inline unsigned long bts_get(const char *base, enum bts_field field)
 {
-	base += (ds_cfg.sizeof_field * field);
+	base += (ds_cfg.sizeof_ptr_field * field);
 	return *(unsigned long *)base;
 }
 
 static inline void bts_set(char *base, enum bts_field field, unsigned long val)
 {
-	base += (ds_cfg.sizeof_field * field);;
+	base += (ds_cfg.sizeof_ptr_field * field);;
 	(*(unsigned long *)base) = val;
 }
 
@@ -593,6 +593,10 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 	struct ds_context *context;
 	int error;
 
+	error = -EOPNOTSUPP;
+	if (!ds_cfg.sizeof_rec[qual])
+		goto out;
+
 	error = -EINVAL;
 	if (!base)
 		goto out;
@@ -635,10 +639,6 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	unsigned long irq;
 	int error;
 
-	error = -EOPNOTSUPP;
-	if (!ds_cfg.ctl[dsf_bts])
-		goto out;
-
 	/* buffer overflow notification is not yet implemented */
 	error = -EOPNOTSUPP;
 	if (ovfl)
@@ -848,7 +848,8 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
 
 	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
 	tracer->trace.reset_value =
-		*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
+		*(u64 *)(tracer->ds.context->ds +
+			 (ds_cfg.sizeof_ptr_field * 8));
 
 	return &tracer->trace;
 }
@@ -884,7 +885,8 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
 	if (!tracer)
 		return -EINVAL;
 
-	*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
+	*(u64 *)(tracer->ds.context->ds +
+		 (ds_cfg.sizeof_ptr_field * 8)) = value;
 
 	return 0;
 }
@@ -894,52 +896,54 @@ static const struct ds_configuration ds_cfg_netburst = {
 	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
 	.ctl[dsf_bts_kernel]	= (1 << 5),
 	.ctl[dsf_bts_user]	= (1 << 6),
-
-	.sizeof_field		= sizeof(long),
-	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
-#ifdef __i386__
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
-#else
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
-#endif
 };
 static const struct ds_configuration ds_cfg_pentium_m = {
 	.name = "Pentium M",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
-
-	.sizeof_field		= sizeof(long),
-	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
-#ifdef __i386__
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
-#else
-	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
-#endif
 };
 static const struct ds_configuration ds_cfg_core2_atom = {
 	.name = "Core 2/Atom",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
 	.ctl[dsf_bts_kernel]	= (1 << 9),
 	.ctl[dsf_bts_user]	= (1 << 10),
-
-	.sizeof_field		= 8,
-	.sizeof_rec[ds_bts]	= 8 * 3,
-	.sizeof_rec[ds_pebs]	= 8 * 18,
 };
 
 static void
-ds_configure(const struct ds_configuration *cfg)
+ds_configure(const struct ds_configuration *cfg,
+	     struct cpuinfo_x86 *cpu)
 {
+	unsigned long nr_pebs_fields = 0;
+
+	printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
+
+#ifdef __i386__
+	nr_pebs_fields = 10;
+#else
+	nr_pebs_fields = 18;
+#endif
+
 	memset(&ds_cfg, 0, sizeof(ds_cfg));
 	ds_cfg = *cfg;
 
-	printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
+	ds_cfg.sizeof_ptr_field =
+		(cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
 
-	if (!cpu_has_bts) {
-		ds_cfg.ctl[dsf_bts] = 0;
+	ds_cfg.sizeof_rec[ds_bts]  = ds_cfg.sizeof_ptr_field * 3;
+	ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
+
+	if (!cpu_has(cpu, X86_FEATURE_BTS)) {
+		ds_cfg.sizeof_rec[ds_bts] = 0;
 		printk(KERN_INFO "[ds] bts not available\n");
 	}
-	if (!cpu_has_pebs)
+	if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
+		ds_cfg.sizeof_rec[ds_pebs] = 0;
 		printk(KERN_INFO "[ds] pebs not available\n");
+	}
+
+	printk(KERN_INFO "[ds] sizes: address: %u bit, ",
+	       8 * ds_cfg.sizeof_ptr_field);
+	printk("bts/pebs record: %u/%u bytes\n",
+	       ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
 
 	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
 }
@@ -951,12 +955,12 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 		switch (c->x86_model) {
 		case 0x9:
 		case 0xd: /* Pentium M */
-			ds_configure(&ds_cfg_pentium_m);
+			ds_configure(&ds_cfg_pentium_m, c);
 			break;
 		case 0xf:
 		case 0x17: /* Core2 */
 		case 0x1c: /* Atom */
-			ds_configure(&ds_cfg_core2_atom);
+			ds_configure(&ds_cfg_core2_atom, c);
 			break;
 		case 0x1a: /* i7 */
 		default:
@@ -969,7 +973,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 		case 0x0:
 		case 0x1:
 		case 0x2: /* Netburst */
-			ds_configure(&ds_cfg_netburst);
+			ds_configure(&ds_cfg_netburst, c);
 			break;
 		default:
 			/* sorry, don't know about them */
-- 
cgit v0.10.2


From 8a327f6d1b05f5ce16572b4413a5df1d0e872283 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 13 Mar 2009 10:45:07 +0100
Subject: x86, bts: add selftest for BTS

Perform a selftest of branch trace store when a cpu is initialized.

WARN and disable branch trace store support if the selftest fails.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090313104507.A30125@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fdb45df..dfd74ab 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -175,6 +175,15 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
+config X86_DS_SELFTEST
+    bool "DS selftest"
+    default y
+    depends on DEBUG_KERNEL
+    depends on X86_DS
+	---help---
+	  Perform Debug Store selftests at boot time.
+	  If in doubt, say "N".
+
 config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 339ce35..a0c9e13 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -44,6 +44,7 @@ obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
 obj-$(CONFIG_X86_DS)		+= ds.o
+obj-$(CONFIG_X86_DS_SELFTEST)		+= ds_selftest.o
 obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 6e5ec67..51c936c 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/kernel.h>
 
+#include "ds_selftest.h"
 
 /*
  * The configuration for a particular DS hardware implementation.
@@ -940,6 +941,26 @@ ds_configure(const struct ds_configuration *cfg,
 		printk(KERN_INFO "[ds] pebs not available\n");
 	}
 
+	if (ds_cfg.sizeof_rec[ds_bts]) {
+		int error;
+
+		error = ds_selftest_bts();
+		if (error) {
+			WARN(1, "[ds] selftest failed. disabling bts.\n");
+			ds_cfg.sizeof_rec[ds_bts] = 0;
+		}
+	}
+
+	if (ds_cfg.sizeof_rec[ds_pebs]) {
+		int error;
+
+		error = ds_selftest_pebs();
+		if (error) {
+			WARN(1, "[ds] selftest failed. disabling pebs.\n");
+			ds_cfg.sizeof_rec[ds_pebs] = 0;
+		}
+	}
+
 	printk(KERN_INFO "[ds] sizes: address: %u bit, ",
 	       8 * ds_cfg.sizeof_ptr_field);
 	printk("bts/pebs record: %u/%u bytes\n",
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
new file mode 100644
index 0000000..8c46fbf
--- /dev/null
+++ b/arch/x86/kernel/ds_selftest.c
@@ -0,0 +1,241 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#include "ds_selftest.h"
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/ds.h>
+
+
+#define DS_SELFTEST_BUFFER_SIZE 1021 /* Intentionally chose an odd size. */
+
+
+static int ds_selftest_bts_consistency(const struct bts_trace *trace)
+{
+	int error = 0;
+
+	if (!trace) {
+		printk(KERN_CONT "failed to access trace...");
+		/* Bail out. Other tests are pointless. */
+		return -1;
+	}
+
+	if (!trace->read) {
+		printk(KERN_CONT "bts read not available...");
+		error = -1;
+	}
+
+	/* Do some sanity checks on the trace configuration. */
+	if (!trace->ds.n) {
+		printk(KERN_CONT "empty bts buffer...");
+		error = -1;
+	}
+	if (!trace->ds.size) {
+		printk(KERN_CONT "bad bts trace setup...");
+		error = -1;
+	}
+	if (trace->ds.end !=
+	    (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
+		printk(KERN_CONT "bad bts buffer setup...");
+		error = -1;
+	}
+	if ((trace->ds.top < trace->ds.begin) ||
+	    (trace->ds.end <= trace->ds.top)) {
+		printk(KERN_CONT "bts top out of bounds...");
+		error = -1;
+	}
+
+	return error;
+}
+
+static int ds_selftest_bts_read(struct bts_tracer *tracer,
+				const struct bts_trace *trace,
+				const void *from, const void *to)
+{
+	const unsigned char *at;
+
+	/*
+	 * Check a few things which do not belong to this test.
+	 * They should be covered by other tests.
+	 */
+	if (!trace)
+		return -1;
+
+	if (!trace->read)
+		return -1;
+
+	if (to < from)
+		return -1;
+
+	if (from < trace->ds.begin)
+		return -1;
+
+	if (trace->ds.end < to)
+		return -1;
+
+	if (!trace->ds.size)
+		return -1;
+
+	/* Now to the test itself. */
+	for (at = from; (void *)at < to; at += trace->ds.size) {
+		struct bts_struct bts;
+		size_t index;
+		int error;
+
+		if (((void *)at - trace->ds.begin) % trace->ds.size) {
+			printk(KERN_CONT
+			       "read from non-integer index...");
+			return -1;
+		}
+		index = ((void *)at - trace->ds.begin) / trace->ds.size;
+
+		memset(&bts, 0, sizeof(bts));
+		error = trace->read(tracer, at, &bts);
+		if (error < 0) {
+			printk(KERN_CONT
+			       "error reading bts trace at [%lu] (0x%p)...",
+			       index, at);
+			return error;
+		}
+
+		switch (bts.qualifier) {
+		case BTS_BRANCH:
+			break;
+		default:
+			printk(KERN_CONT
+			       "unexpected bts entry %llu at [%lu] (0x%p)...",
+			       bts.qualifier, index, at);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int ds_selftest_bts(void)
+{
+	const struct bts_trace *trace;
+	struct bts_tracer *tracer;
+	int error = 0;
+	void *top;
+	unsigned char buffer[DS_SELFTEST_BUFFER_SIZE];
+
+	printk(KERN_INFO "[ds] bts selftest...");
+
+	tracer = ds_request_bts(NULL, buffer, DS_SELFTEST_BUFFER_SIZE,
+				NULL, (size_t)-1, BTS_KERNEL);
+	if (IS_ERR(tracer)) {
+		error = PTR_ERR(tracer);
+		tracer = NULL;
+
+		printk(KERN_CONT
+		       "initialization failed (err: %d)...", error);
+		goto out;
+	}
+
+	/* The return should already give us enough trace. */
+	ds_suspend_bts(tracer);
+
+	/* Let's see if we can access the trace. */
+	trace = ds_read_bts(tracer);
+
+	error = ds_selftest_bts_consistency(trace);
+	if (error < 0)
+		goto out;
+
+	/* If everything went well, we should have a few trace entries. */
+	if (trace->ds.top == trace->ds.begin) {
+		/*
+		 * It is possible but highly unlikely that we got a
+		 * buffer overflow and end up at exactly the same
+		 * position we started from.
+		 * Let's issue a warning, but continue.
+		 */
+		printk(KERN_CONT "no trace/overflow...");
+	}
+
+	/* Let's try to read the trace we collected. */
+	error = ds_selftest_bts_read(tracer, trace,
+				     trace->ds.begin, trace->ds.top);
+	if (error < 0)
+		goto out;
+
+	/*
+	 * Let's read the trace again.
+	 * Since we suspended tracing, we should get the same result.
+	 */
+	top = trace->ds.top;
+
+	trace = ds_read_bts(tracer);
+	error = ds_selftest_bts_consistency(trace);
+	if (error < 0)
+		goto out;
+
+	if (top != trace->ds.top) {
+		printk(KERN_CONT "suspend not working...");
+		error = -1;
+		goto out;
+	}
+
+	/* Let's collect some more trace - see if resume is working. */
+	ds_resume_bts(tracer);
+	ds_suspend_bts(tracer);
+
+	trace = ds_read_bts(tracer);
+
+	error = ds_selftest_bts_consistency(trace);
+	if (error < 0)
+		goto out;
+
+	if (trace->ds.top == top) {
+		/*
+		 * It is possible but highly unlikely that we got a
+		 * buffer overflow and end up at exactly the same
+		 * position we started from.
+		 * Let's issue a warning and check the full trace.
+		 */
+		printk(KERN_CONT
+		       "no resume progress/overflow...");
+
+		error = ds_selftest_bts_read(tracer, trace,
+					     trace->ds.begin, trace->ds.end);
+	} else if (trace->ds.top < top) {
+		/*
+		 * We had a buffer overflow - the entire buffer should
+		 * contain trace records.
+		 */
+		error = ds_selftest_bts_read(tracer, trace,
+					     trace->ds.begin, trace->ds.end);
+	} else {
+		/*
+		 * It is quite likely that the buffer did not overflow.
+		 * Let's just check the delta trace.
+		 */
+		error = ds_selftest_bts_read(tracer, trace,
+					     top, trace->ds.top);
+	}
+	if (error < 0)
+		goto out;
+
+	error = 0;
+
+	/* The final test: release the tracer while tracing is suspended. */
+ out:
+	ds_release_bts(tracer);
+
+	printk(KERN_CONT "%s.\n", (error ? "failed" : "passed"));
+
+	return error;
+}
+
+int ds_selftest_pebs(void)
+{
+	return 0;
+}
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
new file mode 100644
index 0000000..0e6e19d
--- /dev/null
+++ b/arch/x86/kernel/ds_selftest.h
@@ -0,0 +1,15 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#ifdef CONFIG_X86_DS_SELFTEST
+extern int ds_selftest_bts(void);
+extern int ds_selftest_pebs(void);
+#else
+static inline int ds_selftest_bts(void) { return 0; }
+static inline int ds_selftest_pebs(void) { return 0; }
+#endif /* CONFIG_X86_DS_SELFTEST */
-- 
cgit v0.10.2


From b8e47195451c5d3f62620b2b1b5928669afd56eb Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 13 Mar 2009 10:46:42 +0100
Subject: x86, bts: correct comment style in ds.c

Correct the comment style in ds.c.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090313104642.A30149@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 51c936c..d9cab71 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -35,25 +35,22 @@
  * The configuration for a particular DS hardware implementation.
  */
 struct ds_configuration {
-	/* the name of the configuration */
+	/* The name of the configuration. */
 	const char *name;
-	/* the size of one pointer-typed field in the DS structure and
-	   in the BTS and PEBS buffers in bytes;
-	   this covers the first 8 DS fields related to buffer management. */
+	/* The size of pointer-typed fields in DS, BTS, and PEBS. */
 	unsigned char  sizeof_ptr_field;
-	/* the size of a BTS/PEBS record in bytes */
+	/* The size of a BTS/PEBS record in bytes. */
 	unsigned char  sizeof_rec[2];
-	/* a series of bit-masks to control various features indexed
-	 * by enum ds_feature */
+	/* Control bit-masks indexed by enum ds_feature. */
 	unsigned long ctl[dsf_ctl_max];
 };
 static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
 
 #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
 
-#define MAX_SIZEOF_DS (12 * 8)	/* maximal size of a DS configuration */
-#define MAX_SIZEOF_BTS (3 * 8)	/* maximal size of a BTS record */
-#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+#define MAX_SIZEOF_DS (12 * 8)	/* Maximal size of a DS configuration. */
+#define MAX_SIZEOF_BTS (3 * 8)	/* Maximal size of a BTS record. */
+#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment. */
 
 #define BTS_CONTROL \
  (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
@@ -67,28 +64,28 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
  * to identify tracers.
  */
 struct ds_tracer {
-	/* the DS context (partially) owned by this tracer */
+	/* The DS context (partially) owned by this tracer. */
 	struct ds_context *context;
-	/* the buffer provided on ds_request() and its size in bytes */
+	/* The buffer provided on ds_request() and its size in bytes. */
 	void *buffer;
 	size_t size;
 };
 
 struct bts_tracer {
-	/* the common DS part */
+	/* The common DS part. */
 	struct ds_tracer ds;
-	/* the trace including the DS configuration */
+	/* The trace including the DS configuration. */
 	struct bts_trace trace;
-	/* buffer overflow notification function */
+	/* Buffer overflow notification function. */
 	bts_ovfl_callback_t ovfl;
 };
 
 struct pebs_tracer {
-	/* the common DS part */
+	/* The common DS part. */
 	struct ds_tracer ds;
-	/* the trace including the DS configuration */
+	/* The trace including the DS configuration. */
 	struct pebs_trace trace;
-	/* buffer overflow notification function */
+	/* Buffer overflow notification function. */
 	pebs_ovfl_callback_t ovfl;
 };
 
@@ -214,18 +211,16 @@ static inline int check_tracer(struct task_struct *task)
  * deallocated when the last user puts the context.
  */
 struct ds_context {
-	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
+	/* The DS configuration; goes into MSR_IA32_DS_AREA. */
 	unsigned char ds[MAX_SIZEOF_DS];
-	/* the owner of the BTS and PEBS configuration, respectively */
+	/* The owner of the BTS and PEBS configuration, respectively. */
 	struct bts_tracer *bts_master;
 	struct pebs_tracer *pebs_master;
-	/* use count */
+	/* Use count. */
 	unsigned long count;
-	/* a pointer to the context location inside the thread_struct
-	 * or the per_cpu context array */
+	/* Pointer to the context pointer field. */
 	struct ds_context **this;
-	/* a pointer to the task owning this context, or NULL, if the
-	 * context is owned by a cpu */
+	/* The traced task; NULL for current cpu. */
 	struct task_struct *task;
 };
 
@@ -350,14 +345,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 		unsigned long write_size, adj_write_size;
 
 		/*
-		 * write as much as possible without producing an
+		 * Write as much as possible without producing an
 		 * overflow interrupt.
 		 *
-		 * interrupt_threshold must either be
+		 * Interrupt_threshold must either be
 		 * - bigger than absolute_maximum or
 		 * - point to a record between buffer_base and absolute_maximum
 		 *
-		 * index points to a valid record.
+		 * Index points to a valid record.
 		 */
 		base   = ds_get(context->ds, qual, ds_buffer_base);
 		index  = ds_get(context->ds, qual, ds_index);
@@ -366,8 +361,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 
 		write_end = min(end, int_th);
 
-		/* if we are already beyond the interrupt threshold,
-		 * we fill the entire buffer */
+		/*
+		 * If we are already beyond the interrupt threshold,
+		 * we fill the entire buffer.
+		 */
 		if (write_end <= index)
 			write_end = end;
 
@@ -384,7 +381,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
 		adj_write_size *= ds_cfg.sizeof_rec[qual];
 
-		/* zero out trailing bytes */
+		/* Zero out trailing bytes. */
 		memset((char *)index + write_size, 0,
 		       adj_write_size - write_size);
 		index += adj_write_size;
@@ -556,7 +553,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
 			     unsigned int flags) {
 	unsigned long buffer, adj;
 
-	/* adjust the buffer address and size to meet alignment
+	/*
+	 * Adjust the buffer address and size to meet alignment
 	 * constraints:
 	 * - buffer is double-word aligned
 	 * - size is multiple of record size
@@ -578,7 +576,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
 	trace->begin = (void *)buffer;
 	trace->top = trace->begin;
 	trace->end = (void *)(buffer + size);
-	/* The value for 'no threshold' is -1, which will set the
+	/*
+	 * The value for 'no threshold' is -1, which will set the
 	 * threshold outside of the buffer, just like we want it.
 	 */
 	trace->ith = (void *)(buffer + size - ith);
@@ -602,7 +601,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 	if (!base)
 		goto out;
 
-	/* we require some space to do alignment adjustments below */
+	/* We require some space to do alignment adjustments below. */
 	error = -EINVAL;
 	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
 		goto out;
@@ -640,7 +639,7 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	unsigned long irq;
 	int error;
 
-	/* buffer overflow notification is not yet implemented */
+	/* Buffer overflow notification is not yet implemented. */
 	error = -EOPNOTSUPP;
 	if (ovfl)
 		goto out;
@@ -700,7 +699,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 	unsigned long irq;
 	int error;
 
-	/* buffer overflow notification is not yet implemented */
+	/* Buffer overflow notification is not yet implemented. */
 	error = -EOPNOTSUPP;
 	if (ovfl)
 		goto out;
@@ -983,9 +982,9 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 		case 0x1c: /* Atom */
 			ds_configure(&ds_cfg_core2_atom, c);
 			break;
-		case 0x1a: /* i7 */
+		case 0x1a: /* Core i7 */
 		default:
-			/* sorry, don't know about them */
+			/* Sorry, don't know about them. */
 			break;
 		}
 		break;
@@ -997,12 +996,12 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 			ds_configure(&ds_cfg_netburst, c);
 			break;
 		default:
-			/* sorry, don't know about them */
+			/* Sorry, don't know about them. */
 			break;
 		}
 		break;
 	default:
-		/* sorry, don't know about them */
+		/* Sorry, don't know about them. */
 		break;
 	}
 }
-- 
cgit v0.10.2


From ba9372a8f306c4e53a5f61dcbcd6c1e4a8c2e9ac Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 13 Mar 2009 10:48:52 +0100
Subject: x86, hw-branch-tracer: keep resources on stop

Distinguish init/reset and start/stop:

init/reset will allocate and release bts tracing resources
stop/start will suspend and resume bts tracing

Return an error on init() if no cpu can be traced.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090313104852.A30168@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 7bfdf4c..a99a04c 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -19,7 +19,7 @@
 #include "trace_output.h"
 
 
-#define SIZEOF_BTS (1 << 13)
+#define BTS_BUFFER_SIZE (1 << 13)
 
 /*
  * The tracer lock protects the below per-cpu tracer array.
@@ -33,53 +33,68 @@
  */
 static DEFINE_SPINLOCK(bts_tracer_lock);
 static DEFINE_PER_CPU(struct bts_tracer *, tracer);
-static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
+static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
 
 #define this_tracer per_cpu(tracer, smp_processor_id())
 #define this_buffer per_cpu(buffer, smp_processor_id())
 
-static int __read_mostly trace_hw_branches_enabled;
+static int trace_hw_branches_enabled __read_mostly;
+static int trace_hw_branches_suspended __read_mostly;
 static struct trace_array *hw_branch_trace __read_mostly;
 
 
 /*
- * Start tracing on the current cpu.
+ * Initialize the tracer for the current cpu.
  * The argument is ignored.
  *
  * pre: bts_tracer_lock must be locked.
  */
-static void bts_trace_start_cpu(void *arg)
+static void bts_trace_init_cpu(void *arg)
 {
 	if (this_tracer)
 		ds_release_bts(this_tracer);
 
-	this_tracer =
-		ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
-			       /* ovfl = */ NULL, /* th = */ (size_t)-1,
-			       BTS_KERNEL);
+	this_tracer = ds_request_bts(NULL, this_buffer, BTS_BUFFER_SIZE,
+				     NULL, (size_t)-1, BTS_KERNEL);
 	if (IS_ERR(this_tracer)) {
 		this_tracer = NULL;
 		return;
 	}
 }
 
-static void bts_trace_start(struct trace_array *tr)
+static int bts_trace_init(struct trace_array *tr)
 {
+	int cpu, avail;
+
 	spin_lock(&bts_tracer_lock);
 
-	on_each_cpu(bts_trace_start_cpu, NULL, 1);
-	trace_hw_branches_enabled = 1;
+	hw_branch_trace = tr;
+
+	on_each_cpu(bts_trace_init_cpu, NULL, 1);
+
+	/* Check on how many cpus we could enable tracing */
+	avail = 0;
+	for_each_online_cpu(cpu)
+		if (per_cpu(tracer, cpu))
+			avail++;
+
+	trace_hw_branches_enabled = (avail ? 1 : 0);
+	trace_hw_branches_suspended = 0;
 
 	spin_unlock(&bts_tracer_lock);
+
+
+	/* If we could not enable tracing on a single cpu, we fail. */
+	return avail ? 0 : -EOPNOTSUPP;
 }
 
 /*
- * Stop tracing on the current cpu.
+ * Release the tracer for the current cpu.
  * The argument is ignored.
  *
  * pre: bts_tracer_lock must be locked.
  */
-static void bts_trace_stop_cpu(void *arg)
+static void bts_trace_release_cpu(void *arg)
 {
 	if (this_tracer) {
 		ds_release_bts(this_tracer);
@@ -87,12 +102,57 @@ static void bts_trace_stop_cpu(void *arg)
 	}
 }
 
-static void bts_trace_stop(struct trace_array *tr)
+static void bts_trace_reset(struct trace_array *tr)
 {
 	spin_lock(&bts_tracer_lock);
 
+	on_each_cpu(bts_trace_release_cpu, NULL, 1);
 	trace_hw_branches_enabled = 0;
-	on_each_cpu(bts_trace_stop_cpu, NULL, 1);
+	trace_hw_branches_suspended = 0;
+
+	spin_unlock(&bts_tracer_lock);
+}
+
+/*
+ * Resume tracing on the current cpu.
+ * The argument is ignored.
+ *
+ * pre: bts_tracer_lock must be locked.
+ */
+static void bts_trace_resume_cpu(void *arg)
+{
+	if (this_tracer)
+		ds_resume_bts(this_tracer);
+}
+
+static void bts_trace_start(struct trace_array *tr)
+{
+	spin_lock(&bts_tracer_lock);
+
+	on_each_cpu(bts_trace_resume_cpu, NULL, 1);
+	trace_hw_branches_suspended = 0;
+
+	spin_unlock(&bts_tracer_lock);
+}
+
+/*
+ * Suspend tracing on the current cpu.
+ * The argument is ignored.
+ *
+ * pre: bts_tracer_lock must be locked.
+ */
+static void bts_trace_suspend_cpu(void *arg)
+{
+	if (this_tracer)
+		ds_suspend_bts(this_tracer);
+}
+
+static void bts_trace_stop(struct trace_array *tr)
+{
+	spin_lock(&bts_tracer_lock);
+
+	on_each_cpu(bts_trace_suspend_cpu, NULL, 1);
+	trace_hw_branches_suspended = 1;
 
 	spin_unlock(&bts_tracer_lock);
 }
@@ -110,10 +170,14 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
-		smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
+		smp_call_function_single(cpu, bts_trace_init_cpu, NULL, 1);
+
+		if (trace_hw_branches_suspended)
+			smp_call_function_single(cpu, bts_trace_suspend_cpu,
+						 NULL, 1);
 		break;
 	case CPU_DOWN_PREPARE:
-		smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
+		smp_call_function_single(cpu, bts_trace_release_cpu, NULL, 1);
 		break;
 	}
 
@@ -126,20 +190,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
 	.notifier_call = bts_hotcpu_handler
 };
 
-static int bts_trace_init(struct trace_array *tr)
-{
-	hw_branch_trace = tr;
-
-	bts_trace_start(tr);
-
-	return 0;
-}
-
-static void bts_trace_reset(struct trace_array *tr)
-{
-	bts_trace_stop(tr);
-}
-
 static void bts_trace_print_header(struct seq_file *m)
 {
 	seq_puts(m, "# CPU#        TO  <-  FROM\n");
@@ -228,7 +278,7 @@ static void trace_bts_at(const struct bts_trace *trace, void *at)
  */
 static void trace_bts_cpu(void *arg)
 {
-	struct trace_array *tr = (struct trace_array *) arg;
+	struct trace_array *tr = (struct trace_array *)arg;
 	const struct bts_trace *trace;
 	unsigned char *at;
 
@@ -276,7 +326,8 @@ void trace_hw_branch_oops(void)
 {
 	spin_lock(&bts_tracer_lock);
 
-	trace_bts_cpu(hw_branch_trace);
+	if (trace_hw_branches_enabled)
+		trace_bts_cpu(hw_branch_trace);
 
 	spin_unlock(&bts_tracer_lock);
 }
-- 
cgit v0.10.2


From 321bb5e1ac461c04b6a93f795010d6eb01d8c5ca Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 13 Mar 2009 10:50:27 +0100
Subject: x86, hw-branch-tracer: add selftest

Add a selftest for the hw-branch-tracer.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090313105027.A30183@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 56ce34d..e7fbc82 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -576,6 +576,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
 					       struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
+extern int trace_selftest_startup_hw_branches(struct tracer *trace,
+					      struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index a99a04c..4ca8270 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -342,7 +342,10 @@ struct tracer bts_tracer __read_mostly =
 	.start		= bts_trace_start,
 	.stop		= bts_trace_stop,
 	.open		= trace_bts_prepare,
-	.close		= trace_bts_close
+	.close		= trace_bts_close,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_hw_branches,
+#endif /* CONFIG_FTRACE_SELFTEST */
 };
 
 __init static int init_bts_trace(void)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index f907a2b..3c7b797 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_BRANCH:
 	case TRACE_GRAPH_ENT:
 	case TRACE_GRAPH_RET:
+	case TRACE_HW_BRANCHES:
 		return 1;
 	}
 	return 0;
@@ -691,3 +692,55 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
 	return ret;
 }
 #endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_HW_BRANCH_TRACER
+int
+trace_selftest_startup_hw_branches(struct tracer *trace,
+				   struct trace_array *tr)
+{
+	unsigned long count;
+	int ret;
+	struct trace_iterator iter;
+	struct tracer tracer;
+
+	if (!trace->open) {
+		printk(KERN_CONT "missing open function...");
+		return -1;
+	}
+
+	ret = tracer_init(trace, tr);
+	if (ret) {
+		warn_failed_init_tracer(trace, ret);
+		return ret;
+	}
+
+	/*
+	 * The hw-branch tracer needs to collect the trace from the various
+	 * cpu trace buffers - before tracing is stopped.
+	 */
+	memset(&iter, 0, sizeof(iter));
+	memcpy(&tracer, trace, sizeof(tracer));
+
+	iter.trace = &tracer;
+	iter.tr = tr;
+	iter.pos = -1;
+	mutex_init(&iter.mutex);
+
+	trace->open(&iter);
+
+	mutex_destroy(&iter.mutex);
+
+	tracing_stop();
+
+	ret = trace_test_buffer(tr, &count);
+	trace->reset(tr);
+	tracing_start();
+
+	if (!ret && !count) {
+		printk(KERN_CONT "no entries found..");
+		ret = -1;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_HW_BRANCH_TRACER */
-- 
cgit v0.10.2


From e9a22d1fb94050b7d600019c32e6b672d539054b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 13 Mar 2009 11:54:40 +0100
Subject: x86, bts: cleanups

Impact: cleanup, no code changed

Cc: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090313104218.A30096@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d9cab71..7363e01 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -19,43 +19,52 @@
  * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
-
-#include <asm/ds.h>
-
-#include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/slab.h>
+#include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/kernel.h>
+
+#include <asm/ds.h>
 
 #include "ds_selftest.h"
 
 /*
- * The configuration for a particular DS hardware implementation.
+ * The configuration for a particular DS hardware implementation:
  */
 struct ds_configuration {
-	/* The name of the configuration. */
-	const char *name;
-	/* The size of pointer-typed fields in DS, BTS, and PEBS. */
-	unsigned char  sizeof_ptr_field;
-	/* The size of a BTS/PEBS record in bytes. */
-	unsigned char  sizeof_rec[2];
-	/* Control bit-masks indexed by enum ds_feature. */
-	unsigned long ctl[dsf_ctl_max];
+	/* The name of the configuration: */
+	const char		*name;
+
+	/* The size of pointer-typed fields in DS, BTS, and PEBS: */
+	unsigned char		sizeof_ptr_field;
+
+	/* The size of a BTS/PEBS record in bytes: */
+	unsigned char		sizeof_rec[2];
+
+	/* Control bit-masks indexed by enum ds_feature: */
+	unsigned long		ctl[dsf_ctl_max];
 };
 static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
 
 #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
 
-#define MAX_SIZEOF_DS (12 * 8)	/* Maximal size of a DS configuration. */
-#define MAX_SIZEOF_BTS (3 * 8)	/* Maximal size of a BTS record. */
-#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment. */
+/* Maximal size of a DS configuration: */
+#define MAX_SIZEOF_DS		(12 * 8)
 
-#define BTS_CONTROL \
- (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
-  ds_cfg.ctl[dsf_bts_overflow])
+/* Maximal size of a BTS record: */
+#define MAX_SIZEOF_BTS		(3 * 8)
 
+/* BTS and PEBS buffer alignment: */
+#define DS_ALIGNMENT		(1 << 3)
+
+/* Mask of control bits in the DS MSR register: */
+#define BTS_CONTROL				  \
+	( ds_cfg.ctl[dsf_bts]			| \
+	  ds_cfg.ctl[dsf_bts_kernel]		| \
+	  ds_cfg.ctl[dsf_bts_user]		| \
+	  ds_cfg.ctl[dsf_bts_overflow] )
 
 /*
  * A BTS or PEBS tracer.
@@ -65,28 +74,32 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
  */
 struct ds_tracer {
 	/* The DS context (partially) owned by this tracer. */
-	struct ds_context *context;
+	struct ds_context	*context;
 	/* The buffer provided on ds_request() and its size in bytes. */
-	void *buffer;
-	size_t size;
+	void			*buffer;
+	size_t			size;
 };
 
 struct bts_tracer {
-	/* The common DS part. */
-	struct ds_tracer ds;
-	/* The trace including the DS configuration. */
-	struct bts_trace trace;
-	/* Buffer overflow notification function. */
-	bts_ovfl_callback_t ovfl;
+	/* The common DS part: */
+	struct ds_tracer	ds;
+
+	/* The trace including the DS configuration: */
+	struct bts_trace	trace;
+
+	/* Buffer overflow notification function: */
+	bts_ovfl_callback_t	ovfl;
 };
 
 struct pebs_tracer {
-	/* The common DS part. */
-	struct ds_tracer ds;
-	/* The trace including the DS configuration. */
-	struct pebs_trace trace;
-	/* Buffer overflow notification function. */
-	pebs_ovfl_callback_t ovfl;
+	/* The common DS part: */
+	struct ds_tracer	ds;
+
+	/* The trace including the DS configuration: */
+	struct pebs_trace	trace;
+
+	/* Buffer overflow notification function: */
+	pebs_ovfl_callback_t	ovfl;
 };
 
 /*
@@ -95,6 +108,7 @@ struct pebs_tracer {
  *
  * The DS configuration consists of the following fields; different
  * architetures vary in the size of those fields.
+ *
  * - double-word aligned base linear address of the BTS buffer
  * - write pointer into the BTS buffer
  * - end linear address of the BTS buffer (one byte beyond the end of
@@ -133,19 +147,20 @@ enum ds_field {
 };
 
 enum ds_qualifier {
-	ds_bts  = 0,
+	ds_bts = 0,
 	ds_pebs
 };
 
-static inline unsigned long ds_get(const unsigned char *base,
-				   enum ds_qualifier qual, enum ds_field field)
+static inline unsigned long
+ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
 {
 	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
 	return *(unsigned long *)base;
 }
 
-static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
-			  enum ds_field field, unsigned long value)
+static inline void
+ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
+       unsigned long value)
 {
 	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
 	(*(unsigned long *)base) = value;
@@ -157,7 +172,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
  */
 static DEFINE_SPINLOCK(ds_lock);
 
-
 /*
  * We either support (system-wide) per-cpu or per-thread allocation.
  * We distinguish the two based on the task_struct pointer, where a
@@ -211,17 +225,21 @@ static inline int check_tracer(struct task_struct *task)
  * deallocated when the last user puts the context.
  */
 struct ds_context {
-	/* The DS configuration; goes into MSR_IA32_DS_AREA. */
-	unsigned char ds[MAX_SIZEOF_DS];
-	/* The owner of the BTS and PEBS configuration, respectively. */
-	struct bts_tracer *bts_master;
-	struct pebs_tracer *pebs_master;
-	/* Use count. */
+	/* The DS configuration; goes into MSR_IA32_DS_AREA: */
+	unsigned char		ds[MAX_SIZEOF_DS];
+
+	/* The owner of the BTS and PEBS configuration, respectively: */
+	struct bts_tracer	*bts_master;
+	struct pebs_tracer	*pebs_master;
+
+	/* Use count: */
 	unsigned long count;
-	/* Pointer to the context pointer field. */
-	struct ds_context **this;
-	/* The traced task; NULL for current cpu. */
-	struct task_struct *task;
+
+	/* Pointer to the context pointer field: */
+	struct ds_context	**this;
+
+	/* The traced task; NULL for current cpu: */
+	struct task_struct	*task;
 };
 
 static DEFINE_PER_CPU(struct ds_context *, system_context_array);
@@ -328,9 +346,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
  * The remainder of any partially written record is zeroed out.
  *
  * context: the DS context
- * qual: the buffer type
- * record: the data to write
- * size: the size of the data
+ * qual:    the buffer type
+ * record:  the data to write
+ * size:    the size of the data
  */
 static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 		    const void *record, size_t size)
@@ -429,12 +447,12 @@ enum bts_field {
 	bts_to,
 	bts_flags,
 
-	bts_qual = bts_from,
-	bts_jiffies = bts_to,
-	bts_pid = bts_flags,
+	bts_qual		= bts_from,
+	bts_jiffies		= bts_to,
+	bts_pid			= bts_flags,
 
-	bts_qual_mask = (bts_qual_max - 1),
-	bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+	bts_qual_mask		= (bts_qual_max - 1),
+	bts_escape		= ((unsigned long)-1 & ~bts_qual_mask)
 };
 
 static inline unsigned long bts_get(const char *base, enum bts_field field)
@@ -461,8 +479,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val)
  *
  * return: bytes read/written on success; -Eerrno, otherwise
  */
-static int bts_read(struct bts_tracer *tracer, const void *at,
-		    struct bts_struct *out)
+static int
+bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
 {
 	if (!tracer)
 		return -EINVAL;
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
index 0e6e19d..2ba8745 100644
--- a/arch/x86/kernel/ds_selftest.h
+++ b/arch/x86/kernel/ds_selftest.h
@@ -12,4 +12,4 @@ extern int ds_selftest_pebs(void);
 #else
 static inline int ds_selftest_bts(void) { return 0; }
 static inline int ds_selftest_pebs(void) { return 0; }
-#endif /* CONFIG_X86_DS_SELFTEST */
+#endif
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 4ca8270..8b2109a 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,5 +1,5 @@
 /*
- * h/w branch tracer for x86 based on bts
+ * h/w branch tracer for x86 based on BTS
  *
  * Copyright (C) 2008-2009 Intel Corporation.
  * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
@@ -15,8 +15,8 @@
 
 #include <asm/ds.h>
 
-#include "trace.h"
 #include "trace_output.h"
+#include "trace.h"
 
 
 #define BTS_BUFFER_SIZE (1 << 13)
@@ -197,10 +197,10 @@ static void bts_trace_print_header(struct seq_file *m)
 
 static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
 {
+	unsigned long symflags = TRACE_ITER_SYM_OFFSET;
 	struct trace_entry *entry = iter->ent;
 	struct trace_seq *seq = &iter->seq;
 	struct hw_branch_entry *it;
-	unsigned long symflags = TRACE_ITER_SYM_OFFSET;
 
 	trace_assign_type(it, entry);
 
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 3c7b797..b910912 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -189,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 #else
 # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
 #endif /* CONFIG_DYNAMIC_FTRACE */
+
 /*
  * Simple verification test of ftrace function tracer.
  * Enable ftrace, sleep 1/10 second, and then read the trace
@@ -698,10 +699,10 @@ int
 trace_selftest_startup_hw_branches(struct tracer *trace,
 				   struct trace_array *tr)
 {
-	unsigned long count;
-	int ret;
 	struct trace_iterator iter;
 	struct tracer tracer;
+	unsigned long count;
+	int ret;
 
 	if (!trace->open) {
 		printk(KERN_CONT "missing open function...");
-- 
cgit v0.10.2


From 79258a354e0c69be94ae2871809a195bf4a647b1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 13 Mar 2009 12:02:08 +0100
Subject: x86, bts: detect size of DS fields, fix

Impact: build fix

One usage site was missed in the sizeof_field -> sizeof_ptr_field
rename.

Cc: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090313104218.A30096@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 7363e01..5fd5333 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -983,7 +983,7 @@ ds_configure(const struct ds_configuration *cfg,
 	printk("bts/pebs record: %u/%u bytes\n",
 	       ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
 
-	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
+	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_ptr_field));
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
-- 
cgit v0.10.2


From c78a3956b982418186e40978a51636a2b43221bc Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 18 Mar 2009 19:27:00 +0100
Subject: x86, bts: use atomic memory allocation

Ds_request_bts() needs to allocate memory. It uses GFP_KERNEL.

Hw-branch-tracer calls ds_request_bts() within on_each_cpu().

Use atomic memory allocation to allow it to be used in that context.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
LKML-Reference: <20090318192700.A6038@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 5fd5333..b1d6e1f 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -255,8 +255,13 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
 	struct ds_context *new_context = NULL;
 	unsigned long irq;
 
-	/* Chances are small that we already have a context. */
-	new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
+	/*
+	 * Chances are small that we already have a context.
+	 *
+	 * Contexts for per-cpu tracing are allocated using
+	 * smp_call_function(). We must not sleep.
+	 */
+	new_context = kzalloc(sizeof(*new_context), GFP_ATOMIC);
 	if (!new_context)
 		return NULL;
 
@@ -662,8 +667,12 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	if (ovfl)
 		goto out;
 
+	/*
+	 * Per-cpu tracing is typically requested using smp_call_function().
+	 * We must not sleep.
+	 */
 	error = -ENOMEM;
-	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC);
 	if (!tracer)
 		goto out;
 	tracer->ovfl = ovfl;
@@ -722,8 +731,12 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 	if (ovfl)
 		goto out;
 
+	/*
+	 * Per-cpu tracing is typically requested using smp_call_function().
+	 * We must not sleep.
+	 */
 	error = -ENOMEM;
-	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC);
 	if (!tracer)
 		goto out;
 	tracer->ovfl = ovfl;
-- 
cgit v0.10.2


From 425480081e936d8725f0d44b8829d699bf088c6b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 24 Mar 2009 13:38:36 -0400
Subject: tracing: add handler to trace_stat

Currently, if a trace_stat user wants a handle to some private data,
the trace_stat infrastructure does not supply a way to do that.

This patch passes the trace_stat structure to the start function of
the trace_stat code.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index ad8c22e..e6e3291 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -263,7 +263,7 @@ static int branch_stat_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static void *annotated_branch_stat_start(void)
+static void *annotated_branch_stat_start(struct tracer_stat *trace)
 {
 	return __start_annotated_branch_profile;
 }
@@ -338,7 +338,7 @@ static int all_branch_stat_headers(struct seq_file *m)
 	return 0;
 }
 
-static void *all_branch_stat_start(void)
+static void *all_branch_stat_start(struct tracer_stat *trace)
 {
 	return __start_branch_profile;
 }
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index f71b85b..f8f48d8 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -85,7 +85,7 @@ static int stat_seq_init(struct tracer_stat_session *session)
 	if (!ts->stat_cmp)
 		ts->stat_cmp = dummy_cmp;
 
-	stat = ts->stat_start();
+	stat = ts->stat_start(ts);
 	if (!stat)
 		goto exit;
 
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index 202274c..f3546a2 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -12,7 +12,7 @@ struct tracer_stat {
 	/* The name of your stat file */
 	const char		*name;
 	/* Iteration over statistic entries */
-	void			*(*stat_start)(void);
+	void			*(*stat_start)(struct tracer_stat *trace);
 	void			*(*stat_next)(void *prev, int idx);
 	/* Compare two entries for stats sorting */
 	int			(*stat_cmp)(void *p1, void *p2);
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 9ab035b..ee533c2 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -152,7 +152,7 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
 	return ret;
 }
 
-static void *workqueue_stat_start(void)
+static void *workqueue_stat_start(struct tracer_stat *trace)
 {
 	int cpu;
 	void *ret = NULL;
-- 
cgit v0.10.2


From bac429f037f1a51a74d62bad6d1518c3be065df3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 20 Mar 2009 12:50:56 -0400
Subject: tracing: add function profiler

Impact: new profiling feature

This patch adds a function profiler. In debugfs/tracing/ two new
files are created.

  function_profile_enabled  - to enable or disable profiling

  trace_stat/functions   - the profiled functions.

For example:

  echo 1 > /debugfs/tracing/function_profile_enabled
  ./hackbench 50
  echo 0 > /debugfs/tracing/function_profile_enabled

yields:

  cat /debugfs/tracing/trace_stat/functions

  Function                               Hit
  --------                               ---
  _spin_lock                        10106442
  _spin_unlock                      10097492
  kfree                              6013704
  _spin_unlock_irqrestore            4423941
  _spin_lock_irqsave                 4406825
  __phys_addr                        4181686
  __slab_free                        4038222
  dput                               4030130
  path_put                           4023387
  unroll_tree_refs                   4019532
[...]

The most hit functions are listed first. Functions that are not
hit are not listed.

This feature depends on and uses dynamic function tracing. When the
function profiling is disabled, no overhead occurs. But it still
takes up around 300KB to hold the data, thus it is not recomended
to keep it enabled for systems low on memory.

When a '1' is echoed into the function_profile_enabled file, the
counters for is function is reset back to zero. Thus you can see what
functions are hit most by different programs.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 015a3d2..0456c3a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -153,6 +153,10 @@ struct dyn_ftrace {
 		unsigned long		flags;
 		struct dyn_ftrace	*newlist;
 	};
+#ifdef CONFIG_FUNCTION_PROFILER
+	unsigned long			counter;
+	struct hlist_node		node;
+#endif
 	struct dyn_arch_ftrace		arch;
 };
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8a4d729..95e9ad5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -105,6 +105,7 @@ config FUNCTION_GRAPH_TRACER
 	  This is done by setting the current return address on the current
 	  task structure into a stack of calls.
 
+
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
 	default n
@@ -376,6 +377,24 @@ config DYNAMIC_FTRACE
 	 were made. If so, it runs stop_machine (stops all CPUS)
 	 and modifies the code to jump over the call to ftrace.
 
+config FUNCTION_PROFILER
+	bool "Kernel function profiler"
+	depends on DYNAMIC_FTRACE
+	default n
+	help
+	 This option enables the kernel function profiler. When the dynamic
+	 function tracing is enabled, a counter is added into the function
+	 records used by the dynamic function tracer. A file is created in
+	 debugfs called function_profile_enabled which defaults to zero.
+	 When a 1 is echoed into this file profiling begins, and when a
+	 zero is entered, profiling stops. A file in the trace_stats
+	 directory called functions, that show the list of functions that
+	 have been hit and their counters.
+
+	 This takes up around 320K more memory.
+
+	 If in doubt, say N
+
 config FTRACE_MCOUNT_RECORD
 	def_bool y
 	depends on DYNAMIC_FTRACE
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7b8722b..11f364c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -34,6 +34,7 @@
 #include <asm/ftrace.h>
 
 #include "trace.h"
+#include "trace_stat.h"
 
 #define FTRACE_WARN_ON(cond)			\
 	do {					\
@@ -261,7 +262,6 @@ struct ftrace_func_probe {
 	struct rcu_head		rcu;
 };
 
-
 enum {
 	FTRACE_ENABLE_CALLS		= (1 << 0),
 	FTRACE_DISABLE_CALLS		= (1 << 1),
@@ -309,6 +309,307 @@ static struct dyn_ftrace *ftrace_free_records;
 		}				\
 	}
 
+#ifdef CONFIG_FUNCTION_PROFILER
+static struct hlist_head *ftrace_profile_hash;
+static int ftrace_profile_bits;
+static int ftrace_profile_enabled;
+static DEFINE_MUTEX(ftrace_profile_lock);
+
+static void *
+function_stat_next(void *v, int idx)
+{
+	struct dyn_ftrace *rec = v;
+	struct ftrace_page *pg;
+
+	pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK);
+
+ again:
+	rec++;
+	if ((void *)rec >= (void *)&pg->records[pg->index]) {
+		pg = pg->next;
+		if (!pg)
+			return NULL;
+		rec = &pg->records[0];
+	}
+
+	if (rec->flags & FTRACE_FL_FREE ||
+	    rec->flags & FTRACE_FL_FAILED ||
+	    !(rec->flags & FTRACE_FL_CONVERTED) ||
+	    /* ignore non hit functions */
+	    !rec->counter)
+		goto again;
+
+	return rec;
+}
+
+static void *function_stat_start(struct tracer_stat *trace)
+{
+	return function_stat_next(&ftrace_pages_start->records[0], 0);
+}
+
+static int function_stat_cmp(void *p1, void *p2)
+{
+	struct dyn_ftrace *a = p1;
+	struct dyn_ftrace *b = p2;
+
+	if (a->counter < b->counter)
+		return -1;
+	if (a->counter > b->counter)
+		return 1;
+	else
+		return 0;
+}
+
+static int function_stat_headers(struct seq_file *m)
+{
+	seq_printf(m, "  Function                               Hit\n"
+		      "  --------                               ---\n");
+	return 0;
+}
+
+static int function_stat_show(struct seq_file *m, void *v)
+{
+	struct dyn_ftrace *rec = v;
+	char str[KSYM_SYMBOL_LEN];
+
+	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+
+	seq_printf(m, "  %-30.30s  %10lu\n", str, rec->counter);
+	return 0;
+}
+
+static struct tracer_stat function_stats = {
+	.name = "functions",
+	.stat_start = function_stat_start,
+	.stat_next = function_stat_next,
+	.stat_cmp = function_stat_cmp,
+	.stat_headers = function_stat_headers,
+	.stat_show = function_stat_show
+};
+
+static void ftrace_profile_init(int nr_funcs)
+{
+	unsigned long addr;
+	int order;
+	int size;
+
+	/*
+	 * We are profiling all functions, lets make it 1/4th of the
+	 * number of functions that are in core kernel. So we have to
+	 * iterate 4 times.
+	 */
+	order = (sizeof(struct hlist_head) * nr_funcs) / 4;
+	order = get_order(order);
+	size = 1 << (PAGE_SHIFT + order);
+
+	pr_info("Allocating %d KB for profiler hash\n", size >> 10);
+
+	addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!addr) {
+		pr_warning("Could not allocate function profiler hash\n");
+		return;
+	}
+
+	ftrace_profile_hash = (void *)addr;
+
+	/*
+	 * struct hlist_head should be a pointer of 4 or 8 bytes.
+	 * And a simple bit manipulation can be done, but if for
+	 * some reason struct hlist_head is not a mulitple of 2,
+	 * then we play it safe, and simply count. This function
+	 * is done once at boot up, so it is not that critical in
+	 * performance.
+	 */
+
+	size--;
+	size /= sizeof(struct hlist_head);
+
+	for (; size; size >>= 1)
+		ftrace_profile_bits++;
+
+	pr_info("Function profiler has %d hash buckets\n",
+		1 << ftrace_profile_bits);
+
+	return;
+}
+
+static ssize_t
+ftrace_profile_read(struct file *filp, char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static void ftrace_profile_reset(void)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+
+	do_for_each_ftrace_rec(pg, rec) {
+		rec->counter = 0;
+	} while_for_each_ftrace_rec();
+}
+
+static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip)
+{
+	struct dyn_ftrace *rec;
+	struct hlist_head *hhd;
+	struct hlist_node *n;
+	unsigned long flags;
+	unsigned long key;
+
+	if (!ftrace_profile_hash)
+		return NULL;
+
+	key = hash_long(ip, ftrace_profile_bits);
+	hhd = &ftrace_profile_hash[key];
+
+	if (hlist_empty(hhd))
+		return NULL;
+
+	local_irq_save(flags);
+	hlist_for_each_entry_rcu(rec, n, hhd, node) {
+		if (rec->ip == ip)
+			goto out;
+	}
+	rec = NULL;
+ out:
+	local_irq_restore(flags);
+
+	return rec;
+}
+
+static void
+function_profile_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct dyn_ftrace *rec;
+	unsigned long flags;
+
+	if (!ftrace_profile_enabled)
+		return;
+
+	local_irq_save(flags);
+	rec = ftrace_find_profiled_func(ip);
+	if (!rec)
+		goto out;
+
+	rec->counter++;
+ out:
+	local_irq_restore(flags);
+}
+
+static struct ftrace_ops ftrace_profile_ops __read_mostly =
+{
+	.func = function_profile_call,
+};
+
+static ssize_t
+ftrace_profile_write(struct file *filp, const char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	char buf[64];
+	int ret;
+
+	if (!ftrace_profile_hash) {
+		pr_info("Can not enable hash due to earlier problems\n");
+		return -ENODEV;
+	}
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	val = !!val;
+
+	mutex_lock(&ftrace_profile_lock);
+	if (ftrace_profile_enabled ^ val) {
+		if (val) {
+			ftrace_profile_reset();
+			register_ftrace_function(&ftrace_profile_ops);
+			ftrace_profile_enabled = 1;
+		} else {
+			ftrace_profile_enabled = 0;
+			unregister_ftrace_function(&ftrace_profile_ops);
+		}
+	}
+	mutex_unlock(&ftrace_profile_lock);
+
+	filp->f_pos += cnt;
+
+	return cnt;
+}
+
+static const struct file_operations ftrace_profile_fops = {
+	.open		= tracing_open_generic,
+	.read		= ftrace_profile_read,
+	.write		= ftrace_profile_write,
+};
+
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+	struct dentry *entry;
+	int ret;
+
+	ret = register_stat_tracer(&function_stats);
+	if (ret) {
+		pr_warning("Warning: could not register "
+			   "function stats\n");
+		return;
+	}
+
+	entry = debugfs_create_file("function_profile_enabled", 0644,
+				    d_tracer, NULL, &ftrace_profile_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'function_profile_enabled' entry\n");
+}
+
+static void ftrace_add_profile(struct dyn_ftrace *rec)
+{
+	unsigned long key;
+
+	if (!ftrace_profile_hash)
+		return;
+
+	key = hash_long(rec->ip, ftrace_profile_bits);
+	hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
+}
+
+static void ftrace_profile_release(struct dyn_ftrace *rec)
+{
+	mutex_lock(&ftrace_profile_lock);
+	hlist_del(&rec->node);
+	mutex_unlock(&ftrace_profile_lock);
+}
+
+#else /* CONFIG_FUNCTION_PROFILER */
+static void ftrace_profile_init(int nr_funcs)
+{
+}
+static void ftrace_add_profile(struct dyn_ftrace *rec)
+{
+}
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+}
+static void ftrace_profile_release(struct dyn_ftrace *rec)
+{
+}
+#endif /* CONFIG_FUNCTION_PROFILER */
+
 #ifdef CONFIG_KPROBES
 
 static int frozen_record_count;
@@ -359,8 +660,10 @@ void ftrace_release(void *start, unsigned long size)
 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
 		if ((rec->ip >= s) && (rec->ip < e) &&
-		    !(rec->flags & FTRACE_FL_FREE))
+		    !(rec->flags & FTRACE_FL_FREE)) {
 			ftrace_free_rec(rec);
+			ftrace_profile_release(rec);
+		}
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
 }
@@ -414,6 +717,8 @@ ftrace_record_ip(unsigned long ip)
 	rec->newlist = ftrace_new_addrs;
 	ftrace_new_addrs = rec;
 
+	ftrace_add_profile(rec);
+
 	return rec;
 }
 
@@ -2157,6 +2462,8 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 			   "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
+	ftrace_profile_debugfs(d_tracer);
+
 	return 0;
 }
 
@@ -2225,6 +2532,8 @@ void __init ftrace_init(void)
 	if (ret)
 		goto failed;
 
+	ftrace_profile_init(count);
+
 	last_ftrace_enabled = ftrace_enabled = 1;
 
 	ret = ftrace_convert_nops(NULL,
-- 
cgit v0.10.2


From 493762fc534c71d11d489f872c4b4a2c61173668 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 23 Mar 2009 17:12:36 -0400
Subject: tracing: move function profiler data out of function struct

Impact: reduce size of memory in function profiler

The function profiler originally introduces its counters into the
function records itself. There is 20 thousand different functions on
a normal system, and that is adding 20 thousand counters for profiling
event when not needed.

A normal run of the profiler yields only a couple of thousand functions
executed, depending on what is being profiled. This means we have around
18 thousand useless counters.

This patch rectifies this by moving the data out of the function
records used by dynamic ftrace. Data is preallocated to hold the functions
when the profiling begins. Checks are made during profiling to see if
more recorcds should be allocated, and they are allocated if it is safe
to do so.

This also removes the dependency from using dynamic ftrace, and also
removes the overhead by having it enabled.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0456c3a..015a3d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -153,10 +153,6 @@ struct dyn_ftrace {
 		unsigned long		flags;
 		struct dyn_ftrace	*newlist;
 	};
-#ifdef CONFIG_FUNCTION_PROFILER
-	unsigned long			counter;
-	struct hlist_node		node;
-#endif
 	struct dyn_arch_ftrace		arch;
 };
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 95e9ad5..8a41360 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -379,20 +379,16 @@ config DYNAMIC_FTRACE
 
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
-	depends on DYNAMIC_FTRACE
+	depends on FUNCTION_TRACER
 	default n
 	help
-	 This option enables the kernel function profiler. When the dynamic
-	 function tracing is enabled, a counter is added into the function
-	 records used by the dynamic function tracer. A file is created in
-	 debugfs called function_profile_enabled which defaults to zero.
+	 This option enables the kernel function profiler. A file is created
+	 in debugfs called function_profile_enabled which defaults to zero.
 	 When a 1 is echoed into this file profiling begins, and when a
 	 zero is entered, profiling stops. A file in the trace_stats
 	 directory called functions, that show the list of functions that
 	 have been hit and their counters.
 
-	 This takes up around 320K more memory.
-
 	 If in doubt, say N
 
 config FTRACE_MCOUNT_RECORD
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 11f364c..24dac44 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -241,87 +241,48 @@ static void ftrace_update_pid_func(void)
 #endif
 }
 
-/* set when tracing only a pid */
-struct pid *ftrace_pid_trace;
-static struct pid * const ftrace_swapper_pid = &init_struct_pid;
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-#ifndef CONFIG_FTRACE_MCOUNT_RECORD
-# error Dynamic ftrace depends on MCOUNT_RECORD
-#endif
-
-static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
-
-struct ftrace_func_probe {
-	struct hlist_node	node;
-	struct ftrace_probe_ops	*ops;
-	unsigned long		flags;
-	unsigned long		ip;
-	void			*data;
-	struct rcu_head		rcu;
+#ifdef CONFIG_FUNCTION_PROFILER
+struct ftrace_profile {
+	struct hlist_node		node;
+	unsigned long			ip;
+	unsigned long			counter;
 };
 
-enum {
-	FTRACE_ENABLE_CALLS		= (1 << 0),
-	FTRACE_DISABLE_CALLS		= (1 << 1),
-	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
-	FTRACE_ENABLE_MCOUNT		= (1 << 3),
-	FTRACE_DISABLE_MCOUNT		= (1 << 4),
-	FTRACE_START_FUNC_RET		= (1 << 5),
-	FTRACE_STOP_FUNC_RET		= (1 << 6),
+struct ftrace_profile_page {
+	struct ftrace_profile_page	*next;
+	unsigned long			index;
+	struct ftrace_profile		records[];
 };
 
-static int ftrace_filtered;
+#define PROFILE_RECORDS_SIZE						\
+	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
 
-static struct dyn_ftrace *ftrace_new_addrs;
+#define PROFILES_PER_PAGE					\
+	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
 
-static DEFINE_MUTEX(ftrace_regex_lock);
-
-struct ftrace_page {
-	struct ftrace_page	*next;
-	int			index;
-	struct dyn_ftrace	records[];
-};
+/* TODO: make these percpu, to prevent cache line bouncing */
+static struct ftrace_profile_page *profile_pages_start;
+static struct ftrace_profile_page *profile_pages;
 
-#define ENTRIES_PER_PAGE \
-  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
-
-/* estimate from running different kernels */
-#define NR_TO_INIT		10000
-
-static struct ftrace_page	*ftrace_pages_start;
-static struct ftrace_page	*ftrace_pages;
-
-static struct dyn_ftrace *ftrace_free_records;
-
-/*
- * This is a double for. Do not use 'break' to break out of the loop,
- * you must use a goto.
- */
-#define do_for_each_ftrace_rec(pg, rec)					\
-	for (pg = ftrace_pages_start; pg; pg = pg->next) {		\
-		int _____i;						\
-		for (_____i = 0; _____i < pg->index; _____i++) {	\
-			rec = &pg->records[_____i];
-
-#define while_for_each_ftrace_rec()		\
-		}				\
-	}
-
-#ifdef CONFIG_FUNCTION_PROFILER
 static struct hlist_head *ftrace_profile_hash;
 static int ftrace_profile_bits;
 static int ftrace_profile_enabled;
 static DEFINE_MUTEX(ftrace_profile_lock);
 
+static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable);
+
+#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
+
+static raw_spinlock_t ftrace_profile_rec_lock =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
 static void *
 function_stat_next(void *v, int idx)
 {
-	struct dyn_ftrace *rec = v;
-	struct ftrace_page *pg;
+	struct ftrace_profile *rec = v;
+	struct ftrace_profile_page *pg;
 
-	pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK);
+	pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
 
  again:
 	rec++;
@@ -330,27 +291,22 @@ function_stat_next(void *v, int idx)
 		if (!pg)
 			return NULL;
 		rec = &pg->records[0];
+		if (!rec->counter)
+			goto again;
 	}
 
-	if (rec->flags & FTRACE_FL_FREE ||
-	    rec->flags & FTRACE_FL_FAILED ||
-	    !(rec->flags & FTRACE_FL_CONVERTED) ||
-	    /* ignore non hit functions */
-	    !rec->counter)
-		goto again;
-
 	return rec;
 }
 
 static void *function_stat_start(struct tracer_stat *trace)
 {
-	return function_stat_next(&ftrace_pages_start->records[0], 0);
+	return function_stat_next(&profile_pages_start->records[0], 0);
 }
 
 static int function_stat_cmp(void *p1, void *p2)
 {
-	struct dyn_ftrace *a = p1;
-	struct dyn_ftrace *b = p2;
+	struct ftrace_profile *a = p1;
+	struct ftrace_profile *b = p2;
 
 	if (a->counter < b->counter)
 		return -1;
@@ -369,7 +325,7 @@ static int function_stat_headers(struct seq_file *m)
 
 static int function_stat_show(struct seq_file *m, void *v)
 {
-	struct dyn_ftrace *rec = v;
+	struct ftrace_profile *rec = v;
 	char str[KSYM_SYMBOL_LEN];
 
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -387,115 +343,191 @@ static struct tracer_stat function_stats = {
 	.stat_show = function_stat_show
 };
 
-static void ftrace_profile_init(int nr_funcs)
+static void ftrace_profile_reset(void)
 {
-	unsigned long addr;
-	int order;
-	int size;
+	struct ftrace_profile_page *pg;
 
-	/*
-	 * We are profiling all functions, lets make it 1/4th of the
-	 * number of functions that are in core kernel. So we have to
-	 * iterate 4 times.
-	 */
-	order = (sizeof(struct hlist_head) * nr_funcs) / 4;
-	order = get_order(order);
-	size = 1 << (PAGE_SHIFT + order);
-
-	pr_info("Allocating %d KB for profiler hash\n", size >> 10);
+	pg = profile_pages = profile_pages_start;
 
-	addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-	if (!addr) {
-		pr_warning("Could not allocate function profiler hash\n");
-		return;
+	while (pg) {
+		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
+		pg->index = 0;
+		pg = pg->next;
 	}
 
-	ftrace_profile_hash = (void *)addr;
+	memset(ftrace_profile_hash, 0,
+	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
+}
 
-	/*
-	 * struct hlist_head should be a pointer of 4 or 8 bytes.
-	 * And a simple bit manipulation can be done, but if for
-	 * some reason struct hlist_head is not a mulitple of 2,
-	 * then we play it safe, and simply count. This function
-	 * is done once at boot up, so it is not that critical in
-	 * performance.
-	 */
+int ftrace_profile_pages_init(void)
+{
+	struct ftrace_profile_page *pg;
+	int i;
 
-	size--;
-	size /= sizeof(struct hlist_head);
+	/* If we already allocated, do nothing */
+	if (profile_pages)
+		return 0;
 
-	for (; size; size >>= 1)
-		ftrace_profile_bits++;
+	profile_pages = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!profile_pages)
+		return -ENOMEM;
 
-	pr_info("Function profiler has %d hash buckets\n",
-		1 << ftrace_profile_bits);
+	pg = profile_pages_start = profile_pages;
 
-	return;
+	/* allocate 10 more pages to start */
+	for (i = 0; i < 10; i++) {
+		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+		/*
+		 * We only care about allocating profile_pages, if
+		 * we failed to allocate here, hopefully we will allocate
+		 * later.
+		 */
+		if (!pg->next)
+			break;
+		pg = pg->next;
+	}
+
+	return 0;
 }
 
-static ssize_t
-ftrace_profile_read(struct file *filp, char __user *ubuf,
-		     size_t cnt, loff_t *ppos)
+static int ftrace_profile_init(void)
 {
-	char buf[64];
-	int r;
+	int size;
 
-	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
+	if (ftrace_profile_hash) {
+		/* If the profile is already created, simply reset it */
+		ftrace_profile_reset();
+		return 0;
+	}
 
-static void ftrace_profile_reset(void)
-{
-	struct dyn_ftrace *rec;
-	struct ftrace_page *pg;
+	/*
+	 * We are profiling all functions, but usually only a few thousand
+	 * functions are hit. We'll make a hash of 1024 items.
+	 */
+	size = FTRACE_PROFILE_HASH_SIZE;
 
-	do_for_each_ftrace_rec(pg, rec) {
-		rec->counter = 0;
-	} while_for_each_ftrace_rec();
+	ftrace_profile_hash =
+		kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
+
+	if (!ftrace_profile_hash)
+		return -ENOMEM;
+
+	size--;
+
+	for (; size; size >>= 1)
+		ftrace_profile_bits++;
+
+	/* Preallocate a few pages */
+	if (ftrace_profile_pages_init() < 0) {
+		kfree(ftrace_profile_hash);
+		ftrace_profile_hash = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
-static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip)
+/* interrupts must be disabled */
+static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
 {
-	struct dyn_ftrace *rec;
+	struct ftrace_profile *rec;
 	struct hlist_head *hhd;
 	struct hlist_node *n;
-	unsigned long flags;
 	unsigned long key;
 
-	if (!ftrace_profile_hash)
-		return NULL;
-
 	key = hash_long(ip, ftrace_profile_bits);
 	hhd = &ftrace_profile_hash[key];
 
 	if (hlist_empty(hhd))
 		return NULL;
 
-	local_irq_save(flags);
 	hlist_for_each_entry_rcu(rec, n, hhd, node) {
 		if (rec->ip == ip)
-			goto out;
+			return rec;
+	}
+
+	return NULL;
+}
+
+static void ftrace_add_profile(struct ftrace_profile *rec)
+{
+	unsigned long key;
+
+	key = hash_long(rec->ip, ftrace_profile_bits);
+	hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
+}
+
+/* Interrupts must be disabled calling this */
+static struct ftrace_profile *
+ftrace_profile_alloc(unsigned long ip, bool alloc_safe)
+{
+	struct ftrace_profile *rec = NULL;
+
+	/* prevent recursion */
+	if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1)
+		goto out;
+
+	__raw_spin_lock(&ftrace_profile_rec_lock);
+
+	/* Try to always keep another page available */
+	if (!profile_pages->next && alloc_safe)
+		profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
+
+	/*
+	 * Try to find the function again since another
+	 * task on another CPU could have added it
+	 */
+	rec = ftrace_find_profiled_func(ip);
+	if (rec)
+		goto out_unlock;
+
+	if (profile_pages->index == PROFILES_PER_PAGE) {
+		if (!profile_pages->next)
+			goto out_unlock;
+		profile_pages = profile_pages->next;
 	}
-	rec = NULL;
+
+	rec = &profile_pages->records[profile_pages->index++];
+	rec->ip = ip;
+	ftrace_add_profile(rec);
+
+ out_unlock:
+	__raw_spin_unlock(&ftrace_profile_rec_lock);
  out:
-	local_irq_restore(flags);
+	atomic_dec(&__get_cpu_var(ftrace_profile_disable));
 
 	return rec;
 }
 
+/*
+ * If we are not in an interrupt, or softirq and
+ * and interrupts are disabled and preemption is not enabled
+ * (not in a spinlock) then it should be safe to allocate memory.
+ */
+static bool ftrace_safe_to_allocate(void)
+{
+	return !in_interrupt() && irqs_disabled() && !preempt_count();
+}
+
 static void
 function_profile_call(unsigned long ip, unsigned long parent_ip)
 {
-	struct dyn_ftrace *rec;
+	struct ftrace_profile *rec;
 	unsigned long flags;
+	bool alloc_safe;
 
 	if (!ftrace_profile_enabled)
 		return;
 
+	alloc_safe = ftrace_safe_to_allocate();
+
 	local_irq_save(flags);
 	rec = ftrace_find_profiled_func(ip);
-	if (!rec)
-		goto out;
+	if (!rec) {
+		rec = ftrace_profile_alloc(ip, alloc_safe);
+		if (!rec)
+			goto out;
+	}
 
 	rec->counter++;
  out:
@@ -515,11 +547,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 	char buf[64];
 	int ret;
 
-	if (!ftrace_profile_hash) {
-		pr_info("Can not enable hash due to earlier problems\n");
-		return -ENODEV;
-	}
-
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
 
@@ -537,7 +564,12 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 	mutex_lock(&ftrace_profile_lock);
 	if (ftrace_profile_enabled ^ val) {
 		if (val) {
-			ftrace_profile_reset();
+			ret = ftrace_profile_init();
+			if (ret < 0) {
+				cnt = ret;
+				goto out;
+			}
+
 			register_ftrace_function(&ftrace_profile_ops);
 			ftrace_profile_enabled = 1;
 		} else {
@@ -545,6 +577,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 			unregister_ftrace_function(&ftrace_profile_ops);
 		}
 	}
+ out:
 	mutex_unlock(&ftrace_profile_lock);
 
 	filp->f_pos += cnt;
@@ -552,6 +585,17 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static ssize_t
+ftrace_profile_read(struct file *filp, char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
 static const struct file_operations ftrace_profile_fops = {
 	.open		= tracing_open_generic,
 	.read		= ftrace_profile_read,
@@ -577,39 +621,80 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
 			   "'function_profile_enabled' entry\n");
 }
 
-static void ftrace_add_profile(struct dyn_ftrace *rec)
-{
-	unsigned long key;
-
-	if (!ftrace_profile_hash)
-		return;
-
-	key = hash_long(rec->ip, ftrace_profile_bits);
-	hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
-}
-
-static void ftrace_profile_release(struct dyn_ftrace *rec)
-{
-	mutex_lock(&ftrace_profile_lock);
-	hlist_del(&rec->node);
-	mutex_unlock(&ftrace_profile_lock);
-}
-
 #else /* CONFIG_FUNCTION_PROFILER */
-static void ftrace_profile_init(int nr_funcs)
-{
-}
-static void ftrace_add_profile(struct dyn_ftrace *rec)
-{
-}
 static void ftrace_profile_debugfs(struct dentry *d_tracer)
 {
 }
-static void ftrace_profile_release(struct dyn_ftrace *rec)
-{
-}
 #endif /* CONFIG_FUNCTION_PROFILER */
 
+/* set when tracing only a pid */
+struct pid *ftrace_pid_trace;
+static struct pid * const ftrace_swapper_pid = &init_struct_pid;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+# error Dynamic ftrace depends on MCOUNT_RECORD
+#endif
+
+static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
+
+struct ftrace_func_probe {
+	struct hlist_node	node;
+	struct ftrace_probe_ops	*ops;
+	unsigned long		flags;
+	unsigned long		ip;
+	void			*data;
+	struct rcu_head		rcu;
+};
+
+enum {
+	FTRACE_ENABLE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_ENABLE_MCOUNT		= (1 << 3),
+	FTRACE_DISABLE_MCOUNT		= (1 << 4),
+	FTRACE_START_FUNC_RET		= (1 << 5),
+	FTRACE_STOP_FUNC_RET		= (1 << 6),
+};
+
+static int ftrace_filtered;
+
+static struct dyn_ftrace *ftrace_new_addrs;
+
+static DEFINE_MUTEX(ftrace_regex_lock);
+
+struct ftrace_page {
+	struct ftrace_page	*next;
+	int			index;
+	struct dyn_ftrace	records[];
+};
+
+#define ENTRIES_PER_PAGE \
+  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+
+/* estimate from running different kernels */
+#define NR_TO_INIT		10000
+
+static struct ftrace_page	*ftrace_pages_start;
+static struct ftrace_page	*ftrace_pages;
+
+static struct dyn_ftrace *ftrace_free_records;
+
+/*
+ * This is a double for. Do not use 'break' to break out of the loop,
+ * you must use a goto.
+ */
+#define do_for_each_ftrace_rec(pg, rec)					\
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {		\
+		int _____i;						\
+		for (_____i = 0; _____i < pg->index; _____i++) {	\
+			rec = &pg->records[_____i];
+
+#define while_for_each_ftrace_rec()		\
+		}				\
+	}
+
 #ifdef CONFIG_KPROBES
 
 static int frozen_record_count;
@@ -660,10 +745,8 @@ void ftrace_release(void *start, unsigned long size)
 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
 		if ((rec->ip >= s) && (rec->ip < e) &&
-		    !(rec->flags & FTRACE_FL_FREE)) {
+		    !(rec->flags & FTRACE_FL_FREE))
 			ftrace_free_rec(rec);
-			ftrace_profile_release(rec);
-		}
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
 }
@@ -717,8 +800,6 @@ ftrace_record_ip(unsigned long ip)
 	rec->newlist = ftrace_new_addrs;
 	ftrace_new_addrs = rec;
 
-	ftrace_add_profile(rec);
-
 	return rec;
 }
 
@@ -2462,8 +2543,6 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 			   "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-	ftrace_profile_debugfs(d_tracer);
-
 	return 0;
 }
 
@@ -2532,8 +2611,6 @@ void __init ftrace_init(void)
 	if (ret)
 		goto failed;
 
-	ftrace_profile_init(count);
-
 	last_ftrace_enabled = ftrace_enabled = 1;
 
 	ret = ftrace_convert_nops(NULL,
@@ -2734,6 +2811,9 @@ static __init int ftrace_init_debugfs(void)
 	if (!entry)
 		pr_warning("Could not create debugfs "
 			   "'set_ftrace_pid' entry\n");
+
+	ftrace_profile_debugfs(d_tracer);
+
 	return 0;
 }
 fs_initcall(ftrace_init_debugfs);
-- 
cgit v0.10.2


From 0706f1c48ca8a7ab478090b4e38f2e578ae2bfe0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 23 Mar 2009 23:12:58 -0400
Subject: tracing: adding function timings to function profiler

If the function graph trace is enabled, the function profiler will
use it to take the timing of the functions.

 cat /debug/tracing/trace_stat/functions

  Function                               Hit    Time
  --------                               ---    ----
  mwait_idle                             127    183028.4 us
  schedule                                26    151997.7 us
  __schedule                              31    151975.1 us
  sys_wait4                                2    74080.53 us
  do_wait                                  2    74077.80 us
  sys_newlstat                           138    39929.16 us
  do_path_lookup                         179    39845.79 us
  vfs_lstat_fd                           138    39761.97 us
  user_path_at                           153    39469.58 us
  path_walk                              179    39435.76 us
  __link_path_walk                       189    39143.73 us
[...]

Note the times are skewed due to the function graph tracer not taking
into account schedules.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 24dac44..a9ccd71 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -33,7 +33,7 @@
 
 #include <asm/ftrace.h>
 
-#include "trace.h"
+#include "trace_output.h"
 #include "trace_stat.h"
 
 #define FTRACE_WARN_ON(cond)			\
@@ -246,6 +246,9 @@ struct ftrace_profile {
 	struct hlist_node		node;
 	unsigned long			ip;
 	unsigned long			counter;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	unsigned long long		time;
+#endif
 };
 
 struct ftrace_profile_page {
@@ -303,6 +306,22 @@ static void *function_stat_start(struct tracer_stat *trace)
 	return function_stat_next(&profile_pages_start->records[0], 0);
 }
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* function graph compares on total time */
+static int function_stat_cmp(void *p1, void *p2)
+{
+	struct ftrace_profile *a = p1;
+	struct ftrace_profile *b = p2;
+
+	if (a->time < b->time)
+		return -1;
+	if (a->time > b->time)
+		return 1;
+	else
+		return 0;
+}
+#else
+/* not function graph compares against hits */
 static int function_stat_cmp(void *p1, void *p2)
 {
 	struct ftrace_profile *a = p1;
@@ -315,11 +334,17 @@ static int function_stat_cmp(void *p1, void *p2)
 	else
 		return 0;
 }
+#endif
 
 static int function_stat_headers(struct seq_file *m)
 {
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	seq_printf(m, "  Function                               Hit    Time\n"
+		      "  --------                               ---    ----\n");
+#else
 	seq_printf(m, "  Function                               Hit\n"
 		      "  --------                               ---\n");
+#endif
 	return 0;
 }
 
@@ -327,10 +352,25 @@ static int function_stat_show(struct seq_file *m, void *v)
 {
 	struct ftrace_profile *rec = v;
 	char str[KSYM_SYMBOL_LEN];
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	static struct trace_seq s;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	trace_seq_init(&s);
+	trace_print_graph_duration(rec->time, &s);
+#endif
 
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+	seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	seq_printf(m, "    ");
+	trace_print_seq(m, &s);
+	mutex_unlock(&mutex);
+#endif
+	seq_putc(m, '\n');
 
-	seq_printf(m, "  %-30.30s  %10lu\n", str, rec->counter);
 	return 0;
 }
 
@@ -534,11 +574,52 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int profile_graph_entry(struct ftrace_graph_ent *trace)
+{
+	function_profile_call(trace->func, 0);
+	return 1;
+}
+
+static void profile_graph_return(struct ftrace_graph_ret *trace)
+{
+	unsigned long flags;
+	struct ftrace_profile *rec;
+
+	local_irq_save(flags);
+	rec = ftrace_find_profiled_func(trace->func);
+	if (rec)
+		rec->time += trace->rettime - trace->calltime;
+	local_irq_restore(flags);
+}
+
+static int register_ftrace_profiler(void)
+{
+	return register_ftrace_graph(&profile_graph_return,
+				     &profile_graph_entry);
+}
+
+static void unregister_ftrace_profiler(void)
+{
+	unregister_ftrace_graph();
+}
+#else
 static struct ftrace_ops ftrace_profile_ops __read_mostly =
 {
 	.func = function_profile_call,
 };
 
+static int register_ftrace_profiler(void)
+{
+	return register_ftrace_function(&ftrace_profile_ops);
+}
+
+static void unregister_ftrace_profiler(void)
+{
+	unregister_ftrace_function(&ftrace_profile_ops);
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 static ssize_t
 ftrace_profile_write(struct file *filp, const char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
@@ -570,11 +651,15 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 				goto out;
 			}
 
-			register_ftrace_function(&ftrace_profile_ops);
+			ret = register_ftrace_profiler();
+			if (ret < 0) {
+				cnt = ret;
+				goto out;
+			}
 			ftrace_profile_enabled = 1;
 		} else {
 			ftrace_profile_enabled = 0;
-			unregister_ftrace_function(&ftrace_profile_ops);
+			unregister_ftrace_profiler();
 		}
 	}
  out:
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 67c6a21..821bf49 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -402,17 +402,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 	return cnt;
 }
 
-static void
-trace_print_seq(struct seq_file *m, struct trace_seq *s)
-{
-	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
-
-	s->buffer[len] = 0;
-	seq_puts(m, s->buffer);
-
-	trace_seq_init(s);
-}
-
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d7410bb..c66ca3b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -605,6 +605,8 @@ extern unsigned long trace_flags;
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern enum print_line_t print_graph_function(struct trace_iterator *iter);
+extern enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
@@ -636,7 +638,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
 	return 1;
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
-
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
 print_graph_function(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d28687e..85bba0f 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -426,8 +426,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
 	return TRACE_TYPE_HANDLED;
 }
 
-static enum print_line_t
-print_graph_duration(unsigned long long duration, struct trace_seq *s)
+enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 {
 	unsigned long nsecs_rem = do_div(duration, 1000);
 	/* log10(ULONG_MAX) + '\0' */
@@ -464,12 +464,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_duration(unsigned long long duration, struct trace_seq *s)
+{
+	int ret;
+
+	ret = trace_print_graph_duration(duration, s);
+	if (ret != TRACE_TYPE_HANDLED)
+		return ret;
 
 	ret = trace_seq_printf(s, "|  ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
-	return TRACE_TYPE_HANDLED;
 
+	return TRACE_TYPE_HANDLED;
 }
 
 /* Case of a leaf function on its call entry */
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 19261fd..a3b6e3f 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -19,6 +19,16 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
+void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+
+	s->buffer[len] = 0;
+	seq_puts(m, s->buffer);
+
+	trace_seq_init(s);
+}
+
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 35c422f..1eac297 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -20,6 +20,8 @@ trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
 trace_print_printk_msg_only(struct trace_iterator *iter);
 
+extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 extern int
-- 
cgit v0.10.2


From cafb168a1c92e4c9e1731fe3d666c39611762c49 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 24 Mar 2009 20:50:39 -0400
Subject: tracing: make the function profiler per cpu

Impact: speed enhancement

By making the function profiler record in per cpu data we not only
get better readings, avoid races, we also do not have to take any
locks.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a9ccd71..ed1fc50 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -257,28 +257,28 @@ struct ftrace_profile_page {
 	struct ftrace_profile		records[];
 };
 
+struct ftrace_profile_stat {
+	atomic_t			disabled;
+	struct hlist_head		*hash;
+	struct ftrace_profile_page	*pages;
+	struct ftrace_profile_page	*start;
+	struct tracer_stat		stat;
+};
+
 #define PROFILE_RECORDS_SIZE						\
 	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
 
 #define PROFILES_PER_PAGE					\
 	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
 
-/* TODO: make these percpu, to prevent cache line bouncing */
-static struct ftrace_profile_page *profile_pages_start;
-static struct ftrace_profile_page *profile_pages;
-
-static struct hlist_head *ftrace_profile_hash;
 static int ftrace_profile_bits;
 static int ftrace_profile_enabled;
 static DEFINE_MUTEX(ftrace_profile_lock);
 
-static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable);
+static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
 
 #define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
 
-static raw_spinlock_t ftrace_profile_rec_lock =
-	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-
 static void *
 function_stat_next(void *v, int idx)
 {
@@ -303,7 +303,13 @@ function_stat_next(void *v, int idx)
 
 static void *function_stat_start(struct tracer_stat *trace)
 {
-	return function_stat_next(&profile_pages_start->records[0], 0);
+	struct ftrace_profile_stat *stat =
+		container_of(trace, struct ftrace_profile_stat, stat);
+
+	if (!stat || !stat->start)
+		return NULL;
+
+	return function_stat_next(&stat->start->records[0], 0);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -374,20 +380,11 @@ static int function_stat_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct tracer_stat function_stats = {
-	.name = "functions",
-	.stat_start = function_stat_start,
-	.stat_next = function_stat_next,
-	.stat_cmp = function_stat_cmp,
-	.stat_headers = function_stat_headers,
-	.stat_show = function_stat_show
-};
-
-static void ftrace_profile_reset(void)
+static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
 {
 	struct ftrace_profile_page *pg;
 
-	pg = profile_pages = profile_pages_start;
+	pg = stat->pages = stat->start;
 
 	while (pg) {
 		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
@@ -395,24 +392,24 @@ static void ftrace_profile_reset(void)
 		pg = pg->next;
 	}
 
-	memset(ftrace_profile_hash, 0,
+	memset(stat->hash, 0,
 	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
 }
 
-int ftrace_profile_pages_init(void)
+int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 {
 	struct ftrace_profile_page *pg;
 	int i;
 
 	/* If we already allocated, do nothing */
-	if (profile_pages)
+	if (stat->pages)
 		return 0;
 
-	profile_pages = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!profile_pages)
+	stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!stat->pages)
 		return -ENOMEM;
 
-	pg = profile_pages_start = profile_pages;
+	pg = stat->start = stat->pages;
 
 	/* allocate 10 more pages to start */
 	for (i = 0; i < 10; i++) {
@@ -430,13 +427,16 @@ int ftrace_profile_pages_init(void)
 	return 0;
 }
 
-static int ftrace_profile_init(void)
+static int ftrace_profile_init_cpu(int cpu)
 {
+	struct ftrace_profile_stat *stat;
 	int size;
 
-	if (ftrace_profile_hash) {
+	stat = &per_cpu(ftrace_profile_stats, cpu);
+
+	if (stat->hash) {
 		/* If the profile is already created, simply reset it */
-		ftrace_profile_reset();
+		ftrace_profile_reset(stat);
 		return 0;
 	}
 
@@ -446,29 +446,45 @@ static int ftrace_profile_init(void)
 	 */
 	size = FTRACE_PROFILE_HASH_SIZE;
 
-	ftrace_profile_hash =
-		kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
+	stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
 
-	if (!ftrace_profile_hash)
+	if (!stat->hash)
 		return -ENOMEM;
 
-	size--;
+	if (!ftrace_profile_bits) {
+		size--;
 
-	for (; size; size >>= 1)
-		ftrace_profile_bits++;
+		for (; size; size >>= 1)
+			ftrace_profile_bits++;
+	}
 
 	/* Preallocate a few pages */
-	if (ftrace_profile_pages_init() < 0) {
-		kfree(ftrace_profile_hash);
-		ftrace_profile_hash = NULL;
+	if (ftrace_profile_pages_init(stat) < 0) {
+		kfree(stat->hash);
+		stat->hash = NULL;
 		return -ENOMEM;
 	}
 
 	return 0;
 }
 
+static int ftrace_profile_init(void)
+{
+	int cpu;
+	int ret = 0;
+
+	for_each_online_cpu(cpu) {
+		ret = ftrace_profile_init_cpu(cpu);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
 /* interrupts must be disabled */
-static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
+static struct ftrace_profile *
+ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
 {
 	struct ftrace_profile *rec;
 	struct hlist_head *hhd;
@@ -476,7 +492,7 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
 	unsigned long key;
 
 	key = hash_long(ip, ftrace_profile_bits);
-	hhd = &ftrace_profile_hash[key];
+	hhd = &stat->hash[key];
 
 	if (hlist_empty(hhd))
 		return NULL;
@@ -489,52 +505,50 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
 	return NULL;
 }
 
-static void ftrace_add_profile(struct ftrace_profile *rec)
+static void ftrace_add_profile(struct ftrace_profile_stat *stat,
+			       struct ftrace_profile *rec)
 {
 	unsigned long key;
 
 	key = hash_long(rec->ip, ftrace_profile_bits);
-	hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
+	hlist_add_head_rcu(&rec->node, &stat->hash[key]);
 }
 
 /* Interrupts must be disabled calling this */
 static struct ftrace_profile *
-ftrace_profile_alloc(unsigned long ip, bool alloc_safe)
+ftrace_profile_alloc(struct ftrace_profile_stat *stat,
+		     unsigned long ip, bool alloc_safe)
 {
 	struct ftrace_profile *rec = NULL;
 
 	/* prevent recursion */
-	if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1)
+	if (atomic_inc_return(&stat->disabled) != 1)
 		goto out;
 
-	__raw_spin_lock(&ftrace_profile_rec_lock);
-
 	/* Try to always keep another page available */
-	if (!profile_pages->next && alloc_safe)
-		profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
+	if (!stat->pages->next && alloc_safe)
+		stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
 
 	/*
 	 * Try to find the function again since another
 	 * task on another CPU could have added it
 	 */
-	rec = ftrace_find_profiled_func(ip);
+	rec = ftrace_find_profiled_func(stat, ip);
 	if (rec)
-		goto out_unlock;
+		goto out;
 
-	if (profile_pages->index == PROFILES_PER_PAGE) {
-		if (!profile_pages->next)
-			goto out_unlock;
-		profile_pages = profile_pages->next;
+	if (stat->pages->index == PROFILES_PER_PAGE) {
+		if (!stat->pages->next)
+			goto out;
+		stat->pages = stat->pages->next;
 	}
 
-	rec = &profile_pages->records[profile_pages->index++];
+	rec = &stat->pages->records[stat->pages->index++];
 	rec->ip = ip;
-	ftrace_add_profile(rec);
+	ftrace_add_profile(stat, rec);
 
- out_unlock:
-	__raw_spin_unlock(&ftrace_profile_rec_lock);
  out:
-	atomic_dec(&__get_cpu_var(ftrace_profile_disable));
+	atomic_dec(&stat->disabled);
 
 	return rec;
 }
@@ -552,6 +566,7 @@ static bool ftrace_safe_to_allocate(void)
 static void
 function_profile_call(unsigned long ip, unsigned long parent_ip)
 {
+	struct ftrace_profile_stat *stat;
 	struct ftrace_profile *rec;
 	unsigned long flags;
 	bool alloc_safe;
@@ -562,9 +577,14 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
 	alloc_safe = ftrace_safe_to_allocate();
 
 	local_irq_save(flags);
-	rec = ftrace_find_profiled_func(ip);
+
+	stat = &__get_cpu_var(ftrace_profile_stats);
+	if (!stat->hash)
+		goto out;
+
+	rec = ftrace_find_profiled_func(stat, ip);
 	if (!rec) {
-		rec = ftrace_profile_alloc(ip, alloc_safe);
+		rec = ftrace_profile_alloc(stat, ip, alloc_safe);
 		if (!rec)
 			goto out;
 	}
@@ -583,13 +603,19 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace)
 
 static void profile_graph_return(struct ftrace_graph_ret *trace)
 {
-	unsigned long flags;
+	struct ftrace_profile_stat *stat;
 	struct ftrace_profile *rec;
+	unsigned long flags;
 
 	local_irq_save(flags);
-	rec = ftrace_find_profiled_func(trace->func);
+	stat = &__get_cpu_var(ftrace_profile_stats);
+	if (!stat->hash)
+		goto out;
+
+	rec = ftrace_find_profiled_func(stat, trace->func);
 	if (rec)
 		rec->time += trace->rettime - trace->calltime;
+ out:
 	local_irq_restore(flags);
 }
 
@@ -687,16 +713,51 @@ static const struct file_operations ftrace_profile_fops = {
 	.write		= ftrace_profile_write,
 };
 
+/* used to initialize the real stat files */
+static struct tracer_stat function_stats __initdata = {
+	.name = "functions",
+	.stat_start = function_stat_start,
+	.stat_next = function_stat_next,
+	.stat_cmp = function_stat_cmp,
+	.stat_headers = function_stat_headers,
+	.stat_show = function_stat_show
+};
+
 static void ftrace_profile_debugfs(struct dentry *d_tracer)
 {
+	struct ftrace_profile_stat *stat;
 	struct dentry *entry;
+	char *name;
 	int ret;
+	int cpu;
 
-	ret = register_stat_tracer(&function_stats);
-	if (ret) {
-		pr_warning("Warning: could not register "
-			   "function stats\n");
-		return;
+	for_each_possible_cpu(cpu) {
+		stat = &per_cpu(ftrace_profile_stats, cpu);
+
+		/* allocate enough for function name + cpu number */
+		name = kmalloc(32, GFP_KERNEL);
+		if (!name) {
+			/*
+			 * The files created are permanent, if something happens
+			 * we still do not free memory.
+			 */
+			kfree(stat);
+			WARN(1,
+			     "Could not allocate stat file for cpu %d\n",
+			     cpu);
+			return;
+		}
+		stat->stat = function_stats;
+		snprintf(name, 32, "function%d", cpu);
+		stat->stat.name = name;
+		ret = register_stat_tracer(&stat->stat);
+		if (ret) {
+			WARN(1,
+			     "Could not register function stat for cpu %d\n",
+			     cpu);
+			kfree(name);
+			return;
+		}
 	}
 
 	entry = debugfs_create_file("function_profile_enabled", 0644,
-- 
cgit v0.10.2


From a2a16d6a3156ef7309ca7328a20c35df9418e670 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 24 Mar 2009 23:17:58 -0400
Subject: function-graph: add option to calculate graph time or not

graph time is the time that a function is executing another function.
Thus if function A calls B, if graph-time is set, then the time for
A includes B. This is the default behavior. But if graph-time is off,
then the time spent executing B is subtracted from A.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 015a3d2..9e0a8d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -365,6 +365,7 @@ struct ftrace_ret_stack {
 	unsigned long ret;
 	unsigned long func;
 	unsigned long long calltime;
+	unsigned long long subtime;
 };
 
 /*
@@ -376,8 +377,6 @@ extern void return_to_handler(void);
 
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
-extern void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
 
 /*
  * Sometimes we don't want to trace a function with the function
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ed1fc50..71e5fae 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -604,6 +604,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace)
 static void profile_graph_return(struct ftrace_graph_ret *trace)
 {
 	struct ftrace_profile_stat *stat;
+	unsigned long long calltime;
 	struct ftrace_profile *rec;
 	unsigned long flags;
 
@@ -612,9 +613,27 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
 	if (!stat->hash)
 		goto out;
 
+	calltime = trace->rettime - trace->calltime;
+
+	if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
+		int index;
+
+		index = trace->depth;
+
+		/* Append this call time to the parent time to subtract */
+		if (index)
+			current->ret_stack[index - 1].subtime += calltime;
+
+		if (current->ret_stack[index].subtime < calltime)
+			calltime -= current->ret_stack[index].subtime;
+		else
+			calltime = 0;
+	}
+
 	rec = ftrace_find_profiled_func(stat, trace->func);
 	if (rec)
-		rec->time += trace->rettime - trace->calltime;
+		rec->time += calltime;
+
  out:
 	local_irq_restore(flags);
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 821bf49..5d1a16c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -255,7 +255,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
-	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
+	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
+	TRACE_ITER_GRAPH_TIME;
 
 /**
  * trace_wake_up - wake up tasks waiting for trace input
@@ -317,6 +318,7 @@ static const char *trace_options[] = {
 	"latency-format",
 	"global-clock",
 	"sleep-time",
+	"graph-time",
 	NULL
 };
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c66ca3b..e3429a8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -685,6 +685,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_LATENCY_FMT		= 0x40000,
 	TRACE_ITER_GLOBAL_CLK		= 0x80000,
 	TRACE_ITER_SLEEP_TIME		= 0x100000,
+	TRACE_ITER_GRAPH_TIME		= 0x200000,
 };
 
 /*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 85bba0f..10f6ad7 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -78,13 +78,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 	current->ret_stack[index].ret = ret;
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
+	current->ret_stack[index].subtime = 0;
 	*depth = index;
 
 	return 0;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
-void
+static void
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 {
 	int index;
@@ -104,9 +105,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 	trace->calltime = current->ret_stack[index].calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
 	trace->depth = index;
-	barrier();
-	current->curr_ret_stack--;
-
 }
 
 /*
@@ -121,6 +119,8 @@ unsigned long ftrace_return_to_handler(void)
 	ftrace_pop_return_trace(&trace, &ret);
 	trace.rettime = trace_clock_local();
 	ftrace_graph_return(&trace);
+	barrier();
+	current->curr_ret_stack--;
 
 	if (unlikely(!ret)) {
 		ftrace_graph_stop();
-- 
cgit v0.10.2


From fb9fb015e92123fa3a8e0c2e2fff491d4a56b470 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 25 Mar 2009 13:26:41 -0400
Subject: tracing: clean up tracing profiler

Ingo Molnar suggested clean ups for the profiling code. This patch
makes those updates.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 71e5fae..a141d84 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -69,7 +69,7 @@ static DEFINE_MUTEX(ftrace_lock);
 
 static struct ftrace_ops ftrace_list_end __read_mostly =
 {
-	.func = ftrace_stub,
+	.func		= ftrace_stub,
 };
 
 static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
@@ -271,8 +271,10 @@ struct ftrace_profile_stat {
 #define PROFILES_PER_PAGE					\
 	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
 
-static int ftrace_profile_bits;
-static int ftrace_profile_enabled;
+static int ftrace_profile_bits __read_mostly;
+static int ftrace_profile_enabled __read_mostly;
+
+/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
 static DEFINE_MUTEX(ftrace_profile_lock);
 
 static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
@@ -651,7 +653,7 @@ static void unregister_ftrace_profiler(void)
 #else
 static struct ftrace_ops ftrace_profile_ops __read_mostly =
 {
-	.func = function_profile_call,
+	.func		= function_profile_call,
 };
 
 static int register_ftrace_profiler(void)
@@ -670,7 +672,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
 	unsigned long val;
-	char buf[64];
+	char buf[64];		/* big enough to hold a number */
 	int ret;
 
 	if (cnt >= sizeof(buf))
@@ -719,7 +721,7 @@ static ssize_t
 ftrace_profile_read(struct file *filp, char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
-	char buf[64];
+	char buf[64];		/* big enough to hold a number */
 	int r;
 
 	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
@@ -734,12 +736,12 @@ static const struct file_operations ftrace_profile_fops = {
 
 /* used to initialize the real stat files */
 static struct tracer_stat function_stats __initdata = {
-	.name = "functions",
-	.stat_start = function_stat_start,
-	.stat_next = function_stat_next,
-	.stat_cmp = function_stat_cmp,
-	.stat_headers = function_stat_headers,
-	.stat_show = function_stat_show
+	.name		= "functions",
+	.stat_start	= function_stat_start,
+	.stat_next	= function_stat_next,
+	.stat_cmp	= function_stat_cmp,
+	.stat_headers	= function_stat_headers,
+	.stat_show	= function_stat_show
 };
 
 static void ftrace_profile_debugfs(struct dentry *d_tracer)
@@ -1954,7 +1956,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
 
 static struct ftrace_ops trace_probe_ops __read_mostly =
 {
-	.func = function_trace_probe_call,
+	.func		= function_trace_probe_call,
 };
 
 static int ftrace_probe_registered;
-- 
cgit v0.10.2


From 318e0a73c9e41b9a17241829bcd0605a39b87cb9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 25 Mar 2009 20:06:34 -0400
Subject: tracing: remove on the fly allocator from function profiler

Impact: safer code

The on the fly allocator for the function profiler was to save
memory. But at the expense of stability. Although it survived several
tests, allocating from the function tracer is just too risky, just
to save space.

This patch removes the allocator and simply allocates enough entries
at start up.

Each function gets a profiling structure of 40 bytes. With an average
of 20K functions, and this is for each CPU, we have 800K per online
CPU. This is not too bad, at least for non-embedded.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a141d84..4d90c91 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -401,6 +401,8 @@ static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
 int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 {
 	struct ftrace_profile_page *pg;
+	int functions;
+	int pages;
 	int i;
 
 	/* If we already allocated, do nothing */
@@ -411,22 +413,46 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 	if (!stat->pages)
 		return -ENOMEM;
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+	functions = ftrace_update_tot_cnt;
+#else
+	/*
+	 * We do not know the number of functions that exist because
+	 * dynamic tracing is what counts them. With past experience
+	 * we have around 20K functions. That should be more than enough.
+	 * It is highly unlikely we will execute every function in
+	 * the kernel.
+	 */
+	functions = 20000;
+#endif
+
 	pg = stat->start = stat->pages;
 
-	/* allocate 10 more pages to start */
-	for (i = 0; i < 10; i++) {
+	pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
+
+	for (i = 0; i < pages; i++) {
 		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
-		/*
-		 * We only care about allocating profile_pages, if
-		 * we failed to allocate here, hopefully we will allocate
-		 * later.
-		 */
 		if (!pg->next)
-			break;
+			goto out_free;
 		pg = pg->next;
 	}
 
 	return 0;
+
+ out_free:
+	pg = stat->start;
+	while (pg) {
+		unsigned long tmp = (unsigned long)pg;
+
+		pg = pg->next;
+		free_page(tmp);
+	}
+
+	free_page((unsigned long)stat->pages);
+	stat->pages = NULL;
+	stat->start = NULL;
+
+	return -ENOMEM;
 }
 
 static int ftrace_profile_init_cpu(int cpu)
@@ -460,7 +486,7 @@ static int ftrace_profile_init_cpu(int cpu)
 			ftrace_profile_bits++;
 	}
 
-	/* Preallocate a few pages */
+	/* Preallocate the function profiling pages */
 	if (ftrace_profile_pages_init(stat) < 0) {
 		kfree(stat->hash);
 		stat->hash = NULL;
@@ -516,24 +542,21 @@ static void ftrace_add_profile(struct ftrace_profile_stat *stat,
 	hlist_add_head_rcu(&rec->node, &stat->hash[key]);
 }
 
-/* Interrupts must be disabled calling this */
+/*
+ * The memory is already allocated, this simply finds a new record to use.
+ */
 static struct ftrace_profile *
-ftrace_profile_alloc(struct ftrace_profile_stat *stat,
-		     unsigned long ip, bool alloc_safe)
+ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
 {
 	struct ftrace_profile *rec = NULL;
 
-	/* prevent recursion */
+	/* prevent recursion (from NMIs) */
 	if (atomic_inc_return(&stat->disabled) != 1)
 		goto out;
 
-	/* Try to always keep another page available */
-	if (!stat->pages->next && alloc_safe)
-		stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
-
 	/*
-	 * Try to find the function again since another
-	 * task on another CPU could have added it
+	 * Try to find the function again since an NMI
+	 * could have added it
 	 */
 	rec = ftrace_find_profiled_func(stat, ip);
 	if (rec)
@@ -555,29 +578,16 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat,
 	return rec;
 }
 
-/*
- * If we are not in an interrupt, or softirq and
- * and interrupts are disabled and preemption is not enabled
- * (not in a spinlock) then it should be safe to allocate memory.
- */
-static bool ftrace_safe_to_allocate(void)
-{
-	return !in_interrupt() && irqs_disabled() && !preempt_count();
-}
-
 static void
 function_profile_call(unsigned long ip, unsigned long parent_ip)
 {
 	struct ftrace_profile_stat *stat;
 	struct ftrace_profile *rec;
 	unsigned long flags;
-	bool alloc_safe;
 
 	if (!ftrace_profile_enabled)
 		return;
 
-	alloc_safe = ftrace_safe_to_allocate();
-
 	local_irq_save(flags);
 
 	stat = &__get_cpu_var(ftrace_profile_stats);
@@ -586,7 +596,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
 
 	rec = ftrace_find_profiled_func(stat, ip);
 	if (!rec) {
-		rec = ftrace_profile_alloc(stat, ip, alloc_safe);
+		rec = ftrace_profile_alloc(stat, ip);
 		if (!rec)
 			goto out;
 	}
-- 
cgit v0.10.2


From 34886c8bc590f078d4c0b88f50d061326639198d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 25 Mar 2009 21:00:47 -0400
Subject: tracing: add average time in function to function profiler

Show the average time in the function (Time / Hit)

  Function                               Hit    Time            Avg
  --------                               ---    ----            ---
  mwait_idle                              51    140326.6 us     2751.503 us
  smp_apic_timer_interrupt                47    3517.735 us     74.845 us
  schedule                                10    2738.754 us     273.875 us
  __schedule                              10    2732.857 us     273.285 us
  hrtimer_interrupt                       47    1896.104 us     40.342 us
  irq_exit                                56    1711.833 us     30.568 us
  __run_hrtimer                           47    1315.589 us     27.991 us
  tick_sched_timer                        47    1138.690 us     24.227 us
  do_softirq                              56    1116.829 us     19.943 us
  __do_softirq                            56    1066.932 us     19.052 us
  do_IRQ                                   9    926.153 us      102.905 us

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4d90c91..c7f4a4b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -347,8 +347,10 @@ static int function_stat_cmp(void *p1, void *p2)
 static int function_stat_headers(struct seq_file *m)
 {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	seq_printf(m, "  Function                               Hit    Time\n"
-		      "  --------                               ---    ----\n");
+	seq_printf(m, "  Function                               "
+		   "Hit    Time            Avg\n"
+		      "  --------                               "
+		   "---    ----            ---\n");
 #else
 	seq_printf(m, "  Function                               Hit\n"
 		      "  --------                               ---\n");
@@ -361,12 +363,9 @@ static int function_stat_show(struct seq_file *m, void *v)
 	struct ftrace_profile *rec = v;
 	char str[KSYM_SYMBOL_LEN];
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	static struct trace_seq s;
 	static DEFINE_MUTEX(mutex);
-
-	mutex_lock(&mutex);
-	trace_seq_init(&s);
-	trace_print_graph_duration(rec->time, &s);
+	static struct trace_seq s;
+	unsigned long long avg;
 #endif
 
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -374,6 +373,14 @@ static int function_stat_show(struct seq_file *m, void *v)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	seq_printf(m, "    ");
+	avg = rec->time;
+	do_div(avg, rec->counter);
+
+	mutex_lock(&mutex);
+	trace_seq_init(&s);
+	trace_print_graph_duration(rec->time, &s);
+	trace_seq_puts(&s, "    ");
+	trace_print_graph_duration(avg, &s);
 	trace_print_seq(m, &s);
 	mutex_unlock(&mutex);
 #endif
-- 
cgit v0.10.2


From a8a93f3f03b7a8008d720e8d91798efe599d416c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 12 Feb 2009 13:45:34 -0800
Subject: mm: disable preemption in apply_to_pte_range

Impact: bugfix

Lazy mmu mode needs preemption disabled, so if we're apply to
init_mm (which doesn't require any pte locks), then explicitly
disable preemption.  (Do it unconditionally after checking we've
successfully done the allocation to simplify the error handling.)

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/mm/memory.c b/mm/memory.c
index baa999e..b80cc31 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1718,6 +1718,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 
 	BUG_ON(pmd_huge(*pmd));
 
+	preempt_disable();
 	arch_enter_lazy_mmu_mode();
 
 	token = pmd_pgtable(*pmd);
@@ -1729,6 +1730,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	arch_leave_lazy_mmu_mode();
+	preempt_enable();
 
 	if (mm != &init_mm)
 		pte_unmap_unlock(pte-1, ptl);
-- 
cgit v0.10.2


From b8bcfe997e46150fedcc3f5b26b846400122fdd9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 17 Feb 2009 23:05:19 -0800
Subject: x86/paravirt: remove lazy mode in interrupts

Impact: simplification, robustness

Make paravirt_lazy_mode() always return PARAVIRT_LAZY_NONE
when in an interrupt.  This prevents interrupt code from
accidentally inheriting an outer lazy state, and instead
does everything synchronously.  Outer batched operations
are left deferred.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 63dd358..8ab250a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -282,6 +282,9 @@ void paravirt_leave_lazy_cpu(void)
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 {
+	if (in_interrupt())
+		return PARAVIRT_LAZY_NONE;
+
 	return __get_cpu_var(paravirt_lazy_mode);
 }
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b727..cfbb4a7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -225,12 +225,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	if (!pmd_present(*pmd_k))
 		return NULL;
 
-	if (!pmd_present(*pmd)) {
+	if (!pmd_present(*pmd))
 		set_pmd(pmd, *pmd_k);
-		arch_flush_lazy_mmu_mode();
-	} else {
+	else
 		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-	}
 
 	return pmd_k;
 }
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 00f127c..e81dfa4 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -87,7 +87,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte(kmap_pte-idx, mk_pte(page, prot));
-	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
 }
@@ -117,7 +116,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 #endif
 	}
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 04102d4..b6a61f3 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -74,7 +74,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
 	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 
-	arch_flush_lazy_mmu_mode();
 	pagefault_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 9c42949..9015e5e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -824,13 +824,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	vm_unmap_aliases();
 
-	/*
-	 * If we're called with lazy mmu updates enabled, the
-	 * in-memory pte state may be stale.  Flush pending updates to
-	 * bring them up to date.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 	cpa.vaddr = addr;
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
@@ -873,13 +866,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	} else
 		cpa_flush_all(cache);
 
-	/*
-	 * If we've been called with lazy mmu updates enabled, then
-	 * make sure that everything gets flushed out before we
-	 * return.
-	 */
-	arch_flush_lazy_mmu_mode();
-
 out:
 	return ret;
 }
-- 
cgit v0.10.2


From 7fd7d83d49914f03aefffba6aee09032fcd54cce Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 17 Feb 2009 23:24:03 -0800
Subject: x86/pvops: replace arch_enter_lazy_cpu_mode with
 arch_start_context_switch

Impact: simplification, prepare for later changes

Make lazy cpu mode more specific to context switching, so that
it makes sense to do more context-switch specific things in
the callbacks.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 0617d5c..7b28aba 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1420,19 +1420,17 @@ void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
 void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
 
-#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-static inline void arch_enter_lazy_cpu_mode(void)
+#define  __HAVE_ARCH_START_CONTEXT_SWITCH
+static inline void arch_start_context_switch(void)
 {
 	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
 }
 
-static inline void arch_leave_lazy_cpu_mode(void)
+static inline void arch_end_context_switch(void)
 {
 	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
 }
 
-void arch_flush_lazy_cpu_mode(void);
-
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
 {
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8ab250a..5eea954 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -301,19 +301,6 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_enable();
 }
 
-void arch_flush_lazy_cpu_mode(void)
-{
-	preempt_disable();
-
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-		WARN_ON(preempt_count() == 1);
-		arch_leave_lazy_cpu_mode();
-		arch_enter_lazy_cpu_mode();
-	}
-
-	preempt_enable();
-}
-
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 14014d7..57e49a8 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch();
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index abb7e6a..7115e60 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch();
 
 	/*
 	 * Switch FS and GS.
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cb6afa4..6b98f87 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1119,10 +1119,8 @@ static void drop_other_mm_ref(void *info)
 
 	/* If this cpu still has a stale cr3 reference, then make sure
 	   it has been flushed. */
-	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
+	if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
 		load_cr3(swapper_pg_dir);
-		arch_flush_lazy_cpu_mode();
-	}
 }
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
@@ -1135,7 +1133,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 			load_cr3(swapper_pg_dir);
 		else
 			leave_mm(smp_processor_id());
-		arch_flush_lazy_cpu_mode();
 	}
 
 	/* Get the "official" set of cpus referring to our pagetable. */
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index e16fdb1..235e34a 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; }
 #define pgtable_cache_init()		do {} while (0)
 #define arch_enter_lazy_mmu_mode()	do {} while (0)
 #define arch_leave_lazy_mmu_mode()	do {} while (0)
-#define arch_enter_lazy_cpu_mode()	do {} while (0)
-#define arch_leave_lazy_cpu_mode()	do {} while (0)
+
+#define arch_start_context_switch()	do {} while (0)
 
 #else /* !CONFIG_MMU */
 /*****************************************************************************/
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8e6d0ca..922f036 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 #endif
 
 /*
- * A facility to provide batching of the reload of page tables with the
- * actual context switch code for paravirtualized guests.  By convention,
- * only one of the lazy modes (CPU, MMU) should be active at any given
- * time, entry should never be nested, and entry and exits should always
- * be paired.  This is for sanity of maintaining and reasoning about the
- * kernel code.
+ * A facility to provide batching of the reload of page tables and
+ * other process state with the actual context switch code for
+ * paravirtualized guests.  By convention, only one of the batched
+ * update (lazy) modes (CPU, MMU) should be active at any given time,
+ * entry should never be nested, and entry and exits should always be
+ * paired.  This is for sanity of maintaining and reasoning about the
+ * kernel code.  In this case, the exit (end of the context switch) is
+ * in architecture-specific code, and so doesn't need a generic
+ * definition.
  */
-#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-#define arch_enter_lazy_cpu_mode()	do {} while (0)
-#define arch_leave_lazy_cpu_mode()	do {} while (0)
-#define arch_flush_lazy_cpu_mode()	do {} while (0)
+#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
+#define arch_start_context_switch()	do {} while (0)
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git a/kernel/sched.c b/kernel/sched.c
index 5757e03..7530fdd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * combine the page table reload and the switch backend into
 	 * one hypercall.
 	 */
-	arch_enter_lazy_cpu_mode();
+	arch_start_context_switch();
 
 	if (unlikely(!mm)) {
 		next->active_mm = oldmm;
-- 
cgit v0.10.2


From b407fc57b815b2016186220baabc76cc8264206e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 17 Feb 2009 23:46:21 -0800
Subject: x86/paravirt: flush pending mmu updates on context switch

Impact: allow preemption during lazy mmu updates

If we're in lazy mmu mode when context switching, leave
lazy mmu mode, but remember the task's state in
TIF_LAZY_MMU_UPDATES.  When we resume the task, check this
flag and re-enter lazy mmu mode if its set.

This sets things up for allowing lazy mmu mode while preemptible,
though that won't actually be active until the next change.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 7b28aba..58d2481 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1418,7 +1418,6 @@ void paravirt_enter_lazy_cpu(void);
 void paravirt_leave_lazy_cpu(void);
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
 
 #define  __HAVE_ARCH_START_CONTEXT_SWITCH
 static inline void arch_start_context_switch(void)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index df9d5f7..2f34d64 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,6 +94,7 @@ struct thread_info {
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
 #define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
+#define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -115,6 +116,7 @@ struct thread_info {
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
+#define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 478bca9..5d7f6e7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -201,7 +201,7 @@ static void kvm_leave_lazy_mmu(void)
 	struct kvm_para_state *state = kvm_para_state();
 
 	mmu_queue_flush(state);
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
+	paravirt_leave_lazy_mmu();
 	state->mode = paravirt_get_lazy_mode();
 }
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 5eea954..430a0e3 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -252,7 +252,7 @@ static inline void enter_lazy(enum paravirt_lazy_mode mode)
 	__get_cpu_var(paravirt_lazy_mode) = mode;
 }
 
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
+static void leave_lazy(enum paravirt_lazy_mode mode)
 {
 	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
 	BUG_ON(preemptible());
@@ -267,17 +267,24 @@ void paravirt_enter_lazy_mmu(void)
 
 void paravirt_leave_lazy_mmu(void)
 {
-	paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
+	leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
 void paravirt_enter_lazy_cpu(void)
 {
+	if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
+		arch_leave_lazy_mmu_mode();
+		set_thread_flag(TIF_LAZY_MMU_UPDATES);
+	}
 	enter_lazy(PARAVIRT_LAZY_CPU);
 }
 
 void paravirt_leave_lazy_cpu(void)
 {
-	paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
+	leave_lazy(PARAVIRT_LAZY_CPU);
+
+	if (test_and_clear_thread_flag(TIF_LAZY_MMU_UPDATES))
+		arch_enter_lazy_mmu_mode();
 }
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 2cc4a90..950929c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -473,16 +473,22 @@ static void vmi_enter_lazy_cpu(void)
 	vmi_ops.set_lazy_mode(2);
 }
 
+static void vmi_leave_lazy_cpu(void)
+{
+	vmi_ops.set_lazy_mode(0);
+	paravirt_leave_lazy_cpu();
+}
+
 static void vmi_enter_lazy_mmu(void)
 {
 	paravirt_enter_lazy_mmu();
 	vmi_ops.set_lazy_mode(1);
 }
 
-static void vmi_leave_lazy(void)
+static void vmi_leave_lazy_mmu(void)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	vmi_ops.set_lazy_mode(0);
+	paravirt_leave_lazy_mmu();
 }
 
 static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -718,12 +724,12 @@ static inline int __init activate_vmi(void)
 
 	para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
+	para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy_cpu,
 		  set_lazy_mode, SetLazyMode);
 
 	para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
+	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
 		  set_lazy_mode, SetLazyMode);
 
 	/* user and kernel flush are just handled with different flags to FlushTLB */
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9fe4dda..41a5562 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -147,10 +147,16 @@ static void lazy_hcall(unsigned long call,
 
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
  * issue the do-nothing hypercall to flush any stored calls. */
-static void lguest_leave_lazy_mode(void)
+static void lguest_leave_lazy_mmu_mode(void)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+	paravirt_leave_lazy_mmu();
+}
+
+static void lguest_leave_lazy_cpu_mode(void)
+{
+	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+	paravirt_leave_lazy_cpu();
 }
 
 /*G:033
@@ -1026,7 +1032,7 @@ __init void lguest_init(void)
 	pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
 	pv_cpu_ops.wbinvd = lguest_wbinvd;
 	pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
-	pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+	pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_cpu_mode;
 
 	/* pagetable management */
 	pv_mmu_ops.write_cr3 = lguest_write_cr3;
@@ -1039,7 +1045,7 @@ __init void lguest_init(void)
 	pv_mmu_ops.read_cr2 = lguest_read_cr2;
 	pv_mmu_ops.read_cr3 = lguest_read_cr3;
 	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
-	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+	pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* apic read/write intercepts */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 82cd39a..f586e63 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-void xen_leave_lazy(void)
+static void xen_leave_lazy_cpu(void)
 {
-	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	xen_mc_flush();
+	paravirt_leave_lazy_cpu();
 }
 
 static unsigned long xen_store_tr(void)
@@ -819,7 +819,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_cpu,
-		.leave = xen_leave_lazy,
+		.leave = xen_leave_lazy_cpu,
 	},
 };
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6b98f87..f5f8faa 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1816,6 +1816,11 @@ __init void xen_post_allocator_init(void)
 	xen_mark_init_mm_pinned();
 }
 
+static void xen_leave_lazy_mmu(void)
+{
+	xen_mc_flush();
+	paravirt_leave_lazy_mmu();
+}
 
 const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pagetable_setup_start = xen_pagetable_setup_start,
@@ -1891,7 +1896,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.lazy_mode = {
 		.enter = paravirt_enter_lazy_mmu,
-		.leave = xen_leave_lazy,
+		.leave = xen_leave_lazy_mmu,
 	},
 
 	.set_fixmap = xen_set_fixmap,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f5ef26..f897cdf 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
 void xen_reserve_top(void);
 
-void xen_leave_lazy(void);
 void xen_post_allocator_init(void);
 
 char * __init xen_memory_setup(void);
-- 
cgit v0.10.2


From 224101ed69d3fbb486868e0f6e0f9fa37302efb4 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 18 Feb 2009 11:18:57 -0800
Subject: x86/paravirt: finish change from lazy cpu to context switch start/end

Impact: fix lazy context switch API

Pass the previous and next tasks into the context switch start
end calls, so that the called functions can properly access the
task state (esp in end_context_switch, in which the next task
is not yet completely current).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 58d2481..dfdee0c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -56,6 +56,7 @@ struct desc_ptr;
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
+struct task_struct;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@ struct pv_cpu_ops {
 
 	void (*swapgs)(void);
 
-	struct pv_lazy_ops lazy_mode;
+	void (*start_context_switch)(struct task_struct *prev);
+	void (*end_context_switch)(struct task_struct *next);
 };
 
 struct pv_irq_ops {
@@ -1414,20 +1416,21 @@ enum paravirt_lazy_mode {
 };
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_enter_lazy_cpu(void);
-void paravirt_leave_lazy_cpu(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
 
 #define  __HAVE_ARCH_START_CONTEXT_SWITCH
-static inline void arch_start_context_switch(void)
+static inline void arch_start_context_switch(struct task_struct *prev)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
+	PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
 }
 
-static inline void arch_end_context_switch(void)
+static inline void arch_end_context_switch(struct task_struct *next)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
+	PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
 }
 
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index d0812e1..24e4283 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -83,6 +83,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
 #define pte_val(x)	native_pte_val(x)
 #define __pte(x)	native_make_pte(x)
 
+#define arch_end_context_switch(prev)	do {} while(0)
+
 #endif	/* CONFIG_PARAVIRT */
 
 /*
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 430a0e3..cf14375 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -270,20 +270,20 @@ void paravirt_leave_lazy_mmu(void)
 	leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
-void paravirt_enter_lazy_cpu(void)
+void paravirt_start_context_switch(struct task_struct *prev)
 {
 	if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
 		arch_leave_lazy_mmu_mode();
-		set_thread_flag(TIF_LAZY_MMU_UPDATES);
+		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
 	}
 	enter_lazy(PARAVIRT_LAZY_CPU);
 }
 
-void paravirt_leave_lazy_cpu(void)
+void paravirt_end_context_switch(struct task_struct *next)
 {
 	leave_lazy(PARAVIRT_LAZY_CPU);
 
-	if (test_and_clear_thread_flag(TIF_LAZY_MMU_UPDATES))
+	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
 		arch_enter_lazy_mmu_mode();
 }
 
@@ -399,10 +399,8 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
 
-	.lazy_mode = {
-		.enter = paravirt_nop,
-		.leave = paravirt_nop,
-	},
+	.start_context_switch = paravirt_nop,
+	.end_context_switch = paravirt_nop,
 };
 
 struct pv_apic_ops pv_apic_ops = {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 57e49a8..d766c76 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_end_context_switch();
+	arch_end_context_switch(next_p);
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7115e60..e8a9aaf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_end_context_switch();
+	arch_end_context_switch(next_p);
 
 	/*
 	 * Switch FS and GS.
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 950929c..55a5d69 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -467,16 +467,16 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
 }
 #endif
 
-static void vmi_enter_lazy_cpu(void)
+static void vmi_start_context_switch(struct task_struct *prev)
 {
-	paravirt_enter_lazy_cpu();
+	paravirt_start_context_switch(prev);
 	vmi_ops.set_lazy_mode(2);
 }
 
-static void vmi_leave_lazy_cpu(void)
+static void vmi_end_context_switch(struct task_struct *next)
 {
 	vmi_ops.set_lazy_mode(0);
-	paravirt_leave_lazy_cpu();
+	paravirt_end_context_switch(next);
 }
 
 static void vmi_enter_lazy_mmu(void)
@@ -722,9 +722,9 @@ static inline int __init activate_vmi(void)
 	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
 	para_fill(pv_cpu_ops.io_delay, IODelay);
 
-	para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
+	para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy_cpu,
+	para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
 		  set_lazy_mode, SetLazyMode);
 
 	para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 41a5562..5287081 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -153,10 +153,10 @@ static void lguest_leave_lazy_mmu_mode(void)
 	paravirt_leave_lazy_mmu();
 }
 
-static void lguest_leave_lazy_cpu_mode(void)
+static void lguest_end_context_switch(struct task_struct *next)
 {
 	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
-	paravirt_leave_lazy_cpu();
+	paravirt_end_context_switch(next);
 }
 
 /*G:033
@@ -1031,8 +1031,8 @@ __init void lguest_init(void)
 	pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
 	pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
 	pv_cpu_ops.wbinvd = lguest_wbinvd;
-	pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
-	pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_cpu_mode;
+	pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
+	pv_cpu_ops.end_context_switch = lguest_end_context_switch;
 
 	/* pagetable management */
 	pv_mmu_ops.write_cr3 = lguest_write_cr3;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f586e63..70b355d3a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-static void xen_leave_lazy_cpu(void)
+static void xen_end_context_switch(struct task_struct *next)
 {
 	xen_mc_flush();
-	paravirt_leave_lazy_cpu();
+	paravirt_end_context_switch(next);
 }
 
 static unsigned long xen_store_tr(void)
@@ -817,10 +817,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	/* Xen takes care of %gs when switching to usermode for us */
 	.swapgs = paravirt_nop,
 
-	.lazy_mode = {
-		.enter = paravirt_enter_lazy_cpu,
-		.leave = xen_leave_lazy_cpu,
-	},
+	.start_context_switch = paravirt_start_context_switch,
+	.end_context_switch = xen_end_context_switch,
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 235e34a..0988704 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -74,7 +74,7 @@ static inline int pte_file(pte_t pte) { return 0; }
 #define arch_enter_lazy_mmu_mode()	do {} while (0)
 #define arch_leave_lazy_mmu_mode()	do {} while (0)
 
-#define arch_start_context_switch()	do {} while (0)
+#define arch_start_context_switch(prev)	do {} while (0)
 
 #else /* !CONFIG_MMU */
 /*****************************************************************************/
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 922f036..e410f60 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -291,7 +291,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
  * definition.
  */
 #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
-#define arch_start_context_switch()	do {} while (0)
+#define arch_start_context_switch(prev)	do {} while (0)
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git a/kernel/sched.c b/kernel/sched.c
index 7530fdd..133762a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * combine the page table reload and the switch backend into
 	 * one hypercall.
 	 */
-	arch_start_context_switch();
+	arch_start_context_switch(prev);
 
 	if (unlikely(!mm)) {
 		next->active_mm = oldmm;
-- 
cgit v0.10.2


From 2829b449276aed45f3d649efb21e3418e39dd5d1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 17 Feb 2009 23:53:19 -0800
Subject: x86/paravirt: allow preemption with lazy mmu mode

Impact: remove obsolete checks, simplification

Lift restrictions on preemption with lazy mmu mode, as it is now allowed.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index cf14375..bf2e86e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -247,7 +247,6 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
 static inline void enter_lazy(enum paravirt_lazy_mode mode)
 {
 	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
-	BUG_ON(preemptible());
 
 	__get_cpu_var(paravirt_lazy_mode) = mode;
 }
@@ -255,7 +254,6 @@ static inline void enter_lazy(enum paravirt_lazy_mode mode)
 static void leave_lazy(enum paravirt_lazy_mode mode)
 {
 	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
-	BUG_ON(preemptible());
 
 	__get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
 }
@@ -272,6 +270,8 @@ void paravirt_leave_lazy_mmu(void)
 
 void paravirt_start_context_switch(struct task_struct *prev)
 {
+	BUG_ON(preemptible());
+
 	if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
 		arch_leave_lazy_mmu_mode();
 		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
@@ -281,6 +281,8 @@ void paravirt_start_context_switch(struct task_struct *prev)
 
 void paravirt_end_context_switch(struct task_struct *next)
 {
+	BUG_ON(preemptible());
+
 	leave_lazy(PARAVIRT_LAZY_CPU);
 
 	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
@@ -300,7 +302,6 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_disable();
 
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-		WARN_ON(preempt_count() == 1);
 		arch_leave_lazy_mmu_mode();
 		arch_enter_lazy_mmu_mode();
 	}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f5f8faa..3f2d0fe 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -419,10 +419,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
-	/* updates to init_mm may be done without lock */
-	if (mm == &init_mm)
-		preempt_disable();
-
 	ADD_STATS(set_pte_at, 1);
 //	ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
 	ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -443,9 +439,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	}
 	xen_set_pte(ptep, pteval);
 
-out:
-	if (mm == &init_mm)
-		preempt_enable();
+out:	return;
 }
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
-- 
cgit v0.10.2


From 252a6bf2a3a7e7add56b17d48aecf3f3ef213103 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 18 Feb 2009 00:11:28 -0800
Subject: mm: allow preemption in apply_to_pte_range

Impact: allow preemption in apply_to_pte_range updates to init_mm

Preemption is now allowed for lazy mmu mode, so don't disable
it for the inner loop of apply_to_pte_range.  This only applies
when doing updates to init_mm; user pagetables are still modified
under the pte lock, so preemption is disabled anyway.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/mm/memory.c b/mm/memory.c
index b80cc31..baa999e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1718,7 +1718,6 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 
 	BUG_ON(pmd_huge(*pmd));
 
-	preempt_disable();
 	arch_enter_lazy_mmu_mode();
 
 	token = pmd_pgtable(*pmd);
@@ -1730,7 +1729,6 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
 	arch_leave_lazy_mmu_mode();
-	preempt_enable();
 
 	if (mm != &init_mm)
 		pte_unmap_unlock(pte-1, ptl);
-- 
cgit v0.10.2


From ab2f75f0b760d2b0c9a875b669a1b51dce02c85a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 18 Feb 2009 00:18:50 -0800
Subject: x86/paravirt: use percpu_ rather than __get_cpu_var

Impact: minor optimisation

percpu_read/write is a slightly more direct way of getting
to percpu data.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bf2e86e..254e8aa 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -246,16 +246,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
 
 static inline void enter_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+	BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
 
-	__get_cpu_var(paravirt_lazy_mode) = mode;
+	percpu_write(paravirt_lazy_mode, mode);
 }
 
 static void leave_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
+	BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
 
-	__get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
+	percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
 }
 
 void paravirt_enter_lazy_mmu(void)
@@ -294,7 +294,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	if (in_interrupt())
 		return PARAVIRT_LAZY_NONE;
 
-	return __get_cpu_var(paravirt_lazy_mode);
+	return percpu_read(paravirt_lazy_mode);
 }
 
 void arch_flush_lazy_mmu_mode(void)
-- 
cgit v0.10.2


From 5caecb9432428241d0c641897f07ff4003f1b55f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 20 Feb 2009 23:01:26 -0800
Subject: xen: disable preempt for leave_lazy_mmu

xen_mc_flush() requires preemption to be disabled for its own sanity,
so disable it while we're flushing.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3f2d0fe..0e57238 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1812,8 +1812,10 @@ __init void xen_post_allocator_init(void)
 
 static void xen_leave_lazy_mmu(void)
 {
+	preempt_disable();
 	xen_mc_flush();
 	paravirt_leave_lazy_mmu();
+	preempt_enable();
 }
 
 const struct pv_mmu_ops xen_mmu_ops __initdata = {
-- 
cgit v0.10.2


From 59d7187142bbe9b404a403ed0f874d3227305f26 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 26 Feb 2009 15:48:33 -0800
Subject: xen: separate p2m allocation from setting

When doing very early p2m setting, we need to separate setting
from allocation, so split things up accordingly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0e57238..e0a55b7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -233,47 +233,74 @@ unsigned long get_phys_to_machine(unsigned long pfn)
 }
 EXPORT_SYMBOL_GPL(get_phys_to_machine);
 
-static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+/* install a  new p2m_top page */
+bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
 {
-	unsigned long *p;
+	unsigned topidx = p2m_top_index(pfn);
+	unsigned long **pfnp, *mfnp;
 	unsigned i;
 
-	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
-	BUG_ON(p == NULL);
+	pfnp = &p2m_top[topidx];
+	mfnp = &p2m_top_mfn[topidx];
 
 	for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
 		p[i] = INVALID_P2M_ENTRY;
 
-	if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
-		free_page((unsigned long)p);
-	else
+	if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
 		*mfnp = virt_to_mfn(p);
+		return true;
+	}
+
+	return false;
 }
 
-void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+static void alloc_p2m(unsigned long pfn)
 {
-	unsigned topidx, idx;
+	unsigned long *p;
 
-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return;
-	}
+	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+	BUG_ON(p == NULL);
+
+	if (!install_p2mtop_page(pfn, p))
+		free_page((unsigned long)p);
+}
+
+/* Try to install p2m mapping; fail if intermediate bits missing */
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, idx;
 
 	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
 		BUG_ON(mfn != INVALID_P2M_ENTRY);
-		return;
+		return true;
 	}
 
 	topidx = p2m_top_index(pfn);
 	if (p2m_top[topidx] == p2m_missing) {
-		/* no need to allocate a page to store an invalid entry */
 		if (mfn == INVALID_P2M_ENTRY)
-			return;
-		alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+			return true;
+		return false;
 	}
 
 	idx = p2m_index(pfn);
 	p2m_top[topidx][idx] = mfn;
+
+	return true;
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
+		alloc_p2m(pfn);
+
+		if (!__set_phys_to_machine(pfn, mfn))
+			BUG();
+	}
 }
 
 unsigned long arbitrary_virt_to_mfn(void *vaddr)
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 24d1b44..da73026 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -11,6 +11,9 @@ enum pt_level {
 };
 
 
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
+
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 
-- 
cgit v0.10.2


From a2bcd4731f77cb77ae4b5e4a3d7f5471cf346c33 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 29 Mar 2009 23:47:48 +0200
Subject: x86/mm: further cleanups of fault.c's include file section

Impact: cleanup

Eliminate more than 20 unnecessary #include lines in fault.c

Also fix include file dependency bug in asm/traps.h. (this was
masked before, by implicit inclusion)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <tip-56aea8468746e673a4bf50b6a13d97b2d1cbe1e8@git.kernel.org>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0d53425..37fb07a 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_TRAPS_H
 
 #include <asm/debugreg.h>
+#include <asm/siginfo.h>			/* TRAP_TRACE, ... */
 
 #ifdef CONFIG_X86_32
 #define dotraplinkage
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b727..24a36a6 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -3,40 +3,16 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/interrupt.h>
-#include <linux/mmiotrace.h>
-#include <linux/bootmem.h>
-#include <linux/compiler.h>
-#include <linux/highmem.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/vt_kern.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/errno.h>
-#include <linux/magic.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/tty.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-
-#include <asm-generic/sections.h>
-
-#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
-#include <asm/segment.h>
-#include <asm/system.h>
-#include <asm/proto.h>
-#include <asm/traps.h>
-#include <asm/desc.h>
+#include <linux/magic.h>		/* STACK_END_MAGIC		*/
+#include <linux/sched.h>		/* test_thread_flag(), ...	*/
+#include <linux/kdebug.h>		/* oops_begin/end, ...		*/
+#include <linux/module.h>		/* search_exception_table	*/
+#include <linux/bootmem.h>		/* max_low_pfn			*/
+#include <linux/kprobes.h>		/* __kprobes, ...		*/
+#include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
+
+#include <asm/traps.h>			/* dotraplinkage, ...		*/
+#include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 
 /*
  * Page fault error code bits:
-- 
cgit v0.10.2


From 7571a60446030d2576d881438447e86a0755a83b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 27 Feb 2009 15:34:59 -0800
Subject: xen: split construction of p2m mfn tables from registration

Build the p2m_mfn_list_list early with the rest of the p2m table, but
register it later when the real shared_info structure is in place.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e0a55b7..67d2ab4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn)
 }
 
 /* Build the parallel p2m_top_mfn structures */
-void xen_setup_mfn_list_list(void)
+static void __init xen_build_mfn_list_list(void)
 {
 	unsigned pfn, idx;
 
@@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void)
 		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
 		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
 	}
+}
 
+void xen_setup_mfn_list_list(void)
+{
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 
 	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void)
 
 		p2m_top[topidx] = &mfn_list[pfn];
 	}
+
+	xen_build_mfn_list_list();
 }
 
 unsigned long get_phys_to_machine(unsigned long pfn)
-- 
cgit v0.10.2


From 6ed6bf428aff64fe37cdc54b239d598fee6016f1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 4 Mar 2009 13:02:18 -0800
Subject: xen: clean up xen_load_gdt

Makes the logic a bit clearer.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 70b355d3a..5776dc2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -301,10 +301,21 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 	frames = mcs.args;
 
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
-		frames[f] = arbitrary_virt_to_mfn((void *)va);
+		int level;
+		pte_t *ptep = lookup_address(va, &level);
+		unsigned long pfn, mfn;
+		void *virt;
+
+		BUG_ON(ptep == NULL);
+
+		pfn = pte_pfn(*ptep);
+		mfn = pfn_to_mfn(pfn);
+		virt = __va(PFN_PHYS(pfn));
+
+		frames[f] = mfn;
 
 		make_lowmem_page_readonly((void *)va);
-		make_lowmem_page_readonly(mfn_to_virt(frames[f]));
+		make_lowmem_page_readonly(virt);
 	}
 
 	MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
-- 
cgit v0.10.2


From 3ce5fa7ebff74b6a4dc5fdcdc22e6979f5a4ff85 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 4 Mar 2009 15:26:00 -0800
Subject: xen: make xen_load_gdt simpler

Remove use of multicall machinery which is unused (gdt loading
is never performance critical).  This removes the implicit use
of percpu variables, which simplifies understanding how
the percpu code's use of load_gdt interacts with this code.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5776dc2..48b399b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -284,12 +284,11 @@ static void xen_set_ldt(const void *addr, unsigned entries)
 
 static void xen_load_gdt(const struct desc_ptr *dtr)
 {
-	unsigned long *frames;
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
 	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	unsigned long frames[pages];
 	int f;
-	struct multicall_space mcs;
 
 	/* A GDT can be up to 64k in size, which corresponds to 8192
 	   8-byte entries, or 16 4k pages.. */
@@ -297,9 +296,6 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 	BUG_ON(size > 65536);
 	BUG_ON(va & ~PAGE_MASK);
 
-	mcs = xen_mc_entry(sizeof(*frames) * pages);
-	frames = mcs.args;
-
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
 		int level;
 		pte_t *ptep = lookup_address(va, &level);
@@ -314,13 +310,15 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 
 		frames[f] = mfn;
 
+		printk("xen_load_gdt: %d va=%p mfn=%lx pfn=%lx va'=%p\n",
+		       f, (void *)va, mfn, pfn, virt);
+
 		make_lowmem_page_readonly((void *)va);
 		make_lowmem_page_readonly(virt);
 	}
 
-	MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
+	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+		BUG();
 }
 
 static void load_TLS_descriptor(struct thread_struct *t,
-- 
cgit v0.10.2


From b4b7e58590d0e94ed78bd6be1aa163caba7b6c74 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 4 Mar 2009 16:34:27 -0800
Subject: xen: remove xen_load_gdt debug

Don't need the noise.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 48b399b..75b7a0f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -310,9 +310,6 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 
 		frames[f] = mfn;
 
-		printk("xen_load_gdt: %d va=%p mfn=%lx pfn=%lx va'=%p\n",
-		       f, (void *)va, mfn, pfn, virt);
-
 		make_lowmem_page_readonly((void *)va);
 		make_lowmem_page_readonly(virt);
 	}
-- 
cgit v0.10.2


From e9e2d1ffcfdb38bed11a3064aa74bea9ee38ed80 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Thu, 5 Mar 2009 20:13:57 +0100
Subject: NULL noise: arch/x86/xen/smp.c

Fix this sparse warnings:
  arch/x86/xen/smp.c:316:52: warning: Using plain integer as NULL pointer
  arch/x86/xen/smp.c:421:60: warning: Using plain integer as NULL pointer

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8d47056..304d832 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	BUG_ON(rc);
 
 	while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
-		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+		HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 		barrier();
 	}
 
@@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 	/* Make sure other vcpus get a chance to run if they need to. */
 	for_each_cpu(cpu, mask) {
 		if (xen_vcpu_stolen(cpu)) {
-			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+			HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 			break;
 		}
 	}
-- 
cgit v0.10.2


From e826fe1ba1563a9272345da8e3279a930ac160a7 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Sat, 7 Mar 2009 17:09:27 -0800
Subject: xen: mask XSAVE from cpuid

Xen leaves XSAVE set in cpuid, but doesn't allow cr4.OSXSAVE
to be set.  This confuses the kernel and it ends up crashing on
an xsetbv instruction.

At boot time, try to set cr4.OSXSAVE, and mask XSAVE out of
cpuid it we can't.  This will produce a spurious error from Xen,
but allows us to support XSAVE if/when Xen does.

This also factors out the cpuid mask decisions to boot time.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75b7a0f..da33e0c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -168,21 +168,23 @@ static void __init xen_banner(void)
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
+static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
+static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
+
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		      unsigned int *cx, unsigned int *dx)
 {
+	unsigned maskecx = ~0;
 	unsigned maskedx = ~0;
 
 	/*
 	 * Mask out inconvenient features, to try and disable as many
 	 * unsupported kernel subsystems as possible.
 	 */
-	if (*ax == 1)
-		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
-			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-			    (1 << X86_FEATURE_MCE)  |  /* disable MCE */
-			    (1 << X86_FEATURE_MCA)  |  /* disable MCA */
-			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+	if (*ax == 1) {
+		maskecx = cpuid_leaf1_ecx_mask;
+		maskedx = cpuid_leaf1_edx_mask;
+	}
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
 		: "=a" (*ax),
@@ -190,9 +192,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		  "=c" (*cx),
 		  "=d" (*dx)
 		: "0" (*ax), "2" (*cx));
+
+	*cx &= maskecx;
 	*dx &= maskedx;
 }
 
+static __init void xen_init_cpuid_mask(void)
+{
+	unsigned int ax, bx, cx, dx;
+
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
+		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+
+	if (!xen_initial_domain())
+		cpuid_leaf1_edx_mask &=
+			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
+			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
+
+	ax = 1;
+	xen_cpuid(&ax, &bx, &cx, &dx);
+
+	/* cpuid claims we support xsave; try enabling it to see what happens */
+	if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
+		unsigned long cr4;
+
+		set_in_cr4(X86_CR4_OSXSAVE);
+		
+		cr4 = read_cr4();
+
+		if ((cr4 & X86_CR4_OSXSAVE) == 0)
+			cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
+
+		clear_in_cr4(X86_CR4_OSXSAVE);
+	}
+}
+
 static void xen_set_debugreg(int reg, unsigned long val)
 {
 	HYPERVISOR_set_debugreg(reg, val);
@@ -901,6 +937,8 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_init_irq_ops();
 
+	xen_init_cpuid_mask();
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * set up the basic apic ops.
-- 
cgit v0.10.2


From 68509cdcde6583ee1a9542899d1270449c7d5903 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Sun, 8 Mar 2009 03:59:04 -0700
Subject: x86-64: remove PGE from must-have feature list

PGE may not be available when running paravirtualized, so test the cpuid
bit before using it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index d5cd6c5..a4737dd 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -50,7 +50,7 @@
 #ifdef CONFIG_X86_64
 #define NEED_PSE	0
 #define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
-#define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
+#define NEED_PGE	0
 #define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
 #define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
 #define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
-- 
cgit v0.10.2


From 1e7449730853e7c9ae9a2458b2ced7ba12559a0e Mon Sep 17 00:00:00 2001
From: Alex Nixon <alex.nixon@citrix.com>
Date: Mon, 9 Feb 2009 12:05:46 -0800
Subject: Xen: Add virt_to_pfn helper function

Signed-off-by: Alex Nixon <alex.nixon@citrix.com>

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 1a918dd..018a0a4 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
+#define virt_to_pfn(v)          (PFN_DOWN(__pa(v)))
+#define virt_to_mfn(v)		(pfn_to_mfn(virt_to_pfn(v)))
 #define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
 static inline unsigned long pte_mfn(pte_t pte)
-- 
cgit v0.10.2


From 5f241e65f2be4661a33e1937e1c829252a80b2b8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 16 Mar 2009 17:08:48 -0700
Subject: x86-64: non-paravirt systems always has PSE and PGE

A paravirtualized system may not have PSE or PGE available to
guests, so they are not required features.  However, without
paravirt we can assume that any x86-64 implementation will have
them available.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index a4737dd..64cf2d2 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -48,9 +48,15 @@
 #endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE	0
-#define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
 #define NEED_PGE	0
+#else
+#define NEED_PSE	(1<<(X86_FEATURE_PSE) & 31)
+#define NEED_PGE	(1<<(X86_FEATURE_PGE) & 31)
+#endif
+#define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
 #define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
 #define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
 #define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
-- 
cgit v0.10.2


From 4185f35404dc96f8525298c7c548aee419f3b3f4 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 17 Mar 2009 13:30:55 -0700
Subject: xen/mmu: some early pagetable cleanups

1. make sure early-allocated ptes are pinned, so they can be later
   unpinned
2. don't pin pmd+pud, just make them RO
3. scatter some __inits around

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 67d2ab4..df87c80 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1013,7 +1013,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
 	return 0;
 }
 
-void __init xen_mark_init_mm_pinned(void)
+static void __init xen_mark_init_mm_pinned(void)
 {
 	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
@@ -1461,6 +1461,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 }
 #endif
 
+static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+	struct mmuext_op op;
+	op.cmd = cmd;
+	op.arg1.mfn = pfn_to_mfn(pfn);
+	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+		BUG();
+}
+
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1469,22 +1478,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 	BUG_ON(mem_map);	/* should only be used early */
 #endif
 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+	pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+}
+
+/* Used for pmd and pud */
+static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+{
+#ifdef CONFIG_FLATMEM
+	BUG_ON(mem_map);	/* should only be used early */
+#endif
+	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
 /* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(unsigned long pfn)
+static __init void xen_release_pte_init(unsigned long pfn)
 {
+	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+static __init void xen_release_pmd_init(unsigned long pfn)
 {
-	struct mmuext_op op;
-	op.cmd = cmd;
-	op.arg1.mfn = pfn_to_mfn(pfn);
-	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-		BUG();
+	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
 /* This needs to make sure the new pte page is pinned iff its being
@@ -1873,9 +1889,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
-	.alloc_pmd = xen_alloc_pte_init,
+	.alloc_pmd = xen_alloc_pmd_init,
 	.alloc_pmd_clone = paravirt_nop,
-	.release_pmd = xen_release_pte_init,
+	.release_pmd = xen_release_pmd_init,
 
 #ifdef CONFIG_HIGHPTE
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
@@ -1914,8 +1930,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_pgd = xen_set_pgd_hyper,
 
-	.alloc_pud = xen_alloc_pte_init,
-	.release_pud = xen_release_pte_init,
+	.alloc_pud = xen_alloc_pmd_init,
+	.release_pud = xen_release_pmd_init,
 #endif	/* PAGETABLE_LEVELS == 4 */
 
 	.activate_mm = xen_activate_mm,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f897cdf..5c50a10 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -56,8 +56,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
 bool xen_vcpu_stolen(int vcpu);
 
-void xen_mark_init_mm_pinned(void);
-
 void xen_setup_vcpu_info_placement(void);
 
 #ifdef CONFIG_SMP
-- 
cgit v0.10.2


From 8de07bbdede03598801cf33ab23dcbcd28a918d2 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 4 Mar 2009 17:36:57 -0800
Subject: xen/mmu: weaken flush_tlb_other test

Impact: fixes crashing bug

There's no particular problem with getting an empty cpu mask,
so just shortcut-return if we get one.

Avoids crash reported by Christophe Saout <christophe@saout.de>

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index df87c80..e425a32 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1293,8 +1293,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	} *args;
 	struct multicall_space mcs;
 
-	BUG_ON(cpumask_empty(cpus));
-	BUG_ON(!mm);
+	if (cpumask_empty(cpus))
+		return;		/* nothing to do */
 
 	mcs = xen_mc_entry(sizeof(*args));
 	args = mcs.args;
-- 
cgit v0.10.2


From 1e6fcf840e11ceff8a656a678c6e4b0560a98e08 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Wed, 25 Mar 2009 17:46:42 +0000
Subject: xen: resume interrupts before system devices.

Impact: bugfix Xen domain restore

Otherwise the first timer interrupt after resume is missed and we never
get another.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 3ccd348..b703dd2 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -68,15 +68,15 @@ static int xen_suspend(void *data)
 	gnttab_resume();
 	xen_mm_unpin_all();
 
-	sysdev_resume();
-	device_power_up(PMSG_RESUME);
-
 	if (!*cancelled) {
 		xen_irq_resume();
 		xen_console_resume();
 		xen_timer_resume();
 	}
 
+	sysdev_resume();
+	device_power_up(PMSG_RESUME);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 707ebbc81c61eb480d8a51ca61e355e240df1d32 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 27 Mar 2009 11:29:02 -0700
Subject: xen: set _PAGE_NX in __supported_pte_mask before pagetable
 construction

Some 64-bit machines don't support the NX flag in ptes.
Check for NX before constructing the kernel pagetables.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index da33e0c..80f4c53 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -42,6 +42,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
+#include <asm/proto.h>
 #include <asm/msr-index.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
@@ -912,7 +913,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
 	.emergency_restart = xen_emergency_restart,
 };
 
-
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -980,6 +980,11 @@ asmlinkage void __init xen_start_kernel(void)
 	if (!xen_initial_domain())
 		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
+#ifdef CONFIG_X86_64
+	/* Work out if we support NX */
+	check_efer();
+#endif
+
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-- 
cgit v0.10.2


From 6d02c42698f99eccb290ac53d4f10ca883b9f90c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Sun, 29 Mar 2009 22:57:15 -0700
Subject: xen: clean up gate trap/interrupt constants

Use GATE_INTERRUPT/TRAP rather than 0xe/f.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 80f4c53..12a3159 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -428,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
-	if (val->type != 0xf && val->type != 0xe)
+	if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
 		return 0;
 
 	info->vector = vector;
@@ -436,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 	info->cs = gate_segment(*val);
 	info->flags = val->dpl;
 	/* interrupt gates clear IF */
-	if (val->type == 0xe)
-		info->flags |= 4;
+	if (val->type == GATE_INTERRUPT)
+		info->flags |= 1 << 2;
 
 	return 1;
 }
-- 
cgit v0.10.2


From d4c045364d3107603187f21a56ec231e74d26441 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 6 Feb 2009 19:20:31 -0800
Subject: xen: add irq_from_evtchn

Given an evtchn, return the corresponding irq.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af..1cd2a0e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq)
 	return info_for_irq(irq)->evtchn;
 }
 
+unsigned irq_from_evtchn(unsigned int evtchn)
+{
+	return evtchn_to_irq[evtchn];
+}
+EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
 static enum ipi_vector ipi_from_irq(unsigned irq)
 {
 	struct irq_info *info = info_for_irq(irq);
diff --git a/include/xen/events.h b/include/xen/events.h
index 0d5f1ad..e68d59a 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq);
    irq will be disabled so it won't deliver an interrupt. */
 void xen_poll_irq(int irq);
 
+/* Determine the IRQ which is bound to an event channel */
+unsigned irq_from_evtchn(unsigned int evtchn);
+
 #endif	/* _XEN_EVENTS_H */
-- 
cgit v0.10.2


From f7116284c734f3a47180cd9c907944a1837ccb3c Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 6 Feb 2009 19:21:19 -0800
Subject: xen: add /dev/xen/evtchn driver

This driver is used by application which wish to receive notifications
from the hypervisor or other guests via Xen's event channel
mechanism. In particular it is used by the xenstore daemon in domain
0.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 526187c..1bbb910 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES
 	  secure, but slightly less efficient.
 	  If in doubt, say yes.
 
+config XEN_DEV_EVTCHN
+	tristate "Xen /dev/xen/evtchn device"
+	depends on XEN
+	default y
+	help
+	  The evtchn driver allows a userspace process to triger event
+	  channels and to receive notification of an event channel
+	  firing.
+	  If in doubt, say yes.
+
 config XENFS
 	tristate "Xen filesystem"
 	depends on XEN
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc..1567639 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,5 @@ obj-y	+= xenbus/
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XENFS)		+= xenfs/
\ No newline at end of file
+obj-$(CONFIG_XEN_DEV_EVTCHN)	+= evtchn.o
+obj-$(CONFIG_XENFS)		+= xenfs/
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
new file mode 100644
index 0000000..517b9ee
--- /dev/null
+++ b/drivers/xen/evtchn.c
@@ -0,0 +1,494 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Driver for receiving and demuxing event-channel signals.
+ *
+ * Copyright (c) 2004-2005, K A Fraser
+ * Multi-process extensions Copyright (c) 2004, Steven Smith
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/major.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/poll.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <asm/xen/hypervisor.h>
+
+struct per_user_data {
+	/* Notification ring, accessed via /dev/xen/evtchn. */
+#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+	evtchn_port_t *ring;
+	unsigned int ring_cons, ring_prod, ring_overflow;
+	struct mutex ring_cons_mutex; /* protect against concurrent readers */
+
+	/* Processes wait on this queue when ring is empty. */
+	wait_queue_head_t evtchn_wait;
+	struct fasync_struct *evtchn_async_queue;
+	const char *name;
+};
+
+/* Who's bound to each port? */
+static struct per_user_data *port_user[NR_EVENT_CHANNELS];
+static DEFINE_SPINLOCK(port_user_lock);
+
+irqreturn_t evtchn_interrupt(int irq, void *data)
+{
+	unsigned int port = (unsigned long)data;
+	struct per_user_data *u;
+
+	spin_lock(&port_user_lock);
+
+	u = port_user[port];
+
+	disable_irq_nosync(irq);
+
+	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
+		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+		wmb(); /* Ensure ring contents visible */
+		if (u->ring_cons == u->ring_prod++) {
+			wake_up_interruptible(&u->evtchn_wait);
+			kill_fasync(&u->evtchn_async_queue,
+				    SIGIO, POLL_IN);
+		}
+	} else {
+		u->ring_overflow = 1;
+	}
+
+	spin_unlock(&port_user_lock);
+
+	return IRQ_HANDLED;
+}
+
+static ssize_t evtchn_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	int rc;
+	unsigned int c, p, bytes1 = 0, bytes2 = 0;
+	struct per_user_data *u = file->private_data;
+
+	/* Whole number of ports. */
+	count &= ~(sizeof(evtchn_port_t)-1);
+
+	if (count == 0)
+		return 0;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	for (;;) {
+		mutex_lock(&u->ring_cons_mutex);
+
+		rc = -EFBIG;
+		if (u->ring_overflow)
+			goto unlock_out;
+
+		c = u->ring_cons;
+		p = u->ring_prod;
+		if (c != p)
+			break;
+
+		mutex_unlock(&u->ring_cons_mutex);
+
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		rc = wait_event_interruptible(u->evtchn_wait,
+					      u->ring_cons != u->ring_prod);
+		if (rc)
+			return rc;
+	}
+
+	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
+		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+			sizeof(evtchn_port_t);
+		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+	} else {
+		bytes1 = (p - c) * sizeof(evtchn_port_t);
+		bytes2 = 0;
+	}
+
+	/* Truncate chunks according to caller's maximum byte count. */
+	if (bytes1 > count) {
+		bytes1 = count;
+		bytes2 = 0;
+	} else if ((bytes1 + bytes2) > count) {
+		bytes2 = count - bytes1;
+	}
+
+	rc = -EFAULT;
+	rmb(); /* Ensure that we see the port before we copy it. */
+	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+	    ((bytes2 != 0) &&
+	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
+		goto unlock_out;
+
+	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+	rc = bytes1 + bytes2;
+
+ unlock_out:
+	mutex_unlock(&u->ring_cons_mutex);
+	return rc;
+}
+
+static ssize_t evtchn_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	int rc, i;
+	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+	struct per_user_data *u = file->private_data;
+
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	/* Whole number of ports. */
+	count &= ~(sizeof(evtchn_port_t)-1);
+
+	rc = 0;
+	if (count == 0)
+		goto out;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	rc = -EFAULT;
+	if (copy_from_user(kbuf, buf, count) != 0)
+		goto out;
+
+	spin_lock_irq(&port_user_lock);
+	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
+		if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
+			enable_irq(irq_from_evtchn(kbuf[i]));
+	spin_unlock_irq(&port_user_lock);
+
+	rc = count;
+
+ out:
+	free_page((unsigned long)kbuf);
+	return rc;
+}
+
+static int evtchn_bind_to_user(struct per_user_data *u, int port)
+{
+	int irq;
+	int rc = 0;
+
+	spin_lock_irq(&port_user_lock);
+
+	BUG_ON(port_user[port] != NULL);
+
+	irq = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
+					u->name, (void *)(unsigned long)port);
+	if (rc < 0)
+		goto fail;
+
+	port_user[port] = u;
+
+fail:
+	spin_unlock_irq(&port_user_lock);
+	return rc;
+}
+
+static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+{
+	int irq = irq_from_evtchn(port);
+
+	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+	port_user[port] = NULL;
+}
+
+static long evtchn_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg)
+{
+	int rc;
+	struct per_user_data *u = file->private_data;
+	void __user *uarg = (void __user *) arg;
+
+	switch (cmd) {
+	case IOCTL_EVTCHN_BIND_VIRQ: {
+		struct ioctl_evtchn_bind_virq bind;
+		struct evtchn_bind_virq bind_virq;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		bind_virq.virq = bind.virq;
+		bind_virq.vcpu = 0;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						 &bind_virq);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, bind_virq.port);
+		if (rc == 0)
+			rc = bind_virq.port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
+		struct ioctl_evtchn_bind_interdomain bind;
+		struct evtchn_bind_interdomain bind_interdomain;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		bind_interdomain.remote_dom  = bind.remote_domain;
+		bind_interdomain.remote_port = bind.remote_port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+						 &bind_interdomain);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
+		if (rc == 0)
+			rc = bind_interdomain.local_port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
+		struct ioctl_evtchn_bind_unbound_port bind;
+		struct evtchn_alloc_unbound alloc_unbound;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		alloc_unbound.dom        = DOMID_SELF;
+		alloc_unbound.remote_dom = bind.remote_domain;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+						 &alloc_unbound);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, alloc_unbound.port);
+		if (rc == 0)
+			rc = alloc_unbound.port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_UNBIND: {
+		struct ioctl_evtchn_unbind unbind;
+
+		rc = -EFAULT;
+		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
+			break;
+
+		rc = -EINVAL;
+		if (unbind.port >= NR_EVENT_CHANNELS)
+			break;
+
+		spin_lock_irq(&port_user_lock);
+
+		rc = -ENOTCONN;
+		if (port_user[unbind.port] != u) {
+			spin_unlock_irq(&port_user_lock);
+			break;
+		}
+
+		evtchn_unbind_from_user(u, unbind.port);
+
+		spin_unlock_irq(&port_user_lock);
+
+		rc = 0;
+		break;
+	}
+
+	case IOCTL_EVTCHN_NOTIFY: {
+		struct ioctl_evtchn_notify notify;
+
+		rc = -EFAULT;
+		if (copy_from_user(&notify, uarg, sizeof(notify)))
+			break;
+
+		if (notify.port >= NR_EVENT_CHANNELS) {
+			rc = -EINVAL;
+		} else if (port_user[notify.port] != u) {
+			rc = -ENOTCONN;
+		} else {
+			notify_remote_via_evtchn(notify.port);
+			rc = 0;
+		}
+		break;
+	}
+
+	case IOCTL_EVTCHN_RESET: {
+		/* Initialise the ring to empty. Clear errors. */
+		mutex_lock(&u->ring_cons_mutex);
+		spin_lock_irq(&port_user_lock);
+		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+		spin_unlock_irq(&port_user_lock);
+		mutex_unlock(&u->ring_cons_mutex);
+		rc = 0;
+		break;
+	}
+
+	default:
+		rc = -ENOSYS;
+		break;
+	}
+
+	return rc;
+}
+
+static unsigned int evtchn_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = POLLOUT | POLLWRNORM;
+	struct per_user_data *u = file->private_data;
+
+	poll_wait(file, &u->evtchn_wait, wait);
+	if (u->ring_cons != u->ring_prod)
+		mask |= POLLIN | POLLRDNORM;
+	if (u->ring_overflow)
+		mask = POLLERR;
+	return mask;
+}
+
+static int evtchn_fasync(int fd, struct file *filp, int on)
+{
+	struct per_user_data *u = filp->private_data;
+	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
+}
+
+static int evtchn_open(struct inode *inode, struct file *filp)
+{
+	struct per_user_data *u;
+
+	u = kzalloc(sizeof(*u), GFP_KERNEL);
+	if (u == NULL)
+		return -ENOMEM;
+
+	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
+	if (u->name == NULL) {
+		kfree(u);
+		return -ENOMEM;
+	}
+
+	init_waitqueue_head(&u->evtchn_wait);
+
+	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+	if (u->ring == NULL) {
+		kfree(u->name);
+		kfree(u);
+		return -ENOMEM;
+	}
+
+	mutex_init(&u->ring_cons_mutex);
+
+	filp->private_data = u;
+
+	return 0;
+}
+
+static int evtchn_release(struct inode *inode, struct file *filp)
+{
+	int i;
+	struct per_user_data *u = filp->private_data;
+
+	spin_lock_irq(&port_user_lock);
+
+	free_page((unsigned long)u->ring);
+
+	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+		if (port_user[i] != u)
+			continue;
+
+		evtchn_unbind_from_user(port_user[i], i);
+	}
+
+	spin_unlock_irq(&port_user_lock);
+
+	kfree(u->name);
+	kfree(u);
+
+	return 0;
+}
+
+static const struct file_operations evtchn_fops = {
+	.owner   = THIS_MODULE,
+	.read    = evtchn_read,
+	.write   = evtchn_write,
+	.unlocked_ioctl = evtchn_ioctl,
+	.poll    = evtchn_poll,
+	.fasync  = evtchn_fasync,
+	.open    = evtchn_open,
+	.release = evtchn_release,
+};
+
+static struct miscdevice evtchn_miscdev = {
+	.minor        = MISC_DYNAMIC_MINOR,
+	.name         = "evtchn",
+	.fops         = &evtchn_fops,
+};
+static int __init evtchn_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	spin_lock_init(&port_user_lock);
+	memset(port_user, 0, sizeof(port_user));
+
+	/* Create '/dev/misc/evtchn'. */
+	err = misc_register(&evtchn_miscdev);
+	if (err != 0) {
+		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+		return err;
+	}
+
+	printk(KERN_INFO "Event-channel device installed.\n");
+
+	return 0;
+}
+
+static void __exit evtchn_cleanup(void)
+{
+	misc_deregister(&evtchn_miscdev);
+}
+
+module_init(evtchn_init);
+module_exit(evtchn_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h
new file mode 100644
index 0000000..14e833e
--- /dev/null
+++ b/include/xen/evtchn.h
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * evtchn.h
+ *
+ * Interface to /dev/xen/evtchn.
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_EVTCHN_H__
+#define __LINUX_PUBLIC_EVTCHN_H__
+
+/*
+ * Bind a fresh port to VIRQ @virq.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_VIRQ				\
+	_IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
+struct ioctl_evtchn_bind_virq {
+	unsigned int virq;
+};
+
+/*
+ * Bind a fresh port to remote <@remote_domain, @remote_port>.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_INTERDOMAIN			\
+	_IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
+struct ioctl_evtchn_bind_interdomain {
+	unsigned int remote_domain, remote_port;
+};
+
+/*
+ * Allocate a fresh port for binding to @remote_domain.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_UNBOUND_PORT			\
+	_IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
+struct ioctl_evtchn_bind_unbound_port {
+	unsigned int remote_domain;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_UNBIND				\
+	_IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
+struct ioctl_evtchn_unbind {
+	unsigned int port;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_NOTIFY				\
+	_IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
+struct ioctl_evtchn_notify {
+	unsigned int port;
+};
+
+/* Clear and reinitialise the event buffer. Clear error condition. */
+#define IOCTL_EVTCHN_RESET				\
+	_IOC(_IOC_NONE, 'E', 5, 0)
+
+#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
-- 
cgit v0.10.2


From c5cfef0f79cacc3aa438fc28f4747f0d10c54d0d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 6 Feb 2009 19:21:19 -0800
Subject: xen: export ioctl headers to userspace

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/include/Kbuild b/include/Kbuild
index d8c3e3c..fe36acc 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -8,3 +8,4 @@ header-y += mtd/
 header-y += rdma/
 header-y += video/
 header-y += drm/
+header-y += xen/
diff --git a/include/xen/Kbuild b/include/xen/Kbuild
new file mode 100644
index 0000000..4e65c16
--- /dev/null
+++ b/include/xen/Kbuild
@@ -0,0 +1 @@
+header-y += evtchn.h
-- 
cgit v0.10.2


From 0a4666b539a0e896ec4e8396a034a479e3573125 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 12 Feb 2009 13:03:24 -0800
Subject: xen/dev-evtchn: clean up locking in evtchn

Define a new per_user_data mutex to serialize bind/unbind operations
to prevent them from racing with each other.  Fix error returns
and don't do a bind while holding a spinlock.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 517b9ee..af03195 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -54,6 +54,8 @@
 #include <asm/xen/hypervisor.h>
 
 struct per_user_data {
+	struct mutex bind_mutex; /* serialize bind/unbind operations */
+
 	/* Notification ring, accessed via /dev/xen/evtchn. */
 #define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
 #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
@@ -69,7 +71,7 @@ struct per_user_data {
 
 /* Who's bound to each port? */
 static struct per_user_data *port_user[NR_EVENT_CHANNELS];
-static DEFINE_SPINLOCK(port_user_lock);
+static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
 
 irqreturn_t evtchn_interrupt(int irq, void *data)
 {
@@ -210,22 +212,24 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
-	int irq;
 	int rc = 0;
 
-	spin_lock_irq(&port_user_lock);
-
+	/*
+	 * Ports are never reused, so every caller should pass in a
+	 * unique port.
+	 *
+	 * (Locking not necessary because we haven't registered the
+	 * interrupt handler yet, and our caller has already
+	 * serialized bind operations.)
+	 */
 	BUG_ON(port_user[port] != NULL);
-
-	irq = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
-					u->name, (void *)(unsigned long)port);
-	if (rc < 0)
-		goto fail;
-
 	port_user[port] = u;
 
-fail:
-	spin_unlock_irq(&port_user_lock);
+	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
+				       u->name, (void *)(unsigned long)port);
+	if (rc >= 0)
+		rc = 0;
+
 	return rc;
 }
 
@@ -234,6 +238,10 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port)
 	int irq = irq_from_evtchn(port);
 
 	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+
+	/* make sure we unbind the irq handler before clearing the port */
+	barrier();
+
 	port_user[port] = NULL;
 }
 
@@ -244,6 +252,9 @@ static long evtchn_ioctl(struct file *file,
 	struct per_user_data *u = file->private_data;
 	void __user *uarg = (void __user *) arg;
 
+	/* Prevent bind from racing with unbind */
+	mutex_lock(&u->bind_mutex);
+
 	switch (cmd) {
 	case IOCTL_EVTCHN_BIND_VIRQ: {
 		struct ioctl_evtchn_bind_virq bind;
@@ -368,6 +379,7 @@ static long evtchn_ioctl(struct file *file,
 		rc = -ENOSYS;
 		break;
 	}
+	mutex_unlock(&u->bind_mutex);
 
 	return rc;
 }
@@ -414,6 +426,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 		return -ENOMEM;
 	}
 
+	mutex_init(&u->bind_mutex);
 	mutex_init(&u->ring_cons_mutex);
 
 	filp->private_data = u;
-- 
cgit v0.10.2


From a1ce1be578365a4da7e7d7db4812539d2d5da763 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 9 Feb 2009 12:05:50 -0800
Subject: xen: remove suspend_cancel hook

Remove suspend_cancel hook from xenbus_driver, in preparation for using
the device model for suspending.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 773d1cf..bd20361 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -689,27 +689,6 @@ static int suspend_dev(struct device *dev, void *data)
 	return 0;
 }
 
-static int suspend_cancel_dev(struct device *dev, void *data)
-{
-	int err = 0;
-	struct xenbus_driver *drv;
-	struct xenbus_device *xdev;
-
-	DPRINTK("");
-
-	if (dev->driver == NULL)
-		return 0;
-	drv = to_xenbus_driver(dev->driver);
-	xdev = container_of(dev, struct xenbus_device, dev);
-	if (drv->suspend_cancel)
-		err = drv->suspend_cancel(xdev);
-	if (err)
-		printk(KERN_WARNING
-		       "xenbus: suspend_cancel %s failed: %i\n",
-		       dev_name(dev), err);
-	return 0;
-}
-
 static int resume_dev(struct device *dev, void *data)
 {
 	int err;
@@ -777,8 +756,6 @@ EXPORT_SYMBOL_GPL(xenbus_resume);
 void xenbus_suspend_cancel(void)
 {
 	xs_suspend_cancel();
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
-	xenbus_backend_resume(suspend_cancel_dev);
 }
 EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
 
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index f87f961..0836772 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -92,7 +92,6 @@ struct xenbus_driver {
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
 	int (*suspend)(struct xenbus_device *dev);
-	int (*suspend_cancel)(struct xenbus_device *dev);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
 	struct device_driver driver;
-- 
cgit v0.10.2


From de5b31bd47de7e6f41be2e271318dbc8f1af354d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 9 Feb 2009 12:05:50 -0800
Subject: xen: use device model for suspending xenbus devices

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index b703dd2..5269bb4 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -104,9 +104,8 @@ static void do_suspend(void)
 		goto out;
 	}
 
-	printk("suspending xenbus...\n");
-	/* XXX use normal device tree? */
-	xenbus_suspend();
+	printk(KERN_DEBUG "suspending xenstore...\n");
+	xs_suspend();
 
 	err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
 	if (err) {
@@ -116,9 +115,9 @@ static void do_suspend(void)
 
 	if (!cancelled) {
 		xen_arch_resume();
-		xenbus_resume();
+		xs_resume();
 	} else
-		xenbus_suspend_cancel();
+		xs_suspend_cancel();
 
 	device_resume(PMSG_RESUME);
 
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd20361..4649213 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name);
 
 static void xenbus_dev_shutdown(struct device *_dev);
 
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+static int xenbus_dev_resume(struct device *dev);
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = {
 		.remove    = xenbus_dev_remove,
 		.shutdown  = xenbus_dev_shutdown,
 		.dev_attrs = xenbus_dev_attrs,
+
+		.suspend   = xenbus_dev_suspend,
+		.resume    = xenbus_dev_resume,
 	},
 };
 
@@ -669,7 +675,7 @@ static struct xenbus_watch fe_watch = {
 	.callback = frontend_changed,
 };
 
-static int suspend_dev(struct device *dev, void *data)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
 {
 	int err = 0;
 	struct xenbus_driver *drv;
@@ -682,14 +688,14 @@ static int suspend_dev(struct device *dev, void *data)
 	drv = to_xenbus_driver(dev->driver);
 	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
-		err = drv->suspend(xdev);
+		err = drv->suspend(xdev, state);
 	if (err)
 		printk(KERN_WARNING
 		       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
 	return 0;
 }
 
-static int resume_dev(struct device *dev, void *data)
+static int xenbus_dev_resume(struct device *dev)
 {
 	int err;
 	struct xenbus_driver *drv;
@@ -734,31 +740,6 @@ static int resume_dev(struct device *dev, void *data)
 	return 0;
 }
 
-void xenbus_suspend(void)
-{
-	DPRINTK("");
-
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
-	xenbus_backend_suspend(suspend_dev);
-	xs_suspend();
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend);
-
-void xenbus_resume(void)
-{
-	xb_init_comms();
-	xs_resume();
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
-	xenbus_backend_resume(resume_dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_resume);
-
-void xenbus_suspend_cancel(void)
-{
-	xs_suspend_cancel();
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
-
 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
 int xenstored_ready = 0;
 
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index e325eab..eab33f1 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -673,6 +673,8 @@ void xs_resume(void)
 	struct xenbus_watch *watch;
 	char token[sizeof(watch) * 2 + 1];
 
+	xb_init_comms();
+
 	mutex_unlock(&xs_state.response_mutex);
 	mutex_unlock(&xs_state.request_mutex);
 	up_write(&xs_state.transaction_mutex);
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 0836772..b9763ba 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -91,7 +91,7 @@ struct xenbus_driver {
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
-	int (*suspend)(struct xenbus_device *dev);
+	int (*suspend)(struct xenbus_device *dev, pm_message_t state);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
 	struct device_driver driver;
-- 
cgit v0.10.2


From c6a960ce8858f20036cc3afc3b9422670d0d9021 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 9 Feb 2009 12:05:53 -0800
Subject: xen/xenbus: export xenbus_dev_changed

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 4649213..d42e25d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -660,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
 
 	kfree(root);
 }
+EXPORT_SYMBOL_GPL(xenbus_dev_changed);
 
 static void frontend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
-- 
cgit v0.10.2


From cff7e81b3dd7c25cd2248cd7a04c5764552d5d55 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 10 Mar 2009 14:39:59 -0700
Subject: xen: add /sys/hypervisor support

Adds support for Xen info under /sys/hypervisor.  Taken from Novell 2.6.27
backport tree.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 526187c..88bca1c 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -41,3 +41,13 @@ config XEN_COMPAT_XENFS
          a xen platform.
          If in doubt, say yes.
 
+config XEN_SYS_HYPERVISOR
+       bool "Create xen entries under /sys/hypervisor"
+       depends on XEN && SYSFS
+       select SYS_HYPERVISOR
+       default y
+       help
+         Create entries under /sys/hypervisor describing the Xen
+	 hypervisor environment.  When running native or in another
+	 virtual environment, /sys/hypervisor will still be present,
+	 but will have no xen contents.
\ No newline at end of file
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc..f3603a3 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,5 @@ obj-y	+= xenbus/
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XENFS)		+= xenfs/
\ No newline at end of file
+obj-$(CONFIG_XENFS)		+= xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
new file mode 100644
index 0000000..cb29d1c
--- /dev/null
+++ b/drivers/xen/sys-hypervisor.c
@@ -0,0 +1,475 @@
+/*
+ *  copyright (c) 2006 IBM Corporation
+ *  Authored by: Mike D. Day <ncmike@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xenbus.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
+
+#define HYPERVISOR_ATTR_RO(_name) \
+static struct hyp_sysfs_attr  _name##_attr = __ATTR_RO(_name)
+
+#define HYPERVISOR_ATTR_RW(_name) \
+static struct hyp_sysfs_attr _name##_attr = \
+	__ATTR(_name, 0644, _name##_show, _name##_store)
+
+struct hyp_sysfs_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct hyp_sysfs_attr *, char *);
+	ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
+	void *hyp_attr_data;
+};
+
+static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	return sprintf(buffer, "xen\n");
+}
+
+HYPERVISOR_ATTR_RO(type);
+
+static int __init xen_sysfs_type_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
+}
+
+static void xen_sysfs_type_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
+}
+
+/* xen version attributes */
+static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	if (version)
+		return sprintf(buffer, "%d\n", version >> 16);
+	return -ENODEV;
+}
+
+HYPERVISOR_ATTR_RO(major);
+
+static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	if (version)
+		return sprintf(buffer, "%d\n", version & 0xff);
+	return -ENODEV;
+}
+
+HYPERVISOR_ATTR_RO(minor);
+
+static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *extra;
+
+	extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
+	if (extra) {
+		ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", extra);
+		kfree(extra);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(extra);
+
+static struct attribute *version_attrs[] = {
+	&major_attr.attr,
+	&minor_attr.attr,
+	&extra_attr.attr,
+	NULL
+};
+
+static struct attribute_group version_group = {
+	.name = "version",
+	.attrs = version_attrs,
+};
+
+static int __init xen_sysfs_version_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &version_group);
+}
+
+static void xen_sysfs_version_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &version_group);
+}
+
+/* UUID */
+
+static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	char *vm, *val;
+	int ret;
+	extern int xenstored_ready;
+
+	if (!xenstored_ready)
+		return -EBUSY;
+
+	vm = xenbus_read(XBT_NIL, "vm", "", NULL);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+	val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
+	kfree(vm);
+	if (IS_ERR(val))
+		return PTR_ERR(val);
+	ret = sprintf(buffer, "%s\n", val);
+	kfree(val);
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(uuid);
+
+static int __init xen_sysfs_uuid_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+static void xen_sysfs_uuid_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+/* xen compilation attributes */
+
+static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compiler);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiler);
+
+static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compile_by);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiled_by);
+
+static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compile_date);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compile_date);
+
+static struct attribute *xen_compile_attrs[] = {
+	&compiler_attr.attr,
+	&compiled_by_attr.attr,
+	&compile_date_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_compilation_group = {
+	.name = "compilation",
+	.attrs = xen_compile_attrs,
+};
+
+int __init static xen_compilation_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+static void xen_compilation_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+/* xen properties info */
+
+static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *caps;
+
+	caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
+	if (caps) {
+		ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", caps);
+		kfree(caps);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(capabilities);
+
+static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *cset;
+
+	cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
+	if (cset) {
+		ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", cset);
+		kfree(cset);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(changeset);
+
+static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_platform_parameters *parms;
+
+	parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
+	if (parms) {
+		ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
+					     parms);
+		if (!ret)
+			ret = sprintf(buffer, "%lx\n", parms->virt_start);
+		kfree(parms);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(virtual_start);
+
+static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret;
+
+	ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
+	if (ret > 0)
+		ret = sprintf(buffer, "%x\n", ret);
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(pagesize);
+
+/* eventually there will be several more features to export */
+static ssize_t xen_feature_show(int index, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_feature_info *info;
+
+	info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
+	if (info) {
+		info->submap_idx = index;
+		ret = HYPERVISOR_xen_version(XENVER_get_features, info);
+		if (!ret)
+			ret = sprintf(buffer, "%d\n", info->submap);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	return xen_feature_show(XENFEAT_writable_page_tables, buffer);
+}
+
+HYPERVISOR_ATTR_RO(writable_pt);
+
+static struct attribute *xen_properties_attrs[] = {
+	&capabilities_attr.attr,
+	&changeset_attr.attr,
+	&virtual_start_attr.attr,
+	&pagesize_attr.attr,
+	&writable_pt_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_properties_group = {
+	.name = "properties",
+	.attrs = xen_properties_attrs,
+};
+
+static int __init xen_properties_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
+}
+
+static void xen_properties_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
+}
+
+#ifdef CONFIG_KEXEC
+
+extern size_t vmcoreinfo_size_xen;
+extern unsigned long paddr_vmcoreinfo_xen;
+
+static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
+{
+	return sprintf(page, "%lx %zx\n",
+		paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
+}
+
+HYPERVISOR_ATTR_RO(vmcoreinfo);
+
+static int __init xen_sysfs_vmcoreinfo_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj,
+				 &vmcoreinfo_attr.attr);
+}
+
+static void xen_sysfs_vmcoreinfo_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr);
+}
+
+#endif
+
+static int __init hyper_sysfs_init(void)
+{
+	int ret;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	ret = xen_sysfs_type_init();
+	if (ret)
+		goto out;
+	ret = xen_sysfs_version_init();
+	if (ret)
+		goto version_out;
+	ret = xen_compilation_init();
+	if (ret)
+		goto comp_out;
+	ret = xen_sysfs_uuid_init();
+	if (ret)
+		goto uuid_out;
+	ret = xen_properties_init();
+	if (ret)
+		goto prop_out;
+#ifdef CONFIG_KEXEC
+	if (vmcoreinfo_size_xen != 0) {
+		ret = xen_sysfs_vmcoreinfo_init();
+		if (ret)
+			goto vmcoreinfo_out;
+	}
+#endif
+
+	goto out;
+
+#ifdef CONFIG_KEXEC
+vmcoreinfo_out:
+#endif
+	xen_properties_destroy();
+prop_out:
+	xen_sysfs_uuid_destroy();
+uuid_out:
+	xen_compilation_destroy();
+comp_out:
+	xen_sysfs_version_destroy();
+version_out:
+	xen_sysfs_type_destroy();
+out:
+	return ret;
+}
+
+static void __exit hyper_sysfs_exit(void)
+{
+#ifdef CONFIG_KEXEC
+	if (vmcoreinfo_size_xen != 0)
+		xen_sysfs_vmcoreinfo_destroy();
+#endif
+	xen_properties_destroy();
+	xen_compilation_destroy();
+	xen_sysfs_uuid_destroy();
+	xen_sysfs_version_destroy();
+	xen_sysfs_type_destroy();
+
+}
+module_init(hyper_sysfs_init);
+module_exit(hyper_sysfs_exit);
+
+static ssize_t hyp_sysfs_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buffer)
+{
+	struct hyp_sysfs_attr *hyp_attr;
+	hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
+	if (hyp_attr->show)
+		return hyp_attr->show(hyp_attr, buffer);
+	return 0;
+}
+
+static ssize_t hyp_sysfs_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buffer,
+			       size_t len)
+{
+	struct hyp_sysfs_attr *hyp_attr;
+	hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
+	if (hyp_attr->store)
+		return hyp_attr->store(hyp_attr, buffer, len);
+	return 0;
+}
+
+static struct sysfs_ops hyp_sysfs_ops = {
+	.show = hyp_sysfs_show,
+	.store = hyp_sysfs_store,
+};
+
+static struct kobj_type hyp_sysfs_kobj_type = {
+	.sysfs_ops = &hyp_sysfs_ops,
+};
+
+static int __init hypervisor_subsys_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
+	return 0;
+}
+device_initcall(hypervisor_subsys_init);
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index 453235e..e8b6519 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -57,4 +57,7 @@ struct xen_feature_info {
 /* Declares the features reported by XENVER_get_features. */
 #include "features.h"
 
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */
-- 
cgit v0.10.2


From a649b720614d5675dc402bef75a92576143fede7 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 10 Mar 2009 17:17:41 -0700
Subject: xen/sys/hypervisor: change writable_pt to features

/sys/hypervisor/properties/writable_pt was misnamed.  Rename to features,
expressed as a bit array in hex.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index cb29d1c..1267d6f 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -293,37 +293,48 @@ static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
 
 HYPERVISOR_ATTR_RO(pagesize);
 
-/* eventually there will be several more features to export */
 static ssize_t xen_feature_show(int index, char *buffer)
 {
-	int ret = -ENOMEM;
-	struct xen_feature_info *info;
+	ssize_t ret;
+	struct xen_feature_info info;
 
-	info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
-	if (info) {
-		info->submap_idx = index;
-		ret = HYPERVISOR_xen_version(XENVER_get_features, info);
-		if (!ret)
-			ret = sprintf(buffer, "%d\n", info->submap);
-		kfree(info);
-	}
+	info.submap_idx = index;
+	ret = HYPERVISOR_xen_version(XENVER_get_features, &info);
+	if (!ret)
+		ret = sprintf(buffer, "%08x", info.submap);
 
 	return ret;
 }
 
-static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
+static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer)
 {
-	return xen_feature_show(XENFEAT_writable_page_tables, buffer);
+	ssize_t len;
+	int i;
+
+	len = 0;
+	for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) {
+		int ret = xen_feature_show(i, buffer + len);
+		if (ret < 0) {
+			if (len == 0)
+				len = ret;
+			break;
+		}
+		len += ret;
+	}
+	if (len > 0)
+		buffer[len++] = '\n';
+
+	return len;
 }
 
-HYPERVISOR_ATTR_RO(writable_pt);
+HYPERVISOR_ATTR_RO(features);
 
 static struct attribute *xen_properties_attrs[] = {
 	&capabilities_attr.attr,
 	&changeset_attr.attr,
 	&virtual_start_attr.attr,
 	&pagesize_attr.attr,
-	&writable_pt_attr.attr,
+	&features_attr.attr,
 	NULL
 };
 
-- 
cgit v0.10.2


From f0783708bf63a2827863cf2be57c08a24843e6bd Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Wed, 11 Mar 2009 10:19:54 +0000
Subject: xen: drop kexec bits from /sys/hypervisor since kexec isn't
 implemented yet

I needed this to compile since there is no kexec yet in pvops kernel
  CC      drivers/xen/sys-hypervisor.o
drivers/xen/sys-hypervisor.c: In function 'hyper_sysfs_init':
drivers/xen/sys-hypervisor.c:405: error: 'vmcoreinfo_size_xen' undeclared (first use in this function)
drivers/xen/sys-hypervisor.c:405: error: (Each undeclared identifier is reported only once
drivers/xen/sys-hypervisor.c:405: error: for each function it appears in.)
drivers/xen/sys-hypervisor.c:406: error: implicit declaration of function 'xen_sysfs_vmcoreinfo_init'
drivers/xen/sys-hypervisor.c: In function 'hyper_sysfs_exit':
drivers/xen/sys-hypervisor.c:433: error: 'vmcoreinfo_size_xen' undeclared (first use in this function)
drivers/xen/sys-hypervisor.c:434: error: implicit declaration of function 'xen_sysfs_vmcoreinfo_destroy'

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>

diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index 1267d6f..88a60e0 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -353,32 +353,6 @@ static void xen_properties_destroy(void)
 	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
 }
 
-#ifdef CONFIG_KEXEC
-
-extern size_t vmcoreinfo_size_xen;
-extern unsigned long paddr_vmcoreinfo_xen;
-
-static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
-{
-	return sprintf(page, "%lx %zx\n",
-		paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
-}
-
-HYPERVISOR_ATTR_RO(vmcoreinfo);
-
-static int __init xen_sysfs_vmcoreinfo_init(void)
-{
-	return sysfs_create_file(hypervisor_kobj,
-				 &vmcoreinfo_attr.attr);
-}
-
-static void xen_sysfs_vmcoreinfo_destroy(void)
-{
-	sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr);
-}
-
-#endif
-
 static int __init hyper_sysfs_init(void)
 {
 	int ret;
@@ -401,20 +375,9 @@ static int __init hyper_sysfs_init(void)
 	ret = xen_properties_init();
 	if (ret)
 		goto prop_out;
-#ifdef CONFIG_KEXEC
-	if (vmcoreinfo_size_xen != 0) {
-		ret = xen_sysfs_vmcoreinfo_init();
-		if (ret)
-			goto vmcoreinfo_out;
-	}
-#endif
 
 	goto out;
 
-#ifdef CONFIG_KEXEC
-vmcoreinfo_out:
-#endif
-	xen_properties_destroy();
 prop_out:
 	xen_sysfs_uuid_destroy();
 uuid_out:
@@ -429,10 +392,6 @@ out:
 
 static void __exit hyper_sysfs_exit(void)
 {
-#ifdef CONFIG_KEXEC
-	if (vmcoreinfo_size_xen != 0)
-		xen_sysfs_vmcoreinfo_destroy();
-#endif
 	xen_properties_destroy();
 	xen_compilation_destroy();
 	xen_sysfs_uuid_destroy();
-- 
cgit v0.10.2


From 818fd20673df82031e604bb784d836f1fc2e2451 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 6 Feb 2009 18:46:47 -0800
Subject: xen: add "capabilities" file

The xenfs capabilities file allows usermode to determine what
capabilities the domain has.  The only one at present is "control_d"
in a privileged domain.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 515741a..6559e0c 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -20,10 +20,27 @@
 MODULE_DESCRIPTION("Xen filesystem");
 MODULE_LICENSE("GPL");
 
+static ssize_t capabilities_read(struct file *file, char __user *buf,
+				 size_t size, loff_t *off)
+{
+	char *tmp = "";
+
+	if (xen_initial_domain())
+		tmp = "control_d\n";
+
+	return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp));
+}
+
+static const struct file_operations capabilities_file_ops = {
+	.read = capabilities_read,
+};
+
 static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	static struct tree_descr xenfs_files[] = {
-		[2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR},
+		[1] = {},
+		{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
+		{ "capabilities", &capabilities_file_ops, S_IRUGO },
 		{""},
 	};
 
-- 
cgit v0.10.2


From 8a6f83afd0c5355db6d11394a798e94950306239 Mon Sep 17 00:00:00 2001
From: KaiGai Kohei <kaigai@ak.jp.nec.com>
Date: Wed, 1 Apr 2009 10:07:57 +0900
Subject: Permissive domain in userspace object manager

This patch enables applications to handle permissive domain correctly.

Since the v2.6.26 kernel, SELinux has supported an idea of permissive
domain which allows certain processes to work as if permissive mode,
even if the global setting is enforcing mode.
However, we don't have an application program interface to inform
what domains are permissive one, and what domains are not.
It means applications focuses on SELinux (XACE/SELinux, SE-PostgreSQL
and so on) cannot handle permissive domain correctly.

This patch add the sixth field (flags) on the reply of the /selinux/access
interface which is used to make an access control decision from userspace.
If the first bit of the flags field is positive, it means the required
access control decision is on permissive domain, so application should
allow any required actions, as the kernel doing.

This patch also has a side benefit. The av_decision.flags is set at
context_struct_compute_av(). It enables to check required permissions
without read_lock(&policy_rwlock).

Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Eric Paris <eparis@redhat.com>
--
 security/selinux/avc.c              |    2 +-
 security/selinux/include/security.h |    4 +++-
 security/selinux/selinuxfs.c        |    4 ++--
 security/selinux/ss/services.c      |   30 +++++-------------------------
 4 files changed, 11 insertions(+), 29 deletions(-)
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 7f9b5fa..b2ab608 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -927,7 +927,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
 	if (denied) {
 		if (flags & AVC_STRICT)
 			rc = -EACCES;
-		else if (!selinux_enforcing || security_permissive_sid(ssid))
+		else if (!selinux_enforcing || (avd->flags & AVD_FLAGS_PERMISSIVE))
 			avc_update_node(AVC_CALLBACK_GRANT, requested, ssid,
 					tsid, tclass, avd->seqno);
 		else
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 5c3434f..a7be3f0 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -91,9 +91,11 @@ struct av_decision {
 	u32 auditallow;
 	u32 auditdeny;
 	u32 seqno;
+	u32 flags;
 };
 
-int security_permissive_sid(u32 sid);
+/* definitions of av_decision.flags */
+#define AVD_FLAGS_PERMISSIVE	0x0001
 
 int security_compute_av(u32 ssid, u32 tsid,
 	u16 tclass, u32 requested,
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 2d5136e..8d4007f 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -527,10 +527,10 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size)
 		goto out2;
 
 	length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT,
-			  "%x %x %x %x %u",
+			  "%x %x %x %x %u %x",
 			  avd.allowed, 0xffffffff,
 			  avd.auditallow, avd.auditdeny,
-			  avd.seqno);
+			  avd.seqno, avd.flags);
 out2:
 	kfree(tcon);
 out:
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index deeec6c..500e6f7 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -410,6 +410,7 @@ static int context_struct_compute_av(struct context *scontext,
 	avd->auditallow = 0;
 	avd->auditdeny = 0xffffffff;
 	avd->seqno = latest_granting;
+	avd->flags = 0;
 
 	/*
 	 * Check for all the invalid cases.
@@ -528,31 +529,6 @@ inval_class:
 	return 0;
 }
 
-/*
- * Given a sid find if the type has the permissive flag set
- */
-int security_permissive_sid(u32 sid)
-{
-	struct context *context;
-	u32 type;
-	int rc;
-
-	read_lock(&policy_rwlock);
-
-	context = sidtab_search(&sidtab, sid);
-	BUG_ON(!context);
-
-	type = context->type;
-	/*
-	 * we are intentionally using type here, not type-1, the 0th bit may
-	 * someday indicate that we are globally setting permissive in policy.
-	 */
-	rc = ebitmap_get_bit(&policydb.permissive_map, type);
-
-	read_unlock(&policy_rwlock);
-	return rc;
-}
-
 static int security_validtrans_handle_fail(struct context *ocontext,
 					   struct context *ncontext,
 					   struct context *tcontext,
@@ -767,6 +743,10 @@ int security_compute_av(u32 ssid,
 
 	rc = context_struct_compute_av(scontext, tcontext, tclass,
 				       requested, avd);
+
+	/* permissive domain? */
+	if (ebitmap_get_bit(&policydb.permissive_map, scontext->type))
+	    avd->flags |= AVD_FLAGS_PERMISSIVE;
 out:
 	read_unlock(&policy_rwlock);
 	return rc;
-- 
cgit v0.10.2


From 53152f957d4a5dfd537d17c823afeb1a2c03753e Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Thu, 2 Apr 2009 13:24:28 +0100
Subject: xen: honour VCPU availability on boot

If a VM is booted with offline VCPUs then unplug them during boot. Determining
the availability of a VCPU requires access to XenStore which is not available
at the point smp_prepare_cpus() is called, therefore we bring up all VCPUS
initially and unplug the offline ones as soon as XenStore becomes available.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>

diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 974f56d..411cb1f 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -21,29 +21,41 @@ static void disable_hotplug_cpu(int cpu)
 	cpu_clear(cpu, cpu_present_map);
 }
 
-static void vcpu_hotplug(unsigned int cpu)
+static int vcpu_online(unsigned int cpu)
 {
 	int err;
 	char dir[32], state[32];
 
-	if (!cpu_possible(cpu))
-		return;
-
 	sprintf(dir, "cpu/%u", cpu);
 	err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
 	if (err != 1) {
 		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
-		return;
+		return err;
 	}
 
-	if (strcmp(state, "online") == 0) {
+	if (strcmp(state, "online") == 0)
+		return 1;
+	else if (strcmp(state, "offline") == 0)
+		return 0;
+
+	printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu);
+	return -EINVAL;
+}
+static void vcpu_hotplug(unsigned int cpu)
+{
+	if (!cpu_possible(cpu))
+		return;
+
+	switch (vcpu_online(cpu)) {
+	case 1:
 		enable_hotplug_cpu(cpu);
-	} else if (strcmp(state, "offline") == 0) {
+		break;
+	case 0:
 		(void)cpu_down(cpu);
 		disable_hotplug_cpu(cpu);
-	} else {
-		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
-		       state, cpu);
+		break;
+	default:
+		break;
 	}
 }
 
@@ -64,12 +76,20 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
 static int setup_cpu_watcher(struct notifier_block *notifier,
 			      unsigned long event, void *data)
 {
+	int cpu;
 	static struct xenbus_watch cpu_watch = {
 		.node = "cpu",
 		.callback = handle_vcpu_hotplug_event};
 
 	(void)register_xenbus_watch(&cpu_watch);
 
+	for_each_possible_cpu(cpu) {
+		if (vcpu_online(cpu) == 0) {
+			(void)cpu_down(cpu);
+			cpu_clear(cpu, cpu_present_map);
+		}
+	}
+
 	return NOTIFY_DONE;
 }
 
-- 
cgit v0.10.2


From 3d43321b7015387cfebbe26436d0e9d299162ea1 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@ubuntu.com>
Date: Thu, 2 Apr 2009 15:49:29 -0700
Subject: modules: sysctl to block module loading

Implement a sysctl file that disables module-loading system-wide since
there is no longer a viable way to remove CAP_SYS_MODULE after the system
bounding capability set was removed in 2.6.25.

Value can only be set to "1", and is tested only if standard capability
checks allow CAP_SYS_MODULE.  Given existing /dev/mem protections, this
should allow administrators a one-way method to block module loading
after initial boot-time module loading has finished.

Signed-off-by: Kees Cook <kees.cook@canonical.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index a4ccdd1..02b1349 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -30,6 +30,7 @@ show up in /proc/sys/kernel:
 - kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
+- modules_disabled
 - msgmax
 - msgmnb
 - msgmni
@@ -179,6 +180,16 @@ kernel stack.
 
 ==============================================================
 
+modules_disabled:
+
+A toggle value indicating if modules are allowed to be loaded
+in an otherwise modular kernel.  This toggle defaults to off
+(0), but can be set true (1).  Once true, modules can be
+neither loaded nor unloaded, and the toggle cannot be set back
+to false.
+
+==============================================================
+
 osrelease, ostype & version:
 
 # cat osrelease
diff --git a/kernel/module.c b/kernel/module.c
index f77ac32..eeb3f7b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -778,6 +778,9 @@ static void wait_for_zero_refcount(struct module *mod)
 	mutex_lock(&module_mutex);
 }
 
+/* Block module loading/unloading? */
+int modules_disabled = 0;
+
 SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		unsigned int, flags)
 {
@@ -785,7 +788,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 	char name[MODULE_NAME_LEN];
 	int ret, forced = 0;
 
-	if (!capable(CAP_SYS_MODULE))
+	if (!capable(CAP_SYS_MODULE) || modules_disabled)
 		return -EPERM;
 
 	if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
@@ -2349,7 +2352,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	int ret = 0;
 
 	/* Must have permission */
-	if (!capable(CAP_SYS_MODULE))
+	if (!capable(CAP_SYS_MODULE) || modules_disabled)
 		return -EPERM;
 
 	/* Only one module load at a time, please */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c5ef44f..2fb4246 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -113,6 +113,7 @@ static int ngroups_max = NGROUPS_MAX;
 
 #ifdef CONFIG_MODULES
 extern char modprobe_path[];
+extern int modules_disabled;
 #endif
 #ifdef CONFIG_CHR_DEV_SG
 extern int sg_big_buff;
@@ -533,6 +534,17 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &proc_dostring,
 		.strategy	= &sysctl_string,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "modules_disabled",
+		.data		= &modules_disabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+	},
 #endif
 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 	{
-- 
cgit v0.10.2


From b5f22a59c0356655a501190959db9f7f5dd07e3f Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Thu, 2 Apr 2009 18:47:14 -0500
Subject: don't raise all privs on setuid-root file with fE set (v2)

Distributions face a backward compatibility problem with starting to use
file capabilities.  For instance, removing setuid root from ping and
doing setcap cap_net_raw=pe means that booting with an older kernel
or one compiled without file capabilities means ping won't work for
non-root users.

In order to replace the setuid root bit on a capability-unaware
program, one has to set the effective, or legacy, file capability,
which makes the capability effective immediately.  This patch
uses the legacy bit as a queue to not automatically add full
privilege to a setuid-root program.

So, with this patch, an ordinary setuid-root program will run with
privilege.  But if /bin/ping has both setuid-root and cap_net_raw in
fP and fE, then ping (when run by non-root user) will not run
with only cap_net_raw.

Changelog:
	Apr 2 2009: Print a message once when such a binary is loaded,
		as per James Morris' suggestion.
	Apr 2 2009: Fix the condition to only catch uid!=0 && euid==0.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/commoncap.c b/security/commoncap.c
index 7cd61a5..97ac1f1 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -28,6 +28,28 @@
 #include <linux/prctl.h>
 #include <linux/securebits.h>
 
+/*
+ * If a non-root user executes a setuid-root binary in
+ * !secure(SECURE_NOROOT) mode, then we raise capabilities.
+ * However if fE is also set, then the intent is for only
+ * the file capabilities to be applied, and the setuid-root
+ * bit is left on either to change the uid (plausible) or
+ * to get full privilege on a kernel without file capabilities
+ * support.  So in that case we do not raise capabilities.
+ *
+ * Warn if that happens, once per boot.
+ */
+static void warn_setuid_and_fcaps_mixed(char *fname)
+{
+	static int warned;
+	if (!warned) {
+		printk(KERN_INFO "warning: `%s' has both setuid-root and"
+			" effective capabilities. Therefore not raising all"
+			" capabilities.\n", fname);
+		warned = 1;
+	}
+}
+
 int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
 {
 	NETLINK_CB(skb).eff_cap = current_cap();
@@ -464,6 +486,15 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
 
 	if (!issecure(SECURE_NOROOT)) {
 		/*
+		 * If the legacy file capability is set, then don't set privs
+		 * for a setuid root binary run by a non-root user.  Do set it
+		 * for a root user just to cause least surprise to an admin.
+		 */
+		if (effective && new->uid != 0 && new->euid == 0) {
+			warn_setuid_and_fcaps_mixed(bprm->filename);
+			goto skip;
+		}
+		/*
 		 * To support inheritance of root-permissions and suid-root
 		 * executables under compatibility mode, we override the
 		 * capability sets for the file.
@@ -478,6 +509,7 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
 		if (new->euid == 0)
 			effective = true;
 	}
+skip:
 
 	/* Don't let someone trace a set[ug]id/setpcap binary with the revised
 	 * credentials unless they have the appropriate permit
-- 
cgit v0.10.2


From 31c9a24ec82926fcae49483e53566d231e705057 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 2 Apr 2009 21:06:25 -0700
Subject: RCU: make treercu be default

Impact: switch default config from CLASSIC_RCU to TREE_RCU

Given that I have not gotten any complaints or bug reports on treercu
recently, this patch makes it be the default.  There are a number of
other defconfig files that explicitly call out CLASSIC_RCU, but which
have comment headers saying not to edit them.  Probably holdovers from
one of the flavors of "make config", but...

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: akpm@linux-foundation.org
Cc: dipankar@in.ibm.com
Cc: niv@us.ibm.com
Cc: manfred@colorfullife.com
Cc: peterz@infradead.org
LKML-Reference: <20090403040625.GA9473@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/init/Kconfig b/init/Kconfig
index 14c483d..26ac877 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -302,7 +302,7 @@ menu "RCU Subsystem"
 
 choice
 	prompt "RCU Implementation"
-	default CLASSIC_RCU
+	default TREE_RCU
 
 config CLASSIC_RCU
 	bool "Classic RCU"
-- 
cgit v0.10.2


From 4b166da939012905f4c36fedada62067db31948e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com>
Date: Sat, 28 Mar 2009 19:47:01 +0100
Subject: ASoC: Add driver for s6000 I2S interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds a driver for the I2S interface found on Stretch s6000
family processors.

Signed-off-by: Daniel Glöckner <dg@emlix.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig
index 3d2bb6f..3304f9d 100644
--- a/sound/soc/Kconfig
+++ b/sound/soc/Kconfig
@@ -32,6 +32,7 @@ source "sound/soc/fsl/Kconfig"
 source "sound/soc/omap/Kconfig"
 source "sound/soc/pxa/Kconfig"
 source "sound/soc/s3c24xx/Kconfig"
+source "sound/soc/s6000/Kconfig"
 source "sound/soc/sh/Kconfig"
 
 # Supported codecs
diff --git a/sound/soc/Makefile b/sound/soc/Makefile
index 0237879..8943a14 100644
--- a/sound/soc/Makefile
+++ b/sound/soc/Makefile
@@ -10,4 +10,5 @@ obj-$(CONFIG_SND_SOC)	+= fsl/
 obj-$(CONFIG_SND_SOC)	+= omap/
 obj-$(CONFIG_SND_SOC)	+= pxa/
 obj-$(CONFIG_SND_SOC)	+= s3c24xx/
+obj-$(CONFIG_SND_SOC)	+= s6000/
 obj-$(CONFIG_SND_SOC)	+= sh/
diff --git a/sound/soc/s6000/Kconfig b/sound/soc/s6000/Kconfig
new file mode 100644
index 0000000..4bfc8bc
--- /dev/null
+++ b/sound/soc/s6000/Kconfig
@@ -0,0 +1,10 @@
+config SND_S6000_SOC
+	tristate "SoC Audio for the Stretch s6000 family"
+	depends on XTENSA_VARIANT_S6000
+	help
+	  Say Y or M if you want to add support for codecs attached to
+	  s6000 family chips. You will also need to select the platform
+	  to support below.
+
+config SND_S6000_SOC_I2S
+	tristate
diff --git a/sound/soc/s6000/Makefile b/sound/soc/s6000/Makefile
new file mode 100644
index 0000000..df15f87
--- /dev/null
+++ b/sound/soc/s6000/Makefile
@@ -0,0 +1,6 @@
+# s6000 Platform Support
+snd-soc-s6000-objs := s6000-pcm.o
+snd-soc-s6000-i2s-objs := s6000-i2s.o
+
+obj-$(CONFIG_SND_S6000_SOC) += snd-soc-s6000.o
+obj-$(CONFIG_SND_S6000_SOC_I2S) += snd-soc-s6000-i2s.o
diff --git a/sound/soc/s6000/s6000-i2s.c b/sound/soc/s6000/s6000-i2s.c
new file mode 100644
index 0000000..dcc7904
--- /dev/null
+++ b/sound/soc/s6000/s6000-i2s.c
@@ -0,0 +1,629 @@
+/*
+ * ALSA SoC I2S Audio Layer for the Stretch S6000 family
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+
+#include "s6000-i2s.h"
+#include "s6000-pcm.h"
+
+struct s6000_i2s_dev {
+	dma_addr_t sifbase;
+	u8 __iomem *scbbase;
+	unsigned int wide;
+	unsigned int channel_in;
+	unsigned int channel_out;
+	unsigned int lines_in;
+	unsigned int lines_out;
+	struct s6000_pcm_dma_params dma_params;
+};
+
+#define S6_I2S_INTERRUPT_STATUS	0x00
+#define   S6_I2S_INT_OVERRUN	1
+#define   S6_I2S_INT_UNDERRUN	2
+#define   S6_I2S_INT_ALIGNMENT	4
+#define S6_I2S_INTERRUPT_ENABLE	0x04
+#define S6_I2S_INTERRUPT_RAW	0x08
+#define S6_I2S_INTERRUPT_CLEAR	0x0C
+#define S6_I2S_INTERRUPT_SET	0x10
+#define S6_I2S_MODE		0x20
+#define   S6_I2S_DUAL		0
+#define   S6_I2S_WIDE		1
+#define S6_I2S_TX_DEFAULT	0x24
+#define S6_I2S_DATA_CFG(c)	(0x40 + 0x10 * (c))
+#define   S6_I2S_IN		0
+#define   S6_I2S_OUT		1
+#define   S6_I2S_UNUSED		2
+#define S6_I2S_INTERFACE_CFG(c)	(0x44 + 0x10 * (c))
+#define   S6_I2S_DIV_MASK	0x001fff
+#define   S6_I2S_16BIT		0x000000
+#define   S6_I2S_20BIT		0x002000
+#define   S6_I2S_24BIT		0x004000
+#define   S6_I2S_32BIT		0x006000
+#define   S6_I2S_BITS_MASK	0x006000
+#define   S6_I2S_MEM_16BIT	0x000000
+#define   S6_I2S_MEM_32BIT	0x008000
+#define   S6_I2S_MEM_MASK	0x008000
+#define   S6_I2S_CHANNELS_SHIFT	16
+#define   S6_I2S_CHANNELS_MASK	0x030000
+#define   S6_I2S_SCK_IN		0x000000
+#define   S6_I2S_SCK_OUT	0x040000
+#define   S6_I2S_SCK_DIR	0x040000
+#define   S6_I2S_WS_IN		0x000000
+#define   S6_I2S_WS_OUT		0x080000
+#define   S6_I2S_WS_DIR		0x080000
+#define   S6_I2S_LEFT_FIRST	0x000000
+#define   S6_I2S_RIGHT_FIRST	0x100000
+#define   S6_I2S_FIRST		0x100000
+#define   S6_I2S_CUR_SCK	0x200000
+#define   S6_I2S_CUR_WS		0x400000
+#define S6_I2S_ENABLE(c)	(0x48 + 0x10 * (c))
+#define   S6_I2S_DISABLE_IF	0x02
+#define   S6_I2S_ENABLE_IF	0x03
+#define   S6_I2S_IS_BUSY	0x04
+#define   S6_I2S_DMA_ACTIVE	0x08
+#define   S6_I2S_IS_ENABLED	0x10
+
+#define S6_I2S_NUM_LINES	4
+
+#define S6_I2S_SIF_PORT0	0x0000000
+#define S6_I2S_SIF_PORT1	0x0000080 /* docs say 0x0000010 */
+
+static inline void s6_i2s_write_reg(struct s6000_i2s_dev *dev, int reg, u32 val)
+{
+	writel(val, dev->scbbase + reg);
+}
+
+static inline u32 s6_i2s_read_reg(struct s6000_i2s_dev *dev, int reg)
+{
+	return readl(dev->scbbase + reg);
+}
+
+static inline void s6_i2s_mod_reg(struct s6000_i2s_dev *dev, int reg,
+				  u32 mask, u32 val)
+{
+	val ^= s6_i2s_read_reg(dev, reg) & ~mask;
+	s6_i2s_write_reg(dev, reg, val);
+}
+
+static void s6000_i2s_start_channel(struct s6000_i2s_dev *dev, int channel)
+{
+	int i, j, cur, prev;
+
+	/*
+	 * Wait for WCLK to toggle 5 times before enabling the channel
+	 * s6000 Family Datasheet 3.6.4:
+	 *   "At least two cycles of WS must occur between commands
+	 *    to disable or enable the interface"
+	 */
+	j = 0;
+	prev = ~S6_I2S_CUR_WS;
+	for (i = 1000000; --i && j < 6; ) {
+		cur = s6_i2s_read_reg(dev, S6_I2S_INTERFACE_CFG(channel))
+		       & S6_I2S_CUR_WS;
+		if (prev != cur) {
+			prev = cur;
+			j++;
+		}
+	}
+	if (j < 6)
+		printk(KERN_WARNING "s6000-i2s: timeout waiting for WCLK\n");
+
+	s6_i2s_write_reg(dev, S6_I2S_ENABLE(channel), S6_I2S_ENABLE_IF);
+}
+
+static void s6000_i2s_stop_channel(struct s6000_i2s_dev *dev, int channel)
+{
+	s6_i2s_write_reg(dev, S6_I2S_ENABLE(channel), S6_I2S_DISABLE_IF);
+}
+
+static void s6000_i2s_start(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct s6000_i2s_dev *dev = rtd->dai->cpu_dai->private_data;
+	int channel;
+
+	channel = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
+			dev->channel_out : dev->channel_in;
+
+	s6000_i2s_start_channel(dev, channel);
+}
+
+static void s6000_i2s_stop(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct s6000_i2s_dev *dev = rtd->dai->cpu_dai->private_data;
+	int channel;
+
+	channel = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
+			dev->channel_out : dev->channel_in;
+
+	s6000_i2s_stop_channel(dev, channel);
+}
+
+static int s6000_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			     int after)
+{
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		if ((substream->stream == SNDRV_PCM_STREAM_CAPTURE) ^ !after)
+			s6000_i2s_start(substream);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		if (!after)
+			s6000_i2s_stop(substream);
+	}
+	return 0;
+}
+
+static unsigned int s6000_i2s_int_sources(struct s6000_i2s_dev *dev)
+{
+	unsigned int pending;
+	pending = s6_i2s_read_reg(dev, S6_I2S_INTERRUPT_RAW);
+	pending &= S6_I2S_INT_ALIGNMENT |
+		   S6_I2S_INT_UNDERRUN |
+		   S6_I2S_INT_OVERRUN;
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_CLEAR, pending);
+
+	return pending;
+}
+
+static unsigned int s6000_i2s_check_xrun(struct snd_soc_dai *cpu_dai)
+{
+	struct s6000_i2s_dev *dev = cpu_dai->private_data;
+	unsigned int errors;
+	unsigned int ret;
+
+	errors = s6000_i2s_int_sources(dev);
+	if (likely(!errors))
+		return 0;
+
+	ret = 0;
+	if (errors & S6_I2S_INT_ALIGNMENT)
+		printk(KERN_ERR "s6000-i2s: WCLK misaligned\n");
+	if (errors & S6_I2S_INT_UNDERRUN)
+		ret |= 1 << SNDRV_PCM_STREAM_PLAYBACK;
+	if (errors & S6_I2S_INT_OVERRUN)
+		ret |= 1 << SNDRV_PCM_STREAM_CAPTURE;
+	return ret;
+}
+
+static void s6000_i2s_wait_disabled(struct s6000_i2s_dev *dev)
+{
+	int channel;
+	int n = 50;
+	for (channel = 0; channel < 2; channel++) {
+		while (--n >= 0) {
+			int v = s6_i2s_read_reg(dev, S6_I2S_ENABLE(channel));
+			if ((v & S6_I2S_IS_ENABLED)
+			    || !(v & (S6_I2S_DMA_ACTIVE | S6_I2S_IS_BUSY)))
+				break;
+			udelay(20);
+		}
+	}
+	if (n < 0)
+		printk(KERN_WARNING "s6000-i2s: timeout disabling interfaces");
+}
+
+static int s6000_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
+				   unsigned int fmt)
+{
+	struct s6000_i2s_dev *dev = cpu_dai->private_data;
+	u32 w;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		w = S6_I2S_SCK_IN | S6_I2S_WS_IN;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFM:
+		w = S6_I2S_SCK_OUT | S6_I2S_WS_IN;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFS:
+		w = S6_I2S_SCK_IN | S6_I2S_WS_OUT;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		w = S6_I2S_SCK_OUT | S6_I2S_WS_OUT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_IB_IF:
+		w |= S6_I2S_LEFT_FIRST;
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		w |= S6_I2S_RIGHT_FIRST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(0),
+		       S6_I2S_FIRST | S6_I2S_WS_DIR | S6_I2S_SCK_DIR, w);
+	s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(1),
+		       S6_I2S_FIRST | S6_I2S_WS_DIR | S6_I2S_SCK_DIR, w);
+
+	return 0;
+}
+
+static int s6000_i2s_set_clkdiv(struct snd_soc_dai *dai, int div_id, int div)
+{
+	struct s6000_i2s_dev *dev = dai->private_data;
+
+	if (!div || (div & 1) || div > (S6_I2S_DIV_MASK + 1) * 2)
+		return -EINVAL;
+
+	s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(div_id),
+		       S6_I2S_DIV_MASK, div / 2 - 1);
+	return 0;
+}
+
+static int s6000_i2s_hw_params(struct snd_pcm_substream *substream,
+			       struct snd_pcm_hw_params *params,
+			       struct snd_soc_dai *dai)
+{
+	struct s6000_i2s_dev *dev = dai->private_data;
+	int interf;
+	u32 w = 0;
+
+	if (dev->wide)
+		interf = 0;
+	else {
+		w |= (((params_channels(params) - 2) / 2)
+		      << S6_I2S_CHANNELS_SHIFT) & S6_I2S_CHANNELS_MASK;
+		interf = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+				? dev->channel_out : dev->channel_in;
+	}
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		w |= S6_I2S_16BIT | S6_I2S_MEM_16BIT;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		w |= S6_I2S_32BIT | S6_I2S_MEM_32BIT;
+		break;
+	default:
+		printk(KERN_WARNING "s6000-i2s: unsupported PCM format %x\n",
+		       params_format(params));
+		return -EINVAL;
+	}
+
+	if (s6_i2s_read_reg(dev, S6_I2S_INTERFACE_CFG(interf))
+	     & S6_I2S_IS_ENABLED) {
+		printk(KERN_ERR "s6000-i2s: interface already enabled\n");
+		return -EBUSY;
+	}
+
+	s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(interf),
+		       S6_I2S_CHANNELS_MASK|S6_I2S_MEM_MASK|S6_I2S_BITS_MASK,
+		       w);
+
+	return 0;
+}
+
+static int s6000_i2s_dai_probe(struct platform_device *pdev,
+			       struct snd_soc_dai *dai)
+{
+	struct s6000_i2s_dev *dev = dai->private_data;
+	struct s6000_snd_platform_data *pdata = pdev->dev.platform_data;
+
+	if (!pdata)
+		return -EINVAL;
+
+	dev->wide = pdata->wide;
+	dev->channel_in = pdata->channel_in;
+	dev->channel_out = pdata->channel_out;
+	dev->lines_in = pdata->lines_in;
+	dev->lines_out = pdata->lines_out;
+
+	s6_i2s_write_reg(dev, S6_I2S_MODE,
+			 dev->wide ? S6_I2S_WIDE : S6_I2S_DUAL);
+
+	if (dev->wide) {
+		int i;
+
+		if (dev->lines_in + dev->lines_out > S6_I2S_NUM_LINES)
+			return -EINVAL;
+
+		dev->channel_in = 0;
+		dev->channel_out = 1;
+		dai->capture.channels_min = 2 * dev->lines_in;
+		dai->capture.channels_max = dai->capture.channels_min;
+		dai->playback.channels_min = 2 * dev->lines_out;
+		dai->playback.channels_max = dai->playback.channels_min;
+
+		for (i = 0; i < dev->lines_out; i++)
+			s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i), S6_I2S_OUT);
+
+		for (; i < S6_I2S_NUM_LINES - dev->lines_in; i++)
+			s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i),
+					 S6_I2S_UNUSED);
+
+		for (; i < S6_I2S_NUM_LINES; i++)
+			s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i), S6_I2S_IN);
+	} else {
+		unsigned int cfg[2] = {S6_I2S_UNUSED, S6_I2S_UNUSED};
+
+		if (dev->lines_in > 1 || dev->lines_out > 1)
+			return -EINVAL;
+
+		dai->capture.channels_min = 2 * dev->lines_in;
+		dai->capture.channels_max = 8 * dev->lines_in;
+		dai->playback.channels_min = 2 * dev->lines_out;
+		dai->playback.channels_max = 8 * dev->lines_out;
+
+		if (dev->lines_in)
+			cfg[dev->channel_in] = S6_I2S_IN;
+		if (dev->lines_out)
+			cfg[dev->channel_out] = S6_I2S_OUT;
+
+		s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(0), cfg[0]);
+		s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(1), cfg[1]);
+	}
+
+	if (dev->lines_out) {
+		if (dev->lines_in) {
+			if (!dev->dma_params.dma_out)
+				return -ENODEV;
+		} else {
+			dev->dma_params.dma_out = dev->dma_params.dma_in;
+			dev->dma_params.dma_in = 0;
+		}
+	}
+	dev->dma_params.sif_in = dev->sifbase + (dev->channel_in ?
+					S6_I2S_SIF_PORT1 : S6_I2S_SIF_PORT0);
+	dev->dma_params.sif_out = dev->sifbase + (dev->channel_out ?
+					S6_I2S_SIF_PORT1 : S6_I2S_SIF_PORT0);
+	dev->dma_params.same_rate = pdata->same_rate | pdata->wide;
+	return 0;
+}
+
+#define S6000_I2S_RATES	(SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_5512 | \
+			 SNDRV_PCM_RATE_8000_192000)
+#define S6000_I2S_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_ops s6000_i2s_dai_ops = {
+	.set_fmt = s6000_i2s_set_dai_fmt,
+	.set_clkdiv = s6000_i2s_set_clkdiv,
+	.hw_params = s6000_i2s_hw_params,
+};
+
+struct snd_soc_dai s6000_i2s_dai = {
+	.name = "s6000-i2s",
+	.id = 0,
+	.probe = s6000_i2s_dai_probe,
+	.playback = {
+		.channels_min = 2,
+		.channels_max = 8,
+		.formats = S6000_I2S_FORMATS,
+		.rates = S6000_I2S_RATES,
+		.rate_min = 0,
+		.rate_max = 1562500,
+	},
+	.capture = {
+		.channels_min = 2,
+		.channels_max = 8,
+		.formats = S6000_I2S_FORMATS,
+		.rates = S6000_I2S_RATES,
+		.rate_min = 0,
+		.rate_max = 1562500,
+	},
+	.ops = &s6000_i2s_dai_ops,
+}
+EXPORT_SYMBOL_GPL(s6000_i2s_dai);
+
+static int __devinit s6000_i2s_probe(struct platform_device *pdev)
+{
+	struct s6000_i2s_dev *dev;
+	struct resource *scbmem, *sifmem, *region, *dma1, *dma2;
+	u8 __iomem *mmio;
+	int ret;
+
+	scbmem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!scbmem) {
+		dev_err(&pdev->dev, "no mem resource?\n");
+		ret = -ENODEV;
+		goto err_release_none;
+	}
+
+	region = request_mem_region(scbmem->start,
+				    scbmem->end - scbmem->start + 1,
+				    pdev->name);
+	if (!region) {
+		dev_err(&pdev->dev, "I2S SCB region already claimed\n");
+		ret = -EBUSY;
+		goto err_release_none;
+	}
+
+	mmio = ioremap(scbmem->start, scbmem->end - scbmem->start + 1);
+	if (!mmio) {
+		dev_err(&pdev->dev, "can't ioremap SCB region\n");
+		ret = -ENOMEM;
+		goto err_release_scb;
+	}
+
+	sifmem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!sifmem) {
+		dev_err(&pdev->dev, "no second mem resource?\n");
+		ret = -ENODEV;
+		goto err_release_map;
+	}
+
+	region = request_mem_region(sifmem->start,
+				    sifmem->end - sifmem->start + 1,
+				    pdev->name);
+	if (!region) {
+		dev_err(&pdev->dev, "I2S SIF region already claimed\n");
+		ret = -EBUSY;
+		goto err_release_map;
+	}
+
+	dma1 = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (!dma1) {
+		dev_err(&pdev->dev, "no dma resource?\n");
+		ret = -ENODEV;
+		goto err_release_sif;
+	}
+
+	region = request_mem_region(dma1->start, dma1->end - dma1->start + 1,
+				    pdev->name);
+	if (!region) {
+		dev_err(&pdev->dev, "I2S DMA region already claimed\n");
+		ret = -EBUSY;
+		goto err_release_sif;
+	}
+
+	dma2 = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (dma2) {
+		region = request_mem_region(dma2->start,
+					    dma2->end - dma2->start + 1,
+					    pdev->name);
+		if (!region) {
+			dev_err(&pdev->dev,
+				"I2S DMA region already claimed\n");
+			ret = -EBUSY;
+			goto err_release_dma1;
+		}
+	}
+
+	dev = kzalloc(sizeof(struct s6000_i2s_dev), GFP_KERNEL);
+	if (!dev) {
+		ret = -ENOMEM;
+		goto err_release_dma2;
+	}
+
+	s6000_i2s_dai.dev = &pdev->dev;
+	s6000_i2s_dai.private_data = dev;
+	s6000_i2s_dai.dma_data = &dev->dma_params;
+
+	dev->sifbase = sifmem->start;
+	dev->scbbase = mmio;
+
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE, 0);
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_CLEAR,
+			 S6_I2S_INT_ALIGNMENT |
+			 S6_I2S_INT_UNDERRUN |
+			 S6_I2S_INT_OVERRUN);
+
+	s6000_i2s_stop_channel(dev, 0);
+	s6000_i2s_stop_channel(dev, 1);
+	s6000_i2s_wait_disabled(dev);
+
+	dev->dma_params.check_xrun = s6000_i2s_check_xrun;
+	dev->dma_params.trigger = s6000_i2s_trigger;
+	dev->dma_params.dma_in = dma1->start;
+	dev->dma_params.dma_out = dma2 ? dma2->start : 0;
+	dev->dma_params.irq = platform_get_irq(pdev, 0);
+	if (dev->dma_params.irq < 0) {
+		dev_err(&pdev->dev, "no irq resource?\n");
+		ret = -ENODEV;
+		goto err_release_dev;
+	}
+
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE,
+			 S6_I2S_INT_ALIGNMENT |
+			 S6_I2S_INT_UNDERRUN |
+			 S6_I2S_INT_OVERRUN);
+
+	ret = snd_soc_register_dai(&s6000_i2s_dai);
+	if (ret)
+		goto err_release_dev;
+
+	return 0;
+
+err_release_dev:
+	kfree(dev);
+err_release_dma2:
+	if (dma2)
+		release_mem_region(dma2->start, dma2->end - dma2->start + 1);
+err_release_dma1:
+	release_mem_region(dma1->start, dma1->end - dma1->start + 1);
+err_release_sif:
+	release_mem_region(sifmem->start, (sifmem->end - sifmem->start) + 1);
+err_release_map:
+	iounmap(mmio);
+err_release_scb:
+	release_mem_region(scbmem->start, (scbmem->end - scbmem->start) + 1);
+err_release_none:
+	return ret;
+}
+
+static void __devexit s6000_i2s_remove(struct platform_device *pdev)
+{
+	struct s6000_i2s_dev *dev = s6000_i2s_dai.private_data;
+	struct resource *region;
+	void __iomem *mmio = dev->scbbase;
+
+	snd_soc_unregister_dai(&s6000_i2s_dai);
+
+	s6000_i2s_stop_channel(dev, 0);
+	s6000_i2s_stop_channel(dev, 1);
+
+	s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE, 0);
+	s6000_i2s_dai.private_data = 0;
+	kfree(dev);
+
+	region = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	release_mem_region(region->start, region->end - region->start + 1);
+
+	region = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (region)
+		release_mem_region(region->start,
+				   region->end - region->start + 1);
+
+	region = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(region->start, (region->end - region->start) + 1);
+
+	iounmap(mmio);
+	region = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	release_mem_region(region->start, (region->end - region->start) + 1);
+}
+
+static struct platform_driver s6000_i2s_driver = {
+	.probe  = s6000_i2s_probe,
+	.remove = __devexit_p(s6000_i2s_remove),
+	.driver = {
+		.name   = "s6000-i2s",
+		.owner  = THIS_MODULE,
+	},
+};
+
+static int __init s6000_i2s_init(void)
+{
+	return platform_driver_register(&s6000_i2s_driver);
+}
+module_init(s6000_i2s_init);
+
+static void __exit s6000_i2s_exit(void)
+{
+	platform_driver_unregister(&s6000_i2s_driver);
+}
+module_exit(s6000_i2s_exit);
+
+MODULE_AUTHOR("Daniel Gloeckner");
+MODULE_DESCRIPTION("Stretch s6000 family I2S SoC Interface");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/s6000/s6000-i2s.h b/sound/soc/s6000/s6000-i2s.h
new file mode 100644
index 0000000..2375fdf
--- /dev/null
+++ b/sound/soc/s6000/s6000-i2s.h
@@ -0,0 +1,25 @@
+/*
+ * ALSA SoC I2S Audio Layer for the Stretch s6000 family
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _S6000_I2S_H
+#define _S6000_I2S_H
+
+extern struct snd_soc_dai s6000_i2s_dai;
+
+struct s6000_snd_platform_data {
+	int lines_in;
+	int lines_out;
+	int channel_in;
+	int channel_out;
+	int wide;
+	int same_rate;
+};
+#endif
diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c
new file mode 100644
index 0000000..83b8028
--- /dev/null
+++ b/sound/soc/s6000/s6000-pcm.c
@@ -0,0 +1,497 @@
+/*
+ * ALSA PCM interface for the Stetch s6000 family
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+
+#include <asm/dma.h>
+#include <variant/dmac.h>
+
+#include "s6000-pcm.h"
+
+#define S6_PCM_PREALLOCATE_SIZE (96 * 1024)
+#define S6_PCM_PREALLOCATE_MAX  (2048 * 1024)
+
+static struct snd_pcm_hardware s6000_pcm_hardware = {
+	.info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
+		 SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
+		 SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_JOINT_DUPLEX),
+	.formats = (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE),
+	.rates = (SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_5512 | \
+		  SNDRV_PCM_RATE_8000_192000),
+	.rate_min = 0,
+	.rate_max = 1562500,
+	.channels_min = 2,
+	.channels_max = 8,
+	.buffer_bytes_max = 0x7ffffff0,
+	.period_bytes_min = 16,
+	.period_bytes_max = 0xfffff0,
+	.periods_min = 2,
+	.periods_max = 1024, /* no limit */
+	.fifo_size = 0,
+};
+
+struct s6000_runtime_data {
+	spinlock_t lock;
+	int period;		/* current DMA period */
+};
+
+static void s6000_pcm_enqueue_dma(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct s6000_runtime_data *prtd = runtime->private_data;
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	int channel;
+	unsigned int period_size;
+	unsigned int dma_offset;
+	dma_addr_t dma_pos;
+	dma_addr_t src, dst;
+
+	period_size = snd_pcm_lib_period_bytes(substream);
+	dma_offset = prtd->period * period_size;
+	dma_pos = runtime->dma_addr + dma_offset;
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		src = dma_pos;
+		dst = par->sif_out;
+		channel = par->dma_out;
+	} else {
+		src = par->sif_in;
+		dst = dma_pos;
+		channel = par->dma_in;
+	}
+
+	if (!s6dmac_channel_enabled(DMA_MASK_DMAC(channel),
+				    DMA_INDEX_CHNL(channel)))
+		return;
+
+	if (s6dmac_fifo_full(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel))) {
+		printk(KERN_ERR "s6000-pcm: fifo full\n");
+		return;
+	}
+
+	BUG_ON(period_size & 15);
+	s6dmac_put_fifo(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel),
+			src, dst, period_size);
+
+	prtd->period++;
+	if (unlikely(prtd->period >= runtime->periods))
+		prtd->period = 0;
+}
+
+static irqreturn_t s6000_pcm_irq(int irq, void *data)
+{
+	struct snd_pcm *pcm = data;
+	struct snd_soc_pcm_runtime *runtime = pcm->private_data;
+	struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data;
+	struct s6000_runtime_data *prtd;
+	unsigned int has_xrun;
+	int i, ret = IRQ_NONE;
+	u32 channel[2] = {
+		[SNDRV_PCM_STREAM_PLAYBACK] = params->dma_out,
+		[SNDRV_PCM_STREAM_CAPTURE] = params->dma_in
+	};
+
+	has_xrun = params->check_xrun(runtime->dai->cpu_dai);
+
+	for (i = 0; i < ARRAY_SIZE(channel); ++i) {
+		struct snd_pcm_substream *substream = pcm->streams[i].substream;
+		unsigned int pending;
+
+		if (!channel[i])
+			continue;
+
+		if (unlikely(has_xrun & (1 << i)) &&
+		    substream->runtime &&
+		    snd_pcm_running(substream)) {
+			dev_dbg(pcm->dev, "xrun\n");
+			snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
+			ret = IRQ_HANDLED;
+		}
+
+		pending = s6dmac_int_sources(DMA_MASK_DMAC(channel[i]),
+					     DMA_INDEX_CHNL(channel[i]));
+
+		if (pending & 1) {
+			ret = IRQ_HANDLED;
+			if (likely(substream->runtime &&
+				   snd_pcm_running(substream))) {
+				snd_pcm_period_elapsed(substream);
+				dev_dbg(pcm->dev, "period elapsed %x %x\n",
+				       s6dmac_cur_src(DMA_MASK_DMAC(channel[i]),
+						   DMA_INDEX_CHNL(channel[i])),
+				       s6dmac_cur_dst(DMA_MASK_DMAC(channel[i]),
+						   DMA_INDEX_CHNL(channel[i])));
+				prtd = substream->runtime->private_data;
+				spin_lock(&prtd->lock);
+				s6000_pcm_enqueue_dma(substream);
+				spin_unlock(&prtd->lock);
+			}
+		}
+
+		if (unlikely(pending & ~7)) {
+			if (pending & (1 << 3))
+				printk(KERN_WARNING
+				       "s6000-pcm: DMA %x Underflow\n",
+				       channel[i]);
+			if (pending & (1 << 4))
+				printk(KERN_WARNING
+				       "s6000-pcm: DMA %x Overflow\n",
+				       channel[i]);
+			if (pending & 0x1e0)
+				printk(KERN_WARNING
+				       "s6000-pcm: DMA %x Master Error "
+				       "(mask %x)\n",
+				       channel[i], pending >> 5);
+
+		}
+	}
+
+	return ret;
+}
+
+static int s6000_pcm_start(struct snd_pcm_substream *substream)
+{
+	struct s6000_runtime_data *prtd = substream->runtime->private_data;
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	unsigned long flags;
+	int srcinc;
+	u32 dma;
+
+	spin_lock_irqsave(&prtd->lock, flags);
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		srcinc = 1;
+		dma = par->dma_out;
+	} else {
+		srcinc = 0;
+		dma = par->dma_in;
+	}
+	s6dmac_enable_chan(DMA_MASK_DMAC(dma), DMA_INDEX_CHNL(dma),
+			   1 /* priority 1 (0 is max) */,
+			   0 /* peripheral requests w/o xfer length mode */,
+			   srcinc /* source address increment */,
+			   srcinc^1 /* destination address increment */,
+			   0 /* chunksize 0 (skip impossible on this dma) */,
+			   0 /* source skip after chunk (impossible) */,
+			   0 /* destination skip after chunk (impossible) */,
+			   4 /* 16 byte burst size */,
+			   -1 /* don't conserve bandwidth */,
+			   0 /* low watermark irq descriptor theshold */,
+			   0 /* disable hardware timestamps */,
+			   1 /* enable channel */);
+
+	s6000_pcm_enqueue_dma(substream);
+	s6000_pcm_enqueue_dma(substream);
+
+	spin_unlock_irqrestore(&prtd->lock, flags);
+
+	return 0;
+}
+
+static int s6000_pcm_stop(struct snd_pcm_substream *substream)
+{
+	struct s6000_runtime_data *prtd = substream->runtime->private_data;
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	unsigned long flags;
+	u32 channel;
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		channel = par->dma_out;
+	else
+		channel = par->dma_in;
+
+	s6dmac_set_terminal_count(DMA_MASK_DMAC(channel),
+				  DMA_INDEX_CHNL(channel), 0);
+
+	spin_lock_irqsave(&prtd->lock, flags);
+
+	s6dmac_disable_chan(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel));
+
+	spin_unlock_irqrestore(&prtd->lock, flags);
+
+	return 0;
+}
+
+static int s6000_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	int ret;
+
+	ret = par->trigger(substream, cmd, 0);
+	if (ret < 0)
+		return ret;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		ret = s6000_pcm_start(substream);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		ret = s6000_pcm_stop(substream);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	if (ret < 0)
+		return ret;
+
+	return par->trigger(substream, cmd, 1);
+}
+
+static int s6000_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct s6000_runtime_data *prtd = substream->runtime->private_data;
+
+	prtd->period = 0;
+
+	return 0;
+}
+
+static snd_pcm_uframes_t s6000_pcm_pointer(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct s6000_runtime_data *prtd = runtime->private_data;
+	unsigned long flags;
+	unsigned int offset;
+	dma_addr_t count;
+
+	spin_lock_irqsave(&prtd->lock, flags);
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		count = s6dmac_cur_src(DMA_MASK_DMAC(par->dma_out),
+				       DMA_INDEX_CHNL(par->dma_out));
+	else
+		count = s6dmac_cur_dst(DMA_MASK_DMAC(par->dma_in),
+				       DMA_INDEX_CHNL(par->dma_in));
+
+	count -= runtime->dma_addr;
+
+	spin_unlock_irqrestore(&prtd->lock, flags);
+
+	offset = bytes_to_frames(runtime, count);
+	if (unlikely(offset >= runtime->buffer_size))
+		offset = 0;
+
+	return offset;
+}
+
+static int s6000_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct s6000_runtime_data *prtd;
+	int ret;
+
+	snd_soc_set_runtime_hwparams(substream, &s6000_pcm_hardware);
+
+	ret = snd_pcm_hw_constraint_step(runtime, 0,
+					 SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 16);
+	if (ret < 0)
+		return ret;
+	ret = snd_pcm_hw_constraint_step(runtime, 0,
+					 SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 16);
+	if (ret < 0)
+		return ret;
+	ret = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (ret < 0)
+		return ret;
+
+	if (par->same_rate) {
+		int rate;
+		spin_lock(&par->lock); /* needed? */
+		rate = par->rate;
+		spin_unlock(&par->lock);
+		if (rate != -1) {
+			ret = snd_pcm_hw_constraint_minmax(runtime,
+							SNDRV_PCM_HW_PARAM_RATE,
+							rate, rate);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	prtd = kzalloc(sizeof(struct s6000_runtime_data), GFP_KERNEL);
+	if (prtd == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&prtd->lock);
+
+	runtime->private_data = prtd;
+
+	return 0;
+}
+
+static int s6000_pcm_close(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct s6000_runtime_data *prtd = runtime->private_data;
+
+	kfree(prtd);
+
+	return 0;
+}
+
+static int s6000_pcm_hw_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *hw_params)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+	int ret;
+	ret = snd_pcm_lib_malloc_pages(substream,
+				       params_buffer_bytes(hw_params));
+	if (ret < 0) {
+		printk(KERN_WARNING "s6000-pcm: allocation of memory failed\n");
+		return ret;
+	}
+
+	if (par->same_rate) {
+		spin_lock(&par->lock);
+		if (par->rate == -1 ||
+		    !(par->in_use & ~(1 << substream->stream))) {
+			par->rate = params_rate(hw_params);
+			par->in_use |= 1 << substream->stream;
+		} else if (params_rate(hw_params) != par->rate) {
+			snd_pcm_lib_free_pages(substream);
+			par->in_use &= ~(1 << substream->stream);
+			ret = -EBUSY;
+		}
+		spin_unlock(&par->lock);
+	}
+	return ret;
+}
+
+static int s6000_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+	struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data;
+
+	spin_lock(&par->lock);
+	par->in_use &= ~(1 << substream->stream);
+	if (!par->in_use)
+		par->rate = -1;
+	spin_unlock(&par->lock);
+
+	return snd_pcm_lib_free_pages(substream);
+}
+
+static struct snd_pcm_ops s6000_pcm_ops = {
+	.open = 	s6000_pcm_open,
+	.close = 	s6000_pcm_close,
+	.ioctl = 	snd_pcm_lib_ioctl,
+	.hw_params = 	s6000_pcm_hw_params,
+	.hw_free = 	s6000_pcm_hw_free,
+	.trigger =	s6000_pcm_trigger,
+	.prepare = 	s6000_pcm_prepare,
+	.pointer = 	s6000_pcm_pointer,
+};
+
+static void s6000_pcm_free(struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *runtime = pcm->private_data;
+	struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data;
+
+	free_irq(params->irq, pcm);
+	snd_pcm_lib_preallocate_free_for_all(pcm);
+}
+
+static u64 s6000_pcm_dmamask = DMA_32BIT_MASK;
+
+static int s6000_pcm_new(struct snd_card *card,
+			 struct snd_soc_dai *dai, struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *runtime = pcm->private_data;
+	struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data;
+	int res;
+
+	if (!card->dev->dma_mask)
+		card->dev->dma_mask = &s6000_pcm_dmamask;
+	if (!card->dev->coherent_dma_mask)
+		card->dev->coherent_dma_mask = DMA_32BIT_MASK;
+
+	if (params->dma_in) {
+		s6dmac_disable_chan(DMA_MASK_DMAC(params->dma_in),
+				    DMA_INDEX_CHNL(params->dma_in));
+		s6dmac_int_sources(DMA_MASK_DMAC(params->dma_in),
+				   DMA_INDEX_CHNL(params->dma_in));
+	}
+
+	if (params->dma_out) {
+		s6dmac_disable_chan(DMA_MASK_DMAC(params->dma_out),
+				    DMA_INDEX_CHNL(params->dma_out));
+		s6dmac_int_sources(DMA_MASK_DMAC(params->dma_out),
+				   DMA_INDEX_CHNL(params->dma_out));
+	}
+
+	res = request_irq(params->irq, s6000_pcm_irq, IRQF_SHARED,
+			  s6000_soc_platform.name, pcm);
+	if (res) {
+		printk(KERN_ERR "s6000-pcm couldn't get IRQ\n");
+		return res;
+	}
+
+	res = snd_pcm_lib_preallocate_pages_for_all(pcm,
+						    SNDRV_DMA_TYPE_DEV,
+						    card->dev,
+						    S6_PCM_PREALLOCATE_SIZE,
+						    S6_PCM_PREALLOCATE_MAX);
+	if (res)
+		printk(KERN_WARNING "s6000-pcm: preallocation failed\n");
+
+	spin_lock_init(&params->lock);
+	params->in_use = 0;
+	params->rate = -1;
+	return 0;
+}
+
+struct snd_soc_platform s6000_soc_platform = {
+	.name = 	"s6000-audio",
+	.pcm_ops = 	&s6000_pcm_ops,
+	.pcm_new = 	s6000_pcm_new,
+	.pcm_free = 	s6000_pcm_free,
+};
+EXPORT_SYMBOL_GPL(s6000_soc_platform);
+
+static int __init s6000_pcm_init(void)
+{
+	return snd_soc_register_platform(&s6000_soc_platform);
+}
+module_init(s6000_pcm_init);
+
+static void __exit s6000_pcm_exit(void)
+{
+	snd_soc_unregister_platform(&s6000_soc_platform);
+}
+module_exit(s6000_pcm_exit);
+
+MODULE_AUTHOR("Daniel Gloeckner");
+MODULE_DESCRIPTION("Stretch s6000 family PCM DMA module");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/s6000/s6000-pcm.h b/sound/soc/s6000/s6000-pcm.h
new file mode 100644
index 0000000..96f23f6
--- /dev/null
+++ b/sound/soc/s6000/s6000-pcm.h
@@ -0,0 +1,35 @@
+/*
+ * ALSA PCM interface for the Stretch s6000 family
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _S6000_PCM_H
+#define _S6000_PCM_H
+
+struct snd_soc_dai;
+struct snd_pcm_substream;
+
+struct s6000_pcm_dma_params {
+	unsigned int (*check_xrun)(struct snd_soc_dai *cpu_dai);
+	int (*trigger)(struct snd_pcm_substream *substream, int cmd, int after);
+	dma_addr_t sif_in;
+	dma_addr_t sif_out;
+	u32 dma_in;
+	u32 dma_out;
+	int irq;
+	int same_rate;
+
+	spinlock_t lock;
+	int in_use;
+	int rate;
+};
+
+extern struct snd_soc_platform s6000_soc_platform;
+
+#endif
-- 
cgit v0.10.2


From 2b7dbbe0c9491e62b50978d1615193bec33a8291 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com>
Date: Sat, 28 Mar 2009 19:47:02 +0100
Subject: ASoC: s6105 IP camera machine specific ASoC code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds machine specific code for the audio part of the Stretch
s6105 IP camera reference design.

The device uses the tlv320aic31(01) codec to generate the clock for
both I2S ports of the soc. While the master clock is generated by a
configurable PLL chip, the code assumes the factory default settings.

An additional kcontrol has been added to handle the special routing of
the board, connecting both HPLCOM and HPROUT to the same pin of the audio
jack. One of these should always be switched off.

Signed-off-by: Daniel Glöckner <dg@emlix.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s6000/Kconfig b/sound/soc/s6000/Kconfig
index 4bfc8bc..c74eb3d 100644
--- a/sound/soc/s6000/Kconfig
+++ b/sound/soc/s6000/Kconfig
@@ -8,3 +8,12 @@ config SND_S6000_SOC
 
 config SND_S6000_SOC_I2S
 	tristate
+
+config SND_S6000_SOC_S6IPCAM
+	tristate "SoC Audio support for Stretch 6105 IP Camera"
+	depends on SND_S6000_SOC && XTENSA_PLATFORM_S6105
+	select SND_S6000_SOC_I2S
+	select SND_SOC_TLV320AIC3X
+	help
+	  Say Y if you want to add support for SoC audio on the
+	  Stretch s6105 IP Camera Reference Design.
diff --git a/sound/soc/s6000/Makefile b/sound/soc/s6000/Makefile
index df15f87..7a61361 100644
--- a/sound/soc/s6000/Makefile
+++ b/sound/soc/s6000/Makefile
@@ -4,3 +4,8 @@ snd-soc-s6000-i2s-objs := s6000-i2s.o
 
 obj-$(CONFIG_SND_S6000_SOC) += snd-soc-s6000.o
 obj-$(CONFIG_SND_S6000_SOC_I2S) += snd-soc-s6000-i2s.o
+
+# s6105 Machine Support
+snd-soc-s6ipcam-objs := s6105-ipcam.o
+
+obj-$(CONFIG_SND_S6000_SOC_S6IPCAM) += snd-soc-s6ipcam.o
diff --git a/sound/soc/s6000/s6105-ipcam.c b/sound/soc/s6000/s6105-ipcam.c
new file mode 100644
index 0000000..21c4f55
--- /dev/null
+++ b/sound/soc/s6000/s6105-ipcam.c
@@ -0,0 +1,244 @@
+/*
+ * ASoC driver for Stretch s6105 IP camera platform
+ *
+ * Author:      Daniel Gloeckner, <dg@emlix.com>
+ * Copyright:   (C) 2009 emlix GmbH <info@emlix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/timer.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+
+#include <variant/dmac.h>
+
+#include "../codecs/tlv320aic3x.h"
+#include "s6000-pcm.h"
+#include "s6000-i2s.h"
+
+#define S6105_CAM_CODEC_CLOCK 12288000
+
+static int s6105_hw_params(struct snd_pcm_substream *substream,
+			   struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	int ret = 0;
+
+	/* set codec DAI configuration */
+	ret = snd_soc_dai_set_fmt(codec_dai, SND_SOC_DAIFMT_I2S |
+					     SND_SOC_DAIFMT_CBM_CFM);
+	if (ret < 0)
+		return ret;
+
+	/* set cpu DAI configuration */
+	ret = snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_CBM_CFM |
+					   SND_SOC_DAIFMT_IB_IF);
+	if (ret < 0)
+		return ret;
+
+	/* set the codec system clock */
+	ret = snd_soc_dai_set_sysclk(codec_dai, 0, S6105_CAM_CODEC_CLOCK,
+					    SND_SOC_CLOCK_OUT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static struct snd_soc_ops s6105_ops = {
+	.hw_params = s6105_hw_params,
+};
+
+/* s6105 machine dapm widgets */
+static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = {
+	SND_SOC_DAPM_LINE("Audio Out Differential", NULL),
+	SND_SOC_DAPM_LINE("Audio Out Stereo", NULL),
+	SND_SOC_DAPM_LINE("Audio In", NULL),
+};
+
+/* s6105 machine audio_mapnections to the codec pins */
+static const struct snd_soc_dapm_route audio_map[] = {
+	/* Audio Out connected to HPLOUT, HPLCOM, HPROUT */
+	{"Audio Out Differential", NULL, "HPLOUT"},
+	{"Audio Out Differential", NULL, "HPLCOM"},
+	{"Audio Out Stereo", NULL, "HPLOUT"},
+	{"Audio Out Stereo", NULL, "HPROUT"},
+
+	/* Audio In connected to LINE1L, LINE1R */
+	{"LINE1L", NULL, "Audio In"},
+	{"LINE1R", NULL, "Audio In"},
+};
+
+static int output_type_info(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = 2;
+	if (uinfo->value.enumerated.item) {
+		uinfo->value.enumerated.item = 1;
+		strcpy(uinfo->value.enumerated.name, "HPLOUT/HPROUT");
+	} else {
+		strcpy(uinfo->value.enumerated.name, "HPLOUT/HPLCOM");
+	}
+	return 0;
+}
+
+static int output_type_get(struct snd_kcontrol *kcontrol,
+			   struct snd_ctl_elem_value *ucontrol)
+{
+	ucontrol->value.enumerated.item[0] = kcontrol->private_value;
+	return 0;
+}
+
+static int output_type_put(struct snd_kcontrol *kcontrol,
+			   struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = kcontrol->private_data;
+	unsigned int val = (ucontrol->value.enumerated.item[0] != 0);
+	char *differential = "Audio Out Differential";
+	char *stereo = "Audio Out Stereo";
+
+	if (kcontrol->private_value == val)
+		return 0;
+	kcontrol->private_value = val;
+	snd_soc_dapm_disable_pin(codec, val ? differential : stereo);
+	snd_soc_dapm_sync(codec);
+	snd_soc_dapm_enable_pin(codec, val ? stereo : differential);
+	snd_soc_dapm_sync(codec);
+
+	return 1;
+}
+
+static const struct snd_kcontrol_new audio_out_mux = {
+	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.name = "Master Output Mux",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.info = output_type_info,
+	.get = output_type_get,
+	.put = output_type_put,
+	.private_value = 1 /* default to stereo */
+};
+
+/* Logic for a aic3x as connected on the s6105 ip camera ref design */
+static int s6105_aic3x_init(struct snd_soc_codec *codec)
+{
+	/* Add s6105 specific widgets */
+	snd_soc_dapm_new_controls(codec, aic3x_dapm_widgets,
+				  ARRAY_SIZE(aic3x_dapm_widgets));
+
+	/* Set up s6105 specific audio path audio_map */
+	snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+
+	/* not present */
+	snd_soc_dapm_nc_pin(codec, "MONO_LOUT");
+	snd_soc_dapm_nc_pin(codec, "LINE2L");
+	snd_soc_dapm_nc_pin(codec, "LINE2R");
+
+	/* not connected */
+	snd_soc_dapm_nc_pin(codec, "MIC3L"); /* LINE2L on this chip */
+	snd_soc_dapm_nc_pin(codec, "MIC3R"); /* LINE2R on this chip */
+	snd_soc_dapm_nc_pin(codec, "LLOUT");
+	snd_soc_dapm_nc_pin(codec, "RLOUT");
+	snd_soc_dapm_nc_pin(codec, "HPRCOM");
+
+	/* always connected */
+	snd_soc_dapm_enable_pin(codec, "Audio In");
+
+	/* must correspond to audio_out_mux.private_value initializer */
+	snd_soc_dapm_disable_pin(codec, "Audio Out Differential");
+	snd_soc_dapm_sync(codec);
+	snd_soc_dapm_enable_pin(codec, "Audio Out Stereo");
+
+	snd_soc_dapm_sync(codec);
+
+	snd_ctl_add(codec->card, snd_ctl_new1(&audio_out_mux, codec));
+
+	return 0;
+}
+
+/* s6105 digital audio interface glue - connects codec <--> CPU */
+static struct snd_soc_dai_link s6105_dai = {
+	.name = "TLV320AIC31",
+	.stream_name = "AIC31",
+	.cpu_dai = &s6000_i2s_dai,
+	.codec_dai = &aic3x_dai,
+	.init = s6105_aic3x_init,
+	.ops = &s6105_ops,
+};
+
+/* s6105 audio machine driver */
+static struct snd_soc_card snd_soc_card_s6105 = {
+	.name = "Stretch IP Camera",
+	.platform = &s6000_soc_platform,
+	.dai_link = &s6105_dai,
+	.num_links = 1,
+};
+
+/* s6105 audio private data */
+static struct aic3x_setup_data s6105_aic3x_setup = {
+	.i2c_bus = 0,
+	.i2c_address = 0x18,
+};
+
+/* s6105 audio subsystem */
+static struct snd_soc_device s6105_snd_devdata = {
+	.card = &snd_soc_card_s6105,
+	.codec_dev = &soc_codec_dev_aic3x,
+	.codec_data = &s6105_aic3x_setup,
+};
+
+static struct s6000_snd_platform_data __initdata s6105_snd_data = {
+	.wide		= 0,
+	.channel_in	= 0,
+	.channel_out	= 1,
+	.lines_in	= 1,
+	.lines_out	= 1,
+	.same_rate	= 1,
+};
+
+static struct platform_device *s6105_snd_device;
+
+static int __init s6105_init(void)
+{
+	int ret;
+
+	s6105_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!s6105_snd_device)
+		return -ENOMEM;
+
+	platform_set_drvdata(s6105_snd_device, &s6105_snd_devdata);
+	s6105_snd_devdata.dev = &s6105_snd_device->dev;
+	platform_device_add_data(s6105_snd_device, &s6105_snd_data,
+				 sizeof(s6105_snd_data));
+
+	ret = platform_device_add(s6105_snd_device);
+	if (ret)
+		platform_device_put(s6105_snd_device);
+
+	return ret;
+}
+
+static void __exit s6105_exit(void)
+{
+	platform_device_unregister(s6105_snd_device);
+}
+
+module_init(s6105_init);
+module_exit(s6105_exit);
+
+MODULE_AUTHOR("Daniel Gloeckner");
+MODULE_DESCRIPTION("Stretch s6105 IP camera ASoC driver");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 595258aaeac4cc6e187b98b1bf29bb176febe763 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:28 +0100
Subject: perf_counter: x86: fix 32-bit irq_period assumption

No need to assume the irq_period is 32bit.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 155bc3c..1cedc34 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -449,7 +449,7 @@ __hw_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s32 period = hwc->irq_period;
+	s64 period = hwc->irq_period;
 	int err;
 
 	/*
-- 
cgit v0.10.2


From 755642322aa66fbc5421a35fd3e1733f73e20083 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:29 +0100
Subject: perf_counter: use list_move_tail()

Instead of del/add use a move list-op.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b2e8389..0fe22c9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -89,8 +89,7 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	list_for_each_entry_safe(sibling, tmp,
 				 &counter->sibling_list, list_entry) {
 
-		list_del_init(&sibling->list_entry);
-		list_add_tail(&sibling->list_entry, &ctx->counter_list);
+		list_move_tail(&sibling->list_entry, &ctx->counter_list);
 		sibling->group_leader = sibling;
 	}
 }
@@ -959,8 +958,7 @@ static void rotate_ctx(struct perf_counter_context *ctx)
 	 */
 	perf_flags = hw_perf_save_disable();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		list_del(&counter->list_entry);
-		list_add_tail(&counter->list_entry, &ctx->counter_list);
+		list_move_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
 	hw_perf_restore(perf_flags);
-- 
cgit v0.10.2


From 60b3df9c1e24a18aabb412da9905208c5f04ebea Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:30 +0100
Subject: perf_counter: add comment to barrier

We need to ensure the enabled=0 write happens before we
start disabling the actual counters, so that a pcm_amd_enable()
will not enable one underneath us.

I think the race is impossible anyway, we always balance the
ops within any one context and perform enable() with IRQs disabled.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 1cedc34..a2e3b76 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -247,6 +247,10 @@ static u64 pmc_amd_save_disable_all(void)
 
 	enabled = cpuc->enabled;
 	cpuc->enabled = 0;
+	/*
+	 * ensure we write the disable before we start disabling the
+	 * counters proper, so that pcm_amd_enable() does the right thing.
+	 */
 	barrier();
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
-- 
cgit v0.10.2


From 82bae4f8c2fd64a2bb1e2e72c508853ed2b4a299 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:31 +0100
Subject: perf_counter: x86: use ULL postfix for 64bit constants

Fix a build warning on 32bit machines by explicitly marking the
constants as 64-bit.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a2e3b76..22dab06 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -84,9 +84,9 @@ static u64 pmc_intel_event_map(int event)
 
 static u64 pmc_intel_raw_event(u64 event)
 {
-#define CORE_EVNTSEL_EVENT_MASK		0x000000FF
-#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00
-#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000
+#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK 		\
 	(CORE_EVNTSEL_EVENT_MASK |	\
@@ -116,9 +116,9 @@ static u64 pmc_amd_event_map(int event)
 
 static u64 pmc_amd_raw_event(u64 event)
 {
-#define K7_EVNTSEL_EVENT_MASK	0x7000000FF
-#define K7_EVNTSEL_UNIT_MASK	0x00000FF00
-#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000
+#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
+#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
+#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000ULL
 
 #define K7_EVNTSEL_MASK			\
 	(K7_EVNTSEL_EVENT_MASK |	\
-- 
cgit v0.10.2


From 15dbf27cc18559a14e99609f78678aa86b9c6ff1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:32 +0100
Subject: perf_counter: software counter event infrastructure

Provide generic software counter infrastructure that supports
software events.

This will be used to allow sample based profiling based on software
events such as pagefaults. The current infrastructure can only
provide a count of such events, no place information.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index dde5645..3fefc3b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -126,6 +126,7 @@ struct hw_perf_counter {
 	unsigned long			counter_base;
 	int				nmi;
 	unsigned int			idx;
+	atomic64_t			count; /* software */
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
@@ -283,6 +284,8 @@ static inline int is_software_counter(struct perf_counter *counter)
 	return !counter->hw_event.raw && counter->hw_event.type < 0;
 }
 
+extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -295,10 +298,13 @@ static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_counter_unthrottle(void)			{ }
-static inline void hw_perf_restore(u64 ctrl)			{ }
+static inline void hw_perf_restore(u64 ctrl)				{ }
 static inline u64 hw_perf_save_disable(void)		      { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
+
+static inline void perf_swcounter_event(enum hw_event_types event, u64 nr,
+					int nmi, struct pt_regs *regs)	{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0fe22c9..eeb1b46 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1328,6 +1328,185 @@ static const struct file_operations perf_fops = {
 	.compat_ioctl		= perf_ioctl,
 };
 
+/*
+ * Generic software counter infrastructure
+ */
+
+static void perf_swcounter_update(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 prev, now;
+	s64 delta;
+
+again:
+	prev = atomic64_read(&hwc->prev_count);
+	now = atomic64_read(&hwc->count);
+	if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
+		goto again;
+
+	delta = now - prev;
+
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &hwc->period_left);
+}
+
+static void perf_swcounter_set_period(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	s64 left = atomic64_read(&hwc->period_left);
+	s64 period = hwc->irq_period;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_add(period, &hwc->period_left);
+	}
+
+	atomic64_set(&hwc->prev_count, -left);
+	atomic64_set(&hwc->count, -left);
+}
+
+static void perf_swcounter_save_and_restart(struct perf_counter *counter)
+{
+	perf_swcounter_update(counter);
+	perf_swcounter_set_period(counter);
+}
+
+static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
+{
+	struct perf_data *irqdata = counter->irqdata;
+
+	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+		irqdata->overrun++;
+	} else {
+		u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+		*p = data;
+		irqdata->len += sizeof(u64);
+	}
+}
+
+static void perf_swcounter_handle_group(struct perf_counter *sibling)
+{
+	struct perf_counter *counter, *group_leader = sibling->group_leader;
+
+	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
+		perf_swcounter_update(counter);
+		perf_swcounter_store_irq(sibling, counter->hw_event.type);
+		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
+	}
+}
+
+static void perf_swcounter_interrupt(struct perf_counter *counter,
+				     int nmi, struct pt_regs *regs)
+{
+	perf_swcounter_save_and_restart(counter);
+
+	switch (counter->hw_event.record_type) {
+	case PERF_RECORD_SIMPLE:
+		break;
+
+	case PERF_RECORD_IRQ:
+		perf_swcounter_store_irq(counter, instruction_pointer(regs));
+		break;
+
+	case PERF_RECORD_GROUP:
+		perf_swcounter_handle_group(counter);
+		break;
+	}
+
+	if (nmi) {
+		counter->wakeup_pending = 1;
+		set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+	} else
+		wake_up(&counter->waitq);
+}
+
+static int perf_swcounter_match(struct perf_counter *counter,
+				enum hw_event_types event,
+				struct pt_regs *regs)
+{
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return 0;
+
+	if (counter->hw_event.raw)
+		return 0;
+
+	if (counter->hw_event.type != event)
+		return 0;
+
+	if (counter->hw_event.exclude_user && user_mode(regs))
+		return 0;
+
+	if (counter->hw_event.exclude_kernel && !user_mode(regs))
+		return 0;
+
+	return 1;
+}
+
+static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
+				     enum hw_event_types event, u64 nr,
+				     int nmi, struct pt_regs *regs)
+{
+	struct perf_counter *counter;
+	unsigned long flags;
+	int neg;
+
+	if (list_empty(&ctx->counter_list))
+		return;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	/*
+	 * XXX: make counter_list RCU safe
+	 */
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (perf_swcounter_match(counter, event, regs)) {
+			neg = atomic64_add_negative(nr, &counter->hw.count);
+			if (counter->hw.irq_period && !neg)
+				perf_swcounter_interrupt(counter, nmi, regs);
+		}
+	}
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+}
+
+void perf_swcounter_event(enum hw_event_types event, u64 nr,
+			  int nmi, struct pt_regs *regs)
+{
+	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+
+	perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs);
+	if (cpuctx->task_ctx)
+		perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs);
+
+	put_cpu_var(perf_cpu_context);
+}
+
+static void perf_swcounter_read(struct perf_counter *counter)
+{
+	perf_swcounter_update(counter);
+}
+
+static int perf_swcounter_enable(struct perf_counter *counter)
+{
+	perf_swcounter_set_period(counter);
+	return 0;
+}
+
+static void perf_swcounter_disable(struct perf_counter *counter)
+{
+	perf_swcounter_update(counter);
+}
+
+/*
+ * Software counter: cpu wall time clock
+ */
+
 static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 {
 	int cpu = raw_smp_processor_id();
@@ -1365,6 +1544,10 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 };
 
 /*
+ * Software counter: task time clock
+ */
+
+/*
  * Called from within the scheduler:
  */
 static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
@@ -1420,6 +1603,10 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.read		= task_clock_perf_counter_read,
 };
 
+/*
+ * Software counter: page faults
+ */
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 #define cpu_page_faults()	__get_cpu_var(vm_event_states).event[PGFAULT]
 #else
@@ -1473,6 +1660,10 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = {
 	.read		= page_faults_perf_counter_read,
 };
 
+/*
+ * Software counter: context switches
+ */
+
 static u64 get_context_switches(struct perf_counter *counter)
 {
 	struct task_struct *curr = counter->ctx->task;
@@ -1521,6 +1712,10 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = {
 	.read		= context_switches_perf_counter_read,
 };
 
+/*
+ * Software counter: cpu migrations
+ */
+
 static inline u64 get_cpu_migrations(struct perf_counter *counter)
 {
 	struct task_struct *curr = counter->ctx->task;
@@ -1572,7 +1767,9 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
+	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	const struct hw_perf_counter_ops *hw_ops = NULL;
+	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -1618,6 +1815,10 @@ sw_perf_counter_init(struct perf_counter *counter)
 	default:
 		break;
 	}
+
+	if (hw_ops)
+		hwc->irq_period = hw_event->irq_period;
+
 	return hw_ops;
 }
 
-- 
cgit v0.10.2


From 7dd1fcc258b65da718f01e4684a7b9244501a9fb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:33 +0100
Subject: perf_counter: provide pagefault software events

We use the generic software counter infrastructure to provide
page fault events.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7699394..eda5b0c 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
+#include <linux/perf_counter.h>
 
 #include <asm/firmware.h>
 #include <asm/page.h>
@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
+	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b727..c872575 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -27,6 +27,7 @@
 #include <linux/tty.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/perf_counter.h>
 
 #include <asm-generic/sections.h>
 
@@ -1044,6 +1045,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
+	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+
 	/*
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index eeb1b46..1773c5d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1607,57 +1607,10 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
  * Software counter: page faults
  */
 
-#ifdef CONFIG_VM_EVENT_COUNTERS
-#define cpu_page_faults()	__get_cpu_var(vm_event_states).event[PGFAULT]
-#else
-#define cpu_page_faults()	0
-#endif
-
-static u64 get_page_faults(struct perf_counter *counter)
-{
-	struct task_struct *curr = counter->ctx->task;
-
-	if (curr)
-		return curr->maj_flt + curr->min_flt;
-	return cpu_page_faults();
-}
-
-static void page_faults_perf_counter_update(struct perf_counter *counter)
-{
-	u64 prev, now;
-	s64 delta;
-
-	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_page_faults(counter);
-
-	atomic64_set(&counter->hw.prev_count, now);
-
-	delta = now - prev;
-
-	atomic64_add(delta, &counter->count);
-}
-
-static void page_faults_perf_counter_read(struct perf_counter *counter)
-{
-	page_faults_perf_counter_update(counter);
-}
-
-static int page_faults_perf_counter_enable(struct perf_counter *counter)
-{
-	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
-		atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
-	return 0;
-}
-
-static void page_faults_perf_counter_disable(struct perf_counter *counter)
-{
-	page_faults_perf_counter_update(counter);
-}
-
 static const struct hw_perf_counter_ops perf_ops_page_faults = {
-	.enable		= page_faults_perf_counter_enable,
-	.disable	= page_faults_perf_counter_disable,
-	.read		= page_faults_perf_counter_read,
+	.enable		= perf_swcounter_enable,
+	.disable	= perf_swcounter_disable,
+	.read		= perf_swcounter_read,
 };
 
 /*
-- 
cgit v0.10.2


From ac17dc8e58f3069ea895cfff963adf98ff3cf6b2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:34 +0100
Subject: perf_counter: provide major/minor page fault software events

Provide separate sw counters for major and minor page faults.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index eda5b0c..17bbf6f 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -312,6 +312,7 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
 			preempt_disable();
@@ -319,8 +320,10 @@ good_area:
 			preempt_enable();
 		}
 #endif
-	} else
+	} else {
 		current->min_flt++;
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+	}
 	up_read(&mm->mmap_sem);
 	return 0;
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c872575..f2d3324 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1140,10 +1140,13 @@ good_area:
 		return;
 	}
 
-	if (fault & VM_FAULT_MAJOR)
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+	} else {
 		tsk->min_flt++;
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+	}
 
 	check_v8086_mode(regs, address, tsk);
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 3fefc3b..4b14a8e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -49,8 +49,10 @@ enum hw_event_types {
 	PERF_COUNT_PAGE_FAULTS		= -3,
 	PERF_COUNT_CONTEXT_SWITCHES	= -4,
 	PERF_COUNT_CPU_MIGRATIONS	= -5,
+	PERF_COUNT_PAGE_FAULTS_MIN	= -6,
+	PERF_COUNT_PAGE_FAULTS_MAJ	= -7,
 
-	PERF_SW_EVENTS_MIN		= -6,
+	PERF_SW_EVENTS_MIN		= -8,
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1773c5d..68950a3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1503,6 +1503,12 @@ static void perf_swcounter_disable(struct perf_counter *counter)
 	perf_swcounter_update(counter);
 }
 
+static const struct hw_perf_counter_ops perf_ops_generic = {
+	.enable		= perf_swcounter_enable,
+	.disable	= perf_swcounter_disable,
+	.read		= perf_swcounter_read,
+};
+
 /*
  * Software counter: cpu wall time clock
  */
@@ -1604,16 +1610,6 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 };
 
 /*
- * Software counter: page faults
- */
-
-static const struct hw_perf_counter_ops perf_ops_page_faults = {
-	.enable		= perf_swcounter_enable,
-	.disable	= perf_swcounter_disable,
-	.read		= perf_swcounter_read,
-};
-
-/*
  * Software counter: context switches
  */
 
@@ -1753,9 +1749,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
-		if (!(counter->hw_event.exclude_user ||
-		      counter->hw_event.exclude_kernel))
-			hw_ops = &perf_ops_page_faults;
+	case PERF_COUNT_PAGE_FAULTS_MIN:
+	case PERF_COUNT_PAGE_FAULTS_MAJ:
+		hw_ops = &perf_ops_generic;
 		break;
 	case PERF_COUNT_CONTEXT_SWITCHES:
 		if (!counter->hw_event.exclude_kernel)
-- 
cgit v0.10.2


From d6d020e9957745c61285ef3da9f294c5e6801f0f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:35 +0100
Subject: perf_counter: hrtimer based sampling for software time events

Use hrtimers to profile timer based sampling for the software time
counters.

This allows platforms without hardware counter support to still
perform sample based profiling.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4b14a8e..dfb4c7c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -114,6 +114,7 @@ struct perf_counter_hw_event {
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/spinlock.h>
+#include <linux/hrtimer.h>
 #include <asm/atomic.h>
 
 struct task_struct;
@@ -123,12 +124,19 @@ struct task_struct;
  */
 struct hw_perf_counter {
 #ifdef CONFIG_PERF_COUNTERS
-	u64				config;
-	unsigned long			config_base;
-	unsigned long			counter_base;
-	int				nmi;
-	unsigned int			idx;
-	atomic64_t			count; /* software */
+	union {
+		struct { /* hardware */
+			u64				config;
+			unsigned long			config_base;
+			unsigned long			counter_base;
+			int				nmi;
+			unsigned int			idx;
+		};
+		union { /* software */
+			atomic64_t			count;
+			struct hrtimer			hrtimer;
+		};
+	};
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 68950a3..f9330d5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1395,7 +1395,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling)
 	struct perf_counter *counter, *group_leader = sibling->group_leader;
 
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		perf_swcounter_update(counter);
+		counter->hw_ops->read(counter);
 		perf_swcounter_store_irq(sibling, counter->hw_event.type);
 		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
 	}
@@ -1404,8 +1404,6 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling)
 static void perf_swcounter_interrupt(struct perf_counter *counter,
 				     int nmi, struct pt_regs *regs)
 {
-	perf_swcounter_save_and_restart(counter);
-
 	switch (counter->hw_event.record_type) {
 	case PERF_RECORD_SIMPLE:
 		break;
@@ -1426,6 +1424,38 @@ static void perf_swcounter_interrupt(struct perf_counter *counter,
 		wake_up(&counter->waitq);
 }
 
+static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+{
+	struct perf_counter *counter;
+	struct pt_regs *regs;
+
+	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
+	counter->hw_ops->read(counter);
+
+	regs = get_irq_regs();
+	/*
+	 * In case we exclude kernel IPs or are somehow not in interrupt
+	 * context, provide the next best thing, the user IP.
+	 */
+	if ((counter->hw_event.exclude_kernel || !regs) &&
+			!counter->hw_event.exclude_user)
+		regs = task_pt_regs(current);
+
+	if (regs)
+		perf_swcounter_interrupt(counter, 0, regs);
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+
+	return HRTIMER_RESTART;
+}
+
+static void perf_swcounter_overflow(struct perf_counter *counter,
+				    int nmi, struct pt_regs *regs)
+{
+	perf_swcounter_save_and_restart(counter);
+	perf_swcounter_interrupt(counter, nmi, regs);
+}
+
 static int perf_swcounter_match(struct perf_counter *counter,
 				enum hw_event_types event,
 				struct pt_regs *regs)
@@ -1448,13 +1478,20 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	return 1;
 }
 
+static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
+			       int nmi, struct pt_regs *regs)
+{
+	int neg = atomic64_add_negative(nr, &counter->hw.count);
+	if (counter->hw.irq_period && !neg)
+		perf_swcounter_overflow(counter, nmi, regs);
+}
+
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     enum hw_event_types event, u64 nr,
 				     int nmi, struct pt_regs *regs)
 {
 	struct perf_counter *counter;
 	unsigned long flags;
-	int neg;
 
 	if (list_empty(&ctx->counter_list))
 		return;
@@ -1465,11 +1502,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 	 * XXX: make counter_list RCU safe
 	 */
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (perf_swcounter_match(counter, event, regs)) {
-			neg = atomic64_add_negative(nr, &counter->hw.count);
-			if (counter->hw.irq_period && !neg)
-				perf_swcounter_interrupt(counter, nmi, regs);
-		}
+		if (perf_swcounter_match(counter, event, regs))
+			perf_swcounter_add(counter, nr, nmi, regs);
 	}
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
@@ -1513,14 +1547,6 @@ static const struct hw_perf_counter_ops perf_ops_generic = {
  * Software counter: cpu wall time clock
  */
 
-static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
-{
-	int cpu = raw_smp_processor_id();
-
-	atomic64_set(&counter->hw.prev_count, cpu_clock(cpu));
-	return 0;
-}
-
 static void cpu_clock_perf_counter_update(struct perf_counter *counter)
 {
 	int cpu = raw_smp_processor_id();
@@ -1533,8 +1559,26 @@ static void cpu_clock_perf_counter_update(struct perf_counter *counter)
 	atomic64_add(now - prev, &counter->count);
 }
 
+static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	int cpu = raw_smp_processor_id();
+
+	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+	if (hwc->irq_period) {
+		hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hwc->hrtimer.function = perf_swcounter_hrtimer;
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(hwc->irq_period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
+
+	return 0;
+}
+
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
 {
+	hrtimer_cancel(&counter->hw.hrtimer);
 	cpu_clock_perf_counter_update(counter);
 }
 
@@ -1580,27 +1624,33 @@ static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now
 	atomic64_add(delta, &counter->count);
 }
 
-static void task_clock_perf_counter_read(struct perf_counter *counter)
-{
-	u64 now = task_clock_perf_counter_val(counter, 1);
-
-	task_clock_perf_counter_update(counter, now);
-}
-
 static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
-		atomic64_set(&counter->hw.prev_count,
-			     task_clock_perf_counter_val(counter, 0));
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0));
+	if (hwc->irq_period) {
+		hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hwc->hrtimer.function = perf_swcounter_hrtimer;
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(hwc->irq_period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
 
 	return 0;
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	u64 now = task_clock_perf_counter_val(counter, 0);
+	hrtimer_cancel(&counter->hw.hrtimer);
+	task_clock_perf_counter_update(counter,
+			task_clock_perf_counter_val(counter, 0));
+}
 
-	task_clock_perf_counter_update(counter, now);
+static void task_clock_perf_counter_read(struct perf_counter *counter)
+{
+	task_clock_perf_counter_update(counter,
+			task_clock_perf_counter_val(counter, 1));
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
@@ -1729,16 +1779,12 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 */
 	switch (counter->hw_event.type) {
 	case PERF_COUNT_CPU_CLOCK:
-		if (!(counter->hw_event.exclude_user ||
-		      counter->hw_event.exclude_kernel ||
-		      counter->hw_event.exclude_hv))
-			hw_ops = &perf_ops_cpu_clock;
+		hw_ops = &perf_ops_cpu_clock;
+
+		if (hw_event->irq_period && hw_event->irq_period < 10000)
+			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
-		if (counter->hw_event.exclude_user ||
-		    counter->hw_event.exclude_kernel ||
-		    counter->hw_event.exclude_hv)
-			break;
 		/*
 		 * If the user instantiates this as a per-cpu counter,
 		 * use the cpu_clock counter instead.
@@ -1747,6 +1793,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 			hw_ops = &perf_ops_task_clock;
 		else
 			hw_ops = &perf_ops_cpu_clock;
+
+		if (hw_event->irq_period && hw_event->irq_period < 10000)
+			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
-- 
cgit v0.10.2


From 592903cdcbf606a838056bae6d03fc557806c914 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:36 +0100
Subject: perf_counter: add an event_list

I noticed that the counter_list only includes top-level counters, thus
perf_swcounter_event() will miss sw-counters in groups.

Since perf_swcounter_event() also wants an RCU safe list, create a new
event_list that includes all counters and uses RCU list ops and use call_rcu
to free the counter structure.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index dfb4c7c..08c11a6 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -187,6 +187,7 @@ struct file;
 struct perf_counter {
 #ifdef CONFIG_PERF_COUNTERS
 	struct list_head		list_entry;
+	struct list_head		event_entry;
 	struct list_head		sibling_list;
 	struct perf_counter		*group_leader;
 	const struct hw_perf_counter_ops *hw_ops;
@@ -220,6 +221,8 @@ struct perf_counter {
 	struct perf_data		*irqdata;
 	struct perf_data		*usrdata;
 	struct perf_data		data[2];
+
+	struct rcu_head			rcu_head;
 #endif
 };
 
@@ -243,6 +246,7 @@ struct perf_counter_context {
 	struct mutex		mutex;
 
 	struct list_head	counter_list;
+	struct list_head	event_list;
 	int			nr_counters;
 	int			nr_active;
 	int			is_active;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f9330d5..8d6ecfa 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -22,6 +22,7 @@
 #include <linux/perf_counter.h>
 #include <linux/mm.h>
 #include <linux/vmstat.h>
+#include <linux/rculist.h>
 
 /*
  * Each CPU has a list of per CPU counters:
@@ -72,6 +73,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
 	else
 		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+
+	list_add_rcu(&counter->event_entry, &ctx->event_list);
 }
 
 static void
@@ -80,6 +83,7 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	struct perf_counter *sibling, *tmp;
 
 	list_del_init(&counter->list_entry);
+	list_del_rcu(&counter->event_entry);
 
 	/*
 	 * If this was a group counter with sibling counters then
@@ -1133,6 +1137,14 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	return ctx;
 }
 
+static void free_counter_rcu(struct rcu_head *head)
+{
+	struct perf_counter *counter;
+
+	counter = container_of(head, struct perf_counter, rcu_head);
+	kfree(counter);
+}
+
 /*
  * Called when the last reference to the file is gone.
  */
@@ -1151,7 +1163,7 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
-	kfree(counter);
+	call_rcu(&counter->rcu_head, free_counter_rcu);
 	put_context(ctx);
 
 	return 0;
@@ -1491,22 +1503,16 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     int nmi, struct pt_regs *regs)
 {
 	struct perf_counter *counter;
-	unsigned long flags;
 
-	if (list_empty(&ctx->counter_list))
+	if (list_empty(&ctx->event_list))
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
-
-	/*
-	 * XXX: make counter_list RCU safe
-	 */
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
 		if (perf_swcounter_match(counter, event, regs))
 			perf_swcounter_add(counter, nr, nmi, regs);
 	}
-
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	rcu_read_unlock();
 }
 
 void perf_swcounter_event(enum hw_event_types event, u64 nr,
@@ -1846,6 +1852,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	mutex_init(&counter->mutex);
 	INIT_LIST_HEAD(&counter->list_entry);
+	INIT_LIST_HEAD(&counter->event_entry);
 	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
 
@@ -1992,6 +1999,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->counter_list);
+	INIT_LIST_HEAD(&ctx->event_list);
 	ctx->task = task;
 }
 
-- 
cgit v0.10.2


From 039fc91e064b81c2820ff16c304be5aba35fd126 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 16:43:47 +0100
Subject: perf_counter: fix hrtimer sampling

Impact: fix deadlock with perfstat

Fix for the perfstat fubar..

We cannot unconditionally call hrtimer_cancel() without ever having done
hrtimer_init() on the thing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <1236959027.22447.149.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8d6ecfa..d6cc222 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1571,9 +1571,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 	int cpu = raw_smp_processor_id();
 
 	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
-		hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-		hwc->hrtimer.function = perf_swcounter_hrtimer;
 		__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(hwc->irq_period), 0,
 				HRTIMER_MODE_REL, 0);
@@ -1635,9 +1635,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 
 	atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0));
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
-		hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-		hwc->hrtimer.function = perf_swcounter_hrtimer;
 		__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(hwc->irq_period), 0,
 				HRTIMER_MODE_REL, 0);
-- 
cgit v0.10.2


From 4e193bd4dfdc983d12969b51439b4a1fbaf2daad Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Sat, 14 Mar 2009 14:29:25 +0100
Subject: perf_counter: include missing header

Impact: build fix

In order to compile a kernel with performance counter patches,
<asm/irq_regs.h> has to be included to provide the declaration of
struct pt_regs *get_irq_regs(void);

[ This bug was masked by unrelated x86 header file changes in the
  x86 tree, but occurs in the tip:perfcounters/core standalone
  tree. ]

Signed-off-by: Tim Blechmann <tim@klingt.org>
Orig-LKML-Reference: <20090314142925.49c29c17@thinkpad>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d6cc222..0018c5e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -24,6 +24,8 @@
 #include <linux/vmstat.h>
 #include <linux/rculist.h>
 
+#include <asm/irq_regs.h>
+
 /*
  * Each CPU has a list of per CPU counters:
  */
-- 
cgit v0.10.2


From 7bb497bd885eedd0f56dfe3cc1b5ff20710d33b9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 18 Mar 2009 08:59:21 +0100
Subject: perf_counter: fix crash on perfmon v1 systems

Impact: fix boot crash on Intel Perfmon Version 1 systems

Intel Perfmon v1 does not support the global MSRs, nor does
it offer the generalized MSR ranges. So support v2 and later
CPUs only.

Also mark pmc_ops as read-mostly - to avoid false cacheline
sharing.

Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 22dab06..6cba9d4 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -57,12 +57,14 @@ struct pmc_x86_ops {
 	int		max_events;
 };
 
-static struct pmc_x86_ops *pmc_ops;
+static struct pmc_x86_ops *pmc_ops __read_mostly;
 
 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
 	.enabled = 1,
 };
 
+static __read_mostly int intel_perfmon_version;
+
 /*
  * Intel PerfMon v3. Used on Core2 and later.
  */
@@ -613,7 +615,7 @@ void perf_counter_print_debug(void)
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+	if (intel_perfmon_version >= 2) {
 		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
@@ -930,10 +932,10 @@ static struct pmc_x86_ops pmc_amd_ops = {
 
 static struct pmc_x86_ops *pmc_intel_init(void)
 {
+	union cpuid10_edx edx;
 	union cpuid10_eax eax;
-	unsigned int ebx;
 	unsigned int unused;
-	union cpuid10_edx edx;
+	unsigned int ebx;
 
 	/*
 	 * Check whether the Architectural PerfMon supports
@@ -943,8 +945,12 @@ static struct pmc_x86_ops *pmc_intel_init(void)
 	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
 		return NULL;
 
+	intel_perfmon_version = eax.split.version_id;
+	if (intel_perfmon_version < 2)
+		return NULL;
+
 	pr_info("Intel Performance Monitoring support detected.\n");
-	pr_info("... version:         %d\n", eax.split.version_id);
+	pr_info("... version:         %d\n", intel_perfmon_version);
 	pr_info("... bit width:       %d\n", eax.split.bit_width);
 	pr_info("... mask length:     %d\n", eax.split.mask_length);
 
-- 
cgit v0.10.2


From b6c5a71da1477d261bc36254fe1f20d32b57598d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 16 Mar 2009 21:00:00 +1100
Subject: perf_counter: abstract wakeup flag setting in core to fix powerpc
 build

Impact: build fix for powerpc

Commit bd753921015e7905 ("perf_counter: software counter event
infrastructure") introduced a use of TIF_PERF_COUNTERS into the core
perfcounter code.  This breaks the build on powerpc because we use
a flag in a per-cpu area to signal wakeups on powerpc rather than
a thread_info flag, because the thread_info flags have to be
manipulated with atomic operations and are thus slower than per-cpu
flags.

This fixes the by changing the core to use an abstracted
set_perf_counter_pending() function, which is defined on x86 to set
the TIF_PERF_COUNTERS flag and on powerpc to set the per-cpu flag
(paca->perf_counter_pending).  It changes the previous powerpc
definition of set_perf_counter_pending to not take an argument and
adds a clear_perf_counter_pending, so as to simplify the definition
on x86.

On x86, set_perf_counter_pending() is defined as a macro.  Defining
it as a static inline in arch/x86/include/asm/perf_counters.h causes
compile failures because <asm/perf_counters.h> gets included early in
<linux/sched.h>, and the definitions of set_tsk_thread_flag etc. are
therefore not available in <asm/perf_counters.h>.  (On powerpc this
problem is avoided by defining set_perf_counter_pending etc. in
<asm/hw_irq.h>.)

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b43076f..cb32d57 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -142,10 +142,17 @@ static inline unsigned long get_perf_counter_pending(void)
 	return x;
 }
 
-static inline void set_perf_counter_pending(int x)
+static inline void set_perf_counter_pending(void)
 {
 	asm volatile("stb %0,%1(13)" : :
-		"r" (x),
+		"r" (1),
+		"i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+static inline void clear_perf_counter_pending(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (0),
 		"i" (offsetof(struct paca_struct, perf_counter_pending)));
 }
 
@@ -158,7 +165,8 @@ static inline unsigned long get_perf_counter_pending(void)
 	return 0;
 }
 
-static inline void set_perf_counter_pending(int x) {}
+static inline void set_perf_counter_pending(void) {}
+static inline void clear_perf_counter_pending(void) {}
 static inline void perf_counter_do_pending(void) {}
 #endif /* CONFIG_PERF_COUNTERS */
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 0d2e37c..469e963 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -104,13 +104,6 @@ static inline notrace void set_soft_enabled(unsigned long enable)
 	: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
 }
 
-#ifdef CONFIG_PERF_COUNTERS
-notrace void __weak perf_counter_do_pending(void)
-{
-	set_perf_counter_pending(0);
-}
-#endif
-
 notrace void raw_local_irq_restore(unsigned long en)
 {
 	/*
@@ -142,8 +135,10 @@ notrace void raw_local_irq_restore(unsigned long en)
 			iseries_handle_interrupts();
 	}
 
-	if (get_perf_counter_pending())
+	if (get_perf_counter_pending()) {
+		clear_perf_counter_pending();
 		perf_counter_do_pending();
+	}
 
 	/*
 	 * if (get_paca()->hard_enabled) return;
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 0e33d27..5008762 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -653,7 +653,6 @@ void perf_counter_do_pending(void)
 	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
 	struct perf_counter *counter;
 
-	set_perf_counter_pending(0);
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
 		if (counter && counter->wakeup_pending) {
@@ -811,7 +810,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 			perf_counter_do_pending();
 			irq_exit();
 		} else {
-			set_perf_counter_pending(1);
+			set_perf_counter_pending();
 		}
 	}
 }
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 2e08ed7..1662043 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,6 +84,9 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
 
+#define set_perf_counter_pending()	\
+		set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(int nmi);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0018c5e..b39456a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1433,7 +1433,7 @@ static void perf_swcounter_interrupt(struct perf_counter *counter,
 
 	if (nmi) {
 		counter->wakeup_pending = 1;
-		set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+		set_perf_counter_pending();
 	} else
 		wake_up(&counter->waitq);
 }
-- 
cgit v0.10.2


From 01ef09d9ffb5ce9f8d62d1e5206da3d5ca612acc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:11 +0100
Subject: perf_counter: fix uninitialized usage of event_list

Impact: fix boot crash

When doing the generic context switch event I ran into some early
boot hangs, which were caused by inf func recursion (event, fault,
event, fault).

I eventually tracked it down to event_list not being initialized
at the time of the first event. Fix this.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.195392657@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 219748d..ca226a9 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -124,6 +124,8 @@ extern struct cred init_cred;
 # define INIT_PERF_COUNTERS(tsk)					\
 	.perf_counter_ctx.counter_list =				\
 		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),	\
+	.perf_counter_ctx.event_list =					\
+		LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list),	\
 	.perf_counter_ctx.lock =					\
 		__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
 #else
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b39456a..4c4e9eb 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1506,7 +1506,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 {
 	struct perf_counter *counter;
 
-	if (list_empty(&ctx->event_list))
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
 		return;
 
 	rcu_read_lock();
-- 
cgit v0.10.2


From 4a0deca657f3dbb8a707b5dc8f173beec01e7ed2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:12 +0100
Subject: perf_counter: generic context switch event

Impact: cleanup

Use the generic software events for context switches.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.283522645@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75b2fc5..7ed41f7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -138,7 +138,6 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
-extern u64 cpu_nr_switches(int cpu);
 extern u64 cpu_nr_migrations(int cpu);
 
 extern unsigned long get_parent_ip(unsigned long addr);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4c4e9eb..99d5930 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -710,10 +710,13 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct pt_regs *regs;
 
 	if (likely(!cpuctx->task_ctx))
 		return;
 
+	regs = task_pt_regs(task);
+	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
@@ -1668,58 +1671,6 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 };
 
 /*
- * Software counter: context switches
- */
-
-static u64 get_context_switches(struct perf_counter *counter)
-{
-	struct task_struct *curr = counter->ctx->task;
-
-	if (curr)
-		return curr->nvcsw + curr->nivcsw;
-	return cpu_nr_switches(smp_processor_id());
-}
-
-static void context_switches_perf_counter_update(struct perf_counter *counter)
-{
-	u64 prev, now;
-	s64 delta;
-
-	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_context_switches(counter);
-
-	atomic64_set(&counter->hw.prev_count, now);
-
-	delta = now - prev;
-
-	atomic64_add(delta, &counter->count);
-}
-
-static void context_switches_perf_counter_read(struct perf_counter *counter)
-{
-	context_switches_perf_counter_update(counter);
-}
-
-static int context_switches_perf_counter_enable(struct perf_counter *counter)
-{
-	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
-		atomic64_set(&counter->hw.prev_count,
-			     get_context_switches(counter));
-	return 0;
-}
-
-static void context_switches_perf_counter_disable(struct perf_counter *counter)
-{
-	context_switches_perf_counter_update(counter);
-}
-
-static const struct hw_perf_counter_ops perf_ops_context_switches = {
-	.enable		= context_switches_perf_counter_enable,
-	.disable	= context_switches_perf_counter_disable,
-	.read		= context_switches_perf_counter_read,
-};
-
-/*
  * Software counter: cpu migrations
  */
 
@@ -1808,11 +1759,8 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
 	case PERF_COUNT_PAGE_FAULTS_MAJ:
-		hw_ops = &perf_ops_generic;
-		break;
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		if (!counter->hw_event.exclude_kernel)
-			hw_ops = &perf_ops_context_switches;
+		hw_ops = &perf_ops_generic;
 		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
 		if (!counter->hw_event.exclude_kernel)
diff --git a/kernel/sched.c b/kernel/sched.c
index 39e7086..f76e3c0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2900,14 +2900,8 @@ unsigned long nr_active(void)
 
 /*
  * Externally visible per-cpu scheduler statistics:
- * cpu_nr_switches(cpu) - number of context switches on that cpu
  * cpu_nr_migrations(cpu) - number of migrations into that cpu
  */
-u64 cpu_nr_switches(int cpu)
-{
-	return cpu_rq(cpu)->nr_switches;
-}
-
 u64 cpu_nr_migrations(int cpu)
 {
 	return cpu_rq(cpu)->nr_migrations_in;
-- 
cgit v0.10.2


From f16009527595ee562308653bc3d0039166d2ab15 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:16 +0100
Subject: perf_counter: fix up counter free paths

Impact: fix crash during perfcounters use

I found another counter free path, create a free_counter() call to
accomodate generic tear-down.

Fixes an RCU bug.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.652078652@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 99d5930..97f891f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1150,6 +1150,11 @@ static void free_counter_rcu(struct rcu_head *head)
 	kfree(counter);
 }
 
+static void free_counter(struct perf_counter *counter)
+{
+	call_rcu(&counter->rcu_head, free_counter_rcu);
+}
+
 /*
  * Called when the last reference to the file is gone.
  */
@@ -1168,7 +1173,7 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
-	call_rcu(&counter->rcu_head, free_counter_rcu);
+	free_counter(counter);
 	put_context(ctx);
 
 	return 0;
@@ -2128,10 +2133,10 @@ __perf_counter_exit_task(struct task_struct *child,
 					 list_entry) {
 			if (sub->parent) {
 				sync_child_counter(sub, sub->parent);
-				kfree(sub);
+				free_counter(sub);
 			}
 		}
-		kfree(child_counter);
+		free_counter(child_counter);
 	}
 }
 
-- 
cgit v0.10.2


From e077df4f439681e43f0db8255b2d215b342ebdc6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:17 +0100
Subject: perf_counter: hook up the tracepoint events

Impact: new perfcounters feature

Enable usage of tracepoints as perf counter events.

tracepoint event ids can be found in /debug/tracing/event/*/*/id
and (for now) are represented as -65536+id in the type field.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.744044174@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 08c11a6..065984c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -53,6 +53,8 @@ enum hw_event_types {
 	PERF_COUNT_PAGE_FAULTS_MAJ	= -7,
 
 	PERF_SW_EVENTS_MIN		= -8,
+
+	PERF_TP_EVENTS_MIN		= -65536
 };
 
 /*
@@ -222,6 +224,7 @@ struct perf_counter {
 	struct perf_data		*usrdata;
 	struct perf_data		data[2];
 
+	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
 #endif
 };
diff --git a/init/Kconfig b/init/Kconfig
index 38a2ecd..4f64714 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -947,6 +947,11 @@ config PERF_COUNTERS
 
 	  Say Y if unsure.
 
+config EVENT_PROFILE
+	bool "Tracepoint profile sources"
+	depends on PERF_COUNTERS && EVENT_TRACER
+	default y
+
 endmenu
 
 config VM_EVENT_COUNTERS
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 97f891f..0bbe3e4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1152,6 +1152,9 @@ static void free_counter_rcu(struct rcu_head *head)
 
 static void free_counter(struct perf_counter *counter)
 {
+	if (counter->destroy)
+		counter->destroy(counter);
+
 	call_rcu(&counter->rcu_head, free_counter_rcu);
 }
 
@@ -1727,6 +1730,45 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
 	.read		= cpu_migrations_perf_counter_read,
 };
 
+#ifdef CONFIG_EVENT_PROFILE
+void perf_tpcounter_event(int event_id)
+{
+	perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1,
+			task_pt_regs(current));
+}
+
+extern int ftrace_profile_enable(int);
+extern void ftrace_profile_disable(int);
+
+static void tp_perf_counter_destroy(struct perf_counter *counter)
+{
+	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+
+	ftrace_profile_disable(event_id);
+}
+
+static const struct hw_perf_counter_ops *
+tp_perf_counter_init(struct perf_counter *counter)
+{
+	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+	int ret;
+
+	ret = ftrace_profile_enable(event_id);
+	if (ret)
+		return NULL;
+
+	counter->destroy = tp_perf_counter_destroy;
+
+	return &perf_ops_generic;
+}
+#else
+static const struct hw_perf_counter_ops *
+tp_perf_counter_init(struct perf_counter *counter)
+{
+	return NULL;
+}
+#endif
+
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
@@ -1772,6 +1814,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 			hw_ops = &perf_ops_cpu_migrations;
 		break;
 	default:
+		hw_ops = tp_perf_counter_init(counter);
 		break;
 	}
 
-- 
cgit v0.10.2


From b8e83514b64577b48bfb794fe85fcde40a9343ca Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:18 +0100
Subject: perf_counter: revamp syscall input ABI

Impact: modify ABI

The hardware/software classification in hw_event->type became a little
strained due to the addition of tracepoint tracing.

Instead split up the field and provide a type field to explicitly specify
the counter type, while using the event_id field to specify which event to
use.

Raw counters still work as before, only the raw config now goes into
raw_event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.836807573@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5008762..26f69dc 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -602,7 +602,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 	if ((s64)counter->hw_event.irq_period < 0)
 		return NULL;
-	ev = counter->hw_event.type;
+	ev = counter->hw_event.event_id;
 	if (!counter->hw_event.raw) {
 		if (ev >= ppmu->n_generic ||
 		    ppmu->generic_events[ev] == 0)
@@ -692,7 +692,7 @@ static void perf_handle_group(struct perf_counter *counter)
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
 		if (sub != counter)
 			sub->hw_ops->read(sub);
-		perf_store_irq_data(counter, sub->hw_event.type);
+		perf_store_irq_data(counter, sub->hw_event.event_config);
 		perf_store_irq_data(counter, atomic64_read(&sub->count));
 	}
 }
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6cba9d4..d844ae4 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (hw_event->raw) {
-		hwc->config |= pmc_ops->raw_event(hw_event->type);
+	if (hw_event->raw_type) {
+		hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id);
 	} else {
-		if (hw_event->type >= pmc_ops->max_events)
+		if (hw_event->event_id >= pmc_ops->max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= pmc_ops->event_map(hw_event->type);
+		hwc->config |= pmc_ops->event_map(hw_event->event_id);
 	}
 	counter->wakeup_pending = 0;
 
@@ -715,7 +715,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
 
 		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
-		perf_store_irq_data(sibling, counter->hw_event.type);
+		perf_store_irq_data(sibling, counter->hw_event.event_config);
 		perf_store_irq_data(sibling, atomic64_read(&counter->count));
 	}
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 065984c..8f93949 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -21,56 +21,81 @@
  */
 
 /*
- * Generalized performance counter event types, used by the hw_event.type
- * parameter of the sys_perf_counter_open() syscall:
+ * hw_event.type
  */
-enum hw_event_types {
+enum perf_event_types {
+	PERF_TYPE_HARDWARE		= 0,
+	PERF_TYPE_SOFTWARE		= 1,
+	PERF_TYPE_TRACEPOINT		= 2,
+
 	/*
-	 * Common hardware events, generalized by the kernel:
+	 * available TYPE space, raw is the max value.
 	 */
-	PERF_COUNT_CPU_CYCLES		=  0,
-	PERF_COUNT_INSTRUCTIONS		=  1,
-	PERF_COUNT_CACHE_REFERENCES	=  2,
-	PERF_COUNT_CACHE_MISSES		=  3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
-	PERF_COUNT_BRANCH_MISSES	=  5,
-	PERF_COUNT_BUS_CYCLES		=  6,
 
-	PERF_HW_EVENTS_MAX		=  7,
+	PERF_TYPE_RAW			= 128,
+};
 
+/*
+ * Generalized performance counter event types, used by the hw_event.event_id
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_ids {
 	/*
-	 * Special "software" counters provided by the kernel, even if
-	 * the hardware does not support performance counters. These
-	 * counters measure various physical and sw events of the
-	 * kernel (and allow the profiling of them as well):
+	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CPU_CLOCK		= -1,
-	PERF_COUNT_TASK_CLOCK		= -2,
-	PERF_COUNT_PAGE_FAULTS		= -3,
-	PERF_COUNT_CONTEXT_SWITCHES	= -4,
-	PERF_COUNT_CPU_MIGRATIONS	= -5,
-	PERF_COUNT_PAGE_FAULTS_MIN	= -6,
-	PERF_COUNT_PAGE_FAULTS_MAJ	= -7,
-
-	PERF_SW_EVENTS_MIN		= -8,
+	PERF_COUNT_CPU_CYCLES		= 0,
+	PERF_COUNT_INSTRUCTIONS		= 1,
+	PERF_COUNT_CACHE_REFERENCES	= 2,
+	PERF_COUNT_CACHE_MISSES		= 3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_BRANCH_MISSES	= 5,
+	PERF_COUNT_BUS_CYCLES		= 6,
+
+	PERF_HW_EVENTS_MAX		= 7,
+};
 
-	PERF_TP_EVENTS_MIN		= -65536
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum sw_event_ids {
+	PERF_COUNT_CPU_CLOCK		= 0,
+	PERF_COUNT_TASK_CLOCK		= 1,
+	PERF_COUNT_PAGE_FAULTS		= 2,
+	PERF_COUNT_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
+
+	PERF_SW_EVENTS_MAX		= 7,
 };
 
 /*
  * IRQ-notification data record type:
  */
 enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		=  0,
-	PERF_RECORD_IRQ			=  1,
-	PERF_RECORD_GROUP		=  2,
+	PERF_RECORD_SIMPLE		= 0,
+	PERF_RECORD_IRQ			= 1,
+	PERF_RECORD_GROUP		= 2,
 };
 
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	__s64			type;
+	union {
+		struct {
+			__u64			event_id	: 56,
+						type		:  8;
+		};
+		struct {
+			__u64			raw_event_id	: 63,
+						raw_type	:  1;
+		};
+		__u64		event_config;
+	};
 
 	__u64			irq_period;
 	__u64			record_type;
@@ -78,7 +103,6 @@ struct perf_counter_hw_event {
 
 	__u64			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
-				raw	       :  1, /* raw event type        */
 				inherit	       :  1, /* children inherit it   */
 				pinned	       :  1, /* must always be on PMU */
 				exclusive      :  1, /* only group on PMU     */
@@ -87,7 +111,7 @@ struct perf_counter_hw_event {
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
 
-				__reserved_1   : 54;
+				__reserved_1   : 55;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -298,10 +322,11 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
+	return !counter->hw_event.raw_type &&
+		counter->hw_event.type != PERF_TYPE_HARDWARE;
 }
 
-extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *);
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
 
 #else
 static inline void
@@ -320,7 +345,7 @@ static inline u64 hw_perf_save_disable(void)		      { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 
-static inline void perf_swcounter_event(enum hw_event_types event, u64 nr,
+static inline void perf_swcounter_event(u32 event, u64 nr,
 					int nmi, struct pt_regs *regs)	{ }
 #endif
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0bbe3e4..68a56a6 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1395,12 +1395,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 	atomic64_set(&hwc->count, -left);
 }
 
-static void perf_swcounter_save_and_restart(struct perf_counter *counter)
-{
-	perf_swcounter_update(counter);
-	perf_swcounter_set_period(counter);
-}
-
 static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
 {
 	struct perf_data *irqdata = counter->irqdata;
@@ -1421,7 +1415,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling)
 
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
 		counter->hw_ops->read(counter);
-		perf_swcounter_store_irq(sibling, counter->hw_event.type);
+		perf_swcounter_store_irq(sibling, counter->hw_event.event_config);
 		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
 	}
 }
@@ -1477,21 +1471,25 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 static void perf_swcounter_overflow(struct perf_counter *counter,
 				    int nmi, struct pt_regs *regs)
 {
-	perf_swcounter_save_and_restart(counter);
+	perf_swcounter_update(counter);
+	perf_swcounter_set_period(counter);
 	perf_swcounter_interrupt(counter, nmi, regs);
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
-				enum hw_event_types event,
-				struct pt_regs *regs)
+				enum perf_event_types type,
+				u32 event, struct pt_regs *regs)
 {
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return 0;
 
-	if (counter->hw_event.raw)
+	if (counter->hw_event.raw_type)
+		return 0;
+
+	if (counter->hw_event.type != type)
 		return 0;
 
-	if (counter->hw_event.type != event)
+	if (counter->hw_event.event_id != event)
 		return 0;
 
 	if (counter->hw_event.exclude_user && user_mode(regs))
@@ -1512,8 +1510,8 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 }
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
-				     enum hw_event_types event, u64 nr,
-				     int nmi, struct pt_regs *regs)
+				     enum perf_event_types type, u32 event,
+				     u64 nr, int nmi, struct pt_regs *regs)
 {
 	struct perf_counter *counter;
 
@@ -1522,24 +1520,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_swcounter_match(counter, event, regs))
+		if (perf_swcounter_match(counter, type, event, regs))
 			perf_swcounter_add(counter, nr, nmi, regs);
 	}
 	rcu_read_unlock();
 }
 
-void perf_swcounter_event(enum hw_event_types event, u64 nr,
-			  int nmi, struct pt_regs *regs)
+static void __perf_swcounter_event(enum perf_event_types type, u32 event,
+				   u64 nr, int nmi, struct pt_regs *regs)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
 
-	perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs);
-	if (cpuctx->task_ctx)
-		perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs);
+	perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
+	if (cpuctx->task_ctx) {
+		perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
+				nr, nmi, regs);
+	}
 
 	put_cpu_var(perf_cpu_context);
 }
 
+void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
+{
+	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
+}
+
 static void perf_swcounter_read(struct perf_counter *counter)
 {
 	perf_swcounter_update(counter);
@@ -1733,8 +1738,12 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
 #ifdef CONFIG_EVENT_PROFILE
 void perf_tpcounter_event(int event_id)
 {
-	perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1,
-			task_pt_regs(current));
+	struct pt_regs *regs = get_irq_regs();
+
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
 }
 
 extern int ftrace_profile_enable(int);
@@ -1742,15 +1751,13 @@ extern void ftrace_profile_disable(int);
 
 static void tp_perf_counter_destroy(struct perf_counter *counter)
 {
-	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
-
-	ftrace_profile_disable(event_id);
+	ftrace_profile_disable(counter->hw_event.event_id);
 }
 
 static const struct hw_perf_counter_ops *
 tp_perf_counter_init(struct perf_counter *counter)
 {
-	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+	int event_id = counter->hw_event.event_id;
 	int ret;
 
 	ret = ftrace_profile_enable(event_id);
@@ -1758,6 +1765,7 @@ tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
+	counter->hw.irq_period = counter->hw_event.irq_period;
 
 	return &perf_ops_generic;
 }
@@ -1783,7 +1791,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (counter->hw_event.type) {
+	switch (counter->hw_event.event_id) {
 	case PERF_COUNT_CPU_CLOCK:
 		hw_ops = &perf_ops_cpu_clock;
 
@@ -1813,9 +1821,6 @@ sw_perf_counter_init(struct perf_counter *counter)
 		if (!counter->hw_event.exclude_kernel)
 			hw_ops = &perf_ops_cpu_migrations;
 		break;
-	default:
-		hw_ops = tp_perf_counter_init(counter);
-		break;
 	}
 
 	if (hw_ops)
@@ -1870,10 +1875,22 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		counter->state = PERF_COUNTER_STATE_OFF;
 
 	hw_ops = NULL;
-	if (!hw_event->raw && hw_event->type < 0)
-		hw_ops = sw_perf_counter_init(counter);
-	else
+
+	if (hw_event->raw_type)
+		hw_ops = hw_perf_counter_init(counter);
+	else switch (hw_event->type) {
+	case PERF_TYPE_HARDWARE:
 		hw_ops = hw_perf_counter_init(counter);
+		break;
+
+	case PERF_TYPE_SOFTWARE:
+		hw_ops = sw_perf_counter_init(counter);
+		break;
+
+	case PERF_TYPE_TRACEPOINT:
+		hw_ops = tp_perf_counter_init(counter);
+		break;
+	}
 
 	if (!hw_ops) {
 		kfree(counter);
-- 
cgit v0.10.2


From 0322cd6ec504b0bf08ca7b2c3d7f43bda37d79c9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:19 +0100
Subject: perf_counter: unify irq output code

Impact: cleanup

Having 3 slightly different copies of the same code around does nobody
any good. First step in revamping the output format.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.929962222@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 26f69dc..88b72eb 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -663,41 +663,6 @@ void perf_counter_do_pending(void)
 }
 
 /*
- * Record data for an irq counter.
- * This function was lifted from the x86 code; maybe it should
- * go in the core?
- */
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
-	struct perf_data *irqdata = counter->irqdata;
-
-	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-		irqdata->overrun++;
-	} else {
-		u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
-		*p = data;
-		irqdata->len += sizeof(u64);
-	}
-}
-
-/*
- * Record all the values of the counters in a group
- */
-static void perf_handle_group(struct perf_counter *counter)
-{
-	struct perf_counter *leader, *sub;
-
-	leader = counter->group_leader;
-	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-		if (sub != counter)
-			sub->hw_ops->read(sub);
-		perf_store_irq_data(counter, sub->hw_event.event_config);
-		perf_store_irq_data(counter, atomic64_read(&sub->count));
-	}
-}
-
-/*
  * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
  * here so there is no possibility of being interrupted.
@@ -736,20 +701,8 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	/*
 	 * Finally record data if requested.
 	 */
-	if (record) {
-		switch (counter->hw_event.record_type) {
-		case PERF_RECORD_SIMPLE:
-			break;
-		case PERF_RECORD_IRQ:
-			perf_store_irq_data(counter, instruction_pointer(regs));
-			counter->wakeup_pending = 1;
-			break;
-		case PERF_RECORD_GROUP:
-			perf_handle_group(counter);
-			counter->wakeup_pending = 1;
-			break;
-		}
-	}
+	if (record)
+		perf_counter_output(counter, 1, regs);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d844ae4..902282d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -674,20 +674,6 @@ static void pmc_generic_disable(struct perf_counter *counter)
 	x86_perf_counter_update(counter, hwc, idx);
 }
 
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
-	struct perf_data *irqdata = counter->irqdata;
-
-	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-		irqdata->overrun++;
-	} else {
-		u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
-		*p = data;
-		irqdata->len += sizeof(u64);
-	}
-}
-
 /*
  * Save and restart an expired counter. Called by NMI contexts,
  * so it has to be careful about preempting normal counter ops:
@@ -704,22 +690,6 @@ static void perf_save_and_restart(struct perf_counter *counter)
 		__pmc_generic_enable(counter, hwc, idx);
 }
 
-static void
-perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
-{
-	struct perf_counter *counter, *group_leader = sibling->group_leader;
-
-	/*
-	 * Store sibling timestamps (if any):
-	 */
-	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-
-		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
-		perf_store_irq_data(sibling, counter->hw_event.event_config);
-		perf_store_irq_data(sibling, atomic64_read(&counter->count));
-	}
-}
-
 /*
  * Maximum interrupt frequency of 100KHz per CPU
  */
@@ -754,28 +724,7 @@ again:
 			continue;
 
 		perf_save_and_restart(counter);
-
-		switch (counter->hw_event.record_type) {
-		case PERF_RECORD_SIMPLE:
-			continue;
-		case PERF_RECORD_IRQ:
-			perf_store_irq_data(counter, instruction_pointer(regs));
-			break;
-		case PERF_RECORD_GROUP:
-			perf_handle_group(counter, &status, &ack);
-			break;
-		}
-		/*
-		 * From NMI context we cannot call into the scheduler to
-		 * do a task wakeup - but we mark these generic as
-		 * wakeup_pending and initate a wakeup callback:
-		 */
-		if (nmi) {
-			counter->wakeup_pending = 1;
-			set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
-		} else {
-			wake_up(&counter->waitq);
-		}
+		perf_counter_output(counter, nmi, regs);
 	}
 
 	hw_perf_ack_status(ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8f93949..a4b76c0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -317,6 +317,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu);
 
+extern void perf_counter_output(struct perf_counter *counter,
+				int nmi, struct pt_regs *regs);
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 68a56a6..f054b8c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1354,6 +1354,60 @@ static const struct file_operations perf_fops = {
 };
 
 /*
+ * Output
+ */
+
+static void perf_counter_store_irq(struct perf_counter *counter, u64 data)
+{
+	struct perf_data *irqdata = counter->irqdata;
+
+	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+		irqdata->overrun++;
+	} else {
+		u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+		*p = data;
+		irqdata->len += sizeof(u64);
+	}
+}
+
+static void perf_counter_handle_group(struct perf_counter *counter)
+{
+	struct perf_counter *leader, *sub;
+
+	leader = counter->group_leader;
+	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+		if (sub != counter)
+			sub->hw_ops->read(sub);
+		perf_counter_store_irq(counter, sub->hw_event.event_config);
+		perf_counter_store_irq(counter, atomic64_read(&sub->count));
+	}
+}
+
+void perf_counter_output(struct perf_counter *counter,
+			 int nmi, struct pt_regs *regs)
+{
+	switch (counter->hw_event.record_type) {
+	case PERF_RECORD_SIMPLE:
+		return;
+
+	case PERF_RECORD_IRQ:
+		perf_counter_store_irq(counter, instruction_pointer(regs));
+		break;
+
+	case PERF_RECORD_GROUP:
+		perf_counter_handle_group(counter);
+		break;
+	}
+
+	if (nmi) {
+		counter->wakeup_pending = 1;
+		set_perf_counter_pending();
+	} else
+		wake_up(&counter->waitq);
+}
+
+/*
  * Generic software counter infrastructure
  */
 
@@ -1395,54 +1449,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 	atomic64_set(&hwc->count, -left);
 }
 
-static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
-{
-	struct perf_data *irqdata = counter->irqdata;
-
-	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-		irqdata->overrun++;
-	} else {
-		u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
-		*p = data;
-		irqdata->len += sizeof(u64);
-	}
-}
-
-static void perf_swcounter_handle_group(struct perf_counter *sibling)
-{
-	struct perf_counter *counter, *group_leader = sibling->group_leader;
-
-	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		counter->hw_ops->read(counter);
-		perf_swcounter_store_irq(sibling, counter->hw_event.event_config);
-		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
-	}
-}
-
-static void perf_swcounter_interrupt(struct perf_counter *counter,
-				     int nmi, struct pt_regs *regs)
-{
-	switch (counter->hw_event.record_type) {
-	case PERF_RECORD_SIMPLE:
-		break;
-
-	case PERF_RECORD_IRQ:
-		perf_swcounter_store_irq(counter, instruction_pointer(regs));
-		break;
-
-	case PERF_RECORD_GROUP:
-		perf_swcounter_handle_group(counter);
-		break;
-	}
-
-	if (nmi) {
-		counter->wakeup_pending = 1;
-		set_perf_counter_pending();
-	} else
-		wake_up(&counter->waitq);
-}
-
 static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 {
 	struct perf_counter *counter;
@@ -1461,7 +1467,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 		regs = task_pt_regs(current);
 
 	if (regs)
-		perf_swcounter_interrupt(counter, 0, regs);
+		perf_counter_output(counter, 0, regs);
 
 	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
 
@@ -1473,7 +1479,7 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 {
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	perf_swcounter_interrupt(counter, nmi, regs);
+	perf_counter_output(counter, nmi, regs);
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
-- 
cgit v0.10.2


From db4fb5acf20295063d1d5105e67724eb51440207 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 19 Mar 2009 20:26:20 +0100
Subject: perf_counter: powerpc: clean up perc_counter_interrupt

Impact: cleanup

This updates the powerpc perf_counter_interrupt following on from the
"perf_counter: unify irq output code" patch.  Since we now use the
generic perf_counter_output code, which sets the perf_counter_pending
flag directly, we no longer need the need_wakeup variable.

This removes need_wakeup and makes perf_counter_interrupt use
get_perf_counter_pending() instead.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194234.024464535@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 88b72eb..830ca9c 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -723,8 +723,6 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 			/* counter has overflowed */
 			found = 1;
 			record_and_restart(counter, val, regs);
-			if (counter->wakeup_pending)
-				need_wakeup = 1;
 		}
 	}
 
@@ -754,17 +752,14 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	/*
 	 * If we need a wakeup, check whether interrupts were soft-enabled
 	 * when we took the interrupt.  If they were, we can wake stuff up
-	 * immediately; otherwise we'll have to set a flag and do the
-	 * wakeup when interrupts get soft-enabled.
+	 * immediately; otherwise we'll have do the wakeup when interrupts
+	 * get soft-enabled.
 	 */
-	if (need_wakeup) {
-		if (regs->softe) {
-			irq_enter();
-			perf_counter_do_pending();
-			irq_exit();
-		} else {
-			set_perf_counter_pending();
-		}
+	if (get_perf_counter_pending() && regs->softe) {
+		irq_enter();
+		clear_perf_counter_pending();
+		perf_counter_do_pending();
+		irq_exit();
 	}
 }
 
-- 
cgit v0.10.2


From 9aaa131a279834dff75c290c91f0058f62d72d46 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sat, 21 Mar 2009 15:31:47 +1100
Subject: perf_counter: fix type/event_id layout on big-endian systems

Impact: build fix for powerpc

Commit db3a944aca35ae61 ("perf_counter: revamp syscall input ABI")
expanded the hw_event.type field into a union of structs containing
bitfields.  In particular it introduced a type field and a raw_type
field, with the intention that the 1-bit raw_type field should
overlay the most-significant bit of the 8-bit type field, and in fact
perf_counter_alloc() now assumes that (or at least, assumes that
raw_type doesn't overlay any of the bits that are 1 in the values of
PERF_TYPE_{HARDWARE,SOFTWARE,TRACEPOINT}).

Unfortunately this is not true on big-endian systems such as PowerPC,
where bitfields are laid out from left to right, i.e. from most
significant bit to least significant.  This means that setting
hw_event.type = PERF_TYPE_SOFTWARE will set hw_event.raw_type to 1.

This fixes it by making the layout depend on whether or not
__BIG_ENDIAN_BITFIELD is defined.  It's a bit ugly, but that's what
we get for using bitfields in a user/kernel ABI.

Also, that commit didn't fix up some places in arch/powerpc/kernel/
perf_counter.c where hw_event.raw and hw_event.event_id were used.
This fixes them too.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 830ca9c..6413d9c 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -602,12 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 	if ((s64)counter->hw_event.irq_period < 0)
 		return NULL;
-	ev = counter->hw_event.event_id;
-	if (!counter->hw_event.raw) {
-		if (ev >= ppmu->n_generic ||
-		    ppmu->generic_events[ev] == 0)
+	if (!counter->hw_event.raw_type) {
+		ev = counter->hw_event.event_id;
+		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return NULL;
 		ev = ppmu->generic_events[ev];
+	} else {
+		ev = counter->hw_event.raw_event_id;
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a4b76c0..98f5990 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 #include <linux/ioctl.h>
+#include <asm/byteorder.h>
 
 /*
  * User-space ABI bits:
@@ -86,6 +87,7 @@ enum perf_counter_record_type {
  */
 struct perf_counter_hw_event {
 	union {
+#ifndef __BIG_ENDIAN_BITFIELD
 		struct {
 			__u64			event_id	: 56,
 						type		:  8;
@@ -94,6 +96,16 @@ struct perf_counter_hw_event {
 			__u64			raw_event_id	: 63,
 						raw_type	:  1;
 		};
+#else
+		struct {
+			__u64			type		:  8,
+						event_id	: 56;
+		};
+		struct {
+			__u64			raw_type	:  1,
+						raw_event_id	: 63;
+		};
+#endif /* __BIT_ENDIAN_BITFIELD */
 		__u64		event_config;
 	};
 
-- 
cgit v0.10.2


From 6f9f791eb53b56097cd311a1a5517a8e89bdaf35 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 23 Mar 2009 21:26:52 +0100
Subject: perf_counter: create Documentation/perf_counter/ and move
 perfcounters.txt there

We'll have more files in that directory, prepare for that.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf-counters.txt b/Documentation/perf-counters.txt
deleted file mode 100644
index fddd321..0000000
--- a/Documentation/perf-counters.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-
-Performance Counters for Linux
-------------------------------
-
-Performance counters are special hardware registers available on most modern
-CPUs. These registers count the number of certain types of hw events: such
-as instructions executed, cachemisses suffered, or branches mis-predicted -
-without slowing down the kernel or applications. These registers can also
-trigger interrupts when a threshold number of events have passed - and can
-thus be used to profile the code that runs on that CPU.
-
-The Linux Performance Counter subsystem provides an abstraction of these
-hardware capabilities. It provides per task and per CPU counters, counter
-groups, and it provides event capabilities on top of those.
-
-Performance counters are accessed via special file descriptors.
-There's one file descriptor per virtual counter used.
-
-The special file descriptor is opened via the perf_counter_open()
-system call:
-
-   int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
-			     pid_t pid, int cpu, int group_fd);
-
-The syscall returns the new fd. The fd can be used via the normal
-VFS system calls: read() can be used to read the counter, fcntl()
-can be used to set the blocking mode, etc.
-
-Multiple counters can be kept open at a time, and the counters
-can be poll()ed.
-
-When creating a new counter fd, 'perf_counter_hw_event' is:
-
-/*
- * Hardware event to monitor via a performance monitoring counter:
- */
-struct perf_counter_hw_event {
-	s64			type;
-
-	u64			irq_period;
-	u32			record_type;
-
-	u32			disabled     :  1, /* off by default */
-				nmi	     :  1, /* NMI sampling   */
-				raw	     :  1, /* raw event type */
-				__reserved_1 : 29;
-
-	u64			__reserved_2;
-};
-
-/*
- * Generalized performance counter event types, used by the hw_event.type
- * parameter of the sys_perf_counter_open() syscall:
- */
-enum hw_event_types {
-	/*
-	 * Common hardware events, generalized by the kernel:
-	 */
-	PERF_COUNT_CYCLES		=  0,
-	PERF_COUNT_INSTRUCTIONS		=  1,
-	PERF_COUNT_CACHE_REFERENCES	=  2,
-	PERF_COUNT_CACHE_MISSES		=  3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
-	PERF_COUNT_BRANCH_MISSES	=  5,
-
-	/*
-	 * Special "software" counters provided by the kernel, even if
-	 * the hardware does not support performance counters. These
-	 * counters measure various physical and sw events of the
-	 * kernel (and allow the profiling of them as well):
-	 */
-	PERF_COUNT_CPU_CLOCK		= -1,
-	PERF_COUNT_TASK_CLOCK		= -2,
-	/*
-	 * Future software events:
-	 */
-	/* PERF_COUNT_PAGE_FAULTS	= -3,
-	   PERF_COUNT_CONTEXT_SWITCHES	= -4, */
-};
-
-These are standardized types of events that work uniformly on all CPUs
-that implements Performance Counters support under Linux. If a CPU is
-not able to count branch-misses, then the system call will return
--EINVAL.
-
-More hw_event_types are supported as well, but they are CPU
-specific and are enumerated via /sys on a per CPU basis. Raw hw event
-types can be passed in under hw_event.type if hw_event.raw is 1.
-For example, to count "External bus cycles while bus lock signal asserted"
-events on Intel Core CPUs, pass in a 0x4064 event type value and set
-hw_event.raw to 1.
-
-'record_type' is the type of data that a read() will provide for the
-counter, and it can be one of:
-
-/*
- * IRQ-notification data record type:
- */
-enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		=  0,
-	PERF_RECORD_IRQ			=  1,
-	PERF_RECORD_GROUP		=  2,
-};
-
-a "simple" counter is one that counts hardware events and allows
-them to be read out into a u64 count value. (read() returns 8 on
-a successful read of a simple counter.)
-
-An "irq" counter is one that will also provide an IRQ context information:
-the IP of the interrupted context. In this case read() will return
-the 8-byte counter value, plus the Instruction Pointer address of the
-interrupted context.
-
-The parameter 'hw_event_period' is the number of events before waking up
-a read() that is blocked on a counter fd. Zero value means a non-blocking
-counter.
-
-The 'pid' parameter allows the counter to be specific to a task:
-
- pid == 0: if the pid parameter is zero, the counter is attached to the
- current task.
-
- pid > 0: the counter is attached to a specific task (if the current task
- has sufficient privilege to do so)
-
- pid < 0: all tasks are counted (per cpu counters)
-
-The 'cpu' parameter allows a counter to be made specific to a full
-CPU:
-
- cpu >= 0: the counter is restricted to a specific CPU
- cpu == -1: the counter counts on all CPUs
-
-(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
-
-A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
-events of that task and 'follows' that task to whatever CPU the task
-gets schedule to. Per task counters can be created by any user, for
-their own tasks.
-
-A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
-all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
-
-Group counters are created by passing in a group_fd of another counter.
-Groups are scheduled at once and can be used with PERF_RECORD_GROUP
-to record multi-dimensional timestamps.
-
diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt
new file mode 100644
index 0000000..fddd321
--- /dev/null
+++ b/Documentation/perf_counter/design.txt
@@ -0,0 +1,147 @@
+
+Performance Counters for Linux
+------------------------------
+
+Performance counters are special hardware registers available on most modern
+CPUs. These registers count the number of certain types of hw events: such
+as instructions executed, cachemisses suffered, or branches mis-predicted -
+without slowing down the kernel or applications. These registers can also
+trigger interrupts when a threshold number of events have passed - and can
+thus be used to profile the code that runs on that CPU.
+
+The Linux Performance Counter subsystem provides an abstraction of these
+hardware capabilities. It provides per task and per CPU counters, counter
+groups, and it provides event capabilities on top of those.
+
+Performance counters are accessed via special file descriptors.
+There's one file descriptor per virtual counter used.
+
+The special file descriptor is opened via the perf_counter_open()
+system call:
+
+   int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
+			     pid_t pid, int cpu, int group_fd);
+
+The syscall returns the new fd. The fd can be used via the normal
+VFS system calls: read() can be used to read the counter, fcntl()
+can be used to set the blocking mode, etc.
+
+Multiple counters can be kept open at a time, and the counters
+can be poll()ed.
+
+When creating a new counter fd, 'perf_counter_hw_event' is:
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+	s64			type;
+
+	u64			irq_period;
+	u32			record_type;
+
+	u32			disabled     :  1, /* off by default */
+				nmi	     :  1, /* NMI sampling   */
+				raw	     :  1, /* raw event type */
+				__reserved_1 : 29;
+
+	u64			__reserved_2;
+};
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_CYCLES		=  0,
+	PERF_COUNT_INSTRUCTIONS		=  1,
+	PERF_COUNT_CACHE_REFERENCES	=  2,
+	PERF_COUNT_CACHE_MISSES		=  3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
+	PERF_COUNT_BRANCH_MISSES	=  5,
+
+	/*
+	 * Special "software" counters provided by the kernel, even if
+	 * the hardware does not support performance counters. These
+	 * counters measure various physical and sw events of the
+	 * kernel (and allow the profiling of them as well):
+	 */
+	PERF_COUNT_CPU_CLOCK		= -1,
+	PERF_COUNT_TASK_CLOCK		= -2,
+	/*
+	 * Future software events:
+	 */
+	/* PERF_COUNT_PAGE_FAULTS	= -3,
+	   PERF_COUNT_CONTEXT_SWITCHES	= -4, */
+};
+
+These are standardized types of events that work uniformly on all CPUs
+that implements Performance Counters support under Linux. If a CPU is
+not able to count branch-misses, then the system call will return
+-EINVAL.
+
+More hw_event_types are supported as well, but they are CPU
+specific and are enumerated via /sys on a per CPU basis. Raw hw event
+types can be passed in under hw_event.type if hw_event.raw is 1.
+For example, to count "External bus cycles while bus lock signal asserted"
+events on Intel Core CPUs, pass in a 0x4064 event type value and set
+hw_event.raw to 1.
+
+'record_type' is the type of data that a read() will provide for the
+counter, and it can be one of:
+
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		=  0,
+	PERF_RECORD_IRQ			=  1,
+	PERF_RECORD_GROUP		=  2,
+};
+
+a "simple" counter is one that counts hardware events and allows
+them to be read out into a u64 count value. (read() returns 8 on
+a successful read of a simple counter.)
+
+An "irq" counter is one that will also provide an IRQ context information:
+the IP of the interrupted context. In this case read() will return
+the 8-byte counter value, plus the Instruction Pointer address of the
+interrupted context.
+
+The parameter 'hw_event_period' is the number of events before waking up
+a read() that is blocked on a counter fd. Zero value means a non-blocking
+counter.
+
+The 'pid' parameter allows the counter to be specific to a task:
+
+ pid == 0: if the pid parameter is zero, the counter is attached to the
+ current task.
+
+ pid > 0: the counter is attached to a specific task (if the current task
+ has sufficient privilege to do so)
+
+ pid < 0: all tasks are counted (per cpu counters)
+
+The 'cpu' parameter allows a counter to be made specific to a full
+CPU:
+
+ cpu >= 0: the counter is restricted to a specific CPU
+ cpu == -1: the counter counts on all CPUs
+
+(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
+
+A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
+events of that task and 'follows' that task to whatever CPU the task
+gets schedule to. Per task counters can be created by any user, for
+their own tasks.
+
+A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
+all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+
+Group counters are created by passing in a group_fd of another counter.
+Groups are scheduled at once and can be used with PERF_RECORD_GROUP
+to record multi-dimensional timestamps.
+
-- 
cgit v0.10.2


From e0143bad9dbf2a8fad4c5430562bceba196b66ea Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 23 Mar 2009 21:29:59 +0100
Subject: perf_counter: add sample user-space to Documentation/perf_counter/

Initial version of kerneltop.c and perfstat.c.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
new file mode 100644
index 0000000..b457497
--- /dev/null
+++ b/Documentation/perf_counter/Makefile
@@ -0,0 +1,12 @@
+BINS = kerneltop perfstat
+
+all: $(BINS)
+
+kerneltop: kerneltop.c perfcounters.h
+	cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o $@ $<
+
+perfstat: kerneltop
+	ln -sf kerneltop perfstat
+
+clean:
+	rm $(BINS)
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
new file mode 100644
index 0000000..cf0e30b
--- /dev/null
+++ b/Documentation/perf_counter/kerneltop.c
@@ -0,0 +1,956 @@
+/*
+ * kerneltop.c: show top kernel functions - performance counters showcase
+
+   Build with:
+
+     cc -O6 -Wall `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c
+
+   Sample output:
+
+------------------------------------------------------------------------------
+ KernelTop:    2669 irqs/sec  [NMI, cache-misses/cache-refs],  (all, cpu: 2)
+------------------------------------------------------------------------------
+
+             weight         RIP          kernel function
+             ______   ________________   _______________
+
+              35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
+              33.00 - ffffffff804cb740 : sock_alloc_send_skb
+              31.26 - ffffffff804ce808 : skb_push
+              22.43 - ffffffff80510004 : tcp_established_options
+              19.00 - ffffffff8027d250 : find_get_page
+              15.76 - ffffffff804e4fc9 : eth_type_trans
+              15.20 - ffffffff804d8baa : dst_release
+              14.86 - ffffffff804cf5d8 : skb_release_head_state
+              14.00 - ffffffff802217d5 : read_hpet
+              12.00 - ffffffff804ffb7f : __ip_local_out
+              11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
+               8.54 - ffffffff805001a3 : ip_queue_xmit
+
+  Started by Ingo Molnar <mingo@redhat.com>
+
+  Improvements and fixes by:
+
+    Arjan van de Ven <arjan@linux.intel.com>
+    Yanmin Zhang <yanmin.zhang@intel.com>
+    Mike Galbraith <efault@gmx.de>
+
+  Released under the GPL v2. (and only v2, not any later version)
+
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <time.h>
+
+#include <glib.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+
+#include <linux/unistd.h>
+
+#ifdef __x86_64__
+# define __NR_perf_counter_open	295
+#endif
+
+#ifdef __i386__
+# define __NR_perf_counter_open 333
+#endif
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+typedef unsigned int		__u32;
+typedef unsigned long long	__u64;
+typedef long long		__s64;
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_CPU_CYCLES		=  0,
+	PERF_COUNT_INSTRUCTIONS		=  1,
+	PERF_COUNT_CACHE_REFERENCES	=  2,
+	PERF_COUNT_CACHE_MISSES		=  3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
+	PERF_COUNT_BRANCH_MISSES	=  5,
+	PERF_COUNT_BUS_CYCLES		=  6,
+
+	PERF_HW_EVENTS_MAX		=  7,
+
+	/*
+	 * Special "software" counters provided by the kernel, even if
+	 * the hardware does not support performance counters. These
+	 * counters measure various physical and sw events of the
+	 * kernel (and allow the profiling of them as well):
+	 */
+	PERF_COUNT_CPU_CLOCK		= -1,
+	PERF_COUNT_TASK_CLOCK		= -2,
+	PERF_COUNT_PAGE_FAULTS		= -3,
+	PERF_COUNT_CONTEXT_SWITCHES	= -4,
+	PERF_COUNT_CPU_MIGRATIONS	= -5,
+
+	PERF_SW_EVENTS_MIN		= -6,
+};
+
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		=  0,
+	PERF_RECORD_IRQ			=  1,
+	PERF_RECORD_GROUP		=  2,
+};
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+	__s64			type;
+
+	__u64			irq_period;
+	__u64			record_type;
+	__u64			read_format;
+
+	__u64			disabled       :  1, /* off by default        */
+				nmi	       :  1, /* NMI sampling          */
+				raw	       :  1, /* raw event type        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
+
+				__reserved_1   : 54;
+
+	__u32			extra_config_len;
+	__u32			__reserved_4;
+
+	__u64			__reserved_2;
+	__u64			__reserved_3;
+};
+
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
+
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd,
+	unsigned long			flags)
+{
+	int ret;
+
+	ret = syscall(
+		__NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
+#if defined(__x86_64__) || defined(__i386__)
+	if (ret < 0 && ret > -4096) {
+		errno = -ret;
+		ret = -1;
+	}
+#endif
+	return ret;
+}
+
+const char *event_types [] = {
+	"CPU cycles",
+	"instructions",
+	"cache-refs",
+	"cache-misses",
+	"branches",
+	"branch-misses",
+	"bus cycles"
+};
+
+const unsigned int default_count[] = {
+	1000000,
+	1000000,
+	  10000,
+	  10000,
+	1000000,
+	  10000,
+};
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE           31
+#define PR_TASK_PERF_COUNTERS_ENABLE            32
+
+#define MAX_COUNTERS		8
+
+static int			nr_counters			= -1;
+
+static __u64			count_filter		       = 100;
+
+#define MAX_NR_CPUS		256
+
+static int			event_count[MAX_COUNTERS];
+static unsigned long		event_id[MAX_COUNTERS];
+static int			event_raw[MAX_COUNTERS];
+
+static int			tid				= -1;
+static int			profile_cpu			= -1;
+static int			nr_cpus				=  0;
+static int			nmi				=  1;
+static int			group				=  0;
+
+static char			*vmlinux;
+
+static char			*sym_filter;
+static unsigned long		filter_start;
+static unsigned long		filter_end;
+
+static int			delay_secs			=  2;
+static int			zero;
+static int			dump_symtab;
+
+struct source_line {
+	uint64_t		EIP;
+	unsigned long		count;
+	char			*line;
+};
+
+static GList			*lines;
+
+static void display_help(void)
+{
+	printf(
+	"Usage: kerneltop [<options>]\n\n"
+	"KernelTop Options (up to %d event types can be specified at once):\n\n",
+		 MAX_COUNTERS);
+	printf(
+	" -e EID    --event_id=EID     # event type ID                    [default:  0]\n"
+	"                                   0: CPU cycles\n"
+	"                                   1: instructions\n"
+	"                                   2: cache accesses\n"
+	"                                   3: cache misses\n"
+	"                                   4: branch instructions\n"
+	"                                   5: branch prediction misses\n"
+	"                                   6: bus cycles\n\n"
+	"                                rNNN: raw PMU events (eventsel+umask)\n\n"
+	" -c CNT    --count=CNT        # event period to sample\n\n"
+	" -C CPU    --cpu=CPU          # CPU (-1 for all)                 [default: -1]\n"
+	" -p PID    --pid=PID          # PID of sampled task (-1 for all) [default: -1]\n\n"
+	" -d delay  --delay=<seconds>  # sampling/display delay           [default:  2]\n"
+	" -f CNT --filter=CNT          # min-event-count filter          [default: 100]\n\n"
+	" -s symbol --symbol=<symbol>  # function to be showed annotated one-shot\n"
+	" -x path   --vmlinux=<path>   # the vmlinux binary, required for -s use:\n"
+	" -z        --zero             # zero counts after display\n"
+	" -D        --dump_symtab      # dump symbol table to stderr on startup\n"
+	"\n");
+
+	exit(0);
+}
+
+static void process_options(int argc, char *argv[])
+{
+	int error = 0, counter;
+
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"count",	required_argument,	NULL, 'c'},
+			{"cpu",		required_argument,	NULL, 'C'},
+			{"delay",	required_argument,	NULL, 'd'},
+			{"dump_symtab",	no_argument,		NULL, 'D'},
+			{"event_id",	required_argument,	NULL, 'e'},
+			{"filter",	required_argument,	NULL, 'f'},
+			{"group",	required_argument,	NULL, 'g'},
+			{"help",	no_argument,		NULL, 'h'},
+			{"nmi",		required_argument,	NULL, 'n'},
+			{"pid",		required_argument,	NULL, 'p'},
+			{"vmlinux",	required_argument,	NULL, 'x'},
+			{"symbol",	required_argument,	NULL, 's'},
+			{"zero",	no_argument,		NULL, 'z'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "c:C:d:De:f:g:hn:p:s:x:z",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'c':
+			if (nr_counters == -1)
+				nr_counters = 0;
+			event_count[nr_counters]	=   atoi(optarg); break;
+		case 'C':
+			/* CPU and PID are mutually exclusive */
+			if (tid != -1) {
+				printf("WARNING: CPU switch overriding PID\n");
+				sleep(1);
+				tid = -1;
+			}
+			profile_cpu			=   atoi(optarg); break;
+		case 'd': delay_secs			=   atoi(optarg); break;
+		case 'D': dump_symtab			=              1; break;
+
+		case 'e':
+			nr_counters++;
+			if (nr_counters == MAX_COUNTERS) {
+				error = 1;
+				break;
+			}
+			if (*optarg == 'r') {
+				event_raw[nr_counters] = 1;
+				++optarg;
+			}
+			event_id[nr_counters] = strtol(optarg, NULL, 16);
+			break;
+
+		case 'f': count_filter			=   atoi(optarg); break;
+		case 'g': group				=   atoi(optarg); break;
+		case 'h':      				  display_help(); break;
+		case 'n': nmi				=   atoi(optarg); break;
+		case 'p':
+			/* CPU and PID are mutually exclusive */
+			if (profile_cpu != -1) {
+				printf("WARNING: PID switch overriding CPU\n");
+				sleep(1);
+				profile_cpu = -1;
+			}
+			tid				=   atoi(optarg); break;
+		case 's': sym_filter			= strdup(optarg); break;
+		case 'x': vmlinux			= strdup(optarg); break;
+		case 'z': zero				=              1; break;
+		default: error = 1; break;
+		}
+	}
+	if (error)
+		display_help();
+
+	nr_counters++;
+	if (nr_counters < 1)
+		nr_counters = 1;
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (event_count[counter])
+			continue;
+
+		if (event_id[counter] < PERF_HW_EVENTS_MAX)
+			event_count[counter] = default_count[event_id[counter]];
+		else
+			event_count[counter] = 100000;
+	}
+}
+
+static uint64_t			min_ip;
+static uint64_t			max_ip = -1ll;
+
+struct sym_entry {
+	unsigned long long	addr;
+	char			*sym;
+	unsigned long		count[MAX_COUNTERS];
+	int			skip;
+	GList			*source;
+};
+
+#define MAX_SYMS		100000
+
+static int sym_table_count;
+
+struct sym_entry		*sym_filter_entry;
+
+static struct sym_entry		sym_table[MAX_SYMS];
+
+static void show_details(struct sym_entry *sym);
+
+/*
+ * Ordering weight: count-1 * count-1 * ... / count-n
+ */
+static double sym_weight(const struct sym_entry *sym)
+{
+	double weight;
+	int counter;
+
+	weight = sym->count[0];
+
+	for (counter = 1; counter < nr_counters-1; counter++)
+		weight *= sym->count[counter];
+
+	weight /= (sym->count[counter] + 1);
+
+	return weight;
+}
+
+static int compare(const void *__sym1, const void *__sym2)
+{
+	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
+
+	return sym_weight(sym1) < sym_weight(sym2);
+}
+
+static time_t			last_refresh;
+static long			events;
+static long			userspace_events;
+static const char		CONSOLE_CLEAR[] = "[H[2J";
+
+static struct sym_entry		tmp[MAX_SYMS];
+
+static void print_sym_table(void)
+{
+	int i, printed;
+	int counter;
+	float events_per_sec = events/delay_secs;
+	float kevents_per_sec = (events-userspace_events)/delay_secs;
+
+	memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
+	qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
+
+	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
+
+	printf(
+"------------------------------------------------------------------------------\n");
+	printf( " KernelTop:%8.0f irqs/sec  kernel:%3.1f%% [%s, ",
+		events_per_sec,
+		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
+		nmi ? "NMI" : "IRQ");
+
+	if (nr_counters == 1)
+		printf("%d ", event_count[0]);
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (counter)
+			printf("/");
+
+		if (event_id[counter] < PERF_HW_EVENTS_MAX)
+			printf( "%s", event_types[event_id[counter]]);
+		else
+			printf( "raw:%04lx", event_id[counter]);
+	}
+
+	printf( "], ");
+
+	if (tid != -1)
+		printf(" (tid: %d", tid);
+	else
+		printf(" (all");
+
+	if (profile_cpu != -1)
+		printf(", cpu: %d)\n", profile_cpu);
+	else {
+		if (tid != -1)
+			printf(")\n");
+		else
+			printf(", %d CPUs)\n", nr_cpus);
+	}
+
+	printf("------------------------------------------------------------------------------\n\n");
+
+	if (nr_counters == 1)
+		printf("             events");
+	else
+		printf("  weight     events");
+
+	printf("         RIP          kernel function\n"
+	       	       "  ______     ______   ________________   _______________\n\n"
+	);
+
+	printed = 0;
+	for (i = 0; i < sym_table_count; i++) {
+		int count;
+
+		if (nr_counters == 1) {
+			if (printed <= 18 &&
+					tmp[i].count[0] >= count_filter) {
+				printf("%19.2f - %016llx : %s\n",
+				  sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
+				printed++;
+			}
+		} else {
+			if (printed <= 18 &&
+					tmp[i].count[0] >= count_filter) {
+				printf("%8.1f %10ld - %016llx : %s\n",
+				  sym_weight(tmp + i),
+				  tmp[i].count[0],
+				  tmp[i].addr, tmp[i].sym);
+				printed++;
+			}
+		}
+		/*
+		 * Add decay to the counts:
+		 */
+		for (count = 0; count < nr_counters; count++)
+			sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
+	}
+
+	if (sym_filter_entry)
+		show_details(sym_filter_entry);
+
+	last_refresh = time(NULL);
+
+	{
+		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+
+		if (poll(&stdin_poll, 1, 0) == 1) {
+			printf("key pressed - exiting.\n");
+			exit(0);
+		}
+	}
+}
+
+static int read_symbol(FILE *in, struct sym_entry *s)
+{
+	static int filter_match = 0;
+	char *sym, stype;
+	char str[500];
+	int rc, pos;
+
+	rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
+	if (rc == EOF)
+		return -1;
+
+	assert(rc == 3);
+
+	/* skip until end of line: */
+	pos = strlen(str);
+	do {
+		rc = fgetc(in);
+		if (rc == '\n' || rc == EOF || pos >= 499)
+			break;
+		str[pos] = rc;
+		pos++;
+	} while (1);
+	str[pos] = 0;
+
+	sym = str;
+
+	/* Filter out known duplicates and non-text symbols. */
+	if (!strcmp(sym, "_text"))
+		return 1;
+	if (!min_ip && !strcmp(sym, "_stext"))
+		return 1;
+	if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
+		return 1;
+	if (stype != 'T' && stype != 't')
+		return 1;
+	if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
+		return 1;
+	if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
+		return 1;
+
+	s->sym = malloc(strlen(str));
+	assert(s->sym);
+
+	strcpy((char *)s->sym, str);
+	s->skip = 0;
+
+	/* Tag events to be skipped. */
+	if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
+		s->skip = 1;
+	if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
+		s->skip = 1;
+
+	if (filter_match == 1) {
+		filter_end = s->addr;
+		filter_match = -1;
+		if (filter_end - filter_start > 10000) {
+			printf("hm, too large filter symbol <%s> - skipping.\n",
+				sym_filter);
+			printf("symbol filter start: %016lx\n", filter_start);
+			printf("                end: %016lx\n", filter_end);
+			filter_end = filter_start = 0;
+			sym_filter = NULL;
+			sleep(1);
+		}
+	}
+	if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
+		filter_match = 1;
+		filter_start = s->addr;
+	}
+
+	return 0;
+}
+
+int compare_addr(const void *__sym1, const void *__sym2)
+{
+	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
+
+	return sym1->addr > sym2->addr;
+}
+
+static void sort_symbol_table(void)
+{
+	int i, dups;
+
+	do {
+		qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
+		for (i = 0, dups = 0; i < sym_table_count; i++) {
+			if (sym_table[i].addr == sym_table[i+1].addr) {
+				sym_table[i+1].addr = -1ll;
+				dups++;
+			}
+		}
+		sym_table_count -= dups;
+	} while(dups);
+}
+
+static void parse_symbols(void)
+{
+	struct sym_entry *last;
+
+	FILE *kallsyms = fopen("/proc/kallsyms", "r");
+
+	if (!kallsyms) {
+		printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
+		exit(-1);
+	}
+
+	while (!feof(kallsyms)) {
+		if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
+			sym_table_count++;
+			assert(sym_table_count <= MAX_SYMS);
+		}
+	}
+
+	sort_symbol_table();
+	min_ip = sym_table[0].addr;
+	max_ip = sym_table[sym_table_count-1].addr;
+	last = sym_table + sym_table_count++;
+
+	last->addr = -1ll;
+	last->sym = "<end>";
+
+	if (filter_end) {
+		int count;
+		for (count=0; count < sym_table_count; count ++) {
+			if (!strcmp(sym_table[count].sym, sym_filter)) {
+				sym_filter_entry = &sym_table[count];
+				break;
+			}
+		}
+	}
+	if (dump_symtab) {
+		int i;
+
+		for (i = 0; i < sym_table_count; i++)
+			fprintf(stderr, "%llx %s\n",
+				sym_table[i].addr, sym_table[i].sym);
+	}
+}
+
+
+static void parse_vmlinux(char *filename)
+{
+	FILE *file;
+	char command[PATH_MAX*2];
+	if (!filename)
+		return;
+
+	sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
+
+	file = popen(command, "r");
+	if (!file)
+		return;
+
+	while (!feof(file)) {
+		struct source_line *src;
+		size_t dummy = 0;
+		char *c;
+
+		src = malloc(sizeof(struct source_line));
+		assert(src != NULL);	
+		memset(src, 0, sizeof(struct source_line));
+
+		if (getline(&src->line, &dummy, file) < 0)
+			break;
+		if (!src->line)
+			break;
+
+		c = strchr(src->line, '\n');
+		if (c)
+			*c = 0;
+
+		lines = g_list_prepend(lines, src);
+
+		if (strlen(src->line)>8 && src->line[8] == ':')
+			src->EIP = strtoull(src->line, NULL, 16);
+		if (strlen(src->line)>8 && src->line[16] == ':')
+			src->EIP = strtoull(src->line, NULL, 16);
+	}
+	pclose(file);
+	lines = g_list_reverse(lines);
+}
+
+static void record_precise_ip(uint64_t ip)
+{
+	struct source_line *line;
+	GList *item;
+
+	item = g_list_first(lines);
+	while (item) {
+		line = item->data;
+		if (line->EIP == ip)
+			line->count++;
+		if (line->EIP > ip)
+			break;
+		item = g_list_next(item);
+	}
+}
+
+static void lookup_sym_in_vmlinux(struct sym_entry *sym)
+{
+	struct source_line *line;
+	GList *item;
+	char pattern[PATH_MAX];
+	sprintf(pattern, "<%s>:", sym->sym);
+
+	item = g_list_first(lines);
+	while (item) {
+		line = item->data;
+		if (strstr(line->line, pattern)) {
+			sym->source = item;
+			break;
+		}
+		item = g_list_next(item);
+	}
+}
+
+void show_lines(GList *item_queue, int item_queue_count)
+{
+	int i;
+	struct source_line *line;
+
+	for (i = 0; i < item_queue_count; i++) {
+		line = item_queue->data;
+		printf("%8li\t%s\n", line->count, line->line);
+		item_queue = g_list_next(item_queue);
+	}
+}
+
+#define TRACE_COUNT     3
+
+static void show_details(struct sym_entry *sym)
+{
+	struct source_line *line;
+	GList *item;
+	int displayed = 0;
+	GList *item_queue = NULL;
+	int item_queue_count = 0;
+
+	if (!sym->source)
+		lookup_sym_in_vmlinux(sym);
+	if (!sym->source)
+		return;
+
+	printf("Showing details for %s\n", sym->sym);
+
+	item = sym->source;
+	while (item) {
+		line = item->data;
+		if (displayed && strstr(line->line, ">:"))
+			break;
+
+		if (!item_queue_count)
+			item_queue = item;
+		item_queue_count ++;
+
+		if (line->count >= count_filter) {
+			show_lines(item_queue, item_queue_count);
+			item_queue_count = 0;
+			item_queue = NULL;
+		} else if (item_queue_count > TRACE_COUNT) {
+			item_queue = g_list_next(item_queue);
+			item_queue_count --;
+		}
+
+		line->count = 0;
+		displayed++;
+		if (displayed > 300)
+			break;
+		item = g_list_next(item);
+	}
+}
+
+/*
+ * Binary search in the histogram table and record the hit:
+ */
+static void record_ip(uint64_t ip, int counter)
+{
+	int left_idx, middle_idx, right_idx, idx;
+	unsigned long left, middle, right;
+
+	record_precise_ip(ip);
+
+	left_idx = 0;
+	right_idx = sym_table_count-1;
+	assert(ip <= max_ip && ip >= min_ip);
+
+	while (left_idx + 1 < right_idx) {
+		middle_idx = (left_idx + right_idx) / 2;
+
+		left   = sym_table[  left_idx].addr;
+		middle = sym_table[middle_idx].addr;
+		right  = sym_table[ right_idx].addr;
+
+		if (!(left <= middle && middle <= right)) {
+			printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
+			printf("%d %d %d\n", left_idx, middle_idx, right_idx);
+		}
+		assert(left <= middle && middle <= right);
+		if (!(left <= ip && ip <= right)) {
+			printf(" left: %016lx\n", left);
+			printf("   ip: %016lx\n", ip);
+			printf("right: %016lx\n", right);
+		}
+		assert(left <= ip && ip <= right);
+		/*
+		 * [ left .... target .... middle .... right ]
+		 *   => right := middle
+		 */
+		if (ip < middle) {
+			right_idx = middle_idx;
+			continue;
+		}
+		/*
+		 * [ left .... middle ... target ... right ]
+		 *   => left := middle
+		 */
+		left_idx = middle_idx;
+	}
+
+	idx = left_idx;
+
+	if (!sym_table[idx].skip)
+		sym_table[idx].count[counter]++;
+	else events--;
+}
+
+static void process_event(uint64_t ip, int counter)
+{
+	events++;
+
+	if (ip < min_ip || ip > max_ip) {
+		userspace_events++;
+		return;
+	}
+
+	record_ip(ip, counter);
+}
+
+int main(int argc, char *argv[])
+{
+	struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS];
+	struct perf_counter_hw_event hw_event;
+	int fd[MAX_NR_CPUS][MAX_COUNTERS];
+	int i, counter, group_fd;
+	unsigned int cpu;
+	uint64_t ip;
+	ssize_t res;
+	int ret;
+
+	process_options(argc, argv);
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (tid != -1 || profile_cpu != -1)
+		nr_cpus = 1;
+
+	assert(nr_cpus <= MAX_NR_CPUS);
+
+	for (i = 0; i < nr_cpus; i++) {
+		group_fd = -1;
+		for (counter = 0; counter < nr_counters; counter++) {
+
+			cpu	= profile_cpu;
+			if (tid == -1 && profile_cpu == -1)
+				cpu = i;
+
+			memset(&hw_event, 0, sizeof(hw_event));
+			hw_event.type		= event_id[counter];
+			hw_event.raw		= event_raw[counter];
+			hw_event.irq_period	= event_count[counter];
+			hw_event.record_type	= PERF_RECORD_IRQ;
+			hw_event.nmi		= nmi;
+
+			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
+			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
+			if (fd[i][counter] < 0) {
+				printf("kerneltop error: syscall returned with %d (%s)\n",
+					fd[i][counter], strerror(-fd[i][counter]));
+				if (fd[i][counter] == -1)
+					printf("Are you root?\n");
+				exit(-1);
+			}
+			assert(fd[i][counter] >= 0);
+
+			/*
+			 * First counter acts as the group leader:
+			 */
+			if (group && group_fd == -1)
+				group_fd = fd[i][counter];
+
+			event_array[i][counter].fd = fd[i][counter];
+			event_array[i][counter].events = POLLIN;
+		}
+	}
+
+	parse_symbols();
+	if (vmlinux && sym_filter_entry)
+		parse_vmlinux(vmlinux);
+
+	printf("KernelTop refresh period: %d seconds\n", delay_secs);
+	last_refresh = time(NULL);
+
+	while (1) {
+		int hits = events;
+
+		for (i = 0; i < nr_cpus; i++) {
+			for (counter = 0; counter < nr_counters; counter++) {
+				res = read(fd[i][counter], (char *) &ip, sizeof(ip));
+				if (res > 0) {
+					assert(res == sizeof(ip));
+
+					process_event(ip, counter);
+				}
+			}
+		}
+
+		if (time(NULL) >= last_refresh + delay_secs) {
+			print_sym_table();
+			events = userspace_events = 0;
+		}
+
+		if (hits == events)
+			ret = poll(event_array[0], nr_cpus, 1000);
+		hits = events;
+	}
+
+	return 0;
+}
diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c
new file mode 100644
index 0000000..9a5808f
--- /dev/null
+++ b/Documentation/perf_counter/perfstat.c
@@ -0,0 +1,521 @@
+/*
+ * perfstat:  /usr/bin/time -alike performance counter statistics utility
+ *
+ *        It summarizes the counter events of all tasks (and child tasks),
+ *        covering all CPUs that the command (or workload) executes on.
+ *        It only counts the per-task events of the workload started,
+ *        independent of how many other tasks run on those CPUs.
+ *
+ * Build with:       cc -O2 -g -lrt -Wall -W -o perfstat perfstat.c
+ *
+ * Sample output:
+ *
+
+   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
+
+   Performance counter stats for 'ls':
+
+           163516953 instructions
+                2295 cache-misses
+             2855182 branch-misses
+
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Released under the GPLv2 (not later).
+ *
+ * Percpu counter support by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
+ * Symbolic event options by: Wu Fengguang <fengguang.wu@intel.com>
+ */
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <getopt.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+
+#include <linux/unistd.h>
+
+#ifdef __x86_64__
+# define __NR_perf_counter_open	295
+#endif
+
+#ifdef __i386__
+# define __NR_perf_counter_open 333
+#endif
+
+#ifdef __powerpc__
+#define __NR_perf_counter_open 319
+#endif
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+typedef unsigned int		__u32;
+typedef unsigned long long	__u64;
+typedef long long		__s64;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_CPU_CYCLES		=  0,
+	PERF_COUNT_INSTRUCTIONS		=  1,
+	PERF_COUNT_CACHE_REFERENCES	=  2,
+	PERF_COUNT_CACHE_MISSES		=  3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
+	PERF_COUNT_BRANCH_MISSES	=  5,
+	PERF_COUNT_BUS_CYCLES		=  6,
+
+	PERF_HW_EVENTS_MAX		=  7,
+
+	/*
+	 * Special "software" counters provided by the kernel, even if
+	 * the hardware does not support performance counters. These
+	 * counters measure various physical and sw events of the
+	 * kernel (and allow the profiling of them as well):
+	 */
+	PERF_COUNT_CPU_CLOCK		= -1,
+	PERF_COUNT_TASK_CLOCK		= -2,
+	PERF_COUNT_PAGE_FAULTS		= -3,
+	PERF_COUNT_CONTEXT_SWITCHES	= -4,
+	PERF_COUNT_CPU_MIGRATIONS	= -5,
+
+	PERF_SW_EVENTS_MIN		= -6,
+};
+
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		=  0,
+	PERF_RECORD_IRQ			=  1,
+	PERF_RECORD_GROUP		=  2,
+};
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+	__s64			type;
+
+	__u64			irq_period;
+	__u64			record_type;
+	__u64			read_format;
+
+	__u64			disabled       :  1, /* off by default        */
+				nmi	       :  1, /* NMI sampling          */
+				raw	       :  1, /* raw event type        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
+
+				__reserved_1   : 54;
+
+	__u32			extra_config_len;
+	__u32			__reserved_4;
+
+	__u64			__reserved_2;
+	__u64			__reserved_3;
+};
+
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
+
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd,
+	unsigned long			flags)
+{
+	int ret;
+
+	ret = syscall(
+		__NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
+#if defined(__x86_64__) || defined(__i386__)
+	if (ret < 0 && ret > -4096) {
+		errno = -ret;
+		ret = -1;
+	}
+#endif
+	return ret;
+}
+
+
+static char *hw_event_names [] = {
+	"CPU cycles",
+	"instructions",
+	"cache references",
+	"cache misses",
+	"branches",
+	"branch misses",
+	"bus cycles",
+};
+
+static char *sw_event_names [] = {
+	"cpu clock ticks",
+	"task clock ticks",
+	"pagefaults",
+	"context switches",
+	"CPU migrations",
+};
+
+struct event_symbol {
+	int event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols [] = {
+	{PERF_COUNT_CPU_CYCLES,			"cpu-cycles",		},
+	{PERF_COUNT_CPU_CYCLES,			"cycles",		},
+	{PERF_COUNT_INSTRUCTIONS,		"instructions",		},
+	{PERF_COUNT_CACHE_REFERENCES,		"cache-references",	},
+	{PERF_COUNT_CACHE_MISSES,		"cache-misses",		},
+	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branch-instructions",	},
+	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branches",		},
+	{PERF_COUNT_BRANCH_MISSES,		"branch-misses",	},
+	{PERF_COUNT_BUS_CYCLES,			"bus-cycles",		},
+	{PERF_COUNT_CPU_CLOCK,			"cpu-ticks",		},
+	{PERF_COUNT_CPU_CLOCK,			"ticks",		},
+	{PERF_COUNT_TASK_CLOCK,			"task-ticks",		},
+	{PERF_COUNT_PAGE_FAULTS,		"page-faults",		},
+	{PERF_COUNT_PAGE_FAULTS,		"faults",		},
+	{PERF_COUNT_CONTEXT_SWITCHES,		"context-switches",	},
+	{PERF_COUNT_CONTEXT_SWITCHES,		"cs",			},
+	{PERF_COUNT_CPU_MIGRATIONS,		"cpu-migrations",	},
+	{PERF_COUNT_CPU_MIGRATIONS,		"migrations",		},
+};
+
+#define MAX_COUNTERS					64
+#define MAX_NR_CPUS					256
+
+static int			nr_counters		= 0;
+static int			nr_cpus			= 0;
+
+static int			event_id[MAX_COUNTERS]	=
+					 { -2, -5, -4, -3, 0, 1, 2, 3};
+
+static int			event_raw[MAX_COUNTERS];
+
+static int			system_wide		= 0;
+
+static void display_help(void)
+{
+	unsigned int i;
+	int e;
+
+	printf(
+	"Usage: perfstat [<events...>] <cmd...>\n\n"
+	"PerfStat Options (up to %d event types can be specified):\n\n",
+		 MAX_COUNTERS);
+	printf(
+	" -e EVENT     --event=EVENT        #  symbolic-name        abbreviations");
+
+	for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) {
+		if (e != event_symbols[i].event) {
+			e = event_symbols[i].event;
+			printf(
+	"\n                                  %2d: %-20s", e, event_symbols[i].symbol);
+		} else
+			printf(" %s", event_symbols[i].symbol);
+	}
+
+	printf("\n"
+	"                                rNNN: raw event type\n\n"
+	" -s                                # system-wide collection\n\n"
+	" -c <cmd..>   --command=<cmd..>    # command+arguments to be timed.\n"
+	"\n");
+	exit(0);
+}
+
+static int type_valid(int type)
+{
+	if (type >= PERF_HW_EVENTS_MAX)
+		return 0;
+	if (type <= PERF_SW_EVENTS_MIN)
+		return 0;
+
+	return 1;
+}
+
+static char *event_name(int ctr)
+{
+	int type = event_id[ctr];
+	static char buf[32];
+
+	if (event_raw[ctr]) {
+		sprintf(buf, "raw 0x%x", type);
+		return buf;
+	}
+	if (!type_valid(type))
+		return "unknown";
+
+	if (type >= 0)
+		return hw_event_names[type];
+
+	return sw_event_names[-type-1];
+}
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static int match_event_symbols(char *str)
+{
+	unsigned int i;
+
+	if (isdigit(str[0]) || str[0] == '-')
+		return atoi(str);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return PERF_HW_EVENTS_MAX;
+}
+
+static void parse_events(char *str)
+{
+	int type, raw;
+
+again:
+	nr_counters++;
+	if (nr_counters == MAX_COUNTERS)
+		display_help();
+
+	raw = 0;
+	if (*str == 'r') {
+		raw = 1;
+		++str;
+		type = strtol(str, NULL, 16);
+	} else {
+		type = match_event_symbols(str);
+		if (!type_valid(type))
+			display_help();
+	}
+
+	event_id[nr_counters] = type;
+	event_raw[nr_counters] = raw;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+}
+
+static void process_options(int argc, char *argv[])
+{
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"event",	required_argument,	NULL, 'e'},
+			{"help",	no_argument,		NULL, 'h'},
+			{"command",	no_argument,		NULL, 'c'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "+:e:c:s",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'c':
+			break;
+		case 's':
+			system_wide = 1;
+			break;
+		case 'e':
+			parse_events(optarg);
+			break;
+		default:
+			break;
+		}
+	}
+	if (optind == argc)
+		goto err;
+
+	if (!nr_counters)
+		nr_counters = 8;
+	else
+		nr_counters++;
+	return;
+
+err:
+	display_help();
+}
+
+char fault_here[1000000];
+
+#define PR_TASK_PERF_COUNTERS_DISABLE           31
+#define PR_TASK_PERF_COUNTERS_ENABLE            32
+
+static int fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static void create_counter(int counter)
+{
+	struct perf_counter_hw_event hw_event;
+
+	memset(&hw_event, 0, sizeof(hw_event));
+	hw_event.type		= event_id[counter];
+	hw_event.raw		= event_raw[counter];
+	hw_event.record_type	= PERF_RECORD_SIMPLE;
+	hw_event.nmi		= 0;
+
+	if (system_wide) {
+		int cpu;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
+			if (fd[cpu][counter] < 0) {
+				printf("perfstat error: syscall returned with %d (%s)\n",
+						fd[cpu][counter], strerror(errno));
+				exit(-1);
+			}
+			
+		}
+	} else {
+		hw_event.inherit	= 1;
+		hw_event.disabled	= 1;
+
+		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
+		if (fd[0][counter] < 0) {
+			printf("perfstat error: syscall returned with %d (%s)\n",
+					fd[0][counter], strerror(errno));
+			exit(-1);
+		}
+	}
+}
+
+
+#define rdclock()					\
+({							\
+	struct timespec ts;				\
+							\
+	clock_gettime(CLOCK_MONOTONIC, &ts);		\
+	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
+})
+
+int main(int argc, char *argv[])
+{
+	unsigned long long t0, t1;
+	int counter;
+	ssize_t res;
+	int status;
+	int pid;
+
+	process_options(argc, argv);
+
+	if (system_wide) {
+		nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+		assert(nr_cpus <= MAX_NR_CPUS);
+		assert(nr_cpus >= 0);
+	} else
+		nr_cpus = 1;
+
+	for (counter = 0; counter < nr_counters; counter++)
+		create_counter(counter);
+
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Enable counters and exec the command:
+	 */
+	t0 = rdclock();
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	if ((pid = fork()) < 0)
+		perror("failed to fork");
+	if (!pid) {
+		if (execvp(argv[0], argv)) {
+			perror(argv[0]);
+			exit(-1);
+		}
+	}
+	while (wait(&status) >= 0)
+		;
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+	t1 = rdclock();
+
+	fflush(stdout);
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Performance counter stats for \'%s\':\n",
+		argv[0]);
+	fprintf(stderr, "\n");
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		int cpu;
+		__u64 count, single_count;
+
+		count = 0;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			res = read(fd[cpu][counter],
+					(char *) &single_count, sizeof(single_count));
+			assert(res == sizeof(single_count));
+			count += single_count;
+		}
+
+		if (!event_raw[counter] &&
+		    (event_id[counter] == PERF_COUNT_CPU_CLOCK ||
+		     event_id[counter] == PERF_COUNT_TASK_CLOCK)) {
+
+			double msecs = (double)count / 1000000;
+
+			fprintf(stderr, " %14.6f  %-20s (msecs)\n",
+				msecs, event_name(counter));
+		} else {
+			fprintf(stderr, " %14Ld  %-20s (events)\n",
+				count, event_name(counter));
+		}
+		if (!counter)
+			fprintf(stderr, "\n");
+	}
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
+			(double)(t1-t0)/1e6);
+	fprintf(stderr, "\n");
+
+	return 0;
+}
-- 
cgit v0.10.2


From cea92ce5b07078cd62c4712d51390b09a43dba2e Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:02 +0800
Subject: perf_counter tools: Merge common code into perfcounters.h

kerneltop's MAX_COUNTERS is increased from 8 to 64(the value used by perfstat).

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index cf0e30b..fe70a2c 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -65,126 +65,7 @@
 
 #include <linux/unistd.h>
 
-#ifdef __x86_64__
-# define __NR_perf_counter_open	295
-#endif
-
-#ifdef __i386__
-# define __NR_perf_counter_open 333
-#endif
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-typedef unsigned int		__u32;
-typedef unsigned long long	__u64;
-typedef long long		__s64;
-
-/*
- * User-space ABI bits:
- */
-
-/*
- * Generalized performance counter event types, used by the hw_event.type
- * parameter of the sys_perf_counter_open() syscall:
- */
-enum hw_event_types {
-	/*
-	 * Common hardware events, generalized by the kernel:
-	 */
-	PERF_COUNT_CPU_CYCLES		=  0,
-	PERF_COUNT_INSTRUCTIONS		=  1,
-	PERF_COUNT_CACHE_REFERENCES	=  2,
-	PERF_COUNT_CACHE_MISSES		=  3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
-	PERF_COUNT_BRANCH_MISSES	=  5,
-	PERF_COUNT_BUS_CYCLES		=  6,
-
-	PERF_HW_EVENTS_MAX		=  7,
-
-	/*
-	 * Special "software" counters provided by the kernel, even if
-	 * the hardware does not support performance counters. These
-	 * counters measure various physical and sw events of the
-	 * kernel (and allow the profiling of them as well):
-	 */
-	PERF_COUNT_CPU_CLOCK		= -1,
-	PERF_COUNT_TASK_CLOCK		= -2,
-	PERF_COUNT_PAGE_FAULTS		= -3,
-	PERF_COUNT_CONTEXT_SWITCHES	= -4,
-	PERF_COUNT_CPU_MIGRATIONS	= -5,
-
-	PERF_SW_EVENTS_MIN		= -6,
-};
-
-/*
- * IRQ-notification data record type:
- */
-enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		=  0,
-	PERF_RECORD_IRQ			=  1,
-	PERF_RECORD_GROUP		=  2,
-};
-
-/*
- * Hardware event to monitor via a performance monitoring counter:
- */
-struct perf_counter_hw_event {
-	__s64			type;
-
-	__u64			irq_period;
-	__u64			record_type;
-	__u64			read_format;
-
-	__u64			disabled       :  1, /* off by default        */
-				nmi	       :  1, /* NMI sampling          */
-				raw	       :  1, /* raw event type        */
-				inherit	       :  1, /* children inherit it   */
-				pinned	       :  1, /* must always be on PMU */
-				exclusive      :  1, /* only group on PMU     */
-				exclude_user   :  1, /* don't count user      */
-				exclude_kernel :  1, /* ditto kernel          */
-				exclude_hv     :  1, /* ditto hypervisor      */
-				exclude_idle   :  1, /* don't count when idle */
-
-				__reserved_1   : 54;
-
-	__u32			extra_config_len;
-	__u32			__reserved_4;
-
-	__u64			__reserved_2;
-	__u64			__reserved_3;
-};
-
-/*
- * Ioctls that can be done on a perf counter fd:
- */
-#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
-
-asmlinkage int sys_perf_counter_open(
-
-	struct perf_counter_hw_event	*hw_event_uptr		__user,
-	pid_t				pid,
-	int				cpu,
-	int				group_fd,
-	unsigned long			flags)
-{
-	int ret;
-
-	ret = syscall(
-		__NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
-#if defined(__x86_64__) || defined(__i386__)
-	if (ret < 0 && ret > -4096) {
-		errno = -ret;
-		ret = -1;
-	}
-#endif
-	return ret;
-}
+#include "perfcounters.h"
 
 const char *event_types [] = {
 	"CPU cycles",
@@ -205,21 +86,10 @@ const unsigned int default_count[] = {
 	  10000,
 };
 
-/*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_COUNTERS_DISABLE           31
-#define PR_TASK_PERF_COUNTERS_ENABLE            32
-
-#define MAX_COUNTERS		8
-
 static int			nr_counters			= -1;
 
 static __u64			count_filter		       = 100;
 
-#define MAX_NR_CPUS		256
-
 static int			event_count[MAX_COUNTERS];
 static unsigned long		event_id[MAX_COUNTERS];
 static int			event_raw[MAX_COUNTERS];
diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h
new file mode 100644
index 0000000..8c1559b
--- /dev/null
+++ b/Documentation/perf_counter/perfcounters.h
@@ -0,0 +1,137 @@
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE	31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#define MAX_COUNTERS			64
+#define MAX_NR_CPUS			256
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+typedef unsigned int		__u32;
+typedef unsigned long long	__u64;
+typedef long long		__s64;
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_CPU_CYCLES		=  0,
+	PERF_COUNT_INSTRUCTIONS		=  1,
+	PERF_COUNT_CACHE_REFERENCES	=  2,
+	PERF_COUNT_CACHE_MISSES		=  3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
+	PERF_COUNT_BRANCH_MISSES	=  5,
+	PERF_COUNT_BUS_CYCLES		=  6,
+
+	PERF_HW_EVENTS_MAX		=  7,
+
+	/*
+	 * Special "software" counters provided by the kernel, even if
+	 * the hardware does not support performance counters. These
+	 * counters measure various physical and sw events of the
+	 * kernel (and allow the profiling of them as well):
+	 */
+	PERF_COUNT_CPU_CLOCK		= -1,
+	PERF_COUNT_TASK_CLOCK		= -2,
+	PERF_COUNT_PAGE_FAULTS		= -3,
+	PERF_COUNT_CONTEXT_SWITCHES	= -4,
+	PERF_COUNT_CPU_MIGRATIONS	= -5,
+
+	PERF_SW_EVENTS_MIN		= -6,
+};
+
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		=  0,
+	PERF_RECORD_IRQ			=  1,
+	PERF_RECORD_GROUP		=  2,
+};
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+	__s64			type;
+
+	__u64			irq_period;
+	__u64			record_type;
+	__u64			read_format;
+
+	__u64			disabled       :  1, /* off by default        */
+				nmi	       :  1, /* NMI sampling          */
+				raw	       :  1, /* raw event type        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
+
+				__reserved_1   : 54;
+
+	__u32			extra_config_len;
+	__u32			__reserved_4;
+
+	__u64			__reserved_2;
+	__u64			__reserved_3;
+};
+
+#ifdef __x86_64__
+# define __NR_perf_counter_open	295
+#endif
+
+#ifdef __i386__
+# define __NR_perf_counter_open 333
+#endif
+
+#ifdef __powerpc__
+#define __NR_perf_counter_open 319
+#endif
+
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd,
+	unsigned long			flags)
+{
+	int ret;
+
+	ret = syscall(
+		__NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
+#if defined(__x86_64__) || defined(__i386__)
+	if (ret < 0 && ret > -4096) {
+		errno = -ret;
+		ret = -1;
+	}
+#endif
+	return ret;
+}
+
diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c
index 9a5808f..a3d4a7a 100644
--- a/Documentation/perf_counter/perfstat.c
+++ b/Documentation/perf_counter/perfstat.c
@@ -52,133 +52,7 @@
 
 #include <linux/unistd.h>
 
-#ifdef __x86_64__
-# define __NR_perf_counter_open	295
-#endif
-
-#ifdef __i386__
-# define __NR_perf_counter_open 333
-#endif
-
-#ifdef __powerpc__
-#define __NR_perf_counter_open 319
-#endif
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-typedef unsigned int		__u32;
-typedef unsigned long long	__u64;
-typedef long long		__s64;
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-/*
- * User-space ABI bits:
- */
-
-/*
- * Generalized performance counter event types, used by the hw_event.type
- * parameter of the sys_perf_counter_open() syscall:
- */
-enum hw_event_types {
-	/*
-	 * Common hardware events, generalized by the kernel:
-	 */
-	PERF_COUNT_CPU_CYCLES		=  0,
-	PERF_COUNT_INSTRUCTIONS		=  1,
-	PERF_COUNT_CACHE_REFERENCES	=  2,
-	PERF_COUNT_CACHE_MISSES		=  3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
-	PERF_COUNT_BRANCH_MISSES	=  5,
-	PERF_COUNT_BUS_CYCLES		=  6,
-
-	PERF_HW_EVENTS_MAX		=  7,
-
-	/*
-	 * Special "software" counters provided by the kernel, even if
-	 * the hardware does not support performance counters. These
-	 * counters measure various physical and sw events of the
-	 * kernel (and allow the profiling of them as well):
-	 */
-	PERF_COUNT_CPU_CLOCK		= -1,
-	PERF_COUNT_TASK_CLOCK		= -2,
-	PERF_COUNT_PAGE_FAULTS		= -3,
-	PERF_COUNT_CONTEXT_SWITCHES	= -4,
-	PERF_COUNT_CPU_MIGRATIONS	= -5,
-
-	PERF_SW_EVENTS_MIN		= -6,
-};
-
-/*
- * IRQ-notification data record type:
- */
-enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		=  0,
-	PERF_RECORD_IRQ			=  1,
-	PERF_RECORD_GROUP		=  2,
-};
-
-/*
- * Hardware event to monitor via a performance monitoring counter:
- */
-struct perf_counter_hw_event {
-	__s64			type;
-
-	__u64			irq_period;
-	__u64			record_type;
-	__u64			read_format;
-
-	__u64			disabled       :  1, /* off by default        */
-				nmi	       :  1, /* NMI sampling          */
-				raw	       :  1, /* raw event type        */
-				inherit	       :  1, /* children inherit it   */
-				pinned	       :  1, /* must always be on PMU */
-				exclusive      :  1, /* only group on PMU     */
-				exclude_user   :  1, /* don't count user      */
-				exclude_kernel :  1, /* ditto kernel          */
-				exclude_hv     :  1, /* ditto hypervisor      */
-				exclude_idle   :  1, /* don't count when idle */
-
-				__reserved_1   : 54;
-
-	__u32			extra_config_len;
-	__u32			__reserved_4;
-
-	__u64			__reserved_2;
-	__u64			__reserved_3;
-};
-
-/*
- * Ioctls that can be done on a perf counter fd:
- */
-#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
-
-asmlinkage int sys_perf_counter_open(
-
-	struct perf_counter_hw_event	*hw_event_uptr		__user,
-	pid_t				pid,
-	int				cpu,
-	int				group_fd,
-	unsigned long			flags)
-{
-	int ret;
-
-	ret = syscall(
-		__NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
-#if defined(__x86_64__) || defined(__i386__)
-	if (ret < 0 && ret > -4096) {
-		errno = -ret;
-		ret = -1;
-	}
-#endif
-	return ret;
-}
-
+#include "perfcounters.h"
 
 static char *hw_event_names [] = {
 	"CPU cycles",
@@ -224,9 +98,6 @@ static struct event_symbol event_symbols [] = {
 	{PERF_COUNT_CPU_MIGRATIONS,		"migrations",		},
 };
 
-#define MAX_COUNTERS					64
-#define MAX_NR_CPUS					256
-
 static int			nr_counters		= 0;
 static int			nr_cpus			= 0;
 
@@ -388,9 +259,6 @@ err:
 
 char fault_here[1000000];
 
-#define PR_TASK_PERF_COUNTERS_DISABLE           31
-#define PR_TASK_PERF_COUNTERS_ENABLE            32
-
 static int fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static void create_counter(int counter)
-- 
cgit v0.10.2


From f49012fad4ed2231c7380c0b1901122242b3eab0 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:03 +0800
Subject: perf_counter tools: Move perfstat supporting code into perfcounters.h

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h
index 8c1559b..0f3764a 100644
--- a/Documentation/perf_counter/perfcounters.h
+++ b/Documentation/perf_counter/perfcounters.h
@@ -16,6 +16,14 @@
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
+#define rdclock()					\
+({							\
+	struct timespec ts;				\
+							\
+	clock_gettime(CLOCK_MONOTONIC, &ts);		\
+	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
+})
+
 /*
  * Pick up some kernel type conventions:
  */
@@ -135,3 +143,125 @@ asmlinkage int sys_perf_counter_open(
 	return ret;
 }
 
+static char *hw_event_names [] = {
+	"CPU cycles",
+	"instructions",
+	"cache references",
+	"cache misses",
+	"branches",
+	"branch misses",
+	"bus cycles",
+};
+
+static char *sw_event_names [] = {
+	"cpu clock ticks",
+	"task clock ticks",
+	"pagefaults",
+	"context switches",
+	"CPU migrations",
+};
+
+struct event_symbol {
+	int event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols [] = {
+	{PERF_COUNT_CPU_CYCLES,			"cpu-cycles",		},
+	{PERF_COUNT_CPU_CYCLES,			"cycles",		},
+	{PERF_COUNT_INSTRUCTIONS,		"instructions",		},
+	{PERF_COUNT_CACHE_REFERENCES,		"cache-references",	},
+	{PERF_COUNT_CACHE_MISSES,		"cache-misses",		},
+	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branch-instructions",	},
+	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branches",		},
+	{PERF_COUNT_BRANCH_MISSES,		"branch-misses",	},
+	{PERF_COUNT_BUS_CYCLES,			"bus-cycles",		},
+	{PERF_COUNT_CPU_CLOCK,			"cpu-ticks",		},
+	{PERF_COUNT_CPU_CLOCK,			"ticks",		},
+	{PERF_COUNT_TASK_CLOCK,			"task-ticks",		},
+	{PERF_COUNT_PAGE_FAULTS,		"page-faults",		},
+	{PERF_COUNT_PAGE_FAULTS,		"faults",		},
+	{PERF_COUNT_CONTEXT_SWITCHES,		"context-switches",	},
+	{PERF_COUNT_CONTEXT_SWITCHES,		"cs",			},
+	{PERF_COUNT_CPU_MIGRATIONS,		"cpu-migrations",	},
+	{PERF_COUNT_CPU_MIGRATIONS,		"migrations",		},
+};
+
+static int type_valid(int type)
+{
+	if (type >= PERF_HW_EVENTS_MAX)
+		return 0;
+	if (type <= PERF_SW_EVENTS_MIN)
+		return 0;
+
+	return 1;
+}
+
+static char *event_name(int ctr)
+{
+	int type = event_id[ctr];
+	static char buf[32];
+
+	if (event_raw[ctr]) {
+		sprintf(buf, "raw 0x%x", type);
+		return buf;
+	}
+	if (!type_valid(type))
+		return "unknown";
+
+	if (type >= 0)
+		return hw_event_names[type];
+
+	return sw_event_names[-type-1];
+}
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static int match_event_symbols(char *str)
+{
+	unsigned int i;
+
+	if (isdigit(str[0]) || str[0] == '-')
+		return atoi(str);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return PERF_HW_EVENTS_MAX;
+}
+
+static void parse_events(char *str)
+{
+	int type, raw;
+
+again:
+	nr_counters++;
+	if (nr_counters == MAX_COUNTERS)
+		display_help();
+
+	raw = 0;
+	if (*str == 'r') {
+		raw = 1;
+		++str;
+		type = strtol(str, NULL, 16);
+	} else {
+		type = match_event_symbols(str);
+		if (!type_valid(type))
+			display_help();
+	}
+
+	event_id[nr_counters] = type;
+	event_raw[nr_counters] = raw;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+}
+
diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c
index a3d4a7a..3364dcb 100644
--- a/Documentation/perf_counter/perfstat.c
+++ b/Documentation/perf_counter/perfstat.c
@@ -54,50 +54,6 @@
 
 #include "perfcounters.h"
 
-static char *hw_event_names [] = {
-	"CPU cycles",
-	"instructions",
-	"cache references",
-	"cache misses",
-	"branches",
-	"branch misses",
-	"bus cycles",
-};
-
-static char *sw_event_names [] = {
-	"cpu clock ticks",
-	"task clock ticks",
-	"pagefaults",
-	"context switches",
-	"CPU migrations",
-};
-
-struct event_symbol {
-	int event;
-	char *symbol;
-};
-
-static struct event_symbol event_symbols [] = {
-	{PERF_COUNT_CPU_CYCLES,			"cpu-cycles",		},
-	{PERF_COUNT_CPU_CYCLES,			"cycles",		},
-	{PERF_COUNT_INSTRUCTIONS,		"instructions",		},
-	{PERF_COUNT_CACHE_REFERENCES,		"cache-references",	},
-	{PERF_COUNT_CACHE_MISSES,		"cache-misses",		},
-	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branch-instructions",	},
-	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branches",		},
-	{PERF_COUNT_BRANCH_MISSES,		"branch-misses",	},
-	{PERF_COUNT_BUS_CYCLES,			"bus-cycles",		},
-	{PERF_COUNT_CPU_CLOCK,			"cpu-ticks",		},
-	{PERF_COUNT_CPU_CLOCK,			"ticks",		},
-	{PERF_COUNT_TASK_CLOCK,			"task-ticks",		},
-	{PERF_COUNT_PAGE_FAULTS,		"page-faults",		},
-	{PERF_COUNT_PAGE_FAULTS,		"faults",		},
-	{PERF_COUNT_CONTEXT_SWITCHES,		"context-switches",	},
-	{PERF_COUNT_CONTEXT_SWITCHES,		"cs",			},
-	{PERF_COUNT_CPU_MIGRATIONS,		"cpu-migrations",	},
-	{PERF_COUNT_CPU_MIGRATIONS,		"migrations",		},
-};
-
 static int			nr_counters		= 0;
 static int			nr_cpus			= 0;
 
@@ -137,84 +93,6 @@ static void display_help(void)
 	exit(0);
 }
 
-static int type_valid(int type)
-{
-	if (type >= PERF_HW_EVENTS_MAX)
-		return 0;
-	if (type <= PERF_SW_EVENTS_MIN)
-		return 0;
-
-	return 1;
-}
-
-static char *event_name(int ctr)
-{
-	int type = event_id[ctr];
-	static char buf[32];
-
-	if (event_raw[ctr]) {
-		sprintf(buf, "raw 0x%x", type);
-		return buf;
-	}
-	if (!type_valid(type))
-		return "unknown";
-
-	if (type >= 0)
-		return hw_event_names[type];
-
-	return sw_event_names[-type-1];
-}
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static int match_event_symbols(char *str)
-{
-	unsigned int i;
-
-	if (isdigit(str[0]) || str[0] == '-')
-		return atoi(str);
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
-	}
-
-	return PERF_HW_EVENTS_MAX;
-}
-
-static void parse_events(char *str)
-{
-	int type, raw;
-
-again:
-	nr_counters++;
-	if (nr_counters == MAX_COUNTERS)
-		display_help();
-
-	raw = 0;
-	if (*str == 'r') {
-		raw = 1;
-		++str;
-		type = strtol(str, NULL, 16);
-	} else {
-		type = match_event_symbols(str);
-		if (!type_valid(type))
-			display_help();
-	}
-
-	event_id[nr_counters] = type;
-	event_raw[nr_counters] = raw;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-}
-
 static void process_options(int argc, char *argv[])
 {
 	for (;;) {
@@ -296,14 +174,6 @@ static void create_counter(int counter)
 }
 
 
-#define rdclock()					\
-({							\
-	struct timespec ts;				\
-							\
-	clock_gettime(CLOCK_MONOTONIC, &ts);		\
-	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
-})
-
 int main(int argc, char *argv[])
 {
 	unsigned long long t0, t1;
-- 
cgit v0.10.2


From 95bb3be1b3ca4a71cc168787b675d5b7852fc6be Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:04 +0800
Subject: perf_counter tools: support symbolic event names in kerneltop

- kerneltop: --event_id => --event
- kerneltop: can accept SW event types now
- perfstat: it used to implicitly add event -2(task-clock),
	    the new code no longer does this. Shall we?

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index fe70a2c..edc5b09 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -86,13 +86,9 @@ const unsigned int default_count[] = {
 	  10000,
 };
 
-static int			nr_counters			= -1;
-
 static __u64			count_filter		       = 100;
 
 static int			event_count[MAX_COUNTERS];
-static unsigned long		event_id[MAX_COUNTERS];
-static int			event_raw[MAX_COUNTERS];
 
 static int			tid				= -1;
 static int			profile_cpu			= -1;
@@ -125,7 +121,7 @@ static void display_help(void)
 	"KernelTop Options (up to %d event types can be specified at once):\n\n",
 		 MAX_COUNTERS);
 	printf(
-	" -e EID    --event_id=EID     # event type ID                    [default:  0]\n"
+	" -e EID    --event=EID        # event type ID                    [default:  0]\n"
 	"                                   0: CPU cycles\n"
 	"                                   1: instructions\n"
 	"                                   2: cache accesses\n"
@@ -160,7 +156,7 @@ static void process_options(int argc, char *argv[])
 			{"cpu",		required_argument,	NULL, 'C'},
 			{"delay",	required_argument,	NULL, 'd'},
 			{"dump_symtab",	no_argument,		NULL, 'D'},
-			{"event_id",	required_argument,	NULL, 'e'},
+			{"event",	required_argument,	NULL, 'e'},
 			{"filter",	required_argument,	NULL, 'f'},
 			{"group",	required_argument,	NULL, 'g'},
 			{"help",	no_argument,		NULL, 'h'},
@@ -178,8 +174,6 @@ static void process_options(int argc, char *argv[])
 
 		switch (c) {
 		case 'c':
-			if (nr_counters == -1)
-				nr_counters = 0;
 			event_count[nr_counters]	=   atoi(optarg); break;
 		case 'C':
 			/* CPU and PID are mutually exclusive */
@@ -192,18 +186,7 @@ static void process_options(int argc, char *argv[])
 		case 'd': delay_secs			=   atoi(optarg); break;
 		case 'D': dump_symtab			=              1; break;
 
-		case 'e':
-			nr_counters++;
-			if (nr_counters == MAX_COUNTERS) {
-				error = 1;
-				break;
-			}
-			if (*optarg == 'r') {
-				event_raw[nr_counters] = 1;
-				++optarg;
-			}
-			event_id[nr_counters] = strtol(optarg, NULL, 16);
-			break;
+		case 'e': error				= parse_events(optarg); break;
 
 		case 'f': count_filter			=   atoi(optarg); break;
 		case 'g': group				=   atoi(optarg); break;
@@ -226,9 +209,10 @@ static void process_options(int argc, char *argv[])
 	if (error)
 		display_help();
 
-	nr_counters++;
-	if (nr_counters < 1)
+	if (!nr_counters) {
 		nr_counters = 1;
+		event_id[0] = 0;
+	}
 
 	for (counter = 0; counter < nr_counters; counter++) {
 		if (event_count[counter])
diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h
index 0f3764a..99a90d8 100644
--- a/Documentation/perf_counter/perfcounters.h
+++ b/Documentation/perf_counter/perfcounters.h
@@ -143,6 +143,10 @@ asmlinkage int sys_perf_counter_open(
 	return ret;
 }
 
+static int			nr_counters			= 0;
+static long			event_id[MAX_COUNTERS]		= { -2, -5, -4, -3, 0, 1, 2, 3};
+static int			event_raw[MAX_COUNTERS];
+
 static char *hw_event_names [] = {
 	"CPU cycles",
 	"instructions",
@@ -235,14 +239,13 @@ static int match_event_symbols(char *str)
 	return PERF_HW_EVENTS_MAX;
 }
 
-static void parse_events(char *str)
+static int parse_events(char *str)
 {
 	int type, raw;
 
 again:
-	nr_counters++;
 	if (nr_counters == MAX_COUNTERS)
-		display_help();
+		return -1;
 
 	raw = 0;
 	if (*str == 'r') {
@@ -252,16 +255,18 @@ again:
 	} else {
 		type = match_event_symbols(str);
 		if (!type_valid(type))
-			display_help();
+			return -1;
 	}
 
 	event_id[nr_counters] = type;
 	event_raw[nr_counters] = raw;
+	nr_counters++;
 
 	str = strstr(str, ",");
 	if (str) {
 		str++;
 		goto again;
 	}
-}
 
+	return 0;
+}
diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c
index 3364dcb..fd59446 100644
--- a/Documentation/perf_counter/perfstat.c
+++ b/Documentation/perf_counter/perfstat.c
@@ -54,14 +54,8 @@
 
 #include "perfcounters.h"
 
-static int			nr_counters		= 0;
 static int			nr_cpus			= 0;
 
-static int			event_id[MAX_COUNTERS]	=
-					 { -2, -5, -4, -3, 0, 1, 2, 3};
-
-static int			event_raw[MAX_COUNTERS];
-
 static int			system_wide		= 0;
 
 static void display_help(void)
@@ -127,8 +121,6 @@ static void process_options(int argc, char *argv[])
 
 	if (!nr_counters)
 		nr_counters = 8;
-	else
-		nr_counters++;
 	return;
 
 err:
-- 
cgit v0.10.2


From e3908612d93dff9d7746d85d37c42593254bf282 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:05 +0800
Subject: perf_counter tools: Reuse event_name() in kerneltop

- can handle sw counters now
- the outputs will look slightly different

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index edc5b09..cba5cb0 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -67,16 +67,6 @@
 
 #include "perfcounters.h"
 
-const char *event_types [] = {
-	"CPU cycles",
-	"instructions",
-	"cache-refs",
-	"cache-misses",
-	"branches",
-	"branch-misses",
-	"bus cycles"
-};
-
 const unsigned int default_count[] = {
 	1000000,
 	1000000,
@@ -304,10 +294,7 @@ static void print_sym_table(void)
 		if (counter)
 			printf("/");
 
-		if (event_id[counter] < PERF_HW_EVENTS_MAX)
-			printf( "%s", event_types[event_id[counter]]);
-		else
-			printf( "raw:%04lx", event_id[counter]);
+		printf("%s", event_name(counter));
 	}
 
 	printf( "], ");
-- 
cgit v0.10.2


From f7524bda8be8be98db356d6a83ac1da451ecdb2e Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:06 +0800
Subject: perf_counter tools: move remaining code into kerneltop.c

- perfstat.c can be safely removed now
- perfstat: -s => -a for system wide accounting
- kerneltop: add -S/--stat for perfstat mode
- minor adjustments to kerneltop --help, perfstat --help

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index cba5cb0..9db65a4 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -3,7 +3,7 @@
 
    Build with:
 
-     cc -O6 -Wall `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c
+     cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c
 
    Sample output:
 
@@ -26,18 +26,40 @@
               12.00 - ffffffff804ffb7f : __ip_local_out
               11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
                8.54 - ffffffff805001a3 : ip_queue_xmit
+ */
 
-  Started by Ingo Molnar <mingo@redhat.com>
+/*
+ * perfstat:  /usr/bin/time -alike performance counter statistics utility
 
-  Improvements and fixes by:
+          It summarizes the counter events of all tasks (and child tasks),
+          covering all CPUs that the command (or workload) executes on.
+          It only counts the per-task events of the workload started,
+          independent of how many other tasks run on those CPUs.
 
-    Arjan van de Ven <arjan@linux.intel.com>
-    Yanmin Zhang <yanmin.zhang@intel.com>
-    Mike Galbraith <efault@gmx.de>
+   Sample output:
 
-  Released under the GPL v2. (and only v2, not any later version)
+   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
 
+   Performance counter stats for 'ls':
+
+           163516953 instructions
+                2295 cache-misses
+             2855182 branch-misses
  */
+
+ /*
+  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+  *
+  * Improvements and fixes by:
+  *
+  *   Arjan van de Ven <arjan@linux.intel.com>
+  *   Yanmin Zhang <yanmin.zhang@intel.com>
+  *   Wu Fengguang <fengguang.wu@intel.com>
+  *   Mike Galbraith <efault@gmx.de>
+  *
+  * Released under the GPL v2. (and only v2, not any later version)
+  */
+
 #define _GNU_SOURCE
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -67,18 +89,22 @@
 
 #include "perfcounters.h"
 
-const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
-};
 
-static __u64			count_filter		       = 100;
+#define MAX_COUNTERS			64
+#define MAX_NR_CPUS			256
+
+#define DEF_PERFSTAT_EVENTS		{ -2, -5, -4, -3, 0, 1, 2, 3}
+
+static int			run_perfstat			=  0;
+static int			system_wide			=  0;
 
+static int			nr_counters			=  0;
+static long			event_id[MAX_COUNTERS]		= DEF_PERFSTAT_EVENTS;
+static int			event_raw[MAX_COUNTERS];
 static int			event_count[MAX_COUNTERS];
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static __u64			count_filter		       = 100;
 
 static int			tid				= -1;
 static int			profile_cpu			= -1;
@@ -96,125 +122,335 @@ static int			delay_secs			=  2;
 static int			zero;
 static int			dump_symtab;
 
+static GList			*lines;
+
 struct source_line {
 	uint64_t		EIP;
 	unsigned long		count;
 	char			*line;
 };
 
-static GList			*lines;
+
+const unsigned int default_count[] = {
+	1000000,
+	1000000,
+	  10000,
+	  10000,
+	1000000,
+	  10000,
+};
+
+static char *hw_event_names[] = {
+	"CPU cycles",
+	"instructions",
+	"cache references",
+	"cache misses",
+	"branches",
+	"branch misses",
+	"bus cycles",
+};
+
+static char *sw_event_names[] = {
+	"cpu clock ticks",
+	"task clock ticks",
+	"pagefaults",
+	"context switches",
+	"CPU migrations",
+};
+
+struct event_symbol {
+	int event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols[] = {
+	{PERF_COUNT_CPU_CYCLES,			"cpu-cycles",		},
+	{PERF_COUNT_CPU_CYCLES,			"cycles",		},
+	{PERF_COUNT_INSTRUCTIONS,		"instructions",		},
+	{PERF_COUNT_CACHE_REFERENCES,		"cache-references",	},
+	{PERF_COUNT_CACHE_MISSES,		"cache-misses",		},
+	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branch-instructions",	},
+	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branches",		},
+	{PERF_COUNT_BRANCH_MISSES,		"branch-misses",	},
+	{PERF_COUNT_BUS_CYCLES,			"bus-cycles",		},
+	{PERF_COUNT_CPU_CLOCK,			"cpu-ticks",		},
+	{PERF_COUNT_CPU_CLOCK,			"ticks",		},
+	{PERF_COUNT_TASK_CLOCK,			"task-ticks",		},
+	{PERF_COUNT_PAGE_FAULTS,		"page-faults",		},
+	{PERF_COUNT_PAGE_FAULTS,		"faults",		},
+	{PERF_COUNT_CONTEXT_SWITCHES,		"context-switches",	},
+	{PERF_COUNT_CONTEXT_SWITCHES,		"cs",			},
+	{PERF_COUNT_CPU_MIGRATIONS,		"cpu-migrations",	},
+	{PERF_COUNT_CPU_MIGRATIONS,		"migrations",		},
+};
+
+static void display_events_help(void)
+{
+	unsigned int i;
+	int e;
+
+	printf(
+	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
+
+	for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) {
+		if (e != event_symbols[i].event) {
+			e = event_symbols[i].event;
+			printf(
+	"\n                             %2d: %-20s", e, event_symbols[i].symbol);
+		} else
+			printf(" %s", event_symbols[i].symbol);
+	}
+
+	printf("\n"
+	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
+}
+
+static void display_perfstat_help(void)
+{
+	printf(
+	"Usage: perfstat [<events...>] <cmd...>\n\n"
+	"PerfStat Options (up to %d event types can be specified):\n\n",
+		 MAX_COUNTERS);
+
+	display_events_help();
+
+	printf(
+	" -a                           # system-wide collection\n");
+	exit(0);
+}
 
 static void display_help(void)
 {
+	if (run_perfstat)
+		return display_perfstat_help();
+
 	printf(
-	"Usage: kerneltop [<options>]\n\n"
+	"Usage: kerneltop [<options>]\n"
+	"   Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
 	"KernelTop Options (up to %d event types can be specified at once):\n\n",
 		 MAX_COUNTERS);
+
+	display_events_help();
+
 	printf(
-	" -e EID    --event=EID        # event type ID                    [default:  0]\n"
-	"                                   0: CPU cycles\n"
-	"                                   1: instructions\n"
-	"                                   2: cache accesses\n"
-	"                                   3: cache misses\n"
-	"                                   4: branch instructions\n"
-	"                                   5: branch prediction misses\n"
-	"                                   6: bus cycles\n\n"
-	"                                rNNN: raw PMU events (eventsel+umask)\n\n"
+	" -S        --stat             # perfstat COMMAND\n"
+	" -a                           # system-wide collection (for perfstat)\n\n"
 	" -c CNT    --count=CNT        # event period to sample\n\n"
 	" -C CPU    --cpu=CPU          # CPU (-1 for all)                 [default: -1]\n"
 	" -p PID    --pid=PID          # PID of sampled task (-1 for all) [default: -1]\n\n"
 	" -d delay  --delay=<seconds>  # sampling/display delay           [default:  2]\n"
-	" -f CNT --filter=CNT          # min-event-count filter          [default: 100]\n\n"
+	" -f CNT    --filter=CNT       # min-event-count filter          [default: 100]\n\n"
 	" -s symbol --symbol=<symbol>  # function to be showed annotated one-shot\n"
-	" -x path   --vmlinux=<path>   # the vmlinux binary, required for -s use:\n"
+	" -x path   --vmlinux=<path>   # the vmlinux binary, required for -s use\n"
 	" -z        --zero             # zero counts after display\n"
 	" -D        --dump_symtab      # dump symbol table to stderr on startup\n"
-	"\n");
+	);
 
 	exit(0);
 }
 
-static void process_options(int argc, char *argv[])
+static int type_valid(int type)
 {
-	int error = 0, counter;
+	if (type >= PERF_HW_EVENTS_MAX)
+		return 0;
+	if (type <= PERF_SW_EVENTS_MIN)
+		return 0;
 
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"count",	required_argument,	NULL, 'c'},
-			{"cpu",		required_argument,	NULL, 'C'},
-			{"delay",	required_argument,	NULL, 'd'},
-			{"dump_symtab",	no_argument,		NULL, 'D'},
-			{"event",	required_argument,	NULL, 'e'},
-			{"filter",	required_argument,	NULL, 'f'},
-			{"group",	required_argument,	NULL, 'g'},
-			{"help",	no_argument,		NULL, 'h'},
-			{"nmi",		required_argument,	NULL, 'n'},
-			{"pid",		required_argument,	NULL, 'p'},
-			{"vmlinux",	required_argument,	NULL, 'x'},
-			{"symbol",	required_argument,	NULL, 's'},
-			{"zero",	no_argument,		NULL, 'z'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "c:C:d:De:f:g:hn:p:s:x:z",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
+	return 1;
+}
 
-		switch (c) {
-		case 'c':
-			event_count[nr_counters]	=   atoi(optarg); break;
-		case 'C':
-			/* CPU and PID are mutually exclusive */
-			if (tid != -1) {
-				printf("WARNING: CPU switch overriding PID\n");
-				sleep(1);
-				tid = -1;
-			}
-			profile_cpu			=   atoi(optarg); break;
-		case 'd': delay_secs			=   atoi(optarg); break;
-		case 'D': dump_symtab			=              1; break;
+static char *event_name(int ctr)
+{
+	int type = event_id[ctr];
+	static char buf[32];
 
-		case 'e': error				= parse_events(optarg); break;
+	if (event_raw[ctr]) {
+		sprintf(buf, "raw 0x%x", type);
+		return buf;
+	}
+	if (!type_valid(type))
+		return "unknown";
 
-		case 'f': count_filter			=   atoi(optarg); break;
-		case 'g': group				=   atoi(optarg); break;
-		case 'h':      				  display_help(); break;
-		case 'n': nmi				=   atoi(optarg); break;
-		case 'p':
-			/* CPU and PID are mutually exclusive */
-			if (profile_cpu != -1) {
-				printf("WARNING: PID switch overriding CPU\n");
-				sleep(1);
-				profile_cpu = -1;
+	if (type >= 0)
+		return hw_event_names[type];
+
+	return sw_event_names[-type-1];
+}
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static int match_event_symbols(char *str)
+{
+	unsigned int i;
+
+	if (isdigit(str[0]) || str[0] == '-')
+		return atoi(str);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return PERF_HW_EVENTS_MAX;
+}
+
+static int parse_events(char *str)
+{
+	int type, raw;
+
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	raw = 0;
+	if (*str == 'r') {
+		raw = 1;
+		++str;
+		type = strtol(str, NULL, 16);
+	} else {
+		type = match_event_symbols(str);
+		if (!type_valid(type))
+			return -1;
+	}
+
+	event_id[nr_counters] = type;
+	event_raw[nr_counters] = raw;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+
+/*
+ * perfstat
+ */
+
+char fault_here[1000000];
+
+static void create_perfstat_counter(int counter)
+{
+	struct perf_counter_hw_event hw_event;
+
+	memset(&hw_event, 0, sizeof(hw_event));
+	hw_event.type		= event_id[counter];
+	hw_event.raw		= event_raw[counter];
+	hw_event.record_type	= PERF_RECORD_SIMPLE;
+	hw_event.nmi		= 0;
+
+	if (system_wide) {
+		int cpu;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
+			if (fd[cpu][counter] < 0) {
+				printf("perfstat error: syscall returned with %d (%s)\n",
+						fd[cpu][counter], strerror(errno));
+				exit(-1);
 			}
-			tid				=   atoi(optarg); break;
-		case 's': sym_filter			= strdup(optarg); break;
-		case 'x': vmlinux			= strdup(optarg); break;
-		case 'z': zero				=              1; break;
-		default: error = 1; break;
+		}
+	} else {
+		hw_event.inherit	= 1;
+		hw_event.disabled	= 1;
+
+		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
+		if (fd[0][counter] < 0) {
+			printf("perfstat error: syscall returned with %d (%s)\n",
+					fd[0][counter], strerror(errno));
+			exit(-1);
 		}
 	}
-	if (error)
-		display_help();
+}
 
-	if (!nr_counters) {
-		nr_counters = 1;
-		event_id[0] = 0;
+int do_perfstat(int argc, char *argv[])
+{
+	unsigned long long t0, t1;
+	int counter;
+	ssize_t res;
+	int status;
+	int pid;
+
+	if (!system_wide)
+		nr_cpus = 1;
+
+	for (counter = 0; counter < nr_counters; counter++)
+		create_perfstat_counter(counter);
+
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Enable counters and exec the command:
+	 */
+	t0 = rdclock();
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	if ((pid = fork()) < 0)
+		perror("failed to fork");
+	if (!pid) {
+		if (execvp(argv[0], argv)) {
+			perror(argv[0]);
+			exit(-1);
+		}
 	}
+	while (wait(&status) >= 0)
+		;
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+	t1 = rdclock();
+
+	fflush(stdout);
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Performance counter stats for \'%s\':\n",
+		argv[0]);
+	fprintf(stderr, "\n");
 
 	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
-			continue;
+		int cpu;
+		__u64 count, single_count;
+
+		count = 0;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			res = read(fd[cpu][counter],
+					(char *) &single_count, sizeof(single_count));
+			assert(res == sizeof(single_count));
+			count += single_count;
+		}
 
-		if (event_id[counter] < PERF_HW_EVENTS_MAX)
-			event_count[counter] = default_count[event_id[counter]];
-		else
-			event_count[counter] = 100000;
+		if (!event_raw[counter] &&
+		    (event_id[counter] == PERF_COUNT_CPU_CLOCK ||
+		     event_id[counter] == PERF_COUNT_TASK_CLOCK)) {
+
+			double msecs = (double)count / 1000000;
+
+			fprintf(stderr, " %14.6f  %-20s (msecs)\n",
+				msecs, event_name(counter));
+		} else {
+			fprintf(stderr, " %14Ld  %-20s (events)\n",
+				count, event_name(counter));
+		}
+		if (!counter)
+			fprintf(stderr, "\n");
 	}
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
+			(double)(t1-t0)/1e6);
+	fprintf(stderr, "\n");
+
+	return 0;
 }
 
+/*
+ * Symbols
+ */
+
 static uint64_t			min_ip;
 static uint64_t			max_ip = -1ll;
 
@@ -507,6 +743,9 @@ static void parse_symbols(void)
 	}
 }
 
+/*
+ * Source lines
+ */
 
 static void parse_vmlinux(char *filename)
 {
@@ -527,7 +766,7 @@ static void parse_vmlinux(char *filename)
 		char *c;
 
 		src = malloc(sizeof(struct source_line));
-		assert(src != NULL);	
+		assert(src != NULL);
 		memset(src, 0, sizeof(struct source_line));
 
 		if (getline(&src->line, &dummy, file) < 0)
@@ -706,11 +945,100 @@ static void process_event(uint64_t ip, int counter)
 	record_ip(ip, counter);
 }
 
+static void process_options(int argc, char *argv[])
+{
+	int error = 0, counter;
+
+	if (strstr(argv[0], "perfstat"))
+		run_perfstat = 1;
+
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"count",	required_argument,	NULL, 'c'},
+			{"cpu",		required_argument,	NULL, 'C'},
+			{"delay",	required_argument,	NULL, 'd'},
+			{"dump_symtab",	no_argument,		NULL, 'D'},
+			{"event",	required_argument,	NULL, 'e'},
+			{"filter",	required_argument,	NULL, 'f'},
+			{"group",	required_argument,	NULL, 'g'},
+			{"help",	no_argument,		NULL, 'h'},
+			{"nmi",		required_argument,	NULL, 'n'},
+			{"pid",		required_argument,	NULL, 'p'},
+			{"vmlinux",	required_argument,	NULL, 'x'},
+			{"symbol",	required_argument,	NULL, 's'},
+			{"stat",	no_argument,		NULL, 'S'},
+			{"zero",	no_argument,		NULL, 'z'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'a': system_wide			=	       1; break;
+		case 'c': event_count[nr_counters]	=   atoi(optarg); break;
+		case 'C':
+			/* CPU and PID are mutually exclusive */
+			if (tid != -1) {
+				printf("WARNING: CPU switch overriding PID\n");
+				sleep(1);
+				tid = -1;
+			}
+			profile_cpu			=   atoi(optarg); break;
+		case 'd': delay_secs			=   atoi(optarg); break;
+		case 'D': dump_symtab			=              1; break;
+
+		case 'e': error				= parse_events(optarg); break;
+
+		case 'f': count_filter			=   atoi(optarg); break;
+		case 'g': group				=   atoi(optarg); break;
+		case 'h':      				  display_help(); break;
+		case 'n': nmi				=   atoi(optarg); break;
+		case 'p':
+			/* CPU and PID are mutually exclusive */
+			if (profile_cpu != -1) {
+				printf("WARNING: PID switch overriding CPU\n");
+				sleep(1);
+				profile_cpu = -1;
+			}
+			tid				=   atoi(optarg); break;
+		case 's': sym_filter			= strdup(optarg); break;
+		case 'S': run_perfstat			=	       1; break;
+		case 'x': vmlinux			= strdup(optarg); break;
+		case 'z': zero				=              1; break;
+		default: error = 1; break;
+		}
+	}
+	if (error)
+		display_help();
+
+	if (!nr_counters) {
+		if (run_perfstat)
+			nr_counters = 8;
+		else {
+			nr_counters = 1;
+			event_id[0] = 0;
+		}
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (event_count[counter])
+			continue;
+
+		if (event_id[counter] < PERF_HW_EVENTS_MAX)
+			event_count[counter] = default_count[event_id[counter]];
+		else
+			event_count[counter] = 100000;
+	}
+}
+
 int main(int argc, char *argv[])
 {
 	struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS];
 	struct perf_counter_hw_event hw_event;
-	int fd[MAX_NR_CPUS][MAX_COUNTERS];
 	int i, counter, group_fd;
 	unsigned int cpu;
 	uint64_t ip;
@@ -720,11 +1048,15 @@ int main(int argc, char *argv[])
 	process_options(argc, argv);
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	if (run_perfstat)
+		return do_perfstat(argc, argv);
+
 	if (tid != -1 || profile_cpu != -1)
 		nr_cpus = 1;
 
-	assert(nr_cpus <= MAX_NR_CPUS);
-
 	for (i = 0; i < nr_cpus; i++) {
 		group_fd = -1;
 		for (counter = 0; counter < nr_counters; counter++) {
diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h
index 99a90d8..32e24b9 100644
--- a/Documentation/perf_counter/perfcounters.h
+++ b/Documentation/perf_counter/perfcounters.h
@@ -11,9 +11,6 @@
 #define PR_TASK_PERF_COUNTERS_DISABLE	31
 #define PR_TASK_PERF_COUNTERS_ENABLE    32
 
-#define MAX_COUNTERS			64
-#define MAX_NR_CPUS			256
-
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
 #define rdclock()					\
@@ -110,6 +107,7 @@ struct perf_counter_hw_event {
 	__u64			__reserved_3;
 };
 
+
 #ifdef __x86_64__
 # define __NR_perf_counter_open	295
 #endif
@@ -142,131 +140,3 @@ asmlinkage int sys_perf_counter_open(
 #endif
 	return ret;
 }
-
-static int			nr_counters			= 0;
-static long			event_id[MAX_COUNTERS]		= { -2, -5, -4, -3, 0, 1, 2, 3};
-static int			event_raw[MAX_COUNTERS];
-
-static char *hw_event_names [] = {
-	"CPU cycles",
-	"instructions",
-	"cache references",
-	"cache misses",
-	"branches",
-	"branch misses",
-	"bus cycles",
-};
-
-static char *sw_event_names [] = {
-	"cpu clock ticks",
-	"task clock ticks",
-	"pagefaults",
-	"context switches",
-	"CPU migrations",
-};
-
-struct event_symbol {
-	int event;
-	char *symbol;
-};
-
-static struct event_symbol event_symbols [] = {
-	{PERF_COUNT_CPU_CYCLES,			"cpu-cycles",		},
-	{PERF_COUNT_CPU_CYCLES,			"cycles",		},
-	{PERF_COUNT_INSTRUCTIONS,		"instructions",		},
-	{PERF_COUNT_CACHE_REFERENCES,		"cache-references",	},
-	{PERF_COUNT_CACHE_MISSES,		"cache-misses",		},
-	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branch-instructions",	},
-	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branches",		},
-	{PERF_COUNT_BRANCH_MISSES,		"branch-misses",	},
-	{PERF_COUNT_BUS_CYCLES,			"bus-cycles",		},
-	{PERF_COUNT_CPU_CLOCK,			"cpu-ticks",		},
-	{PERF_COUNT_CPU_CLOCK,			"ticks",		},
-	{PERF_COUNT_TASK_CLOCK,			"task-ticks",		},
-	{PERF_COUNT_PAGE_FAULTS,		"page-faults",		},
-	{PERF_COUNT_PAGE_FAULTS,		"faults",		},
-	{PERF_COUNT_CONTEXT_SWITCHES,		"context-switches",	},
-	{PERF_COUNT_CONTEXT_SWITCHES,		"cs",			},
-	{PERF_COUNT_CPU_MIGRATIONS,		"cpu-migrations",	},
-	{PERF_COUNT_CPU_MIGRATIONS,		"migrations",		},
-};
-
-static int type_valid(int type)
-{
-	if (type >= PERF_HW_EVENTS_MAX)
-		return 0;
-	if (type <= PERF_SW_EVENTS_MIN)
-		return 0;
-
-	return 1;
-}
-
-static char *event_name(int ctr)
-{
-	int type = event_id[ctr];
-	static char buf[32];
-
-	if (event_raw[ctr]) {
-		sprintf(buf, "raw 0x%x", type);
-		return buf;
-	}
-	if (!type_valid(type))
-		return "unknown";
-
-	if (type >= 0)
-		return hw_event_names[type];
-
-	return sw_event_names[-type-1];
-}
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static int match_event_symbols(char *str)
-{
-	unsigned int i;
-
-	if (isdigit(str[0]) || str[0] == '-')
-		return atoi(str);
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
-	}
-
-	return PERF_HW_EVENTS_MAX;
-}
-
-static int parse_events(char *str)
-{
-	int type, raw;
-
-again:
-	if (nr_counters == MAX_COUNTERS)
-		return -1;
-
-	raw = 0;
-	if (*str == 'r') {
-		raw = 1;
-		++str;
-		type = strtol(str, NULL, 16);
-	} else {
-		type = match_event_symbols(str);
-		if (!type_valid(type))
-			return -1;
-	}
-
-	event_id[nr_counters] = type;
-	event_raw[nr_counters] = raw;
-	nr_counters++;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-
-	return 0;
-}
-- 
cgit v0.10.2


From ef45fa9e6c1694d3e8063f39749097a6e496b12c Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:07 +0800
Subject: perf_counter tools: fix comment for sym_weight()

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 9db65a4..7bf2a51 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -473,7 +473,7 @@ static struct sym_entry		sym_table[MAX_SYMS];
 static void show_details(struct sym_entry *sym);
 
 /*
- * Ordering weight: count-1 * count-1 * ... / count-n
+ * Ordering weight: count-1 * count-2 * ... / count-n
  */
 static double sym_weight(const struct sym_entry *sym)
 {
-- 
cgit v0.10.2


From 3ab8d792b1348eaabfe550ba60902d781d160dd4 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:08 +0800
Subject: perf_counter tools: fix event_id type

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 7bf2a51..7bfb0f0 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -99,7 +99,7 @@ static int			run_perfstat			=  0;
 static int			system_wide			=  0;
 
 static int			nr_counters			=  0;
-static long			event_id[MAX_COUNTERS]		= DEF_PERFSTAT_EVENTS;
+static __s64			event_id[MAX_COUNTERS]		= DEF_PERFSTAT_EVENTS;
 static int			event_raw[MAX_COUNTERS];
 static int			event_count[MAX_COUNTERS];
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
@@ -261,11 +261,11 @@ static int type_valid(int type)
 
 static char *event_name(int ctr)
 {
-	int type = event_id[ctr];
+	__s64 type = event_id[ctr];
 	static char buf[32];
 
 	if (event_raw[ctr]) {
-		sprintf(buf, "raw 0x%x", type);
+		sprintf(buf, "raw 0x%llx", (long long)type);
 		return buf;
 	}
 	if (!type_valid(type))
@@ -299,7 +299,8 @@ static int match_event_symbols(char *str)
 
 static int parse_events(char *str)
 {
-	int type, raw;
+	__s64 type;
+	int raw;
 
 again:
 	if (nr_counters == MAX_COUNTERS)
-- 
cgit v0.10.2


From dda7c02f33833bfa9412ba6f0e410b0a18b42c88 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:09 +0800
Subject: perf_counter tools: cut down default count for cpu-cycles

In my system, it takes kerneltop dozens of minutes to
show up usable numbers. Make the default count 100 times
smaller fixed this long startup latency.

I'm not sure if it's the right solution though.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 7bfb0f0..0bd3c13 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -132,7 +132,7 @@ struct source_line {
 
 
 const unsigned int default_count[] = {
-	1000000,
+	  10000,
 	1000000,
 	  10000,
 	  10000,
-- 
cgit v0.10.2


From af9522cf133e9be6da8525a46a9ed7e7659f0e1a Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 20 Mar 2009 10:08:10 +0800
Subject: perf_counter tools: when no command is feed to perfstat, display help
 and exit

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 0bd3c13..81a68aa 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -387,6 +387,9 @@ int do_perfstat(int argc, char *argv[])
 	argc -= optind;
 	argv += optind;
 
+	if (!argc)
+		display_help();
+
 	/*
 	 * Enable counters and exec the command:
 	 */
-- 
cgit v0.10.2


From f4a2deb4860497f4332cf6a1acddab3dd628ddf0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Mar 2009 18:22:06 +0100
Subject: perf_counter: remove the event config bitfields

Since the bitfields turned into a bit of a mess, remove them and rely on
good old masks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.059499915@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 6413d9c..d056515 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -602,13 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 	if ((s64)counter->hw_event.irq_period < 0)
 		return NULL;
-	if (!counter->hw_event.raw_type) {
-		ev = counter->hw_event.event_id;
+	if (!perf_event_raw(&counter->hw_event)) {
+		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return NULL;
 		ev = ppmu->generic_events[ev];
 	} else {
-		ev = counter->hw_event.raw_event_id;
+		ev = perf_event_config(&counter->hw_event);
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 902282d..3f95b0c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (hw_event->raw_type) {
-		hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id);
+	if (perf_event_raw(hw_event)) {
+		hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event));
 	} else {
-		if (hw_event->event_id >= pmc_ops->max_events)
+		if (perf_event_id(hw_event) >= pmc_ops->max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= pmc_ops->event_map(hw_event->event_id);
+		hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
 	}
 	counter->wakeup_pending = 0;
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 98f5990..56099e5 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -82,32 +82,37 @@ enum perf_counter_record_type {
 	PERF_RECORD_GROUP		= 2,
 };
 
+#define __PERF_COUNTER_MASK(name) 			\
+	(((1ULL << PERF_COUNTER_##name##_BITS) - 1) <<	\
+	 PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW_BITS		1
+#define PERF_COUNTER_RAW_SHIFT		63
+#define PERF_COUNTER_RAW_MASK		__PERF_COUNTER_MASK(RAW)
+
+#define PERF_COUNTER_CONFIG_BITS	63
+#define PERF_COUNTER_CONFIG_SHIFT	0
+#define PERF_COUNTER_CONFIG_MASK	__PERF_COUNTER_MASK(CONFIG)
+
+#define PERF_COUNTER_TYPE_BITS		7
+#define PERF_COUNTER_TYPE_SHIFT		56
+#define PERF_COUNTER_TYPE_MASK		__PERF_COUNTER_MASK(TYPE)
+
+#define PERF_COUNTER_EVENT_BITS		56
+#define PERF_COUNTER_EVENT_SHIFT	0
+#define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
+
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	union {
-#ifndef __BIG_ENDIAN_BITFIELD
-		struct {
-			__u64			event_id	: 56,
-						type		:  8;
-		};
-		struct {
-			__u64			raw_event_id	: 63,
-						raw_type	:  1;
-		};
-#else
-		struct {
-			__u64			type		:  8,
-						event_id	: 56;
-		};
-		struct {
-			__u64			raw_type	:  1,
-						raw_event_id	: 63;
-		};
-#endif /* __BIT_ENDIAN_BITFIELD */
-		__u64		event_config;
-	};
+	/*
+	 * The MSB of the config word signifies if the rest contains cpu
+	 * specific (raw) counter configuration data, if unset, the next
+	 * 7 bits are an event type and the rest of the bits are the event
+	 * identifier.
+	 */
+	__u64			config;
 
 	__u64			irq_period;
 	__u64			record_type;
@@ -157,6 +162,27 @@ struct perf_counter_hw_event {
 
 struct task_struct;
 
+static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event)
+{
+	return hw_event->config & PERF_COUNTER_RAW_MASK;
+}
+
+static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event)
+{
+	return hw_event->config & PERF_COUNTER_CONFIG_MASK;
+}
+
+static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event)
+{
+	return (hw_event->config & PERF_COUNTER_TYPE_MASK) >>
+		PERF_COUNTER_TYPE_SHIFT;
+}
+
+static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event)
+{
+	return hw_event->config & PERF_COUNTER_EVENT_MASK;
+}
+
 /**
  * struct hw_perf_counter - performance counter hardware details:
  */
@@ -336,8 +362,8 @@ extern void perf_counter_output(struct perf_counter *counter,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !counter->hw_event.raw_type &&
-		counter->hw_event.type != PERF_TYPE_HARDWARE;
+	return !perf_event_raw(&counter->hw_event) &&
+		perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f054b8c..ca14fc4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1379,7 +1379,7 @@ static void perf_counter_handle_group(struct perf_counter *counter)
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
 		if (sub != counter)
 			sub->hw_ops->read(sub);
-		perf_counter_store_irq(counter, sub->hw_event.event_config);
+		perf_counter_store_irq(counter, sub->hw_event.config);
 		perf_counter_store_irq(counter, atomic64_read(&sub->count));
 	}
 }
@@ -1489,13 +1489,13 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return 0;
 
-	if (counter->hw_event.raw_type)
+	if (perf_event_raw(&counter->hw_event))
 		return 0;
 
-	if (counter->hw_event.type != type)
+	if (perf_event_type(&counter->hw_event) != type)
 		return 0;
 
-	if (counter->hw_event.event_id != event)
+	if (perf_event_id(&counter->hw_event) != event)
 		return 0;
 
 	if (counter->hw_event.exclude_user && user_mode(regs))
@@ -1757,13 +1757,13 @@ extern void ftrace_profile_disable(int);
 
 static void tp_perf_counter_destroy(struct perf_counter *counter)
 {
-	ftrace_profile_disable(counter->hw_event.event_id);
+	ftrace_profile_disable(perf_event_id(&counter->hw_event));
 }
 
 static const struct hw_perf_counter_ops *
 tp_perf_counter_init(struct perf_counter *counter)
 {
-	int event_id = counter->hw_event.event_id;
+	int event_id = perf_event_id(&counter->hw_event);
 	int ret;
 
 	ret = ftrace_profile_enable(event_id);
@@ -1797,7 +1797,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (counter->hw_event.event_id) {
+	switch (perf_event_id(&counter->hw_event)) {
 	case PERF_COUNT_CPU_CLOCK:
 		hw_ops = &perf_ops_cpu_clock;
 
@@ -1882,9 +1882,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	hw_ops = NULL;
 
-	if (hw_event->raw_type)
+	if (perf_event_raw(hw_event)) {
 		hw_ops = hw_perf_counter_init(counter);
-	else switch (hw_event->type) {
+		goto done;
+	}
+
+	switch (perf_event_type(hw_event)) {
 	case PERF_TYPE_HARDWARE:
 		hw_ops = hw_perf_counter_init(counter);
 		break;
@@ -1902,6 +1905,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		kfree(counter);
 		return NULL;
 	}
+done:
 	counter->hw_ops = hw_ops;
 
 	return counter;
-- 
cgit v0.10.2


From 96f6d4444302bb2ea2cf409529eef816462f6ce0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Mar 2009 18:22:07 +0100
Subject: perf_counter: avoid recursion

Tracepoint events like lock_acquire and software counters like
pagefaults can recurse into the perf counter code again, avoid that.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.152096433@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 56099e5..18dc17d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -328,6 +328,13 @@ struct perf_cpu_context {
 	int				active_oncpu;
 	int				max_pertask;
 	int				exclusive;
+
+	/*
+	 * Recursion avoidance:
+	 *
+	 * task, softirq, irq, nmi context
+	 */
+	int			recursion[4];
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ca14fc4..ce34bff 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/vmstat.h>
 #include <linux/rculist.h>
+#include <linux/hardirq.h>
 
 #include <asm/irq_regs.h>
 
@@ -1532,10 +1533,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 	rcu_read_unlock();
 }
 
+static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
+{
+	if (in_nmi())
+		return &cpuctx->recursion[3];
+
+	if (in_irq())
+		return &cpuctx->recursion[2];
+
+	if (in_softirq())
+		return &cpuctx->recursion[1];
+
+	return &cpuctx->recursion[0];
+}
+
 static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 				   u64 nr, int nmi, struct pt_regs *regs)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+	int *recursion = perf_swcounter_recursion_context(cpuctx);
+
+	if (*recursion)
+		goto out;
+
+	(*recursion)++;
+	barrier();
 
 	perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
 	if (cpuctx->task_ctx) {
@@ -1543,6 +1565,10 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 				nr, nmi, regs);
 	}
 
+	barrier();
+	(*recursion)--;
+
+out:
 	put_cpu_var(perf_cpu_context);
 }
 
-- 
cgit v0.10.2


From 37d81828385f8ff823caaaf1a83e72d065b6cfa1 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 23 Mar 2009 18:22:08 +0100
Subject: perf_counter: add an mmap method to allow userspace to read hardware
 counters

Impact: new feature giving performance improvement

This adds the ability for userspace to do an mmap on a hardware counter
fd and get access to a read-only page that contains the information
needed to translate a hardware counter value to the full 64-bit
counter value that would be returned by a read on the fd.  This is
useful on architectures that allow user programs to read the hardware
counters, such as PowerPC.

The mmap will only succeed if the counter is a hardware counter
monitoring the current process.

On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter
and translate it to the full 64-bit value in about 30ns using the
mmapped page, compared to about 830ns for the read syscall on the
counter, so this does give a significant performance improvement.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090323172417.297057964@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index d056515..e434928 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -417,6 +417,8 @@ void hw_perf_restore(u64 disable)
 		atomic64_set(&counter->hw.prev_count, val);
 		counter->hw.idx = hwc_index[i] + 1;
 		write_pmc(counter->hw.idx, val);
+		if (counter->user_page)
+			perf_counter_update_userpage(counter);
 	}
 	mb();
 	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
@@ -572,6 +574,8 @@ static void power_perf_disable(struct perf_counter *counter)
 			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
 			write_pmc(counter->hw.idx, 0);
 			counter->hw.idx = 0;
+			if (counter->user_page)
+				perf_counter_update_userpage(counter);
 			break;
 		}
 	}
@@ -698,6 +702,8 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	write_pmc(counter->hw.idx, val);
 	atomic64_set(&counter->hw.prev_count, val);
 	atomic64_set(&counter->hw.period_left, left);
+	if (counter->user_page)
+		perf_counter_update_userpage(counter);
 
 	/*
 	 * Finally record data if requested.
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 18dc17d..40b324e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -143,6 +143,17 @@ struct perf_counter_hw_event {
 #define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
 #define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
 
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+	__u32	version;		/* version number of this structure */
+	__u32	compat_version;		/* lowest version this is compat with */
+	__u32	lock;			/* seqlock for synchronization */
+	__u32	index;			/* hardware counter identifier */
+	__s64	offset;			/* add to hardware counter value */
+};
+
 #ifdef __KERNEL__
 /*
  * Kernel-internal data types and definitions:
@@ -278,6 +289,9 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
+	/* pointer to page shared with userspace via mmap */
+	unsigned long			user_page;
+
 	/* read() / irq related data */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
@@ -361,6 +375,7 @@ extern int perf_counter_task_enable(void);
 extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu);
+extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 extern void perf_counter_output(struct perf_counter *counter,
 				int nmi, struct pt_regs *regs);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ce34bff..d9cfd90 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
+	free_page(counter->user_page);
 	free_counter(counter);
 	put_context(ctx);
 
@@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
+void perf_counter_update_userpage(struct perf_counter *counter)
+{
+	struct perf_counter_mmap_page *userpg;
+
+	if (!counter->user_page)
+		return;
+	userpg = (struct perf_counter_mmap_page *) counter->user_page;
+
+	++userpg->lock;
+	smp_wmb();
+	userpg->index = counter->hw.idx;
+	userpg->offset = atomic64_read(&counter->count);
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+		userpg->offset -= atomic64_read(&counter->hw.prev_count);
+	smp_wmb();
+	++userpg->lock;
+}
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	if (!counter->user_page)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = virt_to_page(counter->user_page);
+	get_page(vmf->page);
+	return 0;
+}
+
+static struct vm_operations_struct perf_mmap_vmops = {
+	.fault = perf_mmap_fault,
+};
+
+static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = file->private_data;
+	unsigned long userpg;
+
+	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+		return -EINVAL;
+
+	/*
+	 * For now, restrict to the case of a hardware counter
+	 * on the current task.
+	 */
+	if (is_software_counter(counter) || counter->task != current)
+		return -EINVAL;
+
+	userpg = counter->user_page;
+	if (!userpg) {
+		userpg = get_zeroed_page(GFP_KERNEL);
+		mutex_lock(&counter->mutex);
+		if (counter->user_page) {
+			free_page(userpg);
+			userpg = counter->user_page;
+		} else {
+			counter->user_page = userpg;
+		}
+		mutex_unlock(&counter->mutex);
+		if (!userpg)
+			return -ENOMEM;
+	}
+
+	perf_counter_update_userpage(counter);
+
+	vma->vm_flags &= ~VM_MAYWRITE;
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_ops = &perf_mmap_vmops;
+	return 0;
+}
+
 static const struct file_operations perf_fops = {
 	.release		= perf_release,
 	.read			= perf_read,
 	.poll			= perf_poll,
 	.unlocked_ioctl		= perf_ioctl,
 	.compat_ioctl		= perf_ioctl,
+	.mmap			= perf_mmap,
 };
 
 /*
-- 
cgit v0.10.2


From 9ab772cd535c4b256a577eae516f9c7462346b2d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 23 Mar 2009 18:22:09 +0100
Subject: mutex: add atomic_dec_and_mutex_lock()

Much like the atomic_dec_and_lock() function in which we take an hold a
spin_lock if we drop the atomic to 0 this function takes and holds the
mutex if we dec the atomic to 0.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.410913479@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 3069ec7..93054fc 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
 
+/**
+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
+ * @cnt: the atomic which we are to dec
+ * @lock: the mutex to return holding if we dec to 0
+ *
+ * return true and hold lock if we dec to 0, return false otherwise
+ */
+static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
+{
+	/* dec if we can't possibly hit 0 */
+	if (atomic_add_unless(cnt, -1, 1))
+		return 0;
+	/* we might hit 0, so take the lock */
+	mutex_lock(lock);
+	if (!atomic_dec_and_test(cnt)) {
+		/* when we actually did the dec, we didn't hit 0 */
+		mutex_unlock(lock);
+		return 0;
+	}
+	/* we hit 0, and we hold the lock */
+	return 1;
+}
+
 #endif
-- 
cgit v0.10.2


From b09d2501ed3d294619cbfbcf828ad39324d0e548 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 1 Apr 2009 17:21:56 -0700
Subject: mutex: drop "inline" from mutex_lock() inside kernel/mutex.c

Impact: build fix

mutex_lock() is was defined inline in kernel/mutex.c, but wasn't
declared so not in <linux/mutex.h>.  This didn't cause a problem until
checkin 3a2d367d9aabac486ac4444c6c7ec7a1dab16267 added the
atomic_dec_and_mutex_lock() inline in between declaration and
definion.

This broke building with CONFIG_ALLOW_WARNINGS=n, e.g. make
allnoconfig.

Either from the source code nor the allnoconfig binary output I cannot
find any internal references to mutex_lock() in kernel/mutex.c, so
presumably this "inline" is now-useless legacy.

Cc: Eric Paris <eparis@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <tip-3a2d367d9aabac486ac4444c6c7ec7a1dab16267@git.kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/kernel/mutex.c b/kernel/mutex.c
index 5d79781..fd95eaa 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count);
  *
  * This function is similar to (but not equivalent to) down().
  */
-void inline __sched mutex_lock(struct mutex *lock)
+void __sched mutex_lock(struct mutex *lock)
 {
 	might_sleep();
 	/*
-- 
cgit v0.10.2


From 7b732a75047738e4f85438ed2f9cd34bf5f2a19a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Mar 2009 18:22:10 +0100
Subject: perf_counter: new output ABI - part 1

Impact: Rework the perfcounter output ABI

use sys_read() only for instant data and provide mmap() output for all
async overflow data.

The first mmap() determines the size of the output buffer. The mmap()
size must be a PAGE_SIZE multiple of 1+pages, where pages must be a
power of 2 or 0. Further mmap()s of the same fd must have the same
size. Once all maps are gone, you can again mmap() with a new size.

In case of 0 extra pages there is no data output and the first page
only contains meta data.

When there are data pages, a poll() event will be generated for each
full page of data. Furthermore, the output is circular. This means
that although 1 page is a valid configuration, its useless, since
we'll start overwriting it the instant we report a full page.

Future work will focus on the output format (currently maintained)
where we'll likey want each entry denoted by a header which includes a
type and length.

Further future work will allow to splice() the fd, also containing the
async overflow data -- splice() would be mutually exclusive with
mmap() of the data.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.470536358@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index e434928..d48596a 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -417,8 +417,7 @@ void hw_perf_restore(u64 disable)
 		atomic64_set(&counter->hw.prev_count, val);
 		counter->hw.idx = hwc_index[i] + 1;
 		write_pmc(counter->hw.idx, val);
-		if (counter->user_page)
-			perf_counter_update_userpage(counter);
+		perf_counter_update_userpage(counter);
 	}
 	mb();
 	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
@@ -574,8 +573,7 @@ static void power_perf_disable(struct perf_counter *counter)
 			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
 			write_pmc(counter->hw.idx, 0);
 			counter->hw.idx = 0;
-			if (counter->user_page)
-				perf_counter_update_userpage(counter);
+			perf_counter_update_userpage(counter);
 			break;
 		}
 	}
@@ -702,8 +700,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	write_pmc(counter->hw.idx, val);
 	atomic64_set(&counter->hw.prev_count, val);
 	atomic64_set(&counter->hw.period_left, left);
-	if (counter->user_page)
-		perf_counter_update_userpage(counter);
+	perf_counter_update_userpage(counter);
 
 	/*
 	 * Finally record data if requested.
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 40b324e..2b5e66d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -152,6 +152,8 @@ struct perf_counter_mmap_page {
 	__u32	lock;			/* seqlock for synchronization */
 	__u32	index;			/* hardware counter identifier */
 	__s64	offset;			/* add to hardware counter value */
+
+	__u32   data_head;		/* head in the data section */
 };
 
 #ifdef __KERNEL__
@@ -218,21 +220,6 @@ struct hw_perf_counter {
 #endif
 };
 
-/*
- * Hardcoded buffer length limit for now, for IRQ-fed events:
- */
-#define PERF_DATA_BUFLEN		2048
-
-/**
- * struct perf_data - performance counter IRQ data sampling ...
- */
-struct perf_data {
-	int				len;
-	int				rd_idx;
-	int				overrun;
-	u8				data[PERF_DATA_BUFLEN];
-};
-
 struct perf_counter;
 
 /**
@@ -256,6 +243,14 @@ enum perf_counter_active_state {
 
 struct file;
 
+struct perf_mmap_data {
+	struct rcu_head			rcu_head;
+	int				nr_pages;
+	atomic_t			head;
+	struct perf_counter_mmap_page   *user_page;
+	void 				*data_pages[0];
+};
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -289,16 +284,15 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
-	/* pointer to page shared with userspace via mmap */
-	unsigned long			user_page;
+	/* mmap bits */
+	struct mutex			mmap_mutex;
+	atomic_t			mmap_count;
+	struct perf_mmap_data		*data;
 
-	/* read() / irq related data */
+	/* poll related */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
 	int				wakeup_pending;
-	struct perf_data		*irqdata;
-	struct perf_data		*usrdata;
-	struct perf_data		data[2];
 
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d9cfd90..0dfe910 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -4,7 +4,8 @@
  *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
  *
- *  For licencing details see kernel-base/COPYING
+ *
+ *  For licensing details see kernel-base/COPYING
  */
 
 #include <linux/fs.h>
@@ -1022,66 +1023,6 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	return atomic64_read(&counter->count);
 }
 
-/*
- * Cross CPU call to switch performance data pointers
- */
-static void __perf_switch_irq_data(void *info)
-{
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_counter *counter = info;
-	struct perf_counter_context *ctx = counter->ctx;
-	struct perf_data *oldirqdata = counter->irqdata;
-
-	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu. If not it has been
-	 * scheduled out before the smp call arrived.
-	 */
-	if (ctx->task) {
-		if (cpuctx->task_ctx != ctx)
-			return;
-		spin_lock(&ctx->lock);
-	}
-
-	/* Change the pointer NMI safe */
-	atomic_long_set((atomic_long_t *)&counter->irqdata,
-			(unsigned long) counter->usrdata);
-	counter->usrdata = oldirqdata;
-
-	if (ctx->task)
-		spin_unlock(&ctx->lock);
-}
-
-static struct perf_data *perf_switch_irq_data(struct perf_counter *counter)
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	struct perf_data *oldirqdata = counter->irqdata;
-	struct task_struct *task = ctx->task;
-
-	if (!task) {
-		smp_call_function_single(counter->cpu,
-					 __perf_switch_irq_data,
-					 counter, 1);
-		return counter->usrdata;
-	}
-
-retry:
-	spin_lock_irq(&ctx->lock);
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
-		counter->irqdata = counter->usrdata;
-		counter->usrdata = oldirqdata;
-		spin_unlock_irq(&ctx->lock);
-		return oldirqdata;
-	}
-	spin_unlock_irq(&ctx->lock);
-	task_oncpu_function_call(task, __perf_switch_irq_data, counter);
-	/* Might have failed, because task was scheduled out */
-	if (counter->irqdata == oldirqdata)
-		goto retry;
-
-	return counter->usrdata;
-}
-
 static void put_context(struct perf_counter_context *ctx)
 {
 	if (ctx->task)
@@ -1177,7 +1118,6 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
-	free_page(counter->user_page);
 	free_counter(counter);
 	put_context(ctx);
 
@@ -1192,7 +1132,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
 	u64 cntval;
 
-	if (count != sizeof(cntval))
+	if (count < sizeof(cntval))
 		return -EINVAL;
 
 	/*
@@ -1211,121 +1151,20 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 }
 
 static ssize_t
-perf_copy_usrdata(struct perf_data *usrdata, char __user *buf, size_t count)
-{
-	if (!usrdata->len)
-		return 0;
-
-	count = min(count, (size_t)usrdata->len);
-	if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count))
-		return -EFAULT;
-
-	/* Adjust the counters */
-	usrdata->len -= count;
-	if (!usrdata->len)
-		usrdata->rd_idx = 0;
-	else
-		usrdata->rd_idx += count;
-
-	return count;
-}
-
-static ssize_t
-perf_read_irq_data(struct perf_counter	*counter,
-		   char __user		*buf,
-		   size_t		count,
-		   int			nonblocking)
-{
-	struct perf_data *irqdata, *usrdata;
-	DECLARE_WAITQUEUE(wait, current);
-	ssize_t res, res2;
-
-	irqdata = counter->irqdata;
-	usrdata = counter->usrdata;
-
-	if (usrdata->len + irqdata->len >= count)
-		goto read_pending;
-
-	if (nonblocking)
-		return -EAGAIN;
-
-	spin_lock_irq(&counter->waitq.lock);
-	__add_wait_queue(&counter->waitq, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (usrdata->len + irqdata->len >= count)
-			break;
-
-		if (signal_pending(current))
-			break;
-
-		if (counter->state == PERF_COUNTER_STATE_ERROR)
-			break;
-
-		spin_unlock_irq(&counter->waitq.lock);
-		schedule();
-		spin_lock_irq(&counter->waitq.lock);
-	}
-	__remove_wait_queue(&counter->waitq, &wait);
-	__set_current_state(TASK_RUNNING);
-	spin_unlock_irq(&counter->waitq.lock);
-
-	if (usrdata->len + irqdata->len < count &&
-	    counter->state != PERF_COUNTER_STATE_ERROR)
-		return -ERESTARTSYS;
-read_pending:
-	mutex_lock(&counter->mutex);
-
-	/* Drain pending data first: */
-	res = perf_copy_usrdata(usrdata, buf, count);
-	if (res < 0 || res == count)
-		goto out;
-
-	/* Switch irq buffer: */
-	usrdata = perf_switch_irq_data(counter);
-	res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
-	if (res2 < 0) {
-		if (!res)
-			res = -EFAULT;
-	} else {
-		res += res2;
-	}
-out:
-	mutex_unlock(&counter->mutex);
-
-	return res;
-}
-
-static ssize_t
 perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct perf_counter *counter = file->private_data;
 
-	switch (counter->hw_event.record_type) {
-	case PERF_RECORD_SIMPLE:
-		return perf_read_hw(counter, buf, count);
-
-	case PERF_RECORD_IRQ:
-	case PERF_RECORD_GROUP:
-		return perf_read_irq_data(counter, buf, count,
-					  file->f_flags & O_NONBLOCK);
-	}
-	return -EINVAL;
+	return perf_read_hw(counter, buf, count);
 }
 
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_counter *counter = file->private_data;
-	unsigned int events = 0;
-	unsigned long flags;
+	unsigned int events = POLLIN;
 
 	poll_wait(file, &counter->waitq, wait);
 
-	spin_lock_irqsave(&counter->waitq.lock, flags);
-	if (counter->usrdata->len || counter->irqdata->len)
-		events |= POLLIN;
-	spin_unlock_irqrestore(&counter->waitq.lock, flags);
-
 	return events;
 }
 
@@ -1347,78 +1186,207 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-void perf_counter_update_userpage(struct perf_counter *counter)
+static void __perf_counter_update_userpage(struct perf_counter *counter,
+					   struct perf_mmap_data *data)
 {
-	struct perf_counter_mmap_page *userpg;
-
-	if (!counter->user_page)
-		return;
-	userpg = (struct perf_counter_mmap_page *) counter->user_page;
+	struct perf_counter_mmap_page *userpg = data->user_page;
 
+	/*
+	 * Disable preemption so as to not let the corresponding user-space
+	 * spin too long if we get preempted.
+	 */
+	preempt_disable();
 	++userpg->lock;
 	smp_wmb();
 	userpg->index = counter->hw.idx;
 	userpg->offset = atomic64_read(&counter->count);
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
+
+	userpg->data_head = atomic_read(&data->head);
 	smp_wmb();
 	++userpg->lock;
+	preempt_enable();
+}
+
+void perf_counter_update_userpage(struct perf_counter *counter)
+{
+	struct perf_mmap_data *data;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (data)
+		__perf_counter_update_userpage(counter, data);
+	rcu_read_unlock();
 }
 
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct perf_counter *counter = vma->vm_file->private_data;
+	struct perf_mmap_data *data;
+	int ret = VM_FAULT_SIGBUS;
 
-	if (!counter->user_page)
-		return VM_FAULT_SIGBUS;
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto unlock;
+
+	if (vmf->pgoff == 0) {
+		vmf->page = virt_to_page(data->user_page);
+	} else {
+		int nr = vmf->pgoff - 1;
 
-	vmf->page = virt_to_page(counter->user_page);
+		if ((unsigned)nr > data->nr_pages)
+			goto unlock;
+
+		vmf->page = virt_to_page(data->data_pages[nr]);
+	}
 	get_page(vmf->page);
+	ret = 0;
+unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
+{
+	struct perf_mmap_data *data;
+	unsigned long size;
+	int i;
+
+	WARN_ON(atomic_read(&counter->mmap_count));
+
+	size = sizeof(struct perf_mmap_data);
+	size += nr_pages * sizeof(void *);
+
+	data = kzalloc(size, GFP_KERNEL);
+	if (!data)
+		goto fail;
+
+	data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!data->user_page)
+		goto fail_user_page;
+
+	for (i = 0; i < nr_pages; i++) {
+		data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+		if (!data->data_pages[i])
+			goto fail_data_pages;
+	}
+
+	data->nr_pages = nr_pages;
+
+	rcu_assign_pointer(counter->data, data);
+
 	return 0;
+
+fail_data_pages:
+	for (i--; i >= 0; i--)
+		free_page((unsigned long)data->data_pages[i]);
+
+	free_page((unsigned long)data->user_page);
+
+fail_user_page:
+	kfree(data);
+
+fail:
+	return -ENOMEM;
+}
+
+static void __perf_mmap_data_free(struct rcu_head *rcu_head)
+{
+	struct perf_mmap_data *data = container_of(rcu_head,
+			struct perf_mmap_data, rcu_head);
+	int i;
+
+	free_page((unsigned long)data->user_page);
+	for (i = 0; i < data->nr_pages; i++)
+		free_page((unsigned long)data->data_pages[i]);
+	kfree(data);
+}
+
+static void perf_mmap_data_free(struct perf_counter *counter)
+{
+	struct perf_mmap_data *data = counter->data;
+
+	WARN_ON(atomic_read(&counter->mmap_count));
+
+	rcu_assign_pointer(counter->data, NULL);
+	call_rcu(&data->rcu_head, __perf_mmap_data_free);
+}
+
+static void perf_mmap_open(struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	atomic_inc(&counter->mmap_count);
+}
+
+static void perf_mmap_close(struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
+				      &counter->mmap_mutex)) {
+		perf_mmap_data_free(counter);
+		mutex_unlock(&counter->mmap_mutex);
+	}
 }
 
 static struct vm_operations_struct perf_mmap_vmops = {
+	.open = perf_mmap_open,
+	.close = perf_mmap_close,
 	.fault = perf_mmap_fault,
 };
 
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
-	unsigned long userpg;
+	unsigned long vma_size;
+	unsigned long nr_pages;
+	unsigned long locked, lock_limit;
+	int ret = 0;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
-	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+
+	vma_size = vma->vm_end - vma->vm_start;
+	nr_pages = (vma_size / PAGE_SIZE) - 1;
+
+	if (nr_pages == 0 || !is_power_of_2(nr_pages))
 		return -EINVAL;
 
-	/*
-	 * For now, restrict to the case of a hardware counter
-	 * on the current task.
-	 */
-	if (is_software_counter(counter) || counter->task != current)
+	if (vma_size != PAGE_SIZE * (1 + nr_pages))
 		return -EINVAL;
 
-	userpg = counter->user_page;
-	if (!userpg) {
-		userpg = get_zeroed_page(GFP_KERNEL);
-		mutex_lock(&counter->mutex);
-		if (counter->user_page) {
-			free_page(userpg);
-			userpg = counter->user_page;
-		} else {
-			counter->user_page = userpg;
-		}
-		mutex_unlock(&counter->mutex);
-		if (!userpg)
-			return -ENOMEM;
-	}
+	if (vma->vm_pgoff != 0)
+		return -EINVAL;
+
+	locked = vma_size >>  PAGE_SHIFT;
+	locked += vma->vm_mm->locked_vm;
 
-	perf_counter_update_userpage(counter);
+	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit >>= PAGE_SHIFT;
+
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK))
+		return -EPERM;
+
+	mutex_lock(&counter->mmap_mutex);
+	if (atomic_inc_not_zero(&counter->mmap_count))
+		goto out;
+
+	WARN_ON(counter->data);
+	ret = perf_mmap_data_alloc(counter, nr_pages);
+	if (!ret)
+		atomic_set(&counter->mmap_count, 1);
+out:
+	mutex_unlock(&counter->mmap_mutex);
 
 	vma->vm_flags &= ~VM_MAYWRITE;
 	vma->vm_flags |= VM_RESERVED;
 	vma->vm_ops = &perf_mmap_vmops;
-	return 0;
+
+	return ret;
 }
 
 static const struct file_operations perf_fops = {
@@ -1434,30 +1402,94 @@ static const struct file_operations perf_fops = {
  * Output
  */
 
-static void perf_counter_store_irq(struct perf_counter *counter, u64 data)
+static int perf_output_write(struct perf_counter *counter, int nmi,
+			     void *buf, ssize_t size)
 {
-	struct perf_data *irqdata = counter->irqdata;
+	struct perf_mmap_data *data;
+	unsigned int offset, head, nr;
+	unsigned int len;
+	int ret, wakeup;
 
-	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-		irqdata->overrun++;
-	} else {
-		u64 *p = (u64 *) &irqdata->data[irqdata->len];
+	rcu_read_lock();
+	ret = -ENOSPC;
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto out;
+
+	if (!data->nr_pages)
+		goto out;
+
+	ret = -EINVAL;
+	if (size > PAGE_SIZE)
+		goto out;
+
+	do {
+		offset = head = atomic_read(&data->head);
+		head += sizeof(u64);
+	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
+
+	wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
 
-		*p = data;
-		irqdata->len += sizeof(u64);
+	nr = (offset >> PAGE_SHIFT) & (data->nr_pages - 1);
+	offset &= PAGE_SIZE - 1;
+
+	len = min_t(unsigned int, PAGE_SIZE - offset, size);
+	memcpy(data->data_pages[nr] + offset, buf, len);
+	size -= len;
+
+	if (size) {
+		nr = (nr + 1) & (data->nr_pages - 1);
+		memcpy(data->data_pages[nr], buf + len, size);
+	}
+
+	/*
+	 * generate a poll() wakeup for every page boundary crossed
+	 */
+	if (wakeup) {
+		__perf_counter_update_userpage(counter, data);
+		if (nmi) {
+			counter->wakeup_pending = 1;
+			set_perf_counter_pending();
+		} else
+			wake_up(&counter->waitq);
 	}
+	ret = 0;
+out:
+	rcu_read_unlock();
+
+	return ret;
 }
 
-static void perf_counter_handle_group(struct perf_counter *counter)
+static void perf_output_simple(struct perf_counter *counter,
+			       int nmi, struct pt_regs *regs)
+{
+	u64 entry;
+
+	entry = instruction_pointer(regs);
+
+	perf_output_write(counter, nmi, &entry, sizeof(entry));
+}
+
+struct group_entry {
+	u64 event;
+	u64 counter;
+};
+
+static void perf_output_group(struct perf_counter *counter, int nmi)
 {
 	struct perf_counter *leader, *sub;
 
 	leader = counter->group_leader;
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+		struct group_entry entry;
+
 		if (sub != counter)
 			sub->hw_ops->read(sub);
-		perf_counter_store_irq(counter, sub->hw_event.config);
-		perf_counter_store_irq(counter, atomic64_read(&sub->count));
+
+		entry.event = sub->hw_event.config;
+		entry.counter = atomic64_read(&sub->count);
+
+		perf_output_write(counter, nmi, &entry, sizeof(entry));
 	}
 }
 
@@ -1469,19 +1501,13 @@ void perf_counter_output(struct perf_counter *counter,
 		return;
 
 	case PERF_RECORD_IRQ:
-		perf_counter_store_irq(counter, instruction_pointer(regs));
+		perf_output_simple(counter, nmi, regs);
 		break;
 
 	case PERF_RECORD_GROUP:
-		perf_counter_handle_group(counter);
+		perf_output_group(counter, nmi);
 		break;
 	}
-
-	if (nmi) {
-		counter->wakeup_pending = 1;
-		set_perf_counter_pending();
-	} else
-		wake_up(&counter->waitq);
 }
 
 /*
@@ -1967,10 +1993,10 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
 
+	mutex_init(&counter->mmap_mutex);
+
 	INIT_LIST_HEAD(&counter->child_list);
 
-	counter->irqdata		= &counter->data[0];
-	counter->usrdata		= &counter->data[1];
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
 	counter->wakeup_pending		= 0;
-- 
cgit v0.10.2


From 803d4f3980f6e220b27311a283aab0a4d68b6709 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Mar 2009 18:22:11 +0100
Subject: perf_counter tools: update to new syscall ABI

update the kerneltop userspace to work with the latest syscall ABI

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.559643732@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 81a68aa..a72c9bd 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -87,20 +87,90 @@
 
 #include <linux/unistd.h>
 
-#include "perfcounters.h"
+#include "include/linux/perf_counter.h"
 
 
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE   31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define rdclock()                                       \
+({                                                      \
+        struct timespec ts;                             \
+                                                        \
+        clock_gettime(CLOCK_MONOTONIC, &ts);            \
+        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
+})
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+typedef unsigned int            __u32;
+typedef unsigned long long      __u64;
+typedef long long               __s64;
+
+
+#ifdef __x86_64__
+# define __NR_perf_counter_open 295
+#endif
+
+#ifdef __i386__
+# define __NR_perf_counter_open 333
+#endif
+
+#ifdef __powerpc__
+#define __NR_perf_counter_open 319
+#endif
+
+asmlinkage int sys_perf_counter_open(
+        struct perf_counter_hw_event    *hw_event_uptr          __user,
+        pid_t                           pid,
+        int                             cpu,
+        int                             group_fd,
+        unsigned long                   flags)
+{
+        int ret;
+
+        ret = syscall(
+                __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
+#if defined(__x86_64__) || defined(__i386__)
+        if (ret < 0 && ret > -4096) {
+                errno = -ret;
+                ret = -1;
+        }
+#endif
+        return ret;
+}
+
 #define MAX_COUNTERS			64
 #define MAX_NR_CPUS			256
 
-#define DEF_PERFSTAT_EVENTS		{ -2, -5, -4, -3, 0, 1, 2, 3}
+#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
 
 static int			run_perfstat			=  0;
 static int			system_wide			=  0;
 
 static int			nr_counters			=  0;
-static __s64			event_id[MAX_COUNTERS]		= DEF_PERFSTAT_EVENTS;
-static int			event_raw[MAX_COUNTERS];
+static __u64			event_id[MAX_COUNTERS]		= {
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
+
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
+};
+static int			default_interval = 100000;
 static int			event_count[MAX_COUNTERS];
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
@@ -156,49 +226,63 @@ static char *sw_event_names[] = {
 	"pagefaults",
 	"context switches",
 	"CPU migrations",
+	"minor faults",
+	"major faults",
 };
 
 struct event_symbol {
-	int event;
+	__u64 event;
 	char *symbol;
 };
 
 static struct event_symbol event_symbols[] = {
-	{PERF_COUNT_CPU_CYCLES,			"cpu-cycles",		},
-	{PERF_COUNT_CPU_CYCLES,			"cycles",		},
-	{PERF_COUNT_INSTRUCTIONS,		"instructions",		},
-	{PERF_COUNT_CACHE_REFERENCES,		"cache-references",	},
-	{PERF_COUNT_CACHE_MISSES,		"cache-misses",		},
-	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branch-instructions",	},
-	{PERF_COUNT_BRANCH_INSTRUCTIONS,	"branches",		},
-	{PERF_COUNT_BRANCH_MISSES,		"branch-misses",	},
-	{PERF_COUNT_BUS_CYCLES,			"bus-cycles",		},
-	{PERF_COUNT_CPU_CLOCK,			"cpu-ticks",		},
-	{PERF_COUNT_CPU_CLOCK,			"ticks",		},
-	{PERF_COUNT_TASK_CLOCK,			"task-ticks",		},
-	{PERF_COUNT_PAGE_FAULTS,		"page-faults",		},
-	{PERF_COUNT_PAGE_FAULTS,		"faults",		},
-	{PERF_COUNT_CONTEXT_SWITCHES,		"context-switches",	},
-	{PERF_COUNT_CONTEXT_SWITCHES,		"cs",			},
-	{PERF_COUNT_CPU_MIGRATIONS,		"cpu-migrations",	},
-	{PERF_COUNT_CPU_MIGRATIONS,		"migrations",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
+
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
 };
 
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
 static void display_events_help(void)
 {
 	unsigned int i;
-	int e;
+	__u64 e;
 
 	printf(
 	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
 
-	for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) {
-		if (e != event_symbols[i].event) {
-			e = event_symbols[i].event;
-			printf(
-	"\n                             %2d: %-20s", e, event_symbols[i].symbol);
-		} else
-			printf(" %s", event_symbols[i].symbol);
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		int type, id;
+
+		e = event_symbols[i].event;
+		type = PERF_COUNTER_TYPE(e);
+		id = PERF_COUNTER_ID(e);
+
+		printf("\n                             %d:%d: %-20s",
+				type, id, event_symbols[i].symbol);
 	}
 
 	printf("\n"
@@ -249,44 +333,51 @@ static void display_help(void)
 	exit(0);
 }
 
-static int type_valid(int type)
-{
-	if (type >= PERF_HW_EVENTS_MAX)
-		return 0;
-	if (type <= PERF_SW_EVENTS_MIN)
-		return 0;
-
-	return 1;
-}
-
 static char *event_name(int ctr)
 {
-	__s64 type = event_id[ctr];
+	__u64 config = event_id[ctr];
+	int type = PERF_COUNTER_TYPE(config);
+	int id = PERF_COUNTER_ID(config);
 	static char buf[32];
 
-	if (event_raw[ctr]) {
-		sprintf(buf, "raw 0x%llx", (long long)type);
+	if (PERF_COUNTER_RAW(config)) {
+		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
 		return buf;
 	}
-	if (!type_valid(type))
-		return "unknown";
 
-	if (type >= 0)
-		return hw_event_names[type];
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		if (id < PERF_HW_EVENTS_MAX)
+			return hw_event_names[id];
+		return "unknown-hardware";
+
+	case PERF_TYPE_SOFTWARE:
+		if (id < PERF_SW_EVENTS_MAX)
+			return sw_event_names[id];
+		return "unknown-software";
 
-	return sw_event_names[-type-1];
+	default:
+		break;
+	}
+
+	return "unknown";
 }
 
 /*
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
  */
-static int match_event_symbols(char *str)
+static __u64 match_event_symbols(char *str)
 {
+	__u64 config, id;
+	int type;
 	unsigned int i;
 
-	if (isdigit(str[0]) || str[0] == '-')
-		return atoi(str);
+	if (sscanf(str, "r%llx", &config) == 1)
+		return config | PERF_COUNTER_RAW_MASK;
+
+	if (sscanf(str, "%d:%llu", &type, &id) == 2)
+		return EID(type, id);
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
 		if (!strncmp(str, event_symbols[i].symbol,
@@ -294,31 +385,22 @@ static int match_event_symbols(char *str)
 			return event_symbols[i].event;
 	}
 
-	return PERF_HW_EVENTS_MAX;
+	return ~0ULL;
 }
 
 static int parse_events(char *str)
 {
-	__s64 type;
-	int raw;
+	__u64 config;
 
 again:
 	if (nr_counters == MAX_COUNTERS)
 		return -1;
 
-	raw = 0;
-	if (*str == 'r') {
-		raw = 1;
-		++str;
-		type = strtol(str, NULL, 16);
-	} else {
-		type = match_event_symbols(str);
-		if (!type_valid(type))
-			return -1;
-	}
+	config = match_event_symbols(str);
+	if (config == ~0ULL)
+		return -1;
 
-	event_id[nr_counters] = type;
-	event_raw[nr_counters] = raw;
+	event_id[nr_counters] = config;
 	nr_counters++;
 
 	str = strstr(str, ",");
@@ -342,8 +424,7 @@ static void create_perfstat_counter(int counter)
 	struct perf_counter_hw_event hw_event;
 
 	memset(&hw_event, 0, sizeof(hw_event));
-	hw_event.type		= event_id[counter];
-	hw_event.raw		= event_raw[counter];
+	hw_event.config		= event_id[counter];
 	hw_event.record_type	= PERF_RECORD_SIMPLE;
 	hw_event.nmi		= 0;
 
@@ -428,7 +509,7 @@ int do_perfstat(int argc, char *argv[])
 			count += single_count;
 		}
 
-		if (!event_raw[counter] &&
+		if (!PERF_COUNTER_RAW(event_id[counter]) &&
 		    (event_id[counter] == PERF_COUNT_CPU_CLOCK ||
 		     event_id[counter] == PERF_COUNT_TASK_CLOCK)) {
 
@@ -911,7 +992,7 @@ static void record_ip(uint64_t ip, int counter)
 		assert(left <= middle && middle <= right);
 		if (!(left <= ip && ip <= right)) {
 			printf(" left: %016lx\n", left);
-			printf("   ip: %016lx\n", ip);
+			printf("   ip: %016llx\n", ip);
 			printf("right: %016lx\n", right);
 		}
 		assert(left <= ip && ip <= right);
@@ -983,7 +1064,7 @@ static void process_options(int argc, char *argv[])
 
 		switch (c) {
 		case 'a': system_wide			=	       1; break;
-		case 'c': event_count[nr_counters]	=   atoi(optarg); break;
+		case 'c': default_interval		=   atoi(optarg); break;
 		case 'C':
 			/* CPU and PID are mutually exclusive */
 			if (tid != -1) {
@@ -1032,10 +1113,7 @@ static void process_options(int argc, char *argv[])
 		if (event_count[counter])
 			continue;
 
-		if (event_id[counter] < PERF_HW_EVENTS_MAX)
-			event_count[counter] = default_count[event_id[counter]];
-		else
-			event_count[counter] = 100000;
+		event_count[counter] = default_interval;
 	}
 }
 
@@ -1070,12 +1148,13 @@ int main(int argc, char *argv[])
 				cpu = i;
 
 			memset(&hw_event, 0, sizeof(hw_event));
-			hw_event.type		= event_id[counter];
-			hw_event.raw		= event_raw[counter];
+			hw_event.config		= event_id[counter];
 			hw_event.irq_period	= event_count[counter];
 			hw_event.record_type	= PERF_RECORD_IRQ;
 			hw_event.nmi		= nmi;
 
+			printf("FOO: %d %llx %llx\n", counter, event_id[counter], event_count[counter]);
+
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
 			if (fd[i][counter] < 0) {
-- 
cgit v0.10.2


From bcbcb37cdb67d8100acfa66df40c4d636c28c4d1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Mar 2009 18:22:12 +0100
Subject: perf_counter tools: use mmap() output

update kerneltop to use the mmap() output to gather overflow information

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.677932499@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index a72c9bd..80b7905 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -84,6 +84,7 @@
 #include <sys/prctl.h>
 #include <sys/wait.h>
 #include <sys/uio.h>
+#include <sys/mman.h>
 
 #include <linux/unistd.h>
 
@@ -119,17 +120,25 @@ typedef long long               __s64;
 
 
 #ifdef __x86_64__
-# define __NR_perf_counter_open 295
+#define __NR_perf_counter_open 295
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
 
 #ifdef __i386__
-# define __NR_perf_counter_open 333
+#define __NR_perf_counter_open 333
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
 
 #ifdef __powerpc__
 #define __NR_perf_counter_open 319
+#define rmb() 		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
 #endif
 
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+
 asmlinkage int sys_perf_counter_open(
         struct perf_counter_hw_event    *hw_event_uptr          __user,
         pid_t                           pid,
@@ -181,6 +190,7 @@ static int			profile_cpu			= -1;
 static int			nr_cpus				=  0;
 static int			nmi				=  1;
 static int			group				=  0;
+static unsigned int		page_size;
 
 static char			*vmlinux;
 
@@ -1117,16 +1127,68 @@ static void process_options(int argc, char *argv[])
 	}
 }
 
+struct mmap_data {
+	int counter;
+	void *base;
+	unsigned int mask;
+	unsigned int prev;
+};
+
+static unsigned int mmap_read_head(struct mmap_data *md)
+{
+	struct perf_counter_mmap_page *pc = md->base;
+	unsigned int seq, head;
+
+repeat:
+	rmb();
+	seq = pc->lock;
+
+	if (unlikely(seq & 1)) {
+		cpu_relax();
+		goto repeat;
+	}
+
+	head = pc->data_head;
+
+	rmb();
+	if (pc->lock != seq)
+		goto repeat;
+
+	return head;
+}
+
+static void mmap_read(struct mmap_data *md)
+{
+	unsigned int head = mmap_read_head(md);
+	unsigned int old = md->prev;
+	unsigned char *data = md->base + page_size;
+
+	if (head - old > md->mask) {
+		printf("ERROR: failed to keep up with mmap data\n");
+		exit(-1);
+	}
+
+	for (; old != head;) {
+		__u64 *ptr = (__u64 *)&data[old & md->mask];
+		old += sizeof(__u64);
+
+		process_event(*ptr, md->counter);
+	}
+
+	md->prev = old;
+}
+
 int main(int argc, char *argv[])
 {
 	struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS];
+	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 	struct perf_counter_hw_event hw_event;
 	int i, counter, group_fd;
 	unsigned int cpu;
-	uint64_t ip;
-	ssize_t res;
 	int ret;
 
+	page_size = sysconf(_SC_PAGE_SIZE);
+
 	process_options(argc, argv);
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
@@ -1153,8 +1215,6 @@ int main(int argc, char *argv[])
 			hw_event.record_type	= PERF_RECORD_IRQ;
 			hw_event.nmi		= nmi;
 
-			printf("FOO: %d %llx %llx\n", counter, event_id[counter], event_count[counter]);
-
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
 			if (fd[i][counter] < 0) {
@@ -1174,6 +1234,17 @@ int main(int argc, char *argv[])
 
 			event_array[i][counter].fd = fd[i][counter];
 			event_array[i][counter].events = POLLIN;
+
+			mmap_array[i][counter].counter = counter;
+			mmap_array[i][counter].prev = 0;
+			mmap_array[i][counter].mask = 2*page_size - 1;
+			mmap_array[i][counter].base = mmap(NULL, 3*page_size,
+					PROT_READ, MAP_SHARED, fd[i][counter], 0);
+			if (mmap_array[i][counter].base == MAP_FAILED) {
+				printf("kerneltop error: failed to mmap with %d (%s)\n",
+						errno, strerror(errno));
+				exit(-1);
+			}
 		}
 	}
 
@@ -1188,14 +1259,8 @@ int main(int argc, char *argv[])
 		int hits = events;
 
 		for (i = 0; i < nr_cpus; i++) {
-			for (counter = 0; counter < nr_counters; counter++) {
-				res = read(fd[i][counter], (char *) &ip, sizeof(ip));
-				if (res > 0) {
-					assert(res == sizeof(ip));
-
-					process_event(ip, counter);
-				}
-			}
+			for (counter = 0; counter < nr_counters; counter++)
+				mmap_read(&mmap_array[i][counter]);
 		}
 
 		if (time(NULL) >= last_refresh + delay_secs) {
-- 
cgit v0.10.2


From 383c5f8cd7d253be0017d8d5a39cbb78aaf70206 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 23 Mar 2009 21:49:25 +0100
Subject: perf_counter tools: tidy up in-kernel dependencies

Remove now unified perfstat.c and perf_counter.h, and link to the
in-kernel perf_counter.h.

Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090323172417.677932499@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index b457497..666da95 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -2,7 +2,7 @@ BINS = kerneltop perfstat
 
 all: $(BINS)
 
-kerneltop: kerneltop.c perfcounters.h
+kerneltop: kerneltop.c ../../include/linux/perf_counter.h
 	cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o $@ $<
 
 perfstat: kerneltop
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 80b7905..25e80bc 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -88,7 +88,7 @@
 
 #include <linux/unistd.h>
 
-#include "include/linux/perf_counter.h"
+#include "../../include/linux/perf_counter.h"
 
 
 /*
diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h
deleted file mode 100644
index 32e24b9..0000000
--- a/Documentation/perf_counter/perfcounters.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Ioctls that can be done on a perf counter fd:
- */
-#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
-
-/*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_COUNTERS_DISABLE	31
-#define PR_TASK_PERF_COUNTERS_ENABLE    32
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define rdclock()					\
-({							\
-	struct timespec ts;				\
-							\
-	clock_gettime(CLOCK_MONOTONIC, &ts);		\
-	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
-})
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-typedef unsigned int		__u32;
-typedef unsigned long long	__u64;
-typedef long long		__s64;
-
-/*
- * User-space ABI bits:
- */
-
-/*
- * Generalized performance counter event types, used by the hw_event.type
- * parameter of the sys_perf_counter_open() syscall:
- */
-enum hw_event_types {
-	/*
-	 * Common hardware events, generalized by the kernel:
-	 */
-	PERF_COUNT_CPU_CYCLES		=  0,
-	PERF_COUNT_INSTRUCTIONS		=  1,
-	PERF_COUNT_CACHE_REFERENCES	=  2,
-	PERF_COUNT_CACHE_MISSES		=  3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
-	PERF_COUNT_BRANCH_MISSES	=  5,
-	PERF_COUNT_BUS_CYCLES		=  6,
-
-	PERF_HW_EVENTS_MAX		=  7,
-
-	/*
-	 * Special "software" counters provided by the kernel, even if
-	 * the hardware does not support performance counters. These
-	 * counters measure various physical and sw events of the
-	 * kernel (and allow the profiling of them as well):
-	 */
-	PERF_COUNT_CPU_CLOCK		= -1,
-	PERF_COUNT_TASK_CLOCK		= -2,
-	PERF_COUNT_PAGE_FAULTS		= -3,
-	PERF_COUNT_CONTEXT_SWITCHES	= -4,
-	PERF_COUNT_CPU_MIGRATIONS	= -5,
-
-	PERF_SW_EVENTS_MIN		= -6,
-};
-
-/*
- * IRQ-notification data record type:
- */
-enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		=  0,
-	PERF_RECORD_IRQ			=  1,
-	PERF_RECORD_GROUP		=  2,
-};
-
-/*
- * Hardware event to monitor via a performance monitoring counter:
- */
-struct perf_counter_hw_event {
-	__s64			type;
-
-	__u64			irq_period;
-	__u64			record_type;
-	__u64			read_format;
-
-	__u64			disabled       :  1, /* off by default        */
-				nmi	       :  1, /* NMI sampling          */
-				raw	       :  1, /* raw event type        */
-				inherit	       :  1, /* children inherit it   */
-				pinned	       :  1, /* must always be on PMU */
-				exclusive      :  1, /* only group on PMU     */
-				exclude_user   :  1, /* don't count user      */
-				exclude_kernel :  1, /* ditto kernel          */
-				exclude_hv     :  1, /* ditto hypervisor      */
-				exclude_idle   :  1, /* don't count when idle */
-
-				__reserved_1   : 54;
-
-	__u32			extra_config_len;
-	__u32			__reserved_4;
-
-	__u64			__reserved_2;
-	__u64			__reserved_3;
-};
-
-
-#ifdef __x86_64__
-# define __NR_perf_counter_open	295
-#endif
-
-#ifdef __i386__
-# define __NR_perf_counter_open 333
-#endif
-
-#ifdef __powerpc__
-#define __NR_perf_counter_open 319
-#endif
-
-asmlinkage int sys_perf_counter_open(
-
-	struct perf_counter_hw_event	*hw_event_uptr		__user,
-	pid_t				pid,
-	int				cpu,
-	int				group_fd,
-	unsigned long			flags)
-{
-	int ret;
-
-	ret = syscall(
-		__NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
-#if defined(__x86_64__) || defined(__i386__)
-	if (ret < 0 && ret > -4096) {
-		errno = -ret;
-		ret = -1;
-	}
-#endif
-	return ret;
-}
diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c
deleted file mode 100644
index fd59446..0000000
--- a/Documentation/perf_counter/perfstat.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * perfstat:  /usr/bin/time -alike performance counter statistics utility
- *
- *        It summarizes the counter events of all tasks (and child tasks),
- *        covering all CPUs that the command (or workload) executes on.
- *        It only counts the per-task events of the workload started,
- *        independent of how many other tasks run on those CPUs.
- *
- * Build with:       cc -O2 -g -lrt -Wall -W -o perfstat perfstat.c
- *
- * Sample output:
- *
-
-   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
-
-   Performance counter stats for 'ls':
-
-           163516953 instructions
-                2295 cache-misses
-             2855182 branch-misses
-
- *
- * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
- *
- * Released under the GPLv2 (not later).
- *
- * Percpu counter support by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
- * Symbolic event options by: Wu Fengguang <fengguang.wu@intel.com>
- */
-#define _GNU_SOURCE
-
-#include <assert.h>
-#include <getopt.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <time.h>
-
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/prctl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-
-#include <linux/unistd.h>
-
-#include "perfcounters.h"
-
-static int			nr_cpus			= 0;
-
-static int			system_wide		= 0;
-
-static void display_help(void)
-{
-	unsigned int i;
-	int e;
-
-	printf(
-	"Usage: perfstat [<events...>] <cmd...>\n\n"
-	"PerfStat Options (up to %d event types can be specified):\n\n",
-		 MAX_COUNTERS);
-	printf(
-	" -e EVENT     --event=EVENT        #  symbolic-name        abbreviations");
-
-	for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) {
-		if (e != event_symbols[i].event) {
-			e = event_symbols[i].event;
-			printf(
-	"\n                                  %2d: %-20s", e, event_symbols[i].symbol);
-		} else
-			printf(" %s", event_symbols[i].symbol);
-	}
-
-	printf("\n"
-	"                                rNNN: raw event type\n\n"
-	" -s                                # system-wide collection\n\n"
-	" -c <cmd..>   --command=<cmd..>    # command+arguments to be timed.\n"
-	"\n");
-	exit(0);
-}
-
-static void process_options(int argc, char *argv[])
-{
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"event",	required_argument,	NULL, 'e'},
-			{"help",	no_argument,		NULL, 'h'},
-			{"command",	no_argument,		NULL, 'c'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "+:e:c:s",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'c':
-			break;
-		case 's':
-			system_wide = 1;
-			break;
-		case 'e':
-			parse_events(optarg);
-			break;
-		default:
-			break;
-		}
-	}
-	if (optind == argc)
-		goto err;
-
-	if (!nr_counters)
-		nr_counters = 8;
-	return;
-
-err:
-	display_help();
-}
-
-char fault_here[1000000];
-
-static int fd[MAX_NR_CPUS][MAX_COUNTERS];
-
-static void create_counter(int counter)
-{
-	struct perf_counter_hw_event hw_event;
-
-	memset(&hw_event, 0, sizeof(hw_event));
-	hw_event.type		= event_id[counter];
-	hw_event.raw		= event_raw[counter];
-	hw_event.record_type	= PERF_RECORD_SIMPLE;
-	hw_event.nmi		= 0;
-
-	if (system_wide) {
-		int cpu;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
-			if (fd[cpu][counter] < 0) {
-				printf("perfstat error: syscall returned with %d (%s)\n",
-						fd[cpu][counter], strerror(errno));
-				exit(-1);
-			}
-			
-		}
-	} else {
-		hw_event.inherit	= 1;
-		hw_event.disabled	= 1;
-
-		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
-		if (fd[0][counter] < 0) {
-			printf("perfstat error: syscall returned with %d (%s)\n",
-					fd[0][counter], strerror(errno));
-			exit(-1);
-		}
-	}
-}
-
-
-int main(int argc, char *argv[])
-{
-	unsigned long long t0, t1;
-	int counter;
-	ssize_t res;
-	int status;
-	int pid;
-
-	process_options(argc, argv);
-
-	if (system_wide) {
-		nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-		assert(nr_cpus <= MAX_NR_CPUS);
-		assert(nr_cpus >= 0);
-	} else
-		nr_cpus = 1;
-
-	for (counter = 0; counter < nr_counters; counter++)
-		create_counter(counter);
-
-	argc -= optind;
-	argv += optind;
-
-	/*
-	 * Enable counters and exec the command:
-	 */
-	t0 = rdclock();
-	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
-
-	if ((pid = fork()) < 0)
-		perror("failed to fork");
-	if (!pid) {
-		if (execvp(argv[0], argv)) {
-			perror(argv[0]);
-			exit(-1);
-		}
-	}
-	while (wait(&status) >= 0)
-		;
-	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
-	t1 = rdclock();
-
-	fflush(stdout);
-
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Performance counter stats for \'%s\':\n",
-		argv[0]);
-	fprintf(stderr, "\n");
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		int cpu;
-		__u64 count, single_count;
-
-		count = 0;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			res = read(fd[cpu][counter],
-					(char *) &single_count, sizeof(single_count));
-			assert(res == sizeof(single_count));
-			count += single_count;
-		}
-
-		if (!event_raw[counter] &&
-		    (event_id[counter] == PERF_COUNT_CPU_CLOCK ||
-		     event_id[counter] == PERF_COUNT_TASK_CLOCK)) {
-
-			double msecs = (double)count / 1000000;
-
-			fprintf(stderr, " %14.6f  %-20s (msecs)\n",
-				msecs, event_name(counter));
-		} else {
-			fprintf(stderr, " %14Ld  %-20s (events)\n",
-				count, event_name(counter));
-		}
-		if (!counter)
-			fprintf(stderr, "\n");
-	}
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
-			(double)(t1-t0)/1e6);
-	fprintf(stderr, "\n");
-
-	return 0;
-}
-- 
cgit v0.10.2


From 193e8df1b465f206f8a286202680702e1c153367 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 23 Mar 2009 22:23:16 +0100
Subject: perf_counter tools: fix build warning in kerneltop.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix:

 kerneltop.c: In function ‘record_ip’:
 kerneltop.c:1005: warning: format ‘%016llx’ expects type ‘long long unsigned int’, but argument 2 has type ‘uint64_t’

Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090323172417.677932499@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 25e80bc..8f9a303 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -1002,7 +1002,7 @@ static void record_ip(uint64_t ip, int counter)
 		assert(left <= middle && middle <= right);
 		if (!(left <= ip && ip <= right)) {
 			printf(" left: %016lx\n", left);
-			printf("   ip: %016llx\n", ip);
+			printf("   ip: %016lx\n", (unsigned long)ip);
 			printf("right: %016lx\n", right);
 		}
 		assert(left <= ip && ip <= right);
-- 
cgit v0.10.2


From 81cdbe0509542324ad7d3282ab67c2b6716df663 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 23 Mar 2009 22:29:50 +0100
Subject: perf_counter tools: increase cpu-cycles again

Commit b7368fdd7d decreased the CPU cycles interval 100-fold, but
this is causig kerneltop failures on my Nehalem box:

 aldebaran:/home/mingo/linux/linux/Documentation/perf_counter>
 ./kerneltop
 KernelTop refresh period: 2 seconds
 ERROR: failed to keep up with mmap data

10,000 cycles is way too short.

What we should do instead on mostly-idle systems is some sort of
read/poll timeout, so that we display something every 2 seconds
for sure.

Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 8f9a303..2ab29b5 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -212,7 +212,7 @@ struct source_line {
 
 
 const unsigned int default_count[] = {
-	  10000,
+	1000000,
 	1000000,
 	  10000,
 	  10000,
-- 
cgit v0.10.2


From cbe46555dc4de6403cd757139d42289b5f21abb9 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 24 Mar 2009 16:52:34 +1100
Subject: perf_counter tools: remove glib dependency and fix bugs in
 kerneltop.c

The glib dependency in kerneltop.c is only for a little bit of list
manipulation, and I find it inconvenient.  This adds a 'next' field to
struct source_line, which lets us link them together into a list.  The
code to do the linking ourselves turns out to be no longer or more
difficult than using glib.

This also fixes a few other problems:

- We need to #include <limits.h> to get PATH_MAX on powerpc.

- We need to #include <linux/types.h> rather than have our own
  definitions of __u64 and __s64; on powerpc the installed headers
  define them to be unsigned long and long respectively, and if we
  have our own, different definition here that causes a compile error.

- This takes out the x86 setting of errno from -ret in
  sys_perf_counter_open.  My experiments on x86 indicate that the
  glibc syscall() does this for us already.

- We had two CPU migration counters in the default set, which seems
  unnecessary; I changed one of them to a context switch counter.

- In perfstat mode we were printing CPU cycles and instructions as
  milliseconds, and the cpu clock and task clock counters as events.
  This fixes that.

- In perfstat mode we were still printing a blank line after the first
  counter, which was a holdover from when a task clock counter was
  automatically included as the first counter.  This removes the blank
  line.

- On a test machine here, parse_symbols() and parse_vmlinux() were
  taking long enough (almost 0.5 seconds) for the mmap buffer to
  overflow before we got to the first mmap_read() call, so this moves
  them before we open all the counters.

- The error message if sys_perf_counter_open fails needs to use errno,
  not -fd[i][counter].

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Orig-LKML-Reference: <18888.29986.340328.540512@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 666da95..194b662 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -3,7 +3,7 @@ BINS = kerneltop perfstat
 all: $(BINS)
 
 kerneltop: kerneltop.c ../../include/linux/perf_counter.h
-	cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o $@ $<
+	cc -O6 -Wall -lrt -o $@ $<
 
 perfstat: kerneltop
 	ln -sf kerneltop perfstat
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 2ab29b5..ea13e4e 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -3,7 +3,7 @@
 
    Build with:
 
-     cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c
+     cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
 
    Sample output:
 
@@ -56,6 +56,7 @@
   *   Yanmin Zhang <yanmin.zhang@intel.com>
   *   Wu Fengguang <fengguang.wu@intel.com>
   *   Mike Galbraith <efault@gmx.de>
+  *   Paul Mackerras <paulus@samba.org>
   *
   * Released under the GPL v2. (and only v2, not any later version)
   */
@@ -68,6 +69,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 #include <getopt.h>
 #include <assert.h>
 #include <fcntl.h>
@@ -76,8 +78,6 @@
 #include <ctype.h>
 #include <time.h>
 
-#include <glib.h>
-
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <sys/poll.h>
@@ -87,6 +87,7 @@
 #include <sys/mman.h>
 
 #include <linux/unistd.h>
+#include <linux/types.h>
 
 #include "../../include/linux/perf_counter.h"
 
@@ -114,11 +115,6 @@
 #define __user
 #define asmlinkage
 
-typedef unsigned int            __u32;
-typedef unsigned long long      __u64;
-typedef long long               __s64;
-
-
 #ifdef __x86_64__
 #define __NR_perf_counter_open 295
 #define rmb()		asm volatile("lfence" ::: "memory")
@@ -146,17 +142,8 @@ asmlinkage int sys_perf_counter_open(
         int                             group_fd,
         unsigned long                   flags)
 {
-        int ret;
-
-        ret = syscall(
+        return syscall(
                 __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
-#if defined(__x86_64__) || defined(__i386__)
-        if (ret < 0 && ret > -4096) {
-                errno = -ret;
-                ret = -1;
-        }
-#endif
-        return ret;
 }
 
 #define MAX_COUNTERS			64
@@ -170,7 +157,7 @@ static int			system_wide			=  0;
 static int			nr_counters			=  0;
 static __u64			event_id[MAX_COUNTERS]		= {
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
 
@@ -202,14 +189,15 @@ static int			delay_secs			=  2;
 static int			zero;
 static int			dump_symtab;
 
-static GList			*lines;
-
 struct source_line {
 	uint64_t		EIP;
 	unsigned long		count;
 	char			*line;
+	struct source_line	*next;
 };
 
+static struct source_line	*lines;
+static struct source_line	**lines_tail;
 
 const unsigned int default_count[] = {
 	1000000,
@@ -519,9 +507,8 @@ int do_perfstat(int argc, char *argv[])
 			count += single_count;
 		}
 
-		if (!PERF_COUNTER_RAW(event_id[counter]) &&
-		    (event_id[counter] == PERF_COUNT_CPU_CLOCK ||
-		     event_id[counter] == PERF_COUNT_TASK_CLOCK)) {
+		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
+		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
 
 			double msecs = (double)count / 1000000;
 
@@ -531,8 +518,6 @@ int do_perfstat(int argc, char *argv[])
 			fprintf(stderr, " %14Ld  %-20s (events)\n",
 				count, event_name(counter));
 		}
-		if (!counter)
-			fprintf(stderr, "\n");
 	}
 	fprintf(stderr, "\n");
 	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
@@ -554,7 +539,7 @@ struct sym_entry {
 	char			*sym;
 	unsigned long		count[MAX_COUNTERS];
 	int			skip;
-	GList			*source;
+	struct source_line	*source;
 };
 
 #define MAX_SYMS		100000
@@ -855,6 +840,7 @@ static void parse_vmlinux(char *filename)
 	if (!file)
 		return;
 
+	lines_tail = &lines;
 	while (!feof(file)) {
 		struct source_line *src;
 		size_t dummy = 0;
@@ -873,7 +859,9 @@ static void parse_vmlinux(char *filename)
 		if (c)
 			*c = 0;
 
-		lines = g_list_prepend(lines, src);
+		src->next = NULL;
+		*lines_tail = src;
+		lines_tail = &src->next;
 
 		if (strlen(src->line)>8 && src->line[8] == ':')
 			src->EIP = strtoull(src->line, NULL, 16);
@@ -881,52 +869,43 @@ static void parse_vmlinux(char *filename)
 			src->EIP = strtoull(src->line, NULL, 16);
 	}
 	pclose(file);
-	lines = g_list_reverse(lines);
 }
 
 static void record_precise_ip(uint64_t ip)
 {
 	struct source_line *line;
-	GList *item;
 
-	item = g_list_first(lines);
-	while (item) {
-		line = item->data;
+	for (line = lines; line; line = line->next) {
 		if (line->EIP == ip)
 			line->count++;
 		if (line->EIP > ip)
 			break;
-		item = g_list_next(item);
 	}
 }
 
 static void lookup_sym_in_vmlinux(struct sym_entry *sym)
 {
 	struct source_line *line;
-	GList *item;
 	char pattern[PATH_MAX];
 	sprintf(pattern, "<%s>:", sym->sym);
 
-	item = g_list_first(lines);
-	while (item) {
-		line = item->data;
+	for (line = lines; line; line = line->next) {
 		if (strstr(line->line, pattern)) {
-			sym->source = item;
+			sym->source = line;
 			break;
 		}
-		item = g_list_next(item);
 	}
 }
 
-void show_lines(GList *item_queue, int item_queue_count)
+static void show_lines(struct source_line *line_queue, int line_queue_count)
 {
 	int i;
 	struct source_line *line;
 
-	for (i = 0; i < item_queue_count; i++) {
-		line = item_queue->data;
+	line = line_queue;
+	for (i = 0; i < line_queue_count; i++) {
 		printf("%8li\t%s\n", line->count, line->line);
-		item_queue = g_list_next(item_queue);
+		line = line->next;
 	}
 }
 
@@ -935,10 +914,9 @@ void show_lines(GList *item_queue, int item_queue_count)
 static void show_details(struct sym_entry *sym)
 {
 	struct source_line *line;
-	GList *item;
+	struct source_line *line_queue = NULL;
 	int displayed = 0;
-	GList *item_queue = NULL;
-	int item_queue_count = 0;
+	int line_queue_count = 0;
 
 	if (!sym->source)
 		lookup_sym_in_vmlinux(sym);
@@ -947,30 +925,29 @@ static void show_details(struct sym_entry *sym)
 
 	printf("Showing details for %s\n", sym->sym);
 
-	item = sym->source;
-	while (item) {
-		line = item->data;
+	line = sym->source;
+	while (line) {
 		if (displayed && strstr(line->line, ">:"))
 			break;
 
-		if (!item_queue_count)
-			item_queue = item;
-		item_queue_count ++;
+		if (!line_queue_count)
+			line_queue = line;
+		line_queue_count ++;
 
 		if (line->count >= count_filter) {
-			show_lines(item_queue, item_queue_count);
-			item_queue_count = 0;
-			item_queue = NULL;
-		} else if (item_queue_count > TRACE_COUNT) {
-			item_queue = g_list_next(item_queue);
-			item_queue_count --;
+			show_lines(line_queue, line_queue_count);
+			line_queue_count = 0;
+			line_queue = NULL;
+		} else if (line_queue_count > TRACE_COUNT) {
+			line_queue = line_queue->next;
+			line_queue_count --;
 		}
 
 		line->count = 0;
 		displayed++;
 		if (displayed > 300)
 			break;
-		item = g_list_next(item);
+		line = line->next;
 	}
 }
 
@@ -1201,6 +1178,10 @@ int main(int argc, char *argv[])
 	if (tid != -1 || profile_cpu != -1)
 		nr_cpus = 1;
 
+	parse_symbols();
+	if (vmlinux && sym_filter_entry)
+		parse_vmlinux(vmlinux);
+
 	for (i = 0; i < nr_cpus; i++) {
 		group_fd = -1;
 		for (counter = 0; counter < nr_counters; counter++) {
@@ -1216,15 +1197,16 @@ int main(int argc, char *argv[])
 			hw_event.nmi		= nmi;
 
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
-			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
 			if (fd[i][counter] < 0) {
+				int err = errno;
 				printf("kerneltop error: syscall returned with %d (%s)\n",
-					fd[i][counter], strerror(-fd[i][counter]));
-				if (fd[i][counter] == -1)
+					fd[i][counter], strerror(err));
+				if (err == EPERM)
 					printf("Are you root?\n");
 				exit(-1);
 			}
 			assert(fd[i][counter] >= 0);
+			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
 
 			/*
 			 * First counter acts as the group leader:
@@ -1248,10 +1230,6 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	parse_symbols();
-	if (vmlinux && sym_filter_entry)
-		parse_vmlinux(vmlinux);
-
 	printf("KernelTop refresh period: %d seconds\n", delay_secs);
 	last_refresh = time(NULL);
 
-- 
cgit v0.10.2


From 0fd112e41cd6f6d4779cbe327c3632d087e31476 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 24 Mar 2009 10:50:24 +0100
Subject: perf_counter tools: remove glib dependency and fix bugs in
 kerneltop.c, fix poll()

Paul Mackerras wrote:

> I noticed the poll stuff is bogus - we have a 2D array of struct
> pollfds (MAX_NR_CPUS x MAX_COUNTERS), we fill in a sub-array (with the
> rest being uninitialized, since the array is on the stack) and then
> pass the first nr_cpus elements to poll.  Not what we really meant, I
> suspect. :)  Not even if we only have one counter, since it's the
> counter dimension that varies fastest.

This should fix the most obvious poll fubar.. not enough to fix the
full problem though..

Reported-by: Paul Mackerras <paulus@samba.org>
Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Orig-LKML-Reference: <18888.29986.340328.540512@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index ea13e4e..7ebde7a 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -1157,10 +1157,10 @@ static void mmap_read(struct mmap_data *md)
 
 int main(int argc, char *argv[])
 {
-	struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS];
+	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
 	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 	struct perf_counter_hw_event hw_event;
-	int i, counter, group_fd;
+	int i, counter, group_fd, nr_poll = 0;
 	unsigned int cpu;
 	int ret;
 
@@ -1214,8 +1214,9 @@ int main(int argc, char *argv[])
 			if (group && group_fd == -1)
 				group_fd = fd[i][counter];
 
-			event_array[i][counter].fd = fd[i][counter];
-			event_array[i][counter].events = POLLIN;
+			event_array[nr_poll].fd = fd[i][counter];
+			event_array[nr_poll].events = POLLIN;
+			nr_poll++;
 
 			mmap_array[i][counter].counter = counter;
 			mmap_array[i][counter].prev = 0;
@@ -1247,7 +1248,7 @@ int main(int argc, char *argv[])
 		}
 
 		if (hits == events)
-			ret = poll(event_array[0], nr_cpus, 1000);
+			ret = poll(event_array, nr_poll, 1000);
 		hits = events;
 	}
 
-- 
cgit v0.10.2


From f66c6b2066b44d4ab8e8ac1ee4cae543738fe2ac Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 23 Mar 2009 10:29:36 +1100
Subject: perf_counter: update documentation

Impact: documentation fix

This updates the perfcounter documentation to reflect recent changes.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt
index fddd321..aaf105c 100644
--- a/Documentation/perf_counter/design.txt
+++ b/Documentation/perf_counter/design.txt
@@ -11,7 +11,9 @@ thus be used to profile the code that runs on that CPU.
 
 The Linux Performance Counter subsystem provides an abstraction of these
 hardware capabilities. It provides per task and per CPU counters, counter
-groups, and it provides event capabilities on top of those.
+groups, and it provides event capabilities on top of those.  It
+provides "virtual" 64-bit counters, regardless of the width of the
+underlying hardware counters.
 
 Performance counters are accessed via special file descriptors.
 There's one file descriptor per virtual counter used.
@@ -20,7 +22,8 @@ The special file descriptor is opened via the perf_counter_open()
 system call:
 
    int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
-			     pid_t pid, int cpu, int group_fd);
+			     pid_t pid, int cpu, int group_fd,
+			     unsigned long flags);
 
 The syscall returns the new fd. The fd can be used via the normal
 VFS system calls: read() can be used to read the counter, fcntl()
@@ -32,90 +35,180 @@ can be poll()ed.
 When creating a new counter fd, 'perf_counter_hw_event' is:
 
 /*
- * Hardware event to monitor via a performance monitoring counter:
+ * Event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	s64			type;
+	__u64			event_config;
 
-	u64			irq_period;
-	u32			record_type;
+	__u64			irq_period;
+	__u64			record_type;
+	__u64			read_format;
 
-	u32			disabled     :  1, /* off by default */
-				nmi	     :  1, /* NMI sampling   */
-				raw	     :  1, /* raw event type */
-				__reserved_1 : 29;
+	__u64			disabled       :  1, /* off by default        */
+				nmi	       :  1, /* NMI sampling          */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
 
-	u64			__reserved_2;
+				__reserved_1   : 55;
+
+	__u32			extra_config_len;
+
+	__u32			__reserved_4;
+	__u64			__reserved_2;
+	__u64			__reserved_3;
 };
 
+The 'event_config' field specifies what the counter should count.  It
+is divided into 3 bit-fields:
+
+raw_type: 1 bit (most significant bit)		0x8000_0000_0000_0000
+type:	  7 bits (next most significant)	0x7f00_0000_0000_0000
+event_id: 56 bits (least significant)		0x00ff_0000_0000_0000
+
+If 'raw_type' is 1, then the counter will count a hardware event
+specified by the remaining 63 bits of event_config.  The encoding is
+machine-specific.
+
+If 'raw_type' is 0, then the 'type' field says what kind of counter
+this is, with the following encoding:
+
+enum perf_event_types {
+	PERF_TYPE_HARDWARE		= 0,
+	PERF_TYPE_SOFTWARE		= 1,
+	PERF_TYPE_TRACEPOINT		= 2,
+};
+
+A counter of PERF_TYPE_HARDWARE will count the hardware event
+specified by 'event_id':
+
 /*
- * Generalized performance counter event types, used by the hw_event.type
+ * Generalized performance counter event types, used by the hw_event.event_id
  * parameter of the sys_perf_counter_open() syscall:
  */
-enum hw_event_types {
+enum hw_event_ids {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CYCLES		=  0,
-	PERF_COUNT_INSTRUCTIONS		=  1,
-	PERF_COUNT_CACHE_REFERENCES	=  2,
-	PERF_COUNT_CACHE_MISSES		=  3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
-	PERF_COUNT_BRANCH_MISSES	=  5,
-
-	/*
-	 * Special "software" counters provided by the kernel, even if
-	 * the hardware does not support performance counters. These
-	 * counters measure various physical and sw events of the
-	 * kernel (and allow the profiling of them as well):
-	 */
-	PERF_COUNT_CPU_CLOCK		= -1,
-	PERF_COUNT_TASK_CLOCK		= -2,
-	/*
-	 * Future software events:
-	 */
-	/* PERF_COUNT_PAGE_FAULTS	= -3,
-	   PERF_COUNT_CONTEXT_SWITCHES	= -4, */
+	PERF_COUNT_CPU_CYCLES		= 0,
+	PERF_COUNT_INSTRUCTIONS		= 1,
+	PERF_COUNT_CACHE_REFERENCES	= 2,
+	PERF_COUNT_CACHE_MISSES		= 3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_BRANCH_MISSES	= 5,
+	PERF_COUNT_BUS_CYCLES		= 6,
 };
 
-These are standardized types of events that work uniformly on all CPUs
-that implements Performance Counters support under Linux. If a CPU is
-not able to count branch-misses, then the system call will return
--EINVAL.
+These are standardized types of events that work relatively uniformly
+on all CPUs that implement Performance Counters support under Linux,
+although there may be variations (e.g., different CPUs might count
+cache references and misses at different levels of the cache hierarchy).
+If a CPU is not able to count the selected event, then the system call
+will return -EINVAL.
 
-More hw_event_types are supported as well, but they are CPU
-specific and are enumerated via /sys on a per CPU basis. Raw hw event
-types can be passed in under hw_event.type if hw_event.raw is 1.
-For example, to count "External bus cycles while bus lock signal asserted"
-events on Intel Core CPUs, pass in a 0x4064 event type value and set
-hw_event.raw to 1.
+More hw_event_types are supported as well, but they are CPU-specific
+and accessed as raw events.  For example, to count "External bus
+cycles while bus lock signal asserted" events on Intel Core CPUs, pass
+in a 0x4064 event_id value and set hw_event.raw_type to 1.
 
-'record_type' is the type of data that a read() will provide for the
-counter, and it can be one of:
+A counter of type PERF_TYPE_SOFTWARE will count one of the available
+software events, selected by 'event_id':
 
 /*
- * IRQ-notification data record type:
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
  */
-enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		=  0,
-	PERF_RECORD_IRQ			=  1,
-	PERF_RECORD_GROUP		=  2,
+enum sw_event_ids {
+	PERF_COUNT_CPU_CLOCK		= 0,
+	PERF_COUNT_TASK_CLOCK		= 1,
+	PERF_COUNT_PAGE_FAULTS		= 2,
+	PERF_COUNT_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
 };
 
-a "simple" counter is one that counts hardware events and allows
-them to be read out into a u64 count value. (read() returns 8 on
-a successful read of a simple counter.)
+Counters come in two flavours: counting counters and sampling
+counters.  A "counting" counter is one that is used for counting the
+number of events that occur, and is characterised by having
+irq_period = 0 and record_type = PERF_RECORD_SIMPLE.  A read() on a
+counting counter simply returns the current value of the counter as
+an 8-byte number.
 
-An "irq" counter is one that will also provide an IRQ context information:
-the IP of the interrupted context. In this case read() will return
-the 8-byte counter value, plus the Instruction Pointer address of the
-interrupted context.
+A "sampling" counter is one that is set up to generate an interrupt
+every N events, where N is given by 'irq_period'.  A sampling counter
+has irq_period > 0 and record_type != PERF_RECORD_SIMPLE.  The
+record_type controls what data is recorded on each interrupt, and the
+available values are currently:
 
-The parameter 'hw_event_period' is the number of events before waking up
-a read() that is blocked on a counter fd. Zero value means a non-blocking
-counter.
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		= 0,
+	PERF_RECORD_IRQ			= 1,
+	PERF_RECORD_GROUP		= 2,
+};
 
-The 'pid' parameter allows the counter to be specific to a task:
+A record_type value of PERF_RECORD_IRQ will record the instruction
+pointer (IP) at which the interrupt occurred.  A record_type value of
+PERF_RECORD_GROUP will record the event_config and counter value of
+all of the other counters in the group, and should only be used on a
+group leader (see below).  Currently these two values are mutually
+exclusive, but record_type will become a bit-mask in future and
+support other values.
+
+A sampling counter has an event queue, into which an event is placed
+on each interrupt.  A read() on a sampling counter will read the next
+event from the event queue.  If the queue is empty, the read() will
+either block or return an EAGAIN error, depending on whether the fd
+has been set to non-blocking mode or not.
+
+The 'disabled' bit specifies whether the counter starts out disabled
+or enabled.  If it is initially disabled, it can be enabled by ioctl
+or prctl (see below).
+
+The 'nmi' bit specifies, for hardware events, whether the counter
+should be set up to request non-maskable interrupts (NMIs) or normal
+interrupts.  This bit is ignored if the user doesn't have
+CAP_SYS_ADMIN privilege (i.e. is not root) or if the CPU doesn't
+generate NMIs from hardware counters.
+
+The 'inherit' bit, if set, specifies that this counter should count
+events on descendant tasks as well as the task specified.  This only
+applies to new descendents, not to any existing descendents at the
+time the counter is created (nor to any new descendents of existing
+descendents).
+
+The 'pinned' bit, if set, specifies that the counter should always be
+on the CPU if at all possible.  It only applies to hardware counters
+and only to group leaders.  If a pinned counter cannot be put onto the
+CPU (e.g. because there are not enough hardware counters or because of
+a conflict with some other event), then the counter goes into an
+'error' state, where reads return end-of-file (i.e. read() returns 0)
+until the counter is subsequently enabled or disabled.
+
+The 'exclusive' bit, if set, specifies that when this counter's group
+is on the CPU, it should be the only group using the CPU's counters.
+In future, this will allow sophisticated monitoring programs to supply
+extra configuration information via 'extra_config_len' to exploit
+advanced features of the CPU's Performance Monitor Unit (PMU) that are
+not otherwise accessible and that might disrupt other hardware
+counters.
+
+The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
+way to request that counting of events be restricted to times when the
+CPU is in user, kernel and/or hypervisor mode.
+
+
+The 'pid' parameter to the perf_counter_open() system call allows the
+counter to be specific to a task:
 
  pid == 0: if the pid parameter is zero, the counter is attached to the
  current task.
@@ -125,8 +218,7 @@ The 'pid' parameter allows the counter to be specific to a task:
 
  pid < 0: all tasks are counted (per cpu counters)
 
-The 'cpu' parameter allows a counter to be made specific to a full
-CPU:
+The 'cpu' parameter allows a counter to be made specific to a CPU:
 
  cpu >= 0: the counter is restricted to a specific CPU
  cpu == -1: the counter counts on all CPUs
@@ -141,7 +233,51 @@ their own tasks.
 A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
 all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
 
-Group counters are created by passing in a group_fd of another counter.
-Groups are scheduled at once and can be used with PERF_RECORD_GROUP
-to record multi-dimensional timestamps.
+The 'flags' parameter is currently unused and must be zero.
+
+The 'group_fd' parameter allows counter "groups" to be set up.  A
+counter group has one counter which is the group "leader".  The leader
+is created first, with group_fd = -1 in the perf_counter_open call
+that creates it.  The rest of the group members are created
+subsequently, with group_fd giving the fd of the group leader.
+(A single counter on its own is created with group_fd = -1 and is
+considered to be a group with only 1 member.)
+
+A counter group is scheduled onto the CPU as a unit, that is, it will
+only be put onto the CPU if all of the counters in the group can be
+put onto the CPU.  This means that the values of the member counters
+can be meaningfully compared, added, divided (to get ratios), etc.,
+with each other, since they have counted events for the same set of
+executed instructions.
+
+Counters can be enabled and disabled in two ways: via ioctl and via
+prctl.  When a counter is disabled, it doesn't count or generate
+events but does continue to exist and maintain its count value.
+
+An individual counter or counter group can be enabled with
+
+	ioctl(fd, PERF_COUNTER_IOC_ENABLE);
+
+or disabled with
+
+	ioctl(fd, PERF_COUNTER_IOC_DISABLE);
+
+Enabling or disabling the leader of a group enables or disables the
+whole group; that is, while the group leader is disabled, none of the
+counters in the group will count.  Enabling or disabling a member of a
+group other than the leader only affects that counter - disabling an
+non-leader stops that counter from counting but doesn't affect any
+other counter.
+
+A process can enable or disable all the counter groups that are
+attached to it, using prctl:
+
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+
+This applies to all counters on the current process, whether created
+by this process or by another, and doesn't affect any counters that
+this process has created on other processes.  It only enables or
+disables the group leaders, not any other members in the groups.
 
-- 
cgit v0.10.2


From c7138f37f905bb7987b1f9f5a8ee73667db39f25 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 24 Mar 2009 13:18:16 +0100
Subject: perf_counter: fix perf_poll()

Impact: fix kerneltop 100% CPU usage

Only return a poll event when there's actually been one, poll_wait()
doesn't actually wait for the waitq you pass it, it only enqueues
you on it.

Only once all FDs have been iterated and none of thm returned a
poll-event will it schedule().

Also make it return POLL_HUP when there's not mmap() area to read from.

Further, fix a silly bug in the write code.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arjan van de Ven <arjan@infradead.org>
Orig-LKML-Reference: <1237897096.24918.181.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2b5e66d..48212c1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -246,6 +246,7 @@ struct file;
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
 	int				nr_pages;
+	atomic_t			wakeup;
 	atomic_t			head;
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0dfe910..affe227 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1161,7 +1161,16 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_counter *counter = file->private_data;
-	unsigned int events = POLLIN;
+	struct perf_mmap_data *data;
+	unsigned int events;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (data)
+		events = atomic_xchg(&data->wakeup, 0);
+	else
+		events = POLL_HUP;
+	rcu_read_unlock();
 
 	poll_wait(file, &counter->waitq, wait);
 
@@ -1425,7 +1434,7 @@ static int perf_output_write(struct perf_counter *counter, int nmi,
 
 	do {
 		offset = head = atomic_read(&data->head);
-		head += sizeof(u64);
+		head += size;
 	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
 
 	wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
@@ -1446,6 +1455,7 @@ static int perf_output_write(struct perf_counter *counter, int nmi,
 	 * generate a poll() wakeup for every page boundary crossed
 	 */
 	if (wakeup) {
+		atomic_xchg(&data->wakeup, POLL_IN);
 		__perf_counter_update_userpage(counter, data);
 		if (nmi) {
 			counter->wakeup_pending = 1;
-- 
cgit v0.10.2


From b9cacc7bf193df16532bfa7d7ca77fe50fc3c2e6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:30:22 +0100
Subject: perf_counter: more elaborate write API

Provide a begin, copy, end interface to the output buffer.

begin() reserves the space,
 copy() copies the data over, considering page boundaries,
  end() finalizes the event and does the wakeup.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Orig-LKML-Reference: <20090325113316.740550870@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index affe227..0422fd9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/file.h>
@@ -16,15 +17,14 @@
 #include <linux/sysfs.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
+#include <linux/vmstat.h>
+#include <linux/hardirq.h>
+#include <linux/rculist.h>
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/rculist.h>
-#include <linux/hardirq.h>
 
 #include <asm/irq_regs.h>
 
@@ -1411,16 +1411,20 @@ static const struct file_operations perf_fops = {
  * Output
  */
 
-static int perf_output_write(struct perf_counter *counter, int nmi,
-			     void *buf, ssize_t size)
+struct perf_output_handle {
+	struct perf_counter	*counter;
+	struct perf_mmap_data	*data;
+	unsigned int		offset;
+	int			wakeup;
+};
+
+static int perf_output_begin(struct perf_output_handle *handle,
+			     struct perf_counter *counter, unsigned int size)
 {
 	struct perf_mmap_data *data;
-	unsigned int offset, head, nr;
-	unsigned int len;
-	int ret, wakeup;
+	unsigned int offset, head;
 
 	rcu_read_lock();
-	ret = -ENOSPC;
 	data = rcu_dereference(counter->data);
 	if (!data)
 		goto out;
@@ -1428,45 +1432,82 @@ static int perf_output_write(struct perf_counter *counter, int nmi,
 	if (!data->nr_pages)
 		goto out;
 
-	ret = -EINVAL;
-	if (size > PAGE_SIZE)
-		goto out;
-
 	do {
 		offset = head = atomic_read(&data->head);
 		head += size;
 	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
 
-	wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
+	handle->counter	= counter;
+	handle->data	= data;
+	handle->offset	= offset;
+	handle->wakeup	= (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
 
-	nr = (offset >> PAGE_SHIFT) & (data->nr_pages - 1);
-	offset &= PAGE_SIZE - 1;
+	return 0;
 
-	len = min_t(unsigned int, PAGE_SIZE - offset, size);
-	memcpy(data->data_pages[nr] + offset, buf, len);
-	size -= len;
+out:
+	rcu_read_unlock();
 
-	if (size) {
-		nr = (nr + 1) & (data->nr_pages - 1);
-		memcpy(data->data_pages[nr], buf + len, size);
-	}
+	return -ENOSPC;
+}
 
-	/*
-	 * generate a poll() wakeup for every page boundary crossed
-	 */
-	if (wakeup) {
-		atomic_xchg(&data->wakeup, POLL_IN);
-		__perf_counter_update_userpage(counter, data);
+static void perf_output_copy(struct perf_output_handle *handle,
+			     void *buf, unsigned int len)
+{
+	unsigned int pages_mask;
+	unsigned int offset;
+	unsigned int size;
+	void **pages;
+
+	offset		= handle->offset;
+	pages_mask	= handle->data->nr_pages - 1;
+	pages		= handle->data->data_pages;
+
+	do {
+		unsigned int page_offset;
+		int nr;
+
+		nr	    = (offset >> PAGE_SHIFT) & pages_mask;
+		page_offset = offset & (PAGE_SIZE - 1);
+		size	    = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+
+		memcpy(pages[nr] + page_offset, buf, size);
+
+		len	    -= size;
+		buf	    += size;
+		offset	    += size;
+	} while (len);
+
+	handle->offset = offset;
+}
+
+static void perf_output_end(struct perf_output_handle *handle, int nmi)
+{
+	if (handle->wakeup) {
+		(void)atomic_xchg(&handle->data->wakeup, POLL_IN);
+		__perf_counter_update_userpage(handle->counter, handle->data);
 		if (nmi) {
-			counter->wakeup_pending = 1;
+			handle->counter->wakeup_pending = 1;
 			set_perf_counter_pending();
 		} else
-			wake_up(&counter->waitq);
+			wake_up(&handle->counter->waitq);
 	}
-	ret = 0;
-out:
 	rcu_read_unlock();
+}
+
+static int perf_output_write(struct perf_counter *counter, int nmi,
+			     void *buf, ssize_t size)
+{
+	struct perf_output_handle handle;
+	int ret;
 
+	ret = perf_output_begin(&handle, counter, size);
+	if (ret)
+		goto out;
+
+	perf_output_copy(&handle, buf, size);
+	perf_output_end(&handle, nmi);
+
+out:
 	return ret;
 }
 
-- 
cgit v0.10.2


From 5c1481943250ab65fa5130e05ec479c93216e9f7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:30:23 +0100
Subject: perf_counter: output objects

Provide a {type,size} header for each output entry.

This should provide extensible output, and the ability to mix multiple streams.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Orig-LKML-Reference: <20090325113316.831607932@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 48212c1..c256635 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -156,6 +156,16 @@ struct perf_counter_mmap_page {
 	__u32   data_head;		/* head in the data section */
 };
 
+struct perf_event_header {
+	__u32	type;
+	__u32	size;
+};
+
+enum perf_event_type {
+	PERF_EVENT_IP		= 0,
+	PERF_EVENT_GROUP	= 1,
+};
+
 #ifdef __KERNEL__
 /*
  * Kernel-internal data types and definitions:
@@ -260,6 +270,7 @@ struct perf_counter {
 	struct list_head		list_entry;
 	struct list_head		event_entry;
 	struct list_head		sibling_list;
+	int 				nr_siblings;
 	struct perf_counter		*group_leader;
 	const struct hw_perf_counter_ops *hw_ops;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0422fd9..d76e311 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -75,8 +75,10 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	 */
 	if (counter->group_leader == counter)
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
-	else
+	else {
 		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+		group_leader->nr_siblings++;
+	}
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 }
@@ -89,6 +91,9 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
 
+	if (counter->group_leader != counter)
+		counter->group_leader->nr_siblings--;
+
 	/*
 	 * If this was a group counter with sibling counters then
 	 * upgrade the siblings to singleton counters by adding them
@@ -381,9 +386,11 @@ static int is_software_only_group(struct perf_counter *leader)
 
 	if (!is_software_counter(leader))
 		return 0;
+
 	list_for_each_entry(counter, &leader->sibling_list, list_entry)
 		if (!is_software_counter(counter))
 			return 0;
+
 	return 1;
 }
 
@@ -1480,6 +1487,9 @@ static void perf_output_copy(struct perf_output_handle *handle,
 	handle->offset = offset;
 }
 
+#define perf_output_put(handle, x) \
+	perf_output_copy((handle), &(x), sizeof(x))
+
 static void perf_output_end(struct perf_output_handle *handle, int nmi)
 {
 	if (handle->wakeup) {
@@ -1514,34 +1524,53 @@ out:
 static void perf_output_simple(struct perf_counter *counter,
 			       int nmi, struct pt_regs *regs)
 {
-	u64 entry;
+	struct {
+		struct perf_event_header header;
+		u64 ip;
+	} event;
 
-	entry = instruction_pointer(regs);
+	event.header.type = PERF_EVENT_IP;
+	event.header.size = sizeof(event);
+	event.ip = instruction_pointer(regs);
 
-	perf_output_write(counter, nmi, &entry, sizeof(entry));
+	perf_output_write(counter, nmi, &event, sizeof(event));
 }
 
-struct group_entry {
-	u64 event;
-	u64 counter;
-};
-
 static void perf_output_group(struct perf_counter *counter, int nmi)
 {
+	struct perf_output_handle handle;
+	struct perf_event_header header;
 	struct perf_counter *leader, *sub;
+	unsigned int size;
+	struct {
+		u64 event;
+		u64 counter;
+	} entry;
+	int ret;
+
+	size = sizeof(header) + counter->nr_siblings * sizeof(entry);
+
+	ret = perf_output_begin(&handle, counter, size);
+	if (ret)
+		return;
+
+	header.type = PERF_EVENT_GROUP;
+	header.size = size;
+
+	perf_output_put(&handle, header);
 
 	leader = counter->group_leader;
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-		struct group_entry entry;
-
 		if (sub != counter)
 			sub->hw_ops->read(sub);
 
 		entry.event = sub->hw_event.config;
 		entry.counter = atomic64_read(&sub->count);
 
-		perf_output_write(counter, nmi, &entry, sizeof(entry));
+		perf_output_put(&handle, entry);
 	}
+
+	perf_output_end(&handle, nmi);
 }
 
 void perf_counter_output(struct perf_counter *counter,
-- 
cgit v0.10.2


From 63e35b25d6b5c3136d22ef249dbbf96716aa08bf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:30:24 +0100
Subject: perf_counter: sanity check on the output API

Ensure we never write more than we said we would.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Orig-LKML-Reference: <20090325113316.921433024@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d76e311..7669afe 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1422,6 +1422,7 @@ struct perf_output_handle {
 	struct perf_counter	*counter;
 	struct perf_mmap_data	*data;
 	unsigned int		offset;
+	unsigned int		head;
 	int			wakeup;
 };
 
@@ -1447,6 +1448,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	handle->counter	= counter;
 	handle->data	= data;
 	handle->offset	= offset;
+	handle->head	= head;
 	handle->wakeup	= (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
 
 	return 0;
@@ -1485,6 +1487,8 @@ static void perf_output_copy(struct perf_output_handle *handle,
 	} while (len);
 
 	handle->offset = offset;
+
+	WARN_ON_ONCE(handle->offset > handle->head);
 }
 
 #define perf_output_put(handle, x) \
-- 
cgit v0.10.2


From ea5d20cf99db5d26d43b6d322d3ace17e08a6552 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:30:25 +0100
Subject: perf_counter: optionally provide the pid/tid of the sampled task

Allow cpu wide counters to profile userspace by providing what process
the sample belongs to.

This raises the first issue with the output type, lots of these
options: group, tid, callchain, etc.. are non-exclusive and could be
combined, suggesting a bitfield.

However, things like the mmap() data stream doesn't fit in that.

How to split the type field...

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Orig-LKML-Reference: <20090325113317.013775235@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c256635..7fdbdf8 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -127,8 +127,9 @@ struct perf_counter_hw_event {
 				exclude_kernel :  1, /* ditto kernel          */
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
+				include_tid    :  1, /* include the tid */
 
-				__reserved_1   : 55;
+				__reserved_1   : 54;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -164,6 +165,8 @@ struct perf_event_header {
 enum perf_event_type {
 	PERF_EVENT_IP		= 0,
 	PERF_EVENT_GROUP	= 1,
+
+	__PERF_EVENT_TID	= 0x100,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7669afe..f3e1b27 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1528,16 +1528,30 @@ out:
 static void perf_output_simple(struct perf_counter *counter,
 			       int nmi, struct pt_regs *regs)
 {
+	unsigned int size;
 	struct {
 		struct perf_event_header header;
 		u64 ip;
+		u32 pid, tid;
 	} event;
 
 	event.header.type = PERF_EVENT_IP;
-	event.header.size = sizeof(event);
 	event.ip = instruction_pointer(regs);
 
-	perf_output_write(counter, nmi, &event, sizeof(event));
+	size = sizeof(event);
+
+	if (counter->hw_event.include_tid) {
+		/* namespace issues */
+		event.pid = current->group_leader->pid;
+		event.tid = current->pid;
+
+		event.header.type |= __PERF_EVENT_TID;
+	} else
+		size -= sizeof(u64);
+
+	event.header.size = size;
+
+	perf_output_write(counter, nmi, &event, size);
 }
 
 static void perf_output_group(struct perf_counter *counter, int nmi)
-- 
cgit v0.10.2


From 4c4ba21d2c3659e4c0421533939b58a8fd9f06c9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:30:26 +0100
Subject: perf_counter: kerneltop: mmap_pages argument

provide a knob to set the number of mmap data pages.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Orig-LKML-Reference: <20090325113317.104545398@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 7ebde7a..3e45bf6 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -178,6 +178,7 @@ static int			nr_cpus				=  0;
 static int			nmi				=  1;
 static int			group				=  0;
 static unsigned int		page_size;
+static unsigned int		mmap_pages			=  4;
 
 static char			*vmlinux;
 
@@ -326,6 +327,7 @@ static void display_help(void)
 	" -x path   --vmlinux=<path>   # the vmlinux binary, required for -s use\n"
 	" -z        --zero             # zero counts after display\n"
 	" -D        --dump_symtab      # dump symbol table to stderr on startup\n"
+	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
 	);
 
 	exit(0);
@@ -732,7 +734,9 @@ static int read_symbol(FILE *in, struct sym_entry *s)
 	/* Tag events to be skipped. */
 	if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
 		s->skip = 1;
-	if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
+	else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
+		s->skip = 1;
+	else if (!strcmp("mwait_idle", s->sym))
 		s->skip = 1;
 
 	if (filter_match == 1) {
@@ -1042,9 +1046,10 @@ static void process_options(int argc, char *argv[])
 			{"symbol",	required_argument,	NULL, 's'},
 			{"stat",	no_argument,		NULL, 'S'},
 			{"zero",	no_argument,		NULL, 'z'},
+			{"mmap_pages",	required_argument,	NULL, 'm'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -1081,6 +1086,7 @@ static void process_options(int argc, char *argv[])
 		case 'S': run_perfstat			=	       1; break;
 		case 'x': vmlinux			= strdup(optarg); break;
 		case 'z': zero				=              1; break;
+		case 'm': mmap_pages			=   atoi(optarg); break;
 		default: error = 1; break;
 		}
 	}
@@ -1134,17 +1140,30 @@ repeat:
 	return head;
 }
 
+struct timeval last_read, this_read;
+
 static void mmap_read(struct mmap_data *md)
 {
 	unsigned int head = mmap_read_head(md);
 	unsigned int old = md->prev;
 	unsigned char *data = md->base + page_size;
 
+	gettimeofday(&this_read, NULL);
+
 	if (head - old > md->mask) {
-		printf("ERROR: failed to keep up with mmap data\n");
-		exit(-1);
+		struct timeval iv;
+		unsigned long msecs;
+
+		timersub(&this_read, &last_read, &iv);
+		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+		fprintf(stderr, "WARNING: failed to keep up with mmap data.  Last read %lu msecs ago.\n", msecs);
+
+		old = head;
 	}
 
+	last_read = this_read;
+
 	for (; old != head;) {
 		__u64 *ptr = (__u64 *)&data[old & md->mask];
 		old += sizeof(__u64);
@@ -1220,8 +1239,8 @@ int main(int argc, char *argv[])
 
 			mmap_array[i][counter].counter = counter;
 			mmap_array[i][counter].prev = 0;
-			mmap_array[i][counter].mask = 2*page_size - 1;
-			mmap_array[i][counter].base = mmap(NULL, 3*page_size,
+			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
 					PROT_READ, MAP_SHARED, fd[i][counter], 0);
 			if (mmap_array[i][counter].base == MAP_FAILED) {
 				printf("kerneltop error: failed to mmap with %d (%s)\n",
-- 
cgit v0.10.2


From 00f0ad73ac90e3fba8b4cbe4cf21b2fb9a56cb72 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:30:27 +0100
Subject: perf_counter: kerneltop: output event support

Teach kerneltop about the new output ABI.

XXX: anybody fancy integrating the PID/TID data into the output?

Bump the mmap_data pages a little because we bloated the output and
have to be more careful about overruns with structured data.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Orig-LKML-Reference: <20090325113317.192910290@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 3e45bf6..fda1438 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -134,6 +134,11 @@
 #endif
 
 #define unlikely(x)	__builtin_expect(!!(x), 0)
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
 
 asmlinkage int sys_perf_counter_open(
         struct perf_counter_hw_event    *hw_event_uptr          __user,
@@ -178,7 +183,7 @@ static int			nr_cpus				=  0;
 static int			nmi				=  1;
 static int			group				=  0;
 static unsigned int		page_size;
-static unsigned int		mmap_pages			=  4;
+static unsigned int		mmap_pages			=  16;
 
 static char			*vmlinux;
 
@@ -1147,28 +1152,75 @@ static void mmap_read(struct mmap_data *md)
 	unsigned int head = mmap_read_head(md);
 	unsigned int old = md->prev;
 	unsigned char *data = md->base + page_size;
+	int diff;
 
 	gettimeofday(&this_read, NULL);
 
-	if (head - old > md->mask) {
+	/*
+	 * If we're further behind than half the buffer, there's a chance
+	 * the writer will bite our tail and screw up the events under us.
+	 *
+	 * If we somehow ended up ahead of the head, we got messed up.
+	 *
+	 * In either case, truncate and restart at head.
+	 */
+	diff = head - old;
+	if (diff > md->mask / 2 || diff < 0) {
 		struct timeval iv;
 		unsigned long msecs;
 
 		timersub(&this_read, &last_read, &iv);
 		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
 
-		fprintf(stderr, "WARNING: failed to keep up with mmap data.  Last read %lu msecs ago.\n", msecs);
+		fprintf(stderr, "WARNING: failed to keep up with mmap data."
+				"  Last read %lu msecs ago.\n", msecs);
 
+		/*
+		 * head points to a known good entry, start there.
+		 */
 		old = head;
 	}
 
 	last_read = this_read;
 
 	for (; old != head;) {
-		__u64 *ptr = (__u64 *)&data[old & md->mask];
-		old += sizeof(__u64);
+		struct event_struct {
+			struct perf_event_header header;
+			__u64 ip;
+			__u32 pid, tid;
+		} *event = (struct event_struct *)&data[old & md->mask];
+		struct event_struct event_copy;
+
+		unsigned int size = event->header.size;
+
+		/*
+		 * Event straddles the mmap boundary -- header should always
+		 * be inside due to u64 alignment of output.
+		 */
+		if ((old & md->mask) + size != ((old + size) & md->mask)) {
+			unsigned int offset = old;
+			unsigned int len = sizeof(*event), cpy;
+			void *dst = &event_copy;
+
+			do {
+				cpy = min(md->mask + 1 - (offset & md->mask), len);
+				memcpy(dst, &data[offset & md->mask], cpy);
+				offset += cpy;
+				dst += cpy;
+				len -= cpy;
+			} while (len);
+
+			event = &event_copy;
+		}
 
-		process_event(*ptr, md->counter);
+		old += size;
+
+		switch (event->header.type) {
+		case PERF_EVENT_IP:
+		case PERF_EVENT_IP | __PERF_EVENT_TID:
+			process_event(event->ip, md->counter);
+			break;
+		}
 	}
 
 	md->prev = old;
@@ -1214,6 +1266,7 @@ int main(int argc, char *argv[])
 			hw_event.irq_period	= event_count[counter];
 			hw_event.record_type	= PERF_RECORD_IRQ;
 			hw_event.nmi		= nmi;
+			hw_event.include_tid	= 1;
 
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {
-- 
cgit v0.10.2


From 7730d8655880f41f2ea519aca2ca6a1413dfd2c9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:48:31 +0100
Subject: perf_counter: allow and require one-page mmap on counting counters

A brainfart stopped single page mmap()s working. The rest of the code
should be perfectly fine with not having any data pages.

Reported-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <1237981712.7972.812.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f3e1b27..95e0257 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1369,7 +1369,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	vma_size = vma->vm_end - vma->vm_start;
 	nr_pages = (vma_size / PAGE_SIZE) - 1;
 
-	if (nr_pages == 0 || !is_power_of_2(nr_pages))
+	/*
+	 * If we have data pages ensure they're a power-of-two number, so we
+	 * can do bitmasks instead of modulo.
+	 */
+	if (nr_pages != 0 && !is_power_of_2(nr_pages))
 		return -EINVAL;
 
 	if (vma_size != PAGE_SIZE * (1 + nr_pages))
-- 
cgit v0.10.2


From 53cfbf593758916aac41db728f029986a62f1254 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 25 Mar 2009 22:46:58 +1100
Subject: perf_counter: record time running and time enabled for each counter

Impact: new functionality

Currently, if there are more counters enabled than can fit on the CPU,
the kernel will multiplex the counters on to the hardware using
round-robin scheduling.  That isn't too bad for sampling counters, but
for counting counters it means that the value read from a counter
represents some unknown fraction of the true count of events that
occurred while the counter was enabled.

This remedies the situation by keeping track of how long each counter
is enabled for, and how long it is actually on the cpu and counting
events.  These times are recorded in nanoseconds using the task clock
for per-task counters and the cpu clock for per-cpu counters.

These values can be supplied to userspace on a read from the counter.
Userspace requests that they be supplied after the counter value by
setting the PERF_FORMAT_TOTAL_TIME_ENABLED and/or
PERF_FORMAT_TOTAL_TIME_RUNNING bits in the hw_event.read_format field
when creating the counter.  (There is no way to change the read format
after the counter is created, though it would be possible to add some
way to do that.)

Using this information it is possible for userspace to scale the count
it reads from the counter to get an estimate of the true count:

true_count_estimate = count * total_time_enabled / total_time_running

This also lets userspace detect the situation where the counter never
got to go on the cpu: total_time_running == 0.

This functionality has been requested by the PAPI developers, and will
be generally needed for interpreting the count values from counting
counters correctly.

In the implementation, this keeps 5 time values (in nanoseconds) for
each counter: total_time_enabled and total_time_running are used when
the counter is in state OFF or ERROR and for reporting back to
userspace.  When the counter is in state INACTIVE or ACTIVE, it is the
tstamp_enabled, tstamp_running and tstamp_stopped values that are
relevant, and total_time_enabled and total_time_running are determined
from them.  (tstamp_stopped is only used in INACTIVE state.)  The
reason for doing it like this is that it means that only counters
being enabled or disabled at sched-in and sched-out time need to be
updated.  There are no new loops that iterate over all counters to
update total_time_enabled or total_time_running.

This also keeps separate child_total_time_running and
child_total_time_enabled fields that get added in when reporting the
totals to userspace.  They are separate fields so that they can be
atomic.  We don't want to use atomics for total_time_running,
total_time_enabled etc., because then we would have to use atomic
sequences to update them, which are slower than regular arithmetic and
memory accesses.

It is possible to measure total_time_running by adding a task_clock
counter to each group of counters, and total_time_enabled can be
measured approximately with a top-level task_clock counter (though
inaccuracies will creep in if you need to disable and enable groups
since it is not possible in general to disable/enable the top-level
task_clock counter simultaneously with another group).  However, that
adds extra overhead - I measured around 15% increase in the context
switch latency reported by lat_ctx (from lmbench) when a task_clock
counter was added to each of 2 groups, and around 25% increase when a
task_clock counter was added to each of 4 groups.  (In both cases a
top-level task-clock counter was also added.)

In contrast, the code added in this commit gives better information
with no overhead that I could measure (in fact in some cases I
measured lower times with this code, but the differences were all less
than one standard deviation).

[ v2: address review comments by Andrew Morton. ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Orig-LKML-Reference: <18890.6578.728637.139402@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index d48596a..df007fe 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -455,6 +455,8 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 {
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;
+	counter->tstamp_running += counter->ctx->time_now -
+		counter->tstamp_stopped;
 	if (is_software_counter(counter))
 		counter->hw_ops->enable(counter);
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7fdbdf8..6bf67ce 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -103,6 +103,16 @@ enum perf_counter_record_type {
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
 /*
+ * Bits that can be set in hw_event.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_counter_read_format {
+	PERF_FORMAT_TOTAL_TIME_ENABLED	=  1,
+	PERF_FORMAT_TOTAL_TIME_RUNNING	=  2,
+};
+
+/*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
@@ -281,6 +291,32 @@ struct perf_counter {
 	enum perf_counter_active_state	prev_state;
 	atomic64_t			count;
 
+	/*
+	 * These are the total time in nanoseconds that the counter
+	 * has been enabled (i.e. eligible to run, and the task has
+	 * been scheduled in, if this is a per-task counter)
+	 * and running (scheduled onto the CPU), respectively.
+	 *
+	 * They are computed from tstamp_enabled, tstamp_running and
+	 * tstamp_stopped when the counter is in INACTIVE or ACTIVE state.
+	 */
+	u64				total_time_enabled;
+	u64				total_time_running;
+
+	/*
+	 * These are timestamps used for computing total_time_enabled
+	 * and total_time_running when the counter is in INACTIVE or
+	 * ACTIVE state, measured in nanoseconds from an arbitrary point
+	 * in time.
+	 * tstamp_enabled: the notional time when the counter was enabled
+	 * tstamp_running: the notional time when the counter was scheduled on
+	 * tstamp_stopped: in INACTIVE state, the notional time when the
+	 *	counter was scheduled off.
+	 */
+	u64				tstamp_enabled;
+	u64				tstamp_running;
+	u64				tstamp_stopped;
+
 	struct perf_counter_hw_event	hw_event;
 	struct hw_perf_counter		hw;
 
@@ -292,6 +328,13 @@ struct perf_counter {
 	struct list_head		child_list;
 
 	/*
+	 * These accumulate total time (in nanoseconds) that children
+	 * counters have been enabled and running, respectively.
+	 */
+	atomic64_t			child_total_time_enabled;
+	atomic64_t			child_total_time_running;
+
+	/*
 	 * Protect attach/detach and child_list:
 	 */
 	struct mutex			mutex;
@@ -339,6 +382,16 @@ struct perf_counter_context {
 	int			nr_active;
 	int			is_active;
 	struct task_struct	*task;
+
+	/*
+	 * time_now is the current time in nanoseconds since an arbitrary
+	 * point in the past.  For per-task counters, this is based on the
+	 * task clock, and for per-cpu counters it is based on the cpu clock.
+	 * time_lost is an offset from the task/cpu clock, used to make it
+	 * appear that time only passes while the context is scheduled in.
+	 */
+	u64			time_now;
+	u64			time_lost;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 95e0257..3b862a7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -116,6 +116,7 @@ counter_sched_out(struct perf_counter *counter,
 		return;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_stopped = ctx->time_now;
 	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
@@ -252,6 +253,60 @@ retry:
 }
 
 /*
+ * Get the current time for this context.
+ * If this is a task context, we use the task's task clock,
+ * or for a per-cpu context, we use the cpu clock.
+ */
+static u64 get_context_time(struct perf_counter_context *ctx, int update)
+{
+	struct task_struct *curr = ctx->task;
+
+	if (!curr)
+		return cpu_clock(smp_processor_id());
+
+	return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime;
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_counter_context *ctx, int update)
+{
+	ctx->time_now = get_context_time(ctx, update) - ctx->time_lost;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for a counter.
+ */
+static void update_counter_times(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	u64 run_end;
+
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		counter->total_time_enabled = ctx->time_now -
+			counter->tstamp_enabled;
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+			run_end = counter->tstamp_stopped;
+		else
+			run_end = ctx->time_now;
+		counter->total_time_running = run_end - counter->tstamp_running;
+	}
+}
+
+/*
+ * Update total_time_enabled and total_time_running for all counters in a group.
+ */
+static void update_group_times(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	update_counter_times(leader);
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		update_counter_times(counter);
+}
+
+/*
  * Cross CPU call to disable a performance counter
  */
 static void __perf_counter_disable(void *info)
@@ -276,6 +331,8 @@ static void __perf_counter_disable(void *info)
 	 * If it is in error state, leave it in error state.
 	 */
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		update_context_time(ctx, 1);
+		update_counter_times(counter);
 		if (counter == counter->group_leader)
 			group_sched_out(counter, cpuctx, ctx);
 		else
@@ -320,8 +377,10 @@ static void perf_counter_disable(struct perf_counter *counter)
 	 * Since we have the lock this context can't be scheduled
 	 * in, so we can change the state safely.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
+	}
 
 	spin_unlock_irq(&ctx->lock);
 }
@@ -366,6 +425,8 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
+	counter->tstamp_running += ctx->time_now - counter->tstamp_stopped;
+
 	if (!is_software_counter(counter))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
@@ -425,6 +486,17 @@ static int group_can_go_on(struct perf_counter *counter,
 	return can_add_hw;
 }
 
+static void add_counter_to_ctx(struct perf_counter *counter,
+			       struct perf_counter_context *ctx)
+{
+	list_add_counter(counter, ctx);
+	ctx->nr_counters++;
+	counter->prev_state = PERF_COUNTER_STATE_OFF;
+	counter->tstamp_enabled = ctx->time_now;
+	counter->tstamp_running = ctx->time_now;
+	counter->tstamp_stopped = ctx->time_now;
+}
+
 /*
  * Cross CPU call to install and enable a performance counter
  */
@@ -449,6 +521,7 @@ static void __perf_install_in_context(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
+	update_context_time(ctx, 1);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -456,9 +529,7 @@ static void __perf_install_in_context(void *info)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_add_counter(counter, ctx);
-	ctx->nr_counters++;
-	counter->prev_state = PERF_COUNTER_STATE_OFF;
+	add_counter_to_ctx(counter, ctx);
 
 	/*
 	 * Don't put the counter on if it is disabled or if
@@ -486,8 +557,10 @@ static void __perf_install_in_context(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned)
+		if (leader->hw_event.pinned) {
+			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
 	if (!err && !ctx->task && cpuctx->max_pertask)
@@ -548,10 +621,8 @@ retry:
 	 * can add the counter safely, if it the call above did not
 	 * succeed.
 	 */
-	if (list_empty(&counter->list_entry)) {
-		list_add_counter(counter, ctx);
-		ctx->nr_counters++;
-	}
+	if (list_empty(&counter->list_entry))
+		add_counter_to_ctx(counter, ctx);
 	spin_unlock_irq(&ctx->lock);
 }
 
@@ -576,11 +647,13 @@ static void __perf_counter_enable(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
+	update_context_time(ctx, 1);
 
 	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -602,8 +675,10 @@ static void __perf_counter_enable(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned)
+		if (leader->hw_event.pinned) {
+			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
  unlock:
@@ -659,8 +734,11 @@ static void perf_counter_enable(struct perf_counter *counter)
 	 * Since we have the lock this context can't be scheduled
 	 * in, so we can change the state safely.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_OFF)
+	if (counter->state == PERF_COUNTER_STATE_OFF) {
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->tstamp_enabled = ctx->time_now -
+			counter->total_time_enabled;
+	}
  out:
 	spin_unlock_irq(&ctx->lock);
 }
@@ -693,6 +771,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 	ctx->is_active = 0;
 	if (likely(!ctx->nr_counters))
 		goto out;
+	update_context_time(ctx, 0);
 
 	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
@@ -797,6 +876,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	if (likely(!ctx->nr_counters))
 		goto out;
 
+	/*
+	 * Add any time since the last sched_out to the lost time
+	 * so it doesn't get included in the total_time_enabled and
+	 * total_time_running measures for counters in the context.
+	 */
+	ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now;
+
 	flags = hw_perf_save_disable();
 
 	/*
@@ -817,8 +903,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		 * If this pinned group hasn't been scheduled,
 		 * put it in error state.
 		 */
-		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+			update_group_times(counter);
 			counter->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
@@ -902,8 +990,10 @@ int perf_counter_task_disable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ERROR)
+		if (counter->state != PERF_COUNTER_STATE_ERROR) {
+			update_group_times(counter);
 			counter->state = PERF_COUNTER_STATE_OFF;
+		}
 	}
 
 	hw_perf_restore(perf_flags);
@@ -946,6 +1036,8 @@ int perf_counter_task_enable(void)
 		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->tstamp_enabled = ctx->time_now -
+			counter->total_time_enabled;
 		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
@@ -1009,10 +1101,14 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 static void __read(void *info)
 {
 	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
 
 	curr_rq_lock_irq_save(&flags);
+	if (ctx->is_active)
+		update_context_time(ctx, 1);
 	counter->hw_ops->read(counter);
+	update_counter_times(counter);
 	curr_rq_unlock_irq_restore(&flags);
 }
 
@@ -1025,6 +1121,8 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		smp_call_function_single(counter->oncpu,
 					 __read, counter, 1);
+	} else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+		update_counter_times(counter);
 	}
 
 	return atomic64_read(&counter->count);
@@ -1137,10 +1235,8 @@ static int perf_release(struct inode *inode, struct file *file)
 static ssize_t
 perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
-	u64 cntval;
-
-	if (count < sizeof(cntval))
-		return -EINVAL;
+	u64 values[3];
+	int n;
 
 	/*
 	 * Return end-of-file for a read on a counter that is in
@@ -1151,10 +1247,24 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 		return 0;
 
 	mutex_lock(&counter->mutex);
-	cntval = perf_counter_read(counter);
+	values[0] = perf_counter_read(counter);
+	n = 1;
+	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = counter->total_time_enabled +
+			atomic64_read(&counter->child_total_time_enabled);
+	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = counter->total_time_running +
+			atomic64_read(&counter->child_total_time_running);
 	mutex_unlock(&counter->mutex);
 
-	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
+	if (count < n * sizeof(u64))
+		return -EINVAL;
+	count = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, count))
+		return -EFAULT;
+
+	return count;
 }
 
 static ssize_t
@@ -2290,8 +2400,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	 * Link it up in the child's context:
 	 */
 	child_counter->task = child;
-	list_add_counter(child_counter, child_ctx);
-	child_ctx->nr_counters++;
+	add_counter_to_ctx(child_counter, child_ctx);
 
 	child_counter->parent = parent_counter;
 	/*
@@ -2361,6 +2470,10 @@ static void sync_child_counter(struct perf_counter *child_counter,
 	 * Add back the child's count to the parent's count:
 	 */
 	atomic64_add(child_val, &parent_counter->count);
+	atomic64_add(child_counter->total_time_enabled,
+		     &parent_counter->child_total_time_enabled);
+	atomic64_add(child_counter->total_time_running,
+		     &parent_counter->child_total_time_running);
 
 	/*
 	 * Remove this counter from the parent's list
@@ -2395,6 +2508,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	if (child != current) {
 		wait_task_inactive(child, 0);
 		list_del_init(&child_counter->list_entry);
+		update_counter_times(child_counter);
 	} else {
 		struct perf_cpu_context *cpuctx;
 		unsigned long flags;
@@ -2412,6 +2526,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
 		group_sched_out(child_counter, cpuctx, child_ctx);
+		update_counter_times(child_counter);
 
 		list_del_init(&child_counter->list_entry);
 
-- 
cgit v0.10.2


From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:02 +0200
Subject: perf_counter: unify and fix delayed counter wakeup

While going over the wakeup code I noticed delayed wakeups only work
for hardware counters but basically all software counters rely on
them.

This patch unifies and generalizes the delayed wakeup to fix this
issue.

Since we're dealing with NMI context bits here, use a cmpxchg() based
single link list implementation to track counters that have pending
wakeups.

[ This should really be generic code for delayed wakeups, but since we
  cannot use cmpxchg()/xchg() in generic code, I've let it live in the
  perf_counter code. -- Eric Dumazet could use it to aggregate the
  network wakeups. ]

Furthermore, the x86 method of using TIF flags was flawed in that its
quite possible to end up setting the bit on the idle task, loosing the
wakeup.

The powerpc method uses per-cpu storage and does appear to be
sufficient.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.153932974@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index cb32d57..20a44d0 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -132,7 +132,7 @@ static inline int irqs_disabled_flags(unsigned long flags)
 struct irq_chip;
 
 #ifdef CONFIG_PERF_COUNTERS
-static inline unsigned long get_perf_counter_pending(void)
+static inline unsigned long test_perf_counter_pending(void)
 {
 	unsigned long x;
 
@@ -160,7 +160,7 @@ extern void perf_counter_do_pending(void);
 
 #else
 
-static inline unsigned long get_perf_counter_pending(void)
+static inline unsigned long test_perf_counter_pending(void)
 {
 	return 0;
 }
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 469e963..2cd471f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -135,7 +135,7 @@ notrace void raw_local_irq_restore(unsigned long en)
 			iseries_handle_interrupts();
 	}
 
-	if (get_perf_counter_pending()) {
+	if (test_perf_counter_pending()) {
 		clear_perf_counter_pending();
 		perf_counter_do_pending();
 	}
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index df007fe..cde720f 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -650,24 +650,6 @@ hw_perf_counter_init(struct perf_counter *counter)
 }
 
 /*
- * Handle wakeups.
- */
-void perf_counter_do_pending(void)
-{
-	int i;
-	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
-	struct perf_counter *counter;
-
-	for (i = 0; i < cpuhw->n_counters; ++i) {
-		counter = cpuhw->counter[i];
-		if (counter && counter->wakeup_pending) {
-			counter->wakeup_pending = 0;
-			wake_up(&counter->waitq);
-		}
-	}
-}
-
-/*
  * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
  * here so there is no possibility of being interrupted.
@@ -720,7 +702,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
 	struct perf_counter *counter;
 	long val;
-	int need_wakeup = 0, found = 0;
+	int found = 0;
 
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
@@ -761,7 +743,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	 * immediately; otherwise we'll have do the wakeup when interrupts
 	 * get soft-enabled.
 	 */
-	if (get_perf_counter_pending() && regs->softe) {
+	if (test_perf_counter_pending() && regs->softe) {
 		irq_enter();
 		clear_perf_counter_pending();
 		perf_counter_do_pending();
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 1662043..e2b0e66 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,8 +84,9 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
 
-#define set_perf_counter_pending()	\
-		set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+#define set_perf_counter_pending()	do { } while (0)
+#define clear_perf_counter_pending()	do { } while (0)
+#define test_perf_counter_pending()	(0)
 
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 3ffd5d2..8820a73 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,7 +83,6 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
-#define TIF_PERF_COUNTERS	11	/* notify perf counter work */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* 32bit process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -107,7 +106,6 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
-#define _TIF_PERF_COUNTERS	(1 << TIF_PERF_COUNTERS)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
@@ -141,7 +139,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME)
+	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3f95b0c..7aab177 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -227,7 +227,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		 */
 		hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
 	}
-	counter->wakeup_pending = 0;
 
 	return 0;
 }
@@ -773,34 +772,6 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
-/*
- * This handler is triggered by NMI contexts:
- */
-void perf_counter_notify(struct pt_regs *regs)
-{
-	struct cpu_hw_counters *cpuc;
-	unsigned long flags;
-	int bit, cpu;
-
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
-
-	for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
-		struct perf_counter *counter = cpuc->counters[bit];
-
-		if (!counter)
-			continue;
-
-		if (counter->wakeup_pending) {
-			counter->wakeup_pending = 0;
-			wake_up(&counter->waitq);
-		}
-	}
-
-	local_irq_restore(flags);
-}
-
 void perf_counters_lapic_init(int nmi)
 {
 	u32 apic_val;
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 611615a..0a813b1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,6 @@
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
  */
-#include <linux/perf_counter.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -872,11 +871,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		tracehook_notify_resume(regs);
 	}
 
-	if (thread_info_flags & _TIF_PERF_COUNTERS) {
-		clear_thread_flag(TIF_PERF_COUNTERS);
-		perf_counter_notify(regs);
-	}
-
 #ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6bf67ce..0d83322 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -275,6 +275,10 @@ struct perf_mmap_data {
 	void 				*data_pages[0];
 };
 
+struct perf_wakeup_entry {
+	struct perf_wakeup_entry *next;
+};
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -350,7 +354,7 @@ struct perf_counter {
 	/* poll related */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
-	int				wakeup_pending;
+	struct perf_wakeup_entry	wakeup;
 
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
@@ -427,7 +431,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern void perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
-extern void perf_counter_notify(struct pt_regs *regs);
+extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
 extern void perf_counter_unthrottle(void);
 extern u64 hw_perf_save_disable(void);
@@ -461,7 +465,7 @@ static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline void perf_counter_init_task(struct task_struct *child)	{ }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
-static inline void perf_counter_notify(struct pt_regs *regs)		{ }
+static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_counter_unthrottle(void)			{ }
 static inline void hw_perf_restore(u64 ctrl)				{ }
@@ -469,8 +473,9 @@ static inline u64 hw_perf_save_disable(void)		      { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 
-static inline void perf_swcounter_event(u32 event, u64 nr,
-					int nmi, struct pt_regs *regs)	{ }
+static inline void
+perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)	{ }
+
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3b862a7..f70ff80 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1197,8 +1197,12 @@ static void free_counter_rcu(struct rcu_head *head)
 	kfree(counter);
 }
 
+static void perf_pending_sync(struct perf_counter *counter);
+
 static void free_counter(struct perf_counter *counter)
 {
+	perf_pending_sync(counter);
+
 	if (counter->destroy)
 		counter->destroy(counter);
 
@@ -1529,6 +1533,118 @@ static const struct file_operations perf_fops = {
 };
 
 /*
+ * Perf counter wakeup
+ *
+ * If there's data, ensure we set the poll() state and publish everything
+ * to user-space before waking everybody up.
+ */
+
+void perf_counter_wakeup(struct perf_counter *counter)
+{
+	struct perf_mmap_data *data;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (data) {
+		(void)atomic_xchg(&data->wakeup, POLL_IN);
+		__perf_counter_update_userpage(counter, data);
+	}
+	rcu_read_unlock();
+
+	wake_up_all(&counter->waitq);
+}
+
+/*
+ * Pending wakeups
+ *
+ * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
+ *
+ * The NMI bit means we cannot possibly take locks. Therefore, maintain a
+ * single linked list and use cmpxchg() to add entries lockless.
+ */
+
+#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL)
+
+static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = {
+	PENDING_TAIL,
+};
+
+static void perf_pending_queue(struct perf_counter *counter)
+{
+	struct perf_wakeup_entry **head;
+	struct perf_wakeup_entry *prev, *next;
+
+	if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL)
+		return;
+
+	head = &get_cpu_var(perf_wakeup_head);
+
+	do {
+		prev = counter->wakeup.next = *head;
+		next = &counter->wakeup;
+	} while (cmpxchg(head, prev, next) != prev);
+
+	set_perf_counter_pending();
+
+	put_cpu_var(perf_wakeup_head);
+}
+
+static int __perf_pending_run(void)
+{
+	struct perf_wakeup_entry *list;
+	int nr = 0;
+
+	list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL);
+	while (list != PENDING_TAIL) {
+		struct perf_counter *counter = container_of(list,
+				struct perf_counter, wakeup);
+
+		list = list->next;
+
+		counter->wakeup.next = NULL;
+		/*
+		 * Ensure we observe the unqueue before we issue the wakeup,
+		 * so that we won't be waiting forever.
+		 * -- see perf_not_pending().
+		 */
+		smp_wmb();
+
+		perf_counter_wakeup(counter);
+		nr++;
+	}
+
+	return nr;
+}
+
+static inline int perf_not_pending(struct perf_counter *counter)
+{
+	/*
+	 * If we flush on whatever cpu we run, there is a chance we don't
+	 * need to wait.
+	 */
+	get_cpu();
+	__perf_pending_run();
+	put_cpu();
+
+	/*
+	 * Ensure we see the proper queue state before going to sleep
+	 * so that we do not miss the wakeup. -- see perf_pending_handle()
+	 */
+	smp_rmb();
+	return counter->wakeup.next == NULL;
+}
+
+static void perf_pending_sync(struct perf_counter *counter)
+{
+	wait_event(counter->waitq, perf_not_pending(counter));
+}
+
+void perf_counter_do_pending(void)
+{
+	__perf_pending_run();
+}
+
+/*
  * Output
  */
 
@@ -1611,13 +1727,10 @@ static void perf_output_copy(struct perf_output_handle *handle,
 static void perf_output_end(struct perf_output_handle *handle, int nmi)
 {
 	if (handle->wakeup) {
-		(void)atomic_xchg(&handle->data->wakeup, POLL_IN);
-		__perf_counter_update_userpage(handle->counter, handle->data);
-		if (nmi) {
-			handle->counter->wakeup_pending = 1;
-			set_perf_counter_pending();
-		} else
-			wake_up(&handle->counter->waitq);
+		if (nmi)
+			perf_pending_queue(handle->counter);
+		else
+			perf_counter_wakeup(handle->counter);
 	}
 	rcu_read_unlock();
 }
@@ -2211,7 +2324,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
-	counter->wakeup_pending		= 0;
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
 	counter->ctx			= ctx;
diff --git a/kernel/timer.c b/kernel/timer.c
index b455556..672ca25 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1167,6 +1168,8 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
+	perf_counter_do_pending();
+
 	hrtimer_run_pending();
 
 	if (time_after_eq(jiffies, base->timer_jiffies))
-- 
cgit v0.10.2


From 38ff667b321b00f5e6830e93fb4ab11a653a2920 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:03 +0200
Subject: perf_counter: fix update_userpage()

It just occured to me it is possible to have multiple contending
updates of the userpage (mmap information vs overflow vs counter).
This would break the seqlock logic.

It appear the arch code uses this from NMI context, so we cannot
possibly serialize its use, therefore separate the data_head update
from it and let it return to its original use.

The arch code needs to make sure there are no contending callers by
disabling the counter before using it -- powerpc appears to do this
nicely.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.241410660@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0d83322..8ac1885 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -160,10 +160,45 @@ struct perf_counter_hw_event {
 struct perf_counter_mmap_page {
 	__u32	version;		/* version number of this structure */
 	__u32	compat_version;		/* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw counters in user-space.
+	 *
+	 * The index and offset should be read atomically using the seqlock:
+	 *
+	 *   __u32 seq, index;
+	 *   __s64 offset;
+	 *
+	 * again:
+	 *   rmb();
+	 *   seq = pc->lock;
+	 *
+	 *   if (unlikely(seq & 1)) {
+	 *     cpu_relax();
+	 *     goto again;
+	 *   }
+	 *
+	 *   index = pc->index;
+	 *   offset = pc->offset;
+	 *
+	 *   rmb();
+	 *   if (pc->lock != seq)
+	 *     goto again;
+	 *
+	 * After this, index contains architecture specific counter index + 1,
+	 * so that 0 means unavailable, offset contains the value to be added
+	 * to the result of the raw timer read to obtain this counter's value.
+	 */
 	__u32	lock;			/* seqlock for synchronization */
 	__u32	index;			/* hardware counter identifier */
 	__s64	offset;			/* add to hardware counter value */
 
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading this value should issue an rmb(), on SMP capable
+	 * platforms, after reading this value -- see perf_counter_wakeup().
+	 */
 	__u32   data_head;		/* head in the data section */
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f70ff80..c95e923 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1316,10 +1316,22 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-static void __perf_counter_update_userpage(struct perf_counter *counter,
-					   struct perf_mmap_data *data)
+/*
+ * Callers need to ensure there can be no nesting of this function, otherwise
+ * the seqlock logic goes bad. We can not serialize this because the arch
+ * code calls this from NMI context.
+ */
+void perf_counter_update_userpage(struct perf_counter *counter)
 {
-	struct perf_counter_mmap_page *userpg = data->user_page;
+	struct perf_mmap_data *data;
+	struct perf_counter_mmap_page *userpg;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto unlock;
+
+	userpg = data->user_page;
 
 	/*
 	 * Disable preemption so as to not let the corresponding user-space
@@ -1333,20 +1345,10 @@ static void __perf_counter_update_userpage(struct perf_counter *counter,
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
 
-	userpg->data_head = atomic_read(&data->head);
 	smp_wmb();
 	++userpg->lock;
 	preempt_enable();
-}
-
-void perf_counter_update_userpage(struct perf_counter *counter)
-{
-	struct perf_mmap_data *data;
-
-	rcu_read_lock();
-	data = rcu_dereference(counter->data);
-	if (data)
-		__perf_counter_update_userpage(counter, data);
+unlock:
 	rcu_read_unlock();
 }
 
@@ -1547,7 +1549,13 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	data = rcu_dereference(counter->data);
 	if (data) {
 		(void)atomic_xchg(&data->wakeup, POLL_IN);
-		__perf_counter_update_userpage(counter, data);
+		/*
+		 * Ensure all data writes are issued before updating the
+		 * user-space data head information. The matching rmb()
+		 * will be in userspace after reading this value.
+		 */
+		smp_wmb();
+		data->user_page->data_head = atomic_read(&data->head);
 	}
 	rcu_read_unlock();
 
-- 
cgit v0.10.2


From 195564390210977954fe4ef45b39cdee34f41b59 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:04 +0200
Subject: perf_counter: kerneltop: simplify data_head read

Now that the kernel side changed, match up again.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.327144324@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index fda1438..2779c57 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -1125,22 +1125,10 @@ struct mmap_data {
 static unsigned int mmap_read_head(struct mmap_data *md)
 {
 	struct perf_counter_mmap_page *pc = md->base;
-	unsigned int seq, head;
-
-repeat:
-	rmb();
-	seq = pc->lock;
-
-	if (unlikely(seq & 1)) {
-		cpu_relax();
-		goto repeat;
-	}
+	int head;
 
 	head = pc->data_head;
-
 	rmb();
-	if (pc->lock != seq)
-		goto repeat;
 
 	return head;
 }
-- 
cgit v0.10.2


From 0a4a93919bdc5cee48fe4367591e8e0449c1086c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:05 +0200
Subject: perf_counter: executable mmap() information

Currently the profiling information returns userspace IPs but no way
to correlate them to userspace code. Userspace could look into
/proc/$pid/maps but that might not be current or even present anymore
at the time of analyzing the IPs.

Therefore provide means to track the mmap information and provide it
in the output stream.

XXX: only covers mmap()/munmap(), mremap() and mprotect() are missing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Orig-LKML-Reference: <20090330171023.417259499@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8ac1885..037a811 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -137,9 +137,11 @@ struct perf_counter_hw_event {
 				exclude_kernel :  1, /* ditto kernel          */
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
-				include_tid    :  1, /* include the tid */
+				include_tid    :  1, /* include the tid       */
+				mmap           :  1, /* include mmap data     */
+				munmap         :  1, /* include munmap data   */
 
-				__reserved_1   : 54;
+				__reserved_1   : 52;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -211,6 +213,9 @@ enum perf_event_type {
 	PERF_EVENT_IP		= 0,
 	PERF_EVENT_GROUP	= 1,
 
+	PERF_EVENT_MMAP		= 2,
+	PERF_EVENT_MUNMAP	= 3,
+
 	__PERF_EVENT_TID	= 0x100,
 };
 
@@ -491,6 +496,12 @@ static inline int is_software_counter(struct perf_counter *counter)
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
 
+extern void perf_counter_mmap(unsigned long addr, unsigned long len,
+			      unsigned long pgoff, struct file *file);
+
+extern void perf_counter_munmap(unsigned long addr, unsigned long len,
+				unsigned long pgoff, struct file *file);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -511,6 +522,15 @@ static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 static inline void
 perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)	{ }
 
+
+static inline void
+perf_counter_mmap(unsigned long addr, unsigned long len,
+		  unsigned long pgoff, struct file *file)		{ }
+
+static inline void
+perf_counter_munmap(unsigned long addr, unsigned long len,
+		    unsigned long pgoff, struct file *file) 		{ }
+
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c95e923..f35e89e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -25,6 +25,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
+#include <linux/dcache.h>
 
 #include <asm/irq_regs.h>
 
@@ -1844,6 +1845,150 @@ void perf_counter_output(struct perf_counter *counter,
 }
 
 /*
+ * mmap tracking
+ */
+
+struct perf_mmap_event {
+	struct file	*file;
+	char		*file_name;
+	int		file_size;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+		u64				start;
+		u64				len;
+		u64				pgoff;
+	} event;
+};
+
+static void perf_counter_mmap_output(struct perf_counter *counter,
+				     struct perf_mmap_event *mmap_event)
+{
+	struct perf_output_handle handle;
+	int size = mmap_event->event.header.size;
+	int ret = perf_output_begin(&handle, counter, size);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, mmap_event->event);
+	perf_output_copy(&handle, mmap_event->file_name,
+				   mmap_event->file_size);
+	perf_output_end(&handle, 0);
+}
+
+static int perf_counter_mmap_match(struct perf_counter *counter,
+				   struct perf_mmap_event *mmap_event)
+{
+	if (counter->hw_event.mmap &&
+	    mmap_event->event.header.type == PERF_EVENT_MMAP)
+		return 1;
+
+	if (counter->hw_event.munmap &&
+	    mmap_event->event.header.type == PERF_EVENT_MUNMAP)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_mmap_ctx(struct perf_counter_context *ctx,
+				  struct perf_mmap_event *mmap_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_mmap_match(counter, mmap_event))
+			perf_counter_mmap_output(counter, mmap_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct file *file = mmap_event->file;
+	unsigned int size;
+	char tmp[16];
+	char *buf = NULL;
+	char *name;
+
+	if (file) {
+		buf = kzalloc(PATH_MAX, GFP_KERNEL);
+		if (!buf) {
+			name = strncpy(tmp, "//enomem", sizeof(tmp));
+			goto got_name;
+		}
+		name = dentry_path(file->f_dentry, buf, PATH_MAX);
+		if (IS_ERR(name)) {
+			name = strncpy(tmp, "//toolong", sizeof(tmp));
+			goto got_name;
+		}
+	} else {
+		name = strncpy(tmp, "//anon", sizeof(tmp));
+		goto got_name;
+	}
+
+got_name:
+	size = ALIGN(strlen(name), sizeof(u64));
+
+	mmap_event->file_name = name;
+	mmap_event->file_size = size;
+
+	mmap_event->event.header.size = sizeof(mmap_event->event) + size;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
+	put_cpu_var(perf_cpu_context);
+
+	perf_counter_mmap_ctx(&current->perf_counter_ctx, mmap_event);
+
+	kfree(buf);
+}
+
+void perf_counter_mmap(unsigned long addr, unsigned long len,
+		       unsigned long pgoff, struct file *file)
+{
+	struct perf_mmap_event mmap_event = {
+		.file   = file,
+		.event  = {
+			.header = { .type = PERF_EVENT_MMAP, },
+			.pid	= current->group_leader->pid,
+			.tid	= current->pid,
+			.start  = addr,
+			.len    = len,
+			.pgoff  = pgoff,
+		},
+	};
+
+	perf_counter_mmap_event(&mmap_event);
+}
+
+void perf_counter_munmap(unsigned long addr, unsigned long len,
+			 unsigned long pgoff, struct file *file)
+{
+	struct perf_mmap_event mmap_event = {
+		.file   = file,
+		.event  = {
+			.header = { .type = PERF_EVENT_MUNMAP, },
+			.pid	= current->group_leader->pid,
+			.tid	= current->pid,
+			.start  = addr,
+			.len    = len,
+			.pgoff  = pgoff,
+		},
+	};
+
+	perf_counter_mmap_event(&mmap_event);
+}
+
+/*
  * Generic software counter infrastructure
  */
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 4a38411..1df63f6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -28,6 +28,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -1223,6 +1224,9 @@ munmap_back:
 	if (correct_wcount)
 		atomic_inc(&inode->i_writecount);
 out:
+	if (vm_flags & VM_EXEC)
+		perf_counter_mmap(addr, len, pgoff, file);
+
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
@@ -1756,6 +1760,12 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 	do {
 		long nrpages = vma_pages(vma);
 
+		if (vma->vm_flags & VM_EXEC) {
+			perf_counter_munmap(vma->vm_start,
+					nrpages << PAGE_SHIFT,
+					vma->vm_pgoff, vma->vm_file);
+		}
+
 		mm->total_vm -= nrpages;
 		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
 		vma = remove_vma(vma);
-- 
cgit v0.10.2


From 3c1ba6fafecaed295017881f8863a18602f32c1d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:06 +0200
Subject: perf_counter: kerneltop: parse the mmap data stream

frob the kerneltop code to print the mmap data in the stream

Better use would be collecting the IPs per PID and mapping them onto
the provided userspace code.. TODO

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.501902515@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 2779c57..995111d 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -184,6 +184,8 @@ static int			nmi				=  1;
 static int			group				=  0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			=  16;
+static int			use_mmap			= 0;
+static int			use_munmap			= 0;
 
 static char			*vmlinux;
 
@@ -333,6 +335,8 @@ static void display_help(void)
 	" -z        --zero             # zero counts after display\n"
 	" -D        --dump_symtab      # dump symbol table to stderr on startup\n"
 	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
+	" -M        --mmap_info        # print mmap info stream\n"
+	" -U        --munmap_info      # print munmap info stream\n"
 	);
 
 	exit(0);
@@ -1052,9 +1056,11 @@ static void process_options(int argc, char *argv[])
 			{"stat",	no_argument,		NULL, 'S'},
 			{"zero",	no_argument,		NULL, 'z'},
 			{"mmap_pages",	required_argument,	NULL, 'm'},
+			{"mmap_info",	no_argument,		NULL, 'M'},
+			{"munmap_info",	no_argument,		NULL, 'U'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:zMU",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -1092,6 +1098,8 @@ static void process_options(int argc, char *argv[])
 		case 'x': vmlinux			= strdup(optarg); break;
 		case 'z': zero				=              1; break;
 		case 'm': mmap_pages			=   atoi(optarg); break;
+		case 'M': use_mmap			=              1; break;
+		case 'U': use_munmap			=              1; break;
 		default: error = 1; break;
 		}
 	}
@@ -1172,12 +1180,29 @@ static void mmap_read(struct mmap_data *md)
 	last_read = this_read;
 
 	for (; old != head;) {
-		struct event_struct {
+		struct ip_event {
 			struct perf_event_header header;
 			__u64 ip;
 			__u32 pid, tid;
-		} *event = (struct event_struct *)&data[old & md->mask];
-		struct event_struct event_copy;
+		};
+		struct mmap_event {
+			struct perf_event_header header;
+			__u32 pid, tid;
+			__u64 start;
+			__u64 len;
+			__u64 pgoff;
+			char filename[PATH_MAX];
+		};
+
+		typedef union event_union {
+			struct perf_event_header header;
+			struct ip_event ip;
+			struct mmap_event mmap;
+		} event_t;
+
+		event_t *event = (event_t *)&data[old & md->mask];
+
+		event_t event_copy;
 
 		unsigned int size = event->header.size;
 
@@ -1187,7 +1212,7 @@ static void mmap_read(struct mmap_data *md)
 		 */
 		if ((old & md->mask) + size != ((old + size) & md->mask)) {
 			unsigned int offset = old;
-			unsigned int len = sizeof(*event), cpy;
+			unsigned int len = min(sizeof(*event), size), cpy;
 			void *dst = &event_copy;
 
 			do {
@@ -1206,7 +1231,18 @@ static void mmap_read(struct mmap_data *md)
 		switch (event->header.type) {
 		case PERF_EVENT_IP:
 		case PERF_EVENT_IP | __PERF_EVENT_TID:
-			process_event(event->ip, md->counter);
+			process_event(event->ip.ip, md->counter);
+			break;
+
+		case PERF_EVENT_MMAP:
+		case PERF_EVENT_MUNMAP:
+			printf("%s: %Lu %Lu %Lu %s\n",
+					event->header.type == PERF_EVENT_MMAP
+					  ? "mmap" : "munmap",
+					event->mmap.start,
+					event->mmap.len,
+					event->mmap.pgoff,
+					event->mmap.filename);
 			break;
 		}
 	}
@@ -1255,6 +1291,8 @@ int main(int argc, char *argv[])
 			hw_event.record_type	= PERF_RECORD_IRQ;
 			hw_event.nmi		= nmi;
 			hw_event.include_tid	= 1;
+			hw_event.mmap		= use_mmap;
+			hw_event.munmap		= use_munmap;
 
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {
-- 
cgit v0.10.2


From 7595d63b3a9ce65d14c4fbd0e7de448a343d7215 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Mar 2009 19:07:07 +0200
Subject: perf_counter: powerpc: only reserve PMU hardware when we need it

Impact: cooperate with oprofile

At present, on PowerPC, if you have perf_counters compiled in, oprofile
doesn't work.  There is code to allow the PMU to be shared between
competing subsystems, such as perf_counters and oprofile, but currently
the perf_counter subsystem reserves the PMU for itself at boot time,
and never releases it.

This makes perf_counter play nicely with oprofile.  Now we keep a count
of how many perf_counter instances are counting hardware events, and
reserve the PMU when that count becomes non-zero, and release the PMU
when that count becomes zero.  This means that it is possible to have
perf_counters compiled in and still use oprofile, as long as there are
no hardware perf_counters active.  This also means that if oprofile is
active, sys_perf_counter_open will fail if the hw_event specifies a
hardware event.

To avoid races with other tasks creating and destroying perf_counters,
we use a mutex.  We use atomic_inc_not_zero and atomic_add_unless to
avoid having to take the mutex unless there is a possibility of the
count going between 0 and 1.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090330171023.627912475@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index cde720f..560dd1e 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -41,6 +41,8 @@ struct power_pmu *ppmu;
  */
 static unsigned int freeze_counters_kernel = MMCR0_FCS;
 
+static void perf_counter_interrupt(struct pt_regs *regs);
+
 void perf_counter_print_debug(void)
 {
 }
@@ -594,6 +596,24 @@ struct hw_perf_counter_ops power_perf_ops = {
 	.read = power_perf_read
 };
 
+/* Number of perf_counters counting hardware events */
+static atomic_t num_counters;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_counter.
+ */
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+	if (!atomic_add_unless(&num_counters, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_counters) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
 const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter)
 {
@@ -601,6 +621,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 	struct perf_counter *ctrs[MAX_HWCOUNTERS];
 	unsigned int events[MAX_HWCOUNTERS];
 	int n;
+	int err;
 
 	if (!ppmu)
 		return NULL;
@@ -646,6 +667,27 @@ hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+
+	/*
+	 * See if we need to reserve the PMU.
+	 * If no counters are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * reserve_pmc_hardware or release_pmc_hardware.
+	 */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_counters)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_counters) == 0 &&
+		    reserve_pmc_hardware(perf_counter_interrupt))
+			err = -EBUSY;
+		else
+			atomic_inc(&num_counters);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	counter->destroy = hw_perf_counter_destroy;
+
+	if (err)
+		return NULL;
 	return &power_perf_ops;
 }
 
@@ -769,11 +811,6 @@ static int init_perf_counters(void)
 {
 	unsigned long pvr;
 
-	if (reserve_pmc_hardware(perf_counter_interrupt)) {
-		printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
-		return -EBUSY;
-	}
-
 	/* XXX should get this from cputable */
 	pvr = mfspr(SPRN_PVR);
 	switch (PVR_VER(pvr)) {
-- 
cgit v0.10.2


From d5d2bc0dd0379deddb9ede66fec90a3083eaec57 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Mar 2009 19:07:08 +0200
Subject: perf_counter: make it possible for hw_perf_counter_init to return
 error codes

Impact: better error reporting

At present, if hw_perf_counter_init encounters an error, all it can do
is return NULL, which causes sys_perf_counter_open to return an EINVAL
error to userspace.  This isn't very informative for userspace; it means
that userspace can't tell the difference between "sorry, oprofile is
already using the PMU" and "we don't support this CPU" and "this CPU
doesn't support the requested generic hardware event".

This commit uses the PTR_ERR/ERR_PTR/IS_ERR set of macros to let
hw_perf_counter_init return an error code on error rather than just NULL
if it wishes.  If it does so, that error code will be returned from
sys_perf_counter_open to userspace.  If it returns NULL, an EINVAL
error will be returned to userspace, as before.

This also adapts the powerpc hw_perf_counter_init to make use of this
to return ENXIO, EINVAL, EBUSY, or EOPNOTSUPP as appropriate.  It would
be good to add extra error numbers in future to allow userspace to
distinguish the various errors that are currently reported as EINVAL,
i.e. irq_period < 0, too many events in a group, conflict between
exclude_* settings in a group, and PMU resource conflict in a group.

[ v2: fix a bug pointed out by Corey Ashford where error returns from
      hw_perf_counter_init were not handled correctly in the case of
      raw hardware events.]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090330171023.682428180@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 560dd1e..0a4d14f 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -624,13 +624,13 @@ hw_perf_counter_init(struct perf_counter *counter)
 	int err;
 
 	if (!ppmu)
-		return NULL;
+		return ERR_PTR(-ENXIO);
 	if ((s64)counter->hw_event.irq_period < 0)
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	if (!perf_event_raw(&counter->hw_event)) {
 		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-			return NULL;
+			return ERR_PTR(-EOPNOTSUPP);
 		ev = ppmu->generic_events[ev];
 	} else {
 		ev = perf_event_config(&counter->hw_event);
@@ -656,14 +656,14 @@ hw_perf_counter_init(struct perf_counter *counter)
 		n = collect_events(counter->group_leader, ppmu->n_counter - 1,
 				   ctrs, events);
 		if (n < 0)
-			return NULL;
+			return ERR_PTR(-EINVAL);
 	}
 	events[n] = ev;
 	ctrs[n] = counter;
 	if (check_excludes(ctrs, n, 1))
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	if (power_check_constraints(events, n + 1))
-		return NULL;
+		return ERR_PTR(-EINVAL);
 
 	counter->hw.config = events[n];
 	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
@@ -687,7 +687,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 	counter->destroy = hw_perf_counter_destroy;
 
 	if (err)
-		return NULL;
+		return ERR_PTR(err);
 	return &power_perf_ops;
 }
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f35e89e..d07b452 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2453,10 +2453,11 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 {
 	const struct hw_perf_counter_ops *hw_ops;
 	struct perf_counter *counter;
+	long err;
 
 	counter = kzalloc(sizeof(*counter), gfpflags);
 	if (!counter)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	/*
 	 * Single counters are their own group leaders, with an
@@ -2505,12 +2506,18 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		hw_ops = tp_perf_counter_init(counter);
 		break;
 	}
+done:
+	err = 0;
+	if (!hw_ops)
+		err = -EINVAL;
+	else if (IS_ERR(hw_ops))
+		err = PTR_ERR(hw_ops);
 
-	if (!hw_ops) {
+	if (err) {
 		kfree(counter);
-		return NULL;
+		return ERR_PTR(err);
 	}
-done:
+
 	counter->hw_ops = hw_ops;
 
 	return counter;
@@ -2583,10 +2590,10 @@ SYSCALL_DEFINE5(perf_counter_open,
 			goto err_put_context;
 	}
 
-	ret = -EINVAL;
 	counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
 				     GFP_KERNEL);
-	if (!counter)
+	ret = PTR_ERR(counter);
+	if (IS_ERR(counter))
 		goto err_put_context;
 
 	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
@@ -2658,8 +2665,8 @@ inherit_counter(struct perf_counter *parent_counter,
 	child_counter = perf_counter_alloc(&parent_counter->hw_event,
 					   parent_counter->cpu, child_ctx,
 					   group_leader, GFP_KERNEL);
-	if (!child_counter)
-		return NULL;
+	if (IS_ERR(child_counter))
+		return child_counter;
 
 	/*
 	 * Link it up in the child's context:
@@ -2710,15 +2717,17 @@ static int inherit_group(struct perf_counter *parent_counter,
 {
 	struct perf_counter *leader;
 	struct perf_counter *sub;
+	struct perf_counter *child_ctr;
 
 	leader = inherit_counter(parent_counter, parent, parent_ctx,
 				 child, NULL, child_ctx);
-	if (!leader)
-		return -ENOMEM;
+	if (IS_ERR(leader))
+		return PTR_ERR(leader);
 	list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
-		if (!inherit_counter(sub, parent, parent_ctx,
-				     child, leader, child_ctx))
-			return -ENOMEM;
+		child_ctr = inherit_counter(sub, parent, parent_ctx,
+					    child, leader, child_ctx);
+		if (IS_ERR(child_ctr))
+			return PTR_ERR(child_ctr);
 	}
 	return 0;
 }
-- 
cgit v0.10.2


From 9ea98e191255ee642e64a5745014424fc63f83b0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:09 +0200
Subject: perf_counter: x86: proper error propagation for the x86
 hw_perf_counter_init()

Now that Paul cleaned up the error propagation paths, pass down the
x86 error as well.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.792822360@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7aab177..b8885cc 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -954,7 +954,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 
 	err = __hw_perf_counter_init(counter);
 	if (err)
-		return NULL;
+		return ERR_PTR(err);
 
 	return &x86_perf_counter_ops;
 }
-- 
cgit v0.10.2


From 31f004df8d14212f0a8a2fb12a8ed44a3d80e2fb Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Mar 2009 19:07:10 +0200
Subject: perf_counter tools: optionally scale counter values in perfstat mode

Impact: new functionality

This adds add an option to the perfstat mode of kerneltop to scale the
reported counter values according to the fraction of time that each
counter gets to count.  This is invoked with the -l option (I used 'l'
because s, c, a and e were all taken already.)  This uses the new
PERF_RECORD_TOTAL_TIME_{ENABLED,RUNNING} read format options.

With this, we get output like this:

$ ./perfstat -l -e 0:0,0:1,0:2,0:3,0:4,0:5 ./spin

 Performance counter stats for './spin':

     4016072055  CPU cycles           (events)  (scaled from 66.53%)
     2005887318  instructions         (events)  (scaled from 66.53%)
        1762849  cache references     (events)  (scaled from 66.69%)
         165229  cache misses         (events)  (scaled from 66.85%)
     1001298009  branches             (events)  (scaled from 66.78%)
          41566  branch misses        (events)  (scaled from 66.61%)

 Wall-clock time elapsed:  2438.227446 msecs

This also lets us detect when a counter is zero because the counter
never got to go on the CPU at all.  In that case we print <not counted>
rather than 0.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090330171023.871484899@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 995111d..c0ca015 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -197,6 +197,8 @@ static int			delay_secs			=  2;
 static int			zero;
 static int			dump_symtab;
 
+static int			scale;
+
 struct source_line {
 	uint64_t		EIP;
 	unsigned long		count;
@@ -305,6 +307,7 @@ static void display_perfstat_help(void)
 	display_events_help();
 
 	printf(
+	" -l                           # scale counter values\n"
 	" -a                           # system-wide collection\n");
 	exit(0);
 }
@@ -328,6 +331,7 @@ static void display_help(void)
 	" -c CNT    --count=CNT        # event period to sample\n\n"
 	" -C CPU    --cpu=CPU          # CPU (-1 for all)                 [default: -1]\n"
 	" -p PID    --pid=PID          # PID of sampled task (-1 for all) [default: -1]\n\n"
+	" -l                           # show scale factor for RR events\n"
 	" -d delay  --delay=<seconds>  # sampling/display delay           [default:  2]\n"
 	" -f CNT    --filter=CNT       # min-event-count filter          [default: 100]\n\n"
 	" -s symbol --symbol=<symbol>  # function to be showed annotated one-shot\n"
@@ -436,6 +440,9 @@ static void create_perfstat_counter(int counter)
 	hw_event.config		= event_id[counter];
 	hw_event.record_type	= PERF_RECORD_SIMPLE;
 	hw_event.nmi		= 0;
+	if (scale)
+		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
+					  PERF_FORMAT_TOTAL_TIME_RUNNING;
 
 	if (system_wide) {
 		int cpu;
@@ -507,28 +514,53 @@ int do_perfstat(int argc, char *argv[])
 	fprintf(stderr, "\n");
 
 	for (counter = 0; counter < nr_counters; counter++) {
-		int cpu;
-		__u64 count, single_count;
+		int cpu, nv;
+		__u64 count[3], single_count[3];
+		int scaled;
 
-		count = 0;
+		count[0] = count[1] = count[2] = 0;
+		nv = scale ? 3 : 1;
 		for (cpu = 0; cpu < nr_cpus; cpu ++) {
 			res = read(fd[cpu][counter],
-					(char *) &single_count, sizeof(single_count));
-			assert(res == sizeof(single_count));
-			count += single_count;
+				   single_count, nv * sizeof(__u64));
+			assert(res == nv * sizeof(__u64));
+
+			count[0] += single_count[0];
+			if (scale) {
+				count[1] += single_count[1];
+				count[2] += single_count[2];
+			}
+		}
+
+		scaled = 0;
+		if (scale) {
+			if (count[2] == 0) {
+				fprintf(stderr, " %14s  %-20s\n",
+					"<not counted>", event_name(counter));
+				continue;
+			}
+			if (count[2] < count[1]) {
+				scaled = 1;
+				count[0] = (unsigned long long)
+					((double)count[0] * count[1] / count[2] + 0.5);
+			}
 		}
 
 		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
 		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
 
-			double msecs = (double)count / 1000000;
+			double msecs = (double)count[0] / 1000000;
 
-			fprintf(stderr, " %14.6f  %-20s (msecs)\n",
+			fprintf(stderr, " %14.6f  %-20s (msecs)",
 				msecs, event_name(counter));
 		} else {
-			fprintf(stderr, " %14Ld  %-20s (events)\n",
-				count, event_name(counter));
+			fprintf(stderr, " %14Ld  %-20s (events)",
+				count[0], event_name(counter));
 		}
+		if (scaled)
+			fprintf(stderr, "  (scaled from %.2f%%)",
+				(double) count[2] / count[1] * 100);
+		fprintf(stderr, "\n");
 	}
 	fprintf(stderr, "\n");
 	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
@@ -1049,6 +1081,7 @@ static void process_options(int argc, char *argv[])
 			{"filter",	required_argument,	NULL, 'f'},
 			{"group",	required_argument,	NULL, 'g'},
 			{"help",	no_argument,		NULL, 'h'},
+			{"scale",	no_argument,		NULL, 'l'},
 			{"nmi",		required_argument,	NULL, 'n'},
 			{"pid",		required_argument,	NULL, 'p'},
 			{"vmlinux",	required_argument,	NULL, 'x'},
@@ -1060,7 +1093,7 @@ static void process_options(int argc, char *argv[])
 			{"munmap_info",	no_argument,		NULL, 'U'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:zMU",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:s:Sx:zMU",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -1084,6 +1117,7 @@ static void process_options(int argc, char *argv[])
 		case 'f': count_filter			=   atoi(optarg); break;
 		case 'g': group				=   atoi(optarg); break;
 		case 'h':      				  display_help(); break;
+		case 'l': scale				=	       1; break;
 		case 'n': nmi				=   atoi(optarg); break;
 		case 'p':
 			/* CPU and PID are mutually exclusive */
-- 
cgit v0.10.2


From 78d613eb129fc4edf0e2cabfcc6a4c5285482d21 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:11 +0200
Subject: perf_counter: small cleanup of the output routines

Move the nmi argument to the _begin() function, so that _end() only needs the
handle. This allows the _begin() function to generate a wakeup on event loss.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.959404268@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d07b452..4471e7e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1663,10 +1663,20 @@ struct perf_output_handle {
 	unsigned int		offset;
 	unsigned int		head;
 	int			wakeup;
+	int			nmi;
 };
 
+static inline void __perf_output_wakeup(struct perf_output_handle *handle)
+{
+	if (handle->nmi)
+		perf_pending_queue(handle->counter);
+	else
+		perf_counter_wakeup(handle->counter);
+}
+
 static int perf_output_begin(struct perf_output_handle *handle,
-			     struct perf_counter *counter, unsigned int size)
+			     struct perf_counter *counter, unsigned int size,
+			     int nmi)
 {
 	struct perf_mmap_data *data;
 	unsigned int offset, head;
@@ -1676,15 +1686,17 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if (!data)
 		goto out;
 
+	handle->counter	= counter;
+	handle->nmi	= nmi;
+
 	if (!data->nr_pages)
-		goto out;
+		goto fail;
 
 	do {
 		offset = head = atomic_read(&data->head);
 		head += size;
 	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
 
-	handle->counter	= counter;
 	handle->data	= data;
 	handle->offset	= offset;
 	handle->head	= head;
@@ -1692,6 +1704,8 @@ static int perf_output_begin(struct perf_output_handle *handle,
 
 	return 0;
 
+fail:
+	__perf_output_wakeup(handle);
 out:
 	rcu_read_unlock();
 
@@ -1733,14 +1747,10 @@ static void perf_output_copy(struct perf_output_handle *handle,
 #define perf_output_put(handle, x) \
 	perf_output_copy((handle), &(x), sizeof(x))
 
-static void perf_output_end(struct perf_output_handle *handle, int nmi)
+static void perf_output_end(struct perf_output_handle *handle)
 {
-	if (handle->wakeup) {
-		if (nmi)
-			perf_pending_queue(handle->counter);
-		else
-			perf_counter_wakeup(handle->counter);
-	}
+	if (handle->wakeup)
+		__perf_output_wakeup(handle);
 	rcu_read_unlock();
 }
 
@@ -1750,12 +1760,12 @@ static int perf_output_write(struct perf_counter *counter, int nmi,
 	struct perf_output_handle handle;
 	int ret;
 
-	ret = perf_output_begin(&handle, counter, size);
+	ret = perf_output_begin(&handle, counter, size, nmi);
 	if (ret)
 		goto out;
 
 	perf_output_copy(&handle, buf, size);
-	perf_output_end(&handle, nmi);
+	perf_output_end(&handle);
 
 out:
 	return ret;
@@ -1804,7 +1814,7 @@ static void perf_output_group(struct perf_counter *counter, int nmi)
 
 	size = sizeof(header) + counter->nr_siblings * sizeof(entry);
 
-	ret = perf_output_begin(&handle, counter, size);
+	ret = perf_output_begin(&handle, counter, size, nmi);
 	if (ret)
 		return;
 
@@ -1824,7 +1834,7 @@ static void perf_output_group(struct perf_counter *counter, int nmi)
 		perf_output_put(&handle, entry);
 	}
 
-	perf_output_end(&handle, nmi);
+	perf_output_end(&handle);
 }
 
 void perf_counter_output(struct perf_counter *counter,
@@ -1869,7 +1879,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 {
 	struct perf_output_handle handle;
 	int size = mmap_event->event.header.size;
-	int ret = perf_output_begin(&handle, counter, size);
+	int ret = perf_output_begin(&handle, counter, size, 0);
 
 	if (ret)
 		return;
@@ -1877,7 +1887,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 	perf_output_put(&handle, mmap_event->event);
 	perf_output_copy(&handle, mmap_event->file_name,
 				   mmap_event->file_size);
-	perf_output_end(&handle, 0);
+	perf_output_end(&handle);
 }
 
 static int perf_counter_mmap_match(struct perf_counter *counter,
-- 
cgit v0.10.2


From 5ed00415e304203a0a9dcaef226d6d3f1106070e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:12 +0200
Subject: perf_counter: re-arrange the perf_event_type

Breaks ABI yet again :-)

Change the event type so that [0, 2^31-1] are regular event types, but
[2^31, 2^32-1] forms a bitmask for overflow events.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171024.047961770@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 037a811..edf5bfb 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -210,13 +210,15 @@ struct perf_event_header {
 };
 
 enum perf_event_type {
-	PERF_EVENT_IP		= 0,
+
 	PERF_EVENT_GROUP	= 1,
 
 	PERF_EVENT_MMAP		= 2,
 	PERF_EVENT_MUNMAP	= 3,
 
-	__PERF_EVENT_TID	= 0x100,
+	PERF_EVENT_OVERFLOW	= 1UL << 31,
+	__PERF_EVENT_IP		= 1UL << 30,
+	__PERF_EVENT_TID	= 1UL << 29,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4471e7e..d93e9dd 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1754,50 +1754,44 @@ static void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-static int perf_output_write(struct perf_counter *counter, int nmi,
-			     void *buf, ssize_t size)
-{
-	struct perf_output_handle handle;
-	int ret;
-
-	ret = perf_output_begin(&handle, counter, size, nmi);
-	if (ret)
-		goto out;
-
-	perf_output_copy(&handle, buf, size);
-	perf_output_end(&handle);
-
-out:
-	return ret;
-}
-
 static void perf_output_simple(struct perf_counter *counter,
 			       int nmi, struct pt_regs *regs)
 {
-	unsigned int size;
+	int ret;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	u64 ip;
 	struct {
-		struct perf_event_header header;
-		u64 ip;
 		u32 pid, tid;
-	} event;
+	} tid_entry;
 
-	event.header.type = PERF_EVENT_IP;
-	event.ip = instruction_pointer(regs);
+	header.type = PERF_EVENT_OVERFLOW;
+	header.size = sizeof(header);
 
-	size = sizeof(event);
+	ip = instruction_pointer(regs);
+	header.type |= __PERF_EVENT_IP;
+	header.size += sizeof(ip);
 
 	if (counter->hw_event.include_tid) {
 		/* namespace issues */
-		event.pid = current->group_leader->pid;
-		event.tid = current->pid;
+		tid_entry.pid = current->group_leader->pid;
+		tid_entry.tid = current->pid;
 
-		event.header.type |= __PERF_EVENT_TID;
-	} else
-		size -= sizeof(u64);
+		header.type |= __PERF_EVENT_TID;
+		header.size += sizeof(tid_entry);
+	}
 
-	event.header.size = size;
+	ret = perf_output_begin(&handle, counter, header.size, nmi);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, header);
+	perf_output_put(&handle, ip);
+
+	if (counter->hw_event.include_tid)
+		perf_output_put(&handle, tid_entry);
 
-	perf_output_write(counter, nmi, &event, size);
+	perf_output_end(&handle);
 }
 
 static void perf_output_group(struct perf_counter *counter, int nmi)
-- 
cgit v0.10.2


From 023c54c42288416b4f43c67bfd5049a76926fad6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:13 +0200
Subject: perf_counter tools: kerneltop: update event_types

Go along with the new perf_event_type ABI.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171024.133985461@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index c0ca015..430810d 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -1263,8 +1263,8 @@ static void mmap_read(struct mmap_data *md)
 		old += size;
 
 		switch (event->header.type) {
-		case PERF_EVENT_IP:
-		case PERF_EVENT_IP | __PERF_EVENT_TID:
+		case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP:
+		case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID:
 			process_event(event->ip.ip, md->counter);
 			break;
 
-- 
cgit v0.10.2


From 394ee07623cf556c8daae2b3c00cf5fea47f0811 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:14 +0200
Subject: perf_counter: provide generic callchain bits

Provide the generic callchain support bits. If hw_event->callchain is
set the arch specific perf_callchain() function is called upon to
provide a perf_callchain_entry structure filled with the current
callchain.

If it does so, it is added to the overflow output event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171024.254266860@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index edf5bfb..43083af 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -140,8 +140,9 @@ struct perf_counter_hw_event {
 				include_tid    :  1, /* include the tid       */
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
+				callchain      :  1, /* add callchain data    */
 
-				__reserved_1   : 52;
+				__reserved_1   : 51;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -219,6 +220,7 @@ enum perf_event_type {
 	PERF_EVENT_OVERFLOW	= 1UL << 31,
 	__PERF_EVENT_IP		= 1UL << 30,
 	__PERF_EVENT_TID	= 1UL << 29,
+	__PERF_EVENT_CALLCHAIN  = 1UL << 28,
 };
 
 #ifdef __KERNEL__
@@ -504,6 +506,15 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
+#define MAX_STACK_DEPTH		255
+
+struct perf_callchain_entry {
+	u64	nr;
+	u64	ip[MAX_STACK_DEPTH];
+};
+
+extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d93e9dd..860cdc2 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1654,6 +1654,17 @@ void perf_counter_do_pending(void)
 }
 
 /*
+ * Callchain support -- arch specific
+ */
+
+struct perf_callchain_entry *
+__attribute__((weak))
+perf_callchain(struct pt_regs *regs)
+{
+	return NULL;
+}
+
+/*
  * Output
  */
 
@@ -1764,6 +1775,8 @@ static void perf_output_simple(struct perf_counter *counter,
 	struct {
 		u32 pid, tid;
 	} tid_entry;
+	struct perf_callchain_entry *callchain = NULL;
+	int callchain_size = 0;
 
 	header.type = PERF_EVENT_OVERFLOW;
 	header.size = sizeof(header);
@@ -1781,6 +1794,17 @@ static void perf_output_simple(struct perf_counter *counter,
 		header.size += sizeof(tid_entry);
 	}
 
+	if (counter->hw_event.callchain) {
+		callchain = perf_callchain(regs);
+
+		if (callchain) {
+			callchain_size = (1 + callchain->nr) * sizeof(u64);
+
+			header.type |= __PERF_EVENT_CALLCHAIN;
+			header.size += callchain_size;
+		}
+	}
+
 	ret = perf_output_begin(&handle, counter, header.size, nmi);
 	if (ret)
 		return;
@@ -1791,6 +1815,9 @@ static void perf_output_simple(struct perf_counter *counter,
 	if (counter->hw_event.include_tid)
 		perf_output_put(&handle, tid_entry);
 
+	if (callchain)
+		perf_output_copy(&handle, callchain, callchain_size);
+
 	perf_output_end(&handle);
 }
 
-- 
cgit v0.10.2


From d7d59fb323833682b117b528d77eeb8ef587036a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:15 +0200
Subject: perf_counter: x86: callchain support

Provide the x86 perf_callchain() implementation.

Code based on the ftrace/sysprof code from Soeren Sandmann Pedersen.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Cc: Soeren Sandmann Pedersen <sandmann@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Orig-LKML-Reference: <20090330171024.341993293@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b8885cc..e16dfaf 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -16,8 +16,10 @@
 #include <linux/module.h>
 #include <linux/kdebug.h>
 #include <linux/sched.h>
+#include <linux/uaccess.h>
 
 #include <asm/apic.h>
+#include <asm/stacktrace.h>
 
 static bool perf_counters_initialized __read_mostly;
 
@@ -958,3 +960,155 @@ hw_perf_counter_init(struct perf_counter *counter)
 
 	return &x86_perf_counter_ops;
 }
+
+/*
+ * callchain support
+ */
+
+static inline
+void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+{
+	if (entry->nr < MAX_STACK_DEPTH)
+		entry->ip[entry->nr++] = ip;
+}
+
+static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
+
+
+static void
+backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+	/* Ignore warnings */
+}
+
+static void backtrace_warning(void *data, char *msg)
+{
+	/* Ignore warnings */
+}
+
+static int backtrace_stack(void *data, char *name)
+{
+	/* Don't bother with IRQ stacks for now */
+	return -1;
+}
+
+static void backtrace_address(void *data, unsigned long addr, int reliable)
+{
+	struct perf_callchain_entry *entry = data;
+
+	if (reliable)
+		callchain_store(entry, addr);
+}
+
+static const struct stacktrace_ops backtrace_ops = {
+	.warning		= backtrace_warning,
+	.warning_symbol		= backtrace_warning_symbol,
+	.stack			= backtrace_stack,
+	.address		= backtrace_address,
+};
+
+static void
+perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	unsigned long bp;
+	char *stack;
+
+	callchain_store(entry, instruction_pointer(regs));
+
+	stack = ((char *)regs + sizeof(struct pt_regs));
+#ifdef CONFIG_FRAME_POINTER
+	bp = frame_pointer(regs);
+#else
+	bp = 0;
+#endif
+
+	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+}
+
+
+struct stack_frame {
+	const void __user	*next_fp;
+	unsigned long		return_address;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+	int ret;
+
+	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+		return 0;
+
+	ret = 1;
+	pagefault_disable();
+	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+		ret = 0;
+	pagefault_enable();
+
+	return ret;
+}
+
+static void
+perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	struct stack_frame frame;
+	const void __user *fp;
+
+	regs = (struct pt_regs *)current->thread.sp0 - 1;
+	fp   = (void __user *)regs->bp;
+
+	callchain_store(entry, regs->ip);
+
+	while (entry->nr < MAX_STACK_DEPTH) {
+		frame.next_fp	     = NULL;
+		frame.return_address = 0;
+
+		if (!copy_stack_frame(fp, &frame))
+			break;
+
+		if ((unsigned long)fp < user_stack_pointer(regs))
+			break;
+
+		callchain_store(entry, frame.return_address);
+		fp = frame.next_fp;
+	}
+}
+
+static void
+perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	int is_user;
+
+	if (!regs)
+		return;
+
+	is_user = user_mode(regs);
+
+	if (!current || current->pid == 0)
+		return;
+
+	if (is_user && current->state != TASK_RUNNING)
+		return;
+
+	if (!is_user)
+		perf_callchain_kernel(regs, entry);
+
+	if (current->mm)
+		perf_callchain_user(regs, entry);
+}
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	struct perf_callchain_entry *entry;
+
+	if (in_nmi())
+		entry = &__get_cpu_var(nmi_entry);
+	else
+		entry = &__get_cpu_var(irq_entry);
+
+	entry->nr = 0;
+
+	perf_do_callchain(regs, entry);
+
+	return entry;
+}
-- 
cgit v0.10.2


From 4e935e47177c3b26cf383e79849bae2a464d0160 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:16 +0200
Subject: perf_counter: pmc arbitration

Follow the example set by powerpc and try to play nice with oprofile
and the nmi watchdog.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171024.459968444@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index e16dfaf..2a946a1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -20,6 +20,7 @@
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
+#include <asm/nmi.h>
 
 static bool perf_counters_initialized __read_mostly;
 
@@ -172,6 +173,65 @@ again:
 	atomic64_sub(delta, &hwc->period_left);
 }
 
+static atomic_t num_counters;
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+static bool reserve_pmc_hardware(void)
+{
+	int i;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		disable_lapic_nmi_watchdog();
+
+	for (i = 0; i < nr_counters_generic; i++) {
+		if (!reserve_perfctr_nmi(pmc_ops->perfctr + i))
+			goto perfctr_fail;
+	}
+
+	for (i = 0; i < nr_counters_generic; i++) {
+		if (!reserve_evntsel_nmi(pmc_ops->eventsel + i))
+			goto eventsel_fail;
+	}
+
+	return true;
+
+eventsel_fail:
+	for (i--; i >= 0; i--)
+		release_evntsel_nmi(pmc_ops->eventsel + i);
+
+	i = nr_counters_generic;
+
+perfctr_fail:
+	for (i--; i >= 0; i--)
+		release_perfctr_nmi(pmc_ops->perfctr + i);
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		enable_lapic_nmi_watchdog();
+
+	return false;
+}
+
+static void release_pmc_hardware(void)
+{
+	int i;
+
+	for (i = 0; i < nr_counters_generic; i++) {
+		release_perfctr_nmi(pmc_ops->perfctr + i);
+		release_evntsel_nmi(pmc_ops->eventsel + i);
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		enable_lapic_nmi_watchdog();
+}
+
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+	if (atomic_dec_and_mutex_lock(&num_counters, &pmc_reserve_mutex)) {
+		release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
 /*
  * Setup the hardware configuration for a given hw_event_type
  */
@@ -179,10 +239,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 {
 	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	struct hw_perf_counter *hwc = &counter->hw;
+	int err;
 
 	if (unlikely(!perf_counters_initialized))
 		return -EINVAL;
 
+	err = 0;
+	if (atomic_inc_not_zero(&num_counters)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_counters) == 0 && !reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_counters);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	if (err)
+		return err;
+
 	/*
 	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
@@ -230,6 +303,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
 	}
 
+	counter->destroy = hw_perf_counter_destroy;
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 9dd499889bdb12ac0e412ccdd718fe0d348258f2 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 27 Mar 2009 12:13:43 +0100
Subject: perf_counter tools: kerneltop: add real-time data acquisition thread

Decouple kerneltop display from event acquisition by introducing
a separate data acquisition thread. This fixes annnoying kerneltop
display refresh jitter and missed events.

Also add a -r <prio> option, to switch the data acquisition thread
to real-time priority.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 430810d..33b4fcf 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -77,6 +77,8 @@
 #include <errno.h>
 #include <ctype.h>
 #include <time.h>
+#include <sched.h>
+#include <pthread.h>
 
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
@@ -181,6 +183,7 @@ static int			tid				= -1;
 static int			profile_cpu			= -1;
 static int			nr_cpus				=  0;
 static int			nmi				=  1;
+static unsigned int		realtime_prio			=  0;
 static int			group				=  0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			=  16;
@@ -334,6 +337,7 @@ static void display_help(void)
 	" -l                           # show scale factor for RR events\n"
 	" -d delay  --delay=<seconds>  # sampling/display delay           [default:  2]\n"
 	" -f CNT    --filter=CNT       # min-event-count filter          [default: 100]\n\n"
+	" -r prio   --realtime=<prio>  # event acquisition runs with SCHED_FIFO policy\n"
 	" -s symbol --symbol=<symbol>  # function to be showed annotated one-shot\n"
 	" -x path   --vmlinux=<path>   # the vmlinux binary, required for -s use\n"
 	" -z        --zero             # zero counts after display\n"
@@ -620,7 +624,6 @@ static int compare(const void *__sym1, const void *__sym2)
 	return sym_weight(sym1) < sym_weight(sym2);
 }
 
-static time_t			last_refresh;
 static long			events;
 static long			userspace_events;
 static const char		CONSOLE_CLEAR[] = "[H[2J";
@@ -634,6 +637,7 @@ static void print_sym_table(void)
 	float events_per_sec = events/delay_secs;
 	float kevents_per_sec = (events-userspace_events)/delay_secs;
 
+	events = userspace_events = 0;
 	memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
 	qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
 
@@ -714,8 +718,6 @@ static void print_sym_table(void)
 	if (sym_filter_entry)
 		show_details(sym_filter_entry);
 
-	last_refresh = time(NULL);
-
 	{
 		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
 
@@ -726,6 +728,16 @@ static void print_sym_table(void)
 	}
 }
 
+static void *display_thread(void *arg)
+{
+	printf("KernelTop refresh period: %d seconds\n", delay_secs);
+
+	while (!sleep(delay_secs))
+		print_sym_table();
+
+	return NULL;
+}
+
 static int read_symbol(FILE *in, struct sym_entry *s)
 {
 	static int filter_match = 0;
@@ -1081,19 +1093,20 @@ static void process_options(int argc, char *argv[])
 			{"filter",	required_argument,	NULL, 'f'},
 			{"group",	required_argument,	NULL, 'g'},
 			{"help",	no_argument,		NULL, 'h'},
-			{"scale",	no_argument,		NULL, 'l'},
 			{"nmi",		required_argument,	NULL, 'n'},
+			{"mmap_info",	no_argument,		NULL, 'M'},
+			{"mmap_pages",	required_argument,	NULL, 'm'},
+			{"munmap_info",	no_argument,		NULL, 'U'},
 			{"pid",		required_argument,	NULL, 'p'},
-			{"vmlinux",	required_argument,	NULL, 'x'},
+			{"realtime",	required_argument,	NULL, 'r'},
+			{"scale",	no_argument,		NULL, 'l'},
 			{"symbol",	required_argument,	NULL, 's'},
 			{"stat",	no_argument,		NULL, 'S'},
+			{"vmlinux",	required_argument,	NULL, 'x'},
 			{"zero",	no_argument,		NULL, 'z'},
-			{"mmap_pages",	required_argument,	NULL, 'm'},
-			{"mmap_info",	no_argument,		NULL, 'M'},
-			{"munmap_info",	no_argument,		NULL, 'U'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:s:Sx:zMU",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -1127,6 +1140,7 @@ static void process_options(int argc, char *argv[])
 				profile_cpu = -1;
 			}
 			tid				=   atoi(optarg); break;
+		case 'r': realtime_prio			=   atoi(optarg); break;
 		case 's': sym_filter			= strdup(optarg); break;
 		case 'S': run_perfstat			=	       1; break;
 		case 'x': vmlinux			= strdup(optarg); break;
@@ -1289,6 +1303,7 @@ int main(int argc, char *argv[])
 	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
 	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 	struct perf_counter_hw_event hw_event;
+	pthread_t thread;
 	int i, counter, group_fd, nr_poll = 0;
 	unsigned int cpu;
 	int ret;
@@ -1363,8 +1378,20 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("KernelTop refresh period: %d seconds\n", delay_secs);
-	last_refresh = time(NULL);
+	if (pthread_create(&thread, NULL, display_thread, NULL)) {
+		printf("Could not create display thread.\n");
+		exit(-1);
+	}
+
+	if (realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			printf("Could not set realtime priority.\n");
+			exit(-1);
+		}
+	}
 
 	while (1) {
 		int hits = events;
@@ -1374,14 +1401,8 @@ int main(int argc, char *argv[])
 				mmap_read(&mmap_array[i][counter]);
 		}
 
-		if (time(NULL) >= last_refresh + delay_secs) {
-			print_sym_table();
-			events = userspace_events = 0;
-		}
-
 		if (hits == events)
-			ret = poll(event_array, nr_poll, 1000);
-		hits = events;
+			ret = poll(event_array, nr_poll, 100);
 	}
 
 	return 0;
-- 
cgit v0.10.2


From 8a057d84912f36e53f970c4d177cb4bb6b2f9e08 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:11:59 +0200
Subject: perf_counter: move the event overflow output bits to record_type

Per suggestion from Paul, move the event overflow bits to record_type
and sanitize the enums a bit.

Breaks the ABI -- again ;-)

Suggested-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.151921176@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 43083af..06a6fba 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -73,15 +73,6 @@ enum sw_event_ids {
 	PERF_SW_EVENTS_MAX		= 7,
 };
 
-/*
- * IRQ-notification data record type:
- */
-enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		= 0,
-	PERF_RECORD_IRQ			= 1,
-	PERF_RECORD_GROUP		= 2,
-};
-
 #define __PERF_COUNTER_MASK(name) 			\
 	(((1ULL << PERF_COUNTER_##name##_BITS) - 1) <<	\
 	 PERF_COUNTER_##name##_SHIFT)
@@ -103,6 +94,17 @@ enum perf_counter_record_type {
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
 /*
+ * Bits that can be set in hw_event.record_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_record_format {
+	PERF_RECORD_IP		= 1U << 0,
+	PERF_RECORD_TID		= 1U << 1,
+	PERF_RECORD_GROUP	= 1U << 2,
+	PERF_RECORD_CALLCHAIN	= 1U << 3,
+};
+
+/*
  * Bits that can be set in hw_event.read_format to request that
  * reads on the counter should return the indicated quantities,
  * in increasing order of bit value, after the counter value.
@@ -125,8 +127,8 @@ struct perf_counter_hw_event {
 	__u64			config;
 
 	__u64			irq_period;
-	__u64			record_type;
-	__u64			read_format;
+	__u32			record_type;
+	__u32			read_format;
 
 	__u64			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
@@ -137,12 +139,10 @@ struct perf_counter_hw_event {
 				exclude_kernel :  1, /* ditto kernel          */
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
-				include_tid    :  1, /* include the tid       */
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
-				callchain      :  1, /* add callchain data    */
 
-				__reserved_1   : 51;
+				__reserved_1   : 53;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -212,15 +212,21 @@ struct perf_event_header {
 
 enum perf_event_type {
 
-	PERF_EVENT_GROUP	= 1,
-
-	PERF_EVENT_MMAP		= 2,
-	PERF_EVENT_MUNMAP	= 3,
+	PERF_EVENT_MMAP			= 1,
+	PERF_EVENT_MUNMAP		= 2,
 
-	PERF_EVENT_OVERFLOW	= 1UL << 31,
-	__PERF_EVENT_IP		= 1UL << 30,
-	__PERF_EVENT_TID	= 1UL << 29,
-	__PERF_EVENT_CALLCHAIN  = 1UL << 28,
+	/*
+	 * Half the event type space is reserved for the counter overflow
+	 * bitfields, as found in hw_event.record_type.
+	 *
+	 * These events will have types of the form:
+	 *   PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } *
+	 */
+	PERF_EVENT_COUNTER_OVERFLOW	= 1UL << 31,
+	__PERF_EVENT_IP			= PERF_RECORD_IP,
+	__PERF_EVENT_TID		= PERF_RECORD_TID,
+	__PERF_EVENT_GROUP		= PERF_RECORD_GROUP,
+	__PERF_EVENT_CALLCHAIN		= PERF_RECORD_CALLCHAIN,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 860cdc2..995063d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1765,27 +1765,34 @@ static void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-static void perf_output_simple(struct perf_counter *counter,
-			       int nmi, struct pt_regs *regs)
+void perf_counter_output(struct perf_counter *counter,
+			 int nmi, struct pt_regs *regs)
 {
 	int ret;
+	u64 record_type = counter->hw_event.record_type;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 	u64 ip;
 	struct {
 		u32 pid, tid;
 	} tid_entry;
+	struct {
+		u64 event;
+		u64 counter;
+	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
 	int callchain_size = 0;
 
-	header.type = PERF_EVENT_OVERFLOW;
+	header.type = PERF_EVENT_COUNTER_OVERFLOW;
 	header.size = sizeof(header);
 
-	ip = instruction_pointer(regs);
-	header.type |= __PERF_EVENT_IP;
-	header.size += sizeof(ip);
+	if (record_type & PERF_RECORD_IP) {
+		ip = instruction_pointer(regs);
+		header.type |= __PERF_EVENT_IP;
+		header.size += sizeof(ip);
+	}
 
-	if (counter->hw_event.include_tid) {
+	if (record_type & PERF_RECORD_TID) {
 		/* namespace issues */
 		tid_entry.pid = current->group_leader->pid;
 		tid_entry.tid = current->pid;
@@ -1794,7 +1801,13 @@ static void perf_output_simple(struct perf_counter *counter,
 		header.size += sizeof(tid_entry);
 	}
 
-	if (counter->hw_event.callchain) {
+	if (record_type & PERF_RECORD_GROUP) {
+		header.type |= __PERF_EVENT_GROUP;
+		header.size += sizeof(u64) +
+			counter->nr_siblings * sizeof(group_entry);
+	}
+
+	if (record_type & PERF_RECORD_CALLCHAIN) {
 		callchain = perf_callchain(regs);
 
 		if (callchain) {
@@ -1810,69 +1823,35 @@ static void perf_output_simple(struct perf_counter *counter,
 		return;
 
 	perf_output_put(&handle, header);
-	perf_output_put(&handle, ip);
 
-	if (counter->hw_event.include_tid)
-		perf_output_put(&handle, tid_entry);
+	if (record_type & PERF_RECORD_IP)
+		perf_output_put(&handle, ip);
 
-	if (callchain)
-		perf_output_copy(&handle, callchain, callchain_size);
-
-	perf_output_end(&handle);
-}
-
-static void perf_output_group(struct perf_counter *counter, int nmi)
-{
-	struct perf_output_handle handle;
-	struct perf_event_header header;
-	struct perf_counter *leader, *sub;
-	unsigned int size;
-	struct {
-		u64 event;
-		u64 counter;
-	} entry;
-	int ret;
-
-	size = sizeof(header) + counter->nr_siblings * sizeof(entry);
+	if (record_type & PERF_RECORD_TID)
+		perf_output_put(&handle, tid_entry);
 
-	ret = perf_output_begin(&handle, counter, size, nmi);
-	if (ret)
-		return;
+	if (record_type & PERF_RECORD_GROUP) {
+		struct perf_counter *leader, *sub;
+		u64 nr = counter->nr_siblings;
 
-	header.type = PERF_EVENT_GROUP;
-	header.size = size;
+		perf_output_put(&handle, nr);
 
-	perf_output_put(&handle, header);
+		leader = counter->group_leader;
+		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+			if (sub != counter)
+				sub->hw_ops->read(sub);
 
-	leader = counter->group_leader;
-	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-		if (sub != counter)
-			sub->hw_ops->read(sub);
+			group_entry.event = sub->hw_event.config;
+			group_entry.counter = atomic64_read(&sub->count);
 
-		entry.event = sub->hw_event.config;
-		entry.counter = atomic64_read(&sub->count);
-
-		perf_output_put(&handle, entry);
+			perf_output_put(&handle, group_entry);
+		}
 	}
 
-	perf_output_end(&handle);
-}
-
-void perf_counter_output(struct perf_counter *counter,
-			 int nmi, struct pt_regs *regs)
-{
-	switch (counter->hw_event.record_type) {
-	case PERF_RECORD_SIMPLE:
-		return;
-
-	case PERF_RECORD_IRQ:
-		perf_output_simple(counter, nmi, regs);
-		break;
+	if (callchain)
+		perf_output_copy(&handle, callchain, callchain_size);
 
-	case PERF_RECORD_GROUP:
-		perf_output_group(counter, nmi);
-		break;
-	}
+	perf_output_end(&handle);
 }
 
 /*
-- 
cgit v0.10.2


From c457810ab4a825161aec6ef71b581e1bc8febd1a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:12:01 +0200
Subject: perf_counter: per event wakeups

By request, provide a way to request a wakeup every 'n' events instead
of every page of output.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.323309784@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 06a6fba..5428ba1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -145,7 +145,7 @@ struct perf_counter_hw_event {
 				__reserved_1   : 53;
 
 	__u32			extra_config_len;
-	__u32			__reserved_4;
+	__u32			wakeup_events;	/* wakeup every n events */
 
 	__u64			__reserved_2;
 	__u64			__reserved_3;
@@ -321,6 +321,7 @@ struct perf_mmap_data {
 	int				nr_pages;
 	atomic_t			wakeup;
 	atomic_t			head;
+	atomic_t			events;
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 995063d..9bcab10 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1760,7 +1760,15 @@ static void perf_output_copy(struct perf_output_handle *handle,
 
 static void perf_output_end(struct perf_output_handle *handle)
 {
-	if (handle->wakeup)
+	int wakeup_events = handle->counter->hw_event.wakeup_events;
+
+	if (wakeup_events) {
+		int events = atomic_inc_return(&handle->data->events);
+		if (events >= wakeup_events) {
+			atomic_sub(wakeup_events, &handle->data->events);
+			__perf_output_wakeup(handle);
+		}
+	} else if (handle->wakeup)
 		__perf_output_wakeup(handle);
 	rcu_read_unlock();
 }
-- 
cgit v0.10.2


From 3df70fd623bb109e0079e697c0276d220a4b7908 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:12:02 +0200
Subject: perf_counter: kerneltop: update to new ABI

Update to reflect the new record_type ABI changes.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.407283141@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 33b4fcf..4f8d791 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -442,7 +442,7 @@ static void create_perfstat_counter(int counter)
 
 	memset(&hw_event, 0, sizeof(hw_event));
 	hw_event.config		= event_id[counter];
-	hw_event.record_type	= PERF_RECORD_SIMPLE;
+	hw_event.record_type	= 0;
 	hw_event.nmi		= 0;
 	if (scale)
 		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -1277,8 +1277,8 @@ static void mmap_read(struct mmap_data *md)
 		old += size;
 
 		switch (event->header.type) {
-		case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP:
-		case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID:
+		case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP:
+		case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID:
 			process_event(event->ip.ip, md->counter);
 			break;
 
@@ -1337,9 +1337,8 @@ int main(int argc, char *argv[])
 			memset(&hw_event, 0, sizeof(hw_event));
 			hw_event.config		= event_id[counter];
 			hw_event.irq_period	= event_count[counter];
-			hw_event.record_type	= PERF_RECORD_IRQ;
+			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
 			hw_event.nmi		= nmi;
-			hw_event.include_tid	= 1;
 			hw_event.mmap		= use_mmap;
 			hw_event.munmap		= use_munmap;
 
-- 
cgit v0.10.2


From 5872bdb88a35fae7d224bd6b21e5f377e854ccfc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:12:03 +0200
Subject: perf_counter: add more context information

Put in counts to tell which ips belong to what context.

  -----
   | |  hv
   | --
nr | |  kernel
   | --
   | |  user
  -----

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.493101305@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2a946a1..c74e20d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1088,6 +1088,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	unsigned long bp;
 	char *stack;
+	int nr = entry->nr;
 
 	callchain_store(entry, instruction_pointer(regs));
 
@@ -1099,6 +1100,8 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 #endif
 
 	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+
+	entry->kernel = entry->nr - nr;
 }
 
 
@@ -1128,6 +1131,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	struct stack_frame frame;
 	const void __user *fp;
+	int nr = entry->nr;
 
 	regs = (struct pt_regs *)current->thread.sp0 - 1;
 	fp   = (void __user *)regs->bp;
@@ -1147,6 +1151,8 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		callchain_store(entry, frame.return_address);
 		fp = frame.next_fp;
 	}
+
+	entry->user = entry->nr - nr;
 }
 
 static void
@@ -1182,6 +1188,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 		entry = &__get_cpu_var(irq_entry);
 
 	entry->nr = 0;
+	entry->hv = 0;
+	entry->kernel = 0;
+	entry->user = 0;
 
 	perf_do_callchain(regs, entry);
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5428ba1..90cce0c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -513,10 +513,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
-#define MAX_STACK_DEPTH		255
+#define MAX_STACK_DEPTH		254
 
 struct perf_callchain_entry {
-	u64	nr;
+	u32	nr, hv, kernel, user;
 	u64	ip[MAX_STACK_DEPTH];
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 9bcab10..f105a6e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1819,7 +1819,7 @@ void perf_counter_output(struct perf_counter *counter,
 		callchain = perf_callchain(regs);
 
 		if (callchain) {
-			callchain_size = (1 + callchain->nr) * sizeof(u64);
+			callchain_size = (2 + callchain->nr) * sizeof(u64);
 
 			header.type |= __PERF_EVENT_CALLCHAIN;
 			header.size += callchain_size;
-- 
cgit v0.10.2


From 92f22a3865abe87eea2609a6f8e5be5123f7ce4f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:12:04 +0200
Subject: perf_counter: update mmap() counter read

Paul noted that we don't need SMP barriers for the mmap() counter read
because its always on the same cpu (otherwise you can't access the hw
counter anyway).

So remove the SMP barriers and replace them with regular compiler
barriers.

Further, update the comment to include a race free method of reading
said hardware counter. The primary change is putting the pmc_read
inside the seq-loop, otherwise we can still race and read rubbish.

Noticed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.577951445@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 90cce0c..f2b914d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -167,30 +167,28 @@ struct perf_counter_mmap_page {
 	/*
 	 * Bits needed to read the hw counters in user-space.
 	 *
-	 * The index and offset should be read atomically using the seqlock:
-	 *
-	 *   __u32 seq, index;
-	 *   __s64 offset;
+	 *   u32 seq;
+	 *   s64 count;
 	 *
 	 * again:
-	 *   rmb();
 	 *   seq = pc->lock;
-	 *
 	 *   if (unlikely(seq & 1)) {
 	 *     cpu_relax();
 	 *     goto again;
 	 *   }
 	 *
-	 *   index = pc->index;
-	 *   offset = pc->offset;
+	 *   if (pc->index) {
+	 *     count = pmc_read(pc->index - 1);
+	 *     count += pc->offset;
+	 *   } else
+	 *     goto regular_read;
 	 *
-	 *   rmb();
+	 *   barrier();
 	 *   if (pc->lock != seq)
 	 *     goto again;
 	 *
-	 * After this, index contains architecture specific counter index + 1,
-	 * so that 0 means unavailable, offset contains the value to be added
-	 * to the result of the raw timer read to obtain this counter's value.
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 *       processes.
 	 */
 	__u32	lock;			/* seqlock for synchronization */
 	__u32	index;			/* hardware counter identifier */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f105a6e..2a5d4f5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1340,13 +1340,13 @@ void perf_counter_update_userpage(struct perf_counter *counter)
 	 */
 	preempt_disable();
 	++userpg->lock;
-	smp_wmb();
+	barrier();
 	userpg->index = counter->hw.idx;
 	userpg->offset = atomic64_read(&counter->count);
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
 
-	smp_wmb();
+	barrier();
 	++userpg->lock;
 	preempt_enable();
 unlock:
-- 
cgit v0.10.2


From ca5f9524d61f54b1f618293ab92fc6b49cac864d Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:39:33 -0700
Subject: futex: separate futex_wait_queue_me() logic from futex_wait()

Refactor futex_wait() in preparation for futex_wait_requeue_pi(). In
order to reuse a good chunk of the futex_wait() code for the upcoming
futex_wait_requeue_pi() function, this patch breaks out the
queue-to-wakeup section of futex_wait() into futex_wait_queue_me().

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 6b50a02..ebb48d6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1115,24 +1115,87 @@ handle_fault:
 
 static long futex_wait_restart(struct restart_block *restart);
 
+/**
+ * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
+ * @hb:		the futex hash bucket, must be locked by the caller
+ * @q:		the futex_q to queue up on
+ * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
+ * @wait:	the wait_queue to add to the futex_q after queueing in the hb
+ */
+static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
+				struct hrtimer_sleeper *timeout,
+				wait_queue_t *wait)
+{
+	queue_me(q, hb);
+
+	/*
+	 * There might have been scheduling since the queue_me(), as we
+	 * cannot hold a spinlock across the get_user() in case it
+	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
+	 * queueing ourselves into the futex hash.  This code thus has to
+	 * rely on the futex_wake() code removing us from hash when it
+	 * wakes us up.
+	 */
+
+	/* add_wait_queue is the barrier after __set_current_state. */
+	__set_current_state(TASK_INTERRUPTIBLE);
+
+	/*
+	 * Add current as the futex_q waiter.  We don't remove ourselves from
+	 * the wait_queue because we are the only user of it.
+	 */
+	add_wait_queue(&q->waiter, wait);
+
+	/* Arm the timer */
+	if (timeout) {
+		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+		if (!hrtimer_active(&timeout->timer))
+			timeout->task = NULL;
+	}
+
+	/*
+	 * !plist_node_empty() is safe here without any lock.
+	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
+	 */
+	if (likely(!plist_node_empty(&q->list))) {
+		/*
+		 * If the timer has already expired, current will already be
+		 * flagged for rescheduling. Only call schedule if there
+		 * is no timeout, or if it has yet to expire.
+		 */
+		if (!timeout || timeout->task)
+			schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
 static int futex_wait(u32 __user *uaddr, int fshared,
 		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
 {
-	struct task_struct *curr = current;
+	struct hrtimer_sleeper timeout, *to = NULL;
+	DECLARE_WAITQUEUE(wait, current);
 	struct restart_block *restart;
-	DECLARE_WAITQUEUE(wait, curr);
 	struct futex_hash_bucket *hb;
 	struct futex_q q;
 	u32 uval;
 	int ret;
-	struct hrtimer_sleeper t;
-	int rem = 0;
 
 	if (!bitset)
 		return -EINVAL;
 
 	q.pi_state = NULL;
 	q.bitset = bitset;
+
+	if (abs_time) {
+		to = &timeout;
+
+		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
+				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper(to, current);
+		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
+					     current->timer_slack_ns);
+	}
+
 retry:
 	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
@@ -1178,75 +1241,22 @@ retry_private:
 		goto retry;
 	}
 	ret = -EWOULDBLOCK;
+
+	/* Only actually queue if *uaddr contained val.  */
 	if (unlikely(uval != val)) {
 		queue_unlock(&q, hb);
 		goto out_put_key;
 	}
 
-	/* Only actually queue if *uaddr contained val.  */
-	queue_me(&q, hb);
-
-	/*
-	 * There might have been scheduling since the queue_me(), as we
-	 * cannot hold a spinlock across the get_user() in case it
-	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
-	 * queueing ourselves into the futex hash.  This code thus has to
-	 * rely on the futex_wake() code removing us from hash when it
-	 * wakes us up.
-	 */
-
-	/* add_wait_queue is the barrier after __set_current_state. */
-	__set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(&q.waiter, &wait);
-	/*
-	 * !plist_node_empty() is safe here without any lock.
-	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
-	 */
-	if (likely(!plist_node_empty(&q.list))) {
-		if (!abs_time)
-			schedule();
-		else {
-			hrtimer_init_on_stack(&t.timer,
-					      clockrt ? CLOCK_REALTIME :
-					      CLOCK_MONOTONIC,
-					      HRTIMER_MODE_ABS);
-			hrtimer_init_sleeper(&t, current);
-			hrtimer_set_expires_range_ns(&t.timer, *abs_time,
-						     current->timer_slack_ns);
-
-			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
-			if (!hrtimer_active(&t.timer))
-				t.task = NULL;
-
-			/*
-			 * the timer could have already expired, in which
-			 * case current would be flagged for rescheduling.
-			 * Don't bother calling schedule.
-			 */
-			if (likely(t.task))
-				schedule();
-
-			hrtimer_cancel(&t.timer);
-
-			/* Flag if a timeout occured */
-			rem = (t.task == NULL);
-
-			destroy_hrtimer_on_stack(&t.timer);
-		}
-	}
-	__set_current_state(TASK_RUNNING);
-
-	/*
-	 * NOTE: we don't remove ourselves from the waitqueue because
-	 * we are the only user of it.
-	 */
+	/* queue_me and wait for wakeup, timeout, or a signal. */
+	futex_wait_queue_me(hb, &q, to, &wait);
 
 	/* If we were woken (and unqueued), we succeeded, whatever. */
 	ret = 0;
 	if (!unqueue_me(&q))
 		goto out_put_key;
 	ret = -ETIMEDOUT;
-	if (rem)
+	if (to && !to->task)
 		goto out_put_key;
 
 	/*
@@ -1275,6 +1285,10 @@ retry_private:
 out_put_key:
 	put_futex_key(fshared, &q.key);
 out:
+	if (to) {
+		hrtimer_cancel(&to->timer);
+		destroy_hrtimer_on_stack(&to->timer);
+	}
 	return ret;
 }
 
-- 
cgit v0.10.2


From 4b1c486b3587d2abf50bee4a05eb488cd4045f2c Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:39:42 -0700
Subject: futex: add helper to find the top prio waiter of a futex

Improve legibility by wrapping finding the top waiter in a function.
This will be used by the follow-on patches for enabling requeue pi.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index ebb48d6..421fb5e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -276,6 +276,25 @@ void put_futex_key(int fshared, union futex_key *key)
 	drop_futex_key_refs(key);
 }
 
+/**
+ * futex_top_waiter() - Return the highest priority waiter on a futex
+ * @hb:     the hash bucket the futex_q's reside in
+ * @key:    the futex key (to distinguish it from other futex futex_q's)
+ *
+ * Must be called with the hb lock held.
+ */
+static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
+					union futex_key *key)
+{
+	struct futex_q *this;
+
+	plist_for_each_entry(this, &hb->chain, list) {
+		if (match_futex(&this->key, key))
+			return this;
+	}
+	return NULL;
+}
+
 static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
 {
 	u32 curval;
-- 
cgit v0.10.2


From 1a52084d0919c2799258737c21fb328a9de159b5 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:39:52 -0700
Subject: futex: split out atomic logic from futex_lock_pi()

Refactor the atomic portion of futex_lock_pi() into futex_lock_pi_atomic().

This logic will be needed by requeue_pi, so modularize it to reduce
code duplication.  The only significant change is passing of the task
to try and take the lock for.  This simplifies the -EDEADLK test as if
the lock is owned by task t, it's a deadlock, regardless of if we are
doing requeue pi or not.  This patch updates the corresponding comment
accordingly.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 421fb5e..986b16e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -556,6 +556,127 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	return 0;
 }
 
+/**
+ * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex
+ * @uaddr:	the pi futex user address
+ * @hb:		the pi futex hash bucket
+ * @key:	the futex key associated with uaddr and hb
+ * @ps:		the pi_state pointer where we store the result of the lookup
+ * @task:	the task to perform the atomic lock work for.  This will be
+ * 		"current" except in the case of requeue pi.
+ *
+ * Returns:
+ *  0 - ready to wait
+ *  1 - acquired the lock
+ * <0 - error
+ *
+ * The hb->lock and futex_key refs shall be held by the caller.
+ */
+static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+				union futex_key *key,
+				struct futex_pi_state **ps,
+				struct task_struct *task)
+{
+	int lock_taken, ret, ownerdied = 0;
+	u32 uval, newval, curval;
+
+retry:
+	ret = lock_taken = 0;
+
+	/*
+	 * To avoid races, we attempt to take the lock here again
+	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
+	 * the locks. It will most likely not succeed.
+	 */
+	newval = task_pid_vnr(task);
+
+	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
+
+	if (unlikely(curval == -EFAULT))
+		return -EFAULT;
+
+	/*
+	 * Detect deadlocks.
+	 */
+	if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
+		return -EDEADLK;
+
+	/*
+	 * Surprise - we got the lock. Just return to userspace:
+	 */
+	if (unlikely(!curval))
+		return 1;
+
+	uval = curval;
+
+	/*
+	 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
+	 * to wake at the next unlock.
+	 */
+	newval = curval | FUTEX_WAITERS;
+
+	/*
+	 * There are two cases, where a futex might have no owner (the
+	 * owner TID is 0): OWNER_DIED. We take over the futex in this
+	 * case. We also do an unconditional take over, when the owner
+	 * of the futex died.
+	 *
+	 * This is safe as we are protected by the hash bucket lock !
+	 */
+	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
+		/* Keep the OWNER_DIED bit */
+		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
+		ownerdied = 0;
+		lock_taken = 1;
+	}
+
+	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+
+	if (unlikely(curval == -EFAULT))
+		return -EFAULT;
+	if (unlikely(curval != uval))
+		goto retry;
+
+	/*
+	 * We took the lock due to owner died take over.
+	 */
+	if (unlikely(lock_taken))
+		return 1;
+
+	/*
+	 * We dont have the lock. Look up the PI state (or create it if
+	 * we are the first waiter):
+	 */
+	ret = lookup_pi_state(uval, hb, key, ps);
+
+	if (unlikely(ret)) {
+		switch (ret) {
+		case -ESRCH:
+			/*
+			 * No owner found for this futex. Check if the
+			 * OWNER_DIED bit is set to figure out whether
+			 * this is a robust futex or not.
+			 */
+			if (get_futex_value_locked(&curval, uaddr))
+				return -EFAULT;
+
+			/*
+			 * We simply start over in case of a robust
+			 * futex. The code above will take the futex
+			 * and return happy.
+			 */
+			if (curval & FUTEX_OWNER_DIED) {
+				ownerdied = 1;
+				goto retry;
+			}
+		default:
+			break;
+		}
+	}
+
+	return ret;
+}
+
 /*
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
@@ -1340,9 +1461,9 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct task_struct *curr = current;
 	struct futex_hash_bucket *hb;
-	u32 uval, newval, curval;
+	u32 uval;
 	struct futex_q q;
-	int ret, lock_taken, ownerdied = 0;
+	int ret;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1365,81 +1486,15 @@ retry:
 retry_private:
 	hb = queue_lock(&q);
 
-retry_locked:
-	ret = lock_taken = 0;
-
-	/*
-	 * To avoid races, we attempt to take the lock here again
-	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
-	 * the locks. It will most likely not succeed.
-	 */
-	newval = task_pid_vnr(current);
-
-	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
-
-	if (unlikely(curval == -EFAULT))
-		goto uaddr_faulted;
-
-	/*
-	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
-	 * situation and we return success to user space.
-	 */
-	if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
-		ret = -EDEADLK;
-		goto out_unlock_put_key;
-	}
-
-	/*
-	 * Surprise - we got the lock. Just return to userspace:
-	 */
-	if (unlikely(!curval))
-		goto out_unlock_put_key;
-
-	uval = curval;
-
-	/*
-	 * Set the WAITERS flag, so the owner will know it has someone
-	 * to wake at next unlock
-	 */
-	newval = curval | FUTEX_WAITERS;
-
-	/*
-	 * There are two cases, where a futex might have no owner (the
-	 * owner TID is 0): OWNER_DIED. We take over the futex in this
-	 * case. We also do an unconditional take over, when the owner
-	 * of the futex died.
-	 *
-	 * This is safe as we are protected by the hash bucket lock !
-	 */
-	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
-		/* Keep the OWNER_DIED bit */
-		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
-		ownerdied = 0;
-		lock_taken = 1;
-	}
-
-	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-	if (unlikely(curval == -EFAULT))
-		goto uaddr_faulted;
-	if (unlikely(curval != uval))
-		goto retry_locked;
-
-	/*
-	 * We took the lock due to owner died take over.
-	 */
-	if (unlikely(lock_taken))
-		goto out_unlock_put_key;
-
-	/*
-	 * We dont have the lock. Look up the PI state (or create it if
-	 * we are the first waiter):
-	 */
-	ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
-
+	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current);
 	if (unlikely(ret)) {
 		switch (ret) {
-
+		case 1:
+			/* We got the lock. */
+			ret = 0;
+			goto out_unlock_put_key;
+		case -EFAULT:
+			goto uaddr_faulted;
 		case -EAGAIN:
 			/*
 			 * Task is exiting and we just wait for the
@@ -1449,25 +1504,6 @@ retry_locked:
 			put_futex_key(fshared, &q.key);
 			cond_resched();
 			goto retry;
-
-		case -ESRCH:
-			/*
-			 * No owner found for this futex. Check if the
-			 * OWNER_DIED bit is set to figure out whether
-			 * this is a robust futex or not.
-			 */
-			if (get_futex_value_locked(&curval, uaddr))
-				goto uaddr_faulted;
-
-			/*
-			 * We simply start over in case of a robust
-			 * futex. The code above will take the futex
-			 * and return happy.
-			 */
-			if (curval & FUTEX_OWNER_DIED) {
-				ownerdied = 1;
-				goto retry_locked;
-			}
 		default:
 			goto out_unlock_put_key;
 		}
-- 
cgit v0.10.2


From dd9739980b50c8cde33e1f8eb08b7e0140bcd61e Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:40:02 -0700
Subject: futex: split out fixup owner logic from futex_lock_pi()

Refactor the post lock acquisition logic from futex_lock_pi(). This
code will be reused in futex_wait_requeue_pi().

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 986b16e..af831fb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1256,6 +1256,79 @@ handle_fault:
 static long futex_wait_restart(struct restart_block *restart);
 
 /**
+ * fixup_owner() - Post lock pi_state and corner case management
+ * @uaddr:	user address of the futex
+ * @fshared:	whether the futex is shared (1) or not (0)
+ * @q:		futex_q (contains pi_state and access to the rt_mutex)
+ * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
+ *
+ * After attempting to lock an rt_mutex, this function is called to cleanup
+ * the pi_state owner as well as handle race conditions that may allow us to
+ * acquire the lock. Must be called with the hb lock held.
+ *
+ * Returns:
+ *  1 - success, lock taken
+ *  0 - success, lock not taken
+ * <0 - on error (-EFAULT)
+ */
+static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
+		       int locked)
+{
+	struct task_struct *owner;
+	int ret = 0;
+
+	if (locked) {
+		/*
+		 * Got the lock. We might not be the anticipated owner if we
+		 * did a lock-steal - fix up the PI-state in that case:
+		 */
+		if (q->pi_state->owner != current)
+			ret = fixup_pi_state_owner(uaddr, q, current, fshared);
+		goto out;
+	}
+
+	/*
+	 * Catch the rare case, where the lock was released when we were on the
+	 * way back before we locked the hash bucket.
+	 */
+	if (q->pi_state->owner == current) {
+		/*
+		 * Try to get the rt_mutex now. This might fail as some other
+		 * task acquired the rt_mutex after we removed ourself from the
+		 * rt_mutex waiters list.
+		 */
+		if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
+			locked = 1;
+			goto out;
+		}
+
+		/*
+		 * pi_state is incorrect, some other task did a lock steal and
+		 * we returned due to timeout or signal without taking the
+		 * rt_mutex. Too late. We can access the rt_mutex_owner without
+		 * locking, as the other task is now blocked on the hash bucket
+		 * lock. Fix the state up.
+		 */
+		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
+		ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
+		goto out;
+	}
+
+	/*
+	 * Paranoia check. If we did not take the lock, then we should not be
+	 * the owner, nor the pending owner, of the rt_mutex.
+	 */
+	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
+		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
+				"pi-state %p\n", ret,
+				q->pi_state->pi_mutex.owner,
+				q->pi_state->owner);
+
+out:
+	return ret ? ret : locked;
+}
+
+/**
  * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
  * @hb:		the futex hash bucket, must be locked by the caller
  * @q:		the futex_q to queue up on
@@ -1459,11 +1532,10 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 			 int detect, ktime_t *time, int trylock)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
-	struct task_struct *curr = current;
 	struct futex_hash_bucket *hb;
 	u32 uval;
 	struct futex_q q;
-	int ret;
+	int res, ret;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1527,71 +1599,21 @@ retry_private:
 	}
 
 	spin_lock(q.lock_ptr);
-
-	if (!ret) {
-		/*
-		 * Got the lock. We might not be the anticipated owner
-		 * if we did a lock-steal - fix up the PI-state in
-		 * that case:
-		 */
-		if (q.pi_state->owner != curr)
-			ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
-	} else {
-		/*
-		 * Catch the rare case, where the lock was released
-		 * when we were on the way back before we locked the
-		 * hash bucket.
-		 */
-		if (q.pi_state->owner == curr) {
-			/*
-			 * Try to get the rt_mutex now. This might
-			 * fail as some other task acquired the
-			 * rt_mutex after we removed ourself from the
-			 * rt_mutex waiters list.
-			 */
-			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
-				ret = 0;
-			else {
-				/*
-				 * pi_state is incorrect, some other
-				 * task did a lock steal and we
-				 * returned due to timeout or signal
-				 * without taking the rt_mutex. Too
-				 * late. We can access the
-				 * rt_mutex_owner without locking, as
-				 * the other task is now blocked on
-				 * the hash bucket lock. Fix the state
-				 * up.
-				 */
-				struct task_struct *owner;
-				int res;
-
-				owner = rt_mutex_owner(&q.pi_state->pi_mutex);
-				res = fixup_pi_state_owner(uaddr, &q, owner,
-							   fshared);
-
-				/* propagate -EFAULT, if the fixup failed */
-				if (res)
-					ret = res;
-			}
-		} else {
-			/*
-			 * Paranoia check. If we did not take the lock
-			 * in the trylock above, then we should not be
-			 * the owner of the rtmutex, neither the real
-			 * nor the pending one:
-			 */
-			if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
-				printk(KERN_ERR "futex_lock_pi: ret = %d "
-				       "pi-mutex: %p pi-state %p\n", ret,
-				       q.pi_state->pi_mutex.owner,
-				       q.pi_state->owner);
-		}
-	}
+	/*
+	 * Fixup the pi_state owner and possibly acquire the lock if we
+	 * haven't already.
+	 */
+	res = fixup_owner(uaddr, fshared, &q, !ret);
+	/*
+	 * If fixup_owner() returned an error, proprogate that.  If it acquired
+	 * the lock, clear our -ETIMEDOUT or -EINTR.
+	 */
+	if (res)
+		ret = (res < 0) ? res : 0;
 
 	/*
-	 * If fixup_pi_state_owner() faulted and was unable to handle the
-	 * fault, unlock it and return the fault to userspace.
+	 * If fixup_owner() faulted and was unable to handle the fault, unlock
+	 * it and return the fault to userspace.
 	 */
 	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
 		rt_mutex_unlock(&q.pi_state->pi_mutex);
@@ -1599,9 +1621,7 @@ retry_private:
 	/* Unqueue and drop the lock */
 	unqueue_me_pi(&q);
 
-	if (to)
-		destroy_hrtimer_on_stack(&to->timer);
-	return ret != -EINTR ? ret : -ERESTARTNOINTR;
+	goto out;
 
 out_unlock_put_key:
 	queue_unlock(&q, hb);
@@ -1611,7 +1631,7 @@ out_put_key:
 out:
 	if (to)
 		destroy_hrtimer_on_stack(&to->timer);
-	return ret;
+	return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
 uaddr_faulted:
 	/*
-- 
cgit v0.10.2


From 8dac456a681bd94272ff50ecb31be6b669382c2b Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:40:12 -0700
Subject: rt_mutex: add proxy lock routines

This patch is a prerequisite for futex requeue_pi. It basically splits
rt_mutex_slowlock() right down the middle, just before the first call
to schedule(). It further adds helper functions which make use of the
split and provide the rt-mutex preliminaries for futex requeue_pi.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 69d9cb9..fec77e7 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -300,7 +300,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  * assigned pending owner [which might not have taken the
  * lock yet]:
  */
-static inline int try_to_steal_lock(struct rt_mutex *lock)
+static inline int try_to_steal_lock(struct rt_mutex *lock,
+				    struct task_struct *task)
 {
 	struct task_struct *pendowner = rt_mutex_owner(lock);
 	struct rt_mutex_waiter *next;
@@ -309,11 +310,11 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
 	if (!rt_mutex_owner_pending(lock))
 		return 0;
 
-	if (pendowner == current)
+	if (pendowner == task)
 		return 1;
 
 	spin_lock_irqsave(&pendowner->pi_lock, flags);
-	if (current->prio >= pendowner->prio) {
+	if (task->prio >= pendowner->prio) {
 		spin_unlock_irqrestore(&pendowner->pi_lock, flags);
 		return 0;
 	}
@@ -338,21 +339,21 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
 	 * We are going to steal the lock and a waiter was
 	 * enqueued on the pending owners pi_waiters queue. So
 	 * we have to enqueue this waiter into
-	 * current->pi_waiters list. This covers the case,
-	 * where current is boosted because it holds another
+	 * task->pi_waiters list. This covers the case,
+	 * where task is boosted because it holds another
 	 * lock and gets unboosted because the booster is
 	 * interrupted, so we would delay a waiter with higher
-	 * priority as current->normal_prio.
+	 * priority as task->normal_prio.
 	 *
 	 * Note: in the rare case of a SCHED_OTHER task changing
 	 * its priority and thus stealing the lock, next->task
-	 * might be current:
+	 * might be task:
 	 */
-	if (likely(next->task != current)) {
-		spin_lock_irqsave(&current->pi_lock, flags);
-		plist_add(&next->pi_list_entry, &current->pi_waiters);
-		__rt_mutex_adjust_prio(current);
-		spin_unlock_irqrestore(&current->pi_lock, flags);
+	if (likely(next->task != task)) {
+		spin_lock_irqsave(&task->pi_lock, flags);
+		plist_add(&next->pi_list_entry, &task->pi_waiters);
+		__rt_mutex_adjust_prio(task);
+		spin_unlock_irqrestore(&task->pi_lock, flags);
 	}
 	return 1;
 }
@@ -389,7 +390,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
 	 */
 	mark_rt_mutex_waiters(lock);
 
-	if (rt_mutex_owner(lock) && !try_to_steal_lock(lock))
+	if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
 		return 0;
 
 	/* We got the lock. */
@@ -411,6 +412,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 				   struct rt_mutex_waiter *waiter,
+				   struct task_struct *task,
 				   int detect_deadlock)
 {
 	struct task_struct *owner = rt_mutex_owner(lock);
@@ -418,21 +420,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	unsigned long flags;
 	int chain_walk = 0, res;
 
-	spin_lock_irqsave(&current->pi_lock, flags);
-	__rt_mutex_adjust_prio(current);
-	waiter->task = current;
+	spin_lock_irqsave(&task->pi_lock, flags);
+	__rt_mutex_adjust_prio(task);
+	waiter->task = task;
 	waiter->lock = lock;
-	plist_node_init(&waiter->list_entry, current->prio);
-	plist_node_init(&waiter->pi_list_entry, current->prio);
+	plist_node_init(&waiter->list_entry, task->prio);
+	plist_node_init(&waiter->pi_list_entry, task->prio);
 
 	/* Get the top priority waiter on the lock */
 	if (rt_mutex_has_waiters(lock))
 		top_waiter = rt_mutex_top_waiter(lock);
 	plist_add(&waiter->list_entry, &lock->wait_list);
 
-	current->pi_blocked_on = waiter;
+	task->pi_blocked_on = waiter;
 
-	spin_unlock_irqrestore(&current->pi_lock, flags);
+	spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	if (waiter == rt_mutex_top_waiter(lock)) {
 		spin_lock_irqsave(&owner->pi_lock, flags);
@@ -460,7 +462,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	spin_unlock(&lock->wait_lock);
 
 	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-					 current);
+					 task);
 
 	spin_lock(&lock->wait_lock);
 
@@ -605,37 +607,25 @@ void rt_mutex_adjust_pi(struct task_struct *task)
 	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
 }
 
-/*
- * Slow path lock function:
+/**
+ * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
+ * @lock:		 the rt_mutex to take
+ * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
+ * 			 or TASK_UNINTERRUPTIBLE)
+ * @timeout:		 the pre-initialized and started timer, or NULL for none
+ * @waiter:		 the pre-initialized rt_mutex_waiter
+ * @detect_deadlock:	 passed to task_blocks_on_rt_mutex
+ *
+ * lock->wait_lock must be held by the caller.
  */
 static int __sched
-rt_mutex_slowlock(struct rt_mutex *lock, int state,
-		  struct hrtimer_sleeper *timeout,
-		  int detect_deadlock)
+__rt_mutex_slowlock(struct rt_mutex *lock, int state,
+		    struct hrtimer_sleeper *timeout,
+		    struct rt_mutex_waiter *waiter,
+		    int detect_deadlock)
 {
-	struct rt_mutex_waiter waiter;
 	int ret = 0;
 
-	debug_rt_mutex_init_waiter(&waiter);
-	waiter.task = NULL;
-
-	spin_lock(&lock->wait_lock);
-
-	/* Try to acquire the lock again: */
-	if (try_to_take_rt_mutex(lock)) {
-		spin_unlock(&lock->wait_lock);
-		return 0;
-	}
-
-	set_current_state(state);
-
-	/* Setup the timer, when timeout != NULL */
-	if (unlikely(timeout)) {
-		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
-		if (!hrtimer_active(&timeout->timer))
-			timeout->task = NULL;
-	}
-
 	for (;;) {
 		/* Try to acquire the lock: */
 		if (try_to_take_rt_mutex(lock))
@@ -656,19 +646,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		}
 
 		/*
-		 * waiter.task is NULL the first time we come here and
+		 * waiter->task is NULL the first time we come here and
 		 * when we have been woken up by the previous owner
 		 * but the lock got stolen by a higher prio task.
 		 */
-		if (!waiter.task) {
-			ret = task_blocks_on_rt_mutex(lock, &waiter,
+		if (!waiter->task) {
+			ret = task_blocks_on_rt_mutex(lock, waiter, current,
 						      detect_deadlock);
 			/*
 			 * If we got woken up by the owner then start loop
 			 * all over without going into schedule to try
 			 * to get the lock now:
 			 */
-			if (unlikely(!waiter.task)) {
+			if (unlikely(!waiter->task)) {
 				/*
 				 * Reset the return value. We might
 				 * have returned with -EDEADLK and the
@@ -684,15 +674,52 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
 		spin_unlock(&lock->wait_lock);
 
-		debug_rt_mutex_print_deadlock(&waiter);
+		debug_rt_mutex_print_deadlock(waiter);
 
-		if (waiter.task)
+		if (waiter->task)
 			schedule_rt_mutex(lock);
 
 		spin_lock(&lock->wait_lock);
 		set_current_state(state);
 	}
 
+	return ret;
+}
+
+/*
+ * Slow path lock function:
+ */
+static int __sched
+rt_mutex_slowlock(struct rt_mutex *lock, int state,
+		  struct hrtimer_sleeper *timeout,
+		  int detect_deadlock)
+{
+	struct rt_mutex_waiter waiter;
+	int ret = 0;
+
+	debug_rt_mutex_init_waiter(&waiter);
+	waiter.task = NULL;
+
+	spin_lock(&lock->wait_lock);
+
+	/* Try to acquire the lock again: */
+	if (try_to_take_rt_mutex(lock)) {
+		spin_unlock(&lock->wait_lock);
+		return 0;
+	}
+
+	set_current_state(state);
+
+	/* Setup the timer, when timeout != NULL */
+	if (unlikely(timeout)) {
+		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+		if (!hrtimer_active(&timeout->timer))
+			timeout->task = NULL;
+	}
+
+	ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
+				  detect_deadlock);
+
 	set_current_state(TASK_RUNNING);
 
 	if (unlikely(waiter.task))
@@ -986,6 +1013,59 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
 }
 
 /**
+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock:		the rt_mutex to take
+ * @waiter:		the pre-initialized rt_mutex_waiter
+ * @task:		the task to prepare
+ * @detect_deadlock:	perform deadlock detection (1) or not (0)
+ *
+ * Returns:
+ *  0 - task blocked on lock
+ *  1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for FUTEX_REQUEUE_PI support.
+ */
+int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+			      struct rt_mutex_waiter *waiter,
+			      struct task_struct *task, int detect_deadlock)
+{
+	int ret;
+
+	spin_lock(&lock->wait_lock);
+
+	mark_rt_mutex_waiters(lock);
+
+	if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
+		/* We got the lock for task. */
+		debug_rt_mutex_lock(lock);
+
+		rt_mutex_set_owner(lock, task, 0);
+
+		rt_mutex_deadlock_account_lock(lock, task);
+		return 1;
+	}
+
+	ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
+
+
+	if (ret && !waiter->task) {
+		/*
+		 * Reset the return value. We might have
+		 * returned with -EDEADLK and the owner
+		 * released the lock while we were walking the
+		 * pi chain.  Let the waiter sort it out.
+		 */
+		ret = 0;
+	}
+	spin_unlock(&lock->wait_lock);
+
+	debug_rt_mutex_print_deadlock(waiter);
+
+	return ret;
+}
+
+/**
  * rt_mutex_next_owner - return the next owner of the lock
  *
  * @lock: the rt lock query
@@ -1004,3 +1084,57 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
 
 	return rt_mutex_top_waiter(lock)->task;
 }
+
+/**
+ * rt_mutex_finish_proxy_lock() - Complete lock acquisition
+ * @lock:		the rt_mutex we were woken on
+ * @to:			the timeout, null if none. hrtimer should already have
+ * 			been started.
+ * @waiter:		the pre-initialized rt_mutex_waiter
+ * @detect_deadlock:	perform deadlock detection (1) or not (0)
+ *
+ * Complete the lock acquisition started our behalf by another thread.
+ *
+ * Returns:
+ *  0 - success
+ * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
+ *
+ * Special API call for PI-futex requeue support
+ */
+int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+			       struct hrtimer_sleeper *to,
+			       struct rt_mutex_waiter *waiter,
+			       int detect_deadlock)
+{
+	int ret;
+
+	spin_lock(&lock->wait_lock);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
+				  detect_deadlock);
+
+	set_current_state(TASK_RUNNING);
+
+	if (unlikely(waiter->task))
+		remove_waiter(lock, waiter);
+
+	/*
+	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+	 * have to fix that up.
+	 */
+	fixup_rt_mutex_waiters(lock);
+
+	spin_unlock(&lock->wait_lock);
+
+	/*
+	 * Readjust priority, when we did not get the lock. We might have been
+	 * the pending owner and boosted. Since we did not take the lock, the
+	 * PI boost has to go.
+	 */
+	if (unlikely(ret))
+		rt_mutex_adjust_prio(current);
+
+	return ret;
+}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index e124bf5..97a2f81 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -120,6 +120,14 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 				       struct task_struct *proxy_owner);
 extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
 				  struct task_struct *proxy_owner);
+extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+				     struct rt_mutex_waiter *waiter,
+				     struct task_struct *task,
+				     int detect_deadlock);
+extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+				      struct hrtimer_sleeper *to,
+				      struct rt_mutex_waiter *waiter,
+				      int detect_deadlock);
 
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # include "rtmutex-debug.h"
-- 
cgit v0.10.2


From a72188d8a64ebe74722f1cf7ffac41b41ffdba21 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:40:22 -0700
Subject: futex: add FUTEX_HAS_TIMEOUT flag to restart.futex.flags

Currently restart is only used if there is a timeout. The requeue_pi
functionality requires restarting to futex_lock_pi() on signal after
wakeup in futex_wait_requeue_pi() regardless of if there was a timeout
or not. Using 0 for the timeout value is confusing as that could
indicate an expired timer. The flag makes this explicit. While the
check is not technically needed in futex_wait_restart(), doing so
makes the code consistent with and will avoid confusion should the
need arise to restart wait without a timeout.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index af831fb..6b597cf 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1252,6 +1252,7 @@ handle_fault:
  */
 #define FLAGS_SHARED		0x01
 #define FLAGS_CLOCKRT		0x02
+#define FLAGS_HAS_TIMEOUT	0x04
 
 static long futex_wait_restart(struct restart_block *restart);
 
@@ -1486,7 +1487,7 @@ retry_private:
 	restart->futex.val = val;
 	restart->futex.time = abs_time->tv64;
 	restart->futex.bitset = bitset;
-	restart->futex.flags = 0;
+	restart->futex.flags = FLAGS_HAS_TIMEOUT;
 
 	if (fshared)
 		restart->futex.flags |= FLAGS_SHARED;
@@ -1510,13 +1511,16 @@ static long futex_wait_restart(struct restart_block *restart)
 {
 	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
 	int fshared = 0;
-	ktime_t t;
+	ktime_t t, *tp = NULL;
 
-	t.tv64 = restart->futex.time;
+	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
+		t.tv64 = restart->futex.time;
+		tp = &t;
+	}
 	restart->fn = do_no_restart_syscall;
 	if (restart->futex.flags & FLAGS_SHARED)
 		fshared = 1;
-	return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
+	return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
 				restart->futex.bitset,
 				restart->futex.flags & FLAGS_CLOCKRT);
 }
-- 
cgit v0.10.2


From 9121e4783cd5c7e2a407763f3b61c2d573891133 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:40:31 -0700
Subject: futex: distangle futex_requeue()

futex_requeue() is getting a bit long-winded, and will be getting more
so after the requeue_pi patch. Factor out the actual requeueing into a
nicely contained inline function to reduce function length and improve
legibility.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 6b597cf..e76942e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -940,6 +940,34 @@ out:
 	return ret;
 }
 
+/**
+ * requeue_futex() - Requeue a futex_q from one hb to another
+ * @q:		the futex_q to requeue
+ * @hb1:	the source hash_bucket
+ * @hb2:	the target hash_bucket
+ * @key2:	the new key for the requeued futex_q
+ */
+static inline
+void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
+		   struct futex_hash_bucket *hb2, union futex_key *key2)
+{
+
+	/*
+	 * If key1 and key2 hash to the same bucket, no need to
+	 * requeue.
+	 */
+	if (likely(&hb1->chain != &hb2->chain)) {
+		plist_del(&q->list, &hb1->chain);
+		plist_add(&q->list, &hb2->chain);
+		q->lock_ptr = &hb2->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+		q->list.plist.lock = &hb2->lock;
+#endif
+	}
+	get_futex_key_refs(key2);
+	q->key = *key2;
+}
+
 /*
  * Requeue all waiters hashed on one physical page to another
  * physical page.
@@ -999,20 +1027,7 @@ retry_private:
 		if (++ret <= nr_wake) {
 			wake_futex(this);
 		} else {
-			/*
-			 * If key1 and key2 hash to the same bucket, no need to
-			 * requeue.
-			 */
-			if (likely(head1 != &hb2->chain)) {
-				plist_del(&this->list, &hb1->chain);
-				plist_add(&this->list, &hb2->chain);
-				this->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-				this->list.plist.lock = &hb2->lock;
-#endif
-			}
-			this->key = key2;
-			get_futex_key_refs(&key2);
+			requeue_futex(this, hb1, hb2, &key2);
 			drop_count++;
 
 			if (ret - nr_wake >= nr_requeue)
-- 
cgit v0.10.2


From f801073f87aa22ddf0e9146355fec3993163790f Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:40:40 -0700
Subject: futex: split out futex value validation code

Refactor the code to validate the expected futex value in order to
reuse it with the requeue_pi code.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index e76942e..dbe857a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1398,42 +1398,29 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 	__set_current_state(TASK_RUNNING);
 }
 
-static int futex_wait(u32 __user *uaddr, int fshared,
-		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+/**
+ * futex_wait_setup() - Prepare to wait on a futex
+ * @uaddr:	the futex userspace address
+ * @val:	the expected value
+ * @fshared:	whether the futex is shared (1) or not (0)
+ * @q:		the associated futex_q
+ * @hb:		storage for hash_bucket pointer to be returned to caller
+ *
+ * Setup the futex_q and locate the hash_bucket.  Get the futex value and
+ * compare it with the expected value.  Handle atomic faults internally.
+ * Return with the hb lock held and a q.key reference on success, and unlocked
+ * with no q.key reference on failure.
+ *
+ * Returns:
+ *  0 - uaddr contains val and hb has been locked
+ * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked
+ */
+static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
+			   struct futex_q *q, struct futex_hash_bucket **hb)
 {
-	struct hrtimer_sleeper timeout, *to = NULL;
-	DECLARE_WAITQUEUE(wait, current);
-	struct restart_block *restart;
-	struct futex_hash_bucket *hb;
-	struct futex_q q;
 	u32 uval;
 	int ret;
 
-	if (!bitset)
-		return -EINVAL;
-
-	q.pi_state = NULL;
-	q.bitset = bitset;
-
-	if (abs_time) {
-		to = &timeout;
-
-		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
-				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
-		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
-					     current->timer_slack_ns);
-	}
-
-retry:
-	q.key = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr, fshared, &q.key);
-	if (unlikely(ret != 0))
-		goto out;
-
-retry_private:
-	hb = queue_lock(&q);
-
 	/*
 	 * Access the page AFTER the hash-bucket is locked.
 	 * Order is important:
@@ -1450,33 +1437,74 @@ retry_private:
 	 * A consequence is that futex_wait() can return zero and absorb
 	 * a wakeup when *uaddr != val on entry to the syscall.  This is
 	 * rare, but normal.
-	 *
-	 * For shared futexes, we hold the mmap semaphore, so the mapping
-	 * cannot have changed since we looked it up in get_futex_key.
 	 */
+retry:
+	q->key = FUTEX_KEY_INIT;
+	ret = get_futex_key(uaddr, fshared, &q->key);
+	if (unlikely(ret != 0))
+		goto out;
+
+retry_private:
+	*hb = queue_lock(q);
+
 	ret = get_futex_value_locked(&uval, uaddr);
 
-	if (unlikely(ret)) {
-		queue_unlock(&q, hb);
+	if (ret) {
+		queue_unlock(q, *hb);
 
 		ret = get_user(uval, uaddr);
 		if (ret)
-			goto out_put_key;
+			goto out;
 
 		if (!fshared)
 			goto retry_private;
 
-		put_futex_key(fshared, &q.key);
+		put_futex_key(fshared, &q->key);
 		goto retry;
 	}
-	ret = -EWOULDBLOCK;
 
-	/* Only actually queue if *uaddr contained val.  */
-	if (unlikely(uval != val)) {
-		queue_unlock(&q, hb);
-		goto out_put_key;
+	if (uval != val) {
+		queue_unlock(q, *hb);
+		ret = -EWOULDBLOCK;
 	}
 
+out:
+	if (ret)
+		put_futex_key(fshared, &q->key);
+	return ret;
+}
+
+static int futex_wait(u32 __user *uaddr, int fshared,
+		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+{
+	struct hrtimer_sleeper timeout, *to = NULL;
+	DECLARE_WAITQUEUE(wait, current);
+	struct restart_block *restart;
+	struct futex_hash_bucket *hb;
+	struct futex_q q;
+	int ret;
+
+	if (!bitset)
+		return -EINVAL;
+
+	q.pi_state = NULL;
+	q.bitset = bitset;
+
+	if (abs_time) {
+		to = &timeout;
+
+		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
+				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper(to, current);
+		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
+					     current->timer_slack_ns);
+	}
+
+	/* Prepare to wait on uaddr. */
+	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+	if (ret)
+		goto out;
+
 	/* queue_me and wait for wakeup, timeout, or a signal. */
 	futex_wait_queue_me(hb, &q, to, &wait);
 
-- 
cgit v0.10.2


From 52400ba946759af28442dee6265c5c0180ac7122 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:40:49 -0700
Subject: futex: add requeue_pi functionality

PI Futexes and their underlying rt_mutex cannot be left ownerless if
there are pending waiters as this will break the PI boosting logic, so
the standard requeue commands aren't sufficient.  The new commands
properly manage pi futex ownership by ensuring a futex with waiters
has an owner at all times.  This will allow glibc to properly handle
pi mutexes with pthread_condvars.

The approach taken here is to create two new futex op codes:

FUTEX_WAIT_REQUEUE_PI:
Tasks will use this op code to wait on a futex (such as a non-pi waitqueue)
and wake after they have been requeued to a pi futex.  Prior to returning to
userspace, they will acquire this pi futex (and the underlying rt_mutex).

futex_wait_requeue_pi() is the result of a high speed collision between
futex_wait() and futex_lock_pi() (with the first part of futex_lock_pi() being
done by futex_proxy_trylock_atomic() on behalf of the top_waiter).

FUTEX_REQUEUE_PI (and FUTEX_CMP_REQUEUE_PI):
This call must be used to wake tasks waiting with FUTEX_WAIT_REQUEUE_PI,
regardless of how many tasks the caller intends to wake or requeue.
pthread_cond_broadcast() should call this with nr_wake=1 and
nr_requeue=INT_MAX.  pthread_cond_signal() should call this with nr_wake=1 and
nr_requeue=0.  The reason being we need both callers to get the benefit of the
futex_proxy_trylock_atomic() routine.  futex_requeue() also enqueues the
top_waiter on the rt_mutex via rt_mutex_start_proxy_lock().

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 3bf5bb5..b05519c 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -23,6 +23,9 @@ union ktime;
 #define FUTEX_TRYLOCK_PI	8
 #define FUTEX_WAIT_BITSET	9
 #define FUTEX_WAKE_BITSET	10
+#define FUTEX_WAIT_REQUEUE_PI	11
+#define FUTEX_REQUEUE_PI	12
+#define FUTEX_CMP_REQUEUE_PI	13
 
 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
@@ -38,6 +41,11 @@ union ktime;
 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAIT_BITSET_PRIVATE	(FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_BITSET_PRIVATE	(FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE	(FUTEX_WAIT_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
+#define FUTEX_REQUEUE_PI_PRIVATE	(FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG)
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
 
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e6b820f..a8cc4e1 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -21,13 +21,14 @@ struct restart_block {
 		struct {
 			unsigned long arg0, arg1, arg2, arg3;
 		};
-		/* For futex_wait */
+		/* For futex_wait and futex_wait_requeue_pi */
 		struct {
 			u32 *uaddr;
 			u32 val;
 			u32 flags;
 			u32 bitset;
 			u64 time;
+			u32 *uaddr2;
 		} futex;
 		/* For nanosleep */
 		struct {
diff --git a/kernel/futex.c b/kernel/futex.c
index dbe857a..185c981 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -19,6 +19,10 @@
  *  PRIVATE futexes by Eric Dumazet
  *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
  *
+ *  Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
+ *  Copyright (C) IBM Corporation, 2009
+ *  Thanks to Thomas Gleixner for conceptual design and careful reviews.
+ *
  *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
  *  enough at me, Linus for the original (flawed) idea, Matthew
  *  Kirkwood for proof-of-concept implementation.
@@ -109,6 +113,9 @@ struct futex_q {
 	struct futex_pi_state *pi_state;
 	struct task_struct *task;
 
+	/* rt_waiter storage for requeue_pi: */
+	struct rt_mutex_waiter *rt_waiter;
+
 	/* Bitset for the optional bitmasked wakeup */
 	u32 bitset;
 };
@@ -827,7 +834,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 
 	plist_for_each_entry_safe(this, next, head, list) {
 		if (match_futex (&this->key, &key)) {
-			if (this->pi_state) {
+			if (this->pi_state || this->rt_waiter) {
 				ret = -EINVAL;
 				break;
 			}
@@ -968,20 +975,138 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 	q->key = *key2;
 }
 
-/*
- * Requeue all waiters hashed on one physical page to another
- * physical page.
+/**
+ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
+ * q:	the futex_q
+ * key:	the key of the requeue target futex
+ *
+ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
+ * target futex if it is uncontended or via a lock steal.  Set the futex_q key
+ * to the requeue target futex so the waiter can detect the wakeup on the right
+ * futex, but remove it from the hb and NULL the rt_waiter so it can detect
+ * atomic lock acquisition.  Must be called with the q->lock_ptr held.
+ */
+static inline
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+{
+	drop_futex_key_refs(&q->key);
+	get_futex_key_refs(key);
+	q->key = *key;
+
+	WARN_ON(plist_node_empty(&q->list));
+	plist_del(&q->list, &q->list.plist);
+
+	WARN_ON(!q->rt_waiter);
+	q->rt_waiter = NULL;
+
+	wake_up(&q->waiter);
+}
+
+/**
+ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
+ * @pifutex:	the user address of the to futex
+ * @hb1:	the from futex hash bucket, must be locked by the caller
+ * @hb2:	the to futex hash bucket, must be locked by the caller
+ * @key1:	the from futex key
+ * @key2:	the to futex key
+ *
+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
+ * Wake the top waiter if we succeed.  hb1 and hb2 must be held by the caller.
+ *
+ * Returns:
+ *  0 - failed to acquire the lock atomicly
+ *  1 - acquired the lock
+ * <0 - error
+ */
+static int futex_proxy_trylock_atomic(u32 __user *pifutex,
+				 struct futex_hash_bucket *hb1,
+				 struct futex_hash_bucket *hb2,
+				 union futex_key *key1, union futex_key *key2,
+				 struct futex_pi_state **ps)
+{
+	struct futex_q *top_waiter;
+	u32 curval;
+	int ret;
+
+	if (get_futex_value_locked(&curval, pifutex))
+		return -EFAULT;
+
+	top_waiter = futex_top_waiter(hb1, key1);
+
+	/* There are no waiters, nothing for us to do. */
+	if (!top_waiter)
+		return 0;
+
+	/*
+	 * Either take the lock for top_waiter or set the FUTEX_WAITERS bit.
+	 * The pi_state is returned in ps in contended cases.
+	 */
+	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task);
+	if (ret == 1)
+		requeue_pi_wake_futex(top_waiter, key2);
+
+	return ret;
+}
+
+/**
+ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
+ * uaddr1:	source futex user address
+ * uaddr2:	target futex user address
+ * nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
+ * nr_requeue:	number of waiters to requeue (0-INT_MAX)
+ * requeue_pi:	if we are attempting to requeue from a non-pi futex to a
+ * 		pi futex (pi to pi requeue is not supported)
+ *
+ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
+ * uaddr2 atomically on behalf of the top waiter.
+ *
+ * Returns:
+ * >=0 - on success, the number of tasks requeued or woken
+ *  <0 - on error
  */
 static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-			 int nr_wake, int nr_requeue, u32 *cmpval)
+			 int nr_wake, int nr_requeue, u32 *cmpval,
+			 int requeue_pi)
 {
 	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
+	int drop_count = 0, task_count = 0, ret;
+	struct futex_pi_state *pi_state = NULL;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head1;
 	struct futex_q *this, *next;
-	int ret, drop_count = 0;
+	u32 curval2;
+
+	if (requeue_pi) {
+		/*
+		 * requeue_pi requires a pi_state, try to allocate it now
+		 * without any locks in case it fails.
+		 */
+		if (refill_pi_state_cache())
+			return -ENOMEM;
+		/*
+		 * requeue_pi must wake as many tasks as it can, up to nr_wake
+		 * + nr_requeue, since it acquires the rt_mutex prior to
+		 * returning to userspace, so as to not leave the rt_mutex with
+		 * waiters and no owner.  However, second and third wake-ups
+		 * cannot be predicted as they involve race conditions with the
+		 * first wake and a fault while looking up the pi_state.  Both
+		 * pthread_cond_signal() and pthread_cond_broadcast() should
+		 * use nr_wake=1.
+		 */
+		if (nr_wake != 1)
+			return -EINVAL;
+	}
 
 retry:
+	if (pi_state != NULL) {
+		/*
+		 * We will have to lookup the pi_state again, so free this one
+		 * to keep the accounting correct.
+		 */
+		free_pi_state(pi_state);
+		pi_state = NULL;
+	}
+
 	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
 		goto out;
@@ -1020,19 +1145,94 @@ retry_private:
 		}
 	}
 
+	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+		/* Attempt to acquire uaddr2 and wake the top_waiter. */
+		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+						 &key2, &pi_state);
+
+		/*
+		 * At this point the top_waiter has either taken uaddr2 or is
+		 * waiting on it.  If the former, then the pi_state will not
+		 * exist yet, look it up one more time to ensure we have a
+		 * reference to it.
+		 */
+		if (ret == 1) {
+			WARN_ON(pi_state);
+			task_count++;
+			ret = get_futex_value_locked(&curval2, uaddr2);
+			if (!ret)
+				ret = lookup_pi_state(curval2, hb2, &key2,
+						      &pi_state);
+		}
+
+		switch (ret) {
+		case 0:
+			break;
+		case -EFAULT:
+			double_unlock_hb(hb1, hb2);
+			put_futex_key(fshared, &key2);
+			put_futex_key(fshared, &key1);
+			ret = get_user(curval2, uaddr2);
+			if (!ret)
+				goto retry;
+			goto out;
+		case -EAGAIN:
+			/* The owner was exiting, try again. */
+			double_unlock_hb(hb1, hb2);
+			put_futex_key(fshared, &key2);
+			put_futex_key(fshared, &key1);
+			cond_resched();
+			goto retry;
+		default:
+			goto out_unlock;
+		}
+	}
+
 	head1 = &hb1->chain;
 	plist_for_each_entry_safe(this, next, head1, list) {
-		if (!match_futex (&this->key, &key1))
+		if (task_count - nr_wake >= nr_requeue)
+			break;
+
+		if (!match_futex(&this->key, &key1))
 			continue;
-		if (++ret <= nr_wake) {
+
+		WARN_ON(!requeue_pi && this->rt_waiter);
+		WARN_ON(requeue_pi && !this->rt_waiter);
+
+		/*
+		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
+		 * lock, we already woke the top_waiter.  If not, it will be
+		 * woken by futex_unlock_pi().
+		 */
+		if (++task_count <= nr_wake && !requeue_pi) {
 			wake_futex(this);
-		} else {
-			requeue_futex(this, hb1, hb2, &key2);
-			drop_count++;
+			continue;
+		}
 
-			if (ret - nr_wake >= nr_requeue)
-				break;
+		/*
+		 * Requeue nr_requeue waiters and possibly one more in the case
+		 * of requeue_pi if we couldn't acquire the lock atomically.
+		 */
+		if (requeue_pi) {
+			/* Prepare the waiter to take the rt_mutex. */
+			atomic_inc(&pi_state->refcount);
+			this->pi_state = pi_state;
+			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+							this->rt_waiter,
+							this->task, 1);
+			if (ret == 1) {
+				/* We got the lock. */
+				requeue_pi_wake_futex(this, &key2);
+				continue;
+			} else if (ret) {
+				/* -EDEADLK */
+				this->pi_state = NULL;
+				free_pi_state(pi_state);
+				goto out_unlock;
+			}
 		}
+		requeue_futex(this, hb1, hb2, &key2);
+		drop_count++;
 	}
 
 out_unlock:
@@ -1047,7 +1247,9 @@ out_put_keys:
 out_put_key1:
 	put_futex_key(fshared, &key1);
 out:
-	return ret;
+	if (pi_state != NULL)
+		free_pi_state(pi_state);
+	return ret ? ret : task_count;
 }
 
 /* The key must be already stored in q->key. */
@@ -1270,6 +1472,7 @@ handle_fault:
 #define FLAGS_HAS_TIMEOUT	0x04
 
 static long futex_wait_restart(struct restart_block *restart);
+static long futex_lock_pi_restart(struct restart_block *restart);
 
 /**
  * fixup_owner() - Post lock pi_state and corner case management
@@ -1489,6 +1692,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 
 	q.pi_state = NULL;
 	q.bitset = bitset;
+	q.rt_waiter = NULL;
 
 	if (abs_time) {
 		to = &timeout;
@@ -1596,6 +1800,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 	}
 
 	q.pi_state = NULL;
+	q.rt_waiter = NULL;
 retry:
 	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
@@ -1701,6 +1906,20 @@ uaddr_faulted:
 	goto retry;
 }
 
+static long futex_lock_pi_restart(struct restart_block *restart)
+{
+	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
+	ktime_t t, *tp = NULL;
+	int fshared = restart->futex.flags & FLAGS_SHARED;
+
+	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
+		t.tv64 = restart->futex.time;
+		tp = &t;
+	}
+	restart->fn = do_no_restart_syscall;
+
+	return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0);
+}
 
 /*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
@@ -1803,6 +2022,253 @@ pi_faulted:
 	return ret;
 }
 
+/**
+ * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
+ * @hb:		the hash_bucket futex_q was original enqueued on
+ * @q:		the futex_q woken while waiting to be requeued
+ * @key2:	the futex_key of the requeue target futex
+ * @timeout:	the timeout associated with the wait (NULL if none)
+ *
+ * Detect if the task was woken on the initial futex as opposed to the requeue
+ * target futex.  If so, determine if it was a timeout or a signal that caused
+ * the wakeup and return the appropriate error code to the caller.  Must be
+ * called with the hb lock held.
+ *
+ * Returns
+ *  0 - no early wakeup detected
+ * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?)
+ */
+static inline
+int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
+				   struct futex_q *q, union futex_key *key2,
+				   struct hrtimer_sleeper *timeout)
+{
+	int ret = 0;
+
+	/*
+	 * With the hb lock held, we avoid races while we process the wakeup.
+	 * We only need to hold hb (and not hb2) to ensure atomicity as the
+	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
+	 * It can't be requeued from uaddr2 to something else since we don't
+	 * support a PI aware source futex for requeue.
+	 */
+	if (!match_futex(&q->key, key2)) {
+		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
+		/*
+		 * We were woken prior to requeue by a timeout or a signal.
+		 * Unqueue the futex_q and determine which it was.
+		 */
+		plist_del(&q->list, &q->list.plist);
+		drop_futex_key_refs(&q->key);
+
+		if (timeout && !timeout->task)
+			ret = -ETIMEDOUT;
+		else {
+			/*
+			 * We expect signal_pending(current), but another
+			 * thread may have handled it for us already.
+			 */
+			/* FIXME: ERESTARTSYS or ERESTARTNOINTR?  Do we care if
+			 * the user specified SA_RESTART or not? */
+			ret = -ERESTARTSYS;
+		}
+	}
+	return ret;
+}
+
+/**
+ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
+ * @uaddr:	the futex we initialyl wait on (non-pi)
+ * @fshared:	whether the futexes are shared (1) or not (0).  They must be
+ * 		the same type, no requeueing from private to shared, etc.
+ * @val:	the expected value of uaddr
+ * @abs_time:	absolute timeout
+ * @bitset:	32 bit wakeup bitset set by userspace, defaults to all.
+ * @clockrt:	whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
+ * @uaddr2:	the pi futex we will take prior to returning to user-space
+ *
+ * The caller will wait on uaddr and will be requeued by futex_requeue() to
+ * uaddr2 which must be PI aware.  Normal wakeup will wake on uaddr2 and
+ * complete the acquisition of the rt_mutex prior to returning to userspace.
+ * This ensures the rt_mutex maintains an owner when it has waiters; without
+ * one, the pi logic wouldn't know which task to boost/deboost, if there was a
+ * need to.
+ *
+ * We call schedule in futex_wait_queue_me() when we enqueue and return there
+ * via the following:
+ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
+ * 2) wakeup on uaddr2 after a requeue and subsequent unlock
+ * 3) signal (before or after requeue)
+ * 4) timeout (before or after requeue)
+ *
+ * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function.
+ *
+ * If 2, we may then block on trying to take the rt_mutex and return via:
+ * 5) successful lock
+ * 6) signal
+ * 7) timeout
+ * 8) other lock acquisition failure
+ *
+ * If 6, we setup a restart_block with futex_lock_pi() as the function.
+ *
+ * If 4 or 7, we cleanup and return with -ETIMEDOUT.
+ *
+ * Returns:
+ *  0 - On success
+ * <0 - On error
+ */
+static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
+				 u32 val, ktime_t *abs_time, u32 bitset,
+				 int clockrt, u32 __user *uaddr2)
+{
+	struct hrtimer_sleeper timeout, *to = NULL;
+	struct rt_mutex_waiter rt_waiter;
+	struct rt_mutex *pi_mutex = NULL;
+	DECLARE_WAITQUEUE(wait, current);
+	struct restart_block *restart;
+	struct futex_hash_bucket *hb;
+	union futex_key key2;
+	struct futex_q q;
+	int res, ret;
+	u32 uval;
+
+	if (!bitset)
+		return -EINVAL;
+
+	if (abs_time) {
+		to = &timeout;
+		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
+				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper(to, current);
+		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
+					     current->timer_slack_ns);
+	}
+
+	/*
+	 * The waiter is allocated on our stack, manipulated by the requeue
+	 * code while we sleep on uaddr.
+	 */
+	debug_rt_mutex_init_waiter(&rt_waiter);
+	rt_waiter.task = NULL;
+
+	q.pi_state = NULL;
+	q.bitset = bitset;
+	q.rt_waiter = &rt_waiter;
+
+	key2 = FUTEX_KEY_INIT;
+	ret = get_futex_key(uaddr2, fshared, &key2);
+	if (unlikely(ret != 0))
+		goto out;
+
+	/* Prepare to wait on uaddr. */
+	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+	if (ret) {
+		put_futex_key(fshared, &key2);
+		goto out;
+	}
+
+	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
+	futex_wait_queue_me(hb, &q, to, &wait);
+
+	spin_lock(&hb->lock);
+	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+	spin_unlock(&hb->lock);
+	if (ret)
+		goto out_put_keys;
+
+	/*
+	 * In order for us to be here, we know our q.key == key2, and since
+	 * we took the hb->lock above, we also know that futex_requeue() has
+	 * completed and we no longer have to concern ourselves with a wakeup
+	 * race with the atomic proxy lock acquition by the requeue code.
+	 */
+
+	/* Check if the requeue code acquired the second futex for us. */
+	if (!q.rt_waiter) {
+		/*
+		 * Got the lock. We might not be the anticipated owner if we
+		 * did a lock-steal - fix up the PI-state in that case.
+		 */
+		if (q.pi_state && (q.pi_state->owner != current)) {
+			spin_lock(q.lock_ptr);
+			ret = fixup_pi_state_owner(uaddr2, &q, current,
+						   fshared);
+			spin_unlock(q.lock_ptr);
+		}
+	} else {
+		/*
+		 * We have been woken up by futex_unlock_pi(), a timeout, or a
+		 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
+		 * the pi_state.
+		 */
+		WARN_ON(!&q.pi_state);
+		pi_mutex = &q.pi_state->pi_mutex;
+		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
+		debug_rt_mutex_free_waiter(&rt_waiter);
+
+		spin_lock(q.lock_ptr);
+		/*
+		 * Fixup the pi_state owner and possibly acquire the lock if we
+		 * haven't already.
+		 */
+		res = fixup_owner(uaddr2, fshared, &q, !ret);
+		/*
+		 * If fixup_owner() returned an error, proprogate that.  If it
+		 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
+		 */
+		if (res)
+			ret = (res < 0) ? res : 0;
+
+		/* Unqueue and drop the lock. */
+		unqueue_me_pi(&q);
+	}
+
+	/*
+	 * If fixup_pi_state_owner() faulted and was unable to handle the
+	 * fault, unlock the rt_mutex and return the fault to userspace.
+	 */
+	if (ret == -EFAULT) {
+		if (rt_mutex_owner(pi_mutex) == current)
+			rt_mutex_unlock(pi_mutex);
+	} else if (ret == -EINTR) {
+		ret = -EFAULT;
+		if (get_user(uval, uaddr2))
+			goto out_put_keys;
+
+		/*
+		 * We've already been requeued, so restart by calling
+		 * futex_lock_pi() directly, rather then returning to this
+		 * function.
+		 */
+		ret = -ERESTART_RESTARTBLOCK;
+		restart = &current_thread_info()->restart_block;
+		restart->fn = futex_lock_pi_restart;
+		restart->futex.uaddr = (u32 *)uaddr2;
+		restart->futex.val = uval;
+		restart->futex.flags = 0;
+		if (abs_time) {
+			restart->futex.flags |= FLAGS_HAS_TIMEOUT;
+			restart->futex.time = abs_time->tv64;
+		}
+
+		if (fshared)
+			restart->futex.flags |= FLAGS_SHARED;
+		if (clockrt)
+			restart->futex.flags |= FLAGS_CLOCKRT;
+	}
+
+out_put_keys:
+	put_futex_key(fshared, &q.key);
+	put_futex_key(fshared, &key2);
+
+out:
+	if (to) {
+		hrtimer_cancel(&to->timer);
+		destroy_hrtimer_on_stack(&to->timer);
+	}
+	return ret;
+}
+
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
@@ -2025,7 +2491,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		fshared = 1;
 
 	clockrt = op & FUTEX_CLOCK_REALTIME;
-	if (clockrt && cmd != FUTEX_WAIT_BITSET)
+	if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
 		return -ENOSYS;
 
 	switch (cmd) {
@@ -2040,10 +2506,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		ret = futex_wake(uaddr, fshared, val, val3);
 		break;
 	case FUTEX_REQUEUE:
-		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
 		break;
 	case FUTEX_CMP_REQUEUE:
-		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
+				    0);
 		break;
 	case FUTEX_WAKE_OP:
 		ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
@@ -2060,6 +2527,18 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		if (futex_cmpxchg_enabled)
 			ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
 		break;
+	case FUTEX_WAIT_REQUEUE_PI:
+		val3 = FUTEX_BITSET_MATCH_ANY;
+		ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
+					    clockrt, uaddr2);
+		break;
+	case FUTEX_REQUEUE_PI:
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1);
+		break;
+	case FUTEX_CMP_REQUEUE_PI:
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
+				    1);
+		break;
 	default:
 		ret = -ENOSYS;
 	}
@@ -2077,7 +2556,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 	int cmd = op & FUTEX_CMD_MASK;
 
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
-		      cmd == FUTEX_WAIT_BITSET)) {
+		      cmd == FUTEX_WAIT_BITSET ||
+		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
 		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
 			return -EFAULT;
 		if (!timespec_valid(&ts))
@@ -2089,10 +2569,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 		tp = &t;
 	}
 	/*
-	 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
+	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
 	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
 	 */
 	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+	    cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI ||
 	    cmd == FUTEX_WAKE_OP)
 		val2 = (u32) (unsigned long) utime;
 
-- 
cgit v0.10.2


From 80fbe6ac9b47cbc11e174a9bf853834dc281da35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com>
Date: Mon, 6 Apr 2009 11:50:22 +0200
Subject: ASoC: correct s6000 I2S clock polarity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the data sheet data is clocked out on the falling edge
and latched on the rising edge of the bit clock. While the left sample
is transmitted the word clock line is low.

Signed-off-by: Daniel Glöckner <dg@emlix.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s6000/s6000-i2s.c b/sound/soc/s6000/s6000-i2s.c
index dcc7904..c5cda18 100644
--- a/sound/soc/s6000/s6000-i2s.c
+++ b/sound/soc/s6000/s6000-i2s.c
@@ -252,10 +252,10 @@ static int s6000_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 	}
 
 	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
-	case SND_SOC_DAIFMT_IB_IF:
+	case SND_SOC_DAIFMT_NB_NF:
 		w |= S6_I2S_LEFT_FIRST;
 		break;
-	case SND_SOC_DAIFMT_IB_NF:
+	case SND_SOC_DAIFMT_NB_IF:
 		w |= S6_I2S_RIGHT_FIRST;
 		break;
 	default:
diff --git a/sound/soc/s6000/s6105-ipcam.c b/sound/soc/s6000/s6105-ipcam.c
index 21c4f55..b5f95f9 100644
--- a/sound/soc/s6000/s6105-ipcam.c
+++ b/sound/soc/s6000/s6105-ipcam.c
@@ -43,7 +43,7 @@ static int s6105_hw_params(struct snd_pcm_substream *substream,
 
 	/* set cpu DAI configuration */
 	ret = snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_CBM_CFM |
-					   SND_SOC_DAIFMT_IB_IF);
+					   SND_SOC_DAIFMT_NB_NF);
 	if (ret < 0)
 		return ret;
 
-- 
cgit v0.10.2


From 7ba5779533819fc061b4afafcb4a609d55f37057 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 6 Apr 2009 20:49:14 +0900
Subject: tomoyo: remove "undelete domain" command.

Since TOMOYO's policy management tools does not use the "undelete domain"
command, we decided to remove that command.

Signed-off-by: Kentaro Takeda <takedakn@nttdata.co.jp>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Toshiharu Harada <haradats@nttdata.co.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index 92cea65..a0affd9 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -1252,15 +1252,12 @@ static int tomoyo_write_domain_policy(struct tomoyo_io_buffer *head)
 	struct tomoyo_domain_info *domain = head->write_var1;
 	bool is_delete = false;
 	bool is_select = false;
-	bool is_undelete = false;
 	unsigned int profile;
 
 	if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE))
 		is_delete = true;
 	else if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_SELECT))
 		is_select = true;
-	else if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_UNDELETE))
-		is_undelete = true;
 	if (is_select && tomoyo_is_select_one(head, data))
 		return 0;
 	/* Don't allow updating policies by non manager programs. */
@@ -1274,9 +1271,7 @@ static int tomoyo_write_domain_policy(struct tomoyo_io_buffer *head)
 			down_read(&tomoyo_domain_list_lock);
 			domain = tomoyo_find_domain(data);
 			up_read(&tomoyo_domain_list_lock);
-		} else if (is_undelete)
-			domain = tomoyo_undelete_domain(data);
-		else
+		} else
 			domain = tomoyo_find_or_assign_new_domain(data, 0);
 		head->write_var1 = domain;
 		return 0;
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 26a76d6..e77e6a6 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -88,10 +88,7 @@ struct tomoyo_domain_info {
 	/* Name of this domain. Never NULL.          */
 	const struct tomoyo_path_info *domainname;
 	u8 profile;        /* Profile number to use. */
-	u8 is_deleted;     /* Delete flag.
-			      0 = active.
-			      1 = deleted but undeletable.
-			      255 = deleted and no longer undeletable. */
+	bool is_deleted;   /* Delete flag.           */
 	bool quota_warned; /* Quota warnning flag.   */
 	/* DOMAIN_FLAGS_*. Use tomoyo_set_domain_flag() to modify. */
 	u8 flags;
@@ -144,7 +141,6 @@ struct tomoyo_double_path_acl_record {
 #define TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN      "no_initialize_domain "
 #define TOMOYO_KEYWORD_NO_KEEP_DOMAIN            "no_keep_domain "
 #define TOMOYO_KEYWORD_SELECT                    "select "
-#define TOMOYO_KEYWORD_UNDELETE                  "undelete "
 #define TOMOYO_KEYWORD_USE_PROFILE               "use_profile "
 #define TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ  "ignore_global_allow_read"
 /* A domain definition starts with <kernel>. */
@@ -267,8 +263,6 @@ struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname);
 struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
 							    domainname,
 							    const u8 profile);
-/* Undelete a domain. */
-struct tomoyo_domain_info *tomoyo_undelete_domain(const char *domainname);
 /* Check mode for specified functionality. */
 unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain,
 				const u8 index);
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 093a756..2f2b449 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -551,9 +551,7 @@ int tomoyo_write_alias_policy(char *data, const bool is_delete)
 	return tomoyo_update_alias_entry(data, cp, is_delete);
 }
 
-/* Domain create/delete/undelete handler. */
-
-/* #define TOMOYO_DEBUG_DOMAIN_UNDELETE */
+/* Domain create/delete handler. */
 
 /**
  * tomoyo_delete_domain - Delete a domain.
@@ -571,41 +569,15 @@ int tomoyo_delete_domain(char *domainname)
 	tomoyo_fill_path_info(&name);
 	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_list_lock);
-#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE
-	printk(KERN_DEBUG "tomoyo_delete_domain %s\n", domainname);
-	list_for_each_entry(domain, &tomoyo_domain_list, list) {
-		if (tomoyo_pathcmp(domain->domainname, &name))
-			continue;
-		printk(KERN_DEBUG "List: %p %u\n", domain, domain->is_deleted);
-	}
-#endif
 	/* Is there an active domain? */
 	list_for_each_entry(domain, &tomoyo_domain_list, list) {
-		struct tomoyo_domain_info *domain2;
 		/* Never delete tomoyo_kernel_domain */
 		if (domain == &tomoyo_kernel_domain)
 			continue;
 		if (domain->is_deleted ||
 		    tomoyo_pathcmp(domain->domainname, &name))
 			continue;
-		/* Mark already deleted domains as non undeletable. */
-		list_for_each_entry(domain2, &tomoyo_domain_list, list) {
-			if (!domain2->is_deleted ||
-			    tomoyo_pathcmp(domain2->domainname, &name))
-				continue;
-#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE
-			if (domain2->is_deleted != 255)
-				printk(KERN_DEBUG
-				       "Marked %p as non undeletable\n",
-				       domain2);
-#endif
-			domain2->is_deleted = 255;
-		}
-		/* Delete and mark active domain as undeletable. */
-		domain->is_deleted = 1;
-#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE
-		printk(KERN_DEBUG "Marked %p as undeletable\n", domain);
-#endif
+		domain->is_deleted = true;
 		break;
 	}
 	up_write(&tomoyo_domain_list_lock);
@@ -614,58 +586,6 @@ int tomoyo_delete_domain(char *domainname)
 }
 
 /**
- * tomoyo_undelete_domain - Undelete a domain.
- *
- * @domainname: The name of domain.
- *
- * Returns pointer to "struct tomoyo_domain_info" on success, NULL otherwise.
- */
-struct tomoyo_domain_info *tomoyo_undelete_domain(const char *domainname)
-{
-	struct tomoyo_domain_info *domain;
-	struct tomoyo_domain_info *candidate_domain = NULL;
-	struct tomoyo_path_info name;
-
-	name.name = domainname;
-	tomoyo_fill_path_info(&name);
-	/***** EXCLUSIVE SECTION START *****/
-	down_write(&tomoyo_domain_list_lock);
-#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE
-	printk(KERN_DEBUG "tomoyo_undelete_domain %s\n", domainname);
-	list_for_each_entry(domain, &tomoyo_domain_list, list) {
-		if (tomoyo_pathcmp(domain->domainname, &name))
-			continue;
-		printk(KERN_DEBUG "List: %p %u\n", domain, domain->is_deleted);
-	}
-#endif
-	list_for_each_entry(domain, &tomoyo_domain_list, list) {
-		if (tomoyo_pathcmp(&name, domain->domainname))
-			continue;
-		if (!domain->is_deleted) {
-			/* This domain is active. I can't undelete. */
-			candidate_domain = NULL;
-#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE
-			printk(KERN_DEBUG "%p is active. I can't undelete.\n",
-			       domain);
-#endif
-			break;
-		}
-		/* Is this domain undeletable? */
-		if (domain->is_deleted == 1)
-			candidate_domain = domain;
-	}
-	if (candidate_domain) {
-		candidate_domain->is_deleted = 0;
-#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE
-		printk(KERN_DEBUG "%p was undeleted.\n", candidate_domain);
-#endif
-	}
-	up_write(&tomoyo_domain_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
-	return candidate_domain;
-}
-
-/**
  * tomoyo_find_or_assign_new_domain - Create a domain.
  *
  * @domainname: The name of domain.
@@ -711,10 +631,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
 		/***** CRITICAL SECTION END *****/
 		if (flag)
 			continue;
-#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE
-		printk(KERN_DEBUG "Reusing %p %s\n", domain,
-		       domain->domainname->name);
-#endif
 		list_for_each_entry(ptr, &domain->acl_info_list, list) {
 			ptr->type |= TOMOYO_ACL_DELETED;
 		}
@@ -722,7 +638,7 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
 		domain->profile = profile;
 		domain->quota_warned = false;
 		mb(); /* Avoid out-of-order execution. */
-		domain->is_deleted = 0;
+		domain->is_deleted = false;
 		goto out;
 	}
 	/* No memory reusable. Create using new memory. */
-- 
cgit v0.10.2


From a2e87d06ddbe6e6fdb8d6d2e5e985efe4efb07dd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:44:59 +0200
Subject: perf_counter: update mmap() counter read, take 2

Update the userspace read method.

Paul noted that:
 - userspace cannot observe ->lock & 1 on the same cpu.
 - we need a barrier() between reading ->lock and ->index
   to ensure we read them in that prticular order.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.368446033@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f2b914d..e22ab47 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -170,22 +170,18 @@ struct perf_counter_mmap_page {
 	 *   u32 seq;
 	 *   s64 count;
 	 *
-	 * again:
-	 *   seq = pc->lock;
-	 *   if (unlikely(seq & 1)) {
-	 *     cpu_relax();
-	 *     goto again;
-	 *   }
+	 *   do {
+	 *     seq = pc->lock;
 	 *
-	 *   if (pc->index) {
-	 *     count = pmc_read(pc->index - 1);
-	 *     count += pc->offset;
-	 *   } else
-	 *     goto regular_read;
+	 *     barrier()
+	 *     if (pc->index) {
+	 *       count = pmc_read(pc->index - 1);
+	 *       count += pc->offset;
+	 *     } else
+	 *       goto regular_read;
 	 *
-	 *   barrier();
-	 *   if (pc->lock != seq)
-	 *     goto again;
+	 *     barrier();
+	 *   } while (pc->lock != seq);
 	 *
 	 * NOTE: for obvious reason this only works on self-monitoring
 	 *       processes.
-- 
cgit v0.10.2


From 9c03d88e328d5f28f13191622c2ea1349c36b799 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:00 +0200
Subject: perf_counter: add more context information

Change the callchain context entries to u16, so as to gain some space.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.457320003@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e22ab47..f9d5cf0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -507,10 +507,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
-#define MAX_STACK_DEPTH		254
+#define MAX_STACK_DEPTH		255
 
 struct perf_callchain_entry {
-	u32	nr, hv, kernel, user;
+	u16	nr, hv, kernel, user;
 	u64	ip[MAX_STACK_DEPTH];
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2a5d4f5..727624d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1657,9 +1657,7 @@ void perf_counter_do_pending(void)
  * Callchain support -- arch specific
  */
 
-struct perf_callchain_entry *
-__attribute__((weak))
-perf_callchain(struct pt_regs *regs)
+__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 {
 	return NULL;
 }
@@ -1819,7 +1817,7 @@ void perf_counter_output(struct perf_counter *counter,
 		callchain = perf_callchain(regs);
 
 		if (callchain) {
-			callchain_size = (2 + callchain->nr) * sizeof(u64);
+			callchain_size = (1 + callchain->nr) * sizeof(u64);
 
 			header.type |= __PERF_EVENT_CALLCHAIN;
 			header.size += callchain_size;
-- 
cgit v0.10.2


From 3c446b3d3b38f991f97e9d2df0ad26a60a94dcff Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:01 +0200
Subject: perf_counter: SIGIO support

Provide support for fcntl() I/O availability signals.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.579788800@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f9d5cf0..8d5d11b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -238,6 +238,7 @@ enum perf_event_type {
 #include <linux/rcupdate.h>
 #include <linux/spinlock.h>
 #include <linux/hrtimer.h>
+#include <linux/fs.h>
 #include <asm/atomic.h>
 
 struct task_struct;
@@ -398,6 +399,7 @@ struct perf_counter {
 
 	/* poll related */
 	wait_queue_head_t		waitq;
+	struct fasync_struct		*fasync;
 	/* optional: for NMIs */
 	struct perf_wakeup_entry	wakeup;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 727624d..c58cc64 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1526,6 +1526,22 @@ out:
 	return ret;
 }
 
+static int perf_fasync(int fd, struct file *filp, int on)
+{
+	struct perf_counter *counter = filp->private_data;
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int retval;
+
+	mutex_lock(&inode->i_mutex);
+	retval = fasync_helper(fd, filp, on, &counter->fasync);
+	mutex_unlock(&inode->i_mutex);
+
+	if (retval < 0)
+		return retval;
+
+	return 0;
+}
+
 static const struct file_operations perf_fops = {
 	.release		= perf_release,
 	.read			= perf_read,
@@ -1533,6 +1549,7 @@ static const struct file_operations perf_fops = {
 	.unlocked_ioctl		= perf_ioctl,
 	.compat_ioctl		= perf_ioctl,
 	.mmap			= perf_mmap,
+	.fasync			= perf_fasync,
 };
 
 /*
@@ -1549,7 +1566,7 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	rcu_read_lock();
 	data = rcu_dereference(counter->data);
 	if (data) {
-		(void)atomic_xchg(&data->wakeup, POLL_IN);
+		atomic_set(&data->wakeup, POLL_IN);
 		/*
 		 * Ensure all data writes are issued before updating the
 		 * user-space data head information. The matching rmb()
@@ -1561,6 +1578,7 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	rcu_read_unlock();
 
 	wake_up_all(&counter->waitq);
+	kill_fasync(&counter->fasync, SIGIO, POLL_IN);
 }
 
 /*
-- 
cgit v0.10.2


From 671dec5daf3b3c43c5777be282f00120a44cf37f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:02 +0200
Subject: perf_counter: generalize pending infrastructure

Prepare the pending infrastructure to do more than wakeups.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.634732847@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8d5d11b..977fb15 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -321,8 +321,9 @@ struct perf_mmap_data {
 	void 				*data_pages[0];
 };
 
-struct perf_wakeup_entry {
-	struct perf_wakeup_entry *next;
+struct perf_pending_entry {
+	struct perf_pending_entry *next;
+	void (*func)(struct perf_pending_entry *);
 };
 
 /**
@@ -401,7 +402,7 @@ struct perf_counter {
 	wait_queue_head_t		waitq;
 	struct fasync_struct		*fasync;
 	/* optional: for NMIs */
-	struct perf_wakeup_entry	wakeup;
+	struct perf_pending_entry	pending;
 
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c58cc64..0a2ade2 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1581,6 +1581,14 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	kill_fasync(&counter->fasync, SIGIO, POLL_IN);
 }
 
+static void perf_pending_wakeup(struct perf_pending_entry *entry)
+{
+	struct perf_counter *counter = container_of(entry,
+			struct perf_counter, pending);
+
+	perf_counter_wakeup(counter);
+}
+
 /*
  * Pending wakeups
  *
@@ -1590,45 +1598,47 @@ void perf_counter_wakeup(struct perf_counter *counter)
  * single linked list and use cmpxchg() to add entries lockless.
  */
 
-#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL)
+#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
 
-static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = {
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
 	PENDING_TAIL,
 };
 
-static void perf_pending_queue(struct perf_counter *counter)
+static void perf_pending_queue(struct perf_pending_entry *entry,
+			       void (*func)(struct perf_pending_entry *))
 {
-	struct perf_wakeup_entry **head;
-	struct perf_wakeup_entry *prev, *next;
+	struct perf_pending_entry **head;
 
-	if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL)
+	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
 		return;
 
-	head = &get_cpu_var(perf_wakeup_head);
+	entry->func = func;
+
+	head = &get_cpu_var(perf_pending_head);
 
 	do {
-		prev = counter->wakeup.next = *head;
-		next = &counter->wakeup;
-	} while (cmpxchg(head, prev, next) != prev);
+		entry->next = *head;
+	} while (cmpxchg(head, entry->next, entry) != entry->next);
 
 	set_perf_counter_pending();
 
-	put_cpu_var(perf_wakeup_head);
+	put_cpu_var(perf_pending_head);
 }
 
 static int __perf_pending_run(void)
 {
-	struct perf_wakeup_entry *list;
+	struct perf_pending_entry *list;
 	int nr = 0;
 
-	list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL);
+	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
 	while (list != PENDING_TAIL) {
-		struct perf_counter *counter = container_of(list,
-				struct perf_counter, wakeup);
+		void (*func)(struct perf_pending_entry *);
+		struct perf_pending_entry *entry = list;
 
 		list = list->next;
 
-		counter->wakeup.next = NULL;
+		func = entry->func;
+		entry->next = NULL;
 		/*
 		 * Ensure we observe the unqueue before we issue the wakeup,
 		 * so that we won't be waiting forever.
@@ -1636,7 +1646,7 @@ static int __perf_pending_run(void)
 		 */
 		smp_wmb();
 
-		perf_counter_wakeup(counter);
+		func(entry);
 		nr++;
 	}
 
@@ -1658,7 +1668,7 @@ static inline int perf_not_pending(struct perf_counter *counter)
 	 * so that we do not miss the wakeup. -- see perf_pending_handle()
 	 */
 	smp_rmb();
-	return counter->wakeup.next == NULL;
+	return counter->pending.next == NULL;
 }
 
 static void perf_pending_sync(struct perf_counter *counter)
@@ -1695,9 +1705,10 @@ struct perf_output_handle {
 
 static inline void __perf_output_wakeup(struct perf_output_handle *handle)
 {
-	if (handle->nmi)
-		perf_pending_queue(handle->counter);
-	else
+	if (handle->nmi) {
+		perf_pending_queue(&handle->counter->pending,
+				   perf_pending_wakeup);
+	} else
 		perf_counter_wakeup(handle->counter);
 }
 
-- 
cgit v0.10.2


From b6276f353bf490add62dcf7db0ebd75baa3e1a37 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:03 +0200
Subject: perf_counter: x86: self-IPI for pending work

Implement set_perf_counter_pending() with a self-IPI so that it will
run ASAP in a usable context.

For now use a second IRQ vector, because the primary vector pokes
the apic in funny ways that seem to confuse things.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.724626696@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index c2e6bed..fe24d28 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -50,6 +50,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
 #ifdef CONFIG_PERF_COUNTERS
 BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
+BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
 #endif
 
 #ifdef CONFIG_X86_MCE_P4THERMAL
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 2545442..f5ebe2a 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,6 +14,7 @@ typedef struct {
 #endif
 	unsigned int generic_irqs;	/* arch dependent */
 	unsigned int apic_perf_irqs;
+	unsigned int apic_pending_irqs;
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index ae80f64..7309c0a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -30,6 +30,7 @@ extern void apic_timer_interrupt(void);
 extern void generic_interrupt(void);
 extern void error_interrupt(void);
 extern void perf_counter_interrupt(void);
+extern void perf_pending_interrupt(void);
 
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79b..545bb81 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -117,6 +117,11 @@
 #define GENERIC_INTERRUPT_VECTOR	0xed
 
 /*
+ * Performance monitoring pending work vector:
+ */
+#define LOCAL_PENDING_VECTOR		0xec
+
+/*
  * First APIC vector available to drivers: (vectors 0x30-0xee) we
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index e2b0e66..d08dd52 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,7 +84,8 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
 
-#define set_perf_counter_pending()	do { } while (0)
+extern void set_perf_counter_pending(void);
+
 #define clear_perf_counter_pending()	do { } while (0)
 #define test_perf_counter_pending()	(0)
 
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index c74e20d..4384158 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -849,6 +849,20 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
+void smp_perf_pending_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+	ack_APIC_irq();
+	inc_irq_stat(apic_pending_irqs);
+	perf_counter_do_pending();
+	irq_exit();
+}
+
+void set_perf_counter_pending(void)
+{
+	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+}
+
 void perf_counters_lapic_init(int nmi)
 {
 	u32 apic_val;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3f129d9..1d46cba 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1028,6 +1028,8 @@ apicinterrupt SPURIOUS_APIC_VECTOR \
 #ifdef CONFIG_PERF_COUNTERS
 apicinterrupt LOCAL_PERF_VECTOR \
 	perf_counter_interrupt smp_perf_counter_interrupt
+apicinterrupt LOCAL_PENDING_VECTOR \
+	perf_pending_interrupt smp_perf_pending_interrupt
 #endif
 
 /*
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 9c27543..d465487 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -67,6 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
 	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "PND: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
+	seq_printf(p, "  Performance pending work\n");
 #endif
 	if (generic_interrupt_extension) {
 		seq_printf(p, "PLT: ");
@@ -171,6 +175,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
 	sum += irq_stats(cpu)->apic_perf_irqs;
+	sum += irq_stats(cpu)->apic_pending_irqs;
 #endif
 	if (generic_interrupt_extension)
 		sum += irq_stats(cpu)->generic_irqs;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 925d87c..3190a6b 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -166,6 +166,7 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 # ifdef CONFIG_PERF_COUNTERS
 	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
 # endif
 
 # ifdef CONFIG_X86_MCE_P4THERMAL
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 665e2ab..53ceb26 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -156,6 +156,7 @@ static void __init apic_intr_init(void)
 	/* Performance monitoring interrupt: */
 #ifdef CONFIG_PERF_COUNTERS
 	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
 #endif
 }
 
-- 
cgit v0.10.2


From f6c7d5fe58b4846ee0cb4b98b6042489705eced4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:04 +0200
Subject: perf_counter: theres more to overflow than writing events

Prepare for more generic overflow handling. The new perf_counter_overflow()
method will handle the generic bits of the counter overflow, and can return
a !0 return value, in which case the counter should be (soft) disabled, so
that it won't count until it's properly disabled.

XXX: do powerpc and swcounter

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.812109629@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 0a4d14f..f88c35d 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -732,7 +732,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 * Finally record data if requested.
 	 */
 	if (record)
-		perf_counter_output(counter, 1, regs);
+		perf_counter_overflow(counter, 1, regs);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 4384158..1116a41 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -800,7 +800,8 @@ again:
 			continue;
 
 		perf_save_and_restart(counter);
-		perf_counter_output(counter, nmi, regs);
+		if (perf_counter_overflow(counter, nmi, regs))
+			__pmc_generic_disable(counter, &counter->hw, bit);
 	}
 
 	hw_perf_ack_status(ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 977fb15..ca2d4df 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -491,8 +491,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_counter_context *ctx, int cpu);
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
-extern void perf_counter_output(struct perf_counter *counter,
-				int nmi, struct pt_regs *regs);
+extern int perf_counter_overflow(struct perf_counter *counter,
+				 int nmi, struct pt_regs *regs);
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0a2ade2..195e976 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1800,8 +1800,8 @@ static void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-void perf_counter_output(struct perf_counter *counter,
-			 int nmi, struct pt_regs *regs)
+static void perf_counter_output(struct perf_counter *counter,
+				int nmi, struct pt_regs *regs)
 {
 	int ret;
 	u64 record_type = counter->hw_event.record_type;
@@ -2034,6 +2034,17 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 }
 
 /*
+ * Generic counter overflow handling.
+ */
+
+int perf_counter_overflow(struct perf_counter *counter,
+			  int nmi, struct pt_regs *regs)
+{
+	perf_counter_output(counter, nmi, regs);
+	return 0;
+}
+
+/*
  * Generic software counter infrastructure
  */
 
@@ -2077,6 +2088,7 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 
 static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 {
+	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_counter *counter;
 	struct pt_regs *regs;
 
@@ -2092,12 +2104,14 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 			!counter->hw_event.exclude_user)
 		regs = task_pt_regs(current);
 
-	if (regs)
-		perf_counter_output(counter, 0, regs);
+	if (regs) {
+		if (perf_counter_overflow(counter, 0, regs))
+			ret = HRTIMER_NORESTART;
+	}
 
 	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
 
-	return HRTIMER_RESTART;
+	return ret;
 }
 
 static void perf_swcounter_overflow(struct perf_counter *counter,
@@ -2105,7 +2119,10 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 {
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	perf_counter_output(counter, nmi, regs);
+	if (perf_counter_overflow(counter, nmi, regs))
+		/* soft-disable the counter */
+		;
+
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
-- 
cgit v0.10.2


From ebb3c4c4cb81d64cc041356915ec015e2c57092a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:05 +0200
Subject: perf_counter: fix the mlock accounting

Reading through the code I saw I forgot the finish the mlock accounting.
Do so now.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.899767331@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 195e976..c841563 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1461,13 +1461,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
+		vma->vm_mm->locked_vm -= counter->data->nr_pages + 1;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
 	}
 }
 
 static struct vm_operations_struct perf_mmap_vmops = {
-	.open = perf_mmap_open,
+	.open  = perf_mmap_open,
 	.close = perf_mmap_close,
 	.fault = perf_mmap_fault,
 };
@@ -1499,24 +1500,32 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	if (vma->vm_pgoff != 0)
 		return -EINVAL;
 
-	locked = vma_size >>  PAGE_SHIFT;
-	locked += vma->vm_mm->locked_vm;
+	mutex_lock(&counter->mmap_mutex);
+	if (atomic_inc_not_zero(&counter->mmap_count)) {
+		if (nr_pages != counter->data->nr_pages)
+			ret = -EINVAL;
+		goto unlock;
+	}
+
+	locked = vma->vm_mm->locked_vm;
+	locked += nr_pages + 1;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
 
-	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK))
-		return -EPERM;
-
-	mutex_lock(&counter->mmap_mutex);
-	if (atomic_inc_not_zero(&counter->mmap_count))
-		goto out;
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		ret = -EPERM;
+		goto unlock;
+	}
 
 	WARN_ON(counter->data);
 	ret = perf_mmap_data_alloc(counter, nr_pages);
-	if (!ret)
-		atomic_set(&counter->mmap_count, 1);
-out:
+	if (ret)
+		goto unlock;
+
+	atomic_set(&counter->mmap_count, 1);
+	vma->vm_mm->locked_vm += nr_pages + 1;
+unlock:
 	mutex_unlock(&counter->mmap_mutex);
 
 	vma->vm_flags &= ~VM_MAYWRITE;
-- 
cgit v0.10.2


From 339f7c90b8a2f3aa2dd4267e79f797999e8a3c59 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:06 +0200
Subject: perf_counter: PERF_RECORD_TIME

By popular request, provide means to log a timestamp along with the
counter overflow event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.024173282@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index ca2d4df..928a7fa 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -102,6 +102,7 @@ enum perf_counter_record_format {
 	PERF_RECORD_TID		= 1U << 1,
 	PERF_RECORD_GROUP	= 1U << 2,
 	PERF_RECORD_CALLCHAIN	= 1U << 3,
+	PERF_RECORD_TIME	= 1U << 4,
 };
 
 /*
@@ -221,6 +222,7 @@ enum perf_event_type {
 	__PERF_EVENT_TID		= PERF_RECORD_TID,
 	__PERF_EVENT_GROUP		= PERF_RECORD_GROUP,
 	__PERF_EVENT_CALLCHAIN		= PERF_RECORD_CALLCHAIN,
+	__PERF_EVENT_TIME		= PERF_RECORD_TIME,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c841563..19990d1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1826,6 +1826,7 @@ static void perf_counter_output(struct perf_counter *counter,
 	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
 	int callchain_size = 0;
+	u64 time;
 
 	header.type = PERF_EVENT_COUNTER_OVERFLOW;
 	header.size = sizeof(header);
@@ -1862,6 +1863,16 @@ static void perf_counter_output(struct perf_counter *counter,
 		}
 	}
 
+	if (record_type & PERF_RECORD_TIME) {
+		/*
+		 * Maybe do better on x86 and provide cpu_clock_nmi()
+		 */
+		time = sched_clock();
+
+		header.type |= __PERF_EVENT_TIME;
+		header.size += sizeof(u64);
+	}
+
 	ret = perf_output_begin(&handle, counter, header.size, nmi);
 	if (ret)
 		return;
@@ -1895,6 +1906,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (callchain)
 		perf_output_copy(&handle, callchain, callchain_size);
 
+	if (record_type & PERF_RECORD_TIME)
+		perf_output_put(&handle, time);
+
 	perf_output_end(&handle);
 }
 
-- 
cgit v0.10.2


From 79f146415623fe74f39af67c0f6adc208939a410 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:07 +0200
Subject: perf_counter: counter overflow limit

Provide means to auto-disable the counter after 'n' overflow events.

Create the counter with hw_event.disabled = 1, and then issue an
ioctl(fd, PREF_COUNTER_IOC_REFRESH, n); to set the limit and enable
the counter.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.083139737@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 928a7fa..ef4dcbf 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -155,8 +155,9 @@ struct perf_counter_hw_event {
 /*
  * Ioctls that can be done on a perf counter fd:
  */
-#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
+#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_REFRESH	_IOW('$', 2, u32)
 
 /*
  * Structure of the page that can be mapped via mmap
@@ -403,9 +404,14 @@ struct perf_counter {
 	/* poll related */
 	wait_queue_head_t		waitq;
 	struct fasync_struct		*fasync;
-	/* optional: for NMIs */
+
+	/* delayed work for NMIs and such */
+	int				pending_wakeup;
+	int				pending_disable;
 	struct perf_pending_entry	pending;
 
+	atomic_t			event_limit;
+
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 19990d1..c05e103 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -744,6 +744,12 @@ static void perf_counter_enable(struct perf_counter *counter)
 	spin_unlock_irq(&ctx->lock);
 }
 
+static void perf_counter_refresh(struct perf_counter *counter, int refresh)
+{
+	atomic_add(refresh, &counter->event_limit);
+	perf_counter_enable(counter);
+}
+
 /*
  * Enable a counter and all its children.
  */
@@ -1311,6 +1317,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_COUNTER_IOC_DISABLE:
 		perf_counter_disable_family(counter);
 		break;
+	case PERF_COUNTER_IOC_REFRESH:
+		perf_counter_refresh(counter, arg);
+		break;
 	default:
 		err = -ENOTTY;
 	}
@@ -1590,14 +1599,6 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	kill_fasync(&counter->fasync, SIGIO, POLL_IN);
 }
 
-static void perf_pending_wakeup(struct perf_pending_entry *entry)
-{
-	struct perf_counter *counter = container_of(entry,
-			struct perf_counter, pending);
-
-	perf_counter_wakeup(counter);
-}
-
 /*
  * Pending wakeups
  *
@@ -1607,6 +1608,22 @@ static void perf_pending_wakeup(struct perf_pending_entry *entry)
  * single linked list and use cmpxchg() to add entries lockless.
  */
 
+static void perf_pending_counter(struct perf_pending_entry *entry)
+{
+	struct perf_counter *counter = container_of(entry,
+			struct perf_counter, pending);
+
+	if (counter->pending_disable) {
+		counter->pending_disable = 0;
+		perf_counter_disable(counter);
+	}
+
+	if (counter->pending_wakeup) {
+		counter->pending_wakeup = 0;
+		perf_counter_wakeup(counter);
+	}
+}
+
 #define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
 
 static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
@@ -1715,8 +1732,9 @@ struct perf_output_handle {
 static inline void __perf_output_wakeup(struct perf_output_handle *handle)
 {
 	if (handle->nmi) {
+		handle->counter->pending_wakeup = 1;
 		perf_pending_queue(&handle->counter->pending,
-				   perf_pending_wakeup);
+				   perf_pending_counter);
 	} else
 		perf_counter_wakeup(handle->counter);
 }
@@ -2063,8 +2081,21 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 int perf_counter_overflow(struct perf_counter *counter,
 			  int nmi, struct pt_regs *regs)
 {
+	int events = atomic_read(&counter->event_limit);
+	int ret = 0;
+
+	if (events && atomic_dec_and_test(&counter->event_limit)) {
+		ret = 1;
+		if (nmi) {
+			counter->pending_disable = 1;
+			perf_pending_queue(&counter->pending,
+					   perf_pending_counter);
+		} else
+			perf_counter_disable(counter);
+	}
+
 	perf_counter_output(counter, nmi, regs);
-	return 0;
+	return ret;
 }
 
 /*
-- 
cgit v0.10.2


From 0c593b3411341e3a05a61f5527df36ab02bd11e8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:08 +0200
Subject: perf_counter: comment the perf_event_type stuff

Describe the event format.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.211174347@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index ef4dcbf..81220188 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -208,6 +208,20 @@ struct perf_event_header {
 
 enum perf_event_type {
 
+	/*
+	 * The MMAP events record the PROT_EXEC mappings so that we can
+	 * correlate userspace IPs to code. They have the following structure:
+	 *
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 *
+	 * 	u32				pid, tid;
+	 * 	u64				addr;
+	 * 	u64				len;
+	 * 	u64				pgoff;
+	 * 	char				filename[];
+	 * };
+	 */
 	PERF_EVENT_MMAP			= 1,
 	PERF_EVENT_MUNMAP		= 2,
 
@@ -217,6 +231,24 @@ enum perf_event_type {
 	 *
 	 * These events will have types of the form:
 	 *   PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } *
+	 *
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 *
+	 * 	{ u64			ip;	  } && __PERF_EVENT_IP
+	 * 	{ u32			pid, tid; } && __PERF_EVENT_TID
+	 *
+	 * 	{ u64			nr;
+	 * 	  { u64 event, val; } 	cnt[nr];  } && __PERF_EVENT_GROUP
+	 *
+	 * 	{ u16			nr,
+	 * 				hv,
+	 * 				kernel,
+	 * 				user;
+	 * 	  u64			ips[nr];  } && __PERF_EVENT_CALLCHAIN
+	 *
+	 * 	{ u64			time;     } && __PERF_EVENT_TIME
+	 * };
 	 */
 	PERF_EVENT_COUNTER_OVERFLOW	= 1UL << 31,
 	__PERF_EVENT_IP			= PERF_RECORD_IP,
-- 
cgit v0.10.2


From 4c9e25428ff46b968a30f1dfafdba550cb6e4141 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:09 +0200
Subject: perf_counter: change event definition

Currently the definition of an event is slightly ambiguous. We have
wakeup events, for poll() and SIGIO, which are either generated
when a record crosses a page boundary (hw_events.wakeup_events == 0),
or every wakeup_events new records.

Now a record can be either a counter overflow record, or a number of
different things, like the mmap PROT_EXEC region notifications.

Then there is the PERF_COUNTER_IOC_REFRESH event limit, which only
considers counter overflows.

This patch changes then wakeup_events and SIGIO notification to only
consider overflow events. Furthermore it changes the SIGIO notification
to report SIGHUP when the event limit is reached and the counter will
be disabled.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.266679874@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 81220188..0f5a400 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -439,6 +439,7 @@ struct perf_counter {
 
 	/* delayed work for NMIs and such */
 	int				pending_wakeup;
+	int				pending_kill;
 	int				pending_disable;
 	struct perf_pending_entry	pending;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c05e103..8c8eaf0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1596,7 +1596,11 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	rcu_read_unlock();
 
 	wake_up_all(&counter->waitq);
-	kill_fasync(&counter->fasync, SIGIO, POLL_IN);
+
+	if (counter->pending_kill) {
+		kill_fasync(&counter->fasync, SIGIO, counter->pending_kill);
+		counter->pending_kill = 0;
+	}
 }
 
 /*
@@ -1727,6 +1731,7 @@ struct perf_output_handle {
 	unsigned int		head;
 	int			wakeup;
 	int			nmi;
+	int			overflow;
 };
 
 static inline void __perf_output_wakeup(struct perf_output_handle *handle)
@@ -1741,7 +1746,7 @@ static inline void __perf_output_wakeup(struct perf_output_handle *handle)
 
 static int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_counter *counter, unsigned int size,
-			     int nmi)
+			     int nmi, int overflow)
 {
 	struct perf_mmap_data *data;
 	unsigned int offset, head;
@@ -1751,8 +1756,9 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if (!data)
 		goto out;
 
-	handle->counter	= counter;
-	handle->nmi	= nmi;
+	handle->counter	 = counter;
+	handle->nmi	 = nmi;
+	handle->overflow = overflow;
 
 	if (!data->nr_pages)
 		goto fail;
@@ -1816,7 +1822,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 {
 	int wakeup_events = handle->counter->hw_event.wakeup_events;
 
-	if (wakeup_events) {
+	if (handle->overflow && wakeup_events) {
 		int events = atomic_inc_return(&handle->data->events);
 		if (events >= wakeup_events) {
 			atomic_sub(wakeup_events, &handle->data->events);
@@ -1891,7 +1897,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
-	ret = perf_output_begin(&handle, counter, header.size, nmi);
+	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
 	if (ret)
 		return;
 
@@ -1955,7 +1961,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 {
 	struct perf_output_handle handle;
 	int size = mmap_event->event.header.size;
-	int ret = perf_output_begin(&handle, counter, size, 0);
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
 
 	if (ret)
 		return;
@@ -2084,8 +2090,10 @@ int perf_counter_overflow(struct perf_counter *counter,
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	counter->pending_kill = POLL_IN;
 	if (events && atomic_dec_and_test(&counter->event_limit)) {
 		ret = 1;
+		counter->pending_kill = POLL_HUP;
 		if (nmi) {
 			counter->pending_disable = 1;
 			perf_pending_queue(&counter->pending,
-- 
cgit v0.10.2


From 4af4998b8aa35600f4c4a4f3c3a23baca6081d02 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:10 +0200
Subject: perf_counter: rework context time

Since perf_counter_context is switched along with tasks, we can
maintain the context time without using the task runtime clock.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.353552838@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0f5a400..7f5d353 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -477,14 +477,10 @@ struct perf_counter_context {
 	struct task_struct	*task;
 
 	/*
-	 * time_now is the current time in nanoseconds since an arbitrary
-	 * point in the past.  For per-task counters, this is based on the
-	 * task clock, and for per-cpu counters it is based on the cpu clock.
-	 * time_lost is an offset from the task/cpu clock, used to make it
-	 * appear that time only passes while the context is scheduled in.
+	 * Context clock, runs when context enabled.
 	 */
-	u64			time_now;
-	u64			time_lost;
+	u64			time;
+	u64			timestamp;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8c8eaf0..84d85ab 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -117,7 +117,7 @@ counter_sched_out(struct perf_counter *counter,
 		return;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->tstamp_stopped = ctx->time_now;
+	counter->tstamp_stopped = ctx->time;
 	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
@@ -253,27 +253,20 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
-/*
- * Get the current time for this context.
- * If this is a task context, we use the task's task clock,
- * or for a per-cpu context, we use the cpu clock.
- */
-static u64 get_context_time(struct perf_counter_context *ctx, int update)
+static inline u64 perf_clock(void)
 {
-	struct task_struct *curr = ctx->task;
-
-	if (!curr)
-		return cpu_clock(smp_processor_id());
-
-	return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime;
+	return cpu_clock(smp_processor_id());
 }
 
 /*
  * Update the record of the current time in a context.
  */
-static void update_context_time(struct perf_counter_context *ctx, int update)
+static void update_context_time(struct perf_counter_context *ctx)
 {
-	ctx->time_now = get_context_time(ctx, update) - ctx->time_lost;
+	u64 now = perf_clock();
+
+	ctx->time += now - ctx->timestamp;
+	ctx->timestamp = now;
 }
 
 /*
@@ -284,15 +277,17 @@ static void update_counter_times(struct perf_counter *counter)
 	struct perf_counter_context *ctx = counter->ctx;
 	u64 run_end;
 
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
-		counter->total_time_enabled = ctx->time_now -
-			counter->tstamp_enabled;
-		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
-			run_end = counter->tstamp_stopped;
-		else
-			run_end = ctx->time_now;
-		counter->total_time_running = run_end - counter->tstamp_running;
-	}
+	if (counter->state < PERF_COUNTER_STATE_INACTIVE)
+		return;
+
+	counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
+
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+		run_end = counter->tstamp_stopped;
+	else
+		run_end = ctx->time;
+
+	counter->total_time_running = run_end - counter->tstamp_running;
 }
 
 /*
@@ -332,7 +327,7 @@ static void __perf_counter_disable(void *info)
 	 * If it is in error state, leave it in error state.
 	 */
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
-		update_context_time(ctx, 1);
+		update_context_time(ctx);
 		update_counter_times(counter);
 		if (counter == counter->group_leader)
 			group_sched_out(counter, cpuctx, ctx);
@@ -426,7 +421,7 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
-	counter->tstamp_running += ctx->time_now - counter->tstamp_stopped;
+	counter->tstamp_running += ctx->time - counter->tstamp_stopped;
 
 	if (!is_software_counter(counter))
 		cpuctx->active_oncpu++;
@@ -493,9 +488,9 @@ static void add_counter_to_ctx(struct perf_counter *counter,
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 	counter->prev_state = PERF_COUNTER_STATE_OFF;
-	counter->tstamp_enabled = ctx->time_now;
-	counter->tstamp_running = ctx->time_now;
-	counter->tstamp_stopped = ctx->time_now;
+	counter->tstamp_enabled = ctx->time;
+	counter->tstamp_running = ctx->time;
+	counter->tstamp_stopped = ctx->time;
 }
 
 /*
@@ -522,7 +517,7 @@ static void __perf_install_in_context(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
-	update_context_time(ctx, 1);
+	update_context_time(ctx);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -648,13 +643,13 @@ static void __perf_counter_enable(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
-	update_context_time(ctx, 1);
+	update_context_time(ctx);
 
 	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled;
+	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -737,8 +732,8 @@ static void perf_counter_enable(struct perf_counter *counter)
 	 */
 	if (counter->state == PERF_COUNTER_STATE_OFF) {
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled = ctx->time_now -
-			counter->total_time_enabled;
+		counter->tstamp_enabled =
+			ctx->time - counter->total_time_enabled;
 	}
  out:
 	spin_unlock_irq(&ctx->lock);
@@ -778,7 +773,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 	ctx->is_active = 0;
 	if (likely(!ctx->nr_counters))
 		goto out;
-	update_context_time(ctx, 0);
+	update_context_time(ctx);
 
 	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
@@ -883,12 +878,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	if (likely(!ctx->nr_counters))
 		goto out;
 
-	/*
-	 * Add any time since the last sched_out to the lost time
-	 * so it doesn't get included in the total_time_enabled and
-	 * total_time_running measures for counters in the context.
-	 */
-	ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now;
+	ctx->timestamp = perf_clock();
 
 	flags = hw_perf_save_disable();
 
@@ -1043,8 +1033,8 @@ int perf_counter_task_enable(void)
 		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled = ctx->time_now -
-			counter->total_time_enabled;
+		counter->tstamp_enabled =
+			ctx->time - counter->total_time_enabled;
 		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
@@ -1113,7 +1103,7 @@ static void __read(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	if (ctx->is_active)
-		update_context_time(ctx, 1);
+		update_context_time(ctx);
 	counter->hw_ops->read(counter);
 	update_counter_times(counter);
 	curr_rq_unlock_irq_restore(&flags);
-- 
cgit v0.10.2


From a39d6f2556c4a19f58f538c6aa28bf8faca4fcb8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:11 +0200
Subject: perf_counter: rework the task clock software counter

Rework the task clock software counter to use the context time instead
of the task runtime clock, this removes the last such user.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.445450972@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 84d85ab..56b7eb5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -974,9 +974,6 @@ int perf_counter_task_disable(void)
 	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
-	/* force the update of the task clock: */
-	__task_delta_exec(curr, 1);
-
 	perf_counter_task_sched_out(curr, cpu);
 
 	spin_lock(&ctx->lock);
@@ -1017,9 +1014,6 @@ int perf_counter_task_enable(void)
 	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
-	/* force the update of the task clock: */
-	__task_delta_exec(curr, 1);
-
 	perf_counter_task_sched_out(curr, cpu);
 
 	spin_lock(&ctx->lock);
@@ -2347,38 +2341,28 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
  * Software counter: task time clock
  */
 
-/*
- * Called from within the scheduler:
- */
-static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
-{
-	struct task_struct *curr = counter->task;
-	u64 delta;
-
-	delta = __task_delta_exec(curr, update);
-
-	return curr->se.sum_exec_runtime + delta;
-}
-
-static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+static void task_clock_perf_counter_update(struct perf_counter *counter)
 {
-	u64 prev;
+	u64 prev, now;
 	s64 delta;
 
-	prev = atomic64_read(&counter->hw.prev_count);
-
-	atomic64_set(&counter->hw.prev_count, now);
+	update_context_time(counter->ctx);
+	now = counter->ctx->time;
 
+	prev = atomic64_xchg(&counter->hw.prev_count, now);
 	delta = now - prev;
-
 	atomic64_add(delta, &counter->count);
 }
 
 static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
+	u64 now;
+
+	update_context_time(counter->ctx);
+	now = counter->ctx->time;
 
-	atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0));
+	atomic64_set(&hwc->prev_count, now);
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
@@ -2393,14 +2377,12 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
 	hrtimer_cancel(&counter->hw.hrtimer);
-	task_clock_perf_counter_update(counter,
-			task_clock_perf_counter_val(counter, 0));
+	task_clock_perf_counter_update(counter);
 }
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter,
-			task_clock_perf_counter_val(counter, 1));
+	task_clock_perf_counter_update(counter);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
-- 
cgit v0.10.2


From 849691a6cd40270ff5f4a8846d5f6bf8df663ffc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:12 +0200
Subject: perf_counter: remove rq->lock usage

Now that all the task runtime clock users are gone, remove the ugly
rq->lock usage from perf counters, which solves the nasty deadlock
seen when a software task clock counter was read from an NMI overflow
context.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.531137582@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index b6d2887..080d1fd 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -85,8 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 /*
  * Lock/unlock the current runqueue - to extract task statistics:
  */
-extern void curr_rq_lock_irq_save(unsigned long *flags);
-extern void curr_rq_unlock_irq_restore(unsigned long *flags);
 extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 56b7eb5..f4f7596 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -172,8 +172,7 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	curr_rq_lock_irq_save(&flags);
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	counter_sched_out(counter, cpuctx, ctx);
 
@@ -198,8 +197,7 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock(&ctx->lock);
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 
@@ -319,8 +317,7 @@ static void __perf_counter_disable(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	curr_rq_lock_irq_save(&flags);
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	/*
 	 * If the counter is on, turn it off.
@@ -336,8 +333,7 @@ static void __perf_counter_disable(void *info)
 		counter->state = PERF_COUNTER_STATE_OFF;
 	}
 
-	spin_unlock(&ctx->lock);
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -515,8 +511,7 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	curr_rq_lock_irq_save(&flags);
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 	update_context_time(ctx);
 
 	/*
@@ -565,8 +560,7 @@ static void __perf_install_in_context(void *info)
  unlock:
 	hw_perf_restore(perf_flags);
 
-	spin_unlock(&ctx->lock);
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -641,8 +635,7 @@ static void __perf_counter_enable(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	curr_rq_lock_irq_save(&flags);
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 	update_context_time(ctx);
 
 	counter->prev_state = counter->state;
@@ -678,8 +671,7 @@ static void __perf_counter_enable(void *info)
 	}
 
  unlock:
-	spin_unlock(&ctx->lock);
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -971,7 +963,7 @@ int perf_counter_task_disable(void)
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	curr_rq_lock_irq_save(&flags);
+	local_irq_save(flags);
 	cpu = smp_processor_id();
 
 	perf_counter_task_sched_out(curr, cpu);
@@ -992,9 +984,7 @@ int perf_counter_task_disable(void)
 
 	hw_perf_restore(perf_flags);
 
-	spin_unlock(&ctx->lock);
-
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 
 	return 0;
 }
@@ -1011,7 +1001,7 @@ int perf_counter_task_enable(void)
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	curr_rq_lock_irq_save(&flags);
+	local_irq_save(flags);
 	cpu = smp_processor_id();
 
 	perf_counter_task_sched_out(curr, cpu);
@@ -1037,7 +1027,7 @@ int perf_counter_task_enable(void)
 
 	perf_counter_task_sched_in(curr, cpu);
 
-	curr_rq_unlock_irq_restore(&flags);
+	local_irq_restore(flags);
 
 	return 0;
 }
@@ -1095,12 +1085,12 @@ static void __read(void *info)
 	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
 
-	curr_rq_lock_irq_save(&flags);
+	local_irq_save(flags);
 	if (ctx->is_active)
 		update_context_time(ctx);
 	counter->hw_ops->read(counter);
 	update_counter_times(counter);
-	curr_rq_unlock_irq_restore(&flags);
+	local_irq_restore(flags);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -2890,7 +2880,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		 * Be careful about zapping the list - IRQ/NMI context
 		 * could still be processing it:
 		 */
-		curr_rq_lock_irq_save(&flags);
+		local_irq_save(flags);
 		perf_flags = hw_perf_save_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
@@ -2903,7 +2893,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		child_ctx->nr_counters--;
 
 		hw_perf_restore(perf_flags);
-		curr_rq_unlock_irq_restore(&flags);
+		local_irq_restore(flags);
 	}
 
 	parent_counter = child_counter->parent;
diff --git a/kernel/sched.c b/kernel/sched.c
index f76e3c0..0de2f81 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -997,26 +997,6 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
-void curr_rq_lock_irq_save(unsigned long *flags)
-	__acquires(rq->lock)
-{
-	struct rq *rq;
-
-	local_irq_save(*flags);
-	rq = cpu_rq(smp_processor_id());
-	spin_lock(&rq->lock);
-}
-
-void curr_rq_unlock_irq_restore(unsigned long *flags)
-	__releases(rq->lock)
-{
-	struct rq *rq;
-
-	rq = cpu_rq(smp_processor_id());
-	spin_unlock(&rq->lock);
-	local_irq_restore(*flags);
-}
-
 void task_rq_unlock_wait(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
-- 
cgit v0.10.2


From bce379bf358738ab8efc8cda2594a299ac685085 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:13 +0200
Subject: perf_counter: minimize context time updates

Push the update_context_time() calls up the stack so that we get less
invokations and thereby a less noisy output:

before:

 # ./perfstat -e 1:0 -e 1:1 -e 1:1 -e 1:1 -l ls > /dev/null

 Performance counter stats for 'ls':

      10.163691  cpu clock ticks      (msecs)  (scaled from 98.94%)
      10.215360  task clock ticks     (msecs)  (scaled from 98.18%)
      10.185549  task clock ticks     (msecs)  (scaled from 98.53%)
      10.183581  task clock ticks     (msecs)  (scaled from 98.71%)

 Wall-clock time elapsed:    11.912858 msecs

after:

 # ./perfstat -e 1:0 -e 1:1 -e 1:1 -e 1:1 -l ls > /dev/null

 Performance counter stats for 'ls':

       9.316630  cpu clock ticks      (msecs)
       9.280789  task clock ticks     (msecs)
       9.280789  task clock ticks     (msecs)
       9.280789  task clock ticks     (msecs)

 Wall-clock time elapsed:     9.574872 msecs

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.618876874@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f4f7596..863703b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -319,6 +319,8 @@ static void __perf_counter_disable(void *info)
 
 	spin_lock_irqsave(&ctx->lock, flags);
 
+	update_context_time(ctx);
+
 	/*
 	 * If the counter is on, turn it off.
 	 * If it is in error state, leave it in error state.
@@ -797,6 +799,8 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 	if (likely(!cpuctx->task_ctx))
 		return;
 
+	update_context_time(ctx);
+
 	regs = task_pt_regs(task);
 	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
 	__perf_counter_sched_out(ctx, cpuctx);
@@ -2336,7 +2340,6 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)
 	u64 prev, now;
 	s64 delta;
 
-	update_context_time(counter->ctx);
 	now = counter->ctx->time;
 
 	prev = atomic64_xchg(&counter->hw.prev_count, now);
@@ -2349,7 +2352,6 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	u64 now;
 
-	update_context_time(counter->ctx);
 	now = counter->ctx->time;
 
 	atomic64_set(&hwc->prev_count, now);
@@ -2372,6 +2374,7 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter)
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
+	update_context_time(counter->ctx);
 	task_clock_perf_counter_update(counter);
 }
 
-- 
cgit v0.10.2


From 6278af660ff83fbafb18e53fc2747eb2ee6780fa Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Thu, 2 Apr 2009 10:40:28 +0200
Subject: perf_counter tools: kerneltop: display per function percentage along
 with event count

------------------------------------------------------------------------------
 KernelTop:   90551 irqs/sec  kernel:15.0% [NMI, 100000 CPU cycles],  (all, 4 CPUs)
------------------------------------------------------------------------------

             events    pcnt         RIP          kernel function
  ______     ______   _____   ________________   _______________

           16871.00 - 19.1% - ffffffff80328e20 : clear_page_c
            8810.00 -  9.9% - ffffffff8048ce80 : page_fault
            4746.00 -  5.4% - ffffffff8048cae2 : _spin_lock
            4428.00 -  5.0% - ffffffff80328e70 : copy_page_c
            3340.00 -  3.8% - ffffffff80329090 : copy_user_generic_string!
            2679.00 -  3.0% - ffffffff8028a16b : get_page_from_freelist
            2254.00 -  2.5% - ffffffff80296f19 : unmap_vmas
            2082.00 -  2.4% - ffffffff80297e19 : handle_mm_fault
            1754.00 -  2.0% - ffffffff80288dc8 : __rmqueue_smallest
            1553.00 -  1.8% - ffffffff8048ca58 : _spin_lock_irqsave
            1400.00 -  1.6% - ffffffff8028cdc8 : release_pages
            1337.00 -  1.5% - ffffffff80285400 : find_get_page
            1335.00 -  1.5% - ffffffff80225a23 : do_page_fault
            1299.00 -  1.5% - ffffffff802ba8e7 : __d_lookup
            1174.00 -  1.3% - ffffffff802b38f3 : __link_path_walk
            1155.00 -  1.3% - ffffffff802843e1 : perf_swcounter_ctx_event!
            1137.00 -  1.3% - ffffffff8028d118 : ____pagevec_lru_add
             963.00 -  1.1% - ffffffff802a670b : kmem_cache_alloc
             885.00 -  1.0% - ffffffff8024bc61 : __wake_up_bit

Display per function percentage along with event count.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 4f8d791..15f3a5f 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -636,16 +636,20 @@ static void print_sym_table(void)
 	int counter;
 	float events_per_sec = events/delay_secs;
 	float kevents_per_sec = (events-userspace_events)/delay_secs;
+	float sum_kevents = 0.0;
 
 	events = userspace_events = 0;
 	memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
 	qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
 
+	for (i = 0; i < sym_table_count && tmp[i].count[0]; i++)
+		sum_kevents += tmp[i].count[0];
+
 	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
 
 	printf(
 "------------------------------------------------------------------------------\n");
-	printf( " KernelTop:%8.0f irqs/sec  kernel:%3.1f%% [%s, ",
+	printf( " KernelTop:%8.0f irqs/sec  kernel:%4.1f%% [%s, ",
 		events_per_sec,
 		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
 		nmi ? "NMI" : "IRQ");
@@ -679,34 +683,31 @@ static void print_sym_table(void)
 	printf("------------------------------------------------------------------------------\n\n");
 
 	if (nr_counters == 1)
-		printf("             events");
+		printf("             events    pcnt");
 	else
-		printf("  weight     events");
+		printf("  weight     events    pcnt");
 
 	printf("         RIP          kernel function\n"
-	       	       "  ______     ______   ________________   _______________\n\n"
+	       	       "  ______     ______   _____   ________________   _______________\n\n"
 	);
 
-	printed = 0;
-	for (i = 0; i < sym_table_count; i++) {
+	for (i = 0, printed = 0; i < sym_table_count; i++) {
+		float pcnt;
 		int count;
 
-		if (nr_counters == 1) {
-			if (printed <= 18 &&
-					tmp[i].count[0] >= count_filter) {
-				printf("%19.2f - %016llx : %s\n",
-				  sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
-				printed++;
-			}
-		} else {
-			if (printed <= 18 &&
-					tmp[i].count[0] >= count_filter) {
-				printf("%8.1f %10ld - %016llx : %s\n",
-				  sym_weight(tmp + i),
-				  tmp[i].count[0],
-				  tmp[i].addr, tmp[i].sym);
-				printed++;
-			}
+		if (printed <= 18 && tmp[i].count[0] >= count_filter) {
+			pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
+
+			if (nr_counters == 1)
+				printf("%19.2f - %4.1f%% - %016llx : %s\n",
+					sym_weight(tmp + i),
+					pcnt, tmp[i].addr, tmp[i].sym);
+			else
+				printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
+					sym_weight(tmp + i),
+					tmp[i].count[0],
+					pcnt, tmp[i].addr, tmp[i].sym);
+			printed++;
 		}
 		/*
 		 * Add decay to the counts:
-- 
cgit v0.10.2


From 98c2aaf8be5baf7193be37fb28bce8e7327158bc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 7 Apr 2009 11:30:17 +0200
Subject: x86, perfcounters: add atomic64_xchg()

Complete atomic64_t support on the 32-bit side by adding atomic64_xch().

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090406094518.445450972@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 977250e..aff9f1f 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -291,19 +291,37 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
 }
 
 /**
- * atomic64_set - set atomic64 variable
+ * atomic64_xchg - xchg atomic64 variable
  * @ptr:      pointer to type atomic64_t
  * @new_val:  value to assign
+ * @old_val:  old value that was there
  *
- * Atomically sets the value of @ptr to @new_val.
+ * Atomically xchgs the value of @ptr to @new_val and returns
+ * the old value.
  */
-static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
+
+static inline unsigned long long
+atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
 {
 	unsigned long long old_val;
 
 	do {
 		old_val = atomic_read(ptr);
 	} while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
+
+	return old_val;
+}
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @ptr:      pointer to type atomic64_t
+ * @new_val:  value to assign
+ *
+ * Atomically sets the value of @ptr to @new_val.
+ */
+static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
+{
+	atomic64_xchg(ptr, new_val);
 }
 
 /**
-- 
cgit v0.10.2


From cac94f979326212831c0ea44ed9ea1622b4f4e93 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:33 +0200
Subject: x86, bts: fix race when bts tracer is removed

When the bts tracer is removed while the traced task is running,
the write to clear the bts tracer pointer races with context switch code.

Read the tracer once during a context switch.

When a new tracer is installed, the bts tracer is set in the ds context
before the tracer is initialized in order to claim the context for that
tracer.

This may result in write accesses using an uninitialized trace configuration
when scheduling timestamps have been requested.

Store active tracing flags separately and only set active flags after
the tracing configuration has been initialized.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144548.881338000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index b1d6e1f..c730155 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -89,6 +89,9 @@ struct bts_tracer {
 
 	/* Buffer overflow notification function: */
 	bts_ovfl_callback_t	ovfl;
+
+	/* Active flags affecting trace collection. */
+	unsigned int		flags;
 };
 
 struct pebs_tracer {
@@ -799,6 +802,8 @@ void ds_suspend_bts(struct bts_tracer *tracer)
 	if (!tracer)
 		return;
 
+	tracer->flags = 0;
+
 	task = tracer->ds.context->task;
 
 	if (!task || (task == current))
@@ -820,6 +825,8 @@ void ds_resume_bts(struct bts_tracer *tracer)
 	if (!tracer)
 		return;
 
+	tracer->flags = tracer->trace.ds.flags;
+
 	task = tracer->ds.context->task;
 
 	control = ds_cfg.ctl[dsf_bts];
@@ -1037,43 +1044,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	}
 }
 
+static inline void ds_take_timestamp(struct ds_context *context,
+				     enum bts_qualifier qualifier,
+				     struct task_struct *task)
+{
+	struct bts_tracer *tracer = context->bts_master;
+	struct bts_struct ts;
+
+	/* Prevent compilers from reading the tracer pointer twice. */
+	barrier();
+
+	if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
+		return;
+
+	memset(&ts, 0, sizeof(ts));
+	ts.qualifier			= qualifier;
+	ts.variant.timestamp.jiffies	= jiffies_64;
+	ts.variant.timestamp.pid	= task->pid;
+
+	bts_write(tracer, &ts);
+}
+
 /*
  * Change the DS configuration from tracing prev to tracing next.
  */
 void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 {
-	struct ds_context *prev_ctx = prev->thread.ds_ctx;
-	struct ds_context *next_ctx = next->thread.ds_ctx;
+	struct ds_context *prev_ctx	= prev->thread.ds_ctx;
+	struct ds_context *next_ctx	= next->thread.ds_ctx;
+	unsigned long debugctlmsr	= next->thread.debugctlmsr;
+
+	/* Make sure all data is read before we start. */
+	barrier();
 
 	if (prev_ctx) {
 		update_debugctlmsr(0);
 
-		if (prev_ctx->bts_master &&
-		    (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-			struct bts_struct ts = {
-				.qualifier = bts_task_departs,
-				.variant.timestamp.jiffies = jiffies_64,
-				.variant.timestamp.pid = prev->pid
-			};
-			bts_write(prev_ctx->bts_master, &ts);
-		}
+		ds_take_timestamp(prev_ctx, bts_task_departs, prev);
 	}
 
 	if (next_ctx) {
-		if (next_ctx->bts_master &&
-		    (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-			struct bts_struct ts = {
-				.qualifier = bts_task_arrives,
-				.variant.timestamp.jiffies = jiffies_64,
-				.variant.timestamp.pid = next->pid
-			};
-			bts_write(next_ctx->bts_master, &ts);
-		}
+		ds_take_timestamp(next_ctx, bts_task_arrives, next);
 
 		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
 	}
 
-	update_debugctlmsr(next->thread.debugctlmsr);
+	update_debugctlmsr(debugctlmsr);
 }
 
 void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
-- 
cgit v0.10.2


From a26b89f05d194413c7238e0bea071054f6b5d3c8 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:34 +0200
Subject: sched, hw-branch-tracer: add wait_task_context_switch() function to
 sched.h

Add a function to wait until some other task has been
switched out at least once.

This differs from wait_task_inactive() subtly, in that the
latter will wait until the task has left the CPU.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: markus.t.metzger@gmail.com
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144549.794157000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b94f354..a5b9a83 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1993,8 +1993,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+extern void wait_task_context_switch(struct task_struct *p);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void wait_task_context_switch(struct task_struct *p) {}
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {
diff --git a/kernel/sched.c b/kernel/sched.c
index 6cc1fd5..f91bc81 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2003,6 +2003,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 }
 
 /*
+ * wait_task_context_switch -	wait for a thread to complete at least one
+ *				context switch.
+ *
+ * @p must not be current.
+ */
+void wait_task_context_switch(struct task_struct *p)
+{
+	unsigned long nvcsw, nivcsw, flags;
+	int running;
+	struct rq *rq;
+
+	nvcsw	= p->nvcsw;
+	nivcsw	= p->nivcsw;
+	for (;;) {
+		/*
+		 * The runqueue is assigned before the actual context
+		 * switch. We need to take the runqueue lock.
+		 *
+		 * We could check initially without the lock but it is
+		 * very likely that we need to take the lock in every
+		 * iteration.
+		 */
+		rq = task_rq_lock(p, &flags);
+		running = task_running(rq, p);
+		task_rq_unlock(rq, &flags);
+
+		if (likely(!running))
+			break;
+		/*
+		 * The switch count is incremented before the actual
+		 * context switch. We thus wait for two switches to be
+		 * sure at least one completed.
+		 */
+		if ((p->nvcsw - nvcsw) > 1)
+			break;
+		if ((p->nivcsw - nivcsw) > 1)
+			break;
+
+		cpu_relax();
+	}
+}
+
+/*
  * wait_task_inactive - wait for a thread to unschedule.
  *
  * If @match_state is nonzero, it's the @p->state value just checked and
-- 
cgit v0.10.2


From e2b371f00a6f529f6362654239bdec8dcd510760 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:35 +0200
Subject: mm, x86, ptrace, bts: defer branch trace stopping

When a ptraced task is unlinked, we need to stop branch tracing for
that task.

Since the unlink is called with interrupts disabled, and we need
interrupts enabled to stop branch tracing, we defer the work.

Collect all branch tracing related stuff in a branch tracing context.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144550.712401000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 34c5237..2483807 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -458,10 +458,6 @@ struct thread_struct {
 /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
 	struct ds_context	*ds_ctx;
 #endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-	unsigned int	bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fe9345c..7c21d1e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,7 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/ftrace.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -577,17 +578,119 @@ static int ioperm_get(struct task_struct *target,
 }
 
 #ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * A branch trace store context.
+ *
+ * Contexts may only be installed by ptrace_bts_config() and only for
+ * ptraced tasks.
+ *
+ * Contexts are destroyed when the tracee is detached from the tracer.
+ * The actual destruction work requires interrupts enabled, so the
+ * work is deferred and will be scheduled during __ptrace_unlink().
+ *
+ * Contexts hold an additional task_struct reference on the traced
+ * task, as well as a reference on the tracer's mm.
+ *
+ * Ptrace already holds a task_struct for the duration of ptrace operations,
+ * but since destruction is deferred, it may be executed after both
+ * tracer and tracee exited.
+ */
+struct bts_context {
+	/* The branch trace handle. */
+	struct bts_tracer	*tracer;
+
+	/* The buffer used to store the branch trace and its size. */
+	void			*buffer;
+	unsigned int		size;
+
+	/* The mm that paid for the above buffer. */
+	struct mm_struct	*mm;
+
+	/* The task this context belongs to. */
+	struct task_struct	*task;
+
+	/* The signal to send on a bts buffer overflow. */
+	unsigned int		bts_ovfl_signal;
+
+	/* The work struct to destroy a context. */
+	struct work_struct	work;
+};
+
+static inline void alloc_bts_buffer(struct bts_context *context,
+				    unsigned int size)
+{
+	void *buffer;
+
+	buffer = alloc_locked_buffer(size);
+	if (buffer) {
+		context->buffer = buffer;
+		context->size = size;
+		context->mm = get_task_mm(current);
+	}
+}
+
+static inline void free_bts_buffer(struct bts_context *context)
+{
+	if (!context->buffer)
+		return;
+
+	kfree(context->buffer);
+	context->buffer = NULL;
+
+	refund_locked_buffer_memory(context->mm, context->size);
+	context->size = 0;
+
+	mmput(context->mm);
+	context->mm = NULL;
+}
+
+static void free_bts_context_work(struct work_struct *w)
+{
+	struct bts_context *context;
+
+	context = container_of(w, struct bts_context, work);
+
+	ds_release_bts(context->tracer);
+	put_task_struct(context->task);
+	free_bts_buffer(context);
+	kfree(context);
+}
+
+static inline void free_bts_context(struct bts_context *context)
+{
+	INIT_WORK(&context->work, free_bts_context_work);
+	schedule_work(&context->work);
+}
+
+static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+{
+	struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (context) {
+		context->task = task;
+		task->bts = context;
+
+		get_task_struct(task);
+	}
+
+	return context;
+}
+
 static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 				  struct bts_struct __user *out)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	struct bts_struct bts;
 	const unsigned char *at;
 	int error;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	at = trace->ds.top - ((index + 1) * trace->ds.size);
 	if ((void *)at < trace->ds.begin)
@@ -596,7 +699,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 	if (!trace->read)
 		return -EOPNOTSUPP;
 
-	error = trace->read(child->bts, at, &bts);
+	error = trace->read(context->tracer, at, &bts);
 	if (error < 0)
 		return error;
 
@@ -610,13 +713,18 @@ static int ptrace_bts_drain(struct task_struct *child,
 			    long size,
 			    struct bts_struct __user *out)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	const unsigned char *at;
 	int error, drained = 0;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	if (!trace->read)
 		return -EOPNOTSUPP;
@@ -627,9 +735,8 @@ static int ptrace_bts_drain(struct task_struct *child,
 	for (at = trace->ds.begin; (void *)at < trace->ds.top;
 	     out++, drained++, at += trace->ds.size) {
 		struct bts_struct bts;
-		int error;
 
-		error = trace->read(child->bts, at, &bts);
+		error = trace->read(context->tracer, at, &bts);
 		if (error < 0)
 			return error;
 
@@ -639,35 +746,18 @@ static int ptrace_bts_drain(struct task_struct *child,
 
 	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	error = ds_reset_bts(child->bts);
+	error = ds_reset_bts(context->tracer);
 	if (error < 0)
 		return error;
 
 	return drained;
 }
 
-static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
-{
-	child->bts_buffer = alloc_locked_buffer(size);
-	if (!child->bts_buffer)
-		return -ENOMEM;
-
-	child->bts_size = size;
-
-	return 0;
-}
-
-static void ptrace_bts_free_buffer(struct task_struct *child)
-{
-	free_locked_buffer(child->bts_buffer, child->bts_size);
-	child->bts_buffer = NULL;
-	child->bts_size = 0;
-}
-
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
 {
+	struct bts_context *context;
 	struct ptrace_bts_config cfg;
 	unsigned int flags = 0;
 
@@ -677,28 +767,31 @@ static int ptrace_bts_config(struct task_struct *child,
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
 		return -EFAULT;
 
-	if (child->bts) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-	}
+	context = child->bts;
+	if (!context)
+		context = alloc_bts_context(child);
+	if (!context)
+		return -ENOMEM;
 
 	if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
 		if (!cfg.signal)
 			return -EINVAL;
 
-		child->thread.bts_ovfl_signal = cfg.signal;
 		return -EOPNOTSUPP;
+		context->bts_ovfl_signal = cfg.signal;
 	}
 
-	if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
-	    (cfg.size != child->bts_size)) {
-		int error;
+	ds_release_bts(context->tracer);
+	context->tracer = NULL;
 
-		ptrace_bts_free_buffer(child);
+	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+		free_bts_buffer(context);
+		if (!cfg.size)
+			return 0;
 
-		error = ptrace_bts_allocate_buffer(child, cfg.size);
-		if (error < 0)
-			return error;
+		alloc_bts_buffer(context, cfg.size);
+		if (!context->buffer)
+			return -ENOMEM;
 	}
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -707,15 +800,13 @@ static int ptrace_bts_config(struct task_struct *child,
 	if (cfg.flags & PTRACE_BTS_O_SCHED)
 		flags |= BTS_TIMESTAMPS;
 
-	child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
-				    /* ovfl = */ NULL, /* th = */ (size_t)-1,
-				    flags);
-	if (IS_ERR(child->bts)) {
-		int error = PTR_ERR(child->bts);
-
-		ptrace_bts_free_buffer(child);
-		child->bts = NULL;
+	context->tracer = ds_request_bts(child, context->buffer, context->size,
+					 NULL, (size_t)-1, flags);
+	if (unlikely(IS_ERR(context->tracer))) {
+		int error = PTR_ERR(context->tracer);
 
+		free_bts_buffer(context);
+		context->tracer = NULL;
 		return error;
 	}
 
@@ -726,20 +817,25 @@ static int ptrace_bts_status(struct task_struct *child,
 			     long cfg_size,
 			     struct ptrace_bts_config __user *ucfg)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	struct ptrace_bts_config cfg;
 
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
-	trace = ds_read_bts(child->bts);
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	memset(&cfg, 0, sizeof(cfg));
-	cfg.size = trace->ds.end - trace->ds.begin;
-	cfg.signal = child->thread.bts_ovfl_signal;
-	cfg.bts_size = sizeof(struct bts_struct);
+	cfg.size	= trace->ds.end - trace->ds.begin;
+	cfg.signal	= context->bts_ovfl_signal;
+	cfg.bts_size	= sizeof(struct bts_struct);
 
 	if (cfg.signal)
 		cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@ -758,67 +854,56 @@ static int ptrace_bts_status(struct task_struct *child,
 
 static int ptrace_bts_clear(struct task_struct *child)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	return ds_reset_bts(child->bts);
+	return ds_reset_bts(context->tracer);
 }
 
 static int ptrace_bts_size(struct task_struct *child)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 
-static void ptrace_bts_fork(struct task_struct *tsk)
+static inline void ptrace_bts_fork(struct task_struct *tsk)
 {
 	tsk->bts = NULL;
-	tsk->bts_buffer = NULL;
-	tsk->bts_size = 0;
-	tsk->thread.bts_ovfl_signal = 0;
 }
 
-static void ptrace_bts_untrace(struct task_struct *child)
+/*
+ * Called from __ptrace_unlink() after the child has been moved back
+ * to its original parent.
+ */
+static inline void ptrace_bts_untrace(struct task_struct *child)
 {
 	if (unlikely(child->bts)) {
-		ds_release_bts(child->bts);
+		free_bts_context(child->bts);
 		child->bts = NULL;
-
-		/* We cannot update total_vm and locked_vm since
-		   child's mm is already gone. But we can reclaim the
-		   memory. */
-		kfree(child->bts_buffer);
-		child->bts_buffer = NULL;
-		child->bts_size = 0;
 	}
 }
-
-static void ptrace_bts_detach(struct task_struct *child)
-{
-	/*
-	 * Ptrace_detach() races with ptrace_untrace() in case
-	 * the child dies and is reaped by another thread.
-	 *
-	 * We only do the memory accounting at this point and
-	 * leave the buffer deallocation and the bts tracer
-	 * release to ptrace_bts_untrace() which will be called
-	 * later on with tasklist_lock held.
-	 */
-	release_locked_buffer(child->bts_buffer, child->bts_size);
-}
 #else
 static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_detach(struct task_struct *child) {}
 static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
@@ -843,7 +928,6 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-	ptrace_bts_detach(child);
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d..64d8ed2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -13,6 +13,7 @@
 #include <linux/prio_tree.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
+#include <linux/sched.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1321,6 +1322,6 @@ void vmemmap_populate_print_last(void);
 
 extern void *alloc_locked_buffer(size_t size);
 extern void free_locked_buffer(void *buffer, size_t size);
-extern void release_locked_buffer(void *buffer, size_t size);
+extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a5b9a83..52b8cd0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -96,8 +96,8 @@ struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
 struct bio;
-struct bts_tracer;
 struct fs_struct;
+struct bts_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1210,12 +1210,7 @@ struct task_struct {
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
-	struct bts_tracer *bts;
-	/*
-	 * The buffer to hold the BTS data.
-	 */
-	void *bts_buffer;
-	size_t bts_size;
+	struct bts_context *bts;
 #endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
diff --git a/mm/mlock.c b/mm/mlock.c
index cbe9e05..749383b 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -660,21 +660,20 @@ void *alloc_locked_buffer(size_t size)
 	return buffer;
 }
 
-void release_locked_buffer(void *buffer, size_t size)
+void refund_locked_buffer_memory(struct mm_struct *mm, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
 
-	current->mm->total_vm  -= pgsz;
-	current->mm->locked_vm -= pgsz;
+	mm->total_vm  -= pgsz;
+	mm->locked_vm -= pgsz;
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&mm->mmap_sem);
 }
 
 void free_locked_buffer(void *buffer, size_t size)
 {
-	release_locked_buffer(buffer, size);
-
+	refund_locked_buffer_memory(current->mm, size);
 	kfree(buffer);
 }
-- 
cgit v0.10.2


From 8d99b3ac2726e5edd97ad147fa5c1f2acb63a745 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:36 +0200
Subject: x86, bts: wait until traced task has been scheduled out

In order to stop branch tracing for a running task, we need to first
clear the branch tracing control bits before we may free the tracing
buffer.

If the traced task is running, the cpu might still trace that task
after the branch trace control bits have cleared.

Wait until the traced task has been scheduled out before proceeding.

A similar problem affects the task debug store context. We first remove
the context, then we need to wait until the task has been scheduled
out before we can free the context memory.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144551.919636000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index c730155..5cd137a 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -299,6 +299,7 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
 
 static inline void ds_put_context(struct ds_context *context)
 {
+	struct task_struct *task;
 	unsigned long irq;
 
 	if (!context)
@@ -313,14 +314,20 @@ static inline void ds_put_context(struct ds_context *context)
 
 	*(context->this) = NULL;
 
-	if (context->task)
-		clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+	task = context->task;
+
+	if (task)
+		clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-	if (!context->task || (context->task == current))
+	if (!task || (task == current))
 		wrmsrl(MSR_IA32_DS_AREA, 0);
 
 	spin_unlock_irqrestore(&ds_lock, irq);
 
+	/* The context might still be in use for context switching. */
+	if (task && (task != current))
+		wait_task_context_switch(task);
+
 	kfree(context);
 }
 
@@ -781,15 +788,23 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 
 void ds_release_bts(struct bts_tracer *tracer)
 {
+	struct task_struct *task;
+
 	if (!tracer)
 		return;
 
+	task = tracer->ds.context->task;
+
 	ds_suspend_bts(tracer);
 
 	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
 	tracer->ds.context->bts_master = NULL;
 
-	put_tracer(tracer->ds.context->task);
+	/* Make sure tracing stopped and the tracer is not in use. */
+	if (task && (task != current))
+		wait_task_context_switch(task);
+
+	put_tracer(task);
 	ds_put_context(tracer->ds.context);
 
 	kfree(tracer);
-- 
cgit v0.10.2


From 38f801129ad07b9afa7f9bd3779f61b805416d8c Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:37 +0200
Subject: x86, bts: fix race between per-task and per-cpu branch tracing

Per-task branch tracing installs a debug store context with the traced
task. This immediately results in the branch trace control bits to be
cleared for the next context switch of that task, if not set before.

Either per-cpu or per-task tracing are allowed at the same time.

An active per-cpu tracing would be disabled even if the per-task tracing
request is rejected and the task debug store context removed.

Check the tracing type (per-cpu or per-task) before installing a task
debug store context.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144552.856000000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 5cd137a..f03f117 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -193,12 +193,28 @@ static DEFINE_SPINLOCK(ds_lock);
  */
 static atomic_t tracers = ATOMIC_INIT(0);
 
-static inline void get_tracer(struct task_struct *task)
+static inline int get_tracer(struct task_struct *task)
 {
-	if (task)
+	int error;
+
+	spin_lock_irq(&ds_lock);
+
+	if (task) {
+		error = -EPERM;
+		if (atomic_read(&tracers) < 0)
+			goto out;
 		atomic_inc(&tracers);
-	else
+	} else {
+		error = -EPERM;
+		if (atomic_read(&tracers) > 0)
+			goto out;
 		atomic_dec(&tracers);
+	}
+
+	error = 0;
+out:
+	spin_unlock_irq(&ds_lock);
+	return error;
 }
 
 static inline void put_tracer(struct task_struct *task)
@@ -209,14 +225,6 @@ static inline void put_tracer(struct task_struct *task)
 		atomic_inc(&tracers);
 }
 
-static inline int check_tracer(struct task_struct *task)
-{
-	return task ?
-		(atomic_read(&tracers) >= 0) :
-		(atomic_read(&tracers) <= 0);
-}
-
-
 /*
  * The DS context is either attached to a thread or to a cpu:
  * - in the former case, the thread_struct contains a pointer to the
@@ -677,6 +685,10 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	if (ovfl)
 		goto out;
 
+	error = get_tracer(task);
+	if (error < 0)
+		goto out;
+
 	/*
 	 * Per-cpu tracing is typically requested using smp_call_function().
 	 * We must not sleep.
@@ -684,7 +696,7 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	error = -ENOMEM;
 	tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC);
 	if (!tracer)
-		goto out;
+		goto out_put_tracer;
 	tracer->ovfl = ovfl;
 
 	error = ds_request(&tracer->ds, &tracer->trace.ds,
@@ -696,13 +708,8 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	spin_lock_irqsave(&ds_lock, irq);
 
 	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-	get_tracer(task);
-
-	error = -EPERM;
 	if (tracer->ds.context->bts_master)
-		goto out_put_tracer;
+		goto out_unlock;
 	tracer->ds.context->bts_master = tracer;
 
 	spin_unlock_irqrestore(&ds_lock, irq);
@@ -716,13 +723,13 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 
 	return tracer;
 
- out_put_tracer:
-	put_tracer(task);
  out_unlock:
 	spin_unlock_irqrestore(&ds_lock, irq);
 	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
+ out_put_tracer:
+	put_tracer(task);
  out:
 	return ERR_PTR(error);
 }
@@ -741,6 +748,10 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 	if (ovfl)
 		goto out;
 
+	error = get_tracer(task);
+	if (error < 0)
+		goto out;
+
 	/*
 	 * Per-cpu tracing is typically requested using smp_call_function().
 	 * We must not sleep.
@@ -748,7 +759,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 	error = -ENOMEM;
 	tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC);
 	if (!tracer)
-		goto out;
+		goto out_put_tracer;
 	tracer->ovfl = ovfl;
 
 	error = ds_request(&tracer->ds, &tracer->trace.ds,
@@ -759,13 +770,8 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 	spin_lock_irqsave(&ds_lock, irq);
 
 	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-	get_tracer(task);
-
-	error = -EPERM;
 	if (tracer->ds.context->pebs_master)
-		goto out_put_tracer;
+		goto out_unlock;
 	tracer->ds.context->pebs_master = tracer;
 
 	spin_unlock_irqrestore(&ds_lock, irq);
@@ -775,13 +781,13 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 
 	return tracer;
 
- out_put_tracer:
-	put_tracer(task);
  out_unlock:
 	spin_unlock_irqrestore(&ds_lock, irq);
 	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
+ out_put_tracer:
+	put_tracer(task);
  out:
 	return ERR_PTR(error);
 }
@@ -804,8 +810,8 @@ void ds_release_bts(struct bts_tracer *tracer)
 	if (task && (task != current))
 		wait_task_context_switch(task);
 
-	put_tracer(task);
 	ds_put_context(tracer->ds.context);
+	put_tracer(task);
 
 	kfree(tracer);
 }
@@ -861,16 +867,20 @@ void ds_resume_bts(struct bts_tracer *tracer)
 
 void ds_release_pebs(struct pebs_tracer *tracer)
 {
+	struct task_struct *task;
+
 	if (!tracer)
 		return;
 
+	task = tracer->ds.context->task;
+
 	ds_suspend_pebs(tracer);
 
 	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
 	tracer->ds.context->pebs_master = NULL;
 
-	put_tracer(tracer->ds.context->task);
 	ds_put_context(tracer->ds.context);
+	put_tracer(task);
 
 	kfree(tracer);
 }
-- 
cgit v0.10.2


From 15879d042164650b93d83281ad5f87ad323bfbfe Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:38 +0200
Subject: x86, bts: use trace_clock_global() for timestamps

Rename the bts_struct timestamp field to event.

Use trace_clock_global() for time measurement.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144553.773216000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a8f672b..772f141 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -170,9 +170,9 @@ struct bts_struct {
 		} lbr;
 		/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
 		struct {
-			__u64 jiffies;
+			__u64 clock;
 			pid_t pid;
-		} timestamp;
+		} event;
 	} variant;
 };
 
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index f03f117..2071b99 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -25,6 +25,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/trace_clock.h>
 
 #include <asm/ds.h>
 
@@ -471,7 +472,7 @@ enum bts_field {
 	bts_flags,
 
 	bts_qual		= bts_from,
-	bts_jiffies		= bts_to,
+	bts_clock		= bts_to,
 	bts_pid			= bts_flags,
 
 	bts_qual_mask		= (bts_qual_max - 1),
@@ -517,8 +518,8 @@ bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
 	memset(out, 0, sizeof(*out));
 	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
 		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
-		out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
-		out->variant.timestamp.pid = bts_get(at, bts_pid);
+		out->variant.event.clock = bts_get(at, bts_clock);
+		out->variant.event.pid = bts_get(at, bts_pid);
 	} else {
 		out->qualifier = bts_branch;
 		out->variant.lbr.from = bts_get(at, bts_from);
@@ -555,8 +556,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
 	case bts_task_arrives:
 	case bts_task_departs:
 		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
-		bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
-		bts_set(raw, bts_pid, in->variant.timestamp.pid);
+		bts_set(raw, bts_clock, in->variant.event.clock);
+		bts_set(raw, bts_pid, in->variant.event.pid);
 		break;
 	default:
 		return -EINVAL;
@@ -1083,9 +1084,9 @@ static inline void ds_take_timestamp(struct ds_context *context,
 		return;
 
 	memset(&ts, 0, sizeof(ts));
-	ts.qualifier			= qualifier;
-	ts.variant.timestamp.jiffies	= jiffies_64;
-	ts.variant.timestamp.pid	= task->pid;
+	ts.qualifier		= qualifier;
+	ts.variant.event.clock	= trace_clock_global();
+	ts.variant.event.pid	= task->pid;
 
 	bts_write(tracer, &ts);
 }
-- 
cgit v0.10.2


From 35bb7600c17762bb129588c1877d2717fe325289 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:39 +0200
Subject: x86, debugctlmsr: add _on_cpu variants to debugctlmsr functions

Add functions to get and set the debugctlmsr on different cpus.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144554.738772000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2483807..1efeb49 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -785,6 +785,21 @@ static inline unsigned long get_debugctlmsr(void)
     return debugctlmsr;
 }
 
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+	u64 debugctlmsr = 0;
+	u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return 0;
+#endif
+	rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+	debugctlmsr = val1 | ((u64)val2 << 32);
+
+	return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -794,6 +809,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+static inline void update_debugctlmsr_on_cpu(int cpu,
+					     unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return;
+#endif
+	wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+		     (u32)((u64)debugctlmsr),
+		     (u32)((u64)debugctlmsr >> 32));
+}
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
-- 
cgit v0.10.2


From de79f54f5347ad7ec6ff55ccbb6d4ab2a21f6a93 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:40 +0200
Subject: x86, bts, hw-branch-tracer: add _noirq variants to the debug store
 interface

The hw-branch-tracer uses debug store functions from an on_each_cpu()
context, which is simply wrong since the functions may sleep.

Add _noirq variants for most functions, which  may be called with
interrupts disabled.

Separate per-cpu and per-task tracing and allow per-cpu tracing to be
controlled from any cpu.

Make the hw-branch-tracer use the new debug store interface, synchronize
with hotplug cpu event using get/put_online_cpus(), and remove the
unnecessary spinlock.

Make the ptrace bts and the ds selftest code use the new interface.

Defer the ds selftest.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144555.658136000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 772f141..413e127 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -15,8 +15,8 @@
  * - buffer allocation (memory accounting)
  *
  *
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
+ * Copyright (C) 2007-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
 #ifndef _ASM_X86_DS_H
@@ -83,8 +83,10 @@ enum ds_feature {
  * The interrupt threshold is independent from the overflow callback
  * to allow users to use their own overflow interrupt handling mechanism.
  *
- * task: the task to request recording for;
- *       NULL for per-cpu recording on the current cpu
+ * The function might sleep.
+ *
+ * task: the task to request recording for
+ * cpu:  the cpu to request recording for
  * base: the base pointer for the (non-pageable) buffer;
  * size: the size of the provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
@@ -93,19 +95,28 @@ enum ds_feature {
  *     -1 if no interrupt threshold is requested.
  * flags: a bit-mask of the above flags
  */
-extern struct bts_tracer *ds_request_bts(struct task_struct *task,
-					 void *base, size_t size,
-					 bts_ovfl_callback_t ovfl,
-					 size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-					   void *base, size_t size,
-					   pebs_ovfl_callback_t ovfl,
-					   size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+					      void *base, size_t size,
+					      bts_ovfl_callback_t ovfl,
+					      size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+					     bts_ovfl_callback_t ovfl,
+					     size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+						void *base, size_t size,
+						pebs_ovfl_callback_t ovfl,
+						size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
+					       void *base, size_t size,
+					       pebs_ovfl_callback_t ovfl,
+					       size_t th, unsigned int flags);
 
 /*
  * Release BTS or PEBS resources
  * Suspend and resume BTS or PEBS tracing
  *
+ * Must be called with irq's enabled.
+ *
  * tracer: the tracer handle returned from ds_request_~()
  */
 extern void ds_release_bts(struct bts_tracer *tracer);
@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
 extern void ds_suspend_pebs(struct pebs_tracer *tracer);
 extern void ds_resume_pebs(struct pebs_tracer *tracer);
 
+/*
+ * Release BTS or PEBS resources
+ * Suspend and resume BTS or PEBS tracing
+ *
+ * Cpu tracers must call this on the traced cpu.
+ * Task tracers must call ds_release_~_noirq() for themselves.
+ *
+ * May be called with irq's disabled.
+ *
+ * Returns 0 if successful;
+ * -EPERM if the cpu tracer does not trace the current cpu.
+ * -EPERM if the task tracer does not trace itself.
+ *
+ * tracer: the tracer handle returned from ds_request_~()
+ */
+extern int ds_release_bts_noirq(struct bts_tracer *tracer);
+extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
+extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
+extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
+
 
 /*
  * The raw DS buffer state as it is used for BTS and PEBS recording.
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 2071b99..21a3852 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -245,60 +245,50 @@ struct ds_context {
 	struct pebs_tracer	*pebs_master;
 
 	/* Use count: */
-	unsigned long count;
+	unsigned long		count;
 
 	/* Pointer to the context pointer field: */
 	struct ds_context	**this;
 
-	/* The traced task; NULL for current cpu: */
+	/* The traced task; NULL for cpu tracing: */
 	struct task_struct	*task;
-};
 
-static DEFINE_PER_CPU(struct ds_context *, system_context_array);
+	/* The traced cpu; only valid if task is NULL: */
+	int			cpu;
+};
 
-#define system_context per_cpu(system_context_array, smp_processor_id())
+static DEFINE_PER_CPU(struct ds_context *, cpu_context);
 
 
-static inline struct ds_context *ds_get_context(struct task_struct *task)
+static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
 {
 	struct ds_context **p_context =
-		(task ? &task->thread.ds_ctx : &system_context);
+		(task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
 	struct ds_context *context = NULL;
 	struct ds_context *new_context = NULL;
-	unsigned long irq;
 
-	/*
-	 * Chances are small that we already have a context.
-	 *
-	 * Contexts for per-cpu tracing are allocated using
-	 * smp_call_function(). We must not sleep.
-	 */
-	new_context = kzalloc(sizeof(*new_context), GFP_ATOMIC);
+	/* Chances are small that we already have a context. */
+	new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
 	if (!new_context)
 		return NULL;
 
-	spin_lock_irqsave(&ds_lock, irq);
+	spin_lock_irq(&ds_lock);
 
 	context = *p_context;
-	if (!context) {
+	if (likely(!context)) {
 		context = new_context;
 
 		context->this = p_context;
 		context->task = task;
+		context->cpu = cpu;
 		context->count = 0;
 
-		if (task)
-			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
-
-		if (!task || (task == current))
-			wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
-
 		*p_context = context;
 	}
 
 	context->count++;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
 	if (context != new_context)
 		kfree(new_context);
@@ -306,7 +296,7 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
 	return context;
 }
 
-static inline void ds_put_context(struct ds_context *context)
+static void ds_put_context(struct ds_context *context)
 {
 	struct task_struct *task;
 	unsigned long irq;
@@ -328,8 +318,15 @@ static inline void ds_put_context(struct ds_context *context)
 	if (task)
 		clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-	if (!task || (task == current))
-		wrmsrl(MSR_IA32_DS_AREA, 0);
+	/*
+	 * We leave the (now dangling) pointer to the DS configuration in
+	 * the DS_AREA msr. This is as good or as bad as replacing it with
+	 * NULL - the hardware would crash if we enabled tracing.
+	 *
+	 * This saves us some problems with having to write an msr on a
+	 * different cpu while preventing others from doing the same for the
+	 * next context for that same cpu.
+	 */
 
 	spin_unlock_irqrestore(&ds_lock, irq);
 
@@ -340,6 +337,31 @@ static inline void ds_put_context(struct ds_context *context)
 	kfree(context);
 }
 
+static void ds_install_ds_area(struct ds_context *context)
+{
+	unsigned long ds;
+
+	ds = (unsigned long)context->ds;
+
+	/*
+	 * There is a race between the bts master and the pebs master.
+	 *
+	 * The thread/cpu access is synchronized via get/put_cpu() for
+	 * task tracing and via wrmsr_on_cpu for cpu tracing.
+	 *
+	 * If bts and pebs are collected for the same task or same cpu,
+	 * the same confiuration is written twice.
+	 */
+	if (context->task) {
+		get_cpu();
+		if (context->task == current)
+			wrmsrl(MSR_IA32_DS_AREA, ds);
+		set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+		put_cpu();
+	} else
+		wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
+			     (u32)((u64)ds), (u32)((u64)ds >> 32));
+}
 
 /*
  * Call the tracer's callback on a buffer overflow.
@@ -622,6 +644,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
 	 * The value for 'no threshold' is -1, which will set the
 	 * threshold outside of the buffer, just like we want it.
 	 */
+	ith *= ds_cfg.sizeof_rec[qual];
 	trace->ith = (void *)(buffer + size - ith);
 
 	trace->flags = flags;
@@ -630,7 +653,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
 
 static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 		      enum ds_qualifier qual, struct task_struct *task,
-		      void *base, size_t size, size_t th, unsigned int flags)
+		      int cpu, void *base, size_t size, size_t th)
 {
 	struct ds_context *context;
 	int error;
@@ -643,7 +666,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 	if (!base)
 		goto out;
 
-	/* We require some space to do alignment adjustments below. */
+	/* We need space for alignment adjustments in ds_init_ds_trace(). */
 	error = -EINVAL;
 	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
 		goto out;
@@ -660,25 +683,27 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 	tracer->size = size;
 
 	error = -ENOMEM;
-	context = ds_get_context(task);
+	context = ds_get_context(task, cpu);
 	if (!context)
 		goto out;
 	tracer->context = context;
 
-	ds_init_ds_trace(trace, qual, base, size, th, flags);
+	/*
+	 * Defer any tracer-specific initialization work for the context until
+	 * context ownership has been clarified.
+	 */
 
 	error = 0;
  out:
 	return error;
 }
 
-struct bts_tracer *ds_request_bts(struct task_struct *task,
-				  void *base, size_t size,
-				  bts_ovfl_callback_t ovfl, size_t th,
-				  unsigned int flags)
+static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
+					 void *base, size_t size,
+					 bts_ovfl_callback_t ovfl, size_t th,
+					 unsigned int flags)
 {
 	struct bts_tracer *tracer;
-	unsigned long irq;
 	int error;
 
 	/* Buffer overflow notification is not yet implemented. */
@@ -690,42 +715,46 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	if (error < 0)
 		goto out;
 
-	/*
-	 * Per-cpu tracing is typically requested using smp_call_function().
-	 * We must not sleep.
-	 */
 	error = -ENOMEM;
-	tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC);
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
 	if (!tracer)
 		goto out_put_tracer;
 	tracer->ovfl = ovfl;
 
+	/* Do some more error checking and acquire a tracing context. */
 	error = ds_request(&tracer->ds, &tracer->trace.ds,
-			   ds_bts, task, base, size, th, flags);
+			   ds_bts, task, cpu, base, size, th);
 	if (error < 0)
 		goto out_tracer;
 
-
-	spin_lock_irqsave(&ds_lock, irq);
+	/* Claim the bts part of the tracing context we acquired above. */
+	spin_lock_irq(&ds_lock);
 
 	error = -EPERM;
 	if (tracer->ds.context->bts_master)
 		goto out_unlock;
 	tracer->ds.context->bts_master = tracer;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
+	/*
+	 * Now that we own the bts part of the context, let's complete the
+	 * initialization for that part.
+	 */
+	ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_install_ds_area(tracer->ds.context);
 
 	tracer->trace.read  = bts_read;
 	tracer->trace.write = bts_write;
 
-	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	/* Start tracing. */
 	ds_resume_bts(tracer);
 
 	return tracer;
 
  out_unlock:
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
@@ -735,13 +764,27 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	return ERR_PTR(error);
 }
 
-struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-				    void *base, size_t size,
-				    pebs_ovfl_callback_t ovfl, size_t th,
-				    unsigned int flags)
+struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+				       void *base, size_t size,
+				       bts_ovfl_callback_t ovfl,
+				       size_t th, unsigned int flags)
+{
+	return ds_request_bts(task, 0, base, size, ovfl, th, flags);
+}
+
+struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+				      bts_ovfl_callback_t ovfl,
+				      size_t th, unsigned int flags)
+{
+	return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
+					   void *base, size_t size,
+					   pebs_ovfl_callback_t ovfl, size_t th,
+					   unsigned int flags)
 {
 	struct pebs_tracer *tracer;
-	unsigned long irq;
 	int error;
 
 	/* Buffer overflow notification is not yet implemented. */
@@ -753,37 +796,43 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 	if (error < 0)
 		goto out;
 
-	/*
-	 * Per-cpu tracing is typically requested using smp_call_function().
-	 * We must not sleep.
-	 */
 	error = -ENOMEM;
-	tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC);
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
 	if (!tracer)
 		goto out_put_tracer;
 	tracer->ovfl = ovfl;
 
+	/* Do some more error checking and acquire a tracing context. */
 	error = ds_request(&tracer->ds, &tracer->trace.ds,
-			   ds_pebs, task, base, size, th, flags);
+			   ds_pebs, task, cpu, base, size, th);
 	if (error < 0)
 		goto out_tracer;
 
-	spin_lock_irqsave(&ds_lock, irq);
+	/* Claim the pebs part of the tracing context we acquired above. */
+	spin_lock_irq(&ds_lock);
 
 	error = -EPERM;
 	if (tracer->ds.context->pebs_master)
 		goto out_unlock;
 	tracer->ds.context->pebs_master = tracer;
 
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 
+	/*
+	 * Now that we own the pebs part of the context, let's complete the
+	 * initialization for that part.
+	 */
+	ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
 	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+	ds_install_ds_area(tracer->ds.context);
+
+	/* Start tracing. */
 	ds_resume_pebs(tracer);
 
 	return tracer;
 
  out_unlock:
-	spin_unlock_irqrestore(&ds_lock, irq);
+	spin_unlock_irq(&ds_lock);
 	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
@@ -793,16 +842,26 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 	return ERR_PTR(error);
 }
 
-void ds_release_bts(struct bts_tracer *tracer)
+struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+					 void *base, size_t size,
+					 pebs_ovfl_callback_t ovfl,
+					 size_t th, unsigned int flags)
 {
-	struct task_struct *task;
+	return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
+}
 
-	if (!tracer)
-		return;
+struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
+					pebs_ovfl_callback_t ovfl,
+					size_t th, unsigned int flags)
+{
+	return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
+}
 
-	task = tracer->ds.context->task;
+static void ds_free_bts(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
 
-	ds_suspend_bts(tracer);
+	task = tracer->ds.context->task;
 
 	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
 	tracer->ds.context->bts_master = NULL;
@@ -817,9 +876,69 @@ void ds_release_bts(struct bts_tracer *tracer)
 	kfree(tracer);
 }
 
+void ds_release_bts(struct bts_tracer *tracer)
+{
+	might_sleep();
+
+	if (!tracer)
+		return;
+
+	ds_suspend_bts(tracer);
+	ds_free_bts(tracer);
+}
+
+int ds_release_bts_noirq(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long irq;
+	int error;
+
+	if (!tracer)
+		return 0;
+
+	task = tracer->ds.context->task;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task &&
+	    (tracer->ds.context->cpu != smp_processor_id()))
+		goto out;
+
+	error = -EPERM;
+	if (task && (task != current))
+		goto out;
+
+	ds_suspend_bts_noirq(tracer);
+	ds_free_bts(tracer);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static void update_task_debugctlmsr(struct task_struct *task,
+				    unsigned long debugctlmsr)
+{
+	task->thread.debugctlmsr = debugctlmsr;
+
+	get_cpu();
+	if (task == current)
+		update_debugctlmsr(debugctlmsr);
+
+	if (task->thread.debugctlmsr)
+		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
+	else
+		clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
+	put_cpu();
+}
+
 void ds_suspend_bts(struct bts_tracer *tracer)
 {
 	struct task_struct *task;
+	unsigned long debugctlmsr;
+	int cpu;
 
 	if (!tracer)
 		return;
@@ -827,29 +946,60 @@ void ds_suspend_bts(struct bts_tracer *tracer)
 	tracer->flags = 0;
 
 	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
 
-	if (!task || (task == current))
-		update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
+	WARN_ON(!task && irqs_disabled());
 
-	if (task) {
-		task->thread.debugctlmsr &= ~BTS_CONTROL;
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr_on_cpu(cpu));
+	debugctlmsr &= ~BTS_CONTROL;
 
-		if (!task->thread.debugctlmsr)
-			clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-	}
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
 }
 
-void ds_resume_bts(struct bts_tracer *tracer)
+int ds_suspend_bts_noirq(struct bts_tracer *tracer)
 {
 	struct task_struct *task;
-	unsigned long control;
+	unsigned long debugctlmsr, irq;
+	int cpu, error = 0;
 
 	if (!tracer)
-		return;
+		return 0;
 
-	tracer->flags = tracer->trace.ds.flags;
+	tracer->flags = 0;
 
 	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task && (cpu != smp_processor_id()))
+		goto out;
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr());
+	debugctlmsr &= ~BTS_CONTROL;
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr(debugctlmsr);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static unsigned long ds_bts_control(struct bts_tracer *tracer)
+{
+	unsigned long control;
 
 	control = ds_cfg.ctl[dsf_bts];
 	if (!(tracer->trace.ds.flags & BTS_KERNEL))
@@ -857,25 +1007,77 @@ void ds_resume_bts(struct bts_tracer *tracer)
 	if (!(tracer->trace.ds.flags & BTS_USER))
 		control |= ds_cfg.ctl[dsf_bts_user];
 
-	if (task) {
-		task->thread.debugctlmsr |= control;
-		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-	}
-
-	if (!task || (task == current))
-		update_debugctlmsr(get_debugctlmsr() | control);
+	return control;
 }
 
-void ds_release_pebs(struct pebs_tracer *tracer)
+void ds_resume_bts(struct bts_tracer *tracer)
 {
 	struct task_struct *task;
+	unsigned long debugctlmsr;
+	int cpu;
 
 	if (!tracer)
 		return;
 
+	tracer->flags = tracer->trace.ds.flags;
+
 	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
 
-	ds_suspend_pebs(tracer);
+	WARN_ON(!task && irqs_disabled());
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr_on_cpu(cpu));
+	debugctlmsr |= ds_bts_control(tracer);
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
+}
+
+int ds_resume_bts_noirq(struct bts_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long debugctlmsr, irq;
+	int cpu, error = 0;
+
+	if (!tracer)
+		return 0;
+
+	tracer->flags = tracer->trace.ds.flags;
+
+	task = tracer->ds.context->task;
+	cpu  = tracer->ds.context->cpu;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task && (cpu != smp_processor_id()))
+		goto out;
+
+	debugctlmsr = (task ?
+		       task->thread.debugctlmsr :
+		       get_debugctlmsr());
+	debugctlmsr |= ds_bts_control(tracer);
+
+	if (task)
+		update_task_debugctlmsr(task, debugctlmsr);
+	else
+		update_debugctlmsr(debugctlmsr);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
+static void ds_free_pebs(struct pebs_tracer *tracer)
+{
+	struct task_struct *task;
+
+	task = tracer->ds.context->task;
 
 	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
 	tracer->ds.context->pebs_master = NULL;
@@ -886,16 +1088,68 @@ void ds_release_pebs(struct pebs_tracer *tracer)
 	kfree(tracer);
 }
 
+void ds_release_pebs(struct pebs_tracer *tracer)
+{
+	might_sleep();
+
+	if (!tracer)
+		return;
+
+	ds_suspend_pebs(tracer);
+	ds_free_pebs(tracer);
+}
+
+int ds_release_pebs_noirq(struct pebs_tracer *tracer)
+{
+	struct task_struct *task;
+	unsigned long irq;
+	int error;
+
+	if (!tracer)
+		return 0;
+
+	task = tracer->ds.context->task;
+
+	local_irq_save(irq);
+
+	error = -EPERM;
+	if (!task &&
+	    (tracer->ds.context->cpu != smp_processor_id()))
+		goto out;
+
+	error = -EPERM;
+	if (task && (task != current))
+		goto out;
+
+	ds_suspend_pebs_noirq(tracer);
+	ds_free_pebs(tracer);
+
+	error = 0;
+ out:
+	local_irq_restore(irq);
+	return error;
+}
+
 void ds_suspend_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
+{
+	return 0;
+}
+
 void ds_resume_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
+{
+	return 0;
+}
+
 const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
@@ -1004,26 +1258,6 @@ ds_configure(const struct ds_configuration *cfg,
 		printk(KERN_INFO "[ds] pebs not available\n");
 	}
 
-	if (ds_cfg.sizeof_rec[ds_bts]) {
-		int error;
-
-		error = ds_selftest_bts();
-		if (error) {
-			WARN(1, "[ds] selftest failed. disabling bts.\n");
-			ds_cfg.sizeof_rec[ds_bts] = 0;
-		}
-	}
-
-	if (ds_cfg.sizeof_rec[ds_pebs]) {
-		int error;
-
-		error = ds_selftest_pebs();
-		if (error) {
-			WARN(1, "[ds] selftest failed. disabling pebs.\n");
-			ds_cfg.sizeof_rec[ds_pebs] = 0;
-		}
-	}
-
 	printk(KERN_INFO "[ds] sizes: address: %u bit, ",
 	       8 * ds_cfg.sizeof_ptr_field);
 	printk("bts/pebs record: %u/%u bytes\n",
@@ -1127,3 +1361,29 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
 void ds_exit_thread(struct task_struct *tsk)
 {
 }
+
+static __init int ds_selftest(void)
+{
+	if (ds_cfg.sizeof_rec[ds_bts]) {
+		int error;
+
+		error = ds_selftest_bts();
+		if (error) {
+			WARN(1, "[ds] selftest failed. disabling bts.\n");
+			ds_cfg.sizeof_rec[ds_bts] = 0;
+		}
+	}
+
+	if (ds_cfg.sizeof_rec[ds_pebs]) {
+		int error;
+
+		error = ds_selftest_pebs();
+		if (error) {
+			WARN(1, "[ds] selftest failed. disabling pebs.\n");
+			ds_cfg.sizeof_rec[ds_pebs] = 0;
+		}
+	}
+
+	return 0;
+}
+device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
index 8c46fbf..e5a263c 100644
--- a/arch/x86/kernel/ds_selftest.c
+++ b/arch/x86/kernel/ds_selftest.c
@@ -10,11 +10,12 @@
 
 #include <linux/kernel.h>
 #include <linux/string.h>
+#include <linux/smp.h>
 
 #include <asm/ds.h>
 
 
-#define DS_SELFTEST_BUFFER_SIZE 1021 /* Intentionally chose an odd size. */
+#define BUFFER_SIZE 1021 /* Intentionally chose an odd size. */
 
 
 static int ds_selftest_bts_consistency(const struct bts_trace *trace)
@@ -125,12 +126,12 @@ int ds_selftest_bts(void)
 	struct bts_tracer *tracer;
 	int error = 0;
 	void *top;
-	unsigned char buffer[DS_SELFTEST_BUFFER_SIZE];
+	unsigned char buffer[BUFFER_SIZE];
 
 	printk(KERN_INFO "[ds] bts selftest...");
 
-	tracer = ds_request_bts(NULL, buffer, DS_SELFTEST_BUFFER_SIZE,
-				NULL, (size_t)-1, BTS_KERNEL);
+	tracer = ds_request_bts_cpu(smp_processor_id(), buffer, BUFFER_SIZE,
+				    NULL, (size_t)-1, BTS_KERNEL);
 	if (IS_ERR(tracer)) {
 		error = PTR_ERR(tracer);
 		tracer = NULL;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7c21d1e..adbb243 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -800,8 +800,9 @@ static int ptrace_bts_config(struct task_struct *child,
 	if (cfg.flags & PTRACE_BTS_O_SCHED)
 		flags |= BTS_TIMESTAMPS;
 
-	context->tracer = ds_request_bts(child, context->buffer, context->size,
-					 NULL, (size_t)-1, flags);
+	context->tracer =
+		ds_request_bts_task(child, context->buffer, context->size,
+				    NULL, (size_t)-1, flags);
 	if (unlikely(IS_ERR(context->tracer))) {
 		int error = PTR_ERR(context->tracer);
 
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 8b2109a..50565d8 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -4,7 +4,6 @@
  * Copyright (C) 2008-2009 Intel Corporation.
  * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
  */
-#include <linux/spinlock.h>
 #include <linux/kallsyms.h>
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
@@ -21,168 +20,113 @@
 
 #define BTS_BUFFER_SIZE (1 << 13)
 
-/*
- * The tracer lock protects the below per-cpu tracer array.
- * It needs to be held to:
- * - start tracing on all cpus
- * - stop tracing on all cpus
- * - start tracing on a single hotplug cpu
- * - stop tracing on a single hotplug cpu
- * - read the trace from all cpus
- * - read the trace from a single cpu
- */
-static DEFINE_SPINLOCK(bts_tracer_lock);
 static DEFINE_PER_CPU(struct bts_tracer *, tracer);
 static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
 
 #define this_tracer per_cpu(tracer, smp_processor_id())
-#define this_buffer per_cpu(buffer, smp_processor_id())
 
 static int trace_hw_branches_enabled __read_mostly;
 static int trace_hw_branches_suspended __read_mostly;
 static struct trace_array *hw_branch_trace __read_mostly;
 
 
-/*
- * Initialize the tracer for the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_init_cpu(void *arg)
+static void bts_trace_init_cpu(int cpu)
 {
-	if (this_tracer)
-		ds_release_bts(this_tracer);
+	per_cpu(tracer, cpu) =
+		ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
+				   NULL, (size_t)-1, BTS_KERNEL);
 
-	this_tracer = ds_request_bts(NULL, this_buffer, BTS_BUFFER_SIZE,
-				     NULL, (size_t)-1, BTS_KERNEL);
-	if (IS_ERR(this_tracer)) {
-		this_tracer = NULL;
-		return;
-	}
+	if (IS_ERR(per_cpu(tracer, cpu)))
+		per_cpu(tracer, cpu) = NULL;
 }
 
 static int bts_trace_init(struct trace_array *tr)
 {
-	int cpu, avail;
-
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
 	hw_branch_trace = tr;
+	trace_hw_branches_enabled = 0;
 
-	on_each_cpu(bts_trace_init_cpu, NULL, 1);
-
-	/* Check on how many cpus we could enable tracing */
-	avail = 0;
-	for_each_online_cpu(cpu)
-		if (per_cpu(tracer, cpu))
-			avail++;
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		bts_trace_init_cpu(cpu);
 
-	trace_hw_branches_enabled = (avail ? 1 : 0);
+		if (likely(per_cpu(tracer, cpu)))
+			trace_hw_branches_enabled = 1;
+	}
 	trace_hw_branches_suspended = 0;
-
-	spin_unlock(&bts_tracer_lock);
-
+	put_online_cpus();
 
 	/* If we could not enable tracing on a single cpu, we fail. */
-	return avail ? 0 : -EOPNOTSUPP;
-}
-
-/*
- * Release the tracer for the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_release_cpu(void *arg)
-{
-	if (this_tracer) {
-		ds_release_bts(this_tracer);
-		this_tracer = NULL;
-	}
+	return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
 }
 
 static void bts_trace_reset(struct trace_array *tr)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
-	on_each_cpu(bts_trace_release_cpu, NULL, 1);
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		if (likely(per_cpu(tracer, cpu))) {
+			ds_release_bts(per_cpu(tracer, cpu));
+			per_cpu(tracer, cpu) = NULL;
+		}
+	}
 	trace_hw_branches_enabled = 0;
 	trace_hw_branches_suspended = 0;
-
-	spin_unlock(&bts_tracer_lock);
-}
-
-/*
- * Resume tracing on the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_resume_cpu(void *arg)
-{
-	if (this_tracer)
-		ds_resume_bts(this_tracer);
+	put_online_cpus();
 }
 
 static void bts_trace_start(struct trace_array *tr)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
-	on_each_cpu(bts_trace_resume_cpu, NULL, 1);
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_resume_bts(per_cpu(tracer, cpu));
 	trace_hw_branches_suspended = 0;
-
-	spin_unlock(&bts_tracer_lock);
-}
-
-/*
- * Suspend tracing on the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_suspend_cpu(void *arg)
-{
-	if (this_tracer)
-		ds_suspend_bts(this_tracer);
+	put_online_cpus();
 }
 
 static void bts_trace_stop(struct trace_array *tr)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
-	on_each_cpu(bts_trace_suspend_cpu, NULL, 1);
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_suspend_bts(per_cpu(tracer, cpu));
 	trace_hw_branches_suspended = 1;
-
-	spin_unlock(&bts_tracer_lock);
+	put_online_cpus();
 }
 
 static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
 				     unsigned long action, void *hcpu)
 {
-	unsigned int cpu = (unsigned long)hcpu;
-
-	spin_lock(&bts_tracer_lock);
-
-	if (!trace_hw_branches_enabled)
-		goto out;
+	int cpu = (long)hcpu;
 
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
-		smp_call_function_single(cpu, bts_trace_init_cpu, NULL, 1);
-
-		if (trace_hw_branches_suspended)
-			smp_call_function_single(cpu, bts_trace_suspend_cpu,
-						 NULL, 1);
+		/* The notification is sent with interrupts enabled. */
+		if (trace_hw_branches_enabled) {
+			bts_trace_init_cpu(cpu);
+
+			if (trace_hw_branches_suspended &&
+			    likely(per_cpu(tracer, cpu)))
+				ds_suspend_bts(per_cpu(tracer, cpu));
+		}
 		break;
+
 	case CPU_DOWN_PREPARE:
-		smp_call_function_single(cpu, bts_trace_release_cpu, NULL, 1);
-		break;
+		/* The notification is sent with interrupts enabled. */
+		if (likely(per_cpu(tracer, cpu))) {
+			ds_release_bts(per_cpu(tracer, cpu));
+			per_cpu(tracer, cpu) = NULL;
+		}
 	}
 
- out:
-	spin_unlock(&bts_tracer_lock);
 	return NOTIFY_DONE;
 }
 
@@ -274,7 +218,7 @@ static void trace_bts_at(const struct bts_trace *trace, void *at)
 /*
  * Collect the trace on the current cpu and write it into the ftrace buffer.
  *
- * pre: bts_tracer_lock must be locked
+ * pre: tracing must be suspended on the current cpu
  */
 static void trace_bts_cpu(void *arg)
 {
@@ -291,10 +235,9 @@ static void trace_bts_cpu(void *arg)
 	if (unlikely(!this_tracer))
 		return;
 
-	ds_suspend_bts(this_tracer);
 	trace = ds_read_bts(this_tracer);
 	if (!trace)
-		goto out;
+		return;
 
 	for (at = trace->ds.top; (void *)at < trace->ds.end;
 	     at += trace->ds.size)
@@ -303,18 +246,27 @@ static void trace_bts_cpu(void *arg)
 	for (at = trace->ds.begin; (void *)at < trace->ds.top;
 	     at += trace->ds.size)
 		trace_bts_at(trace, at);
-
-out:
-	ds_resume_bts(this_tracer);
 }
 
 static void trace_bts_prepare(struct trace_iterator *iter)
 {
-	spin_lock(&bts_tracer_lock);
+	int cpu;
 
+	get_online_cpus();
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_suspend_bts(per_cpu(tracer, cpu));
+	/*
+	 * We need to collect the trace on the respective cpu since ftrace
+	 * implicitly adds the record for the current cpu.
+	 * Once that is more flexible, we could collect the data from any cpu.
+	 */
 	on_each_cpu(trace_bts_cpu, iter->tr, 1);
 
-	spin_unlock(&bts_tracer_lock);
+	for_each_online_cpu(cpu)
+		if (likely(per_cpu(tracer, cpu)))
+			ds_resume_bts(per_cpu(tracer, cpu));
+	put_online_cpus();
 }
 
 static void trace_bts_close(struct trace_iterator *iter)
@@ -324,12 +276,11 @@ static void trace_bts_close(struct trace_iterator *iter)
 
 void trace_hw_branch_oops(void)
 {
-	spin_lock(&bts_tracer_lock);
-
-	if (trace_hw_branches_enabled)
+	if (this_tracer) {
+		ds_suspend_bts_noirq(this_tracer);
 		trace_bts_cpu(hw_branch_trace);
-
-	spin_unlock(&bts_tracer_lock);
+		ds_resume_bts_noirq(this_tracer);
+	}
 }
 
 struct tracer bts_tracer __read_mostly =
-- 
cgit v0.10.2


From 4d657e51dfc042216febd4a007c6f36881f9256d Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:41 +0200
Subject: x86, hw-branch-tracer: allocate selftest iterator on heap

Allocate the trace_iterator for the hw-branch-tracer selftest on the heap.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144556.578777000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 499d01c..00dd648 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -757,7 +757,7 @@ int
 trace_selftest_startup_hw_branches(struct tracer *trace,
 				   struct trace_array *tr)
 {
-	struct trace_iterator iter;
+	struct trace_iterator *iter;
 	struct tracer tracer;
 	unsigned long count;
 	int ret;
@@ -777,17 +777,21 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
 	 * The hw-branch tracer needs to collect the trace from the various
 	 * cpu trace buffers - before tracing is stopped.
 	 */
-	memset(&iter, 0, sizeof(iter));
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return -ENOMEM;
+
 	memcpy(&tracer, trace, sizeof(tracer));
 
-	iter.trace = &tracer;
-	iter.tr = tr;
-	iter.pos = -1;
-	mutex_init(&iter.mutex);
+	iter->trace = &tracer;
+	iter->tr = tr;
+	iter->pos = -1;
+	mutex_init(&iter->mutex);
 
-	trace->open(&iter);
+	trace->open(iter);
 
-	mutex_destroy(&iter.mutex);
+	mutex_destroy(&iter->mutex);
+	kfree(iter);
 
 	tracing_stop();
 
-- 
cgit v0.10.2


From 353afeea24cc51aafc0ff21a72ec740b6f0af50c Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:42 +0200
Subject: x86, ds: fix compiler warning

Size_t is defined differently on i386 and x86_64.
Change type to avoid compiler warning.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144557.523964000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
index e5a263c..e1ba510 100644
--- a/arch/x86/kernel/ds_selftest.c
+++ b/arch/x86/kernel/ds_selftest.c
@@ -87,7 +87,7 @@ static int ds_selftest_bts_read(struct bts_tracer *tracer,
 	/* Now to the test itself. */
 	for (at = from; (void *)at < to; at += trace->ds.size) {
 		struct bts_struct bts;
-		size_t index;
+		unsigned long index;
 		int error;
 
 		if (((void *)at - trace->ds.begin) % trace->ds.size) {
-- 
cgit v0.10.2


From 84f201139245c30777ff858e71b8d7e134b8c3ed Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:43 +0200
Subject: x86, ds: fix bounds check in ds selftest

Fix a bad bounds check in the debug store selftest.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144558.450027000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
index e1ba510..cccc19a 100644
--- a/arch/x86/kernel/ds_selftest.c
+++ b/arch/x86/kernel/ds_selftest.c
@@ -47,8 +47,13 @@ static int ds_selftest_bts_consistency(const struct bts_trace *trace)
 		printk(KERN_CONT "bad bts buffer setup...");
 		error = -1;
 	}
+	/*
+	 * We allow top in [begin; end], since its not clear when the
+	 * overflow adjustment happens: after the increment or before the
+	 * write.
+	 */
 	if ((trace->ds.top < trace->ds.begin) ||
-	    (trace->ds.end <= trace->ds.top)) {
+	    (trace->ds.end < trace->ds.top)) {
 		printk(KERN_CONT "bts top out of bounds...");
 		error = -1;
 	}
-- 
cgit v0.10.2


From 01f6569ece6915616f6cae1d7d8b46ab8da9c1bd Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:44 +0200
Subject: x86, ds: selftest each cpu

Perform debug store selftests on each cpu.

Cover both the normal and the _noirq variant of the debug store interface.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144559.394583000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
index cccc19a..599a963 100644
--- a/arch/x86/kernel/ds_selftest.c
+++ b/arch/x86/kernel/ds_selftest.c
@@ -11,13 +11,21 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 
 #include <asm/ds.h>
 
 
-#define BUFFER_SIZE 1021 /* Intentionally chose an odd size. */
+#define BUFFER_SIZE	521 /* Intentionally chose an odd size. */
 
 
+struct ds_selftest_bts_conf {
+	struct bts_tracer *tracer;
+	int error;
+	int (*suspend)(struct bts_tracer *);
+	int (*resume)(struct bts_tracer *);
+};
+
 static int ds_selftest_bts_consistency(const struct bts_trace *trace)
 {
 	int error = 0;
@@ -125,36 +133,32 @@ static int ds_selftest_bts_read(struct bts_tracer *tracer,
 	return 0;
 }
 
-int ds_selftest_bts(void)
+static void ds_selftest_bts_cpu(void *arg)
 {
+	struct ds_selftest_bts_conf *conf = arg;
 	const struct bts_trace *trace;
-	struct bts_tracer *tracer;
-	int error = 0;
 	void *top;
-	unsigned char buffer[BUFFER_SIZE];
 
-	printk(KERN_INFO "[ds] bts selftest...");
-
-	tracer = ds_request_bts_cpu(smp_processor_id(), buffer, BUFFER_SIZE,
-				    NULL, (size_t)-1, BTS_KERNEL);
-	if (IS_ERR(tracer)) {
-		error = PTR_ERR(tracer);
-		tracer = NULL;
+	if (IS_ERR(conf->tracer)) {
+		conf->error = PTR_ERR(conf->tracer);
+		conf->tracer = NULL;
 
 		printk(KERN_CONT
-		       "initialization failed (err: %d)...", error);
-		goto out;
+		       "initialization failed (err: %d)...", conf->error);
+		return;
 	}
 
-	/* The return should already give us enough trace. */
-	ds_suspend_bts(tracer);
+	/* We should meanwhile have enough trace. */
+	conf->error = conf->suspend(conf->tracer);
+	if (conf->error < 0)
+		return;
 
 	/* Let's see if we can access the trace. */
-	trace = ds_read_bts(tracer);
+	trace = ds_read_bts(conf->tracer);
 
-	error = ds_selftest_bts_consistency(trace);
-	if (error < 0)
-		goto out;
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
 
 	/* If everything went well, we should have a few trace entries. */
 	if (trace->ds.top == trace->ds.begin) {
@@ -168,10 +172,11 @@ int ds_selftest_bts(void)
 	}
 
 	/* Let's try to read the trace we collected. */
-	error = ds_selftest_bts_read(tracer, trace,
+	conf->error =
+		ds_selftest_bts_read(conf->tracer, trace,
 				     trace->ds.begin, trace->ds.top);
-	if (error < 0)
-		goto out;
+	if (conf->error < 0)
+		return;
 
 	/*
 	 * Let's read the trace again.
@@ -179,26 +184,31 @@ int ds_selftest_bts(void)
 	 */
 	top = trace->ds.top;
 
-	trace = ds_read_bts(tracer);
-	error = ds_selftest_bts_consistency(trace);
-	if (error < 0)
-		goto out;
+	trace = ds_read_bts(conf->tracer);
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
 
 	if (top != trace->ds.top) {
 		printk(KERN_CONT "suspend not working...");
-		error = -1;
-		goto out;
+		conf->error = -1;
+		return;
 	}
 
 	/* Let's collect some more trace - see if resume is working. */
-	ds_resume_bts(tracer);
-	ds_suspend_bts(tracer);
+	conf->error = conf->resume(conf->tracer);
+	if (conf->error < 0)
+		return;
+
+	conf->error = conf->suspend(conf->tracer);
+	if (conf->error < 0)
+		return;
 
-	trace = ds_read_bts(tracer);
+	trace = ds_read_bts(conf->tracer);
 
-	error = ds_selftest_bts_consistency(trace);
-	if (error < 0)
-		goto out;
+	conf->error = ds_selftest_bts_consistency(trace);
+	if (conf->error < 0)
+		return;
 
 	if (trace->ds.top == top) {
 		/*
@@ -210,35 +220,113 @@ int ds_selftest_bts(void)
 		printk(KERN_CONT
 		       "no resume progress/overflow...");
 
-		error = ds_selftest_bts_read(tracer, trace,
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace,
 					     trace->ds.begin, trace->ds.end);
 	} else if (trace->ds.top < top) {
 		/*
 		 * We had a buffer overflow - the entire buffer should
 		 * contain trace records.
 		 */
-		error = ds_selftest_bts_read(tracer, trace,
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace,
 					     trace->ds.begin, trace->ds.end);
 	} else {
 		/*
 		 * It is quite likely that the buffer did not overflow.
 		 * Let's just check the delta trace.
 		 */
-		error = ds_selftest_bts_read(tracer, trace,
-					     top, trace->ds.top);
+		conf->error =
+			ds_selftest_bts_read(conf->tracer, trace, top,
+					     trace->ds.top);
 	}
-	if (error < 0)
-		goto out;
+	if (conf->error < 0)
+		return;
 
-	error = 0;
+	conf->error = 0;
+}
 
-	/* The final test: release the tracer while tracing is suspended. */
- out:
-	ds_release_bts(tracer);
+static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
+{
+	ds_suspend_bts(tracer);
+	return 0;
+}
+
+static int ds_resume_bts_wrap(struct bts_tracer *tracer)
+{
+	ds_resume_bts(tracer);
+	return 0;
+}
 
-	printk(KERN_CONT "%s.\n", (error ? "failed" : "passed"));
+static void ds_release_bts_noirq_wrap(void *tracer)
+{
+	(void)ds_release_bts_noirq(tracer);
+}
 
-	return error;
+static int ds_selftest_bts_bad_release_noirq(int cpu,
+					     struct bts_tracer *tracer)
+{
+	int error = -EPERM;
+
+	/* Try to release the tracer on the wrong cpu. */
+	get_cpu();
+	if (cpu != smp_processor_id()) {
+		error = ds_release_bts_noirq(tracer);
+		if (error != -EPERM)
+			printk(KERN_CONT "release on wrong cpu...");
+	}
+	put_cpu();
+
+	return error ? 0 : -1;
+}
+
+int ds_selftest_bts(void)
+{
+	struct ds_selftest_bts_conf conf;
+	unsigned char buffer[BUFFER_SIZE];
+	int cpu;
+
+	printk(KERN_INFO "[ds] bts selftest...");
+	conf.error = 0;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		conf.suspend = ds_suspend_bts_wrap;
+		conf.resume = ds_resume_bts_wrap;
+		conf.tracer =
+			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+					   NULL, (size_t)-1, BTS_KERNEL);
+		ds_selftest_bts_cpu(&conf);
+		ds_release_bts(conf.tracer);
+		if (conf.error < 0)
+			goto out;
+
+		conf.suspend = ds_suspend_bts_noirq;
+		conf.resume = ds_resume_bts_noirq;
+		conf.tracer =
+			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+					   NULL, (size_t)-1, BTS_KERNEL);
+		smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
+		if (conf.error >= 0) {
+			conf.error =
+				ds_selftest_bts_bad_release_noirq(cpu,
+								  conf.tracer);
+			/* We must not release the tracer twice. */
+			if (conf.error < 0)
+				conf.tracer = NULL;
+		}
+		smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
+					 conf.tracer, 1);
+		if (conf.error < 0)
+			goto out;
+	}
+
+	conf.error = 0;
+ out:
+	put_online_cpus();
+	printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
+
+	return conf.error;
 }
 
 int ds_selftest_pebs(void)
-- 
cgit v0.10.2


From 3a68eef945b234f286406d96dc690fe17863c203 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:45 +0200
Subject: x86, ds: add task tracing selftest

Add selftests to cover per-task branch tracing.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144600.329346000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
index 599a963..a40b253 100644
--- a/arch/x86/kernel/ds_selftest.c
+++ b/arch/x86/kernel/ds_selftest.c
@@ -280,10 +280,51 @@ static int ds_selftest_bts_bad_release_noirq(int cpu,
 	return error ? 0 : -1;
 }
 
+static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
+{
+	struct bts_tracer *tracer;
+	int error;
+
+	/* Try to request cpu tracing while task tracing is active. */
+	tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
+				    (size_t)-1, BTS_KERNEL);
+	error = PTR_ERR(tracer);
+	if (!IS_ERR(tracer)) {
+		ds_release_bts(tracer);
+		error = 0;
+	}
+
+	if (error != -EPERM)
+		printk(KERN_CONT "cpu/task tracing overlap...");
+
+	return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_task(void *buffer)
+{
+	struct bts_tracer *tracer;
+	int error;
+
+	/* Try to request cpu tracing while task tracing is active. */
+	tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
+				    (size_t)-1, BTS_KERNEL);
+	error = PTR_ERR(tracer);
+	if (!IS_ERR(tracer)) {
+		error = 0;
+		ds_release_bts(tracer);
+	}
+
+	if (error != -EPERM)
+		printk(KERN_CONT "task/cpu tracing overlap...");
+
+	return error ? 0 : -1;
+}
+
 int ds_selftest_bts(void)
 {
 	struct ds_selftest_bts_conf conf;
 	unsigned char buffer[BUFFER_SIZE];
+	unsigned long irq;
 	int cpu;
 
 	printk(KERN_INFO "[ds] bts selftest...");
@@ -297,6 +338,8 @@ int ds_selftest_bts(void)
 			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
 					   NULL, (size_t)-1, BTS_KERNEL);
 		ds_selftest_bts_cpu(&conf);
+		if (conf.error >= 0)
+			conf.error = ds_selftest_bts_bad_request_task(buffer);
 		ds_release_bts(conf.tracer);
 		if (conf.error < 0)
 			goto out;
@@ -315,12 +358,40 @@ int ds_selftest_bts(void)
 			if (conf.error < 0)
 				conf.tracer = NULL;
 		}
+		if (conf.error >= 0)
+			conf.error = ds_selftest_bts_bad_request_task(buffer);
 		smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
 					 conf.tracer, 1);
 		if (conf.error < 0)
 			goto out;
 	}
 
+	conf.suspend = ds_suspend_bts_wrap;
+	conf.resume = ds_resume_bts_wrap;
+	conf.tracer =
+		ds_request_bts_task(current, buffer, BUFFER_SIZE,
+				    NULL, (size_t)-1, BTS_KERNEL);
+	ds_selftest_bts_cpu(&conf);
+	if (conf.error >= 0)
+		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+	ds_release_bts(conf.tracer);
+	if (conf.error < 0)
+		goto out;
+
+	conf.suspend = ds_suspend_bts_noirq;
+	conf.resume = ds_resume_bts_noirq;
+	conf.tracer =
+		ds_request_bts_task(current, buffer, BUFFER_SIZE,
+				   NULL, (size_t)-1, BTS_KERNEL);
+	local_irq_save(irq);
+	ds_selftest_bts_cpu(&conf);
+	if (conf.error >= 0)
+		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+	ds_release_bts_noirq(conf.tracer);
+	local_irq_restore(irq);
+	if (conf.error < 0)
+		goto out;
+
 	conf.error = 0;
  out:
 	put_online_cpus();
-- 
cgit v0.10.2


From 2311f0de21c17b2a8b960677a9cccfbfa52beb35 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:46 +0200
Subject: x86, ds: add leakage warning

Add a warning in case a debug store context is not removed before
the task it is attached to is freed.

Remove the old warning at thread exit. It is too early.

Declare the debug store context field in thread_struct unconditionally.

Remove ds_copy_thread() and ds_exit_thread() and do the work directly
in process*.c.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144601.254472000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 413e127..149e520 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -285,21 +285,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
  */
 extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
 
-/*
- * Task clone/init and cleanup work
- */
-extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
-extern void ds_exit_thread(struct task_struct *tsk);
-
 #else /* CONFIG_X86_DS */
 
 struct cpuinfo_x86;
 static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 static inline void ds_switch_to(struct task_struct *prev,
 				struct task_struct *next) {}
-static inline void ds_copy_thread(struct task_struct *tsk,
-				  struct task_struct *father) {}
-static inline void ds_exit_thread(struct task_struct *tsk) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1efeb49..7c39de7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -454,10 +454,8 @@ struct thread_struct {
 	unsigned		io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+	/* Debug Store context; see asm/ds.h */
 	struct ds_context	*ds_ctx;
-#endif /* CONFIG_X86_DS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 21a3852..71cab3b 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -1352,16 +1352,6 @@ void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 	update_debugctlmsr(debugctlmsr);
 }
 
-void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
-{
-	clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
-	tsk->thread.ds_ctx = NULL;
-}
-
-void ds_exit_thread(struct task_struct *tsk)
-{
-}
-
 static __init int ds_selftest(void)
 {
 	if (ds_cfg.sizeof_rec[ds_bts]) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ca98915..fb5dfb8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -14,6 +14,7 @@
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/ds.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -45,6 +46,8 @@ void free_thread_xstate(struct task_struct *tsk)
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
+
+	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
 
 void free_thread_info(struct thread_info *ti)
@@ -83,8 +86,6 @@ void exit_thread(void)
 		put_cpu();
 		kfree(bp);
 	}
-
-	ds_exit_thread(current);
 }
 
 void flush_thread(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 76f8f84..b5e4bfe 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -290,7 +290,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		p->thread.io_bitmap_max = 0;
 	}
 
-	ds_copy_thread(p, current);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b751a41..5a1a1de 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -335,7 +335,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 			goto out;
 	}
 
-	ds_copy_thread(p, me);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
-- 
cgit v0.10.2


From ee811517a5604aa63fae803b7c044712699e1303 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:47 +0200
Subject: x86, ds: use single debug store cpu configuration

Use a single configuration for all cpus.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144602.191165000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 71cab3b..443f415 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -47,9 +47,8 @@ struct ds_configuration {
 	/* Control bit-masks indexed by enum ds_feature: */
 	unsigned long		ctl[dsf_ctl_max];
 };
-static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+static struct ds_configuration ds_cfg __read_mostly;
 
-#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
 
 /* Maximal size of a DS configuration: */
 #define MAX_SIZEOF_DS		(12 * 8)
@@ -1268,6 +1267,10 @@ ds_configure(const struct ds_configuration *cfg,
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 {
+	/* Only configure the first cpu. Others are identical. */
+	if (ds_cfg.name)
+		return;
+
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
-- 
cgit v0.10.2


From 0f4814065ff8c24ca8bfd75c9b73502be152c287 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:48 +0200
Subject: x86, ptrace: add bts context unconditionally

Add the ptrace bts context field to task_struct unconditionally.

Initialize the field directly in copy_process().
Remove all the unneeded functionality used to initialize that field.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144603.292754000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index e304b66..5cdd19f 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
-extern void x86_ptrace_untrace(struct task_struct *);
-extern void x86_ptrace_fork(struct task_struct *child,
-			    unsigned long clone_flags);
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void ptrace_bts_untrace(struct task_struct *tsk);
 
-#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
-#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+#define arch_ptrace_untrace(tsk)	ptrace_bts_untrace(tsk)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index adbb243..b32a8ee 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -887,37 +887,19 @@ static int ptrace_bts_size(struct task_struct *child)
 	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 
-static inline void ptrace_bts_fork(struct task_struct *tsk)
-{
-	tsk->bts = NULL;
-}
-
 /*
  * Called from __ptrace_unlink() after the child has been moved back
  * to its original parent.
  */
-static inline void ptrace_bts_untrace(struct task_struct *child)
+void ptrace_bts_untrace(struct task_struct *child)
 {
 	if (unlikely(child->bts)) {
 		free_bts_context(child->bts);
 		child->bts = NULL;
 	}
 }
-#else
-static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
-void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-	ptrace_bts_fork(child);
-}
-
-void x86_ptrace_untrace(struct task_struct *child)
-{
-	ptrace_bts_untrace(child);
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 67c1565..59e133d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child,
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer);
-extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
@@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task)
 #define arch_ptrace_untrace(task)		do { } while (0)
 #endif
 
-#ifndef arch_ptrace_fork
-/*
- * Do machine-specific work to initialize a new task.
- *
- * This is called from copy_process().
- */
-#define arch_ptrace_fork(child, clone_flags)	do { } while (0)
-#endif
-
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52b8cd0..451186a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1205,13 +1205,11 @@ struct task_struct {
 	struct list_head ptraced;
 	struct list_head ptrace_entry;
 
-#ifdef CONFIG_X86_PTRACE_BTS
 	/*
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
 	struct bts_context *bts;
-#endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
diff --git a/kernel/fork.c b/kernel/fork.c
index 660c2b8..69bde7a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1086,8 +1086,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(current->ptrace))
-		ptrace_fork(p, clone_flags);
+
+	p->bts = NULL;
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index aaad0ec..321127d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -27,16 +27,6 @@
 
 
 /*
- * Initialize a new task whose father had been ptraced.
- *
- * Called from copy_process().
- */
-void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-	arch_ptrace_fork(child, clone_flags);
-}
-
-/*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
-- 
cgit v0.10.2


From 6047550d3d26fed88b18a208b31f8b90b5ef3e9b Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:49 +0200
Subject: x86, ds: dont use TIF_DEBUGCTLMSR

Debug store already uses TIF_DS_AREA_MSR to trigger debug store context
switch handling. No need to use TIF_DEBUGCTLMSR, as well.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144604.256645000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 443f415..cab2832 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -925,11 +925,6 @@ static void update_task_debugctlmsr(struct task_struct *task,
 	get_cpu();
 	if (task == current)
 		update_debugctlmsr(debugctlmsr);
-
-	if (task->thread.debugctlmsr)
-		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-	else
-		clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
 	put_cpu();
 }
 
-- 
cgit v0.10.2


From 608780a9048efa3e85fbc4d8649b26805cc588aa Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:50 +0200
Subject: x86, ds: fix bad ds_reset_pebs()

Ds_reset_pebs() passed the wrong qualifier to a shared function resulting
in a reset of bts, rather than pebs.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144605.206510000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index cab2832..ebfb0fd 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -1186,7 +1186,7 @@ int ds_reset_pebs(struct pebs_tracer *tracer)
 
 	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+	ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
 	       (unsigned long)tracer->trace.ds.top);
 
 	return 0;
-- 
cgit v0.10.2


From 150f5164c1258e05b7dea16f29e592f354c48f34 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:51 +0200
Subject: x86, ds: allow small debug store buffers

Check the buffer size more precisely to allow buffers for exactly
one element provided the base address is already properly aligned.

Add a debug store selftest.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144606.139137000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index ebfb0fd..4e05157 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -656,6 +656,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 {
 	struct ds_context *context;
 	int error;
+	size_t req_size;
 
 	error = -EOPNOTSUPP;
 	if (!ds_cfg.sizeof_rec[qual])
@@ -665,9 +666,13 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
 	if (!base)
 		goto out;
 
-	/* We need space for alignment adjustments in ds_init_ds_trace(). */
+	req_size = ds_cfg.sizeof_rec[qual];
+	/* We might need space for alignment adjustments. */
+	if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
+		req_size += DS_ALIGNMENT;
+
 	error = -EINVAL;
-	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+	if (size < req_size)
 		goto out;
 
 	if (th != (size_t)-1) {
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
index a40b253..5f104a0 100644
--- a/arch/x86/kernel/ds_selftest.c
+++ b/arch/x86/kernel/ds_selftest.c
@@ -16,8 +16,8 @@
 #include <asm/ds.h>
 
 
-#define BUFFER_SIZE	521 /* Intentionally chose an odd size. */
-
+#define BUFFER_SIZE		521	/* Intentionally chose an odd size. */
+#define SMALL_BUFFER_SIZE	24	/* A single bts entry. */
 
 struct ds_selftest_bts_conf {
 	struct bts_tracer *tracer;
@@ -381,7 +381,7 @@ int ds_selftest_bts(void)
 	conf.suspend = ds_suspend_bts_noirq;
 	conf.resume = ds_resume_bts_noirq;
 	conf.tracer =
-		ds_request_bts_task(current, buffer, BUFFER_SIZE,
+		ds_request_bts_task(current, buffer, SMALL_BUFFER_SIZE,
 				   NULL, (size_t)-1, BTS_KERNEL);
 	local_irq_save(irq);
 	ds_selftest_bts_cpu(&conf);
-- 
cgit v0.10.2


From 017bc617657c928cb9a0c45a7a7e9f4e66695347 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:52 +0200
Subject: x86, ds: support Core i7

Add debug store support for Core i7.

Core i7 adds a reset value for each performance counter and a new
PEBS record format.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144607.088997000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 149e520..70dac19 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -234,8 +234,12 @@ struct bts_trace {
 struct pebs_trace {
 	struct ds_trace ds;
 
-	/* the PEBS reset value */
-	unsigned long long reset_value;
+	/* the number of valid counters in the below array */
+	unsigned int counters;
+
+#define MAX_PEBS_COUNTERS 4
+	/* the counter reset value */
+	unsigned long long counter_reset[MAX_PEBS_COUNTERS];
 };
 
 
@@ -270,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer);
  * Returns 0 on success; -Eerrno on error
  *
  * tracer: the tracer handle returned from ds_request_pebs()
+ * counter: the index of the counter
  * value: the new counter reset value
  */
-extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
+			     unsigned int counter, u64 value);
 
 /*
  * Initialization
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 4e05157..48bfe13 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -44,6 +44,9 @@ struct ds_configuration {
 	/* The size of a BTS/PEBS record in bytes: */
 	unsigned char		sizeof_rec[2];
 
+	/* The number of pebs counter reset values in the DS structure. */
+	unsigned char		nr_counter_reset;
+
 	/* Control bit-masks indexed by enum ds_feature: */
 	unsigned long		ctl[dsf_ctl_max];
 };
@@ -51,7 +54,7 @@ static struct ds_configuration ds_cfg __read_mostly;
 
 
 /* Maximal size of a DS configuration: */
-#define MAX_SIZEOF_DS		(12 * 8)
+#define MAX_SIZEOF_DS		0x80
 
 /* Maximal size of a BTS record: */
 #define MAX_SIZEOF_BTS		(3 * 8)
@@ -59,6 +62,12 @@ static struct ds_configuration ds_cfg __read_mostly;
 /* BTS and PEBS buffer alignment: */
 #define DS_ALIGNMENT		(1 << 3)
 
+/* Number of buffer pointers in DS: */
+#define NUM_DS_PTR_FIELDS	8
+
+/* Size of a pebs reset value in DS: */
+#define PEBS_RESET_FIELD_SIZE	8
+
 /* Mask of control bits in the DS MSR register: */
 #define BTS_CONTROL				  \
 	( ds_cfg.ctl[dsf_bts]			| \
@@ -1164,9 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
 		return NULL;
 
 	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
-	tracer->trace.reset_value =
-		*(u64 *)(tracer->ds.context->ds +
-			 (ds_cfg.sizeof_ptr_field * 8));
+
+	tracer->trace.counters = ds_cfg.nr_counter_reset;
+	memcpy(tracer->trace.counter_reset,
+	       tracer->ds.context->ds +
+	       (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
+	       ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
 
 	return &tracer->trace;
 }
@@ -1197,13 +1209,18 @@ int ds_reset_pebs(struct pebs_tracer *tracer)
 	return 0;
 }
 
-int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer,
+		      unsigned int counter, u64 value)
 {
 	if (!tracer)
 		return -EINVAL;
 
+	if (ds_cfg.nr_counter_reset < counter)
+		return -EINVAL;
+
 	*(u64 *)(tracer->ds.context->ds +
-		 (ds_cfg.sizeof_ptr_field * 8)) = value;
+		 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
+		 (counter * PEBS_RESET_FIELD_SIZE)) = value;
 
 	return 0;
 }
@@ -1213,16 +1230,26 @@ static const struct ds_configuration ds_cfg_netburst = {
 	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
 	.ctl[dsf_bts_kernel]	= (1 << 5),
 	.ctl[dsf_bts_user]	= (1 << 6),
+	.nr_counter_reset	= 1,
 };
 static const struct ds_configuration ds_cfg_pentium_m = {
 	.name = "Pentium M",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
+	.nr_counter_reset	= 1,
 };
 static const struct ds_configuration ds_cfg_core2_atom = {
 	.name = "Core 2/Atom",
 	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
 	.ctl[dsf_bts_kernel]	= (1 << 9),
 	.ctl[dsf_bts_user]	= (1 << 10),
+	.nr_counter_reset	= 1,
+};
+static const struct ds_configuration ds_cfg_core_i7 = {
+	.name = "Core i7",
+	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
+	.ctl[dsf_bts_kernel]	= (1 << 9),
+	.ctl[dsf_bts_user]	= (1 << 10),
+	.nr_counter_reset	= 4,
 };
 
 static void
@@ -1239,6 +1266,32 @@ ds_configure(const struct ds_configuration *cfg,
 	nr_pebs_fields = 18;
 #endif
 
+	/*
+	 * Starting with version 2, architectural performance
+	 * monitoring supports a format specifier.
+	 */
+	if ((cpuid_eax(0xa) & 0xff) > 1) {
+		unsigned long perf_capabilities, format;
+
+		rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+		format = (perf_capabilities >> 8) & 0xf;
+
+		switch (format) {
+		case 0:
+			nr_pebs_fields = 18;
+			break;
+		case 1:
+			nr_pebs_fields = 22;
+			break;
+		default:
+			printk(KERN_INFO
+			       "[ds] unknown PEBS format: %lu\n", format);
+			nr_pebs_fields = 0;
+			break;
+		}
+	}
+
 	memset(&ds_cfg, 0, sizeof(ds_cfg));
 	ds_cfg = *cfg;
 
@@ -1262,7 +1315,7 @@ ds_configure(const struct ds_configuration *cfg,
 	printk("bts/pebs record: %u/%u bytes\n",
 	       ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
 
-	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_ptr_field));
+	WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -1284,6 +1337,8 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 			ds_configure(&ds_cfg_core2_atom, c);
 			break;
 		case 0x1a: /* Core i7 */
+			ds_configure(&ds_cfg_core_i7, c);
+			break;
 		default:
 			/* Sorry, don't know about them. */
 			break;
-- 
cgit v0.10.2


From a5dec5573f3c7e63f2f9b5852b9759ea342a5ff9 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 27 Mar 2009 14:55:44 +0800
Subject: tracing: use macros to denote usec and nsec per second

Impact: cleanup

Use USEC_PER_SEC and NSEC_PER_SEC instead of 1000000 and 1000000000.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <49CC7870.9000309@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 7a30fc4..a29ef23 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -9,6 +9,7 @@
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
 #include <linux/kallsyms.h>
+#include <linux/time.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -67,7 +68,7 @@ initcall_call_print_line(struct trace_iterator *iter)
 	trace_assign_type(field, entry);
 	call = &field->boot_call;
 	ts = iter->ts;
-	nsec_rem = do_div(ts, 1000000000);
+	nsec_rem = do_div(ts, NSEC_PER_SEC);
 
 	ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
 			(unsigned long)ts, nsec_rem, call->func, call->caller);
@@ -92,7 +93,7 @@ initcall_ret_print_line(struct trace_iterator *iter)
 	trace_assign_type(field, entry);
 	init_ret = &field->boot_ret;
 	ts = iter->ts;
-	nsec_rem = do_div(ts, 1000000000);
+	nsec_rem = do_div(ts, NSEC_PER_SEC);
 
 	ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
 			"returned %d after %llu msecs\n",
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 8e37fcd..d53b45e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,8 @@
 #include <linux/kernel.h>
 #include <linux/mmiotrace.h>
 #include <linux/pci.h>
+#include <linux/time.h>
+
 #include <asm/atomic.h>
 
 #include "trace.h"
@@ -174,7 +176,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 	struct mmiotrace_rw *rw;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
-	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned long usec_rem	= do_div(t, USEC_PER_SEC);
 	unsigned secs		= (unsigned long)t;
 	int ret = 1;
 
@@ -221,7 +223,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
 	struct mmiotrace_map *m;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
-	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned long usec_rem	= do_div(t, USEC_PER_SEC);
 	unsigned secs		= (unsigned long)t;
 	int ret;
 
-- 
cgit v0.10.2


From 5452af664f6fba26b80eb2c8c4ceae2999d5cf56 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 27 Mar 2009 00:25:38 +0100
Subject: tracing/ftrace: factorize the tracing files creation

Impact: cleanup

Most of the tracing files creation follow the same pattern:

ret = debugfs_create_file(...)
if (!ret)
	pr_warning("Couldn't create ... entry\n")

Unify it!

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1238109938-11840-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 678e3d6..6ea5a1a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2698,38 +2698,23 @@ static const struct file_operations ftrace_graph_fops = {
 
 static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 {
-	struct dentry *entry;
 
-	entry = debugfs_create_file("available_filter_functions", 0444,
-				    d_tracer, NULL, &ftrace_avail_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'available_filter_functions' entry\n");
+	trace_create_file("available_filter_functions", 0444,
+			d_tracer, NULL, &ftrace_avail_fops);
 
-	entry = debugfs_create_file("failures", 0444,
-				    d_tracer, NULL, &ftrace_failures_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'failures' entry\n");
+	trace_create_file("failures", 0444,
+			d_tracer, NULL, &ftrace_failures_fops);
 
-	entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
-				    NULL, &ftrace_filter_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_filter' entry\n");
+	trace_create_file("set_ftrace_filter", 0644, d_tracer,
+			NULL, &ftrace_filter_fops);
 
-	entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
+	trace_create_file("set_ftrace_notrace", 0644, d_tracer,
 				    NULL, &ftrace_notrace_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_notrace' entry\n");
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
+	trace_create_file("set_graph_function", 0444, d_tracer,
 				    NULL,
 				    &ftrace_graph_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 	return 0;
@@ -2987,7 +2972,6 @@ static const struct file_operations ftrace_pid_fops = {
 static __init int ftrace_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
@@ -2995,11 +2979,8 @@ static __init int ftrace_init_debugfs(void)
 
 	ftrace_init_dyn_debugfs(d_tracer);
 
-	entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
-				    NULL, &ftrace_pid_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'set_ftrace_pid' entry\n");
+	trace_create_file("set_ftrace_pid", 0644, d_tracer,
+			    NULL, &ftrace_pid_fops);
 
 	ftrace_profile_debugfs(d_tracer);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 960cbf4..74a1180 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2845,14 +2845,11 @@ static const struct file_operations rb_simple_fops = {
 static __init int rb_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("tracing_on", 0644, d_tracer,
-				    &ring_buffer_flags, &rb_simple_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_on' entry\n");
+	trace_create_file("tracing_on", 0644, d_tracer,
+			    &ring_buffer_flags, &rb_simple_fops);
 
 	return 0;
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 32653c8..0615751 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3581,7 +3581,7 @@ struct dentry *tracing_dentry_percpu(void)
 static void tracing_init_debugfs_percpu(long cpu)
 {
 	struct dentry *d_percpu = tracing_dentry_percpu();
-	struct dentry *entry, *d_cpu;
+	struct dentry *d_cpu;
 	/* strlen(cpu) + MAX(log10(cpu)) + '\0' */
 	char cpu_dir[7];
 
@@ -3596,21 +3596,15 @@ static void tracing_init_debugfs_percpu(long cpu)
 	}
 
 	/* per cpu trace_pipe */
-	entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
-				(void *) cpu, &tracing_pipe_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_pipe' entry\n");
+	trace_create_file("trace_pipe", 0444, d_cpu,
+			(void *) cpu, &tracing_pipe_fops);
 
 	/* per cpu trace */
-	entry = debugfs_create_file("trace", 0644, d_cpu,
-				(void *) cpu, &tracing_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
+	trace_create_file("trace", 0644, d_cpu,
+			(void *) cpu, &tracing_fops);
 
-	entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu,
-				    (void *) cpu, &tracing_buffers_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
+	trace_create_file("trace_pipe_raw", 0444, d_cpu,
+			(void *) cpu, &tracing_buffers_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -3766,6 +3760,22 @@ static const struct file_operations trace_options_core_fops = {
 	.write = trace_options_core_write,
 };
 
+struct dentry *trace_create_file(const char *name,
+				 mode_t mode,
+				 struct dentry *parent,
+				 void *data,
+				 const struct file_operations *fops)
+{
+	struct dentry *ret;
+
+	ret = debugfs_create_file(name, mode, parent, data, fops);
+	if (!ret)
+		pr_warning("Could not create debugfs '%s' entry\n", name);
+
+	return ret;
+}
+
+
 static struct dentry *trace_options_init_dentry(void)
 {
 	struct dentry *d_tracer;
@@ -3793,7 +3803,6 @@ create_trace_option_file(struct trace_option_dentry *topt,
 			 struct tracer_opt *opt)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
@@ -3802,11 +3811,9 @@ create_trace_option_file(struct trace_option_dentry *topt,
 	topt->flags = flags;
 	topt->opt = opt;
 
-	entry = debugfs_create_file(opt->name, 0644, t_options, topt,
+	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
 				    &trace_options_fops);
 
-	topt->entry = entry;
-
 }
 
 static struct trace_option_dentry *
@@ -3861,123 +3868,81 @@ static struct dentry *
 create_trace_option_core_file(const char *option, long index)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
 		return NULL;
 
-	entry = debugfs_create_file(option, 0644, t_options, (void *)index,
+	return trace_create_file(option, 0644, t_options, (void *)index,
 				    &trace_options_core_fops);
-
-	return entry;
 }
 
 static __init void create_trace_options_dir(void)
 {
 	struct dentry *t_options;
-	struct dentry *entry;
 	int i;
 
 	t_options = trace_options_init_dentry();
 	if (!t_options)
 		return;
 
-	for (i = 0; trace_options[i]; i++) {
-		entry = create_trace_option_core_file(trace_options[i], i);
-		if (!entry)
-			pr_warning("Could not create debugfs %s entry\n",
-				   trace_options[i]);
-	}
+	for (i = 0; trace_options[i]; i++)
+		create_trace_option_core_file(trace_options[i], i);
 }
 
 static __init int tracer_init_debugfs(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 	int cpu;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
-				    &global_trace, &tracing_ctrl_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
+	trace_create_file("tracing_enabled", 0644, d_tracer,
+			&global_trace, &tracing_ctrl_fops);
 
-	entry = debugfs_create_file("trace_options", 0644, d_tracer,
-				    NULL, &tracing_iter_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace_options' entry\n");
+	trace_create_file("trace_options", 0644, d_tracer,
+			NULL, &tracing_iter_fops);
 
-	create_trace_options_dir();
+	trace_create_file("tracing_cpumask", 0644, d_tracer,
+			NULL, &tracing_cpumask_fops);
+
+	trace_create_file("trace", 0644, d_tracer,
+			(void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
+
+	trace_create_file("available_tracers", 0444, d_tracer,
+			&global_trace, &show_traces_fops);
+
+	trace_create_file("current_tracer", 0444, d_tracer,
+			&global_trace, &set_tracer_fops);
+
+	trace_create_file("tracing_max_latency", 0644, d_tracer,
+			&tracing_max_latency, &tracing_max_lat_fops);
+
+	trace_create_file("tracing_thresh", 0644, d_tracer,
+			&tracing_thresh, &tracing_max_lat_fops);
 
-	entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
-				    NULL, &tracing_cpumask_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
-
-	entry = debugfs_create_file("trace", 0644, d_tracer,
-				 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'trace' entry\n");
-
-	entry = debugfs_create_file("available_tracers", 0444, d_tracer,
-				    &global_trace, &show_traces_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'available_tracers' entry\n");
-
-	entry = debugfs_create_file("current_tracer", 0444, d_tracer,
-				    &global_trace, &set_tracer_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'current_tracer' entry\n");
-
-	entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
-				    &tracing_max_latency,
-				    &tracing_max_lat_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'tracing_max_latency' entry\n");
-
-	entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
-				    &tracing_thresh, &tracing_max_lat_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'tracing_thresh' entry\n");
-	entry = debugfs_create_file("README", 0644, d_tracer,
-				    NULL, &tracing_readme_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'README' entry\n");
-
-	entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
+	trace_create_file("README", 0644, d_tracer,
+			NULL, &tracing_readme_fops);
+
+	trace_create_file("trace_pipe", 0444, d_tracer,
 			(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'trace_pipe' entry\n");
-
-	entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
-				    &global_trace, &tracing_entries_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'buffer_size_kb' entry\n");
-
-	entry = debugfs_create_file("trace_marker", 0220, d_tracer,
-				    NULL, &tracing_mark_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'trace_marker' entry\n");
+
+	trace_create_file("buffer_size_kb", 0644, d_tracer,
+			&global_trace, &tracing_entries_fops);
+
+	trace_create_file("trace_marker", 0220, d_tracer,
+			NULL, &tracing_mark_fops);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
-				    &ftrace_update_tot_cnt,
-				    &tracing_dyn_info_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'dyn_ftrace_total_info' entry\n");
+	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 #ifdef CONFIG_SYSPROF_TRACER
 	init_tracer_sysprof_debugfs(d_tracer);
 #endif
 
+	create_trace_options_dir();
+
 	for_each_tracing_cpu(cpu)
 		tracing_init_debugfs_percpu(cpu);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 47aa6d0..f76a8f8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -470,6 +470,12 @@ void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
 int tracing_open_generic(struct inode *inode, struct file *filp);
+struct dentry *trace_create_file(const char *name,
+				 mode_t mode,
+				 struct dentry *parent,
+				 void *data,
+				 const struct file_operations *fops);
+
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 22cba99..199de9c 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -28,4 +28,3 @@ void ftrace_profile_disable(int event_id)
 			return event->profile_disable(event);
 	}
 }
-
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index eb81556..9bece96 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = {
 static __init int init_trace_printk_function_export(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
 		return 0;
 
-	entry = debugfs_create_file("printk_formats", 0444, d_tracer,
+	trace_create_file("printk_formats", 0444, d_tracer,
 				    NULL, &ftrace_formats_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'printk_formats' entry\n");
 
 	return 0;
 }
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index c750f65..1796f00 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace);
 static __init int stack_trace_init(void)
 {
 	struct dentry *d_tracer;
-	struct dentry *entry;
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
-				    &max_stack_size, &stack_max_size_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+	trace_create_file("stack_max_size", 0644, d_tracer,
+			&max_stack_size, &stack_max_size_fops);
 
-	entry = debugfs_create_file("stack_trace", 0444, d_tracer,
-				    NULL, &stack_trace_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'stack_trace' entry\n");
+	trace_create_file("stack_trace", 0444, d_tracer,
+			NULL, &stack_trace_fops);
 
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 91fd19c..e04b76c 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = {
 
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
 {
-	struct dentry *entry;
 
-	entry = debugfs_create_file("sysprof_sample_period", 0644,
+	trace_create_file("sysprof_sample_period", 0644,
 			d_tracer, NULL, &sysprof_sample_fops);
-	if (entry)
-		return;
-	pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
 }
-- 
cgit v0.10.2


From 597af81537654097b67fd7a0c92775e66d4a86fe Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 3 Apr 2009 15:24:12 -0400
Subject: function-graph: use int instead of atomic for ftrace_graph_active

Impact: cleanup

The variable ftrace_graph_active is only modified under the
ftrace_lock mutex, thus an atomic is not necessary for modification.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6ea5a1a..8e6a0b5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3092,7 +3092,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-static atomic_t ftrace_graph_active;
+static int ftrace_graph_active;
 static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
@@ -3244,7 +3244,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 	mutex_lock(&ftrace_lock);
 
 	/* we currently allow only one tracer registered at a time */
-	if (atomic_read(&ftrace_graph_active)) {
+	if (ftrace_graph_active) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -3252,10 +3252,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 	ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
 	register_pm_notifier(&ftrace_suspend_notifier);
 
-	atomic_inc(&ftrace_graph_active);
+	ftrace_graph_active++;
 	ret = start_graph_tracing();
 	if (ret) {
-		atomic_dec(&ftrace_graph_active);
+		ftrace_graph_active--;
 		goto out;
 	}
 
@@ -3273,10 +3273,10 @@ void unregister_ftrace_graph(void)
 {
 	mutex_lock(&ftrace_lock);
 
-	if (!unlikely(atomic_read(&ftrace_graph_active)))
+	if (unlikely(!ftrace_graph_active))
 		goto out;
 
-	atomic_dec(&ftrace_graph_active);
+	ftrace_graph_active--;
 	unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -3290,7 +3290,7 @@ void unregister_ftrace_graph(void)
 /* Allocate a return stack for newly created task */
 void ftrace_graph_init_task(struct task_struct *t)
 {
-	if (atomic_read(&ftrace_graph_active)) {
+	if (ftrace_graph_active) {
 		t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
 				* sizeof(struct ftrace_ret_stack),
 				GFP_KERNEL);
-- 
cgit v0.10.2


From dcef788eb9659b61a2110284fcce3ca6e63480d2 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Tue, 31 Mar 2009 15:26:14 +0800
Subject: ftrace: clean up enable logic for sched_switch

Unify sched_switch and sched_wakeup's action to following logic:
Do record_cmdline when start_cmdline_record() is called.
Start tracing events when the tracer is started.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
LKML-Reference: <49D1C596.5050203@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 9117cea..9d8cccd 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -29,13 +29,13 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 	int cpu;
 	int pc;
 
-	if (!sched_ref || sched_stopped)
+	if (unlikely(!sched_ref))
 		return;
 
 	tracing_record_cmdline(prev);
 	tracing_record_cmdline(next);
 
-	if (!tracer_enabled)
+	if (!tracer_enabled || sched_stopped)
 		return;
 
 	pc = preempt_count();
@@ -56,15 +56,15 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
 	unsigned long flags;
 	int cpu, pc;
 
-	if (!likely(tracer_enabled))
+	if (unlikely(!sched_ref))
 		return;
 
-	pc = preempt_count();
 	tracing_record_cmdline(current);
 
-	if (sched_stopped)
+	if (!tracer_enabled || sched_stopped)
 		return;
 
+	pc = preempt_count();
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
-- 
cgit v0.10.2


From 9c83633ad38138855181af6936e8ac570ef7e2cb Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 7 Apr 2009 14:48:16 +0300
Subject: missing unlock in jfs_quota_write()

We should unlock &inode->i_mutex on the error path.  This bug was
in ext2_quota_write().  I sent a patch to them today as well.

Found by smatch (http://repo.or.cz/w/smatch.git).  Compile tested.

regards,
dan carpenter

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>

diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 6f21adf..d9b0e92 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -720,8 +720,10 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
 		blk++;
 	}
 out:
-	if (len == towrite)
+	if (len == towrite) {
+		mutex_unlock(&inode->i_mutex);
 		return err;
+	}
 	if (inode->i_size < off+len-towrite)
 		i_size_write(inode, off+len-towrite);
 	inode->i_version++;
-- 
cgit v0.10.2


From 6553e192d48af88184029066c30c9464516ea0b7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 6 Apr 2009 16:59:32 +0100
Subject: ASoC: Display return code when failing to add a DAPM kcontrol

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 735903a..46485de 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -357,8 +357,9 @@ static int dapm_new_mixer(struct snd_soc_codec *codec,
 				path->long_name);
 			ret = snd_ctl_add(codec->card, path->kcontrol);
 			if (ret < 0) {
-				printk(KERN_ERR "asoc: failed to add dapm kcontrol %s\n",
-						path->long_name);
+				printk(KERN_ERR "asoc: failed to add dapm kcontrol %s: %d\n",
+				       path->long_name,
+				       ret);
 				kfree(path->long_name);
 				path->long_name = NULL;
 				return ret;
-- 
cgit v0.10.2


From 06f409d76f1d382167eb1cadde2e23a73272865d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 7 Apr 2009 18:10:13 +0100
Subject: ASoC: Provide core support for symmetric sample rates

Many devices require symmetric configurations of capture and playback
data formats, often due to shared clocking but sometimes also due to
other shared playback and record configuration in the device. Start
providing core support for this by allowing the DAIs or the machine
to specify that the sample rates used should be kept symmetric.

A flag symmetric_rates is provided in the snd_soc_dai and
snd_soc_dai_link structures. If this is set in either of the DAIs or in
the machine then a constraint will be applied when a stream is already
open preventing any changes in sample rate.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 1367647..22b729f 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -208,6 +208,7 @@ struct snd_soc_dai {
 	/* DAI capabilities */
 	struct snd_soc_pcm_stream capture;
 	struct snd_soc_pcm_stream playback;
+	unsigned int symmetric_rates:1;
 
 	/* DAI runtime info */
 	struct snd_pcm_runtime *runtime;
diff --git a/include/sound/soc.h b/include/sound/soc.h
index a40bc6f..b1f2f88 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -417,6 +417,12 @@ struct snd_soc_dai_link  {
 	/* codec/machine specific init - e.g. add machine controls */
 	int (*init)(struct snd_soc_codec *codec);
 
+	/* Symmetry requirements */
+	unsigned int symmetric_rates:1;
+
+	/* Symmetry data - only valid if symmetry is being enforced */
+	unsigned int rate;
+
 	/* DAI pcm */
 	struct snd_pcm *pcm;
 };
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 99712f6..dd28009 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -113,6 +113,35 @@ static int soc_ac97_dev_register(struct snd_soc_codec *codec)
 }
 #endif
 
+static int soc_pcm_apply_symmetry(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_dai_link *machine = rtd->dai;
+	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
+	struct snd_soc_dai *codec_dai = machine->codec_dai;
+	int ret;
+
+	if (codec_dai->symmetric_rates || cpu_dai->symmetric_rates ||
+	    machine->symmetric_rates) {
+		dev_dbg(card->dev, "Symmetry forces %dHz rate\n", 
+			machine->rate);
+
+		ret = snd_pcm_hw_constraint_minmax(substream->runtime,
+						   SNDRV_PCM_HW_PARAM_RATE,
+						   machine->rate,
+						   machine->rate);
+		if (ret < 0) {
+			dev_err(card->dev,
+				"Unable to apply rate symmetry constraint: %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Called by ALSA when a PCM substream is opened, the runtime->hw record is
  * then initialized and any private data can be allocated. This also calls
@@ -221,6 +250,13 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 		goto machine_err;
 	}
 
+	/* Symmetry only applies if we've already got an active stream. */
+	if (cpu_dai->active || codec_dai->active) {
+		ret = soc_pcm_apply_symmetry(substream);
+		if (ret != 0)
+			goto machine_err;
+	}
+
 	pr_debug("asoc: %s <-> %s info:\n", codec_dai->name, cpu_dai->name);
 	pr_debug("asoc: rate mask 0x%x\n", runtime->hw.rates);
 	pr_debug("asoc: min ch %d max ch %d\n", runtime->hw.channels_min,
@@ -521,6 +557,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 		}
 	}
 
+	machine->rate = params_rate(params);
+
 out:
 	mutex_unlock(&pcm_mutex);
 	return ret;
-- 
cgit v0.10.2


From 5409fb4e327a84972483047ecf4fb41f279453e2 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 7 Apr 2009 18:45:21 +0100
Subject: ASoC: Add WM8988 CODEC driver

The WM8988 is a low power, high quality stereo CODEC designed for
portable digital audio applications.

The device integrates complete interfaces to 2 stereo headphone or line
out ports. External component requirements are drastically reduced as no
separate headphone amplifiers are required. Advanced on-chip digital
signal processing performs graphic equaliser, 3-D sound enhancement and
automatic level control for the microphone or line input.

The WM8988 can operate as a master or a slave, with various master clock
frequencies including 12 or 24MHz for USB devices, or standard 256fs
rates like 12.288MHz and 24.576MHz. Different audio sample rates such as
96kHz, 48kHz, 44.1kHz are generated directly from the master clock
without the need for an external PLL.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index b6c7f7a..ab36485 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -36,6 +36,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_WM8900 if I2C
 	select SND_SOC_WM8903 if I2C
 	select SND_SOC_WM8971 if I2C
+	select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI
 	select SND_SOC_WM8990 if I2C
 	select SND_SOC_WM9705 if SND_SOC_AC97_BUS
 	select SND_SOC_WM9712 if SND_SOC_AC97_BUS
@@ -141,6 +142,9 @@ config SND_SOC_WM8903
 config SND_SOC_WM8971
 	tristate
 
+config SND_SOC_WM8988
+	tristate
+
 config SND_SOC_WM8990
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 030d245..a72548d 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -24,6 +24,7 @@ snd-soc-wm8753-objs := wm8753.o
 snd-soc-wm8900-objs := wm8900.o
 snd-soc-wm8903-objs := wm8903.o
 snd-soc-wm8971-objs := wm8971.o
+snd-soc-wm8988-objs := wm8988.o
 snd-soc-wm8990-objs := wm8990.o
 snd-soc-wm9705-objs := wm9705.o
 snd-soc-wm9712-objs := wm9712.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_SND_SOC_WM8753)	+= snd-soc-wm8753.o
 obj-$(CONFIG_SND_SOC_WM8900)	+= snd-soc-wm8900.o
 obj-$(CONFIG_SND_SOC_WM8903)	+= snd-soc-wm8903.o
 obj-$(CONFIG_SND_SOC_WM8971)	+= snd-soc-wm8971.o
+obj-$(CONFIG_SND_SOC_WM8988)	+= snd-soc-wm8988.o
 obj-$(CONFIG_SND_SOC_WM8990)	+= snd-soc-wm8990.o
 obj-$(CONFIG_SND_SOC_WM8991)	+= snd-soc-wm8991.o
 obj-$(CONFIG_SND_SOC_WM9705)	+= snd-soc-wm9705.o
diff --git a/sound/soc/codecs/wm8988.c b/sound/soc/codecs/wm8988.c
new file mode 100644
index 0000000..c05f718
--- /dev/null
+++ b/sound/soc/codecs/wm8988.c
@@ -0,0 +1,1097 @@
+/*
+ * wm8988.c -- WM8988 ALSA SoC audio driver
+ *
+ * Copyright 2009 Wolfson Microelectronics plc
+ * Copyright 2005 Openedhand Ltd.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/spi/spi.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/tlv.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+
+#include "wm8988.h"
+
+/*
+ * wm8988 register cache
+ * We can't read the WM8988 register space when we
+ * are using 2 wire for device control, so we cache them instead.
+ */
+static const u16 wm8988_reg[] = {
+	0x0097, 0x0097, 0x0079, 0x0079,  /*  0 */
+	0x0000, 0x0008, 0x0000, 0x000a,  /*  4 */
+	0x0000, 0x0000, 0x00ff, 0x00ff,  /*  8 */
+	0x000f, 0x000f, 0x0000, 0x0000,  /* 12 */
+	0x0000, 0x007b, 0x0000, 0x0032,  /* 16 */
+	0x0000, 0x00c3, 0x00c3, 0x00c0,  /* 20 */
+	0x0000, 0x0000, 0x0000, 0x0000,  /* 24 */
+	0x0000, 0x0000, 0x0000, 0x0000,  /* 28 */
+	0x0000, 0x0000, 0x0050, 0x0050,  /* 32 */
+	0x0050, 0x0050, 0x0050, 0x0050,  /* 36 */
+	0x0079, 0x0079, 0x0079,          /* 40 */
+};
+
+/* codec private data */
+struct wm8988_priv {
+	unsigned int sysclk;
+	struct snd_soc_codec codec;
+	struct snd_pcm_hw_constraint_list *sysclk_constraints;
+	u16 reg_cache[WM8988_NUM_REG];
+};
+
+
+/*
+ * read wm8988 register cache
+ */
+static inline unsigned int wm8988_read_reg_cache(struct snd_soc_codec *codec,
+	unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+	if (reg > WM8988_NUM_REG)
+		return -1;
+	return cache[reg];
+}
+
+/*
+ * write wm8988 register cache
+ */
+static inline void wm8988_write_reg_cache(struct snd_soc_codec *codec,
+	unsigned int reg, unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+	if (reg > WM8988_NUM_REG)
+		return;
+	cache[reg] = value;
+}
+
+static int wm8988_write(struct snd_soc_codec *codec, unsigned int reg,
+	unsigned int value)
+{
+	u8 data[2];
+
+	/* data is
+	 *   D15..D9 WM8753 register offset
+	 *   D8...D0 register data
+	 */
+	data[0] = (reg << 1) | ((value >> 8) & 0x0001);
+	data[1] = value & 0x00ff;
+
+	wm8988_write_reg_cache(codec, reg, value);
+	if (codec->hw_write(codec->control_data, data, 2) == 2)
+		return 0;
+	else
+		return -EIO;
+}
+
+#define wm8988_reset(c)	wm8988_write(c, WM8988_RESET, 0)
+
+/*
+ * WM8988 Controls
+ */
+
+static const char *bass_boost_txt[] = {"Linear Control", "Adaptive Boost"};
+static const struct soc_enum bass_boost =
+	SOC_ENUM_SINGLE(WM8988_BASS, 7, 2, bass_boost_txt);
+
+static const char *bass_filter_txt[] = { "130Hz @ 48kHz", "200Hz @ 48kHz" };
+static const struct soc_enum bass_filter =
+	SOC_ENUM_SINGLE(WM8988_BASS, 6, 2, bass_filter_txt);
+
+static const char *treble_txt[] = {"8kHz", "4kHz"};
+static const struct soc_enum treble =
+	SOC_ENUM_SINGLE(WM8988_TREBLE, 6, 2, treble_txt);
+
+static const char *stereo_3d_lc_txt[] = {"200Hz", "500Hz"};
+static const struct soc_enum stereo_3d_lc =
+	SOC_ENUM_SINGLE(WM8988_3D, 5, 2, stereo_3d_lc_txt);
+
+static const char *stereo_3d_uc_txt[] = {"2.2kHz", "1.5kHz"};
+static const struct soc_enum stereo_3d_uc =
+	SOC_ENUM_SINGLE(WM8988_3D, 6, 2, stereo_3d_uc_txt);
+
+static const char *stereo_3d_func_txt[] = {"Capture", "Playback"};
+static const struct soc_enum stereo_3d_func =
+	SOC_ENUM_SINGLE(WM8988_3D, 7, 2, stereo_3d_func_txt);
+
+static const char *alc_func_txt[] = {"Off", "Right", "Left", "Stereo"};
+static const struct soc_enum alc_func =
+	SOC_ENUM_SINGLE(WM8988_ALC1, 7, 4, alc_func_txt);
+
+static const char *ng_type_txt[] = {"Constant PGA Gain",
+				    "Mute ADC Output"};
+static const struct soc_enum ng_type =
+	SOC_ENUM_SINGLE(WM8988_NGATE, 1, 2, ng_type_txt);
+
+static const char *deemph_txt[] = {"None", "32Khz", "44.1Khz", "48Khz"};
+static const struct soc_enum deemph =
+	SOC_ENUM_SINGLE(WM8988_ADCDAC, 1, 4, deemph_txt);
+
+static const char *adcpol_txt[] = {"Normal", "L Invert", "R Invert",
+				   "L + R Invert"};
+static const struct soc_enum adcpol =
+	SOC_ENUM_SINGLE(WM8988_ADCDAC, 5, 4, adcpol_txt);
+
+static const DECLARE_TLV_DB_SCALE(pga_tlv, -1725, 75, 0);
+static const DECLARE_TLV_DB_SCALE(adc_tlv, -9750, 50, 1);
+static const DECLARE_TLV_DB_SCALE(dac_tlv, -12750, 50, 1);
+static const DECLARE_TLV_DB_SCALE(out_tlv, -12100, 100, 1);
+static const DECLARE_TLV_DB_SCALE(bypass_tlv, -1500, 300, 0);
+
+static const struct snd_kcontrol_new wm8988_snd_controls[] = {
+
+SOC_ENUM("Bass Boost", bass_boost),
+SOC_ENUM("Bass Filter", bass_filter),
+SOC_SINGLE("Bass Volume", WM8988_BASS, 0, 15, 1),
+
+SOC_SINGLE("Treble Volume", WM8988_TREBLE, 0, 15, 0),
+SOC_ENUM("Treble Cut-off", treble),
+
+SOC_SINGLE("3D Switch", WM8988_3D, 0, 1, 0),
+SOC_SINGLE("3D Volume", WM8988_3D, 1, 15, 0),
+SOC_ENUM("3D Lower Cut-off", stereo_3d_lc),
+SOC_ENUM("3D Upper Cut-off", stereo_3d_uc),
+SOC_ENUM("3D Mode", stereo_3d_func),
+
+SOC_SINGLE("ALC Capture Target Volume", WM8988_ALC1, 0, 7, 0),
+SOC_SINGLE("ALC Capture Max Volume", WM8988_ALC1, 4, 7, 0),
+SOC_ENUM("ALC Capture Function", alc_func),
+SOC_SINGLE("ALC Capture ZC Switch", WM8988_ALC2, 7, 1, 0),
+SOC_SINGLE("ALC Capture Hold Time", WM8988_ALC2, 0, 15, 0),
+SOC_SINGLE("ALC Capture Decay Time", WM8988_ALC3, 4, 15, 0),
+SOC_SINGLE("ALC Capture Attack Time", WM8988_ALC3, 0, 15, 0),
+SOC_SINGLE("ALC Capture NG Threshold", WM8988_NGATE, 3, 31, 0),
+SOC_ENUM("ALC Capture NG Type", ng_type),
+SOC_SINGLE("ALC Capture NG Switch", WM8988_NGATE, 0, 1, 0),
+
+SOC_SINGLE("ZC Timeout Switch", WM8988_ADCTL1, 0, 1, 0),
+
+SOC_DOUBLE_R_TLV("Capture Digital Volume", WM8988_LADC, WM8988_RADC,
+		 0, 255, 0, adc_tlv),
+SOC_DOUBLE_R_TLV("Capture Volume", WM8988_LINVOL, WM8988_RINVOL,
+		 0, 63, 0, pga_tlv),
+SOC_DOUBLE_R("Capture ZC Switch", WM8988_LINVOL, WM8988_RINVOL, 6, 1, 0),
+SOC_DOUBLE_R("Capture Switch", WM8988_LINVOL, WM8988_RINVOL, 7, 1, 1),
+
+SOC_ENUM("Playback De-emphasis", deemph),
+
+SOC_ENUM("Capture Polarity", adcpol),
+SOC_SINGLE("Playback 6dB Attenuate", WM8988_ADCDAC, 7, 1, 0),
+SOC_SINGLE("Capture 6dB Attenuate", WM8988_ADCDAC, 8, 1, 0),
+
+SOC_DOUBLE_R_TLV("PCM Volume", WM8988_LDAC, WM8988_RDAC, 0, 255, 0, dac_tlv),
+
+SOC_SINGLE_TLV("Left Mixer Left Bypass Volume", WM8988_LOUTM1, 4, 7, 1,
+	       bypass_tlv),
+SOC_SINGLE_TLV("Left Mixer Right Bypass Volume", WM8988_LOUTM2, 4, 7, 1,
+	       bypass_tlv),
+SOC_SINGLE_TLV("Right Mixer Left Bypass Volume", WM8988_ROUTM1, 4, 7, 1,
+	       bypass_tlv),
+SOC_SINGLE_TLV("Right Mixer Right Bypass Volume", WM8988_ROUTM2, 4, 7, 1,
+	       bypass_tlv),
+
+SOC_DOUBLE_R("Output 1 Playback ZC Switch", WM8988_LOUT1V,
+	     WM8988_ROUT1V, 7, 1, 0),
+SOC_DOUBLE_R_TLV("Output 1 Playback Volume", WM8988_LOUT1V, WM8988_ROUT1V,
+		 0, 127, 0, out_tlv),
+
+SOC_DOUBLE_R("Output 2 Playback ZC Switch", WM8988_LOUT2V,
+	     WM8988_ROUT2V, 7, 1, 0),
+SOC_DOUBLE_R_TLV("Output 2 Playback Volume", WM8988_LOUT2V, WM8988_ROUT2V,
+		 0, 127, 0, out_tlv),
+
+};
+
+/*
+ * DAPM Controls
+ */
+
+static int wm8988_lrc_control(struct snd_soc_dapm_widget *w,
+			      struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	u16 adctl2 = wm8988_read_reg_cache(codec, WM8988_ADCTL2);
+
+	/* Use the DAC to gate LRC if active, otherwise use ADC */
+	if (wm8988_read_reg_cache(codec, WM8988_PWR2) & 0x180)
+		adctl2 &= ~0x4;
+	else
+		adctl2 |= 0x4;
+
+	return wm8988_write(codec, WM8988_ADCTL2, adctl2);
+}
+
+static const char *wm8988_line_texts[] = {
+	"Line 1", "Line 2", "PGA", "Differential"};
+
+static const unsigned int wm8988_line_values[] = {
+	0, 1, 3, 4};
+
+static const struct soc_enum wm8988_lline_enum =
+	SOC_VALUE_ENUM_SINGLE(WM8988_LOUTM1, 0, 7,
+			      ARRAY_SIZE(wm8988_line_texts),
+			      wm8988_line_texts,
+			      wm8988_line_values);
+static const struct snd_kcontrol_new wm8988_left_line_controls =
+	SOC_DAPM_VALUE_ENUM("Route", wm8988_lline_enum);
+
+static const struct soc_enum wm8988_rline_enum =
+	SOC_VALUE_ENUM_SINGLE(WM8988_ROUTM1, 0, 7,
+			      ARRAY_SIZE(wm8988_line_texts),
+			      wm8988_line_texts,
+			      wm8988_line_values);
+static const struct snd_kcontrol_new wm8988_right_line_controls =
+	SOC_DAPM_VALUE_ENUM("Route", wm8988_lline_enum);
+
+/* Left Mixer */
+static const struct snd_kcontrol_new wm8988_left_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Playback Switch", WM8988_LOUTM1, 8, 1, 0),
+	SOC_DAPM_SINGLE("Left Bypass Switch", WM8988_LOUTM1, 7, 1, 0),
+	SOC_DAPM_SINGLE("Right Playback Switch", WM8988_LOUTM2, 8, 1, 0),
+	SOC_DAPM_SINGLE("Right Bypass Switch", WM8988_LOUTM2, 7, 1, 0),
+};
+
+/* Right Mixer */
+static const struct snd_kcontrol_new wm8988_right_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Left Playback Switch", WM8988_ROUTM1, 8, 1, 0),
+	SOC_DAPM_SINGLE("Left Bypass Switch", WM8988_ROUTM1, 7, 1, 0),
+	SOC_DAPM_SINGLE("Playback Switch", WM8988_ROUTM2, 8, 1, 0),
+	SOC_DAPM_SINGLE("Right Bypass Switch", WM8988_ROUTM2, 7, 1, 0),
+};
+
+static const char *wm8988_pga_sel[] = {"Line 1", "Line 2", "Differential"};
+static const unsigned int wm8988_pga_val[] = { 0, 1, 3 };
+
+/* Left PGA Mux */
+static const struct soc_enum wm8988_lpga_enum =
+	SOC_VALUE_ENUM_SINGLE(WM8988_LADCIN, 6, 3,
+			      ARRAY_SIZE(wm8988_pga_sel),
+			      wm8988_pga_sel,
+			      wm8988_pga_val);
+static const struct snd_kcontrol_new wm8988_left_pga_controls =
+	SOC_DAPM_VALUE_ENUM("Route", wm8988_lpga_enum);
+
+/* Right PGA Mux */
+static const struct soc_enum wm8988_rpga_enum =
+	SOC_VALUE_ENUM_SINGLE(WM8988_RADCIN, 6, 3,
+			      ARRAY_SIZE(wm8988_pga_sel),
+			      wm8988_pga_sel,
+			      wm8988_pga_val);
+static const struct snd_kcontrol_new wm8988_right_pga_controls =
+	SOC_DAPM_VALUE_ENUM("Route", wm8988_rpga_enum);
+
+/* Differential Mux */
+static const char *wm8988_diff_sel[] = {"Line 1", "Line 2"};
+static const struct soc_enum diffmux =
+	SOC_ENUM_SINGLE(WM8988_ADCIN, 8, 2, wm8988_diff_sel);
+static const struct snd_kcontrol_new wm8988_diffmux_controls =
+	SOC_DAPM_ENUM("Route", diffmux);
+
+/* Mono ADC Mux */
+static const char *wm8988_mono_mux[] = {"Stereo", "Mono (Left)",
+	"Mono (Right)", "Digital Mono"};
+static const struct soc_enum monomux =
+	SOC_ENUM_SINGLE(WM8988_ADCIN, 6, 4, wm8988_mono_mux);
+static const struct snd_kcontrol_new wm8988_monomux_controls =
+	SOC_DAPM_ENUM("Route", monomux);
+
+static const struct snd_soc_dapm_widget wm8988_dapm_widgets[] = {
+	SND_SOC_DAPM_MICBIAS("Mic Bias", WM8988_PWR1, 1, 0),
+
+	SND_SOC_DAPM_MUX("Differential Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_diffmux_controls),
+	SND_SOC_DAPM_MUX("Left ADC Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_monomux_controls),
+	SND_SOC_DAPM_MUX("Right ADC Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_monomux_controls),
+
+	SND_SOC_DAPM_MUX("Left PGA Mux", WM8988_PWR1, 5, 0,
+		&wm8988_left_pga_controls),
+	SND_SOC_DAPM_MUX("Right PGA Mux", WM8988_PWR1, 4, 0,
+		&wm8988_right_pga_controls),
+
+	SND_SOC_DAPM_MUX("Left Line Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_left_line_controls),
+	SND_SOC_DAPM_MUX("Right Line Mux", SND_SOC_NOPM, 0, 0,
+		&wm8988_right_line_controls),
+
+	SND_SOC_DAPM_ADC("Right ADC", "Right Capture", WM8988_PWR1, 2, 0),
+	SND_SOC_DAPM_ADC("Left ADC", "Left Capture", WM8988_PWR1, 3, 0),
+
+	SND_SOC_DAPM_DAC("Right DAC", "Right Playback", WM8988_PWR2, 7, 0),
+	SND_SOC_DAPM_DAC("Left DAC", "Left Playback", WM8988_PWR2, 8, 0),
+
+	SND_SOC_DAPM_MIXER("Left Mixer", SND_SOC_NOPM, 0, 0,
+		&wm8988_left_mixer_controls[0],
+		ARRAY_SIZE(wm8988_left_mixer_controls)),
+	SND_SOC_DAPM_MIXER("Right Mixer", SND_SOC_NOPM, 0, 0,
+		&wm8988_right_mixer_controls[0],
+		ARRAY_SIZE(wm8988_right_mixer_controls)),
+
+	SND_SOC_DAPM_PGA("Right Out 2", WM8988_PWR2, 3, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Left Out 2", WM8988_PWR2, 4, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Right Out 1", WM8988_PWR2, 5, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Left Out 1", WM8988_PWR2, 6, 0, NULL, 0),
+
+	SND_SOC_DAPM_POST("LRC control", wm8988_lrc_control),
+
+	SND_SOC_DAPM_OUTPUT("LOUT1"),
+	SND_SOC_DAPM_OUTPUT("ROUT1"),
+	SND_SOC_DAPM_OUTPUT("LOUT2"),
+	SND_SOC_DAPM_OUTPUT("ROUT2"),
+	SND_SOC_DAPM_OUTPUT("VREF"),
+
+	SND_SOC_DAPM_INPUT("LINPUT1"),
+	SND_SOC_DAPM_INPUT("LINPUT2"),
+	SND_SOC_DAPM_INPUT("RINPUT1"),
+	SND_SOC_DAPM_INPUT("RINPUT2"),
+};
+
+static const struct snd_soc_dapm_route audio_map[] = {
+
+	{ "Left Line Mux", "Line 1", "LINPUT1" },
+	{ "Left Line Mux", "Line 2", "LINPUT2" },
+	{ "Left Line Mux", "PGA", "Left PGA Mux" },
+	{ "Left Line Mux", "Differential", "Differential Mux" },
+
+	{ "Right Line Mux", "Line 1", "RINPUT1" },
+	{ "Right Line Mux", "Line 2", "RINPUT2" },
+	{ "Right Line Mux", "PGA", "Right PGA Mux" },
+	{ "Right Line Mux", "Differential", "Differential Mux" },
+
+	{ "Left PGA Mux", "Line 1", "LINPUT1" },
+	{ "Left PGA Mux", "Line 2", "LINPUT2" },
+	{ "Left PGA Mux", "Differential", "Differential Mux" },
+
+	{ "Right PGA Mux", "Line 1", "RINPUT1" },
+	{ "Right PGA Mux", "Line 2", "RINPUT2" },
+	{ "Right PGA Mux", "Differential", "Differential Mux" },
+
+	{ "Differential Mux", "Line 1", "LINPUT1" },
+	{ "Differential Mux", "Line 1", "RINPUT1" },
+	{ "Differential Mux", "Line 2", "LINPUT2" },
+	{ "Differential Mux", "Line 2", "RINPUT2" },
+
+	{ "Left ADC Mux", "Stereo", "Left PGA Mux" },
+	{ "Left ADC Mux", "Mono (Left)", "Left PGA Mux" },
+	{ "Left ADC Mux", "Digital Mono", "Left PGA Mux" },
+
+	{ "Right ADC Mux", "Stereo", "Right PGA Mux" },
+	{ "Right ADC Mux", "Mono (Right)", "Right PGA Mux" },
+	{ "Right ADC Mux", "Digital Mono", "Right PGA Mux" },
+
+	{ "Left ADC", NULL, "Left ADC Mux" },
+	{ "Right ADC", NULL, "Right ADC Mux" },
+
+	{ "Left Line Mux", "Line 1", "LINPUT1" },
+	{ "Left Line Mux", "Line 2", "LINPUT2" },
+	{ "Left Line Mux", "PGA", "Left PGA Mux" },
+	{ "Left Line Mux", "Differential", "Differential Mux" },
+
+	{ "Right Line Mux", "Line 1", "RINPUT1" },
+	{ "Right Line Mux", "Line 2", "RINPUT2" },
+	{ "Right Line Mux", "PGA", "Right PGA Mux" },
+	{ "Right Line Mux", "Differential", "Differential Mux" },
+
+	{ "Left Mixer", "Playback Switch", "Left DAC" },
+	{ "Left Mixer", "Left Bypass Switch", "Left Line Mux" },
+	{ "Left Mixer", "Right Playback Switch", "Right DAC" },
+	{ "Left Mixer", "Right Bypass Switch", "Right Line Mux" },
+
+	{ "Right Mixer", "Left Playback Switch", "Left DAC" },
+	{ "Right Mixer", "Left Bypass Switch", "Left Line Mux" },
+	{ "Right Mixer", "Playback Switch", "Right DAC" },
+	{ "Right Mixer", "Right Bypass Switch", "Right Line Mux" },
+
+	{ "Left Out 1", NULL, "Left Mixer" },
+	{ "LOUT1", NULL, "Left Out 1" },
+	{ "Right Out 1", NULL, "Right Mixer" },
+	{ "ROUT1", NULL, "Right Out 1" },
+
+	{ "Left Out 2", NULL, "Left Mixer" },
+	{ "LOUT2", NULL, "Left Out 2" },
+	{ "Right Out 2", NULL, "Right Mixer" },
+	{ "ROUT2", NULL, "Right Out 2" },
+};
+
+struct _coeff_div {
+	u32 mclk;
+	u32 rate;
+	u16 fs;
+	u8 sr:5;
+	u8 usb:1;
+};
+
+/* codec hifi mclk clock divider coefficients */
+static const struct _coeff_div coeff_div[] = {
+	/* 8k */
+	{12288000, 8000, 1536, 0x6, 0x0},
+	{11289600, 8000, 1408, 0x16, 0x0},
+	{18432000, 8000, 2304, 0x7, 0x0},
+	{16934400, 8000, 2112, 0x17, 0x0},
+	{12000000, 8000, 1500, 0x6, 0x1},
+
+	/* 11.025k */
+	{11289600, 11025, 1024, 0x18, 0x0},
+	{16934400, 11025, 1536, 0x19, 0x0},
+	{12000000, 11025, 1088, 0x19, 0x1},
+
+	/* 16k */
+	{12288000, 16000, 768, 0xa, 0x0},
+	{18432000, 16000, 1152, 0xb, 0x0},
+	{12000000, 16000, 750, 0xa, 0x1},
+
+	/* 22.05k */
+	{11289600, 22050, 512, 0x1a, 0x0},
+	{16934400, 22050, 768, 0x1b, 0x0},
+	{12000000, 22050, 544, 0x1b, 0x1},
+
+	/* 32k */
+	{12288000, 32000, 384, 0xc, 0x0},
+	{18432000, 32000, 576, 0xd, 0x0},
+	{12000000, 32000, 375, 0xa, 0x1},
+
+	/* 44.1k */
+	{11289600, 44100, 256, 0x10, 0x0},
+	{16934400, 44100, 384, 0x11, 0x0},
+	{12000000, 44100, 272, 0x11, 0x1},
+
+	/* 48k */
+	{12288000, 48000, 256, 0x0, 0x0},
+	{18432000, 48000, 384, 0x1, 0x0},
+	{12000000, 48000, 250, 0x0, 0x1},
+
+	/* 88.2k */
+	{11289600, 88200, 128, 0x1e, 0x0},
+	{16934400, 88200, 192, 0x1f, 0x0},
+	{12000000, 88200, 136, 0x1f, 0x1},
+
+	/* 96k */
+	{12288000, 96000, 128, 0xe, 0x0},
+	{18432000, 96000, 192, 0xf, 0x0},
+	{12000000, 96000, 125, 0xe, 0x1},
+};
+
+static inline int get_coeff(int mclk, int rate)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(coeff_div); i++) {
+		if (coeff_div[i].rate == rate && coeff_div[i].mclk == mclk)
+			return i;
+	}
+
+	return -EINVAL;
+}
+
+/* The set of rates we can generate from the above for each SYSCLK */
+
+static unsigned int rates_12288[] = {
+	8000, 12000, 16000, 24000, 24000, 32000, 48000, 96000,
+};
+
+static struct snd_pcm_hw_constraint_list constraints_12288 = {
+	.count	= ARRAY_SIZE(rates_12288),
+	.list	= rates_12288,
+};
+
+static unsigned int rates_112896[] = {
+	8000, 11025, 22050, 44100,
+};
+
+static struct snd_pcm_hw_constraint_list constraints_112896 = {
+	.count	= ARRAY_SIZE(rates_112896),
+	.list	= rates_112896,
+};
+
+static unsigned int rates_12[] = {
+	8000, 11025, 12000, 16000, 22050, 2400, 32000, 41100, 48000,
+	48000, 88235, 96000,
+};
+
+static struct snd_pcm_hw_constraint_list constraints_12 = {
+	.count	= ARRAY_SIZE(rates_12),
+	.list	= rates_12,
+};
+
+/*
+ * Note that this should be called from init rather than from hw_params.
+ */
+static int wm8988_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+		int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm8988_priv *wm8988 = codec->private_data;
+
+	switch (freq) {
+	case 11289600:
+	case 18432000:
+	case 22579200:
+	case 36864000:
+		wm8988->sysclk_constraints = &constraints_112896;
+		wm8988->sysclk = freq;
+		return 0;
+
+	case 12288000:
+	case 16934400:
+	case 24576000:
+	case 33868800:
+		wm8988->sysclk_constraints = &constraints_12288;
+		wm8988->sysclk = freq;
+		return 0;
+
+	case 12000000:
+	case 24000000:
+		wm8988->sysclk_constraints = &constraints_12;
+		wm8988->sysclk = freq;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int wm8988_set_dai_fmt(struct snd_soc_dai *codec_dai,
+		unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 iface = 0;
+
+	/* set master/slave audio interface */
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		iface = 0x0040;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* interface format */
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		iface |= 0x0002;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		iface |= 0x0001;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface |= 0x0003;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		iface |= 0x0013;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* clock inversion */
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		iface |= 0x0090;
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		iface |= 0x0080;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		iface |= 0x0010;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm8988_write(codec, WM8988_IFACE, iface);
+	return 0;
+}
+
+static int wm8988_pcm_startup(struct snd_pcm_substream *substream,
+			      struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm8988_priv *wm8988 = codec->private_data;
+
+	/* The set of sample rates that can be supported depends on the
+	 * MCLK supplied to the CODEC - enforce this.
+	 */
+	if (!wm8988->sysclk) {
+		dev_err(codec->dev,
+			"No MCLK configured, call set_sysclk() on init\n");
+		return -EINVAL;
+	}
+
+	snd_pcm_hw_constraint_list(substream->runtime, 0,
+				   SNDRV_PCM_HW_PARAM_RATE,
+				   wm8988->sysclk_constraints);
+
+	return 0;
+}
+
+static int wm8988_pcm_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	struct wm8988_priv *wm8988 = codec->private_data;
+	u16 iface = wm8988_read_reg_cache(codec, WM8988_IFACE) & 0x1f3;
+	u16 srate = wm8988_read_reg_cache(codec, WM8988_SRATE) & 0x180;
+	int coeff;
+
+	coeff = get_coeff(wm8988->sysclk, params_rate(params));
+	if (coeff < 0) {
+		coeff = get_coeff(wm8988->sysclk / 2, params_rate(params));
+		srate |= 0x40;
+	}
+	if (coeff < 0) {
+		dev_err(codec->dev,
+			"Unable to configure sample rate %dHz with %dHz MCLK\n",
+			params_rate(params), wm8988->sysclk);
+		return coeff;
+	}
+
+	/* bit size */
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		iface |= 0x0004;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		iface |= 0x0008;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		iface |= 0x000c;
+		break;
+	}
+
+	/* set iface & srate */
+	wm8988_write(codec, WM8988_IFACE, iface);
+	if (coeff >= 0)
+		wm8988_write(codec, WM8988_SRATE, srate |
+			(coeff_div[coeff].sr << 1) | coeff_div[coeff].usb);
+
+	return 0;
+}
+
+static int wm8988_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	u16 mute_reg = wm8988_read_reg_cache(codec, WM8988_ADCDAC) & 0xfff7;
+
+	if (mute)
+		wm8988_write(codec, WM8988_ADCDAC, mute_reg | 0x8);
+	else
+		wm8988_write(codec, WM8988_ADCDAC, mute_reg);
+	return 0;
+}
+
+static int wm8988_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	u16 pwr_reg = wm8988_read_reg_cache(codec, WM8988_PWR1) & ~0x1c1;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+
+	case SND_SOC_BIAS_PREPARE:
+		/* VREF, VMID=2x50k, digital enabled */
+		wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x00c0);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		if (codec->bias_level == SND_SOC_BIAS_OFF) {
+			/* VREF, VMID=2x5k */
+			wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x1c1);
+
+			/* Charge caps */
+			msleep(100);
+		}
+
+		/* VREF, VMID=2*500k, digital stopped */
+		wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x0141);
+		break;
+
+	case SND_SOC_BIAS_OFF:
+		wm8988_write(codec, WM8988_PWR1, 0x0000);
+		break;
+	}
+	codec->bias_level = level;
+	return 0;
+}
+
+#define WM8988_RATES SNDRV_PCM_RATE_8000_96000
+
+#define WM8988_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE |\
+	SNDRV_PCM_FMTBIT_S24_LE)
+
+static struct snd_soc_dai_ops wm8988_ops = {
+	.startup = wm8988_pcm_startup,
+	.hw_params = wm8988_pcm_hw_params,
+	.set_fmt = wm8988_set_dai_fmt,
+	.set_sysclk = wm8988_set_dai_sysclk,
+	.digital_mute = wm8988_mute,
+};
+
+struct snd_soc_dai wm8988_dai = {
+	.name = "WM8988",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8988_RATES,
+		.formats = WM8988_FORMATS,
+	},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8988_RATES,
+		.formats = WM8988_FORMATS,
+	 },
+	.ops = &wm8988_ops,
+	.symmetric_rates = 1,
+};
+EXPORT_SYMBOL_GPL(wm8988_dai);
+
+static int wm8988_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	wm8988_set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+
+static int wm8988_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int i;
+	u8 data[2];
+	u16 *cache = codec->reg_cache;
+
+	/* Sync reg_cache with the hardware */
+	for (i = 0; i < WM8988_NUM_REG; i++) {
+		if (i == WM8988_RESET)
+			continue;
+		data[0] = (i << 1) | ((cache[i] >> 8) & 0x0001);
+		data[1] = cache[i] & 0x00ff;
+		codec->hw_write(codec->control_data, data, 2);
+	}
+
+	wm8988_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	return 0;
+}
+
+static struct snd_soc_codec *wm8988_codec;
+
+static int wm8988_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	int ret = 0;
+
+	if (wm8988_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm8988_codec;
+	codec = wm8988_codec;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	snd_soc_add_controls(codec, wm8988_snd_controls,
+				ARRAY_SIZE(wm8988_snd_controls));
+	snd_soc_dapm_new_controls(codec, wm8988_dapm_widgets,
+				  ARRAY_SIZE(wm8988_dapm_widgets));
+	snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+	snd_soc_dapm_new_widgets(codec);
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto card_err;
+	}
+
+	return ret;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+static int wm8988_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+struct snd_soc_codec_device soc_codec_dev_wm8988 = {
+	.probe = 	wm8988_probe,
+	.remove = 	wm8988_remove,
+	.suspend = 	wm8988_suspend,
+	.resume =	wm8988_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm8988);
+
+static int wm8988_register(struct wm8988_priv *wm8988)
+{
+	struct snd_soc_codec *codec = &wm8988->codec;
+	int ret;
+	u16 reg;
+
+	if (wm8988_codec) {
+		dev_err(codec->dev, "Another WM8988 is registered\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	mutex_init(&codec->mutex);
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm8988;
+	codec->name = "WM8988";
+	codec->owner = THIS_MODULE;
+	codec->read = wm8988_read_reg_cache;
+	codec->write = wm8988_write;
+	codec->dai = &wm8988_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = ARRAY_SIZE(wm8988->reg_cache);
+	codec->reg_cache = &wm8988->reg_cache;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm8988_set_bias_level;
+
+	memcpy(codec->reg_cache, wm8988_reg,
+	       sizeof(wm8988_reg));
+
+	ret = wm8988_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	/* set the update bits (we always update left then right) */
+	reg = wm8988_read_reg_cache(codec, WM8988_RADC);
+	wm8988_write(codec, WM8988_RADC, reg | 0x100);
+	reg = wm8988_read_reg_cache(codec, WM8988_RDAC);
+	wm8988_write(codec, WM8988_RDAC, reg | 0x0100);
+	reg = wm8988_read_reg_cache(codec, WM8988_ROUT1V);
+	wm8988_write(codec, WM8988_ROUT1V, reg | 0x0100);
+	reg = wm8988_read_reg_cache(codec, WM8988_ROUT2V);
+	wm8988_write(codec, WM8988_ROUT2V, reg | 0x0100);
+	reg = wm8988_read_reg_cache(codec, WM8988_RINVOL);
+	wm8988_write(codec, WM8988_RINVOL, reg | 0x0100);
+
+	wm8988_set_bias_level(&wm8988->codec, SND_SOC_BIAS_STANDBY);
+
+	wm8988_dai.dev = codec->dev;
+
+	wm8988_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm8988_dai);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+
+err:
+	kfree(wm8988);
+	return ret;
+}
+
+static void wm8988_unregister(struct wm8988_priv *wm8988)
+{
+	wm8988_set_bias_level(&wm8988->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm8988_dai);
+	snd_soc_unregister_codec(&wm8988->codec);
+	kfree(wm8988);
+	wm8988_codec = NULL;
+}
+
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+static int wm8988_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct wm8988_priv *wm8988;
+	struct snd_soc_codec *codec;
+
+	wm8988 = kzalloc(sizeof(struct wm8988_priv), GFP_KERNEL);
+	if (wm8988 == NULL)
+		return -ENOMEM;
+
+	codec = &wm8988->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+
+	i2c_set_clientdata(i2c, wm8988);
+	codec->control_data = i2c;
+
+	codec->dev = &i2c->dev;
+
+	return wm8988_register(wm8988);
+}
+
+static int wm8988_i2c_remove(struct i2c_client *client)
+{
+	struct wm8988_priv *wm8988 = i2c_get_clientdata(client);
+	wm8988_unregister(wm8988);
+	return 0;
+}
+
+static const struct i2c_device_id wm8988_i2c_id[] = {
+	{ "wm8988", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm8988_i2c_id);
+
+static struct i2c_driver wm8988_i2c_driver = {
+	.driver = {
+		.name = "WM8988",
+		.owner = THIS_MODULE,
+	},
+	.probe = wm8988_i2c_probe,
+	.remove = wm8988_i2c_remove,
+	.id_table = wm8988_i2c_id,
+};
+#endif
+
+#if defined(CONFIG_SPI_MASTER)
+static int wm8988_spi_write(struct spi_device *spi, const char *data, int len)
+{
+	struct spi_transfer t;
+	struct spi_message m;
+	u8 msg[2];
+
+	if (len <= 0)
+		return 0;
+
+	msg[0] = data[0];
+	msg[1] = data[1];
+
+	spi_message_init(&m);
+	memset(&t, 0, (sizeof t));
+
+	t.tx_buf = &msg[0];
+	t.len = len;
+
+	spi_message_add_tail(&t, &m);
+	spi_sync(spi, &m);
+
+	return len;
+}
+
+static int __devinit wm8988_spi_probe(struct spi_device *spi)
+{
+	struct wm8988_priv *wm8988;
+	struct snd_soc_codec *codec;
+
+	wm8988 = kzalloc(sizeof(struct wm8988_priv), GFP_KERNEL);
+	if (wm8988 == NULL)
+		return -ENOMEM;
+
+	codec = &wm8988->codec;
+	codec->hw_write = (hw_write_t)wm8988_spi_write;
+	codec->control_data = spi;
+	codec->dev = &spi->dev;
+
+	spi->dev.driver_data = wm8988;
+
+	return wm8988_register(wm8988);
+}
+
+static int __devexit wm8988_spi_remove(struct spi_device *spi)
+{
+	struct wm8988_priv *wm8988 = spi->dev.driver_data;
+
+	wm8988_unregister(wm8988);
+
+	return 0;
+}
+
+static struct spi_driver wm8988_spi_driver = {
+	.driver = {
+		.name	= "wm8988",
+		.bus	= &spi_bus_type,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= wm8988_spi_probe,
+	.remove		= __devexit_p(wm8988_spi_remove),
+};
+#endif
+
+static int __init wm8988_modinit(void)
+{
+	int ret;
+
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+	ret = i2c_add_driver(&wm8988_i2c_driver);
+	if (ret != 0)
+		pr_err("WM8988: Unable to register I2C driver: %d\n", ret);
+#endif
+#if defined(CONFIG_SPI_MASTER)
+	ret = spi_register_driver(&wm8988_spi_driver);
+	if (ret != 0)
+		pr_err("WM8988: Unable to register SPI driver: %d\n", ret);
+#endif
+	return ret;
+}
+module_init(wm8988_modinit);
+
+static void __exit wm8988_exit(void)
+{
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
+	i2c_del_driver(&wm8988_i2c_driver);
+#endif
+#if defined(CONFIG_SPI_MASTER)
+	spi_unregister_driver(&wm8988_spi_driver);
+#endif
+}
+module_exit(wm8988_exit);
+
+
+MODULE_DESCRIPTION("ASoC WM8988 driver");
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm8988.h b/sound/soc/codecs/wm8988.h
new file mode 100644
index 0000000..4552d37
--- /dev/null
+++ b/sound/soc/codecs/wm8988.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2005 Openedhand Ltd.
+ *
+ * Author: Richard Purdie <richard@openedhand.com>
+ *
+ * Based on WM8753.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _WM8988_H
+#define _WM8988_H
+
+/* WM8988 register space */
+
+#define WM8988_LINVOL    0x00
+#define WM8988_RINVOL    0x01
+#define WM8988_LOUT1V    0x02
+#define WM8988_ROUT1V    0x03
+#define WM8988_ADCDAC    0x05
+#define WM8988_IFACE     0x07
+#define WM8988_SRATE     0x08
+#define WM8988_LDAC      0x0a
+#define WM8988_RDAC      0x0b
+#define WM8988_BASS      0x0c
+#define WM8988_TREBLE    0x0d
+#define WM8988_RESET     0x0f
+#define WM8988_3D        0x10
+#define WM8988_ALC1      0x11
+#define WM8988_ALC2      0x12
+#define WM8988_ALC3      0x13
+#define WM8988_NGATE     0x14
+#define WM8988_LADC      0x15
+#define WM8988_RADC      0x16
+#define WM8988_ADCTL1    0x17
+#define WM8988_ADCTL2    0x18
+#define WM8988_PWR1      0x19
+#define WM8988_PWR2      0x1a
+#define WM8988_ADCTL3    0x1b
+#define WM8988_ADCIN     0x1f
+#define WM8988_LADCIN    0x20
+#define WM8988_RADCIN    0x21
+#define WM8988_LOUTM1    0x22
+#define WM8988_LOUTM2    0x23
+#define WM8988_ROUTM1    0x24
+#define WM8988_ROUTM2    0x25
+#define WM8988_LOUT2V    0x28
+#define WM8988_ROUT2V    0x29
+#define WM8988_LPPB      0x43
+#define WM8988_NUM_REG   0x44
+
+#define WM8988_SYSCLK	0
+
+extern struct snd_soc_dai wm8988_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm8988;
+
+#endif
-- 
cgit v0.10.2


From 169aafbc8d3f05431b5cfeb60294a12b8ef2bcee Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 7 Apr 2009 13:37:26 -0700
Subject: lguest: update lazy mmu changes to match lguest's use of kvm
 hypercalls

Duplicate hcall -> kvm_hypercall0 convertion from "lguest: use KVM
hypercalls".

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Matias Zabaljauregui <zabaljauregui at gmail.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5ab2397..cfb2d68 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -168,7 +168,7 @@ static void lazy_hcall3(unsigned long call,
  * issue the do-nothing hypercall to flush any stored calls. */
 static void lguest_leave_lazy_mmu_mode(void)
 {
-	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
+	kvm_hypercall0(LHCALL_FLUSH_ASYNC);
 	paravirt_leave_lazy_mmu();
 }
 
-- 
cgit v0.10.2


From 44bc9dc729e33a4ec6ebed4d0b6c08e8d20b42cf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 10:47:17 +0200
Subject: mm, x86, ptrace, bts: defer branch trace stopping, cleanup

Andrew Morton noticed that mm.h needlessly includes sched.h - remove it.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 64d8ed2..776b641 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -13,7 +13,6 @@
 #include <linux/prio_tree.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
-#include <linux/sched.h>
 
 struct mempolicy;
 struct anon_vma;
-- 
cgit v0.10.2


From a34b50ddc265bae058c66661b096ef6384c5a8b1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 10:56:54 +0200
Subject: mm, x86, ptrace, bts: defer branch trace stopping, remove dead code

Remove the unused free_locked_buffer() API.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 776b641..a3963ba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1320,7 +1320,6 @@ int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
 extern void *alloc_locked_buffer(size_t size);
-extern void free_locked_buffer(void *buffer, size_t size);
 extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/mlock.c b/mm/mlock.c
index 749383b..28be15e 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -671,9 +671,3 @@ void refund_locked_buffer_memory(struct mm_struct *mm, size_t size)
 
 	up_write(&mm->mmap_sem);
 }
-
-void free_locked_buffer(void *buffer, size_t size)
-{
-	refund_locked_buffer_memory(current->mm, size);
-	kfree(buffer);
-}
-- 
cgit v0.10.2


From dc66270b51a62b1a6888d5309229e638a305c47b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 8 Apr 2009 20:30:10 +1000
Subject: perf_counter: fix powerpc build

Commit 4af4998b ("perf_counter: rework context time") changed struct
perf_counter_context to have a 'time' field instead of a 'time_now'
field, but neglected to fix the place in the powerpc perf_counter.c
where the time_now field was accessed.  This fixes it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18908.31922.411398.147810@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index f88c35d..0e56513 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -457,8 +457,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 {
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;
-	counter->tstamp_running += counter->ctx->time_now -
-		counter->tstamp_stopped;
+	counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
 	if (is_software_counter(counter))
 		counter->hw_ops->enable(counter);
 }
-- 
cgit v0.10.2


From f708223d49ac39f5af1643985056206c98033f5b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 8 Apr 2009 20:30:18 +1000
Subject: perf_counter: powerpc: set sample enable bit for marked instruction
 events

Impact: enable access to hardware feature

POWER processors have the ability to "mark" a subset of the instructions
and provide more detailed information on what happens to the marked
instructions as they flow through the pipeline.  This marking is
enabled by the "sample enable" bit in MMCRA, and there are
synchronization requirements around setting and clearing the bit.

This adds logic to the processor-specific back-ends so that they know
which events relate to marked instructions and set the sampling enable
bit if any event that we want to put on the PMU is a marked instruction
event.  It also adds logic to the generic powerpc code to do the
necessary synchronization if that bit is set.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18908.31930.1024.228867@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 0e56513..0697ade 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -307,6 +307,15 @@ u64 hw_perf_save_disable(void)
 		}
 
 		/*
+		 * Disable instruction sampling if it was enabled
+		 */
+		if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+			mtspr(SPRN_MMCRA,
+			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+			mb();
+		}
+
+		/*
 		 * Set the 'freeze counters' bit.
 		 * The barrier is to make sure the mtspr has been
 		 * executed and the PMU has frozen the counters
@@ -347,12 +356,11 @@ void hw_perf_restore(u64 disable)
 	 * (possibly updated for removal of counters).
 	 */
 	if (!cpuhw->n_added) {
-		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
-		mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
 		if (cpuhw->n_counters == 0)
 			get_lppaca()->pmcregs_in_use = 0;
-		goto out;
+		goto out_enable;
 	}
 
 	/*
@@ -385,7 +393,7 @@ void hw_perf_restore(u64 disable)
 	 * Then unfreeze the counters.
 	 */
 	get_lppaca()->pmcregs_in_use = 1;
-	mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
 				| MMCR0_FC);
@@ -421,10 +429,20 @@ void hw_perf_restore(u64 disable)
 		write_pmc(counter->hw.idx, val);
 		perf_counter_update_userpage(counter);
 	}
-	mb();
 	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+
+ out_enable:
+	mb();
 	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
 
+	/*
+	 * Enable instruction sampling if necessary
+	 */
+	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+		mb();
+		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+	}
+
  out:
 	local_irq_restore(flags);
 }
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index cec21ea..1222c8e 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -1,5 +1,5 @@
 /*
- * Performance counter support for POWER5 (not POWER5++) processors.
+ * Performance counter support for POWER5+/++ (not POWER5) processors.
  *
  * Copyright 2009 Paul Mackerras, IBM Corporation.
  *
@@ -281,10 +281,107 @@ static int power5p_get_alternatives(unsigned int event, unsigned int alt[])
 	return nalt;
 }
 
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
+ * Bit 0 is set if it is marked for all PMCs.
+ * The 0x80 bit indicates a byte decode PMCSEL value.
+ */
+static unsigned char direct_event_is_marked[0x28] = {
+	0,	/* 00 */
+	0x1f,	/* 01 PM_IOPS_CMPL */
+	0x2,	/* 02 PM_MRK_GRP_DISP */
+	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0,	/* 04 */
+	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
+	0x80,	/* 06 */
+	0x80,	/* 07 */
+	0, 0, 0,/* 08 - 0a */
+	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
+	0,	/* 0c */
+	0x80,	/* 0d */
+	0x80,	/* 0e */
+	0,	/* 0f */
+	0,	/* 10 */
+	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
+	0,	/* 12 */
+	0x10,	/* 13 PM_MRK_GRP_CMPL */
+	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x2,	/* 15 PM_MRK_GRP_ISSUED */
+	0x80,	/* 16 */
+	0x80,	/* 17 */
+	0, 0, 0, 0, 0,
+	0x80,	/* 1d */
+	0x80,	/* 1e */
+	0,	/* 1f */
+	0x80,	/* 20 */
+	0x80,	/* 21 */
+	0x80,	/* 22 */
+	0x80,	/* 23 */
+	0x80,	/* 24 */
+	0x80,	/* 25 */
+	0x80,	/* 26 */
+	0x80,	/* 27 */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power5p_marked_instr_event(unsigned int event)
+{
+	int pmc, psel;
+	int bit, byte, unit;
+	u32 mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc >= 5)
+		return 0;
+
+	bit = -1;
+	if (psel < sizeof(direct_event_is_marked)) {
+		if (direct_event_is_marked[psel] & (1 << pmc))
+			return 1;
+		if (direct_event_is_marked[psel] & 0x80)
+			bit = 4;
+		else if (psel == 0x08)
+			bit = pmc - 1;
+		else if (psel == 0x10)
+			bit = 4 - pmc;
+		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
+			bit = 4;
+	} else if ((psel & 0x48) == 0x40) {
+		bit = psel & 7;
+	} else if (psel == 0x28) {
+		bit = pmc - 1;
+	} else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
+		bit = 4;
+	}
+
+	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
+		return 0;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit == PM_LSU0) {
+		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
+		mask = 0x5dff00;
+	} else if (unit == PM_LSU1 && byte >= 4) {
+		byte -= 4;
+		/* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
+		mask = 0x5f11c000;
+	} else
+		return 0;
+
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
 static int power5p_compute_mmcr(unsigned int event[], int n_ev,
 				unsigned int hwc[], u64 mmcr[])
 {
 	u64 mmcr1 = 0;
+	u64 mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
 	unsigned int ttm;
 	int i, isbus, bit, grsel;
@@ -404,6 +501,8 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
 			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
 			mmcr1 |= (u64)grsel << grsel_shift[bit];
 		}
+		if (power5p_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
 		if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
 			/* select alternate byte lane */
 			psel |= 0x10;
@@ -419,7 +518,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
 	if (pmc_inuse & 0x3e)
 		mmcr[0] |= MMCR0_PMCjCE;
 	mmcr[1] = mmcr1;
-	mmcr[2] = 0;
+	mmcr[2] = mmcra;
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 379ed10..116c4bb 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -290,10 +290,102 @@ static int power5_get_alternatives(unsigned int event, unsigned int alt[])
 	return nalt;
 }
 
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
+ * Bit 0 is set if it is marked for all PMCs.
+ * The 0x80 bit indicates a byte decode PMCSEL value.
+ */
+static unsigned char direct_event_is_marked[0x28] = {
+	0,	/* 00 */
+	0x1f,	/* 01 PM_IOPS_CMPL */
+	0x2,	/* 02 PM_MRK_GRP_DISP */
+	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0,	/* 04 */
+	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
+	0x80,	/* 06 */
+	0x80,	/* 07 */
+	0, 0, 0,/* 08 - 0a */
+	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
+	0,	/* 0c */
+	0x80,	/* 0d */
+	0x80,	/* 0e */
+	0,	/* 0f */
+	0,	/* 10 */
+	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
+	0,	/* 12 */
+	0x10,	/* 13 PM_MRK_GRP_CMPL */
+	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x2,	/* 15 PM_MRK_GRP_ISSUED */
+	0x80,	/* 16 */
+	0x80,	/* 17 */
+	0, 0, 0, 0, 0,
+	0x80,	/* 1d */
+	0x80,	/* 1e */
+	0,	/* 1f */
+	0x80,	/* 20 */
+	0x80,	/* 21 */
+	0x80,	/* 22 */
+	0x80,	/* 23 */
+	0x80,	/* 24 */
+	0x80,	/* 25 */
+	0x80,	/* 26 */
+	0x80,	/* 27 */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power5_marked_instr_event(unsigned int event)
+{
+	int pmc, psel;
+	int bit, byte, unit;
+	u32 mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc >= 5)
+		return 0;
+
+	bit = -1;
+	if (psel < sizeof(direct_event_is_marked)) {
+		if (direct_event_is_marked[psel] & (1 << pmc))
+			return 1;
+		if (direct_event_is_marked[psel] & 0x80)
+			bit = 4;
+		else if (psel == 0x08)
+			bit = pmc - 1;
+		else if (psel == 0x10)
+			bit = 4 - pmc;
+		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
+			bit = 4;
+	} else if ((psel & 0x58) == 0x40)
+		bit = psel & 7;
+
+	if (!(event & PM_BUSEVENT_MSK))
+		return 0;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit == PM_LSU0) {
+		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
+		mask = 0x5dff00;
+	} else if (unit == PM_LSU1 && byte >= 4) {
+		byte -= 4;
+		/* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
+		mask = 0x5f00c0aa;
+	} else
+		return 0;
+
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
 static int power5_compute_mmcr(unsigned int event[], int n_ev,
 			       unsigned int hwc[], u64 mmcr[])
 {
 	u64 mmcr1 = 0;
+	u64 mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
 	unsigned int ttm, grp;
 	int i, isbus, bit, grsel;
@@ -430,6 +522,8 @@ static int power5_compute_mmcr(unsigned int event[], int n_ev,
 			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
 			mmcr1 |= (u64)grsel << grsel_shift[bit];
 		}
+		if (power5_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
 		if (pmc <= 3)
 			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
 		hwc[i] = pmc;
@@ -442,7 +536,7 @@ static int power5_compute_mmcr(unsigned int event[], int n_ev,
 	if (pmc_inuse & 0x3e)
 		mmcr[0] |= MMCR0_PMCjCE;
 	mmcr[1] = mmcr1;
-	mmcr[2] = 0;
+	mmcr[2] = mmcra;
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index b1f61f3..fce1fc2 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -49,12 +49,134 @@
 #define MMCR1_PMCSEL_MSK	0xff
 
 /*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value >> 1.
+ * Bottom 4 bits are a map of which PMCs are interesting,
+ * top 4 bits say what sort of event:
+ *   0 = direct marked event,
+ *   1 = byte decode event,
+ *   4 = add/and event (PMC1 -> bits 0 & 4),
+ *   5 = add/and event (PMC1 -> bits 1 & 5),
+ *   6 = add/and event (PMC1 -> bits 2 & 6),
+ *   7 = add/and event (PMC1 -> bits 3 & 7).
+ */
+static unsigned char direct_event_is_marked[0x60 >> 1] = {
+	0,	/* 00 */
+	0,	/* 02 */
+	0,	/* 04 */
+	0x07,	/* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0x04,	/* 08 PM_MRK_DFU_FIN */
+	0x06,	/* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
+	0,	/* 0c */
+	0,	/* 0e */
+	0x02,	/* 10 PM_MRK_INST_DISP */
+	0x08,	/* 12 PM_MRK_LSU_DERAT_MISS */
+	0,	/* 14 */
+	0,	/* 16 */
+	0x0c,	/* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
+	0x0f,	/* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x01,	/* 1c PM_MRK_INST_ISSUED */
+	0,	/* 1e */
+	0,	/* 20 */
+	0,	/* 22 */
+	0,	/* 24 */
+	0,	/* 26 */
+	0x15,	/* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
+	0,	/* 2a */
+	0,	/* 2c */
+	0,	/* 2e */
+	0x4f,	/* 30 */
+	0x7f,	/* 32 */
+	0x4f,	/* 34 */
+	0x5f,	/* 36 */
+	0x6f,	/* 38 */
+	0x4f,	/* 3a */
+	0,	/* 3c */
+	0x08,	/* 3e PM_MRK_INST_TIMEO */
+	0x1f,	/* 40 */
+	0x1f,	/* 42 */
+	0x1f,	/* 44 */
+	0x1f,	/* 46 */
+	0x1f,	/* 48 */
+	0x1f,	/* 4a */
+	0x1f,	/* 4c */
+	0x1f,	/* 4e */
+	0,	/* 50 */
+	0x05,	/* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
+	0x1c,	/* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
+	0x02,	/* 56 PM_MRK_LD_MISS_L1 */
+	0,	/* 58 */
+	0,	/* 5a */
+	0,	/* 5c */
+	0,	/* 5e */
+};
+
+/*
+ * Masks showing for each unit which bits are marked events.
+ * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
+ */
+static u32 marked_bus_events[16] = {
+	0x01000000,	/* direct events set 1: byte 3 bit 0 */
+	0x00010000,	/* direct events set 2: byte 2 bit 0 */
+	0, 0, 0, 0,	/* IDU, IFU, nest: nothing */
+	0x00000088,	/* VMX set 1: byte 0 bits 3, 7 */
+	0x000000c0,	/* VMX set 2: byte 0 bits 4-7 */
+	0x04010000,	/* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
+	0xff010000u,	/* LSU set 2: byte 2 bit 0, all of byte 3 */
+	0,		/* LSU set 3 */
+	0x00000010,	/* VMX set 3: byte 0 bit 4 */
+	0,		/* BFP set 1 */
+	0x00000022,	/* BFP set 2: byte 0 bits 1, 5 */
+	0, 0
+};
+	
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power6_marked_instr_event(unsigned int event)
+{
+	int pmc, psel, ptype;
+	int bit, byte, unit;
+	u32 mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = (event & PM_PMCSEL_MSK) >> 1;	/* drop edge/level bit */
+	if (pmc >= 5)
+		return 0;
+
+	bit = -1;
+	if (psel < sizeof(direct_event_is_marked)) {
+		ptype = direct_event_is_marked[psel];
+		if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
+			return 0;
+		ptype >>= 4;
+		if (ptype == 0)
+			return 1;
+		if (ptype == 1)
+			bit = 0;
+		else
+			bit = ptype ^ (pmc - 1);
+	} else if ((psel & 0x48) == 0x40)
+		bit = psel & 7;
+
+	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
+		return 0;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	mask = marked_bus_events[unit];
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+/*
  * Assign PMC numbers and compute MMCR1 value for a set of events
  */
 static int p6_compute_mmcr(unsigned int event[], int n_ev,
 			   unsigned int hwc[], u64 mmcr[])
 {
 	u64 mmcr1 = 0;
+	u64 mmcra = 0;
 	int i;
 	unsigned int pmc, ev, b, u, s, psel;
 	unsigned int ttmset = 0;
@@ -116,6 +238,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
 			if (ev & PM_LLAV)
 				mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
 		}
+		if (power6_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
 		mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
 	}
 	mmcr[0] = 0;
@@ -124,7 +248,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
 	if (pmc_inuse & 0xe)
 		mmcr[0] |= MMCR0_PMCjCE;
 	mmcr[1] = mmcr1;
-	mmcr[2] = 0;
+	mmcr[2] = mmcra;
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index c325658..aed8ccd 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -19,6 +19,8 @@
 #define PM_PMC_MSK	0xf
 #define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
 #define PM_UNIT_MSK	0xf
+#define PM_SPCSEL_SH	6
+#define PM_SPCSEL_MSK	3
 #define PM_BYTE_SH	4	/* Byte number of event bus to use */
 #define PM_BYTE_MSK	3
 #define PM_PMCSEL_MSK	0xf
@@ -88,8 +90,11 @@ static short mmcr1_adder_bits[8] = {
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
- *                 <><>[  >[  >[  ><  ><  ><  ><  ><><><><><><><><>
- *                 T0T1 UC  PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
+ *               <><><>[  >[  >[  ><  ><  ><  ><  ><><><><><><><><>
+ *               SPT0T1 UC  PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
+ *
+ * SP - SPCSEL constraint
+ *     48-49: SPCSEL value 0x3_0000_0000_0000
  *
  * T0 - TTM0 constraint
  *     46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
@@ -126,6 +131,57 @@ static short mmcr1_adder_bits[8] = {
  *     0-13: Count of events needing PMC2..PMC8
  */
 
+static unsigned char direct_marked_event[8] = {
+	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
+	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
+	(1<<3) | (1<<5),	/* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
+	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
+	(1<<4) | (1<<5),	/* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
+	(1<<3) | (1<<4) | (1<<5),
+		/* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
+	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
+	(1<<4)			/* PMC8: PM_MRK_LSU_FIN */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int p970_marked_instr_event(unsigned int event)
+{
+	int pmc, psel, unit, byte, bit;
+	unsigned int mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc) {
+		if (direct_marked_event[pmc - 1] & (1 << psel))
+			return 1;
+		if (psel == 0)		/* add events */
+			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
+		else if (psel == 7 || psel == 13)	/* decode events */
+			bit = 4;
+		else
+			return 0;
+	} else
+		bit = psel;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	mask = 0;
+	switch (unit) {
+	case PM_VPU:
+		mask = 0x4c;		/* byte 0 bits 2,3,6 */
+	case PM_LSU0:
+		/* byte 2 bits 0,2,3,4,6; all of byte 1 */
+		mask = 0x085dff00;
+	case PM_LSU1L:
+		mask = 0x50 << 24;	/* byte 3 bits 4,6 */
+		break;
+	}
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
 /* Masks and values for using events from the various units */
 static u64 unit_cons[PM_LASTUNIT+1][2] = {
 	[PM_FPU] =   { 0xc80000000000ull, 0x040000000000ull },
@@ -138,7 +194,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = {
 
 static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 {
-	int pmc, byte, unit, sh;
+	int pmc, byte, unit, sh, spcsel;
 	u64 mask = 0, value = 0;
 	int grp = -1;
 
@@ -177,6 +233,11 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 		mask  |= 0x800000000ull;
 		value |= 0x100000000ull;
 	}
+	spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
+	if (spcsel) {
+		mask  |= 3ull << 48;
+		value |= (u64)spcsel << 48;
+	}
 	*maskp = mask;
 	*valp = value;
 	return 0;
@@ -209,6 +270,7 @@ static int p970_compute_mmcr(unsigned int event[], int n_ev,
 	unsigned char ttmuse[2];
 	unsigned char pmcsel[8];
 	int i;
+	int spcsel;
 
 	if (n_ev > 8)
 		return -1;
@@ -316,6 +378,10 @@ static int p970_compute_mmcr(unsigned int event[], int n_ev,
 		}
 		pmcsel[pmc] = psel;
 		hwc[i] = pmc;
+		spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
+		mmcr1 |= spcsel;
+		if (p970_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
 	}
 	for (pmc = 0; pmc < 2; ++pmc)
 		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
-- 
cgit v0.10.2


From bab5bc9e857638880facef76e4b4c3fa807f8c73 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Tue, 7 Apr 2009 23:23:50 -0700
Subject: futex: fixup unlocked requeue pi case

Thomas's testing caught a problem when the requeue target futex is
unowned and multiple tasks are requeued to it. This patch ensures
the FUTEX_WAITERS bit gets set if futex_requeue() will requeue one
or more tasks in addition to the one acquiring the lock.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 185c981..041bf3a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -565,12 +565,14 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 
 /**
  * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex
- * @uaddr:	the pi futex user address
- * @hb:		the pi futex hash bucket
- * @key:	the futex key associated with uaddr and hb
- * @ps:		the pi_state pointer where we store the result of the lookup
- * @task:	the task to perform the atomic lock work for.  This will be
- * 		"current" except in the case of requeue pi.
+ * @uaddr:		the pi futex user address
+ * @hb:			the pi futex hash bucket
+ * @key:		the futex key associated with uaddr and hb
+ * @ps:			the pi_state pointer where we store the result of the
+ *			lookup
+ * @task:		the task to perform the atomic lock work for.  This will
+ *			be "current" except in the case of requeue pi.
+ * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
  *
  * Returns:
  *  0 - ready to wait
@@ -582,7 +584,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 				union futex_key *key,
 				struct futex_pi_state **ps,
-				struct task_struct *task)
+				struct task_struct *task, int set_waiters)
 {
 	int lock_taken, ret, ownerdied = 0;
 	u32 uval, newval, curval;
@@ -596,6 +598,8 @@ retry:
 	 * the locks. It will most likely not succeed.
 	 */
 	newval = task_pid_vnr(task);
+	if (set_waiters)
+		newval |= FUTEX_WAITERS;
 
 	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
 
@@ -1004,14 +1008,18 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
 
 /**
  * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
- * @pifutex:	the user address of the to futex
- * @hb1:	the from futex hash bucket, must be locked by the caller
- * @hb2:	the to futex hash bucket, must be locked by the caller
- * @key1:	the from futex key
- * @key2:	the to futex key
+ * @pifutex:		the user address of the to futex
+ * @hb1:		the from futex hash bucket, must be locked by the caller
+ * @hb2:		the to futex hash bucket, must be locked by the caller
+ * @key1:		the from futex key
+ * @key2:		the to futex key
+ * @ps:			address to store the pi_state pointer
+ * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
  *
  * Try and get the lock on behalf of the top waiter if we can do it atomically.
- * Wake the top waiter if we succeed.  hb1 and hb2 must be held by the caller.
+ * Wake the top waiter if we succeed.  If the caller specified set_waiters,
+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
+ * hb1 and hb2 must be held by the caller.
  *
  * Returns:
  *  0 - failed to acquire the lock atomicly
@@ -1022,15 +1030,23 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 				 struct futex_hash_bucket *hb1,
 				 struct futex_hash_bucket *hb2,
 				 union futex_key *key1, union futex_key *key2,
-				 struct futex_pi_state **ps)
+				 struct futex_pi_state **ps, int set_waiters)
 {
-	struct futex_q *top_waiter;
+	struct futex_q *top_waiter = NULL;
 	u32 curval;
 	int ret;
 
 	if (get_futex_value_locked(&curval, pifutex))
 		return -EFAULT;
 
+	/*
+	 * Find the top_waiter and determine if there are additional waiters.
+	 * If the caller intends to requeue more than 1 waiter to pifutex,
+	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
+	 * as we have means to handle the possible fault.  If not, don't set
+	 * the bit unecessarily as it will force the subsequent unlock to enter
+	 * the kernel.
+	 */
 	top_waiter = futex_top_waiter(hb1, key1);
 
 	/* There are no waiters, nothing for us to do. */
@@ -1038,10 +1054,12 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 		return 0;
 
 	/*
-	 * Either take the lock for top_waiter or set the FUTEX_WAITERS bit.
-	 * The pi_state is returned in ps in contended cases.
+	 * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
+	 * the contended case or if set_waiters is 1.  The pi_state is returned
+	 * in ps in contended cases.
 	 */
-	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task);
+	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
+				   set_waiters);
 	if (ret == 1)
 		requeue_pi_wake_futex(top_waiter, key2);
 
@@ -1146,9 +1164,14 @@ retry_private:
 	}
 
 	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
-		/* Attempt to acquire uaddr2 and wake the top_waiter. */
+		/*
+		 * Attempt to acquire uaddr2 and wake the top waiter. If we
+		 * intend to requeue waiters, force setting the FUTEX_WAITERS
+		 * bit.  We force this here where we are able to easily handle
+		 * faults rather in the requeue loop below.
+		 */
 		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-						 &key2, &pi_state);
+						 &key2, &pi_state, nr_requeue);
 
 		/*
 		 * At this point the top_waiter has either taken uaddr2 or is
@@ -1810,7 +1833,7 @@ retry:
 retry_private:
 	hb = queue_lock(&q);
 
-	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current);
+	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
 	if (unlikely(ret)) {
 		switch (ret) {
 		case 1:
-- 
cgit v0.10.2


From fcb2ac5bdfa3a7a04fb9749b916f64400f4c35a8 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Wed, 8 Apr 2009 13:31:58 +0200
Subject: x86_32: introduce restore_fpu_checking()

Impact: cleanup, prepare FPU code unificaton

Like on x86_64, return an error from restore_fpu and kill the task
if it fails.

Also rename restore_fpu to restore_fpu_checking which allows ifdefs
to be removed in math_state_restore().

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
LKML-Reference: <1239190320-23952-1-git-send-email-jirislaby@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 71c9e51..09a2d6d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -185,12 +185,10 @@ static inline void tolerant_fwait(void)
 	asm volatile("fnclex ; fwait");
 }
 
-static inline void restore_fpu(struct task_struct *tsk)
+static inline int restore_fpu_checking(struct task_struct *tsk)
 {
-	if (task_thread_info(tsk)->status & TS_XSAVE) {
-		xrstor_checking(&tsk->thread.xstate->xsave);
-		return;
-	}
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		return xrstor_checking(&tsk->thread.xstate->xsave);
 	/*
 	 * The "nop" is needed to make the instructions the same
 	 * length.
@@ -200,6 +198,7 @@ static inline void restore_fpu(struct task_struct *tsk)
 		"fxrstor %1",
 		X86_FEATURE_FXSR,
 		"m" (tsk->thread.xstate->fxsave));
+	return 0;
 }
 
 /* We need a safe address that is cheap to find and that is already
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d2883..d696145 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -839,9 +839,6 @@ asmlinkage void math_state_restore(void)
 	}
 
 	clts();				/* Allow maths ops (or we recurse) */
-#ifdef CONFIG_X86_32
-	restore_fpu(tsk);
-#else
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
@@ -850,7 +847,7 @@ asmlinkage void math_state_restore(void)
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
-#endif
+
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;
 }
-- 
cgit v0.10.2


From 34ba476a01e128aad51e02f9be854584e9ec73cf Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Wed, 8 Apr 2009 13:31:59 +0200
Subject: x86: unify restore_fpu_checking

Impact: cleanup

On x86_32, separate f*rstor to an inline function which makes
restore_fpu_checking the same on both platforms -> move it
outside the ifdefs.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
LKML-Reference: <1239190320-23952-2-git-send-email-jirislaby@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 09a2d6d..7a6f21d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -75,14 +75,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 	return err;
 }
 
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
-	if (task_thread_info(tsk)->status & TS_XSAVE)
-		return xrstor_checking(&tsk->thread.xstate->xsave);
-	else
-		return fxrstor_checking(&tsk->thread.xstate->fxsave);
-}
-
 /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
    is pending. Clear the x87 state here by setting it to fixed
    values. The kernel data segment can be sometimes 0 and sometimes
@@ -185,10 +177,9 @@ static inline void tolerant_fwait(void)
 	asm volatile("fnclex ; fwait");
 }
 
-static inline int restore_fpu_checking(struct task_struct *tsk)
+/* perform fxrstor iff the processor has extended states, otherwise frstor */
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
-	if (task_thread_info(tsk)->status & TS_XSAVE)
-		return xrstor_checking(&tsk->thread.xstate->xsave);
 	/*
 	 * The "nop" is needed to make the instructions the same
 	 * length.
@@ -197,7 +188,8 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
 		"nop ; frstor %1",
 		"fxrstor %1",
 		X86_FEATURE_FXSR,
-		"m" (tsk->thread.xstate->fxsave));
+		"m" (*fx));
+
 	return 0;
 }
 
@@ -261,6 +253,14 @@ end:
 
 #endif	/* CONFIG_X86_64 */
 
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		return xrstor_checking(&tsk->thread.xstate->xsave);
+	else
+		return fxrstor_checking(&tsk->thread.xstate->fxsave);
+}
+
 /*
  * Signal frame handlers...
  */
-- 
cgit v0.10.2


From 4ecf458492c2d97b3f9d850a5f92d79792e0a7e7 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Wed, 8 Apr 2009 13:32:00 +0200
Subject: x86_64: fix incorrect comments

Impact: cleanup

The comments which fxrstor_checking and fxsave_uset refer to is
now in fxsave. Change the comments appropriately.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
LKML-Reference: <1239190320-23952-3-git-send-email-jirislaby@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 7a6f21d..63d1850 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -67,7 +67,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 		     ".previous\n"
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err)
-#if 0 /* See comment in __save_init_fpu() below. */
+#if 0 /* See comment in fxsave() below. */
 		     : [fx] "r" (fx), "m" (*fx), "0" (0));
 #else
 		     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
@@ -112,7 +112,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 		     ".previous\n"
 		     _ASM_EXTABLE(1b, 3b)
 		     : [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in __fxsave_clear() below. */
+#if 0 /* See comment in fxsave() below. */
 		     : [fx] "r" (fx), "0" (0));
 #else
 		     : [fx] "cdaSDb" (fx), "0" (0));
-- 
cgit v0.10.2


From a59dacfdc9ba06903652fa4883bf1106278b18ec Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 17 Oct 2008 14:38:08 +0200
Subject: x86 early quirks: eliminate unused function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: cleanup

this warning:

  arch/x86/kernel/early-quirks.c:99: warning: ‘ati_ixp4x0_rev’ defined but not used

triggers because ati_ixp4x0_rev() is only used in the
ACPI && X86_IO_APIC case.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd9..ebdb85c 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -97,6 +97,7 @@ static void __init nvidia_bugs(int num, int slot, int func)
 }
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
+#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
 static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
 {
 	u32 d;
@@ -114,6 +115,7 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
 	d &= 0xff;
 	return d;
 }
+#endif
 
 static void __init ati_bugs(int num, int slot, int func)
 {
-- 
cgit v0.10.2


From cdc1cb0d4445f39561a65204d26f89365f917550 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 3 Apr 2009 17:15:14 -0700
Subject: x86: make wakeup_secondary_cpu_via_init static

Impact: cleanup

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <49D6A692.6040400@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 58d24ef..bddf2cc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
-int __devinit
+static int __devinit
 wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
-- 
cgit v0.10.2


From 02421f98ec55c3ff118f358740ff640f096c7ad6 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 3 Apr 2009 17:15:53 -0700
Subject: x86: consistent about warm_reset_vector for UN_NON_UNIQUE_APIC

Impact: cleanup

didn't set it for UV_NON_UNIQUE_APIC, so don't restore it

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <49D6A6B9.6060501@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bddf2cc..bf8ad63 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -822,10 +822,12 @@ do_rest:
 	/* mark "stuck" area as not stuck */
 	*((volatile unsigned long *)trampoline_base) = 0;
 
-	/*
-	 * Cleanup possible dangling ends...
-	 */
-	smpboot_restore_warm_reset_vector();
+	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
+		/*
+		 * Cleanup possible dangling ends...
+		 */
+		smpboot_restore_warm_reset_vector();
+	}
 
 	return boot_error;
 }
-- 
cgit v0.10.2


From ceb5ac3264686e75e6951de6a18d4baa9bdecb92 Mon Sep 17 00:00:00 2001
From: Becky Bruce <beckyb@kernel.crashing.org>
Date: Wed, 8 Apr 2009 09:09:15 -0500
Subject: swiotlb: comment corrections

Impact: cleanup

swiotlb_map/unmap_single are now swiotlb_map/unmap_page;
trivially change all the comments to reference new names.

Also, there were some comments that should have been
referring to just plain old map_single, not swiotlb_map_single;
fix those as well.

Also change a use of the word "pointer", when what is
referred to is actually a dma/physical address.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Cc: jeremy@goop.org
Cc: ian.campbell@citrix.com
LKML-Reference: <1239199761-22886-2-git-send-email-galak@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 2b0b5a7..170cf56 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -60,8 +60,8 @@ enum dma_sync_target {
 int swiotlb_force;
 
 /*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in unmap_single and
+ * sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
 static char *io_tlb_start, *io_tlb_end;
@@ -560,7 +560,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 				   size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
-		 * Fall back on swiotlb_map_single().
 		 */
 		free_pages((unsigned long) ret, order);
 		ret = NULL;
@@ -568,9 +567,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	if (!ret) {
 		/*
 		 * We are either out of memory or the device can't DMA
-		 * to GFP_DMA memory; fall back on
-		 * swiotlb_map_single(), which will grab memory from
-		 * the lowest available address range.
+		 * to GFP_DMA memory; fall back on map_single(), which
+		 * will grab memory from the lowest available address range.
 		 */
 		ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
 		if (!ret)
@@ -634,7 +632,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
  * physical address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
  */
 dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    unsigned long offset, size_t size,
@@ -648,7 +646,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 
 	BUG_ON(dir == DMA_NONE);
 	/*
-	 * If the pointer passed in happens to be in the device's DMA window,
+	 * If the address happens to be in the device's DMA window,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
@@ -679,7 +677,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 
 /*
  * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call.  All
+ * match what was provided for in a previous swiotlb_map_page call.  All
  * other usages are undefined.
  *
  * After this call, reads by the cpu to the buffer are guaranteed to see
@@ -703,7 +701,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
  *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
  * using the cpu, yet do not wish to teardown the dma mapping, you must
  * call this function before doing so.  At the next point you give the dma
  * address back to the card, you must first perform a
@@ -777,7 +775,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
 
 /*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
+ * This is the scatter-gather version of the above swiotlb_map_page
  * interface.  Here the scatter gather list elements are each tagged with the
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
@@ -788,7 +786,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
  *       The routine returns the number of addr/length pairs actually
  *       used, at most nents.
  *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
 int
@@ -836,7 +834,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
 
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
  */
 void
 swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-- 
cgit v0.10.2


From 67131ad0514d7105b55003a0506209cf1bba3f00 Mon Sep 17 00:00:00 2001
From: Becky Bruce <beckyb@kernel.crashing.org>
Date: Wed, 8 Apr 2009 09:09:16 -0500
Subject: swiotlb: fix compile warning

Squash a build warning seen on 32-bit powerpc caused by
calling min() with 2 different types. Use min_t() instead.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Cc: jeremy@goop.org
Cc: ian.campbell@citrix.com
LKML-Reference: <1239199761-22886-3-git-send-email-galak@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 170cf56..4fd6a76 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -341,7 +341,7 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 		unsigned long flags;
 
 		while (size) {
-			sz = min(PAGE_SIZE - offset, size);
+			sz = min_t(size_t, PAGE_SIZE - offset, size);
 
 			local_irq_save(flags);
 			buffer = kmap_atomic(pfn_to_page(pfn),
-- 
cgit v0.10.2


From dd6b02fe427f30520d0adc94aa52352367227873 Mon Sep 17 00:00:00 2001
From: Becky Bruce <beckyb@kernel.crashing.org>
Date: Wed, 8 Apr 2009 09:09:17 -0500
Subject: swiotlb: map_page fix for highmem systems

The current code calls virt_to_phys() on address that might
be in highmem, which is bad.  This wasn't needed, anyway, because
we already have the physical address we need.

Get rid of the now-unused virtual address as well.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Cc: jeremy@goop.org
Cc: ian.campbell@citrix.com
LKML-Reference: <1239199761-22886-4-git-send-email-galak@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 4fd6a76..e8a47c8 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -640,7 +640,6 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    struct dma_attrs *attrs)
 {
 	phys_addr_t phys = page_to_phys(page) + offset;
-	void *ptr = page_address(page) + offset;
 	dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
 	void *map;
 
@@ -651,7 +650,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	 * buffering it.
 	 */
 	if (!address_needs_mapping(dev, dev_addr, size) &&
-	    !range_needs_mapping(virt_to_phys(ptr), size))
+	    !range_needs_mapping(phys, size))
 		return dev_addr;
 
 	/*
-- 
cgit v0.10.2


From ef5722f698bde01cfec2b98fff733a48663ebf55 Mon Sep 17 00:00:00 2001
From: Becky Bruce <beckyb@kernel.crashing.org>
Date: Wed, 8 Apr 2009 09:09:18 -0500
Subject: swiotlb: allow arch override of address_needs_mapping

Some architectures require additional checking to determine
if a device can dma to an address and need to provide their
own address_needs_mapping..

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Cc: jeremy@goop.org
Cc: ian.campbell@citrix.com
LKML-Reference: <1239199761-22886-5-git-send-email-galak@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index e8a47c8..d81afab 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -145,6 +145,12 @@ static void *swiotlb_bus_to_virt(dma_addr_t address)
 	return phys_to_virt(swiotlb_bus_to_phys(address));
 }
 
+int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
+					       dma_addr_t addr, size_t size)
+{
+	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+}
+
 int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
 {
 	return 0;
@@ -309,10 +315,10 @@ cleanup1:
 	return -ENOMEM;
 }
 
-static int
+static inline int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
-	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+	return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
 }
 
 static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
-- 
cgit v0.10.2


From 7fcebbd2d984eac3fdd6da2f4453e7c43d32de89 Mon Sep 17 00:00:00 2001
From: Becky Bruce <beckyb@kernel.crashing.org>
Date: Wed, 8 Apr 2009 09:09:19 -0500
Subject: swiotlb: rename unmap_single to do_unmap_single

Previously, swiotlb_unmap_page and swiotlb_unmap_sg were
duplicating very similar code.  Refactor that code into a
new unmap_single and unmap_single use do_unmap_single.

Note that the swiotlb_unmap_sg code was previously doing
a complicated comparison to determine if an addresses needed
to be unmapped where a simple is_swiotlb_buffer() call
would have sufficed.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Cc: jeremy@goop.org
Cc: ian.campbell@citrix.com
LKML-Reference: <1239199761-22886-6-git-send-email-galak@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d81afab..2bde54a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -482,7 +482,7 @@ found:
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
 static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -591,7 +591,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		       (unsigned long long)dev_addr);
 
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
+		do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
 		return NULL;
 	}
 	*dma_handle = dev_addr;
@@ -608,7 +608,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 		free_pages((unsigned long) vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+		do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
@@ -688,17 +688,29 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-			size_t size, enum dma_data_direction dir,
-			struct dma_attrs *attrs)
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+			 size_t size, int dir)
 {
 	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dma_addr))
-		unmap_single(hwdev, dma_addr, size, dir);
-	else if (dir == DMA_FROM_DEVICE)
-		dma_mark_clean(dma_addr, size);
+
+	if (is_swiotlb_buffer(dma_addr)) {
+		do_unmap_single(hwdev, dma_addr, size, dir);
+		return;
+	}
+
+	if (dir != DMA_FROM_DEVICE)
+		return;
+
+	dma_mark_clean(dma_addr, size);
+}
+
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+			size_t size, enum dma_data_direction dir,
+			struct dma_attrs *attrs)
+{
+	unmap_single(hwdev, dev_addr, size, dir);
 }
 EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 
@@ -850,13 +862,9 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 
 	BUG_ON(dir == DMA_NONE);
 
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
-			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
-				     sg->dma_length, dir);
-		else if (dir == DMA_FROM_DEVICE)
-			dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
-	}
+	for_each_sg(sgl, sg, nelems, i)
+		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
-- 
cgit v0.10.2


From 380d687833aee098c4a2c3b35beaefe1c1f48d01 Mon Sep 17 00:00:00 2001
From: Becky Bruce <beckyb@kernel.crashing.org>
Date: Wed, 8 Apr 2009 09:09:20 -0500
Subject: swiotlb: use swiotlb_sync_single instead of duplicating code

Right now both swiotlb_sync_single_range and swiotlb_sync_sg
were duplicating the code in swiotlb_sync_single.  Just call it
instead.  Also rearrange the sync_single code for readability.

Note that the swiotlb_sync_sg code was previously doing
a complicated comparison to determine if an addresses needed
to be unmapped where a simple is_swiotlb_buffer() call
would have sufficed.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Cc: jeremy@goop.org
Cc: ian.campbell@citrix.com
LKML-Reference: <1239199761-22886-7-git-send-email-galak@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 2bde54a..d912f06 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -731,10 +731,16 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dma_addr))
+
+	if (is_swiotlb_buffer(dma_addr)) {
 		sync_single(hwdev, dma_addr, size, dir, target);
-	else if (dir == DMA_FROM_DEVICE)
-		dma_mark_clean(dma_addr, size);
+		return;
+	}
+
+	if (dir != DMA_FROM_DEVICE)
+		return;
+
+	dma_mark_clean(dma_addr, size);
 }
 
 void
@@ -761,13 +767,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
 			  unsigned long offset, size_t size,
 			  int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset;
-
-	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dma_addr))
-		sync_single(hwdev, dma_addr, size, dir, target);
-	else if (dir == DMA_FROM_DEVICE)
-		dma_mark_clean(dma_addr, size);
+	swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
 }
 
 void
@@ -890,15 +890,9 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 	struct scatterlist *sg;
 	int i;
 
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
-			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
+	for_each_sg(sgl, sg, nelems, i)
+		swiotlb_sync_single(hwdev, sg->dma_address,
 				    sg->dma_length, dir, target);
-		else if (dir == DMA_FROM_DEVICE)
-			dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
-	}
 }
 
 void
-- 
cgit v0.10.2


From 42d7c5e353cef9062129b0de3ec9ddf10567b9ca Mon Sep 17 00:00:00 2001
From: Becky Bruce <beckyb@kernel.crashing.org>
Date: Wed, 8 Apr 2009 09:09:21 -0500
Subject: swiotlb: change swiotlb_bus_to[phys,virt] prototypes

Add a hwdev argument that is needed on some architectures
in order to access a per-device offset that is taken into
account when producing a physical address (also needed to
get from bus address to virtual address because the physical
address is an intermediate step).

Also make swiotlb_bus_to_virt weak so architectures can
override it.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Cc: jeremy@goop.org
Cc: ian.campbell@citrix.com
LKML-Reference: <1239199761-22886-8-git-send-email-galak@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 34f12e9..887388a 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
 	return paddr;
 }
 
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
 	return baddr;
 }
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ac9ff54..cb1a663 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 
 extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
 				      phys_addr_t address);
-extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev,
+				       dma_addr_t address);
 
 extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size);
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d912f06..bffe6d7 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
 	return paddr;
 }
 
-phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
 	return baddr;
 }
@@ -140,9 +140,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
 }
 
-static void *swiotlb_bus_to_virt(dma_addr_t address)
+void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
 {
-	return phys_to_virt(swiotlb_bus_to_phys(address));
+	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
 }
 
 int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
@@ -691,7 +691,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 			 size_t size, int dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
@@ -728,7 +728,7 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
-- 
cgit v0.10.2


From a4e94ef0dd391eae05bdeacd12b8da3510957a97 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 27 Mar 2009 17:07:05 +0800
Subject: printk: add support of hh length modifier for printk

Impact: new feature, extend vsprintf format strings

hh is used as length modifier for signed char or unsigned char.
It is supported by glibc, we add kernel support now.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: torvalds@linux-foundation.org
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <49CC9739.30107@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 7536ace..b56f6d0 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -408,6 +408,8 @@ enum format_type {
 	FORMAT_TYPE_LONG_LONG,
 	FORMAT_TYPE_ULONG,
 	FORMAT_TYPE_LONG,
+	FORMAT_TYPE_UBYTE,
+	FORMAT_TYPE_BYTE,
 	FORMAT_TYPE_USHORT,
 	FORMAT_TYPE_SHORT,
 	FORMAT_TYPE_UINT,
@@ -853,11 +855,15 @@ qualifier:
 	spec->qualifier = -1;
 	if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
 	    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-		spec->qualifier = *fmt;
-		++fmt;
-		if (spec->qualifier == 'l' && *fmt == 'l') {
-			spec->qualifier = 'L';
-			++fmt;
+		spec->qualifier = *fmt++;
+		if (unlikely(spec->qualifier == *fmt)) {
+			if (spec->qualifier == 'l') {
+				spec->qualifier = 'L';
+				++fmt;
+			} else if (spec->qualifier == 'h') {
+				spec->qualifier = 'H';
+				++fmt;
+			}
 		}
 	}
 
@@ -919,6 +925,11 @@ qualifier:
 		spec->type = FORMAT_TYPE_SIZE_T;
 	} else if (spec->qualifier == 't') {
 		spec->type = FORMAT_TYPE_PTRDIFF;
+	} else if (spec->qualifier == 'H') {
+		if (spec->flags & SIGN)
+			spec->type = FORMAT_TYPE_BYTE;
+		else
+			spec->type = FORMAT_TYPE_UBYTE;
 	} else if (spec->qualifier == 'h') {
 		if (spec->flags & SIGN)
 			spec->type = FORMAT_TYPE_SHORT;
@@ -1087,6 +1098,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 			case FORMAT_TYPE_PTRDIFF:
 				num = va_arg(args, ptrdiff_t);
 				break;
+			case FORMAT_TYPE_UBYTE:
+				num = (unsigned char) va_arg(args, int);
+				break;
+			case FORMAT_TYPE_BYTE:
+				num = (signed char) va_arg(args, int);
+				break;
 			case FORMAT_TYPE_USHORT:
 				num = (unsigned short) va_arg(args, int);
 				break;
@@ -1363,6 +1380,10 @@ do {									\
 			case FORMAT_TYPE_PTRDIFF:
 				save_arg(ptrdiff_t);
 				break;
+			case FORMAT_TYPE_UBYTE:
+			case FORMAT_TYPE_BYTE:
+				save_arg(char);
+				break;
 			case FORMAT_TYPE_USHORT:
 			case FORMAT_TYPE_SHORT:
 				save_arg(short);
@@ -1538,6 +1559,12 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			case FORMAT_TYPE_PTRDIFF:
 				num = get_arg(ptrdiff_t);
 				break;
+			case FORMAT_TYPE_UBYTE:
+				num = get_arg(unsigned char);
+				break;
+			case FORMAT_TYPE_BYTE:
+				num = get_arg(signed char);
+				break;
 			case FORMAT_TYPE_USHORT:
 				num = get_arg(unsigned short);
 				break;
-- 
cgit v0.10.2


From 7333a8003cdc0470e8c0ae8b949cbc44f3165ff3 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 25 Mar 2009 10:50:34 +0900
Subject: x86: smarten /proc/interrupts output for new counters

Now /proc/interrupts of tip tree has new counters:

  CNT: Performance counter interrupts

Format change of output, as like that by commit:

  commit 7a81d9a7da03d2f27840d659f97ef140d032f609
  x86: smarten /proc/interrupts output

should be applied to these new counters too.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Jan Beulich <jbeulich@novell.com>
LKML-Reference: <49C98DEA.8060208@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d465487..dccaaa8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,7 +63,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
-	seq_printf(p, "CNT: ");
+	seq_printf(p, "%*s: ", prec, "CNT");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
 	seq_printf(p, "  Performance counter interrupts\n");
-- 
cgit v0.10.2


From ae9e6bc9f74f8247cbca50a6a93c80e0d686fa19 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 2 Apr 2009 13:19:54 +0900
Subject: percpu: don't put the first chunk in reverse-map rbtree

Impact: both first chunks don't use rbtree, no functional change

There can be two first chunks - reserved and dynamic with the former
one being optional.  Dynamic first chunk was linked on reverse-mapping
rbtree while the reserved one was mapped manually using the start
address and reserved offset limit.

This patch makes both first chunks to be looked up manually without
using the rbtree.  This is to help getting rid of the rbtree.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: rusty@rustcorp.com.au
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: rmk@arm.linux.org.uk
Cc: starvik@axis.com
Cc: ralf@linux-mips.org
Cc: davem@davemloft.net
Cc: cooloney@kernel.org
Cc: kyle@mcmartin.ca
Cc: matthew@wil.cx
Cc: grundler@parisc-linux.org
Cc: takata@linux-m32r.org
Cc: benh@kernel.crashing.org
Cc: rth@twiddle.net
Cc: ink@jurassic.park.msu.ru
Cc: heiko.carstens@de.ibm.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Christoph Lameter <cl@linux.com>
LKML-Reference: <49D43CEA.3040609@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/mm/percpu.c b/mm/percpu.c
index 1aa5d8f..bf1bf1f 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -110,9 +110,21 @@ static size_t pcpu_chunk_struct_size __read_mostly;
 void *pcpu_base_addr __read_mostly;
 EXPORT_SYMBOL_GPL(pcpu_base_addr);
 
-/* optional reserved chunk, only accessible for reserved allocations */
+/*
+ * The first chunk which always exists.  Note that unlike other
+ * chunks, this one can be allocated and mapped in several different
+ * ways and thus often doesn't live in the vmalloc area.
+ */
+static struct pcpu_chunk *pcpu_first_chunk;
+
+/*
+ * Optional reserved chunk.  This chunk reserves part of the first
+ * chunk and serves it for reserved allocations.  The amount of
+ * reserved offset is in pcpu_reserved_chunk_limit.  When reserved
+ * area doesn't exist, the following variables contain NULL and 0
+ * respectively.
+ */
 static struct pcpu_chunk *pcpu_reserved_chunk;
-/* offset limit of the reserved chunk */
 static int pcpu_reserved_chunk_limit;
 
 /*
@@ -297,15 +309,16 @@ static struct rb_node **pcpu_chunk_rb_search(void *addr,
  */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
+	void *first_start = pcpu_first_chunk->vm->addr;
 	struct rb_node *n, *parent;
 	struct pcpu_chunk *chunk;
 
-	/* is it in the reserved chunk? */
-	if (pcpu_reserved_chunk) {
-		void *start = pcpu_reserved_chunk->vm->addr;
-
-		if (addr >= start && addr < start + pcpu_reserved_chunk_limit)
+	/* is it in the first chunk? */
+	if (addr >= first_start && addr < first_start + pcpu_chunk_size) {
+		/* is it in the reserved area? */
+		if (addr < first_start + pcpu_reserved_chunk_limit)
 			return pcpu_reserved_chunk;
+		return pcpu_first_chunk;
 	}
 
 	/* nah... search the regular ones */
@@ -1147,7 +1160,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 
 	if (reserved_size) {
 		schunk->free_size = reserved_size;
-		pcpu_reserved_chunk = schunk;	/* not for dynamic alloc */
+		pcpu_reserved_chunk = schunk;
+		pcpu_reserved_chunk_limit = static_size + reserved_size;
 	} else {
 		schunk->free_size = dyn_size;
 		dyn_size = 0;			/* dynamic area covered */
@@ -1158,8 +1172,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	if (schunk->free_size)
 		schunk->map[schunk->map_used++] = schunk->free_size;
 
-	pcpu_reserved_chunk_limit = static_size + schunk->free_size;
-
 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
 		dchunk = alloc_bootmem(sizeof(struct pcpu_chunk));
@@ -1226,13 +1238,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 	}
 
 	/* link the first chunk in */
-	if (!dchunk) {
-		pcpu_chunk_relocate(schunk, -1);
-		pcpu_chunk_addr_insert(schunk);
-	} else {
-		pcpu_chunk_relocate(dchunk, -1);
-		pcpu_chunk_addr_insert(dchunk);
-	}
+	pcpu_first_chunk = dchunk ?: schunk;
+	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
 	/* we're done */
 	pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
-- 
cgit v0.10.2


From e1b9aa3f47242e757c776a3771bb6613e675bf9c Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Thu, 2 Apr 2009 13:21:44 +0900
Subject: percpu: remove rbtree and use page->index instead

Impact: use page->index for addr to chunk mapping instead of dedicated rbtree

The rbtree is used to determine the chunk from the virtual address.
However, we can already determine the page struct from a virtual
address and there are several unused fields in page struct used by
vmalloc.  Use the index field to store a pointer to the chunk. Then
there is no need anymore for an rbtree.

tj: * s/(set|get)_chunk/pcpu_\1_page_chunk/

    * Drop inline from the above two functions and moved them upwards
      so that they are with other simple helpers.

    * Initial pages might not (actually most of the time don't) live
      in the vmalloc area.  With the previous patch to manually
      reverse-map both first chunks, this is no longer an issue.
      Removed pcpu_set_chunk() call on initial pages.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: rusty@rustcorp.com.au
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: rmk@arm.linux.org.uk
Cc: starvik@axis.com
Cc: ralf@linux-mips.org
Cc: davem@davemloft.net
Cc: cooloney@kernel.org
Cc: kyle@mcmartin.ca
Cc: matthew@wil.cx
Cc: grundler@parisc-linux.org
Cc: takata@linux-m32r.org
Cc: benh@kernel.crashing.org
Cc: rth@twiddle.net
Cc: ink@jurassic.park.msu.ru
Cc: heiko.carstens@de.ibm.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nick Piggin <npiggin@suse.de>
LKML-Reference: <49D43D58.4050102@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/mm/percpu.c b/mm/percpu.c
index bf1bf1f..c0b2c1a 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -23,7 +23,7 @@
  * Allocation is done in offset-size areas of single unit space.  Ie,
  * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
  * c1:u1, c1:u2 and c1:u3.  Percpu access can be done by configuring
- * percpu base registers UNIT_SIZE apart.
+ * percpu base registers pcpu_unit_size apart.
  *
  * There are usually many small percpu allocations many of them as
  * small as 4 bytes.  The allocator organizes chunks into lists
@@ -38,8 +38,8 @@
  * region and negative allocated.  Allocation inside a chunk is done
  * by scanning this map sequentially and serving the first matching
  * entry.  This is mostly copied from the percpu_modalloc() allocator.
- * Chunks are also linked into a rb tree to ease address to chunk
- * mapping during free.
+ * Chunks can be determined from the address using the index field
+ * in the page struct. The index field contains a pointer to the chunk.
  *
  * To use this allocator, arch code should do the followings.
  *
@@ -61,7 +61,6 @@
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/pfn.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
@@ -88,7 +87,6 @@
 
 struct pcpu_chunk {
 	struct list_head	list;		/* linked to pcpu_slot lists */
-	struct rb_node		rb_node;	/* key is chunk->vm->addr */
 	int			free_size;	/* free bytes in the chunk */
 	int			contig_hint;	/* max contiguous size hint */
 	struct vm_struct	*vm;		/* mapped vmalloc region */
@@ -133,7 +131,7 @@ static int pcpu_reserved_chunk_limit;
  * There are two locks - pcpu_alloc_mutex and pcpu_lock.  The former
  * protects allocation/reclaim paths, chunks and chunk->page arrays.
  * The latter is a spinlock and protects the index data structures -
- * chunk slots, rbtree, chunks and area maps in chunks.
+ * chunk slots, chunks and area maps in chunks.
  *
  * During allocation, pcpu_alloc_mutex is kept locked all the time and
  * pcpu_lock is grabbed and released as necessary.  All actual memory
@@ -152,7 +150,6 @@ static DEFINE_MUTEX(pcpu_alloc_mutex);	/* protects whole alloc and reclaim */
 static DEFINE_SPINLOCK(pcpu_lock);	/* protects index data structures */
 
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
-static struct rb_root pcpu_addr_root = RB_ROOT;	/* chunks by address */
 
 /* reclaim work to release fully free chunks, scheduled from free path */
 static void pcpu_reclaim(struct work_struct *work);
@@ -203,6 +200,18 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk,
 	return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL;
 }
 
+/* set the pointer to a chunk in a page struct */
+static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
+{
+	page->index = (unsigned long)pcpu;
+}
+
+/* obtain pointer to a chunk from a page struct */
+static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
+{
+	return (struct pcpu_chunk *)page->index;
+}
+
 /**
  * pcpu_mem_alloc - allocate memory
  * @size: bytes to allocate
@@ -269,40 +278,9 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
 	}
 }
 
-static struct rb_node **pcpu_chunk_rb_search(void *addr,
-					     struct rb_node **parentp)
-{
-	struct rb_node **p = &pcpu_addr_root.rb_node;
-	struct rb_node *parent = NULL;
-	struct pcpu_chunk *chunk;
-
-	while (*p) {
-		parent = *p;
-		chunk = rb_entry(parent, struct pcpu_chunk, rb_node);
-
-		if (addr < chunk->vm->addr)
-			p = &(*p)->rb_left;
-		else if (addr > chunk->vm->addr)
-			p = &(*p)->rb_right;
-		else
-			break;
-	}
-
-	if (parentp)
-		*parentp = parent;
-	return p;
-}
-
 /**
- * pcpu_chunk_addr_search - search for chunk containing specified address
- * @addr: address to search for
- *
- * Look for chunk which might contain @addr.  More specifically, it
- * searchs for the chunk with the highest start address which isn't
- * beyond @addr.
- *
- * CONTEXT:
- * pcpu_lock.
+ * pcpu_chunk_addr_search - determine chunk containing specified address
+ * @addr: address for which the chunk needs to be determined.
  *
  * RETURNS:
  * The address of the found chunk.
@@ -310,8 +288,6 @@ static struct rb_node **pcpu_chunk_rb_search(void *addr,
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
 	void *first_start = pcpu_first_chunk->vm->addr;
-	struct rb_node *n, *parent;
-	struct pcpu_chunk *chunk;
 
 	/* is it in the first chunk? */
 	if (addr >= first_start && addr < first_start + pcpu_chunk_size) {
@@ -321,42 +297,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 		return pcpu_first_chunk;
 	}
 
-	/* nah... search the regular ones */
-	n = *pcpu_chunk_rb_search(addr, &parent);
-	if (!n) {
-		/* no exactly matching chunk, the parent is the closest */
-		n = parent;
-		BUG_ON(!n);
-	}
-	chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-
-	if (addr < chunk->vm->addr) {
-		/* the parent was the next one, look for the previous one */
-		n = rb_prev(n);
-		BUG_ON(!n);
-		chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-	}
-
-	return chunk;
-}
-
-/**
- * pcpu_chunk_addr_insert - insert chunk into address rb tree
- * @new: chunk to insert
- *
- * Insert @new into address rb tree.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_chunk_addr_insert(struct pcpu_chunk *new)
-{
-	struct rb_node **p, *parent;
-
-	p = pcpu_chunk_rb_search(new->vm->addr, &parent);
-	BUG_ON(*p);
-	rb_link_node(&new->rb_node, parent, p);
-	rb_insert_color(&new->rb_node, &pcpu_addr_root);
+	return pcpu_get_page_chunk(vmalloc_to_page(addr));
 }
 
 /**
@@ -768,6 +709,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
 						  alloc_mask, 0);
 			if (!*pagep)
 				goto err;
+			pcpu_set_page_chunk(*pagep, chunk);
 		}
 	}
 
@@ -892,7 +834,6 @@ restart:
 
 	spin_lock_irq(&pcpu_lock);
 	pcpu_chunk_relocate(chunk, -1);
-	pcpu_chunk_addr_insert(chunk);
 	goto restart;
 
 area_found:
@@ -981,7 +922,6 @@ static void pcpu_reclaim(struct work_struct *work)
 		if (chunk == list_first_entry(head, struct pcpu_chunk, list))
 			continue;
 
-		rb_erase(&chunk->rb_node, &pcpu_addr_root);
 		list_move(&chunk->list, &todo);
 	}
 
-- 
cgit v0.10.2


From e30e08f65c7ef6c230424264f09c3d53f117f58b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:25 +0200
Subject: perf_counter: fix NMI race in task clock

We should not be updating ctx->time from NMI context, work around that.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130408.681326666@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 863703b..84a3908 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -319,8 +319,6 @@ static void __perf_counter_disable(void *info)
 
 	spin_lock_irqsave(&ctx->lock, flags);
 
-	update_context_time(ctx);
-
 	/*
 	 * If the counter is on, turn it off.
 	 * If it is in error state, leave it in error state.
@@ -2335,13 +2333,11 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
  * Software counter: task time clock
  */
 
-static void task_clock_perf_counter_update(struct perf_counter *counter)
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
 {
-	u64 prev, now;
+	u64 prev;
 	s64 delta;
 
-	now = counter->ctx->time;
-
 	prev = atomic64_xchg(&counter->hw.prev_count, now);
 	delta = now - prev;
 	atomic64_add(delta, &counter->count);
@@ -2369,13 +2365,24 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
 	hrtimer_cancel(&counter->hw.hrtimer);
-	task_clock_perf_counter_update(counter);
+	task_clock_perf_counter_update(counter, counter->ctx->time);
+
 }
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	update_context_time(counter->ctx);
-	task_clock_perf_counter_update(counter);
+	u64 time;
+
+	if (!in_nmi()) {
+		update_context_time(counter->ctx);
+		time = counter->ctx->time;
+	} else {
+		u64 now = perf_clock();
+		u64 delta = now - counter->ctx->timestamp;
+		time = counter->ctx->time + delta;
+	}
+
+	task_clock_perf_counter_update(counter, time);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
-- 
cgit v0.10.2


From 6fab01927e8bdbbc77bafba2abb4810c5591ad52 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:26 +0200
Subject: perf_counter: provide misc bits in the event header

Limit the size of each record to 64k (or should we count in multiples
of u64 and have a 512K limit?), this gives 16 bits or spare room in the
header, which we can use for misc bits, so as to not have to grow the
record with u64 every time we have a few bits to report.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130408.769271806@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7f5d353..5bd8817 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -201,9 +201,13 @@ struct perf_counter_mmap_page {
 	__u32   data_head;		/* head in the data section */
 };
 
+#define PERF_EVENT_MISC_KERNEL	(1 << 0)
+#define PERF_EVENT_MISC_USER	(1 << 1)
+
 struct perf_event_header {
 	__u32	type;
-	__u32	size;
+	__u16	misc;
+	__u16	size;
 };
 
 enum perf_event_type {
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 84a3908..4af98f9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1831,6 +1831,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	header.type = PERF_EVENT_COUNTER_OVERFLOW;
 	header.size = sizeof(header);
 
+	header.misc = user_mode(regs) ?
+		PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
+
 	if (record_type & PERF_RECORD_IP) {
 		ip = instruction_pointer(regs);
 		header.type |= __PERF_EVENT_IP;
-- 
cgit v0.10.2


From 6b6e5486b3a168f0328c82a8d4376caf901472b1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:27 +0200
Subject: perf_counter: use misc field to widen type

Push the PERF_EVENT_COUNTER_OVERFLOW bit into the misc field so that
we can have the full 32bit for PERF_RECORD_ bits.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130408.891867663@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5bd8817..4809ae1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -201,8 +201,9 @@ struct perf_counter_mmap_page {
 	__u32   data_head;		/* head in the data section */
 };
 
-#define PERF_EVENT_MISC_KERNEL	(1 << 0)
-#define PERF_EVENT_MISC_USER	(1 << 1)
+#define PERF_EVENT_MISC_KERNEL		(1 << 0)
+#define PERF_EVENT_MISC_USER		(1 << 1)
+#define PERF_EVENT_MISC_OVERFLOW	(1 << 2)
 
 struct perf_event_header {
 	__u32	type;
@@ -230,36 +231,27 @@ enum perf_event_type {
 	PERF_EVENT_MUNMAP		= 2,
 
 	/*
-	 * Half the event type space is reserved for the counter overflow
-	 * bitfields, as found in hw_event.record_type.
-	 *
-	 * These events will have types of the form:
-	 *   PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } *
+	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
+	 * will be PERF_RECORD_*
 	 *
 	 * struct {
 	 * 	struct perf_event_header	header;
 	 *
-	 * 	{ u64			ip;	  } && __PERF_EVENT_IP
-	 * 	{ u32			pid, tid; } && __PERF_EVENT_TID
+	 * 	{ u64			ip;	  } && PERF_RECORD_IP
+	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
 	 *
 	 * 	{ u64			nr;
-	 * 	  { u64 event, val; } 	cnt[nr];  } && __PERF_EVENT_GROUP
+	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
 	 *
 	 * 	{ u16			nr,
 	 * 				hv,
 	 * 				kernel,
 	 * 				user;
-	 * 	  u64			ips[nr];  } && __PERF_EVENT_CALLCHAIN
+	 * 	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
 	 *
-	 * 	{ u64			time;     } && __PERF_EVENT_TIME
+	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 * };
 	 */
-	PERF_EVENT_COUNTER_OVERFLOW	= 1UL << 31,
-	__PERF_EVENT_IP			= PERF_RECORD_IP,
-	__PERF_EVENT_TID		= PERF_RECORD_TID,
-	__PERF_EVENT_GROUP		= PERF_RECORD_GROUP,
-	__PERF_EVENT_CALLCHAIN		= PERF_RECORD_CALLCHAIN,
-	__PERF_EVENT_TIME		= PERF_RECORD_TIME,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4af98f9..bf12df6 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1828,15 +1828,16 @@ static void perf_counter_output(struct perf_counter *counter,
 	int callchain_size = 0;
 	u64 time;
 
-	header.type = PERF_EVENT_COUNTER_OVERFLOW;
+	header.type = 0;
 	header.size = sizeof(header);
 
-	header.misc = user_mode(regs) ?
+	header.misc = PERF_EVENT_MISC_OVERFLOW;
+	header.misc |= user_mode(regs) ?
 		PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
 
 	if (record_type & PERF_RECORD_IP) {
 		ip = instruction_pointer(regs);
-		header.type |= __PERF_EVENT_IP;
+		header.type |= PERF_RECORD_IP;
 		header.size += sizeof(ip);
 	}
 
@@ -1845,12 +1846,12 @@ static void perf_counter_output(struct perf_counter *counter,
 		tid_entry.pid = current->group_leader->pid;
 		tid_entry.tid = current->pid;
 
-		header.type |= __PERF_EVENT_TID;
+		header.type |= PERF_RECORD_TID;
 		header.size += sizeof(tid_entry);
 	}
 
 	if (record_type & PERF_RECORD_GROUP) {
-		header.type |= __PERF_EVENT_GROUP;
+		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
 			counter->nr_siblings * sizeof(group_entry);
 	}
@@ -1861,7 +1862,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		if (callchain) {
 			callchain_size = (1 + callchain->nr) * sizeof(u64);
 
-			header.type |= __PERF_EVENT_CALLCHAIN;
+			header.type |= PERF_RECORD_CALLCHAIN;
 			header.size += callchain_size;
 		}
 	}
@@ -1872,7 +1873,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		 */
 		time = sched_clock();
 
-		header.type |= __PERF_EVENT_TIME;
+		header.type |= PERF_RECORD_TIME;
 		header.size += sizeof(u64);
 	}
 
-- 
cgit v0.10.2


From 808382b33bb4c60df6379ec2db39f332cc56b82a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:28 +0200
Subject: perf_counter: kerneltop: keep up with ABI changes

Update kerneltop to use PERF_EVENT_MISC_OVERFLOW

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130408.947197470@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 15f3a5f..042c1b8 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -1277,22 +1277,22 @@ static void mmap_read(struct mmap_data *md)
 
 		old += size;
 
-		switch (event->header.type) {
-		case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP:
-		case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID:
-			process_event(event->ip.ip, md->counter);
-			break;
-
-		case PERF_EVENT_MMAP:
-		case PERF_EVENT_MUNMAP:
-			printf("%s: %Lu %Lu %Lu %s\n",
-					event->header.type == PERF_EVENT_MMAP
-					  ? "mmap" : "munmap",
-					event->mmap.start,
-					event->mmap.len,
-					event->mmap.pgoff,
-					event->mmap.filename);
-			break;
+		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
+			if (event->header.type & PERF_RECORD_IP)
+				process_event(event->ip.ip, md->counter);
+		} else {
+			switch (event->header.type) {
+				case PERF_EVENT_MMAP:
+				case PERF_EVENT_MUNMAP:
+					printf("%s: %Lu %Lu %Lu %s\n",
+							event->header.type == PERF_EVENT_MMAP
+							? "mmap" : "munmap",
+							event->mmap.start,
+							event->mmap.len,
+							event->mmap.pgoff,
+							event->mmap.filename);
+					break;
+			}
 		}
 	}
 
-- 
cgit v0.10.2


From 8740f9418c78dcad694b46ab25d1645d5aef1f5e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:29 +0200
Subject: perf_counter: add some comments

Add a few comments because I was forgetting what field what for what
functionality.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.036984214@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4809ae1..8bf764fc 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -344,10 +344,12 @@ struct file;
 
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
-	int				nr_pages;
-	atomic_t			wakeup;
-	atomic_t			head;
-	atomic_t			events;
+	int				nr_pages;	/* nr of data pages  */
+
+	atomic_t			wakeup;		/* POLL_ for wakeups */
+	atomic_t			head;		/* write position    */
+	atomic_t			events;		/* event limit       */
+
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
-- 
cgit v0.10.2


From 8d1b2d9361b494bfc761700c348c65ebbe3deb5b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:30 +0200
Subject: perf_counter: track task-comm data

Similar to the mmap data stream, add one that tracks the task COMM field,
so that the userspace reporting knows what to call a task.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.127422406@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/fs/exec.c b/fs/exec.c
index e015c0b..bf47ed0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -951,6 +951,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 	task_lock(tsk);
 	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
 	task_unlock(tsk);
+	perf_counter_comm(tsk);
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8bf764fc..a70a55f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -142,8 +142,9 @@ struct perf_counter_hw_event {
 				exclude_idle   :  1, /* don't count when idle */
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
+				comm	       :  1, /* include comm data     */
 
-				__reserved_1   : 53;
+				__reserved_1   : 52;
 
 	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
@@ -231,6 +232,16 @@ enum perf_event_type {
 	PERF_EVENT_MUNMAP		= 2,
 
 	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 *
+	 * 	u32				pid, tid;
+	 * 	char				comm[];
+	 * };
+	 */
+	PERF_EVENT_COMM			= 3,
+
+	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
 	 * will be PERF_RECORD_*
 	 *
@@ -545,6 +556,8 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
+extern void perf_counter_comm(struct task_struct *tsk);
+
 #define MAX_STACK_DEPTH		255
 
 struct perf_callchain_entry {
@@ -583,6 +596,7 @@ static inline void
 perf_counter_munmap(unsigned long addr, unsigned long len,
 		    unsigned long pgoff, struct file *file) 		{ }
 
+static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index bf12df6..2d4aebb 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1917,6 +1917,99 @@ static void perf_counter_output(struct perf_counter *counter,
 }
 
 /*
+ * comm tracking
+ */
+
+struct perf_comm_event {
+	struct task_struct 	*task;
+	char 			*comm;
+	int			comm_size;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+	} event;
+};
+
+static void perf_counter_comm_output(struct perf_counter *counter,
+				     struct perf_comm_event *comm_event)
+{
+	struct perf_output_handle handle;
+	int size = comm_event->event.header.size;
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, comm_event->event);
+	perf_output_copy(&handle, comm_event->comm,
+				   comm_event->comm_size);
+	perf_output_end(&handle);
+}
+
+static int perf_counter_comm_match(struct perf_counter *counter,
+				   struct perf_comm_event *comm_event)
+{
+	if (counter->hw_event.comm &&
+	    comm_event->event.header.type == PERF_EVENT_COMM)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
+				  struct perf_comm_event *comm_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_comm_match(counter, comm_event))
+			perf_counter_comm_output(counter, comm_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_comm_event(struct perf_comm_event *comm_event)
+{
+	struct perf_cpu_context *cpuctx;
+	unsigned int size;
+	char *comm = comm_event->task->comm;
+
+	size = ALIGN(strlen(comm), sizeof(u64));
+
+	comm_event->comm = comm;
+	comm_event->comm_size = size;
+
+	comm_event->event.header.size = sizeof(comm_event->event) + size;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
+	put_cpu_var(perf_cpu_context);
+
+	perf_counter_comm_ctx(&current->perf_counter_ctx, comm_event);
+}
+
+void perf_counter_comm(struct task_struct *task)
+{
+	struct perf_comm_event comm_event = {
+		.task	= task,
+		.event  = {
+			.header = { .type = PERF_EVENT_COMM, },
+			.pid	= task->group_leader->pid,
+			.tid	= task->pid,
+		},
+	};
+
+	perf_counter_comm_event(&comm_event);
+}
+
+/*
  * mmap tracking
  */
 
-- 
cgit v0.10.2


From de9ac07bbf8f51e0ce40e5428c3a8f627bd237c2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:31 +0200
Subject: perf_counter: some simple userspace profiling

# perf-record make -j4 kernel/
 # perf-report | tail -15

  0.39              cc1 [kernel] lock_acquired
  0.42              cc1 [kernel] lock_acquire
  0.51              cc1 [ user ] /lib64/libc-2.8.90.so: _int_free
  0.51               as [kernel] clear_page_c
  0.53              cc1 [ user ] /lib64/libc-2.8.90.so: memcpy
  0.56              cc1 [ user ] /lib64/libc-2.8.90.so: _IO_vfprintf
  0.63              cc1 [kernel] lock_release
  0.67              cc1 [ user ] /lib64/libc-2.8.90.so: strlen
  0.68              cc1 [kernel] debug_smp_processor_id
  1.38              cc1 [ user ] /lib64/libc-2.8.90.so: _int_malloc
  1.55              cc1 [ user ] /lib64/libc-2.8.90.so: memset
  1.77              cc1 [kernel] __lock_acquire
  1.88              cc1 [kernel] clear_page_c
  3.61               as [ user ] /usr/bin/as: <unknown>
 59.16              cc1 [ user ] /usr/libexec/gcc/x86_64-redhat-linux/4.3.2/cc1: <unknown>

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
LKML-Reference: <20090408130409.220518450@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 194b662..1dd37ee 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -1,10 +1,16 @@
-BINS = kerneltop perfstat
+BINS = kerneltop perfstat perf-record perf-report
 
 all: $(BINS)
 
 kerneltop: kerneltop.c ../../include/linux/perf_counter.h
 	cc -O6 -Wall -lrt -o $@ $<
 
+perf-record: perf-record.c ../../include/linux/perf_counter.h
+	cc -O6 -Wall -lrt -o $@ $<
+
+perf-report: perf-report.cc ../../include/linux/perf_counter.h
+	g++ -O6 -Wall -lrt -o $@ $<
+
 perfstat: kerneltop
 	ln -sf kerneltop perfstat
 
diff --git a/Documentation/perf_counter/perf-record.c b/Documentation/perf_counter/perf-record.c
new file mode 100644
index 0000000..614de7c
--- /dev/null
+++ b/Documentation/perf_counter/perf-record.c
@@ -0,0 +1,530 @@
+
+
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <getopt.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+#include "../../include/linux/perf_counter.h"
+
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE   31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define rdclock()                                       \
+({                                                      \
+        struct timespec ts;                             \
+                                                        \
+        clock_gettime(CLOCK_MONOTONIC, &ts);            \
+        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
+})
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+#ifdef __x86_64__
+#define __NR_perf_counter_open 295
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __i386__
+#define __NR_perf_counter_open 333
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __powerpc__
+#define __NR_perf_counter_open 319
+#define rmb() 		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
+#endif
+
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+asmlinkage int sys_perf_counter_open(
+        struct perf_counter_hw_event    *hw_event_uptr          __user,
+        pid_t                           pid,
+        int                             cpu,
+        int                             group_fd,
+        unsigned long                   flags)
+{
+        return syscall(
+                __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
+}
+
+#define MAX_COUNTERS			64
+#define MAX_NR_CPUS			256
+
+#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
+
+static int			nr_counters			=  0;
+static __u64			event_id[MAX_COUNTERS]		= { };
+static int			default_interval = 100000;
+static int			event_count[MAX_COUNTERS];
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+static int			nr_cpus				=  0;
+static unsigned int		page_size;
+static unsigned int		mmap_pages			= 16;
+static int			output;
+static char 			*output_name			= "output.perf";
+static int			group				= 0;
+static unsigned int		realtime_prio			=  0;
+
+const unsigned int default_count[] = {
+	1000000,
+	1000000,
+	  10000,
+	  10000,
+	1000000,
+	  10000,
+};
+
+static char *hw_event_names[] = {
+	"CPU cycles",
+	"instructions",
+	"cache references",
+	"cache misses",
+	"branches",
+	"branch misses",
+	"bus cycles",
+};
+
+static char *sw_event_names[] = {
+	"cpu clock ticks",
+	"task clock ticks",
+	"pagefaults",
+	"context switches",
+	"CPU migrations",
+	"minor faults",
+	"major faults",
+};
+
+struct event_symbol {
+	__u64 event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols[] = {
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
+
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
+};
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static __u64 match_event_symbols(char *str)
+{
+	__u64 config, id;
+	int type;
+	unsigned int i;
+
+	if (sscanf(str, "r%llx", &config) == 1)
+		return config | PERF_COUNTER_RAW_MASK;
+
+	if (sscanf(str, "%d:%llu", &type, &id) == 2)
+		return EID(type, id);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return ~0ULL;
+}
+
+static int parse_events(char *str)
+{
+	__u64 config;
+
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	config = match_event_symbols(str);
+	if (config == ~0ULL)
+		return -1;
+
+	event_id[nr_counters] = config;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+static void display_events_help(void)
+{
+	unsigned int i;
+	__u64 e;
+
+	printf(
+	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		int type, id;
+
+		e = event_symbols[i].event;
+		type = PERF_COUNTER_TYPE(e);
+		id = PERF_COUNTER_ID(e);
+
+		printf("\n                             %d:%d: %-20s",
+				type, id, event_symbols[i].symbol);
+	}
+
+	printf("\n"
+	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
+}
+
+static void display_help(void)
+{
+	printf(
+	"Usage: perf-record [<options>]\n"
+	"perf-record Options (up to %d event types can be specified at once):\n\n",
+		 MAX_COUNTERS);
+
+	display_events_help();
+
+	printf(
+	" -c CNT    --count=CNT          # event period to sample\n"
+	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
+	" -o file   --output=<file>      # output file\n"
+	" -r prio   --realtime=<prio>    # use RT prio\n"
+	);
+
+	exit(0);
+}
+
+static void process_options(int argc, char *argv[])
+{
+	int error = 0, counter;
+
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"count",	required_argument,	NULL, 'c'},
+			{"event",	required_argument,	NULL, 'e'},
+			{"mmap_pages",	required_argument,	NULL, 'm'},
+			{"output",	required_argument,	NULL, 'o'},
+			{"realtime",	required_argument,	NULL, 'r'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "+:c:e:m:o:r:",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'c': default_interval		=   atoi(optarg); break;
+		case 'e': error				= parse_events(optarg); break;
+		case 'm': mmap_pages			=   atoi(optarg); break;
+		case 'o': output_name			= strdup(optarg); break;
+		case 'r': realtime_prio			=   atoi(optarg); break;
+		default: error = 1; break;
+		}
+	}
+	if (error)
+		display_help();
+
+	if (!nr_counters) {
+		nr_counters = 1;
+		event_id[0] = 0;
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (event_count[counter])
+			continue;
+
+		event_count[counter] = default_interval;
+	}
+}
+
+struct mmap_data {
+	int counter;
+	void *base;
+	unsigned int mask;
+	unsigned int prev;
+};
+
+static unsigned int mmap_read_head(struct mmap_data *md)
+{
+	struct perf_counter_mmap_page *pc = md->base;
+	int head;
+
+	head = pc->data_head;
+	rmb();
+
+	return head;
+}
+
+static long events;
+static struct timeval last_read, this_read;
+
+static void mmap_read(struct mmap_data *md)
+{
+	unsigned int head = mmap_read_head(md);
+	unsigned int old = md->prev;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+	void *buf;
+	int diff;
+
+	gettimeofday(&this_read, NULL);
+
+	/*
+	 * If we're further behind than half the buffer, there's a chance
+	 * the writer will bite our tail and screw up the events under us.
+	 *
+	 * If we somehow ended up ahead of the head, we got messed up.
+	 *
+	 * In either case, truncate and restart at head.
+	 */
+	diff = head - old;
+	if (diff > md->mask / 2 || diff < 0) {
+		struct timeval iv;
+		unsigned long msecs;
+
+		timersub(&this_read, &last_read, &iv);
+		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+		fprintf(stderr, "WARNING: failed to keep up with mmap data."
+				"  Last read %lu msecs ago.\n", msecs);
+
+		/*
+		 * head points to a known good entry, start there.
+		 */
+		old = head;
+	}
+
+	last_read = this_read;
+
+	if (old != head)
+		events++;
+
+	size = head - old;
+
+	if ((old & md->mask) + size != (head & md->mask)) {
+		buf = &data[old & md->mask];
+		size = md->mask + 1 - (old & md->mask);
+		old += size;
+		while (size) {
+			int ret = write(output, buf, size);
+			if (ret < 0) {
+				perror("failed to write");
+				exit(-1);
+			}
+			size -= ret;
+			buf += ret;
+		}
+	}
+
+	buf = &data[old & md->mask];
+	size = head - old;
+	old += size;
+	while (size) {
+		int ret = write(output, buf, size);
+		if (ret < 0) {
+			perror("failed to write");
+			exit(-1);
+		}
+		size -= ret;
+		buf += ret;
+	}
+
+	md->prev = old;
+}
+
+static volatile int done = 0;
+
+static void sigchld_handler(int sig)
+{
+	if (sig == SIGCHLD)
+		done = 1;
+}
+
+int main(int argc, char *argv[])
+{
+	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
+	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+	struct perf_counter_hw_event hw_event;
+	int i, counter, group_fd, nr_poll = 0;
+	pid_t pid;
+	int ret;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	process_options(argc, argv);
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
+	if (output < 0) {
+		perror("failed to create output file");
+		exit(-1);
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	for (i = 0; i < nr_cpus; i++) {
+		group_fd = -1;
+		for (counter = 0; counter < nr_counters; counter++) {
+
+			memset(&hw_event, 0, sizeof(hw_event));
+			hw_event.config		= event_id[counter];
+			hw_event.irq_period	= event_count[counter];
+			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
+			hw_event.nmi		= 1;
+			hw_event.mmap		= 1;
+			hw_event.comm		= 1;
+
+			fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0);
+			if (fd[i][counter] < 0) {
+				int err = errno;
+				printf("kerneltop error: syscall returned with %d (%s)\n",
+					fd[i][counter], strerror(err));
+				if (err == EPERM)
+					printf("Are you root?\n");
+				exit(-1);
+			}
+			assert(fd[i][counter] >= 0);
+			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
+
+			/*
+			 * First counter acts as the group leader:
+			 */
+			if (group && group_fd == -1)
+				group_fd = fd[i][counter];
+
+			event_array[nr_poll].fd = fd[i][counter];
+			event_array[nr_poll].events = POLLIN;
+			nr_poll++;
+
+			mmap_array[i][counter].counter = counter;
+			mmap_array[i][counter].prev = 0;
+			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+					PROT_READ, MAP_SHARED, fd[i][counter], 0);
+			if (mmap_array[i][counter].base == MAP_FAILED) {
+				printf("kerneltop error: failed to mmap with %d (%s)\n",
+						errno, strerror(errno));
+				exit(-1);
+			}
+		}
+	}
+
+	signal(SIGCHLD, sigchld_handler);
+
+	pid = fork();
+	if (pid < 0)
+		perror("failed to fork");
+
+	if (!pid) {
+		if (execvp(argv[0], argv)) {
+			perror(argv[0]);
+			exit(-1);
+		}
+	}
+
+	if (realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			printf("Could not set realtime priority.\n");
+			exit(-1);
+		}
+	}
+
+	/*
+	 * TODO: store the current /proc/$/maps information somewhere
+	 */
+
+	while (!done) {
+		int hits = events;
+
+		for (i = 0; i < nr_cpus; i++) {
+			for (counter = 0; counter < nr_counters; counter++)
+				mmap_read(&mmap_array[i][counter]);
+		}
+
+		if (hits == events)
+			ret = poll(event_array, nr_poll, 100);
+	}
+
+	return 0;
+}
diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc
new file mode 100644
index 0000000..09da0ba
--- /dev/null
+++ b/Documentation/perf_counter/perf-report.cc
@@ -0,0 +1,472 @@
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <time.h>
+#include <getopt.h>
+
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+#include "../../include/linux/perf_counter.h"
+
+#include <set>
+#include <map>
+#include <string>
+
+
+static char 		const *input_name = "output.perf";
+static int		input;
+
+static unsigned long	page_size;
+static unsigned long	mmap_window = 32;
+
+struct ip_event {
+	struct perf_event_header header;
+	__u64 ip;
+	__u32 pid, tid;
+};
+struct mmap_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	__u64 start;
+	__u64 len;
+	__u64 pgoff;
+	char filename[PATH_MAX];
+};
+struct comm_event {
+	struct perf_event_header header;
+	__u32 pid,tid;
+	char comm[16];
+};
+
+typedef union event_union {
+	struct perf_event_header header;
+	struct ip_event ip;
+	struct mmap_event mmap;
+	struct comm_event comm;
+} event_t;
+
+struct section {
+	uint64_t start;
+	uint64_t end;
+
+	uint64_t offset;
+
+	std::string name;
+
+	section() { };
+
+	section(uint64_t stab) : end(stab) { };
+
+	section(uint64_t start, uint64_t size, uint64_t offset, std::string name) :
+		start(start), end(start + size), offset(offset), name(name)
+	{ };
+
+	bool operator < (const struct section &s) const {
+		return end < s.end;
+	};
+};
+
+typedef std::set<struct section> sections_t;
+
+struct symbol {
+	uint64_t start;
+	uint64_t end;
+
+	std::string name;
+
+	symbol() { };
+
+	symbol(uint64_t ip) : start(ip) { }
+
+	symbol(uint64_t start, uint64_t len, std::string name) :
+		start(start), end(start + len), name(name)
+	{ };
+
+	bool operator < (const struct symbol &s) const {
+		return start < s.start;
+	};
+};
+
+typedef std::set<struct symbol> symbols_t;
+
+struct dso {
+	sections_t sections;
+	symbols_t syms;
+};
+
+static std::map<std::string, struct dso> dsos;
+
+static void load_dso_sections(std::string dso_name)
+{
+	struct dso &dso = dsos[dso_name];
+
+	std::string cmd = "readelf -DSW " + dso_name;
+
+	FILE *file = popen(cmd.c_str(), "r");
+	if (!file) {
+		perror("failed to open pipe");
+		exit(-1);
+	}
+
+	char *line = NULL;
+	size_t n = 0;
+
+	while (!feof(file)) {
+		uint64_t addr, off, size;
+		char name[32];
+
+		if (getline(&line, &n, file) < 0)
+			break;
+		if (!line)
+			break;
+
+		if (sscanf(line, "  [%*2d] %16s %*14s %Lx %Lx %Lx",
+					name, &addr, &off, &size) == 4) {
+
+			dso.sections.insert(section(addr, size, addr - off, name));
+		}
+#if 0
+		/*
+		 * for reading readelf symbols (-s), however these don't seem
+		 * to include nearly everything, so use nm for that.
+		 */
+		if (sscanf(line, " %*4d %*3d: %Lx %5Lu %*7s %*6s %*7s %3d %s",
+			   &start, &size, &section, sym) == 4) {
+
+			start -= dso.section_offsets[section];
+
+			dso.syms.insert(symbol(start, size, std::string(sym)));
+		}
+#endif
+	}
+	pclose(file);
+}
+
+static void load_dso_symbols(std::string dso_name, std::string args)
+{
+	struct dso &dso = dsos[dso_name];
+
+	std::string cmd = "nm -nSC " + args + " " + dso_name;
+
+	FILE *file = popen(cmd.c_str(), "r");
+	if (!file) {
+		perror("failed to open pipe");
+		exit(-1);
+	}
+
+	char *line = NULL;
+	size_t n = 0;
+
+	while (!feof(file)) {
+		uint64_t start, size;
+		char c;
+		char sym[1024];
+
+		if (getline(&line, &n, file) < 0)
+			break;
+		if (!line)
+			break;
+
+
+		if (sscanf(line, "%Lx %Lx %c %s", &start, &size, &c, sym) == 4) {
+			sections_t::const_iterator si =
+				dso.sections.upper_bound(section(start));
+			if (si == dso.sections.end()) {
+				printf("symbol in unknown section: %s\n", sym);
+				continue;
+			}
+
+			start -= si->offset;
+
+			dso.syms.insert(symbol(start, size, sym));
+		}
+	}
+	pclose(file);
+}
+
+static void load_dso(std::string dso_name)
+{
+	load_dso_sections(dso_name);
+	load_dso_symbols(dso_name, "-D"); /* dynamic symbols */
+	load_dso_symbols(dso_name, "");   /* regular ones */
+}
+
+void load_kallsyms(void)
+{
+	struct dso &dso = dsos["[kernel]"];
+
+	FILE *file = fopen("/proc/kallsyms", "r");
+	if (!file) {
+		perror("failed to open kallsyms");
+		exit(-1);
+	}
+
+	char *line;
+	size_t n;
+
+	while (!feof(file)) {
+		uint64_t start;
+		char c;
+		char sym[1024];
+
+		if (getline(&line, &n, file) < 0)
+			break;
+		if (!line)
+			break;
+
+		if (sscanf(line, "%Lx %c %s", &start, &c, sym) == 3)
+			dso.syms.insert(symbol(start, 0x1000000, std::string(sym)));
+	}
+	fclose(file);
+}
+
+struct map {
+	uint64_t start;
+	uint64_t end;
+	uint64_t pgoff;
+
+	std::string dso;
+
+	map() { };
+
+	map(uint64_t ip) : end(ip) { }
+
+	map(mmap_event *mmap) {
+		start = mmap->start;
+		end = mmap->start + mmap->len;
+		pgoff = mmap->pgoff;
+
+		dso = std::string(mmap->filename);
+
+		if (dsos.find(dso) == dsos.end())
+			load_dso(dso);
+	};
+
+	bool operator < (const struct map &m) const {
+		return end < m.end;
+	};
+};
+
+typedef std::set<struct map> maps_t;
+
+static std::map<int, maps_t> maps;
+
+static std::map<int, std::string> comms;
+
+static std::map<std::string, int> hist;
+static std::multimap<int, std::string> rev_hist;
+
+static std::string resolve_comm(int pid)
+{
+	std::string comm = "<unknown>";
+	std::map<int, std::string>::const_iterator ci = comms.find(pid);
+	if (ci != comms.end())
+		comm = ci->second;
+
+	return comm;
+}
+
+static std::string resolve_user_symbol(int pid, uint64_t ip)
+{
+	std::string sym = "<unknown>";
+
+	maps_t &m = maps[pid];
+	maps_t::const_iterator mi = m.upper_bound(map(ip));
+	if (mi == m.end())
+		return sym;
+
+	ip -= mi->start + mi->pgoff;
+
+	symbols_t &s = dsos[mi->dso].syms;
+	symbols_t::const_iterator si = s.upper_bound(symbol(ip));
+
+	sym = mi->dso + ": <unknown>";
+
+	if (si == s.begin())
+		return sym;
+	si--;
+
+	if (si->start <= ip && ip < si->end)
+		sym = mi->dso + ": " + si->name;
+#if 0
+	else if (si->start <= ip)
+		sym = mi->dso + ": ?" + si->name;
+#endif
+
+	return sym;
+}
+
+static std::string resolve_kernel_symbol(uint64_t ip)
+{
+	std::string sym = "<unknown>";
+
+	symbols_t &s = dsos["[kernel]"].syms;
+	symbols_t::const_iterator si = s.upper_bound(symbol(ip));
+
+	if (si == s.begin())
+		return sym;
+	si--;
+
+	if (si->start <= ip && ip < si->end)
+		sym = si->name;
+
+	return sym;
+}
+
+static void display_help(void)
+{
+	printf(
+	"Usage: perf-report [<options>]\n"
+	" -i file   --input=<file>      # input file\n"
+	);
+
+	exit(0);
+}
+
+static void process_options(int argc, char *argv[])
+{
+	int error = 0;
+
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"input",	required_argument,	NULL, 'i'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "+:i:",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'i': input_name			= strdup(optarg); break;
+		default: error = 1; break;
+		}
+	}
+
+	if (error)
+		display_help();
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned long offset = 0;
+	unsigned long head = 0;
+	struct stat stat;
+	char *buf;
+	event_t *event;
+	int ret;
+	unsigned long total = 0;
+
+	page_size = getpagesize();
+
+	process_options(argc, argv);
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		perror("failed to open file");
+		exit(-1);
+	}
+
+	ret = fstat(input, &stat);
+	if (ret < 0) {
+		perror("failed to stat file");
+		exit(-1);
+	}
+
+	load_kallsyms();
+
+remap:
+	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
+			   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		perror("failed to mmap file");
+		exit(-1);
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		unsigned long shift = page_size * (head / page_size);
+
+		munmap(buf, page_size * mmap_window);
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+	head += event->header.size;
+
+	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
+		std::string comm, sym, level;
+		char output[1024];
+
+		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+			level = "[kernel]";
+			sym = resolve_kernel_symbol(event->ip.ip);
+		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+			level = "[ user ]";
+			sym = resolve_user_symbol(event->ip.pid, event->ip.ip);
+		} else {
+			level = "[  hv  ]";
+		}
+		comm = resolve_comm(event->ip.pid);
+
+		snprintf(output, sizeof(output), "%16s %s %s",
+				comm.c_str(), level.c_str(), sym.c_str());
+		hist[output]++;
+
+		total++;
+
+	} else switch (event->header.type) {
+	case PERF_EVENT_MMAP:
+		maps[event->mmap.pid].insert(map(&event->mmap));
+		break;
+
+	case PERF_EVENT_COMM:
+		comms[event->comm.pid] = std::string(event->comm.comm);
+		break;
+	}
+
+	if (offset + head < stat.st_size)
+		goto more;
+
+	close(input);
+
+	std::map<std::string, int>::iterator hi = hist.begin();
+
+	while (hi != hist.end()) {
+		rev_hist.insert(std::pair<int, std::string>(hi->second, hi->first));
+		hist.erase(hi++);
+	}
+
+	std::multimap<int, std::string>::const_iterator ri = rev_hist.begin();
+
+	while (ri != rev_hist.end()) {
+		printf(" %5.2f %s\n", (100.0 * ri->first)/total, ri->second.c_str());
+		ri++;
+	}
+
+	return 0;
+}
+
-- 
cgit v0.10.2


From 4d855457d84b819fefcd1cd1b0a2a0a0ec475c07 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:32 +0200
Subject: perf_counter: move PERF_RECORD_TIME

Move PERF_RECORD_TIME so that all the fixed length items come before
the variable length ones.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.307926436@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a70a55f..8bd1be5 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -100,9 +100,9 @@ enum sw_event_ids {
 enum perf_counter_record_format {
 	PERF_RECORD_IP		= 1U << 0,
 	PERF_RECORD_TID		= 1U << 1,
-	PERF_RECORD_GROUP	= 1U << 2,
-	PERF_RECORD_CALLCHAIN	= 1U << 3,
-	PERF_RECORD_TIME	= 1U << 4,
+	PERF_RECORD_TIME	= 1U << 2,
+	PERF_RECORD_GROUP	= 1U << 3,
+	PERF_RECORD_CALLCHAIN	= 1U << 4,
 };
 
 /*
@@ -250,6 +250,7 @@ enum perf_event_type {
 	 *
 	 * 	{ u64			ip;	  } && PERF_RECORD_IP
 	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
+	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 *
 	 * 	{ u64			nr;
 	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
@@ -259,8 +260,6 @@ enum perf_event_type {
 	 * 				kernel,
 	 * 				user;
 	 * 	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
-	 *
-	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 * };
 	 */
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2d4aebb..4dc8600d2 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1850,6 +1850,16 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(tid_entry);
 	}
 
+	if (record_type & PERF_RECORD_TIME) {
+		/*
+		 * Maybe do better on x86 and provide cpu_clock_nmi()
+		 */
+		time = sched_clock();
+
+		header.type |= PERF_RECORD_TIME;
+		header.size += sizeof(u64);
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -1867,16 +1877,6 @@ static void perf_counter_output(struct perf_counter *counter,
 		}
 	}
 
-	if (record_type & PERF_RECORD_TIME) {
-		/*
-		 * Maybe do better on x86 and provide cpu_clock_nmi()
-		 */
-		time = sched_clock();
-
-		header.type |= PERF_RECORD_TIME;
-		header.size += sizeof(u64);
-	}
-
 	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
 	if (ret)
 		return;
@@ -1889,6 +1889,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_TID)
 		perf_output_put(&handle, tid_entry);
 
+	if (record_type & PERF_RECORD_TIME)
+		perf_output_put(&handle, time);
+
 	if (record_type & PERF_RECORD_GROUP) {
 		struct perf_counter *leader, *sub;
 		u64 nr = counter->nr_siblings;
@@ -1910,9 +1913,6 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (callchain)
 		perf_output_copy(&handle, callchain, callchain_size);
 
-	if (record_type & PERF_RECORD_TIME)
-		perf_output_put(&handle, time);
-
 	perf_output_end(&handle);
 }
 
-- 
cgit v0.10.2


From 78f13e9525ba777da25c4ddab89f28e9366a8b7c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:33 +0200
Subject: perf_counter: allow for data addresses to be recorded

Paul suggested we allow for data addresses to be recorded along with
the traditional IPs as power can provide these.

For now, only the software pagefault events provide data addresses,
but in the future power might as well for some events.

x86 doesn't seem capable of providing this atm.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.394816925@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 0697ade..c9d019f 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 * Finally record data if requested.
 	 */
 	if (record)
-		perf_counter_overflow(counter, 1, regs);
+		perf_counter_overflow(counter, 1, regs, 0);
 }
 
 /*
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 17bbf6f..ac0e112 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -312,7 +312,8 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
 			preempt_disable();
@@ -322,7 +323,8 @@ good_area:
 #endif
 	} else {
 		current->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
 	}
 	up_read(&mm->mmap_sem);
 	return 0;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 1116a41..0fcbaab 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -800,7 +800,7 @@ again:
 			continue;
 
 		perf_save_and_restart(counter);
-		if (perf_counter_overflow(counter, nmi, regs))
+		if (perf_counter_overflow(counter, nmi, regs, 0))
 			__pmc_generic_disable(counter, &counter->hw, bit);
 	}
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f2d3324..6f9df2b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1142,10 +1142,12 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
 	}
 
 	check_v8086_mode(regs, address, tsk);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8bd1be5..c22363a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -101,8 +101,9 @@ enum perf_counter_record_format {
 	PERF_RECORD_IP		= 1U << 0,
 	PERF_RECORD_TID		= 1U << 1,
 	PERF_RECORD_TIME	= 1U << 2,
-	PERF_RECORD_GROUP	= 1U << 3,
-	PERF_RECORD_CALLCHAIN	= 1U << 4,
+	PERF_RECORD_ADDR	= 1U << 3,
+	PERF_RECORD_GROUP	= 1U << 4,
+	PERF_RECORD_CALLCHAIN	= 1U << 5,
 };
 
 /*
@@ -251,6 +252,7 @@ enum perf_event_type {
 	 * 	{ u64			ip;	  } && PERF_RECORD_IP
 	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
 	 * 	{ u64			time;     } && PERF_RECORD_TIME
+	 * 	{ u64			addr;     } && PERF_RECORD_ADDR
 	 *
 	 * 	{ u64			nr;
 	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
@@ -537,7 +539,7 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 extern int perf_counter_overflow(struct perf_counter *counter,
-				 int nmi, struct pt_regs *regs);
+				 int nmi, struct pt_regs *regs, u64 addr);
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
@@ -547,7 +549,7 @@ static inline int is_software_counter(struct perf_counter *counter)
 		perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
 }
 
-extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
 
 extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 			      unsigned long pgoff, struct file *file);
@@ -584,8 +586,8 @@ static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 
 static inline void
-perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)	{ }
-
+perf_swcounter_event(u32 event, u64 nr, int nmi,
+		     struct pt_regs *regs, u64 addr)			{ }
 
 static inline void
 perf_counter_mmap(unsigned long addr, unsigned long len,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4dc8600d2..321c57e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -800,7 +800,7 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 	update_context_time(ctx);
 
 	regs = task_pt_regs(task);
-	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
+	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
@@ -1810,7 +1810,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 }
 
 static void perf_counter_output(struct perf_counter *counter,
-				int nmi, struct pt_regs *regs)
+				int nmi, struct pt_regs *regs, u64 addr)
 {
 	int ret;
 	u64 record_type = counter->hw_event.record_type;
@@ -1860,6 +1860,11 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
+	if (record_type & PERF_RECORD_ADDR) {
+		header.type |= PERF_RECORD_ADDR;
+		header.size += sizeof(u64);
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -1892,6 +1897,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_TIME)
 		perf_output_put(&handle, time);
 
+	if (record_type & PERF_RECORD_ADDR)
+		perf_output_put(&handle, addr);
+
 	if (record_type & PERF_RECORD_GROUP) {
 		struct perf_counter *leader, *sub;
 		u64 nr = counter->nr_siblings;
@@ -2158,7 +2166,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
  */
 
 int perf_counter_overflow(struct perf_counter *counter,
-			  int nmi, struct pt_regs *regs)
+			  int nmi, struct pt_regs *regs, u64 addr)
 {
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
@@ -2175,7 +2183,7 @@ int perf_counter_overflow(struct perf_counter *counter,
 			perf_counter_disable(counter);
 	}
 
-	perf_counter_output(counter, nmi, regs);
+	perf_counter_output(counter, nmi, regs, addr);
 	return ret;
 }
 
@@ -2240,7 +2248,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 		regs = task_pt_regs(current);
 
 	if (regs) {
-		if (perf_counter_overflow(counter, 0, regs))
+		if (perf_counter_overflow(counter, 0, regs, 0))
 			ret = HRTIMER_NORESTART;
 	}
 
@@ -2250,11 +2258,11 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 }
 
 static void perf_swcounter_overflow(struct perf_counter *counter,
-				    int nmi, struct pt_regs *regs)
+				    int nmi, struct pt_regs *regs, u64 addr)
 {
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	if (perf_counter_overflow(counter, nmi, regs))
+	if (perf_counter_overflow(counter, nmi, regs, addr))
 		/* soft-disable the counter */
 		;
 
@@ -2286,16 +2294,17 @@ static int perf_swcounter_match(struct perf_counter *counter,
 }
 
 static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-			       int nmi, struct pt_regs *regs)
+			       int nmi, struct pt_regs *regs, u64 addr)
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
 	if (counter->hw.irq_period && !neg)
-		perf_swcounter_overflow(counter, nmi, regs);
+		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     enum perf_event_types type, u32 event,
-				     u64 nr, int nmi, struct pt_regs *regs)
+				     u64 nr, int nmi, struct pt_regs *regs,
+				     u64 addr)
 {
 	struct perf_counter *counter;
 
@@ -2305,7 +2314,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
 		if (perf_swcounter_match(counter, type, event, regs))
-			perf_swcounter_add(counter, nr, nmi, regs);
+			perf_swcounter_add(counter, nr, nmi, regs, addr);
 	}
 	rcu_read_unlock();
 }
@@ -2325,7 +2334,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
 }
 
 static void __perf_swcounter_event(enum perf_event_types type, u32 event,
-				   u64 nr, int nmi, struct pt_regs *regs)
+				   u64 nr, int nmi, struct pt_regs *regs,
+				   u64 addr)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
 	int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -2336,10 +2346,11 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 	(*recursion)++;
 	barrier();
 
-	perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
+	perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
+				 nr, nmi, regs, addr);
 	if (cpuctx->task_ctx) {
 		perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
-				nr, nmi, regs);
+					 nr, nmi, regs, addr);
 	}
 
 	barrier();
@@ -2349,9 +2360,10 @@ out:
 	put_cpu_var(perf_cpu_context);
 }
 
-void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
+void
+perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 {
-	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
+	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
 }
 
 static void perf_swcounter_read(struct perf_counter *counter)
@@ -2548,7 +2560,7 @@ void perf_tpcounter_event(int event_id)
 	if (!regs)
 		regs = task_pt_regs(current);
 
-	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
+	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
 }
 
 extern int ftrace_profile_enable(int);
-- 
cgit v0.10.2


From 1c1452be2e9ae282a7316c3b23987811bd7acda6 Mon Sep 17 00:00:00 2001
From: Jonas Larsson <jonas.larsson@martinsson.se>
Date: Tue, 31 Mar 2009 11:16:48 +0200
Subject: atmel-mci: Add support for inverted detect pin

Same patch as before, modified to use bool. Also adds description of
the new field in struct atmel_mci that I missed in the first patch.

This patch adds Atmel MCI support for inverted detect pins.

Signed-off-by: Jonas Larsson <jonas.larsson@martinsson.se>
Acked-by: Pierre Ossman <pierre@ossman.eu>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index cf6a100..7b603e4 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -177,6 +177,7 @@ struct atmel_mci {
  *	available.
  * @wp_pin: GPIO pin used for card write protect sending, or negative
  *	if not available.
+ * @detect_is_active_high: The state of the detect pin when it is active.
  * @detect_timer: Timer used for debouncing @detect_pin interrupts.
  */
 struct atmel_mci_slot {
@@ -196,6 +197,7 @@ struct atmel_mci_slot {
 
 	int			detect_pin;
 	int			wp_pin;
+	bool			detect_is_active_high;
 
 	struct timer_list	detect_timer;
 };
@@ -924,7 +926,8 @@ static int atmci_get_cd(struct mmc_host *mmc)
 	struct atmel_mci_slot	*slot = mmc_priv(mmc);
 
 	if (gpio_is_valid(slot->detect_pin)) {
-		present = !gpio_get_value(slot->detect_pin);
+		present = !(gpio_get_value(slot->detect_pin) ^
+			    slot->detect_is_active_high);
 		dev_dbg(&mmc->class_dev, "card is %spresent\n",
 				present ? "" : "not ");
 	}
@@ -1028,7 +1031,8 @@ static void atmci_detect_change(unsigned long data)
 		return;
 
 	enable_irq(gpio_to_irq(slot->detect_pin));
-	present = !gpio_get_value(slot->detect_pin);
+	present = !(gpio_get_value(slot->detect_pin) ^
+		    slot->detect_is_active_high);
 	present_old = test_bit(ATMCI_CARD_PRESENT, &slot->flags);
 
 	dev_vdbg(&slot->mmc->class_dev, "detect change: %d (was %d)\n",
@@ -1456,6 +1460,7 @@ static int __init atmci_init_slot(struct atmel_mci *host,
 	slot->host = host;
 	slot->detect_pin = slot_data->detect_pin;
 	slot->wp_pin = slot_data->wp_pin;
+	slot->detect_is_active_high = slot_data->detect_is_active_high;
 	slot->sdc_reg = sdc_reg;
 
 	mmc->ops = &atmci_ops;
@@ -1477,7 +1482,8 @@ static int __init atmci_init_slot(struct atmel_mci *host,
 		if (gpio_request(slot->detect_pin, "mmc_detect")) {
 			dev_dbg(&mmc->class_dev, "no detect pin available\n");
 			slot->detect_pin = -EBUSY;
-		} else if (gpio_get_value(slot->detect_pin)) {
+		} else if (gpio_get_value(slot->detect_pin) ^
+				slot->detect_is_active_high) {
 			clear_bit(ATMCI_CARD_PRESENT, &slot->flags);
 		}
 	}
diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
index 2f1f957..57b1846 100644
--- a/include/linux/atmel-mci.h
+++ b/include/linux/atmel-mci.h
@@ -10,6 +10,7 @@
  * @bus_width: Number of data lines wired up the slot
  * @detect_pin: GPIO pin wired to the card detect switch
  * @wp_pin: GPIO pin wired to the write protect sensor
+ * @detect_is_active_high: The state of the detect pin when it is active
  *
  * If a given slot is not present on the board, @bus_width should be
  * set to 0. The other fields are ignored in this case.
@@ -24,6 +25,7 @@ struct mci_slot_pdata {
 	unsigned int		bus_width;
 	int			detect_pin;
 	int			wp_pin;
+	bool			detect_is_active_high;
 };
 
 /**
-- 
cgit v0.10.2


From 7ebcfcf19723254ebe90cdf8718436b9955a9a01 Mon Sep 17 00:00:00 2001
From: Jonas Larsson <jonas.larsson@martinsson.se>
Date: Tue, 31 Mar 2009 11:16:52 +0200
Subject: avr32: Solves problem with inverted MCI detect pin on Merisc board

Same patch as before, modified to use bool.

This patch solves the problem with the inverted mci detect pin on Merisc
boards.

Signed-off-by: Jonas Larsson <jonas.larsson@martinsson.se>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>

diff --git a/arch/avr32/boards/merisc/setup.c b/arch/avr32/boards/merisc/setup.c
index 20b300c..623b077 100644
--- a/arch/avr32/boards/merisc/setup.c
+++ b/arch/avr32/boards/merisc/setup.c
@@ -94,9 +94,10 @@ static struct spi_board_info __initdata spi0_board_info[] = {
 
 static struct mci_platform_data __initdata mci0_data = {
 	.slot[0] = {
-		.bus_width	= 4,
-		.detect_pin	= GPIO_PIN_PE(19),
-		.wp_pin		= GPIO_PIN_PE(20),
+		.bus_width		= 4,
+		.detect_pin		= GPIO_PIN_PE(19),
+		.wp_pin			= GPIO_PIN_PE(20),
+		.detect_is_active_high	= true,
 	},
 };
 
-- 
cgit v0.10.2


From e85abf8f432bb2a13733ab7609fbb8e1500af51d Mon Sep 17 00:00:00 2001
From: Gary Hade <garyhade@us.ibm.com>
Date: Wed, 8 Apr 2009 14:07:25 -0700
Subject: x86: consolidate SMP code in io_apic.c

Impact: Cleanup

Reorganizes the code in arch/x86/kernel/io_apic.c by
combining two '#ifdef CONFIG_SMP' regions.  In addition
to making the code easier to understand the first
'#ifdef CONFIG_SMP' region is moved to a location later
in the file which will reduce the need for function
forward declarations when the code subsequently revised.

The only changes other than relocating code to a different
position in the file were the removal of the assign_irq_vector()
forward declaration which was no longer needed and some line
length reduction formatting changes.

Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Cc: lcm@us.ibm.com
LKML-Reference: <20090408210725.GC11159@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 767fe7e..7c9d045 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -518,120 +518,6 @@ static void ioapic_mask_entry(int apic, int pin)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-		cfg->move_cleanup_count = 0;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			cfg->move_cleanup_count++;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
-	cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-	u8 vector = cfg->vector;
-
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(irq))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int irq;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return BAD_APICID;
-
-	/* check that before desc->addinity get updated */
-	set_extra_move_desc(desc, mask);
-
-	cpumask_copy(desc->affinity, mask);
-
-	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	dest = set_desc_affinity(desc, mask);
-	if (dest != BAD_APICID) {
-		/* Only the high 8 bits are valid. */
-		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, cfg);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -2360,6 +2246,115 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+		cfg->move_cleanup_count = 0;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			cfg->move_cleanup_count++;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
+static void
+__target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+
+/*
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned int irq;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return BAD_APICID;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
+		return BAD_APICID;
+
+	/* check that before desc->addinity get updated */
+	set_extra_move_desc(desc, mask);
+
+	cpumask_copy(desc->affinity, mask);
+
+	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+}
+
+static void
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	dest = set_desc_affinity(desc, mask);
+	if (dest != BAD_APICID) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, cfg);
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	set_ioapic_affinity_irq_desc(desc, mask);
+}
 
 #ifdef CONFIG_INTR_REMAP
 
-- 
cgit v0.10.2


From 6c0b324435ff49fb3c68fe808a93853d81c7fb97 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 9 Apr 2009 09:27:37 +1000
Subject: perf_counter: add MAINTAINERS entry

This adds an entry in MAINTAINERS for the perf_counter subsystem.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18909.13033.345975.434902@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/MAINTAINERS b/MAINTAINERS
index c3b2159..16fb45a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3483,6 +3483,16 @@ M:	balbir@linux.vnet.ibm.com
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
+PERFORMANCE COUNTER SUBSYSTEM
+P:	Peter Zijlstra
+M:	a.p.zijlstra@chello.nl
+P:	Paul Mackerras
+M:	paulus@samba.org
+P:	Ingo Molnar
+M:	mingo@elte.hu
+L:	linux-kernel@vger.kernel.org
+S:	Supported
+
 PERSONALITY HANDLING
 P:	Christoph Hellwig
 M:	hch@infradead.org
-- 
cgit v0.10.2


From ca8f2d7f019a8547f39ddb9ed0144932f12807f2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 9 Apr 2009 14:42:56 +1000
Subject: perf_counter: powerpc: add nmi_enter/nmi_exit calls

Impact: fix potential deadlocks on powerpc

Now that the core is using in_nmi() (added in e30e08f6, "perf_counter:
fix NMI race in task clock"), we need the powerpc perf_counter_interrupt
to call nmi_enter() and nmi_exit() in those cases where the interrupt
happens when interrupts are soft-disabled.

If interrupts were soft-enabled, we can treat it as a regular interrupt
and do irq_enter/irq_exit around the whole routine. This lets us get rid
of the test_perf_counter_pending() call at the end of
perf_counter_interrupt, thus simplifying things a little.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18909.31952.873098.336615@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index c9d019f..bd76d0f 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -714,7 +714,7 @@ hw_perf_counter_init(struct perf_counter *counter)
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_counter *counter, long val,
-			       struct pt_regs *regs)
+			       struct pt_regs *regs, int nmi)
 {
 	s64 prev, delta, left;
 	int record = 0;
@@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 * Finally record data if requested.
 	 */
 	if (record)
-		perf_counter_overflow(counter, 1, regs, 0);
+		perf_counter_overflow(counter, nmi, regs, 0);
 }
 
 /*
@@ -762,6 +762,17 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	struct perf_counter *counter;
 	long val;
 	int found = 0;
+	int nmi;
+
+	/*
+	 * If interrupts were soft-disabled when this PMU interrupt
+	 * occurred, treat it as an NMI.
+	 */
+	nmi = !regs->softe;
+	if (nmi)
+		nmi_enter();
+	else
+		irq_enter();
 
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
@@ -769,7 +780,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 		if ((int)val < 0) {
 			/* counter has overflowed */
 			found = 1;
-			record_and_restart(counter, val, regs);
+			record_and_restart(counter, val, regs, nmi);
 		}
 	}
 
@@ -796,18 +807,10 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	 */
 	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
 
-	/*
-	 * If we need a wakeup, check whether interrupts were soft-enabled
-	 * when we took the interrupt.  If they were, we can wake stuff up
-	 * immediately; otherwise we'll have do the wakeup when interrupts
-	 * get soft-enabled.
-	 */
-	if (test_perf_counter_pending() && regs->softe) {
-		irq_enter();
-		clear_perf_counter_pending();
-		perf_counter_do_pending();
+	if (nmi)
+		nmi_exit();
+	else
 		irq_exit();
-	}
 }
 
 void hw_perf_counter_setup(int cpu)
-- 
cgit v0.10.2


From d2de688891909b148efe83a6fc9520a9cd6015f0 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 9 Apr 2009 15:17:22 +1000
Subject: sparc64: extend TI_RESTART_BLOCK space by 8 bytes

Impact: build fix

Today's linux-next build (sparc64 defconfig) failed like this:

  arch/sparc/kernel/built-in.o: In function `trap_init':
  (.init.text+0x4): undefined reference to `thread_info_offsets_are_bolixed_dave'

Caused by commit 52400ba946759af28442dee6265c5c0180ac7122 ("futex: add
requeue_pi functionality") (from the tip-core tree) which changed the
size of struct restart_block.

Shift TI_KUNA_REGS and TI_KUNA_INSN up by 8 bytes to make space for the
larger restart block.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: "David S. Miller" <davem@davemloft.net>
Cc: Darren Hart <dvhltc@us.ibm.com>
LKML-Reference: <20090409151722.c8eabb56.sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 639ac80..6586572 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -102,8 +102,8 @@ struct thread_info {
 #define TI_KERN_CNTD1	0x00000488
 #define TI_PCR		0x00000490
 #define TI_RESTART_BLOCK 0x00000498
-#define TI_KUNA_REGS	0x000004c0
-#define TI_KUNA_INSN	0x000004c8
+#define TI_KUNA_REGS	0x000004c8
+#define TI_KUNA_INSN	0x000004d0
 #define TI_FPREGS	0x00000500
 
 /* We embed this in the uppermost byte of thread_info->flags */
-- 
cgit v0.10.2


From 002f128b473fb82f454654be5081b0919ee01ab2 Mon Sep 17 00:00:00 2001
From: Paul Turner <pjt@google.com>
Date: Wed, 8 Apr 2009 15:29:43 -0700
Subject: sched: remove redundant hierarchy walk in check_preempt_wakeup

Impact: micro-optimization

Under group scheduling we traverse up until we are at common siblings
to make the wakeup comparison on.

At this point however, they should have the same parent so continuing
to check up the tree is redundant.

Signed-off-by: Paul Turner <pjt@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <alpine.DEB.1.00.0904081520320.30317@kitami.corp.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3816f21..5f9650e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1487,17 +1487,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 
 	find_matching_se(&se, &pse);
 
-	while (se) {
-		BUG_ON(!pse);
+	BUG_ON(!pse);
 
-		if (wakeup_preempt_entity(se, pse) == 1) {
-			resched_task(curr);
-			break;
-		}
-
-		se = parent_entity(se);
-		pse = parent_entity(pse);
-	}
+	if (wakeup_preempt_entity(se, pse) == 1)
+		resched_task(curr);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
-- 
cgit v0.10.2


From 888fcee066a2f4abd0d0bc9418c0535f9b01e6e5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 9 Apr 2009 09:48:22 +0200
Subject: perf_counter: fix off task->comm by one

strlen() does not include the \0.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 321c57e..b07195b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1989,7 +1989,7 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
 	unsigned int size;
 	char *comm = comm_event->task->comm;
 
-	size = ALIGN(strlen(comm), sizeof(u64));
+	size = ALIGN(strlen(comm)+1, sizeof(u64));
 
 	comm_event->comm = comm;
 	comm_event->comm_size = size;
@@ -2109,7 +2109,7 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
 	}
 
 got_name:
-	size = ALIGN(strlen(name), sizeof(u64));
+	size = ALIGN(strlen(name)+1, sizeof(u64));
 
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
-- 
cgit v0.10.2


From b3828ebb3901adfe989d8d4157ed28247aeec132 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 9 Apr 2009 09:50:04 +0200
Subject: perf_counter tools: include PID in perf-report output, tweak
 user/kernel printut

It's handier than an <unknown> entry.
Also replace the kernel/user column with a more compact version:

  0.52              cc1  [k]  page_fault
  0.57               :0  [k]   _spin_lock
  0.59            :7506  [.]  <unknown>
  0.69               as  [.]  /usr/bin/as: <unknown>
  0.76              cc1  [.]  /lib64/libc-2.8.so: _int_free
  0.92              cc1  [k]  clear_page_c
  1.00            :7465  [.]  <unknown>
  1.43              cc1  [.]  /lib64/libc-2.8.so: memset
  1.86              cc1  [.]  /lib64/libc-2.8.so: _int_malloc
 70.33              cc1  [.]  /usr/libexec/gcc/x86_64-redhat-linux/4.3.2/cc1: <unknown>

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc
index 09da0ba..1727317 100644
--- a/Documentation/perf_counter/perf-report.cc
+++ b/Documentation/perf_counter/perf-report.cc
@@ -277,10 +277,17 @@ static std::multimap<int, std::string> rev_hist;
 
 static std::string resolve_comm(int pid)
 {
-	std::string comm = "<unknown>";
+	std::string comm;
+
 	std::map<int, std::string>::const_iterator ci = comms.find(pid);
-	if (ci != comms.end())
+	if (ci != comms.end()) {
 		comm = ci->second;
+	} else {
+		char pid_str[30];
+
+		sprintf(pid_str, ":%d", pid);
+		comm = pid_str;
+	}
 
 	return comm;
 }
@@ -422,13 +429,13 @@ more:
 		char output[1024];
 
 		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
-			level = "[kernel]";
+			level = " [k] ";
 			sym = resolve_kernel_symbol(event->ip.ip);
 		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
-			level = "[ user ]";
+			level = " [.] ";
 			sym = resolve_user_symbol(event->ip.pid, event->ip.ip);
 		} else {
-			level = "[  hv  ]";
+			level = " [H] ";
 		}
 		comm = resolve_comm(event->ip.pid);
 
-- 
cgit v0.10.2


From 9ee318a7825929bc3734110b83ae8e20e53d9de3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 9 Apr 2009 10:53:44 +0200
Subject: perf_counter: optimize mmap/comm tracking

Impact: performance optimization

The mmap/comm tracking code does quite a lot of work before it discovers
there's no interest in it, avoid that by keeping a counter.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090409085524.427173196@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b07195b..76376ec 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -38,6 +38,10 @@ int perf_max_counters __read_mostly = 1;
 static int perf_reserved_percpu __read_mostly;
 static int perf_overcommit __read_mostly = 1;
 
+static atomic_t nr_mmap_tracking __read_mostly;
+static atomic_t nr_munmap_tracking __read_mostly;
+static atomic_t nr_comm_tracking __read_mostly;
+
 /*
  * Mutex for (sysadmin-configurable) counter reservations:
  */
@@ -1186,6 +1190,13 @@ static void free_counter(struct perf_counter *counter)
 {
 	perf_pending_sync(counter);
 
+	if (counter->hw_event.mmap)
+		atomic_dec(&nr_mmap_tracking);
+	if (counter->hw_event.munmap)
+		atomic_dec(&nr_munmap_tracking);
+	if (counter->hw_event.comm)
+		atomic_dec(&nr_comm_tracking);
+
 	if (counter->destroy)
 		counter->destroy(counter);
 
@@ -2005,7 +2016,12 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
 
 void perf_counter_comm(struct task_struct *task)
 {
-	struct perf_comm_event comm_event = {
+	struct perf_comm_event comm_event;
+
+	if (!atomic_read(&nr_comm_tracking))
+		return;
+       
+	comm_event = (struct perf_comm_event){
 		.task	= task,
 		.event  = {
 			.header = { .type = PERF_EVENT_COMM, },
@@ -2128,7 +2144,12 @@ got_name:
 void perf_counter_mmap(unsigned long addr, unsigned long len,
 		       unsigned long pgoff, struct file *file)
 {
-	struct perf_mmap_event mmap_event = {
+	struct perf_mmap_event mmap_event;
+
+	if (!atomic_read(&nr_mmap_tracking))
+		return;
+
+	mmap_event = (struct perf_mmap_event){
 		.file   = file,
 		.event  = {
 			.header = { .type = PERF_EVENT_MMAP, },
@@ -2146,7 +2167,12 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 void perf_counter_munmap(unsigned long addr, unsigned long len,
 			 unsigned long pgoff, struct file *file)
 {
-	struct perf_mmap_event mmap_event = {
+	struct perf_mmap_event mmap_event;
+
+	if (!atomic_read(&nr_munmap_tracking))
+		return;
+
+	mmap_event = (struct perf_mmap_event){
 		.file   = file,
 		.event  = {
 			.header = { .type = PERF_EVENT_MUNMAP, },
@@ -2725,6 +2751,13 @@ done:
 
 	counter->hw_ops = hw_ops;
 
+	if (counter->hw_event.mmap)
+		atomic_inc(&nr_mmap_tracking);
+	if (counter->hw_event.munmap)
+		atomic_inc(&nr_munmap_tracking);
+	if (counter->hw_event.comm)
+		atomic_inc(&nr_comm_tracking);
+
 	return counter;
 }
 
-- 
cgit v0.10.2


From 1ccd15497869f3ed83b5225d410df53a96e52757 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 9 Apr 2009 10:53:45 +0200
Subject: perf_counter: sysctl for system wide perf counters

Impact: add sysctl for paranoid/relaxed perfcounters policy

Allow the use of system wide perf counters to everybody, but provide
a sysctl to disable it for the paranoid security minded.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090409085524.514046352@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c22363a..9814328 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -568,6 +568,8 @@ struct perf_callchain_entry {
 
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
+extern int sysctl_perf_counter_priv;
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 76376ec..7efb7eb 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -42,6 +42,8 @@ static atomic_t nr_mmap_tracking __read_mostly;
 static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
+int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
+
 /*
  * Mutex for (sysadmin-configurable) counter reservations:
  */
@@ -1132,7 +1134,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	 */
 	if (cpu != -1) {
 		/* Must be root to operate on a CPU counter: */
-		if (!capable(CAP_SYS_ADMIN))
+		if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
 			return ERR_PTR(-EACCES);
 
 		if (cpu < 0 || cpu > num_possible_cpus())
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4286b62..8ba4578 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -49,6 +49,7 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/slow-work.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -920,6 +921,16 @@ static struct ctl_table kern_table[] = {
 		.child		= slow_work_sysctls,
 	},
 #endif
+#ifdef CONFIG_PERF_COUNTERS
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_privileged",
+		.data		= &sysctl_perf_counter_priv,
+		.maxlen		= sizeof(sysctl_perf_counter_priv),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
-- 
cgit v0.10.2


From d3d21c412d8525eb2e208d990ab5eee5fb0fe03d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 9 Apr 2009 10:53:46 +0200
Subject: perf_counter: log full path names

Impact: fix perf-report output for /home mounted binaries, etc.

dentry_path() only provide path-names up to the mount root, which is
unsuited for out purpose, use d_path() instead.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090409085524.601794134@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7efb7eb..7f9521c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2116,7 +2116,7 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
 			name = strncpy(tmp, "//enomem", sizeof(tmp));
 			goto got_name;
 		}
-		name = dentry_path(file->f_dentry, buf, PATH_MAX);
+		name = d_path(&file->f_path, buf, PATH_MAX);
 		if (IS_ERR(name)) {
 			name = strncpy(tmp, "//toolong", sizeof(tmp));
 			goto got_name;
-- 
cgit v0.10.2


From 894bf92fdec9909fefcfe907786c6c6944a22052 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Thu, 9 Apr 2009 12:34:40 +0300
Subject: ASoC: tlv320aic23: add DSP_A format support

Add DSP_A interface format support by setting the LRP bit in
DSP mode.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c
index c3f4afb..21f69df 100644
--- a/sound/soc/codecs/tlv320aic23.c
+++ b/sound/soc/codecs/tlv320aic23.c
@@ -523,6 +523,8 @@ static int tlv320aic23_set_dai_fmt(struct snd_soc_dai *codec_dai,
 	case SND_SOC_DAIFMT_I2S:
 		iface_reg |= TLV320AIC23_FOR_I2S;
 		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface_reg |= TLV320AIC23_LRP_ON;
 	case SND_SOC_DAIFMT_DSP_B:
 		iface_reg |= TLV320AIC23_FOR_DSP;
 		break;
-- 
cgit v0.10.2


From e7c064889606aab3569669078c69b87b2c527e72 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Sat, 7 Mar 2009 23:48:41 -0800
Subject: xen: add FIX_TEXT_POKE to fixmap

FIX_TEXT_POKE[01] are used to map kernel addresses, so they're mapping
pfns, not mfns.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 77b242c..a96f5b9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1812,6 +1812,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
 #ifdef CONFIG_X86_LOCAL_APIC
 	case FIX_APIC_BASE:	/* maps dummy local APIC */
 #endif
+	case FIX_TEXT_POKE0:
+	case FIX_TEXT_POKE1:
+		/* All local page mappings */
 		pte = pfn_pte(phys, prot);
 		break;
 
-- 
cgit v0.10.2


From 7a734e7dd93b9aea08ed51036a9a0e2c9dfd8dac Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 1 Apr 2009 18:08:28 -0700
Subject: x86, setup: "glove box" BIOS calls -- infrastructure

Impact: new interfaces (not yet used)

For all the platforms out there, there is an infinite number of buggy
BIOSes.  This adds infrastructure to treat BIOS interrupts more like
toxic waste and "glove box" them -- we switch out the register set,
perform the BIOS interrupt, and then restore the previous state.

LKML-Reference: <49DE7F79.4030106@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 6633b6e..658bc52 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,9 +26,10 @@ targets		:= vmlinux.bin setup.bin setup.elf bzImage
 targets		+= fdimage fdimage144 fdimage288 image.iso mtools.conf
 subdir-		:= compressed
 
-setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
+setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o
 setup-y		+= header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y		+= printf.o string.o tty.o video.o video-mode.o version.o
+setup-y		+= printf.o regs.o string.o tty.o video.o video-mode.o
+setup-y		+= version.o
 setup-$(CONFIG_X86_APM_BOOT) += apm.o
 
 # The link order of the video-*.o modules can matter.  In particular,
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644
index 0000000..22b4b3e
--- /dev/null
+++ b/arch/x86/boot/bioscall.S
@@ -0,0 +1,82 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * "Glove box" for BIOS calls.  Avoids the constant problems with BIOSes
+ * touching memory they shouldn't be.
+ */
+
+	.code16
+	.text
+	.globl	intcall
+	.type	intcall, @function
+intcall:
+	/* Self-modify the INT instruction.  Ugly, but works. */
+	cmpb	%al, 3f
+	je	1f
+	movb	%al, 3f
+	jmp	1f		/* Synchronize pipeline */
+1:
+	/* Save state */
+	pushfl
+	pushw	%fs
+	pushw	%gs
+	pushal
+
+	/* Copy input state to stack frame */
+	subw	$44, %sp
+	movw	%dx, %si
+	movw	%sp, %di
+	movw	$11, %cx
+	rep; movsd
+
+	/* Pop full state from the stack */
+	popal
+	popw	%gs
+	popw	%fs
+	popw	%es
+	popw	%ds
+	popfl
+
+	/* Actual INT */
+	.byte	0xcd		/* INT opcode */
+3:	.byte	0
+
+	/* Push full state to the stack */
+	pushfl
+	pushw	%ds
+	pushw	%es
+	pushw	%fs
+	pushw	%gs
+	pushal
+
+	/* Re-establish C environment invariants */
+	cld
+	movzwl	%sp, %esp
+	movw	%cs, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+
+	/* Copy output state from stack frame */
+	movw	68(%esp), %di	/* Original %cx == 3rd argument */
+	andw	%di, %di
+	jz	4f
+	movw	%sp, %si
+	movw	$11, %cx
+	rep; movsd
+4:	addw	$44, %sp
+
+	/* Restore state and return */
+	popal
+	popw	%gs
+	popw	%fs
+	popfl
+	retl
+	.size	intcall, .-intcall
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 7b2692e..98239d2 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -26,6 +27,7 @@
 #include <asm/setup.h>
 #include "bitops.h"
 #include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
 
 /* Useful macros */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -241,6 +243,49 @@ int enable_a20(void);
 /* apm.c */
 int query_apm_bios(void);
 
+/* bioscall.c */
+struct biosregs {
+	union {
+		struct {
+			u32 edi;
+			u32 esi;
+			u32 ebp;
+			u32 _esp;
+			u32 ebx;
+			u32 edx;
+			u32 ecx;
+			u32 eax;
+			u32 _fsgs;
+			u32 _dses;
+			u32 eflags;
+		};
+		struct {
+			u16 di, hdi;
+			u16 si, hsi;
+			u16 bp, hbp;
+			u16 _sp, _hsp;
+			u16 bx, hbx;
+			u16 dx, hdx;
+			u16 cx, hcx;
+			u16 ax, hax;
+			u16 gs, fs;
+			u16 es, ds;
+			u16 flags, hflags;
+		};
+		struct {
+			u8 dil, dih, edi2, edi3;
+			u8 sil, sih, esi2, esi3;
+			u8 bpl, bph, ebp2, ebp3;
+			u8 _spl, _sph, _esp2, _esp3;
+			u8 bl, bh, ebx2, ebx3;
+			u8 dl, dh, edx2, edx3;
+			u8 cl, ch, ecx2, ecx3;
+			u8 al, ah, eax2, eax3;
+		};
+	};
+};
+void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
+
 /* cmdline.c */
 int cmdline_find_option(const char *option, char *buffer, int bufsize);
 int cmdline_find_option_bool(const char *option);
@@ -279,6 +324,9 @@ int sprintf(char *buf, const char *fmt, ...);
 int vsprintf(char *buf, const char *fmt, va_list args);
 int printf(const char *fmt, ...);
 
+/* regs.c */
+void initregs(struct biosregs *regs);
+
 /* string.c */
 int strcmp(const char *str1, const char *str2);
 size_t strnlen(const char *s, size_t maxlen);
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 5d84d1c..486d97f 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -221,7 +221,7 @@ setup_data:		.quad 0			# 64-bit physical pointer to
 
 # End of setup header #####################################################
 
-	.section ".inittext", "ax"
+	.section ".entrytext", "ax"
 start_of_setup:
 #ifdef SAFE_RESET_DISK_CONTROLLER
 # Reset the disk controller.
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
new file mode 100644
index 0000000..958019b
--- /dev/null
+++ b/arch/x86/boot/regs.c
@@ -0,0 +1,29 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple helper function for initializing a register set.
+ *
+ * Note that this sets EFLAGS_CF in the input register set; this
+ * makes it easier to catch functions which do nothing but don't
+ * explicitly set CF.
+ */
+
+#include "boot.h"
+
+void initregs(struct biosregs *reg)
+{
+	memset(reg, 0, sizeof *reg);
+	reg->eflags |= X86_EFLAGS_CF;
+	reg->ds = ds();
+	reg->es = ds();
+	reg->fs = fs();
+	reg->gs = gs();
+}
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index bb8dc2d..0f6ec455 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -15,8 +15,11 @@ SECTIONS
 
 	. = 497;
 	.header		: { *(.header) }
+	.entrytext	: { *(.entrytext) }
 	.inittext	: { *(.inittext) }
 	.initdata	: { *(.initdata) }
+	__end_init = .;
+
 	.text		: { *(.text) }
 	.text32		: { *(.text32) }
 
@@ -52,4 +55,7 @@ SECTIONS
 
 	. = ASSERT(_end <= 0x8000, "Setup too big!");
 	. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
+	/* Necessary for the very-old-loader check to work... */
+	. = ASSERT(__end_init <= 5*512, "init sections too big!");
+
 }
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 1c31cc0..167bc16 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -9,7 +9,7 @@
 always		:= wakeup.bin
 targets		:= wakeup.elf wakeup.lds
 
-wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o
+wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o
 
 # The link order of the video-*.o modules can matter.  In particular,
 # video-vga.o *must* be listed first, followed by video-vesa.o.
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S
new file mode 100644
index 0000000..f51eb0b
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/bioscall.S
@@ -0,0 +1 @@
+#include "../../../boot/bioscall.S"
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c
new file mode 100644
index 0000000..6206033
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/regs.c
@@ -0,0 +1 @@
+#include "../../../boot/regs.c"
-- 
cgit v0.10.2


From df7699c56421c0476704f24a43409ac8c505f3d2 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 1 Apr 2009 18:13:46 -0700
Subject: x86, setup: "glove box" BIOS interrupts in the core boot code

Impact: BIOS proofing

"Glove box" off BIOS interrupts in the core boot code.

LKML-Reference: <49DE7F79.4030106@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 7c19ce8..64a31a6 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -2,7 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007-2008 rPath, Inc. - All Rights Reserved
- *   Copyright 2009 Intel Corporation
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -90,8 +90,11 @@ static int a20_test_long(void)
 
 static void enable_a20_bios(void)
 {
-	asm volatile("pushfl; int $0x15; popfl"
-		     : : "a" ((u16)0x2401));
+	struct biosregs ireg;
+
+	initregs(&ireg);
+	ireg.ax = 0x2401;
+	intcall(0x15, &ireg, NULL);
 }
 
 static void enable_a20_kbc(void)
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 58f0415..140172b 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -61,11 +62,10 @@ static void copy_boot_params(void)
  */
 static void keyboard_set_repeat(void)
 {
-	u16 ax = 0x0305;
-	u16 bx = 0;
-	asm volatile("int $0x16"
-		     : "+a" (ax), "+b" (bx)
-		     : : "ecx", "edx", "esi", "edi");
+	struct biosregs ireg;
+	initregs(&ireg);
+	ireg.ax = 0x0305;
+	intcall(0x16, &ireg, NULL);
 }
 
 /*
@@ -73,18 +73,22 @@ static void keyboard_set_repeat(void)
  */
 static void query_ist(void)
 {
+	struct biosregs ireg, oreg;
+
 	/* Some older BIOSes apparently crash on this call, so filter
 	   it from machines too old to have SpeedStep at all. */
 	if (cpu.level < 6)
 		return;
 
-	asm("int $0x15"
-	    : "=a" (boot_params.ist_info.signature),
-	      "=b" (boot_params.ist_info.command),
-	      "=c" (boot_params.ist_info.event),
-	      "=d" (boot_params.ist_info.perf_level)
-	    : "a" (0x0000e980),	 /* IST Support */
-	      "d" (0x47534943)); /* Request value */
+	initregs(&ireg);
+	ireg.ax  = 0xe980;	 /* IST Support */
+	ireg.edx = 0x47534943;	 /* Request value */
+	intcall(0x15, &ireg, &oreg);
+
+	boot_params.ist_info.signature  = oreg.eax;
+	boot_params.ist_info.command    = oreg.ebx;
+	boot_params.ist_info.event      = oreg.ecx;
+	boot_params.ist_info.perf_level = oreg.edx;
 }
 
 /*
@@ -93,13 +97,12 @@ static void query_ist(void)
 static void set_bios_mode(void)
 {
 #ifdef CONFIG_X86_64
-	u32 eax, ebx;
+	struct biosregs ireg;
 
-	eax = 0xec00;
-	ebx = 2;
-	asm volatile("int $0x15"
-		     : "+a" (eax), "+b" (ebx)
-		     : : "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ax = 0xec00;
+	ireg.bx = 2;
+	intcall(0x15, &ireg, NULL);
 #endif
 }
 
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 5054c2d..d989de8 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -25,12 +25,16 @@ struct e820_ext_entry {
 static int detect_memory_e820(void)
 {
 	int count = 0;
-	u32 next = 0;
-	u32 size, id, edi;
-	u8 err;
+	struct biosregs ireg, oreg;
 	struct e820entry *desc = boot_params.e820_map;
 	static struct e820_ext_entry buf; /* static so it is zeroed */
 
+	initregs(&ireg);
+	ireg.ax  = 0xe820;
+	ireg.cx  = sizeof buf;
+	ireg.edx = SMAP;
+	ireg.di  = (size_t)&buf;
+
 	/*
 	 * Set this here so that if the BIOS doesn't change this field
 	 * but still doesn't change %ecx, we're still okay...
@@ -38,22 +42,13 @@ static int detect_memory_e820(void)
 	buf.ext_flags = 1;
 
 	do {
-		size = sizeof buf;
-
-		/* Important: %edx and %esi are clobbered by some BIOSes,
-		   so they must be either used for the error output
-		   or explicitly marked clobbered.  Given that, assume there
-		   is something out there clobbering %ebp and %edi, too. */
-		asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0"
-		    : "=d" (err), "+b" (next), "=a" (id), "+c" (size),
-		      "=D" (edi), "+m" (buf)
-		    : "D" (&buf), "d" (SMAP), "a" (0xe820)
-		    : "esi");
+		intcall(0x15, &ireg, &oreg);
+		ireg.ebx = oreg.ebx; /* for next iteration... */
 
 		/* BIOSes which terminate the chain with CF = 1 as opposed
 		   to %ebx = 0 don't always report the SMAP signature on
 		   the final, failing, probe. */
-		if (err)
+		if (oreg.eflags & X86_EFLAGS_CF)
 			break;
 
 		/* Some BIOSes stop returning SMAP in the middle of
@@ -61,7 +56,7 @@ static int detect_memory_e820(void)
 		   screwed up the map at that point, we might have a
 		   partial map, the full map, or complete garbage, so
 		   just return failure. */
-		if (id != SMAP) {
+		if (oreg.eax != SMAP) {
 			count = 0;
 			break;
 		}
@@ -69,58 +64,62 @@ static int detect_memory_e820(void)
 		/* ACPI 3.0 added the extended flags support.  If bit 0
 		   in the extended flags is zero, we're supposed to simply
 		   ignore the entry -- a backwards incompatible change! */
-		if (size > 20 && !(buf.ext_flags & 1))
+		if (oreg.cx > 20 && !(buf.ext_flags & 1))
 			continue;
 
 		*desc++ = buf.std;
 		count++;
-	} while (next && count < ARRAY_SIZE(boot_params.e820_map));
+	} while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
 
 	return boot_params.e820_entries = count;
 }
 
 static int detect_memory_e801(void)
 {
-	u16 ax, bx, cx, dx;
-	u8 err;
+	struct biosregs ireg, oreg;
 
-	bx = cx = dx = 0;
-	ax = 0xe801;
-	asm("stc; int $0x15; setc %0"
-	    : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx));
+	initregs(&ireg);
+	ireg.ax = 0xe801;
+	intcall(0x15, &ireg, &oreg);
 
-	if (err)
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;
 
 	/* Do we really need to do this? */
-	if (cx || dx) {
-		ax = cx;
-		bx = dx;
+	if (oreg.cx || oreg.dx) {
+		oreg.ax = oreg.cx;
+		oreg.bx = oreg.dx;
 	}
 
-	if (ax > 15*1024)
+	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
-
-	/* This ignores memory above 16MB if we have a memory hole
-	   there.  If someone actually finds a machine with a memory
-	   hole at 16MB and no support for 0E820h they should probably
-	   generate a fake e820 map. */
-	boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax;
+	} else if (oreg.ax == 15*1024) {
+		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+	} else {
+		/*
+		 * This ignores memory above 16MB if we have a memory
+		 * hole there.  If someone actually finds a machine
+		 * with a memory hole at 16MB and no support for
+		 * 0E820h they should probably generate a fake e820
+		 * map.
+		 */
+		boot_params.alt_mem_k = oreg.ax;
+	}
 
 	return 0;
 }
 
 static int detect_memory_88(void)
 {
-	u16 ax;
-	u8 err;
+	struct biosregs ireg, oreg;
 
-	ax = 0x8800;
-	asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax));
+	initregs(&ireg);
+	ireg.ah = 0x88;
+	intcall(0x15, &ireg, &oreg);
 
-	boot_params.screen_info.ext_mem_k = ax;
+	boot_params.screen_info.ext_mem_k = oreg.ax;
 
-	return -err;
+	return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
 }
 
 int detect_memory(void)
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 7e8e8b2..01ec69c 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -22,24 +23,23 @@
 
 void __attribute__((section(".inittext"))) putchar(int ch)
 {
-	unsigned char c = ch;
+	struct biosregs ireg;
 
-	if (c == '\n')
+	if (ch == '\n')
 		putchar('\r');	/* \n -> \r\n */
 
-	/* int $0x10 is known to have bugs involving touching registers
-	   it shouldn't.  Be extra conservative... */
-	asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal"
-		     : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch));
+	initregs(&ireg);
+	ireg.bx = 0x0007;
+	ireg.cx = 0x0001;
+	ireg.ah = 0x0e;
+	ireg.al = ch;
+	intcall(0x10, &ireg, NULL);
 }
 
 void __attribute__((section(".inittext"))) puts(const char *str)
 {
-	int n = 0;
-	while (*str) {
+	while (*str)
 		putchar(*str++);
-		n++;
-	}
 }
 
 /*
@@ -49,14 +49,13 @@ void __attribute__((section(".inittext"))) puts(const char *str)
 
 static u8 gettime(void)
 {
-	u16 ax = 0x0200;
-	u16 cx, dx;
+	struct biosregs ireg, oreg;
 
-	asm volatile("int $0x1a"
-		     : "+a" (ax), "=c" (cx), "=d" (dx)
-		     : : "ebx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x02;
+	intcall(0x1a, &ireg, &oreg);
 
-	return dx >> 8;
+	return oreg.dh;
 }
 
 /*
@@ -64,19 +63,24 @@ static u8 gettime(void)
  */
 int getchar(void)
 {
-	u16 ax = 0;
-	asm volatile("int $0x16" : "+a" (ax));
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+	/* ireg.ah = 0x00; */
+	intcall(0x16, &ireg, &oreg);
 
-	return ax & 0xff;
+	return oreg.al;
 }
 
 static int kbd_pending(void)
 {
-	u8 pending;
-	asm volatile("int $0x16; setnz %0"
-		     : "=qm" (pending)
-		     : "a" (0x0100));
-	return pending;
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+	ireg.ah = 0x01;
+	intcall(0x16, &ireg, &oreg);
+
+	return !(oreg.eflags & X86_EFLAGS_ZF);
 }
 
 void kbd_flush(void)
-- 
cgit v0.10.2


From d54ea252e4c92357226992cf65d94616a96e6fce Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 1 Apr 2009 18:14:26 -0700
Subject: x86, setup: "glove box" BIOS interrupts in the APM code

Impact: BIOS proofing

"Glove box" off BIOS interrupts in the APM code.

LKML-Reference: <49DE7F79.4030106@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>

diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index 7aa6033..ee27483 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   Original APM BIOS checking by Stephen Rothwell, May 1994
  *   (sfr@canb.auug.org.au)
@@ -19,75 +20,56 @@
 
 int query_apm_bios(void)
 {
-	u16 ax, bx, cx, dx, di;
-	u32 ebx, esi;
-	u8 err;
+	struct biosregs ireg, oreg;
 
 	/* APM BIOS installation check */
-	ax = 0x5300;
-	bx = cx = 0;
-	asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
-		     : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
-		     : : "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x53;
+	intcall(0x15, &ireg, &oreg);
 
-	if (err)
+	if (oreg.flags & X86_EFLAGS_CF)
 		return -1;		/* No APM BIOS */
 
-	if (bx != 0x504d)	/* "PM" signature */
+	if (oreg.bx != 0x504d)		/* "PM" signature */
 		return -1;
 
-	if (!(cx & 0x02))		/* 32 bits supported? */
+	if (!(oreg.cx & 0x02))		/* 32 bits supported? */
 		return -1;
 
 	/* Disconnect first, just in case */
-	ax = 0x5304;
-	bx = 0;
-	asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
-		     : "+a" (ax), "+b" (bx)
-		     : : "ecx", "edx", "esi", "edi");
-
-	/* Paranoia */
-	ebx = esi = 0;
-	cx = dx = di = 0;
+	ireg.al = 0x04;
+	intcall(0x15, &ireg, NULL);
 
 	/* 32-bit connect */
-	asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6"
-		     : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx),
-		       "+S" (esi), "+D" (di), "=m" (err)
-		     : "a" (0x5303));
-
-	boot_params.apm_bios_info.cseg = ax;
-	boot_params.apm_bios_info.offset = ebx;
-	boot_params.apm_bios_info.cseg_16 = cx;
-	boot_params.apm_bios_info.dseg = dx;
-	boot_params.apm_bios_info.cseg_len = (u16)esi;
-	boot_params.apm_bios_info.cseg_16_len = esi >> 16;
-	boot_params.apm_bios_info.dseg_len = di;
-
-	if (err)
+	ireg.al = 0x03;
+	intcall(0x15, &ireg, &oreg);
+
+	boot_params.apm_bios_info.cseg        = oreg.ax;
+	boot_params.apm_bios_info.offset      = oreg.ebx;
+	boot_params.apm_bios_info.cseg_16     = oreg.cx;
+	boot_params.apm_bios_info.dseg        = oreg.dx;
+	boot_params.apm_bios_info.cseg_len    = oreg.si;
+	boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
+	boot_params.apm_bios_info.dseg_len    = oreg.di;
+
+	if (oreg.flags & X86_EFLAGS_CF)
 		return -1;
 
 	/* Redo the installation check as the 32-bit connect;
 	   some BIOSes return different flags this way... */
 
-	ax = 0x5300;
-	bx = cx = 0;
-	asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
-		     : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
-		     : : "esi", "edi");
+	ireg.al = 0x00;
+	intcall(0x15, &ireg, &oreg);
 
-	if (err || bx != 0x504d) {
+	if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
 		/* Failure with 32-bit connect, try to disconect and ignore */
-		ax = 0x5304;
-		bx = 0;
-		asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
-			     : "+a" (ax), "+b" (bx)
-			     : : "ecx", "edx", "esi", "edi");
+		ireg.al = 0x04;
+		intcall(0x15, &ireg, NULL);
 		return -1;
 	}
 
-	boot_params.apm_bios_info.version = ax;
-	boot_params.apm_bios_info.flags = cx;
+	boot_params.apm_bios_info.version = oreg.ax;
+	boot_params.apm_bios_info.flags   = oreg.cx;
 	return 0;
 }
 
-- 
cgit v0.10.2


From 3435d3476c5ed955d56a6216ed2d156847b3a575 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 1 Apr 2009 18:17:17 -0700
Subject: x86, setup: "glove box" BIOS interrupts in the EDD code

Impact: BIOS proofing

"Glove box" off BIOS interrupts in the EDD code.

LKML-Reference: <49DE7F79.4030106@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 1aae8f3..c501a5b 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -22,17 +23,17 @@
  */
 static int read_mbr(u8 devno, void *buf)
 {
-	u16 ax, bx, cx, dx;
+	struct biosregs ireg, oreg;
 
-	ax = 0x0201;		/* Legacy Read, one sector */
-	cx = 0x0001;		/* Sector 0-0-1 */
-	dx = devno;
-	bx = (size_t)buf;
-	asm volatile("pushfl; stc; int $0x13; setc %%al; popfl"
-		     : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx)
-		     : : "esi", "edi", "memory");
+	initregs(&ireg);
+	ireg.ax = 0x0201;		/* Legacy Read, one sector */
+	ireg.cx = 0x0001;		/* Sector 0-0-1 */
+	ireg.dl = devno;
+	ireg.bx = (size_t)buf;
 
-	return -(u8)ax;		/* 0 or -1 */
+	intcall(0x13, &ireg, &oreg);
+
+	return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
 }
 
 static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
@@ -72,56 +73,46 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
 
 static int get_edd_info(u8 devno, struct edd_info *ei)
 {
-	u16 ax, bx, cx, dx, di;
+	struct biosregs ireg, oreg;
 
 	memset(ei, 0, sizeof *ei);
 
 	/* Check Extensions Present */
 
-	ax = 0x4100;
-	bx = EDDMAGIC1;
-	dx = devno;
-	asm("pushfl; stc; int $0x13; setc %%al; popfl"
-	    : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx)
-	    : : "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x41;
+	ireg.bx = EDDMAGIC1;
+	ireg.dl = devno;
+	intcall(0x13, &ireg, &oreg);
 
-	if ((u8)ax)
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;	/* No extended information */
 
-	if (bx != EDDMAGIC2)
+	if (oreg.bx != EDDMAGIC2)
 		return -1;
 
 	ei->device  = devno;
-	ei->version = ax >> 8;	/* EDD version number */
-	ei->interface_support = cx; /* EDD functionality subsets */
+	ei->version = oreg.ah;		 /* EDD version number */
+	ei->interface_support = oreg.cx; /* EDD functionality subsets */
 
 	/* Extended Get Device Parameters */
 
 	ei->params.length = sizeof(ei->params);
-	ax = 0x4800;
-	dx = devno;
-	asm("pushfl; int $0x13; popfl"
-	    : "+a" (ax), "+d" (dx), "=m" (ei->params)
-	    : "S" (&ei->params)
-	    : "ebx", "ecx", "edi");
+	ireg.ah = 0x48;
+	ireg.si = (size_t)&ei->params;
+	intcall(0x13, &ireg, &oreg);
 
 	/* Get legacy CHS parameters */
 
 	/* Ralf Brown recommends setting ES:DI to 0:0 */
-	ax = 0x0800;
-	dx = devno;
-	di = 0;
-	asm("pushw %%es; "
-	    "movw %%di,%%es; "
-	    "pushfl; stc; int $0x13; setc %%al; popfl; "
-	    "popw %%es"
-	    : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di)
-	    : : "esi");
-
-	if ((u8)ax == 0) {
-		ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2);
-		ei->legacy_max_head = dx >> 8;
-		ei->legacy_sectors_per_track = cx & 0x3f;
+	ireg.ah = 0x08;
+	ireg.es = 0;
+	intcall(0x13, &ireg, &oreg);
+
+	if (!(oreg.eflags & X86_EFLAGS_CF)) {
+		ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
+		ei->legacy_max_head = oreg.dh;
+		ei->legacy_sectors_per_track = oreg.cl & 0x3f;
 	}
 
 	return 0;
-- 
cgit v0.10.2


From 0a706db320768f8f6e43bbf73b58d2aabdc93354 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 1 Apr 2009 18:19:00 -0700
Subject: x86, setup: "glove box" BIOS interrupts in the MCA code

Impact: BIOS proofing

"Glove box" off BIOS interrupts in the MCA code.

LKML-Reference: <49DE7F79.4030106@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c
index 911eaae..a95a531 100644
--- a/arch/x86/boot/mca.c
+++ b/arch/x86/boot/mca.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -16,26 +17,22 @@
 
 int query_mca(void)
 {
-	u8 err;
-	u16 es, bx, len;
-
-	asm("pushw %%es ; "
-	    "int $0x15 ; "
-	    "setc %0 ; "
-	    "movw %%es, %1 ; "
-	    "popw %%es"
-	    : "=acd" (err), "=acdSD" (es), "=b" (bx)
-	    : "a" (0xc000));
-
-	if (err)
+	struct biosregs ireg, oreg;
+	u16 len;
+
+	initregs(&ireg);
+	ireg.ah = 0xc0;
+	intcall(0x15, &ireg, &oreg);
+
+	if (oreg.eflags & X86_EFLAGS_CF)
 		return -1;	/* No MCA present */
 
-	set_fs(es);
-	len = rdfs16(bx);
+	set_fs(oreg.es);
+	len = rdfs16(oreg.bx);
 
 	if (len > sizeof(boot_params.sys_desc_table))
 		len = sizeof(boot_params.sys_desc_table);
 
-	copy_from_fs(&boot_params.sys_desc_table, bx, len);
+	copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
 	return 0;
 }
-- 
cgit v0.10.2


From cf06de7b9cdd3efee7a59dced1977b3c21d43732 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 1 Apr 2009 18:20:11 -0700
Subject: x86, setup: "glove box" BIOS interrupts in the video code

Impact: BIOS proofing

"Glove box" off BIOS interrupts in the video code.

LKML-Reference: <49DE7F79.4030106@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index 3fa979c..d660be4 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -29,21 +30,21 @@ static int bios_set_mode(struct mode_info *mi)
 
 static int set_bios_mode(u8 mode)
 {
-	u16 ax;
+	struct biosregs ireg, oreg;
 	u8 new_mode;
 
-	ax = mode;		/* AH=0x00 Set Video Mode */
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.al = mode;		/* AH=0x00 Set Video Mode */
+	intcall(0x10, &ireg, NULL);
 
-	ax = 0x0f00;		/* Get Current Video Mode */
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+
+	ireg.ah = 0x0f;		/* Get Current Video Mode */
+	intcall(0x10, &ireg, &oreg);
 
 	do_restore = 1;		/* Assume video contents were lost */
-	new_mode = ax & 0x7f;	/* Not all BIOSes are clean with the top bit */
+
+	/* Not all BIOSes are clean with the top bit */
+	new_mode = ireg.al & 0x7f;
 
 	if (new_mode == mode)
 		return 0;	/* Mode change OK */
@@ -53,10 +54,8 @@ static int set_bios_mode(u8 mode)
 		/* Mode setting failed, but we didn't end up where we
 		   started.  That's bad.  Try to revert to the original
 		   video mode. */
-		ax = boot_params.screen_info.orig_video_mode;
-		asm volatile(INT10
-			     : "+a" (ax)
-			     : : "ebx", "ecx", "edx", "esi", "edi");
+		ireg.ax = boot_params.screen_info.orig_video_mode;
+		intcall(0x10, &ireg, NULL);
 	}
 #endif
 	return -1;
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 4a58c8c..c700147 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -31,7 +32,7 @@ static inline void vesa_store_mode_params_graphics(void) {}
 static int vesa_probe(void)
 {
 #if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID)
-	u16 ax, cx, di;
+	struct biosregs ireg, oreg;
 	u16 mode;
 	addr_t mode_ptr;
 	struct mode_info *mi;
@@ -39,13 +40,12 @@ static int vesa_probe(void)
 
 	video_vesa.modes = GET_HEAP(struct mode_info, 0);
 
-	ax = 0x4f00;
-	di = (size_t)&vginfo;
-	asm(INT10
-	    : "+a" (ax), "+D" (di), "=m" (vginfo)
-	    : : "ebx", "ecx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f00;
+	ireg.di = (size_t)&vginfo;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f ||
+	if (ireg.ax != 0x004f ||
 	    vginfo.signature != VESA_MAGIC ||
 	    vginfo.version < 0x0102)
 		return 0;	/* Not present */
@@ -65,14 +65,12 @@ static int vesa_probe(void)
 
 		memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
 
-		ax = 0x4f01;
-		cx = mode;
-		di = (size_t)&vminfo;
-		asm(INT10
-		    : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
-		    : : "ebx", "edx", "esi");
+		ireg.ax = 0x4f01;
+		ireg.cx = mode;
+		ireg.di = (size_t)&vminfo;
+		intcall(0x10, &ireg, &oreg);
 
-		if (ax != 0x004f)
+		if (ireg.ax != 0x004f)
 			continue;
 
 		if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -111,20 +109,19 @@ static int vesa_probe(void)
 
 static int vesa_set_mode(struct mode_info *mode)
 {
-	u16 ax, bx, cx, di;
+	struct biosregs ireg, oreg;
 	int is_graphic;
 	u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
 
 	memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
 
-	ax = 0x4f01;
-	cx = vesa_mode;
-	di = (size_t)&vminfo;
-	asm(INT10
-	    : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
-	    : : "ebx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f01;
+	ireg.cx = vesa_mode;
+	ireg.di = (size_t)&vminfo;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return -1;
 
 	if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -141,14 +138,12 @@ static int vesa_set_mode(struct mode_info *mode)
 	}
 
 
-	ax = 0x4f02;
-	bx = vesa_mode;
-	di = 0;
-	asm volatile(INT10
-		     : "+a" (ax), "+b" (bx), "+D" (di)
-		     : : "ecx", "edx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f02;
+	ireg.bx = vesa_mode;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return -1;
 
 	graphic_mode = is_graphic;
@@ -171,50 +166,45 @@ static int vesa_set_mode(struct mode_info *mode)
 /* Switch DAC to 8-bit mode */
 static void vesa_dac_set_8bits(void)
 {
+	struct biosregs ireg, oreg;
 	u8 dac_size = 6;
 
 	/* If possible, switch the DAC to 8-bit mode */
 	if (vginfo.capabilities & 1) {
-		u16 ax, bx;
-
-		ax = 0x4f08;
-		bx = 0x0800;
-		asm volatile(INT10
-			     : "+a" (ax), "+b" (bx)
-			     : : "ecx", "edx", "esi", "edi");
-
-		if (ax == 0x004f)
-			dac_size = bx >> 8;
+		initregs(&ireg);
+		ireg.ax = 0x4f08;
+		ireg.bh = 0x08;
+		intcall(0x10, &ireg, &oreg);
+		if (oreg.ax == 0x004f)
+			dac_size = oreg.bh;
 	}
 
 	/* Set the color sizes to the DAC size, and offsets to 0 */
-	boot_params.screen_info.red_size = dac_size;
+	boot_params.screen_info.red_size   = dac_size;
 	boot_params.screen_info.green_size = dac_size;
-	boot_params.screen_info.blue_size = dac_size;
-	boot_params.screen_info.rsvd_size = dac_size;
+	boot_params.screen_info.blue_size  = dac_size;
+	boot_params.screen_info.rsvd_size  = dac_size;
 
-	boot_params.screen_info.red_pos = 0;
-	boot_params.screen_info.green_pos = 0;
-	boot_params.screen_info.blue_pos = 0;
-	boot_params.screen_info.rsvd_pos = 0;
+	boot_params.screen_info.red_pos    = 0;
+	boot_params.screen_info.green_pos  = 0;
+	boot_params.screen_info.blue_pos   = 0;
+	boot_params.screen_info.rsvd_pos   = 0;
 }
 
 /* Save the VESA protected mode info */
 static void vesa_store_pm_info(void)
 {
-	u16 ax, bx, di, es;
+	struct biosregs ireg, oreg;
 
-	ax = 0x4f0a;
-	bx = di = 0;
-	asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es"
-	    : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di)
-	    : : "ecx", "esi");
+	initregs(&ireg);
+	ireg.ax = 0x4f0a;
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return;
 
-	boot_params.screen_info.vesapm_seg = es;
-	boot_params.screen_info.vesapm_off = di;
+	boot_params.screen_info.vesapm_seg = oreg.es;
+	boot_params.screen_info.vesapm_off = oreg.di;
 }
 
 /*
@@ -252,7 +242,7 @@ static void vesa_store_mode_params_graphics(void)
 void vesa_store_edid(void)
 {
 #ifdef CONFIG_FIRMWARE_EDID
-	u16 ax, bx, cx, dx, di;
+	struct biosregs ireg, oreg;
 
 	/* Apparently used as a nonsense token... */
 	memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
@@ -260,33 +250,26 @@ void vesa_store_edid(void)
 	if (vginfo.version < 0x0200)
 		return;		/* EDID requires VBE 2.0+ */
 
-	ax = 0x4f15;		/* VBE DDC */
-	bx = 0x0000;		/* Report DDC capabilities */
-	cx = 0;			/* Controller 0 */
-	di = 0;			/* ES:DI must be 0 by spec */
-
-	/* Note: The VBE DDC spec is different from the main VESA spec;
-	   we genuinely have to assume all registers are destroyed here. */
-
-	asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es"
-	    : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di)
-	    : : "esi", "edx");
+	initregs(&ireg);
+	ireg.ax = 0x4f15;		/* VBE DDC */
+	/* ireg.bx = 0x0000; */		/* Report DDC capabilities */
+	/* ireg.cx = 0;	*/		/* Controller 0 */
+	ireg.es = 0;			/* ES:DI must be 0 by spec */
+	intcall(0x10, &ireg, &oreg);
 
-	if (ax != 0x004f)
+	if (oreg.ax != 0x004f)
 		return;		/* No EDID */
 
 	/* BH = time in seconds to transfer EDD information */
 	/* BL = DDC level supported */
 
-	ax = 0x4f15;		/* VBE DDC */
-	bx = 0x0001;		/* Read EDID */
-	cx = 0;			/* Controller 0 */
-	dx = 0;			/* EDID block number */
-	di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */
-	asm(INT10
-	    : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info),
-	      "+c" (cx), "+D" (di)
-	    : : "esi");
+	ireg.ax = 0x4f15;		/* VBE DDC */
+	ireg.bx = 0x0001;		/* Read EDID */
+	/* ireg.cx = 0; */		/* Controller 0 */
+	/* ireg.dx = 0;	*/		/* EDID block number */
+	ireg.es = ds();
+	ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
+	intcall(0x10, &ireg, &oreg);
 #endif /* CONFIG_FIRMWARE_EDID */
 }
 
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 9e0587a..8f8d827 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -39,30 +40,30 @@ static __videocard video_vga;
 /* Set basic 80x25 mode */
 static u8 vga_set_basic_mode(void)
 {
+	struct biosregs ireg, oreg;
 	u16 ax;
 	u8 rows;
 	u8 mode;
 
+	initregs(&ireg);
+
 #ifdef CONFIG_VIDEO_400_HACK
 	if (adapter >= ADAPTER_VGA) {
-		asm volatile(INT10
-			     : : "a" (0x1202), "b" (0x0030)
-			     : "ecx", "edx", "esi", "edi");
+		ireg.ax = 0x1202;
+		ireg.bx = 0x0030;
+		intcall(0x10, &ireg, NULL);
 	}
 #endif
 
 	ax = 0x0f00;
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
-
-	mode = (u8)ax;
+	intcall(0x10, &ireg, &oreg);
+	mode = oreg.al;
 
 	set_fs(0);
 	rows = rdfs8(0x484);	/* rows minus one */
 
 #ifndef CONFIG_VIDEO_400_HACK
-	if ((ax == 0x5003 || ax == 0x5007) &&
+	if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
 	    (rows == 0 || rows == 24))
 		return mode;
 #endif
@@ -71,10 +72,8 @@ static u8 vga_set_basic_mode(void)
 		mode = 3;
 
 	/* Set the mode */
-	ax = mode;
-	asm volatile(INT10
-		     : "+a" (ax)
-		     : : "ebx", "ecx", "edx", "esi", "edi");
+	ireg.ax = mode;		/* AH=0: set mode */
+	intcall(0x10, &ireg, NULL);
 	do_restore = 1;
 	return mode;
 }
@@ -82,43 +81,69 @@ static u8 vga_set_basic_mode(void)
 static void vga_set_8font(void)
 {
 	/* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 8x8 font */
-	asm volatile(INT10 : : "a" (0x1112), "b" (0));
+	ireg.ax = 0x1112;
+	/* ireg.bl = 0; */
+	intcall(0x10, &ireg, NULL);
 
 	/* Use alternate print screen */
-	asm volatile(INT10 : : "a" (0x1200), "b" (0x20));
+	ireg.ax = 0x1200;
+	ireg.bl = 0x20;
+	intcall(0x10, &ireg, NULL);
 
 	/* Turn off cursor emulation */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x34;
+	intcall(0x10, &ireg, NULL);
 
 	/* Cursor is scan lines 6-7 */
-	asm volatile(INT10 : : "a" (0x0100), "c" (0x0607));
+	ireg.ax = 0x0100;
+	ireg.cx = 0x0607;
+	intcall(0x10, &ireg, NULL);
 }
 
 static void vga_set_14font(void)
 {
 	/* Set 9x14 font - 80x28 on VGA */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 9x14 font */
-	asm volatile(INT10 : : "a" (0x1111), "b" (0));
+	ireg.ax = 0x1111;
+	/* ireg.bl = 0; */
+	intcall(0x10, &ireg, NULL);
 
 	/* Turn off cursor emulation */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x34;
+	intcall(0x10, &ireg, NULL);
 
 	/* Cursor is scan lines 11-12 */
-	asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c));
+	ireg.ax = 0x0100;
+	ireg.cx = 0x0b0c;
+	intcall(0x10, &ireg, NULL);
 }
 
 static void vga_set_80x43(void)
 {
 	/* Set 80x43 mode on VGA (not EGA) */
+	struct biosregs ireg;
+
+	initregs(&ireg);
 
 	/* Set 350 scans */
-	asm volatile(INT10 : : "a" (0x1201), "b" (0x30));
+	ireg.ax = 0x1201;
+	ireg.bl = 0x30;
+	intcall(0x10, &ireg, NULL);
 
 	/* Reset video mode */
-	asm volatile(INT10 : : "a" (0x0003));
+	ireg.ax = 0x0003;
+	intcall(0x10, &ireg, NULL);
 
 	vga_set_8font();
 }
@@ -225,8 +250,6 @@ static int vga_set_mode(struct mode_info *mode)
  */
 static int vga_probe(void)
 {
-	u16 ega_bx;
-
 	static const char *card_name[] = {
 		"CGA/MDA/HGC", "EGA", "VGA"
 	};
@@ -240,26 +263,26 @@ static int vga_probe(void)
 		sizeof(ega_modes)/sizeof(struct mode_info),
 		sizeof(vga_modes)/sizeof(struct mode_info),
 	};
-	u8 vga_flag;
 
-	asm(INT10
-	    : "=b" (ega_bx)
-	    : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */
-	    : "ecx", "edx", "esi", "edi");
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+
+	ireg.ax = 0x1200;
+	ireg.bl = 0x10;		/* Check EGA/VGA */
+	intcall(0x10, &ireg, &oreg);
 
 #ifndef _WAKEUP
-	boot_params.screen_info.orig_video_ega_bx = ega_bx;
+	boot_params.screen_info.orig_video_ega_bx = oreg.bx;
 #endif
 
 	/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
-	if ((u8)ega_bx != 0x10) {
+	if (oreg.bl != 0x10) {
 		/* EGA/VGA */
-		asm(INT10
-		    : "=a" (vga_flag)
-		    : "a" (0x1a00)
-		    : "ebx", "ecx", "edx", "esi", "edi");
+		ireg.ax = 0x1a00;
+		intcall(0x10, &ireg, &oreg);
 
-		if (vga_flag == 0x1a) {
+		if (oreg.al == 0x1a) {
 			adapter = ADAPTER_VGA;
 #ifndef _WAKEUP
 			boot_params.screen_info.orig_video_isVGA = 1;
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 3bef2c1..bad728b 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -18,33 +19,29 @@
 
 static void store_cursor_position(void)
 {
-	u16 curpos;
-	u16 ax, bx;
+	struct biosregs ireg, oreg;
 
-	ax = 0x0300;
-	bx = 0;
-	asm(INT10
-	    : "=d" (curpos), "+a" (ax), "+b" (bx)
-	    : : "ecx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x03;
+	intcall(0x10, &ireg, &oreg);
 
-	boot_params.screen_info.orig_x = curpos;
-	boot_params.screen_info.orig_y = curpos >> 8;
+	boot_params.screen_info.orig_x = oreg.dl;
+	boot_params.screen_info.orig_y = oreg.dh;
 }
 
 static void store_video_mode(void)
 {
-	u16 ax, page;
+	struct biosregs ireg, oreg;
 
 	/* N.B.: the saving of the video page here is a bit silly,
 	   since we pretty much assume page 0 everywhere. */
-	ax = 0x0f00;
-	asm(INT10
-	    : "+a" (ax), "=b" (page)
-	    : : "ecx", "edx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x0f;
+	intcall(0x10, &ireg, &oreg);
 
 	/* Not all BIOSes are clean with respect to the top bit */
-	boot_params.screen_info.orig_video_mode = ax & 0x7f;
-	boot_params.screen_info.orig_video_page = page >> 8;
+	boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
+	boot_params.screen_info.orig_video_page = oreg.bh;
 }
 
 /*
@@ -257,7 +254,7 @@ static void restore_screen(void)
 	int y;
 	addr_t dst = 0;
 	u16 *src = saved.data;
-	u16 ax, bx, dx;
+	struct biosregs ireg;
 
 	if (graphic_mode)
 		return;		/* Can't restore onto a graphic mode */
@@ -296,12 +293,11 @@ static void restore_screen(void)
 	}
 
 	/* Restore cursor position */
-	ax = 0x0200;		/* Set cursor position */
-	bx = 0;			/* Page number (<< 8) */
-	dx = (saved.cury << 8)+saved.curx;
-	asm volatile(INT10
-		     : "+a" (ax), "+b" (bx), "+d" (dx)
-		     : : "ecx", "esi", "edi");
+	initregs(&ireg);
+	ireg.ah = 0x02;		/* Set cursor position */
+	ireg.dh = saved.cury;
+	ireg.dl = saved.curx;
+	intcall(0x10, &ireg, NULL);
 }
 #else
 #define save_screen()		((void)0)
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index ee63f5d..5bb174a 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -112,20 +112,6 @@ extern int force_x, force_y;	/* Don't query the BIOS for cols/rows */
 extern int do_restore;		/* Restore screen contents */
 extern int graphic_mode;	/* Graphics mode with linear frame buffer */
 
-/*
- * int $0x10 is notorious for touching registers it shouldn't.
- * gcc doesn't like %ebp being clobbered, so define it as a push/pop
- * sequence here.
- *
- * A number of systems, including the original PC can clobber %bp in
- * certain circumstances, like when scrolling.  There exists at least
- * one Trident video card which could clobber DS under a set of
- * circumstances that we are unlikely to encounter (scrolling when
- * using an extended graphics mode of more than 800x600 pixels), but
- * it's cheap insurance to deal with that here.
- */
-#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp"
-
 /* Accessing VGA indexed registers */
 static inline u8 in_idx(u16 port, u8 index)
 {
-- 
cgit v0.10.2


From 2062501ae6505dbc5bff3a792246c2661d114050 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 6 Apr 2009 01:49:33 +0200
Subject: tracing/lockdep: report the time waited for a lock

While trying to optimize the new lock on reiserfs to replace
the bkl, I find the lock tracing very useful though it lacks
something important for performance (and latency) instrumentation:
the time a task waits for a lock.

That's what this patch implements:

  bash-4816  [000]   202.652815: lock_contended: lock_contended: &sb->s_type->i_mutex_key
  bash-4816  [000]   202.652819: lock_acquired: &rq->lock (0.000 us)
 <...>-4787  [000]   202.652825: lock_acquired: &rq->lock (0.000 us)
 <...>-4787  [000]   202.652829: lock_acquired: &rq->lock (0.000 us)
  bash-4816  [000]   202.652833: lock_acquired: &sb->s_type->i_mutex_key (16.005 us)

As shown above, the "lock acquired" field is followed by the time
it has been waiting for the lock. Usually, a lock contended entry
is followed by a near lock_acquired entry with a non-zero time waited.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1238975373-15739-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
index adccfcd..863f1e4 100644
--- a/include/trace/lockdep_event_types.h
+++ b/include/trace/lockdep_event_types.h
@@ -32,11 +32,24 @@ TRACE_FORMAT(lock_contended,
 	TP_FMT("%s", lock->name)
 	);
 
-TRACE_FORMAT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__entry->name = lock->name;
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
 
 #endif
 #endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index b0f0118..c4582a6 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3061,6 +3061,8 @@ found_it:
 	put_lock_stats(stats);
 }
 
+DEFINE_TRACE(lock_acquired);
+
 static void
 __lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
@@ -3099,6 +3101,8 @@ found_it:
 		hlock->holdtime_stamp = now;
 	}
 
+	trace_lock_acquired(lock, ip, waittime);
+
 	stats = get_lock_stats(hlock_class(hlock));
 	if (waittime) {
 		if (hlock->read)
@@ -3137,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-DEFINE_TRACE(lock_acquired);
-
 void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	trace_lock_acquired(lock, ip);
-
 	if (unlikely(!lock_stat))
 		return;
 
-- 
cgit v0.10.2


From e71e99c294058a61b7a8b9bb6da2f745ac51aa4f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 25 Mar 2009 14:30:04 -0400
Subject: x86, function-graph: only save return values on x86_64

Impact: speed up

The return to handler portion of the function graph tracer should only
need to save the return values. The caller already saved off the
registers that the callee can modify. The returning function already
saved the registers it modified. When we call our own trace function
it too will save the registers that the callee must restore.

There's no reason to save off anything more that the registers used
to return the values.

Note, I did a complete kernel build with this modification and the
function graph tracer running on x86_64.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a331ec3..1ac9986 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -147,27 +147,14 @@ END(ftrace_graph_caller)
 GLOBAL(return_to_handler)
 	subq  $80, %rsp
 
+	/* Save the return values */
 	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-	movq %r10, 56(%rsp)
-	movq %r11, 64(%rsp)
+	movq %rdx, 8(%rsp)
 
 	call ftrace_return_to_handler
 
 	movq %rax, 72(%rsp)
-	movq 64(%rsp), %r11
-	movq 56(%rsp), %r10
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
+	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $72, %rsp
 	retq
-- 
cgit v0.10.2


From 5cb3d1d9d34ac04bcaa2034139345b2a5fea54c1 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Thu, 9 Apr 2009 14:08:18 +0800
Subject: tracing, net, skb tracepoint: make skb tracepoint use the
 TRACE_EVENT() macro

TRACE_EVENT is a more generic way to define a tracepoint.
Doing so adds these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "Steven Rostedt ;" <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49DD90D2.5020604@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/trace/skb.h b/include/trace/skb.h
index b66206d..d2de717 100644
--- a/include/trace/skb.h
+++ b/include/trace/skb.h
@@ -4,8 +4,6 @@
 #include <linux/skbuff.h>
 #include <linux/tracepoint.h>
 
-DECLARE_TRACE(kfree_skb,
-	TP_PROTO(struct sk_buff *skb, void *location),
-	TP_ARGS(skb, location));
+#include <trace/skb_event_types.h>
 
 #endif
diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h
new file mode 100644
index 0000000..4a1c504
--- /dev/null
+++ b/include/trace/skb_event_types.h
@@ -0,0 +1,38 @@
+
+/* use <trace/skb.h> instead */
+#ifndef TRACE_EVENT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
+
+/*
+ * Tracepoint for free an sk_buff:
+ */
+TRACE_EVENT(kfree_skb,
+
+	TP_PROTO(struct sk_buff *skb, void *location),
+
+	TP_ARGS(skb, location),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned short,	protocol	)
+		__field(	void *,		location	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
+		__entry->location = location;
+	),
+
+	TP_printk("skbaddr=%p protocol=%u location=%p",
+		__entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#undef TRACE_SYSTEM
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
index df56f56..33b6bfc 100644
--- a/include/trace/trace_event_types.h
+++ b/include/trace/trace_event_types.h
@@ -3,3 +3,4 @@
 #include <trace/sched_event_types.h>
 #include <trace/irq_event_types.h>
 #include <trace/lockdep_event_types.h>
+#include <trace/skb_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index fd13750..0e2aa80 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -3,3 +3,4 @@
 #include <trace/sched.h>
 #include <trace/irq.h>
 #include <trace/lockdep.h>
+#include <trace/skb.h>
-- 
cgit v0.10.2


From bda869c614c937c318547c3ee1d65a316b693c21 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Thu, 9 Apr 2009 15:05:10 +0200
Subject: x86: cacheinfo: use L3 cache index disable feature only for CPUs that
 support it

AMD family 0x11 CPU doesn't support the feature.

Some AMD family 0x10 CPUs do not support it or have an erratum, see
erratum #382 in "Revision Guide for AMD Family 10h Processors, 41322
Rev. 3.40 February 2009".

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
CC: Mark Langsdorf <mark.langsdorf@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20090409130510.GG31527@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 483eda9..7240126 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -291,6 +291,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 {
 	if (index < 3)
 		return;
+
+	if (boot_cpu_data.x86 == 0x11)
+		return;
+
+	/* see erratum #382 */
+	if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+		return;
+
 	this_leaf->can_disable = 1;
 }
 
-- 
cgit v0.10.2


From 845d8c761ec763871936c62b837c4a9ea6d0fbdb Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Thu, 9 Apr 2009 15:07:29 +0200
Subject: x86: cacheinfo: correct return value when cache_disable feature is
 not active

Impact: bug fix

If user writes to "cache_disable" attribute on a CPU that does not support
this feature, the process hangs due to an invalid return value in
store_cache_disable().

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Langsdorf <mark.langsdorf@amd.com>
LKML-Reference: <20090409130729.GH31527@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7240126..1ab46e0 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -771,7 +771,7 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
 	unsigned int ret, index, val;
 
 	if (!this_leaf->can_disable)
-		return 0;
+		return -EINVAL;
 
 	if (strlen(buf) > 15)
 		return -EINVAL;
-- 
cgit v0.10.2


From afd9fceec55225d33be878927056a548c2eef26c Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Thu, 9 Apr 2009 15:16:17 +0200
Subject: x86: cacheinfo: use cached K8 NB_MISC devices instead of scanning for
 it

Impact: avoid code duplication

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Langsdorf <mark.langsdorf@amd.com>
LKML-Reference: <20090409131617.GI31527@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h
index 54c8cc5..c23b3d1 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/k8.h
@@ -12,4 +12,12 @@ extern int cache_k8_northbridges(void);
 extern void k8_flush_garts(void);
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
+#ifdef CONFIG_K8_NB
+#define node_to_k8_nb_misc(node) \
+	(node < num_k8_northbridges) ? k8_northbridges[node] : NULL
+#else
+#define node_to_k8_nb_misc(node) NULL
+#endif
+
+
 #endif /* _ASM_X86_K8_H */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1ab46e0..0cde071 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -17,6 +17,7 @@
 
 #include <asm/processor.h>
 #include <asm/smp.h>
+#include <asm/k8.h>
 
 #define LVL_1_INST	1
 #define LVL_1_DATA	2
@@ -159,14 +160,6 @@ struct _cpuid4_info_regs {
 	unsigned long can_disable;
 };
 
-#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
-static struct pci_device_id k8_nb_id[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
-	{}
-};
-#endif
-
 unsigned short			num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -704,30 +697,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
 #define to_object(k)	container_of(k, struct _index_kobject, kobj)
 #define to_attr(a)	container_of(a, struct _cache_attr, attr)
 
-#ifdef CONFIG_PCI
-static struct pci_dev *get_k8_northbridge(int node)
-{
-	struct pci_dev *dev = NULL;
-	int i;
-
-	for (i = 0; i <= node; i++) {
-		do {
-			dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
-			if (!dev)
-				break;
-		} while (!pci_match_id(&k8_nb_id[0], dev));
-		if (!dev)
-			break;
-	}
-	return dev;
-}
-#else
-static struct pci_dev *get_k8_northbridge(int node)
-{
-	return NULL;
-}
-#endif
-
 static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
 {
 	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
@@ -739,7 +708,7 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
 	if (!this_leaf->can_disable)
 		return sprintf(buf, "Feature not enabled\n");
 
-	dev = get_k8_northbridge(node);
+	dev = node_to_k8_nb_misc(node);
 	if (!dev) {
 		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
 		return -EINVAL;
@@ -783,7 +752,7 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
 		return -EINVAL;
 
 	val |= 0xc0000000;
-	dev = get_k8_northbridge(node);
+	dev = node_to_k8_nb_misc(node);
 	if (!dev) {
 		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
 		return -EINVAL;
-- 
cgit v0.10.2


From f8b201fc7110c3673437254e8ba02451461ece0b Mon Sep 17 00:00:00 2001
From: Mark Langsdorf <mark.langsdorf@amd.com>
Date: Thu, 9 Apr 2009 15:18:49 +0200
Subject: x86: cacheinfo: replace sysfs interface for cache_disable feature

Impact: replace sysfs attribute

Current interface violates against "one-value-per-sysfs-attribute
rule". This patch replaces current attribute with two attributes --
one for each L3 Cache Index Disable register.

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20090409131849.GJ31527@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0cde071..fc28291 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -697,73 +697,69 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
 #define to_object(k)	container_of(k, struct _index_kobject, kobj)
 #define to_attr(a)	container_of(a, struct _cache_attr, attr)
 
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+				  unsigned int index)
 {
-	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-	int node = cpu_to_node(cpumask_first(mask));
-	struct pci_dev *dev = NULL;
-	ssize_t ret = 0;
-	int i;
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int reg = 0;
 
 	if (!this_leaf->can_disable)
-		return sprintf(buf, "Feature not enabled\n");
-
-	dev = node_to_k8_nb_misc(node);
-	if (!dev) {
-		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
 		return -EINVAL;
-	}
 
-	for (i = 0; i < 2; i++) {
-		unsigned int reg;
+	if (!dev)
+		return -EINVAL;
 
-		pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+	return sprintf(buf, "%x\n", reg);
+}
 
-		ret += sprintf(buf, "%sEntry: %d\n", buf, i);
-		ret += sprintf(buf, "%sReads:  %s\tNew Entries: %s\n",  
-			buf,
-			reg & 0x80000000 ? "Disabled" : "Allowed",
-			reg & 0x40000000 ? "Disabled" : "Allowed");
-		ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
-			buf, (reg & 0x30000) >> 16, reg & 0xfff);
-	}
-	return ret;
+#define SHOW_CACHE_DISABLE(index)					\
+static ssize_t								\
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)  	\
+{									\
+	return show_cache_disable(this_leaf, buf, index);		\
 }
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
 
-static ssize_t
-store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
-		    size_t count)
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
+	const char *buf, size_t count, unsigned int index)
 {
-	const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-	int node = cpu_to_node(cpumask_first(mask));
-	struct pci_dev *dev = NULL;
-	unsigned int ret, index, val;
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned long val = 0;
 
 	if (!this_leaf->can_disable)
 		return -EINVAL;
 
-	if (strlen(buf) > 15)
-		return -EINVAL;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
 
-	ret = sscanf(buf, "%x %x", &index, &val);
-	if (ret != 2)
-		return -EINVAL;
-	if (index > 1)
+	if (!dev)
 		return -EINVAL;
 
-	val |= 0xc0000000;
-	dev = node_to_k8_nb_misc(node);
-	if (!dev) {
-		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+	if (strict_strtoul(buf, 10, &val) < 0)
 		return -EINVAL;
-	}
 
+	val |= 0xc0000000;
 	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
 	wbinvd();
 	pci_write_config_dword(dev, 0x1BC + index * 4, val);
+	return count;
+}
 
-	return 1;
+#define STORE_CACHE_DISABLE(index)					\
+static ssize_t								\
+store_cache_disable_##index(struct _cpuid4_info *this_leaf,	     	\
+			    const char *buf, size_t count)		\
+{									\
+	return store_cache_disable(this_leaf, buf, count, index);	\
 }
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
 
 struct _cache_attr {
 	struct attribute attr;
@@ -785,7 +781,10 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+		show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+		show_cache_disable_1, store_cache_disable_1);
 
 static struct attribute * default_attrs[] = {
 	&type.attr,
@@ -797,7 +796,8 @@ static struct attribute * default_attrs[] = {
 	&size.attr,
 	&shared_cpu_map.attr,
 	&shared_cpu_list.attr,
-	&cache_disable.attr,
+	&cache_disable_0.attr,
+	&cache_disable_1.attr,
 	NULL
 };
 
-- 
cgit v0.10.2


From ba518bea2db21c72d44a6cbfd825b026ef9cdcb6 Mon Sep 17 00:00:00 2001
From: Mark Langsdorf <mark.langsdorf@amd.com>
Date: Thu, 9 Apr 2009 15:24:06 +0200
Subject: x86: cacheinfo: disable L3 ECC scrubbing when L3 cache index is
 disabled

(Use correct mask to zero out bits 24-28 by Andreas)

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20090409132406.GK31527@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fc28291..d46a849 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -731,6 +731,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 	int node = cpu_to_node(cpu);
 	struct pci_dev *dev = node_to_k8_nb_misc(node);
 	unsigned long val = 0;
+	unsigned int scrubber = 0;
 
 	if (!this_leaf->can_disable)
 		return -EINVAL;
@@ -745,6 +746,11 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 		return -EINVAL;
 
 	val |= 0xc0000000;
+
+	pci_read_config_dword(dev, 0x58, &scrubber);
+	scrubber &= ~0x1f000000;
+	pci_write_config_dword(dev, 0x58, scrubber);
+
 	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
 	wbinvd();
 	pci_write_config_dword(dev, 0x1BC + index * 4, val);
-- 
cgit v0.10.2


From 2fad2d9bb8310889f3261035b594b4e068b6eb8b Mon Sep 17 00:00:00 2001
From: Mark Langsdorf <mark.langsdorf@amd.com>
Date: Thu, 9 Apr 2009 15:31:53 +0200
Subject: x86/docs: add description for cache_disable sysfs interface

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20090409133153.GL31527@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable
new file mode 100644
index 0000000..175bb4f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable
@@ -0,0 +1,18 @@
+What:      /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date:      August 2008
+KernelVersion:	2.6.27
+Contact:	mark.langsdorf@amd.com
+Description:	These files exist in every cpu's cache index directories.
+		There are currently 2 cache_disable_# files in each
+		directory.  Reading from these files on a supported
+		processor will return that cache disable index value
+		for that processor and node.  Writing to one of these
+		files will cause the specificed cache index to be disabled.
+
+		Currently, only AMD Family 10h Processors support cache index
+		disable, and only for their L3 caches.  See the BIOS and
+		Kernel Developer's Guide at
+		http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf
+		for formatting information and other details on the
+		cache index disable.
+Users:    joachim.deguara@amd.com
-- 
cgit v0.10.2


From f465145235313c451164bdfa9037ac254bf00c9a Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:18 +0300
Subject: x86: move x86_quirk_pre_intr_init() to irqinit_32.c

Impact: cleanup

In preparation for unifying irqinit_{32,64}.c, make
x86_quirk_pre_intr_init() local to irqinit_32.c.

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 1a99e6c..58d7091 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,8 +60,4 @@ extern struct irq_chip i8259A_chip;
 extern void mask_8259A(void);
 extern void unmask_8259A(void);
 
-#ifdef CONFIG_X86_32
-extern void init_ISA_irqs(void);
-#endif
-
 #endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index bdc2ada..4093d1e 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -33,7 +33,6 @@ struct x86_quirks {
 	int (*setup_ioapic_ids)(void);
 };
 
-extern void x86_quirk_pre_intr_init(void);
 extern void x86_quirk_intr_init(void);
 
 extern void x86_quirk_trap_init(void);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 368b0a8..0c0dedc 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -53,7 +53,7 @@ static struct irqaction fpu_irq = {
 	.name = "fpu",
 };
 
-void __init init_ISA_irqs(void)
+static void __init init_ISA_irqs(void)
 {
 	int i;
 
@@ -121,6 +121,24 @@ int vector_used_by_percpu_irq(unsigned int vector)
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *	Perform any necessary interrupt initialisation prior to setting up
+ *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
+ *	interrupts should be initialised here if the machine emulates a PC
+ *	in any way.
+ **/
+static void __init x86_quirk_pre_intr_init(void)
+{
+	if (x86_quirks->arch_pre_intr_init) {
+		if (x86_quirks->arch_pre_intr_init())
+			return;
+	}
+	init_ISA_irqs();
+}
+
 void __init native_init_IRQ(void)
 {
 	int i;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b415843..523bb69 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -997,24 +997,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_X86_32
 
 /**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *	Perform any necessary interrupt initialisation prior to setting up
- *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *	interrupts should be initialised here if the machine emulates a PC
- *	in any way.
- **/
-void __init x86_quirk_pre_intr_init(void)
-{
-	if (x86_quirks->arch_pre_intr_init) {
-		if (x86_quirks->arch_pre_intr_init())
-			return;
-	}
-	init_ISA_irqs();
-}
-
-/**
  * x86_quirk_intr_init - post gate setup interrupt initialisation
  *
  * Description:
-- 
cgit v0.10.2


From 7371d9fcb88dc9185be9719f64744a339c537a92 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:19 +0300
Subject: x86: move init_ISA_irqs() in irqinit_32.c to match ordering in
 irqinit_64.c

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 0c0dedc..c5cb769 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -53,30 +53,6 @@ static struct irqaction fpu_irq = {
 	.name = "fpu",
 };
 
-static void __init init_ISA_irqs(void)
-{
-	int i;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	init_bsp_APIC();
-#endif
-	init_8259A(0);
-
-	/*
-	 * 16 old-style INTA-cycle interrupts:
-	 */
-	for (i = 0; i < NR_IRQS_LEGACY; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = NULL;
-		desc->depth = 1;
-
-		set_irq_chip_and_handler_name(i, &i8259A_chip,
-					      handle_level_irq, "XT");
-	}
-}
-
 /*
  * IRQ2 is cascade interrupt to second interrupt controller
  */
@@ -118,6 +94,30 @@ int vector_used_by_percpu_irq(unsigned int vector)
 	return 0;
 }
 
+static void __init init_ISA_irqs(void)
+{
+	int i;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	init_bsp_APIC();
+#endif
+	init_8259A(0);
+
+	/*
+	 * 16 old-style INTA-cycle interrupts:
+	 */
+	for (i = 0; i < NR_IRQS_LEGACY; i++) {
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
+
+		set_irq_chip_and_handler_name(i, &i8259A_chip,
+					      handle_level_irq, "XT");
+	}
+}
+
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
-- 
cgit v0.10.2


From 36290d87f5abf260a543e5b711be4ceed03e6b1a Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:20 +0300
Subject: x86: introduce smp_intr_init() in irqinit_32.c

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index c5cb769..df0aad5 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -121,6 +121,38 @@ static void __init init_ISA_irqs(void)
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
+static void __init smp_intr_init(void)
+{
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
+	/*
+	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+	 * IPI, driven by wakeup.
+	 */
+	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+	/* IPIs for invalidation */
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+
+	/* IPI for generic function call */
+	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+	/* IPI for single call function */
+	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+				 call_function_single_interrupt);
+
+	/* Low priority IPI to cleanup after moving an irq */
+	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
+#endif
+}
+
 /**
  * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
  *
@@ -158,34 +190,7 @@ void __init native_init_IRQ(void)
 	}
 
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
-	/*
-	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-	 * IPI, driven by wakeup.
-	 */
-	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-	/* IPIs for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
-	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
-	/* IPI for single call function */
-	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-				 call_function_single_interrupt);
-
-	/* Low priority IPI to cleanup after moving an irq */
-	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-#endif
+	smp_intr_init();
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
-- 
cgit v0.10.2


From 22813c45228160b07244a7c4ed7580388ac0f33d Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:21 +0300
Subject: x86: introduce apic_intr_init() in irqinit_32.c

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index df0aad5..9ba68c4 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -171,25 +171,8 @@ static void __init x86_quirk_pre_intr_init(void)
 	init_ISA_irqs();
 }
 
-void __init native_init_IRQ(void)
+static void __init apic_intr_init(void)
 {
-	int i;
-
-	/* Execute any quirks before the call gates are initialised: */
-	x86_quirk_pre_intr_init();
-
-	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
-	 */
-	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-	}
-
-
 	smp_intr_init();
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -208,6 +191,27 @@ void __init native_init_IRQ(void)
 	/* thermal monitor LVT interrupt */
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
+}
+
+void __init native_init_IRQ(void)
+{
+	int i;
+
+	/* Execute any quirks before the call gates are initialised: */
+	x86_quirk_pre_intr_init();
+
+	/*
+	 * Cover the whole vector space, no vector can escape
+	 * us. (some of these will be overridden and become
+	 * 'special' SMP interrupts)
+	 */
+	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+		/* SYSCALL_VECTOR was reserved in trap_init. */
+		if (i != SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+	}
+
+	apic_intr_init();
 
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
-- 
cgit v0.10.2


From d3496c85cae22fb7713af6ed542a6aeae8ee4210 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:22 +0300
Subject: x86: use identical loop constructs in 32-bit and 64-bit
 native_init_IRQ()

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9ba68c4..1029a18 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -205,7 +205,7 @@ void __init native_init_IRQ(void)
 	 * us. (some of these will be overridden and become
 	 * 'special' SMP interrupts)
 	 */
-	for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
 		/* SYSCALL_VECTOR was reserved in trap_init. */
 		if (i != SYSCALL_VECTOR)
 			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 8cd1053..1c8858b 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -159,15 +159,16 @@ void __init native_init_IRQ(void)
 	int i;
 
 	init_ISA_irqs();
+
 	/*
 	 * Cover the whole vector space, no vector can escape
 	 * us. (some of these will be overridden and become
 	 * 'special' SMP interrupts)
 	 */
-	for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
-		int vector = FIRST_EXTERNAL_VECTOR + i;
-		if (vector != IA32_SYSCALL_VECTOR)
-			set_intr_gate(vector, interrupt[i]);
+	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+		/* IA32_SYSCALL_VECTOR was reserved in trap_init. */
+		if (i != IA32_SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
 	}
 
 	apic_intr_init();
-- 
cgit v0.10.2


From b0096bb0b640d0a7713618b3472fd0f4adf30a96 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:23 +0300
Subject: x86: unify smp_intr_init() in irqinit_{32,64}.h

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 1029a18..ef2528d 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -123,7 +123,8 @@ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
 static void __init smp_intr_init(void)
 {
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -143,14 +144,15 @@ static void __init smp_intr_init(void)
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
-	/* IPI for single call function */
+	/* IPI for generic single function call */
 	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-				 call_function_single_interrupt);
+			call_function_single_interrupt);
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
+#endif /* CONFIG_SMP */
 }
 
 /**
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 1c8858b..9e7c57d 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -107,6 +107,7 @@ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 static void __init smp_intr_init(void)
 {
 #ifdef CONFIG_SMP
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -134,6 +135,7 @@ static void __init smp_intr_init(void)
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
+#endif /* CONFIG_SMP */
 }
 
 static void __init apic_intr_init(void)
-- 
cgit v0.10.2


From 598c73d250ffb112715aa48fb325d79e255be23b Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:24 +0300
Subject: x86: unify init_ISA_irqs() in irqinit_{32,64}.c

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index ef2528d..4488b71 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -98,7 +98,7 @@ static void __init init_ISA_irqs(void)
 {
 	int i;
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	init_bsp_APIC();
 #endif
 	init_8259A(0);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 9e7c57d..61c9a92 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -84,9 +84,14 @@ static void __init init_ISA_irqs(void)
 {
 	int i;
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	init_bsp_APIC();
+#endif
 	init_8259A(0);
 
+	/*
+	 * 16 old-style INTA-cycle interrupts:
+	 */
 	for (i = 0; i < NR_IRQS_LEGACY; i++) {
 		struct irq_desc *desc = irq_to_desc(i);
 
@@ -94,11 +99,8 @@ static void __init init_ISA_irqs(void)
 		desc->action = NULL;
 		desc->depth = 1;
 
-		/*
-		 * 16 old-style INTA-cycle interrupts:
-		 */
 		set_irq_chip_and_handler_name(i, &i8259A_chip,
-						      handle_level_irq, "XT");
+					      handle_level_irq, "XT");
 	}
 }
 
-- 
cgit v0.10.2


From 320fd99672a44ece6d1cd0d838ba31c8ebbf5979 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:25 +0300
Subject: x86: unify native_init_IRQ() in irqinit_{32,64}.c

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 4488b71..a780de3 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -22,7 +22,7 @@
 #include <asm/i8259.h>
 #include <asm/traps.h>
 
-
+#ifdef CONFIG_X86_32
 /*
  * Note that on a 486, we don't want to do a SIGFPE on an irq13
  * as the irq is unreliable, and exception 16 works correctly
@@ -52,6 +52,7 @@ static struct irqaction fpu_irq = {
 	.handler = math_error_irq,
 	.name = "fpu",
 };
+#endif
 
 /*
  * IRQ2 is cascade interrupt to second interrupt controller
@@ -155,24 +156,6 @@ static void __init smp_intr_init(void)
 #endif /* CONFIG_SMP */
 }
 
-/**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *	Perform any necessary interrupt initialisation prior to setting up
- *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *	interrupts should be initialised here if the machine emulates a PC
- *	in any way.
- **/
-static void __init x86_quirk_pre_intr_init(void)
-{
-	if (x86_quirks->arch_pre_intr_init) {
-		if (x86_quirks->arch_pre_intr_init())
-			return;
-	}
-	init_ISA_irqs();
-}
-
 static void __init apic_intr_init(void)
 {
 	smp_intr_init();
@@ -195,12 +178,36 @@ static void __init apic_intr_init(void)
 #endif
 }
 
+#ifdef CONFIG_X86_32
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *	Perform any necessary interrupt initialisation prior to setting up
+ *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
+ *	interrupts should be initialised here if the machine emulates a PC
+ *	in any way.
+ **/
+static void __init x86_quirk_pre_intr_init(void)
+{
+	if (x86_quirks->arch_pre_intr_init) {
+		if (x86_quirks->arch_pre_intr_init())
+			return;
+	}
+	init_ISA_irqs();
+}
+#endif
+
 void __init native_init_IRQ(void)
 {
 	int i;
 
+#ifdef CONFIG_X86_32
 	/* Execute any quirks before the call gates are initialised: */
 	x86_quirk_pre_intr_init();
+#else
+	init_ISA_irqs();
+#endif
 
 	/*
 	 * Cover the whole vector space, no vector can escape
@@ -208,9 +215,15 @@ void __init native_init_IRQ(void)
 	 * 'special' SMP interrupts)
 	 */
 	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+#ifdef CONFIG_X86_32
 		/* SYSCALL_VECTOR was reserved in trap_init. */
 		if (i != SYSCALL_VECTOR)
 			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+#else
+		/* IA32_SYSCALL_VECTOR was reserved in trap_init. */
+		if (i != IA32_SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+#endif
 	}
 
 	apic_intr_init();
@@ -218,6 +231,7 @@ void __init native_init_IRQ(void)
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Call quirks after call gates are initialised (usually add in
 	 * the architecture specific gates):
@@ -232,4 +246,5 @@ void __init native_init_IRQ(void)
 		setup_irq(FPU_IRQ, &fpu_irq);
 
 	irq_ctx_init(smp_processor_id());
+#endif
 }
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 61c9a92..ed50e35 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -39,14 +39,46 @@
  * (these are usually mapped into the 0x30-0xff vector range)
  */
 
+#ifdef CONFIG_X86_32
 /*
- * IRQ2 is cascade interrupt to second interrupt controller
+ * Note that on a 486, we don't want to do a SIGFPE on an irq13
+ * as the irq is unreliable, and exception 16 works correctly
+ * (ie as explained in the intel literature). On a 386, you
+ * can't use exception 16 due to bad IBM design, so we have to
+ * rely on the less exact irq13.
+ *
+ * Careful.. Not only is IRQ13 unreliable, but it is also
+ * leads to races. IBM designers who came up with it should
+ * be shot.
+ */
+
+static irqreturn_t math_error_irq(int cpl, void *dev_id)
+{
+	outb(0, 0xF0);
+	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
+		return IRQ_NONE;
+	math_error((void __user *)get_irq_regs()->ip);
+	return IRQ_HANDLED;
+}
+
+/*
+ * New motherboards sometimes make IRQ 13 be a PCI interrupt,
+ * so allow interrupt sharing.
  */
+static struct irqaction fpu_irq = {
+	.handler = math_error_irq,
+	.name = "fpu",
+};
+#endif
 
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
 static struct irqaction irq2 = {
 	.handler = no_action,
 	.name = "cascade",
 };
+
 DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
 	[0 ... IRQ0_VECTOR - 1] = -1,
 	[IRQ0_VECTOR] = 0,
@@ -158,11 +190,36 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 }
 
+#ifdef CONFIG_X86_32
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *	Perform any necessary interrupt initialisation prior to setting up
+ *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
+ *	interrupts should be initialised here if the machine emulates a PC
+ *	in any way.
+ **/
+static void __init x86_quirk_pre_intr_init(void)
+{
+	if (x86_quirks->arch_pre_intr_init) {
+		if (x86_quirks->arch_pre_intr_init())
+			return;
+	}
+	init_ISA_irqs();
+}
+#endif
+
 void __init native_init_IRQ(void)
 {
 	int i;
 
+#ifdef CONFIG_X86_32
+	/* Execute any quirks before the call gates are initialised: */
+	x86_quirk_pre_intr_init();
+#else
 	init_ISA_irqs();
+#endif
 
 	/*
 	 * Cover the whole vector space, no vector can escape
@@ -170,13 +227,36 @@ void __init native_init_IRQ(void)
 	 * 'special' SMP interrupts)
 	 */
 	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+#ifdef CONFIG_X86_32
+		/* SYSCALL_VECTOR was reserved in trap_init. */
+		if (i != SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+#else
 		/* IA32_SYSCALL_VECTOR was reserved in trap_init. */
 		if (i != IA32_SYSCALL_VECTOR)
 			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+#endif
 	}
 
 	apic_intr_init();
 
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Call quirks after call gates are initialised (usually add in
+	 * the architecture specific gates):
+	 */
+	x86_quirk_intr_init();
+
+	/*
+	 * External FPU? Set up irq13 if so, for
+	 * original braindamaged IBM FERR coupling.
+	 */
+	if (boot_cpu_data.hard_math && !cpu_has_fpu)
+		setup_irq(FPU_IRQ, &fpu_irq);
+
+	irq_ctx_init(smp_processor_id());
+#endif
 }
-- 
cgit v0.10.2


From 778838600eb6973bdb6fd11e7f91b43cea4d6f45 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:26 +0300
Subject: x86: unify trivial differences in irqinit_{32,64}.c

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index a780de3..72ce942 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -1,20 +1,24 @@
+#include <linux/linkage.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/timex.h>
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/bitops.h>
+#include <linux/acpi.h>
 #include <linux/io.h>
 #include <linux/delay.h>
 
 #include <asm/atomic.h>
 #include <asm/system.h>
 #include <asm/timer.h>
+#include <asm/hw_irq.h>
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
@@ -22,6 +26,22 @@
 #include <asm/i8259.h>
 #include <asm/traps.h>
 
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
+
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+
 #ifdef CONFIG_X86_32
 /*
  * Note that on a 486, we don't want to do a SIGFPE on an irq13
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ed50e35..687b6c3 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -17,11 +17,14 @@
 
 #include <asm/atomic.h>
 #include <asm/system.h>
+#include <asm/timer.h>
 #include <asm/hw_irq.h>
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
+#include <asm/setup.h>
 #include <asm/i8259.h>
+#include <asm/traps.h>
 
 /*
  * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -136,6 +139,7 @@ static void __init init_ISA_irqs(void)
 	}
 }
 
+/* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
 static void __init smp_intr_init(void)
-- 
cgit v0.10.2


From ab19c25abd14db28d7454f00805ea59f22ed6057 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:27 +0300
Subject: x86: unify apic_intr_init() in irqinit_{32,64}.c

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 72ce942..f3be5e9 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -180,7 +180,12 @@ static void __init apic_intr_init(void)
 {
 	smp_intr_init();
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#ifdef CONFIG_X86_64
+	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+#endif
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -192,10 +197,12 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 #endif
 
+#ifdef CONFIG_X86_32
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
 	/* thermal monitor LVT interrupt */
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
+#endif
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 687b6c3..f3be5e9 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -180,9 +180,12 @@ static void __init apic_intr_init(void)
 {
 	smp_intr_init();
 
+#ifdef CONFIG_X86_64
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+#endif
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -192,6 +195,14 @@ static void __init apic_intr_init(void)
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#endif
+
+#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
+	/* thermal monitor LVT interrupt */
+	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+#endif
+#endif
 }
 
 #ifdef CONFIG_X86_32
-- 
cgit v0.10.2


From 31cb45ef2600d47191d51253ec94b5e3f689260d Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:28 +0300
Subject: x86: unify irqinit_{32,64}.c into irqinit.c

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 145cce7..16e3acf 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,7 +28,7 @@ CFLAGS_paravirt.o	:= $(nostackp)
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y			+= setup.o i8259.o irqinit_$(BITS).o
+obj-y			+= setup.o i8259.o irqinit.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
new file mode 100644
index 0000000..f3be5e9
--- /dev/null
+++ b/arch/x86/kernel/irqinit.c
@@ -0,0 +1,277 @@
+#include <linux/linkage.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+#include <linux/bitops.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/timer.h>
+#include <asm/hw_irq.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+#include <asm/i8259.h>
+#include <asm/traps.h>
+
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
+
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+
+#ifdef CONFIG_X86_32
+/*
+ * Note that on a 486, we don't want to do a SIGFPE on an irq13
+ * as the irq is unreliable, and exception 16 works correctly
+ * (ie as explained in the intel literature). On a 386, you
+ * can't use exception 16 due to bad IBM design, so we have to
+ * rely on the less exact irq13.
+ *
+ * Careful.. Not only is IRQ13 unreliable, but it is also
+ * leads to races. IBM designers who came up with it should
+ * be shot.
+ */
+
+static irqreturn_t math_error_irq(int cpl, void *dev_id)
+{
+	outb(0, 0xF0);
+	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
+		return IRQ_NONE;
+	math_error((void __user *)get_irq_regs()->ip);
+	return IRQ_HANDLED;
+}
+
+/*
+ * New motherboards sometimes make IRQ 13 be a PCI interrupt,
+ * so allow interrupt sharing.
+ */
+static struct irqaction fpu_irq = {
+	.handler = math_error_irq,
+	.name = "fpu",
+};
+#endif
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+static struct irqaction irq2 = {
+	.handler = no_action,
+	.name = "cascade",
+};
+
+DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+	[0 ... IRQ0_VECTOR - 1] = -1,
+	[IRQ0_VECTOR] = 0,
+	[IRQ1_VECTOR] = 1,
+	[IRQ2_VECTOR] = 2,
+	[IRQ3_VECTOR] = 3,
+	[IRQ4_VECTOR] = 4,
+	[IRQ5_VECTOR] = 5,
+	[IRQ6_VECTOR] = 6,
+	[IRQ7_VECTOR] = 7,
+	[IRQ8_VECTOR] = 8,
+	[IRQ9_VECTOR] = 9,
+	[IRQ10_VECTOR] = 10,
+	[IRQ11_VECTOR] = 11,
+	[IRQ12_VECTOR] = 12,
+	[IRQ13_VECTOR] = 13,
+	[IRQ14_VECTOR] = 14,
+	[IRQ15_VECTOR] = 15,
+	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
+};
+
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (per_cpu(vector_irq, cpu)[vector] != -1)
+			return 1;
+	}
+
+	return 0;
+}
+
+static void __init init_ISA_irqs(void)
+{
+	int i;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	init_bsp_APIC();
+#endif
+	init_8259A(0);
+
+	/*
+	 * 16 old-style INTA-cycle interrupts:
+	 */
+	for (i = 0; i < NR_IRQS_LEGACY; i++) {
+		struct irq_desc *desc = irq_to_desc(i);
+
+		desc->status = IRQ_DISABLED;
+		desc->action = NULL;
+		desc->depth = 1;
+
+		set_irq_chip_and_handler_name(i, &i8259A_chip,
+					      handle_level_irq, "XT");
+	}
+}
+
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+static void __init smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	/*
+	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+	 * IPI, driven by wakeup.
+	 */
+	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+	/* IPIs for invalidation */
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+
+	/* IPI for generic function call */
+	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+	/* IPI for generic single function call */
+	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+			call_function_single_interrupt);
+
+	/* Low priority IPI to cleanup after moving an irq */
+	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
+#endif
+#endif /* CONFIG_SMP */
+}
+
+static void __init apic_intr_init(void)
+{
+	smp_intr_init();
+
+#ifdef CONFIG_X86_64
+	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+#endif
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	/* self generated IPI for local APIC timer */
+	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+	/* generic IPI for platform specific use */
+	alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
+
+	/* IPI vectors for APIC spurious and error interrupts */
+	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#endif
+
+#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
+	/* thermal monitor LVT interrupt */
+	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+#endif
+#endif
+}
+
+#ifdef CONFIG_X86_32
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *	Perform any necessary interrupt initialisation prior to setting up
+ *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
+ *	interrupts should be initialised here if the machine emulates a PC
+ *	in any way.
+ **/
+static void __init x86_quirk_pre_intr_init(void)
+{
+	if (x86_quirks->arch_pre_intr_init) {
+		if (x86_quirks->arch_pre_intr_init())
+			return;
+	}
+	init_ISA_irqs();
+}
+#endif
+
+void __init native_init_IRQ(void)
+{
+	int i;
+
+#ifdef CONFIG_X86_32
+	/* Execute any quirks before the call gates are initialised: */
+	x86_quirk_pre_intr_init();
+#else
+	init_ISA_irqs();
+#endif
+
+	/*
+	 * Cover the whole vector space, no vector can escape
+	 * us. (some of these will be overridden and become
+	 * 'special' SMP interrupts)
+	 */
+	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+#ifdef CONFIG_X86_32
+		/* SYSCALL_VECTOR was reserved in trap_init. */
+		if (i != SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+#else
+		/* IA32_SYSCALL_VECTOR was reserved in trap_init. */
+		if (i != IA32_SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+#endif
+	}
+
+	apic_intr_init();
+
+	if (!acpi_ioapic)
+		setup_irq(2, &irq2);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Call quirks after call gates are initialised (usually add in
+	 * the architecture specific gates):
+	 */
+	x86_quirk_intr_init();
+
+	/*
+	 * External FPU? Set up irq13 if so, for
+	 * original braindamaged IBM FERR coupling.
+	 */
+	if (boot_cpu_data.hard_math && !cpu_has_fpu)
+		setup_irq(FPU_IRQ, &fpu_irq);
+
+	irq_ctx_init(smp_processor_id());
+#endif
+}
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
deleted file mode 100644
index f3be5e9..0000000
--- a/arch/x86/kernel/irqinit_32.c
+++ /dev/null
@@ -1,277 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/bitops.h>
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/timer.h>
-#include <asm/hw_irq.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/apic.h>
-#include <asm/setup.h>
-#include <asm/i8259.h>
-#include <asm/traps.h>
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
-
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
-
-#ifdef CONFIG_X86_32
-/*
- * Note that on a 486, we don't want to do a SIGFPE on an irq13
- * as the irq is unreliable, and exception 16 works correctly
- * (ie as explained in the intel literature). On a 386, you
- * can't use exception 16 due to bad IBM design, so we have to
- * rely on the less exact irq13.
- *
- * Careful.. Not only is IRQ13 unreliable, but it is also
- * leads to races. IBM designers who came up with it should
- * be shot.
- */
-
-static irqreturn_t math_error_irq(int cpl, void *dev_id)
-{
-	outb(0, 0xF0);
-	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
-		return IRQ_NONE;
-	math_error((void __user *)get_irq_regs()->ip);
-	return IRQ_HANDLED;
-}
-
-/*
- * New motherboards sometimes make IRQ 13 be a PCI interrupt,
- * so allow interrupt sharing.
- */
-static struct irqaction fpu_irq = {
-	.handler = math_error_irq,
-	.name = "fpu",
-};
-#endif
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-static struct irqaction irq2 = {
-	.handler = no_action,
-	.name = "cascade",
-};
-
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
-	[0 ... IRQ0_VECTOR - 1] = -1,
-	[IRQ0_VECTOR] = 0,
-	[IRQ1_VECTOR] = 1,
-	[IRQ2_VECTOR] = 2,
-	[IRQ3_VECTOR] = 3,
-	[IRQ4_VECTOR] = 4,
-	[IRQ5_VECTOR] = 5,
-	[IRQ6_VECTOR] = 6,
-	[IRQ7_VECTOR] = 7,
-	[IRQ8_VECTOR] = 8,
-	[IRQ9_VECTOR] = 9,
-	[IRQ10_VECTOR] = 10,
-	[IRQ11_VECTOR] = 11,
-	[IRQ12_VECTOR] = 12,
-	[IRQ13_VECTOR] = 13,
-	[IRQ14_VECTOR] = 14,
-	[IRQ15_VECTOR] = 15,
-	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
-};
-
-int vector_used_by_percpu_irq(unsigned int vector)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (per_cpu(vector_irq, cpu)[vector] != -1)
-			return 1;
-	}
-
-	return 0;
-}
-
-static void __init init_ISA_irqs(void)
-{
-	int i;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
-	init_bsp_APIC();
-#endif
-	init_8259A(0);
-
-	/*
-	 * 16 old-style INTA-cycle interrupts:
-	 */
-	for (i = 0; i < NR_IRQS_LEGACY; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = NULL;
-		desc->depth = 1;
-
-		set_irq_chip_and_handler_name(i, &i8259A_chip,
-					      handle_level_irq, "XT");
-	}
-}
-
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
-	/*
-	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-	 * IPI, driven by wakeup.
-	 */
-	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-	/* IPIs for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
-	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
-	/* IPI for generic single function call */
-	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-			call_function_single_interrupt);
-
-	/* Low priority IPI to cleanup after moving an irq */
-	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-#endif
-#endif /* CONFIG_SMP */
-}
-
-static void __init apic_intr_init(void)
-{
-	smp_intr_init();
-
-#ifdef CONFIG_X86_64
-	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-#endif
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
-	/* self generated IPI for local APIC timer */
-	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
-	/* generic IPI for platform specific use */
-	alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
-
-	/* IPI vectors for APIC spurious and error interrupts */
-	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-#endif
-
-#ifdef CONFIG_X86_32
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
-	/* thermal monitor LVT interrupt */
-	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-#endif
-}
-
-#ifdef CONFIG_X86_32
-/**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *	Perform any necessary interrupt initialisation prior to setting up
- *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *	interrupts should be initialised here if the machine emulates a PC
- *	in any way.
- **/
-static void __init x86_quirk_pre_intr_init(void)
-{
-	if (x86_quirks->arch_pre_intr_init) {
-		if (x86_quirks->arch_pre_intr_init())
-			return;
-	}
-	init_ISA_irqs();
-}
-#endif
-
-void __init native_init_IRQ(void)
-{
-	int i;
-
-#ifdef CONFIG_X86_32
-	/* Execute any quirks before the call gates are initialised: */
-	x86_quirk_pre_intr_init();
-#else
-	init_ISA_irqs();
-#endif
-
-	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
-	 */
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-#ifdef CONFIG_X86_32
-		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-#else
-		/* IA32_SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != IA32_SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-#endif
-	}
-
-	apic_intr_init();
-
-	if (!acpi_ioapic)
-		setup_irq(2, &irq2);
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Call quirks after call gates are initialised (usually add in
-	 * the architecture specific gates):
-	 */
-	x86_quirk_intr_init();
-
-	/*
-	 * External FPU? Set up irq13 if so, for
-	 * original braindamaged IBM FERR coupling.
-	 */
-	if (boot_cpu_data.hard_math && !cpu_has_fpu)
-		setup_irq(FPU_IRQ, &fpu_irq);
-
-	irq_ctx_init(smp_processor_id());
-#endif
-}
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
deleted file mode 100644
index f3be5e9..0000000
--- a/arch/x86/kernel/irqinit_64.c
+++ /dev/null
@@ -1,277 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/bitops.h>
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/timer.h>
-#include <asm/hw_irq.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/apic.h>
-#include <asm/setup.h>
-#include <asm/i8259.h>
-#include <asm/traps.h>
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
-
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
-
-#ifdef CONFIG_X86_32
-/*
- * Note that on a 486, we don't want to do a SIGFPE on an irq13
- * as the irq is unreliable, and exception 16 works correctly
- * (ie as explained in the intel literature). On a 386, you
- * can't use exception 16 due to bad IBM design, so we have to
- * rely on the less exact irq13.
- *
- * Careful.. Not only is IRQ13 unreliable, but it is also
- * leads to races. IBM designers who came up with it should
- * be shot.
- */
-
-static irqreturn_t math_error_irq(int cpl, void *dev_id)
-{
-	outb(0, 0xF0);
-	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
-		return IRQ_NONE;
-	math_error((void __user *)get_irq_regs()->ip);
-	return IRQ_HANDLED;
-}
-
-/*
- * New motherboards sometimes make IRQ 13 be a PCI interrupt,
- * so allow interrupt sharing.
- */
-static struct irqaction fpu_irq = {
-	.handler = math_error_irq,
-	.name = "fpu",
-};
-#endif
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-static struct irqaction irq2 = {
-	.handler = no_action,
-	.name = "cascade",
-};
-
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
-	[0 ... IRQ0_VECTOR - 1] = -1,
-	[IRQ0_VECTOR] = 0,
-	[IRQ1_VECTOR] = 1,
-	[IRQ2_VECTOR] = 2,
-	[IRQ3_VECTOR] = 3,
-	[IRQ4_VECTOR] = 4,
-	[IRQ5_VECTOR] = 5,
-	[IRQ6_VECTOR] = 6,
-	[IRQ7_VECTOR] = 7,
-	[IRQ8_VECTOR] = 8,
-	[IRQ9_VECTOR] = 9,
-	[IRQ10_VECTOR] = 10,
-	[IRQ11_VECTOR] = 11,
-	[IRQ12_VECTOR] = 12,
-	[IRQ13_VECTOR] = 13,
-	[IRQ14_VECTOR] = 14,
-	[IRQ15_VECTOR] = 15,
-	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
-};
-
-int vector_used_by_percpu_irq(unsigned int vector)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		if (per_cpu(vector_irq, cpu)[vector] != -1)
-			return 1;
-	}
-
-	return 0;
-}
-
-static void __init init_ISA_irqs(void)
-{
-	int i;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
-	init_bsp_APIC();
-#endif
-	init_8259A(0);
-
-	/*
-	 * 16 old-style INTA-cycle interrupts:
-	 */
-	for (i = 0; i < NR_IRQS_LEGACY; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = NULL;
-		desc->depth = 1;
-
-		set_irq_chip_and_handler_name(i, &i8259A_chip,
-					      handle_level_irq, "XT");
-	}
-}
-
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
-	/*
-	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-	 * IPI, driven by wakeup.
-	 */
-	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-	/* IPIs for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
-	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
-	/* IPI for generic single function call */
-	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-			call_function_single_interrupt);
-
-	/* Low priority IPI to cleanup after moving an irq */
-	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-#endif
-#endif /* CONFIG_SMP */
-}
-
-static void __init apic_intr_init(void)
-{
-	smp_intr_init();
-
-#ifdef CONFIG_X86_64
-	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-#endif
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
-	/* self generated IPI for local APIC timer */
-	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
-	/* generic IPI for platform specific use */
-	alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
-
-	/* IPI vectors for APIC spurious and error interrupts */
-	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-#endif
-
-#ifdef CONFIG_X86_32
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
-	/* thermal monitor LVT interrupt */
-	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-#endif
-}
-
-#ifdef CONFIG_X86_32
-/**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *	Perform any necessary interrupt initialisation prior to setting up
- *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *	interrupts should be initialised here if the machine emulates a PC
- *	in any way.
- **/
-static void __init x86_quirk_pre_intr_init(void)
-{
-	if (x86_quirks->arch_pre_intr_init) {
-		if (x86_quirks->arch_pre_intr_init())
-			return;
-	}
-	init_ISA_irqs();
-}
-#endif
-
-void __init native_init_IRQ(void)
-{
-	int i;
-
-#ifdef CONFIG_X86_32
-	/* Execute any quirks before the call gates are initialised: */
-	x86_quirk_pre_intr_init();
-#else
-	init_ISA_irqs();
-#endif
-
-	/*
-	 * Cover the whole vector space, no vector can escape
-	 * us. (some of these will be overridden and become
-	 * 'special' SMP interrupts)
-	 */
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-#ifdef CONFIG_X86_32
-		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-#else
-		/* IA32_SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != IA32_SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-#endif
-	}
-
-	apic_intr_init();
-
-	if (!acpi_ioapic)
-		setup_irq(2, &irq2);
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Call quirks after call gates are initialised (usually add in
-	 * the architecture specific gates):
-	 */
-	x86_quirk_intr_init();
-
-	/*
-	 * External FPU? Set up irq13 if so, for
-	 * original braindamaged IBM FERR coupling.
-	 */
-	if (boot_cpu_data.hard_math && !cpu_has_fpu)
-		setup_irq(FPU_IRQ, &fpu_irq);
-
-	irq_ctx_init(smp_processor_id());
-#endif
-}
-- 
cgit v0.10.2


From ac3048dfd4740becf8d768844cf47ebee363c9f8 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:29 +0300
Subject: x86: define IA32_SYSCALL_VECTOR on 32-bit to reduce ifdefs

Impact: cleanup

We can remove some #ifdefs if we define IA32_SYSCALL_VECTOR on 32-bit.

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79b..910b5a3 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -34,6 +34,7 @@
 
 #ifdef CONFIG_X86_32
 # define SYSCALL_VECTOR			0x80
+# define IA32_SYSCALL_VECTOR		0x80
 #else
 # define IA32_SYSCALL_VECTOR		0x80
 #endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f3be5e9..f2c60a5 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -242,15 +242,9 @@ void __init native_init_IRQ(void)
 	 * 'special' SMP interrupts)
 	 */
 	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-#ifdef CONFIG_X86_32
-		/* SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-#else
 		/* IA32_SYSCALL_VECTOR was reserved in trap_init. */
 		if (i != IA32_SYSCALL_VECTOR)
 			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
-#endif
 	}
 
 	apic_intr_init();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d2883..2310700 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -969,11 +969,8 @@ void __init trap_init(void)
 	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
 		set_bit(i, used_vectors);
 
-#ifdef CONFIG_X86_64
 	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#else
-	set_bit(SYSCALL_VECTOR, used_vectors);
-#endif
+
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
-- 
cgit v0.10.2


From abdb5a5713330e17dfe91ab0d3e29c4744d95162 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 9 Apr 2009 11:52:30 +0300
Subject: x86: remove some ifdefs from native_init_IRQ()

Impact: cleanup

Reviewed-by Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f2c60a5..6269772 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -205,7 +205,6 @@ static void __init apic_intr_init(void)
 #endif
 }
 
-#ifdef CONFIG_X86_32
 /**
  * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
  *
@@ -217,24 +216,21 @@ static void __init apic_intr_init(void)
  **/
 static void __init x86_quirk_pre_intr_init(void)
 {
+#ifdef CONFIG_X86_32
 	if (x86_quirks->arch_pre_intr_init) {
 		if (x86_quirks->arch_pre_intr_init())
 			return;
 	}
+#endif
 	init_ISA_irqs();
 }
-#endif
 
 void __init native_init_IRQ(void)
 {
 	int i;
 
-#ifdef CONFIG_X86_32
 	/* Execute any quirks before the call gates are initialised: */
 	x86_quirk_pre_intr_init();
-#else
-	init_ISA_irqs();
-#endif
 
 	/*
 	 * Cover the whole vector space, no vector can escape
-- 
cgit v0.10.2


From 6265ff19ca08df0d96c859ae5e4dc2d9ad07070e Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Thu, 9 Apr 2009 15:47:10 +0200
Subject: x86: cacheinfo: complete L2/L3 Cache and TLB associativity field
 definitions

See "CPUID Specification" (AMD Publication #: 25481, Rev. 2.28, April 2008)

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Mark Langsdorf <mark.langsdorf@amd.com>
LKML-Reference: <20090409134710.GA8026@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index d46a849..789efe2 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -200,10 +200,17 @@ union l3_cache {
 };
 
 static const unsigned short __cpuinitconst assocs[] = {
-	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
-	[8] = 16, [0xa] = 32, [0xb] = 48,
+	[1] = 1,
+	[2] = 2,
+	[4] = 4,
+	[6] = 8,
+	[8] = 16,
+	[0xa] = 32,
+	[0xb] = 48,
 	[0xc] = 64,
-	[0xf] = 0xffff // ??
+	[0xd] = 96,
+	[0xe] = 128,
+	[0xf] = 0xffff /* fully associative - no way to show this currently */
 };
 
 static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
@@ -264,7 +271,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 	eax->split.type = types[leaf];
 	eax->split.level = levels[leaf];
 	if (leaf == 3)
-		eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
+		eax->split.num_threads_sharing =
+			current_cpu_data.x86_max_cores - 1;
 	else
 		eax->split.num_threads_sharing = 0;
 	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
-- 
cgit v0.10.2


From 47f16ca7631f9c6bad8e6d968cfb1433029b09ec Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 10 Apr 2009 14:58:05 +0200
Subject: x86, irqinit: preempt merge conflicts

To make the topic merge life easier for tip:perfcounters/core,
include two (inactive in this topic) IRQ vector initializations
here.

Also fix build bug - missing kprobes.h inclusion.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 6269772..b424c32 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -7,6 +7,7 @@
 #include <linux/timex.h>
 #include <linux/slab.h>
 #include <linux/random.h>
+#include <linux/kprobes.h>
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
@@ -195,6 +196,13 @@ static void __init apic_intr_init(void)
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+	/* Performance monitoring interrupts: */
+# ifdef CONFIG_PERF_COUNTERS
+	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+# endif
+
 #endif
 
 #ifdef CONFIG_X86_32
-- 
cgit v0.10.2


From a5a2a0c7fa039c59619bc908b3b1ed24734d442a Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 10 Apr 2009 09:50:05 -0700
Subject: futex: fix futex_wait_setup key handling

If the get_futex_key() call were to fail, the existing code would
try and put_futex_key() prior to returning.  This patch makes sure
we only put_futex_key() if get_futex_key() succeeded.

Reported-by: Clark Williams <williams@redhat.com>
Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
LKML-Reference: <20090410165005.14342.16973.stgit@Aeon>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 041bf3a..6d2daa4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1668,7 +1668,7 @@ retry:
 	q->key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q->key);
 	if (unlikely(ret != 0))
-		goto out;
+		return ret;
 
 retry_private:
 	*hb = queue_lock(q);
-- 
cgit v0.10.2


From 295c01f90db2b8b8394e1f7cde59b96d71f98e07 Mon Sep 17 00:00:00 2001
From: Christian Hohnstaedt <chohnstaedt@innominate.com>
Date: Sun, 12 Apr 2009 13:01:44 +0800
Subject: crypto: ixp4xx - check firmware for crypto support

 - the loaded firmware may not support crypto at all or
   only support DES and 3DES but not AES or
   support DES, 3DES and AES.

 - in case of no crypto support of the firmware, the module load will fail.
 - in case of missing AES support, the AES algorithms are not registered
   and a warning is printed during module load.

Signed-off-by: Christian Hohnstaedt <chohnstaedt@innominate.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index af9761c..9224c1f 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -415,6 +415,7 @@ static void crypto_done_action(unsigned long arg)
 static int init_ixp_crypto(void)
 {
 	int ret = -ENODEV;
+	u32 msg[2] = { 0, 0 };
 
 	if (! ( ~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH |
 				IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) {
@@ -426,9 +427,35 @@ static int init_ixp_crypto(void)
 		return ret;
 
 	if (!npe_running(npe_c)) {
-		npe_load_firmware(npe_c, npe_name(npe_c), dev);
+		ret = npe_load_firmware(npe_c, npe_name(npe_c), dev);
+		if (ret) {
+			return ret;
+		}
+		if (npe_recv_message(npe_c, msg, "STATUS_MSG"))
+			goto npe_error;
+	} else {
+		if (npe_send_message(npe_c, msg, "STATUS_MSG"))
+			goto npe_error;
+
+		if (npe_recv_message(npe_c, msg, "STATUS_MSG"))
+			goto npe_error;
 	}
 
+	switch ((msg[1]>>16) & 0xff) {
+	case 3:
+		printk(KERN_WARNING "Firmware of %s lacks AES support\n",
+				npe_name(npe_c));
+		support_aes = 0;
+		break;
+	case 4:
+	case 5:
+		support_aes = 1;
+		break;
+	default:
+		printk(KERN_ERR "Firmware of %s lacks crypto support\n",
+			npe_name(npe_c));
+		return -ENODEV;
+	}
 	/* buffer_pool will also be used to sometimes store the hmac,
 	 * so assure it is large enough
 	 */
@@ -459,6 +486,10 @@ static int init_ixp_crypto(void)
 
 	qmgr_enable_irq(RECV_QID);
 	return 0;
+
+npe_error:
+	printk(KERN_ERR "%s not responding\n", npe_name(npe_c));
+	ret = -EIO;
 err:
 	if (ctx_pool)
 		dma_pool_destroy(ctx_pool);
-- 
cgit v0.10.2


From cf9972a921470b0a2da7906104bcd540b20e33bf Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Sat, 11 Apr 2009 22:24:05 -0700
Subject: x86, setup: fix comment in the "glove box" code

Impact: Comment change only

The glove box is about avoiding problems with *registers* being
touched, not *memory*.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 22b4b3e..5077937 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -10,7 +10,7 @@
 
 /*
  * "Glove box" for BIOS calls.  Avoids the constant problems with BIOSes
- * touching memory they shouldn't be.
+ * touching registers they shouldn't be.
  */
 
 	.code16
-- 
cgit v0.10.2


From f4c1724f3437ac70d8330968379148c954ca34c7 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Sun, 12 Apr 2009 05:04:43 +0400
Subject: ASoC: n810: replace BUG() with BUG_ON()

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c
index a6d1178..e54e1c2 100644
--- a/sound/soc/omap/n810.c
+++ b/sound/soc/omap/n810.c
@@ -383,10 +383,9 @@ static int __init n810_soc_init(void)
 	clk_set_parent(sys_clkout2_src, func96m_clk);
 	clk_set_rate(sys_clkout2, 12000000);
 
-	if (gpio_request(N810_HEADSET_AMP_GPIO, "hs_amp") < 0)
-		BUG();
-	if (gpio_request(N810_SPEAKER_AMP_GPIO, "spk_amp") < 0)
-		BUG();
+	BUG_ON((gpio_request(N810_HEADSET_AMP_GPIO, "hs_amp") < 0) ||
+	       (gpio_request(N810_SPEAKER_AMP_GPIO, "spk_amp") < 0));
+
 	gpio_direction_output(N810_HEADSET_AMP_GPIO, 0);
 	gpio_direction_output(N810_SPEAKER_AMP_GPIO, 0);
 
-- 
cgit v0.10.2


From 2de1f33e99cec5fd79542a1d0e26efb9c36a98bb Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sat, 11 Apr 2009 12:55:26 +0530
Subject: x86: apic/x2apic_cluster.c x86_cpu_to_logical_apicid should be static

Impact: reduce kernel size a bit, address sparse warning

Addresses the problem pointed out by this sparse warning:
  arch/x86/kernel/apic/x2apic_cluster.c:13:1: warning: symbol 'per_cpu__x86_cpu_to_logical_apicid' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <1239434726.4418.24.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 4a903e2..8e4cbb2 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -10,7 +10,7 @@
 #include <asm/apic.h>
 #include <asm/ipi.h>
 
-DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
 
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-- 
cgit v0.10.2


From 56c49951747f250d8398582509e02ae5ce1d36d1 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 11 Apr 2009 15:51:19 -0400
Subject: tracing: Add documentation for the power tracer

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <srostedt@redhat.com>
LKML-Reference: <1239479479-2603-4-git-send-email-tytso@mit.edu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644
index 0000000..cd805e1
--- /dev/null
+++ b/Documentation/trace/power.txt
@@ -0,0 +1,17 @@
+The power tracer collects detailed information about C-state and P-state
+transitions, instead of just looking at the high-level "average"
+information.
+
+There is a helper script found in scrips/tracing/power.pl in the kernel
+sources which can be used to parse this information and create a
+Scalable Vector Graphics (SVG) picture from the trace data.
+
+To use this tracer:
+
+	echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+	echo power > /sys/kernel/debug/tracing/current_tracer
+	echo 1 > /sys/kernel/debug/tracing/tracing_enabled
+	sleep 1
+	echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+	cat /sys/kernel/debug/tracing/trace | \
+		perl scripts/tracing/power.pl > out.sv
-- 
cgit v0.10.2


From abd41443ac76d3e9c29a8c1d9e9a3312306cc55e Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 11 Apr 2009 15:51:18 -0400
Subject: tracing: Document the event tracing system

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1239479479-2603-3-git-send-email-tytso@mit.edu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644
index 0000000..abdee66
--- /dev/null
+++ b/Documentation/trace/events.txt
@@ -0,0 +1,135 @@
+			     Event Tracing
+
+		Documentation written by Theodore Ts'o
+
+Introduction
+============
+
+Tracepoints (see Documentation/trace/tracepoints.txt) can be used
+without creating custom kernel modules to register probe functions
+using the event tracing infrastructure.
+
+Not all tracepoints can be traced using the event tracing system;
+the kernel developer must provide code snippets which define how the
+tracing information is saved into the tracing buffer, and how the
+the tracing information should be printed.
+
+Using Event Tracing
+===================
+
+The events which are available for tracing can be found in the file
+/sys/kernel/debug/tracing/available_events.
+
+To enable a particular event, such as 'sched_wakeup', simply echo it
+to /sys/debug/tracing/set_event. For example:
+
+	# echo sched_wakeup > /sys/kernel/debug/tracing/set_event
+
+[ Note: events can also be enabled/disabled via the 'enabled' toggle
+  found in the /sys/kernel/tracing/events/ hierarchy of directories. ]
+
+To disable an event, echo the event name to the set_event file prefixed
+with an exclamation point:
+
+	# echo '!sched_wakeup' >> /sys/kernel/debug/tracing/set_event
+
+To disable events, echo an empty line to the set_event file:
+
+	# echo > /sys/kernel/debug/tracing/set_event
+
+The events are organized into subsystems, such as ext4, irq, sched,
+etc., and a full event name looks like this: <subsystem>:<event>.  The
+subsystem name is optional, but it is displayed in the available_events
+file.  All of the events in a subsystem can be specified via the syntax
+"<subsystem>:*"; for example, to enable all irq events, you can use the
+command:
+
+	# echo 'irq:*' > /sys/kernel/debug/tracing/set_event
+
+Defining an event-enabled tracepoint
+------------------------------------
+
+A kernel developer which wishes to define an event-enabled tracepoint
+must declare the tracepoint using TRACE_EVENT instead of DECLARE_TRACE.
+This is done via two header files in include/trace.  For example, to
+event-enable the jbd2 subsystem, we must create two files,
+include/trace/jbd2.h and include/trace/jbd2_event_types.h.  The
+include/trace/jbd2.h file should be included by kernel source files that
+will have a tracepoint inserted, and might look like this:
+
+#ifndef _TRACE_JBD2_H
+#define _TRACE_JBD2_H
+
+#include <linux/jbd2.h>
+#include <linux/tracepoint.h>
+
+#include <trace/jbd2_event_types.h>
+
+#endif
+
+In a file that utilizes a jbd2 tracepoint, this header file would be
+included.  Note that you still have to use DEFINE_TRACE().  So for
+example, if fs/jbd2/commit.c planned to use the jbd2_start_commit
+tracepoint, it would have the following near the beginning of the file:
+
+#include <trace/jbd2.h>
+
+DEFINE_TRACE(jbd2_start_commit);
+
+Then in the function that would call the tracepoint, it would call the
+tracepoint function.  (For more information, please see the tracepoint
+documentation in Documentation/trace/tracepoints.txt):
+
+	trace_jbd2_start_commit(journal, commit_transaction);
+
+The code snippets which allow jbd2_start_commit to be an event-enabled
+tracepoint are placed in the file include/trace/jbd2_event_types.h:
+
+/* use <trace/jbd2.h> instead */
+#ifndef TRACE_EVENT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM jbd2
+
+#include <linux/jbd2.h>
+
+TRACE_EVENT(jbd2_start_commit,
+	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+	TP_ARGS(journal, commit_transaction),
+	TP_STRUCT__entry(
+		__array(	char,	devname, BDEVNAME_SIZE+24 )
+		__field(	int,	transaction		  )
+	),
+	TP_fast_assign(
+		memcpy(__entry->devname, journal->j_devname, BDEVNAME_SIZE+24);
+		__entry->transaction	= commit_transaction->t_tid;
+	),
+	TP_printk("dev %s transaction %d",
+		  __entry->devname, __entry->transaction)
+);
+
+The TP_PROTO and TP_ARGS are unchanged from DECLARE_TRACE.  The new
+arguments to TRACE_EVENT are TP_STRUCT__entry, TP_fast_assign, and
+TP_printk.
+
+TP_STRUCT__entry defines the data structure which will be stored in the
+trace buffer.  Normally, fields in __entry will be arrays or simple
+types.  It is possible to place data structures in __entry --- however,
+pointers in the data structure can not be trusted, since they will be
+accessed sometime later by TP_printk, and if the data structure contains
+fields that will not or cannot be used by TP_printk, this will waste
+space in the trace buffer.  In general, data structures should be
+avoided, unless they do only contain non-pointer types and all of the
+fields will be used by TP_printk.
+
+TP_fast_assign defines the code snippet which saves information into the
+__entry data structure, using the passed-in arguments defined in
+TP_PROTO and TP_ARGS.
+
+Finally, TP_printk will print the __entry data structure.  At the time
+when the code snippet defined by TP_printk is executed, it will not have
+access to the TP_ARGS arguments; it can only use the information saved
+in the __entry data structure.
-- 
cgit v0.10.2


From 2c1b284e4fa260fd922b9a65c99169e2630c6862 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sat, 11 Apr 2009 00:03:10 +0530
Subject: x86: clean up declarations and variables

Impact: cleanup, no code changed

 - syscalls.h       update declarations due to unifications
 - irq.c            declare smp_generic_interrupt() before it gets used
 - process.c        declare sys_fork() and sys_vfork() before they get used
 - tsc.c            rename tsc_khz shadowed variable
 - apic/probe_32.c  declare apic_default before it gets used
 - apic/nmi.c       prev_nmi_count should be unsigned
 - apic/io_apic.c   declare smp_irq_move_cleanup_interrupt() before it gets used
 - mm/init.c        declare direct_gbpages and free_initrd_mem before they get used

Signed-off-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 42f2f83..5773660 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -478,6 +478,9 @@ static inline unsigned int read_apic_id(void)
 extern void default_setup_apic_routing(void);
 
 #ifdef CONFIG_X86_32
+
+extern struct apic apic_default;
+
 /*
  * Set up the logical destination ID.
  *
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b762ea4..be9ae41 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -78,7 +78,11 @@ extern void eisa_set_level_irq(unsigned int irq);
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
 extern void smp_spurious_interrupt(struct pt_regs *);
+extern void smp_generic_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_IO_APIC
+extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
+#endif
 #ifdef CONFIG_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 29d96d1..3f8d09d 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -503,6 +503,8 @@ static inline int pgd_none(pgd_t pgd)
 
 #ifndef __ASSEMBLY__
 
+extern int direct_gbpages;
+
 /* local pte updates need not use xchg for locking */
 static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
 {
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 6b87bc6..abde308 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -25,10 +25,6 @@ extern pgd_t init_level4_pgt[];
 
 extern void paging_init(void);
 
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-
 #define pte_ERROR(e)					\
 	printk("%s:%d: bad pte %p(%016lx).\n",		\
 	       __FILE__, __LINE__, &(e), pte_val(e))
@@ -135,8 +131,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 
 #define update_mmu_cache(vma, address, pte) do { } while (0)
 
-extern int direct_gbpages;
-
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 7043408..372b76e 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -1,7 +1,7 @@
 /*
  * syscalls.h - Linux syscall interfaces (arch-specific)
  *
- * Copyright (c) 2008 Jaswinder Singh
+ * Copyright (c) 2008 Jaswinder Singh Rajput
  *
  * This file is released under the GPLv2.
  * See the file COPYING for more details.
@@ -12,50 +12,55 @@
 
 #include <linux/compiler.h>
 #include <linux/linkage.h>
-#include <linux/types.h>
 #include <linux/signal.h>
+#include <linux/types.h>
 
 /* Common in X86_32 and X86_64 */
 /* kernel/ioport.c */
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 
+/* kernel/process.c */
+int sys_fork(struct pt_regs *);
+int sys_vfork(struct pt_regs *);
+
 /* kernel/ldt.c */
 asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
+/* kernel/signal.c */
+long sys_rt_sigreturn(struct pt_regs *);
+
 /* kernel/tls.c */
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
 asmlinkage int sys_get_thread_area(struct user_desc __user *);
 
 /* X86_32 only */
 #ifdef CONFIG_X86_32
+/* kernel/ioport.c */
+long sys_iopl(struct pt_regs *);
+
 /* kernel/process_32.c */
-int sys_fork(struct pt_regs *);
 int sys_clone(struct pt_regs *);
-int sys_vfork(struct pt_regs *);
 int sys_execve(struct pt_regs *);
 
-/* kernel/signal_32.c */
+/* kernel/signal.c */
 asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
 asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 			     struct old_sigaction __user *);
 int sys_sigaltstack(struct pt_regs *);
 unsigned long sys_sigreturn(struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
-
-/* kernel/ioport.c */
-long sys_iopl(struct pt_regs *);
 
 /* kernel/sys_i386_32.c */
+struct mmap_arg_struct;
+struct sel_arg_struct;
+struct oldold_utsname;
+struct old_utsname;
+
 asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
 			  unsigned long, unsigned long, unsigned long);
-struct mmap_arg_struct;
 asmlinkage int old_mmap(struct mmap_arg_struct __user *);
-struct sel_arg_struct;
 asmlinkage int old_select(struct sel_arg_struct __user *);
 asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
-struct old_utsname;
 asmlinkage int sys_uname(struct old_utsname __user *);
-struct oldold_utsname;
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
 /* kernel/vm86_32.c */
@@ -65,29 +70,27 @@ int sys_vm86(struct pt_regs *);
 #else /* CONFIG_X86_32 */
 
 /* X86_64 only */
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
+
 /* kernel/process_64.c */
-asmlinkage long sys_fork(struct pt_regs *);
 asmlinkage long sys_clone(unsigned long, unsigned long,
 			  void __user *, void __user *,
 			  struct pt_regs *);
-asmlinkage long sys_vfork(struct pt_regs *);
 asmlinkage long sys_execve(char __user *, char __user * __user *,
 			   char __user * __user *,
 			   struct pt_regs *);
 long sys_arch_prctl(int, unsigned long);
 
-/* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
-
-/* kernel/signal_64.c */
+/* kernel/signal.c */
 asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
 				struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
 
 /* kernel/sys_x86_64.c */
+struct new_utsname;
+
 asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
 			 unsigned long, unsigned long, unsigned long);
-struct new_utsname;
 asmlinkage long sys_uname(struct new_utsname __user *);
 
 #endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 767fe7e..870c92d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
+#include <asm/hw_irq.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_irq.h>
 
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index d6bd624..0205631 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -104,7 +104,7 @@ static __init void nmi_cpu_busy(void *data)
 }
 #endif
 
-static void report_broken_nmi(int cpu, int *prev_nmi_count)
+static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
 {
 	printk(KERN_CONT "\n");
 
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 01eda2a..440a8bc 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -160,7 +160,6 @@ extern struct apic apic_summit;
 extern struct apic apic_bigsmp;
 extern struct apic apic_es7000;
 extern struct apic apic_es7000_cluster;
-extern struct apic apic_default;
 
 struct apic *apic = &apic_default;
 EXPORT_SYMBOL_GPL(apic);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3aaf7b9..2188267 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -12,6 +12,7 @@
 #include <asm/io_apic.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/hw_irq.h>
 
 atomic_t irq_err_count;
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ca98915..3e21e38 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -11,6 +11,7 @@
 #include <trace/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
+#include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7a567eb..a8dc0d0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -384,13 +384,13 @@ unsigned long native_calibrate_tsc(void)
 {
 	u64 tsc1, tsc2, delta, ref1, ref2;
 	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
-	unsigned long flags, latch, ms, fast_calibrate, tsc_khz;
+	unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz;
 	int hpet = is_hpet_enabled(), i, loopmin;
 
-	tsc_khz = get_hypervisor_tsc_freq();
-	if (tsc_khz) {
+	hv_tsc_khz = get_hypervisor_tsc_freq();
+	if (hv_tsc_khz) {
 		printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
-		return tsc_khz;
+		return hv_tsc_khz;
 	}
 
 	local_irq_save(flags);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fd3da1d..40924e4 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,3 +1,4 @@
+#include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
 
-- 
cgit v0.10.2


From 02af61bb50f5d5f0322dbe5ab2a0d75808d25c7b Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 10 Apr 2009 14:26:18 +0800
Subject: tracing, kmemtrace: Separate include/trace/kmemtrace.h to kmemtrace
 part and tracepoint part

Impact: refactor code for future changes

Current kmemtrace.h is used both as header file of kmemtrace and kmem's
tracepoints definition.

Tracepoints' definition file may be used by other code, and should only have
definition of tracepoint.

We can separate include/trace/kmemtrace.h into 2 files:

  include/linux/kmemtrace.h: header file for kmemtrace
  include/trace/kmem.h:      definition of kmem tracepoints

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49DEE68A.5040902@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
new file mode 100644
index 0000000..15c45a2
--- /dev/null
+++ b/include/linux/kmemtrace.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <trace/kmem.h>
+
+#ifdef CONFIG_KMEMTRACE
+extern void kmemtrace_init(void);
+#else
+static inline void kmemtrace_init(void)
+{
+}
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 5ac9b0b..713f841 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,7 @@
 #include <asm/page.h>		/* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 /* Size description struct for general caches. */
 struct cache_sizes {
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5046f90..be5d40c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,7 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
diff --git a/include/trace/kmem.h b/include/trace/kmem.h
new file mode 100644
index 0000000..24d2519
--- /dev/null
+++ b/include/trace/kmem.h
@@ -0,0 +1,44 @@
+#ifndef _TRACE_KMEM_H
+#define _TRACE_KMEM_H
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+DECLARE_TRACE(kmalloc,
+	      TP_PROTO(unsigned long call_site,
+		      const void *ptr,
+		      size_t bytes_req,
+		      size_t bytes_alloc,
+		      gfp_t gfp_flags),
+	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
+DECLARE_TRACE(kmem_cache_alloc,
+	      TP_PROTO(unsigned long call_site,
+		      const void *ptr,
+		      size_t bytes_req,
+		      size_t bytes_alloc,
+		      gfp_t gfp_flags),
+	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
+DECLARE_TRACE(kmalloc_node,
+	      TP_PROTO(unsigned long call_site,
+		      const void *ptr,
+		      size_t bytes_req,
+		      size_t bytes_alloc,
+		      gfp_t gfp_flags,
+		      int node),
+	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
+DECLARE_TRACE(kmem_cache_alloc_node,
+	      TP_PROTO(unsigned long call_site,
+		      const void *ptr,
+		      size_t bytes_req,
+		      size_t bytes_alloc,
+		      gfp_t gfp_flags,
+		      int node),
+	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
+DECLARE_TRACE(kfree,
+	      TP_PROTO(unsigned long call_site, const void *ptr),
+	      TP_ARGS(call_site, ptr));
+DECLARE_TRACE(kmem_cache_free,
+	      TP_PROTO(unsigned long call_site, const void *ptr),
+	      TP_ARGS(call_site, ptr));
+
+#endif /* _TRACE_KMEM_H */
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
deleted file mode 100644
index 28ee69f..0000000
--- a/include/trace/kmemtrace.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <linux/tracepoint.h>
-#include <linux/types.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-DECLARE_TRACE(kmalloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmem_cache_alloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmalloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kmem_cache_alloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kfree,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-DECLARE_TRACE(kmem_cache_free,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
diff --git a/init/main.c b/init/main.c
index 3585f07..eece40c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -64,6 +64,7 @@
 #include <linux/idr.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
+#include <linux/kmemtrace.h>
 #include <trace/boot.h>
 
 #include <asm/io.h>
@@ -71,7 +72,6 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
-#include <trace/kmemtrace.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 5011f4d..7a0aa0e 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -12,7 +12,7 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 #include "trace_output.h"
 #include "trace.h"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f76a8f8..34b94c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,7 +9,7 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <trace/power.h>
 
 enum trace_type {
diff --git a/mm/slab.c b/mm/slab.c
index 9a90b00..f85831d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,7 @@
 #include	<linux/cpu.h>
 #include	<linux/sysctl.h>
 #include	<linux/module.h>
-#include	<trace/kmemtrace.h>
+#include	<linux/kmemtrace.h>
 #include	<linux/rcupdate.h>
 #include	<linux/string.h>
 #include	<linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index a2d4ab3..494f05f 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,7 +65,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <asm/atomic.h>
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index 7ab54ec..ea9e716 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,7 +16,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
-- 
cgit v0.10.2


From fc182a4330fc22ea1b68fa3d5064dd85a73a4c4a Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 10 Apr 2009 14:27:38 +0800
Subject: tracing, kmemtrace: Make kmem tracepoints use TRACE_EVENT macro

TRACE_EVENT is a more generic way to define tracepoints.
Doing so adds these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49DEE6DA.80600@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/trace/kmem.h b/include/trace/kmem.h
index 24d2519..46efc24 100644
--- a/include/trace/kmem.h
+++ b/include/trace/kmem.h
@@ -1,44 +1,9 @@
 #ifndef _TRACE_KMEM_H
 #define _TRACE_KMEM_H
 
-#include <linux/tracepoint.h>
 #include <linux/types.h>
+#include <linux/tracepoint.h>
 
-DECLARE_TRACE(kmalloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmem_cache_alloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmalloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kmem_cache_alloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kfree,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-DECLARE_TRACE(kmem_cache_free,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
+#include <trace/kmem_event_types.h>
 
 #endif /* _TRACE_KMEM_H */
diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h
new file mode 100644
index 0000000..4ff420f
--- /dev/null
+++ b/include/trace/kmem_event_types.h
@@ -0,0 +1,193 @@
+
+/* use <trace/kmem.h> instead */
+#ifndef TRACE_EVENT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
+
+TRACE_EVENT(kmalloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmalloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+#undef TRACE_SYSTEM
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
index 33b6bfc..552a50e 100644
--- a/include/trace/trace_event_types.h
+++ b/include/trace/trace_event_types.h
@@ -4,3 +4,4 @@
 #include <trace/irq_event_types.h>
 #include <trace/lockdep_event_types.h>
 #include <trace/skb_event_types.h>
+#include <trace/kmem_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 0e2aa80..13d6b85 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -4,3 +4,4 @@
 #include <trace/irq.h>
 #include <trace/lockdep.h>
 #include <trace/skb.h>
+#include <trace/kmem.h>
-- 
cgit v0.10.2


From b78825d608f30a47e3154ab6872a03f0de0c9d45 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 1 Apr 2009 16:18:53 +0800
Subject: blktrace: fix output of unknown events

Not all events are pc (packet command) events. An event is a pc
event only if it has BLK_TC_PC bit set.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <49D3236D.3090705@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 921ef5d..e45e1af 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1182,7 +1182,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter,
 	}
 
 	if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
-		ret = trace_seq_printf(s, "Bad pc action %x\n", what);
+		ret = trace_seq_printf(s, "Unknown action %x\n", what);
 	else {
 		ret = log_action(iter, what2act[what].act[long_act]);
 		if (ret)
-- 
cgit v0.10.2


From 66de7792c02693b49671afe58c771fde3b092fc7 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 1 Apr 2009 16:19:19 +0800
Subject: blktrace: fix output of BLK_TC_PC events

BLK_TC_PC events should be treated differently with BLK_TC_FS events.

Before this patch:

 # echo 1 > /sys/block/sda/sda1/trace/enable
 # echo pc > /sys/block/sda/sda1/trace/act_mask
 # echo blk > /debugfs/tracing/current_tracer
 # (generate some BLK_TC_PC events)
 # cat trace
        bash-2184  [000]  1774.275413:   8,7    I   N [bash]
        bash-2184  [000]  1774.275435:   8,7    D   N [bash]
        bash-2184  [000]  1774.275540:   8,7    I   R [bash]
        bash-2184  [000]  1774.275547:   8,7    D   R [bash]
 ksoftirqd/0-4     [000]  1774.275580:   8,7    C   N 0 [0]
        bash-2184  [000]  1774.275648:   8,7    I   R [bash]
        bash-2184  [000]  1774.275653:   8,7    D   R [bash]
 ksoftirqd/0-4     [000]  1774.275682:   8,7    C   N 0 [0]
        bash-2184  [000]  1774.275739:   8,7    I   R [bash]
        bash-2184  [000]  1774.275744:   8,7    D   R [bash]
 ksoftirqd/0-4     [000]  1774.275771:   8,7    C   N 0 [0]
        bash-2184  [000]  1774.275804:   8,7    I   R [bash]
        bash-2184  [000]  1774.275808:   8,7    D   R [bash]
 ksoftirqd/0-4     [000]  1774.275836:   8,7    C   N 0 [0]

After this patch:

 # cat trace
        bash-2263  [000]   366.782149:   8,7    I   N 0 (00 ..) [bash]
        bash-2263  [000]   366.782323:   8,7    D   N 0 (00 ..) [bash]
        bash-2263  [000]   366.782557:   8,7    I   R 8 (25 00 ..) [bash]
        bash-2263  [000]   366.782560:   8,7    D   R 8 (25 00 ..) [bash]
 ksoftirqd/0-4     [000]   366.782582:   8,7    C   N (25 00 ..) [0]
        bash-2263  [000]   366.782648:   8,7    I   R 8 (5a 00 3f 00) [bash]
        bash-2263  [000]   366.782650:   8,7    D   R 8 (5a 00 3f 00) [bash]
 ksoftirqd/0-4     [000]   366.782669:   8,7    C   N (5a 00 3f 00) [0]
        bash-2263  [000]   366.782710:   8,7    I   R 8 (5a 00 08 00) [bash]
        bash-2263  [000]   366.782713:   8,7    D   R 8 (5a 00 08 00) [bash]
 ksoftirqd/0-4     [000]   366.782730:   8,7    C   N (5a 00 08 00) [0]
        bash-2263  [000]   366.783375:   8,7    I   R 36 (5a 00 08 00) [bash]
        bash-2263  [000]   366.783379:   8,7    D   R 36 (5a 00 08 00) [bash]
 ksoftirqd/0-4     [000]   366.783404:   8,7    C   N (5a 00 08 00) [0]

This is what we do with PC events in user-space blktrace.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <49D32387.9040106@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e45e1af..2b98195 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -971,6 +971,16 @@ static inline const void *pdu_start(const struct trace_entry *ent)
 	return te_blk_io_trace(ent) + 1;
 }
 
+static inline u32 t_action(const struct trace_entry *ent)
+{
+	return te_blk_io_trace(ent)->action;
+}
+
+static inline u32 t_bytes(const struct trace_entry *ent)
+{
+	return te_blk_io_trace(ent)->bytes;
+}
+
 static inline u32 t_sec(const struct trace_entry *ent)
 {
 	return te_blk_io_trace(ent)->bytes >> 9;
@@ -1031,25 +1041,87 @@ static int blk_log_action(struct trace_iterator *iter, const char *act)
 				MAJOR(t->device), MINOR(t->device), act, rwbs);
 }
 
+static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
+{
+	const char *pdu_buf;
+	int pdu_len;
+	int i, end, ret;
+
+	pdu_buf = pdu_start(ent);
+	pdu_len = te_blk_io_trace(ent)->pdu_len;
+
+	if (!pdu_len)
+		return 1;
+
+	/* find the last zero that needs to be printed */
+	for (end = pdu_len - 1; end >= 0; end--)
+		if (pdu_buf[end])
+			break;
+	end++;
+
+	if (!trace_seq_putc(s, '('))
+		return 0;
+
+	for (i = 0; i < pdu_len; i++) {
+
+		ret = trace_seq_printf(s, "%s%02x",
+				       i == 0 ? "" : " ", pdu_buf[i]);
+		if (!ret)
+			return ret;
+
+		/*
+		 * stop when the rest is just zeroes and indicate so
+		 * with a ".." appended
+		 */
+		if (i == end && end != pdu_len - 1)
+			return trace_seq_puts(s, " ..) ");
+	}
+
+	return trace_seq_puts(s, ") ");
+}
+
 static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
 {
 	char cmd[TASK_COMM_LEN];
 
 	trace_find_cmdline(ent->pid, cmd);
 
-	if (t_sec(ent))
-		return trace_seq_printf(s, "%llu + %u [%s]\n",
-					t_sector(ent), t_sec(ent), cmd);
-	return trace_seq_printf(s, "[%s]\n", cmd);
+	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
+		int ret;
+
+		ret = trace_seq_printf(s, "%u ", t_bytes(ent));
+		if (!ret)
+			return 0;
+		ret = blk_log_dump_pdu(s, ent);
+		if (!ret)
+			return 0;
+		return trace_seq_printf(s, "[%s]\n", cmd);
+	} else {
+		if (t_sec(ent))
+			return trace_seq_printf(s, "%llu + %u [%s]\n",
+						t_sector(ent), t_sec(ent), cmd);
+		return trace_seq_printf(s, "[%s]\n", cmd);
+	}
 }
 
 static int blk_log_with_error(struct trace_seq *s,
 			      const struct trace_entry *ent)
 {
-	if (t_sec(ent))
-		return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
-					t_sec(ent), t_error(ent));
-	return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
+	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
+		int ret;
+
+		ret = blk_log_dump_pdu(s, ent);
+		if (ret)
+			return trace_seq_printf(s, "[%d]\n", t_error(ent));
+		return 0;
+	} else {
+		if (t_sec(ent))
+			return trace_seq_printf(s, "%llu + %u [%d]\n",
+						t_sector(ent),
+						t_sec(ent), t_error(ent));
+		return trace_seq_printf(s, "%llu [%d]\n",
+					t_sector(ent), t_error(ent));
+	}
 }
 
 static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
-- 
cgit v0.10.2


From 3fa89ca7ba5ba50b3924a11f6604b4bdce5f7842 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sun, 12 Apr 2009 20:37:25 +0530
Subject: x86: vdso/vma.c declare vdso_enabled and arch_setup_additional_pages
 before they get used

Impact: cleanup, address sparse warnings

Addresses the problem pointed out by these sparse warning:
  arch/x86/vdso/vma.c:19:28: warning: symbol 'vdso_enabled' was not declared. Should it be static?
  arch/x86/vdso/vma.c:101:5: warning: symbol 'arch_setup_additional_pages' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
LKML-Reference: <1239548845.4170.2.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7133cdf..cac0833 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/random.h>
+#include <linux/elf.h>
 #include <asm/vsyscall.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
-- 
cgit v0.10.2


From edea7148a87c099e5d5d4838285cc27e459588b7 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 12 Apr 2009 20:47:39 +0400
Subject: x86: irq.c - tiny cleanup

Impact: cleanup, robustization

 1) guard ack_bad_irq with printk_ratelimit since there is no
    guarantee we will not be flooded one day

 2) use pr_emerg() helper

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090412165058.277579847@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3aaf7b9..6603492 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -24,7 +24,8 @@ void (*generic_interrupt_extension)(void) = NULL;
  */
 void ack_bad_irq(unsigned int irq)
 {
-	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+	if (printk_ratelimit())
+		pr_err("unexpected IRQ trap at vector %02x\n", irq);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
@@ -178,7 +179,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_thermal_count;
 # ifdef CONFIG_X86_64
 	sum += irq_stats(cpu)->irq_threshold_count;
-#endif
+# endif
 #endif
 	return sum;
 }
@@ -219,8 +220,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 #endif
 
 		if (printk_ratelimit())
-			printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
-			       __func__, smp_processor_id(), vector, irq);
+			pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
+				__func__, smp_processor_id(), vector, irq);
 	}
 
 	irq_exit();
-- 
cgit v0.10.2


From c0eaa4536f08b98fbcfa7fce5b7b0de1bebcb0e1 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 12 Apr 2009 20:47:40 +0400
Subject: x86: apic - introduce imcr_ helpers

Impact: cleanup

Distinguish port writting magic into helpers with comments.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090412165058.535921550@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 098ec84..c3be10f5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -98,6 +98,29 @@ early_param("lapic", parse_lapic);
 /* Local APIC was disabled by the BIOS and enabled by the kernel */
 static int enabled_via_apicbase;
 
+/*
+ * Handle interrupt mode configuration register (IMCR).
+ * This register controls whether the interrupt signals
+ * that reach the BSP come from the master PIC or from the
+ * local APIC. Before entering Symmetric I/O Mode, either
+ * the BIOS or the operating system must switch out of
+ * PIC Mode by changing the IMCR.
+ */
+static inline imcr_pic_to_apic(void)
+{
+	/* select IMCR register */
+	outb(0x70, 0x22);
+	/* NMI and 8259 INTR go through APIC */
+	outb(0x01, 0x23);
+}
+
+static inline imcr_apic_to_pic(void)
+{
+	/* select IMCR register */
+	outb(0x70, 0x22);
+	/* NMI and 8259 INTR go directly to BSP */
+	outb(0x00, 0x23);
+}
 #endif
 
 #ifdef CONFIG_X86_64
@@ -1727,8 +1750,7 @@ void __init connect_bsp_APIC(void)
 		 */
 		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
 				"enabling APIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x01, 0x23);
+		imcr_pic_to_apic();
 	}
 #endif
 	if (apic->enable_apic_mode)
@@ -1756,8 +1778,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 		 */
 		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
 				"entering PIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x00, 0x23);
+		imcr_apic_to_pic();
 		return;
 	}
 #endif
-- 
cgit v0.10.2


From 08306ce61d6848e6fbf74fa4cc693c3fb29e943f Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 12 Apr 2009 20:47:41 +0400
Subject: x86: apic - introduce dummy apic operations

Impact: refactor, speed up and robustize code

In case if apic was disabled by kernel option
or by hardware limits we can use dummy operations
in apic->write to simplify the ack_APIC_irq() code.

At the lame time the patch fixes the missed EOI in
do_IRQ function (which has place if kernel is compiled
as X86-32 and interrupt without handler happens where
apic was not asked to be disabled via kernel option).

Note that native_apic_write_dummy() consists of
WARN_ON_ONCE to catch any buggy writes on enabled
APICs. Could be removed after some time of testing.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090412165058.724788431@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 42f2f83..2bd5a46 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -212,6 +212,7 @@ static inline void ack_x2APIC_irq(void)
 }
 #endif
 
+extern void apic_disable(void);
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void connect_bsp_APIC(void);
@@ -252,7 +253,7 @@ static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok		1
 static inline void init_apic_mappings(void) { }
 static inline void disable_local_APIC(void) { }
-
+static inline void apic_disable(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c3be10f5..9b849d4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -232,6 +232,24 @@ static int modern_apic(void)
 	return lapic_get_version() >= 0x14;
 }
 
+/*
+ * bare function to substitute write operation
+ * and it's _that_ fast :)
+ */
+void native_apic_write_dummy(u32 reg, u32 v)
+{
+	WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+}
+
+/*
+ * right after this call apic->write doesn't do anything
+ * note that there is no restore operation it works one way
+ */
+void apic_disable(void)
+{
+	apic->write = native_apic_write_dummy;
+}
+
 void native_apic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
@@ -1582,6 +1600,12 @@ void __init init_apic_mappings(void)
 	 */
 	if (boot_cpu_physical_apicid == -1U)
 		boot_cpu_physical_apicid = read_apic_id();
+
+	/* lets check if we may to NOP'ify apic operations */
+	if (!cpu_has_apic) {
+		pr_info("APIC: disable apic facility\n");
+		apic_disable();
+	}
 }
 
 /*
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 6603492..fd57bf3 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -27,7 +27,6 @@ void ack_bad_irq(unsigned int irq)
 	if (printk_ratelimit())
 		pr_err("unexpected IRQ trap at vector %02x\n", irq);
 
-#ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * Currently unexpected vectors happen only on SMP and APIC.
 	 * We _must_ ack these because every local APIC has only N
@@ -37,9 +36,7 @@ void ack_bad_irq(unsigned int irq)
 	 * completely.
 	 * But only ack when the APIC is enabled -AK
 	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
-#endif
+	ack_APIC_irq();
 }
 
 #define irq_stats(x)		(&per_cpu(irq_stat, x))
@@ -214,10 +211,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	irq = __get_cpu_var(vector_irq)[vector];
 
 	if (!handle_irq(irq, regs)) {
-#ifdef CONFIG_X86_64
-		if (!disable_apic)
-			ack_APIC_irq();
-#endif
+		ack_APIC_irq();
 
 		if (printk_ratelimit())
 			pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
-- 
cgit v0.10.2


From b9b34f24b23ba9e79e07c0980e7fff16af2a67d1 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 12 Apr 2009 20:47:42 +0400
Subject: x86: smp.c - align smp_ops assignments

Impact: cleanup

It's a bit hard to parse by eyes without
them being aligned.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090412165058.924175574@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 13f33ea..f6db48c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -193,19 +193,19 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
 }
 
 struct smp_ops smp_ops = {
-	.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
-	.smp_prepare_cpus = native_smp_prepare_cpus,
-	.smp_cpus_done = native_smp_cpus_done,
+	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
+	.smp_prepare_cpus	= native_smp_prepare_cpus,
+	.smp_cpus_done		= native_smp_cpus_done,
 
-	.smp_send_stop = native_smp_send_stop,
-	.smp_send_reschedule = native_smp_send_reschedule,
+	.smp_send_stop		= native_smp_send_stop,
+	.smp_send_reschedule	= native_smp_send_reschedule,
 
-	.cpu_up = native_cpu_up,
-	.cpu_die = native_cpu_die,
-	.cpu_disable = native_cpu_disable,
-	.play_dead = native_play_dead,
+	.cpu_up			= native_cpu_up,
+	.cpu_die		= native_cpu_die,
+	.cpu_disable		= native_cpu_disable,
+	.play_dead		= native_play_dead,
 
-	.send_call_func_ipi = native_send_call_func_ipi,
+	.send_call_func_ipi	= native_send_call_func_ipi,
 	.send_call_func_single_ipi = native_send_call_func_single_ipi,
 };
 EXPORT_SYMBOL_GPL(smp_ops);
-- 
cgit v0.10.2


From f4976116a98f108bf385f217332aadb3ca98fe66 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 13 Apr 2009 10:53:02 +0100
Subject: ASoC: WM9713 requires symmetric rates on the voice DAI

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index 523bad0..aa94cc6 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -1069,6 +1069,7 @@ struct snd_soc_dai wm9713_dai[] = {
 		.rates = WM9713_PCM_RATES,
 		.formats = WM9713_PCM_FORMATS,},
 	.ops = &wm9713_dai_ops_voice,
+	.symmetric_rates = 1,
 	},
 };
 EXPORT_SYMBOL_GPL(wm9713_dai);
-- 
cgit v0.10.2


From 025756eca458b4a3d5e3d76baaffb2e8e3df79db Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 13 Apr 2009 11:09:18 +0100
Subject: ASoC: Factor out application of power for generic widgets

This is simple code motion, intended to support future refactoring of
the DAPM algorithms and (more immediately) the additon of events for
DACs and ADCs.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 46485de..713d125 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -522,6 +522,65 @@ int dapm_reg_event(struct snd_soc_dapm_widget *w,
 }
 EXPORT_SYMBOL_GPL(dapm_reg_event);
 
+/* Standard power change method, used to apply power changes to most
+ * widgets.
+ */
+static int dapm_generic_apply_power(struct snd_soc_dapm_widget *w)
+{
+	int ret;
+
+	/* call any power change event handlers */
+	if (w->event)
+		pr_debug("power %s event for %s flags %x\n",
+			 w->power ? "on" : "off",
+			 w->name, w->event_flags);
+
+	/* power up pre event */
+	if (w->power && w->event &&
+	    (w->event_flags & SND_SOC_DAPM_PRE_PMU)) {
+		ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMU);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* power down pre event */
+	if (!w->power && w->event &&
+	    (w->event_flags & SND_SOC_DAPM_PRE_PMD)) {
+		ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMD);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Lower PGA volume to reduce pops */
+	if (w->id == snd_soc_dapm_pga && !w->power)
+		dapm_set_pga(w, w->power);
+
+	dapm_update_bits(w);
+
+	/* Raise PGA volume to reduce pops */
+	if (w->id == snd_soc_dapm_pga && w->power)
+		dapm_set_pga(w, w->power);
+
+	/* power up post event */
+	if (w->power && w->event &&
+	    (w->event_flags & SND_SOC_DAPM_POST_PMU)) {
+		ret = w->event(w,
+			       NULL, SND_SOC_DAPM_POST_PMU);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* power down post event */
+	if (!w->power && w->event &&
+	    (w->event_flags & SND_SOC_DAPM_POST_PMD)) {
+		ret = w->event(w, NULL, SND_SOC_DAPM_POST_PMD);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /*
  * Scan a single DAPM widget for a complete audio path and update the
  * power status appropriately.
@@ -601,56 +660,7 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 	if (!power_change)
 		return 0;
 
-	/* call any power change event handlers */
-	if (w->event)
-		pr_debug("power %s event for %s flags %x\n",
-			 w->power ? "on" : "off",
-			 w->name, w->event_flags);
-
-	/* power up pre event */
-	if (power && w->event &&
-	    (w->event_flags & SND_SOC_DAPM_PRE_PMU)) {
-		ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMU);
-		if (ret < 0)
-			return ret;
-	}
-
-	/* power down pre event */
-	if (!power && w->event &&
-	    (w->event_flags & SND_SOC_DAPM_PRE_PMD)) {
-		ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMD);
-		if (ret < 0)
-			return ret;
-	}
-
-	/* Lower PGA volume to reduce pops */
-	if (w->id == snd_soc_dapm_pga && !power)
-		dapm_set_pga(w, power);
-
-	dapm_update_bits(w);
-
-	/* Raise PGA volume to reduce pops */
-	if (w->id == snd_soc_dapm_pga && power)
-		dapm_set_pga(w, power);
-
-	/* power up post event */
-	if (power && w->event &&
-	    (w->event_flags & SND_SOC_DAPM_POST_PMU)) {
-		ret = w->event(w,
-			       NULL, SND_SOC_DAPM_POST_PMU);
-		if (ret < 0)
-			return ret;
-	}
-
-	/* power down post event */
-	if (!power && w->event &&
-	    (w->event_flags & SND_SOC_DAPM_POST_PMD)) {
-		ret = w->event(w, NULL, SND_SOC_DAPM_POST_PMD);
-		if (ret < 0)
-			return ret;
-	}
-
-	return 0;
+	return dapm_generic_apply_power(w);
 }
 
 /*
-- 
cgit v0.10.2


From f6d655a6e6974e474a11b25052c29d10b80814b3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 13 Apr 2009 11:27:03 +0100
Subject: ASoC: Support DAPM events for DACs and ADCs

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index a7def6a..fcc929d 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -140,9 +140,19 @@
 #define SND_SOC_DAPM_DAC(wname, stname, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \
 	.shift = wshift, .invert = winvert}
+#define SND_SOC_DAPM_DAC_E(wname, stname, wreg, wshift, winvert, \
+			   wevent, wflags)				\
+{	.id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \
+	.shift = wshift, .invert = winvert, \
+	.event = wevent, .event_flags = wflags}
 #define SND_SOC_DAPM_ADC(wname, stname, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \
 	.shift = wshift, .invert = winvert}
+#define SND_SOC_DAPM_ADC_E(wname, stname, wreg, wshift, winvert, \
+			   wevent, wflags)				\
+{	.id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \
+	.shift = wshift, .invert = winvert, \
+	.event = wevent, .event_flags = wflags}
 
 /* generic register modifier widget */
 #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 713d125..a6d7337 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -598,18 +598,22 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 	if (w->id == snd_soc_dapm_adc && w->active) {
 		in = is_connected_input_ep(w);
 		dapm_clear_walk(w->codec);
-		w->power = (in != 0) ? 1 : 0;
-		dapm_update_bits(w);
-		return 0;
+		power = (in != 0) ? 1 : 0;
+		if (power == w->power)
+			return 0;
+		w->power = power;
+		return dapm_generic_apply_power(w);
 	}
 
 	/* active DAC */
 	if (w->id == snd_soc_dapm_dac && w->active) {
 		out = is_connected_output_ep(w);
 		dapm_clear_walk(w->codec);
-		w->power = (out != 0) ? 1 : 0;
-		dapm_update_bits(w);
-		return 0;
+		power = (out != 0) ? 1 : 0;
+		if (power == w->power)
+			return 0;
+		w->power = power;
+		return dapm_generic_apply_power(w);
 	}
 
 	/* pre and post event widgets */
-- 
cgit v0.10.2


From 6bbcb459cd50807511491ddf96bca1ef92006bf8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 13 Apr 2009 11:29:10 +0100
Subject: ASoC: Move the WM9713 voice DAC powerdown to a DAPM event

This ensures that we sync with the DAPM powerdown sequencing properly
and don't need to bounce the power on the voice DAC so often.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index aa94cc6..a6feb784 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -189,6 +189,26 @@ SOC_SINGLE("3D Lower Cut-off Switch", AC97_REC_GAIN_MIC, 4, 1, 0),
 SOC_SINGLE("3D Depth", AC97_REC_GAIN_MIC, 0, 15, 1),
 };
 
+static int wm9713_voice_shutdown(struct snd_soc_dapm_widget *w,
+				 struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	u16 status, rate;
+
+	BUG_ON(event != SND_SOC_DAPM_PRE_PMD);
+
+	/* Gracefully shut down the voice interface. */
+	status = ac97_read(codec, AC97_EXTENDED_MID) | 0x1000;
+	rate = ac97_read(codec, AC97_HANDSET_RATE) & 0xF0FF;
+	ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0200);
+	schedule_timeout_interruptible(msecs_to_jiffies(1));
+	ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0F00);
+	ac97_write(codec, AC97_EXTENDED_MID, status);
+
+	return 0;
+}
+
+
 /* We have to create a fake left and right HP mixers because
  * the codec only has a single control that is shared by both channels.
  * This makes it impossible to determine the audio path using the current
@@ -400,7 +420,8 @@ SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
 SND_SOC_DAPM_MIXER("HP Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
 SND_SOC_DAPM_MIXER("Line Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
 SND_SOC_DAPM_MIXER("Capture Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
-SND_SOC_DAPM_DAC("Voice DAC", "Voice Playback", AC97_EXTENDED_MID, 12, 1),
+SND_SOC_DAPM_DAC_E("Voice DAC", "Voice Playback", AC97_EXTENDED_MID, 12, 1,
+		   wm9713_voice_shutdown, SND_SOC_DAPM_PRE_PMD),
 SND_SOC_DAPM_DAC("Aux DAC", "Aux Playback", AC97_EXTENDED_MID, 11, 1),
 SND_SOC_DAPM_PGA("Left ADC", AC97_EXTENDED_MID, 5, 1, NULL, 0),
 SND_SOC_DAPM_PGA("Right ADC", AC97_EXTENDED_MID, 4, 1, NULL, 0),
@@ -936,21 +957,6 @@ static int wm9713_pcm_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static void wm9713_voiceshutdown(struct snd_pcm_substream *substream,
-				 struct snd_soc_dai *dai)
-{
-	struct snd_soc_codec *codec = dai->codec;
-	u16 status, rate;
-
-	/* Gracefully shut down the voice interface. */
-	status = ac97_read(codec, AC97_EXTENDED_STATUS) | 0x1000;
-	rate = ac97_read(codec, AC97_HANDSET_RATE) & 0xF0FF;
-	ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0200);
-	schedule_timeout_interruptible(msecs_to_jiffies(1));
-	ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0F00);
-	ac97_write(codec, AC97_EXTENDED_MID, status);
-}
-
 static int ac97_hifi_prepare(struct snd_pcm_substream *substream,
 			     struct snd_soc_dai *dai)
 {
@@ -1019,7 +1025,6 @@ static struct snd_soc_dai_ops wm9713_dai_ops_aux = {
 
 static struct snd_soc_dai_ops wm9713_dai_ops_voice = {
 	.hw_params	= wm9713_pcm_hw_params,
-	.shutdown	= wm9713_voiceshutdown,
 	.set_clkdiv	= wm9713_set_dai_clkdiv,
 	.set_pll	= wm9713_set_dai_pll,
 	.set_fmt	= wm9713_set_dai_fmt,
-- 
cgit v0.10.2


From a820532002e70e3a06f1ea7133e9b02443d07382 Mon Sep 17 00:00:00 2001
From: Daniel Ribeiro <drwyrm@gmail.com>
Date: Wed, 8 Apr 2009 10:51:24 -0300
Subject: ASoC: pxa-ssp.c fix clock/frame invert

SCMODE(0): Data Driven (Falling), Data Sampled (Rising), Idle State (Low)
SCMODE(1): Data Driven (Rising), Data Sampled (Falling), Idle State (Low)
SCMODE(2): Data Driven (Rising), Data Sampled (Falling), Idle State (High)
SCMODE(3): Data Driven (Falling), Data Sampled (Rising), Idle State (High)

SCMODE(3) does not invert the clock polarity compared to the default SCMODE(0).

This patch also adds all possible NF/IF, NB/IB combinations to the DSP_A and
DSP_B modes.

Signed-off-by: Daniel Ribeiro <drwyrm@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index c7c1996..176af7ff 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -568,7 +568,10 @@ static int pxa_ssp_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		case SND_SOC_DAIFMT_NB_IF:
 			break;
 		case SND_SOC_DAIFMT_IB_IF:
-			sspsp |= SSPSP_SCMODE(3);
+			sspsp |= SSPSP_SCMODE(2);
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			sspsp |= SSPSP_SCMODE(2) | SSPSP_SFRMP;
 			break;
 		default:
 			return -EINVAL;
@@ -585,7 +588,13 @@ static int pxa_ssp_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		case SND_SOC_DAIFMT_NB_NF:
 			sspsp |= SSPSP_SFRMP;
 			break;
+		case SND_SOC_DAIFMT_NB_IF:
+			break;
 		case SND_SOC_DAIFMT_IB_IF:
+			sspsp |= SSPSP_SCMODE(2);
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			sspsp |= SSPSP_SCMODE(2) | SSPSP_SFRMP;
 			break;
 		default:
 			return -EINVAL;
-- 
cgit v0.10.2


From f2644a2c00a06236a9c5e85488b0680825bad39c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 7 Apr 2009 19:20:14 +0100
Subject: ASoC: Add WM8960 CODEC driver

The WM8960 is a low power, high quality stereo codec designed for
portable digital audio applications.

Stereo class D speaker drivers provide 1W per channel into 8W loads.
Guaranteed low leakage, excellent PSRR and pop/click suppression
mechanisms enable direct battery connection for the speaker supply.

The device also integrates a complete microphone interface and a stereo
headphone driver. External component requirements are drastically
reduced as no separate microphone, speaker or headphone amplifiers are
required. Advanced on-chip digital signal processing performs automatic
level control for the microphone or line input.

Stereo 24-bit sigma-delta ADCs and DACs are used with low power
over-sampling digital interpolation and decimation filters and a
flexible digital audio interface.

The master clock can be input directly or generated internally by an
onboard PLL, supporting most commonly-used clocking schemes.

This driver was originally written by Liam Girdwood, with substantial
subsequent additions and updates for feature completeness and changes in
the ASoC framework from me.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index ab36485..121d63f 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -35,6 +35,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_WM8753 if SND_SOC_I2C_AND_SPI
 	select SND_SOC_WM8900 if I2C
 	select SND_SOC_WM8903 if I2C
+	select SND_SOC_WM8960 if I2C
 	select SND_SOC_WM8971 if I2C
 	select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI
 	select SND_SOC_WM8990 if I2C
@@ -139,6 +140,9 @@ config SND_SOC_WM8900
 config SND_SOC_WM8903
 	tristate
 
+config SND_SOC_WM8960
+	tristate
+
 config SND_SOC_WM8971
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index a72548d..d8e15a4 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -23,6 +23,7 @@ snd-soc-wm8750-objs := wm8750.o
 snd-soc-wm8753-objs := wm8753.o
 snd-soc-wm8900-objs := wm8900.o
 snd-soc-wm8903-objs := wm8903.o
+snd-soc-wm8960-objs := wm8960.o
 snd-soc-wm8971-objs := wm8971.o
 snd-soc-wm8988-objs := wm8988.o
 snd-soc-wm8990-objs := wm8990.o
@@ -56,6 +57,7 @@ obj-$(CONFIG_SND_SOC_WM8753)	+= snd-soc-wm8753.o
 obj-$(CONFIG_SND_SOC_WM8900)	+= snd-soc-wm8900.o
 obj-$(CONFIG_SND_SOC_WM8903)	+= snd-soc-wm8903.o
 obj-$(CONFIG_SND_SOC_WM8971)	+= snd-soc-wm8971.o
+obj-$(CONFIG_SND_SOC_WM8960)	+= snd-soc-wm8960.o
 obj-$(CONFIG_SND_SOC_WM8988)	+= snd-soc-wm8988.o
 obj-$(CONFIG_SND_SOC_WM8990)	+= snd-soc-wm8990.o
 obj-$(CONFIG_SND_SOC_WM8991)	+= snd-soc-wm8991.o
diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c
new file mode 100644
index 0000000..e224d8a
--- /dev/null
+++ b/sound/soc/codecs/wm8960.c
@@ -0,0 +1,969 @@
+/*
+ * wm8960.c  --  WM8960 ALSA SoC Audio driver
+ *
+ * Author: Liam Girdwood
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include "wm8960.h"
+
+#define AUDIO_NAME "wm8960"
+
+struct snd_soc_codec_device soc_codec_dev_wm8960;
+
+/* R25 - Power 1 */
+#define WM8960_VREF      0x40
+
+/* R28 - Anti-pop 1 */
+#define WM8960_POBCTRL   0x80
+#define WM8960_BUFDCOPEN 0x10
+#define WM8960_BUFIOEN   0x08
+#define WM8960_SOFT_ST   0x04
+#define WM8960_HPSTBY    0x01
+
+/* R29 - Anti-pop 2 */
+#define WM8960_DISOP     0x40
+
+/*
+ * wm8960 register cache
+ * We can't read the WM8960 register space when we are
+ * using 2 wire for device control, so we cache them instead.
+ */
+static const u16 wm8960_reg[WM8960_CACHEREGNUM] = {
+	0x0097, 0x0097, 0x0000, 0x0000,
+	0x0000, 0x0008, 0x0000, 0x000a,
+	0x01c0, 0x0000, 0x00ff, 0x00ff,
+	0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x007b, 0x0100, 0x0032,
+	0x0000, 0x00c3, 0x00c3, 0x01c0,
+	0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000,
+	0x0100, 0x0100, 0x0050, 0x0050,
+	0x0050, 0x0050, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0040, 0x0000,
+	0x0000, 0x0050, 0x0050, 0x0000,
+	0x0002, 0x0037, 0x004d, 0x0080,
+	0x0008, 0x0031, 0x0026, 0x00e9,
+};
+
+struct wm8960_priv {
+	u16 reg_cache[WM8960_CACHEREGNUM];
+	struct snd_soc_codec codec;
+};
+
+/*
+ * read wm8960 register cache
+ */
+static inline unsigned int wm8960_read_reg_cache(struct snd_soc_codec *codec,
+	unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+	if (reg == WM8960_RESET)
+		return 0;
+	if (reg >= WM8960_CACHEREGNUM)
+		return -1;
+	return cache[reg];
+}
+
+/*
+ * write wm8960 register cache
+ */
+static inline void wm8960_write_reg_cache(struct snd_soc_codec *codec,
+	u16 reg, unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+	if (reg >= WM8960_CACHEREGNUM)
+		return;
+	cache[reg] = value;
+}
+
+static inline unsigned int wm8960_read(struct snd_soc_codec *codec,
+	unsigned int reg)
+{
+	return wm8960_read_reg_cache(codec, reg);
+}
+
+/*
+ * write to the WM8960 register space
+ */
+static int wm8960_write(struct snd_soc_codec *codec, unsigned int reg,
+	unsigned int value)
+{
+	u8 data[2];
+
+	/* data is
+	 *   D15..D9 WM8960 register offset
+	 *   D8...D0 register data
+	 */
+	data[0] = (reg << 1) | ((value >> 8) & 0x0001);
+	data[1] = value & 0x00ff;
+
+	wm8960_write_reg_cache(codec, reg, value);
+	if (codec->hw_write(codec->control_data, data, 2) == 2)
+		return 0;
+	else
+		return -EIO;
+}
+
+#define wm8960_reset(c)	wm8960_write(c, WM8960_RESET, 0)
+
+/* enumerated controls */
+static const char *wm8960_deemph[] = {"None", "32Khz", "44.1Khz", "48Khz"};
+static const char *wm8960_polarity[] = {"No Inversion", "Left Inverted",
+	"Right Inverted", "Stereo Inversion"};
+static const char *wm8960_3d_upper_cutoff[] = {"High", "Low"};
+static const char *wm8960_3d_lower_cutoff[] = {"Low", "High"};
+static const char *wm8960_alcfunc[] = {"Off", "Right", "Left", "Stereo"};
+static const char *wm8960_alcmode[] = {"ALC", "Limiter"};
+
+static const struct soc_enum wm8960_enum[] = {
+	SOC_ENUM_SINGLE(WM8960_DACCTL1, 1, 4, wm8960_deemph),
+	SOC_ENUM_SINGLE(WM8960_DACCTL1, 5, 4, wm8960_polarity),
+	SOC_ENUM_SINGLE(WM8960_DACCTL2, 5, 4, wm8960_polarity),
+	SOC_ENUM_SINGLE(WM8960_3D, 6, 2, wm8960_3d_upper_cutoff),
+	SOC_ENUM_SINGLE(WM8960_3D, 5, 2, wm8960_3d_lower_cutoff),
+	SOC_ENUM_SINGLE(WM8960_ALC1, 7, 4, wm8960_alcfunc),
+	SOC_ENUM_SINGLE(WM8960_ALC3, 8, 2, wm8960_alcmode),
+};
+
+static const DECLARE_TLV_DB_SCALE(adc_tlv, -9700, 50, 0);
+static const DECLARE_TLV_DB_SCALE(dac_tlv, -12700, 50, 1);
+static const DECLARE_TLV_DB_SCALE(bypass_tlv, -2100, 300, 0);
+static const DECLARE_TLV_DB_SCALE(out_tlv, -12100, 100, 1);
+
+static const struct snd_kcontrol_new wm8960_snd_controls[] = {
+SOC_DOUBLE_R_TLV("Capture Volume", WM8960_LINVOL, WM8960_RINVOL,
+		 0, 63, 0, adc_tlv),
+SOC_DOUBLE_R("Capture Volume ZC Switch", WM8960_LINVOL, WM8960_RINVOL,
+	6, 1, 0),
+SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL,
+	7, 1, 0),
+
+SOC_DOUBLE_R_TLV("Playback Volume", WM8960_LDAC, WM8960_RDAC,
+		 0, 255, 0, dac_tlv),
+
+SOC_DOUBLE_R_TLV("Headphone Playback Volume", WM8960_LOUT1, WM8960_ROUT1,
+		 0, 127, 0, out_tlv),
+SOC_DOUBLE_R("Headphone Playback ZC Switch", WM8960_LOUT1, WM8960_ROUT1,
+	7, 1, 0),
+
+SOC_DOUBLE_R_TLV("Speaker Playback Volume", WM8960_LOUT2, WM8960_ROUT2,
+		 0, 127, 0, out_tlv),
+SOC_DOUBLE_R("Speaker Playback ZC Switch", WM8960_LOUT2, WM8960_ROUT2,
+	7, 1, 0),
+SOC_SINGLE("Speaker DC Volume", WM8960_CLASSD3, 3, 5, 0),
+SOC_SINGLE("Speaker AC Volume", WM8960_CLASSD3, 0, 5, 0),
+
+SOC_SINGLE("PCM Playback -6dB Switch", WM8960_DACCTL1, 7, 1, 0),
+SOC_ENUM("ADC Polarity", wm8960_enum[1]),
+SOC_ENUM("Playback De-emphasis", wm8960_enum[0]),
+SOC_SINGLE("ADC High Pass Filter Switch", WM8960_DACCTL1, 0, 1, 0),
+
+SOC_ENUM("DAC Polarity", wm8960_enum[2]),
+
+SOC_ENUM("3D Filter Upper Cut-Off", wm8960_enum[3]),
+SOC_ENUM("3D Filter Lower Cut-Off", wm8960_enum[4]),
+SOC_SINGLE("3D Volume", WM8960_3D, 1, 15, 0),
+SOC_SINGLE("3D Switch", WM8960_3D, 0, 1, 0),
+
+SOC_ENUM("ALC Function", wm8960_enum[5]),
+SOC_SINGLE("ALC Max Gain", WM8960_ALC1, 4, 7, 0),
+SOC_SINGLE("ALC Target", WM8960_ALC1, 0, 15, 1),
+SOC_SINGLE("ALC Min Gain", WM8960_ALC2, 4, 7, 0),
+SOC_SINGLE("ALC Hold Time", WM8960_ALC2, 0, 15, 0),
+SOC_ENUM("ALC Mode", wm8960_enum[6]),
+SOC_SINGLE("ALC Decay", WM8960_ALC3, 4, 15, 0),
+SOC_SINGLE("ALC Attack", WM8960_ALC3, 0, 15, 0),
+
+SOC_SINGLE("Noise Gate Threshold", WM8960_NOISEG, 3, 31, 0),
+SOC_SINGLE("Noise Gate Switch", WM8960_NOISEG, 0, 1, 0),
+
+SOC_DOUBLE_R("ADC PCM Capture Volume", WM8960_LINPATH, WM8960_RINPATH,
+	0, 127, 0),
+
+SOC_SINGLE_TLV("Left Output Mixer Boost Bypass Volume",
+	       WM8960_BYPASS1, 4, 7, 1, bypass_tlv),
+SOC_SINGLE_TLV("Left Output Mixer LINPUT3 Volume",
+	       WM8960_LOUTMIX, 4, 7, 1, bypass_tlv),
+SOC_SINGLE_TLV("Right Output Mixer Boost Bypass Volume",
+	       WM8960_BYPASS2, 4, 7, 1, bypass_tlv),
+SOC_SINGLE_TLV("Right Output Mixer RINPUT3 Volume",
+	       WM8960_ROUTMIX, 4, 7, 1, bypass_tlv),
+};
+
+static const struct snd_kcontrol_new wm8960_lin_boost[] = {
+SOC_DAPM_SINGLE("LINPUT2 Switch", WM8960_LINPATH, 6, 1, 0),
+SOC_DAPM_SINGLE("LINPUT3 Switch", WM8960_LINPATH, 7, 1, 0),
+SOC_DAPM_SINGLE("LINPUT1 Switch", WM8960_LINPATH, 8, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_lin[] = {
+SOC_DAPM_SINGLE("Boost Switch", WM8960_LINPATH, 3, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_rin_boost[] = {
+SOC_DAPM_SINGLE("RINPUT2 Switch", WM8960_RINPATH, 6, 1, 0),
+SOC_DAPM_SINGLE("RINPUT3 Switch", WM8960_RINPATH, 7, 1, 0),
+SOC_DAPM_SINGLE("RINPUT1 Switch", WM8960_RINPATH, 8, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_rin[] = {
+SOC_DAPM_SINGLE("Boost Switch", WM8960_RINPATH, 3, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_loutput_mixer[] = {
+SOC_DAPM_SINGLE("PCM Playback Switch", WM8960_LOUTMIX, 8, 1, 0),
+SOC_DAPM_SINGLE("LINPUT3 Switch", WM8960_LOUTMIX, 7, 1, 0),
+SOC_DAPM_SINGLE("Boost Bypass Switch", WM8960_BYPASS1, 7, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_routput_mixer[] = {
+SOC_DAPM_SINGLE("PCM Playback Switch", WM8960_ROUTMIX, 8, 1, 0),
+SOC_DAPM_SINGLE("RINPUT3 Switch", WM8960_ROUTMIX, 7, 1, 0),
+SOC_DAPM_SINGLE("Boost Bypass Switch", WM8960_BYPASS2, 7, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8960_mono_out[] = {
+SOC_DAPM_SINGLE("Left Switch", WM8960_MONOMIX1, 7, 1, 0),
+SOC_DAPM_SINGLE("Right Switch", WM8960_MONOMIX2, 7, 1, 0),
+};
+
+static const struct snd_soc_dapm_widget wm8960_dapm_widgets[] = {
+SND_SOC_DAPM_INPUT("LINPUT1"),
+SND_SOC_DAPM_INPUT("RINPUT1"),
+SND_SOC_DAPM_INPUT("LINPUT2"),
+SND_SOC_DAPM_INPUT("RINPUT2"),
+SND_SOC_DAPM_INPUT("LINPUT3"),
+SND_SOC_DAPM_INPUT("RINPUT3"),
+
+SND_SOC_DAPM_MICBIAS("MICB", WM8960_POWER1, 1, 0),
+
+SND_SOC_DAPM_MIXER("Left Boost Mixer", WM8960_POWER1, 5, 0,
+		   wm8960_lin_boost, ARRAY_SIZE(wm8960_lin_boost)),
+SND_SOC_DAPM_MIXER("Right Boost Mixer", WM8960_POWER1, 4, 0,
+		   wm8960_rin_boost, ARRAY_SIZE(wm8960_rin_boost)),
+
+SND_SOC_DAPM_MIXER("Left Input Mixer", WM8960_POWER3, 5, 0,
+		   wm8960_lin, ARRAY_SIZE(wm8960_lin)),
+SND_SOC_DAPM_MIXER("Right Input Mixer", WM8960_POWER3, 4, 0,
+		   wm8960_rin, ARRAY_SIZE(wm8960_rin)),
+
+SND_SOC_DAPM_ADC("Left ADC", "Capture", WM8960_POWER2, 3, 0),
+SND_SOC_DAPM_ADC("Right ADC", "Capture", WM8960_POWER2, 2, 0),
+
+SND_SOC_DAPM_DAC("Left DAC", "Playback", WM8960_POWER2, 8, 0),
+SND_SOC_DAPM_DAC("Right DAC", "Playback", WM8960_POWER2, 7, 0),
+
+SND_SOC_DAPM_MIXER("Left Output Mixer", WM8960_POWER3, 3, 0,
+	&wm8960_loutput_mixer[0],
+	ARRAY_SIZE(wm8960_loutput_mixer)),
+SND_SOC_DAPM_MIXER("Right Output Mixer", WM8960_POWER3, 2, 0,
+	&wm8960_routput_mixer[0],
+	ARRAY_SIZE(wm8960_routput_mixer)),
+
+SND_SOC_DAPM_MIXER("Mono Output Mixer", WM8960_POWER2, 1, 0,
+	&wm8960_mono_out[0],
+	ARRAY_SIZE(wm8960_mono_out)),
+
+SND_SOC_DAPM_PGA("LOUT1 PGA", WM8960_POWER2, 6, 0, NULL, 0),
+SND_SOC_DAPM_PGA("ROUT1 PGA", WM8960_POWER2, 5, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("Left Speaker PGA", WM8960_POWER2, 4, 0, NULL, 0),
+SND_SOC_DAPM_PGA("Right Speaker PGA", WM8960_POWER2, 3, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA("Right Speaker Output", WM8960_CLASSD1, 7, 0, NULL, 0),
+SND_SOC_DAPM_PGA("Left Speaker Output", WM8960_CLASSD1, 6, 0, NULL, 0),
+
+SND_SOC_DAPM_OUTPUT("SPK_LP"),
+SND_SOC_DAPM_OUTPUT("SPK_LN"),
+SND_SOC_DAPM_OUTPUT("HP_L"),
+SND_SOC_DAPM_OUTPUT("HP_R"),
+SND_SOC_DAPM_OUTPUT("SPK_RP"),
+SND_SOC_DAPM_OUTPUT("SPK_RN"),
+SND_SOC_DAPM_OUTPUT("OUT3"),
+};
+
+static const struct snd_soc_dapm_route audio_paths[] = {
+	{ "Left Boost Mixer", "LINPUT1 Switch", "LINPUT1" },
+	{ "Left Boost Mixer", "LINPUT2 Switch", "LINPUT2" },
+	{ "Left Boost Mixer", "LINPUT3 Switch", "LINPUT3" },
+
+	{ "Left Input Mixer", "Boost Switch", "Left Boost Mixer", },
+	{ "Left Input Mixer", NULL, "LINPUT1", },  /* Really Boost Switch */
+	{ "Left Input Mixer", NULL, "LINPUT2" },
+	{ "Left Input Mixer", NULL, "LINPUT3" },
+
+	{ "Right Boost Mixer", "RINPUT1 Switch", "RINPUT1" },
+	{ "Right Boost Mixer", "RINPUT2 Switch", "RINPUT2" },
+	{ "Right Boost Mixer", "RINPUT3 Switch", "RINPUT3" },
+
+	{ "Right Input Mixer", "Boost Switch", "Right Boost Mixer", },
+	{ "Right Input Mixer", NULL, "RINPUT1", },  /* Really Boost Switch */
+	{ "Right Input Mixer", NULL, "RINPUT2" },
+	{ "Right Input Mixer", NULL, "LINPUT3" },
+
+	{ "Left ADC", NULL, "Left Input Mixer" },
+	{ "Right ADC", NULL, "Right Input Mixer" },
+
+	{ "Left Output Mixer", "LINPUT3 Switch", "LINPUT3" },
+	{ "Left Output Mixer", "Boost Bypass Switch", "Left Boost Mixer"} ,
+	{ "Left Output Mixer", "PCM Playback Switch", "Left DAC" },
+
+	{ "Right Output Mixer", "RINPUT3 Switch", "RINPUT3" },
+	{ "Right Output Mixer", "Boost Bypass Switch", "Right Boost Mixer" } ,
+	{ "Right Output Mixer", "PCM Playback Switch", "Right DAC" },
+
+	{ "Mono Output Mixer", "Left Switch", "Left Output Mixer" },
+	{ "Mono Output Mixer", "Right Switch", "Right Output Mixer" },
+
+	{ "LOUT1 PGA", NULL, "Left Output Mixer" },
+	{ "ROUT1 PGA", NULL, "Right Output Mixer" },
+
+	{ "HP_L", NULL, "LOUT1 PGA" },
+	{ "HP_R", NULL, "ROUT1 PGA" },
+
+	{ "Left Speaker PGA", NULL, "Left Output Mixer" },
+	{ "Right Speaker PGA", NULL, "Right Output Mixer" },
+
+	{ "Left Speaker Output", NULL, "Left Speaker PGA" },
+	{ "Right Speaker Output", NULL, "Right Speaker PGA" },
+
+	{ "SPK_LN", NULL, "Left Speaker Output" },
+	{ "SPK_LP", NULL, "Left Speaker Output" },
+	{ "SPK_RN", NULL, "Right Speaker Output" },
+	{ "SPK_RP", NULL, "Right Speaker Output" },
+
+	{ "OUT3", NULL, "Mono Output Mixer", }
+};
+
+static int wm8960_add_widgets(struct snd_soc_codec *codec)
+{
+	snd_soc_dapm_new_controls(codec, wm8960_dapm_widgets,
+				  ARRAY_SIZE(wm8960_dapm_widgets));
+
+	snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths));
+
+	snd_soc_dapm_new_widgets(codec);
+	return 0;
+}
+
+static int wm8960_set_dai_fmt(struct snd_soc_dai *codec_dai,
+		unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 iface = 0;
+
+	/* set master/slave audio interface */
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		iface |= 0x0040;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* interface format */
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		iface |= 0x0002;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		iface |= 0x0001;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface |= 0x0003;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		iface |= 0x0013;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* clock inversion */
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		iface |= 0x0090;
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		iface |= 0x0080;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		iface |= 0x0010;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* set iface */
+	wm8960_write(codec, WM8960_IFACE1, iface);
+	return 0;
+}
+
+static int wm8960_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 iface = wm8960_read(codec, WM8960_IFACE1) & 0xfff3;
+
+	/* bit size */
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		iface |= 0x0004;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		iface |= 0x0008;
+		break;
+	}
+
+	/* set iface */
+	wm8960_write(codec, WM8960_IFACE1, iface);
+	return 0;
+}
+
+static int wm8960_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	u16 mute_reg = wm8960_read(codec, WM8960_DACCTL1) & 0xfff7;
+
+	if (mute)
+		wm8960_write(codec, WM8960_DACCTL1, mute_reg | 0x8);
+	else
+		wm8960_write(codec, WM8960_DACCTL1, mute_reg);
+	return 0;
+}
+
+static int wm8960_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	struct wm8960_data *pdata = codec->dev->platform_data;
+	u16 reg;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+
+	case SND_SOC_BIAS_PREPARE:
+		/* Set VMID to 2x50k */
+		reg = wm8960_read(codec, WM8960_POWER1);
+		reg &= ~0x180;
+		reg |= 0x80;
+		wm8960_write(codec, WM8960_POWER1, reg);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		if (codec->bias_level == SND_SOC_BIAS_OFF) {
+			/* Enable anti-pop features */
+			wm8960_write(codec, WM8960_APOP1,
+				     WM8960_POBCTRL | WM8960_SOFT_ST |
+				     WM8960_BUFDCOPEN | WM8960_BUFIOEN);
+
+			/* Discharge HP output */
+			reg = WM8960_DISOP;
+			if (pdata)
+				reg |= pdata->dres << 4;
+			wm8960_write(codec, WM8960_APOP2, reg);
+
+			msleep(400);
+
+			wm8960_write(codec, WM8960_APOP2, 0);
+
+			/* Enable & ramp VMID at 2x50k */
+			reg = wm8960_read(codec, WM8960_POWER1);
+			reg |= 0x80;
+			wm8960_write(codec, WM8960_POWER1, reg);
+			msleep(100);
+
+			/* Enable VREF */
+			wm8960_write(codec, WM8960_POWER1, reg | WM8960_VREF);
+
+			/* Disable anti-pop features */
+			wm8960_write(codec, WM8960_APOP1, WM8960_BUFIOEN);
+		}
+
+		/* Set VMID to 2x250k */
+		reg = wm8960_read(codec, WM8960_POWER1);
+		reg &= ~0x180;
+		reg |= 0x100;
+		wm8960_write(codec, WM8960_POWER1, reg);
+		break;
+
+	case SND_SOC_BIAS_OFF:
+		/* Enable anti-pop features */
+		wm8960_write(codec, WM8960_APOP1,
+			     WM8960_POBCTRL | WM8960_SOFT_ST |
+			     WM8960_BUFDCOPEN | WM8960_BUFIOEN);
+
+		/* Disable VMID and VREF, let them discharge */
+		wm8960_write(codec, WM8960_POWER1, 0);
+		msleep(600);
+
+		wm8960_write(codec, WM8960_APOP1, 0);
+		break;
+	}
+
+	codec->bias_level = level;
+
+	return 0;
+}
+
+/* PLL divisors */
+struct _pll_div {
+	u32 pre_div:1;
+	u32 n:4;
+	u32 k:24;
+};
+
+/* The size in bits of the pll divide multiplied by 10
+ * to allow rounding later */
+#define FIXED_PLL_SIZE ((1 << 24) * 10)
+
+static int pll_factors(unsigned int source, unsigned int target,
+		       struct _pll_div *pll_div)
+{
+	unsigned long long Kpart;
+	unsigned int K, Ndiv, Nmod;
+
+	pr_debug("WM8960 PLL: setting %dHz->%dHz\n", source, target);
+
+	/* Scale up target to PLL operating frequency */
+	target *= 4;
+
+	Ndiv = target / source;
+	if (Ndiv < 6) {
+		source >>= 1;
+		pll_div->pre_div = 1;
+		Ndiv = target / source;
+	} else
+		pll_div->pre_div = 0;
+
+	if ((Ndiv < 6) || (Ndiv > 12)) {
+		pr_err("WM8960 PLL: Unsupported N=%d\n", Ndiv);
+		return -EINVAL;
+	}
+
+	pll_div->n = Ndiv;
+	Nmod = target % source;
+	Kpart = FIXED_PLL_SIZE * (long long)Nmod;
+
+	do_div(Kpart, source);
+
+	K = Kpart & 0xFFFFFFFF;
+
+	/* Check if we need to round */
+	if ((K % 10) >= 5)
+		K += 5;
+
+	/* Move down to proper range now rounding is done */
+	K /= 10;
+
+	pll_div->k = K;
+
+	pr_debug("WM8960 PLL: N=%x K=%x pre_div=%d\n",
+		 pll_div->n, pll_div->k, pll_div->pre_div);
+
+	return 0;
+}
+
+static int wm8960_set_dai_pll(struct snd_soc_dai *codec_dai,
+		int pll_id, unsigned int freq_in, unsigned int freq_out)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 reg;
+	static struct _pll_div pll_div;
+	int ret;
+
+	if (freq_in && freq_out) {
+		ret = pll_factors(freq_in, freq_out, &pll_div);
+		if (ret != 0)
+			return ret;
+	}
+
+	/* Disable the PLL: even if we are changing the frequency the
+	 * PLL needs to be disabled while we do so. */
+	wm8960_write(codec, WM8960_CLOCK1,
+		     wm8960_read(codec, WM8960_CLOCK1) & ~1);
+	wm8960_write(codec, WM8960_POWER2,
+		     wm8960_read(codec, WM8960_POWER2) & ~1);
+
+	if (!freq_in || !freq_out)
+		return 0;
+
+	reg = wm8960_read(codec, WM8960_PLL1) & ~0x3f;
+	reg |= pll_div.pre_div << 4;
+	reg |= pll_div.n;
+
+	if (pll_div.k) {
+		reg |= 0x20;
+
+		wm8960_write(codec, WM8960_PLL2, (pll_div.k >> 18) & 0x3f);
+		wm8960_write(codec, WM8960_PLL3, (pll_div.k >> 9) & 0x1ff);
+		wm8960_write(codec, WM8960_PLL4, pll_div.k & 0x1ff);
+	}
+	wm8960_write(codec, WM8960_PLL1, reg);
+
+	/* Turn it on */
+	wm8960_write(codec, WM8960_POWER2,
+		     wm8960_read(codec, WM8960_POWER2) | 1);
+	msleep(250);
+	wm8960_write(codec, WM8960_CLOCK1,
+		     wm8960_read(codec, WM8960_CLOCK1) | 1);
+
+	return 0;
+}
+
+static int wm8960_set_dai_clkdiv(struct snd_soc_dai *codec_dai,
+		int div_id, int div)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 reg;
+
+	switch (div_id) {
+	case WM8960_SYSCLKSEL:
+		reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1fe;
+		wm8960_write(codec, WM8960_CLOCK1, reg | div);
+		break;
+	case WM8960_SYSCLKDIV:
+		reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1f9;
+		wm8960_write(codec, WM8960_CLOCK1, reg | div);
+		break;
+	case WM8960_DACDIV:
+		reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1c7;
+		wm8960_write(codec, WM8960_CLOCK1, reg | div);
+		break;
+	case WM8960_OPCLKDIV:
+		reg = wm8960_read(codec, WM8960_PLL1) & 0x03f;
+		wm8960_write(codec, WM8960_PLL1, reg | div);
+		break;
+	case WM8960_DCLKDIV:
+		reg = wm8960_read(codec, WM8960_CLOCK2) & 0x03f;
+		wm8960_write(codec, WM8960_CLOCK2, reg | div);
+		break;
+	case WM8960_TOCLKSEL:
+		reg = wm8960_read(codec, WM8960_ADDCTL1) & 0x1fd;
+		wm8960_write(codec, WM8960_ADDCTL1, reg | div);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define WM8960_RATES SNDRV_PCM_RATE_8000_48000
+
+#define WM8960_FORMATS \
+	(SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \
+	SNDRV_PCM_FMTBIT_S24_LE)
+
+static struct snd_soc_dai_ops wm8960_dai_ops = {
+	.hw_params = wm8960_hw_params,
+	.digital_mute = wm8960_mute,
+	.set_fmt = wm8960_set_dai_fmt,
+	.set_clkdiv = wm8960_set_dai_clkdiv,
+	.set_pll = wm8960_set_dai_pll,
+};
+
+struct snd_soc_dai wm8960_dai = {
+	.name = "WM8960",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8960_RATES,
+		.formats = WM8960_FORMATS,},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8960_RATES,
+		.formats = WM8960_FORMATS,},
+	.ops = &wm8960_dai_ops,
+	.symmetric_rates = 1,
+};
+EXPORT_SYMBOL_GPL(wm8960_dai);
+
+static int wm8960_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	wm8960_set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+
+static int wm8960_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int i;
+	u8 data[2];
+	u16 *cache = codec->reg_cache;
+
+	/* Sync reg_cache with the hardware */
+	for (i = 0; i < ARRAY_SIZE(wm8960_reg); i++) {
+		data[0] = (i << 1) | ((cache[i] >> 8) & 0x0001);
+		data[1] = cache[i] & 0x00ff;
+		codec->hw_write(codec->control_data, data, 2);
+	}
+
+	wm8960_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+	wm8960_set_bias_level(codec, codec->suspend_bias_level);
+	return 0;
+}
+
+static struct snd_soc_codec *wm8960_codec;
+
+static int wm8960_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	int ret = 0;
+
+	if (wm8960_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm8960_codec;
+	codec = wm8960_codec;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	snd_soc_add_controls(codec, wm8960_snd_controls,
+			     ARRAY_SIZE(wm8960_snd_controls));
+	wm8960_add_widgets(codec);
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto card_err;
+	}
+
+	return ret;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+/* power down chip */
+static int wm8960_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+struct snd_soc_codec_device soc_codec_dev_wm8960 = {
+	.probe = 	wm8960_probe,
+	.remove = 	wm8960_remove,
+	.suspend = 	wm8960_suspend,
+	.resume =	wm8960_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm8960);
+
+static int wm8960_register(struct wm8960_priv *wm8960)
+{
+	struct wm8960_data *pdata = wm8960->codec.dev->platform_data;
+	struct snd_soc_codec *codec = &wm8960->codec;
+	int ret;
+	u16 reg;
+
+	if (wm8960_codec) {
+		dev_err(codec->dev, "Another WM8960 is registered\n");
+		return -EINVAL;
+	}
+
+	if (!pdata) {
+		dev_warn(codec->dev, "No platform data supplied\n");
+	} else {
+		if (pdata->dres > WM8960_DRES_MAX) {
+			dev_err(codec->dev, "Invalid DRES: %d\n", pdata->dres);
+			pdata->dres = 0;
+		}
+	}
+
+	mutex_init(&codec->mutex);
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm8960;
+	codec->name = "WM8960";
+	codec->owner = THIS_MODULE;
+	codec->read = wm8960_read_reg_cache;
+	codec->write = wm8960_write;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm8960_set_bias_level;
+	codec->dai = &wm8960_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = WM8960_CACHEREGNUM;
+	codec->reg_cache = &wm8960->reg_cache;
+
+	memcpy(codec->reg_cache, wm8960_reg, sizeof(wm8960_reg));
+
+	ret = wm8960_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	wm8960_dai.dev = codec->dev;
+
+	wm8960_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	/* Latch the update bits */
+	reg = wm8960_read(codec, WM8960_LINVOL);
+	wm8960_write(codec, WM8960_LINVOL, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_RINVOL);
+	wm8960_write(codec, WM8960_RINVOL, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_LADC);
+	wm8960_write(codec, WM8960_LADC, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_RADC);
+	wm8960_write(codec, WM8960_RADC, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_LDAC);
+	wm8960_write(codec, WM8960_LDAC, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_RDAC);
+	wm8960_write(codec, WM8960_RDAC, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_LOUT1);
+	wm8960_write(codec, WM8960_LOUT1, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_ROUT1);
+	wm8960_write(codec, WM8960_ROUT1, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_LOUT2);
+	wm8960_write(codec, WM8960_LOUT2, reg | 0x100);
+	reg = wm8960_read(codec, WM8960_ROUT2);
+	wm8960_write(codec, WM8960_ROUT2, reg | 0x100);
+
+	wm8960_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm8960_dai);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void wm8960_unregister(struct wm8960_priv *wm8960)
+{
+	wm8960_set_bias_level(&wm8960->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm8960_dai);
+	snd_soc_unregister_codec(&wm8960->codec);
+	kfree(wm8960);
+	wm8960_codec = NULL;
+}
+
+static __devinit int wm8960_i2c_probe(struct i2c_client *i2c,
+				      const struct i2c_device_id *id)
+{
+	struct wm8960_priv *wm8960;
+	struct snd_soc_codec *codec;
+
+	wm8960 = kzalloc(sizeof(struct wm8960_priv), GFP_KERNEL);
+	if (wm8960 == NULL)
+		return -ENOMEM;
+
+	codec = &wm8960->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+
+	i2c_set_clientdata(i2c, wm8960);
+	codec->control_data = i2c;
+
+	codec->dev = &i2c->dev;
+
+	return wm8960_register(wm8960);
+}
+
+static __devexit int wm8960_i2c_remove(struct i2c_client *client)
+{
+	struct wm8960_priv *wm8960 = i2c_get_clientdata(client);
+	wm8960_unregister(wm8960);
+	return 0;
+}
+
+static const struct i2c_device_id wm8960_i2c_id[] = {
+	{ "wm8960", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm8960_i2c_id);
+
+static struct i2c_driver wm8960_i2c_driver = {
+	.driver = {
+		.name = "WM8960 I2C Codec",
+		.owner = THIS_MODULE,
+	},
+	.probe =    wm8960_i2c_probe,
+	.remove =   __devexit_p(wm8960_i2c_remove),
+	.id_table = wm8960_i2c_id,
+};
+
+static int __init wm8960_modinit(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&wm8960_i2c_driver);
+	if (ret != 0) {
+		printk(KERN_ERR "Failed to register WM8960 I2C driver: %d\n",
+		       ret);
+	}
+
+	return ret;
+}
+module_init(wm8960_modinit);
+
+static void __exit wm8960_exit(void)
+{
+	i2c_del_driver(&wm8960_i2c_driver);
+}
+module_exit(wm8960_exit);
+
+
+MODULE_DESCRIPTION("ASoC WM8960 driver");
+MODULE_AUTHOR("Liam Girdwood");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm8960.h b/sound/soc/codecs/wm8960.h
new file mode 100644
index 0000000..c9af56c
--- /dev/null
+++ b/sound/soc/codecs/wm8960.h
@@ -0,0 +1,127 @@
+/*
+ * wm8960.h  --  WM8960 Soc Audio driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _WM8960_H
+#define _WM8960_H
+
+/* WM8960 register space */
+
+
+#define WM8960_CACHEREGNUM 	56
+
+#define WM8960_LINVOL		0x0
+#define WM8960_RINVOL		0x1
+#define WM8960_LOUT1		0x2
+#define WM8960_ROUT1		0x3
+#define WM8960_CLOCK1		0x4
+#define WM8960_DACCTL1		0x5
+#define WM8960_DACCTL2		0x6
+#define WM8960_IFACE1		0x7
+#define WM8960_CLOCK2		0x8
+#define WM8960_IFACE2		0x9
+#define WM8960_LDAC		0xa
+#define WM8960_RDAC		0xb
+
+#define WM8960_RESET		0xf
+#define WM8960_3D		0x10
+#define WM8960_ALC1		0x11
+#define WM8960_ALC2		0x12
+#define WM8960_ALC3		0x13
+#define WM8960_NOISEG		0x14
+#define WM8960_LADC		0x15
+#define WM8960_RADC		0x16
+#define WM8960_ADDCTL1		0x17
+#define WM8960_ADDCTL2		0x18
+#define WM8960_POWER1		0x19
+#define WM8960_POWER2		0x1a
+#define WM8960_ADDCTL3		0x1b
+#define WM8960_APOP1		0x1c
+#define WM8960_APOP2		0x1d
+
+#define WM8960_LINPATH		0x20
+#define WM8960_RINPATH		0x21
+#define WM8960_LOUTMIX		0x22
+
+#define WM8960_ROUTMIX		0x25
+#define WM8960_MONOMIX1		0x26
+#define WM8960_MONOMIX2		0x27
+#define WM8960_LOUT2		0x28
+#define WM8960_ROUT2		0x29
+#define WM8960_MONO		0x2a
+#define WM8960_INBMIX1		0x2b
+#define WM8960_INBMIX2		0x2c
+#define WM8960_BYPASS1		0x2d
+#define WM8960_BYPASS2		0x2e
+#define WM8960_POWER3		0x2f
+#define WM8960_ADDCTL4		0x30
+#define WM8960_CLASSD1		0x31
+
+#define WM8960_CLASSD3		0x33
+#define WM8960_PLL1		0x34
+#define WM8960_PLL2		0x35
+#define WM8960_PLL3		0x36
+#define WM8960_PLL4		0x37
+
+
+/*
+ * WM8960 Clock dividers
+ */
+#define WM8960_SYSCLKDIV 		0
+#define WM8960_DACDIV			1
+#define WM8960_OPCLKDIV			2
+#define WM8960_DCLKDIV			3
+#define WM8960_TOCLKSEL			4
+#define WM8960_SYSCLKSEL		5
+
+#define WM8960_SYSCLK_DIV_1		(0 << 1)
+#define WM8960_SYSCLK_DIV_2		(2 << 1)
+
+#define WM8960_SYSCLK_MCLK		(0 << 0)
+#define WM8960_SYSCLK_PLL		(1 << 0)
+
+#define WM8960_DAC_DIV_1		(0 << 3)
+#define WM8960_DAC_DIV_1_5		(1 << 3)
+#define WM8960_DAC_DIV_2		(2 << 3)
+#define WM8960_DAC_DIV_3		(3 << 3)
+#define WM8960_DAC_DIV_4		(4 << 3)
+#define WM8960_DAC_DIV_5_5		(5 << 3)
+#define WM8960_DAC_DIV_6		(6 << 3)
+
+#define WM8960_DCLK_DIV_1_5		(0 << 6)
+#define WM8960_DCLK_DIV_2		(1 << 6)
+#define WM8960_DCLK_DIV_3		(2 << 6)
+#define WM8960_DCLK_DIV_4		(3 << 6)
+#define WM8960_DCLK_DIV_6		(4 << 6)
+#define WM8960_DCLK_DIV_8		(5 << 6)
+#define WM8960_DCLK_DIV_12		(6 << 6)
+#define WM8960_DCLK_DIV_16		(7 << 6)
+
+#define WM8960_TOCLK_F19		(0 << 1)
+#define WM8960_TOCLK_F21		(1 << 1)
+
+#define WM8960_OPCLK_DIV_1		(0 << 0)
+#define WM8960_OPCLK_DIV_2		(1 << 0)
+#define WM8960_OPCLK_DIV_3		(2 << 0)
+#define WM8960_OPCLK_DIV_4		(3 << 0)
+#define WM8960_OPCLK_DIV_5_5		(4 << 0)
+#define WM8960_OPCLK_DIV_6		(5 << 0)
+
+extern struct snd_soc_dai wm8960_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm8960;
+
+#define WM8960_DRES_400R 0
+#define WM8960_DRES_200R 1
+#define WM8960_DRES_600R 2
+#define WM8960_DRES_150R 3
+#define WM8960_DRES_MAX  3
+
+struct wm8960_data {
+	int dres;
+};
+
+#endif
-- 
cgit v0.10.2


From 0f3fd87ce43727d6b8573191ce89e874533b1429 Mon Sep 17 00:00:00 2001
From: Luis Henriques <henrix@sapo.pt>
Date: Mon, 13 Apr 2009 20:24:50 +0100
Subject: perf_counter: fix alignment in /proc/interrupts

Trivial fix on columns alignment in /proc/interrupts file.

Signed-off-by: Luis Henriques <henrix@sapo.pt>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090413192449.GA3920@hades.domain.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index dccaaa8..849cfab 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -67,7 +67,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
 	seq_printf(p, "  Performance counter interrupts\n");
-	seq_printf(p, "PND: ");
+	seq_printf(p, "%*s: ", prec, "PND");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
 	seq_printf(p, "  Performance pending work\n");
-- 
cgit v0.10.2


From 5cda395f4a262788d8ed79ac8a26a2b821e5f751 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Mon, 13 Apr 2009 17:39:24 +0200
Subject: x86: fix function definitions after: x86: apic - introduce imcr_
 helpers

The patch "introduce imcr_ helpers" introduced good comments, but
also a few new compile warnings. This fixes the function definitions
to have a 'void' return type.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090413153924.GA20287@mailshack.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 9b849d4..4b48ff9 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -106,7 +106,7 @@ static int enabled_via_apicbase;
  * the BIOS or the operating system must switch out of
  * PIC Mode by changing the IMCR.
  */
-static inline imcr_pic_to_apic(void)
+static inline void imcr_pic_to_apic(void)
 {
 	/* select IMCR register */
 	outb(0x70, 0x22);
@@ -114,7 +114,7 @@ static inline imcr_pic_to_apic(void)
 	outb(0x01, 0x23);
 }
 
-static inline imcr_apic_to_pic(void)
+static inline void imcr_apic_to_pic(void)
 {
 	/* select IMCR register */
 	outb(0x70, 0x22);
-- 
cgit v0.10.2


From e1112b4d96859367a93468027c9635e2ac04eb3f Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 31 Mar 2009 00:48:49 -0500
Subject: tracing/filters: add run-time field descriptions to
 TRACE_EVENT_FORMAT events

This patch adds run-time field descriptions to all the event formats
exported using TRACE_EVENT_FORMAT.  It also hooks up all the tracers
that use them (i.e. the tracers in the 'ftrace subsystem') so they can
also have their output filtered by the event-filtering mechanism.

When I was testing this, there were a couple of things that fooled me
into thinking the filters weren't working, when actually they were -
I'll mention them here so others don't make the same mistakes (and file
bug reports. ;-)

One is that some of the tracers trace multiple events e.g. the
sched_switch tracer uses the context_switch and wakeup events, and if
you don't set filters on all of the traced events, the unfiltered output
from the events without filters on them can make it look like the
filtering as a whole isn't working properly, when actually it is doing
what it was asked to do - it just wasn't asked to do the right thing.

The other is that for the really high-volume tracers e.g. the function
tracer, the volume of filtered events can be so high that it pushes the
unfiltered events out of the ring buffer before they can be read so e.g.
cat'ing the trace file repeatedly shows either no output, or once in
awhile some output but that isn't there the next time you read the
trace, which isn't what you normally expect when reading the trace file.
If you read from the trace_pipe file though, you can catch them before
they disappear.

Changes from v1:

As suggested by Frederic Weisbecker:

- get rid of externs in functions
- added unlikely() to filter_check_discard()

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 7a0aa0e..9419ad1 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
 				   gfp_t gfp_flags,
 				   int node)
 {
+	struct ftrace_event_call *call = &event_kmem_alloc;
 	struct trace_array *tr = kmemtrace_array;
 	struct kmemtrace_alloc_entry *entry;
 	struct ring_buffer_event *event;
@@ -62,6 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
 	entry->gfp_flags	= gfp_flags;
 	entry->node		= node;
 
+	filter_check_discard(call, entry, event);
+
 	ring_buffer_unlock_commit(tr->buffer, event);
 
 	trace_wake_up();
@@ -71,6 +74,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
 				  unsigned long call_site,
 				  const void *ptr)
 {
+	struct ftrace_event_call *call = &event_kmem_free;
 	struct trace_array *tr = kmemtrace_array;
 	struct kmemtrace_free_entry *entry;
 	struct ring_buffer_event *event;
@@ -86,6 +90,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
 	entry->call_site	= call_site;
 	entry->ptr		= ptr;
 
+	filter_check_discard(call, entry, event);
+
 	ring_buffer_unlock_commit(tr->buffer, event);
 
 	trace_wake_up();
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4865459..962e617 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -898,6 +898,7 @@ trace_function(struct trace_array *tr,
 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
 	       int pc)
 {
+	struct ftrace_event_call *call = &event_function;
 	struct ring_buffer_event *event;
 	struct ftrace_entry *entry;
 
@@ -912,6 +913,9 @@ trace_function(struct trace_array *tr,
 	entry	= ring_buffer_event_data(event);
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
+
+	filter_check_discard(call, entry, event);
+
 	ring_buffer_unlock_commit(tr->buffer, event);
 }
 
@@ -921,6 +925,7 @@ static int __trace_graph_entry(struct trace_array *tr,
 				unsigned long flags,
 				int pc)
 {
+	struct ftrace_event_call *call = &event_funcgraph_entry;
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ent_entry *entry;
 
@@ -933,6 +938,7 @@ static int __trace_graph_entry(struct trace_array *tr,
 		return 0;
 	entry	= ring_buffer_event_data(event);
 	entry->graph_ent			= *trace;
+	filter_check_discard(call, entry, event);
 	ring_buffer_unlock_commit(global_trace.buffer, event);
 
 	return 1;
@@ -943,6 +949,7 @@ static void __trace_graph_return(struct trace_array *tr,
 				unsigned long flags,
 				int pc)
 {
+	struct ftrace_event_call *call = &event_funcgraph_exit;
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ret_entry *entry;
 
@@ -955,6 +962,7 @@ static void __trace_graph_return(struct trace_array *tr,
 		return;
 	entry	= ring_buffer_event_data(event);
 	entry->ret				= *trace;
+	filter_check_discard(call, entry, event);
 	ring_buffer_unlock_commit(global_trace.buffer, event);
 }
 #endif
@@ -973,6 +981,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
 				 int skip, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+	struct ftrace_event_call *call = &event_kernel_stack;
 	struct ring_buffer_event *event;
 	struct stack_entry *entry;
 	struct stack_trace trace;
@@ -990,6 +999,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
 	trace.entries		= entry->caller;
 
 	save_stack_trace(&trace);
+	filter_check_discard(call, entry, event);
 	ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
@@ -1015,6 +1025,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
 				   unsigned long flags, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+	struct ftrace_event_call *call = &event_user_stack;
 	struct ring_buffer_event *event;
 	struct userstack_entry *entry;
 	struct stack_trace trace;
@@ -1036,6 +1047,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
 	trace.entries		= entry->caller;
 
 	save_stack_trace_user(&trace);
+	filter_check_discard(call, entry, event);
 	ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
@@ -1052,6 +1064,7 @@ ftrace_trace_special(void *__tr,
 		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
 		     int pc)
 {
+	struct ftrace_event_call *call = &event_special;
 	struct ring_buffer_event *event;
 	struct trace_array *tr = __tr;
 	struct special_entry *entry;
@@ -1064,6 +1077,7 @@ ftrace_trace_special(void *__tr,
 	entry->arg1			= arg1;
 	entry->arg2			= arg2;
 	entry->arg3			= arg3;
+	filter_check_discard(call, entry, event);
 	trace_buffer_unlock_commit(tr, event, 0, pc);
 }
 
@@ -1080,6 +1094,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 			   struct task_struct *next,
 			   unsigned long flags, int pc)
 {
+	struct ftrace_event_call *call = &event_context_switch;
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
 
@@ -1095,6 +1110,9 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->next_prio		= next->prio;
 	entry->next_state		= next->state;
 	entry->next_cpu	= task_cpu(next);
+
+	filter_check_discard(call, entry, event);
+
 	trace_buffer_unlock_commit(tr, event, flags, pc);
 }
 
@@ -1104,6 +1122,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 			   struct task_struct *curr,
 			   unsigned long flags, int pc)
 {
+	struct ftrace_event_call *call = &event_wakeup;
 	struct ring_buffer_event *event;
 	struct ctx_switch_entry *entry;
 
@@ -1120,6 +1139,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_state		= wakee->state;
 	entry->next_cpu			= task_cpu(wakee);
 
+	filter_check_discard(call, entry, event);
+
 	ring_buffer_unlock_commit(tr->buffer, event);
 	ftrace_trace_stack(tr, flags, 6, pc);
 	ftrace_trace_userstack(tr, flags, pc);
@@ -1221,6 +1242,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	static u32 trace_buf[TRACE_BUF_SIZE];
 
+	struct ftrace_event_call *call = &event_bprint;
 	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
@@ -1260,6 +1282,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	entry->fmt			= fmt;
 
 	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+	filter_check_discard(call, entry, event);
 	ring_buffer_unlock_commit(tr->buffer, event);
 
 out_unlock:
@@ -1279,6 +1302,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
 	static char trace_buf[TRACE_BUF_SIZE];
 
+	struct ftrace_event_call *call = &event_print;
 	struct ring_buffer_event *event;
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
@@ -1314,6 +1338,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 
 	memcpy(&entry->buf, trace_buf, len);
 	entry->buf[len] = 0;
+	filter_check_discard(call, entry, event);
 	ring_buffer_unlock_commit(tr->buffer, event);
 
  out_unlock:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 34b94c3..e773728 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -866,6 +866,21 @@ extern void filter_free_subsystem_preds(struct event_subsystem *system);
 extern int filter_add_subsystem_pred(struct event_subsystem *system,
 				     struct filter_pred *pred);
 
+static inline void
+filter_check_discard(struct ftrace_event_call *call, void *rec,
+		     struct ring_buffer_event *event)
+{
+	if (unlikely(call->preds) && !filter_match_preds(call, rec))
+		ring_buffer_event_discard(event);
+}
+
+#define __common_field(type, item)					\
+	ret = trace_define_field(event_call, #type, "common_" #item,	\
+				 offsetof(typeof(field.ent), item),	\
+				 sizeof(field.ent.item));		\
+	if (ret)							\
+		return ret;
+
 void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
@@ -897,4 +912,9 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+	extern struct ftrace_event_call event_##call;
+#include "trace_event_types.h"
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index e6e3291..c95c25d 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -30,6 +30,7 @@ static struct trace_array *branch_tracer;
 static void
 probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 {
+	struct ftrace_event_call *call = &event_branch;
 	struct trace_array *tr = branch_tracer;
 	struct ring_buffer_event *event;
 	struct trace_branch *entry;
@@ -73,6 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 	entry->line = f->line;
 	entry->correct = val == expect;
 
+	filter_check_discard(call, entry, event);
+
 	ring_buffer_unlock_commit(tr->buffer, event);
 
  out:
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index fd78bee..95b147a 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
 TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned int, line, line)
-		TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func)
-		TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file)
+		TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
+				    TRACE_FUNC_SIZE+1, func)
+		TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
+				    TRACE_FUNC_SIZE+1, file)
 		TRACE_FIELD(char, correct, correct)
 	),
 	TP_RAW_FMT("%u:%s:%s (%u)")
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 64ec4d2..be9299a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -680,6 +680,7 @@ static struct dentry *
 event_subsystem_dir(const char *name, struct dentry *d_events)
 {
 	struct event_subsystem *system;
+	struct dentry *entry;
 
 	/* First see if we did not already create this dir */
 	list_for_each_entry(system, &event_subsystems, list) {
@@ -708,6 +709,12 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
 	system->preds = NULL;
 
+	entry = debugfs_create_file("filter", 0644, system->entry, system,
+				    &ftrace_subsystem_filter_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'%s/filter' entry\n", name);
+
 	return system->entry;
 }
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 026be41..470ad94 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -185,7 +185,7 @@ void filter_free_subsystem_preds(struct event_subsystem *system)
 	}
 
 	events_for_each(call) {
-		if (!call->name || !call->regfunc)
+		if (!call->define_fields)
 			continue;
 
 		if (!strcmp(call->system, system->name))
@@ -324,7 +324,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 	events_for_each(call) {
 		int err;
 
-		if (!call->name || !call->regfunc)
+		if (!call->define_fields)
 			continue;
 
 		if (strcmp(call->system, system->name))
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 30743f7..1c94b87 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -146,13 +146,6 @@ ftrace_format_##call(struct trace_seq *s)				\
 	if (ret)							\
 		return ret;
 
-#define __common_field(type, item)					\
-	ret = trace_define_field(event_call, #type, "common_" #item,	\
-				 offsetof(typeof(field.ent), item),	\
-				 sizeof(field.ent.item));		\
-	if (ret)							\
-		return ret;
-
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
 int									\
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 07a22c3..f4e4661 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -30,7 +30,7 @@
 
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd)			\
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)			\
 	ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t"	\
 			       "offset:%u;\tsize:%u;\n",		\
 			       (unsigned int)offsetof(typeof(field), item), \
@@ -85,18 +85,69 @@ ftrace_format_##call(struct trace_seq *s)				\
 #define TRACE_ENTRY	entry
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)	\
 	cmd;
 
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+int ftrace_define_fields_##call(void);					\
+static int ftrace_raw_init_event_##call(void);				\
 									\
-static struct ftrace_event_call __used					\
+struct ftrace_event_call __used						\
 __attribute__((__aligned__(4)))						\
 __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.id			= proto,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
 	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##call,		\
+};									\
+static int ftrace_raw_init_event_##call(void)				\
+{									\
+	INIT_LIST_HEAD(&event_##call.fields);				\
+	return 0;							\
+}									\
+
+#include "trace_event_types.h"
+
+#undef TRACE_FIELD
+#define TRACE_FIELD(type, item, assign)					\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item));			\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_SPECIAL
+#define TRACE_FIELD_SPECIAL(type, item, len, cmd)			\
+	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item));			\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_ZERO_CHAR
+#define TRACE_FIELD_ZERO_CHAR(item)
+
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
+int									\
+ftrace_define_fields_##call(void)					\
+{									\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	struct args field;						\
+	int ret;							\
+									\
+	__common_field(unsigned char, type);				\
+	__common_field(unsigned char, flags);				\
+	__common_field(unsigned char, preempt_count);			\
+	__common_field(int, pid);					\
+	__common_field(int, tgid);					\
+									\
+	tstruct;							\
+									\
+	return ret;							\
 }
+
 #include "trace_event_types.h"
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 7bfdf4c..e6b275b 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -168,6 +168,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
 
 void trace_hw_branch(u64 from, u64 to)
 {
+	struct ftrace_event_call *call = &event_hw_branch;
 	struct trace_array *tr = hw_branch_trace;
 	struct ring_buffer_event *event;
 	struct hw_branch_entry *entry;
@@ -194,6 +195,7 @@ void trace_hw_branch(u64 from, u64 to)
 	entry->ent.type = TRACE_HW_BRANCHES;
 	entry->from = from;
 	entry->to   = to;
+	filter_check_discard(call, entry, event);
 	trace_buffer_unlock_commit(tr, event, 0, 0);
 
  out:
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index bae791e..8ce7d7d 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type,
 
 static void probe_power_end(struct power_trace *it)
 {
+	struct ftrace_event_call *call = &event_power;
 	struct ring_buffer_event *event;
 	struct trace_power *entry;
 	struct trace_array_cpu *data;
@@ -54,6 +55,7 @@ static void probe_power_end(struct power_trace *it)
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	entry->state_data = *it;
+	filter_check_discard(call, entry, event);
 	trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
 	preempt_enable();
@@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it)
 static void probe_power_mark(struct power_trace *it, unsigned int type,
 				unsigned int level)
 {
+	struct ftrace_event_call *call = &event_power;
 	struct ring_buffer_event *event;
 	struct trace_power *entry;
 	struct trace_array_cpu *data;
@@ -84,6 +87,7 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	entry->state_data = *it;
+	filter_check_discard(call, entry, event);
 	trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
 	preempt_enable();
-- 
cgit v0.10.2


From e45f2e2bd298e1ff687448e5fd15a3588b5807ec Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 31 Mar 2009 00:49:16 -0500
Subject: tracing/filters: add TRACE_EVENT_FORMAT_NOFILTER event macro

Frederic Weisbecker suggested that the trace_special event shouldn't be
filterable; this patch adds a TRACE_EVENT_FORMAT_NOFILTER event macro
that allows an event format to be exported without having a filter
attached, and removes filtering from the trace_special event.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 962e617..c209d21 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1064,7 +1064,6 @@ ftrace_trace_special(void *__tr,
 		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
 		     int pc)
 {
-	struct ftrace_event_call *call = &event_special;
 	struct ring_buffer_event *event;
 	struct trace_array *tr = __tr;
 	struct special_entry *entry;
@@ -1077,7 +1076,6 @@ ftrace_trace_special(void *__tr,
 	entry->arg1			= arg1;
 	entry->arg2			= arg2;
 	entry->arg3			= arg3;
-	filter_check_discard(call, entry, event);
 	trace_buffer_unlock_commit(tr, event, 0, pc);
 }
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e773728..3cf856f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -915,6 +915,8 @@ do {									\
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
 	extern struct ftrace_event_call event_##call;
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
 #include "trace_event_types.h"
 
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 95b147a..cfcecc4 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -57,7 +57,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
 	TP_RAW_FMT("%u:%u:%u  ==+ %u:%u:%u [%03u]")
 );
 
-TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore,
+TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned long, arg1, arg1)
 		TRACE_FIELD(unsigned long, arg2, arg2)
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index f4e4661..77c494f 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -65,6 +65,22 @@ ftrace_format_##call(struct trace_seq *s)				\
 	return ret;							\
 }
 
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)				\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct args field;						\
+	int ret;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt);		\
+									\
+	return ret;							\
+}
+
 #include "trace_event_types.h"
 
 #undef TRACE_ZERO_CHAR
@@ -109,6 +125,19 @@ static int ftrace_raw_init_event_##call(void)				\
 	return 0;							\
 }									\
 
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)				\
+									\
+struct ftrace_event_call __used						\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.id			= proto,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.show_format		= ftrace_format_##call,			\
+};
+
 #include "trace_event_types.h"
 
 #undef TRACE_FIELD
@@ -150,4 +179,8 @@ ftrace_define_fields_##call(void)					\
 	return ret;							\
 }
 
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,	\
+				    tpfmt)
+
 #include "trace_event_types.h"
-- 
cgit v0.10.2


From fa1b47dd85453ec7d4bcfe4aa4a2d172ba452fc3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 2 Apr 2009 00:09:41 -0400
Subject: ring-buffer: add ring_buffer_discard_commit

The ring_buffer_discard_commit is similar to ring_buffer_event_discard
but it can only be done on an event that has yet to be commited.
Unpredictable results can happen otherwise.

The main difference between ring_buffer_discard_commit and
ring_buffer_event_discard is that ring_buffer_discard_commit will try
to free the data in the ring buffer if nothing has addded data
after the reserved event. If something did, then it acts almost the
same as ring_buffer_event_discard followed by a
ring_buffer_unlock_commit.

Note, either ring_buffer_commit_discard and ring_buffer_unlock_commit
can be called on an event, not both.

This commit also exports both discard functions to be usable by
GPL modules.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e1b7b21..f0aa486 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -68,9 +68,38 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event)
 	return event->time_delta;
 }
 
+/*
+ * ring_buffer_event_discard can discard any event in the ring buffer.
+ *   it is up to the caller to protect against a reader from
+ *   consuming it or a writer from wrapping and replacing it.
+ *
+ * No external protection is needed if this is called before
+ * the event is commited. But in that case it would be better to
+ * use ring_buffer_discard_commit.
+ *
+ * Note, if an event that has not been committed is discarded
+ * with ring_buffer_event_discard, it must still be committed.
+ */
 void ring_buffer_event_discard(struct ring_buffer_event *event);
 
 /*
+ * ring_buffer_discard_commit will remove an event that has not
+ *   ben committed yet. If this is used, then ring_buffer_unlock_commit
+ *   must not be called on the discarded event. This function
+ *   will try to remove the event from the ring buffer completely
+ *   if another event has not been written after it.
+ *
+ * Example use:
+ *
+ *  if (some_condition)
+ *    ring_buffer_discard_commit(buffer, event);
+ *  else
+ *    ring_buffer_unlock_commit(buffer, event);
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event);
+
+/*
  * size is in bytes for each per CPU buffer.
  */
 struct ring_buffer *
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 74a1180..f935bd5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -205,27 +205,6 @@ static void rb_event_set_padding(struct ring_buffer_event *event)
 	event->time_delta = 0;
 }
 
-/**
- * ring_buffer_event_discard - discard an event in the ring buffer
- * @buffer: the ring buffer
- * @event: the event to discard
- *
- * Sometimes a event that is in the ring buffer needs to be ignored.
- * This function lets the user discard an event in the ring buffer
- * and then that event will not be read later.
- *
- * Note, it is up to the user to be careful with this, and protect
- * against races. If the user discards an event that has been consumed
- * it is possible that it could corrupt the ring buffer.
- */
-void ring_buffer_event_discard(struct ring_buffer_event *event)
-{
-	event->type = RINGBUF_TYPE_PADDING;
-	/* time delta must be non zero */
-	if (!event->time_delta)
-		event->time_delta = 1;
-}
-
 static unsigned
 rb_event_data_length(struct ring_buffer_event *event)
 {
@@ -1571,6 +1550,110 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
 /**
+ * ring_buffer_event_discard - discard any event in the ring buffer
+ * @event: the event to discard
+ *
+ * Sometimes a event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * Note, it is up to the user to be careful with this, and protect
+ * against races. If the user discards an event that has been consumed
+ * it is possible that it could corrupt the ring buffer.
+ */
+void ring_buffer_event_discard(struct ring_buffer_event *event)
+{
+	event->type = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	if (!event->time_delta)
+		event->time_delta = 1;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
+
+/**
+ * ring_buffer_commit_discard - discard an event that has not been committed
+ * @buffer: the ring buffer
+ * @event: non committed event to discard
+ *
+ * This is similar to ring_buffer_event_discard but must only be
+ * performed on an event that has not been committed yet. The difference
+ * is that this will also try to free the event from the ring buffer
+ * if another event has not been added behind it.
+ *
+ * If another event has been added behind it, it will set the event
+ * up as discarded, and perform the commit.
+ *
+ * If this function is called, do not call ring_buffer_unlock_commit on
+ * the event.
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long new_index, old_index;
+	struct buffer_page *bpage;
+	unsigned long index;
+	unsigned long addr;
+	int cpu;
+
+	/* The event is discarded regardless */
+	ring_buffer_event_discard(event);
+
+	/*
+	 * This must only be called if the event has not been
+	 * committed yet. Thus we can assume that preemption
+	 * is still disabled.
+	 */
+	RB_WARN_ON(buffer, !preempt_count());
+
+	cpu = smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+
+	new_index = rb_event_index(event);
+	old_index = new_index + rb_event_length(event);
+	addr = (unsigned long)event;
+	addr &= PAGE_MASK;
+
+	bpage = cpu_buffer->tail_page;
+
+	if (bpage == (void *)addr && rb_page_write(bpage) == old_index) {
+		/*
+		 * This is on the tail page. It is possible that
+		 * a write could come in and move the tail page
+		 * and write to the next page. That is fine
+		 * because we just shorten what is on this page.
+		 */
+		index = local_cmpxchg(&bpage->write, old_index, new_index);
+		if (index == old_index)
+			goto out;
+	}
+
+	/*
+	 * The commit is still visible by the reader, so we
+	 * must increment entries.
+	 */
+	cpu_buffer->entries++;
+ out:
+	/*
+	 * If a write came in and pushed the tail page
+	 * we still need to update the commit pointer
+	 * if we were the commit.
+	 */
+	if (rb_is_commit(cpu_buffer, event))
+		rb_set_commit_to_write(cpu_buffer);
+
+	/*
+	 * Only the last preempt count needs to restore preemption.
+	 */
+	if (preempt_count() == 1)
+		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+	else
+		preempt_enable_no_resched_notrace();
+
+}
+EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
+
+/**
  * ring_buffer_write - write data to the buffer without reserving
  * @buffer: The ring buffer to write to.
  * @length: The length of the data being written (excluding the event header)
-- 
cgit v0.10.2


From 77d9f465d46fd67cdb82ee5e1ab99dd57a17c486 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 2 Apr 2009 01:16:59 -0400
Subject: tracing/filters: use ring_buffer_discard_commit for discarded events

The ring_buffer_discard_commit makes better usage of the ring_buffer
when an event has been discarded. It tries to remove it completely if
possible.

This patch converts the trace event filtering to use
ring_buffer_discard_commit instead of the ring_buffer_event_discard.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c209d21..d880ab2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -884,13 +884,18 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
 }
 
 void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
+	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
+}
+
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
+{
+	ring_buffer_discard_commit(global_trace.buffer, event);
 }
 
 void
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3cf856f..dfefffd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -497,6 +497,7 @@ void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc);
 void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc);
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
 
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index 9d2fa78..d2f34bf 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -223,9 +223,9 @@ static void ftrace_raw_event_##call(proto)				\
 	assign;								\
 									\
 	if (call->preds && !filter_match_preds(call, entry))		\
-		ring_buffer_event_discard(event);			\
-									\
-	trace_nowake_buffer_unlock_commit(event, irq_flags, pc);	\
+		trace_current_buffer_discard_commit(event);		\
+	else								\
+		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
 									\
 }									\
 									\
-- 
cgit v0.10.2


From 5f77a88b3f8268b11940b51d2e03d26a663ceb90 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 8 Apr 2009 03:14:01 -0500
Subject: tracing/infrastructure: separate event tracer from event support

Add a new config option, CONFIG_EVENT_TRACING that gets selected
when CONFIG_TRACING is selected and adds everything needed by the stuff
in trace_export - basically all the event tracing support needed by e.g.
bprint, minus the actual events, which are only included if
CONFIG_EVENT_TRACER is selected.

So CONFIG_EVENT_TRACER can be used to turn on or off the generated events
(what I think of as the 'event tracer'), while CONFIG_EVENT_TRACING turns
on or off the base event tracing support used by both the event tracer and
the other things such as bprint that can't be configured out.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
LKML-Reference: <1239178441.10295.34.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7fa660f..7e9b1e9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -61,7 +61,7 @@
 #define BRANCH_PROFILE()
 #endif
 
-#ifdef CONFIG_EVENT_TRACER
+#ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
 			*(_ftrace_events)				\
 			VMLINUX_SYMBOL(__stop_ftrace_events) = .;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 23b96eb..644606e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -48,6 +48,9 @@ config FTRACE_NMI_ENTER
        depends on HAVE_FTRACE_NMI_ENTER
        default y
 
+config EVENT_TRACING
+	bool
+
 config TRACING
 	bool
 	select DEBUG_FS
@@ -56,6 +59,7 @@ config TRACING
 	select TRACEPOINTS
 	select NOP_TRACER
 	select BINARY_PRINTF
+	select EVENT_TRACING
 
 #
 # Minimum requirements an architecture has to meet for us to
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2630f51..3ad367e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -40,11 +40,11 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACER) += events.o
-obj-$(CONFIG_EVENT_TRACER) += trace_export.o
+obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 
 libftrace-y := ftrace.o
-- 
cgit v0.10.2


From eb02ce017dd83985041a7e54c6449f92d53b026f Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 8 Apr 2009 03:15:54 -0500
Subject: tracing/filters: use ring_buffer_discard_commit() in
 filter_check_discard()

This patch changes filter_check_discard() to make use of the new
ring_buffer_discard_commit() function and modifies the current users to
call the old commit function in the non-discard case.

It also introduces a version of filter_check_discard() that uses the
global trace buffer (filter_current_check_discard()) for those cases.

v2 changes:

- fix compile error noticed by Ingo Molnar

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
LKML-Reference: <1239178554.10295.36.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 9419ad1..86cdf67 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -63,9 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
 	entry->gfp_flags	= gfp_flags;
 	entry->node		= node;
 
-	filter_check_discard(call, entry, event);
-
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 	trace_wake_up();
 }
@@ -90,9 +89,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
 	entry->call_site	= call_site;
 	entry->ptr		= ptr;
 
-	filter_check_discard(call, entry, event);
-
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 	trace_wake_up();
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d880ab2..c0047fc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -171,6 +171,12 @@ static struct trace_array	global_trace;
 
 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
 
+int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
+				 struct ring_buffer_event *event)
+{
+	return filter_check_discard(call, rec, global_trace.buffer, event);
+}
+
 cycle_t ftrace_now(int cpu)
 {
 	u64 ts;
@@ -919,9 +925,8 @@ trace_function(struct trace_array *tr,
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
 
-	filter_check_discard(call, entry, event);
-
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -943,8 +948,8 @@ static int __trace_graph_entry(struct trace_array *tr,
 		return 0;
 	entry	= ring_buffer_event_data(event);
 	entry->graph_ent			= *trace;
-	filter_check_discard(call, entry, event);
-	ring_buffer_unlock_commit(global_trace.buffer, event);
+	if (!filter_current_check_discard(call, entry, event))
+		ring_buffer_unlock_commit(global_trace.buffer, event);
 
 	return 1;
 }
@@ -967,8 +972,8 @@ static void __trace_graph_return(struct trace_array *tr,
 		return;
 	entry	= ring_buffer_event_data(event);
 	entry->ret				= *trace;
-	filter_check_discard(call, entry, event);
-	ring_buffer_unlock_commit(global_trace.buffer, event);
+	if (!filter_current_check_discard(call, entry, event))
+		ring_buffer_unlock_commit(global_trace.buffer, event);
 }
 #endif
 
@@ -1004,8 +1009,8 @@ static void __ftrace_trace_stack(struct trace_array *tr,
 	trace.entries		= entry->caller;
 
 	save_stack_trace(&trace);
-	filter_check_discard(call, entry, event);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
 
@@ -1052,8 +1057,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
 	trace.entries		= entry->caller;
 
 	save_stack_trace_user(&trace);
-	filter_check_discard(call, entry, event);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
 
@@ -1114,9 +1119,8 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->next_state		= next->state;
 	entry->next_cpu	= task_cpu(next);
 
-	filter_check_discard(call, entry, event);
-
-	trace_buffer_unlock_commit(tr, event, flags, pc);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, flags, pc);
 }
 
 void
@@ -1142,9 +1146,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_state		= wakee->state;
 	entry->next_cpu			= task_cpu(wakee);
 
-	filter_check_discard(call, entry, event);
-
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 	ftrace_trace_stack(tr, flags, 6, pc);
 	ftrace_trace_userstack(tr, flags, pc);
 }
@@ -1285,8 +1288,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	entry->fmt			= fmt;
 
 	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-	filter_check_discard(call, entry, event);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
 out_unlock:
 	__raw_spin_unlock(&trace_buf_lock);
@@ -1341,8 +1344,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 
 	memcpy(&entry->buf, trace_buf, len);
 	entry->buf[len] = 0;
-	filter_check_discard(call, entry, event);
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
  out_unlock:
 	__raw_spin_unlock(&trace_buf_lock);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dfefffd..9729d14 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -866,13 +866,21 @@ extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
 extern void filter_free_subsystem_preds(struct event_subsystem *system);
 extern int filter_add_subsystem_pred(struct event_subsystem *system,
 				     struct filter_pred *pred);
+extern int filter_current_check_discard(struct ftrace_event_call *call,
+					void *rec,
+					struct ring_buffer_event *event);
 
-static inline void
+static inline int
 filter_check_discard(struct ftrace_event_call *call, void *rec,
+		     struct ring_buffer *buffer,
 		     struct ring_buffer_event *event)
 {
-	if (unlikely(call->preds) && !filter_match_preds(call, rec))
-		ring_buffer_event_discard(event);
+	if (unlikely(call->preds) && !filter_match_preds(call, rec)) {
+		ring_buffer_discard_commit(buffer, event);
+		return 1;
+	}
+
+	return 0;
 }
 
 #define __common_field(type, item)					\
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index c95c25d..8e64e60 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -74,9 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 	entry->line = f->line;
 	entry->correct = val == expect;
 
-	filter_check_discard(call, entry, event);
-
-	ring_buffer_unlock_commit(tr->buffer, event);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		ring_buffer_unlock_commit(tr->buffer, event);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index d2f34bf..b2b2982 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -222,11 +222,8 @@ static void ftrace_raw_event_##call(proto)				\
 									\
 	assign;								\
 									\
-	if (call->preds && !filter_match_preds(call, entry))		\
-		trace_current_buffer_discard_commit(event);		\
-	else								\
+	if (!filter_current_check_discard(call, entry, event))		\
 		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
-									\
 }									\
 									\
 static int ftrace_raw_reg_event_##call(void)				\
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index e6b275b..8683d50 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -195,8 +195,8 @@ void trace_hw_branch(u64 from, u64 to)
 	entry->ent.type = TRACE_HW_BRANCHES;
 	entry->from = from;
 	entry->to   = to;
-	filter_check_discard(call, entry, event);
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
 
  out:
 	atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 8ce7d7d..810a5b7 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -55,8 +55,8 @@ static void probe_power_end(struct power_trace *it)
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	entry->state_data = *it;
-	filter_check_discard(call, entry, event);
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
 	preempt_enable();
 }
@@ -87,8 +87,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	entry->state_data = *it;
-	filter_check_discard(call, entry, event);
-	trace_buffer_unlock_commit(tr, event, 0, 0);
+	if (!filter_check_discard(call, entry, tr->buffer, event))
+		trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
 	preempt_enable();
 }
-- 
cgit v0.10.2


From 0a19e53c1514ad8e9c3cbab40c6c3f52c86f403d Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Mon, 13 Apr 2009 03:17:50 -0500
Subject: tracing/filters: allow on-the-fly filter switching

This patch allows event filters to be safely removed or switched
on-the-fly while avoiding the use of rcu or the suspension of tracing of
previous versions.

It does it by adding a new filter_pred_none() predicate function which
does nothing and by never deallocating either the predicates or any of
the filter_pred members used in matching; the predicate lists are
allocated and initialized during ftrace_event_calls initialization.

Whenever a filter is removed or replaced, the filter_pred_* functions
currently in use by the affected ftrace_event_call are immediately
switched over to to the filter_pred_none() function, while the rest of
the filter_pred members are left intact, allowing any currently
executing filter_pred_* functions to finish up, using the values they're
currently using.

In the case of filter replacement, the new predicate values are copied
into the old predicates after the above step, and the filter_pred_none()
functions are replaced by the filter_pred_* functions for the new
filter.  In this case, it is possible though very unlikely that a
previous filter_pred_* is still running even after the
filter_pred_none() switch and the switch to the new filter_pred_*.  In
that case, however, because nothing has been deallocated in the
filter_pred, the worst that can happen is that the old filter_pred_*
function sees the new values and as a result produces either a false
positive or a false negative, depending on the values it finds.

So one downside to this method is that rarely, it can produce a bad
match during the filter switch, but it should be possible to live with
that, IMHO.

The other downside is that at least in this patch the predicate lists
are always pre-allocated, taking up memory from the start.  They could
probably be allocated on first-use, and de-allocated when tracing is
completely stopped - if this patch makes sense, I could create another
one to do that later on.

Oh, and it also places a restriction on the size of __arrays in events,
currently set to 128, since they can't be larger than the now embedded
str_val arrays in the filter_pred struct.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: paulmck@linux.vnet.ibm.com
LKML-Reference: <1239610670.6660.49.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9729d14..b05b6ac 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -813,6 +813,7 @@ struct ftrace_event_call {
 	int			(*show_format)(struct trace_seq *s);
 	int			(*define_fields)(void);
 	struct list_head	fields;
+	int			n_preds;
 	struct filter_pred	**preds;
 
 #ifdef CONFIG_EVENT_PROFILE
@@ -826,6 +827,7 @@ struct event_subsystem {
 	struct list_head	list;
 	const char		*name;
 	struct dentry		*entry;
+	int			n_preds;
 	struct filter_pred	**preds;
 };
 
@@ -834,7 +836,8 @@ struct event_subsystem {
 	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
 	     event++)
 
-#define MAX_FILTER_PRED 8
+#define MAX_FILTER_PRED		8
+#define MAX_FILTER_STR_VAL	128
 
 struct filter_pred;
 
@@ -843,7 +846,7 @@ typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
 struct filter_pred {
 	filter_pred_fn_t fn;
 	u64 val;
-	char *str_val;
+	char str_val[MAX_FILTER_STR_VAL];
 	int str_len;
 	char *field_name;
 	int offset;
@@ -855,13 +858,14 @@ struct filter_pred {
 
 int trace_define_field(struct ftrace_event_call *call, char *type,
 		       char *name, int offset, int size);
+extern int init_preds(struct ftrace_event_call *call);
 extern void filter_free_pred(struct filter_pred *pred);
-extern void filter_print_preds(struct filter_pred **preds,
+extern void filter_print_preds(struct filter_pred **preds, int n_preds,
 			       struct trace_seq *s);
 extern int filter_parse(char **pbuf, struct filter_pred *pred);
 extern int filter_add_pred(struct ftrace_event_call *call,
 			   struct filter_pred *pred);
-extern void filter_free_preds(struct ftrace_event_call *call);
+extern void filter_disable_preds(struct ftrace_event_call *call);
 extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
 extern void filter_free_subsystem_preds(struct event_subsystem *system);
 extern int filter_add_subsystem_pred(struct event_subsystem *system,
@@ -875,7 +879,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 		     struct ring_buffer *buffer,
 		     struct ring_buffer_event *event)
 {
-	if (unlikely(call->preds) && !filter_match_preds(call, rec)) {
+	if (unlikely(call->n_preds) && !filter_match_preds(call, rec)) {
 		ring_buffer_discard_commit(buffer, event);
 		return 1;
 	}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 789e14e..ead68ac 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -481,7 +481,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	trace_seq_init(s);
 
-	filter_print_preds(call->preds, s);
+	filter_print_preds(call->preds, call->n_preds, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -516,7 +516,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	}
 
 	if (pred->clear) {
-		filter_free_preds(call);
+		filter_disable_preds(call);
 		filter_free_pred(pred);
 		return cnt;
 	}
@@ -527,6 +527,8 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		return err;
 	}
 
+	filter_free_pred(pred);
+
 	*ppos += cnt;
 
 	return cnt;
@@ -549,7 +551,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	trace_seq_init(s);
 
-	filter_print_preds(system->preds, s);
+	filter_print_preds(system->preds, system->n_preds, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -712,6 +714,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 	list_add(&system->list, &event_subsystems);
 
 	system->preds = NULL;
+	system->n_preds = 0;
 
 	entry = debugfs_create_file("filter", 0644, system->entry, system,
 				    &ftrace_subsystem_filter_fops);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 9f8ecca..de42dad 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -82,25 +82,27 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
 	return match;
 }
 
+static int filter_pred_none(struct filter_pred *pred, void *event)
+{
+	return 0;
+}
+
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct ftrace_event_call *call, void *rec)
 {
 	int i, matched, and_failed = 0;
 	struct filter_pred *pred;
 
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (call->preds[i]) {
-			pred = call->preds[i];
-			if (and_failed && !pred->or)
-				continue;
-			matched = pred->fn(pred, rec);
-			if (!matched && !pred->or) {
-				and_failed = 1;
-				continue;
-			} else if (matched && pred->or)
-				return 1;
-		} else
-			break;
+	for (i = 0; i < call->n_preds; i++) {
+		pred = call->preds[i];
+		if (and_failed && !pred->or)
+			continue;
+		matched = pred->fn(pred, rec);
+		if (!matched && !pred->or) {
+			and_failed = 1;
+			continue;
+		} else if (matched && pred->or)
+			return 1;
 	}
 
 	if (and_failed)
@@ -109,31 +111,29 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec)
 	return 1;
 }
 
-void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
+void filter_print_preds(struct filter_pred **preds, int n_preds,
+			struct trace_seq *s)
 {
 	char *field_name;
 	struct filter_pred *pred;
 	int i;
 
-	if (!preds) {
+	if (!n_preds) {
 		trace_seq_printf(s, "none\n");
 		return;
 	}
 
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (preds[i]) {
-			pred = preds[i];
-			field_name = pred->field_name;
-			if (i)
-				trace_seq_printf(s, pred->or ? "|| " : "&& ");
-			trace_seq_printf(s, "%s ", field_name);
-			trace_seq_printf(s, pred->not ? "!= " : "== ");
-			if (pred->str_val)
-				trace_seq_printf(s, "%s\n", pred->str_val);
-			else
-				trace_seq_printf(s, "%llu\n", pred->val);
-		} else
-			break;
+	for (i = 0; i < n_preds; i++) {
+		pred = preds[i];
+		field_name = pred->field_name;
+		if (i)
+			trace_seq_printf(s, pred->or ? "|| " : "&& ");
+		trace_seq_printf(s, "%s ", field_name);
+		trace_seq_printf(s, pred->not ? "!= " : "== ");
+		if (pred->str_len)
+			trace_seq_printf(s, "%s\n", pred->str_val);
+		else
+			trace_seq_printf(s, "%llu\n", pred->val);
 	}
 }
 
@@ -156,20 +156,69 @@ void filter_free_pred(struct filter_pred *pred)
 		return;
 
 	kfree(pred->field_name);
-	kfree(pred->str_val);
 	kfree(pred);
 }
 
-void filter_free_preds(struct ftrace_event_call *call)
+static void filter_clear_pred(struct filter_pred *pred)
+{
+	kfree(pred->field_name);
+	pred->field_name = NULL;
+	pred->str_len = 0;
+}
+
+static int filter_set_pred(struct filter_pred *dest,
+			   struct filter_pred *src,
+			   filter_pred_fn_t fn)
+{
+	*dest = *src;
+	dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
+	if (!dest->field_name)
+		return -ENOMEM;
+	dest->fn = fn;
+
+	return 0;
+}
+
+void filter_disable_preds(struct ftrace_event_call *call)
 {
 	int i;
 
-	if (call->preds) {
-		for (i = 0; i < MAX_FILTER_PRED; i++)
+	call->n_preds = 0;
+
+	for (i = 0; i < MAX_FILTER_PRED; i++)
+		call->preds[i]->fn = filter_pred_none;
+}
+
+int init_preds(struct ftrace_event_call *call)
+{
+	struct filter_pred *pred;
+	int i;
+
+	call->n_preds = 0;
+
+	call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
+	if (!call->preds)
+		return -ENOMEM;
+
+	for (i = 0; i < MAX_FILTER_PRED; i++) {
+		pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+		if (!pred)
+			goto oom;
+		pred->fn = filter_pred_none;
+		call->preds[i] = pred;
+	}
+
+	return 0;
+
+oom:
+	for (i = 0; i < MAX_FILTER_PRED; i++) {
+		if (call->preds[i])
 			filter_free_pred(call->preds[i]);
-		kfree(call->preds);
-		call->preds = NULL;
 	}
+	kfree(call->preds);
+	call->preds = NULL;
+
+	return -ENOMEM;
 }
 
 void filter_free_subsystem_preds(struct event_subsystem *system)
@@ -177,11 +226,12 @@ void filter_free_subsystem_preds(struct event_subsystem *system)
 	struct ftrace_event_call *call = __start_ftrace_events;
 	int i;
 
-	if (system->preds) {
-		for (i = 0; i < MAX_FILTER_PRED; i++)
+	if (system->n_preds) {
+		for (i = 0; i < system->n_preds; i++)
 			filter_free_pred(system->preds[i]);
 		kfree(system->preds);
 		system->preds = NULL;
+		system->n_preds = 0;
 	}
 
 	events_for_each(call) {
@@ -189,33 +239,31 @@ void filter_free_subsystem_preds(struct event_subsystem *system)
 			continue;
 
 		if (!strcmp(call->system, system->name))
-			filter_free_preds(call);
+			filter_disable_preds(call);
 	}
 }
 
 static int __filter_add_pred(struct ftrace_event_call *call,
-			     struct filter_pred *pred)
+			     struct filter_pred *pred,
+			     filter_pred_fn_t fn)
 {
-	int i;
+	int idx, err;
 
-	if (call->preds && !pred->compound)
-		filter_free_preds(call);
+	if (call->n_preds && !pred->compound)
+		filter_disable_preds(call);
 
-	if (!call->preds) {
-		call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
-				      GFP_KERNEL);
-		if (!call->preds)
-			return -ENOMEM;
-	}
+	if (call->n_preds == MAX_FILTER_PRED)
+		return -ENOSPC;
 
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (!call->preds[i]) {
-			call->preds[i] = pred;
-			return 0;
-		}
-	}
+	idx = call->n_preds;
+	filter_clear_pred(call->preds[idx]);
+	err = filter_set_pred(call->preds[idx], pred, fn);
+	if (err)
+		return err;
+
+	call->n_preds++;
 
-	return -ENOSPC;
+	return 0;
 }
 
 static int is_string_field(const char *type)
@@ -229,98 +277,66 @@ static int is_string_field(const char *type)
 int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
 {
 	struct ftrace_event_field *field;
+	filter_pred_fn_t fn;
 
 	field = find_event_field(call, pred->field_name);
 	if (!field)
 		return -EINVAL;
 
+	pred->fn = filter_pred_none;
 	pred->offset = field->offset;
 
 	if (is_string_field(field->type)) {
-		if (!pred->str_val)
+		if (!pred->str_len)
 			return -EINVAL;
-		pred->fn = filter_pred_string;
+		fn = filter_pred_string;
 		pred->str_len = field->size;
-		return __filter_add_pred(call, pred);
+		return __filter_add_pred(call, pred, fn);
 	} else {
-		if (pred->str_val)
+		if (pred->str_len)
 			return -EINVAL;
 	}
 
 	switch (field->size) {
 	case 8:
-		pred->fn = filter_pred_64;
+		fn = filter_pred_64;
 		break;
 	case 4:
-		pred->fn = filter_pred_32;
+		fn = filter_pred_32;
 		break;
 	case 2:
-		pred->fn = filter_pred_16;
+		fn = filter_pred_16;
 		break;
 	case 1:
-		pred->fn = filter_pred_8;
+		fn = filter_pred_8;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	return __filter_add_pred(call, pred);
-}
-
-static struct filter_pred *copy_pred(struct filter_pred *pred)
-{
-	struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
-	if (!new_pred)
-		return NULL;
-
-	memcpy(new_pred, pred, sizeof(*pred));
-
-	if (pred->field_name) {
-		new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-		if (!new_pred->field_name) {
-			kfree(new_pred);
-			return NULL;
-		}
-	}
-
-	if (pred->str_val) {
-		new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
-		if (!new_pred->str_val) {
-			filter_free_pred(new_pred);
-			return NULL;
-		}
-	}
-
-	return new_pred;
+	return __filter_add_pred(call, pred, fn);
 }
 
 int filter_add_subsystem_pred(struct event_subsystem *system,
 			      struct filter_pred *pred)
 {
 	struct ftrace_event_call *call = __start_ftrace_events;
-	struct filter_pred *event_pred;
-	int i;
 
-	if (system->preds && !pred->compound)
+	if (system->n_preds && !pred->compound)
 		filter_free_subsystem_preds(system);
 
-	if (!system->preds) {
+	if (!system->n_preds) {
 		system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
 					GFP_KERNEL);
 		if (!system->preds)
 			return -ENOMEM;
 	}
 
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (!system->preds[i]) {
-			system->preds[i] = pred;
-			break;
-		}
-	}
-
-	if (i == MAX_FILTER_PRED)
+	if (system->n_preds == MAX_FILTER_PRED)
 		return -ENOSPC;
 
+	system->preds[system->n_preds] = pred;
+
 	events_for_each(call) {
 		int err;
 
@@ -333,22 +349,16 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 		if (!find_event_field(call, pred->field_name))
 			continue;
 
-		event_pred = copy_pred(pred);
-		if (!event_pred)
-			goto oom;
-
-		err = filter_add_pred(call, event_pred);
-		if (err)
-			filter_free_pred(event_pred);
-		if (err == -ENOMEM)
-			goto oom;
+		err = filter_add_pred(call, pred);
+		if (err == -ENOMEM) {
+			system->preds[system->n_preds] = NULL;
+			return err;
+		}
 	}
 
-	return 0;
+	system->n_preds++;
 
-oom:
-	system->preds[i] = NULL;
-	return -ENOMEM;
+	return 0;
 }
 
 int filter_parse(char **pbuf, struct filter_pred *pred)
@@ -410,7 +420,8 @@ int filter_parse(char **pbuf, struct filter_pred *pred)
 		}
 	}
 
-	if (!val_str) {
+	if (!val_str || !strlen(val_str)
+	    || strlen(val_str) >= MAX_FILTER_STR_VAL) {
 		pred->field_name = NULL;
 		return -EINVAL;
 	}
@@ -419,11 +430,12 @@ int filter_parse(char **pbuf, struct filter_pred *pred)
 	if (!pred->field_name)
 		return -ENOMEM;
 
+	pred->str_len = 0;
 	pred->val = simple_strtoull(val_str, &tmp, 0);
 	if (tmp == val_str) {
-		pred->str_val = kstrdup(val_str, GFP_KERNEL);
-		if (!pred->str_val)
-			return -ENOMEM;
+		strncpy(pred->str_val, val_str, MAX_FILTER_STR_VAL);
+		pred->str_len = strlen(val_str);
+		pred->str_val[pred->str_len] = '\0';
 	} else if (*tmp != '\0')
 		return -EINVAL;
 
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 02fb710..59cfd7d 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -140,6 +140,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 
 #undef __array
 #define __array(type, item, len)					\
+	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item));			\
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index b2b2982..5bb1b7f 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -255,6 +255,7 @@ static int ftrace_raw_init_event_##call(void)				\
 		return -ENODEV;						\
 	event_##call.id = id;						\
 	INIT_LIST_HEAD(&event_##call.fields);				\
+	init_preds(&event_##call);					\
 	return 0;							\
 }									\
 									\
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 77c494f..48fc02f 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -122,6 +122,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 static int ftrace_raw_init_event_##call(void)				\
 {									\
 	INIT_LIST_HEAD(&event_##call.fields);				\
+	init_preds(&event_##call);					\
 	return 0;							\
 }									\
 
-- 
cgit v0.10.2


From 6e837fb152410e571a81aaadbd9884f0bc46a55e Mon Sep 17 00:00:00 2001
From: Etienne Basset <etienne.basset@numericable.fr>
Date: Wed, 8 Apr 2009 20:39:40 +0200
Subject: smack: implement logging V3

This patch creates auditing functions usable by LSM to audit security
events. It provides standard dumping of FS, NET, task etc ... events
(code borrowed from SELinux)
and provides 2 callbacks to define LSM specific auditing, which should be
flexible enough to convert SELinux too.

Signed-off-by: Etienne Basset <etienne.basset@numericable.fr>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
cked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
new file mode 100644
index 0000000..e461b2c
--- /dev/null
+++ b/include/linux/lsm_audit.h
@@ -0,0 +1,111 @@
+/*
+ * Common LSM logging functions
+ * Heavily borrowed from selinux/avc.h
+ *
+ * Author : Etienne BASSET  <etienne.basset@ensta.org>
+ *
+ * All credits to : Stephen Smalley, <sds@epoch.ncsc.mil>
+ * All BUGS to : Etienne BASSET  <etienne.basset@ensta.org>
+ */
+#ifndef _LSM_COMMON_LOGGING_
+#define _LSM_COMMON_LOGGING_
+
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/audit.h>
+#include <linux/in6.h>
+#include <linux/path.h>
+#include <linux/key.h>
+#include <linux/skbuff.h>
+#include <asm/system.h>
+
+
+/* Auxiliary data to use in generating the audit record. */
+struct common_audit_data {
+	char    type;
+#define LSM_AUDIT_DATA_FS      1
+#define LSM_AUDIT_DATA_NET     2
+#define LSM_AUDIT_DATA_CAP     3
+#define LSM_AUDIT_DATA_IPC     4
+#define LSM_AUDIT_DATA_TASK    5
+#define LSM_AUDIT_DATA_KEY     6
+	struct task_struct *tsk;
+	union 	{
+		struct {
+			struct path path;
+			struct inode *inode;
+		} fs;
+		struct {
+			int netif;
+			struct sock *sk;
+			u16 family;
+			__be16 dport;
+			__be16 sport;
+			union {
+				struct {
+					__be32 daddr;
+					__be32 saddr;
+				} v4;
+				struct {
+					struct in6_addr daddr;
+					struct in6_addr saddr;
+				} v6;
+			} fam;
+		} net;
+		int cap;
+		int ipc_id;
+		struct task_struct *tsk;
+#ifdef CONFIG_KEYS
+		struct {
+			key_serial_t key;
+			char *key_desc;
+		} key_struct;
+#endif
+	} u;
+	const char *function;
+	/* this union contains LSM specific data */
+	union {
+		/* SMACK data */
+		struct smack_audit_data {
+			char *subject;
+			char *object;
+			char *request;
+			int result;
+		} smack_audit_data;
+		/* SELinux data */
+		struct {
+			u32 ssid;
+			u32 tsid;
+			u16 tclass;
+			u32 requested;
+			u32 audited;
+			struct av_decision *avd;
+			int result;
+		} selinux_audit_data;
+	} lsm_priv;
+	/* these callback will be implemented by a specific LSM */
+	void (*lsm_pre_audit)(struct audit_buffer *, void *);
+	void (*lsm_post_audit)(struct audit_buffer *, void *);
+};
+
+#define v4info fam.v4
+#define v6info fam.v6
+
+int ipv4_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto);
+
+int ipv6_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto);
+
+/* Initialize an LSM audit data structure. */
+#define COMMON_AUDIT_DATA_INIT(_d, _t) \
+	{ memset((_d), 0, sizeof(struct common_audit_data)); \
+	 (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; }
+
+void common_lsm_audit(struct common_audit_data *a);
+
+#endif
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
new file mode 100644
index 0000000..94b8684
--- /dev/null
+++ b/security/lsm_audit.c
@@ -0,0 +1,386 @@
+/*
+ * common LSM auditing functions
+ *
+ * Based on code written for SELinux by :
+ *			Stephen Smalley, <sds@epoch.ncsc.mil>
+ * 			James Morris <jmorris@redhat.com>
+ * Author : Etienne Basset, <etienne.basset@ensta.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <linux/un.h>
+#include <net/af_unix.h>
+#include <linux/audit.h>
+#include <linux/ipv6.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/dccp.h>
+#include <linux/sctp.h>
+#include <linux/lsm_audit.h>
+
+/**
+ * ipv4_skb_to_auditdata : fill auditdata from skb
+ * @skb : the skb
+ * @ad : the audit data to fill
+ * @proto : the layer 4 protocol
+ *
+ * return  0 on success
+ */
+int ipv4_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto)
+{
+	int ret = 0;
+	struct iphdr *ih;
+
+	ih = ip_hdr(skb);
+	if (ih == NULL)
+		return -EINVAL;
+
+	ad->u.net.v4info.saddr = ih->saddr;
+	ad->u.net.v4info.daddr = ih->daddr;
+
+	if (proto)
+		*proto = ih->protocol;
+	/* non initial fragment */
+	if (ntohs(ih->frag_off) & IP_OFFSET)
+		return 0;
+
+	switch (ih->protocol) {
+	case IPPROTO_TCP: {
+		struct tcphdr *th = tcp_hdr(skb);
+		if (th == NULL)
+			break;
+
+		ad->u.net.sport = th->source;
+		ad->u.net.dport = th->dest;
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr *uh = udp_hdr(skb);
+		if (uh == NULL)
+			break;
+
+		ad->u.net.sport = uh->source;
+		ad->u.net.dport = uh->dest;
+		break;
+	}
+	case IPPROTO_DCCP: {
+		struct dccp_hdr *dh = dccp_hdr(skb);
+		if (dh == NULL)
+			break;
+
+		ad->u.net.sport = dh->dccph_sport;
+		ad->u.net.dport = dh->dccph_dport;
+		break;
+	}
+	case IPPROTO_SCTP: {
+		struct sctphdr *sh = sctp_hdr(skb);
+		if (sh == NULL)
+			break;
+		ad->u.net.sport = sh->source;
+		ad->u.net.dport = sh->dest;
+		break;
+	}
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * ipv6_skb_to_auditdata : fill auditdata from skb
+ * @skb : the skb
+ * @ad : the audit data to fill
+ * @proto : the layer 4 protocol
+ *
+ * return  0 on success
+ */
+int ipv6_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto)
+{
+	int offset, ret = 0;
+	struct ipv6hdr *ip6;
+	u8 nexthdr;
+
+	ip6 = ipv6_hdr(skb);
+	if (ip6 == NULL)
+		return -EINVAL;
+	ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
+	ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
+	ret = 0;
+	/* IPv6 can have several extension header before the Transport header
+	 * skip them */
+	offset = skb_network_offset(skb);
+	offset += sizeof(*ip6);
+	nexthdr = ip6->nexthdr;
+	offset = ipv6_skip_exthdr(skb, offset, &nexthdr);
+	if (offset < 0)
+		return 0;
+	if (proto)
+		*proto = nexthdr;
+	switch (nexthdr) {
+	case IPPROTO_TCP: {
+		struct tcphdr _tcph, *th;
+
+		th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+		if (th == NULL)
+			break;
+
+		ad->u.net.sport = th->source;
+		ad->u.net.dport = th->dest;
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr _udph, *uh;
+
+		uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+		if (uh == NULL)
+			break;
+
+		ad->u.net.sport = uh->source;
+		ad->u.net.dport = uh->dest;
+		break;
+	}
+	case IPPROTO_DCCP: {
+		struct dccp_hdr _dccph, *dh;
+
+		dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
+		if (dh == NULL)
+			break;
+
+		ad->u.net.sport = dh->dccph_sport;
+		ad->u.net.dport = dh->dccph_dport;
+		break;
+	}
+	case IPPROTO_SCTP: {
+		struct sctphdr _sctph, *sh;
+
+		sh = skb_header_pointer(skb, offset, sizeof(_sctph), &_sctph);
+		if (sh == NULL)
+			break;
+		ad->u.net.sport = sh->source;
+		ad->u.net.dport = sh->dest;
+		break;
+	}
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+#endif
+
+
+static inline void print_ipv6_addr(struct audit_buffer *ab,
+				   struct in6_addr *addr, __be16 port,
+				   char *name1, char *name2)
+{
+	if (!ipv6_addr_any(addr))
+		audit_log_format(ab, " %s=%pI6", name1, addr);
+	if (port)
+		audit_log_format(ab, " %s=%d", name2, ntohs(port));
+}
+
+static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
+				   __be16 port, char *name1, char *name2)
+{
+	if (addr)
+		audit_log_format(ab, " %s=%pI4", name1, &addr);
+	if (port)
+		audit_log_format(ab, " %s=%d", name2, ntohs(port));
+}
+
+/**
+ * dump_common_audit_data - helper to dump common audit data
+ * @a : common audit data
+ *
+ */
+static void dump_common_audit_data(struct audit_buffer *ab,
+				   struct common_audit_data *a)
+{
+	struct inode *inode = NULL;
+	struct task_struct *tsk = current;
+
+	if (a->tsk)
+		tsk = a->tsk;
+	if (tsk && tsk->pid) {
+		audit_log_format(ab, " pid=%d comm=", tsk->pid);
+		audit_log_untrustedstring(ab, tsk->comm);
+	}
+
+	switch (a->type) {
+	case LSM_AUDIT_DATA_IPC:
+		audit_log_format(ab, " key=%d ", a->u.ipc_id);
+		break;
+	case LSM_AUDIT_DATA_CAP:
+		audit_log_format(ab, " capability=%d ", a->u.cap);
+		break;
+	case LSM_AUDIT_DATA_FS:
+		if (a->u.fs.path.dentry) {
+			struct dentry *dentry = a->u.fs.path.dentry;
+			if (a->u.fs.path.mnt) {
+				audit_log_d_path(ab, "path=", &a->u.fs.path);
+			} else {
+				audit_log_format(ab, " name=");
+				audit_log_untrustedstring(ab,
+						 dentry->d_name.name);
+			}
+			inode = dentry->d_inode;
+		} else if (a->u.fs.inode) {
+			struct dentry *dentry;
+			inode = a->u.fs.inode;
+			dentry = d_find_alias(inode);
+			if (dentry) {
+				audit_log_format(ab, " name=");
+				audit_log_untrustedstring(ab,
+						 dentry->d_name.name);
+				dput(dentry);
+			}
+		}
+		if (inode)
+			audit_log_format(ab, " dev=%s ino=%lu",
+					inode->i_sb->s_id,
+					inode->i_ino);
+		break;
+	case LSM_AUDIT_DATA_TASK:
+		tsk = a->u.tsk;
+		if (tsk && tsk->pid) {
+			audit_log_format(ab, " pid=%d comm=", tsk->pid);
+			audit_log_untrustedstring(ab, tsk->comm);
+		}
+		break;
+	case LSM_AUDIT_DATA_NET:
+		if (a->u.net.sk) {
+			struct sock *sk = a->u.net.sk;
+			struct unix_sock *u;
+			int len = 0;
+			char *p = NULL;
+
+			switch (sk->sk_family) {
+			case AF_INET: {
+				struct inet_sock *inet = inet_sk(sk);
+
+				print_ipv4_addr(ab, inet->rcv_saddr,
+						inet->sport,
+						"laddr", "lport");
+				print_ipv4_addr(ab, inet->daddr,
+						inet->dport,
+						"faddr", "fport");
+				break;
+			}
+			case AF_INET6: {
+				struct inet_sock *inet = inet_sk(sk);
+				struct ipv6_pinfo *inet6 = inet6_sk(sk);
+
+				print_ipv6_addr(ab, &inet6->rcv_saddr,
+						inet->sport,
+						"laddr", "lport");
+				print_ipv6_addr(ab, &inet6->daddr,
+						inet->dport,
+						"faddr", "fport");
+				break;
+			}
+			case AF_UNIX:
+				u = unix_sk(sk);
+				if (u->dentry) {
+					struct path path = {
+						.dentry = u->dentry,
+						.mnt = u->mnt
+					};
+					audit_log_d_path(ab, "path=", &path);
+					break;
+				}
+				if (!u->addr)
+					break;
+				len = u->addr->len-sizeof(short);
+				p = &u->addr->name->sun_path[0];
+				audit_log_format(ab, " path=");
+				if (*p)
+					audit_log_untrustedstring(ab, p);
+				else
+					audit_log_n_hex(ab, p, len);
+				break;
+			}
+		}
+
+		switch (a->u.net.family) {
+		case AF_INET:
+			print_ipv4_addr(ab, a->u.net.v4info.saddr,
+					a->u.net.sport,
+					"saddr", "src");
+			print_ipv4_addr(ab, a->u.net.v4info.daddr,
+					a->u.net.dport,
+					"daddr", "dest");
+			break;
+		case AF_INET6:
+			print_ipv6_addr(ab, &a->u.net.v6info.saddr,
+					a->u.net.sport,
+					"saddr", "src");
+			print_ipv6_addr(ab, &a->u.net.v6info.daddr,
+					a->u.net.dport,
+					"daddr", "dest");
+			break;
+		}
+		if (a->u.net.netif > 0) {
+			struct net_device *dev;
+
+			/* NOTE: we always use init's namespace */
+			dev = dev_get_by_index(&init_net, a->u.net.netif);
+			if (dev) {
+				audit_log_format(ab, " netif=%s", dev->name);
+				dev_put(dev);
+			}
+		}
+		break;
+#ifdef CONFIG_KEYS
+	case LSM_AUDIT_DATA_KEY:
+		audit_log_format(ab, " key_serial=%u", a->u.key_struct.key);
+		if (a->u.key_struct.key_desc) {
+			audit_log_format(ab, " key_desc=");
+			audit_log_untrustedstring(ab, a->u.key_struct.key_desc);
+		}
+		break;
+#endif
+	} /* switch (a->type) */
+}
+
+/**
+ * common_lsm_audit - generic LSM auditing function
+ * @a:  auxiliary audit data
+ *
+ * setup the audit buffer for common security information
+ * uses callback to print LSM specific information
+ */
+void common_lsm_audit(struct common_audit_data *a)
+{
+	struct audit_buffer *ab;
+
+	if (a == NULL)
+		return;
+	/* we use GFP_ATOMIC so we won't sleep */
+	ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
+
+	if (ab == NULL)
+		return;
+
+	if (a->lsm_pre_audit)
+		a->lsm_pre_audit(ab, a);
+
+	dump_common_audit_data(ab, a);
+
+	if (a->lsm_post_audit)
+		a->lsm_post_audit(ab, a);
+
+	audit_log_end(ab);
+}
-- 
cgit v0.10.2


From ecfcc53fef3c357574bb6143dce6631e6d56295c Mon Sep 17 00:00:00 2001
From: Etienne Basset <etienne.basset@numericable.fr>
Date: Wed, 8 Apr 2009 20:40:06 +0200
Subject: smack: implement logging V3

the following patch, add logging of Smack security decisions.
This is of course very useful to understand what your current smack policy does.
As suggested by Casey, it also now forbids labels with ', " or \

It introduces a '/smack/logging' switch :
0: no logging
1: log denied (default)
2: log accepted
3: log denied&accepted

Signed-off-by: Etienne Basset <etienne.basset@numericable.fr>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt
index 629c92e..34614b4 100644
--- a/Documentation/Smack.txt
+++ b/Documentation/Smack.txt
@@ -184,8 +184,9 @@ length. Single character labels using special characters, that being anything
 other than a letter or digit, are reserved for use by the Smack development
 team. Smack labels are unstructured, case sensitive, and the only operation
 ever performed on them is comparison for equality. Smack labels cannot
-contain unprintable characters or the "/" (slash) character. Smack labels
-cannot begin with a '-', which is reserved for special options.
+contain unprintable characters, the "/" (slash), the "\" (backslash), the "'"
+(quote) and '"' (double-quote) characters.
+Smack labels cannot begin with a '-', which is reserved for special options.
 
 There are some predefined labels:
 
@@ -523,3 +524,18 @@ Smack supports some mount options:
 
 These mount options apply to all file system types.
 
+Smack auditing
+
+If you want Smack auditing of security events, you need to set CONFIG_AUDIT
+in your kernel configuration.
+By default, all denied events will be audited. You can change this behavior by
+writing a single character to the /smack/logging file :
+0 : no logging
+1 : log denied (default)
+2 : log accepted
+3 : log denied & accepted
+
+Events are logged as 'key=value' pairs, for each event you at least will get
+the subjet, the object, the rights requested, the action, the kernel function
+that triggered the event, plus other pairs depending on the type of event
+audited.
diff --git a/security/Makefile b/security/Makefile
index fa77021..c67557c 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -16,6 +16,9 @@ obj-$(CONFIG_SECURITYFS)		+= inode.o
 # Must precede capability.o in order to stack properly.
 obj-$(CONFIG_SECURITY_SELINUX)		+= selinux/built-in.o
 obj-$(CONFIG_SECURITY_SMACK)		+= smack/built-in.o
+ifeq ($(CONFIG_AUDIT),y)
+obj-$(CONFIG_SECURITY_SMACK)		+= lsm_audit.o
+endif
 obj-$(CONFIG_SECURITY_TOMOYO)		+= tomoyo/built-in.o
 obj-$(CONFIG_SECURITY_ROOTPLUG)		+= root_plug.o
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 42ef313..243bec1 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -20,6 +20,7 @@
 #include <net/netlabel.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
+#include <linux/lsm_audit.h>
 
 /*
  * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is
@@ -179,6 +180,20 @@ struct smack_known {
 #define MAY_NOT		0
 
 /*
+ * Number of access types used by Smack (rwxa)
+ */
+#define SMK_NUM_ACCESS_TYPE 4
+
+/*
+ * Smack audit data; is empty if CONFIG_AUDIT not set
+ * to save some stack
+ */
+struct smk_audit_info {
+#ifdef CONFIG_AUDIT
+	struct common_audit_data a;
+#endif
+};
+/*
  * These functions are in smack_lsm.c
  */
 struct inode_smack *new_inode_smack(char *);
@@ -186,8 +201,8 @@ struct inode_smack *new_inode_smack(char *);
 /*
  * These functions are in smack_access.c
  */
-int smk_access(char *, char *, int);
-int smk_curacc(char *, u32);
+int smk_access(char *, char *, int, struct smk_audit_info *);
+int smk_curacc(char *, u32, struct smk_audit_info *);
 int smack_to_cipso(const char *, struct smack_cipso *);
 void smack_from_cipso(u32, char *, char *);
 char *smack_from_secid(const u32);
@@ -237,4 +252,93 @@ static inline char *smk_of_inode(const struct inode *isp)
 	return sip->smk_inode;
 }
 
+/*
+ * logging functions
+ */
+#define SMACK_AUDIT_DENIED 0x1
+#define SMACK_AUDIT_ACCEPT 0x2
+extern int log_policy;
+
+void smack_log(char *subject_label, char *object_label,
+		int request,
+		int result, struct smk_audit_info *auditdata);
+
+#ifdef CONFIG_AUDIT
+
+/*
+ * some inline functions to set up audit data
+ * they do nothing if CONFIG_AUDIT is not set
+ *
+ */
+static inline void smk_ad_init(struct smk_audit_info *a, const char *func,
+			       char type)
+{
+	memset(a, 0, sizeof(*a));
+	a->a.type = type;
+	a->a.function = func;
+}
+
+static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
+					 struct task_struct *t)
+{
+	a->a.u.tsk = t;
+}
+static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a,
+						    struct dentry *d)
+{
+	a->a.u.fs.path.dentry = d;
+}
+static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a,
+						 struct vfsmount *m)
+{
+	a->a.u.fs.path.mnt = m;
+}
+static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a,
+					      struct inode *i)
+{
+	a->a.u.fs.inode = i;
+}
+static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a,
+					     struct path p)
+{
+	a->a.u.fs.path = p;
+}
+static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a,
+					    struct sock *sk)
+{
+	a->a.u.net.sk = sk;
+}
+
+#else /* no AUDIT */
+
+static inline void smk_ad_init(struct smk_audit_info *a, const char *func,
+			       char type)
+{
+}
+static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
+					 struct task_struct *t)
+{
+}
+static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a,
+						    struct dentry *d)
+{
+}
+static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a,
+						 struct vfsmount *m)
+{
+}
+static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a,
+					      struct inode *i)
+{
+}
+static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a,
+					     struct path p)
+{
+}
+static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a,
+					    struct sock *sk)
+{
+}
+#endif
+
 #endif  /* _SECURITY_SMACK_H */
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index ac0a270..513dc1a 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -59,11 +59,18 @@ LIST_HEAD(smack_known_list);
  */
 static u32 smack_next_secid = 10;
 
+/*
+ * what events do we log
+ * can be overwritten at run-time by /smack/logging
+ */
+int log_policy = SMACK_AUDIT_DENIED;
+
 /**
  * smk_access - determine if a subject has a specific access to an object
  * @subject_label: a pointer to the subject's Smack label
  * @object_label: a pointer to the object's Smack label
  * @request: the access requested, in "MAY" format
+ * @a : a pointer to the audit data
  *
  * This function looks up the subject/object pair in the
  * access rule list and returns 0 if the access is permitted,
@@ -78,10 +85,12 @@ static u32 smack_next_secid = 10;
  * will be on the list, so checking the pointers may be a worthwhile
  * optimization.
  */
-int smk_access(char *subject_label, char *object_label, int request)
+int smk_access(char *subject_label, char *object_label, int request,
+	       struct smk_audit_info *a)
 {
 	u32 may = MAY_NOT;
 	struct smack_rule *srp;
+	int rc = 0;
 
 	/*
 	 * Hardcoded comparisons.
@@ -89,8 +98,10 @@ int smk_access(char *subject_label, char *object_label, int request)
 	 * A star subject can't access any object.
 	 */
 	if (subject_label == smack_known_star.smk_known ||
-	    strcmp(subject_label, smack_known_star.smk_known) == 0)
-		return -EACCES;
+	    strcmp(subject_label, smack_known_star.smk_known) == 0) {
+		rc = -EACCES;
+		goto out_audit;
+	}
 	/*
 	 * An internet object can be accessed by any subject.
 	 * Tasks cannot be assigned the internet label.
@@ -100,20 +111,20 @@ int smk_access(char *subject_label, char *object_label, int request)
 	    subject_label == smack_known_web.smk_known ||
 	    strcmp(object_label, smack_known_web.smk_known) == 0 ||
 	    strcmp(subject_label, smack_known_web.smk_known) == 0)
-		return 0;
+		goto out_audit;
 	/*
 	 * A star object can be accessed by any subject.
 	 */
 	if (object_label == smack_known_star.smk_known ||
 	    strcmp(object_label, smack_known_star.smk_known) == 0)
-		return 0;
+		goto out_audit;
 	/*
 	 * An object can be accessed in any way by a subject
 	 * with the same label.
 	 */
 	if (subject_label == object_label ||
 	    strcmp(subject_label, object_label) == 0)
-		return 0;
+		goto out_audit;
 	/*
 	 * A hat subject can read any object.
 	 * A floor object can be read by any subject.
@@ -121,10 +132,10 @@ int smk_access(char *subject_label, char *object_label, int request)
 	if ((request & MAY_ANYREAD) == request) {
 		if (object_label == smack_known_floor.smk_known ||
 		    strcmp(object_label, smack_known_floor.smk_known) == 0)
-			return 0;
+			goto out_audit;
 		if (subject_label == smack_known_hat.smk_known ||
 		    strcmp(subject_label, smack_known_hat.smk_known) == 0)
-			return 0;
+			goto out_audit;
 	}
 	/*
 	 * Beyond here an explicit relationship is required.
@@ -148,28 +159,36 @@ int smk_access(char *subject_label, char *object_label, int request)
 	 * This is a bit map operation.
 	 */
 	if ((request & may) == request)
-		return 0;
-
-	return -EACCES;
+		goto out_audit;
+
+	rc = -EACCES;
+out_audit:
+#ifdef CONFIG_AUDIT
+	if (a)
+		smack_log(subject_label, object_label, request, rc, a);
+#endif
+	return rc;
 }
 
 /**
  * smk_curacc - determine if current has a specific access to an object
  * @obj_label: a pointer to the object's Smack label
  * @mode: the access requested, in "MAY" format
+ * @a : common audit data
  *
  * This function checks the current subject label/object label pair
  * in the access rule list and returns 0 if the access is permitted,
  * non zero otherwise. It allows that current may have the capability
  * to override the rules.
  */
-int smk_curacc(char *obj_label, u32 mode)
+int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
 {
 	int rc;
+	char *sp = current_security();
 
-	rc = smk_access(current_security(), obj_label, mode);
+	rc = smk_access(sp, obj_label, mode, NULL);
 	if (rc == 0)
-		return 0;
+		goto out_audit;
 
 	/*
 	 * Return if a specific label has been designated as the
@@ -177,14 +196,105 @@ int smk_curacc(char *obj_label, u32 mode)
 	 * have that label.
 	 */
 	if (smack_onlycap != NULL && smack_onlycap != current->cred->security)
-		return rc;
+		goto out_audit;
 
 	if (capable(CAP_MAC_OVERRIDE))
 		return 0;
 
+out_audit:
+#ifdef CONFIG_AUDIT
+	if (a)
+		smack_log(sp, obj_label, mode, rc, a);
+#endif
 	return rc;
 }
 
+#ifdef CONFIG_AUDIT
+/**
+ * smack_str_from_perm : helper to transalate an int to a
+ * readable string
+ * @string : the string to fill
+ * @access : the int
+ *
+ */
+static inline void smack_str_from_perm(char *string, int access)
+{
+	int i = 0;
+	if (access & MAY_READ)
+		string[i++] = 'r';
+	if (access & MAY_WRITE)
+		string[i++] = 'w';
+	if (access & MAY_EXEC)
+		string[i++] = 'x';
+	if (access & MAY_APPEND)
+		string[i++] = 'a';
+	string[i] = '\0';
+}
+/**
+ * smack_log_callback - SMACK specific information
+ * will be called by generic audit code
+ * @ab : the audit_buffer
+ * @a  : audit_data
+ *
+ */
+static void smack_log_callback(struct audit_buffer *ab, void *a)
+{
+	struct common_audit_data *ad = a;
+	struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data;
+	audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function,
+			 sad->result ? "denied" : "granted");
+	audit_log_format(ab, " subject=");
+	audit_log_untrustedstring(ab, sad->subject);
+	audit_log_format(ab, " object=");
+	audit_log_untrustedstring(ab, sad->object);
+	audit_log_format(ab, " requested=%s", sad->request);
+}
+
+/**
+ *  smack_log - Audit the granting or denial of permissions.
+ *  @subject_label : smack label of the requester
+ *  @object_label  : smack label of the object being accessed
+ *  @request: requested permissions
+ *  @result: result from smk_access
+ *  @a:  auxiliary audit data
+ *
+ * Audit the granting or denial of permissions in accordance
+ * with the policy.
+ */
+void smack_log(char *subject_label, char *object_label, int request,
+	       int result, struct smk_audit_info *ad)
+{
+	char request_buffer[SMK_NUM_ACCESS_TYPE + 1];
+	struct smack_audit_data *sad;
+	struct common_audit_data *a = &ad->a;
+
+	/* check if we have to log the current event */
+	if (result != 0 && (log_policy & SMACK_AUDIT_DENIED) == 0)
+		return;
+	if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0)
+		return;
+
+	if (a->function == NULL)
+		a->function = "unknown";
+
+	/* end preparing the audit data */
+	sad = &a->lsm_priv.smack_audit_data;
+	smack_str_from_perm(request_buffer, request);
+	sad->subject = subject_label;
+	sad->object  = object_label;
+	sad->request = request_buffer;
+	sad->result  = result;
+	a->lsm_pre_audit = smack_log_callback;
+
+	common_lsm_audit(a);
+}
+#else /* #ifdef CONFIG_AUDIT */
+void smack_log(char *subject_label, char *object_label, int request,
+               int result, struct smk_audit_info *ad)
+{
+}
+#endif
+
 static DEFINE_MUTEX(smack_known_lock);
 
 /**
@@ -209,7 +319,8 @@ struct smack_known *smk_import_entry(const char *string, int len)
 		if (found)
 			smack[i] = '\0';
 		else if (i >= len || string[i] > '~' || string[i] <= ' ' ||
-			 string[i] == '/') {
+			 string[i] == '/' || string[i] == '"' ||
+			 string[i] == '\\' || string[i] == '\'') {
 			smack[i] = '\0';
 			found = 1;
 		} else
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 9215149..f557767 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -30,7 +30,6 @@
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
 #include <linux/audit.h>
-
 #include "smack.h"
 
 #define task_security(task)	(task_cred_xxx((task), security))
@@ -103,14 +102,24 @@ struct inode_smack *new_inode_smack(char *smack)
 static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
 {
 	int rc;
+	struct smk_audit_info ad;
+	char *sp, *tsp;
 
 	rc = cap_ptrace_may_access(ctp, mode);
 	if (rc != 0)
 		return rc;
 
-	rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE);
+	sp = current_security();
+	tsp = task_security(ctp);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, ctp);
+
+	/* we won't log here, because rc can be overriden */
+	rc = smk_access(sp, tsp, MAY_READWRITE, NULL);
 	if (rc != 0 && capable(CAP_MAC_OVERRIDE))
-		return 0;
+		rc = 0;
+
+	smack_log(sp, tsp, MAY_READWRITE, rc, &ad);
 	return rc;
 }
 
@@ -125,14 +134,24 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
 static int smack_ptrace_traceme(struct task_struct *ptp)
 {
 	int rc;
+	struct smk_audit_info ad;
+	char *sp, *tsp;
 
 	rc = cap_ptrace_traceme(ptp);
 	if (rc != 0)
 		return rc;
 
-	rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, ptp);
+
+	sp = current_security();
+	tsp = task_security(ptp);
+	/* we won't log here, because rc can be overriden */
+	rc = smk_access(tsp, sp, MAY_READWRITE, NULL);
 	if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE))
-		return 0;
+		rc = 0;
+
+	smack_log(tsp, sp, MAY_READWRITE, rc, &ad);
 	return rc;
 }
 
@@ -327,8 +346,14 @@ static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data)
 static int smack_sb_statfs(struct dentry *dentry)
 {
 	struct superblock_smack *sbp = dentry->d_sb->s_security;
+	int rc;
+	struct smk_audit_info ad;
 
-	return smk_curacc(sbp->smk_floor, MAY_READ);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
+	rc = smk_curacc(sbp->smk_floor, MAY_READ, &ad);
+	return rc;
 }
 
 /**
@@ -346,8 +371,12 @@ static int smack_sb_mount(char *dev_name, struct path *path,
 			  char *type, unsigned long flags, void *data)
 {
 	struct superblock_smack *sbp = path->mnt->mnt_sb->s_security;
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path(&ad, *path);
 
-	return smk_curacc(sbp->smk_floor, MAY_WRITE);
+	return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad);
 }
 
 /**
@@ -361,10 +390,14 @@ static int smack_sb_mount(char *dev_name, struct path *path,
 static int smack_sb_umount(struct vfsmount *mnt, int flags)
 {
 	struct superblock_smack *sbp;
+	struct smk_audit_info ad;
 
-	sbp = mnt->mnt_sb->s_security;
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_mountpoint);
+	smk_ad_setfield_u_fs_path_mnt(&ad, mnt);
 
-	return smk_curacc(sbp->smk_floor, MAY_WRITE);
+	sbp = mnt->mnt_sb->s_security;
+	return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad);
 }
 
 /*
@@ -441,15 +474,20 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir,
 static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
 			    struct dentry *new_dentry)
 {
-	int rc;
 	char *isp;
+	struct smk_audit_info ad;
+	int rc;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
 
 	isp = smk_of_inode(old_dentry->d_inode);
-	rc = smk_curacc(isp, MAY_WRITE);
+	rc = smk_curacc(isp, MAY_WRITE, &ad);
 
 	if (rc == 0 && new_dentry->d_inode != NULL) {
 		isp = smk_of_inode(new_dentry->d_inode);
-		rc = smk_curacc(isp, MAY_WRITE);
+		smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
+		rc = smk_curacc(isp, MAY_WRITE, &ad);
 	}
 
 	return rc;
@@ -466,18 +504,24 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
 static int smack_inode_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *ip = dentry->d_inode;
+	struct smk_audit_info ad;
 	int rc;
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
 	/*
 	 * You need write access to the thing you're unlinking
 	 */
-	rc = smk_curacc(smk_of_inode(ip), MAY_WRITE);
-	if (rc == 0)
+	rc = smk_curacc(smk_of_inode(ip), MAY_WRITE, &ad);
+	if (rc == 0) {
 		/*
 		 * You also need write access to the containing directory
 		 */
-		rc = smk_curacc(smk_of_inode(dir), MAY_WRITE);
-
+		smk_ad_setfield_u_fs_path_dentry(&ad, NULL);
+		smk_ad_setfield_u_fs_inode(&ad, dir);
+		rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad);
+	}
 	return rc;
 }
 
@@ -491,17 +535,24 @@ static int smack_inode_unlink(struct inode *dir, struct dentry *dentry)
  */
 static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry)
 {
+	struct smk_audit_info ad;
 	int rc;
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
 	/*
 	 * You need write access to the thing you're removing
 	 */
-	rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
-	if (rc == 0)
+	rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
+	if (rc == 0) {
 		/*
 		 * You also need write access to the containing directory
 		 */
-		rc = smk_curacc(smk_of_inode(dir), MAY_WRITE);
+		smk_ad_setfield_u_fs_path_dentry(&ad, NULL);
+		smk_ad_setfield_u_fs_inode(&ad, dir);
+		rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad);
+	}
 
 	return rc;
 }
@@ -525,15 +576,19 @@ static int smack_inode_rename(struct inode *old_inode,
 {
 	int rc;
 	char *isp;
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
 
 	isp = smk_of_inode(old_dentry->d_inode);
-	rc = smk_curacc(isp, MAY_READWRITE);
+	rc = smk_curacc(isp, MAY_READWRITE, &ad);
 
 	if (rc == 0 && new_dentry->d_inode != NULL) {
 		isp = smk_of_inode(new_dentry->d_inode);
-		rc = smk_curacc(isp, MAY_READWRITE);
+		smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
+		rc = smk_curacc(isp, MAY_READWRITE, &ad);
 	}
-
 	return rc;
 }
 
@@ -548,13 +603,15 @@ static int smack_inode_rename(struct inode *old_inode,
  */
 static int smack_inode_permission(struct inode *inode, int mask)
 {
+	struct smk_audit_info ad;
 	/*
 	 * No permission to check. Existence test. Yup, it's there.
 	 */
 	if (mask == 0)
 		return 0;
-
-	return smk_curacc(smk_of_inode(inode), mask);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_inode(&ad, inode);
+	return smk_curacc(smk_of_inode(inode), mask, &ad);
 }
 
 /**
@@ -566,13 +623,16 @@ static int smack_inode_permission(struct inode *inode, int mask)
  */
 static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 {
+	struct smk_audit_info ad;
 	/*
 	 * Need to allow for clearing the setuid bit.
 	 */
 	if (iattr->ia_valid & ATTR_FORCE)
 		return 0;
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
 
-	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
 }
 
 /**
@@ -584,7 +644,12 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
  */
 static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
 {
-	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ);
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+	smk_ad_setfield_u_fs_path_mnt(&ad, mnt);
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
 }
 
 /**
@@ -602,6 +667,7 @@ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
 static int smack_inode_setxattr(struct dentry *dentry, const char *name,
 				const void *value, size_t size, int flags)
 {
+	struct smk_audit_info ad;
 	int rc = 0;
 
 	if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
@@ -615,8 +681,11 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
 	} else
 		rc = cap_inode_setxattr(dentry, name, value, size, flags);
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
 	if (rc == 0)
-		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
 
 	return rc;
 }
@@ -671,7 +740,12 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
  */
 static int smack_inode_getxattr(struct dentry *dentry, const char *name)
 {
-	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ);
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
+
+	return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
 }
 
 /*
@@ -685,6 +759,7 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name)
  */
 static int smack_inode_removexattr(struct dentry *dentry, const char *name)
 {
+	struct smk_audit_info ad;
 	int rc = 0;
 
 	if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
@@ -695,8 +770,10 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
 	} else
 		rc = cap_inode_removexattr(dentry, name);
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
 	if (rc == 0)
-		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE);
+		rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
 
 	return rc;
 }
@@ -855,12 +932,16 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
 	int rc = 0;
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path(&ad, file->f_path);
 
 	if (_IOC_DIR(cmd) & _IOC_WRITE)
-		rc = smk_curacc(file->f_security, MAY_WRITE);
+		rc = smk_curacc(file->f_security, MAY_WRITE, &ad);
 
 	if (rc == 0 && (_IOC_DIR(cmd) & _IOC_READ))
-		rc = smk_curacc(file->f_security, MAY_READ);
+		rc = smk_curacc(file->f_security, MAY_READ, &ad);
 
 	return rc;
 }
@@ -874,7 +955,11 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd,
  */
 static int smack_file_lock(struct file *file, unsigned int cmd)
 {
-	return smk_curacc(file->f_security, MAY_WRITE);
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path_dentry(&ad, file->f_path.dentry);
+	return smk_curacc(file->f_security, MAY_WRITE, &ad);
 }
 
 /**
@@ -888,8 +973,12 @@ static int smack_file_lock(struct file *file, unsigned int cmd)
 static int smack_file_fcntl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
+	struct smk_audit_info ad;
 	int rc;
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
+	smk_ad_setfield_u_fs_path(&ad, file->f_path);
+
 	switch (cmd) {
 	case F_DUPFD:
 	case F_GETFD:
@@ -897,7 +986,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
 	case F_GETLK:
 	case F_GETOWN:
 	case F_GETSIG:
-		rc = smk_curacc(file->f_security, MAY_READ);
+		rc = smk_curacc(file->f_security, MAY_READ, &ad);
 		break;
 	case F_SETFD:
 	case F_SETFL:
@@ -905,10 +994,10 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
 	case F_SETLKW:
 	case F_SETOWN:
 	case F_SETSIG:
-		rc = smk_curacc(file->f_security, MAY_WRITE);
+		rc = smk_curacc(file->f_security, MAY_WRITE, &ad);
 		break;
 	default:
-		rc = smk_curacc(file->f_security, MAY_READWRITE);
+		rc = smk_curacc(file->f_security, MAY_READWRITE, &ad);
 	}
 
 	return rc;
@@ -943,14 +1032,21 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
 {
 	struct file *file;
 	int rc;
+	char *tsp = tsk->cred->security;
+	struct smk_audit_info ad;
 
 	/*
 	 * struct fown_struct is never outside the context of a struct file
 	 */
 	file = container_of(fown, struct file, f_owner);
-	rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE);
+	/* we don't log here as rc can be overriden */
+	rc = smk_access(file->f_security, tsp, MAY_WRITE, NULL);
 	if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE))
-		return 0;
+		rc = 0;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, tsk);
+	smack_log(file->f_security, tsp, MAY_WRITE, rc, &ad);
 	return rc;
 }
 
@@ -963,7 +1059,10 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
 static int smack_file_receive(struct file *file)
 {
 	int may = 0;
+	struct smk_audit_info ad;
 
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_fs_path(&ad, file->f_path);
 	/*
 	 * This code relies on bitmasks.
 	 */
@@ -972,7 +1071,7 @@ static int smack_file_receive(struct file *file)
 	if (file->f_mode & FMODE_WRITE)
 		may |= MAY_WRITE;
 
-	return smk_curacc(file->f_security, may);
+	return smk_curacc(file->f_security, may, &ad);
 }
 
 /*
@@ -1052,6 +1151,22 @@ static int smack_kernel_create_files_as(struct cred *new,
 }
 
 /**
+ * smk_curacc_on_task - helper to log task related access
+ * @p: the task object
+ * @access : the access requested
+ *
+ * Return 0 if access is permitted
+ */
+static int smk_curacc_on_task(struct task_struct *p, int access)
+{
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, p);
+	return smk_curacc(task_security(p), access, &ad);
+}
+
+/**
  * smack_task_setpgid - Smack check on setting pgid
  * @p: the task object
  * @pgid: unused
@@ -1060,7 +1175,7 @@ static int smack_kernel_create_files_as(struct cred *new,
  */
 static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
 {
-	return smk_curacc(task_security(p), MAY_WRITE);
+	return smk_curacc_on_task(p, MAY_WRITE);
 }
 
 /**
@@ -1071,7 +1186,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
  */
 static int smack_task_getpgid(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_READ);
+	return smk_curacc_on_task(p, MAY_READ);
 }
 
 /**
@@ -1082,7 +1197,7 @@ static int smack_task_getpgid(struct task_struct *p)
  */
 static int smack_task_getsid(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_READ);
+	return smk_curacc_on_task(p, MAY_READ);
 }
 
 /**
@@ -1110,7 +1225,7 @@ static int smack_task_setnice(struct task_struct *p, int nice)
 
 	rc = cap_task_setnice(p, nice);
 	if (rc == 0)
-		rc = smk_curacc(task_security(p), MAY_WRITE);
+		rc = smk_curacc_on_task(p, MAY_WRITE);
 	return rc;
 }
 
@@ -1127,7 +1242,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio)
 
 	rc = cap_task_setioprio(p, ioprio);
 	if (rc == 0)
-		rc = smk_curacc(task_security(p), MAY_WRITE);
+		rc = smk_curacc_on_task(p, MAY_WRITE);
 	return rc;
 }
 
@@ -1139,7 +1254,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio)
  */
 static int smack_task_getioprio(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_READ);
+	return smk_curacc_on_task(p, MAY_READ);
 }
 
 /**
@@ -1157,7 +1272,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy,
 
 	rc = cap_task_setscheduler(p, policy, lp);
 	if (rc == 0)
-		rc = smk_curacc(task_security(p), MAY_WRITE);
+		rc = smk_curacc_on_task(p, MAY_WRITE);
 	return rc;
 }
 
@@ -1169,7 +1284,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy,
  */
 static int smack_task_getscheduler(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_READ);
+	return smk_curacc_on_task(p, MAY_READ);
 }
 
 /**
@@ -1180,7 +1295,7 @@ static int smack_task_getscheduler(struct task_struct *p)
  */
 static int smack_task_movememory(struct task_struct *p)
 {
-	return smk_curacc(task_security(p), MAY_WRITE);
+	return smk_curacc_on_task(p, MAY_WRITE);
 }
 
 /**
@@ -1198,18 +1313,23 @@ static int smack_task_movememory(struct task_struct *p)
 static int smack_task_kill(struct task_struct *p, struct siginfo *info,
 			   int sig, u32 secid)
 {
+	struct smk_audit_info ad;
+
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, p);
 	/*
 	 * Sending a signal requires that the sender
 	 * can write the receiver.
 	 */
 	if (secid == 0)
-		return smk_curacc(task_security(p), MAY_WRITE);
+		return smk_curacc(task_security(p), MAY_WRITE, &ad);
 	/*
 	 * If the secid isn't 0 we're dealing with some USB IO
 	 * specific behavior. This is not clean. For one thing
 	 * we can't take privilege into account.
 	 */
-	return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE);
+	return smk_access(smack_from_secid(secid), task_security(p),
+			  MAY_WRITE, &ad);
 }
 
 /**
@@ -1220,11 +1340,15 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info,
  */
 static int smack_task_wait(struct task_struct *p)
 {
+	struct smk_audit_info ad;
+	char *sp = current_security();
+	char *tsp = task_security(p);
 	int rc;
 
-	rc = smk_access(current_security(), task_security(p), MAY_WRITE);
+	/* we don't log here, we can be overriden */
+	rc = smk_access(sp, tsp, MAY_WRITE, NULL);
 	if (rc == 0)
-		return 0;
+		goto out_log;
 
 	/*
 	 * Allow the operation to succeed if either task
@@ -1238,8 +1362,12 @@ static int smack_task_wait(struct task_struct *p)
 	 * the smack value.
 	 */
 	if (capable(CAP_MAC_OVERRIDE) || has_capability(p, CAP_MAC_OVERRIDE))
-		return 0;
-
+		rc = 0;
+	/* we log only if we didn't get overriden */
+ out_log:
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
+	smk_ad_setfield_u_tsk(&ad, p);
+	smack_log(sp, tsp, MAY_WRITE, rc, &ad);
 	return rc;
 }
 
@@ -1455,12 +1583,19 @@ static int smack_netlabel_send(struct sock *sk, struct sockaddr_in *sap)
 	int sk_lbl;
 	char *hostsp;
 	struct socket_smack *ssp = sk->sk_security;
+	struct smk_audit_info ad;
 
 	rcu_read_lock();
 	hostsp = smack_host_label(sap);
 	if (hostsp != NULL) {
 		sk_lbl = SMACK_UNLABELED_SOCKET;
-		rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE);
+#ifdef CONFIG_AUDIT
+		smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+		ad.a.u.net.family = sap->sin_family;
+		ad.a.u.net.dport = sap->sin_port;
+		ad.a.u.net.v4info.daddr = sap->sin_addr.s_addr;
+#endif
+		rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE, &ad);
 	} else {
 		sk_lbl = SMACK_CIPSO_SOCKET;
 		rc = 0;
@@ -1656,6 +1791,25 @@ static void smack_shm_free_security(struct shmid_kernel *shp)
 }
 
 /**
+ * smk_curacc_shm : check if current has access on shm
+ * @shp : the object
+ * @access : access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smk_curacc_shm(struct shmid_kernel *shp, int access)
+{
+	char *ssp = smack_of_shm(shp);
+	struct smk_audit_info ad;
+
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
+	ad.a.u.ipc_id = shp->shm_perm.id;
+#endif
+	return smk_curacc(ssp, access, &ad);
+}
+
+/**
  * smack_shm_associate - Smack access check for shm
  * @shp: the object
  * @shmflg: access requested
@@ -1664,11 +1818,10 @@ static void smack_shm_free_security(struct shmid_kernel *shp)
  */
 static int smack_shm_associate(struct shmid_kernel *shp, int shmflg)
 {
-	char *ssp = smack_of_shm(shp);
 	int may;
 
 	may = smack_flags_to_may(shmflg);
-	return smk_curacc(ssp, may);
+	return smk_curacc_shm(shp, may);
 }
 
 /**
@@ -1680,7 +1833,6 @@ static int smack_shm_associate(struct shmid_kernel *shp, int shmflg)
  */
 static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
 {
-	char *ssp;
 	int may;
 
 	switch (cmd) {
@@ -1703,9 +1855,7 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
 	default:
 		return -EINVAL;
 	}
-
-	ssp = smack_of_shm(shp);
-	return smk_curacc(ssp, may);
+	return smk_curacc_shm(shp, may);
 }
 
 /**
@@ -1719,11 +1869,10 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd)
 static int smack_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr,
 			   int shmflg)
 {
-	char *ssp = smack_of_shm(shp);
 	int may;
 
 	may = smack_flags_to_may(shmflg);
-	return smk_curacc(ssp, may);
+	return smk_curacc_shm(shp, may);
 }
 
 /**
@@ -1765,6 +1914,25 @@ static void smack_sem_free_security(struct sem_array *sma)
 }
 
 /**
+ * smk_curacc_sem : check if current has access on sem
+ * @sma : the object
+ * @access : access requested
+ *
+ * Returns 0 if current has the requested access, error code otherwise
+ */
+static int smk_curacc_sem(struct sem_array *sma, int access)
+{
+	char *ssp = smack_of_sem(sma);
+	struct smk_audit_info ad;
+
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
+	ad.a.u.ipc_id = sma->sem_perm.id;
+#endif
+	return smk_curacc(ssp, access, &ad);
+}
+
+/**
  * smack_sem_associate - Smack access check for sem
  * @sma: the object
  * @semflg: access requested
@@ -1773,11 +1941,10 @@ static void smack_sem_free_security(struct sem_array *sma)
  */
 static int smack_sem_associate(struct sem_array *sma, int semflg)
 {
-	char *ssp = smack_of_sem(sma);
 	int may;
 
 	may = smack_flags_to_may(semflg);
-	return smk_curacc(ssp, may);
+	return smk_curacc_sem(sma, may);
 }
 
 /**
@@ -1789,7 +1956,6 @@ static int smack_sem_associate(struct sem_array *sma, int semflg)
  */
 static int smack_sem_semctl(struct sem_array *sma, int cmd)
 {
-	char *ssp;
 	int may;
 
 	switch (cmd) {
@@ -1818,8 +1984,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd)
 		return -EINVAL;
 	}
 
-	ssp = smack_of_sem(sma);
-	return smk_curacc(ssp, may);
+	return smk_curacc_sem(sma, may);
 }
 
 /**
@@ -1836,9 +2001,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd)
 static int smack_sem_semop(struct sem_array *sma, struct sembuf *sops,
 			   unsigned nsops, int alter)
 {
-	char *ssp = smack_of_sem(sma);
-
-	return smk_curacc(ssp, MAY_READWRITE);
+	return smk_curacc_sem(sma, MAY_READWRITE);
 }
 
 /**
@@ -1880,6 +2043,25 @@ static char *smack_of_msq(struct msg_queue *msq)
 }
 
 /**
+ * smk_curacc_msq : helper to check if current has access on msq
+ * @msq : the msq
+ * @access : access requested
+ *
+ * return 0 if current has access, error otherwise
+ */
+static int smk_curacc_msq(struct msg_queue *msq, int access)
+{
+	char *msp = smack_of_msq(msq);
+	struct smk_audit_info ad;
+
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
+	ad.a.u.ipc_id = msq->q_perm.id;
+#endif
+	return smk_curacc(msp, access, &ad);
+}
+
+/**
  * smack_msg_queue_associate - Smack access check for msg_queue
  * @msq: the object
  * @msqflg: access requested
@@ -1888,11 +2070,10 @@ static char *smack_of_msq(struct msg_queue *msq)
  */
 static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg)
 {
-	char *msp = smack_of_msq(msq);
 	int may;
 
 	may = smack_flags_to_may(msqflg);
-	return smk_curacc(msp, may);
+	return smk_curacc_msq(msq, may);
 }
 
 /**
@@ -1904,7 +2085,6 @@ static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg)
  */
 static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
 {
-	char *msp;
 	int may;
 
 	switch (cmd) {
@@ -1926,8 +2106,7 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
 		return -EINVAL;
 	}
 
-	msp = smack_of_msq(msq);
-	return smk_curacc(msp, may);
+	return smk_curacc_msq(msq, may);
 }
 
 /**
@@ -1941,11 +2120,10 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd)
 static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
 				  int msqflg)
 {
-	char *msp = smack_of_msq(msq);
-	int rc;
+	int may;
 
-	rc = smack_flags_to_may(msqflg);
-	return smk_curacc(msp, rc);
+	may = smack_flags_to_may(msqflg);
+	return smk_curacc_msq(msq, may);
 }
 
 /**
@@ -1961,9 +2139,7 @@ static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
 static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
 			struct task_struct *target, long type, int mode)
 {
-	char *msp = smack_of_msq(msq);
-
-	return smk_curacc(msp, MAY_READWRITE);
+	return smk_curacc_msq(msq, MAY_READWRITE);
 }
 
 /**
@@ -1976,10 +2152,14 @@ static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
 static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag)
 {
 	char *isp = ipp->security;
-	int may;
+	int may = smack_flags_to_may(flag);
+	struct smk_audit_info ad;
 
-	may = smack_flags_to_may(flag);
-	return smk_curacc(isp, may);
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC);
+	ad.a.u.ipc_id = ipp->id;
+#endif
+	return smk_curacc(isp, may, &ad);
 }
 
 /**
@@ -2238,8 +2418,12 @@ static int smack_unix_stream_connect(struct socket *sock,
 {
 	struct inode *sp = SOCK_INODE(sock);
 	struct inode *op = SOCK_INODE(other);
+	struct smk_audit_info ad;
 
-	return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_READWRITE);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+	smk_ad_setfield_u_net_sk(&ad, other->sk);
+	return smk_access(smk_of_inode(sp), smk_of_inode(op),
+				 MAY_READWRITE, &ad);
 }
 
 /**
@@ -2254,8 +2438,11 @@ static int smack_unix_may_send(struct socket *sock, struct socket *other)
 {
 	struct inode *sp = SOCK_INODE(sock);
 	struct inode *op = SOCK_INODE(other);
+	struct smk_audit_info ad;
 
-	return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE);
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+	smk_ad_setfield_u_net_sk(&ad, other->sk);
+	return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE, &ad);
 }
 
 /**
@@ -2370,7 +2557,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	char smack[SMK_LABELLEN];
 	char *csp;
 	int rc;
-
+	struct smk_audit_info ad;
 	if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
 		return 0;
 
@@ -2388,13 +2575,19 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	netlbl_secattr_destroy(&secattr);
 
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+	ad.a.u.net.family = sk->sk_family;
+	ad.a.u.net.netif = skb->iif;
+	ipv4_skb_to_auditdata(skb, &ad.a, NULL);
+#endif
 	/*
 	 * Receiving a packet requires that the other end
 	 * be able to write here. Read access is not required.
 	 * This is the simplist possible security model
 	 * for networking.
 	 */
-	rc = smk_access(csp, ssp->smk_in, MAY_WRITE);
+	rc = smk_access(csp, ssp->smk_in, MAY_WRITE, &ad);
 	if (rc != 0)
 		netlbl_skbuff_err(skb, rc, 0);
 	return rc;
@@ -2523,6 +2716,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 	struct iphdr *hdr;
 	char smack[SMK_LABELLEN];
 	int rc;
+	struct smk_audit_info ad;
 
 	/* handle mapped IPv4 packets arriving via IPv6 sockets */
 	if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
@@ -2536,11 +2730,17 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 		strncpy(smack, smack_known_huh.smk_known, SMK_MAXLEN);
 	netlbl_secattr_destroy(&secattr);
 
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
+	ad.a.u.net.family = family;
+	ad.a.u.net.netif = skb->iif;
+	ipv4_skb_to_auditdata(skb, &ad.a, NULL);
+#endif
 	/*
 	 * Receiving a packet requires that the other end be able to write
 	 * here. Read access is not required.
 	 */
-	rc = smk_access(smack, ssp->smk_in, MAY_WRITE);
+	rc = smk_access(smack, ssp->smk_in, MAY_WRITE, &ad);
 	if (rc != 0)
 		return rc;
 
@@ -2642,6 +2842,7 @@ static int smack_key_permission(key_ref_t key_ref,
 				const struct cred *cred, key_perm_t perm)
 {
 	struct key *keyp;
+	struct smk_audit_info ad;
 
 	keyp = key_ref_to_ptr(key_ref);
 	if (keyp == NULL)
@@ -2657,8 +2858,13 @@ static int smack_key_permission(key_ref_t key_ref,
 	 */
 	if (cred->security == NULL)
 		return -EACCES;
-
-	return smk_access(cred->security, keyp->security, MAY_READWRITE);
+#ifdef CONFIG_AUDIT
+	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_KEY);
+	ad.a.u.key_struct.key = keyp->serial;
+	ad.a.u.key_struct.key_desc = keyp->description;
+#endif
+	return smk_access(cred->security, keyp->security,
+				 MAY_READWRITE, &ad);
 }
 #endif /* CONFIG_KEYS */
 
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index e03a7e1..904af34 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -41,6 +41,7 @@ enum smk_inos {
 	SMK_AMBIENT	= 7,	/* internet ambient label */
 	SMK_NETLBLADDR	= 8,	/* single label hosts */
 	SMK_ONLYCAP	= 9,	/* the only "capable" label */
+	SMK_LOGGING	= 10,	/* logging */
 };
 
 /*
@@ -1192,6 +1193,69 @@ static const struct file_operations smk_onlycap_ops = {
 };
 
 /**
+ * smk_read_logging - read() for /smack/logging
+ * @filp: file pointer, not actually used
+ * @buf: where to put the result
+ * @cn: maximum to send along
+ * @ppos: where to start
+ *
+ * Returns number of bytes read or error code, as appropriate
+ */
+static ssize_t smk_read_logging(struct file *filp, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char temp[32];
+	ssize_t rc;
+
+	if (*ppos != 0)
+		return 0;
+
+	sprintf(temp, "%d\n", log_policy);
+	rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp));
+	return rc;
+}
+
+/**
+ * smk_write_logging - write() for /smack/logging
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start
+ *
+ * Returns number of bytes written or error code, as appropriate
+ */
+static ssize_t smk_write_logging(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char temp[32];
+	int i;
+
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	if (count >= sizeof(temp) || count == 0)
+		return -EINVAL;
+
+	if (copy_from_user(temp, buf, count) != 0)
+		return -EFAULT;
+
+	temp[count] = '\0';
+
+	if (sscanf(temp, "%d", &i) != 1)
+		return -EINVAL;
+	if (i < 0 || i > 3)
+		return -EINVAL;
+	log_policy = i;
+	return count;
+}
+
+
+
+static const struct file_operations smk_logging_ops = {
+	.read		= smk_read_logging,
+	.write		= smk_write_logging,
+};
+/**
  * smk_fill_super - fill the /smackfs superblock
  * @sb: the empty superblock
  * @data: unused
@@ -1221,6 +1285,8 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
 			{"netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR},
 		[SMK_ONLYCAP]	=
 			{"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR},
+		[SMK_LOGGING]	=
+			{"logging", &smk_logging_ops, S_IRUGO|S_IWUSR},
 		/* last one */ {""}
 	};
 
-- 
cgit v0.10.2


From 7ba5c840e64d4a967379f1ae3eca73278180b11d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 13 Apr 2009 21:31:17 -0700
Subject: rcu: Add __rcu_pending tracing to hierarchical RCU

Add tracing to __rcu_pending() to provide information on why RCU
processing was kicked off.  This is helpful for debugging hierarchical
RCU, and might also be helpful in learning how hierarchical RCU operates.

Located-by: Anton Blanchard <anton@au1.ibm.com>
Tested-by: Anton Blanchard <anton@au1.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: anton@samba.org
Cc: akpm@linux-foundation.org
Cc: dipankar@in.ibm.com
Cc: manfred@colorfullife.com
Cc: cl@linux-foundation.org
Cc: josht@linux.vnet.ibm.com
Cc: schamp@sgi.com
Cc: niv@us.ibm.com
Cc: dvhltc@us.ibm.com
Cc: ego@in.ibm.com
Cc: laijs@cn.fujitsu.com
Cc: rostedt@goodmis.org
Cc: peterz@infradead.org
Cc: penberg@cs.helsinki.fi
Cc: andi@firstfloor.org
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
LKML-Reference: <1239683479943-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 58b2aa5..5a51538 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -161,8 +161,15 @@ struct rcu_data {
 	unsigned long offline_fqs;	/* Kicked due to being offline. */
 	unsigned long resched_ipi;	/* Sent a resched IPI. */
 
-	/* 5) For future __rcu_pending statistics. */
+	/* 5) __rcu_pending() statistics. */
 	long n_rcu_pending;		/* rcu_pending() calls since boot. */
+	long n_rp_qs_pending;
+	long n_rp_cb_ready;
+	long n_rp_cpu_needs_gp;
+	long n_rp_gp_completed;
+	long n_rp_gp_started;
+	long n_rp_need_fqs;
+	long n_rp_need_nothing;
 
 	int cpu;
 };
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d2a372f..0dccfbb 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	check_cpu_stall(rsp, rdp);
 
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
-	if (rdp->qs_pending)
+	if (rdp->qs_pending) {
+		rdp->n_rp_qs_pending++;
 		return 1;
+	}
 
 	/* Does this CPU have callbacks ready to invoke? */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+		rdp->n_rp_cb_ready++;
 		return 1;
+	}
 
 	/* Has RCU gone idle with this CPU needing another grace period? */
-	if (cpu_needs_another_gp(rsp, rdp))
+	if (cpu_needs_another_gp(rsp, rdp)) {
+		rdp->n_rp_cpu_needs_gp++;
 		return 1;
+	}
 
 	/* Has another RCU grace period completed?  */
-	if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
+	if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */
+		rdp->n_rp_gp_completed++;
 		return 1;
+	}
 
 	/* Has a new RCU grace period started? */
-	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
+	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */
+		rdp->n_rp_gp_started++;
 		return 1;
+	}
 
 	/* Has an RCU GP gone long enough to send resched IPIs &c? */
 	if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
-	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
+	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
+		rdp->n_rp_need_fqs++;
 		return 1;
+	}
 
 	/* nothing to do */
+	rdp->n_rp_need_nothing++;
 	return 0;
 }
 
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4b1875b..fe1dcdb 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = {
 	.release = single_release,
 };
 
-static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
+static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
+{
+	seq_printf(m, "%3d%cnp=%ld "
+		   "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
+		   rdp->cpu,
+		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
+		   rdp->n_rcu_pending,
+		   rdp->n_rp_qs_pending,
+		   rdp->n_rp_cb_ready,
+		   rdp->n_rp_cpu_needs_gp,
+		   rdp->n_rp_gp_completed,
+		   rdp->n_rp_gp_started,
+		   rdp->n_rp_need_fqs,
+		   rdp->n_rp_need_nothing);
+}
+
+static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
+{
+	int cpu;
+	struct rcu_data *rdp;
+
+	for_each_possible_cpu(cpu) {
+		rdp = rsp->rda[cpu];
+		if (rdp->beenonline)
+			print_one_rcu_pending(m, rdp);
+	}
+}
+
+static int show_rcu_pending(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "rcu:\n");
+	print_rcu_pendings(m, &rcu_state);
+	seq_puts(m, "rcu_bh:\n");
+	print_rcu_pendings(m, &rcu_bh_state);
+	return 0;
+}
+
+static int rcu_pending_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcu_pending, NULL);
+}
+
+static struct file_operations rcu_pending_fops = {
+	.owner = THIS_MODULE,
+	.open = rcu_pending_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static struct dentry *rcudir;
+static struct dentry *datadir;
+static struct dentry *datadir_csv;
+static struct dentry *gpdir;
+static struct dentry *hierdir;
+static struct dentry *rcu_pendingdir;
+
 static int __init rcuclassic_trace_init(void)
 {
 	rcudir = debugfs_create_dir("rcu", NULL);
@@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void)
 						NULL, &rcuhier_fops);
 	if (!hierdir)
 		goto free_out;
+
+	rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir,
+						NULL, &rcu_pending_fops);
+	if (!rcu_pendingdir)
+		goto free_out;
 	return 0;
 free_out:
 	if (datadir)
@@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void)
 	debugfs_remove(datadir_csv);
 	debugfs_remove(gpdir);
 	debugfs_remove(hierdir);
+	debugfs_remove(rcu_pendingdir);
 	debugfs_remove(rcudir);
 }
 
-- 
cgit v0.10.2


From 6fd9b3a40b82081d9e6490b0d7cd656e9a78a134 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 13 Apr 2009 21:31:18 -0700
Subject: rcu: Update RCU tracing documentation for __rcu_pending

This patch updates the RCU documentation to reflect the changes in
tracing made in the previous patch in the set.

Located-by: Anton Blanchard <anton@au1.ibm.com>
Tested-by: Anton Blanchard <anton@au1.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: anton@samba.org
Cc: akpm@linux-foundation.org
Cc: dipankar@in.ibm.com
Cc: manfred@colorfullife.com
Cc: cl@linux-foundation.org
Cc: josht@linux.vnet.ibm.com
Cc: schamp@sgi.com
Cc: niv@us.ibm.com
Cc: dvhltc@us.ibm.com
Cc: ego@in.ibm.com
Cc: laijs@cn.fujitsu.com
Cc: rostedt@goodmis.org
Cc: peterz@infradead.org
Cc: penberg@cs.helsinki.fi
Cc: andi@firstfloor.org
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
LKML-Reference: <12396834792865-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 0688482..02cced1 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy).
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu:
-  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
-  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
-  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
-  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
-  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
-  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
-  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
-  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu:
+  0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
+  1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
+  2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
+  3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
+  4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
+  5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
+  6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
+  7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
 rcu_bh:
-  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
-  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
-  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
-  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
-  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
-  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
+  2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
+  4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
 
 The first section lists the rcu_data structures for rcu, the second for
 rcu_bh.  Each section has one line per CPU, or eight for this 8-CPU system.
@@ -253,12 +254,6 @@ o	"pqc" indicates which grace period the last-observed quiescent
 o	"qp" indicates that RCU still expects a quiescent state from
 	this CPU.
 
-o	"rpfq" is the number of rcu_pending() calls on this CPU required
-	to induce this CPU to invoke force_quiescent_state().
-
-o	"rp" is low-order four hex digits of the count of how many times
-	rcu_pending() has been invoked on this CPU.
-
 o	"dt" is the current value of the dyntick counter that is incremented
 	when entering or leaving dynticks idle state, either by the
 	scheduler or by irq.  The number after the "/" is the interrupt
@@ -305,6 +300,9 @@ o	"b" is the batch limit for this CPU.  If more than this number
 	of RCU callbacks is ready to invoke, then the remainder will
 	be deferred.
 
+There is also an rcu/rcudata.csv file with the same information in
+comma-separated-variable spreadsheet format.
+
 
 The output of "cat rcu/rcugp" looks as follows:
 
@@ -411,3 +409,63 @@ o	Each element of the form "1/1 0:127 ^0" represents one struct
 		For example, the first entry at the lowest level shows
 		"^0", indicating that it corresponds to bit zero in
 		the first entry at the middle level.
+
+
+The output of "cat rcu/rcu_pending" looks as follows:
+
+rcu:
+  0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
+  1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
+  2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
+  3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723
+  4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110
+  5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456
+  6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834
+  7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888
+rcu_bh:
+  0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314
+  1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180
+  2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936
+  3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863
+  4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671
+  5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235
+  6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
+  7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
+
+As always, this is once again split into "rcu" and "rcu_bh" portions.
+The fields are as follows:
+
+o	"np" is the number of times that __rcu_pending() has been invoked
+	for the corresponding flavor of RCU.
+
+o	"qsp" is the number of times that the RCU was waiting for a
+	quiescent state from this CPU.
+
+o	"cbr" is the number of times that this CPU had RCU callbacks
+	that had passed through a grace period, and were thus ready
+	to be invoked.
+
+o	"cng" is the number of times that this CPU needed another
+	grace period while RCU was idle.
+
+o	"gpc" is the number of times that an old grace period had
+	completed, but this CPU was not yet aware of it.
+
+o	"gps" is the number of times that a new grace period had started,
+	but this CPU was not yet aware of it.
+
+o	"nf" is the number of times that this CPU suspected that the
+	current grace period had run for too long, and thus needed to
+	be forced.
+
+	Please note that "forcing" consists of sending resched IPIs
+	to holdout CPUs.  If that CPU really still is in an old RCU
+	read-side critical section, then we really do have to wait for it.
+	The assumption behing "forcing" is that the CPU is not still in
+	an old RCU read-side critical section, but has not yet responded
+	for some other reason.
+
+o	"nn" is the number of times that this CPU needed nothing.  Alert
+	readers will note that the rcu "nn" number for a given CPU very
+	closely matches the rcu_bh "np" number for that same CPU.  This
+	is due to short-circuit evaluation in rcu_pending().
-- 
cgit v0.10.2


From 66aa230e437d89ca56224135f617e2d8e391a3ef Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Tue, 14 Apr 2009 12:54:29 +0530
Subject: x86: page_types.h unification of declarations

Impact: unification of declarations, cleanup

Unification of declarations:
 moved init_memory_mapping, initmem_init and free_initmem from
page_XX_types.h to page_types.h

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <1239693869.3033.31.camel@ht.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 0f915ae..6f1b733 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE;
 extern int sysctl_legacy_va_layout;
 
 extern void find_low_pfn_range(void);
-extern unsigned long init_memory_mapping(unsigned long start,
-					 unsigned long end);
-extern void initmem_init(unsigned long, unsigned long);
-extern void free_initmem(void);
 extern void setup_bootmem_allocator(void);
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index d38c91b..3f58718 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -71,12 +71,6 @@ extern unsigned long __phys_addr(unsigned long);
 
 #define vmemmap ((struct page *)VMEMMAP_START)
 
-extern unsigned long init_memory_mapping(unsigned long start,
-					 unsigned long end);
-
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern void free_initmem(void);
-
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 826ad37..6473f5c 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern unsigned long init_memory_mapping(unsigned long start,
+					 unsigned long end);
+
+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif	/* _ASM_X86_PAGE_DEFS_H */
-- 
cgit v0.10.2


From e7d43a74cb07cbc4b8e9b5e4a914816b33fb0719 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Tue, 14 Apr 2009 13:18:28 +0530
Subject: x86: avoid multiple declaration of kstack_depth_to_print

Impact: cleanup

asm/stacktrace.h is more appropriate so removing other 2 declarations.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
LKML-Reference: <1239695308.3033.34.camel@ht.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0d53425..9aa3ab2 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,7 +74,6 @@ static inline int get_si_code(unsigned long condition)
 }
 
 extern int panic_on_unrecovered_nmi;
-extern int kstack_depth_to_print;
 
 void math_error(void __user *);
 void math_emulate(struct math_emu_info *);
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index da87590b..81086c2 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -29,7 +29,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *sp, unsigned long bp, char *log_lvl);
 
 extern unsigned int code_bytes;
-extern int kstack_depth_to_print;
 
 /* The form of the top of the frame on the stack */
 struct stack_frame {
-- 
cgit v0.10.2


From f711f6090a81cbd396b63de90f415d33f563af9b Mon Sep 17 00:00:00 2001
From: Gautham R Shenoy <ego@in.ibm.com>
Date: Tue, 14 Apr 2009 10:25:30 +0530
Subject: sched: Nominate idle load balancer from a semi-idle package.

Currently the nomination of idle-load balancer is done by choosing the first
idle cpu in the nohz.cpu_mask. This may not be power-efficient, since
such an idle cpu could come from a completely idle core/package thereby
preventing the whole core/package from being in a low-power state.

For eg, consider a quad-core dual package system. The cpu numbering need
not be sequential and can something like [0, 2, 4, 6] and [1, 3, 5, 7].
With sched_mc/smt_power_savings and the power-aware IRQ balance, we try to keep
as fewer Packages/Cores active. But the current idle load balancer logic
goes against this by choosing the first_cpu in the nohz.cpu_mask and not
taking the system topology into consideration.

Improve the algorithm to nominate the idle load balancer from a semi idle
cores/packages thereby increasing the probability of the cores/packages being
in deeper sleep states for longer duration.

The algorithm is activated only when sched_mc/smt_power_savings != 0.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090414045530.7645.12175.stgit@sofia.in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 5724508..b0fefa3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4240,10 +4240,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 static struct {
 	atomic_t load_balancer;
 	cpumask_var_t cpu_mask;
+	cpumask_var_t ilb_grp_nohz_mask;
 } nohz ____cacheline_aligned = {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+/**
+ * lowest_flag_domain - Return lowest sched_domain containing flag.
+ * @cpu:	The cpu whose lowest level of sched domain is to
+ *		be returned.
+ * @flag:	The flag to check for the lowest sched_domain
+ *		for the given cpu.
+ *
+ * Returns the lowest sched_domain of a cpu which contains the given flag.
+ */
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd)
+		if (sd && (sd->flags & flag))
+			break;
+
+	return sd;
+}
+
+/**
+ * for_each_flag_domain - Iterates over sched_domains containing the flag.
+ * @cpu:	The cpu whose domains we're iterating over.
+ * @sd:		variable holding the value of the power_savings_sd
+ *		for cpu.
+ * @flag:	The flag to filter the sched_domains to be iterated.
+ *
+ * Iterates over all the scheduler domains for a given cpu that has the 'flag'
+ * set, starting from the lowest sched_domain to the highest.
+ */
+#define for_each_flag_domain(cpu, sd, flag) \
+	for (sd = lowest_flag_domain(cpu, flag); \
+		(sd && (sd->flags & flag)); sd = sd->parent)
+
+/**
+ * is_semi_idle_group - Checks if the given sched_group is semi-idle.
+ * @ilb_group:	group to be checked for semi-idleness
+ *
+ * Returns:	1 if the group is semi-idle. 0 otherwise.
+ *
+ * We define a sched_group to be semi idle if it has atleast one idle-CPU
+ * and atleast one non-idle CPU. This helper function checks if the given
+ * sched_group is semi-idle or not.
+ */
+static inline int is_semi_idle_group(struct sched_group *ilb_group)
+{
+	cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+					sched_group_cpus(ilb_group));
+
+	/*
+	 * A sched_group is semi-idle when it has atleast one busy cpu
+	 * and atleast one idle cpu.
+	 */
+	if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+		return 0;
+
+	if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+		return 0;
+
+	return 1;
+}
+/**
+ * find_new_ilb - Finds the optimum idle load balancer for nomination.
+ * @cpu:	The cpu which is nominating a new idle_load_balancer.
+ *
+ * Returns:	Returns the id of the idle load balancer if it exists,
+ *		Else, returns >= nr_cpu_ids.
+ *
+ * This algorithm picks the idle load balancer such that it belongs to a
+ * semi-idle powersavings sched_domain. The idea is to try and avoid
+ * completely idle packages/cores just for the purpose of idle load balancing
+ * when there are other idle cpu's which are better suited for that job.
+ */
+static int find_new_ilb(int cpu)
+{
+	struct sched_domain *sd;
+	struct sched_group *ilb_group;
+
+	/*
+	 * Have idle load balancer selection from semi-idle packages only
+	 * when power-aware load balancing is enabled
+	 */
+	if (!(sched_smt_power_savings || sched_mc_power_savings))
+		goto out_done;
+
+	/*
+	 * Optimize for the case when we have no idle CPUs or only one
+	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
+	 */
+	if (cpumask_weight(nohz.cpu_mask) < 2)
+		goto out_done;
+
+	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
+		ilb_group = sd->groups;
+
+		do {
+			if (is_semi_idle_group(ilb_group))
+				return cpumask_first(nohz.ilb_grp_nohz_mask);
+
+			ilb_group = ilb_group->next;
+
+		} while (ilb_group != sd->groups);
+	}
+
+out_done:
+	return cpumask_first(nohz.cpu_mask);
+}
+#else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
+static inline int find_new_ilb(int call_cpu)
+{
+	return first_cpu(nohz.cpu_mask);
+}
+#endif
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -4468,15 +4584,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
 		}
 
 		if (atomic_read(&nohz.load_balancer) == -1) {
-			/*
-			 * simple selection for now: Nominate the
-			 * first cpu in the nohz list to be the next
-			 * ilb owner.
-			 *
-			 * TBD: Traverse the sched domains and nominate
-			 * the nearest cpu in the nohz.cpu_mask.
-			 */
-			int ilb = cpumask_first(nohz.cpu_mask);
+			int ilb = find_new_ilb(cpu);
 
 			if (ilb < nr_cpu_ids)
 				resched_cpu(ilb);
@@ -9051,6 +9159,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
 	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+	alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask);
 #endif
 	alloc_bootmem_cpumask_var(&cpu_isolated_map);
 #endif /* SMP */
-- 
cgit v0.10.2


From e790fb0ba64bfec158e1219d899cb588275d12ab Mon Sep 17 00:00:00 2001
From: Gautham R Shenoy <ego@in.ibm.com>
Date: Tue, 14 Apr 2009 10:25:35 +0530
Subject: sched: Nominate a power-efficient ilb in select_nohz_balancer()

The CPU that first goes idle becomes the idle-load-balancer and remains
that until either it picks up a task or till all the CPUs of the system
goes idle.

Optimize this further to allow it to relinquish it's post
once all it's siblings in the power-aware sched_domain go idle, thereby
allowing the whole package-core to go idle. While relinquising the post,
nominate another an idle-load balancer from a semi-idle core/package.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090414045535.7645.31641.stgit@sofia.in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index b0fefa3..36d213b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4414,8 +4414,24 @@ int select_nohz_load_balancer(int stop_tick)
 			/* make me the ilb owner */
 			if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
 				return 1;
-		} else if (atomic_read(&nohz.load_balancer) == cpu)
+		} else if (atomic_read(&nohz.load_balancer) == cpu) {
+			int new_ilb;
+
+			if (!(sched_smt_power_savings ||
+						sched_mc_power_savings))
+				return 1;
+			/*
+			 * Check to see if there is a more power-efficient
+			 * ilb.
+			 */
+			new_ilb = find_new_ilb(cpu);
+			if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
+				atomic_set(&nohz.load_balancer, -1);
+				resched_cpu(new_ilb);
+				return 0;
+			}
 			return 1;
+		}
 	} else {
 		if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
 			return 0;
-- 
cgit v0.10.2


From 6424fb38667fffbbb1b90be0ffd9a0c540db6a4b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 13 Apr 2009 23:51:46 -0700
Subject: x86: remove (null) in /sys kernel_page_tables

Impact: cleanup

%p prints out 0x000000000000000 as (null)
so use %lx instead.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <49E43282.1090607@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e7277cb..a725b7f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		   st->current_address >= st->marker[1].start_address) {
 		const char *unit = units;
 		unsigned long delta;
+		int width = sizeof(unsigned long) * 2;
 
 		/*
 		 * Now print the actual finished series
 		 */
-		seq_printf(m, "0x%p-0x%p   ",
-			   (void *)st->start_address,
-			   (void *)st->current_address);
+		seq_printf(m, "0x%0*lx-0x%0*lx   ",
+			   width, st->start_address,
+			   width, st->current_address);
 
 		delta = (st->current_address - st->start_address) >> 10;
 		while (!(delta & 1023) && unit[1]) {
-- 
cgit v0.10.2


From 02bec490450836ebbd628e97ec03f10b57def8ce Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Tue, 24 Mar 2009 12:24:35 +0100
Subject: ALSA: lx6464es - driver for the digigram lx6464es interface

prototype of a driver for the digigram lx6464es 64 channel ethersound
interface.

Signed-off-by: Tim Blechmann <tim@klingt.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ee98cd5..2b1a695 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1005,6 +1005,7 @@
 #define PCI_DEVICE_ID_PLX_PCI200SYN	0x3196
 #define PCI_DEVICE_ID_PLX_9030          0x9030
 #define PCI_DEVICE_ID_PLX_9050		0x9050
+#define PCI_DEVICE_ID_PLX_9056		0x9056
 #define PCI_DEVICE_ID_PLX_9080		0x9080
 #define PCI_DEVICE_ID_PLX_GTEK_SERIAL2	0xa001
 
@@ -1847,6 +1848,10 @@
 #define PCI_SUBDEVICE_ID_HYPERCOPE_METRO	0x0107
 #define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2	0x0108
 
+#define PCI_VENDOR_ID_DIGIGRAM		0x1369
+#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM	0xc001
+#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM	0xc002
+
 #define PCI_VENDOR_ID_KAWASAKI		0x136b
 #define PCI_DEVICE_ID_MCHIP_KL5A72002	0xff01
 
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 93422e3..e912b70 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -622,6 +622,16 @@ config SND_KORG1212
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-korg1212.
 
+config SND_LX6464ES
+	tristate "Digigram LX6464ES"
+	select SND_PCM
+	help
+	  Say Y here to include support for Digigram LX6464ES boards.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called snd-lx6464es.
+
+
 config SND_MAESTRO3
 	tristate "ESS Allegro/Maestro3"
 	select SND_AC97_CODEC
diff --git a/sound/pci/Makefile b/sound/pci/Makefile
index 65b25d2..7d83e08 100644
--- a/sound/pci/Makefile
+++ b/sound/pci/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_SND) += \
 	ca0106/ \
 	cs46xx/ \
 	cs5535audio/ \
+	lx6464es/ \
 	echoaudio/ \
 	emu10k1/ \
 	hda/ \
diff --git a/sound/pci/lx6464es/Makefile b/sound/pci/lx6464es/Makefile
new file mode 100644
index 0000000..eb04a6c
--- /dev/null
+++ b/sound/pci/lx6464es/Makefile
@@ -0,0 +1,2 @@
+snd-lx6464es-objs := lx6464es.o lx_core.o
+obj-$(CONFIG_SND_LX6464ES) += snd-lx6464es.o
diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c
new file mode 100644
index 0000000..7bc8b8c
--- /dev/null
+++ b/sound/pci/lx6464es/lx6464es.c
@@ -0,0 +1,1152 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ *
+ * Copyright (c) 2008, 2009 Tim Blechmann <tim@klingt.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include <sound/initval.h>
+#include <sound/control.h>
+#include <sound/info.h>
+
+#include "lx6464es.h"
+
+MODULE_AUTHOR("Tim Blechmann");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("digigram lx6464es");
+MODULE_SUPPORTED_DEVICE("{digigram lx6464es{}}");
+
+
+static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
+static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
+static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+static const char card_name[] = "LX6464ES";
+
+
+#define PCI_DEVICE_ID_PLX_LX6464ES		PCI_DEVICE_ID_PLX_9056
+
+static struct pci_device_id snd_lx6464es_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES),
+	  .subvendor = PCI_VENDOR_ID_DIGIGRAM,
+	  .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM
+	},			/* LX6464ES */
+	{ PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES),
+	  .subvendor = PCI_VENDOR_ID_DIGIGRAM,
+	  .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM
+	},			/* LX6464ES-CAE */
+	{ 0, },
+};
+
+MODULE_DEVICE_TABLE(pci, snd_lx6464es_ids);
+
+
+
+/* PGO pour USERo dans le registre pci_0x06/loc_0xEC */
+#define CHIPSC_RESET_XILINX (1L<<16)
+
+
+/* alsa callbacks */
+static struct snd_pcm_hardware lx_caps = {
+	.info             = (SNDRV_PCM_INFO_MMAP |
+			     SNDRV_PCM_INFO_INTERLEAVED |
+			     SNDRV_PCM_INFO_MMAP_VALID |
+			     SNDRV_PCM_INFO_SYNC_START),
+	.formats	  = (SNDRV_PCM_FMTBIT_S16_LE |
+			     SNDRV_PCM_FMTBIT_S16_BE |
+			     SNDRV_PCM_FMTBIT_S24_3LE |
+			     SNDRV_PCM_FMTBIT_S24_3BE),
+	.rates            = (SNDRV_PCM_RATE_CONTINUOUS |
+			     SNDRV_PCM_RATE_8000_192000),
+	.rate_min         = 8000,
+	.rate_max         = 192000,
+	.channels_min     = 2,
+	.channels_max     = 64,
+	.buffer_bytes_max = 64*2*3*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER,
+	.period_bytes_min = (2*2*MICROBLAZE_IBL_MIN*2),
+	.period_bytes_max = (4*64*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER),
+	.periods_min      = 2,
+	.periods_max      = MAX_STREAM_BUFFER,
+};
+
+static int lx_set_granularity(struct lx6464es *chip, u32 gran);
+
+
+static int lx_hardware_open(struct lx6464es *chip,
+			    struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int channels = runtime->channels;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_pcm_uframes_t period_size = runtime->period_size;
+
+	snd_printd(LXP "allocating pipe for %d channels\n", channels);
+	err = lx_pipe_allocate(chip, 0, is_capture, channels);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "allocating pipe failed\n");
+		return err;
+	}
+
+	err = lx_set_granularity(chip, period_size);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "setting granularity to %ld failed\n",
+			   period_size);
+		return err;
+	}
+
+	return 0;
+}
+
+static int lx_hardware_start(struct lx6464es *chip,
+			     struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "setting stream format\n");
+	err = lx_stream_set_format(chip, runtime, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "setting stream format failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "starting pipe\n");
+	err = lx_pipe_start(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "starting pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "waiting for pipe to start\n");
+	err = lx_pipe_wait_for_start(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "waiting for pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_hardware_stop(struct lx6464es *chip,
+			    struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "pausing pipe\n");
+	err = lx_pipe_pause(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "pausing pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "waiting for pipe to become idle\n");
+	err = lx_pipe_wait_for_idle(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "waiting for pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "stopping pipe\n");
+	err = lx_pipe_stop(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(LXP "stopping pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_hardware_close(struct lx6464es *chip,
+			     struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "releasing pipe\n");
+	err = lx_pipe_release(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(LXP "releasing pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int err = 0;
+	int board_rate;
+
+	snd_printdd("->lx_pcm_open\n");
+	mutex_lock(&chip->setup_mutex);
+
+	/* copy the struct snd_pcm_hardware struct */
+	runtime->hw = lx_caps;
+
+#if 0
+	/* buffer-size should better be multiple of period-size */
+	err = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not constrain periods\n");
+		goto exit;
+	}
+#endif
+
+	/* the clock rate cannot be changed */
+	board_rate = chip->board_sample_rate;
+	err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_RATE,
+					   board_rate, board_rate);
+
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not constrain periods\n");
+		goto exit;
+	}
+
+	/* constrain period size */
+	err = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
+					   MICROBLAZE_IBL_MIN,
+					   MICROBLAZE_IBL_MAX);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP
+			   "could not constrain period size\n");
+		goto exit;
+	}
+
+	snd_pcm_hw_constraint_step(runtime, 0,
+				   SNDRV_PCM_HW_PARAM_BUFFER_SIZE, 32);
+
+	snd_pcm_set_sync(substream);
+	err = 0;
+
+exit:
+	runtime->private_data = chip;
+
+	mutex_unlock(&chip->setup_mutex);
+	snd_printdd("<-lx_pcm_open, %d\n", err);
+	return err;
+}
+
+static int lx_pcm_close(struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	snd_printdd("->lx_pcm_close\n");
+	return err;
+}
+
+static snd_pcm_uframes_t lx_pcm_stream_pointer(struct snd_pcm_substream
+					       *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	snd_pcm_uframes_t pos;
+	unsigned long flags;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	struct lx_stream *lx_stream = is_capture ? &chip->capture_stream :
+		&chip->playback_stream;
+
+	snd_printdd("->lx_pcm_stream_pointer\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+	pos = lx_stream->frame_pos * substream->runtime->period_size;
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	snd_printdd(LXP "stream_pointer at %ld\n", pos);
+	return pos;
+}
+
+static int lx_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+	const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printdd("->lx_pcm_prepare\n");
+
+	mutex_lock(&chip->setup_mutex);
+
+	if (chip->hardware_running[is_capture]) {
+		err = lx_hardware_stop(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to stop hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		err = lx_hardware_close(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to close hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+	}
+
+	snd_printd(LXP "opening hardware\n");
+	err = lx_hardware_open(chip, substream);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "failed to open hardware. "
+			   "Error code %d\n", err);
+		goto exit;
+	}
+
+	err = lx_hardware_start(chip, substream);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "failed to start hardware. "
+			   "Error code %d\n", err);
+		goto exit;
+	}
+
+	chip->hardware_running[is_capture] = 1;
+
+	if (chip->board_sample_rate != substream->runtime->rate) {
+		if (!err)
+			chip->board_sample_rate = substream->runtime->rate;
+	}
+
+exit:
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static int lx_pcm_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *hw_params, int is_capture)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+
+	snd_printdd("->lx_pcm_hw_params\n");
+
+	mutex_lock(&chip->setup_mutex);
+
+	/* set dma buffer */
+	err = snd_pcm_lib_malloc_pages(substream,
+				       params_buffer_bytes(hw_params));
+
+	if (is_capture)
+		chip->capture_stream.stream = substream;
+	else
+		chip->playback_stream.stream = substream;
+
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static int lx_pcm_hw_params_playback(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *hw_params)
+{
+	return lx_pcm_hw_params(substream, hw_params, 0);
+}
+
+static int lx_pcm_hw_params_capture(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *hw_params)
+{
+	return lx_pcm_hw_params(substream, hw_params, 1);
+}
+
+static int lx_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printdd("->lx_pcm_hw_free\n");
+	mutex_lock(&chip->setup_mutex);
+
+	if (chip->hardware_running[is_capture]) {
+		err = lx_hardware_stop(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to stop hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		err = lx_hardware_close(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to close hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		chip->hardware_running[is_capture] = 0;
+	}
+
+	err = snd_pcm_lib_free_pages(substream);
+
+	if (is_capture)
+		chip->capture_stream.stream = 0;
+	else
+		chip->playback_stream.stream = 0;
+
+exit:
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static void lx_trigger_start(struct lx6464es *chip, struct lx_stream *lx_stream)
+{
+	struct snd_pcm_substream *substream = lx_stream->stream;
+	const int is_capture = lx_stream->is_capture;
+
+	int err;
+
+	const u32 channels = substream->runtime->channels;
+	const u32 bytes_per_frame = channels * 3;
+	const u32 period_size = substream->runtime->period_size;
+	const u32 periods = substream->runtime->periods;
+	const u32 period_bytes = period_size * bytes_per_frame;
+
+	dma_addr_t buf = substream->dma_buffer.addr;
+	int i;
+
+	u32 needed, freed;
+	u32 size_array[5];
+
+	for (i = 0; i != periods; ++i) {
+		u32 buffer_index = 0;
+
+		err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed,
+				    size_array);
+		snd_printdd(LXP "starting: needed %d, freed %d\n",
+			    needed, freed);
+
+		err = lx_buffer_give(chip, 0, is_capture, period_bytes,
+				     lower_32_bits(buf), upper_32_bits(buf),
+				     &buffer_index);
+
+		snd_printdd(LXP "starting: buffer index %x on %p (%d bytes)\n",
+			    buffer_index, (void *)buf, period_bytes);
+		buf += period_bytes;
+	}
+
+	err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array);
+	snd_printdd(LXP "starting: needed %d, freed %d\n", needed, freed);
+
+	snd_printd(LXP "starting: starting stream\n");
+	err = lx_stream_start(chip, 0, is_capture);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP "couldn't start stream\n");
+	else
+		lx_stream->status = LX_STREAM_STATUS_RUNNING;
+
+	lx_stream->frame_pos = 0;
+}
+
+static void lx_trigger_stop(struct lx6464es *chip, struct lx_stream *lx_stream)
+{
+	const int is_capture = lx_stream->is_capture;
+	int err;
+
+	snd_printd(LXP "stopping: stopping stream\n");
+	err = lx_stream_stop(chip, 0, is_capture);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP "couldn't stop stream\n");
+	else
+		lx_stream->status = LX_STREAM_STATUS_FREE;
+
+}
+
+static void lx_trigger_tasklet_dispatch_stream(struct lx6464es *chip,
+					       struct lx_stream *lx_stream)
+{
+	switch (lx_stream->status) {
+	case LX_STREAM_STATUS_SCHEDULE_RUN:
+		lx_trigger_start(chip, lx_stream);
+		break;
+
+	case LX_STREAM_STATUS_SCHEDULE_STOP:
+		lx_trigger_stop(chip, lx_stream);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void lx_trigger_tasklet(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	unsigned long flags;
+
+	snd_printdd("->lx_trigger_tasklet\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+	lx_trigger_tasklet_dispatch_stream(chip, &chip->capture_stream);
+	lx_trigger_tasklet_dispatch_stream(chip, &chip->playback_stream);
+	spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static int lx_pcm_trigger_dispatch(struct lx6464es *chip,
+				   struct lx_stream *lx_stream, int cmd)
+{
+	int err = 0;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		lx_stream->status = LX_STREAM_STATUS_SCHEDULE_RUN;
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+		lx_stream->status = LX_STREAM_STATUS_SCHEDULE_STOP;
+		break;
+
+	default:
+		err = -EINVAL;
+		goto exit;
+	}
+	tasklet_schedule(&chip->trigger_tasklet);
+
+exit:
+	return err;
+}
+
+
+static int lx_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+	struct lx_stream *stream = is_capture ? &chip->capture_stream :
+		&chip->playback_stream;
+
+	snd_printdd("->lx_pcm_trigger\n");
+
+	return lx_pcm_trigger_dispatch(chip, stream, cmd);
+}
+
+static int snd_lx6464es_free(struct lx6464es *chip)
+{
+	snd_printdd("->snd_lx6464es_free\n");
+
+	lx_irq_disable(chip);
+
+	if (chip->irq >= 0)
+		free_irq(chip->irq, chip);
+
+	iounmap(chip->port_dsp_bar);
+	ioport_unmap(chip->port_plx_remapped);
+
+	pci_release_regions(chip->pci);
+	pci_disable_device(chip->pci);
+
+	kfree(chip);
+
+	return 0;
+}
+
+static int snd_lx6464es_dev_free(struct snd_device *device)
+{
+	return snd_lx6464es_free(device->device_data);
+}
+
+/* reset the dsp during initialization */
+static int __devinit lx_init_xilinx_reset(struct lx6464es *chip)
+{
+	int i;
+	u32 plx_reg = lx_plx_reg_read(chip, ePLX_CHIPSC);
+
+	snd_printdd("->lx_init_xilinx_reset\n");
+
+	/* activate reset of xilinx */
+	plx_reg &= ~CHIPSC_RESET_XILINX;
+
+	lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg);
+	msleep(1);
+
+	lx_plx_reg_write(chip, ePLX_MBOX3, 0);
+	msleep(1);
+
+	plx_reg |= CHIPSC_RESET_XILINX;
+	lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg);
+
+	/* deactivate reset of xilinx */
+	for (i = 0; i != 100; ++i) {
+		u32 reg_mbox3;
+		msleep(10);
+		reg_mbox3 = lx_plx_reg_read(chip, ePLX_MBOX3);
+		if (reg_mbox3) {
+			snd_printd(LXP "xilinx reset done\n");
+			snd_printdd(LXP "xilinx took %d loops\n", i);
+			break;
+		}
+	}
+
+	/* todo: add some error handling? */
+
+	/* clear mr */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	/* le xilinx ES peut ne pas etre encore pret, on attend. */
+	msleep(600);
+
+	return 0;
+}
+
+static int __devinit lx_init_xilinx_test(struct lx6464es *chip)
+{
+	u32 reg;
+
+	snd_printdd("->lx_init_xilinx_test\n");
+
+	/* TEST if we have access to Xilinx/MicroBlaze */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	reg = lx_dsp_reg_read(chip, eReg_CSM);
+
+	if (reg) {
+		snd_printk(KERN_ERR LXP "Problem: Reg_CSM %x.\n", reg);
+
+		/* PCI9056_SPACE0_REMAP */
+		lx_plx_reg_write(chip, ePLX_PCICR, 1);
+
+		reg = lx_dsp_reg_read(chip, eReg_CSM);
+		if (reg) {
+			snd_printk(KERN_ERR LXP "Error: Reg_CSM %x.\n", reg);
+			return -EAGAIN; /* seems to be appropriate */
+		}
+	}
+
+	snd_printd(LXP "Xilinx/MicroBlaze access test successful\n");
+
+	return 0;
+}
+
+/* initialize ethersound */
+static int __devinit lx_init_ethersound_config(struct lx6464es *chip)
+{
+	int i;
+	u32 orig_conf_es = lx_dsp_reg_read(chip, eReg_CONFES);
+
+	u32 default_conf_es = (64 << IOCR_OUTPUTS_OFFSET) |
+		(64 << IOCR_INPUTS_OFFSET) |
+		(FREQ_RATIO_SINGLE_MODE << FREQ_RATIO_OFFSET);
+
+	u32 conf_es = (orig_conf_es & CONFES_READ_PART_MASK)
+		| (default_conf_es & CONFES_WRITE_PART_MASK);
+
+	snd_printdd("->lx_init_ethersound\n");
+
+	chip->freq_ratio = FREQ_RATIO_SINGLE_MODE;
+
+	/*
+	 * write it to the card !
+	 * this actually kicks the ES xilinx, the first time since poweron.
+	 * the MAC address in the Reg_ADMACESMSB Reg_ADMACESLSB registers
+	 * is not ready before this is done, and the bit 2 in Reg_CSES is set.
+	 * */
+	lx_dsp_reg_write(chip, eReg_CONFES, conf_es);
+
+	for (i = 0; i != 1000; ++i) {
+		if (lx_dsp_reg_read(chip, eReg_CSES) & 4) {
+			snd_printd(LXP "ethersound initialized after %dms\n",
+				   i);
+			goto ethersound_initialized;
+		}
+		msleep(1);
+	}
+	snd_printk(KERN_WARNING LXP
+		   "ethersound could not be initialized after %dms\n", i);
+	return -ETIMEDOUT;
+
+ ethersound_initialized:
+	snd_printd(LXP "ethersound initialized\n");
+	return 0;
+}
+
+static int __devinit lx_init_get_version_features(struct lx6464es *chip)
+{
+	u32 dsp_version;
+
+	int err;
+
+	snd_printdd("->lx_init_get_version_features\n");
+
+	err = lx_dsp_get_version(chip, &dsp_version);
+
+	if (err == 0) {
+		u32 freq;
+
+		snd_printk(LXP "DSP version: V%02d.%02d #%d\n",
+			   (dsp_version>>16) & 0xff, (dsp_version>>8) & 0xff,
+			   dsp_version & 0xff);
+
+		/* later: what firmware version do we expect? */
+
+		/* retrieve Play/Rec features */
+		/* done here because we may have to handle alternate
+		 * DSP files. */
+		/* later */
+
+		/* init the EtherSound sample rate */
+		err = lx_dsp_get_clock_frequency(chip, &freq);
+		if (err == 0)
+			chip->board_sample_rate = freq;
+		snd_printd(LXP "actual clock frequency %d\n", freq);
+	} else {
+		snd_printk(KERN_ERR LXP "DSP corrupted \n");
+		err = -EAGAIN;
+	}
+
+	return err;
+}
+
+static int lx_set_granularity(struct lx6464es *chip, u32 gran)
+{
+	int err = 0;
+	u32 snapped_gran = MICROBLAZE_IBL_MIN;
+
+	snd_printdd("->lx_set_granularity\n");
+
+	/* blocksize is a power of 2 */
+	while ((snapped_gran < gran) &&
+	       (snapped_gran < MICROBLAZE_IBL_MAX)) {
+		snapped_gran *= 2;
+	}
+
+	if (snapped_gran == chip->pcm_granularity)
+		return 0;
+
+	err = lx_dsp_set_granularity(chip, snapped_gran);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not set granularity\n");
+		err = -EAGAIN;
+	}
+
+	if (snapped_gran != gran)
+		snd_printk(LXP "snapped blocksize to %d\n", snapped_gran);
+
+	snd_printd(LXP "set blocksize on board %d\n", snapped_gran);
+	chip->pcm_granularity = snapped_gran;
+
+	return err;
+}
+
+/* initialize and test the xilinx dsp chip */
+static int __devinit lx_init_dsp(struct lx6464es *chip)
+{
+	int err;
+	u8 mac_address[6];
+	int i;
+
+	snd_printdd("->lx_init_dsp\n");
+
+	snd_printd(LXP "initialize board\n");
+	err = lx_init_xilinx_reset(chip);
+	if (err)
+		return err;
+
+	snd_printd(LXP "testing board\n");
+	err = lx_init_xilinx_test(chip);
+	if (err)
+		return err;
+
+	snd_printd(LXP "initialize ethersound configuration\n");
+	err = lx_init_ethersound_config(chip);
+	if (err)
+		return err;
+
+	lx_irq_enable(chip);
+
+	/** \todo the mac address should be ready by not, but it isn't,
+	 *  so we wait for it */
+	for (i = 0; i != 1000; ++i) {
+		err = lx_dsp_get_mac(chip, mac_address);
+		if (err)
+			return err;
+		if (mac_address[0] || mac_address[1] || mac_address[2] ||
+		    mac_address[3] || mac_address[4] || mac_address[5])
+			goto mac_ready;
+		msleep(1);
+	}
+	return -ETIMEDOUT;
+
+mac_ready:
+	snd_printd(LXP "mac address ready read after: %dms\n", i);
+	snd_printk(LXP "mac address: %02X.%02X.%02X.%02X.%02X.%02X\n",
+		   mac_address[0], mac_address[1], mac_address[2],
+		   mac_address[3], mac_address[4], mac_address[5]);
+
+	err = lx_init_get_version_features(chip);
+	if (err)
+		return err;
+
+	lx_set_granularity(chip, MICROBLAZE_IBL_DEFAULT);
+
+	chip->playback_mute = 0;
+
+	return err;
+}
+
+static struct snd_pcm_ops lx_ops_playback = {
+	.open      = lx_pcm_open,
+	.close     = lx_pcm_close,
+	.ioctl     = snd_pcm_lib_ioctl,
+	.prepare   = lx_pcm_prepare,
+	.hw_params = lx_pcm_hw_params_playback,
+	.hw_free   = lx_pcm_hw_free,
+	.trigger   = lx_pcm_trigger,
+	.pointer   = lx_pcm_stream_pointer,
+};
+
+static struct snd_pcm_ops lx_ops_capture = {
+	.open      = lx_pcm_open,
+	.close     = lx_pcm_close,
+	.ioctl     = snd_pcm_lib_ioctl,
+	.prepare   = lx_pcm_prepare,
+	.hw_params = lx_pcm_hw_params_capture,
+	.hw_free   = lx_pcm_hw_free,
+	.trigger   = lx_pcm_trigger,
+	.pointer   = lx_pcm_stream_pointer,
+};
+
+static int __devinit lx_pcm_create(struct lx6464es *chip)
+{
+	int err;
+	struct snd_pcm *pcm;
+
+	u32 size = 64 *		     /* channels */
+		3 *		     /* 24 bit samples */
+		MAX_STREAM_BUFFER *  /* periods */
+		MICROBLAZE_IBL_MAX * /* frames per period */
+		2;		     /* duplex */
+
+	size = PAGE_ALIGN(size);
+
+	/* hardcoded device name & channel count */
+	err = snd_pcm_new(chip->card, (char *)card_name, 0,
+			  1, 1, &pcm);
+
+	pcm->private_data = chip;
+
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &lx_ops_playback);
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &lx_ops_capture);
+
+	pcm->info_flags = 0;
+	strcpy(pcm->name, card_name);
+
+	err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+						    snd_dma_pci_data(chip->pci),
+						    size, size);
+	if (err < 0)
+		return err;
+
+	chip->pcm = pcm;
+	chip->capture_stream.is_capture = 1;
+
+	return 0;
+}
+
+static int lx_control_playback_info(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	return 0;
+}
+
+static int lx_control_playback_get(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	struct lx6464es *chip = snd_kcontrol_chip(kcontrol);
+	ucontrol->value.integer.value[0] = chip->playback_mute;
+	return 0;
+}
+
+static int lx_control_playback_put(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	struct lx6464es *chip = snd_kcontrol_chip(kcontrol);
+	int changed = 0;
+	int current_value = chip->playback_mute;
+
+	if (current_value != ucontrol->value.integer.value[0]) {
+		lx_level_unmute(chip, 0, !current_value);
+		chip->playback_mute = !current_value;
+		changed = 1;
+	}
+	return changed;
+}
+
+static struct snd_kcontrol_new lx_control_playback_switch __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.name = "PCM Playback Switch",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,
+	.info = lx_control_playback_info,
+	.get = lx_control_playback_get,
+	.put = lx_control_playback_put
+};
+
+
+
+static void lx_proc_levels_read(struct snd_info_entry *entry,
+				struct snd_info_buffer *buffer)
+{
+	u32 levels[64];
+	int err;
+	int i, j;
+	struct lx6464es *chip = entry->private_data;
+
+	snd_iprintf(buffer, "capture levels:\n");
+	err = lx_level_peaks(chip, 1, 64, levels);
+	if (err < 0)
+		return;
+
+	for (i = 0; i != 8; ++i) {
+		for (j = 0; j != 8; ++j)
+			snd_iprintf(buffer, "%08x ", levels[i*8+j]);
+		snd_iprintf(buffer, "\n");
+	}
+
+	snd_iprintf(buffer, "\nplayback levels:\n");
+
+	err = lx_level_peaks(chip, 0, 64, levels);
+	if (err < 0)
+		return;
+
+	for (i = 0; i != 8; ++i) {
+		for (j = 0; j != 8; ++j)
+			snd_iprintf(buffer, "%08x ", levels[i*8+j]);
+		snd_iprintf(buffer, "\n");
+	}
+
+	snd_iprintf(buffer, "\n");
+}
+
+static int __devinit lx_proc_create(struct snd_card *card, struct lx6464es *chip)
+{
+	struct snd_info_entry *entry;
+	int err = snd_card_proc_new(card, "levels", &entry);
+	if (err < 0)
+		return err;
+
+	snd_info_set_text_ops(entry, chip, lx_proc_levels_read);
+	return 0;
+}
+
+
+static int __devinit snd_lx6464es_create(struct snd_card *card,
+					 struct pci_dev *pci,
+					 struct lx6464es **rchip)
+{
+	struct lx6464es *chip;
+	int err;
+
+	static struct snd_device_ops ops = {
+		.dev_free = snd_lx6464es_dev_free,
+	};
+
+	snd_printdd("->snd_lx6464es_create\n");
+
+	*rchip = NULL;
+
+	/* enable PCI device */
+	err = pci_enable_device(pci);
+	if (err < 0)
+		return err;
+
+	pci_set_master(pci);
+
+	/* check if we can restrict PCI DMA transfers to 32 bits */
+	err = pci_set_dma_mask(pci, DMA_32BIT_MASK);
+	if (err < 0) {
+		snd_printk(KERN_ERR "architecture does not support "
+			   "32bit PCI busmaster DMA\n");
+		pci_disable_device(pci);
+		return -ENXIO;
+	}
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (chip == NULL) {
+		err = -ENOMEM;
+		goto alloc_failed;
+	}
+
+	chip->card = card;
+	chip->pci = pci;
+	chip->irq = -1;
+
+	/* initialize synchronization structs */
+	spin_lock_init(&chip->lock);
+	spin_lock_init(&chip->msg_lock);
+	mutex_init(&chip->setup_mutex);
+	tasklet_init(&chip->trigger_tasklet, lx_trigger_tasklet,
+		     (unsigned long)chip);
+	tasklet_init(&chip->tasklet_capture, lx_tasklet_capture,
+		     (unsigned long)chip);
+	tasklet_init(&chip->tasklet_playback, lx_tasklet_playback,
+		     (unsigned long)chip);
+
+	/* request resources */
+	err = pci_request_regions(pci, card_name);
+	if (err < 0)
+		goto request_regions_failed;
+
+	/* plx port */
+	chip->port_plx = pci_resource_start(pci, 1);
+	chip->port_plx_remapped = ioport_map(chip->port_plx,
+					     pci_resource_len(pci, 1));
+
+	/* dsp port */
+	chip->port_dsp_bar = pci_ioremap_bar(pci, 2);
+
+	err = request_irq(pci->irq, lx_interrupt, IRQF_SHARED,
+			  card_name, chip);
+	if (err) {
+		snd_printk(KERN_ERR LXP "unable to grab IRQ %d\n", pci->irq);
+		goto request_irq_failed;
+	}
+	chip->irq = pci->irq;
+
+	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
+	if (err < 0)
+		goto device_new_failed;
+
+	err = lx_init_dsp(chip);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "error during DSP initialization\n");
+		return err;
+	}
+
+	err = lx_pcm_create(chip);
+	if (err < 0)
+		return err;
+
+	err = lx_proc_create(card, chip);
+	if (err < 0)
+		return err;
+
+	err = snd_ctl_add(card, snd_ctl_new1(&lx_control_playback_switch,
+					     chip));
+	if (err < 0)
+		return err;
+
+	snd_card_set_dev(card, &pci->dev);
+
+	*rchip = chip;
+	return 0;
+
+device_new_failed:
+	free_irq(pci->irq, chip);
+
+request_irq_failed:
+	pci_release_regions(pci);
+
+request_regions_failed:
+	kfree(chip);
+
+alloc_failed:
+	pci_disable_device(pci);
+
+	return err;
+}
+
+static int __devinit snd_lx6464es_probe(struct pci_dev *pci,
+					const struct pci_device_id *pci_id)
+{
+	static int dev;
+	struct snd_card *card;
+	struct lx6464es *chip;
+	int err;
+
+	snd_printdd("->snd_lx6464es_probe\n");
+
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+	if (!enable[dev]) {
+		dev++;
+		return -ENOENT;
+	}
+
+	card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
+	if (card == NULL)
+		return -ENOMEM;
+
+	err = snd_lx6464es_create(card, pci, &chip);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "error during snd_lx6464es_create\n");
+		goto out_free;
+	}
+
+	strcpy(card->driver, "lx6464es");
+	strcpy(card->shortname, "Digigram LX6464ES");
+	sprintf(card->longname, "%s at 0x%lx, 0x%p, irq %i",
+		card->shortname, chip->port_plx,
+		chip->port_dsp_bar, chip->irq);
+
+	err = snd_card_register(card);
+	if (err < 0)
+		goto out_free;
+
+	snd_printdd(LXP "initialization successful\n");
+	pci_set_drvdata(pci, card);
+	dev++;
+	return 0;
+
+out_free:
+	snd_card_free(card);
+	return err;
+
+}
+
+static void __devexit snd_lx6464es_remove(struct pci_dev *pci)
+{
+	snd_card_free(pci_get_drvdata(pci));
+	pci_set_drvdata(pci, NULL);
+}
+
+
+static struct pci_driver driver = {
+	.name =     "Digigram LX6464ES",
+	.id_table = snd_lx6464es_ids,
+	.probe =    snd_lx6464es_probe,
+	.remove = __devexit_p(snd_lx6464es_remove),
+};
+
+
+/* module initialization */
+static int __init mod_init(void)
+{
+	return pci_register_driver(&driver);
+}
+
+static void __exit mod_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
diff --git a/sound/pci/lx6464es/lx6464es.h b/sound/pci/lx6464es/lx6464es.h
new file mode 100644
index 0000000..012c010
--- /dev/null
+++ b/sound/pci/lx6464es/lx6464es.h
@@ -0,0 +1,114 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX6464ES_H
+#define LX6464ES_H
+
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+
+#include "lx_core.h"
+
+#define LXP "LX6464ES: "
+
+enum {
+    ES_cmd_free         = 0,    /* no command executing */
+    ES_cmd_processing   = 1,	/* execution of a read/write command */
+    ES_read_pending     = 2,    /* a asynchron read command is pending */
+    ES_read_finishing   = 3,    /* a read command has finished waiting (set by
+				 * Interrupt or CancelIrp) */
+};
+
+enum lx_stream_status {
+	LX_STREAM_STATUS_FREE,
+/* 	LX_STREAM_STATUS_OPEN, */
+	LX_STREAM_STATUS_SCHEDULE_RUN,
+/* 	LX_STREAM_STATUS_STARTED, */
+	LX_STREAM_STATUS_RUNNING,
+	LX_STREAM_STATUS_SCHEDULE_STOP,
+/* 	LX_STREAM_STATUS_STOPPED, */
+/* 	LX_STREAM_STATUS_PAUSED */
+};
+
+
+struct lx_stream {
+	struct snd_pcm_substream  *stream;
+	snd_pcm_uframes_t          frame_pos;
+	enum lx_stream_status      status; /* free, open, running, draining
+					    * pause */
+	int                        is_capture:1;
+};
+
+
+struct lx6464es {
+	struct snd_card        *card;
+	struct pci_dev         *pci;
+	int			irq;
+
+	spinlock_t		lock;        /* interrupt spinlock */
+	struct mutex            setup_mutex; /* mutex used in hw_params, open
+					      * and close */
+
+	struct tasklet_struct   trigger_tasklet; /* trigger tasklet */
+	struct tasklet_struct   tasklet_capture;
+	struct tasklet_struct   tasklet_playback;
+
+	/* ports */
+	unsigned long		port_plx;	   /* io port (size=256) */
+	void __iomem           *port_plx_remapped; /* remapped plx port */
+	void __iomem           *port_dsp_bar;      /* memory port (32-bit,
+						    * non-prefetchable,
+						    * size=8K) */
+
+	/* messaging */
+	spinlock_t		msg_lock;          /* message spinlock */
+	atomic_t	        send_message_locked;
+	struct lx_rmh           rmh;
+
+	/* configuration */
+	uint			freq_ratio : 2;
+	uint                    playback_mute : 1;
+	uint                    hardware_running[2];
+	u32                     board_sample_rate; /* sample rate read from
+						    * board */
+	u32                     sample_rate;	   /* our sample rate */
+	u16                     pcm_granularity;   /* board blocksize */
+
+	/* dma */
+	struct snd_dma_buffer   capture_dma_buf;
+	struct snd_dma_buffer   playback_dma_buf;
+
+	/* pcm */
+	struct snd_pcm         *pcm;
+
+	/* streams */
+	struct lx_stream        capture_stream;
+	struct lx_stream        playback_stream;
+};
+
+
+#endif /* LX6464ES_H */
diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c
new file mode 100644
index 0000000..a9f8f88
--- /dev/null
+++ b/sound/pci/lx6464es/lx_core.c
@@ -0,0 +1,1442 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * low-level interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* #define RMH_DEBUG 1 */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "lx6464es.h"
+#include "lx_core.h"
+
+/* low-level register access */
+
+static const unsigned long dsp_port_offsets[] = {
+	0,
+	0x400,
+	0x401,
+	0x402,
+	0x403,
+	0x404,
+	0x405,
+	0x406,
+	0x407,
+	0x408,
+	0x409,
+	0x40a,
+	0x40b,
+	0x40c,
+
+	0x410,
+	0x411,
+	0x412,
+	0x413,
+	0x414,
+	0x415,
+	0x416,
+
+	0x420,
+	0x430,
+	0x431,
+	0x432,
+	0x433,
+	0x434,
+	0x440
+};
+
+static void __iomem *lx_dsp_register(struct lx6464es *chip, int port)
+{
+	void __iomem *base_address = chip->port_dsp_bar;
+	return base_address + dsp_port_offsets[port]*4;
+}
+
+unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	return ioread32(address);
+}
+
+void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	memcpy_fromio(data, address, len*sizeof(u32));
+}
+
+
+void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	iowrite32(data, address);
+}
+
+void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data,
+			 u32 len)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	memcpy_toio(address, data, len*sizeof(u32));
+}
+
+
+static const unsigned long plx_port_offsets[] = {
+	0x04,
+	0x40,
+	0x44,
+	0x48,
+	0x4c,
+	0x50,
+	0x54,
+	0x58,
+	0x5c,
+	0x64,
+	0x68,
+	0x6C
+};
+
+static void __iomem *lx_plx_register(struct lx6464es *chip, int port)
+{
+	void __iomem *base_address = chip->port_plx_remapped;
+	return base_address + plx_port_offsets[port];
+}
+
+unsigned long lx_plx_reg_read(struct lx6464es *chip, int port)
+{
+	void __iomem *address = lx_plx_register(chip, port);
+	return ioread32(address);
+}
+
+void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data)
+{
+	void __iomem *address = lx_plx_register(chip, port);
+	iowrite32(data, address);
+}
+
+u32 lx_plx_mbox_read(struct lx6464es *chip, int mbox_nr)
+{
+	int index;
+
+	switch (mbox_nr) {
+	case 1:
+		index = ePLX_MBOX1;    break;
+	case 2:
+		index = ePLX_MBOX2;    break;
+	case 3:
+		index = ePLX_MBOX3;    break;
+	case 4:
+		index = ePLX_MBOX4;    break;
+	case 5:
+		index = ePLX_MBOX5;    break;
+	case 6:
+		index = ePLX_MBOX6;    break;
+	case 7:
+		index = ePLX_MBOX7;    break;
+	case 0:			/* reserved for HF flags */
+		snd_BUG();
+	default:
+		return 0xdeadbeef;
+	}
+
+	return lx_plx_reg_read(chip, index);
+}
+
+int lx_plx_mbox_write(struct lx6464es *chip, int mbox_nr, u32 value)
+{
+	int index = -1;
+
+	switch (mbox_nr) {
+	case 1:
+		index = ePLX_MBOX1;    break;
+	case 3:
+		index = ePLX_MBOX3;    break;
+	case 4:
+		index = ePLX_MBOX4;    break;
+	case 5:
+		index = ePLX_MBOX5;    break;
+	case 6:
+		index = ePLX_MBOX6;    break;
+	case 7:
+		index = ePLX_MBOX7;    break;
+	case 0:			/* reserved for HF flags */
+	case 2:			/* reserved for Pipe States
+				 * the DSP keeps an image of it */
+		snd_BUG();
+		return -EBADRQC;
+	}
+
+	lx_plx_reg_write(chip, index, value);
+	return 0;
+}
+
+
+/* rmh */
+
+#ifdef CONFIG_SND_DEBUG
+#define CMD_NAME(a) a
+#else
+#define CMD_NAME(a) NULL
+#endif
+
+#define Reg_CSM_MR			0x00000002
+#define Reg_CSM_MC			0x00000001
+
+struct dsp_cmd_info {
+	u32    dcCodeOp;	/* Op Code of the command (usually 1st 24-bits
+				 * word).*/
+	u16    dcCmdLength;	/* Command length in words of 24 bits.*/
+	u16    dcStatusType;	/* Status type: 0 for fixed length, 1 for
+				 * random. */
+	u16    dcStatusLength;	/* Status length (if fixed).*/
+	char  *dcOpName;
+};
+
+/*
+  Initialization and control data for the Microblaze interface
+  - OpCode:
+    the opcode field of the command set at the proper offset
+  - CmdLength
+    the number of command words
+  - StatusType
+    offset in the status registers: 0 means that the return value may be
+    different from 0, and must be read
+  - StatusLength
+    the number of status words (in addition to the return value)
+*/
+
+static struct dsp_cmd_info dsp_commands[] =
+{
+	{ (CMD_00_INFO_DEBUG << OPCODE_OFFSET)			, 1 /*custom*/
+	  , 1	, 0 /**/		    , CMD_NAME("INFO_DEBUG") },
+	{ (CMD_01_GET_SYS_CFG << OPCODE_OFFSET) 		, 1 /**/
+	  , 1      , 2 /**/		    , CMD_NAME("GET_SYS_CFG") },
+	{ (CMD_02_SET_GRANULARITY << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_GRANULARITY") },
+	{ (CMD_03_SET_TIMER_IRQ << OPCODE_OFFSET)		, 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_TIMER_IRQ") },
+	{ (CMD_04_GET_EVENT << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 0 /*up to 10*/     , CMD_NAME("GET_EVENT") },
+	{ (CMD_05_GET_PIPES << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 2 /*up to 4*/      , CMD_NAME("GET_PIPES") },
+	{ (CMD_06_ALLOCATE_PIPE << OPCODE_OFFSET)		, 1 /**/
+	  , 0      , 0 /**/		    , CMD_NAME("ALLOCATE_PIPE") },
+	{ (CMD_07_RELEASE_PIPE << OPCODE_OFFSET)		, 1 /**/
+	  , 0      , 0 /**/		    , CMD_NAME("RELEASE_PIPE") },
+	{ (CMD_08_ASK_BUFFERS << OPCODE_OFFSET) 		, 1 /**/
+	  , 1      , MAX_STREAM_BUFFER  , CMD_NAME("ASK_BUFFERS") },
+	{ (CMD_09_STOP_PIPE << OPCODE_OFFSET)			, 1 /**/
+	  , 0      , 0 /*up to 2*/      , CMD_NAME("STOP_PIPE") },
+	{ (CMD_0A_GET_PIPE_SPL_COUNT << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 1 /*up to 2*/      , CMD_NAME("GET_PIPE_SPL_COUNT") },
+	{ (CMD_0B_TOGGLE_PIPE_STATE << OPCODE_OFFSET)           , 1 /*up to 5*/
+	  , 1      , 0 /**/		    , CMD_NAME("TOGGLE_PIPE_STATE") },
+	{ (CMD_0C_DEF_STREAM << OPCODE_OFFSET)			, 1 /*up to 4*/
+	  , 1      , 0 /**/		    , CMD_NAME("DEF_STREAM") },
+	{ (CMD_0D_SET_MUTE  << OPCODE_OFFSET)			, 3 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_MUTE") },
+	{ (CMD_0E_GET_STREAM_SPL_COUNT << OPCODE_OFFSET)        , 1/**/
+	  , 1      , 2 /**/		    , CMD_NAME("GET_STREAM_SPL_COUNT") },
+	{ (CMD_0F_UPDATE_BUFFER << OPCODE_OFFSET)		, 3 /*up to 4*/
+	  , 0      , 1 /**/		    , CMD_NAME("UPDATE_BUFFER") },
+	{ (CMD_10_GET_BUFFER << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 4 /**/		    , CMD_NAME("GET_BUFFER") },
+	{ (CMD_11_CANCEL_BUFFER << OPCODE_OFFSET)		, 1 /**/
+	  , 1      , 1 /*up to 4*/      , CMD_NAME("CANCEL_BUFFER") },
+	{ (CMD_12_GET_PEAK << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 1 /**/		    , CMD_NAME("GET_PEAK") },
+	{ (CMD_13_SET_STREAM_STATE << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_STREAM_STATE") },
+};
+
+static void lx_message_init(struct lx_rmh *rmh, enum cmd_mb_opcodes cmd)
+{
+	snd_BUG_ON(cmd >= CMD_14_INVALID);
+
+	rmh->cmd[0] = dsp_commands[cmd].dcCodeOp;
+	rmh->cmd_len = dsp_commands[cmd].dcCmdLength;
+	rmh->stat_len = dsp_commands[cmd].dcStatusLength;
+	rmh->dsp_stat = dsp_commands[cmd].dcStatusType;
+	rmh->cmd_idx = cmd;
+	memset(&rmh->cmd[1], 0, (REG_CRM_NUMBER - 1) * sizeof(u32));
+
+#ifdef CONFIG_SND_DEBUG
+	memset(rmh->stat, 0, REG_CRM_NUMBER * sizeof(u32));
+#endif
+#ifdef RMH_DEBUG
+	rmh->cmd_idx = cmd;
+#endif
+}
+
+#ifdef RMH_DEBUG
+#define LXRMH "lx6464es rmh: "
+static void lx_message_dump(struct lx_rmh *rmh)
+{
+	u8 idx = rmh->cmd_idx;
+	int i;
+
+	snd_printk(LXRMH "command %s\n", dsp_commands[idx].dcOpName);
+
+	for (i = 0; i != rmh->cmd_len; ++i)
+		snd_printk(LXRMH "\tcmd[%d] %08x\n", i, rmh->cmd[i]);
+
+	for (i = 0; i != rmh->stat_len; ++i)
+		snd_printk(LXRMH "\tstat[%d]: %08x\n", i, rmh->stat[i]);
+	snd_printk("\n");
+}
+#else
+static inline void lx_message_dump(struct lx_rmh *rmh)
+{}
+#endif
+
+
+
+/* sleep 500 - 100 = 400 times 100us -> the timeout is >= 40 ms */
+#define XILINX_TIMEOUT_MS       40
+#define XILINX_POLL_NO_SLEEP    100
+#define XILINX_POLL_ITERATIONS  150
+
+static int lx_message_send(struct lx6464es *chip, struct lx_rmh *rmh)
+{
+	u32 reg = ED_DSP_TIMED_OUT;
+	int dwloop;
+	int answer_received;
+
+	if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) {
+		snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg);
+		return -EBUSY;
+	}
+
+	/* write command */
+	lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len);
+
+	snd_BUG_ON(atomic_read(&chip->send_message_locked) != 0);
+	atomic_set(&chip->send_message_locked, 1);
+
+	/* MicoBlaze gogogo */
+	lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC);
+
+	/* wait for interrupt to answer */
+	for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS; ++dwloop) {
+		answer_received = atomic_read(&chip->send_message_locked);
+		if (answer_received == 0)
+			break;
+		msleep(1);
+	}
+
+	if (answer_received == 0) {
+		/* in Debug mode verify Reg_CSM_MR */
+		snd_BUG_ON(!(lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR));
+
+		/* command finished, read status */
+		if (rmh->dsp_stat == 0)
+			reg = lx_dsp_reg_read(chip, eReg_CRM1);
+		else
+			reg = 0;
+	} else {
+		int i;
+		snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! "
+			   "Interrupts disabled?\n");
+
+		/* attente bit Reg_CSM_MR */
+		for (i = 0; i != XILINX_POLL_ITERATIONS; i++) {
+			if ((lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR)) {
+				if (rmh->dsp_stat == 0)
+					reg = lx_dsp_reg_read(chip, eReg_CRM1);
+				else
+					reg = 0;
+				goto polling_successful;
+			}
+
+			if (i > XILINX_POLL_NO_SLEEP)
+				msleep(1);
+		}
+		snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! "
+			   "polling failed\n");
+
+polling_successful:
+		atomic_set(&chip->send_message_locked, 0);
+	}
+
+	if ((reg & ERROR_VALUE) == 0) {
+		/* read response */
+		if (rmh->stat_len) {
+			snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1));
+
+			lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat,
+					   rmh->stat_len);
+		}
+	} else
+		snd_printk(KERN_WARNING LXP "lx_message_send: error_value %x\n",
+			   reg);
+
+	/* clear Reg_CSM_MR */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	switch (reg) {
+	case ED_DSP_TIMED_OUT:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n");
+		return -ETIMEDOUT;
+
+	case ED_DSP_CRASHED:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n");
+		return -EAGAIN;
+	}
+
+	lx_message_dump(rmh);
+	return 0;
+}
+
+static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh)
+{
+	u32 reg = ED_DSP_TIMED_OUT;
+	int dwloop;
+
+	if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) {
+		snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg);
+		return -EBUSY;
+	}
+
+	/* write command */
+	lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len);
+
+	/* MicoBlaze gogogo */
+	lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC);
+
+	/* wait for interrupt to answer */
+	for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS * 1000; ++dwloop) {
+		if (lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR) {
+			if (rmh->dsp_stat == 0)
+				reg = lx_dsp_reg_read(chip, eReg_CRM1);
+			else
+				reg = 0;
+			goto polling_successful;
+		} else
+			udelay(1);
+	}
+	snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send_atomic! "
+		   "polling failed\n");
+
+polling_successful:
+	if ((reg & ERROR_VALUE) == 0) {
+		/* read response */
+		if (rmh->stat_len) {
+			snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1));
+			lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat,
+					   rmh->stat_len);
+		}
+	} else
+		snd_printk(LXP "rmh error: %08x\n", reg);
+
+	/* clear Reg_CSM_MR */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	switch (reg) {
+	case ED_DSP_TIMED_OUT:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n");
+		return -ETIMEDOUT;
+
+	case ED_DSP_CRASHED:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n");
+		return -EAGAIN;
+	}
+
+	lx_message_dump(rmh);
+
+	return reg;
+}
+
+
+/* low-level dsp access */
+int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version)
+{
+	u16 ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG);
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	*rdsp_version = chip->rmh.stat[1];
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq)
+{
+	u16 ret = 0;
+	unsigned long flags;
+	u32 freq_raw = 0;
+	u32 freq = 0;
+	u32 frequency = 0;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG);
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (ret == 0) {
+		freq_raw = chip->rmh.stat[0] >> FREQ_FIELD_OFFSET;
+		freq = freq_raw & XES_FREQ_COUNT8_MASK;
+
+		if ((freq < XES_FREQ_COUNT8_48_MAX) ||
+		    (freq > XES_FREQ_COUNT8_44_MIN))
+			frequency = 0; /* unknown */
+		else if (freq >= XES_FREQ_COUNT8_44_MAX)
+			frequency = 44100;
+		else
+			frequency = 48000;
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	*rfreq = frequency * chip->freq_ratio;
+
+	return ret;
+}
+
+int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address)
+{
+	u32 macmsb, maclsb;
+
+	macmsb = lx_dsp_reg_read(chip, eReg_ADMACESMSB) & 0x00FFFFFF;
+	maclsb = lx_dsp_reg_read(chip, eReg_ADMACESLSB) & 0x00FFFFFF;
+
+	/* todo: endianess handling */
+	mac_address[5] = ((u8 *)(&maclsb))[0];
+	mac_address[4] = ((u8 *)(&maclsb))[1];
+	mac_address[3] = ((u8 *)(&maclsb))[2];
+	mac_address[2] = ((u8 *)(&macmsb))[0];
+	mac_address[1] = ((u8 *)(&macmsb))[1];
+	mac_address[0] = ((u8 *)(&macmsb))[2];
+
+	return 0;
+}
+
+
+int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_02_SET_GRANULARITY);
+	chip->rmh.cmd[0] |= gran;
+
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_04_GET_EVENT);
+	chip->rmh.stat_len = 9;	/* we don't necessarily need the full length */
+
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (!ret)
+		memcpy(data, chip->rmh.stat, chip->rmh.stat_len * sizeof(u32));
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+#define CSES_TIMEOUT        100     /* microseconds */
+#define CSES_CE             0x0001
+#define CSES_BROADCAST      0x0002
+#define CSES_UPDATE_LDSV    0x0004
+
+int lx_dsp_es_check_pipeline(struct lx6464es *chip)
+{
+	int i;
+
+	for (i = 0; i != CSES_TIMEOUT; ++i) {
+		/*
+		 * le bit CSES_UPDATE_LDSV est Ã  1 dÃ©s que le macprog
+		 * est pret. il re-passe Ã  0 lorsque le premier read a
+		 * Ã©tÃ© fait. pour l'instant on retire le test car ce bit
+		 * passe a 1 environ 200 Ã  400 ms aprÃ©s que le registre
+		 * confES Ã  Ã©tÃ© Ã©crit (kick du xilinx ES).
+		 *
+		 * On ne teste que le bit CE.
+		 * */
+
+		u32 cses = lx_dsp_reg_read(chip, eReg_CSES);
+
+		if ((cses & CSES_CE) == 0)
+			return 0;
+
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+
+#define PIPE_INFO_TO_CMD(capture, pipe)					\
+	((u32)((u32)(pipe) | ((capture) ? ID_IS_CAPTURE : 0L)) << ID_OFFSET)
+
+
+
+/* low-level pipe handling */
+int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture,
+		     int channels)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_06_ALLOCATE_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= channels;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	if (err != 0)
+		snd_printk(KERN_ERR "lx6464es: could not allocate pipe\n");
+
+	return err;
+}
+
+int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_07_RELEASE_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture,
+		  u32 *r_needed, u32 *r_freed, u32 *size_array)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+#ifdef CONFIG_SND_DEBUG
+	if (size_array)
+		memset(size_array, 0, sizeof(u32)*MAX_STREAM_BUFFER);
+#endif
+
+	*r_needed = 0;
+	*r_freed = 0;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_08_ASK_BUFFERS);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (!err) {
+		int i;
+		for (i = 0; i < MAX_STREAM_BUFFER; ++i) {
+			u32 stat = chip->rmh.stat[i];
+			if (stat & (BF_EOB << BUFF_FLAGS_OFFSET)) {
+				/* finished */
+				*r_freed += 1;
+				if (size_array)
+					size_array[i] = stat & MASK_DATA_SIZE;
+			} else if ((stat & (BF_VALID << BUFF_FLAGS_OFFSET))
+				   == 0)
+				/* free */
+				*r_needed += 1;
+		}
+
+#if 0
+		snd_printdd(LXP "CMD_08_ASK_BUFFERS: needed %d, freed %d\n",
+			    *r_needed, *r_freed);
+		for (i = 0; i < MAX_STREAM_BUFFER; ++i) {
+			for (i = 0; i != chip->rmh.stat_len; ++i)
+				snd_printdd("  stat[%d]: %x, %x\n", i,
+					    chip->rmh.stat[i],
+					    chip->rmh.stat[i] & MASK_DATA_SIZE);
+		}
+#endif
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_09_STOP_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static int lx_pipe_toggle_state(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0B_TOGGLE_PIPE_STATE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+
+	err = lx_pipe_wait_for_idle(chip, pipe, is_capture);
+	if (err < 0)
+		return err;
+
+	err = lx_pipe_toggle_state(chip, pipe, is_capture);
+
+	return err;
+}
+
+int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err = 0;
+
+	err = lx_pipe_wait_for_start(chip, pipe, is_capture);
+	if (err < 0)
+		return err;
+
+	err = lx_pipe_toggle_state(chip, pipe, is_capture);
+
+	return err;
+}
+
+
+int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture,
+			 u64 *rsample_count)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.stat_len = 2;	/* need all words here! */
+
+	err = lx_message_send_atomic(chip, &chip->rmh); /* don't sleep! */
+
+	if (err != 0)
+		snd_printk(KERN_ERR
+			   "lx6464es: could not query pipe's sample count\n");
+	else {
+		*rsample_count = ((u64)(chip->rmh.stat[0] & MASK_SPL_COUNT_HI)
+				  << 24)     /* hi part */
+			+ chip->rmh.stat[1]; /* lo part */
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err != 0)
+		snd_printk(KERN_ERR "lx6464es: could not query pipe's state\n");
+	else
+		*rstate = (chip->rmh.stat[0] >> PSTATE_OFFSET) & 0x0F;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static int lx_pipe_wait_for_state(struct lx6464es *chip, u32 pipe,
+				  int is_capture, u16 state)
+{
+	int i;
+
+	/* max 2*PCMOnlyGranularity = 2*1024 at 44100 = < 50 ms:
+	 * timeout 50 ms */
+	for (i = 0; i != 50; ++i) {
+		u16 current_state;
+		int err = lx_pipe_state(chip, pipe, is_capture, &current_state);
+
+		if (err < 0)
+			return err;
+
+		if (current_state == state)
+			return 0;
+
+		mdelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_RUN);
+}
+
+int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_IDLE);
+}
+
+/* low-level stream handling */
+int lx_stream_set_state(struct lx6464es *chip, u32 pipe,
+			       int is_capture, enum stream_state_t state)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_13_SET_STREAM_STATE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= state;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime,
+			 u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	u32 channels = runtime->channels;
+
+	if (runtime->channels != channels)
+		snd_printk(KERN_ERR LXP "channel count mismatch: %d vs %d",
+			   runtime->channels, channels);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0C_DEF_STREAM);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	if (runtime->sample_bits == 16)
+		/* 16 bit format */
+		chip->rmh.cmd[0] |= (STREAM_FMT_16b << STREAM_FMT_OFFSET);
+
+	if (snd_pcm_format_little_endian(runtime->format))
+		/* little endian/intel format */
+		chip->rmh.cmd[0] |= (STREAM_FMT_intel << STREAM_FMT_OFFSET);
+
+	chip->rmh.cmd[0] |= channels-1;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture,
+		    int *rstate)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	*rstate = (chip->rmh.stat[0] & SF_START) ? START_STATE : PAUSE_STATE;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture,
+			      u64 *r_bytepos)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	*r_bytepos = ((u64) (chip->rmh.stat[0] & MASK_SPL_COUNT_HI)
+		      << 32)	     /* hi part */
+		+ chip->rmh.stat[1]; /* lo part */
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+/* low-level buffer handling */
+int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi,
+		   u32 *r_buffer_index)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0F_UPDATE_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= BF_NOTIFY_EOB; /* request interrupt notification */
+
+	/* todo: pause request, circular buffer */
+
+	chip->rmh.cmd[1] = buffer_size & MASK_DATA_SIZE;
+	chip->rmh.cmd[2] = buf_address_lo;
+
+	if (buf_address_hi) {
+		chip->rmh.cmd_len = 4;
+		chip->rmh.cmd[3] = buf_address_hi;
+		chip->rmh.cmd[0] |= BF_64BITS_ADR;
+	}
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err == 0) {
+		*r_buffer_index = chip->rmh.stat[0];
+		goto done;
+	}
+
+	if (err == EB_RBUFFERS_TABLE_OVERFLOW)
+		snd_printk(LXP "lx_buffer_give EB_RBUFFERS_TABLE_OVERFLOW\n");
+
+	if (err == EB_INVALID_STREAM)
+		snd_printk(LXP "lx_buffer_give EB_INVALID_STREAM\n");
+
+	if (err == EB_CMD_REFUSED)
+		snd_printk(LXP "lx_buffer_give EB_CMD_REFUSED\n");
+
+ done:
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 *r_buffer_size)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= MASK_BUFFER_ID; /* ask for the current buffer: the
+					     * microblaze will seek for it */
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err == 0)
+		*r_buffer_size = chip->rmh.stat[0]  & MASK_DATA_SIZE;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture,
+		     u32 buffer_index)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= buffer_index;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+/* low-level gain/peak handling
+ *
+ * \todo: can we unmute capture/playback channels independently?
+ *
+ * */
+int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute)
+{
+	int err;
+	unsigned long flags;
+
+	/* bit set to 1: channel muted */
+	u64 mute_mask = unmute ? 0 : 0xFFFFFFFFFFFFFFFFLLU;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0D_SET_MUTE);
+
+	chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, 0);
+
+	chip->rmh.cmd[1] = (u32)(mute_mask >> (u64)32);	       /* hi part */
+	chip->rmh.cmd[2] = (u32)(mute_mask & (u64)0xFFFFFFFF); /* lo part */
+
+	snd_printk("mute %x %x %x\n", chip->rmh.cmd[0], chip->rmh.cmd[1],
+		   chip->rmh.cmd[2]);
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static u32 peak_map[] = {
+	0x00000109, /* -90.308dB */
+	0x0000083B, /* -72.247dB */
+	0x000020C4, /* -60.205dB */
+	0x00008273, /* -48.030dB */
+	0x00020756, /* -36.005dB */
+	0x00040C37, /* -30.001dB */
+	0x00081385, /* -24.002dB */
+	0x00101D3F, /* -18.000dB */
+	0x0016C310, /* -15.000dB */
+	0x002026F2, /* -12.001dB */
+	0x002D6A86, /* -9.000dB */
+	0x004026E6, /* -6.004dB */
+	0x005A9DF6, /* -3.000dB */
+	0x0065AC8B, /* -2.000dB */
+	0x00721481, /* -1.000dB */
+	0x007FFFFF, /* FS */
+};
+
+int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels,
+		   u32 *r_levels)
+{
+	int err = 0;
+	unsigned long flags;
+	int i;
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	for (i = 0; i < channels; i += 4) {
+		u32 s0, s1, s2, s3;
+
+		lx_message_init(&chip->rmh, CMD_12_GET_PEAK);
+		chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, i);
+
+		err = lx_message_send_atomic(chip, &chip->rmh);
+
+		if (err == 0) {
+			s0 = peak_map[chip->rmh.stat[0] & 0x0F];
+			s1 = peak_map[(chip->rmh.stat[0] >>  4) & 0xf];
+			s2 = peak_map[(chip->rmh.stat[0] >>  8) & 0xf];
+			s3 = peak_map[(chip->rmh.stat[0] >>  12) & 0xf];
+		} else
+			s0 = s1 = s2 = s3 = 0;
+
+		r_levels[0] = s0;
+		r_levels[1] = s1;
+		r_levels[2] = s2;
+		r_levels[3] = s3;
+
+		r_levels += 4;
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+/* interrupt handling */
+#define PCX_IRQ_NONE 0
+#define IRQCS_ACTIVE_PCIDB  0x00002000L         /* Bit nÃÂ¸ 13 */
+#define IRQCS_ENABLE_PCIIRQ 0x00000100L         /* Bit nÃÂ¸ 08 */
+#define IRQCS_ENABLE_PCIDB  0x00000200L         /* Bit nÃÂ¸ 09 */
+
+static u32 lx_interrupt_test_ack(struct lx6464es *chip)
+{
+	u32 irqcs = lx_plx_reg_read(chip, ePLX_IRQCS);
+
+	/* Test if PCI Doorbell interrupt is active */
+	if (irqcs & IRQCS_ACTIVE_PCIDB)	{
+		u32 temp;
+		irqcs = PCX_IRQ_NONE;
+
+		while ((temp = lx_plx_reg_read(chip, ePLX_L2PCIDB))) {
+			/* RAZ interrupt */
+			irqcs |= temp;
+			lx_plx_reg_write(chip, ePLX_L2PCIDB, temp);
+		}
+
+		return irqcs;
+	}
+	return PCX_IRQ_NONE;
+}
+
+static int lx_interrupt_ack(struct lx6464es *chip, u32 *r_irqsrc,
+			    int *r_async_pending, int *r_async_escmd)
+{
+	u32 irq_async;
+	u32 irqsrc = lx_interrupt_test_ack(chip);
+
+	if (irqsrc == PCX_IRQ_NONE)
+		return 0;
+
+	*r_irqsrc = irqsrc;
+
+	irq_async = irqsrc & MASK_SYS_ASYNC_EVENTS; /* + EtherSound response
+						     * (set by xilinx) + EOB */
+
+	if (irq_async & MASK_SYS_STATUS_ESA) {
+		irq_async &= ~MASK_SYS_STATUS_ESA;
+		*r_async_escmd = 1;
+	}
+
+	if (irqsrc & MASK_SYS_STATUS_CMD_DONE)
+		/* xilinx command notification */
+		atomic_set(&chip->send_message_locked, 0);
+
+	if (irq_async) {
+		/* snd_printd("interrupt: async event pending\n"); */
+		*r_async_pending = 1;
+	}
+
+	return 1;
+}
+
+static int lx_interrupt_handle_async_events(struct lx6464es *chip, u32 irqsrc,
+					    int *r_freq_changed,
+					    u64 *r_notified_in_pipe_mask,
+					    u64 *r_notified_out_pipe_mask)
+{
+	int err;
+	u32 stat[9];		/* answer from CMD_04_GET_EVENT */
+
+	/* On peut optimiser pour ne pas lire les evenements vides
+	 * les mots de rÃÂ©ponse sont dans l'ordre suivant :
+	 * Stat[0]	mot de status gÃÂ©nÃÂ©ral
+	 * Stat[1]	fin de buffer OUT pF
+	 * Stat[2]	fin de buffer OUT pf
+	 * Stat[3]	fin de buffer IN pF
+	 * Stat[4]	fin de buffer IN pf
+	 * Stat[5]	underrun poid fort
+	 * Stat[6]	underrun poid faible
+	 * Stat[7]	overrun poid fort
+	 * Stat[8]	overrun poid faible
+	 * */
+
+	u64 orun_mask;
+	u64 urun_mask;
+#if 0
+	int has_underrun   = (irqsrc & MASK_SYS_STATUS_URUN) ? 1 : 0;
+	int has_overrun    = (irqsrc & MASK_SYS_STATUS_ORUN) ? 1 : 0;
+#endif
+	int eb_pending_out = (irqsrc & MASK_SYS_STATUS_EOBO) ? 1 : 0;
+	int eb_pending_in  = (irqsrc & MASK_SYS_STATUS_EOBI) ? 1 : 0;
+
+	*r_freq_changed = (irqsrc & MASK_SYS_STATUS_FREQ) ? 1 : 0;
+
+	err = lx_dsp_read_async_events(chip, stat);
+	if (err < 0)
+		return err;
+
+	if (eb_pending_in) {
+		*r_notified_in_pipe_mask = ((u64)stat[3] << 32)
+			+ stat[4];
+		snd_printdd(LXP "interrupt: EOBI pending %llx\n",
+			    *r_notified_in_pipe_mask);
+	}
+	if (eb_pending_out) {
+		*r_notified_out_pipe_mask = ((u64)stat[1] << 32)
+			+ stat[2];
+		snd_printdd(LXP "interrupt: EOBO pending %llx\n",
+			    *r_notified_out_pipe_mask);
+	}
+
+	orun_mask = ((u64)stat[7] << 32) + stat[8];
+	urun_mask = ((u64)stat[5] << 32) + stat[6];
+
+	/* todo: handle xrun notification */
+
+	return err;
+}
+
+static int lx_interrupt_request_new_buffer(struct lx6464es *chip,
+					   struct lx_stream *lx_stream)
+{
+	struct snd_pcm_substream *substream = lx_stream->stream;
+	int is_capture = lx_stream->is_capture;
+	int err;
+	unsigned long flags;
+
+	const u32 channels = substream->runtime->channels;
+	const u32 bytes_per_frame = channels * 3;
+	const u32 period_size = substream->runtime->period_size;
+	const u32 period_bytes = period_size * bytes_per_frame;
+	const u32 pos = lx_stream->frame_pos;
+	const u32 next_pos = ((pos+1) == substream->runtime->periods) ?
+		0 : pos + 1;
+
+	dma_addr_t buf = substream->dma_buffer.addr + pos * period_bytes;
+	u32 buf_hi = 0;
+	u32 buf_lo = 0;
+	u32 buffer_index = 0;
+
+	u32 needed, freed;
+	u32 size_array[MAX_STREAM_BUFFER];
+
+	snd_printdd("->lx_interrupt_request_new_buffer\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+
+	err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array);
+	snd_printdd(LXP "interrupt: needed %d, freed %d\n", needed, freed);
+
+	unpack_pointer(buf, &buf_lo, &buf_hi);
+	err = lx_buffer_give(chip, 0, is_capture, period_bytes, buf_lo, buf_hi,
+			     &buffer_index);
+	snd_printdd(LXP "interrupt: gave buffer index %x on %p (%d bytes)\n",
+		    buffer_index, (void *)buf, period_bytes);
+
+	lx_stream->frame_pos = next_pos;
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return err;
+}
+
+void lx_tasklet_playback(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	struct lx_stream *lx_stream = &chip->playback_stream;
+	int err;
+
+	snd_printdd("->lx_tasklet_playback\n");
+
+	err = lx_interrupt_request_new_buffer(chip, lx_stream);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP
+			   "cannot request new buffer for playback\n");
+
+	snd_pcm_period_elapsed(lx_stream->stream);
+}
+
+void lx_tasklet_capture(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	struct lx_stream *lx_stream = &chip->capture_stream;
+	int err;
+
+	snd_printdd("->lx_tasklet_capture\n");
+	err = lx_interrupt_request_new_buffer(chip, lx_stream);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP
+			   "cannot request new buffer for capture\n");
+
+	snd_pcm_period_elapsed(lx_stream->stream);
+}
+
+
+
+static int lx_interrupt_handle_audio_transfer(struct lx6464es *chip,
+					      u64 notified_in_pipe_mask,
+					      u64 notified_out_pipe_mask)
+{
+	int err = 0;
+
+	if (notified_in_pipe_mask) {
+		snd_printdd(LXP "requesting audio transfer for capture\n");
+		tasklet_hi_schedule(&chip->tasklet_capture);
+	}
+
+	if (notified_out_pipe_mask) {
+		snd_printdd(LXP "requesting audio transfer for playback\n");
+		tasklet_hi_schedule(&chip->tasklet_playback);
+	}
+
+	return err;
+}
+
+
+irqreturn_t lx_interrupt(int irq, void *dev_id)
+{
+	struct lx6464es *chip = dev_id;
+	int async_pending, async_escmd;
+	u32 irqsrc;
+
+	spin_lock(&chip->lock);
+
+	snd_printdd("**************************************************\n");
+
+	if (!lx_interrupt_ack(chip, &irqsrc, &async_pending, &async_escmd)) {
+		spin_unlock(&chip->lock);
+		snd_printdd("IRQ_NONE\n");
+		return IRQ_NONE; /* this device did not cause the interrupt */
+	}
+
+	if (irqsrc & MASK_SYS_STATUS_CMD_DONE)
+		goto exit;
+
+#if 0
+	if (irqsrc & MASK_SYS_STATUS_EOBI)
+		snd_printdd(LXP "interrupt: EOBI\n");
+
+	if (irqsrc & MASK_SYS_STATUS_EOBO)
+		snd_printdd(LXP "interrupt: EOBO\n");
+
+	if (irqsrc & MASK_SYS_STATUS_URUN)
+		snd_printdd(LXP "interrupt: URUN\n");
+
+	if (irqsrc & MASK_SYS_STATUS_ORUN)
+		snd_printdd(LXP "interrupt: ORUN\n");
+#endif
+
+	if (async_pending) {
+		u64 notified_in_pipe_mask = 0;
+		u64 notified_out_pipe_mask = 0;
+		int freq_changed;
+		int err;
+
+		/* handle async events */
+		err = lx_interrupt_handle_async_events(chip, irqsrc,
+						       &freq_changed,
+						       &notified_in_pipe_mask,
+						       &notified_out_pipe_mask);
+		if (err)
+			snd_printk(KERN_ERR LXP
+				   "error handling async events\n");
+
+		err = lx_interrupt_handle_audio_transfer(chip,
+							 notified_in_pipe_mask,
+							 notified_out_pipe_mask
+			);
+		if (err)
+			snd_printk(KERN_ERR LXP
+				   "error during audio transfer\n");
+	}
+
+	if (async_escmd) {
+#if 0
+		/* backdoor for ethersound commands
+		 *
+		 * for now, we do not need this
+		 *
+		 * */
+
+		snd_printdd("lx6464es: interrupt requests escmd handling\n");
+#endif
+	}
+
+exit:
+	spin_unlock(&chip->lock);
+	return IRQ_HANDLED;	/* this device caused the interrupt */
+}
+
+
+static void lx_irq_set(struct lx6464es *chip, int enable)
+{
+	u32 reg = lx_plx_reg_read(chip, ePLX_IRQCS);
+
+	/* enable/disable interrupts
+	 *
+	 * Set the Doorbell and PCI interrupt enable bits
+	 *
+	 * */
+	if (enable)
+		reg |=  (IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB);
+	else
+		reg &= ~(IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB);
+	lx_plx_reg_write(chip, ePLX_IRQCS, reg);
+}
+
+void lx_irq_enable(struct lx6464es *chip)
+{
+	snd_printdd("->lx_irq_enable\n");
+	lx_irq_set(chip, 1);
+}
+
+void lx_irq_disable(struct lx6464es *chip)
+{
+	snd_printdd("->lx_irq_disable\n");
+	lx_irq_set(chip, 0);
+}
diff --git a/sound/pci/lx6464es/lx_core.h b/sound/pci/lx6464es/lx_core.h
new file mode 100644
index 0000000..6bd9cbb
--- /dev/null
+++ b/sound/pci/lx6464es/lx_core.h
@@ -0,0 +1,242 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * low-level interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX_CORE_H
+#define LX_CORE_H
+
+#include <linux/interrupt.h>
+
+#include "lx_defs.h"
+
+#define REG_CRM_NUMBER		12
+
+struct lx6464es;
+
+/* low-level register access */
+
+/* dsp register access */
+enum {
+	eReg_BASE,
+	eReg_CSM,
+	eReg_CRM1,
+	eReg_CRM2,
+	eReg_CRM3,
+	eReg_CRM4,
+	eReg_CRM5,
+	eReg_CRM6,
+	eReg_CRM7,
+	eReg_CRM8,
+	eReg_CRM9,
+	eReg_CRM10,
+	eReg_CRM11,
+	eReg_CRM12,
+
+	eReg_ICR,
+	eReg_CVR,
+	eReg_ISR,
+	eReg_RXHTXH,
+	eReg_RXMTXM,
+	eReg_RHLTXL,
+	eReg_RESETDSP,
+
+	eReg_CSUF,
+	eReg_CSES,
+	eReg_CRESMSB,
+	eReg_CRESLSB,
+	eReg_ADMACESMSB,
+	eReg_ADMACESLSB,
+	eReg_CONFES,
+
+	eMaxPortLx
+};
+
+unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port);
+void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len);
+void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data);
+void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data,
+			 u32 len);
+
+/* plx register access */
+enum {
+    ePLX_PCICR,
+
+    ePLX_MBOX0,
+    ePLX_MBOX1,
+    ePLX_MBOX2,
+    ePLX_MBOX3,
+    ePLX_MBOX4,
+    ePLX_MBOX5,
+    ePLX_MBOX6,
+    ePLX_MBOX7,
+
+    ePLX_L2PCIDB,
+    ePLX_IRQCS,
+    ePLX_CHIPSC,
+
+    eMaxPort
+};
+
+unsigned long lx_plx_reg_read(struct lx6464es *chip, int port);
+void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data);
+
+/* rhm */
+struct lx_rmh {
+	u16	cmd_len;	/* length of the command to send (WORDs) */
+	u16	stat_len;	/* length of the status received (WORDs) */
+	u16	dsp_stat;	/* status type, RMP_SSIZE_XXX */
+	u16	cmd_idx;	/* index of the command */
+	u32	cmd[REG_CRM_NUMBER];
+	u32	stat[REG_CRM_NUMBER];
+};
+
+
+/* low-level dsp access */
+int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version);
+int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq);
+int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran);
+int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data);
+int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address);
+
+
+/* low-level pipe handling */
+int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture,
+		     int channels);
+int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture,
+			 u64 *rsample_count);
+int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate);
+int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture);
+
+int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture);
+
+/* low-level stream handling */
+int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime,
+			 u32 pipe, int is_capture);
+int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture,
+		    int *rstate);
+int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture,
+			      u64 *r_bytepos);
+
+int lx_stream_set_state(struct lx6464es *chip, u32 pipe,
+			int is_capture, enum stream_state_t state);
+
+static inline int lx_stream_start(struct lx6464es *chip, u32 pipe,
+				  int is_capture)
+{
+	snd_printdd("->lx_stream_start\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_RUN);
+}
+
+static inline int lx_stream_pause(struct lx6464es *chip, u32 pipe,
+				  int is_capture)
+{
+	snd_printdd("->lx_stream_pause\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_PAUSE);
+}
+
+static inline int lx_stream_stop(struct lx6464es *chip, u32 pipe,
+				 int is_capture)
+{
+	snd_printdd("->lx_stream_stop\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_STOP);
+}
+
+/* low-level buffer handling */
+int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture,
+		  u32 *r_needed, u32 *r_freed, u32 *size_array);
+int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi,
+		   u32 *r_buffer_index);
+int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 *r_buffer_size);
+int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture,
+		     u32 buffer_index);
+
+/* low-level gain/peak handling */
+int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute);
+int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels,
+		   u32 *r_levels);
+
+
+/* interrupt handling */
+irqreturn_t lx_interrupt(int irq, void *dev_id);
+void lx_irq_enable(struct lx6464es *chip);
+void lx_irq_disable(struct lx6464es *chip);
+
+void lx_tasklet_capture(unsigned long data);
+void lx_tasklet_playback(unsigned long data);
+
+
+/* Stream Format Header Defines (for LIN and IEEE754) */
+#define HEADER_FMT_BASE		HEADER_FMT_BASE_LIN
+#define HEADER_FMT_BASE_LIN	0xFED00000
+#define HEADER_FMT_BASE_FLOAT	0xFAD00000
+#define HEADER_FMT_MONO		0x00000080 /* bit 23 in header_lo. WARNING: old
+					    * bit 22 is ignored in float
+					    * format */
+#define HEADER_FMT_INTEL	0x00008000
+#define HEADER_FMT_16BITS	0x00002000
+#define HEADER_FMT_24BITS	0x00004000
+#define HEADER_FMT_UPTO11	0x00000200 /* frequency is less or equ. to 11k.
+					    * */
+#define HEADER_FMT_UPTO32	0x00000100 /* frequency is over 11k and less
+					    * then 32k.*/
+
+
+#define BIT_FMP_HEADER          23
+#define BIT_FMP_SD              22
+#define BIT_FMP_MULTICHANNEL    19
+
+#define START_STATE             1
+#define PAUSE_STATE             0
+
+
+
+
+
+/* from PcxAll_e.h */
+/* Start/Pause condition for pipes (PCXStartPipe, PCXPausePipe) */
+#define START_PAUSE_IMMEDIATE           0
+#define START_PAUSE_ON_SYNCHRO          1
+#define START_PAUSE_ON_TIME_CODE        2
+
+
+/* Pipe / Stream state */
+#define START_STATE             1
+#define PAUSE_STATE             0
+
+static inline void unpack_pointer(dma_addr_t ptr, u32 *r_low, u32 *r_high)
+{
+	*r_low = (u32)(ptr & 0xffffffff);
+#if BITS_PER_LONG == 32
+	*r_high = 0;
+#else
+	*r_high = (u32)((u64)ptr>>32);
+#endif
+}
+
+#endif /* LX_CORE_H */
diff --git a/sound/pci/lx6464es/lx_defs.h b/sound/pci/lx6464es/lx_defs.h
new file mode 100644
index 0000000..49d36bd
--- /dev/null
+++ b/sound/pci/lx6464es/lx_defs.h
@@ -0,0 +1,376 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * adapted upstream headers
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX_DEFS_H
+#define LX_DEFS_H
+
+/* code adapted from ethersound.h */
+#define	XES_FREQ_COUNT8_MASK    0x00001FFF /* compteur 25MHz entre 8 ech. */
+#define	XES_FREQ_COUNT8_44_MIN  0x00001288 /* 25M /
+					    * [ 44k - ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+#define	XES_FREQ_COUNT8_44_MAX	0x000010F0 /* 25M / [ ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+#define	XES_FREQ_COUNT8_48_MAX	0x00000F08 /* 25M /
+					    * [ 48k + ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+
+/* code adapted from LXES_registers.h */
+
+#define IOCR_OUTPUTS_OFFSET 0	/* (rw) offset for the number of OUTs in the
+				 * ConfES register. */
+#define IOCR_INPUTS_OFFSET  8	/* (rw) offset for the number of INs in the
+				 * ConfES register. */
+#define FREQ_RATIO_OFFSET  19	/* (rw) offset for frequency ratio in the
+				 * ConfES register. */
+#define	FREQ_RATIO_SINGLE_MODE 0x01 /* value for single mode frequency ratio:
+				     * sample rate = frequency rate. */
+
+#define CONFES_READ_PART_MASK	0x00070000
+#define CONFES_WRITE_PART_MASK	0x00F80000
+
+/* code adapted from if_drv_mb.h */
+
+#define MASK_SYS_STATUS_ERROR	(1L << 31) /* events that lead to a PCI irq if
+					    * not yet pending */
+#define MASK_SYS_STATUS_URUN	(1L << 30)
+#define MASK_SYS_STATUS_ORUN	(1L << 29)
+#define MASK_SYS_STATUS_EOBO	(1L << 28)
+#define MASK_SYS_STATUS_EOBI	(1L << 27)
+#define MASK_SYS_STATUS_FREQ	(1L << 26)
+#define MASK_SYS_STATUS_ESA	(1L << 25) /* reserved, this is set by the
+					    * XES */
+#define MASK_SYS_STATUS_TIMER	(1L << 24)
+
+#define MASK_SYS_ASYNC_EVENTS	(MASK_SYS_STATUS_ERROR |		\
+				 MASK_SYS_STATUS_URUN  |		\
+				 MASK_SYS_STATUS_ORUN  |		\
+				 MASK_SYS_STATUS_EOBO  |		\
+				 MASK_SYS_STATUS_EOBI  |		\
+				 MASK_SYS_STATUS_FREQ  |		\
+				 MASK_SYS_STATUS_ESA)
+
+#define MASK_SYS_PCI_EVENTS		(MASK_SYS_ASYNC_EVENTS |	\
+					 MASK_SYS_STATUS_TIMER)
+
+#define MASK_SYS_TIMER_COUNT	0x0000FFFF
+
+#define MASK_SYS_STATUS_EOT_PLX		(1L << 22) /* event that remains
+						    * internal: reserved fo end
+						    * of plx dma */
+#define MASK_SYS_STATUS_XES		(1L << 21) /* event that remains
+						    * internal: pending XES
+						    * IRQ */
+#define MASK_SYS_STATUS_CMD_DONE	(1L << 20) /* alternate command
+						    * management: notify driver
+						    * instead of polling */
+
+
+#define MAX_STREAM_BUFFER 5	/* max amount of stream buffers. */
+
+#define MICROBLAZE_IBL_MIN		 32
+#define MICROBLAZE_IBL_DEFAULT	        128
+#define MICROBLAZE_IBL_MAX		512
+/* #define MASK_GRANULARITY		(2*MICROBLAZE_IBL_MAX-1) */
+
+
+
+/* command opcodes, see reference for details */
+
+/*
+ the capture bit position in the object_id field in driver commands
+ depends upon the number of managed channels. For now, 64 IN + 64 OUT are
+ supported. HOwever, the communication protocol forsees 1024 channels, hence
+ bit 10 indicates a capture (input) object).
+*/
+#define ID_IS_CAPTURE (1L << 10)
+#define ID_OFFSET	13	/* object ID is at the 13th bit in the
+				 * 1st command word.*/
+#define ID_CH_MASK    0x3F
+#define OPCODE_OFFSET	24	/* offset of the command opcode in the first
+				 * command word.*/
+
+enum cmd_mb_opcodes {
+	CMD_00_INFO_DEBUG	        = 0x00,
+	CMD_01_GET_SYS_CFG		= 0x01,
+	CMD_02_SET_GRANULARITY		= 0x02,
+	CMD_03_SET_TIMER_IRQ		= 0x03,
+	CMD_04_GET_EVENT		= 0x04,
+	CMD_05_GET_PIPES		= 0x05,
+
+	CMD_06_ALLOCATE_PIPE            = 0x06,
+	CMD_07_RELEASE_PIPE		= 0x07,
+	CMD_08_ASK_BUFFERS		= 0x08,
+	CMD_09_STOP_PIPE		= 0x09,
+	CMD_0A_GET_PIPE_SPL_COUNT	= 0x0a,
+	CMD_0B_TOGGLE_PIPE_STATE	= 0x0b,
+
+	CMD_0C_DEF_STREAM		= 0x0c,
+	CMD_0D_SET_MUTE			= 0x0d,
+	CMD_0E_GET_STREAM_SPL_COUNT     = 0x0e,
+	CMD_0F_UPDATE_BUFFER		= 0x0f,
+	CMD_10_GET_BUFFER		= 0x10,
+	CMD_11_CANCEL_BUFFER		= 0x11,
+	CMD_12_GET_PEAK			= 0x12,
+	CMD_13_SET_STREAM_STATE		= 0x13,
+	CMD_14_INVALID			= 0x14,
+};
+
+/* pipe states */
+enum pipe_state_t {
+	PSTATE_IDLE	= 0,	/* the pipe is not processed in the XES_IRQ
+				 * (free or stopped, or paused). */
+	PSTATE_RUN	= 1,	/* sustained play/record state. */
+	PSTATE_PURGE	= 2,	/* the ES channels are now off, render pipes do
+				 * not DMA, record pipe do a last DMA. */
+	PSTATE_ACQUIRE	= 3,	/* the ES channels are now on, render pipes do
+				 * not yet increase their sample count, record
+				 * pipes do not DMA. */
+	PSTATE_CLOSING	= 4,	/* the pipe is releasing, and may not yet
+				 * receive an "alloc" command. */
+};
+
+/* stream states */
+enum stream_state_t {
+	SSTATE_STOP	=  0x00,       /* setting to stop resets the stream spl
+					* count.*/
+	SSTATE_RUN	= (0x01 << 0), /* start DMA and spl count handling. */
+	SSTATE_PAUSE	= (0x01 << 1), /* pause DMA and spl count handling. */
+};
+
+/* buffer flags */
+enum buffer_flags {
+	BF_VALID	= 0x80,	/* set if the buffer is valid, clear if free.*/
+	BF_CURRENT	= 0x40,	/* set if this is the current buffer (there is
+				 * always a current buffer).*/
+	BF_NOTIFY_EOB	= 0x20,	/* set if this buffer must cause a PCI event
+				 * when finished.*/
+	BF_CIRCULAR	= 0x10,	/* set if buffer[1] must be copied to buffer[0]
+				 * by the end of this buffer.*/
+	BF_64BITS_ADR	= 0x08,	/* set if the hi part of the address is valid.*/
+	BF_xx		= 0x04,	/* future extension.*/
+	BF_EOB		= 0x02,	/* set if finished, but not yet free.*/
+	BF_PAUSE	= 0x01,	/* pause stream at buffer end.*/
+	BF_ZERO		= 0x00,	/* no flags (init).*/
+};
+
+/**
+*	Stream Flags definitions
+*/
+enum stream_flags {
+	SF_ZERO		= 0x00000000, /* no flags (stream invalid). */
+	SF_VALID	= 0x10000000, /* the stream has a valid DMA_conf
+				       * info (setstreamformat). */
+	SF_XRUN		= 0x20000000, /* the stream is un x-run state. */
+	SF_START	= 0x40000000, /* the DMA is running.*/
+	SF_ASIO		= 0x80000000, /* ASIO.*/
+};
+
+
+#define MASK_SPL_COUNT_HI 0x00FFFFFF /* 4 MSBits are status bits */
+#define PSTATE_OFFSET             28 /* 4 MSBits are status bits */
+
+
+#define MASK_STREAM_HAS_MAPPING	(1L << 12)
+#define MASK_STREAM_IS_ASIO	(1L <<  9)
+#define STREAM_FMT_OFFSET	10   /* the stream fmt bits start at the 10th
+				      * bit in the command word. */
+
+#define STREAM_FMT_16b          0x02
+#define STREAM_FMT_intel        0x01
+
+#define FREQ_FIELD_OFFSET	15  /* offset of the freq field in the response
+				     * word */
+
+#define BUFF_FLAGS_OFFSET	  24 /*  offset of the buffer flags in the
+				      *  response word. */
+#define MASK_DATA_SIZE	  0x00FFFFFF /* this must match the field size of
+				      * datasize in the buffer_t structure. */
+
+#define MASK_BUFFER_ID	        0xFF /* the cancel command awaits a buffer ID,
+				      * may be 0xFF for "current". */
+
+
+/* code adapted from PcxErr_e.h */
+
+/* Bits masks */
+
+#define ERROR_MASK              0x8000
+
+#define SOURCE_MASK             0x7800
+
+#define E_SOURCE_BOARD          0x4000 /* 8 >> 1 */
+#define E_SOURCE_DRV            0x2000 /* 4 >> 1 */
+#define E_SOURCE_API            0x1000 /* 2 >> 1 */
+/* Error tools */
+#define E_SOURCE_TOOLS          0x0800 /* 1 >> 1 */
+/* Error pcxaudio */
+#define E_SOURCE_AUDIO          0x1800 /* 3 >> 1 */
+/* Error virtual pcx */
+#define E_SOURCE_VPCX           0x2800 /* 5 >> 1 */
+/* Error dispatcher */
+#define E_SOURCE_DISPATCHER     0x3000 /* 6 >> 1 */
+/* Error from CobraNet firmware */
+#define E_SOURCE_COBRANET       0x3800 /* 7 >> 1 */
+
+#define E_SOURCE_USER           0x7800
+
+#define CLASS_MASK              0x0700
+
+#define CODE_MASK               0x00FF
+
+/* Bits values */
+
+/* Values for the error/warning bit */
+#define ERROR_VALUE             0x8000
+#define WARNING_VALUE           0x0000
+
+/* Class values */
+#define E_CLASS_GENERAL                  0x0000
+#define E_CLASS_INVALID_CMD              0x0100
+#define E_CLASS_INVALID_STD_OBJECT       0x0200
+#define E_CLASS_RSRC_IMPOSSIBLE          0x0300
+#define E_CLASS_WRONG_CONTEXT            0x0400
+#define E_CLASS_BAD_SPECIFIC_PARAMETER   0x0500
+#define E_CLASS_REAL_TIME_ERROR          0x0600
+#define E_CLASS_DIRECTSHOW               0x0700
+#define E_CLASS_FREE                     0x0700
+
+
+/* Complete DRV error code for the general class */
+#define ED_GN           (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_GENERAL)
+#define ED_CONCURRENCY                  (ED_GN | 0x01)
+#define ED_DSP_CRASHED                  (ED_GN | 0x02)
+#define ED_UNKNOWN_BOARD                (ED_GN | 0x03)
+#define ED_NOT_INSTALLED                (ED_GN | 0x04)
+#define ED_CANNOT_OPEN_SVC_MANAGER      (ED_GN | 0x05)
+#define ED_CANNOT_READ_REGISTRY         (ED_GN | 0x06)
+#define ED_DSP_VERSION_MISMATCH         (ED_GN | 0x07)
+#define ED_UNAVAILABLE_FEATURE          (ED_GN | 0x08)
+#define ED_CANCELLED                    (ED_GN | 0x09)
+#define ED_NO_RESPONSE_AT_IRQA          (ED_GN | 0x10)
+#define ED_INVALID_ADDRESS              (ED_GN | 0x11)
+#define ED_DSP_CORRUPTED                (ED_GN | 0x12)
+#define ED_PENDING_OPERATION            (ED_GN | 0x13)
+#define ED_NET_ALLOCATE_MEMORY_IMPOSSIBLE   (ED_GN | 0x14)
+#define ED_NET_REGISTER_ERROR               (ED_GN | 0x15)
+#define ED_NET_THREAD_ERROR                 (ED_GN | 0x16)
+#define ED_NET_OPEN_ERROR                   (ED_GN | 0x17)
+#define ED_NET_CLOSE_ERROR                  (ED_GN | 0x18)
+#define ED_NET_NO_MORE_PACKET               (ED_GN | 0x19)
+#define ED_NET_NO_MORE_BUFFER               (ED_GN | 0x1A)
+#define ED_NET_SEND_ERROR                   (ED_GN | 0x1B)
+#define ED_NET_RECEIVE_ERROR                (ED_GN | 0x1C)
+#define ED_NET_WRONG_MSG_SIZE               (ED_GN | 0x1D)
+#define ED_NET_WAIT_ERROR                   (ED_GN | 0x1E)
+#define ED_NET_EEPROM_ERROR                 (ED_GN | 0x1F)
+#define ED_INVALID_RS232_COM_NUMBER         (ED_GN | 0x20)
+#define ED_INVALID_RS232_INIT               (ED_GN | 0x21)
+#define ED_FILE_ERROR                       (ED_GN | 0x22)
+#define ED_INVALID_GPIO_CMD                 (ED_GN | 0x23)
+#define ED_RS232_ALREADY_OPENED             (ED_GN | 0x24)
+#define ED_RS232_NOT_OPENED                 (ED_GN | 0x25)
+#define ED_GPIO_ALREADY_OPENED              (ED_GN | 0x26)
+#define ED_GPIO_NOT_OPENED                  (ED_GN | 0x27)
+#define ED_REGISTRY_ERROR                   (ED_GN | 0x28) /* <- NCX */
+#define ED_INVALID_SERVICE                  (ED_GN | 0x29) /* <- NCX */
+
+#define ED_READ_FILE_ALREADY_OPENED	    (ED_GN | 0x2a) /* <- Decalage
+							    * pour RCX
+							    * (old 0x28)
+							    * */
+#define ED_READ_FILE_INVALID_COMMAND	    (ED_GN | 0x2b) /* ~ */
+#define ED_READ_FILE_INVALID_PARAMETER	    (ED_GN | 0x2c) /* ~ */
+#define ED_READ_FILE_ALREADY_CLOSED	    (ED_GN | 0x2d) /* ~ */
+#define ED_READ_FILE_NO_INFORMATION	    (ED_GN | 0x2e) /* ~ */
+#define ED_READ_FILE_INVALID_HANDLE	    (ED_GN | 0x2f) /* ~ */
+#define ED_READ_FILE_END_OF_FILE	    (ED_GN | 0x30) /* ~ */
+#define ED_READ_FILE_ERROR	            (ED_GN | 0x31) /* ~ */
+
+#define ED_DSP_CRASHED_EXC_DSPSTACK_OVERFLOW (ED_GN | 0x32) /* <- Decalage pour
+							     * PCX (old 0x14) */
+#define ED_DSP_CRASHED_EXC_SYSSTACK_OVERFLOW (ED_GN | 0x33) /* ~ */
+#define ED_DSP_CRASHED_EXC_ILLEGAL           (ED_GN | 0x34) /* ~ */
+#define ED_DSP_CRASHED_EXC_TIMER_REENTRY     (ED_GN | 0x35) /* ~ */
+#define ED_DSP_CRASHED_EXC_FATAL_ERROR       (ED_GN | 0x36) /* ~ */
+
+#define ED_FLASH_PCCARD_NOT_PRESENT          (ED_GN | 0x37)
+
+#define ED_NO_CURRENT_CLOCK                  (ED_GN | 0x38)
+
+/* Complete DRV error code for real time class */
+#define ED_RT           (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_REAL_TIME_ERROR)
+#define ED_DSP_TIMED_OUT                (ED_RT | 0x01)
+#define ED_DSP_CHK_TIMED_OUT            (ED_RT | 0x02)
+#define ED_STREAM_OVERRUN               (ED_RT | 0x03)
+#define ED_DSP_BUSY                     (ED_RT | 0x04)
+#define ED_DSP_SEMAPHORE_TIME_OUT       (ED_RT | 0x05)
+#define ED_BOARD_TIME_OUT               (ED_RT | 0x06)
+#define ED_XILINX_ERROR                 (ED_RT | 0x07)
+#define ED_COBRANET_ITF_NOT_RESPONDING  (ED_RT | 0x08)
+
+/* Complete BOARD error code for the invaid standard object class */
+#define EB_ISO          (ERROR_VALUE | E_SOURCE_BOARD | \
+			 E_CLASS_INVALID_STD_OBJECT)
+#define EB_INVALID_EFFECT               (EB_ISO | 0x00)
+#define EB_INVALID_PIPE                 (EB_ISO | 0x40)
+#define EB_INVALID_STREAM               (EB_ISO | 0x80)
+#define EB_INVALID_AUDIO                (EB_ISO | 0xC0)
+
+/* Complete BOARD error code for impossible resource allocation class */
+#define EB_RI           (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_RSRC_IMPOSSIBLE)
+#define EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE (EB_RI | 0x01)
+#define EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE           (EB_RI | 0x02)
+
+#define EB_ALLOCATE_MEM_STREAM_IMPOSSIBLE		\
+	EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE
+#define EB_ALLOCATE_MEM_PIPE_IMPOSSIBLE			\
+	EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE
+
+#define EB_ALLOCATE_DIFFERED_CMD_IMPOSSIBLE     (EB_RI | 0x03)
+#define EB_TOO_MANY_DIFFERED_CMD                (EB_RI | 0x04)
+#define EB_RBUFFERS_TABLE_OVERFLOW              (EB_RI | 0x05)
+#define EB_ALLOCATE_EFFECTS_IMPOSSIBLE          (EB_RI | 0x08)
+#define EB_ALLOCATE_EFFECT_POS_IMPOSSIBLE       (EB_RI | 0x09)
+#define EB_RBUFFER_NOT_AVAILABLE                (EB_RI | 0x0A)
+#define EB_ALLOCATE_CONTEXT_LIII_IMPOSSIBLE     (EB_RI | 0x0B)
+#define EB_STATUS_DIALOG_IMPOSSIBLE             (EB_RI | 0x1D)
+#define EB_CONTROL_CMD_IMPOSSIBLE               (EB_RI | 0x1E)
+#define EB_STATUS_SEND_IMPOSSIBLE               (EB_RI | 0x1F)
+#define EB_ALLOCATE_PIPE_IMPOSSIBLE             (EB_RI | 0x40)
+#define EB_ALLOCATE_STREAM_IMPOSSIBLE           (EB_RI | 0x80)
+#define EB_ALLOCATE_AUDIO_IMPOSSIBLE            (EB_RI | 0xC0)
+
+/* Complete BOARD error code for wrong call context class */
+#define EB_WCC          (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_WRONG_CONTEXT)
+#define EB_CMD_REFUSED                  (EB_WCC | 0x00)
+#define EB_START_STREAM_REFUSED         (EB_WCC | 0xFC)
+#define EB_SPC_REFUSED                  (EB_WCC | 0xFD)
+#define EB_CSN_REFUSED                  (EB_WCC | 0xFE)
+#define EB_CSE_REFUSED                  (EB_WCC | 0xFF)
+
+
+
+
+#endif /* LX_DEFS_H */
-- 
cgit v0.10.2


From 7852fd08fdc78bf43150137bdbfdfdccdefffe0f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 14 Apr 2009 12:25:52 +0200
Subject: ALSA: lx6464es - Use snd_card_create()

Use snd_card_create() instead of the obsoleted snd_card_new().

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c
index 7bc8b8c..870bfc5 100644
--- a/sound/pci/lx6464es/lx6464es.c
+++ b/sound/pci/lx6464es/lx6464es.c
@@ -1091,9 +1091,9 @@ static int __devinit snd_lx6464es_probe(struct pci_dev *pci,
 		return -ENOENT;
 	}
 
-	card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
-	if (card == NULL)
-		return -ENOMEM;
+	err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
+	if (err < 0)
+		return err;
 
 	err = snd_lx6464es_create(card, pci, &chip);
 	if (err < 0) {
-- 
cgit v0.10.2


From d7dee4d7744d039bf28e4f6d4f5674f44820265a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 14 Apr 2009 12:27:12 +0200
Subject: ALSA: lx6464es - Disable lx_message_send()

Disable lx_message_send() function temporarily as it's not used
anywhere.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c
index a9f8f88..5812780 100644
--- a/sound/pci/lx6464es/lx_core.c
+++ b/sound/pci/lx6464es/lx_core.c
@@ -314,6 +314,7 @@ static inline void lx_message_dump(struct lx_rmh *rmh)
 #define XILINX_POLL_NO_SLEEP    100
 #define XILINX_POLL_ITERATIONS  150
 
+#if 0 /* not used now */
 static int lx_message_send(struct lx6464es *chip, struct lx_rmh *rmh)
 {
 	u32 reg = ED_DSP_TIMED_OUT;
@@ -404,6 +405,7 @@ polling_successful:
 	lx_message_dump(rmh);
 	return 0;
 }
+#endif /* not used now */
 
 static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh)
 {
-- 
cgit v0.10.2


From 8338c300642f67af5a8cc279ca5e088c7055b7eb Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 14 Apr 2009 12:30:36 +0200
Subject: ALSA: Add missing description of lx6464es to ALSA-Configuration.txt

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 012858d..dfd266e 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -1093,6 +1093,13 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     This module supports multiple cards.
     The driver requires the firmware loader support on kernel.
 
+  Module snd-lx6464es
+  -------------------
+
+    Module for Digigram LX6464ES boards
+
+    This module supports multiple cards.
+
   Module snd-maestro3
   -------------------
 
-- 
cgit v0.10.2


From 7e05575c422d45f393c2d9b5900e97a30bf69bea Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 14 Apr 2009 12:12:29 +0900
Subject: x86: calgary: remove IOMMU_DEBUG

CONFIG_IOMMU_DEBUG has depends on CONFIG_GART_IOMMU:

config IOMMU_DEBUG
	bool "Enable IOMMU debugging"
	depends on GART_IOMMU && DEBUG_KERNEL
	depends on X86_64

So it's not useful to have CONFIG_IOMMU_DEBUG in Calgary IOMMU code,
which does the extra checking of the bitmap space management.

And Calgary uses the iommu helper for the bitmap space management now
so it would be better to have the extra checking feature in the iommu
helper rather than Calgary code (if necessary).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Cc: alexisb@us.ibm.com
LKML-Reference: <20090414120827G.fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 755c21e..971a3be 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = {
 
 static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
 
-/* enable this to stress test the chip's TCE cache */
-#ifdef CONFIG_IOMMU_DEBUG
-static int debugging = 1;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
-	int expected, unsigned long start, unsigned long end)
-{
-	unsigned long idx = start;
-
-	BUG_ON(start >= end);
-
-	while (idx < end) {
-		if (!!test_bit(idx, bitmap) != expected)
-			return idx;
-		++idx;
-	}
-
-	/* all bits have the expected value */
-	return ~0UL;
-}
-#else /* debugging is disabled */
-static int debugging;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
-	int expected, unsigned long start, unsigned long end)
-{
-	return ~0UL;
-}
-
-#endif /* CONFIG_IOMMU_DEBUG */
-
 static inline int translation_enabled(struct iommu_table *tbl)
 {
 	/* only PHBs with translation enabled have an IOMMU table */
@@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 {
 	unsigned long index;
 	unsigned long end;
-	unsigned long badbit;
 	unsigned long flags;
 
 	index = start_addr >> PAGE_SHIFT;
@@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 
 	spin_lock_irqsave(&tbl->it_lock, flags);
 
-	badbit = verify_bit_range(tbl->it_map, 0, index, end);
-	if (badbit != ~0UL) {
-		if (printk_ratelimit())
-			printk(KERN_ERR "Calgary: entry already allocated at "
-			       "0x%lx tbl %p dma 0x%lx npages %u\n",
-			       badbit, tbl, start_addr, npages);
-	}
-
 	iommu_area_reserve(tbl->it_map, index, npages);
 
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 	unsigned int npages)
 {
 	unsigned long entry;
-	unsigned long badbit;
 	unsigned long badend;
 	unsigned long flags;
 
@@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 
 	spin_lock_irqsave(&tbl->it_lock, flags);
 
-	badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
-	if (badbit != ~0UL) {
-		if (printk_ratelimit())
-			printk(KERN_ERR "Calgary: bit is off at 0x%lx "
-			       "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
-			       badbit, tbl, dma_addr, entry, npages);
-	}
-
 	iommu_area_free(tbl->it_map, entry, npages);
 
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -1488,9 +1439,8 @@ void __init detect_calgary(void)
 		iommu_detected = 1;
 		calgary_detected = 1;
 		printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
-		printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
-		       "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
-		       debugging ? "enabled" : "disabled");
+		printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
+		       specified_table_size);
 
 		/* swiotlb for devices that aren't behind the Calgary. */
 		if (max_pfn > MAX_DMA32_PFN)
-- 
cgit v0.10.2


From c282866101bfde888a44da3babd2f9ab265ca6f9 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@poczta.fm>
Date: Sat, 4 Apr 2009 14:48:32 +0200
Subject: ALSA: sc6000: add support for SC-6600 and SC-7000

Add support for later cards based on CompuMedia ASC-9408 chipsets.
These cards were produced by Gallant.

This patch make the OSS aedsp16 driver redundant.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index c5c9a92..64129bf 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -177,15 +177,18 @@ config SND_ES18XX
 	  will be called snd-es18xx.
 
 config SND_SC6000
-	tristate "Gallant SC-6000, Audio Excel DSP 16"
+	tristate "Gallant SC-6000/6600/7000 and Audio Excel DSP 16"
 	depends on HAS_IOPORT
 	select SND_WSS_LIB
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	help
-	  Say Y here to include support for Gallant SC-6000 card and clones:
+	  Say Y here to include support for Gallant SC-6000, SC-6600, SC-7000
+	  cards and clones:
 	  Audio Excel DSP 16 and Zoltrix AV302.
 
+	  These cards are based on CompuMedia ASC-9308 or ASC-9408 chips.
+
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-sc6000.
 
diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c
index 7820106..983ab7e 100644
--- a/sound/isa/sc6000.c
+++ b/sound/isa/sc6000.c
@@ -2,6 +2,8 @@
  *  Driver for Gallant SC-6000 soundcard. This card is also known as
  *  Audio Excel DSP 16 or Zoltrix AV302.
  *  These cards use CompuMedia ASC-9308 chip + AD1848 codec.
+ *  SC-6600 and SC-7000 cards are also supported. They are based on
+ *  CompuMedia ASC-9408 chip and CS4231 codec.
  *
  *  Copyright (C) 2007 Krzysztof Helt <krzysztof.h1@wp.pl>
  *
@@ -191,7 +193,7 @@ static __devinit unsigned char sc6000_mpu_irq_to_softcfg(int mpu_irq)
 	return val;
 }
 
-static __devinit int sc6000_wait_data(char __iomem *vport)
+static int sc6000_wait_data(char __iomem *vport)
 {
 	int loop = 1000;
 	unsigned char val = 0;
@@ -206,7 +208,7 @@ static __devinit int sc6000_wait_data(char __iomem *vport)
 	return -EAGAIN;
 }
 
-static __devinit int sc6000_read(char __iomem *vport)
+static int sc6000_read(char __iomem *vport)
 {
 	if (sc6000_wait_data(vport))
 		return -EBUSY;
@@ -215,7 +217,7 @@ static __devinit int sc6000_read(char __iomem *vport)
 
 }
 
-static __devinit int sc6000_write(char __iomem *vport, int cmd)
+static int sc6000_write(char __iomem *vport, int cmd)
 {
 	unsigned char val;
 	int loop = 500000;
@@ -276,8 +278,33 @@ static int __devinit sc6000_dsp_reset(char __iomem *vport)
 }
 
 /* detection and initialization */
-static int __devinit sc6000_cfg_write(char __iomem *vport,
-				      unsigned char softcfg)
+static int __devinit sc6000_hw_cfg_write(char __iomem *vport, const int *cfg)
+{
+	if (sc6000_write(vport, COMMAND_6C) < 0) {
+		snd_printk(KERN_WARNING "CMD 0x%x: failed!\n", COMMAND_6C);
+		return -EIO;
+	}
+	if (sc6000_write(vport, COMMAND_5C) < 0) {
+		snd_printk(KERN_ERR "CMD 0x%x: failed!\n", COMMAND_5C);
+		return -EIO;
+	}
+	if (sc6000_write(vport, cfg[0]) < 0) {
+		snd_printk(KERN_ERR "DATA 0x%x: failed!\n", cfg[0]);
+		return -EIO;
+	}
+	if (sc6000_write(vport, cfg[1]) < 0) {
+		snd_printk(KERN_ERR "DATA 0x%x: failed!\n", cfg[1]);
+		return -EIO;
+	}
+	if (sc6000_write(vport, COMMAND_C5) < 0) {
+		snd_printk(KERN_ERR "CMD 0x%x: failed!\n", COMMAND_C5);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int sc6000_cfg_write(char __iomem *vport, unsigned char softcfg)
 {
 
 	if (sc6000_write(vport, WRITE_MDIRQ_CFG)) {
@@ -291,7 +318,7 @@ static int __devinit sc6000_cfg_write(char __iomem *vport,
 	return 0;
 }
 
-static int __devinit sc6000_setup_board(char __iomem *vport, int config)
+static int sc6000_setup_board(char __iomem *vport, int config)
 {
 	int loop = 10;
 
@@ -334,16 +361,38 @@ static int __devinit sc6000_init_mss(char __iomem *vport, int config,
 	return 0;
 }
 
-static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma,
-					char __iomem *vmss_port, int mpu_irq)
+static void __devinit sc6000_hw_cfg_encode(char __iomem *vport, int *cfg,
+					   long xport, long xmpu,
+					   long xmss_port)
+{
+	cfg[0] = 0;
+	cfg[1] = 0;
+	if (xport == 0x240)
+		cfg[0] |= 1;
+	if (xmpu != SNDRV_AUTO_PORT) {
+		cfg[0] |= (xmpu & 0x30) >> 2;
+		cfg[1] |= 0x20;
+	}
+	if (xmss_port == 0xe80)
+		cfg[0] |= 0x10;
+	cfg[0] |= 0x40;		/* always set */
+	cfg[1] |= 0x80;		/* enable WSS system */
+	cfg[1] &= ~0x40;	/* disable IDE */
+	snd_printd("hw cfg %x, %x\n", cfg[0], cfg[1]);
+}
+
+static int __devinit sc6000_init_board(char __iomem *vport,
+					char __iomem *vmss_port, int dev)
 {
 	char answer[15];
 	char version[2];
-	int mss_config = sc6000_irq_to_softcfg(irq) |
-			 sc6000_dma_to_softcfg(dma);
+	int mss_config = sc6000_irq_to_softcfg(irq[dev]) |
+			 sc6000_dma_to_softcfg(dma[dev]);
 	int config = mss_config |
-		     sc6000_mpu_irq_to_softcfg(mpu_irq);
+		     sc6000_mpu_irq_to_softcfg(mpu_irq[dev]);
 	int err;
+	int cfg[2];
+	int old = 0;
 
 	err = sc6000_dsp_reset(vport);
 	if (err < 0) {
@@ -360,7 +409,6 @@ static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma,
 	/*
 	 * My SC-6000 card return "SC-6000" in DSPCopyright, so
 	 * if we have something different, we have to be warned.
-	 * Mine returns "SC-6000A " - KH
 	 */
 	if (strncmp("SC-6000", answer, 7))
 		snd_printk(KERN_WARNING "Warning: non SC-6000 audio card!\n");
@@ -372,13 +420,29 @@ static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma,
 	printk(KERN_INFO PFX "Detected model: %s, DSP version %d.%d\n",
 		answer, version[0], version[1]);
 
-	/*
-	 * 0x0A == (IRQ 7, DMA 1, MIRQ 0)
-	 */
-	err = sc6000_cfg_write(vport, 0x0a);
+	/* set configuration */
+	sc6000_hw_cfg_encode(vport, &cfg[0], port[dev], mpu_port[dev],
+			     mss_port[dev]);
+	if (sc6000_hw_cfg_write(vport, cfg) < 0) {
+		snd_printk(KERN_ERR "sc6000_hw_cfg_write: failed!\n");
+		return -EIO;
+	}
+	err = sc6000_setup_board(vport, config);
 	if (err < 0) {
-		snd_printk(KERN_ERR "sc6000_cfg_write: failed!\n");
-		return -EFAULT;
+		snd_printk(KERN_ERR "sc6000_setup_board: failed!\n");
+		return -ENODEV;
+	}
+
+	sc6000_dsp_reset(vport);
+	sc6000_write(vport, COMMAND_5C);
+	if (sc6000_read(vport) < 0)
+		old = 1;
+	sc6000_dsp_reset(vport);
+
+	if (!old) {
+		sc6000_write(vport, COMMAND_60);
+		sc6000_write(vport, 0x02);
+		sc6000_dsp_reset(vport);
 	}
 
 	err = sc6000_setup_board(vport, config);
@@ -386,10 +450,9 @@ static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma,
 		snd_printk(KERN_ERR "sc6000_setup_board: failed!\n");
 		return -ENODEV;
 	}
-
 	err = sc6000_init_mss(vport, config, vmss_port, mss_config);
 	if (err < 0) {
-		snd_printk(KERN_ERR "Can not initialize "
+		snd_printk(KERN_ERR "Cannot initialize "
 			   "Microsoft Sound System mode.\n");
 		return -ENODEV;
 	}
@@ -485,14 +548,16 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev)
 	struct snd_card *card;
 	struct snd_wss *chip;
 	struct snd_opl3 *opl3;
-	char __iomem *vport;
+	char __iomem **vport;
 	char __iomem *vmss_port;
 
 
-	err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
+	err = snd_card_create(index[dev], id[dev], THIS_MODULE, sizeof(vport),
+				&card);
 	if (err < 0)
 		return err;
 
+	vport = card->private_data;
 	if (xirq == SNDRV_AUTO_IRQ) {
 		xirq = snd_legacy_find_free_irq(possible_irqs);
 		if (xirq < 0) {
@@ -517,8 +582,8 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev)
 		err = -EBUSY;
 		goto err_exit;
 	}
-	vport = devm_ioport_map(devptr, port[dev], 0x10);
-	if (!vport) {
+	*vport = devm_ioport_map(devptr, port[dev], 0x10);
+	if (*vport == NULL) {
 		snd_printk(KERN_ERR PFX
 			   "I/O port cannot be iomaped.\n");
 		err = -EBUSY;
@@ -533,7 +598,7 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev)
 		goto err_unmap1;
 	}
 	vmss_port = devm_ioport_map(devptr, mss_port[dev], 4);
-	if (!vport) {
+	if (!vmss_port) {
 		snd_printk(KERN_ERR PFX
 			   "MSS port I/O cannot be iomaped.\n");
 		err = -EBUSY;
@@ -544,7 +609,7 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev)
 		   port[dev], xirq, xdma,
 		   mpu_irq[dev] == SNDRV_AUTO_IRQ ? 0 : mpu_irq[dev]);
 
-	err = sc6000_init_board(vport, xirq, xdma, vmss_port, mpu_irq[dev]);
+	err = sc6000_init_board(*vport, vmss_port, dev);
 	if (err < 0)
 		goto err_unmap2;
 
@@ -552,7 +617,6 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev)
 			     WSS_HW_DETECT, 0, &chip);
 	if (err < 0)
 		goto err_unmap2;
-	card->private_data = chip;
 
 	err = snd_wss_pcm(chip, 0, NULL);
 	if (err < 0) {
@@ -608,6 +672,7 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev)
 	return 0;
 
 err_unmap2:
+	sc6000_setup_board(*vport, 0);
 	release_region(mss_port[dev], 4);
 err_unmap1:
 	release_region(port[dev], 0x10);
@@ -618,11 +683,17 @@ err_exit:
 
 static int __devexit snd_sc6000_remove(struct device *devptr, unsigned int dev)
 {
+	struct snd_card *card = dev_get_drvdata(devptr);
+	char __iomem **vport = card->private_data;
+
+	if (sc6000_setup_board(*vport, 0) < 0)
+		snd_printk(KERN_WARNING "sc6000_setup_board failed on exit!\n");
+
 	release_region(port[dev], 0x10);
 	release_region(mss_port[dev], 4);
 
-	snd_card_free(dev_get_drvdata(devptr));
 	dev_set_drvdata(devptr, NULL);
+	snd_card_free(card);
 	return 0;
 }
 
-- 
cgit v0.10.2


From ea20d9293ce423a39717ed4375393129a2e701f9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 08:54:16 -0400
Subject: tracing: consolidate trace and trace_event headers

Impact: clean up

Neil Horman (et. al.) criticized the way the trace events were broken up
into two files. The reason for that was that ftrace needed to separate out
the declarations from where the #include <linux/tracepoint.h> was used.
It then dawned on me that the tracepoint.h header only needs to define the
TRACE_EVENT macro if it is not already defined.

The solution is simply to test if TRACE_EVENT is defined, and if it is not
then the linux/tracepoint.h header can define it. This change consolidates
all the <traces>.h and <traces>_event_types.h into the <traces>.h file.

Reported-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Theodore Tso <tytso@mit.edu>
Reported-by: Jiaying Zhang <jiayingz@google.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d35a7ee..4353f3f 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -31,6 +31,8 @@ struct tracepoint {
 					 * Keep in sync with vmlinux.lds.h.
 					 */
 
+#ifndef DECLARE_TRACE
+
 #define TP_PROTO(args...)	args
 #define TP_ARGS(args...)		args
 
@@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 	struct tracepoint *end)
 { }
 #endif /* CONFIG_TRACEPOINTS */
+#endif /* DECLARE_TRACE */
 
 /*
  * Connect a probe to a tracepoint.
@@ -154,10 +157,13 @@ static inline void tracepoint_synchronize_unregister(void)
 }
 
 #define PARAMS(args...) args
+
+#ifndef TRACE_FORMAT
 #define TRACE_FORMAT(name, proto, args, fmt)		\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
 
-
+#ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
  *
@@ -262,5 +268,6 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
 
 #endif
diff --git a/include/trace/irq.h b/include/trace/irq.h
index ff5d449..04ab4c6 100644
--- a/include/trace/irq.h
+++ b/include/trace/irq.h
@@ -1,9 +1,54 @@
-#ifndef _TRACE_IRQ_H
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_IRQ_H
 
-#include <linux/interrupt.h>
 #include <linux/tracepoint.h>
+#include <linux/interrupt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+/*
+ * Tracepoint for entry of interrupt handler:
+ */
+TRACE_FORMAT(irq_handler_entry,
+	TP_PROTO(int irq, struct irqaction *action),
+	TP_ARGS(irq, action),
+	TP_FMT("irq=%d handler=%s", irq, action->name)
+	);
+
+/*
+ * Tracepoint for return of an interrupt handler:
+ */
+TRACE_EVENT(irq_handler_exit,
+
+	TP_PROTO(int irq, struct irqaction *action, int ret),
+
+	TP_ARGS(irq, action, ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	irq	)
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->irq	= irq;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("irq=%d return=%s",
+		  __entry->irq, __entry->ret ? "handled" : "unhandled")
+);
+
+TRACE_FORMAT(softirq_entry,
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+	TP_ARGS(h, vec),
+	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
+	);
 
-#include <trace/irq_event_types.h>
+TRACE_FORMAT(softirq_exit,
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+	TP_ARGS(h, vec),
+	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
+	);
 
 #endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
deleted file mode 100644
index 85964eb..0000000
--- a/include/trace/irq_event_types.h
+++ /dev/null
@@ -1,55 +0,0 @@
-
-/* use <trace/irq.h> instead */
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM irq
-
-/*
- * Tracepoint for entry of interrupt handler:
- */
-TRACE_FORMAT(irq_handler_entry,
-	TP_PROTO(int irq, struct irqaction *action),
-	TP_ARGS(irq, action),
-	TP_FMT("irq=%d handler=%s", irq, action->name)
-	);
-
-/*
- * Tracepoint for return of an interrupt handler:
- */
-TRACE_EVENT(irq_handler_exit,
-
-	TP_PROTO(int irq, struct irqaction *action, int ret),
-
-	TP_ARGS(irq, action, ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	irq	)
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->irq	= irq;
-		__entry->ret	= ret;
-	),
-
-	TP_printk("irq=%d return=%s",
-		  __entry->irq, __entry->ret ? "handled" : "unhandled")
-);
-
-TRACE_FORMAT(softirq_entry,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-TRACE_FORMAT(softirq_exit,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/kmem.h b/include/trace/kmem.h
index 46efc24..d7d1218 100644
--- a/include/trace/kmem.h
+++ b/include/trace/kmem.h
@@ -1,9 +1,192 @@
-#ifndef _TRACE_KMEM_H
+#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_KMEM_H
 
 #include <linux/types.h>
 #include <linux/tracepoint.h>
 
-#include <trace/kmem_event_types.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
 
-#endif /* _TRACE_KMEM_H */
+TRACE_EVENT(kmalloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmalloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+#endif
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
index 5ca67df..8ee7900 100644
--- a/include/trace/lockdep.h
+++ b/include/trace/lockdep.h
@@ -1,9 +1,57 @@
-#ifndef _TRACE_LOCKDEP_H
+#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_LOCKDEP_H
 
 #include <linux/lockdep.h>
 #include <linux/tracepoint.h>
 
-#include <trace/lockdep_event_types.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lock
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_FORMAT(lock_acquire,
+	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+		int trylock, int read, int check,
+		struct lockdep_map *next_lock, unsigned long ip),
+	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+	TP_FMT("%s%s%s", trylock ? "try " : "",
+		read ? "read " : "", lock->name)
+	);
+
+TRACE_FORMAT(lock_release,
+	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+	TP_ARGS(lock, nested, ip),
+	TP_FMT("%s", lock->name)
+	);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_FORMAT(lock_contended,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+	TP_ARGS(lock, ip),
+	TP_FMT("%s", lock->name)
+	);
+
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__entry->name = lock->name;
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
 
 #endif
+#endif
+
+#endif /* _TRACE_LOCKDEP_H */
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
deleted file mode 100644
index 863f1e4..0000000
--- a/include/trace/lockdep_event_types.h
+++ /dev/null
@@ -1,57 +0,0 @@
-
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM lock
-
-#ifdef CONFIG_LOCKDEP
-
-TRACE_FORMAT(lock_acquire,
-	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
-		int trylock, int read, int check,
-		struct lockdep_map *next_lock, unsigned long ip),
-	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-	TP_FMT("%s%s%s", trylock ? "try " : "",
-		read ? "read " : "", lock->name)
-	);
-
-TRACE_FORMAT(lock_release,
-	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
-	TP_ARGS(lock, nested, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-#ifdef CONFIG_LOCK_STAT
-
-TRACE_FORMAT(lock_contended,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-TRACE_EVENT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
-
-	TP_ARGS(lock, ip, waittime),
-
-	TP_STRUCT__entry(
-		__field(const char *, name)
-		__field(unsigned long, wait_usec)
-		__field(unsigned long, wait_nsec_rem)
-	),
-	TP_fast_assign(
-		__entry->name = lock->name;
-		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
-		__entry->wait_usec = (unsigned long) waittime;
-	),
-	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
-				       __entry->wait_nsec_rem)
-);
-
-#endif
-#endif
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/sched.h b/include/trace/sched.h
index 4e372a1..5b1cf4a 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -1,9 +1,336 @@
-#ifndef _TRACE_SCHED_H
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_SCHED_H
 
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
 
-#include <trace/sched_event_types.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sched
 
-#endif
+/*
+ * Tracepoint for calling kthread_stop, performed to end a kthread:
+ */
+TRACE_EVENT(sched_kthread_stop,
+
+	TP_PROTO(struct task_struct *t),
+
+	TP_ARGS(t),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+		__entry->pid	= t->pid;
+	),
+
+	TP_printk("task %s:%d", __entry->comm, __entry->pid)
+);
+
+/*
+ * Tracepoint for the return value of the kthread stopping:
+ */
+TRACE_EVENT(sched_kthread_stop_ret,
+
+	TP_PROTO(int ret),
+
+	TP_ARGS(ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->ret	= ret;
+	),
+
+	TP_printk("ret %d", __entry->ret)
+);
+
+/*
+ * Tracepoint for waiting on task to unschedule:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wait_task,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p),
+
+	TP_ARGS(rq, p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid	= p->pid;
+		__entry->prio	= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for waking up a task:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wakeup,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+
+	TP_ARGS(rq, p, success),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	success			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->success	= success;
+	),
+
+	TP_printk("task %s:%d [%d] success=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->success)
+);
+
+/*
+ * Tracepoint for waking up a new task:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wakeup_new,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+
+	TP_ARGS(rq, p, success),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	success			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->success	= success;
+	),
+
+	TP_printk("task %s:%d [%d] success=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->success)
+);
+
+/*
+ * Tracepoint for task switches, performed by the scheduler:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_switch,
+
+	TP_PROTO(struct rq *rq, struct task_struct *prev,
+		 struct task_struct *next),
+
+	TP_ARGS(rq, prev, next),
+
+	TP_STRUCT__entry(
+		__array(	char,	prev_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	prev_pid			)
+		__field(	int,	prev_prio			)
+		__array(	char,	next_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	next_pid			)
+		__field(	int,	next_prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+		__entry->prev_pid	= prev->pid;
+		__entry->prev_prio	= prev->prio;
+		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+		__entry->next_pid	= next->pid;
+		__entry->next_prio	= next->prio;
+	),
+
+	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->next_comm, __entry->next_pid, __entry->next_prio)
+);
+
+/*
+ * Tracepoint for a task being migrated:
+ */
+TRACE_EVENT(sched_migrate_task,
+
+	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+
+	TP_ARGS(p, orig_cpu, dest_cpu),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	orig_cpu		)
+		__field(	int,	dest_cpu		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->orig_cpu	= orig_cpu;
+		__entry->dest_cpu	= dest_cpu;
+	),
+
+	TP_printk("task %s:%d [%d] from: %d  to: %d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->orig_cpu, __entry->dest_cpu)
+);
+
+/*
+ * Tracepoint for freeing a task:
+ */
+TRACE_EVENT(sched_process_free,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for a task exiting:
+ */
+TRACE_EVENT(sched_process_exit,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for a waiting task:
+ */
+TRACE_EVENT(sched_process_wait,
+
+	TP_PROTO(struct pid *pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->pid		= pid_nr(pid);
+		__entry->prio		= current->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for do_fork:
+ */
+TRACE_EVENT(sched_process_fork,
+
+	TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+	TP_ARGS(parent, child),
+
+	TP_STRUCT__entry(
+		__array(	char,	parent_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	parent_pid			)
+		__array(	char,	child_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	child_pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
+		__entry->parent_pid	= parent->pid;
+		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
+		__entry->child_pid	= child->pid;
+	),
+
+	TP_printk("parent %s:%d  child %s:%d",
+		__entry->parent_comm, __entry->parent_pid,
+		__entry->child_comm, __entry->child_pid)
+);
+
+/*
+ * Tracepoint for sending a signal:
+ */
+TRACE_EVENT(sched_signal_send,
+
+	TP_PROTO(int sig, struct task_struct *p),
+
+	TP_ARGS(sig, p),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig			)
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid	= p->pid;
+		__entry->sig	= sig;
+	),
+
+	TP_printk("sig: %d  task %s:%d",
+		  __entry->sig, __entry->comm, __entry->pid)
+);
+
+#endif /* _TRACE_SCHED_H */
diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h
deleted file mode 100644
index 63547dc..0000000
--- a/include/trace/sched_event_types.h
+++ /dev/null
@@ -1,337 +0,0 @@
-
-/* use <trace/sched.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM sched
-
-/*
- * Tracepoint for calling kthread_stop, performed to end a kthread:
- */
-TRACE_EVENT(sched_kthread_stop,
-
-	TP_PROTO(struct task_struct *t),
-
-	TP_ARGS(t),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
-		__entry->pid	= t->pid;
-	),
-
-	TP_printk("task %s:%d", __entry->comm, __entry->pid)
-);
-
-/*
- * Tracepoint for the return value of the kthread stopping:
- */
-TRACE_EVENT(sched_kthread_stop_ret,
-
-	TP_PROTO(int ret),
-
-	TP_ARGS(ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->ret	= ret;
-	),
-
-	TP_printk("ret %d", __entry->ret)
-);
-
-/*
- * Tracepoint for waiting on task to unschedule:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wait_task,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p),
-
-	TP_ARGS(rq, p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->prio	= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for waking up a task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wakeup,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-	),
-
-	TP_printk("task %s:%d [%d] success=%d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success)
-);
-
-/*
- * Tracepoint for waking up a new task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wakeup_new,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-	),
-
-	TP_printk("task %s:%d [%d] success=%d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success)
-);
-
-/*
- * Tracepoint for task switches, performed by the scheduler:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_switch,
-
-	TP_PROTO(struct rq *rq, struct task_struct *prev,
-		 struct task_struct *next),
-
-	TP_ARGS(rq, prev, next),
-
-	TP_STRUCT__entry(
-		__array(	char,	prev_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	prev_pid			)
-		__field(	int,	prev_prio			)
-		__array(	char,	next_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	next_pid			)
-		__field(	int,	next_prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
-		__entry->prev_pid	= prev->pid;
-		__entry->prev_prio	= prev->prio;
-		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
-		__entry->next_pid	= next->pid;
-		__entry->next_prio	= next->prio;
-	),
-
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
-		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-		__entry->next_comm, __entry->next_pid, __entry->next_prio)
-);
-
-/*
- * Tracepoint for a task being migrated:
- */
-TRACE_EVENT(sched_migrate_task,
-
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
-
-	TP_ARGS(p, orig_cpu, dest_cpu),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	orig_cpu		)
-		__field(	int,	dest_cpu		)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
-		__entry->dest_cpu	= dest_cpu;
-	),
-
-	TP_printk("task %s:%d [%d] from: %d  to: %d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->orig_cpu, __entry->dest_cpu)
-);
-
-/*
- * Tracepoint for freeing a task:
- */
-TRACE_EVENT(sched_process_free,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for a task exiting:
- */
-TRACE_EVENT(sched_process_exit,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for a waiting task:
- */
-TRACE_EVENT(sched_process_wait,
-
-	TP_PROTO(struct pid *pid),
-
-	TP_ARGS(pid),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-		__entry->pid		= pid_nr(pid);
-		__entry->prio		= current->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for do_fork:
- */
-TRACE_EVENT(sched_process_fork,
-
-	TP_PROTO(struct task_struct *parent, struct task_struct *child),
-
-	TP_ARGS(parent, child),
-
-	TP_STRUCT__entry(
-		__array(	char,	parent_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	parent_pid			)
-		__array(	char,	child_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	child_pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
-		__entry->parent_pid	= parent->pid;
-		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
-		__entry->child_pid	= child->pid;
-	),
-
-	TP_printk("parent %s:%d  child %s:%d",
-		__entry->parent_comm, __entry->parent_pid,
-		__entry->child_comm, __entry->child_pid)
-);
-
-/*
- * Tracepoint for sending a signal:
- */
-TRACE_EVENT(sched_signal_send,
-
-	TP_PROTO(int sig, struct task_struct *p),
-
-	TP_ARGS(sig, p),
-
-	TP_STRUCT__entry(
-		__field(	int,	sig			)
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->sig	= sig;
-	),
-
-	TP_printk("sig: %d  task %s:%d",
-		  __entry->sig, __entry->comm, __entry->pid)
-);
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/skb.h b/include/trace/skb.h
index d2de717..e6fd281 100644
--- a/include/trace/skb.h
+++ b/include/trace/skb.h
@@ -1,9 +1,37 @@
-#ifndef _TRACE_SKB_H_
-#define _TRACE_SKB_H_
+#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SKB_H
 
 #include <linux/skbuff.h>
 #include <linux/tracepoint.h>
 
-#include <trace/skb_event_types.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
 
-#endif
+/*
+ * Tracepoint for free an sk_buff:
+ */
+TRACE_EVENT(kfree_skb,
+
+	TP_PROTO(struct sk_buff *skb, void *location),
+
+	TP_ARGS(skb, location),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned short,	protocol	)
+		__field(	void *,		location	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
+		__entry->location = location;
+	),
+
+	TP_printk("skbaddr=%p protocol=%u location=%p",
+		__entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#endif /* _TRACE_SKB_H */
diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h
deleted file mode 100644
index 4a1c504..0000000
--- a/include/trace/skb_event_types.h
+++ /dev/null
@@ -1,38 +0,0 @@
-
-/* use <trace/skb.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM skb
-
-/*
- * Tracepoint for free an sk_buff:
- */
-TRACE_EVENT(kfree_skb,
-
-	TP_PROTO(struct sk_buff *skb, void *location),
-
-	TP_ARGS(skb, location),
-
-	TP_STRUCT__entry(
-		__field(	void *,		skbaddr		)
-		__field(	unsigned short,	protocol	)
-		__field(	void *,		location	)
-	),
-
-	TP_fast_assign(
-		__entry->skbaddr = skb;
-		if (skb) {
-			__entry->protocol = ntohs(skb->protocol);
-		}
-		__entry->location = location;
-	),
-
-	TP_printk("skbaddr=%p protocol=%u location=%p",
-		__entry->skbaddr, __entry->protocol, __entry->location)
-);
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
deleted file mode 100644
index 552a50e..0000000
--- a/include/trace/trace_event_types.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* trace/<type>_event_types.h here */
-
-#include <trace/sched_event_types.h>
-#include <trace/irq_event_types.h>
-#include <trace/lockdep_event_types.h>
-#include <trace/skb_event_types.h>
-#include <trace/kmem_event_types.h>
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
index 246f2aa..5a35a91 100644
--- a/kernel/trace/events.c
+++ b/kernel/trace/events.c
@@ -8,6 +8,7 @@
 
 #include "trace_output.h"
 
+#define TRACE_HEADER_MULTI_READ
 #include "trace_events_stage_1.h"
 #include "trace_events_stage_2.h"
 #include "trace_events_stage_3.h"
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
index 38985f9..475f46a 100644
--- a/kernel/trace/trace_events_stage_1.h
+++ b/kernel/trace/trace_events_stage_1.h
@@ -1,7 +1,7 @@
 /*
  * Stage 1 of the trace events.
  *
- * Override the macros in <trace/trace_event_types.h> to include the following:
+ * Override the macros in <trace/trace_events.h> to include the following:
  *
  * struct ftrace_raw_<call> {
  *	struct trace_entry		ent;
@@ -36,4 +36,4 @@
 	};							\
 	static struct ftrace_event_call event_##name
 
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 59cfd7d..aa4a67a 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -1,7 +1,7 @@
 /*
  * Stage 2 of the trace events.
  *
- * Override the macros in <trace/trace_event_types.h> to include the following:
+ * Override the macros in <trace/trace_events.h> to include the following:
  *
  * enum print_line_t
  * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
@@ -64,7 +64,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	return TRACE_TYPE_HANDLED;					\
 }
 	
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
 
 /*
  * Setup the showing format of trace point.
@@ -128,7 +128,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 	return ret;							\
 }
 
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
 
 #undef __field
 #define __field(type, item)						\
@@ -167,4 +167,4 @@ ftrace_define_fields_##call(void)					\
 	return ret;							\
 }
 
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index 5bb1b7f..45c04e1 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -1,7 +1,7 @@
 /*
  * Stage 3 of the trace events.
  *
- * Override the macros in <trace/trace_event_types.h> to include the following:
+ * Override the macros in <trace/trace_events.h> to include the following:
  *
  * static void ftrace_event_<call>(proto)
  * {
@@ -272,7 +272,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	_TRACE_PROFILE_INIT(call)					\
 }
 
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
 
 #undef _TRACE_PROFILE
 #undef _TRACE_PROFILE_INIT
-- 
cgit v0.10.2


From 78ddb08feb7d4fbe3c0a9931804c51ee58be4023 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 14 Apr 2009 16:53:05 +0200
Subject: wait: don't use __wake_up_common()

'777c6c5 wait: prevent exclusive waiter starvation' made
__wake_up_common() global to be used from abort_exclusive_wait().

It was needed to do a wake-up with the waitqueue lock held while
passing down a key to the wake-up function.

Since '4ede816 epoll keyed wakeups: add __wake_up_locked_key() and
__wake_up_sync_key()' there is an appropriate wrapper for this case:
__wake_up_locked_key().

Use it here and make __wake_up_common() private to the scheduler
again.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1239720785-19661-1-git-send-email-hannes@cmpxchg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 5d631c1..67d4e89 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
 	list_del(&old->task_list);
 }
 
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-			int nr_exclusive, int sync, void *key);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
diff --git a/kernel/sched.c b/kernel/sched.c
index 36d213b..92b4b56 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5345,7 +5345,7 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, int sync, void *key)
 {
 	wait_queue_t *curr, *next;
diff --git a/kernel/wait.c b/kernel/wait.c
index 42a2dbc..ea7c3b4 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
 	if (!list_empty(&wait->task_list))
 		list_del_init(&wait->task_list);
 	else if (waitqueue_active(q))
-		__wake_up_common(q, mode, 1, 0, key);
+		__wake_up_locked_key(q, mode, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(abort_exclusive_wait);
-- 
cgit v0.10.2


From 56449f437add737a1e5e1cb7e00f63ac8ead1938 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 14 Apr 2009 11:24:36 +0200
Subject: tracing: make the trace clocks available generally

Jeremy Fitzhardinge reported this build failure:

 LD	 .tmp_vmlinux1
 arch/x86/kernel/built-in.o: In function `ds_take_timestamp':
 git/linux/arch/x86/kernel/ds.c:1380: undefined reference to `trace_clock_global'
 git/linux/arch/x86/kernel/ds.c:1380: undefined reference to `trace_clock_global'

Which is due to !CONFIG_TRACING && CONFIG_X86_DS=y.

Expose the trace clock code to CONFIG_X86_DS as well.

[ Unfortunately librarizing doesnt work well - ancient architectures
  with no raw_local_irq_save() primitive break the build. ]

Reported-by: Jeremy Fitzhardinge <jeremy@goop.org>
LKML-Reference: <49E4413F.7070700@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/Makefile b/kernel/Makefile
index bab1dff..c8e1be5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
+obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
 
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2630f51..ecc671e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -15,11 +15,16 @@ ifdef CONFIG_TRACING_BRANCHES
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
+#
+# Make the trace clocks available generally: it's infrastructure
+# relied on by ptrace for example:
+#
+obj-y += trace_clock.o
+
 obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
-obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_TRACING) += trace_printk.o
-- 
cgit v0.10.2


From 72c6a9870f901045f2464c3dc6ee8914bfdc07aa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 14 Apr 2009 17:33:57 +0200
Subject: rculist.h: introduce list_entry_rcu() and list_first_entry_rcu()

I've run into the situation where I need to use list_first_entry with
rcu-guarded list. This patch introduces this.

Also simplify list_for_each_entry_rcu() to use new list_entry_rcu()
instead of list_entry().

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: dipankar@in.ibm.com
LKML-Reference: <20090414153356.GC3999@psychotron.englab.brq.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e649bd3..5710f43 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	at->prev = last;
 }
 
+/**
+ * list_entry_rcu - get the struct for this entry
+ * @ptr:        the &struct list_head pointer.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_entry_rcu(ptr, type, member) \
+	container_of(rcu_dereference(ptr), type, member)
+
+/**
+ * list_first_entry_rcu - get the first element from a list
+ * @ptr:        the list head to take the element from.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_first_entry_rcu(ptr, type, member) \
+	list_entry_rcu((ptr)->next, type, member)
+
 #define __list_for_each_rcu(pos, head) \
 	for (pos = rcu_dereference((head)->next); \
 		pos != (head); \
@@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
+	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
 		prefetch(pos->member.next), &pos->member != (head); \
-		pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
+		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 
 /**
-- 
cgit v0.10.2


From a8d154b009168337494fbf345671bab74d3e4b8b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 09:36:00 -0400
Subject: tracing: create automated trace defines

This patch lowers the number of places a developer must modify to add
new tracepoints. The current method to add a new tracepoint
into an existing system is to write the trace point macro in the
trace header with one of the macros TRACE_EVENT, TRACE_FORMAT or
DECLARE_TRACE, then they must add the same named item into the C file
with the macro DEFINE_TRACE(name) and then add the trace point.

This change cuts out the needing to add the DEFINE_TRACE(name).
Every file that uses the tracepoint must still include the trace/<type>.h
file, but the one C file must also add a define before the including
of that file.

 #define CREATE_TRACE_POINTS
 #include <trace/mytrace.h>

This will cause the trace/mytrace.h file to also produce the C code
necessary to implement the trace point.

Note, if more than one trace/<type>.h is used to create the C code
it is best to list them all together.

 #define CREATE_TRACE_POINTS
 #include <trace/foo.h>
 #include <trace/bar.h>
 #include <trace/fido.h>

Thanks to Mathieu Desnoyers and Christoph Hellwig for coming up with
the cleaner solution of the define above the includes over my first
design to have the C code include a "special" header.

This patch converts sched, irq and lockdep and skb to use this new
method.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
new file mode 100644
index 0000000..de9dc7d
--- /dev/null
+++ b/include/trace/define_trace.h
@@ -0,0 +1,75 @@
+/*
+ * Trace files that want to automate creationg of all tracepoints defined
+ * in their file should include this file. The following are macros that the
+ * trace file may define:
+ *
+ * TRACE_SYSTEM defines the system the tracepoint is for
+ *
+ * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h
+ *     This macro may be defined to tell define_trace.h what file to include.
+ *     Note, leave off the ".h".
+ *
+ * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace
+ *     then this macro can define the path to use. Note, the path is relative to
+ *     define_trace.h, not the file including it. Full path names for out of tree
+ *     modules must be used.
+ */
+
+#ifdef CREATE_TRACE_POINTS
+
+/* Prevent recursion */
+#undef CREATE_TRACE_POINTS
+
+#include <linux/stringify.h>
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
+	DEFINE_TRACE(name)
+
+#undef TRACE_FORMAT
+#define TRACE_FORMAT(name, proto, args, print)	\
+	DEFINE_TRACE(name)
+
+#undef DECLARE_TRACE
+#define DECLARE_TRACE(name, proto, args)	\
+	DEFINE_TRACE(name)
+
+#undef TRACE_INCLUDE
+#undef __TRACE_INCLUDE
+
+#ifndef TRACE_INCLUDE_FILE
+# define TRACE_INCLUDE_FILE TRACE_SYSTEM
+# define UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifndef TRACE_INCLUDE_PATH
+# define __TRACE_INCLUDE(system) <trace/system.h>
+# define UNDEF_TRACE_INCLUDE_FILE
+#else
+# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
+#endif
+
+# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
+
+/* Let the trace headers be reread */
+#define TRACE_HEADER_MULTI_READ
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef TRACE_HEADER_MULTI_READ
+
+/* Only undef what we defined in this file */
+#ifdef UNDEF_TRACE_INCLUDE_FILE
+# undef TRACE_INCLUDE_PATH
+# undef UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifdef UNDEF_TRACE_INCLUDE_FILE
+# undef TRACE_INCLUDE_PATH
+# undef UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+/* We may be processing more files */
+#define CREATE_TRACE_POINTS
+
+#endif /* CREATE_TRACE_POINTS */
diff --git a/include/trace/irq.h b/include/trace/irq.h
index 04ab4c6..75e3468 100644
--- a/include/trace/irq.h
+++ b/include/trace/irq.h
@@ -51,4 +51,7 @@ TRACE_FORMAT(softirq_exit,
 	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
 	);
 
-#endif
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/kmem.h b/include/trace/kmem.h
index d7d1218..c22c42f 100644
--- a/include/trace/kmem.h
+++ b/include/trace/kmem.h
@@ -188,5 +188,7 @@ TRACE_EVENT(kmem_cache_free,
 
 	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
 );
+#endif /* _TRACE_KMEM_H */
 
-#endif
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
index 8ee7900..4d301e7 100644
--- a/include/trace/lockdep.h
+++ b/include/trace/lockdep.h
@@ -55,3 +55,6 @@ TRACE_EVENT(lock_acquired,
 #endif
 
 #endif /* _TRACE_LOCKDEP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/sched.h b/include/trace/sched.h
index 5b1cf4a..ffa1cab 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -334,3 +334,6 @@ TRACE_EVENT(sched_signal_send,
 );
 
 #endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/skb.h b/include/trace/skb.h
index e6fd281..1e8fabb 100644
--- a/include/trace/skb.h
+++ b/include/trace/skb.h
@@ -35,3 +35,6 @@ TRACE_EVENT(kfree_skb,
 );
 
 #endif /* _TRACE_SKB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index abf9cf3..2fe9d2c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -56,10 +56,6 @@
 #include <asm/mmu_context.h>
 #include "cred-internals.h"
 
-DEFINE_TRACE(sched_process_free);
-DEFINE_TRACE(sched_process_exit);
-DEFINE_TRACE(sched_process_wait);
-
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd..4bebf26 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -83,8 +83,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
-DEFINE_TRACE(sched_process_fork);
-
 int nr_processes(void)
 {
 	int cpu;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d82142b..983d8be 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -17,9 +17,11 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
-#include <trace/irq.h>
 #include <linux/bootmem.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/irq.h>
+
 #include "internals.h"
 
 /*
@@ -348,9 +350,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
 	       "but no thread function available.", irq, action->name);
 }
 
-DEFINE_TRACE(irq_handler_entry);
-DEFINE_TRACE(irq_handler_exit);
-
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ebaf85..e1c7692 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
-DEFINE_TRACE(sched_kthread_stop);
-DEFINE_TRACE(sched_kthread_stop_ret);
-
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c4582a6..257f21a 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -42,12 +42,14 @@
 #include <linux/hash.h>
 #include <linux/ftrace.h>
 #include <linux/stringify.h>
-#include <trace/lockdep.h>
 
 #include <asm/sections.h>
 
 #include "lockdep_internals.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/lockdep.h>
+
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
 module_param(prove_locking, int, 0644);
@@ -2929,8 +2931,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
 }
 EXPORT_SYMBOL_GPL(lock_set_class);
 
-DEFINE_TRACE(lock_acquire);
-
 /*
  * We are not always called with irqs disabled - do that here,
  * and also avoid lockdep recursion:
@@ -2957,8 +2957,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 }
 EXPORT_SYMBOL_GPL(lock_acquire);
 
-DEFINE_TRACE(lock_release);
-
 void lock_release(struct lockdep_map *lock, int nested,
 			  unsigned long ip)
 {
@@ -3061,8 +3059,6 @@ found_it:
 	put_lock_stats(stats);
 }
 
-DEFINE_TRACE(lock_acquired);
-
 static void
 __lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
@@ -3118,8 +3114,6 @@ found_it:
 	lock->ip = ip;
 }
 
-DEFINE_TRACE(lock_contended);
-
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
diff --git a/kernel/sched.c b/kernel/sched.c
index 5724508..e6d4518 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -72,13 +72,15 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 
 #include "sched_cpupri.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/sched.h>
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -118,12 +120,6 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
-DEFINE_TRACE(sched_wait_task);
-DEFINE_TRACE(sched_wakeup);
-DEFINE_TRACE(sched_wakeup_new);
-DEFINE_TRACE(sched_switch);
-DEFINE_TRACE(sched_migrate_task);
-
 #ifdef CONFIG_SMP
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
diff --git a/kernel/signal.c b/kernel/signal.c
index d803473..1d5703f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -41,8 +41,6 @@
 
 static struct kmem_cache *sigqueue_cachep;
 
-DEFINE_TRACE(sched_signal_send);
-
 static void __user *sig_handler(struct task_struct *t, int sig)
 {
 	return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2fecefa..a2d9b45 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -186,9 +186,6 @@ EXPORT_SYMBOL(local_bh_enable_ip);
  */
 #define MAX_SOFTIRQ_RESTART 10
 
-DEFINE_TRACE(softirq_entry);
-DEFINE_TRACE(softirq_exit);
-
 asmlinkage void __do_softirq(void)
 {
 	struct softirq_action *h;
diff --git a/mm/util.c b/mm/util.c
index 2599e83..0e74a22 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,9 +4,11 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
-#include <linux/tracepoint.h>
 #include <asm/uaccess.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/kmem.h>
+
 /**
  * kstrdup - allocate space for and copy an existing string
  * @s: the string to duplicate
@@ -239,13 +241,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
 /* Tracepoints definitions. */
-DEFINE_TRACE(kmalloc);
-DEFINE_TRACE(kmem_cache_alloc);
-DEFINE_TRACE(kmalloc_node);
-DEFINE_TRACE(kmem_cache_alloc_node);
-DEFINE_TRACE(kfree);
-DEFINE_TRACE(kmem_cache_free);
-
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c8fb456..8017720 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,11 +19,11 @@
 #include <linux/workqueue.h>
 #include <linux/netlink.h>
 #include <linux/net_dropmon.h>
-#include <trace/skb.h>
 
 #include <asm/unaligned.h>
 #include <asm/bitops.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/skb.h>
 
-DEFINE_TRACE(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
-- 
cgit v0.10.2


From 9504504cbab29ecb694186b1c5b15d3579c43c51 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Sat, 11 Apr 2009 12:59:57 -0400
Subject: tracing: make trace_seq operations available for core kernel

In the process to make TRACE_EVENT macro work for modules, the trace_seq
operations must be available for core kernel code.

These operations are quite useful and can be used for other implementations.

The main idea is that we create a trace_seq handle that acts very much
like the seq_file handle.

	struct trace_seq *s = kmalloc(sizeof(*s, GFP_KERNEL);

	trace_seq_init(s);
	trace_seq_printf(s, "some data %d\n", variable);

	printk("%s", s->buffer);

The main use is to allow a top level function call several other functions
that may store printf like data into the buffer. Then at the end, the top
level function can process all the data with any method it would like to.
It could be passed to userspace, output via printk or even use seq_file:

	trace_seq_to_user(s, ubuf, cnt);
	seq_puts(m, s->buffer);

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
new file mode 100644
index 0000000..28051da
--- /dev/null
+++ b/include/linux/trace_seq.h
@@ -0,0 +1,89 @@
+#ifndef _LINUX_TRACE_SEQ_H
+#define _LINUX_TRACE_SEQ_H
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE.
+ */
+
+struct trace_seq {
+	unsigned char		buffer[PAGE_SIZE];
+	unsigned int		len;
+	unsigned int		readpos;
+};
+
+static inline void
+trace_seq_init(struct trace_seq *s)
+{
+	s->len = 0;
+	s->readpos = 0;
+}
+
+/*
+ * Currently only defined when tracing is enabled.
+ */
+#ifdef CONFIG_TRACING
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
+extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt);
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
+extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+				size_t len);
+extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
+extern int trace_seq_path(struct trace_seq *s, struct path *path);
+
+#else /* CONFIG_TRACING */
+static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)))
+{
+	return 0;
+}
+static inline int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+{
+	return 0;
+}
+
+static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+}
+static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt)
+{
+	return 0;
+}
+static inline int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+	return 0;
+}
+static inline int trace_seq_putc(struct trace_seq *s, unsigned char c);
+{
+	return 0;
+}
+static inline int
+trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
+{
+	return 0;
+}
+static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+				       size_t len)
+{
+	return 0;
+}
+static inline void *trace_seq_reserve(struct trace_seq *s, size_t len)
+{
+	return NULL;
+}
+static inline int trace_seq_path(struct trace_seq *s, struct path *path)
+{
+	return 0;
+}
+#endif /* CONFIG_TRACING */
+
+#endif /* _LINUX_TRACE_SEQ_H */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b05b6ac..1882846 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -12,6 +12,8 @@
 #include <linux/kmemtrace.h>
 #include <trace/power.h>
 
+#include <linux/trace_seq.h>
+
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
 
@@ -423,19 +425,6 @@ struct tracer {
 	struct tracer_stat	*stats;
 };
 
-struct trace_seq {
-	unsigned char		buffer[PAGE_SIZE];
-	unsigned int		len;
-	unsigned int		readpos;
-};
-
-static inline void
-trace_seq_init(struct trace_seq *s)
-{
-	s->len = 0;
-	s->readpos = 0;
-}
-
 
 #define TRACE_PIPE_ALL_CPU	-1
 
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 9163021..5c7cbfb 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -1,6 +1,7 @@
 #ifndef __TRACE_EVENTS_H
 #define __TRACE_EVENTS_H
 
+#include <linux/trace_seq.h>
 #include "trace.h"
 
 typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
@@ -20,24 +21,9 @@ trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
 trace_print_printk_msg_only(struct trace_iterator *iter);
 
-extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
-
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-extern int
-trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
 extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
 		unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-				 size_t cnt);
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
-extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
-extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
-				size_t len);
-extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
-extern int trace_seq_path(struct trace_seq *s, struct path *path);
 extern int seq_print_userip_objs(const struct userstack_entry *entry,
 				 struct trace_seq *s, unsigned long sym_flags);
 extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
-- 
cgit v0.10.2


From 97f2025153499faa17267a0d4e18c7afaf73f39d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 13 Apr 2009 11:20:49 -0400
Subject: tracing/events: move declarations from trace directory to core
 include

In preparation to allowing trace events to happen in modules, we need
to move some of the local declarations in the kernel/trace directory
into include/linux.

This patch simply moves the declarations and performs no context changes.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
new file mode 100644
index 0000000..496b76d
--- /dev/null
+++ b/include/linux/ftrace_event.h
@@ -0,0 +1,146 @@
+#ifndef _LINUX_FTRACE_EVENT_H
+#define _LINUX_FTRACE_EVENT_H
+
+#include <linux/trace_seq.h>
+#include <linux/ring_buffer.h>
+
+
+struct trace_array;
+struct tracer;
+
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+	unsigned char		type;
+	unsigned char		flags;
+	unsigned char		preempt_count;
+	int			pid;
+	int			tgid;
+};
+
+/*
+ * Trace iterator - used by printout routines who present trace
+ * results to users and which routines might sleep, etc:
+ */
+struct trace_iterator {
+	struct trace_array	*tr;
+	struct tracer		*trace;
+	void			*private;
+	int			cpu_file;
+	struct mutex		mutex;
+	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
+
+	/* The below is zeroed out in pipe_read */
+	struct trace_seq	seq;
+	struct trace_entry	*ent;
+	int			cpu;
+	u64			ts;
+
+	unsigned long		iter_flags;
+	loff_t			pos;
+	long			idx;
+
+	cpumask_var_t		started;
+};
+
+
+typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
+					      int flags);
+struct trace_event {
+	struct hlist_node	node;
+	int			type;
+	trace_print_func	trace;
+	trace_print_func	raw;
+	trace_print_func	hex;
+	trace_print_func	binary;
+};
+
+extern int register_ftrace_event(struct trace_event *event);
+extern int unregister_ftrace_event(struct trace_event *event);
+
+/* Return values for print_line callback */
+enum print_line_t {
+	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
+	TRACE_TYPE_HANDLED	= 1,
+	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
+	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
+};
+
+
+struct ring_buffer_event *
+trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+				  unsigned long flags, int pc);
+void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
+
+void tracing_record_cmdline(struct task_struct *tsk);
+
+struct ftrace_event_call {
+	char			*name;
+	char			*system;
+	struct dentry		*dir;
+	int			enabled;
+	int			(*regfunc)(void);
+	void			(*unregfunc)(void);
+	int			id;
+	int			(*raw_init)(void);
+	int			(*show_format)(struct trace_seq *s);
+	int			(*define_fields)(void);
+	struct list_head	fields;
+	int			n_preds;
+	struct filter_pred	**preds;
+
+#ifdef CONFIG_EVENT_PROFILE
+	atomic_t	profile_count;
+	int		(*profile_enable)(struct ftrace_event_call *);
+	void		(*profile_disable)(struct ftrace_event_call *);
+#endif
+};
+
+#define MAX_FILTER_PRED		8
+#define MAX_FILTER_STR_VAL	128
+
+extern int init_preds(struct ftrace_event_call *call);
+extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_current_check_discard(struct ftrace_event_call *call,
+					void *rec,
+					struct ring_buffer_event *event);
+
+extern int trace_define_field(struct ftrace_event_call *call, char *type,
+			      char *name, int offset, int size);
+
+
+/*
+ * The double __builtin_constant_p is because gcc will give us an error
+ * if we try to allocate the static variable to fmt if it is not a
+ * constant. Even with the outer if statement optimizing out.
+ */
+#define event_trace_printk(ip, fmt, args...)				\
+do {									\
+	__trace_printk_check_format(fmt, ##args);			\
+	tracing_record_cmdline(current);				\
+	if (__builtin_constant_p(fmt)) {				\
+		static const char *trace_printk_fmt			\
+		  __attribute__((section("__trace_printk_fmt"))) =	\
+			__builtin_constant_p(fmt) ? fmt : NULL;		\
+									\
+		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
+	} else								\
+		__trace_printk(ip, fmt, ##args);			\
+} while (0)
+
+#define __common_field(type, item)					\
+	ret = trace_define_field(event_call, #type, "common_" #item,	\
+				 offsetof(typeof(field.ent), item),	\
+				 sizeof(field.ent.item));		\
+	if (ret)							\
+		return ret;
+
+#endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1882846..6bcdf4a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -13,6 +13,7 @@
 #include <trace/power.h>
 
 #include <linux/trace_seq.h>
+#include <linux/ftrace_event.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -44,20 +45,6 @@ enum trace_type {
 };
 
 /*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_entry {
-	unsigned char		type;
-	unsigned char		flags;
-	unsigned char		preempt_count;
-	int			pid;
-	int			tgid;
-};
-
-/*
  * Function trace entry - function address and parent function addres:
  */
 struct ftrace_entry {
@@ -265,8 +252,6 @@ struct trace_array_cpu {
 	char			comm[TASK_COMM_LEN];
 };
 
-struct trace_iterator;
-
 /*
  * The trace array - an array of per-CPU trace arrays. This is the
  * highest level data structure that individual tracers deal with.
@@ -341,15 +326,6 @@ extern void __ftrace_bad_type(void);
 		__ftrace_bad_type();					\
 	} while (0)
 
-/* Return values for print_line callback */
-enum print_line_t {
-	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
-	TRACE_TYPE_HANDLED	= 1,
-	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
-	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
-};
-
-
 /*
  * An option specific to a tracer. This is a boolean value.
  * The bit is the bit index that sets its value on the
@@ -428,31 +404,6 @@ struct tracer {
 
 #define TRACE_PIPE_ALL_CPU	-1
 
-/*
- * Trace iterator - used by printout routines who present trace
- * results to users and which routines might sleep, etc:
- */
-struct trace_iterator {
-	struct trace_array	*tr;
-	struct tracer		*trace;
-	void			*private;
-	int			cpu_file;
-	struct mutex		mutex;
-	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
-
-	/* The below is zeroed out in pipe_read */
-	struct trace_seq	seq;
-	struct trace_entry	*ent;
-	int			cpu;
-	u64			ts;
-
-	unsigned long		iter_flags;
-	loff_t			pos;
-	long			idx;
-
-	cpumask_var_t		started;
-};
-
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
@@ -479,15 +430,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc);
 
-struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
-				  unsigned long flags, int pc);
-void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
-
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
 
@@ -510,7 +452,6 @@ void tracing_sched_switch_trace(struct trace_array *tr,
 				struct task_struct *prev,
 				struct task_struct *next,
 				unsigned long flags, int pc);
-void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct task_struct *wakee,
@@ -790,28 +731,6 @@ struct ftrace_event_field {
 	int			size;
 };
 
-struct ftrace_event_call {
-	char			*name;
-	char			*system;
-	struct dentry		*dir;
-	int			enabled;
-	int			(*regfunc)(void);
-	void			(*unregfunc)(void);
-	int			id;
-	int			(*raw_init)(void);
-	int			(*show_format)(struct trace_seq *s);
-	int			(*define_fields)(void);
-	struct list_head	fields;
-	int			n_preds;
-	struct filter_pred	**preds;
-
-#ifdef CONFIG_EVENT_PROFILE
-	atomic_t	profile_count;
-	int		(*profile_enable)(struct ftrace_event_call *);
-	void		(*profile_disable)(struct ftrace_event_call *);
-#endif
-};
-
 struct event_subsystem {
 	struct list_head	list;
 	const char		*name;
@@ -825,9 +744,6 @@ struct event_subsystem {
 	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
 	     event++)
 
-#define MAX_FILTER_PRED		8
-#define MAX_FILTER_STR_VAL	128
-
 struct filter_pred;
 
 typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
@@ -845,9 +761,6 @@ struct filter_pred {
 	int clear;
 };
 
-int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size);
-extern int init_preds(struct ftrace_event_call *call);
 extern void filter_free_pred(struct filter_pred *pred);
 extern void filter_print_preds(struct filter_pred **preds, int n_preds,
 			       struct trace_seq *s);
@@ -855,13 +768,9 @@ extern int filter_parse(char **pbuf, struct filter_pred *pred);
 extern int filter_add_pred(struct ftrace_event_call *call,
 			   struct filter_pred *pred);
 extern void filter_disable_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
 extern void filter_free_subsystem_preds(struct event_subsystem *system);
 extern int filter_add_subsystem_pred(struct event_subsystem *system,
 				     struct filter_pred *pred);
-extern int filter_current_check_discard(struct ftrace_event_call *call,
-					void *rec,
-					struct ring_buffer_event *event);
 
 static inline int
 filter_check_discard(struct ftrace_event_call *call, void *rec,
@@ -876,14 +785,6 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 	return 0;
 }
 
-#define __common_field(type, item)					\
-	ret = trace_define_field(event_call, #type, "common_" #item,	\
-				 offsetof(typeof(field.ent), item),	\
-				 sizeof(field.ent.item));		\
-	if (ret)							\
-		return ret;
-
-void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
 
@@ -895,25 +796,6 @@ extern struct ftrace_event_call __stop_ftrace_events[];
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
-/*
- * The double __builtin_constant_p is because gcc will give us an error
- * if we try to allocate the static variable to fmt if it is not a
- * constant. Even with the outer if statement optimizing out.
- */
-#define event_trace_printk(ip, fmt, args...)				\
-do {									\
-	__trace_printk_check_format(fmt, ##args);			\
-	tracing_record_cmdline(current);				\
-	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
-		  __attribute__((section("__trace_printk_fmt"))) =	\
-			__builtin_constant_p(fmt) ? fmt : NULL;		\
-									\
-		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
-	} else								\
-		__trace_printk(ip, fmt, ##args);			\
-} while (0)
-
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
 	extern struct ftrace_event_call event_##call;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 5c7cbfb..6e220a8 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -4,18 +4,6 @@
 #include <linux/trace_seq.h>
 #include "trace.h"
 
-typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
-					      int flags);
-
-struct trace_event {
-	struct hlist_node	node;
-	int			type;
-	trace_print_func	trace;
-	trace_print_func	raw;
-	trace_print_func	hex;
-	trace_print_func	binary;
-};
-
 extern enum print_line_t
 trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
@@ -33,8 +21,6 @@ extern int trace_print_context(struct trace_iterator *iter);
 extern int trace_print_lat_context(struct trace_iterator *iter);
 
 extern struct trace_event *ftrace_find_event(int type);
-extern int register_ftrace_event(struct trace_event *event);
-extern int unregister_ftrace_event(struct trace_event *event);
 
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
 					 int flags);
-- 
cgit v0.10.2


From f42c85e74faa422cf0bc747ed808681145448f88 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 13 Apr 2009 12:25:37 -0400
Subject: tracing/events: move the ftrace event tracing code to core

This patch moves the ftrace creation into include/trace/ftrace.h and
simplifies the work of developers in adding new tracepoints.
Just the act of creating the trace points in include/trace and including
define_trace.h will create the events in the debugfs/tracing/events
directory.

This patch removes the need of include/trace/trace_events.h

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index de9dc7d..980eb66 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -56,6 +56,10 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+#ifdef CONFIG_EVENT_TRACER
+#include <trace/ftrace.h>
+#endif
+
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
new file mode 100644
index 0000000..955b967
--- /dev/null
+++ b/include/trace/ftrace.h
@@ -0,0 +1,492 @@
+/*
+ * Stage 1 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * struct ftrace_raw_<call> {
+ *	struct trace_entry		ent;
+ *	<type>				<item>;
+ *	<type2>				<item2>[<len>];
+ *	[...]
+ * };
+ *
+ * The <type> <item> is created by the __field(type, item) macro or
+ * the __array(type2, item2, len) macro.
+ * We simply do "type item;", and that will create the fields
+ * in the structure.
+ */
+
+#include <linux/ftrace_event.h>
+
+#undef TRACE_FORMAT
+#define TRACE_FORMAT(call, proto, args, fmt)
+
+#undef __array
+#define __array(type, item, len)	type	item[len];
+
+#undef __field
+#define __field(type, item)		type	item;
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
+	struct ftrace_raw_##name {				\
+		struct trace_entry	ent;			\
+		tstruct						\
+	};							\
+	static struct ftrace_event_call event_##name
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 2 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * enum print_line_t
+ * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
+ * {
+ *	struct trace_seq *s = &iter->seq;
+ *	struct ftrace_raw_<call> *field; <-- defined in stage 1
+ *	struct trace_entry *entry;
+ *	int ret;
+ *
+ *	entry = iter->ent;
+ *
+ *	if (entry->type != event_<call>.id) {
+ *		WARN_ON_ONCE(1);
+ *		return TRACE_TYPE_UNHANDLED;
+ *	}
+ *
+ *	field = (typeof(field))entry;
+ *
+ *	ret = trace_seq_printf(s, <TP_printk> "\n");
+ *	if (!ret)
+ *		return TRACE_TYPE_PARTIAL_LINE;
+ *
+ *	return TRACE_TYPE_HANDLED;
+ * }
+ *
+ * This is the method used to print the raw event to the trace
+ * output format. Note, this is not needed if the data is read
+ * in binary.
+ */
+
+#undef __entry
+#define __entry field
+
+#undef TP_printk
+#define TP_printk(fmt, args...) fmt "\n", args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+enum print_line_t							\
+ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
+{									\
+	struct trace_seq *s = &iter->seq;				\
+	struct ftrace_raw_##call *field;				\
+	struct trace_entry *entry;					\
+	int ret;							\
+									\
+	entry = iter->ent;						\
+									\
+	if (entry->type != event_##call.id) {				\
+		WARN_ON_ONCE(1);					\
+		return TRACE_TYPE_UNHANDLED;				\
+	}								\
+									\
+	field = (typeof(field))entry;					\
+									\
+	ret = trace_seq_printf(s, #call ": " print);			\
+	if (!ret)							\
+		return TRACE_TYPE_PARTIAL_LINE;				\
+									\
+	return TRACE_TYPE_HANDLED;					\
+}
+	
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Setup the showing format of trace point.
+ *
+ * int
+ * ftrace_format_##call(struct trace_seq *s)
+ * {
+ *	struct ftrace_raw_##call field;
+ *	int ret;
+ *
+ *	ret = trace_seq_printf(s, #type " " #item ";"
+ *			       " offset:%u; size:%u;\n",
+ *			       offsetof(struct ftrace_raw_##call, item),
+ *			       sizeof(field.type));
+ *
+ * }
+ */
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef __field
+#define __field(type, item)					\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __array
+#define __array(type, item, len)						\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __entry
+#define __entry REC
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
+
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct ftrace_raw_##call field;					\
+	int ret;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: " print);			\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef __field
+#define __field(type, item)						\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item));			\
+	if (ret)							\
+		return ret;
+
+#undef __array
+#define __array(type, item, len)					\
+	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
+	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item));			\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+int									\
+ftrace_define_fields_##call(void)					\
+{									\
+	struct ftrace_raw_##call field;					\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	int ret;							\
+									\
+	__common_field(unsigned char, type);				\
+	__common_field(unsigned char, flags);				\
+	__common_field(unsigned char, preempt_count);			\
+	__common_field(int, pid);					\
+	__common_field(int, tgid);					\
+									\
+	tstruct;							\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 3 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * static void ftrace_event_<call>(proto)
+ * {
+ *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
+ * }
+ *
+ * static int ftrace_reg_event_<call>(void)
+ * {
+ *	int ret;
+ *
+ *	ret = register_trace_<call>(ftrace_event_<call>);
+ *	if (!ret)
+ *		pr_info("event trace: Could not activate trace point "
+ *			"probe to  <call>");
+ *	return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *	unregister_trace_<call>(ftrace_event_<call>);
+ * }
+ *
+ * For those macros defined with TRACE_FORMAT:
+ *
+ * static struct ftrace_event_call __used
+ * __attribute__((__aligned__(4)))
+ * __attribute__((section("_ftrace_events"))) event_<call> = {
+ *	.name			= "<call>",
+ *	.regfunc		= ftrace_reg_event_<call>,
+ *	.unregfunc		= ftrace_unreg_event_<call>,
+ * }
+ *
+ *
+ * For those macros defined with TRACE_EVENT:
+ *
+ * static struct ftrace_event_call event_<call>;
+ *
+ * static void ftrace_raw_event_<call>(proto)
+ * {
+ *	struct ring_buffer_event *event;
+ *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
+ *	unsigned long irq_flags;
+ *	int pc;
+ *
+ *	local_save_flags(irq_flags);
+ *	pc = preempt_count();
+ *
+ *	event = trace_current_buffer_lock_reserve(event_<call>.id,
+ *				  sizeof(struct ftrace_raw_<call>),
+ *				  irq_flags, pc);
+ *	if (!event)
+ *		return;
+ *	entry	= ring_buffer_event_data(event);
+ *
+ *	<assign>;  <-- Here we assign the entries by the __field and
+ *			__array macros.
+ *
+ *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
+ * }
+ *
+ * static int ftrace_raw_reg_event_<call>(void)
+ * {
+ *	int ret;
+ *
+ *	ret = register_trace_<call>(ftrace_raw_event_<call>);
+ *	if (!ret)
+ *		pr_info("event trace: Could not activate trace point "
+ *			"probe to <call>");
+ *	return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *	unregister_trace_<call>(ftrace_raw_event_<call>);
+ * }
+ *
+ * static struct trace_event ftrace_event_type_<call> = {
+ *	.trace			= ftrace_raw_output_<call>, <-- stage 2
+ * };
+ *
+ * static int ftrace_raw_init_event_<call>(void)
+ * {
+ *	int id;
+ *
+ *	id = register_ftrace_event(&ftrace_event_type_<call>);
+ *	if (!id)
+ *		return -ENODEV;
+ *	event_<call>.id = id;
+ *	return 0;
+ * }
+ *
+ * static struct ftrace_event_call __used
+ * __attribute__((__aligned__(4)))
+ * __attribute__((section("_ftrace_events"))) event_<call> = {
+ *	.name			= "<call>",
+ *	.system			= "<system>",
+ *	.raw_init		= ftrace_raw_init_event_<call>,
+ *	.regfunc		= ftrace_reg_event_<call>,
+ *	.unregfunc		= ftrace_unreg_event_<call>,
+ *	.show_format		= ftrace_format_<call>,
+ * }
+ *
+ */
+
+#undef TP_FMT
+#define TP_FMT(fmt, args...)	fmt "\n", ##args
+
+#ifdef CONFIG_EVENT_PROFILE
+#define _TRACE_PROFILE(call, proto, args)				\
+static void ftrace_profile_##call(proto)				\
+{									\
+	extern void perf_tpcounter_event(int);				\
+	perf_tpcounter_event(event_##call.id);				\
+}									\
+									\
+static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
+{									\
+	int ret = 0;							\
+									\
+	if (!atomic_inc_return(&call->profile_count))			\
+		ret = register_trace_##call(ftrace_profile_##call);	\
+									\
+	return ret;							\
+}									\
+									\
+static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
+{									\
+	if (atomic_add_negative(-1, &call->profile_count))		\
+		unregister_trace_##call(ftrace_profile_##call);		\
+}
+
+#define _TRACE_PROFILE_INIT(call)					\
+	.profile_count = ATOMIC_INIT(-1),				\
+	.profile_enable = ftrace_profile_enable_##call,			\
+	.profile_disable = ftrace_profile_disable_##call,
+
+#else
+#define _TRACE_PROFILE(call, proto, args)
+#define _TRACE_PROFILE_INIT(call)
+#endif
+
+#define _TRACE_FORMAT(call, proto, args, fmt)				\
+static void ftrace_event_##call(proto)					\
+{									\
+	event_trace_printk(_RET_IP_, #call ": " fmt);			\
+}									\
+									\
+static int ftrace_reg_event_##call(void)				\
+{									\
+	int ret;							\
+									\
+	ret = register_trace_##call(ftrace_event_##call);		\
+	if (ret)							\
+		pr_info("event trace: Could not activate trace point "	\
+			"probe to " #call "\n");			\
+	return ret;							\
+}									\
+									\
+static void ftrace_unreg_event_##call(void)				\
+{									\
+	unregister_trace_##call(ftrace_event_##call);			\
+}									\
+									\
+static struct ftrace_event_call event_##call;				\
+									\
+static int ftrace_init_event_##call(void)				\
+{									\
+	int id;								\
+									\
+	id = register_ftrace_event(NULL);				\
+	if (!id)							\
+		return -ENODEV;						\
+	event_##call.id = id;						\
+	return 0;							\
+}
+
+#undef TRACE_FORMAT
+#define TRACE_FORMAT(call, proto, args, fmt)				\
+_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))		\
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.raw_init		= ftrace_init_event_##call,		\
+	.regfunc		= ftrace_reg_event_##call,		\
+	.unregfunc		= ftrace_unreg_event_##call,		\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
+#undef __entry
+#define __entry entry
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
+									\
+static struct ftrace_event_call event_##call;				\
+									\
+static void ftrace_raw_event_##call(proto)				\
+{									\
+	struct ftrace_event_call *call = &event_##call;			\
+	struct ring_buffer_event *event;				\
+	struct ftrace_raw_##call *entry;				\
+	unsigned long irq_flags;					\
+	int pc;								\
+									\
+	local_save_flags(irq_flags);					\
+	pc = preempt_count();						\
+									\
+	event = trace_current_buffer_lock_reserve(event_##call.id,	\
+				  sizeof(struct ftrace_raw_##call),	\
+				  irq_flags, pc);			\
+	if (!event)							\
+		return;							\
+	entry	= ring_buffer_event_data(event);			\
+									\
+	assign;								\
+									\
+	if (!filter_current_check_discard(call, entry, event))		\
+		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
+}									\
+									\
+static int ftrace_raw_reg_event_##call(void)				\
+{									\
+	int ret;							\
+									\
+	ret = register_trace_##call(ftrace_raw_event_##call);		\
+	if (ret)							\
+		pr_info("event trace: Could not activate trace point "	\
+			"probe to " #call "\n");			\
+	return ret;							\
+}									\
+									\
+static void ftrace_raw_unreg_event_##call(void)				\
+{									\
+	unregister_trace_##call(ftrace_raw_event_##call);		\
+}									\
+									\
+static struct trace_event ftrace_event_type_##call = {			\
+	.trace			= ftrace_raw_output_##call,		\
+};									\
+									\
+static int ftrace_raw_init_event_##call(void)				\
+{									\
+	int id;								\
+									\
+	id = register_ftrace_event(&ftrace_event_type_##call);		\
+	if (!id)							\
+		return -ENODEV;						\
+	event_##call.id = id;						\
+	INIT_LIST_HEAD(&event_##call.fields);				\
+	init_preds(&event_##call);					\
+	return 0;							\
+}									\
+									\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.regfunc		= ftrace_raw_reg_event_##call,		\
+	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##call,		\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef _TRACE_PROFILE
+#undef _TRACE_PROFILE_INIT
+
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
deleted file mode 100644
index 13d6b85..0000000
--- a/include/trace/trace_events.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* trace/<type>.h here */
-
-#include <trace/sched.h>
-#include <trace/irq.h>
-#include <trace/lockdep.h>
-#include <trace/skb.h>
-#include <trace/kmem.h>
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 3ad367e..fb9d7f9 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events.o
-obj-$(CONFIG_EVENT_TRACER) += events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
deleted file mode 100644
index 5a35a91..0000000
--- a/kernel/trace/events.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * This is the place to register all trace points as events.
- */
-
-#include <linux/stringify.h>
-
-#include <trace/trace_events.h>
-
-#include "trace_output.h"
-
-#define TRACE_HEADER_MULTI_READ
-#include "trace_events_stage_1.h"
-#include "trace_events_stage_2.h"
-#include "trace_events_stage_3.h"
-
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
deleted file mode 100644
index 475f46a..0000000
--- a/kernel/trace/trace_events_stage_1.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Stage 1 of the trace events.
- *
- * Override the macros in <trace/trace_events.h> to include the following:
- *
- * struct ftrace_raw_<call> {
- *	struct trace_entry		ent;
- *	<type>				<item>;
- *	<type2>				<item2>[<len>];
- *	[...]
- * };
- *
- * The <type> <item> is created by the __field(type, item) macro or
- * the __array(type2, item2, len) macro.
- * We simply do "type item;", and that will create the fields
- * in the structure.
- */
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
-#undef __array
-#define __array(type, item, len)	type	item[len];
-
-#undef __field
-#define __field(type, item)		type	item;
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
-	struct ftrace_raw_##name {				\
-		struct trace_entry	ent;			\
-		tstruct						\
-	};							\
-	static struct ftrace_event_call event_##name
-
-#include <trace/trace_events.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
deleted file mode 100644
index aa4a67a..0000000
--- a/kernel/trace/trace_events_stage_2.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Stage 2 of the trace events.
- *
- * Override the macros in <trace/trace_events.h> to include the following:
- *
- * enum print_line_t
- * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
- * {
- *	struct trace_seq *s = &iter->seq;
- *	struct ftrace_raw_<call> *field; <-- defined in stage 1
- *	struct trace_entry *entry;
- *	int ret;
- *
- *	entry = iter->ent;
- *
- *	if (entry->type != event_<call>.id) {
- *		WARN_ON_ONCE(1);
- *		return TRACE_TYPE_UNHANDLED;
- *	}
- *
- *	field = (typeof(field))entry;
- *
- *	ret = trace_seq_printf(s, <TP_printk> "\n");
- *	if (!ret)
- *		return TRACE_TYPE_PARTIAL_LINE;
- *
- *	return TRACE_TYPE_HANDLED;
- * }
- *
- * This is the method used to print the raw event to the trace
- * output format. Note, this is not needed if the data is read
- * in binary.
- */
-
-#undef __entry
-#define __entry field
-
-#undef TP_printk
-#define TP_printk(fmt, args...) fmt "\n", args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-enum print_line_t							\
-ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
-{									\
-	struct trace_seq *s = &iter->seq;				\
-	struct ftrace_raw_##call *field;				\
-	struct trace_entry *entry;					\
-	int ret;							\
-									\
-	entry = iter->ent;						\
-									\
-	if (entry->type != event_##call.id) {				\
-		WARN_ON_ONCE(1);					\
-		return TRACE_TYPE_UNHANDLED;				\
-	}								\
-									\
-	field = (typeof(field))entry;					\
-									\
-	ret = trace_seq_printf(s, #call ": " print);			\
-	if (!ret)							\
-		return TRACE_TYPE_PARTIAL_LINE;				\
-									\
-	return TRACE_TYPE_HANDLED;					\
-}
-	
-#include <trace/trace_events.h>
-
-/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- *	struct ftrace_raw_##call field;
- *	int ret;
- *
- *	ret = trace_seq_printf(s, #type " " #item ";"
- *			       " offset:%u; size:%u;\n",
- *			       offsetof(struct ftrace_raw_##call, item),
- *			       sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __array
-#define __array(type, item, len)						\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __entry
-#define __entry REC
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-static int								\
-ftrace_format_##call(struct trace_seq *s)				\
-{									\
-	struct ftrace_raw_##call field;					\
-	int ret;							\
-									\
-	tstruct;							\
-									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
-									\
-	return ret;							\
-}
-
-#include <trace/trace_events.h>
-
-#undef __field
-#define __field(type, item)						\
-	ret = trace_define_field(event_call, #type, #item,		\
-				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
-	if (ret)							\
-		return ret;
-
-#undef __array
-#define __array(type, item, len)					\
-	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
-	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
-				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
-	if (ret)							\
-		return ret;
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-int									\
-ftrace_define_fields_##call(void)					\
-{									\
-	struct ftrace_raw_##call field;					\
-	struct ftrace_event_call *event_call = &event_##call;		\
-	int ret;							\
-									\
-	__common_field(unsigned char, type);				\
-	__common_field(unsigned char, flags);				\
-	__common_field(unsigned char, preempt_count);			\
-	__common_field(int, pid);					\
-	__common_field(int, tgid);					\
-									\
-	tstruct;							\
-									\
-	return ret;							\
-}
-
-#include <trace/trace_events.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
deleted file mode 100644
index 45c04e1..0000000
--- a/kernel/trace/trace_events_stage_3.h
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Stage 3 of the trace events.
- *
- * Override the macros in <trace/trace_events.h> to include the following:
- *
- * static void ftrace_event_<call>(proto)
- * {
- *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
- * }
- *
- * static int ftrace_reg_event_<call>(void)
- * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to  <call>");
- *	return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *	unregister_trace_<call>(ftrace_event_<call>);
- * }
- *
- * For those macros defined with TRACE_FORMAT:
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- * }
- *
- *
- * For those macros defined with TRACE_EVENT:
- *
- * static struct ftrace_event_call event_<call>;
- *
- * static void ftrace_raw_event_<call>(proto)
- * {
- *	struct ring_buffer_event *event;
- *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
- *	unsigned long irq_flags;
- *	int pc;
- *
- *	local_save_flags(irq_flags);
- *	pc = preempt_count();
- *
- *	event = trace_current_buffer_lock_reserve(event_<call>.id,
- *				  sizeof(struct ftrace_raw_<call>),
- *				  irq_flags, pc);
- *	if (!event)
- *		return;
- *	entry	= ring_buffer_event_data(event);
- *
- *	<assign>;  <-- Here we assign the entries by the __field and
- *			__array macros.
- *
- *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
- * }
- *
- * static int ftrace_raw_reg_event_<call>(void)
- * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_raw_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to <call>");
- *	return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *	unregister_trace_<call>(ftrace_raw_event_<call>);
- * }
- *
- * static struct trace_event ftrace_event_type_<call> = {
- *	.trace			= ftrace_raw_output_<call>, <-- stage 2
- * };
- *
- * static int ftrace_raw_init_event_<call>(void)
- * {
- *	int id;
- *
- *	id = register_ftrace_event(&ftrace_event_type_<call>);
- *	if (!id)
- *		return -ENODEV;
- *	event_<call>.id = id;
- *	return 0;
- * }
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.system			= "<system>",
- *	.raw_init		= ftrace_raw_init_event_<call>,
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- *	.show_format		= ftrace_format_<call>,
- * }
- *
- */
-
-#undef TP_FMT
-#define TP_FMT(fmt, args...)	fmt "\n", ##args
-
-#ifdef CONFIG_EVENT_PROFILE
-#define _TRACE_PROFILE(call, proto, args)				\
-static void ftrace_profile_##call(proto)				\
-{									\
-	extern void perf_tpcounter_event(int);				\
-	perf_tpcounter_event(event_##call.id);				\
-}									\
-									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
-{									\
-	int ret = 0;							\
-									\
-	if (!atomic_inc_return(&call->profile_count))			\
-		ret = register_trace_##call(ftrace_profile_##call);	\
-									\
-	return ret;							\
-}									\
-									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
-{									\
-	if (atomic_add_negative(-1, &call->profile_count))		\
-		unregister_trace_##call(ftrace_profile_##call);		\
-}
-
-#define _TRACE_PROFILE_INIT(call)					\
-	.profile_count = ATOMIC_INIT(-1),				\
-	.profile_enable = ftrace_profile_enable_##call,			\
-	.profile_disable = ftrace_profile_disable_##call,
-
-#else
-#define _TRACE_PROFILE(call, proto, args)
-#define _TRACE_PROFILE_INIT(call)
-#endif
-
-#define _TRACE_FORMAT(call, proto, args, fmt)				\
-static void ftrace_event_##call(proto)					\
-{									\
-	event_trace_printk(_RET_IP_, #call ": " fmt);			\
-}									\
-									\
-static int ftrace_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_event_##call);			\
-}									\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static int ftrace_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(NULL);				\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	return 0;							\
-}
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)				\
-_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_init_event_##call,		\
-	.regfunc		= ftrace_reg_event_##call,		\
-	.unregfunc		= ftrace_unreg_event_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
-#undef __entry
-#define __entry entry
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static void ftrace_raw_event_##call(proto)				\
-{									\
-	struct ftrace_event_call *call = &event_##call;			\
-	struct ring_buffer_event *event;				\
-	struct ftrace_raw_##call *entry;				\
-	unsigned long irq_flags;					\
-	int pc;								\
-									\
-	local_save_flags(irq_flags);					\
-	pc = preempt_count();						\
-									\
-	event = trace_current_buffer_lock_reserve(event_##call.id,	\
-				  sizeof(struct ftrace_raw_##call),	\
-				  irq_flags, pc);			\
-	if (!event)							\
-		return;							\
-	entry	= ring_buffer_event_data(event);			\
-									\
-	assign;								\
-									\
-	if (!filter_current_check_discard(call, entry, event))		\
-		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
-}									\
-									\
-static int ftrace_raw_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_raw_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_raw_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_raw_event_##call);		\
-}									\
-									\
-static struct trace_event ftrace_event_type_##call = {			\
-	.trace			= ftrace_raw_output_##call,		\
-};									\
-									\
-static int ftrace_raw_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(&ftrace_event_type_##call);		\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	init_preds(&event_##call);					\
-	return 0;							\
-}									\
-									\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
-	.regfunc		= ftrace_raw_reg_event_##call,		\
-	.unregfunc		= ftrace_raw_unreg_event_##call,	\
-	.show_format		= ftrace_format_##call,			\
-	.define_fields		= ftrace_define_fields_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
-#include <trace/trace_events.h>
-
-#undef _TRACE_PROFILE
-#undef _TRACE_PROFILE_INIT
-
-- 
cgit v0.10.2


From a59fd6027218bd7c994e39d14afe0242f895144f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 13:52:20 -0400
Subject: tracing/events: convert event call sites to use a link list

Impact: makes it possible to define events in modules

The events are created by reading down the section that they are linked
in by the macros. But this is not scalable to modules. This patch converts
the manipulations to use a global link list, and on boot up it adds
the items in the section to the list.

This change will allow modules to add their tracing events to the list as
well.

Note, this change alone does not permit modules to use the TRACE_EVENT macros,
but the change is needed for them to eventually do so.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 496b76d..1781085 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -83,6 +83,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
 void tracing_record_cmdline(struct task_struct *tsk);
 
 struct ftrace_event_call {
+	struct list_head	list;
 	char			*name;
 	char			*system;
 	struct dentry		*dir;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6bcdf4a..8817c18 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -739,11 +739,6 @@ struct event_subsystem {
 	struct filter_pred	**preds;
 };
 
-#define events_for_each(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
-
 struct filter_pred;
 
 typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
@@ -785,13 +780,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 	return 0;
 }
 
-extern struct ftrace_event_call __start_ftrace_events[];
-extern struct ftrace_event_call __stop_ftrace_events[];
-
-#define for_each_event(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
+extern struct list_head ftrace_events;
 
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 199de9c..7bf2ad6 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -11,7 +11,7 @@ int ftrace_profile_enable(int event_id)
 {
 	struct ftrace_event_call *event;
 
-	for_each_event(event) {
+	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id)
 			return event->profile_enable(event);
 	}
@@ -23,7 +23,7 @@ void ftrace_profile_disable(int event_id)
 {
 	struct ftrace_event_call *event;
 
-	for_each_event(event) {
+	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id)
 			return event->profile_disable(event);
 	}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index ead68ac..5c66aaf 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -19,6 +19,8 @@
 
 static DEFINE_MUTEX(event_mutex);
 
+LIST_HEAD(ftrace_events);
+
 int trace_define_field(struct ftrace_event_call *call, char *type,
 		       char *name, int offset, int size)
 {
@@ -54,16 +56,14 @@ err:
 
 static void ftrace_clear_events(void)
 {
-	struct ftrace_event_call *call = (void *)__start_ftrace_events;
-
+	struct ftrace_event_call *call;
 
-	while ((unsigned long)call < (unsigned long)__stop_ftrace_events) {
+	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (call->enabled) {
 			call->enabled = 0;
 			call->unregfunc();
 		}
-		call++;
 	}
 }
 
@@ -89,7 +89,7 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 
 static int ftrace_set_clr_event(char *buf, int set)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 	char *event = NULL, *sub = NULL, *match;
 	int ret = -EINVAL;
 
@@ -118,7 +118,7 @@ static int ftrace_set_clr_event(char *buf, int set)
 	}
 
 	mutex_lock(&event_mutex);
-	for_each_event(call) {
+	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (!call->name || !call->regfunc)
 			continue;
@@ -224,15 +224,17 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_call *next = call;
+	struct list_head *list = m->private;
+	struct ftrace_event_call *call;
 
 	(*pos)++;
 
 	for (;;) {
-		if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+		if (list == &ftrace_events)
 			return NULL;
 
+		call = list_entry(list, struct ftrace_event_call, list);
+
 		/*
 		 * The ftrace subsystem is for showing formats only.
 		 * They can not be enabled or disabled via the event files.
@@ -240,11 +242,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		if (call->regfunc)
 			break;
 
-		call++;
-		next = call;
+		list = list->next;
 	}
 
-	m->private = ++next;
+	m->private = list->next;
 
 	return call;
 }
@@ -257,22 +258,23 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_call *next;
+	struct list_head *list = m->private;
+	struct ftrace_event_call *call;
 
 	(*pos)++;
 
  retry:
-	if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+	if (list == &ftrace_events)
 		return NULL;
 
+	call = list_entry(list, struct ftrace_event_call, list);
+
 	if (!call->enabled) {
-		call++;
+		list = list->next;
 		goto retry;
 	}
 
-	next = call;
-	m->private = ++next;
+	m->private = list->next;
 
 	return call;
 }
@@ -312,7 +314,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
 	if (!ret) {
 		struct seq_file *m = file->private_data;
 
-		m->private = __start_ftrace_events;
+		m->private = ftrace_events.next;
 	}
 	return ret;
 }
@@ -797,9 +799,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 	return 0;
 }
 
+extern struct ftrace_event_call __start_ftrace_events[];
+extern struct ftrace_event_call __stop_ftrace_events[];
+
+#define for_each_event(event)						\
+	for (event = __start_ftrace_events;				\
+	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+	     event++)
+
 static __init int event_trace_init(void)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 	struct dentry *d_tracer;
 	struct dentry *entry;
 	struct dentry *d_events;
@@ -830,6 +840,7 @@ static __init int event_trace_init(void)
 		/* The linker may leave blanks */
 		if (!call->name)
 			continue;
+		list_add(&call->list, &ftrace_events);
 		event_create_dir(call, d_events);
 	}
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index de42dad..d30b06b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -223,7 +223,7 @@ oom:
 
 void filter_free_subsystem_preds(struct event_subsystem *system)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 	int i;
 
 	if (system->n_preds) {
@@ -234,7 +234,7 @@ void filter_free_subsystem_preds(struct event_subsystem *system)
 		system->n_preds = 0;
 	}
 
-	events_for_each(call) {
+	list_for_each_entry(call, &ftrace_events, list) {
 		if (!call->define_fields)
 			continue;
 
@@ -320,7 +320,7 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
 int filter_add_subsystem_pred(struct event_subsystem *system,
 			      struct filter_pred *pred)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 
 	if (system->n_preds && !pred->compound)
 		filter_free_subsystem_preds(system);
@@ -337,7 +337,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 
 	system->preds[system->n_preds] = pred;
 
-	events_for_each(call) {
+	list_for_each_entry(call, &ftrace_events, list) {
 		int err;
 
 		if (!call->define_fields)
-- 
cgit v0.10.2


From 17c873ec280a03894bc718af817f7f24fa787ae1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 18:12:50 -0400
Subject: tracing/events: add export symbols for trace events in modules

Impact: let modules add trace events

The trace event code requires some functions to be exported to allow
modules to use TRACE_EVENT. This patch adds EXPORT_SYMBOL_GPL to the
necessary functions.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c0047fc..2d69b26 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -176,6 +176,7 @@ int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
 {
 	return filter_check_discard(call, rec, global_trace.buffer, event);
 }
+EXPORT_SYMBOL_GPL(filter_current_check_discard);
 
 cycle_t ftrace_now(int cpu)
 {
@@ -886,6 +887,7 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
 	return trace_buffer_lock_reserve(&global_trace,
 					 type, len, flags, pc);
 }
+EXPORT_SYMBOL(trace_current_buffer_lock_reserve);
 
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
@@ -903,6 +905,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
 {
 	ring_buffer_discard_commit(global_trace.buffer, event);
 }
+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
 
 void
 trace_function(struct trace_array *tr,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5c66aaf..8b9e621 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -53,6 +53,7 @@ err:
 
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(trace_define_field);
 
 static void ftrace_clear_events(void)
 {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d30b06b..f8e5eab 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -110,6 +110,7 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec)
 
 	return 1;
 }
+EXPORT_SYMBOL_GPL(filter_match_preds);
 
 void filter_print_preds(struct filter_pred **preds, int n_preds,
 			struct trace_seq *s)
@@ -220,6 +221,7 @@ oom:
 
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(init_preds);
 
 void filter_free_subsystem_preds(struct event_subsystem *system)
 {
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 0e70fb0..83a8abb 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -94,6 +94,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 
 	return len;
 }
+EXPORT_SYMBOL_GPL(trace_seq_printf);
 
 int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
@@ -538,6 +539,7 @@ int register_ftrace_event(struct trace_event *event)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_ftrace_event);
 
 /**
  * unregister_ftrace_event - remove a no longer used event
@@ -551,6 +553,7 @@ int unregister_ftrace_event(struct trace_event *event)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(unregister_ftrace_event);
 
 /*
  * Standard events
-- 
cgit v0.10.2


From 6d723736e472f7a0cd5b62c84152fceead241328 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 14:53:50 -0400
Subject: tracing/events: add support for modules to TRACE_EVENT

Impact: allow modules to add TRACE_EVENTS on load

This patch adds the final hooks to allow modules to use the TRACE_EVENT
macro. A notifier and a data structure are used to link the TRACE_EVENTs
defined in the module to connect them with the ftrace event tracing system.

It also adds the necessary automated clean ups to the trace events when a
module is removed.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 1781085..75f3ac0 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -7,6 +7,7 @@
 
 struct trace_array;
 struct tracer;
+struct dentry;
 
 /*
  * The trace entry - the most basic unit of tracing. This is what
@@ -87,6 +88,7 @@ struct ftrace_event_call {
 	char			*name;
 	char			*system;
 	struct dentry		*dir;
+	struct trace_event	*event;
 	int			enabled;
 	int			(*regfunc)(void);
 	void			(*unregfunc)(void);
@@ -97,6 +99,7 @@ struct ftrace_event_call {
 	struct list_head	fields;
 	int			n_preds;
 	struct filter_pred	**preds;
+	void			*mod;
 
 #ifdef CONFIG_EVENT_PROFILE
 	atomic_t	profile_count;
diff --git a/include/linux/module.h b/include/linux/module.h
index 627ac08..6155fa4 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -337,6 +337,10 @@ struct module
 	const char **trace_bprintk_fmt_start;
 	unsigned int num_trace_bprintk_fmt;
 #endif
+#ifdef CONFIG_EVENT_TRACING
+	struct ftrace_event_call *trace_events;
+	unsigned int num_trace_events;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 28051da..15ca2c7 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_TRACE_SEQ_H
 #define _LINUX_TRACE_SEQ_H
 
+#include <linux/fs.h>
+
 /*
  * Trace sequences are used to allow a function to call several other functions
  * to create a string of data to use (up to a max of PAGE_SIZE.
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 955b967..60c5323 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -477,6 +477,7 @@ __attribute__((__aligned__(4)))						\
 __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
+	.event			= &ftrace_event_type_##call,		\
 	.raw_init		= ftrace_raw_init_event_##call,		\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
diff --git a/kernel/module.c b/kernel/module.c
index e797812..a039470 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -18,6 +18,7 @@
 */
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/fs.h>
@@ -2172,6 +2173,12 @@ static noinline struct module *load_module(void __user *umod,
 					sizeof(*mod->tracepoints),
 					&mod->num_tracepoints);
 #endif
+#ifdef CONFIG_EVENT_TRACING
+	mod->trace_events = section_objs(hdr, sechdrs, secstrings,
+					 "_ftrace_events",
+					 sizeof(*mod->trace_events),
+					 &mod->num_trace_events);
+#endif
 
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8b9e621..a4b1777 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -713,7 +713,13 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 		return d_events;
 	}
 
-	system->name = name;
+	system->name = kstrdup(name, GFP_KERNEL);
+	if (!system->name) {
+		debugfs_remove(system->entry);
+		kfree(system);
+		return d_events;
+	}
+
 	list_add(&system->list, &event_subsystems);
 
 	system->preds = NULL;
@@ -738,7 +744,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 	 * If the trace point header did not define TRACE_SYSTEM
 	 * then the system would be called "TRACE_SYSTEM".
 	 */
-	if (strcmp(call->system, "TRACE_SYSTEM") != 0)
+	if (strcmp(call->system, TRACE_SYSTEM) != 0)
 		d_events = event_subsystem_dir(call->system, d_events);
 
 	if (call->raw_init) {
@@ -757,21 +763,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 		return -1;
 	}
 
-	if (call->regfunc) {
-		entry = debugfs_create_file("enable", 0644, call->dir, call,
-					    &ftrace_enable_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs "
-				   "'%s/enable' entry\n", call->name);
-	}
+	if (call->regfunc)
+		entry = trace_create_file("enable", 0644, call->dir, call,
+					  &ftrace_enable_fops);
 
-	if (call->id) {
-		entry = debugfs_create_file("id", 0444, call->dir, call,
-				&ftrace_event_id_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs '%s/id' entry\n",
-					call->name);
-	}
+	if (call->id)
+		entry = trace_create_file("id", 0444, call->dir, call,
+					  &ftrace_event_id_fops);
 
 	if (call->define_fields) {
 		ret = call->define_fields();
@@ -780,40 +778,102 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 				   " events/%s\n", call->name);
 			return ret;
 		}
-		entry = debugfs_create_file("filter", 0644, call->dir, call,
-					    &ftrace_event_filter_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs "
-				   "'%s/filter' entry\n", call->name);
+		entry = trace_create_file("filter", 0644, call->dir, call,
+					  &ftrace_event_filter_fops);
 	}
 
 	/* A trace may not want to export its format */
 	if (!call->show_format)
 		return 0;
 
-	entry = debugfs_create_file("format", 0444, call->dir, call,
-				    &ftrace_event_format_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'%s/format' entry\n", call->name);
+	entry = trace_create_file("format", 0444, call->dir, call,
+				  &ftrace_event_format_fops);
+
+	return 0;
+}
+
+#define for_each_event(event, start, end)			\
+	for (event = start;					\
+	     (unsigned long)event < (unsigned long)end;		\
+	     event++)
+
+static void trace_module_add_events(struct module *mod)
+{
+	struct ftrace_event_call *call, *start, *end;
+	struct dentry *d_events;
+
+	start = mod->trace_events;
+	end = mod->trace_events + mod->num_trace_events;
+
+	if (start == end)
+		return;
+
+	d_events = event_trace_events_dir();
+	if (!d_events)
+		return;
+
+	for_each_event(call, start, end) {
+		/* The linker may leave blanks */
+		if (!call->name)
+			continue;
+		call->mod = mod;
+		list_add(&call->list, &ftrace_events);
+		event_create_dir(call, d_events);
+	}
+}
+
+static void trace_module_remove_events(struct module *mod)
+{
+	struct ftrace_event_call *call, *p;
+
+	list_for_each_entry_safe(call, p, &ftrace_events, list) {
+		if (call->mod == mod) {
+			if (call->enabled) {
+				call->enabled = 0;
+				call->unregfunc();
+			}
+			if (call->event)
+				unregister_ftrace_event(call->event);
+			debugfs_remove_recursive(call->dir);
+			list_del(&call->list);
+		}
+	}
+}
+
+int trace_module_notify(struct notifier_block *self,
+			unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	mutex_lock(&event_mutex);
+	switch (val) {
+	case MODULE_STATE_COMING:
+		trace_module_add_events(mod);
+		break;
+	case MODULE_STATE_GOING:
+		trace_module_remove_events(mod);
+		break;
+	}
+	mutex_unlock(&event_mutex);
 
 	return 0;
 }
 
+struct notifier_block trace_module_nb = {
+	.notifier_call = trace_module_notify,
+	.priority = 0,
+};
+
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
 
-#define for_each_event(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
-
 static __init int event_trace_init(void)
 {
 	struct ftrace_event_call *call;
 	struct dentry *d_tracer;
 	struct dentry *entry;
 	struct dentry *d_events;
+	int ret;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
@@ -837,7 +897,7 @@ static __init int event_trace_init(void)
 	if (!d_events)
 		return 0;
 
-	for_each_event(call) {
+	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
 		if (!call->name)
 			continue;
@@ -845,6 +905,10 @@ static __init int event_trace_init(void)
 		event_create_dir(call, d_events);
 	}
 
+	ret = register_module_notifier(&trace_module_nb);
+	if (!ret)
+		pr_warning("Failed to register trace events module notifier\n");
+
 	return 0;
 }
 fs_initcall(event_trace_init);
-- 
cgit v0.10.2


From 19e4529ee7345079eeacc8e40cf69a304a64dc23 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 14 Apr 2009 17:27:18 +1000
Subject: modules: Fix up build when CONFIG_MODULE_UNLOAD=n.

Commit 3d43321b7015387cfebbe26436d0e9d299162ea1 ("modules: sysctl to
block module loading") introduces a modules_disabled variable that is
only defined if CONFIG_MODULE_UNLOAD is enabled, despite being used in
other places. This moves it up and fixes up the build.

  CC      kernel/module.o
kernel/module.c: In function 'sys_init_module':
kernel/module.c:2401: error: 'modules_disabled' undeclared (first use in this function)
kernel/module.c:2401: error: (Each undeclared identifier is reported only once
kernel/module.c:2401: error: for each function it appears in.)
make[1]: *** [kernel/module.o] Error 1
make: *** [kernel/module.o] Error 2

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/kernel/module.c b/kernel/module.c
index eeb3f7b..ee7ab61 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -71,6 +71,9 @@
 static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
 
+/* Block module loading/unloading? */
+int modules_disabled = 0;
+
 /* Waiting for a module to finish initializing? */
 static DECLARE_WAIT_QUEUE_HEAD(module_wq);
 
@@ -778,9 +781,6 @@ static void wait_for_zero_refcount(struct module *mod)
 	mutex_lock(&module_mutex);
 }
 
-/* Block module loading/unloading? */
-int modules_disabled = 0;
-
 SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		unsigned int, flags)
 {
-- 
cgit v0.10.2


From 61f919a12fbdc3fd20f980a34a118d597198a392 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 14 Apr 2009 18:22:32 -0400
Subject: tracing/events: fix compile for modules disabled

Impact: compile fix

The addition of TRACE_EVENT for modules breaks the build for when
modules are disabled. This code fixes that.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index a4b1777..6591d83 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -797,6 +797,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 	     (unsigned long)event < (unsigned long)end;		\
 	     event++)
 
+#ifdef CONFIG_MODULES
 static void trace_module_add_events(struct module *mod)
 {
 	struct ftrace_event_call *call, *start, *end;
@@ -840,8 +841,8 @@ static void trace_module_remove_events(struct module *mod)
 	}
 }
 
-int trace_module_notify(struct notifier_block *self,
-			unsigned long val, void *data)
+static int trace_module_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
 {
 	struct module *mod = data;
 
@@ -858,6 +859,13 @@ int trace_module_notify(struct notifier_block *self,
 
 	return 0;
 }
+#else
+static int trace_module_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
 
 struct notifier_block trace_module_nb = {
 	.notifier_call = trace_module_notify,
-- 
cgit v0.10.2


From ecda8ae02a08ef065ff387f5cb2a2d4999da2408 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 14 Apr 2009 18:49:38 -0400
Subject: tracing/events: fix lockdep system name

Impact: fix compile error of lockdep event tracer

Ingo Molnar pointed out that the system name for the lockdep tracer was "lock"
which is used to include the event trace file name. It should be "lockdep"

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
index 4d301e7..45e326b 100644
--- a/include/trace/lockdep.h
+++ b/include/trace/lockdep.h
@@ -5,7 +5,7 @@
 #include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM lock
+#define TRACE_SYSTEM lockdep
 
 #ifdef CONFIG_LOCKDEP
 
-- 
cgit v0.10.2


From ad8d75fff811a6a230f7f43b05a6483099349533 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 14 Apr 2009 19:39:12 -0400
Subject: tracing/events: move trace point headers into include/trace/events

Impact: clean up

Create a sub directory in include/trace called events to keep the
trace point headers in their own separate directory. Only headers that
declare trace points should be defined in this directory.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
index 15c45a2..b616d39 100644
--- a/include/linux/kmemtrace.h
+++ b/include/linux/kmemtrace.h
@@ -9,7 +9,7 @@
 
 #ifdef __KERNEL__
 
-#include <trace/kmem.h>
+#include <trace/events/kmem.h>
 
 #ifdef CONFIG_KMEMTRACE
 extern void kmemtrace_init(void);
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 980eb66..1886941 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -43,7 +43,7 @@
 #endif
 
 #ifndef TRACE_INCLUDE_PATH
-# define __TRACE_INCLUDE(system) <trace/system.h>
+# define __TRACE_INCLUDE(system) <trace/events/system.h>
 # define UNDEF_TRACE_INCLUDE_FILE
 #else
 # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
new file mode 100644
index 0000000..75e3468
--- /dev/null
+++ b/include/trace/events/irq.h
@@ -0,0 +1,57 @@
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_H
+
+#include <linux/tracepoint.h>
+#include <linux/interrupt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+/*
+ * Tracepoint for entry of interrupt handler:
+ */
+TRACE_FORMAT(irq_handler_entry,
+	TP_PROTO(int irq, struct irqaction *action),
+	TP_ARGS(irq, action),
+	TP_FMT("irq=%d handler=%s", irq, action->name)
+	);
+
+/*
+ * Tracepoint for return of an interrupt handler:
+ */
+TRACE_EVENT(irq_handler_exit,
+
+	TP_PROTO(int irq, struct irqaction *action, int ret),
+
+	TP_ARGS(irq, action, ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	irq	)
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->irq	= irq;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("irq=%d return=%s",
+		  __entry->irq, __entry->ret ? "handled" : "unhandled")
+);
+
+TRACE_FORMAT(softirq_entry,
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+	TP_ARGS(h, vec),
+	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
+	);
+
+TRACE_FORMAT(softirq_exit,
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+	TP_ARGS(h, vec),
+	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
+	);
+
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
new file mode 100644
index 0000000..c22c42f
--- /dev/null
+++ b/include/trace/events/kmem.h
@@ -0,0 +1,194 @@
+#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KMEM_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
+
+TRACE_EVENT(kmalloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmalloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+#endif /* _TRACE_KMEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
new file mode 100644
index 0000000..45e326b
--- /dev/null
+++ b/include/trace/events/lockdep.h
@@ -0,0 +1,60 @@
+#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCKDEP_H
+
+#include <linux/lockdep.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lockdep
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_FORMAT(lock_acquire,
+	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+		int trylock, int read, int check,
+		struct lockdep_map *next_lock, unsigned long ip),
+	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+	TP_FMT("%s%s%s", trylock ? "try " : "",
+		read ? "read " : "", lock->name)
+	);
+
+TRACE_FORMAT(lock_release,
+	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+	TP_ARGS(lock, nested, ip),
+	TP_FMT("%s", lock->name)
+	);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_FORMAT(lock_contended,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+	TP_ARGS(lock, ip),
+	TP_FMT("%s", lock->name)
+	);
+
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__entry->name = lock->name;
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
+
+#endif
+#endif
+
+#endif /* _TRACE_LOCKDEP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
new file mode 100644
index 0000000..ffa1cab
--- /dev/null
+++ b/include/trace/events/sched.h
@@ -0,0 +1,339 @@
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sched
+
+/*
+ * Tracepoint for calling kthread_stop, performed to end a kthread:
+ */
+TRACE_EVENT(sched_kthread_stop,
+
+	TP_PROTO(struct task_struct *t),
+
+	TP_ARGS(t),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+		__entry->pid	= t->pid;
+	),
+
+	TP_printk("task %s:%d", __entry->comm, __entry->pid)
+);
+
+/*
+ * Tracepoint for the return value of the kthread stopping:
+ */
+TRACE_EVENT(sched_kthread_stop_ret,
+
+	TP_PROTO(int ret),
+
+	TP_ARGS(ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->ret	= ret;
+	),
+
+	TP_printk("ret %d", __entry->ret)
+);
+
+/*
+ * Tracepoint for waiting on task to unschedule:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wait_task,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p),
+
+	TP_ARGS(rq, p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid	= p->pid;
+		__entry->prio	= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for waking up a task:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wakeup,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+
+	TP_ARGS(rq, p, success),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	success			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->success	= success;
+	),
+
+	TP_printk("task %s:%d [%d] success=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->success)
+);
+
+/*
+ * Tracepoint for waking up a new task:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wakeup_new,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+
+	TP_ARGS(rq, p, success),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	success			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->success	= success;
+	),
+
+	TP_printk("task %s:%d [%d] success=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->success)
+);
+
+/*
+ * Tracepoint for task switches, performed by the scheduler:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_switch,
+
+	TP_PROTO(struct rq *rq, struct task_struct *prev,
+		 struct task_struct *next),
+
+	TP_ARGS(rq, prev, next),
+
+	TP_STRUCT__entry(
+		__array(	char,	prev_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	prev_pid			)
+		__field(	int,	prev_prio			)
+		__array(	char,	next_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	next_pid			)
+		__field(	int,	next_prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+		__entry->prev_pid	= prev->pid;
+		__entry->prev_prio	= prev->prio;
+		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+		__entry->next_pid	= next->pid;
+		__entry->next_prio	= next->prio;
+	),
+
+	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->next_comm, __entry->next_pid, __entry->next_prio)
+);
+
+/*
+ * Tracepoint for a task being migrated:
+ */
+TRACE_EVENT(sched_migrate_task,
+
+	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+
+	TP_ARGS(p, orig_cpu, dest_cpu),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	orig_cpu		)
+		__field(	int,	dest_cpu		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->orig_cpu	= orig_cpu;
+		__entry->dest_cpu	= dest_cpu;
+	),
+
+	TP_printk("task %s:%d [%d] from: %d  to: %d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->orig_cpu, __entry->dest_cpu)
+);
+
+/*
+ * Tracepoint for freeing a task:
+ */
+TRACE_EVENT(sched_process_free,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for a task exiting:
+ */
+TRACE_EVENT(sched_process_exit,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for a waiting task:
+ */
+TRACE_EVENT(sched_process_wait,
+
+	TP_PROTO(struct pid *pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->pid		= pid_nr(pid);
+		__entry->prio		= current->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for do_fork:
+ */
+TRACE_EVENT(sched_process_fork,
+
+	TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+	TP_ARGS(parent, child),
+
+	TP_STRUCT__entry(
+		__array(	char,	parent_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	parent_pid			)
+		__array(	char,	child_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	child_pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
+		__entry->parent_pid	= parent->pid;
+		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
+		__entry->child_pid	= child->pid;
+	),
+
+	TP_printk("parent %s:%d  child %s:%d",
+		__entry->parent_comm, __entry->parent_pid,
+		__entry->child_comm, __entry->child_pid)
+);
+
+/*
+ * Tracepoint for sending a signal:
+ */
+TRACE_EVENT(sched_signal_send,
+
+	TP_PROTO(int sig, struct task_struct *p),
+
+	TP_ARGS(sig, p),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig			)
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid	= p->pid;
+		__entry->sig	= sig;
+	),
+
+	TP_printk("sig: %d  task %s:%d",
+		  __entry->sig, __entry->comm, __entry->pid)
+);
+
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
new file mode 100644
index 0000000..1e8fabb
--- /dev/null
+++ b/include/trace/events/skb.h
@@ -0,0 +1,40 @@
+#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SKB_H
+
+#include <linux/skbuff.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
+
+/*
+ * Tracepoint for free an sk_buff:
+ */
+TRACE_EVENT(kfree_skb,
+
+	TP_PROTO(struct sk_buff *skb, void *location),
+
+	TP_ARGS(skb, location),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned short,	protocol	)
+		__field(	void *,		location	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
+		__entry->location = location;
+	),
+
+	TP_printk("skbaddr=%p protocol=%u location=%p",
+		__entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#endif /* _TRACE_SKB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/irq.h b/include/trace/irq.h
deleted file mode 100644
index 75e3468..0000000
--- a/include/trace/irq.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_IRQ_H
-
-#include <linux/tracepoint.h>
-#include <linux/interrupt.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM irq
-
-/*
- * Tracepoint for entry of interrupt handler:
- */
-TRACE_FORMAT(irq_handler_entry,
-	TP_PROTO(int irq, struct irqaction *action),
-	TP_ARGS(irq, action),
-	TP_FMT("irq=%d handler=%s", irq, action->name)
-	);
-
-/*
- * Tracepoint for return of an interrupt handler:
- */
-TRACE_EVENT(irq_handler_exit,
-
-	TP_PROTO(int irq, struct irqaction *action, int ret),
-
-	TP_ARGS(irq, action, ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	irq	)
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->irq	= irq;
-		__entry->ret	= ret;
-	),
-
-	TP_printk("irq=%d return=%s",
-		  __entry->irq, __entry->ret ? "handled" : "unhandled")
-);
-
-TRACE_FORMAT(softirq_entry,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-TRACE_FORMAT(softirq_exit,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-#endif /*  _TRACE_IRQ_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/kmem.h b/include/trace/kmem.h
deleted file mode 100644
index c22c42f..0000000
--- a/include/trace/kmem.h
+++ /dev/null
@@ -1,194 +0,0 @@
-#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KMEM_H
-
-#include <linux/types.h>
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kmem
-
-TRACE_EVENT(kmalloc,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags)
-);
-
-TRACE_EVENT(kmem_cache_alloc,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags)
-);
-
-TRACE_EVENT(kmalloc_node,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags,
-		__entry->node)
-);
-
-TRACE_EVENT(kmem_cache_alloc_node,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags,
-		__entry->node)
-);
-
-TRACE_EVENT(kfree,
-
-	TP_PROTO(unsigned long call_site, const void *ptr),
-
-	TP_ARGS(call_site, ptr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
-
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
-);
-
-TRACE_EVENT(kmem_cache_free,
-
-	TP_PROTO(unsigned long call_site, const void *ptr),
-
-	TP_ARGS(call_site, ptr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
-
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
-);
-#endif /* _TRACE_KMEM_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
deleted file mode 100644
index 45e326b..0000000
--- a/include/trace/lockdep.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_LOCKDEP_H
-
-#include <linux/lockdep.h>
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM lockdep
-
-#ifdef CONFIG_LOCKDEP
-
-TRACE_FORMAT(lock_acquire,
-	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
-		int trylock, int read, int check,
-		struct lockdep_map *next_lock, unsigned long ip),
-	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-	TP_FMT("%s%s%s", trylock ? "try " : "",
-		read ? "read " : "", lock->name)
-	);
-
-TRACE_FORMAT(lock_release,
-	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
-	TP_ARGS(lock, nested, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-#ifdef CONFIG_LOCK_STAT
-
-TRACE_FORMAT(lock_contended,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-TRACE_EVENT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
-
-	TP_ARGS(lock, ip, waittime),
-
-	TP_STRUCT__entry(
-		__field(const char *, name)
-		__field(unsigned long, wait_usec)
-		__field(unsigned long, wait_nsec_rem)
-	),
-	TP_fast_assign(
-		__entry->name = lock->name;
-		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
-		__entry->wait_usec = (unsigned long) waittime;
-	),
-	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
-				       __entry->wait_nsec_rem)
-);
-
-#endif
-#endif
-
-#endif /* _TRACE_LOCKDEP_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/sched.h b/include/trace/sched.h
deleted file mode 100644
index ffa1cab..0000000
--- a/include/trace/sched.h
+++ /dev/null
@@ -1,339 +0,0 @@
-#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_SCHED_H
-
-#include <linux/sched.h>
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM sched
-
-/*
- * Tracepoint for calling kthread_stop, performed to end a kthread:
- */
-TRACE_EVENT(sched_kthread_stop,
-
-	TP_PROTO(struct task_struct *t),
-
-	TP_ARGS(t),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
-		__entry->pid	= t->pid;
-	),
-
-	TP_printk("task %s:%d", __entry->comm, __entry->pid)
-);
-
-/*
- * Tracepoint for the return value of the kthread stopping:
- */
-TRACE_EVENT(sched_kthread_stop_ret,
-
-	TP_PROTO(int ret),
-
-	TP_ARGS(ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->ret	= ret;
-	),
-
-	TP_printk("ret %d", __entry->ret)
-);
-
-/*
- * Tracepoint for waiting on task to unschedule:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wait_task,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p),
-
-	TP_ARGS(rq, p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->prio	= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for waking up a task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wakeup,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-	),
-
-	TP_printk("task %s:%d [%d] success=%d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success)
-);
-
-/*
- * Tracepoint for waking up a new task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wakeup_new,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-	),
-
-	TP_printk("task %s:%d [%d] success=%d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success)
-);
-
-/*
- * Tracepoint for task switches, performed by the scheduler:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_switch,
-
-	TP_PROTO(struct rq *rq, struct task_struct *prev,
-		 struct task_struct *next),
-
-	TP_ARGS(rq, prev, next),
-
-	TP_STRUCT__entry(
-		__array(	char,	prev_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	prev_pid			)
-		__field(	int,	prev_prio			)
-		__array(	char,	next_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	next_pid			)
-		__field(	int,	next_prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
-		__entry->prev_pid	= prev->pid;
-		__entry->prev_prio	= prev->prio;
-		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
-		__entry->next_pid	= next->pid;
-		__entry->next_prio	= next->prio;
-	),
-
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
-		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-		__entry->next_comm, __entry->next_pid, __entry->next_prio)
-);
-
-/*
- * Tracepoint for a task being migrated:
- */
-TRACE_EVENT(sched_migrate_task,
-
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
-
-	TP_ARGS(p, orig_cpu, dest_cpu),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	orig_cpu		)
-		__field(	int,	dest_cpu		)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
-		__entry->dest_cpu	= dest_cpu;
-	),
-
-	TP_printk("task %s:%d [%d] from: %d  to: %d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->orig_cpu, __entry->dest_cpu)
-);
-
-/*
- * Tracepoint for freeing a task:
- */
-TRACE_EVENT(sched_process_free,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for a task exiting:
- */
-TRACE_EVENT(sched_process_exit,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for a waiting task:
- */
-TRACE_EVENT(sched_process_wait,
-
-	TP_PROTO(struct pid *pid),
-
-	TP_ARGS(pid),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-		__entry->pid		= pid_nr(pid);
-		__entry->prio		= current->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for do_fork:
- */
-TRACE_EVENT(sched_process_fork,
-
-	TP_PROTO(struct task_struct *parent, struct task_struct *child),
-
-	TP_ARGS(parent, child),
-
-	TP_STRUCT__entry(
-		__array(	char,	parent_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	parent_pid			)
-		__array(	char,	child_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	child_pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
-		__entry->parent_pid	= parent->pid;
-		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
-		__entry->child_pid	= child->pid;
-	),
-
-	TP_printk("parent %s:%d  child %s:%d",
-		__entry->parent_comm, __entry->parent_pid,
-		__entry->child_comm, __entry->child_pid)
-);
-
-/*
- * Tracepoint for sending a signal:
- */
-TRACE_EVENT(sched_signal_send,
-
-	TP_PROTO(int sig, struct task_struct *p),
-
-	TP_ARGS(sig, p),
-
-	TP_STRUCT__entry(
-		__field(	int,	sig			)
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->sig	= sig;
-	),
-
-	TP_printk("sig: %d  task %s:%d",
-		  __entry->sig, __entry->comm, __entry->pid)
-);
-
-#endif /* _TRACE_SCHED_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/skb.h b/include/trace/skb.h
deleted file mode 100644
index 1e8fabb..0000000
--- a/include/trace/skb.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_SKB_H
-
-#include <linux/skbuff.h>
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM skb
-
-/*
- * Tracepoint for free an sk_buff:
- */
-TRACE_EVENT(kfree_skb,
-
-	TP_PROTO(struct sk_buff *skb, void *location),
-
-	TP_ARGS(skb, location),
-
-	TP_STRUCT__entry(
-		__field(	void *,		skbaddr		)
-		__field(	unsigned short,	protocol	)
-		__field(	void *,		location	)
-	),
-
-	TP_fast_assign(
-		__entry->skbaddr = skb;
-		if (skb) {
-			__entry->protocol = ntohs(skb->protocol);
-		}
-		__entry->location = location;
-	),
-
-	TP_printk("skbaddr=%p protocol=%u location=%p",
-		__entry->skbaddr, __entry->protocol, __entry->location)
-);
-
-#endif /* _TRACE_SKB_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index 2fe9d2c..cab535c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,7 +48,7 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 4bebf26..085f73e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,7 +61,6 @@
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
-#include <trace/sched.h>
 #include <linux/magic.h>
 
 #include <asm/pgtable.h>
@@ -71,6 +70,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/sched.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 983d8be..37c6363 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -20,7 +20,7 @@
 #include <linux/bootmem.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/irq.h>
+#include <trace/events/irq.h>
 
 #include "internals.h"
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index e1c7692..41c88fe 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,7 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 257f21a..47b201e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -48,7 +48,7 @@
 #include "lockdep_internals.h"
 
 #define CREATE_TRACE_POINTS
-#include <trace/lockdep.h>
+#include <trace/events/lockdep.h>
 
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
diff --git a/kernel/sched.c b/kernel/sched.c
index e6d4518..9f7ffd0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -79,7 +79,7 @@
 #include "sched_cpupri.h"
 
 #define CREATE_TRACE_POINTS
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
diff --git a/kernel/signal.c b/kernel/signal.c
index 1d5703f..94ec0a4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,7 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a2d9b45..7ab9dfd 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -24,7 +24,7 @@
 #include <linux/ftrace.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
-#include <trace/irq.h>
+#include <trace/events/irq.h>
 
 #include <asm/irq.h>
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8e6a0b5..a234889 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,7 +29,7 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/ftrace.h>
 
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 9d8cccd..a98106d 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -10,7 +10,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5bc00e8f..b8b13c5 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
diff --git a/mm/util.c b/mm/util.c
index 0e74a22..6794a33 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -7,7 +7,7 @@
 #include <asm/uaccess.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/kmem.h>
+#include <trace/events/kmem.h>
 
 /**
  * kstrdup - allocate space for and copy an existing string
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9fd0dc3..b75b6ce 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -23,7 +23,7 @@
 #include <linux/bitops.h>
 #include <net/genetlink.h>
 
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include <asm/unaligned.h>
 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 8017720..499a67e 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -24,6 +24,6 @@
 #include <asm/bitops.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ce6356c..12806b8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -65,7 +65,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include "kmap_skb.h"
 
-- 
cgit v0.10.2


From 9cfe06f8cd5c8c3ad6ab323973e87dde670642b8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 14 Apr 2009 21:37:03 -0400
Subject: tracing/events: add trace-events-sample

This patch adds a sample to the samples directory on how to create
and use TRACE_EVENT trace points.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/samples/Kconfig b/samples/Kconfig
index 4b02f5a..93f41c0 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -19,6 +19,13 @@ config SAMPLE_TRACEPOINTS
 	help
 	  This build tracepoints example modules.
 
+config SAMPLE_TRACE_EVENTS
+	tristate "Build trace_events examples"
+	depends on EVENT_TRACING
+	default m
+	help
+	  This build trace event example modules.
+
 config SAMPLE_KOBJECT
 	tristate "Build kobject examples"
 	help
diff --git a/samples/Makefile b/samples/Makefile
index 10eaca8..13e4b47 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/
+obj-$(CONFIG_SAMPLES)	+= markers/ kobject/ kprobes/ tracepoints/ trace_events/
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
new file mode 100644
index 0000000..06c6dea
--- /dev/null
+++ b/samples/trace_events/Makefile
@@ -0,0 +1,8 @@
+# builds the trace events example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+PWD := $(shell pwd)
+
+CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
+
+obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
new file mode 100644
index 0000000..f33b3ba
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.c
@@ -0,0 +1,56 @@
+#include <linux/module.h>
+#include <linux/kthread.h>
+
+/*
+ * Any file that uses trace points, must include the header.
+ * But only one file, must include the header by defining
+ * CREATE_TRACE_POINTS first.  This will make the C code that
+ * creates the handles for the trace points.
+ */
+#define CREATE_TRACE_POINTS
+#include "trace-events-sample.h"
+
+
+static void simple_thread_func(int cnt)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+	schedule_timeout(HZ);
+	trace_foo_bar("hello", cnt);
+
+	if (!(cnt % 10))
+		/* It is really important that I say "hi!" */
+		printk(KERN_EMERG "hi!\n");
+}
+
+static int simple_thread(void *arg)
+{
+	int cnt = 0;
+
+	while (!kthread_should_stop())
+		simple_thread_func(cnt++);
+
+	return 0;
+}
+
+static struct task_struct *simple_tsk;
+
+static int __init trace_event_init(void)
+{
+	simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
+	if (IS_ERR(simple_tsk))
+		return -1;
+
+	return 0;
+}
+
+static void __exit trace_event_exit(void)
+{
+	kthread_stop(simple_tsk);
+}
+
+module_init(trace_event_init);
+module_exit(trace_event_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("trace-events-sample");
+MODULE_LICENSE("GPL");
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
new file mode 100644
index 0000000..eab4644
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.h
@@ -0,0 +1,124 @@
+/*
+ * Notice that this file is not protected like a normal header.
+ * We also must allow for rereading of this file. The
+ *
+ *  || defined(TRACE_HEADER_MULTI_READ)
+ *
+ * serves this purpose.
+ */
+#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EVENT_SAMPLE_H
+
+/*
+ * All trace headers should include tracepoint.h, until we finally
+ * make it into a standard header.
+ */
+#include <linux/tracepoint.h>
+
+/*
+ * If TRACE_SYSTEM is defined, that will be the directory created
+ * in the ftrace directory under /debugfs/tracing/events/<system>
+ *
+ * The define_trace.h belowe will also look for a file name of
+ * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
+ *
+ * If you want a different system than file name, you can override
+ * the header name by defining TRACE_INCLUDE_FILE
+ *
+ * If this file was called, goofy.h, then we would define:
+ *
+ * #define TRACE_INCLUDE_FILE goofy
+ *
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM trace-events-sample
+
+/*
+ * The TRACE_EVENT macro is broken up into 5 parts.
+ *
+ * name: name of the trace point. This is also how to enable the tracepoint.
+ *   A function called trace_foo_bar() will be created.
+ *
+ * proto: the prototype of the function trace_foo_bar()
+ *   Here it is trace_foo_bar(char *foo, int bar).
+ *
+ * args:  must match the arguments in the prototype.
+ *    Here it is simply "foo, bar".
+ *
+ * struct:  This defines the way the data will be stored in the ring buffer.
+ *    There are currently two types of elements. __field and __array.
+ *    a __field is broken up into (type, name). Where type can be any
+ *    type but an array.
+ *    For an array. there are three fields. (type, name, size). The
+ *    type of elements in the array, the name of the field and the size
+ *    of the array.
+ *
+ *    __array( char, foo, 10) is the same as saying   char foo[10].
+ *
+ * fast_assign: This is a C like function that is used to store the items
+ *    into the ring buffer.
+ *
+ * printk: This is a way to print out the data in pretty print. This is
+ *    useful if the system crashes and you are logging via a serial line,
+ *    the data can be printed to the console using this "printk" method.
+ *
+ * Note, that for both the assign and the printk, __entry is the handler
+ * to the data structure in the ring buffer, and is defined by the
+ * TP_STRUCT__entry.
+ */
+TRACE_EVENT(foo_bar,
+
+	TP_PROTO(char *foo, int bar),
+
+	TP_ARGS(foo, bar),
+
+	TP_STRUCT__entry(
+		__array(	char,	foo,    10		)
+		__field(	int,	bar			)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->foo, foo, 10);
+		__entry->bar	= bar;
+	),
+
+	TP_printk("foo %s %d", __entry->foo, __entry->bar)
+);
+#endif
+
+/***** NOTICE! The #if protection ends here. *****/
+
+
+/*
+ * There are several ways I could have done this. If I left out the
+ * TRACE_INCLUDE_PATH, then it would default to the kernel source
+ * include/trace/events directory.
+ *
+ * I could specify a path from the define_trace.h file back to this
+ * file.
+ *
+ * #define TRACE_INCLUDE_PATH ../../samples/trace_events
+ *
+ * But I chose to simply make it use the current directory and then in
+ * the Makefile I added:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
+ *
+ * This will make sure the current path is part of the include
+ * structure for our file so that we can find it.
+ *
+ * I could have made only the top level directory the include:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(PWD)
+ *
+ * And then let the path to this directory be the TRACE_INCLUDE_PATH:
+ *
+ * #define TRACE_INCLUDE_PATH samples/trace_events
+ *
+ * But then if something defines "samples" or "trace_events" then we
+ * could risk that being converted too, and give us an unexpected
+ * result.
+ */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
-- 
cgit v0.10.2


From 05725f7eb4b8acb147c5fc7b91397b1f6bcab00d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 14 Apr 2009 20:17:16 +0200
Subject: rculist: use list_entry_rcu in places where it's appropriate

Use previously introduced list_entry_rcu instead of an open-coded
list_entry + rcu_dereference combination.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: dipankar@in.ibm.com
LKML-Reference: <20090414181715.GA3634@psychotron.englab.brq.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc..886df41 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -77,6 +77,7 @@ struct sched_param {
 #include <linux/proportions.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
+#include <linux/rculist.h>
 #include <linux/rtmutex.h>
 
 #include <linux/time.h>
@@ -2010,7 +2011,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 }
 #endif
 
-#define next_task(p)	list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
+#define next_task(p) \
+	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
 
 #define for_each_process(p) \
 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
@@ -2049,8 +2051,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
-	return list_entry(rcu_dereference(p->thread_group.next),
-			  struct task_struct, thread_group);
+	return list_entry_rcu(p->thread_group.next,
+			      struct task_struct, thread_group);
 }
 
 static inline int thread_group_empty(struct task_struct *p)
diff --git a/ipc/sem.c b/ipc/sem.c
index 16a2189..87c2b64 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk)
 		int i;
 
 		rcu_read_lock();
-		un = list_entry(rcu_dereference(ulp->list_proc.next),
-					struct sem_undo, list_proc);
+		un = list_entry_rcu(ulp->list_proc.next,
+				    struct sem_undo, list_proc);
 		if (&un->list_proc == &ulp->list_proc)
 			semid = -1;
 		 else
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index ffbe259..510186f 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -84,8 +84,8 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
 	 * against concurrent list-extension
 	 */
 	rcu_read_lock();
-	qe = list_entry(rcu_dereference(qe->later.next),
-			struct ima_queue_entry, later);
+	qe = list_entry_rcu(qe->later.next,
+			    struct ima_queue_entry, later);
 	rcu_read_unlock();
 	(*pos)++;
 
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index e03a7e1..11d2cb1 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -734,8 +734,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new)
 		return;
 	}
 
-	m = list_entry(rcu_dereference(smk_netlbladdr_list.next),
-			 struct smk_netlbladdr, list);
+	m = list_entry_rcu(smk_netlbladdr_list.next,
+			   struct smk_netlbladdr, list);
 
 	/* the comparison '>' is a bit hacky, but works */
 	if (new->smk_mask.s_addr > m->smk_mask.s_addr) {
@@ -748,8 +748,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new)
 			list_add_rcu(&new->list, &m->list);
 			return;
 		}
-		m_next = list_entry(rcu_dereference(m->list.next),
-				 struct smk_netlbladdr, list);
+		m_next = list_entry_rcu(m->list.next,
+					struct smk_netlbladdr, list);
 		if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) {
 			list_add_rcu(&new->list, &m->list);
 			return;
-- 
cgit v0.10.2


From b206525ad1f653b7da35f5827be93770d28eae11 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Tue, 14 Apr 2009 23:04:37 +0530
Subject: x86: k8 convert node_to_k8_nb_misc() from a macro to an inline
 function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Converting node_to_k8_nb_misc() from a macro to an inline function
makes compiler see the 'node' parameter in the !CONFIG_K8_NB too,
which eliminates these compiler warnings:

  arch/x86/kernel/cpu/intel_cacheinfo.c: In function ‘show_cache_disable’:
  arch/x86/kernel/cpu/intel_cacheinfo.c:712: warning: unused variable ‘node’
  arch/x86/kernel/cpu/intel_cacheinfo.c: In function ‘store_cache_disable’:
  arch/x86/kernel/cpu/intel_cacheinfo.c:739: warning: unused variable ‘node’

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Mark Langsdorf <mark.langsdorf@amd.com>
LKML-Reference: <1239730477.2966.26.camel@ht.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h
index c23b3d1..c2d1f3b 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/k8.h
@@ -13,10 +13,15 @@ extern void k8_flush_garts(void);
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
 #ifdef CONFIG_K8_NB
-#define node_to_k8_nb_misc(node) \
-	(node < num_k8_northbridges) ? k8_northbridges[node] : NULL
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+	return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
+}
 #else
-#define node_to_k8_nb_misc(node) NULL
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+	return NULL;
+}
 #endif
 
 
-- 
cgit v0.10.2


From e6a1a89d572c31b62d6dcf11a371c7323852d9b2 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 15 Apr 2009 18:22:41 +0900
Subject: dma-debug: add dma_debug_resize_entries() to adjust the number of
 dma_debug_entries

We use a static value for the number of dma_debug_entries. It can be
overwritten by a kernel command line option.

Some IOMMUs (e.g. GART) can't set an appropriate value by a kernel
command line option because they can't know such value until they
finish initializing up their hardware.

This patch adds dma_debug_resize_entries() enables IOMMUs to adjust
the number of dma_debug_entries anytime.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: fujita.tomonori@lab.ntt.co.jp
Cc: akpm@linux-foundation.org
LKML-Reference: <20090415182234R.fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 28d53cb..171ad8a 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);
 
 extern void dma_debug_init(u32 num_entries);
 
+extern int dma_debug_resize_entries(u32 num_entries);
+
 extern void debug_dma_map_page(struct device *dev, struct page *page,
 			       size_t offset, size_t size,
 			       int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
 {
 }
 
+static inline int dma_debug_resize_entries(u32 num_entries)
+{
+	return 0;
+}
+
 static inline void debug_dma_map_page(struct device *dev, struct page *page,
 				      size_t offset, size_t size,
 				      int direction, dma_addr_t dma_addr,
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d3da7ed..5d61019 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -85,6 +85,7 @@ static u32 show_num_errors = 1;
 
 static u32 num_free_entries;
 static u32 min_free_entries;
+static u32 nr_total_entries;
 
 /* number of preallocated entries requested by kernel cmdline */
 static u32 req_entries;
@@ -257,6 +258,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 	put_hash_bucket(bucket, &flags);
 }
 
+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+	struct dma_debug_entry *entry;
+
+	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+	list_del(&entry->list);
+	memset(entry, 0, sizeof(*entry));
+
+	num_free_entries -= 1;
+	if (num_free_entries < min_free_entries)
+		min_free_entries = num_free_entries;
+
+	return entry;
+}
+
 /* struct dma_entry allocator
  *
  * The next two functions implement the allocator for
@@ -276,9 +292,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
 		goto out;
 	}
 
-	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
-	list_del(&entry->list);
-	memset(entry, 0, sizeof(*entry));
+	entry = __dma_entry_alloc();
 
 #ifdef CONFIG_STACKTRACE
 	entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +300,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
 	entry->stacktrace.skip = 2;
 	save_stack_trace(&entry->stacktrace);
 #endif
-	num_free_entries -= 1;
-	if (num_free_entries < min_free_entries)
-		min_free_entries = num_free_entries;
 
 out:
 	spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +321,53 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 	spin_unlock_irqrestore(&free_entries_lock, flags);
 }
 
+int dma_debug_resize_entries(u32 num_entries)
+{
+	int i, delta, ret = 0;
+	unsigned long flags;
+	struct dma_debug_entry *entry;
+	LIST_HEAD(tmp);
+
+	spin_lock_irqsave(&free_entries_lock, flags);
+
+	if (nr_total_entries < num_entries) {
+		delta = num_entries - nr_total_entries;
+
+		spin_unlock_irqrestore(&free_entries_lock, flags);
+
+		for (i = 0; i < delta; i++) {
+			entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+			if (!entry)
+				break;
+
+			list_add_tail(&entry->list, &tmp);
+		}
+
+		spin_lock_irqsave(&free_entries_lock, flags);
+
+		list_splice(&tmp, &free_entries);
+		nr_total_entries += i;
+		num_free_entries += i;
+	} else {
+		delta = nr_total_entries - num_entries;
+
+		for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+			entry = __dma_entry_alloc();
+			kfree(entry);
+		}
+
+		nr_total_entries -= i;
+	}
+
+	if (nr_total_entries != num_entries)
+		ret = 1;
+
+	spin_unlock_irqrestore(&free_entries_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
 /*
  * DMA-API debugging init code
  *
@@ -490,6 +548,8 @@ void dma_debug_init(u32 num_entries)
 		return;
 	}
 
+	nr_total_entries = num_free_entries;
+
 	printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
 }
 
-- 
cgit v0.10.2


From 19c1a6f5764d787113fa323ffb18be7991208f82 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 14 Apr 2009 09:43:19 +0900
Subject: x86 gart: reimplement IOMMU_LEAK feature by using DMA_API_DEBUG

IOMMU_LEAK, GART's own feature, dumps the used IOMMU entries when
IOMMU entries is full, which might be useful to find a bad driver that
eats IOMMU entries.

DMA_API_DEBUG provides the similar feature, debug_dma_dump_mappings,
and it's better than GART's IOMMU_LEAK feature. GART's IOMMU_LEAK
feature doesn't say who uses IOMMU entries so it's hard to find a bad
driver.

This patch reimplements the GART's IOMMU_LEAK feature by using
DMA_API_DEBUG.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <1239669799-23579-2-git-send-email-fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e7..5865712 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -161,8 +161,7 @@ config IOMMU_DEBUG
 
 config IOMMU_LEAK
 	bool "IOMMU leak tracing"
-	depends on DEBUG_KERNEL
-	depends on IOMMU_DEBUG
+	depends on IOMMU_DEBUG && DMA_API_DEBUG
 	---help---
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b284b58..1e8920d 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -144,48 +144,21 @@ static void flush_gart(void)
 }
 
 #ifdef CONFIG_IOMMU_LEAK
-
-#define SET_LEAK(x)							\
-	do {								\
-		if (iommu_leak_tab)					\
-			iommu_leak_tab[x] = __builtin_return_address(0);\
-	} while (0)
-
-#define CLEAR_LEAK(x)							\
-	do {								\
-		if (iommu_leak_tab)					\
-			iommu_leak_tab[x] = NULL;			\
-	} while (0)
-
 /* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
 static int leak_trace;
 static int iommu_leak_pages = 20;
 
 static void dump_leak(void)
 {
-	int i;
 	static int dump;
 
-	if (dump || !iommu_leak_tab)
+	if (dump)
 		return;
 	dump = 1;
-	show_stack(NULL, NULL);
 
-	/* Very crude. dump some from the end of the table too */
-	printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
-	       iommu_leak_pages);
-	for (i = 0; i < iommu_leak_pages; i += 2) {
-		printk(KERN_DEBUG "%lu: ", iommu_pages-i);
-		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
-				0);
-		printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
-	}
-	printk(KERN_DEBUG "\n");
+	show_stack(NULL, NULL);
+	debug_dma_dump_mappings(NULL);
 }
-#else
-# define SET_LEAK(x)
-# define CLEAR_LEAK(x)
 #endif
 
 static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 
 	for (i = 0; i < npages; i++) {
 		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
-		SET_LEAK(iommu_page + i);
 		phys_mem += PAGE_SIZE;
 	}
 	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
 	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
 	for (i = 0; i < npages; i++) {
 		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
-		CLEAR_LEAK(iommu_page + i);
 	}
 	free_iommu(iommu_page, npages);
 }
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
 		pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
 		while (pages--) {
 			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
-			SET_LEAK(iommu_page);
 			addr += PAGE_SIZE;
 			iommu_page++;
 		}
@@ -801,11 +771,12 @@ void __init gart_iommu_init(void)
 
 #ifdef CONFIG_IOMMU_LEAK
 	if (leak_trace) {
-		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
-				  get_order(iommu_pages*sizeof(void *)));
-		if (!iommu_leak_tab)
+		int ret;
+
+		ret = dma_debug_resize_entries(iommu_pages);
+		if (ret)
 			printk(KERN_DEBUG
-			       "PCI-DMA: Cannot allocate leak trace area\n");
+			       "PCI-DMA: Cannot trace all the entries\n");
 	}
 #endif
 
-- 
cgit v0.10.2


From 13318a7186d8e0ae08c996ea4111a945e7789772 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Wed, 15 Apr 2009 09:59:10 +0800
Subject: sched: use group_first_cpu() instead of
 cpumask_first(sched_group_cpus())

Impact: cleanup

This patch changes cpumask_first(sched_group_cpus()) to group_first_cpu()
for maintainability.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 92b4b56..7601cee 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7995,7 +7995,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 			struct sched_domain *sd;
 
 			sd = &per_cpu(phys_domains, j).sd;
-			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
+			if (j != group_first_cpu(sd->groups)) {
 				/*
 				 * Only add "power" once for each
 				 * physical package.
@@ -8073,7 +8073,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 
 	WARN_ON(!sd || !sd->groups);
 
-	if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
+	if (cpu != group_first_cpu(sd->groups))
 		return;
 
 	child = sd->child;
-- 
cgit v0.10.2


From abe5fa7899fb5809ddc7336d8dd0edd5b2b96665 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 15 Apr 2009 20:45:03 +0800
Subject: crypto: eseqiv - Fix IV generation for sync algorithms

If crypto_ablkcipher_encrypt() returns synchronous,
eseqiv_complete2() is called even if req->giv is already the
pointer to the generated IV. The generated IV is overwritten
with some random data in this case. This patch fixes this by
calling eseqiv_complete2() just if the generated IV has to be
copied to req->giv.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c
index 2a342c8..3ca3b66 100644
--- a/crypto/eseqiv.c
+++ b/crypto/eseqiv.c
@@ -153,7 +153,8 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req)
 	if (err)
 		goto out;
 
-	eseqiv_complete2(req);
+	if (giv != req->giv)
+		eseqiv_complete2(req);
 
 out:
 	return err;
-- 
cgit v0.10.2


From 77857dc07247ed5fa700a197c96ef842d8dbebdf Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 15 Apr 2009 11:57:01 -0700
Subject: x86: use used_vectors in init_IRQ()

Impact: fix crash with many devices

I found this crash:

[  552.616646] general protection fault: 0403 [#1] SMP
[  552.620013] last sysfs file:
/sys/devices/pci0000:00/0000:00:02.0/usb1/1-1/1-1:1.0/host13/target13:0:0/13:0:0:0/block/sr0/size
[  552.620013] CPU 0
[  552.620013] Modules linked in:
[  552.620013] Pid: 0, comm: swapper Not tainted 2.6.30-rc1-tip-01931-g8fcafd8-dirty #28 Sun Fire X4440
[  552.620013] RIP: 0010:[<ffffffff8023bada>]  [<ffffffff8023bada>] default_idle+0x7d/0xda
[  552.620013] RSP: 0018:ffffffff81345e68  EFLAGS: 00010246
[  552.620013] RAX: 0000000000000000 RBX: ffffffff8133d870 RCX: ffffc20000000000
[  552.620013] RDX: 00000000001d0620 RSI: ffffffff8023bad8 RDI: ffffffff802a3169
[  552.620013] RBP: ffffffff81345e98 R08: 0000000000000000 R09: ffffffff812244a0
[  552.620013] R10: ffffffff81345dc8 R11: 7ebe1b6fa0bcac50 R12: 4ec4ec4ec4ec4ec5
[  552.620013] R13: ffffffff813a54d0 R14: ffffffff813a7a40 R15: 0000000000000000
[  552.620013] FS:  00000000006d1880(0000) GS:ffffc20000000000(0000) knlGS:0000000000000000
[  552.620013] CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
[  552.620013] CR2: 00007fec9d936a50 CR3: 000000007d1a9000 CR4: 00000000000006e0
[  552.620013] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  552.620013] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  552.620013] Process swapper (pid: 0, threadinfo ffffffff81344000,task ffffffff812244a0)
[  552.620013] Stack:
[  552.620013]  0000000000000000 ffffc20000000000 00000000001d0620 7ebe1b6fa0bcac50
[  552.620013]  ffffffff8133d870 4ec4ec4ec4ec4ec5 ffffffff81345ec8 ffffffff8023bd84
[  552.620013]  4ec4ec4ec4ec4ec5 ffffffff813a54d0 7ebe1b6fa0bcac50 ffffffff8133d870
[  552.620013] Call Trace:
[  552.620013]  [<ffffffff8023bd84>] c1e_idle+0x109/0x124
[  552.620013]  [<ffffffff8023314b>] cpu_idle+0xb8/0x101
[  552.620013]  [<ffffffff80c16d6a>] rest_init+0x7e/0x94
[  552.620013]  [<ffffffff81357efc>] start_kernel+0x3dc/0x3fd
[  552.620013]  [<ffffffff813572a9>] x86_64_start_reservations+0xb9/0xd4
[  552.620013]  [<ffffffff813573b2>] x86_64_start_kernel+0xee/0x109
[  552.620013] Code: 48 8b 04 25 f8 b4 00 00 83 a0 3c e0 ff ff fb 0f ae f0 65 48 8b 04 25 f8 b4 00 00 f6 80 38 e0 ff ff 08 75 09 e8 71 76 06 00 fb f4 <eb> 06 e8 68 76 06 00 fb 65 48 8b 04 25 f8 b4 00 00 83 88 3c e0
[  552.620013] RIP  [<ffffffff8023bada>] default_idle+0x7d/0xda
[  552.620013]  RSP <ffffffff81345e68>
[  552.828646] ---[ end trace 4cbfc5c01382af7f ]---

Joerg Roedel said
	"The 0403 error code means that there was an external interrupt with vector
	0x80. Yinghai, my theory is that the kernel on this machine has no 32bit
	emulation compiled in, right? In this case the selector points to a zero entry
	which may cause the #gpf right after the hlt.
	But I have no idea where the external int 0x80 comes from"

it turns out that we could use 0x80 for external device on 64-bit
when 32-bit emulation is disabled.

But we forgot to set the gate for it.

try to set gate for it by checking used_vectors.

Also move apic_intr_init() early to avoid setting
that gate two times.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Joerg Roedel <joerg.roedel@amd.com>
LKML-Reference: <49E62DFD.6010904@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index b424c32..2e08b10 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -240,19 +240,19 @@ void __init native_init_IRQ(void)
 	/* Execute any quirks before the call gates are initialised: */
 	x86_quirk_pre_intr_init();
 
+	apic_intr_init();
+
 	/*
 	 * Cover the whole vector space, no vector can escape
 	 * us. (some of these will be overridden and become
 	 * 'special' SMP interrupts)
 	 */
 	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-		/* IA32_SYSCALL_VECTOR was reserved in trap_init. */
-		if (i != IA32_SYSCALL_VECTOR)
+		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
+		if (!test_bit(i, used_vectors))
 			set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
 	}
 
-	apic_intr_init();
-
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
 
-- 
cgit v0.10.2


From ff7b1b4f000cea84f071c1b6aa2918b2119d66f1 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 15 Apr 2009 16:55:05 +0100
Subject: perfcounters: export perf_tpcounter_event

Needed for modular tracepoint support.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7f9521c..0939609 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2590,6 +2590,7 @@ void perf_tpcounter_event(int event_id)
 
 	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
 }
+EXPORT_SYMBOL_GPL(perf_tpcounter_event);
 
 extern int ftrace_profile_enable(int);
 extern void ftrace_profile_disable(int);
-- 
cgit v0.10.2


From 0207a2efb43d81e29e23662b5d035945688a103f Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Thu, 16 Apr 2009 14:40:56 +0900
Subject: sh: Add support for SH7724 (SH-Mobile R2R) CPU subtype.

This implements initial support for the SH-Mobile R2R CPU.
Based on Rev 0.11 of the initial SH7724 hardware manual.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index e7390dd..505d1ac 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -347,6 +347,15 @@ config CPU_SUBTYPE_SH7723
 	help
 	  Select SH7723 if you have an SH-MobileR2 CPU.
 
+config CPU_SUBTYPE_SH7724
+	bool "Support SH7724 processor"
+	select CPU_SH4A
+	select CPU_SHX2
+	select ARCH_SPARSEMEM_ENABLE
+	select SYS_SUPPORTS_CMT
+	help
+	  Select SH7724 if you have an SH-MobileR2R CPU.
+
 config CPU_SUBTYPE_SH7763
 	bool "Support SH7763 processor"
 	select CPU_SH4A
@@ -495,6 +504,7 @@ config SH_PCLK_FREQ
 			      CPU_SUBTYPE_SH7203 || CPU_SUBTYPE_SH7206 || \
 			      CPU_SUBTYPE_SH7263 || CPU_SUBTYPE_MXG    || \
 			      CPU_SUBTYPE_SH7786
+	default "41666666" if CPU_SUBTYPE_SH7724
 	default "60000000" if CPU_SUBTYPE_SH7751 || CPU_SUBTYPE_SH7751R
 	default "66000000" if CPU_SUBTYPE_SH4_202
 	default "50000000"
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 1fd58b4..005c962 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -32,7 +32,7 @@ enum cpu_type {
 
 	/* SH-4A types */
 	CPU_SH7763, CPU_SH7770, CPU_SH7780, CPU_SH7781, CPU_SH7785, CPU_SH7786,
-	CPU_SH7723, CPU_SHX3,
+	CPU_SH7723, CPU_SH7724, CPU_SHX3,
 
 	/* SH4AL-DSP types */
 	CPU_SH7343, CPU_SH7722, CPU_SH7366,
diff --git a/arch/sh/include/cpu-sh4/cpu/freq.h b/arch/sh/include/cpu-sh4/cpu/freq.h
index 749d1c4..ccf1d99 100644
--- a/arch/sh/include/cpu-sh4/cpu/freq.h
+++ b/arch/sh/include/cpu-sh4/cpu/freq.h
@@ -25,6 +25,24 @@
 #elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
       defined(CONFIG_CPU_SUBTYPE_SH7780)
 #define	FRQCR			0xffc80000
+#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
+#define FRQCRA			0xa4150000
+#define FRQCRB			0xa4150004
+#define VCLKCR			0xa4150048
+
+#define FCLKACR			0xa4150008
+#define FCLKBCR			0xa415000c
+#define FRQCR			FRQCRA
+#define SCLKACR			FCLKACR
+#define SCLKBCR			FCLKBCR
+#define FCLKACR			0xa4150008
+#define FCLKBCR			0xa415000c
+#define IrDACLKCR		0xa4150018
+
+#define MSTPCR0			0xa4150030
+#define MSTPCR1			0xa4150034
+#define MSTPCR2			0xa4150038
+
 #elif defined(CONFIG_CPU_SUBTYPE_SH7785)
 #define FRQCR0			0xffc80000
 #define FRQCR1			0xffc80004
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7724.h b/arch/sh/include/cpu-sh4/cpu/sh7724.h
new file mode 100644
index 0000000..34605c9
--- /dev/null
+++ b/arch/sh/include/cpu-sh4/cpu/sh7724.h
@@ -0,0 +1,255 @@
+#ifndef __ASM_SH7724_H__
+#define __ASM_SH7724_H__
+
+enum {
+	/* PTA */
+	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
+	GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0,
+
+	/* PTB */
+	GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4,
+	GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0,
+
+	/* PTC */
+	GPIO_PTC7, GPIO_PTC6, GPIO_PTC5, GPIO_PTC4,
+	GPIO_PTC3, GPIO_PTC2, GPIO_PTC1, GPIO_PTC0,
+
+	/* PTD */
+	GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4,
+	GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0,
+
+	/* PTE */
+	GPIO_PTE7, GPIO_PTE6, GPIO_PTE5, GPIO_PTE4,
+	GPIO_PTE3, GPIO_PTE2, GPIO_PTE1, GPIO_PTE0,
+
+	/* PTF */
+	GPIO_PTF7, GPIO_PTF6, GPIO_PTF5, GPIO_PTF4,
+	GPIO_PTF3, GPIO_PTF2, GPIO_PTF1, GPIO_PTF0,
+
+	/* PTG */
+			      GPIO_PTG5, GPIO_PTG4,
+	GPIO_PTG3, GPIO_PTG2, GPIO_PTG1, GPIO_PTG0,
+
+	/* PTH */
+	GPIO_PTH7, GPIO_PTH6, GPIO_PTH5, GPIO_PTH4,
+	GPIO_PTH3, GPIO_PTH2, GPIO_PTH1, GPIO_PTH0,
+
+	/* PTJ */
+	GPIO_PTJ7, GPIO_PTJ6, GPIO_PTJ5,
+	GPIO_PTJ3, GPIO_PTJ2, GPIO_PTJ1, GPIO_PTJ0,
+
+	/* PTK */
+	GPIO_PTK7, GPIO_PTK6, GPIO_PTK5, GPIO_PTK4,
+	GPIO_PTK3, GPIO_PTK2, GPIO_PTK1, GPIO_PTK0,
+
+	/* PTL */
+	GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4,
+	GPIO_PTL3, GPIO_PTL2, GPIO_PTL1, GPIO_PTL0,
+
+	/* PTM */
+	GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4,
+	GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0,
+
+	/* PTN */
+	GPIO_PTN7, GPIO_PTN6, GPIO_PTN5, GPIO_PTN4,
+	GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0,
+
+	/* PTQ */
+	GPIO_PTQ7, GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4,
+	GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0,
+
+	/* PTR */
+	GPIO_PTR7, GPIO_PTR6, GPIO_PTR5, GPIO_PTR4,
+	GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0,
+
+	/* PTS */
+		   GPIO_PTS6, GPIO_PTS5, GPIO_PTS4,
+	GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0,
+
+	/* PTT */
+	GPIO_PTT7, GPIO_PTT6, GPIO_PTT5, GPIO_PTT4,
+	GPIO_PTT3, GPIO_PTT2, GPIO_PTT1, GPIO_PTT0,
+
+	/* PTU */
+	GPIO_PTU7, GPIO_PTU6, GPIO_PTU5, GPIO_PTU4,
+	GPIO_PTU3, GPIO_PTU2, GPIO_PTU1, GPIO_PTU0,
+
+	/* PTV */
+	GPIO_PTV7, GPIO_PTV6, GPIO_PTV5, GPIO_PTV4,
+	GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0,
+
+	/* PTW */
+	GPIO_PTW7, GPIO_PTW6, GPIO_PTW5, GPIO_PTW4,
+	GPIO_PTW3, GPIO_PTW2, GPIO_PTW1, GPIO_PTW0,
+
+	/* PTX */
+	GPIO_PTX7, GPIO_PTX6, GPIO_PTX5, GPIO_PTX4,
+	GPIO_PTX3, GPIO_PTX2, GPIO_PTX1, GPIO_PTX0,
+
+	/* PTY */
+	GPIO_PTY7, GPIO_PTY6, GPIO_PTY5, GPIO_PTY4,
+	GPIO_PTY3, GPIO_PTY2, GPIO_PTY1, GPIO_PTY0,
+
+	/* PTZ */
+	GPIO_PTZ7, GPIO_PTZ6, GPIO_PTZ5, GPIO_PTZ4,
+	GPIO_PTZ3, GPIO_PTZ2, GPIO_PTZ1, GPIO_PTZ0,
+
+	/* BSC (PTA/PTB/PTJ/PTQ/PTR/PTT) */
+	GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28,
+	GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24,
+	GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20,
+	GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16,
+	GPIO_FN_D15, GPIO_FN_D14, GPIO_FN_D13, GPIO_FN_D12,
+	GPIO_FN_D11, GPIO_FN_D10, GPIO_FN_D9,  GPIO_FN_D8,
+	GPIO_FN_D7,  GPIO_FN_D6,  GPIO_FN_D5,  GPIO_FN_D4,
+	GPIO_FN_D3,  GPIO_FN_D2,  GPIO_FN_D1,  GPIO_FN_D0,
+	GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22,
+	GPIO_FN_CS6B_CE1B,  GPIO_FN_CS6A_CE2B,
+	GPIO_FN_CS5B_CE1A,  GPIO_FN_CS5A_CE2A,
+	GPIO_FN_WE3_ICIOWR, GPIO_FN_WE2_ICIORD,
+	GPIO_FN_IOIS16,     GPIO_FN_WAIT,
+	GPIO_FN_BS,
+
+	/* KEYSC (PTA/PTB)*/
+	GPIO_FN_KEYOUT5_IN5, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYIN4,
+	GPIO_FN_KEYIN3,  GPIO_FN_KEYIN2,  GPIO_FN_KEYIN1,  GPIO_FN_KEYIN0,
+	GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT2, GPIO_FN_KEYOUT1, GPIO_FN_KEYOUT0,
+
+	/* ATAPI (PTA/PTB/PTK/PTR/PTS/PTW) */
+	GPIO_FN_IDED15, GPIO_FN_IDED14, GPIO_FN_IDED13, GPIO_FN_IDED12,
+	GPIO_FN_IDED11, GPIO_FN_IDED10, GPIO_FN_IDED9,  GPIO_FN_IDED8,
+	GPIO_FN_IDED7,  GPIO_FN_IDED6,  GPIO_FN_IDED5,  GPIO_FN_IDED4,
+	GPIO_FN_IDED3,  GPIO_FN_IDED2,  GPIO_FN_IDED1,  GPIO_FN_IDED0,
+	GPIO_FN_IDEA2,     GPIO_FN_IDEA1,     GPIO_FN_IDEA0,  GPIO_FN_IDEIOWR,
+	GPIO_FN_IODREQ,    GPIO_FN_IDECS0,    GPIO_FN_IDECS1, GPIO_FN_IDEIORD,
+	GPIO_FN_DIRECTION, GPIO_FN_EXBUF_ENB, GPIO_FN_IDERST, GPIO_FN_IODACK,
+	GPIO_FN_IDEINT,    GPIO_FN_IDEIORDY,
+
+	/* TPU (PTB/PTR/PTS) */
+	GPIO_FN_TPUTO3, GPIO_FN_TPUTO2, GPIO_FN_TPUTO1, GPIO_FN_TPUTO0,
+	GPIO_FN_TPUTI3, GPIO_FN_TPUTI2,
+
+	/* LCDC (PTC/PTD/PTE/PTF/PTM/PTR) */
+	GPIO_FN_LCDD23, GPIO_FN_LCDD22, GPIO_FN_LCDD21, GPIO_FN_LCDD20,
+	GPIO_FN_LCDD19, GPIO_FN_LCDD18, GPIO_FN_LCDD17, GPIO_FN_LCDD16,
+	GPIO_FN_LCDD15, GPIO_FN_LCDD14, GPIO_FN_LCDD13, GPIO_FN_LCDD12,
+	GPIO_FN_LCDD11, GPIO_FN_LCDD10, GPIO_FN_LCDD9,  GPIO_FN_LCDD8,
+	GPIO_FN_LCDD7,  GPIO_FN_LCDD6,  GPIO_FN_LCDD5,  GPIO_FN_LCDD4,
+	GPIO_FN_LCDD3,  GPIO_FN_LCDD2,  GPIO_FN_LCDD1,  GPIO_FN_LCDD0,
+	GPIO_FN_LCDVSYN,  GPIO_FN_LCDDISP,  GPIO_FN_LCDRS,  GPIO_FN_LCDHSYN,
+	GPIO_FN_LCDCS,    GPIO_FN_LCDDON,   GPIO_FN_LCDDCK, GPIO_FN_LCDWR,
+	GPIO_FN_LCDVEPWC, GPIO_FN_LCDVCPWC, GPIO_FN_LCDRD,  GPIO_FN_LCDLCLK,
+
+	/* SCIF0 (PTF/PTM) */
+	GPIO_FN_SCIF0_TXD, GPIO_FN_SCIF0_RXD, GPIO_FN_SCIF0_SCK,
+
+	/* SCIF1 (PTL) */
+	GPIO_FN_SCIF1_SCK, GPIO_FN_SCIF1_RXD, GPIO_FN_SCIF1_TXD,
+
+	/* SCIF2 (PTE/PTF/PTN) with LCDC, VOU */
+	GPIO_FN_SCIF2_L_TXD, GPIO_FN_SCIF2_L_SCK, GPIO_FN_SCIF2_L_RXD,
+	GPIO_FN_SCIF2_V_TXD, GPIO_FN_SCIF2_V_SCK, GPIO_FN_SCIF2_V_RXD,
+
+	/* SCIF3 (PTL/PTN/PTZ) with VOU, IRQ */
+	GPIO_FN_SCIF3_V_SCK, GPIO_FN_SCIF3_V_RXD, GPIO_FN_SCIF3_V_TXD,
+	GPIO_FN_SCIF3_V_CTS, GPIO_FN_SCIF3_V_RTS,
+	GPIO_FN_SCIF3_I_SCK, GPIO_FN_SCIF3_I_RXD, GPIO_FN_SCIF3_I_TXD,
+	GPIO_FN_SCIF3_I_CTS, GPIO_FN_SCIF3_I_RTS,
+
+	/* SCIF4 (PTE) */
+	GPIO_FN_SCIF4_SCK, GPIO_FN_SCIF4_RXD, GPIO_FN_SCIF4_TXD,
+
+	/* SCIF5 (PTS) */
+	GPIO_FN_SCIF5_SCK, GPIO_FN_SCIF5_RXD, GPIO_FN_SCIF5_TXD,
+
+	/* FSI (PTE/PTU/PTV) */
+	GPIO_FN_FSIMCKB,   GPIO_FN_FSIMCKA,    GPIO_FN_FSIOASD,
+	GPIO_FN_FSIIABCK,  GPIO_FN_FSIIALRCK,  GPIO_FN_FSIOABCK,
+	GPIO_FN_FSIOALRCK, GPIO_FN_CLKAUDIOAO, GPIO_FN_FSIIBSD,
+	GPIO_FN_FSIOBSD,   GPIO_FN_FSIIBBCK,   GPIO_FN_FSIIBLRCK,
+	GPIO_FN_FSIOBBCK,  GPIO_FN_FSIOBLRCK,  GPIO_FN_CLKAUDIOBO,
+	GPIO_FN_FSIIASD,
+
+	/* AUD (PTG) */
+	GPIO_FN_AUDCK,   GPIO_FN_AUDSYNC, GPIO_FN_AUDATA3,
+	GPIO_FN_AUDATA2, GPIO_FN_AUDATA1, GPIO_FN_AUDATA0,
+
+	/* VIO (PTS) (common?) */
+	GPIO_FN_VIO_CKO,
+
+	/* VIO0 (PTH/PTK) */
+	GPIO_FN_VIO0_D15, GPIO_FN_VIO0_D14, GPIO_FN_VIO0_D13, GPIO_FN_VIO0_D12,
+	GPIO_FN_VIO0_D11, GPIO_FN_VIO0_D10, GPIO_FN_VIO0_D9,  GPIO_FN_VIO0_D8,
+	GPIO_FN_VIO0_D7,  GPIO_FN_VIO0_D6,  GPIO_FN_VIO0_D5,  GPIO_FN_VIO0_D4,
+	GPIO_FN_VIO0_D3,  GPIO_FN_VIO0_D2,  GPIO_FN_VIO0_D1,  GPIO_FN_VIO0_D0,
+	GPIO_FN_VIO0_VD,  GPIO_FN_VIO0_CLK,
+	GPIO_FN_VIO0_FLD, GPIO_FN_VIO0_HD,
+
+	/* VIO1 (PTK/PTS) */
+	GPIO_FN_VIO1_D7,  GPIO_FN_VIO1_D6, GPIO_FN_VIO1_D5, GPIO_FN_VIO1_D4,
+	GPIO_FN_VIO1_D3,  GPIO_FN_VIO1_D2, GPIO_FN_VIO1_D1, GPIO_FN_VIO1_D0,
+	GPIO_FN_VIO1_FLD, GPIO_FN_VIO1_HD, GPIO_FN_VIO1_VD, GPIO_FN_VIO1_CLK,
+
+	/* Eth  (PTL/PTN/PTX) */
+	GPIO_FN_RMII_RXD0,    GPIO_FN_RMII_RXD1,
+	GPIO_FN_RMII_TXD0,    GPIO_FN_RMII_TXD1,
+	GPIO_FN_RMII_REF_CLK, GPIO_FN_RMII_TX_EN,
+	GPIO_FN_RMII_RX_ER,   GPIO_FN_RMII_CRS_DV,
+	GPIO_FN_LNKSTA,       GPIO_FN_MDIO,
+	GPIO_FN_MDC,
+
+	/* System (PTJ) */
+	GPIO_FN_PDSTATUS, GPIO_FN_STATUS2, GPIO_FN_STATUS0,
+
+	/* VOU (PTL/PTM/PTN*/
+	GPIO_FN_DV_D15,  GPIO_FN_DV_D14, GPIO_FN_DV_D13,   GPIO_FN_DV_D12,
+	GPIO_FN_DV_D11,  GPIO_FN_DV_D10, GPIO_FN_DV_D9,    GPIO_FN_DV_D8,
+	GPIO_FN_DV_D7,   GPIO_FN_DV_D6,  GPIO_FN_DV_D5,    GPIO_FN_DV_D4,
+	GPIO_FN_DV_D3,   GPIO_FN_DV_D2,  GPIO_FN_DV_D1,    GPIO_FN_DV_D0,
+	GPIO_FN_DV_CLKI, GPIO_FN_DV_CLK, GPIO_FN_DV_VSYNC, GPIO_FN_DV_HSYNC,
+
+	/* MSIOF0 (PTL/PTM) */
+	GPIO_FN_MSIOF0_RXD,   GPIO_FN_MSIOF0_TXD,
+	GPIO_FN_MSIOF0_MCK,   GPIO_FN_MSIOF0_TSCK,
+	GPIO_FN_MSIOF0_SS1,   GPIO_FN_MSIOF0_SS2,
+	GPIO_FN_MSIOF0_TSYNC, GPIO_FN_MSIOF0_RSCK,
+	GPIO_FN_MSIOF0_RSYNC,
+
+	/* MSIOF1 (PTV) */
+	GPIO_FN_MSIOF1_RXD,   GPIO_FN_MSIOF1_TXD,
+	GPIO_FN_MSIOF1_MCK,   GPIO_FN_MSIOF1_TSCK,
+	GPIO_FN_MSIOF1_SS1,   GPIO_FN_MSIOF1_SS2,
+	GPIO_FN_MSIOF1_TSYNC, GPIO_FN_MSIOF1_RSCK,
+	GPIO_FN_MSIOF1_RSYNC,
+
+	/* DMAC (PTU/PTX) */
+	GPIO_FN_DMAC_DACK0, GPIO_FN_DMAC_DREQ0,
+	GPIO_FN_DMAC_DACK1, GPIO_FN_DMAC_DREQ1,
+
+	/* SDHI0 (PTY) */
+	GPIO_FN_SDHI0CD, GPIO_FN_SDHI0WP, GPIO_FN_SDHI0CMD, GPIO_FN_SDHI0CLK,
+	GPIO_FN_SDHI0D3, GPIO_FN_SDHI0D2, GPIO_FN_SDHI0D1,  GPIO_FN_SDHI0D0,
+
+	/* SDHI1 (PTW) */
+	GPIO_FN_SDHI1CD, GPIO_FN_SDHI1WP, GPIO_FN_SDHI1CMD, GPIO_FN_SDHI1CLK,
+	GPIO_FN_SDHI1D3, GPIO_FN_SDHI1D2, GPIO_FN_SDHI1D1,  GPIO_FN_SDHI1D0,
+
+	/* MMC (PTW/PTX)*/
+	GPIO_FN_MMC_D7,  GPIO_FN_MMC_D6,  GPIO_FN_MMC_D5, GPIO_FN_MMC_D4,
+	GPIO_FN_MMC_D3,  GPIO_FN_MMC_D2,  GPIO_FN_MMC_D1, GPIO_FN_MMC_D0,
+	GPIO_FN_MMC_CLK, GPIO_FN_MMC_CMD,
+
+	/* IrDA (PTX) */
+	GPIO_FN_IRDA_OUT, GPIO_FN_IRDA_IN,
+
+	/* TSIF (PTX) */
+	GPIO_FN_TSIF_TS0_SDAT, GPIO_FN_TSIF_TS0_SCK,
+	GPIO_FN_TSIF_TS0_SDEN, GPIO_FN_TSIF_TS0_SPSYNC,
+
+	/* IRQ (PTZ) */
+	GPIO_FN_INTC_IRQ7, GPIO_FN_INTC_IRQ6, GPIO_FN_INTC_IRQ5,
+	GPIO_FN_INTC_IRQ4, GPIO_FN_INTC_IRQ3, GPIO_FN_INTC_IRQ2,
+	GPIO_FN_INTC_IRQ1, GPIO_FN_INTC_IRQ0,
+};
+
+#endif /* __ASM_SH7724_H__ */
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index 91e3677..973ff83 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -156,6 +156,12 @@ int __init detect_cpu_and_cache_system(void)
 			break;
 		}
 		break;
+	case 0x300b:
+		boot_cpu_data.type = CPU_SH7724;
+		boot_cpu_data.icache.ways = 4;
+		boot_cpu_data.dcache.ways = 4;
+		boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_FPU;
+		break;
 	case 0x4000:	/* 1st cut */
 	case 0x4001:	/* 2nd cut */
 		boot_cpu_data.type = CPU_SHX3;
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 1a92361..afd6fba4 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_CPU_SUBTYPE_SH7786)	+= setup-sh7786.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7343)	+= setup-sh7343.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7722)	+= setup-sh7722.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7723)	+= setup-sh7723.o
+obj-$(CONFIG_CPU_SUBTYPE_SH7724)	+= setup-sh7724.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7366)	+= setup-sh7366.o
 obj-$(CONFIG_CPU_SUBTYPE_SHX3)		+= setup-shx3.o
 
@@ -26,12 +27,14 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7786)	:= clock-sh7786.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7722)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7722.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7724)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SHX3)	:= clock-shx3.o
 
 # Pinmux setup
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7722)	:= pinmux-sh7722.o
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7723)	:= pinmux-sh7723.o
+pinmux-$(CONFIG_CPU_SUBTYPE_SH7724)	:= pinmux-sh7724.o
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7785)	:= pinmux-sh7785.o
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7786)	:= pinmux-sh7786.o
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 0e174af..1ccdfc5 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -130,6 +130,12 @@ static void adjust_clocks(int originate, int *l, unsigned long v[],
  * is quite simple..
  */
 
+#if defined(CONFIG_CPU_SUBTYPE_SH7724)
+#define STCPLL(frqcr) ((((frqcr >> 24) & 0x3f) + 1) * 2)
+#else
+#define STCPLL(frqcr) (((frqcr >> 24) & 0x1f) + 1)
+#endif
+
 /*
  * Instead of having two separate multipliers/divisors set, like this:
  *
@@ -139,13 +145,17 @@ static void adjust_clocks(int originate, int *l, unsigned long v[],
  * I created the divisors2 array, which is used to calculate rate like
  *   rate = parent * 2 / divisors2[ divisor ];
 */
+#if defined(CONFIG_CPU_SUBTYPE_SH7724)
+static int divisors2[] = { 4, 1, 8, 12, 16, 24, 32, 1, 48, 64, 72, 96, 1, 144 };
+#else
 static int divisors2[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32, 40 };
+#endif
 
 static void master_clk_recalc(struct clk *clk)
 {
 	unsigned frqcr = ctrl_inl(FRQCR);
 
-	clk->rate = CONFIG_SH_PCLK_FREQ * (((frqcr >> 24) & 0x1f) + 1);
+	clk->rate = CONFIG_SH_PCLK_FREQ * STCPLL(frqcr);
 }
 
 static void master_clk_init(struct clk *clk)
@@ -161,13 +171,30 @@ static void module_clk_recalc(struct clk *clk)
 {
 	unsigned long frqcr = ctrl_inl(FRQCR);
 
-	clk->rate = clk->parent->rate / (((frqcr >> 24) & 0x1f) + 1);
+	clk->rate = clk->parent->rate / STCPLL(frqcr);
 }
 
+#if defined(CONFIG_CPU_SUBTYPE_SH7724)
+#define MASTERDIVS	{ 12, 16, 24, 30, 32, 36, 48 }
+#define STCMASK		0x3f
+#define DIVCALC(div)	(div/2-1)
+#define FRQCRKICK	0x80000000
+#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
+#define MASTERDIVS	{ 6, 8, 12, 16 }
+#define STCMASK		0x1f
+#define DIVCALC(div)	(div-1)
+#define FRQCRKICK	0x00000000
+#else
+#define MASTERDIVS	{ 2, 3, 4, 6, 8, 16 }
+#define STCMASK		0x1f
+#define DIVCALC(div)	(div-1)
+#define FRQCRKICK	0x00000000
+#endif
+
 static int master_clk_setrate(struct clk *clk, unsigned long rate, int id)
 {
 	int div = rate / clk->rate;
-	int master_divs[] = { 2, 3, 4, 6, 8, 16 };
+	int master_divs[] = MASTERDIVS;
 	int index;
 	unsigned long frqcr;
 
@@ -180,8 +207,9 @@ static int master_clk_setrate(struct clk *clk, unsigned long rate, int id)
 	div = master_divs[index - 1];
 
 	frqcr = ctrl_inl(FRQCR);
-	frqcr &= ~(0xF << 24);
-	frqcr |= ( (div-1) << 24);
+	frqcr &= ~(STCMASK << 24);
+	frqcr |= (DIVCALC(div) << 24);
+	frqcr |= FRQCRKICK;
 	ctrl_outl(frqcr, FRQCR);
 
 	return 0;
@@ -377,6 +405,7 @@ static int sh7722_frqcr_set_rate(struct clk *clk, unsigned long rate,
 	/* clear FRQCR bits */
 	frqcr &= ~(ctx.mask << ctx.shift);
 	frqcr |= div << ctx.shift;
+	frqcr |= FRQCRKICK;
 
 	/* ...and perform actual change */
 	ctrl_outl(frqcr, FRQCR);
@@ -542,8 +571,8 @@ static struct clk sh7722_r_clock = {
 	.flags = CLK_RATE_PROPAGATES,
 };
 
-#ifndef CONFIG_CPU_SUBTYPE_SH7343
-
+#if !defined(CONFIG_CPU_SUBTYPE_SH7343) &&\
+    !defined(CONFIG_CPU_SUBTYPE_SH7724)
 /*
  * these three clocks - SIU A, SIU B, IrDA - share the same clk_ops
  * methods of clk_ops determine which register they should access by
@@ -560,15 +589,16 @@ static struct clk sh7722_siu_b_clock = {
 	.arch_flags = SCLKBCR,
 	.ops = &sh7722_siu_clk_ops,
 };
+#endif /* CONFIG_CPU_SUBTYPE_SH7343, SH7724 */
 
-#if defined(CONFIG_CPU_SUBTYPE_SH7722)
+#if defined(CONFIG_CPU_SUBTYPE_SH7722) ||\
+    defined(CONFIG_CPU_SUBTYPE_SH7724)
 static struct clk sh7722_irda_clock = {
 	.name = "irda_clk",
 	.arch_flags = IrDACLKCR,
 	.ops = &sh7722_siu_clk_ops,
 };
 #endif
-#endif /* CONFIG_CPU_SUBTYPE_SH7343 */
 
 static struct clk sh7722_video_clock = {
 	.name = "video_clk",
@@ -715,6 +745,61 @@ static struct clk sh7722_mstpcr_clocks[] = {
 	MSTPCR("vpu0", "bus_clk", 2, 1),
 	MSTPCR("lcdc0", "bus_clk", 2, 0),
 #endif
+#if defined(CONFIG_CPU_SUBTYPE_SH7724)
+	/* See Datasheet : Overview -> Block Diagram */
+	MSTPCR("tlb0", "cpu_clk", 0, 31),
+	MSTPCR("ic0", "cpu_clk", 0, 30),
+	MSTPCR("oc0", "cpu_clk", 0, 29),
+	MSTPCR("rs0", "bus_clk", 0, 28),
+	MSTPCR("ilmem0", "cpu_clk", 0, 27),
+	MSTPCR("l2c0", "sh_clk", 0, 26),
+	MSTPCR("fpu0", "cpu_clk", 0, 24),
+	MSTPCR("intc0", "peripheral_clk", 0, 22),
+	MSTPCR("dmac0", "bus_clk", 0, 21),
+	MSTPCR("sh0", "sh_clk", 0, 20),
+	MSTPCR("hudi0", "peripheral_clk", 0, 19),
+	MSTPCR("ubc0", "cpu_clk", 0, 17),
+	MSTPCR("tmu0", "peripheral_clk", 0, 15),
+	MSTPCR("cmt0", "r_clk", 0, 14),
+	MSTPCR("rwdt0", "r_clk", 0, 13),
+	MSTPCR("dmac1", "bus_clk", 0, 12),
+	MSTPCR("tmu1", "peripheral_clk", 0, 10),
+	MSTPCR("scif0", "peripheral_clk", 0, 9),
+	MSTPCR("scif1", "peripheral_clk", 0, 8),
+	MSTPCR("scif2", "peripheral_clk", 0, 7),
+	MSTPCR("scif3", "bus_clk", 0, 6),
+	MSTPCR("scif4", "bus_clk", 0, 5),
+	MSTPCR("scif5", "bus_clk", 0, 4),
+	MSTPCR("msiof0", "bus_clk", 0, 2),
+	MSTPCR("msiof1", "bus_clk", 0, 1),
+	MSTPCR("keysc0", "r_clk", 1, 12),
+	MSTPCR("rtc0", "r_clk", 1, 11),
+	MSTPCR("i2c0", "peripheral_clk", 1, 9),
+	MSTPCR("i2c1", "peripheral_clk", 1, 8),
+	MSTPCR("mmc0", "bus_clk", 2, 29),
+	MSTPCR("eth0", "bus_clk", 2, 28),
+	MSTPCR("atapi0", "bus_clk", 2, 26),
+	MSTPCR("tpu0", "bus_clk", 2, 25),
+	MSTPCR("irda0", "peripheral_clk", 2, 24),
+	MSTPCR("tsif0", "bus_clk", 2, 22),
+	MSTPCR("usb1", "bus_clk", 2, 21),
+	MSTPCR("usb0", "bus_clk", 2, 20),
+	MSTPCR("2dg0", "bus_clk", 2, 19),
+	MSTPCR("sdhi0", "bus_clk", 2, 18),
+	MSTPCR("sdhi1", "bus_clk", 2, 17),
+	MSTPCR("veu1", "bus_clk", 2, 15),
+	MSTPCR("ceu1", "bus_clk", 2, 13),
+	MSTPCR("beu1", "bus_clk", 2, 12),
+	MSTPCR("2ddmac0", "sh_clk", 2, 10),
+	MSTPCR("spu0", "bus_clk", 2, 9),
+	MSTPCR("jpu0", "bus_clk", 2, 6),
+	MSTPCR("vou0", "bus_clk", 2, 5),
+	MSTPCR("beu0", "bus_clk", 2, 4),
+	MSTPCR("ceu0", "bus_clk", 2, 3),
+	MSTPCR("veu0", "bus_clk", 2, 2),
+	MSTPCR("vpu0", "bus_clk", 2, 1),
+	MSTPCR("lcdc0", "bus_clk", 2, 0),
+#endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7343)
 	MSTPCR("uram0", "umem_clk", 0, 28),
 	MSTPCR("xymem0", "bus_clk", 0, 26),
@@ -786,12 +871,15 @@ static struct clk *sh7722_clocks[] = {
 	&sh7722_sh_clock,
 	&sh7722_peripheral_clock,
 	&sh7722_sdram_clock,
-#ifndef CONFIG_CPU_SUBTYPE_SH7343
+#if !defined(CONFIG_CPU_SUBTYPE_SH7343) &&\
+    !defined(CONFIG_CPU_SUBTYPE_SH7724)
 	&sh7722_siu_a_clock,
 	&sh7722_siu_b_clock,
-#if defined(CONFIG_CPU_SUBTYPE_SH7722)
-	&sh7722_irda_clock,
 #endif
+/* 7724 should support FSI clock */
+#if defined(CONFIG_CPU_SUBTYPE_SH7722) || \
+    defined(CONFIG_CPU_SUBTYPE_SH7724)
+	&sh7722_irda_clock,
 #endif
 	&sh7722_video_clock,
 };
diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c
new file mode 100644
index 0000000..1af0f95
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c
@@ -0,0 +1,2230 @@
+/*
+ * SH7724 Pinmux
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on SH7723 Pinmux
+ *  Copyright (C) 2008  Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <cpu/sh7724.h>
+
+enum {
+	PINMUX_RESERVED = 0,
+
+	PINMUX_DATA_BEGIN,
+	PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+	PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA,
+	PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+	PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA,
+	PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+	PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA,
+	PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+	PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA,
+	PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA,
+	PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA,
+	PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+	PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA,
+			      PTG5_DATA, PTG4_DATA,
+	PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA,
+	PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+	PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA,
+	PTJ7_DATA, PTJ6_DATA, PTJ5_DATA,
+	PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA,
+	PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+	PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA,
+	PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+	PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA,
+	PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+	PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA,
+	PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+	PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA,
+	PTQ7_DATA, PTQ6_DATA, PTQ5_DATA, PTQ4_DATA,
+	PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA,
+	PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+	PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA,
+		   PTS6_DATA, PTS5_DATA, PTS4_DATA,
+	PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA,
+	PTT7_DATA, PTT6_DATA, PTT5_DATA, PTT4_DATA,
+	PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA,
+	PTU7_DATA, PTU6_DATA, PTU5_DATA, PTU4_DATA,
+	PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA,
+	PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA,
+	PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA,
+	PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+	PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA,
+	PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+	PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA,
+	PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+	PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA,
+	PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA,
+	PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA,
+	PINMUX_DATA_END,
+
+	PINMUX_INPUT_BEGIN,
+	PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN,
+	PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN,
+	PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN,
+	PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN,
+	PTC7_IN, PTC6_IN, PTC5_IN, PTC4_IN,
+	PTC3_IN, PTC2_IN, PTC1_IN, PTC0_IN,
+	PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN,
+	PTD3_IN, PTD2_IN, PTD1_IN, PTD0_IN,
+	PTE7_IN, PTE6_IN, PTE5_IN, PTE4_IN,
+	PTE3_IN, PTE2_IN, PTE1_IN, PTE0_IN,
+	PTF7_IN, PTF6_IN, PTF5_IN, PTF4_IN,
+	PTF3_IN, PTF2_IN, PTF1_IN, PTF0_IN,
+	PTH7_IN, PTH6_IN, PTH5_IN, PTH4_IN,
+	PTH3_IN, PTH2_IN, PTH1_IN, PTH0_IN,
+	PTJ3_IN, PTJ2_IN, PTJ1_IN, PTJ0_IN,
+	PTK7_IN, PTK6_IN, PTK5_IN, PTK4_IN,
+	PTK3_IN, PTK2_IN, PTK1_IN, PTK0_IN,
+	PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN,
+	PTL3_IN, PTL2_IN, PTL1_IN, PTL0_IN,
+	PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN,
+	PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN,
+	PTN7_IN, PTN6_IN, PTN5_IN, PTN4_IN,
+	PTN3_IN, PTN2_IN, PTN1_IN, PTN0_IN,
+	PTQ7_IN, PTQ6_IN, PTQ5_IN, PTQ4_IN,
+	PTQ3_IN, PTQ2_IN, PTQ1_IN, PTQ0_IN,
+	PTR7_IN, PTR6_IN, PTR5_IN, PTR4_IN,
+	PTR3_IN, PTR2_IN, PTR1_IN, PTR0_IN,
+		 PTS6_IN, PTS5_IN, PTS4_IN,
+	PTS3_IN, PTS2_IN, PTS1_IN, PTS0_IN,
+	PTT7_IN, PTT6_IN, PTT5_IN, PTT4_IN,
+	PTT3_IN, PTT2_IN, PTT1_IN, PTT0_IN,
+	PTU7_IN, PTU6_IN, PTU5_IN, PTU4_IN,
+	PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN,
+	PTV7_IN, PTV6_IN, PTV5_IN, PTV4_IN,
+	PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN,
+	PTW7_IN, PTW6_IN, PTW5_IN, PTW4_IN,
+	PTW3_IN, PTW2_IN, PTW1_IN, PTW0_IN,
+	PTX7_IN, PTX6_IN, PTX5_IN, PTX4_IN,
+	PTX3_IN, PTX2_IN, PTX1_IN, PTX0_IN,
+	PTY7_IN, PTY6_IN, PTY5_IN, PTY4_IN,
+	PTY3_IN, PTY2_IN, PTY1_IN, PTY0_IN,
+	PTZ7_IN, PTZ6_IN, PTZ5_IN, PTZ4_IN,
+	PTZ3_IN, PTZ2_IN, PTZ1_IN, PTZ0_IN,
+	PINMUX_INPUT_END,
+
+	PINMUX_INPUT_PULLUP_BEGIN,
+	PTA7_IN_PU, PTA6_IN_PU, PTA5_IN_PU, PTA4_IN_PU,
+	PTA3_IN_PU, PTA2_IN_PU, PTA1_IN_PU, PTA0_IN_PU,
+	PTB7_IN_PU, PTB6_IN_PU, PTB5_IN_PU, PTB4_IN_PU,
+	PTB3_IN_PU, PTB2_IN_PU, PTB1_IN_PU, PTB0_IN_PU,
+	PTC7_IN_PU, PTC6_IN_PU, PTC5_IN_PU, PTC4_IN_PU,
+	PTC3_IN_PU, PTC2_IN_PU, PTC1_IN_PU, PTC0_IN_PU,
+	PTD7_IN_PU, PTD6_IN_PU, PTD5_IN_PU, PTD4_IN_PU,
+	PTD3_IN_PU, PTD2_IN_PU, PTD1_IN_PU, PTD0_IN_PU,
+	PTE7_IN_PU, PTE6_IN_PU, PTE5_IN_PU, PTE4_IN_PU,
+	PTE3_IN_PU, PTE2_IN_PU, PTE1_IN_PU, PTE0_IN_PU,
+	PTF7_IN_PU, PTF6_IN_PU, PTF5_IN_PU, PTF4_IN_PU,
+	PTF3_IN_PU, PTF2_IN_PU, PTF1_IN_PU, PTF0_IN_PU,
+	PTH7_IN_PU, PTH6_IN_PU, PTH5_IN_PU, PTH4_IN_PU,
+	PTH3_IN_PU, PTH2_IN_PU, PTH1_IN_PU, PTH0_IN_PU,
+	PTJ3_IN_PU, PTJ2_IN_PU, PTJ1_IN_PU, PTJ0_IN_PU,
+	PTK7_IN_PU, PTK6_IN_PU, PTK5_IN_PU, PTK4_IN_PU,
+	PTK3_IN_PU, PTK2_IN_PU, PTK1_IN_PU, PTK0_IN_PU,
+	PTL7_IN_PU, PTL6_IN_PU, PTL5_IN_PU, PTL4_IN_PU,
+	PTL3_IN_PU, PTL2_IN_PU, PTL1_IN_PU, PTL0_IN_PU,
+	PTM7_IN_PU, PTM6_IN_PU, PTM5_IN_PU, PTM4_IN_PU,
+	PTM3_IN_PU, PTM2_IN_PU, PTM1_IN_PU, PTM0_IN_PU,
+	PTN7_IN_PU, PTN6_IN_PU, PTN5_IN_PU, PTN4_IN_PU,
+	PTN3_IN_PU, PTN2_IN_PU, PTN1_IN_PU, PTN0_IN_PU,
+	PTQ7_IN_PU, PTQ6_IN_PU, PTQ5_IN_PU, PTQ4_IN_PU,
+	PTQ3_IN_PU, PTQ2_IN_PU, PTQ1_IN_PU, PTQ0_IN_PU,
+	PTR7_IN_PU, PTR6_IN_PU, PTR5_IN_PU, PTR4_IN_PU,
+	PTR3_IN_PU, PTR2_IN_PU, PTR1_IN_PU, PTR0_IN_PU,
+		    PTS6_IN_PU, PTS5_IN_PU, PTS4_IN_PU,
+	PTS3_IN_PU, PTS2_IN_PU, PTS1_IN_PU, PTS0_IN_PU,
+	PTT7_IN_PU, PTT6_IN_PU, PTT5_IN_PU, PTT4_IN_PU,
+	PTT3_IN_PU, PTT2_IN_PU, PTT1_IN_PU, PTT0_IN_PU,
+	PTU7_IN_PU, PTU6_IN_PU, PTU5_IN_PU, PTU4_IN_PU,
+	PTU3_IN_PU, PTU2_IN_PU, PTU1_IN_PU, PTU0_IN_PU,
+	PTV7_IN_PU, PTV6_IN_PU, PTV5_IN_PU, PTV4_IN_PU,
+	PTV3_IN_PU, PTV2_IN_PU, PTV1_IN_PU, PTV0_IN_PU,
+	PTW7_IN_PU, PTW6_IN_PU, PTW5_IN_PU, PTW4_IN_PU,
+	PTW3_IN_PU, PTW2_IN_PU, PTW1_IN_PU, PTW0_IN_PU,
+	PTX7_IN_PU, PTX6_IN_PU, PTX5_IN_PU, PTX4_IN_PU,
+	PTX3_IN_PU, PTX2_IN_PU, PTX1_IN_PU, PTX0_IN_PU,
+	PTY7_IN_PU, PTY6_IN_PU, PTY5_IN_PU, PTY4_IN_PU,
+	PTY3_IN_PU, PTY2_IN_PU, PTY1_IN_PU, PTY0_IN_PU,
+	PTZ7_IN_PU, PTZ6_IN_PU, PTZ5_IN_PU, PTZ4_IN_PU,
+	PTZ3_IN_PU, PTZ2_IN_PU, PTZ1_IN_PU, PTZ0_IN_PU,
+	PINMUX_INPUT_PULLUP_END,
+
+	PINMUX_OUTPUT_BEGIN,
+	PTA7_OUT, PTA6_OUT, PTA5_OUT, PTA4_OUT,
+	PTA3_OUT, PTA2_OUT, PTA1_OUT, PTA0_OUT,
+	PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT,
+	PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT,
+	PTC7_OUT, PTC6_OUT, PTC5_OUT, PTC4_OUT,
+	PTC3_OUT, PTC2_OUT, PTC1_OUT, PTC0_OUT,
+	PTD7_OUT, PTD6_OUT, PTD5_OUT, PTD4_OUT,
+	PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT,
+	PTE7_OUT, PTE6_OUT, PTE5_OUT, PTE4_OUT,
+	PTE3_OUT, PTE2_OUT, PTE1_OUT, PTE0_OUT,
+	PTF7_OUT, PTF6_OUT, PTF5_OUT, PTF4_OUT,
+	PTF3_OUT, PTF2_OUT, PTF1_OUT, PTF0_OUT,
+			    PTG5_OUT, PTG4_OUT,
+	PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT,
+	PTH7_OUT, PTH6_OUT, PTH5_OUT, PTH4_OUT,
+	PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT,
+	PTJ7_OUT, PTJ6_OUT, PTJ5_OUT,
+	PTJ3_OUT, PTJ2_OUT, PTJ1_OUT, PTJ0_OUT,
+	PTK7_OUT, PTK6_OUT, PTK5_OUT, PTK4_OUT,
+	PTK3_OUT, PTK2_OUT, PTK1_OUT, PTK0_OUT,
+	PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT,
+	PTL3_OUT, PTL2_OUT, PTL1_OUT, PTL0_OUT,
+	PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT,
+	PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT,
+	PTN7_OUT, PTN6_OUT, PTN5_OUT, PTN4_OUT,
+	PTN3_OUT, PTN2_OUT, PTN1_OUT, PTN0_OUT,
+	PTQ7_OUT, PTQ6_OUT, PTQ5_OUT, PTQ4_OUT,
+	PTQ3_OUT, PTQ2_OUT, PTQ1_OUT, PTQ0_OUT,
+	PTR7_OUT, PTR6_OUT, PTR5_OUT, PTR4_OUT,
+			    PTR1_OUT, PTR0_OUT,
+		  PTS6_OUT, PTS5_OUT, PTS4_OUT,
+	PTS3_OUT, PTS2_OUT, PTS1_OUT, PTS0_OUT,
+	PTT7_OUT, PTT6_OUT, PTT5_OUT, PTT4_OUT,
+	PTT3_OUT, PTT2_OUT, PTT1_OUT, PTT0_OUT,
+	PTU7_OUT, PTU6_OUT, PTU5_OUT, PTU4_OUT,
+	PTU3_OUT, PTU2_OUT, PTU1_OUT, PTU0_OUT,
+	PTV7_OUT, PTV6_OUT, PTV5_OUT, PTV4_OUT,
+	PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT,
+	PTW7_OUT, PTW6_OUT, PTW5_OUT, PTW4_OUT,
+	PTW3_OUT, PTW2_OUT, PTW1_OUT, PTW0_OUT,
+	PTX7_OUT, PTX6_OUT, PTX5_OUT, PTX4_OUT,
+	PTX3_OUT, PTX2_OUT, PTX1_OUT, PTX0_OUT,
+	PTY7_OUT, PTY6_OUT, PTY5_OUT, PTY4_OUT,
+	PTY3_OUT, PTY2_OUT, PTY1_OUT, PTY0_OUT,
+	PTZ7_OUT, PTZ6_OUT, PTZ5_OUT, PTZ4_OUT,
+	PTZ3_OUT, PTZ2_OUT, PTZ1_OUT, PTZ0_OUT,
+	PINMUX_OUTPUT_END,
+
+	PINMUX_FUNCTION_BEGIN,
+	PTA7_FN, PTA6_FN, PTA5_FN, PTA4_FN,
+	PTA3_FN, PTA2_FN, PTA1_FN, PTA0_FN,
+	PTB7_FN, PTB6_FN, PTB5_FN, PTB4_FN,
+	PTB3_FN, PTB2_FN, PTB1_FN, PTB0_FN,
+	PTC7_FN, PTC6_FN, PTC5_FN, PTC4_FN,
+	PTC3_FN, PTC2_FN, PTC1_FN, PTC0_FN,
+	PTD7_FN, PTD6_FN, PTD5_FN, PTD4_FN,
+	PTD3_FN, PTD2_FN, PTD1_FN, PTD0_FN,
+	PTE7_FN, PTE6_FN, PTE5_FN, PTE4_FN,
+	PTE3_FN, PTE2_FN, PTE1_FN, PTE0_FN,
+	PTF7_FN, PTF6_FN, PTF5_FN, PTF4_FN,
+	PTF3_FN, PTF2_FN, PTF1_FN, PTF0_FN,
+			  PTG5_FN, PTG4_FN,
+	PTG3_FN, PTG2_FN, PTG1_FN, PTG0_FN,
+	PTH7_FN, PTH6_FN, PTH5_FN, PTH4_FN,
+	PTH3_FN, PTH2_FN, PTH1_FN, PTH0_FN,
+	PTJ7_FN, PTJ6_FN, PTJ5_FN,
+	PTJ3_FN, PTJ2_FN, PTJ1_FN, PTJ0_FN,
+	PTK7_FN, PTK6_FN, PTK5_FN, PTK4_FN,
+	PTK3_FN, PTK2_FN, PTK1_FN, PTK0_FN,
+	PTL7_FN, PTL6_FN, PTL5_FN, PTL4_FN,
+	PTL3_FN, PTL2_FN, PTL1_FN, PTL0_FN,
+	PTM7_FN, PTM6_FN, PTM5_FN, PTM4_FN,
+	PTM3_FN, PTM2_FN, PTM1_FN, PTM0_FN,
+	PTN7_FN, PTN6_FN, PTN5_FN, PTN4_FN,
+	PTN3_FN, PTN2_FN, PTN1_FN, PTN0_FN,
+	PTQ7_FN, PTQ6_FN, PTQ5_FN, PTQ4_FN,
+	PTQ3_FN, PTQ2_FN, PTQ1_FN, PTQ0_FN,
+	PTR7_FN, PTR6_FN, PTR5_FN, PTR4_FN,
+	PTR3_FN, PTR2_FN, PTR1_FN, PTR0_FN,
+		 PTS6_FN, PTS5_FN, PTS4_FN,
+	PTS3_FN, PTS2_FN, PTS1_FN, PTS0_FN,
+	PTT7_FN, PTT6_FN, PTT5_FN, PTT4_FN,
+	PTT3_FN, PTT2_FN, PTT1_FN, PTT0_FN,
+	PTU7_FN, PTU6_FN, PTU5_FN, PTU4_FN,
+	PTU3_FN, PTU2_FN, PTU1_FN, PTU0_FN,
+	PTV7_FN, PTV6_FN, PTV5_FN, PTV4_FN,
+	PTV3_FN, PTV2_FN, PTV1_FN, PTV0_FN,
+	PTW7_FN, PTW6_FN, PTW5_FN, PTW4_FN,
+	PTW3_FN, PTW2_FN, PTW1_FN, PTW0_FN,
+	PTX7_FN, PTX6_FN, PTX5_FN, PTX4_FN,
+	PTX3_FN, PTX2_FN, PTX1_FN, PTX0_FN,
+	PTY7_FN, PTY6_FN, PTY5_FN, PTY4_FN,
+	PTY3_FN, PTY2_FN, PTY1_FN, PTY0_FN,
+	PTZ7_FN, PTZ6_FN, PTZ5_FN, PTZ4_FN,
+	PTZ3_FN, PTZ2_FN, PTZ1_FN, PTZ0_FN,
+
+
+	PSA15_0, PSA15_1,
+	PSA14_0, PSA14_1,
+	PSA13_0, PSA13_1,
+	PSA12_0, PSA12_1,
+	PSA10_0, PSA10_1,
+	PSA9_0,  PSA9_1,
+	PSA8_0,  PSA8_1,
+	PSA7_0,  PSA7_1,
+	PSA6_0,  PSA6_1,
+	PSA5_0,  PSA5_1,
+	PSA3_0,  PSA3_1,
+	PSA2_0,  PSA2_1,
+	PSA1_0,  PSA1_1,
+	PSA0_0,  PSA0_1,
+
+	PSB14_0, PSB14_1,
+	PSB13_0, PSB13_1,
+	PSB12_0, PSB12_1,
+	PSB11_0, PSB11_1,
+	PSB10_0, PSB10_1,
+	PSB9_0,  PSB9_1,
+	PSB8_0,  PSB8_1,
+	PSB7_0,  PSB7_1,
+	PSB6_0,  PSB6_1,
+	PSB5_0,  PSB5_1,
+	PSB4_0,  PSB4_1,
+	PSB3_0,  PSB3_1,
+	PSB2_0,  PSB2_1,
+	PSB1_0,  PSB1_1,
+	PSB0_0,  PSB0_1,
+
+	PSC15_0, PSC15_1,
+	PSC14_0, PSC14_1,
+	PSC13_0, PSC13_1,
+	PSC12_0, PSC12_1,
+	PSC11_0, PSC11_1,
+	PSC10_0, PSC10_1,
+	PSC9_0,  PSC9_1,
+	PSC8_0,  PSC8_1,
+	PSC7_0,  PSC7_1,
+	PSC6_0,  PSC6_1,
+	PSC5_0,  PSC5_1,
+	PSC4_0,  PSC4_1,
+	PSC2_0,  PSC2_1,
+	PSC1_0,  PSC1_1,
+	PSC0_0,  PSC0_1,
+
+	PSD15_0, PSD15_1,
+	PSD14_0, PSD14_1,
+	PSD13_0, PSD13_1,
+	PSD12_0, PSD12_1,
+	PSD11_0, PSD11_1,
+	PSD10_0, PSD10_1,
+	PSD9_0,  PSD9_1,
+	PSD8_0,  PSD8_1,
+	PSD7_0,  PSD7_1,
+	PSD6_0,  PSD6_1,
+	PSD5_0,  PSD5_1,
+	PSD4_0,  PSD4_1,
+	PSD3_0,  PSD3_1,
+	PSD2_0,  PSD2_1,
+	PSD1_0,  PSD1_1,
+	PSD0_0,  PSD0_1,
+
+	PSE15_0, PSE15_1,
+	PSE14_0, PSE14_1,
+	PSE13_0, PSE13_1,
+	PSE12_0, PSE12_1,
+	PSE11_0, PSE11_1,
+	PSE10_0, PSE10_1,
+	PSE9_0,  PSE9_1,
+	PSE8_0,  PSE8_1,
+	PSE7_0,  PSE7_1,
+	PSE6_0,  PSE6_1,
+	PSE5_0,  PSE5_1,
+	PSE4_0,  PSE4_1,
+	PSE3_0,  PSE3_1,
+	PSE2_0,  PSE2_1,
+	PSE1_0,  PSE1_1,
+	PSE0_0,  PSE0_1,
+	PINMUX_FUNCTION_END,
+
+	PINMUX_MARK_BEGIN,
+	/*PTA*/
+	D23_MARK,	KEYOUT2_MARK,		IDED15_MARK,
+	D22_MARK,	KEYOUT1_MARK,		IDED14_MARK,
+	D21_MARK,	KEYOUT0_MARK,		IDED13_MARK,
+	D20_MARK,	KEYIN4_MARK,		IDED12_MARK,
+	D19_MARK,	KEYIN3_MARK,		IDED11_MARK,
+	D18_MARK,	KEYIN2_MARK,		IDED10_MARK,
+	D17_MARK,	KEYIN1_MARK,		IDED9_MARK,
+	D16_MARK,	KEYIN0_MARK,		IDED8_MARK,
+
+	/*PTB*/
+	D31_MARK,	TPUTO1_MARK,		IDEA1_MARK,
+	D30_MARK,	TPUTO0_MARK,		IDEA0_MARK,
+	D29_MARK,				IODREQ_MARK,
+	D28_MARK,				IDECS0_MARK,
+	D27_MARK,				IDECS1_MARK,
+	D26_MARK,	KEYOUT5_IN5_MARK,	IDEIORD_MARK,
+	D25_MARK,	KEYOUT4_IN6_MARK,	IDEIOWR_MARK,
+	D24_MARK,	KEYOUT3_MARK,		IDEINT_MARK,
+
+	/*PTC*/
+	LCDD7_MARK,
+	LCDD6_MARK,
+	LCDD5_MARK,
+	LCDD4_MARK,
+	LCDD3_MARK,
+	LCDD2_MARK,
+	LCDD1_MARK,
+	LCDD0_MARK,
+
+	/*PTD*/
+	LCDD15_MARK,
+	LCDD14_MARK,
+	LCDD13_MARK,
+	LCDD12_MARK,
+	LCDD11_MARK,
+	LCDD10_MARK,
+	LCDD9_MARK,
+	LCDD8_MARK,
+
+	/*PTE*/
+	FSIMCKB_MARK,
+	FSIMCKA_MARK,
+	LCDD21_MARK,	SCIF2_L_TXD_MARK,
+	LCDD20_MARK,	SCIF4_SCK_MARK,
+	LCDD19_MARK,	SCIF4_RXD_MARK,
+	LCDD18_MARK,	SCIF4_TXD_MARK,
+	LCDD17_MARK,
+	LCDD16_MARK,
+
+	/*PTF*/
+	LCDVSYN_MARK,
+	LCDDISP_MARK,	LCDRS_MARK,
+	LCDHSYN_MARK,	LCDCS_MARK,
+	LCDDON_MARK,
+	LCDDCK_MARK,	LCDWR_MARK,
+	LCDVEPWC_MARK,	SCIF0_TXD_MARK,
+	LCDD23_MARK,	SCIF2_L_SCK_MARK,
+	LCDD22_MARK,	SCIF2_L_RXD_MARK,
+
+	/*PTG*/
+	AUDCK_MARK,
+	AUDSYNC_MARK,
+	AUDATA3_MARK,
+	AUDATA2_MARK,
+	AUDATA1_MARK,
+	AUDATA0_MARK,
+
+	/*PTH*/
+	VIO0_VD_MARK,
+	VIO0_CLK_MARK,
+	VIO0_D7_MARK,
+	VIO0_D6_MARK,
+	VIO0_D5_MARK,
+	VIO0_D4_MARK,
+	VIO0_D3_MARK,
+	VIO0_D2_MARK,
+
+	/*PTJ*/
+	PDSTATUS_MARK,
+	STATUS2_MARK,
+	STATUS0_MARK,
+	A25_MARK,		BS_MARK,
+	A24_MARK,
+	A23_MARK,
+	A22_MARK,
+
+	/*PTK*/
+	VIO1_D5_MARK,	VIO0_D13_MARK,	IDED5_MARK,
+	VIO1_D4_MARK,	VIO0_D12_MARK,	IDED4_MARK,
+	VIO1_D3_MARK,	VIO0_D11_MARK,	IDED3_MARK,
+	VIO1_D2_MARK,	VIO0_D10_MARK,	IDED2_MARK,
+	VIO1_D1_MARK,	VIO0_D9_MARK,	IDED1_MARK,
+	VIO1_D0_MARK,	VIO0_D8_MARK,	IDED0_MARK,
+	VIO0_FLD_MARK,
+	VIO0_HD_MARK,
+
+	/*PTL*/
+	DV_D5_MARK,	SCIF3_V_SCK_MARK,	RMII_RXD0_MARK,
+	DV_D4_MARK,	SCIF3_V_RXD_MARK,	RMII_RXD1_MARK,
+	DV_D3_MARK,	SCIF3_V_TXD_MARK,	RMII_REF_CLK_MARK,
+	DV_D2_MARK,	SCIF1_SCK_MARK,		RMII_TX_EN_MARK,
+	DV_D1_MARK,	SCIF1_RXD_MARK,		RMII_TXD0_MARK,
+	DV_D0_MARK,	SCIF1_TXD_MARK,		RMII_TXD1_MARK,
+	DV_D15_MARK,
+	DV_D14_MARK,	MSIOF0_MCK_MARK,
+
+	/*PTM*/
+	DV_D13_MARK,	MSIOF0_TSCK_MARK,
+	DV_D12_MARK,	MSIOF0_RXD_MARK,
+	DV_D11_MARK,	MSIOF0_TXD_MARK,
+	DV_D10_MARK,	MSIOF0_TSYNC_MARK,
+	DV_D9_MARK,	MSIOF0_SS1_MARK,	MSIOF0_RSCK_MARK,
+	DV_D8_MARK,	MSIOF0_SS2_MARK,	MSIOF0_RSYNC_MARK,
+	LCDVCPWC_MARK,	SCIF0_RXD_MARK,
+	LCDRD_MARK,	SCIF0_SCK_MARK,
+
+	/*PTN*/
+	VIO0_D1_MARK,
+	VIO0_D0_MARK,
+	DV_CLKI_MARK,
+	DV_CLK_MARK,	SCIF2_V_SCK_MARK,
+	DV_VSYNC_MARK,	SCIF2_V_RXD_MARK,
+	DV_HSYNC_MARK,	SCIF2_V_TXD_MARK,
+	DV_D7_MARK,	SCIF3_V_CTS_MARK,	RMII_RX_ER_MARK,
+	DV_D6_MARK,	SCIF3_V_RTS_MARK,	RMII_CRS_DV_MARK,
+
+	/*PTQ*/
+	D7_MARK,
+	D6_MARK,
+	D5_MARK,
+	D4_MARK,
+	D3_MARK,
+	D2_MARK,
+	D1_MARK,
+	D0_MARK,
+
+	/*PTR*/
+	CS6B_CE1B_MARK,
+	CS6A_CE2B_MARK,
+	CS5B_CE1A_MARK,
+	CS5A_CE2A_MARK,
+	IOIS16_MARK,		LCDLCLK_MARK,
+	WAIT_MARK,
+	WE3_ICIOWR_MARK,	TPUTO3_MARK,	TPUTI3_MARK,
+	WE2_ICIORD_MARK,	TPUTO2_MARK,	IDEA2_MARK,
+
+	/*PTS*/
+	VIO_CKO_MARK,
+	VIO1_FLD_MARK,	TPUTI2_MARK,		IDEIORDY_MARK,
+	VIO1_HD_MARK,	SCIF5_SCK_MARK,
+	VIO1_VD_MARK,	SCIF5_RXD_MARK,
+	VIO1_CLK_MARK,	SCIF5_TXD_MARK,
+	VIO1_D7_MARK,	VIO0_D15_MARK,		IDED7_MARK,
+	VIO1_D6_MARK,	VIO0_D14_MARK,		IDED6_MARK,
+
+	/*PTT*/
+	D15_MARK,
+	D14_MARK,
+	D13_MARK,
+	D12_MARK,
+	D11_MARK,
+	D10_MARK,
+	D9_MARK,
+	D8_MARK,
+
+	/*PTU*/
+	DMAC_DACK0_MARK,
+	DMAC_DREQ0_MARK,
+	FSIOASD_MARK,
+	FSIIABCK_MARK,
+	FSIIALRCK_MARK,
+	FSIOABCK_MARK,
+	FSIOALRCK_MARK,
+	CLKAUDIOAO_MARK,
+
+	/*PTV*/
+	FSIIBSD_MARK,		MSIOF1_SS2_MARK,	MSIOF1_RSYNC_MARK,
+	FSIOBSD_MARK,		MSIOF1_SS1_MARK,	MSIOF1_RSCK_MARK,
+	FSIIBBCK_MARK,		MSIOF1_RXD_MARK,
+	FSIIBLRCK_MARK,		MSIOF1_TSYNC_MARK,
+	FSIOBBCK_MARK,		MSIOF1_TSCK_MARK,
+	FSIOBLRCK_MARK,		MSIOF1_TXD_MARK,
+	CLKAUDIOBO_MARK,	MSIOF1_MCK_MARK,
+	FSIIASD_MARK,
+
+	/*PTW*/
+	MMC_D7_MARK,		SDHI1CD_MARK,		IODACK_MARK,
+	MMC_D6_MARK,		SDHI1WP_MARK,		IDERST_MARK,
+	MMC_D5_MARK,		SDHI1D3_MARK,		EXBUF_ENB_MARK,
+	MMC_D4_MARK,		SDHI1D2_MARK,		DIRECTION_MARK,
+	MMC_D3_MARK,		SDHI1D1_MARK,
+	MMC_D2_MARK,		SDHI1D0_MARK,
+	MMC_D1_MARK,		SDHI1CMD_MARK,
+	MMC_D0_MARK,		SDHI1CLK_MARK,
+
+	/*PTX*/
+	DMAC_DACK1_MARK,	IRDA_OUT_MARK,
+	DMAC_DREQ1_MARK,	IRDA_IN_MARK,
+	TSIF_TS0_SDAT_MARK,				LNKSTA_MARK,
+	TSIF_TS0_SCK_MARK,				MDIO_MARK,
+	TSIF_TS0_SDEN_MARK,				MDC_MARK,
+	TSIF_TS0_SPSYNC_MARK,
+	MMC_CLK_MARK,
+	MMC_CMD_MARK,
+
+	/*PTY*/
+	SDHI0CD_MARK,
+	SDHI0WP_MARK,
+	SDHI0D3_MARK,
+	SDHI0D2_MARK,
+	SDHI0D1_MARK,
+	SDHI0D0_MARK,
+	SDHI0CMD_MARK,
+	SDHI0CLK_MARK,
+
+	/*PTZ*/
+	INTC_IRQ7_MARK,		SCIF3_I_CTS_MARK,
+	INTC_IRQ6_MARK,		SCIF3_I_RTS_MARK,
+	INTC_IRQ5_MARK,		SCIF3_I_SCK_MARK,
+	INTC_IRQ4_MARK,		SCIF3_I_RXD_MARK,
+	INTC_IRQ3_MARK,		SCIF3_I_TXD_MARK,
+	INTC_IRQ2_MARK,
+	INTC_IRQ1_MARK,
+	INTC_IRQ0_MARK,
+	PINMUX_MARK_END,
+};
+
+static pinmux_enum_t pinmux_data[] = {
+	/* PTA GPIO */
+	PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_OUT, PTA7_IN_PU),
+	PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_OUT, PTA6_IN_PU),
+	PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_OUT, PTA5_IN_PU),
+	PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_OUT, PTA4_IN_PU),
+	PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_OUT, PTA3_IN_PU),
+	PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_OUT, PTA2_IN_PU),
+	PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_OUT, PTA1_IN_PU),
+	PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_OUT, PTA0_IN_PU),
+
+	/* PTB GPIO */
+	PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT, PTB7_IN_PU),
+	PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT, PTB6_IN_PU),
+	PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT, PTB5_IN_PU),
+	PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT, PTB4_IN_PU),
+	PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT, PTB3_IN_PU),
+	PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT, PTB2_IN_PU),
+	PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT, PTB1_IN_PU),
+	PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT, PTB0_IN_PU),
+
+	/* PTC GPIO */
+	PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_OUT, PTC7_IN_PU),
+	PINMUX_DATA(PTC6_DATA, PTC6_IN, PTC6_OUT, PTC6_IN_PU),
+	PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_OUT, PTC5_IN_PU),
+	PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT, PTC4_IN_PU),
+	PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT, PTC3_IN_PU),
+	PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT, PTC2_IN_PU),
+	PINMUX_DATA(PTC1_DATA, PTC1_IN, PTC1_OUT, PTC1_IN_PU),
+	PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT, PTC0_IN_PU),
+
+	/* PTD GPIO */
+	PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_OUT, PTD7_IN_PU),
+	PINMUX_DATA(PTD6_DATA, PTD6_IN, PTD6_OUT, PTD6_IN_PU),
+	PINMUX_DATA(PTD5_DATA, PTD5_IN, PTD5_OUT, PTD5_IN_PU),
+	PINMUX_DATA(PTD4_DATA, PTD4_IN, PTD4_OUT, PTD4_IN_PU),
+	PINMUX_DATA(PTD3_DATA, PTD3_IN, PTD3_OUT, PTD3_IN_PU),
+	PINMUX_DATA(PTD2_DATA, PTD2_IN, PTD2_OUT, PTD2_IN_PU),
+	PINMUX_DATA(PTD1_DATA, PTD1_IN, PTD1_OUT, PTD1_IN_PU),
+	PINMUX_DATA(PTD0_DATA, PTD0_IN, PTD0_OUT, PTD0_IN_PU),
+
+	/* PTE GPIO */
+	PINMUX_DATA(PTE7_DATA, PTE7_IN, PTE7_OUT, PTE7_IN_PU),
+	PINMUX_DATA(PTE6_DATA, PTE6_IN, PTE6_OUT, PTE6_IN_PU),
+	PINMUX_DATA(PTE5_DATA, PTE5_IN, PTE5_OUT, PTE5_IN_PU),
+	PINMUX_DATA(PTE4_DATA, PTE4_IN, PTE4_OUT, PTE4_IN_PU),
+	PINMUX_DATA(PTE3_DATA, PTE3_IN, PTE3_OUT, PTE3_IN_PU),
+	PINMUX_DATA(PTE2_DATA, PTE2_IN, PTE2_OUT, PTE2_IN_PU),
+	PINMUX_DATA(PTE1_DATA, PTE1_IN, PTE1_OUT, PTE1_IN_PU),
+	PINMUX_DATA(PTE0_DATA, PTE0_IN, PTE0_OUT, PTE0_IN_PU),
+
+	/* PTF GPIO */
+	PINMUX_DATA(PTF7_DATA, PTF7_IN, PTF7_OUT, PTF7_IN_PU),
+	PINMUX_DATA(PTF6_DATA, PTF6_IN, PTF6_OUT, PTF6_IN_PU),
+	PINMUX_DATA(PTF5_DATA, PTF5_IN, PTF5_OUT, PTF5_IN_PU),
+	PINMUX_DATA(PTF4_DATA, PTF4_IN, PTF4_OUT, PTF4_IN_PU),
+	PINMUX_DATA(PTF3_DATA, PTF3_IN, PTF3_OUT, PTF3_IN_PU),
+	PINMUX_DATA(PTF2_DATA, PTF2_IN, PTF2_OUT, PTF2_IN_PU),
+	PINMUX_DATA(PTF1_DATA, PTF1_IN, PTF1_OUT, PTF1_IN_PU),
+	PINMUX_DATA(PTF0_DATA, PTF0_IN, PTF0_OUT, PTF0_IN_PU),
+
+	/* PTG GPIO */
+	PINMUX_DATA(PTG5_DATA, PTG5_OUT),
+	PINMUX_DATA(PTG4_DATA, PTG4_OUT),
+	PINMUX_DATA(PTG3_DATA, PTG3_OUT),
+	PINMUX_DATA(PTG2_DATA, PTG2_OUT),
+	PINMUX_DATA(PTG1_DATA, PTG1_OUT),
+	PINMUX_DATA(PTG0_DATA, PTG0_OUT),
+
+	/* PTH GPIO */
+	PINMUX_DATA(PTH7_DATA, PTH7_IN, PTH7_OUT, PTH7_IN_PU),
+	PINMUX_DATA(PTH6_DATA, PTH6_IN, PTH6_OUT, PTH6_IN_PU),
+	PINMUX_DATA(PTH5_DATA, PTH5_IN, PTH5_OUT, PTH5_IN_PU),
+	PINMUX_DATA(PTH4_DATA, PTH4_IN, PTH4_OUT, PTH4_IN_PU),
+	PINMUX_DATA(PTH3_DATA, PTH3_IN, PTH3_OUT, PTH3_IN_PU),
+	PINMUX_DATA(PTH2_DATA, PTH2_IN, PTH2_OUT, PTH2_IN_PU),
+	PINMUX_DATA(PTH1_DATA, PTH1_IN, PTH1_OUT, PTH1_IN_PU),
+	PINMUX_DATA(PTH0_DATA, PTH0_IN, PTH0_OUT, PTH0_IN_PU),
+
+	/* PTJ GPIO */
+	PINMUX_DATA(PTJ7_DATA, PTJ7_OUT),
+	PINMUX_DATA(PTJ6_DATA, PTJ6_OUT),
+	PINMUX_DATA(PTJ5_DATA, PTJ5_OUT),
+	PINMUX_DATA(PTJ3_DATA, PTJ3_IN, PTJ3_OUT, PTJ3_IN_PU),
+	PINMUX_DATA(PTJ2_DATA, PTJ2_IN, PTJ2_OUT, PTJ2_IN_PU),
+	PINMUX_DATA(PTJ1_DATA, PTJ1_IN, PTJ1_OUT, PTJ1_IN_PU),
+	PINMUX_DATA(PTJ0_DATA, PTJ0_IN, PTJ0_OUT, PTJ0_IN_PU),
+
+	/* PTK GPIO */
+	PINMUX_DATA(PTK7_DATA, PTK7_IN, PTK7_OUT, PTK7_IN_PU),
+	PINMUX_DATA(PTK6_DATA, PTK6_IN, PTK6_OUT, PTK6_IN_PU),
+	PINMUX_DATA(PTK5_DATA, PTK5_IN, PTK5_OUT, PTK5_IN_PU),
+	PINMUX_DATA(PTK4_DATA, PTK4_IN, PTK4_OUT, PTK4_IN_PU),
+	PINMUX_DATA(PTK3_DATA, PTK3_IN, PTK3_OUT, PTK3_IN_PU),
+	PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_OUT, PTK2_IN_PU),
+	PINMUX_DATA(PTK1_DATA, PTK1_IN, PTK1_OUT, PTK1_IN_PU),
+	PINMUX_DATA(PTK0_DATA, PTK0_IN, PTK0_OUT, PTK0_IN_PU),
+
+	/* PTL GPIO */
+	PINMUX_DATA(PTL7_DATA, PTL7_IN, PTL7_OUT, PTL7_IN_PU),
+	PINMUX_DATA(PTL6_DATA, PTL6_IN, PTL6_OUT, PTL6_IN_PU),
+	PINMUX_DATA(PTL5_DATA, PTL5_IN, PTL5_OUT, PTL5_IN_PU),
+	PINMUX_DATA(PTL4_DATA, PTL4_IN, PTL4_OUT, PTL4_IN_PU),
+	PINMUX_DATA(PTL3_DATA, PTL3_IN, PTL3_OUT, PTL3_IN_PU),
+	PINMUX_DATA(PTL2_DATA, PTL2_IN, PTL2_OUT, PTL2_IN_PU),
+	PINMUX_DATA(PTL1_DATA, PTL1_IN, PTL1_OUT, PTL1_IN_PU),
+	PINMUX_DATA(PTL0_DATA, PTL0_IN, PTL0_OUT, PTL0_IN_PU),
+
+	/* PTM GPIO */
+	PINMUX_DATA(PTM7_DATA, PTM7_IN, PTM7_OUT, PTM7_IN_PU),
+	PINMUX_DATA(PTM6_DATA, PTM6_IN, PTM6_OUT, PTM6_IN_PU),
+	PINMUX_DATA(PTM5_DATA, PTM5_IN, PTM5_OUT, PTM5_IN_PU),
+	PINMUX_DATA(PTM4_DATA, PTM4_IN, PTM4_OUT, PTM4_IN_PU),
+	PINMUX_DATA(PTM3_DATA, PTM3_IN, PTM3_OUT, PTM3_IN_PU),
+	PINMUX_DATA(PTM2_DATA, PTM2_IN, PTM2_OUT, PTM2_IN_PU),
+	PINMUX_DATA(PTM1_DATA, PTM1_IN, PTM1_OUT, PTM1_IN_PU),
+	PINMUX_DATA(PTM0_DATA, PTM0_IN, PTM0_OUT, PTM0_IN_PU),
+
+	/* PTN GPIO */
+	PINMUX_DATA(PTN7_DATA, PTN7_IN, PTN7_OUT, PTN7_IN_PU),
+	PINMUX_DATA(PTN6_DATA, PTN6_IN, PTN6_OUT, PTN6_IN_PU),
+	PINMUX_DATA(PTN5_DATA, PTN5_IN, PTN5_OUT, PTN5_IN_PU),
+	PINMUX_DATA(PTN4_DATA, PTN4_IN, PTN4_OUT, PTN4_IN_PU),
+	PINMUX_DATA(PTN3_DATA, PTN3_IN, PTN3_OUT, PTN3_IN_PU),
+	PINMUX_DATA(PTN2_DATA, PTN2_IN, PTN2_OUT, PTN2_IN_PU),
+	PINMUX_DATA(PTN1_DATA, PTN1_IN, PTN1_OUT, PTN1_IN_PU),
+	PINMUX_DATA(PTN0_DATA, PTN0_IN, PTN0_OUT, PTN0_IN_PU),
+
+	/* PTQ GPIO */
+	PINMUX_DATA(PTQ7_DATA, PTQ7_IN, PTQ7_OUT, PTQ7_IN_PU),
+	PINMUX_DATA(PTQ6_DATA, PTQ6_IN, PTQ6_OUT, PTQ6_IN_PU),
+	PINMUX_DATA(PTQ5_DATA, PTQ5_IN, PTQ5_OUT, PTQ5_IN_PU),
+	PINMUX_DATA(PTQ4_DATA, PTQ4_IN, PTQ4_OUT, PTQ4_IN_PU),
+	PINMUX_DATA(PTQ3_DATA, PTQ3_IN, PTQ3_OUT, PTQ3_IN_PU),
+	PINMUX_DATA(PTQ2_DATA, PTQ2_IN, PTQ2_OUT, PTQ2_IN_PU),
+	PINMUX_DATA(PTQ1_DATA, PTQ1_IN, PTQ1_OUT, PTQ1_IN_PU),
+	PINMUX_DATA(PTQ0_DATA, PTQ0_IN, PTQ0_OUT, PTQ0_IN_PU),
+
+	/* PTR GPIO */
+	PINMUX_DATA(PTR7_DATA, PTR7_IN, PTR7_OUT, PTR7_IN_PU),
+	PINMUX_DATA(PTR6_DATA, PTR6_IN, PTR6_OUT, PTR6_IN_PU),
+	PINMUX_DATA(PTR5_DATA, PTR5_IN, PTR5_OUT, PTR5_IN_PU),
+	PINMUX_DATA(PTR4_DATA, PTR4_IN, PTR4_OUT, PTR4_IN_PU),
+	PINMUX_DATA(PTR3_DATA, PTR3_IN,           PTR3_IN_PU),
+	PINMUX_DATA(PTR2_DATA, PTR2_IN,           PTR2_IN_PU),
+	PINMUX_DATA(PTR1_DATA, PTR1_IN, PTR1_OUT, PTR1_IN_PU),
+	PINMUX_DATA(PTR0_DATA, PTR0_IN, PTR0_OUT, PTR0_IN_PU),
+
+	/* PTS GPIO */
+	PINMUX_DATA(PTS6_DATA, PTS6_IN, PTS6_OUT, PTS6_IN_PU),
+	PINMUX_DATA(PTS5_DATA, PTS5_IN, PTS5_OUT, PTS5_IN_PU),
+	PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_OUT, PTS4_IN_PU),
+	PINMUX_DATA(PTS3_DATA, PTS3_IN, PTS3_OUT, PTS3_IN_PU),
+	PINMUX_DATA(PTS2_DATA, PTS2_IN, PTS2_OUT, PTS2_IN_PU),
+	PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_OUT, PTS1_IN_PU),
+	PINMUX_DATA(PTS0_DATA, PTS0_IN, PTS0_OUT, PTS0_IN_PU),
+
+	/* PTT GPIO */
+	PINMUX_DATA(PTT7_DATA, PTT7_IN, PTT7_OUT, PTT7_IN_PU),
+	PINMUX_DATA(PTT6_DATA, PTT6_IN, PTT6_OUT, PTT6_IN_PU),
+	PINMUX_DATA(PTT5_DATA, PTT5_IN, PTT5_OUT, PTT5_IN_PU),
+	PINMUX_DATA(PTT4_DATA, PTT4_IN, PTT4_OUT, PTT4_IN_PU),
+	PINMUX_DATA(PTT3_DATA, PTT3_IN, PTT3_OUT, PTT3_IN_PU),
+	PINMUX_DATA(PTT2_DATA, PTT2_IN, PTT2_OUT, PTT2_IN_PU),
+	PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_OUT, PTT1_IN_PU),
+	PINMUX_DATA(PTT0_DATA, PTT0_IN, PTT0_OUT, PTT0_IN_PU),
+
+	/* PTU GPIO */
+	PINMUX_DATA(PTU7_DATA, PTU7_IN, PTU7_OUT, PTU7_IN_PU),
+	PINMUX_DATA(PTU6_DATA, PTU6_IN, PTU6_OUT, PTU6_IN_PU),
+	PINMUX_DATA(PTU5_DATA, PTU5_IN, PTU5_OUT, PTU5_IN_PU),
+	PINMUX_DATA(PTU4_DATA, PTU4_IN, PTU4_OUT, PTU4_IN_PU),
+	PINMUX_DATA(PTU3_DATA, PTU3_IN, PTU3_OUT, PTU3_IN_PU),
+	PINMUX_DATA(PTU2_DATA, PTU2_IN, PTU2_OUT, PTU2_IN_PU),
+	PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_OUT, PTU1_IN_PU),
+	PINMUX_DATA(PTU0_DATA, PTU0_IN, PTU0_OUT, PTU0_IN_PU),
+
+	/* PTV GPIO */
+	PINMUX_DATA(PTV7_DATA, PTV7_IN, PTV7_OUT, PTV7_IN_PU),
+	PINMUX_DATA(PTV6_DATA, PTV6_IN, PTV6_OUT, PTV6_IN_PU),
+	PINMUX_DATA(PTV5_DATA, PTV5_IN, PTV5_OUT, PTV5_IN_PU),
+	PINMUX_DATA(PTV4_DATA, PTV4_IN, PTV4_OUT, PTV4_IN_PU),
+	PINMUX_DATA(PTV3_DATA, PTV3_IN, PTV3_OUT, PTV3_IN_PU),
+	PINMUX_DATA(PTV2_DATA, PTV2_IN, PTV2_OUT, PTV2_IN_PU),
+	PINMUX_DATA(PTV1_DATA, PTV1_IN, PTV1_OUT, PTV1_IN_PU),
+	PINMUX_DATA(PTV0_DATA, PTV0_IN, PTV0_OUT, PTV0_IN_PU),
+
+	/* PTW GPIO */
+	PINMUX_DATA(PTW7_DATA, PTW7_IN, PTW7_OUT, PTW7_IN_PU),
+	PINMUX_DATA(PTW6_DATA, PTW6_IN, PTW6_OUT, PTW6_IN_PU),
+	PINMUX_DATA(PTW5_DATA, PTW5_IN, PTW5_OUT, PTW5_IN_PU),
+	PINMUX_DATA(PTW4_DATA, PTW4_IN, PTW4_OUT, PTW4_IN_PU),
+	PINMUX_DATA(PTW3_DATA, PTW3_IN, PTW3_OUT, PTW3_IN_PU),
+	PINMUX_DATA(PTW2_DATA, PTW2_IN, PTW2_OUT, PTW2_IN_PU),
+	PINMUX_DATA(PTW1_DATA, PTW1_IN, PTW1_OUT, PTW1_IN_PU),
+	PINMUX_DATA(PTW0_DATA, PTW0_IN, PTW0_OUT, PTW0_IN_PU),
+
+	/* PTX GPIO */
+	PINMUX_DATA(PTX7_DATA, PTX7_IN, PTX7_OUT, PTX7_IN_PU),
+	PINMUX_DATA(PTX6_DATA, PTX6_IN, PTX6_OUT, PTX6_IN_PU),
+	PINMUX_DATA(PTX5_DATA, PTX5_IN, PTX5_OUT, PTX5_IN_PU),
+	PINMUX_DATA(PTX4_DATA, PTX4_IN, PTX4_OUT, PTX4_IN_PU),
+	PINMUX_DATA(PTX3_DATA, PTX3_IN, PTX3_OUT, PTX3_IN_PU),
+	PINMUX_DATA(PTX2_DATA, PTX2_IN, PTX2_OUT, PTX2_IN_PU),
+	PINMUX_DATA(PTX1_DATA, PTX1_IN, PTX1_OUT, PTX1_IN_PU),
+	PINMUX_DATA(PTX0_DATA, PTX0_IN, PTX0_OUT, PTX0_IN_PU),
+
+	/* PTY GPIO */
+	PINMUX_DATA(PTY7_DATA, PTY7_IN, PTY7_OUT, PTY7_IN_PU),
+	PINMUX_DATA(PTY6_DATA, PTY6_IN, PTY6_OUT, PTY6_IN_PU),
+	PINMUX_DATA(PTY5_DATA, PTY5_IN, PTY5_OUT, PTY5_IN_PU),
+	PINMUX_DATA(PTY4_DATA, PTY4_IN, PTY4_OUT, PTY4_IN_PU),
+	PINMUX_DATA(PTY3_DATA, PTY3_IN, PTY3_OUT, PTY3_IN_PU),
+	PINMUX_DATA(PTY2_DATA, PTY2_IN, PTY2_OUT, PTY2_IN_PU),
+	PINMUX_DATA(PTY1_DATA, PTY1_IN, PTY1_OUT, PTY1_IN_PU),
+	PINMUX_DATA(PTY0_DATA, PTY0_IN, PTY0_OUT, PTY0_IN_PU),
+
+	/* PTZ GPIO */
+	PINMUX_DATA(PTZ7_DATA, PTZ7_IN, PTZ7_OUT, PTZ7_IN_PU),
+	PINMUX_DATA(PTZ6_DATA, PTZ6_IN, PTZ6_OUT, PTZ6_IN_PU),
+	PINMUX_DATA(PTZ5_DATA, PTZ5_IN, PTZ5_OUT, PTZ5_IN_PU),
+	PINMUX_DATA(PTZ4_DATA, PTZ4_IN, PTZ4_OUT, PTZ4_IN_PU),
+	PINMUX_DATA(PTZ3_DATA, PTZ3_IN, PTZ3_OUT, PTZ3_IN_PU),
+	PINMUX_DATA(PTZ2_DATA, PTZ2_IN, PTZ2_OUT, PTZ2_IN_PU),
+	PINMUX_DATA(PTZ1_DATA, PTZ1_IN, PTZ1_OUT, PTZ1_IN_PU),
+	PINMUX_DATA(PTZ0_DATA, PTZ0_IN, PTZ0_OUT, PTZ0_IN_PU),
+
+	/* PTA FN */
+	PINMUX_DATA(D23_MARK,	PSA15_0, PSA14_0, PTA7_FN),
+	PINMUX_DATA(D22_MARK,	PSA15_0, PSA14_0, PTA6_FN),
+	PINMUX_DATA(D21_MARK,	PSA15_0, PSA14_0, PTA5_FN),
+	PINMUX_DATA(D20_MARK,	PSA15_0, PSA14_0, PTA4_FN),
+	PINMUX_DATA(D19_MARK,	PSA15_0, PSA14_0, PTA3_FN),
+	PINMUX_DATA(D18_MARK,	PSA15_0, PSA14_0, PTA2_FN),
+	PINMUX_DATA(D17_MARK,	PSA15_0, PSA14_0, PTA1_FN),
+	PINMUX_DATA(D16_MARK,	PSA15_0, PSA14_0, PTA0_FN),
+
+	PINMUX_DATA(KEYOUT2_MARK,	PSA15_0, PSA14_1, PTA7_FN),
+	PINMUX_DATA(KEYOUT1_MARK,	PSA15_0, PSA14_1, PTA6_FN),
+	PINMUX_DATA(KEYOUT0_MARK,	PSA15_0, PSA14_1, PTA5_FN),
+	PINMUX_DATA(KEYIN4_MARK,	PSA15_0, PSA14_1, PTA4_FN),
+	PINMUX_DATA(KEYIN3_MARK,	PSA15_0, PSA14_1, PTA3_FN),
+	PINMUX_DATA(KEYIN2_MARK,	PSA15_0, PSA14_1, PTA2_FN),
+	PINMUX_DATA(KEYIN1_MARK,	PSA15_0, PSA14_1, PTA1_FN),
+	PINMUX_DATA(KEYIN0_MARK,	PSA15_0, PSA14_1, PTA0_FN),
+
+	PINMUX_DATA(IDED15_MARK,	PSA15_1, PSA14_0, PTA7_FN),
+	PINMUX_DATA(IDED14_MARK,	PSA15_1, PSA14_0, PTA6_FN),
+	PINMUX_DATA(IDED13_MARK,	PSA15_1, PSA14_0, PTA5_FN),
+	PINMUX_DATA(IDED12_MARK,	PSA15_1, PSA14_0, PTA4_FN),
+	PINMUX_DATA(IDED11_MARK,	PSA15_1, PSA14_0, PTA3_FN),
+	PINMUX_DATA(IDED10_MARK,	PSA15_1, PSA14_0, PTA2_FN),
+	PINMUX_DATA(IDED9_MARK,		PSA15_1, PSA14_0, PTA1_FN),
+	PINMUX_DATA(IDED8_MARK,		PSA15_1, PSA14_0, PTA0_FN),
+
+	/* PTB FN */
+	PINMUX_DATA(D31_MARK,		PSE15_0, PSE14_0, PTB7_FN),
+	PINMUX_DATA(D30_MARK,		PSE15_0, PSE14_0, PTB6_FN),
+	PINMUX_DATA(D29_MARK,		PSE11_0,          PTB5_FN),
+	PINMUX_DATA(D28_MARK,		PSE11_0,          PTB4_FN),
+	PINMUX_DATA(D27_MARK,		PSE11_0,          PTB3_FN),
+	PINMUX_DATA(D26_MARK,		PSA15_0, PSA14_0, PTB2_FN),
+	PINMUX_DATA(D25_MARK,		PSA15_0, PSA14_0, PTB1_FN),
+	PINMUX_DATA(D24_MARK,		PSA15_0, PSA14_0, PTB0_FN),
+
+	PINMUX_DATA(IDEA1_MARK,		PSE15_1, PSE14_0, PTB7_FN),
+	PINMUX_DATA(IDEA0_MARK,		PSE15_1, PSE14_0, PTB6_FN),
+	PINMUX_DATA(IODREQ_MARK,	PSE11_1,          PTB5_FN),
+	PINMUX_DATA(IDECS0_MARK,	PSE11_1,          PTB4_FN),
+	PINMUX_DATA(IDECS1_MARK,	PSE11_1,          PTB3_FN),
+	PINMUX_DATA(IDEIORD_MARK,	PSA15_1, PSA14_0, PTB2_FN),
+	PINMUX_DATA(IDEIOWR_MARK,	PSA15_1, PSA14_0, PTB1_FN),
+	PINMUX_DATA(IDEINT_MARK,	PSA15_1, PSA14_0, PTB0_FN),
+
+	PINMUX_DATA(TPUTO1_MARK,	PSE15_0, PSE14_1, PTB7_FN),
+	PINMUX_DATA(TPUTO0_MARK,	PSE15_0, PSE14_1, PTB6_FN),
+
+	PINMUX_DATA(KEYOUT5_IN5_MARK,	PSA15_0, PSA14_1, PTB2_FN),
+	PINMUX_DATA(KEYOUT4_IN6_MARK,	PSA15_0, PSA14_1, PTB1_FN),
+	PINMUX_DATA(KEYOUT3_MARK,	PSA15_0, PSA14_1, PTB0_FN),
+
+	/* PTC FN */
+	PINMUX_DATA(LCDD7_MARK, PSD5_0, PTC7_FN),
+	PINMUX_DATA(LCDD6_MARK, PSD5_0, PTC6_FN),
+	PINMUX_DATA(LCDD5_MARK, PSD5_0, PTC5_FN),
+	PINMUX_DATA(LCDD4_MARK, PSD5_0, PTC4_FN),
+	PINMUX_DATA(LCDD3_MARK, PSD5_0, PTC3_FN),
+	PINMUX_DATA(LCDD2_MARK, PSD5_0, PTC2_FN),
+	PINMUX_DATA(LCDD1_MARK, PSD5_0, PTC1_FN),
+	PINMUX_DATA(LCDD0_MARK, PSD5_0, PTC0_FN),
+
+	/* PTD FN */
+	PINMUX_DATA(LCDD15_MARK, PSD5_0, PTD7_FN),
+	PINMUX_DATA(LCDD14_MARK, PSD5_0, PTD6_FN),
+	PINMUX_DATA(LCDD13_MARK, PSD5_0, PTD5_FN),
+	PINMUX_DATA(LCDD12_MARK, PSD5_0, PTD4_FN),
+	PINMUX_DATA(LCDD11_MARK, PSD5_0, PTD3_FN),
+	PINMUX_DATA(LCDD10_MARK, PSD5_0, PTD2_FN),
+	PINMUX_DATA(LCDD9_MARK,  PSD5_0, PTD1_FN),
+	PINMUX_DATA(LCDD8_MARK,  PSD5_0, PTD0_FN),
+
+	/* PTE FN */
+	PINMUX_DATA(FSIMCKB_MARK, PTE7_FN),
+	PINMUX_DATA(FSIMCKA_MARK, PTE6_FN),
+
+	PINMUX_DATA(LCDD21_MARK,	PSC5_0, PSC4_0, PTE5_FN),
+	PINMUX_DATA(LCDD20_MARK,	PSD3_0, PSD2_0, PTE4_FN),
+	PINMUX_DATA(LCDD19_MARK,	PSA3_0, PSA2_0, PTE3_FN),
+	PINMUX_DATA(LCDD18_MARK,	PSA3_0, PSA2_0, PTE2_FN),
+	PINMUX_DATA(LCDD17_MARK,	PSD5_0,         PTE1_FN),
+	PINMUX_DATA(LCDD16_MARK,	PSD5_0,         PTE0_FN),
+
+	PINMUX_DATA(SCIF2_L_TXD_MARK,	PSC5_0, PSC4_1, PTE5_FN),
+	PINMUX_DATA(SCIF4_SCK_MARK,	PSD3_0, PSD2_1, PTE4_FN),
+	PINMUX_DATA(SCIF4_RXD_MARK,	PSA3_0, PSA2_1, PTE3_FN),
+	PINMUX_DATA(SCIF4_TXD_MARK,	PSA3_0, PSA2_1, PTE2_FN),
+
+	/* PTF FN */
+	PINMUX_DATA(LCDVSYN_MARK,	PSD8_0,          PTF7_FN),
+	PINMUX_DATA(LCDDISP_MARK,	PSD10_0, PSD9_0, PTF6_FN),
+	PINMUX_DATA(LCDHSYN_MARK,	PSD10_0, PSD9_0, PTF5_FN),
+	PINMUX_DATA(LCDDON_MARK,	PSD8_0,          PTF4_FN),
+	PINMUX_DATA(LCDDCK_MARK,	PSD10_0, PSD9_0, PTF3_FN),
+	PINMUX_DATA(LCDVEPWC_MARK,	PSA6_0,          PTF2_FN),
+	PINMUX_DATA(LCDD23_MARK,	PSC7_0,  PSC6_0, PTF1_FN),
+	PINMUX_DATA(LCDD22_MARK,	PSC5_0,  PSC4_0, PTF0_FN),
+
+	PINMUX_DATA(LCDRS_MARK,		PSD10_0, PSD9_1, PTF6_FN),
+	PINMUX_DATA(LCDCS_MARK,		PSD10_0, PSD9_1, PTF5_FN),
+	PINMUX_DATA(LCDWR_MARK,		PSD10_0, PSD9_1, PTF3_FN),
+
+	PINMUX_DATA(SCIF0_TXD_MARK,	PSA6_1,          PTF2_FN),
+	PINMUX_DATA(SCIF2_L_SCK_MARK,	PSC7_0,  PSC6_1, PTF1_FN),
+	PINMUX_DATA(SCIF2_L_RXD_MARK,	PSC5_0,  PSC4_1, PTF0_FN),
+
+	/* PTG FN */
+	PINMUX_DATA(AUDCK_MARK,   PTG5_FN),
+	PINMUX_DATA(AUDSYNC_MARK, PTG4_FN),
+	PINMUX_DATA(AUDATA3_MARK, PTG3_FN),
+	PINMUX_DATA(AUDATA2_MARK, PTG2_FN),
+	PINMUX_DATA(AUDATA1_MARK, PTG1_FN),
+	PINMUX_DATA(AUDATA0_MARK, PTG0_FN),
+
+	/* PTH FN */
+	PINMUX_DATA(VIO0_VD_MARK,  PTH7_FN),
+	PINMUX_DATA(VIO0_CLK_MARK, PTH6_FN),
+	PINMUX_DATA(VIO0_D7_MARK,  PTH5_FN),
+	PINMUX_DATA(VIO0_D6_MARK,  PTH4_FN),
+	PINMUX_DATA(VIO0_D5_MARK,  PTH3_FN),
+	PINMUX_DATA(VIO0_D4_MARK,  PTH2_FN),
+	PINMUX_DATA(VIO0_D3_MARK,  PTH1_FN),
+	PINMUX_DATA(VIO0_D2_MARK,  PTH0_FN),
+
+	/* PTJ FN */
+	PINMUX_DATA(PDSTATUS_MARK,	PTJ7_FN),
+	PINMUX_DATA(STATUS2_MARK,	PTJ6_FN),
+	PINMUX_DATA(STATUS0_MARK,	PTJ5_FN),
+	PINMUX_DATA(A25_MARK,		PSA8_0, PTJ3_FN),
+	PINMUX_DATA(BS_MARK,		PSA8_1, PTJ3_FN),
+	PINMUX_DATA(A24_MARK,		PTJ2_FN),
+	PINMUX_DATA(A23_MARK,		PTJ1_FN),
+	PINMUX_DATA(A22_MARK,		PTJ0_FN),
+
+	/* PTK FN */
+	PINMUX_DATA(VIO1_D5_MARK,	PSB7_0, PSB6_0, PTK7_FN),
+	PINMUX_DATA(VIO1_D4_MARK,	PSB7_0, PSB6_0, PTK6_FN),
+	PINMUX_DATA(VIO1_D3_MARK,	PSB7_0, PSB6_0, PTK5_FN),
+	PINMUX_DATA(VIO1_D2_MARK,	PSB7_0, PSB6_0, PTK4_FN),
+	PINMUX_DATA(VIO1_D1_MARK,	PSB7_0, PSB6_0, PTK3_FN),
+	PINMUX_DATA(VIO1_D0_MARK,	PSB7_0, PSB6_0, PTK2_FN),
+
+	PINMUX_DATA(VIO0_D13_MARK,	PSB7_0, PSB6_1, PTK7_FN),
+	PINMUX_DATA(VIO0_D12_MARK,	PSB7_0, PSB6_1, PTK6_FN),
+	PINMUX_DATA(VIO0_D11_MARK,	PSB7_0, PSB6_1, PTK5_FN),
+	PINMUX_DATA(VIO0_D10_MARK,	PSB7_0, PSB6_1, PTK4_FN),
+	PINMUX_DATA(VIO0_D9_MARK,	PSB7_0, PSB6_1, PTK3_FN),
+	PINMUX_DATA(VIO0_D8_MARK,	PSB7_0, PSB6_1, PTK2_FN),
+
+	PINMUX_DATA(IDED5_MARK,		PSB7_1, PSB6_0, PTK7_FN),
+	PINMUX_DATA(IDED4_MARK,		PSB7_1, PSB6_0, PTK6_FN),
+	PINMUX_DATA(IDED3_MARK,		PSB7_1, PSB6_0, PTK5_FN),
+	PINMUX_DATA(IDED2_MARK,		PSB7_1, PSB6_0, PTK4_FN),
+	PINMUX_DATA(IDED1_MARK,		PSB7_1, PSB6_0, PTK3_FN),
+	PINMUX_DATA(IDED0_MARK,		PSB7_1, PSB6_0, PTK2_FN),
+
+	PINMUX_DATA(VIO0_FLD_MARK,	PTK1_FN),
+	PINMUX_DATA(VIO0_HD_MARK,	PTK0_FN),
+
+	/* PTL FN */
+	PINMUX_DATA(DV_D5_MARK,		PSB9_0, PSB8_0, PTL7_FN),
+	PINMUX_DATA(DV_D4_MARK,		PSB9_0, PSB8_0, PTL6_FN),
+	PINMUX_DATA(DV_D3_MARK,		PSE7_0, PSE6_0, PTL5_FN),
+	PINMUX_DATA(DV_D2_MARK,		PSC9_0, PSC8_0, PTL4_FN),
+	PINMUX_DATA(DV_D1_MARK,		PSC9_0, PSC8_0, PTL3_FN),
+	PINMUX_DATA(DV_D0_MARK,		PSC9_0, PSC8_0, PTL2_FN),
+	PINMUX_DATA(DV_D15_MARK,	PSD4_0,         PTL1_FN),
+	PINMUX_DATA(DV_D14_MARK,	PSE5_0, PSE4_0, PTL0_FN),
+
+	PINMUX_DATA(SCIF3_V_SCK_MARK,	PSB9_0, PSB8_1, PTL7_FN),
+	PINMUX_DATA(SCIF3_V_RXD_MARK,	PSB9_0, PSB8_1, PTL6_FN),
+	PINMUX_DATA(SCIF3_V_TXD_MARK,	PSE7_0, PSE6_1, PTL5_FN),
+	PINMUX_DATA(SCIF1_SCK_MARK,	PSC9_0, PSC8_1, PTL4_FN),
+	PINMUX_DATA(SCIF1_RXD_MARK,	PSC9_0, PSC8_1, PTL3_FN),
+	PINMUX_DATA(SCIF1_TXD_MARK,	PSC9_0, PSC8_1, PTL2_FN),
+
+	PINMUX_DATA(RMII_RXD0_MARK,	PSB9_1, PSB8_0, PTL7_FN),
+	PINMUX_DATA(RMII_RXD1_MARK,	PSB9_1, PSB8_0, PTL6_FN),
+	PINMUX_DATA(RMII_REF_CLK_MARK,	PSE7_1, PSE6_0, PTL5_FN),
+	PINMUX_DATA(RMII_TX_EN_MARK,	PSC9_1, PSC8_0, PTL4_FN),
+	PINMUX_DATA(RMII_TXD0_MARK,	PSC9_1, PSC8_0, PTL3_FN),
+	PINMUX_DATA(RMII_TXD1_MARK,	PSC9_1, PSC8_0, PTL2_FN),
+
+	PINMUX_DATA(MSIOF0_MCK_MARK,	PSE5_0, PSE4_1, PTL0_FN),
+
+	/* PTM FN */
+	PINMUX_DATA(DV_D13_MARK,	PSC13_0, PSC12_0, PTM7_FN),
+	PINMUX_DATA(DV_D12_MARK,	PSC13_0, PSC12_0, PTM6_FN),
+	PINMUX_DATA(DV_D11_MARK,	PSC13_0, PSC12_0, PTM5_FN),
+	PINMUX_DATA(DV_D10_MARK,	PSC13_0, PSC12_0, PTM4_FN),
+	PINMUX_DATA(DV_D9_MARK,		PSC11_0, PSC10_0, PTM3_FN),
+	PINMUX_DATA(DV_D8_MARK,		PSC11_0, PSC10_0, PTM2_FN),
+
+	PINMUX_DATA(MSIOF0_TSCK_MARK,	PSC13_0, PSC12_1, PTM7_FN),
+	PINMUX_DATA(MSIOF0_RXD_MARK,	PSC13_0, PSC12_1, PTM6_FN),
+	PINMUX_DATA(MSIOF0_TXD_MARK,	PSC13_0, PSC12_1, PTM5_FN),
+	PINMUX_DATA(MSIOF0_TSYNC_MARK,	PSC13_0, PSC12_1, PTM4_FN),
+	PINMUX_DATA(MSIOF0_SS1_MARK,	PSC11_0, PSC10_1, PTM3_FN),
+	PINMUX_DATA(MSIOF0_RSCK_MARK,	PSC11_1, PSC10_0, PTM3_FN),
+	PINMUX_DATA(MSIOF0_SS2_MARK,	PSC11_0, PSC10_1, PTM2_FN),
+	PINMUX_DATA(MSIOF0_RSYNC_MARK,	PSC11_1, PSC10_0, PTM2_FN),
+
+	PINMUX_DATA(LCDVCPWC_MARK,	PSA6_0, PTM1_FN),
+	PINMUX_DATA(LCDRD_MARK,		PSA7_0, PTM0_FN),
+
+	PINMUX_DATA(SCIF0_RXD_MARK,	PSA6_1, PTM1_FN),
+	PINMUX_DATA(SCIF0_SCK_MARK,	PSA7_1, PTM0_FN),
+
+	/* PTN FN */
+	PINMUX_DATA(VIO0_D1_MARK,	PTN7_FN),
+	PINMUX_DATA(VIO0_D0_MARK,	PTN6_FN),
+
+	PINMUX_DATA(DV_CLKI_MARK,	PSD11_0,          PTN5_FN),
+	PINMUX_DATA(DV_CLK_MARK,	PSD13_0, PSD12_0, PTN4_FN),
+	PINMUX_DATA(DV_VSYNC_MARK,	PSD15_0, PSD14_0, PTN3_FN),
+	PINMUX_DATA(DV_HSYNC_MARK,	PSB5_0,  PSB4_0,  PTN2_FN),
+	PINMUX_DATA(DV_D7_MARK,		PSB3_0,  PSB2_0,  PTN1_FN),
+	PINMUX_DATA(DV_D6_MARK,		PSB1_0,  PSB0_0,  PTN0_FN),
+
+	PINMUX_DATA(SCIF2_V_SCK_MARK,	PSD13_0, PSD12_1, PTN4_FN),
+	PINMUX_DATA(SCIF2_V_RXD_MARK,	PSD15_0, PSD14_1, PTN3_FN),
+	PINMUX_DATA(SCIF2_V_TXD_MARK,	PSB5_0,  PSB4_1,  PTN2_FN),
+	PINMUX_DATA(SCIF3_V_CTS_MARK,	PSB3_0,  PSB2_1,  PTN1_FN),
+	PINMUX_DATA(SCIF3_V_RTS_MARK,	PSB1_0,  PSB0_1,  PTN0_FN),
+
+	PINMUX_DATA(RMII_RX_ER_MARK,	PSB3_1, PSB2_0, PTN1_FN),
+	PINMUX_DATA(RMII_CRS_DV_MARK,	PSB1_1, PSB0_0, PTN0_FN),
+
+	/* PTQ FN */
+	PINMUX_DATA(D7_MARK, PTQ7_FN),
+	PINMUX_DATA(D6_MARK, PTQ6_FN),
+	PINMUX_DATA(D5_MARK, PTQ5_FN),
+	PINMUX_DATA(D4_MARK, PTQ4_FN),
+	PINMUX_DATA(D3_MARK, PTQ3_FN),
+	PINMUX_DATA(D2_MARK, PTQ2_FN),
+	PINMUX_DATA(D1_MARK, PTQ1_FN),
+	PINMUX_DATA(D0_MARK, PTQ0_FN),
+
+	/* PTR FN */
+	PINMUX_DATA(CS6B_CE1B_MARK,	                PTR7_FN),
+	PINMUX_DATA(CS6A_CE2B_MARK,	                PTR6_FN),
+	PINMUX_DATA(CS5B_CE1A_MARK,	                PTR5_FN),
+	PINMUX_DATA(CS5A_CE2A_MARK,	                PTR4_FN),
+	PINMUX_DATA(IOIS16_MARK,	PSA5_0,         PTR3_FN),
+	PINMUX_DATA(WAIT_MARK,		                PTR2_FN),
+	PINMUX_DATA(WE3_ICIOWR_MARK,	PSA1_0, PSA0_0, PTR1_FN),
+	PINMUX_DATA(WE2_ICIORD_MARK,	PSD1_0, PSD0_0, PTR0_FN),
+
+	PINMUX_DATA(LCDLCLK_MARK,	PSA5_1,         PTR3_FN),
+
+	PINMUX_DATA(IDEA2_MARK,		PSD1_1, PSD0_0, PTR0_FN),
+
+	PINMUX_DATA(TPUTO3_MARK,	PSA1_0, PSA0_1, PTR1_FN),
+	PINMUX_DATA(TPUTI3_MARK,	PSA1_1, PSA0_0, PTR1_FN),
+	PINMUX_DATA(TPUTO2_MARK,	PSD1_0, PSD0_1, PTR0_FN),
+
+	/* PTS FN */
+	PINMUX_DATA(VIO_CKO_MARK,	PTS6_FN),
+
+	PINMUX_DATA(TPUTI2_MARK,	PSE9_0, PSE8_1, PTS5_FN),
+
+	PINMUX_DATA(IDEIORDY_MARK,	PSE9_1, PSE8_0, PTS5_FN),
+
+	PINMUX_DATA(VIO1_FLD_MARK,	PSE9_0, PSE8_0, PTS5_FN),
+	PINMUX_DATA(VIO1_HD_MARK,	PSA10_0,        PTS4_FN),
+	PINMUX_DATA(VIO1_VD_MARK,	PSA9_0,         PTS3_FN),
+	PINMUX_DATA(VIO1_CLK_MARK,	PSA9_0,         PTS2_FN),
+	PINMUX_DATA(VIO1_D7_MARK,	PSB7_0, PSB6_0, PTS1_FN),
+	PINMUX_DATA(VIO1_D6_MARK,	PSB7_0, PSB6_0, PTS0_FN),
+
+	PINMUX_DATA(SCIF5_SCK_MARK,	PSA10_1, PTS4_FN),
+	PINMUX_DATA(SCIF5_RXD_MARK,	PSA9_1,  PTS3_FN),
+	PINMUX_DATA(SCIF5_TXD_MARK,	PSA9_1,  PTS2_FN),
+
+	PINMUX_DATA(VIO0_D15_MARK,	PSB7_0, PSB6_1, PTS1_FN),
+	PINMUX_DATA(VIO0_D14_MARK,	PSB7_0, PSB6_1, PTS0_FN),
+
+	PINMUX_DATA(IDED7_MARK,		PSB7_1, PSB6_0, PTS1_FN),
+	PINMUX_DATA(IDED6_MARK,		PSB7_1, PSB6_0, PTS0_FN),
+
+	/* PTT FN */
+	PINMUX_DATA(D15_MARK, PTT7_FN),
+	PINMUX_DATA(D14_MARK, PTT6_FN),
+	PINMUX_DATA(D13_MARK, PTT5_FN),
+	PINMUX_DATA(D12_MARK, PTT4_FN),
+	PINMUX_DATA(D11_MARK, PTT3_FN),
+	PINMUX_DATA(D10_MARK, PTT2_FN),
+	PINMUX_DATA(D9_MARK,  PTT1_FN),
+	PINMUX_DATA(D8_MARK,  PTT0_FN),
+
+	/* PTU FN */
+	PINMUX_DATA(DMAC_DACK0_MARK, PTU7_FN),
+	PINMUX_DATA(DMAC_DREQ0_MARK, PTU6_FN),
+
+	PINMUX_DATA(FSIOASD_MARK,	PSE1_0, PTU5_FN),
+	PINMUX_DATA(FSIIABCK_MARK,	PSE1_0, PTU4_FN),
+	PINMUX_DATA(FSIIALRCK_MARK,	PSE1_0, PTU3_FN),
+	PINMUX_DATA(FSIOABCK_MARK,	PSE1_0, PTU2_FN),
+	PINMUX_DATA(FSIOALRCK_MARK,	PSE1_0, PTU1_FN),
+	PINMUX_DATA(CLKAUDIOAO_MARK,	PSE0_0, PTU0_FN),
+
+	/* PTV FN */
+	PINMUX_DATA(FSIIBSD_MARK,	PSD7_0,  PSD6_0,  PTV7_FN),
+	PINMUX_DATA(FSIOBSD_MARK,	PSD7_0,  PSD6_0,  PTV6_FN),
+	PINMUX_DATA(FSIIBBCK_MARK,	PSC15_0, PSC14_0, PTV5_FN),
+	PINMUX_DATA(FSIIBLRCK_MARK,	PSC15_0, PSC14_0, PTV4_FN),
+	PINMUX_DATA(FSIOBBCK_MARK,	PSC15_0, PSC14_0, PTV3_FN),
+	PINMUX_DATA(FSIOBLRCK_MARK,	PSC15_0, PSC14_0, PTV2_FN),
+	PINMUX_DATA(CLKAUDIOBO_MARK,	PSE3_0,  PSE2_0,  PTV1_FN),
+	PINMUX_DATA(FSIIASD_MARK,	PSE10_0,          PTV0_FN),
+
+	PINMUX_DATA(MSIOF1_SS2_MARK,	PSD7_0,  PSD6_1,  PTV7_FN),
+	PINMUX_DATA(MSIOF1_RSYNC_MARK,	PSD7_1,  PSD6_0,  PTV7_FN),
+	PINMUX_DATA(MSIOF1_SS1_MARK,	PSD7_0,  PSD6_1,  PTV6_FN),
+	PINMUX_DATA(MSIOF1_RSCK_MARK,	PSD7_1,  PSD6_0,  PTV6_FN),
+	PINMUX_DATA(MSIOF1_RXD_MARK,	PSC15_0, PSC14_1, PTV5_FN),
+	PINMUX_DATA(MSIOF1_TSYNC_MARK,	PSC15_0, PSC14_1, PTV4_FN),
+	PINMUX_DATA(MSIOF1_TSCK_MARK,	PSC15_0, PSC14_1, PTV3_FN),
+	PINMUX_DATA(MSIOF1_TXD_MARK,	PSC15_0, PSC14_1, PTV2_FN),
+	PINMUX_DATA(MSIOF1_MCK_MARK,	PSE3_0,  PSE2_1,  PTV1_FN),
+
+	/* PTW FN */
+	PINMUX_DATA(MMC_D7_MARK,	PSE13_0, PSE12_0, PTW7_FN),
+	PINMUX_DATA(MMC_D6_MARK,	PSE13_0, PSE12_0, PTW6_FN),
+	PINMUX_DATA(MMC_D5_MARK,	PSE13_0, PSE12_0, PTW5_FN),
+	PINMUX_DATA(MMC_D4_MARK,	PSE13_0, PSE12_0, PTW4_FN),
+	PINMUX_DATA(MMC_D3_MARK,	PSA13_0,          PTW3_FN),
+	PINMUX_DATA(MMC_D2_MARK,	PSA13_0,          PTW2_FN),
+	PINMUX_DATA(MMC_D1_MARK,	PSA13_0,          PTW1_FN),
+	PINMUX_DATA(MMC_D0_MARK,	PSA13_0,          PTW0_FN),
+
+	PINMUX_DATA(SDHI1CD_MARK,	PSE13_0, PSE12_1, PTW7_FN),
+	PINMUX_DATA(SDHI1WP_MARK,	PSE13_0, PSE12_1, PTW6_FN),
+	PINMUX_DATA(SDHI1D3_MARK,	PSE13_0, PSE12_1, PTW5_FN),
+	PINMUX_DATA(SDHI1D2_MARK,	PSE13_0, PSE12_1, PTW4_FN),
+	PINMUX_DATA(SDHI1D1_MARK,	PSA13_1,          PTW3_FN),
+	PINMUX_DATA(SDHI1D0_MARK,	PSA13_1,          PTW2_FN),
+	PINMUX_DATA(SDHI1CMD_MARK,	PSA13_1,          PTW1_FN),
+	PINMUX_DATA(SDHI1CLK_MARK,	PSA13_1,          PTW0_FN),
+
+	PINMUX_DATA(IODACK_MARK,	PSE13_1, PSE12_0, PTW7_FN),
+	PINMUX_DATA(IDERST_MARK,	PSE13_1, PSE12_0, PTW6_FN),
+	PINMUX_DATA(EXBUF_ENB_MARK,	PSE13_1, PSE12_0, PTW5_FN),
+	PINMUX_DATA(DIRECTION_MARK,	PSE13_1, PSE12_0, PTW4_FN),
+
+	/* PTX FN */
+	PINMUX_DATA(DMAC_DACK1_MARK,	PSA12_0, PTX7_FN),
+	PINMUX_DATA(DMAC_DREQ1_MARK,	PSA12_0, PTX6_FN),
+
+	PINMUX_DATA(IRDA_OUT_MARK,	PSA12_1, PTX7_FN),
+	PINMUX_DATA(IRDA_IN_MARK,	PSA12_1, PTX6_FN),
+
+	PINMUX_DATA(TSIF_TS0_SDAT_MARK,	PSC0_0, PTX5_FN),
+	PINMUX_DATA(TSIF_TS0_SCK_MARK,	PSC1_0, PTX4_FN),
+	PINMUX_DATA(TSIF_TS0_SDEN_MARK,	PSC2_0, PTX3_FN),
+	PINMUX_DATA(TSIF_TS0_SPSYNC_MARK,       PTX2_FN),
+
+	PINMUX_DATA(LNKSTA_MARK,	PSC0_1, PTX5_FN),
+	PINMUX_DATA(MDIO_MARK,		PSC1_1, PTX4_FN),
+	PINMUX_DATA(MDC_MARK,		PSC2_1, PTX3_FN),
+
+	PINMUX_DATA(MMC_CLK_MARK, PTX1_FN),
+	PINMUX_DATA(MMC_CMD_MARK, PTX0_FN),
+
+	/* PTY FN */
+	PINMUX_DATA(SDHI0CD_MARK,  PTY7_FN),
+	PINMUX_DATA(SDHI0WP_MARK,  PTY6_FN),
+	PINMUX_DATA(SDHI0D3_MARK,  PTY5_FN),
+	PINMUX_DATA(SDHI0D2_MARK,  PTY4_FN),
+	PINMUX_DATA(SDHI0D1_MARK,  PTY3_FN),
+	PINMUX_DATA(SDHI0D0_MARK,  PTY2_FN),
+	PINMUX_DATA(SDHI0CMD_MARK, PTY1_FN),
+	PINMUX_DATA(SDHI0CLK_MARK, PTY0_FN),
+
+	/* PTZ FN */
+	PINMUX_DATA(INTC_IRQ7_MARK,	PSB10_0, PTZ7_FN),
+	PINMUX_DATA(INTC_IRQ6_MARK,	PSB11_0, PTZ6_FN),
+	PINMUX_DATA(INTC_IRQ5_MARK,	PSB12_0, PTZ5_FN),
+	PINMUX_DATA(INTC_IRQ4_MARK,	PSB13_0, PTZ4_FN),
+	PINMUX_DATA(INTC_IRQ3_MARK,	PSB14_0, PTZ3_FN),
+	PINMUX_DATA(INTC_IRQ2_MARK,	         PTZ2_FN),
+	PINMUX_DATA(INTC_IRQ1_MARK,	         PTZ1_FN),
+	PINMUX_DATA(INTC_IRQ0_MARK,	         PTZ0_FN),
+
+	PINMUX_DATA(SCIF3_I_CTS_MARK,	PSB10_1, PTZ7_FN),
+	PINMUX_DATA(SCIF3_I_RTS_MARK,	PSB11_1, PTZ6_FN),
+	PINMUX_DATA(SCIF3_I_SCK_MARK,	PSB12_1, PTZ5_FN),
+	PINMUX_DATA(SCIF3_I_RXD_MARK,	PSB13_1, PTZ4_FN),
+	PINMUX_DATA(SCIF3_I_TXD_MARK,	PSB14_1, PTZ3_FN),
+};
+
+static struct pinmux_gpio pinmux_gpios[] = {
+	/* PTA */
+	PINMUX_GPIO(GPIO_PTA7, PTA7_DATA),
+	PINMUX_GPIO(GPIO_PTA6, PTA6_DATA),
+	PINMUX_GPIO(GPIO_PTA5, PTA5_DATA),
+	PINMUX_GPIO(GPIO_PTA4, PTA4_DATA),
+	PINMUX_GPIO(GPIO_PTA3, PTA3_DATA),
+	PINMUX_GPIO(GPIO_PTA2, PTA2_DATA),
+	PINMUX_GPIO(GPIO_PTA1, PTA1_DATA),
+	PINMUX_GPIO(GPIO_PTA0, PTA0_DATA),
+
+	/* PTB */
+	PINMUX_GPIO(GPIO_PTB7, PTB7_DATA),
+	PINMUX_GPIO(GPIO_PTB6, PTB6_DATA),
+	PINMUX_GPIO(GPIO_PTB5, PTB5_DATA),
+	PINMUX_GPIO(GPIO_PTB4, PTB4_DATA),
+	PINMUX_GPIO(GPIO_PTB3, PTB3_DATA),
+	PINMUX_GPIO(GPIO_PTB2, PTB2_DATA),
+	PINMUX_GPIO(GPIO_PTB1, PTB1_DATA),
+	PINMUX_GPIO(GPIO_PTB0, PTB0_DATA),
+
+	/* PTC */
+	PINMUX_GPIO(GPIO_PTC7, PTC7_DATA),
+	PINMUX_GPIO(GPIO_PTC6, PTC6_DATA),
+	PINMUX_GPIO(GPIO_PTC5, PTC5_DATA),
+	PINMUX_GPIO(GPIO_PTC4, PTC4_DATA),
+	PINMUX_GPIO(GPIO_PTC3, PTC3_DATA),
+	PINMUX_GPIO(GPIO_PTC2, PTC2_DATA),
+	PINMUX_GPIO(GPIO_PTC1, PTC1_DATA),
+	PINMUX_GPIO(GPIO_PTC0, PTC0_DATA),
+
+	/* PTD */
+	PINMUX_GPIO(GPIO_PTD7, PTD7_DATA),
+	PINMUX_GPIO(GPIO_PTD6, PTD6_DATA),
+	PINMUX_GPIO(GPIO_PTD5, PTD5_DATA),
+	PINMUX_GPIO(GPIO_PTD4, PTD4_DATA),
+	PINMUX_GPIO(GPIO_PTD3, PTD3_DATA),
+	PINMUX_GPIO(GPIO_PTD2, PTD2_DATA),
+	PINMUX_GPIO(GPIO_PTD1, PTD1_DATA),
+	PINMUX_GPIO(GPIO_PTD0, PTD0_DATA),
+
+	/* PTE */
+	PINMUX_GPIO(GPIO_PTE7, PTE7_DATA),
+	PINMUX_GPIO(GPIO_PTE6, PTE6_DATA),
+	PINMUX_GPIO(GPIO_PTE5, PTE5_DATA),
+	PINMUX_GPIO(GPIO_PTE4, PTE4_DATA),
+	PINMUX_GPIO(GPIO_PTE3, PTE3_DATA),
+	PINMUX_GPIO(GPIO_PTE2, PTE2_DATA),
+	PINMUX_GPIO(GPIO_PTE1, PTE1_DATA),
+	PINMUX_GPIO(GPIO_PTE0, PTE0_DATA),
+
+	/* PTF */
+	PINMUX_GPIO(GPIO_PTF7, PTF7_DATA),
+	PINMUX_GPIO(GPIO_PTF6, PTF6_DATA),
+	PINMUX_GPIO(GPIO_PTF5, PTF5_DATA),
+	PINMUX_GPIO(GPIO_PTF4, PTF4_DATA),
+	PINMUX_GPIO(GPIO_PTF3, PTF3_DATA),
+	PINMUX_GPIO(GPIO_PTF2, PTF2_DATA),
+	PINMUX_GPIO(GPIO_PTF1, PTF1_DATA),
+	PINMUX_GPIO(GPIO_PTF0, PTF0_DATA),
+
+	/* PTG */
+	PINMUX_GPIO(GPIO_PTG5, PTG5_DATA),
+	PINMUX_GPIO(GPIO_PTG4, PTG4_DATA),
+	PINMUX_GPIO(GPIO_PTG3, PTG3_DATA),
+	PINMUX_GPIO(GPIO_PTG2, PTG2_DATA),
+	PINMUX_GPIO(GPIO_PTG1, PTG1_DATA),
+	PINMUX_GPIO(GPIO_PTG0, PTG0_DATA),
+
+	/* PTH */
+	PINMUX_GPIO(GPIO_PTH7, PTH7_DATA),
+	PINMUX_GPIO(GPIO_PTH6, PTH6_DATA),
+	PINMUX_GPIO(GPIO_PTH5, PTH5_DATA),
+	PINMUX_GPIO(GPIO_PTH4, PTH4_DATA),
+	PINMUX_GPIO(GPIO_PTH3, PTH3_DATA),
+	PINMUX_GPIO(GPIO_PTH2, PTH2_DATA),
+	PINMUX_GPIO(GPIO_PTH1, PTH1_DATA),
+	PINMUX_GPIO(GPIO_PTH0, PTH0_DATA),
+
+	/* PTJ */
+	PINMUX_GPIO(GPIO_PTJ7, PTJ7_DATA),
+	PINMUX_GPIO(GPIO_PTJ6, PTJ6_DATA),
+	PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA),
+	PINMUX_GPIO(GPIO_PTJ3, PTJ3_DATA),
+	PINMUX_GPIO(GPIO_PTJ2, PTJ2_DATA),
+	PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA),
+	PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA),
+
+	/* PTK */
+	PINMUX_GPIO(GPIO_PTK7, PTK7_DATA),
+	PINMUX_GPIO(GPIO_PTK6, PTK6_DATA),
+	PINMUX_GPIO(GPIO_PTK5, PTK5_DATA),
+	PINMUX_GPIO(GPIO_PTK4, PTK4_DATA),
+	PINMUX_GPIO(GPIO_PTK3, PTK3_DATA),
+	PINMUX_GPIO(GPIO_PTK2, PTK2_DATA),
+	PINMUX_GPIO(GPIO_PTK1, PTK1_DATA),
+	PINMUX_GPIO(GPIO_PTK0, PTK0_DATA),
+
+	/* PTL */
+	PINMUX_GPIO(GPIO_PTL7, PTL7_DATA),
+	PINMUX_GPIO(GPIO_PTL6, PTL6_DATA),
+	PINMUX_GPIO(GPIO_PTL5, PTL5_DATA),
+	PINMUX_GPIO(GPIO_PTL4, PTL4_DATA),
+	PINMUX_GPIO(GPIO_PTL3, PTL3_DATA),
+	PINMUX_GPIO(GPIO_PTL2, PTL2_DATA),
+	PINMUX_GPIO(GPIO_PTL1, PTL1_DATA),
+	PINMUX_GPIO(GPIO_PTL0, PTL0_DATA),
+
+	/* PTM */
+	PINMUX_GPIO(GPIO_PTM7, PTM7_DATA),
+	PINMUX_GPIO(GPIO_PTM6, PTM6_DATA),
+	PINMUX_GPIO(GPIO_PTM5, PTM5_DATA),
+	PINMUX_GPIO(GPIO_PTM4, PTM4_DATA),
+	PINMUX_GPIO(GPIO_PTM3, PTM3_DATA),
+	PINMUX_GPIO(GPIO_PTM2, PTM2_DATA),
+	PINMUX_GPIO(GPIO_PTM1, PTM1_DATA),
+	PINMUX_GPIO(GPIO_PTM0, PTM0_DATA),
+
+	/* PTN */
+	PINMUX_GPIO(GPIO_PTN7, PTN7_DATA),
+	PINMUX_GPIO(GPIO_PTN6, PTN6_DATA),
+	PINMUX_GPIO(GPIO_PTN5, PTN5_DATA),
+	PINMUX_GPIO(GPIO_PTN4, PTN4_DATA),
+	PINMUX_GPIO(GPIO_PTN3, PTN3_DATA),
+	PINMUX_GPIO(GPIO_PTN2, PTN2_DATA),
+	PINMUX_GPIO(GPIO_PTN1, PTN1_DATA),
+	PINMUX_GPIO(GPIO_PTN0, PTN0_DATA),
+
+	/* PTQ */
+	PINMUX_GPIO(GPIO_PTQ7, PTQ7_DATA),
+	PINMUX_GPIO(GPIO_PTQ6, PTQ6_DATA),
+	PINMUX_GPIO(GPIO_PTQ5, PTQ5_DATA),
+	PINMUX_GPIO(GPIO_PTQ4, PTQ4_DATA),
+	PINMUX_GPIO(GPIO_PTQ3, PTQ3_DATA),
+	PINMUX_GPIO(GPIO_PTQ2, PTQ2_DATA),
+	PINMUX_GPIO(GPIO_PTQ1, PTQ1_DATA),
+	PINMUX_GPIO(GPIO_PTQ0, PTQ0_DATA),
+
+	/* PTR */
+	PINMUX_GPIO(GPIO_PTR7, PTR7_DATA),
+	PINMUX_GPIO(GPIO_PTR6, PTR6_DATA),
+	PINMUX_GPIO(GPIO_PTR5, PTR5_DATA),
+	PINMUX_GPIO(GPIO_PTR4, PTR4_DATA),
+	PINMUX_GPIO(GPIO_PTR3, PTR3_DATA),
+	PINMUX_GPIO(GPIO_PTR2, PTR2_DATA),
+	PINMUX_GPIO(GPIO_PTR1, PTR1_DATA),
+	PINMUX_GPIO(GPIO_PTR0, PTR0_DATA),
+
+	/* PTS */
+	PINMUX_GPIO(GPIO_PTS6, PTS6_DATA),
+	PINMUX_GPIO(GPIO_PTS5, PTS5_DATA),
+	PINMUX_GPIO(GPIO_PTS4, PTS4_DATA),
+	PINMUX_GPIO(GPIO_PTS3, PTS3_DATA),
+	PINMUX_GPIO(GPIO_PTS2, PTS2_DATA),
+	PINMUX_GPIO(GPIO_PTS1, PTS1_DATA),
+	PINMUX_GPIO(GPIO_PTS0, PTS0_DATA),
+
+	/* PTT */
+	PINMUX_GPIO(GPIO_PTT7, PTT7_DATA),
+	PINMUX_GPIO(GPIO_PTT6, PTT6_DATA),
+	PINMUX_GPIO(GPIO_PTT5, PTT5_DATA),
+	PINMUX_GPIO(GPIO_PTT4, PTT4_DATA),
+	PINMUX_GPIO(GPIO_PTT3, PTT3_DATA),
+	PINMUX_GPIO(GPIO_PTT2, PTT2_DATA),
+	PINMUX_GPIO(GPIO_PTT1, PTT1_DATA),
+	PINMUX_GPIO(GPIO_PTT0, PTT0_DATA),
+
+	/* PTU */
+	PINMUX_GPIO(GPIO_PTU7, PTU7_DATA),
+	PINMUX_GPIO(GPIO_PTU6, PTU6_DATA),
+	PINMUX_GPIO(GPIO_PTU5, PTU5_DATA),
+	PINMUX_GPIO(GPIO_PTU4, PTU4_DATA),
+	PINMUX_GPIO(GPIO_PTU3, PTU3_DATA),
+	PINMUX_GPIO(GPIO_PTU2, PTU2_DATA),
+	PINMUX_GPIO(GPIO_PTU1, PTU1_DATA),
+	PINMUX_GPIO(GPIO_PTU0, PTU0_DATA),
+
+	/* PTV */
+	PINMUX_GPIO(GPIO_PTV7, PTV7_DATA),
+	PINMUX_GPIO(GPIO_PTV6, PTV6_DATA),
+	PINMUX_GPIO(GPIO_PTV5, PTV5_DATA),
+	PINMUX_GPIO(GPIO_PTV4, PTV4_DATA),
+	PINMUX_GPIO(GPIO_PTV3, PTV3_DATA),
+	PINMUX_GPIO(GPIO_PTV2, PTV2_DATA),
+	PINMUX_GPIO(GPIO_PTV1, PTV1_DATA),
+	PINMUX_GPIO(GPIO_PTV0, PTV0_DATA),
+
+	/* PTW */
+	PINMUX_GPIO(GPIO_PTW7, PTW7_DATA),
+	PINMUX_GPIO(GPIO_PTW6, PTW6_DATA),
+	PINMUX_GPIO(GPIO_PTW5, PTW5_DATA),
+	PINMUX_GPIO(GPIO_PTW4, PTW4_DATA),
+	PINMUX_GPIO(GPIO_PTW3, PTW3_DATA),
+	PINMUX_GPIO(GPIO_PTW2, PTW2_DATA),
+	PINMUX_GPIO(GPIO_PTW1, PTW1_DATA),
+	PINMUX_GPIO(GPIO_PTW0, PTW0_DATA),
+
+	/* PTX */
+	PINMUX_GPIO(GPIO_PTX7, PTX7_DATA),
+	PINMUX_GPIO(GPIO_PTX6, PTX6_DATA),
+	PINMUX_GPIO(GPIO_PTX5, PTX5_DATA),
+	PINMUX_GPIO(GPIO_PTX4, PTX4_DATA),
+	PINMUX_GPIO(GPIO_PTX3, PTX3_DATA),
+	PINMUX_GPIO(GPIO_PTX2, PTX2_DATA),
+	PINMUX_GPIO(GPIO_PTX1, PTX1_DATA),
+	PINMUX_GPIO(GPIO_PTX0, PTX0_DATA),
+
+	/* PTY */
+	PINMUX_GPIO(GPIO_PTY7, PTY7_DATA),
+	PINMUX_GPIO(GPIO_PTY6, PTY6_DATA),
+	PINMUX_GPIO(GPIO_PTY5, PTY5_DATA),
+	PINMUX_GPIO(GPIO_PTY4, PTY4_DATA),
+	PINMUX_GPIO(GPIO_PTY3, PTY3_DATA),
+	PINMUX_GPIO(GPIO_PTY2, PTY2_DATA),
+	PINMUX_GPIO(GPIO_PTY1, PTY1_DATA),
+	PINMUX_GPIO(GPIO_PTY0, PTY0_DATA),
+
+	/* PTZ */
+	PINMUX_GPIO(GPIO_PTZ7, PTZ7_DATA),
+	PINMUX_GPIO(GPIO_PTZ6, PTZ6_DATA),
+	PINMUX_GPIO(GPIO_PTZ5, PTZ5_DATA),
+	PINMUX_GPIO(GPIO_PTZ4, PTZ4_DATA),
+	PINMUX_GPIO(GPIO_PTZ3, PTZ3_DATA),
+	PINMUX_GPIO(GPIO_PTZ2, PTZ2_DATA),
+	PINMUX_GPIO(GPIO_PTZ1, PTZ1_DATA),
+	PINMUX_GPIO(GPIO_PTZ0, PTZ0_DATA),
+
+	/* BSC */
+	PINMUX_GPIO(GPIO_FN_D31,	D31_MARK),
+	PINMUX_GPIO(GPIO_FN_D30,	D30_MARK),
+	PINMUX_GPIO(GPIO_FN_D29,	D29_MARK),
+	PINMUX_GPIO(GPIO_FN_D28,	D28_MARK),
+	PINMUX_GPIO(GPIO_FN_D27,	D27_MARK),
+	PINMUX_GPIO(GPIO_FN_D26,	D26_MARK),
+	PINMUX_GPIO(GPIO_FN_D25,	D25_MARK),
+	PINMUX_GPIO(GPIO_FN_D24,	D24_MARK),
+	PINMUX_GPIO(GPIO_FN_D23,	D23_MARK),
+	PINMUX_GPIO(GPIO_FN_D22,	D22_MARK),
+	PINMUX_GPIO(GPIO_FN_D21,	D21_MARK),
+	PINMUX_GPIO(GPIO_FN_D20,	D20_MARK),
+	PINMUX_GPIO(GPIO_FN_D19,	D19_MARK),
+	PINMUX_GPIO(GPIO_FN_D18,	D18_MARK),
+	PINMUX_GPIO(GPIO_FN_D17,	D17_MARK),
+	PINMUX_GPIO(GPIO_FN_D16,	D16_MARK),
+	PINMUX_GPIO(GPIO_FN_D15,	D15_MARK),
+	PINMUX_GPIO(GPIO_FN_D14,	D14_MARK),
+	PINMUX_GPIO(GPIO_FN_D13,	D13_MARK),
+	PINMUX_GPIO(GPIO_FN_D12,	D12_MARK),
+	PINMUX_GPIO(GPIO_FN_D11,	D11_MARK),
+	PINMUX_GPIO(GPIO_FN_D10,	D10_MARK),
+	PINMUX_GPIO(GPIO_FN_D9,		D9_MARK),
+	PINMUX_GPIO(GPIO_FN_D8,		D8_MARK),
+	PINMUX_GPIO(GPIO_FN_D7,		D7_MARK),
+	PINMUX_GPIO(GPIO_FN_D6,		D6_MARK),
+	PINMUX_GPIO(GPIO_FN_D5,		D5_MARK),
+	PINMUX_GPIO(GPIO_FN_D4,		D4_MARK),
+	PINMUX_GPIO(GPIO_FN_D3,		D3_MARK),
+	PINMUX_GPIO(GPIO_FN_D2,		D2_MARK),
+	PINMUX_GPIO(GPIO_FN_D1,		D1_MARK),
+	PINMUX_GPIO(GPIO_FN_D0,		D0_MARK),
+	PINMUX_GPIO(GPIO_FN_A25,	A25_MARK),
+	PINMUX_GPIO(GPIO_FN_A24,	A24_MARK),
+	PINMUX_GPIO(GPIO_FN_A23,	A23_MARK),
+	PINMUX_GPIO(GPIO_FN_A22,	A22_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6B_CE1B,	CS6B_CE1B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS6A_CE2B,	CS6A_CE2B_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5B_CE1A,	CS5B_CE1A_MARK),
+	PINMUX_GPIO(GPIO_FN_CS5A_CE2A,	CS5A_CE2A_MARK),
+	PINMUX_GPIO(GPIO_FN_WE3_ICIOWR,	WE3_ICIOWR_MARK),
+	PINMUX_GPIO(GPIO_FN_WE2_ICIORD,	WE2_ICIORD_MARK),
+	PINMUX_GPIO(GPIO_FN_IOIS16,	IOIS16_MARK),
+	PINMUX_GPIO(GPIO_FN_WAIT,	WAIT_MARK),
+	PINMUX_GPIO(GPIO_FN_BS,		BS_MARK),
+
+	/* KEYSC */
+	PINMUX_GPIO(GPIO_FN_KEYOUT5_IN5,	KEYOUT5_IN5_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT4_IN6,	KEYOUT4_IN6_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN4,		KEYIN4_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN3,		KEYIN3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN2,		KEYIN2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN1,		KEYIN1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYIN0,		KEYIN0_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT3,		KEYOUT3_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT2,		KEYOUT2_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT1,		KEYOUT1_MARK),
+	PINMUX_GPIO(GPIO_FN_KEYOUT0,		KEYOUT0_MARK),
+
+	/* ATAPI */
+	PINMUX_GPIO(GPIO_FN_IDED15,	IDED15_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED14,	IDED14_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED13,	IDED13_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED12,	IDED12_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED11,	IDED11_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED10,	IDED10_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED9,	IDED9_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED8,	IDED8_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED7,	IDED7_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED6,	IDED6_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED5,	IDED5_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED4,	IDED4_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED3,	IDED3_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED2,	IDED2_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED1,	IDED1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDED0,	IDED0_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA2,	IDEA2_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA1,	IDEA1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEA0,	IDEA0_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIOWR,	IDEIOWR_MARK),
+	PINMUX_GPIO(GPIO_FN_IODREQ,	IODREQ_MARK),
+	PINMUX_GPIO(GPIO_FN_IDECS0,	IDECS0_MARK),
+	PINMUX_GPIO(GPIO_FN_IDECS1,	IDECS1_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIORD,	IDEIORD_MARK),
+	PINMUX_GPIO(GPIO_FN_DIRECTION,	DIRECTION_MARK),
+	PINMUX_GPIO(GPIO_FN_EXBUF_ENB,	EXBUF_ENB_MARK),
+	PINMUX_GPIO(GPIO_FN_IDERST,	IDERST_MARK),
+	PINMUX_GPIO(GPIO_FN_IODACK,	IODACK_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEINT,	IDEINT_MARK),
+	PINMUX_GPIO(GPIO_FN_IDEIORDY,	IDEIORDY_MARK),
+
+	/* TPU */
+	PINMUX_GPIO(GPIO_FN_TPUTO3,	TPUTO3_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO2,	TPUTO2_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO1,	TPUTO1_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTO0,	TPUTO0_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTI3,	TPUTI3_MARK),
+	PINMUX_GPIO(GPIO_FN_TPUTI2,	TPUTI2_MARK),
+
+	/* LCDC */
+	PINMUX_GPIO(GPIO_FN_LCDD23,	LCDD23_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD22,	LCDD22_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD21,	LCDD21_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD20,	LCDD20_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD19,	LCDD19_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD18,	LCDD18_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD17,	LCDD17_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD16,	LCDD16_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD15,	LCDD15_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD14,	LCDD14_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD13,	LCDD13_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD12,	LCDD12_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD11,	LCDD11_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD10,	LCDD10_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD9,	LCDD9_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD8,	LCDD8_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD7,	LCDD7_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD6,	LCDD6_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD5,	LCDD5_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD4,	LCDD4_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD3,	LCDD3_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD2,	LCDD2_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD1,	LCDD1_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDD0,	LCDD0_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVSYN,	LCDVSYN_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDISP,	LCDDISP_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDRS,	LCDRS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDHSYN,	LCDHSYN_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDCS,	LCDCS_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDON,	LCDDON_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDDCK,	LCDDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDWR,	LCDWR_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVEPWC,	LCDVEPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDVCPWC,	LCDVCPWC_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDRD,	LCDRD_MARK),
+	PINMUX_GPIO(GPIO_FN_LCDLCLK,	LCDLCLK_MARK),
+
+	/* SCIF0 */
+	PINMUX_GPIO(GPIO_FN_SCIF0_TXD,	SCIF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_RXD,	SCIF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF0_SCK,	SCIF0_SCK_MARK),
+
+	/* SCIF1 */
+	PINMUX_GPIO(GPIO_FN_SCIF1_SCK,	SCIF1_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_RXD,	SCIF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF1_TXD,	SCIF1_TXD_MARK),
+
+	/* SCIF2 */
+	PINMUX_GPIO(GPIO_FN_SCIF2_L_TXD,	SCIF2_L_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_L_SCK,	SCIF2_L_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_L_RXD,	SCIF2_L_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_V_TXD,	SCIF2_V_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_V_SCK,	SCIF2_V_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF2_V_RXD,	SCIF2_V_RXD_MARK),
+
+	/* SCIF3 */
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_SCK,	SCIF3_V_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_RXD,	SCIF3_V_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_TXD,	SCIF3_V_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_CTS,	SCIF3_V_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_V_RTS,	SCIF3_V_RTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_SCK,	SCIF3_I_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_RXD,	SCIF3_I_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_TXD,	SCIF3_I_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_CTS,	SCIF3_I_CTS_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF3_I_RTS,	SCIF3_I_RTS_MARK),
+
+	/* SCIF4 */
+	PINMUX_GPIO(GPIO_FN_SCIF4_SCK,	SCIF4_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_RXD,	SCIF4_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF4_TXD,	SCIF4_TXD_MARK),
+
+	/* SCIF5 */
+	PINMUX_GPIO(GPIO_FN_SCIF5_SCK,	SCIF5_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_RXD,	SCIF5_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_SCIF5_TXD,	SCIF5_TXD_MARK),
+
+	/* FSI */
+	PINMUX_GPIO(GPIO_FN_FSIMCKB,	FSIMCKB_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIMCKA,	FSIMCKA_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOASD,	FSIOASD_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIABCK,	FSIIABCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIALRCK,	FSIIALRCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOABCK,	FSIOABCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOALRCK,	FSIOALRCK_MARK),
+	PINMUX_GPIO(GPIO_FN_CLKAUDIOAO,	CLKAUDIOAO_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIBSD,	FSIIBSD_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOBSD,	FSIOBSD_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIBBCK,	FSIIBBCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIBLRCK,	FSIIBLRCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOBBCK,	FSIOBBCK_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIOBLRCK,	FSIOBLRCK_MARK),
+	PINMUX_GPIO(GPIO_FN_CLKAUDIOBO,	CLKAUDIOBO_MARK),
+	PINMUX_GPIO(GPIO_FN_FSIIASD,	FSIIASD_MARK),
+
+	/* AUD */
+	PINMUX_GPIO(GPIO_FN_AUDCK,	AUDCK_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDSYNC,	AUDSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA3,	AUDATA3_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA2,	AUDATA2_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA1,	AUDATA1_MARK),
+	PINMUX_GPIO(GPIO_FN_AUDATA0,	AUDATA0_MARK),
+
+	/* VIO */
+	PINMUX_GPIO(GPIO_FN_VIO_CKO,	VIO_CKO_MARK),
+
+	/* VIO0 */
+	PINMUX_GPIO(GPIO_FN_VIO0_D15,	VIO0_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D14,	VIO0_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D13,	VIO0_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D12,	VIO0_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D11,	VIO0_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D10,	VIO0_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D9,	VIO0_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D8,	VIO0_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D7,	VIO0_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D6,	VIO0_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D5,	VIO0_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D4,	VIO0_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D3,	VIO0_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D2,	VIO0_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D1,	VIO0_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_D0,	VIO0_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_VD,	VIO0_VD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_CLK,	VIO0_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_FLD,	VIO0_FLD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO0_HD,	VIO0_HD_MARK),
+
+	/* VIO1 */
+	PINMUX_GPIO(GPIO_FN_VIO1_D7,	VIO1_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D6,	VIO1_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D5,	VIO1_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D4,	VIO1_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D3,	VIO1_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D2,	VIO1_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D1,	VIO1_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_D0,	VIO1_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_FLD,	VIO1_FLD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_HD,	VIO1_HD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_VD,	VIO1_VD_MARK),
+	PINMUX_GPIO(GPIO_FN_VIO1_CLK,	VIO1_CLK_MARK),
+
+	/* Eth */
+	PINMUX_GPIO(GPIO_FN_RMII_RXD0,		RMII_RXD0_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_RXD1,		RMII_RXD1_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_TXD0,		RMII_TXD0_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_TXD1,		RMII_TXD1_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_REF_CLK,	RMII_REF_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_TX_EN,		RMII_TX_EN_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_RX_ER,		RMII_RX_ER_MARK),
+	PINMUX_GPIO(GPIO_FN_RMII_CRS_DV,	RMII_CRS_DV_MARK),
+	PINMUX_GPIO(GPIO_FN_LNKSTA,		LNKSTA_MARK),
+	PINMUX_GPIO(GPIO_FN_MDIO,		MDIO_MARK),
+	PINMUX_GPIO(GPIO_FN_MDC,		MDC_MARK),
+
+	/* System */
+	PINMUX_GPIO(GPIO_FN_PDSTATUS,	PDSTATUS_MARK),
+	PINMUX_GPIO(GPIO_FN_STATUS2,	STATUS2_MARK),
+	PINMUX_GPIO(GPIO_FN_STATUS0,	STATUS0_MARK),
+
+	/* VOU */
+	PINMUX_GPIO(GPIO_FN_DV_D15,	DV_D15_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D14,	DV_D14_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D13,	DV_D13_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D12,	DV_D12_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D11,	DV_D11_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D10,	DV_D10_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D9,	DV_D9_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D8,	DV_D8_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D7,	DV_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D6,	DV_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D5,	DV_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D4,	DV_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D3,	DV_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D2,	DV_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D1,	DV_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_D0,	DV_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_CLKI,	DV_CLKI_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_CLK,	DV_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_VSYNC,	DV_VSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_DV_HSYNC,	DV_HSYNC_MARK),
+
+	/* MSIOF0 */
+	PINMUX_GPIO(GPIO_FN_MSIOF0_RXD,		MSIOF0_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_TXD,		MSIOF0_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_MCK,		MSIOF0_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_TSCK,	MSIOF0_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_SS1,		MSIOF0_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_SS2,		MSIOF0_SS2_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_TSYNC,	MSIOF0_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_RSCK,	MSIOF0_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF0_RSYNC,	MSIOF0_RSYNC_MARK),
+
+	/* MSIOF1 */
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RXD,		MSIOF1_RXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TXD,		MSIOF1_TXD_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_MCK,		MSIOF1_MCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TSCK,	MSIOF1_TSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_SS1,		MSIOF1_SS1_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_SS2,		MSIOF1_SS2_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_TSYNC,	MSIOF1_TSYNC_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RSCK,	MSIOF1_RSCK_MARK),
+	PINMUX_GPIO(GPIO_FN_MSIOF1_RSYNC,	MSIOF1_RSYNC_MARK),
+
+	/* DMAC */
+	PINMUX_GPIO(GPIO_FN_DMAC_DACK0,	DMAC_DACK0_MARK),
+	PINMUX_GPIO(GPIO_FN_DMAC_DREQ0,	DMAC_DREQ0_MARK),
+	PINMUX_GPIO(GPIO_FN_DMAC_DACK1,	DMAC_DACK1_MARK),
+	PINMUX_GPIO(GPIO_FN_DMAC_DREQ1,	DMAC_DREQ1_MARK),
+
+	/* SDHI0 */
+	PINMUX_GPIO(GPIO_FN_SDHI0CD,	SDHI0CD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0WP,	SDHI0WP_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CMD,	SDHI0CMD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0CLK,	SDHI0CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D3,	SDHI0D3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D2,	SDHI0D2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D1,	SDHI0D1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI0D0,	SDHI0D0_MARK),
+
+	/* SDHI1 */
+	PINMUX_GPIO(GPIO_FN_SDHI1CD,	SDHI1CD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1WP,	SDHI1WP_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1CMD,	SDHI1CMD_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1CLK,	SDHI1CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D3,	SDHI1D3_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D2,	SDHI1D2_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D1,	SDHI1D1_MARK),
+	PINMUX_GPIO(GPIO_FN_SDHI1D0,	SDHI1D0_MARK),
+
+	/* MMC */
+	PINMUX_GPIO(GPIO_FN_MMC_D7,	MMC_D7_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D6,	MMC_D6_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D5,	MMC_D5_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D4,	MMC_D4_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D3,	MMC_D3_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D2,	MMC_D2_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D1,	MMC_D1_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_D0,	MMC_D0_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_CLK,	MMC_CLK_MARK),
+	PINMUX_GPIO(GPIO_FN_MMC_CMD,	MMC_CMD_MARK),
+
+	/* IrDA */
+	PINMUX_GPIO(GPIO_FN_IRDA_OUT,	IRDA_OUT_MARK),
+	PINMUX_GPIO(GPIO_FN_IRDA_IN,	IRDA_IN_MARK),
+
+	/* TSIF */
+	PINMUX_GPIO(GPIO_FN_TSIF_TS0_SDAT,	TSIF_TS0_SDAT_MARK),
+	PINMUX_GPIO(GPIO_FN_TSIF_TS0_SCK,	TSIF_TS0_SCK_MARK),
+	PINMUX_GPIO(GPIO_FN_TSIF_TS0_SDEN,	TSIF_TS0_SDEN_MARK),
+	PINMUX_GPIO(GPIO_FN_TSIF_TS0_SPSYNC,	TSIF_TS0_SPSYNC_MARK),
+
+	/* IRQ */
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ7,	INTC_IRQ7_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ6,	INTC_IRQ6_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ5,	INTC_IRQ5_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ4,	INTC_IRQ4_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ3,	INTC_IRQ3_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ2,	INTC_IRQ2_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ1,	INTC_IRQ1_MARK),
+	PINMUX_GPIO(GPIO_FN_INTC_IRQ0,	INTC_IRQ0_MARK),
+ };
+
+static struct pinmux_cfg_reg pinmux_config_regs[] = {
+	{ PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) {
+		PTA7_FN, PTA7_OUT, PTA7_IN_PU, PTA7_IN,
+		PTA6_FN, PTA6_OUT, PTA6_IN_PU, PTA6_IN,
+		PTA5_FN, PTA5_OUT, PTA5_IN_PU, PTA5_IN,
+		PTA4_FN, PTA4_OUT, PTA4_IN_PU, PTA4_IN,
+		PTA3_FN, PTA3_OUT, PTA3_IN_PU, PTA3_IN,
+		PTA2_FN, PTA2_OUT, PTA2_IN_PU, PTA2_IN,
+		PTA1_FN, PTA1_OUT, PTA1_IN_PU, PTA1_IN,
+		PTA0_FN, PTA0_OUT, PTA0_IN_PU, PTA0_IN }
+	},
+	{ PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) {
+		PTB7_FN, PTB7_OUT, PTB7_IN_PU, PTB7_IN,
+		PTB6_FN, PTB6_OUT, PTB6_IN_PU, PTB6_IN,
+		PTB5_FN, PTB5_OUT, PTB5_IN_PU, PTB5_IN,
+		PTB4_FN, PTB4_OUT, PTB4_IN_PU, PTB4_IN,
+		PTB3_FN, PTB3_OUT, PTB3_IN_PU, PTB3_IN,
+		PTB2_FN, PTB2_OUT, PTB2_IN_PU, PTB2_IN,
+		PTB1_FN, PTB1_OUT, PTB1_IN_PU, PTB1_IN,
+		PTB0_FN, PTB0_OUT, PTB0_IN_PU, PTB0_IN }
+	},
+	{ PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) {
+		PTC7_FN, PTC7_OUT, PTC7_IN_PU, PTC7_IN,
+		PTC6_FN, PTC6_OUT, PTC6_IN_PU, PTC6_IN,
+		PTC5_FN, PTC5_OUT, PTC5_IN_PU, PTC5_IN,
+		PTC4_FN, PTC4_OUT, PTC4_IN_PU, PTC4_IN,
+		PTC3_FN, PTC3_OUT, PTC3_IN_PU, PTC3_IN,
+		PTC2_FN, PTC2_OUT, PTC2_IN_PU, PTC2_IN,
+		PTC1_FN, PTC1_OUT, PTC1_IN_PU, PTC1_IN,
+		PTC0_FN, PTC0_OUT, PTC0_IN_PU, PTC0_IN }
+	},
+	{ PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) {
+		PTD7_FN, PTD7_OUT, PTD7_IN_PU, PTD7_IN,
+		PTD6_FN, PTD6_OUT, PTD6_IN_PU, PTD6_IN,
+		PTD5_FN, PTD5_OUT, PTD5_IN_PU, PTD5_IN,
+		PTD4_FN, PTD4_OUT, PTD4_IN_PU, PTD4_IN,
+		PTD3_FN, PTD3_OUT, PTD3_IN_PU, PTD3_IN,
+		PTD2_FN, PTD2_OUT, PTD2_IN_PU, PTD2_IN,
+		PTD1_FN, PTD1_OUT, PTD1_IN_PU, PTD1_IN,
+		PTD0_FN, PTD0_OUT, PTD0_IN_PU, PTD0_IN }
+	},
+	{ PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) {
+		PTE7_FN, PTE7_OUT, PTE7_IN_PU, PTE7_IN,
+		PTE6_FN, PTE6_OUT, PTE6_IN_PU, PTE6_IN,
+		PTE5_FN, PTE5_OUT, PTE5_IN_PU, PTE5_IN,
+		PTE4_FN, PTE4_OUT, PTE4_IN_PU, PTE4_IN,
+		PTE3_FN, PTE3_OUT, PTE3_IN_PU, PTE3_IN,
+		PTE2_FN, PTE2_OUT, PTE2_IN_PU, PTE2_IN,
+		PTE1_FN, PTE1_OUT, PTE1_IN_PU, PTE1_IN,
+		PTE0_FN, PTE0_OUT, PTE0_IN_PU, PTE0_IN }
+	},
+	{ PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) {
+		PTF7_FN, PTF7_OUT, PTF7_IN_PU, PTF7_IN,
+		PTF6_FN, PTF6_OUT, PTF6_IN_PU, PTF6_IN,
+		PTF5_FN, PTF5_OUT, PTF5_IN_PU, PTF5_IN,
+		PTF4_FN, PTF4_OUT, PTF4_IN_PU, PTF4_IN,
+		PTF3_FN, PTF3_OUT, PTF3_IN_PU, PTF3_IN,
+		PTF2_FN, PTF2_OUT, PTF2_IN_PU, PTF2_IN,
+		PTF1_FN, PTF1_OUT, PTF1_IN_PU, PTF1_IN,
+		PTF0_FN, PTF0_OUT, PTF0_IN_PU, PTF0_IN }
+	},
+	{ PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		PTG5_FN, PTG5_OUT, 0, 0,
+		PTG4_FN, PTG4_OUT, 0, 0,
+		PTG3_FN, PTG3_OUT, 0, 0,
+		PTG2_FN, PTG2_OUT, 0, 0,
+		PTG1_FN, PTG1_OUT, 0, 0,
+		PTG0_FN, PTG0_OUT, 0, 0 }
+	},
+	{ PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) {
+		PTH7_FN, PTH7_OUT, PTH7_IN_PU, PTH7_IN,
+		PTH6_FN, PTH6_OUT, PTH6_IN_PU, PTH6_IN,
+		PTH5_FN, PTH5_OUT, PTH5_IN_PU, PTH5_IN,
+		PTH4_FN, PTH4_OUT, PTH4_IN_PU, PTH4_IN,
+		PTH3_FN, PTH3_OUT, PTH3_IN_PU, PTH3_IN,
+		PTH2_FN, PTH2_OUT, PTH2_IN_PU, PTH2_IN,
+		PTH1_FN, PTH1_OUT, PTH1_IN_PU, PTH1_IN,
+		PTH0_FN, PTH0_OUT, PTH0_IN_PU, PTH0_IN }
+	},
+	{ PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) {
+		PTJ7_FN, PTJ7_OUT, 0, 0,
+		PTJ6_FN, PTJ6_OUT, 0, 0,
+		PTJ5_FN, PTJ5_OUT, 0, 0,
+		0, 0, 0, 0,
+		PTJ3_FN, PTJ3_OUT, PTJ3_IN_PU, PTJ3_IN,
+		PTJ2_FN, PTJ2_OUT, PTJ2_IN_PU, PTJ2_IN,
+		PTJ1_FN, PTJ1_OUT, PTJ1_IN_PU, PTJ1_IN,
+		PTJ0_FN, PTJ0_OUT, PTJ0_IN_PU, PTJ0_IN }
+	},
+	{ PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) {
+		PTK7_FN, PTK7_OUT, PTK7_IN_PU, PTK7_IN,
+		PTK6_FN, PTK6_OUT, PTK6_IN_PU, PTK6_IN,
+		PTK5_FN, PTK5_OUT, PTK5_IN_PU, PTK5_IN,
+		PTK4_FN, PTK4_OUT, PTK4_IN_PU, PTK4_IN,
+		PTK3_FN, PTK3_OUT, PTK3_IN_PU, PTK3_IN,
+		PTK2_FN, PTK2_OUT, PTK2_IN_PU, PTK2_IN,
+		PTK1_FN, PTK1_OUT, PTK1_IN_PU, PTK1_IN,
+		PTK0_FN, PTK0_OUT, PTK0_IN_PU, PTK0_IN }
+	},
+	{ PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) {
+		PTL7_FN, PTL7_OUT, PTL7_IN_PU, PTL7_IN,
+		PTL6_FN, PTL6_OUT, PTL6_IN_PU, PTL6_IN,
+		PTL5_FN, PTL5_OUT, PTL5_IN_PU, PTL5_IN,
+		PTL4_FN, PTL4_OUT, PTL4_IN_PU, PTL4_IN,
+		PTL3_FN, PTL3_OUT, PTL3_IN_PU, PTL3_IN,
+		PTL2_FN, PTL2_OUT, PTL2_IN_PU, PTL2_IN,
+		PTL1_FN, PTL1_OUT, PTL1_IN_PU, PTL1_IN,
+		PTL0_FN, PTL0_OUT, PTL0_IN_PU, PTL0_IN }
+	},
+	{ PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) {
+		PTM7_FN, PTM7_OUT, PTM7_IN_PU, PTM7_IN,
+		PTM6_FN, PTM6_OUT, PTM6_IN_PU, PTM6_IN,
+		PTM5_FN, PTM5_OUT, PTM5_IN_PU, PTM5_IN,
+		PTM4_FN, PTM4_OUT, PTM4_IN_PU, PTM4_IN,
+		PTM3_FN, PTM3_OUT, PTM3_IN_PU, PTM3_IN,
+		PTM2_FN, PTM2_OUT, PTM2_IN_PU, PTM2_IN,
+		PTM1_FN, PTM1_OUT, PTM1_IN_PU, PTM1_IN,
+		PTM0_FN, PTM0_OUT, PTM0_IN_PU, PTM0_IN }
+	},
+	{ PINMUX_CFG_REG("PNCR", 0xa4050118, 16, 2) {
+		PTN7_FN, PTN7_OUT, PTN7_IN_PU, PTN7_IN,
+		PTN6_FN, PTN6_OUT, PTN6_IN_PU, PTN6_IN,
+		PTN5_FN, PTN5_OUT, PTN5_IN_PU, PTN5_IN,
+		PTN4_FN, PTN4_OUT, PTN4_IN_PU, PTN4_IN,
+		PTN3_FN, PTN3_OUT, PTN3_IN_PU, PTN3_IN,
+		PTN2_FN, PTN2_OUT, PTN2_IN_PU, PTN2_IN,
+		PTN1_FN, PTN1_OUT, PTN1_IN_PU, PTN1_IN,
+		PTN0_FN, PTN0_OUT, PTN0_IN_PU, PTN0_IN }
+	},
+	{ PINMUX_CFG_REG("PQCR", 0xa405011a, 16, 2) {
+		PTQ7_FN, PTQ7_OUT, PTQ7_IN_PU, PTQ7_IN,
+		PTQ6_FN, PTQ6_OUT, PTQ6_IN_PU, PTQ6_IN,
+		PTQ5_FN, PTQ5_OUT, PTQ5_IN_PU, PTQ5_IN,
+		PTQ4_FN, PTQ4_OUT, PTQ4_IN_PU, PTQ4_IN,
+		PTQ3_FN, PTQ3_OUT, PTQ3_IN_PU, PTQ3_IN,
+		PTQ2_FN, PTQ2_OUT, PTQ2_IN_PU, PTQ2_IN,
+		PTQ1_FN, PTQ1_OUT, PTQ1_IN_PU, PTQ1_IN,
+		PTQ0_FN, PTQ0_OUT, PTQ0_IN_PU, PTQ0_IN }
+	},
+	{ PINMUX_CFG_REG("PRCR", 0xa405011c, 16, 2) {
+		PTR7_FN, PTR7_OUT, PTR7_IN_PU, PTR7_IN,
+		PTR6_FN, PTR6_OUT, PTR6_IN_PU, PTR6_IN,
+		PTR5_FN, PTR5_OUT, PTR5_IN_PU, PTR5_IN,
+		PTR4_FN, PTR4_OUT, PTR4_IN_PU, PTR4_IN,
+		PTR3_FN, 0,        PTR3_IN_PU, PTR3_IN,
+		PTR2_FN, 0,        PTR2_IN_PU, PTR2_IN,
+		PTR1_FN, PTR1_OUT, PTR1_IN_PU, PTR1_IN,
+		PTR0_FN, PTR0_OUT, PTR0_IN_PU, PTR0_IN }
+	},
+	{ PINMUX_CFG_REG("PSCR", 0xa405011e, 16, 2) {
+		0, 0, 0, 0,
+		PTS6_FN, PTS6_OUT, PTS6_IN_PU, PTS6_IN,
+		PTS5_FN, PTS5_OUT, PTS5_IN_PU, PTS5_IN,
+		PTS4_FN, PTS4_OUT, PTS4_IN_PU, PTS4_IN,
+		PTS3_FN, PTS3_OUT, PTS3_IN_PU, PTS3_IN,
+		PTS2_FN, PTS2_OUT, PTS2_IN_PU, PTS2_IN,
+		PTS1_FN, PTS1_OUT, PTS1_IN_PU, PTS1_IN,
+		PTS0_FN, PTS0_OUT, PTS0_IN_PU, PTS0_IN }
+	},
+	{ PINMUX_CFG_REG("PTCR", 0xa4050140, 16, 2) {
+		PTT7_FN, PTT7_OUT, PTT7_IN_PU, PTT7_IN,
+		PTT6_FN, PTT6_OUT, PTT6_IN_PU, PTT6_IN,
+		PTT5_FN, PTT5_OUT, PTT5_IN_PU, PTT5_IN,
+		PTT4_FN, PTT4_OUT, PTT4_IN_PU, PTT4_IN,
+		PTT3_FN, PTT3_OUT, PTT3_IN_PU, PTT3_IN,
+		PTT2_FN, PTT2_OUT, PTT2_IN_PU, PTT2_IN,
+		PTT1_FN, PTT1_OUT, PTT1_IN_PU, PTT1_IN,
+		PTT0_FN, PTT0_OUT, PTT0_IN_PU, PTT0_IN }
+	},
+	{ PINMUX_CFG_REG("PUCR", 0xa4050142, 16, 2) {
+		PTU7_FN, PTU7_OUT, PTU7_IN_PU, PTU7_IN,
+		PTU6_FN, PTU6_OUT, PTU6_IN_PU, PTU6_IN,
+		PTU5_FN, PTU5_OUT, PTU5_IN_PU, PTU5_IN,
+		PTU4_FN, PTU4_OUT, PTU4_IN_PU, PTU4_IN,
+		PTU3_FN, PTU3_OUT, PTU3_IN_PU, PTU3_IN,
+		PTU2_FN, PTU2_OUT, PTU2_IN_PU, PTU2_IN,
+		PTU1_FN, PTU1_OUT, PTU1_IN_PU, PTU1_IN,
+		PTU0_FN, PTU0_OUT, PTU0_IN_PU, PTU0_IN }
+	},
+	{ PINMUX_CFG_REG("PVCR", 0xa4050144, 16, 2) {
+		PTV7_FN, PTV7_OUT, PTV7_IN_PU, PTV7_IN,
+		PTV6_FN, PTV6_OUT, PTV6_IN_PU, PTV6_IN,
+		PTV5_FN, PTV5_OUT, PTV5_IN_PU, PTV5_IN,
+		PTV4_FN, PTV4_OUT, PTV4_IN_PU, PTV4_IN,
+		PTV3_FN, PTV3_OUT, PTV3_IN_PU, PTV3_IN,
+		PTV2_FN, PTV2_OUT, PTV2_IN_PU, PTV2_IN,
+		PTV1_FN, PTV1_OUT, PTV1_IN_PU, PTV1_IN,
+		PTV0_FN, PTV0_OUT, PTV0_IN_PU, PTV0_IN }
+	},
+	{ PINMUX_CFG_REG("PWCR", 0xa4050146, 16, 2) {
+		PTW7_FN, PTW7_OUT, PTW7_IN_PU, PTW7_IN,
+		PTW6_FN, PTW6_OUT, PTW6_IN_PU, PTW6_IN,
+		PTW5_FN, PTW5_OUT, PTW5_IN_PU, PTW5_IN,
+		PTW4_FN, PTW4_OUT, PTW4_IN_PU, PTW4_IN,
+		PTW3_FN, PTW3_OUT, PTW3_IN_PU, PTW3_IN,
+		PTW2_FN, PTW2_OUT, PTW2_IN_PU, PTW2_IN,
+		PTW1_FN, PTW1_OUT, PTW1_IN_PU, PTW1_IN,
+		PTW0_FN, PTW0_OUT, PTW0_IN_PU, PTW0_IN }
+	},
+	{ PINMUX_CFG_REG("PXCR", 0xa4050148, 16, 2) {
+		PTX7_FN, PTX7_OUT, PTX7_IN_PU, PTX7_IN,
+		PTX6_FN, PTX6_OUT, PTX6_IN_PU, PTX6_IN,
+		PTX5_FN, PTX5_OUT, PTX5_IN_PU, PTX5_IN,
+		PTX4_FN, PTX4_OUT, PTX4_IN_PU, PTX4_IN,
+		PTX3_FN, PTX3_OUT, PTX3_IN_PU, PTX3_IN,
+		PTX2_FN, PTX2_OUT, PTX2_IN_PU, PTX2_IN,
+		PTX1_FN, PTX1_OUT, PTX1_IN_PU, PTX1_IN,
+		PTX0_FN, PTX0_OUT, PTX0_IN_PU, PTX0_IN }
+	},
+	{ PINMUX_CFG_REG("PYCR", 0xa405014a, 16, 2) {
+		PTY7_FN, PTY7_OUT, PTY7_IN_PU, PTY7_IN,
+		PTY6_FN, PTY6_OUT, PTY6_IN_PU, PTY6_IN,
+		PTY5_FN, PTY5_OUT, PTY5_IN_PU, PTY5_IN,
+		PTY4_FN, PTY4_OUT, PTY4_IN_PU, PTY4_IN,
+		PTY3_FN, PTY3_OUT, PTY3_IN_PU, PTY3_IN,
+		PTY2_FN, PTY2_OUT, PTY2_IN_PU, PTY2_IN,
+		PTY1_FN, PTY1_OUT, PTY1_IN_PU, PTY1_IN,
+		PTY0_FN, PTY0_OUT, PTY0_IN_PU, PTY0_IN }
+	},
+	{ PINMUX_CFG_REG("PZCR", 0xa405014c, 16, 2) {
+		PTZ7_FN, PTZ7_OUT, PTZ7_IN_PU, PTZ7_IN,
+		PTZ6_FN, PTZ6_OUT, PTZ6_IN_PU, PTZ6_IN,
+		PTZ5_FN, PTZ5_OUT, PTZ5_IN_PU, PTZ5_IN,
+		PTZ4_FN, PTZ4_OUT, PTZ4_IN_PU, PTZ4_IN,
+		PTZ3_FN, PTZ3_OUT, PTZ3_IN_PU, PTZ3_IN,
+		PTZ2_FN, PTZ2_OUT, PTZ2_IN_PU, PTZ2_IN,
+		PTZ1_FN, PTZ1_OUT, PTZ1_IN_PU, PTZ1_IN,
+		PTZ0_FN, PTZ0_OUT, PTZ0_IN_PU, PTZ0_IN }
+	},
+	{ PINMUX_CFG_REG("PSELA", 0xa405014e, 16, 1) {
+		PSA15_0, PSA15_1,
+		PSA14_0, PSA14_1,
+		PSA13_0, PSA13_1,
+		PSA12_0, PSA12_1,
+		0, 0,
+		PSA10_0, PSA10_1,
+		PSA9_0,  PSA9_1,
+		PSA8_0,  PSA8_1,
+		PSA7_0,  PSA7_1,
+		PSA6_0,  PSA6_1,
+		PSA5_0,  PSA5_1,
+		0, 0,
+		PSA3_0,  PSA3_1,
+		PSA2_0,  PSA2_1,
+		PSA1_0,  PSA1_1,
+		PSA0_0,  PSA0_1}
+	},
+	{ PINMUX_CFG_REG("PSELB", 0xa4050150, 16, 1) {
+		0, 0,
+		PSB14_0, PSB14_1,
+		PSB13_0, PSB13_1,
+		PSB12_0, PSB12_1,
+		PSB11_0, PSB11_1,
+		PSB10_0, PSB10_1,
+		PSB9_0,  PSB9_1,
+		PSB8_0,  PSB8_1,
+		PSB7_0,  PSB7_1,
+		PSB6_0,  PSB6_1,
+		PSB5_0,  PSB5_1,
+		PSB4_0,  PSB4_1,
+		PSB3_0,  PSB3_1,
+		PSB2_0,  PSB2_1,
+		PSB1_0,  PSB1_1,
+		PSB0_0,  PSB0_1}
+	},
+	{ PINMUX_CFG_REG("PSELC", 0xa4050152, 16, 1) {
+		PSC15_0, PSC15_1,
+		PSC14_0, PSC14_1,
+		PSC13_0, PSC13_1,
+		PSC12_0, PSC12_1,
+		PSC11_0, PSC11_1,
+		PSC10_0, PSC10_1,
+		PSC9_0,  PSC9_1,
+		PSC8_0,  PSC8_1,
+		PSC7_0,  PSC7_1,
+		PSC6_0,  PSC6_1,
+		PSC5_0,  PSC5_1,
+		PSC4_0,  PSC4_1,
+		0, 0,
+		PSC2_0,  PSC2_1,
+		PSC1_0,  PSC1_1,
+		PSC0_0,  PSC0_1}
+	},
+	{ PINMUX_CFG_REG("PSELD", 0xa4050154, 16, 1) {
+		PSD15_0, PSD15_1,
+		PSD14_0, PSD14_1,
+		PSD13_0, PSD13_1,
+		PSD12_0, PSD12_1,
+		PSD11_0, PSD11_1,
+		PSD10_0, PSD10_1,
+		PSD9_0,  PSD9_1,
+		PSD8_0,  PSD8_1,
+		PSD7_0,  PSD7_1,
+		PSD6_0,  PSD6_1,
+		PSD5_0,  PSD5_1,
+		PSD4_0,  PSD4_1,
+		PSD3_0,  PSD3_1,
+		PSD2_0,  PSD2_1,
+		PSD1_0,  PSD1_1,
+		PSD0_0,  PSD0_1}
+	},
+	{ PINMUX_CFG_REG("PSELE", 0xa4050156, 16, 1) {
+		PSE15_0, PSE15_1,
+		PSE14_0, PSE14_1,
+		PSE13_0, PSE13_1,
+		PSE12_0, PSE12_1,
+		PSE11_0, PSE11_1,
+		PSE10_0, PSE10_1,
+		PSE9_0,  PSE9_1,
+		PSE8_0,  PSE8_1,
+		PSE7_0,  PSE7_1,
+		PSE6_0,  PSE6_1,
+		PSE5_0,  PSE5_1,
+		PSE4_0,  PSE4_1,
+		PSE3_0,  PSE3_1,
+		PSE2_0,  PSE2_1,
+		PSE1_0,  PSE1_1,
+		PSE0_0,  PSE0_1}
+	},
+	{}
+};
+
+static struct pinmux_data_reg pinmux_data_regs[] = {
+	{ PINMUX_DATA_REG("PADR", 0xa4050120, 8) {
+		PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA,
+		PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA }
+	},
+	{ PINMUX_DATA_REG("PBDR", 0xa4050122, 8) {
+		PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA,
+		PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA }
+	},
+	{ PINMUX_DATA_REG("PCDR", 0xa4050124, 8) {
+		PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA,
+		PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA }
+	},
+	{ PINMUX_DATA_REG("PDDR", 0xa4050126, 8) {
+		PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA,
+		PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA }
+	},
+	{ PINMUX_DATA_REG("PEDR", 0xa4050128, 8) {
+		PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA,
+		PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA }
+	},
+	{ PINMUX_DATA_REG("PFDR", 0xa405012a, 8) {
+		PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA,
+		PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA }
+	},
+	{ PINMUX_DATA_REG("PGDR", 0xa405012c, 8) {
+		0,         0,         PTG5_DATA, PTG4_DATA,
+		PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA }
+	},
+	{ PINMUX_DATA_REG("PHDR", 0xa405012e, 8) {
+		PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA,
+		PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA }
+	},
+	{ PINMUX_DATA_REG("PJDR", 0xa4050130, 8) {
+		PTJ7_DATA, PTJ6_DATA, PTJ5_DATA, 0,
+		PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PKDR", 0xa4050132, 8) {
+		PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA,
+		PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA }
+	},
+	{ PINMUX_DATA_REG("PLDR", 0xa4050134, 8) {
+		PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA,
+		PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA }
+	},
+	{ PINMUX_DATA_REG("PMDR", 0xa4050136, 8) {
+		PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA,
+		PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA }
+	},
+	{ PINMUX_DATA_REG("PNDR", 0xa4050138, 8) {
+		PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA,
+		PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA }
+	},
+	{ PINMUX_DATA_REG("PQDR", 0xa405013a, 8) {
+		PTQ7_DATA, PTQ6_DATA, PTQ5_DATA, PTQ4_DATA,
+		PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA }
+	},
+	{ PINMUX_DATA_REG("PRDR", 0xa405013c, 8) {
+		PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA,
+		PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA }
+	},
+	{ PINMUX_DATA_REG("PSDR", 0xa405013e, 8) {
+		0,         PTS6_DATA, PTS5_DATA, PTS4_DATA,
+		PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA }
+	},
+	{ PINMUX_DATA_REG("PTDR", 0xa4050160, 8) {
+		PTT7_DATA, PTT6_DATA, PTT5_DATA, PTT4_DATA,
+		PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA }
+	},
+	{ PINMUX_DATA_REG("PUDR", 0xa4050162, 8) {
+		PTU7_DATA, PTU6_DATA, PTU5_DATA, PTU4_DATA,
+		PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA }
+	},
+	{ PINMUX_DATA_REG("PVDR", 0xa4050164, 8) {
+		PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA,
+		PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA }
+	},
+	{ PINMUX_DATA_REG("PWDR", 0xa4050166, 8) {
+		PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA,
+		PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA }
+	},
+	{ PINMUX_DATA_REG("PXDR", 0xa4050168, 8) {
+		PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA,
+		PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA }
+	},
+	{ PINMUX_DATA_REG("PYDR", 0xa405016a, 8) {
+		PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA,
+		PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA }
+	},
+	{ PINMUX_DATA_REG("PZDR", 0xa405016c, 8) {
+		PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA,
+		PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA }
+	},
+	{ },
+};
+
+static struct pinmux_info sh7724_pinmux_info = {
+	.name = "sh7724_pfc",
+	.reserved_id = PINMUX_RESERVED,
+	.data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END },
+	.input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END },
+	.input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END },
+	.output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END },
+	.mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END },
+	.function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END },
+
+	.first_gpio = GPIO_PTA7,
+	.last_gpio = GPIO_FN_INTC_IRQ0,
+
+	.gpios = pinmux_gpios,
+	.cfg_regs = pinmux_config_regs,
+	.data_regs = pinmux_data_regs,
+
+	.gpio_data = pinmux_data,
+	.gpio_data_size = ARRAY_SIZE(pinmux_data),
+};
+
+static int __init plat_pinmux_setup(void)
+{
+	return register_pinmux(&sh7724_pinmux_info);
+}
+arch_initcall(plat_pinmux_setup);
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
new file mode 100644
index 0000000..4327b1e
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -0,0 +1,371 @@
+/*
+ * SH7724 Setup
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on SH7723 Setup
+ * Copyright (C) 2008  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/serial.h>
+#include <linux/mm.h>
+#include <linux/serial_sci.h>
+#include <linux/uio_driver.h>
+#include <linux/sh_cmt.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+#include <asm/mmzone.h>
+
+/* Serial */
+static struct plat_sci_port sci_platform_data[] = {
+	{
+		.mapbase        = 0xffe00000,
+		.flags          = UPF_BOOT_AUTOCONF,
+		.type           = PORT_SCIF,
+		.irqs           = { 80, 80, 80, 80 },
+	}, {
+		.mapbase        = 0xffe10000,
+		.flags          = UPF_BOOT_AUTOCONF,
+		.type           = PORT_SCIF,
+		.irqs           = { 81, 81, 81, 81 },
+	}, {
+		.mapbase        = 0xffe20000,
+		.flags          = UPF_BOOT_AUTOCONF,
+		.type           = PORT_SCIF,
+		.irqs           = { 82, 82, 82, 82 },
+	}, {
+		.mapbase	= 0xa4e30000,
+		.flags		= UPF_BOOT_AUTOCONF,
+		.type		= PORT_SCIFA,
+		.irqs		= { 56, 56, 56, 56 },
+	}, {
+		.mapbase	= 0xa4e40000,
+		.flags		= UPF_BOOT_AUTOCONF,
+		.type		= PORT_SCIFA,
+		.irqs		= { 88, 88, 88, 88 },
+	}, {
+		.mapbase	= 0xa4e50000,
+		.flags		= UPF_BOOT_AUTOCONF,
+		.type		= PORT_SCIFA,
+		.irqs		= { 109, 109, 109, 109 },
+	}, {
+		.flags = 0,
+	}
+};
+
+static struct platform_device sci_device = {
+	.name		= "sh-sci",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= sci_platform_data,
+	},
+};
+
+/* RTC */
+static struct resource rtc_resources[] = {
+	[0] = {
+		.start	= 0xa465fec0,
+		.end	= 0xa465fec0 + 0x58 - 1,
+		.flags	= IORESOURCE_IO,
+	},
+	[1] = {
+		/* Period IRQ */
+		.start	= 69,
+		.flags	= IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* Carry IRQ */
+		.start	= 70,
+		.flags	= IORESOURCE_IRQ,
+	},
+	[3] = {
+		/* Alarm IRQ */
+		.start	= 68,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device rtc_device = {
+	.name		= "sh-rtc",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(rtc_resources),
+	.resource	= rtc_resources,
+};
+
+static struct platform_device *sh7724_devices[] __initdata = {
+	&sci_device,
+	&rtc_device,
+};
+
+static int __init sh7724_devices_setup(void)
+{
+	clk_always_enable("rtc0");   /* RTC */
+
+	return platform_add_devices(sh7724_devices,
+				    ARRAY_SIZE(sh7724_devices));
+}
+device_initcall(sh7724_devices_setup);
+
+enum {
+	UNUSED = 0,
+
+	/* interrupt sources */
+	IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7,
+	HUDI,
+	DMAC1A_DEI0, DMAC1A_DEI1, DMAC1A_DEI2, DMAC1A_DEI3,
+	_2DG_TRI, _2DG_INI, _2DG_CEI, _2DG_BRK,
+	DMAC0A_DEI0, DMAC0A_DEI1, DMAC0A_DEI2, DMAC0A_DEI3,
+	VIO_CEU20I, VIO_BEU20I, VIO_VEU3F1, VIO_VOUI,
+	SCIFA_SCIFA0,
+	VPU_VPUI,
+	TPU_TPUI,
+	CEU21I,
+	BEU21I,
+	USB_USI0,
+	ATAPI,
+	RTC_ATI, RTC_PRI, RTC_CUI,
+	DMAC1B_DEI4, DMAC1B_DEI5, DMAC1B_DADERR,
+	DMAC0B_DEI4, DMAC0B_DEI5, DMAC0B_DADERR,
+	KEYSC_KEYI,
+	SCIF_SCIF0, SCIF_SCIF1, SCIF_SCIF2,
+	VEU3F0I,
+	MSIOF_MSIOFI0, MSIOF_MSIOFI1,
+	SPU_SPUI0, SPU_SPUI1,
+	SCIFA_SCIFA1,
+/*	ICB_ICBI, */
+	ETHI,
+	I2C1_ALI, I2C1_TACKI, I2C1_WAITI, I2C1_DTEI,
+	I2C0_ALI, I2C0_TACKI, I2C0_WAITI, I2C0_DTEI,
+	SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2,
+	CMT_CMTI,
+	TSIF_TSIFI,
+/*	ICB_LMBI, */
+	FSI_FSI,
+	SCIFA_SCIFA2,
+	TMU0_TUNI0, TMU0_TUNI1, TMU0_TUNI2,
+	IRDA_IRDAI,
+	SDHI1_SDHII0, SDHI1_SDHII1, SDHI1_SDHII2,
+	JPU_JPUI,
+	MMC_MMCI0, MMC_MMCI1, MMC_MMCI2,
+	LCDC_LCDCI,
+	TMU1_TUNI0, TMU1_TUNI1, TMU1_TUNI2,
+
+	/* interrupt groups */
+	DMAC1A, _2DG, DMAC0A, VIO, RTC,
+	DMAC1B, DMAC0B, I2C0, I2C1, SDHI0, SDHI1, SPU, MMC,
+};
+
+static struct intc_vect vectors[] __initdata = {
+	INTC_VECT(IRQ0, 0x600), INTC_VECT(IRQ1, 0x620),
+	INTC_VECT(IRQ2, 0x640), INTC_VECT(IRQ3, 0x660),
+	INTC_VECT(IRQ4, 0x680), INTC_VECT(IRQ5, 0x6a0),
+	INTC_VECT(IRQ6, 0x6c0), INTC_VECT(IRQ7, 0x6e0),
+
+	INTC_VECT(DMAC1A_DEI0, 0x700),
+	INTC_VECT(DMAC1A_DEI1, 0x720),
+	INTC_VECT(DMAC1A_DEI2, 0x740),
+	INTC_VECT(DMAC1A_DEI3, 0x760),
+
+	INTC_VECT(_2DG_TRI, 0x780),
+	INTC_VECT(_2DG_INI, 0x7A0),
+	INTC_VECT(_2DG_CEI, 0x7C0),
+	INTC_VECT(_2DG_BRK, 0x7E0),
+
+	INTC_VECT(DMAC0A_DEI0, 0x800),
+	INTC_VECT(DMAC0A_DEI1, 0x820),
+	INTC_VECT(DMAC0A_DEI2, 0x840),
+	INTC_VECT(DMAC0A_DEI3, 0x860),
+
+	INTC_VECT(VIO_CEU20I, 0x880),
+	INTC_VECT(VIO_BEU20I, 0x8A0),
+	INTC_VECT(VIO_VEU3F1, 0x8C0),
+	INTC_VECT(VIO_VOUI, 0x8E0),
+
+	INTC_VECT(SCIFA_SCIFA0, 0x900),
+	INTC_VECT(VPU_VPUI, 0x980),
+	INTC_VECT(TPU_TPUI, 0x9A0),
+	INTC_VECT(CEU21I, 0x9E0),
+	INTC_VECT(BEU21I, 0xA00),
+	INTC_VECT(USB_USI0, 0xA20),
+	INTC_VECT(ATAPI, 0xA60),
+
+	INTC_VECT(RTC_ATI, 0xA80),
+	INTC_VECT(RTC_PRI, 0xAA0),
+	INTC_VECT(RTC_CUI, 0xAC0),
+
+	INTC_VECT(DMAC1B_DEI4, 0xB00),
+	INTC_VECT(DMAC1B_DEI5, 0xB20),
+	INTC_VECT(DMAC1B_DADERR, 0xB40),
+
+	INTC_VECT(DMAC0B_DEI4, 0xB80),
+	INTC_VECT(DMAC0B_DEI5, 0xBA0),
+	INTC_VECT(DMAC0B_DADERR, 0xBC0),
+
+	INTC_VECT(KEYSC_KEYI, 0xBE0),
+	INTC_VECT(SCIF_SCIF0, 0xC00),
+	INTC_VECT(SCIF_SCIF1, 0xC20),
+	INTC_VECT(SCIF_SCIF2, 0xC40),
+	INTC_VECT(VEU3F0I, 0xC60),
+	INTC_VECT(MSIOF_MSIOFI0, 0xC80),
+	INTC_VECT(MSIOF_MSIOFI1, 0xCA0),
+	INTC_VECT(SPU_SPUI0, 0xCC0),
+	INTC_VECT(SPU_SPUI1, 0xCE0),
+	INTC_VECT(SCIFA_SCIFA1, 0xD00),
+
+/*	INTC_VECT(ICB_ICBI, 0xD20), */
+	INTC_VECT(ETHI, 0xD60),
+
+	INTC_VECT(I2C1_ALI, 0xD80),
+	INTC_VECT(I2C1_TACKI, 0xDA0),
+	INTC_VECT(I2C1_WAITI, 0xDC0),
+	INTC_VECT(I2C1_DTEI, 0xDE0),
+
+	INTC_VECT(I2C0_ALI, 0xE00),
+	INTC_VECT(I2C0_TACKI, 0xE20),
+	INTC_VECT(I2C0_WAITI, 0xE40),
+	INTC_VECT(I2C0_DTEI, 0xE60),
+
+	INTC_VECT(SDHI0_SDHII0, 0xE80),
+	INTC_VECT(SDHI0_SDHII1, 0xEA0),
+	INTC_VECT(SDHI0_SDHII2, 0xEC0),
+
+	INTC_VECT(CMT_CMTI, 0xF00),
+	INTC_VECT(TSIF_TSIFI, 0xF20),
+/*	INTC_VECT(ICB_LMBI, 0xF60), */
+	INTC_VECT(FSI_FSI, 0xF80),
+	INTC_VECT(SCIFA_SCIFA2, 0xFA0),
+
+	INTC_VECT(TMU0_TUNI0, 0x400),
+	INTC_VECT(TMU0_TUNI1, 0x420),
+	INTC_VECT(TMU0_TUNI2, 0x440),
+
+	INTC_VECT(IRDA_IRDAI, 0x480),
+
+	INTC_VECT(SDHI1_SDHII0, 0x4E0),
+	INTC_VECT(SDHI1_SDHII1, 0x500),
+	INTC_VECT(SDHI1_SDHII2, 0x520),
+
+	INTC_VECT(JPU_JPUI, 0x560),
+
+	INTC_VECT(MMC_MMCI0, 0x580),
+	INTC_VECT(MMC_MMCI1, 0x5A0),
+	INTC_VECT(MMC_MMCI2, 0x5C0),
+
+	INTC_VECT(LCDC_LCDCI, 0xF40),
+
+	INTC_VECT(TMU1_TUNI0, 0x920),
+	INTC_VECT(TMU1_TUNI1, 0x940),
+	INTC_VECT(TMU1_TUNI2, 0x960),
+};
+
+static struct intc_group groups[] __initdata = {
+	INTC_GROUP(DMAC1A, DMAC1A_DEI0, DMAC1A_DEI1, DMAC1A_DEI2, DMAC1A_DEI3),
+	INTC_GROUP(_2DG, _2DG_TRI, _2DG_INI, _2DG_CEI, _2DG_BRK),
+	INTC_GROUP(DMAC0A, DMAC0A_DEI0, DMAC0A_DEI1, DMAC0A_DEI2, DMAC0A_DEI3),
+	INTC_GROUP(VIO, VIO_CEU20I, VIO_BEU20I, VIO_VEU3F1, VIO_VOUI),
+	INTC_GROUP(RTC, RTC_ATI, RTC_PRI, RTC_CUI),
+	INTC_GROUP(DMAC1B, DMAC1B_DEI4, DMAC1B_DEI5, DMAC1B_DADERR),
+	INTC_GROUP(DMAC0B, DMAC0B_DEI4, DMAC0B_DEI5, DMAC0B_DADERR),
+	INTC_GROUP(I2C0, I2C0_ALI, I2C0_TACKI, I2C0_WAITI, I2C0_DTEI),
+	INTC_GROUP(I2C1, I2C1_ALI, I2C1_TACKI, I2C1_WAITI, I2C1_DTEI),
+	INTC_GROUP(SDHI0, SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2),
+	INTC_GROUP(SDHI1, SDHI1_SDHII0, SDHI1_SDHII1, SDHI1_SDHII2),
+	INTC_GROUP(SPU, SPU_SPUI0, SPU_SPUI1),
+	INTC_GROUP(MMC, MMC_MMCI0, MMC_MMCI1, MMC_MMCI2),
+};
+
+/* FIXMEEEEEEEEEEEEEEEEEEE !!!!! */
+/* very bad manual !! */
+static struct intc_mask_reg mask_registers[] __initdata = {
+	{ 0xa4080080, 0xa40800c0, 8, /* IMR0 / IMCR0 */
+	  { 0, TMU1_TUNI2, TMU1_TUNI1, TMU1_TUNI0,
+	    /*SDHII3?*/0, SDHI1_SDHII2, SDHI1_SDHII1, SDHI1_SDHII0 } },
+	{ 0xa4080084, 0xa40800c4, 8, /* IMR1 / IMCR1 */
+	  { VIO_VOUI, VIO_VEU3F1, VIO_BEU20I, VIO_CEU20I,
+	    DMAC0A_DEI3, DMAC0A_DEI2, DMAC0A_DEI1, DMAC0A_DEI0 } },
+	{ 0xa4080088, 0xa40800c8, 8, /* IMR2 / IMCR2 */
+	  { 0, 0, 0, VPU_VPUI, ATAPI, ETHI, 0, /*SCIFA3*/SCIFA_SCIFA0 } },
+	{ 0xa408008c, 0xa40800cc, 8, /* IMR3 / IMCR3 */
+	  { DMAC1A_DEI3, DMAC1A_DEI2, DMAC1A_DEI1, DMAC1A_DEI0,
+	    SPU_SPUI1, SPU_SPUI0, BEU21I, IRDA_IRDAI } },
+	{ 0xa4080090, 0xa40800d0, 8, /* IMR4 / IMCR4 */
+	  { 0, TMU0_TUNI2, TMU0_TUNI1, TMU0_TUNI0,
+	    JPU_JPUI, 0, 0, LCDC_LCDCI } },
+	{ 0xa4080094, 0xa40800d4, 8, /* IMR5 / IMCR5 */
+	  { KEYSC_KEYI, DMAC0B_DADERR, DMAC0B_DEI5, DMAC0B_DEI4,
+	    VEU3F0I, SCIF_SCIF2, SCIF_SCIF1, SCIF_SCIF0 } },
+	{ 0xa4080098, 0xa40800d8, 8, /* IMR6 / IMCR6 */
+	  { 0, 0, /*ICB_ICBI*/0, /*SCIFA4*/SCIFA_SCIFA1,
+	    CEU21I, 0, MSIOF_MSIOFI1, MSIOF_MSIOFI0 } },
+	{ 0xa408009c, 0xa40800dc, 8, /* IMR7 / IMCR7 */
+	  { I2C0_DTEI, I2C0_WAITI, I2C0_TACKI, I2C0_ALI,
+	    I2C1_DTEI, I2C1_WAITI, I2C1_TACKI, I2C1_ALI } },
+	{ 0xa40800a0, 0xa40800e0, 8, /* IMR8 / IMCR8 */
+	  { /*SDHII3*/0, SDHI0_SDHII2, SDHI0_SDHII1, SDHI0_SDHII0,
+	    0, 0, /*SCIFA5*/SCIFA_SCIFA2, FSI_FSI } },
+	{ 0xa40800a4, 0xa40800e4, 8, /* IMR9 / IMCR9 */
+	  { 0, 0, 0, CMT_CMTI, 0, /*USB1*/0, USB_USI0, 0 } },
+	{ 0xa40800a8, 0xa40800e8, 8, /* IMR10 / IMCR10 */
+	  { 0, DMAC1B_DADERR, DMAC1B_DEI5, DMAC1B_DEI4,
+	    0, RTC_ATI, RTC_PRI, RTC_CUI } },
+	{ 0xa40800ac, 0xa40800ec, 8, /* IMR11 / IMCR11 */
+	  { _2DG_BRK, _2DG_CEI, _2DG_INI, _2DG_TRI,
+	    0, TPU_TPUI, /*ICB_LMBI*/0, TSIF_TSIFI } },
+	{ 0xa40800b0, 0xa40800f0, 8, /* IMR12 / IMCR12 */
+	  { 0, 0, 0, 0, 0, 0, 0, 0/*2DDMAC*/ } },
+	{ 0xa4140044, 0xa4140064, 8, /* INTMSK00 / INTMSKCLR00 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+};
+
+static struct intc_prio_reg prio_registers[] __initdata = {
+	{ 0xa4080000, 0, 16, 4, /* IPRA */ { TMU0_TUNI0, TMU0_TUNI1,
+					     TMU0_TUNI2, IRDA_IRDAI } },
+	{ 0xa4080004, 0, 16, 4, /* IPRB */ { JPU_JPUI, LCDC_LCDCI,
+					     DMAC1A, BEU21I } },
+	{ 0xa4080008, 0, 16, 4, /* IPRC */ { TMU1_TUNI0, TMU1_TUNI1,
+					     TMU1_TUNI2, SPU } },
+	{ 0xa408000c, 0, 16, 4, /* IPRD */ { 0, MMC, 0, ATAPI } },
+	{ 0xa4080010, 0, 16, 4, /* IPRE */
+	  { DMAC0A, /*BEU?VEU?*/VIO, /*SCIFA3*/SCIFA_SCIFA0, /*VPU5F*/
+	    VPU_VPUI } },
+	{ 0xa4080014, 0, 16, 4, /* IPRF */ { KEYSC_KEYI, DMAC0B,
+					     USB_USI0, CMT_CMTI } },
+	{ 0xa4080018, 0, 16, 4, /* IPRG */ { SCIF_SCIF0, SCIF_SCIF1,
+					     SCIF_SCIF2, VEU3F0I } },
+	{ 0xa408001c, 0, 16, 4, /* IPRH */ { MSIOF_MSIOFI0, MSIOF_MSIOFI1,
+					     I2C1, I2C0 } },
+	{ 0xa4080020, 0, 16, 4, /* IPRI */ { /*SCIFA4*/SCIFA_SCIFA1, /*ICB*/0,
+					     TSIF_TSIFI, _2DG/*ICB?*/ } },
+	{ 0xa4080024, 0, 16, 4, /* IPRJ */ { CEU21I, ETHI, FSI_FSI, SDHI1 } },
+	{ 0xa4080028, 0, 16, 4, /* IPRK */ { RTC, DMAC1B, /*ICB?*/0, SDHI0 } },
+	{ 0xa408002c, 0, 16, 4, /* IPRL */ { /*SCIFA5*/SCIFA_SCIFA2, 0,
+					     TPU_TPUI, /*2DDMAC*/0 } },
+	{ 0xa4140010, 0, 32, 4, /* INTPRI00 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+};
+
+static struct intc_sense_reg sense_registers[] __initdata = {
+	{ 0xa414001c, 16, 2, /* ICR1 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+};
+
+static struct intc_mask_reg ack_registers[] __initdata = {
+	{ 0xa4140024, 0, 8, /* INTREQ00 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+};
+
+static DECLARE_INTC_DESC_ACK(intc_desc, "sh7724", vectors, groups,
+			     mask_registers, prio_registers, sense_registers,
+			     ack_registers);
+
+void __init plat_irq_setup(void)
+{
+	register_intc_controller(&intc_desc);
+}
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 04a6004..0e6ed80 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -435,7 +435,8 @@ static const char *cpu_name[] = {
 	[CPU_SH7722]	= "SH7722",	[CPU_SHX3]	= "SH-X3",
 	[CPU_SH5_101]	= "SH5-101",	[CPU_SH5_103]	= "SH5-103",
 	[CPU_MXG]	= "MX-G",	[CPU_SH7723]	= "SH7723",
-	[CPU_SH7366]	= "SH7366",	[CPU_SH_NONE]	= "Unknown"
+	[CPU_SH7366]	= "SH7366",	[CPU_SH7724]	= "SH7724",
+	[CPU_SH_NONE]	= "Unknown"
 };
 
 const char *get_cpu_subtype(struct sh_cpuinfo *c)
diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c
index 1b9d430..44f4e31 100644
--- a/arch/sh/oprofile/common.c
+++ b/arch/sh/oprofile/common.c
@@ -109,6 +109,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	case CPU_SH7785:
 	case CPU_SH7786:
 	case CPU_SH7723:
+	case CPU_SH7724:
 	case CPU_SHX3:
 		lmodel = &op_model_sh4a_ops;
 		break;
-- 
cgit v0.10.2


From 47948d2bd6d27648a107a27357b3bc5ad054ff64 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Wed, 15 Apr 2009 11:42:47 +0900
Subject: serial: sh-sci: SH7724 support.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index d0aa82d..84cc651 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -91,6 +91,9 @@
 # define SCSPTR5                0xa4050128
 # define SCIF_ORER              0x0001  /* overrun error bit */
 # define SCSCR_INIT(port)       0x0038  /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */
+#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
+# define SCIF_ORER              0x0001  /* overrun error bit */
+# define SCSCR_INIT(port)       0x0038  /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */
 #elif defined(CONFIG_CPU_SUBTYPE_SH4_202)
 # define SCSPTR2 0xffe80020 /* 16 bit SCIF */
 # define SCIF_ORER 0x0001   /* overrun error bit */
@@ -361,7 +364,8 @@
                  h8_sci_offset, h8_sci_size) \
   CPU_SCI_FNS(name, h8_sci_offset, h8_sci_size)
 #define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size)
-#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\
+      defined(CONFIG_CPU_SUBTYPE_SH7724)
         #define SCIx_FNS(name, sh4_scifa_offset, sh4_scifa_size, sh4_scif_offset, sh4_scif_size) \
                 CPU_SCIx_FNS(name, sh4_scifa_offset, sh4_scifa_size, sh4_scif_offset, sh4_scif_size)
         #define SCIF_FNS(name, sh4_scif_offset, sh4_scif_size) \
@@ -390,7 +394,8 @@ SCIF_FNS(SCFDR,  0x1c, 16)
 SCIF_FNS(SCxTDR, 0x20,  8)
 SCIF_FNS(SCxRDR, 0x24,  8)
 SCIF_FNS(SCLSR,  0x24, 16)
-#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\
+      defined(CONFIG_CPU_SUBTYPE_SH7724)
 SCIx_FNS(SCSMR,  0x00, 16, 0x00, 16)
 SCIx_FNS(SCBRR,  0x04,  8, 0x04,  8)
 SCIx_FNS(SCSCR,  0x08, 16, 0x08, 16)
@@ -604,6 +609,17 @@ static inline int sci_rxd_in(struct uart_port *port)
                 return ctrl_inb(SCSPTR5) & 0x0008 ? 1 : 0; /* SCIF5 */
         return 1;
 }
+#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
+#  define SCFSR    0x0010
+#  define SCASSR   0x0014
+static inline int sci_rxd_in(struct uart_port *port)
+{
+	if (port->type == PORT_SCIF)
+		return ctrl_inw((port->mapbase + SCFSR))  & SCIF_BRK ? 1 : 0;
+	if (port->type == PORT_SCIFA)
+		return ctrl_inw((port->mapbase + SCASSR)) & SCIF_BRK ? 1 : 0;
+	return 1;
+}
 #elif defined(CONFIG_CPU_SUBTYPE_SH5_101) || defined(CONFIG_CPU_SUBTYPE_SH5_103)
 static inline int sci_rxd_in(struct uart_port *port)
 {
@@ -757,7 +773,8 @@ static inline int sci_rxd_in(struct uart_port *port)
       defined(CONFIG_CPU_SUBTYPE_SH7720) || \
       defined(CONFIG_CPU_SUBTYPE_SH7721)
 #define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(32*bps)-1)
-#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\
+      defined(CONFIG_CPU_SUBTYPE_SH7724)
 static inline int scbrr_calc(struct uart_port *port, int bps, int clk)
 {
 	if (port->type == PORT_SCIF)
-- 
cgit v0.10.2


From 40c7e8be556715079d0a9d7454ceb5371a2f0b39 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Thu, 16 Apr 2009 13:16:07 +0900
Subject: sh: sh7724: Add I2C support.

This adds support for the SH-Mobile I2C controller on the SH7724.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 4327b1e..f17eda6 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -99,9 +99,55 @@ static struct platform_device rtc_device = {
 	.resource	= rtc_resources,
 };
 
+/* I2C0 */
+static struct resource iic0_resources[] = {
+	[0] = {
+		.name	= "IIC0",
+		.start  = 0x04470000,
+		.end    = 0x04470018 - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 96,
+		.end    = 99,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device iic0_device = {
+	.name           = "i2c-sh_mobile",
+	.id             = 0, /* "i2c0" clock */
+	.num_resources  = ARRAY_SIZE(iic0_resources),
+	.resource       = iic0_resources,
+};
+
+/* I2C1 */
+static struct resource iic1_resources[] = {
+	[0] = {
+		.name	= "IIC1",
+		.start  = 0x04750000,
+		.end    = 0x04750018 - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 92,
+		.end    = 95,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device iic1_device = {
+	.name           = "i2c-sh_mobile",
+	.id             = 1, /* "i2c1" clock */
+	.num_resources  = ARRAY_SIZE(iic1_resources),
+	.resource       = iic1_resources,
+};
+
 static struct platform_device *sh7724_devices[] __initdata = {
 	&sci_device,
 	&rtc_device,
+	&iic0_device,
+	&iic1_device,
 };
 
 static int __init sh7724_devices_setup(void)
-- 
cgit v0.10.2


From cd5b9ef776feff440e7a889d1a565ceabfecbfa1 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Wed, 15 Apr 2009 11:43:03 +0900
Subject: sh: sh7724: Add VPU support.

This adds uio_pdrv_genirq support for the VPU.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index f17eda6..499a6fc 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -143,16 +143,49 @@ static struct platform_device iic1_device = {
 	.resource       = iic1_resources,
 };
 
+/* VPU */
+static struct uio_info vpu_platform_data = {
+	.name = "VPU5F",
+	.version = "0",
+	.irq = 60,
+};
+
+static struct resource vpu_resources[] = {
+	[0] = {
+		.name	= "VPU",
+		.start	= 0xfe900000,
+		.end	= 0xfe902807,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device vpu_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &vpu_platform_data,
+	},
+	.resource	= vpu_resources,
+	.num_resources	= ARRAY_SIZE(vpu_resources),
+};
+
 static struct platform_device *sh7724_devices[] __initdata = {
 	&sci_device,
 	&rtc_device,
 	&iic0_device,
 	&iic1_device,
+	&vpu_device,
 };
 
 static int __init sh7724_devices_setup(void)
 {
 	clk_always_enable("rtc0");   /* RTC */
+	clk_always_enable("vpu0");   /* VPU */
+
+	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
 
 	return platform_add_devices(sh7724_devices,
 				    ARRAY_SIZE(sh7724_devices));
-- 
cgit v0.10.2


From ad95b78c9f735da11ff9ec760e9b038cd82aead6 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Wed, 15 Apr 2009 11:43:07 +0900
Subject: sh: sh7724: Add VEU support.

This adds uio_pdrv_genirq support for the VEU.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 499a6fc..65570ed 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -172,20 +172,84 @@ static struct platform_device vpu_device = {
 	.num_resources	= ARRAY_SIZE(vpu_resources),
 };
 
+/* VEU0 */
+static struct uio_info veu0_platform_data = {
+	.name = "VEU3F0",
+	.version = "0",
+	.irq = 83,
+};
+
+static struct resource veu0_resources[] = {
+	[0] = {
+		.name	= "VEU3F0",
+		.start	= 0xfe920000,
+		.end	= 0xfe9200cb - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device veu0_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &veu0_platform_data,
+	},
+	.resource	= veu0_resources,
+	.num_resources	= ARRAY_SIZE(veu0_resources),
+};
+
+/* VEU1 */
+static struct uio_info veu1_platform_data = {
+	.name = "VEU3F1",
+	.version = "0",
+	.irq = 54,
+};
+
+static struct resource veu1_resources[] = {
+	[0] = {
+		.name	= "VEU3F1",
+		.start	= 0xfe924000,
+		.end	= 0xfe9240cb - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device veu1_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &veu1_platform_data,
+	},
+	.resource	= veu1_resources,
+	.num_resources	= ARRAY_SIZE(veu1_resources),
+};
+
 static struct platform_device *sh7724_devices[] __initdata = {
 	&sci_device,
 	&rtc_device,
 	&iic0_device,
 	&iic1_device,
 	&vpu_device,
+	&veu0_device,
+	&veu1_device,
 };
 
 static int __init sh7724_devices_setup(void)
 {
 	clk_always_enable("rtc0");   /* RTC */
 	clk_always_enable("vpu0");   /* VPU */
+	clk_always_enable("veu1");   /* VEU3F1 */
+	clk_always_enable("veu0");   /* VEU3F0 */
 
 	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
+	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
+	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
 
 	return platform_add_devices(sh7724_devices,
 				    ARRAY_SIZE(sh7724_devices));
-- 
cgit v0.10.2


From 6a3395beb99d7ae882ddf701c6fa6005ad7edebf Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 16 Apr 2009 15:36:13 +0900
Subject: sh: sh7724: Add CMT clockevents support.

This enables support for the CMT clockevents driver on SH7724.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 65570ed..8b87ba8 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -230,7 +230,40 @@ static struct platform_device veu1_device = {
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
+static struct sh_cmt_config cmt_platform_data = {
+	.name = "CMT",
+	.channel_offset = 0x60,
+	.timer_bit = 5,
+	.clk = "cmt0",
+	.clockevent_rating = 125,
+	.clocksource_rating = 200,
+};
+
+static struct resource cmt_resources[] = {
+	[0] = {
+		.name	= "CMT",
+		.start	= 0x044a0060,
+		.end	= 0x044a006b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt_platform_data,
+	},
+	.resource	= cmt_resources,
+	.num_resources	= ARRAY_SIZE(cmt_resources),
+};
+
 static struct platform_device *sh7724_devices[] __initdata = {
+	&cmt_device,
 	&sci_device,
 	&rtc_device,
 	&iic0_device,
-- 
cgit v0.10.2


From 59fe700dcbf3d6257ae86ca8c0192fc64b2eea1c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 16 Apr 2009 15:43:42 +0900
Subject: sh: Have SH7724 select ARCH_SHMOBILE.

This is an SH-Mobile CPU, so select ARCH_SHMOBILE. This enables all of
the PM functionality, amongst other things.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 505d1ac..4c68fde 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -351,6 +351,7 @@ config CPU_SUBTYPE_SH7724
 	bool "Support SH7724 processor"
 	select CPU_SH4A
 	select CPU_SHX2
+	select ARCH_SHMOBILE
 	select ARCH_SPARSEMEM_ENABLE
 	select SYS_SUPPORTS_CMT
 	help
-- 
cgit v0.10.2


From bc9f3d4291f8275924a9197a81208a5df0a15095 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 16 Apr 2009 15:44:58 +0900
Subject: sh: Add a generic defconfig for SH7724 platforms.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig
new file mode 100644
index 0000000..268d04e
--- /dev/null
+++ b/arch/sh/configs/sh7724_generic_defconfig
@@ -0,0 +1,707 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc2
+# Thu Apr 16 15:42:20 2009
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
+CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+# CONFIG_GENERIC_GPIO is not set
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_ARCH_HIBERNATION_POSSIBLE=y
+CONFIG_SYS_SUPPORTS_CMT=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_ARCH_HAS_DEFAULT_IDLE=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+
+#
+# RCU Subsystem
+#
+# CONFIG_CLASSIC_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+CONFIG_CGROUPS=y
+# CONFIG_CGROUP_DEBUG is not set
+# CONFIG_CGROUP_NS is not set
+# CONFIG_CGROUP_FREEZER is not set
+# CONFIG_CGROUP_DEVICE is not set
+# CONFIG_CPUSETS is not set
+# CONFIG_CGROUP_CPUACCT is not set
+# CONFIG_RESOURCE_COUNTERS is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
+# CONFIG_MARKERS is not set
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+# CONFIG_MODULES is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_FREEZER=y
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+CONFIG_CPU_SH4A=y
+CONFIG_CPU_SHX2=y
+CONFIG_ARCH_SHMOBILE=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7201 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+# CONFIG_CPU_SUBTYPE_SH7751R is not set
+# CONFIG_CPU_SUBTYPE_SH7760 is not set
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+CONFIG_CPU_SUBTYPE_SH7724=y
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+# CONFIG_CPU_SUBTYPE_SH7770 is not set
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SH7786 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_MEMORY_START=0x08000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_29BIT=y
+# CONFIG_X2TLB is not set
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_ENTRY_OFFSET=0x00001000
+CONFIG_SELECT_MEMORY_MODEL=y
+# CONFIG_FLATMEM_MANUAL is not set
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
+CONFIG_HAVE_MEMORY_PRESENT=y
+CONFIG_SPARSEMEM_STATIC=y
+
+#
+# Memory hotplug is currently incompatible with Software Suspend
+#
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+
+#
+# Cache configuration
+#
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH_STORE_QUEUES is not set
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_PTEA=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TMU=y
+CONFIG_SH_TIMER_CMT=y
+CONFIG_SH_TIMER_IRQ=16
+CONFIG_SH_PCLK_FREQ=41666666
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_SH_CPU_FREQ=y
+
+#
+# DMA support
+#
+# CONFIG_SH_DMA is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+# CONFIG_HEARTBEAT is not set
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_SCHED_HRTICK=y
+CONFIG_KEXEC=y
+# CONFIG_CRASH_DUMP is not set
+CONFIG_KEXEC_JUMP=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_GUSA=y
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00001000
+CONFIG_BOOT_LINK_OFFSET=0x00800000
+# CONFIG_CMDLINE_BOOL is not set
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options (EXPERIMENTAL)
+#
+CONFIG_PM=y
+# CONFIG_PM_DEBUG is not set
+CONFIG_PM_SLEEP=y
+CONFIG_SUSPEND=y
+CONFIG_SUSPEND_FREEZER=y
+CONFIG_HIBERNATION=y
+CONFIG_PM_STD_PARTITION=""
+CONFIG_CPU_IDLE=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPU_IDLE_GOV_MENU=y
+# CONFIG_NET is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_BLK_DEV_HD is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=6
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_UNIX98_PTYS is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_SH_MOBILE=y
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+CONFIG_RTC_DRV_SH=y
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_UIO=y
+# CONFIG_UIO_PDRV is not set
+CONFIG_UIO_PDRV_GENIRQ=y
+# CONFIG_UIO_SMX is not set
+# CONFIG_UIO_SERCOS3 is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+# CONFIG_PROC_FS is not set
+# CONFIG_SYSFS is not set
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_SH_STANDARD_BIOS is not set
+# CONFIG_EARLY_SCIF_CONSOLE is not set
+# CONFIG_MORE_COMPILE_OPTIONS is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
+
+#
+# Library routines
+#
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
-- 
cgit v0.10.2


From 4e01f54bfd3f423db8fd6c91c4f0471f18aa0c50 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 16 Apr 2009 08:53:34 +0200
Subject: ALSA: hda - Add Creative CA0110-IBG support

Added the support for Creative SB X-Fi boards with UAA (HD-audio) mode.
In the HD-audio mode, no multiple streams are supported by just it
behaves like a normal HD-audio device.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index eb2a19b..c710150 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -139,6 +139,19 @@ config SND_HDA_CODEC_CONEXANT
 	  snd-hda-codec-conexant.
 	  This module is automatically loaded at probing.
 
+config SND_HDA_CODEC_CA0110
+	bool "Build Creative CA0110-IBG codec support"
+	depends on SND_HDA_INTEL
+	default y
+	help
+	  Say Y here to include Creative CA0110-IBG codec support in
+	  snd-hda-intel driver, found on some Creative X-Fi cards.
+
+	  When the HD-audio driver is built as a module, the codec
+	  support code is also built as another module,
+	  snd-hda-codec-ca0110.
+	  This module is automatically loaded at probing.
+
 config SND_HDA_CODEC_CMEDIA
 	bool "Build C-Media HD-audio codec support"
 	default y
diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile
index 50f9d09..e3081d4 100644
--- a/sound/pci/hda/Makefile
+++ b/sound/pci/hda/Makefile
@@ -13,6 +13,7 @@ snd-hda-codec-analog-objs :=	patch_analog.o
 snd-hda-codec-idt-objs :=	patch_sigmatel.o
 snd-hda-codec-si3054-objs :=	patch_si3054.o
 snd-hda-codec-atihdmi-objs :=	patch_atihdmi.o
+snd-hda-codec-ca0110-objs :=	patch_ca0110.o
 snd-hda-codec-conexant-objs :=	patch_conexant.o
 snd-hda-codec-via-objs :=	patch_via.o
 snd-hda-codec-nvhdmi-objs :=	patch_nvhdmi.o
@@ -40,6 +41,9 @@ endif
 ifdef CONFIG_SND_HDA_CODEC_ATIHDMI
 obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-atihdmi.o
 endif
+ifdef CONFIG_SND_HDA_CODEC_CA0110
+obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-ca0110.o
+endif
 ifdef CONFIG_SND_HDA_CODEC_CONEXANT
 obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-conexant.o
 endif
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index fd6e6f3..37f24ce 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -48,6 +48,7 @@ static struct hda_vendor_id hda_vendor_ids[] = {
 	{ 0x1095, "Silicon Image" },
 	{ 0x10de, "Nvidia" },
 	{ 0x10ec, "Realtek" },
+	{ 0x1102, "Creative" },
 	{ 0x1106, "VIA" },
 	{ 0x111d, "IDT" },
 	{ 0x11c1, "LSI" },
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 21e99cf..21a3092 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2513,6 +2513,11 @@ static struct pci_device_id azx_ids[] = {
 	{ PCI_DEVICE(0x10de, 0x0d97), .driver_data = AZX_DRIVER_NVIDIA },
 	/* Teradici */
 	{ PCI_DEVICE(0x6549, 0x1200), .driver_data = AZX_DRIVER_TERA },
+	/* Creative X-Fi (CA0110-IBG) */
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_ANY_ID),
+	  .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8,
+	  .class_mask = 0xffffff,
+	  .driver_data = AZX_DRIVER_GENERIC },
 	/* AMD Generic, PCI class code and Vendor ID for HD Audio */
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_ANY_ID),
 	  .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8,
diff --git a/sound/pci/hda/patch_ca0110.c b/sound/pci/hda/patch_ca0110.c
new file mode 100644
index 0000000..7ec41da
--- /dev/null
+++ b/sound/pci/hda/patch_ca0110.c
@@ -0,0 +1,574 @@
+/*
+ * HD audio interface patch for Creative X-Fi CA0110-IBG chip
+ *
+ * Copyright (c) 2008 Takashi Iwai <tiwai@suse.de>
+ *
+ *  This driver is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This driver is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <sound/core.h>
+#include "hda_codec.h"
+#include "hda_local.h"
+
+/*
+ */
+
+struct ca0110_spec {
+	struct auto_pin_cfg autocfg;
+	struct hda_multi_out multiout;
+	hda_nid_t out_pins[AUTO_CFG_MAX_OUTS];
+	hda_nid_t dacs[AUTO_CFG_MAX_OUTS];
+	hda_nid_t hp_dac;
+	hda_nid_t input_pins[AUTO_PIN_LAST];
+	hda_nid_t adcs[AUTO_PIN_LAST];
+	hda_nid_t dig_out;
+	hda_nid_t dig_in;
+	unsigned int num_inputs;
+	const char *input_labels[AUTO_PIN_LAST];
+	struct hda_pcm pcm_rec[2];	/* PCM information */
+};
+
+/*
+ * PCM callbacks
+ */
+static int ca0110_playback_pcm_open(struct hda_pcm_stream *hinfo,
+				    struct hda_codec *codec,
+				    struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_analog_open(codec, &spec->multiout, substream,
+					     hinfo);
+}
+
+static int ca0110_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
+				       struct hda_codec *codec,
+				       unsigned int stream_tag,
+				       unsigned int format,
+				       struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_analog_prepare(codec, &spec->multiout,
+						stream_tag, format, substream);
+}
+
+static int ca0110_playback_pcm_cleanup(struct hda_pcm_stream *hinfo,
+				       struct hda_codec *codec,
+				       struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_analog_cleanup(codec, &spec->multiout);
+}
+
+/*
+ * Digital out
+ */
+static int ca0110_dig_playback_pcm_open(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_dig_open(codec, &spec->multiout);
+}
+
+static int ca0110_dig_playback_pcm_close(struct hda_pcm_stream *hinfo,
+					 struct hda_codec *codec,
+					 struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_dig_close(codec, &spec->multiout);
+}
+
+static int ca0110_dig_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
+					   struct hda_codec *codec,
+					   unsigned int stream_tag,
+					   unsigned int format,
+					   struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+	return snd_hda_multi_out_dig_prepare(codec, &spec->multiout, stream_tag,
+					     format, substream);
+}
+
+/*
+ * Analog capture
+ */
+static int ca0110_capture_pcm_prepare(struct hda_pcm_stream *hinfo,
+				      struct hda_codec *codec,
+				      unsigned int stream_tag,
+				      unsigned int format,
+				      struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+
+	snd_hda_codec_setup_stream(codec, spec->adcs[substream->number],
+				   stream_tag, 0, format);
+	return 0;
+}
+
+static int ca0110_capture_pcm_cleanup(struct hda_pcm_stream *hinfo,
+				      struct hda_codec *codec,
+				      struct snd_pcm_substream *substream)
+{
+	struct ca0110_spec *spec = codec->spec;
+
+	snd_hda_codec_cleanup_stream(codec, spec->adcs[substream->number]);
+	return 0;
+}
+
+/*
+ */
+
+static char *dirstr[2] = { "Playback", "Capture" };
+
+static int _add_switch(struct hda_codec *codec, hda_nid_t nid, const char *pfx,
+		       int chan, int dir)
+{
+	char namestr[44];
+	int type = dir ? HDA_INPUT : HDA_OUTPUT;
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_MUTE_MONO(namestr, nid, chan, 0, type);
+	sprintf(namestr, "%s %s Switch", pfx, dirstr[dir]);
+	return snd_hda_ctl_add(codec, snd_ctl_new1(&knew, codec));
+}
+
+static int _add_volume(struct hda_codec *codec, hda_nid_t nid, const char *pfx,
+		       int chan, int dir)
+{
+	char namestr[44];
+	int type = dir ? HDA_INPUT : HDA_OUTPUT;
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_VOLUME_MONO(namestr, nid, chan, 0, type);
+	sprintf(namestr, "%s %s Volume", pfx, dirstr[dir]);
+	return snd_hda_ctl_add(codec, snd_ctl_new1(&knew, codec));
+}
+
+#define add_out_switch(codec, nid, pfx)	_add_switch(codec, nid, pfx, 3, 0)
+#define add_out_volume(codec, nid, pfx)	_add_volume(codec, nid, pfx, 3, 0)
+#define add_in_switch(codec, nid, pfx)	_add_switch(codec, nid, pfx, 3, 1)
+#define add_in_volume(codec, nid, pfx)	_add_volume(codec, nid, pfx, 3, 1)
+#define add_mono_switch(codec, nid, pfx, chan) \
+	_add_switch(codec, nid, pfx, chan, 0)
+#define add_mono_volume(codec, nid, pfx, chan) \
+	_add_volume(codec, nid, pfx, chan, 0)
+
+static int ca0110_build_controls(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	static char *prefix[AUTO_CFG_MAX_OUTS] = {
+		"Front", "Surround", NULL, "Side", "Multi"
+	};
+	hda_nid_t mutenid;
+	int i, err;
+
+	for (i = 0; i < spec->multiout.num_dacs; i++) {
+		if (get_wcaps(codec, spec->out_pins[i]) & AC_WCAP_OUT_AMP)
+			mutenid = spec->out_pins[i];
+		else
+			mutenid = spec->multiout.dac_nids[i];
+		if (!prefix[i]) {
+			err = add_mono_switch(codec, mutenid,
+					      "Center", 1);
+			if (err < 0)
+				return err;
+			err = add_mono_switch(codec, mutenid,
+					      "LFE", 1);
+			if (err < 0)
+				return err;
+			err = add_mono_volume(codec, spec->multiout.dac_nids[i],
+					      "Center", 1);
+			if (err < 0)
+				return err;
+			err = add_mono_volume(codec, spec->multiout.dac_nids[i],
+					      "LFE", 1);
+			if (err < 0)
+				return err;
+		} else {
+			err = add_out_switch(codec, mutenid,
+					     prefix[i]);
+			if (err < 0)
+				return err;
+			err = add_out_volume(codec, spec->multiout.dac_nids[i],
+					 prefix[i]);
+			if (err < 0)
+				return err;
+		}
+	}
+	if (cfg->hp_outs) {
+		if (get_wcaps(codec, cfg->hp_pins[0]) & AC_WCAP_OUT_AMP)
+			mutenid = cfg->hp_pins[0];
+		else
+			mutenid = spec->multiout.dac_nids[i];
+
+		err = add_out_switch(codec, mutenid, "Headphone");
+		if (err < 0)
+			return err;
+		if (spec->hp_dac) {
+			err = add_out_volume(codec, spec->hp_dac, "Headphone");
+			if (err < 0)
+				return err;
+		}
+	}
+	for (i = 0; i < spec->num_inputs; i++) {
+		const char *label = spec->input_labels[i];
+		if (get_wcaps(codec, spec->input_pins[i]) & AC_WCAP_IN_AMP)
+			mutenid = spec->input_pins[i];
+		else
+			mutenid = spec->adcs[i];
+		err = add_in_switch(codec, mutenid, label);
+		if (err < 0)
+			return err;
+		err = add_in_volume(codec, spec->adcs[i], label);
+		if (err < 0)
+			return err;
+	}
+
+	if (spec->dig_out) {
+		err = snd_hda_create_spdif_out_ctls(codec, spec->dig_out);
+		if (err < 0)
+			return err;
+		err = snd_hda_create_spdif_share_sw(codec, &spec->multiout);
+		if (err < 0)
+			return err;
+		spec->multiout.share_spdif = 1;
+	}
+	if (spec->dig_in) {
+		err = snd_hda_create_spdif_in_ctls(codec, spec->dig_in);
+		if (err < 0)
+			return err;
+		err = add_in_volume(codec, spec->dig_in, "IEC958");
+	}
+	return 0;
+}
+
+/*
+ */
+static struct hda_pcm_stream ca0110_pcm_analog_playback = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 8,
+	.ops = {
+		.open = ca0110_playback_pcm_open,
+		.prepare = ca0110_playback_pcm_prepare,
+		.cleanup = ca0110_playback_pcm_cleanup
+	},
+};
+
+static struct hda_pcm_stream ca0110_pcm_analog_capture = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 2,
+	.ops = {
+		.prepare = ca0110_capture_pcm_prepare,
+		.cleanup = ca0110_capture_pcm_cleanup
+	},
+};
+
+static struct hda_pcm_stream ca0110_pcm_digital_playback = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 2,
+	.ops = {
+		.open = ca0110_dig_playback_pcm_open,
+		.close = ca0110_dig_playback_pcm_close,
+		.prepare = ca0110_dig_playback_pcm_prepare
+	},
+};
+
+static struct hda_pcm_stream ca0110_pcm_digital_capture = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 2,
+};
+
+static int ca0110_build_pcms(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct hda_pcm *info = spec->pcm_rec;
+
+	codec->pcm_info = info;
+	codec->num_pcms = 0;
+
+	info->name = "CA0110 Analog";
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ca0110_pcm_analog_playback;
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dacs[0];
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max =
+		spec->multiout.num_dacs * 2;
+	info->stream[SNDRV_PCM_STREAM_CAPTURE] = ca0110_pcm_analog_capture;
+	info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = spec->num_inputs;
+	info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[0];
+	codec->num_pcms++;
+
+	if (!spec->dig_out && !spec->dig_in)
+		return 0;
+
+	info++;
+	info->name = "CA0110 Digital";
+	info->pcm_type = HDA_PCM_TYPE_SPDIF;
+	if (spec->dig_out) {
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK] =
+			ca0110_pcm_digital_playback;
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dig_out;
+	}
+	if (spec->dig_in) {
+		info->stream[SNDRV_PCM_STREAM_CAPTURE] =
+			ca0110_pcm_digital_capture;
+		info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->dig_in;
+	}
+	codec->num_pcms++;
+
+	return 0;
+}
+
+static void init_output(struct hda_codec *codec, hda_nid_t pin, hda_nid_t dac)
+{
+	if (pin) {
+		snd_hda_codec_write(codec, pin, 0,
+				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP);
+		if (get_wcaps(codec, pin) & AC_WCAP_OUT_AMP)
+			snd_hda_codec_write(codec, pin, 0,
+					    AC_VERB_SET_AMP_GAIN_MUTE,
+					    AMP_OUT_UNMUTE);
+	}
+	if (dac)
+		snd_hda_codec_write(codec, dac, 0,
+				    AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO);
+}
+
+static void init_input(struct hda_codec *codec, hda_nid_t pin, hda_nid_t adc)
+{
+	if (pin) {
+		snd_hda_codec_write(codec, pin, 0,
+				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80);
+		if (get_wcaps(codec, pin) & AC_WCAP_IN_AMP)
+			snd_hda_codec_write(codec, pin, 0,
+					    AC_VERB_SET_AMP_GAIN_MUTE,
+					    AMP_IN_UNMUTE(0));
+	}
+	if (adc)
+		snd_hda_codec_write(codec, adc, 0, AC_VERB_SET_AMP_GAIN_MUTE,
+				    AMP_IN_UNMUTE(0));
+}
+
+static int ca0110_init(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	int i;
+
+	for (i = 0; i < spec->multiout.num_dacs; i++)
+		init_output(codec, spec->out_pins[i],
+			    spec->multiout.dac_nids[i]);
+	init_output(codec, cfg->hp_pins[0], spec->hp_dac);
+	init_output(codec, cfg->dig_out_pins[0], spec->dig_out);
+
+	for (i = 0; i < spec->num_inputs; i++)
+		init_input(codec, spec->input_pins[i], spec->adcs[i]);
+	init_input(codec, cfg->dig_in_pin, spec->dig_in);
+	return 0;
+}
+
+static void ca0110_free(struct hda_codec *codec)
+{
+	kfree(codec->spec);
+}
+
+static struct hda_codec_ops ca0110_patch_ops = {
+	.build_controls = ca0110_build_controls,
+	.build_pcms = ca0110_build_pcms,
+	.init = ca0110_init,
+	.free = ca0110_free,
+};
+
+
+static void parse_line_outs(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	int i, n;
+	unsigned int def_conf;
+	hda_nid_t nid;
+
+	n = 0;
+	for (i = 0; i < cfg->line_outs; i++) {
+		nid = cfg->line_out_pins[i];
+		def_conf = snd_hda_codec_read(codec, nid, 0,
+					      AC_VERB_GET_CONFIG_DEFAULT, 0);
+		if (!def_conf)
+			continue; /* invalid pin */
+		if (snd_hda_get_connections(codec, nid, &spec->dacs[i], 1) != 1)
+			continue;
+		spec->out_pins[n++] = nid;
+	}
+	spec->multiout.dac_nids = spec->dacs;
+	spec->multiout.num_dacs = n;
+}
+
+static void parse_hp_out(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	int i;
+	unsigned int def_conf;
+	hda_nid_t nid, dac;
+
+	if (!cfg->hp_outs)
+		return;
+	nid = cfg->hp_pins[0];
+	def_conf = snd_hda_codec_read(codec, nid, 0,
+				      AC_VERB_GET_CONFIG_DEFAULT, 0);
+	if (!def_conf) {
+		cfg->hp_outs = 0;
+		return;
+	}
+	if (snd_hda_get_connections(codec, nid, &dac, 1) != 1)
+		return;
+
+	for (i = 0; i < cfg->line_outs; i++)
+		if (dac == spec->dacs[i])
+			break;
+	if (i >= cfg->line_outs) {
+		spec->hp_dac = dac;
+		spec->multiout.hp_nid = dac;
+	}
+}
+
+static void parse_input(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+	hda_nid_t nid, pin;
+	int n, i, j;
+
+	n = 0;
+	nid = codec->start_nid;
+	for (i = 0; i < codec->num_nodes; i++, nid++) {
+		unsigned int wcaps = get_wcaps(codec, nid);
+		unsigned int type = (wcaps & AC_WCAP_TYPE) >>
+			AC_WCAP_TYPE_SHIFT;
+		if (type != AC_WID_AUD_IN)
+			continue;
+		if (snd_hda_get_connections(codec, nid, &pin, 1) != 1)
+			continue;
+		if (pin == cfg->dig_in_pin) {
+			spec->dig_in = nid;
+			continue;
+		}
+		for (j = 0; j < AUTO_PIN_LAST; j++)
+			if (cfg->input_pins[j] == pin)
+				break;
+		if (j >= AUTO_PIN_LAST)
+			continue;
+		spec->input_pins[n] = pin;
+		spec->input_labels[n] = auto_pin_cfg_labels[j];
+		spec->adcs[n] = nid;
+		n++;
+	}
+	spec->num_inputs = n;
+}
+
+static void parse_digital(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	struct auto_pin_cfg *cfg = &spec->autocfg;
+
+	if (cfg->dig_outs &&
+	    snd_hda_get_connections(codec, cfg->dig_out_pins[0],
+				    &spec->dig_out, 1) == 1)
+		spec->multiout.dig_out_nid = cfg->dig_out_pins[0];
+}
+
+static int ca0110_parse_auto_config(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec = codec->spec;
+	int err;
+
+	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
+	if (err < 0)
+		return err;
+
+	parse_line_outs(codec);
+	parse_hp_out(codec);
+	parse_digital(codec);
+	parse_input(codec);
+	return 0;
+}
+
+
+int patch_ca0110(struct hda_codec *codec)
+{
+	struct ca0110_spec *spec;
+	int err;
+
+	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+	codec->spec = spec;
+
+	codec->bus->needs_damn_long_delay = 1;
+
+	err = ca0110_parse_auto_config(codec);
+	if (err < 0)
+		goto error;
+
+	codec->patch_ops = ca0110_patch_ops;
+
+	return 0;
+
+ error:
+	kfree(codec->spec);
+	codec->spec = NULL;
+	return err;
+}
+
+
+/*
+ * patch entries
+ */
+static struct hda_codec_preset snd_hda_preset_ca0110[] = {
+	{ .id = 0x1102000a, .name = "CA0110-IBG", .patch = patch_ca0110 },
+	{ .id = 0x1102000b, .name = "CA0110-IBG", .patch = patch_ca0110 },
+	{ .id = 0x1102000d, .name = "SB0880 X-Fi", .patch = patch_ca0110 },
+	{} /* terminator */
+};
+
+MODULE_ALIAS("snd-hda-codec-id:1102000a");
+MODULE_ALIAS("snd-hda-codec-id:1102000b");
+MODULE_ALIAS("snd-hda-codec-id:1102000d");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Creative CA0110-IBG HD-audio codec");
+
+static struct hda_codec_preset_list ca0110_list = {
+	.preset = snd_hda_preset_ca0110,
+	.owner = THIS_MODULE,
+};
+
+static int __init patch_ca0110_init(void)
+{
+	return snd_hda_add_codec_preset(&ca0110_list);
+}
+
+static void __exit patch_ca0110_exit(void)
+{
+	snd_hda_delete_codec_preset(&ca0110_list);
+}
+
+module_init(patch_ca0110_init)
+module_exit(patch_ca0110_exit)
-- 
cgit v0.10.2


From b8b47bfbe4eb1ae0e6891e49c86a5f4fb00413be Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 11 Mar 2009 15:41:51 +0900
Subject: sh: pass along struct pci_channel

These patches rework the pci code for the sh architecture.

Currently each board implements some kind of ioport to address mapping.
Some boards use generic_io_base others try passing addresses as io ports.
This is the first set of patches that try to unify the pci code as much
as possible to avoid duplicated code. This will in the end lead to fewer
lines board specific code and more generic code.

This patch makes sure a struct pci_channel pointer is passed along to
various pci functions such as pci_read_reg(), pci_write_reg(),
pci_fixup_pcic(), sh7751_pcic_init() and sh7780_pcic_init().

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c
index 1c1d412..a82011d 100644
--- a/arch/sh/drivers/pci/fixups-lboxre2.c
+++ b/arch/sh/drivers/pci/fixups-lboxre2.c
@@ -9,33 +9,34 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <linux/pci.h>
 #include "pci-sh4.h"
 
 #define PCIMCR_MRSET_OFF	0xBFFFFFFF
 #define PCIMCR_RFSH_OFF		0xFFFFFFFB
 
-int pci_fixup_pcic(void)
+int pci_fixup_pcic(struct pci_channel *chan)
 {
 	unsigned long bcr1, mcr;
 
 	bcr1 = ctrl_inl(SH7751_BCR1);
 	bcr1 |= 0x40080000;	/* Enable Bit 19 BREQEN, set PCIC to slave */
-	pci_write_reg(bcr1, SH4_PCIBCR1);
+	pci_write_reg(chan, bcr1, SH4_PCIBCR1);
 
 	/* Enable all interrupts, so we known what to fix */
-	pci_write_reg(0x0000c3ff, SH4_PCIINTM);
-	pci_write_reg(0x0000380f, SH4_PCIAINTM);
-	pci_write_reg(0xfb900047, SH7751_PCICONF1);
-	pci_write_reg(0xab000001, SH7751_PCICONF4);
+	pci_write_reg(chan, 0x0000c3ff, SH4_PCIINTM);
+	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
+	pci_write_reg(chan, 0xfb900047, SH7751_PCICONF1);
+	pci_write_reg(chan, 0xab000001, SH7751_PCICONF4);
 
 	mcr = ctrl_inl(SH7751_MCR);
 	mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
-	pci_write_reg(mcr, SH4_PCIMCR);
+	pci_write_reg(chan, mcr, SH4_PCIMCR);
 
-	pci_write_reg(0x0c000000, SH7751_PCICONF5);
-	pci_write_reg(0xd0000000, SH7751_PCICONF6);
-	pci_write_reg(0x0c000000, SH4_PCILAR0);
-	pci_write_reg(0x00000000, SH4_PCILAR1);
+	pci_write_reg(chan, 0x0c000000, SH7751_PCICONF5);
+	pci_write_reg(chan, 0xd0000000, SH7751_PCICONF6);
+	pci_write_reg(chan, 0x0c000000, SH4_PCILAR0);
+	pci_write_reg(chan, 0x00000000, SH4_PCILAR1);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c
index 3e321df..5b25021 100644
--- a/arch/sh/drivers/pci/fixups-r7780rp.c
+++ b/arch/sh/drivers/pci/fixups-r7780rp.c
@@ -14,32 +14,33 @@
 #include "pci-sh4.h"
 #include <asm/io.h>
 
-int pci_fixup_pcic(void)
+int pci_fixup_pcic(struct pci_channel *chan)
 {
-	pci_write_reg(0x000043ff, SH4_PCIINTM);
-	pci_write_reg(0x0000380f, SH4_PCIAINTM);
+	pci_write_reg(chan, 0x000043ff, SH4_PCIINTM);
+	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
 
-	pci_write_reg(0xfbb00047, SH7780_PCICMD);
-	pci_write_reg(0x00000000, SH7780_PCIIBAR);
+	pci_write_reg(chan, 0xfbb00047, SH7780_PCICMD);
+	pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR);
 
-	pci_write_reg(0x00011912, SH7780_PCISVID);
-	pci_write_reg(0x08000000, SH7780_PCICSCR0);
-	pci_write_reg(0x0000001b, SH7780_PCICSAR0);
-	pci_write_reg(0xfd000000, SH7780_PCICSCR1);
-	pci_write_reg(0x0000000f, SH7780_PCICSAR1);
+	pci_write_reg(chan, 0x00011912, SH7780_PCISVID);
+	pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0);
+	pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0);
+	pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1);
+	pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1);
 
-	pci_write_reg(0xfd000000, SH7780_PCIMBR0);
-	pci_write_reg(0x00fc0000, SH7780_PCIMBMR0);
+	pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0);
+	pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0);
 
 #ifdef CONFIG_32BIT
-	pci_write_reg(0xc0000000, SH7780_PCIMBR2);
-	pci_write_reg(0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
+	pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2);
+	pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
 #endif
 
 	/* Set IOBR for windows containing area specified in pci.h */
-	pci_write_reg((PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)),
+	pci_write_reg(chan, (PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)),
 		      SH7780_PCIIOBR);
-	pci_write_reg(((SH7780_PCI_IO_SIZE-1) & (7<<18)), SH7780_PCIIOBMR);
+	pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)),
+		      SH7780_PCIIOBMR);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/fixups-rts7751r2d.c b/arch/sh/drivers/pci/fixups-rts7751r2d.c
index 904bce8..3885233 100644
--- a/arch/sh/drivers/pci/fixups-rts7751r2d.c
+++ b/arch/sh/drivers/pci/fixups-rts7751r2d.c
@@ -10,34 +10,35 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <linux/pci.h>
 #include "pci-sh4.h"
 
 #define PCIMCR_MRSET_OFF	0xBFFFFFFF
 #define PCIMCR_RFSH_OFF		0xFFFFFFFB
 
-int pci_fixup_pcic(void)
+int pci_fixup_pcic(struct pci_channel *chan)
 {
 	unsigned long bcr1, mcr;
 
 	bcr1 = ctrl_inl(SH7751_BCR1);
 	bcr1 |= 0x40080000;	/* Enable Bit 19 BREQEN, set PCIC to slave */
-	pci_write_reg(bcr1, SH4_PCIBCR1);
+	pci_write_reg(chan, bcr1, SH4_PCIBCR1);
 
 	/* Enable all interrupts, so we known what to fix */
-	pci_write_reg(0x0000c3ff, SH4_PCIINTM);
-	pci_write_reg(0x0000380f, SH4_PCIAINTM);
+	pci_write_reg(chan, 0x0000c3ff, SH4_PCIINTM);
+	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
 
-	pci_write_reg(0xfb900047, SH7751_PCICONF1);
-	pci_write_reg(0xab000001, SH7751_PCICONF4);
+	pci_write_reg(chan, 0xfb900047, SH7751_PCICONF1);
+	pci_write_reg(chan, 0xab000001, SH7751_PCICONF4);
 
 	mcr = ctrl_inl(SH7751_MCR);
 	mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
-	pci_write_reg(mcr, SH4_PCIMCR);
+	pci_write_reg(chan, mcr, SH4_PCIMCR);
 
-	pci_write_reg(0x0c000000, SH7751_PCICONF5);
-	pci_write_reg(0xd0000000, SH7751_PCICONF6);
-	pci_write_reg(0x0c000000, SH4_PCILAR0);
-	pci_write_reg(0x00000000, SH4_PCILAR1);
+	pci_write_reg(chan, 0x0c000000, SH7751_PCICONF5);
+	pci_write_reg(chan, 0xd0000000, SH7751_PCICONF6);
+	pci_write_reg(chan, 0x0c000000, SH4_PCILAR0);
+	pci_write_reg(chan, 0x00000000, SH4_PCILAR1);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c
index 2f88630..3f6754a 100644
--- a/arch/sh/drivers/pci/fixups-sdk7780.c
+++ b/arch/sh/drivers/pci/fixups-sdk7780.c
@@ -14,46 +14,48 @@
 #include "pci-sh4.h"
 #include <asm/io.h>
 
-int pci_fixup_pcic(void)
+int pci_fixup_pcic(struct pci_channel *chan)
 {
 	ctrl_outl(0x00000001, SH7780_PCI_VCR2);
 
 	/* Enable all interrupts, so we know what to fix */
-	pci_write_reg(0x0000C3FF, SH7780_PCIIMR);
-	pci_write_reg(0x0000380F, SH7780_PCIAINTM);
+	pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR);
+	pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM);
 
 	/* Set up standard PCI config registers */
-	pci_write_reg(0xFB00, SH7780_PCISTATUS);
-	pci_write_reg(0x0047, SH7780_PCICMD);
-	pci_write_reg(0x00, SH7780_PCIPIF);
-	pci_write_reg(0x00, SH7780_PCISUB);
-	pci_write_reg(0x06, SH7780_PCIBCC);
-	pci_write_reg(0x1912, SH7780_PCISVID);
-	pci_write_reg(0x0001, SH7780_PCISID);
+	pci_write_reg(chan, 0xFB00, SH7780_PCISTATUS);
+	pci_write_reg(chan, 0x0047, SH7780_PCICMD);
+	pci_write_reg(chan, 0x00, SH7780_PCIPIF);
+	pci_write_reg(chan, 0x00, SH7780_PCISUB);
+	pci_write_reg(chan, 0x06, SH7780_PCIBCC);
+	pci_write_reg(chan, 0x1912, SH7780_PCISVID);
+	pci_write_reg(chan, 0x0001, SH7780_PCISID);
 
-	pci_write_reg(0x08000000, SH7780_PCIMBAR0);	/* PCI */
-	pci_write_reg(0x08000000, SH7780_PCILAR0);	/* SHwy */
-	pci_write_reg(0x07F00001, SH7780_PCILSR);	/* size 128M w/ MBAR */
+	pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0);	/* PCI */
+	pci_write_reg(chan, 0x08000000, SH7780_PCILAR0);	/* SHwy */
+	pci_write_reg(chan, 0x07F00001, SH7780_PCILSR);	/* size 128M w/ MBAR */
 
-	pci_write_reg(0x00000000, SH7780_PCIMBAR1);
-	pci_write_reg(0x00000000, SH7780_PCILAR1);
-	pci_write_reg(0x00000000, SH7780_PCILSR1);
+	pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1);
+	pci_write_reg(chan, 0x00000000, SH7780_PCILAR1);
+	pci_write_reg(chan, 0x00000000, SH7780_PCILSR1);
 
-	pci_write_reg(0xAB000801, SH7780_PCIIBAR);
+	pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR);
 
 	/*
 	 * Set the MBR so PCI address is one-to-one with window,
 	 * meaning all calls go straight through... use ifdef to
 	 * catch erroneous assumption.
 	 */
-	pci_write_reg(0xFD000000 , SH7780_PCIMBR0);
-	pci_write_reg(0x00FC0000 , SH7780_PCIMBMR0);	/* 16M */
+	pci_write_reg(chan, 0xFD000000 , SH7780_PCIMBR0);
+	pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0);	/* 16M */
 
 	/* Set IOBR for window containing area specified in pci.h */
-	pci_write_reg(PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR);
-	pci_write_reg((SH7780_PCI_IO_SIZE-1) & (7 << 18), SH7780_PCIIOBMR);
+	pci_write_reg(chan, PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1),
+		      SH7780_PCIIOBR);
+	pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18),
+		      SH7780_PCIIOBMR);
 
-	pci_write_reg(0xA5000C01, SH7780_PCICR);
+	pci_write_reg(chan, 0xA5000C01, SH7780_PCICR);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/fixups-se7780.c b/arch/sh/drivers/pci/fixups-se7780.c
index 880cea1..b8e735e 100644
--- a/arch/sh/drivers/pci/fixups-se7780.c
+++ b/arch/sh/drivers/pci/fixups-se7780.c
@@ -15,13 +15,13 @@
 #include "pci-sh4.h"
 #include <asm/io.h>
 
-int pci_fixup_pcic(void)
+int pci_fixup_pcic(struct pci_channel *chan)
 {
 	ctrl_outl(0x00000001, SH7780_PCI_VCR2);
 
 	/* Enable all interrupts, so we know what to fix */
-	pci_write_reg(0x0000C3FF, SH7780_PCIIMR);
-	pci_write_reg(0x0000380F, SH7780_PCIAINTM);
+	pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR);
+	pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM);
 
 	/* Set up standard PCI config registers */
 	ctrl_outw(0xFB00, PCI_REG(SH7780_PCISTATUS));
@@ -32,29 +32,31 @@ int pci_fixup_pcic(void)
 	ctrl_outw(0x1912, PCI_REG(SH7780_PCISVID));
 	ctrl_outw(0x0001, PCI_REG(SH7780_PCISID));
 
-	pci_write_reg(0x08000000, SH7780_PCIMBAR0);     /* PCI */
-	pci_write_reg(0x08000000, SH7780_PCILAR0);     /* SHwy */
-	pci_write_reg(0x07F00001, SH7780_PCILSR);      /* size 128M w/ MBAR */
+	pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0);     /* PCI */
+	pci_write_reg(chan, 0x08000000, SH7780_PCILAR0);     /* SHwy */
+	pci_write_reg(chan, 0x07F00001, SH7780_PCILSR); /* size 128M w/ MBAR */
 
-	pci_write_reg(0x00000000, SH7780_PCIMBAR1);
-	pci_write_reg(0x00000000, SH7780_PCILAR1);
-	pci_write_reg(0x00000000, SH7780_PCILSR1);
+	pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1);
+	pci_write_reg(chan, 0x00000000, SH7780_PCILAR1);
+	pci_write_reg(chan, 0x00000000, SH7780_PCILSR1);
 
-	pci_write_reg(0xAB000801, SH7780_PCIIBAR);
+	pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR);
 
 	/*
 	 * Set the MBR so PCI address is one-to-one with window,
 	 * meaning all calls go straight through... use ifdef to
 	 * catch erroneous assumption.
 	 */
-	pci_write_reg(0xFD000000 , SH7780_PCIMBR0);
-	pci_write_reg(0x00FC0000 , SH7780_PCIMBMR0);    /* 16M */
+	pci_write_reg(chan, 0xFD000000 , SH7780_PCIMBR0);
+	pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0);    /* 16M */
 
 	/* Set IOBR for window containing area specified in pci.h */
-	pci_write_reg(PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR);
-	pci_write_reg((SH7780_PCI_IO_SIZE-1) & (7 << 18), SH7780_PCIIOBMR);
+	pci_write_reg(chan, PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1),
+		      SH7780_PCIIOBR);
+	pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18),
+		      SH7780_PCIIOBMR);
 
-	pci_write_reg(0xA5000C01, SH7780_PCICR);
+	pci_write_reg(chan, 0xA5000C01, SH7780_PCICR);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c
index bff09ec..343c072 100644
--- a/arch/sh/drivers/pci/ops-landisk.c
+++ b/arch/sh/drivers/pci/ops-landisk.c
@@ -45,7 +45,7 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 
 int __init pcibios_init_platform(void)
 {
-	return sh7751_pcic_init(&sh7751_pci_map);
+	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
 }
 
 int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
index 86c0b6f..8bff32a 100644
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ b/arch/sh/drivers/pci/ops-lboxre2.c
@@ -59,5 +59,5 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 
 int __init pcibios_init_platform(void)
 {
-	return sh7751_pcic_init(&sh7751_pci_map);
+	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
 }
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
index 8555238..bf32ee8 100644
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ b/arch/sh/drivers/pci/ops-r7780rp.c
@@ -64,5 +64,5 @@ static struct sh4_pci_address_map sh7780_pci_map = {
 
 int __init pcibios_init_platform(void)
 {
-	return sh7780_pcic_init(&sh7780_pci_map);
+	return sh7780_pcic_init(&board_pci_channels[0], &sh7780_pci_map);
 }
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
index d6ca74b..e4208a6 100644
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ b/arch/sh/drivers/pci/ops-rts7751r2d.c
@@ -69,6 +69,6 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 int __init pcibios_init_platform(void)
 {
 	__set_io_port_base(SH7751_PCI_IO_BASE);
-	return sh7751_pcic_init(&sh7751_pci_map);
+	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
 }
 
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
index 4dcc641..21d59d4 100644
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ b/arch/sh/drivers/pci/ops-sdk7780.c
@@ -69,5 +69,5 @@ static struct sh4_pci_address_map sdk7780_pci_map = {
 int __init pcibios_init_platform(void)
 {
 	printk(KERN_INFO "SH7780 PCI: Finished initializing PCI controller\n");
-	return sh7780_pcic_init(&sdk7780_pci_map);
+	return sh7780_pcic_init(&board_pci_channels[0], &sdk7780_pci_map);
 }
diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c
index 3145c62..78a6f2b 100644
--- a/arch/sh/drivers/pci/ops-se7780.c
+++ b/arch/sh/drivers/pci/ops-se7780.c
@@ -92,5 +92,5 @@ int __init pcibios_init_platform(void)
 	ctrl_outw(0x0013, FPGA_PCI_INTSEL1);
 	ctrl_outw(0xE402, FPGA_PCI_INTSEL2);
 
-	return sh7780_pcic_init(&se7780_pci_map);
+	return sh7780_pcic_init(&board_pci_channels[0], &se7780_pci_map);
 }
diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c
index 710a3b0..92d27f7 100644
--- a/arch/sh/drivers/pci/ops-sh4.c
+++ b/arch/sh/drivers/pci/ops-sh4.c
@@ -34,8 +34,8 @@ static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn,
 	 * so we must do byte alignment by hand
 	 */
 	spin_lock_irqsave(&sh4_pci_lock, flags);
-	pci_write_reg(CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
-	data = pci_read_reg(SH4_PCIPDR);
+	pci_write_reg(NULL, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
+	data = pci_read_reg(NULL, SH4_PCIPDR);
 	spin_unlock_irqrestore(&sh4_pci_lock, flags);
 
 	switch (size) {
@@ -68,8 +68,8 @@ static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn,
 	u32 data;
 
 	spin_lock_irqsave(&sh4_pci_lock, flags);
-	pci_write_reg(CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
-	data = pci_read_reg(SH4_PCIPDR);
+	pci_write_reg(NULL, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
+	data = pci_read_reg(NULL, SH4_PCIPDR);
 	spin_unlock_irqrestore(&sh4_pci_lock, flags);
 
 	switch (size) {
@@ -90,7 +90,7 @@ static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn,
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
 
-	pci_write_reg(data, SH4_PCIPDR);
+	pci_write_reg(NULL, data, SH4_PCIPDR);
 
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -106,25 +106,25 @@ struct pci_ops sh4_pci_ops = {
  */
 static unsigned int pci_probe = PCI_PROBE_CONF1;
 
-int __init sh4_pci_check_direct(void)
+int __init sh4_pci_check_direct(struct pci_channel *chan)
 {
 	/*
 	 * Check if configuration works.
 	 */
 	if (pci_probe & PCI_PROBE_CONF1) {
-		unsigned int tmp = pci_read_reg(SH4_PCIPAR);
+		unsigned int tmp = pci_read_reg(chan, SH4_PCIPAR);
 
-		pci_write_reg(P1SEG, SH4_PCIPAR);
+		pci_write_reg(chan, P1SEG, SH4_PCIPAR);
 
-		if (pci_read_reg(SH4_PCIPAR) == P1SEG) {
-			pci_write_reg(tmp, SH4_PCIPAR);
+		if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) {
+			pci_write_reg(chan, tmp, SH4_PCIPAR);
 			printk(KERN_INFO "PCI: Using configuration type 1\n");
 			request_region(PCI_REG(SH4_PCIPAR), 8, "PCI conf1");
 
 			return 0;
 		}
 
-		pci_write_reg(tmp, SH4_PCIPAR);
+		pci_write_reg(chan, tmp, SH4_PCIPAR);
 	}
 
 	pr_debug("PCI: pci_check_direct failed\n");
@@ -163,7 +163,7 @@ char * __devinit pcibios_setup(char *str)
 	return str;
 }
 
-int __attribute__((weak)) pci_fixup_pcic(void)
+int __attribute__((weak)) pci_fixup_pcic(struct pci_channel *chan)
 {
 	/* Nothing to do. */
 	return 0;
diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c
index 53dd893..cba8015 100644
--- a/arch/sh/drivers/pci/ops-snapgear.c
+++ b/arch/sh/drivers/pci/ops-snapgear.c
@@ -66,7 +66,7 @@ static struct sh4_pci_address_map sh7751_pci_map = {
  */
 int __init pcibios_init_platform(void)
 {
-	return sh7751_pcic_init(&sh7751_pci_map);
+	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
 }
 
 int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
index a8f7801..69fcc5c 100644
--- a/arch/sh/drivers/pci/ops-titan.c
+++ b/arch/sh/drivers/pci/ops-titan.c
@@ -73,5 +73,5 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 
 int __init pcibios_init_platform(void)
 {
-	return sh7751_pcic_init(&sh7751_pci_map);
+	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
 }
diff --git a/arch/sh/drivers/pci/pci-sh4.h b/arch/sh/drivers/pci/pci-sh4.h
index a83dcf7..62ba350 100644
--- a/arch/sh/drivers/pci/pci-sh4.h
+++ b/arch/sh/drivers/pci/pci-sh4.h
@@ -154,8 +154,8 @@
 
 /* arch/sh/kernel/drivers/pci/ops-sh4.c */
 extern struct pci_ops sh4_pci_ops;
-int sh4_pci_check_direct(void);
-int pci_fixup_pcic(void);
+int sh4_pci_check_direct(struct pci_channel *chan);
+int pci_fixup_pcic(struct pci_channel *chan);
 
 struct sh4_pci_address_space {
 	unsigned long base;
@@ -168,13 +168,16 @@ struct sh4_pci_address_map {
 	unsigned long flags;
 };
 
-static inline void pci_write_reg(unsigned long val, unsigned long reg)
+static inline void pci_write_reg(struct pci_channel *chan,
+				 unsigned long val, unsigned long reg)
 {
 	ctrl_outl(val, PCI_REG(reg));
 }
 
-static inline unsigned long pci_read_reg(unsigned long reg)
+static inline unsigned long pci_read_reg(struct pci_channel *chan,
+					 unsigned long reg)
 {
 	return ctrl_inl(PCI_REG(reg));
 }
+
 #endif /* __PCI_SH4_H */
diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 3065eb184..9c2c014 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -40,21 +40,22 @@ static int __init sh7751_pci_init(void)
 	pr_debug("PCI: Starting intialization.\n");
 
 	/* check for SH7751/SH7751R hardware */
-	id = pci_read_reg(SH7751_PCICONF0);
+	id = pci_read_reg(NULL, SH7751_PCICONF0);
 	if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) &&
 	    id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) {
 		pr_debug("PCI: This is not an SH7751(R) (%x)\n", id);
 		return -ENODEV;
 	}
 
-	if ((ret = sh4_pci_check_direct()) != 0)
+	if ((ret = sh4_pci_check_direct(NULL)) != 0)
 		return ret;
 
 	return pcibios_init_platform();
 }
 subsys_initcall(sh7751_pci_init);
 
-static int __init __area_sdram_check(unsigned int area)
+static int __init __area_sdram_check(struct pci_channel *chan,
+				     unsigned int area)
 {
 	u32 word;
 
@@ -65,7 +66,7 @@ static int __init __area_sdram_check(unsigned int area)
 		       area, word);
 		return 0;
 	}
-	pci_write_reg(word, SH4_PCIBCR1);
+	pci_write_reg(chan, word, SH4_PCIBCR1);
 
 	word = (u16)ctrl_inw(SH7751_BCR2);
 	/* check BCR2 for 32bit SDRAM interface*/
@@ -74,12 +75,13 @@ static int __init __area_sdram_check(unsigned int area)
 		       area, word);
 		return 0;
 	}
-	pci_write_reg(word, SH4_PCIBCR2);
+	pci_write_reg(chan, word, SH4_PCIBCR2);
 
 	return 1;
 }
 
-int __init sh7751_pcic_init(struct sh4_pci_address_map *map)
+int __init sh7751_pcic_init(struct pci_channel *chan,
+			    struct sh4_pci_address_map *map)
 {
 	u32 reg;
 	u32 word;
@@ -90,10 +92,10 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map)
 	ctrl_outl(reg, SH7751_BCR1);
 
 	/* Turn the clocks back on (not done in reset)*/
-	pci_write_reg(0, SH4_PCICLKR);
+	pci_write_reg(chan, 0, SH4_PCICLKR);
 	/* Clear Powerdown IRQ's (not done in reset) */
 	word = SH4_PCIPINT_D3 | SH4_PCIPINT_D0;
-	pci_write_reg(word, SH4_PCIPINT);
+	pci_write_reg(chan, word, SH4_PCIPINT);
 
 	/*
 	 * This code is unused for some boards as it is done in the
@@ -103,11 +105,11 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map)
 	if (!(map->flags & SH4_PCIC_NO_RESET)) {
 		/* toggle PCI reset pin */
 		word = SH4_PCICR_PREFIX | SH4_PCICR_PRST;
-		pci_write_reg(word, SH4_PCICR);
+		pci_write_reg(chan, word, SH4_PCICR);
 		/* Wait for a long time... not 1 sec. but long enough */
 		mdelay(100);
 		word = SH4_PCICR_PREFIX;
-		pci_write_reg(word, SH4_PCICR);
+		pci_write_reg(chan, word, SH4_PCICR);
 	}
 
 	/* set the command/status bits to:
@@ -116,11 +118,11 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map)
 	 */
 	word = SH7751_PCICONF1_WCC | SH7751_PCICONF1_PER |
 	       SH7751_PCICONF1_BUM | SH7751_PCICONF1_MES;
-	pci_write_reg(word, SH7751_PCICONF1);
+	pci_write_reg(chan, word, SH7751_PCICONF1);
 
 	/* define this host as the host bridge */
 	word = PCI_BASE_CLASS_BRIDGE << 24;
-	pci_write_reg(word, SH7751_PCICONF2);
+	pci_write_reg(chan, word, SH7751_PCICONF2);
 
 	/* Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
@@ -128,24 +130,24 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map)
 	 * Window1 = map->window1.size @ cached area base = SDRAM
 	 */
 	word = map->window0.size - 1;
-	pci_write_reg(word, SH4_PCILSR0);
+	pci_write_reg(chan, word, SH4_PCILSR0);
 	word = map->window1.size - 1;
-	pci_write_reg(word, SH4_PCILSR1);
+	pci_write_reg(chan, word, SH4_PCILSR1);
 	/* Set the values on window 0 PCI config registers */
 	word = P2SEGADDR(map->window0.base);
-	pci_write_reg(word, SH4_PCILAR0);
-	pci_write_reg(word, SH7751_PCICONF5);
+	pci_write_reg(chan, word, SH4_PCILAR0);
+	pci_write_reg(chan, word, SH7751_PCICONF5);
 	/* Set the values on window 1 PCI config registers */
 	word =  PHYSADDR(map->window1.base);
-	pci_write_reg(word, SH4_PCILAR1);
-	pci_write_reg(word, SH7751_PCICONF6);
+	pci_write_reg(chan, word, SH4_PCILAR1);
+	pci_write_reg(chan, word, SH7751_PCICONF6);
 
 	/* Set the local 16MB PCI memory space window to
 	 * the lowest PCI mapped address
 	 */
 	word = PCIBIOS_MIN_MEM & SH4_PCIMBR_MASK;
 	pr_debug("PCI: Setting upper bits of Memory window to 0x%x\n", word);
-	pci_write_reg(word , SH4_PCIMBR);
+	pci_write_reg(chan, word , SH4_PCIMBR);
 
 	/* Map IO space into PCI IO window
 	 * The IO window is 64K-PCIBIOS_MIN_IO in size
@@ -160,19 +162,19 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map)
 	 * correctly */
 	word = PCIBIOS_MIN_IO & SH4_PCIIOBR_MASK;
 	pr_debug("PCI: Setting upper bits of IO window to 0x%x\n", word);
-	pci_write_reg(word, SH4_PCIIOBR);
+	pci_write_reg(chan, word, SH4_PCIIOBR);
 
 	/* Set PCI WCRx, BCRx's, copy from BSC locations */
 
 	/* check BCR for SDRAM in specified area */
 	switch (map->window0.base) {
-	case SH7751_CS0_BASE_ADDR: word = __area_sdram_check(0); break;
-	case SH7751_CS1_BASE_ADDR: word = __area_sdram_check(1); break;
-	case SH7751_CS2_BASE_ADDR: word = __area_sdram_check(2); break;
-	case SH7751_CS3_BASE_ADDR: word = __area_sdram_check(3); break;
-	case SH7751_CS4_BASE_ADDR: word = __area_sdram_check(4); break;
-	case SH7751_CS5_BASE_ADDR: word = __area_sdram_check(5); break;
-	case SH7751_CS6_BASE_ADDR: word = __area_sdram_check(6); break;
+	case SH7751_CS0_BASE_ADDR: word = __area_sdram_check(chan, 0); break;
+	case SH7751_CS1_BASE_ADDR: word = __area_sdram_check(chan, 1); break;
+	case SH7751_CS2_BASE_ADDR: word = __area_sdram_check(chan, 2); break;
+	case SH7751_CS3_BASE_ADDR: word = __area_sdram_check(chan, 3); break;
+	case SH7751_CS4_BASE_ADDR: word = __area_sdram_check(chan, 4); break;
+	case SH7751_CS5_BASE_ADDR: word = __area_sdram_check(chan, 5); break;
+	case SH7751_CS6_BASE_ADDR: word = __area_sdram_check(chan, 6); break;
 	}
 
 	if (!word)
@@ -180,25 +182,25 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map)
 
 	/* configure the wait control registers */
 	word = ctrl_inl(SH7751_WCR1);
-	pci_write_reg(word, SH4_PCIWCR1);
+	pci_write_reg(chan, word, SH4_PCIWCR1);
 	word = ctrl_inl(SH7751_WCR2);
-	pci_write_reg(word, SH4_PCIWCR2);
+	pci_write_reg(chan, word, SH4_PCIWCR2);
 	word = ctrl_inl(SH7751_WCR3);
-	pci_write_reg(word, SH4_PCIWCR3);
+	pci_write_reg(chan, word, SH4_PCIWCR3);
 	word = ctrl_inl(SH7751_MCR);
-	pci_write_reg(word, SH4_PCIMCR);
+	pci_write_reg(chan, word, SH4_PCIMCR);
 
 	/* NOTE: I'm ignoring the PCI error IRQs for now..
 	 * TODO: add support for the internal error interrupts and
 	 * DMA interrupts...
 	 */
 
-	pci_fixup_pcic();
+	pci_fixup_pcic(chan);
 
 	/* SH7751 init done, set central function init complete */
 	/* use round robin mode to stop a device starving/overruning */
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_ARBM;
-	pci_write_reg(word, SH4_PCICR);
+	pci_write_reg(chan, word, SH4_PCICR);
 
 	return 1;
 }
diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h
index 68e3cb5..6f101e5 100644
--- a/arch/sh/drivers/pci/pci-sh7751.h
+++ b/arch/sh/drivers/pci/pci-sh7751.h
@@ -130,6 +130,7 @@
 struct sh4_pci_address_map;
 
 /* arch/sh/drivers/pci/pci-sh7751.c */
-int sh7751_pcic_init(struct sh4_pci_address_map *map);
+int sh7751_pcic_init(struct pci_channel *chan,
+		     struct sh4_pci_address_map *map);
 
 #endif /* _PCI_SH7751_H_ */
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index bae6a2c..56f673f 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -55,7 +55,7 @@ static int __init sh7780_pci_init(void)
 	ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */
 
 	/* check for SH7780/SH7780R hardware */
-	id = pci_read_reg(SH7780_PCIVID);
+	id = pci_read_reg(NULL, SH7780_PCIVID);
 	if ((id & 0xffff) == SH7780_VENDOR_ID) {
 		switch ((id >> 16) & 0xffff) {
 		case SH7763_DEVICE_ID:
@@ -82,14 +82,15 @@ static int __init sh7780_pci_init(void)
 		ctrl_outl(0x33333333, INTC_INTPRI);
 	}
 
-	if ((ret = sh4_pci_check_direct()) != 0)
+	if ((ret = sh4_pci_check_direct(NULL)) != 0)
 		return ret;
 
 	return pcibios_init_platform();
 }
 core_initcall(sh7780_pci_init);
 
-int __init sh7780_pcic_init(struct sh4_pci_address_map *map)
+int __init sh7780_pcic_init(struct pci_channel *chan,
+			    struct sh4_pci_address_map *map)
 {
 	u32 word;
 
@@ -101,34 +102,34 @@ int __init sh7780_pcic_init(struct sh4_pci_address_map *map)
 	if (!(map->flags & SH4_PCIC_NO_RESET)) {
 		/* toggle PCI reset pin */
 		word = SH4_PCICR_PREFIX | SH4_PCICR_PRST;
-		pci_write_reg(word, SH4_PCICR);
+		pci_write_reg(chan, word, SH4_PCICR);
 		/* Wait for a long time... not 1 sec. but long enough */
 		mdelay(100);
 		word = SH4_PCICR_PREFIX;
-		pci_write_reg(word, SH4_PCICR);
+		pci_write_reg(chan, word, SH4_PCICR);
 	}
 
 	/* set the command/status bits to:
 	 * Wait Cycle Control + Parity Enable + Bus Master +
 	 * Mem space enable
 	 */
-	pci_write_reg(0x00000046, SH7780_PCICMD);
+	pci_write_reg(chan, 0x00000046, SH7780_PCICMD);
 
 	/* define this host as the host bridge */
 	word = PCI_BASE_CLASS_BRIDGE << 24;
-	pci_write_reg(word, SH7780_PCIRID);
+	pci_write_reg(chan, word, SH7780_PCIRID);
 
 	/* Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
 	 */
-	pci_write_reg(map->window0.size - 0xfffff, SH4_PCILSR0);
-	pci_write_reg(map->window1.size - 0xfffff, SH4_PCILSR1);
+	pci_write_reg(chan, map->window0.size - 0xfffff, SH4_PCILSR0);
+	pci_write_reg(chan, map->window1.size - 0xfffff, SH4_PCILSR1);
 	/* Set the values on window 0 PCI config registers */
-	pci_write_reg(map->window0.base, SH4_PCILAR0);
-	pci_write_reg(map->window0.base, SH7780_PCIMBAR0);
+	pci_write_reg(chan, map->window0.base, SH4_PCILAR0);
+	pci_write_reg(chan, map->window0.base, SH7780_PCIMBAR0);
 	/* Set the values on window 1 PCI config registers */
-	pci_write_reg(map->window1.base, SH4_PCILAR1);
-	pci_write_reg(map->window1.base, SH7780_PCIMBAR1);
+	pci_write_reg(chan, map->window1.base, SH4_PCILAR1);
+	pci_write_reg(chan, map->window1.base, SH7780_PCIMBAR1);
 
 	/* Map IO space into PCI IO window
 	 * The IO window is 64K-PCIBIOS_MIN_IO in size
@@ -145,12 +146,12 @@ int __init sh7780_pcic_init(struct sh4_pci_address_map *map)
 	 */
 
 	/* Apply any last-minute PCIC fixups */
-	pci_fixup_pcic();
+	pci_fixup_pcic(chan);
 
 	/* SH7780 init done, set central function init complete */
 	/* use round robin mode to stop a device starving/overruning */
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO;
-	pci_write_reg(word, SH4_PCICR);
+	pci_write_reg(chan, word, SH4_PCICR);
 
 	return 1;
 }
diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h
index 93adc71..d349611 100644
--- a/arch/sh/drivers/pci/pci-sh7780.h
+++ b/arch/sh/drivers/pci/pci-sh7780.h
@@ -109,6 +109,7 @@
 struct sh4_pci_address_map;
 
 /* arch/sh/drivers/pci/pci-sh7780.c */
-int sh7780_pcic_init(struct sh4_pci_address_map *map);
+int sh7780_pcic_init(struct pci_channel *chan,
+		     struct sh4_pci_address_map *map);
 
 #endif /* _PCI_SH7780_H_ */
-- 
cgit v0.10.2


From d0e3db40e2a1352aa2a2f425a7d4631bddc03d51 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 11 Mar 2009 15:46:14 +0900
Subject: sh: add init member to pci_channel data

This patch adds an init callback to struct pci_channel and makes sure
it is initialized properly. Code is added to call this init function
from pcibios_init(). Return values are adjusted and a warning is is
printed if init fails.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-dreamcast/setup.c b/arch/sh/boards/mach-dreamcast/setup.c
index d1bee48..ebe9922 100644
--- a/arch/sh/boards/mach-dreamcast/setup.c
+++ b/arch/sh/boards/mach-dreamcast/setup.c
@@ -30,7 +30,6 @@
 
 extern struct irq_chip systemasic_int;
 extern void aica_time_init(void);
-extern int gapspci_init(void);
 extern int systemasic_irq_demux(int);
 
 static void __init dreamcast_setup(char **cmdline_p)
@@ -51,11 +50,6 @@ static void __init dreamcast_setup(char **cmdline_p)
 					 handle_level_irq);
 
 	board_time_init = aica_time_init;
-
-#ifdef CONFIG_PCI
-	if (gapspci_init() < 0)
-		printk(KERN_WARNING "GAPSPCI was not detected.\n");
-#endif
 }
 
 static struct sh_machine_vector mv_dreamcast __initmv = {
diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/ops-cayman.c
index 38ef762..f4a5e14 100644
--- a/arch/sh/drivers/pci/ops-cayman.c
+++ b/arch/sh/drivers/pci/ops-cayman.c
@@ -77,7 +77,7 @@ int __init pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin)
 }
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh5_pci_ops, NULL, NULL, 0, 0xff },
+	{ sh5_pci_init, &sh5_pci_ops, NULL, NULL, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c
index f5d2a2a..f62063e 100644
--- a/arch/sh/drivers/pci/ops-dreamcast.c
+++ b/arch/sh/drivers/pci/ops-dreamcast.c
@@ -42,15 +42,6 @@ static struct resource gapspci_mem_resource = {
 	.flags	= IORESOURCE_MEM,
 };
 
-static struct pci_ops gapspci_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &gapspci_pci_ops, &gapspci_io_resource,
-	  &gapspci_mem_resource, 0, 1 },
-	{ 0, }
-};
-EXPORT_SYMBOL(board_pci_channels);
-
 /*
  * The !gapspci_config_access case really shouldn't happen, ever, unless
  * someone implicitly messes around with the last devfn value.. otherwise we
@@ -116,7 +107,7 @@ static struct pci_ops gapspci_pci_ops = {
  * gapspci init
  */
 
-int __init gapspci_init(void)
+static int __init gapspci_init(struct pci_channel *chan)
 {
 	char idbuf[16];
 	int i;
@@ -168,3 +159,10 @@ char * __devinit pcibios_setup(char *str)
 {
 	return str;
 }
+
+struct pci_channel board_pci_channels[] = {
+	{ gapspci_init, &gapspci_pci_ops, &gapspci_io_resource,
+	  &gapspci_mem_resource, 0, 1 },
+	{ 0, }
+};
+EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c
index 343c072..c46911d9 100644
--- a/arch/sh/drivers/pci/ops-landisk.c
+++ b/arch/sh/drivers/pci/ops-landisk.c
@@ -30,7 +30,7 @@ static struct resource sh7751_mem_resource = {
 };
 
 struct pci_channel board_pci_channels[] = {
-	{&sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0x3ff},
+	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0x3ff},
 	{NULL, NULL, NULL, 0, 0},
 };
 
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
index 8bff32a..f606df2 100644
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ b/arch/sh/drivers/pci/ops-lboxre2.c
@@ -39,7 +39,7 @@ static struct resource sh7751_mem_resource = {
 extern struct pci_ops sh7751_pci_ops;
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
+	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
index bf32ee8..b51b7e4 100644
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ b/arch/sh/drivers/pci/ops-r7780rp.c
@@ -43,7 +43,7 @@ static struct resource sh7780_mem_resource = {
 extern struct pci_ops sh7780_pci_ops;
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff },
+	{ sh7780_pci_init, &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
index e4208a6..fe5a231 100644
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ b/arch/sh/drivers/pci/ops-rts7751r2d.c
@@ -47,7 +47,7 @@ static struct resource sh7751_mem_resource = {
 extern struct pci_ops sh7751_pci_ops;
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
+	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
index 21d59d4..7277cd1 100644
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ b/arch/sh/drivers/pci/ops-sdk7780.c
@@ -49,7 +49,7 @@ static struct resource sdk7780_mem_resource = {
 };
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff },
+	{ sh7780_pci_init, &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c
index 78a6f2b..76a74fb 100644
--- a/arch/sh/drivers/pci/ops-se7780.c
+++ b/arch/sh/drivers/pci/ops-se7780.c
@@ -58,7 +58,7 @@ static struct resource se7780_mem_resource = {
 extern struct pci_ops se7780_pci_ops;
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff },
+	{ sh7780_pci_init, &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/ops-sh03.c b/arch/sh/drivers/pci/ops-sh03.c
index e1703ff..0218135 100644
--- a/arch/sh/drivers/pci/ops-sh03.c
+++ b/arch/sh/drivers/pci/ops-sh03.c
@@ -39,7 +39,7 @@ static struct resource sh7751_mem_resource = {
 extern struct pci_ops sh4_pci_ops;
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
+	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 
diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c
index cba8015..2e254c6 100644
--- a/arch/sh/drivers/pci/ops-snapgear.c
+++ b/arch/sh/drivers/pci/ops-snapgear.c
@@ -40,7 +40,7 @@ static struct resource sh7751_mem_resource = {
 };
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
+	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
 	{ 0, }
 };
 
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
index 69fcc5c..ffa79bd 100644
--- a/arch/sh/drivers/pci/ops-titan.c
+++ b/arch/sh/drivers/pci/ops-titan.c
@@ -52,7 +52,7 @@ static struct resource sh7751_mem_resource = {
 };
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
+	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c
index 7a97438..008a02e 100644
--- a/arch/sh/drivers/pci/pci-sh5.c
+++ b/arch/sh/drivers/pci/pci-sh5.c
@@ -27,6 +27,12 @@
 unsigned long pcicr_virt;
 unsigned long PCI_IO_AREA;
 
+int __init sh5_pci_init(struct pci_channel *chan)
+{
+	pr_debug("PCI: Starting intialization.\n");
+	return pcibios_init_platform();
+}
+
 /* Rounds a number UP to the nearest power of two. Used for
  * sizing the PCI window.
  */
diff --git a/arch/sh/drivers/pci/pci-sh5.h b/arch/sh/drivers/pci/pci-sh5.h
index 7cff3fc..af09f38 100644
--- a/arch/sh/drivers/pci/pci-sh5.h
+++ b/arch/sh/drivers/pci/pci-sh5.h
@@ -108,6 +108,7 @@ extern unsigned long pcicr_virt;
 extern struct pci_ops sh5_pci_ops;
 
 /* arch/sh/drivers/pci/pci-sh5.c */
+int sh5_pci_init(struct pci_channel *chan);
 int sh5pci_init(unsigned long memStart, unsigned long memSize);
 
 #endif /* __PCI_SH5_H */
diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 9c2c014..230db8b 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -32,7 +32,7 @@
  * space mapping) will be called via the platform defined function
  * pcibios_init_platform().
  */
-static int __init sh7751_pci_init(void)
+int __init sh7751_pci_init(struct pci_channel *chan)
 {
 	unsigned int id;
 	int ret;
@@ -40,19 +40,18 @@ static int __init sh7751_pci_init(void)
 	pr_debug("PCI: Starting intialization.\n");
 
 	/* check for SH7751/SH7751R hardware */
-	id = pci_read_reg(NULL, SH7751_PCICONF0);
+	id = pci_read_reg(chan, SH7751_PCICONF0);
 	if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) &&
 	    id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) {
 		pr_debug("PCI: This is not an SH7751(R) (%x)\n", id);
 		return -ENODEV;
 	}
 
-	if ((ret = sh4_pci_check_direct(NULL)) != 0)
+	if ((ret = sh4_pci_check_direct(chan)) != 0)
 		return ret;
 
 	return pcibios_init_platform();
 }
-subsys_initcall(sh7751_pci_init);
 
 static int __init __area_sdram_check(struct pci_channel *chan,
 				     unsigned int area)
@@ -178,7 +177,7 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 	}
 
 	if (!word)
-		return 0;
+		return -1;
 
 	/* configure the wait control registers */
 	word = ctrl_inl(SH7751_WCR1);
@@ -202,5 +201,5 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_ARBM;
 	pci_write_reg(chan, word, SH4_PCICR);
 
-	return 1;
+	return 0;
 }
diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h
index 6f101e5..0ea4387 100644
--- a/arch/sh/drivers/pci/pci-sh7751.h
+++ b/arch/sh/drivers/pci/pci-sh7751.h
@@ -130,6 +130,7 @@
 struct sh4_pci_address_map;
 
 /* arch/sh/drivers/pci/pci-sh7751.c */
+int sh7751_pci_init(struct pci_channel *chan);
 int sh7751_pcic_init(struct pci_channel *chan,
 		     struct sh4_pci_address_map *map);
 
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 56f673f..4706e88 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -45,7 +45,7 @@
  * space mapping) will be called via the platform defined function
  * pcibios_init_platform().
  */
-static int __init sh7780_pci_init(void)
+int __init sh7780_pci_init(struct pci_channel *chan)
 {
 	unsigned int id;
 	int ret, match = 0;
@@ -55,7 +55,7 @@ static int __init sh7780_pci_init(void)
 	ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */
 
 	/* check for SH7780/SH7780R hardware */
-	id = pci_read_reg(NULL, SH7780_PCIVID);
+	id = pci_read_reg(chan, SH7780_PCIVID);
 	if ((id & 0xffff) == SH7780_VENDOR_ID) {
 		switch ((id >> 16) & 0xffff) {
 		case SH7763_DEVICE_ID:
@@ -82,12 +82,11 @@ static int __init sh7780_pci_init(void)
 		ctrl_outl(0x33333333, INTC_INTPRI);
 	}
 
-	if ((ret = sh4_pci_check_direct(NULL)) != 0)
+	if ((ret = sh4_pci_check_direct(chan)) != 0)
 		return ret;
 
 	return pcibios_init_platform();
 }
-core_initcall(sh7780_pci_init);
 
 int __init sh7780_pcic_init(struct pci_channel *chan,
 			    struct sh4_pci_address_map *map)
@@ -153,5 +152,5 @@ int __init sh7780_pcic_init(struct pci_channel *chan,
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO;
 	pci_write_reg(chan, word, SH4_PCICR);
 
-	return 1;
+	return 0;
 }
diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h
index d349611..2f3c920 100644
--- a/arch/sh/drivers/pci/pci-sh7780.h
+++ b/arch/sh/drivers/pci/pci-sh7780.h
@@ -109,6 +109,7 @@
 struct sh4_pci_address_map;
 
 /* arch/sh/drivers/pci/pci-sh7780.c */
+int sh7780_pci_init(struct pci_channel *chan);
 int sh7780_pcic_init(struct pci_channel *chan,
 		     struct sh4_pci_address_map *map);
 
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 0d6ac7a..29ec16e 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -28,18 +28,31 @@ static int __init pcibios_init(void)
 	struct pci_bus *bus;
 	int busno;
 
+	/* init channels */
+	busno = 0;
+	for (p = board_pci_channels; p->init; p++) {
+		if (p->init(p) == 0)
+			p->enabled = 1;
+		else
+			pr_err("Unable to init pci channel %d\n", busno);
+		busno++;
+	}
+
 #ifdef CONFIG_PCI_AUTO
 	/* assign resources */
 	busno = 0;
-	for (p = board_pci_channels; p->pci_ops != NULL; p++)
-		busno = pciauto_assign_resources(busno, p) + 1;
+	for (p = board_pci_channels; p->init; p++)
+		if (p->enabled)
+			busno = pciauto_assign_resources(busno, p) + 1;
 #endif
 
 	/* scan the buses */
 	busno = 0;
-	for (p = board_pci_channels; p->pci_ops != NULL; p++) {
-		bus = pci_scan_bus(busno, p->pci_ops, p);
-		busno = bus->subordinate + 1;
+	for (p = board_pci_channels; p->init; p++) {
+		if (p->enabled) {
+			bus = pci_scan_bus(busno, p->pci_ops, p);
+			busno = bus->subordinate + 1;
+		}
 	}
 
 	pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index df1d383..5c7a8f1 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -17,11 +17,13 @@
  * external) PCI controllers.
  */
 struct pci_channel {
+	int (*init)(struct pci_channel *chan);
 	struct pci_ops *pci_ops;
 	struct resource *io_resource;
 	struct resource *mem_resource;
 	int first_devfn;
 	int last_devfn;
+	int enabled;
 };
 
 /*
-- 
cgit v0.10.2


From 710fa3c81151948ac4d836ef52b57cef91b0ab72 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 11 Mar 2009 15:47:23 +0900
Subject: sh: avoid using PCIBIOS_MIN_xxx

Replaces PCIBIOS_MIN_IO and PCIBIOS_MIN_MEM with direct struct
pci_channel access. This allows us to have more than one pci
channel.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c
index 5b25021..31f8dfb 100644
--- a/arch/sh/drivers/pci/fixups-r7780rp.c
+++ b/arch/sh/drivers/pci/fixups-r7780rp.c
@@ -37,7 +37,7 @@ int pci_fixup_pcic(struct pci_channel *chan)
 #endif
 
 	/* Set IOBR for windows containing area specified in pci.h */
-	pci_write_reg(chan, (PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)),
+	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1),
 		      SH7780_PCIIOBR);
 	pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)),
 		      SH7780_PCIIOBMR);
diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c
index 3f6754a..c295731 100644
--- a/arch/sh/drivers/pci/fixups-sdk7780.c
+++ b/arch/sh/drivers/pci/fixups-sdk7780.c
@@ -50,7 +50,7 @@ int pci_fixup_pcic(struct pci_channel *chan)
 	pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0);	/* 16M */
 
 	/* Set IOBR for window containing area specified in pci.h */
-	pci_write_reg(chan, PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1),
+	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1),
 		      SH7780_PCIIOBR);
 	pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18),
 		      SH7780_PCIIOBMR);
diff --git a/arch/sh/drivers/pci/fixups-se7780.c b/arch/sh/drivers/pci/fixups-se7780.c
index b8e735e..a968af7 100644
--- a/arch/sh/drivers/pci/fixups-se7780.c
+++ b/arch/sh/drivers/pci/fixups-se7780.c
@@ -51,7 +51,7 @@ int pci_fixup_pcic(struct pci_channel *chan)
 	pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0);    /* 16M */
 
 	/* Set IOBR for window containing area specified in pci.h */
-	pci_write_reg(chan, PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1),
+	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1),
 		      SH7780_PCIIOBR);
 	pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18),
 		      SH7780_PCIIOBMR);
diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c
index 008a02e..7750da2 100644
--- a/arch/sh/drivers/pci/pci-sh5.c
+++ b/arch/sh/drivers/pci/pci-sh5.c
@@ -206,6 +206,9 @@ int __init sh5pci_init(unsigned long memStart, unsigned long memSize)
 	return 0;
 }
 
+#define xPCIBIOS_MIN_IO		board_pci_channels->io_resource->start
+#define xPCIBIOS_MIN_MEM	board_pci_channels->mem_resource->start
+
 void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
 	struct pci_dev *dev = bus->self;
@@ -223,9 +226,9 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 		/* For now, propagate host limits to the bus;
 		 * we'll adjust them later. */
 		bus->resource[0]->end = 64*1024 - 1 ;
-		bus->resource[1]->end = PCIBIOS_MIN_MEM+(256*1024*1024)-1;
-		bus->resource[0]->start = PCIBIOS_MIN_IO;
-		bus->resource[1]->start = PCIBIOS_MIN_MEM;
+		bus->resource[1]->end = xPCIBIOS_MIN_MEM+(256*1024*1024)-1;
+		bus->resource[0]->start = xPCIBIOS_MIN_IO;
+		bus->resource[1]->start = xPCIBIOS_MIN_MEM;
 
 		/* Turn off downstream PF memory address range by default */
 		bus->resource[2]->start = 1024*1024;
diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 230db8b..447234c 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -144,22 +144,20 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 	/* Set the local 16MB PCI memory space window to
 	 * the lowest PCI mapped address
 	 */
-	word = PCIBIOS_MIN_MEM & SH4_PCIMBR_MASK;
+	word = chan->mem_resource->start & SH4_PCIMBR_MASK;
 	pr_debug("PCI: Setting upper bits of Memory window to 0x%x\n", word);
 	pci_write_reg(chan, word , SH4_PCIMBR);
 
-	/* Map IO space into PCI IO window
-	 * The IO window is 64K-PCIBIOS_MIN_IO in size
-	 * IO addresses will be translated to the
-	 * PCI IO window base address
+	/* Map IO space into PCI IO window:
+	 * IO addresses will be translated to the PCI IO window base address
 	 */
 	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n",
-		 PCIBIOS_MIN_IO, (64 << 10),
-		 SH7751_PCI_IO_BASE + PCIBIOS_MIN_IO);
+		 chan->io_resource->start, chan->io_resource->end,
+		 SH7751_PCI_IO_BASE + chan->io_resource->start);
 
 	/* Make sure the MSB's of IO window are set to access PCI space
 	 * correctly */
-	word = PCIBIOS_MIN_IO & SH4_PCIIOBR_MASK;
+	word = chan->io_resource->start & SH4_PCIIOBR_MASK;
 	pr_debug("PCI: Setting upper bits of IO window to 0x%x\n", word);
 	pci_write_reg(chan, word, SH4_PCIIOBR);
 
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 4706e88..e8f3a30 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -130,14 +130,12 @@ int __init sh7780_pcic_init(struct pci_channel *chan,
 	pci_write_reg(chan, map->window1.base, SH4_PCILAR1);
 	pci_write_reg(chan, map->window1.base, SH7780_PCIMBAR1);
 
-	/* Map IO space into PCI IO window
-	 * The IO window is 64K-PCIBIOS_MIN_IO in size
-	 * IO addresses will be translated to the
-	 * PCI IO window base address
+	/* Map IO space into PCI IO window:
+	 * IO addresses will be translated to the PCI IO window base address
 	 */
 	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n",
-		 PCIBIOS_MIN_IO, (64 << 10),
-		 SH7780_PCI_IO_BASE + PCIBIOS_MIN_IO);
+		 chan->io_resource->start, chan->io_resource->end,
+		 SH7780_PCI_IO_BASE + chan->io_resource->start);
 
 	/* NOTE: I'm ignoring the PCI error IRQs for now..
 	 * TODO: add support for the internal error interrupts and
-- 
cgit v0.10.2


From b6706ef10f75921733d7275fd45d268f2f6254c8 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Tue, 19 Feb 2008 21:34:55 +0900
Subject: sh: hook in struct pci_channel in sysdata

Store a struct pci_channel pointer in bus->sysdata. This makes whatever
struct pci_channel assigned to a bus available for sh4_pci_read() and
sh4_pci_write(). We also modify PCIBIOS_MIN_IO and PCIBIOS_MIN_MEM to
use bus->sysdata - this to gives us support for multiple pci channels.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c
index 92d27f7..ee62e6d 100644
--- a/arch/sh/drivers/pci/ops-sh4.c
+++ b/arch/sh/drivers/pci/ops-sh4.c
@@ -26,6 +26,7 @@ static DEFINE_SPINLOCK(sh4_pci_lock);
 static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn,
 			   int where, int size, u32 *val)
 {
+	struct pci_channel *chan = bus->sysdata;
 	unsigned long flags;
 	u32 data;
 
@@ -34,8 +35,8 @@ static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn,
 	 * so we must do byte alignment by hand
 	 */
 	spin_lock_irqsave(&sh4_pci_lock, flags);
-	pci_write_reg(NULL, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
-	data = pci_read_reg(NULL, SH4_PCIPDR);
+	pci_write_reg(chan, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
+	data = pci_read_reg(chan, SH4_PCIPDR);
 	spin_unlock_irqrestore(&sh4_pci_lock, flags);
 
 	switch (size) {
@@ -63,13 +64,14 @@ static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn,
 static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn,
 			 int where, int size, u32 val)
 {
+	struct pci_channel *chan = bus->sysdata;
 	unsigned long flags;
 	int shift;
 	u32 data;
 
 	spin_lock_irqsave(&sh4_pci_lock, flags);
-	pci_write_reg(NULL, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
-	data = pci_read_reg(NULL, SH4_PCIPDR);
+	pci_write_reg(chan, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR);
+	data = pci_read_reg(chan, SH4_PCIPDR);
 	spin_unlock_irqrestore(&sh4_pci_lock, flags);
 
 	switch (size) {
@@ -90,7 +92,7 @@ static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn,
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
 
-	pci_write_reg(NULL, data, SH4_PCIPDR);
+	pci_write_reg(chan, data, SH4_PCIPDR);
 
 	return PCIBIOS_SUCCESSFUL;
 }
diff --git a/arch/sh/drivers/pci/pci-auto.c b/arch/sh/drivers/pci/pci-auto.c
index cf48b12..1d715ec 100644
--- a/arch/sh/drivers/pci/pci-auto.c
+++ b/arch/sh/drivers/pci/pci-auto.c
@@ -67,6 +67,7 @@ static struct pci_dev *fake_pci_dev(struct pci_channel *hose,
 	dev.devfn = devfn;
 	bus.number = busnr;
 	bus.ops = hose->pci_ops;
+	bus.sysdata = hose;
 
 	if(busnr != top_bus)
 		/* Fake a parent bus structure. */
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 5c7a8f1..386587e 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -31,8 +31,10 @@ struct pci_channel {
  */
 extern struct pci_channel board_pci_channels[];
 
-#define PCIBIOS_MIN_IO		board_pci_channels->io_resource->start
-#define PCIBIOS_MIN_MEM		board_pci_channels->mem_resource->start
+/* ugly as hell, but makes drivers/pci/setup-res.c compile and work */
+#define __PCI_CHAN(bus)		((struct pci_channel *)bus->sysdata)
+#define PCIBIOS_MIN_IO		__PCI_CHAN(bus)->io_resource->start
+#define PCIBIOS_MIN_MEM		__PCI_CHAN(bus)->mem_resource->start
 
 /*
  * I/O routine helpers
-- 
cgit v0.10.2


From e4c6a3604e07185046e2ce4be82a201f4447d788 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Tue, 19 Feb 2008 21:35:04 +0900
Subject: sh: add reg_base member to pci_channel

Store the base address of the pci host controller registers in struct
pci_channel and use the address in pci_read_reg() and pci_write_reg().

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c
index ee62e6d..540683d 100644
--- a/arch/sh/drivers/pci/ops-sh4.c
+++ b/arch/sh/drivers/pci/ops-sh4.c
@@ -121,8 +121,8 @@ int __init sh4_pci_check_direct(struct pci_channel *chan)
 		if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) {
 			pci_write_reg(chan, tmp, SH4_PCIPAR);
 			printk(KERN_INFO "PCI: Using configuration type 1\n");
-			request_region(PCI_REG(SH4_PCIPAR), 8, "PCI conf1");
-
+			request_region(chan->reg_base + SH4_PCIPAR, 8,
+				       "PCI conf1");
 			return 0;
 		}
 
diff --git a/arch/sh/drivers/pci/pci-sh4.h b/arch/sh/drivers/pci/pci-sh4.h
index 62ba350..90abfe3 100644
--- a/arch/sh/drivers/pci/pci-sh4.h
+++ b/arch/sh/drivers/pci/pci-sh4.h
@@ -171,13 +171,13 @@ struct sh4_pci_address_map {
 static inline void pci_write_reg(struct pci_channel *chan,
 				 unsigned long val, unsigned long reg)
 {
-	ctrl_outl(val, PCI_REG(reg));
+	ctrl_outl(val, chan->reg_base + reg);
 }
 
 static inline unsigned long pci_read_reg(struct pci_channel *chan,
 					 unsigned long reg)
 {
-	return ctrl_inl(PCI_REG(reg));
+	return ctrl_inl(chan->reg_base + reg);
 }
 
 #endif /* __PCI_SH4_H */
diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 447234c..201266b 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -39,6 +39,8 @@ int __init sh7751_pci_init(struct pci_channel *chan)
 
 	pr_debug("PCI: Starting intialization.\n");
 
+	chan->reg_base = 0xfe200000;
+
 	/* check for SH7751/SH7751R hardware */
 	id = pci_read_reg(chan, SH7751_PCICONF0);
 	if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) &&
diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h
index 0ea4387..c390dd2 100644
--- a/arch/sh/drivers/pci/pci-sh7751.h
+++ b/arch/sh/drivers/pci/pci-sh7751.h
@@ -26,7 +26,6 @@
 #define SH7751_PCI_IO_SIZE           0x40000     /* Size of IO window */
 
 #define SH7751_PCIREG_BASE           0xFE200000  /* PCI regs base address */
-#define PCI_REG(n)                  (SH7751_PCIREG_BASE+ n)
 
 #define SH7751_PCICONF0            0x0           /* PCI Config Reg 0 */
   #define SH7751_PCICONF0_DEVID      0xFFFF0000  /* Device ID */
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index e8f3a30..9d6483a 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -52,6 +52,8 @@ int __init sh7780_pci_init(struct pci_channel *chan)
 
 	pr_debug("PCI: Starting intialization.\n");
 
+	chan->reg_base = 0xfe040000;
+
 	ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */
 
 	/* check for SH7780/SH7780R hardware */
diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h
index 2f3c920..fffcf1d 100644
--- a/arch/sh/drivers/pci/pci-sh7780.h
+++ b/arch/sh/drivers/pci/pci-sh7780.h
@@ -35,7 +35,6 @@
 #define SH7780_PCI_IO_SIZE	0x00400000	/* Size of IO window */
 
 #define SH7780_PCIREG_BASE	0xFE040000	/* PCI regs base address */
-#define PCI_REG(n)		(SH7780_PCIREG_BASE+n)
 
 /* SH7780 PCI Config Registers */
 #define SH7780_PCIVID		0x000		/* Vendor ID */
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 386587e..8e9e0ed 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -24,6 +24,7 @@ struct pci_channel {
 	int first_devfn;
 	int last_devfn;
 	int enabled;
+	unsigned long reg_base;
 };
 
 /*
-- 
cgit v0.10.2


From ef53fdeb7e0cb139aff33665635b886700137abb Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Tue, 19 Feb 2008 21:35:14 +0900
Subject: sh: add io_base member to pci_channel

Store the io window base address in struct pci_channel and use that one
instead of SH77xx_PCI_IO_BASE.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 201266b..2a6c7aa 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -40,6 +40,7 @@ int __init sh7751_pci_init(struct pci_channel *chan)
 	pr_debug("PCI: Starting intialization.\n");
 
 	chan->reg_base = 0xfe200000;
+	chan->io_base = 0xfe240000;
 
 	/* check for SH7751/SH7751R hardware */
 	id = pci_read_reg(chan, SH7751_PCICONF0);
@@ -153,9 +154,9 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 	/* Map IO space into PCI IO window:
 	 * IO addresses will be translated to the PCI IO window base address
 	 */
-	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n",
+	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%lx\n",
 		 chan->io_resource->start, chan->io_resource->end,
-		 SH7751_PCI_IO_BASE + chan->io_resource->start);
+		 chan->io_base + chan->io_resource->start);
 
 	/* Make sure the MSB's of IO window are set to access PCI space
 	 * correctly */
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 9d6483a..87a7f3b 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -53,6 +53,7 @@ int __init sh7780_pci_init(struct pci_channel *chan)
 	pr_debug("PCI: Starting intialization.\n");
 
 	chan->reg_base = 0xfe040000;
+	chan->io_base = 0xfe200000;
 
 	ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */
 
@@ -135,9 +136,9 @@ int __init sh7780_pcic_init(struct pci_channel *chan,
 	/* Map IO space into PCI IO window:
 	 * IO addresses will be translated to the PCI IO window base address
 	 */
-	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n",
+	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%lx\n",
 		 chan->io_resource->start, chan->io_resource->end,
-		 SH7780_PCI_IO_BASE + chan->io_resource->start);
+		 chan->io_base + chan->io_resource->start);
 
 	/* NOTE: I'm ignoring the PCI error IRQs for now..
 	 * TODO: add support for the internal error interrupts and
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 8e9e0ed..84d12eb 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -25,6 +25,7 @@ struct pci_channel {
 	int last_devfn;
 	int enabled;
 	unsigned long reg_base;
+	unsigned long io_base;
 };
 
 /*
-- 
cgit v0.10.2


From ef339f241b08a16af58897e6288ba200e0c7a8c7 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Tue, 19 Feb 2008 21:35:22 +0900
Subject: sh: pci memory range checking code

This patch changes the code to use __is_pci_memory() instead of
is_pci_memaddr(). __is_pci_memory() loops through all the pci
channels on the system to match memory windows.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-titan/io.c b/arch/sh/boards/mach-titan/io.c
index 4badad4..053b3ed 100644
--- a/arch/sh/boards/mach-titan/io.c
+++ b/arch/sh/boards/mach-titan/io.c
@@ -117,7 +117,7 @@ void titan_outsl(unsigned long port, const void *src, unsigned long count)
 
 void __iomem *titan_ioport_map(unsigned long port, unsigned int size)
 {
-	if (PXSEG(port) || is_pci_memaddr(port))
+	if (PXSEG(port))
 		return (void __iomem *)port;
 	else if (is_pci_ioaddr(port))
 		return (void __iomem *)pci_ioaddr(port);
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 29ec16e..b9aca54 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -167,9 +167,8 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
 	/*
 	 * Presently the IORESOURCE_MEM case is a bit special, most
 	 * SH7751 style PCI controllers have PCI memory at a fixed
-	 * location in the address space where no remapping is desired
-	 * (typically at 0xfd000000, but is_pci_memaddr() will know
-	 * best). With the IORESOURCE_MEM case more care has to be taken
+	 * location in the address space where no remapping is desired.
+	 * With the IORESOURCE_MEM case more care has to be taken
 	 * to inhibit page table mapping for legacy cores, but this is
 	 * punted off to __ioremap().
 	 *					-- PFM.
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 84d12eb..ccf5c5f 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -61,12 +61,8 @@ extern unsigned long PCI_IO_AREA;
 #define is_pci_ioaddr(port)		\
 	(((port) >= PCIBIOS_MIN_IO) &&	\
 	 ((port) < (PCIBIOS_MIN_IO + PCI_IO_SIZE)))
-#define is_pci_memaddr(port)		\
-	(((port) >= PCIBIOS_MIN_MEM) &&	\
-	 ((port) < (PCIBIOS_MIN_MEM + PCI_MEM_SIZE)))
 #else
 #define is_pci_ioaddr(port)	(0)
-#define is_pci_memaddr(port)	(0)
 #endif
 
 struct pci_dev;
@@ -127,6 +123,25 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+
+static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size)
+{
+	struct pci_channel *p;
+	struct resource *res;
+
+	for (p = board_pci_channels; p->init; p++) {
+		res = p->mem_resource;
+		if (p->enabled && (phys_addr >= res->start) &&
+		    (phys_addr + size) <= (res->end + 1))
+			return 1;
+	}
+	return 0;
+}
+#else
+static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size)
+{
+	return 0;
+}
 #endif
 
 /* Board-specific fixup routines. */
diff --git a/arch/sh/mm/ioremap_32.c b/arch/sh/mm/ioremap_32.c
index 60cc486..7e04cc8 100644
--- a/arch/sh/mm/ioremap_32.c
+++ b/arch/sh/mm/ioremap_32.c
@@ -56,7 +56,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	 * P1/P2 space, ioremap() will already do the right thing,
 	 * and we'll never get this far.
 	 */
-	if (is_pci_memaddr(phys_addr) && is_pci_memaddr(last_addr))
+	if (__is_pci_memory(phys_addr, size))
 		return (void __iomem *)phys_addr;
 
 #if !defined(CONFIG_PMB_FIXED)
@@ -121,7 +121,7 @@ void __iounmap(void __iomem *addr)
 	unsigned long seg = PXSEG(vaddr);
 	struct vm_struct *p;
 
-	if (seg < P3SEG || vaddr >= P3_ADDR_MAX || is_pci_memaddr(vaddr))
+	if (seg < P3SEG || vaddr >= P3_ADDR_MAX || __is_pci_memory(vaddr, 0))
 		return;
 
 #ifdef CONFIG_PMB
-- 
cgit v0.10.2


From 8ce0143b11cdc519b8e1fd94a262b654ef0bc3ab Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Tue, 19 Feb 2008 21:35:31 +0900
Subject: sh: pci io port base address code

Adds a __get_pci_io_base() function which is used to match a port range
against struct pci_channel. This allows us to detect if a port range is
assigned to pci or happens to be legacy port io. While at it, remove unused
cpu-specific cruft.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
index fe5a231..58ed1d5 100644
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ b/arch/sh/drivers/pci/ops-rts7751r2d.c
@@ -68,7 +68,6 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 
 int __init pcibios_init_platform(void)
 {
-	__set_io_port_base(SH7751_PCI_IO_BASE);
 	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
 }
 
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index ccf5c5f..bb2c2fc 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -38,33 +38,6 @@ extern struct pci_channel board_pci_channels[];
 #define PCIBIOS_MIN_IO		__PCI_CHAN(bus)->io_resource->start
 #define PCIBIOS_MIN_MEM		__PCI_CHAN(bus)->mem_resource->start
 
-/*
- * I/O routine helpers
- */
-#if defined(CONFIG_CPU_SUBTYPE_SH7780) || defined(CONFIG_CPU_SUBTYPE_SH7785)
-#define PCI_IO_AREA		0xFE400000
-#define PCI_IO_SIZE		0x00400000
-#elif defined(CONFIG_CPU_SH5)
-extern unsigned long PCI_IO_AREA;
-#define PCI_IO_SIZE		0x00010000
-#else
-#define PCI_IO_AREA		0xFE240000
-#define PCI_IO_SIZE		0x00040000
-#endif
-
-#define PCI_MEM_SIZE		0x01000000
-
-#define SH4_PCIIOBR_MASK	0xFFFC0000
-#define pci_ioaddr(addr)	(PCI_IO_AREA + (addr & ~SH4_PCIIOBR_MASK))
-
-#if defined(CONFIG_PCI)
-#define is_pci_ioaddr(port)		\
-	(((port) >= PCIBIOS_MIN_IO) &&	\
-	 ((port) < (PCIBIOS_MIN_IO + PCI_IO_SIZE)))
-#else
-#define is_pci_ioaddr(port)	(0)
-#endif
-
 struct pci_dev;
 
 extern void pcibios_set_master(struct pci_dev *dev);
@@ -137,11 +110,31 @@ static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size)
 	}
 	return 0;
 }
+
+static inline void __iomem *__get_pci_io_base(unsigned long port,
+					      unsigned long size)
+{
+	struct pci_channel *p;
+	struct resource *res;
+
+	for (p = board_pci_channels; p->init; p++) {
+		res = p->io_resource;
+		if (p->enabled && (port >= res->start) &&
+		    (port + size) <= (res->end + 1))
+			return (void __iomem *)(p->io_base + port);
+	}
+	return NULL;
+}
 #else
 static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size)
 {
 	return 0;
 }
+static inline void __iomem *__get_pci_io_base(unsigned long port,
+					      unsigned long size)
+{
+	return NULL;
+}
 #endif
 
 /* Board-specific fixup routines. */
diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
index 29cf458..59fb020 100644
--- a/arch/sh/kernel/io.c
+++ b/arch/sh/kernel/io.c
@@ -12,6 +12,7 @@
  * for more details.
  */
 #include <linux/module.h>
+#include <linux/pci.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
 
@@ -69,6 +70,10 @@ void __iomem *ioport_map(unsigned long port, unsigned int nr)
 	if (ret)
 		return ret;
 
+	ret = __get_pci_io_base(port, nr);
+	if (ret)
+		return ret;
+
 	return __ioport_map(port, nr);
 }
 EXPORT_SYMBOL(ioport_map);
-- 
cgit v0.10.2


From aa5d3ff99cc1f3dfe262ab9dd9179131fcfe39fd Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Tue, 19 Feb 2008 21:35:40 +0900
Subject: sh: export board_pci_channels in one place

Instead of sometimes exporting board_pci_channels[] in the board specific
code just export it in one place.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/ops-cayman.c
index f4a5e14..cbaaf02 100644
--- a/arch/sh/drivers/pci/ops-cayman.c
+++ b/arch/sh/drivers/pci/ops-cayman.c
@@ -80,7 +80,6 @@ struct pci_channel board_pci_channels[] = {
 	{ sh5_pci_init, &sh5_pci_ops, NULL, NULL, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
-EXPORT_SYMBOL(board_pci_channels);
 
 int __init pcibios_init_platform(void)
 {
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
index f606df2..781496b 100644
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ b/arch/sh/drivers/pci/ops-lboxre2.c
@@ -43,8 +43,6 @@ struct pci_channel board_pci_channels[] = {
 	{ NULL, NULL, NULL, 0, 0 },
 };
 
-EXPORT_SYMBOL(board_pci_channels);
-
 static struct sh4_pci_address_map sh7751_pci_map = {
 	.window0	= {
 		.base	= SH7751_CS3_BASE_ADDR,
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
index b51b7e4..c58f1cf 100644
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ b/arch/sh/drivers/pci/ops-r7780rp.c
@@ -46,7 +46,6 @@ struct pci_channel board_pci_channels[] = {
 	{ sh7780_pci_init, &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
-EXPORT_SYMBOL(board_pci_channels);
 
 static struct sh4_pci_address_map sh7780_pci_map = {
 	.window0	= {
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
index 58ed1d5..d374cd3 100644
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ b/arch/sh/drivers/pci/ops-rts7751r2d.c
@@ -50,7 +50,6 @@ struct pci_channel board_pci_channels[] = {
 	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
-EXPORT_SYMBOL(board_pci_channels);
 
 static struct sh4_pci_address_map sh7751_pci_map = {
 	.window0	= {
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
index 7277cd1..b34fbc5 100644
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ b/arch/sh/drivers/pci/ops-sdk7780.c
@@ -52,7 +52,6 @@ struct pci_channel board_pci_channels[] = {
 	{ sh7780_pci_init, &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
-EXPORT_SYMBOL(board_pci_channels);
 
 static struct sh4_pci_address_map sdk7780_pci_map = {
 	.window0	= {
diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c
index 76a74fb..4730207 100644
--- a/arch/sh/drivers/pci/ops-se7780.c
+++ b/arch/sh/drivers/pci/ops-se7780.c
@@ -61,7 +61,6 @@ struct pci_channel board_pci_channels[] = {
 	{ sh7780_pci_init, &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
-EXPORT_SYMBOL(board_pci_channels);
 
 static struct sh4_pci_address_map se7780_pci_map = {
 	.window0	= {
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
index ffa79bd..31ed037 100644
--- a/arch/sh/drivers/pci/ops-titan.c
+++ b/arch/sh/drivers/pci/ops-titan.c
@@ -55,7 +55,6 @@ struct pci_channel board_pci_channels[] = {
 	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
-EXPORT_SYMBOL(board_pci_channels);
 
 static struct sh4_pci_address_map sh7751_pci_map = {
 	.window0	= {
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index b9aca54..8aaacc9 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -187,3 +187,5 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
 	iounmap(addr);
 }
 EXPORT_SYMBOL(pci_iounmap);
+
+EXPORT_SYMBOL(board_pci_channels);
-- 
cgit v0.10.2


From 10591578c84825a320734e59272f161385edcc41 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 11 Mar 2009 15:58:04 +0900
Subject: sh: drop duplicate symbol export on dreamcast and sh7785lcr.

With board_pci_channels now being exported in a single place, update the
boards that duplicated the export.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c
index f62063e..5bc81d5 100644
--- a/arch/sh/drivers/pci/ops-dreamcast.c
+++ b/arch/sh/drivers/pci/ops-dreamcast.c
@@ -165,4 +165,3 @@ struct pci_channel board_pci_channels[] = {
 	  &gapspci_mem_resource, 0, 1 },
 	{ 0, }
 };
-EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c
index fb0869f..2a9f9a5 100644
--- a/arch/sh/drivers/pci/ops-sh7785lcr.c
+++ b/arch/sh/drivers/pci/ops-sh7785lcr.c
@@ -44,7 +44,6 @@ struct pci_channel board_pci_channels[] = {
 	{ &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
-EXPORT_SYMBOL(board_pci_channels);
 
 static struct sh4_pci_address_map sh7785_pci_map = {
 	.window0	= {
-- 
cgit v0.10.2


From 3aabce8d3d2f9af2c08c2f590ac9acb272ca8c95 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 11 Mar 2009 16:12:39 +0900
Subject: sh: sh7785lcr: Update for recent PCI changes.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/fixups-sh7785lcr.c b/arch/sh/drivers/pci/fixups-sh7785lcr.c
index 4949e60..9e7dc79 100644
--- a/arch/sh/drivers/pci/fixups-sh7785lcr.c
+++ b/arch/sh/drivers/pci/fixups-sh7785lcr.c
@@ -15,32 +15,33 @@
 #include <linux/pci.h>
 #include "pci-sh4.h"
 
-int pci_fixup_pcic(void)
+int pci_fixup_pcic(struct pci_channel *chan)
 {
-	pci_write_reg(0x000043ff, SH4_PCIINTM);
-	pci_write_reg(0x0000380f, SH4_PCIAINTM);
+	pci_write_reg(chan, 0x000043ff, SH4_PCIINTM);
+	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
 
-	pci_write_reg(0xfbb00047, SH7780_PCICMD);
-	pci_write_reg(0x00000000, SH7780_PCIIBAR);
+	pci_write_reg(chan, 0xfbb00047, SH7780_PCICMD);
+	pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR);
 
-	pci_write_reg(0x00011912, SH7780_PCISVID);
-	pci_write_reg(0x08000000, SH7780_PCICSCR0);
-	pci_write_reg(0x0000001b, SH7780_PCICSAR0);
-	pci_write_reg(0xfd000000, SH7780_PCICSCR1);
-	pci_write_reg(0x0000000f, SH7780_PCICSAR1);
+	pci_write_reg(chan, 0x00011912, SH7780_PCISVID);
+	pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0);
+	pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0);
+	pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1);
+	pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1);
 
-	pci_write_reg(0xfd000000, SH7780_PCIMBR0);
-	pci_write_reg(0x00fc0000, SH7780_PCIMBMR0);
+	pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0);
+	pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0);
 
 #ifdef CONFIG_32BIT
-	pci_write_reg(0xc0000000, SH7780_PCIMBR2);
-	pci_write_reg(0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
+	pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2);
+	pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
 #endif
 
 	/* Set IOBR for windows containing area specified in pci.h */
-	pci_write_reg((PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)),
+	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE - 1),
 		      SH7780_PCIIOBR);
-	pci_write_reg(((SH7780_PCI_IO_SIZE - 1) & (7 << 18)), SH7780_PCIIOBMR);
+	pci_write_reg(chan, ((SH7780_PCI_IO_SIZE - 1) & (7 << 18)),
+		      SH7780_PCIIOBMR);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c
index 2a9f9a5..afbb9bd 100644
--- a/arch/sh/drivers/pci/ops-sh7785lcr.c
+++ b/arch/sh/drivers/pci/ops-sh7785lcr.c
@@ -41,7 +41,7 @@ static struct resource sh7785_mem_resource = {
 };
 
 struct pci_channel board_pci_channels[] = {
-	{ &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff },
+	{ sh7780_pci_init, &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff },
 	{ NULL, NULL, NULL, 0, 0 },
 };
 
@@ -61,5 +61,5 @@ static struct sh4_pci_address_map sh7785_pci_map = {
 
 int __init pcibios_init_platform(void)
 {
-	return sh7780_pcic_init(&sh7785_pci_map);
+	return sh7780_pcic_init(&board_pci_channels[0], &sh7785_pci_map);
 }
-- 
cgit v0.10.2


From f1a9ba8f15f89f3f444985251efaf809cd16da53 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 11 Mar 2009 16:17:53 +0900
Subject: sh: pci: drop duplicate PCIC fixups for SE7780 and SH7785LCR.

SE7780 has the same PCIC fixup as SDK7780, and SH7785LCR the same
as R7780RP. Switch to using those, and drop the duplicate code.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index 847e9089..67d710a 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -21,6 +21,6 @@ obj-$(CONFIG_SH_SDK7780)		+= ops-sdk7780.o fixups-sdk7780.o
 obj-$(CONFIG_SH_TITAN)			+= ops-titan.o
 obj-$(CONFIG_SH_LANDISK)		+= ops-landisk.o
 obj-$(CONFIG_SH_LBOX_RE2)		+= ops-lboxre2.o fixups-lboxre2.o
-obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= ops-se7780.o fixups-se7780.o
+obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= ops-se7780.o fixups-sdk7780.o
 obj-$(CONFIG_SH_CAYMAN)			+= ops-cayman.o
-obj-$(CONFIG_SH_SH7785LCR)		+= ops-sh7785lcr.o fixups-sh7785lcr.o
+obj-$(CONFIG_SH_SH7785LCR)		+= ops-sh7785lcr.o fixups-r7780rp.o
diff --git a/arch/sh/drivers/pci/fixups-se7780.c b/arch/sh/drivers/pci/fixups-se7780.c
deleted file mode 100644
index a968af7..0000000
--- a/arch/sh/drivers/pci/fixups-se7780.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * arch/sh/drivers/pci/fixups-se7780.c
- *
- * HITACHI UL Solution Engine 7780  PCI fixups
- *
- * Copyright (C) 2003  Lineo uSolutions, Inc.
- * Copyright (C) 2004 - 2006  Paul Mundt
- * Copyright (C) 2006  Nobuhiro Iwamatsu
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/pci.h>
-#include "pci-sh4.h"
-#include <asm/io.h>
-
-int pci_fixup_pcic(struct pci_channel *chan)
-{
-	ctrl_outl(0x00000001, SH7780_PCI_VCR2);
-
-	/* Enable all interrupts, so we know what to fix */
-	pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR);
-	pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM);
-
-	/* Set up standard PCI config registers */
-	ctrl_outw(0xFB00, PCI_REG(SH7780_PCISTATUS));
-	ctrl_outw(0x0047, PCI_REG(SH7780_PCICMD));
-	ctrl_outb(  0x00, PCI_REG(SH7780_PCIPIF));
-	ctrl_outb(  0x00, PCI_REG(SH7780_PCISUB));
-	ctrl_outb(  0x06, PCI_REG(SH7780_PCIBCC));
-	ctrl_outw(0x1912, PCI_REG(SH7780_PCISVID));
-	ctrl_outw(0x0001, PCI_REG(SH7780_PCISID));
-
-	pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0);     /* PCI */
-	pci_write_reg(chan, 0x08000000, SH7780_PCILAR0);     /* SHwy */
-	pci_write_reg(chan, 0x07F00001, SH7780_PCILSR); /* size 128M w/ MBAR */
-
-	pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1);
-	pci_write_reg(chan, 0x00000000, SH7780_PCILAR1);
-	pci_write_reg(chan, 0x00000000, SH7780_PCILSR1);
-
-	pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR);
-
-	/*
-	 * Set the MBR so PCI address is one-to-one with window,
-	 * meaning all calls go straight through... use ifdef to
-	 * catch erroneous assumption.
-	 */
-	pci_write_reg(chan, 0xFD000000 , SH7780_PCIMBR0);
-	pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0);    /* 16M */
-
-	/* Set IOBR for window containing area specified in pci.h */
-	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1),
-		      SH7780_PCIIOBR);
-	pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18),
-		      SH7780_PCIIOBMR);
-
-	pci_write_reg(chan, 0xA5000C01, SH7780_PCICR);
-
-	return 0;
-}
diff --git a/arch/sh/drivers/pci/fixups-sh7785lcr.c b/arch/sh/drivers/pci/fixups-sh7785lcr.c
deleted file mode 100644
index 9e7dc79..0000000
--- a/arch/sh/drivers/pci/fixups-sh7785lcr.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * arch/sh/drivers/pci/fixups-sh7785lcr.c
- *
- * R0P7785LC0011RL PCI fixups
- * Copyright (C) 2008  Yoshihiro Shimoda
- *
- * Based on arch/sh/drivers/pci/fixups-r7780rp.c
- * Copyright (C) 2003  Lineo uSolutions, Inc.
- * Copyright (C) 2004 - 2006  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-int pci_fixup_pcic(struct pci_channel *chan)
-{
-	pci_write_reg(chan, 0x000043ff, SH4_PCIINTM);
-	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
-
-	pci_write_reg(chan, 0xfbb00047, SH7780_PCICMD);
-	pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR);
-
-	pci_write_reg(chan, 0x00011912, SH7780_PCISVID);
-	pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0);
-	pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0);
-	pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1);
-	pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1);
-
-	pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0);
-	pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0);
-
-#ifdef CONFIG_32BIT
-	pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2);
-	pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
-#endif
-
-	/* Set IOBR for windows containing area specified in pci.h */
-	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE - 1),
-		      SH7780_PCIIOBR);
-	pci_write_reg(chan, ((SH7780_PCI_IO_SIZE - 1) & (7 << 18)),
-		      SH7780_PCIIOBMR);
-
-	return 0;
-}
-- 
cgit v0.10.2


From d0deef5b14af7d5bbd0003a0a2a1a32326e20a6d Mon Sep 17 00:00:00 2001
From: Shawn Du <duyuyang@gmail.com>
Date: Tue, 14 Apr 2009 13:58:56 +0800
Subject: blktrace: support per-partition tracing

Though one can specify '-d /dev/sda1' when using blktrace, it still
traces the whole sda.

To support per-partition tracing, when we start tracing, we initialize
bt->start_lba and bt->end_lba to the start and end sector of that
partition.

Note some actions are per device, thus we don't filter 0-sector events.

The original patch and discussion can be found here:
	http://marc.info/?l=linux-btrace&m=122949374214540&w=2

Signed-off-by: Shawn Du <duyuyang@gmail.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
LKML-Reference: <49E42620.4050701@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f87615d..f8c218c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
 	memcpy(&buts.name, &cbuts.name, 32);
 
 	mutex_lock(&bdev->bd_mutex);
-	ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts);
+	ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
 	mutex_unlock(&bdev->bd_mutex);
 	if (ret)
 		return ret;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82312df..49c9873 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
 		return blk_trace_setup(sdp->device->request_queue,
 				       sdp->disk->disk_name,
 				       MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
+				       NULL,
 				       (char *)arg);
 	case BLKTRACESTART:
 		return blk_trace_startstop(sdp->device->request_queue, 1);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d960889..267edc4 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -165,8 +165,9 @@ struct blk_trace {
 
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern int do_blk_trace_setup(struct request_queue *q,
-	char *name, dev_t dev, struct blk_user_trace_setup *buts);
+extern int do_blk_trace_setup(struct request_queue *q, char *name,
+			      dev_t dev, struct block_device *bdev,
+			      struct blk_user_trace_setup *buts);
 extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
 /**
@@ -193,6 +194,7 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
 				void *data, size_t len);
 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+			   struct block_device *bdev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
@@ -200,15 +202,15 @@ extern int blk_trace_remove(struct request_queue *q);
 extern struct attribute_group blk_trace_attr_group;
 
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
-#define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
-#define blk_trace_shutdown(q)			do { } while (0)
-#define do_blk_trace_setup(q, name, dev, buts)	(-ENOTTY)
-#define blk_add_driver_data(q, rq, data, len)	do {} while (0)
-#define blk_trace_setup(q, name, dev, arg)	(-ENOTTY)
-#define blk_trace_startstop(q, start)		(-ENOTTY)
-#define blk_trace_remove(q)			(-ENOTTY)
-#define blk_add_trace_msg(q, fmt, ...)		do { } while (0)
-
+# define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
+# define blk_trace_shutdown(q)				do { } while (0)
+# define do_blk_trace_setup(q, name, dev, bdev, buts)	(-ENOTTY)
+# define blk_add_driver_data(q, rq, data, len)		do {} while (0)
+# define blk_trace_setup(q, name, dev, bdev, arg)	(-ENOTTY)
+# define blk_trace_startstop(q, start)			(-ENOTTY)
+# define blk_trace_remove(q)				(-ENOTTY)
+# define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
+
 #endif /* __KERNEL__ */
 #endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2b98195..e932654 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 {
 	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
 		return 1;
-	if (sector < bt->start_lba || sector > bt->end_lba)
+	if (sector && (sector < bt->start_lba || sector > bt->end_lba))
 		return 1;
 	if (bt->pid && pid != bt->pid)
 		return 1;
@@ -192,7 +192,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 	what |= MASK_TC_BIT(rw, DISCARD);
 
 	pid = tsk->pid;
-	if (unlikely(act_log_check(bt, what, sector, pid)))
+	if (act_log_check(bt, what, sector, pid))
 		return;
 	cpu = raw_smp_processor_id();
 
@@ -407,11 +407,13 @@ static struct rchan_callbacks blk_relay_callbacks = {
  * Setup everything required to start tracing
  */
 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
-			struct blk_user_trace_setup *buts)
+		       struct block_device *bdev,
+		       struct blk_user_trace_setup *buts)
 {
 	struct blk_trace *old_bt, *bt = NULL;
 	struct dentry *dir = NULL;
 	int ret, i;
+	struct hd_struct *part = NULL;
 
 	if (!buts->buf_size || !buts->buf_nr)
 		return -EINVAL;
@@ -480,11 +482,21 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (!bt->act_mask)
 		bt->act_mask = (u16) -1;
 
-	bt->start_lba = buts->start_lba;
-	bt->end_lba = buts->end_lba;
-	if (!bt->end_lba)
+	if (bdev)
+		part = bdev->bd_part;
+
+	if (part) {
+		bt->start_lba = part->start_sect;
+		bt->end_lba = part->start_sect + part->nr_sects;
+	} else
 		bt->end_lba = -1ULL;
 
+	/* overwrite with user settings */
+	if (buts->start_lba)
+		bt->start_lba = buts->start_lba;
+	if (buts->end_lba)
+		bt->end_lba = buts->end_lba;
+
 	bt->pid = buts->pid;
 	bt->trace_state = Blktrace_setup;
 
@@ -505,6 +517,7 @@ err:
 }
 
 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+		    struct block_device *bdev,
 		    char __user *arg)
 {
 	struct blk_user_trace_setup buts;
@@ -514,7 +527,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (ret)
 		return -EFAULT;
 
-	ret = do_blk_trace_setup(q, name, dev, &buts);
+	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
 	if (ret)
 		return ret;
 
@@ -582,7 +595,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 	switch (cmd) {
 	case BLKTRACESETUP:
 		bdevname(bdev, b);
-		ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
+		ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
 		break;
 	case BLKTRACESTART:
 		start = 1;
-- 
cgit v0.10.2


From 9908c30997b8a73c95f836170b9998dae9aa3f4a Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 14 Apr 2009 13:59:34 +0800
Subject: blktrace: support per-partition tracing for ftrace plugin

The previous patch adds support to trace a single partition for
relay+ioctl blktrace, and this patch is for ftrace plugin blktrace:

  # echo 1 > /sys/block/sda/sda7/enable
  # cat start_lba
  102398373
  # cat end_lba
  102703545

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Shawn Du <duyuyang@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
LKML-Reference: <49E42646.4060608@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e932654..d109898 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -403,6 +403,23 @@ static struct rchan_callbacks blk_relay_callbacks = {
 	.remove_buf_file	= blk_remove_buf_file_callback,
 };
 
+static void blk_trace_setup_lba(struct blk_trace *bt,
+				struct block_device *bdev)
+{
+	struct hd_struct *part = NULL;
+
+	if (bdev)
+		part = bdev->bd_part;
+
+	if (part) {
+		bt->start_lba = part->start_sect;
+		bt->end_lba = part->start_sect + part->nr_sects;
+	} else {
+		bt->start_lba = 0;
+		bt->end_lba = -1ULL;
+	}
+}
+
 /*
  * Setup everything required to start tracing
  */
@@ -413,7 +430,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	struct blk_trace *old_bt, *bt = NULL;
 	struct dentry *dir = NULL;
 	int ret, i;
-	struct hd_struct *part = NULL;
 
 	if (!buts->buf_size || !buts->buf_nr)
 		return -EINVAL;
@@ -482,14 +498,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (!bt->act_mask)
 		bt->act_mask = (u16) -1;
 
-	if (bdev)
-		part = bdev->bd_part;
-
-	if (part) {
-		bt->start_lba = part->start_sect;
-		bt->end_lba = part->start_sect + part->nr_sects;
-	} else
-		bt->end_lba = -1ULL;
+	blk_trace_setup_lba(bt, bdev);
 
 	/* overwrite with user settings */
 	if (buts->start_lba)
@@ -1370,7 +1379,8 @@ static int blk_trace_remove_queue(struct request_queue *q)
 /*
  * Setup everything required to start tracing
  */
-static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
+static int blk_trace_setup_queue(struct request_queue *q,
+				 struct block_device *bdev)
 {
 	struct blk_trace *old_bt, *bt = NULL;
 	int ret = -ENOMEM;
@@ -1383,9 +1393,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
 	if (!bt->msg_data)
 		goto free_bt;
 
-	bt->dev = dev;
+	bt->dev = bdev->bd_dev;
 	bt->act_mask = (u16)-1;
-	bt->end_lba = -1ULL;
+
+	blk_trace_setup_lba(bt, bdev);
 
 	old_bt = xchg(&q->blk_trace, bt);
 	if (old_bt != NULL) {
@@ -1602,7 +1613,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 
 	if (attr == &dev_attr_enable) {
 		if (value)
-			ret = blk_trace_setup_queue(q, bdev->bd_dev);
+			ret = blk_trace_setup_queue(q, bdev);
 		else
 			ret = blk_trace_remove_queue(q);
 		goto out_unlock_bdev;
@@ -1610,7 +1621,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 
 	ret = 0;
 	if (q->blk_trace == NULL)
-		ret = blk_trace_setup_queue(q, bdev->bd_dev);
+		ret = blk_trace_setup_queue(q, bdev);
 
 	if (ret == 0) {
 		if (attr == &dev_attr_act_mask)
-- 
cgit v0.10.2


From 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 14 Apr 2009 14:00:05 +0800
Subject: blktrace: add trace/ to /sys/block/sda

Impact: allow ftrace-plugin blktrace to trace device-mapper devices

To trace a single partition:
  # echo 1 > /sys/block/sda/sda1/enable

To trace the whole sda instead:
  # echo 1 > /sys/block/sda/enable

Thus we also fix an issue reported by Ted, that ftrace-plugin blktrace
can't be used to trace device-mapper devices.

Now:

  # echo 1 > /sys/block/dm-0/trace/enable
  echo: write error: No such device or address
  # mount -t ext4 /dev/dm-0 /mnt
  # echo 1 > /sys/block/dm-0/trace/enable
  # echo blk > /debug/tracing/current_tracer

Reported-by: Theodore Tso <tytso@mit.edu>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Shawn Du <duyuyang@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
LKML-Reference: <49E42665.6020506@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 73f36be..8653d71 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -387,16 +387,21 @@ struct kobj_type blk_queue_ktype = {
 int blk_register_queue(struct gendisk *disk)
 {
 	int ret;
+	struct device *dev = disk_to_dev(disk);
 
 	struct request_queue *q = disk->queue;
 
 	if (WARN_ON(!q))
 		return -ENXIO;
 
+	ret = blk_trace_init_sysfs(dev);
+	if (ret)
+		return ret;
+
 	if (!q->request_fn)
 		return 0;
 
-	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
+	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj),
 			  "%s", "queue");
 	if (ret < 0)
 		return ret;
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 267edc4..62763c9 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
+extern int blk_trace_init_sysfs(struct device *dev);
 
 extern struct attribute_group blk_trace_attr_group;
 
@@ -210,6 +211,11 @@ extern struct attribute_group blk_trace_attr_group;
 # define blk_trace_startstop(q, start)			(-ENOTTY)
 # define blk_trace_remove(q)				(-ENOTTY)
 # define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
+static inline int blk_trace_init_sysfs(struct device *dev)
+{
+	return 0;
+}
+
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #endif /* __KERNEL__ */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d109898..8e7c5da 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1644,3 +1644,8 @@ out:
 	return ret ? ret : count;
 }
 
+int blk_trace_init_sysfs(struct device *dev)
+{
+	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
+}
+
-- 
cgit v0.10.2


From f3948f8857ef5de239f28a61dddb1554a0ae4c2c Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 15 Apr 2009 11:02:56 +0800
Subject: blktrace: fix context-info when mixed-using blk tracer and trace
 events

When current tracer is set to blk tracer, TRACE_ITER_CONTEXT_INFO is
unset, but actually context-info is printed:

    pdflush-431   [000]   821.181576:   8,0    P   N [pdflush]

And then if we enable TRACE_ITER_CONTEXT_INFO:

    # echo context-info > trace_options

We'll see context-info printed twice. What's worse, when we use blk
tracer and trace events at the same time, we'll see no context-info
for trace events at all:

    jbd2_commit_logging: dev dm-0:8 transaction 333227
    jbd2_end_commit: dev dm-0:8 transaction 333227 head 332814
      rm-25433 [001]  9578.307485:   8,18   m   N cfq25433 slice expired t=0
      rm-25433 [001]  9578.307486:   8,18   m   N cfq25433 put_queue

This patch adds blk_tracer->set_flags(), and context-info flag is unset
only when we set the output to classic mode.

Note after this patch, one should unset context-info explicitly if he
wants to get binary output that can be parsed by blkparse:

    # echo nocontext-info > trace_options
    # echo bin > trace_options
    # echo blk > current_tracer
    # cat trace_pipe | blkparse -i -

Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <49E54E60.50408@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 8e7c5da..c32062b 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1211,7 +1211,6 @@ static void blk_tracer_print_header(struct seq_file *m)
 static void blk_tracer_start(struct trace_array *tr)
 {
 	blk_tracer_enabled = true;
-	trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
 }
 
 static int blk_tracer_init(struct trace_array *tr)
@@ -1224,7 +1223,6 @@ static int blk_tracer_init(struct trace_array *tr)
 static void blk_tracer_stop(struct trace_array *tr)
 {
 	blk_tracer_enabled = false;
-	trace_flags |= TRACE_ITER_CONTEXT_INFO;
 }
 
 static void blk_tracer_reset(struct trace_array *tr)
@@ -1289,9 +1287,6 @@ out:
 static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
 					       int flags)
 {
-	if (!trace_print_context(iter))
-		return TRACE_TYPE_PARTIAL_LINE;
-
 	return print_one_line(iter, false);
 }
 
@@ -1326,6 +1321,18 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
 	return print_one_line(iter, true);
 }
 
+static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
+{
+	/* don't output context-info for blk_classic output */
+	if (bit == TRACE_BLK_OPT_CLASSIC) {
+		if (set)
+			trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
+		else
+			trace_flags |= TRACE_ITER_CONTEXT_INFO;
+	}
+	return 0;
+}
+
 static struct tracer blk_tracer __read_mostly = {
 	.name		= "blk",
 	.init		= blk_tracer_init,
@@ -1335,6 +1342,7 @@ static struct tracer blk_tracer __read_mostly = {
 	.print_header	= blk_tracer_print_header,
 	.print_line	= blk_tracer_print_line,
 	.flags		= &blk_tracer_flags,
+	.set_flag	= blk_tracer_set_flag,
 };
 
 static struct trace_event trace_blk_event = {
-- 
cgit v0.10.2


From 18cb7109d3e83195b605ff2905981020e86f72ca Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 16 Apr 2009 10:22:24 +0200
Subject: ALSA: hda - Check strcpy length

Check the length to copy via strlen() beforehand to avoid the stack
corruption, or use strlcpy() to be safe in HD-audio codes.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 37f24ce..48f0cea 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1431,6 +1431,8 @@ _snd_hda_find_mixer_ctl(struct hda_codec *codec,
 	memset(&id, 0, sizeof(id));
 	id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
 	id.index = idx;
+	if (snd_BUG_ON(strlen(name) >= sizeof(id.name)))
+		return NULL;
 	strcpy(id.name, name);
 	return snd_ctl_find_id(codec->bus->card, &id);
 }
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 21a3092..41db5d4 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1830,7 +1830,7 @@ azx_attach_pcm_stream(struct hda_bus *bus, struct hda_codec *codec,
 			  &pcm);
 	if (err < 0)
 		return err;
-	strcpy(pcm->name, cpcm->name);
+	strlcpy(pcm->name, cpcm->name, sizeof(pcm->name));
 	apcm = kzalloc(sizeof(*apcm), GFP_KERNEL);
 	if (apcm == NULL)
 		return -ENOMEM;
@@ -2358,9 +2358,11 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 	}
 
 	strcpy(card->driver, "HDA-Intel");
-	strcpy(card->shortname, driver_short_names[chip->driver_type]);
-	sprintf(card->longname, "%s at 0x%lx irq %i",
-		card->shortname, chip->addr, chip->irq);
+	strlcpy(card->shortname, driver_short_names[chip->driver_type],
+		sizeof(card->shortname));
+	snprintf(card->longname, sizeof(card->longname),
+		 "%s at 0x%lx irq %i",
+		 card->shortname, chip->addr, chip->irq);
 
 	*rchip = chip;
 	return 0;
-- 
cgit v0.10.2


From 0232ba9ce031d0fd8f331fa8b3c00e16901f54e6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 16 Apr 2009 18:01:31 +0900
Subject: sh: pci: Kill off unused SH4_PCIC_NO_RESET code.

Nothing ended up using this anymore, so just kill it off.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c
index c46911d9..178b778 100644
--- a/arch/sh/drivers/pci/ops-landisk.c
+++ b/arch/sh/drivers/pci/ops-landisk.c
@@ -39,8 +39,6 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 		.base	= SH7751_CS3_BASE_ADDR,
 		.size	= (64 << 20),	/* 64MB */
 	},
-
-	.flags = SH4_PCIC_NO_RESET,
 };
 
 int __init pcibios_init_platform(void)
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
index 781496b..91cabd8 100644
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ b/arch/sh/drivers/pci/ops-lboxre2.c
@@ -48,11 +48,6 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 		.base	= SH7751_CS3_BASE_ADDR,
 		.size	= 0x04000000,
 	},
-	.window1	= {
-		.base	= 0x00000000,	/* Unused */
-		.size	= 0x00000000,	/* Unused */
-	},
-	.flags	= SH4_PCIC_NO_RESET,
 };
 
 int __init pcibios_init_platform(void)
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
index c58f1cf..8ec6d22 100644
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ b/arch/sh/drivers/pci/ops-r7780rp.c
@@ -57,8 +57,6 @@ static struct sh4_pci_address_map sh7780_pci_map = {
 		.base	= SH7780_CS3_BASE_ADDR,
 		.size	= 0x04000000,
 	},
-
-	.flags	= SH4_PCIC_NO_RESET,
 };
 
 int __init pcibios_init_platform(void)
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
index d374cd3..96b916c 100644
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ b/arch/sh/drivers/pci/ops-rts7751r2d.c
@@ -56,13 +56,6 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 		.base	= SH7751_CS3_BASE_ADDR,
 		.size	= 0x04000000,
 	},
-
-	.window1	= {
-		.base	= 0x00000000,	/* Unused */
-		.size	= 0x00000000,	/* Unused */
-	},
-
-	.flags	= SH4_PCIC_NO_RESET,
 };
 
 int __init pcibios_init_platform(void)
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
index b34fbc5..6a0b7c0 100644
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ b/arch/sh/drivers/pci/ops-sdk7780.c
@@ -62,7 +62,6 @@ static struct sh4_pci_address_map sdk7780_pci_map = {
 		.base	= SH7780_CS3_BASE_ADDR,
 		.size	= 0x04000000,
 	},
-	.flags	= SH4_PCIC_NO_RESET,
 };
 
 int __init pcibios_init_platform(void)
diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c
index 4730207..583b8e8 100644
--- a/arch/sh/drivers/pci/ops-se7780.c
+++ b/arch/sh/drivers/pci/ops-se7780.c
@@ -67,7 +67,6 @@ static struct sh4_pci_address_map se7780_pci_map = {
 		.base	= SH7780_CS2_BASE_ADDR,
 		.size	= 0x04000000,
 	},
-	.flags  = SH4_PCIC_NO_RESET,
 };
 
 int __init pcibios_init_platform(void)
diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c
index afbb9bd..ab0d1de 100644
--- a/arch/sh/drivers/pci/ops-sh7785lcr.c
+++ b/arch/sh/drivers/pci/ops-sh7785lcr.c
@@ -55,8 +55,6 @@ static struct sh4_pci_address_map sh7785_pci_map = {
 		.size	= 0x20000000,
 #endif
 	},
-
-	.flags	= SH4_PCIC_NO_RESET,
 };
 
 int __init pcibios_init_platform(void)
diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c
index 2e254c6..dd2c5df 100644
--- a/arch/sh/drivers/pci/ops-snapgear.c
+++ b/arch/sh/drivers/pci/ops-snapgear.c
@@ -54,8 +54,6 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 		.base	= SH7751_CS2_BASE_ADDR,
 		.size	= SNAPGEAR_LSR1_SIZE,
 	},
-
-	.flags	= SH4_PCIC_NO_RESET,
 };
 
 /*
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
index 31ed037..e45bb62 100644
--- a/arch/sh/drivers/pci/ops-titan.c
+++ b/arch/sh/drivers/pci/ops-titan.c
@@ -66,8 +66,6 @@ static struct sh4_pci_address_map sh7751_pci_map = {
 		.base	= SH7751_CS2_BASE_ADDR,
 		.size	= SH7751_MEM_REGION_SIZE*2,
 	},
-
-	.flags	= SH4_PCIC_NO_RESET,
 };
 
 int __init pcibios_init_platform(void)
diff --git a/arch/sh/drivers/pci/pci-sh4.h b/arch/sh/drivers/pci/pci-sh4.h
index 90abfe3..3d5296c 100644
--- a/arch/sh/drivers/pci/pci-sh4.h
+++ b/arch/sh/drivers/pci/pci-sh4.h
@@ -149,9 +149,6 @@
   #define SH4_PCIPDTR_PB0	  0x000000001	/* Port 0 Enable */
 #define SH4_PCIPDR		0x220		/* Port IO Data Register */
 
-/* Flags */
-#define SH4_PCIC_NO_RESET	0x0001
-
 /* arch/sh/kernel/drivers/pci/ops-sh4.c */
 extern struct pci_ops sh4_pci_ops;
 int sh4_pci_check_direct(struct pci_channel *chan);
@@ -165,7 +162,6 @@ struct sh4_pci_address_space {
 struct sh4_pci_address_map {
 	struct sh4_pci_address_space window0;
 	struct sh4_pci_address_space window1;
-	unsigned long flags;
 };
 
 static inline void pci_write_reg(struct pci_channel *chan,
diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 2a6c7aa..af88744 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -99,21 +99,6 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 	word = SH4_PCIPINT_D3 | SH4_PCIPINT_D0;
 	pci_write_reg(chan, word, SH4_PCIPINT);
 
-	/*
-	 * This code is unused for some boards as it is done in the
-	 * bootloader and doing it here means the MAC addresses loaded
-	 * by the bootloader get lost.
-	 */
-	if (!(map->flags & SH4_PCIC_NO_RESET)) {
-		/* toggle PCI reset pin */
-		word = SH4_PCICR_PREFIX | SH4_PCICR_PRST;
-		pci_write_reg(chan, word, SH4_PCICR);
-		/* Wait for a long time... not 1 sec. but long enough */
-		mdelay(100);
-		word = SH4_PCICR_PREFIX;
-		pci_write_reg(chan, word, SH4_PCICR);
-	}
-
 	/* set the command/status bits to:
 	 * Wait Cycle Control + Parity Enable + Bus Master +
 	 * Mem space enable
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 87a7f3b..282cabe 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -96,21 +96,6 @@ int __init sh7780_pcic_init(struct pci_channel *chan,
 {
 	u32 word;
 
-	/*
-	 * This code is unused for some boards as it is done in the
-	 * bootloader and doing it here means the MAC addresses loaded
-	 * by the bootloader get lost.
-	 */
-	if (!(map->flags & SH4_PCIC_NO_RESET)) {
-		/* toggle PCI reset pin */
-		word = SH4_PCICR_PREFIX | SH4_PCICR_PRST;
-		pci_write_reg(chan, word, SH4_PCICR);
-		/* Wait for a long time... not 1 sec. but long enough */
-		mdelay(100);
-		word = SH4_PCICR_PREFIX;
-		pci_write_reg(chan, word, SH4_PCICR);
-	}
-
 	/* set the command/status bits to:
 	 * Wait Cycle Control + Parity Enable + Bus Master +
 	 * Mem space enable
-- 
cgit v0.10.2


From 3f1a4d826751d9759fc95da4e47d08d2745e0055 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 15 Apr 2009 21:35:26 +0100
Subject: ASoC: Check we have DAI ops when calling via accessor functions

Also make sure we're checking for the right operation while we're here.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index dd28009..9250392 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2100,7 +2100,7 @@ EXPORT_SYMBOL_GPL(snd_soc_put_volsw_s8);
 int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id,
 	unsigned int freq, int dir)
 {
-	if (dai->ops->set_sysclk)
+	if (dai->ops && dai->ops->set_sysclk)
 		return dai->ops->set_sysclk(dai, clk_id, freq, dir);
 	else
 		return -EINVAL;
@@ -2120,7 +2120,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_sysclk);
 int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai,
 	int div_id, int div)
 {
-	if (dai->ops->set_clkdiv)
+	if (dai->ops && dai->ops->set_clkdiv)
 		return dai->ops->set_clkdiv(dai, div_id, div);
 	else
 		return -EINVAL;
@@ -2139,7 +2139,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_clkdiv);
 int snd_soc_dai_set_pll(struct snd_soc_dai *dai,
 	int pll_id, unsigned int freq_in, unsigned int freq_out)
 {
-	if (dai->ops->set_pll)
+	if (dai->ops && dai->ops->set_pll)
 		return dai->ops->set_pll(dai, pll_id, freq_in, freq_out);
 	else
 		return -EINVAL;
@@ -2155,7 +2155,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_pll);
  */
 int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
 {
-	if (dai->ops->set_fmt)
+	if (dai->ops && dai->ops->set_fmt)
 		return dai->ops->set_fmt(dai, fmt);
 	else
 		return -EINVAL;
@@ -2174,7 +2174,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_fmt);
 int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai,
 	unsigned int mask, int slots)
 {
-	if (dai->ops->set_sysclk)
+	if (dai->ops && dai->ops->set_tdm_slot)
 		return dai->ops->set_tdm_slot(dai, mask, slots);
 	else
 		return -EINVAL;
@@ -2190,7 +2190,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tdm_slot);
  */
 int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate)
 {
-	if (dai->ops->set_sysclk)
+	if (dai->ops && dai->ops->set_tristate)
 		return dai->ops->set_tristate(dai, tristate);
 	else
 		return -EINVAL;
@@ -2206,7 +2206,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tristate);
  */
 int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute)
 {
-	if (dai->ops->digital_mute)
+	if (dai->ops && dai->ops->digital_mute)
 		return dai->ops->digital_mute(dai, mute);
 	else
 		return -EINVAL;
-- 
cgit v0.10.2


From fd5dfad9cf51bc3575b5e50193403de4a3de23bc Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 15 Apr 2009 21:37:46 +0100
Subject: ASoC: Volume controls are never of boolean type

Some limited volume controls (mostly simple attenuations) have only two
settings so the ASoC info functions misreport them as booleans. Since
we currently have no better information check for " Volume" in the
control name and always report any controls matching as being integer.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 9250392..af11791 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1779,7 +1779,7 @@ int snd_soc_info_volsw_ext(struct snd_kcontrol *kcontrol,
 {
 	int max = kcontrol->private_value;
 
-	if (max == 1)
+	if (max == 1 && !strstr(kcontrol->id.name, " Volume"))
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
 	else
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
@@ -1809,7 +1809,7 @@ int snd_soc_info_volsw(struct snd_kcontrol *kcontrol,
 	unsigned int shift = mc->shift;
 	unsigned int rshift = mc->rshift;
 
-	if (max == 1)
+	if (max == 1 && !strstr(kcontrol->id.name, " Volume"))
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
 	else
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
@@ -1916,7 +1916,7 @@ int snd_soc_info_volsw_2r(struct snd_kcontrol *kcontrol,
 		(struct soc_mixer_control *)kcontrol->private_value;
 	int max = mc->max;
 
-	if (max == 1)
+	if (max == 1 && !strstr(kcontrol->id.name, " Volume"))
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
 	else
 		uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
-- 
cgit v0.10.2


From 0d960e8891459f5af85e5781bce3f1da5f7db0fb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 16 Apr 2009 10:08:39 +0100
Subject: ASoC: Request shared rates for WM8903

It has a shared LRCLK.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index 8cf571f..c539184 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -1523,6 +1523,7 @@ struct snd_soc_dai wm8903_dai = {
 		 .formats = WM8903_FORMATS,
 	 },
 	.ops = &wm8903_dai_ops,
+	.symmetric_rates = 1,
 };
 EXPORT_SYMBOL_GPL(wm8903_dai);
 
-- 
cgit v0.10.2


From c29b206ffd0700acb2dc1fdb70856cc4b907216c Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 15 Apr 2009 15:38:55 +0300
Subject: ASoC: OMAP: Use single-phase for DSP mode

Use single-phase mode for the DSP mode and keep the dual phase
mode for the I2S mode.

The mono (1 channel) mode already used single phase mode,
now it is more cleaner. There is no need to configure the
second phase, when the single phase is used.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Jarkko Nikula <jarkko.nikula@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 9c09b94..402a1eb 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -214,8 +214,9 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 	struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
 	struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
 	int dma, bus_id = mcbsp_data->bus_id, id = cpu_dai->id;
-	int wlen, channels;
+	int wlen, channels, wpf;
 	unsigned long port;
+	unsigned int format;
 
 	if (cpu_class_is_omap1()) {
 		dma = omap1_dma_reqs[bus_id][substream->stream];
@@ -243,18 +244,23 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 		return 0;
 	}
 
-	channels = params_channels(params);
+	format = mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK;
+	wpf = channels = params_channels(params);
 	switch (channels) {
 	case 2:
-		/* Use dual-phase frames */
-		regs->rcr2	|= RPHASE;
-		regs->xcr2	|= XPHASE;
+		if (format == SND_SOC_DAIFMT_I2S) {
+			/* Use dual-phase frames */
+			regs->rcr2	|= RPHASE;
+			regs->xcr2	|= XPHASE;
+			/* Set 1 word per (McBSP) frame for phase1 and phase2 */
+			wpf--;
+			regs->rcr2	|= RFRLEN2(wpf - 1);
+			regs->xcr2	|= XFRLEN2(wpf - 1);
+		}
 	case 1:
-		/* Set 1 word per (McBSP) frame */
-		regs->rcr2	|= RFRLEN2(1 - 1);
-		regs->rcr1	|= RFRLEN1(1 - 1);
-		regs->xcr2	|= XFRLEN2(1 - 1);
-		regs->xcr1	|= XFRLEN1(1 - 1);
+		/* Set word per (McBSP) frame for phase1 */
+		regs->rcr1	|= RFRLEN1(wpf - 1);
+		regs->xcr1	|= XFRLEN1(wpf - 1);
 		break;
 	default:
 		/* Unsupported number of channels */
@@ -276,9 +282,9 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 	}
 
 	/* Set FS period and length in terms of bit clock periods */
-	switch (mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	switch (format) {
 	case SND_SOC_DAIFMT_I2S:
-		regs->srgr2	|= FPER(wlen * 2 - 1);
+		regs->srgr2	|= FPER(wlen * channels - 1);
 		regs->srgr1	|= FWID(wlen - 1);
 		break;
 	case SND_SOC_DAIFMT_DSP_B:
-- 
cgit v0.10.2


From 3ba191ce051a32b20757f063120496e860ea8f9d Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Wed, 15 Apr 2009 15:38:56 +0300
Subject: ASoC: OMAP: Add DSP_A mode support for mcbsp

DSP_A mode is similar to the DSP_B, but the MSB is delayed with
one bclk (appears after the FS pulse and not under it).

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Jarkko Nikula <jarkko.nikula@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 402a1eb..2b4a8da 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -287,6 +287,7 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 		regs->srgr2	|= FPER(wlen * channels - 1);
 		regs->srgr1	|= FWID(wlen - 1);
 		break;
+	case SND_SOC_DAIFMT_DSP_A:
 	case SND_SOC_DAIFMT_DSP_B:
 		regs->srgr2	|= FPER(wlen * channels - 1);
 		regs->srgr1	|= FWID(wlen * channels - 2);
@@ -330,6 +331,13 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 		regs->rcr2	|= RDATDLY(1);
 		regs->xcr2	|= XDATDLY(1);
 		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		/* 1-bit data delay */
+		regs->rcr2      |= RDATDLY(1);
+		regs->xcr2      |= XDATDLY(1);
+		/* Invert FS polarity configuration */
+		temp_fmt ^= SND_SOC_DAIFMT_NB_IF;
+		break;
 	case SND_SOC_DAIFMT_DSP_B:
 		/* 0-bit data delay */
 		regs->rcr2      |= RDATDLY(0);
-- 
cgit v0.10.2


From d1f0ae5e2e45e74cff4c3bdefb0fc77608cdfeec Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 15 Apr 2009 21:34:55 +0200
Subject: x86: standardize Kbuild rules

Introducing this Kbuild file allow us to:

    make arch/x86/

And thus building all the core part of x86.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
new file mode 100644
index 0000000..ad8ec35
--- /dev/null
+++ b/arch/x86/Kbuild
@@ -0,0 +1,16 @@
+
+obj-$(CONFIG_KVM) += kvm/
+
+# Xen paravirtualization support
+obj-$(CONFIG_XEN) += xen/
+
+# lguest paravirtualization support
+obj-$(CONFIG_LGUEST_GUEST) += lguest/
+
+obj-y += kernel/
+obj-y += mm/
+
+obj-y += crypto/
+obj-y += vdso/
+obj-$(CONFIG_IA32_EMULATION) += ia32/
+
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index f05d8c9..e81f0b2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -7,8 +7,6 @@ else
         KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif
 
-core-$(CONFIG_KVM) += arch/x86/kvm/
-
 # BITS is used as extension for files which are available in a 32 bit
 # and a 64 bit version to simplify shared Makefiles.
 # e.g.: obj-y += foo_$(BITS).o
@@ -118,21 +116,8 @@ head-y += arch/x86/kernel/init_task.o
 
 libs-y  += arch/x86/lib/
 
-# Sub architecture files that needs linking first
-core-y += $(fcore-y)
-
-# Xen paravirtualization support
-core-$(CONFIG_XEN) += arch/x86/xen/
-
-# lguest paravirtualization support
-core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
-
-core-y += arch/x86/kernel/
-core-y += arch/x86/mm/
-
-core-y += arch/x86/crypto/
-core-y += arch/x86/vdso/
-core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
+# See arch/x86/Kbuild for content of core part of the kernel
+core-y += arch/x86/
 
 # drivers-y are linked after core-y
 drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
-- 
cgit v0.10.2


From 84971bb401866d79c6b353cb1d8861c2b4621867 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 12:44:25 +0900
Subject: sh: pci: Kill off useless debugging printk() in pci-sh7780 init.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 282cabe..b826c7b 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -118,18 +118,6 @@ int __init sh7780_pcic_init(struct pci_channel *chan,
 	pci_write_reg(chan, map->window1.base, SH4_PCILAR1);
 	pci_write_reg(chan, map->window1.base, SH7780_PCIMBAR1);
 
-	/* Map IO space into PCI IO window:
-	 * IO addresses will be translated to the PCI IO window base address
-	 */
-	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%lx\n",
-		 chan->io_resource->start, chan->io_resource->end,
-		 chan->io_base + chan->io_resource->start);
-
-	/* NOTE: I'm ignoring the PCI error IRQs for now..
-	 * TODO: add support for the internal error interrupts and
-	 * DMA interrupts...
-	 */
-
 	/* Apply any last-minute PCIC fixups */
 	pci_fixup_pcic(chan);
 
-- 
cgit v0.10.2


From b627b4ed3d056c5d00e8f3cb32d033b0ee6619a9 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 13:00:18 +0900
Subject: sh: pci: Move se7780 INTC fixups out of pci-sh7780.c.

These fixups belong in the board INTC setup code, not in the middle of
pci-sh7780.c.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-se/7780/irq.c b/arch/sh/boards/mach-se/7780/irq.c
index 66ad292..44b61a5 100644
--- a/arch/sh/boards/mach-se/7780/irq.c
+++ b/arch/sh/boards/mach-se/7780/irq.c
@@ -12,10 +12,13 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <asm/irq.h>
-#include <asm/io.h>
+#include <linux/irq.h>
+#include <linux/io.h>
 #include <mach-se/mach/se7780.h>
 
+#define INTC_BASE	0xffd00000
+#define INTC_ICR1	(INTC_BASE+0x1c)
+
 /*
  * Initialize IRQ setting
  */
@@ -43,4 +46,7 @@ void __init init_se7780_IRQ(void)
 	ctrl_outw((IRQPIN_PCCPW << IRQPOS_PCCPW), FPGA_INTSEL3);
 
 	plat_irq_setup_pins(IRQ_MODE_IRQ); /* install handlers for IRQ0-7 */
+
+	/* ICR1: detect low level(for 2ndcut) */
+	ctrl_outl(0xAAAA0000, INTC_ICR1);
 }
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index b826c7b..45fa423 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -22,20 +22,6 @@
 #include <linux/delay.h>
 #include "pci-sh4.h"
 
-#define INTC_BASE	0xffd00000
-#define INTC_ICR0	(INTC_BASE+0x0)
-#define INTC_ICR1	(INTC_BASE+0x1c)
-#define INTC_INTPRI	(INTC_BASE+0x10)
-#define INTC_INTREQ	(INTC_BASE+0x24)
-#define INTC_INTMSK0	(INTC_BASE+0x44)
-#define INTC_INTMSK1	(INTC_BASE+0x48)
-#define INTC_INTMSK2	(INTC_BASE+0x40080)
-#define INTC_INTMSKCLR0	(INTC_BASE+0x64)
-#define INTC_INTMSKCLR1	(INTC_BASE+0x68)
-#define INTC_INTMSKCLR2	(INTC_BASE+0x40084)
-#define INTC_INT2MSKR	(INTC_BASE+0x40038)
-#define INTC_INT2MSKCR	(INTC_BASE+0x4003c)
-
 /*
  * Initialization. Try all known PCI access methods. Note that we support
  * using both PCI BIOS and direct access: in such cases, we use I/O ports
@@ -75,16 +61,6 @@ int __init sh7780_pci_init(struct pci_channel *chan)
 		return -ENODEV;
 	}
 
-	/* Setup the INTC */
-	if (mach_is_7780se()) {
-		/* ICR0: IRL=use separately */
-		ctrl_outl(0x00C00020, INTC_ICR0);
-		/* ICR1: detect low level(for 2ndcut) */
-		ctrl_outl(0xAAAA0000, INTC_ICR1);
-		/* INTPRI: priority=3(all) */
-		ctrl_outl(0x33333333, INTC_INTPRI);
-	}
-
 	if ((ret = sh4_pci_check_direct(chan)) != 0)
 		return ret;
 
-- 
cgit v0.10.2


From 7e4ba0d77c96d328ba968ddff4a464d4d2fa7abc Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 14:07:57 +0900
Subject: sh: pci: Prefer P1SEG over P1SEGADDR for CONFIG_CMD.

P1SEGADDR is obsolete and will be killed off completely in the future,
so transition off of it and reference P1SEG explicitly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c
index 540683d..2a7f7b5 100644
--- a/arch/sh/drivers/pci/ops-sh4.c
+++ b/arch/sh/drivers/pci/ops-sh4.c
@@ -1,22 +1,22 @@
 /*
  * Generic SH-4 / SH-4A PCIC operations (SH7751, SH7780).
  *
- * Copyright (C) 2002 - 2006  Paul Mundt
+ * Copyright (C) 2002 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License v2. See the file "COPYING" in the main directory of this archive
  * for more details.
  */
 #include <linux/pci.h>
+#include <linux/io.h>
 #include <asm/addrspace.h>
-#include <asm/io.h>
 #include "pci-sh4.h"
 
 /*
  * Direct access to PCI hardware...
  */
 #define CONFIG_CMD(bus, devfn, where) \
-	P1SEGADDR((bus->number << 16) | (devfn << 8) | (where & ~3))
+	(P1SEG | (bus->number << 16) | (devfn << 8) | (where & ~3))
 
 static DEFINE_SPINLOCK(sh4_pci_lock);
 
-- 
cgit v0.10.2


From 0bbc9bc3189f24de946777af43c9033c8c4871e4 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 14:09:09 +0900
Subject: sh: pci: Set class/sub-class code correctly for SH7780 PCIC.

The SH7780 PCI host controller implements a configuration header that
requires a fair bit of hand-holding to initialize properly. By default
it appears as a pre-2.0 host controller given the zeroed out class code,
so fix this up properly.

Some boards that happened to be using the R7780RP version of the PCIC
fixups had set this correctly, but this belongs in the standard
initialization, and is by no means board specific.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c
index c295731..004efd4 100644
--- a/arch/sh/drivers/pci/fixups-sdk7780.c
+++ b/arch/sh/drivers/pci/fixups-sdk7780.c
@@ -16,8 +16,6 @@
 
 int pci_fixup_pcic(struct pci_channel *chan)
 {
-	ctrl_outl(0x00000001, SH7780_PCI_VCR2);
-
 	/* Enable all interrupts, so we know what to fix */
 	pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR);
 	pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM);
@@ -26,8 +24,6 @@ int pci_fixup_pcic(struct pci_channel *chan)
 	pci_write_reg(chan, 0xFB00, SH7780_PCISTATUS);
 	pci_write_reg(chan, 0x0047, SH7780_PCICMD);
 	pci_write_reg(chan, 0x00, SH7780_PCIPIF);
-	pci_write_reg(chan, 0x00, SH7780_PCISUB);
-	pci_write_reg(chan, 0x06, SH7780_PCIBCC);
 	pci_write_reg(chan, 0x1912, SH7780_PCISVID);
 	pci_write_reg(chan, 0x0001, SH7780_PCISID);
 
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 45fa423..7f4f590 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -72,16 +72,15 @@ int __init sh7780_pcic_init(struct pci_channel *chan,
 {
 	u32 word;
 
+	pci_write_reg(chan, PCI_CLASS_BRIDGE_HOST >> 8, SH7780_PCIBCC);
+	pci_write_reg(chan, PCI_CLASS_BRIDGE_HOST & 0xff, SH7780_PCISUB);
+
 	/* set the command/status bits to:
 	 * Wait Cycle Control + Parity Enable + Bus Master +
 	 * Mem space enable
 	 */
 	pci_write_reg(chan, 0x00000046, SH7780_PCICMD);
 
-	/* define this host as the host bridge */
-	word = PCI_BASE_CLASS_BRIDGE << 24;
-	pci_write_reg(chan, word, SH7780_PCIRID);
-
 	/* Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
 	 */
-- 
cgit v0.10.2


From 4e7b7fdb129995640f144b7de114e109c6b46a2a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 15:05:19 +0900
Subject: sh: pci: Rework SH7780 host controller detection.

This reworks how the host controller is probed, and makes it a bit more
verbose in the event a new type of controller is detected. Additionally,
we also log the revision information.

This now uses the proper access sizes for the vendor/device registers,
rather than relying on a larger access that encapsulated both of them.
Not all devices support 32-bit read cycles for these registers.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 7f4f590..63b5151 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -34,33 +34,39 @@
 int __init sh7780_pci_init(struct pci_channel *chan)
 {
 	unsigned int id;
-	int ret, match = 0;
+	const char *type = NULL;
+	int ret;
 
-	pr_debug("PCI: Starting intialization.\n");
+	printk(KERN_NOTICE "PCI: Starting intialization.\n");
 
 	chan->reg_base = 0xfe040000;
 	chan->io_base = 0xfe200000;
 
-	ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */
-
-	/* check for SH7780/SH7780R hardware */
-	id = pci_read_reg(chan, SH7780_PCIVID);
-	if ((id & 0xffff) == SH7780_VENDOR_ID) {
-		switch ((id >> 16) & 0xffff) {
-		case SH7763_DEVICE_ID:
-		case SH7780_DEVICE_ID:
-		case SH7781_DEVICE_ID:
-		case SH7785_DEVICE_ID:
-			match = 1;
-			break;
-		}
-	}
+	/* Enable CPU access to the PCIC registers. */
+	__raw_writel(PCIECR_ENBL, PCIECR);
 
-	if (unlikely(!match)) {
-		printk(KERN_ERR "PCI: This is not an SH7780 (%x)\n", id);
+	id = __raw_readw(chan->reg_base + SH7780_PCIVID);
+	if (id != SH7780_VENDOR_ID) {
+		printk(KERN_ERR "PCI: Unknown vendor ID 0x%04x.\n", id);
 		return -ENODEV;
 	}
 
+	id = __raw_readw(chan->reg_base + SH7780_PCIDID);
+	type = (id == SH7763_DEVICE_ID)	? "SH7763" :
+	       (id == SH7780_DEVICE_ID) ? "SH7780" :
+	       (id == SH7781_DEVICE_ID) ? "SH7781" :
+	       (id == SH7785_DEVICE_ID) ? "SH7785" :
+					  NULL;
+	if (unlikely(!type)) {
+		printk(KERN_ERR "PCI: Found an unsupported Renesas host "
+		       "controller, device id 0x%04x.\n", id);
+		return -EINVAL;
+	}
+
+	printk(KERN_NOTICE "PCI: Found a Renesas %s host "
+	       "controller, revision %d.\n", type,
+	       __raw_readb(chan->reg_base + SH7780_PCIRID));
+
 	if ((ret = sh4_pci_check_direct(chan)) != 0)
 		return ret;
 
diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h
index fffcf1d..213f1d8 100644
--- a/arch/sh/drivers/pci/pci-sh7780.h
+++ b/arch/sh/drivers/pci/pci-sh7780.h
@@ -20,9 +20,8 @@
 #define SH7785_DEVICE_ID	0x0007
 
 /* SH7780 Control Registers */
-#define	SH7780_PCI_VCR0		0xFE000000
-#define	SH7780_PCI_VCR1		0xFE000004
-#define	SH7780_PCI_VCR2		0xFE000008
+#define	PCIECR			0xFE000008
+#define PCIECR_ENBL		0x01
 
 /* SH7780 Specific Values */
 #define SH7780_PCI_CONFIG_BASE	0xFD000000	/* Config space base addr */
-- 
cgit v0.10.2


From ab78cbcf6877334fc20868b7df7887349e2e01c8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 15:08:01 +0900
Subject: sh: pci: Use the proper write size for class/sub-class code.

Don't use pci_write_reg() for these, as it defaults to 32-bit. Rather
than using the helper, use __raw_writeb() directly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 63b5151..19bac21 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -78,8 +78,10 @@ int __init sh7780_pcic_init(struct pci_channel *chan,
 {
 	u32 word;
 
-	pci_write_reg(chan, PCI_CLASS_BRIDGE_HOST >> 8, SH7780_PCIBCC);
-	pci_write_reg(chan, PCI_CLASS_BRIDGE_HOST & 0xff, SH7780_PCISUB);
+	__raw_writeb(PCI_CLASS_BRIDGE_HOST >> 8,
+		     chan->reg_base + SH7780_PCIBCC);
+	__raw_writeb(PCI_CLASS_BRIDGE_HOST & 0xff,
+		     chan->reg_base + SH7780_PCISUB);
 
 	/* set the command/status bits to:
 	 * Wait Cycle Control + Parity Enable + Bus Master +
-- 
cgit v0.10.2


From c66c1d79a94a7a302e2dc6c93da40902423eac3e Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 16:38:00 +0900
Subject: sh: pci: Set pci_cache_line_size on SH7780 via the PCICLS register.

The SH7780 PCIC contains a read-only cache line size register that we can
derive pci_cache_line_size from. So, make sure that the software idea of
the cache line size actually matches the host controller's idea.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 19bac21..fa73b0d 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -22,15 +22,6 @@
 #include <linux/delay.h>
 #include "pci-sh4.h"
 
-/*
- * Initialization. Try all known PCI access methods. Note that we support
- * using both PCI BIOS and direct access: in such cases, we use I/O ports
- * to access config space.
- *
- * Note that the platform specific initialization (BSC registers, and memory
- * space mapping) will be called via the platform defined function
- * pcibios_init_platform().
- */
 int __init sh7780_pci_init(struct pci_channel *chan)
 {
 	unsigned int id;
@@ -70,19 +61,31 @@ int __init sh7780_pci_init(struct pci_channel *chan)
 	if ((ret = sh4_pci_check_direct(chan)) != 0)
 		return ret;
 
+	/*
+	 * Platform specific initialization (BSC registers, and memory space
+	 * mapping) will be called via the platform defined function
+	 * pcibios_init_platform().
+	 */
 	return pcibios_init_platform();
 }
 
+extern u8 pci_cache_line_size;
+
 int __init sh7780_pcic_init(struct pci_channel *chan,
 			    struct sh4_pci_address_map *map)
 {
 	u32 word;
 
+	/*
+	 * Set the class and sub-class codes.
+	 */
 	__raw_writeb(PCI_CLASS_BRIDGE_HOST >> 8,
 		     chan->reg_base + SH7780_PCIBCC);
 	__raw_writeb(PCI_CLASS_BRIDGE_HOST & 0xff,
 		     chan->reg_base + SH7780_PCISUB);
 
+	pci_cache_line_size = pci_read_reg(chan, SH7780_PCICLS) / 4;
+
 	/* set the command/status bits to:
 	 * Wait Cycle Control + Parity Enable + Bus Master +
 	 * Mem space enable
-- 
cgit v0.10.2


From f1dcab756687622b658154ded1657538984edcdb Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 17:00:27 +0900
Subject: sh: pci: Set the I/O port base to the SH7780 I/O window default.

Presently the I/O port base isn't being set anywhere, which allows things
like generic_inl() to blow up. Fix this up to point at the PCI IO window.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index fa73b0d..207b720 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -112,5 +112,7 @@ int __init sh7780_pcic_init(struct pci_channel *chan,
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO;
 	pci_write_reg(chan, word, SH4_PCICR);
 
+	__set_io_port_base(SH7780_PCI_IO_BASE);
+
 	return 0;
 }
-- 
cgit v0.10.2


From ab1363a8929f32cc163cd3f50ca72f20d901b00c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 17:07:47 +0900
Subject: sh: pci: Consolidate PCI I/O and mem window definitions for SH7780.

This consolidates all of the PCI I/O and memory window definitions across
the pci-sh7780 users in pci-sh7780 itself. No functional changes, in that
every platform had exactly the same implementation.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
index 8ec6d22..044525d 100644
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ b/arch/sh/drivers/pci/ops-r7780rp.c
@@ -26,27 +26,6 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 	return irq_tab[slot];
 }
 
-static struct resource sh7780_io_resource = {
-	.name	= "SH7780_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7780_mem_resource = {
-	.name	= "SH7780_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops sh7780_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7780_pci_init, &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
 static struct sh4_pci_address_map sh7780_pci_map = {
 	.window0	= {
 		.base	= SH7780_CS2_BASE_ADDR,
@@ -61,5 +40,5 @@ static struct sh4_pci_address_map sh7780_pci_map = {
 
 int __init pcibios_init_platform(void)
 {
-	return sh7780_pcic_init(&board_pci_channels[0], &sh7780_pci_map);
+	return sh7780_pcic_init(&sh7780_pci_map);
 }
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
index 6a0b7c0..570fd71 100644
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ b/arch/sh/drivers/pci/ops-sdk7780.c
@@ -34,25 +34,6 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
        return sdk7780_irq_tab[pin-1][slot];
 }
 
-static struct resource sdk7780_io_resource = {
-	.name	= "SH7780_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sdk7780_mem_resource = {
-	.name	= "SH7780_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7780_pci_init, &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
 static struct sh4_pci_address_map sdk7780_pci_map = {
 	.window0	= {
 		.base	= SH7780_CS2_BASE_ADDR,
@@ -67,5 +48,5 @@ static struct sh4_pci_address_map sdk7780_pci_map = {
 int __init pcibios_init_platform(void)
 {
 	printk(KERN_INFO "SH7780 PCI: Finished initializing PCI controller\n");
-	return sh7780_pcic_init(&board_pci_channels[0], &sdk7780_pci_map);
+	return sh7780_pcic_init(&sdk7780_pci_map);
 }
diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c
index 583b8e8..8b09415 100644
--- a/arch/sh/drivers/pci/ops-se7780.c
+++ b/arch/sh/drivers/pci/ops-se7780.c
@@ -41,27 +41,6 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
        return se7780_irq_tab[pin-1][slot];
 }
 
-static struct resource se7780_io_resource = {
-	.name	= "SH7780_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource se7780_mem_resource = {
-	.name	= "SH7780_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops se7780_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7780_pci_init, &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
 static struct sh4_pci_address_map se7780_pci_map = {
 	.window0	= {
 		.base	= SH7780_CS2_BASE_ADDR,
@@ -90,5 +69,5 @@ int __init pcibios_init_platform(void)
 	ctrl_outw(0x0013, FPGA_PCI_INTSEL1);
 	ctrl_outw(0xE402, FPGA_PCI_INTSEL2);
 
-	return sh7780_pcic_init(&board_pci_channels[0], &se7780_pci_map);
+	return sh7780_pcic_init(&se7780_pci_map);
 }
diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c
index ab0d1de..bac46b5 100644
--- a/arch/sh/drivers/pci/ops-sh7785lcr.c
+++ b/arch/sh/drivers/pci/ops-sh7785lcr.c
@@ -26,25 +26,6 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 	return irq_tab[slot];
 }
 
-static struct resource sh7785_io_resource = {
-	.name	= "SH7785_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7785_mem_resource = {
-	.name	= "SH7785_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7780_pci_init, &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
 static struct sh4_pci_address_map sh7785_pci_map = {
 	.window0	= {
 #if defined(CONFIG_32BIT)
@@ -59,5 +40,5 @@ static struct sh4_pci_address_map sh7785_pci_map = {
 
 int __init pcibios_init_platform(void)
 {
-	return sh7780_pcic_init(&board_pci_channels[0], &sh7785_pci_map);
+	return sh7780_pcic_init(&sh7785_pci_map);
 }
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 207b720..eb217dd 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -22,7 +22,7 @@
 #include <linux/delay.h>
 #include "pci-sh4.h"
 
-int __init sh7780_pci_init(struct pci_channel *chan)
+static int __init sh7780_pci_init(struct pci_channel *chan)
 {
 	unsigned int id;
 	const char *type = NULL;
@@ -71,9 +71,28 @@ int __init sh7780_pci_init(struct pci_channel *chan)
 
 extern u8 pci_cache_line_size;
 
-int __init sh7780_pcic_init(struct pci_channel *chan,
-			    struct sh4_pci_address_map *map)
+static struct resource sh7785_io_resource = {
+	.name	= "SH7785_IO",
+	.start	= SH7780_PCI_IO_BASE,
+	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
+	.flags	= IORESOURCE_IO
+};
+
+static struct resource sh7785_mem_resource = {
+	.name	= "SH7785_mem",
+	.start	= SH7780_PCI_MEMORY_BASE,
+	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM
+};
+
+struct pci_channel board_pci_channels[] = {
+	{ sh7780_pci_init, &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff },
+	{ NULL, NULL, NULL, 0, 0 },
+};
+
+int __init sh7780_pcic_init(struct sh4_pci_address_map *map)
 {
+	struct pci_channel *chan = &board_pci_channels[0];
 	u32 word;
 
 	/*
diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h
index 213f1d8..7a4f8a8 100644
--- a/arch/sh/drivers/pci/pci-sh7780.h
+++ b/arch/sh/drivers/pci/pci-sh7780.h
@@ -107,8 +107,6 @@
 struct sh4_pci_address_map;
 
 /* arch/sh/drivers/pci/pci-sh7780.c */
-int sh7780_pci_init(struct pci_channel *chan);
-int sh7780_pcic_init(struct pci_channel *chan,
-		     struct sh4_pci_address_map *map);
+int sh7780_pcic_init(struct sh4_pci_address_map *map);
 
 #endif /* _PCI_SH7780_H_ */
-- 
cgit v0.10.2


From 4c7a47de897e89c25a40e228ac5319cbac7257fe Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 17:21:36 +0900
Subject: sh: pci: Kill off platform-specific multi-window mappings.

Commit 68b42d1b548be1840aff7122fdebeb804daf0fa3 ("sh: sh7785lcr: Map
whole PCI address space.") changed around the semantics of how various
chip-selects are made accessible to PCI. Now that there is a single
large mapping covering from CS0-CS6, there is no longer any need to
do multi-window mapping. Subsequently, all of the differing
implementations can be consolidated in to pci-sh7780.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-se/7780/irq.c b/arch/sh/boards/mach-se/7780/irq.c
index 44b61a5..b8d43b6 100644
--- a/arch/sh/boards/mach-se/7780/irq.c
+++ b/arch/sh/boards/mach-se/7780/irq.c
@@ -49,4 +49,21 @@ void __init init_se7780_IRQ(void)
 
 	/* ICR1: detect low level(for 2ndcut) */
 	ctrl_outl(0xAAAA0000, INTC_ICR1);
+
+	/*
+	 * FPGA PCISEL register initialize
+	 *
+	 *  CPU  || SLOT1 | SLOT2 | S-ATA | USB
+	 *  -------------------------------------
+	 *  INTA || INTA  | INTD  |  --   | INTB
+	 *  -------------------------------------
+	 *  INTB || INTB  | INTA  |  --   | INTC
+	 *  -------------------------------------
+	 *  INTC || INTC  | INTB  | INTA  |  --
+	 *  -------------------------------------
+	 *  INTD || INTD  | INTC  |  --   | INTA
+	 *  -------------------------------------
+	 */
+	ctrl_outw(0x0013, FPGA_PCI_INTSEL1);
+	ctrl_outw(0xE402, FPGA_PCI_INTSEL2);
 }
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
index 044525d..4ea136e 100644
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ b/arch/sh/drivers/pci/ops-r7780rp.c
@@ -25,20 +25,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	return irq_tab[slot];
 }
-
-static struct sh4_pci_address_map sh7780_pci_map = {
-	.window0	= {
-		.base	= SH7780_CS2_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-
-	.window1	= {
-		.base	= SH7780_CS3_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7780_pcic_init(&sh7780_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
index 570fd71..1d9a91b 100644
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ b/arch/sh/drivers/pci/ops-sdk7780.c
@@ -33,20 +33,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
        return sdk7780_irq_tab[pin-1][slot];
 }
-
-static struct sh4_pci_address_map sdk7780_pci_map = {
-	.window0	= {
-		.base	= SH7780_CS2_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-	.window1	= {
-		.base	= SH7780_CS3_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	printk(KERN_INFO "SH7780 PCI: Finished initializing PCI controller\n");
-	return sh7780_pcic_init(&sdk7780_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c
index 8b09415..6c088cc 100644
--- a/arch/sh/drivers/pci/ops-se7780.c
+++ b/arch/sh/drivers/pci/ops-se7780.c
@@ -40,34 +40,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
        return se7780_irq_tab[pin-1][slot];
 }
-
-static struct sh4_pci_address_map se7780_pci_map = {
-	.window0	= {
-		.base	= SH7780_CS2_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	printk("SH7780 PCI: Finished initialization of the PCI controller\n");
-
-	/*
-	 * FPGA PCISEL register initialize
-	 *
-	 *  CPU  || SLOT1 | SLOT2 | S-ATA | USB
-	 *  -------------------------------------
-	 *  INTA || INTA  | INTD  |  --   | INTB
-	 *  -------------------------------------
-	 *  INTB || INTB  | INTA  |  --   | INTC
-	 *  -------------------------------------
-	 *  INTC || INTC  | INTB  | INTA  |  --
-	 *  -------------------------------------
-	 *  INTD || INTD  | INTC  |  --   | INTA
-	 *  -------------------------------------
-	 */
-	ctrl_outw(0x0013, FPGA_PCI_INTSEL1);
-	ctrl_outw(0xE402, FPGA_PCI_INTSEL2);
-
-	return sh7780_pcic_init(&se7780_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c
index bac46b5..0fe423e 100644
--- a/arch/sh/drivers/pci/ops-sh7785lcr.c
+++ b/arch/sh/drivers/pci/ops-sh7785lcr.c
@@ -25,20 +25,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	return irq_tab[slot];
 }
-
-static struct sh4_pci_address_map sh7785_pci_map = {
-	.window0	= {
-#if defined(CONFIG_32BIT)
-		.base	= SH7780_32BIT_DDR_BASE_ADDR,
-		.size	= 0x40000000,
-#else
-		.base	= SH7780_CS0_BASE_ADDR,
-		.size	= 0x20000000,
-#endif
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7780_pcic_init(&sh7785_pci_map);
-}
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index eb217dd..07c5529 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -90,7 +90,19 @@ struct pci_channel board_pci_channels[] = {
 	{ NULL, NULL, NULL, 0, 0 },
 };
 
-int __init sh7780_pcic_init(struct sh4_pci_address_map *map)
+static struct sh4_pci_address_map sh7780_pci_map = {
+	.window0	= {
+#if defined(CONFIG_32BIT)
+		.base	= SH7780_32BIT_DDR_BASE_ADDR,
+		.size	= 0x40000000,
+#else
+		.base	= SH7780_CS0_BASE_ADDR,
+		.size	= 0x20000000,
+#endif
+	},
+};
+
+int __init pcibios_init_platform(void)
 {
 	struct pci_channel *chan = &board_pci_channels[0];
 	u32 word;
@@ -114,14 +126,10 @@ int __init sh7780_pcic_init(struct sh4_pci_address_map *map)
 	/* Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
 	 */
-	pci_write_reg(chan, map->window0.size - 0xfffff, SH4_PCILSR0);
-	pci_write_reg(chan, map->window1.size - 0xfffff, SH4_PCILSR1);
+	pci_write_reg(chan, sh7780_pci_map.window0.size - 0xfffff, SH4_PCILSR0);
 	/* Set the values on window 0 PCI config registers */
-	pci_write_reg(chan, map->window0.base, SH4_PCILAR0);
-	pci_write_reg(chan, map->window0.base, SH7780_PCIMBAR0);
-	/* Set the values on window 1 PCI config registers */
-	pci_write_reg(chan, map->window1.base, SH4_PCILAR1);
-	pci_write_reg(chan, map->window1.base, SH7780_PCIMBAR1);
+	pci_write_reg(chan, sh7780_pci_map.window0.base, SH4_PCILAR0);
+	pci_write_reg(chan, sh7780_pci_map.window0.base, SH7780_PCIMBAR0);
 
 	/* Apply any last-minute PCIC fixups */
 	pci_fixup_pcic(chan);
diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h
index 7a4f8a8..4b65d4b 100644
--- a/arch/sh/drivers/pci/pci-sh7780.h
+++ b/arch/sh/drivers/pci/pci-sh7780.h
@@ -104,9 +104,4 @@
 
 #define SH7780_32BIT_DDR_BASE_ADDR	0x40000000
 
-struct sh4_pci_address_map;
-
-/* arch/sh/drivers/pci/pci-sh7780.c */
-int sh7780_pcic_init(struct sh4_pci_address_map *map);
-
 #endif /* _PCI_SH7780_H_ */
-- 
cgit v0.10.2


From a6d377b6969235a3b5a6e87bdcef387d0976b41c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 20:11:44 +0900
Subject: sh: pci: Consolidate SH7780 PCIC IRQ routing.

Now that the platform code is a bit leaner, we can start consolidating
the various IRQ routing implementations. There are effectively only 2
variants, and the others can use those directly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index 67d710a..5003971 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -16,11 +16,11 @@ obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o
 obj-$(CONFIG_SH_SECUREEDGE5410)		+= ops-snapgear.o
 obj-$(CONFIG_SH_RTS7751R2D)		+= ops-rts7751r2d.o fixups-rts7751r2d.o
 obj-$(CONFIG_SH_SH03)			+= ops-sh03.o fixups-sh03.o
-obj-$(CONFIG_SH_HIGHLANDER)		+= ops-r7780rp.o fixups-r7780rp.o
-obj-$(CONFIG_SH_SDK7780)		+= ops-sdk7780.o fixups-sdk7780.o
+obj-$(CONFIG_SH_HIGHLANDER)		+= fixups-r7780rp.o
+obj-$(CONFIG_SH_SH7785LCR)		+= fixups-r7780rp.o
+obj-$(CONFIG_SH_SDK7780)		+= fixups-sdk7780.o
+obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= fixups-sdk7780.o
 obj-$(CONFIG_SH_TITAN)			+= ops-titan.o
 obj-$(CONFIG_SH_LANDISK)		+= ops-landisk.o
 obj-$(CONFIG_SH_LBOX_RE2)		+= ops-lboxre2.o fixups-lboxre2.o
-obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= ops-se7780.o fixups-sdk7780.o
 obj-$(CONFIG_SH_CAYMAN)			+= ops-cayman.o
-obj-$(CONFIG_SH_SH7785LCR)		+= ops-sh7785lcr.o fixups-r7780rp.o
diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c
index 31f8dfb..864e92f 100644
--- a/arch/sh/drivers/pci/fixups-r7780rp.c
+++ b/arch/sh/drivers/pci/fixups-r7780rp.c
@@ -11,9 +11,17 @@
  * for more details.
  */
 #include <linux/pci.h>
+#include <linux/io.h>
 #include "pci-sh4.h"
-#include <asm/io.h>
 
+static char irq_tab[] __initdata = {
+	65, 66, 67, 68,
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	return irq_tab[slot];
+}
 int pci_fixup_pcic(struct pci_channel *chan)
 {
 	pci_write_reg(chan, 0x000043ff, SH4_PCIINTM);
diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c
index 004efd4..da60e99 100644
--- a/arch/sh/drivers/pci/fixups-sdk7780.c
+++ b/arch/sh/drivers/pci/fixups-sdk7780.c
@@ -5,15 +5,32 @@
  *
  * Copyright (C) 2003  Lineo uSolutions, Inc.
  * Copyright (C) 2004 - 2006  Paul Mundt
+ * Copyright (C) 2006  Nobuhiro Iwamatsu
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
 #include <linux/pci.h>
+#include <linux/io.h>
 #include "pci-sh4.h"
-#include <asm/io.h>
 
+/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */
+static char sdk7780_irq_tab[4][16] __initdata = {
+	/* INTA */
+	{ 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+	/* INTB */
+	{ 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+	/* INTC */
+	{ 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+	/* INTD */
+	{ 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+       return sdk7780_irq_tab[pin-1][slot];
+}
 int pci_fixup_pcic(struct pci_channel *chan)
 {
 	/* Enable all interrupts, so we know what to fix */
diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c
deleted file mode 100644
index 4ea136e..0000000
--- a/arch/sh/drivers/pci/ops-r7780rp.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Author:  Ian DaSilva (idasilva@mvista.com)
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Renesas SH7780 Highlander R7780RP-1 board
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <mach/highlander.h>
-#include <asm/io.h>
-#include "pci-sh4.h"
-
-static char irq_tab[] __initdata = {
-	65, 66, 67, 68,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return irq_tab[slot];
-}
diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c
deleted file mode 100644
index 1d9a91b..0000000
--- a/arch/sh/drivers/pci/ops-sdk7780.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-sdk7780.c
- *
- * Copyright (C) 2006  Nobuhiro Iwamatsu
- *
- * PCI initialization for the SDK7780SE03
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <mach/sdk7780.h>
-#include <asm/io.h>
-#include "pci-sh4.h"
-
-/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */
-static char sdk7780_irq_tab[4][16] __initdata = {
-	/* INTA */
-	{ 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTB */
-	{ 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTC */
-	{ 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTD */
-	{ 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-       return sdk7780_irq_tab[pin-1][slot];
-}
diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c
deleted file mode 100644
index 6c088cc..0000000
--- a/arch/sh/drivers/pci/ops-se7780.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-se7780.c
- *
- * Copyright (C) 2006  Nobuhiro Iwamatsu
- *
- * PCI initialization for the Hitachi UL Solution Engine 7780SE03
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <mach-se/mach/se7780.h>
-#include <asm/io.h>
-#include "pci-sh4.h"
-
-/*
- * IDSEL = AD16  PCI slot
- * IDSEL = AD17  PCI slot
- * IDSEL = AD18  Serial ATA Controller (Silicon Image SiL3512A)
- * IDSEL = AD19  USB Host Controller (NEC uPD7210100A)
- */
-
-/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */
-static char se7780_irq_tab[4][16] __initdata = {
-	/* INTA */
-	{ 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTB */
-	{ 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTC */
-	{ 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-	/* INTD */
-	{ 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-       return se7780_irq_tab[pin-1][slot];
-}
diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c
deleted file mode 100644
index 0fe423e..0000000
--- a/arch/sh/drivers/pci/ops-sh7785lcr.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Author:  Ian DaSilva (idasilva@mvista.com)
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Renesas R0P7785LC0011RL board
- * Based on arch/sh/drivers/pci/ops-r7780rp.c
- *
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-static char irq_tab[] __initdata = {
-	65, 66, 67, 68,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return irq_tab[slot];
-}
-- 
cgit v0.10.2


From 62c7ae87cb5962d3dfaa6d916a15e4faa9e07363 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 17 Apr 2009 20:37:16 +0900
Subject: sh: pci: Start unifying the SH7780 PCIC initialization.

This starts moving out the common initialization bits from the various
fixup paths in to the shared init path.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c
index 864e92f..15ca65c 100644
--- a/arch/sh/drivers/pci/fixups-r7780rp.c
+++ b/arch/sh/drivers/pci/fixups-r7780rp.c
@@ -22,33 +22,15 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	return irq_tab[slot];
 }
+
 int pci_fixup_pcic(struct pci_channel *chan)
 {
 	pci_write_reg(chan, 0x000043ff, SH4_PCIINTM);
-	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
-
-	pci_write_reg(chan, 0xfbb00047, SH7780_PCICMD);
 	pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR);
-
-	pci_write_reg(chan, 0x00011912, SH7780_PCISVID);
 	pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0);
 	pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0);
 	pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1);
 	pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1);
 
-	pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0);
-	pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0);
-
-#ifdef CONFIG_32BIT
-	pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2);
-	pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
-#endif
-
-	/* Set IOBR for windows containing area specified in pci.h */
-	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1),
-		      SH7780_PCIIOBR);
-	pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)),
-		      SH7780_PCIIOBMR);
-
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c
index da60e99..250b0ed 100644
--- a/arch/sh/drivers/pci/fixups-sdk7780.c
+++ b/arch/sh/drivers/pci/fixups-sdk7780.c
@@ -35,40 +35,18 @@ int pci_fixup_pcic(struct pci_channel *chan)
 {
 	/* Enable all interrupts, so we know what to fix */
 	pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR);
-	pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM);
 
 	/* Set up standard PCI config registers */
-	pci_write_reg(chan, 0xFB00, SH7780_PCISTATUS);
-	pci_write_reg(chan, 0x0047, SH7780_PCICMD);
-	pci_write_reg(chan, 0x00, SH7780_PCIPIF);
-	pci_write_reg(chan, 0x1912, SH7780_PCISVID);
-	pci_write_reg(chan, 0x0001, SH7780_PCISID);
-
 	pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0);	/* PCI */
-	pci_write_reg(chan, 0x08000000, SH7780_PCILAR0);	/* SHwy */
-	pci_write_reg(chan, 0x07F00001, SH7780_PCILSR);	/* size 128M w/ MBAR */
+	pci_write_reg(chan, 0x08000000, SH4_PCILAR0);	/* SHwy */
+	pci_write_reg(chan, 0x07F00001, SH4_PCILSR0);	/* size 128M w/ MBAR */
 
 	pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1);
-	pci_write_reg(chan, 0x00000000, SH7780_PCILAR1);
-	pci_write_reg(chan, 0x00000000, SH7780_PCILSR1);
+	pci_write_reg(chan, 0x00000000, SH4_PCILAR1);
+	pci_write_reg(chan, 0x00000000, SH4_PCILSR1);
 
 	pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR);
-
-	/*
-	 * Set the MBR so PCI address is one-to-one with window,
-	 * meaning all calls go straight through... use ifdef to
-	 * catch erroneous assumption.
-	 */
-	pci_write_reg(chan, 0xFD000000 , SH7780_PCIMBR0);
-	pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0);	/* 16M */
-
-	/* Set IOBR for window containing area specified in pci.h */
-	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1),
-		      SH7780_PCIIOBR);
-	pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18),
-		      SH7780_PCIIOBMR);
-
-	pci_write_reg(chan, 0xA5000C01, SH7780_PCICR);
+	pci_write_reg(chan, 0xA5000C01, SH4_PCICR);
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 07c5529..f02d9df 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -1,19 +1,12 @@
 /*
- *	Low-Level PCI Support for the SH7780
+ * Low-Level PCI Support for the SH7780
  *
- *  Dustin McIntire (dustin@sensoria.com)
- *	Derived from arch/i386/kernel/pci-*.c which bore the message:
- *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
- *
- *  Ported to the new API by Paul Mundt <lethal@linux-sh.org>
- *  With cleanup by Paul van Gool <pvangool@mimotech.com>
- *
- *  May be copied or modified under the terms of the GNU General Public
- *  License.  See linux/COPYING for more information.
+ *  Copyright (C) 2005 - 2009  Paul Mundt
  *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
-#undef DEBUG
-
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -117,13 +110,8 @@ int __init pcibios_init_platform(void)
 
 	pci_cache_line_size = pci_read_reg(chan, SH7780_PCICLS) / 4;
 
-	/* set the command/status bits to:
-	 * Wait Cycle Control + Parity Enable + Bus Master +
-	 * Mem space enable
-	 */
-	pci_write_reg(chan, 0x00000046, SH7780_PCICMD);
-
-	/* Set IO and Mem windows to local address
+	/*
+	 * Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
 	 */
 	pci_write_reg(chan, sh7780_pci_map.window0.size - 0xfffff, SH4_PCILSR0);
@@ -131,9 +119,33 @@ int __init pcibios_init_platform(void)
 	pci_write_reg(chan, sh7780_pci_map.window0.base, SH4_PCILAR0);
 	pci_write_reg(chan, sh7780_pci_map.window0.base, SH7780_PCIMBAR0);
 
+	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
+
+	/* Set up standard PCI config registers */
+	__raw_writew(0xFB00, chan->reg_base + SH7780_PCISTATUS);
+	__raw_writew(0x0047, chan->reg_base + SH7780_PCICMD);
+	__raw_writew(0x1912, chan->reg_base + SH7780_PCISVID);
+	__raw_writew(0x0001, chan->reg_base + SH7780_PCISID);
+
+	__raw_writeb(0x00, chan->reg_base + SH7780_PCIPIF);
+
 	/* Apply any last-minute PCIC fixups */
 	pci_fixup_pcic(chan);
 
+	pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0);
+	pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0);
+
+#ifdef CONFIG_32BIT
+	pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2);
+	pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2);
+#endif
+
+	/* Set IOBR for windows containing area specified in pci.h */
+	pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1),
+		      SH7780_PCIIOBR);
+	pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)),
+		      SH7780_PCIIOBMR);
+
 	/* SH7780 init done, set central function init complete */
 	/* use round robin mode to stop a device starving/overruning */
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO;
diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h
index 4b65d4b..4a52478 100644
--- a/arch/sh/drivers/pci/pci-sh7780.h
+++ b/arch/sh/drivers/pci/pci-sh7780.h
@@ -65,11 +65,6 @@
 #define SH7780_PCIPMCSR_BSE	0x046
 #define SH7780_PCICDD		0x047
 
-#define SH7780_PCICR		0x100		/* PCI Control Register */
-#define SH7780_PCILSR		0x104		/* PCI Local Space Register0 */
-#define SH7780_PCILSR1		0x108		/* PCI Local Space Register1 */
-#define SH7780_PCILAR0		0x10C		/* PCI Local Address Register1 */
-#define SH7780_PCILAR1		0x110		/* PCI Local Address Register1 */
 #define SH7780_PCIIR		0x114		/* PCI Interrupt Register */
 #define SH7780_PCIIMR		0x118		/* PCI Interrupt Mask Register */
 #define SH7780_PCIAIR		0x11C		/* Error Address Register */
-- 
cgit v0.10.2


From 93eb677d74a4f7d3edfb678c94f6c0544d9fbad2 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 15 Apr 2009 13:24:06 -0400
Subject: ftrace: use module notifier for function tracer

The hooks in the module code for the function tracer must be called
before any of that module code runs. The function tracer hooks
modify the module (replacing calls to mcount to nops). If the code
is executed while the change occurs, then the CPU can take a GPF.

To handle the above with a bit of paranoia, I originally implemented
the hooks as calls directly from the module code.

After examining the notifier calls, it looks as though the start up
notify is called before any of the module's code is executed. This makes
the use of the notify safe with ftrace.

Only the startup notify is required to be "safe". The shutdown simply
removes the entries from the ftrace function list, and does not modify
any code.

This change has another benefit. It removes a issue with a reverse dependency
in the mutexes of ftrace_lock and module_mutex.

[ Impact: fix lock dependency bug, cleanup ]

Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 53869be..97c83e1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
 extern int skip_trace(unsigned long ip);
 
-extern void ftrace_release(void *start, unsigned long size);
-
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
 #else
@@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled)
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
-extern void ftrace_init_module(struct module *mod,
-			       unsigned long *start, unsigned long *end);
 #else
 static inline void ftrace_init(void) { }
-static inline void
-ftrace_init_module(struct module *mod,
-		   unsigned long *start, unsigned long *end) { }
 #endif
 
 /*
diff --git a/include/linux/module.h b/include/linux/module.h
index 6155fa4..a8f2c0a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -341,6 +341,10 @@ struct module
 	struct ftrace_event_call *trace_events;
 	unsigned int num_trace_events;
 #endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+	unsigned long *ftrace_callsites;
+	unsigned int num_ftrace_callsites;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
diff --git a/kernel/module.c b/kernel/module.c
index a039470..2383e60 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1490,9 +1490,6 @@ static void free_module(struct module *mod)
 	/* Free any allocated parameters. */
 	destroy_params(mod->kp, mod->num_kp);
 
-	/* release any pointers to mcount in this module */
-	ftrace_release(mod->module_core, mod->core_size);
-
 	/* This may be NULL, but that's OK */
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
@@ -1893,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
 	unsigned int modindex, versindex, infoindex, pcpuindex;
-	unsigned int num_mcount;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-	unsigned long *mseg;
 	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2179,7 +2174,13 @@ static noinline struct module *load_module(void __user *umod,
 					 sizeof(*mod->trace_events),
 					 &mod->num_trace_events);
 #endif
-
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+	/* sechdrs[0].sh_size is always zero */
+	mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
+					     "__mcount_loc",
+					     sizeof(*mod->ftrace_callsites),
+					     &mod->num_ftrace_callsites);
+#endif
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs)
 	    || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2244,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod,
 			dynamic_debug_setup(debug, num_debug);
 	}
 
-	/* sechdrs[0].sh_size is always zero */
-	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
-			    sizeof(*mseg), &num_mcount);
-	ftrace_init_module(mod, mseg, mseg + num_mcount);
-
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2309,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod,
  cleanup:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
-	ftrace_release(mod->module_core, mod->core_size);
  free_unload:
 	module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a234889..5b606f4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -916,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
 	rec->flags |= FTRACE_FL_FREE;
 }
 
-void ftrace_release(void *start, unsigned long size)
-{
-	struct dyn_ftrace *rec;
-	struct ftrace_page *pg;
-	unsigned long s = (unsigned long)start;
-	unsigned long e = s + size;
-
-	if (ftrace_disabled || !start)
-		return;
-
-	mutex_lock(&ftrace_lock);
-	do_for_each_ftrace_rec(pg, rec) {
-		if ((rec->ip >= s) && (rec->ip < e)) {
-			/*
-			 * rec->ip is changed in ftrace_free_rec()
-			 * It should not between s and e if record was freed.
-			 */
-			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-			ftrace_free_rec(rec);
-		}
-	} while_for_each_ftrace_rec();
-	mutex_unlock(&ftrace_lock);
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
 	struct dyn_ftrace *rec;
@@ -2752,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod,
 	return 0;
 }
 
-void ftrace_init_module(struct module *mod,
-			unsigned long *start, unsigned long *end)
+#ifdef CONFIG_MODULES
+void ftrace_release(void *start, void *end)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	unsigned long s = (unsigned long)start;
+	unsigned long e = (unsigned long)end;
+
+	if (ftrace_disabled || !start || start == end)
+		return;
+
+	mutex_lock(&ftrace_lock);
+	do_for_each_ftrace_rec(pg, rec) {
+		if ((rec->ip >= s) && (rec->ip < e)) {
+			/*
+			 * rec->ip is changed in ftrace_free_rec()
+			 * It should not between s and e if record was freed.
+			 */
+			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
+			ftrace_free_rec(rec);
+		}
+	} while_for_each_ftrace_rec();
+	mutex_unlock(&ftrace_lock);
+}
+
+static void ftrace_init_module(struct module *mod,
+			       unsigned long *start, unsigned long *end)
 {
 	if (ftrace_disabled || start == end)
 		return;
 	ftrace_convert_nops(mod, start, end);
 }
 
+static int ftrace_module_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	switch (val) {
+	case MODULE_STATE_COMING:
+		ftrace_init_module(mod, mod->ftrace_callsites,
+				   mod->ftrace_callsites +
+				   mod->num_ftrace_callsites);
+		break;
+	case MODULE_STATE_GOING:
+		ftrace_release(mod->ftrace_callsites,
+			       mod->ftrace_callsites +
+			       mod->num_ftrace_callsites);
+		break;
+	}
+
+	return 0;
+}
+#else
+static int ftrace_module_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+struct notifier_block ftrace_module_nb = {
+	.notifier_call = ftrace_module_notify,
+	.priority = 0,
+};
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
@@ -2791,6 +2825,10 @@ void __init ftrace_init(void)
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
+	ret = register_module_notifier(&ftrace_module_nb);
+	if (!ret)
+		pr_warning("Failed to register trace ftrace module notifier\n");
+
 	return;
  failed:
 	ftrace_disabled = 1;
-- 
cgit v0.10.2


From e6187007d6c365b551c69ea3df46f06fd1c8bd19 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 15 Apr 2009 13:36:40 -0400
Subject: tracing/events: add startup tests for events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As events start to become popular, and the new way to add tracing
infrastructure into ftrace, it is important to catch any problems
that might happen with a mistake in the TRACE_EVENT macro.

This patch introduces a startup self test on the registered trace
events. Note, it can only do a generic test, any type of testing that
needs more involement is needed to be implemented by the tracepoint
creators.

The test goes down one by one enabling a trace point and running
some random tasks (random in the sense that I just made them up).
Those tasks are creating threads, grabbing mutexes and spinlocks
and using workqueues.

After testing each event individually, it does the same test after
enabling each system of trace points. Like sched, irq, lockdep.

Then finally it enables all tracepoints and performs the tasks again.
The output to the console on bootup will look like this when everything
works:

Running tests on trace events:
Testing event kfree_skb: OK
Testing event kmalloc: OK
Testing event kmem_cache_alloc: OK
Testing event kmalloc_node: OK
Testing event kmem_cache_alloc_node: OK
Testing event kfree: OK
Testing event kmem_cache_free: OK
Testing event irq_handler_exit: OK
Testing event irq_handler_entry: OK
Testing event softirq_entry: OK
Testing event softirq_exit: OK
Testing event lock_acquire: OK
Testing event lock_release: OK
Testing event sched_kthread_stop: OK
Testing event sched_kthread_stop_ret: OK
Testing event sched_wait_task: OK
Testing event sched_wakeup: OK
Testing event sched_wakeup_new: OK
Testing event sched_switch: OK
Testing event sched_migrate_task: OK
Testing event sched_process_free: OK
Testing event sched_process_exit: OK
Testing event sched_process_wait: OK
Testing event sched_process_fork: OK
Testing event sched_signal_send: OK
Running tests on trace event systems:
Testing event system skb: OK
Testing event system kmem: OK
Testing event system irq: OK
Testing event system lockdep: OK
Testing event system sched: OK
Running tests on all trace events:
Testing all events: OK

[ folded in:

  tracing: add #include <linux/delay.h> to fix build failure in test_work()

  This build failure occured on a few rare configs:

   kernel/trace/trace_events.c: In function ‘test_work’:
   kernel/trace/trace_events.c:975: error: implicit declaration of function ‘udelay’
   kernel/trace/trace_events.c:980: error: implicit declaration of function ‘msleep’

  delay.h is included in way too many other headers, hiding cases
  where new usage is added without header inclusion.

  [ Impact: build fix ]

  Signed-off-by: Ingo Molnar <mingo@elte.hu>
]

[ Impact: add event tracer self-tests ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6591d83..f81d6ee 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -8,10 +8,14 @@
  *
  */
 
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/delay.h>
 
 #include "trace_output.h"
 
@@ -920,3 +924,177 @@ static __init int event_trace_init(void)
 	return 0;
 }
 fs_initcall(event_trace_init);
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+static DEFINE_SPINLOCK(test_spinlock);
+static DEFINE_SPINLOCK(test_spinlock_irq);
+static DEFINE_MUTEX(test_mutex);
+
+static __init void test_work(struct work_struct *dummy)
+{
+	spin_lock(&test_spinlock);
+	spin_lock_irq(&test_spinlock_irq);
+	udelay(1);
+	spin_unlock_irq(&test_spinlock_irq);
+	spin_unlock(&test_spinlock);
+
+	mutex_lock(&test_mutex);
+	msleep(1);
+	mutex_unlock(&test_mutex);
+}
+
+static __init int event_test_thread(void *unused)
+{
+	void *test_malloc;
+
+	test_malloc = kmalloc(1234, GFP_KERNEL);
+	if (!test_malloc)
+		pr_info("failed to kmalloc\n");
+
+	schedule_on_each_cpu(test_work);
+
+	kfree(test_malloc);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop())
+		schedule();
+
+	return 0;
+}
+
+/*
+ * Do various things that may trigger events.
+ */
+static __init void event_test_stuff(void)
+{
+	struct task_struct *test_thread;
+
+	test_thread = kthread_run(event_test_thread, NULL, "test-events");
+	msleep(1);
+	kthread_stop(test_thread);
+}
+
+/*
+ * For every trace event defined, we will test each trace point separately,
+ * and then by groups, and finally all trace points.
+ */
+static __init int event_trace_self_tests(void)
+{
+	struct ftrace_event_call *call;
+	struct event_subsystem *system;
+	char *sysname;
+	int ret;
+
+	pr_info("Running tests on trace events:\n");
+
+	list_for_each_entry(call, &ftrace_events, list) {
+
+		/* Only test those that have a regfunc */
+		if (!call->regfunc)
+			continue;
+
+		pr_info("Testing event %s: ", call->name);
+
+		/*
+		 * If an event is already enabled, someone is using
+		 * it and the self test should not be on.
+		 */
+		if (call->enabled) {
+			pr_warning("Enabled event during self test!\n");
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		call->enabled = 1;
+		call->regfunc();
+
+		event_test_stuff();
+
+		call->unregfunc();
+		call->enabled = 0;
+
+		pr_cont("OK\n");
+	}
+
+	/* Now test at the sub system level */
+
+	pr_info("Running tests on trace event systems:\n");
+
+	list_for_each_entry(system, &event_subsystems, list) {
+
+		/* the ftrace system is special, skip it */
+		if (strcmp(system->name, "ftrace") == 0)
+			continue;
+
+		pr_info("Testing event system %s: ", system->name);
+
+		/* ftrace_set_clr_event can modify the name passed in. */
+		sysname = kstrdup(system->name, GFP_KERNEL);
+		if (WARN_ON(!sysname)) {
+			pr_warning("Can't allocate memory, giving up!\n");
+			return 0;
+		}
+		ret = ftrace_set_clr_event(sysname, 1);
+		kfree(sysname);
+		if (WARN_ON_ONCE(ret)) {
+			pr_warning("error enabling system %s\n",
+				   system->name);
+			continue;
+		}
+
+		event_test_stuff();
+
+		sysname = kstrdup(system->name, GFP_KERNEL);
+		if (WARN_ON(!sysname)) {
+			pr_warning("Can't allocate memory, giving up!\n");
+			return 0;
+		}
+		ret = ftrace_set_clr_event(sysname, 0);
+		kfree(sysname);
+
+		if (WARN_ON_ONCE(ret))
+			pr_warning("error disabling system %s\n",
+				   system->name);
+
+		pr_cont("OK\n");
+	}
+
+	/* Test with all events enabled */
+
+	pr_info("Running tests on all trace events:\n");
+	pr_info("Testing all events: ");
+
+	sysname = kmalloc(4, GFP_KERNEL);
+	if (WARN_ON(!sysname)) {
+		pr_warning("Can't allocate memory, giving up!\n");
+		return 0;
+	}
+	memcpy(sysname, "*:*", 4);
+	ret = ftrace_set_clr_event(sysname, 1);
+	if (WARN_ON_ONCE(ret)) {
+		kfree(sysname);
+		pr_warning("error enabling all events\n");
+		return 0;
+	}
+
+	event_test_stuff();
+
+	/* reset sysname */
+	memcpy(sysname, "*:*", 4);
+	ret = ftrace_set_clr_event(sysname, 0);
+	kfree(sysname);
+
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error disabling all events\n");
+		return 0;
+	}
+
+	pr_cont("OK\n");
+
+	return 0;
+}
+
+late_initcall(event_trace_self_tests);
+
+#endif
-- 
cgit v0.10.2


From 6b87a91f5417226c7fe62100b0e7217e7096b789 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Fri, 17 Apr 2009 15:55:08 +0300
Subject: ASoC: TWL4030: Fix for the constraint handling

The original implementation of the constraints were good against sane
applications.
If the opening sequence is:
stream1_open, stream1_hw_params, stream2_open, stream2_hw_params -> the
constraints are set correctly for stream2.

But if the sequence is:
stream1_open, stream2_open, stream2_hw_params, stream1_hw_params -> than stream2
would receive constraint rate = 0, sample_bits = 0, since the stream1 has not
yet called hw_params...

The command to trigger this event:
gst-launch-0.10 alsasrc device=hw:0 ! alsasink device=hw:0 sync=false

This patch does some 'black magic' in order to always set the correct
constraints and sets it only when it is needed for the other stream.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 921b205..a1b76d7 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -125,6 +125,11 @@ struct twl4030_priv {
 
 	struct snd_pcm_substream *master_substream;
 	struct snd_pcm_substream *slave_substream;
+
+	unsigned int configured;
+	unsigned int rate;
+	unsigned int sample_bits;
+	unsigned int channels;
 };
 
 /*
@@ -1220,6 +1225,36 @@ static int twl4030_set_bias_level(struct snd_soc_codec *codec,
 	return 0;
 }
 
+static void twl4030_constraints(struct twl4030_priv *twl4030,
+				struct snd_pcm_substream *mst_substream)
+{
+	struct snd_pcm_substream *slv_substream;
+
+	/* Pick the stream, which need to be constrained */
+	if (mst_substream == twl4030->master_substream)
+		slv_substream = twl4030->slave_substream;
+	else if (mst_substream == twl4030->slave_substream)
+		slv_substream = twl4030->master_substream;
+	else /* This should not happen.. */
+		return;
+
+	/* Set the constraints according to the already configured stream */
+	snd_pcm_hw_constraint_minmax(slv_substream->runtime,
+				SNDRV_PCM_HW_PARAM_RATE,
+				twl4030->rate,
+				twl4030->rate);
+
+	snd_pcm_hw_constraint_minmax(slv_substream->runtime,
+				SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
+				twl4030->sample_bits,
+				twl4030->sample_bits);
+
+	snd_pcm_hw_constraint_minmax(slv_substream->runtime,
+				SNDRV_PCM_HW_PARAM_CHANNELS,
+				twl4030->channels,
+				twl4030->channels);
+}
+
 static int twl4030_startup(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai)
 {
@@ -1228,26 +1263,16 @@ static int twl4030_startup(struct snd_pcm_substream *substream,
 	struct snd_soc_codec *codec = socdev->card->codec;
 	struct twl4030_priv *twl4030 = codec->private_data;
 
-	/* If we already have a playback or capture going then constrain
-	 * this substream to match it.
-	 */
 	if (twl4030->master_substream) {
-		struct snd_pcm_runtime *master_runtime;
-		master_runtime = twl4030->master_substream->runtime;
-
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_RATE,
-					     master_runtime->rate,
-					     master_runtime->rate);
-
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
-					     master_runtime->sample_bits,
-					     master_runtime->sample_bits);
-
 		twl4030->slave_substream = substream;
-	} else
+		/* The DAI has one configuration for playback and capture, so
+		 * if the DAI has been already configured then constrain this
+		 * substream to match it. */
+		if (twl4030->configured)
+			twl4030_constraints(twl4030, twl4030->master_substream);
+	} else {
 		twl4030->master_substream = substream;
+	}
 
 	return 0;
 }
@@ -1264,6 +1289,13 @@ static void twl4030_shutdown(struct snd_pcm_substream *substream,
 		twl4030->master_substream = twl4030->slave_substream;
 
 	twl4030->slave_substream = NULL;
+
+	/* If all streams are closed, or the remaining stream has not yet
+	 * been configured than set the DAI as not configured. */
+	if (!twl4030->master_substream)
+		twl4030->configured = 0;
+	 else if (!twl4030->master_substream->runtime->channels)
+		twl4030->configured = 0;
 }
 
 static int twl4030_hw_params(struct snd_pcm_substream *substream,
@@ -1276,8 +1308,8 @@ static int twl4030_hw_params(struct snd_pcm_substream *substream,
 	struct twl4030_priv *twl4030 = codec->private_data;
 	u8 mode, old_mode, format, old_format;
 
-	if (substream == twl4030->slave_substream)
-		/* Ignoring hw_params for slave substream */
+	if (twl4030->configured)
+		/* Ignoring hw_params for already configured DAI */
 		return 0;
 
 	/* bit rate */
@@ -1357,6 +1389,21 @@ static int twl4030_hw_params(struct snd_pcm_substream *substream,
 		/* set CODECPDZ afterwards */
 		twl4030_codec_enable(codec, 1);
 	}
+
+	/* Store the important parameters for the DAI configuration and set
+	 * the DAI as configured */
+	twl4030->configured = 1;
+	twl4030->rate = params_rate(params);
+	twl4030->sample_bits = hw_param_interval(params,
+					SNDRV_PCM_HW_PARAM_SAMPLE_BITS)->min;
+	twl4030->channels = params_channels(params);
+
+	/* If both playback and capture streams are open, and one of them
+	 * is setting the hw parameters right now (since we are here), set
+	 * constraints to the other stream to match the current one. */
+	if (twl4030->slave_substream)
+		twl4030_constraints(twl4030, substream);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From d1b182a8d49ed6416325b4e0a1cb0f17cd4e702a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 15 Apr 2009 16:53:47 -0400
Subject: tracing/events/ring-buffer: expose format of ring buffer headers to
 users

Currently, every thing needed to read the binary output from the
ring buffers is available, with the exception of the way the ring
buffers handles itself internally.

This patch creates two special files in the debugfs/tracing/events
directory:

 # cat /debug/tracing/events/header_page
        field: u64 timestamp;   offset:0;       size:8;
        field: local_t commit;  offset:8;       size:8;
        field: char data;       offset:16;      size:4080;

 # cat /debug/tracing/events/header_event
        type        :    2 bits
        len         :    3 bits
        time_delta  :   27 bits
        array       :   32 bits

        padding     : type == 0
        time_extend : type == 1
        data        : type == 3

This is to allow a userspace app to see if the ring buffer format changes
or not.

[ Impact: allow userspace apps to know of ringbuffer format changes ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index f0aa486..fac8f1a 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -166,6 +166,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
 int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
 			  size_t len, int cpu, int full);
 
+struct trace_seq;
+
+int ring_buffer_print_entry_header(struct trace_seq *s);
+int ring_buffer_print_page_header(struct trace_seq *s);
+
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
 };
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f935bd5..84a6055 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -22,6 +22,28 @@
 #include "trace.h"
 
 /*
+ * The ring buffer header is special. We must manually up keep it.
+ */
+int ring_buffer_print_entry_header(struct trace_seq *s)
+{
+	int ret;
+
+	ret = trace_seq_printf(s, "\ttype        :    2 bits\n");
+	ret = trace_seq_printf(s, "\tlen         :    3 bits\n");
+	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
+	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
+	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
+			       RINGBUF_TYPE_PADDING);
+	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
+			       RINGBUF_TYPE_TIME_EXTEND);
+	ret = trace_seq_printf(s, "\tdata        : type == %d\n",
+			       RINGBUF_TYPE_DATA);
+
+	return ret;
+}
+
+/*
  * The ring buffer is made up of a list of pages. A separate list of pages is
  * allocated for each CPU. A writer may only write to a buffer that is
  * associated with the CPU it is currently executing on.  A reader may read
@@ -340,6 +362,28 @@ static inline int test_time_stamp(u64 delta)
 
 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
+int ring_buffer_print_page_header(struct trace_seq *s)
+{
+	struct buffer_data_page field;
+	int ret;
+
+	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
+			       "offset:0;\tsize:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp));
+
+	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       (unsigned int)sizeof(field.commit));
+
+	ret = trace_seq_printf(s, "\tfield: char data;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), data),
+			       (unsigned int)BUF_PAGE_SIZE);
+
+	return ret;
+}
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f81d6ee..7163a2b 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -610,6 +610,30 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	return cnt;
 }
 
+static ssize_t
+show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	int (*func)(struct trace_seq *s) = filp->private_data;
+	struct trace_seq *s;
+	int r;
+
+	if (*ppos)
+		return 0;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	trace_seq_init(s);
+
+	func(s);
+	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+
+	kfree(s);
+
+	return r;
+}
+
 static const struct seq_operations show_event_seq_ops = {
 	.start = t_start,
 	.next = t_next,
@@ -667,6 +691,11 @@ static const struct file_operations ftrace_subsystem_filter_fops = {
 	.write = subsystem_filter_write,
 };
 
+static const struct file_operations ftrace_show_header_fops = {
+	.open = tracing_open_generic,
+	.read = show_header,
+};
+
 static struct dentry *event_trace_events_dir(void)
 {
 	static struct dentry *d_tracer;
@@ -909,6 +938,15 @@ static __init int event_trace_init(void)
 	if (!d_events)
 		return 0;
 
+	/* ring buffer internal formats */
+	trace_create_file("header_page", 0444, d_events,
+			  ring_buffer_print_page_header,
+			  &ftrace_show_header_fops);
+
+	trace_create_file("header_event", 0444, d_events,
+			  ring_buffer_print_entry_header,
+			  &ftrace_show_header_fops);
+
 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
 		if (!call->name)
-- 
cgit v0.10.2


From 69abe6a5d18a9394baa325bab8f57748b037c517 Mon Sep 17 00:00:00 2001
From: Avadh Patel <avadh4all@gmail.com>
Date: Fri, 10 Apr 2009 16:04:48 -0400
Subject: tracing: add saved_cmdlines file to show cached task comms

Export the cached task comms to userspace. This allows user apps to translate
the pids from a trace into their respective task command lines.

[ Impact: let userspace apps reading binary buffer know comm's of pids ]

Signed-off-by: Avadh Patel <avadh4all@gmail.com>
[ added error checking and use of buf pointer to index file_buf ]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2d69b26..031c46f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2422,6 +2422,56 @@ static const struct file_operations tracing_readme_fops = {
 };
 
 static ssize_t
+tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	char *buf_comm;
+	char *file_buf;
+	char *buf;
+	int len = 0;
+	int pid;
+	int i;
+
+	file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
+	if (!file_buf)
+		return -ENOMEM;
+
+	buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
+	if (!buf_comm) {
+		kfree(file_buf);
+		return -ENOMEM;
+	}
+
+	buf = file_buf;
+
+	for (i = 0; i < SAVED_CMDLINES; i++) {
+		int r;
+
+		pid = map_cmdline_to_pid[i];
+		if (pid == -1 || pid == NO_CMDLINE_MAP)
+			continue;
+
+		trace_find_cmdline(pid, buf_comm);
+		r = sprintf(buf, "%d %s\n", pid, buf_comm);
+		buf += r;
+		len += r;
+	}
+
+	len = simple_read_from_buffer(ubuf, cnt, ppos,
+				      file_buf, len);
+
+	kfree(file_buf);
+	kfree(buf_comm);
+
+	return len;
+}
+
+static const struct file_operations tracing_saved_cmdlines_fops = {
+    .open       = tracing_open_generic,
+    .read       = tracing_saved_cmdlines_read,
+};
+
+static ssize_t
 tracing_ctrl_read(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
@@ -3973,6 +4023,9 @@ static __init int tracer_init_debugfs(void)
 	trace_create_file("trace_marker", 0220, d_tracer,
 			NULL, &tracing_mark_fops);
 
+	trace_create_file("saved_cmdlines", 0444, d_tracer,
+			NULL, &tracing_saved_cmdlines_fops);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
-- 
cgit v0.10.2


From 9ea21c1ecdb35ecdcac5fd9d95f62a1f6a7ffec0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 16 Apr 2009 12:15:44 -0400
Subject: tracing/events: perform function tracing in event selftests

We can find some bugs in the trace events if we stress the writes as well.
The function tracer is a good way to stress the events.

[ Impact: extend scope of event tracer self-tests ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20090416161746.604786131@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7163a2b..1137f95 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1017,7 +1017,7 @@ static __init void event_test_stuff(void)
  * For every trace event defined, we will test each trace point separately,
  * and then by groups, and finally all trace points.
  */
-static __init int event_trace_self_tests(void)
+static __init void event_trace_self_tests(void)
 {
 	struct ftrace_event_call *call;
 	struct event_subsystem *system;
@@ -1071,7 +1071,7 @@ static __init int event_trace_self_tests(void)
 		sysname = kstrdup(system->name, GFP_KERNEL);
 		if (WARN_ON(!sysname)) {
 			pr_warning("Can't allocate memory, giving up!\n");
-			return 0;
+			return;
 		}
 		ret = ftrace_set_clr_event(sysname, 1);
 		kfree(sysname);
@@ -1086,7 +1086,7 @@ static __init int event_trace_self_tests(void)
 		sysname = kstrdup(system->name, GFP_KERNEL);
 		if (WARN_ON(!sysname)) {
 			pr_warning("Can't allocate memory, giving up!\n");
-			return 0;
+			return;
 		}
 		ret = ftrace_set_clr_event(sysname, 0);
 		kfree(sysname);
@@ -1106,14 +1106,14 @@ static __init int event_trace_self_tests(void)
 	sysname = kmalloc(4, GFP_KERNEL);
 	if (WARN_ON(!sysname)) {
 		pr_warning("Can't allocate memory, giving up!\n");
-		return 0;
+		return;
 	}
 	memcpy(sysname, "*:*", 4);
 	ret = ftrace_set_clr_event(sysname, 1);
 	if (WARN_ON_ONCE(ret)) {
 		kfree(sysname);
 		pr_warning("error enabling all events\n");
-		return 0;
+		return;
 	}
 
 	event_test_stuff();
@@ -1125,10 +1125,76 @@ static __init int event_trace_self_tests(void)
 
 	if (WARN_ON_ONCE(ret)) {
 		pr_warning("error disabling all events\n");
-		return 0;
+		return;
 	}
 
 	pr_cont("OK\n");
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+static DEFINE_PER_CPU(atomic_t, test_event_disable);
+
+static void
+function_test_events_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct ring_buffer_event *event;
+	struct ftrace_entry *entry;
+	unsigned long flags;
+	long disabled;
+	int resched;
+	int cpu;
+	int pc;
+
+	pc = preempt_count();
+	resched = ftrace_preempt_disable();
+	cpu = raw_smp_processor_id();
+	disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
+
+	if (disabled != 1)
+		goto out;
+
+	local_save_flags(flags);
+
+	event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
+						  flags, pc);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
+	entry->ip			= ip;
+	entry->parent_ip		= parent_ip;
+
+	trace_current_buffer_unlock_commit(event, flags, pc);
+
+ out:
+	atomic_dec(&per_cpu(test_event_disable, cpu));
+	ftrace_preempt_enable(resched);
+}
+
+static struct ftrace_ops trace_ops __initdata  =
+{
+	.func = function_test_events_call,
+};
+
+static __init void event_trace_self_test_with_function(void)
+{
+	register_ftrace_function(&trace_ops);
+	pr_info("Running tests again, along with the function tracer\n");
+	event_trace_self_tests();
+	unregister_ftrace_function(&trace_ops);
+}
+#else
+static __init void event_trace_self_test_with_function(void)
+{
+}
+#endif
+
+static __init int event_trace_self_tests_init(void)
+{
+
+	event_trace_self_tests();
+
+	event_trace_self_test_with_function();
 
 	return 0;
 }
-- 
cgit v0.10.2


From 76aa81118ddfbb3dc31533030cf3ec329dd067a6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 16 Apr 2009 23:35:39 -0700
Subject: tracing: avoid warnings from zero-arg tracepoints

Tracepoints with no arguments can issue two warnings:

	"field" defined by not used
	"ret" is uninitialized in this function

Mark field as being OK to leave unused, and initialize ret.

[ Impact: fix false positive compiler warnings. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: mathieu.desnoyers@polymtl.ca
LKML-Reference: <1239950139-1119-5-git-send-email-jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 60c5323..39a3351 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -160,8 +160,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 static int								\
 ftrace_format_##call(struct trace_seq *s)				\
 {									\
-	struct ftrace_raw_##call field;					\
-	int ret;							\
+	struct ftrace_raw_##call field __attribute__((unused));		\
+	int ret = 0;							\
 									\
 	tstruct;							\
 									\
-- 
cgit v0.10.2


From 339ae5d3c3fc2025e3657637921495fd600027c7 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 17 Apr 2009 10:34:30 +0800
Subject: tracing: fix file mode of trace and README

trace is read-write and README is read-only.

[ Impact: fix /debug/tracing/ file permissions. ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <49E7EAB6.4070605@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 031c46f..f681f64 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4002,7 +4002,7 @@ static __init int tracer_init_debugfs(void)
 	trace_create_file("available_tracers", 0444, d_tracer,
 			&global_trace, &show_traces_fops);
 
-	trace_create_file("current_tracer", 0444, d_tracer,
+	trace_create_file("current_tracer", 0644, d_tracer,
 			&global_trace, &set_tracer_fops);
 
 	trace_create_file("tracing_max_latency", 0644, d_tracer,
@@ -4011,7 +4011,7 @@ static __init int tracer_init_debugfs(void)
 	trace_create_file("tracing_thresh", 0644, d_tracer,
 			&tracing_thresh, &tracing_max_lat_fops);
 
-	trace_create_file("README", 0644, d_tracer,
+	trace_create_file("README", 0444, d_tracer,
 			NULL, &tracing_readme_fops);
 
 	trace_create_file("trace_pipe", 0444, d_tracer,
-- 
cgit v0.10.2


From 9b94b3a19b13e094c10f65f24bc358f6ffe4eacd Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 17 Apr 2009 12:07:46 +0200
Subject: x86: fixup numa_node information for AMD CPU northbridge functions

Currently the numa_node attribute for these PCI devices is 0 (it
corresponds to the numa_node for PCI bus 0). This is not a big issue
but incorrect.

This inconsistency can be fixed by reading the node number from CPU
NB function 0.

[ Impact: fill in dev->numa_node information, to optimize DMA allocations ]

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: jbarnes@virtuousgeek.org
LKML-Reference: <20090417100746.GG16198@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index e95022e..94ad0c0 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -493,5 +493,42 @@ void force_hpet_resume(void)
 		break;
 	}
 }
+#endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
+/* Set correct numa_node information for AMD NB functions */
+static void __init quirk_amd_nb_node(struct pci_dev *dev)
+{
+	struct pci_dev *nb_ht;
+	unsigned int devfn;
+	u32 val;
+
+	devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
+	nb_ht = pci_get_slot(dev->bus, devfn);
+	if (!nb_ht)
+		return;
+
+	pci_read_config_dword(nb_ht, 0x60, &val);
+	set_dev_node(&dev->dev, val & 7);
+	pci_dev_put(dev);
+}
 
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
+			quirk_amd_nb_node);
 #endif
-- 
cgit v0.10.2


From 46de405f25f1d9fa73b657ffbb752aa0cc87a91d Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 17 Apr 2009 10:53:43 +0800
Subject: tracing: Remove include/trace/kmem_event_types.h

kmem_event_types.h is no longer necessary since tracepoint definitions
are put into include/trace/events/kmem.h

[ Impact: remove now-unused file. ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49E7EF37.2080205@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h
deleted file mode 100644
index 4ff420f..0000000
--- a/include/trace/kmem_event_types.h
+++ /dev/null
@@ -1,193 +0,0 @@
-
-/* use <trace/kmem.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kmem
-
-TRACE_EVENT(kmalloc,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags)
-);
-
-TRACE_EVENT(kmem_cache_alloc,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags)
-);
-
-TRACE_EVENT(kmalloc_node,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags,
-		__entry->node)
-);
-
-TRACE_EVENT(kmem_cache_alloc_node,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags,
-		__entry->node)
-);
-
-TRACE_EVENT(kfree,
-
-	TP_PROTO(unsigned long call_site, const void *ptr),
-
-	TP_ARGS(call_site, ptr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
-
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
-);
-
-TRACE_EVENT(kmem_cache_free,
-
-	TP_PROTO(unsigned long call_site, const void *ptr),
-
-	TP_ARGS(call_site, ptr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
-
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
-);
-
-#undef TRACE_SYSTEM
-- 
cgit v0.10.2


From ac1adc55fc71c7515caa2eb0e63e49b3d1c6a47c Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Fri, 17 Apr 2009 00:27:08 -0500
Subject: tracing/filters: add filter_mutex to protect filter predicates

This patch adds a filter_mutex to prevent the filter predicates from
being accessed concurrently by various external functions.

It's based on a previous patch by Li Zefan:
        "[PATCH 7/7] tracing/filters: make filter preds RCU safe"

v2 changes:

- fixed wrong value returned in a add_subsystem_pred() failure case
  noticed by Li Zefan.

[ Impact: fix trace filter corruption/crashes on parallel access ]

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Tested-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: paulmck@linux.vnet.ibm.com
LKML-Reference: <1239946028.6639.13.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8817c18..247948e 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -757,13 +757,15 @@ struct filter_pred {
 };
 
 extern void filter_free_pred(struct filter_pred *pred);
-extern void filter_print_preds(struct filter_pred **preds, int n_preds,
+extern void filter_print_preds(struct ftrace_event_call *call,
 			       struct trace_seq *s);
 extern int filter_parse(char **pbuf, struct filter_pred *pred);
 extern int filter_add_pred(struct ftrace_event_call *call,
 			   struct filter_pred *pred);
 extern void filter_disable_preds(struct ftrace_event_call *call);
 extern void filter_free_subsystem_preds(struct event_subsystem *system);
+extern void filter_print_subsystem_preds(struct event_subsystem *system,
+					 struct trace_seq *s);
 extern int filter_add_subsystem_pred(struct event_subsystem *system,
 				     struct filter_pred *pred);
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1137f95..64f9d6d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -488,7 +488,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	trace_seq_init(s);
 
-	filter_print_preds(call->preds, call->n_preds, s);
+	filter_print_preds(call, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -558,7 +558,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	trace_seq_init(s);
 
-	filter_print_preds(system->preds, system->n_preds, s);
+	filter_print_subsystem_preds(system, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f8e5eab..e0fcfd2 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,10 +22,13 @@
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/mutex.h>
 
 #include "trace.h"
 #include "trace_output.h"
 
+static DEFINE_MUTEX(filter_mutex);
+
 static int filter_pred_64(struct filter_pred *pred, void *event)
 {
 	u64 *addr = (u64 *)(event + pred->offset);
@@ -112,8 +115,8 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec)
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
-void filter_print_preds(struct filter_pred **preds, int n_preds,
-			struct trace_seq *s)
+static void __filter_print_preds(struct filter_pred **preds, int n_preds,
+				 struct trace_seq *s)
 {
 	char *field_name;
 	struct filter_pred *pred;
@@ -138,6 +141,21 @@ void filter_print_preds(struct filter_pred **preds, int n_preds,
 	}
 }
 
+void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s)
+{
+	mutex_lock(&filter_mutex);
+	__filter_print_preds(call->preds, call->n_preds, s);
+	mutex_unlock(&filter_mutex);
+}
+
+void filter_print_subsystem_preds(struct event_subsystem *system,
+				  struct trace_seq *s)
+{
+	mutex_lock(&filter_mutex);
+	__filter_print_preds(system->preds, system->n_preds, s);
+	mutex_unlock(&filter_mutex);
+}
+
 static struct ftrace_event_field *
 find_event_field(struct ftrace_event_call *call, char *name)
 {
@@ -180,7 +198,7 @@ static int filter_set_pred(struct filter_pred *dest,
 	return 0;
 }
 
-void filter_disable_preds(struct ftrace_event_call *call)
+static void __filter_disable_preds(struct ftrace_event_call *call)
 {
 	int i;
 
@@ -190,6 +208,13 @@ void filter_disable_preds(struct ftrace_event_call *call)
 		call->preds[i]->fn = filter_pred_none;
 }
 
+void filter_disable_preds(struct ftrace_event_call *call)
+{
+	mutex_lock(&filter_mutex);
+	__filter_disable_preds(call);
+	mutex_unlock(&filter_mutex);
+}
+
 int init_preds(struct ftrace_event_call *call)
 {
 	struct filter_pred *pred;
@@ -223,7 +248,7 @@ oom:
 }
 EXPORT_SYMBOL_GPL(init_preds);
 
-void filter_free_subsystem_preds(struct event_subsystem *system)
+static void __filter_free_subsystem_preds(struct event_subsystem *system)
 {
 	struct ftrace_event_call *call;
 	int i;
@@ -241,18 +266,25 @@ void filter_free_subsystem_preds(struct event_subsystem *system)
 			continue;
 
 		if (!strcmp(call->system, system->name))
-			filter_disable_preds(call);
+			__filter_disable_preds(call);
 	}
 }
 
-static int __filter_add_pred(struct ftrace_event_call *call,
-			     struct filter_pred *pred,
-			     filter_pred_fn_t fn)
+void filter_free_subsystem_preds(struct event_subsystem *system)
+{
+	mutex_lock(&filter_mutex);
+	__filter_free_subsystem_preds(system);
+	mutex_unlock(&filter_mutex);
+}
+
+static int filter_add_pred_fn(struct ftrace_event_call *call,
+			      struct filter_pred *pred,
+			      filter_pred_fn_t fn)
 {
 	int idx, err;
 
 	if (call->n_preds && !pred->compound)
-		filter_disable_preds(call);
+		__filter_disable_preds(call);
 
 	if (call->n_preds == MAX_FILTER_PRED)
 		return -ENOSPC;
@@ -276,7 +308,8 @@ static int is_string_field(const char *type)
 	return 0;
 }
 
-int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
+static int __filter_add_pred(struct ftrace_event_call *call,
+			     struct filter_pred *pred)
 {
 	struct ftrace_event_field *field;
 	filter_pred_fn_t fn;
@@ -293,7 +326,7 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
 			return -EINVAL;
 		fn = filter_pred_string;
 		pred->str_len = field->size;
-		return __filter_add_pred(call, pred, fn);
+		return filter_add_pred_fn(call, pred, fn);
 	} else {
 		if (pred->str_len)
 			return -EINVAL;
@@ -316,7 +349,18 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
 		return -EINVAL;
 	}
 
-	return __filter_add_pred(call, pred, fn);
+	return filter_add_pred_fn(call, pred, fn);
+}
+
+int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
+{
+	int err;
+
+	mutex_lock(&filter_mutex);
+	err = __filter_add_pred(call, pred);
+	mutex_unlock(&filter_mutex);
+
+	return err;
 }
 
 int filter_add_subsystem_pred(struct event_subsystem *system,
@@ -324,20 +368,27 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 {
 	struct ftrace_event_call *call;
 
+	mutex_lock(&filter_mutex);
+
 	if (system->n_preds && !pred->compound)
-		filter_free_subsystem_preds(system);
+		__filter_free_subsystem_preds(system);
 
 	if (!system->n_preds) {
 		system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
 					GFP_KERNEL);
-		if (!system->preds)
+		if (!system->preds) {
+			mutex_unlock(&filter_mutex);
 			return -ENOMEM;
+		}
 	}
 
-	if (system->n_preds == MAX_FILTER_PRED)
+	if (system->n_preds == MAX_FILTER_PRED) {
+		mutex_unlock(&filter_mutex);
 		return -ENOSPC;
+	}
 
 	system->preds[system->n_preds] = pred;
+	system->n_preds++;
 
 	list_for_each_entry(call, &ftrace_events, list) {
 		int err;
@@ -348,17 +399,16 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 		if (strcmp(call->system, system->name))
 			continue;
 
-		if (!find_event_field(call, pred->field_name))
-			continue;
-
-		err = filter_add_pred(call, pred);
+		err = __filter_add_pred(call, pred);
 		if (err == -ENOMEM) {
 			system->preds[system->n_preds] = NULL;
+			system->n_preds--;
+			mutex_unlock(&filter_mutex);
 			return err;
 		}
 	}
 
-	system->n_preds++;
+	mutex_unlock(&filter_mutex);
 
 	return 0;
 }
-- 
cgit v0.10.2


From b0afdc126d0515e76890f0a5f26b28501cfa298e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 17 Apr 2009 13:02:22 -0400
Subject: tracing/events: enable code with EVENT_TRACING not EVENT_TRACER

The CONFIG_EVENT_TRACER is the way to turn on event tracing when no
other tracing has been configured. All code to get enabled should
depend on CONFIG_EVENT_TRACING. That is what is enabled when TRACING
(or CONFIG_EVENT_TRACER) is selected.

This patch enables the include/trace/ftrace.h file when
CONFIG_EVENT_TRACING is enabled.

[ Impact: fix warning in event tracer selftest ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 1886941..7f1f23d 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -56,7 +56,7 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#ifdef CONFIG_EVENT_TRACER
+#ifdef CONFIG_EVENT_TRACING
 #include <trace/ftrace.h>
 #endif
 
-- 
cgit v0.10.2


From 12acd473d45cf2e40de3782cb2de712e5cd4d715 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 17 Apr 2009 16:01:56 -0400
Subject: tracing: add EXPORT_SYMBOL_GPL for trace commits

Not all the necessary symbols were exported to allow for tracing
by modules. This patch adds them in.

[ Impact: allow modules to commit data to the ring buffer ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f681f64..183d788 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -894,18 +894,20 @@ void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 {
 	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
 }
+EXPORT_SYMBOL(trace_current_buffer_unlock_commit);
 
 void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
 	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
 }
+EXPORT_SYMBOL(trace_nowake_buffer_unlock_commit);
 
 void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
 {
 	ring_buffer_discard_commit(global_trace.buffer, event);
 }
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
+EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
 
 void
 trace_function(struct trace_array *tr,
-- 
cgit v0.10.2


From 261842b7c9099f56de2eb969c8ad65402d68e00e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 16 Apr 2009 21:41:52 -0400
Subject: tracing: add same level recursion detection

The tracing infrastructure allows for recursion. That is, an interrupt
may interrupt the act of tracing an event, and that interrupt may very well
perform its own trace. This is a recursive trace, and is fine to do.

The problem arises when there is a bug, and the utility doing the trace
calls something that recurses back into the tracer. This recursion is not
caused by an external event like an interrupt, but by code that is not
expected to recurse. The result could be a lockup.

This patch adds a bitmask to the task structure that keeps track
of the trace recursion. To find the interrupt depth, the following
algorithm is used:

  level = hardirq_count() + softirq_count() + in_nmi;

Here, level will be the depth of interrutps and softirqs, and even handles
the nmi. Then the corresponding bit is set in the recursion bitmask.
If the bit was already set, we know we had a recursion at the same level
and we warn about it and fail the writing to the buffer.

After the data has been committed to the buffer, we clear the bit.
No atomics are needed. The only races are with interrupts and they reset
the bitmask before returning anywy.

[ Impact: detect same irq level trace recursion ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 97c83e1..39b95c5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -488,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
 
 extern int ftrace_dump_on_oops;
 
+#ifdef CONFIG_PREEMPT
+#define INIT_TRACE_RECURSION		.trace_recursion = 0,
+#endif
+
 #endif /* CONFIG_TRACING */
 
+#ifndef INIT_TRACE_RECURSION
+#define INIT_TRACE_RECURSION
+#endif
 
 #ifdef CONFIG_HW_BRANCH_TRACER
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index dcfb933..6fc2185 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -187,6 +187,7 @@ extern struct cred init_cred;
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
+	INIT_TRACE_RECURSION						\
 }
 
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc..7ede5e4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1428,7 +1428,9 @@ struct task_struct {
 #ifdef CONFIG_TRACING
 	/* state flags for use by tracers */
 	unsigned long trace;
-#endif
+	/* bitmask of trace recursion */
+	unsigned long trace_recursion;
+#endif /* CONFIG_TRACING */
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 84a6055..b421b0e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1481,6 +1481,40 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 }
 
+static int trace_irq_level(void)
+{
+	return hardirq_count() + softirq_count() + in_nmi();
+}
+
+static int trace_recursive_lock(void)
+{
+	int level;
+
+	level = trace_irq_level();
+
+	if (unlikely(current->trace_recursion & (1 << level))) {
+		/* Disable all tracing before we do anything else */
+		tracing_off_permanent();
+		WARN_ON_ONCE(1);
+		return -1;
+	}
+
+	current->trace_recursion |= 1 << level;
+
+	return 0;
+}
+
+static void trace_recursive_unlock(void)
+{
+	int level;
+
+	level = trace_irq_level();
+
+	WARN_ON_ONCE(!current->trace_recursion & (1 << level));
+
+	current->trace_recursion &= ~(1 << level);
+}
+
 static DEFINE_PER_CPU(int, rb_need_resched);
 
 /**
@@ -1514,6 +1548,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (trace_recursive_lock())
+		goto out_nocheck;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1543,6 +1580,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	return event;
 
  out:
+	trace_recursive_unlock();
+
+ out_nocheck:
 	ftrace_preempt_enable(resched);
 	return NULL;
 }
@@ -1581,6 +1621,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
+	trace_recursive_unlock();
+
 	/*
 	 * Only the last preempt count needs to restore preemption.
 	 */
-- 
cgit v0.10.2


From 3189cdb31622f4e40688ce5a6fc5d940b42bc805 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 17 Apr 2009 16:13:55 -0400
Subject: tracing: protect trace_printk from recursion

trace_printk can be called from any context, including NMIs.
If this happens, then we must test for for recursion before
grabbing any spinlocks.

This patch prevents trace_printk from being called recursively.

[ Impact: prevent hard lockup in lockdep event tracer ]

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 183d788..b9a3adc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1259,6 +1259,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	struct trace_array_cpu *data;
 	struct bprint_entry *entry;
 	unsigned long flags;
+	int disable;
 	int resched;
 	int cpu, len = 0, size, pc;
 
@@ -1273,7 +1274,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(atomic_read(&data->disabled)))
+	disable = atomic_inc_return(&data->disabled);
+	if (unlikely(disable != 1))
 		goto out;
 
 	/* Lockdep uses trace_printk for lock tracing */
@@ -1301,6 +1303,7 @@ out_unlock:
 	local_irq_restore(flags);
 
 out:
+	atomic_dec_return(&data->disabled);
 	ftrace_preempt_enable(resched);
 	unpause_graph_tracing();
 
@@ -1320,6 +1323,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	int cpu, len = 0, size, pc;
 	struct print_entry *entry;
 	unsigned long irq_flags;
+	int disable;
 
 	if (tracing_disabled || tracing_selftest_running)
 		return 0;
@@ -1329,7 +1333,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(atomic_read(&data->disabled)))
+	disable = atomic_inc_return(&data->disabled);
+	if (unlikely(disable != 1))
 		goto out;
 
 	pause_graph_tracing();
@@ -1357,6 +1362,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 	raw_local_irq_restore(irq_flags);
 	unpause_graph_tracing();
  out:
+	atomic_dec_return(&data->disabled);
 	preempt_enable_notrace();
 
 	return len;
-- 
cgit v0.10.2


From 8e668b5b3455207e4540fc7ccab9ecf70142f288 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 17 Apr 2009 17:17:55 -0400
Subject: tracing: remove format attribute of inline function

Due to a cut and paste error, I added the gcc attribute for printf
format to the static inline stub of trace_seq_printf.

This will cause a compile failure.

[ Impact: fix compiler error when CONFIG_TRACING is off ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: =?ISO-8859-15?Q?Fr=E9d=E9ric_Weisbecker?= <fweisbec@gmail.com>
LKML-Reference: <alpine.DEB.2.00.0904171717080.1016@gandalf.stny.rr.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 15ca2c7..37db9bd 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -42,7 +42,6 @@ extern int trace_seq_path(struct trace_seq *s, struct path *path);
 
 #else /* CONFIG_TRACING */
 static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)))
 {
 	return 0;
 }
-- 
cgit v0.10.2


From 6f41469c627fe118121dfce2a8134ad24da3df28 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: block: clear req->errors on bio completion only for fs requests

Impact: subtle behavior change

For fs requests, rq is only carrier of bios and rq error status as a
whole doesn't mean much.  This is the reason why rq->errors is being
cleared on each partial completion of a request as on each partial
completion the error status is transferred to the respective bios.

For pc requests, rq->errors is used to carry error status to the
issuer and thus __end_that_request_first() doesn't clear it on such
cases.

The condition was fine till now as only fs and pc requests have used
bio and thus the bio completion path.  However, future changes will
unify data accesses to bio and all non fs users care about rq error
status.  Clear rq->errors on bio completion only for fs requests.

In general, the implicit clearing is a bit too subtle especially as
the meaning of rq->errors is completely dependent on low level
drivers.  Unifying / cleaning up rq->errors usage and letting llds
manage it would be better.  TODO comment added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Jens Axboe <axboe@kernel.dk>

diff --git a/block/blk-core.c b/block/blk-core.c
index 07ab754..cce7a88 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1739,10 +1739,14 @@ static int __end_that_request_first(struct request *req, int error,
 	trace_block_rq_complete(req->q, req);
 
 	/*
-	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
-	 * sense key with us all the way through
+	 * For fs requests, rq is just carrier of independent bio's
+	 * and each partial completion should be handled separately.
+	 * Reset per-request error on each partial completion.
+	 *
+	 * TODO: tj: This is too subtle.  It would be better to let
+	 * low level drivers do what they see fit.
 	 */
-	if (!blk_pc_request(req))
+	if (blk_fs_request(req))
 		req->errors = 0;
 
 	if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
-- 
cgit v0.10.2


From 1e75540ec5202cae63cd238c86bd880e3d496546 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide-tape: remove back-to-back REQUEST_SENSE detection

Impact: fix an oops which always triggers

ide_tape_issue_pc() assumed drive->pc isn't NULL on invocation when
checking for back-to-back request sense issues but drive->pc can be
NULL and even when it's not NULL, it's not safe to dereference it once
the previous command is complete because pc could have been freed or
was on stack.  Kill back-to-back REQUEST_SENSE detection.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index cb942a9..3a53e08 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -614,12 +614,6 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 {
 	idetape_tape_t *tape = drive->driver_data;
 
-	if (drive->pc->c[0] == REQUEST_SENSE &&
-	    pc->c[0] == REQUEST_SENSE) {
-		printk(KERN_ERR "ide-tape: possible ide-tape.c bug - "
-			"Two request sense in serial were issued\n");
-	}
-
 	if (drive->failed_pc == NULL && pc->c[0] != REQUEST_SENSE)
 		drive->failed_pc = pc;
 
-- 
cgit v0.10.2


From 853280a4dc8e3cc97ff10c1c02234d96078f437b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide: use blk_run_queue() instead of blk_start_queueing()

blk_start_queueing() is being phased out in favor of
[__]blk_run_queue().  Switch.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 310d03f..a914023 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -24,11 +24,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 			start_queue = 1;
 		spin_unlock_irq(&hwif->lock);
 
-		if (start_queue) {
-			spin_lock_irq(q->queue_lock);
-			blk_start_queueing(q);
-			spin_unlock_irq(q->queue_lock);
-		}
+		if (start_queue)
+			blk_run_queue(q);
 		return;
 	}
 	spin_unlock_irq(&hwif->lock);
-- 
cgit v0.10.2


From 55f3f399422a4a3f6cb84ea4096dfaddf8817399 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide: don't set REQ_SOFTBARRIER

ide doesn't have to worry about REQ_SOFTBARRIER.  Don't set it.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index a9fbe2c..c243880 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -411,7 +411,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 	cmd->protocol = ATA_PROT_NODATA;
 
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->special = cmd;
 }
 
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index c1c25eb..5991b23 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -231,7 +231,6 @@ static int generic_drive_reset(ide_drive_t *drive)
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd_len = 1;
 	rq->cmd[0] = REQ_DRIVE_RESET;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 	if (blk_execute_rq(drive->queue, NULL, rq, 1))
 		ret = rq->errors;
 	blk_put_request(rq);
-- 
cgit v0.10.2


From 46a802e852c2106d755f697819ea80cd3ffdc222 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide kill unused ide_cmd->special

Impact: removal of unused field

No one uses ide_cmd->special anymore.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/include/linux/ide.h b/include/linux/ide.h
index ff65fff..846a1e1 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -324,7 +324,6 @@ struct ide_cmd {
 	unsigned int		cursg_ofs;
 
 	struct request		*rq;		/* copy of request */
-	void			*special;	/* valid_t generally */
 };
 
 /* ATAPI packet command flags */
-- 
cgit v0.10.2


From 1873b90cdea038715ec7140fccc2116fb930ffb5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide-cd: clear sense buffer before issuing request sense

Impact: code simplification

ide_cd_request_sense_fixup() clears the tail of the sense buffer if
the device didn't completely fill it.  This patch makes
cdrom_queue_request_sense() clear the sense buffer before issuing the
command instead of clearing it afterwards.  This simplifies code and
eases future changes.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 3aec19d..509f129 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -217,6 +217,8 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 	if (sense == NULL)
 		sense = &info->sense_data;
 
+	memset(sense, 0, 18);
+
 	/* stuff the sense request in front of our current request */
 	blk_rq_init(NULL, rq);
 	rq->cmd_type = REQ_TYPE_ATA_PC;
@@ -504,14 +506,8 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
 	 * and some drives don't send them.  Sigh.
 	 */
 	if (rq->cmd[0] == GPCMD_REQUEST_SENSE &&
-	    cmd->nleft > 0 && cmd->nleft <= 5) {
-		unsigned int ofs = cmd->nbytes - cmd->nleft;
-
-		while (cmd->nleft > 0) {
-			*((u8 *)rq->data + ofs++) = 0;
-			cmd->nleft--;
-		}
-	}
+	    cmd->nleft > 0 && cmd->nleft <= 5)
+		cmd->nleft = 0;
 }
 
 int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
-- 
cgit v0.10.2


From 7f006dc24fae158131116c9472874f12e16cf040 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-floppy: block pc always uses bio

Impact: remove unnecessary code path

Block pc requests always use bio and rq->data is always NULL.  No need
to worry about !rq->bio cases in idefloppy_block_pc_cmd().  Note that
ide-atapi uses ide_pio_bytes() for bio PIO transfer which handle sg
fine.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 2b4868d..3b22e06 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -216,15 +216,13 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
 	ide_init_pc(pc);
 	memcpy(pc->c, rq->cmd, sizeof(pc->c));
 	pc->rq = rq;
-	if (rq->data_len && rq_data_dir(rq) == WRITE)
-		pc->flags |= PC_FLAG_WRITING;
-	pc->buf = rq->data;
-	if (rq->bio)
+	if (rq->data_len) {
 		pc->flags |= PC_FLAG_DMA_OK;
-	/*
-	 * possibly problematic, doesn't look like ide-floppy correctly
-	 * handled scattered requests if dma fails...
-	 */
+		if (rq_data_dir(rq) == WRITE)
+			pc->flags |= PC_FLAG_WRITING;
+	}
+	/* pio will be performed by ide_pio_bytes() which handles sg fine */
+	pc->buf = NULL;
 	pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
-- 
cgit v0.10.2


From eace4cb04c0edc9388e987bf9bbdef461f6daca4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-taskfile: don't abuse rq->buffer

Impact: rq->buffer usage cleanup

ide_raw_taskfile() directly uses rq->buffer to carry pointer to the
data buffer.  This complicates both block interface and ide backend
request handling.  Use blk_rq_map_kern() instead and drop special
handling for REQ_TYPE_ATA_TASKFILE from ide_map_sg().

Note that REQ_RW setting is moved upwards as blk_rq_map_kern() uses it
to initialize bio rw flag.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 35dc38d..9b9e8b1 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -248,10 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
 	struct scatterlist *sg = hwif->sg_table;
 	struct request *rq = cmd->rq;
 
-	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
-		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
-		cmd->sg_nents = 1;
-	} else if (!rq->bio) {
+	if (!rq->bio) {
 		sg_init_one(sg, rq->data, rq->data_len);
 		cmd->sg_nents = 1;
 	} else
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 4aa6223..f400eb4 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -424,7 +424,9 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-	rq->buffer = buf;
+
+	if (cmd->tf_flags & IDE_TFLAG_WRITE)
+		rq->cmd_flags |= REQ_RW;
 
 	/*
 	 * (ks) We transfer currently only whole sectors.
@@ -432,18 +434,20 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	 * if we would find a solution to transfer any size.
 	 * To support special commands like READ LONG.
 	 */
-	rq->hard_nr_sectors = rq->nr_sectors = nsect;
-	rq->hard_cur_sectors = rq->current_nr_sectors = nsect;
-
-	if (cmd->tf_flags & IDE_TFLAG_WRITE)
-		rq->cmd_flags |= REQ_RW;
+	if (nsect) {
+		error = blk_rq_map_kern(drive->queue, rq, buf,
+					nsect * SECTOR_SIZE, __GFP_WAIT);
+		if (error)
+			goto put_req;
+	}
 
 	rq->special = cmd;
 	cmd->rq = rq;
 
 	error = blk_execute_rq(drive->queue, NULL, rq, 0);
-	blk_put_request(rq);
 
+put_req:
+	blk_put_request(rq);
 	return error;
 }
 
-- 
cgit v0.10.2


From c267cc1c4db4ccb3406d045a8da8660f0bbfe08d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-atapi: don't abuse rq->buffer

Impact: rq->buffer usage cleanup

ide-atapi uses rq->buffer as private opaque value for internal special
requests.  rq->special isn't used for these cases (the only case where
rq->special is used is for ide-tape rw requests).  Use rq->special
instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 7201b17..2894577 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -90,7 +90,7 @@ static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
 	blk_rq_init(NULL, rq);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd_flags |= REQ_PREEMPT;
-	rq->buffer = (char *)pc;
+	rq->special = (char *)pc;
 	rq->rq_disk = disk;
 
 	if (pc->req_xfer) {
@@ -119,7 +119,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->buffer = (char *)pc;
+	rq->special = (char *)pc;
 
 	if (pc->req_xfer) {
 		rq->data = pc->buf;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 3b22e06..9460033 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -264,7 +264,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
 	} else if (blk_special_request(rq)) {
-		pc = (struct ide_atapi_pc *) rq->buffer;
+		pc = (struct ide_atapi_pc *)rq->special;
 	} else if (blk_pc_request(rq)) {
 		pc = &floppy->queued_pc;
 		idefloppy_blockpc_cmd(floppy, pc, rq);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 3a53e08..aadf53c 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -828,7 +828,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 		goto out;
 	}
 	if (rq->cmd[13] & REQ_IDETAPE_PC1) {
-		pc = (struct ide_atapi_pc *) rq->buffer;
+		pc = (struct ide_atapi_pc *)rq->special;
 		rq->cmd[13] &= ~(REQ_IDETAPE_PC1);
 		rq->cmd[13] |= REQ_IDETAPE_PC2;
 		goto out;
-- 
cgit v0.10.2


From cbfd082abfcbed8c57a12636f36e9bead8d6cfc6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-cd: don't abuse rq->buffer

Impact: rq->buffer usage cleanup

ide-cd uses rq->buffer to carry pointer to the original request when
issuing REQUEST_SENSE.  Use rq->special instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 509f129..eb3c299 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -232,8 +232,8 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 	rq->cmd_type = REQ_TYPE_SENSE;
 	rq->cmd_flags |= REQ_PREEMPT;
 
-	/* NOTE! Save the failed command in "rq->buffer" */
-	rq->buffer = (void *) failed_command;
+	/* NOTE! Save the failed command in "rq->special" */
+	rq->special = (void *)failed_command;
 
 	if (failed_command)
 		ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x",
@@ -247,10 +247,10 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
-	 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
+	 * For REQ_TYPE_SENSE, "rq->special" points to the original
 	 * failed request
 	 */
-	struct request *failed = (struct request *)rq->buffer;
+	struct request *failed = (struct request *)rq->special;
 	struct cdrom_info *info = drive->driver_data;
 	void *sense = &info->sense_data;
 
-- 
cgit v0.10.2


From a1df5169f9bf08f6067029bfb840a05e282b1b97 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide: add helpers for preparing sense requests

This is in preparation of removing the queueing of a sense request out
of the IRQ handler path.

Use struct request_sense as a general sense buffer for all ATAPI
devices ide-{floppy,tape,cd}.

tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as
      it can cause deadlock.  Converted to use inline struct request
      and blk_rq_init().

    * Added xfer / cdb len selection depending on device type.

    * All sense prep logics folded into ide_prep_sense() which never
      fails.

    * hwif->rq clearing and sense_rq used handling moved into
      ide_queue_sense_rq().

    * blk_rq_map_kern() conversion is moved to later patch.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2894577..c6e0348 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -191,6 +191,67 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc)
 }
 EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq)
+{
+	struct request_sense *sense = &drive->sense_data;
+	struct request *sense_rq = &drive->sense_rq;
+	unsigned int cmd_len, sense_len;
+
+	debug_log("%s: enter\n", __func__);
+
+	switch (drive->media) {
+	case ide_floppy:
+		cmd_len = 255;
+		sense_len = 18;
+		break;
+	case ide_tape:
+		cmd_len = 20;
+		sense_len = 20;
+		break;
+	default:
+		cmd_len = 18;
+		sense_len = 18;
+	}
+
+	BUG_ON(sense_len > sizeof(*sense));
+
+	if (blk_sense_request(rq) || drive->sense_rq_armed)
+		return;
+
+	memset(sense, 0, sizeof(*sense));
+
+	blk_rq_init(rq->q, sense_rq);
+	sense_rq->rq_disk = rq->rq_disk;
+
+	sense_rq->data = sense;
+	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
+	sense_rq->cmd[4] = cmd_len;
+	sense_rq->data_len = sense_len;
+
+	sense_rq->cmd_type = REQ_TYPE_SENSE;
+	sense_rq->cmd_flags |= REQ_PREEMPT;
+
+	if (drive->media == ide_tape)
+		sense_rq->cmd[13] = REQ_IDETAPE_PC1;
+
+	drive->sense_rq_armed = true;
+}
+EXPORT_SYMBOL_GPL(ide_prep_sense);
+
+void ide_queue_sense_rq(ide_drive_t *drive, void *special)
+{
+	BUG_ON(!drive->sense_rq_armed);
+
+	drive->sense_rq.special = special;
+	drive->sense_rq_armed = false;
+
+	drive->hwif->rq = NULL;
+
+	elv_add_request(drive->queue, &drive->sense_rq,
+			ELEVATOR_INSERT_FRONT, 0);
+}
+EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
+
 /*
  * Called when an error was detected during the last packet command.
  * We queue a request sense packet command in the head of the request list.
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 846a1e1..a69ccac 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -26,6 +26,9 @@
 #include <asm/io.h>
 #include <asm/mutex.h>
 
+/* for request_sense */
+#include <linux/cdrom.h>
+
 #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300)
 # define SUPPORT_VLB_SYNC 0
 #else
@@ -602,6 +605,11 @@ struct ide_drive_s {
 
 	struct ide_atapi_pc request_sense_pc;
 	struct request request_sense_rq;
+
+	/* current sense rq and buffer */
+	bool sense_rq_armed;
+	struct request sense_rq;
+	struct request_sense sense_data;
 };
 
 typedef struct ide_drive_s ide_drive_t;
@@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *, struct gendisk *);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq);
+void ide_queue_sense_rq(ide_drive_t *drive, void *special);
+
 int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
-- 
cgit v0.10.2


From 746d5e43274e9ea6cbd58818afc9239d41fb4e1e Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-cd: convert to using generic sense request

Preallocate a sense request in the ->do_request method and reinitialize
it only on demand, in case it's been consumed in the IRQ handler path.
The reason for this is that we don't want to be mapping rq to bio in
the IRQ path and introduce all kinds of unnecessary hacks to the block
layer.

tj: * Both user and kernel PC requests expect sense data to be stored
      in separate storage other than drive->sense_data.  Copy sense
      data to rq->sense on completion if rq->sense is not NULL.  This
      fixes bogus sense data on PC requests.

As a result, remove cdrom_queue_request_sense.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index eb3c299..7b21c7e 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -206,44 +206,6 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
 	ide_cd_log_error(drive->name, failed_command, sense);
 }
 
-static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
-				      struct request *failed_command)
-{
-	struct cdrom_info *info		= drive->driver_data;
-	struct request *rq		= &drive->request_sense_rq;
-
-	ide_debug_log(IDE_DBG_SENSE, "enter");
-
-	if (sense == NULL)
-		sense = &info->sense_data;
-
-	memset(sense, 0, 18);
-
-	/* stuff the sense request in front of our current request */
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_ATA_PC;
-	rq->rq_disk = info->disk;
-
-	rq->data = sense;
-	rq->cmd[0] = GPCMD_REQUEST_SENSE;
-	rq->cmd[4] = 18;
-	rq->data_len = 18;
-
-	rq->cmd_type = REQ_TYPE_SENSE;
-	rq->cmd_flags |= REQ_PREEMPT;
-
-	/* NOTE! Save the failed command in "rq->special" */
-	rq->special = (void *)failed_command;
-
-	if (failed_command)
-		ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x",
-					     failed_command->cmd[0]);
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
 static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
@@ -251,11 +213,16 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 	 * failed request
 	 */
 	struct request *failed = (struct request *)rq->special;
-	struct cdrom_info *info = drive->driver_data;
-	void *sense = &info->sense_data;
+	struct request_sense *sense = &drive->sense_data;
 
 	if (failed) {
 		if (failed->sense) {
+			/*
+			 * Sense is always read into drive->sense_data.
+			 * Copy back if the failed request has its
+			 * sense pointer set.
+			 */
+			memcpy(failed->sense, sense, 18);
 			sense = failed->sense;
 			failed->sense_len = rq->sense_len;
 		}
@@ -431,7 +398,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
 	/* if we got a CHECK_CONDITION status, queue a request sense command */
 	if (stat & ATA_ERR)
-		cdrom_queue_request_sense(drive, NULL, NULL);
+		ide_queue_sense_rq(drive, NULL);
 	return 1;
 
 end_request:
@@ -445,7 +412,7 @@ end_request:
 
 		hwif->rq = NULL;
 
-		cdrom_queue_request_sense(drive, rq->sense, rq);
+		ide_queue_sense_rq(drive, rq);
 		return 1;
 	} else
 		return 2;
@@ -893,6 +860,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		goto out_end;
 	}
 
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 1d97101..93a3cf1b 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -87,10 +87,6 @@ struct cdrom_info {
 
 	struct atapi_toc *toc;
 
-	/* The result of the last successful request sense command
-	   on this device. */
-	struct request_sense sense_data;
-
 	u8 max_speed;		/* Max speed of the drive. */
 	u8 current_speed;	/* Current speed of the drive. */
 
-- 
cgit v0.10.2


From 6b544fcc8cd0a04eb42de9d1ecdd345e979d6ada Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense
 buffer

Since we're issuing REQ_TYPE_SENSE now we need to allow those types of
rqs in the ->do_request callbacks. As a future improvement, sense_len
assignment might be unified across all ATAPI devices. Borislav to
check with specs and test.

As a result, get rid of ide_queue_pc_head() and
drive->request_sense_rq.

tj: * Init request sense ide_atapi_pc from sense request.  In the
      longer timer, it would probably better to fold
      ide_create_request_sense_cmd() into its only current user -
      ide_floppy_get_format_progress().

    * ide_retry_pc() no longer takes @disk.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index c6e0348..972c522 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -80,34 +80,6 @@ void ide_init_pc(struct ide_atapi_pc *pc)
 EXPORT_SYMBOL_GPL(ide_init_pc);
 
 /*
- * Generate a new packet command request in front of the request queue, before
- * the current request, so that it will be processed immediately, on the next
- * pass through the driver.
- */
-static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
-			      struct ide_atapi_pc *pc, struct request *rq)
-{
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = (char *)pc;
-	rq->rq_disk = disk;
-
-	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
-	}
-
-	memcpy(rq->cmd, pc->c, 12);
-	if (drive->media == ide_tape)
-		rq->cmd[13] = REQ_IDETAPE_PC1;
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
-/*
  * Add a special packet command request to the tail of the request queue,
  * and wait for it to be serviced.
  */
@@ -254,18 +226,26 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 
 /*
  * Called when an error was detected during the last packet command.
- * We queue a request sense packet command in the head of the request list.
+ * We queue a request sense packet command at the head of the request
+ * queue.
  */
-void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
+void ide_retry_pc(ide_drive_t *drive)
 {
-	struct request *rq = &drive->request_sense_rq;
+	struct request *sense_rq = &drive->sense_rq;
 	struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
 	(void)ide_read_error(drive);
-	ide_create_request_sense_cmd(drive, pc);
+
+	/* init pc from sense_rq */
+	ide_init_pc(pc);
+	memcpy(pc->c, sense_rq->cmd, 12);
+	pc->buf = sense_rq->data;
+	pc->req_xfer = sense_rq->data_len;
+
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
-	ide_queue_pc_head(drive, disk, pc, rq);
+
+	ide_queue_sense_rq(drive, pc);
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -404,7 +384,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			debug_log("[cmd %x]: check condition\n", rq->cmd[0]);
 
 			/* Retry operation */
-			ide_retry_pc(drive, rq->rq_disk);
+			ide_retry_pc(drive);
 
 			/* queued, but not started */
 			return ide_stopped;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 9460033..d3302cc 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -263,7 +263,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		}
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-	} else if (blk_special_request(rq)) {
+	} else if (blk_special_request(rq) || blk_sense_request(rq)) {
 		pc = (struct ide_atapi_pc *)rq->special;
 	} else if (blk_pc_request(rq)) {
 		pc = &floppy->queued_pc;
@@ -273,6 +273,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		goto out_end;
 	}
 
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index aadf53c..8324dfa 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -695,7 +695,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
 				printk(KERN_ERR "ide-tape: %s: I/O error, ",
 						tape->name);
 			/* Retry operation */
-			ide_retry_pc(drive, tape->disk);
+			ide_retry_pc(drive);
 			return ide_stopped;
 		}
 		pc->error = 0;
@@ -752,7 +752,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			(unsigned long long)rq->sector, rq->nr_sectors,
 			rq->current_nr_sectors);
 
-	if (!blk_special_request(rq)) {
+	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
 			"request queue (%d)\n", drive->name, rq->cmd_type);
@@ -840,6 +840,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	BUG();
 
 out:
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a69ccac..9e67cca 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -604,7 +604,6 @@ struct ide_drive_s {
 	unsigned long atapi_flags;
 
 	struct ide_atapi_pc request_sense_pc;
-	struct request request_sense_rq;
 
 	/* current sense rq and buffer */
 	bool sense_rq_armed;
@@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
 int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
-void ide_retry_pc(ide_drive_t *, struct gendisk *);
+void ide_retry_pc(ide_drive_t *drive);
 
 void ide_prep_sense(ide_drive_t *drive, struct request *rq);
 void ide_queue_sense_rq(ide_drive_t *drive, void *special);
-- 
cgit v0.10.2


From 5c4be57249e2e09136446597d2fe2a967c6ffef0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-cd,atapi: use bio for internal commands

Impact: unify request data buffer handling

rq->data is used mostly to pass kernel buffer through request queue
without using bio.  There are only a couple of places which still do
this in kernel and converting to bio isn't difficult.

This patch converts ide-cd and atapi to use bio instead of rq->data
for request sense and internal pc commands.  With previous change to
unify sense request handling, this is relatively easily achieved by
adding blk_rq_map_kern() during sense_rq prep and PC issue.

If blk_rq_map_kern() fails for sense, the error is deferred till sense
issue and aborts the failed command which triggered the sense.  Note
that this is a slim possibility as sense prep is done on each command
issue, so for the above condition to actually trigger, all preps since
the last sense issue till the issue of the request which would require
a sense should fail.

* do_request functions might sleep now.  This should be okay as ide
  request_fn - do_ide_request() - is invoked only from make_request
  and plug work.  Make sure this is the case by adding might_sleep()
  to do_ide_request().

* Functions which access the read sense data before the sense request
  is complete now should access bio_data(sense_rq->bio) as the sense
  buffer might have been copied during blk_rq_map_kern().

* ide-tape updated to map sg.

* cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC
  special case.  Simplified.

* tp_ops->output/input_data path dropped from ide_pc_intr().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 972c522..5cefe12 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -94,16 +94,18 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	rq->special = (char *)pc;
 
 	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
+		error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
+					GFP_NOIO);
+		if (error)
+			goto put_req;
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
 	if (drive->media == ide_tape)
 		rq->cmd[13] = REQ_IDETAPE_PC1;
 	error = blk_execute_rq(drive->queue, disk, rq, 0);
+put_req:
 	blk_put_request(rq);
-
 	return error;
 }
 EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
@@ -168,6 +170,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	struct request_sense *sense = &drive->sense_data;
 	struct request *sense_rq = &drive->sense_rq;
 	unsigned int cmd_len, sense_len;
+	int err;
 
 	debug_log("%s: enter\n", __func__);
 
@@ -193,13 +196,19 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	memset(sense, 0, sizeof(*sense));
 
 	blk_rq_init(rq->q, sense_rq);
-	sense_rq->rq_disk = rq->rq_disk;
 
-	sense_rq->data = sense;
+	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
+			      GFP_NOIO);
+	if (unlikely(err)) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "%s: failed to map sense buffer\n",
+			       drive->name);
+		return;
+	}
+
+	sense_rq->rq_disk = rq->rq_disk;
 	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	sense_rq->cmd[4] = cmd_len;
-	sense_rq->data_len = sense_len;
-
 	sense_rq->cmd_type = REQ_TYPE_SENSE;
 	sense_rq->cmd_flags |= REQ_PREEMPT;
 
@@ -210,9 +219,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(ide_prep_sense);
 
-void ide_queue_sense_rq(ide_drive_t *drive, void *special)
+int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 {
-	BUG_ON(!drive->sense_rq_armed);
+	/* deferred failure from ide_prep_sense() */
+	if (!drive->sense_rq_armed) {
+		printk(KERN_WARNING "%s: failed queue sense request\n",
+		       drive->name);
+		return -ENOMEM;
+	}
 
 	drive->sense_rq.special = special;
 	drive->sense_rq_armed = false;
@@ -221,6 +235,7 @@ void ide_queue_sense_rq(ide_drive_t *drive, void *special)
 
 	elv_add_request(drive->queue, &drive->sense_rq,
 			ELEVATOR_INSERT_FRONT, 0);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 
@@ -239,13 +254,14 @@ void ide_retry_pc(ide_drive_t *drive)
 	/* init pc from sense_rq */
 	ide_init_pc(pc);
 	memcpy(pc->c, sense_rq->cmd, 12);
-	pc->buf = sense_rq->data;
+	pc->buf = bio_data(sense_rq->bio);	/* pointer to mapped address */
 	pc->req_xfer = sense_rq->data_len;
 
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 
-	ide_queue_sense_rq(drive, pc);
+	if (ide_queue_sense_rq(drive, pc))
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -317,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	struct ide_cmd *cmd = &hwif->cmd;
 	struct request *rq = hwif->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	xfer_func_t *xferfunc;
 	unsigned int timeout, done;
 	u16 bcount;
 	u8 stat, ireason, dsc = 0;
@@ -411,7 +426,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					rq->errors = -EIO;
 			}
 
-			if (drive->media == ide_tape)
+			if (drive->media == ide_tape && !rq->bio)
 				done = ide_rq_bytes(rq); /* FIXME */
 			else
 				done = blk_rq_bytes(rq);
@@ -448,16 +463,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		return ide_do_reset(drive);
 	}
 
-	xferfunc = write ? tp_ops->output_data : tp_ops->input_data;
-
-	if (drive->media == ide_floppy && pc->buf == NULL) {
+	if (drive->media == ide_tape && pc->bh)
+		done = drive->pc_io_buffers(drive, pc, bcount, write);
+	else {
 		done = min_t(unsigned int, bcount, cmd->nleft);
 		ide_pio_bytes(drive, cmd, write, done);
-	} else if (drive->media == ide_tape && pc->bh) {
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	} else {
-		done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred);
-		xferfunc(drive, NULL, pc->cur_pos, done);
 	}
 
 	/* Update the current position */
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 7b21c7e..392a5bd 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -210,10 +210,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
 	 * For REQ_TYPE_SENSE, "rq->special" points to the original
-	 * failed request
+	 * failed request.  Also, the sense data should be read
+	 * directly from rq which might be different from the original
+	 * sense buffer if it got copied during mapping.
 	 */
 	struct request *failed = (struct request *)rq->special;
-	struct request_sense *sense = &drive->sense_data;
+	void *sense = bio_data(rq->bio);
 
 	if (failed) {
 		if (failed->sense) {
@@ -398,7 +400,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
 	/* if we got a CHECK_CONDITION status, queue a request sense command */
 	if (stat & ATA_ERR)
-		ide_queue_sense_rq(drive, NULL);
+		return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
 	return 1;
 
 end_request:
@@ -412,8 +414,7 @@ end_request:
 
 		hwif->rq = NULL;
 
-		ide_queue_sense_rq(drive, rq);
-		return 1;
+		return ide_queue_sense_rq(drive, rq) ? 2 : 1;
 	} else
 		return 2;
 }
@@ -507,8 +508,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		rq->cmd_flags |= cmd_flags;
 		rq->timeout = timeout;
 		if (buffer) {
-			rq->data = buffer;
-			rq->data_len = *bufflen;
+			error = blk_rq_map_kern(drive->queue, rq, buffer,
+						*bufflen, GFP_NOIO);
+			if (error) {
+				blk_put_request(rq);
+				return error;
+			}
 		}
 
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
@@ -802,15 +807,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 	drive->dma = 0;
 
 	/* sg request */
-	if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
+	if (rq->bio) {
 		struct request_queue *q = drive->queue;
+		char *buf = bio_data(rq->bio);
 		unsigned int alignment;
-		char *buf;
-
-		if (rq->bio)
-			buf = bio_data(rq->bio);
-		else
-			buf = rq->data;
 
 		drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 9b9e8b1..3245c2d 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -481,6 +481,9 @@ void do_ide_request(struct request_queue *q)
 
 	spin_unlock_irq(q->queue_lock);
 
+	/* HLD do_request() callback might sleep, make sure it's okay */
+	might_sleep();
+
 	if (ide_lock_host(host, hwif))
 		goto plug_device_2;
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 8324dfa..9b762a2 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -850,6 +850,9 @@ out:
 
 	cmd.rq = rq;
 
+	ide_init_sg_cmd(&cmd, pc->req_xfer);
+	ide_map_sg(drive, &cmd);
+
 	return ide_tape_issue_pc(drive, &cmd, pc);
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9e67cca..1957461 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *drive);
 
 void ide_prep_sense(ide_drive_t *drive, struct request *rq);
-void ide_queue_sense_rq(ide_drive_t *drive, void *special);
+int ide_queue_sense_rq(ide_drive_t *drive, void *special);
 
 int ide_cd_expiry(ide_drive_t *);
 
-- 
cgit v0.10.2


From fc38b521dcffcb07447cd98fedc56f495c10b90d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:43 +0900
Subject: ide-pm: don't abuse rq->data

Impact: cleanup rq->data usage

ide-pm uses rq->data to carry pointer to struct request_pm_state
through request queue and rq->special is used to carray pointer to
local struct ide_cmd, which isn't necessary.  Use rq->special for
request_pm_state instead and use local ide_cmd in
ide_start_power_step().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 3245c2d..6e3094e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -368,7 +368,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 		if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
-			struct request_pm_state *pm = rq->data;
+			struct request_pm_state *pm = rq->special;
 #ifdef DEBUG_PM
 			printk("%s: start_power_step(step: %d)\n",
 				drive->name, pm->pm_step);
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0d8a151..ba1488b 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -7,7 +7,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
-	struct ide_cmd cmd;
 	int ret;
 
 	/* call ACPI _GTM only once */
@@ -15,11 +14,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 		ide_acpi_get_timing(hwif);
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&cmd, 0, sizeof(cmd));
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
-	rq->special = &cmd;
-	rq->data = &rqpm;
+	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
 	if (mesg.event == PM_EVENT_PRETHAW)
 		mesg.event = PM_EVENT_FREEZE;
@@ -41,7 +38,6 @@ int generic_ide_resume(struct device *dev)
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
-	struct ide_cmd cmd;
 	int err;
 
 	/* call ACPI _PS0 / _STM only once */
@@ -53,12 +49,10 @@ int generic_ide_resume(struct device *dev)
 	ide_acpi_exec_tfs(drive);
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&cmd, 0, sizeof(cmd));
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_PM_RESUME;
 	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = &cmd;
-	rq->data = &rqpm;
+	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;
 	rqpm.pm_state = PM_EVENT_ON;
 
@@ -77,7 +71,7 @@ int generic_ide_resume(struct device *dev)
 
 void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 
 #ifdef DEBUG_PM
 	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -107,10 +101,8 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
 
 ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
-	struct ide_cmd *cmd = rq->special;
-
-	memset(cmd, 0, sizeof(*cmd));
+	struct request_pm_state *pm = rq->special;
+	struct ide_cmd cmd = { };
 
 	switch (pm->pm_step) {
 	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
@@ -123,12 +115,12 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 			return ide_stopped;
 		}
 		if (ata_id_flush_ext_enabled(drive->id))
-			cmd->tf.command = ATA_CMD_FLUSH_EXT;
+			cmd.tf.command = ATA_CMD_FLUSH_EXT;
 		else
-			cmd->tf.command = ATA_CMD_FLUSH;
+			cmd.tf.command = ATA_CMD_FLUSH;
 		goto out_do_tf;
 	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		cmd->tf.command = ATA_CMD_STANDBYNOW1;
+		cmd.tf.command = ATA_CMD_STANDBYNOW1;
 		goto out_do_tf;
 	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
 		ide_set_max_pio(drive);
@@ -141,7 +133,7 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 			ide_complete_power_step(drive, rq);
 		return ide_stopped;
 	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
-		cmd->tf.command = ATA_CMD_IDLEIMMEDIATE;
+		cmd.tf.command = ATA_CMD_IDLEIMMEDIATE;
 		goto out_do_tf;
 	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
 		/*
@@ -163,11 +155,11 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 	return ide_stopped;
 
 out_do_tf:
-	cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd->valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	cmd->protocol = ATA_PROT_NODATA;
+	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
+	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
+	cmd.protocol = ATA_PROT_NODATA;
 
-	return do_rw_taskfile(drive, cmd);
+	return do_rw_taskfile(drive, &cmd);
 }
 
 /**
@@ -181,7 +173,7 @@ out_do_tf:
 void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 {
 	struct request_queue *q = drive->queue;
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 	unsigned long flags;
 
 	ide_complete_power_step(drive, rq);
@@ -207,7 +199,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 
 void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 
 	if (blk_pm_suspend_request(rq) &&
 	    pm->pm_step == IDE_PM_START_SUSPEND)
-- 
cgit v0.10.2


From ea7066afcd590e4663e6dc010f93704164050f48 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape,floppy: fix failed command completion after request sense

Impact: fix infinite retry loop

After a command failed, ide-tape and floppy inserts REQUEST_SENSE in
front of the failed command and according to the result, sets
pc->retries, flags and errors.  After REQUEST_SENSE is complete, the
failed command is again at the front of the queue and if the verdict
was to terminate the request, the issue functions tries to complete it
directly by calling drive->pc_callback() and returning ide_stopped.

However, drive->pc_callback() doesn't complete a request.  It only
prepares for completion of the request.  As a result, this creates an
infinite loop where the failed request is retried perpetually.

Fix it by actually ending the request by calling ide_complete_rq().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index d3302cc..d20704a 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -141,6 +141,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 9b762a2..2b9a136 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -643,6 +643,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 		}
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 		return ide_stopped;
 	}
 	debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
-- 
cgit v0.10.2


From b3071d190d6757b14af002a9d79832f12de61bce Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-atapi,tape,floppy: allow ->pc_callback() to change rq->data_len

Impact: allow residual count implementation in ->pc_callback()

rq->data_len has two duties - carrying the number of input bytes on
issue and carrying residual count back to the issuer on completion.
ide-atapi completion callback ->pc_callback() is the right place to do
this but currently ide-atapi depends on rq->data_len carrying the
original request size after calling ->pc_callback() to complete the pc
request.

This patch makes ide_pc_intr(), ide_tape_issue_pc() and
ide_floppy_issue_pc() cache length to complete before calling
->pc_callback() so that it can modify rq->data_len as necessary.

Note: As using rq->data_len for two purposes can make cases like this
      incorrect in subtle ways, future changes will introduce separate
      field for residual count.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5cefe12..3df5442 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -409,6 +409,16 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
 			dsc = 1;
 
+		/*
+		 * ->pc_callback() might change rq->data_len for
+		 * residual count, cache total length.
+		 */
+		if (!blk_special_request(rq) &&
+		    (drive->media == ide_tape && !rq->bio))
+			done = ide_rq_bytes(rq);        /* FIXME */
+		else
+			done = blk_rq_bytes(rq);
+
 		/* Command finished - Call the callback function */
 		uptodate = drive->pc_callback(drive, dsc);
 
@@ -417,7 +427,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 		if (blk_special_request(rq)) {
 			rq->errors = 0;
-			done = blk_rq_bytes(rq);
 			error = 0;
 		} else {
 
@@ -426,11 +435,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					rq->errors = -EIO;
 			}
 
-			if (drive->media == ide_tape && !rq->bio)
-				done = ide_rq_bytes(rq); /* FIXME */
-			else
-				done = blk_rq_bytes(rq);
-
 			error = uptodate ? 0 : -EIO;
 		}
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index d20704a..537b7c5 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -134,14 +134,17 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
 	drive->pc = pc;
 
 	if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) {
+		unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
 		if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR))
 			ide_floppy_report_error(floppy, pc);
+
 		/* Giving up */
 		pc->error = IDE_DRV_ERROR_GENERAL;
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
+		ide_complete_rq(drive, -EIO, done);
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 2b9a136..8226d52 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -622,6 +622,8 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 
 	if (pc->retries > IDETAPE_MAX_PC_RETRIES ||
 		(pc->flags & PC_FLAG_ABORT)) {
+		unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
 		/*
 		 * We will "abort" retrying a packet command in case legitimate
 		 * error code was received (crossing a filemark, or end of the
@@ -641,9 +643,10 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 			/* Giving up */
 			pc->error = IDE_DRV_ERROR_GENERAL;
 		}
+
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
+		ide_complete_rq(drive, -EIO, done);
 		return ide_stopped;
 	}
 	debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
-- 
cgit v0.10.2


From 35ab8d3251833e4052aa64b09b08195e949518c7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape: use single continuous buffer

Impact: simpler buffer allocation and handling, kills OOM, fix DMA transfers

ide-tape has its own multiple buffer mechanism using struct
idetape_bh.  It allocates buffer with decreasing order-of-two
allocations so that it results in minimum number of segments.
However, the implementation is quite complex and works in a way that
no other block or ide driver works necessitating a lot of special case
handling.

The benefit this complex allocation scheme brings is questionable as
PIO or DMA the number of segments (16 maximum) doesn't make any
noticeable difference and it also doesn't negate the need for multiple
order allocation which can fail under memory pressure or high
fragmentation although it does lower the highest order necessary by
one when the buffer size isn't power of two.

As the first step to remove the custom buffer management, this patch
makes ide-tape allocate single continous buffer.  The maximum order is
four.  I doubt the change would cause any trouble but if it ever
matters, it should be converted to regular sg mechanism like everyone
else and even in that case dropping custom buffer handling and moving
to standard mechanism first make sense as an intermediate step.

This patch makes the first bh to contain the whole buffer and drops
multi bh handling code.  Following patches will make further changes.

This patch has the side effect of killing OOM triggered by allocation
path and fixing DMA transfers.  Previously, bug in alloc path
triggered OOM on command issue and commands were passed to DMA engine
without DMA-mapping all the segments.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 8226d52..b2afbc7 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -134,7 +134,6 @@ enum {
 struct idetape_bh {
 	u32 b_size;
 	atomic_t b_count;
-	struct idetape_bh *b_reqnext;
 	char *b_data;
 };
 
@@ -228,10 +227,6 @@ typedef struct ide_tape_obj {
 	char *b_data;
 	int b_count;
 
-	int pages_per_buffer;
-	/* Wasted space in each stage */
-	int excess_bh_size;
-
 	/* Measures average tape speed */
 	unsigned long avg_time;
 	int avg_size;
@@ -303,9 +298,7 @@ static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 	struct idetape_bh *bh = pc->bh;
 	int count;
 
-	while (bcount) {
-		if (bh == NULL)
-			break;
+	if (bcount && bh) {
 		count = min(
 			(unsigned int)(bh->b_size - atomic_read(&bh->b_count)),
 			bcount);
@@ -313,15 +306,10 @@ static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 					atomic_read(&bh->b_count), count);
 		bcount -= count;
 		atomic_add(count, &bh->b_count);
-		if (atomic_read(&bh->b_count) == bh->b_size) {
-			bh = bh->b_reqnext;
-			if (bh)
-				atomic_set(&bh->b_count, 0);
-		}
+		if (atomic_read(&bh->b_count) == bh->b_size)
+			pc->bh = NULL;
 	}
 
-	pc->bh = bh;
-
 	return bcount;
 }
 
@@ -331,22 +319,14 @@ static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 	struct idetape_bh *bh = pc->bh;
 	int count;
 
-	while (bcount) {
-		if (bh == NULL)
-			break;
+	if (bcount && bh) {
 		count = min((unsigned int)pc->b_count, (unsigned int)bcount);
 		drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count);
 		bcount -= count;
 		pc->b_data += count;
 		pc->b_count -= count;
-		if (!pc->b_count) {
-			bh = bh->b_reqnext;
-			pc->bh = bh;
-			if (bh) {
-				pc->b_data = bh->b_data;
-				pc->b_count = atomic_read(&bh->b_count);
-			}
-		}
+		if (!pc->b_count)
+			pc->bh = NULL;
 	}
 
 	return bcount;
@@ -355,24 +335,20 @@ static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
 {
 	struct idetape_bh *bh = pc->bh;
-	int count;
 	unsigned int bcount = pc->xferred;
 
 	if (pc->flags & PC_FLAG_WRITING)
 		return;
-	while (bcount) {
-		if (bh == NULL) {
+	if (bcount) {
+		if (bh == NULL || bcount > bh->b_size) {
 			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
 					__func__);
 			return;
 		}
-		count = min((unsigned int)bh->b_size, (unsigned int)bcount);
-		atomic_set(&bh->b_count, count);
+		atomic_set(&bh->b_count, bcount);
 		if (atomic_read(&bh->b_count) == bh->b_size)
-			bh = bh->b_reqnext;
-		bcount -= count;
+			pc->bh = NULL;
 	}
-	pc->bh = bh;
 }
 
 /*
@@ -439,24 +415,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
 /* Free data buffers completely. */
 static void ide_tape_kfree_buffer(idetape_tape_t *tape)
 {
-	struct idetape_bh *prev_bh, *bh = tape->merge_bh;
-
-	while (bh) {
-		u32 size = bh->b_size;
-
-		while (size) {
-			unsigned int order = fls(size >> PAGE_SHIFT)-1;
-
-			if (bh->b_data)
-				free_pages((unsigned long)bh->b_data, order);
+	struct idetape_bh *bh = tape->merge_bh;
 
-			size &= (order-1);
-			bh->b_data += (1 << order) * PAGE_SIZE;
-		}
-		prev_bh = bh;
-		bh = bh->b_reqnext;
-		kfree(prev_bh);
-	}
+	kfree(bh->b_data);
+	kfree(bh);
 }
 
 static void ide_tape_handle_dsc(ide_drive_t *);
@@ -861,117 +823,50 @@ out:
 }
 
 /*
- * The function below uses __get_free_pages to allocate a data buffer of size
- * tape->buffer_size (or a bit more). We attempt to combine sequential pages as
- * much as possible.
- *
- * It returns a pointer to the newly allocated buffer, or NULL in case of
- * failure.
+ * It returns a pointer to the newly allocated buffer, or NULL in case
+ * of failure.
  */
 static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape,
-						  int full, int clear)
-{
-	struct idetape_bh *prev_bh, *bh, *merge_bh;
-	int pages = tape->pages_per_buffer;
-	unsigned int order, b_allocd;
-	char *b_data = NULL;
-
-	merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-	bh = merge_bh;
-	if (bh == NULL)
-		goto abort;
-
-	order = fls(pages) - 1;
-	bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-	if (!bh->b_data)
-		goto abort;
-	b_allocd = (1 << order) * PAGE_SIZE;
-	pages &= (order-1);
-
-	if (clear)
-		memset(bh->b_data, 0, b_allocd);
-	bh->b_reqnext = NULL;
-	bh->b_size = b_allocd;
-	atomic_set(&bh->b_count, full ? bh->b_size : 0);
+						  int full)
+{
+	struct idetape_bh *bh;
 
-	while (pages) {
-		order = fls(pages) - 1;
-		b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-		if (!b_data)
-			goto abort;
-		b_allocd = (1 << order) * PAGE_SIZE;
-
-		if (clear)
-			memset(b_data, 0, b_allocd);
-
-		/* newly allocated page frames below buffer header or ...*/
-		if (bh->b_data == b_data + b_allocd) {
-			bh->b_size += b_allocd;
-			bh->b_data -= b_allocd;
-			if (full)
-				atomic_add(b_allocd, &bh->b_count);
-			continue;
-		}
-		/* they are above the header */
-		if (b_data == bh->b_data + bh->b_size) {
-			bh->b_size += b_allocd;
-			if (full)
-				atomic_add(b_allocd, &bh->b_count);
-			continue;
-		}
-		prev_bh = bh;
-		bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-		if (!bh) {
-			free_pages((unsigned long) b_data, order);
-			goto abort;
-		}
-		bh->b_reqnext = NULL;
-		bh->b_data = b_data;
-		bh->b_size = b_allocd;
-		atomic_set(&bh->b_count, full ? bh->b_size : 0);
-		prev_bh->b_reqnext = bh;
+	bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
+	if (!bh)
+		return NULL;
 
-		pages &= (order-1);
+	bh->b_data = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (!bh->b_data) {
+		kfree(bh);
+		return NULL;
 	}
 
-	bh->b_size -= tape->excess_bh_size;
-	if (full)
-		atomic_sub(tape->excess_bh_size, &bh->b_count);
-	return merge_bh;
-abort:
-	ide_tape_kfree_buffer(tape);
-	return NULL;
+	bh->b_size = tape->buffer_size;
+	atomic_set(&bh->b_count, full ? bh->b_size : 0);
+
+	return bh;
 }
 
 static int idetape_copy_stage_from_user(idetape_tape_t *tape,
 					const char __user *buf, int n)
 {
 	struct idetape_bh *bh = tape->bh;
-	int count;
 	int ret = 0;
 
-	while (n) {
-		if (bh == NULL) {
+	if (n) {
+		if (bh == NULL || n > bh->b_size - atomic_read(&bh->b_count)) {
 			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
 					__func__);
 			return 1;
 		}
-		count = min((unsigned int)
-				(bh->b_size - atomic_read(&bh->b_count)),
-				(unsigned int)n);
 		if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf,
-				count))
+				   n))
 			ret = 1;
-		n -= count;
-		atomic_add(count, &bh->b_count);
-		buf += count;
-		if (atomic_read(&bh->b_count) == bh->b_size) {
-			bh = bh->b_reqnext;
-			if (bh)
-				atomic_set(&bh->b_count, 0);
-		}
+		atomic_add(n, &bh->b_count);
+		if (atomic_read(&bh->b_count) == bh->b_size)
+			tape->bh = NULL;
 	}
-	tape->bh = bh;
+
 	return ret;
 }
 
@@ -979,30 +874,20 @@ static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf,
 				      int n)
 {
 	struct idetape_bh *bh = tape->bh;
-	int count;
 	int ret = 0;
 
-	while (n) {
-		if (bh == NULL) {
+	if (n) {
+		if (bh == NULL || n > tape->b_count) {
 			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
 					__func__);
 			return 1;
 		}
-		count = min(tape->b_count, n);
-		if  (copy_to_user(buf, tape->b_data, count))
+		if (copy_to_user(buf, tape->b_data, n))
 			ret = 1;
-		n -= count;
-		tape->b_data += count;
-		tape->b_count -= count;
-		buf += count;
-		if (!tape->b_count) {
-			bh = bh->b_reqnext;
-			tape->bh = bh;
-			if (bh) {
-				tape->b_data = bh->b_data;
-				tape->b_count = atomic_read(&bh->b_count);
-			}
-		}
+		tape->b_data += n;
+		tape->b_count -= n;
+		if (!tape->b_count)
+			tape->bh = NULL;
 	}
 	return ret;
 }
@@ -1254,7 +1139,7 @@ static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int blocks, min;
+	int blocks;
 	struct idetape_bh *bh;
 
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
@@ -1269,31 +1154,16 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 	if (tape->merge_bh_size) {
 		blocks = tape->merge_bh_size / tape->blk_size;
 		if (tape->merge_bh_size % tape->blk_size) {
-			unsigned int i;
-
+			unsigned int i = tape->blk_size -
+				tape->merge_bh_size % tape->blk_size;
 			blocks++;
-			i = tape->blk_size - tape->merge_bh_size %
-				tape->blk_size;
-			bh = tape->bh->b_reqnext;
-			while (bh) {
-				atomic_set(&bh->b_count, 0);
-				bh = bh->b_reqnext;
-			}
 			bh = tape->bh;
-			while (i) {
-				if (bh == NULL) {
-					printk(KERN_INFO "ide-tape: bug,"
-							 " bh NULL\n");
-					break;
-				}
-				min = min(i, (unsigned int)(bh->b_size -
-						atomic_read(&bh->b_count)));
+			if (bh) {
 				memset(bh->b_data + atomic_read(&bh->b_count),
-						0, min);
-				atomic_add(min, &bh->b_count);
-				i -= min;
-				bh = bh->b_reqnext;
-			}
+				       0, i);
+				atomic_add(i, &bh->b_count);
+			} else
+				printk(KERN_INFO "ide-tape: bug, bh NULL\n");
 		}
 		(void) idetape_add_chrdev_write_request(drive, blocks);
 		tape->merge_bh_size = 0;
@@ -1321,7 +1191,7 @@ static int idetape_init_read(ide_drive_t *drive)
 					 " 0 now\n");
 			tape->merge_bh_size = 0;
 		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
+		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0);
 		if (!tape->merge_bh)
 			return -ENOMEM;
 		tape->chrdev_dir = IDETAPE_DIR_READ;
@@ -1368,23 +1238,18 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	struct idetape_bh *bh;
+	struct idetape_bh *bh = tape->merge_bh;
 	int blocks;
 
 	while (bcount) {
 		unsigned int count;
 
-		bh = tape->merge_bh;
 		count = min(tape->buffer_size, bcount);
 		bcount -= count;
 		blocks = count / tape->blk_size;
-		while (count) {
-			atomic_set(&bh->b_count,
-				   min(count, (unsigned int)bh->b_size));
-			memset(bh->b_data, 0, atomic_read(&bh->b_count));
-			count -= atomic_read(&bh->b_count);
-			bh = bh->b_reqnext;
-		}
+		atomic_set(&bh->b_count, count);
+		memset(bh->b_data, 0, atomic_read(&bh->b_count));
+
 		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks,
 				      tape->merge_bh);
 	}
@@ -1596,7 +1461,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 				"should be 0 now\n");
 			tape->merge_bh_size = 0;
 		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
+		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0);
 		if (!tape->merge_bh)
 			return -ENOMEM;
 		tape->chrdev_dir = IDETAPE_DIR_WRITE;
@@ -1970,7 +1835,7 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor)
 	idetape_tape_t *tape = drive->driver_data;
 
 	ide_tape_flush_merge_buffer(drive);
-	tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0);
+	tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1);
 	if (tape->merge_bh != NULL) {
 		idetape_pad_zeros(drive, tape->blk_size *
 				(tape->user_bs_factor - 1));
@@ -2201,11 +2066,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
 		tape->buffer_size = *ctl * tape->blk_size;
 	}
 	buffer_size = tape->buffer_size;
-	tape->pages_per_buffer = buffer_size / PAGE_SIZE;
-	if (buffer_size % PAGE_SIZE) {
-		tape->pages_per_buffer++;
-		tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE;
-	}
 
 	/* select the "best" DSC read/write polling freq */
 	speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]);
-- 
cgit v0.10.2


From 21d9c5d227593d15630ae83a336d1519653e9b8a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape: use standard data transfer mechanism

Impact: use standard way to transfer data

ide-tape uses rq in an interesting way.  For r/w requests, rq->special
is used to carry a private buffer management structure idetape_bh and
rq->nr_sectors and current_nr_sectors are initialized to the number of
idetape blocks which isn't necessary 512 bytes.  Also,
rq->current_nr_sectors is used to report back the residual count in
units of idetape blocks.

This peculiarity taxes both block layer and ide.  ide-atapi has
different paths and hooks to accomodate it and what a rq means becomes
quite confusing and making changes at the block layer becomes quite
difficult and error-prone.

This patch makes ide-tape use bio instead.  With the previous patch,
ide-tape currently is using single contiguos buffer so replacing it
isn't difficult.  Data buffer is mapped into bio using
blk_rq_map_kern() in idetape_queue_rw_tail().  idetape_io_buffers()
and idetape_update_buffers() are dropped and pc->bh is set to null to
tell ide-atapi to use standard data transfer mechanism and idetape_bh
byte counts are updated by the issuer on completion using the residual
count.

This change also nicely removes the FIXME in ide_pc_intr() where
ide-tape rqs need to be completed using ide_rq_bytes() instead of
blk_rq_bytes() (although this didn't really matter as the request
didn't have bio).

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 3df5442..b9dd450 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -413,11 +413,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		 * ->pc_callback() might change rq->data_len for
 		 * residual count, cache total length.
 		 */
-		if (!blk_special_request(rq) &&
-		    (drive->media == ide_tape && !rq->bio))
-			done = ide_rq_bytes(rq);        /* FIXME */
-		else
-			done = blk_rq_bytes(rq);
+		done = blk_rq_bytes(rq);
 
 		/* Command finished - Call the callback function */
 		uptodate = drive->pc_callback(drive, dsc);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index b2afbc7..b373ac8 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -292,65 +292,6 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i)
 	return tape;
 }
 
-static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				  unsigned int bcount)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-
-	if (bcount && bh) {
-		count = min(
-			(unsigned int)(bh->b_size - atomic_read(&bh->b_count)),
-			bcount);
-		drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data +
-					atomic_read(&bh->b_count), count);
-		bcount -= count;
-		atomic_add(count, &bh->b_count);
-		if (atomic_read(&bh->b_count) == bh->b_size)
-			pc->bh = NULL;
-	}
-
-	return bcount;
-}
-
-static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				   unsigned int bcount)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-
-	if (bcount && bh) {
-		count = min((unsigned int)pc->b_count, (unsigned int)bcount);
-		drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count);
-		bcount -= count;
-		pc->b_data += count;
-		pc->b_count -= count;
-		if (!pc->b_count)
-			pc->bh = NULL;
-	}
-
-	return bcount;
-}
-
-static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-	struct idetape_bh *bh = pc->bh;
-	unsigned int bcount = pc->xferred;
-
-	if (pc->flags & PC_FLAG_WRITING)
-		return;
-	if (bcount) {
-		if (bh == NULL || bcount > bh->b_size) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return;
-		}
-		atomic_set(&bh->b_count, bcount);
-		if (atomic_read(&bh->b_count) == bh->b_size)
-			pc->bh = NULL;
-	}
-}
-
 /*
  * called on each failed packet command retry to analyze the request sense. We
  * currently do not utilize this information.
@@ -368,12 +309,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
 		 pc->c[0], tape->sense_key, tape->asc, tape->ascq);
 
 	/* Correct pc->xferred by asking the tape.	 */
-	if (pc->flags & PC_FLAG_DMA_ERROR) {
+	if (pc->flags & PC_FLAG_DMA_ERROR)
 		pc->xferred = pc->req_xfer -
 			tape->blk_size *
 			get_unaligned_be32(&sense[3]);
-		idetape_update_buffers(drive, pc);
-	}
 
 	/*
 	 * If error was the result of a zero-length read or write command,
@@ -458,7 +397,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		}
 
 		tape->first_frame += blocks;
-		rq->current_nr_sectors -= blocks;
+		rq->data_len -= blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
@@ -520,19 +459,6 @@ static void ide_tape_handle_dsc(ide_drive_t *drive)
 	idetape_postpone_request(drive);
 }
 
-static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				unsigned int bcount, int write)
-{
-	unsigned int bleft;
-
-	if (write)
-		bleft = idetape_output_buffers(drive, pc, bcount);
-	else
-		bleft = idetape_input_buffers(drive, pc, bcount);
-
-	return bcount - bleft;
-}
-
 /*
  * Packet Command Interface
  *
@@ -683,7 +609,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->bh = bh;
+	pc->bh = NULL;
 	pc->buf = NULL;
 	pc->buf_size = length * tape->blk_size;
 	pc->req_xfer = pc->buf_size;
@@ -1063,10 +989,12 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
 				 struct idetape_bh *bh)
 {
 	idetape_tape_t *tape = drive->driver_data;
+	size_t size = blocks * tape->blk_size;
 	struct request *rq;
-	int ret, errors;
+	int ret;
 
 	debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
+	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
@@ -1074,21 +1002,29 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
 	rq->rq_disk = tape->disk;
 	rq->special = (void *)bh;
 	rq->sector = tape->first_frame;
-	rq->nr_sectors = blocks;
-	rq->current_nr_sectors = blocks;
+
+	if (size) {
+		ret = blk_rq_map_kern(drive->queue, rq, bh->b_data, size,
+				      __GFP_WAIT);
+		if (ret)
+			goto out_put;
+	}
+
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
-	errors = rq->errors;
-	ret = tape->blk_size * (blocks - rq->current_nr_sectors);
-	blk_put_request(rq);
+	/* calculate the number of transferred bytes and update bh */
+	size -= rq->data_len;
+	if (cmd == REQ_IDETAPE_READ)
+		atomic_add(size, &bh->b_count);
 
-	if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0)
-		return 0;
+	ret = size;
+	if (rq->errors == IDE_DRV_ERROR_GENERAL)
+		ret = -EIO;
 
 	if (tape->merge_bh)
 		idetape_init_merge_buffer(tape);
-	if (errors == IDE_DRV_ERROR_GENERAL)
-		return -EIO;
+out_put:
+	blk_put_request(rq);
 	return ret;
 }
 
@@ -2034,8 +1970,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
 	u16 *ctl = (u16 *)&tape->caps[12];
 
 	drive->pc_callback	 = ide_tape_callback;
-	drive->pc_update_buffers = idetape_update_buffers;
-	drive->pc_io_buffers	 = ide_tape_io_buffers;
 
 	drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP;
 
-- 
cgit v0.10.2


From 963da55c4b9eeeb2085ca74ba927cf77bce966d4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape: kill idetape_bh

Impact: kill now unnecessary idetape_bh

With everything using standard mechanisms, there is no need for
idetape_bh anymore.  Kill it and use tape->buf, cur and valid to
describe data buffer instead.

Changes worth mentioning are...

* idetape_queue_rq_tail() now always queue tape->buf and and adjusts
  buffer state properly before completion.

* idetape_pad_zeros() clears the buffer only once.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index b373ac8..d2e9c09 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -131,12 +131,6 @@ enum {
 	IDETAPE_DIR_WRITE = (1 << 2),
 };
 
-struct idetape_bh {
-	u32 b_size;
-	atomic_t b_count;
-	char *b_data;
-};
-
 /* Tape door status */
 #define DOOR_UNLOCKED			0
 #define DOOR_LOCKED			1
@@ -218,14 +212,12 @@ typedef struct ide_tape_obj {
 
 	/* Data buffer size chosen based on the tape's recommendation */
 	int buffer_size;
-	/* merge buffer */
-	struct idetape_bh *merge_bh;
-	/* size of the merge buffer */
-	int merge_bh_size;
-	/* pointer to current buffer head within the merge buffer */
-	struct idetape_bh *bh;
-	char *b_data;
-	int b_count;
+	/* Staging buffer of buffer_size bytes */
+	void *buf;
+	/* The read/write cursor */
+	void *cur;
+	/* The number of valid bytes in buf */
+	size_t valid;
 
 	/* Measures average tape speed */
 	unsigned long avg_time;
@@ -351,15 +343,6 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
 	}
 }
 
-/* Free data buffers completely. */
-static void ide_tape_kfree_buffer(idetape_tape_t *tape)
-{
-	struct idetape_bh *bh = tape->merge_bh;
-
-	kfree(bh->b_data);
-	kfree(bh);
-}
-
 static void ide_tape_handle_dsc(ide_drive_t *);
 
 static int ide_tape_callback(ide_drive_t *drive, int dsc)
@@ -603,8 +586,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 				   struct ide_atapi_pc *pc, struct request *rq,
 				   u8 opcode)
 {
-	struct idetape_bh *bh = (struct idetape_bh *)rq->special;
-	unsigned int length = rq->current_nr_sectors;
+	unsigned int length = rq->nr_sectors;
 
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
@@ -616,14 +598,11 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	if (pc->req_xfer == tape->buffer_size)
 		pc->flags |= PC_FLAG_DMA_OK;
 
-	if (opcode == READ_6) {
+	if (opcode == READ_6)
 		pc->c[0] = READ_6;
-		atomic_set(&bh->b_count, 0);
-	} else if (opcode == WRITE_6) {
+	else if (opcode == WRITE_6) {
 		pc->c[0] = WRITE_6;
 		pc->flags |= PC_FLAG_WRITING;
-		pc->b_data = bh->b_data;
-		pc->b_count = atomic_read(&bh->b_count);
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
@@ -639,10 +618,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	struct ide_cmd cmd;
 	u8 stat;
 
-	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu,"
-			" current_nr_sectors: %u\n",
-			(unsigned long long)rq->sector, rq->nr_sectors,
-			rq->current_nr_sectors);
+	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n"
+		  (unsigned long long)rq->sector, rq->nr_sectors);
 
 	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
@@ -749,89 +726,6 @@ out:
 }
 
 /*
- * It returns a pointer to the newly allocated buffer, or NULL in case
- * of failure.
- */
-static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape,
-						  int full)
-{
-	struct idetape_bh *bh;
-
-	bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-	if (!bh)
-		return NULL;
-
-	bh->b_data = kmalloc(tape->buffer_size, GFP_KERNEL);
-	if (!bh->b_data) {
-		kfree(bh);
-		return NULL;
-	}
-
-	bh->b_size = tape->buffer_size;
-	atomic_set(&bh->b_count, full ? bh->b_size : 0);
-
-	return bh;
-}
-
-static int idetape_copy_stage_from_user(idetape_tape_t *tape,
-					const char __user *buf, int n)
-{
-	struct idetape_bh *bh = tape->bh;
-	int ret = 0;
-
-	if (n) {
-		if (bh == NULL || n > bh->b_size - atomic_read(&bh->b_count)) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return 1;
-		}
-		if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf,
-				   n))
-			ret = 1;
-		atomic_add(n, &bh->b_count);
-		if (atomic_read(&bh->b_count) == bh->b_size)
-			tape->bh = NULL;
-	}
-
-	return ret;
-}
-
-static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf,
-				      int n)
-{
-	struct idetape_bh *bh = tape->bh;
-	int ret = 0;
-
-	if (n) {
-		if (bh == NULL || n > tape->b_count) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return 1;
-		}
-		if (copy_to_user(buf, tape->b_data, n))
-			ret = 1;
-		tape->b_data += n;
-		tape->b_count -= n;
-		if (!tape->b_count)
-			tape->bh = NULL;
-	}
-	return ret;
-}
-
-static void idetape_init_merge_buffer(idetape_tape_t *tape)
-{
-	struct idetape_bh *bh = tape->merge_bh;
-	tape->bh = tape->merge_bh;
-
-	if (tape->chrdev_dir == IDETAPE_DIR_WRITE)
-		atomic_set(&bh->b_count, 0);
-	else {
-		tape->b_data = bh->b_data;
-		tape->b_count = atomic_read(&bh->b_count);
-	}
-}
-
-/*
  * Write a filemark if write_filemark=1. Flush the device buffers without
  * writing a filemark otherwise.
  */
@@ -928,10 +822,10 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
 		return;
 
 	clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags);
-	tape->merge_bh_size = 0;
-	if (tape->merge_bh != NULL) {
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+	tape->valid = 0;
+	if (tape->buf != NULL) {
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
@@ -985,8 +879,7 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
  * Generate a read/write request for the block device interface and wait for it
  * to be serviced.
  */
-static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
-				 struct idetape_bh *bh)
+static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks)
 {
 	idetape_tape_t *tape = drive->driver_data;
 	size_t size = blocks * tape->blk_size;
@@ -1000,11 +893,10 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
-	rq->special = (void *)bh;
 	rq->sector = tape->first_frame;
 
 	if (size) {
-		ret = blk_rq_map_kern(drive->queue, rq, bh->b_data, size,
+		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
 				      __GFP_WAIT);
 		if (ret)
 			goto out_put;
@@ -1012,17 +904,17 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
 
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
-	/* calculate the number of transferred bytes and update bh */
+	/* calculate the number of transferred bytes and update buffer state */
 	size -= rq->data_len;
+	tape->cur = tape->buf;
 	if (cmd == REQ_IDETAPE_READ)
-		atomic_add(size, &bh->b_count);
+		tape->valid = size;
+	else
+		tape->valid = 0;
 
 	ret = size;
 	if (rq->errors == IDE_DRV_ERROR_GENERAL)
 		ret = -EIO;
-
-	if (tape->merge_bh)
-		idetape_init_merge_buffer(tape);
 out_put:
 	blk_put_request(rq);
 	return ret;
@@ -1064,49 +956,33 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
 /* Queue up a character device originated write request. */
 static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
 {
-	idetape_tape_t *tape = drive->driver_data;
-
 	debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
 
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
-				     blocks, tape->merge_bh);
+	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks);
 }
 
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
 	int blocks;
-	struct idetape_bh *bh;
 
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
 		printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
 				" but we are not writing.\n");
 		return;
 	}
-	if (tape->merge_bh_size > tape->buffer_size) {
-		printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n");
-		tape->merge_bh_size = tape->buffer_size;
-	}
-	if (tape->merge_bh_size) {
-		blocks = tape->merge_bh_size / tape->blk_size;
-		if (tape->merge_bh_size % tape->blk_size) {
-			unsigned int i = tape->blk_size -
-				tape->merge_bh_size % tape->blk_size;
+	if (tape->buf) {
+		blocks = tape->valid / tape->blk_size;
+		if (tape->valid % tape->blk_size) {
 			blocks++;
-			bh = tape->bh;
-			if (bh) {
-				memset(bh->b_data + atomic_read(&bh->b_count),
-				       0, i);
-				atomic_add(i, &bh->b_count);
-			} else
-				printk(KERN_INFO "ide-tape: bug, bh NULL\n");
+			memset(tape->buf + tape->valid, 0,
+			       tape->blk_size - tape->valid % tape->blk_size);
 		}
 		(void) idetape_add_chrdev_write_request(drive, blocks);
-		tape->merge_bh_size = 0;
 	}
-	if (tape->merge_bh != NULL) {
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+	if (tape->buf != NULL) {
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
 }
@@ -1122,15 +998,15 @@ static int idetape_init_read(ide_drive_t *drive)
 			ide_tape_flush_merge_buffer(drive);
 			idetape_flush_tape_buffers(drive);
 		}
-		if (tape->merge_bh || tape->merge_bh_size) {
-			printk(KERN_ERR "ide-tape: merge_bh_size should be"
-					 " 0 now\n");
-			tape->merge_bh_size = 0;
+		if (tape->buf || tape->valid) {
+			printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+			tape->valid = 0;
 		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0);
-		if (!tape->merge_bh)
+		tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+		if (!tape->buf)
 			return -ENOMEM;
 		tape->chrdev_dir = IDETAPE_DIR_READ;
+		tape->cur = tape->buf;
 
 		/*
 		 * Issue a read 0 command to ensure that DSC handshake is
@@ -1140,11 +1016,10 @@ static int idetape_init_read(ide_drive_t *drive)
 		 */
 		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
 			bytes_read = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_READ, 0,
-							tape->merge_bh);
+							REQ_IDETAPE_READ, 0);
 			if (bytes_read < 0) {
-				ide_tape_kfree_buffer(tape);
-				tape->merge_bh = NULL;
+				kfree(tape->buf);
+				tape->buf = NULL;
 				tape->chrdev_dir = IDETAPE_DIR_NONE;
 				return bytes_read;
 			}
@@ -1157,8 +1032,6 @@ static int idetape_init_read(ide_drive_t *drive)
 /* called from idetape_chrdev_read() to service a chrdev read request. */
 static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 {
-	idetape_tape_t *tape = drive->driver_data;
-
 	debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks);
 
 	/* If we are at a filemark, return a read length of 0 */
@@ -1167,27 +1040,21 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 
 	idetape_init_read(drive);
 
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks,
-				     tape->merge_bh);
+	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks);
 }
 
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	struct idetape_bh *bh = tape->merge_bh;
-	int blocks;
+
+	memset(tape->buf, 0, tape->buffer_size);
 
 	while (bcount) {
-		unsigned int count;
+		unsigned int count = min(tape->buffer_size, bcount);
 
-		count = min(tape->buffer_size, bcount);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
+				      count / tape->blk_size);
 		bcount -= count;
-		blocks = count / tape->blk_size;
-		atomic_set(&bh->b_count, count);
-		memset(bh->b_data, 0, atomic_read(&bh->b_count));
-
-		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks,
-				      tape->merge_bh);
 	}
 }
 
@@ -1267,7 +1134,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
 	}
 
 	if (tape->chrdev_dir == IDETAPE_DIR_READ) {
-		tape->merge_bh_size = 0;
+		tape->valid = 0;
 		if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
 			++count;
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1333,20 +1200,20 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		return rc;
 	if (count == 0)
 		return (0);
-	if (tape->merge_bh_size) {
-		actually_read = min((unsigned int)(tape->merge_bh_size),
-				    (unsigned int)count);
-		if (idetape_copy_stage_to_user(tape, buf, actually_read))
+	if (tape->valid) {
+		actually_read = min_t(unsigned int, tape->valid, count);
+		if (copy_to_user(buf, tape->cur, actually_read))
 			ret = -EFAULT;
 		buf += actually_read;
-		tape->merge_bh_size -= actually_read;
 		count -= actually_read;
+		tape->cur += actually_read;
+		tape->valid -= actually_read;
 	}
 	while (count >= tape->buffer_size) {
 		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
 		if (bytes_read <= 0)
 			goto finish;
-		if (idetape_copy_stage_to_user(tape, buf, bytes_read))
+		if (copy_to_user(buf, tape->cur, bytes_read))
 			ret = -EFAULT;
 		buf += bytes_read;
 		count -= bytes_read;
@@ -1357,10 +1224,11 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		if (bytes_read <= 0)
 			goto finish;
 		temp = min((unsigned long)count, (unsigned long)bytes_read);
-		if (idetape_copy_stage_to_user(tape, buf, temp))
+		if (copy_to_user(buf, tape->cur, temp))
 			ret = -EFAULT;
 		actually_read += temp;
-		tape->merge_bh_size = bytes_read-temp;
+		tape->cur += temp;
+		tape->valid -= temp;
 	}
 finish:
 	if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
@@ -1392,16 +1260,15 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
 		if (tape->chrdev_dir == IDETAPE_DIR_READ)
 			ide_tape_discard_merge_buffer(drive, 1);
-		if (tape->merge_bh || tape->merge_bh_size) {
-			printk(KERN_ERR "ide-tape: merge_bh_size "
-				"should be 0 now\n");
-			tape->merge_bh_size = 0;
+		if (tape->buf || tape->valid) {
+			printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+			tape->valid = 0;
 		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0);
-		if (!tape->merge_bh)
+		tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+		if (!tape->buf)
 			return -ENOMEM;
 		tape->chrdev_dir = IDETAPE_DIR_WRITE;
-		idetape_init_merge_buffer(tape);
+		tape->cur = tape->buf;
 
 		/*
 		 * Issue a write 0 command to ensure that DSC handshake is
@@ -1411,11 +1278,10 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 		 */
 		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
 			ssize_t retval = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_WRITE, 0,
-							tape->merge_bh);
+							REQ_IDETAPE_WRITE, 0);
 			if (retval < 0) {
-				ide_tape_kfree_buffer(tape);
-				tape->merge_bh = NULL;
+				kfree(tape->buf);
+				tape->buf = NULL;
 				tape->chrdev_dir = IDETAPE_DIR_NONE;
 				return retval;
 			}
@@ -1423,23 +1289,19 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	}
 	if (count == 0)
 		return (0);
-	if (tape->merge_bh_size) {
-		if (tape->merge_bh_size >= tape->buffer_size) {
-			printk(KERN_ERR "ide-tape: bug: merge buf too big\n");
-			tape->merge_bh_size = 0;
-		}
-		actually_written = min((unsigned int)
-				(tape->buffer_size - tape->merge_bh_size),
-				(unsigned int)count);
-		if (idetape_copy_stage_from_user(tape, buf, actually_written))
-				ret = -EFAULT;
+	if (tape->valid < tape->buffer_size) {
+		actually_written = min_t(unsigned int,
+					 tape->buffer_size - tape->valid,
+					 count);
+		if (copy_from_user(tape->cur, buf, actually_written))
+			ret = -EFAULT;
 		buf += actually_written;
-		tape->merge_bh_size += actually_written;
 		count -= actually_written;
+		tape->cur += actually_written;
+		tape->valid += actually_written;
 
-		if (tape->merge_bh_size == tape->buffer_size) {
+		if (tape->valid == tape->buffer_size) {
 			ssize_t retval;
-			tape->merge_bh_size = 0;
 			retval = idetape_add_chrdev_write_request(drive, ctl);
 			if (retval <= 0)
 				return (retval);
@@ -1447,7 +1309,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	}
 	while (count >= tape->buffer_size) {
 		ssize_t retval;
-		if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size))
+		if (copy_from_user(tape->cur, buf, tape->buffer_size))
 			ret = -EFAULT;
 		buf += tape->buffer_size;
 		count -= tape->buffer_size;
@@ -1458,9 +1320,10 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	}
 	if (count) {
 		actually_written += count;
-		if (idetape_copy_stage_from_user(tape, buf, count))
+		if (copy_from_user(tape->cur, buf, count))
 			ret = -EFAULT;
-		tape->merge_bh_size += count;
+		tape->cur += count;
+		tape->valid += count;
 	}
 	return ret ? ret : actually_written;
 }
@@ -1623,7 +1486,7 @@ static int idetape_chrdev_ioctl(struct inode *inode, struct file *file,
 		idetape_flush_tape_buffers(drive);
 	}
 	if (cmd == MTIOCGET || cmd == MTIOCPOS) {
-		block_offset = tape->merge_bh_size /
+		block_offset = tape->valid /
 			(tape->blk_size * tape->user_bs_factor);
 		position = idetape_read_position(drive);
 		if (position < 0)
@@ -1771,12 +1634,12 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor)
 	idetape_tape_t *tape = drive->driver_data;
 
 	ide_tape_flush_merge_buffer(drive);
-	tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1);
-	if (tape->merge_bh != NULL) {
+	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (tape->buf != NULL) {
 		idetape_pad_zeros(drive, tape->blk_size *
 				(tape->user_bs_factor - 1));
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 	idetape_write_filemark(drive);
 	idetape_flush_tape_buffers(drive);
@@ -2042,7 +1905,7 @@ static void ide_tape_release(struct device *dev)
 	ide_drive_t *drive = tape->drive;
 	struct gendisk *g = tape->disk;
 
-	BUG_ON(tape->merge_bh_size);
+	BUG_ON(tape->valid);
 
 	drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
 	drive->driver_data = NULL;
-- 
cgit v0.10.2


From 88f1b941c5c94016a59144a3c94c9ca31eb16205 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape: unify r/w init paths

Impact: cleanup

Read and write init paths are almost identical.  Unify them into
idetape_init_rw().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d2e9c09..4da7fa9 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -987,42 +987,50 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
 }
 
-static int idetape_init_read(ide_drive_t *drive)
+static int idetape_init_rw(ide_drive_t *drive, int dir)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int bytes_read;
+	int rc;
 
-	/* Initialize read operation */
-	if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-		if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
-			ide_tape_flush_merge_buffer(drive);
-			idetape_flush_tape_buffers(drive);
-		}
-		if (tape->buf || tape->valid) {
-			printk(KERN_ERR "ide-tape: valid should be 0 now\n");
-			tape->valid = 0;
-		}
-		tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
-		if (!tape->buf)
-			return -ENOMEM;
-		tape->chrdev_dir = IDETAPE_DIR_READ;
-		tape->cur = tape->buf;
+	BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE);
 
-		/*
-		 * Issue a read 0 command to ensure that DSC handshake is
-		 * switched from completion mode to buffer available mode.
-		 * No point in issuing this if DSC overlap isn't supported, some
-		 * drives (Seagate STT3401A) will return an error.
-		 */
-		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-			bytes_read = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_READ, 0);
-			if (bytes_read < 0) {
-				kfree(tape->buf);
-				tape->buf = NULL;
-				tape->chrdev_dir = IDETAPE_DIR_NONE;
-				return bytes_read;
-			}
+	if (tape->chrdev_dir == dir)
+		return 0;
+
+	if (tape->chrdev_dir == IDETAPE_DIR_READ)
+		ide_tape_discard_merge_buffer(drive, 1);
+	else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
+		ide_tape_flush_merge_buffer(drive);
+		idetape_flush_tape_buffers(drive);
+	}
+
+	if (tape->buf || tape->valid) {
+		printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+		tape->valid = 0;
+	}
+
+	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (!tape->buf)
+		return -ENOMEM;
+	tape->chrdev_dir = dir;
+	tape->cur = tape->buf;
+
+	/*
+	 * Issue a 0 rw command to ensure that DSC handshake is
+	 * switched from completion mode to buffer available mode.  No
+	 * point in issuing this if DSC overlap isn't supported, some
+	 * drives (Seagate STT3401A) will return an error.
+	 */
+	if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
+		int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ
+						  : REQ_IDETAPE_WRITE;
+
+		rc = idetape_queue_rw_tail(drive, cmd, 0);
+		if (rc < 0) {
+			kfree(tape->buf);
+			tape->buf = NULL;
+			tape->chrdev_dir = IDETAPE_DIR_NONE;
+			return rc;
 		}
 	}
 
@@ -1038,7 +1046,7 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 	if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
 		return 0;
 
-	idetape_init_read(drive);
+	idetape_init_rw(drive, IDETAPE_DIR_READ);
 
 	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks);
 }
@@ -1195,7 +1203,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 			    (count % tape->blk_size) == 0)
 				tape->user_bs_factor = count / tape->blk_size;
 	}
-	rc = idetape_init_read(drive);
+	rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
 	if (rc < 0)
 		return rc;
 	if (count == 0)
@@ -1249,6 +1257,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	ssize_t actually_written = 0;
 	ssize_t ret = 0;
 	u16 ctl = *(u16 *)&tape->caps[12];
+	int rc;
 
 	/* The drive is write protected. */
 	if (tape->write_prot)
@@ -1257,36 +1266,9 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
 	/* Initialize write operation */
-	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
-		if (tape->chrdev_dir == IDETAPE_DIR_READ)
-			ide_tape_discard_merge_buffer(drive, 1);
-		if (tape->buf || tape->valid) {
-			printk(KERN_ERR "ide-tape: valid should be 0 now\n");
-			tape->valid = 0;
-		}
-		tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
-		if (!tape->buf)
-			return -ENOMEM;
-		tape->chrdev_dir = IDETAPE_DIR_WRITE;
-		tape->cur = tape->buf;
-
-		/*
-		 * Issue a write 0 command to ensure that DSC handshake is
-		 * switched from completion mode to buffer available mode. No
-		 * point in issuing this if DSC overlap isn't supported, some
-		 * drives (Seagate STT3401A) will return an error.
-		 */
-		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-			ssize_t retval = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_WRITE, 0);
-			if (retval < 0) {
-				kfree(tape->buf);
-				tape->buf = NULL;
-				tape->chrdev_dir = IDETAPE_DIR_NONE;
-				return retval;
-			}
-		}
-	}
+	rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
+	if (rc < 0)
+		return rc;
 	if (count == 0)
 		return (0);
 	if (tape->valid < tape->buffer_size) {
-- 
cgit v0.10.2


From 6bb11dd14f70228f8dab25fd25dabeb9bc74926d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide-tape: use byte size instead of sectors on rw issue functions

Impact: cleanup

Byte size is what most issue functions deal with, make
idetape_queue_rw_tail() and its wrappers take byte size instead of
sector counts.  idetape_chrdev_read() and write() functions are
converted to use tape->buffer_size instead of ctl from tape->cap.

This cleans up code a little bit and will ease the next r/w
reimplementation.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 4da7fa9..d5e9bb2 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -879,15 +879,15 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
  * Generate a read/write request for the block device interface and wait for it
  * to be serviced.
  */
-static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks)
+static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	size_t size = blocks * tape->blk_size;
 	struct request *rq;
 	int ret;
 
 	debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
 	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
+	BUG_ON(size < 0 || size % tape->blk_size);
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
@@ -954,17 +954,16 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
 }
 
 /* Queue up a character device originated write request. */
-static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
+static int idetape_add_chrdev_write_request(ide_drive_t *drive, int size)
 {
 	debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
 
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks);
+	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, size);
 }
 
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int blocks;
 
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
 		printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
@@ -972,15 +971,10 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 		return;
 	}
 	if (tape->buf) {
-		blocks = tape->valid / tape->blk_size;
-		if (tape->valid % tape->blk_size) {
-			blocks++;
-			memset(tape->buf + tape->valid, 0,
-			       tape->blk_size - tape->valid % tape->blk_size);
-		}
-		(void) idetape_add_chrdev_write_request(drive, blocks);
-	}
-	if (tape->buf != NULL) {
+		size_t aligned = roundup(tape->valid, tape->blk_size);
+
+		memset(tape->cur, 0, aligned - tape->valid);
+		idetape_add_chrdev_write_request(drive, aligned);
 		kfree(tape->buf);
 		tape->buf = NULL;
 	}
@@ -1038,9 +1032,9 @@ static int idetape_init_rw(ide_drive_t *drive, int dir)
 }
 
 /* called from idetape_chrdev_read() to service a chrdev read request. */
-static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
+static int idetape_add_chrdev_read_request(ide_drive_t *drive, int size)
 {
-	debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks);
+	debug_log(DBG_PROCS, "Enter %s, %d bytes\n", __func__, size);
 
 	/* If we are at a filemark, return a read length of 0 */
 	if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
@@ -1048,7 +1042,7 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 
 	idetape_init_rw(drive, IDETAPE_DIR_READ);
 
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks);
+	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, size);
 }
 
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
@@ -1060,8 +1054,7 @@ static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 	while (bcount) {
 		unsigned int count = min(tape->buffer_size, bcount);
 
-		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
-				      count / tape->blk_size);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count);
 		bcount -= count;
 	}
 }
@@ -1193,7 +1186,6 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 	ide_drive_t *drive = tape->drive;
 	ssize_t bytes_read, temp, actually_read = 0, rc;
 	ssize_t ret = 0;
-	u16 ctl = *(u16 *)&tape->caps[12];
 
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
@@ -1218,7 +1210,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		tape->valid -= actually_read;
 	}
 	while (count >= tape->buffer_size) {
-		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
+		bytes_read = idetape_add_chrdev_read_request(drive,
+							     tape->buffer_size);
 		if (bytes_read <= 0)
 			goto finish;
 		if (copy_to_user(buf, tape->cur, bytes_read))
@@ -1228,7 +1221,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		actually_read += bytes_read;
 	}
 	if (count) {
-		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
+		bytes_read = idetape_add_chrdev_read_request(drive,
+							     tape->buffer_size);
 		if (bytes_read <= 0)
 			goto finish;
 		temp = min((unsigned long)count, (unsigned long)bytes_read);
@@ -1256,7 +1250,6 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	ide_drive_t *drive = tape->drive;
 	ssize_t actually_written = 0;
 	ssize_t ret = 0;
-	u16 ctl = *(u16 *)&tape->caps[12];
 	int rc;
 
 	/* The drive is write protected. */
@@ -1284,7 +1277,8 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 
 		if (tape->valid == tape->buffer_size) {
 			ssize_t retval;
-			retval = idetape_add_chrdev_write_request(drive, ctl);
+			retval = idetape_add_chrdev_write_request(drive,
+							tape->buffer_size);
 			if (retval <= 0)
 				return (retval);
 		}
@@ -1295,7 +1289,8 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 			ret = -EFAULT;
 		buf += tape->buffer_size;
 		count -= tape->buffer_size;
-		retval = idetape_add_chrdev_write_request(drive, ctl);
+		retval = idetape_add_chrdev_write_request(drive,
+							  tape->buffer_size);
 		actually_written += tape->buffer_size;
 		if (retval <= 0)
 			return (retval);
-- 
cgit v0.10.2


From 07bd9686c50c2b1f10e48089d4fb836a971f5177 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide-tape: simplify read/write functions

Impact: cleanup

idetape_chrdev_read/write() functions are unnecessarily complex when
everything can be handled in a single loop.  Collapse
idetape_add_chrdev_read/write_request() into the rw functions and
simplify the implementation.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d5e9bb2..2599579 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -953,14 +953,6 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
 	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
 }
 
-/* Queue up a character device originated write request. */
-static int idetape_add_chrdev_write_request(ide_drive_t *drive, int size)
-{
-	debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
-
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, size);
-}
-
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
@@ -974,7 +966,7 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 		size_t aligned = roundup(tape->valid, tape->blk_size);
 
 		memset(tape->cur, 0, aligned - tape->valid);
-		idetape_add_chrdev_write_request(drive, aligned);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned);
 		kfree(tape->buf);
 		tape->buf = NULL;
 	}
@@ -1031,20 +1023,6 @@ static int idetape_init_rw(ide_drive_t *drive, int dir)
 	return 0;
 }
 
-/* called from idetape_chrdev_read() to service a chrdev read request. */
-static int idetape_add_chrdev_read_request(ide_drive_t *drive, int size)
-{
-	debug_log(DBG_PROCS, "Enter %s, %d bytes\n", __func__, size);
-
-	/* If we are at a filemark, return a read length of 0 */
-	if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
-		return 0;
-
-	idetape_init_rw(drive, IDETAPE_DIR_READ);
-
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, size);
-}
-
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
 	idetape_tape_t *tape = drive->driver_data;
@@ -1184,8 +1162,9 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 {
 	struct ide_tape_obj *tape = file->private_data;
 	ide_drive_t *drive = tape->drive;
-	ssize_t bytes_read, temp, actually_read = 0, rc;
+	size_t done = 0;
 	ssize_t ret = 0;
+	int rc;
 
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
@@ -1195,52 +1174,43 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 			    (count % tape->blk_size) == 0)
 				tape->user_bs_factor = count / tape->blk_size;
 	}
+
 	rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
 	if (rc < 0)
 		return rc;
-	if (count == 0)
-		return (0);
-	if (tape->valid) {
-		actually_read = min_t(unsigned int, tape->valid, count);
-		if (copy_to_user(buf, tape->cur, actually_read))
-			ret = -EFAULT;
-		buf += actually_read;
-		count -= actually_read;
-		tape->cur += actually_read;
-		tape->valid -= actually_read;
-	}
-	while (count >= tape->buffer_size) {
-		bytes_read = idetape_add_chrdev_read_request(drive,
-							     tape->buffer_size);
-		if (bytes_read <= 0)
-			goto finish;
-		if (copy_to_user(buf, tape->cur, bytes_read))
-			ret = -EFAULT;
-		buf += bytes_read;
-		count -= bytes_read;
-		actually_read += bytes_read;
-	}
-	if (count) {
-		bytes_read = idetape_add_chrdev_read_request(drive,
-							     tape->buffer_size);
-		if (bytes_read <= 0)
-			goto finish;
-		temp = min((unsigned long)count, (unsigned long)bytes_read);
-		if (copy_to_user(buf, tape->cur, temp))
+
+	while (done < count) {
+		size_t todo;
+
+		/* refill if staging buffer is empty */
+		if (!tape->valid) {
+			/* If we are at a filemark, nothing more to read */
+			if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
+				break;
+			/* read */
+			if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
+						  tape->buffer_size) <= 0)
+				break;
+		}
+
+		/* copy out */
+		todo = min_t(size_t, count - done, tape->valid);
+		if (copy_to_user(buf + done, tape->cur, todo))
 			ret = -EFAULT;
-		actually_read += temp;
-		tape->cur += temp;
-		tape->valid -= temp;
+
+		tape->cur += todo;
+		tape->valid -= todo;
+		done += todo;
 	}
-finish:
-	if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
+
+	if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
 		debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
 
 		idetape_space_over_filemarks(drive, MTFSF, 1);
 		return 0;
 	}
 
-	return ret ? ret : actually_read;
+	return ret ? ret : done;
 }
 
 static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
@@ -1248,7 +1218,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 {
 	struct ide_tape_obj *tape = file->private_data;
 	ide_drive_t *drive = tape->drive;
-	ssize_t actually_written = 0;
+	size_t done = 0;
 	ssize_t ret = 0;
 	int rc;
 
@@ -1262,47 +1232,28 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
 	if (rc < 0)
 		return rc;
-	if (count == 0)
-		return (0);
-	if (tape->valid < tape->buffer_size) {
-		actually_written = min_t(unsigned int,
-					 tape->buffer_size - tape->valid,
-					 count);
-		if (copy_from_user(tape->cur, buf, actually_written))
-			ret = -EFAULT;
-		buf += actually_written;
-		count -= actually_written;
-		tape->cur += actually_written;
-		tape->valid += actually_written;
-
-		if (tape->valid == tape->buffer_size) {
-			ssize_t retval;
-			retval = idetape_add_chrdev_write_request(drive,
-							tape->buffer_size);
-			if (retval <= 0)
-				return (retval);
-		}
-	}
-	while (count >= tape->buffer_size) {
-		ssize_t retval;
-		if (copy_from_user(tape->cur, buf, tape->buffer_size))
-			ret = -EFAULT;
-		buf += tape->buffer_size;
-		count -= tape->buffer_size;
-		retval = idetape_add_chrdev_write_request(drive,
-							  tape->buffer_size);
-		actually_written += tape->buffer_size;
-		if (retval <= 0)
-			return (retval);
-	}
-	if (count) {
-		actually_written += count;
-		if (copy_from_user(tape->cur, buf, count))
+
+	while (done < count) {
+		size_t todo;
+
+		/* flush if staging buffer is full */
+		if (tape->valid == tape->buffer_size &&
+		    idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
+					  tape->buffer_size) <= 0)
+			return rc;
+
+		/* copy in */
+		todo = min_t(size_t, count - done,
+			     tape->buffer_size - tape->valid);
+		if (copy_from_user(tape->cur, buf + done, todo))
 			ret = -EFAULT;
-		tape->cur += count;
-		tape->valid += count;
+
+		tape->cur += todo;
+		tape->valid += todo;
+		done += todo;
 	}
-	return ret ? ret : actually_written;
+
+	return ret ? ret : done;
 }
 
 static int idetape_write_filemark(ide_drive_t *drive)
-- 
cgit v0.10.2


From 6d7003877c2f0578f1c08f66d05c3f72ef4ae596 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide-atapi: kill unused fields and callbacks

Impact: remove fields and code paths which are no longer necessary

Now that ide-tape uses standard mechanisms to transfer data, special
case handling for bh handling can be dropped from ide-atapi.  Drop the
followings.

* pc->cur_pos, b_count, bh and b_data
* drive->pc_update_buffers() and pc_io_buffers().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index b9dd450..afe5a43 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -359,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
-		} else {
+		} else
 			pc->xferred = pc->req_xfer;
-			if (drive->pc_update_buffers)
-				drive->pc_update_buffers(drive, pc);
-		}
 		debug_log("%s: DMA finished\n", drive->name);
 	}
 
@@ -463,16 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		return ide_do_reset(drive);
 	}
 
-	if (drive->media == ide_tape && pc->bh)
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	else {
-		done = min_t(unsigned int, bcount, cmd->nleft);
-		ide_pio_bytes(drive, cmd, write, done);
-	}
+	done = min_t(unsigned int, bcount, cmd->nleft);
+	ide_pio_bytes(drive, cmd, write, done);
 
-	/* Update the current position */
+	/* Update transferred byte count */
 	pc->xferred += done;
-	pc->cur_pos += done;
 
 	bcount -= done;
 
@@ -650,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
 
 		/* We haven't transferred any data yet */
 		pc->xferred = 0;
-		pc->cur_pos = pc->buf;
 
 		valid_tf = IDE_VALID_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 2599579..8dfc688 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -591,7 +591,6 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->bh = NULL;
 	pc->buf = NULL;
 	pc->buf_size = length * tape->blk_size;
 	pc->req_xfer = pc->buf_size;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1957461..34c128f 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -362,11 +362,7 @@ struct ide_atapi_pc {
 
 	/* data buffer */
 	u8 *buf;
-	/* current buffer position */
-	u8 *cur_pos;
 	int buf_size;
-	/* missing/available data on the current buffer */
-	int b_count;
 
 	/* the corresponding request */
 	struct request *rq;
@@ -379,10 +375,6 @@ struct ide_atapi_pc {
 	 */
 	u8 pc_buf[IDE_PC_BUFFER_SIZE];
 
-	/* idetape only */
-	struct idetape_bh *bh;
-	char *b_data;
-
 	unsigned long timeout;
 };
 
@@ -595,10 +587,6 @@ struct ide_drive_s {
 	/* callback for packet commands */
 	int  (*pc_callback)(struct ide_drive_s *, int);
 
-	void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *);
-	int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
-			      unsigned int, int);
-
 	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
 
 	unsigned long atapi_flags;
-- 
cgit v0.10.2


From 2c316bb57ad4e9f0f3de2d7ef1ae85530c2a7e69 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide: drop rq->data handling from ide_map_sg()

Impact: remove code path which is no longer necessary

All IDE data transfers now use rq->bio.  Simplify ide_map_sg()
accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 6e3094e..a0309ea 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -248,11 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
 	struct scatterlist *sg = hwif->sg_table;
 	struct request *rq = cmd->rq;
 
-	if (!rq->bio) {
-		sg_init_one(sg, rq->data, rq->data_len);
-		cmd->sg_nents = 1;
-	} else
-		cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
+	cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 }
 EXPORT_SYMBOL_GPL(ide_map_sg);
 
-- 
cgit v0.10.2


From e475eedb09ee9a0fd855f3e923aa9af31c17d141 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 15 Apr 2009 10:50:04 +0000
Subject: clocksource: sh_cmt earlytimer support

Add Early Platform Driver support to the sh_cmt driver
using the earlytimer class.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 1c92c39..02bae39 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -566,9 +566,19 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 static int __devinit sh_cmt_probe(struct platform_device *pdev)
 {
 	struct sh_cmt_priv *p = platform_get_drvdata(pdev);
+	struct sh_cmt_config *cfg = pdev->dev.platform_data;
 	int ret;
 
-	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p) {
+		pr_info("sh_cmt: %s kept as earlytimer\n", cfg->name);
+		return 0;
+	}
+
+	if (is_early_platform_device(pdev))
+		p = alloc_bootmem(sizeof(*p));
+	else
+		p = kmalloc(sizeof(*p), GFP_KERNEL);
+
 	if (p == NULL) {
 		dev_err(&pdev->dev, "failed to allocate driver data\n");
 		return -ENOMEM;
@@ -576,7 +586,10 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev)
 
 	ret = sh_cmt_setup(p, pdev);
 	if (ret) {
-		kfree(p);
+		if (is_early_platform_device(pdev))
+			free_bootmem(__pa(p), sizeof(*p));
+		else
+			kfree(p);
 
 		platform_set_drvdata(pdev, NULL);
 	}
@@ -606,6 +619,7 @@ static void __exit sh_cmt_exit(void)
 	platform_driver_unregister(&sh_cmt_device_driver);
 }
 
+early_platform_init("earlytimer", &sh_cmt_device_driver);
 module_init(sh_cmt_init);
 module_exit(sh_cmt_exit);
 
-- 
cgit v0.10.2


From eaab89197b733d0f81f07d6c44294b674479fda8 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 15 Apr 2009 10:50:12 +0000
Subject: sh: arch earlytimer support

Extend the 32-bit SuperH timer code to register and probe
the earlytimer class of Early Platform Drivers.

This registers the sh_cmt driver if compiled-in.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index c34e1e0..04b8c6a 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -17,6 +17,7 @@
 #include <linux/sched.h>
 #include <linux/clockchips.h>
 #include <linux/mc146818rtc.h>	/* for rtc_lock */
+#include <linux/platform_device.h>
 #include <linux/smp.h>
 #include <asm/clock.h>
 #include <asm/rtc.h>
@@ -229,6 +230,14 @@ void __init time_init(void)
 #endif
 
 	/*
+	 * Make sure all compiled-in early timers register themselves.
+	 * Run probe() for one "earlytimer" device.
+	 */
+	early_platform_driver_register_all("earlytimer");
+	if (early_platform_driver_probe("earlytimer", 1, 0))
+		return;
+
+	/*
 	 * Find the timer to use as the system timer, it will be
 	 * initialized for us.
 	 */
-- 
cgit v0.10.2


From 87a00dc059e3af46303f1f56b0e8df41af988c7b Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 15 Apr 2009 10:50:21 +0000
Subject: sh: Add plat_early_device_setup()

Add a plat_early_device_setup() function to allow
processor-specific code to register Early Platform Data.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/device.h b/arch/sh/include/asm/device.h
index efd511d..8688a88 100644
--- a/arch/sh/include/asm/device.h
+++ b/arch/sh/include/asm/device.h
@@ -10,3 +10,5 @@ struct platform_device;
 int platform_resource_setup_memory(struct platform_device *pdev,
 				   char *name, unsigned long memsize);
 
+void plat_early_device_setup(void);
+
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 04a6004..22b976d 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -29,6 +29,7 @@
 #include <linux/mmzone.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/platform_device.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/page.h>
@@ -328,6 +329,10 @@ static int __init parse_elfcorehdr(char *arg)
 early_param("elfcorehdr", parse_elfcorehdr);
 #endif
 
+void __init __attribute__ ((weak)) plat_early_device_setup(void)
+{
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	enable_mmu();
@@ -381,6 +386,8 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
+	plat_early_device_setup();
+
 	sh_mv_setup();
 
 	/*
-- 
cgit v0.10.2


From 28fde6863e6ed1f4bfb7cef080e67bf439c20628 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 15 Apr 2009 10:50:30 +0000
Subject: sh: Early Platform Data for SuperH Mobile

Use plat_early_device_setup() to register Early Platform Data
for SuperH Mobile processors.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index c154938..cb5b4db 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -234,6 +234,16 @@ static int __init sh7343_devices_setup(void)
 }
 __initcall(sh7343_devices_setup);
 
+static struct platform_device *sh7343_early_devices[] __initdata = {
+	&cmt_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7343_early_devices,
+				   ARRAY_SIZE(sh7343_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index 93ecf8e..2a771f4 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -226,6 +226,16 @@ static int __init sh7366_devices_setup(void)
 }
 __initcall(sh7366_devices_setup);
 
+static struct platform_device *sh7366_early_devices[] __initdata = {
+	&cmt_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7366_early_devices,
+				   ARRAY_SIZE(sh7366_early_devices));
+}
+
 enum {
 	UNUSED=0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 0e5d204..8f974d0 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -270,6 +270,16 @@ static int __init sh7722_devices_setup(void)
 }
 __initcall(sh7722_devices_setup);
 
+static struct platform_device *sh7722_early_devices[] __initdata = {
+	&cmt_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7722_early_devices,
+				   ARRAY_SIZE(sh7722_early_devices));
+}
+
 enum {
 	UNUSED=0,
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index 5338dac..21a58d6 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -281,6 +281,16 @@ static int __init sh7723_devices_setup(void)
 }
 __initcall(sh7723_devices_setup);
 
+static struct platform_device *sh7723_early_devices[] __initdata = {
+	&cmt_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7723_early_devices,
+				   ARRAY_SIZE(sh7723_early_devices));
+}
+
 enum {
 	UNUSED=0,
 
-- 
cgit v0.10.2


From d9aed8b95f062b2f96e37b0e07373f36a6e7066d Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 19 Apr 2009 13:12:04 +0900
Subject: sh: sh7724: Don't default enable the RTC clock.

rtc-sh takes care of this now, so no need to have this always enabled.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 8b87ba8..195274a 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -275,7 +275,6 @@ static struct platform_device *sh7724_devices[] __initdata = {
 
 static int __init sh7724_devices_setup(void)
 {
-	clk_always_enable("rtc0");   /* RTC */
 	clk_always_enable("vpu0");   /* VPU */
 	clk_always_enable("veu1");   /* VEU3F1 */
 	clk_always_enable("veu0");   /* VEU3F0 */
-- 
cgit v0.10.2


From 8fb2bae4b41eb64f6e233e9bd3f3a789fbb04a06 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 19 Apr 2009 13:14:29 +0900
Subject: sh: sh7724: Register CMT as an early platform device here too.

Follows the SH7722/SH7723 changes.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 195274a..8429396 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -288,6 +288,16 @@ static int __init sh7724_devices_setup(void)
 }
 device_initcall(sh7724_devices_setup);
 
+static struct platform_device *sh7724_early_devices[] __initdata = {
+	&cmt_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7724_early_devices,
+				   ARRAY_SIZE(sh7724_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From 2b2fd87a6ef56ba7647a578e81bb8c8efda166b8 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Fri, 17 Apr 2009 16:42:12 +0800
Subject: docs, x86: add nox2apic back to kernel-parameters.txt

"nox2apic" was removed from kernel-parameters.txt by mistake, when
entries were sorted in alpha order (commit 0cb55ad2). But this early
parameter is still there, add it back to kernel-parameters.txt.

[ Impact: add boot parameter description ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: iommu@lists.linux-foundation.org
Cc: allen.m.kay@intel.com
Cc: fenghua.yu@intel.com
LKML-Reference: <1239957736-6161-2-git-send-email-weidong.han@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6172e43..33989d2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1588,6 +1588,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nowb		[ARM]
 
+	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
+
 	nptcg=		[IA64] Override max number of concurrent global TLB
 			purges which is reported from either PAL_VM_SUMMARY or
 			SAL PALO.
-- 
cgit v0.10.2


From 5d0ae2db6deac4f15dac4f42f23bc56448fc8d4d Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Fri, 17 Apr 2009 16:42:13 +0800
Subject: x86, intr-remap: fix ack for interrupt remapping

Shouldn't call ack_apic_edge() in ir_ack_apic_edge(), because
ack_apic_edge() does more than just ack: it also does irq migration
in the non-interrupt-remapping case. But there is no such need for
interrupt-remapping case, as irq migration is done in the process
context.

Similarly, ir_ack_apic_level() shouldn't call ack_apic_level, and
instead should do the local cpu's EOI + directed EOI to the io-apic.

ack_x2APIC_irq() is not neccessary, because ack_APIC_irq() will use MSR
write for x2apic, and uncached write for non-x2apic.

[ Impact: simplify/standardize intr-remap IRQ acking, fix on !x2apic ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: iommu@lists.linux-foundation.org
Cc: allen.m.kay@intel.com
Cc: fenghua.yu@intel.com
LKML-Reference: <1239957736-6161-3-git-send-email-weidong.han@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2bd5a46..d4cb7e5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -204,14 +204,6 @@ static inline int x2apic_enabled(void)
 
 extern int get_physical_broadcast(void);
 
-#ifdef CONFIG_X86_X2APIC
-static inline void ack_x2APIC_irq(void)
-{
-	/* Docs say use 0 for future compatibility */
-	native_apic_msr_write(APIC_EOI, 0);
-}
-#endif
-
 extern void apic_disable(void);
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8499000..ea22a86 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2552,20 +2552,6 @@ eoi_ioapic_irq(struct irq_desc *desc)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	ack_x2APIC_irq();
-	eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-#endif
-
 static void ack_apic_edge(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -2629,9 +2615,6 @@ static void ack_apic_level(unsigned int irq)
 	 */
 	ack_APIC_irq();
 
-	if (irq_remapped(irq))
-		eoi_ioapic_irq(desc);
-
 	/* Now we can move and renable the irq */
 	if (unlikely(do_unmask_irq)) {
 		/* Only migrate the irq if the ack has been received.
@@ -2680,20 +2663,15 @@ static void ack_apic_level(unsigned int irq)
 #ifdef CONFIG_INTR_REMAP
 static void ir_ack_apic_edge(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_edge(irq);
-#endif
-       return ack_apic_edge(irq);
+	ack_APIC_irq();
 }
 
 static void ir_ack_apic_level(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_level(irq);
-#endif
-       return ack_apic_level(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	ack_APIC_irq();
+	eoi_ioapic_irq(desc);
 }
 #endif /* CONFIG_INTR_REMAP */
 
-- 
cgit v0.10.2


From 937582382c71b75b29fbb92615629494e1a05ac0 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Fri, 17 Apr 2009 16:42:14 +0800
Subject: x86, intr-remap: enable interrupt remapping early

Currently, when x2apic is not enabled, interrupt remapping
will be enabled in init_dmars(), where it is too late to remap
ioapic interrupts, that is, ioapic interrupts are really in
compatibility mode, not remappable mode.

This patch always enables interrupt remapping before ioapic
setup, it guarantees all interrupts will be remapped when
interrupt remapping is enabled. Thus it doesn't need to set
the compatibility interrupt bit.

[ Impact: refactor intr-remap init sequence, enable fuller remap mode ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: iommu@lists.linux-foundation.org
Cc: allen.m.kay@intel.com
Cc: fenghua.yu@intel.com
LKML-Reference: <1239957736-6161-4-git-send-email-weidong.han@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d4cb7e5..fbdd654 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -169,7 +169,6 @@ static inline u64 native_x2apic_icr_read(void)
 extern int x2apic, x2apic_phys;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
-extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
 static inline int x2apic_enabled(void)
 {
@@ -190,18 +189,18 @@ static inline void check_x2apic(void)
 static inline void enable_x2apic(void)
 {
 }
-static inline void enable_IR_x2apic(void)
-{
-}
 static inline int x2apic_enabled(void)
 {
 	return 0;
 }
 
 #define	x2apic	0
+#define	x2apic_preenabled 0
 
 #endif
 
+extern void enable_IR_x2apic(void);
+
 extern int get_physical_broadcast(void);
 
 extern void apic_disable(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 83e47fe..0cf1eea 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -141,6 +141,8 @@ static int x2apic_preenabled;
 static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
+	if (x2apic_enabled())
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
 	disable_x2apic = 1;
 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
 	return 0;
@@ -1345,6 +1347,7 @@ void enable_x2apic(void)
 		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
 	}
 }
+#endif /* CONFIG_X86_X2APIC */
 
 void __init enable_IR_x2apic(void)
 {
@@ -1353,32 +1356,21 @@ void __init enable_IR_x2apic(void)
 	unsigned long flags;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
-	if (!cpu_has_x2apic)
-		return;
-
-	if (!x2apic_preenabled && disable_x2apic) {
-		pr_info("Skipped enabling x2apic and Interrupt-remapping "
-			"because of nox2apic\n");
-		return;
+	ret = dmar_table_init();
+	if (ret) {
+		pr_debug("dmar_table_init() failed with %d:\n", ret);
+		goto ir_failed;
 	}
 
-	if (x2apic_preenabled && disable_x2apic)
-		panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-	if (!x2apic_preenabled && skip_ioapic_setup) {
-		pr_info("Skipped enabling x2apic and Interrupt-remapping "
-			"because of skipping io-apic setup\n");
-		return;
+	if (!intr_remapping_supported()) {
+		pr_debug("intr-remapping not supported\n");
+		goto ir_failed;
 	}
 
-	ret = dmar_table_init();
-	if (ret) {
-		pr_info("dmar_table_init() failed with %d:\n", ret);
 
-		if (x2apic_preenabled)
-			panic("x2apic enabled by bios. But IR enabling failed");
-		else
-			pr_info("Not enabling x2apic,Intr-remapping\n");
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		pr_info("Skipped enabling intr-remap because of skipping "
+			"io-apic setup\n");
 		return;
 	}
 
@@ -1398,20 +1390,25 @@ void __init enable_IR_x2apic(void)
 	mask_IO_APIC_setup(ioapic_entries);
 	mask_8259A();
 
-	ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
-
-	if (ret && x2apic_preenabled) {
-		local_irq_restore(flags);
-		panic("x2apic enabled by bios. But IR enabling failed");
-	}
+#ifdef CONFIG_X86_X2APIC
+	if (cpu_has_x2apic)
+		ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
+	else
+#endif
+		ret = enable_intr_remapping(EIM_8BIT_APIC_ID);
 
 	if (ret)
 		goto end_restore;
 
-	if (!x2apic) {
+	pr_info("Enabled Interrupt-remapping\n");
+
+#ifdef CONFIG_X86_X2APIC
+	if (cpu_has_x2apic && !x2apic) {
 		x2apic = 1;
 		enable_x2apic();
+		pr_info("Enabled x2apic\n");
 	}
+#endif
 
 end_restore:
 	if (ret)
@@ -1426,30 +1423,29 @@ end_restore:
 	local_irq_restore(flags);
 
 end:
-	if (!ret) {
-		if (!x2apic_preenabled)
-			pr_info("Enabled x2apic and interrupt-remapping\n");
-		else
-			pr_info("Enabled Interrupt-remapping\n");
-	} else
-		pr_err("Failed to enable Interrupt-remapping and x2apic\n");
 	if (ioapic_entries)
 		free_ioapic_entries(ioapic_entries);
+
+	if (!ret)
+		return;
+
+ir_failed:
+	if (x2apic_preenabled)
+		panic("x2apic enabled by bios. But IR enabling failed");
+	else if (cpu_has_x2apic)
+		pr_info("Not enabling x2apic,Intr-remapping\n");
 #else
 	if (!cpu_has_x2apic)
 		return;
 
 	if (x2apic_preenabled)
 		panic("x2apic enabled prior OS handover,"
-		      " enable CONFIG_INTR_REMAP");
-
-	pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-		" and x2apic\n");
+		      " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
 #endif
 
 	return;
 }
-#endif /* CONFIG_X86_X2APIC */
+
 
 #ifdef CONFIG_X86_64
 /*
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 001b328..9ce8f07 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1968,15 +1968,6 @@ static int __init init_dmars(void)
 		}
 	}
 
-#ifdef CONFIG_INTR_REMAP
-	if (!intr_remapping_enabled) {
-		ret = enable_intr_remapping(0);
-		if (ret)
-			printk(KERN_ERR
-			       "IOMMU: enable interrupt remapping failed\n");
-	}
-#endif
-
 	/*
 	 * For each rmrr
 	 *   for each dev attached to rmrr
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f5e0ea7..5c21426 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -423,20 +423,6 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
 		      readl, (sts & DMA_GSTS_IRTPS), sts);
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 
-	if (mode == 0) {
-		spin_lock_irqsave(&iommu->register_lock, flags);
-
-		/* enable comaptiblity format interrupt pass through */
-		cmd = iommu->gcmd | DMA_GCMD_CFI;
-		iommu->gcmd |= DMA_GCMD_CFI;
-		writel(cmd, iommu->reg + DMAR_GCMD_REG);
-
-		IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
-			      readl, (sts & DMA_GSTS_CFIS), sts);
-
-		spin_unlock_irqrestore(&iommu->register_lock, flags);
-	}
-
 	/*
 	 * global invalidation of interrupt entry cache before enabling
 	 * interrupt-remapping.
@@ -516,6 +502,20 @@ end:
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+int __init intr_remapping_supported(void)
+{
+	struct dmar_drhd_unit *drhd;
+
+	for_each_drhd_unit(drhd) {
+		struct intel_iommu *iommu = drhd->iommu;
+
+		if (!ecap_ir_support(iommu->ecap))
+			return 0;
+	}
+
+	return 1;
+}
+
 int __init enable_intr_remapping(int eim)
 {
 	struct dmar_drhd_unit *drhd;
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e397dc3..06f592a 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -108,6 +108,7 @@ struct irte {
 };
 #ifdef CONFIG_INTR_REMAP
 extern int intr_remapping_enabled;
+extern int intr_remapping_supported(void);
 extern int enable_intr_remapping(int);
 extern void disable_intr_remapping(void);
 extern int reenable_intr_remapping(int);
-- 
cgit v0.10.2


From 03ea81550676296d94596e4337c771c6ba29f542 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Fri, 17 Apr 2009 16:42:15 +0800
Subject: x86, intr-remap: add option to disable interrupt remapping

Add option "nointremap" to disable interrupt remapping.

[ Impact: add new boot option ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: iommu@lists.linux-foundation.org
Cc: allen.m.kay@intel.com
Cc: fenghua.yu@intel.com
LKML-Reference: <1239957736-6161-5-git-send-email-weidong.han@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 33989d2..843cb66 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1533,6 +1533,9 @@ and is between 256 and 4096 characters. It is defined in the file
 	noinitrd	[RAM] Tells the kernel not to load any configured
 			initial RAM disk.
 
+	nointremap	[X86-64, Intel-IOMMU] Do not enable interrupt
+			remapping.
+
 	nointroute	[IA-64]
 
 	nojitter	[IA64] Disables jitter checking for ITC timers.
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 5c21426..842039e 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -15,6 +15,14 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
 int intr_remapping_enabled;
 
+static int disable_intremap;
+static __init int setup_nointremap(char *str)
+{
+	disable_intremap = 1;
+	return 0;
+}
+early_param("nointremap", setup_nointremap);
+
 struct irq_2_iommu {
 	struct intel_iommu *iommu;
 	u16 irte_index;
@@ -506,6 +514,9 @@ int __init intr_remapping_supported(void)
 {
 	struct dmar_drhd_unit *drhd;
 
+	if (disable_intremap)
+		return 0;
+
 	for_each_drhd_unit(drhd) {
 		struct intel_iommu *iommu = drhd->iommu;
 
-- 
cgit v0.10.2


From 9a2755c3569e4db92bd9b1daadeddb4045b0cccd Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Fri, 17 Apr 2009 16:42:16 +0800
Subject: x86, intr-remap: fix x2apic/intr-remap resume

Interrupt remapping was decoupled from x2apic. Shouldn't check
x2apic before resume interrupt remapping. Otherwise, interrupt
remapping won't be resumed when x2apic is not enabled.

[ Impact: fix potential intr-remap resume hang on !x2apic ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: iommu@lists.linux-foundation.org
Cc: allen.m.kay@intel.com
Cc: fenghua.yu@intel.com
LKML-Reference: <1239957736-6161-6-git-send-email-weidong.han@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0cf1eea..7b41a32 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2032,7 +2032,7 @@ static int lapic_resume(struct sys_device *dev)
 		return 0;
 
 	local_irq_save(flags);
-	if (x2apic) {
+	if (intr_remapping_enabled) {
 		ioapic_entries = alloc_ioapic_entries();
 		if (!ioapic_entries) {
 			WARN(1, "Alloc ioapic_entries in lapic resume failed.");
@@ -2048,8 +2048,10 @@ static int lapic_resume(struct sys_device *dev)
 
 		mask_IO_APIC_setup(ioapic_entries);
 		mask_8259A();
-		enable_x2apic();
 	}
+
+	if (x2apic)
+		enable_x2apic();
 #else
 	if (!apic_pm_state.active)
 		return 0;
@@ -2097,10 +2099,12 @@ static int lapic_resume(struct sys_device *dev)
 	apic_read(APIC_ESR);
 
 #ifdef CONFIG_INTR_REMAP
-	if (intr_remapping_enabled)
-		reenable_intr_remapping(EIM_32BIT_APIC_ID);
+	if (intr_remapping_enabled) {
+		if (x2apic)
+			reenable_intr_remapping(EIM_32BIT_APIC_ID);
+		else
+			reenable_intr_remapping(EIM_8BIT_APIC_ID);
 
-	if (x2apic) {
 		unmask_8259A();
 		restore_IO_APIC_setup(ioapic_entries);
 		free_ioapic_entries(ioapic_entries);
@@ -2109,7 +2113,6 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_restore(flags);
 
-
 	return 0;
 }
 
-- 
cgit v0.10.2


From cece3155d869a50ba534ed161b5a05e8a29dcad0 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 18 Apr 2009 23:45:28 +0400
Subject: x86: smpboot - wakeup_secondary should be done via __cpuinit section

A caller (do_boot_cpu) already has __cpuinit attribute.

Since HOTPLUG_CPU depends on SMP && HOTPLUG it doesn't
lead to panic at moment.

[ Impact: cleanup ]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090418194528.GD25510@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bf8ad63..d2e8de9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -504,7 +504,7 @@ void __inquire_remote_apic(int apicid)
  * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
  * won't ... remember to clear down the APIC, etc later.
  */
-int __devinit
+int __cpuinit
 wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
@@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
-static int __devinit
+static int __cpuinit
 wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
-- 
cgit v0.10.2


From 667c5296cc76fefe0abcb79228952b28d9af45e3 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 19 Apr 2009 11:43:11 +0400
Subject: x86: es7000, uv - use __cpuinit for kicking secondary cpus

The caller already has __cpuinit attribute.

[ Impact: save memory, address section mismatch warning ]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
LKML-Reference: <20090419074311.GA8670@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 1c11b81..8e07c14 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi)
 	return gsi;
 }
 
-static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
 {
 	unsigned long vect = 0, psaival = 0;
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index de1a50a..873bf71 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -91,7 +91,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	cpumask_set_cpu(cpu, retmask);
 }
 
-static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 #ifdef CONFIG_SMP
 	unsigned long val;
-- 
cgit v0.10.2


From e057a5e5647a1c9d0d0054fbd298bfa04b3d1cb4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 19 Apr 2009 23:38:12 +0200
Subject: tracing/core: Add current context on tracing recursion warning

In case of tracing recursion detection, we only get the stacktrace.
But the current context may be very useful to debug the issue.

This patch adds the softirq/hardirq/nmi context with the warning
using lockdep context display to have a familiar output.

v2: Use printk_once()
v3: drop {hardirq,softirq}_context which depend on lockdep,
    only keep what is part of current->trace_recursion,
    sufficient to debug the warning source.

[ Impact: print context necessary to debug recursion ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b421b0e..bffde63 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1495,6 +1495,13 @@ static int trace_recursive_lock(void)
 	if (unlikely(current->trace_recursion & (1 << level))) {
 		/* Disable all tracing before we do anything else */
 		tracing_off_permanent();
+
+		printk_once(KERN_WARNING "Tracing recursion: "
+			    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
+			    hardirq_count() >> HARDIRQ_SHIFT,
+			    softirq_count() >> SOFTIRQ_SHIFT,
+			    in_nmi());
+
 		WARN_ON_ONCE(1);
 		return -1;
 	}
-- 
cgit v0.10.2


From 9ade1217c9ba39ad2f004a898ddfbb815fd5fe74 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 15:38:25 +0900
Subject: sh: pci: Drop asm-generic/pci.h, so we can use our own fixups.

The new PCI code wants its own bus<->resource mappings instead of the
generic equivalents, so drop the asm-generic include in preparation.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 8aaacc9..6d659cd 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -72,6 +72,20 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
 	pci_read_bridge_bases(bus);
 }
 
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			     struct resource *res)
+{
+	region->start = res->start;
+	region->end = res->end;
+}
+
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			     struct pci_bus_region *region)
+{
+	res->start = region->start;
+	res->end = region->end;
+}
+
 void pcibios_align_resource(void *data, struct resource *res,
 			    resource_size_t size, resource_size_t align)
 			    __attribute__ ((weak));
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index bb2c2fc..69cb615 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -146,13 +146,34 @@ int pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin);
 int pciauto_assign_resources(int busno, struct pci_channel *hose);
 #endif
 
-#endif /* __KERNEL__ */
+extern void pcibios_resource_to_bus(struct pci_dev *dev,
+	struct pci_bus_region *region, struct resource *res);
+
+extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+				    struct pci_bus_region *region);
+
+static inline struct resource *
+pcibios_select_root(struct pci_dev *pdev, struct resource *res)
+{
+	struct resource *root = NULL;
+
+	if (res->flags & IORESOURCE_IO)
+		root = &ioport_resource;
+	if (res->flags & IORESOURCE_MEM)
+		root = &iomem_resource;
 
-/* generic pci stuff */
-#include <asm-generic/pci.h>
+	return root;
+}
+
+/* Chances are this interrupt is wired PC-style ...  */
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	return channel ? 15 : 14;
+}
 
 /* generic DMA-mapping stuff */
 #include <asm-generic/pci-dma-compat.h>
 
+#endif /* __KERNEL__ */
 #endif /* __ASM_SH_PCI_H */
 
-- 
cgit v0.10.2


From 4c5107e44514a9bde74d0af77982705d602d9e39 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 15:43:36 +0900
Subject: sh: pci: Split out new-style PCI core.

This splits off a 'pci-new.c' which is aimed at gradually replacing the
pci-auto backend and the arch/sh/drivers/pci/pci.c core respectively.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig
index 7e816ed..efe8cf9 100644
--- a/arch/sh/drivers/pci/Kconfig
+++ b/arch/sh/drivers/pci/Kconfig
@@ -18,10 +18,17 @@ config SH_PCIDMA_NONCOHERENT
 	  bridge integrated with your SH CPU, refer carefully to the chip specs
 	  to see if you can say 'N' here. Otherwise, leave it as 'Y'.
 
+# Temporary config option for transitioning off of PCI_AUTO
+config PCI_NEW
+	bool
+	depends on PCI
+	default y if CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
+		     CPU_SUBTYPE_SH7785
+
 # This is also board-specific
 config PCI_AUTO
 	bool
-	depends on PCI
+	depends on PCI && !PCI_NEW
 	default y
 
 config PCI_AUTO_UPDATE_RESOURCES
@@ -34,4 +41,3 @@ config PCI_AUTO_UPDATE_RESOURCES
 	  for some reason, you have a board that simply refuses to work
 	  with its resources updated beyond what they are when the device
 	  is powered up, set this to N. Everyone else will want this as Y.
-
diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index 5003971..362a1ee 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -2,8 +2,8 @@
 # Makefile for the PCI specific kernel interface routines under Linux.
 #
 
-obj-y					+= pci.o
-obj-$(CONFIG_PCI_AUTO)			+= pci-auto.o
+obj-$(CONFIG_PCI_AUTO)			:= pci.o pci-auto.o
+obj-$(CONFIG_PCI_NEW)			:= pci-new.o
 
 obj-$(CONFIG_CPU_SUBTYPE_SH7751)	+= pci-sh7751.o ops-sh4.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7751R)	+= pci-sh7751.o ops-sh4.o
diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
new file mode 100644
index 0000000..097eb88
--- /dev/null
+++ b/arch/sh/drivers/pci/pci-new.c
@@ -0,0 +1,248 @@
+/*
+ * New-style PCI core.
+ *
+ * Copyright (c) 2002  M. R. Brown
+ * Copyright (c) 2004 - 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/dma-debug.h>
+#include <linux/io.h>
+
+static int __init pcibios_init(void)
+{
+	struct pci_channel *p;
+	struct pci_bus *bus;
+	int busno;
+
+	/* init channels */
+	busno = 0;
+	for (p = board_pci_channels; p->init; p++) {
+		if (p->init(p) == 0)
+			p->enabled = 1;
+		else
+			pr_err("Unable to init pci channel %d\n", busno);
+		busno++;
+	}
+
+	/* scan the buses */
+	busno = 0;
+	for (p = board_pci_channels; p->init; p++) {
+		if (p->enabled) {
+			bus = pci_scan_bus(busno, p->pci_ops, p);
+			busno = bus->subordinate + 1;
+
+			pci_bus_size_bridges(bus);
+			pci_bus_assign_resources(bus);
+			pci_enable_bridges(bus);
+		}
+	}
+
+	pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
+
+	dma_debug_add_bus(&pci_bus_type);
+
+	return 0;
+}
+subsys_initcall(pcibios_init);
+
+static void pcibios_fixup_device_resources(struct pci_dev *dev,
+	struct pci_bus *bus)
+{
+	/* Update device resources.  */
+	struct pci_channel *chan = bus->sysdata;
+	unsigned long offset = 0;
+	int i;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		if (!dev->resource[i].start)
+			continue;
+		if (dev->resource[i].flags & IORESOURCE_PCI_FIXED)
+			continue;
+		if (dev->resource[i].flags & IORESOURCE_IO)
+			offset = chan->io_base;
+		else if (dev->resource[i].flags & IORESOURCE_MEM)
+			offset = 0;
+
+		dev->resource[i].start += offset;
+		dev->resource[i].end += offset;
+	}
+}
+
+
+/*
+ *  Called after each bus is probed, but before its children
+ *  are examined.
+ */
+void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev = bus->self;
+	struct list_head *ln;
+	struct pci_channel *chan = bus->sysdata;
+
+	if (!dev) {
+		bus->resource[0] = chan->io_resource;
+		bus->resource[1] = chan->mem_resource;
+	}
+
+	for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
+		dev = pci_dev_b(ln);
+
+		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+			pcibios_fixup_device_resources(dev, bus);
+	}
+}
+
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			 struct resource *res)
+{
+	struct pci_channel *chan = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = chan->io_base;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = 0;
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
+}
+
+void __devinit
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region)
+{
+	struct pci_channel *chan = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = chan->io_base;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = 0;
+
+	res->start = region->start + offset;
+	res->end = region->end + offset;
+}
+
+void pcibios_align_resource(void *data, struct resource *res,
+			    resource_size_t size, resource_size_t align)
+			    __attribute__ ((weak));
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+			    resource_size_t size, resource_size_t align)
+{
+	if (res->flags & IORESOURCE_IO) {
+		resource_size_t start = res->start;
+
+		if (start & 0x300) {
+			start = (start + 0x3ff) & ~0x3ff;
+			res->start = start;
+		}
+	}
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int idx;
+	struct resource *r;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+	for(idx=0; idx<6; idx++) {
+		if (!(mask & (1 << idx)))
+			continue;
+		r = &dev->resource[idx];
+		if (!r->start && r->end) {
+			printk(KERN_ERR "PCI: Device %s not available because "
+			       "of resource collisions\n", pci_name(dev));
+			return -EINVAL;
+		}
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+	if (dev->resource[PCI_ROM_RESOURCE].start)
+		cmd |= PCI_COMMAND_MEMORY;
+	if (cmd != old_cmd) {
+		printk(KERN_INFO "PCI: Enabling device %s (%04x -> %04x)\n",
+		       pci_name(dev), old_cmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check and set
+ *  the latency timer as it may not be properly set.
+ */
+static unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n",
+	       pci_name(dev), lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	resource_size_t start = pci_resource_start(dev, bar);
+	resource_size_t len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (unlikely(!len || !start))
+		return NULL;
+	if (maxlen && len > maxlen)
+		len = maxlen;
+
+	/*
+	 * Presently the IORESOURCE_MEM case is a bit special, most
+	 * SH7751 style PCI controllers have PCI memory at a fixed
+	 * location in the address space where no remapping is desired.
+	 * With the IORESOURCE_MEM case more care has to be taken
+	 * to inhibit page table mapping for legacy cores, but this is
+	 * punted off to __ioremap().
+	 *					-- PFM.
+	 */
+	if (flags & IORESOURCE_IO)
+		return ioport_map(start, len);
+	if (flags & IORESOURCE_MEM)
+		return ioremap(start, len);
+
+	return NULL;
+}
+EXPORT_SYMBOL(pci_iomap);
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	iounmap(addr);
+}
+EXPORT_SYMBOL(pci_iounmap);
+
+EXPORT_SYMBOL(board_pci_channels);
-- 
cgit v0.10.2


From 9833385131fc4e8c52f95320ab899051d1c06831 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 15:51:45 +0900
Subject: sh: pci: HAVE_PCI_MMAP support.

Derived from the MIPS version, now uses pgprot_noncached().

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index 362a1ee..c8eab14 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -1,9 +1,9 @@
 #
 # Makefile for the PCI specific kernel interface routines under Linux.
 #
-
-obj-$(CONFIG_PCI_AUTO)			:= pci.o pci-auto.o
-obj-$(CONFIG_PCI_NEW)			:= pci-new.o
+obj-y					+= pci-lib.o
+obj-$(CONFIG_PCI_AUTO)			+= pci.o pci-auto.o
+obj-$(CONFIG_PCI_NEW)			+= pci-new.o
 
 obj-$(CONFIG_CPU_SUBTYPE_SH7751)	+= pci-sh7751.o ops-sh4.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7751R)	+= pci-sh7751.o ops-sh4.o
diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c
new file mode 100644
index 0000000..1a43a35
--- /dev/null
+++ b/arch/sh/drivers/pci/pci-lib.c
@@ -0,0 +1,26 @@
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	/*
+	 * I/O space can be accessed via normal processor loads and stores on
+	 * this platform but for now we elect not to do this and portable
+	 * drivers should not do this anyway.
+	 */
+	if (mmap_state == pci_mmap_io)
+		return -EINVAL;
+
+	/*
+	 * Ignore write-combine; for now only return uncached mappings.
+	 */
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 69cb615..46afd44 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -40,6 +40,9 @@ extern struct pci_channel board_pci_channels[];
 
 struct pci_dev;
 
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+	enum pci_mmap_state mmap_state, int write_combine);
 extern void pcibios_set_master(struct pci_dev *dev);
 
 static inline void pcibios_penalize_isa_irq(int irq, int active)
-- 
cgit v0.10.2


From a3c0e0d0032d5bbfd7dc04827a257c717d432a5b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 16:14:29 +0900
Subject: sh: pci: Consolidate pcibios_align_resource() definitions.

This introduces a saner pcibios_align_resource() that can be used
regardless of whether pci-auto or pci-new are being used, and
consolidates it in pci-lib.c.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-se/7751/pci.c b/arch/sh/boards/mach-se/7751/pci.c
index 203b292..9ec64a4 100644
--- a/arch/sh/boards/mach-se/7751/pci.c
+++ b/arch/sh/boards/mach-se/7751/pci.c
@@ -30,6 +30,9 @@
 #define PCIC_WRITE(x,v) writel((v), PCI_REG(x))
 #define PCIC_READ(x) readl(PCI_REG(x))
 
+#define xPCIBIOS_MIN_IO		board_pci_channels->io_resource->start
+#define xPCIBIOS_MIN_MEM	board_pci_channels->mem_resource->start
+
 /*
  * Description:  This function sets up and initializes the pcic, sets
  * up the BARS, maps the DRAM into the address space etc, etc.
@@ -97,12 +100,12 @@ int __init pcibios_init_platform(void)
     * meaning all calls go straight through... use BUG_ON to
     * catch erroneous assumption.
     */
-   BUG_ON(PCIBIOS_MIN_MEM != SH7751_PCI_MEMORY_BASE);
+   BUG_ON(xPCIBIOS_MIN_MEM != SH7751_PCI_MEMORY_BASE);
 
-   PCIC_WRITE(SH7751_PCIMBR, PCIBIOS_MIN_MEM);
+   PCIC_WRITE(SH7751_PCIMBR, xPCIBIOS_MIN_MEM);
 
    /* Set IOBR for window containing area specified in pci.h */
-   PCIC_WRITE(SH7751_PCIIOBR, (PCIBIOS_MIN_IO & SH7751_PCIIOBR_MASK));
+   PCIC_WRITE(SH7751_PCIIOBR, (xPCIBIOS_MIN_IO & SH7751_PCIIOBR_MASK));
 
    /* All done, may as well say so... */
    printk("SH7751 PCI: Finished initialization of the PCI controller\n");
diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c
index 1a43a35..8ab1a2d 100644
--- a/arch/sh/drivers/pci/pci-lib.c
+++ b/arch/sh/drivers/pci/pci-lib.c
@@ -4,6 +4,41 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 
+unsigned long PCIBIOS_MIN_IO = 0x0000;
+unsigned long PCIBIOS_MIN_MEM = 0;
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+			    resource_size_t size, resource_size_t align)
+{
+	struct pci_dev *dev = data;
+	struct pci_channel *chan = dev->sysdata;
+	resource_size_t start = res->start;
+
+	if (res->flags & IORESOURCE_IO) {
+		if (start < PCIBIOS_MIN_IO + chan->io_resource->start)
+			start = PCIBIOS_MIN_IO + chan->io_resource->start;
+
+		/*
+                 * Put everything into 0x00-0xff region modulo 0x400.
+		 */
+		if (start & 0x300) {
+			start = (start + 0x3ff) & ~0x3ff;
+			res->start = start;
+		}
+	} else if (res->flags & IORESOURCE_MEM) {
+		if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start)
+			start = PCIBIOS_MIN_MEM + chan->mem_resource->start;
+	}
+
+	res->start = start;
+}
+
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine)
 {
@@ -24,3 +59,10 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			       vma->vm_end - vma->vm_start,
 			       vma->vm_page_prot);
 }
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+EXPORT_SYMBOL(PCIBIOS_MIN_IO);
+EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
+#endif
diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
index 097eb88..4e9251f 100644
--- a/arch/sh/drivers/pci/pci-new.c
+++ b/arch/sh/drivers/pci/pci-new.c
@@ -129,29 +129,6 @@ pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 	res->end = region->end + offset;
 }
 
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
-			    __attribute__ ((weak));
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- */
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
-{
-	if (res->flags & IORESOURCE_IO) {
-		resource_size_t start = res->start;
-
-		if (start & 0x300) {
-			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
-	}
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	u16 cmd, old_cmd;
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 6d659cd..f670988 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -86,29 +86,6 @@ void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 	res->end = region->end;
 }
 
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
-			    __attribute__ ((weak));
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- */
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
-{
-	if (res->flags & IORESOURCE_IO) {
-		resource_size_t start = res->start;
-
-		if (start & 0x300) {
-			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
-	}
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	u16 cmd, old_cmd;
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 46afd44..5212bf6 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -33,10 +33,7 @@ struct pci_channel {
  */
 extern struct pci_channel board_pci_channels[];
 
-/* ugly as hell, but makes drivers/pci/setup-res.c compile and work */
-#define __PCI_CHAN(bus)		((struct pci_channel *)bus->sysdata)
-#define PCIBIOS_MIN_IO		__PCI_CHAN(bus)->io_resource->start
-#define PCIBIOS_MIN_MEM		__PCI_CHAN(bus)->mem_resource->start
+extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM;
 
 struct pci_dev;
 
-- 
cgit v0.10.2


From 394b6d2fe624246e258a218dac68d44fe9a8411f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 16:18:46 +0900
Subject: sh: pci: Kill off unused pcibios_fixup().

This is left over cruft that hasn't been used by anything in a long time,
kill off bits that weren't purged previously.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c
index dd2c5df..b64f2b9 100644
--- a/arch/sh/drivers/pci/ops-snapgear.c
+++ b/arch/sh/drivers/pci/ops-snapgear.c
@@ -85,8 +85,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 
 	return irq;
 }
-
-void __init pcibios_fixup(void)
-{
-	/* Nothing to fixup .. */
-}
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 5212bf6..e8265fd 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -138,7 +138,6 @@ static inline void __iomem *__get_pci_io_base(unsigned long port,
 #endif
 
 /* Board-specific fixup routines. */
-void pcibios_fixup(void);
 int pcibios_init_platform(void);
 int pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin);
 
-- 
cgit v0.10.2


From 0bb34a6bf1f71d5ad2abfda582a2c2794957bc7b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 16:38:00 +0900
Subject: sh: pci: Consolidate pci_iomap() and use the generic I/O base.

This consolidates the pci_iomap() definitions and reworks how the I/O
port base is handled. PCI channels can register their own I/O map base,
or if none is provided, the system-wide generic I/O base is used instead.

Functionally nothing changes, while this allows us to kill off lots of
I/O address special casing and lookups.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c
index 8ab1a2d..654ffcc 100644
--- a/arch/sh/drivers/pci/pci-lib.c
+++ b/arch/sh/drivers/pci/pci-lib.c
@@ -60,6 +60,57 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			       vma->vm_page_prot);
 }
 
+static void __iomem *ioport_map_pci(struct pci_dev *dev,
+				    unsigned long port, unsigned int nr)
+{
+	struct pci_channel *chan = dev->sysdata;
+
+	if (!chan->io_map_base)
+		chan->io_map_base = generic_io_base;
+
+	return (void __iomem *)(chan->io_map_base + port);
+}
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	resource_size_t start = pci_resource_start(dev, bar);
+	resource_size_t len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (unlikely(!len || !start))
+		return NULL;
+	if (maxlen && len > maxlen)
+		len = maxlen;
+
+	if (flags & IORESOURCE_IO)
+		return ioport_map_pci(dev, start, len);
+
+	/*
+	 * Presently the IORESOURCE_MEM case is a bit special, most
+	 * SH7751 style PCI controllers have PCI memory at a fixed
+	 * location in the address space where no remapping is desired.
+	 * With the IORESOURCE_MEM case more care has to be taken
+	 * to inhibit page table mapping for legacy cores, but this is
+	 * punted off to __ioremap().
+	 *					-- PFM.
+	 */
+	if (flags & IORESOURCE_MEM) {
+		if (flags & IORESOURCE_CACHEABLE)
+			return ioremap(start, len);
+
+		return ioremap_nocache(start, len);
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(pci_iomap);
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	iounmap(addr);
+}
+EXPORT_SYMBOL(pci_iounmap);
+
 #ifdef CONFIG_HOTPLUG
 EXPORT_SYMBOL(pcibios_resource_to_bus);
 EXPORT_SYMBOL(pcibios_bus_to_resource);
diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
index 4e9251f..c92e650 100644
--- a/arch/sh/drivers/pci/pci-new.c
+++ b/arch/sh/drivers/pci/pci-new.c
@@ -187,39 +187,4 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (unlikely(!len || !start))
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-
-	/*
-	 * Presently the IORESOURCE_MEM case is a bit special, most
-	 * SH7751 style PCI controllers have PCI memory at a fixed
-	 * location in the address space where no remapping is desired.
-	 * With the IORESOURCE_MEM case more care has to be taken
-	 * to inhibit page table mapping for legacy cores, but this is
-	 * punted off to __ioremap().
-	 *					-- PFM.
-	 */
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM)
-		return ioremap(start, len);
-
-	return NULL;
-}
-EXPORT_SYMBOL(pci_iomap);
-
-void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
-{
-	iounmap(addr);
-}
-EXPORT_SYMBOL(pci_iounmap);
-
 EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index f670988..d39f240 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -144,39 +144,4 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (unlikely(!len || !start))
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-
-	/*
-	 * Presently the IORESOURCE_MEM case is a bit special, most
-	 * SH7751 style PCI controllers have PCI memory at a fixed
-	 * location in the address space where no remapping is desired.
-	 * With the IORESOURCE_MEM case more care has to be taken
-	 * to inhibit page table mapping for legacy cores, but this is
-	 * punted off to __ioremap().
-	 *					-- PFM.
-	 */
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM)
-		return ioremap(start, len);
-
-	return NULL;
-}
-EXPORT_SYMBOL(pci_iomap);
-
-void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
-{
-	iounmap(addr);
-}
-EXPORT_SYMBOL(pci_iounmap);
-
 EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index e8265fd..5324282 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -26,6 +26,8 @@ struct pci_channel {
 	int enabled;
 	unsigned long reg_base;
 	unsigned long io_base;
+
+	unsigned long io_map_base;
 };
 
 /*
@@ -110,31 +112,11 @@ static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size)
 	}
 	return 0;
 }
-
-static inline void __iomem *__get_pci_io_base(unsigned long port,
-					      unsigned long size)
-{
-	struct pci_channel *p;
-	struct resource *res;
-
-	for (p = board_pci_channels; p->init; p++) {
-		res = p->io_resource;
-		if (p->enabled && (port >= res->start) &&
-		    (port + size) <= (res->end + 1))
-			return (void __iomem *)(p->io_base + port);
-	}
-	return NULL;
-}
 #else
 static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size)
 {
 	return 0;
 }
-static inline void __iomem *__get_pci_io_base(unsigned long port,
-					      unsigned long size)
-{
-	return NULL;
-}
 #endif
 
 /* Board-specific fixup routines. */
diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
index 59fb020..4f85fff 100644
--- a/arch/sh/kernel/io.c
+++ b/arch/sh/kernel/io.c
@@ -70,10 +70,6 @@ void __iomem *ioport_map(unsigned long port, unsigned int nr)
 	if (ret)
 		return ret;
 
-	ret = __get_pci_io_base(port, nr);
-	if (ret)
-		return ret;
-
 	return __ioport_map(port, nr);
 }
 EXPORT_SYMBOL(ioport_map);
-- 
cgit v0.10.2


From 67667263674663767ddf4250bab2437a00ee780e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 20 Apr 2009 10:49:25 +0200
Subject: ALSA: hda - Fix channels_max setting for CA0110

Added the missing definition of max channels for CA0110, which resulted
in an error at opening PCM devices.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_ca0110.c b/sound/pci/hda/patch_ca0110.c
index 7ec41da..9398d92 100644
--- a/sound/pci/hda/patch_ca0110.c
+++ b/sound/pci/hda/patch_ca0110.c
@@ -309,7 +309,7 @@ static int ca0110_build_pcms(struct hda_codec *codec)
 	info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ca0110_pcm_analog_playback;
 	info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dacs[0];
 	info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max =
-		spec->multiout.num_dacs * 2;
+		spec->multiout.max_channels;
 	info->stream[SNDRV_PCM_STREAM_CAPTURE] = ca0110_pcm_analog_capture;
 	info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = spec->num_inputs;
 	info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[0];
@@ -418,6 +418,7 @@ static void parse_line_outs(struct hda_codec *codec)
 	}
 	spec->multiout.dac_nids = spec->dacs;
 	spec->multiout.num_dacs = n;
+	spec->multiout.max_channels = n * 2;
 }
 
 static void parse_hp_out(struct hda_codec *codec)
-- 
cgit v0.10.2


From 7670dc41b51983b369f9adfb8694a580e7b0cef2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 20 Apr 2009 10:51:11 +0200
Subject: ALSA: hda - Use snd_hda_codec_get_pincfg() in patch_ca0110.c

Use the new function to reduce the access and allow the user setup
via sysfs, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_ca0110.c b/sound/pci/hda/patch_ca0110.c
index 9398d92..392d108 100644
--- a/sound/pci/hda/patch_ca0110.c
+++ b/sound/pci/hda/patch_ca0110.c
@@ -408,8 +408,7 @@ static void parse_line_outs(struct hda_codec *codec)
 	n = 0;
 	for (i = 0; i < cfg->line_outs; i++) {
 		nid = cfg->line_out_pins[i];
-		def_conf = snd_hda_codec_read(codec, nid, 0,
-					      AC_VERB_GET_CONFIG_DEFAULT, 0);
+		def_conf = snd_hda_codec_get_pincfg(codec, nid);
 		if (!def_conf)
 			continue; /* invalid pin */
 		if (snd_hda_get_connections(codec, nid, &spec->dacs[i], 1) != 1)
@@ -432,8 +431,7 @@ static void parse_hp_out(struct hda_codec *codec)
 	if (!cfg->hp_outs)
 		return;
 	nid = cfg->hp_pins[0];
-	def_conf = snd_hda_codec_read(codec, nid, 0,
-				      AC_VERB_GET_CONFIG_DEFAULT, 0);
+	def_conf = snd_hda_codec_get_pincfg(codec, nid);
 	if (!def_conf) {
 		cfg->hp_outs = 0;
 		return;
-- 
cgit v0.10.2


From f3b9aae16219aaeca2dd5a9ca69f7a10faa063df Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 19 Apr 2009 23:39:33 +0200
Subject: tracing/ring-buffer: Add unlock recursion protection on discard

The pair of helpers trace_recursive_lock() and trace_recursive_unlock()
have been introduced recently to provide generic tracing recursion
protection.

They are used in a symetric way:

 - trace_recursive_lock() on buffer reserve
 - trace_recursive_unlock() on buffer commit

However sometimes, we don't commit but discard on entry
to the buffer, ie: in case of filter checking.

Then we must also unlock the recursion protection on discard time,
otherwise the tracing gets definitely deactivated and a warning
is raised spuriously, such as:

111.119821] ------------[ cut here ]------------
[  111.119829] WARNING: at kernel/trace/ring_buffer.c:1498 ring_buffer_lock_reserve+0x1b7/0x1d0()
[  111.119835] Hardware name: AMILO Li 2727
[  111.119839] Modules linked in:
[  111.119846] Pid: 5731, comm: Xorg Tainted: G        W  2.6.30-rc1 #69
[  111.119851] Call Trace:
[  111.119863]  [<ffffffff8025ce68>] warn_slowpath+0xd8/0x130
[  111.119873]  [<ffffffff8028a30f>] ? __lock_acquire+0x19f/0x1ae0
[  111.119882]  [<ffffffff8028a30f>] ? __lock_acquire+0x19f/0x1ae0
[  111.119891]  [<ffffffff802199b0>] ? native_sched_clock+0x20/0x70
[  111.119899]  [<ffffffff80286dee>] ? put_lock_stats+0xe/0x30
[  111.119906]  [<ffffffff80286eb8>] ? lock_release_holdtime+0xa8/0x150
[  111.119913]  [<ffffffff802c8ae7>] ring_buffer_lock_reserve+0x1b7/0x1d0
[  111.119921]  [<ffffffff802cd110>] trace_buffer_lock_reserve+0x30/0x70
[  111.119930]  [<ffffffff802ce000>] trace_current_buffer_lock_reserve+0x20/0x30
[  111.119939]  [<ffffffff802474e8>] ftrace_raw_event_sched_switch+0x58/0x100
[  111.119948]  [<ffffffff808103b7>] __schedule+0x3a7/0x4cd
[  111.119957]  [<ffffffff80211b56>] ? ftrace_call+0x5/0x2b
[  111.119964]  [<ffffffff80211b56>] ? ftrace_call+0x5/0x2b
[  111.119971]  [<ffffffff80810c08>] schedule+0x18/0x40
[  111.119977]  [<ffffffff80810e09>] preempt_schedule+0x39/0x60
[  111.119985]  [<ffffffff80813bd3>] _read_unlock+0x53/0x60
[  111.119993]  [<ffffffff807259d2>] sock_def_readable+0x72/0x80
[  111.120002]  [<ffffffff807ad5ed>] unix_stream_sendmsg+0x24d/0x3d0
[  111.120011]  [<ffffffff807219a3>] sock_aio_write+0x143/0x160
[  111.120019]  [<ffffffff80211b56>] ? ftrace_call+0x5/0x2b
[  111.120026]  [<ffffffff80721860>] ? sock_aio_write+0x0/0x160
[  111.120033]  [<ffffffff80721860>] ? sock_aio_write+0x0/0x160
[  111.120042]  [<ffffffff8031c283>] do_sync_readv_writev+0xf3/0x140
[  111.120049]  [<ffffffff80211b56>] ? ftrace_call+0x5/0x2b
[  111.120057]  [<ffffffff80276ff0>] ? autoremove_wake_function+0x0/0x40
[  111.120067]  [<ffffffff8045d489>] ? cap_file_permission+0x9/0x10
[  111.120074]  [<ffffffff8045c1e6>] ? security_file_permission+0x16/0x20
[  111.120082]  [<ffffffff8031cab4>] do_readv_writev+0xd4/0x1f0
[  111.120089]  [<ffffffff80211b56>] ? ftrace_call+0x5/0x2b
[  111.120097]  [<ffffffff80211b56>] ? ftrace_call+0x5/0x2b
[  111.120105]  [<ffffffff8031cc18>] vfs_writev+0x48/0x70
[  111.120111]  [<ffffffff8031cd65>] sys_writev+0x55/0xc0
[  111.120119]  [<ffffffff80211e32>] system_call_fastpath+0x16/0x1b
[  111.120125] ---[ end trace 15605f4e98d5ccb5 ]---

[ Impact: fix spurious warning triggering tracing shutdown ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bffde63..e145969 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1642,6 +1642,14 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
+static inline void rb_event_discard(struct ring_buffer_event *event)
+{
+	event->type = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	if (!event->time_delta)
+		event->time_delta = 1;
+}
+
 /**
  * ring_buffer_event_discard - discard any event in the ring buffer
  * @event: the event to discard
@@ -1656,10 +1664,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
  */
 void ring_buffer_event_discard(struct ring_buffer_event *event)
 {
-	event->type = RINGBUF_TYPE_PADDING;
-	/* time delta must be non zero */
-	if (!event->time_delta)
-		event->time_delta = 1;
+	rb_event_discard(event);
+	trace_recursive_unlock();
 }
 EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
 
@@ -1690,7 +1696,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	int cpu;
 
 	/* The event is discarded regardless */
-	ring_buffer_event_discard(event);
+	rb_event_discard(event);
 
 	/*
 	 * This must only be called if the event has not been
@@ -1735,6 +1741,8 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	if (rb_is_commit(cpu_buffer, event))
 		rb_set_commit_to_write(cpu_buffer);
 
+	trace_recursive_unlock();
+
 	/*
 	 * Only the last preempt count needs to restore preemption.
 	 */
-- 
cgit v0.10.2


From 99f95f117848088f2708b45c70be73152e78bb8a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 18:24:57 +0900
Subject: sh: pci: Rework fixed region checks in ioremap().

Not all PCI channels have non-translatable memory windows, this is a
special property of the on-chip PCIC with its 0xfd00... mapping, handle
this explicitly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 5324282..82a9369 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -90,7 +90,6 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
 #endif
 
-#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -99,24 +98,18 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strategy_parameter = ~0UL;
 }
 
-static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size)
-{
-	struct pci_channel *p;
-	struct resource *res;
-
-	for (p = board_pci_channels; p->init; p++) {
-		res = p->mem_resource;
-		if (p->enabled && (phys_addr >= res->start) &&
-		    (phys_addr + size) <= (res->end + 1))
-			return 1;
-	}
-	return 0;
-}
+#ifdef CONFIG_SUPERH32
+/*
+ * If we're on an SH7751 or SH7780 PCI controller, PCI memory is mapped
+ * at the end of the address space in a special non-translatable area.
+ */
+#define PCI_MEM_FIXED_START	0xfd000000
+#define PCI_MEM_FIXED_END	(PCI_MEM_FIXED_START + 0x01000000)
+
+#define is_pci_memory_fixed_range(s, e)	\
+	((s) >= PCI_MEM_FIXED_START && (e) < PCI_MEM_FIXED_END)
 #else
-static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size)
-{
-	return 0;
-}
+#define is_pci_memory_fixed_range(s, e)	(0)
 #endif
 
 /* Board-specific fixup routines. */
diff --git a/arch/sh/mm/ioremap_32.c b/arch/sh/mm/ioremap_32.c
index 7e04cc8..da2f418 100644
--- a/arch/sh/mm/ioremap_32.c
+++ b/arch/sh/mm/ioremap_32.c
@@ -46,17 +46,15 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 		return NULL;
 
 	/*
-	 * If we're on an SH7751 or SH7780 PCI controller, PCI memory is
-	 * mapped at the end of the address space (typically 0xfd000000)
-	 * in a non-translatable area, so mapping through page tables for
-	 * this area is not only pointless, but also fundamentally
-	 * broken. Just return the physical address instead.
+	 * If we're in the fixed PCI memory range, mapping through page
+	 * tables is not only pointless, but also fundamentally broken.
+	 * Just return the physical address instead.
 	 *
 	 * For boards that map a small PCI memory aperture somewhere in
 	 * P1/P2 space, ioremap() will already do the right thing,
 	 * and we'll never get this far.
 	 */
-	if (__is_pci_memory(phys_addr, size))
+	if (is_pci_memory_fixed_range(phys_addr, size))
 		return (void __iomem *)phys_addr;
 
 #if !defined(CONFIG_PMB_FIXED)
@@ -121,7 +119,9 @@ void __iounmap(void __iomem *addr)
 	unsigned long seg = PXSEG(vaddr);
 	struct vm_struct *p;
 
-	if (seg < P3SEG || vaddr >= P3_ADDR_MAX || __is_pci_memory(vaddr, 0))
+	if (seg < P3SEG || vaddr >= P3_ADDR_MAX)
+		return;
+	if (is_pci_memory_fixed_range(vaddr, 0))
 		return;
 
 #ifdef CONFIG_PMB
-- 
cgit v0.10.2


From e79066a659b893debe19010179d3f3f015d76d1c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 18:29:22 +0900
Subject: sh: pci: New-style controller registration.

This moves off of the board_pci_channels[] approach for bus registration
and over to a cleaner register_pci_controller(), all derived from the
MIPS code.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
index c92e650..78b7292 100644
--- a/arch/sh/drivers/pci/pci-new.c
+++ b/arch/sh/drivers/pci/pci-new.c
@@ -13,40 +13,90 @@
 #include <linux/init.h>
 #include <linux/dma-debug.h>
 #include <linux/io.h>
+#include <linux/mutex.h>
 
-static int __init pcibios_init(void)
+/*
+ * The PCI controller list.
+ */
+static struct pci_channel *hose_head, **hose_tail = &hose_head;
+
+static int pci_initialized;
+
+static void __devinit pcibios_scanbus(struct pci_channel *hose)
 {
-	struct pci_channel *p;
+	static int next_busno;
 	struct pci_bus *bus;
-	int busno;
-
-	/* init channels */
-	busno = 0;
-	for (p = board_pci_channels; p->init; p++) {
-		if (p->init(p) == 0)
-			p->enabled = 1;
-		else
-			pr_err("Unable to init pci channel %d\n", busno);
-		busno++;
+
+	/* Catch botched conversion attempts */
+	BUG_ON(hose->init);
+
+	bus = pci_scan_bus(next_busno, hose->pci_ops, hose);
+	if (bus) {
+		next_busno = bus->subordinate + 1;
+		/* Don't allow 8-bit bus number overflow inside the hose -
+		   reserve some space for bridges. */
+		if (next_busno > 224)
+			next_busno = 0;
+
+		pci_bus_size_bridges(bus);
+		pci_bus_assign_resources(bus);
+		pci_enable_bridges(bus);
 	}
+}
 
-	/* scan the buses */
-	busno = 0;
-	for (p = board_pci_channels; p->init; p++) {
-		if (p->enabled) {
-			bus = pci_scan_bus(busno, p->pci_ops, p);
-			busno = bus->subordinate + 1;
+static DEFINE_MUTEX(pci_scan_mutex);
 
-			pci_bus_size_bridges(bus);
-			pci_bus_assign_resources(bus);
-			pci_enable_bridges(bus);
-		}
+void __devinit register_pci_controller(struct pci_channel *hose)
+{
+	if (request_resource(&iomem_resource, hose->mem_resource) < 0)
+		goto out;
+	if (request_resource(&ioport_resource, hose->io_resource) < 0) {
+		release_resource(hose->mem_resource);
+		goto out;
+	}
+
+	*hose_tail = hose;
+	hose_tail = &hose->next;
+
+	/*
+	 * Do not panic here but later - this might hapen before console init.
+	 */
+	if (!hose->io_map_base) {
+		printk(KERN_WARNING
+		       "registering PCI controller with io_map_base unset\n");
+	}
+
+	/*
+	 * Scan the bus if it is register after the PCI subsystem
+	 * initialization.
+	 */
+	if (pci_initialized) {
+		mutex_lock(&pci_scan_mutex);
+		pcibios_scanbus(hose);
+		mutex_unlock(&pci_scan_mutex);
 	}
 
+	return;
+
+out:
+	printk(KERN_WARNING
+	       "Skipping PCI bus scan due to resource conflict\n");
+}
+
+static int __init pcibios_init(void)
+{
+	struct pci_channel *hose;
+
+	/* Scan all of the recorded PCI controllers.  */
+	for (hose = hose_head; hose; hose = hose->next)
+		pcibios_scanbus(hose);
+
 	pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
 
 	dma_debug_add_bus(&pci_bus_type);
 
+	pci_initialized = 1;
+
 	return 0;
 }
 subsys_initcall(pcibios_init);
@@ -74,7 +124,6 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev,
 	}
 }
 
-
 /*
  *  Called after each bus is probed, but before its children
  *  are examined.
@@ -186,5 +235,3 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
-
-EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index f02d9df..4dd6e3b 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -15,11 +15,47 @@
 #include <linux/delay.h>
 #include "pci-sh4.h"
 
-static int __init sh7780_pci_init(struct pci_channel *chan)
+extern u8 pci_cache_line_size;
+
+static struct resource sh7785_io_resource = {
+	.name	= "SH7785_IO",
+	.start	= SH7780_PCI_IO_BASE,
+	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
+	.flags	= IORESOURCE_IO
+};
+
+static struct resource sh7785_mem_resource = {
+	.name	= "SH7785_mem",
+	.start	= SH7780_PCI_MEMORY_BASE,
+	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM
+};
+
+static struct pci_channel sh7780_pci_controller = {
+	.pci_ops	= &sh4_pci_ops,
+	.mem_resource	= &sh7785_mem_resource,
+	.io_resource	= &sh7785_io_resource,
+};
+
+static struct sh4_pci_address_map sh7780_pci_map = {
+	.window0	= {
+#if defined(CONFIG_32BIT)
+		.base	= SH7780_32BIT_DDR_BASE_ADDR,
+		.size	= 0x40000000,
+#else
+		.base	= SH7780_CS0_BASE_ADDR,
+		.size	= 0x20000000,
+#endif
+	},
+};
+
+static int __init sh7780_pci_init(void)
 {
+	struct pci_channel *chan = &sh7780_pci_controller;
 	unsigned int id;
 	const char *type = NULL;
 	int ret;
+	u32 word;
 
 	printk(KERN_NOTICE "PCI: Starting intialization.\n");
 
@@ -55,52 +91,6 @@ static int __init sh7780_pci_init(struct pci_channel *chan)
 		return ret;
 
 	/*
-	 * Platform specific initialization (BSC registers, and memory space
-	 * mapping) will be called via the platform defined function
-	 * pcibios_init_platform().
-	 */
-	return pcibios_init_platform();
-}
-
-extern u8 pci_cache_line_size;
-
-static struct resource sh7785_io_resource = {
-	.name	= "SH7785_IO",
-	.start	= SH7780_PCI_IO_BASE,
-	.end	= SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7785_mem_resource = {
-	.name	= "SH7785_mem",
-	.start	= SH7780_PCI_MEMORY_BASE,
-	.end	= SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7780_pci_init, &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
-static struct sh4_pci_address_map sh7780_pci_map = {
-	.window0	= {
-#if defined(CONFIG_32BIT)
-		.base	= SH7780_32BIT_DDR_BASE_ADDR,
-		.size	= 0x40000000,
-#else
-		.base	= SH7780_CS0_BASE_ADDR,
-		.size	= 0x20000000,
-#endif
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	struct pci_channel *chan = &board_pci_channels[0];
-	u32 word;
-
-	/*
 	 * Set the class and sub-class codes.
 	 */
 	__raw_writeb(PCI_CLASS_BRIDGE_HOST >> 8,
@@ -153,5 +143,8 @@ int __init pcibios_init_platform(void)
 
 	__set_io_port_base(SH7780_PCI_IO_BASE);
 
+	register_pci_controller(chan);
+
 	return 0;
 }
+arch_initcall(sh7780_pci_init);
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 82a9369..e057ebd 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -17,17 +17,22 @@
  * external) PCI controllers.
  */
 struct pci_channel {
-	int (*init)(struct pci_channel *chan);
-	struct pci_ops *pci_ops;
-	struct resource *io_resource;
-	struct resource *mem_resource;
-	int first_devfn;
-	int last_devfn;
-	int enabled;
-	unsigned long reg_base;
-	unsigned long io_base;
-
-	unsigned long io_map_base;
+	struct pci_channel	*next;
+
+	int			(*init)(struct pci_channel *chan);
+
+	struct pci_ops		*pci_ops;
+	struct resource		*io_resource;
+	struct resource		*mem_resource;
+
+	int			first_devfn;
+	int			last_devfn;
+	int			enabled;
+
+	unsigned long		reg_base;
+	unsigned long		io_base;
+
+	unsigned long		io_map_base;
 };
 
 /*
@@ -35,6 +40,8 @@ struct pci_channel {
  */
 extern struct pci_channel board_pci_channels[];
 
+extern void register_pci_controller(struct pci_channel *hose);
+
 extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM;
 
 struct pci_dev;
-- 
cgit v0.10.2


From 09cfeb133e3cac39b8b9a2cb1d8ab4f77e396248 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 18:42:00 +0900
Subject: sh: pci: Track io and mem_offset per-channel.

This implements a per-hose offset for I/O and mem resources.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
index 78b7292..e8ac8da 100644
--- a/arch/sh/drivers/pci/pci-new.c
+++ b/arch/sh/drivers/pci/pci-new.c
@@ -105,7 +105,7 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev,
 	struct pci_bus *bus)
 {
 	/* Update device resources.  */
-	struct pci_channel *chan = bus->sysdata;
+	struct pci_channel *hose = bus->sysdata;
 	unsigned long offset = 0;
 	int i;
 
@@ -115,9 +115,9 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev,
 		if (dev->resource[i].flags & IORESOURCE_PCI_FIXED)
 			continue;
 		if (dev->resource[i].flags & IORESOURCE_IO)
-			offset = chan->io_base;
+			offset = hose->io_offset;
 		else if (dev->resource[i].flags & IORESOURCE_MEM)
-			offset = 0;
+			offset = hose->mem_offset;
 
 		dev->resource[i].start += offset;
 		dev->resource[i].end += offset;
@@ -150,13 +150,13 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
 void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
 			 struct resource *res)
 {
-	struct pci_channel *chan = dev->sysdata;
+	struct pci_channel *hose = dev->sysdata;
 	unsigned long offset = 0;
 
 	if (res->flags & IORESOURCE_IO)
-		offset = chan->io_base;
+		offset = hose->io_offset;
 	else if (res->flags & IORESOURCE_MEM)
-		offset = 0;
+		offset = hose->mem_offset;
 
 	region->start = res->start - offset;
 	region->end = res->end - offset;
@@ -166,13 +166,13 @@ void __devinit
 pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 			struct pci_bus_region *region)
 {
-	struct pci_channel *chan = dev->sysdata;
+	struct pci_channel *hose = dev->sysdata;
 	unsigned long offset = 0;
 
 	if (res->flags & IORESOURCE_IO)
-		offset = chan->io_base;
+		offset = hose->io_offset;
 	else if (res->flags & IORESOURCE_MEM)
-		offset = 0;
+		offset = hose->mem_offset;
 
 	res->start = region->start + offset;
 	res->end = region->end + offset;
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 4dd6e3b..57a3b87 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -34,7 +34,9 @@ static struct resource sh7785_mem_resource = {
 static struct pci_channel sh7780_pci_controller = {
 	.pci_ops	= &sh4_pci_ops,
 	.mem_resource	= &sh7785_mem_resource,
+	.mem_offset	= 0x00000000,
 	.io_resource	= &sh7785_io_resource,
+	.io_offset	= 0x00000000,
 };
 
 static struct sh4_pci_address_map sh7780_pci_map = {
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index e057ebd..0be2052 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -25,6 +25,9 @@ struct pci_channel {
 	struct resource		*io_resource;
 	struct resource		*mem_resource;
 
+	unsigned long		io_offset;
+	unsigned long		mem_offset;
+
 	int			first_devfn;
 	int			last_devfn;
 	int			enabled;
-- 
cgit v0.10.2


From 5160d3f782a5e0cdb3bdaa8a891a1fb9d9ab83ec Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 18:47:21 +0900
Subject: sh: pci: Consolidate bus<->resource mapping in pci-lib.

Now that the io and mem offsets are tracked accordingly, the pci-new
version of the bus<->resource mappers can be used generically. This
moves them in to pci-lib.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c
index 654ffcc..9fd3af9 100644
--- a/arch/sh/drivers/pci/pci-lib.c
+++ b/arch/sh/drivers/pci/pci-lib.c
@@ -39,6 +39,37 @@ void pcibios_align_resource(void *data, struct resource *res,
 	res->start = start;
 }
 
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			 struct resource *res)
+{
+	struct pci_channel *hose = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = hose->io_offset;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = hose->mem_offset;
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
+}
+
+void __devinit
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region)
+{
+	struct pci_channel *hose = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = hose->io_offset;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = hose->mem_offset;
+
+	res->start = region->start + offset;
+	res->end = region->end + offset;
+}
+
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine)
 {
diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
index e8ac8da..9d42614 100644
--- a/arch/sh/drivers/pci/pci-new.c
+++ b/arch/sh/drivers/pci/pci-new.c
@@ -147,37 +147,6 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-			 struct resource *res)
-{
-	struct pci_channel *hose = dev->sysdata;
-	unsigned long offset = 0;
-
-	if (res->flags & IORESOURCE_IO)
-		offset = hose->io_offset;
-	else if (res->flags & IORESOURCE_MEM)
-		offset = hose->mem_offset;
-
-	region->start = res->start - offset;
-	region->end = res->end - offset;
-}
-
-void __devinit
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-			struct pci_bus_region *region)
-{
-	struct pci_channel *hose = dev->sysdata;
-	unsigned long offset = 0;
-
-	if (res->flags & IORESOURCE_IO)
-		offset = hose->io_offset;
-	else if (res->flags & IORESOURCE_MEM)
-		offset = hose->mem_offset;
-
-	res->start = region->start + offset;
-	res->end = region->end + offset;
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	u16 cmd, old_cmd;
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index d39f240..c15a6f0 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -72,20 +72,6 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
 	pci_read_bridge_bases(bus);
 }
 
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-			     struct resource *res)
-{
-	region->start = res->start;
-	region->end = res->end;
-}
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-			     struct pci_bus_region *region)
-{
-	res->start = region->start;
-	res->end = region->end;
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	u16 cmd, old_cmd;
-- 
cgit v0.10.2


From 3f8daeacd7ed7a502daf0998e2515cea4f467f21 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 18:53:41 +0900
Subject: sh: pci: Consolidate the remaining common bits.

This moves the remaining common bits in to pci-lib. Thereby reducing
pci.c/pci-new.c to simple bus fixups and controller registration.

As more platforms are moved over, the old code will disappear completely
and the pci-new bits will be rolled in to pci-lib, eventually replacing
pci.c completely.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c
index 9fd3af9..ea83629 100644
--- a/arch/sh/drivers/pci/pci-lib.c
+++ b/arch/sh/drivers/pci/pci-lib.c
@@ -70,6 +70,70 @@ pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 	res->end = region->end + offset;
 }
 
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int idx;
+	struct resource *r;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+	for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<idx)))
+			continue;
+
+		r = &dev->resource[idx];
+		if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+		if ((idx == PCI_ROM_RESOURCE) &&
+				(!(r->flags & IORESOURCE_ROM_ENABLE)))
+			continue;
+		if (!r->start && r->end) {
+			printk(KERN_ERR "PCI: Device %s not available "
+			       "because of resource collisions\n",
+			       pci_name(dev));
+			return -EINVAL;
+		}
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+	if (cmd != old_cmd) {
+		printk("PCI: Enabling device %s (%04x -> %04x)\n",
+		       pci_name(dev), old_cmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check and set
+ *  the latency timer as it may not be properly set.
+ */
+static unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n",
+	       pci_name(dev), lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
+
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine)
 {
diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
index 9d42614..8c0b136 100644
--- a/arch/sh/drivers/pci/pci-new.c
+++ b/arch/sh/drivers/pci/pci-new.c
@@ -146,61 +146,3 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
 			pcibios_fixup_device_resources(dev, bus);
 	}
 }
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for(idx=0; idx<6; idx++) {
-		if (!(mask & (1 << idx)))
-			continue;
-		r = &dev->resource[idx];
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available because "
-			       "of resource collisions\n", pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (dev->resource[PCI_ROM_RESOURCE].start)
-		cmd |= PCI_COMMAND_MEMORY;
-	if (cmd != old_cmd) {
-		printk(KERN_INFO "PCI: Enabling device %s (%04x -> %04x)\n",
-		       pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
-/*
- *  If we set up a device for bus mastering, we need to check and set
- *  the latency timer as it may not be properly set.
- */
-static unsigned int pcibios_max_latency = 255;
-
-void pcibios_set_master(struct pci_dev *dev)
-{
-	u8 lat;
-	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
-	if (lat < 16)
-		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
-	else if (lat > pcibios_max_latency)
-		lat = pcibios_max_latency;
-	else
-		return;
-	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n",
-	       pci_name(dev), lat);
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
-}
-
-void __init pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index c15a6f0..8c332b2 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -72,62 +72,4 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
 	pci_read_bridge_bases(bus);
 }
 
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for(idx=0; idx<6; idx++) {
-		if (!(mask & (1 << idx)))
-			continue;
-		r = &dev->resource[idx];
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available because "
-			       "of resource collisions\n", pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (dev->resource[PCI_ROM_RESOURCE].start)
-		cmd |= PCI_COMMAND_MEMORY;
-	if (cmd != old_cmd) {
-		printk(KERN_INFO "PCI: Enabling device %s (%04x -> %04x)\n",
-		       pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
-/*
- *  If we set up a device for bus mastering, we need to check and set
- *  the latency timer as it may not be properly set.
- */
-static unsigned int pcibios_max_latency = 255;
-
-void pcibios_set_master(struct pci_dev *dev)
-{
-	u8 lat;
-	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
-	if (lat < 16)
-		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
-	else if (lat > pcibios_max_latency)
-		lat = pcibios_max_latency;
-	else
-		return;
-	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n",
-	       pci_name(dev), lat);
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
-}
-
-void __init pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 EXPORT_SYMBOL(board_pci_channels);
-- 
cgit v0.10.2


From 5ba7205fc49ff72e88784c94fb661f93e7ae7d36 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 19:00:32 +0900
Subject: sh: pci: Kill off the now unused hose->io_base.

Nothing is using this any more, so kill it off.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index af88744..4c08fd7 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -40,7 +40,6 @@ int __init sh7751_pci_init(struct pci_channel *chan)
 	pr_debug("PCI: Starting intialization.\n");
 
 	chan->reg_base = 0xfe200000;
-	chan->io_base = 0xfe240000;
 
 	/* check for SH7751/SH7751R hardware */
 	id = pci_read_reg(chan, SH7751_PCICONF0);
@@ -136,13 +135,6 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 	pr_debug("PCI: Setting upper bits of Memory window to 0x%x\n", word);
 	pci_write_reg(chan, word , SH4_PCIMBR);
 
-	/* Map IO space into PCI IO window:
-	 * IO addresses will be translated to the PCI IO window base address
-	 */
-	pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%lx\n",
-		 chan->io_resource->start, chan->io_resource->end,
-		 chan->io_base + chan->io_resource->start);
-
 	/* Make sure the MSB's of IO window are set to access PCI space
 	 * correctly */
 	word = chan->io_resource->start & SH4_PCIIOBR_MASK;
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 57a3b87..ae13ff9 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -62,7 +62,6 @@ static int __init sh7780_pci_init(void)
 	printk(KERN_NOTICE "PCI: Starting intialization.\n");
 
 	chan->reg_base = 0xfe040000;
-	chan->io_base = 0xfe200000;
 
 	/* Enable CPU access to the PCIC registers. */
 	__raw_writel(PCIECR_ENBL, PCIECR);
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 0be2052..f36c789 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -33,7 +33,6 @@ struct pci_channel {
 	int			enabled;
 
 	unsigned long		reg_base;
-	unsigned long		io_base;
 
 	unsigned long		io_map_base;
 };
-- 
cgit v0.10.2


From bb3396477bf46c7cae6bd04b969580277957966e Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 19:26:45 +0900
Subject: sh: pci: Kill off superfluous lboxre2 pci fixups.

This is a verbatim copy of the r2d one, use that instead.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index c8eab14..4cac866 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -22,5 +22,5 @@ obj-$(CONFIG_SH_SDK7780)		+= fixups-sdk7780.o
 obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= fixups-sdk7780.o
 obj-$(CONFIG_SH_TITAN)			+= ops-titan.o
 obj-$(CONFIG_SH_LANDISK)		+= ops-landisk.o
-obj-$(CONFIG_SH_LBOX_RE2)		+= ops-lboxre2.o fixups-lboxre2.o
+obj-$(CONFIG_SH_LBOX_RE2)		+= ops-lboxre2.o fixups-rts7751r2d.o
 obj-$(CONFIG_SH_CAYMAN)			+= ops-cayman.o
diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c
deleted file mode 100644
index a82011d..0000000
--- a/arch/sh/drivers/pci/fixups-lboxre2.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * arch/sh/drivers/pci/fixups-lboxre2.c
- *
- * L-BOX RE2 PCI fixups
- *
- * Copyright (C) 2007 Nobuhiro Iwamatsu
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-#define PCIMCR_MRSET_OFF	0xBFFFFFFF
-#define PCIMCR_RFSH_OFF		0xFFFFFFFB
-
-int pci_fixup_pcic(struct pci_channel *chan)
-{
-	unsigned long bcr1, mcr;
-
-	bcr1 = ctrl_inl(SH7751_BCR1);
-	bcr1 |= 0x40080000;	/* Enable Bit 19 BREQEN, set PCIC to slave */
-	pci_write_reg(chan, bcr1, SH4_PCIBCR1);
-
-	/* Enable all interrupts, so we known what to fix */
-	pci_write_reg(chan, 0x0000c3ff, SH4_PCIINTM);
-	pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM);
-	pci_write_reg(chan, 0xfb900047, SH7751_PCICONF1);
-	pci_write_reg(chan, 0xab000001, SH7751_PCICONF4);
-
-	mcr = ctrl_inl(SH7751_MCR);
-	mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
-	pci_write_reg(chan, mcr, SH4_PCIMCR);
-
-	pci_write_reg(chan, 0x0c000000, SH7751_PCICONF5);
-	pci_write_reg(chan, 0xd0000000, SH7751_PCICONF6);
-	pci_write_reg(chan, 0x0c000000, SH4_PCILAR0);
-	pci_write_reg(chan, 0x00000000, SH4_PCILAR1);
-
-	return 0;
-}
-- 
cgit v0.10.2


From d556fcc101c4b0d57ac9742ab806a6bfed78eac1 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 19:31:20 +0900
Subject: sh: pci: Flag the dreamcast BBA as IORESOURCE_PCI_FIXED.

This isn't a real BAR, so prevent any attempts to move it, as we don't
wish to encourage a bus luck by overzealous PCI initialization code.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c
index 2bf85cf..48c6381 100644
--- a/arch/sh/drivers/pci/fixups-dreamcast.c
+++ b/arch/sh/drivers/pci/fixups-dreamcast.c
@@ -41,6 +41,13 @@ static void __init gapspci_fixup_resources(struct pci_dev *dev)
 		 */
 		dev->resource[1].start	= p->io_resource->start  + 0x100;
 		dev->resource[1].end	= dev->resource[1].start + 0x200 - 1;
+
+		/*
+		 * This is not a normal BAR, prevent any attempts to move
+		 * the BAR, as this will result in a bus lock.
+		 */
+		dev->resource[1].flags |= IORESOURCE_PCI_FIXED;
+
 		/*
 		 * Redirect dma memory allocations to special memory window.
 		 */
-- 
cgit v0.10.2


From c563bf0965c86cc6087b09c34ca063fe96d6deca Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 19:39:57 +0900
Subject: sh: pci: Kill off dead references to is_pci_ioaddr and friends.

Some old boards are still using this in their I/O routines, kill it off.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-se/7751/io.c b/arch/sh/boards/mach-se/7751/io.c
index 6287ae5..6e75bd4 100644
--- a/arch/sh/boards/mach-se/7751/io.c
+++ b/arch/sh/boards/mach-se/7751/io.c
@@ -34,8 +34,6 @@ unsigned char sh7751se_inb(unsigned long port)
 {
 	if (PXSEG(port))
 		return *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned char *)pci_ioaddr(port);
 	else
 		return (*port2adr(port)) & 0xff;
 }
@@ -46,8 +44,6 @@ unsigned char sh7751se_inb_p(unsigned long port)
 
         if (PXSEG(port))
                 v = *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-                v = *(volatile unsigned char *)pci_ioaddr(port);
 	else
 		v = (*port2adr(port)) & 0xff;
 	ctrl_delay();
@@ -58,8 +54,6 @@ unsigned short sh7751se_inw(unsigned long port)
 {
         if (PXSEG(port))
                 return *(volatile unsigned short *)port;
-	else if (is_pci_ioaddr(port))
-                return *(volatile unsigned short *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else
@@ -71,8 +65,6 @@ unsigned int sh7751se_inl(unsigned long port)
 {
         if (PXSEG(port))
                 return *(volatile unsigned long *)port;
-	else if (is_pci_ioaddr(port))
-                return *(volatile unsigned int *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else
@@ -85,8 +77,6 @@ void sh7751se_outb(unsigned char value, unsigned long port)
 
         if (PXSEG(port))
                 *(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else
 		*(port2adr(port)) = value;
 }
@@ -95,8 +85,6 @@ void sh7751se_outb_p(unsigned char value, unsigned long port)
 {
         if (PXSEG(port))
                 *(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else
 		*(port2adr(port)) = value;
 	ctrl_delay();
@@ -106,8 +94,6 @@ void sh7751se_outw(unsigned short value, unsigned long port)
 {
         if (PXSEG(port))
                 *(volatile unsigned short *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned short *)pci_ioaddr(port)) = value;
 	else if (port >= 0x2000)
 		*port2adr(port) = value;
 	else
@@ -118,8 +104,6 @@ void sh7751se_outl(unsigned int value, unsigned long port)
 {
         if (PXSEG(port))
                 *(volatile unsigned long *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned long*)pci_ioaddr(port)) = value;
 	else
 		maybebadio(port);
 }
diff --git a/arch/sh/boards/mach-snapgear/io.c b/arch/sh/boards/mach-snapgear/io.c
index 0f48242..476650e 100644
--- a/arch/sh/boards/mach-snapgear/io.c
+++ b/arch/sh/boards/mach-snapgear/io.c
@@ -36,8 +36,6 @@ unsigned char snapgear_inb(unsigned long port)
 {
 	if (PXSEG(port))
 		return *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned char *)pci_ioaddr(port);
 	else
 		return (*port2adr(port)) & 0xff;
 }
@@ -48,8 +46,6 @@ unsigned char snapgear_inb_p(unsigned long port)
 
 	if (PXSEG(port))
 		v = *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-		v = *(volatile unsigned char *)pci_ioaddr(port);
 	else
 		v = (*port2adr(port))&0xff;
 	ctrl_delay();
@@ -60,8 +56,6 @@ unsigned short snapgear_inw(unsigned long port)
 {
 	if (PXSEG(port))
 		return *(volatile unsigned short *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned short *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else
@@ -73,8 +67,6 @@ unsigned int snapgear_inl(unsigned long port)
 {
 	if (PXSEG(port))
 		return *(volatile unsigned long *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned int *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else
@@ -87,8 +79,6 @@ void snapgear_outb(unsigned char value, unsigned long port)
 
 	if (PXSEG(port))
 		*(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else
 		*(port2adr(port)) = value;
 }
@@ -97,8 +87,6 @@ void snapgear_outb_p(unsigned char value, unsigned long port)
 {
 	if (PXSEG(port))
 		*(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else
 		*(port2adr(port)) = value;
 	ctrl_delay();
@@ -108,8 +96,6 @@ void snapgear_outw(unsigned short value, unsigned long port)
 {
 	if (PXSEG(port))
 		*(volatile unsigned short *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned short *)pci_ioaddr(port)) = value;
 	else if (port >= 0x2000)
 		*port2adr(port) = value;
 	else
@@ -120,8 +106,6 @@ void snapgear_outl(unsigned int value, unsigned long port)
 {
 	if (PXSEG(port))
 		*(volatile unsigned long *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned long*)pci_ioaddr(port)) = value;
 	else
 		maybebadio(port);
 }
diff --git a/arch/sh/boards/mach-systemh/io.c b/arch/sh/boards/mach-systemh/io.c
index dec3db0..15577ff 100644
--- a/arch/sh/boards/mach-systemh/io.c
+++ b/arch/sh/boards/mach-systemh/io.c
@@ -35,8 +35,6 @@ unsigned char sh7751systemh_inb(unsigned long port)
 {
 	if (PXSEG(port))
 		return *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-		return *(volatile unsigned char *)pci_ioaddr(port);
 	else if (port <= 0x3F1)
 		return *(volatile unsigned char *)ETHER_IOMAP(port);
 	else
@@ -49,8 +47,6 @@ unsigned char sh7751systemh_inb_p(unsigned long port)
 
         if (PXSEG(port))
                 v = *(volatile unsigned char *)port;
-	else if (is_pci_ioaddr(port))
-                v = *(volatile unsigned char *)pci_ioaddr(port);
 	else if (port <= 0x3F1)
 		v = *(volatile unsigned char *)ETHER_IOMAP(port);
 	else
@@ -63,8 +59,6 @@ unsigned short sh7751systemh_inw(unsigned long port)
 {
         if (PXSEG(port))
                 return *(volatile unsigned short *)port;
-	else if (is_pci_ioaddr(port))
-                return *(volatile unsigned short *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else if (port <= 0x3F1)
@@ -78,8 +72,6 @@ unsigned int sh7751systemh_inl(unsigned long port)
 {
         if (PXSEG(port))
                 return *(volatile unsigned long *)port;
-	else if (is_pci_ioaddr(port))
-                return *(volatile unsigned int *)pci_ioaddr(port);
 	else if (port >= 0x2000)
 		return *port2adr(port);
 	else if (port <= 0x3F1)
@@ -94,8 +86,6 @@ void sh7751systemh_outb(unsigned char value, unsigned long port)
 
         if (PXSEG(port))
                 *(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else if (port <= 0x3F1)
 		*(volatile unsigned char *)ETHER_IOMAP(port) = value;
 	else
@@ -106,8 +96,6 @@ void sh7751systemh_outb_p(unsigned char value, unsigned long port)
 {
         if (PXSEG(port))
                 *(volatile unsigned char *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned char*)pci_ioaddr(port)) = value;
 	else if (port <= 0x3F1)
 		*(volatile unsigned char *)ETHER_IOMAP(port) = value;
 	else
@@ -119,8 +107,6 @@ void sh7751systemh_outw(unsigned short value, unsigned long port)
 {
         if (PXSEG(port))
                 *(volatile unsigned short *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned short *)pci_ioaddr(port)) = value;
 	else if (port >= 0x2000)
 		*port2adr(port) = value;
 	else if (port <= 0x3F1)
@@ -133,8 +119,6 @@ void sh7751systemh_outl(unsigned int value, unsigned long port)
 {
         if (PXSEG(port))
                 *(volatile unsigned long *)port = value;
-	else if (is_pci_ioaddr(port))
-		*((unsigned long*)pci_ioaddr(port)) = value;
 	else
 		maybebadio(port);
 }
diff --git a/arch/sh/boards/mach-titan/io.c b/arch/sh/boards/mach-titan/io.c
index 053b3ed..0130e98 100644
--- a/arch/sh/boards/mach-titan/io.c
+++ b/arch/sh/boards/mach-titan/io.c
@@ -17,8 +17,6 @@ u8 titan_inb(unsigned long port)
 {
         if (PXSEG(port))
                 return ctrl_inb(port);
-        else if (is_pci_ioaddr(port))
-                return ctrl_inb(pci_ioaddr(port));
         return ctrl_inw(port2adr(port)) & 0xff;
 }
 
@@ -28,8 +26,6 @@ u8 titan_inb_p(unsigned long port)
 
         if (PXSEG(port))
                 v = ctrl_inb(port);
-        else if (is_pci_ioaddr(port))
-                v = ctrl_inb(pci_ioaddr(port));
         else
                 v = ctrl_inw(port2adr(port)) & 0xff;
         ctrl_delay();
@@ -40,8 +36,6 @@ u16 titan_inw(unsigned long port)
 {
         if (PXSEG(port))
                 return ctrl_inw(port);
-        else if (is_pci_ioaddr(port))
-                return ctrl_inw(pci_ioaddr(port));
         else if (port >= 0x2000)
                 return ctrl_inw(port2adr(port));
         else
@@ -53,8 +47,6 @@ u32 titan_inl(unsigned long port)
 {
         if (PXSEG(port))
                 return ctrl_inl(port);
-        else if (is_pci_ioaddr(port))
-                return ctrl_inl(pci_ioaddr(port));
         else if (port >= 0x2000)
                 return ctrl_inw(port2adr(port));
         else
@@ -66,8 +58,6 @@ void titan_outb(u8 value, unsigned long port)
 {
         if (PXSEG(port))
                 ctrl_outb(value, port);
-        else if (is_pci_ioaddr(port))
-                ctrl_outb(value, pci_ioaddr(port));
         else
                 ctrl_outw(value, port2adr(port));
 }
@@ -76,8 +66,6 @@ void titan_outb_p(u8 value, unsigned long port)
 {
         if (PXSEG(port))
                 ctrl_outb(value, port);
-        else if (is_pci_ioaddr(port))
-                ctrl_outb(value, pci_ioaddr(port));
         else
                 ctrl_outw(value, port2adr(port));
         ctrl_delay();
@@ -87,8 +75,6 @@ void titan_outw(u16 value, unsigned long port)
 {
         if (PXSEG(port))
                 ctrl_outw(value, port);
-        else if (is_pci_ioaddr(port))
-                ctrl_outw(value, pci_ioaddr(port));
         else if (port >= 0x2000)
                 ctrl_outw(value, port2adr(port));
         else
@@ -99,8 +85,6 @@ void titan_outl(u32 value, unsigned long port)
 {
         if (PXSEG(port))
                 ctrl_outl(value, port);
-        else if (is_pci_ioaddr(port))
-                ctrl_outl(value, pci_ioaddr(port));
         else
                 maybebadio(port);
 }
@@ -119,8 +103,6 @@ void __iomem *titan_ioport_map(unsigned long port, unsigned int size)
 {
 	if (PXSEG(port))
 		return (void __iomem *)port;
-	else if (is_pci_ioaddr(port))
-		return (void __iomem *)pci_ioaddr(port);
 
 	return (void __iomem *)port2adr(port);
 }
-- 
cgit v0.10.2


From 0e75148108914062cb46ad3dc8f054a628018df7 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 19:48:48 +0900
Subject: sh: pci: Consolidate pcibios_setup() in pci-lib.

This wasn't really being used for anything useful, so just stub it in
pci-lib.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c
index 5bc81d5..529aa4f 100644
--- a/arch/sh/drivers/pci/ops-dreamcast.c
+++ b/arch/sh/drivers/pci/ops-dreamcast.c
@@ -154,12 +154,6 @@ static int __init gapspci_init(struct pci_channel *chan)
 	return 0;
 }
 
-/* Haven't done anything here as yet */
-char * __devinit pcibios_setup(char *str)
-{
-	return str;
-}
-
 struct pci_channel board_pci_channels[] = {
 	{ gapspci_init, &gapspci_pci_ops, &gapspci_io_resource,
 	  &gapspci_mem_resource, 0, 1 },
diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c
index 2a7f7b5..7cc1fcc 100644
--- a/arch/sh/drivers/pci/ops-sh4.c
+++ b/arch/sh/drivers/pci/ops-sh4.c
@@ -106,30 +106,27 @@ struct pci_ops sh4_pci_ops = {
  * Not really related to pci_ops, but it's common and not worth shoving
  * somewhere else for now..
  */
-static unsigned int pci_probe = PCI_PROBE_CONF1;
-
 int __init sh4_pci_check_direct(struct pci_channel *chan)
 {
 	/*
 	 * Check if configuration works.
 	 */
-	if (pci_probe & PCI_PROBE_CONF1) {
-		unsigned int tmp = pci_read_reg(chan, SH4_PCIPAR);
-
-		pci_write_reg(chan, P1SEG, SH4_PCIPAR);
+	unsigned int tmp = pci_read_reg(chan, SH4_PCIPAR);
 
-		if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) {
-			pci_write_reg(chan, tmp, SH4_PCIPAR);
-			printk(KERN_INFO "PCI: Using configuration type 1\n");
-			request_region(chan->reg_base + SH4_PCIPAR, 8,
-				       "PCI conf1");
-			return 0;
-		}
+	pci_write_reg(chan, P1SEG, SH4_PCIPAR);
 
+	if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) {
 		pci_write_reg(chan, tmp, SH4_PCIPAR);
+		printk(KERN_INFO "PCI: Using configuration type 1\n");
+		request_region(chan->reg_base + SH4_PCIPAR, 8,
+			       "PCI conf1");
+		return 0;
 	}
 
-	pr_debug("PCI: pci_check_direct failed\n");
+	pci_write_reg(chan, tmp, SH4_PCIPAR);
+
+	printk(KERN_ERR "PCI: %s failed\n", __func__);
+
 	return -EINVAL;
 }
 
@@ -155,16 +152,6 @@ static void __init pci_fixup_ide_bases(struct pci_dev *d)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
 
-char * __devinit pcibios_setup(char *str)
-{
-	if (!strcmp(str, "off")) {
-		pci_probe = 0;
-		return NULL;
-	}
-
-	return str;
-}
-
 int __attribute__((weak)) pci_fixup_pcic(struct pci_channel *chan)
 {
 	/* Nothing to do. */
diff --git a/arch/sh/drivers/pci/ops-sh5.c b/arch/sh/drivers/pci/ops-sh5.c
index 729e38a..b10e2d5 100644
--- a/arch/sh/drivers/pci/ops-sh5.c
+++ b/arch/sh/drivers/pci/ops-sh5.c
@@ -42,11 +42,6 @@ static void __init pci_fixup_ide_bases(struct pci_dev *d)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
 
-char * __devinit pcibios_setup(char *str)
-{
-	return str;
-}
-
 static int sh5pci_read(struct pci_bus *bus, unsigned int devfn, int where,
 			int size, u32 *val)
 {
diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c
index ea83629..f072aca 100644
--- a/arch/sh/drivers/pci/pci-lib.c
+++ b/arch/sh/drivers/pci/pci-lib.c
@@ -134,6 +134,11 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
+char * __devinit pcibios_setup(char *str)
+{
+	return str;
+}
+
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine)
 {
-- 
cgit v0.10.2


From 0db38cea69fc478a5c25b3c915ec680cc5538783 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 19:54:47 +0900
Subject: sh: pci: Kill off legacy ide quirks.

These fixups seem to have bitrotted a bit since their introduction in the
2.4 days. As we never had much use for them in the first place, and
nothing is using them any more, kill them off the rest of the way.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c
index 7cc1fcc..78bebeb 100644
--- a/arch/sh/drivers/pci/ops-sh4.c
+++ b/arch/sh/drivers/pci/ops-sh4.c
@@ -130,28 +130,6 @@ int __init sh4_pci_check_direct(struct pci_channel *chan)
 	return -EINVAL;
 }
 
-/* Handle generic fixups */
-static void __init pci_fixup_ide_bases(struct pci_dev *d)
-{
-	int i;
-
-	/*
-	 * PCI IDE controllers use non-standard I/O port decoding, respect it.
-	 */
-	if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
-		return;
-	pr_debug("PCI: IDE base address fixup for %s\n", pci_name(d));
-	for(i = 0; i < 4; i++) {
-		struct resource *r = &d->resource[i];
-
-		if ((r->start & ~0x80) == 0x374) {
-			r->start |= 2;
-			r->end = r->start;
-		}
-	}
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
-
 int __attribute__((weak)) pci_fixup_pcic(struct pci_channel *chan)
 {
 	/* Nothing to do. */
diff --git a/arch/sh/drivers/pci/ops-sh5.c b/arch/sh/drivers/pci/ops-sh5.c
index b10e2d5..4ce95a0 100644
--- a/arch/sh/drivers/pci/ops-sh5.c
+++ b/arch/sh/drivers/pci/ops-sh5.c
@@ -22,26 +22,6 @@
 #include <asm/io.h>
 #include "pci-sh5.h"
 
-static void __init pci_fixup_ide_bases(struct pci_dev *d)
-{
-	int i;
-
-	/*
-	 * PCI IDE controllers use non-standard I/O port decoding, respect it.
-	 */
-	if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
-		return;
-	printk("PCI: IDE base address fixup for %s\n", pci_name(d));
-	for(i=0; i<4; i++) {
-		struct resource *r = &d->resource[i];
-		if ((r->start & ~0x80) == 0x374) {
-			r->start |= 2;
-			r->end = r->start;
-		}
-	}
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
-
 static int sh5pci_read(struct pci_bus *bus, unsigned int devfn, int where,
 			int size, u32 *val)
 {
-- 
cgit v0.10.2


From 3444f5ec49bc6cb901ffea38e085db1d76e1189c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 20:22:05 +0900
Subject: sh: pci: Tidy up the dreamcast PCI support.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index 4cac866..2160a06 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -12,7 +12,8 @@ obj-$(CONFIG_CPU_SUBTYPE_SH7780)	+= pci-sh7780.o ops-sh4.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7785)	+= pci-sh7780.o ops-sh4.o
 obj-$(CONFIG_CPU_SH5)			+= pci-sh5.o ops-sh5.o
 
-obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o
+obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o \
+					   pci-dreamcast.o
 obj-$(CONFIG_SH_SECUREEDGE5410)		+= ops-snapgear.o
 obj-$(CONFIG_SH_RTS7751R2D)		+= ops-rts7751r2d.o fixups-rts7751r2d.o
 obj-$(CONFIG_SH_SH03)			+= ops-sh03.o fixups-sh03.o
diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c
index 529aa4f..e83d0d3 100644
--- a/arch/sh/drivers/pci/ops-dreamcast.c
+++ b/arch/sh/drivers/pci/ops-dreamcast.c
@@ -1,15 +1,9 @@
 /*
- * arch/sh/drivers/pci/ops-dreamcast.c
- *
  * PCI operations for the Sega Dreamcast
  *
  * Copyright (C) 2001, 2002  M. R. Brown
  * Copyright (C) 2002, 2003  Paul Mundt
  *
- * This file originally bore the message (with enclosed-$):
- *	Id: pci.c,v 1.3 2003/05/04 19:29:46 lethal Exp
- *	Dreamcast PCI: Supports SEGA Broadband Adaptor only.
- *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -23,25 +17,10 @@
 #include <linux/irq.h>
 #include <linux/pci.h>
 #include <linux/module.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
+#include <linux/io.h>
+#include <linux/irq.h>
 #include <mach/pci.h>
 
-static struct resource gapspci_io_resource = {
-	.name	= "GAPSPCI IO",
-	.start	= GAPSPCI_BBA_CONFIG,
-	.end	= GAPSPCI_BBA_CONFIG + GAPSPCI_BBA_CONFIG_SIZE - 1,
-	.flags	= IORESOURCE_IO,
-};
-
-static struct resource gapspci_mem_resource = {
-	.name	= "GAPSPCI mem",
-	.start	= GAPSPCI_DMA_BASE,
-	.end	= GAPSPCI_DMA_BASE + GAPSPCI_DMA_SIZE - 1,
-	.flags	= IORESOURCE_MEM,
-};
-
 /*
  * The !gapspci_config_access case really shouldn't happen, ever, unless
  * someone implicitly messes around with the last devfn value.. otherwise we
@@ -76,10 +55,10 @@ static int gapspci_read(struct pci_bus *bus, unsigned int devfn, int where, int
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	switch (size) {
-		case 1: *val = inb(GAPSPCI_BBA_CONFIG+where); break;
-		case 2: *val = inw(GAPSPCI_BBA_CONFIG+where); break;
-		case 4: *val = inl(GAPSPCI_BBA_CONFIG+where); break;
-	}	
+	case 1: *val = inb(GAPSPCI_BBA_CONFIG+where); break;
+	case 2: *val = inw(GAPSPCI_BBA_CONFIG+where); break;
+	case 4: *val = inl(GAPSPCI_BBA_CONFIG+where); break;
+	}
 
         return PCIBIOS_SUCCESSFUL;
 }
@@ -90,72 +69,15 @@ static int gapspci_write(struct pci_bus *bus, unsigned int devfn, int where, int
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	switch (size) {
-		case 1: outb(( u8)val, GAPSPCI_BBA_CONFIG+where); break;
-		case 2: outw((u16)val, GAPSPCI_BBA_CONFIG+where); break;
-		case 4: outl((u32)val, GAPSPCI_BBA_CONFIG+where); break;
+	case 1: outb(( u8)val, GAPSPCI_BBA_CONFIG+where); break;
+	case 2: outw((u16)val, GAPSPCI_BBA_CONFIG+where); break;
+	case 4: outl((u32)val, GAPSPCI_BBA_CONFIG+where); break;
 	}
 
         return PCIBIOS_SUCCESSFUL;
 }
 
-static struct pci_ops gapspci_pci_ops = {
+struct pci_ops gapspci_pci_ops = {
 	.read	= gapspci_read,
 	.write	= gapspci_write,
 };
-
-/*
- * gapspci init
- */
-
-static int __init gapspci_init(struct pci_channel *chan)
-{
-	char idbuf[16];
-	int i;
-
-	/*
-	 * FIXME: All of this wants documenting to some degree,
-	 * even some basic register definitions would be nice.
-	 *
-	 * I haven't seen anything this ugly since.. maple.
-	 */
-
-	for (i=0; i<16; i++)
-		idbuf[i] = inb(GAPSPCI_REGS+i);
-
-	if (strncmp(idbuf, "GAPSPCI_BRIDGE_2", 16))
-		return -ENODEV;
-
-	outl(0x5a14a501, GAPSPCI_REGS+0x18);
-
-	for (i=0; i<1000000; i++)
-		;
-
-	if (inl(GAPSPCI_REGS+0x18) != 1)
-		return -EINVAL;
-
-	outl(0x01000000, GAPSPCI_REGS+0x20);
-	outl(0x01000000, GAPSPCI_REGS+0x24);
-
-	outl(GAPSPCI_DMA_BASE, GAPSPCI_REGS+0x28);
-	outl(GAPSPCI_DMA_BASE+GAPSPCI_DMA_SIZE, GAPSPCI_REGS+0x2c);
-
-	outl(1, GAPSPCI_REGS+0x14);
-	outl(1, GAPSPCI_REGS+0x34);
-
-	/* Setting Broadband Adapter */
-	outw(0xf900, GAPSPCI_BBA_CONFIG+0x06);
-	outl(0x00000000, GAPSPCI_BBA_CONFIG+0x30);
-	outb(0x00, GAPSPCI_BBA_CONFIG+0x3c);
-	outb(0xf0, GAPSPCI_BBA_CONFIG+0x0d);
-	outw(0x0006, GAPSPCI_BBA_CONFIG+0x04);
-	outl(0x00002001, GAPSPCI_BBA_CONFIG+0x10);
-	outl(0x01000000, GAPSPCI_BBA_CONFIG+0x14);
-
-	return 0;
-}
-
-struct pci_channel board_pci_channels[] = {
-	{ gapspci_init, &gapspci_pci_ops, &gapspci_io_resource,
-	  &gapspci_mem_resource, 0, 1 },
-	{ 0, }
-};
diff --git a/arch/sh/drivers/pci/pci-dreamcast.c b/arch/sh/drivers/pci/pci-dreamcast.c
new file mode 100644
index 0000000..0897be5
--- /dev/null
+++ b/arch/sh/drivers/pci/pci-dreamcast.c
@@ -0,0 +1,105 @@
+/*
+ * PCI support for the Sega Dreamcast
+ *
+ * Copyright (C) 2001, 2002  M. R. Brown
+ * Copyright (C) 2002, 2003  Paul Mundt
+ *
+ * This file originally bore the message (with enclosed-$):
+ *	Id: pci.c,v 1.3 2003/05/04 19:29:46 lethal Exp
+ *	Dreamcast PCI: Supports SEGA Broadband Adaptor only.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <mach/pci.h>
+
+static struct resource gapspci_io_resource = {
+	.name	= "GAPSPCI IO",
+	.start	= GAPSPCI_BBA_CONFIG,
+	.end	= GAPSPCI_BBA_CONFIG + GAPSPCI_BBA_CONFIG_SIZE - 1,
+	.flags	= IORESOURCE_IO,
+};
+
+static struct resource gapspci_mem_resource = {
+	.name	= "GAPSPCI mem",
+	.start	= GAPSPCI_DMA_BASE,
+	.end	= GAPSPCI_DMA_BASE + GAPSPCI_DMA_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+/*
+ * gapspci init
+ */
+
+static int __init gapspci_init(struct pci_channel *chan)
+{
+	char idbuf[16];
+	int i;
+
+	/*
+	 * FIXME: All of this wants documenting to some degree,
+	 * even some basic register definitions would be nice.
+	 *
+	 * I haven't seen anything this ugly since.. maple.
+	 */
+
+	for (i=0; i<16; i++)
+		idbuf[i] = inb(GAPSPCI_REGS+i);
+
+	if (strncmp(idbuf, "GAPSPCI_BRIDGE_2", 16))
+		return -ENODEV;
+
+	outl(0x5a14a501, GAPSPCI_REGS+0x18);
+
+	for (i=0; i<1000000; i++)
+		cpu_relax();
+
+	if (inl(GAPSPCI_REGS+0x18) != 1)
+		return -EINVAL;
+
+	outl(0x01000000, GAPSPCI_REGS+0x20);
+	outl(0x01000000, GAPSPCI_REGS+0x24);
+
+	outl(GAPSPCI_DMA_BASE, GAPSPCI_REGS+0x28);
+	outl(GAPSPCI_DMA_BASE+GAPSPCI_DMA_SIZE, GAPSPCI_REGS+0x2c);
+
+	outl(1, GAPSPCI_REGS+0x14);
+	outl(1, GAPSPCI_REGS+0x34);
+
+	/* Setting Broadband Adapter */
+	outw(0xf900, GAPSPCI_BBA_CONFIG+0x06);
+	outl(0x00000000, GAPSPCI_BBA_CONFIG+0x30);
+	outb(0x00, GAPSPCI_BBA_CONFIG+0x3c);
+	outb(0xf0, GAPSPCI_BBA_CONFIG+0x0d);
+	outw(0x0006, GAPSPCI_BBA_CONFIG+0x04);
+	outl(0x00002001, GAPSPCI_BBA_CONFIG+0x10);
+	outl(0x01000000, GAPSPCI_BBA_CONFIG+0x14);
+
+	return 0;
+}
+
+struct pci_channel board_pci_channels[] = {
+	{
+		.init		= gapspci_init,
+		.pci_ops	= &gapspci_pci_ops,
+		.io_resource	= &gapspci_io_resource,
+		.mem_resource	= &gapspci_mem_resource,
+		.first_devfn	= 0,
+		.last_devfn	= 1,
+	}, {
+		.init		= NULL,
+	}
+};
diff --git a/arch/sh/include/mach-dreamcast/mach/pci.h b/arch/sh/include/mach-dreamcast/mach/pci.h
index 75fc900..0314d97 100644
--- a/arch/sh/include/mach-dreamcast/mach/pci.h
+++ b/arch/sh/include/mach-dreamcast/mach/pci.h
@@ -21,5 +21,7 @@
 
 #define	GAPSPCI_IRQ		HW_EVENT_EXTERNAL
 
+extern struct pci_ops gapspci_pci_ops;
+
 #endif /* __ASM_SH_DREAMCAST_PCI_H */
 
-- 
cgit v0.10.2


From 48e4237d96fdcb1237b63bcddb37771f97452eec Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 20:40:48 +0900
Subject: sh: pci: Convert the SH-5 code over to the new interface.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig
index efe8cf9..f9fb1d1 100644
--- a/arch/sh/drivers/pci/Kconfig
+++ b/arch/sh/drivers/pci/Kconfig
@@ -23,7 +23,7 @@ config PCI_NEW
 	bool
 	depends on PCI
 	default y if CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
-		     CPU_SUBTYPE_SH7785
+		     CPU_SUBTYPE_SH7785 || CPU_SH5
 
 # This is also board-specific
 config PCI_AUTO
diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/ops-cayman.c
index cbaaf02..b68b61d 100644
--- a/arch/sh/drivers/pci/ops-cayman.c
+++ b/arch/sh/drivers/pci/ops-cayman.c
@@ -75,14 +75,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin)
 
 	return result;
 }
-
-struct pci_channel board_pci_channels[] = {
-	{ sh5_pci_init, &sh5_pci_ops, NULL, NULL, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh5pci_init(__pa(memory_start),
-			   __pa(memory_end) - __pa(memory_start));
-}
diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c
index 7750da2..cf43185 100644
--- a/arch/sh/drivers/pci/pci-sh5.c
+++ b/arch/sh/drivers/pci/pci-sh5.c
@@ -27,12 +27,6 @@
 unsigned long pcicr_virt;
 unsigned long PCI_IO_AREA;
 
-int __init sh5_pci_init(struct pci_channel *chan)
-{
-	pr_debug("PCI: Starting intialization.\n");
-	return pcibios_init_platform();
-}
-
 /* Rounds a number UP to the nearest power of two. Used for
  * sizing the PCI window.
  */
@@ -95,8 +89,21 @@ static irqreturn_t pcish5_serr_irq(int irq, void *dev_id)
 	return IRQ_NONE;
 }
 
-int __init sh5pci_init(unsigned long memStart, unsigned long memSize)
+static struct resource sh5_io_resource = { /* place holder */ };
+static struct resource sh5_mem_resource = { /* place holder */ };
+
+static struct pci_channel sh5pci_controller = {
+	.pci_ops		= &sh5_pci_ops,
+	.mem_resource		= &sh5_mem_resource,
+	.mem_offset		= 0x00000000,
+	.io_resource		= &sh5_io_resource,
+	.io_offset		= 0x00000000,
+};
+
+static int __init sh5pci_init(void)
 {
+	unsigned long memStart = __pa(memory_start);
+	unsigned long memSize = __pa(memory_end) - memStart;
 	u32 lsr0;
 	u32 uval;
 
@@ -203,35 +210,14 @@ int __init sh5pci_init(unsigned long memStart, unsigned long memSize)
         SH5PCI_WRITE(AINTM, ~0);
         SH5PCI_WRITE(PINTM, ~0);
 
-	return 0;
-}
+	sh5_io_resource.start = PCI_IO_AREA;
+	sh5_io_resource.end = PCI_IO_AREA + 0x10000;
 
-#define xPCIBIOS_MIN_IO		board_pci_channels->io_resource->start
-#define xPCIBIOS_MIN_MEM	board_pci_channels->mem_resource->start
+	sh5_mem_resource.start = memStart;
+	sh5_mem_resource.end = memStart + memSize;
 
-void __devinit pcibios_fixup_bus(struct pci_bus *bus)
-{
-	struct pci_dev *dev = bus->self;
-	int i;
-
-	if (dev) {
-		for (i= 0; i < 3; i++) {
-			bus->resource[i] =
-				&dev->resource[PCI_BRIDGE_RESOURCES+i];
-			bus->resource[i]->name = bus->name;
-		}
-		bus->resource[0]->flags |= IORESOURCE_IO;
-		bus->resource[1]->flags |= IORESOURCE_MEM;
-
-		/* For now, propagate host limits to the bus;
-		 * we'll adjust them later. */
-		bus->resource[0]->end = 64*1024 - 1 ;
-		bus->resource[1]->end = xPCIBIOS_MIN_MEM+(256*1024*1024)-1;
-		bus->resource[0]->start = xPCIBIOS_MIN_IO;
-		bus->resource[1]->start = xPCIBIOS_MIN_MEM;
-
-		/* Turn off downstream PF memory address range by default */
-		bus->resource[2]->start = 1024*1024;
-		bus->resource[2]->end = bus->resource[2]->start - 1;
-	}
+	register_pci_controller(&sh5pci_controller);
+
+	return 0;
 }
+arch_initcall(sh5pci_init);
diff --git a/arch/sh/drivers/pci/pci-sh5.h b/arch/sh/drivers/pci/pci-sh5.h
index af09f38..f277628 100644
--- a/arch/sh/drivers/pci/pci-sh5.h
+++ b/arch/sh/drivers/pci/pci-sh5.h
@@ -107,8 +107,4 @@ extern unsigned long pcicr_virt;
 
 extern struct pci_ops sh5_pci_ops;
 
-/* arch/sh/drivers/pci/pci-sh5.c */
-int sh5_pci_init(struct pci_channel *chan);
-int sh5pci_init(unsigned long memStart, unsigned long memSize);
-
 #endif /* __PCI_SH5_H */
-- 
cgit v0.10.2


From a5b08047129f214af1899bd9088605c7adc21ed5 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 20:41:45 +0900
Subject: sh: pci: Rename ops-cayman -> fixups-cayman.

Now that ops-cayman.c only contains IRQ routing fixups, rename it.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index 2160a06..cb2190c 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -24,4 +24,4 @@ obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= fixups-sdk7780.o
 obj-$(CONFIG_SH_TITAN)			+= ops-titan.o
 obj-$(CONFIG_SH_LANDISK)		+= ops-landisk.o
 obj-$(CONFIG_SH_LBOX_RE2)		+= ops-lboxre2.o fixups-rts7751r2d.o
-obj-$(CONFIG_SH_CAYMAN)			+= ops-cayman.o
+obj-$(CONFIG_SH_CAYMAN)			+= fixups-cayman.o
diff --git a/arch/sh/drivers/pci/fixups-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c
new file mode 100644
index 0000000..b68b61d
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-cayman.c
@@ -0,0 +1,77 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <cpu/irq.h>
+#include "pci-sh5.h"
+
+int __init pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin)
+{
+	int result = -1;
+
+	/* The complication here is that the PCI IRQ lines from the Cayman's 2
+	   5V slots get into the CPU via a different path from the IRQ lines
+	   from the 3 3.3V slots.  Thus, we have to detect whether the card's
+	   interrupts go via the 5V or 3.3V path, i.e. the 'bridge swizzling'
+	   at the point where we cross from 5V to 3.3V is not the normal case.
+
+	   The added complication is that we don't know that the 5V slots are
+	   always bus 2, because a card containing a PCI-PCI bridge may be
+	   plugged into a 3.3V slot, and this changes the bus numbering.
+
+	   Also, the Cayman has an intermediate PCI bus that goes a custom
+	   expansion board header (and to the secondary bridge).  This bus has
+	   never been used in practice.
+
+	   The 1ary onboard PCI-PCI bridge is device 3 on bus 0
+	   The 2ary onboard PCI-PCI bridge is device 0 on the 2ary bus of
+	   the 1ary bridge.
+	   */
+
+	struct slot_pin {
+		int slot;
+		int pin;
+	} path[4];
+	int i=0;
+
+	while (dev->bus->number > 0) {
+
+		slot = path[i].slot = PCI_SLOT(dev->devfn);
+		pin = path[i].pin = pci_swizzle_interrupt_pin(dev, pin);
+		dev = dev->bus->self;
+		i++;
+		if (i > 3) panic("PCI path to root bus too long!\n");
+	}
+
+	slot = PCI_SLOT(dev->devfn);
+	/* This is the slot on bus 0 through which the device is eventually
+	   reachable. */
+
+	/* Now work back up. */
+	if ((slot < 3) || (i == 0)) {
+		/* Bus 0 (incl. PCI-PCI bridge itself) : perform the final
+		   swizzle now. */
+		result = IRQ_INTA + pci_swizzle_interrupt_pin(dev, pin) - 1;
+	} else {
+		i--;
+		slot = path[i].slot;
+		pin  = path[i].pin;
+		if (slot > 0) {
+			panic("PCI expansion bus device found - not handled!\n");
+		} else {
+			if (i > 0) {
+				/* 5V slots */
+				i--;
+				slot = path[i].slot;
+				pin  = path[i].pin;
+				/* 'pin' was swizzled earlier wrt slot, don't do it again. */
+				result = IRQ_P2INTA + (pin - 1);
+			} else {
+				/* IRQ for 2ary PCI-PCI bridge : unused */
+				result = -1;
+			}
+		}
+	}
+
+	return result;
+}
diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/ops-cayman.c
deleted file mode 100644
index b68b61d..0000000
--- a/arch/sh/drivers/pci/ops-cayman.c
+++ /dev/null
@@ -1,77 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/types.h>
-#include <cpu/irq.h>
-#include "pci-sh5.h"
-
-int __init pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin)
-{
-	int result = -1;
-
-	/* The complication here is that the PCI IRQ lines from the Cayman's 2
-	   5V slots get into the CPU via a different path from the IRQ lines
-	   from the 3 3.3V slots.  Thus, we have to detect whether the card's
-	   interrupts go via the 5V or 3.3V path, i.e. the 'bridge swizzling'
-	   at the point where we cross from 5V to 3.3V is not the normal case.
-
-	   The added complication is that we don't know that the 5V slots are
-	   always bus 2, because a card containing a PCI-PCI bridge may be
-	   plugged into a 3.3V slot, and this changes the bus numbering.
-
-	   Also, the Cayman has an intermediate PCI bus that goes a custom
-	   expansion board header (and to the secondary bridge).  This bus has
-	   never been used in practice.
-
-	   The 1ary onboard PCI-PCI bridge is device 3 on bus 0
-	   The 2ary onboard PCI-PCI bridge is device 0 on the 2ary bus of
-	   the 1ary bridge.
-	   */
-
-	struct slot_pin {
-		int slot;
-		int pin;
-	} path[4];
-	int i=0;
-
-	while (dev->bus->number > 0) {
-
-		slot = path[i].slot = PCI_SLOT(dev->devfn);
-		pin = path[i].pin = pci_swizzle_interrupt_pin(dev, pin);
-		dev = dev->bus->self;
-		i++;
-		if (i > 3) panic("PCI path to root bus too long!\n");
-	}
-
-	slot = PCI_SLOT(dev->devfn);
-	/* This is the slot on bus 0 through which the device is eventually
-	   reachable. */
-
-	/* Now work back up. */
-	if ((slot < 3) || (i == 0)) {
-		/* Bus 0 (incl. PCI-PCI bridge itself) : perform the final
-		   swizzle now. */
-		result = IRQ_INTA + pci_swizzle_interrupt_pin(dev, pin) - 1;
-	} else {
-		i--;
-		slot = path[i].slot;
-		pin  = path[i].pin;
-		if (slot > 0) {
-			panic("PCI expansion bus device found - not handled!\n");
-		} else {
-			if (i > 0) {
-				/* 5V slots */
-				i--;
-				slot = path[i].slot;
-				pin  = path[i].pin;
-				/* 'pin' was swizzled earlier wrt slot, don't do it again. */
-				result = IRQ_P2INTA + (pin - 1);
-			} else {
-				/* IRQ for 2ary PCI-PCI bridge : unused */
-				result = -1;
-			}
-		}
-	}
-
-	return result;
-}
-- 
cgit v0.10.2


From 7154b3e80203ee91f9ba7d0a43d3daa05c49d9e9 Mon Sep 17 00:00:00 2001
From: Joonyoung Shim <jy0922.shim@samsung.com>
Date: Mon, 20 Apr 2009 19:21:35 +0900
Subject: ASoC: TWL4030: Add support Voice DAI

Add Voice DAI to support the PCM voice interface of the twl4030 codec.

The PCM voice interface can be used with 8-kHz(voice narrowband) or
16-kHz(voice wideband) sampling rates, and 16bits, and mono RX and mono
TX or stereo TX.

The PCM voice interface has two modes
 - PCM mode1 : This uses the normal FS polarity and the rising edge of
               the clock signal.
 - PCM mode2 : This uses the FS polarity inverted and the falling edge
               of the clock signal.

If the system master clock is not 26MHz or the twl4030 codec mode is not
option2, the voice PCM interface is not available.

Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index a1b76d7..cc2968c 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -1484,6 +1484,144 @@ static int twl4030_set_dai_fmt(struct snd_soc_dai *codec_dai,
 	return 0;
 }
 
+static int twl4030_voice_startup(struct snd_pcm_substream *substream,
+		struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u8 infreq;
+	u8 mode;
+
+	/* If the system master clock is not 26MHz, the voice PCM interface is
+	 * not avilable.
+	 */
+	infreq = twl4030_read_reg_cache(codec, TWL4030_REG_APLL_CTL)
+		& TWL4030_APLL_INFREQ;
+
+	if (infreq != TWL4030_APLL_INFREQ_26000KHZ) {
+		printk(KERN_ERR "TWL4030 voice startup: "
+			"MCLK is not 26MHz, call set_sysclk() on init\n");
+		return -EINVAL;
+	}
+
+	/* If the codec mode is not option2, the voice PCM interface is not
+	 * avilable.
+	 */
+	mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE)
+		& TWL4030_OPT_MODE;
+
+	if (mode != TWL4030_OPTION_2) {
+		printk(KERN_ERR "TWL4030 voice startup: "
+			"the codec mode is not option2\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int twl4030_voice_hw_params(struct snd_pcm_substream *substream,
+		struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u8 old_mode, mode;
+
+	/* bit rate */
+	old_mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE)
+		& ~(TWL4030_CODECPDZ);
+	mode = old_mode;
+
+	switch (params_rate(params)) {
+	case 8000:
+		mode &= ~(TWL4030_SEL_16K);
+		break;
+	case 16000:
+		mode |= TWL4030_SEL_16K;
+		break;
+	default:
+		printk(KERN_ERR "TWL4030 voice hw params: unknown rate %d\n",
+			params_rate(params));
+		return -EINVAL;
+	}
+
+	if (mode != old_mode) {
+		/* change rate and set CODECPDZ */
+		twl4030_codec_enable(codec, 0);
+		twl4030_write(codec, TWL4030_REG_CODEC_MODE, mode);
+		twl4030_codec_enable(codec, 1);
+	}
+
+	return 0;
+}
+
+static int twl4030_voice_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+		int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u8 infreq;
+
+	switch (freq) {
+	case 26000000:
+		infreq = TWL4030_APLL_INFREQ_26000KHZ;
+		break;
+	default:
+		printk(KERN_ERR "TWL4030 voice set sysclk: unknown rate %d\n",
+			freq);
+		return -EINVAL;
+	}
+
+	infreq |= TWL4030_APLL_EN;
+	twl4030_write(codec, TWL4030_REG_APLL_CTL, infreq);
+
+	return 0;
+}
+
+static int twl4030_voice_set_dai_fmt(struct snd_soc_dai *codec_dai,
+		unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u8 old_format, format;
+
+	/* get format */
+	old_format = twl4030_read_reg_cache(codec, TWL4030_REG_VOICE_IF);
+	format = old_format;
+
+	/* set master/slave audio interface */
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBS_CFM:
+		format &= ~(TWL4030_VIF_SLAVE_EN);
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		format |= TWL4030_VIF_SLAVE_EN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* clock inversion */
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_IB_NF:
+		format &= ~(TWL4030_VIF_FORMAT);
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		format |= TWL4030_VIF_FORMAT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (format != old_format) {
+		/* change format and set CODECPDZ */
+		twl4030_codec_enable(codec, 0);
+		twl4030_write(codec, TWL4030_REG_VOICE_IF, format);
+		twl4030_codec_enable(codec, 1);
+	}
+
+	return 0;
+}
+
 #define TWL4030_RATES	 (SNDRV_PCM_RATE_8000_48000)
 #define TWL4030_FORMATS	 (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FORMAT_S24_LE)
 
@@ -1495,7 +1633,15 @@ static struct snd_soc_dai_ops twl4030_dai_ops = {
 	.set_fmt	= twl4030_set_dai_fmt,
 };
 
-struct snd_soc_dai twl4030_dai = {
+static struct snd_soc_dai_ops twl4030_dai_voice_ops = {
+	.startup	= twl4030_voice_startup,
+	.hw_params	= twl4030_voice_hw_params,
+	.set_sysclk	= twl4030_voice_set_dai_sysclk,
+	.set_fmt	= twl4030_voice_set_dai_fmt,
+};
+
+struct snd_soc_dai twl4030_dai[] = {
+{
 	.name = "twl4030",
 	.playback = {
 		.stream_name = "Playback",
@@ -1510,6 +1656,23 @@ struct snd_soc_dai twl4030_dai = {
 		.rates = TWL4030_RATES,
 		.formats = TWL4030_FORMATS,},
 	.ops = &twl4030_dai_ops,
+},
+{
+	.name = "twl4030 Voice",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 1,
+		.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000,
+		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+	.ops = &twl4030_dai_voice_ops,
+},
 };
 EXPORT_SYMBOL_GPL(twl4030_dai);
 
@@ -1550,8 +1713,8 @@ static int twl4030_init(struct snd_soc_device *socdev)
 	codec->read = twl4030_read_reg_cache;
 	codec->write = twl4030_write;
 	codec->set_bias_level = twl4030_set_bias_level;
-	codec->dai = &twl4030_dai;
-	codec->num_dai = 1;
+	codec->dai = twl4030_dai;
+	codec->num_dai = ARRAY_SIZE(twl4030_dai),
 	codec->reg_cache_size = sizeof(twl4030_reg);
 	codec->reg_cache = kmemdup(twl4030_reg, sizeof(twl4030_reg),
 					GFP_KERNEL);
@@ -1645,13 +1808,13 @@ EXPORT_SYMBOL_GPL(soc_codec_dev_twl4030);
 
 static int __init twl4030_modinit(void)
 {
-	return snd_soc_register_dai(&twl4030_dai);
+	return snd_soc_register_dais(&twl4030_dai[0], ARRAY_SIZE(twl4030_dai));
 }
 module_init(twl4030_modinit);
 
 static void __exit twl4030_exit(void)
 {
-	snd_soc_unregister_dai(&twl4030_dai);
+	snd_soc_unregister_dais(&twl4030_dai[0], ARRAY_SIZE(twl4030_dai));
 }
 module_exit(twl4030_exit);
 
diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h
index cb63765..981ec60 100644
--- a/sound/soc/codecs/twl4030.h
+++ b/sound/soc/codecs/twl4030.h
@@ -113,6 +113,8 @@
 #define TWL4030_SEL_16K			0x04
 #define TWL4030_CODECPDZ		0x02
 #define TWL4030_OPT_MODE		0x01
+#define TWL4030_OPTION_1		(1 << 0)
+#define TWL4030_OPTION_2		(0 << 0)
 
 /* TWL4030_REG_MICBIAS_CTL (0x04) Fields */
 
@@ -171,6 +173,17 @@
 #define TWL4030_CLK256FS_EN		0x02
 #define TWL4030_AIF_EN			0x01
 
+/* VOICE_IF (0x0F) Fields */
+
+#define TWL4030_VIF_SLAVE_EN		0x80
+#define TWL4030_VIF_DIN_EN		0x40
+#define TWL4030_VIF_DOUT_EN		0x20
+#define TWL4030_VIF_SWAP		0x10
+#define TWL4030_VIF_FORMAT		0x08
+#define TWL4030_VIF_TRI_EN		0x04
+#define TWL4030_VIF_SUB_EN		0x02
+#define TWL4030_VIF_EN			0x01
+
 /* EAR_CTL (0x21) */
 #define TWL4030_EAR_GAIN		0x30
 
@@ -236,7 +249,10 @@
 #define TWL4030_SMOOTH_ANAVOL_EN	0x02
 #define TWL4030_DIGMIC_LR_SWAP_EN	0x01
 
-extern struct snd_soc_dai twl4030_dai;
+#define TWL4030_DAI_HIFI		0
+#define TWL4030_DAI_VOICE		1
+
+extern struct snd_soc_dai twl4030_dai[2];
 extern struct snd_soc_codec_device soc_codec_dev_twl4030;
 
 #endif	/* End of __TWL4030_AUDIO_H__ */
diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c
index 0c2322d..027e1a4 100644
--- a/sound/soc/omap/omap2evm.c
+++ b/sound/soc/omap/omap2evm.c
@@ -86,7 +86,7 @@ static struct snd_soc_dai_link omap2evm_dai = {
 	.name = "TWL4030",
 	.stream_name = "TWL4030",
 	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai,
+	.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 	.ops = &omap2evm_ops,
 };
 
diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c
index fd24a4a..6aa428e 100644
--- a/sound/soc/omap/omap3beagle.c
+++ b/sound/soc/omap/omap3beagle.c
@@ -83,7 +83,7 @@ static struct snd_soc_dai_link omap3beagle_dai = {
 	.name = "TWL4030",
 	.stream_name = "TWL4030",
 	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai,
+	.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 	.ops = &omap3beagle_ops,
 };
 
diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c
index fe282d4..ad219aa 100644
--- a/sound/soc/omap/omap3pandora.c
+++ b/sound/soc/omap/omap3pandora.c
@@ -228,14 +228,14 @@ static struct snd_soc_dai_link omap3pandora_dai[] = {
 		.name = "PCM1773",
 		.stream_name = "HiFi Out",
 		.cpu_dai = &omap_mcbsp_dai[0],
-		.codec_dai = &twl4030_dai,
+		.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 		.ops = &omap3pandora_out_ops,
 		.init = omap3pandora_out_init,
 	}, {
 		.name = "TWL4030",
 		.stream_name = "Line/Mic In",
 		.cpu_dai = &omap_mcbsp_dai[1],
-		.codec_dai = &twl4030_dai,
+		.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 		.ops = &omap3pandora_in_ops,
 		.init = omap3pandora_in_init,
 	}
diff --git a/sound/soc/omap/overo.c b/sound/soc/omap/overo.c
index a72dc4e..ec4f8fd 100644
--- a/sound/soc/omap/overo.c
+++ b/sound/soc/omap/overo.c
@@ -83,7 +83,7 @@ static struct snd_soc_dai_link overo_dai = {
 	.name = "TWL4030",
 	.stream_name = "TWL4030",
 	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai,
+	.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 	.ops = &overo_ops,
 };
 
diff --git a/sound/soc/omap/sdp3430.c b/sound/soc/omap/sdp3430.c
index 10f1c86..1c79741 100644
--- a/sound/soc/omap/sdp3430.c
+++ b/sound/soc/omap/sdp3430.c
@@ -197,7 +197,7 @@ static struct snd_soc_dai_link sdp3430_dai = {
 	.name = "TWL4030",
 	.stream_name = "TWL4030",
 	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai,
+	.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
 	.init = sdp3430_twl4030_init,
 	.ops = &sdp3430_ops,
 };
-- 
cgit v0.10.2


From 757e3c16f8bafa2a470aebf9b04671c5d4d18f49 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 21:11:07 +0900
Subject: sh: pci: Rewrite SH7751 PCI support to follow SH7780.

This follows the similar sort of scheme that the refactored SH7780 code
uses, using a 64MB CS3 mapping to handle the window0 case, and simply
discarding window1. This vastly simplifies the code, and allows most of
the board-specific setup to go die.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig
index f9fb1d1..5aaee3c 100644
--- a/arch/sh/drivers/pci/Kconfig
+++ b/arch/sh/drivers/pci/Kconfig
@@ -23,7 +23,8 @@ config PCI_NEW
 	bool
 	depends on PCI
 	default y if CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
-		     CPU_SUBTYPE_SH7785 || CPU_SH5
+		     CPU_SUBTYPE_SH7785 || CPU_SH5 || \
+		     CPU_SUBTYPE_SH7751 || CPU_SUBTYPE_SH7751R
 
 # This is also board-specific
 config PCI_AUTO
diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index cb2190c..b8667de 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o \
 					   pci-dreamcast.o
 obj-$(CONFIG_SH_SECUREEDGE5410)		+= ops-snapgear.o
 obj-$(CONFIG_SH_RTS7751R2D)		+= ops-rts7751r2d.o fixups-rts7751r2d.o
-obj-$(CONFIG_SH_SH03)			+= ops-sh03.o fixups-sh03.o
+obj-$(CONFIG_SH_SH03)			+= fixups-sh03.o
 obj-$(CONFIG_SH_HIGHLANDER)		+= fixups-r7780rp.o
 obj-$(CONFIG_SH_SH7785LCR)		+= fixups-r7780rp.o
 obj-$(CONFIG_SH_SDK7780)		+= fixups-sdk7780.o
diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c
index 178b778..bb1a6bb 100644
--- a/arch/sh/drivers/pci/ops-landisk.c
+++ b/arch/sh/drivers/pci/ops-landisk.c
@@ -15,37 +15,6 @@
 #include <linux/pci.h>
 #include "pci-sh4.h"
 
-static struct resource sh7751_io_resource = {
-	.name = "SH7751 IO",
-	.start = SH7751_PCI_IO_BASE,
-	.end = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags = IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name = "SH7751 mem",
-	.start = SH7751_PCI_MEMORY_BASE,
-	.end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags = IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0x3ff},
-	{NULL, NULL, NULL, 0, 0},
-};
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0 = {
-		.base	= SH7751_CS3_BASE_ADDR,
-		.size	= (64 << 20),	/* 64MB */
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
-}
-
 int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	/*
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
index 91cabd8..6db2c20 100644
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ b/arch/sh/drivers/pci/ops-lboxre2.c
@@ -21,36 +21,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	return lboxre2_irq_tab[slot];
 }
-
-static struct resource sh7751_io_resource = {
-	.name	= "SH7751_IO",
-	.start	= SH7751_PCI_IO_BASE ,
-	.end	= SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name	= "SH7751_mem",
-	.start	= SH7751_PCI_MEMORY_BASE,
-	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops sh7751_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0	= {
-		.base	= SH7751_CS3_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
-}
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
index 96b916c..d950b8a 100644
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ b/arch/sh/drivers/pci/ops-rts7751r2d.c
@@ -29,37 +29,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	return rts7751r2d_irq_tab[slot];
 }
-
-static struct resource sh7751_io_resource = {
-	.name	= "SH7751_IO",
-	.start	= 0x4000,
-	.end	= SH7751_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name	= "SH7751_mem",
-	.start	= SH7751_PCI_MEMORY_BASE,
-	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-extern struct pci_ops sh7751_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0	= {
-		.base	= SH7751_CS3_BASE_ADDR,
-		.size	= 0x04000000,
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
-}
-
diff --git a/arch/sh/drivers/pci/ops-sh03.c b/arch/sh/drivers/pci/ops-sh03.c
deleted file mode 100644
index 0218135..0000000
--- a/arch/sh/drivers/pci/ops-sh03.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-sh03.c
- *
- * PCI initialization for the Interface CTP/PCI-SH03 board
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <asm/io.h>
-#include "pci-sh7751.h"
-
-/*
- * Description:  This function sets up and initializes the pcic, sets
- * up the BARS, maps the DRAM into the address space etc, etc.
- */
-int __init pcibios_init_platform(void)
-{
-	__set_io_port_base(SH7751_PCI_IO_BASE);
-	return 1;
-}
-
-static struct resource sh7751_io_resource = {
-	.name   = "SH03 IO",
-	.start  = SH7751_PCI_IO_BASE,
-	.end    = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags  = IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name   = "SH03 mem",
-	.start  = SH7751_PCI_MEMORY_BASE,
-	.end    = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags  = IORESOURCE_MEM
-};
-
-extern struct pci_ops sh4_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c
index b64f2b9..5a39ecc 100644
--- a/arch/sh/drivers/pci/ops-snapgear.c
+++ b/arch/sh/drivers/pci/ops-snapgear.c
@@ -18,55 +18,6 @@
 #include <linux/pci.h>
 #include "pci-sh4.h"
 
-#define SNAPGEAR_PCI_IO		0x4000
-#define SNAPGEAR_PCI_MEM	0xfd000000
-
-/* PCI: default LOCAL memory window sizes (seen from PCI bus) */
-#define SNAPGEAR_LSR0_SIZE    (64*(1<<20)) //64MB
-#define SNAPGEAR_LSR1_SIZE    (64*(1<<20)) //64MB
-
-static struct resource sh7751_io_resource = {
-	.name		= "SH7751 IO",
-	.start		= SNAPGEAR_PCI_IO,
-	.end		= SNAPGEAR_PCI_IO + (64*1024) - 1, /* 64KiB I/O */
-	.flags		= IORESOURCE_IO,
-};
-
-static struct resource sh7751_mem_resource = {
-	.name		= "SH7751 mem",
-	.start		= SNAPGEAR_PCI_MEM,
-	.end		= SNAPGEAR_PCI_MEM + (64*1024*1024) - 1, /* 64MiB mem */
-	.flags		= IORESOURCE_MEM,
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ 0, }
-};
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0	= {
-		.base	= SH7751_CS2_BASE_ADDR,
-		.size	= SNAPGEAR_LSR0_SIZE,
-	},
-
-	.window1	= {
-		.base	= SH7751_CS2_BASE_ADDR,
-		.size	= SNAPGEAR_LSR1_SIZE,
-	},
-};
-
-/*
- * Initialize the SnapGear PCI interface
- * Setup hardware to be Central Funtion
- * Copy the BSR regs to the PCI interface
- * Setup PCI windows into local RAM
- */
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
-}
-
 int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 {
 	int irq = -1;
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
index e45bb62..3a79fa8 100644
--- a/arch/sh/drivers/pci/ops-titan.c
+++ b/arch/sh/drivers/pci/ops-titan.c
@@ -36,39 +36,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
 
 	return irq;
 }
-
-static struct resource sh7751_io_resource = {
-	.name	= "SH7751_IO",
-	.start	= SH7751_PCI_IO_BASE,
-	.end	= SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags	= IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name	= "SH7751_mem",
-	.start	= SH7751_PCI_MEMORY_BASE,
-	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags	= IORESOURCE_MEM
-};
-
-struct pci_channel board_pci_channels[] = {
-	{ sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
-static struct sh4_pci_address_map sh7751_pci_map = {
-	.window0	= {
-		.base	= SH7751_CS2_BASE_ADDR,
-		.size	= SH7751_MEM_REGION_SIZE*2,	/* cs2 and cs3 */
-	},
-
-	.window1	= {
-		.base	= SH7751_CS2_BASE_ADDR,
-		.size	= SH7751_MEM_REGION_SIZE*2,
-	},
-};
-
-int __init pcibios_init_platform(void)
-{
-	return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map);
-}
diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index 4c08fd7..c4fa0bb1 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -1,78 +1,41 @@
 /*
- *	Low-Level PCI Support for the SH7751
+ * Low-Level PCI Support for the SH7751
  *
- *  Dustin McIntire (dustin@sensoria.com)
- *	Derived from arch/i386/kernel/pci-*.c which bore the message:
- *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
+ *  Copyright (C) 2003 - 2009  Paul Mundt
+ *  Copyright (C) 2001  Dustin McIntire
  *
- *  Ported to the new API by Paul Mundt <lethal@linux-sh.org>
- *  With cleanup by Paul van Gool <pvangool@mimotech.com>
- *
- *  May be copied or modified under the terms of the GNU General Public
- *  License.  See linux/COPYING for more information.
+ *  With cleanup by Paul van Gool <pvangool@mimotech.com>, 2003.
  *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
-#undef DEBUG
-
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/delay.h>
+#include <linux/io.h>
 #include "pci-sh4.h"
 #include <asm/addrspace.h>
-#include <asm/io.h>
-
-/*
- * Initialization. Try all known PCI access methods. Note that we support
- * using both PCI BIOS and direct access: in such cases, we use I/O ports
- * to access config space.
- *
- * Note that the platform specific initialization (BSC registers, and memory
- * space mapping) will be called via the platform defined function
- * pcibios_init_platform().
- */
-int __init sh7751_pci_init(struct pci_channel *chan)
-{
-	unsigned int id;
-	int ret;
-
-	pr_debug("PCI: Starting intialization.\n");
-
-	chan->reg_base = 0xfe200000;
-
-	/* check for SH7751/SH7751R hardware */
-	id = pci_read_reg(chan, SH7751_PCICONF0);
-	if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) &&
-	    id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) {
-		pr_debug("PCI: This is not an SH7751(R) (%x)\n", id);
-		return -ENODEV;
-	}
-
-	if ((ret = sh4_pci_check_direct(chan)) != 0)
-		return ret;
-
-	return pcibios_init_platform();
-}
 
 static int __init __area_sdram_check(struct pci_channel *chan,
 				     unsigned int area)
 {
-	u32 word;
+	unsigned long word;
 
-	word = ctrl_inl(SH7751_BCR1);
+	word = __raw_readl(SH7751_BCR1);
 	/* check BCR for SDRAM in area */
 	if (((word >> area) & 1) == 0) {
-		printk("PCI: Area %d is not configured for SDRAM. BCR1=0x%x\n",
+		printk("PCI: Area %d is not configured for SDRAM. BCR1=0x%lx\n",
 		       area, word);
 		return 0;
 	}
 	pci_write_reg(chan, word, SH4_PCIBCR1);
 
-	word = (u16)ctrl_inw(SH7751_BCR2);
+	word = __raw_readw(SH7751_BCR2);
 	/* check BCR2 for 32bit SDRAM interface*/
 	if (((word >> (area << 1)) & 0x3) != 0x3) {
-		printk("PCI: Area %d is not 32 bit SDRAM. BCR2=0x%x\n",
+		printk("PCI: Area %d is not 32 bit SDRAM. BCR2=0x%lx\n",
 		       area, word);
 		return 0;
 	}
@@ -81,11 +44,56 @@ static int __init __area_sdram_check(struct pci_channel *chan,
 	return 1;
 }
 
-int __init sh7751_pcic_init(struct pci_channel *chan,
-			    struct sh4_pci_address_map *map)
+static struct resource sh7751_io_resource = {
+	.name	= "SH7751_IO",
+	.start	= SH7751_PCI_IO_BASE,
+	.end	= SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
+	.flags	= IORESOURCE_IO
+};
+
+static struct resource sh7751_mem_resource = {
+	.name	= "SH7785_mem",
+	.start	= SH7751_PCI_MEMORY_BASE,
+	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM
+};
+
+static struct pci_channel sh7751_pci_controller = {
+	.pci_ops	= &sh4_pci_ops,
+	.mem_resource	= &sh7751_mem_resource,
+	.mem_offset	= 0x00000000,
+	.io_resource	= &sh7751_io_resource,
+	.io_offset	= 0x00000000,
+};
+
+static struct sh4_pci_address_map sh7751_pci_map = {
+	.window0	= {
+		.base	= SH7751_CS3_BASE_ADDR,
+		.size	= 0x04000000,
+	},
+};
+
+static int __init sh7751_pci_init(void)
 {
-	u32 reg;
-	u32 word;
+	struct pci_channel *chan = &sh7751_pci_controller;
+	unsigned int id;
+	u32 word, reg;
+	int ret;
+
+	printk(KERN_NOTICE "PCI: Starting intialization.\n");
+
+	chan->reg_base = 0xfe200000;
+
+	/* check for SH7751/SH7751R hardware */
+	id = pci_read_reg(chan, SH7751_PCICONF0);
+	if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) &&
+	    id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) {
+		pr_debug("PCI: This is not an SH7751(R) (%x)\n", id);
+		return -ENODEV;
+	}
+
+	if ((ret = sh4_pci_check_direct(chan)) != 0)
+		return ret;
 
 	/* Set the BCR's to enable PCI access */
 	reg = ctrl_inl(SH7751_BCR1);
@@ -112,21 +120,13 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 
 	/* Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
-	 * Window0 = map->window0.size @ non-cached area base = SDRAM
-	 * Window1 = map->window1.size @ cached area base = SDRAM
 	 */
-	word = map->window0.size - 1;
+	word = sh7751_pci_map.window0.size - 1;
 	pci_write_reg(chan, word, SH4_PCILSR0);
-	word = map->window1.size - 1;
-	pci_write_reg(chan, word, SH4_PCILSR1);
 	/* Set the values on window 0 PCI config registers */
-	word = P2SEGADDR(map->window0.base);
+	word = P2SEGADDR(sh7751_pci_map.window0.base);
 	pci_write_reg(chan, word, SH4_PCILAR0);
 	pci_write_reg(chan, word, SH7751_PCICONF5);
-	/* Set the values on window 1 PCI config registers */
-	word =  PHYSADDR(map->window1.base);
-	pci_write_reg(chan, word, SH4_PCILAR1);
-	pci_write_reg(chan, word, SH7751_PCICONF6);
 
 	/* Set the local 16MB PCI memory space window to
 	 * the lowest PCI mapped address
@@ -144,7 +144,7 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 	/* Set PCI WCRx, BCRx's, copy from BSC locations */
 
 	/* check BCR for SDRAM in specified area */
-	switch (map->window0.base) {
+	switch (sh7751_pci_map.window0.base) {
 	case SH7751_CS0_BASE_ADDR: word = __area_sdram_check(chan, 0); break;
 	case SH7751_CS1_BASE_ADDR: word = __area_sdram_check(chan, 1); break;
 	case SH7751_CS2_BASE_ADDR: word = __area_sdram_check(chan, 2); break;
@@ -179,5 +179,10 @@ int __init sh7751_pcic_init(struct pci_channel *chan,
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_ARBM;
 	pci_write_reg(chan, word, SH4_PCICR);
 
+	__set_io_port_base(SH7751_PCI_IO_BASE);
+
+	register_pci_controller(chan);
+
 	return 0;
 }
+arch_initcall(sh7751_pci_init);
diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h
index c390dd2..4983a4d 100644
--- a/arch/sh/drivers/pci/pci-sh7751.h
+++ b/arch/sh/drivers/pci/pci-sh7751.h
@@ -57,7 +57,7 @@
   #define SH7751_PCICONF2_SCC        0x00FF0000  /* Sub-Class Code */
   #define SH7751_PCICONF2_RLPI       0x0000FF00  /* Programming Interface */
   #define SH7751_PCICONF2_REV        0x000000FF  /* Revision ID */
-#define SH7751_PCICONF3            0xC           /* PCI Config Reg 3 */ 
+#define SH7751_PCICONF3            0xC           /* PCI Config Reg 3 */
   #define SH7751_PCICONF3_BIST7      0x80000000  /* Bist Supported */
   #define SH7751_PCICONF3_BIST6      0x40000000  /* Bist Executing */
   #define SH7751_PCICONF3_BIST3_0    0x0F000000  /* Bist Passed */
@@ -72,12 +72,12 @@
   #define SH7751_PCICONF5_BASE       0xFFFFFFF0  /* Mem Space Base Addr */
   #define SH7751_PCICONF5_LAP        0x00000008  /* Prefetch Enabled */
   #define SH7751_PCICONF5_LAT        0x00000006  /* Local Memory type */
-  #define SH7751_PCICONF5_ASI        0x00000001  /* Address Space Type */  
+  #define SH7751_PCICONF5_ASI        0x00000001  /* Address Space Type */
 #define SH7751_PCICONF6            0x18          /* PCI Config Reg 6 */
   #define SH7751_PCICONF6_BASE       0xFFFFFFF0  /* Mem Space Base Addr */
   #define SH7751_PCICONF6_LAP        0x00000008  /* Prefetch Enabled */
   #define SH7751_PCICONF6_LAT        0x00000006  /* Local Memory type */
-  #define SH7751_PCICONF6_ASI        0x00000001  /* Address Space Type */  
+  #define SH7751_PCICONF6_ASI        0x00000001  /* Address Space Type */
 /* PCICONF7 - PCICONF10 are undefined */
 #define SH7751_PCICONF11           0x2C          /* PCI Config Reg 11 */
   #define SH7751_PCICONF11_SSID      0xFFFF0000  /* Subsystem ID */
@@ -126,11 +126,4 @@
 #define SH7751_CS5_BASE_ADDR       (SH7751_CS4_BASE_ADDR + SH7751_MEM_REGION_SIZE)
 #define SH7751_CS6_BASE_ADDR       (SH7751_CS5_BASE_ADDR + SH7751_MEM_REGION_SIZE)
 
-struct sh4_pci_address_map;
-
-/* arch/sh/drivers/pci/pci-sh7751.c */
-int sh7751_pci_init(struct pci_channel *chan);
-int sh7751_pcic_init(struct pci_channel *chan,
-		     struct sh4_pci_address_map *map);
-
 #endif /* _PCI_SH7751_H_ */
-- 
cgit v0.10.2


From 84972ec0c206b73ffb74e5114b574186ce6166b8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 21:17:10 +0900
Subject: sh: pci: Rename SH7751 platform ops files to fixups.

None of these contain pci_ops, only IRQ routing bits, rename them
accordingly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index b8667de..abd931c 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -14,14 +14,14 @@ obj-$(CONFIG_CPU_SH5)			+= pci-sh5.o ops-sh5.o
 
 obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o \
 					   pci-dreamcast.o
-obj-$(CONFIG_SH_SECUREEDGE5410)		+= ops-snapgear.o
+obj-$(CONFIG_SH_SECUREEDGE5410)		+= fixups-snapgear.o
 obj-$(CONFIG_SH_RTS7751R2D)		+= ops-rts7751r2d.o fixups-rts7751r2d.o
 obj-$(CONFIG_SH_SH03)			+= fixups-sh03.o
 obj-$(CONFIG_SH_HIGHLANDER)		+= fixups-r7780rp.o
 obj-$(CONFIG_SH_SH7785LCR)		+= fixups-r7780rp.o
 obj-$(CONFIG_SH_SDK7780)		+= fixups-sdk7780.o
 obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= fixups-sdk7780.o
-obj-$(CONFIG_SH_TITAN)			+= ops-titan.o
-obj-$(CONFIG_SH_LANDISK)		+= ops-landisk.o
-obj-$(CONFIG_SH_LBOX_RE2)		+= ops-lboxre2.o fixups-rts7751r2d.o
+obj-$(CONFIG_SH_TITAN)			+= fixups-titan.o
+obj-$(CONFIG_SH_LANDISK)		+= fixups-landisk.o
+obj-$(CONFIG_SH_LBOX_RE2)		+= fixups-lboxre2.o fixups-rts7751r2d.o
 obj-$(CONFIG_SH_CAYMAN)			+= fixups-cayman.o
diff --git a/arch/sh/drivers/pci/fixups-landisk.c b/arch/sh/drivers/pci/fixups-landisk.c
new file mode 100644
index 0000000..bb1a6bb
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-landisk.c
@@ -0,0 +1,34 @@
+/*
+ * arch/sh/drivers/pci/ops-landisk.c
+ *
+ * PCI initialization for the I-O DATA Device, Inc. LANDISK board
+ *
+ * Copyright (C) 2006 kogiidena
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include "pci-sh4.h"
+
+int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	/*
+	 * slot0: pin1-4 = irq5,6,7,8
+	 * slot1: pin1-4 = irq6,7,8,5
+	 * slot2: pin1-4 = irq7,8,5,6
+	 * slot3: pin1-4 = irq8,5,6,7
+	 */
+	int irq = ((slot + pin - 1) & 0x3) + 5;
+
+	if ((slot | (pin - 1)) > 0x3) {
+		printk("PCI: Bad IRQ mapping request for slot %d pin %c\n",
+		       slot, pin - 1 + 'A');
+		return -1;
+	}
+	return irq;
+}
diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c
new file mode 100644
index 0000000..6db2c20
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-lboxre2.c
@@ -0,0 +1,23 @@
+/*
+ * linux/arch/sh/drivers/pci/ops-lboxre2.c
+ *
+ * Copyright (C) 2007 Nobuhiro Iwamatsu
+ *
+ * PCI initialization for the NTT COMWARE L-BOX RE2
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <mach/lboxre2.h>
+#include "pci-sh4.h"
+
+static char lboxre2_irq_tab[] __initdata = {
+	IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD,
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	return lboxre2_irq_tab[slot];
+}
diff --git a/arch/sh/drivers/pci/fixups-snapgear.c b/arch/sh/drivers/pci/fixups-snapgear.c
new file mode 100644
index 0000000..5a39ecc
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-snapgear.c
@@ -0,0 +1,38 @@
+/*
+ * arch/sh/drivers/pci/ops-snapgear.c
+ *
+ * Author:  David McCullough <davidm@snapgear.com>
+ *
+ * Ported to new API by Paul Mundt <lethal@linux-sh.org>
+ *
+ * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * PCI initialization for the SnapGear boards
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include "pci-sh4.h"
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	int irq = -1;
+
+	switch (slot) {
+	case 8:  /* the PCI bridge */ break;
+	case 11: irq = 8;  break; /* USB    */
+	case 12: irq = 11; break; /* PCMCIA */
+	case 13: irq = 5;  break; /* eth0   */
+	case 14: irq = 8;  break; /* eth1   */
+	case 15: irq = 11; break; /* safenet (unused) */
+	}
+
+	printk("PCI: Mapping SnapGear IRQ for slot %d, pin %c to irq %d\n",
+	       slot, pin - 1 + 'A', irq);
+
+	return irq;
+}
diff --git a/arch/sh/drivers/pci/fixups-titan.c b/arch/sh/drivers/pci/fixups-titan.c
new file mode 100644
index 0000000..3a79fa8
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-titan.c
@@ -0,0 +1,38 @@
+/*
+ * arch/sh/drivers/pci/ops-titan.c
+ *
+ * Ported to new API by Paul Mundt <lethal@linux-sh.org>
+ *
+ * Modified from ops-snapgear.c written by  David McCullough
+ * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * PCI initialization for the Titan boards
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <mach/titan.h>
+#include "pci-sh4.h"
+
+static char titan_irq_tab[] __initdata = {
+	TITAN_IRQ_WAN,
+	TITAN_IRQ_LAN,
+	TITAN_IRQ_MPCIA,
+	TITAN_IRQ_MPCIB,
+	TITAN_IRQ_USB,
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	int irq = titan_irq_tab[slot];
+
+	printk("PCI: Mapping TITAN IRQ for slot %d, pin %c to irq %d\n",
+		slot, pin - 1 + 'A', irq);
+
+	return irq;
+}
diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c
deleted file mode 100644
index bb1a6bb..0000000
--- a/arch/sh/drivers/pci/ops-landisk.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * arch/sh/drivers/pci/ops-landisk.c
- *
- * PCI initialization for the I-O DATA Device, Inc. LANDISK board
- *
- * Copyright (C) 2006 kogiidena
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	/*
-	 * slot0: pin1-4 = irq5,6,7,8
-	 * slot1: pin1-4 = irq6,7,8,5
-	 * slot2: pin1-4 = irq7,8,5,6
-	 * slot3: pin1-4 = irq8,5,6,7
-	 */
-	int irq = ((slot + pin - 1) & 0x3) + 5;
-
-	if ((slot | (pin - 1)) > 0x3) {
-		printk("PCI: Bad IRQ mapping request for slot %d pin %c\n",
-		       slot, pin - 1 + 'A');
-		return -1;
-	}
-	return irq;
-}
diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c
deleted file mode 100644
index 6db2c20..0000000
--- a/arch/sh/drivers/pci/ops-lboxre2.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-lboxre2.c
- *
- * Copyright (C) 2007 Nobuhiro Iwamatsu
- *
- * PCI initialization for the NTT COMWARE L-BOX RE2
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <mach/lboxre2.h>
-#include "pci-sh4.h"
-
-static char lboxre2_irq_tab[] __initdata = {
-	IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return lboxre2_irq_tab[slot];
-}
diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c
deleted file mode 100644
index 5a39ecc..0000000
--- a/arch/sh/drivers/pci/ops-snapgear.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * arch/sh/drivers/pci/ops-snapgear.c
- *
- * Author:  David McCullough <davidm@snapgear.com>
- *
- * Ported to new API by Paul Mundt <lethal@linux-sh.org>
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the SnapGear boards
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include "pci-sh4.h"
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	int irq = -1;
-
-	switch (slot) {
-	case 8:  /* the PCI bridge */ break;
-	case 11: irq = 8;  break; /* USB    */
-	case 12: irq = 11; break; /* PCMCIA */
-	case 13: irq = 5;  break; /* eth0   */
-	case 14: irq = 8;  break; /* eth1   */
-	case 15: irq = 11; break; /* safenet (unused) */
-	}
-
-	printk("PCI: Mapping SnapGear IRQ for slot %d, pin %c to irq %d\n",
-	       slot, pin - 1 + 'A', irq);
-
-	return irq;
-}
diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c
deleted file mode 100644
index 3a79fa8..0000000
--- a/arch/sh/drivers/pci/ops-titan.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * arch/sh/drivers/pci/ops-titan.c
- *
- * Ported to new API by Paul Mundt <lethal@linux-sh.org>
- *
- * Modified from ops-snapgear.c written by  David McCullough
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Titan boards
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <mach/titan.h>
-#include "pci-sh4.h"
-
-static char titan_irq_tab[] __initdata = {
-	TITAN_IRQ_WAN,
-	TITAN_IRQ_LAN,
-	TITAN_IRQ_MPCIA,
-	TITAN_IRQ_MPCIB,
-	TITAN_IRQ_USB,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	int irq = titan_irq_tab[slot];
-
-	printk("PCI: Mapping TITAN IRQ for slot %d, pin %c to irq %d\n",
-		slot, pin - 1 + 'A', irq);
-
-	return irq;
-}
-- 
cgit v0.10.2


From 37c8ac361efdbae969eff1a603bc39412d3e873f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 21:27:15 +0900
Subject: sh: pci: Consolidate lboxre2 and r2d IRQ fixups.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index abd931c..fbebd73 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_CPU_SH5)			+= pci-sh5.o ops-sh5.o
 obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o \
 					   pci-dreamcast.o
 obj-$(CONFIG_SH_SECUREEDGE5410)		+= fixups-snapgear.o
-obj-$(CONFIG_SH_RTS7751R2D)		+= ops-rts7751r2d.o fixups-rts7751r2d.o
+obj-$(CONFIG_SH_RTS7751R2D)		+= fixups-rts7751r2d.o
 obj-$(CONFIG_SH_SH03)			+= fixups-sh03.o
 obj-$(CONFIG_SH_HIGHLANDER)		+= fixups-r7780rp.o
 obj-$(CONFIG_SH_SH7785LCR)		+= fixups-r7780rp.o
@@ -23,5 +23,5 @@ obj-$(CONFIG_SH_SDK7780)		+= fixups-sdk7780.o
 obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= fixups-sdk7780.o
 obj-$(CONFIG_SH_TITAN)			+= fixups-titan.o
 obj-$(CONFIG_SH_LANDISK)		+= fixups-landisk.o
-obj-$(CONFIG_SH_LBOX_RE2)		+= fixups-lboxre2.o fixups-rts7751r2d.o
+obj-$(CONFIG_SH_LBOX_RE2)		+= fixups-rts7751r2d.o
 obj-$(CONFIG_SH_CAYMAN)			+= fixups-cayman.o
diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c
deleted file mode 100644
index 6db2c20..0000000
--- a/arch/sh/drivers/pci/fixups-lboxre2.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-lboxre2.c
- *
- * Copyright (C) 2007 Nobuhiro Iwamatsu
- *
- * PCI initialization for the NTT COMWARE L-BOX RE2
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <mach/lboxre2.h>
-#include "pci-sh4.h"
-
-static char lboxre2_irq_tab[] __initdata = {
-	IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return lboxre2_irq_tab[slot];
-}
diff --git a/arch/sh/drivers/pci/fixups-rts7751r2d.c b/arch/sh/drivers/pci/fixups-rts7751r2d.c
index 3885233..052b354 100644
--- a/arch/sh/drivers/pci/fixups-rts7751r2d.c
+++ b/arch/sh/drivers/pci/fixups-rts7751r2d.c
@@ -1,21 +1,44 @@
 /*
  * arch/sh/drivers/pci/fixups-rts7751r2d.c
  *
- * RTS7751R2D PCI fixups
+ * RTS7751R2D / LBOXRE2 PCI fixups
  *
  * Copyright (C) 2003  Lineo uSolutions, Inc.
  * Copyright (C) 2004  Paul Mundt
+ * Copyright (C) 2007  Nobuhiro Iwamatsu
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
 #include <linux/pci.h>
+#include <mach/lboxre2.h>
+#include <mach/r2d.h>
 #include "pci-sh4.h"
+#include <asm/machtypes.h>
 
 #define PCIMCR_MRSET_OFF	0xBFFFFFFF
 #define PCIMCR_RFSH_OFF		0xFFFFFFFB
 
+static u8 rts7751r2d_irq_tab[] __initdata = {
+	IRQ_PCI_INTA,
+	IRQ_PCI_INTB,
+	IRQ_PCI_INTC,
+	IRQ_PCI_INTD,
+};
+
+static char lboxre2_irq_tab[] __initdata = {
+	IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD,
+};
+
+int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	if (mach_is_lboxre2())
+		return lboxre2_irq_tab[slot];
+	else
+		return rts7751r2d_irq_tab[slot];
+}
+
 int pci_fixup_pcic(struct pci_channel *chan)
 {
 	unsigned long bcr1, mcr;
diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c
deleted file mode 100644
index d950b8a..0000000
--- a/arch/sh/drivers/pci/ops-rts7751r2d.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * linux/arch/sh/drivers/pci/ops-rts7751r2d.c
- *
- * Author:  Ian DaSilva (idasilva@mvista.com)
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Renesas SH7751R RTS7751R2D board
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <mach/r2d.h>
-#include "pci-sh4.h"
-
-static u8 rts7751r2d_irq_tab[] __initdata = {
-	IRQ_PCI_INTA,
-	IRQ_PCI_INTB,
-	IRQ_PCI_INTC,
-	IRQ_PCI_INTD,
-};
-
-int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin)
-{
-	return rts7751r2d_irq_tab[slot];
-}
-- 
cgit v0.10.2


From 951a681bda844491de8699b3bdc6c3899cbd4c9f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 21:34:36 +0900
Subject: sh: pci: Convert dreamcast to new-style interface.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig
index 5aaee3c..1d53496 100644
--- a/arch/sh/drivers/pci/Kconfig
+++ b/arch/sh/drivers/pci/Kconfig
@@ -20,11 +20,8 @@ config SH_PCIDMA_NONCOHERENT
 
 # Temporary config option for transitioning off of PCI_AUTO
 config PCI_NEW
-	bool
+	def_bool y
 	depends on PCI
-	default y if CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
-		     CPU_SUBTYPE_SH7785 || CPU_SH5 || \
-		     CPU_SUBTYPE_SH7751 || CPU_SUBTYPE_SH7751R
 
 # This is also board-specific
 config PCI_AUTO
diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c
index 48c6381..ed7f489 100644
--- a/arch/sh/drivers/pci/fixups-dreamcast.c
+++ b/arch/sh/drivers/pci/fixups-dreamcast.c
@@ -30,7 +30,7 @@
 
 static void __init gapspci_fixup_resources(struct pci_dev *dev)
 {
-	struct pci_channel *p = board_pci_channels;
+	struct pci_channel *p = dev->sysdata;
 
 	printk(KERN_NOTICE "PCI: Fixing up device %s\n", pci_name(dev));
 
diff --git a/arch/sh/drivers/pci/pci-dreamcast.c b/arch/sh/drivers/pci/pci-dreamcast.c
index 0897be5..210f9d4 100644
--- a/arch/sh/drivers/pci/pci-dreamcast.c
+++ b/arch/sh/drivers/pci/pci-dreamcast.c
@@ -21,7 +21,6 @@
 #include <linux/irq.h>
 #include <linux/pci.h>
 #include <linux/module.h>
-
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <mach/pci.h>
@@ -40,11 +39,19 @@ static struct resource gapspci_mem_resource = {
 	.flags	= IORESOURCE_MEM,
 };
 
+static struct pci_channel dreamcast_pci_controller = {
+	.pci_ops	= &gapspci_pci_ops,
+	.io_resource	= &gapspci_io_resource,
+	.io_offset	= 0x00000000,
+	.mem_resource	= &gapspci_mem_resource,
+	.mem_offset	= 0x00000000,
+};
+
 /*
  * gapspci init
  */
 
-static int __init gapspci_init(struct pci_channel *chan)
+static int __init gapspci_init(void)
 {
 	char idbuf[16];
 	int i;
@@ -88,18 +95,8 @@ static int __init gapspci_init(struct pci_channel *chan)
 	outl(0x00002001, GAPSPCI_BBA_CONFIG+0x10);
 	outl(0x01000000, GAPSPCI_BBA_CONFIG+0x14);
 
+	register_pci_controller(&dreamcast_pci_controller);
+
 	return 0;
 }
-
-struct pci_channel board_pci_channels[] = {
-	{
-		.init		= gapspci_init,
-		.pci_ops	= &gapspci_pci_ops,
-		.io_resource	= &gapspci_io_resource,
-		.mem_resource	= &gapspci_mem_resource,
-		.first_devfn	= 0,
-		.last_devfn	= 1,
-	}, {
-		.init		= NULL,
-	}
-};
+arch_initcall(gapspci_init);
-- 
cgit v0.10.2


From 2d5efc190eb415dbff79ffab4f8ea731ab0288a9 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 21:42:59 +0900
Subject: sh: pci: Move the se7751 fixups in to arch/sh/drivers/pci/.

The se7751 was still doing the PCI fixups in its own board directory,
so we move it over to arch/sh/drivers/pci/ with the rest of the board
fixups. It has bitrotted significantly over the years, so will still
likely need a bit of work to bring back up to date.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-se/7751/Makefile b/arch/sh/boards/mach-se/7751/Makefile
index dbc29f3..e6f4341 100644
--- a/arch/sh/boards/mach-se/7751/Makefile
+++ b/arch/sh/boards/mach-se/7751/Makefile
@@ -3,5 +3,3 @@
 #
 
 obj-y	 := setup.o io.o irq.o
-
-obj-$(CONFIG_PCI) += pci.o
diff --git a/arch/sh/boards/mach-se/7751/pci.c b/arch/sh/boards/mach-se/7751/pci.c
deleted file mode 100644
index 9ec64a4..0000000
--- a/arch/sh/boards/mach-se/7751/pci.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * linux/arch/sh/boards/se/7751/pci.c
- *
- * Author:  Ian DaSilva (idasilva@mvista.com)
- *
- * Highly leveraged from pci-bigsur.c, written by Dustin McIntire.
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * PCI initialization for the Hitachi SH7751 Solution Engine board (MS7751SE01)
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-
-#include <asm/io.h>
-#include "../../../drivers/pci/pci-sh7751.h"
-
-#define PCIMCR_MRSET_OFF	0xBFFFFFFF
-#define PCIMCR_RFSH_OFF		0xFFFFFFFB
-
-/*
- * Only long word accesses of the PCIC's internal local registers and the
- * configuration registers from the CPU is supported.
- */
-#define PCIC_WRITE(x,v) writel((v), PCI_REG(x))
-#define PCIC_READ(x) readl(PCI_REG(x))
-
-#define xPCIBIOS_MIN_IO		board_pci_channels->io_resource->start
-#define xPCIBIOS_MIN_MEM	board_pci_channels->mem_resource->start
-
-/*
- * Description:  This function sets up and initializes the pcic, sets
- * up the BARS, maps the DRAM into the address space etc, etc.
- */
-int __init pcibios_init_platform(void)
-{
-   unsigned long bcr1, wcr1, wcr2, wcr3, mcr;
-   unsigned short bcr2;
-
-   /*
-    * Initialize the slave bus controller on the pcic.  The values used
-    * here should not be hardcoded, but they should be taken from the bsc
-    * on the processor, to make this function as generic as possible.
-    * (i.e. Another sbc may usr different SDRAM timing settings -- in order
-    * for the pcic to work, its settings need to be exactly the same.)
-    */
-   bcr1 = (*(volatile unsigned long*)(SH7751_BCR1));
-   bcr2 = (*(volatile unsigned short*)(SH7751_BCR2));
-   wcr1 = (*(volatile unsigned long*)(SH7751_WCR1));
-   wcr2 = (*(volatile unsigned long*)(SH7751_WCR2));
-   wcr3 = (*(volatile unsigned long*)(SH7751_WCR3));
-   mcr = (*(volatile unsigned long*)(SH7751_MCR));
-
-   bcr1 = bcr1 | 0x00080000;  /* Enable Bit 19, BREQEN */
-   (*(volatile unsigned long*)(SH7751_BCR1)) = bcr1;   
-
-   bcr1 = bcr1 | 0x40080000;  /* Enable Bit 19 BREQEN, set PCIC to slave */
-   PCIC_WRITE(SH7751_PCIBCR1, bcr1);	 /* PCIC BCR1 */
-   PCIC_WRITE(SH7751_PCIBCR2, bcr2);     /* PCIC BCR2 */
-   PCIC_WRITE(SH7751_PCIWCR1, wcr1);     /* PCIC WCR1 */
-   PCIC_WRITE(SH7751_PCIWCR2, wcr2);     /* PCIC WCR2 */
-   PCIC_WRITE(SH7751_PCIWCR3, wcr3);     /* PCIC WCR3 */
-   mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
-   PCIC_WRITE(SH7751_PCIMCR, mcr);      /* PCIC MCR */
-
-
-   /* Enable all interrupts, so we know what to fix */
-   PCIC_WRITE(SH7751_PCIINTM, 0x0000c3ff);
-   PCIC_WRITE(SH7751_PCIAINTM, 0x0000380f);
-
-   /* Set up standard PCI config registers */
-   PCIC_WRITE(SH7751_PCICONF1, 	0xF39000C7); /* Bus Master, Mem & I/O access */
-   PCIC_WRITE(SH7751_PCICONF2, 	0x00000000); /* PCI Class code & Revision ID */
-   PCIC_WRITE(SH7751_PCICONF4, 	0xab000001); /* PCI I/O address (local regs) */
-   PCIC_WRITE(SH7751_PCICONF5, 	0x0c000000); /* PCI MEM address (local RAM)  */
-   PCIC_WRITE(SH7751_PCICONF6, 	0xd0000000); /* PCI MEM address (unused)     */
-   PCIC_WRITE(SH7751_PCICONF11, 0x35051054); /* PCI Subsystem ID & Vendor ID */
-   PCIC_WRITE(SH7751_PCILSR0, 0x03f00000);   /* MEM (full 64M exposed)       */
-   PCIC_WRITE(SH7751_PCILSR1, 0x00000000);   /* MEM (unused)                 */
-   PCIC_WRITE(SH7751_PCILAR0, 0x0c000000);   /* MEM (direct map from PCI)    */
-   PCIC_WRITE(SH7751_PCILAR1, 0x00000000);   /* MEM (unused)                 */
-
-   /* Now turn it on... */
-   PCIC_WRITE(SH7751_PCICR, 0xa5000001);
-
-   /*
-    * Set PCIMBR and PCIIOBR here, assuming a single window
-    * (16M MEM, 256K IO) is enough.  If a larger space is
-    * needed, the readx/writex and inx/outx functions will
-    * have to do more (e.g. setting registers for each call).
-    */
-
-   /*
-    * Set the MBR so PCI address is one-to-one with window,
-    * meaning all calls go straight through... use BUG_ON to
-    * catch erroneous assumption.
-    */
-   BUG_ON(xPCIBIOS_MIN_MEM != SH7751_PCI_MEMORY_BASE);
-
-   PCIC_WRITE(SH7751_PCIMBR, xPCIBIOS_MIN_MEM);
-
-   /* Set IOBR for window containing area specified in pci.h */
-   PCIC_WRITE(SH7751_PCIIOBR, (xPCIBIOS_MIN_IO & SH7751_PCIIOBR_MASK));
-
-   /* All done, may as well say so... */
-   printk("SH7751 PCI: Finished initialization of the PCI controller\n");
-
-   return 1;
-}
-
-int __init pcibios_map_platform_irq(u8 slot, u8 pin)
-{
-        switch (slot) {
-        case 0: return 13;
-        case 1: return 13; 	/* AMD Ethernet controller */
-        case 2: return -1;
-        case 3: return -1;
-        case 4: return -1;
-        default:
-                printk("PCI: Bad IRQ mapping request for slot %d\n", slot);
-                return -1;
-        }
-}
-
-static struct resource sh7751_io_resource = {
-	.name   = "SH7751 IO",
-	.start  = SH7751_PCI_IO_BASE,
-	.end    = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1,
-	.flags  = IORESOURCE_IO
-};
-
-static struct resource sh7751_mem_resource = {
-	.name   = "SH7751 mem",
-	.start  = SH7751_PCI_MEMORY_BASE,
-	.end    = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
-	.flags  = IORESOURCE_MEM
-};
-
-extern struct pci_ops sh7751_pci_ops;
-
-struct pci_channel board_pci_channels[] = {
-	{ &sh7751_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff },
-	{ NULL, NULL, NULL, 0, 0 },
-};
-
diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index fbebd73..e388a70 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_CPU_SH5)			+= pci-sh5.o ops-sh5.o
 obj-$(CONFIG_SH_DREAMCAST)		+= ops-dreamcast.o fixups-dreamcast.o \
 					   pci-dreamcast.o
 obj-$(CONFIG_SH_SECUREEDGE5410)		+= fixups-snapgear.o
+obj-$(CONFIG_SH_7751_SOLUTION_ENGINE)	+= fixups-se7751.o
 obj-$(CONFIG_SH_RTS7751R2D)		+= fixups-rts7751r2d.o
 obj-$(CONFIG_SH_SH03)			+= fixups-sh03.o
 obj-$(CONFIG_SH_HIGHLANDER)		+= fixups-r7780rp.o
diff --git a/arch/sh/drivers/pci/fixups-se7751.c b/arch/sh/drivers/pci/fixups-se7751.c
new file mode 100644
index 0000000..475fa9f
--- /dev/null
+++ b/arch/sh/drivers/pci/fixups-se7751.c
@@ -0,0 +1,111 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include "pci-sh4.h"
+
+int __init pcibios_map_platform_irq(u8 slot, u8 pin)
+{
+        switch (slot) {
+        case 0: return 13;
+        case 1: return 13;	/* AMD Ethernet controller */
+        case 2: return -1;
+        case 3: return -1;
+        case 4: return -1;
+        default:
+                printk("PCI: Bad IRQ mapping request for slot %d\n", slot);
+                return -1;
+        }
+}
+
+#define PCIMCR_MRSET_OFF	0xBFFFFFFF
+#define PCIMCR_RFSH_OFF		0xFFFFFFFB
+
+/*
+ * Only long word accesses of the PCIC's internal local registers and the
+ * configuration registers from the CPU is supported.
+ */
+#define PCIC_WRITE(x,v) writel((v), PCI_REG(x))
+#define PCIC_READ(x) readl(PCI_REG(x))
+
+/*
+ * Description:  This function sets up and initializes the pcic, sets
+ * up the BARS, maps the DRAM into the address space etc, etc.
+ */
+int pci_fixup_pcic(struct pci_channel *chan)
+{
+	unsigned long bcr1, wcr1, wcr2, wcr3, mcr;
+	unsigned short bcr2;
+
+	/*
+	* Initialize the slave bus controller on the pcic.  The values used
+	* here should not be hardcoded, but they should be taken from the bsc
+	* on the processor, to make this function as generic as possible.
+	* (i.e. Another sbc may usr different SDRAM timing settings -- in order
+	* for the pcic to work, its settings need to be exactly the same.)
+	*/
+	bcr1 = (*(volatile unsigned long*)(SH7751_BCR1));
+	bcr2 = (*(volatile unsigned short*)(SH7751_BCR2));
+	wcr1 = (*(volatile unsigned long*)(SH7751_WCR1));
+	wcr2 = (*(volatile unsigned long*)(SH7751_WCR2));
+	wcr3 = (*(volatile unsigned long*)(SH7751_WCR3));
+	mcr = (*(volatile unsigned long*)(SH7751_MCR));
+
+	bcr1 = bcr1 | 0x00080000;  /* Enable Bit 19, BREQEN */
+	(*(volatile unsigned long*)(SH7751_BCR1)) = bcr1;
+
+	bcr1 = bcr1 | 0x40080000;  /* Enable Bit 19 BREQEN, set PCIC to slave */
+	PCIC_WRITE(SH7751_PCIBCR1, bcr1);	 /* PCIC BCR1 */
+	PCIC_WRITE(SH7751_PCIBCR2, bcr2);     /* PCIC BCR2 */
+	PCIC_WRITE(SH7751_PCIWCR1, wcr1);     /* PCIC WCR1 */
+	PCIC_WRITE(SH7751_PCIWCR2, wcr2);     /* PCIC WCR2 */
+	PCIC_WRITE(SH7751_PCIWCR3, wcr3);     /* PCIC WCR3 */
+	mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF;
+	PCIC_WRITE(SH7751_PCIMCR, mcr);      /* PCIC MCR */
+
+
+	/* Enable all interrupts, so we know what to fix */
+	PCIC_WRITE(SH7751_PCIINTM, 0x0000c3ff);
+	PCIC_WRITE(SH7751_PCIAINTM, 0x0000380f);
+
+	/* Set up standard PCI config registers */
+	PCIC_WRITE(SH7751_PCICONF1,	0xF39000C7); /* Bus Master, Mem & I/O access */
+	PCIC_WRITE(SH7751_PCICONF2,	0x00000000); /* PCI Class code & Revision ID */
+	PCIC_WRITE(SH7751_PCICONF4,	0xab000001); /* PCI I/O address (local regs) */
+	PCIC_WRITE(SH7751_PCICONF5,	0x0c000000); /* PCI MEM address (local RAM)  */
+	PCIC_WRITE(SH7751_PCICONF6,	0xd0000000); /* PCI MEM address (unused)     */
+	PCIC_WRITE(SH7751_PCICONF11, 0x35051054); /* PCI Subsystem ID & Vendor ID */
+	PCIC_WRITE(SH7751_PCILSR0, 0x03f00000);   /* MEM (full 64M exposed)       */
+	PCIC_WRITE(SH7751_PCILSR1, 0x00000000);   /* MEM (unused)                 */
+	PCIC_WRITE(SH7751_PCILAR0, 0x0c000000);   /* MEM (direct map from PCI)    */
+	PCIC_WRITE(SH7751_PCILAR1, 0x00000000);   /* MEM (unused)                 */
+
+	/* Now turn it on... */
+	PCIC_WRITE(SH7751_PCICR, 0xa5000001);
+
+	/*
+	* Set PCIMBR and PCIIOBR here, assuming a single window
+	* (16M MEM, 256K IO) is enough.  If a larger space is
+	* needed, the readx/writex and inx/outx functions will
+	* have to do more (e.g. setting registers for each call).
+	*/
+
+	/*
+	* Set the MBR so PCI address is one-to-one with window,
+	* meaning all calls go straight through... use BUG_ON to
+	* catch erroneous assumption.
+	*/
+	BUG_ON(chan->mem_resource->start != SH7751_PCI_MEMORY_BASE);
+
+	PCIC_WRITE(SH7751_PCIMBR, chan->mem_resource->start);
+
+	/* Set IOBR for window containing area specified in pci.h */
+	PCIC_WRITE(SH7751_PCIIOBR, (chan->io_resource->start & SH7751_PCIIOBR_MASK));
+
+	/* All done, may as well say so... */
+	printk("SH7751 PCI: Finished initialization of the PCI controller\n");
+
+	return 1;
+}
-- 
cgit v0.10.2


From 805fcc88999162b361ef0b0ce25782ef65f147d7 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 21:46:42 +0900
Subject: sh: pci: Kill off the last remnants of the now unused pci-auto code.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig
index 1d53496..ea903a9 100644
--- a/arch/sh/drivers/pci/Kconfig
+++ b/arch/sh/drivers/pci/Kconfig
@@ -18,24 +18,6 @@ config SH_PCIDMA_NONCOHERENT
 	  bridge integrated with your SH CPU, refer carefully to the chip specs
 	  to see if you can say 'N' here. Otherwise, leave it as 'Y'.
 
-# Temporary config option for transitioning off of PCI_AUTO
 config PCI_NEW
 	def_bool y
 	depends on PCI
-
-# This is also board-specific
-config PCI_AUTO
-	bool
-	depends on PCI && !PCI_NEW
-	default y
-
-config PCI_AUTO_UPDATE_RESOURCES
-	bool
-	depends on PCI_AUTO
-	default y if !SH_DREAMCAST
-	help
-	  Selecting this option will cause the PCI auto code to leave your
-	  BAR values alone. Otherwise they will be updated automatically. If
-	  for some reason, you have a board that simply refuses to work
-	  with its resources updated beyond what they are when the device
-	  is powered up, set this to N. Everyone else will want this as Y.
diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index e388a70..a8350cc 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -2,7 +2,6 @@
 # Makefile for the PCI specific kernel interface routines under Linux.
 #
 obj-y					+= pci-lib.o
-obj-$(CONFIG_PCI_AUTO)			+= pci.o pci-auto.o
 obj-$(CONFIG_PCI_NEW)			+= pci-new.o
 
 obj-$(CONFIG_CPU_SUBTYPE_SH7751)	+= pci-sh7751.o ops-sh4.o
diff --git a/arch/sh/drivers/pci/pci-auto.c b/arch/sh/drivers/pci/pci-auto.c
deleted file mode 100644
index 1d715ec..0000000
--- a/arch/sh/drivers/pci/pci-auto.c
+++ /dev/null
@@ -1,546 +0,0 @@
-/*
- * PCI autoconfiguration library
- *
- * Author: Matt Porter <mporter@mvista.com>
- *
- * Copyright 2000, 2001 MontaVista Software Inc.
- * Copyright 2001 Bradley D. LaRonde <brad@ltc.com>
- * Copyright 2003 Paul Mundt <lethal@linux-sh.org>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-/*
- * Modified for MIPS by Jun Sun, jsun@mvista.com
- *
- * . Simplify the interface between pci_auto and the rest: a single function.
- * . Assign resources from low address to upper address.
- * . change most int to u32.
- *
- * Further modified to include it as mips generic code, ppopov@mvista.com.
- *
- * 2001-10-26  Bradley D. LaRonde <brad@ltc.com>
- * - Add a top_bus argument to the "early config" functions so that
- *   they can set a fake parent bus pointer to convince the underlying
- *   pci ops to use type 1 configuration for sub busses.
- * - Set bridge base and limit registers correctly.
- * - Align io and memory base properly before and after bridge setup.
- * - Don't fall through to pci_setup_bars for bridge.
- * - Reformat the debug output to look more like lspci's output.
- *
- * Cloned for SuperH by M. R. Brown, mrbrown@0xd6.org
- *
- * 2003-08-05  Paul Mundt <lethal@linux-sh.org>
- * - Don't update the BAR values on systems that already have valid addresses
- *   and don't want these updated for whatever reason, by way of a new config
- *   option check. However, we still read in the old BAR values so that they
- *   can still be reported through the debug output.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-
-#define	DEBUG
-#ifdef	DEBUG
-#define	DBG(x...)	printk(x)
-#else
-#define	DBG(x...)
-#endif
-
-/*
- * These functions are used early on before PCI scanning is done
- * and all of the pci_dev and pci_bus structures have been created.
- */
-static struct pci_dev *fake_pci_dev(struct pci_channel *hose,
-	int top_bus, int busnr, int devfn)
-{
-	static struct pci_dev dev;
-	static struct pci_bus bus;
-
-	dev.bus = &bus;
-	dev.sysdata = hose;
-	dev.devfn = devfn;
-	bus.number = busnr;
-	bus.ops = hose->pci_ops;
-	bus.sysdata = hose;
-
-	if(busnr != top_bus)
-		/* Fake a parent bus structure. */
-		bus.parent = &bus;
-	else
-		bus.parent = NULL;
-
-	return &dev;
-}
-
-#define EARLY_PCI_OP(rw, size, type)					\
-static int early_##rw##_config_##size(struct pci_channel *hose,		\
-	int top_bus, int bus, int devfn, int offset, type value)	\
-{									\
-	return pci_##rw##_config_##size(				\
-		fake_pci_dev(hose, top_bus, bus, devfn),		\
-		offset, value);						\
-}
-
-EARLY_PCI_OP(read, byte, u8 *)
-EARLY_PCI_OP(read, word, u16 *)
-EARLY_PCI_OP(read, dword, u32 *)
-EARLY_PCI_OP(write, byte, u8)
-EARLY_PCI_OP(write, word, u16)
-EARLY_PCI_OP(write, dword, u32)
-
-static struct resource *io_resource_inuse;
-static struct resource *mem_resource_inuse;
-
-static u32 pciauto_lower_iospc;
-static u32 pciauto_upper_iospc;
-
-static u32 pciauto_lower_memspc;
-static u32 pciauto_upper_memspc;
-
-static void __init
-pciauto_setup_bars(struct pci_channel *hose,
-		   int top_bus,
-		   int current_bus,
-		   int pci_devfn,
-		   int bar_limit)
-{
-	u32 bar_response, bar_size, bar_value;
-	u32 bar, addr_mask, bar_nr = 0;
-	u32 * upper_limit;
-	u32 * lower_limit;
-	int found_mem64 = 0;
-
-	for (bar = PCI_BASE_ADDRESS_0; bar <= bar_limit; bar+=4) {
-		u32 bar_addr;
-
-		/* Read the old BAR value */
-		early_read_config_dword(hose, top_bus,
-					current_bus,
-					pci_devfn,
-					bar,
-					&bar_addr);
-
-		/* Tickle the BAR and get the response */
-		early_write_config_dword(hose, top_bus,
-					 current_bus,
-					 pci_devfn,
-					 bar,
-					 0xffffffff);
-
-		early_read_config_dword(hose, top_bus,
-					current_bus,
-					pci_devfn,
-					bar,
-					&bar_response);
-
-		/*
-		 * Write the old BAR value back out, only update the BAR
-		 * if we implicitly want resources to be updated, which
-		 * is done by the generic code further down. -- PFM.
-		 */
-		early_write_config_dword(hose, top_bus,
-					 current_bus,
-					 pci_devfn,
-					 bar,
-					 bar_addr);
-
-		/* If BAR is not implemented go to the next BAR */
-		if (!bar_response)
-			continue;
-
-		/*
-		 * Workaround for a BAR that doesn't use its upper word,
-		 * like the ALi 1535D+ PCI DC-97 Controller Modem (M5457).
-		 * bdl <brad@ltc.com>
-		 */
-		if (!(bar_response & 0xffff0000))
-			bar_response |= 0xffff0000;
-
-retry:
-		/* Check the BAR type and set our address mask */
-		if (bar_response & PCI_BASE_ADDRESS_SPACE) {
-			addr_mask = PCI_BASE_ADDRESS_IO_MASK;
-			upper_limit = &pciauto_upper_iospc;
-			lower_limit = &pciauto_lower_iospc;
-			DBG("        I/O");
-		} else {
-			if ((bar_response & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
-			    PCI_BASE_ADDRESS_MEM_TYPE_64)
-				found_mem64 = 1;
-
-			addr_mask = PCI_BASE_ADDRESS_MEM_MASK;
-			upper_limit = &pciauto_upper_memspc;
-			lower_limit = &pciauto_lower_memspc;
-			DBG("        Mem");
-		}
-
-
-		/* Calculate requested size */
-		bar_size = ~(bar_response & addr_mask) + 1;
-
-		/* Allocate a base address */
-		bar_value = ((*lower_limit - 1) & ~(bar_size - 1)) + bar_size;
-
-		if ((bar_value + bar_size) > *upper_limit) {
-			if (bar_response & PCI_BASE_ADDRESS_SPACE) {
-				if (io_resource_inuse->child) {
-					io_resource_inuse =
-						io_resource_inuse->child;
-					pciauto_lower_iospc =
-						io_resource_inuse->start;
-					pciauto_upper_iospc =
-						io_resource_inuse->end + 1;
-					goto retry;
-				}
-
-			} else {
-				if (mem_resource_inuse->child) {
-					mem_resource_inuse =
-						mem_resource_inuse->child;
-					pciauto_lower_memspc =
-						mem_resource_inuse->start;
-					pciauto_upper_memspc =
-						mem_resource_inuse->end + 1;
-					goto retry;
-				}
-			}
-			DBG(" unavailable -- skipping, value %x size %x\n",
-					bar_value, bar_size);
-			continue;
-		}
-
-		if (bar_value < *lower_limit || (bar_value + bar_size) >= *upper_limit) {
-			DBG(" unavailable -- skipping, value %x size %x\n",
-					bar_value, bar_size);
-			continue;
-		}
-
-#ifdef CONFIG_PCI_AUTO_UPDATE_RESOURCES
-		/* Write it out and update our limit */
-		early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-					 bar, bar_value);
-#endif
-
-		*lower_limit = bar_value + bar_size;
-
-		/*
-		 * If we are a 64-bit decoder then increment to the
-		 * upper 32 bits of the bar and force it to locate
-		 * in the lower 4GB of memory.
-		 */
-		if (found_mem64) {
-			bar += 4;
-			early_write_config_dword(hose, top_bus,
-						 current_bus,
-						 pci_devfn,
-						 bar,
-						 0x00000000);
-		}
-
-		DBG(" at 0x%.8x [size=0x%x]\n", bar_value, bar_size);
-
-		bar_nr++;
-	}
-
-}
-
-static void __init
-pciauto_prescan_setup_bridge(struct pci_channel *hose,
-			     int top_bus,
-			     int current_bus,
-			     int pci_devfn,
-			     int sub_bus)
-{
-	/* Configure bus number registers */
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-	                        PCI_PRIMARY_BUS, current_bus);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SECONDARY_BUS, sub_bus + 1);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SUBORDINATE_BUS, 0xff);
-
-	/* Align memory and I/O to 1MB and 4KB boundaries. */
-	pciauto_lower_memspc = (pciauto_lower_memspc + (0x100000 - 1))
-		& ~(0x100000 - 1);
-	pciauto_lower_iospc = (pciauto_lower_iospc + (0x1000 - 1))
-		& ~(0x1000 - 1);
-
-	/* Set base (lower limit) of address range behind bridge. */
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-		PCI_MEMORY_BASE, pciauto_lower_memspc >> 16);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-		PCI_IO_BASE, (pciauto_lower_iospc & 0x0000f000) >> 8);
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-		PCI_IO_BASE_UPPER16, pciauto_lower_iospc >> 16);
-
-	/* We don't support prefetchable memory for now, so disable */
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-				PCI_PREF_MEMORY_BASE, 0);
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-				PCI_PREF_MEMORY_LIMIT, 0);
-}
-
-static void __init
-pciauto_postscan_setup_bridge(struct pci_channel *hose,
-			      int top_bus,
-			      int current_bus,
-			      int pci_devfn,
-			      int sub_bus)
-{
-	u32 temp;
-
-	/*
-	 * [jsun] we always bump up baselines a little, so that if there
-	 * nothing behind P2P bridge, we don't wind up overlapping IO/MEM
-	 * spaces.
-	 */
-	pciauto_lower_memspc += 1;
-	pciauto_lower_iospc += 1;
-
-	/* Configure bus number registers */
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SUBORDINATE_BUS, sub_bus);
-
-	/* Set upper limit of address range behind bridge. */
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-		PCI_MEMORY_LIMIT, pciauto_lower_memspc >> 16);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-		PCI_IO_LIMIT, (pciauto_lower_iospc & 0x0000f000) >> 8);
-	early_write_config_word(hose, top_bus, current_bus, pci_devfn,
-		PCI_IO_LIMIT_UPPER16, pciauto_lower_iospc >> 16);
-
-	/* Align memory and I/O to 1MB and 4KB boundaries. */
-	pciauto_lower_memspc = (pciauto_lower_memspc + (0x100000 - 1))
-		& ~(0x100000 - 1);
-	pciauto_lower_iospc = (pciauto_lower_iospc + (0x1000 - 1))
-		& ~(0x1000 - 1);
-
-	/* Enable memory and I/O accesses, enable bus master */
-	early_read_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_COMMAND, &temp);
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_COMMAND, temp | PCI_COMMAND_IO | PCI_COMMAND_MEMORY
-		| PCI_COMMAND_MASTER);
-}
-
-static void __init
-pciauto_prescan_setup_cardbus_bridge(struct pci_channel *hose,
-			int top_bus,
-			int current_bus,
-			int pci_devfn,
-			int sub_bus)
-{
-	/* Configure bus number registers */
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_PRIMARY_BUS, current_bus);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SECONDARY_BUS, sub_bus + 1);
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SUBORDINATE_BUS, 0xff);
-
-	/* Align memory and I/O to 4KB and 4 byte boundaries. */
-	pciauto_lower_memspc = (pciauto_lower_memspc + (0x1000 - 1))
-		& ~(0x1000 - 1);
-	pciauto_lower_iospc = (pciauto_lower_iospc + (0x4 - 1))
-		& ~(0x4 - 1);
-
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_CB_MEMORY_BASE_0, pciauto_lower_memspc);
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_CB_IO_BASE_0, pciauto_lower_iospc);
-}
-
-static void __init
-pciauto_postscan_setup_cardbus_bridge(struct pci_channel *hose,
-			int top_bus,
-			int current_bus,
-			int pci_devfn,
-			int sub_bus)
-{
-	u32 temp;
-
-	/*
-	 * [jsun] we always bump up baselines a little, so that if there
-	 * nothing behind P2P bridge, we don't wind up overlapping IO/MEM
-	 * spaces.
-	 */
-	pciauto_lower_memspc += 1;
-	pciauto_lower_iospc += 1;
-
-	/*
-	 * Configure subordinate bus number.  The PCI subsystem
-	 * bus scan will renumber buses (reserving three additional
-	 * for this PCI<->CardBus bridge for the case where a CardBus
-	 * adapter contains a P2P or CB2CB bridge.
-	 */
-
-	early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-				PCI_SUBORDINATE_BUS, sub_bus);
-
-	/*
-	 * Reserve an additional 4MB for mem space and 16KB for
-	 * I/O space.  This should cover any additional space
-	 * requirement of unusual CardBus devices with
-	 * additional bridges that can consume more address space.
-	 *
-	 * Although pcmcia-cs currently will reprogram bridge
-	 * windows, the goal is to add an option to leave them
-	 * alone and use the bridge window ranges as the regions
-	 * that are searched for free resources upon hot-insertion
-	 * of a device.  This will allow a PCI<->CardBus bridge
-	 * configured by this routine to happily live behind a
-	 * P2P bridge in a system.
-	 */
-	/* Align memory and I/O to 4KB and 4 byte boundaries. */
-	pciauto_lower_memspc = (pciauto_lower_memspc + (0x1000 - 1))
-		& ~(0x1000 - 1);
-	pciauto_lower_iospc = (pciauto_lower_iospc + (0x4 - 1))
-		& ~(0x4 - 1);
-	/* Set up memory and I/O filter limits, assume 32-bit I/O space */
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_CB_MEMORY_LIMIT_0, pciauto_lower_memspc - 1);
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_CB_IO_LIMIT_0, pciauto_lower_iospc - 1);
-
-	/* Enable memory and I/O accesses, enable bus master */
-	early_read_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_COMMAND, &temp);
-	early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-		PCI_COMMAND, temp | PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
-		PCI_COMMAND_MASTER);
-}
-
-#define	PCIAUTO_IDE_MODE_MASK		0x05
-
-static int __init
-pciauto_bus_scan(struct pci_channel *hose, int top_bus, int current_bus)
-{
-	int sub_bus;
-	u32 pci_devfn, pci_class, cmdstat, found_multi=0;
-	unsigned short vid, did;
-	unsigned char header_type;
-	int devfn_start = 0;
-	int devfn_stop = 0xff;
-
-	sub_bus = current_bus;
-
-	if (hose->first_devfn)
-		devfn_start = hose->first_devfn;
-	if (hose->last_devfn)
-		devfn_stop = hose->last_devfn;
-
-	for (pci_devfn=devfn_start; pci_devfn<devfn_stop; pci_devfn++) {
-
-		if (PCI_FUNC(pci_devfn) && !found_multi)
-			continue;
-
-		early_read_config_word(hose, top_bus, current_bus, pci_devfn,
-				       PCI_VENDOR_ID, &vid);
-
-		if (vid == 0xffff) continue;
-
-		early_read_config_byte(hose, top_bus, current_bus, pci_devfn,
-				       PCI_HEADER_TYPE, &header_type);
-
-		if (!PCI_FUNC(pci_devfn))
-			found_multi = header_type & 0x80;
-
-		early_read_config_word(hose, top_bus, current_bus, pci_devfn,
-				       PCI_DEVICE_ID, &did);
-
-		early_read_config_dword(hose, top_bus, current_bus, pci_devfn,
-					PCI_CLASS_REVISION, &pci_class);
-
-		DBG("%.2x:%.2x.%x Class %.4x: %.4x:%.4x",
-			current_bus, PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn),
-			pci_class >> 16, vid, did);
-		if (pci_class & 0xff)
-			DBG(" (rev %.2x)", pci_class & 0xff);
-		DBG("\n");
-
-		if ((pci_class >> 16) == PCI_CLASS_BRIDGE_PCI) {
-			DBG("        Bridge: primary=%.2x, secondary=%.2x\n",
-				current_bus, sub_bus + 1);
-			pciauto_prescan_setup_bridge(hose, top_bus, current_bus,
-						     pci_devfn, sub_bus);
-			DBG("Scanning sub bus %.2x, I/O 0x%.8x, Mem 0x%.8x\n",
-				sub_bus + 1,
-				pciauto_lower_iospc, pciauto_lower_memspc);
-			sub_bus = pciauto_bus_scan(hose, top_bus, sub_bus+1);
-			DBG("Back to bus %.2x\n", current_bus);
-			pciauto_postscan_setup_bridge(hose, top_bus, current_bus,
-							pci_devfn, sub_bus);
-			continue;
-		} else if ((pci_class >> 16) == PCI_CLASS_BRIDGE_CARDBUS) {
-			DBG("  CARDBUS  Bridge: primary=%.2x, secondary=%.2x\n",
-				current_bus, sub_bus + 1);
-			DBG("PCI Autoconfig: Found CardBus bridge, device %d function %d\n", PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn));
-			/* Place CardBus Socket/ExCA registers */
-			pciauto_setup_bars(hose, top_bus, current_bus, pci_devfn, PCI_BASE_ADDRESS_0);
-
-			pciauto_prescan_setup_cardbus_bridge(hose, top_bus,
-					current_bus, pci_devfn, sub_bus);
-
-			DBG("Scanning sub bus %.2x, I/O 0x%.8x, Mem 0x%.8x\n",
-				sub_bus + 1,
-				pciauto_lower_iospc, pciauto_lower_memspc);
-			sub_bus = pciauto_bus_scan(hose, top_bus, sub_bus+1);
-			DBG("Back to bus %.2x, sub_bus is %x\n", current_bus, sub_bus);
-			pciauto_postscan_setup_cardbus_bridge(hose, top_bus,
-					current_bus, pci_devfn, sub_bus);
-			continue;
-		} else if ((pci_class >> 16) == PCI_CLASS_STORAGE_IDE) {
-
-			unsigned char prg_iface;
-
-			early_read_config_byte(hose, top_bus, current_bus,
-				pci_devfn, PCI_CLASS_PROG, &prg_iface);
-			if (!(prg_iface & PCIAUTO_IDE_MODE_MASK)) {
-				DBG("Skipping legacy mode IDE controller\n");
-				continue;
-			}
-		}
-
-		/*
-		 * Found a peripheral, enable some standard
-		 * settings
-		 */
-		early_read_config_dword(hose, top_bus, current_bus, pci_devfn,
-					PCI_COMMAND, &cmdstat);
-		early_write_config_dword(hose, top_bus, current_bus, pci_devfn,
-					 PCI_COMMAND, cmdstat | PCI_COMMAND_IO |
-					 PCI_COMMAND_MEMORY |
-					 PCI_COMMAND_MASTER);
-		early_write_config_byte(hose, top_bus, current_bus, pci_devfn,
-					PCI_LATENCY_TIMER, 0x80);
-
-		/* Allocate PCI I/O and/or memory space */
-		pciauto_setup_bars(hose, top_bus, current_bus, pci_devfn, PCI_BASE_ADDRESS_5);
-	}
-	return sub_bus;
-}
-
-int __init
-pciauto_assign_resources(int busno, struct pci_channel *hose)
-{
-	/* setup resource limits */
-	io_resource_inuse = hose->io_resource;
-	mem_resource_inuse = hose->mem_resource;
-
-	pciauto_lower_iospc = io_resource_inuse->start;
-	pciauto_upper_iospc = io_resource_inuse->end + 1;
-	pciauto_lower_memspc = mem_resource_inuse->start;
-	pciauto_upper_memspc = mem_resource_inuse->end + 1;
-	DBG("Autoconfig PCI channel 0x%p\n", hose);
-	DBG("Scanning bus %.2x, I/O 0x%.8x:0x%.8x, Mem 0x%.8x:0x%.8x\n",
-		busno, pciauto_lower_iospc, pciauto_upper_iospc, 
-		pciauto_lower_memspc, pciauto_upper_memspc);
-
-	return pciauto_bus_scan(hose, busno, busno);
-}
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
deleted file mode 100644
index 8c332b2..0000000
--- a/arch/sh/drivers/pci/pci.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * arch/sh/drivers/pci/pci.c
- *
- * Copyright (c) 2002 M. R. Brown  <mrbrown@linux-sh.org>
- * Copyright (c) 2004 - 2006 Paul Mundt  <lethal@linux-sh.org>
- *
- * These functions are collected here to reduce duplication of common
- * code amongst the many platform-specific PCI support code files.
- *
- * These routines require the following board-specific routines:
- * void pcibios_fixup_irqs();
- *
- * See include/asm-sh/pci.h for more information.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/dma-debug.h>
-#include <asm/io.h>
-
-static int __init pcibios_init(void)
-{
-	struct pci_channel *p;
-	struct pci_bus *bus;
-	int busno;
-
-	/* init channels */
-	busno = 0;
-	for (p = board_pci_channels; p->init; p++) {
-		if (p->init(p) == 0)
-			p->enabled = 1;
-		else
-			pr_err("Unable to init pci channel %d\n", busno);
-		busno++;
-	}
-
-#ifdef CONFIG_PCI_AUTO
-	/* assign resources */
-	busno = 0;
-	for (p = board_pci_channels; p->init; p++)
-		if (p->enabled)
-			busno = pciauto_assign_resources(busno, p) + 1;
-#endif
-
-	/* scan the buses */
-	busno = 0;
-	for (p = board_pci_channels; p->init; p++) {
-		if (p->enabled) {
-			bus = pci_scan_bus(busno, p->pci_ops, p);
-			busno = bus->subordinate + 1;
-		}
-	}
-
-	pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
-
-	dma_debug_add_bus(&pci_bus_type);
-
-	return 0;
-}
-subsys_initcall(pcibios_init);
-
-/*
- *  Called after each bus is probed, but before its children
- *  are examined.
- */
-void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
-{
-	pci_read_bridge_bases(bus);
-}
-
-EXPORT_SYMBOL(board_pci_channels);
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index f36c789..f9101215 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -19,8 +19,6 @@
 struct pci_channel {
 	struct pci_channel	*next;
 
-	int			(*init)(struct pci_channel *chan);
-
 	struct pci_ops		*pci_ops;
 	struct resource		*io_resource;
 	struct resource		*mem_resource;
@@ -28,20 +26,11 @@ struct pci_channel {
 	unsigned long		io_offset;
 	unsigned long		mem_offset;
 
-	int			first_devfn;
-	int			last_devfn;
-	int			enabled;
-
 	unsigned long		reg_base;
 
 	unsigned long		io_map_base;
 };
 
-/*
- * Each board initializes this array and terminates it with a NULL entry.
- */
-extern struct pci_channel board_pci_channels[];
-
 extern void register_pci_controller(struct pci_channel *hose);
 
 extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM;
@@ -122,13 +111,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 #endif
 
 /* Board-specific fixup routines. */
-int pcibios_init_platform(void);
 int pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin);
 
-#ifdef CONFIG_PCI_AUTO
-int pciauto_assign_resources(int busno, struct pci_channel *hose);
-#endif
-
 extern void pcibios_resource_to_bus(struct pci_dev *dev,
 	struct pci_bus_region *region, struct resource *res);
 
-- 
cgit v0.10.2


From 35bcfffd86aac933205fbf8401e3e7e4dde68264 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 21:51:19 +0900
Subject: sh: pci: Roll pci-lib in to pci-new.

Now that the pci-auto cruft is gone, pci-lib can go away.
Roll it back in to pci-new.c where it originally split off from.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index a8350cc..eacac7f 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -1,7 +1,6 @@
 #
 # Makefile for the PCI specific kernel interface routines under Linux.
 #
-obj-y					+= pci-lib.o
 obj-$(CONFIG_PCI_NEW)			+= pci-new.o
 
 obj-$(CONFIG_CPU_SUBTYPE_SH7751)	+= pci-sh7751.o ops-sh4.o
diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c
deleted file mode 100644
index f072aca..0000000
--- a/arch/sh/drivers/pci/pci-lib.c
+++ /dev/null
@@ -1,219 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-
-unsigned long PCIBIOS_MIN_IO = 0x0000;
-unsigned long PCIBIOS_MIN_MEM = 0;
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- */
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
-{
-	struct pci_dev *dev = data;
-	struct pci_channel *chan = dev->sysdata;
-	resource_size_t start = res->start;
-
-	if (res->flags & IORESOURCE_IO) {
-		if (start < PCIBIOS_MIN_IO + chan->io_resource->start)
-			start = PCIBIOS_MIN_IO + chan->io_resource->start;
-
-		/*
-                 * Put everything into 0x00-0xff region modulo 0x400.
-		 */
-		if (start & 0x300) {
-			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
-	} else if (res->flags & IORESOURCE_MEM) {
-		if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start)
-			start = PCIBIOS_MIN_MEM + chan->mem_resource->start;
-	}
-
-	res->start = start;
-}
-
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-			 struct resource *res)
-{
-	struct pci_channel *hose = dev->sysdata;
-	unsigned long offset = 0;
-
-	if (res->flags & IORESOURCE_IO)
-		offset = hose->io_offset;
-	else if (res->flags & IORESOURCE_MEM)
-		offset = hose->mem_offset;
-
-	region->start = res->start - offset;
-	region->end = res->end - offset;
-}
-
-void __devinit
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-			struct pci_bus_region *region)
-{
-	struct pci_channel *hose = dev->sysdata;
-	unsigned long offset = 0;
-
-	if (res->flags & IORESOURCE_IO)
-		offset = hose->io_offset;
-	else if (res->flags & IORESOURCE_MEM)
-		offset = hose->mem_offset;
-
-	res->start = region->start + offset;
-	res->end = region->end + offset;
-}
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
-		/* Only set up the requested stuff */
-		if (!(mask & (1<<idx)))
-			continue;
-
-		r = &dev->resource[idx];
-		if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
-			continue;
-		if ((idx == PCI_ROM_RESOURCE) &&
-				(!(r->flags & IORESOURCE_ROM_ENABLE)))
-			continue;
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available "
-			       "because of resource collisions\n",
-			       pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (cmd != old_cmd) {
-		printk("PCI: Enabling device %s (%04x -> %04x)\n",
-		       pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
-/*
- *  If we set up a device for bus mastering, we need to check and set
- *  the latency timer as it may not be properly set.
- */
-static unsigned int pcibios_max_latency = 255;
-
-void pcibios_set_master(struct pci_dev *dev)
-{
-	u8 lat;
-	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
-	if (lat < 16)
-		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
-	else if (lat > pcibios_max_latency)
-		lat = pcibios_max_latency;
-	else
-		return;
-	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n",
-	       pci_name(dev), lat);
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
-}
-
-void __init pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
-char * __devinit pcibios_setup(char *str)
-{
-	return str;
-}
-
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
-			enum pci_mmap_state mmap_state, int write_combine)
-{
-	/*
-	 * I/O space can be accessed via normal processor loads and stores on
-	 * this platform but for now we elect not to do this and portable
-	 * drivers should not do this anyway.
-	 */
-	if (mmap_state == pci_mmap_io)
-		return -EINVAL;
-
-	/*
-	 * Ignore write-combine; for now only return uncached mappings.
-	 */
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			       vma->vm_end - vma->vm_start,
-			       vma->vm_page_prot);
-}
-
-static void __iomem *ioport_map_pci(struct pci_dev *dev,
-				    unsigned long port, unsigned int nr)
-{
-	struct pci_channel *chan = dev->sysdata;
-
-	if (!chan->io_map_base)
-		chan->io_map_base = generic_io_base;
-
-	return (void __iomem *)(chan->io_map_base + port);
-}
-
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (unlikely(!len || !start))
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-
-	if (flags & IORESOURCE_IO)
-		return ioport_map_pci(dev, start, len);
-
-	/*
-	 * Presently the IORESOURCE_MEM case is a bit special, most
-	 * SH7751 style PCI controllers have PCI memory at a fixed
-	 * location in the address space where no remapping is desired.
-	 * With the IORESOURCE_MEM case more care has to be taken
-	 * to inhibit page table mapping for legacy cores, but this is
-	 * punted off to __ioremap().
-	 *					-- PFM.
-	 */
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-
-		return ioremap_nocache(start, len);
-	}
-
-	return NULL;
-}
-EXPORT_SYMBOL(pci_iomap);
-
-void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
-{
-	iounmap(addr);
-}
-EXPORT_SYMBOL(pci_iounmap);
-
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-EXPORT_SYMBOL(PCIBIOS_MIN_IO);
-EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
-#endif
diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
index 8c0b136..54d77cb 100644
--- a/arch/sh/drivers/pci/pci-new.c
+++ b/arch/sh/drivers/pci/pci-new.c
@@ -1,20 +1,28 @@
 /*
  * New-style PCI core.
  *
- * Copyright (c) 2002  M. R. Brown
  * Copyright (c) 2004 - 2009  Paul Mundt
+ * Copyright (c) 2002  M. R. Brown
+ *
+ * Modelled after arch/mips/pci/pci.c:
+ *  Copyright (C) 2003, 04 Ralf Baechle (ralf@linux-mips.org)
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/types.h>
 #include <linux/dma-debug.h>
 #include <linux/io.h>
 #include <linux/mutex.h>
 
+unsigned long PCIBIOS_MIN_IO = 0x0000;
+unsigned long PCIBIOS_MIN_MEM = 0;
+
 /*
  * The PCI controller list.
  */
@@ -27,9 +35,6 @@ static void __devinit pcibios_scanbus(struct pci_channel *hose)
 	static int next_busno;
 	struct pci_bus *bus;
 
-	/* Catch botched conversion attempts */
-	BUG_ON(hose->init);
-
 	bus = pci_scan_bus(next_busno, hose->pci_ops, hose);
 	if (bus) {
 		next_busno = bus->subordinate + 1;
@@ -128,7 +133,7 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev,
  *  Called after each bus is probed, but before its children
  *  are examined.
  */
-void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
 	struct pci_dev *dev = bus->self;
 	struct list_head *ln;
@@ -146,3 +151,214 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus)
 			pcibios_fixup_device_resources(dev, bus);
 	}
 }
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+			    resource_size_t size, resource_size_t align)
+{
+	struct pci_dev *dev = data;
+	struct pci_channel *chan = dev->sysdata;
+	resource_size_t start = res->start;
+
+	if (res->flags & IORESOURCE_IO) {
+		if (start < PCIBIOS_MIN_IO + chan->io_resource->start)
+			start = PCIBIOS_MIN_IO + chan->io_resource->start;
+
+		/*
+                 * Put everything into 0x00-0xff region modulo 0x400.
+		 */
+		if (start & 0x300) {
+			start = (start + 0x3ff) & ~0x3ff;
+			res->start = start;
+		}
+	} else if (res->flags & IORESOURCE_MEM) {
+		if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start)
+			start = PCIBIOS_MIN_MEM + chan->mem_resource->start;
+	}
+
+	res->start = start;
+}
+
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			 struct resource *res)
+{
+	struct pci_channel *hose = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = hose->io_offset;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = hose->mem_offset;
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
+}
+
+void __devinit
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region)
+{
+	struct pci_channel *hose = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = hose->io_offset;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = hose->mem_offset;
+
+	res->start = region->start + offset;
+	res->end = region->end + offset;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int idx;
+	struct resource *r;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+	for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<idx)))
+			continue;
+
+		r = &dev->resource[idx];
+		if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+		if ((idx == PCI_ROM_RESOURCE) &&
+				(!(r->flags & IORESOURCE_ROM_ENABLE)))
+			continue;
+		if (!r->start && r->end) {
+			printk(KERN_ERR "PCI: Device %s not available "
+			       "because of resource collisions\n",
+			       pci_name(dev));
+			return -EINVAL;
+		}
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+	if (cmd != old_cmd) {
+		printk("PCI: Enabling device %s (%04x -> %04x)\n",
+		       pci_name(dev), old_cmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check and set
+ *  the latency timer as it may not be properly set.
+ */
+static unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n",
+	       pci_name(dev), lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
+
+char * __devinit pcibios_setup(char *str)
+{
+	return str;
+}
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	/*
+	 * I/O space can be accessed via normal processor loads and stores on
+	 * this platform but for now we elect not to do this and portable
+	 * drivers should not do this anyway.
+	 */
+	if (mmap_state == pci_mmap_io)
+		return -EINVAL;
+
+	/*
+	 * Ignore write-combine; for now only return uncached mappings.
+	 */
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
+
+static void __iomem *ioport_map_pci(struct pci_dev *dev,
+				    unsigned long port, unsigned int nr)
+{
+	struct pci_channel *chan = dev->sysdata;
+
+	if (!chan->io_map_base)
+		chan->io_map_base = generic_io_base;
+
+	return (void __iomem *)(chan->io_map_base + port);
+}
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	resource_size_t start = pci_resource_start(dev, bar);
+	resource_size_t len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (unlikely(!len || !start))
+		return NULL;
+	if (maxlen && len > maxlen)
+		len = maxlen;
+
+	if (flags & IORESOURCE_IO)
+		return ioport_map_pci(dev, start, len);
+
+	/*
+	 * Presently the IORESOURCE_MEM case is a bit special, most
+	 * SH7751 style PCI controllers have PCI memory at a fixed
+	 * location in the address space where no remapping is desired.
+	 * With the IORESOURCE_MEM case more care has to be taken
+	 * to inhibit page table mapping for legacy cores, but this is
+	 * punted off to __ioremap().
+	 *					-- PFM.
+	 */
+	if (flags & IORESOURCE_MEM) {
+		if (flags & IORESOURCE_CACHEABLE)
+			return ioremap(start, len);
+
+		return ioremap_nocache(start, len);
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(pci_iomap);
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	iounmap(addr);
+}
+EXPORT_SYMBOL(pci_iounmap);
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+EXPORT_SYMBOL(PCIBIOS_MIN_IO);
+EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
+#endif
-- 
cgit v0.10.2


From cf242007a1eb29dcf93d1cb34713ec9b3f4a0e1b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 20 Apr 2009 21:53:33 +0900
Subject: sh: pci: Rename pci-new.c to pci.c.

pci-new.c is now in a state to replace the old pci.c, rename it
accordingly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig
index ea903a9..e8db585 100644
--- a/arch/sh/drivers/pci/Kconfig
+++ b/arch/sh/drivers/pci/Kconfig
@@ -17,7 +17,3 @@ config SH_PCIDMA_NONCOHERENT
 	  code will not have to flush the CPU's caches. If you have a PCI host
 	  bridge integrated with your SH CPU, refer carefully to the chip specs
 	  to see if you can say 'N' here. Otherwise, leave it as 'Y'.
-
-config PCI_NEW
-	def_bool y
-	depends on PCI
diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index eacac7f..d2ffc47 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for the PCI specific kernel interface routines under Linux.
 #
-obj-$(CONFIG_PCI_NEW)			+= pci-new.o
+obj-y					+= pci.o
 
 obj-$(CONFIG_CPU_SUBTYPE_SH7751)	+= pci-sh7751.o ops-sh4.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7751R)	+= pci-sh7751.o ops-sh4.o
diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c
deleted file mode 100644
index 54d77cb..0000000
--- a/arch/sh/drivers/pci/pci-new.c
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * New-style PCI core.
- *
- * Copyright (c) 2004 - 2009  Paul Mundt
- * Copyright (c) 2002  M. R. Brown
- *
- * Modelled after arch/mips/pci/pci.c:
- *  Copyright (C) 2003, 04 Ralf Baechle (ralf@linux-mips.org)
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/dma-debug.h>
-#include <linux/io.h>
-#include <linux/mutex.h>
-
-unsigned long PCIBIOS_MIN_IO = 0x0000;
-unsigned long PCIBIOS_MIN_MEM = 0;
-
-/*
- * The PCI controller list.
- */
-static struct pci_channel *hose_head, **hose_tail = &hose_head;
-
-static int pci_initialized;
-
-static void __devinit pcibios_scanbus(struct pci_channel *hose)
-{
-	static int next_busno;
-	struct pci_bus *bus;
-
-	bus = pci_scan_bus(next_busno, hose->pci_ops, hose);
-	if (bus) {
-		next_busno = bus->subordinate + 1;
-		/* Don't allow 8-bit bus number overflow inside the hose -
-		   reserve some space for bridges. */
-		if (next_busno > 224)
-			next_busno = 0;
-
-		pci_bus_size_bridges(bus);
-		pci_bus_assign_resources(bus);
-		pci_enable_bridges(bus);
-	}
-}
-
-static DEFINE_MUTEX(pci_scan_mutex);
-
-void __devinit register_pci_controller(struct pci_channel *hose)
-{
-	if (request_resource(&iomem_resource, hose->mem_resource) < 0)
-		goto out;
-	if (request_resource(&ioport_resource, hose->io_resource) < 0) {
-		release_resource(hose->mem_resource);
-		goto out;
-	}
-
-	*hose_tail = hose;
-	hose_tail = &hose->next;
-
-	/*
-	 * Do not panic here but later - this might hapen before console init.
-	 */
-	if (!hose->io_map_base) {
-		printk(KERN_WARNING
-		       "registering PCI controller with io_map_base unset\n");
-	}
-
-	/*
-	 * Scan the bus if it is register after the PCI subsystem
-	 * initialization.
-	 */
-	if (pci_initialized) {
-		mutex_lock(&pci_scan_mutex);
-		pcibios_scanbus(hose);
-		mutex_unlock(&pci_scan_mutex);
-	}
-
-	return;
-
-out:
-	printk(KERN_WARNING
-	       "Skipping PCI bus scan due to resource conflict\n");
-}
-
-static int __init pcibios_init(void)
-{
-	struct pci_channel *hose;
-
-	/* Scan all of the recorded PCI controllers.  */
-	for (hose = hose_head; hose; hose = hose->next)
-		pcibios_scanbus(hose);
-
-	pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
-
-	dma_debug_add_bus(&pci_bus_type);
-
-	pci_initialized = 1;
-
-	return 0;
-}
-subsys_initcall(pcibios_init);
-
-static void pcibios_fixup_device_resources(struct pci_dev *dev,
-	struct pci_bus *bus)
-{
-	/* Update device resources.  */
-	struct pci_channel *hose = bus->sysdata;
-	unsigned long offset = 0;
-	int i;
-
-	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-		if (!dev->resource[i].start)
-			continue;
-		if (dev->resource[i].flags & IORESOURCE_PCI_FIXED)
-			continue;
-		if (dev->resource[i].flags & IORESOURCE_IO)
-			offset = hose->io_offset;
-		else if (dev->resource[i].flags & IORESOURCE_MEM)
-			offset = hose->mem_offset;
-
-		dev->resource[i].start += offset;
-		dev->resource[i].end += offset;
-	}
-}
-
-/*
- *  Called after each bus is probed, but before its children
- *  are examined.
- */
-void __devinit pcibios_fixup_bus(struct pci_bus *bus)
-{
-	struct pci_dev *dev = bus->self;
-	struct list_head *ln;
-	struct pci_channel *chan = bus->sysdata;
-
-	if (!dev) {
-		bus->resource[0] = chan->io_resource;
-		bus->resource[1] = chan->mem_resource;
-	}
-
-	for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
-		dev = pci_dev_b(ln);
-
-		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
-			pcibios_fixup_device_resources(dev, bus);
-	}
-}
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- */
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
-{
-	struct pci_dev *dev = data;
-	struct pci_channel *chan = dev->sysdata;
-	resource_size_t start = res->start;
-
-	if (res->flags & IORESOURCE_IO) {
-		if (start < PCIBIOS_MIN_IO + chan->io_resource->start)
-			start = PCIBIOS_MIN_IO + chan->io_resource->start;
-
-		/*
-                 * Put everything into 0x00-0xff region modulo 0x400.
-		 */
-		if (start & 0x300) {
-			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
-	} else if (res->flags & IORESOURCE_MEM) {
-		if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start)
-			start = PCIBIOS_MIN_MEM + chan->mem_resource->start;
-	}
-
-	res->start = start;
-}
-
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-			 struct resource *res)
-{
-	struct pci_channel *hose = dev->sysdata;
-	unsigned long offset = 0;
-
-	if (res->flags & IORESOURCE_IO)
-		offset = hose->io_offset;
-	else if (res->flags & IORESOURCE_MEM)
-		offset = hose->mem_offset;
-
-	region->start = res->start - offset;
-	region->end = res->end - offset;
-}
-
-void __devinit
-pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-			struct pci_bus_region *region)
-{
-	struct pci_channel *hose = dev->sysdata;
-	unsigned long offset = 0;
-
-	if (res->flags & IORESOURCE_IO)
-		offset = hose->io_offset;
-	else if (res->flags & IORESOURCE_MEM)
-		offset = hose->mem_offset;
-
-	res->start = region->start + offset;
-	res->end = region->end + offset;
-}
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
-		/* Only set up the requested stuff */
-		if (!(mask & (1<<idx)))
-			continue;
-
-		r = &dev->resource[idx];
-		if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
-			continue;
-		if ((idx == PCI_ROM_RESOURCE) &&
-				(!(r->flags & IORESOURCE_ROM_ENABLE)))
-			continue;
-		if (!r->start && r->end) {
-			printk(KERN_ERR "PCI: Device %s not available "
-			       "because of resource collisions\n",
-			       pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (cmd != old_cmd) {
-		printk("PCI: Enabling device %s (%04x -> %04x)\n",
-		       pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
-/*
- *  If we set up a device for bus mastering, we need to check and set
- *  the latency timer as it may not be properly set.
- */
-static unsigned int pcibios_max_latency = 255;
-
-void pcibios_set_master(struct pci_dev *dev)
-{
-	u8 lat;
-	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
-	if (lat < 16)
-		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
-	else if (lat > pcibios_max_latency)
-		lat = pcibios_max_latency;
-	else
-		return;
-	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n",
-	       pci_name(dev), lat);
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
-}
-
-void __init pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
-char * __devinit pcibios_setup(char *str)
-{
-	return str;
-}
-
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
-			enum pci_mmap_state mmap_state, int write_combine)
-{
-	/*
-	 * I/O space can be accessed via normal processor loads and stores on
-	 * this platform but for now we elect not to do this and portable
-	 * drivers should not do this anyway.
-	 */
-	if (mmap_state == pci_mmap_io)
-		return -EINVAL;
-
-	/*
-	 * Ignore write-combine; for now only return uncached mappings.
-	 */
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			       vma->vm_end - vma->vm_start,
-			       vma->vm_page_prot);
-}
-
-static void __iomem *ioport_map_pci(struct pci_dev *dev,
-				    unsigned long port, unsigned int nr)
-{
-	struct pci_channel *chan = dev->sysdata;
-
-	if (!chan->io_map_base)
-		chan->io_map_base = generic_io_base;
-
-	return (void __iomem *)(chan->io_map_base + port);
-}
-
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (unlikely(!len || !start))
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-
-	if (flags & IORESOURCE_IO)
-		return ioport_map_pci(dev, start, len);
-
-	/*
-	 * Presently the IORESOURCE_MEM case is a bit special, most
-	 * SH7751 style PCI controllers have PCI memory at a fixed
-	 * location in the address space where no remapping is desired.
-	 * With the IORESOURCE_MEM case more care has to be taken
-	 * to inhibit page table mapping for legacy cores, but this is
-	 * punted off to __ioremap().
-	 *					-- PFM.
-	 */
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-
-		return ioremap_nocache(start, len);
-	}
-
-	return NULL;
-}
-EXPORT_SYMBOL(pci_iomap);
-
-void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
-{
-	iounmap(addr);
-}
-EXPORT_SYMBOL(pci_iounmap);
-
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-EXPORT_SYMBOL(PCIBIOS_MIN_IO);
-EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
-#endif
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
new file mode 100644
index 0000000..54d77cb
--- /dev/null
+++ b/arch/sh/drivers/pci/pci.c
@@ -0,0 +1,364 @@
+/*
+ * New-style PCI core.
+ *
+ * Copyright (c) 2004 - 2009  Paul Mundt
+ * Copyright (c) 2002  M. R. Brown
+ *
+ * Modelled after arch/mips/pci/pci.c:
+ *  Copyright (C) 2003, 04 Ralf Baechle (ralf@linux-mips.org)
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/dma-debug.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+
+unsigned long PCIBIOS_MIN_IO = 0x0000;
+unsigned long PCIBIOS_MIN_MEM = 0;
+
+/*
+ * The PCI controller list.
+ */
+static struct pci_channel *hose_head, **hose_tail = &hose_head;
+
+static int pci_initialized;
+
+static void __devinit pcibios_scanbus(struct pci_channel *hose)
+{
+	static int next_busno;
+	struct pci_bus *bus;
+
+	bus = pci_scan_bus(next_busno, hose->pci_ops, hose);
+	if (bus) {
+		next_busno = bus->subordinate + 1;
+		/* Don't allow 8-bit bus number overflow inside the hose -
+		   reserve some space for bridges. */
+		if (next_busno > 224)
+			next_busno = 0;
+
+		pci_bus_size_bridges(bus);
+		pci_bus_assign_resources(bus);
+		pci_enable_bridges(bus);
+	}
+}
+
+static DEFINE_MUTEX(pci_scan_mutex);
+
+void __devinit register_pci_controller(struct pci_channel *hose)
+{
+	if (request_resource(&iomem_resource, hose->mem_resource) < 0)
+		goto out;
+	if (request_resource(&ioport_resource, hose->io_resource) < 0) {
+		release_resource(hose->mem_resource);
+		goto out;
+	}
+
+	*hose_tail = hose;
+	hose_tail = &hose->next;
+
+	/*
+	 * Do not panic here but later - this might hapen before console init.
+	 */
+	if (!hose->io_map_base) {
+		printk(KERN_WARNING
+		       "registering PCI controller with io_map_base unset\n");
+	}
+
+	/*
+	 * Scan the bus if it is register after the PCI subsystem
+	 * initialization.
+	 */
+	if (pci_initialized) {
+		mutex_lock(&pci_scan_mutex);
+		pcibios_scanbus(hose);
+		mutex_unlock(&pci_scan_mutex);
+	}
+
+	return;
+
+out:
+	printk(KERN_WARNING
+	       "Skipping PCI bus scan due to resource conflict\n");
+}
+
+static int __init pcibios_init(void)
+{
+	struct pci_channel *hose;
+
+	/* Scan all of the recorded PCI controllers.  */
+	for (hose = hose_head; hose; hose = hose->next)
+		pcibios_scanbus(hose);
+
+	pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
+
+	dma_debug_add_bus(&pci_bus_type);
+
+	pci_initialized = 1;
+
+	return 0;
+}
+subsys_initcall(pcibios_init);
+
+static void pcibios_fixup_device_resources(struct pci_dev *dev,
+	struct pci_bus *bus)
+{
+	/* Update device resources.  */
+	struct pci_channel *hose = bus->sysdata;
+	unsigned long offset = 0;
+	int i;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		if (!dev->resource[i].start)
+			continue;
+		if (dev->resource[i].flags & IORESOURCE_PCI_FIXED)
+			continue;
+		if (dev->resource[i].flags & IORESOURCE_IO)
+			offset = hose->io_offset;
+		else if (dev->resource[i].flags & IORESOURCE_MEM)
+			offset = hose->mem_offset;
+
+		dev->resource[i].start += offset;
+		dev->resource[i].end += offset;
+	}
+}
+
+/*
+ *  Called after each bus is probed, but before its children
+ *  are examined.
+ */
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev = bus->self;
+	struct list_head *ln;
+	struct pci_channel *chan = bus->sysdata;
+
+	if (!dev) {
+		bus->resource[0] = chan->io_resource;
+		bus->resource[1] = chan->mem_resource;
+	}
+
+	for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
+		dev = pci_dev_b(ln);
+
+		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+			pcibios_fixup_device_resources(dev, bus);
+	}
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+			    resource_size_t size, resource_size_t align)
+{
+	struct pci_dev *dev = data;
+	struct pci_channel *chan = dev->sysdata;
+	resource_size_t start = res->start;
+
+	if (res->flags & IORESOURCE_IO) {
+		if (start < PCIBIOS_MIN_IO + chan->io_resource->start)
+			start = PCIBIOS_MIN_IO + chan->io_resource->start;
+
+		/*
+                 * Put everything into 0x00-0xff region modulo 0x400.
+		 */
+		if (start & 0x300) {
+			start = (start + 0x3ff) & ~0x3ff;
+			res->start = start;
+		}
+	} else if (res->flags & IORESOURCE_MEM) {
+		if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start)
+			start = PCIBIOS_MIN_MEM + chan->mem_resource->start;
+	}
+
+	res->start = start;
+}
+
+void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			 struct resource *res)
+{
+	struct pci_channel *hose = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = hose->io_offset;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = hose->mem_offset;
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
+}
+
+void __devinit
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region)
+{
+	struct pci_channel *hose = dev->sysdata;
+	unsigned long offset = 0;
+
+	if (res->flags & IORESOURCE_IO)
+		offset = hose->io_offset;
+	else if (res->flags & IORESOURCE_MEM)
+		offset = hose->mem_offset;
+
+	res->start = region->start + offset;
+	res->end = region->end + offset;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int idx;
+	struct resource *r;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+	for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<idx)))
+			continue;
+
+		r = &dev->resource[idx];
+		if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+		if ((idx == PCI_ROM_RESOURCE) &&
+				(!(r->flags & IORESOURCE_ROM_ENABLE)))
+			continue;
+		if (!r->start && r->end) {
+			printk(KERN_ERR "PCI: Device %s not available "
+			       "because of resource collisions\n",
+			       pci_name(dev));
+			return -EINVAL;
+		}
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+	if (cmd != old_cmd) {
+		printk("PCI: Enabling device %s (%04x -> %04x)\n",
+		       pci_name(dev), old_cmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check and set
+ *  the latency timer as it may not be properly set.
+ */
+static unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n",
+	       pci_name(dev), lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
+
+char * __devinit pcibios_setup(char *str)
+{
+	return str;
+}
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	/*
+	 * I/O space can be accessed via normal processor loads and stores on
+	 * this platform but for now we elect not to do this and portable
+	 * drivers should not do this anyway.
+	 */
+	if (mmap_state == pci_mmap_io)
+		return -EINVAL;
+
+	/*
+	 * Ignore write-combine; for now only return uncached mappings.
+	 */
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
+
+static void __iomem *ioport_map_pci(struct pci_dev *dev,
+				    unsigned long port, unsigned int nr)
+{
+	struct pci_channel *chan = dev->sysdata;
+
+	if (!chan->io_map_base)
+		chan->io_map_base = generic_io_base;
+
+	return (void __iomem *)(chan->io_map_base + port);
+}
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	resource_size_t start = pci_resource_start(dev, bar);
+	resource_size_t len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (unlikely(!len || !start))
+		return NULL;
+	if (maxlen && len > maxlen)
+		len = maxlen;
+
+	if (flags & IORESOURCE_IO)
+		return ioport_map_pci(dev, start, len);
+
+	/*
+	 * Presently the IORESOURCE_MEM case is a bit special, most
+	 * SH7751 style PCI controllers have PCI memory at a fixed
+	 * location in the address space where no remapping is desired.
+	 * With the IORESOURCE_MEM case more care has to be taken
+	 * to inhibit page table mapping for legacy cores, but this is
+	 * punted off to __ioremap().
+	 *					-- PFM.
+	 */
+	if (flags & IORESOURCE_MEM) {
+		if (flags & IORESOURCE_CACHEABLE)
+			return ioremap(start, len);
+
+		return ioremap_nocache(start, len);
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(pci_iomap);
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	iounmap(addr);
+}
+EXPORT_SYMBOL(pci_iounmap);
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+EXPORT_SYMBOL(PCIBIOS_MIN_IO);
+EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
+#endif
-- 
cgit v0.10.2


From 9ae5b8790037d05d32746f521af146c32089bfec Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 10:27:58 -0400
Subject: tracing: change branch profiling to a choice selection

This patch makes the branch profiling into a choice selection:

  None               - no branch profiling
  likely/unlikely    - only profile likely/unlikely branches
  all                - profile all branches

The all profiler will also enable the likely/unlikely branches.

This does not change the way the profiler works or the dependencies
between the profilers.

What this patch does, is keep the branch profiling from being selected
by an allyesconfig make. The branch profiler is very intrusive and
it is known to break various architecture builds when selected as an
allyesconfig.

[ Impact: prevent branch profiler from being selected in allyesconfig ]

Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 57981d3..3ee28db 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -212,8 +212,36 @@ config BOOT_TRACER
 	  to enable this on bootup.
 
 config TRACE_BRANCH_PROFILING
-	bool "Trace likely/unlikely profiler"
+	bool
 	select TRACING
+
+choice
+	prompt "Branch Profiling"
+	default BRANCH_PROFILE_NONE
+	help
+	 The branch profiling is a software profiler. It will add hooks
+	 into the C conditionals to test which path a branch takes.
+
+	 The likely/unlikely profiler only looks at the conditions that
+	 are annotated with a likely or unlikely macro.
+
+	 The "all branch" profiler will profile every if statement in the
+	 kernel. This profiler will also enable the likely/unlikely
+	 profiler as well.
+
+	 Either of the above profilers add a bit of overhead to the system.
+	 If unsure choose "No branch profiling".
+
+config BRANCH_PROFILE_NONE
+	bool "No branch profiling"
+	help
+	 No branch profiling. Branch profiling adds a bit of overhead.
+	 Only enable it if you want to analyse the branching behavior.
+	 Otherwise keep it disabled.
+
+config PROFILE_ANNOTATED_BRANCHES
+	bool "Trace likely/unlikely profiler"
+	select TRACE_BRANCH_PROFILING
 	help
 	  This tracer profiles all the the likely and unlikely macros
 	  in the kernel. It will display the results in:
@@ -223,11 +251,9 @@ config TRACE_BRANCH_PROFILING
 	  Note: this will add a significant overhead, only turn this
 	  on if you need to profile the system's use of these macros.
 
-	  Say N if unsure.
-
 config PROFILE_ALL_BRANCHES
 	bool "Profile all if conditionals"
-	depends on TRACE_BRANCH_PROFILING
+	select TRACE_BRANCH_PROFILING
 	help
 	  This tracer profiles all branch conditions. Every if ()
 	  taken in the kernel is recorded whether it hit or miss.
@@ -235,11 +261,12 @@ config PROFILE_ALL_BRANCHES
 
 	  /debugfs/tracing/profile_branch
 
+	  This option also enables the likely/unlikely profiler.
+
 	  This configuration, when enabled, will impose a great overhead
 	  on the system. This should only be enabled when the system
 	  is to be analyzed
-
-	  Say N if unsure.
+endchoice
 
 config TRACING_BRANCHES
 	bool
-- 
cgit v0.10.2


From 4ed9f0716e46bb9646f26e73f4a1b5b24db7947a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 10:47:36 -0400
Subject: tracing: create menuconfig for tracing infrastructure

During testing we often use randconfig to test various kernels.
The current configuration set up does not give an easy way to disable
all tracing with a single config. The case where randconfig would
test all tracing disabled is very unlikely.

This patch adds a config option to enable or disable all tracing.
It is hooked into the tracing menu just like other submenus are done.

[ Impact: allow randconfig to easily produce all traces disabled ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 3ee28db..3fa36d2 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -77,7 +77,12 @@ config TRACING_SUPPORT
 
 if TRACING_SUPPORT
 
-menu "Tracers"
+menuconfig FTRACE
+	bool "Tracers"
+	help
+	 Enable the kernel tracing infrastructure.
+
+if FTRACE
 
 config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
@@ -462,7 +467,7 @@ config MMIOTRACE_TEST
 
 	  Say N, unless you absolutely know what you are doing.
 
-endmenu
+endif # FTRACE
 
 endif # TRACING_SUPPORT
 
-- 
cgit v0.10.2


From d24e473e5b2ca86d1288b9416227ccc603313d0f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 13:32:07 +0200
Subject: perf_counter: copy in Git's top Makefile

We'd like to have a similar user-space structure as Git has, for the
perfcounter tools - so copy in Git's toplevel makefile as-is.

We'll strip it down in subsequent commits to make it fit the
perfcounters code.

The Git version used: 66996ec: Sync with 1.6.2.4

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 1dd37ee..6e0838b 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -1,18 +1,1731 @@
-BINS = kerneltop perfstat perf-record perf-report
+# The default target of this Makefile is...
+all::
 
-all: $(BINS)
+# Define V=1 to have a more verbose compile.
+#
+# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
+# or vsnprintf() return -1 instead of number of characters which would
+# have been written to the final string if enough space had been available.
+#
+# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds
+# when attempting to read from an fopen'ed directory.
+#
+# Define NO_OPENSSL environment variable if you do not have OpenSSL.
+# This also implies MOZILLA_SHA1.
+#
+# Define NO_CURL if you do not have libcurl installed.  git-http-pull and
+# git-http-push are not built, and you cannot use http:// and https://
+# transports.
+#
+# Define CURLDIR=/foo/bar if your curl header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
+# Define NO_EXPAT if you do not have expat installed.  git-http-push is
+# not built, and you cannot push using http:// and https:// transports.
+#
+# Define EXPATDIR=/foo/bar if your expat header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
+# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent.
+#
+# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks
+# d_type in struct dirent (latest Cygwin -- will be fixed soonish).
+#
+# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.)
+# do not support the 'size specifiers' introduced by C99, namely ll, hh,
+# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t).
+# some C compilers supported these specifiers prior to C99 as an extension.
+#
+# Define NO_STRCASESTR if you don't have strcasestr.
+#
+# Define NO_MEMMEM if you don't have memmem.
+#
+# Define NO_STRLCPY if you don't have strlcpy.
+#
+# Define NO_STRTOUMAX if you don't have strtoumax in the C library.
+# If your compiler also does not support long long or does not have
+# strtoull, define NO_STRTOULL.
+#
+# Define NO_SETENV if you don't have setenv in the C library.
+#
+# Define NO_UNSETENV if you don't have unsetenv in the C library.
+#
+# Define NO_MKDTEMP if you don't have mkdtemp in the C library.
+#
+# Define NO_SYS_SELECT_H if you don't have sys/select.h.
+#
+# Define NO_SYMLINK_HEAD if you never want .git/HEAD to be a symbolic link.
+# Enable it on Windows.  By default, symrefs are still used.
+#
+# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
+# tests.  These tests take up a significant amount of the total test time
+# but are not needed unless you plan to talk to SVN repos.
+#
+# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
+# installed in /sw, but don't want GIT to link against any libraries
+# installed there.  If defined you may specify your own (or Fink's)
+# include directories and library directories by defining CFLAGS
+# and LDFLAGS appropriately.
+#
+# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
+# have DarwinPorts installed in /opt/local, but don't want GIT to
+# link against any libraries installed there.  If defined you may
+# specify your own (or DarwinPort's) include directories and
+# library directories by defining CFLAGS and LDFLAGS appropriately.
+#
+# Define PPC_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine optimized for PowerPC.
+#
+# Define ARM_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine optimized for ARM.
+#
+# Define MOZILLA_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast
+# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
+# choice) has very fast version optimized for i586.
+#
+# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
+#
+# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
+#
+# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
+# Patrick Mauritz).
+#
+# Define NO_MMAP if you want to avoid mmap.
+#
+# Define NO_PTHREADS if you do not have or do not want to use Pthreads.
+#
+# Define NO_PREAD if you have a problem with pread() system call (e.g.
+# cygwin.dll before v1.5.22).
+#
+# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
+# generally faster on your platform than accessing the working directory.
+#
+# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support
+# the executable mode bit, but doesn't really do so.
+#
+# Define NO_IPV6 if you lack IPv6 support and getaddrinfo().
+#
+# Define NO_SOCKADDR_STORAGE if your platform does not have struct
+# sockaddr_storage.
+#
+# Define NO_ICONV if your libc does not properly support iconv.
+#
+# Define OLD_ICONV if your library has an old iconv(), where the second
+# (input buffer pointer) parameter is declared with type (const char **).
+#
+# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound.
+#
+# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib"
+# that tells runtime paths to dynamic libraries;
+# "-Wl,-rpath=/path/lib" is used instead.
+#
+# Define USE_NSEC below if you want git to care about sub-second file mtimes
+# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
+# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
+# randomly break unless your underlying filesystem supports those sub-second
+# times (my ext3 doesn't).
+#
+# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of
+# "st_ctim"
+#
+# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
+# available.  This automatically turns USE_NSEC off.
+#
+# Define USE_STDEV below if you want git to care about the underlying device
+# change being considered an inode change from the update-index perspective.
+#
+# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
+# field that counts the on-disk footprint in 512-byte blocks.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's
+# MakeMaker (e.g. using ActiveState under Cygwin).
+#
+# Define NO_PERL if you do not want Perl scripts or libraries at all.
+#
+# Define NO_TCLTK if you do not want Tcl/Tk GUI.
+#
+# The TCL_PATH variable governs the location of the Tcl interpreter
+# used to optimize git-gui for your system.  Only used if NO_TCLTK
+# is not set.  Defaults to the bare 'tclsh'.
+#
+# The TCLTK_PATH variable governs the location of the Tcl/Tk interpreter.
+# If not set it defaults to the bare 'wish'. If it is set to the empty
+# string then NO_TCLTK will be forced (this is used by configure script).
+#
+# Define THREADED_DELTA_SEARCH if you have pthreads and wish to exploit
+# parallel delta searching when packing objects.
+#
+# Define INTERNAL_QSORT to use Git's implementation of qsort(), which
+# is a simplified version of the merge sort used in glibc. This is
+# recommended if Git triggers O(n^2) behavior in your platform's qsort().
+#
+# Define NO_EXTERNAL_GREP if you don't want "git grep" to ever call
+# your external grep (e.g., if your system lacks grep, if its grep is
+# broken, or spawning external process is slower than built-in grep git has).
 
-kerneltop: kerneltop.c ../../include/linux/perf_counter.h
-	cc -O6 -Wall -lrt -o $@ $<
+GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE
+	@$(SHELL_PATH) ./GIT-VERSION-GEN
+-include GIT-VERSION-FILE
 
-perf-record: perf-record.c ../../include/linux/perf_counter.h
-	cc -O6 -Wall -lrt -o $@ $<
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
+uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
+uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
+uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
+uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
-perf-report: perf-report.cc ../../include/linux/perf_counter.h
-	g++ -O6 -Wall -lrt -o $@ $<
+# CFLAGS and LDFLAGS are for the users to override from the command line.
 
-perfstat: kerneltop
-	ln -sf kerneltop perfstat
+CFLAGS = -g -O2 -Wall
+LDFLAGS =
+ALL_CFLAGS = $(CFLAGS)
+ALL_LDFLAGS = $(LDFLAGS)
+STRIP ?= strip
+
+# Among the variables below, these:
+#   gitexecdir
+#   template_dir
+#   mandir
+#   infodir
+#   htmldir
+#   ETC_GITCONFIG (but not sysconfdir)
+# can be specified as a relative path some/where/else;
+# this is interpreted as relative to $(prefix) and "git" at
+# runtime figures out where they are based on the path to the executable.
+# This can help installing the suite in a relocatable way.
+
+prefix = $(HOME)
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+mandir = share/man
+infodir = share/info
+gitexecdir = libexec/git-core
+sharedir = $(prefix)/share
+template_dir = share/git-core/templates
+htmldir = share/doc/git-doc
+ifeq ($(prefix),/usr)
+sysconfdir = /etc
+ETC_GITCONFIG = $(sysconfdir)/gitconfig
+else
+sysconfdir = $(prefix)/etc
+ETC_GITCONFIG = etc/gitconfig
+endif
+lib = lib
+# DESTDIR=
+
+# default configuration for gitweb
+GITWEB_CONFIG = gitweb_config.perl
+GITWEB_CONFIG_SYSTEM = /etc/gitweb.conf
+GITWEB_HOME_LINK_STR = projects
+GITWEB_SITENAME =
+GITWEB_PROJECTROOT = /pub/git
+GITWEB_PROJECT_MAXDEPTH = 2007
+GITWEB_EXPORT_OK =
+GITWEB_STRICT_EXPORT =
+GITWEB_BASE_URL =
+GITWEB_LIST =
+GITWEB_HOMETEXT = indextext.html
+GITWEB_CSS = gitweb.css
+GITWEB_LOGO = git-logo.png
+GITWEB_FAVICON = git-favicon.png
+GITWEB_SITE_HEADER =
+GITWEB_SITE_FOOTER =
+
+export prefix bindir sharedir sysconfdir
+
+CC = gcc
+AR = ar
+RM = rm -f
+TAR = tar
+FIND = find
+INSTALL = install
+RPMBUILD = rpmbuild
+TCL_PATH = tclsh
+TCLTK_PATH = wish
+PTHREAD_LIBS = -lpthread
+
+export TCL_PATH TCLTK_PATH
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+
+
+### --- END CONFIGURATION SECTION ---
+
+# Those must not be GNU-specific; they are shared with perl/ which may
+# be built by a different compiler. (Note that this is an artifact now
+# but it still might be nice to keep that distinction.)
+BASIC_CFLAGS =
+BASIC_LDFLAGS =
+
+# Guard against environment variables
+BUILTIN_OBJS =
+BUILT_INS =
+COMPAT_CFLAGS =
+COMPAT_OBJS =
+LIB_H =
+LIB_OBJS =
+PROGRAMS =
+SCRIPT_PERL =
+SCRIPT_SH =
+TEST_PROGRAMS =
+
+SCRIPT_SH += git-am.sh
+SCRIPT_SH += git-bisect.sh
+SCRIPT_SH += git-difftool--helper.sh
+SCRIPT_SH += git-filter-branch.sh
+SCRIPT_SH += git-lost-found.sh
+SCRIPT_SH += git-merge-octopus.sh
+SCRIPT_SH += git-merge-one-file.sh
+SCRIPT_SH += git-merge-resolve.sh
+SCRIPT_SH += git-mergetool.sh
+SCRIPT_SH += git-mergetool--lib.sh
+SCRIPT_SH += git-parse-remote.sh
+SCRIPT_SH += git-pull.sh
+SCRIPT_SH += git-quiltimport.sh
+SCRIPT_SH += git-rebase--interactive.sh
+SCRIPT_SH += git-rebase.sh
+SCRIPT_SH += git-repack.sh
+SCRIPT_SH += git-request-pull.sh
+SCRIPT_SH += git-sh-setup.sh
+SCRIPT_SH += git-stash.sh
+SCRIPT_SH += git-submodule.sh
+SCRIPT_SH += git-web--browse.sh
+
+SCRIPT_PERL += git-add--interactive.perl
+SCRIPT_PERL += git-difftool.perl
+SCRIPT_PERL += git-archimport.perl
+SCRIPT_PERL += git-cvsexportcommit.perl
+SCRIPT_PERL += git-cvsimport.perl
+SCRIPT_PERL += git-cvsserver.perl
+SCRIPT_PERL += git-relink.perl
+SCRIPT_PERL += git-send-email.perl
+SCRIPT_PERL += git-svn.perl
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
+	  $(patsubst %.perl,%,$(SCRIPT_PERL)) \
+	  git-instaweb
+
+# Empty...
+EXTRA_PROGRAMS =
+
+# ... and all the rest that could be moved out of bindir to gitexecdir
+PROGRAMS += $(EXTRA_PROGRAMS)
+PROGRAMS += git-fast-import$X
+PROGRAMS += git-hash-object$X
+PROGRAMS += git-index-pack$X
+PROGRAMS += git-merge-index$X
+PROGRAMS += git-merge-tree$X
+PROGRAMS += git-mktag$X
+PROGRAMS += git-mktree$X
+PROGRAMS += git-pack-redundant$X
+PROGRAMS += git-patch-id$X
+PROGRAMS += git-shell$X
+PROGRAMS += git-show-index$X
+PROGRAMS += git-unpack-file$X
+PROGRAMS += git-update-server-info$X
+PROGRAMS += git-upload-pack$X
+PROGRAMS += git-var$X
+
+# List built-in command $C whose implementation cmd_$C() is not in
+# builtin-$C.o but is linked in as part of some other command.
+BUILT_INS += $(patsubst builtin-%.o,git-%$X,$(BUILTIN_OBJS))
+
+BUILT_INS += git-cherry$X
+BUILT_INS += git-cherry-pick$X
+BUILT_INS += git-format-patch$X
+BUILT_INS += git-fsck-objects$X
+BUILT_INS += git-get-tar-commit-id$X
+BUILT_INS += git-init$X
+BUILT_INS += git-merge-subtree$X
+BUILT_INS += git-peek-remote$X
+BUILT_INS += git-repo-config$X
+BUILT_INS += git-show$X
+BUILT_INS += git-stage$X
+BUILT_INS += git-status$X
+BUILT_INS += git-whatchanged$X
+
+# what 'all' will build and 'install' will install, in gitexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in gitexecdir
+OTHER_PROGRAMS = git$X
+ifndef NO_PERL
+OTHER_PROGRAMS += gitweb/gitweb.cgi
+endif
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+	SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+	PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+LIB_FILE=libgit.a
+XDIFF_LIB=xdiff/lib.a
+
+LIB_H += archive.h
+LIB_H += attr.h
+LIB_H += blob.h
+LIB_H += builtin.h
+LIB_H += cache.h
+LIB_H += cache-tree.h
+LIB_H += commit.h
+LIB_H += compat/cygwin.h
+LIB_H += compat/mingw.h
+LIB_H += csum-file.h
+LIB_H += decorate.h
+LIB_H += delta.h
+LIB_H += diffcore.h
+LIB_H += diff.h
+LIB_H += dir.h
+LIB_H += fsck.h
+LIB_H += git-compat-util.h
+LIB_H += graph.h
+LIB_H += grep.h
+LIB_H += hash.h
+LIB_H += help.h
+LIB_H += levenshtein.h
+LIB_H += list-objects.h
+LIB_H += ll-merge.h
+LIB_H += log-tree.h
+LIB_H += mailmap.h
+LIB_H += merge-recursive.h
+LIB_H += object.h
+LIB_H += pack.h
+LIB_H += pack-refs.h
+LIB_H += pack-revindex.h
+LIB_H += parse-options.h
+LIB_H += patch-ids.h
+LIB_H += pkt-line.h
+LIB_H += progress.h
+LIB_H += quote.h
+LIB_H += reflog-walk.h
+LIB_H += refs.h
+LIB_H += remote.h
+LIB_H += rerere.h
+LIB_H += revision.h
+LIB_H += run-command.h
+LIB_H += sha1-lookup.h
+LIB_H += sideband.h
+LIB_H += sigchain.h
+LIB_H += strbuf.h
+LIB_H += string-list.h
+LIB_H += tag.h
+LIB_H += transport.h
+LIB_H += tree.h
+LIB_H += tree-walk.h
+LIB_H += unpack-trees.h
+LIB_H += userdiff.h
+LIB_H += utf8.h
+LIB_H += wt-status.h
+
+LIB_OBJS += abspath.o
+LIB_OBJS += alias.o
+LIB_OBJS += alloc.o
+LIB_OBJS += archive.o
+LIB_OBJS += archive-tar.o
+LIB_OBJS += archive-zip.o
+LIB_OBJS += attr.o
+LIB_OBJS += base85.o
+LIB_OBJS += bisect.o
+LIB_OBJS += blob.o
+LIB_OBJS += branch.o
+LIB_OBJS += bundle.o
+LIB_OBJS += cache-tree.o
+LIB_OBJS += color.o
+LIB_OBJS += combine-diff.o
+LIB_OBJS += commit.o
+LIB_OBJS += config.o
+LIB_OBJS += connect.o
+LIB_OBJS += convert.o
+LIB_OBJS += copy.o
+LIB_OBJS += csum-file.o
+LIB_OBJS += ctype.o
+LIB_OBJS += date.o
+LIB_OBJS += decorate.o
+LIB_OBJS += diffcore-break.o
+LIB_OBJS += diffcore-delta.o
+LIB_OBJS += diffcore-order.o
+LIB_OBJS += diffcore-pickaxe.o
+LIB_OBJS += diffcore-rename.o
+LIB_OBJS += diff-delta.o
+LIB_OBJS += diff-lib.o
+LIB_OBJS += diff-no-index.o
+LIB_OBJS += diff.o
+LIB_OBJS += dir.o
+LIB_OBJS += editor.o
+LIB_OBJS += entry.o
+LIB_OBJS += environment.o
+LIB_OBJS += exec_cmd.o
+LIB_OBJS += fsck.o
+LIB_OBJS += graph.o
+LIB_OBJS += grep.o
+LIB_OBJS += hash.o
+LIB_OBJS += help.o
+LIB_OBJS += ident.o
+LIB_OBJS += levenshtein.o
+LIB_OBJS += list-objects.o
+LIB_OBJS += ll-merge.o
+LIB_OBJS += lockfile.o
+LIB_OBJS += log-tree.o
+LIB_OBJS += mailmap.o
+LIB_OBJS += match-trees.o
+LIB_OBJS += merge-file.o
+LIB_OBJS += merge-recursive.o
+LIB_OBJS += name-hash.o
+LIB_OBJS += object.o
+LIB_OBJS += pack-check.o
+LIB_OBJS += pack-refs.o
+LIB_OBJS += pack-revindex.o
+LIB_OBJS += pack-write.o
+LIB_OBJS += pager.o
+LIB_OBJS += parse-options.o
+LIB_OBJS += patch-delta.o
+LIB_OBJS += patch-ids.o
+LIB_OBJS += path.o
+LIB_OBJS += pkt-line.o
+LIB_OBJS += preload-index.o
+LIB_OBJS += pretty.o
+LIB_OBJS += progress.o
+LIB_OBJS += quote.o
+LIB_OBJS += reachable.o
+LIB_OBJS += read-cache.o
+LIB_OBJS += reflog-walk.o
+LIB_OBJS += refs.o
+LIB_OBJS += remote.o
+LIB_OBJS += rerere.o
+LIB_OBJS += revision.o
+LIB_OBJS += run-command.o
+LIB_OBJS += server-info.o
+LIB_OBJS += setup.o
+LIB_OBJS += sha1-lookup.o
+LIB_OBJS += sha1_file.o
+LIB_OBJS += sha1_name.o
+LIB_OBJS += shallow.o
+LIB_OBJS += sideband.o
+LIB_OBJS += sigchain.o
+LIB_OBJS += strbuf.o
+LIB_OBJS += string-list.o
+LIB_OBJS += symlinks.o
+LIB_OBJS += tag.o
+LIB_OBJS += trace.o
+LIB_OBJS += transport.o
+LIB_OBJS += tree-diff.o
+LIB_OBJS += tree.o
+LIB_OBJS += tree-walk.o
+LIB_OBJS += unpack-trees.o
+LIB_OBJS += usage.o
+LIB_OBJS += userdiff.o
+LIB_OBJS += utf8.o
+LIB_OBJS += walker.o
+LIB_OBJS += wrapper.o
+LIB_OBJS += write_or_die.o
+LIB_OBJS += ws.o
+LIB_OBJS += wt-status.o
+LIB_OBJS += xdiff-interface.o
+
+BUILTIN_OBJS += builtin-add.o
+BUILTIN_OBJS += builtin-annotate.o
+BUILTIN_OBJS += builtin-apply.o
+BUILTIN_OBJS += builtin-archive.o
+BUILTIN_OBJS += builtin-bisect--helper.o
+BUILTIN_OBJS += builtin-blame.o
+BUILTIN_OBJS += builtin-branch.o
+BUILTIN_OBJS += builtin-bundle.o
+BUILTIN_OBJS += builtin-cat-file.o
+BUILTIN_OBJS += builtin-check-attr.o
+BUILTIN_OBJS += builtin-check-ref-format.o
+BUILTIN_OBJS += builtin-checkout-index.o
+BUILTIN_OBJS += builtin-checkout.o
+BUILTIN_OBJS += builtin-clean.o
+BUILTIN_OBJS += builtin-clone.o
+BUILTIN_OBJS += builtin-commit-tree.o
+BUILTIN_OBJS += builtin-commit.o
+BUILTIN_OBJS += builtin-config.o
+BUILTIN_OBJS += builtin-count-objects.o
+BUILTIN_OBJS += builtin-describe.o
+BUILTIN_OBJS += builtin-diff-files.o
+BUILTIN_OBJS += builtin-diff-index.o
+BUILTIN_OBJS += builtin-diff-tree.o
+BUILTIN_OBJS += builtin-diff.o
+BUILTIN_OBJS += builtin-fast-export.o
+BUILTIN_OBJS += builtin-fetch--tool.o
+BUILTIN_OBJS += builtin-fetch-pack.o
+BUILTIN_OBJS += builtin-fetch.o
+BUILTIN_OBJS += builtin-fmt-merge-msg.o
+BUILTIN_OBJS += builtin-for-each-ref.o
+BUILTIN_OBJS += builtin-fsck.o
+BUILTIN_OBJS += builtin-gc.o
+BUILTIN_OBJS += builtin-grep.o
+BUILTIN_OBJS += builtin-help.o
+BUILTIN_OBJS += builtin-init-db.o
+BUILTIN_OBJS += builtin-log.o
+BUILTIN_OBJS += builtin-ls-files.o
+BUILTIN_OBJS += builtin-ls-remote.o
+BUILTIN_OBJS += builtin-ls-tree.o
+BUILTIN_OBJS += builtin-mailinfo.o
+BUILTIN_OBJS += builtin-mailsplit.o
+BUILTIN_OBJS += builtin-merge.o
+BUILTIN_OBJS += builtin-merge-base.o
+BUILTIN_OBJS += builtin-merge-file.o
+BUILTIN_OBJS += builtin-merge-ours.o
+BUILTIN_OBJS += builtin-merge-recursive.o
+BUILTIN_OBJS += builtin-mv.o
+BUILTIN_OBJS += builtin-name-rev.o
+BUILTIN_OBJS += builtin-pack-objects.o
+BUILTIN_OBJS += builtin-pack-refs.o
+BUILTIN_OBJS += builtin-prune-packed.o
+BUILTIN_OBJS += builtin-prune.o
+BUILTIN_OBJS += builtin-push.o
+BUILTIN_OBJS += builtin-read-tree.o
+BUILTIN_OBJS += builtin-receive-pack.o
+BUILTIN_OBJS += builtin-reflog.o
+BUILTIN_OBJS += builtin-remote.o
+BUILTIN_OBJS += builtin-rerere.o
+BUILTIN_OBJS += builtin-reset.o
+BUILTIN_OBJS += builtin-rev-list.o
+BUILTIN_OBJS += builtin-rev-parse.o
+BUILTIN_OBJS += builtin-revert.o
+BUILTIN_OBJS += builtin-rm.o
+BUILTIN_OBJS += builtin-send-pack.o
+BUILTIN_OBJS += builtin-shortlog.o
+BUILTIN_OBJS += builtin-show-branch.o
+BUILTIN_OBJS += builtin-show-ref.o
+BUILTIN_OBJS += builtin-stripspace.o
+BUILTIN_OBJS += builtin-symbolic-ref.o
+BUILTIN_OBJS += builtin-tag.o
+BUILTIN_OBJS += builtin-tar-tree.o
+BUILTIN_OBJS += builtin-unpack-objects.o
+BUILTIN_OBJS += builtin-update-index.o
+BUILTIN_OBJS += builtin-update-ref.o
+BUILTIN_OBJS += builtin-upload-archive.o
+BUILTIN_OBJS += builtin-verify-pack.o
+BUILTIN_OBJS += builtin-verify-tag.o
+BUILTIN_OBJS += builtin-write-tree.o
+
+GITLIBS = $(LIB_FILE) $(XDIFF_LIB)
+EXTLIBS =
+
+#
+# Platform specific tweaks
+#
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain.  If
+# we had "elif" things would have been much nicer...
+
+ifeq ($(uname_S),Linux)
+	NO_STRLCPY = YesPlease
+	THREADED_DELTA_SEARCH = YesPlease
+endif
+ifeq ($(uname_S),GNU/kFreeBSD)
+	NO_STRLCPY = YesPlease
+	THREADED_DELTA_SEARCH = YesPlease
+endif
+ifeq ($(uname_S),UnixWare)
+	CC = cc
+	NEEDS_SOCKET = YesPlease
+	NEEDS_NSL = YesPlease
+	NEEDS_SSL_WITH_CRYPTO = YesPlease
+	NEEDS_LIBICONV = YesPlease
+	SHELL_PATH = /usr/local/bin/bash
+	NO_IPV6 = YesPlease
+	NO_HSTRERROR = YesPlease
+	BASIC_CFLAGS += -Kthread
+	BASIC_CFLAGS += -I/usr/local/include
+	BASIC_LDFLAGS += -L/usr/local/lib
+	INSTALL = ginstall
+	TAR = gtar
+	NO_STRCASESTR = YesPlease
+	NO_MEMMEM = YesPlease
+endif
+ifeq ($(uname_S),SCO_SV)
+	ifeq ($(uname_R),3.2)
+		CFLAGS = -O2
+	endif
+	ifeq ($(uname_R),5)
+		CC = cc
+		BASIC_CFLAGS += -Kthread
+	endif
+	NEEDS_SOCKET = YesPlease
+	NEEDS_NSL = YesPlease
+	NEEDS_SSL_WITH_CRYPTO = YesPlease
+	NEEDS_LIBICONV = YesPlease
+	SHELL_PATH = /usr/bin/bash
+	NO_IPV6 = YesPlease
+	NO_HSTRERROR = YesPlease
+	BASIC_CFLAGS += -I/usr/local/include
+	BASIC_LDFLAGS += -L/usr/local/lib
+	NO_STRCASESTR = YesPlease
+	NO_MEMMEM = YesPlease
+	INSTALL = ginstall
+	TAR = gtar
+endif
+ifeq ($(uname_S),Darwin)
+	NEEDS_SSL_WITH_CRYPTO = YesPlease
+	NEEDS_LIBICONV = YesPlease
+	ifeq ($(shell expr "$(uname_R)" : '[15678]\.'),2)
+		OLD_ICONV = UnfortunatelyYes
+	endif
+	ifeq ($(shell expr "$(uname_R)" : '[15]\.'),2)
+		NO_STRLCPY = YesPlease
+	endif
+	NO_MEMMEM = YesPlease
+	THREADED_DELTA_SEARCH = YesPlease
+	USE_ST_TIMESPEC = YesPlease
+endif
+ifeq ($(uname_S),SunOS)
+	NEEDS_SOCKET = YesPlease
+	NEEDS_NSL = YesPlease
+	SHELL_PATH = /bin/bash
+	NO_STRCASESTR = YesPlease
+	NO_MEMMEM = YesPlease
+	NO_HSTRERROR = YesPlease
+	NO_MKDTEMP = YesPlease
+	OLD_ICONV = UnfortunatelyYes
+	ifeq ($(uname_R),5.8)
+		NO_UNSETENV = YesPlease
+		NO_SETENV = YesPlease
+		NO_C99_FORMAT = YesPlease
+		NO_STRTOUMAX = YesPlease
+	endif
+	ifeq ($(uname_R),5.9)
+		NO_UNSETENV = YesPlease
+		NO_SETENV = YesPlease
+		NO_C99_FORMAT = YesPlease
+		NO_STRTOUMAX = YesPlease
+	endif
+	INSTALL = ginstall
+	TAR = gtar
+	BASIC_CFLAGS += -D__EXTENSIONS__
+endif
+ifeq ($(uname_O),Cygwin)
+	NO_D_TYPE_IN_DIRENT = YesPlease
+	NO_D_INO_IN_DIRENT = YesPlease
+	NO_STRCASESTR = YesPlease
+	NO_MEMMEM = YesPlease
+	NO_SYMLINK_HEAD = YesPlease
+	NEEDS_LIBICONV = YesPlease
+	NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes
+	NO_TRUSTABLE_FILEMODE = UnfortunatelyYes
+	OLD_ICONV = UnfortunatelyYes
+	# There are conflicting reports about this.
+	# On some boxes NO_MMAP is needed, and not so elsewhere.
+	# Try commenting this out if you suspect MMAP is more efficient
+	NO_MMAP = YesPlease
+	NO_IPV6 = YesPlease
+	X = .exe
+endif
+ifeq ($(uname_S),FreeBSD)
+	NEEDS_LIBICONV = YesPlease
+	NO_MEMMEM = YesPlease
+	BASIC_CFLAGS += -I/usr/local/include
+	BASIC_LDFLAGS += -L/usr/local/lib
+	DIR_HAS_BSD_GROUP_SEMANTICS = YesPlease
+	USE_ST_TIMESPEC = YesPlease
+	THREADED_DELTA_SEARCH = YesPlease
+	ifeq ($(shell expr "$(uname_R)" : '4\.'),2)
+		PTHREAD_LIBS = -pthread
+		NO_UINTMAX_T = YesPlease
+		NO_STRTOUMAX = YesPlease
+	endif
+endif
+ifeq ($(uname_S),OpenBSD)
+	NO_STRCASESTR = YesPlease
+	NO_MEMMEM = YesPlease
+	NEEDS_LIBICONV = YesPlease
+	BASIC_CFLAGS += -I/usr/local/include
+	BASIC_LDFLAGS += -L/usr/local/lib
+	THREADED_DELTA_SEARCH = YesPlease
+endif
+ifeq ($(uname_S),NetBSD)
+	ifeq ($(shell expr "$(uname_R)" : '[01]\.'),2)
+		NEEDS_LIBICONV = YesPlease
+	endif
+	BASIC_CFLAGS += -I/usr/pkg/include
+	BASIC_LDFLAGS += -L/usr/pkg/lib $(CC_LD_DYNPATH)/usr/pkg/lib
+	THREADED_DELTA_SEARCH = YesPlease
+endif
+ifeq ($(uname_S),AIX)
+	NO_STRCASESTR=YesPlease
+	NO_MEMMEM = YesPlease
+	NO_MKDTEMP = YesPlease
+	NO_STRLCPY = YesPlease
+	NO_NSEC = YesPlease
+	FREAD_READS_DIRECTORIES = UnfortunatelyYes
+	INTERNAL_QSORT = UnfortunatelyYes
+	NEEDS_LIBICONV=YesPlease
+	BASIC_CFLAGS += -D_LARGE_FILES
+	ifneq ($(shell expr "$(uname_V)" : '[1234]'),1)
+		THREADED_DELTA_SEARCH = YesPlease
+	else
+		NO_PTHREADS = YesPlease
+	endif
+endif
+ifeq ($(uname_S),GNU)
+	# GNU/Hurd
+	NO_STRLCPY=YesPlease
+endif
+ifeq ($(uname_S),IRIX64)
+	NO_IPV6=YesPlease
+	NO_SETENV=YesPlease
+	NO_STRCASESTR=YesPlease
+	NO_MEMMEM = YesPlease
+	NO_STRLCPY = YesPlease
+	NO_SOCKADDR_STORAGE=YesPlease
+	SHELL_PATH=/usr/gnu/bin/bash
+	BASIC_CFLAGS += -DPATH_MAX=1024
+	# for now, build 32-bit version
+	BASIC_LDFLAGS += -L/usr/lib32
+endif
+ifeq ($(uname_S),HP-UX)
+	NO_IPV6=YesPlease
+	NO_SETENV=YesPlease
+	NO_STRCASESTR=YesPlease
+	NO_MEMMEM = YesPlease
+	NO_STRLCPY = YesPlease
+	NO_MKDTEMP = YesPlease
+	NO_UNSETENV = YesPlease
+	NO_HSTRERROR = YesPlease
+	NO_SYS_SELECT_H = YesPlease
+	SNPRINTF_RETURNS_BOGUS = YesPlease
+endif
+ifneq (,$(findstring CYGWIN,$(uname_S)))
+	COMPAT_OBJS += compat/cygwin.o
+endif
+ifneq (,$(findstring MINGW,$(uname_S)))
+	NO_PREAD = YesPlease
+	NO_OPENSSL = YesPlease
+	NO_CURL = YesPlease
+	NO_SYMLINK_HEAD = YesPlease
+	NO_IPV6 = YesPlease
+	NO_SETENV = YesPlease
+	NO_UNSETENV = YesPlease
+	NO_STRCASESTR = YesPlease
+	NO_STRLCPY = YesPlease
+	NO_MEMMEM = YesPlease
+	NO_PTHREADS = YesPlease
+	NEEDS_LIBICONV = YesPlease
+	OLD_ICONV = YesPlease
+	NO_C99_FORMAT = YesPlease
+	NO_STRTOUMAX = YesPlease
+	NO_MKDTEMP = YesPlease
+	SNPRINTF_RETURNS_BOGUS = YesPlease
+	NO_SVN_TESTS = YesPlease
+	NO_PERL_MAKEMAKER = YesPlease
+	RUNTIME_PREFIX = YesPlease
+	NO_POSIX_ONLY_PROGRAMS = YesPlease
+	NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
+	NO_NSEC = YesPlease
+	USE_WIN32_MMAP = YesPlease
+	COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat -Icompat/regex -Icompat/fnmatch
+	COMPAT_CFLAGS += -DSNPRINTF_SIZE_CORR=1
+	COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
+	COMPAT_OBJS += compat/mingw.o compat/fnmatch/fnmatch.o compat/regex/regex.o compat/winansi.o
+	EXTLIBS += -lws2_32
+	X = .exe
+endif
+ifneq (,$(findstring arm,$(uname_M)))
+	ARM_SHA1 = YesPlease
+endif
+
+-include config.mak.autogen
+-include config.mak
+
+ifeq ($(uname_S),Darwin)
+	ifndef NO_FINK
+		ifeq ($(shell test -d /sw/lib && echo y),y)
+			BASIC_CFLAGS += -I/sw/include
+			BASIC_LDFLAGS += -L/sw/lib
+		endif
+	endif
+	ifndef NO_DARWIN_PORTS
+		ifeq ($(shell test -d /opt/local/lib && echo y),y)
+			BASIC_CFLAGS += -I/opt/local/include
+			BASIC_LDFLAGS += -L/opt/local/lib
+		endif
+	endif
+	PTHREAD_LIBS =
+endif
+
+ifndef CC_LD_DYNPATH
+	ifdef NO_R_TO_GCC_LINKER
+		# Some gcc does not accept and pass -R to the linker to specify
+		# the runtime dynamic library path.
+		CC_LD_DYNPATH = -Wl,-rpath,
+	else
+		CC_LD_DYNPATH = -R
+	endif
+endif
+
+ifdef NO_CURL
+	BASIC_CFLAGS += -DNO_CURL
+else
+	ifdef CURLDIR
+		# Try "-Wl,-rpath=$(CURLDIR)/$(lib)" in such a case.
+		BASIC_CFLAGS += -I$(CURLDIR)/include
+		CURL_LIBCURL = -L$(CURLDIR)/$(lib) $(CC_LD_DYNPATH)$(CURLDIR)/$(lib) -lcurl
+	else
+		CURL_LIBCURL = -lcurl
+	endif
+	BUILTIN_OBJS += builtin-http-fetch.o
+	EXTLIBS += $(CURL_LIBCURL)
+	LIB_OBJS += http.o http-walker.o
+	curl_check := $(shell (echo 070908; curl-config --vernum) | sort -r | sed -ne 2p)
+	ifeq "$(curl_check)" "070908"
+		ifndef NO_EXPAT
+			PROGRAMS += git-http-push$X
+		endif
+	endif
+	ifndef NO_EXPAT
+		ifdef EXPATDIR
+			BASIC_CFLAGS += -I$(EXPATDIR)/include
+			EXPAT_LIBEXPAT = -L$(EXPATDIR)/$(lib) $(CC_LD_DYNPATH)$(EXPATDIR)/$(lib) -lexpat
+		else
+			EXPAT_LIBEXPAT = -lexpat
+		endif
+	endif
+endif
+
+ifdef ZLIB_PATH
+	BASIC_CFLAGS += -I$(ZLIB_PATH)/include
+	EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib)
+endif
+EXTLIBS += -lz
+
+ifndef NO_POSIX_ONLY_PROGRAMS
+	PROGRAMS += git-daemon$X
+	PROGRAMS += git-imap-send$X
+endif
+ifndef NO_OPENSSL
+	OPENSSL_LIBSSL = -lssl
+	ifdef OPENSSLDIR
+		BASIC_CFLAGS += -I$(OPENSSLDIR)/include
+		OPENSSL_LINK = -L$(OPENSSLDIR)/$(lib) $(CC_LD_DYNPATH)$(OPENSSLDIR)/$(lib)
+	else
+		OPENSSL_LINK =
+	endif
+else
+	BASIC_CFLAGS += -DNO_OPENSSL
+	MOZILLA_SHA1 = 1
+	OPENSSL_LIBSSL =
+endif
+ifdef NEEDS_SSL_WITH_CRYPTO
+	LIB_4_CRYPTO = $(OPENSSL_LINK) -lcrypto -lssl
+else
+	LIB_4_CRYPTO = $(OPENSSL_LINK) -lcrypto
+endif
+ifdef NEEDS_LIBICONV
+	ifdef ICONVDIR
+		BASIC_CFLAGS += -I$(ICONVDIR)/include
+		ICONV_LINK = -L$(ICONVDIR)/$(lib) $(CC_LD_DYNPATH)$(ICONVDIR)/$(lib)
+	else
+		ICONV_LINK =
+	endif
+	EXTLIBS += $(ICONV_LINK) -liconv
+endif
+ifdef NEEDS_SOCKET
+	EXTLIBS += -lsocket
+endif
+ifdef NEEDS_NSL
+	EXTLIBS += -lnsl
+endif
+ifdef NO_D_TYPE_IN_DIRENT
+	BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT
+endif
+ifdef NO_D_INO_IN_DIRENT
+	BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT
+endif
+ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
+	BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT
+endif
+ifdef USE_NSEC
+	BASIC_CFLAGS += -DUSE_NSEC
+endif
+ifdef USE_ST_TIMESPEC
+	BASIC_CFLAGS += -DUSE_ST_TIMESPEC
+endif
+ifdef NO_NSEC
+	BASIC_CFLAGS += -DNO_NSEC
+endif
+ifdef NO_C99_FORMAT
+	BASIC_CFLAGS += -DNO_C99_FORMAT
+endif
+ifdef SNPRINTF_RETURNS_BOGUS
+	COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
+	COMPAT_OBJS += compat/snprintf.o
+endif
+ifdef FREAD_READS_DIRECTORIES
+	COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
+	COMPAT_OBJS += compat/fopen.o
+endif
+ifdef NO_SYMLINK_HEAD
+	BASIC_CFLAGS += -DNO_SYMLINK_HEAD
+endif
+ifdef NO_STRCASESTR
+	COMPAT_CFLAGS += -DNO_STRCASESTR
+	COMPAT_OBJS += compat/strcasestr.o
+endif
+ifdef NO_STRLCPY
+	COMPAT_CFLAGS += -DNO_STRLCPY
+	COMPAT_OBJS += compat/strlcpy.o
+endif
+ifdef NO_STRTOUMAX
+	COMPAT_CFLAGS += -DNO_STRTOUMAX
+	COMPAT_OBJS += compat/strtoumax.o
+endif
+ifdef NO_STRTOULL
+	COMPAT_CFLAGS += -DNO_STRTOULL
+endif
+ifdef NO_SETENV
+	COMPAT_CFLAGS += -DNO_SETENV
+	COMPAT_OBJS += compat/setenv.o
+endif
+ifdef NO_MKDTEMP
+	COMPAT_CFLAGS += -DNO_MKDTEMP
+	COMPAT_OBJS += compat/mkdtemp.o
+endif
+ifdef NO_UNSETENV
+	COMPAT_CFLAGS += -DNO_UNSETENV
+	COMPAT_OBJS += compat/unsetenv.o
+endif
+ifdef NO_SYS_SELECT_H
+	BASIC_CFLAGS += -DNO_SYS_SELECT_H
+endif
+ifdef NO_MMAP
+	COMPAT_CFLAGS += -DNO_MMAP
+	COMPAT_OBJS += compat/mmap.o
+else
+	ifdef USE_WIN32_MMAP
+		COMPAT_CFLAGS += -DUSE_WIN32_MMAP
+		COMPAT_OBJS += compat/win32mmap.o
+	endif
+endif
+ifdef NO_PREAD
+	COMPAT_CFLAGS += -DNO_PREAD
+	COMPAT_OBJS += compat/pread.o
+endif
+ifdef NO_FAST_WORKING_DIRECTORY
+	BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
+endif
+ifdef NO_TRUSTABLE_FILEMODE
+	BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE
+endif
+ifdef NO_IPV6
+	BASIC_CFLAGS += -DNO_IPV6
+endif
+ifdef NO_UINTMAX_T
+	BASIC_CFLAGS += -Duintmax_t=uint32_t
+endif
+ifdef NO_SOCKADDR_STORAGE
+ifdef NO_IPV6
+	BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
+else
+	BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
+endif
+endif
+ifdef NO_INET_NTOP
+	LIB_OBJS += compat/inet_ntop.o
+endif
+ifdef NO_INET_PTON
+	LIB_OBJS += compat/inet_pton.o
+endif
+
+ifdef NO_ICONV
+	BASIC_CFLAGS += -DNO_ICONV
+endif
+
+ifdef OLD_ICONV
+	BASIC_CFLAGS += -DOLD_ICONV
+endif
+
+ifdef NO_DEFLATE_BOUND
+	BASIC_CFLAGS += -DNO_DEFLATE_BOUND
+endif
+
+ifdef PPC_SHA1
+	SHA1_HEADER = "ppc/sha1.h"
+	LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
+else
+ifdef ARM_SHA1
+	SHA1_HEADER = "arm/sha1.h"
+	LIB_OBJS += arm/sha1.o arm/sha1_arm.o
+else
+ifdef MOZILLA_SHA1
+	SHA1_HEADER = "mozilla-sha1/sha1.h"
+	LIB_OBJS += mozilla-sha1/sha1.o
+else
+	SHA1_HEADER = <openssl/sha.h>
+	EXTLIBS += $(LIB_4_CRYPTO)
+endif
+endif
+endif
+ifdef NO_PERL_MAKEMAKER
+	export NO_PERL_MAKEMAKER
+endif
+ifdef NO_HSTRERROR
+	COMPAT_CFLAGS += -DNO_HSTRERROR
+	COMPAT_OBJS += compat/hstrerror.o
+endif
+ifdef NO_MEMMEM
+	COMPAT_CFLAGS += -DNO_MEMMEM
+	COMPAT_OBJS += compat/memmem.o
+endif
+ifdef INTERNAL_QSORT
+	COMPAT_CFLAGS += -DINTERNAL_QSORT
+	COMPAT_OBJS += compat/qsort.o
+endif
+ifdef RUNTIME_PREFIX
+	COMPAT_CFLAGS += -DRUNTIME_PREFIX
+endif
+
+ifdef NO_PTHREADS
+	THREADED_DELTA_SEARCH =
+	BASIC_CFLAGS += -DNO_PTHREADS
+else
+	EXTLIBS += $(PTHREAD_LIBS)
+endif
+
+ifdef THREADED_DELTA_SEARCH
+	BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH
+	LIB_OBJS += thread-utils.o
+endif
+ifdef DIR_HAS_BSD_GROUP_SEMANTICS
+	COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
+endif
+ifdef NO_EXTERNAL_GREP
+	BASIC_CFLAGS += -DNO_EXTERNAL_GREP
+endif
+
+ifeq ($(TCLTK_PATH),)
+NO_TCLTK=NoThanks
+endif
+
+ifeq ($(PERL_PATH),)
+NO_PERL=NoThanks
+endif
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+	QUIET_CC       = @echo '   ' CC $@;
+	QUIET_AR       = @echo '   ' AR $@;
+	QUIET_LINK     = @echo '   ' LINK $@;
+	QUIET_BUILT_IN = @echo '   ' BUILTIN $@;
+	QUIET_GEN      = @echo '   ' GEN $@;
+	QUIET_SUBDIR0  = +@subdir=
+	QUIET_SUBDIR1  = ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+			 $(MAKE) $(PRINT_DIR) -C $$subdir
+	export V
+	export QUIET_GEN
+	export QUIET_BUILT_IN
+endif
+endif
+
+ifdef ASCIIDOC8
+	export ASCIIDOC8
+endif
+
+# Shell quote (do not use $(call) to accommodate ancient setups);
+
+SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
+ETC_GITCONFIG_SQ = $(subst ','\'',$(ETC_GITCONFIG))
+
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+bindir_SQ = $(subst ','\'',$(bindir))
+bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+mandir_SQ = $(subst ','\'',$(mandir))
+infodir_SQ = $(subst ','\'',$(infodir))
+gitexecdir_SQ = $(subst ','\'',$(gitexecdir))
+template_dir_SQ = $(subst ','\'',$(template_dir))
+htmldir_SQ = $(subst ','\'',$(htmldir))
+prefix_SQ = $(subst ','\'',$(prefix))
+
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
+TCLTK_PATH_SQ = $(subst ','\'',$(TCLTK_PATH))
+
+LIBS = $(GITLIBS) $(EXTLIBS)
+
+BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
+	$(COMPAT_CFLAGS)
+LIB_OBJS += $(COMPAT_OBJS)
+
+ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_LDFLAGS += $(BASIC_LDFLAGS)
+
+export TAR INSTALL DESTDIR SHELL_PATH
+
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) GIT-BUILD-OPTIONS
+ifneq (,$X)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) git$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
+endif
+
+all::
+ifndef NO_TCLTK
+	$(QUIET_SUBDIR0)git-gui $(QUIET_SUBDIR1) gitexecdir='$(gitexec_instdir_SQ)' all
+	$(QUIET_SUBDIR0)gitk-git $(QUIET_SUBDIR1) all
+endif
+ifndef NO_PERL
+	$(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' all
+endif
+	$(QUIET_SUBDIR0)templates $(QUIET_SUBDIR1)
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+	@$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) git$X
+	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) git$X
+
+git.o: git.c common-cmds.h GIT-CFLAGS
+	$(QUIET_CC)$(CC) -DGIT_VERSION='"$(GIT_VERSION)"' \
+		'-DGIT_HTML_PATH="$(htmldir_SQ)"' \
+		$(ALL_CFLAGS) -c $(filter %.c,$^)
+
+git$X: git.o $(BUILTIN_OBJS) $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ git.o \
+		$(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
+
+builtin-help.o: builtin-help.c common-cmds.h GIT-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
+		'-DGIT_HTML_PATH="$(htmldir_SQ)"' \
+		'-DGIT_MAN_PATH="$(mandir_SQ)"' \
+		'-DGIT_INFO_PATH="$(infodir_SQ)"' $<
+
+$(BUILT_INS): git$X
+	$(QUIET_BUILT_IN)$(RM) $@ && \
+	ln git$X $@ 2>/dev/null || \
+	ln -s git$X $@ 2>/dev/null || \
+	cp git$X $@
+
+common-cmds.h: ./generate-cmdlist.sh command-list.txt
+
+common-cmds.h: $(wildcard Documentation/git-*.txt)
+	$(QUIET_GEN)./generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
+	$(QUIET_GEN)$(RM) $@ $@+ && \
+	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
+	    -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
+	    -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
+	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
+	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
+	    $@.sh >$@+ && \
+	chmod +x $@+ && \
+	mv $@+ $@
+
+ifndef NO_PERL
+$(patsubst %.perl,%,$(SCRIPT_PERL)): perl/perl.mak
+
+perl/perl.mak: GIT-CFLAGS perl/Makefile perl/Makefile.PL
+	$(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' $(@F)
+
+$(patsubst %.perl,%,$(SCRIPT_PERL)): % : %.perl
+	$(QUIET_GEN)$(RM) $@ $@+ && \
+	INSTLIBDIR=`MAKEFLAGS= $(MAKE) -C perl -s --no-print-directory instlibdir` && \
+	sed -e '1{' \
+	    -e '	s|#!.*perl|#!$(PERL_PATH_SQ)|' \
+	    -e '	h' \
+	    -e '	s=.*=use lib (split(/:/, $$ENV{GITPERLLIB} || "@@INSTLIBDIR@@"));=' \
+	    -e '	H' \
+	    -e '	x' \
+	    -e '}' \
+	    -e 's|@@INSTLIBDIR@@|'"$$INSTLIBDIR"'|g' \
+	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
+	    $@.perl >$@+ && \
+	chmod +x $@+ && \
+	mv $@+ $@
+
+gitweb/gitweb.cgi: gitweb/gitweb.perl
+	$(QUIET_GEN)$(RM) $@ $@+ && \
+	sed -e '1s|#!.*perl|#!$(PERL_PATH_SQ)|' \
+	    -e 's|++GIT_VERSION++|$(GIT_VERSION)|g' \
+	    -e 's|++GIT_BINDIR++|$(bindir)|g' \
+	    -e 's|++GITWEB_CONFIG++|$(GITWEB_CONFIG)|g' \
+	    -e 's|++GITWEB_CONFIG_SYSTEM++|$(GITWEB_CONFIG_SYSTEM)|g' \
+	    -e 's|++GITWEB_HOME_LINK_STR++|$(GITWEB_HOME_LINK_STR)|g' \
+	    -e 's|++GITWEB_SITENAME++|$(GITWEB_SITENAME)|g' \
+	    -e 's|++GITWEB_PROJECTROOT++|$(GITWEB_PROJECTROOT)|g' \
+	    -e 's|"++GITWEB_PROJECT_MAXDEPTH++"|$(GITWEB_PROJECT_MAXDEPTH)|g' \
+	    -e 's|++GITWEB_EXPORT_OK++|$(GITWEB_EXPORT_OK)|g' \
+	    -e 's|++GITWEB_STRICT_EXPORT++|$(GITWEB_STRICT_EXPORT)|g' \
+	    -e 's|++GITWEB_BASE_URL++|$(GITWEB_BASE_URL)|g' \
+	    -e 's|++GITWEB_LIST++|$(GITWEB_LIST)|g' \
+	    -e 's|++GITWEB_HOMETEXT++|$(GITWEB_HOMETEXT)|g' \
+	    -e 's|++GITWEB_CSS++|$(GITWEB_CSS)|g' \
+	    -e 's|++GITWEB_LOGO++|$(GITWEB_LOGO)|g' \
+	    -e 's|++GITWEB_FAVICON++|$(GITWEB_FAVICON)|g' \
+	    -e 's|++GITWEB_SITE_HEADER++|$(GITWEB_SITE_HEADER)|g' \
+	    -e 's|++GITWEB_SITE_FOOTER++|$(GITWEB_SITE_FOOTER)|g' \
+	    $< >$@+ && \
+	chmod +x $@+ && \
+	mv $@+ $@
+
+git-instaweb: git-instaweb.sh gitweb/gitweb.cgi gitweb/gitweb.css
+	$(QUIET_GEN)$(RM) $@ $@+ && \
+	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
+	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
+	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
+	    -e '/@@GITWEB_CGI@@/r gitweb/gitweb.cgi' \
+	    -e '/@@GITWEB_CGI@@/d' \
+	    -e '/@@GITWEB_CSS@@/r gitweb/gitweb.css' \
+	    -e '/@@GITWEB_CSS@@/d' \
+	    -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
+	    $@.sh > $@+ && \
+	chmod +x $@+ && \
+	mv $@+ $@
+else # NO_PERL
+$(patsubst %.perl,%,$(SCRIPT_PERL)) git-instaweb: % : unimplemented.sh
+	$(QUIET_GEN)$(RM) $@ $@+ && \
+	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
+	    -e 's|@@REASON@@|NO_PERL=$(NO_PERL)|g' \
+	    unimplemented.sh >$@+ && \
+	chmod +x $@+ && \
+	mv $@+ $@
+endif # NO_PERL
+
+configure: configure.ac
+	$(QUIET_GEN)$(RM) $@ $<+ && \
+	sed -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
+	    $< > $<+ && \
+	autoconf -o $@ $<+ && \
+	$(RM) $<+
+
+# These can record GIT_VERSION
+git.o git.spec \
+	$(patsubst %.sh,%,$(SCRIPT_SH)) \
+	$(patsubst %.perl,%,$(SCRIPT_PERL)) \
+	: GIT-VERSION-FILE
+
+%.o: %.c GIT-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
+%.s: %.c GIT-CFLAGS
+	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
+%.o: %.S
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
+
+exec_cmd.o: exec_cmd.c GIT-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
+		'-DGIT_EXEC_PATH="$(gitexecdir_SQ)"' \
+		'-DBINDIR="$(bindir_relative_SQ)"' \
+		'-DPREFIX="$(prefix_SQ)"' \
+		$<
+
+builtin-init-db.o: builtin-init-db.c GIT-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
+
+config.o: config.c GIT-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_GITCONFIG='"$(ETC_GITCONFIG_SQ)"' $<
+
+http.o: http.c GIT-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DGIT_USER_AGENT='"git/$(GIT_VERSION)"' $<
+
+ifdef NO_EXPAT
+http-walker.o: http-walker.c http.h GIT-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DNO_EXPAT $<
+endif
+
+git-%$X: %.o $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+git-imap-send$X: imap-send.o $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIBS) $(OPENSSL_LINK) $(OPENSSL_LIBSSL)
+
+http.o http-walker.o http-push.o transport.o: http.h
+
+git-http-push$X: revision.o http.o http-push.o $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
+
+$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
+$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+builtin-revert.o wt-status.o: wt-status.h
+
+$(LIB_FILE): $(LIB_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
+
+XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
+	xdiff/xmerge.o xdiff/xpatience.o
+$(XDIFF_OBJS): xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \
+	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
+
+$(XDIFF_LIB): $(XDIFF_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS)
+
+
+doc:
+	$(MAKE) -C Documentation all
+
+man:
+	$(MAKE) -C Documentation man
+
+html:
+	$(MAKE) -C Documentation html
+
+info:
+	$(MAKE) -C Documentation info
+
+pdf:
+	$(MAKE) -C Documentation pdf
+
+TAGS:
+	$(RM) TAGS
+	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
+
+tags:
+	$(RM) tags
+	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
+
+cscope:
+	$(RM) cscope*
+	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
+
+### Detect prefix changes
+TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
+             $(bindir_SQ):$(gitexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
+
+GIT-CFLAGS: .FORCE-GIT-CFLAGS
+	@FLAGS='$(TRACK_CFLAGS)'; \
+	    if test x"$$FLAGS" != x"`cat GIT-CFLAGS 2>/dev/null`" ; then \
+		echo 1>&2 "    * new build flags or prefix"; \
+		echo "$$FLAGS" >GIT-CFLAGS; \
+            fi
+
+# We need to apply sq twice, once to protect from the shell
+# that runs GIT-BUILD-OPTIONS, and then again to protect it
+# and the first level quoting from the shell that runs "echo".
+GIT-BUILD-OPTIONS: .FORCE-GIT-BUILD-OPTIONS
+	@echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
+	@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
+	@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
+	@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
+
+### Detect Tck/Tk interpreter path changes
+ifndef NO_TCLTK
+TRACK_VARS = $(subst ','\'',-DTCLTK_PATH='$(TCLTK_PATH_SQ)')
+
+GIT-GUI-VARS: .FORCE-GIT-GUI-VARS
+	@VARS='$(TRACK_VARS)'; \
+	    if test x"$$VARS" != x"`cat $@ 2>/dev/null`" ; then \
+		echo 1>&2 "    * new Tcl/Tk interpreter location"; \
+		echo "$$VARS" >$@; \
+            fi
+
+.PHONY: .FORCE-GIT-GUI-VARS
+endif
+
+### Testing rules
+
+TEST_PROGRAMS += test-chmtime$X
+TEST_PROGRAMS += test-ctype$X
+TEST_PROGRAMS += test-date$X
+TEST_PROGRAMS += test-delta$X
+TEST_PROGRAMS += test-dump-cache-tree$X
+TEST_PROGRAMS += test-genrandom$X
+TEST_PROGRAMS += test-match-trees$X
+TEST_PROGRAMS += test-parse-options$X
+TEST_PROGRAMS += test-path-utils$X
+TEST_PROGRAMS += test-sha1$X
+TEST_PROGRAMS += test-sigchain$X
+
+all:: $(TEST_PROGRAMS)
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+export NO_SVN_TESTS
+
+test: all
+	$(MAKE) -C t/ all
+
+test-ctype$X: ctype.o
+
+test-date$X: date.o ctype.o
+
+test-delta$X: diff-delta.o patch-delta.o
+
+test-parse-options$X: parse-options.o
+
+.PRECIOUS: $(patsubst test-%$X,test-%.o,$(TEST_PROGRAMS))
+
+test-%$X: test-%.o $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+check-sha1:: test-sha1$X
+	./test-sha1.sh
+
+check: common-cmds.h
+	if sparse; \
+	then \
+		for i in *.c; \
+		do \
+			sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+		done; \
+	else \
+		echo 2>&1 "Did you mean 'make test'?"; \
+		exit 1; \
+	fi
+
+remove-dashes:
+	./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
+
+### Installation rules
+
+ifneq ($(filter /%,$(firstword $(template_dir))),)
+template_instdir = $(template_dir)
+else
+template_instdir = $(prefix)/$(template_dir)
+endif
+export template_instdir
+
+ifneq ($(filter /%,$(firstword $(gitexecdir))),)
+gitexec_instdir = $(gitexecdir)
+else
+gitexec_instdir = $(prefix)/$(gitexecdir)
+endif
+gitexec_instdir_SQ = $(subst ','\'',$(gitexec_instdir))
+export gitexec_instdir
+
+install: all
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(gitexec_instdir_SQ)'
+	$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)'
+	$(INSTALL) git$X git-upload-pack$X git-receive-pack$X git-upload-archive$X git-shell$X git-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install
+	$(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install
+ifndef NO_TCLTK
+	$(MAKE) -C gitk-git install
+	$(MAKE) -C git-gui gitexecdir='$(gitexec_instdir_SQ)' install
+endif
+ifneq (,$X)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) git$X)), $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';)
+endif
+	bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \
+	execdir=$$(cd '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' && pwd) && \
+	{ $(RM) "$$execdir/git-add$X" && \
+		ln "$$bindir/git$X" "$$execdir/git-add$X" 2>/dev/null || \
+		cp "$$bindir/git$X" "$$execdir/git-add$X"; } && \
+	{ for p in $(filter-out git-add$X,$(BUILT_INS)); do \
+		$(RM) "$$execdir/$$p" && \
+		ln "$$execdir/git-add$X" "$$execdir/$$p" 2>/dev/null || \
+		ln -s "git-add$X" "$$execdir/$$p" 2>/dev/null || \
+		cp "$$execdir/git-add$X" "$$execdir/$$p" || exit; \
+	  done } && \
+	./check_bindir "z$$bindir" "z$$execdir" "$$bindir/git-add$X"
+
+install-doc:
+	$(MAKE) -C Documentation install
+
+install-man:
+	$(MAKE) -C Documentation install-man
+
+install-html:
+	$(MAKE) -C Documentation install-html
+
+install-info:
+	$(MAKE) -C Documentation install-info
+
+install-pdf:
+	$(MAKE) -C Documentation install-pdf
+
+quick-install-doc:
+	$(MAKE) -C Documentation quick-install
+
+quick-install-man:
+	$(MAKE) -C Documentation quick-install-man
+
+quick-install-html:
+	$(MAKE) -C Documentation quick-install-html
+
+
+
+### Maintainer's dist rules
+
+git.spec: git.spec.in
+	sed -e 's/@@VERSION@@/$(GIT_VERSION)/g' < $< > $@+
+	mv $@+ $@
+
+GIT_TARNAME=git-$(GIT_VERSION)
+dist: git.spec git-archive$(X) configure
+	./git-archive --format=tar \
+		--prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar
+	@mkdir -p $(GIT_TARNAME)
+	@cp git.spec configure $(GIT_TARNAME)
+	@echo $(GIT_VERSION) > $(GIT_TARNAME)/version
+	@$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version
+	$(TAR) rf $(GIT_TARNAME).tar \
+		$(GIT_TARNAME)/git.spec \
+		$(GIT_TARNAME)/configure \
+		$(GIT_TARNAME)/version \
+		$(GIT_TARNAME)/git-gui/version
+	@$(RM) -r $(GIT_TARNAME)
+	gzip -f -9 $(GIT_TARNAME).tar
+
+rpm: dist
+	$(RPMBUILD) -ta $(GIT_TARNAME).tar.gz
+
+htmldocs = git-htmldocs-$(GIT_VERSION)
+manpages = git-manpages-$(GIT_VERSION)
+dist-doc:
+	$(RM) -r .doc-tmp-dir
+	mkdir .doc-tmp-dir
+	$(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
+	cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
+	gzip -n -9 -f $(htmldocs).tar
+	:
+	$(RM) -r .doc-tmp-dir
+	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
+	$(MAKE) -C Documentation DESTDIR=./ \
+		man1dir=../.doc-tmp-dir/man1 \
+		man5dir=../.doc-tmp-dir/man5 \
+		man7dir=../.doc-tmp-dir/man7 \
+		install
+	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
+	gzip -n -9 -f $(manpages).tar
+	$(RM) -r .doc-tmp-dir
+
+### Cleaning rules
+
+distclean: clean
+	$(RM) configure
 
 clean:
-	rm $(BINS)
+	$(RM) *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o xdiff/*.o \
+		$(LIB_FILE) $(XDIFF_LIB)
+	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) git$X
+	$(RM) $(TEST_PROGRAMS)
+	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
+	$(RM) -r autom4te.cache
+	$(RM) config.log config.mak.autogen config.mak.append config.status config.cache
+	$(RM) -r $(GIT_TARNAME) .doc-tmp-dir
+	$(RM) $(GIT_TARNAME).tar.gz git-core_$(GIT_VERSION)-*.tar.gz
+	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
+	$(MAKE) -C Documentation/ clean
+ifndef NO_PERL
+	$(RM) gitweb/gitweb.cgi
+	$(MAKE) -C perl clean
+endif
+	$(MAKE) -C templates/ clean
+	$(MAKE) -C t/ clean
+ifndef NO_TCLTK
+	$(MAKE) -C gitk-git clean
+	$(MAKE) -C git-gui clean
+endif
+	$(RM) GIT-VERSION-FILE GIT-CFLAGS GIT-GUI-VARS GIT-BUILD-OPTIONS
+
+.PHONY: all install clean strip
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: .FORCE-GIT-VERSION-FILE TAGS tags cscope .FORCE-GIT-CFLAGS
+.PHONY: .FORCE-GIT-BUILD-OPTIONS
+
+### Check documentation
+#
+check-docs::
+	@(for v in $(ALL_PROGRAMS) $(BUILT_INS) git gitk; \
+	do \
+		case "$$v" in \
+		git-merge-octopus | git-merge-ours | git-merge-recursive | \
+		git-merge-resolve | git-merge-subtree | \
+		git-fsck-objects | git-init-db | \
+		git-?*--?* ) continue ;; \
+		esac ; \
+		test -f "Documentation/$$v.txt" || \
+		echo "no doc: $$v"; \
+		sed -e '/^#/d' command-list.txt | \
+		grep -q "^$$v[ 	]" || \
+		case "$$v" in \
+		git) ;; \
+		*) echo "no link: $$v";; \
+		esac ; \
+	done; \
+	( \
+		sed -e '/^#/d' \
+		    -e 's/[ 	].*//' \
+		    -e 's/^/listed /' command-list.txt; \
+		ls -1 Documentation/git*txt | \
+		sed -e 's|Documentation/|documented |' \
+		    -e 's/\.txt//'; \
+	) | while read how cmd; \
+	do \
+		case "$$how,$$cmd" in \
+		*,git-citool | \
+		*,git-gui | \
+		*,git-help | \
+		documented,gitattributes | \
+		documented,gitignore | \
+		documented,gitmodules | \
+		documented,gitcli | \
+		documented,git-tools | \
+		documented,gitcore-tutorial | \
+		documented,gitcvs-migration | \
+		documented,gitdiffcore | \
+		documented,gitglossary | \
+		documented,githooks | \
+		documented,gitrepository-layout | \
+		documented,gittutorial | \
+		documented,gittutorial-2 | \
+		sentinel,not,matching,is,ok ) continue ;; \
+		esac; \
+		case " $(ALL_PROGRAMS) $(BUILT_INS) git gitk " in \
+		*" $$cmd "*)	;; \
+		*) echo "removed but $$how: $$cmd" ;; \
+		esac; \
+	done ) | sort
+
+### Make sure built-ins do not have dups and listed in git.c
+#
+check-builtins::
+	./check-builtins.sh
+
+### Test suite coverage testing
+#
+.PHONY: coverage coverage-clean coverage-build coverage-report
+
+coverage:
+	$(MAKE) coverage-build
+	$(MAKE) coverage-report
+
+coverage-clean:
+	rm -f *.gcda *.gcno
+
+COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
+COVERAGE_LDFLAGS = $(CFLAGS)  -O0 -lgcov
+
+coverage-build: coverage-clean
+	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
+	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
+		-j1 test
+
+coverage-report:
+	gcov -b *.c
+	grep '^function.*called 0 ' *.c.gcov \
+		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
+		| tee coverage-untested-functions
-- 
cgit v0.10.2


From 0780060124011b94af55830939c86cc0916be0f5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 15:00:56 +0200
Subject: perf_counter tools: add in basic glue from Git

First very raw version at having a central 'perf' command and
a list of subcommands:

  perf top
  perf stat
  perf record
  perf report
  ...

This is done by picking up Git's collection of utility functions,
and hacking them to build fine in this new environment.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/.gitignore b/Documentation/perf_counter/.gitignore
new file mode 100644
index 0000000..41c0b20
--- /dev/null
+++ b/Documentation/perf_counter/.gitignore
@@ -0,0 +1,179 @@
+GIT-BUILD-OPTIONS
+GIT-CFLAGS
+GIT-GUI-VARS
+GIT-VERSION-FILE
+git
+git-add
+git-add--interactive
+git-am
+git-annotate
+git-apply
+git-archimport
+git-archive
+git-bisect
+git-bisect--helper
+git-blame
+git-branch
+git-bundle
+git-cat-file
+git-check-attr
+git-check-ref-format
+git-checkout
+git-checkout-index
+git-cherry
+git-cherry-pick
+git-clean
+git-clone
+git-commit
+git-commit-tree
+git-config
+git-count-objects
+git-cvsexportcommit
+git-cvsimport
+git-cvsserver
+git-daemon
+git-diff
+git-diff-files
+git-diff-index
+git-diff-tree
+git-difftool
+git-difftool--helper
+git-describe
+git-fast-export
+git-fast-import
+git-fetch
+git-fetch--tool
+git-fetch-pack
+git-filter-branch
+git-fmt-merge-msg
+git-for-each-ref
+git-format-patch
+git-fsck
+git-fsck-objects
+git-gc
+git-get-tar-commit-id
+git-grep
+git-hash-object
+git-help
+git-http-fetch
+git-http-push
+git-imap-send
+git-index-pack
+git-init
+git-init-db
+git-instaweb
+git-log
+git-lost-found
+git-ls-files
+git-ls-remote
+git-ls-tree
+git-mailinfo
+git-mailsplit
+git-merge
+git-merge-base
+git-merge-index
+git-merge-file
+git-merge-tree
+git-merge-octopus
+git-merge-one-file
+git-merge-ours
+git-merge-recursive
+git-merge-resolve
+git-merge-subtree
+git-mergetool
+git-mergetool--lib
+git-mktag
+git-mktree
+git-name-rev
+git-mv
+git-pack-redundant
+git-pack-objects
+git-pack-refs
+git-parse-remote
+git-patch-id
+git-peek-remote
+git-prune
+git-prune-packed
+git-pull
+git-push
+git-quiltimport
+git-read-tree
+git-rebase
+git-rebase--interactive
+git-receive-pack
+git-reflog
+git-relink
+git-remote
+git-repack
+git-repo-config
+git-request-pull
+git-rerere
+git-reset
+git-rev-list
+git-rev-parse
+git-revert
+git-rm
+git-send-email
+git-send-pack
+git-sh-setup
+git-shell
+git-shortlog
+git-show
+git-show-branch
+git-show-index
+git-show-ref
+git-stage
+git-stash
+git-status
+git-stripspace
+git-submodule
+git-svn
+git-symbolic-ref
+git-tag
+git-tar-tree
+git-unpack-file
+git-unpack-objects
+git-update-index
+git-update-ref
+git-update-server-info
+git-upload-archive
+git-upload-pack
+git-var
+git-verify-pack
+git-verify-tag
+git-web--browse
+git-whatchanged
+git-write-tree
+git-core-*/?*
+gitk-wish
+gitweb/gitweb.cgi
+test-chmtime
+test-ctype
+test-date
+test-delta
+test-dump-cache-tree
+test-genrandom
+test-match-trees
+test-parse-options
+test-path-utils
+test-sha1
+test-sigchain
+common-cmds.h
+*.tar.gz
+*.dsc
+*.deb
+git.spec
+*.exe
+*.[aos]
+*.py[co]
+config.mak
+autom4te.cache
+config.cache
+config.log
+config.status
+config.mak.autogen
+config.mak.append
+configure
+tags
+TAGS
+cscope*
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 6e0838b..11809b9 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -13,16 +13,9 @@ all::
 # Define NO_OPENSSL environment variable if you do not have OpenSSL.
 # This also implies MOZILLA_SHA1.
 #
-# Define NO_CURL if you do not have libcurl installed.  git-http-pull and
-# git-http-push are not built, and you cannot use http:// and https://
-# transports.
-#
 # Define CURLDIR=/foo/bar if your curl header and library files are in
 # /foo/bar/include and /foo/bar/lib directories.
 #
-# Define NO_EXPAT if you do not have expat installed.  git-http-push is
-# not built, and you cannot push using http:// and https:// transports.
-#
 # Define EXPATDIR=/foo/bar if your expat header and library files are in
 # /foo/bar/include and /foo/bar/lib directories.
 #
@@ -40,8 +33,6 @@ all::
 #
 # Define NO_MEMMEM if you don't have memmem.
 #
-# Define NO_STRLCPY if you don't have strlcpy.
-#
 # Define NO_STRTOUMAX if you don't have strtoumax in the C library.
 # If your compiler also does not support long long or does not have
 # strtoull, define NO_STRTOULL.
@@ -54,7 +45,7 @@ all::
 #
 # Define NO_SYS_SELECT_H if you don't have sys/select.h.
 #
-# Define NO_SYMLINK_HEAD if you never want .git/HEAD to be a symbolic link.
+# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link.
 # Enable it on Windows.  By default, symrefs are still used.
 #
 # Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
@@ -62,13 +53,13 @@ all::
 # but are not needed unless you plan to talk to SVN repos.
 #
 # Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
-# installed in /sw, but don't want GIT to link against any libraries
+# installed in /sw, but don't want PERF to link against any libraries
 # installed there.  If defined you may specify your own (or Fink's)
 # include directories and library directories by defining CFLAGS
 # and LDFLAGS appropriately.
 #
 # Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
-# have DarwinPorts installed in /opt/local, but don't want GIT to
+# have DarwinPorts installed in /opt/local, but don't want PERF to
 # link against any libraries installed there.  If defined you may
 # specify your own (or DarwinPort's) include directories and
 # library directories by defining CFLAGS and LDFLAGS appropriately.
@@ -120,7 +111,7 @@ all::
 # that tells runtime paths to dynamic libraries;
 # "-Wl,-rpath=/path/lib" is used instead.
 #
-# Define USE_NSEC below if you want git to care about sub-second file mtimes
+# Define USE_NSEC below if you want perf to care about sub-second file mtimes
 # and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
 # it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
 # randomly break unless your underlying filesystem supports those sub-second
@@ -132,7 +123,7 @@ all::
 # Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
 # available.  This automatically turns USE_NSEC off.
 #
-# Define USE_STDEV below if you want git to care about the underlying device
+# Define USE_STDEV below if you want perf to care about the underlying device
 # change being considered an inode change from the update-index perspective.
 #
 # Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
@@ -150,27 +141,24 @@ all::
 # Define NO_TCLTK if you do not want Tcl/Tk GUI.
 #
 # The TCL_PATH variable governs the location of the Tcl interpreter
-# used to optimize git-gui for your system.  Only used if NO_TCLTK
+# used to optimize perf-gui for your system.  Only used if NO_TCLTK
 # is not set.  Defaults to the bare 'tclsh'.
 #
 # The TCLTK_PATH variable governs the location of the Tcl/Tk interpreter.
 # If not set it defaults to the bare 'wish'. If it is set to the empty
 # string then NO_TCLTK will be forced (this is used by configure script).
 #
-# Define THREADED_DELTA_SEARCH if you have pthreads and wish to exploit
-# parallel delta searching when packing objects.
-#
 # Define INTERNAL_QSORT to use Git's implementation of qsort(), which
 # is a simplified version of the merge sort used in glibc. This is
 # recommended if Git triggers O(n^2) behavior in your platform's qsort().
 #
-# Define NO_EXTERNAL_GREP if you don't want "git grep" to ever call
+# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
 # your external grep (e.g., if your system lacks grep, if its grep is
-# broken, or spawning external process is slower than built-in grep git has).
+# broken, or spawning external process is slower than built-in grep perf has).
 
-GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE
-	@$(SHELL_PATH) ./GIT-VERSION-GEN
--include GIT-VERSION-FILE
+PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
+	@$(SHELL_PATH) ./PERF-VERSION-GEN
+-include PERF-VERSION-FILE
 
 uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
 uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
@@ -182,20 +170,20 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
 CFLAGS = -g -O2 -Wall
-LDFLAGS =
+LDFLAGS = -lpthread -lrt
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
 STRIP ?= strip
 
 # Among the variables below, these:
-#   gitexecdir
+#   perfexecdir
 #   template_dir
 #   mandir
 #   infodir
 #   htmldir
-#   ETC_GITCONFIG (but not sysconfdir)
+#   ETC_PERFCONFIG (but not sysconfdir)
 # can be specified as a relative path some/where/else;
-# this is interpreted as relative to $(prefix) and "git" at
+# this is interpreted as relative to $(prefix) and "perf" at
 # runtime figures out where they are based on the path to the executable.
 # This can help installing the suite in a relocatable way.
 
@@ -204,38 +192,20 @@ bindir_relative = bin
 bindir = $(prefix)/$(bindir_relative)
 mandir = share/man
 infodir = share/info
-gitexecdir = libexec/git-core
+perfexecdir = libexec/perf-core
 sharedir = $(prefix)/share
-template_dir = share/git-core/templates
-htmldir = share/doc/git-doc
+template_dir = share/perf-core/templates
+htmldir = share/doc/perf-doc
 ifeq ($(prefix),/usr)
 sysconfdir = /etc
-ETC_GITCONFIG = $(sysconfdir)/gitconfig
+ETC_PERFCONFIG = $(sysconfdir)/perfconfig
 else
 sysconfdir = $(prefix)/etc
-ETC_GITCONFIG = etc/gitconfig
+ETC_PERFCONFIG = etc/perfconfig
 endif
 lib = lib
 # DESTDIR=
 
-# default configuration for gitweb
-GITWEB_CONFIG = gitweb_config.perl
-GITWEB_CONFIG_SYSTEM = /etc/gitweb.conf
-GITWEB_HOME_LINK_STR = projects
-GITWEB_SITENAME =
-GITWEB_PROJECTROOT = /pub/git
-GITWEB_PROJECT_MAXDEPTH = 2007
-GITWEB_EXPORT_OK =
-GITWEB_STRICT_EXPORT =
-GITWEB_BASE_URL =
-GITWEB_LIST =
-GITWEB_HOMETEXT = indextext.html
-GITWEB_CSS = gitweb.css
-GITWEB_LOGO = git-logo.png
-GITWEB_FAVICON = git-favicon.png
-GITWEB_SITE_HEADER =
-GITWEB_SITE_FOOTER =
-
 export prefix bindir sharedir sysconfdir
 
 CC = gcc
@@ -277,89 +247,46 @@ SCRIPT_PERL =
 SCRIPT_SH =
 TEST_PROGRAMS =
 
-SCRIPT_SH += git-am.sh
-SCRIPT_SH += git-bisect.sh
-SCRIPT_SH += git-difftool--helper.sh
-SCRIPT_SH += git-filter-branch.sh
-SCRIPT_SH += git-lost-found.sh
-SCRIPT_SH += git-merge-octopus.sh
-SCRIPT_SH += git-merge-one-file.sh
-SCRIPT_SH += git-merge-resolve.sh
-SCRIPT_SH += git-mergetool.sh
-SCRIPT_SH += git-mergetool--lib.sh
-SCRIPT_SH += git-parse-remote.sh
-SCRIPT_SH += git-pull.sh
-SCRIPT_SH += git-quiltimport.sh
-SCRIPT_SH += git-rebase--interactive.sh
-SCRIPT_SH += git-rebase.sh
-SCRIPT_SH += git-repack.sh
-SCRIPT_SH += git-request-pull.sh
-SCRIPT_SH += git-sh-setup.sh
-SCRIPT_SH += git-stash.sh
-SCRIPT_SH += git-submodule.sh
-SCRIPT_SH += git-web--browse.sh
-
-SCRIPT_PERL += git-add--interactive.perl
-SCRIPT_PERL += git-difftool.perl
-SCRIPT_PERL += git-archimport.perl
-SCRIPT_PERL += git-cvsexportcommit.perl
-SCRIPT_PERL += git-cvsimport.perl
-SCRIPT_PERL += git-cvsserver.perl
-SCRIPT_PERL += git-relink.perl
-SCRIPT_PERL += git-send-email.perl
-SCRIPT_PERL += git-svn.perl
+#
+# No scripts right now:
+#
+
+# SCRIPT_SH += perf-am.sh
+
+#
+# No Perl scripts right now:
+#
+
+# SCRIPT_PERL += perf-add--interactive.perl
 
 SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
-	  $(patsubst %.perl,%,$(SCRIPT_PERL)) \
-	  git-instaweb
+	  $(patsubst %.perl,%,$(SCRIPT_PERL))
 
 # Empty...
 EXTRA_PROGRAMS =
 
-# ... and all the rest that could be moved out of bindir to gitexecdir
+# ... and all the rest that could be moved out of bindir to perfexecdir
 PROGRAMS += $(EXTRA_PROGRAMS)
-PROGRAMS += git-fast-import$X
-PROGRAMS += git-hash-object$X
-PROGRAMS += git-index-pack$X
-PROGRAMS += git-merge-index$X
-PROGRAMS += git-merge-tree$X
-PROGRAMS += git-mktag$X
-PROGRAMS += git-mktree$X
-PROGRAMS += git-pack-redundant$X
-PROGRAMS += git-patch-id$X
-PROGRAMS += git-shell$X
-PROGRAMS += git-show-index$X
-PROGRAMS += git-unpack-file$X
-PROGRAMS += git-update-server-info$X
-PROGRAMS += git-upload-pack$X
-PROGRAMS += git-var$X
+
+#
+# None right now:
+#
+# PROGRAMS += perf-fast-import$X
 
 # List built-in command $C whose implementation cmd_$C() is not in
 # builtin-$C.o but is linked in as part of some other command.
-BUILT_INS += $(patsubst builtin-%.o,git-%$X,$(BUILTIN_OBJS))
-
-BUILT_INS += git-cherry$X
-BUILT_INS += git-cherry-pick$X
-BUILT_INS += git-format-patch$X
-BUILT_INS += git-fsck-objects$X
-BUILT_INS += git-get-tar-commit-id$X
-BUILT_INS += git-init$X
-BUILT_INS += git-merge-subtree$X
-BUILT_INS += git-peek-remote$X
-BUILT_INS += git-repo-config$X
-BUILT_INS += git-show$X
-BUILT_INS += git-stage$X
-BUILT_INS += git-status$X
-BUILT_INS += git-whatchanged$X
-
-# what 'all' will build and 'install' will install, in gitexecdir
+BUILT_INS += $(patsubst builtin-%.o,perf-%$X,$(BUILTIN_OBJS))
+
+#
+# None right now:
+#
+# BUILT_INS += perf-init $X
+
+# what 'all' will build and 'install' will install, in perfexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
 
-# what 'all' will build but not install in gitexecdir
-OTHER_PROGRAMS = git$X
-ifndef NO_PERL
-OTHER_PROGRAMS += gitweb/gitweb.cgi
-endif
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = perf$X
 
 # Set paths to tools early so that they can be used for version tests.
 ifndef SHELL_PATH
@@ -371,250 +298,34 @@ endif
 
 export PERL_PATH
 
-LIB_FILE=libgit.a
-XDIFF_LIB=xdiff/lib.a
-
-LIB_H += archive.h
-LIB_H += attr.h
-LIB_H += blob.h
-LIB_H += builtin.h
-LIB_H += cache.h
-LIB_H += cache-tree.h
-LIB_H += commit.h
-LIB_H += compat/cygwin.h
-LIB_H += compat/mingw.h
-LIB_H += csum-file.h
-LIB_H += decorate.h
-LIB_H += delta.h
-LIB_H += diffcore.h
-LIB_H += diff.h
-LIB_H += dir.h
-LIB_H += fsck.h
-LIB_H += git-compat-util.h
-LIB_H += graph.h
-LIB_H += grep.h
-LIB_H += hash.h
-LIB_H += help.h
+LIB_FILE=libperf.a
+
+LIB_H += ../../include/linux/perf_counter.h
 LIB_H += levenshtein.h
-LIB_H += list-objects.h
-LIB_H += ll-merge.h
-LIB_H += log-tree.h
-LIB_H += mailmap.h
-LIB_H += merge-recursive.h
-LIB_H += object.h
-LIB_H += pack.h
-LIB_H += pack-refs.h
-LIB_H += pack-revindex.h
 LIB_H += parse-options.h
-LIB_H += patch-ids.h
-LIB_H += pkt-line.h
-LIB_H += progress.h
 LIB_H += quote.h
-LIB_H += reflog-walk.h
-LIB_H += refs.h
-LIB_H += remote.h
-LIB_H += rerere.h
-LIB_H += revision.h
-LIB_H += run-command.h
-LIB_H += sha1-lookup.h
-LIB_H += sideband.h
-LIB_H += sigchain.h
 LIB_H += strbuf.h
-LIB_H += string-list.h
-LIB_H += tag.h
-LIB_H += transport.h
-LIB_H += tree.h
-LIB_H += tree-walk.h
-LIB_H += unpack-trees.h
-LIB_H += userdiff.h
-LIB_H += utf8.h
-LIB_H += wt-status.h
+LIB_H += run-command.h
 
 LIB_OBJS += abspath.o
 LIB_OBJS += alias.o
-LIB_OBJS += alloc.o
-LIB_OBJS += archive.o
-LIB_OBJS += archive-tar.o
-LIB_OBJS += archive-zip.o
-LIB_OBJS += attr.o
-LIB_OBJS += base85.o
-LIB_OBJS += bisect.o
-LIB_OBJS += blob.o
-LIB_OBJS += branch.o
-LIB_OBJS += bundle.o
-LIB_OBJS += cache-tree.o
-LIB_OBJS += color.o
-LIB_OBJS += combine-diff.o
-LIB_OBJS += commit.o
 LIB_OBJS += config.o
-LIB_OBJS += connect.o
-LIB_OBJS += convert.o
-LIB_OBJS += copy.o
-LIB_OBJS += csum-file.o
 LIB_OBJS += ctype.o
-LIB_OBJS += date.o
-LIB_OBJS += decorate.o
-LIB_OBJS += diffcore-break.o
-LIB_OBJS += diffcore-delta.o
-LIB_OBJS += diffcore-order.o
-LIB_OBJS += diffcore-pickaxe.o
-LIB_OBJS += diffcore-rename.o
-LIB_OBJS += diff-delta.o
-LIB_OBJS += diff-lib.o
-LIB_OBJS += diff-no-index.o
-LIB_OBJS += diff.o
-LIB_OBJS += dir.o
-LIB_OBJS += editor.o
-LIB_OBJS += entry.o
-LIB_OBJS += environment.o
 LIB_OBJS += exec_cmd.o
-LIB_OBJS += fsck.o
-LIB_OBJS += graph.o
-LIB_OBJS += grep.o
-LIB_OBJS += hash.o
 LIB_OBJS += help.o
-LIB_OBJS += ident.o
 LIB_OBJS += levenshtein.o
-LIB_OBJS += list-objects.o
-LIB_OBJS += ll-merge.o
-LIB_OBJS += lockfile.o
-LIB_OBJS += log-tree.o
-LIB_OBJS += mailmap.o
-LIB_OBJS += match-trees.o
-LIB_OBJS += merge-file.o
-LIB_OBJS += merge-recursive.o
-LIB_OBJS += name-hash.o
-LIB_OBJS += object.o
-LIB_OBJS += pack-check.o
-LIB_OBJS += pack-refs.o
-LIB_OBJS += pack-revindex.o
-LIB_OBJS += pack-write.o
-LIB_OBJS += pager.o
 LIB_OBJS += parse-options.o
-LIB_OBJS += patch-delta.o
-LIB_OBJS += patch-ids.o
 LIB_OBJS += path.o
-LIB_OBJS += pkt-line.o
-LIB_OBJS += preload-index.o
-LIB_OBJS += pretty.o
-LIB_OBJS += progress.o
-LIB_OBJS += quote.o
-LIB_OBJS += reachable.o
-LIB_OBJS += read-cache.o
-LIB_OBJS += reflog-walk.o
-LIB_OBJS += refs.o
-LIB_OBJS += remote.o
-LIB_OBJS += rerere.o
-LIB_OBJS += revision.o
 LIB_OBJS += run-command.o
-LIB_OBJS += server-info.o
-LIB_OBJS += setup.o
-LIB_OBJS += sha1-lookup.o
-LIB_OBJS += sha1_file.o
-LIB_OBJS += sha1_name.o
-LIB_OBJS += shallow.o
-LIB_OBJS += sideband.o
-LIB_OBJS += sigchain.o
+LIB_OBJS += quote.o
 LIB_OBJS += strbuf.o
-LIB_OBJS += string-list.o
-LIB_OBJS += symlinks.o
-LIB_OBJS += tag.o
-LIB_OBJS += trace.o
-LIB_OBJS += transport.o
-LIB_OBJS += tree-diff.o
-LIB_OBJS += tree.o
-LIB_OBJS += tree-walk.o
-LIB_OBJS += unpack-trees.o
 LIB_OBJS += usage.o
-LIB_OBJS += userdiff.o
-LIB_OBJS += utf8.o
-LIB_OBJS += walker.o
 LIB_OBJS += wrapper.o
-LIB_OBJS += write_or_die.o
-LIB_OBJS += ws.o
-LIB_OBJS += wt-status.o
-LIB_OBJS += xdiff-interface.o
-
-BUILTIN_OBJS += builtin-add.o
-BUILTIN_OBJS += builtin-annotate.o
-BUILTIN_OBJS += builtin-apply.o
-BUILTIN_OBJS += builtin-archive.o
-BUILTIN_OBJS += builtin-bisect--helper.o
-BUILTIN_OBJS += builtin-blame.o
-BUILTIN_OBJS += builtin-branch.o
-BUILTIN_OBJS += builtin-bundle.o
-BUILTIN_OBJS += builtin-cat-file.o
-BUILTIN_OBJS += builtin-check-attr.o
-BUILTIN_OBJS += builtin-check-ref-format.o
-BUILTIN_OBJS += builtin-checkout-index.o
-BUILTIN_OBJS += builtin-checkout.o
-BUILTIN_OBJS += builtin-clean.o
-BUILTIN_OBJS += builtin-clone.o
-BUILTIN_OBJS += builtin-commit-tree.o
-BUILTIN_OBJS += builtin-commit.o
-BUILTIN_OBJS += builtin-config.o
-BUILTIN_OBJS += builtin-count-objects.o
-BUILTIN_OBJS += builtin-describe.o
-BUILTIN_OBJS += builtin-diff-files.o
-BUILTIN_OBJS += builtin-diff-index.o
-BUILTIN_OBJS += builtin-diff-tree.o
-BUILTIN_OBJS += builtin-diff.o
-BUILTIN_OBJS += builtin-fast-export.o
-BUILTIN_OBJS += builtin-fetch--tool.o
-BUILTIN_OBJS += builtin-fetch-pack.o
-BUILTIN_OBJS += builtin-fetch.o
-BUILTIN_OBJS += builtin-fmt-merge-msg.o
-BUILTIN_OBJS += builtin-for-each-ref.o
-BUILTIN_OBJS += builtin-fsck.o
-BUILTIN_OBJS += builtin-gc.o
-BUILTIN_OBJS += builtin-grep.o
+
 BUILTIN_OBJS += builtin-help.o
-BUILTIN_OBJS += builtin-init-db.o
-BUILTIN_OBJS += builtin-log.o
-BUILTIN_OBJS += builtin-ls-files.o
-BUILTIN_OBJS += builtin-ls-remote.o
-BUILTIN_OBJS += builtin-ls-tree.o
-BUILTIN_OBJS += builtin-mailinfo.o
-BUILTIN_OBJS += builtin-mailsplit.o
-BUILTIN_OBJS += builtin-merge.o
-BUILTIN_OBJS += builtin-merge-base.o
-BUILTIN_OBJS += builtin-merge-file.o
-BUILTIN_OBJS += builtin-merge-ours.o
-BUILTIN_OBJS += builtin-merge-recursive.o
-BUILTIN_OBJS += builtin-mv.o
-BUILTIN_OBJS += builtin-name-rev.o
-BUILTIN_OBJS += builtin-pack-objects.o
-BUILTIN_OBJS += builtin-pack-refs.o
-BUILTIN_OBJS += builtin-prune-packed.o
-BUILTIN_OBJS += builtin-prune.o
-BUILTIN_OBJS += builtin-push.o
-BUILTIN_OBJS += builtin-read-tree.o
-BUILTIN_OBJS += builtin-receive-pack.o
-BUILTIN_OBJS += builtin-reflog.o
-BUILTIN_OBJS += builtin-remote.o
-BUILTIN_OBJS += builtin-rerere.o
-BUILTIN_OBJS += builtin-reset.o
-BUILTIN_OBJS += builtin-rev-list.o
-BUILTIN_OBJS += builtin-rev-parse.o
-BUILTIN_OBJS += builtin-revert.o
-BUILTIN_OBJS += builtin-rm.o
-BUILTIN_OBJS += builtin-send-pack.o
-BUILTIN_OBJS += builtin-shortlog.o
-BUILTIN_OBJS += builtin-show-branch.o
-BUILTIN_OBJS += builtin-show-ref.o
-BUILTIN_OBJS += builtin-stripspace.o
-BUILTIN_OBJS += builtin-symbolic-ref.o
-BUILTIN_OBJS += builtin-tag.o
-BUILTIN_OBJS += builtin-tar-tree.o
-BUILTIN_OBJS += builtin-unpack-objects.o
-BUILTIN_OBJS += builtin-update-index.o
-BUILTIN_OBJS += builtin-update-ref.o
-BUILTIN_OBJS += builtin-upload-archive.o
-BUILTIN_OBJS += builtin-verify-pack.o
-BUILTIN_OBJS += builtin-verify-tag.o
-BUILTIN_OBJS += builtin-write-tree.o
-
-GITLIBS = $(LIB_FILE) $(XDIFF_LIB)
+BUILTIN_OBJS += builtin-top.o
+
+PERFLIBS = $(LIB_FILE)
 EXTLIBS =
 
 #
@@ -625,221 +336,6 @@ EXTLIBS =
 # because maintaining the nesting to match is a pain.  If
 # we had "elif" things would have been much nicer...
 
-ifeq ($(uname_S),Linux)
-	NO_STRLCPY = YesPlease
-	THREADED_DELTA_SEARCH = YesPlease
-endif
-ifeq ($(uname_S),GNU/kFreeBSD)
-	NO_STRLCPY = YesPlease
-	THREADED_DELTA_SEARCH = YesPlease
-endif
-ifeq ($(uname_S),UnixWare)
-	CC = cc
-	NEEDS_SOCKET = YesPlease
-	NEEDS_NSL = YesPlease
-	NEEDS_SSL_WITH_CRYPTO = YesPlease
-	NEEDS_LIBICONV = YesPlease
-	SHELL_PATH = /usr/local/bin/bash
-	NO_IPV6 = YesPlease
-	NO_HSTRERROR = YesPlease
-	BASIC_CFLAGS += -Kthread
-	BASIC_CFLAGS += -I/usr/local/include
-	BASIC_LDFLAGS += -L/usr/local/lib
-	INSTALL = ginstall
-	TAR = gtar
-	NO_STRCASESTR = YesPlease
-	NO_MEMMEM = YesPlease
-endif
-ifeq ($(uname_S),SCO_SV)
-	ifeq ($(uname_R),3.2)
-		CFLAGS = -O2
-	endif
-	ifeq ($(uname_R),5)
-		CC = cc
-		BASIC_CFLAGS += -Kthread
-	endif
-	NEEDS_SOCKET = YesPlease
-	NEEDS_NSL = YesPlease
-	NEEDS_SSL_WITH_CRYPTO = YesPlease
-	NEEDS_LIBICONV = YesPlease
-	SHELL_PATH = /usr/bin/bash
-	NO_IPV6 = YesPlease
-	NO_HSTRERROR = YesPlease
-	BASIC_CFLAGS += -I/usr/local/include
-	BASIC_LDFLAGS += -L/usr/local/lib
-	NO_STRCASESTR = YesPlease
-	NO_MEMMEM = YesPlease
-	INSTALL = ginstall
-	TAR = gtar
-endif
-ifeq ($(uname_S),Darwin)
-	NEEDS_SSL_WITH_CRYPTO = YesPlease
-	NEEDS_LIBICONV = YesPlease
-	ifeq ($(shell expr "$(uname_R)" : '[15678]\.'),2)
-		OLD_ICONV = UnfortunatelyYes
-	endif
-	ifeq ($(shell expr "$(uname_R)" : '[15]\.'),2)
-		NO_STRLCPY = YesPlease
-	endif
-	NO_MEMMEM = YesPlease
-	THREADED_DELTA_SEARCH = YesPlease
-	USE_ST_TIMESPEC = YesPlease
-endif
-ifeq ($(uname_S),SunOS)
-	NEEDS_SOCKET = YesPlease
-	NEEDS_NSL = YesPlease
-	SHELL_PATH = /bin/bash
-	NO_STRCASESTR = YesPlease
-	NO_MEMMEM = YesPlease
-	NO_HSTRERROR = YesPlease
-	NO_MKDTEMP = YesPlease
-	OLD_ICONV = UnfortunatelyYes
-	ifeq ($(uname_R),5.8)
-		NO_UNSETENV = YesPlease
-		NO_SETENV = YesPlease
-		NO_C99_FORMAT = YesPlease
-		NO_STRTOUMAX = YesPlease
-	endif
-	ifeq ($(uname_R),5.9)
-		NO_UNSETENV = YesPlease
-		NO_SETENV = YesPlease
-		NO_C99_FORMAT = YesPlease
-		NO_STRTOUMAX = YesPlease
-	endif
-	INSTALL = ginstall
-	TAR = gtar
-	BASIC_CFLAGS += -D__EXTENSIONS__
-endif
-ifeq ($(uname_O),Cygwin)
-	NO_D_TYPE_IN_DIRENT = YesPlease
-	NO_D_INO_IN_DIRENT = YesPlease
-	NO_STRCASESTR = YesPlease
-	NO_MEMMEM = YesPlease
-	NO_SYMLINK_HEAD = YesPlease
-	NEEDS_LIBICONV = YesPlease
-	NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes
-	NO_TRUSTABLE_FILEMODE = UnfortunatelyYes
-	OLD_ICONV = UnfortunatelyYes
-	# There are conflicting reports about this.
-	# On some boxes NO_MMAP is needed, and not so elsewhere.
-	# Try commenting this out if you suspect MMAP is more efficient
-	NO_MMAP = YesPlease
-	NO_IPV6 = YesPlease
-	X = .exe
-endif
-ifeq ($(uname_S),FreeBSD)
-	NEEDS_LIBICONV = YesPlease
-	NO_MEMMEM = YesPlease
-	BASIC_CFLAGS += -I/usr/local/include
-	BASIC_LDFLAGS += -L/usr/local/lib
-	DIR_HAS_BSD_GROUP_SEMANTICS = YesPlease
-	USE_ST_TIMESPEC = YesPlease
-	THREADED_DELTA_SEARCH = YesPlease
-	ifeq ($(shell expr "$(uname_R)" : '4\.'),2)
-		PTHREAD_LIBS = -pthread
-		NO_UINTMAX_T = YesPlease
-		NO_STRTOUMAX = YesPlease
-	endif
-endif
-ifeq ($(uname_S),OpenBSD)
-	NO_STRCASESTR = YesPlease
-	NO_MEMMEM = YesPlease
-	NEEDS_LIBICONV = YesPlease
-	BASIC_CFLAGS += -I/usr/local/include
-	BASIC_LDFLAGS += -L/usr/local/lib
-	THREADED_DELTA_SEARCH = YesPlease
-endif
-ifeq ($(uname_S),NetBSD)
-	ifeq ($(shell expr "$(uname_R)" : '[01]\.'),2)
-		NEEDS_LIBICONV = YesPlease
-	endif
-	BASIC_CFLAGS += -I/usr/pkg/include
-	BASIC_LDFLAGS += -L/usr/pkg/lib $(CC_LD_DYNPATH)/usr/pkg/lib
-	THREADED_DELTA_SEARCH = YesPlease
-endif
-ifeq ($(uname_S),AIX)
-	NO_STRCASESTR=YesPlease
-	NO_MEMMEM = YesPlease
-	NO_MKDTEMP = YesPlease
-	NO_STRLCPY = YesPlease
-	NO_NSEC = YesPlease
-	FREAD_READS_DIRECTORIES = UnfortunatelyYes
-	INTERNAL_QSORT = UnfortunatelyYes
-	NEEDS_LIBICONV=YesPlease
-	BASIC_CFLAGS += -D_LARGE_FILES
-	ifneq ($(shell expr "$(uname_V)" : '[1234]'),1)
-		THREADED_DELTA_SEARCH = YesPlease
-	else
-		NO_PTHREADS = YesPlease
-	endif
-endif
-ifeq ($(uname_S),GNU)
-	# GNU/Hurd
-	NO_STRLCPY=YesPlease
-endif
-ifeq ($(uname_S),IRIX64)
-	NO_IPV6=YesPlease
-	NO_SETENV=YesPlease
-	NO_STRCASESTR=YesPlease
-	NO_MEMMEM = YesPlease
-	NO_STRLCPY = YesPlease
-	NO_SOCKADDR_STORAGE=YesPlease
-	SHELL_PATH=/usr/gnu/bin/bash
-	BASIC_CFLAGS += -DPATH_MAX=1024
-	# for now, build 32-bit version
-	BASIC_LDFLAGS += -L/usr/lib32
-endif
-ifeq ($(uname_S),HP-UX)
-	NO_IPV6=YesPlease
-	NO_SETENV=YesPlease
-	NO_STRCASESTR=YesPlease
-	NO_MEMMEM = YesPlease
-	NO_STRLCPY = YesPlease
-	NO_MKDTEMP = YesPlease
-	NO_UNSETENV = YesPlease
-	NO_HSTRERROR = YesPlease
-	NO_SYS_SELECT_H = YesPlease
-	SNPRINTF_RETURNS_BOGUS = YesPlease
-endif
-ifneq (,$(findstring CYGWIN,$(uname_S)))
-	COMPAT_OBJS += compat/cygwin.o
-endif
-ifneq (,$(findstring MINGW,$(uname_S)))
-	NO_PREAD = YesPlease
-	NO_OPENSSL = YesPlease
-	NO_CURL = YesPlease
-	NO_SYMLINK_HEAD = YesPlease
-	NO_IPV6 = YesPlease
-	NO_SETENV = YesPlease
-	NO_UNSETENV = YesPlease
-	NO_STRCASESTR = YesPlease
-	NO_STRLCPY = YesPlease
-	NO_MEMMEM = YesPlease
-	NO_PTHREADS = YesPlease
-	NEEDS_LIBICONV = YesPlease
-	OLD_ICONV = YesPlease
-	NO_C99_FORMAT = YesPlease
-	NO_STRTOUMAX = YesPlease
-	NO_MKDTEMP = YesPlease
-	SNPRINTF_RETURNS_BOGUS = YesPlease
-	NO_SVN_TESTS = YesPlease
-	NO_PERL_MAKEMAKER = YesPlease
-	RUNTIME_PREFIX = YesPlease
-	NO_POSIX_ONLY_PROGRAMS = YesPlease
-	NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
-	NO_NSEC = YesPlease
-	USE_WIN32_MMAP = YesPlease
-	COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat -Icompat/regex -Icompat/fnmatch
-	COMPAT_CFLAGS += -DSNPRINTF_SIZE_CORR=1
-	COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
-	COMPAT_OBJS += compat/mingw.o compat/fnmatch/fnmatch.o compat/regex/regex.o compat/winansi.o
-	EXTLIBS += -lws2_32
-	X = .exe
-endif
-ifneq (,$(findstring arm,$(uname_M)))
-	ARM_SHA1 = YesPlease
-endif
-
 -include config.mak.autogen
 -include config.mak
 
@@ -869,72 +365,12 @@ ifndef CC_LD_DYNPATH
 	endif
 endif
 
-ifdef NO_CURL
-	BASIC_CFLAGS += -DNO_CURL
-else
-	ifdef CURLDIR
-		# Try "-Wl,-rpath=$(CURLDIR)/$(lib)" in such a case.
-		BASIC_CFLAGS += -I$(CURLDIR)/include
-		CURL_LIBCURL = -L$(CURLDIR)/$(lib) $(CC_LD_DYNPATH)$(CURLDIR)/$(lib) -lcurl
-	else
-		CURL_LIBCURL = -lcurl
-	endif
-	BUILTIN_OBJS += builtin-http-fetch.o
-	EXTLIBS += $(CURL_LIBCURL)
-	LIB_OBJS += http.o http-walker.o
-	curl_check := $(shell (echo 070908; curl-config --vernum) | sort -r | sed -ne 2p)
-	ifeq "$(curl_check)" "070908"
-		ifndef NO_EXPAT
-			PROGRAMS += git-http-push$X
-		endif
-	endif
-	ifndef NO_EXPAT
-		ifdef EXPATDIR
-			BASIC_CFLAGS += -I$(EXPATDIR)/include
-			EXPAT_LIBEXPAT = -L$(EXPATDIR)/$(lib) $(CC_LD_DYNPATH)$(EXPATDIR)/$(lib) -lexpat
-		else
-			EXPAT_LIBEXPAT = -lexpat
-		endif
-	endif
-endif
-
 ifdef ZLIB_PATH
 	BASIC_CFLAGS += -I$(ZLIB_PATH)/include
 	EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib)
 endif
 EXTLIBS += -lz
 
-ifndef NO_POSIX_ONLY_PROGRAMS
-	PROGRAMS += git-daemon$X
-	PROGRAMS += git-imap-send$X
-endif
-ifndef NO_OPENSSL
-	OPENSSL_LIBSSL = -lssl
-	ifdef OPENSSLDIR
-		BASIC_CFLAGS += -I$(OPENSSLDIR)/include
-		OPENSSL_LINK = -L$(OPENSSLDIR)/$(lib) $(CC_LD_DYNPATH)$(OPENSSLDIR)/$(lib)
-	else
-		OPENSSL_LINK =
-	endif
-else
-	BASIC_CFLAGS += -DNO_OPENSSL
-	MOZILLA_SHA1 = 1
-	OPENSSL_LIBSSL =
-endif
-ifdef NEEDS_SSL_WITH_CRYPTO
-	LIB_4_CRYPTO = $(OPENSSL_LINK) -lcrypto -lssl
-else
-	LIB_4_CRYPTO = $(OPENSSL_LINK) -lcrypto
-endif
-ifdef NEEDS_LIBICONV
-	ifdef ICONVDIR
-		BASIC_CFLAGS += -I$(ICONVDIR)/include
-		ICONV_LINK = -L$(ICONVDIR)/$(lib) $(CC_LD_DYNPATH)$(ICONVDIR)/$(lib)
-	else
-		ICONV_LINK =
-	endif
-	EXTLIBS += $(ICONV_LINK) -liconv
-endif
 ifdef NEEDS_SOCKET
 	EXTLIBS += -lsocket
 endif
@@ -977,10 +413,6 @@ ifdef NO_STRCASESTR
 	COMPAT_CFLAGS += -DNO_STRCASESTR
 	COMPAT_OBJS += compat/strcasestr.o
 endif
-ifdef NO_STRLCPY
-	COMPAT_CFLAGS += -DNO_STRLCPY
-	COMPAT_OBJS += compat/strlcpy.o
-endif
 ifdef NO_STRTOUMAX
 	COMPAT_CFLAGS += -DNO_STRTOUMAX
 	COMPAT_OBJS += compat/strtoumax.o
@@ -1090,17 +522,6 @@ ifdef RUNTIME_PREFIX
 	COMPAT_CFLAGS += -DRUNTIME_PREFIX
 endif
 
-ifdef NO_PTHREADS
-	THREADED_DELTA_SEARCH =
-	BASIC_CFLAGS += -DNO_PTHREADS
-else
-	EXTLIBS += $(PTHREAD_LIBS)
-endif
-
-ifdef THREADED_DELTA_SEARCH
-	BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH
-	LIB_OBJS += thread-utils.o
-endif
 ifdef DIR_HAS_BSD_GROUP_SEMANTICS
 	COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
 endif
@@ -1148,14 +569,14 @@ endif
 # Shell quote (do not use $(call) to accommodate ancient setups);
 
 SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
-ETC_GITCONFIG_SQ = $(subst ','\'',$(ETC_GITCONFIG))
+ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
 
 DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
 bindir_SQ = $(subst ','\'',$(bindir))
 bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
 mandir_SQ = $(subst ','\'',$(mandir))
 infodir_SQ = $(subst ','\'',$(infodir))
-gitexecdir_SQ = $(subst ','\'',$(gitexecdir))
+perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
 template_dir_SQ = $(subst ','\'',$(template_dir))
 htmldir_SQ = $(subst ','\'',$(htmldir))
 prefix_SQ = $(subst ','\'',$(prefix))
@@ -1164,7 +585,7 @@ SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
 TCLTK_PATH_SQ = $(subst ','\'',$(TCLTK_PATH))
 
-LIBS = $(GITLIBS) $(EXTLIBS)
+LIBS = $(PERFLIBS) $(EXTLIBS)
 
 BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
 	$(COMPAT_CFLAGS)
@@ -1180,15 +601,15 @@ export TAR INSTALL DESTDIR SHELL_PATH
 
 SHELL = $(SHELL_PATH)
 
-all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) GIT-BUILD-OPTIONS
+all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS
 ifneq (,$X)
-	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) git$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
 endif
 
 all::
 ifndef NO_TCLTK
-	$(QUIET_SUBDIR0)git-gui $(QUIET_SUBDIR1) gitexecdir='$(gitexec_instdir_SQ)' all
-	$(QUIET_SUBDIR0)gitk-git $(QUIET_SUBDIR1) all
+	$(QUIET_SUBDIR0)perf-gui $(QUIET_SUBDIR1) perfexecdir='$(perfexec_instdir_SQ)' all
+	$(QUIET_SUBDIR0)perfk-perf $(QUIET_SUBDIR1) all
 endif
 ifndef NO_PERL
 	$(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' all
@@ -1200,33 +621,33 @@ please_set_SHELL_PATH_to_a_more_modern_shell:
 
 shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
 
-strip: $(PROGRAMS) git$X
-	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) git$X
+strip: $(PROGRAMS) perf$X
+	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X
 
-git.o: git.c common-cmds.h GIT-CFLAGS
-	$(QUIET_CC)$(CC) -DGIT_VERSION='"$(GIT_VERSION)"' \
-		'-DGIT_HTML_PATH="$(htmldir_SQ)"' \
+perf.o: perf.c common-cmds.h PERF-CFLAGS
+	$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
 		$(ALL_CFLAGS) -c $(filter %.c,$^)
 
-git$X: git.o $(BUILTIN_OBJS) $(GITLIBS)
-	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ git.o \
+perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \
 		$(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
 
-builtin-help.o: builtin-help.c common-cmds.h GIT-CFLAGS
+builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
-		'-DGIT_HTML_PATH="$(htmldir_SQ)"' \
-		'-DGIT_MAN_PATH="$(mandir_SQ)"' \
-		'-DGIT_INFO_PATH="$(infodir_SQ)"' $<
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
+		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
 
-$(BUILT_INS): git$X
+$(BUILT_INS): perf$X
 	$(QUIET_BUILT_IN)$(RM) $@ && \
-	ln git$X $@ 2>/dev/null || \
-	ln -s git$X $@ 2>/dev/null || \
-	cp git$X $@
+	ln perf$X $@ 2>/dev/null || \
+	ln -s perf$X $@ 2>/dev/null || \
+	cp perf$X $@
 
 common-cmds.h: ./generate-cmdlist.sh command-list.txt
 
-common-cmds.h: $(wildcard Documentation/git-*.txt)
+common-cmds.h: $(wildcard Documentation/perf-*.txt)
 	$(QUIET_GEN)./generate-cmdlist.sh > $@+ && mv $@+ $@
 
 $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
@@ -1234,152 +655,55 @@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
 	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
 	    -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
 	    -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
-	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
+	    -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
 	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
 	    $@.sh >$@+ && \
 	chmod +x $@+ && \
 	mv $@+ $@
 
-ifndef NO_PERL
-$(patsubst %.perl,%,$(SCRIPT_PERL)): perl/perl.mak
-
-perl/perl.mak: GIT-CFLAGS perl/Makefile perl/Makefile.PL
-	$(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' $(@F)
-
-$(patsubst %.perl,%,$(SCRIPT_PERL)): % : %.perl
-	$(QUIET_GEN)$(RM) $@ $@+ && \
-	INSTLIBDIR=`MAKEFLAGS= $(MAKE) -C perl -s --no-print-directory instlibdir` && \
-	sed -e '1{' \
-	    -e '	s|#!.*perl|#!$(PERL_PATH_SQ)|' \
-	    -e '	h' \
-	    -e '	s=.*=use lib (split(/:/, $$ENV{GITPERLLIB} || "@@INSTLIBDIR@@"));=' \
-	    -e '	H' \
-	    -e '	x' \
-	    -e '}' \
-	    -e 's|@@INSTLIBDIR@@|'"$$INSTLIBDIR"'|g' \
-	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
-	    $@.perl >$@+ && \
-	chmod +x $@+ && \
-	mv $@+ $@
-
-gitweb/gitweb.cgi: gitweb/gitweb.perl
-	$(QUIET_GEN)$(RM) $@ $@+ && \
-	sed -e '1s|#!.*perl|#!$(PERL_PATH_SQ)|' \
-	    -e 's|++GIT_VERSION++|$(GIT_VERSION)|g' \
-	    -e 's|++GIT_BINDIR++|$(bindir)|g' \
-	    -e 's|++GITWEB_CONFIG++|$(GITWEB_CONFIG)|g' \
-	    -e 's|++GITWEB_CONFIG_SYSTEM++|$(GITWEB_CONFIG_SYSTEM)|g' \
-	    -e 's|++GITWEB_HOME_LINK_STR++|$(GITWEB_HOME_LINK_STR)|g' \
-	    -e 's|++GITWEB_SITENAME++|$(GITWEB_SITENAME)|g' \
-	    -e 's|++GITWEB_PROJECTROOT++|$(GITWEB_PROJECTROOT)|g' \
-	    -e 's|"++GITWEB_PROJECT_MAXDEPTH++"|$(GITWEB_PROJECT_MAXDEPTH)|g' \
-	    -e 's|++GITWEB_EXPORT_OK++|$(GITWEB_EXPORT_OK)|g' \
-	    -e 's|++GITWEB_STRICT_EXPORT++|$(GITWEB_STRICT_EXPORT)|g' \
-	    -e 's|++GITWEB_BASE_URL++|$(GITWEB_BASE_URL)|g' \
-	    -e 's|++GITWEB_LIST++|$(GITWEB_LIST)|g' \
-	    -e 's|++GITWEB_HOMETEXT++|$(GITWEB_HOMETEXT)|g' \
-	    -e 's|++GITWEB_CSS++|$(GITWEB_CSS)|g' \
-	    -e 's|++GITWEB_LOGO++|$(GITWEB_LOGO)|g' \
-	    -e 's|++GITWEB_FAVICON++|$(GITWEB_FAVICON)|g' \
-	    -e 's|++GITWEB_SITE_HEADER++|$(GITWEB_SITE_HEADER)|g' \
-	    -e 's|++GITWEB_SITE_FOOTER++|$(GITWEB_SITE_FOOTER)|g' \
-	    $< >$@+ && \
-	chmod +x $@+ && \
-	mv $@+ $@
-
-git-instaweb: git-instaweb.sh gitweb/gitweb.cgi gitweb/gitweb.css
-	$(QUIET_GEN)$(RM) $@ $@+ && \
-	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
-	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
-	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
-	    -e '/@@GITWEB_CGI@@/r gitweb/gitweb.cgi' \
-	    -e '/@@GITWEB_CGI@@/d' \
-	    -e '/@@GITWEB_CSS@@/r gitweb/gitweb.css' \
-	    -e '/@@GITWEB_CSS@@/d' \
-	    -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
-	    $@.sh > $@+ && \
-	chmod +x $@+ && \
-	mv $@+ $@
-else # NO_PERL
-$(patsubst %.perl,%,$(SCRIPT_PERL)) git-instaweb: % : unimplemented.sh
-	$(QUIET_GEN)$(RM) $@ $@+ && \
-	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
-	    -e 's|@@REASON@@|NO_PERL=$(NO_PERL)|g' \
-	    unimplemented.sh >$@+ && \
-	chmod +x $@+ && \
-	mv $@+ $@
-endif # NO_PERL
-
 configure: configure.ac
 	$(QUIET_GEN)$(RM) $@ $<+ && \
-	sed -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
+	sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
 	    $< > $<+ && \
 	autoconf -o $@ $<+ && \
 	$(RM) $<+
 
-# These can record GIT_VERSION
-git.o git.spec \
+# These can record PERF_VERSION
+perf.o perf.spec \
 	$(patsubst %.sh,%,$(SCRIPT_SH)) \
 	$(patsubst %.perl,%,$(SCRIPT_PERL)) \
-	: GIT-VERSION-FILE
+	: PERF-VERSION-FILE
 
-%.o: %.c GIT-CFLAGS
+%.o: %.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
-%.s: %.c GIT-CFLAGS
+%.s: %.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
 %.o: %.S
 	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
 
-exec_cmd.o: exec_cmd.c GIT-CFLAGS
+exec_cmd.o: exec_cmd.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
-		'-DGIT_EXEC_PATH="$(gitexecdir_SQ)"' \
+		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
 		'-DBINDIR="$(bindir_relative_SQ)"' \
 		'-DPREFIX="$(prefix_SQ)"' \
 		$<
 
-builtin-init-db.o: builtin-init-db.c GIT-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
-
-config.o: config.c GIT-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_GITCONFIG='"$(ETC_GITCONFIG_SQ)"' $<
+builtin-init-db.o: builtin-init-db.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
 
-http.o: http.c GIT-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DGIT_USER_AGENT='"git/$(GIT_VERSION)"' $<
+config.o: config.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
-ifdef NO_EXPAT
-http-walker.o: http-walker.c http.h GIT-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DNO_EXPAT $<
-endif
-
-git-%$X: %.o $(GITLIBS)
+perf-%$X: %.o $(PERFLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
-git-imap-send$X: imap-send.o $(GITLIBS)
-	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
-		$(LIBS) $(OPENSSL_LINK) $(OPENSSL_LIBSSL)
-
-http.o http-walker.o http-push.o transport.o: http.h
-
-git-http-push$X: revision.o http.o http-push.o $(GITLIBS)
-	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
-		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
-
 $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
-$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
 builtin-revert.o wt-status.o: wt-status.h
 
 $(LIB_FILE): $(LIB_OBJS)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
 
-XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
-	xdiff/xmerge.o xdiff/xpatience.o
-$(XDIFF_OBJS): xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \
-	xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h
-
-$(XDIFF_LIB): $(XDIFF_OBJS)
-	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS)
-
-
 doc:
 	$(MAKE) -C Documentation all
 
@@ -1409,19 +733,19 @@ cscope:
 
 ### Detect prefix changes
 TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
-             $(bindir_SQ):$(gitexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
+             $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
 
-GIT-CFLAGS: .FORCE-GIT-CFLAGS
+PERF-CFLAGS: .FORCE-PERF-CFLAGS
 	@FLAGS='$(TRACK_CFLAGS)'; \
-	    if test x"$$FLAGS" != x"`cat GIT-CFLAGS 2>/dev/null`" ; then \
+	    if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \
 		echo 1>&2 "    * new build flags or prefix"; \
-		echo "$$FLAGS" >GIT-CFLAGS; \
+		echo "$$FLAGS" >PERF-CFLAGS; \
             fi
 
 # We need to apply sq twice, once to protect from the shell
-# that runs GIT-BUILD-OPTIONS, and then again to protect it
+# that runs PERF-BUILD-OPTIONS, and then again to protect it
 # and the first level quoting from the shell that runs "echo".
-GIT-BUILD-OPTIONS: .FORCE-GIT-BUILD-OPTIONS
+PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
 	@echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
 	@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
 	@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
@@ -1431,14 +755,14 @@ GIT-BUILD-OPTIONS: .FORCE-GIT-BUILD-OPTIONS
 ifndef NO_TCLTK
 TRACK_VARS = $(subst ','\'',-DTCLTK_PATH='$(TCLTK_PATH_SQ)')
 
-GIT-GUI-VARS: .FORCE-GIT-GUI-VARS
+PERF-GUI-VARS: .FORCE-PERF-GUI-VARS
 	@VARS='$(TRACK_VARS)'; \
 	    if test x"$$VARS" != x"`cat $@ 2>/dev/null`" ; then \
 		echo 1>&2 "    * new Tcl/Tk interpreter location"; \
 		echo "$$VARS" >$@; \
             fi
 
-.PHONY: .FORCE-GIT-GUI-VARS
+.PHONY: .FORCE-PERF-GUI-VARS
 endif
 
 ### Testing rules
@@ -1476,7 +800,7 @@ test-parse-options$X: parse-options.o
 
 .PRECIOUS: $(patsubst test-%$X,test-%.o,$(TEST_PROGRAMS))
 
-test-%$X: test-%.o $(GITLIBS)
+test-%$X: test-%.o $(PERFLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
 check-sha1:: test-sha1$X
@@ -1506,40 +830,40 @@ template_instdir = $(prefix)/$(template_dir)
 endif
 export template_instdir
 
-ifneq ($(filter /%,$(firstword $(gitexecdir))),)
-gitexec_instdir = $(gitexecdir)
+ifneq ($(filter /%,$(firstword $(perfexecdir))),)
+perfexec_instdir = $(perfexecdir)
 else
-gitexec_instdir = $(prefix)/$(gitexecdir)
+perfexec_instdir = $(prefix)/$(perfexecdir)
 endif
-gitexec_instdir_SQ = $(subst ','\'',$(gitexec_instdir))
-export gitexec_instdir
+perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+export perfexec_instdir
 
 install: all
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(gitexec_instdir_SQ)'
-	$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)'
-	$(INSTALL) git$X git-upload-pack$X git-receive-pack$X git-upload-archive$X git-shell$X git-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(INSTALL) perf$X perf-upload-pack$X perf-receive-pack$X perf-upload-archive$X perf-shell$X perf-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install
 	$(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install
 ifndef NO_TCLTK
-	$(MAKE) -C gitk-git install
-	$(MAKE) -C git-gui gitexecdir='$(gitexec_instdir_SQ)' install
+	$(MAKE) -C perfk-perf install
+	$(MAKE) -C perf-gui perfexecdir='$(perfexec_instdir_SQ)' install
 endif
 ifneq (,$X)
-	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) git$X)), $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
 endif
 	bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \
-	execdir=$$(cd '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' && pwd) && \
-	{ $(RM) "$$execdir/git-add$X" && \
-		ln "$$bindir/git$X" "$$execdir/git-add$X" 2>/dev/null || \
-		cp "$$bindir/git$X" "$$execdir/git-add$X"; } && \
-	{ for p in $(filter-out git-add$X,$(BUILT_INS)); do \
+	execdir=$$(cd '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' && pwd) && \
+	{ $(RM) "$$execdir/perf-add$X" && \
+		ln "$$bindir/perf$X" "$$execdir/perf-add$X" 2>/dev/null || \
+		cp "$$bindir/perf$X" "$$execdir/perf-add$X"; } && \
+	{ for p in $(filter-out perf-add$X,$(BUILT_INS)); do \
 		$(RM) "$$execdir/$$p" && \
-		ln "$$execdir/git-add$X" "$$execdir/$$p" 2>/dev/null || \
-		ln -s "git-add$X" "$$execdir/$$p" 2>/dev/null || \
-		cp "$$execdir/git-add$X" "$$execdir/$$p" || exit; \
+		ln "$$execdir/perf-add$X" "$$execdir/$$p" 2>/dev/null || \
+		ln -s "perf-add$X" "$$execdir/$$p" 2>/dev/null || \
+		cp "$$execdir/perf-add$X" "$$execdir/$$p" || exit; \
 	  done } && \
-	./check_bindir "z$$bindir" "z$$execdir" "$$bindir/git-add$X"
+	./check_bindir "z$$bindir" "z$$execdir" "$$bindir/perf-add$X"
 
 install-doc:
 	$(MAKE) -C Documentation install
@@ -1569,31 +893,31 @@ quick-install-html:
 
 ### Maintainer's dist rules
 
-git.spec: git.spec.in
-	sed -e 's/@@VERSION@@/$(GIT_VERSION)/g' < $< > $@+
+perf.spec: perf.spec.in
+	sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
 	mv $@+ $@
 
-GIT_TARNAME=git-$(GIT_VERSION)
-dist: git.spec git-archive$(X) configure
-	./git-archive --format=tar \
-		--prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar
-	@mkdir -p $(GIT_TARNAME)
-	@cp git.spec configure $(GIT_TARNAME)
-	@echo $(GIT_VERSION) > $(GIT_TARNAME)/version
-	@$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version
-	$(TAR) rf $(GIT_TARNAME).tar \
-		$(GIT_TARNAME)/git.spec \
-		$(GIT_TARNAME)/configure \
-		$(GIT_TARNAME)/version \
-		$(GIT_TARNAME)/git-gui/version
-	@$(RM) -r $(GIT_TARNAME)
-	gzip -f -9 $(GIT_TARNAME).tar
+PERF_TARNAME=perf-$(PERF_VERSION)
+dist: perf.spec perf-archive$(X) configure
+	./perf-archive --format=tar \
+		--prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
+	@mkdir -p $(PERF_TARNAME)
+	@cp perf.spec configure $(PERF_TARNAME)
+	@echo $(PERF_VERSION) > $(PERF_TARNAME)/version
+	@$(MAKE) -C perf-gui TARDIR=../$(PERF_TARNAME)/perf-gui dist-version
+	$(TAR) rf $(PERF_TARNAME).tar \
+		$(PERF_TARNAME)/perf.spec \
+		$(PERF_TARNAME)/configure \
+		$(PERF_TARNAME)/version \
+		$(PERF_TARNAME)/perf-gui/version
+	@$(RM) -r $(PERF_TARNAME)
+	gzip -f -9 $(PERF_TARNAME).tar
 
 rpm: dist
-	$(RPMBUILD) -ta $(GIT_TARNAME).tar.gz
+	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
 
-htmldocs = git-htmldocs-$(GIT_VERSION)
-manpages = git-manpages-$(GIT_VERSION)
+htmldocs = perf-htmldocs-$(PERF_VERSION)
+manpages = perf-manpages-$(PERF_VERSION)
 dist-doc:
 	$(RM) -r .doc-tmp-dir
 	mkdir .doc-tmp-dir
@@ -1618,51 +942,46 @@ distclean: clean
 	$(RM) configure
 
 clean:
-	$(RM) *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o xdiff/*.o \
-		$(LIB_FILE) $(XDIFF_LIB)
-	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) git$X
+	$(RM) *.o $(LIB_FILE)
+	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
 	$(RM) $(TEST_PROGRAMS)
 	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
 	$(RM) -r autom4te.cache
 	$(RM) config.log config.mak.autogen config.mak.append config.status config.cache
-	$(RM) -r $(GIT_TARNAME) .doc-tmp-dir
-	$(RM) $(GIT_TARNAME).tar.gz git-core_$(GIT_VERSION)-*.tar.gz
+	$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
+	$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
 	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
 	$(MAKE) -C Documentation/ clean
-ifndef NO_PERL
-	$(RM) gitweb/gitweb.cgi
-	$(MAKE) -C perl clean
-endif
 	$(MAKE) -C templates/ clean
 	$(MAKE) -C t/ clean
 ifndef NO_TCLTK
-	$(MAKE) -C gitk-git clean
-	$(MAKE) -C git-gui clean
+	$(MAKE) -C perfk-perf clean
+	$(MAKE) -C perf-gui clean
 endif
-	$(RM) GIT-VERSION-FILE GIT-CFLAGS GIT-GUI-VARS GIT-BUILD-OPTIONS
+	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-GUI-VARS PERF-BUILD-OPTIONS
 
 .PHONY: all install clean strip
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
-.PHONY: .FORCE-GIT-VERSION-FILE TAGS tags cscope .FORCE-GIT-CFLAGS
-.PHONY: .FORCE-GIT-BUILD-OPTIONS
+.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+.PHONY: .FORCE-PERF-BUILD-OPTIONS
 
 ### Check documentation
 #
 check-docs::
-	@(for v in $(ALL_PROGRAMS) $(BUILT_INS) git gitk; \
+	@(for v in $(ALL_PROGRAMS) $(BUILT_INS) perf perfk; \
 	do \
 		case "$$v" in \
-		git-merge-octopus | git-merge-ours | git-merge-recursive | \
-		git-merge-resolve | git-merge-subtree | \
-		git-fsck-objects | git-init-db | \
-		git-?*--?* ) continue ;; \
+		perf-merge-octopus | perf-merge-ours | perf-merge-recursive | \
+		perf-merge-resolve | perf-merge-subtree | \
+		perf-fsck-objects | perf-init-db | \
+		perf-?*--?* ) continue ;; \
 		esac ; \
 		test -f "Documentation/$$v.txt" || \
 		echo "no doc: $$v"; \
 		sed -e '/^#/d' command-list.txt | \
 		grep -q "^$$v[ 	]" || \
 		case "$$v" in \
-		git) ;; \
+		perf) ;; \
 		*) echo "no link: $$v";; \
 		esac ; \
 	done; \
@@ -1670,37 +989,37 @@ check-docs::
 		sed -e '/^#/d' \
 		    -e 's/[ 	].*//' \
 		    -e 's/^/listed /' command-list.txt; \
-		ls -1 Documentation/git*txt | \
+		ls -1 Documentation/perf*txt | \
 		sed -e 's|Documentation/|documented |' \
 		    -e 's/\.txt//'; \
 	) | while read how cmd; \
 	do \
 		case "$$how,$$cmd" in \
-		*,git-citool | \
-		*,git-gui | \
-		*,git-help | \
-		documented,gitattributes | \
-		documented,gitignore | \
-		documented,gitmodules | \
-		documented,gitcli | \
-		documented,git-tools | \
-		documented,gitcore-tutorial | \
-		documented,gitcvs-migration | \
-		documented,gitdiffcore | \
-		documented,gitglossary | \
-		documented,githooks | \
-		documented,gitrepository-layout | \
-		documented,gittutorial | \
-		documented,gittutorial-2 | \
+		*,perf-citool | \
+		*,perf-gui | \
+		*,perf-help | \
+		documented,perfattributes | \
+		documented,perfignore | \
+		documented,perfmodules | \
+		documented,perfcli | \
+		documented,perf-tools | \
+		documented,perfcore-tutorial | \
+		documented,perfcvs-migration | \
+		documented,perfdiffcore | \
+		documented,perfglossary | \
+		documented,perfhooks | \
+		documented,perfrepository-layout | \
+		documented,perftutorial | \
+		documented,perftutorial-2 | \
 		sentinel,not,matching,is,ok ) continue ;; \
 		esac; \
-		case " $(ALL_PROGRAMS) $(BUILT_INS) git gitk " in \
+		case " $(ALL_PROGRAMS) $(BUILT_INS) perf perfk " in \
 		*" $$cmd "*)	;; \
 		*) echo "removed but $$how: $$cmd" ;; \
 		esac; \
 	done ) | sort
 
-### Make sure built-ins do not have dups and listed in git.c
+### Make sure built-ins do not have dups and listed in perf.c
 #
 check-builtins::
 	./check-builtins.sh
diff --git a/Documentation/perf_counter/PERF-BUILD-OPTIONS b/Documentation/perf_counter/PERF-BUILD-OPTIONS
new file mode 100644
index 0000000..46d8d6c
--- /dev/null
+++ b/Documentation/perf_counter/PERF-BUILD-OPTIONS
@@ -0,0 +1,4 @@
+SHELL_PATH='/bin/sh'
+TAR='tar'
+NO_CURL=''
+NO_PERL=''
diff --git a/Documentation/perf_counter/PERF-CFLAGS b/Documentation/perf_counter/PERF-CFLAGS
new file mode 100644
index 0000000..f24906c
--- /dev/null
+++ b/Documentation/perf_counter/PERF-CFLAGS
@@ -0,0 +1 @@
+-g -O2 -Wall  -DSHA1_HEADER='<openssl/sha.h>' : /home/mingo/bin:libexec/perf-core:share/perf-core/templates:/home/mingo
diff --git a/Documentation/perf_counter/PERF-VERSION-FILE b/Documentation/perf_counter/PERF-VERSION-FILE
new file mode 100644
index 0000000..328e244
--- /dev/null
+++ b/Documentation/perf_counter/PERF-VERSION-FILE
@@ -0,0 +1 @@
+PERF_VERSION = 0.0.1.PERF
diff --git a/Documentation/perf_counter/PERF-VERSION-GEN b/Documentation/perf_counter/PERF-VERSION-GEN
new file mode 100755
index 0000000..c561d15
--- /dev/null
+++ b/Documentation/perf_counter/PERF-VERSION-GEN
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+GVF=PERF-VERSION-FILE
+DEF_VER=v0.0.1.PERF
+
+LF='
+'
+
+# First see if there is a version file (included in release tarballs),
+# then try git-describe, then default.
+if test -f version
+then
+	VN=$(cat version) || VN="$DEF_VER"
+elif test -d .git -o -f .git &&
+	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
+	case "$VN" in
+	*$LF*) (exit 1) ;;
+	v[0-9]*)
+		git update-index -q --refresh
+		test -z "$(git diff-index --name-only HEAD --)" ||
+		VN="$VN-dirty" ;;
+	esac
+then
+	VN=$(echo "$VN" | sed -e 's/-/./g');
+else
+	VN="$DEF_VER"
+fi
+
+VN=$(expr "$VN" : v*'\(.*\)')
+
+if test -r $GVF
+then
+	VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
+else
+	VC=unset
+fi
+test "$VN" = "$VC" || {
+	echo >&2 "PERF_VERSION = $VN"
+	echo "PERF_VERSION = $VN" >$GVF
+}
+
+
diff --git a/Documentation/perf_counter/abspath.c b/Documentation/perf_counter/abspath.c
new file mode 100644
index 0000000..649f34f
--- /dev/null
+++ b/Documentation/perf_counter/abspath.c
@@ -0,0 +1,117 @@
+#include "cache.h"
+
+/*
+ * Do not use this for inspecting *tracked* content.  When path is a
+ * symlink to a directory, we do not want to say it is a directory when
+ * dealing with tracked content in the working tree.
+ */
+int is_directory(const char *path)
+{
+	struct stat st;
+	return (!stat(path, &st) && S_ISDIR(st.st_mode));
+}
+
+/* We allow "recursive" symbolic links. Only within reason, though. */
+#define MAXDEPTH 5
+
+const char *make_absolute_path(const char *path)
+{
+	static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1];
+	char cwd[1024] = "";
+	int buf_index = 1, len;
+
+	int depth = MAXDEPTH;
+	char *last_elem = NULL;
+	struct stat st;
+
+	if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+		die ("Too long path: %.*s", 60, path);
+
+	while (depth--) {
+		if (!is_directory(buf)) {
+			char *last_slash = strrchr(buf, '/');
+			if (last_slash) {
+				*last_slash = '\0';
+				last_elem = xstrdup(last_slash + 1);
+			} else {
+				last_elem = xstrdup(buf);
+				*buf = '\0';
+			}
+		}
+
+		if (*buf) {
+			if (!*cwd && !getcwd(cwd, sizeof(cwd)))
+				die ("Could not get current working directory");
+
+			if (chdir(buf))
+				die ("Could not switch to '%s'", buf);
+		}
+		if (!getcwd(buf, PATH_MAX))
+			die ("Could not get current working directory");
+
+		if (last_elem) {
+			int len = strlen(buf);
+			if (len + strlen(last_elem) + 2 > PATH_MAX)
+				die ("Too long path name: '%s/%s'",
+						buf, last_elem);
+			buf[len] = '/';
+			strcpy(buf + len + 1, last_elem);
+			free(last_elem);
+			last_elem = NULL;
+		}
+
+		if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) {
+			len = readlink(buf, next_buf, PATH_MAX);
+			if (len < 0)
+				die ("Invalid symlink: %s", buf);
+			if (PATH_MAX <= len)
+				die("symbolic link too long: %s", buf);
+			next_buf[len] = '\0';
+			buf = next_buf;
+			buf_index = 1 - buf_index;
+			next_buf = bufs[buf_index];
+		} else
+			break;
+	}
+
+	if (*cwd && chdir(cwd))
+		die ("Could not change back to '%s'", cwd);
+
+	return buf;
+}
+
+static const char *get_pwd_cwd(void)
+{
+	static char cwd[PATH_MAX + 1];
+	char *pwd;
+	struct stat cwd_stat, pwd_stat;
+	if (getcwd(cwd, PATH_MAX) == NULL)
+		return NULL;
+	pwd = getenv("PWD");
+	if (pwd && strcmp(pwd, cwd)) {
+		stat(cwd, &cwd_stat);
+		if (!stat(pwd, &pwd_stat) &&
+		    pwd_stat.st_dev == cwd_stat.st_dev &&
+		    pwd_stat.st_ino == cwd_stat.st_ino) {
+			strlcpy(cwd, pwd, PATH_MAX);
+		}
+	}
+	return cwd;
+}
+
+const char *make_nonrelative_path(const char *path)
+{
+	static char buf[PATH_MAX + 1];
+
+	if (is_absolute_path(path)) {
+		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	} else {
+		const char *cwd = get_pwd_cwd();
+		if (!cwd)
+			die("Cannot determine the current working directory");
+		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	}
+	return buf;
+}
diff --git a/Documentation/perf_counter/alias.c b/Documentation/perf_counter/alias.c
new file mode 100644
index 0000000..9b3dd2b
--- /dev/null
+++ b/Documentation/perf_counter/alias.c
@@ -0,0 +1,77 @@
+#include "cache.h"
+
+static const char *alias_key;
+static char *alias_val;
+
+static int alias_lookup_cb(const char *k, const char *v, void *cb)
+{
+	if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) {
+		if (!v)
+			return config_error_nonbool(k);
+		alias_val = strdup(v);
+		return 0;
+	}
+	return 0;
+}
+
+char *alias_lookup(const char *alias)
+{
+	alias_key = alias;
+	alias_val = NULL;
+	perf_config(alias_lookup_cb, NULL);
+	return alias_val;
+}
+
+int split_cmdline(char *cmdline, const char ***argv)
+{
+	int src, dst, count = 0, size = 16;
+	char quoted = 0;
+
+	*argv = malloc(sizeof(char*) * size);
+
+	/* split alias_string */
+	(*argv)[count++] = cmdline;
+	for (src = dst = 0; cmdline[src];) {
+		char c = cmdline[src];
+		if (!quoted && isspace(c)) {
+			cmdline[dst++] = 0;
+			while (cmdline[++src]
+					&& isspace(cmdline[src]))
+				; /* skip */
+			if (count >= size) {
+				size += 16;
+				*argv = realloc(*argv, sizeof(char*) * size);
+			}
+			(*argv)[count++] = cmdline + dst;
+		} else if (!quoted && (c == '\'' || c == '"')) {
+			quoted = c;
+			src++;
+		} else if (c == quoted) {
+			quoted = 0;
+			src++;
+		} else {
+			if (c == '\\' && quoted != '\'') {
+				src++;
+				c = cmdline[src];
+				if (!c) {
+					free(*argv);
+					*argv = NULL;
+					return error("cmdline ends with \\");
+				}
+			}
+			cmdline[dst++] = c;
+			src++;
+		}
+	}
+
+	cmdline[dst] = 0;
+
+	if (quoted) {
+		free(*argv);
+		*argv = NULL;
+		return error("unclosed quote");
+	}
+
+	return count;
+}
+
diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c
new file mode 100644
index 0000000..125fcc2
--- /dev/null
+++ b/Documentation/perf_counter/builtin-help.c
@@ -0,0 +1,463 @@
+/*
+ * builtin-help.c
+ *
+ * Builtin help command
+ */
+#include "cache.h"
+#include "builtin.h"
+#include "exec_cmd.h"
+#include "common-cmds.h"
+#include "parse-options.h"
+#include "run-command.h"
+#include "help.h"
+
+static struct man_viewer_list {
+	struct man_viewer_list *next;
+	char name[FLEX_ARRAY];
+} *man_viewer_list;
+
+static struct man_viewer_info_list {
+	struct man_viewer_info_list *next;
+	const char *info;
+	char name[FLEX_ARRAY];
+} *man_viewer_info_list;
+
+enum help_format {
+	HELP_FORMAT_MAN,
+	HELP_FORMAT_INFO,
+	HELP_FORMAT_WEB,
+};
+
+static int show_all = 0;
+static enum help_format help_format = HELP_FORMAT_MAN;
+static struct option builtin_help_options[] = {
+	OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
+	OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
+	OPT_SET_INT('w', "web", &help_format, "show manual in web browser",
+			HELP_FORMAT_WEB),
+	OPT_SET_INT('i', "info", &help_format, "show info page",
+			HELP_FORMAT_INFO),
+	OPT_END(),
+};
+
+static const char * const builtin_help_usage[] = {
+	"perf help [--all] [--man|--web|--info] [command]",
+	NULL
+};
+
+static enum help_format parse_help_format(const char *format)
+{
+	if (!strcmp(format, "man"))
+		return HELP_FORMAT_MAN;
+	if (!strcmp(format, "info"))
+		return HELP_FORMAT_INFO;
+	if (!strcmp(format, "web") || !strcmp(format, "html"))
+		return HELP_FORMAT_WEB;
+	die("unrecognized help format '%s'", format);
+}
+
+static const char *get_man_viewer_info(const char *name)
+{
+	struct man_viewer_info_list *viewer;
+
+	for (viewer = man_viewer_info_list; viewer; viewer = viewer->next)
+	{
+		if (!strcasecmp(name, viewer->name))
+			return viewer->info;
+	}
+	return NULL;
+}
+
+static int check_emacsclient_version(void)
+{
+	struct strbuf buffer = STRBUF_INIT;
+	struct child_process ec_process;
+	const char *argv_ec[] = { "emacsclient", "--version", NULL };
+	int version;
+
+	/* emacsclient prints its version number on stderr */
+	memset(&ec_process, 0, sizeof(ec_process));
+	ec_process.argv = argv_ec;
+	ec_process.err = -1;
+	ec_process.stdout_to_stderr = 1;
+	if (start_command(&ec_process)) {
+		fprintf(stderr, "Failed to start emacsclient.\n");
+		return -1;
+	}
+	strbuf_read(&buffer, ec_process.err, 20);
+	close(ec_process.err);
+
+	/*
+	 * Don't bother checking return value, because "emacsclient --version"
+	 * seems to always exits with code 1.
+	 */
+	finish_command(&ec_process);
+
+	if (prefixcmp(buffer.buf, "emacsclient")) {
+		fprintf(stderr, "Failed to parse emacsclient version.\n");
+		strbuf_release(&buffer);
+		return -1;
+	}
+
+	strbuf_remove(&buffer, 0, strlen("emacsclient"));
+	version = atoi(buffer.buf);
+
+	if (version < 22) {
+		fprintf(stderr,
+			"emacsclient version '%d' too old (< 22).\n",
+			version);
+		strbuf_release(&buffer);
+		return -1;
+	}
+
+	strbuf_release(&buffer);
+	return 0;
+}
+
+static void exec_woman_emacs(const char* path, const char *page)
+{
+	if (!check_emacsclient_version()) {
+		/* This works only with emacsclient version >= 22. */
+		struct strbuf man_page = STRBUF_INIT;
+
+		if (!path)
+			path = "emacsclient";
+		strbuf_addf(&man_page, "(woman \"%s\")", page);
+		execlp(path, "emacsclient", "-e", man_page.buf, NULL);
+		warning("failed to exec '%s': %s", path, strerror(errno));
+	}
+}
+
+static void exec_man_konqueror(const char* path, const char *page)
+{
+	const char *display = getenv("DISPLAY");
+	if (display && *display) {
+		struct strbuf man_page = STRBUF_INIT;
+		const char *filename = "kfmclient";
+
+		/* It's simpler to launch konqueror using kfmclient. */
+		if (path) {
+			const char *file = strrchr(path, '/');
+			if (file && !strcmp(file + 1, "konqueror")) {
+				char *new = strdup(path);
+				char *dest = strrchr(new, '/');
+
+				/* strlen("konqueror") == strlen("kfmclient") */
+				strcpy(dest + 1, "kfmclient");
+				path = new;
+			}
+			if (file)
+				filename = file;
+		} else
+			path = "kfmclient";
+		strbuf_addf(&man_page, "man:%s(1)", page);
+		execlp(path, filename, "newTab", man_page.buf, NULL);
+		warning("failed to exec '%s': %s", path, strerror(errno));
+	}
+}
+
+static void exec_man_man(const char* path, const char *page)
+{
+	if (!path)
+		path = "man";
+	execlp(path, "man", page, NULL);
+	warning("failed to exec '%s': %s", path, strerror(errno));
+}
+
+static void exec_man_cmd(const char *cmd, const char *page)
+{
+	struct strbuf shell_cmd = STRBUF_INIT;
+	strbuf_addf(&shell_cmd, "%s %s", cmd, page);
+	execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL);
+	warning("failed to exec '%s': %s", cmd, strerror(errno));
+}
+
+static void add_man_viewer(const char *name)
+{
+	struct man_viewer_list **p = &man_viewer_list;
+	size_t len = strlen(name);
+
+	while (*p)
+		p = &((*p)->next);
+	*p = calloc(1, (sizeof(**p) + len + 1));
+	strncpy((*p)->name, name, len);
+}
+
+static int supported_man_viewer(const char *name, size_t len)
+{
+	return (!strncasecmp("man", name, len) ||
+		!strncasecmp("woman", name, len) ||
+		!strncasecmp("konqueror", name, len));
+}
+
+static void do_add_man_viewer_info(const char *name,
+				   size_t len,
+				   const char *value)
+{
+	struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1);
+
+	strncpy(new->name, name, len);
+	new->info = strdup(value);
+	new->next = man_viewer_info_list;
+	man_viewer_info_list = new;
+}
+
+static int add_man_viewer_path(const char *name,
+			       size_t len,
+			       const char *value)
+{
+	if (supported_man_viewer(name, len))
+		do_add_man_viewer_info(name, len, value);
+	else
+		warning("'%s': path for unsupported man viewer.\n"
+			"Please consider using 'man.<tool>.cmd' instead.",
+			name);
+
+	return 0;
+}
+
+static int add_man_viewer_cmd(const char *name,
+			      size_t len,
+			      const char *value)
+{
+	if (supported_man_viewer(name, len))
+		warning("'%s': cmd for supported man viewer.\n"
+			"Please consider using 'man.<tool>.path' instead.",
+			name);
+	else
+		do_add_man_viewer_info(name, len, value);
+
+	return 0;
+}
+
+static int add_man_viewer_info(const char *var, const char *value)
+{
+	const char *name = var + 4;
+	const char *subkey = strrchr(name, '.');
+
+	if (!subkey)
+		return error("Config with no key for man viewer: %s", name);
+
+	if (!strcmp(subkey, ".path")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return add_man_viewer_path(name, subkey - name, value);
+	}
+	if (!strcmp(subkey, ".cmd")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return add_man_viewer_cmd(name, subkey - name, value);
+	}
+
+	warning("'%s': unsupported man viewer sub key.", subkey);
+	return 0;
+}
+
+static int perf_help_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "help.format")) {
+		if (!value)
+			return config_error_nonbool(var);
+		help_format = parse_help_format(value);
+		return 0;
+	}
+	if (!strcmp(var, "man.viewer")) {
+		if (!value)
+			return config_error_nonbool(var);
+		add_man_viewer(value);
+		return 0;
+	}
+	if (!prefixcmp(var, "man."))
+		return add_man_viewer_info(var, value);
+
+	return perf_default_config(var, value, cb);
+}
+
+static struct cmdnames main_cmds, other_cmds;
+
+void list_common_cmds_help(void)
+{
+	int i, longest = 0;
+
+	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
+		if (longest < strlen(common_cmds[i].name))
+			longest = strlen(common_cmds[i].name);
+	}
+
+	puts("The most commonly used perf commands are:");
+	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
+		printf("   %s   ", common_cmds[i].name);
+		mput_char(' ', longest - strlen(common_cmds[i].name));
+		puts(common_cmds[i].help);
+	}
+}
+
+static int is_perf_command(const char *s)
+{
+	return is_in_cmdlist(&main_cmds, s) ||
+		is_in_cmdlist(&other_cmds, s);
+}
+
+static const char *prepend(const char *prefix, const char *cmd)
+{
+	size_t pre_len = strlen(prefix);
+	size_t cmd_len = strlen(cmd);
+	char *p = malloc(pre_len + cmd_len + 1);
+	memcpy(p, prefix, pre_len);
+	strcpy(p + pre_len, cmd);
+	return p;
+}
+
+static const char *cmd_to_page(const char *perf_cmd)
+{
+	if (!perf_cmd)
+		return "perf";
+	else if (!prefixcmp(perf_cmd, "perf"))
+		return perf_cmd;
+	else if (is_perf_command(perf_cmd))
+		return prepend("perf-", perf_cmd);
+	else
+		return prepend("perf", perf_cmd);
+}
+
+static void setup_man_path(void)
+{
+	struct strbuf new_path = STRBUF_INIT;
+	const char *old_path = getenv("MANPATH");
+
+	/* We should always put ':' after our path. If there is no
+	 * old_path, the ':' at the end will let 'man' to try
+	 * system-wide paths after ours to find the manual page. If
+	 * there is old_path, we need ':' as delimiter. */
+	strbuf_addstr(&new_path, system_path(PERF_MAN_PATH));
+	strbuf_addch(&new_path, ':');
+	if (old_path)
+		strbuf_addstr(&new_path, old_path);
+
+	setenv("MANPATH", new_path.buf, 1);
+
+	strbuf_release(&new_path);
+}
+
+static void exec_viewer(const char *name, const char *page)
+{
+	const char *info = get_man_viewer_info(name);
+
+	if (!strcasecmp(name, "man"))
+		exec_man_man(info, page);
+	else if (!strcasecmp(name, "woman"))
+		exec_woman_emacs(info, page);
+	else if (!strcasecmp(name, "konqueror"))
+		exec_man_konqueror(info, page);
+	else if (info)
+		exec_man_cmd(info, page);
+	else
+		warning("'%s': unknown man viewer.", name);
+}
+
+static void show_man_page(const char *perf_cmd)
+{
+	struct man_viewer_list *viewer;
+	const char *page = cmd_to_page(perf_cmd);
+	const char *fallback = getenv("PERF_MAN_VIEWER");
+
+	setup_man_path();
+	for (viewer = man_viewer_list; viewer; viewer = viewer->next)
+	{
+		exec_viewer(viewer->name, page); /* will return when unable */
+	}
+	if (fallback)
+		exec_viewer(fallback, page);
+	exec_viewer("man", page);
+	die("no man viewer handled the request");
+}
+
+static void show_info_page(const char *perf_cmd)
+{
+	const char *page = cmd_to_page(perf_cmd);
+	setenv("INFOPATH", system_path(PERF_INFO_PATH), 1);
+	execlp("info", "info", "perfman", page, NULL);
+}
+
+static void get_html_page_path(struct strbuf *page_path, const char *page)
+{
+	struct stat st;
+	const char *html_path = system_path(PERF_HTML_PATH);
+
+	/* Check that we have a perf documentation directory. */
+	if (stat(mkpath("%s/perf.html", html_path), &st)
+	    || !S_ISREG(st.st_mode))
+		die("'%s': not a documentation directory.", html_path);
+
+	strbuf_init(page_path, 0);
+	strbuf_addf(page_path, "%s/%s.html", html_path, page);
+}
+
+/*
+ * If open_html is not defined in a platform-specific way (see for
+ * example compat/mingw.h), we use the script web--browse to display
+ * HTML.
+ */
+#ifndef open_html
+void open_html(const char *path)
+{
+	execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
+}
+#endif
+
+static void show_html_page(const char *perf_cmd)
+{
+	const char *page = cmd_to_page(perf_cmd);
+	struct strbuf page_path; /* it leaks but we exec bellow */
+
+	get_html_page_path(&page_path, page);
+
+	open_html(page_path.buf);
+}
+
+int cmd_help(int argc, const char **argv, const char *prefix)
+{
+	int nonperf;
+	const char *alias;
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	/* setup_perf_directory_gently(&nonperf); */
+	perf_config(perf_help_config, NULL);
+
+	argc = parse_options(argc, argv, builtin_help_options,
+			builtin_help_usage, 0);
+
+	if (show_all) {
+		printf("usage: %s\n\n", perf_usage_string);
+		list_commands("perf commands", &main_cmds, &other_cmds);
+		printf("%s\n", perf_more_info_string);
+		return 0;
+	}
+
+	if (!argv[0]) {
+		printf("usage: %s\n\n", perf_usage_string);
+		list_common_cmds_help();
+		printf("\n%s\n", perf_more_info_string);
+		return 0;
+	}
+
+	alias = alias_lookup(argv[0]);
+	if (alias && !is_perf_command(argv[0])) {
+		printf("`perf %s' is aliased to `%s'\n", argv[0], alias);
+		return 0;
+	}
+
+	switch (help_format) {
+	case HELP_FORMAT_MAN:
+		show_man_page(argv[0]);
+		break;
+	case HELP_FORMAT_INFO:
+		show_info_page(argv[0]);
+		break;
+	case HELP_FORMAT_WEB:
+		show_html_page(argv[0]);
+		break;
+	}
+
+	return 0;
+}
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
new file mode 100644
index 0000000..9d2c769
--- /dev/null
+++ b/Documentation/perf_counter/builtin-top.c
@@ -0,0 +1,1411 @@
+/*
+ * kerneltop.c: show top kernel functions - performance counters showcase
+
+   Build with:
+
+     cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
+
+   Sample output:
+
+------------------------------------------------------------------------------
+ KernelTop:    2669 irqs/sec  [NMI, cache-misses/cache-refs],  (all, cpu: 2)
+------------------------------------------------------------------------------
+
+             weight         RIP          kernel function
+             ______   ________________   _______________
+
+              35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
+              33.00 - ffffffff804cb740 : sock_alloc_send_skb
+              31.26 - ffffffff804ce808 : skb_push
+              22.43 - ffffffff80510004 : tcp_established_options
+              19.00 - ffffffff8027d250 : find_get_page
+              15.76 - ffffffff804e4fc9 : eth_type_trans
+              15.20 - ffffffff804d8baa : dst_release
+              14.86 - ffffffff804cf5d8 : skb_release_head_state
+              14.00 - ffffffff802217d5 : read_hpet
+              12.00 - ffffffff804ffb7f : __ip_local_out
+              11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
+               8.54 - ffffffff805001a3 : ip_queue_xmit
+ */
+
+/*
+ * perfstat:  /usr/bin/time -alike performance counter statistics utility
+
+          It summarizes the counter events of all tasks (and child tasks),
+          covering all CPUs that the command (or workload) executes on.
+          It only counts the per-task events of the workload started,
+          independent of how many other tasks run on those CPUs.
+
+   Sample output:
+
+   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
+
+   Performance counter stats for 'ls':
+
+           163516953 instructions
+                2295 cache-misses
+             2855182 branch-misses
+ */
+
+ /*
+  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+  *
+  * Improvements and fixes by:
+  *
+  *   Arjan van de Ven <arjan@linux.intel.com>
+  *   Yanmin Zhang <yanmin.zhang@intel.com>
+  *   Wu Fengguang <fengguang.wu@intel.com>
+  *   Mike Galbraith <efault@gmx.de>
+  *   Paul Mackerras <paulus@samba.org>
+  *
+  * Released under the GPL v2. (and only v2, not any later version)
+  */
+
+#include "util.h"
+
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <getopt.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+#include "../../include/linux/perf_counter.h"
+
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE   31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define rdclock()                                       \
+({                                                      \
+        struct timespec ts;                             \
+                                                        \
+        clock_gettime(CLOCK_MONOTONIC, &ts);            \
+        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
+})
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+#ifdef __x86_64__
+#define __NR_perf_counter_open 295
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __i386__
+#define __NR_perf_counter_open 333
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __powerpc__
+#define __NR_perf_counter_open 319
+#define rmb() 		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
+#endif
+
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+asmlinkage int sys_perf_counter_open(
+        struct perf_counter_hw_event    *hw_event_uptr          __user,
+        pid_t                           pid,
+        int                             cpu,
+        int                             group_fd,
+        unsigned long                   flags)
+{
+        return syscall(
+                __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
+}
+
+#define MAX_COUNTERS			64
+#define MAX_NR_CPUS			256
+
+#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
+
+static int			run_perfstat			=  0;
+static int			system_wide			=  0;
+
+static int			nr_counters			=  0;
+static __u64			event_id[MAX_COUNTERS]		= {
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
+
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
+};
+static int			default_interval = 100000;
+static int			event_count[MAX_COUNTERS];
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static __u64			count_filter		       = 100;
+
+static int			tid				= -1;
+static int			profile_cpu			= -1;
+static int			nr_cpus				=  0;
+static int			nmi				=  1;
+static unsigned int		realtime_prio			=  0;
+static int			group				=  0;
+static unsigned int		page_size;
+static unsigned int		mmap_pages			=  16;
+static int			use_mmap			= 0;
+static int			use_munmap			= 0;
+
+static char			*vmlinux;
+
+static char			*sym_filter;
+static unsigned long		filter_start;
+static unsigned long		filter_end;
+
+static int			delay_secs			=  2;
+static int			zero;
+static int			dump_symtab;
+
+static int			scale;
+
+struct source_line {
+	uint64_t		EIP;
+	unsigned long		count;
+	char			*line;
+	struct source_line	*next;
+};
+
+static struct source_line	*lines;
+static struct source_line	**lines_tail;
+
+const unsigned int default_count[] = {
+	1000000,
+	1000000,
+	  10000,
+	  10000,
+	1000000,
+	  10000,
+};
+
+static char *hw_event_names[] = {
+	"CPU cycles",
+	"instructions",
+	"cache references",
+	"cache misses",
+	"branches",
+	"branch misses",
+	"bus cycles",
+};
+
+static char *sw_event_names[] = {
+	"cpu clock ticks",
+	"task clock ticks",
+	"pagefaults",
+	"context switches",
+	"CPU migrations",
+	"minor faults",
+	"major faults",
+};
+
+struct event_symbol {
+	__u64 event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols[] = {
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
+
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
+};
+
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+static void display_events_help(void)
+{
+	unsigned int i;
+	__u64 e;
+
+	printf(
+	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		int type, id;
+
+		e = event_symbols[i].event;
+		type = PERF_COUNTER_TYPE(e);
+		id = PERF_COUNTER_ID(e);
+
+		printf("\n                             %d:%d: %-20s",
+				type, id, event_symbols[i].symbol);
+	}
+
+	printf("\n"
+	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
+}
+
+static void display_perfstat_help(void)
+{
+	printf(
+	"Usage: perfstat [<events...>] <cmd...>\n\n"
+	"PerfStat Options (up to %d event types can be specified):\n\n",
+		 MAX_COUNTERS);
+
+	display_events_help();
+
+	printf(
+	" -l                           # scale counter values\n"
+	" -a                           # system-wide collection\n");
+	exit(0);
+}
+
+static void display_help(void)
+{
+	if (run_perfstat)
+		return display_perfstat_help();
+
+	printf(
+	"Usage: kerneltop [<options>]\n"
+	"   Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
+	"KernelTop Options (up to %d event types can be specified at once):\n\n",
+		 MAX_COUNTERS);
+
+	display_events_help();
+
+	printf(
+	" -S        --stat             # perfstat COMMAND\n"
+	" -a                           # system-wide collection (for perfstat)\n\n"
+	" -c CNT    --count=CNT        # event period to sample\n\n"
+	" -C CPU    --cpu=CPU          # CPU (-1 for all)                 [default: -1]\n"
+	" -p PID    --pid=PID          # PID of sampled task (-1 for all) [default: -1]\n\n"
+	" -l                           # show scale factor for RR events\n"
+	" -d delay  --delay=<seconds>  # sampling/display delay           [default:  2]\n"
+	" -f CNT    --filter=CNT       # min-event-count filter          [default: 100]\n\n"
+	" -r prio   --realtime=<prio>  # event acquisition runs with SCHED_FIFO policy\n"
+	" -s symbol --symbol=<symbol>  # function to be showed annotated one-shot\n"
+	" -x path   --vmlinux=<path>   # the vmlinux binary, required for -s use\n"
+	" -z        --zero             # zero counts after display\n"
+	" -D        --dump_symtab      # dump symbol table to stderr on startup\n"
+	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
+	" -M        --mmap_info        # print mmap info stream\n"
+	" -U        --munmap_info      # print munmap info stream\n"
+	);
+
+	exit(0);
+}
+
+static char *event_name(int ctr)
+{
+	__u64 config = event_id[ctr];
+	int type = PERF_COUNTER_TYPE(config);
+	int id = PERF_COUNTER_ID(config);
+	static char buf[32];
+
+	if (PERF_COUNTER_RAW(config)) {
+		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
+		return buf;
+	}
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		if (id < PERF_HW_EVENTS_MAX)
+			return hw_event_names[id];
+		return "unknown-hardware";
+
+	case PERF_TYPE_SOFTWARE:
+		if (id < PERF_SW_EVENTS_MAX)
+			return sw_event_names[id];
+		return "unknown-software";
+
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static __u64 match_event_symbols(char *str)
+{
+	__u64 config, id;
+	int type;
+	unsigned int i;
+
+	if (sscanf(str, "r%llx", &config) == 1)
+		return config | PERF_COUNTER_RAW_MASK;
+
+	if (sscanf(str, "%d:%llu", &type, &id) == 2)
+		return EID(type, id);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return ~0ULL;
+}
+
+static int parse_events(char *str)
+{
+	__u64 config;
+
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	config = match_event_symbols(str);
+	if (config == ~0ULL)
+		return -1;
+
+	event_id[nr_counters] = config;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+
+/*
+ * perfstat
+ */
+
+char fault_here[1000000];
+
+static void create_perfstat_counter(int counter)
+{
+	struct perf_counter_hw_event hw_event;
+
+	memset(&hw_event, 0, sizeof(hw_event));
+	hw_event.config		= event_id[counter];
+	hw_event.record_type	= 0;
+	hw_event.nmi		= 0;
+	if (scale)
+		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
+					  PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	if (system_wide) {
+		int cpu;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
+			if (fd[cpu][counter] < 0) {
+				printf("perfstat error: syscall returned with %d (%s)\n",
+						fd[cpu][counter], strerror(errno));
+				exit(-1);
+			}
+		}
+	} else {
+		hw_event.inherit	= 1;
+		hw_event.disabled	= 1;
+
+		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
+		if (fd[0][counter] < 0) {
+			printf("perfstat error: syscall returned with %d (%s)\n",
+					fd[0][counter], strerror(errno));
+			exit(-1);
+		}
+	}
+}
+
+int do_perfstat(int argc, char *argv[])
+{
+	unsigned long long t0, t1;
+	int counter;
+	ssize_t res;
+	int status;
+	int pid;
+
+	if (!system_wide)
+		nr_cpus = 1;
+
+	for (counter = 0; counter < nr_counters; counter++)
+		create_perfstat_counter(counter);
+
+	argc -= optind;
+	argv += optind;
+
+	if (!argc)
+		display_help();
+
+	/*
+	 * Enable counters and exec the command:
+	 */
+	t0 = rdclock();
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	if ((pid = fork()) < 0)
+		perror("failed to fork");
+	if (!pid) {
+		if (execvp(argv[0], argv)) {
+			perror(argv[0]);
+			exit(-1);
+		}
+	}
+	while (wait(&status) >= 0)
+		;
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+	t1 = rdclock();
+
+	fflush(stdout);
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Performance counter stats for \'%s\':\n",
+		argv[0]);
+	fprintf(stderr, "\n");
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		int cpu, nv;
+		__u64 count[3], single_count[3];
+		int scaled;
+
+		count[0] = count[1] = count[2] = 0;
+		nv = scale ? 3 : 1;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			res = read(fd[cpu][counter],
+				   single_count, nv * sizeof(__u64));
+			assert(res == nv * sizeof(__u64));
+
+			count[0] += single_count[0];
+			if (scale) {
+				count[1] += single_count[1];
+				count[2] += single_count[2];
+			}
+		}
+
+		scaled = 0;
+		if (scale) {
+			if (count[2] == 0) {
+				fprintf(stderr, " %14s  %-20s\n",
+					"<not counted>", event_name(counter));
+				continue;
+			}
+			if (count[2] < count[1]) {
+				scaled = 1;
+				count[0] = (unsigned long long)
+					((double)count[0] * count[1] / count[2] + 0.5);
+			}
+		}
+
+		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
+		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
+
+			double msecs = (double)count[0] / 1000000;
+
+			fprintf(stderr, " %14.6f  %-20s (msecs)",
+				msecs, event_name(counter));
+		} else {
+			fprintf(stderr, " %14Ld  %-20s (events)",
+				count[0], event_name(counter));
+		}
+		if (scaled)
+			fprintf(stderr, "  (scaled from %.2f%%)",
+				(double) count[2] / count[1] * 100);
+		fprintf(stderr, "\n");
+	}
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
+			(double)(t1-t0)/1e6);
+	fprintf(stderr, "\n");
+
+	return 0;
+}
+
+/*
+ * Symbols
+ */
+
+static uint64_t			min_ip;
+static uint64_t			max_ip = -1ll;
+
+struct sym_entry {
+	unsigned long long	addr;
+	char			*sym;
+	unsigned long		count[MAX_COUNTERS];
+	int			skip;
+	struct source_line	*source;
+};
+
+#define MAX_SYMS		100000
+
+static int sym_table_count;
+
+struct sym_entry		*sym_filter_entry;
+
+static struct sym_entry		sym_table[MAX_SYMS];
+
+static void show_details(struct sym_entry *sym);
+
+/*
+ * Ordering weight: count-1 * count-2 * ... / count-n
+ */
+static double sym_weight(const struct sym_entry *sym)
+{
+	double weight;
+	int counter;
+
+	weight = sym->count[0];
+
+	for (counter = 1; counter < nr_counters-1; counter++)
+		weight *= sym->count[counter];
+
+	weight /= (sym->count[counter] + 1);
+
+	return weight;
+}
+
+static int compare(const void *__sym1, const void *__sym2)
+{
+	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
+
+	return sym_weight(sym1) < sym_weight(sym2);
+}
+
+static long			events;
+static long			userspace_events;
+static const char		CONSOLE_CLEAR[] = "[H[2J";
+
+static struct sym_entry		tmp[MAX_SYMS];
+
+static void print_sym_table(void)
+{
+	int i, printed;
+	int counter;
+	float events_per_sec = events/delay_secs;
+	float kevents_per_sec = (events-userspace_events)/delay_secs;
+	float sum_kevents = 0.0;
+
+	events = userspace_events = 0;
+	memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
+	qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
+
+	for (i = 0; i < sym_table_count && tmp[i].count[0]; i++)
+		sum_kevents += tmp[i].count[0];
+
+	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
+
+	printf(
+"------------------------------------------------------------------------------\n");
+	printf( " KernelTop:%8.0f irqs/sec  kernel:%4.1f%% [%s, ",
+		events_per_sec,
+		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
+		nmi ? "NMI" : "IRQ");
+
+	if (nr_counters == 1)
+		printf("%d ", event_count[0]);
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (counter)
+			printf("/");
+
+		printf("%s", event_name(counter));
+	}
+
+	printf( "], ");
+
+	if (tid != -1)
+		printf(" (tid: %d", tid);
+	else
+		printf(" (all");
+
+	if (profile_cpu != -1)
+		printf(", cpu: %d)\n", profile_cpu);
+	else {
+		if (tid != -1)
+			printf(")\n");
+		else
+			printf(", %d CPUs)\n", nr_cpus);
+	}
+
+	printf("------------------------------------------------------------------------------\n\n");
+
+	if (nr_counters == 1)
+		printf("             events    pcnt");
+	else
+		printf("  weight     events    pcnt");
+
+	printf("         RIP          kernel function\n"
+	       	       "  ______     ______   _____   ________________   _______________\n\n"
+	);
+
+	for (i = 0, printed = 0; i < sym_table_count; i++) {
+		float pcnt;
+		int count;
+
+		if (printed <= 18 && tmp[i].count[0] >= count_filter) {
+			pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
+
+			if (nr_counters == 1)
+				printf("%19.2f - %4.1f%% - %016llx : %s\n",
+					sym_weight(tmp + i),
+					pcnt, tmp[i].addr, tmp[i].sym);
+			else
+				printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
+					sym_weight(tmp + i),
+					tmp[i].count[0],
+					pcnt, tmp[i].addr, tmp[i].sym);
+			printed++;
+		}
+		/*
+		 * Add decay to the counts:
+		 */
+		for (count = 0; count < nr_counters; count++)
+			sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
+	}
+
+	if (sym_filter_entry)
+		show_details(sym_filter_entry);
+
+	{
+		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+
+		if (poll(&stdin_poll, 1, 0) == 1) {
+			printf("key pressed - exiting.\n");
+			exit(0);
+		}
+	}
+}
+
+static void *display_thread(void *arg)
+{
+	printf("KernelTop refresh period: %d seconds\n", delay_secs);
+
+	while (!sleep(delay_secs))
+		print_sym_table();
+
+	return NULL;
+}
+
+static int read_symbol(FILE *in, struct sym_entry *s)
+{
+	static int filter_match = 0;
+	char *sym, stype;
+	char str[500];
+	int rc, pos;
+
+	rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
+	if (rc == EOF)
+		return -1;
+
+	assert(rc == 3);
+
+	/* skip until end of line: */
+	pos = strlen(str);
+	do {
+		rc = fgetc(in);
+		if (rc == '\n' || rc == EOF || pos >= 499)
+			break;
+		str[pos] = rc;
+		pos++;
+	} while (1);
+	str[pos] = 0;
+
+	sym = str;
+
+	/* Filter out known duplicates and non-text symbols. */
+	if (!strcmp(sym, "_text"))
+		return 1;
+	if (!min_ip && !strcmp(sym, "_stext"))
+		return 1;
+	if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
+		return 1;
+	if (stype != 'T' && stype != 't')
+		return 1;
+	if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
+		return 1;
+	if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
+		return 1;
+
+	s->sym = malloc(strlen(str));
+	assert(s->sym);
+
+	strcpy((char *)s->sym, str);
+	s->skip = 0;
+
+	/* Tag events to be skipped. */
+	if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
+		s->skip = 1;
+	else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
+		s->skip = 1;
+	else if (!strcmp("mwait_idle", s->sym))
+		s->skip = 1;
+
+	if (filter_match == 1) {
+		filter_end = s->addr;
+		filter_match = -1;
+		if (filter_end - filter_start > 10000) {
+			printf("hm, too large filter symbol <%s> - skipping.\n",
+				sym_filter);
+			printf("symbol filter start: %016lx\n", filter_start);
+			printf("                end: %016lx\n", filter_end);
+			filter_end = filter_start = 0;
+			sym_filter = NULL;
+			sleep(1);
+		}
+	}
+	if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
+		filter_match = 1;
+		filter_start = s->addr;
+	}
+
+	return 0;
+}
+
+int compare_addr(const void *__sym1, const void *__sym2)
+{
+	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
+
+	return sym1->addr > sym2->addr;
+}
+
+static void sort_symbol_table(void)
+{
+	int i, dups;
+
+	do {
+		qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
+		for (i = 0, dups = 0; i < sym_table_count; i++) {
+			if (sym_table[i].addr == sym_table[i+1].addr) {
+				sym_table[i+1].addr = -1ll;
+				dups++;
+			}
+		}
+		sym_table_count -= dups;
+	} while(dups);
+}
+
+static void parse_symbols(void)
+{
+	struct sym_entry *last;
+
+	FILE *kallsyms = fopen("/proc/kallsyms", "r");
+
+	if (!kallsyms) {
+		printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
+		exit(-1);
+	}
+
+	while (!feof(kallsyms)) {
+		if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
+			sym_table_count++;
+			assert(sym_table_count <= MAX_SYMS);
+		}
+	}
+
+	sort_symbol_table();
+	min_ip = sym_table[0].addr;
+	max_ip = sym_table[sym_table_count-1].addr;
+	last = sym_table + sym_table_count++;
+
+	last->addr = -1ll;
+	last->sym = "<end>";
+
+	if (filter_end) {
+		int count;
+		for (count=0; count < sym_table_count; count ++) {
+			if (!strcmp(sym_table[count].sym, sym_filter)) {
+				sym_filter_entry = &sym_table[count];
+				break;
+			}
+		}
+	}
+	if (dump_symtab) {
+		int i;
+
+		for (i = 0; i < sym_table_count; i++)
+			fprintf(stderr, "%llx %s\n",
+				sym_table[i].addr, sym_table[i].sym);
+	}
+}
+
+/*
+ * Source lines
+ */
+
+static void parse_vmlinux(char *filename)
+{
+	FILE *file;
+	char command[PATH_MAX*2];
+	if (!filename)
+		return;
+
+	sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
+
+	file = popen(command, "r");
+	if (!file)
+		return;
+
+	lines_tail = &lines;
+	while (!feof(file)) {
+		struct source_line *src;
+		size_t dummy = 0;
+		char *c;
+
+		src = malloc(sizeof(struct source_line));
+		assert(src != NULL);
+		memset(src, 0, sizeof(struct source_line));
+
+		if (getline(&src->line, &dummy, file) < 0)
+			break;
+		if (!src->line)
+			break;
+
+		c = strchr(src->line, '\n');
+		if (c)
+			*c = 0;
+
+		src->next = NULL;
+		*lines_tail = src;
+		lines_tail = &src->next;
+
+		if (strlen(src->line)>8 && src->line[8] == ':')
+			src->EIP = strtoull(src->line, NULL, 16);
+		if (strlen(src->line)>8 && src->line[16] == ':')
+			src->EIP = strtoull(src->line, NULL, 16);
+	}
+	pclose(file);
+}
+
+static void record_precise_ip(uint64_t ip)
+{
+	struct source_line *line;
+
+	for (line = lines; line; line = line->next) {
+		if (line->EIP == ip)
+			line->count++;
+		if (line->EIP > ip)
+			break;
+	}
+}
+
+static void lookup_sym_in_vmlinux(struct sym_entry *sym)
+{
+	struct source_line *line;
+	char pattern[PATH_MAX];
+	sprintf(pattern, "<%s>:", sym->sym);
+
+	for (line = lines; line; line = line->next) {
+		if (strstr(line->line, pattern)) {
+			sym->source = line;
+			break;
+		}
+	}
+}
+
+static void show_lines(struct source_line *line_queue, int line_queue_count)
+{
+	int i;
+	struct source_line *line;
+
+	line = line_queue;
+	for (i = 0; i < line_queue_count; i++) {
+		printf("%8li\t%s\n", line->count, line->line);
+		line = line->next;
+	}
+}
+
+#define TRACE_COUNT     3
+
+static void show_details(struct sym_entry *sym)
+{
+	struct source_line *line;
+	struct source_line *line_queue = NULL;
+	int displayed = 0;
+	int line_queue_count = 0;
+
+	if (!sym->source)
+		lookup_sym_in_vmlinux(sym);
+	if (!sym->source)
+		return;
+
+	printf("Showing details for %s\n", sym->sym);
+
+	line = sym->source;
+	while (line) {
+		if (displayed && strstr(line->line, ">:"))
+			break;
+
+		if (!line_queue_count)
+			line_queue = line;
+		line_queue_count ++;
+
+		if (line->count >= count_filter) {
+			show_lines(line_queue, line_queue_count);
+			line_queue_count = 0;
+			line_queue = NULL;
+		} else if (line_queue_count > TRACE_COUNT) {
+			line_queue = line_queue->next;
+			line_queue_count --;
+		}
+
+		line->count = 0;
+		displayed++;
+		if (displayed > 300)
+			break;
+		line = line->next;
+	}
+}
+
+/*
+ * Binary search in the histogram table and record the hit:
+ */
+static void record_ip(uint64_t ip, int counter)
+{
+	int left_idx, middle_idx, right_idx, idx;
+	unsigned long left, middle, right;
+
+	record_precise_ip(ip);
+
+	left_idx = 0;
+	right_idx = sym_table_count-1;
+	assert(ip <= max_ip && ip >= min_ip);
+
+	while (left_idx + 1 < right_idx) {
+		middle_idx = (left_idx + right_idx) / 2;
+
+		left   = sym_table[  left_idx].addr;
+		middle = sym_table[middle_idx].addr;
+		right  = sym_table[ right_idx].addr;
+
+		if (!(left <= middle && middle <= right)) {
+			printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
+			printf("%d %d %d\n", left_idx, middle_idx, right_idx);
+		}
+		assert(left <= middle && middle <= right);
+		if (!(left <= ip && ip <= right)) {
+			printf(" left: %016lx\n", left);
+			printf("   ip: %016lx\n", (unsigned long)ip);
+			printf("right: %016lx\n", right);
+		}
+		assert(left <= ip && ip <= right);
+		/*
+		 * [ left .... target .... middle .... right ]
+		 *   => right := middle
+		 */
+		if (ip < middle) {
+			right_idx = middle_idx;
+			continue;
+		}
+		/*
+		 * [ left .... middle ... target ... right ]
+		 *   => left := middle
+		 */
+		left_idx = middle_idx;
+	}
+
+	idx = left_idx;
+
+	if (!sym_table[idx].skip)
+		sym_table[idx].count[counter]++;
+	else events--;
+}
+
+static void process_event(uint64_t ip, int counter)
+{
+	events++;
+
+	if (ip < min_ip || ip > max_ip) {
+		userspace_events++;
+		return;
+	}
+
+	record_ip(ip, counter);
+}
+
+static void process_options(int argc, char *argv[])
+{
+	int error = 0, counter;
+
+	if (strstr(argv[0], "perfstat"))
+		run_perfstat = 1;
+
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"count",	required_argument,	NULL, 'c'},
+			{"cpu",		required_argument,	NULL, 'C'},
+			{"delay",	required_argument,	NULL, 'd'},
+			{"dump_symtab",	no_argument,		NULL, 'D'},
+			{"event",	required_argument,	NULL, 'e'},
+			{"filter",	required_argument,	NULL, 'f'},
+			{"group",	required_argument,	NULL, 'g'},
+			{"help",	no_argument,		NULL, 'h'},
+			{"nmi",		required_argument,	NULL, 'n'},
+			{"mmap_info",	no_argument,		NULL, 'M'},
+			{"mmap_pages",	required_argument,	NULL, 'm'},
+			{"munmap_info",	no_argument,		NULL, 'U'},
+			{"pid",		required_argument,	NULL, 'p'},
+			{"realtime",	required_argument,	NULL, 'r'},
+			{"scale",	no_argument,		NULL, 'l'},
+			{"symbol",	required_argument,	NULL, 's'},
+			{"stat",	no_argument,		NULL, 'S'},
+			{"vmlinux",	required_argument,	NULL, 'x'},
+			{"zero",	no_argument,		NULL, 'z'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'a': system_wide			=	       1; break;
+		case 'c': default_interval		=   atoi(optarg); break;
+		case 'C':
+			/* CPU and PID are mutually exclusive */
+			if (tid != -1) {
+				printf("WARNING: CPU switch overriding PID\n");
+				sleep(1);
+				tid = -1;
+			}
+			profile_cpu			=   atoi(optarg); break;
+		case 'd': delay_secs			=   atoi(optarg); break;
+		case 'D': dump_symtab			=              1; break;
+
+		case 'e': error				= parse_events(optarg); break;
+
+		case 'f': count_filter			=   atoi(optarg); break;
+		case 'g': group				=   atoi(optarg); break;
+		case 'h':      				  display_help(); break;
+		case 'l': scale				=	       1; break;
+		case 'n': nmi				=   atoi(optarg); break;
+		case 'p':
+			/* CPU and PID are mutually exclusive */
+			if (profile_cpu != -1) {
+				printf("WARNING: PID switch overriding CPU\n");
+				sleep(1);
+				profile_cpu = -1;
+			}
+			tid				=   atoi(optarg); break;
+		case 'r': realtime_prio			=   atoi(optarg); break;
+		case 's': sym_filter			= strdup(optarg); break;
+		case 'S': run_perfstat			=	       1; break;
+		case 'x': vmlinux			= strdup(optarg); break;
+		case 'z': zero				=              1; break;
+		case 'm': mmap_pages			=   atoi(optarg); break;
+		case 'M': use_mmap			=              1; break;
+		case 'U': use_munmap			=              1; break;
+		default: error = 1; break;
+		}
+	}
+	if (error)
+		display_help();
+
+	if (!nr_counters) {
+		if (run_perfstat)
+			nr_counters = 8;
+		else {
+			nr_counters = 1;
+			event_id[0] = 0;
+		}
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (event_count[counter])
+			continue;
+
+		event_count[counter] = default_interval;
+	}
+}
+
+struct mmap_data {
+	int counter;
+	void *base;
+	unsigned int mask;
+	unsigned int prev;
+};
+
+static unsigned int mmap_read_head(struct mmap_data *md)
+{
+	struct perf_counter_mmap_page *pc = md->base;
+	int head;
+
+	head = pc->data_head;
+	rmb();
+
+	return head;
+}
+
+struct timeval last_read, this_read;
+
+static void mmap_read(struct mmap_data *md)
+{
+	unsigned int head = mmap_read_head(md);
+	unsigned int old = md->prev;
+	unsigned char *data = md->base + page_size;
+	int diff;
+
+	gettimeofday(&this_read, NULL);
+
+	/*
+	 * If we're further behind than half the buffer, there's a chance
+	 * the writer will bite our tail and screw up the events under us.
+	 *
+	 * If we somehow ended up ahead of the head, we got messed up.
+	 *
+	 * In either case, truncate and restart at head.
+	 */
+	diff = head - old;
+	if (diff > md->mask / 2 || diff < 0) {
+		struct timeval iv;
+		unsigned long msecs;
+
+		timersub(&this_read, &last_read, &iv);
+		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+		fprintf(stderr, "WARNING: failed to keep up with mmap data."
+				"  Last read %lu msecs ago.\n", msecs);
+
+		/*
+		 * head points to a known good entry, start there.
+		 */
+		old = head;
+	}
+
+	last_read = this_read;
+
+	for (; old != head;) {
+		struct ip_event {
+			struct perf_event_header header;
+			__u64 ip;
+			__u32 pid, tid;
+		};
+		struct mmap_event {
+			struct perf_event_header header;
+			__u32 pid, tid;
+			__u64 start;
+			__u64 len;
+			__u64 pgoff;
+			char filename[PATH_MAX];
+		};
+
+		typedef union event_union {
+			struct perf_event_header header;
+			struct ip_event ip;
+			struct mmap_event mmap;
+		} event_t;
+
+		event_t *event = (event_t *)&data[old & md->mask];
+
+		event_t event_copy;
+
+		unsigned int size = event->header.size;
+
+		/*
+		 * Event straddles the mmap boundary -- header should always
+		 * be inside due to u64 alignment of output.
+		 */
+		if ((old & md->mask) + size != ((old + size) & md->mask)) {
+			unsigned int offset = old;
+			unsigned int len = min(sizeof(*event), size), cpy;
+			void *dst = &event_copy;
+
+			do {
+				cpy = min(md->mask + 1 - (offset & md->mask), len);
+				memcpy(dst, &data[offset & md->mask], cpy);
+				offset += cpy;
+				dst += cpy;
+				len -= cpy;
+			} while (len);
+
+			event = &event_copy;
+		}
+
+		old += size;
+
+		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
+			if (event->header.type & PERF_RECORD_IP)
+				process_event(event->ip.ip, md->counter);
+		} else {
+			switch (event->header.type) {
+				case PERF_EVENT_MMAP:
+				case PERF_EVENT_MUNMAP:
+					printf("%s: %Lu %Lu %Lu %s\n",
+							event->header.type == PERF_EVENT_MMAP
+							? "mmap" : "munmap",
+							event->mmap.start,
+							event->mmap.len,
+							event->mmap.pgoff,
+							event->mmap.filename);
+					break;
+			}
+		}
+	}
+
+	md->prev = old;
+}
+
+int cmd_top(int argc, const char **argv, const char *prefix)
+{
+	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
+	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+	struct perf_counter_hw_event hw_event;
+	pthread_t thread;
+	int i, counter, group_fd, nr_poll = 0;
+	unsigned int cpu;
+	int ret;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	process_options(argc, argv);
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	if (run_perfstat)
+		return do_perfstat(argc, argv);
+
+	if (tid != -1 || profile_cpu != -1)
+		nr_cpus = 1;
+
+	parse_symbols();
+	if (vmlinux && sym_filter_entry)
+		parse_vmlinux(vmlinux);
+
+	for (i = 0; i < nr_cpus; i++) {
+		group_fd = -1;
+		for (counter = 0; counter < nr_counters; counter++) {
+
+			cpu	= profile_cpu;
+			if (tid == -1 && profile_cpu == -1)
+				cpu = i;
+
+			memset(&hw_event, 0, sizeof(hw_event));
+			hw_event.config		= event_id[counter];
+			hw_event.irq_period	= event_count[counter];
+			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
+			hw_event.nmi		= nmi;
+			hw_event.mmap		= use_mmap;
+			hw_event.munmap		= use_munmap;
+
+			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
+			if (fd[i][counter] < 0) {
+				int err = errno;
+				printf("kerneltop error: syscall returned with %d (%s)\n",
+					fd[i][counter], strerror(err));
+				if (err == EPERM)
+					printf("Are you root?\n");
+				exit(-1);
+			}
+			assert(fd[i][counter] >= 0);
+			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
+
+			/*
+			 * First counter acts as the group leader:
+			 */
+			if (group && group_fd == -1)
+				group_fd = fd[i][counter];
+
+			event_array[nr_poll].fd = fd[i][counter];
+			event_array[nr_poll].events = POLLIN;
+			nr_poll++;
+
+			mmap_array[i][counter].counter = counter;
+			mmap_array[i][counter].prev = 0;
+			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+					PROT_READ, MAP_SHARED, fd[i][counter], 0);
+			if (mmap_array[i][counter].base == MAP_FAILED) {
+				printf("kerneltop error: failed to mmap with %d (%s)\n",
+						errno, strerror(errno));
+				exit(-1);
+			}
+		}
+	}
+
+	if (pthread_create(&thread, NULL, display_thread, NULL)) {
+		printf("Could not create display thread.\n");
+		exit(-1);
+	}
+
+	if (realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			printf("Could not set realtime priority.\n");
+			exit(-1);
+		}
+	}
+
+	while (1) {
+		int hits = events;
+
+		for (i = 0; i < nr_cpus; i++) {
+			for (counter = 0; counter < nr_counters; counter++)
+				mmap_read(&mmap_array[i][counter]);
+		}
+
+		if (hits == events)
+			ret = poll(event_array, nr_poll, 100);
+	}
+
+	return 0;
+}
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
new file mode 100644
index 0000000..4163744
--- /dev/null
+++ b/Documentation/perf_counter/builtin.h
@@ -0,0 +1,18 @@
+#ifndef BUILTIN_H
+#define BUILTIN_H
+
+#include "util.h"
+#include "strbuf.h"
+
+extern const char perf_version_string[];
+extern const char perf_usage_string[];
+extern const char perf_more_info_string[];
+
+extern void list_common_cmds_help(void);
+extern const char *help_unknown_cmd(const char *cmd);
+extern void prune_packed_objects(int);
+extern int read_line_with_nul(char *buf, int size, FILE *file);
+extern int check_pager_config(const char *cmd);
+
+extern int cmd_top(int argc, const char **argv, const char *prefix);
+#endif
diff --git a/Documentation/perf_counter/cache.h b/Documentation/perf_counter/cache.h
new file mode 100644
index 0000000..dc08564
--- /dev/null
+++ b/Documentation/perf_counter/cache.h
@@ -0,0 +1,97 @@
+#ifndef CACHE_H
+#define CACHE_H
+
+#include "util.h"
+#include "strbuf.h"
+
+#define PERF_DIR_ENVIRONMENT "PERF_DIR"
+#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
+#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
+#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY"
+#define INDEX_ENVIRONMENT "PERF_INDEX_FILE"
+#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE"
+#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR"
+#define CONFIG_ENVIRONMENT "PERF_CONFIG"
+#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
+#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES"
+#define PERFATTRIBUTES_FILE ".perfattributes"
+#define INFOATTRIBUTES_FILE "info/attributes"
+#define ATTRIBUTE_MACRO_PREFIX "[attr]"
+
+typedef int (*config_fn_t)(const char *, const char *, void *);
+extern int perf_default_config(const char *, const char *, void *);
+extern int perf_config_from_file(config_fn_t fn, const char *, void *);
+extern int perf_config(config_fn_t fn, void *);
+extern int perf_parse_ulong(const char *, unsigned long *);
+extern int perf_config_int(const char *, const char *);
+extern unsigned long perf_config_ulong(const char *, const char *);
+extern int perf_config_bool_or_int(const char *, const char *, int *);
+extern int perf_config_bool(const char *, const char *);
+extern int perf_config_string(const char **, const char *, const char *);
+extern int perf_config_set(const char *, const char *);
+extern int perf_config_set_multivar(const char *, const char *, const char *, int);
+extern int perf_config_rename_section(const char *, const char *);
+extern const char *perf_etc_perfconfig(void);
+extern int check_repository_format_version(const char *var, const char *value, void *cb);
+extern int perf_config_system(void);
+extern int perf_config_global(void);
+extern int config_error_nonbool(const char *);
+extern const char *config_exclusive_filename;
+
+#define MAX_PERFNAME (1000)
+extern char perf_default_email[MAX_PERFNAME];
+extern char perf_default_name[MAX_PERFNAME];
+extern int user_ident_explicitly_given;
+
+extern const char *perf_log_output_encoding;
+extern const char *perf_mailmap_file;
+
+/* IO helper functions */
+extern void maybe_flush_or_die(FILE *, const char *);
+extern int copy_fd(int ifd, int ofd);
+extern int copy_file(const char *dst, const char *src, int mode);
+extern ssize_t read_in_full(int fd, void *buf, size_t count);
+extern ssize_t write_in_full(int fd, const void *buf, size_t count);
+extern void write_or_die(int fd, const void *buf, size_t count);
+extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
+extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
+extern void fsync_or_die(int fd, const char *);
+
+/* pager.c */
+extern void setup_pager(void);
+extern const char *pager_program;
+extern int pager_in_use(void);
+extern int pager_use_color;
+
+extern const char *editor_program;
+extern const char *excludes_file;
+
+char *alias_lookup(const char *alias);
+int split_cmdline(char *cmdline, const char ***argv);
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+	do { \
+		if ((nr) > alloc) { \
+			if (alloc_nr(alloc) < (nr)) \
+				alloc = (nr); \
+			else \
+				alloc = alloc_nr(alloc); \
+			x = xrealloc((x), alloc * sizeof(*(x))); \
+		} \
+	} while(0)
+
+
+static inline int is_absolute_path(const char *path)
+{
+	return path[0] == '/';
+}
+#endif /* CACHE_H */
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt
new file mode 100644
index 0000000..1eab365
--- /dev/null
+++ b/Documentation/perf_counter/command-list.txt
@@ -0,0 +1,4 @@
+# List of known perf commands.
+# command name				category [deprecated] [common]
+perf-top                                mainporcelain common
+
diff --git a/Documentation/perf_counter/config.c b/Documentation/perf_counter/config.c
new file mode 100644
index 0000000..672d539
--- /dev/null
+++ b/Documentation/perf_counter/config.c
@@ -0,0 +1,966 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ * Copyright (C) Johannes Schindelin, 2005
+ *
+ */
+#include "util.h"
+#include "cache.h"
+#include "exec_cmd.h"
+
+#define MAXNAME (256)
+
+static FILE *config_file;
+static const char *config_file_name;
+static int config_linenr;
+static int config_file_eof;
+static int zlib_compression_seen;
+
+const char *config_exclusive_filename = NULL;
+
+static int get_next_char(void)
+{
+	int c;
+	FILE *f;
+
+	c = '\n';
+	if ((f = config_file) != NULL) {
+		c = fgetc(f);
+		if (c == '\r') {
+			/* DOS like systems */
+			c = fgetc(f);
+			if (c != '\n') {
+				ungetc(c, f);
+				c = '\r';
+			}
+		}
+		if (c == '\n')
+			config_linenr++;
+		if (c == EOF) {
+			config_file_eof = 1;
+			c = '\n';
+		}
+	}
+	return c;
+}
+
+static char *parse_value(void)
+{
+	static char value[1024];
+	int quote = 0, comment = 0, len = 0, space = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (len >= sizeof(value) - 1)
+			return NULL;
+		if (c == '\n') {
+			if (quote)
+				return NULL;
+			value[len] = 0;
+			return value;
+		}
+		if (comment)
+			continue;
+		if (isspace(c) && !quote) {
+			space = 1;
+			continue;
+		}
+		if (!quote) {
+			if (c == ';' || c == '#') {
+				comment = 1;
+				continue;
+			}
+		}
+		if (space) {
+			if (len)
+				value[len++] = ' ';
+			space = 0;
+		}
+		if (c == '\\') {
+			c = get_next_char();
+			switch (c) {
+			case '\n':
+				continue;
+			case 't':
+				c = '\t';
+				break;
+			case 'b':
+				c = '\b';
+				break;
+			case 'n':
+				c = '\n';
+				break;
+			/* Some characters escape as themselves */
+			case '\\': case '"':
+				break;
+			/* Reject unknown escape sequences */
+			default:
+				return NULL;
+			}
+			value[len++] = c;
+			continue;
+		}
+		if (c == '"') {
+			quote = 1-quote;
+			continue;
+		}
+		value[len++] = c;
+	}
+}
+
+static inline int iskeychar(int c)
+{
+	return isalnum(c) || c == '-';
+}
+
+static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
+{
+	int c;
+	char *value;
+
+	/* Get the full name */
+	for (;;) {
+		c = get_next_char();
+		if (config_file_eof)
+			break;
+		if (!iskeychar(c))
+			break;
+		name[len++] = tolower(c);
+		if (len >= MAXNAME)
+			return -1;
+	}
+	name[len] = 0;
+	while (c == ' ' || c == '\t')
+		c = get_next_char();
+
+	value = NULL;
+	if (c != '\n') {
+		if (c != '=')
+			return -1;
+		value = parse_value();
+		if (!value)
+			return -1;
+	}
+	return fn(name, value, data);
+}
+
+static int get_extended_base_var(char *name, int baselen, int c)
+{
+	do {
+		if (c == '\n')
+			return -1;
+		c = get_next_char();
+	} while (isspace(c));
+
+	/* We require the format to be '[base "extension"]' */
+	if (c != '"')
+		return -1;
+	name[baselen++] = '.';
+
+	for (;;) {
+		int c = get_next_char();
+		if (c == '\n')
+			return -1;
+		if (c == '"')
+			break;
+		if (c == '\\') {
+			c = get_next_char();
+			if (c == '\n')
+				return -1;
+		}
+		name[baselen++] = c;
+		if (baselen > MAXNAME / 2)
+			return -1;
+	}
+
+	/* Final ']' */
+	if (get_next_char() != ']')
+		return -1;
+	return baselen;
+}
+
+static int get_base_var(char *name)
+{
+	int baselen = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (config_file_eof)
+			return -1;
+		if (c == ']')
+			return baselen;
+		if (isspace(c))
+			return get_extended_base_var(name, baselen, c);
+		if (!iskeychar(c) && c != '.')
+			return -1;
+		if (baselen > MAXNAME / 2)
+			return -1;
+		name[baselen++] = tolower(c);
+	}
+}
+
+static int perf_parse_file(config_fn_t fn, void *data)
+{
+	int comment = 0;
+	int baselen = 0;
+	static char var[MAXNAME];
+
+	/* U+FEFF Byte Order Mark in UTF8 */
+	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
+	const unsigned char *bomptr = utf8_bom;
+
+	for (;;) {
+		int c = get_next_char();
+		if (bomptr && *bomptr) {
+			/* We are at the file beginning; skip UTF8-encoded BOM
+			 * if present. Sane editors won't put this in on their
+			 * own, but e.g. Windows Notepad will do it happily. */
+			if ((unsigned char) c == *bomptr) {
+				bomptr++;
+				continue;
+			} else {
+				/* Do not tolerate partial BOM. */
+				if (bomptr != utf8_bom)
+					break;
+				/* No BOM at file beginning. Cool. */
+				bomptr = NULL;
+			}
+		}
+		if (c == '\n') {
+			if (config_file_eof)
+				return 0;
+			comment = 0;
+			continue;
+		}
+		if (comment || isspace(c))
+			continue;
+		if (c == '#' || c == ';') {
+			comment = 1;
+			continue;
+		}
+		if (c == '[') {
+			baselen = get_base_var(var);
+			if (baselen <= 0)
+				break;
+			var[baselen++] = '.';
+			var[baselen] = 0;
+			continue;
+		}
+		if (!isalpha(c))
+			break;
+		var[baselen] = tolower(c);
+		if (get_value(fn, data, var, baselen+1) < 0)
+			break;
+	}
+	die("bad config file line %d in %s", config_linenr, config_file_name);
+}
+
+static int parse_unit_factor(const char *end, unsigned long *val)
+{
+	if (!*end)
+		return 1;
+	else if (!strcasecmp(end, "k")) {
+		*val *= 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "m")) {
+		*val *= 1024 * 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "g")) {
+		*val *= 1024 * 1024 * 1024;
+		return 1;
+	}
+	return 0;
+}
+
+static int perf_parse_long(const char *value, long *ret)
+{
+	if (value && *value) {
+		char *end;
+		long val = strtol(value, &end, 0);
+		unsigned long factor = 1;
+		if (!parse_unit_factor(end, &factor))
+			return 0;
+		*ret = val * factor;
+		return 1;
+	}
+	return 0;
+}
+
+int perf_parse_ulong(const char *value, unsigned long *ret)
+{
+	if (value && *value) {
+		char *end;
+		unsigned long val = strtoul(value, &end, 0);
+		if (!parse_unit_factor(end, &val))
+			return 0;
+		*ret = val;
+		return 1;
+	}
+	return 0;
+}
+
+static void die_bad_config(const char *name)
+{
+	if (config_file_name)
+		die("bad config value for '%s' in %s", name, config_file_name);
+	die("bad config value for '%s'", name);
+}
+
+int perf_config_int(const char *name, const char *value)
+{
+	long ret = 0;
+	if (!perf_parse_long(value, &ret))
+		die_bad_config(name);
+	return ret;
+}
+
+unsigned long perf_config_ulong(const char *name, const char *value)
+{
+	unsigned long ret;
+	if (!perf_parse_ulong(value, &ret))
+		die_bad_config(name);
+	return ret;
+}
+
+int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
+{
+	*is_bool = 1;
+	if (!value)
+		return 1;
+	if (!*value)
+		return 0;
+	if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
+		return 1;
+	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
+		return 0;
+	*is_bool = 0;
+	return perf_config_int(name, value);
+}
+
+int perf_config_bool(const char *name, const char *value)
+{
+	int discard;
+	return !!perf_config_bool_or_int(name, value, &discard);
+}
+
+int perf_config_string(const char **dest, const char *var, const char *value)
+{
+	if (!value)
+		return config_error_nonbool(var);
+	*dest = strdup(value);
+	return 0;
+}
+
+static int perf_default_core_config(const char *var, const char *value)
+{
+	/* Add other config variables here and to Documentation/config.txt. */
+	return 0;
+}
+
+int perf_default_config(const char *var, const char *value, void *dummy)
+{
+	if (!prefixcmp(var, "core."))
+		return perf_default_core_config(var, value);
+
+	/* Add other config variables here and to Documentation/config.txt. */
+	return 0;
+}
+
+int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+{
+	int ret;
+	FILE *f = fopen(filename, "r");
+
+	ret = -1;
+	if (f) {
+		config_file = f;
+		config_file_name = filename;
+		config_linenr = 1;
+		config_file_eof = 0;
+		ret = perf_parse_file(fn, data);
+		fclose(f);
+		config_file_name = NULL;
+	}
+	return ret;
+}
+
+const char *perf_etc_perfconfig(void)
+{
+	static const char *system_wide;
+	if (!system_wide)
+		system_wide = system_path(ETC_PERFCONFIG);
+	return system_wide;
+}
+
+static int perf_env_bool(const char *k, int def)
+{
+	const char *v = getenv(k);
+	return v ? perf_config_bool(k, v) : def;
+}
+
+int perf_config_system(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
+}
+
+int perf_config_global(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
+}
+
+int perf_config(config_fn_t fn, void *data)
+{
+	int ret = 0, found = 0;
+	char *repo_config = NULL;
+	const char *home = NULL;
+
+	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+	if (config_exclusive_filename)
+		return perf_config_from_file(fn, config_exclusive_filename, data);
+	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+		ret += perf_config_from_file(fn, perf_etc_perfconfig(),
+					    data);
+		found += 1;
+	}
+
+	home = getenv("HOME");
+	if (perf_config_global() && home) {
+		char *user_config = strdup(mkpath("%s/.perfconfig", home));
+		if (!access(user_config, R_OK)) {
+			ret += perf_config_from_file(fn, user_config, data);
+			found += 1;
+		}
+		free(user_config);
+	}
+
+	repo_config = perf_pathdup("config");
+	if (!access(repo_config, R_OK)) {
+		ret += perf_config_from_file(fn, repo_config, data);
+		found += 1;
+	}
+	free(repo_config);
+	if (found == 0)
+		return -1;
+	return ret;
+}
+
+/*
+ * Find all the stuff for perf_config_set() below.
+ */
+
+#define MAX_MATCHES 512
+
+static struct {
+	int baselen;
+	char* key;
+	int do_not_match;
+	regex_t* value_regex;
+	int multi_replace;
+	size_t offset[MAX_MATCHES];
+	enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state;
+	int seen;
+} store;
+
+static int matches(const char* key, const char* value)
+{
+	return !strcmp(key, store.key) &&
+		(store.value_regex == NULL ||
+		 (store.do_not_match ^
+		  !regexec(store.value_regex, value, 0, NULL, 0)));
+}
+
+static int store_aux(const char* key, const char* value, void *cb)
+{
+	const char *ep;
+	size_t section_len;
+
+	switch (store.state) {
+	case KEY_SEEN:
+		if (matches(key, value)) {
+			if (store.seen == 1 && store.multi_replace == 0) {
+				warning("%s has multiple values", key);
+			} else if (store.seen >= MAX_MATCHES) {
+				error("too many matches for %s", key);
+				return 1;
+			}
+
+			store.offset[store.seen] = ftell(config_file);
+			store.seen++;
+		}
+		break;
+	case SECTION_SEEN:
+		/*
+		 * What we are looking for is in store.key (both
+		 * section and var), and its section part is baselen
+		 * long.  We found key (again, both section and var).
+		 * We would want to know if this key is in the same
+		 * section as what we are looking for.  We already
+		 * know we are in the same section as what should
+		 * hold store.key.
+		 */
+		ep = strrchr(key, '.');
+		section_len = ep - key;
+
+		if ((section_len != store.baselen) ||
+		    memcmp(key, store.key, section_len+1)) {
+			store.state = SECTION_END_SEEN;
+			break;
+		}
+
+		/*
+		 * Do not increment matches: this is no match, but we
+		 * just made sure we are in the desired section.
+		 */
+		store.offset[store.seen] = ftell(config_file);
+		/* fallthru */
+	case SECTION_END_SEEN:
+	case START:
+		if (matches(key, value)) {
+			store.offset[store.seen] = ftell(config_file);
+			store.state = KEY_SEEN;
+			store.seen++;
+		} else {
+			if (strrchr(key, '.') - key == store.baselen &&
+			      !strncmp(key, store.key, store.baselen)) {
+					store.state = SECTION_SEEN;
+					store.offset[store.seen] = ftell(config_file);
+			}
+		}
+	}
+	return 0;
+}
+
+static int write_error(const char *filename)
+{
+	error("failed to write new configuration file %s", filename);
+
+	/* Same error code as "failed to rename". */
+	return 4;
+}
+
+static int store_write_section(int fd, const char* key)
+{
+	const char *dot;
+	int i, success;
+	struct strbuf sb = STRBUF_INIT;
+
+	dot = memchr(key, '.', store.baselen);
+	if (dot) {
+		strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key);
+		for (i = dot - key + 1; i < store.baselen; i++) {
+			if (key[i] == '"' || key[i] == '\\')
+				strbuf_addch(&sb, '\\');
+			strbuf_addch(&sb, key[i]);
+		}
+		strbuf_addstr(&sb, "\"]\n");
+	} else {
+		strbuf_addf(&sb, "[%.*s]\n", store.baselen, key);
+	}
+
+	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
+	strbuf_release(&sb);
+
+	return success;
+}
+
+static int store_write_pair(int fd, const char* key, const char* value)
+{
+	int i, success;
+	int length = strlen(key + store.baselen + 1);
+	const char *quote = "";
+	struct strbuf sb = STRBUF_INIT;
+
+	/*
+	 * Check to see if the value needs to be surrounded with a dq pair.
+	 * Note that problematic characters are always backslash-quoted; this
+	 * check is about not losing leading or trailing SP and strings that
+	 * follow beginning-of-comment characters (i.e. ';' and '#') by the
+	 * configuration parser.
+	 */
+	if (value[0] == ' ')
+		quote = "\"";
+	for (i = 0; value[i]; i++)
+		if (value[i] == ';' || value[i] == '#')
+			quote = "\"";
+	if (i && value[i - 1] == ' ')
+		quote = "\"";
+
+	strbuf_addf(&sb, "\t%.*s = %s",
+		    length, key + store.baselen + 1, quote);
+
+	for (i = 0; value[i]; i++)
+		switch (value[i]) {
+		case '\n':
+			strbuf_addstr(&sb, "\\n");
+			break;
+		case '\t':
+			strbuf_addstr(&sb, "\\t");
+			break;
+		case '"':
+		case '\\':
+			strbuf_addch(&sb, '\\');
+		default:
+			strbuf_addch(&sb, value[i]);
+			break;
+		}
+	strbuf_addf(&sb, "%s\n", quote);
+
+	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
+	strbuf_release(&sb);
+
+	return success;
+}
+
+static ssize_t find_beginning_of_line(const char* contents, size_t size,
+	size_t offset_, int* found_bracket)
+{
+	size_t equal_offset = size, bracket_offset = size;
+	ssize_t offset;
+
+contline:
+	for (offset = offset_-2; offset > 0
+			&& contents[offset] != '\n'; offset--)
+		switch (contents[offset]) {
+			case '=': equal_offset = offset; break;
+			case ']': bracket_offset = offset; break;
+		}
+	if (offset > 0 && contents[offset-1] == '\\') {
+		offset_ = offset;
+		goto contline;
+	}
+	if (bracket_offset < equal_offset) {
+		*found_bracket = 1;
+		offset = bracket_offset+1;
+	} else
+		offset++;
+
+	return offset;
+}
+
+int perf_config_set(const char* key, const char* value)
+{
+	return perf_config_set_multivar(key, value, NULL, 0);
+}
+
+/*
+ * If value==NULL, unset in (remove from) config,
+ * if value_regex!=NULL, disregard key/value pairs where value does not match.
+ * if multi_replace==0, nothing, or only one matching key/value is replaced,
+ *     else all matching key/values (regardless how many) are removed,
+ *     before the new pair is written.
+ *
+ * Returns 0 on success.
+ *
+ * This function does this:
+ *
+ * - it locks the config file by creating ".perf/config.lock"
+ *
+ * - it then parses the config using store_aux() as validator to find
+ *   the position on the key/value pair to replace. If it is to be unset,
+ *   it must be found exactly once.
+ *
+ * - the config file is mmap()ed and the part before the match (if any) is
+ *   written to the lock file, then the changed part and the rest.
+ *
+ * - the config file is removed and the lock file rename()d to it.
+ *
+ */
+int perf_config_set_multivar(const char* key, const char* value,
+	const char* value_regex, int multi_replace)
+{
+	int i, dot;
+	int fd = -1, in_fd;
+	int ret;
+	char* config_filename;
+	const char* last_dot = strrchr(key, '.');
+
+	if (config_exclusive_filename)
+		config_filename = strdup(config_exclusive_filename);
+	else
+		config_filename = perf_pathdup("config");
+
+	/*
+	 * Since "key" actually contains the section name and the real
+	 * key name separated by a dot, we have to know where the dot is.
+	 */
+
+	if (last_dot == NULL) {
+		error("key does not contain a section: %s", key);
+		ret = 2;
+		goto out_free;
+	}
+	store.baselen = last_dot - key;
+
+	store.multi_replace = multi_replace;
+
+	/*
+	 * Validate the key and while at it, lower case it for matching.
+	 */
+	store.key = malloc(strlen(key) + 1);
+	dot = 0;
+	for (i = 0; key[i]; i++) {
+		unsigned char c = key[i];
+		if (c == '.')
+			dot = 1;
+		/* Leave the extended basename untouched.. */
+		if (!dot || i > store.baselen) {
+			if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) {
+				error("invalid key: %s", key);
+				free(store.key);
+				ret = 1;
+				goto out_free;
+			}
+			c = tolower(c);
+		} else if (c == '\n') {
+			error("invalid key (newline): %s", key);
+			free(store.key);
+			ret = 1;
+			goto out_free;
+		}
+		store.key[i] = c;
+	}
+	store.key[i] = 0;
+
+	/*
+	 * If .perf/config does not exist yet, write a minimal version.
+	 */
+	in_fd = open(config_filename, O_RDONLY);
+	if ( in_fd < 0 ) {
+		free(store.key);
+
+		if ( ENOENT != errno ) {
+			error("opening %s: %s", config_filename,
+			      strerror(errno));
+			ret = 3; /* same as "invalid config file" */
+			goto out_free;
+		}
+		/* if nothing to unset, error out */
+		if (value == NULL) {
+			ret = 5;
+			goto out_free;
+		}
+
+		store.key = (char*)key;
+		if (!store_write_section(fd, key) ||
+		    !store_write_pair(fd, key, value))
+			goto write_err_out;
+	} else {
+		struct stat st;
+		char* contents;
+		size_t contents_sz, copy_begin, copy_end;
+		int i, new_line = 0;
+
+		if (value_regex == NULL)
+			store.value_regex = NULL;
+		else {
+			if (value_regex[0] == '!') {
+				store.do_not_match = 1;
+				value_regex++;
+			} else
+				store.do_not_match = 0;
+
+			store.value_regex = (regex_t*)malloc(sizeof(regex_t));
+			if (regcomp(store.value_regex, value_regex,
+					REG_EXTENDED)) {
+				error("invalid pattern: %s", value_regex);
+				free(store.value_regex);
+				ret = 6;
+				goto out_free;
+			}
+		}
+
+		store.offset[0] = 0;
+		store.state = START;
+		store.seen = 0;
+
+		/*
+		 * After this, store.offset will contain the *end* offset
+		 * of the last match, or remain at 0 if no match was found.
+		 * As a side effect, we make sure to transform only a valid
+		 * existing config file.
+		 */
+		if (perf_config_from_file(store_aux, config_filename, NULL)) {
+			error("invalid config file %s", config_filename);
+			free(store.key);
+			if (store.value_regex != NULL) {
+				regfree(store.value_regex);
+				free(store.value_regex);
+			}
+			ret = 3;
+			goto out_free;
+		}
+
+		free(store.key);
+		if (store.value_regex != NULL) {
+			regfree(store.value_regex);
+			free(store.value_regex);
+		}
+
+		/* if nothing to unset, or too many matches, error out */
+		if ((store.seen == 0 && value == NULL) ||
+				(store.seen > 1 && multi_replace == 0)) {
+			ret = 5;
+			goto out_free;
+		}
+
+		fstat(in_fd, &st);
+		contents_sz = xsize_t(st.st_size);
+		contents = mmap(NULL, contents_sz, PROT_READ,
+			MAP_PRIVATE, in_fd, 0);
+		close(in_fd);
+
+		if (store.seen == 0)
+			store.seen = 1;
+
+		for (i = 0, copy_begin = 0; i < store.seen; i++) {
+			if (store.offset[i] == 0) {
+				store.offset[i] = copy_end = contents_sz;
+			} else if (store.state != KEY_SEEN) {
+				copy_end = store.offset[i];
+			} else
+				copy_end = find_beginning_of_line(
+					contents, contents_sz,
+					store.offset[i]-2, &new_line);
+
+			if (copy_end > 0 && contents[copy_end-1] != '\n')
+				new_line = 1;
+
+			/* write the first part of the config */
+			if (copy_end > copy_begin) {
+				if (write_in_full(fd, contents + copy_begin,
+						  copy_end - copy_begin) <
+				    copy_end - copy_begin)
+					goto write_err_out;
+				if (new_line &&
+				    write_in_full(fd, "\n", 1) != 1)
+					goto write_err_out;
+			}
+			copy_begin = store.offset[i];
+		}
+
+		/* write the pair (value == NULL means unset) */
+		if (value != NULL) {
+			if (store.state == START) {
+				if (!store_write_section(fd, key))
+					goto write_err_out;
+			}
+			if (!store_write_pair(fd, key, value))
+				goto write_err_out;
+		}
+
+		/* write the rest of the config */
+		if (copy_begin < contents_sz)
+			if (write_in_full(fd, contents + copy_begin,
+					  contents_sz - copy_begin) <
+			    contents_sz - copy_begin)
+				goto write_err_out;
+
+		munmap(contents, contents_sz);
+	}
+
+	ret = 0;
+
+out_free:
+	free(config_filename);
+	return ret;
+
+write_err_out:
+	goto out_free;
+
+}
+
+static int section_name_match (const char *buf, const char *name)
+{
+	int i = 0, j = 0, dot = 0;
+	for (; buf[i] && buf[i] != ']'; i++) {
+		if (!dot && isspace(buf[i])) {
+			dot = 1;
+			if (name[j++] != '.')
+				break;
+			for (i++; isspace(buf[i]); i++)
+				; /* do nothing */
+			if (buf[i] != '"')
+				break;
+			continue;
+		}
+		if (buf[i] == '\\' && dot)
+			i++;
+		else if (buf[i] == '"' && dot) {
+			for (i++; isspace(buf[i]); i++)
+				; /* do_nothing */
+			break;
+		}
+		if (buf[i] != name[j++])
+			break;
+	}
+	return (buf[i] == ']' && name[j] == 0);
+}
+
+/* if new_name == NULL, the section is removed instead */
+int perf_config_rename_section(const char *old_name, const char *new_name)
+{
+	int ret = 0, remove = 0;
+	char *config_filename;
+	int out_fd;
+	char buf[1024];
+
+	if (config_exclusive_filename)
+		config_filename = strdup(config_exclusive_filename);
+	else
+		config_filename = perf_pathdup("config");
+	if (out_fd < 0) {
+		ret = error("could not lock config file %s", config_filename);
+		goto out;
+	}
+
+	if (!(config_file = fopen(config_filename, "rb"))) {
+		/* no config file means nothing to rename, no error */
+		goto unlock_and_out;
+	}
+
+	while (fgets(buf, sizeof(buf), config_file)) {
+		int i;
+		int length;
+		for (i = 0; buf[i] && isspace(buf[i]); i++)
+			; /* do nothing */
+		if (buf[i] == '[') {
+			/* it's a section */
+			if (section_name_match (&buf[i+1], old_name)) {
+				ret++;
+				if (new_name == NULL) {
+					remove = 1;
+					continue;
+				}
+				store.baselen = strlen(new_name);
+				if (!store_write_section(out_fd, new_name)) {
+					goto out;
+				}
+				continue;
+			}
+			remove = 0;
+		}
+		if (remove)
+			continue;
+		length = strlen(buf);
+		if (write_in_full(out_fd, buf, length) != length) {
+			goto out;
+		}
+	}
+	fclose(config_file);
+ unlock_and_out:
+ out:
+	free(config_filename);
+	return ret;
+}
+
+/*
+ * Call this to report error for your variable that should not
+ * get a boolean value (i.e. "[my] var" means "true").
+ */
+int config_error_nonbool(const char *var)
+{
+	return error("Missing value for '%s'", var);
+}
diff --git a/Documentation/perf_counter/ctype.c b/Documentation/perf_counter/ctype.c
new file mode 100644
index 0000000..b90ec00
--- /dev/null
+++ b/Documentation/perf_counter/ctype.c
@@ -0,0 +1,26 @@
+/*
+ * Sane locale-independent, ASCII ctype.
+ *
+ * No surprises, and works with signed and unsigned chars.
+ */
+#include "cache.h"
+
+enum {
+	S = GIT_SPACE,
+	A = GIT_ALPHA,
+	D = GIT_DIGIT,
+	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
+	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | * */
+};
+
+unsigned char sane_ctype[256] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
+	S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0,		/*  32.. 47 */
+	D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G,		/*  48.. 63 */
+	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
+	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0,		/*  80.. 95 */
+	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
+	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0,		/* 112..127 */
+	/* Nothing in the 128.. range */
+};
diff --git a/Documentation/perf_counter/exec_cmd.c b/Documentation/perf_counter/exec_cmd.c
new file mode 100644
index 0000000..d392922
--- /dev/null
+++ b/Documentation/perf_counter/exec_cmd.c
@@ -0,0 +1,165 @@
+#include "cache.h"
+#include "exec_cmd.h"
+#include "quote.h"
+#define MAX_ARGS	32
+
+extern char **environ;
+static const char *argv_exec_path;
+static const char *argv0_path;
+
+const char *system_path(const char *path)
+{
+#ifdef RUNTIME_PREFIX
+	static const char *prefix;
+#else
+	static const char *prefix = PREFIX;
+#endif
+	struct strbuf d = STRBUF_INIT;
+
+	if (is_absolute_path(path))
+		return path;
+
+#ifdef RUNTIME_PREFIX
+	assert(argv0_path);
+	assert(is_absolute_path(argv0_path));
+
+	if (!prefix &&
+	    !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
+	    !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
+	    !(prefix = strip_path_suffix(argv0_path, "perf"))) {
+		prefix = PREFIX;
+		fprintf(stderr, "RUNTIME_PREFIX requested, "
+				"but prefix computation failed.  "
+				"Using static fallback '%s'.\n", prefix);
+	}
+#endif
+
+	strbuf_addf(&d, "%s/%s", prefix, path);
+	path = strbuf_detach(&d, NULL);
+	return path;
+}
+
+const char *perf_extract_argv0_path(const char *argv0)
+{
+	const char *slash;
+
+	if (!argv0 || !*argv0)
+		return NULL;
+	slash = argv0 + strlen(argv0);
+
+	while (argv0 <= slash && !is_dir_sep(*slash))
+		slash--;
+
+	if (slash >= argv0) {
+		argv0_path = strndup(argv0, slash - argv0);
+		return slash + 1;
+	}
+
+	return argv0;
+}
+
+void perf_set_argv_exec_path(const char *exec_path)
+{
+	argv_exec_path = exec_path;
+	/*
+	 * Propagate this setting to external programs.
+	 */
+	setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
+}
+
+
+/* Returns the highest-priority, location to look for perf programs. */
+const char *perf_exec_path(void)
+{
+	const char *env;
+
+	if (argv_exec_path)
+		return argv_exec_path;
+
+	env = getenv(EXEC_PATH_ENVIRONMENT);
+	if (env && *env) {
+		return env;
+	}
+
+	return system_path(PERF_EXEC_PATH);
+}
+
+static void add_path(struct strbuf *out, const char *path)
+{
+	if (path && *path) {
+		if (is_absolute_path(path))
+			strbuf_addstr(out, path);
+		else
+			strbuf_addstr(out, make_nonrelative_path(path));
+
+		strbuf_addch(out, PATH_SEP);
+	}
+}
+
+void setup_path(void)
+{
+	const char *old_path = getenv("PATH");
+	struct strbuf new_path = STRBUF_INIT;
+
+	add_path(&new_path, perf_exec_path());
+	add_path(&new_path, argv0_path);
+
+	if (old_path)
+		strbuf_addstr(&new_path, old_path);
+	else
+		strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin");
+
+	setenv("PATH", new_path.buf, 1);
+
+	strbuf_release(&new_path);
+}
+
+const char **prepare_perf_cmd(const char **argv)
+{
+	int argc;
+	const char **nargv;
+
+	for (argc = 0; argv[argc]; argc++)
+		; /* just counting */
+	nargv = malloc(sizeof(*nargv) * (argc + 2));
+
+	nargv[0] = "perf";
+	for (argc = 0; argv[argc]; argc++)
+		nargv[argc + 1] = argv[argc];
+	nargv[argc + 1] = NULL;
+	return nargv;
+}
+
+int execv_perf_cmd(const char **argv) {
+	const char **nargv = prepare_perf_cmd(argv);
+
+	/* execvp() can only ever return if it fails */
+	execvp("perf", (char **)nargv);
+
+	free(nargv);
+	return -1;
+}
+
+
+int execl_perf_cmd(const char *cmd,...)
+{
+	int argc;
+	const char *argv[MAX_ARGS + 1];
+	const char *arg;
+	va_list param;
+
+	va_start(param, cmd);
+	argv[0] = cmd;
+	argc = 1;
+	while (argc < MAX_ARGS) {
+		arg = argv[argc++] = va_arg(param, char *);
+		if (!arg)
+			break;
+	}
+	va_end(param);
+	if (MAX_ARGS <= argc)
+		return error("too many args to run %s", cmd);
+
+	argv[argc] = NULL;
+	return execv_perf_cmd(argv);
+}
diff --git a/Documentation/perf_counter/exec_cmd.h b/Documentation/perf_counter/exec_cmd.h
new file mode 100644
index 0000000..effe25e
--- /dev/null
+++ b/Documentation/perf_counter/exec_cmd.h
@@ -0,0 +1,13 @@
+#ifndef PERF_EXEC_CMD_H
+#define PERF_EXEC_CMD_H
+
+extern void perf_set_argv_exec_path(const char *exec_path);
+extern const char *perf_extract_argv0_path(const char *path);
+extern const char *perf_exec_path(void);
+extern void setup_path(void);
+extern const char **prepare_perf_cmd(const char **argv);
+extern int execv_perf_cmd(const char **argv); /* NULL terminated */
+extern int execl_perf_cmd(const char *cmd, ...);
+extern const char *system_path(const char *path);
+
+#endif /* PERF_EXEC_CMD_H */
diff --git a/Documentation/perf_counter/generate-cmdlist.sh b/Documentation/perf_counter/generate-cmdlist.sh
new file mode 100755
index 0000000..75c68d9
--- /dev/null
+++ b/Documentation/perf_counter/generate-cmdlist.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+echo "/* Automatically generated by $0 */
+struct cmdname_help
+{
+    char name[16];
+    char help[80];
+};
+
+static struct cmdname_help common_cmds[] = {"
+
+sed -n -e 's/^git-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/git-'"$cmd"'/H
+     ${
+            x
+            s/.*git-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/git-$cmd.txt"
+done
+echo "};"
diff --git a/Documentation/perf_counter/help.c b/Documentation/perf_counter/help.c
new file mode 100644
index 0000000..ec01167
--- /dev/null
+++ b/Documentation/perf_counter/help.c
@@ -0,0 +1,366 @@
+#include "cache.h"
+#include "builtin.h"
+#include "exec_cmd.h"
+#include "levenshtein.h"
+#include "help.h"
+
+/* most GUI terminals set COLUMNS (although some don't export it) */
+static int term_columns(void)
+{
+	char *col_string = getenv("COLUMNS");
+	int n_cols;
+
+	if (col_string && (n_cols = atoi(col_string)) > 0)
+		return n_cols;
+
+#ifdef TIOCGWINSZ
+	{
+		struct winsize ws;
+		if (!ioctl(1, TIOCGWINSZ, &ws)) {
+			if (ws.ws_col)
+				return ws.ws_col;
+		}
+	}
+#endif
+
+	return 80;
+}
+
+void add_cmdname(struct cmdnames *cmds, const char *name, int len)
+{
+	struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
+
+	ent->len = len;
+	memcpy(ent->name, name, len);
+	ent->name[len] = 0;
+
+	ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
+	cmds->names[cmds->cnt++] = ent;
+}
+
+static void clean_cmdnames(struct cmdnames *cmds)
+{
+	int i;
+	for (i = 0; i < cmds->cnt; ++i)
+		free(cmds->names[i]);
+	free(cmds->names);
+	cmds->cnt = 0;
+	cmds->alloc = 0;
+}
+
+static int cmdname_compare(const void *a_, const void *b_)
+{
+	struct cmdname *a = *(struct cmdname **)a_;
+	struct cmdname *b = *(struct cmdname **)b_;
+	return strcmp(a->name, b->name);
+}
+
+static void uniq(struct cmdnames *cmds)
+{
+	int i, j;
+
+	if (!cmds->cnt)
+		return;
+
+	for (i = j = 1; i < cmds->cnt; i++)
+		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+			cmds->names[j++] = cmds->names[i];
+
+	cmds->cnt = j;
+}
+
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
+{
+	int ci, cj, ei;
+	int cmp;
+
+	ci = cj = ei = 0;
+	while (ci < cmds->cnt && ei < excludes->cnt) {
+		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
+		if (cmp < 0)
+			cmds->names[cj++] = cmds->names[ci++];
+		else if (cmp == 0)
+			ci++, ei++;
+		else if (cmp > 0)
+			ei++;
+	}
+
+	while (ci < cmds->cnt)
+		cmds->names[cj++] = cmds->names[ci++];
+
+	cmds->cnt = cj;
+}
+
+static void pretty_print_string_list(struct cmdnames *cmds, int longest)
+{
+	int cols = 1, rows;
+	int space = longest + 1; /* min 1 SP between words */
+	int max_cols = term_columns() - 1; /* don't print *on* the edge */
+	int i, j;
+
+	if (space < max_cols)
+		cols = max_cols / space;
+	rows = (cmds->cnt + cols - 1) / cols;
+
+	for (i = 0; i < rows; i++) {
+		printf("  ");
+
+		for (j = 0; j < cols; j++) {
+			int n = j * rows + i;
+			int size = space;
+			if (n >= cmds->cnt)
+				break;
+			if (j == cols-1 || n + rows >= cmds->cnt)
+				size = 1;
+			printf("%-*s", size, cmds->names[n]->name);
+		}
+		putchar('\n');
+	}
+}
+
+static int is_executable(const char *name)
+{
+	struct stat st;
+
+	if (stat(name, &st) || /* stat, not lstat */
+	    !S_ISREG(st.st_mode))
+		return 0;
+
+#ifdef __MINGW32__
+	/* cannot trust the executable bit, peek into the file instead */
+	char buf[3] = { 0 };
+	int n;
+	int fd = open(name, O_RDONLY);
+	st.st_mode &= ~S_IXUSR;
+	if (fd >= 0) {
+		n = read(fd, buf, 2);
+		if (n == 2)
+			/* DOS executables start with "MZ" */
+			if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
+				st.st_mode |= S_IXUSR;
+		close(fd);
+	}
+#endif
+	return st.st_mode & S_IXUSR;
+}
+
+static void list_commands_in_dir(struct cmdnames *cmds,
+					 const char *path,
+					 const char *prefix)
+{
+	int prefix_len;
+	DIR *dir = opendir(path);
+	struct dirent *de;
+	struct strbuf buf = STRBUF_INIT;
+	int len;
+
+	if (!dir)
+		return;
+	if (!prefix)
+		prefix = "perf-";
+	prefix_len = strlen(prefix);
+
+	strbuf_addf(&buf, "%s/", path);
+	len = buf.len;
+
+	while ((de = readdir(dir)) != NULL) {
+		int entlen;
+
+		if (prefixcmp(de->d_name, prefix))
+			continue;
+
+		strbuf_setlen(&buf, len);
+		strbuf_addstr(&buf, de->d_name);
+		if (!is_executable(buf.buf))
+			continue;
+
+		entlen = strlen(de->d_name) - prefix_len;
+		if (has_extension(de->d_name, ".exe"))
+			entlen -= 4;
+
+		add_cmdname(cmds, de->d_name + prefix_len, entlen);
+	}
+	closedir(dir);
+	strbuf_release(&buf);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds)
+{
+	const char *env_path = getenv("PATH");
+	const char *exec_path = perf_exec_path();
+
+	if (exec_path) {
+		list_commands_in_dir(main_cmds, exec_path, prefix);
+		qsort(main_cmds->names, main_cmds->cnt,
+		      sizeof(*main_cmds->names), cmdname_compare);
+		uniq(main_cmds);
+	}
+
+	if (env_path) {
+		char *paths, *path, *colon;
+		path = paths = strdup(env_path);
+		while (1) {
+			if ((colon = strchr(path, PATH_SEP)))
+				*colon = 0;
+			if (!exec_path || strcmp(path, exec_path))
+				list_commands_in_dir(other_cmds, path, prefix);
+
+			if (!colon)
+				break;
+			path = colon + 1;
+		}
+		free(paths);
+
+		qsort(other_cmds->names, other_cmds->cnt,
+		      sizeof(*other_cmds->names), cmdname_compare);
+		uniq(other_cmds);
+	}
+	exclude_cmds(other_cmds, main_cmds);
+}
+
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds)
+{
+	int i, longest = 0;
+
+	for (i = 0; i < main_cmds->cnt; i++)
+		if (longest < main_cmds->names[i]->len)
+			longest = main_cmds->names[i]->len;
+	for (i = 0; i < other_cmds->cnt; i++)
+		if (longest < other_cmds->names[i]->len)
+			longest = other_cmds->names[i]->len;
+
+	if (main_cmds->cnt) {
+		const char *exec_path = perf_exec_path();
+		printf("available %s in '%s'\n", title, exec_path);
+		printf("----------------");
+		mput_char('-', strlen(title) + strlen(exec_path));
+		putchar('\n');
+		pretty_print_string_list(main_cmds, longest);
+		putchar('\n');
+	}
+
+	if (other_cmds->cnt) {
+		printf("%s available from elsewhere on your $PATH\n", title);
+		printf("---------------------------------------");
+		mput_char('-', strlen(title));
+		putchar('\n');
+		pretty_print_string_list(other_cmds, longest);
+		putchar('\n');
+	}
+}
+
+int is_in_cmdlist(struct cmdnames *c, const char *s)
+{
+	int i;
+	for (i = 0; i < c->cnt; i++)
+		if (!strcmp(s, c->names[i]->name))
+			return 1;
+	return 0;
+}
+
+static int autocorrect;
+static struct cmdnames aliases;
+
+static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "help.autocorrect"))
+		autocorrect = perf_config_int(var,value);
+	/* Also use aliases for command lookup */
+	if (!prefixcmp(var, "alias."))
+		add_cmdname(&aliases, var + 6, strlen(var + 6));
+
+	return perf_default_config(var, value, cb);
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+	const struct cmdname *const *c1 = p1, *const *c2 = p2;
+	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+	int l1 = (*c1)->len;
+	int l2 = (*c2)->len;
+	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+	int i;
+	ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
+
+	for (i = 0; i < old->cnt; i++)
+		cmds->names[cmds->cnt++] = old->names[i];
+	free(old->names);
+	old->cnt = 0;
+	old->names = NULL;
+}
+
+const char *help_unknown_cmd(const char *cmd)
+{
+	int i, n, best_similarity = 0;
+	struct cmdnames main_cmds, other_cmds;
+
+	memset(&main_cmds, 0, sizeof(main_cmds));
+	memset(&other_cmds, 0, sizeof(main_cmds));
+	memset(&aliases, 0, sizeof(aliases));
+
+	perf_config(perf_unknown_cmd_config, NULL);
+
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	add_cmd_list(&main_cmds, &aliases);
+	add_cmd_list(&main_cmds, &other_cmds);
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(main_cmds.names), cmdname_compare);
+	uniq(&main_cmds);
+
+	/* This reuses cmdname->len for similarity index */
+	for (i = 0; i < main_cmds.cnt; ++i)
+		main_cmds.names[i]->len =
+			levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(*main_cmds.names), levenshtein_compare);
+
+	if (!main_cmds.cnt)
+		die ("Uh oh. Your system reports no Git commands at all.");
+
+	best_similarity = main_cmds.names[0]->len;
+	n = 1;
+	while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+		++n;
+	if (autocorrect && n == 1) {
+		const char *assumed = main_cmds.names[0]->name;
+		main_cmds.names[0] = NULL;
+		clean_cmdnames(&main_cmds);
+		fprintf(stderr, "WARNING: You called a Git program named '%s', "
+			"which does not exist.\n"
+			"Continuing under the assumption that you meant '%s'\n",
+			cmd, assumed);
+		if (autocorrect > 0) {
+			fprintf(stderr, "in %0.1f seconds automatically...\n",
+				(float)autocorrect/10.0);
+			poll(NULL, 0, autocorrect * 100);
+		}
+		return assumed;
+	}
+
+	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+	if (best_similarity < 6) {
+		fprintf(stderr, "\nDid you mean %s?\n",
+			n < 2 ? "this": "one of these");
+
+		for (i = 0; i < n; i++)
+			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+	}
+
+	exit(1);
+}
+
+int cmd_version(int argc, const char **argv, const char *prefix)
+{
+	printf("perf version %s\n", perf_version_string);
+	return 0;
+}
diff --git a/Documentation/perf_counter/help.h b/Documentation/perf_counter/help.h
new file mode 100644
index 0000000..56bc154
--- /dev/null
+++ b/Documentation/perf_counter/help.h
@@ -0,0 +1,29 @@
+#ifndef HELP_H
+#define HELP_H
+
+struct cmdnames {
+	int alloc;
+	int cnt;
+	struct cmdname {
+		size_t len; /* also used for similarity index in help.c */
+		char name[FLEX_ARRAY];
+	} **names;
+};
+
+static inline void mput_char(char c, unsigned int num)
+{
+	while(num--)
+		putchar(c);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds);
+void add_cmdname(struct cmdnames *cmds, const char *name, int len);
+/* Here we require that excludes is a sorted list. */
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
+int is_in_cmdlist(struct cmdnames *c, const char *s);
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds);
+
+#endif /* HELP_H */
diff --git a/Documentation/perf_counter/levenshtein.c b/Documentation/perf_counter/levenshtein.c
new file mode 100644
index 0000000..e521d15
--- /dev/null
+++ b/Documentation/perf_counter/levenshtein.c
@@ -0,0 +1,84 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+/*
+ * This function implements the Damerau-Levenshtein algorithm to
+ * calculate a distance between strings.
+ *
+ * Basically, it says how many letters need to be swapped, substituted,
+ * deleted from, or added to string1, at least, to get string2.
+ *
+ * The idea is to build a distance matrix for the substrings of both
+ * strings.  To avoid a large space complexity, only the last three rows
+ * are kept in memory (if swaps had the same or higher cost as one deletion
+ * plus one insertion, only two rows would be needed).
+ *
+ * At any stage, "i + 1" denotes the length of the current substring of
+ * string1 that the distance is calculated for.
+ *
+ * row2 holds the current row, row1 the previous row (i.e. for the substring
+ * of string1 of length "i"), and row0 the row before that.
+ *
+ * In other words, at the start of the big loop, row2[j + 1] contains the
+ * Damerau-Levenshtein distance between the substring of string1 of length
+ * "i" and the substring of string2 of length "j + 1".
+ *
+ * All the big loop does is determine the partial minimum-cost paths.
+ *
+ * It does so by calculating the costs of the path ending in characters
+ * i (in string1) and j (in string2), respectively, given that the last
+ * operation is a substition, a swap, a deletion, or an insertion.
+ *
+ * This implementation allows the costs to be weighted:
+ *
+ * - w (as in "sWap")
+ * - s (as in "Substitution")
+ * - a (for insertion, AKA "Add")
+ * - d (as in "Deletion")
+ *
+ * Note that this algorithm calculates a distance _iff_ d == a.
+ */
+int levenshtein(const char *string1, const char *string2,
+		int w, int s, int a, int d)
+{
+	int len1 = strlen(string1), len2 = strlen(string2);
+	int *row0 = malloc(sizeof(int) * (len2 + 1));
+	int *row1 = malloc(sizeof(int) * (len2 + 1));
+	int *row2 = malloc(sizeof(int) * (len2 + 1));
+	int i, j;
+
+	for (j = 0; j <= len2; j++)
+		row1[j] = j * a;
+	for (i = 0; i < len1; i++) {
+		int *dummy;
+
+		row2[0] = (i + 1) * d;
+		for (j = 0; j < len2; j++) {
+			/* substitution */
+			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+			/* swap */
+			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+					string1[i] == string2[j - 1] &&
+					row2[j + 1] > row0[j - 1] + w)
+				row2[j + 1] = row0[j - 1] + w;
+			/* deletion */
+			if (row2[j + 1] > row1[j + 1] + d)
+				row2[j + 1] = row1[j + 1] + d;
+			/* insertion */
+			if (row2[j + 1] > row2[j] + a)
+				row2[j + 1] = row2[j] + a;
+		}
+
+		dummy = row0;
+		row0 = row1;
+		row1 = row2;
+		row2 = dummy;
+	}
+
+	i = row1[len2];
+	free(row0);
+	free(row1);
+	free(row2);
+
+	return i;
+}
diff --git a/Documentation/perf_counter/levenshtein.h b/Documentation/perf_counter/levenshtein.h
new file mode 100644
index 0000000..0173abe
--- /dev/null
+++ b/Documentation/perf_counter/levenshtein.h
@@ -0,0 +1,8 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+	int swap_penalty, int substition_penalty,
+	int insertion_penalty, int deletion_penalty);
+
+#endif
diff --git a/Documentation/perf_counter/parse-options.c b/Documentation/perf_counter/parse-options.c
new file mode 100644
index 0000000..7464f34
--- /dev/null
+++ b/Documentation/perf_counter/parse-options.c
@@ -0,0 +1,495 @@
+#include "util.h"
+#include "parse-options.h"
+#include "cache.h"
+
+#define OPT_SHORT 1
+#define OPT_UNSET 2
+
+static int opterror(const struct option *opt, const char *reason, int flags)
+{
+	if (flags & OPT_SHORT)
+		return error("switch `%c' %s", opt->short_name, reason);
+	if (flags & OPT_UNSET)
+		return error("option `no-%s' %s", opt->long_name, reason);
+	return error("option `%s' %s", opt->long_name, reason);
+}
+
+static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
+		   int flags, const char **arg)
+{
+	if (p->opt) {
+		*arg = p->opt;
+		p->opt = NULL;
+	} else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) {
+		*arg = (const char *)opt->defval;
+	} else if (p->argc > 1) {
+		p->argc--;
+		*arg = *++p->argv;
+	} else
+		return opterror(opt, "requires a value", flags);
+	return 0;
+}
+
+static int get_value(struct parse_opt_ctx_t *p,
+		     const struct option *opt, int flags)
+{
+	const char *s, *arg;
+	const int unset = flags & OPT_UNSET;
+
+	if (unset && p->opt)
+		return opterror(opt, "takes no value", flags);
+	if (unset && (opt->flags & PARSE_OPT_NONEG))
+		return opterror(opt, "isn't available", flags);
+
+	if (!(flags & OPT_SHORT) && p->opt) {
+		switch (opt->type) {
+		case OPTION_CALLBACK:
+			if (!(opt->flags & PARSE_OPT_NOARG))
+				break;
+			/* FALLTHROUGH */
+		case OPTION_BOOLEAN:
+		case OPTION_BIT:
+		case OPTION_SET_INT:
+		case OPTION_SET_PTR:
+			return opterror(opt, "takes no value", flags);
+		default:
+			break;
+		}
+	}
+
+	switch (opt->type) {
+	case OPTION_BIT:
+		if (unset)
+			*(int *)opt->value &= ~opt->defval;
+		else
+			*(int *)opt->value |= opt->defval;
+		return 0;
+
+	case OPTION_BOOLEAN:
+		*(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
+		return 0;
+
+	case OPTION_SET_INT:
+		*(int *)opt->value = unset ? 0 : opt->defval;
+		return 0;
+
+	case OPTION_SET_PTR:
+		*(void **)opt->value = unset ? NULL : (void *)opt->defval;
+		return 0;
+
+	case OPTION_STRING:
+		if (unset)
+			*(const char **)opt->value = NULL;
+		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			*(const char **)opt->value = (const char *)opt->defval;
+		else
+			return get_arg(p, opt, flags, (const char **)opt->value);
+		return 0;
+
+	case OPTION_CALLBACK:
+		if (unset)
+			return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_NOARG)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		return (*opt->callback)(opt, arg, 0) ? (-1) : 0;
+
+	case OPTION_INTEGER:
+		if (unset) {
+			*(int *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(int *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(int *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	default:
+		die("should not happen, someone must be hit on the forehead");
+	}
+}
+
+static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
+{
+	for (; options->type != OPTION_END; options++) {
+		if (options->short_name == *p->opt) {
+			p->opt = p->opt[1] ? p->opt + 1 : NULL;
+			return get_value(p, options, OPT_SHORT);
+		}
+	}
+	return -2;
+}
+
+static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
+                          const struct option *options)
+{
+	const char *arg_end = strchr(arg, '=');
+	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
+	int abbrev_flags = 0, ambiguous_flags = 0;
+
+	if (!arg_end)
+		arg_end = arg + strlen(arg);
+
+	for (; options->type != OPTION_END; options++) {
+		const char *rest;
+		int flags = 0;
+
+		if (!options->long_name)
+			continue;
+
+		rest = skip_prefix(arg, options->long_name);
+		if (options->type == OPTION_ARGUMENT) {
+			if (!rest)
+				continue;
+			if (*rest == '=')
+				return opterror(options, "takes no value", flags);
+			if (*rest)
+				continue;
+			p->out[p->cpidx++] = arg - 2;
+			return 0;
+		}
+		if (!rest) {
+			/* abbreviated? */
+			if (!strncmp(options->long_name, arg, arg_end - arg)) {
+is_abbreviated:
+				if (abbrev_option) {
+					/*
+					 * If this is abbreviated, it is
+					 * ambiguous. So when there is no
+					 * exact match later, we need to
+					 * error out.
+					 */
+					ambiguous_option = abbrev_option;
+					ambiguous_flags = abbrev_flags;
+				}
+				if (!(flags & OPT_UNSET) && *arg_end)
+					p->opt = arg_end + 1;
+				abbrev_option = options;
+				abbrev_flags = flags;
+				continue;
+			}
+			/* negated and abbreviated very much? */
+			if (!prefixcmp("no-", arg)) {
+				flags |= OPT_UNSET;
+				goto is_abbreviated;
+			}
+			/* negated? */
+			if (strncmp(arg, "no-", 3))
+				continue;
+			flags |= OPT_UNSET;
+			rest = skip_prefix(arg + 3, options->long_name);
+			/* abbreviated and negated? */
+			if (!rest && !prefixcmp(options->long_name, arg + 3))
+				goto is_abbreviated;
+			if (!rest)
+				continue;
+		}
+		if (*rest) {
+			if (*rest != '=')
+				continue;
+			p->opt = rest + 1;
+		}
+		return get_value(p, options, flags);
+	}
+
+	if (ambiguous_option)
+		return error("Ambiguous option: %s "
+			"(could be --%s%s or --%s%s)",
+			arg,
+			(ambiguous_flags & OPT_UNSET) ?  "no-" : "",
+			ambiguous_option->long_name,
+			(abbrev_flags & OPT_UNSET) ?  "no-" : "",
+			abbrev_option->long_name);
+	if (abbrev_option)
+		return get_value(p, abbrev_option, abbrev_flags);
+	return -2;
+}
+
+static void check_typos(const char *arg, const struct option *options)
+{
+	if (strlen(arg) < 3)
+		return;
+
+	if (!prefixcmp(arg, "no-")) {
+		error ("did you mean `--%s` (with two dashes ?)", arg);
+		exit(129);
+	}
+
+	for (; options->type != OPTION_END; options++) {
+		if (!options->long_name)
+			continue;
+		if (!prefixcmp(options->long_name, arg)) {
+			error ("did you mean `--%s` (with two dashes ?)", arg);
+			exit(129);
+		}
+	}
+}
+
+void parse_options_start(struct parse_opt_ctx_t *ctx,
+			 int argc, const char **argv, int flags)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->argc = argc - 1;
+	ctx->argv = argv + 1;
+	ctx->out  = argv;
+	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
+	ctx->flags = flags;
+	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
+	    (flags & PARSE_OPT_STOP_AT_NON_OPTION))
+		die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
+}
+
+static int usage_with_options_internal(const char * const *,
+				       const struct option *, int);
+
+int parse_options_step(struct parse_opt_ctx_t *ctx,
+		       const struct option *options,
+		       const char * const usagestr[])
+{
+	int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
+
+	/* we must reset ->opt, unknown short option leave it dangling */
+	ctx->opt = NULL;
+
+	for (; ctx->argc; ctx->argc--, ctx->argv++) {
+		const char *arg = ctx->argv[0];
+
+		if (*arg != '-' || !arg[1]) {
+			if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
+				break;
+			ctx->out[ctx->cpidx++] = ctx->argv[0];
+			continue;
+		}
+
+		if (arg[1] != '-') {
+			ctx->opt = arg + 1;
+			if (internal_help && *ctx->opt == 'h')
+				return parse_options_usage(usagestr, options);
+			switch (parse_short_opt(ctx, options)) {
+			case -1:
+				return parse_options_usage(usagestr, options);
+			case -2:
+				goto unknown;
+			}
+			if (ctx->opt)
+				check_typos(arg + 1, options);
+			while (ctx->opt) {
+				if (internal_help && *ctx->opt == 'h')
+					return parse_options_usage(usagestr, options);
+				switch (parse_short_opt(ctx, options)) {
+				case -1:
+					return parse_options_usage(usagestr, options);
+				case -2:
+					/* fake a short option thing to hide the fact that we may have
+					 * started to parse aggregated stuff
+					 *
+					 * This is leaky, too bad.
+					 */
+					ctx->argv[0] = strdup(ctx->opt - 1);
+					*(char *)ctx->argv[0] = '-';
+					goto unknown;
+				}
+			}
+			continue;
+		}
+
+		if (!arg[2]) { /* "--" */
+			if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
+				ctx->argc--;
+				ctx->argv++;
+			}
+			break;
+		}
+
+		if (internal_help && !strcmp(arg + 2, "help-all"))
+			return usage_with_options_internal(usagestr, options, 1);
+		if (internal_help && !strcmp(arg + 2, "help"))
+			return parse_options_usage(usagestr, options);
+		switch (parse_long_opt(ctx, arg + 2, options)) {
+		case -1:
+			return parse_options_usage(usagestr, options);
+		case -2:
+			goto unknown;
+		}
+		continue;
+unknown:
+		if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
+			return PARSE_OPT_UNKNOWN;
+		ctx->out[ctx->cpidx++] = ctx->argv[0];
+		ctx->opt = NULL;
+	}
+	return PARSE_OPT_DONE;
+}
+
+int parse_options_end(struct parse_opt_ctx_t *ctx)
+{
+	memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
+	ctx->out[ctx->cpidx + ctx->argc] = NULL;
+	return ctx->cpidx + ctx->argc;
+}
+
+int parse_options(int argc, const char **argv, const struct option *options,
+		  const char * const usagestr[], int flags)
+{
+	struct parse_opt_ctx_t ctx;
+
+	parse_options_start(&ctx, argc, argv, flags);
+	switch (parse_options_step(&ctx, options, usagestr)) {
+	case PARSE_OPT_HELP:
+		exit(129);
+	case PARSE_OPT_DONE:
+		break;
+	default: /* PARSE_OPT_UNKNOWN */
+		if (ctx.argv[0][1] == '-') {
+			error("unknown option `%s'", ctx.argv[0] + 2);
+		} else {
+			error("unknown switch `%c'", *ctx.opt);
+		}
+		usage_with_options(usagestr, options);
+	}
+
+	return parse_options_end(&ctx);
+}
+
+#define USAGE_OPTS_WIDTH 24
+#define USAGE_GAP         2
+
+int usage_with_options_internal(const char * const *usagestr,
+				const struct option *opts, int full)
+{
+	if (!usagestr)
+		return PARSE_OPT_HELP;
+
+	fprintf(stderr, "usage: %s\n", *usagestr++);
+	while (*usagestr && **usagestr)
+		fprintf(stderr, "   or: %s\n", *usagestr++);
+	while (*usagestr) {
+		fprintf(stderr, "%s%s\n",
+				**usagestr ? "    " : "",
+				*usagestr);
+		usagestr++;
+	}
+
+	if (opts->type != OPTION_GROUP)
+		fputc('\n', stderr);
+
+	for (; opts->type != OPTION_END; opts++) {
+		size_t pos;
+		int pad;
+
+		if (opts->type == OPTION_GROUP) {
+			fputc('\n', stderr);
+			if (*opts->help)
+				fprintf(stderr, "%s\n", opts->help);
+			continue;
+		}
+		if (!full && (opts->flags & PARSE_OPT_HIDDEN))
+			continue;
+
+		pos = fprintf(stderr, "    ");
+		if (opts->short_name)
+			pos += fprintf(stderr, "-%c", opts->short_name);
+		if (opts->long_name && opts->short_name)
+			pos += fprintf(stderr, ", ");
+		if (opts->long_name)
+			pos += fprintf(stderr, "--%s", opts->long_name);
+
+		switch (opts->type) {
+		case OPTION_ARGUMENT:
+			break;
+		case OPTION_INTEGER:
+			if (opts->flags & PARSE_OPT_OPTARG)
+				if (opts->long_name)
+					pos += fprintf(stderr, "[=<n>]");
+				else
+					pos += fprintf(stderr, "[<n>]");
+			else
+				pos += fprintf(stderr, " <n>");
+			break;
+		case OPTION_CALLBACK:
+			if (opts->flags & PARSE_OPT_NOARG)
+				break;
+			/* FALLTHROUGH */
+		case OPTION_STRING:
+			if (opts->argh) {
+				if (opts->flags & PARSE_OPT_OPTARG)
+					if (opts->long_name)
+						pos += fprintf(stderr, "[=<%s>]", opts->argh);
+					else
+						pos += fprintf(stderr, "[<%s>]", opts->argh);
+				else
+					pos += fprintf(stderr, " <%s>", opts->argh);
+			} else {
+				if (opts->flags & PARSE_OPT_OPTARG)
+					if (opts->long_name)
+						pos += fprintf(stderr, "[=...]");
+					else
+						pos += fprintf(stderr, "[...]");
+				else
+					pos += fprintf(stderr, " ...");
+			}
+			break;
+		default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */
+			break;
+		}
+
+		if (pos <= USAGE_OPTS_WIDTH)
+			pad = USAGE_OPTS_WIDTH - pos;
+		else {
+			fputc('\n', stderr);
+			pad = USAGE_OPTS_WIDTH;
+		}
+		fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+	}
+	fputc('\n', stderr);
+
+	return PARSE_OPT_HELP;
+}
+
+void usage_with_options(const char * const *usagestr,
+			const struct option *opts)
+{
+	usage_with_options_internal(usagestr, opts, 0);
+	exit(129);
+}
+
+int parse_options_usage(const char * const *usagestr,
+			const struct option *opts)
+{
+	return usage_with_options_internal(usagestr, opts, 0);
+}
+
+
+/*----- some often used options -----*/
+#include "cache.h"
+
+int parse_opt_verbosity_cb(const struct option *opt, const char *arg,
+			   int unset)
+{
+	int *target = opt->value;
+
+	if (unset)
+		/* --no-quiet, --no-verbose */
+		*target = 0;
+	else if (opt->short_name == 'v') {
+		if (*target >= 0)
+			(*target)++;
+		else
+			*target = 1;
+	} else {
+		if (*target <= 0)
+			(*target)--;
+		else
+			*target = -1;
+	}
+	return 0;
+}
diff --git a/Documentation/perf_counter/parse-options.h b/Documentation/perf_counter/parse-options.h
new file mode 100644
index 0000000..a81c7fa
--- /dev/null
+++ b/Documentation/perf_counter/parse-options.h
@@ -0,0 +1,172 @@
+#ifndef PARSE_OPTIONS_H
+#define PARSE_OPTIONS_H
+
+enum parse_opt_type {
+	/* special types */
+	OPTION_END,
+	OPTION_ARGUMENT,
+	OPTION_GROUP,
+	/* options with no arguments */
+	OPTION_BIT,
+	OPTION_BOOLEAN, /* _INCR would have been a better name */
+	OPTION_SET_INT,
+	OPTION_SET_PTR,
+	/* options with arguments (usually) */
+	OPTION_STRING,
+	OPTION_INTEGER,
+	OPTION_CALLBACK,
+};
+
+enum parse_opt_flags {
+	PARSE_OPT_KEEP_DASHDASH = 1,
+	PARSE_OPT_STOP_AT_NON_OPTION = 2,
+	PARSE_OPT_KEEP_ARGV0 = 4,
+	PARSE_OPT_KEEP_UNKNOWN = 8,
+	PARSE_OPT_NO_INTERNAL_HELP = 16,
+};
+
+enum parse_opt_option_flags {
+	PARSE_OPT_OPTARG  = 1,
+	PARSE_OPT_NOARG   = 2,
+	PARSE_OPT_NONEG   = 4,
+	PARSE_OPT_HIDDEN  = 8,
+	PARSE_OPT_LASTARG_DEFAULT = 16,
+};
+
+struct option;
+typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
+
+/*
+ * `type`::
+ *   holds the type of the option, you must have an OPTION_END last in your
+ *   array.
+ *
+ * `short_name`::
+ *   the character to use as a short option name, '\0' if none.
+ *
+ * `long_name`::
+ *   the long option name, without the leading dashes, NULL if none.
+ *
+ * `value`::
+ *   stores pointers to the values to be filled.
+ *
+ * `argh`::
+ *   token to explain the kind of argument this option wants. Keep it
+ *   homogenous across the repository.
+ *
+ * `help`::
+ *   the short help associated to what the option does.
+ *   Must never be NULL (except for OPTION_END).
+ *   OPTION_GROUP uses this pointer to store the group header.
+ *
+ * `flags`::
+ *   mask of parse_opt_option_flags.
+ *   PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
+ *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
+ *   PARSE_OPT_NONEG: says that this option cannot be negated
+ *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in
+ *                    the long one.
+ *
+ * `callback`::
+ *   pointer to the callback to use for OPTION_CALLBACK.
+ *
+ * `defval`::
+ *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
+ *   OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in
+ *   the value when met.
+ *   CALLBACKS can use it like they want.
+ */
+struct option {
+	enum parse_opt_type type;
+	int short_name;
+	const char *long_name;
+	void *value;
+	const char *argh;
+	const char *help;
+
+	int flags;
+	parse_opt_cb *callback;
+	intptr_t defval;
+};
+
+#define OPT_END()                   { OPTION_END }
+#define OPT_ARGUMENT(l, h)          { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) }
+#define OPT_GROUP(h)                { OPTION_GROUP, 0, NULL, NULL, NULL, (h) }
+#define OPT_BIT(s, l, v, h, b)      { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) }
+#define OPT_BOOLEAN(s, l, v, h)     { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) }
+#define OPT_SET_INT(s, l, v, h, i)  { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) }
+#define OPT_SET_PTR(s, l, v, h, p)  { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) }
+#define OPT_INTEGER(s, l, v, h)     { OPTION_INTEGER, (s), (l), (v), NULL, (h) }
+#define OPT_STRING(s, l, v, a, h)   { OPTION_STRING,  (s), (l), (v), (a), (h) }
+#define OPT_DATE(s, l, v, h) \
+	{ OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \
+	  parse_opt_approxidate_cb }
+#define OPT_CALLBACK(s, l, v, a, h, f) \
+	{ OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) }
+
+/* parse_options() will filter out the processed options and leave the
+ * non-option argments in argv[].
+ * Returns the number of arguments left in argv[].
+ */
+extern int parse_options(int argc, const char **argv,
+                         const struct option *options,
+                         const char * const usagestr[], int flags);
+
+extern NORETURN void usage_with_options(const char * const *usagestr,
+                                        const struct option *options);
+
+/*----- incremantal advanced APIs -----*/
+
+enum {
+	PARSE_OPT_HELP = -1,
+	PARSE_OPT_DONE,
+	PARSE_OPT_UNKNOWN,
+};
+
+/*
+ * It's okay for the caller to consume argv/argc in the usual way.
+ * Other fields of that structure are private to parse-options and should not
+ * be modified in any way.
+ */
+struct parse_opt_ctx_t {
+	const char **argv;
+	const char **out;
+	int argc, cpidx;
+	const char *opt;
+	int flags;
+};
+
+extern int parse_options_usage(const char * const *usagestr,
+			       const struct option *opts);
+
+extern void parse_options_start(struct parse_opt_ctx_t *ctx,
+				int argc, const char **argv, int flags);
+
+extern int parse_options_step(struct parse_opt_ctx_t *ctx,
+			      const struct option *options,
+			      const char * const usagestr[]);
+
+extern int parse_options_end(struct parse_opt_ctx_t *ctx);
+
+
+/*----- some often used options -----*/
+extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
+extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
+extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
+
+#define OPT__VERBOSE(var)  OPT_BOOLEAN('v', "verbose", (var), "be verbose")
+#define OPT__QUIET(var)    OPT_BOOLEAN('q', "quiet",   (var), "be quiet")
+#define OPT__VERBOSITY(var) \
+	{ OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
+	{ OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
+#define OPT__DRY_RUN(var)  OPT_BOOLEAN('n', "dry-run", (var), "dry run")
+#define OPT__ABBREV(var)  \
+	{ OPTION_CALLBACK, 0, "abbrev", (var), "n", \
+	  "use <n> digits to display SHA-1s", \
+	  PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }
+
+extern const char *parse_options_fix_filename(const char *prefix, const char *file);
+
+#endif
diff --git a/Documentation/perf_counter/path.c b/Documentation/perf_counter/path.c
new file mode 100644
index 0000000..891b612
--- /dev/null
+++ b/Documentation/perf_counter/path.c
@@ -0,0 +1,392 @@
+/*
+ * I'm tired of doing "vsnprintf()" etc just to open a
+ * file, so here's a "return static buffer with printf"
+ * interface for paths.
+ *
+ * It's obviously not thread-safe. Sue me. But it's quite
+ * useful for doing things like
+ *
+ *   f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
+ *
+ * which is what it's designed for.
+ */
+#include "cache.h"
+
+static char bad_path[] = "/bad-path/";
+/*
+ * Two hacks:
+ */
+
+static char *get_perf_dir(void)
+{
+	return ".";
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+	size_t ret = strlen(src);
+
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memcpy(dest, src, len);
+		dest[len] = '\0';
+	}
+	return ret;
+}
+
+
+static char *get_pathname(void)
+{
+	static char pathname_array[4][PATH_MAX];
+	static int index;
+	return pathname_array[3 & ++index];
+}
+
+static char *cleanup_path(char *path)
+{
+	/* Clean it up */
+	if (!memcmp(path, "./", 2)) {
+		path += 2;
+		while (*path == '/')
+			path++;
+	}
+	return path;
+}
+
+char *mksnpath(char *buf, size_t n, const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+
+	va_start(args, fmt);
+	len = vsnprintf(buf, n, fmt, args);
+	va_end(args);
+	if (len >= n) {
+		strlcpy(buf, bad_path, n);
+		return buf;
+	}
+	return cleanup_path(buf);
+}
+
+static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
+{
+	const char *perf_dir = get_perf_dir();
+	size_t len;
+
+	len = strlen(perf_dir);
+	if (n < len + 1)
+		goto bad;
+	memcpy(buf, perf_dir, len);
+	if (len && !is_dir_sep(perf_dir[len-1]))
+		buf[len++] = '/';
+	len += vsnprintf(buf + len, n - len, fmt, args);
+	if (len >= n)
+		goto bad;
+	return cleanup_path(buf);
+bad:
+	strlcpy(buf, bad_path, n);
+	return buf;
+}
+
+char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	(void)perf_vsnpath(buf, n, fmt, args);
+	va_end(args);
+	return buf;
+}
+
+char *perf_pathdup(const char *fmt, ...)
+{
+	char path[PATH_MAX];
+	va_list args;
+	va_start(args, fmt);
+	(void)perf_vsnpath(path, sizeof(path), fmt, args);
+	va_end(args);
+	return xstrdup(path);
+}
+
+char *mkpath(const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+	char *pathname = get_pathname();
+
+	va_start(args, fmt);
+	len = vsnprintf(pathname, PATH_MAX, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+char *perf_path(const char *fmt, ...)
+{
+	const char *perf_dir = get_perf_dir();
+	char *pathname = get_pathname();
+	va_list args;
+	unsigned len;
+
+	len = strlen(perf_dir);
+	if (len > PATH_MAX-100)
+		return bad_path;
+	memcpy(pathname, perf_dir, len);
+	if (len && perf_dir[len-1] != '/')
+		pathname[len++] = '/';
+	va_start(args, fmt);
+	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+
+/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
+int perf_mkstemp(char *path, size_t len, const char *template)
+{
+	const char *tmp;
+	size_t n;
+
+	tmp = getenv("TMPDIR");
+	if (!tmp)
+		tmp = "/tmp";
+	n = snprintf(path, len, "%s/%s", tmp, template);
+	if (len <= n) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+	return mkstemp(path);
+}
+
+
+static char *user_path(char *buf, char *path, int sz)
+{
+	struct passwd *pw;
+	char *slash;
+	int len, baselen;
+
+	if (!path || path[0] != '~')
+		return NULL;
+	path++;
+	slash = strchr(path, '/');
+	if (path[0] == '/' || !path[0]) {
+		pw = getpwuid(getuid());
+	}
+	else {
+		if (slash) {
+			*slash = 0;
+			pw = getpwnam(path);
+			*slash = '/';
+		}
+		else
+			pw = getpwnam(path);
+	}
+	if (!pw || !pw->pw_dir || sz <= strlen(pw->pw_dir))
+		return NULL;
+	baselen = strlen(pw->pw_dir);
+	memcpy(buf, pw->pw_dir, baselen);
+	while ((1 < baselen) && (buf[baselen-1] == '/')) {
+		buf[baselen-1] = 0;
+		baselen--;
+	}
+	if (slash && slash[1]) {
+		len = strlen(slash);
+		if (sz <= baselen + len)
+			return NULL;
+		memcpy(buf + baselen, slash, len + 1);
+	}
+	return buf;
+}
+
+const char *make_relative_path(const char *abs, const char *base)
+{
+	static char buf[PATH_MAX + 1];
+	int baselen;
+	if (!base)
+		return abs;
+	baselen = strlen(base);
+	if (prefixcmp(abs, base))
+		return abs;
+	if (abs[baselen] == '/')
+		baselen++;
+	else if (base[baselen - 1] != '/')
+		return abs;
+	strcpy(buf, abs + baselen);
+	return buf;
+}
+
+/*
+ * It is okay if dst == src, but they should not overlap otherwise.
+ *
+ * Performs the following normalizations on src, storing the result in dst:
+ * - Ensures that components are separated by '/' (Windows only)
+ * - Squashes sequences of '/'.
+ * - Removes "." components.
+ * - Removes ".." components, and the components the precede them.
+ * Returns failure (non-zero) if a ".." component appears as first path
+ * component anytime during the normalization. Otherwise, returns success (0).
+ *
+ * Note that this function is purely textual.  It does not follow symlinks,
+ * verify the existence of the path, or make any system calls.
+ */
+int normalize_path_copy(char *dst, const char *src)
+{
+	char *dst0;
+
+	if (has_dos_drive_prefix(src)) {
+		*dst++ = *src++;
+		*dst++ = *src++;
+	}
+	dst0 = dst;
+
+	if (is_dir_sep(*src)) {
+		*dst++ = '/';
+		while (is_dir_sep(*src))
+			src++;
+	}
+
+	for (;;) {
+		char c = *src;
+
+		/*
+		 * A path component that begins with . could be
+		 * special:
+		 * (1) "." and ends   -- ignore and terminate.
+		 * (2) "./"           -- ignore them, eat slash and continue.
+		 * (3) ".." and ends  -- strip one and terminate.
+		 * (4) "../"          -- strip one, eat slash and continue.
+		 */
+		if (c == '.') {
+			if (!src[1]) {
+				/* (1) */
+				src++;
+			} else if (is_dir_sep(src[1])) {
+				/* (2) */
+				src += 2;
+				while (is_dir_sep(*src))
+					src++;
+				continue;
+			} else if (src[1] == '.') {
+				if (!src[2]) {
+					/* (3) */
+					src += 2;
+					goto up_one;
+				} else if (is_dir_sep(src[2])) {
+					/* (4) */
+					src += 3;
+					while (is_dir_sep(*src))
+						src++;
+					goto up_one;
+				}
+			}
+		}
+
+		/* copy up to the next '/', and eat all '/' */
+		while ((c = *src++) != '\0' && !is_dir_sep(c))
+			*dst++ = c;
+		if (is_dir_sep(c)) {
+			*dst++ = '/';
+			while (is_dir_sep(c))
+				c = *src++;
+			src--;
+		} else if (!c)
+			break;
+		continue;
+
+	up_one:
+		/*
+		 * dst0..dst is prefix portion, and dst[-1] is '/';
+		 * go up one level.
+		 */
+		dst--;	/* go to trailing '/' */
+		if (dst <= dst0)
+			return -1;
+		/* Windows: dst[-1] cannot be backslash anymore */
+		while (dst0 < dst && dst[-1] != '/')
+			dst--;
+	}
+	*dst = '\0';
+	return 0;
+}
+
+/*
+ * path = Canonical absolute path
+ * prefix_list = Colon-separated list of absolute paths
+ *
+ * Determines, for each path in prefix_list, whether the "prefix" really
+ * is an ancestor directory of path.  Returns the length of the longest
+ * ancestor directory, excluding any trailing slashes, or -1 if no prefix
+ * is an ancestor.  (Note that this means 0 is returned if prefix_list is
+ * "/".) "/foo" is not considered an ancestor of "/foobar".  Directories
+ * are not considered to be their own ancestors.  path must be in a
+ * canonical form: empty components, or "." or ".." components are not
+ * allowed.  prefix_list may be null, which is like "".
+ */
+int longest_ancestor_length(const char *path, const char *prefix_list)
+{
+	char buf[PATH_MAX+1];
+	const char *ceil, *colon;
+	int len, max_len = -1;
+
+	if (prefix_list == NULL || !strcmp(path, "/"))
+		return -1;
+
+	for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
+		for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
+		len = colon - ceil;
+		if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
+			continue;
+		strlcpy(buf, ceil, len+1);
+		if (normalize_path_copy(buf, buf) < 0)
+			continue;
+		len = strlen(buf);
+		if (len > 0 && buf[len-1] == '/')
+			buf[--len] = '\0';
+
+		if (!strncmp(path, buf, len) &&
+		    path[len] == '/' &&
+		    len > max_len) {
+			max_len = len;
+		}
+	}
+
+	return max_len;
+}
+
+/* strip arbitrary amount of directory separators at end of path */
+static inline int chomp_trailing_dir_sep(const char *path, int len)
+{
+	while (len && is_dir_sep(path[len - 1]))
+		len--;
+	return len;
+}
+
+/*
+ * If path ends with suffix (complete path components), returns the
+ * part before suffix (sans trailing directory separators).
+ * Otherwise returns NULL.
+ */
+char *strip_path_suffix(const char *path, const char *suffix)
+{
+	int path_len = strlen(path), suffix_len = strlen(suffix);
+
+	while (suffix_len) {
+		if (!path_len)
+			return NULL;
+
+		if (is_dir_sep(path[path_len - 1])) {
+			if (!is_dir_sep(suffix[suffix_len - 1]))
+				return NULL;
+			path_len = chomp_trailing_dir_sep(path, path_len);
+			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
+		}
+		else if (path[--path_len] != suffix[--suffix_len])
+			return NULL;
+	}
+
+	if (path_len && !is_dir_sep(path[path_len - 1]))
+		return NULL;
+	return xstrndup(path, chomp_trailing_dir_sep(path, path_len));
+}
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
new file mode 100644
index 0000000..9256f6a
--- /dev/null
+++ b/Documentation/perf_counter/perf.c
@@ -0,0 +1,411 @@
+#include "builtin.h"
+#include "exec_cmd.h"
+#include "cache.h"
+//#include "quote.h"
+#include "run-command.h"
+
+const char perf_usage_string[] =
+	"perf [--version] [--exec-path[=PERF_EXEC_PATH]] [--html-path] [-p|--paginate|--no-pager] [--bare] [--perf-dir=PERF_DIR] [--work-tree=PERF_WORK_TREE] [--help] COMMAND [ARGS]";
+
+const char perf_more_info_string[] =
+	"See 'perf help COMMAND' for more information on a specific command.";
+
+static int use_pager = -1;
+struct pager_config {
+	const char *cmd;
+	int val;
+};
+
+static int pager_command_config(const char *var, const char *value, void *data)
+{
+	struct pager_config *c = data;
+	if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd))
+		c->val = perf_config_bool(var, value);
+	return 0;
+}
+
+/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */
+int check_pager_config(const char *cmd)
+{
+	struct pager_config c;
+	c.cmd = cmd;
+	c.val = -1;
+	perf_config(pager_command_config, &c);
+	return c.val;
+}
+
+static void commit_pager_choice(void) {
+	switch (use_pager) {
+	case 0:
+		setenv("PERF_PAGER", "cat", 1);
+		break;
+	case 1:
+		/* setup_pager(); */
+		break;
+	default:
+		break;
+	}
+}
+
+static int handle_options(const char*** argv, int* argc, int* envchanged)
+{
+	int handled = 0;
+
+	while (*argc > 0) {
+		const char *cmd = (*argv)[0];
+		if (cmd[0] != '-')
+			break;
+
+		/*
+		 * For legacy reasons, the "version" and "help"
+		 * commands can be written with "--" prepended
+		 * to make them look like flags.
+		 */
+		if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version"))
+			break;
+
+		/*
+		 * Check remaining flags.
+		 */
+		if (!prefixcmp(cmd, "--exec-path")) {
+			cmd += 11;
+			if (*cmd == '=')
+				perf_set_argv_exec_path(cmd + 1);
+			else {
+				puts(perf_exec_path());
+				exit(0);
+			}
+		} else if (!strcmp(cmd, "--html-path")) {
+			puts(system_path(PERF_HTML_PATH));
+			exit(0);
+		} else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) {
+			use_pager = 1;
+		} else if (!strcmp(cmd, "--no-pager")) {
+			use_pager = 0;
+			if (envchanged)
+				*envchanged = 1;
+		} else if (!strcmp(cmd, "--perf-dir")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --perf-dir.\n" );
+				usage(perf_usage_string);
+			}
+			setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
+			handled++;
+		} else if (!prefixcmp(cmd, "--perf-dir=")) {
+			setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1);
+			if (envchanged)
+				*envchanged = 1;
+		} else if (!strcmp(cmd, "--work-tree")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --work-tree.\n" );
+				usage(perf_usage_string);
+			}
+			setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
+		} else if (!prefixcmp(cmd, "--work-tree=")) {
+			setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1);
+			if (envchanged)
+				*envchanged = 1;
+		} else {
+			fprintf(stderr, "Unknown option: %s\n", cmd);
+			usage(perf_usage_string);
+		}
+
+		(*argv)++;
+		(*argc)--;
+		handled++;
+	}
+	return handled;
+}
+
+static int handle_alias(int *argcp, const char ***argv)
+{
+	int envchanged = 0, ret = 0, saved_errno = errno;
+	int count, option_count;
+	const char** new_argv;
+	const char *alias_command;
+	char *alias_string;
+	int unused_nonperf;
+
+	alias_command = (*argv)[0];
+	alias_string = alias_lookup(alias_command);
+	if (alias_string) {
+		if (alias_string[0] == '!') {
+			if (*argcp > 1) {
+				struct strbuf buf;
+
+				strbuf_init(&buf, PATH_MAX);
+				strbuf_addstr(&buf, alias_string);
+				sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
+				free(alias_string);
+				alias_string = buf.buf;
+			}
+			ret = system(alias_string + 1);
+			if (ret >= 0 && WIFEXITED(ret) &&
+			    WEXITSTATUS(ret) != 127)
+				exit(WEXITSTATUS(ret));
+			die("Failed to run '%s' when expanding alias '%s'",
+			    alias_string + 1, alias_command);
+		}
+		count = split_cmdline(alias_string, &new_argv);
+		if (count < 0)
+			die("Bad alias.%s string", alias_command);
+		option_count = handle_options(&new_argv, &count, &envchanged);
+		if (envchanged)
+			die("alias '%s' changes environment variables\n"
+				 "You can use '!perf' in the alias to do this.",
+				 alias_command);
+		memmove(new_argv - option_count, new_argv,
+				count * sizeof(char *));
+		new_argv -= option_count;
+
+		if (count < 1)
+			die("empty alias for %s", alias_command);
+
+		if (!strcmp(alias_command, new_argv[0]))
+			die("recursive alias: %s", alias_command);
+
+		new_argv = realloc(new_argv, sizeof(char*) *
+				    (count + *argcp + 1));
+		/* insert after command name */
+		memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp);
+		new_argv[count+*argcp] = NULL;
+
+		*argv = new_argv;
+		*argcp += count - 1;
+
+		ret = 1;
+	}
+
+	errno = saved_errno;
+
+	return ret;
+}
+
+const char perf_version_string[] = PERF_VERSION;
+
+#define RUN_SETUP	(1<<0)
+#define USE_PAGER	(1<<1)
+/*
+ * require working tree to be present -- anything uses this needs
+ * RUN_SETUP for reading from the configuration file.
+ */
+#define NEED_WORK_TREE	(1<<2)
+
+struct cmd_struct {
+	const char *cmd;
+	int (*fn)(int, const char **, const char *);
+	int option;
+};
+
+static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
+{
+	int status;
+	struct stat st;
+	const char *prefix;
+
+	prefix = NULL;
+	if (p->option & RUN_SETUP)
+		prefix = NULL; /* setup_perf_directory(); */
+
+	if (use_pager == -1 && p->option & RUN_SETUP)
+		use_pager = check_pager_config(p->cmd);
+	if (use_pager == -1 && p->option & USE_PAGER)
+		use_pager = 1;
+	commit_pager_choice();
+
+	if (p->option & NEED_WORK_TREE)
+		/* setup_work_tree() */;
+
+	status = p->fn(argc, argv, prefix);
+	if (status)
+		return status & 0xff;
+
+	/* Somebody closed stdout? */
+	if (fstat(fileno(stdout), &st))
+		return 0;
+	/* Ignore write errors for pipes and sockets.. */
+	if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
+		return 0;
+
+	/* Check for ENOSPC and EIO errors.. */
+	if (fflush(stdout))
+		die("write failure on standard output: %s", strerror(errno));
+	if (ferror(stdout))
+		die("unknown write failure on standard output");
+	if (fclose(stdout))
+		die("close failed on standard output: %s", strerror(errno));
+	return 0;
+}
+
+static void handle_internal_command(int argc, const char **argv)
+{
+	const char *cmd = argv[0];
+	static struct cmd_struct commands[] = {
+		{ "top", cmd_top, 0 },
+	};
+	int i;
+	static const char ext[] = STRIP_EXTENSION;
+
+	if (sizeof(ext) > 1) {
+		i = strlen(argv[0]) - strlen(ext);
+		if (i > 0 && !strcmp(argv[0] + i, ext)) {
+			char *argv0 = strdup(argv[0]);
+			argv[0] = cmd = argv0;
+			argv0[i] = '\0';
+		}
+	}
+
+	/* Turn "perf cmd --help" into "perf help cmd" */
+	if (argc > 1 && !strcmp(argv[1], "--help")) {
+		argv[1] = argv[0];
+		argv[0] = cmd = "help";
+	}
+
+	for (i = 0; i < ARRAY_SIZE(commands); i++) {
+		struct cmd_struct *p = commands+i;
+		if (strcmp(p->cmd, cmd))
+			continue;
+		exit(run_builtin(p, argc, argv));
+	}
+}
+
+static void execv_dashed_external(const char **argv)
+{
+	struct strbuf cmd = STRBUF_INIT;
+	const char *tmp;
+	int status;
+
+	strbuf_addf(&cmd, "perf-%s", argv[0]);
+
+	/*
+	 * argv[0] must be the perf command, but the argv array
+	 * belongs to the caller, and may be reused in
+	 * subsequent loop iterations. Save argv[0] and
+	 * restore it on error.
+	 */
+	tmp = argv[0];
+	argv[0] = cmd.buf;
+
+	/*
+	 * if we fail because the command is not found, it is
+	 * OK to return. Otherwise, we just pass along the status code.
+	 */
+	status = run_command_v_opt(argv, 0);
+	if (status != -ERR_RUN_COMMAND_EXEC) {
+		if (IS_RUN_COMMAND_ERR(status))
+			die("unable to run '%s'", argv[0]);
+		exit(-status);
+	}
+	errno = ENOENT; /* as if we called execvp */
+
+	argv[0] = tmp;
+
+	strbuf_release(&cmd);
+}
+
+static int run_argv(int *argcp, const char ***argv)
+{
+	int done_alias = 0;
+
+	while (1) {
+		/* See if it's an internal command */
+		handle_internal_command(*argcp, *argv);
+
+		/* .. then try the external ones */
+		execv_dashed_external(*argv);
+
+		/* It could be an alias -- this works around the insanity
+		 * of overriding "perf log" with "perf show" by having
+		 * alias.log = show
+		 */
+		if (done_alias || !handle_alias(argcp, argv))
+			break;
+		done_alias = 1;
+	}
+
+	return done_alias;
+}
+
+
+int main(int argc, const char **argv)
+{
+	const char *cmd;
+
+	cmd = perf_extract_argv0_path(argv[0]);
+	if (!cmd)
+		cmd = "perf-help";
+
+	/*
+	 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
+	 *
+	 *  - cannot take flags in between the "perf" and the "xxxx".
+	 *  - cannot execute it externally (since it would just do
+	 *    the same thing over again)
+	 *
+	 * So we just directly call the internal command handler, and
+	 * die if that one cannot handle it.
+	 */
+	if (!prefixcmp(cmd, "perf-")) {
+		cmd += 4;
+		argv[0] = cmd;
+		handle_internal_command(argc, argv);
+		die("cannot handle %s internally", cmd);
+	}
+
+	/* Look for flags.. */
+	argv++;
+	argc--;
+	handle_options(&argv, &argc, NULL);
+	commit_pager_choice();
+	if (argc > 0) {
+		if (!prefixcmp(argv[0], "--"))
+			argv[0] += 2;
+	} else {
+		/* The user didn't specify a command; give them help */
+		printf("usage: %s\n\n", perf_usage_string);
+		list_common_cmds_help();
+		printf("\n%s\n", perf_more_info_string);
+		exit(1);
+	}
+	cmd = argv[0];
+
+	/*
+	 * We use PATH to find perf commands, but we prepend some higher
+	 * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH
+	 * environment, and the $(perfexecdir) from the Makefile at build
+	 * time.
+	 */
+	setup_path();
+
+	while (1) {
+		static int done_help = 0;
+		static int was_alias = 0;
+		was_alias = run_argv(&argc, &argv);
+		if (errno != ENOENT)
+			break;
+		if (was_alias) {
+			fprintf(stderr, "Expansion of alias '%s' failed; "
+				"'%s' is not a perf-command\n",
+				cmd, argv[0]);
+			exit(1);
+		}
+		if (!done_help) {
+			cmd = argv[0] = help_unknown_cmd(cmd);
+			done_help = 1;
+		} else
+			break;
+	}
+
+	fprintf(stderr, "Failed to run command '%s': %s\n",
+		cmd, strerror(errno));
+
+	return 1;
+}
diff --git a/Documentation/perf_counter/quote.c b/Documentation/perf_counter/quote.c
new file mode 100644
index 0000000..7a49fcf
--- /dev/null
+++ b/Documentation/perf_counter/quote.c
@@ -0,0 +1,478 @@
+#include "cache.h"
+#include "quote.h"
+
+int quote_path_fully = 1;
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ *
+ * E.g.
+ *  original     sq_quote     result
+ *  name     ==> name      ==> 'name'
+ *  a b      ==> a b       ==> 'a b'
+ *  a'b      ==> a'\''b    ==> 'a'\''b'
+ *  a!b      ==> a'\!'b    ==> 'a'\!'b'
+ */
+static inline int need_bs_quote(char c)
+{
+	return (c == '\'' || c == '!');
+}
+
+void sq_quote_buf(struct strbuf *dst, const char *src)
+{
+	char *to_free = NULL;
+
+	if (dst->buf == src)
+		to_free = strbuf_detach(dst, NULL);
+
+	strbuf_addch(dst, '\'');
+	while (*src) {
+		size_t len = strcspn(src, "'!");
+		strbuf_add(dst, src, len);
+		src += len;
+		while (need_bs_quote(*src)) {
+			strbuf_addstr(dst, "'\\");
+			strbuf_addch(dst, *src++);
+			strbuf_addch(dst, '\'');
+		}
+	}
+	strbuf_addch(dst, '\'');
+	free(to_free);
+}
+
+void sq_quote_print(FILE *stream, const char *src)
+{
+	char c;
+
+	fputc('\'', stream);
+	while ((c = *src++)) {
+		if (need_bs_quote(c)) {
+			fputs("'\\", stream);
+			fputc(c, stream);
+			fputc('\'', stream);
+		} else {
+			fputc(c, stream);
+		}
+	}
+	fputc('\'', stream);
+}
+
+void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
+{
+	int i;
+
+	/* Copy into destination buffer. */
+	strbuf_grow(dst, 255);
+	for (i = 0; argv[i]; ++i) {
+		strbuf_addch(dst, ' ');
+		sq_quote_buf(dst, argv[i]);
+		if (maxlen && dst->len > maxlen)
+			die("Too many or long arguments");
+	}
+}
+
+char *sq_dequote_step(char *arg, char **next)
+{
+	char *dst = arg;
+	char *src = arg;
+	char c;
+
+	if (*src != '\'')
+		return NULL;
+	for (;;) {
+		c = *++src;
+		if (!c)
+			return NULL;
+		if (c != '\'') {
+			*dst++ = c;
+			continue;
+		}
+		/* We stepped out of sq */
+		switch (*++src) {
+		case '\0':
+			*dst = 0;
+			if (next)
+				*next = NULL;
+			return arg;
+		case '\\':
+			c = *++src;
+			if (need_bs_quote(c) && *++src == '\'') {
+				*dst++ = c;
+				continue;
+			}
+		/* Fallthrough */
+		default:
+			if (!next || !isspace(*src))
+				return NULL;
+			do {
+				c = *++src;
+			} while (isspace(c));
+			*dst = 0;
+			*next = src;
+			return arg;
+		}
+	}
+}
+
+char *sq_dequote(char *arg)
+{
+	return sq_dequote_step(arg, NULL);
+}
+
+int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
+{
+	char *next = arg;
+
+	if (!*arg)
+		return 0;
+	do {
+		char *dequoted = sq_dequote_step(next, &next);
+		if (!dequoted)
+			return -1;
+		ALLOC_GROW(*argv, *nr + 1, *alloc);
+		(*argv)[(*nr)++] = dequoted;
+	} while (next);
+
+	return 0;
+}
+
+/* 1 means: quote as octal
+ * 0 means: quote as octal if (quote_path_fully)
+ * -1 means: never quote
+ * c: quote as "\\c"
+ */
+#define X8(x)   x, x, x, x, x, x, x, x
+#define X16(x)  X8(x), X8(x)
+static signed char const sq_lookup[256] = {
+	/*           0    1    2    3    4    5    6    7 */
+	/* 0x00 */   1,   1,   1,   1,   1,   1,   1, 'a',
+	/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r',   1,   1,
+	/* 0x10 */ X16(1),
+	/* 0x20 */  -1,  -1, '"',  -1,  -1,  -1,  -1,  -1,
+	/* 0x28 */ X16(-1), X16(-1), X16(-1),
+	/* 0x58 */  -1,  -1,  -1,  -1,'\\',  -1,  -1,  -1,
+	/* 0x60 */ X16(-1), X8(-1),
+	/* 0x78 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,   1,
+	/* 0x80 */ /* set to 0 */
+};
+
+static inline int sq_must_quote(char c)
+{
+	return sq_lookup[(unsigned char)c] + quote_path_fully > 0;
+}
+
+/* returns the longest prefix not needing a quote up to maxlen if positive.
+   This stops at the first \0 because it's marked as a character needing an
+   escape */
+static size_t next_quote_pos(const char *s, ssize_t maxlen)
+{
+	size_t len;
+	if (maxlen < 0) {
+		for (len = 0; !sq_must_quote(s[len]); len++);
+	} else {
+		for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++);
+	}
+	return len;
+}
+
+/*
+ * C-style name quoting.
+ *
+ * (1) if sb and fp are both NULL, inspect the input name and counts the
+ *     number of bytes that are needed to hold c_style quoted version of name,
+ *     counting the double quotes around it but not terminating NUL, and
+ *     returns it.
+ *     However, if name does not need c_style quoting, it returns 0.
+ *
+ * (2) if sb or fp are not NULL, it emits the c_style quoted version
+ *     of name, enclosed with double quotes if asked and needed only.
+ *     Return value is the same as in (1).
+ */
+static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
+                                    struct strbuf *sb, FILE *fp, int no_dq)
+{
+#undef EMIT
+#define EMIT(c)                                 \
+	do {                                        \
+		if (sb) strbuf_addch(sb, (c));          \
+		if (fp) fputc((c), fp);                 \
+		count++;                                \
+	} while (0)
+#define EMITBUF(s, l)                           \
+	do {                                        \
+		if (sb) strbuf_add(sb, (s), (l));       \
+		if (fp) fwrite((s), (l), 1, fp);        \
+		count += (l);                           \
+	} while (0)
+
+	size_t len, count = 0;
+	const char *p = name;
+
+	for (;;) {
+		int ch;
+
+		len = next_quote_pos(p, maxlen);
+		if (len == maxlen || !p[len])
+			break;
+
+		if (!no_dq && p == name)
+			EMIT('"');
+
+		EMITBUF(p, len);
+		EMIT('\\');
+		p += len;
+		ch = (unsigned char)*p++;
+		if (sq_lookup[ch] >= ' ') {
+			EMIT(sq_lookup[ch]);
+		} else {
+			EMIT(((ch >> 6) & 03) + '0');
+			EMIT(((ch >> 3) & 07) + '0');
+			EMIT(((ch >> 0) & 07) + '0');
+		}
+	}
+
+	EMITBUF(p, len);
+	if (p == name)   /* no ending quote needed */
+		return 0;
+
+	if (!no_dq)
+		EMIT('"');
+	return count;
+}
+
+size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
+{
+	return quote_c_style_counted(name, -1, sb, fp, nodq);
+}
+
+void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
+{
+	if (quote_c_style(prefix, NULL, NULL, 0) ||
+	    quote_c_style(path, NULL, NULL, 0)) {
+		if (!nodq)
+			strbuf_addch(sb, '"');
+		quote_c_style(prefix, sb, NULL, 1);
+		quote_c_style(path, sb, NULL, 1);
+		if (!nodq)
+			strbuf_addch(sb, '"');
+	} else {
+		strbuf_addstr(sb, prefix);
+		strbuf_addstr(sb, path);
+	}
+}
+
+void write_name_quoted(const char *name, FILE *fp, int terminator)
+{
+	if (terminator) {
+		quote_c_style(name, NULL, fp, 0);
+	} else {
+		fputs(name, fp);
+	}
+	fputc(terminator, fp);
+}
+
+extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
+                                 const char *name, FILE *fp, int terminator)
+{
+	int needquote = 0;
+
+	if (terminator) {
+		needquote = next_quote_pos(pfx, pfxlen) < pfxlen
+			|| name[next_quote_pos(name, -1)];
+	}
+	if (needquote) {
+		fputc('"', fp);
+		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
+		quote_c_style(name, NULL, fp, 1);
+		fputc('"', fp);
+	} else {
+		fwrite(pfx, pfxlen, 1, fp);
+		fputs(name, fp);
+	}
+	fputc(terminator, fp);
+}
+
+/* quote path as relative to the given prefix */
+char *quote_path_relative(const char *in, int len,
+			  struct strbuf *out, const char *prefix)
+{
+	int needquote;
+
+	if (len < 0)
+		len = strlen(in);
+
+	/* "../" prefix itself does not need quoting, but "in" might. */
+	needquote = next_quote_pos(in, len) < len;
+	strbuf_setlen(out, 0);
+	strbuf_grow(out, len);
+
+	if (needquote)
+		strbuf_addch(out, '"');
+	if (prefix) {
+		int off = 0;
+		while (prefix[off] && off < len && prefix[off] == in[off])
+			if (prefix[off] == '/') {
+				prefix += off + 1;
+				in += off + 1;
+				len -= off + 1;
+				off = 0;
+			} else
+				off++;
+
+		for (; *prefix; prefix++)
+			if (*prefix == '/')
+				strbuf_addstr(out, "../");
+	}
+
+	quote_c_style_counted (in, len, out, NULL, 1);
+
+	if (needquote)
+		strbuf_addch(out, '"');
+	if (!out->len)
+		strbuf_addstr(out, "./");
+
+	return out->buf;
+}
+
+/*
+ * C-style name unquoting.
+ *
+ * Quoted should point at the opening double quote.
+ * + Returns 0 if it was able to unquote the string properly, and appends the
+ *   result in the strbuf `sb'.
+ * + Returns -1 in case of error, and doesn't touch the strbuf. Though note
+ *   that this function will allocate memory in the strbuf, so calling
+ *   strbuf_release is mandatory whichever result unquote_c_style returns.
+ *
+ * Updates endp pointer to point at one past the ending double quote if given.
+ */
+int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
+{
+	size_t oldlen = sb->len, len;
+	int ch, ac;
+
+	if (*quoted++ != '"')
+		return -1;
+
+	for (;;) {
+		len = strcspn(quoted, "\"\\");
+		strbuf_add(sb, quoted, len);
+		quoted += len;
+
+		switch (*quoted++) {
+		  case '"':
+			if (endp)
+				*endp = quoted;
+			return 0;
+		  case '\\':
+			break;
+		  default:
+			goto error;
+		}
+
+		switch ((ch = *quoted++)) {
+		case 'a': ch = '\a'; break;
+		case 'b': ch = '\b'; break;
+		case 'f': ch = '\f'; break;
+		case 'n': ch = '\n'; break;
+		case 'r': ch = '\r'; break;
+		case 't': ch = '\t'; break;
+		case 'v': ch = '\v'; break;
+
+		case '\\': case '"':
+			break; /* verbatim */
+
+		/* octal values with first digit over 4 overflow */
+		case '0': case '1': case '2': case '3':
+					ac = ((ch - '0') << 6);
+			if ((ch = *quoted++) < '0' || '7' < ch)
+				goto error;
+					ac |= ((ch - '0') << 3);
+			if ((ch = *quoted++) < '0' || '7' < ch)
+				goto error;
+					ac |= (ch - '0');
+					ch = ac;
+					break;
+				default:
+			goto error;
+			}
+		strbuf_addch(sb, ch);
+		}
+
+  error:
+	strbuf_setlen(sb, oldlen);
+	return -1;
+}
+
+/* quoting as a string literal for other languages */
+
+void perl_quote_print(FILE *stream, const char *src)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	char c;
+
+	fputc(sq, stream);
+	while ((c = *src++)) {
+		if (c == sq || c == bq)
+			fputc(bq, stream);
+		fputc(c, stream);
+	}
+	fputc(sq, stream);
+}
+
+void python_quote_print(FILE *stream, const char *src)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char nl = '\n';
+	char c;
+
+	fputc(sq, stream);
+	while ((c = *src++)) {
+		if (c == nl) {
+			fputc(bq, stream);
+			fputc('n', stream);
+			continue;
+		}
+		if (c == sq || c == bq)
+			fputc(bq, stream);
+		fputc(c, stream);
+	}
+	fputc(sq, stream);
+}
+
+void tcl_quote_print(FILE *stream, const char *src)
+{
+	char c;
+
+	fputc('"', stream);
+	while ((c = *src++)) {
+		switch (c) {
+		case '[': case ']':
+		case '{': case '}':
+		case '$': case '\\': case '"':
+			fputc('\\', stream);
+		default:
+			fputc(c, stream);
+			break;
+		case '\f':
+			fputs("\\f", stream);
+			break;
+		case '\r':
+			fputs("\\r", stream);
+			break;
+		case '\n':
+			fputs("\\n", stream);
+			break;
+		case '\t':
+			fputs("\\t", stream);
+			break;
+		case '\v':
+			fputs("\\v", stream);
+			break;
+		}
+	}
+	fputc('"', stream);
+}
diff --git a/Documentation/perf_counter/quote.h b/Documentation/perf_counter/quote.h
new file mode 100644
index 0000000..66730f2
--- /dev/null
+++ b/Documentation/perf_counter/quote.h
@@ -0,0 +1,68 @@
+#ifndef QUOTE_H
+#define QUOTE_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ * single quote pair.
+ *
+ * For example, if you are passing the result to system() as an
+ * argument:
+ *
+ * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
+ *
+ * would be appropriate.  If the system() is going to call ssh to
+ * run the command on the other side:
+ *
+ * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
+ * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd));
+ *
+ * Note that the above examples leak memory!  Remember to free result from
+ * sq_quote() in a real application.
+ *
+ * sq_quote_buf() writes to an existing buffer of specified size; it
+ * will return the number of characters that would have been written
+ * excluding the final null regardless of the buffer size.
+ */
+
+extern void sq_quote_print(FILE *stream, const char *src);
+
+extern void sq_quote_buf(struct strbuf *, const char *src);
+extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+
+/* This unwraps what sq_quote() produces in place, but returns
+ * NULL if the input does not look like what sq_quote would have
+ * produced.
+ */
+extern char *sq_dequote(char *);
+
+/*
+ * Same as the above, but can be used to unwrap many arguments in the
+ * same string separated by space. "next" is changed to point to the
+ * next argument that should be passed as first parameter. When there
+ * is no more argument to be dequoted, "next" is updated to point to NULL.
+ */
+extern char *sq_dequote_step(char *arg, char **next);
+extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc);
+
+extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
+extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq);
+extern void quote_two_c_style(struct strbuf *, const char *, const char *, int);
+
+extern void write_name_quoted(const char *name, FILE *, int terminator);
+extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
+                                 const char *name, FILE *, int terminator);
+
+/* quote path as relative to the given prefix */
+char *quote_path_relative(const char *in, int len,
+			  struct strbuf *out, const char *prefix);
+
+/* quoting as a string literal for other languages */
+extern void perl_quote_print(FILE *stream, const char *src);
+extern void python_quote_print(FILE *stream, const char *src);
+extern void tcl_quote_print(FILE *stream, const char *src);
+
+#endif
diff --git a/Documentation/perf_counter/run-command.c b/Documentation/perf_counter/run-command.c
new file mode 100644
index 0000000..b2f5e85
--- /dev/null
+++ b/Documentation/perf_counter/run-command.c
@@ -0,0 +1,395 @@
+#include "cache.h"
+#include "run-command.h"
+#include "exec_cmd.h"
+
+static inline void close_pair(int fd[2])
+{
+	close(fd[0]);
+	close(fd[1]);
+}
+
+static inline void dup_devnull(int to)
+{
+	int fd = open("/dev/null", O_RDWR);
+	dup2(fd, to);
+	close(fd);
+}
+
+int start_command(struct child_process *cmd)
+{
+	int need_in, need_out, need_err;
+	int fdin[2], fdout[2], fderr[2];
+
+	/*
+	 * In case of errors we must keep the promise to close FDs
+	 * that have been passed in via ->in and ->out.
+	 */
+
+	need_in = !cmd->no_stdin && cmd->in < 0;
+	if (need_in) {
+		if (pipe(fdin) < 0) {
+			if (cmd->out > 0)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->in = fdin[1];
+	}
+
+	need_out = !cmd->no_stdout
+		&& !cmd->stdout_to_stderr
+		&& cmd->out < 0;
+	if (need_out) {
+		if (pipe(fdout) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->out = fdout[0];
+	}
+
+	need_err = !cmd->no_stderr && cmd->err < 0;
+	if (need_err) {
+		if (pipe(fderr) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			if (need_out)
+				close_pair(fdout);
+			else if (cmd->out)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->err = fderr[0];
+	}
+
+#ifndef __MINGW32__
+	fflush(NULL);
+	cmd->pid = fork();
+	if (!cmd->pid) {
+		if (cmd->no_stdin)
+			dup_devnull(0);
+		else if (need_in) {
+			dup2(fdin[0], 0);
+			close_pair(fdin);
+		} else if (cmd->in) {
+			dup2(cmd->in, 0);
+			close(cmd->in);
+		}
+
+		if (cmd->no_stderr)
+			dup_devnull(2);
+		else if (need_err) {
+			dup2(fderr[1], 2);
+			close_pair(fderr);
+		}
+
+		if (cmd->no_stdout)
+			dup_devnull(1);
+		else if (cmd->stdout_to_stderr)
+			dup2(2, 1);
+		else if (need_out) {
+			dup2(fdout[1], 1);
+			close_pair(fdout);
+		} else if (cmd->out > 1) {
+			dup2(cmd->out, 1);
+			close(cmd->out);
+		}
+
+		if (cmd->dir && chdir(cmd->dir))
+			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
+			    cmd->dir, strerror(errno));
+		if (cmd->env) {
+			for (; *cmd->env; cmd->env++) {
+				if (strchr(*cmd->env, '='))
+					putenv((char*)*cmd->env);
+				else
+					unsetenv(*cmd->env);
+			}
+		}
+		if (cmd->preexec_cb)
+			cmd->preexec_cb();
+		if (cmd->perf_cmd) {
+			execv_perf_cmd(cmd->argv);
+		} else {
+			execvp(cmd->argv[0], (char *const*) cmd->argv);
+		}
+		exit(127);
+	}
+#else
+	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
+	const char **sargv = cmd->argv;
+	char **env = environ;
+
+	if (cmd->no_stdin) {
+		s0 = dup(0);
+		dup_devnull(0);
+	} else if (need_in) {
+		s0 = dup(0);
+		dup2(fdin[0], 0);
+	} else if (cmd->in) {
+		s0 = dup(0);
+		dup2(cmd->in, 0);
+	}
+
+	if (cmd->no_stderr) {
+		s2 = dup(2);
+		dup_devnull(2);
+	} else if (need_err) {
+		s2 = dup(2);
+		dup2(fderr[1], 2);
+	}
+
+	if (cmd->no_stdout) {
+		s1 = dup(1);
+		dup_devnull(1);
+	} else if (cmd->stdout_to_stderr) {
+		s1 = dup(1);
+		dup2(2, 1);
+	} else if (need_out) {
+		s1 = dup(1);
+		dup2(fdout[1], 1);
+	} else if (cmd->out > 1) {
+		s1 = dup(1);
+		dup2(cmd->out, 1);
+	}
+
+	if (cmd->dir)
+		die("chdir in start_command() not implemented");
+	if (cmd->env) {
+		env = copy_environ();
+		for (; *cmd->env; cmd->env++)
+			env = env_setenv(env, *cmd->env);
+	}
+
+	if (cmd->perf_cmd) {
+		cmd->argv = prepare_perf_cmd(cmd->argv);
+	}
+
+	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);
+
+	if (cmd->env)
+		free_environ(env);
+	if (cmd->perf_cmd)
+		free(cmd->argv);
+
+	cmd->argv = sargv;
+	if (s0 >= 0)
+		dup2(s0, 0), close(s0);
+	if (s1 >= 0)
+		dup2(s1, 1), close(s1);
+	if (s2 >= 0)
+		dup2(s2, 2), close(s2);
+#endif
+
+	if (cmd->pid < 0) {
+		int err = errno;
+		if (need_in)
+			close_pair(fdin);
+		else if (cmd->in)
+			close(cmd->in);
+		if (need_out)
+			close_pair(fdout);
+		else if (cmd->out)
+			close(cmd->out);
+		if (need_err)
+			close_pair(fderr);
+		return err == ENOENT ?
+			-ERR_RUN_COMMAND_EXEC :
+			-ERR_RUN_COMMAND_FORK;
+	}
+
+	if (need_in)
+		close(fdin[0]);
+	else if (cmd->in)
+		close(cmd->in);
+
+	if (need_out)
+		close(fdout[1]);
+	else if (cmd->out)
+		close(cmd->out);
+
+	if (need_err)
+		close(fderr[1]);
+
+	return 0;
+}
+
+static int wait_or_whine(pid_t pid)
+{
+	for (;;) {
+		int status, code;
+		pid_t waiting = waitpid(pid, &status, 0);
+
+		if (waiting < 0) {
+			if (errno == EINTR)
+				continue;
+			error("waitpid failed (%s)", strerror(errno));
+			return -ERR_RUN_COMMAND_WAITPID;
+		}
+		if (waiting != pid)
+			return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
+		if (WIFSIGNALED(status))
+			return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
+
+		if (!WIFEXITED(status))
+			return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
+		code = WEXITSTATUS(status);
+		switch (code) {
+		case 127:
+			return -ERR_RUN_COMMAND_EXEC;
+		case 0:
+			return 0;
+		default:
+			return -code;
+		}
+	}
+}
+
+int finish_command(struct child_process *cmd)
+{
+	return wait_or_whine(cmd->pid);
+}
+
+int run_command(struct child_process *cmd)
+{
+	int code = start_command(cmd);
+	if (code)
+		return code;
+	return finish_command(cmd);
+}
+
+static void prepare_run_command_v_opt(struct child_process *cmd,
+				      const char **argv,
+				      int opt)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->argv = argv;
+	cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
+	cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0;
+	cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
+}
+
+int run_command_v_opt(const char **argv, int opt)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	return run_command(&cmd);
+}
+
+int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	cmd.dir = dir;
+	cmd.env = env;
+	return run_command(&cmd);
+}
+
+#ifdef __MINGW32__
+static __stdcall unsigned run_thread(void *data)
+{
+	struct async *async = data;
+	return async->proc(async->fd_for_proc, async->data);
+}
+#endif
+
+int start_async(struct async *async)
+{
+	int pipe_out[2];
+
+	if (pipe(pipe_out) < 0)
+		return error("cannot create pipe: %s", strerror(errno));
+	async->out = pipe_out[0];
+
+#ifndef __MINGW32__
+	/* Flush stdio before fork() to avoid cloning buffers */
+	fflush(NULL);
+
+	async->pid = fork();
+	if (async->pid < 0) {
+		error("fork (async) failed: %s", strerror(errno));
+		close_pair(pipe_out);
+		return -1;
+	}
+	if (!async->pid) {
+		close(pipe_out[0]);
+		exit(!!async->proc(pipe_out[1], async->data));
+	}
+	close(pipe_out[1]);
+#else
+	async->fd_for_proc = pipe_out[1];
+	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
+	if (!async->tid) {
+		error("cannot create thread: %s", strerror(errno));
+		close_pair(pipe_out);
+		return -1;
+	}
+#endif
+	return 0;
+}
+
+int finish_async(struct async *async)
+{
+#ifndef __MINGW32__
+	int ret = 0;
+
+	if (wait_or_whine(async->pid))
+		ret = error("waitpid (async) failed");
+#else
+	DWORD ret = 0;
+	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
+		ret = error("waiting for thread failed: %lu", GetLastError());
+	else if (!GetExitCodeThread(async->tid, &ret))
+		ret = error("cannot get thread exit code: %lu", GetLastError());
+	CloseHandle(async->tid);
+#endif
+	return ret;
+}
+
+int run_hook(const char *index_file, const char *name, ...)
+{
+	struct child_process hook;
+	const char **argv = NULL, *env[2];
+	char index[PATH_MAX];
+	va_list args;
+	int ret;
+	size_t i = 0, alloc = 0;
+
+	if (access(perf_path("hooks/%s", name), X_OK) < 0)
+		return 0;
+
+	va_start(args, name);
+	ALLOC_GROW(argv, i + 1, alloc);
+	argv[i++] = perf_path("hooks/%s", name);
+	while (argv[i-1]) {
+		ALLOC_GROW(argv, i + 1, alloc);
+		argv[i++] = va_arg(args, const char *);
+	}
+	va_end(args);
+
+	memset(&hook, 0, sizeof(hook));
+	hook.argv = argv;
+	hook.no_stdin = 1;
+	hook.stdout_to_stderr = 1;
+	if (index_file) {
+		snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file);
+		env[0] = index;
+		env[1] = NULL;
+		hook.env = env;
+	}
+
+	ret = start_command(&hook);
+	free(argv);
+	if (ret) {
+		warning("Could not spawn %s", argv[0]);
+		return ret;
+	}
+	ret = finish_command(&hook);
+	if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
+		warning("%s exited due to uncaught signal", argv[0]);
+
+	return ret;
+}
diff --git a/Documentation/perf_counter/run-command.h b/Documentation/perf_counter/run-command.h
new file mode 100644
index 0000000..328289f
--- /dev/null
+++ b/Documentation/perf_counter/run-command.h
@@ -0,0 +1,93 @@
+#ifndef RUN_COMMAND_H
+#define RUN_COMMAND_H
+
+enum {
+	ERR_RUN_COMMAND_FORK = 10000,
+	ERR_RUN_COMMAND_EXEC,
+	ERR_RUN_COMMAND_PIPE,
+	ERR_RUN_COMMAND_WAITPID,
+	ERR_RUN_COMMAND_WAITPID_WRONG_PID,
+	ERR_RUN_COMMAND_WAITPID_SIGNAL,
+	ERR_RUN_COMMAND_WAITPID_NOEXIT,
+};
+#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
+
+struct child_process {
+	const char **argv;
+	pid_t pid;
+	/*
+	 * Using .in, .out, .err:
+	 * - Specify 0 for no redirections (child inherits stdin, stdout,
+	 *   stderr from parent).
+	 * - Specify -1 to have a pipe allocated as follows:
+	 *     .in: returns the writable pipe end; parent writes to it,
+	 *          the readable pipe end becomes child's stdin
+	 *     .out, .err: returns the readable pipe end; parent reads from
+	 *          it, the writable pipe end becomes child's stdout/stderr
+	 *   The caller of start_command() must close the returned FDs
+	 *   after it has completed reading from/writing to it!
+	 * - Specify > 0 to set a channel to a particular FD as follows:
+	 *     .in: a readable FD, becomes child's stdin
+	 *     .out: a writable FD, becomes child's stdout/stderr
+	 *     .err > 0 not supported
+	 *   The specified FD is closed by start_command(), even in case
+	 *   of errors!
+	 */
+	int in;
+	int out;
+	int err;
+	const char *dir;
+	const char *const *env;
+	unsigned no_stdin:1;
+	unsigned no_stdout:1;
+	unsigned no_stderr:1;
+	unsigned perf_cmd:1; /* if this is to be perf sub-command */
+	unsigned stdout_to_stderr:1;
+	void (*preexec_cb)(void);
+};
+
+int start_command(struct child_process *);
+int finish_command(struct child_process *);
+int run_command(struct child_process *);
+
+extern int run_hook(const char *index_file, const char *name, ...);
+
+#define RUN_COMMAND_NO_STDIN 1
+#define RUN_PERF_CMD	     2	/*If this is to be perf sub-command */
+#define RUN_COMMAND_STDOUT_TO_STDERR 4
+int run_command_v_opt(const char **argv, int opt);
+
+/*
+ * env (the environment) is to be formatted like environ: "VAR=VALUE".
+ * To unset an environment variable use just "VAR".
+ */
+int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env);
+
+/*
+ * The purpose of the following functions is to feed a pipe by running
+ * a function asynchronously and providing output that the caller reads.
+ *
+ * It is expected that no synchronization and mutual exclusion between
+ * the caller and the feed function is necessary so that the function
+ * can run in a thread without interfering with the caller.
+ */
+struct async {
+	/*
+	 * proc writes to fd and closes it;
+	 * returns 0 on success, non-zero on failure
+	 */
+	int (*proc)(int fd, void *data);
+	void *data;
+	int out;	/* caller reads from here and closes it */
+#ifndef __MINGW32__
+	pid_t pid;
+#else
+	HANDLE tid;
+	int fd_for_proc;
+#endif
+};
+
+int start_async(struct async *async);
+int finish_async(struct async *async);
+
+#endif
diff --git a/Documentation/perf_counter/strbuf.c b/Documentation/perf_counter/strbuf.c
new file mode 100644
index 0000000..eaba093
--- /dev/null
+++ b/Documentation/perf_counter/strbuf.c
@@ -0,0 +1,359 @@
+#include "cache.h"
+
+int prefixcmp(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 0;
+		else if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+}
+
+/*
+ * Used as the default ->buf value, so that people can always assume
+ * buf is non NULL and ->buf is NUL terminated even for a freshly
+ * initialized strbuf.
+ */
+char strbuf_slopbuf[1];
+
+void strbuf_init(struct strbuf *sb, size_t hint)
+{
+	sb->alloc = sb->len = 0;
+	sb->buf = strbuf_slopbuf;
+	if (hint)
+		strbuf_grow(sb, hint);
+}
+
+void strbuf_release(struct strbuf *sb)
+{
+	if (sb->alloc) {
+		free(sb->buf);
+		strbuf_init(sb, 0);
+	}
+}
+
+char *strbuf_detach(struct strbuf *sb, size_t *sz)
+{
+	char *res = sb->alloc ? sb->buf : NULL;
+	if (sz)
+		*sz = sb->len;
+	strbuf_init(sb, 0);
+	return res;
+}
+
+void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
+{
+	strbuf_release(sb);
+	sb->buf   = buf;
+	sb->len   = len;
+	sb->alloc = alloc;
+	strbuf_grow(sb, 0);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_grow(struct strbuf *sb, size_t extra)
+{
+	if (sb->len + extra + 1 <= sb->len)
+		die("you want to use way too much memory");
+	if (!sb->alloc)
+		sb->buf = NULL;
+	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
+}
+
+void strbuf_trim(struct strbuf *sb)
+{
+	char *b = sb->buf;
+	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+		sb->len--;
+	while (sb->len > 0 && isspace(*b)) {
+		b++;
+		sb->len--;
+	}
+	memmove(sb->buf, b, sb->len);
+	sb->buf[sb->len] = '\0';
+}
+void strbuf_rtrim(struct strbuf *sb)
+{
+	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+		sb->len--;
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_ltrim(struct strbuf *sb)
+{
+	char *b = sb->buf;
+	while (sb->len > 0 && isspace(*b)) {
+		b++;
+		sb->len--;
+	}
+	memmove(sb->buf, b, sb->len);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_tolower(struct strbuf *sb)
+{
+	int i;
+	for (i = 0; i < sb->len; i++)
+		sb->buf[i] = tolower(sb->buf[i]);
+}
+
+struct strbuf **strbuf_split(const struct strbuf *sb, int delim)
+{
+	int alloc = 2, pos = 0;
+	char *n, *p;
+	struct strbuf **ret;
+	struct strbuf *t;
+
+	ret = calloc(alloc, sizeof(struct strbuf *));
+	p = n = sb->buf;
+	while (n < sb->buf + sb->len) {
+		int len;
+		n = memchr(n, delim, sb->len - (n - sb->buf));
+		if (pos + 1 >= alloc) {
+			alloc = alloc * 2;
+			ret = realloc(ret, sizeof(struct strbuf *) * alloc);
+		}
+		if (!n)
+			n = sb->buf + sb->len - 1;
+		len = n - p + 1;
+		t = malloc(sizeof(struct strbuf));
+		strbuf_init(t, len);
+		strbuf_add(t, p, len);
+		ret[pos] = t;
+		ret[++pos] = NULL;
+		p = ++n;
+	}
+	return ret;
+}
+
+void strbuf_list_free(struct strbuf **sbs)
+{
+	struct strbuf **s = sbs;
+
+	while (*s) {
+		strbuf_release(*s);
+		free(*s++);
+	}
+	free(sbs);
+}
+
+int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
+{
+	int len = a->len < b->len ? a->len: b->len;
+	int cmp = memcmp(a->buf, b->buf, len);
+	if (cmp)
+		return cmp;
+	return a->len < b->len ? -1: a->len != b->len;
+}
+
+void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
+				   const void *data, size_t dlen)
+{
+	if (pos + len < pos)
+		die("you want to use way too much memory");
+	if (pos > sb->len)
+		die("`pos' is too far after the end of the buffer");
+	if (pos + len > sb->len)
+		die("`pos + len' is too far after the end of the buffer");
+
+	if (dlen >= len)
+		strbuf_grow(sb, dlen - len);
+	memmove(sb->buf + pos + dlen,
+			sb->buf + pos + len,
+			sb->len - pos - len);
+	memcpy(sb->buf + pos, data, dlen);
+	strbuf_setlen(sb, sb->len + dlen - len);
+}
+
+void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
+{
+	strbuf_splice(sb, pos, 0, data, len);
+}
+
+void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_splice(sb, pos, len, NULL, 0);
+}
+
+void strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, data, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, sb->buf + pos, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
+	int len;
+	va_list ap;
+
+	if (!strbuf_avail(sb))
+		strbuf_grow(sb, 64);
+	va_start(ap, fmt);
+	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+	va_end(ap);
+	if (len < 0)
+		die("your vsnprintf is broken");
+	if (len > strbuf_avail(sb)) {
+		strbuf_grow(sb, len);
+		va_start(ap, fmt);
+		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+		va_end(ap);
+		if (len > strbuf_avail(sb)) {
+			die("this should not happen, your snprintf is broken");
+		}
+	}
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+		   void *context)
+{
+	for (;;) {
+		const char *percent;
+		size_t consumed;
+
+		percent = strchrnul(format, '%');
+		strbuf_add(sb, format, percent - format);
+		if (!*percent)
+			break;
+		format = percent + 1;
+
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
+			strbuf_addch(sb, '%');
+	}
+}
+
+size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder,
+		void *context)
+{
+	struct strbuf_expand_dict_entry *e = context;
+	size_t len;
+
+	for (; e->placeholder && (len = strlen(e->placeholder)); e++) {
+		if (!strncmp(placeholder, e->placeholder, len)) {
+			if (e->value)
+				strbuf_addstr(sb, e->value);
+			return len;
+		}
+	}
+	return 0;
+}
+
+size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
+{
+	size_t res;
+	size_t oldalloc = sb->alloc;
+
+	strbuf_grow(sb, size);
+	res = fread(sb->buf + sb->len, 1, size, f);
+	if (res > 0)
+		strbuf_setlen(sb, sb->len + res);
+	else if (res < 0 && oldalloc == 0)
+		strbuf_release(sb);
+	return res;
+}
+
+ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
+{
+	size_t oldlen = sb->len;
+	size_t oldalloc = sb->alloc;
+
+	strbuf_grow(sb, hint ? hint : 8192);
+	for (;;) {
+		ssize_t cnt;
+
+		cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
+		if (cnt < 0) {
+			if (oldalloc == 0)
+				strbuf_release(sb);
+			else
+				strbuf_setlen(sb, oldlen);
+			return -1;
+		}
+		if (!cnt)
+			break;
+		sb->len += cnt;
+		strbuf_grow(sb, 8192);
+	}
+
+	sb->buf[sb->len] = '\0';
+	return sb->len - oldlen;
+}
+
+#define STRBUF_MAXLINK (2*PATH_MAX)
+
+int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
+{
+	size_t oldalloc = sb->alloc;
+
+	if (hint < 32)
+		hint = 32;
+
+	while (hint < STRBUF_MAXLINK) {
+		int len;
+
+		strbuf_grow(sb, hint);
+		len = readlink(path, sb->buf, hint);
+		if (len < 0) {
+			if (errno != ERANGE)
+				break;
+		} else if (len < hint) {
+			strbuf_setlen(sb, len);
+			return 0;
+		}
+
+		/* .. the buffer was too small - try again */
+		hint *= 2;
+	}
+	if (oldalloc == 0)
+		strbuf_release(sb);
+	return -1;
+}
+
+int strbuf_getline(struct strbuf *sb, FILE *fp, int term)
+{
+	int ch;
+
+	strbuf_grow(sb, 0);
+	if (feof(fp))
+		return EOF;
+
+	strbuf_reset(sb);
+	while ((ch = fgetc(fp)) != EOF) {
+		if (ch == term)
+			break;
+		strbuf_grow(sb, 1);
+		sb->buf[sb->len++] = ch;
+	}
+	if (ch == EOF && sb->len == 0)
+		return EOF;
+
+	sb->buf[sb->len] = '\0';
+	return 0;
+}
+
+int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
+{
+	int fd, len;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -1;
+	len = strbuf_read(sb, fd, hint);
+	close(fd);
+	if (len < 0)
+		return -1;
+
+	return len;
+}
diff --git a/Documentation/perf_counter/strbuf.h b/Documentation/perf_counter/strbuf.h
new file mode 100644
index 0000000..9ee908a
--- /dev/null
+++ b/Documentation/perf_counter/strbuf.h
@@ -0,0 +1,137 @@
+#ifndef STRBUF_H
+#define STRBUF_H
+
+/*
+ * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
+ * long, overflow safe strings.
+ *
+ * Strbufs has some invariants that are very important to keep in mind:
+ *
+ * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
+ *    build complex strings/buffers whose final size isn't easily known.
+ *
+ *    It is NOT legal to copy the ->buf pointer away.
+ *    `strbuf_detach' is the operation that detachs a buffer from its shell
+ *    while keeping the shell valid wrt its invariants.
+ *
+ * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+ *    allocated. The extra byte is used to store a '\0', allowing the ->buf
+ *    member to be a valid C-string. Every strbuf function ensure this
+ *    invariant is preserved.
+ *
+ *    Note that it is OK to "play" with the buffer directly if you work it
+ *    that way:
+ *
+ *    strbuf_grow(sb, SOME_SIZE);
+ *       ... Here, the memory array starting at sb->buf, and of length
+ *       ... strbuf_avail(sb) is all yours, and you are sure that
+ *       ... strbuf_avail(sb) is at least SOME_SIZE.
+ *    strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
+ *
+ *    Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
+ *
+ *    Doing so is safe, though if it has to be done in many places, adding the
+ *    missing API to the strbuf module is the way to go.
+ *
+ *    XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
+ *         even if it's true in the current implementation. Alloc is somehow a
+ *         "private" member that should not be messed with.
+ */
+
+#include <assert.h>
+
+extern char strbuf_slopbuf[];
+struct strbuf {
+	size_t alloc;
+	size_t len;
+	char *buf;
+};
+
+#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
+
+/*----- strbuf life cycle -----*/
+extern void strbuf_init(struct strbuf *, size_t);
+extern void strbuf_release(struct strbuf *);
+extern char *strbuf_detach(struct strbuf *, size_t *);
+extern void strbuf_attach(struct strbuf *, void *, size_t, size_t);
+static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) {
+	struct strbuf tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+/*----- strbuf size related -----*/
+static inline size_t strbuf_avail(const struct strbuf *sb) {
+	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
+}
+
+extern void strbuf_grow(struct strbuf *, size_t);
+
+static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
+	if (!sb->alloc)
+		strbuf_grow(sb, 0);
+	assert(len < sb->alloc);
+	sb->len = len;
+	sb->buf[len] = '\0';
+}
+#define strbuf_reset(sb)  strbuf_setlen(sb, 0)
+
+/*----- content related -----*/
+extern void strbuf_trim(struct strbuf *);
+extern void strbuf_rtrim(struct strbuf *);
+extern void strbuf_ltrim(struct strbuf *);
+extern int strbuf_cmp(const struct strbuf *, const struct strbuf *);
+extern void strbuf_tolower(struct strbuf *);
+
+extern struct strbuf **strbuf_split(const struct strbuf *, int delim);
+extern void strbuf_list_free(struct strbuf **);
+
+/*----- add data in your buffer -----*/
+static inline void strbuf_addch(struct strbuf *sb, int c) {
+	strbuf_grow(sb, 1);
+	sb->buf[sb->len++] = c;
+	sb->buf[sb->len] = '\0';
+}
+
+extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t);
+extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
+
+/* splice pos..pos+len with given data */
+extern void strbuf_splice(struct strbuf *, size_t pos, size_t len,
+                          const void *, size_t);
+
+extern void strbuf_add(struct strbuf *, const void *, size_t);
+static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
+	strbuf_add(sb, s, strlen(s));
+}
+static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) {
+	strbuf_add(sb, sb2->buf, sb2->len);
+}
+extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
+
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
+struct strbuf_expand_dict_entry {
+	const char *placeholder;
+	const char *value;
+};
+extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context);
+
+__attribute__((format(printf,2,3)))
+extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+
+extern size_t strbuf_fread(struct strbuf *, size_t, FILE *);
+/* XXX: if read fails, any partial read is undone */
+extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint);
+extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint);
+extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint);
+
+extern int strbuf_getline(struct strbuf *, FILE *, int);
+
+extern void stripspace(struct strbuf *buf, int skip_comments);
+extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env);
+
+extern int strbuf_branchname(struct strbuf *sb, const char *name);
+extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
+
+#endif /* STRBUF_H */
diff --git a/Documentation/perf_counter/usage.c b/Documentation/perf_counter/usage.c
new file mode 100644
index 0000000..7a10421
--- /dev/null
+++ b/Documentation/perf_counter/usage.c
@@ -0,0 +1,80 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "util.h"
+
+static void report(const char *prefix, const char *err, va_list params)
+{
+	char msg[1024];
+	vsnprintf(msg, sizeof(msg), err, params);
+	fprintf(stderr, "%s%s\n", prefix, msg);
+}
+
+static NORETURN void usage_builtin(const char *err)
+{
+	fprintf(stderr, "usage: %s\n", err);
+	exit(129);
+}
+
+static NORETURN void die_builtin(const char *err, va_list params)
+{
+	report("fatal: ", err, params);
+	exit(128);
+}
+
+static void error_builtin(const char *err, va_list params)
+{
+	report("error: ", err, params);
+}
+
+static void warn_builtin(const char *warn, va_list params)
+{
+	report("warning: ", warn, params);
+}
+
+/* If we are in a dlopen()ed .so write to a global variable would segfault
+ * (ugh), so keep things static. */
+static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
+static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
+static void (*error_routine)(const char *err, va_list params) = error_builtin;
+static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
+
+void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
+{
+	die_routine = routine;
+}
+
+void usage(const char *err)
+{
+	usage_routine(err);
+}
+
+void die(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	die_routine(err, params);
+	va_end(params);
+}
+
+int error(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	error_routine(err, params);
+	va_end(params);
+	return -1;
+}
+
+void warning(const char *warn, ...)
+{
+	va_list params;
+
+	va_start(params, warn);
+	warn_routine(warn, params);
+	va_end(params);
+}
diff --git a/Documentation/perf_counter/util.h b/Documentation/perf_counter/util.h
new file mode 100644
index 0000000..13f8bdc
--- /dev/null
+++ b/Documentation/perf_counter/util.h
@@ -0,0 +1,394 @@
+#ifndef GIT_COMPAT_UTIL_H
+#define GIT_COMPAT_UTIL_H
+
+#define _FILE_OFFSET_BITS 64
+
+#ifndef FLEX_ARRAY
+/*
+ * See if our compiler is known to support flexible array members.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+# define FLEX_ARRAY /* empty */
+#elif defined(__GNUC__)
+# if (__GNUC__ >= 3)
+#  define FLEX_ARRAY /* empty */
+# else
+#  define FLEX_ARRAY 0 /* older GNU extension */
+# endif
+#endif
+
+/*
+ * Otherwise, default to safer but a bit wasteful traditional style
+ */
+#ifndef FLEX_ARRAY
+# define FLEX_ARRAY 1
+#endif
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+#ifdef __GNUC__
+#define TYPEOF(x) (__typeof__(x))
+#else
+#define TYPEOF(x)
+#endif
+
+#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits))))
+#define HAS_MULTI_BITS(i)  ((i) & ((i) - 1))  /* checks if an integer has more than 1 bit set */
+
+/* Approximation of the length of the decimal representation of this type. */
+#define decimal_length(x)	((int)(sizeof(x) * 2.56 + 0.5) + 1)
+
+#if !defined(__APPLE__) && !defined(__FreeBSD__)  && !defined(__USLC__) && !defined(_M_UNIX)
+#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
+#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
+#endif
+#define _ALL_SOURCE 1
+#define _GNU_SOURCE 1
+#define _BSD_SOURCE 1
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/time.h>
+#include <time.h>
+#include <signal.h>
+#include <fnmatch.h>
+#include <assert.h>
+#include <regex.h>
+#include <utime.h>
+#ifndef __MINGW32__
+#include <sys/wait.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#ifndef NO_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <pwd.h>
+#include <inttypes.h>
+#if defined(__CYGWIN__)
+#undef _XOPEN_SOURCE
+#include <grp.h>
+#define _XOPEN_SOURCE 600
+#include "compat/cygwin.h"
+#else
+#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
+#include <grp.h>
+#define _ALL_SOURCE 1
+#endif
+#else 	/* __MINGW32__ */
+/* pull in Windows compatibility stuff */
+#include "compat/mingw.h"
+#endif	/* __MINGW32__ */
+
+#ifndef NO_ICONV
+#include <iconv.h>
+#endif
+
+#ifndef NO_OPENSSL
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#endif
+
+/* On most systems <limits.h> would have given us this, but
+ * not on some systems (e.g. GNU/Hurd).
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#ifndef PRIuMAX
+#define PRIuMAX "llu"
+#endif
+
+#ifndef PRIu32
+#define PRIu32 "u"
+#endif
+
+#ifndef PRIx32
+#define PRIx32 "x"
+#endif
+
+#ifndef PATH_SEP
+#define PATH_SEP ':'
+#endif
+
+#ifndef STRIP_EXTENSION
+#define STRIP_EXTENSION ""
+#endif
+
+#ifndef has_dos_drive_prefix
+#define has_dos_drive_prefix(path) 0
+#endif
+
+#ifndef is_dir_sep
+#define is_dir_sep(c) ((c) == '/')
+#endif
+
+#ifdef __GNUC__
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#ifndef __attribute__
+#define __attribute__(x)
+#endif
+#endif
+
+/* General helper functions */
+extern void usage(const char *err) NORETURN;
+extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
+extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
+extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
+
+extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
+
+extern int prefixcmp(const char *str, const char *prefix);
+extern time_t tm_to_time_t(const struct tm *tm);
+
+static inline const char *skip_prefix(const char *str, const char *prefix)
+{
+	size_t len = strlen(prefix);
+	return strncmp(str, prefix, len) ? NULL : str + len;
+}
+
+#if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
+
+#ifndef PROT_READ
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define MAP_PRIVATE 1
+#define MAP_FAILED ((void*)-1)
+#endif
+
+#define mmap git_mmap
+#define munmap git_munmap
+extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
+extern int git_munmap(void *start, size_t length);
+
+#else /* NO_MMAP || USE_WIN32_MMAP */
+
+#include <sys/mman.h>
+
+#endif /* NO_MMAP || USE_WIN32_MMAP */
+
+#ifdef NO_MMAP
+
+/* This value must be multiple of (pagesize * 2) */
+#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
+
+#else /* NO_MMAP */
+
+/* This value must be multiple of (pagesize * 2) */
+#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
+	(sizeof(void*) >= 8 \
+		?  1 * 1024 * 1024 * 1024 \
+		: 32 * 1024 * 1024)
+
+#endif /* NO_MMAP */
+
+#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
+#define on_disk_bytes(st) ((st).st_size)
+#else
+#define on_disk_bytes(st) ((st).st_blocks * 512)
+#endif
+
+#define DEFAULT_PACKED_GIT_LIMIT \
+	((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
+
+#ifdef NO_PREAD
+#define pread git_pread
+extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset);
+#endif
+/*
+ * Forward decl that will remind us if its twin in cache.h changes.
+ * This function is used in compat/pread.c.  But we can't include
+ * cache.h there.
+ */
+extern ssize_t read_in_full(int fd, void *buf, size_t count);
+
+#ifdef NO_SETENV
+#define setenv gitsetenv
+extern int gitsetenv(const char *, const char *, int);
+#endif
+
+#ifdef NO_MKDTEMP
+#define mkdtemp gitmkdtemp
+extern char *gitmkdtemp(char *);
+#endif
+
+#ifdef NO_UNSETENV
+#define unsetenv gitunsetenv
+extern void gitunsetenv(const char *);
+#endif
+
+#ifdef NO_STRCASESTR
+#define strcasestr gitstrcasestr
+extern char *gitstrcasestr(const char *haystack, const char *needle);
+#endif
+
+#ifdef NO_STRLCPY
+#define strlcpy gitstrlcpy
+extern size_t gitstrlcpy(char *, const char *, size_t);
+#endif
+
+#ifdef NO_STRTOUMAX
+#define strtoumax gitstrtoumax
+extern uintmax_t gitstrtoumax(const char *, char **, int);
+#endif
+
+#ifdef NO_HSTRERROR
+#define hstrerror githstrerror
+extern const char *githstrerror(int herror);
+#endif
+
+#ifdef NO_MEMMEM
+#define memmem gitmemmem
+void *gitmemmem(const void *haystack, size_t haystacklen,
+                const void *needle, size_t needlelen);
+#endif
+
+#ifdef FREAD_READS_DIRECTORIES
+#ifdef fopen
+#undef fopen
+#endif
+#define fopen(a,b) git_fopen(a,b)
+extern FILE *git_fopen(const char*, const char*);
+#endif
+
+#ifdef SNPRINTF_RETURNS_BOGUS
+#define snprintf git_snprintf
+extern int git_snprintf(char *str, size_t maxsize,
+			const char *format, ...);
+#define vsnprintf git_vsnprintf
+extern int git_vsnprintf(char *str, size_t maxsize,
+			 const char *format, va_list ap);
+#endif
+
+#ifdef __GLIBC_PREREQ
+#if __GLIBC_PREREQ(2, 1)
+#define HAVE_STRCHRNUL
+#endif
+#endif
+
+#ifndef HAVE_STRCHRNUL
+#define strchrnul gitstrchrnul
+static inline char *gitstrchrnul(const char *s, int c)
+{
+	while (*s && *s != c)
+		s++;
+	return (char *)s;
+}
+#endif
+
+static inline size_t xsize_t(off_t len)
+{
+	return (size_t)len;
+}
+
+static inline int has_extension(const char *filename, const char *ext)
+{
+	size_t len = strlen(filename);
+	size_t extlen = strlen(ext);
+	return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
+}
+
+/* Sane ctype - no locale, and works with signed chars */
+#undef isascii
+#undef isspace
+#undef isdigit
+#undef isalpha
+#undef isalnum
+#undef tolower
+#undef toupper
+extern unsigned char sane_ctype[256];
+#define GIT_SPACE 0x01
+#define GIT_DIGIT 0x02
+#define GIT_ALPHA 0x04
+#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
+#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define isascii(x) (((x) & ~0x7f) == 0)
+#define isspace(x) sane_istest(x,GIT_SPACE)
+#define isdigit(x) sane_istest(x,GIT_DIGIT)
+#define isalpha(x) sane_istest(x,GIT_ALPHA)
+#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
+#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
+#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define tolower(x) sane_case((unsigned char)(x), 0x20)
+#define toupper(x) sane_case((unsigned char)(x), 0)
+
+static inline int sane_case(int x, int high)
+{
+	if (sane_istest(x, GIT_ALPHA))
+		x = (x & ~0x20) | high;
+	return x;
+}
+
+static inline int strtoul_ui(char const *s, int base, unsigned int *result)
+{
+	unsigned long ul;
+	char *p;
+
+	errno = 0;
+	ul = strtoul(s, &p, base);
+	if (errno || *p || p == s || (unsigned int) ul != ul)
+		return -1;
+	*result = ul;
+	return 0;
+}
+
+static inline int strtol_i(char const *s, int base, int *result)
+{
+	long ul;
+	char *p;
+
+	errno = 0;
+	ul = strtol(s, &p, base);
+	if (errno || *p || p == s || (int) ul != ul)
+		return -1;
+	*result = ul;
+	return 0;
+}
+
+#ifdef INTERNAL_QSORT
+void git_qsort(void *base, size_t nmemb, size_t size,
+	       int(*compar)(const void *, const void *));
+#define qsort git_qsort
+#endif
+
+#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
+# define FORCE_DIR_SET_GID S_ISGID
+#else
+# define FORCE_DIR_SET_GID 0
+#endif
+
+#ifdef NO_NSEC
+#undef USE_NSEC
+#define ST_CTIME_NSEC(st) 0
+#define ST_MTIME_NSEC(st) 0
+#else
+#ifdef USE_ST_TIMESPEC
+#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
+#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
+#else
+#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
+#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
+#endif
+#endif
+
+#endif
diff --git a/Documentation/perf_counter/wrapper.c b/Documentation/perf_counter/wrapper.c
new file mode 100644
index 0000000..6350d65
--- /dev/null
+++ b/Documentation/perf_counter/wrapper.c
@@ -0,0 +1,206 @@
+/*
+ * Various trivial helper wrappers around standard functions
+ */
+#include "cache.h"
+
+/*
+ * There's no pack memory to release - but stay close to the Git
+ * version so wrap this away:
+ */
+static inline void release_pack_memory(size_t size, int flag)
+{
+}
+
+char *xstrdup(const char *str)
+{
+	char *ret = strdup(str);
+	if (!ret) {
+		release_pack_memory(strlen(str) + 1, -1);
+		ret = strdup(str);
+		if (!ret)
+			die("Out of memory, strdup failed");
+	}
+	return ret;
+}
+
+void *xmalloc(size_t size)
+{
+	void *ret = malloc(size);
+	if (!ret && !size)
+		ret = malloc(1);
+	if (!ret) {
+		release_pack_memory(size, -1);
+		ret = malloc(size);
+		if (!ret && !size)
+			ret = malloc(1);
+		if (!ret)
+			die("Out of memory, malloc failed");
+	}
+#ifdef XMALLOC_POISON
+	memset(ret, 0xA5, size);
+#endif
+	return ret;
+}
+
+/*
+ * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
+ * "data" to the allocated memory, zero terminates the allocated memory,
+ * and returns a pointer to the allocated memory. If the allocation fails,
+ * the program dies.
+ */
+void *xmemdupz(const void *data, size_t len)
+{
+	char *p = xmalloc(len + 1);
+	memcpy(p, data, len);
+	p[len] = '\0';
+	return p;
+}
+
+char *xstrndup(const char *str, size_t len)
+{
+	char *p = memchr(str, '\0', len);
+	return xmemdupz(str, p ? p - str : len);
+}
+
+void *xrealloc(void *ptr, size_t size)
+{
+	void *ret = realloc(ptr, size);
+	if (!ret && !size)
+		ret = realloc(ptr, 1);
+	if (!ret) {
+		release_pack_memory(size, -1);
+		ret = realloc(ptr, size);
+		if (!ret && !size)
+			ret = realloc(ptr, 1);
+		if (!ret)
+			die("Out of memory, realloc failed");
+	}
+	return ret;
+}
+
+void *xcalloc(size_t nmemb, size_t size)
+{
+	void *ret = calloc(nmemb, size);
+	if (!ret && (!nmemb || !size))
+		ret = calloc(1, 1);
+	if (!ret) {
+		release_pack_memory(nmemb * size, -1);
+		ret = calloc(nmemb, size);
+		if (!ret && (!nmemb || !size))
+			ret = calloc(1, 1);
+		if (!ret)
+			die("Out of memory, calloc failed");
+	}
+	return ret;
+}
+
+void *xmmap(void *start, size_t length,
+	int prot, int flags, int fd, off_t offset)
+{
+	void *ret = mmap(start, length, prot, flags, fd, offset);
+	if (ret == MAP_FAILED) {
+		if (!length)
+			return NULL;
+		release_pack_memory(length, fd);
+		ret = mmap(start, length, prot, flags, fd, offset);
+		if (ret == MAP_FAILED)
+			die("Out of memory? mmap failed: %s", strerror(errno));
+	}
+	return ret;
+}
+
+/*
+ * xread() is the same a read(), but it automatically restarts read()
+ * operations with a recoverable error (EAGAIN and EINTR). xread()
+ * DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
+ */
+ssize_t xread(int fd, void *buf, size_t len)
+{
+	ssize_t nr;
+	while (1) {
+		nr = read(fd, buf, len);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+/*
+ * xwrite() is the same a write(), but it automatically restarts write()
+ * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
+ * GUARANTEE that "len" bytes is written even if the operation is successful.
+ */
+ssize_t xwrite(int fd, const void *buf, size_t len)
+{
+	ssize_t nr;
+	while (1) {
+		nr = write(fd, buf, len);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+ssize_t read_in_full(int fd, void *buf, size_t count)
+{
+	char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t loaded = xread(fd, p, count);
+		if (loaded <= 0)
+			return total ? total : loaded;
+		count -= loaded;
+		p += loaded;
+		total += loaded;
+	}
+
+	return total;
+}
+
+ssize_t write_in_full(int fd, const void *buf, size_t count)
+{
+	const char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t written = xwrite(fd, p, count);
+		if (written < 0)
+			return -1;
+		if (!written) {
+			errno = ENOSPC;
+			return -1;
+		}
+		count -= written;
+		p += written;
+		total += written;
+	}
+
+	return total;
+}
+
+int xdup(int fd)
+{
+	int ret = dup(fd);
+	if (ret < 0)
+		die("dup failed: %s", strerror(errno));
+	return ret;
+}
+
+FILE *xfdopen(int fd, const char *mode)
+{
+	FILE *stream = fdopen(fd, mode);
+	if (stream == NULL)
+		die("Out of memory? fdopen failed: %s", strerror(errno));
+	return stream;
+}
+
+int xmkstemp(char *template)
+{
+	int fd;
+
+	fd = mkstemp(template);
+	if (fd < 0)
+		die("Unable to create temporary file: %s", strerror(errno));
+	return fd;
+}
-- 
cgit v0.10.2


From 6f06ccbc86f8a02aa32271263249657ce484eb25 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 15:22:22 +0200
Subject: perf_counter tools: clean up after introduction of the Git command
 framework

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 11809b9..1b60265 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -138,16 +138,6 @@ all::
 #
 # Define NO_PERL if you do not want Perl scripts or libraries at all.
 #
-# Define NO_TCLTK if you do not want Tcl/Tk GUI.
-#
-# The TCL_PATH variable governs the location of the Tcl interpreter
-# used to optimize perf-gui for your system.  Only used if NO_TCLTK
-# is not set.  Defaults to the bare 'tclsh'.
-#
-# The TCLTK_PATH variable governs the location of the Tcl/Tk interpreter.
-# If not set it defaults to the bare 'wish'. If it is set to the empty
-# string then NO_TCLTK will be forced (this is used by configure script).
-#
 # Define INTERNAL_QSORT to use Git's implementation of qsort(), which
 # is a simplified version of the merge sort used in glibc. This is
 # recommended if Git triggers O(n^2) behavior in your platform's qsort().
@@ -215,12 +205,8 @@ TAR = tar
 FIND = find
 INSTALL = install
 RPMBUILD = rpmbuild
-TCL_PATH = tclsh
-TCLTK_PATH = wish
 PTHREAD_LIBS = -lpthread
 
-export TCL_PATH TCLTK_PATH
-
 # sparse is architecture-neutral, which means that we need to tell it
 # explicitly what architecture to check for. Fix this up for yours..
 SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
@@ -529,10 +515,6 @@ ifdef NO_EXTERNAL_GREP
 	BASIC_CFLAGS += -DNO_EXTERNAL_GREP
 endif
 
-ifeq ($(TCLTK_PATH),)
-NO_TCLTK=NoThanks
-endif
-
 ifeq ($(PERL_PATH),)
 NO_PERL=NoThanks
 endif
@@ -583,7 +565,6 @@ prefix_SQ = $(subst ','\'',$(prefix))
 
 SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
-TCLTK_PATH_SQ = $(subst ','\'',$(TCLTK_PATH))
 
 LIBS = $(PERFLIBS) $(EXTLIBS)
 
@@ -607,14 +588,6 @@ ifneq (,$X)
 endif
 
 all::
-ifndef NO_TCLTK
-	$(QUIET_SUBDIR0)perf-gui $(QUIET_SUBDIR1) perfexecdir='$(perfexec_instdir_SQ)' all
-	$(QUIET_SUBDIR0)perfk-perf $(QUIET_SUBDIR1) all
-endif
-ifndef NO_PERL
-	$(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' all
-endif
-	$(QUIET_SUBDIR0)templates $(QUIET_SUBDIR1)
 
 please_set_SHELL_PATH_to_a_more_modern_shell:
 	@$$(:)
@@ -704,21 +677,6 @@ builtin-revert.o wt-status.o: wt-status.h
 $(LIB_FILE): $(LIB_OBJS)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
 
-doc:
-	$(MAKE) -C Documentation all
-
-man:
-	$(MAKE) -C Documentation man
-
-html:
-	$(MAKE) -C Documentation html
-
-info:
-	$(MAKE) -C Documentation info
-
-pdf:
-	$(MAKE) -C Documentation pdf
-
 TAGS:
 	$(RM) TAGS
 	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
@@ -751,33 +709,12 @@ PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
 	@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
 	@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
 
-### Detect Tck/Tk interpreter path changes
-ifndef NO_TCLTK
-TRACK_VARS = $(subst ','\'',-DTCLTK_PATH='$(TCLTK_PATH_SQ)')
-
-PERF-GUI-VARS: .FORCE-PERF-GUI-VARS
-	@VARS='$(TRACK_VARS)'; \
-	    if test x"$$VARS" != x"`cat $@ 2>/dev/null`" ; then \
-		echo 1>&2 "    * new Tcl/Tk interpreter location"; \
-		echo "$$VARS" >$@; \
-            fi
-
-.PHONY: .FORCE-PERF-GUI-VARS
-endif
-
 ### Testing rules
 
-TEST_PROGRAMS += test-chmtime$X
-TEST_PROGRAMS += test-ctype$X
-TEST_PROGRAMS += test-date$X
-TEST_PROGRAMS += test-delta$X
-TEST_PROGRAMS += test-dump-cache-tree$X
-TEST_PROGRAMS += test-genrandom$X
-TEST_PROGRAMS += test-match-trees$X
-TEST_PROGRAMS += test-parse-options$X
-TEST_PROGRAMS += test-path-utils$X
-TEST_PROGRAMS += test-sha1$X
-TEST_PROGRAMS += test-sigchain$X
+#
+# None right now:
+#
+# TEST_PROGRAMS += test-something$X
 
 all:: $(TEST_PROGRAMS)
 
@@ -787,25 +724,6 @@ all:: $(TEST_PROGRAMS)
 
 export NO_SVN_TESTS
 
-test: all
-	$(MAKE) -C t/ all
-
-test-ctype$X: ctype.o
-
-test-date$X: date.o ctype.o
-
-test-delta$X: diff-delta.o patch-delta.o
-
-test-parse-options$X: parse-options.o
-
-.PRECIOUS: $(patsubst test-%$X,test-%.o,$(TEST_PROGRAMS))
-
-test-%$X: test-%.o $(PERFLIBS)
-	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
-
-check-sha1:: test-sha1$X
-	./test-sha1.sh
-
 check: common-cmds.h
 	if sparse; \
 	then \
@@ -845,10 +763,6 @@ install: all
 	$(INSTALL) perf$X perf-upload-pack$X perf-receive-pack$X perf-upload-archive$X perf-shell$X perf-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install
 	$(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install
-ifndef NO_TCLTK
-	$(MAKE) -C perfk-perf install
-	$(MAKE) -C perf-gui perfexecdir='$(perfexec_instdir_SQ)' install
-endif
 ifneq (,$X)
 	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
 endif
@@ -865,32 +779,6 @@ endif
 	  done } && \
 	./check_bindir "z$$bindir" "z$$execdir" "$$bindir/perf-add$X"
 
-install-doc:
-	$(MAKE) -C Documentation install
-
-install-man:
-	$(MAKE) -C Documentation install-man
-
-install-html:
-	$(MAKE) -C Documentation install-html
-
-install-info:
-	$(MAKE) -C Documentation install-info
-
-install-pdf:
-	$(MAKE) -C Documentation install-pdf
-
-quick-install-doc:
-	$(MAKE) -C Documentation quick-install
-
-quick-install-man:
-	$(MAKE) -C Documentation quick-install-man
-
-quick-install-html:
-	$(MAKE) -C Documentation quick-install-html
-
-
-
 ### Maintainer's dist rules
 
 perf.spec: perf.spec.in
@@ -904,38 +792,16 @@ dist: perf.spec perf-archive$(X) configure
 	@mkdir -p $(PERF_TARNAME)
 	@cp perf.spec configure $(PERF_TARNAME)
 	@echo $(PERF_VERSION) > $(PERF_TARNAME)/version
-	@$(MAKE) -C perf-gui TARDIR=../$(PERF_TARNAME)/perf-gui dist-version
 	$(TAR) rf $(PERF_TARNAME).tar \
 		$(PERF_TARNAME)/perf.spec \
 		$(PERF_TARNAME)/configure \
-		$(PERF_TARNAME)/version \
-		$(PERF_TARNAME)/perf-gui/version
+		$(PERF_TARNAME)/version
 	@$(RM) -r $(PERF_TARNAME)
 	gzip -f -9 $(PERF_TARNAME).tar
 
 rpm: dist
 	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
 
-htmldocs = perf-htmldocs-$(PERF_VERSION)
-manpages = perf-manpages-$(PERF_VERSION)
-dist-doc:
-	$(RM) -r .doc-tmp-dir
-	mkdir .doc-tmp-dir
-	$(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
-	cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
-	gzip -n -9 -f $(htmldocs).tar
-	:
-	$(RM) -r .doc-tmp-dir
-	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
-	$(MAKE) -C Documentation DESTDIR=./ \
-		man1dir=../.doc-tmp-dir/man1 \
-		man5dir=../.doc-tmp-dir/man5 \
-		man7dir=../.doc-tmp-dir/man7 \
-		install
-	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
-	gzip -n -9 -f $(manpages).tar
-	$(RM) -r .doc-tmp-dir
-
 ### Cleaning rules
 
 distclean: clean
@@ -951,74 +817,13 @@ clean:
 	$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
 	$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
 	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
-	$(MAKE) -C Documentation/ clean
-	$(MAKE) -C templates/ clean
-	$(MAKE) -C t/ clean
-ifndef NO_TCLTK
-	$(MAKE) -C perfk-perf clean
-	$(MAKE) -C perf-gui clean
-endif
-	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-GUI-VARS PERF-BUILD-OPTIONS
+	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS
 
 .PHONY: all install clean strip
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
 .PHONY: .FORCE-PERF-BUILD-OPTIONS
 
-### Check documentation
-#
-check-docs::
-	@(for v in $(ALL_PROGRAMS) $(BUILT_INS) perf perfk; \
-	do \
-		case "$$v" in \
-		perf-merge-octopus | perf-merge-ours | perf-merge-recursive | \
-		perf-merge-resolve | perf-merge-subtree | \
-		perf-fsck-objects | perf-init-db | \
-		perf-?*--?* ) continue ;; \
-		esac ; \
-		test -f "Documentation/$$v.txt" || \
-		echo "no doc: $$v"; \
-		sed -e '/^#/d' command-list.txt | \
-		grep -q "^$$v[ 	]" || \
-		case "$$v" in \
-		perf) ;; \
-		*) echo "no link: $$v";; \
-		esac ; \
-	done; \
-	( \
-		sed -e '/^#/d' \
-		    -e 's/[ 	].*//' \
-		    -e 's/^/listed /' command-list.txt; \
-		ls -1 Documentation/perf*txt | \
-		sed -e 's|Documentation/|documented |' \
-		    -e 's/\.txt//'; \
-	) | while read how cmd; \
-	do \
-		case "$$how,$$cmd" in \
-		*,perf-citool | \
-		*,perf-gui | \
-		*,perf-help | \
-		documented,perfattributes | \
-		documented,perfignore | \
-		documented,perfmodules | \
-		documented,perfcli | \
-		documented,perf-tools | \
-		documented,perfcore-tutorial | \
-		documented,perfcvs-migration | \
-		documented,perfdiffcore | \
-		documented,perfglossary | \
-		documented,perfhooks | \
-		documented,perfrepository-layout | \
-		documented,perftutorial | \
-		documented,perftutorial-2 | \
-		sentinel,not,matching,is,ok ) continue ;; \
-		esac; \
-		case " $(ALL_PROGRAMS) $(BUILT_INS) perf perfk " in \
-		*" $$cmd "*)	;; \
-		*) echo "removed but $$how: $$cmd" ;; \
-		esac; \
-	done ) | sort
-
 ### Make sure built-ins do not have dups and listed in perf.c
 #
 check-builtins::
diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c
index 125fcc2..a136d61 100644
--- a/Documentation/perf_counter/builtin-help.c
+++ b/Documentation/perf_counter/builtin-help.c
@@ -417,11 +417,9 @@ static void show_html_page(const char *perf_cmd)
 
 int cmd_help(int argc, const char **argv, const char *prefix)
 {
-	int nonperf;
 	const char *alias;
 	load_command_list("perf-", &main_cmds, &other_cmds);
 
-	/* setup_perf_directory_gently(&nonperf); */
 	perf_config(perf_help_config, NULL);
 
 	argc = parse_options(argc, argv, builtin_help_options,
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 9d2c769..601bddb 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -63,15 +63,6 @@
 
 #include "util.h"
 
-#define _GNU_SOURCE
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
 #include <getopt.h>
 #include <assert.h>
 #include <fcntl.h>
@@ -103,8 +94,6 @@
 #define PR_TASK_PERF_COUNTERS_DISABLE   31
 #define PR_TASK_PERF_COUNTERS_ENABLE    32
 
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
 #define rdclock()                                       \
 ({                                                      \
         struct timespec ts;                             \
@@ -1077,7 +1066,7 @@ static void process_event(uint64_t ip, int counter)
 	record_ip(ip, counter);
 }
 
-static void process_options(int argc, char *argv[])
+static void process_options(int argc, char **argv)
 {
 	int error = 0, counter;
 
@@ -1255,7 +1244,7 @@ static void mmap_read(struct mmap_data *md)
 
 		event_t event_copy;
 
-		unsigned int size = event->header.size;
+		size_t size = event->header.size;
 
 		/*
 		 * Event straddles the mmap boundary -- header should always
@@ -1301,7 +1290,7 @@ static void mmap_read(struct mmap_data *md)
 	md->prev = old;
 }
 
-int cmd_top(int argc, const char **argv, const char *prefix)
+int cmd_top(int argc, char **argv, const char *prefix)
 {
 	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
 	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
diff --git a/Documentation/perf_counter/cache.h b/Documentation/perf_counter/cache.h
index dc08564..7108051 100644
--- a/Documentation/perf_counter/cache.h
+++ b/Documentation/perf_counter/cache.h
@@ -94,4 +94,24 @@ static inline int is_absolute_path(const char *path)
 {
 	return path[0] == '/';
 }
+
+const char *make_absolute_path(const char *path);
+const char *make_nonrelative_path(const char *path);
+const char *make_relative_path(const char *abs, const char *base);
+int normalize_path_copy(char *dst, const char *src);
+int longest_ancestor_length(const char *path, const char *prefix_list);
+char *strip_path_suffix(const char *path, const char *suffix);
+
+extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+
+extern char *mksnpath(char *buf, size_t n, const char *fmt, ...)
+	__attribute__((format (printf, 3, 4)));
+extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
+	__attribute__((format (printf, 3, 4)));
+extern char *perf_pathdup(const char *fmt, ...)
+	__attribute__((format (printf, 1, 2)));
+
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+
 #endif /* CACHE_H */
diff --git a/Documentation/perf_counter/config.c b/Documentation/perf_counter/config.c
index 672d539..3dd13fa 100644
--- a/Documentation/perf_counter/config.c
+++ b/Documentation/perf_counter/config.c
@@ -15,7 +15,6 @@ static FILE *config_file;
 static const char *config_file_name;
 static int config_linenr;
 static int config_file_eof;
-static int zlib_compression_seen;
 
 const char *config_exclusive_filename = NULL;
 
@@ -533,14 +532,6 @@ static int store_aux(const char* key, const char* value, void *cb)
 	return 0;
 }
 
-static int write_error(const char *filename)
-{
-	error("failed to write new configuration file %s", filename);
-
-	/* Same error code as "failed to rename". */
-	return 4;
-}
-
 static int store_write_section(int fd, const char* key)
 {
 	const char *dot;
@@ -673,7 +664,7 @@ int perf_config_set_multivar(const char* key, const char* value,
 {
 	int i, dot;
 	int fd = -1, in_fd;
-	int ret;
+	int ret = 0;
 	char* config_filename;
 	const char* last_dot = strrchr(key, '.');
 
@@ -872,90 +863,6 @@ write_err_out:
 
 }
 
-static int section_name_match (const char *buf, const char *name)
-{
-	int i = 0, j = 0, dot = 0;
-	for (; buf[i] && buf[i] != ']'; i++) {
-		if (!dot && isspace(buf[i])) {
-			dot = 1;
-			if (name[j++] != '.')
-				break;
-			for (i++; isspace(buf[i]); i++)
-				; /* do nothing */
-			if (buf[i] != '"')
-				break;
-			continue;
-		}
-		if (buf[i] == '\\' && dot)
-			i++;
-		else if (buf[i] == '"' && dot) {
-			for (i++; isspace(buf[i]); i++)
-				; /* do_nothing */
-			break;
-		}
-		if (buf[i] != name[j++])
-			break;
-	}
-	return (buf[i] == ']' && name[j] == 0);
-}
-
-/* if new_name == NULL, the section is removed instead */
-int perf_config_rename_section(const char *old_name, const char *new_name)
-{
-	int ret = 0, remove = 0;
-	char *config_filename;
-	int out_fd;
-	char buf[1024];
-
-	if (config_exclusive_filename)
-		config_filename = strdup(config_exclusive_filename);
-	else
-		config_filename = perf_pathdup("config");
-	if (out_fd < 0) {
-		ret = error("could not lock config file %s", config_filename);
-		goto out;
-	}
-
-	if (!(config_file = fopen(config_filename, "rb"))) {
-		/* no config file means nothing to rename, no error */
-		goto unlock_and_out;
-	}
-
-	while (fgets(buf, sizeof(buf), config_file)) {
-		int i;
-		int length;
-		for (i = 0; buf[i] && isspace(buf[i]); i++)
-			; /* do nothing */
-		if (buf[i] == '[') {
-			/* it's a section */
-			if (section_name_match (&buf[i+1], old_name)) {
-				ret++;
-				if (new_name == NULL) {
-					remove = 1;
-					continue;
-				}
-				store.baselen = strlen(new_name);
-				if (!store_write_section(out_fd, new_name)) {
-					goto out;
-				}
-				continue;
-			}
-			remove = 0;
-		}
-		if (remove)
-			continue;
-		length = strlen(buf);
-		if (write_in_full(out_fd, buf, length) != length) {
-			goto out;
-		}
-	}
-	fclose(config_file);
- unlock_and_out:
- out:
-	free(config_filename);
-	return ret;
-}
-
 /*
  * Call this to report error for your variable that should not
  * get a boolean value (i.e. "[my] var" means "true").
diff --git a/Documentation/perf_counter/path.c b/Documentation/perf_counter/path.c
index 891b612..a501a40 100644
--- a/Documentation/perf_counter/path.c
+++ b/Documentation/perf_counter/path.c
@@ -161,45 +161,6 @@ int perf_mkstemp(char *path, size_t len, const char *template)
 }
 
 
-static char *user_path(char *buf, char *path, int sz)
-{
-	struct passwd *pw;
-	char *slash;
-	int len, baselen;
-
-	if (!path || path[0] != '~')
-		return NULL;
-	path++;
-	slash = strchr(path, '/');
-	if (path[0] == '/' || !path[0]) {
-		pw = getpwuid(getuid());
-	}
-	else {
-		if (slash) {
-			*slash = 0;
-			pw = getpwnam(path);
-			*slash = '/';
-		}
-		else
-			pw = getpwnam(path);
-	}
-	if (!pw || !pw->pw_dir || sz <= strlen(pw->pw_dir))
-		return NULL;
-	baselen = strlen(pw->pw_dir);
-	memcpy(buf, pw->pw_dir, baselen);
-	while ((1 < baselen) && (buf[baselen-1] == '/')) {
-		buf[baselen-1] = 0;
-		baselen--;
-	}
-	if (slash && slash[1]) {
-		len = strlen(slash);
-		if (sz <= baselen + len)
-			return NULL;
-		memcpy(buf + baselen, slash, len + 1);
-	}
-	return buf;
-}
-
 const char *make_relative_path(const char *abs, const char *base)
 {
 	static char buf[PATH_MAX + 1];
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index 9256f6a..63f8a89 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -1,7 +1,7 @@
 #include "builtin.h"
 #include "exec_cmd.h"
 #include "cache.h"
-//#include "quote.h"
+#include "quote.h"
 #include "run-command.h"
 
 const char perf_usage_string[] =
@@ -132,7 +132,6 @@ static int handle_alias(int *argcp, const char ***argv)
 	const char** new_argv;
 	const char *alias_command;
 	char *alias_string;
-	int unused_nonperf;
 
 	alias_command = (*argv)[0];
 	alias_string = alias_lookup(alias_command);
diff --git a/Documentation/perf_counter/util.h b/Documentation/perf_counter/util.h
index 13f8bdc..36e40c3 100644
--- a/Documentation/perf_counter/util.h
+++ b/Documentation/perf_counter/util.h
@@ -295,6 +295,20 @@ static inline char *gitstrchrnul(const char *s, int c)
 }
 #endif
 
+/*
+ * Wrappers:
+ */
+extern char *xstrdup(const char *str);
+extern void *xmalloc(size_t size);
+extern void *xmemdupz(const void *data, size_t len);
+extern char *xstrndup(const char *str, size_t len);
+extern void *xrealloc(void *ptr, size_t size);
+extern void *xcalloc(size_t nmemb, size_t size);
+extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
+extern ssize_t xread(int fd, void *buf, size_t len);
+extern ssize_t xwrite(int fd, const void *buf, size_t len);
+extern int xdup(int fd);
+extern FILE *xfdopen(int fd, const char *mode);
 static inline size_t xsize_t(off_t len)
 {
 	return (size_t)len;
-- 
cgit v0.10.2


From ddcacfa0febff6454dba6cea1931f3020a9f6c24 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 15:37:32 +0200
Subject: perf_counter tools: separate kerneltop into 'perf top' and 'perf
 stat'

Lets use the Git framework of built-in commands.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 1b60265..fb8b717 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -309,6 +309,7 @@ LIB_OBJS += usage.o
 LIB_OBJS += wrapper.o
 
 BUILTIN_OBJS += builtin-help.o
+BUILTIN_OBJS += builtin-stat.o
 BUILTIN_OBJS += builtin-top.o
 
 PERFLIBS = $(LIB_FILE)
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
new file mode 100644
index 0000000..169a2d1
--- /dev/null
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -0,0 +1,592 @@
+/*
+ * kerneltop.c: show top kernel functions - performance counters showcase
+
+   Build with:
+
+     cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
+
+   Sample output:
+
+------------------------------------------------------------------------------
+ KernelTop:    2669 irqs/sec  [NMI, cache-misses/cache-refs],  (all, cpu: 2)
+------------------------------------------------------------------------------
+
+             weight         RIP          kernel function
+             ______   ________________   _______________
+
+              35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
+              33.00 - ffffffff804cb740 : sock_alloc_send_skb
+              31.26 - ffffffff804ce808 : skb_push
+              22.43 - ffffffff80510004 : tcp_established_options
+              19.00 - ffffffff8027d250 : find_get_page
+              15.76 - ffffffff804e4fc9 : eth_type_trans
+              15.20 - ffffffff804d8baa : dst_release
+              14.86 - ffffffff804cf5d8 : skb_release_head_state
+              14.00 - ffffffff802217d5 : read_hpet
+              12.00 - ffffffff804ffb7f : __ip_local_out
+              11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
+               8.54 - ffffffff805001a3 : ip_queue_xmit
+ */
+
+/*
+ * perfstat:  /usr/bin/time -alike performance counter statistics utility
+
+          It summarizes the counter events of all tasks (and child tasks),
+          covering all CPUs that the command (or workload) executes on.
+          It only counts the per-task events of the workload started,
+          independent of how many other tasks run on those CPUs.
+
+   Sample output:
+
+   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
+
+   Performance counter stats for 'ls':
+
+           163516953 instructions
+                2295 cache-misses
+             2855182 branch-misses
+ */
+
+ /*
+  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+  *
+  * Improvements and fixes by:
+  *
+  *   Arjan van de Ven <arjan@linux.intel.com>
+  *   Yanmin Zhang <yanmin.zhang@intel.com>
+  *   Wu Fengguang <fengguang.wu@intel.com>
+  *   Mike Galbraith <efault@gmx.de>
+  *   Paul Mackerras <paulus@samba.org>
+  *
+  * Released under the GPL v2. (and only v2, not any later version)
+  */
+
+#include "util.h"
+
+#include <getopt.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+#include "../../include/linux/perf_counter.h"
+
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE   31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#define rdclock()                                       \
+({                                                      \
+        struct timespec ts;                             \
+                                                        \
+        clock_gettime(CLOCK_MONOTONIC, &ts);            \
+        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
+})
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+#ifdef __x86_64__
+#define __NR_perf_counter_open 295
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __i386__
+#define __NR_perf_counter_open 333
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __powerpc__
+#define __NR_perf_counter_open 319
+#define rmb() 		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
+#endif
+
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+extern asmlinkage int sys_perf_counter_open(
+        struct perf_counter_hw_event    *hw_event_uptr          __user,
+        pid_t                           pid,
+        int                             cpu,
+        int                             group_fd,
+        unsigned long                   flags);
+
+#define MAX_COUNTERS			64
+#define MAX_NR_CPUS			256
+
+#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
+
+static int			system_wide			=  0;
+
+static int			nr_counters			=  0;
+static __u64			event_id[MAX_COUNTERS]		= {
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
+	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
+
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
+	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
+};
+static int			default_interval = 100000;
+static int			event_count[MAX_COUNTERS];
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static int			tid				= -1;
+static int			profile_cpu			= -1;
+static int			nr_cpus				=  0;
+static int			nmi				=  1;
+static int			group				=  0;
+static unsigned int		page_size;
+
+static int			zero;
+
+static int			scale;
+
+static const unsigned int default_count[] = {
+	1000000,
+	1000000,
+	  10000,
+	  10000,
+	1000000,
+	  10000,
+};
+
+static char *hw_event_names[] = {
+	"CPU cycles",
+	"instructions",
+	"cache references",
+	"cache misses",
+	"branches",
+	"branch misses",
+	"bus cycles",
+};
+
+static char *sw_event_names[] = {
+	"cpu clock ticks",
+	"task clock ticks",
+	"pagefaults",
+	"context switches",
+	"CPU migrations",
+	"minor faults",
+	"major faults",
+};
+
+struct event_symbol {
+	__u64 event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols[] = {
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
+
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
+};
+
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+static void display_events_help(void)
+{
+	unsigned int i;
+	__u64 e;
+
+	printf(
+	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		int type, id;
+
+		e = event_symbols[i].event;
+		type = PERF_COUNTER_TYPE(e);
+		id = PERF_COUNTER_ID(e);
+
+		printf("\n                             %d:%d: %-20s",
+				type, id, event_symbols[i].symbol);
+	}
+
+	printf("\n"
+	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
+}
+
+static void display_help(void)
+{
+	printf(
+	"Usage: perfstat [<events...>] <cmd...>\n\n"
+	"PerfStat Options (up to %d event types can be specified):\n\n",
+		 MAX_COUNTERS);
+
+	display_events_help();
+
+	printf(
+	" -l                           # scale counter values\n"
+	" -a                           # system-wide collection\n");
+	exit(0);
+}
+
+static char *event_name(int ctr)
+{
+	__u64 config = event_id[ctr];
+	int type = PERF_COUNTER_TYPE(config);
+	int id = PERF_COUNTER_ID(config);
+	static char buf[32];
+
+	if (PERF_COUNTER_RAW(config)) {
+		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
+		return buf;
+	}
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		if (id < PERF_HW_EVENTS_MAX)
+			return hw_event_names[id];
+		return "unknown-hardware";
+
+	case PERF_TYPE_SOFTWARE:
+		if (id < PERF_SW_EVENTS_MAX)
+			return sw_event_names[id];
+		return "unknown-software";
+
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static __u64 match_event_symbols(char *str)
+{
+	__u64 config, id;
+	int type;
+	unsigned int i;
+
+	if (sscanf(str, "r%llx", &config) == 1)
+		return config | PERF_COUNTER_RAW_MASK;
+
+	if (sscanf(str, "%d:%llu", &type, &id) == 2)
+		return EID(type, id);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return ~0ULL;
+}
+
+static int parse_events(char *str)
+{
+	__u64 config;
+
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	config = match_event_symbols(str);
+	if (config == ~0ULL)
+		return -1;
+
+	event_id[nr_counters] = config;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+
+/*
+ * perfstat
+ */
+
+char fault_here[1000000];
+
+static void create_perfstat_counter(int counter)
+{
+	struct perf_counter_hw_event hw_event;
+
+	memset(&hw_event, 0, sizeof(hw_event));
+	hw_event.config		= event_id[counter];
+	hw_event.record_type	= 0;
+	hw_event.nmi		= 0;
+	if (scale)
+		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
+					  PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	if (system_wide) {
+		int cpu;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
+			if (fd[cpu][counter] < 0) {
+				printf("perfstat error: syscall returned with %d (%s)\n",
+						fd[cpu][counter], strerror(errno));
+				exit(-1);
+			}
+		}
+	} else {
+		hw_event.inherit	= 1;
+		hw_event.disabled	= 1;
+
+		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
+		if (fd[0][counter] < 0) {
+			printf("perfstat error: syscall returned with %d (%s)\n",
+					fd[0][counter], strerror(errno));
+			exit(-1);
+		}
+	}
+}
+
+int do_perfstat(int argc, char *argv[])
+{
+	unsigned long long t0, t1;
+	int counter;
+	ssize_t res;
+	int status;
+	int pid;
+
+	if (!system_wide)
+		nr_cpus = 1;
+
+	for (counter = 0; counter < nr_counters; counter++)
+		create_perfstat_counter(counter);
+
+	argc -= optind;
+	argv += optind;
+
+	if (!argc)
+		display_help();
+
+	/*
+	 * Enable counters and exec the command:
+	 */
+	t0 = rdclock();
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	if ((pid = fork()) < 0)
+		perror("failed to fork");
+	if (!pid) {
+		if (execvp(argv[0], argv)) {
+			perror(argv[0]);
+			exit(-1);
+		}
+	}
+	while (wait(&status) >= 0)
+		;
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+	t1 = rdclock();
+
+	fflush(stdout);
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Performance counter stats for \'%s\':\n",
+		argv[0]);
+	fprintf(stderr, "\n");
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		int cpu, nv;
+		__u64 count[3], single_count[3];
+		int scaled;
+
+		count[0] = count[1] = count[2] = 0;
+		nv = scale ? 3 : 1;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			res = read(fd[cpu][counter],
+				   single_count, nv * sizeof(__u64));
+			assert(res == nv * sizeof(__u64));
+
+			count[0] += single_count[0];
+			if (scale) {
+				count[1] += single_count[1];
+				count[2] += single_count[2];
+			}
+		}
+
+		scaled = 0;
+		if (scale) {
+			if (count[2] == 0) {
+				fprintf(stderr, " %14s  %-20s\n",
+					"<not counted>", event_name(counter));
+				continue;
+			}
+			if (count[2] < count[1]) {
+				scaled = 1;
+				count[0] = (unsigned long long)
+					((double)count[0] * count[1] / count[2] + 0.5);
+			}
+		}
+
+		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
+		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
+
+			double msecs = (double)count[0] / 1000000;
+
+			fprintf(stderr, " %14.6f  %-20s (msecs)",
+				msecs, event_name(counter));
+		} else {
+			fprintf(stderr, " %14Ld  %-20s (events)",
+				count[0], event_name(counter));
+		}
+		if (scaled)
+			fprintf(stderr, "  (scaled from %.2f%%)",
+				(double) count[2] / count[1] * 100);
+		fprintf(stderr, "\n");
+	}
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
+			(double)(t1-t0)/1e6);
+	fprintf(stderr, "\n");
+
+	return 0;
+}
+
+static void process_options(int argc, char **argv)
+{
+	int error = 0, counter;
+
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"count",	required_argument,	NULL, 'c'},
+			{"cpu",		required_argument,	NULL, 'C'},
+			{"delay",	required_argument,	NULL, 'd'},
+			{"dump_symtab",	no_argument,		NULL, 'D'},
+			{"event",	required_argument,	NULL, 'e'},
+			{"filter",	required_argument,	NULL, 'f'},
+			{"group",	required_argument,	NULL, 'g'},
+			{"help",	no_argument,		NULL, 'h'},
+			{"nmi",		required_argument,	NULL, 'n'},
+			{"munmap_info",	no_argument,		NULL, 'U'},
+			{"pid",		required_argument,	NULL, 'p'},
+			{"realtime",	required_argument,	NULL, 'r'},
+			{"scale",	no_argument,		NULL, 'l'},
+			{"symbol",	required_argument,	NULL, 's'},
+			{"stat",	no_argument,		NULL, 'S'},
+			{"vmlinux",	required_argument,	NULL, 'x'},
+			{"zero",	no_argument,		NULL, 'z'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'a': system_wide			=	       1; break;
+		case 'c': default_interval		=   atoi(optarg); break;
+		case 'C':
+			/* CPU and PID are mutually exclusive */
+			if (tid != -1) {
+				printf("WARNING: CPU switch overriding PID\n");
+				sleep(1);
+				tid = -1;
+			}
+			profile_cpu			=   atoi(optarg); break;
+
+		case 'e': error				= parse_events(optarg); break;
+
+		case 'g': group				=   atoi(optarg); break;
+		case 'h':      				  display_help(); break;
+		case 'l': scale				=	       1; break;
+		case 'n': nmi				=   atoi(optarg); break;
+		case 'p':
+			/* CPU and PID are mutually exclusive */
+			if (profile_cpu != -1) {
+				printf("WARNING: PID switch overriding CPU\n");
+				sleep(1);
+				profile_cpu = -1;
+			}
+			tid				=   atoi(optarg); break;
+		case 'z': zero				=              1; break;
+		default: error = 1; break;
+		}
+	}
+	if (error)
+		display_help();
+
+	if (!nr_counters) {
+		nr_counters = 8;
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (event_count[counter])
+			continue;
+
+		event_count[counter] = default_interval;
+	}
+}
+
+int cmd_stat(int argc, char **argv, const char *prefix)
+{
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	process_options(argc, argv);
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	return do_perfstat(argc, argv);
+}
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 601bddb..98e8690 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -28,25 +28,6 @@
                8.54 - ffffffff805001a3 : ip_queue_xmit
  */
 
-/*
- * perfstat:  /usr/bin/time -alike performance counter statistics utility
-
-          It summarizes the counter events of all tasks (and child tasks),
-          covering all CPUs that the command (or workload) executes on.
-          It only counts the per-task events of the workload started,
-          independent of how many other tasks run on those CPUs.
-
-   Sample output:
-
-   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
-
-   Performance counter stats for 'ls':
-
-           163516953 instructions
-                2295 cache-misses
-             2855182 branch-misses
- */
-
  /*
   * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
   *
@@ -149,7 +130,6 @@ asmlinkage int sys_perf_counter_open(
 
 #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
 
-static int			run_perfstat			=  0;
 static int			system_wide			=  0;
 
 static int			nr_counters			=  0;
@@ -203,7 +183,7 @@ struct source_line {
 static struct source_line	*lines;
 static struct source_line	**lines_tail;
 
-const unsigned int default_count[] = {
+static const unsigned int default_count[] = {
 	1000000,
 	1000000,
 	  10000,
@@ -291,26 +271,8 @@ static void display_events_help(void)
 	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
 }
 
-static void display_perfstat_help(void)
-{
-	printf(
-	"Usage: perfstat [<events...>] <cmd...>\n\n"
-	"PerfStat Options (up to %d event types can be specified):\n\n",
-		 MAX_COUNTERS);
-
-	display_events_help();
-
-	printf(
-	" -l                           # scale counter values\n"
-	" -a                           # system-wide collection\n");
-	exit(0);
-}
-
 static void display_help(void)
 {
-	if (run_perfstat)
-		return display_perfstat_help();
-
 	printf(
 	"Usage: kerneltop [<options>]\n"
 	"   Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
@@ -320,8 +282,6 @@ static void display_help(void)
 	display_events_help();
 
 	printf(
-	" -S        --stat             # perfstat COMMAND\n"
-	" -a                           # system-wide collection (for perfstat)\n\n"
 	" -c CNT    --count=CNT        # event period to sample\n\n"
 	" -C CPU    --cpu=CPU          # CPU (-1 for all)                 [default: -1]\n"
 	" -p PID    --pid=PID          # PID of sampled task (-1 for all) [default: -1]\n\n"
@@ -420,151 +380,6 @@ again:
 	return 0;
 }
 
-
-/*
- * perfstat
- */
-
-char fault_here[1000000];
-
-static void create_perfstat_counter(int counter)
-{
-	struct perf_counter_hw_event hw_event;
-
-	memset(&hw_event, 0, sizeof(hw_event));
-	hw_event.config		= event_id[counter];
-	hw_event.record_type	= 0;
-	hw_event.nmi		= 0;
-	if (scale)
-		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
-					  PERF_FORMAT_TOTAL_TIME_RUNNING;
-
-	if (system_wide) {
-		int cpu;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
-			if (fd[cpu][counter] < 0) {
-				printf("perfstat error: syscall returned with %d (%s)\n",
-						fd[cpu][counter], strerror(errno));
-				exit(-1);
-			}
-		}
-	} else {
-		hw_event.inherit	= 1;
-		hw_event.disabled	= 1;
-
-		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
-		if (fd[0][counter] < 0) {
-			printf("perfstat error: syscall returned with %d (%s)\n",
-					fd[0][counter], strerror(errno));
-			exit(-1);
-		}
-	}
-}
-
-int do_perfstat(int argc, char *argv[])
-{
-	unsigned long long t0, t1;
-	int counter;
-	ssize_t res;
-	int status;
-	int pid;
-
-	if (!system_wide)
-		nr_cpus = 1;
-
-	for (counter = 0; counter < nr_counters; counter++)
-		create_perfstat_counter(counter);
-
-	argc -= optind;
-	argv += optind;
-
-	if (!argc)
-		display_help();
-
-	/*
-	 * Enable counters and exec the command:
-	 */
-	t0 = rdclock();
-	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
-
-	if ((pid = fork()) < 0)
-		perror("failed to fork");
-	if (!pid) {
-		if (execvp(argv[0], argv)) {
-			perror(argv[0]);
-			exit(-1);
-		}
-	}
-	while (wait(&status) >= 0)
-		;
-	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
-	t1 = rdclock();
-
-	fflush(stdout);
-
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Performance counter stats for \'%s\':\n",
-		argv[0]);
-	fprintf(stderr, "\n");
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		int cpu, nv;
-		__u64 count[3], single_count[3];
-		int scaled;
-
-		count[0] = count[1] = count[2] = 0;
-		nv = scale ? 3 : 1;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			res = read(fd[cpu][counter],
-				   single_count, nv * sizeof(__u64));
-			assert(res == nv * sizeof(__u64));
-
-			count[0] += single_count[0];
-			if (scale) {
-				count[1] += single_count[1];
-				count[2] += single_count[2];
-			}
-		}
-
-		scaled = 0;
-		if (scale) {
-			if (count[2] == 0) {
-				fprintf(stderr, " %14s  %-20s\n",
-					"<not counted>", event_name(counter));
-				continue;
-			}
-			if (count[2] < count[1]) {
-				scaled = 1;
-				count[0] = (unsigned long long)
-					((double)count[0] * count[1] / count[2] + 0.5);
-			}
-		}
-
-		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
-		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
-
-			double msecs = (double)count[0] / 1000000;
-
-			fprintf(stderr, " %14.6f  %-20s (msecs)",
-				msecs, event_name(counter));
-		} else {
-			fprintf(stderr, " %14Ld  %-20s (events)",
-				count[0], event_name(counter));
-		}
-		if (scaled)
-			fprintf(stderr, "  (scaled from %.2f%%)",
-				(double) count[2] / count[1] * 100);
-		fprintf(stderr, "\n");
-	}
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
-			(double)(t1-t0)/1e6);
-	fprintf(stderr, "\n");
-
-	return 0;
-}
-
 /*
  * Symbols
  */
@@ -805,7 +620,7 @@ static int read_symbol(FILE *in, struct sym_entry *s)
 	return 0;
 }
 
-int compare_addr(const void *__sym1, const void *__sym2)
+static int compare_addr(const void *__sym1, const void *__sym2)
 {
 	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
 
@@ -1070,9 +885,6 @@ static void process_options(int argc, char **argv)
 {
 	int error = 0, counter;
 
-	if (strstr(argv[0], "perfstat"))
-		run_perfstat = 1;
-
 	for (;;) {
 		int option_index = 0;
 		/** Options for getopt */
@@ -1134,7 +946,6 @@ static void process_options(int argc, char **argv)
 			tid				=   atoi(optarg); break;
 		case 'r': realtime_prio			=   atoi(optarg); break;
 		case 's': sym_filter			= strdup(optarg); break;
-		case 'S': run_perfstat			=	       1; break;
 		case 'x': vmlinux			= strdup(optarg); break;
 		case 'z': zero				=              1; break;
 		case 'm': mmap_pages			=   atoi(optarg); break;
@@ -1147,12 +958,8 @@ static void process_options(int argc, char **argv)
 		display_help();
 
 	if (!nr_counters) {
-		if (run_perfstat)
-			nr_counters = 8;
-		else {
-			nr_counters = 1;
-			event_id[0] = 0;
-		}
+		nr_counters = 1;
+		event_id[0] = 0;
 	}
 
 	for (counter = 0; counter < nr_counters; counter++) {
@@ -1308,9 +1115,6 @@ int cmd_top(int argc, char **argv, const char *prefix)
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
 
-	if (run_perfstat)
-		return do_perfstat(argc, argv);
-
 	if (tid != -1 || profile_cpu != -1)
 		nr_cpus = 1;
 
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index 4163744..a3bb6cd 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -14,5 +14,6 @@ extern void prune_packed_objects(int);
 extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int check_pager_config(const char *cmd);
 
-extern int cmd_top(int argc, const char **argv, const char *prefix);
+extern int cmd_top(int argc, char **argv, const char *prefix);
+extern int cmd_stat(int argc, char **argv, const char *prefix);
 #endif
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt
index 1eab365..52455d4 100644
--- a/Documentation/perf_counter/command-list.txt
+++ b/Documentation/perf_counter/command-list.txt
@@ -1,4 +1,5 @@
 # List of known perf commands.
 # command name				category [deprecated] [common]
 perf-top                                mainporcelain common
+perf-stat                               mainporcelain common
 
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
deleted file mode 100644
index 042c1b8..0000000
--- a/Documentation/perf_counter/kerneltop.c
+++ /dev/null
@@ -1,1409 +0,0 @@
-/*
- * kerneltop.c: show top kernel functions - performance counters showcase
-
-   Build with:
-
-     cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
-
-   Sample output:
-
-------------------------------------------------------------------------------
- KernelTop:    2669 irqs/sec  [NMI, cache-misses/cache-refs],  (all, cpu: 2)
-------------------------------------------------------------------------------
-
-             weight         RIP          kernel function
-             ______   ________________   _______________
-
-              35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
-              33.00 - ffffffff804cb740 : sock_alloc_send_skb
-              31.26 - ffffffff804ce808 : skb_push
-              22.43 - ffffffff80510004 : tcp_established_options
-              19.00 - ffffffff8027d250 : find_get_page
-              15.76 - ffffffff804e4fc9 : eth_type_trans
-              15.20 - ffffffff804d8baa : dst_release
-              14.86 - ffffffff804cf5d8 : skb_release_head_state
-              14.00 - ffffffff802217d5 : read_hpet
-              12.00 - ffffffff804ffb7f : __ip_local_out
-              11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
-               8.54 - ffffffff805001a3 : ip_queue_xmit
- */
-
-/*
- * perfstat:  /usr/bin/time -alike performance counter statistics utility
-
-          It summarizes the counter events of all tasks (and child tasks),
-          covering all CPUs that the command (or workload) executes on.
-          It only counts the per-task events of the workload started,
-          independent of how many other tasks run on those CPUs.
-
-   Sample output:
-
-   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
-
-   Performance counter stats for 'ls':
-
-           163516953 instructions
-                2295 cache-misses
-             2855182 branch-misses
- */
-
- /*
-  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
-  *
-  * Improvements and fixes by:
-  *
-  *   Arjan van de Ven <arjan@linux.intel.com>
-  *   Yanmin Zhang <yanmin.zhang@intel.com>
-  *   Wu Fengguang <fengguang.wu@intel.com>
-  *   Mike Galbraith <efault@gmx.de>
-  *   Paul Mackerras <paulus@samba.org>
-  *
-  * Released under the GPL v2. (and only v2, not any later version)
-  */
-
-#define _GNU_SOURCE
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <getopt.h>
-#include <assert.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <errno.h>
-#include <ctype.h>
-#include <time.h>
-#include <sched.h>
-#include <pthread.h>
-
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/prctl.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <sys/mman.h>
-
-#include <linux/unistd.h>
-#include <linux/types.h>
-
-#include "../../include/linux/perf_counter.h"
-
-
-/*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_COUNTERS_DISABLE   31
-#define PR_TASK_PERF_COUNTERS_ENABLE    32
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define rdclock()                                       \
-({                                                      \
-        struct timespec ts;                             \
-                                                        \
-        clock_gettime(CLOCK_MONOTONIC, &ts);            \
-        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
-})
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-#ifdef __x86_64__
-#define __NR_perf_counter_open 295
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __i386__
-#define __NR_perf_counter_open 333
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __powerpc__
-#define __NR_perf_counter_open 319
-#define rmb() 		asm volatile ("sync" ::: "memory")
-#define cpu_relax()	asm volatile ("" ::: "memory");
-#endif
-
-#define unlikely(x)	__builtin_expect(!!(x), 0)
-#define min(x, y) ({				\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	(void) (&_min1 == &_min2);		\
-	_min1 < _min2 ? _min1 : _min2; })
-
-asmlinkage int sys_perf_counter_open(
-        struct perf_counter_hw_event    *hw_event_uptr          __user,
-        pid_t                           pid,
-        int                             cpu,
-        int                             group_fd,
-        unsigned long                   flags)
-{
-        return syscall(
-                __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
-}
-
-#define MAX_COUNTERS			64
-#define MAX_NR_CPUS			256
-
-#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
-
-static int			run_perfstat			=  0;
-static int			system_wide			=  0;
-
-static int			nr_counters			=  0;
-static __u64			event_id[MAX_COUNTERS]		= {
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
-
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
-};
-static int			default_interval = 100000;
-static int			event_count[MAX_COUNTERS];
-static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
-
-static __u64			count_filter		       = 100;
-
-static int			tid				= -1;
-static int			profile_cpu			= -1;
-static int			nr_cpus				=  0;
-static int			nmi				=  1;
-static unsigned int		realtime_prio			=  0;
-static int			group				=  0;
-static unsigned int		page_size;
-static unsigned int		mmap_pages			=  16;
-static int			use_mmap			= 0;
-static int			use_munmap			= 0;
-
-static char			*vmlinux;
-
-static char			*sym_filter;
-static unsigned long		filter_start;
-static unsigned long		filter_end;
-
-static int			delay_secs			=  2;
-static int			zero;
-static int			dump_symtab;
-
-static int			scale;
-
-struct source_line {
-	uint64_t		EIP;
-	unsigned long		count;
-	char			*line;
-	struct source_line	*next;
-};
-
-static struct source_line	*lines;
-static struct source_line	**lines_tail;
-
-const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
-};
-
-static char *hw_event_names[] = {
-	"CPU cycles",
-	"instructions",
-	"cache references",
-	"cache misses",
-	"branches",
-	"branch misses",
-	"bus cycles",
-};
-
-static char *sw_event_names[] = {
-	"cpu clock ticks",
-	"task clock ticks",
-	"pagefaults",
-	"context switches",
-	"CPU migrations",
-	"minor faults",
-	"major faults",
-};
-
-struct event_symbol {
-	__u64 event;
-	char *symbol;
-};
-
-static struct event_symbol event_symbols[] = {
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
-
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
-};
-
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
-
-static void display_events_help(void)
-{
-	unsigned int i;
-	__u64 e;
-
-	printf(
-	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		int type, id;
-
-		e = event_symbols[i].event;
-		type = PERF_COUNTER_TYPE(e);
-		id = PERF_COUNTER_ID(e);
-
-		printf("\n                             %d:%d: %-20s",
-				type, id, event_symbols[i].symbol);
-	}
-
-	printf("\n"
-	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
-}
-
-static void display_perfstat_help(void)
-{
-	printf(
-	"Usage: perfstat [<events...>] <cmd...>\n\n"
-	"PerfStat Options (up to %d event types can be specified):\n\n",
-		 MAX_COUNTERS);
-
-	display_events_help();
-
-	printf(
-	" -l                           # scale counter values\n"
-	" -a                           # system-wide collection\n");
-	exit(0);
-}
-
-static void display_help(void)
-{
-	if (run_perfstat)
-		return display_perfstat_help();
-
-	printf(
-	"Usage: kerneltop [<options>]\n"
-	"   Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
-	"KernelTop Options (up to %d event types can be specified at once):\n\n",
-		 MAX_COUNTERS);
-
-	display_events_help();
-
-	printf(
-	" -S        --stat             # perfstat COMMAND\n"
-	" -a                           # system-wide collection (for perfstat)\n\n"
-	" -c CNT    --count=CNT        # event period to sample\n\n"
-	" -C CPU    --cpu=CPU          # CPU (-1 for all)                 [default: -1]\n"
-	" -p PID    --pid=PID          # PID of sampled task (-1 for all) [default: -1]\n\n"
-	" -l                           # show scale factor for RR events\n"
-	" -d delay  --delay=<seconds>  # sampling/display delay           [default:  2]\n"
-	" -f CNT    --filter=CNT       # min-event-count filter          [default: 100]\n\n"
-	" -r prio   --realtime=<prio>  # event acquisition runs with SCHED_FIFO policy\n"
-	" -s symbol --symbol=<symbol>  # function to be showed annotated one-shot\n"
-	" -x path   --vmlinux=<path>   # the vmlinux binary, required for -s use\n"
-	" -z        --zero             # zero counts after display\n"
-	" -D        --dump_symtab      # dump symbol table to stderr on startup\n"
-	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
-	" -M        --mmap_info        # print mmap info stream\n"
-	" -U        --munmap_info      # print munmap info stream\n"
-	);
-
-	exit(0);
-}
-
-static char *event_name(int ctr)
-{
-	__u64 config = event_id[ctr];
-	int type = PERF_COUNTER_TYPE(config);
-	int id = PERF_COUNTER_ID(config);
-	static char buf[32];
-
-	if (PERF_COUNTER_RAW(config)) {
-		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
-		return buf;
-	}
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		if (id < PERF_HW_EVENTS_MAX)
-			return hw_event_names[id];
-		return "unknown-hardware";
-
-	case PERF_TYPE_SOFTWARE:
-		if (id < PERF_SW_EVENTS_MAX)
-			return sw_event_names[id];
-		return "unknown-software";
-
-	default:
-		break;
-	}
-
-	return "unknown";
-}
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static __u64 match_event_symbols(char *str)
-{
-	__u64 config, id;
-	int type;
-	unsigned int i;
-
-	if (sscanf(str, "r%llx", &config) == 1)
-		return config | PERF_COUNTER_RAW_MASK;
-
-	if (sscanf(str, "%d:%llu", &type, &id) == 2)
-		return EID(type, id);
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
-	}
-
-	return ~0ULL;
-}
-
-static int parse_events(char *str)
-{
-	__u64 config;
-
-again:
-	if (nr_counters == MAX_COUNTERS)
-		return -1;
-
-	config = match_event_symbols(str);
-	if (config == ~0ULL)
-		return -1;
-
-	event_id[nr_counters] = config;
-	nr_counters++;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-
-	return 0;
-}
-
-
-/*
- * perfstat
- */
-
-char fault_here[1000000];
-
-static void create_perfstat_counter(int counter)
-{
-	struct perf_counter_hw_event hw_event;
-
-	memset(&hw_event, 0, sizeof(hw_event));
-	hw_event.config		= event_id[counter];
-	hw_event.record_type	= 0;
-	hw_event.nmi		= 0;
-	if (scale)
-		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
-					  PERF_FORMAT_TOTAL_TIME_RUNNING;
-
-	if (system_wide) {
-		int cpu;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
-			if (fd[cpu][counter] < 0) {
-				printf("perfstat error: syscall returned with %d (%s)\n",
-						fd[cpu][counter], strerror(errno));
-				exit(-1);
-			}
-		}
-	} else {
-		hw_event.inherit	= 1;
-		hw_event.disabled	= 1;
-
-		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
-		if (fd[0][counter] < 0) {
-			printf("perfstat error: syscall returned with %d (%s)\n",
-					fd[0][counter], strerror(errno));
-			exit(-1);
-		}
-	}
-}
-
-int do_perfstat(int argc, char *argv[])
-{
-	unsigned long long t0, t1;
-	int counter;
-	ssize_t res;
-	int status;
-	int pid;
-
-	if (!system_wide)
-		nr_cpus = 1;
-
-	for (counter = 0; counter < nr_counters; counter++)
-		create_perfstat_counter(counter);
-
-	argc -= optind;
-	argv += optind;
-
-	if (!argc)
-		display_help();
-
-	/*
-	 * Enable counters and exec the command:
-	 */
-	t0 = rdclock();
-	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
-
-	if ((pid = fork()) < 0)
-		perror("failed to fork");
-	if (!pid) {
-		if (execvp(argv[0], argv)) {
-			perror(argv[0]);
-			exit(-1);
-		}
-	}
-	while (wait(&status) >= 0)
-		;
-	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
-	t1 = rdclock();
-
-	fflush(stdout);
-
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Performance counter stats for \'%s\':\n",
-		argv[0]);
-	fprintf(stderr, "\n");
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		int cpu, nv;
-		__u64 count[3], single_count[3];
-		int scaled;
-
-		count[0] = count[1] = count[2] = 0;
-		nv = scale ? 3 : 1;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			res = read(fd[cpu][counter],
-				   single_count, nv * sizeof(__u64));
-			assert(res == nv * sizeof(__u64));
-
-			count[0] += single_count[0];
-			if (scale) {
-				count[1] += single_count[1];
-				count[2] += single_count[2];
-			}
-		}
-
-		scaled = 0;
-		if (scale) {
-			if (count[2] == 0) {
-				fprintf(stderr, " %14s  %-20s\n",
-					"<not counted>", event_name(counter));
-				continue;
-			}
-			if (count[2] < count[1]) {
-				scaled = 1;
-				count[0] = (unsigned long long)
-					((double)count[0] * count[1] / count[2] + 0.5);
-			}
-		}
-
-		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
-		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
-
-			double msecs = (double)count[0] / 1000000;
-
-			fprintf(stderr, " %14.6f  %-20s (msecs)",
-				msecs, event_name(counter));
-		} else {
-			fprintf(stderr, " %14Ld  %-20s (events)",
-				count[0], event_name(counter));
-		}
-		if (scaled)
-			fprintf(stderr, "  (scaled from %.2f%%)",
-				(double) count[2] / count[1] * 100);
-		fprintf(stderr, "\n");
-	}
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
-			(double)(t1-t0)/1e6);
-	fprintf(stderr, "\n");
-
-	return 0;
-}
-
-/*
- * Symbols
- */
-
-static uint64_t			min_ip;
-static uint64_t			max_ip = -1ll;
-
-struct sym_entry {
-	unsigned long long	addr;
-	char			*sym;
-	unsigned long		count[MAX_COUNTERS];
-	int			skip;
-	struct source_line	*source;
-};
-
-#define MAX_SYMS		100000
-
-static int sym_table_count;
-
-struct sym_entry		*sym_filter_entry;
-
-static struct sym_entry		sym_table[MAX_SYMS];
-
-static void show_details(struct sym_entry *sym);
-
-/*
- * Ordering weight: count-1 * count-2 * ... / count-n
- */
-static double sym_weight(const struct sym_entry *sym)
-{
-	double weight;
-	int counter;
-
-	weight = sym->count[0];
-
-	for (counter = 1; counter < nr_counters-1; counter++)
-		weight *= sym->count[counter];
-
-	weight /= (sym->count[counter] + 1);
-
-	return weight;
-}
-
-static int compare(const void *__sym1, const void *__sym2)
-{
-	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
-
-	return sym_weight(sym1) < sym_weight(sym2);
-}
-
-static long			events;
-static long			userspace_events;
-static const char		CONSOLE_CLEAR[] = "[H[2J";
-
-static struct sym_entry		tmp[MAX_SYMS];
-
-static void print_sym_table(void)
-{
-	int i, printed;
-	int counter;
-	float events_per_sec = events/delay_secs;
-	float kevents_per_sec = (events-userspace_events)/delay_secs;
-	float sum_kevents = 0.0;
-
-	events = userspace_events = 0;
-	memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
-	qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
-
-	for (i = 0; i < sym_table_count && tmp[i].count[0]; i++)
-		sum_kevents += tmp[i].count[0];
-
-	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
-
-	printf(
-"------------------------------------------------------------------------------\n");
-	printf( " KernelTop:%8.0f irqs/sec  kernel:%4.1f%% [%s, ",
-		events_per_sec,
-		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
-		nmi ? "NMI" : "IRQ");
-
-	if (nr_counters == 1)
-		printf("%d ", event_count[0]);
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (counter)
-			printf("/");
-
-		printf("%s", event_name(counter));
-	}
-
-	printf( "], ");
-
-	if (tid != -1)
-		printf(" (tid: %d", tid);
-	else
-		printf(" (all");
-
-	if (profile_cpu != -1)
-		printf(", cpu: %d)\n", profile_cpu);
-	else {
-		if (tid != -1)
-			printf(")\n");
-		else
-			printf(", %d CPUs)\n", nr_cpus);
-	}
-
-	printf("------------------------------------------------------------------------------\n\n");
-
-	if (nr_counters == 1)
-		printf("             events    pcnt");
-	else
-		printf("  weight     events    pcnt");
-
-	printf("         RIP          kernel function\n"
-	       	       "  ______     ______   _____   ________________   _______________\n\n"
-	);
-
-	for (i = 0, printed = 0; i < sym_table_count; i++) {
-		float pcnt;
-		int count;
-
-		if (printed <= 18 && tmp[i].count[0] >= count_filter) {
-			pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
-
-			if (nr_counters == 1)
-				printf("%19.2f - %4.1f%% - %016llx : %s\n",
-					sym_weight(tmp + i),
-					pcnt, tmp[i].addr, tmp[i].sym);
-			else
-				printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
-					sym_weight(tmp + i),
-					tmp[i].count[0],
-					pcnt, tmp[i].addr, tmp[i].sym);
-			printed++;
-		}
-		/*
-		 * Add decay to the counts:
-		 */
-		for (count = 0; count < nr_counters; count++)
-			sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
-	}
-
-	if (sym_filter_entry)
-		show_details(sym_filter_entry);
-
-	{
-		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
-
-		if (poll(&stdin_poll, 1, 0) == 1) {
-			printf("key pressed - exiting.\n");
-			exit(0);
-		}
-	}
-}
-
-static void *display_thread(void *arg)
-{
-	printf("KernelTop refresh period: %d seconds\n", delay_secs);
-
-	while (!sleep(delay_secs))
-		print_sym_table();
-
-	return NULL;
-}
-
-static int read_symbol(FILE *in, struct sym_entry *s)
-{
-	static int filter_match = 0;
-	char *sym, stype;
-	char str[500];
-	int rc, pos;
-
-	rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
-	if (rc == EOF)
-		return -1;
-
-	assert(rc == 3);
-
-	/* skip until end of line: */
-	pos = strlen(str);
-	do {
-		rc = fgetc(in);
-		if (rc == '\n' || rc == EOF || pos >= 499)
-			break;
-		str[pos] = rc;
-		pos++;
-	} while (1);
-	str[pos] = 0;
-
-	sym = str;
-
-	/* Filter out known duplicates and non-text symbols. */
-	if (!strcmp(sym, "_text"))
-		return 1;
-	if (!min_ip && !strcmp(sym, "_stext"))
-		return 1;
-	if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
-		return 1;
-	if (stype != 'T' && stype != 't')
-		return 1;
-	if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
-		return 1;
-	if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
-		return 1;
-
-	s->sym = malloc(strlen(str));
-	assert(s->sym);
-
-	strcpy((char *)s->sym, str);
-	s->skip = 0;
-
-	/* Tag events to be skipped. */
-	if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
-		s->skip = 1;
-	else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
-		s->skip = 1;
-	else if (!strcmp("mwait_idle", s->sym))
-		s->skip = 1;
-
-	if (filter_match == 1) {
-		filter_end = s->addr;
-		filter_match = -1;
-		if (filter_end - filter_start > 10000) {
-			printf("hm, too large filter symbol <%s> - skipping.\n",
-				sym_filter);
-			printf("symbol filter start: %016lx\n", filter_start);
-			printf("                end: %016lx\n", filter_end);
-			filter_end = filter_start = 0;
-			sym_filter = NULL;
-			sleep(1);
-		}
-	}
-	if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
-		filter_match = 1;
-		filter_start = s->addr;
-	}
-
-	return 0;
-}
-
-int compare_addr(const void *__sym1, const void *__sym2)
-{
-	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
-
-	return sym1->addr > sym2->addr;
-}
-
-static void sort_symbol_table(void)
-{
-	int i, dups;
-
-	do {
-		qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
-		for (i = 0, dups = 0; i < sym_table_count; i++) {
-			if (sym_table[i].addr == sym_table[i+1].addr) {
-				sym_table[i+1].addr = -1ll;
-				dups++;
-			}
-		}
-		sym_table_count -= dups;
-	} while(dups);
-}
-
-static void parse_symbols(void)
-{
-	struct sym_entry *last;
-
-	FILE *kallsyms = fopen("/proc/kallsyms", "r");
-
-	if (!kallsyms) {
-		printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
-		exit(-1);
-	}
-
-	while (!feof(kallsyms)) {
-		if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
-			sym_table_count++;
-			assert(sym_table_count <= MAX_SYMS);
-		}
-	}
-
-	sort_symbol_table();
-	min_ip = sym_table[0].addr;
-	max_ip = sym_table[sym_table_count-1].addr;
-	last = sym_table + sym_table_count++;
-
-	last->addr = -1ll;
-	last->sym = "<end>";
-
-	if (filter_end) {
-		int count;
-		for (count=0; count < sym_table_count; count ++) {
-			if (!strcmp(sym_table[count].sym, sym_filter)) {
-				sym_filter_entry = &sym_table[count];
-				break;
-			}
-		}
-	}
-	if (dump_symtab) {
-		int i;
-
-		for (i = 0; i < sym_table_count; i++)
-			fprintf(stderr, "%llx %s\n",
-				sym_table[i].addr, sym_table[i].sym);
-	}
-}
-
-/*
- * Source lines
- */
-
-static void parse_vmlinux(char *filename)
-{
-	FILE *file;
-	char command[PATH_MAX*2];
-	if (!filename)
-		return;
-
-	sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
-
-	file = popen(command, "r");
-	if (!file)
-		return;
-
-	lines_tail = &lines;
-	while (!feof(file)) {
-		struct source_line *src;
-		size_t dummy = 0;
-		char *c;
-
-		src = malloc(sizeof(struct source_line));
-		assert(src != NULL);
-		memset(src, 0, sizeof(struct source_line));
-
-		if (getline(&src->line, &dummy, file) < 0)
-			break;
-		if (!src->line)
-			break;
-
-		c = strchr(src->line, '\n');
-		if (c)
-			*c = 0;
-
-		src->next = NULL;
-		*lines_tail = src;
-		lines_tail = &src->next;
-
-		if (strlen(src->line)>8 && src->line[8] == ':')
-			src->EIP = strtoull(src->line, NULL, 16);
-		if (strlen(src->line)>8 && src->line[16] == ':')
-			src->EIP = strtoull(src->line, NULL, 16);
-	}
-	pclose(file);
-}
-
-static void record_precise_ip(uint64_t ip)
-{
-	struct source_line *line;
-
-	for (line = lines; line; line = line->next) {
-		if (line->EIP == ip)
-			line->count++;
-		if (line->EIP > ip)
-			break;
-	}
-}
-
-static void lookup_sym_in_vmlinux(struct sym_entry *sym)
-{
-	struct source_line *line;
-	char pattern[PATH_MAX];
-	sprintf(pattern, "<%s>:", sym->sym);
-
-	for (line = lines; line; line = line->next) {
-		if (strstr(line->line, pattern)) {
-			sym->source = line;
-			break;
-		}
-	}
-}
-
-static void show_lines(struct source_line *line_queue, int line_queue_count)
-{
-	int i;
-	struct source_line *line;
-
-	line = line_queue;
-	for (i = 0; i < line_queue_count; i++) {
-		printf("%8li\t%s\n", line->count, line->line);
-		line = line->next;
-	}
-}
-
-#define TRACE_COUNT     3
-
-static void show_details(struct sym_entry *sym)
-{
-	struct source_line *line;
-	struct source_line *line_queue = NULL;
-	int displayed = 0;
-	int line_queue_count = 0;
-
-	if (!sym->source)
-		lookup_sym_in_vmlinux(sym);
-	if (!sym->source)
-		return;
-
-	printf("Showing details for %s\n", sym->sym);
-
-	line = sym->source;
-	while (line) {
-		if (displayed && strstr(line->line, ">:"))
-			break;
-
-		if (!line_queue_count)
-			line_queue = line;
-		line_queue_count ++;
-
-		if (line->count >= count_filter) {
-			show_lines(line_queue, line_queue_count);
-			line_queue_count = 0;
-			line_queue = NULL;
-		} else if (line_queue_count > TRACE_COUNT) {
-			line_queue = line_queue->next;
-			line_queue_count --;
-		}
-
-		line->count = 0;
-		displayed++;
-		if (displayed > 300)
-			break;
-		line = line->next;
-	}
-}
-
-/*
- * Binary search in the histogram table and record the hit:
- */
-static void record_ip(uint64_t ip, int counter)
-{
-	int left_idx, middle_idx, right_idx, idx;
-	unsigned long left, middle, right;
-
-	record_precise_ip(ip);
-
-	left_idx = 0;
-	right_idx = sym_table_count-1;
-	assert(ip <= max_ip && ip >= min_ip);
-
-	while (left_idx + 1 < right_idx) {
-		middle_idx = (left_idx + right_idx) / 2;
-
-		left   = sym_table[  left_idx].addr;
-		middle = sym_table[middle_idx].addr;
-		right  = sym_table[ right_idx].addr;
-
-		if (!(left <= middle && middle <= right)) {
-			printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
-			printf("%d %d %d\n", left_idx, middle_idx, right_idx);
-		}
-		assert(left <= middle && middle <= right);
-		if (!(left <= ip && ip <= right)) {
-			printf(" left: %016lx\n", left);
-			printf("   ip: %016lx\n", (unsigned long)ip);
-			printf("right: %016lx\n", right);
-		}
-		assert(left <= ip && ip <= right);
-		/*
-		 * [ left .... target .... middle .... right ]
-		 *   => right := middle
-		 */
-		if (ip < middle) {
-			right_idx = middle_idx;
-			continue;
-		}
-		/*
-		 * [ left .... middle ... target ... right ]
-		 *   => left := middle
-		 */
-		left_idx = middle_idx;
-	}
-
-	idx = left_idx;
-
-	if (!sym_table[idx].skip)
-		sym_table[idx].count[counter]++;
-	else events--;
-}
-
-static void process_event(uint64_t ip, int counter)
-{
-	events++;
-
-	if (ip < min_ip || ip > max_ip) {
-		userspace_events++;
-		return;
-	}
-
-	record_ip(ip, counter);
-}
-
-static void process_options(int argc, char *argv[])
-{
-	int error = 0, counter;
-
-	if (strstr(argv[0], "perfstat"))
-		run_perfstat = 1;
-
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"count",	required_argument,	NULL, 'c'},
-			{"cpu",		required_argument,	NULL, 'C'},
-			{"delay",	required_argument,	NULL, 'd'},
-			{"dump_symtab",	no_argument,		NULL, 'D'},
-			{"event",	required_argument,	NULL, 'e'},
-			{"filter",	required_argument,	NULL, 'f'},
-			{"group",	required_argument,	NULL, 'g'},
-			{"help",	no_argument,		NULL, 'h'},
-			{"nmi",		required_argument,	NULL, 'n'},
-			{"mmap_info",	no_argument,		NULL, 'M'},
-			{"mmap_pages",	required_argument,	NULL, 'm'},
-			{"munmap_info",	no_argument,		NULL, 'U'},
-			{"pid",		required_argument,	NULL, 'p'},
-			{"realtime",	required_argument,	NULL, 'r'},
-			{"scale",	no_argument,		NULL, 'l'},
-			{"symbol",	required_argument,	NULL, 's'},
-			{"stat",	no_argument,		NULL, 'S'},
-			{"vmlinux",	required_argument,	NULL, 'x'},
-			{"zero",	no_argument,		NULL, 'z'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'a': system_wide			=	       1; break;
-		case 'c': default_interval		=   atoi(optarg); break;
-		case 'C':
-			/* CPU and PID are mutually exclusive */
-			if (tid != -1) {
-				printf("WARNING: CPU switch overriding PID\n");
-				sleep(1);
-				tid = -1;
-			}
-			profile_cpu			=   atoi(optarg); break;
-		case 'd': delay_secs			=   atoi(optarg); break;
-		case 'D': dump_symtab			=              1; break;
-
-		case 'e': error				= parse_events(optarg); break;
-
-		case 'f': count_filter			=   atoi(optarg); break;
-		case 'g': group				=   atoi(optarg); break;
-		case 'h':      				  display_help(); break;
-		case 'l': scale				=	       1; break;
-		case 'n': nmi				=   atoi(optarg); break;
-		case 'p':
-			/* CPU and PID are mutually exclusive */
-			if (profile_cpu != -1) {
-				printf("WARNING: PID switch overriding CPU\n");
-				sleep(1);
-				profile_cpu = -1;
-			}
-			tid				=   atoi(optarg); break;
-		case 'r': realtime_prio			=   atoi(optarg); break;
-		case 's': sym_filter			= strdup(optarg); break;
-		case 'S': run_perfstat			=	       1; break;
-		case 'x': vmlinux			= strdup(optarg); break;
-		case 'z': zero				=              1; break;
-		case 'm': mmap_pages			=   atoi(optarg); break;
-		case 'M': use_mmap			=              1; break;
-		case 'U': use_munmap			=              1; break;
-		default: error = 1; break;
-		}
-	}
-	if (error)
-		display_help();
-
-	if (!nr_counters) {
-		if (run_perfstat)
-			nr_counters = 8;
-		else {
-			nr_counters = 1;
-			event_id[0] = 0;
-		}
-	}
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
-			continue;
-
-		event_count[counter] = default_interval;
-	}
-}
-
-struct mmap_data {
-	int counter;
-	void *base;
-	unsigned int mask;
-	unsigned int prev;
-};
-
-static unsigned int mmap_read_head(struct mmap_data *md)
-{
-	struct perf_counter_mmap_page *pc = md->base;
-	int head;
-
-	head = pc->data_head;
-	rmb();
-
-	return head;
-}
-
-struct timeval last_read, this_read;
-
-static void mmap_read(struct mmap_data *md)
-{
-	unsigned int head = mmap_read_head(md);
-	unsigned int old = md->prev;
-	unsigned char *data = md->base + page_size;
-	int diff;
-
-	gettimeofday(&this_read, NULL);
-
-	/*
-	 * If we're further behind than half the buffer, there's a chance
-	 * the writer will bite our tail and screw up the events under us.
-	 *
-	 * If we somehow ended up ahead of the head, we got messed up.
-	 *
-	 * In either case, truncate and restart at head.
-	 */
-	diff = head - old;
-	if (diff > md->mask / 2 || diff < 0) {
-		struct timeval iv;
-		unsigned long msecs;
-
-		timersub(&this_read, &last_read, &iv);
-		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
-
-		fprintf(stderr, "WARNING: failed to keep up with mmap data."
-				"  Last read %lu msecs ago.\n", msecs);
-
-		/*
-		 * head points to a known good entry, start there.
-		 */
-		old = head;
-	}
-
-	last_read = this_read;
-
-	for (; old != head;) {
-		struct ip_event {
-			struct perf_event_header header;
-			__u64 ip;
-			__u32 pid, tid;
-		};
-		struct mmap_event {
-			struct perf_event_header header;
-			__u32 pid, tid;
-			__u64 start;
-			__u64 len;
-			__u64 pgoff;
-			char filename[PATH_MAX];
-		};
-
-		typedef union event_union {
-			struct perf_event_header header;
-			struct ip_event ip;
-			struct mmap_event mmap;
-		} event_t;
-
-		event_t *event = (event_t *)&data[old & md->mask];
-
-		event_t event_copy;
-
-		unsigned int size = event->header.size;
-
-		/*
-		 * Event straddles the mmap boundary -- header should always
-		 * be inside due to u64 alignment of output.
-		 */
-		if ((old & md->mask) + size != ((old + size) & md->mask)) {
-			unsigned int offset = old;
-			unsigned int len = min(sizeof(*event), size), cpy;
-			void *dst = &event_copy;
-
-			do {
-				cpy = min(md->mask + 1 - (offset & md->mask), len);
-				memcpy(dst, &data[offset & md->mask], cpy);
-				offset += cpy;
-				dst += cpy;
-				len -= cpy;
-			} while (len);
-
-			event = &event_copy;
-		}
-
-		old += size;
-
-		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
-			if (event->header.type & PERF_RECORD_IP)
-				process_event(event->ip.ip, md->counter);
-		} else {
-			switch (event->header.type) {
-				case PERF_EVENT_MMAP:
-				case PERF_EVENT_MUNMAP:
-					printf("%s: %Lu %Lu %Lu %s\n",
-							event->header.type == PERF_EVENT_MMAP
-							? "mmap" : "munmap",
-							event->mmap.start,
-							event->mmap.len,
-							event->mmap.pgoff,
-							event->mmap.filename);
-					break;
-			}
-		}
-	}
-
-	md->prev = old;
-}
-
-int main(int argc, char *argv[])
-{
-	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
-	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
-	struct perf_counter_hw_event hw_event;
-	pthread_t thread;
-	int i, counter, group_fd, nr_poll = 0;
-	unsigned int cpu;
-	int ret;
-
-	page_size = sysconf(_SC_PAGE_SIZE);
-
-	process_options(argc, argv);
-
-	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-	assert(nr_cpus <= MAX_NR_CPUS);
-	assert(nr_cpus >= 0);
-
-	if (run_perfstat)
-		return do_perfstat(argc, argv);
-
-	if (tid != -1 || profile_cpu != -1)
-		nr_cpus = 1;
-
-	parse_symbols();
-	if (vmlinux && sym_filter_entry)
-		parse_vmlinux(vmlinux);
-
-	for (i = 0; i < nr_cpus; i++) {
-		group_fd = -1;
-		for (counter = 0; counter < nr_counters; counter++) {
-
-			cpu	= profile_cpu;
-			if (tid == -1 && profile_cpu == -1)
-				cpu = i;
-
-			memset(&hw_event, 0, sizeof(hw_event));
-			hw_event.config		= event_id[counter];
-			hw_event.irq_period	= event_count[counter];
-			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
-			hw_event.nmi		= nmi;
-			hw_event.mmap		= use_mmap;
-			hw_event.munmap		= use_munmap;
-
-			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
-			if (fd[i][counter] < 0) {
-				int err = errno;
-				printf("kerneltop error: syscall returned with %d (%s)\n",
-					fd[i][counter], strerror(err));
-				if (err == EPERM)
-					printf("Are you root?\n");
-				exit(-1);
-			}
-			assert(fd[i][counter] >= 0);
-			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
-
-			/*
-			 * First counter acts as the group leader:
-			 */
-			if (group && group_fd == -1)
-				group_fd = fd[i][counter];
-
-			event_array[nr_poll].fd = fd[i][counter];
-			event_array[nr_poll].events = POLLIN;
-			nr_poll++;
-
-			mmap_array[i][counter].counter = counter;
-			mmap_array[i][counter].prev = 0;
-			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
-			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-					PROT_READ, MAP_SHARED, fd[i][counter], 0);
-			if (mmap_array[i][counter].base == MAP_FAILED) {
-				printf("kerneltop error: failed to mmap with %d (%s)\n",
-						errno, strerror(errno));
-				exit(-1);
-			}
-		}
-	}
-
-	if (pthread_create(&thread, NULL, display_thread, NULL)) {
-		printf("Could not create display thread.\n");
-		exit(-1);
-	}
-
-	if (realtime_prio) {
-		struct sched_param param;
-
-		param.sched_priority = realtime_prio;
-		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
-			printf("Could not set realtime priority.\n");
-			exit(-1);
-		}
-	}
-
-	while (1) {
-		int hits = events;
-
-		for (i = 0; i < nr_cpus; i++) {
-			for (counter = 0; counter < nr_counters; counter++)
-				mmap_read(&mmap_array[i][counter]);
-		}
-
-		if (hits == events)
-			ret = poll(event_array, nr_poll, 100);
-	}
-
-	return 0;
-}
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index 63f8a89..ff8658f 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -249,6 +249,7 @@ static void handle_internal_command(int argc, const char **argv)
 	const char *cmd = argv[0];
 	static struct cmd_struct commands[] = {
 		{ "top", cmd_top, 0 },
+		{ "stat", cmd_stat, 0 },
 	};
 	int i;
 	static const char ext[] = STRIP_EXTENSION;
-- 
cgit v0.10.2


From 1d8c8b209e9351a7de1307d7b9b6df4222b8d742 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 15:52:29 +0200
Subject: perf_counter tools: add help texts

Add Documentation/perf-stat.txt and Documentation/perf-top.txt.

The template that was used for it: Documentation/git-add.txt from Git.

Fix up small bugs to make these help texts show up both in the 'perf'
common-command summary output screen, and on the individual help screens.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt
new file mode 100644
index 0000000..7fcab27
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/perf-stat.txt
@@ -0,0 +1,76 @@
+perf-stat(1)
+==========
+
+NAME
+----
+perf-stat - Run a command and gather performance counter statistics
+
+SYNOPSIS
+--------
+[verse]
+'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+
+DESCRIPTION
+-----------
+This command runs a command and gathers performance counter statistics
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-e::
+--event=::
+                             0:0: cpu-cycles          
+                             0:0: cycles              
+                             0:1: instructions        
+                             0:2: cache-references    
+                             0:3: cache-misses        
+                             0:4: branch-instructions 
+                             0:4: branches            
+                             0:5: branch-misses       
+                             0:6: bus-cycles          
+                             1:0: cpu-clock           
+                             1:1: task-clock          
+                             1:2: page-faults         
+                             1:2: faults              
+                             1:5: minor-faults        
+                             1:6: major-faults        
+                             1:3: context-switches    
+                             1:3: cs                  
+                             1:4: cpu-migrations      
+                             1:4: migrations          
+                           rNNN: raw PMU events (eventsel+umask)
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+Configuration
+-------------
+
+EXAMPLES
+--------
+
+$ perf stat sleep 1
+
+ Performance counter stats for 'sleep':
+
+       0.678356  task clock ticks     (msecs)
+              7  context switches     (events)
+              4  CPU migrations       (events)
+            232  pagefaults           (events)
+        1810403  CPU cycles           (events)
+         946759  instructions         (events)
+          18952  cache references     (events)
+           4885  cache misses         (events)
+
+ Wall-clock time elapsed:  1001.252894 msecs
+
+SEE ALSO
+--------
+linkperf:git-tops[1]
diff --git a/Documentation/perf_counter/Documentation/perf-top.txt b/Documentation/perf_counter/Documentation/perf-top.txt
new file mode 100644
index 0000000..057333b
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/perf-top.txt
@@ -0,0 +1,61 @@
+perf-top(1)
+==========
+
+NAME
+----
+perf-top - Run a command and profile it
+
+SYNOPSIS
+--------
+[verse]
+'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-e::
+--event=::
+                             0:0: cpu-cycles          
+                             0:0: cycles              
+                             0:1: instructions        
+                             0:2: cache-references    
+                             0:3: cache-misses        
+                             0:4: branch-instructions 
+                             0:4: branches            
+                             0:5: branch-misses       
+                             0:6: bus-cycles          
+                             1:0: cpu-clock           
+                             1:1: task-clock          
+                             1:2: page-faults         
+                             1:2: faults              
+                             1:5: minor-faults        
+                             1:6: major-faults        
+                             1:3: context-switches    
+                             1:3: cs                  
+                             1:4: cpu-migrations      
+                             1:4: migrations          
+                           rNNN: raw PMU events (eventsel+umask)
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+Configuration
+-------------
+
+EXAMPLES
+--------
+
+SEE ALSO
+--------
+linkperf:git-stat[1]
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index a3bb6cd..605323c 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -14,6 +14,6 @@ extern void prune_packed_objects(int);
 extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int check_pager_config(const char *cmd);
 
-extern int cmd_top(int argc, char **argv, const char *prefix);
-extern int cmd_stat(int argc, char **argv, const char *prefix);
+extern int cmd_top(int argc, const char **argv, const char *prefix);
+extern int cmd_stat(int argc, const char **argv, const char *prefix);
 #endif
diff --git a/Documentation/perf_counter/generate-cmdlist.sh b/Documentation/perf_counter/generate-cmdlist.sh
index 75c68d9..f06f6fd 100755
--- a/Documentation/perf_counter/generate-cmdlist.sh
+++ b/Documentation/perf_counter/generate-cmdlist.sh
@@ -9,16 +9,16 @@ struct cmdname_help
 
 static struct cmdname_help common_cmds[] = {"
 
-sed -n -e 's/^git-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
 sort |
 while read cmd
 do
      sed -n '
-     /^NAME/,/git-'"$cmd"'/H
+     /^NAME/,/perf-'"$cmd"'/H
      ${
             x
-            s/.*git-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
 	    p
-     }' "Documentation/git-$cmd.txt"
+     }' "Documentation/perf-$cmd.txt"
 done
 echo "};"
-- 
cgit v0.10.2


From e33e0a43736307512422e41aee6e24d5a8c39181 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 15:58:01 +0200
Subject: perf_counter tools: add 'perf record' command

Move perf-record.c into the perf suite of commands.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt
new file mode 100644
index 0000000..d07700e
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/perf-record.txt
@@ -0,0 +1,63 @@
+perf-record(1)
+==========
+
+NAME
+----
+perf-record - Run a command and record its profile into output.perf
+
+SYNOPSIS
+--------
+[verse]
+'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it, into output.perf - without displaying anything.
+
+This file can then be inspected later on, using 'perf report'.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-e::
+--event=::
+                             0:0: cpu-cycles          
+                             0:0: cycles              
+                             0:1: instructions        
+                             0:2: cache-references    
+                             0:3: cache-misses        
+                             0:4: branch-instructions 
+                             0:4: branches            
+                             0:5: branch-misses       
+                             0:6: bus-cycles          
+                             1:0: cpu-clock           
+                             1:1: task-clock          
+                             1:2: page-faults         
+                             1:2: faults              
+                             1:5: minor-faults        
+                             1:6: major-faults        
+                             1:3: context-switches    
+                             1:3: cs                  
+                             1:4: cpu-migrations      
+                             1:4: migrations          
+                           rNNN: raw PMU events (eventsel+umask)
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+Configuration
+-------------
+
+EXAMPLES
+--------
+
+SEE ALSO
+--------
+linkperf:git-stat[1]
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index fb8b717..b6c665e 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -309,6 +309,7 @@ LIB_OBJS += usage.o
 LIB_OBJS += wrapper.o
 
 BUILTIN_OBJS += builtin-help.o
+BUILTIN_OBJS += builtin-record.o
 BUILTIN_OBJS += builtin-stat.o
 BUILTIN_OBJS += builtin-top.o
 
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
new file mode 100644
index 0000000..4a50abf
--- /dev/null
+++ b/Documentation/perf_counter/builtin-record.c
@@ -0,0 +1,506 @@
+
+
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <getopt.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+#include "../../include/linux/perf_counter.h"
+
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE   31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define rdclock()                                       \
+({                                                      \
+        struct timespec ts;                             \
+                                                        \
+        clock_gettime(CLOCK_MONOTONIC, &ts);            \
+        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
+})
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+#ifdef __x86_64__
+#define __NR_perf_counter_open 295
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __i386__
+#define __NR_perf_counter_open 333
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __powerpc__
+#define __NR_perf_counter_open 319
+#define rmb() 		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
+#endif
+
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+extern asmlinkage int sys_perf_counter_open(
+        struct perf_counter_hw_event    *hw_event_uptr          __user,
+        pid_t                           pid,
+        int                             cpu,
+        int                             group_fd,
+        unsigned long                   flags);
+
+#define MAX_COUNTERS			64
+#define MAX_NR_CPUS			256
+
+#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
+
+static int			nr_counters			=  0;
+static __u64			event_id[MAX_COUNTERS]		= { };
+static int			default_interval = 100000;
+static int			event_count[MAX_COUNTERS];
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+static int			nr_cpus				=  0;
+static unsigned int		page_size;
+static unsigned int		mmap_pages			= 16;
+static int			output;
+static char 			*output_name			= "output.perf";
+static int			group				= 0;
+static unsigned int		realtime_prio			=  0;
+
+const unsigned int default_count[] = {
+	1000000,
+	1000000,
+	  10000,
+	  10000,
+	1000000,
+	  10000,
+};
+
+struct event_symbol {
+	__u64 event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols[] = {
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
+
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
+};
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static __u64 match_event_symbols(char *str)
+{
+	__u64 config, id;
+	int type;
+	unsigned int i;
+
+	if (sscanf(str, "r%llx", &config) == 1)
+		return config | PERF_COUNTER_RAW_MASK;
+
+	if (sscanf(str, "%d:%llu", &type, &id) == 2)
+		return EID(type, id);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return ~0ULL;
+}
+
+static int parse_events(char *str)
+{
+	__u64 config;
+
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	config = match_event_symbols(str);
+	if (config == ~0ULL)
+		return -1;
+
+	event_id[nr_counters] = config;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+static void display_events_help(void)
+{
+	unsigned int i;
+	__u64 e;
+
+	printf(
+	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		int type, id;
+
+		e = event_symbols[i].event;
+		type = PERF_COUNTER_TYPE(e);
+		id = PERF_COUNTER_ID(e);
+
+		printf("\n                             %d:%d: %-20s",
+				type, id, event_symbols[i].symbol);
+	}
+
+	printf("\n"
+	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
+}
+
+static void display_help(void)
+{
+	printf(
+	"Usage: perf-record [<options>]\n"
+	"perf-record Options (up to %d event types can be specified at once):\n\n",
+		 MAX_COUNTERS);
+
+	display_events_help();
+
+	printf(
+	" -c CNT    --count=CNT          # event period to sample\n"
+	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
+	" -o file   --output=<file>      # output file\n"
+	" -r prio   --realtime=<prio>    # use RT prio\n"
+	);
+
+	exit(0);
+}
+
+static void process_options(int argc, char *argv[])
+{
+	int error = 0, counter;
+
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"count",	required_argument,	NULL, 'c'},
+			{"event",	required_argument,	NULL, 'e'},
+			{"mmap_pages",	required_argument,	NULL, 'm'},
+			{"output",	required_argument,	NULL, 'o'},
+			{"realtime",	required_argument,	NULL, 'r'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "+:c:e:m:o:r:",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'c': default_interval		=   atoi(optarg); break;
+		case 'e': error				= parse_events(optarg); break;
+		case 'm': mmap_pages			=   atoi(optarg); break;
+		case 'o': output_name			= strdup(optarg); break;
+		case 'r': realtime_prio			=   atoi(optarg); break;
+		default: error = 1; break;
+		}
+	}
+	if (error)
+		display_help();
+
+	if (!nr_counters) {
+		nr_counters = 1;
+		event_id[0] = 0;
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (event_count[counter])
+			continue;
+
+		event_count[counter] = default_interval;
+	}
+}
+
+struct mmap_data {
+	int counter;
+	void *base;
+	unsigned int mask;
+	unsigned int prev;
+};
+
+static unsigned int mmap_read_head(struct mmap_data *md)
+{
+	struct perf_counter_mmap_page *pc = md->base;
+	int head;
+
+	head = pc->data_head;
+	rmb();
+
+	return head;
+}
+
+static long events;
+static struct timeval last_read, this_read;
+
+static void mmap_read(struct mmap_data *md)
+{
+	unsigned int head = mmap_read_head(md);
+	unsigned int old = md->prev;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+	void *buf;
+	int diff;
+
+	gettimeofday(&this_read, NULL);
+
+	/*
+	 * If we're further behind than half the buffer, there's a chance
+	 * the writer will bite our tail and screw up the events under us.
+	 *
+	 * If we somehow ended up ahead of the head, we got messed up.
+	 *
+	 * In either case, truncate and restart at head.
+	 */
+	diff = head - old;
+	if (diff > md->mask / 2 || diff < 0) {
+		struct timeval iv;
+		unsigned long msecs;
+
+		timersub(&this_read, &last_read, &iv);
+		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+		fprintf(stderr, "WARNING: failed to keep up with mmap data."
+				"  Last read %lu msecs ago.\n", msecs);
+
+		/*
+		 * head points to a known good entry, start there.
+		 */
+		old = head;
+	}
+
+	last_read = this_read;
+
+	if (old != head)
+		events++;
+
+	size = head - old;
+
+	if ((old & md->mask) + size != (head & md->mask)) {
+		buf = &data[old & md->mask];
+		size = md->mask + 1 - (old & md->mask);
+		old += size;
+		while (size) {
+			int ret = write(output, buf, size);
+			if (ret < 0) {
+				perror("failed to write");
+				exit(-1);
+			}
+			size -= ret;
+			buf += ret;
+		}
+	}
+
+	buf = &data[old & md->mask];
+	size = head - old;
+	old += size;
+	while (size) {
+		int ret = write(output, buf, size);
+		if (ret < 0) {
+			perror("failed to write");
+			exit(-1);
+		}
+		size -= ret;
+		buf += ret;
+	}
+
+	md->prev = old;
+}
+
+static volatile int done = 0;
+
+static void sigchld_handler(int sig)
+{
+	if (sig == SIGCHLD)
+		done = 1;
+}
+
+int cmd_record(int argc, const char **argv)
+{
+	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
+	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+	struct perf_counter_hw_event hw_event;
+	int i, counter, group_fd, nr_poll = 0;
+	pid_t pid;
+	int ret;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	process_options(argc, argv);
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
+	if (output < 0) {
+		perror("failed to create output file");
+		exit(-1);
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	for (i = 0; i < nr_cpus; i++) {
+		group_fd = -1;
+		for (counter = 0; counter < nr_counters; counter++) {
+
+			memset(&hw_event, 0, sizeof(hw_event));
+			hw_event.config		= event_id[counter];
+			hw_event.irq_period	= event_count[counter];
+			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
+			hw_event.nmi		= 1;
+			hw_event.mmap		= 1;
+			hw_event.comm		= 1;
+
+			fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0);
+			if (fd[i][counter] < 0) {
+				int err = errno;
+				printf("kerneltop error: syscall returned with %d (%s)\n",
+					fd[i][counter], strerror(err));
+				if (err == EPERM)
+					printf("Are you root?\n");
+				exit(-1);
+			}
+			assert(fd[i][counter] >= 0);
+			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
+
+			/*
+			 * First counter acts as the group leader:
+			 */
+			if (group && group_fd == -1)
+				group_fd = fd[i][counter];
+
+			event_array[nr_poll].fd = fd[i][counter];
+			event_array[nr_poll].events = POLLIN;
+			nr_poll++;
+
+			mmap_array[i][counter].counter = counter;
+			mmap_array[i][counter].prev = 0;
+			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+					PROT_READ, MAP_SHARED, fd[i][counter], 0);
+			if (mmap_array[i][counter].base == MAP_FAILED) {
+				printf("kerneltop error: failed to mmap with %d (%s)\n",
+						errno, strerror(errno));
+				exit(-1);
+			}
+		}
+	}
+
+	signal(SIGCHLD, sigchld_handler);
+
+	pid = fork();
+	if (pid < 0)
+		perror("failed to fork");
+
+	if (!pid) {
+		if (execvp(argv[0], argv)) {
+			perror(argv[0]);
+			exit(-1);
+		}
+	}
+
+	if (realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			printf("Could not set realtime priority.\n");
+			exit(-1);
+		}
+	}
+
+	/*
+	 * TODO: store the current /proc/$/maps information somewhere
+	 */
+
+	while (!done) {
+		int hits = events;
+
+		for (i = 0; i < nr_cpus; i++) {
+			for (counter = 0; counter < nr_counters; counter++)
+				mmap_read(&mmap_array[i][counter]);
+		}
+
+		if (hits == events)
+			ret = poll(event_array, nr_poll, 100);
+	}
+
+	return 0;
+}
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index 605323c..5854b17 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -14,6 +14,7 @@ extern void prune_packed_objects(int);
 extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int check_pager_config(const char *cmd);
 
-extern int cmd_top(int argc, const char **argv, const char *prefix);
+extern int cmd_record(int argc, const char **argv, const char *prefix);
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
+extern int cmd_top(int argc, const char **argv, const char *prefix);
 #endif
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt
index 52455d4..d15210a 100644
--- a/Documentation/perf_counter/command-list.txt
+++ b/Documentation/perf_counter/command-list.txt
@@ -1,5 +1,6 @@
 # List of known perf commands.
 # command name				category [deprecated] [common]
-perf-top                                mainporcelain common
+perf-record                             mainporcelain common
 perf-stat                               mainporcelain common
+perf-top                                mainporcelain common
 
diff --git a/Documentation/perf_counter/perf-record.c b/Documentation/perf_counter/perf-record.c
deleted file mode 100644
index 614de7c..0000000
--- a/Documentation/perf_counter/perf-record.c
+++ /dev/null
@@ -1,530 +0,0 @@
-
-
-#define _GNU_SOURCE
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <getopt.h>
-#include <assert.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <errno.h>
-#include <ctype.h>
-#include <time.h>
-#include <sched.h>
-#include <pthread.h>
-
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/prctl.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <sys/mman.h>
-
-#include <linux/unistd.h>
-#include <linux/types.h>
-
-#include "../../include/linux/perf_counter.h"
-
-
-/*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_COUNTERS_DISABLE   31
-#define PR_TASK_PERF_COUNTERS_ENABLE    32
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define rdclock()                                       \
-({                                                      \
-        struct timespec ts;                             \
-                                                        \
-        clock_gettime(CLOCK_MONOTONIC, &ts);            \
-        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
-})
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-#ifdef __x86_64__
-#define __NR_perf_counter_open 295
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __i386__
-#define __NR_perf_counter_open 333
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __powerpc__
-#define __NR_perf_counter_open 319
-#define rmb() 		asm volatile ("sync" ::: "memory")
-#define cpu_relax()	asm volatile ("" ::: "memory");
-#endif
-
-#define unlikely(x)	__builtin_expect(!!(x), 0)
-#define min(x, y) ({				\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	(void) (&_min1 == &_min2);		\
-	_min1 < _min2 ? _min1 : _min2; })
-
-asmlinkage int sys_perf_counter_open(
-        struct perf_counter_hw_event    *hw_event_uptr          __user,
-        pid_t                           pid,
-        int                             cpu,
-        int                             group_fd,
-        unsigned long                   flags)
-{
-        return syscall(
-                __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
-}
-
-#define MAX_COUNTERS			64
-#define MAX_NR_CPUS			256
-
-#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
-
-static int			nr_counters			=  0;
-static __u64			event_id[MAX_COUNTERS]		= { };
-static int			default_interval = 100000;
-static int			event_count[MAX_COUNTERS];
-static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
-static int			nr_cpus				=  0;
-static unsigned int		page_size;
-static unsigned int		mmap_pages			= 16;
-static int			output;
-static char 			*output_name			= "output.perf";
-static int			group				= 0;
-static unsigned int		realtime_prio			=  0;
-
-const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
-};
-
-static char *hw_event_names[] = {
-	"CPU cycles",
-	"instructions",
-	"cache references",
-	"cache misses",
-	"branches",
-	"branch misses",
-	"bus cycles",
-};
-
-static char *sw_event_names[] = {
-	"cpu clock ticks",
-	"task clock ticks",
-	"pagefaults",
-	"context switches",
-	"CPU migrations",
-	"minor faults",
-	"major faults",
-};
-
-struct event_symbol {
-	__u64 event;
-	char *symbol;
-};
-
-static struct event_symbol event_symbols[] = {
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
-
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
-};
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static __u64 match_event_symbols(char *str)
-{
-	__u64 config, id;
-	int type;
-	unsigned int i;
-
-	if (sscanf(str, "r%llx", &config) == 1)
-		return config | PERF_COUNTER_RAW_MASK;
-
-	if (sscanf(str, "%d:%llu", &type, &id) == 2)
-		return EID(type, id);
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
-	}
-
-	return ~0ULL;
-}
-
-static int parse_events(char *str)
-{
-	__u64 config;
-
-again:
-	if (nr_counters == MAX_COUNTERS)
-		return -1;
-
-	config = match_event_symbols(str);
-	if (config == ~0ULL)
-		return -1;
-
-	event_id[nr_counters] = config;
-	nr_counters++;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-
-	return 0;
-}
-
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
-
-static void display_events_help(void)
-{
-	unsigned int i;
-	__u64 e;
-
-	printf(
-	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		int type, id;
-
-		e = event_symbols[i].event;
-		type = PERF_COUNTER_TYPE(e);
-		id = PERF_COUNTER_ID(e);
-
-		printf("\n                             %d:%d: %-20s",
-				type, id, event_symbols[i].symbol);
-	}
-
-	printf("\n"
-	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
-}
-
-static void display_help(void)
-{
-	printf(
-	"Usage: perf-record [<options>]\n"
-	"perf-record Options (up to %d event types can be specified at once):\n\n",
-		 MAX_COUNTERS);
-
-	display_events_help();
-
-	printf(
-	" -c CNT    --count=CNT          # event period to sample\n"
-	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
-	" -o file   --output=<file>      # output file\n"
-	" -r prio   --realtime=<prio>    # use RT prio\n"
-	);
-
-	exit(0);
-}
-
-static void process_options(int argc, char *argv[])
-{
-	int error = 0, counter;
-
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"count",	required_argument,	NULL, 'c'},
-			{"event",	required_argument,	NULL, 'e'},
-			{"mmap_pages",	required_argument,	NULL, 'm'},
-			{"output",	required_argument,	NULL, 'o'},
-			{"realtime",	required_argument,	NULL, 'r'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "+:c:e:m:o:r:",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'c': default_interval		=   atoi(optarg); break;
-		case 'e': error				= parse_events(optarg); break;
-		case 'm': mmap_pages			=   atoi(optarg); break;
-		case 'o': output_name			= strdup(optarg); break;
-		case 'r': realtime_prio			=   atoi(optarg); break;
-		default: error = 1; break;
-		}
-	}
-	if (error)
-		display_help();
-
-	if (!nr_counters) {
-		nr_counters = 1;
-		event_id[0] = 0;
-	}
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
-			continue;
-
-		event_count[counter] = default_interval;
-	}
-}
-
-struct mmap_data {
-	int counter;
-	void *base;
-	unsigned int mask;
-	unsigned int prev;
-};
-
-static unsigned int mmap_read_head(struct mmap_data *md)
-{
-	struct perf_counter_mmap_page *pc = md->base;
-	int head;
-
-	head = pc->data_head;
-	rmb();
-
-	return head;
-}
-
-static long events;
-static struct timeval last_read, this_read;
-
-static void mmap_read(struct mmap_data *md)
-{
-	unsigned int head = mmap_read_head(md);
-	unsigned int old = md->prev;
-	unsigned char *data = md->base + page_size;
-	unsigned long size;
-	void *buf;
-	int diff;
-
-	gettimeofday(&this_read, NULL);
-
-	/*
-	 * If we're further behind than half the buffer, there's a chance
-	 * the writer will bite our tail and screw up the events under us.
-	 *
-	 * If we somehow ended up ahead of the head, we got messed up.
-	 *
-	 * In either case, truncate and restart at head.
-	 */
-	diff = head - old;
-	if (diff > md->mask / 2 || diff < 0) {
-		struct timeval iv;
-		unsigned long msecs;
-
-		timersub(&this_read, &last_read, &iv);
-		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
-
-		fprintf(stderr, "WARNING: failed to keep up with mmap data."
-				"  Last read %lu msecs ago.\n", msecs);
-
-		/*
-		 * head points to a known good entry, start there.
-		 */
-		old = head;
-	}
-
-	last_read = this_read;
-
-	if (old != head)
-		events++;
-
-	size = head - old;
-
-	if ((old & md->mask) + size != (head & md->mask)) {
-		buf = &data[old & md->mask];
-		size = md->mask + 1 - (old & md->mask);
-		old += size;
-		while (size) {
-			int ret = write(output, buf, size);
-			if (ret < 0) {
-				perror("failed to write");
-				exit(-1);
-			}
-			size -= ret;
-			buf += ret;
-		}
-	}
-
-	buf = &data[old & md->mask];
-	size = head - old;
-	old += size;
-	while (size) {
-		int ret = write(output, buf, size);
-		if (ret < 0) {
-			perror("failed to write");
-			exit(-1);
-		}
-		size -= ret;
-		buf += ret;
-	}
-
-	md->prev = old;
-}
-
-static volatile int done = 0;
-
-static void sigchld_handler(int sig)
-{
-	if (sig == SIGCHLD)
-		done = 1;
-}
-
-int main(int argc, char *argv[])
-{
-	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
-	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
-	struct perf_counter_hw_event hw_event;
-	int i, counter, group_fd, nr_poll = 0;
-	pid_t pid;
-	int ret;
-
-	page_size = sysconf(_SC_PAGE_SIZE);
-
-	process_options(argc, argv);
-
-	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-	assert(nr_cpus <= MAX_NR_CPUS);
-	assert(nr_cpus >= 0);
-
-	output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
-	if (output < 0) {
-		perror("failed to create output file");
-		exit(-1);
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	for (i = 0; i < nr_cpus; i++) {
-		group_fd = -1;
-		for (counter = 0; counter < nr_counters; counter++) {
-
-			memset(&hw_event, 0, sizeof(hw_event));
-			hw_event.config		= event_id[counter];
-			hw_event.irq_period	= event_count[counter];
-			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
-			hw_event.nmi		= 1;
-			hw_event.mmap		= 1;
-			hw_event.comm		= 1;
-
-			fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0);
-			if (fd[i][counter] < 0) {
-				int err = errno;
-				printf("kerneltop error: syscall returned with %d (%s)\n",
-					fd[i][counter], strerror(err));
-				if (err == EPERM)
-					printf("Are you root?\n");
-				exit(-1);
-			}
-			assert(fd[i][counter] >= 0);
-			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
-
-			/*
-			 * First counter acts as the group leader:
-			 */
-			if (group && group_fd == -1)
-				group_fd = fd[i][counter];
-
-			event_array[nr_poll].fd = fd[i][counter];
-			event_array[nr_poll].events = POLLIN;
-			nr_poll++;
-
-			mmap_array[i][counter].counter = counter;
-			mmap_array[i][counter].prev = 0;
-			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
-			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-					PROT_READ, MAP_SHARED, fd[i][counter], 0);
-			if (mmap_array[i][counter].base == MAP_FAILED) {
-				printf("kerneltop error: failed to mmap with %d (%s)\n",
-						errno, strerror(errno));
-				exit(-1);
-			}
-		}
-	}
-
-	signal(SIGCHLD, sigchld_handler);
-
-	pid = fork();
-	if (pid < 0)
-		perror("failed to fork");
-
-	if (!pid) {
-		if (execvp(argv[0], argv)) {
-			perror(argv[0]);
-			exit(-1);
-		}
-	}
-
-	if (realtime_prio) {
-		struct sched_param param;
-
-		param.sched_priority = realtime_prio;
-		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
-			printf("Could not set realtime priority.\n");
-			exit(-1);
-		}
-	}
-
-	/*
-	 * TODO: store the current /proc/$/maps information somewhere
-	 */
-
-	while (!done) {
-		int hits = events;
-
-		for (i = 0; i < nr_cpus; i++) {
-			for (counter = 0; counter < nr_counters; counter++)
-				mmap_read(&mmap_array[i][counter]);
-		}
-
-		if (hits == events)
-			ret = poll(event_array, nr_poll, 100);
-	}
-
-	return 0;
-}
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index ff8658f..e849dd6 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -248,8 +248,9 @@ static void handle_internal_command(int argc, const char **argv)
 {
 	const char *cmd = argv[0];
 	static struct cmd_struct commands[] = {
-		{ "top", cmd_top, 0 },
+		{ "record", cmd_record, 0 },
 		{ "stat", cmd_stat, 0 },
+		{ "top", cmd_top, 0 },
 	};
 	int i;
 	static const char ext[] = STRIP_EXTENSION;
-- 
cgit v0.10.2


From cc13a5913797365b683212f5fca5fb04bb3582eb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 16:01:30 +0200
Subject: perf_counter tools: fix --version

Hook up the 'perf version' built-in command.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index 5854b17..aec5ae3 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -17,4 +17,5 @@ extern int check_pager_config(const char *cmd);
 extern int cmd_record(int argc, const char **argv, const char *prefix);
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
 extern int cmd_top(int argc, const char **argv, const char *prefix);
+extern int cmd_version(int argc, const char **argv, const char *prefix);
 #endif
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index e849dd6..20d508c 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -5,7 +5,7 @@
 #include "run-command.h"
 
 const char perf_usage_string[] =
-	"perf [--version] [--exec-path[=PERF_EXEC_PATH]] [--html-path] [-p|--paginate|--no-pager] [--bare] [--perf-dir=PERF_DIR] [--work-tree=PERF_WORK_TREE] [--help] COMMAND [ARGS]";
+	"perf [--version] [--help] COMMAND [ARGS]";
 
 const char perf_more_info_string[] =
 	"See 'perf help COMMAND' for more information on a specific command.";
@@ -251,6 +251,7 @@ static void handle_internal_command(int argc, const char **argv)
 		{ "record", cmd_record, 0 },
 		{ "stat", cmd_stat, 0 },
 		{ "top", cmd_top, 0 },
+		{ "version", cmd_version, 0 },
 	};
 	int i;
 	static const char ext[] = STRIP_EXTENSION;
-- 
cgit v0.10.2


From 6142fdd968c76a0f2ee753c39bd5be8d1bb4ef04 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 16:05:55 +0200
Subject: perf_counter tools: add 'perf help'

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-help.txt b/Documentation/perf_counter/Documentation/perf-help.txt
new file mode 100644
index 0000000..f85fed5
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/perf-help.txt
@@ -0,0 +1,38 @@
+perf-help(1)
+===========
+
+NAME
+----
+perf-help - display help information about perf
+
+SYNOPSIS
+--------
+'perf help' [-a|--all] [COMMAND]
+
+DESCRIPTION
+-----------
+
+With no options and no COMMAND given, the synopsis of the 'perf'
+command and a list of the most commonly used perf commands are printed
+on the standard output.
+
+If the option '--all' or '-a' is given, then all available commands are
+printed on the standard output.
+
+If a perf command is named, a manual page for that command is brought
+up. The 'man' program is used by default for this purpose, but this
+can be overridden by other options or configuration variables.
+
+Note that `perf --help ...` is identical to `perf help ...` because the
+former is internally converted into the latter.
+
+OPTIONS
+-------
+-a::
+--all::
+	Prints all the available commands on the standard output. This
+	option supersedes any other option.
+
+PERF
+----
+Part of the linkperf:perf[1] suite
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index aec5ae3..800f86c 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -14,6 +14,7 @@ extern void prune_packed_objects(int);
 extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int check_pager_config(const char *cmd);
 
+extern int cmd_help(int argc, const char **argv, const char *prefix);
 extern int cmd_record(int argc, const char **argv, const char *prefix);
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
 extern int cmd_top(int argc, const char **argv, const char *prefix);
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index 20d508c..8d6faec 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -248,6 +248,7 @@ static void handle_internal_command(int argc, const char **argv)
 {
 	const char *cmd = argv[0];
 	static struct cmd_struct commands[] = {
+		{ "help", cmd_help, 0 },
 		{ "record", cmd_record, 0 },
 		{ "stat", cmd_stat, 0 },
 		{ "top", cmd_top, 0 },
-- 
cgit v0.10.2


From 125e702b09a28a502e145fb434678ee27720fc48 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 16:13:46 +0200
Subject: perf_counter tools: fix 'make install'

Remove Git leftovers from this area.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index b6c665e..690045e 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -255,9 +255,9 @@ EXTRA_PROGRAMS =
 PROGRAMS += $(EXTRA_PROGRAMS)
 
 #
-# None right now:
+# Single 'perf' binary right now:
 #
-# PROGRAMS += perf-fast-import$X
+PROGRAMS += perf
 
 # List built-in command $C whose implementation cmd_$C() is not in
 # builtin-$C.o but is linked in as part of some other command.
@@ -762,24 +762,9 @@ install: all
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-	$(INSTALL) perf$X perf-upload-pack$X perf-receive-pack$X perf-upload-archive$X perf-shell$X perf-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)'
-	$(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install
-	$(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install
 ifneq (,$X)
 	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
 endif
-	bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \
-	execdir=$$(cd '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' && pwd) && \
-	{ $(RM) "$$execdir/perf-add$X" && \
-		ln "$$bindir/perf$X" "$$execdir/perf-add$X" 2>/dev/null || \
-		cp "$$bindir/perf$X" "$$execdir/perf-add$X"; } && \
-	{ for p in $(filter-out perf-add$X,$(BUILT_INS)); do \
-		$(RM) "$$execdir/$$p" && \
-		ln "$$execdir/perf-add$X" "$$execdir/$$p" 2>/dev/null || \
-		ln -s "perf-add$X" "$$execdir/$$p" 2>/dev/null || \
-		cp "$$execdir/perf-add$X" "$$execdir/$$p" || exit; \
-	  done } && \
-	./check_bindir "z$$bindir" "z$$execdir" "$$bindir/perf-add$X"
 
 ### Maintainer's dist rules
 
-- 
cgit v0.10.2


From a7abe97fd8e7a6ccabba5a04a9f17be9211d418c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 10:59:34 -0400
Subject: tracing: rename EVENT_TRACER config to ENABLE_EVENT_TRACING

Currently we have two configs: EVENT_TRACING and EVENT_TRACER.
All tracers enable EVENT_TRACING. The EVENT_TRACER is only a
convenience to enable the EVENT_TRACING when no other tracers
are enabled.

The names EVENT_TRACER and EVENT_TRACING are too similar and confusing.
This patch renames EVENT_TRACER to ENABLE_EVENT_TRACING to be more
appropriate to what it actually does, as well as add a comment in
the help menu to explain the option's purpose.

[ Impact: rename config option to reduce confusion ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 3fa36d2..450d3c2 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -183,7 +183,7 @@ config CONTEXT_SWITCH_TRACER
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
-config EVENT_TRACER
+config ENABLE_EVENT_TRACING
 	bool "Trace various events in the kernel"
 	select TRACING
 	help
@@ -191,6 +191,10 @@ config EVENT_TRACER
 	  allowing the user to pick and choose which trace point they
 	  want to trace.
 
+	  Note, all tracers enable event tracing. This option is
+	  only a convenience to enable event tracing when no other
+	  tracers are selected.
+
 config FTRACE_SYSCALLS
 	bool "Trace syscalls"
 	depends on HAVE_FTRACE_SYSCALLS
-- 
cgit v0.10.2


From 28d20e2d6e94434827e11c310788b87204b84559 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 12:12:44 -0400
Subject: tracing/events: call the correct event trace selftest init function

The late_initcall calls a helper function instead of the proper
init event selftest function.

This update may have been lost due to conflicting merges.

[ Impact: fix compiler warning and call extended event trace self tests ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 64f9d6d..98daf5d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1199,6 +1199,6 @@ static __init int event_trace_self_tests_init(void)
 	return 0;
 }
 
-late_initcall(event_trace_self_tests);
+late_initcall(event_trace_self_tests_init);
 
 #endif
-- 
cgit v0.10.2


From 23de29de2d8b227943be191d59fb6d983996d55e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 12:59:29 -0400
Subject: tracing: remove dangling semicolon

Due to a cut and paste error, the trace_seq_putc had a semicolon
after the prototype but before the stub function when tracing is
disabled.

[Impact: fix compile error ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 37db9bd..ba9627f 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -63,7 +63,7 @@ static inline int trace_seq_puts(struct trace_seq *s, const char *str)
 {
 	return 0;
 }
-static inline int trace_seq_putc(struct trace_seq *s, unsigned char c);
+static inline int trace_seq_putc(struct trace_seq *s, unsigned char c)
 {
 	return 0;
 }
-- 
cgit v0.10.2


From cd0f2d4736ae8efabc60e54ecc8f677d0eddce02 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 20 Apr 2009 16:56:59 +0100
Subject: ASoC: Factor out generic widget power checks

This will form a basis for further power check refactoring: the overall
goal of these changes is to allow us to check power separately to
applying it, allowing improvements in the power sequencing algorithms.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index a6d7337..28e6e32 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -581,6 +581,19 @@ static int dapm_generic_apply_power(struct snd_soc_dapm_widget *w)
 	return 0;
 }
 
+/* Generic check to see if a widget should be powered.
+ */
+static int dapm_generic_check_power(struct snd_soc_dapm_widget *w)
+{
+	int in, out;
+
+	in = is_connected_input_ep(w);
+	dapm_clear_walk(w->codec);
+	out = is_connected_output_ep(w);
+	dapm_clear_walk(w->codec);
+	return out != 0 && in != 0;
+}
+
 /*
  * Scan a single DAPM widget for a complete audio path and update the
  * power status appropriately.
@@ -653,11 +666,7 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 	}
 
 	/* all other widgets */
-	in = is_connected_input_ep(w);
-	dapm_clear_walk(w->codec);
-	out = is_connected_output_ep(w);
-	dapm_clear_walk(w->codec);
-	power = (out != 0 && in != 0) ? 1 : 0;
+	power = dapm_generic_check_power(w);
 	power_change = (w->power == power) ? 0 : 1;
 	w->power = power;
 
-- 
cgit v0.10.2


From 6ea31b9f0a0307e16656af27fcda3160e2a64a1b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 20 Apr 2009 17:15:41 +0100
Subject: ASoC: Factor out DAPM power checks for DACs and ADCs

This also switches us to using a switch statement for the widget type
in dapm_power_widget().

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 28e6e32..22522e2 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -594,6 +594,34 @@ static int dapm_generic_check_power(struct snd_soc_dapm_widget *w)
 	return out != 0 && in != 0;
 }
 
+/* Check to see if an ADC has power */
+static int dapm_adc_check_power(struct snd_soc_dapm_widget *w)
+{
+	int in;
+
+	if (w->active) {
+		in = is_connected_input_ep(w);
+		dapm_clear_walk(w->codec);
+		return in != 0;
+	} else {
+		return dapm_generic_check_power(w);
+	}
+}
+
+/* Check to see if a DAC has power */
+static int dapm_dac_check_power(struct snd_soc_dapm_widget *w)
+{
+	int out;
+
+	if (w->active) {
+		out = is_connected_output_ep(w);
+		dapm_clear_walk(w->codec);
+		return out != 0;
+	} else {
+		return dapm_generic_check_power(w);
+	}
+}
+
 /*
  * Scan a single DAPM widget for a complete audio path and update the
  * power status appropriately.
@@ -601,36 +629,23 @@ static int dapm_generic_check_power(struct snd_soc_dapm_widget *w)
 static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 			     struct snd_soc_dapm_widget *w)
 {
-	int in, out, power_change, power, ret;
+	int power, ret;
 
-	/* vmid - no action */
-	if (w->id == snd_soc_dapm_vmid)
+	/* Work out the new power state */
+	switch (w->id) {
+	case snd_soc_dapm_vmid:
+		/* No action required */
 		return 0;
 
-	/* active ADC */
-	if (w->id == snd_soc_dapm_adc && w->active) {
-		in = is_connected_input_ep(w);
-		dapm_clear_walk(w->codec);
-		power = (in != 0) ? 1 : 0;
-		if (power == w->power)
-			return 0;
-		w->power = power;
-		return dapm_generic_apply_power(w);
-	}
+	case snd_soc_dapm_adc:
+		power = dapm_adc_check_power(w);
+		break;
 
-	/* active DAC */
-	if (w->id == snd_soc_dapm_dac && w->active) {
-		out = is_connected_output_ep(w);
-		dapm_clear_walk(w->codec);
-		power = (out != 0) ? 1 : 0;
-		if (power == w->power)
-			return 0;
-		w->power = power;
-		return dapm_generic_apply_power(w);
-	}
+	case snd_soc_dapm_dac:
+		power = dapm_dac_check_power(w);
+		break;
 
-	/* pre and post event widgets */
-	if (w->id == snd_soc_dapm_pre) {
+	case snd_soc_dapm_pre:
 		if (!w->event)
 			return 0;
 
@@ -646,8 +661,8 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 				return ret;
 		}
 		return 0;
-	}
-	if (w->id == snd_soc_dapm_post) {
+
+	case snd_soc_dapm_post:
 		if (!w->event)
 			return 0;
 
@@ -663,15 +678,15 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 				return ret;
 		}
 		return 0;
-	}
 
-	/* all other widgets */
-	power = dapm_generic_check_power(w);
-	power_change = (w->power == power) ? 0 : 1;
-	w->power = power;
+	default:
+		power = dapm_generic_check_power(w);
+		break;
+	}
 
-	if (!power_change)
+	if (w->power == power)
 		return 0;
+	w->power = power;
 
 	return dapm_generic_apply_power(w);
 }
-- 
cgit v0.10.2


From b75576d76d4be50196773f36709cb7a4f5ac2ab7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 20 Apr 2009 17:56:13 +0100
Subject: ASoC: Make the DAPM power check an operation on the widget

Rather than having switch statements at point of use make the DAPM
power check a member of the widget structure and set it when we
instantiate the widget.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index fcc929d..839a97b 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -367,6 +367,8 @@ struct snd_soc_dapm_widget {
 	unsigned char suspend:1;		/* was active before suspend */
 	unsigned char pmdown:1;			/* waiting for timeout */
 
+	int (*power_check)(struct snd_soc_dapm_widget *w);
+
 	/* external events */
 	unsigned short event_flags;		/* flags to specify event types */
 	int (*event)(struct snd_soc_dapm_widget*, struct snd_kcontrol *, int);
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 22522e2..d3d1735 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -631,20 +631,7 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 {
 	int power, ret;
 
-	/* Work out the new power state */
 	switch (w->id) {
-	case snd_soc_dapm_vmid:
-		/* No action required */
-		return 0;
-
-	case snd_soc_dapm_adc:
-		power = dapm_adc_check_power(w);
-		break;
-
-	case snd_soc_dapm_dac:
-		power = dapm_dac_check_power(w);
-		break;
-
 	case snd_soc_dapm_pre:
 		if (!w->event)
 			return 0;
@@ -680,10 +667,13 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 		return 0;
 
 	default:
-		power = dapm_generic_check_power(w);
 		break;
 	}
 
+	if (!w->power_check)
+		return 0;
+
+	power = w->power_check(w);
 	if (w->power == power)
 		return 0;
 	w->power = power;
@@ -1147,15 +1137,22 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec)
 		case snd_soc_dapm_switch:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_mixer(codec, w);
 			break;
 		case snd_soc_dapm_mux:
 		case snd_soc_dapm_value_mux:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_mux(codec, w);
 			break;
 		case snd_soc_dapm_adc:
+			w->power_check = dapm_adc_check_power;
+			break;
 		case snd_soc_dapm_dac:
+			w->power_check = dapm_dac_check_power;
+			break;
 		case snd_soc_dapm_pga:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_pga(codec, w);
 			break;
 		case snd_soc_dapm_input:
@@ -1165,6 +1162,8 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec)
 		case snd_soc_dapm_hp:
 		case snd_soc_dapm_mic:
 		case snd_soc_dapm_line:
+			w->power_check = dapm_generic_check_power;
+			break;
 		case snd_soc_dapm_vmid:
 		case snd_soc_dapm_pre:
 		case snd_soc_dapm_post:
-- 
cgit v0.10.2


From 17487bfeb6cfb05920e6a9d5a54f345f2917b4e7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 13:24:21 -0400
Subject: tracing: fix recursive test level calculation

The recursive tests to detect same level recursion in the ring buffers
did not account for the hard/softirq_counts to be shifted. Thus the
numbers could be larger than then mask to be tested.

This patch includes the shift for the calculation of the irq depth.

[ Impact: stop false positives in trace recursion detection ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e145969..aa40ae9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1483,7 +1483,9 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 static int trace_irq_level(void)
 {
-	return hardirq_count() + softirq_count() + in_nmi();
+	return (hardirq_count() >> HARDIRQ_SHIFT) +
+		(softirq_count() >> + SOFTIRQ_SHIFT) +
+		!!in_nmi();
 }
 
 static int trace_recursive_lock(void)
-- 
cgit v0.10.2


From e395898e98119085f666febbc7b631dd69bc637f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 13:32:44 -0400
Subject: tracing: remove recursive test from ring_buffer_event_discard

The ring_buffer_event_discard is not tied to ring_buffer_lock_reserve.
It can be called inside or outside the reserve/commit. Even if it
is called inside the reserve/commit the commit part must also be called.

Only ring_buffer_discard_commit can be used as a replacement for
ring_buffer_unlock_commit.

This patch removes the trace_recursive_unlock from ring_buffer_event_discard
since it would be the wrong place to do so.

[Impact: prevent breakage in trace recursive testing ]

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index aa40ae9..a699767 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1667,7 +1667,6 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
 void ring_buffer_event_discard(struct ring_buffer_event *event)
 {
 	rb_event_discard(event);
-	trace_recursive_unlock();
 }
 EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
 
-- 
cgit v0.10.2


From f1f9b3b1795da8625e0e6096813c9d18d4a344ce Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 20:38:21 +0200
Subject: perfcounters, sched: remove __task_delta_exec()

This function was left orphan by the latest round of sw-counter
cleanups.

[ Impact: remove unused kernel function ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 080d1fd..a77c600 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -85,7 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 /*
  * Lock/unlock the current runqueue - to extract task statistics:
  */
-extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
 
 extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
diff --git a/kernel/sched.c b/kernel/sched.c
index b66a08c..a69278e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4550,29 +4550,6 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
  */
-unsigned long long __task_delta_exec(struct task_struct *p, int update)
-{
-	s64 delta_exec;
-	struct rq *rq;
-
-	rq = task_rq(p);
-	WARN_ON_ONCE(!runqueue_is_locked());
-	WARN_ON_ONCE(!task_current(rq, p));
-
-	if (update)
-		update_rq_clock(rq);
-
-	delta_exec = rq->clock - p->se.exec_start;
-
-	WARN_ON_ONCE(delta_exec < 0);
-
-	return delta_exec;
-}
-
-/*
- * Return any ns on the sched_clock that have not yet been banked in
- * @p in case that task is currently running.
- */
 unsigned long long task_delta_exec(struct task_struct *p)
 {
 	unsigned long flags;
-- 
cgit v0.10.2


From ff743345bf7685a207868048a70e23164c4785e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:26 +0100
Subject: sched: remove extra call overhead for schedule()

Lai Jiangshan's patch reminded me that I promised Nick to remove
that extra call overhead in schedule().

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090313112300.927414207@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/mutex.c b/kernel/mutex.c
index 5d79781..e1fb735 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -248,7 +248,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
 		/* didnt get the lock, go to sleep: */
 		spin_unlock_mutex(&lock->wait_lock, flags);
-		__schedule();
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
 		spin_lock_mutex(&lock->wait_lock, flags);
 	}
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 7601cee..797f6fd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5131,13 +5131,15 @@ pick_next_task(struct rq *rq)
 /*
  * schedule() is the main scheduler function.
  */
-asmlinkage void __sched __schedule(void)
+asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
 	struct rq *rq;
 	int cpu;
 
+need_resched:
+	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
 	rcu_qsctr_inc(cpu);
@@ -5194,15 +5196,9 @@ need_resched_nonpreemptible:
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
-}
 
-asmlinkage void __sched schedule(void)
-{
-need_resched:
-	preempt_disable();
-	__schedule();
 	preempt_enable_no_resched();
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
+	if (need_resched())
 		goto need_resched;
 }
 EXPORT_SYMBOL(schedule);
-- 
cgit v0.10.2


From aa18efb2a2f07e1cf062039848e9d369bb358724 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 16:16:11 -0400
Subject: tracing: use recursive counter over irq level

Althought using the irq level (hardirq_count, softirq_count and in_nmi)
was nice to detect bad recursion right away, but since the counters are
not atomically updated with respect to the interrupts, the function tracer
might trigger the test from an interrupt handler before the hardirq_count
is updated. This will trigger a false warning.

This patch converts the recursive detection to a simple counter.
If the depth is greater than 16 then the recursive detection will trigger.
16 is more than enough for any nested interrupts.

[ Impact: fix false positive trace recursion detection ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a699767..7bcfd3e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1481,47 +1481,34 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 }
 
-static int trace_irq_level(void)
-{
-	return (hardirq_count() >> HARDIRQ_SHIFT) +
-		(softirq_count() >> + SOFTIRQ_SHIFT) +
-		!!in_nmi();
-}
+#define TRACE_RECURSIVE_DEPTH 16
 
 static int trace_recursive_lock(void)
 {
-	int level;
-
-	level = trace_irq_level();
+	current->trace_recursion++;
 
-	if (unlikely(current->trace_recursion & (1 << level))) {
-		/* Disable all tracing before we do anything else */
-		tracing_off_permanent();
+	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+		return 0;
 
-		printk_once(KERN_WARNING "Tracing recursion: "
-			    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
-			    hardirq_count() >> HARDIRQ_SHIFT,
-			    softirq_count() >> SOFTIRQ_SHIFT,
-			    in_nmi());
+	/* Disable all tracing before we do anything else */
+	tracing_off_permanent();
 
-		WARN_ON_ONCE(1);
-		return -1;
-	}
+	printk_once(KERN_WARNING "Tracing recursion: depth[%d]:"
+		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
+		    current->trace_recursion,
+		    hardirq_count() >> HARDIRQ_SHIFT,
+		    softirq_count() >> SOFTIRQ_SHIFT,
+		    in_nmi());
 
-	current->trace_recursion |= 1 << level;
-
-	return 0;
+	WARN_ON_ONCE(1);
+	return -1;
 }
 
 static void trace_recursive_unlock(void)
 {
-	int level;
-
-	level = trace_irq_level();
-
-	WARN_ON_ONCE(!current->trace_recursion & (1 << level));
+	WARN_ON_ONCE(!current->trace_recursion);
 
-	current->trace_recursion &= ~(1 << level);
+	current->trace_recursion--;
 }
 
 static DEFINE_PER_CPU(int, rb_need_resched);
-- 
cgit v0.10.2


From cb4764a6dbffd9bb3cf759421ae82384071a933d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 18:16:44 -0400
Subject: tracing: use nowakeup version of commit for function event trace
 tests

The startup tests for the event tracer also runs with the function
tracer enabled. The "wakeup" version of the trace commit was used
which can grab spinlocks. If a task was preempted by an NMI
that called a function being traced, it could deadlock due to the
function tracer trying to grab the same lock.

Thanks to Frederic Weisbecker for pointing out where the bug was.

Reported-by: Ingo Molnar <mingo@elte.hu>
Reported-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 98daf5d..672b195 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1164,7 +1164,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;
 
-	trace_current_buffer_unlock_commit(event, flags, pc);
+	trace_nowake_buffer_unlock_commit(event, flags, pc);
 
  out:
 	atomic_dec(&per_cpu(test_event_disable, cpu));
-- 
cgit v0.10.2


From 2ecf0a57c60dcb588f310d94412118e15c510532 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 21 Apr 2009 12:16:56 +0900
Subject: ide-dma: don't reset request fields on dma_timeout_retry()

Impact: drop unnecessary code

Now that everything uses bio and block operations, there is no need to
reset request fields manually when retrying a request.  Every field is
guaranteed to be always valid.  Drop unnecessary request field
resetting from ide_dma_timeout_retry().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index a0b8cab1..d9123ec 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -510,23 +510,11 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
 	/*
 	 * un-busy drive etc and make sure request is sane
 	 */
-
 	rq = hwif->rq;
-	if (!rq)
-		goto out;
-
-	hwif->rq = NULL;
-
-	rq->errors = 0;
-
-	if (!rq->bio)
-		goto out;
-
-	rq->sector = rq->bio->bi_sector;
-	rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9;
-	rq->hard_cur_sectors = rq->current_nr_sectors;
-	rq->buffer = bio_data(rq->bio);
-out:
+	if (rq) {
+		hwif->rq = NULL;
+		rq->errors = 0;
+	}
 	return ret;
 }
 
-- 
cgit v0.10.2


From 37fc334cc8eb84f5fe0a5a1cbe6a6a68049e142a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 21 Apr 2009 13:27:16 +0800
Subject: crypto: api - Fix algorithm module auto-loading

The commit a760a6656e6f00bb0144a42a048cf0266646e22c (crypto:
api - Fix module load deadlock with fallback algorithms) broke
the auto-loading of algorithms that require fallbacks.  The
problem is that the fallback mask check is missing an and which
cauess bits that should be considered to interfere with the
result.

Reported-by: Chuck Ebbert <cebbert@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/api.c b/crypto/api.c
index 314dab9..fd2545d 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -221,7 +221,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
 
 		request_module(name);
 
-		if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask) &&
+		if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask &
+		      CRYPTO_ALG_NEED_FALLBACK) &&
 		    snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp))
 			request_module(tmp);
 
-- 
cgit v0.10.2


From acd246b7494c629aa617da49716409566cf52149 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 21 Apr 2009 13:55:20 +0800
Subject: crypto: padlock - Revert aes-all alias to aes

Since the padlock-aes driver doesn't require a fallback (it's
only padlock-sha that does), it should use the aes alias rather
than aes-all so that ones that do need a fallback can use it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 3f0fdd1..856b3cc 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -489,4 +489,4 @@ MODULE_DESCRIPTION("VIA PadLock AES algorithm support");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michal Ludvig");
 
-MODULE_ALIAS("aes-all");
+MODULE_ALIAS("aes");
-- 
cgit v0.10.2


From 6e29ec5701e9d44fa02b96c1c5c45f7516182b65 Mon Sep 17 00:00:00 2001
From: Gautham R Shenoy <ego@in.ibm.com>
Date: Tue, 21 Apr 2009 08:40:49 +0530
Subject: sched: Replace first_cpu() with cpumask_first() in ILB nomination
 code

Stephen Rothwell reported this build warning:

>  kernel/sched.c: In function 'find_new_ilb':
>  kernel/sched.c:4355: warning: passing argument 1 of '__first_cpu' from incompatible pointer type
>
> Possibly caused by commit f711f6090a81cbd396b63de90f415d33f563af9b
> ("sched: Nominate idle load balancer from a semi-idle package") from
> the sched tree.  Should this call to first_cpu be cpumask_first?

For !(CONFIG_SCHED_MC || CONFIG_SCHED_SMT), find_new_ilb() nominates the
Idle load balancer as the first cpu from the nohz.cpu_mask.

This code uses the older API first_cpu(). Replace it with cpumask_first(),
which is the correct API here.

[ Impact: cleanup, address build warning ]

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <20090421031049.GA4140@in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 797f6fd..54d67b9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4356,7 +4356,7 @@ out_done:
 #else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
 static inline int find_new_ilb(int call_cpu)
 {
-	return first_cpu(nohz.cpu_mask);
+	return cpumask_first(nohz.cpu_mask);
 }
 #endif
 
-- 
cgit v0.10.2


From cd474f2d548af3c0eb932d9d47ec11483861aa6f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 21 Apr 2009 08:53:08 +0200
Subject: ALSA: Remove deprecated snd_card_new()

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/sound/core.h b/include/sound/core.h
index 3dea798..a26bbdc 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -300,16 +300,6 @@ int snd_card_create(int idx, const char *id,
 		    struct module *module, int extra_size,
 		    struct snd_card **card_ret);
 
-static inline __deprecated
-struct snd_card *snd_card_new(int idx, const char *id,
-			      struct module *module, int extra_size)
-{
-	struct snd_card *card;
-	if (snd_card_create(idx, id, module, extra_size, &card) < 0)
-		return NULL;
-	return card;
-}
-
 int snd_card_disconnect(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
 int snd_card_free_when_closed(struct snd_card *card);
-- 
cgit v0.10.2


From ef9dfa4b1052af23a603de382d4665b2d1fccc61 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 21 Apr 2009 08:53:41 +0200
Subject: ALSA: Remove deprecated include/sound/driver.h

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/sound/driver.h b/include/sound/driver.h
deleted file mode 100644
index f035943..0000000
--- a/include/sound/driver.h
+++ /dev/null
@@ -1 +0,0 @@
-#warning "This file is deprecated"
-- 
cgit v0.10.2


From fc1edaf9e7cc4d4696f83dee495b8f158d01c4eb Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 20 Apr 2009 13:02:27 -0700
Subject: x86: x2apic, IR: Clean up X86_X2APIC and INTR_REMAP config checks

Add x2apic_supported() to clean up CONFIG_X86_X2APIC checks.

Fix CONFIG_INTR_REMAP checks.

[ Impact: cleanup ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: dwmw2@infradead.org
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Weidong Han <weidong.han@intel.com>
LKML-Reference: <20090420200450.128993000@linux-os.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index fbdd654..3738438 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void);
 extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
-#define EIM_8BIT_APIC_ID	0
-#define EIM_32BIT_APIC_ID	1
+extern int x2apic_mode;
 
 #ifdef CONFIG_X86_X2APIC
 /*
@@ -166,7 +165,7 @@ static inline u64 native_x2apic_icr_read(void)
 	return val;
 }
 
-extern int x2apic, x2apic_phys;
+extern int x2apic_phys;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
@@ -182,6 +181,8 @@ static inline int x2apic_enabled(void)
 		return 1;
 	return 0;
 }
+
+#define x2apic_supported()	(cpu_has_x2apic)
 #else
 static inline void check_x2apic(void)
 {
@@ -194,9 +195,8 @@ static inline int x2apic_enabled(void)
 	return 0;
 }
 
-#define	x2apic	0
 #define	x2apic_preenabled 0
-
+#define	x2apic_supported()	0
 #endif
 
 extern void enable_IR_x2apic(void);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9d826e4..34eaa37 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -161,7 +161,6 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
-#ifdef CONFIG_X86_64
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
 extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
@@ -169,7 +168,6 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
 	struct IO_APIC_route_entry **ioapic_entries);
-#endif
 
 extern void probe_nr_irqs_gsi(void);
 
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 0396760..f275e22 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,6 +1,6 @@
 #ifndef _ASM_X86_IRQ_REMAPPING_H
 #define _ASM_X86_IRQ_REMAPPING_H
 
-#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
 
 #endif	/* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7b41a32..2b30e52 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -134,8 +134,8 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
+int x2apic_mode;
 #ifdef CONFIG_X86_X2APIC
-int x2apic;
 /* x2apic enabled before OS handover */
 static int x2apic_preenabled;
 static int disable_x2apic;
@@ -858,7 +858,7 @@ void clear_local_APIC(void)
 	u32 v;
 
 	/* APIC hasn't been mapped yet */
-	if (!x2apic && !apic_phys)
+	if (!x2apic_mode && !apic_phys)
 		return;
 
 	maxlvt = lapic_get_maxlvt();
@@ -1330,7 +1330,7 @@ void check_x2apic(void)
 {
 	if (x2apic_enabled()) {
 		pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
-		x2apic_preenabled = x2apic = 1;
+		x2apic_preenabled = x2apic_mode = 1;
 	}
 }
 
@@ -1338,7 +1338,7 @@ void enable_x2apic(void)
 {
 	int msr, msr2;
 
-	if (!x2apic)
+	if (!x2apic_mode)
 		return;
 
 	rdmsr(MSR_IA32_APICBASE, msr, msr2);
@@ -1390,25 +1390,17 @@ void __init enable_IR_x2apic(void)
 	mask_IO_APIC_setup(ioapic_entries);
 	mask_8259A();
 
-#ifdef CONFIG_X86_X2APIC
-	if (cpu_has_x2apic)
-		ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
-	else
-#endif
-		ret = enable_intr_remapping(EIM_8BIT_APIC_ID);
-
+	ret = enable_intr_remapping(x2apic_supported());
 	if (ret)
 		goto end_restore;
 
 	pr_info("Enabled Interrupt-remapping\n");
 
-#ifdef CONFIG_X86_X2APIC
-	if (cpu_has_x2apic && !x2apic) {
-		x2apic = 1;
+	if (x2apic_supported() && !x2apic_mode) {
+		x2apic_mode = 1;
 		enable_x2apic();
 		pr_info("Enabled x2apic\n");
 	}
-#endif
 
 end_restore:
 	if (ret)
@@ -1576,7 +1568,7 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
-	if (x2apic) {
+	if (x2apic_mode) {
 		boot_cpu_physical_apicid = read_apic_id();
 		return;
 	}
@@ -2010,10 +2002,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
 	local_irq_save(flags);
 	disable_local_APIC();
-#ifdef CONFIG_INTR_REMAP
+
 	if (intr_remapping_enabled)
 		disable_intr_remapping();
-#endif
+
 	local_irq_restore(flags);
 	return 0;
 }
@@ -2023,8 +2015,6 @@ static int lapic_resume(struct sys_device *dev)
 	unsigned int l, h;
 	unsigned long flags;
 	int maxlvt;
-
-#ifdef CONFIG_INTR_REMAP
 	int ret;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
@@ -2050,17 +2040,8 @@ static int lapic_resume(struct sys_device *dev)
 		mask_8259A();
 	}
 
-	if (x2apic)
+	if (x2apic_mode)
 		enable_x2apic();
-#else
-	if (!apic_pm_state.active)
-		return 0;
-
-	local_irq_save(flags);
-	if (x2apic)
-		enable_x2apic();
-#endif
-
 	else {
 		/*
 		 * Make sure the APICBASE points to the right address
@@ -2098,18 +2079,12 @@ static int lapic_resume(struct sys_device *dev)
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 
-#ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled) {
-		if (x2apic)
-			reenable_intr_remapping(EIM_32BIT_APIC_ID);
-		else
-			reenable_intr_remapping(EIM_8BIT_APIC_ID);
-
+		reenable_intr_remapping(x2apic_mode);
 		unmask_8259A();
 		restore_IO_APIC_setup(ioapic_entries);
 		free_ioapic_entries(ioapic_entries);
 	}
-#endif
 
 	local_irq_restore(flags);
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ea22a86..3a45d2e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -736,7 +736,6 @@ static int __init ioapic_pirq_setup(char *str)
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_INTR_REMAP
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
 	int apic;
@@ -857,7 +856,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 
 	kfree(ioapic_entries);
 }
-#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 1783652..bc3e880 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = {
 void __init default_setup_apic_routing(void)
 {
 #ifdef CONFIG_X86_X2APIC
-	if (x2apic && (apic != &apic_x2apic_phys &&
+	if (x2apic_mode && (apic != &apic_x2apic_phys &&
 #ifdef CONFIG_X86_UV
 		       apic != &apic_x2apic_uv_x &&
 #endif
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 06f592a..10ff5c4 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -158,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic)
 }
 #define irq_remapped(irq)		(0)
 #define enable_intr_remapping(mode)	(-1)
+#define disable_intr_remapping()	(0)
+#define reenable_intr_remapping(mode)	(0)
 #define intr_remapping_enabled		(0)
 #endif
 
-- 
cgit v0.10.2


From 25629d810a52176758401184d9b437fbb7f79195 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 20 Apr 2009 13:02:28 -0700
Subject: x86: x2apic, IR: Move eoi_ioapic_irq() into a CONFIG_INTR_REMAP
 section

Address the following complier warning:

   arch/x86/kernel/apic/io_apic.c:2543: warning: `eoi_ioapic_irq' defined but not used

By moving that function (and eoi_ioapic_irq()) into an existing
#ifdef CONFIG_INTR_REMAP section of the code.

[ Impact: cleanup ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: dwmw2@infradead.org
Cc: Weidong Han <weidong.han@intel.com>
LKML-Reference: <20090420200450.271099000@linux-os.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Weidong Han <weidong.han@intel.com>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3a45d2e..4baa9cb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2517,39 +2517,6 @@ static void irq_complete_move(struct irq_desc **descp)
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
 
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-
-	entry = cfg->irq_2_pin;
-	for (;;) {
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		io_apic_eoi(apic, pin);
-		entry = entry->next;
-	}
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__eoi_ioapic_irq(irq, cfg);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 static void ack_apic_edge(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -2659,6 +2626,39 @@ static void ack_apic_level(unsigned int irq)
 }
 
 #ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		io_apic_eoi(apic, pin);
+		entry = entry->next;
+	}
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(irq, cfg);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ir_ack_apic_edge(unsigned int irq)
 {
 	ack_APIC_irq();
-- 
cgit v0.10.2


From 39d83a5d684a457046aa2a6dac60f105966e78e9 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 20 Apr 2009 13:02:29 -0700
Subject: x86: x2apic, IR: Clean up panic() with nox2apic boot option

Instead of panic() ignore the "nox2apic" boot option when BIOS
has already enabled x2apic prior to OS handover.

[ Impact: printk warning instead of panic() when BIOS has enabled x2apic already ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: dwmw2@infradead.org
Cc: Weidong Han <weidong.han@intel.com>
LKML-Reference: <20090420200450.425091000@linux-os.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2b30e52..d32f558 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -141,8 +141,12 @@ static int x2apic_preenabled;
 static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
-	if (x2apic_enabled())
-		panic("Bios already enabled x2apic, can't enforce nox2apic");
+	if (x2apic_enabled()) {
+		pr_warning("Bios already enabled x2apic, "
+			   "can't enforce nox2apic");
+		return 0;
+	}
+
 	disable_x2apic = 1;
 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
 	return 0;
-- 
cgit v0.10.2


From 9d6c26e73bd248c286bb3597aaf788716e8fcceb Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 20 Apr 2009 13:02:31 -0700
Subject: x86: x2apic, IR: Make config X86_UV dependent on X86_X2APIC

Instead of selecting X86_X2APIC, make config X86_UV dependent
on X86_X2APIC.

This will eliminate enabling CONFIG_X86_X2APIC with out
enabling CONFIG_INTR_REMAP.

[ Impact: cleanup ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Jack Steiner <steiner@sgi.com>
Cc: dwmw2@infradead.org
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Weidong Han <weidong.han@intel.com>
LKML-Reference: <20090420200450.694598000@linux-os.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c9086e6..58fb7b3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -354,7 +354,7 @@ config X86_UV
 	depends on X86_64
 	depends on X86_EXTENDED_PLATFORM
 	depends on NUMA
-	select X86_X2APIC
+	depends on X86_X2APIC
 	---help---
 	  This option is needed in order to support SGI Ultraviolet systems.
 	  If you don't have one of these, you should say N here.
-- 
cgit v0.10.2


From 89388913f2c88a2cd15d24abab571b17a2596127 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Tue, 21 Apr 2009 11:39:27 +0300
Subject: x86: unify noexec handling

This patch unifies noexec handling on 32-bit and 64-bit.

[ Impact: cleanup ]

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
[ mingo@elte.hu: build fix ]
LKML-Reference: <1240303167.771.69.camel@penberg-laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b8238dc..4d258ad 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -273,7 +273,6 @@ typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
 extern int nx_enabled;
-extern void set_nx(void);
 
 #define pgprot_writecombine	pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fd3da1d..fedde53 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -22,6 +22,69 @@ int direct_gbpages
 #endif
 ;
 
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (!strncmp(str, "on", 2)) {
+		__supported_pte_mask |= _PAGE_NX;
+		disable_nx = 0;
+	} else if (!strncmp(str, "off", 3)) {
+		disable_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+	return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+static void __init set_nx(void)
+{
+	unsigned int v[4], l, h;
+
+	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+		if ((v[3] & (1 << 20)) && !disable_nx) {
+			rdmsr(MSR_EFER, l, h);
+			l |= EFER_NX;
+			wrmsr(MSR_EFER, l, h);
+			nx_enabled = 1;
+			__supported_pte_mask |= _PAGE_NX;
+		}
+	}
+}
+#else
+static inline void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+	unsigned long efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if (!(efer & EFER_NX) || disable_nx)
+		__supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
 static void __init find_early_table_space(unsigned long end, int use_pse,
 					  int use_gbpages)
 {
@@ -158,12 +221,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	use_gbpages = direct_gbpages;
 #endif
 
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_PAE
 	set_nx();
 	if (nx_enabled)
 		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 
 	/* Enable PSE if available */
 	if (cpu_has_pse)
@@ -174,7 +234,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 		set_in_cr4(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
-#endif
 
 	if (use_gbpages)
 		page_size_mask |= 1 << PG_LEVEL_1G;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 749559e..2b27120 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -587,61 +587,9 @@ void zap_low_mappings(void)
 	flush_tlb_all();
 }
 
-int nx_enabled;
-
 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-#ifdef CONFIG_X86_PAE
-
-static int disable_nx __initdata;
-
-/*
- * noexec = on|off
- *
- * Control non executable mappings.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-	if (!str || !strcmp(str, "on")) {
-		if (cpu_has_nx) {
-			__supported_pte_mask |= _PAGE_NX;
-			disable_nx = 0;
-		}
-	} else {
-		if (!strcmp(str, "off")) {
-			disable_nx = 1;
-			__supported_pte_mask &= ~_PAGE_NX;
-		} else {
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-early_param("noexec", noexec_setup);
-
-void __init set_nx(void)
-{
-	unsigned int v[4], l, h;
-
-	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-		if ((v[3] & (1 << 20)) && !disable_nx) {
-			rdmsr(MSR_EFER, l, h);
-			l |= EFER_NX;
-			wrmsr(MSR_EFER, l, h);
-			nx_enabled = 1;
-			__supported_pte_mask |= _PAGE_NX;
-		}
-	}
-}
-#endif
-
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1753e80..a4e7846 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -85,39 +85,6 @@ early_param("gbpages", parse_direct_gbpages_on);
 pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec=on|off
- * Control non-executable mappings for 64-bit processes.
- *
- * on	Enable (default)
- * off	Disable
- */
-static int __init nonx_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", nonx_setup);
-
-void __cpuinit check_efer(void)
-{
-	unsigned long efer;
-
-	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || disable_nx)
-		__supported_pte_mask &= ~_PAGE_NX;
-}
-
 int force_personality32;
 
 /*
-- 
cgit v0.10.2


From 92c7c8a7d6e03eb4c0a3c5888e35dbc45f24744c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 24 Mar 2009 07:32:14 +0100
Subject: ALSA: hda - Cache PCM and STREAM parameters queries

Cache quries for PCM and STREAM parameters as well as ampcap and
pincap sharing the hash table.  This will reduce the superfluous
access of the same codec verbs.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index a4e5e59..3d8bf39 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1053,6 +1053,8 @@ EXPORT_SYMBOL_HDA(snd_hda_codec_cleanup_stream);
 /* FIXME: more better hash key? */
 #define HDA_HASH_KEY(nid,dir,idx) (u32)((nid) + ((idx) << 16) + ((dir) << 24))
 #define HDA_HASH_PINCAP_KEY(nid) (u32)((nid) + (0x02 << 24))
+#define HDA_HASH_PARPCM_KEY(nid) (u32)((nid) + (0x03 << 24))
+#define HDA_HASH_PARSTR_KEY(nid) (u32)((nid) + (0x04 << 24))
 #define INFO_AMP_CAPS	(1<<0)
 #define INFO_AMP_VOL(ch)	(1 << (1 + (ch)))
 
@@ -1143,19 +1145,32 @@ int snd_hda_override_amp_caps(struct hda_codec *codec, hda_nid_t nid, int dir,
 }
 EXPORT_SYMBOL_HDA(snd_hda_override_amp_caps);
 
-u32 snd_hda_query_pin_caps(struct hda_codec *codec, hda_nid_t nid)
+static unsigned int
+query_caps_hash(struct hda_codec *codec, hda_nid_t nid, u32 key,
+		unsigned int (*func)(struct hda_codec *, hda_nid_t))
 {
 	struct hda_amp_info *info;
 
-	info = get_alloc_amp_hash(codec, HDA_HASH_PINCAP_KEY(nid));
+	info = get_alloc_amp_hash(codec, key);
 	if (!info)
 		return 0;
 	if (!info->head.val) {
-		info->amp_caps = snd_hda_param_read(codec, nid, AC_PAR_PIN_CAP);
 		info->head.val |= INFO_AMP_CAPS;
+		info->amp_caps = func(codec, nid);
 	}
 	return info->amp_caps;
 }
+
+static unsigned int read_pin_cap(struct hda_codec *codec, hda_nid_t nid)
+{
+	return snd_hda_param_read(codec, nid, AC_PAR_PIN_CAP);
+}
+
+u32 snd_hda_query_pin_caps(struct hda_codec *codec, hda_nid_t nid)
+{
+	return query_caps_hash(codec, nid, HDA_HASH_PINCAP_KEY(nid),
+			       read_pin_cap);
+}
 EXPORT_SYMBOL_HDA(snd_hda_query_pin_caps);
 
 /*
@@ -2538,6 +2553,41 @@ unsigned int snd_hda_calc_stream_format(unsigned int rate,
 }
 EXPORT_SYMBOL_HDA(snd_hda_calc_stream_format);
 
+static unsigned int get_pcm_param(struct hda_codec *codec, hda_nid_t nid)
+{
+	unsigned int val = 0;
+	if (nid != codec->afg &&
+	    (get_wcaps(codec, nid) & AC_WCAP_FORMAT_OVRD))
+		val = snd_hda_param_read(codec, nid, AC_PAR_PCM);
+	if (!val || val == -1)
+		val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM);
+	if (!val || val == -1)
+		return 0;
+	return val;
+}
+
+static unsigned int query_pcm_param(struct hda_codec *codec, hda_nid_t nid)
+{
+	return query_caps_hash(codec, nid, HDA_HASH_PARPCM_KEY(nid),
+			       get_pcm_param);
+}
+
+static unsigned int get_stream_param(struct hda_codec *codec, hda_nid_t nid)
+{
+	unsigned int streams = snd_hda_param_read(codec, nid, AC_PAR_STREAM);
+	if (!streams || streams == -1)
+		streams = snd_hda_param_read(codec, codec->afg, AC_PAR_STREAM);
+	if (!streams || streams == -1)
+		return 0;
+	return streams;
+}
+
+static unsigned int query_stream_param(struct hda_codec *codec, hda_nid_t nid)
+{
+	return query_caps_hash(codec, nid, HDA_HASH_PARSTR_KEY(nid),
+			       get_stream_param);
+}
+
 /**
  * snd_hda_query_supported_pcm - query the supported PCM rates and formats
  * @codec: the HDA codec
@@ -2556,15 +2606,8 @@ static int snd_hda_query_supported_pcm(struct hda_codec *codec, hda_nid_t nid,
 {
 	unsigned int i, val, wcaps;
 
-	val = 0;
 	wcaps = get_wcaps(codec, nid);
-	if (nid != codec->afg && (wcaps & AC_WCAP_FORMAT_OVRD)) {
-		val = snd_hda_param_read(codec, nid, AC_PAR_PCM);
-		if (val == -1)
-			return -EIO;
-	}
-	if (!val)
-		val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM);
+	val = query_pcm_param(codec, nid);
 
 	if (ratesp) {
 		u32 rates = 0;
@@ -2586,15 +2629,9 @@ static int snd_hda_query_supported_pcm(struct hda_codec *codec, hda_nid_t nid,
 		u64 formats = 0;
 		unsigned int streams, bps;
 
-		streams = snd_hda_param_read(codec, nid, AC_PAR_STREAM);
-		if (streams == -1)
+		streams = query_stream_param(codec, nid);
+		if (!streams)
 			return -EIO;
-		if (!streams) {
-			streams = snd_hda_param_read(codec, codec->afg,
-						     AC_PAR_STREAM);
-			if (streams == -1)
-				return -EIO;
-		}
 
 		bps = 0;
 		if (streams & AC_SUPFMT_PCM) {
@@ -2668,17 +2705,9 @@ int snd_hda_is_supported_format(struct hda_codec *codec, hda_nid_t nid,
 	int i;
 	unsigned int val = 0, rate, stream;
 
-	if (nid != codec->afg &&
-	    (get_wcaps(codec, nid) & AC_WCAP_FORMAT_OVRD)) {
-		val = snd_hda_param_read(codec, nid, AC_PAR_PCM);
-		if (val == -1)
-			return 0;
-	}
-	if (!val) {
-		val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM);
-		if (val == -1)
-			return 0;
-	}
+	val = query_pcm_param(codec, nid);
+	if (!val)
+		return 0;
 
 	rate = format & 0xff00;
 	for (i = 0; i < AC_PAR_PCM_RATE_BITS; i++)
@@ -2690,12 +2719,8 @@ int snd_hda_is_supported_format(struct hda_codec *codec, hda_nid_t nid,
 	if (i >= AC_PAR_PCM_RATE_BITS)
 		return 0;
 
-	stream = snd_hda_param_read(codec, nid, AC_PAR_STREAM);
-	if (stream == -1)
-		return 0;
-	if (!stream && nid != codec->afg)
-		stream = snd_hda_param_read(codec, codec->afg, AC_PAR_STREAM);
-	if (!stream || stream == -1)
+	stream = query_stream_param(codec, nid);
+	if (!stream)
 		return 0;
 
 	if (stream & AC_SUPFMT_PCM) {
-- 
cgit v0.10.2


From b613291fb21a2d74eb8323d97fe9aa5d281b306c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 24 Mar 2009 07:36:09 +0100
Subject: ALSA: hda - Retry codec-verbs at errors

The current error-recovery scheme for the codec communication errors
doesn't work always well.  Especially falling back to the
single-command mode causes the fatal problem on many systems.

In this patch, the problematic verb is re-issued again after the error
(even with polling mode) instead of the single-cmd mode.  The
single-cmd mode will be used only when specified via the command
option explicitly, mainly just for testing.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 3d8bf39..1736ccb 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -174,14 +174,23 @@ unsigned int snd_hda_codec_read(struct hda_codec *codec, hda_nid_t nid,
 				unsigned int verb, unsigned int parm)
 {
 	struct hda_bus *bus = codec->bus;
-	unsigned int res;
+	unsigned int cmd, res;
+	int repeated = 0;
 
-	res = make_codec_cmd(codec, nid, direct, verb, parm);
+	cmd = make_codec_cmd(codec, nid, direct, verb, parm);
 	snd_hda_power_up(codec);
 	mutex_lock(&bus->cmd_mutex);
-	if (!bus->ops.command(bus, res))
+ again:
+	if (!bus->ops.command(bus, cmd)) {
 		res = bus->ops.get_response(bus);
-	else
+		if (res == -1 && bus->rirb_error) {
+			if (repeated++ < 1) {
+				snd_printd(KERN_WARNING "hda_codec: "
+					   "Trying verb 0x%08x again\n", cmd);
+				goto again;
+			}
+		}
+	} else
 		res = (unsigned int)-1;
 	mutex_unlock(&bus->cmd_mutex);
 	snd_hda_power_down(codec);
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index 2fdecf4..cd8979c 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -623,6 +623,7 @@ struct hda_bus {
 	/* misc op flags */
 	unsigned int needs_damn_long_delay :1;
 	unsigned int shutdown :1;	/* being unloaded */
+	unsigned int rirb_error:1;	/* error in codec communication */
 };
 
 /*
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 30829ee..803b720 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -604,6 +604,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 		}
 		if (!chip->rirb.cmds) {
 			smp_rmb();
+			bus->rirb_error = 0;
 			return chip->rirb.res; /* the last value */
 		}
 		if (time_after(jiffies, timeout))
@@ -623,8 +624,10 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 		chip->irq = -1;
 		pci_disable_msi(chip->pci);
 		chip->msi = 0;
-		if (azx_acquire_irq(chip, 1) < 0)
+		if (azx_acquire_irq(chip, 1) < 0) {
+			bus->rirb_error = 1;
 			return -1;
+		}
 		goto again;
 	}
 
@@ -644,14 +647,12 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 		return -1;
 	}
 
-	snd_printk(KERN_ERR "hda_intel: azx_get_response timeout, "
-		   "switching to single_cmd mode: last cmd=0x%08x\n",
-		   chip->last_cmd);
-	chip->rirb.rp = azx_readb(chip, RIRBWP);
-	chip->rirb.cmds = 0;
-	/* switch to single_cmd mode */
-	chip->single_cmd = 1;
-	azx_free_cmd_io(chip);
+	snd_printk(KERN_ERR "hda_intel: azx_get_response timeout (ERROR): "
+		   "last cmd=0x%08x\n", chip->last_cmd);
+	spin_lock_irq(&chip->reg_lock);
+	chip->rirb.cmds = 0; /* reset the index */
+	bus->rirb_error = 1;
+	spin_unlock_irq(&chip->reg_lock);
 	return -1;
 }
 
-- 
cgit v0.10.2


From 586be3fcf97eec22fbc0ef6d67e823706aea7167 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 24 Mar 2009 07:43:24 +0100
Subject: ALSA: hda - Add debug prints for Realtek auto-init

Added a couple of debug prints to show the checked id numbers in
alc_subsystem_id().

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8209779..ee92c73 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1022,6 +1022,9 @@ static void alc_subsystem_id(struct hda_codec *codec,
 	if (codec->vendor_id == 0x10ec0260)
 		nid = 0x17;
 	ass = snd_hda_codec_get_pincfg(codec, nid);
+	snd_printd("realtek: No valid SSID, "
+		   "checking pincfg 0x%08x for NID 0x%x\n",
+		   nid, ass);
 	if (!(ass & 1) && !(ass & 0x100000))
 		return;
 	if ((ass >> 30) != 1)	/* no physical connection */
@@ -1036,6 +1039,8 @@ static void alc_subsystem_id(struct hda_codec *codec,
 	if (((ass >> 16) & 0xf) != tmp)
 		return;
 do_sku:
+	snd_printd("realtek: Enabling init ASM_ID=0x%04x CODEC_ID=%08x\n",
+		   ass & 0xffff, codec->vendor_id);
 	/*
 	 * 0 : override
 	 * 1 :	Swap Jack
-- 
cgit v0.10.2


From e8082f3f5a17d7a7bfc7dd1050a3f958dc034e9a Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 21 Apr 2009 17:11:46 +0800
Subject: tracing/filters: don't remove old filters when failed to write
 subsys->filter

If writing subsys->filter returns EINVAL or ENOSPC, the original
filters in subsys/ and subsys/events/ will be removed. This is
definitely wrong.

[ Impact: fix filter setting semantics on error condition ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <49ED8DD2.2070700@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 672b195..9ea55a7 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -600,7 +600,6 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 
 	err = filter_add_subsystem_pred(system, pred);
 	if (err < 0) {
-		filter_free_subsystem_preds(system);
 		filter_free_pred(pred);
 		return err;
 	}
-- 
cgit v0.10.2


From f66578a7637b87810cbb9041c4e3a77fd2fa4706 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 21 Apr 2009 17:12:11 +0800
Subject: tracing/filters: allow user-input to be integer-like string

Suppose we would like to trace all tasks named '123', but this
will fail:

 # echo 'parent_comm == 123' > events/sched/sched_process_fork/filter
 bash: echo: write error: Invalid argument

Don't guess the type of the filter pred in filter_parse(), but instead
we check it in __filter_add_pred().

[ Impact: extend allowed filter field string values ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <49ED8DEB.6000700@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e0fcfd2..6541828 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -313,6 +313,7 @@ static int __filter_add_pred(struct ftrace_event_call *call,
 {
 	struct ftrace_event_field *field;
 	filter_pred_fn_t fn;
+	unsigned long long val;
 
 	field = find_event_field(call, pred->field_name);
 	if (!field)
@@ -322,14 +323,13 @@ static int __filter_add_pred(struct ftrace_event_call *call,
 	pred->offset = field->offset;
 
 	if (is_string_field(field->type)) {
-		if (!pred->str_len)
-			return -EINVAL;
 		fn = filter_pred_string;
 		pred->str_len = field->size;
 		return filter_add_pred_fn(call, pred, fn);
 	} else {
-		if (pred->str_len)
+		if (strict_strtoull(pred->str_val, 0, &val))
 			return -EINVAL;
+		pred->val = val;
 	}
 
 	switch (field->size) {
@@ -413,12 +413,16 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 	return 0;
 }
 
+/*
+ * The filter format can be
+ *   - 0, which means remove all filter preds
+ *   - [||/&&] <field> ==/!= <val>
+ */
 int filter_parse(char **pbuf, struct filter_pred *pred)
 {
-	char *tmp, *tok, *val_str = NULL;
+	char *tok, *val_str = NULL;
 	int tok_n = 0;
 
-	/* field ==/!= number, or/and field ==/!= number, number */
 	while ((tok = strsep(pbuf, " \n"))) {
 		if (tok_n == 0) {
 			if (!strcmp(tok, "0")) {
@@ -478,19 +482,13 @@ int filter_parse(char **pbuf, struct filter_pred *pred)
 		return -EINVAL;
 	}
 
+	strcpy(pred->str_val, val_str);
+	pred->str_len = strlen(val_str);
+
 	pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
 	if (!pred->field_name)
 		return -ENOMEM;
 
-	pred->str_len = 0;
-	pred->val = simple_strtoull(val_str, &tmp, 0);
-	if (tmp == val_str) {
-		strncpy(pred->str_val, val_str, MAX_FILTER_STR_VAL);
-		pred->str_len = strlen(val_str);
-		pred->str_val[pred->str_len] = '\0';
-	} else if (*tmp != '\0')
-		return -EINVAL;
-
 	return 0;
 }
 
-- 
cgit v0.10.2


From a3b48c88f2d5a34c0e25aec0a3dab8069e5a9a72 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 21 Apr 2009 13:37:29 +0200
Subject: ALSA: hda - minor optimization in hda_set_power_state()

Check the target power-state before checking EAPD exception to reduce
unneeded verb executions.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index b649033..b91f6ed 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -2348,7 +2348,8 @@ static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
 		if (wcaps & AC_WCAP_POWER) {
 			unsigned int wid_type = (wcaps & AC_WCAP_TYPE) >>
 				AC_WCAP_TYPE_SHIFT;
-			if (wid_type == AC_WID_PIN) {
+			if (power_state == AC_PWRST_D3 &&
+			    wid_type == AC_WID_PIN) {
 				unsigned int pincap;
 				/*
 				 * don't power down the widget if it controls
@@ -2360,7 +2361,7 @@ static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
 						nid, 0,
 						AC_VERB_GET_EAPD_BTLENABLE, 0);
 					eapd &= 0x02;
-					if (power_state == AC_PWRST_D3 && eapd)
+					if (eapd)
 						continue;
 				}
 			}
-- 
cgit v0.10.2


From 3554228d4289098a8fe5cfd87512ec32a19bbe5a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 21 Apr 2009 09:41:26 -0400
Subject: ring-buffer: only warn on wrap if buffer is bigger than two pages

On boot up, to save memory, ftrace allocates the minimum buffer
which is two pages. Ftrace also goes through a series of tests
(when configured) on boot up. These tests can fill up a page within
a single interrupt.

The ring buffer also has a WARN_ON when it detects that the buffer was
completely filled within a single commit (other commits are allowed to
be nested).

Combine the small buffer on start up, with the tests that can fill more
than a single page within an interrupt, this can trigger the WARN_ON.

This patch makes the WARN_ON only happen when the ring buffer consists
of more than two pages.

[ Impact: prevent false WARN_ON in ftrace startup tests ]

Reported-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <20090421094616.GA14561@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7bcfd3e..61dbdf2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1241,7 +1241,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 * about it.
 		 */
 		if (unlikely(next_page == commit_page)) {
-			WARN_ON_ONCE(1);
+			/* This can easily happen on small ring buffers */
+			WARN_ON_ONCE(buffer->pages > 2);
 			goto out_reset;
 		}
 
-- 
cgit v0.10.2


From dfed0ef9b3ff9e37903920b6938ed33344ad0b3d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 21 Apr 2009 18:33:12 +0200
Subject: ALSA: hda - Fix a typo in debug print for realtek auto-detection

The NID and ASS numbers were swapped...

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a6ec87a..8877120 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1026,7 +1026,7 @@ static void alc_subsystem_id(struct hda_codec *codec,
 	ass = snd_hda_codec_get_pincfg(codec, nid);
 	snd_printd("realtek: No valid SSID, "
 		   "checking pincfg 0x%08x for NID 0x%x\n",
-		   nid, ass);
+		   ass, nid);
 	if (!(ass & 1) && !(ass & 0x100000))
 		return;
 	if ((ass >> 30) != 1)	/* no physical connection */
-- 
cgit v0.10.2


From 19bdc9d061bcb71efd2b53083d96b59bbe1a1751 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 17 Apr 2009 05:26:31 +0000
Subject: clocksource: sh_cmt clocksource support

Add clocksource support to the sh_cmt driver. With this in
place we can do tickless with a single CMT channel.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 02bae39..c247564 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -47,6 +47,7 @@ struct sh_cmt_priv {
 	unsigned long rate;
 	spinlock_t lock;
 	struct clock_event_device ced;
+	struct clocksource cs;
 	unsigned long total_cycles;
 };
 
@@ -376,6 +377,68 @@ static void sh_cmt_stop(struct sh_cmt_priv *p, unsigned long flag)
 	spin_unlock_irqrestore(&p->lock, flags);
 }
 
+static struct sh_cmt_priv *cs_to_sh_cmt(struct clocksource *cs)
+{
+	return container_of(cs, struct sh_cmt_priv, cs);
+}
+
+static cycle_t sh_cmt_clocksource_read(struct clocksource *cs)
+{
+	struct sh_cmt_priv *p = cs_to_sh_cmt(cs);
+	unsigned long flags, raw;
+	unsigned long value;
+	int has_wrapped;
+
+	spin_lock_irqsave(&p->lock, flags);
+	value = p->total_cycles;
+	raw = sh_cmt_get_counter(p, &has_wrapped);
+
+	if (unlikely(has_wrapped))
+		raw = p->match_value;
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return value + raw;
+}
+
+static int sh_cmt_clocksource_enable(struct clocksource *cs)
+{
+	struct sh_cmt_priv *p = cs_to_sh_cmt(cs);
+	int ret;
+
+	p->total_cycles = 0;
+
+	ret = sh_cmt_start(p, FLAG_CLOCKSOURCE);
+	if (ret)
+		return ret;
+
+	/* TODO: calculate good shift from rate and counter bit width */
+	cs->shift = 0;
+	cs->mult = clocksource_hz2mult(p->rate, cs->shift);
+	return 0;
+}
+
+static void sh_cmt_clocksource_disable(struct clocksource *cs)
+{
+	sh_cmt_stop(cs_to_sh_cmt(cs), FLAG_CLOCKSOURCE);
+}
+
+static int sh_cmt_register_clocksource(struct sh_cmt_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clocksource *cs = &p->cs;
+
+	cs->name = name;
+	cs->rating = rating;
+	cs->read = sh_cmt_clocksource_read;
+	cs->enable = sh_cmt_clocksource_enable;
+	cs->disable = sh_cmt_clocksource_disable;
+	cs->mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8);
+	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+	pr_info("sh_cmt: %s used as clock source\n", cs->name);
+	clocksource_register(cs);
+	return 0;
+}
+
 static struct sh_cmt_priv *ced_to_sh_cmt(struct clock_event_device *ced)
 {
 	return container_of(ced, struct sh_cmt_priv, ced);
@@ -483,6 +546,9 @@ int sh_cmt_register(struct sh_cmt_priv *p, char *name,
 	if (clockevent_rating)
 		sh_cmt_register_clockevent(p, name, clockevent_rating);
 
+	if (clocksource_rating)
+		sh_cmt_register_clocksource(p, name, clocksource_rating);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 7a4f453b6d7379a7c380825949977c5a838aa012 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 22 Apr 2009 16:53:34 +0800
Subject: tracing/events: make struct trace_entry->type to be int type

struct trace_entry->type is unsigned char, while trace event's id is
int type, thus for a event with id >= 256, it's entry->type is cast
to (id % 256), and then we can't see the trace output of this event.

 # insmod trace-events-sample.ko
 # echo foo_bar > /mnt/tracing/set_event
 # cat /debug/tracing/events/trace-events-sample/foo_bar/id
 256
 # cat /mnt/tracing/trace_pipe
           <...>-3548  [001]   215.091142: Unknown type 0
           <...>-3548  [001]   216.089207: Unknown type 0
           <...>-3548  [001]   217.087271: Unknown type 0
           <...>-3548  [001]   218.085332: Unknown type 0

[ Impact: fix output for trace events with id >= 256 ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49EEDB0E.5070207@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 75f3ac0..2a4a407 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -16,7 +16,7 @@ struct dentry;
  *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
  */
 struct trace_entry {
-	unsigned char		type;
+	int			type;
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
@@ -73,7 +73,7 @@ enum print_line_t {
 
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+trace_current_buffer_lock_reserve(int type, unsigned long len,
 				  unsigned long flags, int pc);
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc);
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 39a3351..15ef08d 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -198,7 +198,7 @@ ftrace_define_fields_##call(void)					\
 	struct ftrace_event_call *event_call = &event_##call;		\
 	int ret;							\
 									\
-	__common_field(unsigned char, type);				\
+	__common_field(int, type);					\
 	__common_field(unsigned char, flags);				\
 	__common_field(unsigned char, preempt_count);			\
 	__common_field(int, pid);					\
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b9a3adc..b6183bc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -838,7 +838,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 }
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-						    unsigned char type,
+						    int type,
 						    unsigned long len,
 						    unsigned long flags, int pc)
 {
@@ -881,7 +881,7 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
 }
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+trace_current_buffer_lock_reserve(int type, unsigned long len,
 				  unsigned long flags, int pc)
 {
 	return trace_buffer_lock_reserve(&global_trace,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 247948e..7d55bcf 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -422,7 +422,7 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 struct ring_buffer_event;
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-						    unsigned char type,
+						    int type,
 						    unsigned long len,
 						    unsigned long flags,
 						    int pc);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9ea55a7..5d6e879 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s)
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\n",
-				FIELD(unsigned char, type),
+				FIELD(int, type),
 				FIELD(unsigned char, flags),
 				FIELD(unsigned char, preempt_count),
 				FIELD(int, pid),
-- 
cgit v0.10.2


From ff166cb57a17124af75714a9c11f448f56f1a4a3 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 20 Apr 2009 13:02:30 -0700
Subject: x86: x2apic, IR: remove reinit_intr_remapped_IO_APIC()

When interrupt-remapping is enabled, we are relying on
setup_IO_APIC_irqs() to configure remapped entries in the
IO-APIC, which comes little bit later after enabling
interrupt-remapping.

Meanwhile, restoration of old io-apic entries after enabling
interrupt-remapping will not make the interrupts through
io-apic functional anyway.

So remove the unnecessary reinit_intr_remapped_IO_APIC() step.

The longer story:

When interrupt-remapping is enabled, IO-APIC entries need to be
setup in the re-mappable format (pointing to
interrupt-remapping table entries setup by the OS). This
remapping configuration is happening in the same place where we
traditionally configure IO-APIC (i.e., in
setup_IO_APIC_irqs()).

So when we enable interrupt-remapping successfully, there is no
need to restore old io-apic RTE entries before we actually do a
complete configuration shortly in setup_IO_APIC_irqs(). Old
IO-APIC RTE's may be in traditional format (non re-mappable) or
in re-mappable format pointing to interrupt-remapping table
entries setup by BIOS. Restoring both of these will not make
IO-APIC functional. We have to rely on setup_IO_APIC_irqs() for
proper configuration by OS.

So I am removing this unnecessary and broken step.

[ Impact: remove unnecessary/broken IO-APIC setup step ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Weidong Han <weidong.han@intel.com>
Cc: dwmw2@infradead.org
LKML-Reference: <20090420200450.552359000@linux-os.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 34eaa37..1cf1450 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -166,8 +166,6 @@ extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
 extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
-	struct IO_APIC_route_entry **ioapic_entries);
 
 extern void probe_nr_irqs_gsi(void);
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d32f558..1386dbe 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1412,8 +1412,6 @@ end_restore:
 		 * IR enabling failed
 		 */
 		restore_IO_APIC_setup(ioapic_entries);
-	else
-		reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
 
 	unmask_8259A();
 	local_irq_restore(flags);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4baa9cb..8aef5f9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -833,20 +833,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
 	return 0;
 }
 
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
-	struct IO_APIC_route_entry **ioapic_entries)
-
-{
-	/*
-	 * for now plain restore of previous settings.
-	 * TBD: In the case of OS enabling interrupt-remapping,
-	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
-	 * table entries. for now, do a plain restore, and wait for
-	 * the setup_IO_APIC_irqs() to do proper initialization.
-	 */
-	restore_IO_APIC_setup(ioapic_entries);
-}
-
 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 {
 	int apic;
-- 
cgit v0.10.2


From 9cbf117662e24c6d33245666804487f92c21b59d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 19 Apr 2009 04:51:29 +0200
Subject: tracing/events: provide string with undefined size support

This patch provides the support for dynamic size strings on
event tracing.

The key concept is to use a structure with an ending char array field of
undefined size and use such ability to allocate the minimal size on the
ring buffer to make one or more string entries fit inside, as opposite
to a fixed length strings with upper bound.

The strings themselves are represented using fields which have an offset
value from the beginning of the entry.

This patch provides three new macros:

__string(item, src)

This one declares a string to the structure inside TP_STRUCT__entry.
You need to provide the name of the string field and the source that will
be copied inside.
This will also add the dynamic size of the string needed for the ring
buffer entry allocation.
A stack allocated structure is used to temporarily store the offset
of each strings, avoiding double calls to strlen() on each event
insertion.

__get_str(field)

This one will give you a pointer to the string you have created. This
is an abstract helper to resolve the absolute address given the field
name which is a relative address from the beginning of the trace_structure.

__assign_str(dst, src)

Use this macro to automatically perform the string copy from src to
dst. src must be a variable to assign and dst is the name of a __string
field.

Example on how to use it:

TRACE_EVENT(my_event,
	TP_PROTO(char *src1, char *src2),

	TP_ARGS(src1, src2),
	TP_STRUCT__entry(
		__string(str1, src1)
		__string(str2, src2)
	),
	TP_fast_assign(
		__assign_str(str1, src1);
		__assign_str(str2, src2);
	),
	TP_printk("%s %s", __get_str(src1), __get_str(src2))
)

Of course you can mix-up any __field or __array inside this
TRACE_EVENT. The position of the __string or __assign_str
doesn't matter.

Changes in v2:

Address the suggestion of Steven Rostedt: drop the opening_string() macro
and redefine __ending_string() to get the size of the string to be copied
instead of overwritting the whole ring buffer allocation.

Changes in v3:

Address other suggestions of Steven Rostedt and Peter Zijlstra with
some changes: drop the __ending_string and the need to have only one
string field.
Use offsets instead of absolute addresses.

[ Impact: allow more compact memory usage for string tracing ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 15ef08d..5a7d18c 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -27,6 +27,9 @@
 #undef __field
 #define __field(type, item)		type	item;
 
+#undef __string
+#define __string(item, src)		int	__str_loc_##item;
+
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
@@ -35,14 +38,53 @@
 	struct ftrace_raw_##name {				\
 		struct trace_entry	ent;			\
 		tstruct						\
+		char			__str_data[0];		\
 	};							\
 	static struct ftrace_event_call event_##name
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+
 /*
  * Stage 2 of the trace events.
  *
+ * Include the following:
+ *
+ * struct ftrace_str_offsets_<call> {
+ *	int				<str1>;
+ *	int				<str2>;
+ *	[...]
+ * };
+ *
+ * The __string() macro will create each int <str>, this is to
+ * keep the offset of each string from the beggining of the event
+ * once we perform the strlen() of the src strings.
+ *
+ */
+
+#undef TRACE_FORMAT
+#define TRACE_FORMAT(call, proto, args, fmt)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __field
+#define __field(type, item);
+
+#undef __string
+#define __string(item, src)	int item;
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+	struct ftrace_str_offsets_##call {				\
+		tstruct;						\
+	};
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 3 of the trace events.
+ *
  * Override the macros in <trace/trace_events.h> to include the following:
  *
  * enum print_line_t
@@ -80,6 +122,9 @@
 #undef TP_printk
 #define TP_printk(fmt, args...) fmt "\n", args
 
+#undef __get_str
+#define __get_str(field)	(char *)__entry + __entry->__str_loc_##field
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
 enum print_line_t							\
@@ -146,6 +191,16 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	if (!ret)							\
 		return 0;
 
+#undef __string
+#define __string(item, src)						       \
+	ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t"	       \
+			       "offset:%u;tsize:%u;\n",			       \
+			       (unsigned int)offsetof(typeof(field),	       \
+					__str_loc_##item),		       \
+			       (unsigned int)sizeof(field.__str_loc_##item));  \
+	if (!ret)							       \
+		return 0;
+
 #undef __entry
 #define __entry REC
 
@@ -189,6 +244,12 @@ ftrace_format_##call(struct trace_seq *s)				\
 	if (ret)							\
 		return ret;
 
+#undef __string
+#define __string(item, src)						       \
+	ret = trace_define_field(event_call, "__str_loc", #item,	       \
+				offsetof(typeof(field), __str_loc_##item),     \
+				sizeof(field.__str_loc_##item));
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
 int									\
@@ -212,7 +273,7 @@ ftrace_define_fields_##call(void)					\
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
- * Stage 3 of the trace events.
+ * Stage 4 of the trace events.
  *
  * Override the macros in <trace/trace_events.h> to include the following:
  *
@@ -409,6 +470,23 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #undef __entry
 #define __entry entry
 
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __string
+#define __string(item, src)						       \
+	__str_offsets.item = __str_size +				       \
+			     offsetof(typeof(*entry), __str_data);	       \
+	__str_size += strlen(src) + 1;
+
+#undef __assign_str
+#define __assign_str(dst, src)						\
+	__entry->__str_loc_##dst = __str_offsets.dst;			\
+	strcpy(__get_str(dst), src);
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
 _TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
@@ -417,18 +495,22 @@ static struct ftrace_event_call event_##call;				\
 									\
 static void ftrace_raw_event_##call(proto)				\
 {									\
+	struct ftrace_str_offsets_##call __maybe_unused __str_offsets;  \
 	struct ftrace_event_call *call = &event_##call;			\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	unsigned long irq_flags;					\
+	int __str_size = 0;						\
 	int pc;								\
 									\
 	local_save_flags(irq_flags);					\
 	pc = preempt_count();						\
 									\
+	tstruct;							\
+									\
 	event = trace_current_buffer_lock_reserve(event_##call.id,	\
-				  sizeof(struct ftrace_raw_##call),	\
-				  irq_flags, pc);			\
+				 sizeof(struct ftrace_raw_##call) + __str_size,\
+				 irq_flags, pc);			\
 	if (!event)							\
 		return;							\
 	entry	= ring_buffer_event_data(event);			\
-- 
cgit v0.10.2


From 7e7ca9a22dbbc5c91763cd16923c7509918709b6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 19 Apr 2009 04:54:49 +0200
Subject: tracing/lock: provide lock_acquired event support for dynamic size
 string

Now that we can support the dynamic sized string, make the lock tracing
able to use it, making it safe against modules removal and consuming
the right amount of memory needed for each lock name

Changes in v2:
adapt to the __ending_string() updates and the opening_string() removal.

[ Impact: protect lock tracer against module removal ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
index 45e326b..3ca315c 100644
--- a/include/trace/events/lockdep.h
+++ b/include/trace/events/lockdep.h
@@ -38,16 +38,16 @@ TRACE_EVENT(lock_acquired,
 	TP_ARGS(lock, ip, waittime),
 
 	TP_STRUCT__entry(
-		__field(const char *, name)
+		__string(name, lock->name)
 		__field(unsigned long, wait_usec)
 		__field(unsigned long, wait_nsec_rem)
 	),
 	TP_fast_assign(
-		__entry->name = lock->name;
+		__assign_str(name, lock->name);
 		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
 		__entry->wait_usec = (unsigned long) waittime;
 	),
-	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
+	TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
 				       __entry->wait_nsec_rem)
 );
 
-- 
cgit v0.10.2


From 6a74aa40907757ec98d8710ff66cd4cfe064e7d8 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 22 Apr 2009 00:41:09 +0200
Subject: tracing/events: protect __get_str()

The __get_str() macro is used in a code part then its content should be
protected with parenthesis.

[ Impact: make macro definition more robust ]

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 5a7d18c..a77f71a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -123,7 +123,7 @@
 #define TP_printk(fmt, args...) fmt "\n", args
 
 #undef __get_str
-#define __get_str(field)	(char *)__entry + __entry->__str_loc_##field
+#define __get_str(field)	((char *)__entry + __entry->__str_loc_##field)
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-- 
cgit v0.10.2


From 1b4246a1fc487370665bf6c55aac8eae379642c0 Mon Sep 17 00:00:00 2001
From: Joonyoung Shim <jy0922.shim@samsung.com>
Date: Wed, 22 Apr 2009 10:56:50 +0900
Subject: ASoC: OMAP: Add checking to detect bufferless pcms

Add checking in hw_params and prepare to detect bufferless pcms(i.e. BT
<--> codec).

Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c
index 07cf7f4..6454e15 100644
--- a/sound/soc/omap/omap-pcm.c
+++ b/sound/soc/omap/omap-pcm.c
@@ -87,8 +87,10 @@ static int omap_pcm_hw_params(struct snd_pcm_substream *substream,
 	struct omap_pcm_dma_data *dma_data = rtd->dai->cpu_dai->dma_data;
 	int err = 0;
 
+	/* return if this is a bufferless transfer e.g.
+	 * codec <--> BT codec or GSM modem -- lg FIXME */
 	if (!dma_data)
-		return -ENODEV;
+		return 0;
 
 	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
 	runtime->dma_bytes = params_buffer_bytes(params);
@@ -134,6 +136,11 @@ static int omap_pcm_prepare(struct snd_pcm_substream *substream)
 	struct omap_pcm_dma_data *dma_data = prtd->dma_data;
 	struct omap_dma_channel_params dma_params;
 
+	/* return if this is a bufferless transfer e.g.
+	 * codec <--> BT codec or GSM modem -- lg FIXME */
+	if (!prtd->dma_data)
+		return 0;
+
 	memset(&dma_params, 0, sizeof(dma_params));
 	/*
 	 * Note: Regardless of interface data formats supported by OMAP McBSP
-- 
cgit v0.10.2


From 246d0a17f5e09af0794960164269fc8988a8811c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 22 Apr 2009 18:24:55 +0100
Subject: ASoC: Add power supply widget to DAPM

Many modern CODECs have shared resources on chip which must be enabled
for portions of the chip to work but which can be disabled at other times
in order to achieve power savings. Examples of such resources include
power supplies and some internal clocks.

Since these widgets are dependencies for the audio path but do not carry
audio signals they require slightly different handling to most widgets -
they do not contribute to the audio path and so should not be counted as
either inputs or outputs during path walks.

Cases where one supply provides a supply for another will require
additional work. There is also room for more optimisation of the graph
walking to avoid repeated checks for the same thing.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt
index 9e67632..9ac842b 100644
--- a/Documentation/sound/alsa/soc/dapm.txt
+++ b/Documentation/sound/alsa/soc/dapm.txt
@@ -62,6 +62,7 @@ Audio DAPM widgets fall into a number of types:-
  o Mic        - Mic (and optional Jack)
  o Line       - Line Input/Output (and optional Jack)
  o Speaker    - Speaker
+ o Supply     - Power or clock supply widget used by other widgets.
  o Pre        - Special PRE widget (exec before all others)
  o Post       - Special POST widget (exec after all others)
 
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 839a97b..533f9f2 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -154,12 +154,16 @@
 	.shift = wshift, .invert = winvert, \
 	.event = wevent, .event_flags = wflags}
 
-/* generic register modifier widget */
+/* generic widgets */
 #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \
 {	.id = wid, .name = wname, .kcontrols = NULL, .num_kcontrols = 0, \
 	.reg = -((wreg) + 1), .shift = wshift, .mask = wmask, \
 	.on_val = won_val, .off_val = woff_val, .event = dapm_reg_event, \
 	.event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD}
+#define SND_SOC_DAPM_SUPPLY(wname, wreg, wshift, winvert, wevent, wflags) \
+{	.id = snd_soc_dapm_supply, .name = wname, .reg = wreg,	\
+	.shift = wshift, .invert = winvert, .event = wevent, \
+	.event_flags = wflags}
 
 /* dapm kcontrol types */
 #define SOC_DAPM_SINGLE(xname, reg, shift, max, invert) \
@@ -308,6 +312,7 @@ enum snd_soc_dapm_type {
 	snd_soc_dapm_vmid,			/* codec bias/vmid - to minimise pops */
 	snd_soc_dapm_pre,			/* machine specific pre widget - exec first */
 	snd_soc_dapm_post,			/* machine specific post widget - exec last */
+	snd_soc_dapm_supply,		/* power/clock supply */
 };
 
 /*
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index d3d1735..7847f80 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -52,17 +52,19 @@
 
 /* dapm power sequences - make this per codec in the future */
 static int dapm_up_seq[] = {
-	snd_soc_dapm_pre, snd_soc_dapm_micbias, snd_soc_dapm_mic,
-	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_dac,
-	snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_pga,
-	snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, snd_soc_dapm_post
+	snd_soc_dapm_pre, snd_soc_dapm_supply, snd_soc_dapm_micbias,
+	snd_soc_dapm_mic, snd_soc_dapm_mux, snd_soc_dapm_value_mux,
+	snd_soc_dapm_dac, snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl,
+	snd_soc_dapm_pga, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk,
+	snd_soc_dapm_post
 };
 
 static int dapm_down_seq[] = {
 	snd_soc_dapm_pre, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk,
 	snd_soc_dapm_pga, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_mixer,
 	snd_soc_dapm_dac, snd_soc_dapm_mic, snd_soc_dapm_micbias,
-	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_post
+	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_supply,
+	snd_soc_dapm_post
 };
 
 static int dapm_status = 1;
@@ -165,6 +167,7 @@ static void dapm_set_path_status(struct snd_soc_dapm_widget *w,
 	case snd_soc_dapm_dac:
 	case snd_soc_dapm_micbias:
 	case snd_soc_dapm_vmid:
+	case snd_soc_dapm_supply:
 		p->connect = 1;
 	break;
 	/* does effect routing - dynamically connected */
@@ -435,6 +438,9 @@ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget)
 	struct snd_soc_dapm_path *path;
 	int con = 0;
 
+	if (widget->id == snd_soc_dapm_supply)
+		return 0;
+
 	if (widget->id == snd_soc_dapm_adc && widget->active)
 		return 1;
 
@@ -471,6 +477,9 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget)
 	struct snd_soc_dapm_path *path;
 	int con = 0;
 
+	if (widget->id == snd_soc_dapm_supply)
+		return 0;
+
 	/* active stream ? */
 	if (widget->id == snd_soc_dapm_dac && widget->active)
 		return 1;
@@ -622,6 +631,26 @@ static int dapm_dac_check_power(struct snd_soc_dapm_widget *w)
 	}
 }
 
+/* Check to see if a power supply is needed */
+static int dapm_supply_check_power(struct snd_soc_dapm_widget *w)
+{
+	struct snd_soc_dapm_path *path;
+	int power = 0;
+
+	/* Check if one of our outputs is connected */
+	list_for_each_entry(path, &w->sinks, list_source) {
+		if (path->sink && path->sink->power_check &&
+		    path->sink->power_check(path->sink)) {
+			power = 1;
+			break;
+		}
+	}
+
+	dapm_clear_walk(w->codec);
+
+	return power;
+}
+
 /*
  * Scan a single DAPM widget for a complete audio path and update the
  * power status appropriately.
@@ -752,6 +781,7 @@ static void dbg_dump_dapm(struct snd_soc_codec* codec, const char *action)
 		case snd_soc_dapm_pga:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+		case snd_soc_dapm_supply:
 			if (w->name) {
 				in = is_connected_input_ep(w);
 				dapm_clear_walk(w->codec);
@@ -880,6 +910,7 @@ static ssize_t dapm_widget_show(struct device *dev,
 		case snd_soc_dapm_pga:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+		case snd_soc_dapm_supply:
 			if (w->name)
 				count += sprintf(buf + count, "%s: %s\n",
 					w->name, w->power ? "On":"Off");
@@ -1044,6 +1075,7 @@ static int snd_soc_dapm_add_route(struct snd_soc_codec *codec,
 	case snd_soc_dapm_vmid:
 	case snd_soc_dapm_pre:
 	case snd_soc_dapm_post:
+	case snd_soc_dapm_supply:
 		list_add(&path->list, &codec->dapm_paths);
 		list_add(&path->list_sink, &wsink->sources);
 		list_add(&path->list_source, &wsource->sinks);
@@ -1164,6 +1196,8 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec)
 		case snd_soc_dapm_line:
 			w->power_check = dapm_generic_check_power;
 			break;
+		case snd_soc_dapm_supply:
+			w->power_check = dapm_supply_check_power;
 		case snd_soc_dapm_vmid:
 		case snd_soc_dapm_pre:
 		case snd_soc_dapm_post:
-- 
cgit v0.10.2


From 42768a12822c3a0a6d7db69445281db975938294 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 22 Apr 2009 18:39:39 +0100
Subject: ASoC: Use DAPM supply widget for WM8903 charge pump

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index c539184..a3a489d 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -217,7 +217,6 @@ struct wm8903_priv {
 	int sysclk;
 
 	/* Reference counts */
-	int charge_pump_users;
 	int class_w_users;
 	int playback_active;
 	int capture_active;
@@ -373,6 +372,15 @@ static void wm8903_reset(struct snd_soc_codec *codec)
 #define WM8903_OUTPUT_INT   0x2
 #define WM8903_OUTPUT_IN    0x1
 
+static int wm8903_cp_event(struct snd_soc_dapm_widget *w,
+			   struct snd_kcontrol *kcontrol, int event)
+{
+	WARN_ON(event != SND_SOC_DAPM_POST_PMU);
+	mdelay(4);
+
+	return 0;
+}
+
 /*
  * Event for headphone and line out amplifier power changes.  Special
  * power up/down sequences are required in order to maximise pop/click
@@ -382,12 +390,9 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w,
 			       struct snd_kcontrol *kcontrol, int event)
 {
 	struct snd_soc_codec *codec = w->codec;
-	struct wm8903_priv *wm8903 = codec->private_data;
-	struct i2c_client *i2c = codec->control_data;
 	u16 val;
 	u16 reg;
 	int shift;
-	u16 cp_reg = wm8903_read(codec, WM8903_CHARGE_PUMP_0);
 
 	switch (w->reg) {
 	case WM8903_POWER_MANAGEMENT_2:
@@ -419,18 +424,6 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w,
 		/* Short the output */
 		val &= ~(WM8903_OUTPUT_SHORT << shift);
 		wm8903_write(codec, reg, val);
-
-		wm8903->charge_pump_users++;
-
-		dev_dbg(&i2c->dev, "Charge pump use count now %d\n",
-			wm8903->charge_pump_users);
-
-		if (wm8903->charge_pump_users == 1) {
-			dev_dbg(&i2c->dev, "Enabling charge pump\n");
-			wm8903_write(codec, WM8903_CHARGE_PUMP_0,
-				     cp_reg | WM8903_CP_ENA);
-			mdelay(4);
-		}
 	}
 
 	if (event & SND_SOC_DAPM_POST_PMU) {
@@ -464,19 +457,6 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w,
 		wm8903_write(codec, reg, val);
 	}
 
-	if (event & SND_SOC_DAPM_POST_PMD) {
-		wm8903->charge_pump_users--;
-
-		dev_dbg(&i2c->dev, "Charge pump use count now %d\n",
-			wm8903->charge_pump_users);
-
-		if (wm8903->charge_pump_users == 0) {
-			dev_dbg(&i2c->dev, "Disabling charge pump\n");
-			wm8903_write(codec, WM8903_CHARGE_PUMP_0,
-				     cp_reg & ~WM8903_CP_ENA);
-		}
-	}
-
 	return 0;
 }
 
@@ -844,26 +824,28 @@ SND_SOC_DAPM_MIXER("Right Speaker Mixer", WM8903_POWER_MANAGEMENT_4, 0, 0,
 SND_SOC_DAPM_PGA_E("Left Headphone Output PGA", WM8903_POWER_MANAGEMENT_2,
 		   1, 0, NULL, 0, wm8903_output_event,
 		   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU |
-		   SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD),
+		   SND_SOC_DAPM_PRE_PMD),
 SND_SOC_DAPM_PGA_E("Right Headphone Output PGA", WM8903_POWER_MANAGEMENT_2,
 		   0, 0, NULL, 0, wm8903_output_event,
 		   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU |
-		   SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD),
+		   SND_SOC_DAPM_PRE_PMD),
 
 SND_SOC_DAPM_PGA_E("Left Line Output PGA", WM8903_POWER_MANAGEMENT_3, 1, 0,
 		   NULL, 0, wm8903_output_event,
 		   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU |
-		   SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD),
+		   SND_SOC_DAPM_PRE_PMD),
 SND_SOC_DAPM_PGA_E("Right Line Output PGA", WM8903_POWER_MANAGEMENT_3, 0, 0,
 		   NULL, 0, wm8903_output_event,
 		   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU |
-		   SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD),
+		   SND_SOC_DAPM_PRE_PMD),
 
 SND_SOC_DAPM_PGA("Left Speaker PGA", WM8903_POWER_MANAGEMENT_5, 1, 0,
 		 NULL, 0),
 SND_SOC_DAPM_PGA("Right Speaker PGA", WM8903_POWER_MANAGEMENT_5, 0, 0,
 		 NULL, 0),
 
+SND_SOC_DAPM_SUPPLY("Charge Pump", WM8903_CHARGE_PUMP_0, 0, 0,
+		    wm8903_cp_event, SND_SOC_DAPM_POST_PMU),
 };
 
 static const struct snd_soc_dapm_route intercon[] = {
@@ -951,6 +933,11 @@ static const struct snd_soc_dapm_route intercon[] = {
 
 	{ "ROP", NULL, "Right Speaker PGA" },
 	{ "RON", NULL, "Right Speaker PGA" },
+
+	{ "Left Headphone Output PGA", NULL, "Charge Pump" },
+	{ "Right Headphone Output PGA", NULL, "Charge Pump" },
+	{ "Left Line Output PGA", NULL, "Charge Pump" },
+	{ "Right Line Output PGA", NULL, "Charge Pump" },
 };
 
 static int wm8903_add_widgets(struct snd_soc_codec *codec)
-- 
cgit v0.10.2


From c2aef4ffd24dab5c8e94c66e4042ad39d38bcf39 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 22 Apr 2009 20:04:44 +0100
Subject: ASoC: Support CLK_DSP in WM8903

CLK_DSP provides a master clock for the DAC and ADC related functionality
on the device.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index a3a489d..27c8b94 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -846,6 +846,7 @@ SND_SOC_DAPM_PGA("Right Speaker PGA", WM8903_POWER_MANAGEMENT_5, 0, 0,
 
 SND_SOC_DAPM_SUPPLY("Charge Pump", WM8903_CHARGE_PUMP_0, 0, 0,
 		    wm8903_cp_event, SND_SOC_DAPM_POST_PMU),
+SND_SOC_DAPM_SUPPLY("CLK_DSP", WM8903_CLOCK_RATES_2, 1, 0, NULL, 0),
 };
 
 static const struct snd_soc_dapm_route intercon[] = {
@@ -891,7 +892,12 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{ "Right Input PGA", NULL, "Right Input Mode Mux" },
 
 	{ "ADCL", NULL, "Left Input PGA" },
+	{ "ADCL", NULL, "CLK_DSP" },
 	{ "ADCR", NULL, "Right Input PGA" },
+	{ "ADCR", NULL, "CLK_DSP" },
+
+	{ "DACL", NULL, "CLK_DSP" },
+	{ "DACR", NULL, "CLK_DSP" },
 
 	{ "Left Output Mixer", "Left Bypass Switch", "Left Input PGA" },
 	{ "Left Output Mixer", "Right Bypass Switch", "Right Input PGA" },
-- 
cgit v0.10.2


From 4dbfe8097157fde1f8054f48f991ea45833852cd Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 22 Apr 2009 20:32:40 +0100
Subject: ASoC: Optimise configuration of WM8903 DC servo

Modify the default startup sequence in the chip to set the DC servo
dither level for optimal performance.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index 27c8b94..de0a585 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -978,6 +978,11 @@ static int wm8903_set_bias_level(struct snd_soc_codec *codec,
 			wm8903_write(codec, WM8903_CLOCK_RATES_2,
 				     WM8903_CLK_SYS_ENA);
 
+			/* Change DC servo dither level in startup sequence */
+			wm8903_write(codec, WM8903_WRITE_SEQUENCER_0, 0x11);
+			wm8903_write(codec, WM8903_WRITE_SEQUENCER_1, 0x1257);
+			wm8903_write(codec, WM8903_WRITE_SEQUENCER_2, 0x2);
+
 			wm8903_run_sequence(codec, 0);
 			wm8903_sync_reg_cache(codec, codec->reg_cache);
 
-- 
cgit v0.10.2


From d7d5c5476a12333a33b7a14ebb10eccc729c01cb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 22 Apr 2009 21:03:50 +0100
Subject: ASoC: Actively manage the DC servo for WM8903

Save a little extra power by enabling the DC servo offset correction
for the output channels only when the relevant channels are enabled.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index de0a585..0bab5c6 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -392,14 +392,18 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w,
 	struct snd_soc_codec *codec = w->codec;
 	u16 val;
 	u16 reg;
+	u16 dcs_reg;
+	u16 dcs_bit;
 	int shift;
 
 	switch (w->reg) {
 	case WM8903_POWER_MANAGEMENT_2:
 		reg = WM8903_ANALOGUE_HP_0;
+		dcs_bit = 0 + w->shift;
 		break;
 	case WM8903_POWER_MANAGEMENT_3:
 		reg = WM8903_ANALOGUE_LINEOUT_0;
+		dcs_bit = 2 + w->shift;
 		break;
 	default:
 		BUG();
@@ -439,6 +443,11 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w,
 		val |= (WM8903_OUTPUT_OUT << shift);
 		wm8903_write(codec, reg, val);
 
+		/* Enable the DC servo */
+		dcs_reg = wm8903_read(codec, WM8903_DC_SERVO_0);
+		dcs_reg |= dcs_bit;
+		wm8903_write(codec, WM8903_DC_SERVO_0, dcs_reg);
+
 		/* Remove the short */
 		val |= (WM8903_OUTPUT_SHORT << shift);
 		wm8903_write(codec, reg, val);
@@ -451,6 +460,11 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w,
 		val &= ~(WM8903_OUTPUT_SHORT << shift);
 		wm8903_write(codec, reg, val);
 
+		/* Disable the DC servo */
+		dcs_reg = wm8903_read(codec, WM8903_DC_SERVO_0);
+		dcs_reg &= ~dcs_bit;
+		wm8903_write(codec, WM8903_DC_SERVO_0, dcs_reg);
+
 		/* Then disable the intermediate and output stages */
 		val &= ~((WM8903_OUTPUT_OUT | WM8903_OUTPUT_INT |
 			  WM8903_OUTPUT_IN) << shift);
-- 
cgit v0.10.2


From 727fb909e541ebd09d5b552afef02a147978c151 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 22 Apr 2009 21:06:14 +0100
Subject: ASoC: Remove redundant rate constraint for WM8903

This is now handled by symmetric_rates.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index 0bab5c6..bec418a 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -1289,14 +1289,8 @@ static int wm8903_startup(struct snd_pcm_substream *substream,
 	if (wm8903->master_substream) {
 		master_runtime = wm8903->master_substream->runtime;
 
-		dev_dbg(&i2c->dev, "Constraining to %d bits at %dHz\n",
-			master_runtime->sample_bits,
-			master_runtime->rate);
-
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_RATE,
-					     master_runtime->rate,
-					     master_runtime->rate);
+		dev_dbg(&i2c->dev, "Constraining to %d bits\n",
+			master_runtime->sample_bits);
 
 		snd_pcm_hw_constraint_minmax(substream->runtime,
 					     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
-- 
cgit v0.10.2


From 291ce18ceb84aca79368369885eec2d329ae16c5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 22 Apr 2009 21:36:14 +0100
Subject: ASoC: Implement WM8903 digital sidetone support

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index bec418a..d8a9222 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -533,6 +533,7 @@ static int wm8903_class_w_put(struct snd_kcontrol *kcontrol,
 /* ALSA can only do steps of .01dB */
 static const DECLARE_TLV_DB_SCALE(digital_tlv, -7200, 75, 1);
 
+static const DECLARE_TLV_DB_SCALE(digital_sidetone_tlv, -3600, 300, 0);
 static const DECLARE_TLV_DB_SCALE(out_tlv, -5700, 100, 0);
 
 static const DECLARE_TLV_DB_SCALE(drc_tlv_thresh, 0, 75, 0);
@@ -651,6 +652,16 @@ static const struct soc_enum rinput_inv_enum =
 	SOC_ENUM_SINGLE(WM8903_ANALOGUE_RIGHT_INPUT_1, 4, 3, rinput_mux_text);
 
 
+static const char *sidetone_text[] = {
+	"None", "Left", "Right"
+};
+
+static const struct soc_enum lsidetone_enum =
+	SOC_ENUM_SINGLE(WM8903_DAC_DIGITAL_0, 2, 3, sidetone_text);
+
+static const struct soc_enum rsidetone_enum =
+	SOC_ENUM_SINGLE(WM8903_DAC_DIGITAL_0, 0, 3, sidetone_text);
+
 static const struct snd_kcontrol_new wm8903_snd_controls[] = {
 
 /* Input PGAs - No TLV since the scale depends on PGA mode */
@@ -694,6 +705,9 @@ SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8903_ADC_DIGITAL_VOLUME_LEFT,
 SOC_ENUM("ADC Companding Mode", adc_companding),
 SOC_SINGLE("ADC Companding Switch", WM8903_AUDIO_INTERFACE_0, 3, 1, 0),
 
+SOC_DOUBLE_TLV("Digital Sidetone Volume", WM8903_DAC_DIGITAL_0, 4, 8,
+	       12, 0, digital_sidetone_tlv),
+
 /* DAC */
 SOC_DOUBLE_R_TLV("Digital Playback Volume", WM8903_DAC_DIGITAL_VOLUME_LEFT,
 		 WM8903_DAC_DIGITAL_VOLUME_RIGHT, 1, 120, 0, digital_tlv),
@@ -756,6 +770,12 @@ static const struct snd_kcontrol_new rinput_mux =
 static const struct snd_kcontrol_new rinput_inv_mux =
 	SOC_DAPM_ENUM("Right Inverting Input Mux", rinput_inv_enum);
 
+static const struct snd_kcontrol_new lsidetone_mux =
+	SOC_DAPM_ENUM("DACL Sidetone Mux", lsidetone_enum);
+
+static const struct snd_kcontrol_new rsidetone_mux =
+	SOC_DAPM_ENUM("DACR Sidetone Mux", rsidetone_enum);
+
 static const struct snd_kcontrol_new left_output_mixer[] = {
 SOC_DAPM_SINGLE("DACL Switch", WM8903_ANALOGUE_LEFT_MIX_0, 3, 1, 0),
 SOC_DAPM_SINGLE("DACR Switch", WM8903_ANALOGUE_LEFT_MIX_0, 2, 1, 0),
@@ -822,6 +842,9 @@ SND_SOC_DAPM_PGA("Right Input PGA", WM8903_POWER_MANAGEMENT_0, 0, 0, NULL, 0),
 SND_SOC_DAPM_ADC("ADCL", "Left HiFi Capture", WM8903_POWER_MANAGEMENT_6, 1, 0),
 SND_SOC_DAPM_ADC("ADCR", "Right HiFi Capture", WM8903_POWER_MANAGEMENT_6, 0, 0),
 
+SND_SOC_DAPM_MUX("DACL Sidetone", SND_SOC_NOPM, 0, 0, &lsidetone_mux),
+SND_SOC_DAPM_MUX("DACR Sidetone", SND_SOC_NOPM, 0, 0, &rsidetone_mux),
+
 SND_SOC_DAPM_DAC("DACL", "Left Playback", WM8903_POWER_MANAGEMENT_6, 3, 0),
 SND_SOC_DAPM_DAC("DACR", "Right Playback", WM8903_POWER_MANAGEMENT_6, 2, 0),
 
@@ -910,7 +933,14 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{ "ADCR", NULL, "Right Input PGA" },
 	{ "ADCR", NULL, "CLK_DSP" },
 
+	{ "DACL Sidetone", "Left", "ADCL" },
+	{ "DACL Sidetone", "Right", "ADCR" },
+	{ "DACR Sidetone", "Left", "ADCL" },
+	{ "DACR Sidetone", "Right", "ADCR" },
+
+	{ "DACL", NULL, "DACL Sidetone" },
 	{ "DACL", NULL, "CLK_DSP" },
+	{ "DACR", NULL, "DACR Sidetone" },
 	{ "DACR", NULL, "CLK_DSP" },
 
 	{ "Left Output Mixer", "Left Bypass Switch", "Left Input PGA" },
-- 
cgit v0.10.2


From 818cf5909701806285d977f7a9365c5cadb062a7 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 23 Apr 2009 09:58:22 +0300
Subject: slub: enforce MAX_ORDER

slub_max_order may not be equal to or greater than MAX_ORDER.

Additionally, if a single object cannot be placed in a slab of
slub_max_order, it still must allocate slabs below MAX_ORDER.

Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/slub.c b/mm/slub.c
index 7ab54ec..0e1247e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1909,7 +1909,7 @@ static inline int calculate_order(int size)
 	 * Doh this slab cannot be placed using slub_max_order.
 	 */
 	order = slab_order(size, 1, MAX_ORDER, 1);
-	if (order <= MAX_ORDER)
+	if (order < MAX_ORDER)
 		return order;
 	return -ENOSYS;
 }
@@ -2522,6 +2522,7 @@ __setup("slub_min_order=", setup_slub_min_order);
 static int __init setup_slub_max_order(char *str)
 {
 	get_option(&str, &slub_max_order);
+	slub_max_order = min(slub_max_order, MAX_ORDER - 1);
 
 	return 1;
 }
-- 
cgit v0.10.2


From 1a787e7ad242312af0afb2156596d42ee5e0c6bc Mon Sep 17 00:00:00 2001
From: Joonyoung Shim <jy0922.shim@samsung.com>
Date: Wed, 22 Apr 2009 13:13:34 +0900
Subject: ASoC: TWL4030: Add VDL path support

Add DAPMs for VDL(Voice Down Link) path. To support VDL path, we have
to change DAPMs of outputs(Earpiece, PreDrive Left/Right, Headset
Left/Right, Carkit Left/Right) from mux to mixer.

Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index cc2968c..fdf88df 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -321,104 +321,60 @@ static void twl4030_power_down(struct snd_soc_codec *codec)
 }
 
 /* Earpiece */
-static const char *twl4030_earpiece_texts[] =
-		{"Off", "DACL1", "DACL2", "DACR1"};
-
-static const unsigned int twl4030_earpiece_values[] =
-		{0x0, 0x1, 0x2, 0x4};
-
-static const struct soc_enum twl4030_earpiece_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_EAR_CTL, 1, 0x7,
-			ARRAY_SIZE(twl4030_earpiece_texts),
-			twl4030_earpiece_texts,
-			twl4030_earpiece_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_earpiece_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_earpiece_enum);
+static const struct snd_kcontrol_new twl4030_dapm_earpiece_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_EAR_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_EAR_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_EAR_CTL, 2, 1, 0),
+	SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_EAR_CTL, 3, 1, 0),
+};
 
 /* PreDrive Left */
-static const char *twl4030_predrivel_texts[] =
-		{"Off", "DACL1", "DACL2", "DACR2"};
-
-static const unsigned int twl4030_predrivel_values[] =
-		{0x0, 0x1, 0x2, 0x4};
-
-static const struct soc_enum twl4030_predrivel_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_PREDL_CTL, 1, 0x7,
-			ARRAY_SIZE(twl4030_predrivel_texts),
-			twl4030_predrivel_texts,
-			twl4030_predrivel_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_predrivel_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_predrivel_enum);
+static const struct snd_kcontrol_new twl4030_dapm_predrivel_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_PREDL_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_PREDL_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PREDL_CTL, 2, 1, 0),
+	SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PREDL_CTL, 3, 1, 0),
+};
 
 /* PreDrive Right */
-static const char *twl4030_predriver_texts[] =
-		{"Off", "DACR1", "DACR2", "DACL2"};
-
-static const unsigned int twl4030_predriver_values[] =
-		{0x0, 0x1, 0x2, 0x4};
-
-static const struct soc_enum twl4030_predriver_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_PREDR_CTL, 1, 0x7,
-			ARRAY_SIZE(twl4030_predriver_texts),
-			twl4030_predriver_texts,
-			twl4030_predriver_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_predriver_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_predriver_enum);
+static const struct snd_kcontrol_new twl4030_dapm_predriver_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_PREDR_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_PREDR_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PREDR_CTL, 2, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PREDR_CTL, 3, 1, 0),
+};
 
 /* Headset Left */
-static const char *twl4030_hsol_texts[] =
-		{"Off", "DACL1", "DACL2"};
-
-static const struct soc_enum twl4030_hsol_enum =
-	SOC_ENUM_SINGLE(TWL4030_REG_HS_SEL, 1,
-			ARRAY_SIZE(twl4030_hsol_texts),
-			twl4030_hsol_texts);
-
-static const struct snd_kcontrol_new twl4030_dapm_hsol_control =
-SOC_DAPM_ENUM("Route", twl4030_hsol_enum);
+static const struct snd_kcontrol_new twl4030_dapm_hsol_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_HS_SEL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_HS_SEL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_HS_SEL, 2, 1, 0),
+};
 
 /* Headset Right */
-static const char *twl4030_hsor_texts[] =
-		{"Off", "DACR1", "DACR2"};
-
-static const struct soc_enum twl4030_hsor_enum =
-	SOC_ENUM_SINGLE(TWL4030_REG_HS_SEL, 4,
-			ARRAY_SIZE(twl4030_hsor_texts),
-			twl4030_hsor_texts);
-
-static const struct snd_kcontrol_new twl4030_dapm_hsor_control =
-SOC_DAPM_ENUM("Route", twl4030_hsor_enum);
+static const struct snd_kcontrol_new twl4030_dapm_hsor_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_HS_SEL, 3, 1, 0),
+	SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_HS_SEL, 4, 1, 0),
+	SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_HS_SEL, 5, 1, 0),
+};
 
 /* Carkit Left */
-static const char *twl4030_carkitl_texts[] =
-		{"Off", "DACL1", "DACL2"};
-
-static const struct soc_enum twl4030_carkitl_enum =
-	SOC_ENUM_SINGLE(TWL4030_REG_PRECKL_CTL, 1,
-			ARRAY_SIZE(twl4030_carkitl_texts),
-			twl4030_carkitl_texts);
-
-static const struct snd_kcontrol_new twl4030_dapm_carkitl_control =
-SOC_DAPM_ENUM("Route", twl4030_carkitl_enum);
+static const struct snd_kcontrol_new twl4030_dapm_carkitl_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_PRECKL_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_PRECKL_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PRECKL_CTL, 2, 1, 0),
+};
 
 /* Carkit Right */
-static const char *twl4030_carkitr_texts[] =
-		{"Off", "DACR1", "DACR2"};
-
-static const struct soc_enum twl4030_carkitr_enum =
-	SOC_ENUM_SINGLE(TWL4030_REG_PRECKR_CTL, 1,
-			ARRAY_SIZE(twl4030_carkitr_texts),
-			twl4030_carkitr_texts);
-
-static const struct snd_kcontrol_new twl4030_dapm_carkitr_control =
-SOC_DAPM_ENUM("Route", twl4030_carkitr_enum);
+static const struct snd_kcontrol_new twl4030_dapm_carkitr_controls[] = {
+	SOC_DAPM_SINGLE("Voice", TWL4030_REG_PRECKR_CTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_PRECKR_CTL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PRECKR_CTL, 2, 1, 0),
+};
 
 /* Handsfree Left */
 static const char *twl4030_handsfreel_texts[] =
-		{"Voice", "DACL1", "DACL2", "DACR2"};
+		{"Voice", "AudioL1", "AudioL2", "AudioR2"};
 
 static const struct soc_enum twl4030_handsfreel_enum =
 	SOC_ENUM_SINGLE(TWL4030_REG_HFL_CTL, 0,
@@ -430,7 +386,7 @@ SOC_DAPM_ENUM("Route", twl4030_handsfreel_enum);
 
 /* Handsfree Right */
 static const char *twl4030_handsfreer_texts[] =
-		{"Voice", "DACR1", "DACR2", "DACL2"};
+		{"Voice", "AudioR1", "AudioR2", "AudioL2"};
 
 static const struct soc_enum twl4030_handsfreer_enum =
 	SOC_ENUM_SINGLE(TWL4030_REG_HFR_CTL, 0,
@@ -829,6 +785,12 @@ static DECLARE_TLV_DB_SCALE(digital_fine_tlv, -6300, 100, 1);
 static DECLARE_TLV_DB_SCALE(digital_coarse_tlv, 0, 600, 0);
 
 /*
+ * Voice Downlink GAIN volume control:
+ * from -37 to 12 dB in 1 dB steps (mute instead of -37 dB)
+ */
+static DECLARE_TLV_DB_SCALE(digital_voice_downlink_tlv, -3700, 100, 1);
+
+/*
  * Analog playback gain
  * -24 dB to 12 dB in 2 dB steps
  */
@@ -892,6 +854,16 @@ static const struct snd_kcontrol_new twl4030_snd_controls[] = {
 		TWL4030_REG_ARXL2_APGA_CTL, TWL4030_REG_ARXR2_APGA_CTL,
 		1, 1, 0),
 
+	/* Common voice downlink gain controls */
+	SOC_SINGLE_TLV("DAC Voice Digital Downlink Volume",
+		TWL4030_REG_VRXPGA, 0, 0x31, 0, digital_voice_downlink_tlv),
+
+	SOC_SINGLE_TLV("DAC Voice Analog Downlink Volume",
+		TWL4030_REG_VDL_APGA_CTL, 3, 0x12, 1, analog_tlv),
+
+	SOC_SINGLE("DAC Voice Analog Downlink Switch",
+		TWL4030_REG_VDL_APGA_CTL, 1, 1, 0),
+
 	/* Separate output gain controls */
 	SOC_DOUBLE_R_TLV_TWL4030("PreDriv Playback Volume",
 		TWL4030_REG_PREDL_CTL, TWL4030_REG_PREDR_CTL,
@@ -956,6 +928,8 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 			SND_SOC_NOPM, 0, 0),
 	SND_SOC_DAPM_DAC("DAC Left2", "Left Rear Playback",
 			SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_DAC("DAC Voice", "Voice Playback",
+			TWL4030_REG_AVDAC_CTL, 4, 0),
 
 	/* Analog PGAs */
 	SND_SOC_DAPM_PGA("ARXR1_APGA", TWL4030_REG_ARXR1_APGA_CTL,
@@ -966,6 +940,8 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 			0, 0, NULL, 0),
 	SND_SOC_DAPM_PGA("ARXL2_APGA", TWL4030_REG_ARXL2_APGA_CTL,
 			0, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("VDL_APGA", TWL4030_REG_VDL_APGA_CTL,
+			0, 0, NULL, 0),
 
 	/* Analog bypasses */
 	SND_SOC_DAPM_SWITCH_E("Right1 Analog Loopback", SND_SOC_NOPM, 0, 0,
@@ -998,26 +974,35 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	SND_SOC_DAPM_MIXER("Analog L2 Playback Mixer", TWL4030_REG_AVDAC_CTL,
 			3, 0, NULL, 0),
 
-	/* Output MUX controls */
+	/* Output MIXER controls */
 	/* Earpiece */
-	SND_SOC_DAPM_VALUE_MUX("Earpiece Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_earpiece_control),
+	SND_SOC_DAPM_MIXER("Earpiece Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_earpiece_controls[0],
+			ARRAY_SIZE(twl4030_dapm_earpiece_controls)),
 	/* PreDrivL/R */
-	SND_SOC_DAPM_VALUE_MUX("PredriveL Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_predrivel_control),
-	SND_SOC_DAPM_VALUE_MUX("PredriveR Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_predriver_control),
+	SND_SOC_DAPM_MIXER("PredriveL Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_predrivel_controls[0],
+			ARRAY_SIZE(twl4030_dapm_predrivel_controls)),
+	SND_SOC_DAPM_MIXER("PredriveR Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_predriver_controls[0],
+			ARRAY_SIZE(twl4030_dapm_predriver_controls)),
 	/* HeadsetL/R */
-	SND_SOC_DAPM_MUX_E("HeadsetL Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_hsol_control, headsetl_event,
-		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
-	SND_SOC_DAPM_MUX("HeadsetR Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_hsor_control),
+	SND_SOC_DAPM_MIXER_E("HeadsetL Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_hsol_controls[0],
+			ARRAY_SIZE(twl4030_dapm_hsol_controls), headsetl_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_MIXER("HeadsetR Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_hsor_controls[0],
+			ARRAY_SIZE(twl4030_dapm_hsor_controls)),
 	/* CarkitL/R */
-	SND_SOC_DAPM_MUX("CarkitL Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_carkitl_control),
-	SND_SOC_DAPM_MUX("CarkitR Mux", SND_SOC_NOPM, 0, 0,
-		&twl4030_dapm_carkitr_control),
+	SND_SOC_DAPM_MIXER("CarkitL Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_carkitl_controls[0],
+			ARRAY_SIZE(twl4030_dapm_carkitl_controls)),
+	SND_SOC_DAPM_MIXER("CarkitR Mixer", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_carkitr_controls[0],
+			ARRAY_SIZE(twl4030_dapm_carkitr_controls)),
+
+	/* Output MUX controls */
 	/* HandsfreeL/R */
 	SND_SOC_DAPM_MUX_E("HandsfreeL Mux", TWL4030_REG_HFL_CTL, 5, 0,
 		&twl4030_dapm_handsfreel_control, handsfree_event,
@@ -1082,50 +1067,61 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"ARXL2_APGA", NULL, "Analog L2 Playback Mixer"},
 	{"ARXR2_APGA", NULL, "Analog R2 Playback Mixer"},
 
+	{"VDL_APGA", NULL, "DAC Voice"},
+
 	/* Internal playback routings */
 	/* Earpiece */
-	{"Earpiece Mux", "DACL1", "ARXL1_APGA"},
-	{"Earpiece Mux", "DACL2", "ARXL2_APGA"},
-	{"Earpiece Mux", "DACR1", "ARXR1_APGA"},
+	{"Earpiece Mixer", "Voice", "VDL_APGA"},
+	{"Earpiece Mixer", "AudioL1", "ARXL1_APGA"},
+	{"Earpiece Mixer", "AudioL2", "ARXL2_APGA"},
+	{"Earpiece Mixer", "AudioR1", "ARXR1_APGA"},
 	/* PreDrivL */
-	{"PredriveL Mux", "DACL1", "ARXL1_APGA"},
-	{"PredriveL Mux", "DACL2", "ARXL2_APGA"},
-	{"PredriveL Mux", "DACR2", "ARXR2_APGA"},
+	{"PredriveL Mixer", "Voice", "VDL_APGA"},
+	{"PredriveL Mixer", "AudioL1", "ARXL1_APGA"},
+	{"PredriveL Mixer", "AudioL2", "ARXL2_APGA"},
+	{"PredriveL Mixer", "AudioR2", "ARXR2_APGA"},
 	/* PreDrivR */
-	{"PredriveR Mux", "DACR1", "ARXR1_APGA"},
-	{"PredriveR Mux", "DACR2", "ARXR2_APGA"},
-	{"PredriveR Mux", "DACL2", "ARXL2_APGA"},
+	{"PredriveR Mixer", "Voice", "VDL_APGA"},
+	{"PredriveR Mixer", "AudioR1", "ARXR1_APGA"},
+	{"PredriveR Mixer", "AudioR2", "ARXR2_APGA"},
+	{"PredriveR Mixer", "AudioL2", "ARXL2_APGA"},
 	/* HeadsetL */
-	{"HeadsetL Mux", "DACL1", "ARXL1_APGA"},
-	{"HeadsetL Mux", "DACL2", "ARXL2_APGA"},
+	{"HeadsetL Mixer", "Voice", "VDL_APGA"},
+	{"HeadsetL Mixer", "AudioL1", "ARXL1_APGA"},
+	{"HeadsetL Mixer", "AudioL2", "ARXL2_APGA"},
 	/* HeadsetR */
-	{"HeadsetR Mux", "DACR1", "ARXR1_APGA"},
-	{"HeadsetR Mux", "DACR2", "ARXR2_APGA"},
+	{"HeadsetR Mixer", "Voice", "VDL_APGA"},
+	{"HeadsetR Mixer", "AudioR1", "ARXR1_APGA"},
+	{"HeadsetR Mixer", "AudioR2", "ARXR2_APGA"},
 	/* CarkitL */
-	{"CarkitL Mux", "DACL1", "ARXL1_APGA"},
-	{"CarkitL Mux", "DACL2", "ARXL2_APGA"},
+	{"CarkitL Mixer", "Voice", "VDL_APGA"},
+	{"CarkitL Mixer", "AudioL1", "ARXL1_APGA"},
+	{"CarkitL Mixer", "AudioL2", "ARXL2_APGA"},
 	/* CarkitR */
-	{"CarkitR Mux", "DACR1", "ARXR1_APGA"},
-	{"CarkitR Mux", "DACR2", "ARXR2_APGA"},
+	{"CarkitR Mixer", "Voice", "VDL_APGA"},
+	{"CarkitR Mixer", "AudioR1", "ARXR1_APGA"},
+	{"CarkitR Mixer", "AudioR2", "ARXR2_APGA"},
 	/* HandsfreeL */
-	{"HandsfreeL Mux", "DACL1", "ARXL1_APGA"},
-	{"HandsfreeL Mux", "DACL2", "ARXL2_APGA"},
-	{"HandsfreeL Mux", "DACR2", "ARXR2_APGA"},
+	{"HandsfreeL Mux", "Voice", "VDL_APGA"},
+	{"HandsfreeL Mux", "AudioL1", "ARXL1_APGA"},
+	{"HandsfreeL Mux", "AudioL2", "ARXL2_APGA"},
+	{"HandsfreeL Mux", "AudioR2", "ARXR2_APGA"},
 	/* HandsfreeR */
-	{"HandsfreeR Mux", "DACR1", "ARXR1_APGA"},
-	{"HandsfreeR Mux", "DACR2", "ARXR2_APGA"},
-	{"HandsfreeR Mux", "DACL2", "ARXL2_APGA"},
+	{"HandsfreeR Mux", "Voice", "VDL_APGA"},
+	{"HandsfreeR Mux", "AudioR1", "ARXR1_APGA"},
+	{"HandsfreeR Mux", "AudioR2", "ARXR2_APGA"},
+	{"HandsfreeR Mux", "AudioL2", "ARXL2_APGA"},
 
 	/* outputs */
 	{"OUTL", NULL, "ARXL2_APGA"},
 	{"OUTR", NULL, "ARXR2_APGA"},
-	{"EARPIECE", NULL, "Earpiece Mux"},
-	{"PREDRIVEL", NULL, "PredriveL Mux"},
-	{"PREDRIVER", NULL, "PredriveR Mux"},
-	{"HSOL", NULL, "HeadsetL Mux"},
-	{"HSOR", NULL, "HeadsetR Mux"},
-	{"CARKITL", NULL, "CarkitL Mux"},
-	{"CARKITR", NULL, "CarkitR Mux"},
+	{"EARPIECE", NULL, "Earpiece Mixer"},
+	{"PREDRIVEL", NULL, "PredriveL Mixer"},
+	{"PREDRIVER", NULL, "PredriveR Mixer"},
+	{"HSOL", NULL, "HeadsetL Mixer"},
+	{"HSOR", NULL, "HeadsetR Mixer"},
+	{"CARKITL", NULL, "CarkitL Mixer"},
+	{"CARKITR", NULL, "CarkitR Mixer"},
 	{"HFL", NULL, "HandsfreeL Mux"},
 	{"HFR", NULL, "HandsfreeR Mux"},
 
-- 
cgit v0.10.2


From 2d7e71fa231035d69faffbfe506ef23638385994 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Thu, 23 Apr 2009 17:05:38 +0800
Subject: ASoC: simplify the SSP DMA parameters settings by run-time generation

The SSP DMA parameters can actually be easily generated at run-time since
they are almost similar except for the FIFO width and direction. Another
benefit is the re-use of information from 'struct ssp_device', like SSDR
physical FIFO address and DRCMR register index for both directions.

Signed-off-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Reviewed-by: pHilipp Zabel <philipp.zabel@gmail.com>

diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index b9b61dd..fb8cacc 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -50,139 +50,6 @@ struct ssp_priv {
 #endif
 };
 
-#define PXA2xx_SSP1_BASE	0x41000000
-#define PXA27x_SSP2_BASE	0x41700000
-#define PXA27x_SSP3_BASE	0x41900000
-#define PXA3xx_SSP4_BASE	0x41a00000
-
-static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_mono_out = {
-	.name			= "SSP1 PCM Mono out",
-	.dev_addr		= PXA2xx_SSP1_BASE + SSDR,
-	.drcmr			= &DRCMR(14),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_mono_in = {
-	.name			= "SSP1 PCM Mono in",
-	.dev_addr		= PXA2xx_SSP1_BASE + SSDR,
-	.drcmr			= &DRCMR(13),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_stereo_out = {
-	.name			= "SSP1 PCM Stereo out",
-	.dev_addr		= PXA2xx_SSP1_BASE + SSDR,
-	.drcmr			= &DRCMR(14),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_stereo_in = {
-	.name			= "SSP1 PCM Stereo in",
-	.dev_addr		= PXA2xx_SSP1_BASE + SSDR,
-	.drcmr			= &DRCMR(13),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_mono_out = {
-	.name			= "SSP2 PCM Mono out",
-	.dev_addr		= PXA27x_SSP2_BASE + SSDR,
-	.drcmr			= &DRCMR(16),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_mono_in = {
-	.name			= "SSP2 PCM Mono in",
-	.dev_addr		= PXA27x_SSP2_BASE + SSDR,
-	.drcmr			= &DRCMR(15),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_stereo_out = {
-	.name			= "SSP2 PCM Stereo out",
-	.dev_addr		= PXA27x_SSP2_BASE + SSDR,
-	.drcmr			= &DRCMR(16),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_stereo_in = {
-	.name			= "SSP2 PCM Stereo in",
-	.dev_addr		= PXA27x_SSP2_BASE + SSDR,
-	.drcmr			= &DRCMR(15),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_mono_out = {
-	.name			= "SSP3 PCM Mono out",
-	.dev_addr		= PXA27x_SSP3_BASE + SSDR,
-	.drcmr			= &DRCMR(67),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_mono_in = {
-	.name			= "SSP3 PCM Mono in",
-	.dev_addr		= PXA27x_SSP3_BASE + SSDR,
-	.drcmr			= &DRCMR(66),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_stereo_out = {
-	.name			= "SSP3 PCM Stereo out",
-	.dev_addr		= PXA27x_SSP3_BASE + SSDR,
-	.drcmr			= &DRCMR(67),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_stereo_in = {
-	.name			= "SSP3 PCM Stereo in",
-	.dev_addr		= PXA27x_SSP3_BASE + SSDR,
-	.drcmr			= &DRCMR(66),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_mono_out = {
-	.name			= "SSP4 PCM Mono out",
-	.dev_addr		= PXA3xx_SSP4_BASE + SSDR,
-	.drcmr			= &DRCMR(67),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_mono_in = {
-	.name			= "SSP4 PCM Mono in",
-	.dev_addr		= PXA3xx_SSP4_BASE + SSDR,
-	.drcmr			= &DRCMR(66),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH2,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_stereo_out = {
-	.name			= "SSP4 PCM Stereo out",
-	.dev_addr		= PXA3xx_SSP4_BASE + SSDR,
-	.drcmr			= &DRCMR(67),
-	.dcmd			= DCMD_INCSRCADDR | DCMD_FLOWTRG |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
-static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_stereo_in = {
-	.name			= "SSP4 PCM Stereo in",
-	.dev_addr		= PXA3xx_SSP4_BASE + SSDR,
-	.drcmr			= &DRCMR(66),
-	.dcmd			= DCMD_INCTRGADDR | DCMD_FLOWSRC |
-				  DCMD_BURST16 | DCMD_WIDTH4,
-};
-
 static void dump_registers(struct ssp_device *ssp)
 {
 	dev_dbg(&ssp->pdev->dev, "SSCR0 0x%08x SSCR1 0x%08x SSTO 0x%08x\n",
@@ -194,25 +61,33 @@ static void dump_registers(struct ssp_device *ssp)
 		 ssp_read_reg(ssp, SSACD));
 }
 
-static struct pxa2xx_pcm_dma_params *ssp_dma_params[4][4] = {
-	{
-		&pxa_ssp1_pcm_mono_out, &pxa_ssp1_pcm_mono_in,
-		&pxa_ssp1_pcm_stereo_out, &pxa_ssp1_pcm_stereo_in,
-	},
-	{
-		&pxa_ssp2_pcm_mono_out, &pxa_ssp2_pcm_mono_in,
-		&pxa_ssp2_pcm_stereo_out, &pxa_ssp2_pcm_stereo_in,
-	},
-	{
-		&pxa_ssp3_pcm_mono_out, &pxa_ssp3_pcm_mono_in,
-		&pxa_ssp3_pcm_stereo_out, &pxa_ssp3_pcm_stereo_in,
-	},
-	{
-		&pxa_ssp4_pcm_mono_out, &pxa_ssp4_pcm_mono_in,
-		&pxa_ssp4_pcm_stereo_out, &pxa_ssp4_pcm_stereo_in,
-	},
+struct pxa2xx_pcm_dma_data {
+	struct pxa2xx_pcm_dma_params params;
+	char name[20];
 };
 
+static struct pxa2xx_pcm_dma_params *
+ssp_get_dma_params(struct ssp_device *ssp, int stereo, int out)
+{
+	struct pxa2xx_pcm_dma_data *dma;
+
+	dma = kzalloc(sizeof(struct pxa2xx_pcm_dma_data), GFP_KERNEL);
+	if (dma == NULL)
+		return NULL;
+
+	snprintf(dma->name, 20, "SSP%d PCM %s %s", ssp->port_id,
+			stereo ? "Stereo" : "Mono", out ? "out" : "in");
+
+	dma->params.name = dma->name;
+	dma->params.drcmr = &DRCMR(out ? ssp->drcmr_tx : ssp->drcmr_rx);
+	dma->params.dcmd = (out ? (DCMD_INCSRCADDR | DCMD_FLOWTRG) :
+				  (DCMD_INCTRGADDR | DCMD_FLOWSRC)) |
+			(stereo ? DCMD_WIDTH4 : DCMD_WIDTH2) | DCMD_BURST16;
+	dma->params.dev_addr = ssp->phys_base + SSDR;
+
+	return &dma->params;
+}
+
 static int pxa_ssp_startup(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai)
 {
@@ -227,6 +102,11 @@ static int pxa_ssp_startup(struct snd_pcm_substream *substream,
 		clk_enable(priv->dev.ssp->clk);
 		ssp_disable(&priv->dev);
 	}
+
+	if (cpu_dai->dma_data) {
+		kfree(cpu_dai->dma_data);
+		cpu_dai->dma_data = NULL;
+	}
 	return ret;
 }
 
@@ -241,6 +121,11 @@ static void pxa_ssp_shutdown(struct snd_pcm_substream *substream,
 		ssp_disable(&priv->dev);
 		clk_disable(priv->dev.ssp->clk);
 	}
+
+	if (cpu_dai->dma_data) {
+		kfree(cpu_dai->dma_data);
+		cpu_dai->dma_data = NULL;
+	}
 }
 
 #ifdef CONFIG_PM
@@ -653,25 +538,23 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
 	struct ssp_priv *priv = cpu_dai->private_data;
 	struct ssp_device *ssp = priv->dev.ssp;
-	int dma = 0, chn = params_channels(params);
+	int chn = params_channels(params);
 	u32 sscr0;
 	u32 sspsp;
 	int width = snd_pcm_format_physical_width(params_format(params));
 	int ttsa = ssp_read_reg(ssp, SSTSA) & 0xf;
 
-	/* select correct DMA params */
-	if (substream->stream != SNDRV_PCM_STREAM_PLAYBACK)
-		dma = 1; /* capture DMA offset is 1,3 */
+	/* generate correct DMA params */
+	if (cpu_dai->dma_data)
+		kfree(cpu_dai->dma_data);
+
 	/* Network mode with one active slot (ttsa == 1) can be used
 	 * to force 16-bit frame width on the wire (for S16_LE), even
 	 * with two channels. Use 16-bit DMA transfers for this case.
 	 */
-	if (((chn == 2) && (ttsa != 1)) || (width == 32))
-		dma += 2; /* 32-bit DMA offset is 2, 16-bit is 0 */
-
-	cpu_dai->dma_data = ssp_dma_params[cpu_dai->id][dma];
-
-	dev_dbg(&ssp->pdev->dev, "pxa_ssp_hw_params: dma %d\n", dma);
+	cpu_dai->dma_data = ssp_get_dma_params(ssp,
+			((chn == 2) && (ttsa != 1)) || (width == 32),
+			substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
 
 	/* we can only change the settings if the port is not in use */
 	if (ssp_read_reg(ssp, SSCR0) & SSCR0_SSE)
-- 
cgit v0.10.2


From 8eb9feabe566d8272510d5fb33f55a72e3ab3ce4 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Thu, 23 Apr 2009 17:57:46 +0800
Subject: ASoC: change stereo/mono to 32-bit/16-bit for pxa-ssp

The original idea came from pHilipp, and this makes the code looks
more consistent.

Signed-off-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index fb8cacc..6fc7876 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -67,7 +67,7 @@ struct pxa2xx_pcm_dma_data {
 };
 
 static struct pxa2xx_pcm_dma_params *
-ssp_get_dma_params(struct ssp_device *ssp, int stereo, int out)
+ssp_get_dma_params(struct ssp_device *ssp, int width4, int out)
 {
 	struct pxa2xx_pcm_dma_data *dma;
 
@@ -76,13 +76,13 @@ ssp_get_dma_params(struct ssp_device *ssp, int stereo, int out)
 		return NULL;
 
 	snprintf(dma->name, 20, "SSP%d PCM %s %s", ssp->port_id,
-			stereo ? "Stereo" : "Mono", out ? "out" : "in");
+			width4 ? "32-bit" : "16-bit", out ? "out" : "in");
 
 	dma->params.name = dma->name;
 	dma->params.drcmr = &DRCMR(out ? ssp->drcmr_tx : ssp->drcmr_rx);
 	dma->params.dcmd = (out ? (DCMD_INCSRCADDR | DCMD_FLOWTRG) :
 				  (DCMD_INCTRGADDR | DCMD_FLOWSRC)) |
-			(stereo ? DCMD_WIDTH4 : DCMD_WIDTH2) | DCMD_BURST16;
+			(width4 ? DCMD_WIDTH4 : DCMD_WIDTH2) | DCMD_BURST16;
 	dma->params.dev_addr = ssp->phys_base + SSDR;
 
 	return &dma->params;
-- 
cgit v0.10.2


From 31a00c6b7c0c4f01be49f02660de920c8b82b613 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Thu, 23 Apr 2009 14:36:48 +0300
Subject: ASoC: OMAP: Add 4 channel support to mcbsp

Add 4 channel support to omap-mcbsp.
This mode is going to be used by the twl4030 codec, when it
is configured in Option1 mode.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 495192a..a5d46a7 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -259,6 +259,7 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 			regs->xcr2	|= XFRLEN2(wpf - 1);
 		}
 	case 1:
+	case 4:
 		/* Set word per (McBSP) frame for phase1 */
 		regs->rcr1	|= RFRLEN1(wpf - 1);
 		regs->xcr1	|= XFRLEN1(wpf - 1);
@@ -506,13 +507,13 @@ static struct snd_soc_dai_ops omap_mcbsp_dai_ops = {
 	.id = (link_id),					\
 	.playback = {						\
 		.channels_min = 1,				\
-		.channels_max = 2,				\
+		.channels_max = 4,				\
 		.rates = OMAP_MCBSP_RATES,			\
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,		\
 	},							\
 	.capture = {						\
 		.channels_min = 1,				\
-		.channels_max = 2,				\
+		.channels_max = 4,				\
 		.rates = OMAP_MCBSP_RATES,			\
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,		\
 	},							\
-- 
cgit v0.10.2


From 8a1f936acdfd53cb0a981f3f80483863dcd84fa9 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Thu, 23 Apr 2009 14:36:49 +0300
Subject: ASoC: TWL4030: Add 4 channel TDM support

Support for 4 channel TDM (SND_SOC_DAIFMT_DSP_A) for twl4030
codec.
The channel allocations are:
Playback:
TDM         i2s   TWL RX
Channel 1   Left  SDRL2
Channel 3   Right SDRR2
Channel 2   --    SDRL1
Channel 4   --    SDRR1

Capture:
TDM         i2s   TWL TX
Channel 1   Left  TXL1
Channel 3   Right TXR1
Channel 2   --    TXL2
Channel 4   --    TXR2

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index fdf88df..e23c20c 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -1251,6 +1251,28 @@ static void twl4030_constraints(struct twl4030_priv *twl4030,
 				twl4030->channels);
 }
 
+/* In case of 4 channel mode, the RX1 L/R for playback and the TX2 L/R for
+ * capture has to be enabled/disabled. */
+static void twl4030_tdm_enable(struct snd_soc_codec *codec, int direction,
+				int enable)
+{
+	u8 reg, mask;
+
+	reg = twl4030_read_reg_cache(codec, TWL4030_REG_OPTION);
+
+	if (direction == SNDRV_PCM_STREAM_PLAYBACK)
+		mask = TWL4030_ARXL1_VRX_EN | TWL4030_ARXR1_EN;
+	else
+		mask = TWL4030_ATXL2_VTXL_EN | TWL4030_ATXR2_VTXR_EN;
+
+	if (enable)
+		reg |= mask;
+	else
+		reg &= ~mask;
+
+	twl4030_write(codec, TWL4030_REG_OPTION, reg);
+}
+
 static int twl4030_startup(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai)
 {
@@ -1267,6 +1289,15 @@ static int twl4030_startup(struct snd_pcm_substream *substream,
 		if (twl4030->configured)
 			twl4030_constraints(twl4030, twl4030->master_substream);
 	} else {
+		if (!(twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE) &
+			TWL4030_OPTION_1)) {
+			/* In option2 4 channel is not supported, set the
+			 * constraint for the first stream for channels, the
+			 * second stream will 'inherit' this cosntraint */
+			snd_pcm_hw_constraint_minmax(substream->runtime,
+						SNDRV_PCM_HW_PARAM_CHANNELS,
+						2, 2);
+		}
 		twl4030->master_substream = substream;
 	}
 
@@ -1292,6 +1323,10 @@ static void twl4030_shutdown(struct snd_pcm_substream *substream,
 		twl4030->configured = 0;
 	 else if (!twl4030->master_substream->runtime->channels)
 		twl4030->configured = 0;
+
+	 /* If the closing substream had 4 channel, do the necessary cleanup */
+	if (substream->runtime->channels == 4)
+		twl4030_tdm_enable(codec, substream->stream, 0);
 }
 
 static int twl4030_hw_params(struct snd_pcm_substream *substream,
@@ -1304,6 +1339,16 @@ static int twl4030_hw_params(struct snd_pcm_substream *substream,
 	struct twl4030_priv *twl4030 = codec->private_data;
 	u8 mode, old_mode, format, old_format;
 
+	 /* If the substream has 4 channel, do the necessary setup */
+	if (params_channels(params) == 4) {
+		/* Safety check: are we in the correct operating mode? */
+		if ((twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE) &
+			TWL4030_OPTION_1))
+			twl4030_tdm_enable(codec, substream->stream, 1);
+		else
+			return -EINVAL;
+	}
+
 	if (twl4030->configured)
 		/* Ignoring hw_params for already configured DAI */
 		return 0;
@@ -1461,6 +1506,9 @@ static int twl4030_set_dai_fmt(struct snd_soc_dai *codec_dai,
 	case SND_SOC_DAIFMT_I2S:
 		format |= TWL4030_AIF_FORMAT_CODEC;
 		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		format |= TWL4030_AIF_FORMAT_TDM;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1642,13 +1690,13 @@ struct snd_soc_dai twl4030_dai[] = {
 	.playback = {
 		.stream_name = "Playback",
 		.channels_min = 2,
-		.channels_max = 2,
+		.channels_max = 4,
 		.rates = TWL4030_RATES | SNDRV_PCM_RATE_96000,
 		.formats = TWL4030_FORMATS,},
 	.capture = {
 		.stream_name = "Capture",
 		.channels_min = 2,
-		.channels_max = 2,
+		.channels_max = 4,
 		.rates = TWL4030_RATES,
 		.formats = TWL4030_FORMATS,},
 	.ops = &twl4030_dai_ops,
diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h
index 981ec60..3441115 100644
--- a/sound/soc/codecs/twl4030.h
+++ b/sound/soc/codecs/twl4030.h
@@ -116,6 +116,17 @@
 #define TWL4030_OPTION_1		(1 << 0)
 #define TWL4030_OPTION_2		(0 << 0)
 
+/* TWL4030_OPTION (0x02) Fields */
+
+#define TWL4030_ATXL1_EN		(1 << 0)
+#define TWL4030_ATXR1_EN		(1 << 1)
+#define TWL4030_ATXL2_VTXL_EN		(1 << 2)
+#define TWL4030_ATXR2_VTXR_EN		(1 << 3)
+#define TWL4030_ARXL1_VRX_EN		(1 << 4)
+#define TWL4030_ARXR1_EN		(1 << 5)
+#define TWL4030_ARXL2_EN		(1 << 6)
+#define TWL4030_ARXR2_EN		(1 << 7)
+
 /* TWL4030_REG_MICBIAS_CTL (0x04) Fields */
 
 #define TWL4030_MICBIAS2_CTL		0x40
-- 
cgit v0.10.2


From 9be24414aad047dcf9d8d2a9a929321536c7ebec Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 26 Mar 2009 10:25:24 -0400
Subject: tracing/wakeup: move access to wakeup_cpu into spinlock

The code had the following outside the lock:

        if (next != wakeup_task)
                return;

        pc = preempt_count();

        /* The task we are waiting for is waking up */
        data = wakeup_trace->data[wakeup_cpu];

On initialization, wakeup_task is NULL and wakeup_cpu -1. This code
is not under a lock. If wakeup_task is set on another CPU as that
task is waking up, we can see the wakeup_task before wakeup_cpu is
set. If we read wakeup_cpu while it is still -1 then we will have
a bad data pointer.

This patch moves the reading of wakeup_cpu within the protection of
the spinlock used to protect the writing of wakeup_cpu and wakeup_task.

[ Impact: remove possible race causing invalid pointer dereference ]

Reported-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index b8b13c5..eacb272 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -138,9 +138,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 
 	pc = preempt_count();
 
-	/* The task we are waiting for is waking up */
-	data = wakeup_trace->data[wakeup_cpu];
-
 	/* disable local data, not wakeup_cpu data */
 	cpu = raw_smp_processor_id();
 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
@@ -154,6 +151,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
+	/* The task we are waiting for is waking up */
+	data = wakeup_trace->data[wakeup_cpu];
+
 	trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
 	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
 
-- 
cgit v0.10.2


From 89ec0dee9eba6275d47be0b878cf5f6d5c2fb6eb Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 26 Mar 2009 11:03:29 -0400
Subject: tracing: increase size of number of possible events

With the new event tracing registration, we must increase the number
of events that can be registered. Currently the type field is only
one byte, which leaves us only 256 possible events.

Since we do not save the CPU number in the tracer anymore (it is determined
by the per cpu ring buffer that is used) we have an extra byte to use.

This patch increases the size of type from 1 byte (256 events) to
2 bytes (65,536 events).

It also adds a WARN_ON_ONCE if we exceed that limit.

[ Impact: allow more than 255 events ]

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2a4a407..07e0a6d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -16,13 +16,16 @@ struct dentry;
  *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
  */
 struct trace_entry {
-	int			type;
+	unsigned short		type;
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
 	int			tgid;
 };
 
+#define FTRACE_MAX_EVENT						\
+	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
+
 /*
  * Trace iterator - used by printout routines who present trace
  * results to users and which routines might sleep, etc:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5d6e879..9887131 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s)
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\n",
-				FIELD(int, type),
+				FIELD(unsigned short, type),
 				FIELD(unsigned char, flags),
 				FIELD(unsigned char, preempt_count),
 				FIELD(int, pid),
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 83a8abb..06997e7 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -537,6 +537,8 @@ int register_ftrace_event(struct trace_event *event)
  out:
 	mutex_unlock(&trace_event_mutex);
 
+	WARN_ON_ONCE(next_event_type > FTRACE_MAX_EVENT);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_event);
-- 
cgit v0.10.2


From 75db37d2f4c0ad9466ead57d467277d097b4105c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 26 Mar 2009 11:43:36 -0400
Subject: tracing: add size checks for exported ftrace internal structures

The events exported by TRACE_EVENT are automated and are guaranteed
to be correct when used.

The internal ftrace structures on the other hand are more manually
exported. These require the ftrace maintainer to make sure they
are up to date.

This patch adds a size check to help flag when a type changes in
an internal ftrace data structure, and the update needs to be reflected
in the export.

If a export is incorrect, then the only harm is that the user space
tools will not know how to correctly read the internal structures of
ftrace.

[ Impact: help prevent inconsistent ftrace format print outs ]

Signed-off-by: Steven Rostedt <srostedt@redhat.com>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9887131..b920815 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -381,8 +381,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	return cnt;
 }
 
+extern char *__bad_type_size(void);
+
 #undef FIELD
 #define FIELD(type, name)						\
+	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
 	#type, "common_" #name, offsetof(typeof(field), name),		\
 		sizeof(field.name)
 
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 48fc02f..0cb1a14 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -19,8 +19,12 @@
 #undef TRACE_STRUCT
 #define TRACE_STRUCT(args...) args
 
+extern void __bad_type_size(void);
+
 #undef TRACE_FIELD
 #define TRACE_FIELD(type, item, assign)					\
+	if (sizeof(type) != sizeof(field.item))				\
+		__bad_type_size();					\
 	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
 			       "offset:%u;\tsize:%u;\n",		\
 			       (unsigned int)offsetof(typeof(field), item), \
-- 
cgit v0.10.2


From d7285c6b5c54397fdf112c2fb98ee43193173aa9 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Thu, 23 Apr 2009 10:21:38 -0700
Subject: x86: use native register access for native tlb flushing

currently these are paravirtulaized, doesn't appear any callers rely on
this (no pv_ops backends are using native_tlb and overriding cr3/4
access).

[ Impact: fix lockdep warning with paravirt and function tracer ]

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
LKML-Reference: <20090423172138.GR3036@sequoia.sous-sol.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index d3539f9..e2927c5 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,7 +17,7 @@
 
 static inline void __native_flush_tlb(void)
 {
-	write_cr3(read_cr3());
+	native_write_cr3(native_read_cr3());
 }
 
 static inline void __native_flush_tlb_global(void)
@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void)
 	 */
 	raw_local_irq_save(flags);
 
-	cr4 = read_cr4();
+	cr4 = native_read_cr4();
 	/* clear PGE */
-	write_cr4(cr4 & ~X86_CR4_PGE);
+	native_write_cr4(cr4 & ~X86_CR4_PGE);
 	/* write old PGE again and flush TLBs */
-	write_cr4(cr4);
+	native_write_cr4(cr4);
 
 	raw_local_irq_restore(flags);
 }
-- 
cgit v0.10.2


From c2518c4366f087ebc10b3919cb2461bbe4f42d0c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 23 Apr 2009 23:26:18 -0400
Subject: tracing: fix cut and paste macro error

In case a module uses the TRACE_EVENT macro for creating automated
events in ftrace, it may choose to use a different file name
than the defined system name, or choose to use a different path than
the default "include/trace/events" include path.

If this is done, then before including trace/define_trace.h the
header would define either "TRACE_INCLUDE_FILE" for the file
name or "TRACE_INCLUDE_PATH" for the include path.

If it does not define these, then the define_trace.h defines them
instead. If define trace defines them, then define_trace.h should
also undefine them before exiting. To do this a macro is used
to note this:

 #ifndef TRACE_INCLUDE_FILE
 # define TRACE_INCLUDE_FILE TRACE_SYSTEM
 # define UNDEF_TRACE_INCLUDE_FILE
 #endif

[...]

 #ifdef UNDEF_TRACE_INCLUDE_FILE
 # undef TRACE_INCLUDE_FILE
 # undef UNDEF_TRACE_INCLUDE_FILE
 #endif

The UNDEF_TRACE_INCLUDE_FILE acts as a CPP variable to know to undef
the TRACE_INCLUDE_FILE before leaving define_trace.h.

Unfortunately, due to cut and paste errors, the macros between
FILE and PATH got mixed up.

[ Impact: undef TRACE_INCLUDE_FILE and/or TRACE_INCLUDE_PATH when needed ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 7f1f23d..abc611f 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -44,7 +44,7 @@
 
 #ifndef TRACE_INCLUDE_PATH
 # define __TRACE_INCLUDE(system) <trace/events/system.h>
-# define UNDEF_TRACE_INCLUDE_FILE
+# define UNDEF_TRACE_INCLUDE_PATH
 #else
 # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
 #endif
@@ -64,13 +64,13 @@
 
 /* Only undef what we defined in this file */
 #ifdef UNDEF_TRACE_INCLUDE_FILE
-# undef TRACE_INCLUDE_PATH
+# undef TRACE_INCLUDE_FILE
 # undef UNDEF_TRACE_INCLUDE_FILE
 #endif
 
-#ifdef UNDEF_TRACE_INCLUDE_FILE
+#ifdef UNDEF_TRACE_INCLUDE_PATH
 # undef TRACE_INCLUDE_PATH
-# undef UNDEF_TRACE_INCLUDE_FILE
+# undef UNDEF_TRACE_INCLUDE_PATH
 #endif
 
 /* We may be processing more files */
-- 
cgit v0.10.2


From 334d4169a6592d3fcd863bbe822a8f6985ffa9af Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Fri, 24 Apr 2009 11:27:05 +0800
Subject: ring_buffer: compressed event header

RB_MAX_SMALL_DATA = 28bytes is too small for most tracers, it wastes
an 'u32' to save the actually length for events which data size > 28.

This fix uses compressed event header and enlarges RB_MAX_SMALL_DATA.

[ Impact: saves about 0%-12.5%(depends on tracer) memory in ring_buffer ]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <49F13189.3090000@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index fac8f1a..1c2f809 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -11,7 +11,7 @@ struct ring_buffer_iter;
  * Don't refer to this struct directly, use functions below.
  */
 struct ring_buffer_event {
-	u32		type:2, len:3, time_delta:27;
+	u32		type_len:5, time_delta:27;
 	u32		array[];
 };
 
@@ -24,7 +24,8 @@ struct ring_buffer_event {
  *				  size is variable depending on how much
  *				  padding is needed
  *				 If time_delta is non zero:
- *				  everything else same as RINGBUF_TYPE_DATA
+ *				  array[0] holds the actual length
+ *				  size = 4 + length (bytes)
  *
  * @RINGBUF_TYPE_TIME_EXTEND:	Extend the time delta
  *				 array[0] = time delta (28 .. 59)
@@ -35,22 +36,23 @@ struct ring_buffer_event {
  *				 array[1..2] = tv_sec
  *				 size = 16 bytes
  *
- * @RINGBUF_TYPE_DATA:		Data record
- *				 If len is zero:
+ * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
+ *				Data record
+ *				 If type_len is zero:
  *				  array[0] holds the actual length
  *				  array[1..(length+3)/4] holds data
- *				  size = 4 + 4 + length (bytes)
+ *				  size = 4 + length (bytes)
  *				 else
- *				  length = len << 2
+ *				  length = type_len << 2
  *				  array[0..(length+3)/4-1] holds data
  *				  size = 4 + length (bytes)
  */
 enum ring_buffer_type {
+	RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
 	RINGBUF_TYPE_PADDING,
 	RINGBUF_TYPE_TIME_EXTEND,
 	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
 	RINGBUF_TYPE_TIME_STAMP,
-	RINGBUF_TYPE_DATA,
 };
 
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 61dbdf2..9692f10 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -28,8 +28,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 {
 	int ret;
 
-	ret = trace_seq_printf(s, "\ttype        :    2 bits\n");
-	ret = trace_seq_printf(s, "\tlen         :    3 bits\n");
+	ret = trace_seq_printf(s, "# compressed entry header\n");
+	ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
 	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
 	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
 	ret = trace_seq_printf(s, "\n");
@@ -37,8 +37,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 			       RINGBUF_TYPE_PADDING);
 	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
 			       RINGBUF_TYPE_TIME_EXTEND);
-	ret = trace_seq_printf(s, "\tdata        : type == %d\n",
-			       RINGBUF_TYPE_DATA);
+	ret = trace_seq_printf(s, "\tdata max type_len  == %d\n",
+			       RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 
 	return ret;
 }
@@ -204,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT		4U
-#define RB_MAX_SMALL_DATA	28
+#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+
+/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
+#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
 enum {
 	RB_LEN_TIME_EXTEND = 8,
@@ -213,17 +216,18 @@ enum {
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
+	return event->type_len == RINGBUF_TYPE_PADDING
+			&& event->time_delta == 0;
 }
 
 static inline int rb_discarded_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
+	return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
 }
 
 static void rb_event_set_padding(struct ring_buffer_event *event)
 {
-	event->type = RINGBUF_TYPE_PADDING;
+	event->type_len = RINGBUF_TYPE_PADDING;
 	event->time_delta = 0;
 }
 
@@ -232,8 +236,8 @@ rb_event_data_length(struct ring_buffer_event *event)
 {
 	unsigned length;
 
-	if (event->len)
-		length = event->len * RB_ALIGNMENT;
+	if (event->type_len)
+		length = event->type_len * RB_ALIGNMENT;
 	else
 		length = event->array[0];
 	return length + RB_EVNT_HDR_SIZE;
@@ -243,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event)
 static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			/* undefined */
 			return -1;
-		return rb_event_data_length(event);
+		return  event->array[0] + RB_EVNT_HDR_SIZE;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
 		return RB_LEN_TIME_EXTEND;
@@ -272,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event)
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
 	unsigned length = rb_event_length(event);
-	if (event->type != RINGBUF_TYPE_DATA)
+	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 		return length;
 	length -= RB_EVNT_HDR_SIZE;
 	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -285,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
-	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
-	if (event->len)
+	if (event->type_len)
 		return (void *)&event->array[0];
 	/* Otherwise length is in array[0] and array[1] has the data */
 	return (void *)&event->array[1];
@@ -988,7 +992,7 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
 		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
 			return;
 		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
+		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 			continue;
 		cpu_buffer->overrun++;
 		cpu_buffer->entries--;
@@ -1133,28 +1137,21 @@ static void
 rb_update_event(struct ring_buffer_event *event,
 			 unsigned type, unsigned length)
 {
-	event->type = type;
+	event->type_len = type;
 
 	switch (type) {
 
 	case RINGBUF_TYPE_PADDING:
-		break;
-
 	case RINGBUF_TYPE_TIME_EXTEND:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
-		break;
-
 	case RINGBUF_TYPE_TIME_STAMP:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
 		break;
 
-	case RINGBUF_TYPE_DATA:
+	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA) {
-			event->len = 0;
+		if (length > RB_MAX_SMALL_DATA)
 			event->array[0] = length;
-		} else
-			event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+		else
+			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 		break;
 	default:
 		BUG();
@@ -1562,7 +1559,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (length > BUF_PAGE_SIZE)
 		goto out;
 
-	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+	event = rb_reserve_next_event(cpu_buffer, 0, length);
 	if (!event)
 		goto out;
 
@@ -1634,7 +1631,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
 static inline void rb_event_discard(struct ring_buffer_event *event)
 {
-	event->type = RINGBUF_TYPE_PADDING;
+	/* array[0] holds the actual length for the discarded event */
+	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+	event->type_len = RINGBUF_TYPE_PADDING;
 	/* time delta must be non zero */
 	if (!event->time_delta)
 		event->time_delta = 1;
@@ -1786,8 +1785,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 		goto out;
 
 	event_length = rb_calculate_event_length(length);
-	event = rb_reserve_next_event(cpu_buffer,
-				      RINGBUF_TYPE_DATA, event_length);
+	event = rb_reserve_next_event(cpu_buffer, 0, event_length);
 	if (!event)
 		goto out;
 
@@ -2035,7 +2033,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	u64 delta;
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		return;
 
@@ -2066,7 +2064,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 {
 	u64 delta;
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		return;
 
@@ -2181,7 +2179,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 
 	event = rb_reader_event(cpu_buffer);
 
-	if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
+	if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
+			|| rb_discarded_event(event))
 		cpu_buffer->entries--;
 
 	rb_update_read_stamp(cpu_buffer, event);
@@ -2262,7 +2261,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 
 	event = rb_reader_event(cpu_buffer);
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			RB_WARN_ON(cpu_buffer, 1);
@@ -2334,7 +2333,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
 	event = rb_iter_head_event(iter);
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event)) {
 			rb_inc_iter(iter);
@@ -2393,7 +2392,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	event = rb_buffer_peek(buffer, cpu, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2421,7 +2420,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	event = rb_iter_peek(iter, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2466,7 +2465,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
  out:
 	preempt_enable();
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2559,7 +2558,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
  out:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2766,7 +2765,7 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
 		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
 			return;
 		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
+		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 			continue;
 		cpu_buffer->entries--;
 	}
-- 
cgit v0.10.2


From 0cfcdedaddf2468cb53e3cff9c3abfef14b4d784 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Thu, 23 Apr 2009 21:46:19 +0200
Subject: ALSA: sc6000: fix older card initialization

The last patch to handle newer cards like SC7000
broke initialization of the SC6000. Fix this.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c
index 983ab7e..c803b2e 100644
--- a/sound/isa/sc6000.c
+++ b/sound/isa/sc6000.c
@@ -391,7 +391,6 @@ static int __devinit sc6000_init_board(char __iomem *vport,
 	int config = mss_config |
 		     sc6000_mpu_irq_to_softcfg(mpu_irq[dev]);
 	int err;
-	int cfg[2];
 	int old = 0;
 
 	err = sc6000_dsp_reset(vport);
@@ -421,11 +420,18 @@ static int __devinit sc6000_init_board(char __iomem *vport,
 		answer, version[0], version[1]);
 
 	/* set configuration */
-	sc6000_hw_cfg_encode(vport, &cfg[0], port[dev], mpu_port[dev],
-			     mss_port[dev]);
-	if (sc6000_hw_cfg_write(vport, cfg) < 0) {
-		snd_printk(KERN_ERR "sc6000_hw_cfg_write: failed!\n");
-		return -EIO;
+	sc6000_write(vport, COMMAND_5C);
+	if (sc6000_read(vport) < 0)
+		old = 1;
+
+	if (!old) {
+		int cfg[2];
+		sc6000_hw_cfg_encode(vport, &cfg[0], port[dev], mpu_port[dev],
+				     mss_port[dev]);
+		if (sc6000_hw_cfg_write(vport, cfg) < 0) {
+			snd_printk(KERN_ERR "sc6000_hw_cfg_write: failed!\n");
+			return -EIO;
+		}
 	}
 	err = sc6000_setup_board(vport, config);
 	if (err < 0) {
@@ -434,10 +440,6 @@ static int __devinit sc6000_init_board(char __iomem *vport,
 	}
 
 	sc6000_dsp_reset(vport);
-	sc6000_write(vport, COMMAND_5C);
-	if (sc6000_read(vport) < 0)
-		old = 1;
-	sc6000_dsp_reset(vport);
 
 	if (!old) {
 		sc6000_write(vport, COMMAND_60);
-- 
cgit v0.10.2


From 3e98f9f15e916c48dfc5231d7e6a59be7f122764 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 24 Apr 2009 15:39:39 +0900
Subject: sh: pci: Fix up the build for CONFIG_PCI=n.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index f9101215..5b2e0fc 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -88,6 +88,7 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
 #endif
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -95,6 +96,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 #ifdef CONFIG_SUPERH32
 /*
-- 
cgit v0.10.2


From 782cc5ae6331d63b4febaa312c9d14493aafa9b8 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 24 Apr 2009 09:43:09 +0200
Subject: x86, ds: fix buffer alignment in debug store selftest

The debug store selftest code uses a stack-allocated buffer, which is
not necessarily correctly aligned.

For tests using a buffer to hold a single entry, the buffer that is
passed to ds_request must already be suitably aligned.

Pass a suitably aligned portion of the bigger buffer.

[ Impact: fix hw-branch-tracer self-test failure ]

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: markus.t.metzger@gmail.com
LKML-Reference: <20090424094309.A30145@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
index 5f104a0..6bc7c19 100644
--- a/arch/x86/kernel/ds_selftest.c
+++ b/arch/x86/kernel/ds_selftest.c
@@ -323,13 +323,15 @@ static int ds_selftest_bts_bad_request_task(void *buffer)
 int ds_selftest_bts(void)
 {
 	struct ds_selftest_bts_conf conf;
-	unsigned char buffer[BUFFER_SIZE];
+	unsigned char buffer[BUFFER_SIZE], *small_buffer;
 	unsigned long irq;
 	int cpu;
 
 	printk(KERN_INFO "[ds] bts selftest...");
 	conf.error = 0;
 
+	small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
+
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		conf.suspend = ds_suspend_bts_wrap;
@@ -381,7 +383,7 @@ int ds_selftest_bts(void)
 	conf.suspend = ds_suspend_bts_noirq;
 	conf.resume = ds_resume_bts_noirq;
 	conf.tracer =
-		ds_request_bts_task(current, buffer, SMALL_BUFFER_SIZE,
+		ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
 				   NULL, (size_t)-1, BTS_KERNEL);
 	local_irq_save(irq);
 	ds_selftest_bts_cpu(&conf);
-- 
cgit v0.10.2


From 7e0bfad24d85de7cf2202a7b0ce51de11a077b21 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 24 Apr 2009 09:44:48 +0200
Subject: x86, bts: reenable ptrace branch trace support

The races found by Oleg Nesterov have been fixed.

Reenable branch trace support.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
LKML-Reference: <20090424094448.A30216@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8130334..924e156 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -506,7 +506,6 @@ config X86_PTRACE_BTS
 	bool "Branch Trace Store"
 	default y
 	depends on X86_DEBUGCTLMSR
-	depends on BROKEN
 	---help---
 	  This adds a ptrace interface to the hardware's branch trace store.
 
-- 
cgit v0.10.2


From 1cb81b143fa8f0e4629f10690862e2e52ca792ff Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 24 Apr 2009 09:51:43 +0200
Subject: x86, bts, mm: clean up buffer allocation

The current mm interface is asymetric. One function allocates a locked
buffer, another function only refunds the memory.

Change this to have two functions for accounting and refunding locked
memory, respectively; and do the actual buffer allocation in ptrace.

[ Impact: refactor BTS buffer allocation code ]

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090424095143.A30265@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d5252ae..09ecbde 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -617,17 +617,28 @@ struct bts_context {
 	struct work_struct	work;
 };
 
-static inline void alloc_bts_buffer(struct bts_context *context,
-				    unsigned int size)
+static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
 {
-	void *buffer;
+	void *buffer = NULL;
+	int err = -ENOMEM;
 
-	buffer = alloc_locked_buffer(size);
-	if (buffer) {
-		context->buffer = buffer;
-		context->size = size;
-		context->mm = get_task_mm(current);
-	}
+	err = account_locked_memory(current->mm, current->signal->rlim, size);
+	if (err < 0)
+		return err;
+
+	buffer = kzalloc(size, GFP_KERNEL);
+	if (!buffer)
+		goto out_refund;
+
+	context->buffer = buffer;
+	context->size = size;
+	context->mm = get_task_mm(current);
+
+	return 0;
+
+ out_refund:
+	refund_locked_memory(current->mm, size);
+	return err;
 }
 
 static inline void free_bts_buffer(struct bts_context *context)
@@ -638,7 +649,7 @@ static inline void free_bts_buffer(struct bts_context *context)
 	kfree(context->buffer);
 	context->buffer = NULL;
 
-	refund_locked_buffer_memory(context->mm, context->size);
+	refund_locked_memory(context->mm, context->size);
 	context->size = 0;
 
 	mmput(context->mm);
@@ -786,13 +797,15 @@ static int ptrace_bts_config(struct task_struct *child,
 	context->tracer = NULL;
 
 	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+		int err;
+
 		free_bts_buffer(context);
 		if (!cfg.size)
 			return 0;
 
-		alloc_bts_buffer(context, cfg.size);
-		if (!context->buffer)
-			return -ENOMEM;
+		err = alloc_bts_buffer(context, cfg.size);
+		if (err < 0)
+			return err;
 	}
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a3963ba..009eabd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,6 +19,7 @@ struct anon_vma;
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
+struct rlimit;
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
@@ -1319,7 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page,
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
-extern void *alloc_locked_buffer(size_t size);
-extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size);
+extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+				 size_t size);
+extern void refund_locked_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/mlock.c b/mm/mlock.c
index 28be15e..ac13043 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -629,38 +629,36 @@ void user_shm_unlock(size_t size, struct user_struct *user)
 	free_uid(user);
 }
 
-void *alloc_locked_buffer(size_t size)
+int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+			  size_t size)
 {
-	unsigned long rlim, vm, pgsz;
-	void *buffer = NULL;
+	unsigned long lim, vm, pgsz;
+	int error = -ENOMEM;
 
 	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
-
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->total_vm + pgsz;
-	if (rlim < vm)
-		goto out;
+	down_write(&mm->mmap_sem);
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->locked_vm + pgsz;
-	if (rlim < vm)
+	lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm   = mm->total_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
+	lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm   = mm->locked_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	current->mm->total_vm  += pgsz;
-	current->mm->locked_vm += pgsz;
+	mm->total_vm  += pgsz;
+	mm->locked_vm += pgsz;
 
+	error = 0;
  out:
-	up_write(&current->mm->mmap_sem);
-	return buffer;
+	up_write(&mm->mmap_sem);
+	return error;
 }
 
-void refund_locked_buffer_memory(struct mm_struct *mm, size_t size)
+void refund_locked_memory(struct mm_struct *mm, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-- 
cgit v0.10.2


From 39517091f88fae32b52254b561ced78da1eaf0a7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 11:05:52 -0400
Subject: tracing/lockdep: convert lockdep to use TRACE_EVENT macro

The TRACE_FORMAT will soon be deprecated. This patch converts it to
the TRACE_EVENT macro.

Note, this change should also speed up the tracing.

[ Impact: remove a user of deprecated TRACE_FORMAT ]

Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
index 3ca315c..0e956c9 100644
--- a/include/trace/events/lockdep.h
+++ b/include/trace/events/lockdep.h
@@ -9,28 +9,64 @@
 
 #ifdef CONFIG_LOCKDEP
 
-TRACE_FORMAT(lock_acquire,
+TRACE_EVENT(lock_acquire,
+
 	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
 		int trylock, int read, int check,
 		struct lockdep_map *next_lock, unsigned long ip),
+
 	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-	TP_FMT("%s%s%s", trylock ? "try " : "",
-		read ? "read " : "", lock->name)
-	);
 
-TRACE_FORMAT(lock_release,
+	TP_STRUCT__entry(
+		__field(unsigned int, flags)
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
+		  (__entry->flags & 2) ? "read " : "",
+		  __get_str(name))
+);
+
+TRACE_EVENT(lock_release,
+
 	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+
 	TP_ARGS(lock, nested, ip),
-	TP_FMT("%s", lock->name)
-	);
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
 
 #ifdef CONFIG_LOCK_STAT
 
-TRACE_FORMAT(lock_contended,
+TRACE_EVENT(lock_contended,
+
 	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
 	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
 
 TRACE_EVENT(lock_acquired,
 	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
-- 
cgit v0.10.2


From 160031b556e93590fa8635210d73d93c3d3853a9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 11:26:55 -0400
Subject: tracing/irq: convert irq traces to use TRACE_EVENT macro

The TRACE_FORMAT will soon be deprecated. This patch converts it to
the TRACE_EVENT macro.

Note, this change should also speed up the tracing.

[ Impact: remove a user of deprecated TRACE_FORMAT ]

Cc: Jason Baron <jbaron@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 75e3468..7686864 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -10,11 +10,24 @@
 /*
  * Tracepoint for entry of interrupt handler:
  */
-TRACE_FORMAT(irq_handler_entry,
+TRACE_EVENT(irq_handler_entry,
+
 	TP_PROTO(int irq, struct irqaction *action),
+
 	TP_ARGS(irq, action),
-	TP_FMT("irq=%d handler=%s", irq, action->name)
-	);
+
+	TP_STRUCT__entry(
+		__field(	int,	irq		)
+		__string(	name,	action->name	)
+	),
+
+	TP_fast_assign(
+		__entry->irq = irq;
+		__assign_str(name, action->name);
+	),
+
+	TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
+);
 
 /*
  * Tracepoint for return of an interrupt handler:
@@ -39,17 +52,43 @@ TRACE_EVENT(irq_handler_exit,
 		  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
-TRACE_FORMAT(softirq_entry,
+TRACE_EVENT(softirq_entry,
+
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
 	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
 
-TRACE_FORMAT(softirq_exit,
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+		__string(	name,	softirq_to_name[h-vec]	)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+		__assign_str(name, softirq_to_name[h-vec]);
+	),
+
+	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
+);
+
+TRACE_EVENT(softirq_exit,
+
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
 	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
+
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+		__string(	name,	softirq_to_name[h-vec]	)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+		__assign_str(name, softirq_to_name[h-vec]);
+	),
+
+	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
+);
 
 #endif /*  _TRACE_IRQ_H */
 
-- 
cgit v0.10.2


From b8e65554d80b4c560d201362d0e8fa02109d89fd Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 11:50:39 -0400
Subject: tracing: remove deprecated TRACE_FORMAT

The TRACE_FORMAT macro has been deprecated by the TRACE_EVENT macro.
There are no more users. All new users must use the TRACE_EVENT macro.

[ Impact: remove old functionality ]

Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 4353f3f..14df7e6 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -158,11 +158,6 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define PARAMS(args...) args
 
-#ifndef TRACE_FORMAT
-#define TRACE_FORMAT(name, proto, args, fmt)		\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
-#endif
-
 #ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index abc611f..f7a7ae1 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -26,10 +26,6 @@
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
 	DEFINE_TRACE(name)
 
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(name, proto, args, print)	\
-	DEFINE_TRACE(name)
-
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a77f71a..1e68114 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,9 +18,6 @@
 
 #include <linux/ftrace_event.h>
 
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
 #undef __array
 #define __array(type, item, len)	type	item[len];
 
@@ -62,9 +59,6 @@
  *
  */
 
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
 #undef __array
 #define __array(type, item, len)
 
@@ -298,16 +292,6 @@ ftrace_define_fields_##call(void)					\
  *	unregister_trace_<call>(ftrace_event_<call>);
  * }
  *
- * For those macros defined with TRACE_FORMAT:
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- * }
- *
  *
  * For those macros defined with TRACE_EVENT:
  *
@@ -417,56 +401,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
 #define _TRACE_PROFILE_INIT(call)
 #endif
 
-#define _TRACE_FORMAT(call, proto, args, fmt)				\
-static void ftrace_event_##call(proto)					\
-{									\
-	event_trace_printk(_RET_IP_, #call ": " fmt);			\
-}									\
-									\
-static int ftrace_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_event_##call);			\
-}									\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static int ftrace_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(NULL);				\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	return 0;							\
-}
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)				\
-_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_init_event_##call,		\
-	.regfunc		= ftrace_reg_event_##call,		\
-	.unregfunc		= ftrace_unreg_event_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
 #undef __entry
 #define __entry entry
 
-- 
cgit v0.10.2


From a8353a57299f965ca8747b1b062490aef2c9ca50 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Fri, 24 Apr 2009 11:03:21 +0300
Subject: ASoC: Beagle: Add support for 4 channel

This patch adds support for the four channel TDM mode
on Beagle board.

Depending on the channel count, the interface needs to be
configured differently (I2S for stereo DSP_A for four channels)

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c
index 6aa428e..b0cff9f 100644
--- a/sound/soc/omap/omap3beagle.c
+++ b/sound/soc/omap/omap3beagle.c
@@ -41,23 +41,33 @@ static int omap3beagle_hw_params(struct snd_pcm_substream *substream,
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	unsigned int fmt;
 	int ret;
 
+	switch (params_channels(params)) {
+	case 2: /* Stereo I2S mode */
+		fmt =	SND_SOC_DAIFMT_I2S |
+			SND_SOC_DAIFMT_NB_NF |
+			SND_SOC_DAIFMT_CBM_CFM;
+		break;
+	case 4: /* Four channel TDM mode */
+		fmt =	SND_SOC_DAIFMT_DSP_A |
+			SND_SOC_DAIFMT_IB_NF |
+			SND_SOC_DAIFMT_CBM_CFM;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	/* Set codec DAI configuration */
-	ret = snd_soc_dai_set_fmt(codec_dai,
-				  SND_SOC_DAIFMT_I2S |
-				  SND_SOC_DAIFMT_NB_NF |
-				  SND_SOC_DAIFMT_CBM_CFM);
+	ret = snd_soc_dai_set_fmt(codec_dai, fmt);
 	if (ret < 0) {
 		printk(KERN_ERR "can't set codec DAI configuration\n");
 		return ret;
 	}
 
 	/* Set cpu DAI configuration */
-	ret = snd_soc_dai_set_fmt(cpu_dai,
-				  SND_SOC_DAIFMT_I2S |
-				  SND_SOC_DAIFMT_NB_NF |
-				  SND_SOC_DAIFMT_CBM_CFM);
+	ret = snd_soc_dai_set_fmt(cpu_dai, fmt);
 	if (ret < 0) {
 		printk(KERN_ERR "can't set cpu DAI configuration\n");
 		return ret;
-- 
cgit v0.10.2


From 7629ad24f2b3df95c8b4cd8869e3c04e1df6c442 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Fri, 24 Apr 2009 16:37:44 +0200
Subject: ASoC: add SOC_DOUBLE_EXT macro

Add a macro for double controls with special callback functions.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc.h b/include/sound/soc.h
index b1f2f88..6ab80bf 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -118,6 +118,14 @@
 	.info = snd_soc_info_volsw, \
 	.get = xhandler_get, .put = xhandler_put, \
 	.private_value = SOC_SINGLE_VALUE(xreg, xshift, xmax, xinvert) }
+#define SOC_DOUBLE_EXT(xname, xreg, shift_left, shift_right, xmax, xinvert,\
+	 xhandler_get, xhandler_put) \
+{	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname),\
+	.info = snd_soc_info_volsw, \
+	.get = xhandler_get, .put = xhandler_put, \
+	.private_value = (unsigned long)&(struct soc_mixer_control) \
+		{.reg = xreg, .shift = shift_left, .rshift = shift_right, \
+		 .max = xmax, .invert = xinvert} }
 #define SOC_SINGLE_EXT_TLV(xname, xreg, xshift, xmax, xinvert,\
 	 xhandler_get, xhandler_put, tlv_array) \
 {	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
-- 
cgit v0.10.2


From 172fd9e26200668ebaf3e1d6d09b36d5d531bfa6 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 24 Apr 2009 16:33:10 +0100
Subject: ASoC: Fix S3C64xx IIS device registration and support both ports

The S3C64xx IIS code had a number of problems with device registration.
The hardware has two IIS ports of which the driver supported only one
at once via a single exported DAI, attempting to identify the DAI to
use based on the dev->id of the ASoC platform device.  As well as
limiting the driver to only supporting one IIS port at once this also
meant that the ID of the soc-audio device (or in future the card device)
had to match the IIS ID.

Fix both problems by converting the driver to register the DAIs based on
probing of platform devices registered by the arch/arm code, using those
platform devices to interact with the clock API.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c
index 33c5de7..a84c4be 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.c
@@ -120,36 +120,8 @@ EXPORT_SYMBOL_GPL(s3c64xx_i2s_get_clockrate);
 static int s3c64xx_i2s_probe(struct platform_device *pdev,
 			     struct snd_soc_dai *dai)
 {
-	struct device *dev = &pdev->dev;
-	struct s3c_i2sv2_info *i2s;
-	int ret;
-
-	dev_dbg(dev, "%s: probing dai %d\n", __func__, pdev->id);
-
-	if (pdev->id < 0 || pdev->id > ARRAY_SIZE(s3c64xx_i2s)) {
-		dev_err(dev, "id %d out of range\n", pdev->id);
-		return -EINVAL;
-	}
-
-	i2s = &s3c64xx_i2s[pdev->id];
-
-	ret = s3c_i2sv2_probe(pdev, dai, i2s,
-			      pdev->id ? S3C64XX_PA_IIS1 : S3C64XX_PA_IIS0);
-	if (ret)
-		return ret;
-
-	i2s->dma_capture = &s3c64xx_i2s_pcm_stereo_in[pdev->id];
-	i2s->dma_playback = &s3c64xx_i2s_pcm_stereo_out[pdev->id];
-
-	i2s->iis_cclk = clk_get(dev, "audio-bus");
-	if (IS_ERR(i2s->iis_cclk)) {
-		dev_err(dev, "failed to get audio-bus");
-		iounmap(i2s->regs);
-		return -ENODEV;
-	}
-
 	/* configure GPIO for i2s port */
-	switch (pdev->id) {
+	switch (dai->id) {
 	case 0:
 		s3c_gpio_cfgpin(S3C64XX_GPD(0), S3C64XX_GPD0_I2S0_CLK);
 		s3c_gpio_cfgpin(S3C64XX_GPD(1), S3C64XX_GPD1_I2S0_CDCLK);
@@ -181,35 +153,114 @@ static struct snd_soc_dai_ops s3c64xx_i2s_dai_ops = {
 	.set_sysclk	= s3c64xx_i2s_set_sysclk,	
 };
 
-struct snd_soc_dai s3c64xx_i2s_dai = {
-	.name		= "s3c64xx-i2s",
-	.id		= 0,
-	.probe		= s3c64xx_i2s_probe,
-	.playback = {
-		.channels_min	= 2,
-		.channels_max	= 2,
-		.rates		= S3C64XX_I2S_RATES,
-		.formats	= S3C64XX_I2S_FMTS,
+struct snd_soc_dai s3c64xx_i2s_dai[] = {
+	{
+		.name		= "s3c64xx-i2s",
+		.id		= 0,
+		.probe		= s3c64xx_i2s_probe,
+		.playback = {
+			.channels_min	= 2,
+			.channels_max	= 2,
+			.rates		= S3C64XX_I2S_RATES,
+			.formats	= S3C64XX_I2S_FMTS,
+		},
+		.capture = {
+			 .channels_min	= 2,
+			 .channels_max	= 2,
+			 .rates		= S3C64XX_I2S_RATES,
+			 .formats	= S3C64XX_I2S_FMTS,
+		 },
+		.ops = &s3c64xx_i2s_dai_ops,
 	},
-	.capture = {
-		.channels_min	= 2,
-		.channels_max	= 2,
-		.rates		= S3C64XX_I2S_RATES,
-		.formats	= S3C64XX_I2S_FMTS,
+	{
+		.name		= "s3c64xx-i2s",
+		.id		= 1,
+		.probe		= s3c64xx_i2s_probe,
+		.playback = {
+			.channels_min	= 2,
+			.channels_max	= 2,
+			.rates		= S3C64XX_I2S_RATES,
+			.formats	= S3C64XX_I2S_FMTS,
+		},
+		.capture = {
+			 .channels_min	= 2,
+			 .channels_max	= 2,
+			 .rates		= S3C64XX_I2S_RATES,
+			 .formats	= S3C64XX_I2S_FMTS,
+		 },
+		.ops = &s3c64xx_i2s_dai_ops,
 	},
-	.ops = &s3c64xx_i2s_dai_ops,
 };
 EXPORT_SYMBOL_GPL(s3c64xx_i2s_dai);
 
+static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev)
+{
+	struct s3c_i2sv2_info *i2s;
+	struct snd_soc_dai *dai;
+	int ret;
+
+	if (pdev->id >= ARRAY_SIZE(s3c64xx_i2s)) {
+		dev_err(&pdev->dev, "id %d out of range\n", pdev->id);
+		return -EINVAL;
+	}
+
+	i2s = &s3c64xx_i2s[pdev->id];
+	dai = &s3c64xx_i2s_dai[pdev->id];
+	dai->dev = &pdev->dev;
+
+	i2s->dma_capture = &s3c64xx_i2s_pcm_stereo_in[pdev->id];
+	i2s->dma_playback = &s3c64xx_i2s_pcm_stereo_out[pdev->id];
+
+	i2s->iis_cclk = clk_get(&pdev->dev, "audio-bus");
+	if (IS_ERR(i2s->iis_cclk)) {
+		dev_err(&pdev->dev, "failed to get audio-bus");
+		ret = PTR_ERR(i2s->iis_cclk);
+		goto err;
+	}
+
+	ret = s3c_i2sv2_probe(pdev, dai, i2s,
+			      dai->id ? S3C64XX_PA_IIS1 : S3C64XX_PA_IIS0);
+	if (ret)
+		goto err_clk;
+
+	ret = snd_soc_register_dai(dai);
+	if (ret != 0)
+		goto err_i2sv2;
+
+	return 0;
+
+err_i2sv2:
+	/* Not implemented for I2Sv2 core yet */
+err_clk:
+	clk_put(i2s->iis_cclk);
+err:
+	return ret;
+}
+
+static __devexit int s3c64xx_iis_dev_remove(struct platform_device *pdev)
+{
+	dev_err(&pdev->dev, "Device removal not yet supported\n");
+	return 0;
+}
+
+static struct platform_driver s3c64xx_iis_driver = {
+	.probe  = s3c64xx_iis_dev_probe,
+	.remove = s3c64xx_iis_dev_remove,
+	.driver = {
+		.name = "s3c64xx-iis",
+		.owner = THIS_MODULE,
+	},
+};
+
 static int __init s3c64xx_i2s_init(void)
 {
-	return  s3c_i2sv2_register_dai(&s3c64xx_i2s_dai);
+	return platform_driver_register(&s3c64xx_iis_driver);
 }
 module_init(s3c64xx_i2s_init);
 
 static void __exit s3c64xx_i2s_exit(void)
 {
-	snd_soc_unregister_dai(&s3c64xx_i2s_dai);
+	platform_driver_unregister(&s3c64xx_iis_driver);
 }
 module_exit(s3c64xx_i2s_exit);
 
@@ -217,6 +268,3 @@ module_exit(s3c64xx_i2s_exit);
 MODULE_AUTHOR("Ben Dooks, <ben@simtec.co.uk>");
 MODULE_DESCRIPTION("S3C64XX I2S SoC Interface");
 MODULE_LICENSE("GPL");
-
-
-
diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.h b/sound/soc/s3c24xx/s3c64xx-i2s.h
index b7ffe3c..597822a 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.h
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.h
@@ -24,7 +24,7 @@
 #define S3C64XX_CLKSRC_PCLK	(0)
 #define S3C64XX_CLKSRC_MUX	(1)
 
-extern struct snd_soc_dai s3c64xx_i2s_dai;
+extern struct snd_soc_dai s3c64xx_i2s_dai[];
 
 extern unsigned long s3c64xx_i2s_get_clockrate(struct snd_soc_dai *cpu_dai);
 
-- 
cgit v0.10.2


From 008bec397cdabd22a6f4e4c16a746a86a046f8af Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 24 Apr 2009 16:27:09 +0100
Subject: ASoC: S3C2412: Failing to get the I2S clock is an error

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c
index b7e0b3f..168a088 100644
--- a/sound/soc/s3c24xx/s3c2412-i2s.c
+++ b/sound/soc/s3c24xx/s3c2412-i2s.c
@@ -120,7 +120,7 @@ static int s3c2412_i2s_probe(struct platform_device *pdev,
 
 	s3c2412_i2s.iis_cclk = clk_get(&pdev->dev, "i2sclk");
 	if (s3c2412_i2s.iis_cclk == NULL) {
-		pr_debug("failed to get i2sclk clock\n");
+		pr_err("failed to get i2sclk clock\n");
 		iounmap(s3c2412_i2s.regs);
 		return -ENODEV;
 	}
-- 
cgit v0.10.2


From 060fa5c83e67901ba47ab484cfcdb32737d630ba Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 12:20:52 -0400
Subject: tracing/events: reuse trace event ids after overflow

With modules being able to add trace events, and the max trace event
counter is 16 bits (65536) we can overflow the counter easily
with a simple while loop adding and removing modules that contain
trace events.

This patch links together the registered trace events and on overflow
searches for available trace event ids. It will still fail if
over 65536 events are registered, but considering that a typical
kernel only has 22000 functions, 65000 events should be sufficient.

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 07e0a6d..78a9ba2 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -56,6 +56,7 @@ typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
 					      int flags);
 struct trace_event {
 	struct hlist_node	node;
+	struct list_head	list;
 	int			type;
 	trace_print_func	trace;
 	trace_print_func	raw;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 06997e7..5fc51f0 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -483,6 +483,36 @@ struct trace_event *ftrace_find_event(int type)
 	return NULL;
 }
 
+static LIST_HEAD(ftrace_event_list);
+
+static int trace_search_list(struct list_head **list)
+{
+	struct trace_event *e;
+	int last = __TRACE_LAST_TYPE;
+
+	if (list_empty(&ftrace_event_list)) {
+		*list = &ftrace_event_list;
+		return last + 1;
+	}
+
+	/*
+	 * We used up all possible max events,
+	 * lets see if somebody freed one.
+	 */
+	list_for_each_entry(e, &ftrace_event_list, list) {
+		if (e->type != last + 1)
+			break;
+		last++;
+	}
+
+	/* Did we used up all 65 thousand events??? */
+	if ((last + 1) > FTRACE_MAX_EVENT)
+		return 0;
+
+	*list = &e->list;
+	return last + 1;
+}
+
 /**
  * register_ftrace_event - register output for an event type
  * @event: the event type to register
@@ -505,20 +535,40 @@ int register_ftrace_event(struct trace_event *event)
 
 	mutex_lock(&trace_event_mutex);
 
-	if (!event) {
-		ret = next_event_type++;
+	if (WARN_ON(!event))
 		goto out;
-	}
 
-	if (!event->type)
-		event->type = next_event_type++;
-	else if (event->type > __TRACE_LAST_TYPE) {
+	INIT_LIST_HEAD(&event->list);
+
+	if (!event->type) {
+		struct list_head *list;
+
+		if (next_event_type > FTRACE_MAX_EVENT) {
+
+			event->type = trace_search_list(&list);
+			if (!event->type)
+				goto out;
+
+		} else {
+			
+			event->type = next_event_type++;
+			list = &ftrace_event_list;
+		}
+
+		if (WARN_ON(ftrace_find_event(event->type)))
+			goto out;
+
+		list_add_tail(&event->list, list);
+
+	} else if (event->type > __TRACE_LAST_TYPE) {
 		printk(KERN_WARNING "Need to add type to trace.h\n");
 		WARN_ON(1);
-	}
-
-	if (ftrace_find_event(event->type))
 		goto out;
+	} else {
+		/* Is this event already used */
+		if (ftrace_find_event(event->type))
+			goto out;
+	}
 
 	if (event->trace == NULL)
 		event->trace = trace_nop_print;
@@ -537,8 +587,6 @@ int register_ftrace_event(struct trace_event *event)
  out:
 	mutex_unlock(&trace_event_mutex);
 
-	WARN_ON_ONCE(next_event_type > FTRACE_MAX_EVENT);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_event);
@@ -551,6 +599,7 @@ int unregister_ftrace_event(struct trace_event *event)
 {
 	mutex_lock(&trace_event_mutex);
 	hlist_del(&event->node);
+	list_del(&event->list);
 	mutex_unlock(&trace_event_mutex);
 
 	return 0;
-- 
cgit v0.10.2


From 79ffab34391933ee3b95dac7f25c0478fa2f8f1e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 13 May 2009 15:13:42 -0400
Subject: ext4: Properly initialize the buffer_head state

These struct buffer_heads are allocated on the stack (and hence are
initialized with stack garbage).  They are only used to call a
get_blocks() function, so that's mostly OK, but b_state must be
initialized to be 0 so we don't have any unexpected BH_* flags set by
accident, such as BH_Unwritten or BH_Delay.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e3a55eb..a953214 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3150,6 +3150,7 @@ retry:
 			ret = PTR_ERR(handle);
 			break;
 		}
+		map_bh.b_state = 0;
 		ret = ext4_get_blocks_wrap(handle, inode, block,
 					  max_blocks, &map_bh,
 					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a9ffd5..d7ad0bb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2055,7 +2055,20 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	if ((mpd->b_state  & (1 << BH_Mapped)) &&
 	    !(mpd->b_state & (1 << BH_Delay)))
 		return 0;
-	new.b_state = mpd->b_state;
+	/*
+	 * We need to make sure the BH_Delay flag is passed down to
+	 * ext4_da_get_block_write(), since it calls
+	 * ext4_get_blocks_wrap() with the EXT4_DELALLOC_RSVED flag.
+	 * This flag causes ext4_get_blocks_wrap() to call
+	 * ext4_da_update_reserve_space() if the passed buffer head
+	 * has the BH_Delay flag set.  In the future, once we clean up
+	 * the interfaces to ext4_get_blocks_wrap(), we should pass in
+	 * a separate flag which requests that the delayed allocation
+	 * statistics should be updated, instead of depending on the
+	 * state information getting passed down via the map_bh's
+	 * state bitmasks plus the magic EXT4_DELALLOC_RSVED flag.
+	 */
+	new.b_state = mpd->b_state & (1 << BH_Delay);
 	new.b_blocknr = 0;
 	new.b_size = mpd->b_size;
 	next = mpd->b_blocknr;
diff --git a/fs/mpage.c b/fs/mpage.c
index 680ba60..42381bd 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
-	clear_buffer_mapped(&map_bh);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
@@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
-	clear_buffer_mapped(&map_bh);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
 	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
 			&map_bh, &first_logical_block, get_block);
 	if (bio)
-- 
cgit v0.10.2


From 8fb0e342481c4d80040670fec915f0b9c7c6499a Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 12 May 2009 16:22:37 -0400
Subject: vfs: Add BUG_ON for delayed and unwritten flags in submit_bh()

The BH_Delay and BH_Unwritten flags should never leak out to
submit_bh().  So add some BUG_ON() checks to submit_bh so we can get a
stack trace and determine how and why this might have happened.

(Note that only XFS and ext4 use these buffer head flags, and XFS does
not use submit_bh().  So this patch should only modify behavior for
ext4.)

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: linux-fsdevel@vger.kernel.org

diff --git a/fs/buffer.c b/fs/buffer.c
index aed2977..ad01129 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2933,6 +2933,8 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(!buffer_locked(bh));
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
+	BUG_ON(buffer_delay(bh));
+	BUG_ON(buffer_unwritten(bh));
 
 	/*
 	 * Mask in barrier bit for a write (could be either a WRITE or a
-- 
cgit v0.10.2


From 29fa89d088941d79765d60f22d5ccdd6b8696e11 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 12 May 2009 16:30:27 -0400
Subject: ext4: Mark the unwritten buffer_head as mapped during write_begin

Setting BH_Unwritten buffer_heads as BH_Mapped avoids multiple
(unnecessary) calls to get_block() during the call to the write(2)
system call.  Setting BH_Unwritten buffer heads as BH_Mapped requires
that the writepages() functions can handle BH_Unwritten buffer_heads.

After this commit, things work as follows:

ext4_ext_get_block() returns unmapped, unwritten, buffer head when
called with create = 0 for prealloc space. This makes sure we handle
the read path and non-delayed allocation case correctly.  Even though
the buffer head is marked unmapped we have valid b_blocknr and b_bdev
values in the buffer_head.

ext4_da_get_block_prep() called for block resrevation will now return
mapped, unwritten, new buffer_head for prealloc space. This avoids
multiple calls to get_block() for write to same offset. By making such
buffers as BH_New, we also assure that sub-block zeroing of buffered
writes happens correctly.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a953214..ea5c476 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2872,6 +2872,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			if (create == EXT4_CREATE_UNINITIALIZED_EXT)
 				goto out;
 			if (!create) {
+				if (allocated > max_blocks)
+					allocated = max_blocks;
 				/*
 				 * We have blocks reserved already.  We
 				 * return allocated blocks so that delalloc
@@ -2879,8 +2881,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 				 * the buffer head will be unmapped so that
 				 * a read from the block returns 0s.
 				 */
-				if (allocated > max_blocks)
-					allocated = max_blocks;
 				set_buffer_unwritten(bh_result);
 				bh_result->b_bdev = inode->i_sb->s_bdev;
 				bh_result->b_blocknr = newblock;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d7ad0bb..96f3366 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1852,7 +1852,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
  * @logical - first logical block to start assignment with
  *
  * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay
+ * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
  */
 static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 				 struct buffer_head *exbh)
@@ -1902,16 +1902,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 			do {
 				if (cur_logical >= logical + blocks)
 					break;
-				if (buffer_delay(bh)) {
-					bh->b_blocknr = pblock;
-					clear_buffer_delay(bh);
-					bh->b_bdev = inode->i_sb->s_bdev;
-				} else if (buffer_unwritten(bh)) {
-					bh->b_blocknr = pblock;
-					clear_buffer_unwritten(bh);
-					set_buffer_mapped(bh);
-					set_buffer_new(bh);
-					bh->b_bdev = inode->i_sb->s_bdev;
+
+				if (buffer_delay(bh) ||
+						buffer_unwritten(bh)) {
+
+					BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
+
+					if (buffer_delay(bh)) {
+						clear_buffer_delay(bh);
+						bh->b_blocknr = pblock;
+					} else {
+						/*
+						 * unwritten already should have
+						 * blocknr assigned. Verify that
+						 */
+						clear_buffer_unwritten(bh);
+						BUG_ON(bh->b_blocknr != pblock);
+					}
+
 				} else if (buffer_mapped(bh))
 					BUG_ON(bh->b_blocknr != pblock);
 
@@ -2053,7 +2061,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	 * We consider only non-mapped and non-allocated blocks
 	 */
 	if ((mpd->b_state  & (1 << BH_Mapped)) &&
-	    !(mpd->b_state & (1 << BH_Delay)))
+		!(mpd->b_state & (1 << BH_Delay)) &&
+		!(mpd->b_state & (1 << BH_Unwritten)))
 		return 0;
 	/*
 	 * We need to make sure the BH_Delay flag is passed down to
@@ -2205,6 +2214,17 @@ flush_it:
 	return;
 }
 
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+	/*
+	 * unmapped buffer is possible for holes.
+	 * delay buffer is possible with delayed allocation.
+	 * We also need to consider unwritten buffer as unmapped.
+	 */
+	return (!buffer_mapped(bh) || buffer_delay(bh) ||
+				buffer_unwritten(bh)) && buffer_dirty(bh);
+}
+
 /*
  * __mpage_da_writepage - finds extent of pages and blocks
  *
@@ -2289,8 +2309,7 @@ static int __mpage_da_writepage(struct page *page,
 			 * Otherwise we won't make progress
 			 * with the page in ext4_da_writepage
 			 */
-			if (buffer_dirty(bh) &&
-			    (!buffer_mapped(bh) || buffer_delay(bh))) {
+			if (ext4_bh_unmapped_or_delay(NULL, bh)) {
 				mpage_add_bh_to_extent(mpd, logical,
 						       bh->b_size,
 						       bh->b_state);
@@ -2318,6 +2337,14 @@ static int __mpage_da_writepage(struct page *page,
 /*
  * this is a special callback for ->write_begin() only
  * it's intention is to return mapped block or reserve space
+ *
+ * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
+ * We also have b_blocknr = -1 and b_bdev initialized properly
+ *
+ * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
+ * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
+ * initialized properly.
+ *
  */
 static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 				  struct buffer_head *bh_result, int create)
@@ -2353,28 +2380,23 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 		set_buffer_delay(bh_result);
 	} else if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
-		/*
-		 * With sub-block writes into unwritten extents
-		 * we also need to mark the buffer as new so that
-		 * the unwritten parts of the buffer gets correctly zeroed.
-		 */
-		if (buffer_unwritten(bh_result))
+		if (buffer_unwritten(bh_result)) {
+			/* A delayed write to unwritten bh should
+			 * be marked new and mapped.  Mapped ensures
+			 * that we don't do get_block multiple times
+			 * when we write to the same offset and new
+			 * ensures that we do proper zero out for
+			 * partial write.
+			 */
 			set_buffer_new(bh_result);
+			set_buffer_mapped(bh_result);
+		}
 		ret = 0;
 	}
 
 	return ret;
 }
 
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
-{
-	/*
-	 * unmapped buffer is possible for holes.
-	 * delay buffer is possible with delayed allocation
-	 */
-	return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
-}
-
 static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
@@ -2828,7 +2850,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
 	for (i = 0; i < idx; i++)
 		bh = bh->b_this_page;
 
-	if (!buffer_mapped(bh) || (buffer_delay(bh)))
+	if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
 		return 0;
 	return 1;
 }
-- 
cgit v0.10.2


From c5ca7c7636fa689a9746b6032f83aa7fffec31c6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 27 Apr 2009 22:48:48 -0400
Subject: ext4: Fallback to vmalloc if kmalloc can't allocate s_flex_groups
 array

For very large filesystems, the s_flex_groups array can get quite big.
For example, a filesystem that can be resized up to 16TB will have
8192 flex groups (assuming the default flex_bg size of 16), so the
array is 96k, which is *very* marginal for kmalloc().  On the other
hand, a 160GB filesystem without the resize_inode feature will only
require 960 bytes.  So we try to allocate the array first using
kmalloc(), and if that fails, we'll try to use vmalloc() instead.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e..3f4475d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -20,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/time.h>
+#include <linux/vmalloc.h>
 #include <linux/jbd2.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -586,7 +587,10 @@ static void ext4_put_super(struct super_block *sb)
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
-	kfree(sbi->s_flex_groups);
+	if (is_vmalloc_addr(sbi->s_flex_groups))
+		vfree(sbi->s_flex_groups);
+	else
+		kfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1620,6 +1624,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
 	ext4_group_t flex_group_count;
 	ext4_group_t flex_group;
 	int groups_per_flex = 0;
+	size_t size;
 	int i;
 
 	if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1634,8 +1639,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
 	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
 			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
 			      EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
-	sbi->s_flex_groups = kzalloc(flex_group_count *
-				     sizeof(struct flex_groups), GFP_KERNEL);
+	size = flex_group_count * sizeof(struct flex_groups);
+	sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
+	if (sbi->s_flex_groups == NULL) {
+		sbi->s_flex_groups = vmalloc(size);
+		if (sbi->s_flex_groups)
+			memset(sbi->s_flex_groups, 0, size);
+	}
 	if (sbi->s_flex_groups == NULL) {
 		printk(KERN_ERR "EXT4-fs: not enough memory for "
 				"%u flex groups\n", flex_group_count);
@@ -2842,6 +2852,12 @@ failed_mount4:
 		sbi->s_journal = NULL;
 	}
 failed_mount3:
+	if (sbi->s_flex_groups) {
+		if (is_vmalloc_addr(sbi->s_flex_groups))
+			vfree(sbi->s_flex_groups);
+		else
+			kfree(sbi->s_flex_groups);
+	}
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
-- 
cgit v0.10.2


From f7c439504ccba0cca43271e651013ab97a221c62 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 24 Apr 2009 23:31:59 -0400
Subject: ext4: Use is_power_of_2() for clarity

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3f4475d..3e509bc 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1483,7 +1483,7 @@ set_qf_format:
 				return 0;
 			if (option < 0 || option > (1 << 30))
 				return 0;
-			if (option & (option - 1)) {
+			if (!is_power_of_2(option)) {
 				printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
 				       " must be a power of 2\n");
 				return 0;
@@ -2101,8 +2101,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 	if (parse_strtoul(buf, 0x40000000, &t))
 		return -EINVAL;
 
-	/* inode_readahead_blks must be a power of 2 */
-	if (t & (t-1))
+	if (!is_power_of_2(t))
 		return -EINVAL;
 
 	sbi->s_inode_readahead_blks = t;
-- 
cgit v0.10.2


From e2d670523c6c4ccb0fca9f3ab1b8f066d9aa57d6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 May 2009 00:33:44 -0400
Subject: ext4: Simplify ext4_commit_super()'s function signature

The ext4_commit_super() function took both a struct super_block * and
a struct ext4_super_block *, but the struct ext4_super_block can be
derived from the struct super_block.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3e509bc..ad4c9be 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -54,8 +54,7 @@ static struct kset *ext4_kset;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
-static int ext4_commit_super(struct super_block *sb,
-			      struct ext4_super_block *es, int sync);
+static int ext4_commit_super(struct super_block *sb, int sync);
 static void ext4_mark_recovery_complete(struct super_block *sb,
 					struct ext4_super_block *es);
 static void ext4_clear_journal_err(struct super_block *sb,
@@ -306,7 +305,7 @@ static void ext4_handle_error(struct super_block *sb)
 		printk(KERN_CRIT "Remounting filesystem read-only\n");
 		sb->s_flags |= MS_RDONLY;
 	}
-	ext4_commit_super(sb, es, 1);
+	ext4_commit_super(sb, 1);
 	if (test_opt(sb, ERRORS_PANIC))
 		panic("EXT4-fs (device %s): panic forced after error\n",
 			sb->s_id);
@@ -448,7 +447,7 @@ __acquires(bitlock)
 	if (test_opt(sb, ERRORS_CONT)) {
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-		ext4_commit_super(sb, es, 0);
+		ext4_commit_super(sb, 0);
 		return;
 	}
 	ext4_unlock_group(sb, grp);
@@ -577,7 +576,7 @@ static void ext4_put_super(struct super_block *sb)
 	if (!(sb->s_flags & MS_RDONLY)) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
 	if (sbi->s_proc) {
 		remove_proc_entry(sb->s_id, ext4_proc_root);
@@ -1596,7 +1595,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	if (sbi->s_journal)
 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 
-	ext4_commit_super(sb, es, 1);
+	ext4_commit_super(sb, 1);
 	if (test_opt(sb, DEBUG))
 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
 				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
@@ -2655,7 +2654,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			if (test_opt(sb, ERRORS_PANIC)) {
 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-				ext4_commit_super(sb, es, 1);
+				ext4_commit_super(sb, 1);
 				goto failed_mount4;
 			}
 		}
@@ -3132,15 +3131,15 @@ static int ext4_load_journal(struct super_block *sb,
 		sb->s_dirt = 1;
 
 		/* Make sure we flush the recovery flag to disk. */
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
 
 	return 0;
 }
 
-static int ext4_commit_super(struct super_block *sb,
-			      struct ext4_super_block *es, int sync)
+static int ext4_commit_super(struct super_block *sb, int sync)
 {
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 	int error = 0;
 
@@ -3212,7 +3211,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
 	    sb->s_flags & MS_RDONLY) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		sb->s_dirt = 0;
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 	}
 	unlock_super(sb);
 
@@ -3253,7 +3252,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
 
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 
 		jbd2_journal_clear_err(journal);
 	}
@@ -3293,7 +3292,7 @@ static void ext4_write_super(struct super_block *sb)
 			BUG();
 		sb->s_dirt = 0;
 	} else {
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+		ext4_commit_super(sb, 1);
 	}
 }
 
@@ -3312,7 +3311,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 						     target);
 		}
 	} else {
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
+		ext4_commit_super(sb, wait);
 	}
 	return ret;
 }
@@ -3345,7 +3344,7 @@ static int ext4_freeze(struct super_block *sb)
 
 		/* Journal blocked and flushed, clear needs_recovery flag. */
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+		error = ext4_commit_super(sb, 1);
 		if (error)
 			goto out;
 	}
@@ -3365,7 +3364,7 @@ static int ext4_unfreeze(struct super_block *sb)
 		lock_super(sb);
 		/* Reser the needs_recovery flag before the fs is unlocked. */
 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+		ext4_commit_super(sb, 1);
 		unlock_super(sb);
 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	}
@@ -3520,7 +3519,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 		}
 	}
 	if (sbi->s_journal == NULL)
-		ext4_commit_super(sb, es, 1);
+		ext4_commit_super(sb, 1);
 
 #ifdef CONFIG_QUOTA
 	/* Release old quota file names */
-- 
cgit v0.10.2


From 7234ab2a55e77784b44cf2d862136d9e41b8d98a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 30 Apr 2009 21:24:04 -0400
Subject: ext4: Fix and simplify s_dirt handling

The s_dirt flag wasn't completely handled correctly, but it didn't
really matter when journalling was enabled.  It turns out that when
ext4 runs without a journal, we don't clear s_dirt in places where we
should have, with the result that the high-level write_super()
function was writing the superblock when it wasn't necessary.

So we fix this by making ext4_commit_super() clear the s_dirt flag,
and removing many of the other places where s_dirt is manipulated.
When journalling is enabled, the s_dirt flag might be left set more
often, but s_dirt really doesn't matter when journalling is enabled.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ad4c9be..7c7a08a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3128,7 +3128,6 @@ static int ext4_load_journal(struct super_block *sb,
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
-		sb->s_dirt = 1;
 
 		/* Make sure we flush the recovery flag to disk. */
 		ext4_commit_super(sb, 1);
@@ -3168,7 +3167,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 					&EXT4_SB(sb)->s_freeblocks_counter));
 	es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
 					&EXT4_SB(sb)->s_freeinodes_counter));
-
+	sb->s_dirt = 0;
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
 	if (sync) {
@@ -3210,7 +3209,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		sb->s_dirt = 0;
 		ext4_commit_super(sb, 1);
 	}
 	unlock_super(sb);
@@ -3271,10 +3269,8 @@ int ext4_force_commit(struct super_block *sb)
 		return 0;
 
 	journal = EXT4_SB(sb)->s_journal;
-	if (journal) {
-		sb->s_dirt = 0;
+	if (journal)
 		ret = ext4_journal_force_commit(journal);
-	}
 
 	return ret;
 }
@@ -3282,15 +3278,13 @@ int ext4_force_commit(struct super_block *sb)
 /*
  * Ext4 always journals updates to the superblock itself, so we don't
  * have to propagate any other updates to the superblock on disk at this
- * point.  (We can probably nuke this function altogether, and remove
- * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
+ * point if the journalling is enabled.
  */
 static void ext4_write_super(struct super_block *sb)
 {
 	if (EXT4_SB(sb)->s_journal) {
 		if (mutex_trylock(&sb->s_lock) != 0)
 			BUG();
-		sb->s_dirt = 0;
 	} else {
 		ext4_commit_super(sb, 1);
 	}
@@ -3302,7 +3296,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 	tid_t target;
 
 	trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
-	sb->s_dirt = 0;
 	if (EXT4_SB(sb)->s_journal) {
 		if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
 					      &target)) {
@@ -3324,7 +3317,6 @@ static int ext4_freeze(struct super_block *sb)
 {
 	int error = 0;
 	journal_t *journal;
-	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		journal = EXT4_SB(sb)->s_journal;
-- 
cgit v0.10.2


From 9ca92389c5312a51e819c15c762f0abdc7f3129b Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 May 2009 12:52:25 -0400
Subject: ext4: Use separate super_operations structure for no_journal
 filesystems

By using a separate super_operations structure for filesystems that
have and don't have journals, we can simply ext4_write_super() ---
which is only needed when no journal is present --- and ext4_freeze(),
ext4_unfreeze(), and ext4_sync_fs(), which are only needed when the
journal is present.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7c7a08a..68c3a44 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -995,7 +995,6 @@ static const struct super_operations ext4_sops = {
 	.dirty_inode	= ext4_dirty_inode,
 	.delete_inode	= ext4_delete_inode,
 	.put_super	= ext4_put_super,
-	.write_super	= ext4_write_super,
 	.sync_fs	= ext4_sync_fs,
 	.freeze_fs	= ext4_freeze,
 	.unfreeze_fs	= ext4_unfreeze,
@@ -1010,6 +1009,25 @@ static const struct super_operations ext4_sops = {
 	.bdev_try_to_free_page = bdev_try_to_free_page,
 };
 
+static const struct super_operations ext4_nojournal_sops = {
+	.alloc_inode	= ext4_alloc_inode,
+	.destroy_inode	= ext4_destroy_inode,
+	.write_inode	= ext4_write_inode,
+	.dirty_inode	= ext4_dirty_inode,
+	.delete_inode	= ext4_delete_inode,
+	.write_super	= ext4_write_super,
+	.put_super	= ext4_put_super,
+	.statfs		= ext4_statfs,
+	.remount_fs	= ext4_remount,
+	.clear_inode	= ext4_clear_inode,
+	.show_options	= ext4_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read	= ext4_quota_read,
+	.quota_write	= ext4_quota_write,
+#endif
+	.bdev_try_to_free_page = bdev_try_to_free_page,
+};
+
 static const struct export_operations ext4_export_ops = {
 	.fh_to_dentry = ext4_fh_to_dentry,
 	.fh_to_parent = ext4_fh_to_parent,
@@ -2615,7 +2633,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	/*
 	 * set up enough so that it can read an inode
 	 */
-	sb->s_op = &ext4_sops;
+	if (!test_opt(sb, NOLOAD) &&
+	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+		sb->s_op = &ext4_sops;
+	else
+		sb->s_op = &ext4_nojournal_sops;
 	sb->s_export_op = &ext4_export_ops;
 	sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -3275,19 +3297,9 @@ int ext4_force_commit(struct super_block *sb)
 	return ret;
 }
 
-/*
- * Ext4 always journals updates to the superblock itself, so we don't
- * have to propagate any other updates to the superblock on disk at this
- * point if the journalling is enabled.
- */
 static void ext4_write_super(struct super_block *sb)
 {
-	if (EXT4_SB(sb)->s_journal) {
-		if (mutex_trylock(&sb->s_lock) != 0)
-			BUG();
-	} else {
-		ext4_commit_super(sb, 1);
-	}
+	ext4_commit_super(sb, 1);
 }
 
 static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3296,15 +3308,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 	tid_t target;
 
 	trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
-	if (EXT4_SB(sb)->s_journal) {
-		if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
-					      &target)) {
-			if (wait)
-				jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
-						     target);
-		}
-	} else {
-		ext4_commit_super(sb, wait);
+	if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
+		if (wait)
+			jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
 	}
 	return ret;
 }
@@ -3318,32 +3324,31 @@ static int ext4_freeze(struct super_block *sb)
 	int error = 0;
 	journal_t *journal;
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		journal = EXT4_SB(sb)->s_journal;
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
 
-		if (journal) {
-			/* Now we set up the journal barrier. */
-			jbd2_journal_lock_updates(journal);
+	journal = EXT4_SB(sb)->s_journal;
 
-			/*
-			 * We don't want to clear needs_recovery flag when we
-			 * failed to flush the journal.
-			 */
-			error = jbd2_journal_flush(journal);
-			if (error < 0)
-				goto out;
-		}
+	/* Now we set up the journal barrier. */
+	jbd2_journal_lock_updates(journal);
 
-		/* Journal blocked and flushed, clear needs_recovery flag. */
-		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		error = ext4_commit_super(sb, 1);
-		if (error)
-			goto out;
+	/*
+	 * Don't clear the needs_recovery flag if we failed to flush
+	 * the journal.
+	 */
+	error = jbd2_journal_flush(journal);
+	if (error < 0) {
+	out:
+		jbd2_journal_unlock_updates(journal);
+		return error;
 	}
+
+	/* Journal blocked and flushed, clear needs_recovery flag. */
+	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	error = ext4_commit_super(sb, 1);
+	if (error)
+		goto out;
 	return 0;
-out:
-	jbd2_journal_unlock_updates(journal);
-	return error;
 }
 
 /*
@@ -3352,14 +3357,15 @@ out:
  */
 static int ext4_unfreeze(struct super_block *sb)
 {
-	if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
-		lock_super(sb);
-		/* Reser the needs_recovery flag before the fs is unlocked. */
-		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-		ext4_commit_super(sb, 1);
-		unlock_super(sb);
-		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
-	}
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
+
+	lock_super(sb);
+	/* Reset the needs_recovery flag before the fs is unlocked. */
+	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	ext4_commit_super(sb, 1);
+	unlock_super(sb);
+	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 May 2009 08:50:38 -0400
Subject: ext4: Avoid races caused by on-line resizing and SMP memory
 reordering

Ext4's on-line resizing adds a new block group and then, only at the
last step adjusts s_groups_count.  However, it's possible on SMP
systems that another CPU could see the updated the s_group_count and
not see the newly initialized data structures for the just-added block
group.  For this reason, it's important to insert a SMP read barrier
after reading s_groups_count and before reading any (for example) the
new block group descriptors allowed by the increased value of
s_groups_count.

Unfortunately, we rather blatently violate this locking protocol
documented in fs/ext4/resize.c.  Fortunately, (1) on-line resizes
happen relatively rarely, and (2) it seems rare that the filesystem
code will immediately try to use just-added block group before any
memory ordering issues resolve themselves.  So apparently problems
here are relatively hard to hit, since ext3 has been vulnerable to the
same issue for years with no one apparently complaining.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 53c72ad..a5ba039 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -88,6 +88,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		 ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
 	int bit, bit_max;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	unsigned free_blocks, group_blocks;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
@@ -123,7 +124,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		bit_max += ext4_bg_num_gdb(sb, block_group);
 	}
 
-	if (block_group == sbi->s_groups_count - 1) {
+	if (block_group == ngroups - 1) {
 		/*
 		 * Even though mke2fs always initialize first and last group
 		 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
@@ -131,7 +132,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		 */
 		group_blocks = ext4_blocks_count(sbi->s_es) -
 			le32_to_cpu(sbi->s_es->s_first_data_block) -
-			(EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
+			(EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
 	} else {
 		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
 	}
@@ -205,18 +206,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 {
 	unsigned int group_desc;
 	unsigned int offset;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (block_group >= sbi->s_groups_count) {
+	if (block_group >= ngroups) {
 		ext4_error(sb, "ext4_get_group_desc",
 			   "block_group >= groups_count - "
 			   "block_group = %u, groups_count = %u",
-			   block_group, sbi->s_groups_count);
+			   block_group, ngroups);
 
 		return NULL;
 	}
-	smp_rmb();
 
 	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
 	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
@@ -665,7 +666,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 	ext4_fsblk_t desc_count;
 	struct ext4_group_desc *gdp;
 	ext4_group_t i;
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
 	struct ext4_super_block *es;
 	ext4_fsblk_t bitmap_count;
@@ -677,7 +678,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 	bitmap_count = 0;
 	gdp = NULL;
 
-	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
@@ -700,7 +700,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 	return bitmap_count;
 #else
 	desc_count = 0;
-	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d0f15ef..02ec44b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1228,6 +1228,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
 	 return grp_info[indexv][indexh];
 }
 
+/*
+ * Reading s_groups_count requires using smp_rmb() afterwards.  See
+ * the locking protocol documented in the comments of ext4_group_add()
+ * in resize.c
+ */
+static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
+{
+	ext4_group_t	ngroups = EXT4_SB(sb)->s_groups_count;
+
+	smp_rmb();
+	return ngroups;
+}
 
 static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
 					     ext4_group_t block_group)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18e0a0..55ba419 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -316,7 +316,7 @@ error_return:
 static int find_group_dir(struct super_block *sb, struct inode *parent,
 				ext4_group_t *best_group)
 {
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	unsigned int freei, avefreei;
 	struct ext4_group_desc *desc, *best_desc = NULL;
 	ext4_group_t group;
@@ -353,7 +353,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
 	struct flex_groups *flex_group = sbi->s_flex_groups;
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
-	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	int flex_size = ext4_flex_bg_size(sbi);
 	ext4_group_t best_flex = parent_fbg_group;
 	int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +362,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
 	ext4_group_t n_fbg_groups;
 	ext4_group_t i;
 
-	n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
+	n_fbg_groups = (ngroups + flex_size - 1) >>
 		sbi->s_log_groups_per_flex;
 
 find_close_to_parent:
@@ -478,20 +478,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
 	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
 	unsigned int freei, avefreei;
 	ext4_fsblk_t freeb, avefreeb;
 	unsigned int ndirs;
 	int max_dirs, min_inodes;
 	ext4_grpblk_t min_blocks;
-	ext4_group_t i, grp, g;
+	ext4_group_t i, grp, g, ngroups;
 	struct ext4_group_desc *desc;
 	struct orlov_stats stats;
 	int flex_size = ext4_flex_bg_size(sbi);
 
+	ngroups = real_ngroups;
 	if (flex_size > 1) {
-		ngroups = (ngroups + flex_size - 1) >>
+		ngroups = (real_ngroups + flex_size - 1) >>
 			sbi->s_log_groups_per_flex;
 		parent_group >>= sbi->s_log_groups_per_flex;
 	}
@@ -543,7 +544,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 		 */
 		grp *= flex_size;
 		for (i = 0; i < flex_size; i++) {
-			if (grp+i >= sbi->s_groups_count)
+			if (grp+i >= real_ngroups)
 				break;
 			desc = ext4_get_group_desc(sb, grp+i, NULL);
 			if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +584,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	}
 
 fallback:
-	ngroups = sbi->s_groups_count;
+	ngroups = real_ngroups;
 	avefreei = freei / ngroups;
 fallback_retry:
 	parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +614,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 			    ext4_group_t *group, int mode)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
-	ext4_group_t i, last;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
 
 	/*
@@ -799,11 +799,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
 	struct buffer_head *group_desc_bh;
-	ext4_group_t group = 0;
+	ext4_group_t ngroups, group = 0;
 	unsigned long ino = 0;
 	struct inode *inode;
 	struct ext4_group_desc *gdp = NULL;
-	struct ext4_super_block *es;
 	struct ext4_inode_info *ei;
 	struct ext4_sb_info *sbi;
 	int ret2, err = 0;
@@ -818,15 +817,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 		return ERR_PTR(-EPERM);
 
 	sb = dir->i_sb;
+	ngroups = ext4_get_groups_count(sb);
 	trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
 		   dir->i_ino, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
-
 	sbi = EXT4_SB(sb);
-	es = sbi->s_es;
 
 	if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
 		ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +854,7 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
-	for (i = 0; i < sbi->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		err = -EIO;
 
 		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +915,7 @@ repeat_in_this_group:
 		 * group descriptor metadata has not yet been updated.
 		 * So we just go onto the next blockgroup.
 		 */
-		if (++group == sbi->s_groups_count)
+		if (++group == ngroups)
 			group = 0;
 	}
 	err = -ENOSPC;
@@ -1158,7 +1156,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 {
 	unsigned long desc_count;
 	struct ext4_group_desc *gdp;
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
 	struct ext4_super_block *es;
 	unsigned long bitmap_count, x;
@@ -1168,7 +1166,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1190,7 +1188,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 	return desc_count;
 #else
 	desc_count = 0;
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1205,9 +1203,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 unsigned long ext4_count_dirs(struct super_block * sb)
 {
 	unsigned long count = 0;
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 96f3366..4e7f363 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4965,7 +4965,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  */
 int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
-	int groups, gdpblocks;
+	ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
+	int gdpblocks;
 	int idxblocks;
 	int ret = 0;
 
@@ -4992,8 +4993,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 		groups += nrblocks;
 
 	gdpblocks = groups;
-	if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
-		groups = EXT4_SB(inode->i_sb)->s_groups_count;
+	if (groups > ngroups)
+		groups = ngroups;
 	if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
 		gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
 
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677..c3af9e6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -739,6 +739,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 
 static int ext4_mb_init_cache(struct page *page, char *incore)
 {
+	ext4_group_t ngroups;
 	int blocksize;
 	int blocks_per_page;
 	int groups_per_page;
@@ -757,6 +758,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
 	inode = page->mapping->host;
 	sb = inode->i_sb;
+	ngroups = ext4_get_groups_count(sb);
 	blocksize = 1 << inode->i_blkbits;
 	blocks_per_page = PAGE_CACHE_SIZE / blocksize;
 
@@ -780,7 +782,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 	for (i = 0; i < groups_per_page; i++) {
 		struct ext4_group_desc *desc;
 
-		if (first_group + i >= EXT4_SB(sb)->s_groups_count)
+		if (first_group + i >= ngroups)
 			break;
 
 		err = -EIO;
@@ -852,7 +854,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 		struct ext4_group_info *grinfo;
 
 		group = (first_block + i) >> 1;
-		if (group >= EXT4_SB(sb)->s_groups_count)
+		if (group >= ngroups)
 			break;
 
 		/*
@@ -1788,6 +1790,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
 	int block, pnum;
 	int blocks_per_page;
 	int groups_per_page;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t first_group;
 	struct ext4_group_info *grp;
 
@@ -1807,7 +1810,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
 	/* read all groups the page covers into the cache */
 	for (i = 0; i < groups_per_page; i++) {
 
-		if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+		if ((first_group + i) >= ngroups)
 			break;
 		grp = ext4_get_group_info(sb, first_group + i);
 		/* take all groups write allocation
@@ -1945,8 +1948,7 @@ err:
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
-	ext4_group_t group;
-	ext4_group_t i;
+	ext4_group_t ngroups, group, i;
 	int cr;
 	int err = 0;
 	int bsbits;
@@ -1957,6 +1959,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 
 	sb = ac->ac_sb;
 	sbi = EXT4_SB(sb);
+	ngroups = ext4_get_groups_count(sb);
 	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
 
 	/* first, try the goal */
@@ -2017,11 +2020,11 @@ repeat:
 		 */
 		group = ac->ac_g_ex.fe_group;
 
-		for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
+		for (i = 0; i < ngroups; group++, i++) {
 			struct ext4_group_info *grp;
 			struct ext4_group_desc *desc;
 
-			if (group == EXT4_SB(sb)->s_groups_count)
+			if (group == ngroups)
 				group = 0;
 
 			/* quick check to skip empty groups */
@@ -2315,12 +2318,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
 static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 {
 	struct super_block *sb = seq->private;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 
-	if (*pos < 0 || *pos >= sbi->s_groups_count)
+	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
 		return NULL;
-
 	group = *pos + 1;
 	return (void *) ((unsigned long) group);
 }
@@ -2328,11 +2329,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct super_block *sb = seq->private;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 
 	++*pos;
-	if (*pos < 0 || *pos >= sbi->s_groups_count)
+	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
 		return NULL;
 	group = *pos + 1;
 	return (void *) ((unsigned long) group);
@@ -2587,6 +2587,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
 
 static int ext4_mb_init_backend(struct super_block *sb)
 {
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
 	int metalen;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2599,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	struct ext4_group_desc *desc;
 
 	/* This is the number of blocks used by GDT */
-	num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
 				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
 
 	/*
@@ -2644,7 +2645,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	for (i = 0; i < num_meta_group_infos; i++) {
 		if ((i + 1) == num_meta_group_infos)
 			metalen = sizeof(*meta_group_info) *
-				(sbi->s_groups_count -
+				(ngroups -
 					(i << EXT4_DESC_PER_BLOCK_BITS(sb)));
 		meta_group_info = kmalloc(metalen, GFP_KERNEL);
 		if (meta_group_info == NULL) {
@@ -2655,7 +2656,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 		sbi->s_group_info[i] = meta_group_info;
 	}
 
-	for (i = 0; i < sbi->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		desc = ext4_get_group_desc(sb, i, NULL);
 		if (desc == NULL) {
 			printk(KERN_ERR
@@ -2781,13 +2782,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 
 int ext4_mb_release(struct super_block *sb)
 {
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
 	int num_meta_group_infos;
 	struct ext4_group_info *grinfo;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (sbi->s_group_info) {
-		for (i = 0; i < sbi->s_groups_count; i++) {
+		for (i = 0; i < ngroups; i++) {
 			grinfo = ext4_get_group_info(sb, i);
 #ifdef DOUBLE_CHECK
 			kfree(grinfo->bb_bitmap);
@@ -2797,7 +2799,7 @@ int ext4_mb_release(struct super_block *sb)
 			ext4_unlock_group(sb, i);
 			kfree(grinfo);
 		}
-		num_meta_group_infos = (sbi->s_groups_count +
+		num_meta_group_infos = (ngroups +
 				EXT4_DESC_PER_BLOCK(sb) - 1) >>
 			EXT4_DESC_PER_BLOCK_BITS(sb);
 		for (i = 0; i < num_meta_group_infos; i++)
@@ -4121,7 +4123,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
-	ext4_group_t i;
+	ext4_group_t ngroups, i;
 
 	printk(KERN_ERR "EXT4-fs: Can't allocate:"
 			" Allocation context details:\n");
@@ -4145,7 +4147,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
 		ac->ac_found);
 	printk(KERN_ERR "EXT4-fs: groups: \n");
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	ngroups = ext4_get_groups_count(sb);
+	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
 		struct ext4_prealloc_space *pa;
 		ext4_grpblk_t start;
@@ -4469,13 +4472,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 
 static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 {
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 	int ret;
 	int freed = 0;
 
 	trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
 		   sb->s_id, needed);
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
+	for (i = 0; i < ngroups && needed > 0; i++) {
 		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
 		freed += ret;
 		needed -= ret;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 68c3a44..fcd7b24 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3557,9 +3557,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 	if (test_opt(sb, MINIX_DF)) {
 		sbi->s_overhead_last = 0;
 	} else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
-		ext4_group_t ngroups = sbi->s_groups_count, i;
+		ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 		ext4_fsblk_t overhead = 0;
-		smp_rmb();
 
 		/*
 		 * Compute the overhead (FS structures).  This is constant
-- 
cgit v0.10.2


From 114e9fc90703bd6aac0229fb559e97caa6c49770 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 25 Apr 2009 15:48:07 -0400
Subject: ext4: Remove outdated comment about lock_super()

ext4_fill_super() is no longer called by read_super(), and it is no
longer called with the superblock locked.  The
unlock_super()/lock_super() is no longer present, so this comment is
entirely superfluous.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fcd7b24..e3b35f2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2828,14 +2828,6 @@ no_journal:
 		goto failed_mount4;
 	};
 
-	/*
-	 * akpm: core read_super() calls in here with the superblock locked.
-	 * That deadlocks, because orphan cleanup needs to lock the superblock
-	 * in numerous places.  Here we just pop the lock - it's relatively
-	 * harmless, because we are now ready to accept write_super() requests,
-	 * and aviro says that's the only reason for hanging onto the
-	 * superblock lock.
-	 */
 	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
 	ext4_orphan_cleanup(sb, es);
 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
-- 
cgit v0.10.2


From a63c9eb2ce6f5028da90f282798232c4f398ceb8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 May 2009 01:59:42 -0400
Subject: ext4: ext4_mark_recovery_complete() doesn't need to use lock_super

The function ext4_mark_recovery_complete() is called from two call
paths: either (a) while mounting the filesystem, in which case there's
no danger of any other CPU calling write_super() until the mount is
completed, and (b) while remounting the filesystem read-write, in
which case the fs core has already locked the superblock.  This also
allows us to take out a very vile unlock_super()/lock_super() pair in
ext4_remount().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e3b35f2..45d0ada 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3219,13 +3219,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
 	if (jbd2_journal_flush(journal) < 0)
 		goto out;
 
-	lock_super(sb);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		ext4_commit_super(sb, 1);
 	}
-	unlock_super(sb);
 
 out:
 	jbd2_journal_unlock_updates(journal);
@@ -3436,15 +3434,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			    (sbi->s_mount_state & EXT4_VALID_FS))
 				es->s_state = cpu_to_le16(sbi->s_mount_state);
 
-			/*
-			 * We have to unlock super so that we can wait for
-			 * transactions.
-			 */
-			if (sbi->s_journal) {
-				unlock_super(sb);
+			if (sbi->s_journal)
 				ext4_mark_recovery_complete(sb, es);
-				lock_super(sb);
-			}
 		} else {
 			int ret;
 			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
-- 
cgit v0.10.2


From 3b9d4ed26680771295d904a6b83e88e620780893 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 25 Apr 2009 22:54:04 -0400
Subject: ext4: Replace lock/unlock_super() with an explicit lock for the
 orphan list

Use a separate lock to protect the orphan list, so we can stop
overloading the use of lock_super().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 57b71fe..4bda2f7 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -71,6 +71,7 @@ struct ext4_sb_info {
 	struct inode *s_journal_inode;
 	struct journal_s *s_journal;
 	struct list_head s_orphan;
+	struct mutex s_orphan_lock;
 	unsigned long s_commit_interval;
 	u32 s_max_batch_time;
 	u32 s_min_batch_time;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 22098e1..8018e49 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	if (!ext4_handle_valid(handle))
 		return 0;
 
-	lock_super(sb);
+	mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
 	if (!list_empty(&EXT4_I(inode)->i_orphan))
 		goto out_unlock;
 
@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 
 	/* @@@ FIXME: Observation from aviro:
 	 * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
-	 * here (on lock_super()), so race with ext4_link() which might bump
+	 * here (on s_orphan_lock), so race with ext4_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
+	 *
+	 * tytso, 4/25/2009: I'm not sure how that could happen;
+	 * shouldn't the fs core protect us from these sort of
+	 * unlink()/link() races?
 	 */
 	J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-	unlock_super(sb);
+	mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
 	ext4_std_error(inode->i_sb, err);
 	return err;
 }
@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 	if (!ext4_handle_valid(handle))
 		return 0;
 
-	lock_super(inode->i_sb);
-	if (list_empty(&ei->i_orphan)) {
-		unlock_super(inode->i_sb);
-		return 0;
-	}
+	mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
+	if (list_empty(&ei->i_orphan))
+		goto out;
 
 	ino_next = NEXT_ORPHAN(inode);
 	prev = ei->i_orphan.prev;
@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 out_err:
 	ext4_std_error(inode->i_sb, err);
 out:
-	unlock_super(inode->i_sb);
+	mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
 	return err;
 
 out_brelse:
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 45d0ada..7f43fde 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2645,6 +2645,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sb->dq_op = &ext4_quota_operations;
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+	mutex_init(&sbi->s_orphan_lock);
 
 	sb->s_root = NULL;
 
-- 
cgit v0.10.2


From 32ed5058ce90024efcd811254b4b1de0468099df Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 25 Apr 2009 22:53:39 -0400
Subject: ext4: Replace lock/unlock_super() with an explicit lock for resizing

Use a separate lock to protect s_groups_count and the other block
group descriptors which get changed via an on-line resize operation,
so we can stop overloading the use of lock_super().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 4bda2f7..2d36223 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -72,6 +72,7 @@ struct ext4_sb_info {
 	struct journal_s *s_journal;
 	struct list_head s_orphan;
 	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
 	unsigned long s_commit_interval;
 	u32 s_max_batch_time;
 	u32 s_min_batch_time;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 546c7dd..e8ded13 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -193,7 +193,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		err = -EBUSY;
 		goto exit_journal;
@@ -302,7 +302,7 @@ exit_bh:
 	brelse(bh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 
@@ -643,11 +643,12 @@ exit_free:
  * important part is that the new block and inode counts are in the backup
  * superblocks, and the location of the new group metadata in the GDT backups.
  *
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted.  We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time.  The resize which changed s_groups_count will backup again.
+ * We do not need take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted.  We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
  */
 static void update_backups(struct super_block *sb,
 			   int blk_off, char *data, int size)
@@ -809,7 +810,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		ext4_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
@@ -840,7 +841,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         /*
          * OK, now we've set up the new group.  Time to make it active.
          *
-         * Current kernels don't lock all allocations via lock_super(),
+         * We do not lock all allocations via s_resize_lock
          * so we have to be safe wrt. concurrent accesses the group
          * data.  So we need to be careful to set all of the relevant
          * group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold lock_super
+	 * * Writers of s_groups_count *must* hold s_resize_lock
 	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold lock_super() over the access
+	 * * Readers must hold s_resize_lock over the access
 	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
@@ -948,7 +949,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	sb->s_dirt = 1;
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
@@ -986,7 +987,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking lock_super() below. */
+	 * taking the s_resize_lock below. */
 	o_blocks_count = ext4_blocks_count(es);
 	o_groups_count = EXT4_SB(sb)->s_groups_count;
 
@@ -1056,11 +1057,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&EXT4_SB(sb)->s_resize_lock);
 	if (o_blocks_count != ext4_blocks_count(es)) {
 		ext4_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
-		unlock_super(sb);
+		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
@@ -1070,14 +1071,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 						 EXT4_SB(sb)->s_sbh))) {
 		ext4_warning(sb, __func__,
 			     "error %d on journal write access", err);
-		unlock_super(sb);
+		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		goto exit_put;
 	}
 	ext4_blocks_count_set(es, o_blocks_count + add);
 	ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
-	unlock_super(sb);
+	mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 	ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 	/* We add the blocks to the bitmap and set the group need init bit */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7f43fde..1fbf090 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2646,6 +2646,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
+	mutex_init(&sbi->s_resize_lock);
 
 	sb->s_root = NULL;
 
-- 
cgit v0.10.2


From 701970b3a83cc639c1ec8fc6f40a7871cb99426f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 23:11:22 -0400
Subject: tracing/events: make modules have their own file_operations structure

For proper module reference counting, the file_operations that modules use
must have the "owner" field set to the module. Unfortunately, the trace events
use share file_operations. The same file_operations are used by all both
kernel core and all modules.

This patch makes the modules allocate their own file_operations and
copies the functions from the core kernel. This allows those file
operations to be owned by the module.

Care is taken to free this code on module unload.

Thanks to Greg KH for reminding me that file_operations must be owned
by the module to have reference counting take place.

[ Impact: fix modular tracepoints / potential crash ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index b920815..be4d3a4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -770,7 +770,11 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 }
 
 static int
-event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
+event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
+		 const struct file_operations *id,
+		 const struct file_operations *enable,
+		 const struct file_operations *filter,
+		 const struct file_operations *format)
 {
 	struct dentry *entry;
 	int ret;
@@ -800,11 +804,11 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 
 	if (call->regfunc)
 		entry = trace_create_file("enable", 0644, call->dir, call,
-					  &ftrace_enable_fops);
+					  enable);
 
 	if (call->id)
 		entry = trace_create_file("id", 0444, call->dir, call,
-					  &ftrace_event_id_fops);
+					  id);
 
 	if (call->define_fields) {
 		ret = call->define_fields();
@@ -814,7 +818,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 			return ret;
 		}
 		entry = trace_create_file("filter", 0644, call->dir, call,
-					  &ftrace_event_filter_fops);
+					  filter);
 	}
 
 	/* A trace may not want to export its format */
@@ -822,7 +826,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 		return 0;
 
 	entry = trace_create_file("format", 0444, call->dir, call,
-				  &ftrace_event_format_fops);
+				  format);
 
 	return 0;
 }
@@ -833,8 +837,60 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 	     event++)
 
 #ifdef CONFIG_MODULES
+
+static LIST_HEAD(ftrace_module_file_list);
+
+/*
+ * Modules must own their file_operations to keep up with
+ * reference counting.
+ */
+struct ftrace_module_file_ops {
+	struct list_head		list;
+	struct module			*mod;
+	struct file_operations		id;
+	struct file_operations		enable;
+	struct file_operations		format;
+	struct file_operations		filter;
+};
+
+static struct ftrace_module_file_ops *
+trace_create_file_ops(struct module *mod)
+{
+	struct ftrace_module_file_ops *file_ops;
+
+	/*
+	 * This is a bit of a PITA. To allow for correct reference
+	 * counting, modules must "own" their file_operations.
+	 * To do this, we allocate the file operations that will be
+	 * used in the event directory.
+	 */
+
+	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
+	if (!file_ops)
+		return NULL;
+
+	file_ops->mod = mod;
+
+	file_ops->id = ftrace_event_id_fops;
+	file_ops->id.owner = mod;
+
+	file_ops->enable = ftrace_enable_fops;
+	file_ops->enable.owner = mod;
+
+	file_ops->filter = ftrace_event_filter_fops;
+	file_ops->filter.owner = mod;
+
+	file_ops->format = ftrace_event_format_fops;
+	file_ops->format.owner = mod;
+
+	list_add(&file_ops->list, &ftrace_module_file_list);
+
+	return file_ops;
+}
+
 static void trace_module_add_events(struct module *mod)
 {
+	struct ftrace_module_file_ops *file_ops = NULL;
 	struct ftrace_event_call *call, *start, *end;
 	struct dentry *d_events;
 
@@ -852,14 +908,27 @@ static void trace_module_add_events(struct module *mod)
 		/* The linker may leave blanks */
 		if (!call->name)
 			continue;
+
+		/*
+		 * This module has events, create file ops for this module
+		 * if not already done.
+		 */
+		if (!file_ops) {
+			file_ops = trace_create_file_ops(mod);
+			if (!file_ops)
+				return;
+		}
 		call->mod = mod;
 		list_add(&call->list, &ftrace_events);
-		event_create_dir(call, d_events);
+		event_create_dir(call, d_events,
+				 &file_ops->id, &file_ops->enable,
+				 &file_ops->filter, &file_ops->format);
 	}
 }
 
 static void trace_module_remove_events(struct module *mod)
 {
+	struct ftrace_module_file_ops *file_ops;
 	struct ftrace_event_call *call, *p;
 
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
@@ -874,6 +943,16 @@ static void trace_module_remove_events(struct module *mod)
 			list_del(&call->list);
 		}
 	}
+
+	/* Now free the file_operations */
+	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
+		if (file_ops->mod == mod)
+			break;
+	}
+	if (&file_ops->list != &ftrace_module_file_list) {
+		list_del(&file_ops->list);
+		kfree(file_ops);
+	}
 }
 
 static int trace_module_notify(struct notifier_block *self,
@@ -954,7 +1033,9 @@ static __init int event_trace_init(void)
 		if (!call->name)
 			continue;
 		list_add(&call->list, &ftrace_events);
-		event_create_dir(call, d_events);
+		event_create_dir(call, d_events, &ftrace_event_id_fops,
+				 &ftrace_enable_fops, &ftrace_event_filter_fops,
+				 &ftrace_event_format_fops);
 	}
 
 	ret = register_module_notifier(&trace_module_nb);
-- 
cgit v0.10.2


From 924a158a12c7e732179dd85ddd20848039e7bd71 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 26 Apr 2009 13:14:52 +0100
Subject: [ARM] Convert pmd_page() to be highmem safe

In the long run, we may want to place page tables in highmem.  However,
pmd_page() has traditionally been coded to convert the physical address
to a virtual one, which won't work with highmem pages.  Instead,
translate the physical address to a PFN, and then convert the PFN to a
struct page instead.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 110295c..1cd2d64 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -342,7 +342,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 	return __va(ptr);
 }
 
-#define pmd_page(pmd) virt_to_page(__va(pmd_val(pmd)))
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd)))
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
-- 
cgit v0.10.2


From 38f7b009a6ae1708fcf0f208aba9a9a4364bcfcf Mon Sep 17 00:00:00 2001
From: Hartley Sweeten <hartleys@visionengravers.com>
Date: Wed, 15 Apr 2009 23:18:26 +0100
Subject: [ARM] 5452/1: ep93x: rtc: use ioremap'ed addresses

Update the rtc-ep93xx driver to use ioremap'ed addresses.

This removes the dependency on <mach/hardware.h> and properly
reports the memory addresses used by the driver in /proc/iomem.

In addition, ep93xx_rtc_init() is updated to use
platform_driver_probe() instead of platform_driver_register().

Also, the device_create_file() calls are now properly checked for
error conditions.  The created sysfs files are also now removed
when the driver is removed.

The version number for the driver has been bumped at the request
of Alessandro Zummo.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index ae24486..c535e88 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -450,10 +450,19 @@ static struct amba_device uart3_device = {
 };
 
 
+static struct resource ep93xx_rtc_resource[] = {
+	{
+		.start		= EP93XX_RTC_PHYS_BASE,
+		.end		= EP93XX_RTC_PHYS_BASE + 0x10c - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
 static struct platform_device ep93xx_rtc_device = {
-       .name           = "ep93xx-rtc",
-       .id             = -1,
-       .num_resources  = 0,
+	.name		= "ep93xx-rtc",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(ep93xx_rtc_resource),
+	.resource	= ep93xx_rtc_resource,
 };
 
 
diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
index f66be12..78ac1bd 100644
--- a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
+++ b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
@@ -147,6 +147,7 @@
 #define EP93XX_PWM_BASE			(EP93XX_APB_VIRT_BASE + 0x00110000)
 
 #define EP93XX_RTC_BASE			(EP93XX_APB_VIRT_BASE + 0x00120000)
+#define EP93XX_RTC_PHYS_BASE		(EP93XX_APB_PHYS_BASE + 0x00120000)
 
 #define EP93XX_SYSCON_BASE		(EP93XX_APB_VIRT_BASE + 0x00130000)
 #define EP93XX_SYSCON_REG(x)		(EP93XX_SYSCON_BASE + (x))
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
index f7a3283..551332e 100644
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -12,32 +12,56 @@
 #include <linux/module.h>
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
-#include <mach/hardware.h>
+#include <linux/io.h>
+
+#define EP93XX_RTC_DATA			0x000
+#define EP93XX_RTC_MATCH		0x004
+#define EP93XX_RTC_STATUS		0x008
+#define  EP93XX_RTC_STATUS_INTR		 (1<<0)
+#define EP93XX_RTC_LOAD			0x00C
+#define EP93XX_RTC_CONTROL		0x010
+#define  EP93XX_RTC_CONTROL_MIE		 (1<<0)
+#define EP93XX_RTC_SWCOMP		0x108
+#define  EP93XX_RTC_SWCOMP_DEL_MASK	 0x001f0000
+#define  EP93XX_RTC_SWCOMP_DEL_SHIFT	 16
+#define  EP93XX_RTC_SWCOMP_INT_MASK	 0x0000ffff
+#define  EP93XX_RTC_SWCOMP_INT_SHIFT	 0
+
+#define DRV_VERSION "0.3"
 
-#define EP93XX_RTC_REG(x)	(EP93XX_RTC_BASE + (x))
-#define EP93XX_RTC_DATA		EP93XX_RTC_REG(0x0000)
-#define EP93XX_RTC_LOAD		EP93XX_RTC_REG(0x000C)
-#define EP93XX_RTC_SWCOMP	EP93XX_RTC_REG(0x0108)
-
-#define DRV_VERSION "0.2"
+/*
+ * struct device dev.platform_data is used to store our private data
+ * because struct rtc_device does not have a variable to hold it.
+ */
+struct ep93xx_rtc {
+	void __iomem	*mmio_base;
+};
 
-static int ep93xx_get_swcomp(struct device *dev, unsigned short *preload,
+static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload,
 				unsigned short *delete)
 {
-	unsigned short comp = __raw_readl(EP93XX_RTC_SWCOMP);
+	struct ep93xx_rtc *ep93xx_rtc = dev->platform_data;
+	unsigned long comp;
+
+	comp = __raw_readl(ep93xx_rtc->mmio_base + EP93XX_RTC_SWCOMP);
 
 	if (preload)
-		*preload = comp & 0xffff;
+		*preload = (comp & EP93XX_RTC_SWCOMP_INT_MASK)
+				>> EP93XX_RTC_SWCOMP_INT_SHIFT;
 
 	if (delete)
-		*delete = (comp >> 16) & 0x1f;
+		*delete = (comp & EP93XX_RTC_SWCOMP_DEL_MASK)
+				>> EP93XX_RTC_SWCOMP_DEL_SHIFT;
 
 	return 0;
 }
 
 static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-	unsigned long time = __raw_readl(EP93XX_RTC_DATA);
+	struct ep93xx_rtc *ep93xx_rtc = dev->platform_data;
+	unsigned long time;
+
+	 time = __raw_readl(ep93xx_rtc->mmio_base + EP93XX_RTC_DATA);
 
 	rtc_time_to_tm(time, tm);
 	return 0;
@@ -45,7 +69,9 @@ static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 static int ep93xx_rtc_set_mmss(struct device *dev, unsigned long secs)
 {
-	__raw_writel(secs + 1, EP93XX_RTC_LOAD);
+	struct ep93xx_rtc *ep93xx_rtc = dev->platform_data;
+
+	__raw_writel(secs + 1, ep93xx_rtc->mmio_base + EP93XX_RTC_LOAD);
 	return 0;
 }
 
@@ -53,7 +79,7 @@ static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq)
 {
 	unsigned short preload, delete;
 
-	ep93xx_get_swcomp(dev, &preload, &delete);
+	ep93xx_rtc_get_swcomp(dev, &preload, &delete);
 
 	seq_printf(seq, "preload\t\t: %d\n", preload);
 	seq_printf(seq, "delete\t\t: %d\n", delete);
@@ -67,54 +93,104 @@ static const struct rtc_class_ops ep93xx_rtc_ops = {
 	.proc		= ep93xx_rtc_proc,
 };
 
-static ssize_t ep93xx_sysfs_show_comp_preload(struct device *dev,
+static ssize_t ep93xx_rtc_show_comp_preload(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
 	unsigned short preload;
 
-	ep93xx_get_swcomp(dev, &preload, NULL);
+	ep93xx_rtc_get_swcomp(dev, &preload, NULL);
 
 	return sprintf(buf, "%d\n", preload);
 }
-static DEVICE_ATTR(comp_preload, S_IRUGO, ep93xx_sysfs_show_comp_preload, NULL);
+static DEVICE_ATTR(comp_preload, S_IRUGO, ep93xx_rtc_show_comp_preload, NULL);
 
-static ssize_t ep93xx_sysfs_show_comp_delete(struct device *dev,
+static ssize_t ep93xx_rtc_show_comp_delete(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
 	unsigned short delete;
 
-	ep93xx_get_swcomp(dev, NULL, &delete);
+	ep93xx_rtc_get_swcomp(dev, NULL, &delete);
 
 	return sprintf(buf, "%d\n", delete);
 }
-static DEVICE_ATTR(comp_delete, S_IRUGO, ep93xx_sysfs_show_comp_delete, NULL);
+static DEVICE_ATTR(comp_delete, S_IRUGO, ep93xx_rtc_show_comp_delete, NULL);
 
 
-static int __devinit ep93xx_rtc_probe(struct platform_device *dev)
+static int __init ep93xx_rtc_probe(struct platform_device *pdev)
 {
-	struct rtc_device *rtc = rtc_device_register("ep93xx",
-				&dev->dev, &ep93xx_rtc_ops, THIS_MODULE);
+	struct ep93xx_rtc *ep93xx_rtc;
+	struct resource *res;
+	struct rtc_device *rtc;
+	int err;
+
+	ep93xx_rtc = kzalloc(sizeof(struct ep93xx_rtc), GFP_KERNEL);
+	if (ep93xx_rtc == NULL)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL)
+		return -ENXIO;
+
+	res = request_mem_region(res->start, resource_size(res), pdev->name);
+	if (res == NULL)
+		return -EBUSY;
+
+	ep93xx_rtc->mmio_base = ioremap(res->start, resource_size(res));
+	if (ep93xx_rtc->mmio_base == NULL) {
+		err = -ENXIO;
+		goto fail;
+	}
 
+	pdev->dev.platform_data = ep93xx_rtc;
+
+	rtc = rtc_device_register(pdev->name,
+				&pdev->dev, &ep93xx_rtc_ops, THIS_MODULE);
 	if (IS_ERR(rtc)) {
-		return PTR_ERR(rtc);
+		err = PTR_ERR(rtc);
+		goto fail;
 	}
 
-	platform_set_drvdata(dev, rtc);
+	platform_set_drvdata(pdev, rtc);
 
-	device_create_file(&dev->dev, &dev_attr_comp_preload);
-	device_create_file(&dev->dev, &dev_attr_comp_delete);
+	err = device_create_file(&pdev->dev, &dev_attr_comp_preload);
+	if (err)
+		goto fail;
+	err = device_create_file(&pdev->dev, &dev_attr_comp_delete);
+	if (err) {
+		device_remove_file(&pdev->dev, &dev_attr_comp_preload);
+		goto fail;
+	}
 
 	return 0;
+
+fail:
+	if (ep93xx_rtc->mmio_base) {
+		iounmap(ep93xx_rtc->mmio_base);
+		pdev->dev.platform_data = NULL;
+	}
+	release_mem_region(res->start, resource_size(res));
+	return err;
 }
 
-static int __devexit ep93xx_rtc_remove(struct platform_device *dev)
+static int __exit ep93xx_rtc_remove(struct platform_device *pdev)
 {
-	struct rtc_device *rtc = platform_get_drvdata(dev);
+	struct rtc_device *rtc = platform_get_drvdata(pdev);
+	struct ep93xx_rtc *ep93xx_rtc = pdev->dev.platform_data;
+	struct resource *res;
+
+	/* cleanup sysfs */
+	device_remove_file(&pdev->dev, &dev_attr_comp_delete);
+	device_remove_file(&pdev->dev, &dev_attr_comp_preload);
+
+	rtc_device_unregister(rtc);
+
+	iounmap(ep93xx_rtc->mmio_base);
+	pdev->dev.platform_data = NULL;
 
- 	if (rtc)
-		rtc_device_unregister(rtc);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(res->start, resource_size(res));
 
-	platform_set_drvdata(dev, NULL);
+	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
@@ -122,23 +198,22 @@ static int __devexit ep93xx_rtc_remove(struct platform_device *dev)
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:ep93xx-rtc");
 
-static struct platform_driver ep93xx_rtc_platform_driver = {
+static struct platform_driver ep93xx_rtc_driver = {
 	.driver		= {
 		.name	= "ep93xx-rtc",
 		.owner	= THIS_MODULE,
 	},
-	.probe		= ep93xx_rtc_probe,
-	.remove		= __devexit_p(ep93xx_rtc_remove),
+	.remove		= __exit_p(ep93xx_rtc_remove),
 };
 
 static int __init ep93xx_rtc_init(void)
 {
-	return platform_driver_register(&ep93xx_rtc_platform_driver);
+        return platform_driver_probe(&ep93xx_rtc_driver, ep93xx_rtc_probe);
 }
 
 static void __exit ep93xx_rtc_exit(void)
 {
-	platform_driver_unregister(&ep93xx_rtc_platform_driver);
+	platform_driver_unregister(&ep93xx_rtc_driver);
 }
 
 MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
-- 
cgit v0.10.2


From 0a3ec21fcd311b26ab0f249d62960e127bc20ca8 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 26 Apr 2009 23:07:42 +0200
Subject: x86: beautify vmlinux_64.lds.S

Beautify vmlinux_64.lds.S:

 - Use tabs for indent
 - Located curly braces like in C code
 - Rearranged a few comments

There is no functional changes in this patch

The beautification is done to prepare a unification
of the _32 and the _64 variants of the linker scripts.

[ Impact: cleanup ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <20090426210742.GA3464@uranus.ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index c874250..6d5a5b0 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -15,69 +15,79 @@ OUTPUT_ARCH(i386:x86-64)
 ENTRY(phys_startup_64)
 jiffies_64 = jiffies;
 PHDRS {
-	text PT_LOAD FLAGS(5);	/* R_E */
-	data PT_LOAD FLAGS(7);	/* RWE */
-	user PT_LOAD FLAGS(7);	/* RWE */
+	text PT_LOAD FLAGS(5);		/* R_E */
+	data PT_LOAD FLAGS(7);		/* RWE */
+	user PT_LOAD FLAGS(7);		/* RWE */
 	data.init PT_LOAD FLAGS(7);	/* RWE */
 #ifdef CONFIG_SMP
 	percpu PT_LOAD FLAGS(7);	/* RWE */
 #endif
 	data.init2 PT_LOAD FLAGS(7);	/* RWE */
-	note PT_NOTE FLAGS(0);	/* ___ */
+	note PT_NOTE FLAGS(0);		/* ___ */
 }
 SECTIONS
 {
-  . = __START_KERNEL;
-  phys_startup_64 = startup_64 - LOAD_OFFSET;
-  .text :  AT(ADDR(.text) - LOAD_OFFSET) {
-	_text = .;			/* Text and read-only data */
-	/* First the code that has to be first for bootstrapping */
-	*(.text.head)
-	_stext = .;
-	/* Then the rest */
-	TEXT_TEXT
-	SCHED_TEXT
-	LOCK_TEXT
-	KPROBES_TEXT
-	IRQENTRY_TEXT
-	*(.fixup)
-	*(.gnu.warning)
-	_etext = .;		/* End of text section */
-  } :text = 0x9090
-
-  NOTES :text :note
-
-  . = ALIGN(16);		/* Exception table */
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-  	__start___ex_table = .;
-	 *(__ex_table)
-  	__stop___ex_table = .;
-  } :text = 0x9090
-
-  RODATA
-
-  . = ALIGN(PAGE_SIZE);		/* Align data segment to page size boundary */
-				/* Data */
-  .data : AT(ADDR(.data) - LOAD_OFFSET) {
-	DATA_DATA
-	CONSTRUCTORS
-	_edata = .;			/* End of data section */
-	} :data
+	. = __START_KERNEL;
+	phys_startup_64 = startup_64 - LOAD_OFFSET;
+
+	/* Text and read-only data */
+	.text :  AT(ADDR(.text) - LOAD_OFFSET) {
+		_text = .;
+		/* First the code that has to be first for bootstrapping */
+		*(.text.head)
+		_stext = .;
+		/* Then the rest */
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+		/* End of text section */
+		_etext = .;
+	} :text = 0x9090
+
+	NOTES :text :note
+
+	/* Exception table */
+	. = ALIGN(16);
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+		 *(__ex_table)
+		__stop___ex_table = .;
+	} :text = 0x9090
 
+	RODATA
 
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+	/* Align data segment to page size boundary */
 	. = ALIGN(PAGE_SIZE);
-	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-	*(.data.cacheline_aligned)
-  }
-  . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-  	*(.data.read_mostly)
-  }
+	/* Data */
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		DATA_DATA
+		CONSTRUCTORS
+		/* End of data section */
+		_edata = .;
+	} :data
+
+
+	.data.cacheline_aligned :
+		AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+		*(.data.cacheline_aligned)
+	}
+
+	. = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
+	.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
+		*(.data.read_mostly)
+	}
 
 #define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
+                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \
+                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
 
 #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
 #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
@@ -85,37 +95,53 @@ SECTIONS
 #define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
 #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
 
-  . = VSYSCALL_ADDR;
-  .vsyscall_0 :	 AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
-  __vsyscall_0 = VSYSCALL_VIRT_ADDR;
+	. = VSYSCALL_ADDR;
+	.vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
+		*(.vsyscall_0)
+	} :user
+
+	__vsyscall_0 = VSYSCALL_VIRT_ADDR;
+
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
+		*(.vsyscall_fn)
+	}
+
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
+		*(.vsyscall_gtod_data)
+	}
 
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) }
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data))
-		{ *(.vsyscall_gtod_data) }
-  vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
-  .vsyscall_clock : AT(VLOAD(.vsyscall_clock))
-		{ *(.vsyscall_clock) }
-  vsyscall_clock = VVIRT(.vsyscall_clock);
+	vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+	.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
+		*(.vsyscall_clock)
+	}
+	vsyscall_clock = VVIRT(.vsyscall_clock);
 
 
-  .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
-		{ *(.vsyscall_1) }
-  .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
-		{ *(.vsyscall_2) }
+	.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
+		*(.vsyscall_1)
+	}
+	.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
+		*(.vsyscall_2)
+	}
 
-  .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
-  vgetcpu_mode = VVIRT(.vgetcpu_mode);
+	.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
+		*(.vgetcpu_mode)
+	}
+	vgetcpu_mode = VVIRT(.vgetcpu_mode);
 
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
-  jiffies = VVIRT(.jiffies);
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.jiffies : AT(VLOAD(.jiffies)) {
+		*(.jiffies)
+	}
+	jiffies = VVIRT(.jiffies);
 
-  .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
-		{ *(.vsyscall_3) }
+	.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
+		*(.vsyscall_3)
+	}
 
-  . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
+	. = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
 
 #undef VSYSCALL_ADDR
 #undef VSYSCALL_PHYS_ADDR
@@ -125,156 +151,168 @@ SECTIONS
 #undef VVIRT_OFFSET
 #undef VVIRT
 
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-	. = ALIGN(THREAD_SIZE);	/* init_task */
-	*(.data.init_task)
-  }:data.init
+	/* init_task */
+	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+		. = ALIGN(THREAD_SIZE);
+		*(.data.init_task)
+	} :data.init
 
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	*(.data.page_aligned)
-  }
+	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		*(.data.page_aligned)
+	}
 
-  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
-	/* might get freed after init */
-	. = ALIGN(PAGE_SIZE);
-	__smp_alt_begin = .;
-	__smp_locks = .;
-	*(.smp_locks)
-	__smp_locks_end = .;
+	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+		/* might get freed after init */
+		. = ALIGN(PAGE_SIZE);
+		__smp_alt_begin = .;
+		__smp_locks = .;
+		*(.smp_locks)
+		__smp_locks_end = .;
+		. = ALIGN(PAGE_SIZE);
+		__smp_alt_end = .;
+	}
+
+	/* Init code and data */
 	. = ALIGN(PAGE_SIZE);
-	__smp_alt_end = .;
-  }
-
-  . = ALIGN(PAGE_SIZE);		/* Init code and data */
-  __init_begin = .;	/* paired with __init_end */
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-	_sinittext = .;
-	INIT_TEXT
-	_einittext = .;
-  }
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-	__initdata_begin = .;
-	INIT_DATA
-	__initdata_end = .;
-   }
-
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-	. = ALIGN(16);
-	__setup_start = .;
-	*(.init.setup)
-	__setup_end = .;
-  }
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-	__initcall_start = .;
-	INITCALLS
-	__initcall_end = .;
-  }
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-	__con_initcall_start = .;
-	*(.con_initcall.init)
-	__con_initcall_end = .;
-  }
-  .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-	__x86_cpu_dev_start = .;
-	*(.x86_cpu_dev.init)
-	__x86_cpu_dev_end = .;
-  }
-  SECURITY_INIT
-
-  . = ALIGN(8);
-  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-	__parainstructions = .;
-       *(.parainstructions)
-	__parainstructions_end = .;
-  }
-
-  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+	__init_begin = .;	/* paired with __init_end */
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		_sinittext = .;
+		INIT_TEXT
+		_einittext = .;
+	}
+
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+		__initdata_begin = .;
+		INIT_DATA
+		__initdata_end = .;
+	}
+
+	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+		. = ALIGN(16);
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
+
+	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+		__initcall_start = .;
+		INITCALLS
+		__initcall_end = .;
+	}
+
+	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
+	}
+
+	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
+		__x86_cpu_dev_start = .;
+		*(.x86_cpu_dev.init)
+		__x86_cpu_dev_end = .;
+	}
+
+	SECURITY_INIT
+
 	. = ALIGN(8);
-	__alt_instructions = .;
-	*(.altinstructions)
-	__alt_instructions_end = .;
-  }
-  .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-	*(.altinstr_replacement)
-  }
-  /* .exit.text is discard at runtime, not link time, to deal with references
-     from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-	EXIT_TEXT
-  }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
-	EXIT_DATA
-  }
+	.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+		__parainstructions = .;
+		*(.parainstructions)
+		__parainstructions_end = .;
+	}
+
+	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+		. = ALIGN(8);
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+
+	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+		*(.altinstr_replacement)
+	}
+
+	/*
+	 * .exit.text is discard at runtime, not link time, to deal with
+	 *  references from .altinstructions and .eh_frame
+	 */
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+		EXIT_TEXT
+	}
+
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+		EXIT_DATA
+	}
 
 #ifdef CONFIG_BLK_DEV_INITRD
-  . = ALIGN(PAGE_SIZE);
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-	__initramfs_start = .;
-	*(.init.ramfs)
-	__initramfs_end = .;
-  }
+	. = ALIGN(PAGE_SIZE);
+	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
 #endif
 
 #ifdef CONFIG_SMP
-  /*
-   * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
-   * output PHDR, so the next output section - __data_nosave - should
-   * start another section data.init2.  Also, pda should be at the head of
-   * percpu area.  Preallocate it and define the percpu offset symbol
-   * so that it can be accessed as a percpu variable.
-   */
-  . = ALIGN(PAGE_SIZE);
-  PERCPU_VADDR(0, :percpu)
+	/*
+	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
+	 * output PHDR, so the next output section - __data_nosave - should
+	 * start another section data.init2.  Also, pda should be at the head of
+	 * percpu area.  Preallocate it and define the percpu offset symbol
+	 * so that it can be accessed as a percpu variable.
+	 */
+	. = ALIGN(PAGE_SIZE);
+	PERCPU_VADDR(0, :percpu)
 #else
-  PERCPU(PAGE_SIZE)
+	PERCPU(PAGE_SIZE)
 #endif
 
-  . = ALIGN(PAGE_SIZE);
-  __init_end = .;
-
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	__nosave_begin = .;
-	*(.data.nosave)
-	. = ALIGN(PAGE_SIZE);
-	__nosave_end = .;
-  } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
-
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
 	. = ALIGN(PAGE_SIZE);
-	__bss_start = .;		/* BSS */
-	*(.bss.page_aligned)
-	*(.bss)
-	__bss_stop = .;
-  }
+	__init_end = .;
+
+	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+	} :data.init2
+	/* use another section data.init2, see PERCPU_VADDR() above */
+
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		__bss_start = .;		/* BSS */
+		*(.bss.page_aligned)
+		*(.bss)
+		__bss_stop = .;
+	}
 
-  .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE);
-	__brk_base = . ;
- 	. += 64 * 1024 ;	/* 64k alignment slop space */
-	*(.brk_reservation)	/* areas brk users have reserved */
-	__brk_limit = . ;
-  }
-
-  _end = . ;
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-	*(.exitcall.exit)
-	*(.eh_frame)
-	*(.discard)
+	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		__brk_base = .;
+		. += 64 * 1024;		/* 64k alignment slop space */
+		*(.brk_reservation)	/* areas brk users have reserved */
+		__brk_limit = .;
 	}
 
-  STABS_DEBUG
+	_end = . ;
 
-  DWARF_DEBUG
+	/* Sections to be discarded */
+	/DISCARD/ : {
+		*(.exitcall.exit)
+		*(.eh_frame)
+		*(.discard)
+	}
+
+	STABS_DEBUG
+	DWARF_DEBUG
 }
 
- /*
-  * Per-cpu symbols which need to be offset from __per_cpu_load
-  * for the boot processor.
-  */
+/*
+ * Per-cpu symbols which need to be offset from __per_cpu_load
+ * for the boot processor.
+ */
 #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
 INIT_PER_CPU(gdt_page);
 INIT_PER_CPU(irq_stack_union);
-- 
cgit v0.10.2


From 51b26ada79b605ed709ddcedbb6012e8f8e0ebed Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 26 Apr 2009 10:12:47 -0700
Subject: x86: unify arch/x86/boot/compressed/vmlinux_*.lds

Look at the:

	diff -u arch/x86/boot/compressed/vmlinux_*.lds

output and realize that they're basially exactly the same except for
trivial naming differences, and the fact that the 64-bit version has a
"pgtable" thing.

So unify them.

There's some trivial cleanup there (make the output format a Kconfig thing
rather than doing #ifdef's for it, and unify both 32-bit and 64-bit BSS
end to "_ebss", where 32-bit used to use the traditional "_end"), but
other than that it's really very mindless and straigt conversion.

For example, I think we should aim to remove "startup_32" vs "startup_64",
and just call it "startup", and get rid of one more difference. I didn't
do that.

Also, notice the comment in the unified vmlinux.lds.S talks about
"head_64" and "startup_32" which is an odd and incorrect mix, but that was
actually what the old 64-bit only lds file had, so the confusion isn't
new, and now that mixing is arguably more accurate thanks to the
vmlinux.lds.S file being shared between the two cases ;)

[ Impact: cleanup, unification ]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bc25b9f..039c3f0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -47,6 +47,11 @@ config X86
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
 
+config OUTPUT_FORMAT
+	string
+	default "elf32-i386" if X86_32
+	default "elf64-x86-64" if X86_64
+
 config ARCH_DEFCONFIG
 	string
 	default "arch/x86/configs/i386_defconfig" if X86_32
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 65551c9..0f4b5e2 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -19,7 +19,7 @@ KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 LDFLAGS := -m elf_$(UTS_MACHINE)
 LDFLAGS_vmlinux := -T
 
-$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
 
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3a8a866..85bd328 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -88,9 +88,9 @@ ENTRY(startup_32)
  * where decompression in place becomes safe.
  */
 	pushl %esi
-	leal _end(%ebp), %esi
-	leal _end(%ebx), %edi
-	movl $(_end - startup_32), %ecx
+	leal _ebss(%ebp), %esi
+	leal _ebss(%ebx), %edi
+	movl $(_ebss - startup_32), %ecx
 	std
 	rep
 	movsb
@@ -121,7 +121,7 @@ relocated:
  */
 	xorl %eax,%eax
 	leal _edata(%ebx),%edi
-	leal _end(%ebx), %ecx
+	leal _ebss(%ebx), %ecx
 	subl %edi,%ecx
 	cld
 	rep
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
new file mode 100644
index 0000000..ffcb1913
--- /dev/null
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,57 @@
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+
+#ifdef CONFIG_X86_64
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(startup_64)
+#else
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+#endif
+
+SECTIONS
+{
+	/* Be careful parts of head_64.S assume startup_32 is at
+	 * address 0.
+	 */
+	. = 0;
+	.text.head : {
+		_head = . ;
+		*(.text.head)
+		_ehead = . ;
+	}
+	.rodata.compressed : {
+		*(.rodata.compressed)
+	}
+	.text :	{
+		_text = .; 	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+#ifdef CONFIG_X86_64
+		. = ALIGN(8);
+		_end_before_pgt = . ;
+		. = ALIGN(4096);
+		pgtable = . ;
+		. = . + 4096 * 6;
+#endif
+		_ebss = .;
+	}
+}
diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds
deleted file mode 100644
index bb3c483..0000000
--- a/arch/x86/boot/compressed/vmlinux_32.lds
+++ /dev/null
@@ -1,43 +0,0 @@
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(startup_32)
-SECTIONS
-{
-	/* Be careful parts of head_32.S assume startup_32 is at
-	 * address 0.
-	 */
-	. = 0;
-	.text.head : {
-		_head = . ;
-		*(.text.head)
-		_ehead = . ;
-	}
-	.rodata.compressed : {
-		*(.rodata.compressed)
-	}
-	.text :	{
-		_text = .; 	/* Text */
-		*(.text)
-		*(.text.*)
-		_etext = . ;
-	}
-	.rodata : {
-		_rodata = . ;
-		*(.rodata)	 /* read-only data */
-		*(.rodata.*)
-		_erodata = . ;
-	}
-	.data :	{
-		_data = . ;
-		*(.data)
-		*(.data.*)
-		_edata = . ;
-	}
-	.bss : {
-		_bss = . ;
-		*(.bss)
-		*(.bss.*)
-		*(COMMON)
-		_end = . ;
-	}
-}
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds
deleted file mode 100644
index bef1ac8..0000000
--- a/arch/x86/boot/compressed/vmlinux_64.lds
+++ /dev/null
@@ -1,48 +0,0 @@
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(startup_64)
-SECTIONS
-{
-	/* Be careful parts of head_64.S assume startup_32 is at
-	 * address 0.
-	 */
-	. = 0;
-	.text.head : {
-		_head = . ;
-		*(.text.head)
-		_ehead = . ;
-	}
-	.rodata.compressed : {
-		*(.rodata.compressed)
-	}
-	.text :	{
-		_text = .; 	/* Text */
-		*(.text)
-		*(.text.*)
-		_etext = . ;
-	}
-	.rodata : {
-		_rodata = . ;
-		*(.rodata)	 /* read-only data */
-		*(.rodata.*)
-		_erodata = . ;
-	}
-	.data :	{
-		_data = . ;
-		*(.data)
-		*(.data.*)
-		_edata = . ;
-	}
-	.bss : {
-		_bss = . ;
-		*(.bss)
-		*(.bss.*)
-		*(COMMON)
-		. = ALIGN(8);
-		_end_before_pgt = . ;
-		. = ALIGN(4096);
-		pgtable = . ;
-		. = . + 4096 * 6;
-		_ebss = .;
-	}
-}
-- 
cgit v0.10.2


From fc4967b8c6a1540ebce9ac48e44b8d44e7fac971 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 27 Apr 2009 14:06:26 +0900
Subject: sh: update defconfigs for PCI changes.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig
index c8d982a..022f70e 100644
--- a/arch/sh/configs/ap325rxa_defconfig
+++ b/arch/sh/configs/ap325rxa_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:46:53 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:42:06 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -74,6 +75,7 @@ CONFIG_EMBEDDED=y
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -92,12 +94,15 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +115,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -159,6 +163,7 @@ CONFIG_ARCH_SHMOBILE=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 CONFIG_CPU_SUBTYPE_SH7723=y
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -168,8 +173,6 @@ CONFIG_CPU_SUBTYPE_SH7723=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -573,6 +576,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -695,6 +699,7 @@ CONFIG_DEVKMEM=y
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=6
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
@@ -1030,6 +1035,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
@@ -1052,6 +1058,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1100,6 +1111,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1191,10 +1203,24 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1303,6 +1329,7 @@ CONFIG_CRYPTO_CBC=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig
index fa5fc1e..40301f8 100644
--- a/arch/sh/configs/cayman_defconfig
+++ b/arch/sh/configs/cayman_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:49:14 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:42:53 2009
 #
 CONFIG_SUPERH=y
 # CONFIG_SUPERH32 is not set
@@ -40,6 +40,7 @@ CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SWAP=y
 # CONFIG_SYSVIPC is not set
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -70,6 +71,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -89,10 +91,13 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -105,7 +110,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -127,39 +131,6 @@ CONFIG_DEFAULT_IOSCHED="cfq"
 # System type
 #
 CONFIG_CPU_SH5=y
-# CONFIG_CPU_SUBTYPE_SH7619 is not set
-# CONFIG_CPU_SUBTYPE_SH7201 is not set
-# CONFIG_CPU_SUBTYPE_SH7203 is not set
-# CONFIG_CPU_SUBTYPE_SH7206 is not set
-# CONFIG_CPU_SUBTYPE_SH7263 is not set
-# CONFIG_CPU_SUBTYPE_MXG is not set
-# CONFIG_CPU_SUBTYPE_SH7705 is not set
-# CONFIG_CPU_SUBTYPE_SH7706 is not set
-# CONFIG_CPU_SUBTYPE_SH7707 is not set
-# CONFIG_CPU_SUBTYPE_SH7708 is not set
-# CONFIG_CPU_SUBTYPE_SH7709 is not set
-# CONFIG_CPU_SUBTYPE_SH7710 is not set
-# CONFIG_CPU_SUBTYPE_SH7712 is not set
-# CONFIG_CPU_SUBTYPE_SH7720 is not set
-# CONFIG_CPU_SUBTYPE_SH7721 is not set
-# CONFIG_CPU_SUBTYPE_SH7750 is not set
-# CONFIG_CPU_SUBTYPE_SH7091 is not set
-# CONFIG_CPU_SUBTYPE_SH7750R is not set
-# CONFIG_CPU_SUBTYPE_SH7750S is not set
-# CONFIG_CPU_SUBTYPE_SH7751 is not set
-# CONFIG_CPU_SUBTYPE_SH7751R is not set
-# CONFIG_CPU_SUBTYPE_SH7760 is not set
-# CONFIG_CPU_SUBTYPE_SH4_202 is not set
-# CONFIG_CPU_SUBTYPE_SH7723 is not set
-# CONFIG_CPU_SUBTYPE_SH7763 is not set
-# CONFIG_CPU_SUBTYPE_SH7770 is not set
-# CONFIG_CPU_SUBTYPE_SH7780 is not set
-# CONFIG_CPU_SUBTYPE_SH7785 is not set
-# CONFIG_CPU_SUBTYPE_SH7786 is not set
-# CONFIG_CPU_SUBTYPE_SHX3 is not set
-# CONFIG_CPU_SUBTYPE_SH7343 is not set
-# CONFIG_CPU_SUBTYPE_SH7722 is not set
-# CONFIG_CPU_SUBTYPE_SH7366 is not set
 CONFIG_CPU_SUBTYPE_SH5_101=y
 # CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
@@ -279,8 +250,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -492,6 +461,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -568,6 +538,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -591,6 +562,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -779,6 +751,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_F71805F is not set
 # CONFIG_SENSORS_F71882FG is not set
 # CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
@@ -1042,7 +1015,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1082,6 +1054,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1105,6 +1078,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1146,8 +1124,13 @@ CONFIG_MINIX_FS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1208,6 +1191,9 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 CONFIG_SCHEDSTATS=y
 # CONFIG_TIMER_STATS is not set
@@ -1241,15 +1227,21 @@ CONFIG_FRAME_POINTER=y
 # CONFIG_LATENCYTOP is not set
 # CONFIG_SYSCTL_SYSCALL_CHECK is not set
 # CONFIG_PAGE_POISONING is not set
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 # CONFIG_EARLY_SCIF_CONSOLE is not set
 # CONFIG_DEBUG_BOOTMEM is not set
@@ -1354,6 +1346,7 @@ CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig
index 5c11236..1f3cc98 100644
--- a/arch/sh/configs/dreamcast_defconfig
+++ b/arch/sh/configs/dreamcast_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:51:48 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:44:27 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -71,6 +72,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -90,6 +92,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -98,6 +101,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +115,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -156,6 +160,7 @@ CONFIG_CPU_SUBTYPE_SH7091=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -165,8 +170,6 @@ CONFIG_CPU_SUBTYPE_SH7091=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -271,7 +274,7 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_SH_DMA_API=y
 CONFIG_SH_DMA=y
 CONFIG_SH_DMA_IRQ_MULTI=y
-CONFIG_NR_ONCHIP_DMA_CHANNELS=6
+CONFIG_NR_ONCHIP_DMA_CHANNELS=4
 CONFIG_NR_DMA_CHANNELS_BOOL=y
 CONFIG_NR_DMA_CHANNELS=9
 # CONFIG_PVR2_DMA is not set
@@ -320,7 +323,6 @@ CONFIG_CMDLINE="console=ttySC1,115200 panic=3"
 CONFIG_MAPLE=y
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -602,6 +604,7 @@ CONFIG_INPUT_MOUSE=y
 # CONFIG_MOUSE_APPLETOUCH is not set
 # CONFIG_MOUSE_BCM5974 is not set
 # CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_MOUSE_MAPLE is not set
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 # CONFIG_INPUT_TOUCHSCREEN is not set
@@ -812,7 +815,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -867,6 +869,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -910,6 +917,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -947,10 +955,24 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1051,6 +1073,7 @@ CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig
index f4c34b0..d709245 100644
--- a/arch/sh/configs/edosk7705_defconfig
+++ b/arch/sh/configs/edosk7705_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:54:02 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:45:04 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -56,6 +57,7 @@ CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 # CONFIG_PRINTK is not set
 # CONFIG_BUG is not set
@@ -74,12 +76,15 @@ CONFIG_SHMEM=y
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_BASE_SMALL=1
 # CONFIG_MODULES is not set
@@ -114,6 +119,7 @@ CONFIG_CPU_SUBTYPE_SH7705=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -123,8 +129,6 @@ CONFIG_CPU_SUBTYPE_SH7705=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -382,6 +386,10 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+
+#
 # Pseudo filesystems
 #
 # CONFIG_PROC_FS is not set
@@ -409,10 +417,22 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -426,6 +446,7 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig
index 7825c26..a822b1d 100644
--- a/arch/sh/configs/edosk7760_defconfig
+++ b/arch/sh/configs/edosk7760_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:54:57 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:45:25 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -67,7 +69,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -77,6 +78,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -96,6 +98,7 @@ CONFIG_COMPAT_BRK=y
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -103,6 +106,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -115,7 +120,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -161,6 +165,7 @@ CONFIG_CPU_SH4=y
 CONFIG_CPU_SUBTYPE_SH7760=y
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -170,8 +175,6 @@ CONFIG_CPU_SUBTYPE_SH7760=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -815,6 +818,7 @@ CONFIG_EXT2_FS_XATTR=y
 # CONFIG_EXT2_FS_SECURITY is not set
 CONFIG_EXT2_FS_XIP=y
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -839,6 +843,11 @@ CONFIG_INOTIFY_USER=y
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -883,6 +892,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -964,6 +974,9 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 CONFIG_TIMER_STATS=y
@@ -1001,6 +1014,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1010,9 +1024,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1126,6 +1145,7 @@ CONFIG_CRYPTO_DES=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index ebb4c37..c5b5007 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 17:58:18 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:46:26 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -78,6 +79,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -106,6 +108,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -164,6 +167,7 @@ CONFIG_CPU_SH4A=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 CONFIG_CPU_SUBTYPE_SH7763=y
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -173,8 +177,6 @@ CONFIG_CPU_SUBTYPE_SH7763=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -548,6 +550,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -919,6 +922,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -943,6 +947,11 @@ CONFIG_AUTOFS4_FS=y
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -985,8 +994,13 @@ CONFIG_CRAMFS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1075,11 +1089,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1179,6 +1207,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index 82b113a..8e13027 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:01:05 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:47:15 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -67,6 +68,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -85,12 +87,15 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -98,7 +103,6 @@ CONFIG_BASE_SMALL=0
 # CONFIG_MODULES is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -144,6 +148,7 @@ CONFIG_CPU_SUBTYPE_SH7709=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -153,8 +158,6 @@ CONFIG_CPU_SUBTYPE_SH7709=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -385,6 +388,7 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_SRP_ATTRS is not set
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
@@ -431,6 +435,7 @@ CONFIG_KEYBOARD_HP6XX=y
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -674,6 +679,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -719,6 +729,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -786,10 +797,23 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -898,6 +922,7 @@ CONFIG_CRYPTO_MD5=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index b6fa4a7..7f549ae 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:02:54 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:47:48 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -69,6 +70,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,6 +90,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -95,6 +98,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -107,7 +112,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -153,6 +157,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -162,8 +167,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -292,8 +295,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -602,6 +603,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -706,6 +708,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -729,6 +732,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -922,6 +926,7 @@ CONFIG_VIDEO_HELPER_CHIPS_AUTO=y
 # CONFIG_SOC_CAMERA is not set
 CONFIG_V4L_USB_DRIVERS=y
 # CONFIG_USB_VIDEO_CLASS is not set
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y
 # CONFIG_USB_GSPCA is not set
 # CONFIG_VIDEO_HDPVR is not set
 CONFIG_VIDEO_USBVIDEO=m
@@ -994,15 +999,17 @@ CONFIG_USB_HID=m
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=m
 CONFIG_HID_APPLE=m
 CONFIG_HID_BELKIN=m
 CONFIG_HID_CHERRY=m
 CONFIG_HID_CHICONY=m
 CONFIG_HID_CYPRESS=m
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=m
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=m
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=m
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1197,6 +1204,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1222,6 +1230,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=m
@@ -1270,10 +1283,15 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 CONFIG_UFS_FS=m
 # CONFIG_UFS_FS_WRITE is not set
 # CONFIG_UFS_DEBUG is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
@@ -1364,10 +1382,23 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -1469,6 +1500,7 @@ CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig
index 92c515c..a7db539 100644
--- a/arch/sh/configs/lboxre2_defconfig
+++ b/arch/sh/configs/lboxre2_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:06:51 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:48:54 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -69,6 +70,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,6 +90,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -95,6 +98,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -107,7 +112,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -153,6 +157,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -162,8 +167,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -293,8 +296,6 @@ CONFIG_CMDLINE="console=ttySC1,115200 root=/dev/sda1"
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -542,6 +543,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -702,6 +704,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -725,6 +728,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -931,7 +935,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1007,6 +1010,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1029,6 +1033,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1073,8 +1082,13 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -1151,10 +1165,23 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -1256,6 +1283,7 @@ CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig
index 26586c2..58bec61 100644
--- a/arch/sh/configs/magicpanelr2_defconfig
+++ b/arch/sh/configs/magicpanelr2_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:07:39 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:49:32 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -39,6 +40,7 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 # CONFIG_TASKSTATS is not set
@@ -66,7 +68,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -76,6 +77,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -94,6 +96,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -101,6 +104,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -113,7 +118,6 @@ CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -159,6 +163,7 @@ CONFIG_CPU_SUBTYPE_SH7720=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -168,8 +173,6 @@ CONFIG_CPU_SUBTYPE_SH7720=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -813,6 +816,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 # CONFIG_EXT3_FS_XATTR is not set
 # CONFIG_EXT4_FS is not set
 CONFIG_JBD=y
@@ -831,6 +835,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -884,6 +893,7 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -965,6 +975,7 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1000,6 +1011,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1008,9 +1020,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1035,6 +1052,7 @@ CONFIG_DUMP_CODE=y
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig
index 7517835..2886fc8 100644
--- a/arch/sh/configs/microdev_defconfig
+++ b/arch/sh/configs/microdev_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:11:13 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:50:51 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -65,7 +66,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -74,6 +74,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -92,12 +93,15 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -105,7 +109,6 @@ CONFIG_BASE_SMALL=0
 # CONFIG_MODULES is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -151,6 +154,7 @@ CONFIG_CPU_SH4=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 CONFIG_CPU_SUBTYPE_SH4_202=y
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -160,8 +164,6 @@ CONFIG_CPU_SUBTYPE_SH4_202=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -647,6 +649,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -669,6 +672,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -715,6 +723,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -802,10 +811,24 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -914,6 +937,7 @@ CONFIG_CRYPTO_DES=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index a8720f9..8ecceb4 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:14:03 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:51:34 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -67,7 +68,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -76,6 +76,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -104,6 +105,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -115,7 +118,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -165,6 +167,7 @@ CONFIG_ARCH_SHMOBILE=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -174,8 +177,6 @@ CONFIG_ARCH_SHMOBILE=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 CONFIG_CPU_SUBTYPE_SH7722=y
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -577,6 +578,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -873,7 +875,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -1012,6 +1013,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1056,6 +1062,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1105,11 +1112,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1218,6 +1239,7 @@ CONFIG_CRYPTO_WORKQUEUE=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/polaris_defconfig b/arch/sh/configs/polaris_defconfig
index df2d177..2b95072 100644
--- a/arch/sh/configs/polaris_defconfig
+++ b/arch/sh/configs/polaris_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:16:48 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:52:19 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@ CONFIG_LOCALVERSION=""
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 # CONFIG_TASKSTATS is not set
@@ -76,6 +78,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -94,6 +97,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -101,6 +105,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -113,7 +119,6 @@ CONFIG_MODVERSIONS=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -159,6 +164,7 @@ CONFIG_CPU_SUBTYPE_SH7709=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -168,8 +174,6 @@ CONFIG_CPU_SUBTYPE_SH7709=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -778,6 +782,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -831,6 +840,7 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -874,6 +884,9 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -914,6 +927,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -923,9 +937,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -950,6 +969,7 @@ CONFIG_DUMP_CODE=y
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index 7def4df..68e5eff 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:20:17 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:53:28 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -78,6 +79,7 @@ CONFIG_UID16=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -107,6 +109,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=0
@@ -118,7 +122,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -165,6 +168,7 @@ CONFIG_CPU_SH4A=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 CONFIG_CPU_SUBTYPE_SH7780=y
@@ -174,8 +178,6 @@ CONFIG_CPU_SUBTYPE_SH7780=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -313,8 +315,6 @@ CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1"
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -536,6 +536,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -696,6 +697,7 @@ CONFIG_E1000=m
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -719,6 +721,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -920,6 +923,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_F71805F is not set
 # CONFIG_SENSORS_F71882FG is not set
 # CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
@@ -1027,7 +1031,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1120,6 +1123,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1143,6 +1147,11 @@ CONFIG_INOTIFY_USER=y
 CONFIG_FUSE_FS=m
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1191,6 +1200,7 @@ CONFIG_MINIX_FS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1279,6 +1289,9 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1316,6 +1329,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1325,11 +1339,16 @@ CONFIG_TRACING=y
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1449,6 +1468,7 @@ CONFIG_CRYPTO_DES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index cb134ff..890fe3c 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:24:35 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:55:10 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -43,6 +44,7 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -78,6 +80,7 @@ CONFIG_UID16=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -108,6 +111,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -120,7 +125,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -168,6 +172,7 @@ CONFIG_CPU_SHX2=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -177,8 +182,6 @@ CONFIG_CPU_SUBTYPE_SH7785=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -337,8 +340,6 @@ CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1"
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 # CONFIG_PCI_LEGACY is not set
@@ -561,6 +562,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -698,6 +700,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -721,6 +724,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -948,6 +952,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_F71805F is not set
 # CONFIG_SENSORS_F71882FG is not set
 # CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
@@ -970,6 +975,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
 # CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_SHT15 is not set
 # CONFIG_SENSORS_SIS5595 is not set
 # CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
@@ -1109,7 +1115,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1202,6 +1207,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1225,6 +1231,11 @@ CONFIG_INOTIFY_USER=y
 CONFIG_FUSE_FS=m
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1273,6 +1284,7 @@ CONFIG_MINIX_FS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1359,6 +1371,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1401,6 +1414,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1410,11 +1424,16 @@ CONFIG_TRACING=y
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1534,6 +1553,7 @@ CONFIG_CRYPTO_DES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig
index a037c74..fa43957 100644
--- a/arch/sh/configs/rsk7201_defconfig
+++ b/arch/sh/configs/rsk7201_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:29:08 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:56:29 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -65,7 +66,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -74,6 +74,7 @@ CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -100,6 +101,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_RT_MUTEXES=y
 CONFIG_BASE_SMALL=0
@@ -110,7 +113,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -157,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7201=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -166,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7201=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -603,6 +604,11 @@ CONFIG_EXT2_FS=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -651,8 +657,13 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -686,11 +697,24 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -705,6 +729,7 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig
index 9ae28e8..e3a65f8 100644
--- a/arch/sh/configs/rsk7203_defconfig
+++ b/arch/sh/configs/rsk7203_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:30:34 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:57:06 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -39,6 +40,7 @@ CONFIG_LOCALVERSION=""
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -70,7 +72,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -80,6 +81,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -106,6 +108,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_RT_MUTEXES=y
 CONFIG_BASE_SMALL=0
@@ -116,7 +120,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -163,6 +166,7 @@ CONFIG_CPU_SUBTYPE_SH7203=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -172,8 +176,6 @@ CONFIG_CPU_SUBTYPE_SH7203=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -750,15 +752,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -874,6 +878,7 @@ CONFIG_LEDS_CLASS=y
 # LED drivers
 #
 CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_GPIO_PLATFORM=y
 
 #
 # LED Triggers
@@ -882,6 +887,7 @@ CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_TIMER=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_BACKLIGHT=y
+# CONFIG_LEDS_TRIGGER_GPIO is not set
 CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 
 #
@@ -951,6 +957,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -989,8 +1000,13 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1033,6 +1049,9 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1076,6 +1095,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1083,11 +1103,16 @@ CONFIG_TRACING=y
 # CONFIG_FUNCTION_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1111,6 +1136,7 @@ CONFIG_DUMP_CODE=y
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig
index c0f741a..a4a59f6 100644
--- a/arch/sh/configs/rts7751r2d1_defconfig
+++ b/arch/sh/configs/rts7751r2d1_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:33:25 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:58:13 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -70,6 +71,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -99,6 +101,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +114,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -156,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -165,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -302,8 +304,6 @@ CONFIG_CMDLINE="console=tty0 console=ttySC0,115200 root=/dev/sda1 earlyprintk=se
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -513,6 +513,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -672,6 +673,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -695,6 +697,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -793,6 +796,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=1
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
@@ -1079,15 +1083,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1291,6 +1297,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1337,6 +1348,7 @@ CONFIG_MINIX_FS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -1417,11 +1429,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1524,6 +1550,7 @@ CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig
index 8feef62..2bc1c97 100644
--- a/arch/sh/configs/rts7751r2dplus_defconfig
+++ b/arch/sh/configs/rts7751r2dplus_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:34:12 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:59:01 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -70,6 +71,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -99,6 +101,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +114,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -156,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -165,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -302,8 +304,6 @@ CONFIG_CMDLINE="console=tty0 console=ttySC0,115200 root=/dev/sda1 earlyprintk=se
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -513,6 +513,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -672,6 +673,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -695,6 +697,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -793,6 +796,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=1
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
@@ -1079,15 +1083,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1291,6 +1297,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1337,6 +1348,7 @@ CONFIG_MINIX_FS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -1417,11 +1429,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1524,6 +1550,7 @@ CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index 739e229..a629b79 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:34:43 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 12:59:32 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -41,6 +42,7 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -73,6 +75,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -93,6 +96,7 @@ CONFIG_COMPAT_BRK=y
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -100,6 +104,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -112,7 +118,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 CONFIG_LBD=y
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -159,6 +164,7 @@ CONFIG_CPU_SH4A=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 CONFIG_CPU_SUBTYPE_SH7780=y
@@ -168,8 +174,6 @@ CONFIG_CPU_SUBTYPE_SH7780=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -310,8 +314,6 @@ CONFIG_CMDLINE="mem=128M console=tty0 console=ttySC0,115200 ip=bootp root=/dev/n
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 # CONFIG_PCI_LEGACY is not set
@@ -646,6 +648,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -1091,15 +1094,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1257,6 +1262,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
 # CONFIG_EXT2_FS_SECURITY is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1281,6 +1287,11 @@ CONFIG_AUTOFS4_FS=y
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=y
@@ -1331,6 +1342,7 @@ CONFIG_MINIX_FS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1418,6 +1430,9 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 CONFIG_TIMER_STATS=y
@@ -1455,6 +1470,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1464,9 +1480,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1580,6 +1601,7 @@ CONFIG_CRYPTO_DES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index d30e0a7..5caf85a 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:39:37 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:01:02 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@ CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -63,6 +65,7 @@ CONFIG_CGROUP_DEBUG=y
 CONFIG_CGROUP_NS=y
 # CONFIG_CGROUP_FREEZER is not set
 CONFIG_CGROUP_DEVICE=y
+# CONFIG_CPUSETS is not set
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_RESOURCE_COUNTERS=y
 CONFIG_CGROUP_MEM_RES_CTLR=y
@@ -80,7 +83,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -90,6 +92,7 @@ CONFIG_EMBEDDED=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -116,6 +119,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_RT_MUTEXES=y
 CONFIG_BASE_SMALL=0
@@ -127,7 +132,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -174,6 +178,7 @@ CONFIG_CPU_SUBTYPE_SH7206=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -183,8 +188,6 @@ CONFIG_CPU_SUBTYPE_SH7206=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -746,6 +749,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -785,8 +793,13 @@ CONFIG_CRAMFS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -831,6 +844,9 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -869,6 +885,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -876,11 +893,16 @@ CONFIG_TRACING=y
 # CONFIG_FUNCTION_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -991,6 +1013,7 @@ CONFIG_CRYPTO_LZO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig
index fbb72d0..004d531 100644
--- a/arch/sh/configs/se7343_defconfig
+++ b/arch/sh/configs/se7343_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:42:00 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:01:44 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@ CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -73,6 +75,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -91,6 +94,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -98,6 +102,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=0
@@ -109,7 +115,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -158,6 +163,7 @@ CONFIG_ARCH_SHMOBILE=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -167,8 +173,6 @@ CONFIG_ARCH_SHMOBILE=y
 CONFIG_CPU_SUBTYPE_SH7343=y
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -769,6 +773,7 @@ CONFIG_VIDEO_HELPER_CHIPS_AUTO=y
 # CONFIG_SOC_CAMERA is not set
 CONFIG_V4L_USB_DRIVERS=y
 # CONFIG_USB_VIDEO_CLASS is not set
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y
 CONFIG_USB_GSPCA=m
 # CONFIG_USB_M5602 is not set
 # CONFIG_USB_STV06XX is not set
@@ -800,6 +805,7 @@ CONFIG_USB_GSPCA=m
 # CONFIG_VIDEO_PVRUSB2 is not set
 # CONFIG_VIDEO_HDPVR is not set
 # CONFIG_VIDEO_EM28XX is not set
+# CONFIG_VIDEO_CX231XX is not set
 # CONFIG_VIDEO_USBVISION is not set
 # CONFIG_USB_VICAM is not set
 # CONFIG_USB_IBMCAM is not set
@@ -813,6 +819,7 @@ CONFIG_USB_GSPCA=m
 # CONFIG_USB_STV680 is not set
 # CONFIG_USB_ZC0301 is not set
 # CONFIG_USB_PWC is not set
+CONFIG_USB_PWC_INPUT_EVDEV=y
 # CONFIG_USB_ZR364XX is not set
 # CONFIG_USB_STKWEBCAM is not set
 # CONFIG_USB_S2255 is not set
@@ -914,15 +921,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1050,6 +1059,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1071,6 +1081,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1125,6 +1140,7 @@ CONFIG_CRAMFS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1174,10 +1190,23 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1279,6 +1308,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig
index 125304e..edbece5 100644
--- a/arch/sh/configs/se7619_defconfig
+++ b/arch/sh/configs/se7619_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:44:53 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:02:32 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -61,6 +62,7 @@ CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -78,11 +80,14 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=1
@@ -134,6 +139,7 @@ CONFIG_CPU_SUBTYPE_SH7619=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -143,8 +149,6 @@ CONFIG_CPU_SUBTYPE_SH7619=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -513,7 +517,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -564,6 +567,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -601,8 +609,13 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -630,10 +643,21 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -647,6 +671,7 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig
index 0308abf..bae161c 100644
--- a/arch/sh/configs/se7705_defconfig
+++ b/arch/sh/configs/se7705_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:45:56 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:02:52 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -62,7 +63,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -70,6 +70,7 @@ CONFIG_EMBEDDED=y
 CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,12 +89,15 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -150,6 +154,7 @@ CONFIG_CPU_SUBTYPE_SH7705=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -159,8 +164,6 @@ CONFIG_CPU_SUBTYPE_SH7705=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -698,7 +701,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -752,6 +754,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -804,6 +811,7 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -847,10 +855,23 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -949,6 +970,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index a8c24b7..330043f 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:48:18 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:03:27 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -38,6 +39,7 @@ CONFIG_LOCALVERSION=""
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -69,6 +71,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 # CONFIG_BUG is not set
@@ -87,6 +90,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -94,6 +98,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -105,7 +111,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -151,6 +156,7 @@ CONFIG_CPU_SUBTYPE_SH7712=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -160,8 +166,6 @@ CONFIG_CPU_SUBTYPE_SH7712=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -585,6 +589,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -812,6 +817,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -833,6 +839,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -887,6 +898,7 @@ CONFIG_CRAMFS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -927,6 +939,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -961,6 +974,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -969,9 +983,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1090,6 +1109,7 @@ CONFIG_CRYPTO_DEFLATE=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index 4b79c25..5647891 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:51:44 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:04:19 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -38,6 +39,7 @@ CONFIG_LOCALVERSION=""
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -73,6 +75,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 # CONFIG_BUG is not set
@@ -91,6 +94,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -98,6 +102,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -109,7 +115,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -155,6 +160,7 @@ CONFIG_CPU_SUBTYPE_SH7721=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -164,8 +170,6 @@ CONFIG_CPU_SUBTYPE_SH7721=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -776,15 +780,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -943,6 +949,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -964,6 +971,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1021,6 +1033,7 @@ CONFIG_CRAMFS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
 
 #
@@ -1085,6 +1098,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1119,6 +1133,7 @@ CONFIG_FRAME_POINTER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1127,9 +1142,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1248,6 +1268,7 @@ CONFIG_CRYPTO_DEFLATE=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig
index 82bdaac..726fdbd 100644
--- a/arch/sh/configs/se7722_defconfig
+++ b/arch/sh/configs/se7722_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:55:10 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:05:29 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -69,7 +70,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -78,6 +78,7 @@ CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -97,6 +98,7 @@ CONFIG_COMPAT_BRK=y
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -105,6 +107,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -167,6 +170,7 @@ CONFIG_ARCH_SHMOBILE=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -176,8 +180,6 @@ CONFIG_ARCH_SHMOBILE=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 CONFIG_CPU_SUBTYPE_SH7722=y
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -486,6 +488,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -692,7 +695,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -766,6 +768,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -789,6 +792,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -832,6 +840,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -872,11 +881,25 @@ CONFIG_DEBUG_FS=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -977,6 +1000,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig
index ceef6d9..ed1a123 100644
--- a/arch/sh/configs/se7750_defconfig
+++ b/arch/sh/configs/se7750_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:57:31 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:06:02 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -70,6 +71,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,6 +90,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -95,6 +98,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -106,7 +111,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -152,6 +156,7 @@ CONFIG_CPU_SUBTYPE_SH7750=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -161,8 +166,6 @@ CONFIG_CPU_SUBTYPE_SH7750=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -553,6 +556,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -775,6 +779,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -829,6 +838,7 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -886,10 +896,23 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -989,6 +1012,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 67fc26b..55f3b78 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 18:59:59 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:06:44 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -65,7 +66,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -74,6 +74,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -92,6 +93,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -99,6 +101,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -110,7 +114,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -156,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7751=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -165,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7751=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -742,6 +744,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -796,6 +803,7 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -833,10 +841,23 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -936,6 +957,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/se7780_defconfig b/arch/sh/configs/se7780_defconfig
index ebce23c..c4f0af3 100644
--- a/arch/sh/configs/se7780_defconfig
+++ b/arch/sh/configs/se7780_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:02:05 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:07:14 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -67,6 +68,7 @@ CONFIG_EMBEDDED=y
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -86,12 +88,15 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -103,7 +108,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
 #
@@ -149,6 +153,7 @@ CONFIG_CPU_SH4A=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 CONFIG_CPU_SUBTYPE_SH7780=y
@@ -158,8 +163,6 @@ CONFIG_CPU_SUBTYPE_SH7780=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -285,8 +288,6 @@ CONFIG_CMDLINE="console=ttySC0.115200 root=/dev/sda1"
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -558,6 +559,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -957,15 +959,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1122,6 +1126,10 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1245,11 +1253,24 @@ CONFIG_DEBUG_FS=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1345,6 +1366,7 @@ CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 6fcdb09..f9c6e51 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:04:59 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:07:56 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -41,6 +42,7 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -67,7 +69,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -76,6 +77,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -105,6 +107,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@ CONFIG_MODVERSIONS=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -163,6 +166,7 @@ CONFIG_CPU_SUBTYPE_SH7751=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -172,8 +176,6 @@ CONFIG_CPU_SUBTYPE_SH7751=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -300,8 +302,6 @@ CONFIG_CMDLINE="console=ttySC1,115200 mem=64M root=/dev/nfs"
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -562,6 +562,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -656,6 +657,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -679,6 +681,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -887,7 +890,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -929,6 +931,7 @@ CONFIG_EXT2_FS_XATTR=y
 # CONFIG_EXT2_FS_SECURITY is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -952,6 +955,11 @@ CONFIG_AUTOFS4_FS=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=m
@@ -1001,6 +1009,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1112,11 +1121,26 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -1228,6 +1252,7 @@ CONFIG_CRYPTO_DEFLATE=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig
index 1ab37c0..48b5809 100644
--- a/arch/sh/configs/sh7710voipgw_defconfig
+++ b/arch/sh/configs/sh7710voipgw_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:09:01 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:09:16 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -39,6 +40,7 @@ CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -72,6 +74,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -90,6 +93,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -97,6 +101,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=0
@@ -108,7 +114,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -154,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7710=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -163,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7710=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -728,7 +732,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
@@ -780,6 +783,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -834,6 +842,7 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -871,11 +880,24 @@ CONFIG_DEBUG_FS=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -975,6 +997,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig
index 268d04e..ec8f18c 100644
--- a/arch/sh/configs/sh7724_generic_defconfig
+++ b/arch/sh/configs/sh7724_generic_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30-rc2
-# Thu Apr 16 15:42:20 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:09:47 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index c79bb84..f77bc79 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:10:57 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:10:12 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -78,6 +79,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -106,6 +108,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@ CONFIG_MODULES=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -164,6 +167,7 @@ CONFIG_CPU_SH4A=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 CONFIG_CPU_SUBTYPE_SH7763=y
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -173,8 +177,6 @@ CONFIG_CPU_SUBTYPE_SH7763=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -555,6 +557,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -941,6 +944,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -965,6 +969,11 @@ CONFIG_AUTOFS4_FS=y
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1012,6 +1021,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1100,11 +1110,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1204,6 +1228,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index a6cf4505..1c55b80 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:12:18 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:10:53 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -79,6 +80,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -98,6 +100,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -106,6 +109,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -118,7 +123,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -166,6 +170,7 @@ CONFIG_CPU_SHX2=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -175,8 +180,6 @@ CONFIG_CPU_SUBTYPE_SH7785=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -310,8 +313,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -672,6 +673,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1009,15 +1011,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1218,6 +1222,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1240,6 +1245,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1289,6 +1299,7 @@ CONFIG_MINIX_FS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1377,6 +1388,9 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1413,6 +1427,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1422,9 +1437,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1542,6 +1562,7 @@ CONFIG_CRYPTO_DES=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig
index e4fac2e..4385fe9 100644
--- a/arch/sh/configs/sh7785lcr_defconfig
+++ b/arch/sh/configs/sh7785lcr_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30-rc2
-# Wed Apr 22 19:17:56 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:11:48 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
@@ -307,8 +307,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig
index d695e90..4e12025 100644
--- a/arch/sh/configs/shmin_defconfig
+++ b/arch/sh/configs/shmin_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:19:03 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:12:41 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
@@ -63,6 +64,7 @@ CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 # CONFIG_BUG is not set
@@ -81,12 +83,15 @@ CONFIG_COMPAT_BRK=y
 # CONFIG_SLUB is not set
 CONFIG_SLOB=y
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_BASE_SMALL=1
 # CONFIG_MODULES is not set
@@ -137,6 +142,7 @@ CONFIG_CPU_SUBTYPE_SH7706=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -146,8 +152,6 @@ CONFIG_CPU_SUBTYPE_SH7706=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -675,6 +679,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -718,6 +727,7 @@ CONFIG_CRAMFS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -762,10 +772,22 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_SH_STANDARD_BIOS=y
@@ -864,6 +886,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig
index e3651f5..c088144 100644
--- a/arch/sh/configs/shx3_defconfig
+++ b/arch/sh/configs/shx3_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:20:54 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:13:12 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -42,6 +43,7 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 # CONFIG_TASKSTATS is not set
@@ -96,6 +98,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -126,6 +129,8 @@ CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_RT_MUTEXES=y
 CONFIG_BASE_SMALL=0
@@ -138,7 +143,6 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -186,6 +190,7 @@ CONFIG_CPU_SHX3=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -195,8 +200,6 @@ CONFIG_CPU_SUBTYPE_SHX3=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -553,6 +556,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -646,6 +650,7 @@ CONFIG_DEVKMEM=y
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=2
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
@@ -985,6 +990,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1008,6 +1014,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1051,6 +1062,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -1085,6 +1097,9 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1124,6 +1139,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1133,11 +1149,16 @@ CONFIG_TRACING=y
 # CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
 # CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1245,6 +1266,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
diff --git a/arch/sh/configs/snapgear_defconfig b/arch/sh/configs/snapgear_defconfig
index 6960f60..54a7a3c 100644
--- a/arch/sh/configs/snapgear_defconfig
+++ b/arch/sh/configs/snapgear_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:21:39 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:14:00 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -64,7 +65,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -73,6 +73,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -92,12 +93,15 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -105,7 +109,6 @@ CONFIG_BASE_SMALL=0
 # CONFIG_MODULES is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -151,6 +154,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -160,8 +164,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -295,8 +297,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -763,6 +763,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -805,8 +810,13 @@ CONFIG_CRAMFS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 # CONFIG_NFS_FS is not set
 # CONFIG_NFSD is not set
@@ -844,10 +854,23 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -862,6 +885,7 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/systemh_defconfig b/arch/sh/configs/systemh_defconfig
index 7ea639b..dbe7e54 100644
--- a/arch/sh/configs/systemh_defconfig
+++ b/arch/sh/configs/systemh_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:23:31 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:14:33 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -61,7 +62,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -70,6 +70,7 @@ CONFIG_UID16=y
 # CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -88,6 +89,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -95,6 +97,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -107,7 +111,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -153,6 +156,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -162,8 +166,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -506,6 +508,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -547,8 +554,13 @@ CONFIG_CRAMFS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -577,10 +589,24 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -595,6 +621,7 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index bbeb4c6..8ca94ef 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:24:55 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:14:55 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -40,6 +41,7 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -66,7 +68,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -76,6 +77,7 @@ CONFIG_UID16=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -95,6 +97,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_IOREMAP_PROT=y
@@ -102,6 +105,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -114,7 +119,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -160,6 +164,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -169,8 +174,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -305,8 +308,6 @@ CONFIG_CMDLINE="console=ttySC1,38400N81 root=/dev/nfs ip=:::::eth1:autoconf rw"
 #
 CONFIG_PCI=y
 CONFIG_SH_PCIDMA_NONCOHERENT=y
-CONFIG_PCI_AUTO=y
-CONFIG_PCI_AUTO_UPDATE_RESOURCES=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_PCI_LEGACY=y
@@ -789,6 +790,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -906,6 +908,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -929,6 +932,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -1182,7 +1186,6 @@ CONFIG_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1393,6 +1396,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 # CONFIG_EXT3_FS_XATTR is not set
 # CONFIG_EXT4_FS is not set
 CONFIG_JBD=y
@@ -1419,6 +1423,11 @@ CONFIG_INOTIFY_USER=y
 CONFIG_FUSE_FS=m
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=m
@@ -1467,8 +1476,13 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1576,6 +1590,7 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1610,6 +1625,7 @@ CONFIG_SCHED_DEBUG=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1618,9 +1634,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -1741,6 +1762,7 @@ CONFIG_CRYPTO_DEFLATE=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig
index 34f5192..bfb4d98 100644
--- a/arch/sh/configs/ul2_defconfig
+++ b/arch/sh/configs/ul2_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:30:27 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 13:17:05 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -69,7 +70,6 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
-CONFIG_INITRAMFS_COMPRESSION_NONE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -78,6 +78,7 @@ CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -97,6 +98,7 @@ CONFIG_COMPAT_BRK=y
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -105,6 +107,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -117,7 +121,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -167,6 +170,7 @@ CONFIG_ARCH_SHMOBILE=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -176,8 +180,6 @@ CONFIG_ARCH_SHMOBILE=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 CONFIG_CPU_SUBTYPE_SH7366=y
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -584,6 +586,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
 # CONFIG_SCSI_OSD_INITIATOR is not set
@@ -936,6 +939,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -958,6 +962,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1005,6 +1014,7 @@ CONFIG_CRAMFS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 # CONFIG_NFS_V3 is not set
@@ -1095,10 +1105,24 @@ CONFIG_FRAME_WARN=1024
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1208,6 +1232,7 @@ CONFIG_CRYPTO_ARC4=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
index d174b1a..512664f 100644
--- a/arch/sh/configs/urquell_defconfig
+++ b/arch/sh/configs/urquell_defconfig
@@ -1,10 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Thu Apr  2 19:33:39 2009
+# Linux kernel version: 2.6.30-rc3
+# Mon Apr 27 14:02:55 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_BUG=y
@@ -76,6 +77,7 @@ CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -94,6 +96,7 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+# CONFIG_MARKERS is not set
 # CONFIG_OPROFILE is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
@@ -102,6 +105,8 @@ CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -114,7 +119,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -162,6 +166,7 @@ CONFIG_CPU_SHX3=y
 # CONFIG_CPU_SUBTYPE_SH7760 is not set
 # CONFIG_CPU_SUBTYPE_SH4_202 is not set
 # CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
 # CONFIG_CPU_SUBTYPE_SH7763 is not set
 # CONFIG_CPU_SUBTYPE_SH7770 is not set
 # CONFIG_CPU_SUBTYPE_SH7780 is not set
@@ -171,8 +176,6 @@ CONFIG_CPU_SUBTYPE_SH7786=y
 # CONFIG_CPU_SUBTYPE_SH7343 is not set
 # CONFIG_CPU_SUBTYPE_SH7722 is not set
 # CONFIG_CPU_SUBTYPE_SH7366 is not set
-# CONFIG_CPU_SUBTYPE_SH5_101 is not set
-# CONFIG_CPU_SUBTYPE_SH5_103 is not set
 
 #
 # Memory management options
@@ -900,15 +903,17 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+# CONFIG_HID_KYE is not set
 CONFIG_HID_GYRATION=y
+# CONFIG_HID_KENSINGTON is not set
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1046,6 +1051,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1068,6 +1074,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1117,6 +1128,7 @@ CONFIG_MINIX_FS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1209,10 +1221,24 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
 #
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_SH_STANDARD_BIOS is not set
@@ -1322,6 +1348,7 @@ CONFIG_CRYPTO_DES=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
-- 
cgit v0.10.2


From 5be7c0a4d3dfe25091f2e4e524103e81d9e7e180 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 27 Apr 2009 14:40:47 +0900
Subject: sh: select GENERIC_TIME for new CMT driver.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 4c68fde..1647489 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -476,10 +476,11 @@ config SH_TIMER_CMT
 	bool "CMT clockevents driver"
 	depends on SYS_SUPPORTS_CMT && !SH_CMT
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_TIME
 
 config SH_MTU2
 	bool "MTU2 timer support"
-	depends on CPU_SH2A
+	depends on CPU_SH2A && !GENERIC_TIME
 	default y
 	help
 	  This enables the use of the MTU2 as the system timer.
-- 
cgit v0.10.2


From 148be2c15d4a866fbc7a8f55342e4fd4de73be61 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Apr 2009 08:02:14 +0200
Subject: perf_counter tools: move helper library to util/*

Clean up the top level directory a bit by moving all the helper libraries
to util/*.[ch].

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 690045e..543ccf2 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -147,7 +147,7 @@ all::
 # broken, or spawning external process is slower than built-in grep perf has).
 
 PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
-	@$(SHELL_PATH) ./PERF-VERSION-GEN
+	@$(SHELL_PATH) util/PERF-VERSION-GEN
 -include PERF-VERSION-FILE
 
 uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
@@ -287,26 +287,28 @@ export PERL_PATH
 LIB_FILE=libperf.a
 
 LIB_H += ../../include/linux/perf_counter.h
-LIB_H += levenshtein.h
-LIB_H += parse-options.h
-LIB_H += quote.h
-LIB_H += strbuf.h
-LIB_H += run-command.h
-
-LIB_OBJS += abspath.o
-LIB_OBJS += alias.o
-LIB_OBJS += config.o
-LIB_OBJS += ctype.o
-LIB_OBJS += exec_cmd.o
-LIB_OBJS += help.o
-LIB_OBJS += levenshtein.o
-LIB_OBJS += parse-options.o
-LIB_OBJS += path.o
-LIB_OBJS += run-command.o
-LIB_OBJS += quote.o
-LIB_OBJS += strbuf.o
-LIB_OBJS += usage.o
-LIB_OBJS += wrapper.o
+LIB_H += util/levenshtein.h
+LIB_H += util/parse-options.h
+LIB_H += util/quote.h
+LIB_H += util/util.h
+LIB_H += util/help.h
+LIB_H += util/strbuf.h
+LIB_H += util/run-command.h
+
+LIB_OBJS += util/abspath.o
+LIB_OBJS += util/alias.o
+LIB_OBJS += util/config.o
+LIB_OBJS += util/ctype.o
+LIB_OBJS += util/exec_cmd.o
+LIB_OBJS += util/help.o
+LIB_OBJS += util/levenshtein.o
+LIB_OBJS += util/parse-options.o
+LIB_OBJS += util/path.o
+LIB_OBJS += util/run-command.o
+LIB_OBJS += util/quote.o
+LIB_OBJS += util/strbuf.o
+LIB_OBJS += util/usage.o
+LIB_OBJS += util/wrapper.o
 
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-record.o
@@ -620,10 +622,10 @@ $(BUILT_INS): perf$X
 	ln -s perf$X $@ 2>/dev/null || \
 	cp perf$X $@
 
-common-cmds.h: ./generate-cmdlist.sh command-list.txt
+common-cmds.h: util/generate-cmdlist.sh command-list.txt
 
 common-cmds.h: $(wildcard Documentation/perf-*.txt)
-	$(QUIET_GEN)./generate-cmdlist.sh > $@+ && mv $@+ $@
+	$(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@
 
 $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
 	$(QUIET_GEN)$(RM) $@ $@+ && \
@@ -656,7 +658,7 @@ perf.o perf.spec \
 %.o: %.S
 	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
 
-exec_cmd.o: exec_cmd.c PERF-CFLAGS
+util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
 		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
 		'-DBINDIR="$(bindir_relative_SQ)"' \
@@ -666,7 +668,7 @@ exec_cmd.o: exec_cmd.c PERF-CFLAGS
 builtin-init-db.o: builtin-init-db.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
 
-config.o: config.c PERF-CFLAGS
+util/config.o: util/config.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
 perf-%$X: %.o $(PERFLIBS)
@@ -729,7 +731,7 @@ export NO_SVN_TESTS
 check: common-cmds.h
 	if sparse; \
 	then \
-		for i in *.c; \
+		for i in *.c */*.c; \
 		do \
 			sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
 		done; \
@@ -795,7 +797,7 @@ distclean: clean
 	$(RM) configure
 
 clean:
-	$(RM) *.o $(LIB_FILE)
+	$(RM) *.o */*.o $(LIB_FILE)
 	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
 	$(RM) $(TEST_PROGRAMS)
 	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
@@ -836,7 +838,7 @@ coverage-build: coverage-clean
 		-j1 test
 
 coverage-report:
-	gcov -b *.c
-	grep '^function.*called 0 ' *.c.gcov \
+	gcov -b *.c */*.c
+	grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
 		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
 		| tee coverage-untested-functions
diff --git a/Documentation/perf_counter/PERF-VERSION-GEN b/Documentation/perf_counter/PERF-VERSION-GEN
deleted file mode 100755
index c561d15..0000000
--- a/Documentation/perf_counter/PERF-VERSION-GEN
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/sh
-
-GVF=PERF-VERSION-FILE
-DEF_VER=v0.0.1.PERF
-
-LF='
-'
-
-# First see if there is a version file (included in release tarballs),
-# then try git-describe, then default.
-if test -f version
-then
-	VN=$(cat version) || VN="$DEF_VER"
-elif test -d .git -o -f .git &&
-	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
-	case "$VN" in
-	*$LF*) (exit 1) ;;
-	v[0-9]*)
-		git update-index -q --refresh
-		test -z "$(git diff-index --name-only HEAD --)" ||
-		VN="$VN-dirty" ;;
-	esac
-then
-	VN=$(echo "$VN" | sed -e 's/-/./g');
-else
-	VN="$DEF_VER"
-fi
-
-VN=$(expr "$VN" : v*'\(.*\)')
-
-if test -r $GVF
-then
-	VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
-else
-	VC=unset
-fi
-test "$VN" = "$VC" || {
-	echo >&2 "PERF_VERSION = $VN"
-	echo "PERF_VERSION = $VN" >$GVF
-}
-
-
diff --git a/Documentation/perf_counter/abspath.c b/Documentation/perf_counter/abspath.c
deleted file mode 100644
index 649f34f..0000000
--- a/Documentation/perf_counter/abspath.c
+++ /dev/null
@@ -1,117 +0,0 @@
-#include "cache.h"
-
-/*
- * Do not use this for inspecting *tracked* content.  When path is a
- * symlink to a directory, we do not want to say it is a directory when
- * dealing with tracked content in the working tree.
- */
-int is_directory(const char *path)
-{
-	struct stat st;
-	return (!stat(path, &st) && S_ISDIR(st.st_mode));
-}
-
-/* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXDEPTH 5
-
-const char *make_absolute_path(const char *path)
-{
-	static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1];
-	char cwd[1024] = "";
-	int buf_index = 1, len;
-
-	int depth = MAXDEPTH;
-	char *last_elem = NULL;
-	struct stat st;
-
-	if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
-		die ("Too long path: %.*s", 60, path);
-
-	while (depth--) {
-		if (!is_directory(buf)) {
-			char *last_slash = strrchr(buf, '/');
-			if (last_slash) {
-				*last_slash = '\0';
-				last_elem = xstrdup(last_slash + 1);
-			} else {
-				last_elem = xstrdup(buf);
-				*buf = '\0';
-			}
-		}
-
-		if (*buf) {
-			if (!*cwd && !getcwd(cwd, sizeof(cwd)))
-				die ("Could not get current working directory");
-
-			if (chdir(buf))
-				die ("Could not switch to '%s'", buf);
-		}
-		if (!getcwd(buf, PATH_MAX))
-			die ("Could not get current working directory");
-
-		if (last_elem) {
-			int len = strlen(buf);
-			if (len + strlen(last_elem) + 2 > PATH_MAX)
-				die ("Too long path name: '%s/%s'",
-						buf, last_elem);
-			buf[len] = '/';
-			strcpy(buf + len + 1, last_elem);
-			free(last_elem);
-			last_elem = NULL;
-		}
-
-		if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) {
-			len = readlink(buf, next_buf, PATH_MAX);
-			if (len < 0)
-				die ("Invalid symlink: %s", buf);
-			if (PATH_MAX <= len)
-				die("symbolic link too long: %s", buf);
-			next_buf[len] = '\0';
-			buf = next_buf;
-			buf_index = 1 - buf_index;
-			next_buf = bufs[buf_index];
-		} else
-			break;
-	}
-
-	if (*cwd && chdir(cwd))
-		die ("Could not change back to '%s'", cwd);
-
-	return buf;
-}
-
-static const char *get_pwd_cwd(void)
-{
-	static char cwd[PATH_MAX + 1];
-	char *pwd;
-	struct stat cwd_stat, pwd_stat;
-	if (getcwd(cwd, PATH_MAX) == NULL)
-		return NULL;
-	pwd = getenv("PWD");
-	if (pwd && strcmp(pwd, cwd)) {
-		stat(cwd, &cwd_stat);
-		if (!stat(pwd, &pwd_stat) &&
-		    pwd_stat.st_dev == cwd_stat.st_dev &&
-		    pwd_stat.st_ino == cwd_stat.st_ino) {
-			strlcpy(cwd, pwd, PATH_MAX);
-		}
-	}
-	return cwd;
-}
-
-const char *make_nonrelative_path(const char *path)
-{
-	static char buf[PATH_MAX + 1];
-
-	if (is_absolute_path(path)) {
-		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
-			die("Too long path: %.*s", 60, path);
-	} else {
-		const char *cwd = get_pwd_cwd();
-		if (!cwd)
-			die("Cannot determine the current working directory");
-		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
-			die("Too long path: %.*s", 60, path);
-	}
-	return buf;
-}
diff --git a/Documentation/perf_counter/alias.c b/Documentation/perf_counter/alias.c
deleted file mode 100644
index 9b3dd2b..0000000
--- a/Documentation/perf_counter/alias.c
+++ /dev/null
@@ -1,77 +0,0 @@
-#include "cache.h"
-
-static const char *alias_key;
-static char *alias_val;
-
-static int alias_lookup_cb(const char *k, const char *v, void *cb)
-{
-	if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) {
-		if (!v)
-			return config_error_nonbool(k);
-		alias_val = strdup(v);
-		return 0;
-	}
-	return 0;
-}
-
-char *alias_lookup(const char *alias)
-{
-	alias_key = alias;
-	alias_val = NULL;
-	perf_config(alias_lookup_cb, NULL);
-	return alias_val;
-}
-
-int split_cmdline(char *cmdline, const char ***argv)
-{
-	int src, dst, count = 0, size = 16;
-	char quoted = 0;
-
-	*argv = malloc(sizeof(char*) * size);
-
-	/* split alias_string */
-	(*argv)[count++] = cmdline;
-	for (src = dst = 0; cmdline[src];) {
-		char c = cmdline[src];
-		if (!quoted && isspace(c)) {
-			cmdline[dst++] = 0;
-			while (cmdline[++src]
-					&& isspace(cmdline[src]))
-				; /* skip */
-			if (count >= size) {
-				size += 16;
-				*argv = realloc(*argv, sizeof(char*) * size);
-			}
-			(*argv)[count++] = cmdline + dst;
-		} else if (!quoted && (c == '\'' || c == '"')) {
-			quoted = c;
-			src++;
-		} else if (c == quoted) {
-			quoted = 0;
-			src++;
-		} else {
-			if (c == '\\' && quoted != '\'') {
-				src++;
-				c = cmdline[src];
-				if (!c) {
-					free(*argv);
-					*argv = NULL;
-					return error("cmdline ends with \\");
-				}
-			}
-			cmdline[dst++] = c;
-			src++;
-		}
-	}
-
-	cmdline[dst] = 0;
-
-	if (quoted) {
-		free(*argv);
-		*argv = NULL;
-		return error("unclosed quote");
-	}
-
-	return count;
-}
-
diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c
index a136d61..6616de0 100644
--- a/Documentation/perf_counter/builtin-help.c
+++ b/Documentation/perf_counter/builtin-help.c
@@ -3,13 +3,13 @@
  *
  * Builtin help command
  */
-#include "cache.h"
+#include "util/cache.h"
 #include "builtin.h"
-#include "exec_cmd.h"
+#include "util/exec_cmd.h"
 #include "common-cmds.h"
-#include "parse-options.h"
-#include "run-command.h"
-#include "help.h"
+#include "util/parse-options.h"
+#include "util/run-command.h"
+#include "util/help.h"
 
 static struct man_viewer_list {
 	struct man_viewer_list *next;
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 169a2d1..d7ace63 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -61,7 +61,7 @@
   * Released under the GPL v2. (and only v2, not any later version)
   */
 
-#include "util.h"
+#include "util/util.h"
 
 #include <getopt.h>
 #include <assert.h>
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 98e8690..dea016f 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -42,7 +42,7 @@
   * Released under the GPL v2. (and only v2, not any later version)
   */
 
-#include "util.h"
+#include "util/util.h"
 
 #include <getopt.h>
 #include <assert.h>
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index 800f86c..d32318a 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -1,8 +1,8 @@
 #ifndef BUILTIN_H
 #define BUILTIN_H
 
-#include "util.h"
-#include "strbuf.h"
+#include "util/util.h"
+#include "util/strbuf.h"
 
 extern const char perf_version_string[];
 extern const char perf_usage_string[];
diff --git a/Documentation/perf_counter/cache.h b/Documentation/perf_counter/cache.h
deleted file mode 100644
index 7108051..0000000
--- a/Documentation/perf_counter/cache.h
+++ /dev/null
@@ -1,117 +0,0 @@
-#ifndef CACHE_H
-#define CACHE_H
-
-#include "util.h"
-#include "strbuf.h"
-
-#define PERF_DIR_ENVIRONMENT "PERF_DIR"
-#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
-#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
-#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY"
-#define INDEX_ENVIRONMENT "PERF_INDEX_FILE"
-#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE"
-#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR"
-#define CONFIG_ENVIRONMENT "PERF_CONFIG"
-#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
-#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES"
-#define PERFATTRIBUTES_FILE ".perfattributes"
-#define INFOATTRIBUTES_FILE "info/attributes"
-#define ATTRIBUTE_MACRO_PREFIX "[attr]"
-
-typedef int (*config_fn_t)(const char *, const char *, void *);
-extern int perf_default_config(const char *, const char *, void *);
-extern int perf_config_from_file(config_fn_t fn, const char *, void *);
-extern int perf_config(config_fn_t fn, void *);
-extern int perf_parse_ulong(const char *, unsigned long *);
-extern int perf_config_int(const char *, const char *);
-extern unsigned long perf_config_ulong(const char *, const char *);
-extern int perf_config_bool_or_int(const char *, const char *, int *);
-extern int perf_config_bool(const char *, const char *);
-extern int perf_config_string(const char **, const char *, const char *);
-extern int perf_config_set(const char *, const char *);
-extern int perf_config_set_multivar(const char *, const char *, const char *, int);
-extern int perf_config_rename_section(const char *, const char *);
-extern const char *perf_etc_perfconfig(void);
-extern int check_repository_format_version(const char *var, const char *value, void *cb);
-extern int perf_config_system(void);
-extern int perf_config_global(void);
-extern int config_error_nonbool(const char *);
-extern const char *config_exclusive_filename;
-
-#define MAX_PERFNAME (1000)
-extern char perf_default_email[MAX_PERFNAME];
-extern char perf_default_name[MAX_PERFNAME];
-extern int user_ident_explicitly_given;
-
-extern const char *perf_log_output_encoding;
-extern const char *perf_mailmap_file;
-
-/* IO helper functions */
-extern void maybe_flush_or_die(FILE *, const char *);
-extern int copy_fd(int ifd, int ofd);
-extern int copy_file(const char *dst, const char *src, int mode);
-extern ssize_t read_in_full(int fd, void *buf, size_t count);
-extern ssize_t write_in_full(int fd, const void *buf, size_t count);
-extern void write_or_die(int fd, const void *buf, size_t count);
-extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
-extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
-extern void fsync_or_die(int fd, const char *);
-
-/* pager.c */
-extern void setup_pager(void);
-extern const char *pager_program;
-extern int pager_in_use(void);
-extern int pager_use_color;
-
-extern const char *editor_program;
-extern const char *excludes_file;
-
-char *alias_lookup(const char *alias);
-int split_cmdline(char *cmdline, const char ***argv);
-
-#define alloc_nr(x) (((x)+16)*3/2)
-
-/*
- * Realloc the buffer pointed at by variable 'x' so that it can hold
- * at least 'nr' entries; the number of entries currently allocated
- * is 'alloc', using the standard growing factor alloc_nr() macro.
- *
- * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
- */
-#define ALLOC_GROW(x, nr, alloc) \
-	do { \
-		if ((nr) > alloc) { \
-			if (alloc_nr(alloc) < (nr)) \
-				alloc = (nr); \
-			else \
-				alloc = alloc_nr(alloc); \
-			x = xrealloc((x), alloc * sizeof(*(x))); \
-		} \
-	} while(0)
-
-
-static inline int is_absolute_path(const char *path)
-{
-	return path[0] == '/';
-}
-
-const char *make_absolute_path(const char *path);
-const char *make_nonrelative_path(const char *path);
-const char *make_relative_path(const char *abs, const char *base);
-int normalize_path_copy(char *dst, const char *src);
-int longest_ancestor_length(const char *path, const char *prefix_list);
-char *strip_path_suffix(const char *path, const char *suffix);
-
-extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-
-extern char *mksnpath(char *buf, size_t n, const char *fmt, ...)
-	__attribute__((format (printf, 3, 4)));
-extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
-	__attribute__((format (printf, 3, 4)));
-extern char *perf_pathdup(const char *fmt, ...)
-	__attribute__((format (printf, 1, 2)));
-
-extern size_t strlcpy(char *dest, const char *src, size_t size);
-
-#endif /* CACHE_H */
diff --git a/Documentation/perf_counter/config.c b/Documentation/perf_counter/config.c
deleted file mode 100644
index 3dd13fa..0000000
--- a/Documentation/perf_counter/config.c
+++ /dev/null
@@ -1,873 +0,0 @@
-/*
- * GIT - The information manager from hell
- *
- * Copyright (C) Linus Torvalds, 2005
- * Copyright (C) Johannes Schindelin, 2005
- *
- */
-#include "util.h"
-#include "cache.h"
-#include "exec_cmd.h"
-
-#define MAXNAME (256)
-
-static FILE *config_file;
-static const char *config_file_name;
-static int config_linenr;
-static int config_file_eof;
-
-const char *config_exclusive_filename = NULL;
-
-static int get_next_char(void)
-{
-	int c;
-	FILE *f;
-
-	c = '\n';
-	if ((f = config_file) != NULL) {
-		c = fgetc(f);
-		if (c == '\r') {
-			/* DOS like systems */
-			c = fgetc(f);
-			if (c != '\n') {
-				ungetc(c, f);
-				c = '\r';
-			}
-		}
-		if (c == '\n')
-			config_linenr++;
-		if (c == EOF) {
-			config_file_eof = 1;
-			c = '\n';
-		}
-	}
-	return c;
-}
-
-static char *parse_value(void)
-{
-	static char value[1024];
-	int quote = 0, comment = 0, len = 0, space = 0;
-
-	for (;;) {
-		int c = get_next_char();
-		if (len >= sizeof(value) - 1)
-			return NULL;
-		if (c == '\n') {
-			if (quote)
-				return NULL;
-			value[len] = 0;
-			return value;
-		}
-		if (comment)
-			continue;
-		if (isspace(c) && !quote) {
-			space = 1;
-			continue;
-		}
-		if (!quote) {
-			if (c == ';' || c == '#') {
-				comment = 1;
-				continue;
-			}
-		}
-		if (space) {
-			if (len)
-				value[len++] = ' ';
-			space = 0;
-		}
-		if (c == '\\') {
-			c = get_next_char();
-			switch (c) {
-			case '\n':
-				continue;
-			case 't':
-				c = '\t';
-				break;
-			case 'b':
-				c = '\b';
-				break;
-			case 'n':
-				c = '\n';
-				break;
-			/* Some characters escape as themselves */
-			case '\\': case '"':
-				break;
-			/* Reject unknown escape sequences */
-			default:
-				return NULL;
-			}
-			value[len++] = c;
-			continue;
-		}
-		if (c == '"') {
-			quote = 1-quote;
-			continue;
-		}
-		value[len++] = c;
-	}
-}
-
-static inline int iskeychar(int c)
-{
-	return isalnum(c) || c == '-';
-}
-
-static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
-{
-	int c;
-	char *value;
-
-	/* Get the full name */
-	for (;;) {
-		c = get_next_char();
-		if (config_file_eof)
-			break;
-		if (!iskeychar(c))
-			break;
-		name[len++] = tolower(c);
-		if (len >= MAXNAME)
-			return -1;
-	}
-	name[len] = 0;
-	while (c == ' ' || c == '\t')
-		c = get_next_char();
-
-	value = NULL;
-	if (c != '\n') {
-		if (c != '=')
-			return -1;
-		value = parse_value();
-		if (!value)
-			return -1;
-	}
-	return fn(name, value, data);
-}
-
-static int get_extended_base_var(char *name, int baselen, int c)
-{
-	do {
-		if (c == '\n')
-			return -1;
-		c = get_next_char();
-	} while (isspace(c));
-
-	/* We require the format to be '[base "extension"]' */
-	if (c != '"')
-		return -1;
-	name[baselen++] = '.';
-
-	for (;;) {
-		int c = get_next_char();
-		if (c == '\n')
-			return -1;
-		if (c == '"')
-			break;
-		if (c == '\\') {
-			c = get_next_char();
-			if (c == '\n')
-				return -1;
-		}
-		name[baselen++] = c;
-		if (baselen > MAXNAME / 2)
-			return -1;
-	}
-
-	/* Final ']' */
-	if (get_next_char() != ']')
-		return -1;
-	return baselen;
-}
-
-static int get_base_var(char *name)
-{
-	int baselen = 0;
-
-	for (;;) {
-		int c = get_next_char();
-		if (config_file_eof)
-			return -1;
-		if (c == ']')
-			return baselen;
-		if (isspace(c))
-			return get_extended_base_var(name, baselen, c);
-		if (!iskeychar(c) && c != '.')
-			return -1;
-		if (baselen > MAXNAME / 2)
-			return -1;
-		name[baselen++] = tolower(c);
-	}
-}
-
-static int perf_parse_file(config_fn_t fn, void *data)
-{
-	int comment = 0;
-	int baselen = 0;
-	static char var[MAXNAME];
-
-	/* U+FEFF Byte Order Mark in UTF8 */
-	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
-	const unsigned char *bomptr = utf8_bom;
-
-	for (;;) {
-		int c = get_next_char();
-		if (bomptr && *bomptr) {
-			/* We are at the file beginning; skip UTF8-encoded BOM
-			 * if present. Sane editors won't put this in on their
-			 * own, but e.g. Windows Notepad will do it happily. */
-			if ((unsigned char) c == *bomptr) {
-				bomptr++;
-				continue;
-			} else {
-				/* Do not tolerate partial BOM. */
-				if (bomptr != utf8_bom)
-					break;
-				/* No BOM at file beginning. Cool. */
-				bomptr = NULL;
-			}
-		}
-		if (c == '\n') {
-			if (config_file_eof)
-				return 0;
-			comment = 0;
-			continue;
-		}
-		if (comment || isspace(c))
-			continue;
-		if (c == '#' || c == ';') {
-			comment = 1;
-			continue;
-		}
-		if (c == '[') {
-			baselen = get_base_var(var);
-			if (baselen <= 0)
-				break;
-			var[baselen++] = '.';
-			var[baselen] = 0;
-			continue;
-		}
-		if (!isalpha(c))
-			break;
-		var[baselen] = tolower(c);
-		if (get_value(fn, data, var, baselen+1) < 0)
-			break;
-	}
-	die("bad config file line %d in %s", config_linenr, config_file_name);
-}
-
-static int parse_unit_factor(const char *end, unsigned long *val)
-{
-	if (!*end)
-		return 1;
-	else if (!strcasecmp(end, "k")) {
-		*val *= 1024;
-		return 1;
-	}
-	else if (!strcasecmp(end, "m")) {
-		*val *= 1024 * 1024;
-		return 1;
-	}
-	else if (!strcasecmp(end, "g")) {
-		*val *= 1024 * 1024 * 1024;
-		return 1;
-	}
-	return 0;
-}
-
-static int perf_parse_long(const char *value, long *ret)
-{
-	if (value && *value) {
-		char *end;
-		long val = strtol(value, &end, 0);
-		unsigned long factor = 1;
-		if (!parse_unit_factor(end, &factor))
-			return 0;
-		*ret = val * factor;
-		return 1;
-	}
-	return 0;
-}
-
-int perf_parse_ulong(const char *value, unsigned long *ret)
-{
-	if (value && *value) {
-		char *end;
-		unsigned long val = strtoul(value, &end, 0);
-		if (!parse_unit_factor(end, &val))
-			return 0;
-		*ret = val;
-		return 1;
-	}
-	return 0;
-}
-
-static void die_bad_config(const char *name)
-{
-	if (config_file_name)
-		die("bad config value for '%s' in %s", name, config_file_name);
-	die("bad config value for '%s'", name);
-}
-
-int perf_config_int(const char *name, const char *value)
-{
-	long ret = 0;
-	if (!perf_parse_long(value, &ret))
-		die_bad_config(name);
-	return ret;
-}
-
-unsigned long perf_config_ulong(const char *name, const char *value)
-{
-	unsigned long ret;
-	if (!perf_parse_ulong(value, &ret))
-		die_bad_config(name);
-	return ret;
-}
-
-int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
-{
-	*is_bool = 1;
-	if (!value)
-		return 1;
-	if (!*value)
-		return 0;
-	if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
-		return 1;
-	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
-		return 0;
-	*is_bool = 0;
-	return perf_config_int(name, value);
-}
-
-int perf_config_bool(const char *name, const char *value)
-{
-	int discard;
-	return !!perf_config_bool_or_int(name, value, &discard);
-}
-
-int perf_config_string(const char **dest, const char *var, const char *value)
-{
-	if (!value)
-		return config_error_nonbool(var);
-	*dest = strdup(value);
-	return 0;
-}
-
-static int perf_default_core_config(const char *var, const char *value)
-{
-	/* Add other config variables here and to Documentation/config.txt. */
-	return 0;
-}
-
-int perf_default_config(const char *var, const char *value, void *dummy)
-{
-	if (!prefixcmp(var, "core."))
-		return perf_default_core_config(var, value);
-
-	/* Add other config variables here and to Documentation/config.txt. */
-	return 0;
-}
-
-int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
-{
-	int ret;
-	FILE *f = fopen(filename, "r");
-
-	ret = -1;
-	if (f) {
-		config_file = f;
-		config_file_name = filename;
-		config_linenr = 1;
-		config_file_eof = 0;
-		ret = perf_parse_file(fn, data);
-		fclose(f);
-		config_file_name = NULL;
-	}
-	return ret;
-}
-
-const char *perf_etc_perfconfig(void)
-{
-	static const char *system_wide;
-	if (!system_wide)
-		system_wide = system_path(ETC_PERFCONFIG);
-	return system_wide;
-}
-
-static int perf_env_bool(const char *k, int def)
-{
-	const char *v = getenv(k);
-	return v ? perf_config_bool(k, v) : def;
-}
-
-int perf_config_system(void)
-{
-	return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
-}
-
-int perf_config_global(void)
-{
-	return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
-}
-
-int perf_config(config_fn_t fn, void *data)
-{
-	int ret = 0, found = 0;
-	char *repo_config = NULL;
-	const char *home = NULL;
-
-	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
-	if (config_exclusive_filename)
-		return perf_config_from_file(fn, config_exclusive_filename, data);
-	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
-		ret += perf_config_from_file(fn, perf_etc_perfconfig(),
-					    data);
-		found += 1;
-	}
-
-	home = getenv("HOME");
-	if (perf_config_global() && home) {
-		char *user_config = strdup(mkpath("%s/.perfconfig", home));
-		if (!access(user_config, R_OK)) {
-			ret += perf_config_from_file(fn, user_config, data);
-			found += 1;
-		}
-		free(user_config);
-	}
-
-	repo_config = perf_pathdup("config");
-	if (!access(repo_config, R_OK)) {
-		ret += perf_config_from_file(fn, repo_config, data);
-		found += 1;
-	}
-	free(repo_config);
-	if (found == 0)
-		return -1;
-	return ret;
-}
-
-/*
- * Find all the stuff for perf_config_set() below.
- */
-
-#define MAX_MATCHES 512
-
-static struct {
-	int baselen;
-	char* key;
-	int do_not_match;
-	regex_t* value_regex;
-	int multi_replace;
-	size_t offset[MAX_MATCHES];
-	enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state;
-	int seen;
-} store;
-
-static int matches(const char* key, const char* value)
-{
-	return !strcmp(key, store.key) &&
-		(store.value_regex == NULL ||
-		 (store.do_not_match ^
-		  !regexec(store.value_regex, value, 0, NULL, 0)));
-}
-
-static int store_aux(const char* key, const char* value, void *cb)
-{
-	const char *ep;
-	size_t section_len;
-
-	switch (store.state) {
-	case KEY_SEEN:
-		if (matches(key, value)) {
-			if (store.seen == 1 && store.multi_replace == 0) {
-				warning("%s has multiple values", key);
-			} else if (store.seen >= MAX_MATCHES) {
-				error("too many matches for %s", key);
-				return 1;
-			}
-
-			store.offset[store.seen] = ftell(config_file);
-			store.seen++;
-		}
-		break;
-	case SECTION_SEEN:
-		/*
-		 * What we are looking for is in store.key (both
-		 * section and var), and its section part is baselen
-		 * long.  We found key (again, both section and var).
-		 * We would want to know if this key is in the same
-		 * section as what we are looking for.  We already
-		 * know we are in the same section as what should
-		 * hold store.key.
-		 */
-		ep = strrchr(key, '.');
-		section_len = ep - key;
-
-		if ((section_len != store.baselen) ||
-		    memcmp(key, store.key, section_len+1)) {
-			store.state = SECTION_END_SEEN;
-			break;
-		}
-
-		/*
-		 * Do not increment matches: this is no match, but we
-		 * just made sure we are in the desired section.
-		 */
-		store.offset[store.seen] = ftell(config_file);
-		/* fallthru */
-	case SECTION_END_SEEN:
-	case START:
-		if (matches(key, value)) {
-			store.offset[store.seen] = ftell(config_file);
-			store.state = KEY_SEEN;
-			store.seen++;
-		} else {
-			if (strrchr(key, '.') - key == store.baselen &&
-			      !strncmp(key, store.key, store.baselen)) {
-					store.state = SECTION_SEEN;
-					store.offset[store.seen] = ftell(config_file);
-			}
-		}
-	}
-	return 0;
-}
-
-static int store_write_section(int fd, const char* key)
-{
-	const char *dot;
-	int i, success;
-	struct strbuf sb = STRBUF_INIT;
-
-	dot = memchr(key, '.', store.baselen);
-	if (dot) {
-		strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key);
-		for (i = dot - key + 1; i < store.baselen; i++) {
-			if (key[i] == '"' || key[i] == '\\')
-				strbuf_addch(&sb, '\\');
-			strbuf_addch(&sb, key[i]);
-		}
-		strbuf_addstr(&sb, "\"]\n");
-	} else {
-		strbuf_addf(&sb, "[%.*s]\n", store.baselen, key);
-	}
-
-	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
-	strbuf_release(&sb);
-
-	return success;
-}
-
-static int store_write_pair(int fd, const char* key, const char* value)
-{
-	int i, success;
-	int length = strlen(key + store.baselen + 1);
-	const char *quote = "";
-	struct strbuf sb = STRBUF_INIT;
-
-	/*
-	 * Check to see if the value needs to be surrounded with a dq pair.
-	 * Note that problematic characters are always backslash-quoted; this
-	 * check is about not losing leading or trailing SP and strings that
-	 * follow beginning-of-comment characters (i.e. ';' and '#') by the
-	 * configuration parser.
-	 */
-	if (value[0] == ' ')
-		quote = "\"";
-	for (i = 0; value[i]; i++)
-		if (value[i] == ';' || value[i] == '#')
-			quote = "\"";
-	if (i && value[i - 1] == ' ')
-		quote = "\"";
-
-	strbuf_addf(&sb, "\t%.*s = %s",
-		    length, key + store.baselen + 1, quote);
-
-	for (i = 0; value[i]; i++)
-		switch (value[i]) {
-		case '\n':
-			strbuf_addstr(&sb, "\\n");
-			break;
-		case '\t':
-			strbuf_addstr(&sb, "\\t");
-			break;
-		case '"':
-		case '\\':
-			strbuf_addch(&sb, '\\');
-		default:
-			strbuf_addch(&sb, value[i]);
-			break;
-		}
-	strbuf_addf(&sb, "%s\n", quote);
-
-	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
-	strbuf_release(&sb);
-
-	return success;
-}
-
-static ssize_t find_beginning_of_line(const char* contents, size_t size,
-	size_t offset_, int* found_bracket)
-{
-	size_t equal_offset = size, bracket_offset = size;
-	ssize_t offset;
-
-contline:
-	for (offset = offset_-2; offset > 0
-			&& contents[offset] != '\n'; offset--)
-		switch (contents[offset]) {
-			case '=': equal_offset = offset; break;
-			case ']': bracket_offset = offset; break;
-		}
-	if (offset > 0 && contents[offset-1] == '\\') {
-		offset_ = offset;
-		goto contline;
-	}
-	if (bracket_offset < equal_offset) {
-		*found_bracket = 1;
-		offset = bracket_offset+1;
-	} else
-		offset++;
-
-	return offset;
-}
-
-int perf_config_set(const char* key, const char* value)
-{
-	return perf_config_set_multivar(key, value, NULL, 0);
-}
-
-/*
- * If value==NULL, unset in (remove from) config,
- * if value_regex!=NULL, disregard key/value pairs where value does not match.
- * if multi_replace==0, nothing, or only one matching key/value is replaced,
- *     else all matching key/values (regardless how many) are removed,
- *     before the new pair is written.
- *
- * Returns 0 on success.
- *
- * This function does this:
- *
- * - it locks the config file by creating ".perf/config.lock"
- *
- * - it then parses the config using store_aux() as validator to find
- *   the position on the key/value pair to replace. If it is to be unset,
- *   it must be found exactly once.
- *
- * - the config file is mmap()ed and the part before the match (if any) is
- *   written to the lock file, then the changed part and the rest.
- *
- * - the config file is removed and the lock file rename()d to it.
- *
- */
-int perf_config_set_multivar(const char* key, const char* value,
-	const char* value_regex, int multi_replace)
-{
-	int i, dot;
-	int fd = -1, in_fd;
-	int ret = 0;
-	char* config_filename;
-	const char* last_dot = strrchr(key, '.');
-
-	if (config_exclusive_filename)
-		config_filename = strdup(config_exclusive_filename);
-	else
-		config_filename = perf_pathdup("config");
-
-	/*
-	 * Since "key" actually contains the section name and the real
-	 * key name separated by a dot, we have to know where the dot is.
-	 */
-
-	if (last_dot == NULL) {
-		error("key does not contain a section: %s", key);
-		ret = 2;
-		goto out_free;
-	}
-	store.baselen = last_dot - key;
-
-	store.multi_replace = multi_replace;
-
-	/*
-	 * Validate the key and while at it, lower case it for matching.
-	 */
-	store.key = malloc(strlen(key) + 1);
-	dot = 0;
-	for (i = 0; key[i]; i++) {
-		unsigned char c = key[i];
-		if (c == '.')
-			dot = 1;
-		/* Leave the extended basename untouched.. */
-		if (!dot || i > store.baselen) {
-			if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) {
-				error("invalid key: %s", key);
-				free(store.key);
-				ret = 1;
-				goto out_free;
-			}
-			c = tolower(c);
-		} else if (c == '\n') {
-			error("invalid key (newline): %s", key);
-			free(store.key);
-			ret = 1;
-			goto out_free;
-		}
-		store.key[i] = c;
-	}
-	store.key[i] = 0;
-
-	/*
-	 * If .perf/config does not exist yet, write a minimal version.
-	 */
-	in_fd = open(config_filename, O_RDONLY);
-	if ( in_fd < 0 ) {
-		free(store.key);
-
-		if ( ENOENT != errno ) {
-			error("opening %s: %s", config_filename,
-			      strerror(errno));
-			ret = 3; /* same as "invalid config file" */
-			goto out_free;
-		}
-		/* if nothing to unset, error out */
-		if (value == NULL) {
-			ret = 5;
-			goto out_free;
-		}
-
-		store.key = (char*)key;
-		if (!store_write_section(fd, key) ||
-		    !store_write_pair(fd, key, value))
-			goto write_err_out;
-	} else {
-		struct stat st;
-		char* contents;
-		size_t contents_sz, copy_begin, copy_end;
-		int i, new_line = 0;
-
-		if (value_regex == NULL)
-			store.value_regex = NULL;
-		else {
-			if (value_regex[0] == '!') {
-				store.do_not_match = 1;
-				value_regex++;
-			} else
-				store.do_not_match = 0;
-
-			store.value_regex = (regex_t*)malloc(sizeof(regex_t));
-			if (regcomp(store.value_regex, value_regex,
-					REG_EXTENDED)) {
-				error("invalid pattern: %s", value_regex);
-				free(store.value_regex);
-				ret = 6;
-				goto out_free;
-			}
-		}
-
-		store.offset[0] = 0;
-		store.state = START;
-		store.seen = 0;
-
-		/*
-		 * After this, store.offset will contain the *end* offset
-		 * of the last match, or remain at 0 if no match was found.
-		 * As a side effect, we make sure to transform only a valid
-		 * existing config file.
-		 */
-		if (perf_config_from_file(store_aux, config_filename, NULL)) {
-			error("invalid config file %s", config_filename);
-			free(store.key);
-			if (store.value_regex != NULL) {
-				regfree(store.value_regex);
-				free(store.value_regex);
-			}
-			ret = 3;
-			goto out_free;
-		}
-
-		free(store.key);
-		if (store.value_regex != NULL) {
-			regfree(store.value_regex);
-			free(store.value_regex);
-		}
-
-		/* if nothing to unset, or too many matches, error out */
-		if ((store.seen == 0 && value == NULL) ||
-				(store.seen > 1 && multi_replace == 0)) {
-			ret = 5;
-			goto out_free;
-		}
-
-		fstat(in_fd, &st);
-		contents_sz = xsize_t(st.st_size);
-		contents = mmap(NULL, contents_sz, PROT_READ,
-			MAP_PRIVATE, in_fd, 0);
-		close(in_fd);
-
-		if (store.seen == 0)
-			store.seen = 1;
-
-		for (i = 0, copy_begin = 0; i < store.seen; i++) {
-			if (store.offset[i] == 0) {
-				store.offset[i] = copy_end = contents_sz;
-			} else if (store.state != KEY_SEEN) {
-				copy_end = store.offset[i];
-			} else
-				copy_end = find_beginning_of_line(
-					contents, contents_sz,
-					store.offset[i]-2, &new_line);
-
-			if (copy_end > 0 && contents[copy_end-1] != '\n')
-				new_line = 1;
-
-			/* write the first part of the config */
-			if (copy_end > copy_begin) {
-				if (write_in_full(fd, contents + copy_begin,
-						  copy_end - copy_begin) <
-				    copy_end - copy_begin)
-					goto write_err_out;
-				if (new_line &&
-				    write_in_full(fd, "\n", 1) != 1)
-					goto write_err_out;
-			}
-			copy_begin = store.offset[i];
-		}
-
-		/* write the pair (value == NULL means unset) */
-		if (value != NULL) {
-			if (store.state == START) {
-				if (!store_write_section(fd, key))
-					goto write_err_out;
-			}
-			if (!store_write_pair(fd, key, value))
-				goto write_err_out;
-		}
-
-		/* write the rest of the config */
-		if (copy_begin < contents_sz)
-			if (write_in_full(fd, contents + copy_begin,
-					  contents_sz - copy_begin) <
-			    contents_sz - copy_begin)
-				goto write_err_out;
-
-		munmap(contents, contents_sz);
-	}
-
-	ret = 0;
-
-out_free:
-	free(config_filename);
-	return ret;
-
-write_err_out:
-	goto out_free;
-
-}
-
-/*
- * Call this to report error for your variable that should not
- * get a boolean value (i.e. "[my] var" means "true").
- */
-int config_error_nonbool(const char *var)
-{
-	return error("Missing value for '%s'", var);
-}
diff --git a/Documentation/perf_counter/ctype.c b/Documentation/perf_counter/ctype.c
deleted file mode 100644
index b90ec00..0000000
--- a/Documentation/perf_counter/ctype.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Sane locale-independent, ASCII ctype.
- *
- * No surprises, and works with signed and unsigned chars.
- */
-#include "cache.h"
-
-enum {
-	S = GIT_SPACE,
-	A = GIT_ALPHA,
-	D = GIT_DIGIT,
-	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
-	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | * */
-};
-
-unsigned char sane_ctype[256] = {
-	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
-	S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0,		/*  32.. 47 */
-	D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G,		/*  48.. 63 */
-	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
-	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0,		/*  80.. 95 */
-	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
-	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0,		/* 112..127 */
-	/* Nothing in the 128.. range */
-};
diff --git a/Documentation/perf_counter/exec_cmd.c b/Documentation/perf_counter/exec_cmd.c
deleted file mode 100644
index d392922..0000000
--- a/Documentation/perf_counter/exec_cmd.c
+++ /dev/null
@@ -1,165 +0,0 @@
-#include "cache.h"
-#include "exec_cmd.h"
-#include "quote.h"
-#define MAX_ARGS	32
-
-extern char **environ;
-static const char *argv_exec_path;
-static const char *argv0_path;
-
-const char *system_path(const char *path)
-{
-#ifdef RUNTIME_PREFIX
-	static const char *prefix;
-#else
-	static const char *prefix = PREFIX;
-#endif
-	struct strbuf d = STRBUF_INIT;
-
-	if (is_absolute_path(path))
-		return path;
-
-#ifdef RUNTIME_PREFIX
-	assert(argv0_path);
-	assert(is_absolute_path(argv0_path));
-
-	if (!prefix &&
-	    !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
-	    !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
-	    !(prefix = strip_path_suffix(argv0_path, "perf"))) {
-		prefix = PREFIX;
-		fprintf(stderr, "RUNTIME_PREFIX requested, "
-				"but prefix computation failed.  "
-				"Using static fallback '%s'.\n", prefix);
-	}
-#endif
-
-	strbuf_addf(&d, "%s/%s", prefix, path);
-	path = strbuf_detach(&d, NULL);
-	return path;
-}
-
-const char *perf_extract_argv0_path(const char *argv0)
-{
-	const char *slash;
-
-	if (!argv0 || !*argv0)
-		return NULL;
-	slash = argv0 + strlen(argv0);
-
-	while (argv0 <= slash && !is_dir_sep(*slash))
-		slash--;
-
-	if (slash >= argv0) {
-		argv0_path = strndup(argv0, slash - argv0);
-		return slash + 1;
-	}
-
-	return argv0;
-}
-
-void perf_set_argv_exec_path(const char *exec_path)
-{
-	argv_exec_path = exec_path;
-	/*
-	 * Propagate this setting to external programs.
-	 */
-	setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
-}
-
-
-/* Returns the highest-priority, location to look for perf programs. */
-const char *perf_exec_path(void)
-{
-	const char *env;
-
-	if (argv_exec_path)
-		return argv_exec_path;
-
-	env = getenv(EXEC_PATH_ENVIRONMENT);
-	if (env && *env) {
-		return env;
-	}
-
-	return system_path(PERF_EXEC_PATH);
-}
-
-static void add_path(struct strbuf *out, const char *path)
-{
-	if (path && *path) {
-		if (is_absolute_path(path))
-			strbuf_addstr(out, path);
-		else
-			strbuf_addstr(out, make_nonrelative_path(path));
-
-		strbuf_addch(out, PATH_SEP);
-	}
-}
-
-void setup_path(void)
-{
-	const char *old_path = getenv("PATH");
-	struct strbuf new_path = STRBUF_INIT;
-
-	add_path(&new_path, perf_exec_path());
-	add_path(&new_path, argv0_path);
-
-	if (old_path)
-		strbuf_addstr(&new_path, old_path);
-	else
-		strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin");
-
-	setenv("PATH", new_path.buf, 1);
-
-	strbuf_release(&new_path);
-}
-
-const char **prepare_perf_cmd(const char **argv)
-{
-	int argc;
-	const char **nargv;
-
-	for (argc = 0; argv[argc]; argc++)
-		; /* just counting */
-	nargv = malloc(sizeof(*nargv) * (argc + 2));
-
-	nargv[0] = "perf";
-	for (argc = 0; argv[argc]; argc++)
-		nargv[argc + 1] = argv[argc];
-	nargv[argc + 1] = NULL;
-	return nargv;
-}
-
-int execv_perf_cmd(const char **argv) {
-	const char **nargv = prepare_perf_cmd(argv);
-
-	/* execvp() can only ever return if it fails */
-	execvp("perf", (char **)nargv);
-
-	free(nargv);
-	return -1;
-}
-
-
-int execl_perf_cmd(const char *cmd,...)
-{
-	int argc;
-	const char *argv[MAX_ARGS + 1];
-	const char *arg;
-	va_list param;
-
-	va_start(param, cmd);
-	argv[0] = cmd;
-	argc = 1;
-	while (argc < MAX_ARGS) {
-		arg = argv[argc++] = va_arg(param, char *);
-		if (!arg)
-			break;
-	}
-	va_end(param);
-	if (MAX_ARGS <= argc)
-		return error("too many args to run %s", cmd);
-
-	argv[argc] = NULL;
-	return execv_perf_cmd(argv);
-}
diff --git a/Documentation/perf_counter/exec_cmd.h b/Documentation/perf_counter/exec_cmd.h
deleted file mode 100644
index effe25e..0000000
--- a/Documentation/perf_counter/exec_cmd.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef PERF_EXEC_CMD_H
-#define PERF_EXEC_CMD_H
-
-extern void perf_set_argv_exec_path(const char *exec_path);
-extern const char *perf_extract_argv0_path(const char *path);
-extern const char *perf_exec_path(void);
-extern void setup_path(void);
-extern const char **prepare_perf_cmd(const char **argv);
-extern int execv_perf_cmd(const char **argv); /* NULL terminated */
-extern int execl_perf_cmd(const char *cmd, ...);
-extern const char *system_path(const char *path);
-
-#endif /* PERF_EXEC_CMD_H */
diff --git a/Documentation/perf_counter/generate-cmdlist.sh b/Documentation/perf_counter/generate-cmdlist.sh
deleted file mode 100755
index f06f6fd..0000000
--- a/Documentation/perf_counter/generate-cmdlist.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/sh
-
-echo "/* Automatically generated by $0 */
-struct cmdname_help
-{
-    char name[16];
-    char help[80];
-};
-
-static struct cmdname_help common_cmds[] = {"
-
-sed -n -e 's/^perf-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
-sort |
-while read cmd
-do
-     sed -n '
-     /^NAME/,/perf-'"$cmd"'/H
-     ${
-            x
-            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
-	    p
-     }' "Documentation/perf-$cmd.txt"
-done
-echo "};"
diff --git a/Documentation/perf_counter/help.c b/Documentation/perf_counter/help.c
deleted file mode 100644
index ec01167..0000000
--- a/Documentation/perf_counter/help.c
+++ /dev/null
@@ -1,366 +0,0 @@
-#include "cache.h"
-#include "builtin.h"
-#include "exec_cmd.h"
-#include "levenshtein.h"
-#include "help.h"
-
-/* most GUI terminals set COLUMNS (although some don't export it) */
-static int term_columns(void)
-{
-	char *col_string = getenv("COLUMNS");
-	int n_cols;
-
-	if (col_string && (n_cols = atoi(col_string)) > 0)
-		return n_cols;
-
-#ifdef TIOCGWINSZ
-	{
-		struct winsize ws;
-		if (!ioctl(1, TIOCGWINSZ, &ws)) {
-			if (ws.ws_col)
-				return ws.ws_col;
-		}
-	}
-#endif
-
-	return 80;
-}
-
-void add_cmdname(struct cmdnames *cmds, const char *name, int len)
-{
-	struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
-
-	ent->len = len;
-	memcpy(ent->name, name, len);
-	ent->name[len] = 0;
-
-	ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
-	cmds->names[cmds->cnt++] = ent;
-}
-
-static void clean_cmdnames(struct cmdnames *cmds)
-{
-	int i;
-	for (i = 0; i < cmds->cnt; ++i)
-		free(cmds->names[i]);
-	free(cmds->names);
-	cmds->cnt = 0;
-	cmds->alloc = 0;
-}
-
-static int cmdname_compare(const void *a_, const void *b_)
-{
-	struct cmdname *a = *(struct cmdname **)a_;
-	struct cmdname *b = *(struct cmdname **)b_;
-	return strcmp(a->name, b->name);
-}
-
-static void uniq(struct cmdnames *cmds)
-{
-	int i, j;
-
-	if (!cmds->cnt)
-		return;
-
-	for (i = j = 1; i < cmds->cnt; i++)
-		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
-			cmds->names[j++] = cmds->names[i];
-
-	cmds->cnt = j;
-}
-
-void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
-{
-	int ci, cj, ei;
-	int cmp;
-
-	ci = cj = ei = 0;
-	while (ci < cmds->cnt && ei < excludes->cnt) {
-		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
-		if (cmp < 0)
-			cmds->names[cj++] = cmds->names[ci++];
-		else if (cmp == 0)
-			ci++, ei++;
-		else if (cmp > 0)
-			ei++;
-	}
-
-	while (ci < cmds->cnt)
-		cmds->names[cj++] = cmds->names[ci++];
-
-	cmds->cnt = cj;
-}
-
-static void pretty_print_string_list(struct cmdnames *cmds, int longest)
-{
-	int cols = 1, rows;
-	int space = longest + 1; /* min 1 SP between words */
-	int max_cols = term_columns() - 1; /* don't print *on* the edge */
-	int i, j;
-
-	if (space < max_cols)
-		cols = max_cols / space;
-	rows = (cmds->cnt + cols - 1) / cols;
-
-	for (i = 0; i < rows; i++) {
-		printf("  ");
-
-		for (j = 0; j < cols; j++) {
-			int n = j * rows + i;
-			int size = space;
-			if (n >= cmds->cnt)
-				break;
-			if (j == cols-1 || n + rows >= cmds->cnt)
-				size = 1;
-			printf("%-*s", size, cmds->names[n]->name);
-		}
-		putchar('\n');
-	}
-}
-
-static int is_executable(const char *name)
-{
-	struct stat st;
-
-	if (stat(name, &st) || /* stat, not lstat */
-	    !S_ISREG(st.st_mode))
-		return 0;
-
-#ifdef __MINGW32__
-	/* cannot trust the executable bit, peek into the file instead */
-	char buf[3] = { 0 };
-	int n;
-	int fd = open(name, O_RDONLY);
-	st.st_mode &= ~S_IXUSR;
-	if (fd >= 0) {
-		n = read(fd, buf, 2);
-		if (n == 2)
-			/* DOS executables start with "MZ" */
-			if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
-				st.st_mode |= S_IXUSR;
-		close(fd);
-	}
-#endif
-	return st.st_mode & S_IXUSR;
-}
-
-static void list_commands_in_dir(struct cmdnames *cmds,
-					 const char *path,
-					 const char *prefix)
-{
-	int prefix_len;
-	DIR *dir = opendir(path);
-	struct dirent *de;
-	struct strbuf buf = STRBUF_INIT;
-	int len;
-
-	if (!dir)
-		return;
-	if (!prefix)
-		prefix = "perf-";
-	prefix_len = strlen(prefix);
-
-	strbuf_addf(&buf, "%s/", path);
-	len = buf.len;
-
-	while ((de = readdir(dir)) != NULL) {
-		int entlen;
-
-		if (prefixcmp(de->d_name, prefix))
-			continue;
-
-		strbuf_setlen(&buf, len);
-		strbuf_addstr(&buf, de->d_name);
-		if (!is_executable(buf.buf))
-			continue;
-
-		entlen = strlen(de->d_name) - prefix_len;
-		if (has_extension(de->d_name, ".exe"))
-			entlen -= 4;
-
-		add_cmdname(cmds, de->d_name + prefix_len, entlen);
-	}
-	closedir(dir);
-	strbuf_release(&buf);
-}
-
-void load_command_list(const char *prefix,
-		struct cmdnames *main_cmds,
-		struct cmdnames *other_cmds)
-{
-	const char *env_path = getenv("PATH");
-	const char *exec_path = perf_exec_path();
-
-	if (exec_path) {
-		list_commands_in_dir(main_cmds, exec_path, prefix);
-		qsort(main_cmds->names, main_cmds->cnt,
-		      sizeof(*main_cmds->names), cmdname_compare);
-		uniq(main_cmds);
-	}
-
-	if (env_path) {
-		char *paths, *path, *colon;
-		path = paths = strdup(env_path);
-		while (1) {
-			if ((colon = strchr(path, PATH_SEP)))
-				*colon = 0;
-			if (!exec_path || strcmp(path, exec_path))
-				list_commands_in_dir(other_cmds, path, prefix);
-
-			if (!colon)
-				break;
-			path = colon + 1;
-		}
-		free(paths);
-
-		qsort(other_cmds->names, other_cmds->cnt,
-		      sizeof(*other_cmds->names), cmdname_compare);
-		uniq(other_cmds);
-	}
-	exclude_cmds(other_cmds, main_cmds);
-}
-
-void list_commands(const char *title, struct cmdnames *main_cmds,
-		   struct cmdnames *other_cmds)
-{
-	int i, longest = 0;
-
-	for (i = 0; i < main_cmds->cnt; i++)
-		if (longest < main_cmds->names[i]->len)
-			longest = main_cmds->names[i]->len;
-	for (i = 0; i < other_cmds->cnt; i++)
-		if (longest < other_cmds->names[i]->len)
-			longest = other_cmds->names[i]->len;
-
-	if (main_cmds->cnt) {
-		const char *exec_path = perf_exec_path();
-		printf("available %s in '%s'\n", title, exec_path);
-		printf("----------------");
-		mput_char('-', strlen(title) + strlen(exec_path));
-		putchar('\n');
-		pretty_print_string_list(main_cmds, longest);
-		putchar('\n');
-	}
-
-	if (other_cmds->cnt) {
-		printf("%s available from elsewhere on your $PATH\n", title);
-		printf("---------------------------------------");
-		mput_char('-', strlen(title));
-		putchar('\n');
-		pretty_print_string_list(other_cmds, longest);
-		putchar('\n');
-	}
-}
-
-int is_in_cmdlist(struct cmdnames *c, const char *s)
-{
-	int i;
-	for (i = 0; i < c->cnt; i++)
-		if (!strcmp(s, c->names[i]->name))
-			return 1;
-	return 0;
-}
-
-static int autocorrect;
-static struct cmdnames aliases;
-
-static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
-{
-	if (!strcmp(var, "help.autocorrect"))
-		autocorrect = perf_config_int(var,value);
-	/* Also use aliases for command lookup */
-	if (!prefixcmp(var, "alias."))
-		add_cmdname(&aliases, var + 6, strlen(var + 6));
-
-	return perf_default_config(var, value, cb);
-}
-
-static int levenshtein_compare(const void *p1, const void *p2)
-{
-	const struct cmdname *const *c1 = p1, *const *c2 = p2;
-	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
-	int l1 = (*c1)->len;
-	int l2 = (*c2)->len;
-	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
-}
-
-static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
-{
-	int i;
-	ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
-
-	for (i = 0; i < old->cnt; i++)
-		cmds->names[cmds->cnt++] = old->names[i];
-	free(old->names);
-	old->cnt = 0;
-	old->names = NULL;
-}
-
-const char *help_unknown_cmd(const char *cmd)
-{
-	int i, n, best_similarity = 0;
-	struct cmdnames main_cmds, other_cmds;
-
-	memset(&main_cmds, 0, sizeof(main_cmds));
-	memset(&other_cmds, 0, sizeof(main_cmds));
-	memset(&aliases, 0, sizeof(aliases));
-
-	perf_config(perf_unknown_cmd_config, NULL);
-
-	load_command_list("perf-", &main_cmds, &other_cmds);
-
-	add_cmd_list(&main_cmds, &aliases);
-	add_cmd_list(&main_cmds, &other_cmds);
-	qsort(main_cmds.names, main_cmds.cnt,
-	      sizeof(main_cmds.names), cmdname_compare);
-	uniq(&main_cmds);
-
-	/* This reuses cmdname->len for similarity index */
-	for (i = 0; i < main_cmds.cnt; ++i)
-		main_cmds.names[i]->len =
-			levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
-
-	qsort(main_cmds.names, main_cmds.cnt,
-	      sizeof(*main_cmds.names), levenshtein_compare);
-
-	if (!main_cmds.cnt)
-		die ("Uh oh. Your system reports no Git commands at all.");
-
-	best_similarity = main_cmds.names[0]->len;
-	n = 1;
-	while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
-		++n;
-	if (autocorrect && n == 1) {
-		const char *assumed = main_cmds.names[0]->name;
-		main_cmds.names[0] = NULL;
-		clean_cmdnames(&main_cmds);
-		fprintf(stderr, "WARNING: You called a Git program named '%s', "
-			"which does not exist.\n"
-			"Continuing under the assumption that you meant '%s'\n",
-			cmd, assumed);
-		if (autocorrect > 0) {
-			fprintf(stderr, "in %0.1f seconds automatically...\n",
-				(float)autocorrect/10.0);
-			poll(NULL, 0, autocorrect * 100);
-		}
-		return assumed;
-	}
-
-	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
-
-	if (best_similarity < 6) {
-		fprintf(stderr, "\nDid you mean %s?\n",
-			n < 2 ? "this": "one of these");
-
-		for (i = 0; i < n; i++)
-			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
-	}
-
-	exit(1);
-}
-
-int cmd_version(int argc, const char **argv, const char *prefix)
-{
-	printf("perf version %s\n", perf_version_string);
-	return 0;
-}
diff --git a/Documentation/perf_counter/help.h b/Documentation/perf_counter/help.h
deleted file mode 100644
index 56bc154..0000000
--- a/Documentation/perf_counter/help.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef HELP_H
-#define HELP_H
-
-struct cmdnames {
-	int alloc;
-	int cnt;
-	struct cmdname {
-		size_t len; /* also used for similarity index in help.c */
-		char name[FLEX_ARRAY];
-	} **names;
-};
-
-static inline void mput_char(char c, unsigned int num)
-{
-	while(num--)
-		putchar(c);
-}
-
-void load_command_list(const char *prefix,
-		struct cmdnames *main_cmds,
-		struct cmdnames *other_cmds);
-void add_cmdname(struct cmdnames *cmds, const char *name, int len);
-/* Here we require that excludes is a sorted list. */
-void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
-int is_in_cmdlist(struct cmdnames *c, const char *s);
-void list_commands(const char *title, struct cmdnames *main_cmds,
-		   struct cmdnames *other_cmds);
-
-#endif /* HELP_H */
diff --git a/Documentation/perf_counter/levenshtein.c b/Documentation/perf_counter/levenshtein.c
deleted file mode 100644
index e521d15..0000000
--- a/Documentation/perf_counter/levenshtein.c
+++ /dev/null
@@ -1,84 +0,0 @@
-#include "cache.h"
-#include "levenshtein.h"
-
-/*
- * This function implements the Damerau-Levenshtein algorithm to
- * calculate a distance between strings.
- *
- * Basically, it says how many letters need to be swapped, substituted,
- * deleted from, or added to string1, at least, to get string2.
- *
- * The idea is to build a distance matrix for the substrings of both
- * strings.  To avoid a large space complexity, only the last three rows
- * are kept in memory (if swaps had the same or higher cost as one deletion
- * plus one insertion, only two rows would be needed).
- *
- * At any stage, "i + 1" denotes the length of the current substring of
- * string1 that the distance is calculated for.
- *
- * row2 holds the current row, row1 the previous row (i.e. for the substring
- * of string1 of length "i"), and row0 the row before that.
- *
- * In other words, at the start of the big loop, row2[j + 1] contains the
- * Damerau-Levenshtein distance between the substring of string1 of length
- * "i" and the substring of string2 of length "j + 1".
- *
- * All the big loop does is determine the partial minimum-cost paths.
- *
- * It does so by calculating the costs of the path ending in characters
- * i (in string1) and j (in string2), respectively, given that the last
- * operation is a substition, a swap, a deletion, or an insertion.
- *
- * This implementation allows the costs to be weighted:
- *
- * - w (as in "sWap")
- * - s (as in "Substitution")
- * - a (for insertion, AKA "Add")
- * - d (as in "Deletion")
- *
- * Note that this algorithm calculates a distance _iff_ d == a.
- */
-int levenshtein(const char *string1, const char *string2,
-		int w, int s, int a, int d)
-{
-	int len1 = strlen(string1), len2 = strlen(string2);
-	int *row0 = malloc(sizeof(int) * (len2 + 1));
-	int *row1 = malloc(sizeof(int) * (len2 + 1));
-	int *row2 = malloc(sizeof(int) * (len2 + 1));
-	int i, j;
-
-	for (j = 0; j <= len2; j++)
-		row1[j] = j * a;
-	for (i = 0; i < len1; i++) {
-		int *dummy;
-
-		row2[0] = (i + 1) * d;
-		for (j = 0; j < len2; j++) {
-			/* substitution */
-			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
-			/* swap */
-			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
-					string1[i] == string2[j - 1] &&
-					row2[j + 1] > row0[j - 1] + w)
-				row2[j + 1] = row0[j - 1] + w;
-			/* deletion */
-			if (row2[j + 1] > row1[j + 1] + d)
-				row2[j + 1] = row1[j + 1] + d;
-			/* insertion */
-			if (row2[j + 1] > row2[j] + a)
-				row2[j + 1] = row2[j] + a;
-		}
-
-		dummy = row0;
-		row0 = row1;
-		row1 = row2;
-		row2 = dummy;
-	}
-
-	i = row1[len2];
-	free(row0);
-	free(row1);
-	free(row2);
-
-	return i;
-}
diff --git a/Documentation/perf_counter/levenshtein.h b/Documentation/perf_counter/levenshtein.h
deleted file mode 100644
index 0173abe..0000000
--- a/Documentation/perf_counter/levenshtein.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef LEVENSHTEIN_H
-#define LEVENSHTEIN_H
-
-int levenshtein(const char *string1, const char *string2,
-	int swap_penalty, int substition_penalty,
-	int insertion_penalty, int deletion_penalty);
-
-#endif
diff --git a/Documentation/perf_counter/parse-options.c b/Documentation/perf_counter/parse-options.c
deleted file mode 100644
index 7464f34..0000000
--- a/Documentation/perf_counter/parse-options.c
+++ /dev/null
@@ -1,495 +0,0 @@
-#include "util.h"
-#include "parse-options.h"
-#include "cache.h"
-
-#define OPT_SHORT 1
-#define OPT_UNSET 2
-
-static int opterror(const struct option *opt, const char *reason, int flags)
-{
-	if (flags & OPT_SHORT)
-		return error("switch `%c' %s", opt->short_name, reason);
-	if (flags & OPT_UNSET)
-		return error("option `no-%s' %s", opt->long_name, reason);
-	return error("option `%s' %s", opt->long_name, reason);
-}
-
-static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
-		   int flags, const char **arg)
-{
-	if (p->opt) {
-		*arg = p->opt;
-		p->opt = NULL;
-	} else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) {
-		*arg = (const char *)opt->defval;
-	} else if (p->argc > 1) {
-		p->argc--;
-		*arg = *++p->argv;
-	} else
-		return opterror(opt, "requires a value", flags);
-	return 0;
-}
-
-static int get_value(struct parse_opt_ctx_t *p,
-		     const struct option *opt, int flags)
-{
-	const char *s, *arg;
-	const int unset = flags & OPT_UNSET;
-
-	if (unset && p->opt)
-		return opterror(opt, "takes no value", flags);
-	if (unset && (opt->flags & PARSE_OPT_NONEG))
-		return opterror(opt, "isn't available", flags);
-
-	if (!(flags & OPT_SHORT) && p->opt) {
-		switch (opt->type) {
-		case OPTION_CALLBACK:
-			if (!(opt->flags & PARSE_OPT_NOARG))
-				break;
-			/* FALLTHROUGH */
-		case OPTION_BOOLEAN:
-		case OPTION_BIT:
-		case OPTION_SET_INT:
-		case OPTION_SET_PTR:
-			return opterror(opt, "takes no value", flags);
-		default:
-			break;
-		}
-	}
-
-	switch (opt->type) {
-	case OPTION_BIT:
-		if (unset)
-			*(int *)opt->value &= ~opt->defval;
-		else
-			*(int *)opt->value |= opt->defval;
-		return 0;
-
-	case OPTION_BOOLEAN:
-		*(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
-		return 0;
-
-	case OPTION_SET_INT:
-		*(int *)opt->value = unset ? 0 : opt->defval;
-		return 0;
-
-	case OPTION_SET_PTR:
-		*(void **)opt->value = unset ? NULL : (void *)opt->defval;
-		return 0;
-
-	case OPTION_STRING:
-		if (unset)
-			*(const char **)opt->value = NULL;
-		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
-			*(const char **)opt->value = (const char *)opt->defval;
-		else
-			return get_arg(p, opt, flags, (const char **)opt->value);
-		return 0;
-
-	case OPTION_CALLBACK:
-		if (unset)
-			return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
-		if (opt->flags & PARSE_OPT_NOARG)
-			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
-		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
-			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
-		if (get_arg(p, opt, flags, &arg))
-			return -1;
-		return (*opt->callback)(opt, arg, 0) ? (-1) : 0;
-
-	case OPTION_INTEGER:
-		if (unset) {
-			*(int *)opt->value = 0;
-			return 0;
-		}
-		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
-			*(int *)opt->value = opt->defval;
-			return 0;
-		}
-		if (get_arg(p, opt, flags, &arg))
-			return -1;
-		*(int *)opt->value = strtol(arg, (char **)&s, 10);
-		if (*s)
-			return opterror(opt, "expects a numerical value", flags);
-		return 0;
-
-	default:
-		die("should not happen, someone must be hit on the forehead");
-	}
-}
-
-static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
-{
-	for (; options->type != OPTION_END; options++) {
-		if (options->short_name == *p->opt) {
-			p->opt = p->opt[1] ? p->opt + 1 : NULL;
-			return get_value(p, options, OPT_SHORT);
-		}
-	}
-	return -2;
-}
-
-static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
-                          const struct option *options)
-{
-	const char *arg_end = strchr(arg, '=');
-	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
-	int abbrev_flags = 0, ambiguous_flags = 0;
-
-	if (!arg_end)
-		arg_end = arg + strlen(arg);
-
-	for (; options->type != OPTION_END; options++) {
-		const char *rest;
-		int flags = 0;
-
-		if (!options->long_name)
-			continue;
-
-		rest = skip_prefix(arg, options->long_name);
-		if (options->type == OPTION_ARGUMENT) {
-			if (!rest)
-				continue;
-			if (*rest == '=')
-				return opterror(options, "takes no value", flags);
-			if (*rest)
-				continue;
-			p->out[p->cpidx++] = arg - 2;
-			return 0;
-		}
-		if (!rest) {
-			/* abbreviated? */
-			if (!strncmp(options->long_name, arg, arg_end - arg)) {
-is_abbreviated:
-				if (abbrev_option) {
-					/*
-					 * If this is abbreviated, it is
-					 * ambiguous. So when there is no
-					 * exact match later, we need to
-					 * error out.
-					 */
-					ambiguous_option = abbrev_option;
-					ambiguous_flags = abbrev_flags;
-				}
-				if (!(flags & OPT_UNSET) && *arg_end)
-					p->opt = arg_end + 1;
-				abbrev_option = options;
-				abbrev_flags = flags;
-				continue;
-			}
-			/* negated and abbreviated very much? */
-			if (!prefixcmp("no-", arg)) {
-				flags |= OPT_UNSET;
-				goto is_abbreviated;
-			}
-			/* negated? */
-			if (strncmp(arg, "no-", 3))
-				continue;
-			flags |= OPT_UNSET;
-			rest = skip_prefix(arg + 3, options->long_name);
-			/* abbreviated and negated? */
-			if (!rest && !prefixcmp(options->long_name, arg + 3))
-				goto is_abbreviated;
-			if (!rest)
-				continue;
-		}
-		if (*rest) {
-			if (*rest != '=')
-				continue;
-			p->opt = rest + 1;
-		}
-		return get_value(p, options, flags);
-	}
-
-	if (ambiguous_option)
-		return error("Ambiguous option: %s "
-			"(could be --%s%s or --%s%s)",
-			arg,
-			(ambiguous_flags & OPT_UNSET) ?  "no-" : "",
-			ambiguous_option->long_name,
-			(abbrev_flags & OPT_UNSET) ?  "no-" : "",
-			abbrev_option->long_name);
-	if (abbrev_option)
-		return get_value(p, abbrev_option, abbrev_flags);
-	return -2;
-}
-
-static void check_typos(const char *arg, const struct option *options)
-{
-	if (strlen(arg) < 3)
-		return;
-
-	if (!prefixcmp(arg, "no-")) {
-		error ("did you mean `--%s` (with two dashes ?)", arg);
-		exit(129);
-	}
-
-	for (; options->type != OPTION_END; options++) {
-		if (!options->long_name)
-			continue;
-		if (!prefixcmp(options->long_name, arg)) {
-			error ("did you mean `--%s` (with two dashes ?)", arg);
-			exit(129);
-		}
-	}
-}
-
-void parse_options_start(struct parse_opt_ctx_t *ctx,
-			 int argc, const char **argv, int flags)
-{
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->argc = argc - 1;
-	ctx->argv = argv + 1;
-	ctx->out  = argv;
-	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
-	ctx->flags = flags;
-	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
-	    (flags & PARSE_OPT_STOP_AT_NON_OPTION))
-		die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
-}
-
-static int usage_with_options_internal(const char * const *,
-				       const struct option *, int);
-
-int parse_options_step(struct parse_opt_ctx_t *ctx,
-		       const struct option *options,
-		       const char * const usagestr[])
-{
-	int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
-
-	/* we must reset ->opt, unknown short option leave it dangling */
-	ctx->opt = NULL;
-
-	for (; ctx->argc; ctx->argc--, ctx->argv++) {
-		const char *arg = ctx->argv[0];
-
-		if (*arg != '-' || !arg[1]) {
-			if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
-				break;
-			ctx->out[ctx->cpidx++] = ctx->argv[0];
-			continue;
-		}
-
-		if (arg[1] != '-') {
-			ctx->opt = arg + 1;
-			if (internal_help && *ctx->opt == 'h')
-				return parse_options_usage(usagestr, options);
-			switch (parse_short_opt(ctx, options)) {
-			case -1:
-				return parse_options_usage(usagestr, options);
-			case -2:
-				goto unknown;
-			}
-			if (ctx->opt)
-				check_typos(arg + 1, options);
-			while (ctx->opt) {
-				if (internal_help && *ctx->opt == 'h')
-					return parse_options_usage(usagestr, options);
-				switch (parse_short_opt(ctx, options)) {
-				case -1:
-					return parse_options_usage(usagestr, options);
-				case -2:
-					/* fake a short option thing to hide the fact that we may have
-					 * started to parse aggregated stuff
-					 *
-					 * This is leaky, too bad.
-					 */
-					ctx->argv[0] = strdup(ctx->opt - 1);
-					*(char *)ctx->argv[0] = '-';
-					goto unknown;
-				}
-			}
-			continue;
-		}
-
-		if (!arg[2]) { /* "--" */
-			if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
-				ctx->argc--;
-				ctx->argv++;
-			}
-			break;
-		}
-
-		if (internal_help && !strcmp(arg + 2, "help-all"))
-			return usage_with_options_internal(usagestr, options, 1);
-		if (internal_help && !strcmp(arg + 2, "help"))
-			return parse_options_usage(usagestr, options);
-		switch (parse_long_opt(ctx, arg + 2, options)) {
-		case -1:
-			return parse_options_usage(usagestr, options);
-		case -2:
-			goto unknown;
-		}
-		continue;
-unknown:
-		if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
-			return PARSE_OPT_UNKNOWN;
-		ctx->out[ctx->cpidx++] = ctx->argv[0];
-		ctx->opt = NULL;
-	}
-	return PARSE_OPT_DONE;
-}
-
-int parse_options_end(struct parse_opt_ctx_t *ctx)
-{
-	memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
-	ctx->out[ctx->cpidx + ctx->argc] = NULL;
-	return ctx->cpidx + ctx->argc;
-}
-
-int parse_options(int argc, const char **argv, const struct option *options,
-		  const char * const usagestr[], int flags)
-{
-	struct parse_opt_ctx_t ctx;
-
-	parse_options_start(&ctx, argc, argv, flags);
-	switch (parse_options_step(&ctx, options, usagestr)) {
-	case PARSE_OPT_HELP:
-		exit(129);
-	case PARSE_OPT_DONE:
-		break;
-	default: /* PARSE_OPT_UNKNOWN */
-		if (ctx.argv[0][1] == '-') {
-			error("unknown option `%s'", ctx.argv[0] + 2);
-		} else {
-			error("unknown switch `%c'", *ctx.opt);
-		}
-		usage_with_options(usagestr, options);
-	}
-
-	return parse_options_end(&ctx);
-}
-
-#define USAGE_OPTS_WIDTH 24
-#define USAGE_GAP         2
-
-int usage_with_options_internal(const char * const *usagestr,
-				const struct option *opts, int full)
-{
-	if (!usagestr)
-		return PARSE_OPT_HELP;
-
-	fprintf(stderr, "usage: %s\n", *usagestr++);
-	while (*usagestr && **usagestr)
-		fprintf(stderr, "   or: %s\n", *usagestr++);
-	while (*usagestr) {
-		fprintf(stderr, "%s%s\n",
-				**usagestr ? "    " : "",
-				*usagestr);
-		usagestr++;
-	}
-
-	if (opts->type != OPTION_GROUP)
-		fputc('\n', stderr);
-
-	for (; opts->type != OPTION_END; opts++) {
-		size_t pos;
-		int pad;
-
-		if (opts->type == OPTION_GROUP) {
-			fputc('\n', stderr);
-			if (*opts->help)
-				fprintf(stderr, "%s\n", opts->help);
-			continue;
-		}
-		if (!full && (opts->flags & PARSE_OPT_HIDDEN))
-			continue;
-
-		pos = fprintf(stderr, "    ");
-		if (opts->short_name)
-			pos += fprintf(stderr, "-%c", opts->short_name);
-		if (opts->long_name && opts->short_name)
-			pos += fprintf(stderr, ", ");
-		if (opts->long_name)
-			pos += fprintf(stderr, "--%s", opts->long_name);
-
-		switch (opts->type) {
-		case OPTION_ARGUMENT:
-			break;
-		case OPTION_INTEGER:
-			if (opts->flags & PARSE_OPT_OPTARG)
-				if (opts->long_name)
-					pos += fprintf(stderr, "[=<n>]");
-				else
-					pos += fprintf(stderr, "[<n>]");
-			else
-				pos += fprintf(stderr, " <n>");
-			break;
-		case OPTION_CALLBACK:
-			if (opts->flags & PARSE_OPT_NOARG)
-				break;
-			/* FALLTHROUGH */
-		case OPTION_STRING:
-			if (opts->argh) {
-				if (opts->flags & PARSE_OPT_OPTARG)
-					if (opts->long_name)
-						pos += fprintf(stderr, "[=<%s>]", opts->argh);
-					else
-						pos += fprintf(stderr, "[<%s>]", opts->argh);
-				else
-					pos += fprintf(stderr, " <%s>", opts->argh);
-			} else {
-				if (opts->flags & PARSE_OPT_OPTARG)
-					if (opts->long_name)
-						pos += fprintf(stderr, "[=...]");
-					else
-						pos += fprintf(stderr, "[...]");
-				else
-					pos += fprintf(stderr, " ...");
-			}
-			break;
-		default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */
-			break;
-		}
-
-		if (pos <= USAGE_OPTS_WIDTH)
-			pad = USAGE_OPTS_WIDTH - pos;
-		else {
-			fputc('\n', stderr);
-			pad = USAGE_OPTS_WIDTH;
-		}
-		fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
-	}
-	fputc('\n', stderr);
-
-	return PARSE_OPT_HELP;
-}
-
-void usage_with_options(const char * const *usagestr,
-			const struct option *opts)
-{
-	usage_with_options_internal(usagestr, opts, 0);
-	exit(129);
-}
-
-int parse_options_usage(const char * const *usagestr,
-			const struct option *opts)
-{
-	return usage_with_options_internal(usagestr, opts, 0);
-}
-
-
-/*----- some often used options -----*/
-#include "cache.h"
-
-int parse_opt_verbosity_cb(const struct option *opt, const char *arg,
-			   int unset)
-{
-	int *target = opt->value;
-
-	if (unset)
-		/* --no-quiet, --no-verbose */
-		*target = 0;
-	else if (opt->short_name == 'v') {
-		if (*target >= 0)
-			(*target)++;
-		else
-			*target = 1;
-	} else {
-		if (*target <= 0)
-			(*target)--;
-		else
-			*target = -1;
-	}
-	return 0;
-}
diff --git a/Documentation/perf_counter/parse-options.h b/Documentation/perf_counter/parse-options.h
deleted file mode 100644
index a81c7fa..0000000
--- a/Documentation/perf_counter/parse-options.h
+++ /dev/null
@@ -1,172 +0,0 @@
-#ifndef PARSE_OPTIONS_H
-#define PARSE_OPTIONS_H
-
-enum parse_opt_type {
-	/* special types */
-	OPTION_END,
-	OPTION_ARGUMENT,
-	OPTION_GROUP,
-	/* options with no arguments */
-	OPTION_BIT,
-	OPTION_BOOLEAN, /* _INCR would have been a better name */
-	OPTION_SET_INT,
-	OPTION_SET_PTR,
-	/* options with arguments (usually) */
-	OPTION_STRING,
-	OPTION_INTEGER,
-	OPTION_CALLBACK,
-};
-
-enum parse_opt_flags {
-	PARSE_OPT_KEEP_DASHDASH = 1,
-	PARSE_OPT_STOP_AT_NON_OPTION = 2,
-	PARSE_OPT_KEEP_ARGV0 = 4,
-	PARSE_OPT_KEEP_UNKNOWN = 8,
-	PARSE_OPT_NO_INTERNAL_HELP = 16,
-};
-
-enum parse_opt_option_flags {
-	PARSE_OPT_OPTARG  = 1,
-	PARSE_OPT_NOARG   = 2,
-	PARSE_OPT_NONEG   = 4,
-	PARSE_OPT_HIDDEN  = 8,
-	PARSE_OPT_LASTARG_DEFAULT = 16,
-};
-
-struct option;
-typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
-
-/*
- * `type`::
- *   holds the type of the option, you must have an OPTION_END last in your
- *   array.
- *
- * `short_name`::
- *   the character to use as a short option name, '\0' if none.
- *
- * `long_name`::
- *   the long option name, without the leading dashes, NULL if none.
- *
- * `value`::
- *   stores pointers to the values to be filled.
- *
- * `argh`::
- *   token to explain the kind of argument this option wants. Keep it
- *   homogenous across the repository.
- *
- * `help`::
- *   the short help associated to what the option does.
- *   Must never be NULL (except for OPTION_END).
- *   OPTION_GROUP uses this pointer to store the group header.
- *
- * `flags`::
- *   mask of parse_opt_option_flags.
- *   PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
- *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
- *   PARSE_OPT_NONEG: says that this option cannot be negated
- *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in
- *                    the long one.
- *
- * `callback`::
- *   pointer to the callback to use for OPTION_CALLBACK.
- *
- * `defval`::
- *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
- *   OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in
- *   the value when met.
- *   CALLBACKS can use it like they want.
- */
-struct option {
-	enum parse_opt_type type;
-	int short_name;
-	const char *long_name;
-	void *value;
-	const char *argh;
-	const char *help;
-
-	int flags;
-	parse_opt_cb *callback;
-	intptr_t defval;
-};
-
-#define OPT_END()                   { OPTION_END }
-#define OPT_ARGUMENT(l, h)          { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) }
-#define OPT_GROUP(h)                { OPTION_GROUP, 0, NULL, NULL, NULL, (h) }
-#define OPT_BIT(s, l, v, h, b)      { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) }
-#define OPT_BOOLEAN(s, l, v, h)     { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) }
-#define OPT_SET_INT(s, l, v, h, i)  { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) }
-#define OPT_SET_PTR(s, l, v, h, p)  { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) }
-#define OPT_INTEGER(s, l, v, h)     { OPTION_INTEGER, (s), (l), (v), NULL, (h) }
-#define OPT_STRING(s, l, v, a, h)   { OPTION_STRING,  (s), (l), (v), (a), (h) }
-#define OPT_DATE(s, l, v, h) \
-	{ OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \
-	  parse_opt_approxidate_cb }
-#define OPT_CALLBACK(s, l, v, a, h, f) \
-	{ OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) }
-
-/* parse_options() will filter out the processed options and leave the
- * non-option argments in argv[].
- * Returns the number of arguments left in argv[].
- */
-extern int parse_options(int argc, const char **argv,
-                         const struct option *options,
-                         const char * const usagestr[], int flags);
-
-extern NORETURN void usage_with_options(const char * const *usagestr,
-                                        const struct option *options);
-
-/*----- incremantal advanced APIs -----*/
-
-enum {
-	PARSE_OPT_HELP = -1,
-	PARSE_OPT_DONE,
-	PARSE_OPT_UNKNOWN,
-};
-
-/*
- * It's okay for the caller to consume argv/argc in the usual way.
- * Other fields of that structure are private to parse-options and should not
- * be modified in any way.
- */
-struct parse_opt_ctx_t {
-	const char **argv;
-	const char **out;
-	int argc, cpidx;
-	const char *opt;
-	int flags;
-};
-
-extern int parse_options_usage(const char * const *usagestr,
-			       const struct option *opts);
-
-extern void parse_options_start(struct parse_opt_ctx_t *ctx,
-				int argc, const char **argv, int flags);
-
-extern int parse_options_step(struct parse_opt_ctx_t *ctx,
-			      const struct option *options,
-			      const char * const usagestr[]);
-
-extern int parse_options_end(struct parse_opt_ctx_t *ctx);
-
-
-/*----- some often used options -----*/
-extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
-extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
-extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
-
-#define OPT__VERBOSE(var)  OPT_BOOLEAN('v', "verbose", (var), "be verbose")
-#define OPT__QUIET(var)    OPT_BOOLEAN('q', "quiet",   (var), "be quiet")
-#define OPT__VERBOSITY(var) \
-	{ OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
-	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
-	{ OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
-	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
-#define OPT__DRY_RUN(var)  OPT_BOOLEAN('n', "dry-run", (var), "dry run")
-#define OPT__ABBREV(var)  \
-	{ OPTION_CALLBACK, 0, "abbrev", (var), "n", \
-	  "use <n> digits to display SHA-1s", \
-	  PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }
-
-extern const char *parse_options_fix_filename(const char *prefix, const char *file);
-
-#endif
diff --git a/Documentation/perf_counter/path.c b/Documentation/perf_counter/path.c
deleted file mode 100644
index a501a40..0000000
--- a/Documentation/perf_counter/path.c
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * I'm tired of doing "vsnprintf()" etc just to open a
- * file, so here's a "return static buffer with printf"
- * interface for paths.
- *
- * It's obviously not thread-safe. Sue me. But it's quite
- * useful for doing things like
- *
- *   f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
- *
- * which is what it's designed for.
- */
-#include "cache.h"
-
-static char bad_path[] = "/bad-path/";
-/*
- * Two hacks:
- */
-
-static char *get_perf_dir(void)
-{
-	return ".";
-}
-
-size_t strlcpy(char *dest, const char *src, size_t size)
-{
-	size_t ret = strlen(src);
-
-	if (size) {
-		size_t len = (ret >= size) ? size - 1 : ret;
-		memcpy(dest, src, len);
-		dest[len] = '\0';
-	}
-	return ret;
-}
-
-
-static char *get_pathname(void)
-{
-	static char pathname_array[4][PATH_MAX];
-	static int index;
-	return pathname_array[3 & ++index];
-}
-
-static char *cleanup_path(char *path)
-{
-	/* Clean it up */
-	if (!memcmp(path, "./", 2)) {
-		path += 2;
-		while (*path == '/')
-			path++;
-	}
-	return path;
-}
-
-char *mksnpath(char *buf, size_t n, const char *fmt, ...)
-{
-	va_list args;
-	unsigned len;
-
-	va_start(args, fmt);
-	len = vsnprintf(buf, n, fmt, args);
-	va_end(args);
-	if (len >= n) {
-		strlcpy(buf, bad_path, n);
-		return buf;
-	}
-	return cleanup_path(buf);
-}
-
-static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
-{
-	const char *perf_dir = get_perf_dir();
-	size_t len;
-
-	len = strlen(perf_dir);
-	if (n < len + 1)
-		goto bad;
-	memcpy(buf, perf_dir, len);
-	if (len && !is_dir_sep(perf_dir[len-1]))
-		buf[len++] = '/';
-	len += vsnprintf(buf + len, n - len, fmt, args);
-	if (len >= n)
-		goto bad;
-	return cleanup_path(buf);
-bad:
-	strlcpy(buf, bad_path, n);
-	return buf;
-}
-
-char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
-{
-	va_list args;
-	va_start(args, fmt);
-	(void)perf_vsnpath(buf, n, fmt, args);
-	va_end(args);
-	return buf;
-}
-
-char *perf_pathdup(const char *fmt, ...)
-{
-	char path[PATH_MAX];
-	va_list args;
-	va_start(args, fmt);
-	(void)perf_vsnpath(path, sizeof(path), fmt, args);
-	va_end(args);
-	return xstrdup(path);
-}
-
-char *mkpath(const char *fmt, ...)
-{
-	va_list args;
-	unsigned len;
-	char *pathname = get_pathname();
-
-	va_start(args, fmt);
-	len = vsnprintf(pathname, PATH_MAX, fmt, args);
-	va_end(args);
-	if (len >= PATH_MAX)
-		return bad_path;
-	return cleanup_path(pathname);
-}
-
-char *perf_path(const char *fmt, ...)
-{
-	const char *perf_dir = get_perf_dir();
-	char *pathname = get_pathname();
-	va_list args;
-	unsigned len;
-
-	len = strlen(perf_dir);
-	if (len > PATH_MAX-100)
-		return bad_path;
-	memcpy(pathname, perf_dir, len);
-	if (len && perf_dir[len-1] != '/')
-		pathname[len++] = '/';
-	va_start(args, fmt);
-	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
-	va_end(args);
-	if (len >= PATH_MAX)
-		return bad_path;
-	return cleanup_path(pathname);
-}
-
-
-/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
-int perf_mkstemp(char *path, size_t len, const char *template)
-{
-	const char *tmp;
-	size_t n;
-
-	tmp = getenv("TMPDIR");
-	if (!tmp)
-		tmp = "/tmp";
-	n = snprintf(path, len, "%s/%s", tmp, template);
-	if (len <= n) {
-		errno = ENAMETOOLONG;
-		return -1;
-	}
-	return mkstemp(path);
-}
-
-
-const char *make_relative_path(const char *abs, const char *base)
-{
-	static char buf[PATH_MAX + 1];
-	int baselen;
-	if (!base)
-		return abs;
-	baselen = strlen(base);
-	if (prefixcmp(abs, base))
-		return abs;
-	if (abs[baselen] == '/')
-		baselen++;
-	else if (base[baselen - 1] != '/')
-		return abs;
-	strcpy(buf, abs + baselen);
-	return buf;
-}
-
-/*
- * It is okay if dst == src, but they should not overlap otherwise.
- *
- * Performs the following normalizations on src, storing the result in dst:
- * - Ensures that components are separated by '/' (Windows only)
- * - Squashes sequences of '/'.
- * - Removes "." components.
- * - Removes ".." components, and the components the precede them.
- * Returns failure (non-zero) if a ".." component appears as first path
- * component anytime during the normalization. Otherwise, returns success (0).
- *
- * Note that this function is purely textual.  It does not follow symlinks,
- * verify the existence of the path, or make any system calls.
- */
-int normalize_path_copy(char *dst, const char *src)
-{
-	char *dst0;
-
-	if (has_dos_drive_prefix(src)) {
-		*dst++ = *src++;
-		*dst++ = *src++;
-	}
-	dst0 = dst;
-
-	if (is_dir_sep(*src)) {
-		*dst++ = '/';
-		while (is_dir_sep(*src))
-			src++;
-	}
-
-	for (;;) {
-		char c = *src;
-
-		/*
-		 * A path component that begins with . could be
-		 * special:
-		 * (1) "." and ends   -- ignore and terminate.
-		 * (2) "./"           -- ignore them, eat slash and continue.
-		 * (3) ".." and ends  -- strip one and terminate.
-		 * (4) "../"          -- strip one, eat slash and continue.
-		 */
-		if (c == '.') {
-			if (!src[1]) {
-				/* (1) */
-				src++;
-			} else if (is_dir_sep(src[1])) {
-				/* (2) */
-				src += 2;
-				while (is_dir_sep(*src))
-					src++;
-				continue;
-			} else if (src[1] == '.') {
-				if (!src[2]) {
-					/* (3) */
-					src += 2;
-					goto up_one;
-				} else if (is_dir_sep(src[2])) {
-					/* (4) */
-					src += 3;
-					while (is_dir_sep(*src))
-						src++;
-					goto up_one;
-				}
-			}
-		}
-
-		/* copy up to the next '/', and eat all '/' */
-		while ((c = *src++) != '\0' && !is_dir_sep(c))
-			*dst++ = c;
-		if (is_dir_sep(c)) {
-			*dst++ = '/';
-			while (is_dir_sep(c))
-				c = *src++;
-			src--;
-		} else if (!c)
-			break;
-		continue;
-
-	up_one:
-		/*
-		 * dst0..dst is prefix portion, and dst[-1] is '/';
-		 * go up one level.
-		 */
-		dst--;	/* go to trailing '/' */
-		if (dst <= dst0)
-			return -1;
-		/* Windows: dst[-1] cannot be backslash anymore */
-		while (dst0 < dst && dst[-1] != '/')
-			dst--;
-	}
-	*dst = '\0';
-	return 0;
-}
-
-/*
- * path = Canonical absolute path
- * prefix_list = Colon-separated list of absolute paths
- *
- * Determines, for each path in prefix_list, whether the "prefix" really
- * is an ancestor directory of path.  Returns the length of the longest
- * ancestor directory, excluding any trailing slashes, or -1 if no prefix
- * is an ancestor.  (Note that this means 0 is returned if prefix_list is
- * "/".) "/foo" is not considered an ancestor of "/foobar".  Directories
- * are not considered to be their own ancestors.  path must be in a
- * canonical form: empty components, or "." or ".." components are not
- * allowed.  prefix_list may be null, which is like "".
- */
-int longest_ancestor_length(const char *path, const char *prefix_list)
-{
-	char buf[PATH_MAX+1];
-	const char *ceil, *colon;
-	int len, max_len = -1;
-
-	if (prefix_list == NULL || !strcmp(path, "/"))
-		return -1;
-
-	for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
-		for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
-		len = colon - ceil;
-		if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
-			continue;
-		strlcpy(buf, ceil, len+1);
-		if (normalize_path_copy(buf, buf) < 0)
-			continue;
-		len = strlen(buf);
-		if (len > 0 && buf[len-1] == '/')
-			buf[--len] = '\0';
-
-		if (!strncmp(path, buf, len) &&
-		    path[len] == '/' &&
-		    len > max_len) {
-			max_len = len;
-		}
-	}
-
-	return max_len;
-}
-
-/* strip arbitrary amount of directory separators at end of path */
-static inline int chomp_trailing_dir_sep(const char *path, int len)
-{
-	while (len && is_dir_sep(path[len - 1]))
-		len--;
-	return len;
-}
-
-/*
- * If path ends with suffix (complete path components), returns the
- * part before suffix (sans trailing directory separators).
- * Otherwise returns NULL.
- */
-char *strip_path_suffix(const char *path, const char *suffix)
-{
-	int path_len = strlen(path), suffix_len = strlen(suffix);
-
-	while (suffix_len) {
-		if (!path_len)
-			return NULL;
-
-		if (is_dir_sep(path[path_len - 1])) {
-			if (!is_dir_sep(suffix[suffix_len - 1]))
-				return NULL;
-			path_len = chomp_trailing_dir_sep(path, path_len);
-			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
-		}
-		else if (path[--path_len] != suffix[--suffix_len])
-			return NULL;
-	}
-
-	if (path_len && !is_dir_sep(path[path_len - 1]))
-		return NULL;
-	return xstrndup(path, chomp_trailing_dir_sep(path, path_len));
-}
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index 8d6faec..594d270 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -1,8 +1,8 @@
 #include "builtin.h"
-#include "exec_cmd.h"
-#include "cache.h"
-#include "quote.h"
-#include "run-command.h"
+#include "util/exec_cmd.h"
+#include "util/cache.h"
+#include "util/quote.h"
+#include "util/run-command.h"
 
 const char perf_usage_string[] =
 	"perf [--version] [--help] COMMAND [ARGS]";
diff --git a/Documentation/perf_counter/quote.c b/Documentation/perf_counter/quote.c
deleted file mode 100644
index 7a49fcf..0000000
--- a/Documentation/perf_counter/quote.c
+++ /dev/null
@@ -1,478 +0,0 @@
-#include "cache.h"
-#include "quote.h"
-
-int quote_path_fully = 1;
-
-/* Help to copy the thing properly quoted for the shell safety.
- * any single quote is replaced with '\'', any exclamation point
- * is replaced with '\!', and the whole thing is enclosed in a
- *
- * E.g.
- *  original     sq_quote     result
- *  name     ==> name      ==> 'name'
- *  a b      ==> a b       ==> 'a b'
- *  a'b      ==> a'\''b    ==> 'a'\''b'
- *  a!b      ==> a'\!'b    ==> 'a'\!'b'
- */
-static inline int need_bs_quote(char c)
-{
-	return (c == '\'' || c == '!');
-}
-
-void sq_quote_buf(struct strbuf *dst, const char *src)
-{
-	char *to_free = NULL;
-
-	if (dst->buf == src)
-		to_free = strbuf_detach(dst, NULL);
-
-	strbuf_addch(dst, '\'');
-	while (*src) {
-		size_t len = strcspn(src, "'!");
-		strbuf_add(dst, src, len);
-		src += len;
-		while (need_bs_quote(*src)) {
-			strbuf_addstr(dst, "'\\");
-			strbuf_addch(dst, *src++);
-			strbuf_addch(dst, '\'');
-		}
-	}
-	strbuf_addch(dst, '\'');
-	free(to_free);
-}
-
-void sq_quote_print(FILE *stream, const char *src)
-{
-	char c;
-
-	fputc('\'', stream);
-	while ((c = *src++)) {
-		if (need_bs_quote(c)) {
-			fputs("'\\", stream);
-			fputc(c, stream);
-			fputc('\'', stream);
-		} else {
-			fputc(c, stream);
-		}
-	}
-	fputc('\'', stream);
-}
-
-void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
-{
-	int i;
-
-	/* Copy into destination buffer. */
-	strbuf_grow(dst, 255);
-	for (i = 0; argv[i]; ++i) {
-		strbuf_addch(dst, ' ');
-		sq_quote_buf(dst, argv[i]);
-		if (maxlen && dst->len > maxlen)
-			die("Too many or long arguments");
-	}
-}
-
-char *sq_dequote_step(char *arg, char **next)
-{
-	char *dst = arg;
-	char *src = arg;
-	char c;
-
-	if (*src != '\'')
-		return NULL;
-	for (;;) {
-		c = *++src;
-		if (!c)
-			return NULL;
-		if (c != '\'') {
-			*dst++ = c;
-			continue;
-		}
-		/* We stepped out of sq */
-		switch (*++src) {
-		case '\0':
-			*dst = 0;
-			if (next)
-				*next = NULL;
-			return arg;
-		case '\\':
-			c = *++src;
-			if (need_bs_quote(c) && *++src == '\'') {
-				*dst++ = c;
-				continue;
-			}
-		/* Fallthrough */
-		default:
-			if (!next || !isspace(*src))
-				return NULL;
-			do {
-				c = *++src;
-			} while (isspace(c));
-			*dst = 0;
-			*next = src;
-			return arg;
-		}
-	}
-}
-
-char *sq_dequote(char *arg)
-{
-	return sq_dequote_step(arg, NULL);
-}
-
-int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
-{
-	char *next = arg;
-
-	if (!*arg)
-		return 0;
-	do {
-		char *dequoted = sq_dequote_step(next, &next);
-		if (!dequoted)
-			return -1;
-		ALLOC_GROW(*argv, *nr + 1, *alloc);
-		(*argv)[(*nr)++] = dequoted;
-	} while (next);
-
-	return 0;
-}
-
-/* 1 means: quote as octal
- * 0 means: quote as octal if (quote_path_fully)
- * -1 means: never quote
- * c: quote as "\\c"
- */
-#define X8(x)   x, x, x, x, x, x, x, x
-#define X16(x)  X8(x), X8(x)
-static signed char const sq_lookup[256] = {
-	/*           0    1    2    3    4    5    6    7 */
-	/* 0x00 */   1,   1,   1,   1,   1,   1,   1, 'a',
-	/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r',   1,   1,
-	/* 0x10 */ X16(1),
-	/* 0x20 */  -1,  -1, '"',  -1,  -1,  -1,  -1,  -1,
-	/* 0x28 */ X16(-1), X16(-1), X16(-1),
-	/* 0x58 */  -1,  -1,  -1,  -1,'\\',  -1,  -1,  -1,
-	/* 0x60 */ X16(-1), X8(-1),
-	/* 0x78 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,   1,
-	/* 0x80 */ /* set to 0 */
-};
-
-static inline int sq_must_quote(char c)
-{
-	return sq_lookup[(unsigned char)c] + quote_path_fully > 0;
-}
-
-/* returns the longest prefix not needing a quote up to maxlen if positive.
-   This stops at the first \0 because it's marked as a character needing an
-   escape */
-static size_t next_quote_pos(const char *s, ssize_t maxlen)
-{
-	size_t len;
-	if (maxlen < 0) {
-		for (len = 0; !sq_must_quote(s[len]); len++);
-	} else {
-		for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++);
-	}
-	return len;
-}
-
-/*
- * C-style name quoting.
- *
- * (1) if sb and fp are both NULL, inspect the input name and counts the
- *     number of bytes that are needed to hold c_style quoted version of name,
- *     counting the double quotes around it but not terminating NUL, and
- *     returns it.
- *     However, if name does not need c_style quoting, it returns 0.
- *
- * (2) if sb or fp are not NULL, it emits the c_style quoted version
- *     of name, enclosed with double quotes if asked and needed only.
- *     Return value is the same as in (1).
- */
-static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
-                                    struct strbuf *sb, FILE *fp, int no_dq)
-{
-#undef EMIT
-#define EMIT(c)                                 \
-	do {                                        \
-		if (sb) strbuf_addch(sb, (c));          \
-		if (fp) fputc((c), fp);                 \
-		count++;                                \
-	} while (0)
-#define EMITBUF(s, l)                           \
-	do {                                        \
-		if (sb) strbuf_add(sb, (s), (l));       \
-		if (fp) fwrite((s), (l), 1, fp);        \
-		count += (l);                           \
-	} while (0)
-
-	size_t len, count = 0;
-	const char *p = name;
-
-	for (;;) {
-		int ch;
-
-		len = next_quote_pos(p, maxlen);
-		if (len == maxlen || !p[len])
-			break;
-
-		if (!no_dq && p == name)
-			EMIT('"');
-
-		EMITBUF(p, len);
-		EMIT('\\');
-		p += len;
-		ch = (unsigned char)*p++;
-		if (sq_lookup[ch] >= ' ') {
-			EMIT(sq_lookup[ch]);
-		} else {
-			EMIT(((ch >> 6) & 03) + '0');
-			EMIT(((ch >> 3) & 07) + '0');
-			EMIT(((ch >> 0) & 07) + '0');
-		}
-	}
-
-	EMITBUF(p, len);
-	if (p == name)   /* no ending quote needed */
-		return 0;
-
-	if (!no_dq)
-		EMIT('"');
-	return count;
-}
-
-size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
-{
-	return quote_c_style_counted(name, -1, sb, fp, nodq);
-}
-
-void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
-{
-	if (quote_c_style(prefix, NULL, NULL, 0) ||
-	    quote_c_style(path, NULL, NULL, 0)) {
-		if (!nodq)
-			strbuf_addch(sb, '"');
-		quote_c_style(prefix, sb, NULL, 1);
-		quote_c_style(path, sb, NULL, 1);
-		if (!nodq)
-			strbuf_addch(sb, '"');
-	} else {
-		strbuf_addstr(sb, prefix);
-		strbuf_addstr(sb, path);
-	}
-}
-
-void write_name_quoted(const char *name, FILE *fp, int terminator)
-{
-	if (terminator) {
-		quote_c_style(name, NULL, fp, 0);
-	} else {
-		fputs(name, fp);
-	}
-	fputc(terminator, fp);
-}
-
-extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
-                                 const char *name, FILE *fp, int terminator)
-{
-	int needquote = 0;
-
-	if (terminator) {
-		needquote = next_quote_pos(pfx, pfxlen) < pfxlen
-			|| name[next_quote_pos(name, -1)];
-	}
-	if (needquote) {
-		fputc('"', fp);
-		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
-		quote_c_style(name, NULL, fp, 1);
-		fputc('"', fp);
-	} else {
-		fwrite(pfx, pfxlen, 1, fp);
-		fputs(name, fp);
-	}
-	fputc(terminator, fp);
-}
-
-/* quote path as relative to the given prefix */
-char *quote_path_relative(const char *in, int len,
-			  struct strbuf *out, const char *prefix)
-{
-	int needquote;
-
-	if (len < 0)
-		len = strlen(in);
-
-	/* "../" prefix itself does not need quoting, but "in" might. */
-	needquote = next_quote_pos(in, len) < len;
-	strbuf_setlen(out, 0);
-	strbuf_grow(out, len);
-
-	if (needquote)
-		strbuf_addch(out, '"');
-	if (prefix) {
-		int off = 0;
-		while (prefix[off] && off < len && prefix[off] == in[off])
-			if (prefix[off] == '/') {
-				prefix += off + 1;
-				in += off + 1;
-				len -= off + 1;
-				off = 0;
-			} else
-				off++;
-
-		for (; *prefix; prefix++)
-			if (*prefix == '/')
-				strbuf_addstr(out, "../");
-	}
-
-	quote_c_style_counted (in, len, out, NULL, 1);
-
-	if (needquote)
-		strbuf_addch(out, '"');
-	if (!out->len)
-		strbuf_addstr(out, "./");
-
-	return out->buf;
-}
-
-/*
- * C-style name unquoting.
- *
- * Quoted should point at the opening double quote.
- * + Returns 0 if it was able to unquote the string properly, and appends the
- *   result in the strbuf `sb'.
- * + Returns -1 in case of error, and doesn't touch the strbuf. Though note
- *   that this function will allocate memory in the strbuf, so calling
- *   strbuf_release is mandatory whichever result unquote_c_style returns.
- *
- * Updates endp pointer to point at one past the ending double quote if given.
- */
-int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
-{
-	size_t oldlen = sb->len, len;
-	int ch, ac;
-
-	if (*quoted++ != '"')
-		return -1;
-
-	for (;;) {
-		len = strcspn(quoted, "\"\\");
-		strbuf_add(sb, quoted, len);
-		quoted += len;
-
-		switch (*quoted++) {
-		  case '"':
-			if (endp)
-				*endp = quoted;
-			return 0;
-		  case '\\':
-			break;
-		  default:
-			goto error;
-		}
-
-		switch ((ch = *quoted++)) {
-		case 'a': ch = '\a'; break;
-		case 'b': ch = '\b'; break;
-		case 'f': ch = '\f'; break;
-		case 'n': ch = '\n'; break;
-		case 'r': ch = '\r'; break;
-		case 't': ch = '\t'; break;
-		case 'v': ch = '\v'; break;
-
-		case '\\': case '"':
-			break; /* verbatim */
-
-		/* octal values with first digit over 4 overflow */
-		case '0': case '1': case '2': case '3':
-					ac = ((ch - '0') << 6);
-			if ((ch = *quoted++) < '0' || '7' < ch)
-				goto error;
-					ac |= ((ch - '0') << 3);
-			if ((ch = *quoted++) < '0' || '7' < ch)
-				goto error;
-					ac |= (ch - '0');
-					ch = ac;
-					break;
-				default:
-			goto error;
-			}
-		strbuf_addch(sb, ch);
-		}
-
-  error:
-	strbuf_setlen(sb, oldlen);
-	return -1;
-}
-
-/* quoting as a string literal for other languages */
-
-void perl_quote_print(FILE *stream, const char *src)
-{
-	const char sq = '\'';
-	const char bq = '\\';
-	char c;
-
-	fputc(sq, stream);
-	while ((c = *src++)) {
-		if (c == sq || c == bq)
-			fputc(bq, stream);
-		fputc(c, stream);
-	}
-	fputc(sq, stream);
-}
-
-void python_quote_print(FILE *stream, const char *src)
-{
-	const char sq = '\'';
-	const char bq = '\\';
-	const char nl = '\n';
-	char c;
-
-	fputc(sq, stream);
-	while ((c = *src++)) {
-		if (c == nl) {
-			fputc(bq, stream);
-			fputc('n', stream);
-			continue;
-		}
-		if (c == sq || c == bq)
-			fputc(bq, stream);
-		fputc(c, stream);
-	}
-	fputc(sq, stream);
-}
-
-void tcl_quote_print(FILE *stream, const char *src)
-{
-	char c;
-
-	fputc('"', stream);
-	while ((c = *src++)) {
-		switch (c) {
-		case '[': case ']':
-		case '{': case '}':
-		case '$': case '\\': case '"':
-			fputc('\\', stream);
-		default:
-			fputc(c, stream);
-			break;
-		case '\f':
-			fputs("\\f", stream);
-			break;
-		case '\r':
-			fputs("\\r", stream);
-			break;
-		case '\n':
-			fputs("\\n", stream);
-			break;
-		case '\t':
-			fputs("\\t", stream);
-			break;
-		case '\v':
-			fputs("\\v", stream);
-			break;
-		}
-	}
-	fputc('"', stream);
-}
diff --git a/Documentation/perf_counter/quote.h b/Documentation/perf_counter/quote.h
deleted file mode 100644
index 66730f2..0000000
--- a/Documentation/perf_counter/quote.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef QUOTE_H
-#define QUOTE_H
-
-#include <stddef.h>
-#include <stdio.h>
-
-/* Help to copy the thing properly quoted for the shell safety.
- * any single quote is replaced with '\'', any exclamation point
- * is replaced with '\!', and the whole thing is enclosed in a
- * single quote pair.
- *
- * For example, if you are passing the result to system() as an
- * argument:
- *
- * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
- *
- * would be appropriate.  If the system() is going to call ssh to
- * run the command on the other side:
- *
- * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
- * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd));
- *
- * Note that the above examples leak memory!  Remember to free result from
- * sq_quote() in a real application.
- *
- * sq_quote_buf() writes to an existing buffer of specified size; it
- * will return the number of characters that would have been written
- * excluding the final null regardless of the buffer size.
- */
-
-extern void sq_quote_print(FILE *stream, const char *src);
-
-extern void sq_quote_buf(struct strbuf *, const char *src);
-extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
-
-/* This unwraps what sq_quote() produces in place, but returns
- * NULL if the input does not look like what sq_quote would have
- * produced.
- */
-extern char *sq_dequote(char *);
-
-/*
- * Same as the above, but can be used to unwrap many arguments in the
- * same string separated by space. "next" is changed to point to the
- * next argument that should be passed as first parameter. When there
- * is no more argument to be dequoted, "next" is updated to point to NULL.
- */
-extern char *sq_dequote_step(char *arg, char **next);
-extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc);
-
-extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
-extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq);
-extern void quote_two_c_style(struct strbuf *, const char *, const char *, int);
-
-extern void write_name_quoted(const char *name, FILE *, int terminator);
-extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
-                                 const char *name, FILE *, int terminator);
-
-/* quote path as relative to the given prefix */
-char *quote_path_relative(const char *in, int len,
-			  struct strbuf *out, const char *prefix);
-
-/* quoting as a string literal for other languages */
-extern void perl_quote_print(FILE *stream, const char *src);
-extern void python_quote_print(FILE *stream, const char *src);
-extern void tcl_quote_print(FILE *stream, const char *src);
-
-#endif
diff --git a/Documentation/perf_counter/run-command.c b/Documentation/perf_counter/run-command.c
deleted file mode 100644
index b2f5e85..0000000
--- a/Documentation/perf_counter/run-command.c
+++ /dev/null
@@ -1,395 +0,0 @@
-#include "cache.h"
-#include "run-command.h"
-#include "exec_cmd.h"
-
-static inline void close_pair(int fd[2])
-{
-	close(fd[0]);
-	close(fd[1]);
-}
-
-static inline void dup_devnull(int to)
-{
-	int fd = open("/dev/null", O_RDWR);
-	dup2(fd, to);
-	close(fd);
-}
-
-int start_command(struct child_process *cmd)
-{
-	int need_in, need_out, need_err;
-	int fdin[2], fdout[2], fderr[2];
-
-	/*
-	 * In case of errors we must keep the promise to close FDs
-	 * that have been passed in via ->in and ->out.
-	 */
-
-	need_in = !cmd->no_stdin && cmd->in < 0;
-	if (need_in) {
-		if (pipe(fdin) < 0) {
-			if (cmd->out > 0)
-				close(cmd->out);
-			return -ERR_RUN_COMMAND_PIPE;
-		}
-		cmd->in = fdin[1];
-	}
-
-	need_out = !cmd->no_stdout
-		&& !cmd->stdout_to_stderr
-		&& cmd->out < 0;
-	if (need_out) {
-		if (pipe(fdout) < 0) {
-			if (need_in)
-				close_pair(fdin);
-			else if (cmd->in)
-				close(cmd->in);
-			return -ERR_RUN_COMMAND_PIPE;
-		}
-		cmd->out = fdout[0];
-	}
-
-	need_err = !cmd->no_stderr && cmd->err < 0;
-	if (need_err) {
-		if (pipe(fderr) < 0) {
-			if (need_in)
-				close_pair(fdin);
-			else if (cmd->in)
-				close(cmd->in);
-			if (need_out)
-				close_pair(fdout);
-			else if (cmd->out)
-				close(cmd->out);
-			return -ERR_RUN_COMMAND_PIPE;
-		}
-		cmd->err = fderr[0];
-	}
-
-#ifndef __MINGW32__
-	fflush(NULL);
-	cmd->pid = fork();
-	if (!cmd->pid) {
-		if (cmd->no_stdin)
-			dup_devnull(0);
-		else if (need_in) {
-			dup2(fdin[0], 0);
-			close_pair(fdin);
-		} else if (cmd->in) {
-			dup2(cmd->in, 0);
-			close(cmd->in);
-		}
-
-		if (cmd->no_stderr)
-			dup_devnull(2);
-		else if (need_err) {
-			dup2(fderr[1], 2);
-			close_pair(fderr);
-		}
-
-		if (cmd->no_stdout)
-			dup_devnull(1);
-		else if (cmd->stdout_to_stderr)
-			dup2(2, 1);
-		else if (need_out) {
-			dup2(fdout[1], 1);
-			close_pair(fdout);
-		} else if (cmd->out > 1) {
-			dup2(cmd->out, 1);
-			close(cmd->out);
-		}
-
-		if (cmd->dir && chdir(cmd->dir))
-			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
-			    cmd->dir, strerror(errno));
-		if (cmd->env) {
-			for (; *cmd->env; cmd->env++) {
-				if (strchr(*cmd->env, '='))
-					putenv((char*)*cmd->env);
-				else
-					unsetenv(*cmd->env);
-			}
-		}
-		if (cmd->preexec_cb)
-			cmd->preexec_cb();
-		if (cmd->perf_cmd) {
-			execv_perf_cmd(cmd->argv);
-		} else {
-			execvp(cmd->argv[0], (char *const*) cmd->argv);
-		}
-		exit(127);
-	}
-#else
-	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
-	const char **sargv = cmd->argv;
-	char **env = environ;
-
-	if (cmd->no_stdin) {
-		s0 = dup(0);
-		dup_devnull(0);
-	} else if (need_in) {
-		s0 = dup(0);
-		dup2(fdin[0], 0);
-	} else if (cmd->in) {
-		s0 = dup(0);
-		dup2(cmd->in, 0);
-	}
-
-	if (cmd->no_stderr) {
-		s2 = dup(2);
-		dup_devnull(2);
-	} else if (need_err) {
-		s2 = dup(2);
-		dup2(fderr[1], 2);
-	}
-
-	if (cmd->no_stdout) {
-		s1 = dup(1);
-		dup_devnull(1);
-	} else if (cmd->stdout_to_stderr) {
-		s1 = dup(1);
-		dup2(2, 1);
-	} else if (need_out) {
-		s1 = dup(1);
-		dup2(fdout[1], 1);
-	} else if (cmd->out > 1) {
-		s1 = dup(1);
-		dup2(cmd->out, 1);
-	}
-
-	if (cmd->dir)
-		die("chdir in start_command() not implemented");
-	if (cmd->env) {
-		env = copy_environ();
-		for (; *cmd->env; cmd->env++)
-			env = env_setenv(env, *cmd->env);
-	}
-
-	if (cmd->perf_cmd) {
-		cmd->argv = prepare_perf_cmd(cmd->argv);
-	}
-
-	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);
-
-	if (cmd->env)
-		free_environ(env);
-	if (cmd->perf_cmd)
-		free(cmd->argv);
-
-	cmd->argv = sargv;
-	if (s0 >= 0)
-		dup2(s0, 0), close(s0);
-	if (s1 >= 0)
-		dup2(s1, 1), close(s1);
-	if (s2 >= 0)
-		dup2(s2, 2), close(s2);
-#endif
-
-	if (cmd->pid < 0) {
-		int err = errno;
-		if (need_in)
-			close_pair(fdin);
-		else if (cmd->in)
-			close(cmd->in);
-		if (need_out)
-			close_pair(fdout);
-		else if (cmd->out)
-			close(cmd->out);
-		if (need_err)
-			close_pair(fderr);
-		return err == ENOENT ?
-			-ERR_RUN_COMMAND_EXEC :
-			-ERR_RUN_COMMAND_FORK;
-	}
-
-	if (need_in)
-		close(fdin[0]);
-	else if (cmd->in)
-		close(cmd->in);
-
-	if (need_out)
-		close(fdout[1]);
-	else if (cmd->out)
-		close(cmd->out);
-
-	if (need_err)
-		close(fderr[1]);
-
-	return 0;
-}
-
-static int wait_or_whine(pid_t pid)
-{
-	for (;;) {
-		int status, code;
-		pid_t waiting = waitpid(pid, &status, 0);
-
-		if (waiting < 0) {
-			if (errno == EINTR)
-				continue;
-			error("waitpid failed (%s)", strerror(errno));
-			return -ERR_RUN_COMMAND_WAITPID;
-		}
-		if (waiting != pid)
-			return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
-		if (WIFSIGNALED(status))
-			return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
-
-		if (!WIFEXITED(status))
-			return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
-		code = WEXITSTATUS(status);
-		switch (code) {
-		case 127:
-			return -ERR_RUN_COMMAND_EXEC;
-		case 0:
-			return 0;
-		default:
-			return -code;
-		}
-	}
-}
-
-int finish_command(struct child_process *cmd)
-{
-	return wait_or_whine(cmd->pid);
-}
-
-int run_command(struct child_process *cmd)
-{
-	int code = start_command(cmd);
-	if (code)
-		return code;
-	return finish_command(cmd);
-}
-
-static void prepare_run_command_v_opt(struct child_process *cmd,
-				      const char **argv,
-				      int opt)
-{
-	memset(cmd, 0, sizeof(*cmd));
-	cmd->argv = argv;
-	cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
-	cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0;
-	cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
-}
-
-int run_command_v_opt(const char **argv, int opt)
-{
-	struct child_process cmd;
-	prepare_run_command_v_opt(&cmd, argv, opt);
-	return run_command(&cmd);
-}
-
-int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
-{
-	struct child_process cmd;
-	prepare_run_command_v_opt(&cmd, argv, opt);
-	cmd.dir = dir;
-	cmd.env = env;
-	return run_command(&cmd);
-}
-
-#ifdef __MINGW32__
-static __stdcall unsigned run_thread(void *data)
-{
-	struct async *async = data;
-	return async->proc(async->fd_for_proc, async->data);
-}
-#endif
-
-int start_async(struct async *async)
-{
-	int pipe_out[2];
-
-	if (pipe(pipe_out) < 0)
-		return error("cannot create pipe: %s", strerror(errno));
-	async->out = pipe_out[0];
-
-#ifndef __MINGW32__
-	/* Flush stdio before fork() to avoid cloning buffers */
-	fflush(NULL);
-
-	async->pid = fork();
-	if (async->pid < 0) {
-		error("fork (async) failed: %s", strerror(errno));
-		close_pair(pipe_out);
-		return -1;
-	}
-	if (!async->pid) {
-		close(pipe_out[0]);
-		exit(!!async->proc(pipe_out[1], async->data));
-	}
-	close(pipe_out[1]);
-#else
-	async->fd_for_proc = pipe_out[1];
-	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
-	if (!async->tid) {
-		error("cannot create thread: %s", strerror(errno));
-		close_pair(pipe_out);
-		return -1;
-	}
-#endif
-	return 0;
-}
-
-int finish_async(struct async *async)
-{
-#ifndef __MINGW32__
-	int ret = 0;
-
-	if (wait_or_whine(async->pid))
-		ret = error("waitpid (async) failed");
-#else
-	DWORD ret = 0;
-	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
-		ret = error("waiting for thread failed: %lu", GetLastError());
-	else if (!GetExitCodeThread(async->tid, &ret))
-		ret = error("cannot get thread exit code: %lu", GetLastError());
-	CloseHandle(async->tid);
-#endif
-	return ret;
-}
-
-int run_hook(const char *index_file, const char *name, ...)
-{
-	struct child_process hook;
-	const char **argv = NULL, *env[2];
-	char index[PATH_MAX];
-	va_list args;
-	int ret;
-	size_t i = 0, alloc = 0;
-
-	if (access(perf_path("hooks/%s", name), X_OK) < 0)
-		return 0;
-
-	va_start(args, name);
-	ALLOC_GROW(argv, i + 1, alloc);
-	argv[i++] = perf_path("hooks/%s", name);
-	while (argv[i-1]) {
-		ALLOC_GROW(argv, i + 1, alloc);
-		argv[i++] = va_arg(args, const char *);
-	}
-	va_end(args);
-
-	memset(&hook, 0, sizeof(hook));
-	hook.argv = argv;
-	hook.no_stdin = 1;
-	hook.stdout_to_stderr = 1;
-	if (index_file) {
-		snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file);
-		env[0] = index;
-		env[1] = NULL;
-		hook.env = env;
-	}
-
-	ret = start_command(&hook);
-	free(argv);
-	if (ret) {
-		warning("Could not spawn %s", argv[0]);
-		return ret;
-	}
-	ret = finish_command(&hook);
-	if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
-		warning("%s exited due to uncaught signal", argv[0]);
-
-	return ret;
-}
diff --git a/Documentation/perf_counter/run-command.h b/Documentation/perf_counter/run-command.h
deleted file mode 100644
index 328289f..0000000
--- a/Documentation/perf_counter/run-command.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef RUN_COMMAND_H
-#define RUN_COMMAND_H
-
-enum {
-	ERR_RUN_COMMAND_FORK = 10000,
-	ERR_RUN_COMMAND_EXEC,
-	ERR_RUN_COMMAND_PIPE,
-	ERR_RUN_COMMAND_WAITPID,
-	ERR_RUN_COMMAND_WAITPID_WRONG_PID,
-	ERR_RUN_COMMAND_WAITPID_SIGNAL,
-	ERR_RUN_COMMAND_WAITPID_NOEXIT,
-};
-#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
-
-struct child_process {
-	const char **argv;
-	pid_t pid;
-	/*
-	 * Using .in, .out, .err:
-	 * - Specify 0 for no redirections (child inherits stdin, stdout,
-	 *   stderr from parent).
-	 * - Specify -1 to have a pipe allocated as follows:
-	 *     .in: returns the writable pipe end; parent writes to it,
-	 *          the readable pipe end becomes child's stdin
-	 *     .out, .err: returns the readable pipe end; parent reads from
-	 *          it, the writable pipe end becomes child's stdout/stderr
-	 *   The caller of start_command() must close the returned FDs
-	 *   after it has completed reading from/writing to it!
-	 * - Specify > 0 to set a channel to a particular FD as follows:
-	 *     .in: a readable FD, becomes child's stdin
-	 *     .out: a writable FD, becomes child's stdout/stderr
-	 *     .err > 0 not supported
-	 *   The specified FD is closed by start_command(), even in case
-	 *   of errors!
-	 */
-	int in;
-	int out;
-	int err;
-	const char *dir;
-	const char *const *env;
-	unsigned no_stdin:1;
-	unsigned no_stdout:1;
-	unsigned no_stderr:1;
-	unsigned perf_cmd:1; /* if this is to be perf sub-command */
-	unsigned stdout_to_stderr:1;
-	void (*preexec_cb)(void);
-};
-
-int start_command(struct child_process *);
-int finish_command(struct child_process *);
-int run_command(struct child_process *);
-
-extern int run_hook(const char *index_file, const char *name, ...);
-
-#define RUN_COMMAND_NO_STDIN 1
-#define RUN_PERF_CMD	     2	/*If this is to be perf sub-command */
-#define RUN_COMMAND_STDOUT_TO_STDERR 4
-int run_command_v_opt(const char **argv, int opt);
-
-/*
- * env (the environment) is to be formatted like environ: "VAR=VALUE".
- * To unset an environment variable use just "VAR".
- */
-int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env);
-
-/*
- * The purpose of the following functions is to feed a pipe by running
- * a function asynchronously and providing output that the caller reads.
- *
- * It is expected that no synchronization and mutual exclusion between
- * the caller and the feed function is necessary so that the function
- * can run in a thread without interfering with the caller.
- */
-struct async {
-	/*
-	 * proc writes to fd and closes it;
-	 * returns 0 on success, non-zero on failure
-	 */
-	int (*proc)(int fd, void *data);
-	void *data;
-	int out;	/* caller reads from here and closes it */
-#ifndef __MINGW32__
-	pid_t pid;
-#else
-	HANDLE tid;
-	int fd_for_proc;
-#endif
-};
-
-int start_async(struct async *async);
-int finish_async(struct async *async);
-
-#endif
diff --git a/Documentation/perf_counter/strbuf.c b/Documentation/perf_counter/strbuf.c
deleted file mode 100644
index eaba093..0000000
--- a/Documentation/perf_counter/strbuf.c
+++ /dev/null
@@ -1,359 +0,0 @@
-#include "cache.h"
-
-int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}
-
-/*
- * Used as the default ->buf value, so that people can always assume
- * buf is non NULL and ->buf is NUL terminated even for a freshly
- * initialized strbuf.
- */
-char strbuf_slopbuf[1];
-
-void strbuf_init(struct strbuf *sb, size_t hint)
-{
-	sb->alloc = sb->len = 0;
-	sb->buf = strbuf_slopbuf;
-	if (hint)
-		strbuf_grow(sb, hint);
-}
-
-void strbuf_release(struct strbuf *sb)
-{
-	if (sb->alloc) {
-		free(sb->buf);
-		strbuf_init(sb, 0);
-	}
-}
-
-char *strbuf_detach(struct strbuf *sb, size_t *sz)
-{
-	char *res = sb->alloc ? sb->buf : NULL;
-	if (sz)
-		*sz = sb->len;
-	strbuf_init(sb, 0);
-	return res;
-}
-
-void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
-{
-	strbuf_release(sb);
-	sb->buf   = buf;
-	sb->len   = len;
-	sb->alloc = alloc;
-	strbuf_grow(sb, 0);
-	sb->buf[sb->len] = '\0';
-}
-
-void strbuf_grow(struct strbuf *sb, size_t extra)
-{
-	if (sb->len + extra + 1 <= sb->len)
-		die("you want to use way too much memory");
-	if (!sb->alloc)
-		sb->buf = NULL;
-	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
-}
-
-void strbuf_trim(struct strbuf *sb)
-{
-	char *b = sb->buf;
-	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
-		sb->len--;
-	while (sb->len > 0 && isspace(*b)) {
-		b++;
-		sb->len--;
-	}
-	memmove(sb->buf, b, sb->len);
-	sb->buf[sb->len] = '\0';
-}
-void strbuf_rtrim(struct strbuf *sb)
-{
-	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
-		sb->len--;
-	sb->buf[sb->len] = '\0';
-}
-
-void strbuf_ltrim(struct strbuf *sb)
-{
-	char *b = sb->buf;
-	while (sb->len > 0 && isspace(*b)) {
-		b++;
-		sb->len--;
-	}
-	memmove(sb->buf, b, sb->len);
-	sb->buf[sb->len] = '\0';
-}
-
-void strbuf_tolower(struct strbuf *sb)
-{
-	int i;
-	for (i = 0; i < sb->len; i++)
-		sb->buf[i] = tolower(sb->buf[i]);
-}
-
-struct strbuf **strbuf_split(const struct strbuf *sb, int delim)
-{
-	int alloc = 2, pos = 0;
-	char *n, *p;
-	struct strbuf **ret;
-	struct strbuf *t;
-
-	ret = calloc(alloc, sizeof(struct strbuf *));
-	p = n = sb->buf;
-	while (n < sb->buf + sb->len) {
-		int len;
-		n = memchr(n, delim, sb->len - (n - sb->buf));
-		if (pos + 1 >= alloc) {
-			alloc = alloc * 2;
-			ret = realloc(ret, sizeof(struct strbuf *) * alloc);
-		}
-		if (!n)
-			n = sb->buf + sb->len - 1;
-		len = n - p + 1;
-		t = malloc(sizeof(struct strbuf));
-		strbuf_init(t, len);
-		strbuf_add(t, p, len);
-		ret[pos] = t;
-		ret[++pos] = NULL;
-		p = ++n;
-	}
-	return ret;
-}
-
-void strbuf_list_free(struct strbuf **sbs)
-{
-	struct strbuf **s = sbs;
-
-	while (*s) {
-		strbuf_release(*s);
-		free(*s++);
-	}
-	free(sbs);
-}
-
-int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
-{
-	int len = a->len < b->len ? a->len: b->len;
-	int cmp = memcmp(a->buf, b->buf, len);
-	if (cmp)
-		return cmp;
-	return a->len < b->len ? -1: a->len != b->len;
-}
-
-void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
-				   const void *data, size_t dlen)
-{
-	if (pos + len < pos)
-		die("you want to use way too much memory");
-	if (pos > sb->len)
-		die("`pos' is too far after the end of the buffer");
-	if (pos + len > sb->len)
-		die("`pos + len' is too far after the end of the buffer");
-
-	if (dlen >= len)
-		strbuf_grow(sb, dlen - len);
-	memmove(sb->buf + pos + dlen,
-			sb->buf + pos + len,
-			sb->len - pos - len);
-	memcpy(sb->buf + pos, data, dlen);
-	strbuf_setlen(sb, sb->len + dlen - len);
-}
-
-void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
-{
-	strbuf_splice(sb, pos, 0, data, len);
-}
-
-void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
-{
-	strbuf_splice(sb, pos, len, NULL, 0);
-}
-
-void strbuf_add(struct strbuf *sb, const void *data, size_t len)
-{
-	strbuf_grow(sb, len);
-	memcpy(sb->buf + sb->len, data, len);
-	strbuf_setlen(sb, sb->len + len);
-}
-
-void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len)
-{
-	strbuf_grow(sb, len);
-	memcpy(sb->buf + sb->len, sb->buf + pos, len);
-	strbuf_setlen(sb, sb->len + len);
-}
-
-void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
-{
-	int len;
-	va_list ap;
-
-	if (!strbuf_avail(sb))
-		strbuf_grow(sb, 64);
-	va_start(ap, fmt);
-	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
-	va_end(ap);
-	if (len < 0)
-		die("your vsnprintf is broken");
-	if (len > strbuf_avail(sb)) {
-		strbuf_grow(sb, len);
-		va_start(ap, fmt);
-		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
-		va_end(ap);
-		if (len > strbuf_avail(sb)) {
-			die("this should not happen, your snprintf is broken");
-		}
-	}
-	strbuf_setlen(sb, sb->len + len);
-}
-
-void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
-		   void *context)
-{
-	for (;;) {
-		const char *percent;
-		size_t consumed;
-
-		percent = strchrnul(format, '%');
-		strbuf_add(sb, format, percent - format);
-		if (!*percent)
-			break;
-		format = percent + 1;
-
-		consumed = fn(sb, format, context);
-		if (consumed)
-			format += consumed;
-		else
-			strbuf_addch(sb, '%');
-	}
-}
-
-size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder,
-		void *context)
-{
-	struct strbuf_expand_dict_entry *e = context;
-	size_t len;
-
-	for (; e->placeholder && (len = strlen(e->placeholder)); e++) {
-		if (!strncmp(placeholder, e->placeholder, len)) {
-			if (e->value)
-				strbuf_addstr(sb, e->value);
-			return len;
-		}
-	}
-	return 0;
-}
-
-size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
-{
-	size_t res;
-	size_t oldalloc = sb->alloc;
-
-	strbuf_grow(sb, size);
-	res = fread(sb->buf + sb->len, 1, size, f);
-	if (res > 0)
-		strbuf_setlen(sb, sb->len + res);
-	else if (res < 0 && oldalloc == 0)
-		strbuf_release(sb);
-	return res;
-}
-
-ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
-{
-	size_t oldlen = sb->len;
-	size_t oldalloc = sb->alloc;
-
-	strbuf_grow(sb, hint ? hint : 8192);
-	for (;;) {
-		ssize_t cnt;
-
-		cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
-		if (cnt < 0) {
-			if (oldalloc == 0)
-				strbuf_release(sb);
-			else
-				strbuf_setlen(sb, oldlen);
-			return -1;
-		}
-		if (!cnt)
-			break;
-		sb->len += cnt;
-		strbuf_grow(sb, 8192);
-	}
-
-	sb->buf[sb->len] = '\0';
-	return sb->len - oldlen;
-}
-
-#define STRBUF_MAXLINK (2*PATH_MAX)
-
-int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
-{
-	size_t oldalloc = sb->alloc;
-
-	if (hint < 32)
-		hint = 32;
-
-	while (hint < STRBUF_MAXLINK) {
-		int len;
-
-		strbuf_grow(sb, hint);
-		len = readlink(path, sb->buf, hint);
-		if (len < 0) {
-			if (errno != ERANGE)
-				break;
-		} else if (len < hint) {
-			strbuf_setlen(sb, len);
-			return 0;
-		}
-
-		/* .. the buffer was too small - try again */
-		hint *= 2;
-	}
-	if (oldalloc == 0)
-		strbuf_release(sb);
-	return -1;
-}
-
-int strbuf_getline(struct strbuf *sb, FILE *fp, int term)
-{
-	int ch;
-
-	strbuf_grow(sb, 0);
-	if (feof(fp))
-		return EOF;
-
-	strbuf_reset(sb);
-	while ((ch = fgetc(fp)) != EOF) {
-		if (ch == term)
-			break;
-		strbuf_grow(sb, 1);
-		sb->buf[sb->len++] = ch;
-	}
-	if (ch == EOF && sb->len == 0)
-		return EOF;
-
-	sb->buf[sb->len] = '\0';
-	return 0;
-}
-
-int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
-{
-	int fd, len;
-
-	fd = open(path, O_RDONLY);
-	if (fd < 0)
-		return -1;
-	len = strbuf_read(sb, fd, hint);
-	close(fd);
-	if (len < 0)
-		return -1;
-
-	return len;
-}
diff --git a/Documentation/perf_counter/strbuf.h b/Documentation/perf_counter/strbuf.h
deleted file mode 100644
index 9ee908a..0000000
--- a/Documentation/perf_counter/strbuf.h
+++ /dev/null
@@ -1,137 +0,0 @@
-#ifndef STRBUF_H
-#define STRBUF_H
-
-/*
- * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
- * long, overflow safe strings.
- *
- * Strbufs has some invariants that are very important to keep in mind:
- *
- * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
- *    build complex strings/buffers whose final size isn't easily known.
- *
- *    It is NOT legal to copy the ->buf pointer away.
- *    `strbuf_detach' is the operation that detachs a buffer from its shell
- *    while keeping the shell valid wrt its invariants.
- *
- * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
- *    allocated. The extra byte is used to store a '\0', allowing the ->buf
- *    member to be a valid C-string. Every strbuf function ensure this
- *    invariant is preserved.
- *
- *    Note that it is OK to "play" with the buffer directly if you work it
- *    that way:
- *
- *    strbuf_grow(sb, SOME_SIZE);
- *       ... Here, the memory array starting at sb->buf, and of length
- *       ... strbuf_avail(sb) is all yours, and you are sure that
- *       ... strbuf_avail(sb) is at least SOME_SIZE.
- *    strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
- *
- *    Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
- *
- *    Doing so is safe, though if it has to be done in many places, adding the
- *    missing API to the strbuf module is the way to go.
- *
- *    XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
- *         even if it's true in the current implementation. Alloc is somehow a
- *         "private" member that should not be messed with.
- */
-
-#include <assert.h>
-
-extern char strbuf_slopbuf[];
-struct strbuf {
-	size_t alloc;
-	size_t len;
-	char *buf;
-};
-
-#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
-
-/*----- strbuf life cycle -----*/
-extern void strbuf_init(struct strbuf *, size_t);
-extern void strbuf_release(struct strbuf *);
-extern char *strbuf_detach(struct strbuf *, size_t *);
-extern void strbuf_attach(struct strbuf *, void *, size_t, size_t);
-static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) {
-	struct strbuf tmp = *a;
-	*a = *b;
-	*b = tmp;
-}
-
-/*----- strbuf size related -----*/
-static inline size_t strbuf_avail(const struct strbuf *sb) {
-	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
-}
-
-extern void strbuf_grow(struct strbuf *, size_t);
-
-static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
-	if (!sb->alloc)
-		strbuf_grow(sb, 0);
-	assert(len < sb->alloc);
-	sb->len = len;
-	sb->buf[len] = '\0';
-}
-#define strbuf_reset(sb)  strbuf_setlen(sb, 0)
-
-/*----- content related -----*/
-extern void strbuf_trim(struct strbuf *);
-extern void strbuf_rtrim(struct strbuf *);
-extern void strbuf_ltrim(struct strbuf *);
-extern int strbuf_cmp(const struct strbuf *, const struct strbuf *);
-extern void strbuf_tolower(struct strbuf *);
-
-extern struct strbuf **strbuf_split(const struct strbuf *, int delim);
-extern void strbuf_list_free(struct strbuf **);
-
-/*----- add data in your buffer -----*/
-static inline void strbuf_addch(struct strbuf *sb, int c) {
-	strbuf_grow(sb, 1);
-	sb->buf[sb->len++] = c;
-	sb->buf[sb->len] = '\0';
-}
-
-extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t);
-extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
-
-/* splice pos..pos+len with given data */
-extern void strbuf_splice(struct strbuf *, size_t pos, size_t len,
-                          const void *, size_t);
-
-extern void strbuf_add(struct strbuf *, const void *, size_t);
-static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
-	strbuf_add(sb, s, strlen(s));
-}
-static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) {
-	strbuf_add(sb, sb2->buf, sb2->len);
-}
-extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
-
-typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
-struct strbuf_expand_dict_entry {
-	const char *placeholder;
-	const char *value;
-};
-extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context);
-
-__attribute__((format(printf,2,3)))
-extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
-
-extern size_t strbuf_fread(struct strbuf *, size_t, FILE *);
-/* XXX: if read fails, any partial read is undone */
-extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint);
-extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint);
-extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint);
-
-extern int strbuf_getline(struct strbuf *, FILE *, int);
-
-extern void stripspace(struct strbuf *buf, int skip_comments);
-extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env);
-
-extern int strbuf_branchname(struct strbuf *sb, const char *name);
-extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
-
-#endif /* STRBUF_H */
diff --git a/Documentation/perf_counter/usage.c b/Documentation/perf_counter/usage.c
deleted file mode 100644
index 7a10421..0000000
--- a/Documentation/perf_counter/usage.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * GIT - The information manager from hell
- *
- * Copyright (C) Linus Torvalds, 2005
- */
-#include "util.h"
-
-static void report(const char *prefix, const char *err, va_list params)
-{
-	char msg[1024];
-	vsnprintf(msg, sizeof(msg), err, params);
-	fprintf(stderr, "%s%s\n", prefix, msg);
-}
-
-static NORETURN void usage_builtin(const char *err)
-{
-	fprintf(stderr, "usage: %s\n", err);
-	exit(129);
-}
-
-static NORETURN void die_builtin(const char *err, va_list params)
-{
-	report("fatal: ", err, params);
-	exit(128);
-}
-
-static void error_builtin(const char *err, va_list params)
-{
-	report("error: ", err, params);
-}
-
-static void warn_builtin(const char *warn, va_list params)
-{
-	report("warning: ", warn, params);
-}
-
-/* If we are in a dlopen()ed .so write to a global variable would segfault
- * (ugh), so keep things static. */
-static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
-static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
-static void (*error_routine)(const char *err, va_list params) = error_builtin;
-static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
-
-void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
-{
-	die_routine = routine;
-}
-
-void usage(const char *err)
-{
-	usage_routine(err);
-}
-
-void die(const char *err, ...)
-{
-	va_list params;
-
-	va_start(params, err);
-	die_routine(err, params);
-	va_end(params);
-}
-
-int error(const char *err, ...)
-{
-	va_list params;
-
-	va_start(params, err);
-	error_routine(err, params);
-	va_end(params);
-	return -1;
-}
-
-void warning(const char *warn, ...)
-{
-	va_list params;
-
-	va_start(params, warn);
-	warn_routine(warn, params);
-	va_end(params);
-}
diff --git a/Documentation/perf_counter/util.h b/Documentation/perf_counter/util.h
deleted file mode 100644
index 36e40c3..0000000
--- a/Documentation/perf_counter/util.h
+++ /dev/null
@@ -1,408 +0,0 @@
-#ifndef GIT_COMPAT_UTIL_H
-#define GIT_COMPAT_UTIL_H
-
-#define _FILE_OFFSET_BITS 64
-
-#ifndef FLEX_ARRAY
-/*
- * See if our compiler is known to support flexible array members.
- */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
-# define FLEX_ARRAY /* empty */
-#elif defined(__GNUC__)
-# if (__GNUC__ >= 3)
-#  define FLEX_ARRAY /* empty */
-# else
-#  define FLEX_ARRAY 0 /* older GNU extension */
-# endif
-#endif
-
-/*
- * Otherwise, default to safer but a bit wasteful traditional style
- */
-#ifndef FLEX_ARRAY
-# define FLEX_ARRAY 1
-#endif
-#endif
-
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-
-#ifdef __GNUC__
-#define TYPEOF(x) (__typeof__(x))
-#else
-#define TYPEOF(x)
-#endif
-
-#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits))))
-#define HAS_MULTI_BITS(i)  ((i) & ((i) - 1))  /* checks if an integer has more than 1 bit set */
-
-/* Approximation of the length of the decimal representation of this type. */
-#define decimal_length(x)	((int)(sizeof(x) * 2.56 + 0.5) + 1)
-
-#if !defined(__APPLE__) && !defined(__FreeBSD__)  && !defined(__USLC__) && !defined(_M_UNIX)
-#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
-#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
-#endif
-#define _ALL_SOURCE 1
-#define _GNU_SOURCE 1
-#define _BSD_SOURCE 1
-
-#include <unistd.h>
-#include <stdio.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/param.h>
-#include <sys/types.h>
-#include <dirent.h>
-#include <sys/time.h>
-#include <time.h>
-#include <signal.h>
-#include <fnmatch.h>
-#include <assert.h>
-#include <regex.h>
-#include <utime.h>
-#ifndef __MINGW32__
-#include <sys/wait.h>
-#include <sys/poll.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#ifndef NO_SYS_SELECT_H
-#include <sys/select.h>
-#endif
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <pwd.h>
-#include <inttypes.h>
-#if defined(__CYGWIN__)
-#undef _XOPEN_SOURCE
-#include <grp.h>
-#define _XOPEN_SOURCE 600
-#include "compat/cygwin.h"
-#else
-#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
-#include <grp.h>
-#define _ALL_SOURCE 1
-#endif
-#else 	/* __MINGW32__ */
-/* pull in Windows compatibility stuff */
-#include "compat/mingw.h"
-#endif	/* __MINGW32__ */
-
-#ifndef NO_ICONV
-#include <iconv.h>
-#endif
-
-#ifndef NO_OPENSSL
-#include <openssl/ssl.h>
-#include <openssl/err.h>
-#endif
-
-/* On most systems <limits.h> would have given us this, but
- * not on some systems (e.g. GNU/Hurd).
- */
-#ifndef PATH_MAX
-#define PATH_MAX 4096
-#endif
-
-#ifndef PRIuMAX
-#define PRIuMAX "llu"
-#endif
-
-#ifndef PRIu32
-#define PRIu32 "u"
-#endif
-
-#ifndef PRIx32
-#define PRIx32 "x"
-#endif
-
-#ifndef PATH_SEP
-#define PATH_SEP ':'
-#endif
-
-#ifndef STRIP_EXTENSION
-#define STRIP_EXTENSION ""
-#endif
-
-#ifndef has_dos_drive_prefix
-#define has_dos_drive_prefix(path) 0
-#endif
-
-#ifndef is_dir_sep
-#define is_dir_sep(c) ((c) == '/')
-#endif
-
-#ifdef __GNUC__
-#define NORETURN __attribute__((__noreturn__))
-#else
-#define NORETURN
-#ifndef __attribute__
-#define __attribute__(x)
-#endif
-#endif
-
-/* General helper functions */
-extern void usage(const char *err) NORETURN;
-extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
-extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
-
-extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
-
-extern int prefixcmp(const char *str, const char *prefix);
-extern time_t tm_to_time_t(const struct tm *tm);
-
-static inline const char *skip_prefix(const char *str, const char *prefix)
-{
-	size_t len = strlen(prefix);
-	return strncmp(str, prefix, len) ? NULL : str + len;
-}
-
-#if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
-
-#ifndef PROT_READ
-#define PROT_READ 1
-#define PROT_WRITE 2
-#define MAP_PRIVATE 1
-#define MAP_FAILED ((void*)-1)
-#endif
-
-#define mmap git_mmap
-#define munmap git_munmap
-extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
-extern int git_munmap(void *start, size_t length);
-
-#else /* NO_MMAP || USE_WIN32_MMAP */
-
-#include <sys/mman.h>
-
-#endif /* NO_MMAP || USE_WIN32_MMAP */
-
-#ifdef NO_MMAP
-
-/* This value must be multiple of (pagesize * 2) */
-#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
-
-#else /* NO_MMAP */
-
-/* This value must be multiple of (pagesize * 2) */
-#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
-	(sizeof(void*) >= 8 \
-		?  1 * 1024 * 1024 * 1024 \
-		: 32 * 1024 * 1024)
-
-#endif /* NO_MMAP */
-
-#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
-#define on_disk_bytes(st) ((st).st_size)
-#else
-#define on_disk_bytes(st) ((st).st_blocks * 512)
-#endif
-
-#define DEFAULT_PACKED_GIT_LIMIT \
-	((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
-
-#ifdef NO_PREAD
-#define pread git_pread
-extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset);
-#endif
-/*
- * Forward decl that will remind us if its twin in cache.h changes.
- * This function is used in compat/pread.c.  But we can't include
- * cache.h there.
- */
-extern ssize_t read_in_full(int fd, void *buf, size_t count);
-
-#ifdef NO_SETENV
-#define setenv gitsetenv
-extern int gitsetenv(const char *, const char *, int);
-#endif
-
-#ifdef NO_MKDTEMP
-#define mkdtemp gitmkdtemp
-extern char *gitmkdtemp(char *);
-#endif
-
-#ifdef NO_UNSETENV
-#define unsetenv gitunsetenv
-extern void gitunsetenv(const char *);
-#endif
-
-#ifdef NO_STRCASESTR
-#define strcasestr gitstrcasestr
-extern char *gitstrcasestr(const char *haystack, const char *needle);
-#endif
-
-#ifdef NO_STRLCPY
-#define strlcpy gitstrlcpy
-extern size_t gitstrlcpy(char *, const char *, size_t);
-#endif
-
-#ifdef NO_STRTOUMAX
-#define strtoumax gitstrtoumax
-extern uintmax_t gitstrtoumax(const char *, char **, int);
-#endif
-
-#ifdef NO_HSTRERROR
-#define hstrerror githstrerror
-extern const char *githstrerror(int herror);
-#endif
-
-#ifdef NO_MEMMEM
-#define memmem gitmemmem
-void *gitmemmem(const void *haystack, size_t haystacklen,
-                const void *needle, size_t needlelen);
-#endif
-
-#ifdef FREAD_READS_DIRECTORIES
-#ifdef fopen
-#undef fopen
-#endif
-#define fopen(a,b) git_fopen(a,b)
-extern FILE *git_fopen(const char*, const char*);
-#endif
-
-#ifdef SNPRINTF_RETURNS_BOGUS
-#define snprintf git_snprintf
-extern int git_snprintf(char *str, size_t maxsize,
-			const char *format, ...);
-#define vsnprintf git_vsnprintf
-extern int git_vsnprintf(char *str, size_t maxsize,
-			 const char *format, va_list ap);
-#endif
-
-#ifdef __GLIBC_PREREQ
-#if __GLIBC_PREREQ(2, 1)
-#define HAVE_STRCHRNUL
-#endif
-#endif
-
-#ifndef HAVE_STRCHRNUL
-#define strchrnul gitstrchrnul
-static inline char *gitstrchrnul(const char *s, int c)
-{
-	while (*s && *s != c)
-		s++;
-	return (char *)s;
-}
-#endif
-
-/*
- * Wrappers:
- */
-extern char *xstrdup(const char *str);
-extern void *xmalloc(size_t size);
-extern void *xmemdupz(const void *data, size_t len);
-extern char *xstrndup(const char *str, size_t len);
-extern void *xrealloc(void *ptr, size_t size);
-extern void *xcalloc(size_t nmemb, size_t size);
-extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
-extern ssize_t xread(int fd, void *buf, size_t len);
-extern ssize_t xwrite(int fd, const void *buf, size_t len);
-extern int xdup(int fd);
-extern FILE *xfdopen(int fd, const char *mode);
-static inline size_t xsize_t(off_t len)
-{
-	return (size_t)len;
-}
-
-static inline int has_extension(const char *filename, const char *ext)
-{
-	size_t len = strlen(filename);
-	size_t extlen = strlen(ext);
-	return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
-}
-
-/* Sane ctype - no locale, and works with signed chars */
-#undef isascii
-#undef isspace
-#undef isdigit
-#undef isalpha
-#undef isalnum
-#undef tolower
-#undef toupper
-extern unsigned char sane_ctype[256];
-#define GIT_SPACE 0x01
-#define GIT_DIGIT 0x02
-#define GIT_ALPHA 0x04
-#define GIT_GLOB_SPECIAL 0x08
-#define GIT_REGEX_SPECIAL 0x10
-#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
-#define isascii(x) (((x) & ~0x7f) == 0)
-#define isspace(x) sane_istest(x,GIT_SPACE)
-#define isdigit(x) sane_istest(x,GIT_DIGIT)
-#define isalpha(x) sane_istest(x,GIT_ALPHA)
-#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
-#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
-#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
-#define tolower(x) sane_case((unsigned char)(x), 0x20)
-#define toupper(x) sane_case((unsigned char)(x), 0)
-
-static inline int sane_case(int x, int high)
-{
-	if (sane_istest(x, GIT_ALPHA))
-		x = (x & ~0x20) | high;
-	return x;
-}
-
-static inline int strtoul_ui(char const *s, int base, unsigned int *result)
-{
-	unsigned long ul;
-	char *p;
-
-	errno = 0;
-	ul = strtoul(s, &p, base);
-	if (errno || *p || p == s || (unsigned int) ul != ul)
-		return -1;
-	*result = ul;
-	return 0;
-}
-
-static inline int strtol_i(char const *s, int base, int *result)
-{
-	long ul;
-	char *p;
-
-	errno = 0;
-	ul = strtol(s, &p, base);
-	if (errno || *p || p == s || (int) ul != ul)
-		return -1;
-	*result = ul;
-	return 0;
-}
-
-#ifdef INTERNAL_QSORT
-void git_qsort(void *base, size_t nmemb, size_t size,
-	       int(*compar)(const void *, const void *));
-#define qsort git_qsort
-#endif
-
-#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
-# define FORCE_DIR_SET_GID S_ISGID
-#else
-# define FORCE_DIR_SET_GID 0
-#endif
-
-#ifdef NO_NSEC
-#undef USE_NSEC
-#define ST_CTIME_NSEC(st) 0
-#define ST_MTIME_NSEC(st) 0
-#else
-#ifdef USE_ST_TIMESPEC
-#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
-#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
-#else
-#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
-#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
-#endif
-#endif
-
-#endif
diff --git a/Documentation/perf_counter/util/PERF-VERSION-GEN b/Documentation/perf_counter/util/PERF-VERSION-GEN
new file mode 100755
index 0000000..c561d15
--- /dev/null
+++ b/Documentation/perf_counter/util/PERF-VERSION-GEN
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+GVF=PERF-VERSION-FILE
+DEF_VER=v0.0.1.PERF
+
+LF='
+'
+
+# First see if there is a version file (included in release tarballs),
+# then try git-describe, then default.
+if test -f version
+then
+	VN=$(cat version) || VN="$DEF_VER"
+elif test -d .git -o -f .git &&
+	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
+	case "$VN" in
+	*$LF*) (exit 1) ;;
+	v[0-9]*)
+		git update-index -q --refresh
+		test -z "$(git diff-index --name-only HEAD --)" ||
+		VN="$VN-dirty" ;;
+	esac
+then
+	VN=$(echo "$VN" | sed -e 's/-/./g');
+else
+	VN="$DEF_VER"
+fi
+
+VN=$(expr "$VN" : v*'\(.*\)')
+
+if test -r $GVF
+then
+	VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
+else
+	VC=unset
+fi
+test "$VN" = "$VC" || {
+	echo >&2 "PERF_VERSION = $VN"
+	echo "PERF_VERSION = $VN" >$GVF
+}
+
+
diff --git a/Documentation/perf_counter/util/abspath.c b/Documentation/perf_counter/util/abspath.c
new file mode 100644
index 0000000..649f34f
--- /dev/null
+++ b/Documentation/perf_counter/util/abspath.c
@@ -0,0 +1,117 @@
+#include "cache.h"
+
+/*
+ * Do not use this for inspecting *tracked* content.  When path is a
+ * symlink to a directory, we do not want to say it is a directory when
+ * dealing with tracked content in the working tree.
+ */
+int is_directory(const char *path)
+{
+	struct stat st;
+	return (!stat(path, &st) && S_ISDIR(st.st_mode));
+}
+
+/* We allow "recursive" symbolic links. Only within reason, though. */
+#define MAXDEPTH 5
+
+const char *make_absolute_path(const char *path)
+{
+	static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1];
+	char cwd[1024] = "";
+	int buf_index = 1, len;
+
+	int depth = MAXDEPTH;
+	char *last_elem = NULL;
+	struct stat st;
+
+	if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+		die ("Too long path: %.*s", 60, path);
+
+	while (depth--) {
+		if (!is_directory(buf)) {
+			char *last_slash = strrchr(buf, '/');
+			if (last_slash) {
+				*last_slash = '\0';
+				last_elem = xstrdup(last_slash + 1);
+			} else {
+				last_elem = xstrdup(buf);
+				*buf = '\0';
+			}
+		}
+
+		if (*buf) {
+			if (!*cwd && !getcwd(cwd, sizeof(cwd)))
+				die ("Could not get current working directory");
+
+			if (chdir(buf))
+				die ("Could not switch to '%s'", buf);
+		}
+		if (!getcwd(buf, PATH_MAX))
+			die ("Could not get current working directory");
+
+		if (last_elem) {
+			int len = strlen(buf);
+			if (len + strlen(last_elem) + 2 > PATH_MAX)
+				die ("Too long path name: '%s/%s'",
+						buf, last_elem);
+			buf[len] = '/';
+			strcpy(buf + len + 1, last_elem);
+			free(last_elem);
+			last_elem = NULL;
+		}
+
+		if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) {
+			len = readlink(buf, next_buf, PATH_MAX);
+			if (len < 0)
+				die ("Invalid symlink: %s", buf);
+			if (PATH_MAX <= len)
+				die("symbolic link too long: %s", buf);
+			next_buf[len] = '\0';
+			buf = next_buf;
+			buf_index = 1 - buf_index;
+			next_buf = bufs[buf_index];
+		} else
+			break;
+	}
+
+	if (*cwd && chdir(cwd))
+		die ("Could not change back to '%s'", cwd);
+
+	return buf;
+}
+
+static const char *get_pwd_cwd(void)
+{
+	static char cwd[PATH_MAX + 1];
+	char *pwd;
+	struct stat cwd_stat, pwd_stat;
+	if (getcwd(cwd, PATH_MAX) == NULL)
+		return NULL;
+	pwd = getenv("PWD");
+	if (pwd && strcmp(pwd, cwd)) {
+		stat(cwd, &cwd_stat);
+		if (!stat(pwd, &pwd_stat) &&
+		    pwd_stat.st_dev == cwd_stat.st_dev &&
+		    pwd_stat.st_ino == cwd_stat.st_ino) {
+			strlcpy(cwd, pwd, PATH_MAX);
+		}
+	}
+	return cwd;
+}
+
+const char *make_nonrelative_path(const char *path)
+{
+	static char buf[PATH_MAX + 1];
+
+	if (is_absolute_path(path)) {
+		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	} else {
+		const char *cwd = get_pwd_cwd();
+		if (!cwd)
+			die("Cannot determine the current working directory");
+		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	}
+	return buf;
+}
diff --git a/Documentation/perf_counter/util/alias.c b/Documentation/perf_counter/util/alias.c
new file mode 100644
index 0000000..9b3dd2b
--- /dev/null
+++ b/Documentation/perf_counter/util/alias.c
@@ -0,0 +1,77 @@
+#include "cache.h"
+
+static const char *alias_key;
+static char *alias_val;
+
+static int alias_lookup_cb(const char *k, const char *v, void *cb)
+{
+	if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) {
+		if (!v)
+			return config_error_nonbool(k);
+		alias_val = strdup(v);
+		return 0;
+	}
+	return 0;
+}
+
+char *alias_lookup(const char *alias)
+{
+	alias_key = alias;
+	alias_val = NULL;
+	perf_config(alias_lookup_cb, NULL);
+	return alias_val;
+}
+
+int split_cmdline(char *cmdline, const char ***argv)
+{
+	int src, dst, count = 0, size = 16;
+	char quoted = 0;
+
+	*argv = malloc(sizeof(char*) * size);
+
+	/* split alias_string */
+	(*argv)[count++] = cmdline;
+	for (src = dst = 0; cmdline[src];) {
+		char c = cmdline[src];
+		if (!quoted && isspace(c)) {
+			cmdline[dst++] = 0;
+			while (cmdline[++src]
+					&& isspace(cmdline[src]))
+				; /* skip */
+			if (count >= size) {
+				size += 16;
+				*argv = realloc(*argv, sizeof(char*) * size);
+			}
+			(*argv)[count++] = cmdline + dst;
+		} else if (!quoted && (c == '\'' || c == '"')) {
+			quoted = c;
+			src++;
+		} else if (c == quoted) {
+			quoted = 0;
+			src++;
+		} else {
+			if (c == '\\' && quoted != '\'') {
+				src++;
+				c = cmdline[src];
+				if (!c) {
+					free(*argv);
+					*argv = NULL;
+					return error("cmdline ends with \\");
+				}
+			}
+			cmdline[dst++] = c;
+			src++;
+		}
+	}
+
+	cmdline[dst] = 0;
+
+	if (quoted) {
+		free(*argv);
+		*argv = NULL;
+		return error("unclosed quote");
+	}
+
+	return count;
+}
+
diff --git a/Documentation/perf_counter/util/cache.h b/Documentation/perf_counter/util/cache.h
new file mode 100644
index 0000000..7108051
--- /dev/null
+++ b/Documentation/perf_counter/util/cache.h
@@ -0,0 +1,117 @@
+#ifndef CACHE_H
+#define CACHE_H
+
+#include "util.h"
+#include "strbuf.h"
+
+#define PERF_DIR_ENVIRONMENT "PERF_DIR"
+#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
+#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
+#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY"
+#define INDEX_ENVIRONMENT "PERF_INDEX_FILE"
+#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE"
+#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR"
+#define CONFIG_ENVIRONMENT "PERF_CONFIG"
+#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
+#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES"
+#define PERFATTRIBUTES_FILE ".perfattributes"
+#define INFOATTRIBUTES_FILE "info/attributes"
+#define ATTRIBUTE_MACRO_PREFIX "[attr]"
+
+typedef int (*config_fn_t)(const char *, const char *, void *);
+extern int perf_default_config(const char *, const char *, void *);
+extern int perf_config_from_file(config_fn_t fn, const char *, void *);
+extern int perf_config(config_fn_t fn, void *);
+extern int perf_parse_ulong(const char *, unsigned long *);
+extern int perf_config_int(const char *, const char *);
+extern unsigned long perf_config_ulong(const char *, const char *);
+extern int perf_config_bool_or_int(const char *, const char *, int *);
+extern int perf_config_bool(const char *, const char *);
+extern int perf_config_string(const char **, const char *, const char *);
+extern int perf_config_set(const char *, const char *);
+extern int perf_config_set_multivar(const char *, const char *, const char *, int);
+extern int perf_config_rename_section(const char *, const char *);
+extern const char *perf_etc_perfconfig(void);
+extern int check_repository_format_version(const char *var, const char *value, void *cb);
+extern int perf_config_system(void);
+extern int perf_config_global(void);
+extern int config_error_nonbool(const char *);
+extern const char *config_exclusive_filename;
+
+#define MAX_PERFNAME (1000)
+extern char perf_default_email[MAX_PERFNAME];
+extern char perf_default_name[MAX_PERFNAME];
+extern int user_ident_explicitly_given;
+
+extern const char *perf_log_output_encoding;
+extern const char *perf_mailmap_file;
+
+/* IO helper functions */
+extern void maybe_flush_or_die(FILE *, const char *);
+extern int copy_fd(int ifd, int ofd);
+extern int copy_file(const char *dst, const char *src, int mode);
+extern ssize_t read_in_full(int fd, void *buf, size_t count);
+extern ssize_t write_in_full(int fd, const void *buf, size_t count);
+extern void write_or_die(int fd, const void *buf, size_t count);
+extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
+extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
+extern void fsync_or_die(int fd, const char *);
+
+/* pager.c */
+extern void setup_pager(void);
+extern const char *pager_program;
+extern int pager_in_use(void);
+extern int pager_use_color;
+
+extern const char *editor_program;
+extern const char *excludes_file;
+
+char *alias_lookup(const char *alias);
+int split_cmdline(char *cmdline, const char ***argv);
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+	do { \
+		if ((nr) > alloc) { \
+			if (alloc_nr(alloc) < (nr)) \
+				alloc = (nr); \
+			else \
+				alloc = alloc_nr(alloc); \
+			x = xrealloc((x), alloc * sizeof(*(x))); \
+		} \
+	} while(0)
+
+
+static inline int is_absolute_path(const char *path)
+{
+	return path[0] == '/';
+}
+
+const char *make_absolute_path(const char *path);
+const char *make_nonrelative_path(const char *path);
+const char *make_relative_path(const char *abs, const char *base);
+int normalize_path_copy(char *dst, const char *src);
+int longest_ancestor_length(const char *path, const char *prefix_list);
+char *strip_path_suffix(const char *path, const char *suffix);
+
+extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+
+extern char *mksnpath(char *buf, size_t n, const char *fmt, ...)
+	__attribute__((format (printf, 3, 4)));
+extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
+	__attribute__((format (printf, 3, 4)));
+extern char *perf_pathdup(const char *fmt, ...)
+	__attribute__((format (printf, 1, 2)));
+
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+
+#endif /* CACHE_H */
diff --git a/Documentation/perf_counter/util/config.c b/Documentation/perf_counter/util/config.c
new file mode 100644
index 0000000..3dd13fa
--- /dev/null
+++ b/Documentation/perf_counter/util/config.c
@@ -0,0 +1,873 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ * Copyright (C) Johannes Schindelin, 2005
+ *
+ */
+#include "util.h"
+#include "cache.h"
+#include "exec_cmd.h"
+
+#define MAXNAME (256)
+
+static FILE *config_file;
+static const char *config_file_name;
+static int config_linenr;
+static int config_file_eof;
+
+const char *config_exclusive_filename = NULL;
+
+static int get_next_char(void)
+{
+	int c;
+	FILE *f;
+
+	c = '\n';
+	if ((f = config_file) != NULL) {
+		c = fgetc(f);
+		if (c == '\r') {
+			/* DOS like systems */
+			c = fgetc(f);
+			if (c != '\n') {
+				ungetc(c, f);
+				c = '\r';
+			}
+		}
+		if (c == '\n')
+			config_linenr++;
+		if (c == EOF) {
+			config_file_eof = 1;
+			c = '\n';
+		}
+	}
+	return c;
+}
+
+static char *parse_value(void)
+{
+	static char value[1024];
+	int quote = 0, comment = 0, len = 0, space = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (len >= sizeof(value) - 1)
+			return NULL;
+		if (c == '\n') {
+			if (quote)
+				return NULL;
+			value[len] = 0;
+			return value;
+		}
+		if (comment)
+			continue;
+		if (isspace(c) && !quote) {
+			space = 1;
+			continue;
+		}
+		if (!quote) {
+			if (c == ';' || c == '#') {
+				comment = 1;
+				continue;
+			}
+		}
+		if (space) {
+			if (len)
+				value[len++] = ' ';
+			space = 0;
+		}
+		if (c == '\\') {
+			c = get_next_char();
+			switch (c) {
+			case '\n':
+				continue;
+			case 't':
+				c = '\t';
+				break;
+			case 'b':
+				c = '\b';
+				break;
+			case 'n':
+				c = '\n';
+				break;
+			/* Some characters escape as themselves */
+			case '\\': case '"':
+				break;
+			/* Reject unknown escape sequences */
+			default:
+				return NULL;
+			}
+			value[len++] = c;
+			continue;
+		}
+		if (c == '"') {
+			quote = 1-quote;
+			continue;
+		}
+		value[len++] = c;
+	}
+}
+
+static inline int iskeychar(int c)
+{
+	return isalnum(c) || c == '-';
+}
+
+static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
+{
+	int c;
+	char *value;
+
+	/* Get the full name */
+	for (;;) {
+		c = get_next_char();
+		if (config_file_eof)
+			break;
+		if (!iskeychar(c))
+			break;
+		name[len++] = tolower(c);
+		if (len >= MAXNAME)
+			return -1;
+	}
+	name[len] = 0;
+	while (c == ' ' || c == '\t')
+		c = get_next_char();
+
+	value = NULL;
+	if (c != '\n') {
+		if (c != '=')
+			return -1;
+		value = parse_value();
+		if (!value)
+			return -1;
+	}
+	return fn(name, value, data);
+}
+
+static int get_extended_base_var(char *name, int baselen, int c)
+{
+	do {
+		if (c == '\n')
+			return -1;
+		c = get_next_char();
+	} while (isspace(c));
+
+	/* We require the format to be '[base "extension"]' */
+	if (c != '"')
+		return -1;
+	name[baselen++] = '.';
+
+	for (;;) {
+		int c = get_next_char();
+		if (c == '\n')
+			return -1;
+		if (c == '"')
+			break;
+		if (c == '\\') {
+			c = get_next_char();
+			if (c == '\n')
+				return -1;
+		}
+		name[baselen++] = c;
+		if (baselen > MAXNAME / 2)
+			return -1;
+	}
+
+	/* Final ']' */
+	if (get_next_char() != ']')
+		return -1;
+	return baselen;
+}
+
+static int get_base_var(char *name)
+{
+	int baselen = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (config_file_eof)
+			return -1;
+		if (c == ']')
+			return baselen;
+		if (isspace(c))
+			return get_extended_base_var(name, baselen, c);
+		if (!iskeychar(c) && c != '.')
+			return -1;
+		if (baselen > MAXNAME / 2)
+			return -1;
+		name[baselen++] = tolower(c);
+	}
+}
+
+static int perf_parse_file(config_fn_t fn, void *data)
+{
+	int comment = 0;
+	int baselen = 0;
+	static char var[MAXNAME];
+
+	/* U+FEFF Byte Order Mark in UTF8 */
+	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
+	const unsigned char *bomptr = utf8_bom;
+
+	for (;;) {
+		int c = get_next_char();
+		if (bomptr && *bomptr) {
+			/* We are at the file beginning; skip UTF8-encoded BOM
+			 * if present. Sane editors won't put this in on their
+			 * own, but e.g. Windows Notepad will do it happily. */
+			if ((unsigned char) c == *bomptr) {
+				bomptr++;
+				continue;
+			} else {
+				/* Do not tolerate partial BOM. */
+				if (bomptr != utf8_bom)
+					break;
+				/* No BOM at file beginning. Cool. */
+				bomptr = NULL;
+			}
+		}
+		if (c == '\n') {
+			if (config_file_eof)
+				return 0;
+			comment = 0;
+			continue;
+		}
+		if (comment || isspace(c))
+			continue;
+		if (c == '#' || c == ';') {
+			comment = 1;
+			continue;
+		}
+		if (c == '[') {
+			baselen = get_base_var(var);
+			if (baselen <= 0)
+				break;
+			var[baselen++] = '.';
+			var[baselen] = 0;
+			continue;
+		}
+		if (!isalpha(c))
+			break;
+		var[baselen] = tolower(c);
+		if (get_value(fn, data, var, baselen+1) < 0)
+			break;
+	}
+	die("bad config file line %d in %s", config_linenr, config_file_name);
+}
+
+static int parse_unit_factor(const char *end, unsigned long *val)
+{
+	if (!*end)
+		return 1;
+	else if (!strcasecmp(end, "k")) {
+		*val *= 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "m")) {
+		*val *= 1024 * 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "g")) {
+		*val *= 1024 * 1024 * 1024;
+		return 1;
+	}
+	return 0;
+}
+
+static int perf_parse_long(const char *value, long *ret)
+{
+	if (value && *value) {
+		char *end;
+		long val = strtol(value, &end, 0);
+		unsigned long factor = 1;
+		if (!parse_unit_factor(end, &factor))
+			return 0;
+		*ret = val * factor;
+		return 1;
+	}
+	return 0;
+}
+
+int perf_parse_ulong(const char *value, unsigned long *ret)
+{
+	if (value && *value) {
+		char *end;
+		unsigned long val = strtoul(value, &end, 0);
+		if (!parse_unit_factor(end, &val))
+			return 0;
+		*ret = val;
+		return 1;
+	}
+	return 0;
+}
+
+static void die_bad_config(const char *name)
+{
+	if (config_file_name)
+		die("bad config value for '%s' in %s", name, config_file_name);
+	die("bad config value for '%s'", name);
+}
+
+int perf_config_int(const char *name, const char *value)
+{
+	long ret = 0;
+	if (!perf_parse_long(value, &ret))
+		die_bad_config(name);
+	return ret;
+}
+
+unsigned long perf_config_ulong(const char *name, const char *value)
+{
+	unsigned long ret;
+	if (!perf_parse_ulong(value, &ret))
+		die_bad_config(name);
+	return ret;
+}
+
+int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
+{
+	*is_bool = 1;
+	if (!value)
+		return 1;
+	if (!*value)
+		return 0;
+	if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
+		return 1;
+	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
+		return 0;
+	*is_bool = 0;
+	return perf_config_int(name, value);
+}
+
+int perf_config_bool(const char *name, const char *value)
+{
+	int discard;
+	return !!perf_config_bool_or_int(name, value, &discard);
+}
+
+int perf_config_string(const char **dest, const char *var, const char *value)
+{
+	if (!value)
+		return config_error_nonbool(var);
+	*dest = strdup(value);
+	return 0;
+}
+
+static int perf_default_core_config(const char *var, const char *value)
+{
+	/* Add other config variables here and to Documentation/config.txt. */
+	return 0;
+}
+
+int perf_default_config(const char *var, const char *value, void *dummy)
+{
+	if (!prefixcmp(var, "core."))
+		return perf_default_core_config(var, value);
+
+	/* Add other config variables here and to Documentation/config.txt. */
+	return 0;
+}
+
+int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+{
+	int ret;
+	FILE *f = fopen(filename, "r");
+
+	ret = -1;
+	if (f) {
+		config_file = f;
+		config_file_name = filename;
+		config_linenr = 1;
+		config_file_eof = 0;
+		ret = perf_parse_file(fn, data);
+		fclose(f);
+		config_file_name = NULL;
+	}
+	return ret;
+}
+
+const char *perf_etc_perfconfig(void)
+{
+	static const char *system_wide;
+	if (!system_wide)
+		system_wide = system_path(ETC_PERFCONFIG);
+	return system_wide;
+}
+
+static int perf_env_bool(const char *k, int def)
+{
+	const char *v = getenv(k);
+	return v ? perf_config_bool(k, v) : def;
+}
+
+int perf_config_system(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
+}
+
+int perf_config_global(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
+}
+
+int perf_config(config_fn_t fn, void *data)
+{
+	int ret = 0, found = 0;
+	char *repo_config = NULL;
+	const char *home = NULL;
+
+	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+	if (config_exclusive_filename)
+		return perf_config_from_file(fn, config_exclusive_filename, data);
+	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+		ret += perf_config_from_file(fn, perf_etc_perfconfig(),
+					    data);
+		found += 1;
+	}
+
+	home = getenv("HOME");
+	if (perf_config_global() && home) {
+		char *user_config = strdup(mkpath("%s/.perfconfig", home));
+		if (!access(user_config, R_OK)) {
+			ret += perf_config_from_file(fn, user_config, data);
+			found += 1;
+		}
+		free(user_config);
+	}
+
+	repo_config = perf_pathdup("config");
+	if (!access(repo_config, R_OK)) {
+		ret += perf_config_from_file(fn, repo_config, data);
+		found += 1;
+	}
+	free(repo_config);
+	if (found == 0)
+		return -1;
+	return ret;
+}
+
+/*
+ * Find all the stuff for perf_config_set() below.
+ */
+
+#define MAX_MATCHES 512
+
+static struct {
+	int baselen;
+	char* key;
+	int do_not_match;
+	regex_t* value_regex;
+	int multi_replace;
+	size_t offset[MAX_MATCHES];
+	enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state;
+	int seen;
+} store;
+
+static int matches(const char* key, const char* value)
+{
+	return !strcmp(key, store.key) &&
+		(store.value_regex == NULL ||
+		 (store.do_not_match ^
+		  !regexec(store.value_regex, value, 0, NULL, 0)));
+}
+
+static int store_aux(const char* key, const char* value, void *cb)
+{
+	const char *ep;
+	size_t section_len;
+
+	switch (store.state) {
+	case KEY_SEEN:
+		if (matches(key, value)) {
+			if (store.seen == 1 && store.multi_replace == 0) {
+				warning("%s has multiple values", key);
+			} else if (store.seen >= MAX_MATCHES) {
+				error("too many matches for %s", key);
+				return 1;
+			}
+
+			store.offset[store.seen] = ftell(config_file);
+			store.seen++;
+		}
+		break;
+	case SECTION_SEEN:
+		/*
+		 * What we are looking for is in store.key (both
+		 * section and var), and its section part is baselen
+		 * long.  We found key (again, both section and var).
+		 * We would want to know if this key is in the same
+		 * section as what we are looking for.  We already
+		 * know we are in the same section as what should
+		 * hold store.key.
+		 */
+		ep = strrchr(key, '.');
+		section_len = ep - key;
+
+		if ((section_len != store.baselen) ||
+		    memcmp(key, store.key, section_len+1)) {
+			store.state = SECTION_END_SEEN;
+			break;
+		}
+
+		/*
+		 * Do not increment matches: this is no match, but we
+		 * just made sure we are in the desired section.
+		 */
+		store.offset[store.seen] = ftell(config_file);
+		/* fallthru */
+	case SECTION_END_SEEN:
+	case START:
+		if (matches(key, value)) {
+			store.offset[store.seen] = ftell(config_file);
+			store.state = KEY_SEEN;
+			store.seen++;
+		} else {
+			if (strrchr(key, '.') - key == store.baselen &&
+			      !strncmp(key, store.key, store.baselen)) {
+					store.state = SECTION_SEEN;
+					store.offset[store.seen] = ftell(config_file);
+			}
+		}
+	}
+	return 0;
+}
+
+static int store_write_section(int fd, const char* key)
+{
+	const char *dot;
+	int i, success;
+	struct strbuf sb = STRBUF_INIT;
+
+	dot = memchr(key, '.', store.baselen);
+	if (dot) {
+		strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key);
+		for (i = dot - key + 1; i < store.baselen; i++) {
+			if (key[i] == '"' || key[i] == '\\')
+				strbuf_addch(&sb, '\\');
+			strbuf_addch(&sb, key[i]);
+		}
+		strbuf_addstr(&sb, "\"]\n");
+	} else {
+		strbuf_addf(&sb, "[%.*s]\n", store.baselen, key);
+	}
+
+	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
+	strbuf_release(&sb);
+
+	return success;
+}
+
+static int store_write_pair(int fd, const char* key, const char* value)
+{
+	int i, success;
+	int length = strlen(key + store.baselen + 1);
+	const char *quote = "";
+	struct strbuf sb = STRBUF_INIT;
+
+	/*
+	 * Check to see if the value needs to be surrounded with a dq pair.
+	 * Note that problematic characters are always backslash-quoted; this
+	 * check is about not losing leading or trailing SP and strings that
+	 * follow beginning-of-comment characters (i.e. ';' and '#') by the
+	 * configuration parser.
+	 */
+	if (value[0] == ' ')
+		quote = "\"";
+	for (i = 0; value[i]; i++)
+		if (value[i] == ';' || value[i] == '#')
+			quote = "\"";
+	if (i && value[i - 1] == ' ')
+		quote = "\"";
+
+	strbuf_addf(&sb, "\t%.*s = %s",
+		    length, key + store.baselen + 1, quote);
+
+	for (i = 0; value[i]; i++)
+		switch (value[i]) {
+		case '\n':
+			strbuf_addstr(&sb, "\\n");
+			break;
+		case '\t':
+			strbuf_addstr(&sb, "\\t");
+			break;
+		case '"':
+		case '\\':
+			strbuf_addch(&sb, '\\');
+		default:
+			strbuf_addch(&sb, value[i]);
+			break;
+		}
+	strbuf_addf(&sb, "%s\n", quote);
+
+	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
+	strbuf_release(&sb);
+
+	return success;
+}
+
+static ssize_t find_beginning_of_line(const char* contents, size_t size,
+	size_t offset_, int* found_bracket)
+{
+	size_t equal_offset = size, bracket_offset = size;
+	ssize_t offset;
+
+contline:
+	for (offset = offset_-2; offset > 0
+			&& contents[offset] != '\n'; offset--)
+		switch (contents[offset]) {
+			case '=': equal_offset = offset; break;
+			case ']': bracket_offset = offset; break;
+		}
+	if (offset > 0 && contents[offset-1] == '\\') {
+		offset_ = offset;
+		goto contline;
+	}
+	if (bracket_offset < equal_offset) {
+		*found_bracket = 1;
+		offset = bracket_offset+1;
+	} else
+		offset++;
+
+	return offset;
+}
+
+int perf_config_set(const char* key, const char* value)
+{
+	return perf_config_set_multivar(key, value, NULL, 0);
+}
+
+/*
+ * If value==NULL, unset in (remove from) config,
+ * if value_regex!=NULL, disregard key/value pairs where value does not match.
+ * if multi_replace==0, nothing, or only one matching key/value is replaced,
+ *     else all matching key/values (regardless how many) are removed,
+ *     before the new pair is written.
+ *
+ * Returns 0 on success.
+ *
+ * This function does this:
+ *
+ * - it locks the config file by creating ".perf/config.lock"
+ *
+ * - it then parses the config using store_aux() as validator to find
+ *   the position on the key/value pair to replace. If it is to be unset,
+ *   it must be found exactly once.
+ *
+ * - the config file is mmap()ed and the part before the match (if any) is
+ *   written to the lock file, then the changed part and the rest.
+ *
+ * - the config file is removed and the lock file rename()d to it.
+ *
+ */
+int perf_config_set_multivar(const char* key, const char* value,
+	const char* value_regex, int multi_replace)
+{
+	int i, dot;
+	int fd = -1, in_fd;
+	int ret = 0;
+	char* config_filename;
+	const char* last_dot = strrchr(key, '.');
+
+	if (config_exclusive_filename)
+		config_filename = strdup(config_exclusive_filename);
+	else
+		config_filename = perf_pathdup("config");
+
+	/*
+	 * Since "key" actually contains the section name and the real
+	 * key name separated by a dot, we have to know where the dot is.
+	 */
+
+	if (last_dot == NULL) {
+		error("key does not contain a section: %s", key);
+		ret = 2;
+		goto out_free;
+	}
+	store.baselen = last_dot - key;
+
+	store.multi_replace = multi_replace;
+
+	/*
+	 * Validate the key and while at it, lower case it for matching.
+	 */
+	store.key = malloc(strlen(key) + 1);
+	dot = 0;
+	for (i = 0; key[i]; i++) {
+		unsigned char c = key[i];
+		if (c == '.')
+			dot = 1;
+		/* Leave the extended basename untouched.. */
+		if (!dot || i > store.baselen) {
+			if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) {
+				error("invalid key: %s", key);
+				free(store.key);
+				ret = 1;
+				goto out_free;
+			}
+			c = tolower(c);
+		} else if (c == '\n') {
+			error("invalid key (newline): %s", key);
+			free(store.key);
+			ret = 1;
+			goto out_free;
+		}
+		store.key[i] = c;
+	}
+	store.key[i] = 0;
+
+	/*
+	 * If .perf/config does not exist yet, write a minimal version.
+	 */
+	in_fd = open(config_filename, O_RDONLY);
+	if ( in_fd < 0 ) {
+		free(store.key);
+
+		if ( ENOENT != errno ) {
+			error("opening %s: %s", config_filename,
+			      strerror(errno));
+			ret = 3; /* same as "invalid config file" */
+			goto out_free;
+		}
+		/* if nothing to unset, error out */
+		if (value == NULL) {
+			ret = 5;
+			goto out_free;
+		}
+
+		store.key = (char*)key;
+		if (!store_write_section(fd, key) ||
+		    !store_write_pair(fd, key, value))
+			goto write_err_out;
+	} else {
+		struct stat st;
+		char* contents;
+		size_t contents_sz, copy_begin, copy_end;
+		int i, new_line = 0;
+
+		if (value_regex == NULL)
+			store.value_regex = NULL;
+		else {
+			if (value_regex[0] == '!') {
+				store.do_not_match = 1;
+				value_regex++;
+			} else
+				store.do_not_match = 0;
+
+			store.value_regex = (regex_t*)malloc(sizeof(regex_t));
+			if (regcomp(store.value_regex, value_regex,
+					REG_EXTENDED)) {
+				error("invalid pattern: %s", value_regex);
+				free(store.value_regex);
+				ret = 6;
+				goto out_free;
+			}
+		}
+
+		store.offset[0] = 0;
+		store.state = START;
+		store.seen = 0;
+
+		/*
+		 * After this, store.offset will contain the *end* offset
+		 * of the last match, or remain at 0 if no match was found.
+		 * As a side effect, we make sure to transform only a valid
+		 * existing config file.
+		 */
+		if (perf_config_from_file(store_aux, config_filename, NULL)) {
+			error("invalid config file %s", config_filename);
+			free(store.key);
+			if (store.value_regex != NULL) {
+				regfree(store.value_regex);
+				free(store.value_regex);
+			}
+			ret = 3;
+			goto out_free;
+		}
+
+		free(store.key);
+		if (store.value_regex != NULL) {
+			regfree(store.value_regex);
+			free(store.value_regex);
+		}
+
+		/* if nothing to unset, or too many matches, error out */
+		if ((store.seen == 0 && value == NULL) ||
+				(store.seen > 1 && multi_replace == 0)) {
+			ret = 5;
+			goto out_free;
+		}
+
+		fstat(in_fd, &st);
+		contents_sz = xsize_t(st.st_size);
+		contents = mmap(NULL, contents_sz, PROT_READ,
+			MAP_PRIVATE, in_fd, 0);
+		close(in_fd);
+
+		if (store.seen == 0)
+			store.seen = 1;
+
+		for (i = 0, copy_begin = 0; i < store.seen; i++) {
+			if (store.offset[i] == 0) {
+				store.offset[i] = copy_end = contents_sz;
+			} else if (store.state != KEY_SEEN) {
+				copy_end = store.offset[i];
+			} else
+				copy_end = find_beginning_of_line(
+					contents, contents_sz,
+					store.offset[i]-2, &new_line);
+
+			if (copy_end > 0 && contents[copy_end-1] != '\n')
+				new_line = 1;
+
+			/* write the first part of the config */
+			if (copy_end > copy_begin) {
+				if (write_in_full(fd, contents + copy_begin,
+						  copy_end - copy_begin) <
+				    copy_end - copy_begin)
+					goto write_err_out;
+				if (new_line &&
+				    write_in_full(fd, "\n", 1) != 1)
+					goto write_err_out;
+			}
+			copy_begin = store.offset[i];
+		}
+
+		/* write the pair (value == NULL means unset) */
+		if (value != NULL) {
+			if (store.state == START) {
+				if (!store_write_section(fd, key))
+					goto write_err_out;
+			}
+			if (!store_write_pair(fd, key, value))
+				goto write_err_out;
+		}
+
+		/* write the rest of the config */
+		if (copy_begin < contents_sz)
+			if (write_in_full(fd, contents + copy_begin,
+					  contents_sz - copy_begin) <
+			    contents_sz - copy_begin)
+				goto write_err_out;
+
+		munmap(contents, contents_sz);
+	}
+
+	ret = 0;
+
+out_free:
+	free(config_filename);
+	return ret;
+
+write_err_out:
+	goto out_free;
+
+}
+
+/*
+ * Call this to report error for your variable that should not
+ * get a boolean value (i.e. "[my] var" means "true").
+ */
+int config_error_nonbool(const char *var)
+{
+	return error("Missing value for '%s'", var);
+}
diff --git a/Documentation/perf_counter/util/ctype.c b/Documentation/perf_counter/util/ctype.c
new file mode 100644
index 0000000..b90ec00
--- /dev/null
+++ b/Documentation/perf_counter/util/ctype.c
@@ -0,0 +1,26 @@
+/*
+ * Sane locale-independent, ASCII ctype.
+ *
+ * No surprises, and works with signed and unsigned chars.
+ */
+#include "cache.h"
+
+enum {
+	S = GIT_SPACE,
+	A = GIT_ALPHA,
+	D = GIT_DIGIT,
+	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
+	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | * */
+};
+
+unsigned char sane_ctype[256] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
+	S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0,		/*  32.. 47 */
+	D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G,		/*  48.. 63 */
+	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
+	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0,		/*  80.. 95 */
+	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
+	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0,		/* 112..127 */
+	/* Nothing in the 128.. range */
+};
diff --git a/Documentation/perf_counter/util/exec_cmd.c b/Documentation/perf_counter/util/exec_cmd.c
new file mode 100644
index 0000000..d392922
--- /dev/null
+++ b/Documentation/perf_counter/util/exec_cmd.c
@@ -0,0 +1,165 @@
+#include "cache.h"
+#include "exec_cmd.h"
+#include "quote.h"
+#define MAX_ARGS	32
+
+extern char **environ;
+static const char *argv_exec_path;
+static const char *argv0_path;
+
+const char *system_path(const char *path)
+{
+#ifdef RUNTIME_PREFIX
+	static const char *prefix;
+#else
+	static const char *prefix = PREFIX;
+#endif
+	struct strbuf d = STRBUF_INIT;
+
+	if (is_absolute_path(path))
+		return path;
+
+#ifdef RUNTIME_PREFIX
+	assert(argv0_path);
+	assert(is_absolute_path(argv0_path));
+
+	if (!prefix &&
+	    !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
+	    !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
+	    !(prefix = strip_path_suffix(argv0_path, "perf"))) {
+		prefix = PREFIX;
+		fprintf(stderr, "RUNTIME_PREFIX requested, "
+				"but prefix computation failed.  "
+				"Using static fallback '%s'.\n", prefix);
+	}
+#endif
+
+	strbuf_addf(&d, "%s/%s", prefix, path);
+	path = strbuf_detach(&d, NULL);
+	return path;
+}
+
+const char *perf_extract_argv0_path(const char *argv0)
+{
+	const char *slash;
+
+	if (!argv0 || !*argv0)
+		return NULL;
+	slash = argv0 + strlen(argv0);
+
+	while (argv0 <= slash && !is_dir_sep(*slash))
+		slash--;
+
+	if (slash >= argv0) {
+		argv0_path = strndup(argv0, slash - argv0);
+		return slash + 1;
+	}
+
+	return argv0;
+}
+
+void perf_set_argv_exec_path(const char *exec_path)
+{
+	argv_exec_path = exec_path;
+	/*
+	 * Propagate this setting to external programs.
+	 */
+	setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
+}
+
+
+/* Returns the highest-priority, location to look for perf programs. */
+const char *perf_exec_path(void)
+{
+	const char *env;
+
+	if (argv_exec_path)
+		return argv_exec_path;
+
+	env = getenv(EXEC_PATH_ENVIRONMENT);
+	if (env && *env) {
+		return env;
+	}
+
+	return system_path(PERF_EXEC_PATH);
+}
+
+static void add_path(struct strbuf *out, const char *path)
+{
+	if (path && *path) {
+		if (is_absolute_path(path))
+			strbuf_addstr(out, path);
+		else
+			strbuf_addstr(out, make_nonrelative_path(path));
+
+		strbuf_addch(out, PATH_SEP);
+	}
+}
+
+void setup_path(void)
+{
+	const char *old_path = getenv("PATH");
+	struct strbuf new_path = STRBUF_INIT;
+
+	add_path(&new_path, perf_exec_path());
+	add_path(&new_path, argv0_path);
+
+	if (old_path)
+		strbuf_addstr(&new_path, old_path);
+	else
+		strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin");
+
+	setenv("PATH", new_path.buf, 1);
+
+	strbuf_release(&new_path);
+}
+
+const char **prepare_perf_cmd(const char **argv)
+{
+	int argc;
+	const char **nargv;
+
+	for (argc = 0; argv[argc]; argc++)
+		; /* just counting */
+	nargv = malloc(sizeof(*nargv) * (argc + 2));
+
+	nargv[0] = "perf";
+	for (argc = 0; argv[argc]; argc++)
+		nargv[argc + 1] = argv[argc];
+	nargv[argc + 1] = NULL;
+	return nargv;
+}
+
+int execv_perf_cmd(const char **argv) {
+	const char **nargv = prepare_perf_cmd(argv);
+
+	/* execvp() can only ever return if it fails */
+	execvp("perf", (char **)nargv);
+
+	free(nargv);
+	return -1;
+}
+
+
+int execl_perf_cmd(const char *cmd,...)
+{
+	int argc;
+	const char *argv[MAX_ARGS + 1];
+	const char *arg;
+	va_list param;
+
+	va_start(param, cmd);
+	argv[0] = cmd;
+	argc = 1;
+	while (argc < MAX_ARGS) {
+		arg = argv[argc++] = va_arg(param, char *);
+		if (!arg)
+			break;
+	}
+	va_end(param);
+	if (MAX_ARGS <= argc)
+		return error("too many args to run %s", cmd);
+
+	argv[argc] = NULL;
+	return execv_perf_cmd(argv);
+}
diff --git a/Documentation/perf_counter/util/exec_cmd.h b/Documentation/perf_counter/util/exec_cmd.h
new file mode 100644
index 0000000..effe25e
--- /dev/null
+++ b/Documentation/perf_counter/util/exec_cmd.h
@@ -0,0 +1,13 @@
+#ifndef PERF_EXEC_CMD_H
+#define PERF_EXEC_CMD_H
+
+extern void perf_set_argv_exec_path(const char *exec_path);
+extern const char *perf_extract_argv0_path(const char *path);
+extern const char *perf_exec_path(void);
+extern void setup_path(void);
+extern const char **prepare_perf_cmd(const char **argv);
+extern int execv_perf_cmd(const char **argv); /* NULL terminated */
+extern int execl_perf_cmd(const char *cmd, ...);
+extern const char *system_path(const char *path);
+
+#endif /* PERF_EXEC_CMD_H */
diff --git a/Documentation/perf_counter/util/generate-cmdlist.sh b/Documentation/perf_counter/util/generate-cmdlist.sh
new file mode 100755
index 0000000..f06f6fd
--- /dev/null
+++ b/Documentation/perf_counter/util/generate-cmdlist.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+echo "/* Automatically generated by $0 */
+struct cmdname_help
+{
+    char name[16];
+    char help[80];
+};
+
+static struct cmdname_help common_cmds[] = {"
+
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+echo "};"
diff --git a/Documentation/perf_counter/util/help.c b/Documentation/perf_counter/util/help.c
new file mode 100644
index 0000000..edde541
--- /dev/null
+++ b/Documentation/perf_counter/util/help.c
@@ -0,0 +1,366 @@
+#include "cache.h"
+#include "../builtin.h"
+#include "exec_cmd.h"
+#include "levenshtein.h"
+#include "help.h"
+
+/* most GUI terminals set COLUMNS (although some don't export it) */
+static int term_columns(void)
+{
+	char *col_string = getenv("COLUMNS");
+	int n_cols;
+
+	if (col_string && (n_cols = atoi(col_string)) > 0)
+		return n_cols;
+
+#ifdef TIOCGWINSZ
+	{
+		struct winsize ws;
+		if (!ioctl(1, TIOCGWINSZ, &ws)) {
+			if (ws.ws_col)
+				return ws.ws_col;
+		}
+	}
+#endif
+
+	return 80;
+}
+
+void add_cmdname(struct cmdnames *cmds, const char *name, int len)
+{
+	struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
+
+	ent->len = len;
+	memcpy(ent->name, name, len);
+	ent->name[len] = 0;
+
+	ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
+	cmds->names[cmds->cnt++] = ent;
+}
+
+static void clean_cmdnames(struct cmdnames *cmds)
+{
+	int i;
+	for (i = 0; i < cmds->cnt; ++i)
+		free(cmds->names[i]);
+	free(cmds->names);
+	cmds->cnt = 0;
+	cmds->alloc = 0;
+}
+
+static int cmdname_compare(const void *a_, const void *b_)
+{
+	struct cmdname *a = *(struct cmdname **)a_;
+	struct cmdname *b = *(struct cmdname **)b_;
+	return strcmp(a->name, b->name);
+}
+
+static void uniq(struct cmdnames *cmds)
+{
+	int i, j;
+
+	if (!cmds->cnt)
+		return;
+
+	for (i = j = 1; i < cmds->cnt; i++)
+		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+			cmds->names[j++] = cmds->names[i];
+
+	cmds->cnt = j;
+}
+
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
+{
+	int ci, cj, ei;
+	int cmp;
+
+	ci = cj = ei = 0;
+	while (ci < cmds->cnt && ei < excludes->cnt) {
+		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
+		if (cmp < 0)
+			cmds->names[cj++] = cmds->names[ci++];
+		else if (cmp == 0)
+			ci++, ei++;
+		else if (cmp > 0)
+			ei++;
+	}
+
+	while (ci < cmds->cnt)
+		cmds->names[cj++] = cmds->names[ci++];
+
+	cmds->cnt = cj;
+}
+
+static void pretty_print_string_list(struct cmdnames *cmds, int longest)
+{
+	int cols = 1, rows;
+	int space = longest + 1; /* min 1 SP between words */
+	int max_cols = term_columns() - 1; /* don't print *on* the edge */
+	int i, j;
+
+	if (space < max_cols)
+		cols = max_cols / space;
+	rows = (cmds->cnt + cols - 1) / cols;
+
+	for (i = 0; i < rows; i++) {
+		printf("  ");
+
+		for (j = 0; j < cols; j++) {
+			int n = j * rows + i;
+			int size = space;
+			if (n >= cmds->cnt)
+				break;
+			if (j == cols-1 || n + rows >= cmds->cnt)
+				size = 1;
+			printf("%-*s", size, cmds->names[n]->name);
+		}
+		putchar('\n');
+	}
+}
+
+static int is_executable(const char *name)
+{
+	struct stat st;
+
+	if (stat(name, &st) || /* stat, not lstat */
+	    !S_ISREG(st.st_mode))
+		return 0;
+
+#ifdef __MINGW32__
+	/* cannot trust the executable bit, peek into the file instead */
+	char buf[3] = { 0 };
+	int n;
+	int fd = open(name, O_RDONLY);
+	st.st_mode &= ~S_IXUSR;
+	if (fd >= 0) {
+		n = read(fd, buf, 2);
+		if (n == 2)
+			/* DOS executables start with "MZ" */
+			if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
+				st.st_mode |= S_IXUSR;
+		close(fd);
+	}
+#endif
+	return st.st_mode & S_IXUSR;
+}
+
+static void list_commands_in_dir(struct cmdnames *cmds,
+					 const char *path,
+					 const char *prefix)
+{
+	int prefix_len;
+	DIR *dir = opendir(path);
+	struct dirent *de;
+	struct strbuf buf = STRBUF_INIT;
+	int len;
+
+	if (!dir)
+		return;
+	if (!prefix)
+		prefix = "perf-";
+	prefix_len = strlen(prefix);
+
+	strbuf_addf(&buf, "%s/", path);
+	len = buf.len;
+
+	while ((de = readdir(dir)) != NULL) {
+		int entlen;
+
+		if (prefixcmp(de->d_name, prefix))
+			continue;
+
+		strbuf_setlen(&buf, len);
+		strbuf_addstr(&buf, de->d_name);
+		if (!is_executable(buf.buf))
+			continue;
+
+		entlen = strlen(de->d_name) - prefix_len;
+		if (has_extension(de->d_name, ".exe"))
+			entlen -= 4;
+
+		add_cmdname(cmds, de->d_name + prefix_len, entlen);
+	}
+	closedir(dir);
+	strbuf_release(&buf);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds)
+{
+	const char *env_path = getenv("PATH");
+	const char *exec_path = perf_exec_path();
+
+	if (exec_path) {
+		list_commands_in_dir(main_cmds, exec_path, prefix);
+		qsort(main_cmds->names, main_cmds->cnt,
+		      sizeof(*main_cmds->names), cmdname_compare);
+		uniq(main_cmds);
+	}
+
+	if (env_path) {
+		char *paths, *path, *colon;
+		path = paths = strdup(env_path);
+		while (1) {
+			if ((colon = strchr(path, PATH_SEP)))
+				*colon = 0;
+			if (!exec_path || strcmp(path, exec_path))
+				list_commands_in_dir(other_cmds, path, prefix);
+
+			if (!colon)
+				break;
+			path = colon + 1;
+		}
+		free(paths);
+
+		qsort(other_cmds->names, other_cmds->cnt,
+		      sizeof(*other_cmds->names), cmdname_compare);
+		uniq(other_cmds);
+	}
+	exclude_cmds(other_cmds, main_cmds);
+}
+
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds)
+{
+	int i, longest = 0;
+
+	for (i = 0; i < main_cmds->cnt; i++)
+		if (longest < main_cmds->names[i]->len)
+			longest = main_cmds->names[i]->len;
+	for (i = 0; i < other_cmds->cnt; i++)
+		if (longest < other_cmds->names[i]->len)
+			longest = other_cmds->names[i]->len;
+
+	if (main_cmds->cnt) {
+		const char *exec_path = perf_exec_path();
+		printf("available %s in '%s'\n", title, exec_path);
+		printf("----------------");
+		mput_char('-', strlen(title) + strlen(exec_path));
+		putchar('\n');
+		pretty_print_string_list(main_cmds, longest);
+		putchar('\n');
+	}
+
+	if (other_cmds->cnt) {
+		printf("%s available from elsewhere on your $PATH\n", title);
+		printf("---------------------------------------");
+		mput_char('-', strlen(title));
+		putchar('\n');
+		pretty_print_string_list(other_cmds, longest);
+		putchar('\n');
+	}
+}
+
+int is_in_cmdlist(struct cmdnames *c, const char *s)
+{
+	int i;
+	for (i = 0; i < c->cnt; i++)
+		if (!strcmp(s, c->names[i]->name))
+			return 1;
+	return 0;
+}
+
+static int autocorrect;
+static struct cmdnames aliases;
+
+static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "help.autocorrect"))
+		autocorrect = perf_config_int(var,value);
+	/* Also use aliases for command lookup */
+	if (!prefixcmp(var, "alias."))
+		add_cmdname(&aliases, var + 6, strlen(var + 6));
+
+	return perf_default_config(var, value, cb);
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+	const struct cmdname *const *c1 = p1, *const *c2 = p2;
+	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+	int l1 = (*c1)->len;
+	int l2 = (*c2)->len;
+	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+	int i;
+	ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
+
+	for (i = 0; i < old->cnt; i++)
+		cmds->names[cmds->cnt++] = old->names[i];
+	free(old->names);
+	old->cnt = 0;
+	old->names = NULL;
+}
+
+const char *help_unknown_cmd(const char *cmd)
+{
+	int i, n, best_similarity = 0;
+	struct cmdnames main_cmds, other_cmds;
+
+	memset(&main_cmds, 0, sizeof(main_cmds));
+	memset(&other_cmds, 0, sizeof(main_cmds));
+	memset(&aliases, 0, sizeof(aliases));
+
+	perf_config(perf_unknown_cmd_config, NULL);
+
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	add_cmd_list(&main_cmds, &aliases);
+	add_cmd_list(&main_cmds, &other_cmds);
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(main_cmds.names), cmdname_compare);
+	uniq(&main_cmds);
+
+	/* This reuses cmdname->len for similarity index */
+	for (i = 0; i < main_cmds.cnt; ++i)
+		main_cmds.names[i]->len =
+			levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(*main_cmds.names), levenshtein_compare);
+
+	if (!main_cmds.cnt)
+		die ("Uh oh. Your system reports no Git commands at all.");
+
+	best_similarity = main_cmds.names[0]->len;
+	n = 1;
+	while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+		++n;
+	if (autocorrect && n == 1) {
+		const char *assumed = main_cmds.names[0]->name;
+		main_cmds.names[0] = NULL;
+		clean_cmdnames(&main_cmds);
+		fprintf(stderr, "WARNING: You called a Git program named '%s', "
+			"which does not exist.\n"
+			"Continuing under the assumption that you meant '%s'\n",
+			cmd, assumed);
+		if (autocorrect > 0) {
+			fprintf(stderr, "in %0.1f seconds automatically...\n",
+				(float)autocorrect/10.0);
+			poll(NULL, 0, autocorrect * 100);
+		}
+		return assumed;
+	}
+
+	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+	if (best_similarity < 6) {
+		fprintf(stderr, "\nDid you mean %s?\n",
+			n < 2 ? "this": "one of these");
+
+		for (i = 0; i < n; i++)
+			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+	}
+
+	exit(1);
+}
+
+int cmd_version(int argc, const char **argv, const char *prefix)
+{
+	printf("perf version %s\n", perf_version_string);
+	return 0;
+}
diff --git a/Documentation/perf_counter/util/help.h b/Documentation/perf_counter/util/help.h
new file mode 100644
index 0000000..56bc154
--- /dev/null
+++ b/Documentation/perf_counter/util/help.h
@@ -0,0 +1,29 @@
+#ifndef HELP_H
+#define HELP_H
+
+struct cmdnames {
+	int alloc;
+	int cnt;
+	struct cmdname {
+		size_t len; /* also used for similarity index in help.c */
+		char name[FLEX_ARRAY];
+	} **names;
+};
+
+static inline void mput_char(char c, unsigned int num)
+{
+	while(num--)
+		putchar(c);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds);
+void add_cmdname(struct cmdnames *cmds, const char *name, int len);
+/* Here we require that excludes is a sorted list. */
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
+int is_in_cmdlist(struct cmdnames *c, const char *s);
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds);
+
+#endif /* HELP_H */
diff --git a/Documentation/perf_counter/util/levenshtein.c b/Documentation/perf_counter/util/levenshtein.c
new file mode 100644
index 0000000..e521d15
--- /dev/null
+++ b/Documentation/perf_counter/util/levenshtein.c
@@ -0,0 +1,84 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+/*
+ * This function implements the Damerau-Levenshtein algorithm to
+ * calculate a distance between strings.
+ *
+ * Basically, it says how many letters need to be swapped, substituted,
+ * deleted from, or added to string1, at least, to get string2.
+ *
+ * The idea is to build a distance matrix for the substrings of both
+ * strings.  To avoid a large space complexity, only the last three rows
+ * are kept in memory (if swaps had the same or higher cost as one deletion
+ * plus one insertion, only two rows would be needed).
+ *
+ * At any stage, "i + 1" denotes the length of the current substring of
+ * string1 that the distance is calculated for.
+ *
+ * row2 holds the current row, row1 the previous row (i.e. for the substring
+ * of string1 of length "i"), and row0 the row before that.
+ *
+ * In other words, at the start of the big loop, row2[j + 1] contains the
+ * Damerau-Levenshtein distance between the substring of string1 of length
+ * "i" and the substring of string2 of length "j + 1".
+ *
+ * All the big loop does is determine the partial minimum-cost paths.
+ *
+ * It does so by calculating the costs of the path ending in characters
+ * i (in string1) and j (in string2), respectively, given that the last
+ * operation is a substition, a swap, a deletion, or an insertion.
+ *
+ * This implementation allows the costs to be weighted:
+ *
+ * - w (as in "sWap")
+ * - s (as in "Substitution")
+ * - a (for insertion, AKA "Add")
+ * - d (as in "Deletion")
+ *
+ * Note that this algorithm calculates a distance _iff_ d == a.
+ */
+int levenshtein(const char *string1, const char *string2,
+		int w, int s, int a, int d)
+{
+	int len1 = strlen(string1), len2 = strlen(string2);
+	int *row0 = malloc(sizeof(int) * (len2 + 1));
+	int *row1 = malloc(sizeof(int) * (len2 + 1));
+	int *row2 = malloc(sizeof(int) * (len2 + 1));
+	int i, j;
+
+	for (j = 0; j <= len2; j++)
+		row1[j] = j * a;
+	for (i = 0; i < len1; i++) {
+		int *dummy;
+
+		row2[0] = (i + 1) * d;
+		for (j = 0; j < len2; j++) {
+			/* substitution */
+			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+			/* swap */
+			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+					string1[i] == string2[j - 1] &&
+					row2[j + 1] > row0[j - 1] + w)
+				row2[j + 1] = row0[j - 1] + w;
+			/* deletion */
+			if (row2[j + 1] > row1[j + 1] + d)
+				row2[j + 1] = row1[j + 1] + d;
+			/* insertion */
+			if (row2[j + 1] > row2[j] + a)
+				row2[j + 1] = row2[j] + a;
+		}
+
+		dummy = row0;
+		row0 = row1;
+		row1 = row2;
+		row2 = dummy;
+	}
+
+	i = row1[len2];
+	free(row0);
+	free(row1);
+	free(row2);
+
+	return i;
+}
diff --git a/Documentation/perf_counter/util/levenshtein.h b/Documentation/perf_counter/util/levenshtein.h
new file mode 100644
index 0000000..0173abe
--- /dev/null
+++ b/Documentation/perf_counter/util/levenshtein.h
@@ -0,0 +1,8 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+	int swap_penalty, int substition_penalty,
+	int insertion_penalty, int deletion_penalty);
+
+#endif
diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c
new file mode 100644
index 0000000..28b34c1
--- /dev/null
+++ b/Documentation/perf_counter/util/parse-options.c
@@ -0,0 +1,492 @@
+#include "util.h"
+#include "parse-options.h"
+#include "cache.h"
+
+#define OPT_SHORT 1
+#define OPT_UNSET 2
+
+static int opterror(const struct option *opt, const char *reason, int flags)
+{
+	if (flags & OPT_SHORT)
+		return error("switch `%c' %s", opt->short_name, reason);
+	if (flags & OPT_UNSET)
+		return error("option `no-%s' %s", opt->long_name, reason);
+	return error("option `%s' %s", opt->long_name, reason);
+}
+
+static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
+		   int flags, const char **arg)
+{
+	if (p->opt) {
+		*arg = p->opt;
+		p->opt = NULL;
+	} else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) {
+		*arg = (const char *)opt->defval;
+	} else if (p->argc > 1) {
+		p->argc--;
+		*arg = *++p->argv;
+	} else
+		return opterror(opt, "requires a value", flags);
+	return 0;
+}
+
+static int get_value(struct parse_opt_ctx_t *p,
+		     const struct option *opt, int flags)
+{
+	const char *s, *arg;
+	const int unset = flags & OPT_UNSET;
+
+	if (unset && p->opt)
+		return opterror(opt, "takes no value", flags);
+	if (unset && (opt->flags & PARSE_OPT_NONEG))
+		return opterror(opt, "isn't available", flags);
+
+	if (!(flags & OPT_SHORT) && p->opt) {
+		switch (opt->type) {
+		case OPTION_CALLBACK:
+			if (!(opt->flags & PARSE_OPT_NOARG))
+				break;
+			/* FALLTHROUGH */
+		case OPTION_BOOLEAN:
+		case OPTION_BIT:
+		case OPTION_SET_INT:
+		case OPTION_SET_PTR:
+			return opterror(opt, "takes no value", flags);
+		default:
+			break;
+		}
+	}
+
+	switch (opt->type) {
+	case OPTION_BIT:
+		if (unset)
+			*(int *)opt->value &= ~opt->defval;
+		else
+			*(int *)opt->value |= opt->defval;
+		return 0;
+
+	case OPTION_BOOLEAN:
+		*(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
+		return 0;
+
+	case OPTION_SET_INT:
+		*(int *)opt->value = unset ? 0 : opt->defval;
+		return 0;
+
+	case OPTION_SET_PTR:
+		*(void **)opt->value = unset ? NULL : (void *)opt->defval;
+		return 0;
+
+	case OPTION_STRING:
+		if (unset)
+			*(const char **)opt->value = NULL;
+		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			*(const char **)opt->value = (const char *)opt->defval;
+		else
+			return get_arg(p, opt, flags, (const char **)opt->value);
+		return 0;
+
+	case OPTION_CALLBACK:
+		if (unset)
+			return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_NOARG)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		return (*opt->callback)(opt, arg, 0) ? (-1) : 0;
+
+	case OPTION_INTEGER:
+		if (unset) {
+			*(int *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(int *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(int *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	default:
+		die("should not happen, someone must be hit on the forehead");
+	}
+}
+
+static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
+{
+	for (; options->type != OPTION_END; options++) {
+		if (options->short_name == *p->opt) {
+			p->opt = p->opt[1] ? p->opt + 1 : NULL;
+			return get_value(p, options, OPT_SHORT);
+		}
+	}
+	return -2;
+}
+
+static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
+                          const struct option *options)
+{
+	const char *arg_end = strchr(arg, '=');
+	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
+	int abbrev_flags = 0, ambiguous_flags = 0;
+
+	if (!arg_end)
+		arg_end = arg + strlen(arg);
+
+	for (; options->type != OPTION_END; options++) {
+		const char *rest;
+		int flags = 0;
+
+		if (!options->long_name)
+			continue;
+
+		rest = skip_prefix(arg, options->long_name);
+		if (options->type == OPTION_ARGUMENT) {
+			if (!rest)
+				continue;
+			if (*rest == '=')
+				return opterror(options, "takes no value", flags);
+			if (*rest)
+				continue;
+			p->out[p->cpidx++] = arg - 2;
+			return 0;
+		}
+		if (!rest) {
+			/* abbreviated? */
+			if (!strncmp(options->long_name, arg, arg_end - arg)) {
+is_abbreviated:
+				if (abbrev_option) {
+					/*
+					 * If this is abbreviated, it is
+					 * ambiguous. So when there is no
+					 * exact match later, we need to
+					 * error out.
+					 */
+					ambiguous_option = abbrev_option;
+					ambiguous_flags = abbrev_flags;
+				}
+				if (!(flags & OPT_UNSET) && *arg_end)
+					p->opt = arg_end + 1;
+				abbrev_option = options;
+				abbrev_flags = flags;
+				continue;
+			}
+			/* negated and abbreviated very much? */
+			if (!prefixcmp("no-", arg)) {
+				flags |= OPT_UNSET;
+				goto is_abbreviated;
+			}
+			/* negated? */
+			if (strncmp(arg, "no-", 3))
+				continue;
+			flags |= OPT_UNSET;
+			rest = skip_prefix(arg + 3, options->long_name);
+			/* abbreviated and negated? */
+			if (!rest && !prefixcmp(options->long_name, arg + 3))
+				goto is_abbreviated;
+			if (!rest)
+				continue;
+		}
+		if (*rest) {
+			if (*rest != '=')
+				continue;
+			p->opt = rest + 1;
+		}
+		return get_value(p, options, flags);
+	}
+
+	if (ambiguous_option)
+		return error("Ambiguous option: %s "
+			"(could be --%s%s or --%s%s)",
+			arg,
+			(ambiguous_flags & OPT_UNSET) ?  "no-" : "",
+			ambiguous_option->long_name,
+			(abbrev_flags & OPT_UNSET) ?  "no-" : "",
+			abbrev_option->long_name);
+	if (abbrev_option)
+		return get_value(p, abbrev_option, abbrev_flags);
+	return -2;
+}
+
+static void check_typos(const char *arg, const struct option *options)
+{
+	if (strlen(arg) < 3)
+		return;
+
+	if (!prefixcmp(arg, "no-")) {
+		error ("did you mean `--%s` (with two dashes ?)", arg);
+		exit(129);
+	}
+
+	for (; options->type != OPTION_END; options++) {
+		if (!options->long_name)
+			continue;
+		if (!prefixcmp(options->long_name, arg)) {
+			error ("did you mean `--%s` (with two dashes ?)", arg);
+			exit(129);
+		}
+	}
+}
+
+void parse_options_start(struct parse_opt_ctx_t *ctx,
+			 int argc, const char **argv, int flags)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->argc = argc - 1;
+	ctx->argv = argv + 1;
+	ctx->out  = argv;
+	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
+	ctx->flags = flags;
+	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
+	    (flags & PARSE_OPT_STOP_AT_NON_OPTION))
+		die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
+}
+
+static int usage_with_options_internal(const char * const *,
+				       const struct option *, int);
+
+int parse_options_step(struct parse_opt_ctx_t *ctx,
+		       const struct option *options,
+		       const char * const usagestr[])
+{
+	int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
+
+	/* we must reset ->opt, unknown short option leave it dangling */
+	ctx->opt = NULL;
+
+	for (; ctx->argc; ctx->argc--, ctx->argv++) {
+		const char *arg = ctx->argv[0];
+
+		if (*arg != '-' || !arg[1]) {
+			if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
+				break;
+			ctx->out[ctx->cpidx++] = ctx->argv[0];
+			continue;
+		}
+
+		if (arg[1] != '-') {
+			ctx->opt = arg + 1;
+			if (internal_help && *ctx->opt == 'h')
+				return parse_options_usage(usagestr, options);
+			switch (parse_short_opt(ctx, options)) {
+			case -1:
+				return parse_options_usage(usagestr, options);
+			case -2:
+				goto unknown;
+			}
+			if (ctx->opt)
+				check_typos(arg + 1, options);
+			while (ctx->opt) {
+				if (internal_help && *ctx->opt == 'h')
+					return parse_options_usage(usagestr, options);
+				switch (parse_short_opt(ctx, options)) {
+				case -1:
+					return parse_options_usage(usagestr, options);
+				case -2:
+					/* fake a short option thing to hide the fact that we may have
+					 * started to parse aggregated stuff
+					 *
+					 * This is leaky, too bad.
+					 */
+					ctx->argv[0] = strdup(ctx->opt - 1);
+					*(char *)ctx->argv[0] = '-';
+					goto unknown;
+				}
+			}
+			continue;
+		}
+
+		if (!arg[2]) { /* "--" */
+			if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
+				ctx->argc--;
+				ctx->argv++;
+			}
+			break;
+		}
+
+		if (internal_help && !strcmp(arg + 2, "help-all"))
+			return usage_with_options_internal(usagestr, options, 1);
+		if (internal_help && !strcmp(arg + 2, "help"))
+			return parse_options_usage(usagestr, options);
+		switch (parse_long_opt(ctx, arg + 2, options)) {
+		case -1:
+			return parse_options_usage(usagestr, options);
+		case -2:
+			goto unknown;
+		}
+		continue;
+unknown:
+		if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
+			return PARSE_OPT_UNKNOWN;
+		ctx->out[ctx->cpidx++] = ctx->argv[0];
+		ctx->opt = NULL;
+	}
+	return PARSE_OPT_DONE;
+}
+
+int parse_options_end(struct parse_opt_ctx_t *ctx)
+{
+	memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
+	ctx->out[ctx->cpidx + ctx->argc] = NULL;
+	return ctx->cpidx + ctx->argc;
+}
+
+int parse_options(int argc, const char **argv, const struct option *options,
+		  const char * const usagestr[], int flags)
+{
+	struct parse_opt_ctx_t ctx;
+
+	parse_options_start(&ctx, argc, argv, flags);
+	switch (parse_options_step(&ctx, options, usagestr)) {
+	case PARSE_OPT_HELP:
+		exit(129);
+	case PARSE_OPT_DONE:
+		break;
+	default: /* PARSE_OPT_UNKNOWN */
+		if (ctx.argv[0][1] == '-') {
+			error("unknown option `%s'", ctx.argv[0] + 2);
+		} else {
+			error("unknown switch `%c'", *ctx.opt);
+		}
+		usage_with_options(usagestr, options);
+	}
+
+	return parse_options_end(&ctx);
+}
+
+#define USAGE_OPTS_WIDTH 24
+#define USAGE_GAP         2
+
+int usage_with_options_internal(const char * const *usagestr,
+				const struct option *opts, int full)
+{
+	if (!usagestr)
+		return PARSE_OPT_HELP;
+
+	fprintf(stderr, "usage: %s\n", *usagestr++);
+	while (*usagestr && **usagestr)
+		fprintf(stderr, "   or: %s\n", *usagestr++);
+	while (*usagestr) {
+		fprintf(stderr, "%s%s\n",
+				**usagestr ? "    " : "",
+				*usagestr);
+		usagestr++;
+	}
+
+	if (opts->type != OPTION_GROUP)
+		fputc('\n', stderr);
+
+	for (; opts->type != OPTION_END; opts++) {
+		size_t pos;
+		int pad;
+
+		if (opts->type == OPTION_GROUP) {
+			fputc('\n', stderr);
+			if (*opts->help)
+				fprintf(stderr, "%s\n", opts->help);
+			continue;
+		}
+		if (!full && (opts->flags & PARSE_OPT_HIDDEN))
+			continue;
+
+		pos = fprintf(stderr, "    ");
+		if (opts->short_name)
+			pos += fprintf(stderr, "-%c", opts->short_name);
+		if (opts->long_name && opts->short_name)
+			pos += fprintf(stderr, ", ");
+		if (opts->long_name)
+			pos += fprintf(stderr, "--%s", opts->long_name);
+
+		switch (opts->type) {
+		case OPTION_ARGUMENT:
+			break;
+		case OPTION_INTEGER:
+			if (opts->flags & PARSE_OPT_OPTARG)
+				if (opts->long_name)
+					pos += fprintf(stderr, "[=<n>]");
+				else
+					pos += fprintf(stderr, "[<n>]");
+			else
+				pos += fprintf(stderr, " <n>");
+			break;
+		case OPTION_CALLBACK:
+			if (opts->flags & PARSE_OPT_NOARG)
+				break;
+			/* FALLTHROUGH */
+		case OPTION_STRING:
+			if (opts->argh) {
+				if (opts->flags & PARSE_OPT_OPTARG)
+					if (opts->long_name)
+						pos += fprintf(stderr, "[=<%s>]", opts->argh);
+					else
+						pos += fprintf(stderr, "[<%s>]", opts->argh);
+				else
+					pos += fprintf(stderr, " <%s>", opts->argh);
+			} else {
+				if (opts->flags & PARSE_OPT_OPTARG)
+					if (opts->long_name)
+						pos += fprintf(stderr, "[=...]");
+					else
+						pos += fprintf(stderr, "[...]");
+				else
+					pos += fprintf(stderr, " ...");
+			}
+			break;
+		default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */
+			break;
+		}
+
+		if (pos <= USAGE_OPTS_WIDTH)
+			pad = USAGE_OPTS_WIDTH - pos;
+		else {
+			fputc('\n', stderr);
+			pad = USAGE_OPTS_WIDTH;
+		}
+		fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+	}
+	fputc('\n', stderr);
+
+	return PARSE_OPT_HELP;
+}
+
+void usage_with_options(const char * const *usagestr,
+			const struct option *opts)
+{
+	usage_with_options_internal(usagestr, opts, 0);
+	exit(129);
+}
+
+int parse_options_usage(const char * const *usagestr,
+			const struct option *opts)
+{
+	return usage_with_options_internal(usagestr, opts, 0);
+}
+
+
+int parse_opt_verbosity_cb(const struct option *opt, const char *arg,
+			   int unset)
+{
+	int *target = opt->value;
+
+	if (unset)
+		/* --no-quiet, --no-verbose */
+		*target = 0;
+	else if (opt->short_name == 'v') {
+		if (*target >= 0)
+			(*target)++;
+		else
+			*target = 1;
+	} else {
+		if (*target <= 0)
+			(*target)--;
+		else
+			*target = -1;
+	}
+	return 0;
+}
diff --git a/Documentation/perf_counter/util/parse-options.h b/Documentation/perf_counter/util/parse-options.h
new file mode 100644
index 0000000..a81c7fa
--- /dev/null
+++ b/Documentation/perf_counter/util/parse-options.h
@@ -0,0 +1,172 @@
+#ifndef PARSE_OPTIONS_H
+#define PARSE_OPTIONS_H
+
+enum parse_opt_type {
+	/* special types */
+	OPTION_END,
+	OPTION_ARGUMENT,
+	OPTION_GROUP,
+	/* options with no arguments */
+	OPTION_BIT,
+	OPTION_BOOLEAN, /* _INCR would have been a better name */
+	OPTION_SET_INT,
+	OPTION_SET_PTR,
+	/* options with arguments (usually) */
+	OPTION_STRING,
+	OPTION_INTEGER,
+	OPTION_CALLBACK,
+};
+
+enum parse_opt_flags {
+	PARSE_OPT_KEEP_DASHDASH = 1,
+	PARSE_OPT_STOP_AT_NON_OPTION = 2,
+	PARSE_OPT_KEEP_ARGV0 = 4,
+	PARSE_OPT_KEEP_UNKNOWN = 8,
+	PARSE_OPT_NO_INTERNAL_HELP = 16,
+};
+
+enum parse_opt_option_flags {
+	PARSE_OPT_OPTARG  = 1,
+	PARSE_OPT_NOARG   = 2,
+	PARSE_OPT_NONEG   = 4,
+	PARSE_OPT_HIDDEN  = 8,
+	PARSE_OPT_LASTARG_DEFAULT = 16,
+};
+
+struct option;
+typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
+
+/*
+ * `type`::
+ *   holds the type of the option, you must have an OPTION_END last in your
+ *   array.
+ *
+ * `short_name`::
+ *   the character to use as a short option name, '\0' if none.
+ *
+ * `long_name`::
+ *   the long option name, without the leading dashes, NULL if none.
+ *
+ * `value`::
+ *   stores pointers to the values to be filled.
+ *
+ * `argh`::
+ *   token to explain the kind of argument this option wants. Keep it
+ *   homogenous across the repository.
+ *
+ * `help`::
+ *   the short help associated to what the option does.
+ *   Must never be NULL (except for OPTION_END).
+ *   OPTION_GROUP uses this pointer to store the group header.
+ *
+ * `flags`::
+ *   mask of parse_opt_option_flags.
+ *   PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
+ *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
+ *   PARSE_OPT_NONEG: says that this option cannot be negated
+ *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in
+ *                    the long one.
+ *
+ * `callback`::
+ *   pointer to the callback to use for OPTION_CALLBACK.
+ *
+ * `defval`::
+ *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
+ *   OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in
+ *   the value when met.
+ *   CALLBACKS can use it like they want.
+ */
+struct option {
+	enum parse_opt_type type;
+	int short_name;
+	const char *long_name;
+	void *value;
+	const char *argh;
+	const char *help;
+
+	int flags;
+	parse_opt_cb *callback;
+	intptr_t defval;
+};
+
+#define OPT_END()                   { OPTION_END }
+#define OPT_ARGUMENT(l, h)          { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) }
+#define OPT_GROUP(h)                { OPTION_GROUP, 0, NULL, NULL, NULL, (h) }
+#define OPT_BIT(s, l, v, h, b)      { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) }
+#define OPT_BOOLEAN(s, l, v, h)     { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) }
+#define OPT_SET_INT(s, l, v, h, i)  { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) }
+#define OPT_SET_PTR(s, l, v, h, p)  { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) }
+#define OPT_INTEGER(s, l, v, h)     { OPTION_INTEGER, (s), (l), (v), NULL, (h) }
+#define OPT_STRING(s, l, v, a, h)   { OPTION_STRING,  (s), (l), (v), (a), (h) }
+#define OPT_DATE(s, l, v, h) \
+	{ OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \
+	  parse_opt_approxidate_cb }
+#define OPT_CALLBACK(s, l, v, a, h, f) \
+	{ OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) }
+
+/* parse_options() will filter out the processed options and leave the
+ * non-option argments in argv[].
+ * Returns the number of arguments left in argv[].
+ */
+extern int parse_options(int argc, const char **argv,
+                         const struct option *options,
+                         const char * const usagestr[], int flags);
+
+extern NORETURN void usage_with_options(const char * const *usagestr,
+                                        const struct option *options);
+
+/*----- incremantal advanced APIs -----*/
+
+enum {
+	PARSE_OPT_HELP = -1,
+	PARSE_OPT_DONE,
+	PARSE_OPT_UNKNOWN,
+};
+
+/*
+ * It's okay for the caller to consume argv/argc in the usual way.
+ * Other fields of that structure are private to parse-options and should not
+ * be modified in any way.
+ */
+struct parse_opt_ctx_t {
+	const char **argv;
+	const char **out;
+	int argc, cpidx;
+	const char *opt;
+	int flags;
+};
+
+extern int parse_options_usage(const char * const *usagestr,
+			       const struct option *opts);
+
+extern void parse_options_start(struct parse_opt_ctx_t *ctx,
+				int argc, const char **argv, int flags);
+
+extern int parse_options_step(struct parse_opt_ctx_t *ctx,
+			      const struct option *options,
+			      const char * const usagestr[]);
+
+extern int parse_options_end(struct parse_opt_ctx_t *ctx);
+
+
+/*----- some often used options -----*/
+extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
+extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
+extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
+
+#define OPT__VERBOSE(var)  OPT_BOOLEAN('v', "verbose", (var), "be verbose")
+#define OPT__QUIET(var)    OPT_BOOLEAN('q', "quiet",   (var), "be quiet")
+#define OPT__VERBOSITY(var) \
+	{ OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
+	{ OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
+#define OPT__DRY_RUN(var)  OPT_BOOLEAN('n', "dry-run", (var), "dry run")
+#define OPT__ABBREV(var)  \
+	{ OPTION_CALLBACK, 0, "abbrev", (var), "n", \
+	  "use <n> digits to display SHA-1s", \
+	  PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }
+
+extern const char *parse_options_fix_filename(const char *prefix, const char *file);
+
+#endif
diff --git a/Documentation/perf_counter/util/path.c b/Documentation/perf_counter/util/path.c
new file mode 100644
index 0000000..a501a40
--- /dev/null
+++ b/Documentation/perf_counter/util/path.c
@@ -0,0 +1,353 @@
+/*
+ * I'm tired of doing "vsnprintf()" etc just to open a
+ * file, so here's a "return static buffer with printf"
+ * interface for paths.
+ *
+ * It's obviously not thread-safe. Sue me. But it's quite
+ * useful for doing things like
+ *
+ *   f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
+ *
+ * which is what it's designed for.
+ */
+#include "cache.h"
+
+static char bad_path[] = "/bad-path/";
+/*
+ * Two hacks:
+ */
+
+static char *get_perf_dir(void)
+{
+	return ".";
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+	size_t ret = strlen(src);
+
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memcpy(dest, src, len);
+		dest[len] = '\0';
+	}
+	return ret;
+}
+
+
+static char *get_pathname(void)
+{
+	static char pathname_array[4][PATH_MAX];
+	static int index;
+	return pathname_array[3 & ++index];
+}
+
+static char *cleanup_path(char *path)
+{
+	/* Clean it up */
+	if (!memcmp(path, "./", 2)) {
+		path += 2;
+		while (*path == '/')
+			path++;
+	}
+	return path;
+}
+
+char *mksnpath(char *buf, size_t n, const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+
+	va_start(args, fmt);
+	len = vsnprintf(buf, n, fmt, args);
+	va_end(args);
+	if (len >= n) {
+		strlcpy(buf, bad_path, n);
+		return buf;
+	}
+	return cleanup_path(buf);
+}
+
+static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
+{
+	const char *perf_dir = get_perf_dir();
+	size_t len;
+
+	len = strlen(perf_dir);
+	if (n < len + 1)
+		goto bad;
+	memcpy(buf, perf_dir, len);
+	if (len && !is_dir_sep(perf_dir[len-1]))
+		buf[len++] = '/';
+	len += vsnprintf(buf + len, n - len, fmt, args);
+	if (len >= n)
+		goto bad;
+	return cleanup_path(buf);
+bad:
+	strlcpy(buf, bad_path, n);
+	return buf;
+}
+
+char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	(void)perf_vsnpath(buf, n, fmt, args);
+	va_end(args);
+	return buf;
+}
+
+char *perf_pathdup(const char *fmt, ...)
+{
+	char path[PATH_MAX];
+	va_list args;
+	va_start(args, fmt);
+	(void)perf_vsnpath(path, sizeof(path), fmt, args);
+	va_end(args);
+	return xstrdup(path);
+}
+
+char *mkpath(const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+	char *pathname = get_pathname();
+
+	va_start(args, fmt);
+	len = vsnprintf(pathname, PATH_MAX, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+char *perf_path(const char *fmt, ...)
+{
+	const char *perf_dir = get_perf_dir();
+	char *pathname = get_pathname();
+	va_list args;
+	unsigned len;
+
+	len = strlen(perf_dir);
+	if (len > PATH_MAX-100)
+		return bad_path;
+	memcpy(pathname, perf_dir, len);
+	if (len && perf_dir[len-1] != '/')
+		pathname[len++] = '/';
+	va_start(args, fmt);
+	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+
+/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
+int perf_mkstemp(char *path, size_t len, const char *template)
+{
+	const char *tmp;
+	size_t n;
+
+	tmp = getenv("TMPDIR");
+	if (!tmp)
+		tmp = "/tmp";
+	n = snprintf(path, len, "%s/%s", tmp, template);
+	if (len <= n) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+	return mkstemp(path);
+}
+
+
+const char *make_relative_path(const char *abs, const char *base)
+{
+	static char buf[PATH_MAX + 1];
+	int baselen;
+	if (!base)
+		return abs;
+	baselen = strlen(base);
+	if (prefixcmp(abs, base))
+		return abs;
+	if (abs[baselen] == '/')
+		baselen++;
+	else if (base[baselen - 1] != '/')
+		return abs;
+	strcpy(buf, abs + baselen);
+	return buf;
+}
+
+/*
+ * It is okay if dst == src, but they should not overlap otherwise.
+ *
+ * Performs the following normalizations on src, storing the result in dst:
+ * - Ensures that components are separated by '/' (Windows only)
+ * - Squashes sequences of '/'.
+ * - Removes "." components.
+ * - Removes ".." components, and the components the precede them.
+ * Returns failure (non-zero) if a ".." component appears as first path
+ * component anytime during the normalization. Otherwise, returns success (0).
+ *
+ * Note that this function is purely textual.  It does not follow symlinks,
+ * verify the existence of the path, or make any system calls.
+ */
+int normalize_path_copy(char *dst, const char *src)
+{
+	char *dst0;
+
+	if (has_dos_drive_prefix(src)) {
+		*dst++ = *src++;
+		*dst++ = *src++;
+	}
+	dst0 = dst;
+
+	if (is_dir_sep(*src)) {
+		*dst++ = '/';
+		while (is_dir_sep(*src))
+			src++;
+	}
+
+	for (;;) {
+		char c = *src;
+
+		/*
+		 * A path component that begins with . could be
+		 * special:
+		 * (1) "." and ends   -- ignore and terminate.
+		 * (2) "./"           -- ignore them, eat slash and continue.
+		 * (3) ".." and ends  -- strip one and terminate.
+		 * (4) "../"          -- strip one, eat slash and continue.
+		 */
+		if (c == '.') {
+			if (!src[1]) {
+				/* (1) */
+				src++;
+			} else if (is_dir_sep(src[1])) {
+				/* (2) */
+				src += 2;
+				while (is_dir_sep(*src))
+					src++;
+				continue;
+			} else if (src[1] == '.') {
+				if (!src[2]) {
+					/* (3) */
+					src += 2;
+					goto up_one;
+				} else if (is_dir_sep(src[2])) {
+					/* (4) */
+					src += 3;
+					while (is_dir_sep(*src))
+						src++;
+					goto up_one;
+				}
+			}
+		}
+
+		/* copy up to the next '/', and eat all '/' */
+		while ((c = *src++) != '\0' && !is_dir_sep(c))
+			*dst++ = c;
+		if (is_dir_sep(c)) {
+			*dst++ = '/';
+			while (is_dir_sep(c))
+				c = *src++;
+			src--;
+		} else if (!c)
+			break;
+		continue;
+
+	up_one:
+		/*
+		 * dst0..dst is prefix portion, and dst[-1] is '/';
+		 * go up one level.
+		 */
+		dst--;	/* go to trailing '/' */
+		if (dst <= dst0)
+			return -1;
+		/* Windows: dst[-1] cannot be backslash anymore */
+		while (dst0 < dst && dst[-1] != '/')
+			dst--;
+	}
+	*dst = '\0';
+	return 0;
+}
+
+/*
+ * path = Canonical absolute path
+ * prefix_list = Colon-separated list of absolute paths
+ *
+ * Determines, for each path in prefix_list, whether the "prefix" really
+ * is an ancestor directory of path.  Returns the length of the longest
+ * ancestor directory, excluding any trailing slashes, or -1 if no prefix
+ * is an ancestor.  (Note that this means 0 is returned if prefix_list is
+ * "/".) "/foo" is not considered an ancestor of "/foobar".  Directories
+ * are not considered to be their own ancestors.  path must be in a
+ * canonical form: empty components, or "." or ".." components are not
+ * allowed.  prefix_list may be null, which is like "".
+ */
+int longest_ancestor_length(const char *path, const char *prefix_list)
+{
+	char buf[PATH_MAX+1];
+	const char *ceil, *colon;
+	int len, max_len = -1;
+
+	if (prefix_list == NULL || !strcmp(path, "/"))
+		return -1;
+
+	for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
+		for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
+		len = colon - ceil;
+		if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
+			continue;
+		strlcpy(buf, ceil, len+1);
+		if (normalize_path_copy(buf, buf) < 0)
+			continue;
+		len = strlen(buf);
+		if (len > 0 && buf[len-1] == '/')
+			buf[--len] = '\0';
+
+		if (!strncmp(path, buf, len) &&
+		    path[len] == '/' &&
+		    len > max_len) {
+			max_len = len;
+		}
+	}
+
+	return max_len;
+}
+
+/* strip arbitrary amount of directory separators at end of path */
+static inline int chomp_trailing_dir_sep(const char *path, int len)
+{
+	while (len && is_dir_sep(path[len - 1]))
+		len--;
+	return len;
+}
+
+/*
+ * If path ends with suffix (complete path components), returns the
+ * part before suffix (sans trailing directory separators).
+ * Otherwise returns NULL.
+ */
+char *strip_path_suffix(const char *path, const char *suffix)
+{
+	int path_len = strlen(path), suffix_len = strlen(suffix);
+
+	while (suffix_len) {
+		if (!path_len)
+			return NULL;
+
+		if (is_dir_sep(path[path_len - 1])) {
+			if (!is_dir_sep(suffix[suffix_len - 1]))
+				return NULL;
+			path_len = chomp_trailing_dir_sep(path, path_len);
+			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
+		}
+		else if (path[--path_len] != suffix[--suffix_len])
+			return NULL;
+	}
+
+	if (path_len && !is_dir_sep(path[path_len - 1]))
+		return NULL;
+	return xstrndup(path, chomp_trailing_dir_sep(path, path_len));
+}
diff --git a/Documentation/perf_counter/util/quote.c b/Documentation/perf_counter/util/quote.c
new file mode 100644
index 0000000..7a49fcf
--- /dev/null
+++ b/Documentation/perf_counter/util/quote.c
@@ -0,0 +1,478 @@
+#include "cache.h"
+#include "quote.h"
+
+int quote_path_fully = 1;
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ *
+ * E.g.
+ *  original     sq_quote     result
+ *  name     ==> name      ==> 'name'
+ *  a b      ==> a b       ==> 'a b'
+ *  a'b      ==> a'\''b    ==> 'a'\''b'
+ *  a!b      ==> a'\!'b    ==> 'a'\!'b'
+ */
+static inline int need_bs_quote(char c)
+{
+	return (c == '\'' || c == '!');
+}
+
+void sq_quote_buf(struct strbuf *dst, const char *src)
+{
+	char *to_free = NULL;
+
+	if (dst->buf == src)
+		to_free = strbuf_detach(dst, NULL);
+
+	strbuf_addch(dst, '\'');
+	while (*src) {
+		size_t len = strcspn(src, "'!");
+		strbuf_add(dst, src, len);
+		src += len;
+		while (need_bs_quote(*src)) {
+			strbuf_addstr(dst, "'\\");
+			strbuf_addch(dst, *src++);
+			strbuf_addch(dst, '\'');
+		}
+	}
+	strbuf_addch(dst, '\'');
+	free(to_free);
+}
+
+void sq_quote_print(FILE *stream, const char *src)
+{
+	char c;
+
+	fputc('\'', stream);
+	while ((c = *src++)) {
+		if (need_bs_quote(c)) {
+			fputs("'\\", stream);
+			fputc(c, stream);
+			fputc('\'', stream);
+		} else {
+			fputc(c, stream);
+		}
+	}
+	fputc('\'', stream);
+}
+
+void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
+{
+	int i;
+
+	/* Copy into destination buffer. */
+	strbuf_grow(dst, 255);
+	for (i = 0; argv[i]; ++i) {
+		strbuf_addch(dst, ' ');
+		sq_quote_buf(dst, argv[i]);
+		if (maxlen && dst->len > maxlen)
+			die("Too many or long arguments");
+	}
+}
+
+char *sq_dequote_step(char *arg, char **next)
+{
+	char *dst = arg;
+	char *src = arg;
+	char c;
+
+	if (*src != '\'')
+		return NULL;
+	for (;;) {
+		c = *++src;
+		if (!c)
+			return NULL;
+		if (c != '\'') {
+			*dst++ = c;
+			continue;
+		}
+		/* We stepped out of sq */
+		switch (*++src) {
+		case '\0':
+			*dst = 0;
+			if (next)
+				*next = NULL;
+			return arg;
+		case '\\':
+			c = *++src;
+			if (need_bs_quote(c) && *++src == '\'') {
+				*dst++ = c;
+				continue;
+			}
+		/* Fallthrough */
+		default:
+			if (!next || !isspace(*src))
+				return NULL;
+			do {
+				c = *++src;
+			} while (isspace(c));
+			*dst = 0;
+			*next = src;
+			return arg;
+		}
+	}
+}
+
+char *sq_dequote(char *arg)
+{
+	return sq_dequote_step(arg, NULL);
+}
+
+int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
+{
+	char *next = arg;
+
+	if (!*arg)
+		return 0;
+	do {
+		char *dequoted = sq_dequote_step(next, &next);
+		if (!dequoted)
+			return -1;
+		ALLOC_GROW(*argv, *nr + 1, *alloc);
+		(*argv)[(*nr)++] = dequoted;
+	} while (next);
+
+	return 0;
+}
+
+/* 1 means: quote as octal
+ * 0 means: quote as octal if (quote_path_fully)
+ * -1 means: never quote
+ * c: quote as "\\c"
+ */
+#define X8(x)   x, x, x, x, x, x, x, x
+#define X16(x)  X8(x), X8(x)
+static signed char const sq_lookup[256] = {
+	/*           0    1    2    3    4    5    6    7 */
+	/* 0x00 */   1,   1,   1,   1,   1,   1,   1, 'a',
+	/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r',   1,   1,
+	/* 0x10 */ X16(1),
+	/* 0x20 */  -1,  -1, '"',  -1,  -1,  -1,  -1,  -1,
+	/* 0x28 */ X16(-1), X16(-1), X16(-1),
+	/* 0x58 */  -1,  -1,  -1,  -1,'\\',  -1,  -1,  -1,
+	/* 0x60 */ X16(-1), X8(-1),
+	/* 0x78 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,   1,
+	/* 0x80 */ /* set to 0 */
+};
+
+static inline int sq_must_quote(char c)
+{
+	return sq_lookup[(unsigned char)c] + quote_path_fully > 0;
+}
+
+/* returns the longest prefix not needing a quote up to maxlen if positive.
+   This stops at the first \0 because it's marked as a character needing an
+   escape */
+static size_t next_quote_pos(const char *s, ssize_t maxlen)
+{
+	size_t len;
+	if (maxlen < 0) {
+		for (len = 0; !sq_must_quote(s[len]); len++);
+	} else {
+		for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++);
+	}
+	return len;
+}
+
+/*
+ * C-style name quoting.
+ *
+ * (1) if sb and fp are both NULL, inspect the input name and counts the
+ *     number of bytes that are needed to hold c_style quoted version of name,
+ *     counting the double quotes around it but not terminating NUL, and
+ *     returns it.
+ *     However, if name does not need c_style quoting, it returns 0.
+ *
+ * (2) if sb or fp are not NULL, it emits the c_style quoted version
+ *     of name, enclosed with double quotes if asked and needed only.
+ *     Return value is the same as in (1).
+ */
+static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
+                                    struct strbuf *sb, FILE *fp, int no_dq)
+{
+#undef EMIT
+#define EMIT(c)                                 \
+	do {                                        \
+		if (sb) strbuf_addch(sb, (c));          \
+		if (fp) fputc((c), fp);                 \
+		count++;                                \
+	} while (0)
+#define EMITBUF(s, l)                           \
+	do {                                        \
+		if (sb) strbuf_add(sb, (s), (l));       \
+		if (fp) fwrite((s), (l), 1, fp);        \
+		count += (l);                           \
+	} while (0)
+
+	size_t len, count = 0;
+	const char *p = name;
+
+	for (;;) {
+		int ch;
+
+		len = next_quote_pos(p, maxlen);
+		if (len == maxlen || !p[len])
+			break;
+
+		if (!no_dq && p == name)
+			EMIT('"');
+
+		EMITBUF(p, len);
+		EMIT('\\');
+		p += len;
+		ch = (unsigned char)*p++;
+		if (sq_lookup[ch] >= ' ') {
+			EMIT(sq_lookup[ch]);
+		} else {
+			EMIT(((ch >> 6) & 03) + '0');
+			EMIT(((ch >> 3) & 07) + '0');
+			EMIT(((ch >> 0) & 07) + '0');
+		}
+	}
+
+	EMITBUF(p, len);
+	if (p == name)   /* no ending quote needed */
+		return 0;
+
+	if (!no_dq)
+		EMIT('"');
+	return count;
+}
+
+size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
+{
+	return quote_c_style_counted(name, -1, sb, fp, nodq);
+}
+
+void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
+{
+	if (quote_c_style(prefix, NULL, NULL, 0) ||
+	    quote_c_style(path, NULL, NULL, 0)) {
+		if (!nodq)
+			strbuf_addch(sb, '"');
+		quote_c_style(prefix, sb, NULL, 1);
+		quote_c_style(path, sb, NULL, 1);
+		if (!nodq)
+			strbuf_addch(sb, '"');
+	} else {
+		strbuf_addstr(sb, prefix);
+		strbuf_addstr(sb, path);
+	}
+}
+
+void write_name_quoted(const char *name, FILE *fp, int terminator)
+{
+	if (terminator) {
+		quote_c_style(name, NULL, fp, 0);
+	} else {
+		fputs(name, fp);
+	}
+	fputc(terminator, fp);
+}
+
+extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
+                                 const char *name, FILE *fp, int terminator)
+{
+	int needquote = 0;
+
+	if (terminator) {
+		needquote = next_quote_pos(pfx, pfxlen) < pfxlen
+			|| name[next_quote_pos(name, -1)];
+	}
+	if (needquote) {
+		fputc('"', fp);
+		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
+		quote_c_style(name, NULL, fp, 1);
+		fputc('"', fp);
+	} else {
+		fwrite(pfx, pfxlen, 1, fp);
+		fputs(name, fp);
+	}
+	fputc(terminator, fp);
+}
+
+/* quote path as relative to the given prefix */
+char *quote_path_relative(const char *in, int len,
+			  struct strbuf *out, const char *prefix)
+{
+	int needquote;
+
+	if (len < 0)
+		len = strlen(in);
+
+	/* "../" prefix itself does not need quoting, but "in" might. */
+	needquote = next_quote_pos(in, len) < len;
+	strbuf_setlen(out, 0);
+	strbuf_grow(out, len);
+
+	if (needquote)
+		strbuf_addch(out, '"');
+	if (prefix) {
+		int off = 0;
+		while (prefix[off] && off < len && prefix[off] == in[off])
+			if (prefix[off] == '/') {
+				prefix += off + 1;
+				in += off + 1;
+				len -= off + 1;
+				off = 0;
+			} else
+				off++;
+
+		for (; *prefix; prefix++)
+			if (*prefix == '/')
+				strbuf_addstr(out, "../");
+	}
+
+	quote_c_style_counted (in, len, out, NULL, 1);
+
+	if (needquote)
+		strbuf_addch(out, '"');
+	if (!out->len)
+		strbuf_addstr(out, "./");
+
+	return out->buf;
+}
+
+/*
+ * C-style name unquoting.
+ *
+ * Quoted should point at the opening double quote.
+ * + Returns 0 if it was able to unquote the string properly, and appends the
+ *   result in the strbuf `sb'.
+ * + Returns -1 in case of error, and doesn't touch the strbuf. Though note
+ *   that this function will allocate memory in the strbuf, so calling
+ *   strbuf_release is mandatory whichever result unquote_c_style returns.
+ *
+ * Updates endp pointer to point at one past the ending double quote if given.
+ */
+int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
+{
+	size_t oldlen = sb->len, len;
+	int ch, ac;
+
+	if (*quoted++ != '"')
+		return -1;
+
+	for (;;) {
+		len = strcspn(quoted, "\"\\");
+		strbuf_add(sb, quoted, len);
+		quoted += len;
+
+		switch (*quoted++) {
+		  case '"':
+			if (endp)
+				*endp = quoted;
+			return 0;
+		  case '\\':
+			break;
+		  default:
+			goto error;
+		}
+
+		switch ((ch = *quoted++)) {
+		case 'a': ch = '\a'; break;
+		case 'b': ch = '\b'; break;
+		case 'f': ch = '\f'; break;
+		case 'n': ch = '\n'; break;
+		case 'r': ch = '\r'; break;
+		case 't': ch = '\t'; break;
+		case 'v': ch = '\v'; break;
+
+		case '\\': case '"':
+			break; /* verbatim */
+
+		/* octal values with first digit over 4 overflow */
+		case '0': case '1': case '2': case '3':
+					ac = ((ch - '0') << 6);
+			if ((ch = *quoted++) < '0' || '7' < ch)
+				goto error;
+					ac |= ((ch - '0') << 3);
+			if ((ch = *quoted++) < '0' || '7' < ch)
+				goto error;
+					ac |= (ch - '0');
+					ch = ac;
+					break;
+				default:
+			goto error;
+			}
+		strbuf_addch(sb, ch);
+		}
+
+  error:
+	strbuf_setlen(sb, oldlen);
+	return -1;
+}
+
+/* quoting as a string literal for other languages */
+
+void perl_quote_print(FILE *stream, const char *src)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	char c;
+
+	fputc(sq, stream);
+	while ((c = *src++)) {
+		if (c == sq || c == bq)
+			fputc(bq, stream);
+		fputc(c, stream);
+	}
+	fputc(sq, stream);
+}
+
+void python_quote_print(FILE *stream, const char *src)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char nl = '\n';
+	char c;
+
+	fputc(sq, stream);
+	while ((c = *src++)) {
+		if (c == nl) {
+			fputc(bq, stream);
+			fputc('n', stream);
+			continue;
+		}
+		if (c == sq || c == bq)
+			fputc(bq, stream);
+		fputc(c, stream);
+	}
+	fputc(sq, stream);
+}
+
+void tcl_quote_print(FILE *stream, const char *src)
+{
+	char c;
+
+	fputc('"', stream);
+	while ((c = *src++)) {
+		switch (c) {
+		case '[': case ']':
+		case '{': case '}':
+		case '$': case '\\': case '"':
+			fputc('\\', stream);
+		default:
+			fputc(c, stream);
+			break;
+		case '\f':
+			fputs("\\f", stream);
+			break;
+		case '\r':
+			fputs("\\r", stream);
+			break;
+		case '\n':
+			fputs("\\n", stream);
+			break;
+		case '\t':
+			fputs("\\t", stream);
+			break;
+		case '\v':
+			fputs("\\v", stream);
+			break;
+		}
+	}
+	fputc('"', stream);
+}
diff --git a/Documentation/perf_counter/util/quote.h b/Documentation/perf_counter/util/quote.h
new file mode 100644
index 0000000..5dfad89
--- /dev/null
+++ b/Documentation/perf_counter/util/quote.h
@@ -0,0 +1,68 @@
+#ifndef QUOTE_H
+#define QUOTE_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ * single quote pair.
+ *
+ * For example, if you are passing the result to system() as an
+ * argument:
+ *
+ * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
+ *
+ * would be appropriate.  If the system() is going to call ssh to
+ * run the command on the other side:
+ *
+ * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
+ * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd));
+ *
+ * Note that the above examples leak memory!  Remember to free result from
+ * sq_quote() in a real application.
+ *
+ * sq_quote_buf() writes to an existing buffer of specified size; it
+ * will return the number of characters that would have been written
+ * excluding the final null regardless of the buffer size.
+ */
+
+extern void sq_quote_print(FILE *stream, const char *src);
+
+extern void sq_quote_buf(struct strbuf *, const char *src);
+extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+
+/* This unwraps what sq_quote() produces in place, but returns
+ * NULL if the input does not look like what sq_quote would have
+ * produced.
+ */
+extern char *sq_dequote(char *);
+
+/*
+ * Same as the above, but can be used to unwrap many arguments in the
+ * same string separated by space. "next" is changed to point to the
+ * next argument that should be passed as first parameter. When there
+ * is no more argument to be dequoted, "next" is updated to point to NULL.
+ */
+extern char *sq_dequote_step(char *arg, char **next);
+extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc);
+
+extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
+extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq);
+extern void quote_two_c_style(struct strbuf *, const char *, const char *, int);
+
+extern void write_name_quoted(const char *name, FILE *, int terminator);
+extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
+                                 const char *name, FILE *, int terminator);
+
+/* quote path as relative to the given prefix */
+char *quote_path_relative(const char *in, int len,
+			  struct strbuf *out, const char *prefix);
+
+/* quoting as a string literal for other languages */
+extern void perl_quote_print(FILE *stream, const char *src);
+extern void python_quote_print(FILE *stream, const char *src);
+extern void tcl_quote_print(FILE *stream, const char *src);
+
+#endif
diff --git a/Documentation/perf_counter/util/run-command.c b/Documentation/perf_counter/util/run-command.c
new file mode 100644
index 0000000..b2f5e85
--- /dev/null
+++ b/Documentation/perf_counter/util/run-command.c
@@ -0,0 +1,395 @@
+#include "cache.h"
+#include "run-command.h"
+#include "exec_cmd.h"
+
+static inline void close_pair(int fd[2])
+{
+	close(fd[0]);
+	close(fd[1]);
+}
+
+static inline void dup_devnull(int to)
+{
+	int fd = open("/dev/null", O_RDWR);
+	dup2(fd, to);
+	close(fd);
+}
+
+int start_command(struct child_process *cmd)
+{
+	int need_in, need_out, need_err;
+	int fdin[2], fdout[2], fderr[2];
+
+	/*
+	 * In case of errors we must keep the promise to close FDs
+	 * that have been passed in via ->in and ->out.
+	 */
+
+	need_in = !cmd->no_stdin && cmd->in < 0;
+	if (need_in) {
+		if (pipe(fdin) < 0) {
+			if (cmd->out > 0)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->in = fdin[1];
+	}
+
+	need_out = !cmd->no_stdout
+		&& !cmd->stdout_to_stderr
+		&& cmd->out < 0;
+	if (need_out) {
+		if (pipe(fdout) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->out = fdout[0];
+	}
+
+	need_err = !cmd->no_stderr && cmd->err < 0;
+	if (need_err) {
+		if (pipe(fderr) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			if (need_out)
+				close_pair(fdout);
+			else if (cmd->out)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->err = fderr[0];
+	}
+
+#ifndef __MINGW32__
+	fflush(NULL);
+	cmd->pid = fork();
+	if (!cmd->pid) {
+		if (cmd->no_stdin)
+			dup_devnull(0);
+		else if (need_in) {
+			dup2(fdin[0], 0);
+			close_pair(fdin);
+		} else if (cmd->in) {
+			dup2(cmd->in, 0);
+			close(cmd->in);
+		}
+
+		if (cmd->no_stderr)
+			dup_devnull(2);
+		else if (need_err) {
+			dup2(fderr[1], 2);
+			close_pair(fderr);
+		}
+
+		if (cmd->no_stdout)
+			dup_devnull(1);
+		else if (cmd->stdout_to_stderr)
+			dup2(2, 1);
+		else if (need_out) {
+			dup2(fdout[1], 1);
+			close_pair(fdout);
+		} else if (cmd->out > 1) {
+			dup2(cmd->out, 1);
+			close(cmd->out);
+		}
+
+		if (cmd->dir && chdir(cmd->dir))
+			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
+			    cmd->dir, strerror(errno));
+		if (cmd->env) {
+			for (; *cmd->env; cmd->env++) {
+				if (strchr(*cmd->env, '='))
+					putenv((char*)*cmd->env);
+				else
+					unsetenv(*cmd->env);
+			}
+		}
+		if (cmd->preexec_cb)
+			cmd->preexec_cb();
+		if (cmd->perf_cmd) {
+			execv_perf_cmd(cmd->argv);
+		} else {
+			execvp(cmd->argv[0], (char *const*) cmd->argv);
+		}
+		exit(127);
+	}
+#else
+	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
+	const char **sargv = cmd->argv;
+	char **env = environ;
+
+	if (cmd->no_stdin) {
+		s0 = dup(0);
+		dup_devnull(0);
+	} else if (need_in) {
+		s0 = dup(0);
+		dup2(fdin[0], 0);
+	} else if (cmd->in) {
+		s0 = dup(0);
+		dup2(cmd->in, 0);
+	}
+
+	if (cmd->no_stderr) {
+		s2 = dup(2);
+		dup_devnull(2);
+	} else if (need_err) {
+		s2 = dup(2);
+		dup2(fderr[1], 2);
+	}
+
+	if (cmd->no_stdout) {
+		s1 = dup(1);
+		dup_devnull(1);
+	} else if (cmd->stdout_to_stderr) {
+		s1 = dup(1);
+		dup2(2, 1);
+	} else if (need_out) {
+		s1 = dup(1);
+		dup2(fdout[1], 1);
+	} else if (cmd->out > 1) {
+		s1 = dup(1);
+		dup2(cmd->out, 1);
+	}
+
+	if (cmd->dir)
+		die("chdir in start_command() not implemented");
+	if (cmd->env) {
+		env = copy_environ();
+		for (; *cmd->env; cmd->env++)
+			env = env_setenv(env, *cmd->env);
+	}
+
+	if (cmd->perf_cmd) {
+		cmd->argv = prepare_perf_cmd(cmd->argv);
+	}
+
+	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);
+
+	if (cmd->env)
+		free_environ(env);
+	if (cmd->perf_cmd)
+		free(cmd->argv);
+
+	cmd->argv = sargv;
+	if (s0 >= 0)
+		dup2(s0, 0), close(s0);
+	if (s1 >= 0)
+		dup2(s1, 1), close(s1);
+	if (s2 >= 0)
+		dup2(s2, 2), close(s2);
+#endif
+
+	if (cmd->pid < 0) {
+		int err = errno;
+		if (need_in)
+			close_pair(fdin);
+		else if (cmd->in)
+			close(cmd->in);
+		if (need_out)
+			close_pair(fdout);
+		else if (cmd->out)
+			close(cmd->out);
+		if (need_err)
+			close_pair(fderr);
+		return err == ENOENT ?
+			-ERR_RUN_COMMAND_EXEC :
+			-ERR_RUN_COMMAND_FORK;
+	}
+
+	if (need_in)
+		close(fdin[0]);
+	else if (cmd->in)
+		close(cmd->in);
+
+	if (need_out)
+		close(fdout[1]);
+	else if (cmd->out)
+		close(cmd->out);
+
+	if (need_err)
+		close(fderr[1]);
+
+	return 0;
+}
+
+static int wait_or_whine(pid_t pid)
+{
+	for (;;) {
+		int status, code;
+		pid_t waiting = waitpid(pid, &status, 0);
+
+		if (waiting < 0) {
+			if (errno == EINTR)
+				continue;
+			error("waitpid failed (%s)", strerror(errno));
+			return -ERR_RUN_COMMAND_WAITPID;
+		}
+		if (waiting != pid)
+			return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
+		if (WIFSIGNALED(status))
+			return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
+
+		if (!WIFEXITED(status))
+			return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
+		code = WEXITSTATUS(status);
+		switch (code) {
+		case 127:
+			return -ERR_RUN_COMMAND_EXEC;
+		case 0:
+			return 0;
+		default:
+			return -code;
+		}
+	}
+}
+
+int finish_command(struct child_process *cmd)
+{
+	return wait_or_whine(cmd->pid);
+}
+
+int run_command(struct child_process *cmd)
+{
+	int code = start_command(cmd);
+	if (code)
+		return code;
+	return finish_command(cmd);
+}
+
+static void prepare_run_command_v_opt(struct child_process *cmd,
+				      const char **argv,
+				      int opt)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->argv = argv;
+	cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
+	cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0;
+	cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
+}
+
+int run_command_v_opt(const char **argv, int opt)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	return run_command(&cmd);
+}
+
+int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	cmd.dir = dir;
+	cmd.env = env;
+	return run_command(&cmd);
+}
+
+#ifdef __MINGW32__
+static __stdcall unsigned run_thread(void *data)
+{
+	struct async *async = data;
+	return async->proc(async->fd_for_proc, async->data);
+}
+#endif
+
+int start_async(struct async *async)
+{
+	int pipe_out[2];
+
+	if (pipe(pipe_out) < 0)
+		return error("cannot create pipe: %s", strerror(errno));
+	async->out = pipe_out[0];
+
+#ifndef __MINGW32__
+	/* Flush stdio before fork() to avoid cloning buffers */
+	fflush(NULL);
+
+	async->pid = fork();
+	if (async->pid < 0) {
+		error("fork (async) failed: %s", strerror(errno));
+		close_pair(pipe_out);
+		return -1;
+	}
+	if (!async->pid) {
+		close(pipe_out[0]);
+		exit(!!async->proc(pipe_out[1], async->data));
+	}
+	close(pipe_out[1]);
+#else
+	async->fd_for_proc = pipe_out[1];
+	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
+	if (!async->tid) {
+		error("cannot create thread: %s", strerror(errno));
+		close_pair(pipe_out);
+		return -1;
+	}
+#endif
+	return 0;
+}
+
+int finish_async(struct async *async)
+{
+#ifndef __MINGW32__
+	int ret = 0;
+
+	if (wait_or_whine(async->pid))
+		ret = error("waitpid (async) failed");
+#else
+	DWORD ret = 0;
+	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
+		ret = error("waiting for thread failed: %lu", GetLastError());
+	else if (!GetExitCodeThread(async->tid, &ret))
+		ret = error("cannot get thread exit code: %lu", GetLastError());
+	CloseHandle(async->tid);
+#endif
+	return ret;
+}
+
+int run_hook(const char *index_file, const char *name, ...)
+{
+	struct child_process hook;
+	const char **argv = NULL, *env[2];
+	char index[PATH_MAX];
+	va_list args;
+	int ret;
+	size_t i = 0, alloc = 0;
+
+	if (access(perf_path("hooks/%s", name), X_OK) < 0)
+		return 0;
+
+	va_start(args, name);
+	ALLOC_GROW(argv, i + 1, alloc);
+	argv[i++] = perf_path("hooks/%s", name);
+	while (argv[i-1]) {
+		ALLOC_GROW(argv, i + 1, alloc);
+		argv[i++] = va_arg(args, const char *);
+	}
+	va_end(args);
+
+	memset(&hook, 0, sizeof(hook));
+	hook.argv = argv;
+	hook.no_stdin = 1;
+	hook.stdout_to_stderr = 1;
+	if (index_file) {
+		snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file);
+		env[0] = index;
+		env[1] = NULL;
+		hook.env = env;
+	}
+
+	ret = start_command(&hook);
+	free(argv);
+	if (ret) {
+		warning("Could not spawn %s", argv[0]);
+		return ret;
+	}
+	ret = finish_command(&hook);
+	if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
+		warning("%s exited due to uncaught signal", argv[0]);
+
+	return ret;
+}
diff --git a/Documentation/perf_counter/util/run-command.h b/Documentation/perf_counter/util/run-command.h
new file mode 100644
index 0000000..328289f
--- /dev/null
+++ b/Documentation/perf_counter/util/run-command.h
@@ -0,0 +1,93 @@
+#ifndef RUN_COMMAND_H
+#define RUN_COMMAND_H
+
+enum {
+	ERR_RUN_COMMAND_FORK = 10000,
+	ERR_RUN_COMMAND_EXEC,
+	ERR_RUN_COMMAND_PIPE,
+	ERR_RUN_COMMAND_WAITPID,
+	ERR_RUN_COMMAND_WAITPID_WRONG_PID,
+	ERR_RUN_COMMAND_WAITPID_SIGNAL,
+	ERR_RUN_COMMAND_WAITPID_NOEXIT,
+};
+#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
+
+struct child_process {
+	const char **argv;
+	pid_t pid;
+	/*
+	 * Using .in, .out, .err:
+	 * - Specify 0 for no redirections (child inherits stdin, stdout,
+	 *   stderr from parent).
+	 * - Specify -1 to have a pipe allocated as follows:
+	 *     .in: returns the writable pipe end; parent writes to it,
+	 *          the readable pipe end becomes child's stdin
+	 *     .out, .err: returns the readable pipe end; parent reads from
+	 *          it, the writable pipe end becomes child's stdout/stderr
+	 *   The caller of start_command() must close the returned FDs
+	 *   after it has completed reading from/writing to it!
+	 * - Specify > 0 to set a channel to a particular FD as follows:
+	 *     .in: a readable FD, becomes child's stdin
+	 *     .out: a writable FD, becomes child's stdout/stderr
+	 *     .err > 0 not supported
+	 *   The specified FD is closed by start_command(), even in case
+	 *   of errors!
+	 */
+	int in;
+	int out;
+	int err;
+	const char *dir;
+	const char *const *env;
+	unsigned no_stdin:1;
+	unsigned no_stdout:1;
+	unsigned no_stderr:1;
+	unsigned perf_cmd:1; /* if this is to be perf sub-command */
+	unsigned stdout_to_stderr:1;
+	void (*preexec_cb)(void);
+};
+
+int start_command(struct child_process *);
+int finish_command(struct child_process *);
+int run_command(struct child_process *);
+
+extern int run_hook(const char *index_file, const char *name, ...);
+
+#define RUN_COMMAND_NO_STDIN 1
+#define RUN_PERF_CMD	     2	/*If this is to be perf sub-command */
+#define RUN_COMMAND_STDOUT_TO_STDERR 4
+int run_command_v_opt(const char **argv, int opt);
+
+/*
+ * env (the environment) is to be formatted like environ: "VAR=VALUE".
+ * To unset an environment variable use just "VAR".
+ */
+int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env);
+
+/*
+ * The purpose of the following functions is to feed a pipe by running
+ * a function asynchronously and providing output that the caller reads.
+ *
+ * It is expected that no synchronization and mutual exclusion between
+ * the caller and the feed function is necessary so that the function
+ * can run in a thread without interfering with the caller.
+ */
+struct async {
+	/*
+	 * proc writes to fd and closes it;
+	 * returns 0 on success, non-zero on failure
+	 */
+	int (*proc)(int fd, void *data);
+	void *data;
+	int out;	/* caller reads from here and closes it */
+#ifndef __MINGW32__
+	pid_t pid;
+#else
+	HANDLE tid;
+	int fd_for_proc;
+#endif
+};
+
+int start_async(struct async *async);
+int finish_async(struct async *async);
+
+#endif
diff --git a/Documentation/perf_counter/util/strbuf.c b/Documentation/perf_counter/util/strbuf.c
new file mode 100644
index 0000000..eaba093
--- /dev/null
+++ b/Documentation/perf_counter/util/strbuf.c
@@ -0,0 +1,359 @@
+#include "cache.h"
+
+int prefixcmp(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 0;
+		else if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+}
+
+/*
+ * Used as the default ->buf value, so that people can always assume
+ * buf is non NULL and ->buf is NUL terminated even for a freshly
+ * initialized strbuf.
+ */
+char strbuf_slopbuf[1];
+
+void strbuf_init(struct strbuf *sb, size_t hint)
+{
+	sb->alloc = sb->len = 0;
+	sb->buf = strbuf_slopbuf;
+	if (hint)
+		strbuf_grow(sb, hint);
+}
+
+void strbuf_release(struct strbuf *sb)
+{
+	if (sb->alloc) {
+		free(sb->buf);
+		strbuf_init(sb, 0);
+	}
+}
+
+char *strbuf_detach(struct strbuf *sb, size_t *sz)
+{
+	char *res = sb->alloc ? sb->buf : NULL;
+	if (sz)
+		*sz = sb->len;
+	strbuf_init(sb, 0);
+	return res;
+}
+
+void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
+{
+	strbuf_release(sb);
+	sb->buf   = buf;
+	sb->len   = len;
+	sb->alloc = alloc;
+	strbuf_grow(sb, 0);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_grow(struct strbuf *sb, size_t extra)
+{
+	if (sb->len + extra + 1 <= sb->len)
+		die("you want to use way too much memory");
+	if (!sb->alloc)
+		sb->buf = NULL;
+	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
+}
+
+void strbuf_trim(struct strbuf *sb)
+{
+	char *b = sb->buf;
+	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+		sb->len--;
+	while (sb->len > 0 && isspace(*b)) {
+		b++;
+		sb->len--;
+	}
+	memmove(sb->buf, b, sb->len);
+	sb->buf[sb->len] = '\0';
+}
+void strbuf_rtrim(struct strbuf *sb)
+{
+	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+		sb->len--;
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_ltrim(struct strbuf *sb)
+{
+	char *b = sb->buf;
+	while (sb->len > 0 && isspace(*b)) {
+		b++;
+		sb->len--;
+	}
+	memmove(sb->buf, b, sb->len);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_tolower(struct strbuf *sb)
+{
+	int i;
+	for (i = 0; i < sb->len; i++)
+		sb->buf[i] = tolower(sb->buf[i]);
+}
+
+struct strbuf **strbuf_split(const struct strbuf *sb, int delim)
+{
+	int alloc = 2, pos = 0;
+	char *n, *p;
+	struct strbuf **ret;
+	struct strbuf *t;
+
+	ret = calloc(alloc, sizeof(struct strbuf *));
+	p = n = sb->buf;
+	while (n < sb->buf + sb->len) {
+		int len;
+		n = memchr(n, delim, sb->len - (n - sb->buf));
+		if (pos + 1 >= alloc) {
+			alloc = alloc * 2;
+			ret = realloc(ret, sizeof(struct strbuf *) * alloc);
+		}
+		if (!n)
+			n = sb->buf + sb->len - 1;
+		len = n - p + 1;
+		t = malloc(sizeof(struct strbuf));
+		strbuf_init(t, len);
+		strbuf_add(t, p, len);
+		ret[pos] = t;
+		ret[++pos] = NULL;
+		p = ++n;
+	}
+	return ret;
+}
+
+void strbuf_list_free(struct strbuf **sbs)
+{
+	struct strbuf **s = sbs;
+
+	while (*s) {
+		strbuf_release(*s);
+		free(*s++);
+	}
+	free(sbs);
+}
+
+int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
+{
+	int len = a->len < b->len ? a->len: b->len;
+	int cmp = memcmp(a->buf, b->buf, len);
+	if (cmp)
+		return cmp;
+	return a->len < b->len ? -1: a->len != b->len;
+}
+
+void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
+				   const void *data, size_t dlen)
+{
+	if (pos + len < pos)
+		die("you want to use way too much memory");
+	if (pos > sb->len)
+		die("`pos' is too far after the end of the buffer");
+	if (pos + len > sb->len)
+		die("`pos + len' is too far after the end of the buffer");
+
+	if (dlen >= len)
+		strbuf_grow(sb, dlen - len);
+	memmove(sb->buf + pos + dlen,
+			sb->buf + pos + len,
+			sb->len - pos - len);
+	memcpy(sb->buf + pos, data, dlen);
+	strbuf_setlen(sb, sb->len + dlen - len);
+}
+
+void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
+{
+	strbuf_splice(sb, pos, 0, data, len);
+}
+
+void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_splice(sb, pos, len, NULL, 0);
+}
+
+void strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, data, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, sb->buf + pos, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
+	int len;
+	va_list ap;
+
+	if (!strbuf_avail(sb))
+		strbuf_grow(sb, 64);
+	va_start(ap, fmt);
+	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+	va_end(ap);
+	if (len < 0)
+		die("your vsnprintf is broken");
+	if (len > strbuf_avail(sb)) {
+		strbuf_grow(sb, len);
+		va_start(ap, fmt);
+		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+		va_end(ap);
+		if (len > strbuf_avail(sb)) {
+			die("this should not happen, your snprintf is broken");
+		}
+	}
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+		   void *context)
+{
+	for (;;) {
+		const char *percent;
+		size_t consumed;
+
+		percent = strchrnul(format, '%');
+		strbuf_add(sb, format, percent - format);
+		if (!*percent)
+			break;
+		format = percent + 1;
+
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
+			strbuf_addch(sb, '%');
+	}
+}
+
+size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder,
+		void *context)
+{
+	struct strbuf_expand_dict_entry *e = context;
+	size_t len;
+
+	for (; e->placeholder && (len = strlen(e->placeholder)); e++) {
+		if (!strncmp(placeholder, e->placeholder, len)) {
+			if (e->value)
+				strbuf_addstr(sb, e->value);
+			return len;
+		}
+	}
+	return 0;
+}
+
+size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
+{
+	size_t res;
+	size_t oldalloc = sb->alloc;
+
+	strbuf_grow(sb, size);
+	res = fread(sb->buf + sb->len, 1, size, f);
+	if (res > 0)
+		strbuf_setlen(sb, sb->len + res);
+	else if (res < 0 && oldalloc == 0)
+		strbuf_release(sb);
+	return res;
+}
+
+ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
+{
+	size_t oldlen = sb->len;
+	size_t oldalloc = sb->alloc;
+
+	strbuf_grow(sb, hint ? hint : 8192);
+	for (;;) {
+		ssize_t cnt;
+
+		cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
+		if (cnt < 0) {
+			if (oldalloc == 0)
+				strbuf_release(sb);
+			else
+				strbuf_setlen(sb, oldlen);
+			return -1;
+		}
+		if (!cnt)
+			break;
+		sb->len += cnt;
+		strbuf_grow(sb, 8192);
+	}
+
+	sb->buf[sb->len] = '\0';
+	return sb->len - oldlen;
+}
+
+#define STRBUF_MAXLINK (2*PATH_MAX)
+
+int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
+{
+	size_t oldalloc = sb->alloc;
+
+	if (hint < 32)
+		hint = 32;
+
+	while (hint < STRBUF_MAXLINK) {
+		int len;
+
+		strbuf_grow(sb, hint);
+		len = readlink(path, sb->buf, hint);
+		if (len < 0) {
+			if (errno != ERANGE)
+				break;
+		} else if (len < hint) {
+			strbuf_setlen(sb, len);
+			return 0;
+		}
+
+		/* .. the buffer was too small - try again */
+		hint *= 2;
+	}
+	if (oldalloc == 0)
+		strbuf_release(sb);
+	return -1;
+}
+
+int strbuf_getline(struct strbuf *sb, FILE *fp, int term)
+{
+	int ch;
+
+	strbuf_grow(sb, 0);
+	if (feof(fp))
+		return EOF;
+
+	strbuf_reset(sb);
+	while ((ch = fgetc(fp)) != EOF) {
+		if (ch == term)
+			break;
+		strbuf_grow(sb, 1);
+		sb->buf[sb->len++] = ch;
+	}
+	if (ch == EOF && sb->len == 0)
+		return EOF;
+
+	sb->buf[sb->len] = '\0';
+	return 0;
+}
+
+int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
+{
+	int fd, len;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -1;
+	len = strbuf_read(sb, fd, hint);
+	close(fd);
+	if (len < 0)
+		return -1;
+
+	return len;
+}
diff --git a/Documentation/perf_counter/util/strbuf.h b/Documentation/perf_counter/util/strbuf.h
new file mode 100644
index 0000000..9ee908a
--- /dev/null
+++ b/Documentation/perf_counter/util/strbuf.h
@@ -0,0 +1,137 @@
+#ifndef STRBUF_H
+#define STRBUF_H
+
+/*
+ * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
+ * long, overflow safe strings.
+ *
+ * Strbufs has some invariants that are very important to keep in mind:
+ *
+ * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
+ *    build complex strings/buffers whose final size isn't easily known.
+ *
+ *    It is NOT legal to copy the ->buf pointer away.
+ *    `strbuf_detach' is the operation that detachs a buffer from its shell
+ *    while keeping the shell valid wrt its invariants.
+ *
+ * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+ *    allocated. The extra byte is used to store a '\0', allowing the ->buf
+ *    member to be a valid C-string. Every strbuf function ensure this
+ *    invariant is preserved.
+ *
+ *    Note that it is OK to "play" with the buffer directly if you work it
+ *    that way:
+ *
+ *    strbuf_grow(sb, SOME_SIZE);
+ *       ... Here, the memory array starting at sb->buf, and of length
+ *       ... strbuf_avail(sb) is all yours, and you are sure that
+ *       ... strbuf_avail(sb) is at least SOME_SIZE.
+ *    strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
+ *
+ *    Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
+ *
+ *    Doing so is safe, though if it has to be done in many places, adding the
+ *    missing API to the strbuf module is the way to go.
+ *
+ *    XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
+ *         even if it's true in the current implementation. Alloc is somehow a
+ *         "private" member that should not be messed with.
+ */
+
+#include <assert.h>
+
+extern char strbuf_slopbuf[];
+struct strbuf {
+	size_t alloc;
+	size_t len;
+	char *buf;
+};
+
+#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
+
+/*----- strbuf life cycle -----*/
+extern void strbuf_init(struct strbuf *, size_t);
+extern void strbuf_release(struct strbuf *);
+extern char *strbuf_detach(struct strbuf *, size_t *);
+extern void strbuf_attach(struct strbuf *, void *, size_t, size_t);
+static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) {
+	struct strbuf tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+/*----- strbuf size related -----*/
+static inline size_t strbuf_avail(const struct strbuf *sb) {
+	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
+}
+
+extern void strbuf_grow(struct strbuf *, size_t);
+
+static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
+	if (!sb->alloc)
+		strbuf_grow(sb, 0);
+	assert(len < sb->alloc);
+	sb->len = len;
+	sb->buf[len] = '\0';
+}
+#define strbuf_reset(sb)  strbuf_setlen(sb, 0)
+
+/*----- content related -----*/
+extern void strbuf_trim(struct strbuf *);
+extern void strbuf_rtrim(struct strbuf *);
+extern void strbuf_ltrim(struct strbuf *);
+extern int strbuf_cmp(const struct strbuf *, const struct strbuf *);
+extern void strbuf_tolower(struct strbuf *);
+
+extern struct strbuf **strbuf_split(const struct strbuf *, int delim);
+extern void strbuf_list_free(struct strbuf **);
+
+/*----- add data in your buffer -----*/
+static inline void strbuf_addch(struct strbuf *sb, int c) {
+	strbuf_grow(sb, 1);
+	sb->buf[sb->len++] = c;
+	sb->buf[sb->len] = '\0';
+}
+
+extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t);
+extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
+
+/* splice pos..pos+len with given data */
+extern void strbuf_splice(struct strbuf *, size_t pos, size_t len,
+                          const void *, size_t);
+
+extern void strbuf_add(struct strbuf *, const void *, size_t);
+static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
+	strbuf_add(sb, s, strlen(s));
+}
+static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) {
+	strbuf_add(sb, sb2->buf, sb2->len);
+}
+extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
+
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
+struct strbuf_expand_dict_entry {
+	const char *placeholder;
+	const char *value;
+};
+extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context);
+
+__attribute__((format(printf,2,3)))
+extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+
+extern size_t strbuf_fread(struct strbuf *, size_t, FILE *);
+/* XXX: if read fails, any partial read is undone */
+extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint);
+extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint);
+extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint);
+
+extern int strbuf_getline(struct strbuf *, FILE *, int);
+
+extern void stripspace(struct strbuf *buf, int skip_comments);
+extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env);
+
+extern int strbuf_branchname(struct strbuf *sb, const char *name);
+extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
+
+#endif /* STRBUF_H */
diff --git a/Documentation/perf_counter/util/usage.c b/Documentation/perf_counter/util/usage.c
new file mode 100644
index 0000000..7a10421
--- /dev/null
+++ b/Documentation/perf_counter/util/usage.c
@@ -0,0 +1,80 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "util.h"
+
+static void report(const char *prefix, const char *err, va_list params)
+{
+	char msg[1024];
+	vsnprintf(msg, sizeof(msg), err, params);
+	fprintf(stderr, "%s%s\n", prefix, msg);
+}
+
+static NORETURN void usage_builtin(const char *err)
+{
+	fprintf(stderr, "usage: %s\n", err);
+	exit(129);
+}
+
+static NORETURN void die_builtin(const char *err, va_list params)
+{
+	report("fatal: ", err, params);
+	exit(128);
+}
+
+static void error_builtin(const char *err, va_list params)
+{
+	report("error: ", err, params);
+}
+
+static void warn_builtin(const char *warn, va_list params)
+{
+	report("warning: ", warn, params);
+}
+
+/* If we are in a dlopen()ed .so write to a global variable would segfault
+ * (ugh), so keep things static. */
+static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
+static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
+static void (*error_routine)(const char *err, va_list params) = error_builtin;
+static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
+
+void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
+{
+	die_routine = routine;
+}
+
+void usage(const char *err)
+{
+	usage_routine(err);
+}
+
+void die(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	die_routine(err, params);
+	va_end(params);
+}
+
+int error(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	error_routine(err, params);
+	va_end(params);
+	return -1;
+}
+
+void warning(const char *warn, ...)
+{
+	va_list params;
+
+	va_start(params, warn);
+	warn_routine(warn, params);
+	va_end(params);
+}
diff --git a/Documentation/perf_counter/util/util.h b/Documentation/perf_counter/util/util.h
new file mode 100644
index 0000000..36e40c3
--- /dev/null
+++ b/Documentation/perf_counter/util/util.h
@@ -0,0 +1,408 @@
+#ifndef GIT_COMPAT_UTIL_H
+#define GIT_COMPAT_UTIL_H
+
+#define _FILE_OFFSET_BITS 64
+
+#ifndef FLEX_ARRAY
+/*
+ * See if our compiler is known to support flexible array members.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+# define FLEX_ARRAY /* empty */
+#elif defined(__GNUC__)
+# if (__GNUC__ >= 3)
+#  define FLEX_ARRAY /* empty */
+# else
+#  define FLEX_ARRAY 0 /* older GNU extension */
+# endif
+#endif
+
+/*
+ * Otherwise, default to safer but a bit wasteful traditional style
+ */
+#ifndef FLEX_ARRAY
+# define FLEX_ARRAY 1
+#endif
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+#ifdef __GNUC__
+#define TYPEOF(x) (__typeof__(x))
+#else
+#define TYPEOF(x)
+#endif
+
+#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits))))
+#define HAS_MULTI_BITS(i)  ((i) & ((i) - 1))  /* checks if an integer has more than 1 bit set */
+
+/* Approximation of the length of the decimal representation of this type. */
+#define decimal_length(x)	((int)(sizeof(x) * 2.56 + 0.5) + 1)
+
+#if !defined(__APPLE__) && !defined(__FreeBSD__)  && !defined(__USLC__) && !defined(_M_UNIX)
+#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
+#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
+#endif
+#define _ALL_SOURCE 1
+#define _GNU_SOURCE 1
+#define _BSD_SOURCE 1
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/time.h>
+#include <time.h>
+#include <signal.h>
+#include <fnmatch.h>
+#include <assert.h>
+#include <regex.h>
+#include <utime.h>
+#ifndef __MINGW32__
+#include <sys/wait.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#ifndef NO_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <pwd.h>
+#include <inttypes.h>
+#if defined(__CYGWIN__)
+#undef _XOPEN_SOURCE
+#include <grp.h>
+#define _XOPEN_SOURCE 600
+#include "compat/cygwin.h"
+#else
+#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
+#include <grp.h>
+#define _ALL_SOURCE 1
+#endif
+#else 	/* __MINGW32__ */
+/* pull in Windows compatibility stuff */
+#include "compat/mingw.h"
+#endif	/* __MINGW32__ */
+
+#ifndef NO_ICONV
+#include <iconv.h>
+#endif
+
+#ifndef NO_OPENSSL
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#endif
+
+/* On most systems <limits.h> would have given us this, but
+ * not on some systems (e.g. GNU/Hurd).
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#ifndef PRIuMAX
+#define PRIuMAX "llu"
+#endif
+
+#ifndef PRIu32
+#define PRIu32 "u"
+#endif
+
+#ifndef PRIx32
+#define PRIx32 "x"
+#endif
+
+#ifndef PATH_SEP
+#define PATH_SEP ':'
+#endif
+
+#ifndef STRIP_EXTENSION
+#define STRIP_EXTENSION ""
+#endif
+
+#ifndef has_dos_drive_prefix
+#define has_dos_drive_prefix(path) 0
+#endif
+
+#ifndef is_dir_sep
+#define is_dir_sep(c) ((c) == '/')
+#endif
+
+#ifdef __GNUC__
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#ifndef __attribute__
+#define __attribute__(x)
+#endif
+#endif
+
+/* General helper functions */
+extern void usage(const char *err) NORETURN;
+extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
+extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
+extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
+
+extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
+
+extern int prefixcmp(const char *str, const char *prefix);
+extern time_t tm_to_time_t(const struct tm *tm);
+
+static inline const char *skip_prefix(const char *str, const char *prefix)
+{
+	size_t len = strlen(prefix);
+	return strncmp(str, prefix, len) ? NULL : str + len;
+}
+
+#if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
+
+#ifndef PROT_READ
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define MAP_PRIVATE 1
+#define MAP_FAILED ((void*)-1)
+#endif
+
+#define mmap git_mmap
+#define munmap git_munmap
+extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
+extern int git_munmap(void *start, size_t length);
+
+#else /* NO_MMAP || USE_WIN32_MMAP */
+
+#include <sys/mman.h>
+
+#endif /* NO_MMAP || USE_WIN32_MMAP */
+
+#ifdef NO_MMAP
+
+/* This value must be multiple of (pagesize * 2) */
+#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
+
+#else /* NO_MMAP */
+
+/* This value must be multiple of (pagesize * 2) */
+#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
+	(sizeof(void*) >= 8 \
+		?  1 * 1024 * 1024 * 1024 \
+		: 32 * 1024 * 1024)
+
+#endif /* NO_MMAP */
+
+#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
+#define on_disk_bytes(st) ((st).st_size)
+#else
+#define on_disk_bytes(st) ((st).st_blocks * 512)
+#endif
+
+#define DEFAULT_PACKED_GIT_LIMIT \
+	((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
+
+#ifdef NO_PREAD
+#define pread git_pread
+extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset);
+#endif
+/*
+ * Forward decl that will remind us if its twin in cache.h changes.
+ * This function is used in compat/pread.c.  But we can't include
+ * cache.h there.
+ */
+extern ssize_t read_in_full(int fd, void *buf, size_t count);
+
+#ifdef NO_SETENV
+#define setenv gitsetenv
+extern int gitsetenv(const char *, const char *, int);
+#endif
+
+#ifdef NO_MKDTEMP
+#define mkdtemp gitmkdtemp
+extern char *gitmkdtemp(char *);
+#endif
+
+#ifdef NO_UNSETENV
+#define unsetenv gitunsetenv
+extern void gitunsetenv(const char *);
+#endif
+
+#ifdef NO_STRCASESTR
+#define strcasestr gitstrcasestr
+extern char *gitstrcasestr(const char *haystack, const char *needle);
+#endif
+
+#ifdef NO_STRLCPY
+#define strlcpy gitstrlcpy
+extern size_t gitstrlcpy(char *, const char *, size_t);
+#endif
+
+#ifdef NO_STRTOUMAX
+#define strtoumax gitstrtoumax
+extern uintmax_t gitstrtoumax(const char *, char **, int);
+#endif
+
+#ifdef NO_HSTRERROR
+#define hstrerror githstrerror
+extern const char *githstrerror(int herror);
+#endif
+
+#ifdef NO_MEMMEM
+#define memmem gitmemmem
+void *gitmemmem(const void *haystack, size_t haystacklen,
+                const void *needle, size_t needlelen);
+#endif
+
+#ifdef FREAD_READS_DIRECTORIES
+#ifdef fopen
+#undef fopen
+#endif
+#define fopen(a,b) git_fopen(a,b)
+extern FILE *git_fopen(const char*, const char*);
+#endif
+
+#ifdef SNPRINTF_RETURNS_BOGUS
+#define snprintf git_snprintf
+extern int git_snprintf(char *str, size_t maxsize,
+			const char *format, ...);
+#define vsnprintf git_vsnprintf
+extern int git_vsnprintf(char *str, size_t maxsize,
+			 const char *format, va_list ap);
+#endif
+
+#ifdef __GLIBC_PREREQ
+#if __GLIBC_PREREQ(2, 1)
+#define HAVE_STRCHRNUL
+#endif
+#endif
+
+#ifndef HAVE_STRCHRNUL
+#define strchrnul gitstrchrnul
+static inline char *gitstrchrnul(const char *s, int c)
+{
+	while (*s && *s != c)
+		s++;
+	return (char *)s;
+}
+#endif
+
+/*
+ * Wrappers:
+ */
+extern char *xstrdup(const char *str);
+extern void *xmalloc(size_t size);
+extern void *xmemdupz(const void *data, size_t len);
+extern char *xstrndup(const char *str, size_t len);
+extern void *xrealloc(void *ptr, size_t size);
+extern void *xcalloc(size_t nmemb, size_t size);
+extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
+extern ssize_t xread(int fd, void *buf, size_t len);
+extern ssize_t xwrite(int fd, const void *buf, size_t len);
+extern int xdup(int fd);
+extern FILE *xfdopen(int fd, const char *mode);
+static inline size_t xsize_t(off_t len)
+{
+	return (size_t)len;
+}
+
+static inline int has_extension(const char *filename, const char *ext)
+{
+	size_t len = strlen(filename);
+	size_t extlen = strlen(ext);
+	return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
+}
+
+/* Sane ctype - no locale, and works with signed chars */
+#undef isascii
+#undef isspace
+#undef isdigit
+#undef isalpha
+#undef isalnum
+#undef tolower
+#undef toupper
+extern unsigned char sane_ctype[256];
+#define GIT_SPACE 0x01
+#define GIT_DIGIT 0x02
+#define GIT_ALPHA 0x04
+#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
+#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define isascii(x) (((x) & ~0x7f) == 0)
+#define isspace(x) sane_istest(x,GIT_SPACE)
+#define isdigit(x) sane_istest(x,GIT_DIGIT)
+#define isalpha(x) sane_istest(x,GIT_ALPHA)
+#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
+#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
+#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define tolower(x) sane_case((unsigned char)(x), 0x20)
+#define toupper(x) sane_case((unsigned char)(x), 0)
+
+static inline int sane_case(int x, int high)
+{
+	if (sane_istest(x, GIT_ALPHA))
+		x = (x & ~0x20) | high;
+	return x;
+}
+
+static inline int strtoul_ui(char const *s, int base, unsigned int *result)
+{
+	unsigned long ul;
+	char *p;
+
+	errno = 0;
+	ul = strtoul(s, &p, base);
+	if (errno || *p || p == s || (unsigned int) ul != ul)
+		return -1;
+	*result = ul;
+	return 0;
+}
+
+static inline int strtol_i(char const *s, int base, int *result)
+{
+	long ul;
+	char *p;
+
+	errno = 0;
+	ul = strtol(s, &p, base);
+	if (errno || *p || p == s || (int) ul != ul)
+		return -1;
+	*result = ul;
+	return 0;
+}
+
+#ifdef INTERNAL_QSORT
+void git_qsort(void *base, size_t nmemb, size_t size,
+	       int(*compar)(const void *, const void *));
+#define qsort git_qsort
+#endif
+
+#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
+# define FORCE_DIR_SET_GID S_ISGID
+#else
+# define FORCE_DIR_SET_GID 0
+#endif
+
+#ifdef NO_NSEC
+#undef USE_NSEC
+#define ST_CTIME_NSEC(st) 0
+#define ST_MTIME_NSEC(st) 0
+#else
+#ifdef USE_ST_TIMESPEC
+#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
+#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
+#else
+#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
+#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
+#endif
+#endif
+
+#endif
diff --git a/Documentation/perf_counter/util/wrapper.c b/Documentation/perf_counter/util/wrapper.c
new file mode 100644
index 0000000..6350d65
--- /dev/null
+++ b/Documentation/perf_counter/util/wrapper.c
@@ -0,0 +1,206 @@
+/*
+ * Various trivial helper wrappers around standard functions
+ */
+#include "cache.h"
+
+/*
+ * There's no pack memory to release - but stay close to the Git
+ * version so wrap this away:
+ */
+static inline void release_pack_memory(size_t size, int flag)
+{
+}
+
+char *xstrdup(const char *str)
+{
+	char *ret = strdup(str);
+	if (!ret) {
+		release_pack_memory(strlen(str) + 1, -1);
+		ret = strdup(str);
+		if (!ret)
+			die("Out of memory, strdup failed");
+	}
+	return ret;
+}
+
+void *xmalloc(size_t size)
+{
+	void *ret = malloc(size);
+	if (!ret && !size)
+		ret = malloc(1);
+	if (!ret) {
+		release_pack_memory(size, -1);
+		ret = malloc(size);
+		if (!ret && !size)
+			ret = malloc(1);
+		if (!ret)
+			die("Out of memory, malloc failed");
+	}
+#ifdef XMALLOC_POISON
+	memset(ret, 0xA5, size);
+#endif
+	return ret;
+}
+
+/*
+ * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
+ * "data" to the allocated memory, zero terminates the allocated memory,
+ * and returns a pointer to the allocated memory. If the allocation fails,
+ * the program dies.
+ */
+void *xmemdupz(const void *data, size_t len)
+{
+	char *p = xmalloc(len + 1);
+	memcpy(p, data, len);
+	p[len] = '\0';
+	return p;
+}
+
+char *xstrndup(const char *str, size_t len)
+{
+	char *p = memchr(str, '\0', len);
+	return xmemdupz(str, p ? p - str : len);
+}
+
+void *xrealloc(void *ptr, size_t size)
+{
+	void *ret = realloc(ptr, size);
+	if (!ret && !size)
+		ret = realloc(ptr, 1);
+	if (!ret) {
+		release_pack_memory(size, -1);
+		ret = realloc(ptr, size);
+		if (!ret && !size)
+			ret = realloc(ptr, 1);
+		if (!ret)
+			die("Out of memory, realloc failed");
+	}
+	return ret;
+}
+
+void *xcalloc(size_t nmemb, size_t size)
+{
+	void *ret = calloc(nmemb, size);
+	if (!ret && (!nmemb || !size))
+		ret = calloc(1, 1);
+	if (!ret) {
+		release_pack_memory(nmemb * size, -1);
+		ret = calloc(nmemb, size);
+		if (!ret && (!nmemb || !size))
+			ret = calloc(1, 1);
+		if (!ret)
+			die("Out of memory, calloc failed");
+	}
+	return ret;
+}
+
+void *xmmap(void *start, size_t length,
+	int prot, int flags, int fd, off_t offset)
+{
+	void *ret = mmap(start, length, prot, flags, fd, offset);
+	if (ret == MAP_FAILED) {
+		if (!length)
+			return NULL;
+		release_pack_memory(length, fd);
+		ret = mmap(start, length, prot, flags, fd, offset);
+		if (ret == MAP_FAILED)
+			die("Out of memory? mmap failed: %s", strerror(errno));
+	}
+	return ret;
+}
+
+/*
+ * xread() is the same a read(), but it automatically restarts read()
+ * operations with a recoverable error (EAGAIN and EINTR). xread()
+ * DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
+ */
+ssize_t xread(int fd, void *buf, size_t len)
+{
+	ssize_t nr;
+	while (1) {
+		nr = read(fd, buf, len);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+/*
+ * xwrite() is the same a write(), but it automatically restarts write()
+ * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
+ * GUARANTEE that "len" bytes is written even if the operation is successful.
+ */
+ssize_t xwrite(int fd, const void *buf, size_t len)
+{
+	ssize_t nr;
+	while (1) {
+		nr = write(fd, buf, len);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+ssize_t read_in_full(int fd, void *buf, size_t count)
+{
+	char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t loaded = xread(fd, p, count);
+		if (loaded <= 0)
+			return total ? total : loaded;
+		count -= loaded;
+		p += loaded;
+		total += loaded;
+	}
+
+	return total;
+}
+
+ssize_t write_in_full(int fd, const void *buf, size_t count)
+{
+	const char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t written = xwrite(fd, p, count);
+		if (written < 0)
+			return -1;
+		if (!written) {
+			errno = ENOSPC;
+			return -1;
+		}
+		count -= written;
+		p += written;
+		total += written;
+	}
+
+	return total;
+}
+
+int xdup(int fd)
+{
+	int ret = dup(fd);
+	if (ret < 0)
+		die("dup failed: %s", strerror(errno));
+	return ret;
+}
+
+FILE *xfdopen(int fd, const char *mode)
+{
+	FILE *stream = fdopen(fd, mode);
+	if (stream == NULL)
+		die("Out of memory? fdopen failed: %s", strerror(errno));
+	return stream;
+}
+
+int xmkstemp(char *template)
+{
+	int fd;
+
+	fd = mkstemp(template);
+	if (fd < 0)
+		die("Unable to create temporary file: %s", strerror(errno));
+	return fd;
+}
diff --git a/Documentation/perf_counter/wrapper.c b/Documentation/perf_counter/wrapper.c
deleted file mode 100644
index 6350d65..0000000
--- a/Documentation/perf_counter/wrapper.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Various trivial helper wrappers around standard functions
- */
-#include "cache.h"
-
-/*
- * There's no pack memory to release - but stay close to the Git
- * version so wrap this away:
- */
-static inline void release_pack_memory(size_t size, int flag)
-{
-}
-
-char *xstrdup(const char *str)
-{
-	char *ret = strdup(str);
-	if (!ret) {
-		release_pack_memory(strlen(str) + 1, -1);
-		ret = strdup(str);
-		if (!ret)
-			die("Out of memory, strdup failed");
-	}
-	return ret;
-}
-
-void *xmalloc(size_t size)
-{
-	void *ret = malloc(size);
-	if (!ret && !size)
-		ret = malloc(1);
-	if (!ret) {
-		release_pack_memory(size, -1);
-		ret = malloc(size);
-		if (!ret && !size)
-			ret = malloc(1);
-		if (!ret)
-			die("Out of memory, malloc failed");
-	}
-#ifdef XMALLOC_POISON
-	memset(ret, 0xA5, size);
-#endif
-	return ret;
-}
-
-/*
- * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
- * "data" to the allocated memory, zero terminates the allocated memory,
- * and returns a pointer to the allocated memory. If the allocation fails,
- * the program dies.
- */
-void *xmemdupz(const void *data, size_t len)
-{
-	char *p = xmalloc(len + 1);
-	memcpy(p, data, len);
-	p[len] = '\0';
-	return p;
-}
-
-char *xstrndup(const char *str, size_t len)
-{
-	char *p = memchr(str, '\0', len);
-	return xmemdupz(str, p ? p - str : len);
-}
-
-void *xrealloc(void *ptr, size_t size)
-{
-	void *ret = realloc(ptr, size);
-	if (!ret && !size)
-		ret = realloc(ptr, 1);
-	if (!ret) {
-		release_pack_memory(size, -1);
-		ret = realloc(ptr, size);
-		if (!ret && !size)
-			ret = realloc(ptr, 1);
-		if (!ret)
-			die("Out of memory, realloc failed");
-	}
-	return ret;
-}
-
-void *xcalloc(size_t nmemb, size_t size)
-{
-	void *ret = calloc(nmemb, size);
-	if (!ret && (!nmemb || !size))
-		ret = calloc(1, 1);
-	if (!ret) {
-		release_pack_memory(nmemb * size, -1);
-		ret = calloc(nmemb, size);
-		if (!ret && (!nmemb || !size))
-			ret = calloc(1, 1);
-		if (!ret)
-			die("Out of memory, calloc failed");
-	}
-	return ret;
-}
-
-void *xmmap(void *start, size_t length,
-	int prot, int flags, int fd, off_t offset)
-{
-	void *ret = mmap(start, length, prot, flags, fd, offset);
-	if (ret == MAP_FAILED) {
-		if (!length)
-			return NULL;
-		release_pack_memory(length, fd);
-		ret = mmap(start, length, prot, flags, fd, offset);
-		if (ret == MAP_FAILED)
-			die("Out of memory? mmap failed: %s", strerror(errno));
-	}
-	return ret;
-}
-
-/*
- * xread() is the same a read(), but it automatically restarts read()
- * operations with a recoverable error (EAGAIN and EINTR). xread()
- * DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
- */
-ssize_t xread(int fd, void *buf, size_t len)
-{
-	ssize_t nr;
-	while (1) {
-		nr = read(fd, buf, len);
-		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
-			continue;
-		return nr;
-	}
-}
-
-/*
- * xwrite() is the same a write(), but it automatically restarts write()
- * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
- * GUARANTEE that "len" bytes is written even if the operation is successful.
- */
-ssize_t xwrite(int fd, const void *buf, size_t len)
-{
-	ssize_t nr;
-	while (1) {
-		nr = write(fd, buf, len);
-		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
-			continue;
-		return nr;
-	}
-}
-
-ssize_t read_in_full(int fd, void *buf, size_t count)
-{
-	char *p = buf;
-	ssize_t total = 0;
-
-	while (count > 0) {
-		ssize_t loaded = xread(fd, p, count);
-		if (loaded <= 0)
-			return total ? total : loaded;
-		count -= loaded;
-		p += loaded;
-		total += loaded;
-	}
-
-	return total;
-}
-
-ssize_t write_in_full(int fd, const void *buf, size_t count)
-{
-	const char *p = buf;
-	ssize_t total = 0;
-
-	while (count > 0) {
-		ssize_t written = xwrite(fd, p, count);
-		if (written < 0)
-			return -1;
-		if (!written) {
-			errno = ENOSPC;
-			return -1;
-		}
-		count -= written;
-		p += written;
-		total += written;
-	}
-
-	return total;
-}
-
-int xdup(int fd)
-{
-	int ret = dup(fd);
-	if (ret < 0)
-		die("dup failed: %s", strerror(errno));
-	return ret;
-}
-
-FILE *xfdopen(int fd, const char *mode)
-{
-	FILE *stream = fdopen(fd, mode);
-	if (stream == NULL)
-		die("Out of memory? fdopen failed: %s", strerror(errno));
-	return stream;
-}
-
-int xmkstemp(char *template)
-{
-	int fd;
-
-	fd = mkstemp(template);
-	if (fd < 0)
-		die("Unable to create temporary file: %s", strerror(errno));
-	return fd;
-}
-- 
cgit v0.10.2


From b2ba83ff4f4405cebc10884121ee71338a1a6c94 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 26 Apr 2009 23:38:08 -0700
Subject: x86: apic: Remove duplicated macros

XAPIC_DEST_* is dupliicated to the one in apicdef.h

[ Impact: cleanup ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <49F552D0.5050505@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9cfe1f4..344eee4 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -173,13 +173,6 @@ static inline int is_WPEG(struct rio_detail *rio){
 		rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
 }
 
-
-/* In clustered mode, the high nibble of APIC ID is a cluster number.
- * The low nibble is a 4-bit bitmap. */
-#define XAPIC_DEST_CPUS_SHIFT	4
-#define XAPIC_DEST_CPUS_MASK	((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
-#define XAPIC_DEST_CLUSTER_MASK	(XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
-
 #define SUMMIT_APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
 
 static const struct cpumask *summit_target_cpus(void)
-- 
cgit v0.10.2


From e0e42142bab96404de535cceb85d6533d5ad7942 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 26 Apr 2009 23:39:38 -0700
Subject: x86: Use dmi check in apic_is_clustered() on 64-bit to mark the TSC
 unstable

We will have systems with 2 and more sockets 8cores/2thread,
but we treat them as multi chassis - while they could have
a stable TSC domain.

Use DMI check instead.

[ Impact: do not turn possibly stable TSCs off incorrectly ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Ravikiran Thirumalai <kiran@scalex86.org>
LKML-Reference: <49F5532A.5000802@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 1386dbe..28f747d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2138,31 +2138,14 @@ static void apic_pm_activate(void) { }
 #endif	/* CONFIG_PM */
 
 #ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
+
+static int __cpuinit apic_cluster_num(void)
 {
 	int i, clusters, zeros;
 	unsigned id;
 	u16 *bios_cpu_apicid;
 	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
 
-	/*
-	 * there is not this kind of box with AMD CPU yet.
-	 * Some AMD box with quadcore cpu and 8 sockets apicid
-	 * will be [4, 0x23] or [8, 0x27] could be thought to
-	 * vsmp box still need checking...
-	 */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-		return 0;
-
 	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
@@ -2198,18 +2181,67 @@ __cpuinit int apic_is_clustered_box(void)
 			++zeros;
 	}
 
-	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-	 * not guaranteed to be synced between boards
-	 */
-	if (is_vsmp_box() && clusters > 1)
+	return clusters;
+}
+
+static int __cpuinitdata multi_checked;
+static int __cpuinitdata multi;
+
+static int __cpuinit set_multi(const struct dmi_system_id *d)
+{
+	if (multi)
+		return 0;
+	printk(KERN_INFO "APIC: %s detected, Multi Chassis\n", d->ident);
+	multi = 1;
+	return 0;
+}
+
+static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
+	{
+		.callback = set_multi,
+		.ident = "IBM System Summit2",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
+		},
+	},
+	{}
+};
+
+static void __cpuinit dmi_check_multi(void)
+{
+	if (multi_checked)
+		return;
+
+	dmi_check_system(multi_dmi_table);
+	multi_checked = 1;
+}
+
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis.
+ * Use DMI to check them
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+	dmi_check_multi();
+	if (multi)
 		return 1;
 
+	if (!is_vsmp_box())
+		return 0;
+
 	/*
-	 * If clusters > 2, then should be multi-chassis.
-	 * May have to revisit this when multi-core + hyperthreaded CPUs come
-	 * out, but AFAIK this will work even for them.
+	 * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+	 * not guaranteed to be synced between boards
 	 */
-	return (clusters > 2);
+	if (apic_cluster_num() > 1)
+		return 1;
+
+	return 0;
 }
 #endif
 
-- 
cgit v0.10.2


From 47c8a08bbe77ad3c06f63919a14b0f0b0cd54390 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 27 Apr 2009 17:34:39 +0900
Subject: sh: rtc-generic support.

This adds rtc-generic support for SUPERH32.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 1647489..9db02ce 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -30,6 +30,7 @@ config SUPERH32
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_ARCH_KGDB
 	select ARCH_HIBERNATION_POSSIBLE if MMU
+	select RTC_LIB
 
 config SUPERH64
 	def_bool ARCH = "sh64"
diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h
index f7b010d..52b0c2d 100644
--- a/arch/sh/include/asm/rtc.h
+++ b/arch/sh/include/asm/rtc.h
@@ -6,6 +6,17 @@ extern void (*board_time_init)(void);
 extern void (*rtc_sh_get_time)(struct timespec *);
 extern int (*rtc_sh_set_time)(const time_t);
 
+/* some dummy definitions */
+#define RTC_BATT_BAD 0x100	/* battery bad */
+#define RTC_SQWE 0x08		/* enable square-wave output */
+#define RTC_DM_BINARY 0x04	/* all time/date values are BCD if clear */
+#define RTC_24H 0x02		/* 24 hour mode - else hours bit 7 means pm */
+#define RTC_DST_EN 0x01	        /* auto switch DST - works f. USA only */
+
+struct rtc_time;
+unsigned int get_rtc_time(struct rtc_time *);
+int set_rtc_time(struct rtc_time *);
+
 #define RTC_CAP_4_DIGIT_YEAR	(1 << 0)
 
 struct sh_rtc_platform_info {
diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index c770413..109409f 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -19,6 +19,7 @@
 #include <linux/mc146818rtc.h>	/* for rtc_lock */
 #include <linux/platform_device.h>
 #include <linux/smp.h>
+#include <linux/rtc.h>
 #include <asm/clock.h>
 #include <asm/rtc.h>
 #include <asm/timer.h>
@@ -45,6 +46,28 @@ static int null_rtc_set_time(const time_t secs)
 void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
 int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
 
+unsigned int get_rtc_time(struct rtc_time *tm)
+{
+	if (rtc_sh_get_time != null_rtc_get_time) {
+		struct timespec tv;
+
+		rtc_sh_get_time(&tv);
+		rtc_time_to_tm(tv.tv_sec, tm);
+	}
+
+	return RTC_24H;
+}
+EXPORT_SYMBOL(get_rtc_time);
+
+int set_rtc_time(struct rtc_time *tm)
+{
+	unsigned long secs;
+
+	rtc_tm_to_time(tm, &secs);
+	return rtc_sh_set_time(secs);
+}
+EXPORT_SYMBOL(set_rtc_time);
+
 #ifndef CONFIG_GENERIC_TIME
 void do_gettimeofday(struct timeval *tv)
 {
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 4e9851f..277d35d 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -692,7 +692,7 @@ config RTC_DRV_GENERIC
 	tristate "Generic RTC support"
 	# Please consider writing a new RTC driver instead of using the generic
 	# RTC abstraction
-	depends on PARISC || M68K || PPC
+	depends on PARISC || M68K || PPC || SUPERH32
 	help
 	  Say Y or M here to enable RTC support on systems using the generic
 	  RTC abstraction. If you do not know what you are doing, you should
-- 
cgit v0.10.2


From c2e0090c668fc99f5be65fd9907da781cb6a2ef5 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Mon, 27 Apr 2009 17:54:41 +0900
Subject: sh: mach-r2d: add physmap-flash support for R2D+ boards.

The RTS7751R2D_1 boards only support 1MB of socket-mounted MBM29F040
flash, which we just leave alone as it's not terribly interesting.

This adds support for the s29gl256p on the r2d+ boards that makes a bit
more sense to expose to the user.

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-r2d/setup.c b/arch/sh/boards/mach-r2d/setup.c
index c585be0..a625ecb 100644
--- a/arch/sh/boards/mach-r2d/setup.c
+++ b/arch/sh/boards/mach-r2d/setup.c
@@ -10,6 +10,9 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
 #include <linux/ata_platform.h>
 #include <linux/sm501.h>
 #include <linux/sm501-regs.h>
@@ -181,6 +184,50 @@ static struct platform_device sm501_device = {
 	.resource	= sm501_resources,
 };
 
+static struct mtd_partition r2d_partitions[] = {
+	{
+		.name		= "U-Boot",
+		.offset		= 0x00000000,
+		.size		= 0x00040000,
+		.mask_flags	= MTD_WRITEABLE,
+	}, {
+		.name		= "Environment",
+		.offset		= MTDPART_OFS_NXTBLK,
+		.size		= 0x00040000,
+		.mask_flags	= MTD_WRITEABLE,
+	}, {
+		.name		= "Kernel",
+		.offset		= MTDPART_OFS_NXTBLK,
+		.size		= 0x001c0000,
+	}, {
+		.name		= "Flash_FS",
+		.offset		= MTDPART_OFS_NXTBLK,
+		.size		= MTDPART_SIZ_FULL,
+	}
+};
+
+static struct physmap_flash_data flash_data = {
+	.width		= 2,
+	.nr_parts	= ARRAY_SIZE(r2d_partitions),
+	.parts		= r2d_partitions,
+};
+
+static struct resource flash_resource = {
+	.start		= 0x00000000,
+	.end		= 0x02000000,
+	.flags		= IORESOURCE_MEM,
+};
+
+static struct platform_device flash_device = {
+	.name		= "physmap-flash",
+	.id		= -1,
+	.resource	= &flash_resource,
+	.num_resources	= 1,
+	.dev		= {
+		.platform_data = &flash_data,
+	},
+};
+
 static struct platform_device *rts7751r2d_devices[] __initdata = {
 	&sm501_device,
 	&heartbeat_device,
@@ -203,6 +250,9 @@ static int __init rts7751r2d_devices_setup(void)
 	if (register_trapped_io(&cf_trapped_io) == 0)
 		platform_device_register(&cf_ide_device);
 
+	if (mach_is_r2d_plus())
+		platform_device_register(&flash_device);
+
 	spi_register_board_info(spi_bus, ARRAY_SIZE(spi_bus));
 
 	return platform_add_devices(rts7751r2d_devices,
diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig
index 2bc1c97..a860435 100644
--- a/arch/sh/configs/rts7751r2dplus_defconfig
+++ b/arch/sh/configs/rts7751r2dplus_defconfig
@@ -424,7 +424,92 @@ CONFIG_FIRMWARE_IN_KERNEL=y
 CONFIG_EXTRA_FIRMWARE=""
 # CONFIG_SYS_HYPERVISOR is not set
 # CONFIG_CONNECTOR is not set
-# CONFIG_MTD is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+# CONFIG_MTD_BLKDEVS is not set
+# CONFIG_MTD_BLOCK is not set
+# CONFIG_MTD_BLOCK_RO is not set
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
+# CONFIG_MTD_INTEL_VR_NOR is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_PMC551 is not set
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
 # CONFIG_PARPORT is not set
 CONFIG_BLK_DEV=y
 # CONFIG_BLK_CPQ_CISS_DA is not set
-- 
cgit v0.10.2


From 788c9700e7855f8a8cc8875e30d2518b57385c20 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 26 Apr 2009 14:21:59 +0100
Subject: [ARM] Kconfig: sort ARM machine class support choice list by option
 name

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e02b893..a930e5c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -253,6 +253,14 @@ config ARCH_CLPS711X
 	help
 	  Support for Cirrus Logic 711x/721x based boards.
 
+config ARCH_GEMINI
+	bool "Cortina Systems Gemini"
+	select CPU_FA526
+	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
+	help
+	  Support for the Cortina Systems Gemini family SoCs
+
 config ARCH_EBSA110
 	bool "EBSA-110"
 	select CPU_SA110
@@ -276,14 +284,6 @@ config ARCH_EP93XX
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
 
-config ARCH_GEMINI
-	bool "Cortina Systems Gemini"
-	select CPU_FA526
-	select GENERIC_GPIO
-	select ARCH_REQUIRE_GPIOLIB
-	help
-	  Support for the Cortina Systems Gemini family SoCs
-
 config ARCH_FOOTBRIDGE
 	bool "FootBridge"
 	select CPU_SA110
@@ -292,6 +292,16 @@ config ARCH_FOOTBRIDGE
 	  Support for systems based on the DC21285 companion chip
 	  ("FootBridge"), such as the Simtec CATS and the Rebel NetWinder.
 
+config ARCH_MXC
+	bool "Freescale MXC/iMX-based"
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select ARCH_MTD_XIP
+	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
+	help
+	  Support for Freescale MXC/iMX-based family of processors
+
 config ARCH_NETX
 	bool "Hilscher NetX based"
 	select CPU_ARM926T
@@ -404,28 +414,6 @@ config ARCH_KIRKWOOD
 	  Support for the following Marvell Kirkwood series SoCs:
 	  88F6180, 88F6192 and 88F6281.
 
-config ARCH_KS8695
-	bool "Micrel/Kendin KS8695"
-	select CPU_ARM922T
-	select GENERIC_GPIO
-        select ARCH_REQUIRE_GPIOLIB
-	help
-	  Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based
-	  System-on-Chip devices.
-
-config ARCH_NS9XXX
-	bool "NetSilicon NS9xxx"
-	select CPU_ARM926T
-	select GENERIC_GPIO
-	select GENERIC_TIME
-	select GENERIC_CLOCKEVENTS
-	select HAVE_CLK
-	help
-	  Say Y here if you intend to run this kernel on a NetSilicon NS9xxx
-	  System.
-
-	  <http://www.digi.com/products/microprocessors/index.jsp>
-
 config ARCH_LOKI
 	bool "Marvell Loki (88RC8480)"
 	select CPU_FEROCEON
@@ -447,16 +435,6 @@ config ARCH_MV78XX0
 	  Support for the following Marvell MV78xx0 series SoCs:
 	  MV781x0, MV782x0.
 
-config ARCH_MXC
-	bool "Freescale MXC/iMX-based"
-	select GENERIC_TIME
-	select GENERIC_CLOCKEVENTS
-	select ARCH_MTD_XIP
-	select GENERIC_GPIO
-	select ARCH_REQUIRE_GPIOLIB
-	help
-	  Support for Freescale MXC/iMX-based family of processors
-
 config ARCH_ORION5X
 	bool "Marvell Orion"
 	depends on MMU
@@ -471,6 +449,49 @@ config ARCH_ORION5X
 	  Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182),
 	  Orion-2 (5281), Orion-1-90 (6183).
 
+config ARCH_MMP
+	bool "Marvell PXA168/910"
+	depends on MMU
+	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
+	select HAVE_CLK
+	select COMMON_CLKDEV
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select TICK_ONESHOT
+	select PLAT_PXA
+	help
+	  Support for Marvell's PXA168/910 processor line.
+
+config ARCH_KS8695
+	bool "Micrel/Kendin KS8695"
+	select CPU_ARM922T
+	select GENERIC_GPIO
+        select ARCH_REQUIRE_GPIOLIB
+	help
+	  Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based
+	  System-on-Chip devices.
+
+config ARCH_NS9XXX
+	bool "NetSilicon NS9xxx"
+	select CPU_ARM926T
+	select GENERIC_GPIO
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select HAVE_CLK
+	help
+	  Say Y here if you intend to run this kernel on a NetSilicon NS9xxx
+	  System.
+
+	  <http://www.digi.com/products/microprocessors/index.jsp>
+
+config ARCH_W90X900
+	bool "Nuvoton W90X900 CPU"
+	select CPU_ARM926T
+	help
+		Support for Nuvoton (Winbond logic dept.) ARM9 processor,You
+		can login www.mcuos.com or www.nuvoton.com to know more.
+
 config ARCH_PNX4008
 	bool "Philips Nexperia PNX4008 Mobile"
 	select CPU_ARM926T
@@ -495,19 +516,16 @@ config ARCH_PXA
 	help
 	  Support for Intel/Marvell's PXA2xx/PXA3xx processor line.
 
-config ARCH_MMP
-	bool "Marvell PXA168/910"
-	depends on MMU
-	select GENERIC_GPIO
-	select ARCH_REQUIRE_GPIOLIB
-	select HAVE_CLK
-	select COMMON_CLKDEV
+config ARCH_MSM
+	bool "Qualcomm MSM"
+	select CPU_V6
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
-	select TICK_ONESHOT
-	select PLAT_PXA
 	help
-	  Support for Marvell's PXA168/910 processor line.
+	  Support for Qualcomm MSM7K based systems.  This runs on the ARM11
+	  apps processor of the MSM7K and depends on a shared memory
+	  interface to the ARM9 modem processor which runs the baseband stack
+	  and controls some vital subsystems (clock and power control, etc).
 
 config ARCH_RPC
 	bool "RiscPC"
@@ -598,24 +616,6 @@ config ARCH_OMAP
 	help
 	  Support for TI's OMAP platform (OMAP1 and OMAP2).
 
-config ARCH_MSM
-	bool "Qualcomm MSM"
-	select CPU_V6
-	select GENERIC_TIME
-	select GENERIC_CLOCKEVENTS
-	help
-	  Support for Qualcomm MSM7K based systems.  This runs on the ARM11
-	  apps processor of the MSM7K and depends on a shared memory
-	  interface to the ARM9 modem processor which runs the baseband stack
-	  and controls some vital subsystems (clock and power control, etc).
-
-config ARCH_W90X900
-	bool "Nuvoton W90X900 CPU"
-	select CPU_ARM926T
-	help
-		Support for Nuvoton (Winbond logic dept.) ARM9 processor,You
-		can login www.mcuos.com or www.nuvoton.com to know more.
-
 endchoice
 
 source "arch/arm/mach-clps711x/Kconfig"
-- 
cgit v0.10.2


From 9bef5de1e0f8915547124082e5c27c63cfa5c2fd Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Wed, 22 Apr 2009 23:51:15 +0100
Subject: [ARM] 5461/1: Freescale STMP platform support

Header files for STMP37xx boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp37xx/include/mach/entry-macro.S b/arch/arm/mach-stmp37xx/include/mach/entry-macro.S
new file mode 100644
index 0000000..fed2787
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/entry-macro.S
@@ -0,0 +1,37 @@
+/*
+ * Low-level IRQ helper macros for Freescale STMP37XX
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+		.macro	disable_fiq
+		.endm
+
+		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
+
+	        mov	\base, #0xf0000000	@ vm address of IRQ controller
+		ldr	\irqnr, [\base, #0x30]	@ HW_ICOLL_STAT
+		cmp	\irqnr, #0x3f
+		movne	\irqstat, #0		@ Ack this IRQ
+		strne	\irqstat, [\base, #0x00]@ HW_ICOLL_VECTOR
+		moveqs	\irqnr, #0		@ Zero flag set for no IRQ
+
+		.endm
+
+                .macro  get_irqnr_preamble, base, tmp
+                .endm
+
+                .macro  arch_ret_to_user, tmp1, tmp2
+                .endm
diff --git a/arch/arm/mach-stmp37xx/include/mach/irqs.h b/arch/arm/mach-stmp37xx/include/mach/irqs.h
new file mode 100644
index 0000000..98f1293
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/irqs.h
@@ -0,0 +1,99 @@
+/*
+ * Freescale STMP37XX interrupts
+ *
+ * Copyright (C) 2005 Sigmatel Inc
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef _ASM_ARCH_IRQS_H
+#define _ASM_ARCH_IRQS_H
+
+#define IRQ_DEBUG_UART	         0
+#define IRQ_COMMS_RX	           1
+#define IRQ_COMMS_TX	           1
+#define IRQ_SSP2_ERROR	         2
+#define IRQ_VDD5V	              3
+#define IRQ_HEADPHONE_SHORT	    4
+#define IRQ_DAC_DMA	            5
+#define IRQ_DAC_ERROR	          6
+#define IRQ_ADC_DMA	            7
+#define IRQ_ADC_ERROR	          8
+#define IRQ_SPDIF_DMA	          9
+#define IRQ_SAIF2_DMA	          9
+#define IRQ_SPDIF_ERROR	        10
+#define IRQ_SAIF1_IRQ	          10
+#define IRQ_SAIF2_IRQ	          10
+#define IRQ_USB_CTRL	           11
+#define IRQ_USB_WAKEUP	         12
+#define IRQ_GPMI_DMA	           13
+#define IRQ_SSP1_DMA	           14
+#define IRQ_SSP_ERROR	          15
+#define IRQ_GPIO0	              16
+#define IRQ_GPIO1	              17
+#define IRQ_GPIO2	              18
+#define IRQ_SAIF1_DMA	          19
+#define IRQ_SSP2_DMA	           20
+#define IRQ_ECC8_IRQ	           21
+#define IRQ_RTC_ALARM	          22
+#define IRQ_UARTAPP_TX_DMA	     23
+#define IRQ_UARTAPP_INTERNAL	   24
+#define IRQ_UARTAPP_RX_DMA	     25
+#define IRQ_I2C_DMA	            26
+#define IRQ_I2C_ERROR	          27
+#define IRQ_TIMER0	             28
+#define IRQ_TIMER1	             29
+#define IRQ_TIMER2	             30
+#define IRQ_TIMER3	             31
+#define IRQ_BATT_BRNOUT	        32
+#define IRQ_VDDD_BRNOUT	        33
+#define IRQ_VDDIO_BRNOUT	       34
+#define IRQ_VDD18_BRNOUT	       35
+#define IRQ_TOUCH_DETECT	       36
+#define IRQ_LRADC_CH0	          37
+#define IRQ_LRADC_CH1	          38
+#define IRQ_LRADC_CH2	          39
+#define IRQ_LRADC_CH3	          40
+#define IRQ_LRADC_CH4	          41
+#define IRQ_LRADC_CH5	          42
+#define IRQ_LRADC_CH6	          43
+#define IRQ_LRADC_CH7	          44
+#define IRQ_LCDIF_DMA	          45
+#define IRQ_LCDIF_ERROR	        46
+#define IRQ_DIGCTL_DEBUG_TRAP	  47
+#define IRQ_RTC_1MSEC	          48
+#define IRQ_DRI_DMA	            49
+#define IRQ_DRI_ATTENTION	      50
+#define IRQ_GPMI_ATTENTION	     51
+#define IRQ_IR	                 52
+#define IRQ_DCP_VMI	            53
+#define IRQ_DCP	                54
+#define IRQ_RESERVED_55	        55
+#define IRQ_RESERVED_56	        56
+#define IRQ_RESERVED_57	        57
+#define IRQ_RESERVED_58	        58
+#define IRQ_RESERVED_59	        59
+#define SW_IRQ_60	              60
+#define SW_IRQ_61	              61
+#define SW_IRQ_62	              62
+#define SW_IRQ_63	              63
+
+#define NR_REAL_IRQS		64
+#define NR_IRQS			(NR_REAL_IRQS + 32 * 3)
+
+/* TIMER and BRNOUT are FIQ capable */
+#define FIQ_START			IRQ_TIMER0
+
+/* Hard disk IRQ is a GPMI attention IRQ */
+#define IRQ_HARDDISK		IRQ_GPMI_ATTENTION
+
+#endif /* _ASM_ARCH_IRQS_H */
diff --git a/arch/arm/mach-stmp37xx/include/mach/pins.h b/arch/arm/mach-stmp37xx/include/mach/pins.h
new file mode 100644
index 0000000..d56de0c
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/pins.h
@@ -0,0 +1,147 @@
+/*
+ * Freescale STMP37XX SoC pin multiplexing
+ *
+ * Author: Vladislav Buzov <vbuzov@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_ARCH_PINS_H
+#define __ASM_ARCH_PINS_H
+
+/*
+ * Define all STMP37XX pins, a pin name corresponds to a STMP37xx hardware
+ * interface  this pin belongs to.
+ */
+
+/* Bank 0 */
+#define PINID_GPMI_D00		STMP3XXX_PINID(0, 0)
+#define PINID_GPMI_D01		STMP3XXX_PINID(0, 1)
+#define PINID_GPMI_D02		STMP3XXX_PINID(0, 2)
+#define PINID_GPMI_D03		STMP3XXX_PINID(0, 3)
+#define PINID_GPMI_D04		STMP3XXX_PINID(0, 4)
+#define PINID_GPMI_D05		STMP3XXX_PINID(0, 5)
+#define PINID_GPMI_D06		STMP3XXX_PINID(0, 6)
+#define PINID_GPMI_D07		STMP3XXX_PINID(0, 7)
+#define PINID_GPMI_D08		STMP3XXX_PINID(0, 8)
+#define PINID_GPMI_D09		STMP3XXX_PINID(0, 9)
+#define PINID_GPMI_D10		STMP3XXX_PINID(0, 10)
+#define PINID_GPMI_D11		STMP3XXX_PINID(0, 11)
+#define PINID_GPMI_D12		STMP3XXX_PINID(0, 12)
+#define PINID_GPMI_D13		STMP3XXX_PINID(0, 13)
+#define PINID_GPMI_D14		STMP3XXX_PINID(0, 14)
+#define PINID_GPMI_D15		STMP3XXX_PINID(0, 15)
+#define PINID_GPMI_A0		STMP3XXX_PINID(0, 16)
+#define PINID_GPMI_A1		STMP3XXX_PINID(0, 17)
+#define PINID_GPMI_A2		STMP3XXX_PINID(0, 18)
+#define PINID_GPMI_RDY0		STMP3XXX_PINID(0, 19)
+#define PINID_GPMI_RDY2		STMP3XXX_PINID(0, 20)
+#define PINID_GPMI_RDY3		STMP3XXX_PINID(0, 21)
+#define PINID_GPMI_RESETN	STMP3XXX_PINID(0, 22)
+#define PINID_GPMI_IRQ		STMP3XXX_PINID(0, 23)
+#define PINID_GPMI_WRN		STMP3XXX_PINID(0, 24)
+#define PINID_GPMI_RDN		STMP3XXX_PINID(0, 25)
+#define PINID_UART2_CTS		STMP3XXX_PINID(0, 26)
+#define PINID_UART2_RTS		STMP3XXX_PINID(0, 27)
+#define PINID_UART2_RX		STMP3XXX_PINID(0, 28)
+#define PINID_UART2_TX		STMP3XXX_PINID(0, 29)
+
+/* Bank 1 */
+#define PINID_LCD_D00		STMP3XXX_PINID(1, 0)
+#define PINID_LCD_D01		STMP3XXX_PINID(1, 1)
+#define PINID_LCD_D02		STMP3XXX_PINID(1, 2)
+#define PINID_LCD_D03		STMP3XXX_PINID(1, 3)
+#define PINID_LCD_D04		STMP3XXX_PINID(1, 4)
+#define PINID_LCD_D05		STMP3XXX_PINID(1, 5)
+#define PINID_LCD_D06		STMP3XXX_PINID(1, 6)
+#define PINID_LCD_D07		STMP3XXX_PINID(1, 7)
+#define PINID_LCD_D08		STMP3XXX_PINID(1, 8)
+#define PINID_LCD_D09		STMP3XXX_PINID(1, 9)
+#define PINID_LCD_D10		STMP3XXX_PINID(1, 10)
+#define PINID_LCD_D11		STMP3XXX_PINID(1, 11)
+#define PINID_LCD_D12		STMP3XXX_PINID(1, 12)
+#define PINID_LCD_D13		STMP3XXX_PINID(1, 13)
+#define PINID_LCD_D14		STMP3XXX_PINID(1, 14)
+#define PINID_LCD_D15		STMP3XXX_PINID(1, 15)
+#define PINID_LCD_RESET 	STMP3XXX_PINID(1, 16)
+#define PINID_LCD_RS		STMP3XXX_PINID(1, 17)
+#define PINID_LCD_WR_RWN	STMP3XXX_PINID(1, 18)
+#define PINID_LCD_RD_E		STMP3XXX_PINID(1, 19)
+#define PINID_LCD_CS		STMP3XXX_PINID(1, 20)
+#define PINID_LCD_BUSY		STMP3XXX_PINID(1, 21)
+#define PINID_SSP1_CMD		STMP3XXX_PINID(1, 22)
+#define PINID_SSP1_SCK		STMP3XXX_PINID(1, 23)
+#define PINID_SSP1_DATA0	STMP3XXX_PINID(1, 24)
+#define PINID_SSP1_DATA1	STMP3XXX_PINID(1, 25)
+#define PINID_SSP1_DATA2	STMP3XXX_PINID(1, 26)
+#define PINID_SSP1_DATA3	STMP3XXX_PINID(1, 27)
+#define PINID_SSP1_DETECT	STMP3XXX_PINID(1, 28)
+
+/* Bank 2 */
+#define PINID_PWM0		STMP3XXX_PINID(2, 0)
+#define PINID_PWM1		STMP3XXX_PINID(2, 1)
+#define PINID_PWM2		STMP3XXX_PINID(2, 2)
+#define PINID_PWM3		STMP3XXX_PINID(2, 3)
+#define PINID_PWM4		STMP3XXX_PINID(2, 4)
+#define PINID_I2C_SCL		STMP3XXX_PINID(2, 5)
+#define PINID_I2C_SDA		STMP3XXX_PINID(2, 6)
+#define PINID_ROTTARYA		STMP3XXX_PINID(2, 7)
+#define PINID_ROTTARYB		STMP3XXX_PINID(2, 8)
+#define PINID_EMI_CKE		STMP3XXX_PINID(2, 9)
+#define PINID_EMI_RASN		STMP3XXX_PINID(2, 10)
+#define PINID_EMI_CASN		STMP3XXX_PINID(2, 11)
+#define PINID_EMI_CE0N		STMP3XXX_PINID(2, 12)
+#define PINID_EMI_CE1N		STMP3XXX_PINID(2, 13)
+#define PINID_EMI_CE2N		STMP3XXX_PINID(2, 14)
+#define PINID_EMI_CE3N		STMP3XXX_PINID(2, 15)
+#define PINID_EMI_A00		STMP3XXX_PINID(2, 16)
+#define PINID_EMI_A01		STMP3XXX_PINID(2, 17)
+#define PINID_EMI_A02		STMP3XXX_PINID(2, 18)
+#define PINID_EMI_A03		STMP3XXX_PINID(2, 19)
+#define PINID_EMI_A04		STMP3XXX_PINID(2, 20)
+#define PINID_EMI_A05		STMP3XXX_PINID(2, 21)
+#define PINID_EMI_A06		STMP3XXX_PINID(2, 22)
+#define PINID_EMI_A07		STMP3XXX_PINID(2, 23)
+#define PINID_EMI_A08		STMP3XXX_PINID(2, 24)
+#define PINID_EMI_A09		STMP3XXX_PINID(2, 25)
+#define PINID_EMI_A10		STMP3XXX_PINID(2, 26)
+#define PINID_EMI_A11		STMP3XXX_PINID(2, 27)
+#define PINID_EMI_A12		STMP3XXX_PINID(2, 28)
+#define PINID_EMI_A13		STMP3XXX_PINID(2, 29)
+#define PINID_EMI_A14		STMP3XXX_PINID(2, 30)
+#define PINID_EMI_WEN		STMP3XXX_PINID(2, 31)
+
+/* Bank 3 */
+#define PINID_EMI_D00		STMP3XXX_PINID(3, 0)
+#define PINID_EMI_D01		STMP3XXX_PINID(3, 1)
+#define PINID_EMI_D02		STMP3XXX_PINID(3, 2)
+#define PINID_EMI_D03		STMP3XXX_PINID(3, 3)
+#define PINID_EMI_D04		STMP3XXX_PINID(3, 4)
+#define PINID_EMI_D05		STMP3XXX_PINID(3, 5)
+#define PINID_EMI_D06		STMP3XXX_PINID(3, 6)
+#define PINID_EMI_D07		STMP3XXX_PINID(3, 7)
+#define PINID_EMI_D08		STMP3XXX_PINID(3, 8)
+#define PINID_EMI_D09		STMP3XXX_PINID(3, 9)
+#define PINID_EMI_D10		STMP3XXX_PINID(3, 10)
+#define PINID_EMI_D11		STMP3XXX_PINID(3, 11)
+#define PINID_EMI_D12		STMP3XXX_PINID(3, 12)
+#define PINID_EMI_D13		STMP3XXX_PINID(3, 13)
+#define PINID_EMI_D14		STMP3XXX_PINID(3, 14)
+#define PINID_EMI_D15		STMP3XXX_PINID(3, 15)
+#define PINID_EMI_DQS0		STMP3XXX_PINID(3, 16)
+#define PINID_EMI_DQS1		STMP3XXX_PINID(3, 17)
+#define PINID_EMI_DQM0		STMP3XXX_PINID(3, 18)
+#define PINID_EMI_DQM1		STMP3XXX_PINID(3, 19)
+#define PINID_EMI_CLK		STMP3XXX_PINID(3, 20)
+#define PINID_EMI_CLKN		STMP3XXX_PINID(3, 21)
+
+#endif /* __ASM_ARCH_PINS_H */
-- 
cgit v0.10.2


From b4175b89921fefb2f352472fa6dccb0fc4fb37d9 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 26 Apr 2009 14:22:29 +0100
Subject: [ARM] sort machine- and plat- by CONFIG* name

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e84729b..885a837 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -99,64 +99,69 @@ CHECKFLAGS	+= -D__arm__
 #Default value
 head-y		:= arch/arm/kernel/head$(MMUEXT).o arch/arm/kernel/init_task.o
 textofs-y	:= 0x00008000
-
- machine-$(CONFIG_ARCH_RPC)	   := rpc
- machine-$(CONFIG_ARCH_EBSA110)	   := ebsa110
- machine-$(CONFIG_FOOTBRIDGE)	   := footbridge
- machine-$(CONFIG_ARCH_SHARK)	   := shark
- machine-$(CONFIG_ARCH_SA1100)	   := sa1100
-ifeq ($(CONFIG_ARCH_SA1100),y)
+textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000
 # SA1111 DMA bug: we don't want the kernel to live in precious DMA-able memory
- textofs-$(CONFIG_SA1111)	   := 0x00208000
+ifeq ($(CONFIG_ARCH_SA1100),y)
+textofs-$(CONFIG_SA1111) := 0x00208000
 endif
- machine-$(CONFIG_ARCH_PXA)	   := pxa
- machine-$(CONFIG_ARCH_MMP)	   := mmp
-    plat-$(CONFIG_PLAT_PXA)	   := pxa
- machine-$(CONFIG_ARCH_L7200)	   := l7200
- machine-$(CONFIG_ARCH_INTEGRATOR) := integrator
- machine-$(CONFIG_ARCH_GEMINI)     := gemini
- textofs-$(CONFIG_ARCH_CLPS711X)   := 0x00028000
- machine-$(CONFIG_ARCH_CLPS711X)   := clps711x
- machine-$(CONFIG_ARCH_IOP32X)	   := iop32x
- machine-$(CONFIG_ARCH_IOP33X)	   := iop33x
- machine-$(CONFIG_ARCH_IOP13XX)	   := iop13xx
-    plat-$(CONFIG_PLAT_IOP)	   := iop
- machine-$(CONFIG_ARCH_IXP4XX)	   := ixp4xx
- machine-$(CONFIG_ARCH_IXP2000)    := ixp2000
- machine-$(CONFIG_ARCH_IXP23XX)    := ixp23xx
- machine-$(CONFIG_ARCH_OMAP1)	   := omap1
- machine-$(CONFIG_ARCH_OMAP2)	   := omap2
- machine-$(CONFIG_ARCH_OMAP3)	   := omap2
-    plat-$(CONFIG_ARCH_OMAP)	   := omap
- machine-$(CONFIG_ARCH_S3C2410)	   := s3c2410 s3c2400 s3c2412 s3c2440 s3c2442 s3c2443
- machine-$(CONFIG_ARCH_S3C24A0)	   := s3c24a0
-    plat-$(CONFIG_PLAT_S3C24XX)	   := s3c24xx s3c
- machine-$(CONFIG_ARCH_S3C64XX)	   := s3c6400 s3c6410
-    plat-$(CONFIG_PLAT_S3C64XX)	   := s3c64xx s3c
- machine-$(CONFIG_ARCH_LH7A40X)	   := lh7a40x
- machine-$(CONFIG_ARCH_VERSATILE)  := versatile
- machine-$(CONFIG_ARCH_IMX)	   := imx
- machine-$(CONFIG_ARCH_H720X)	   := h720x
- machine-$(CONFIG_ARCH_AAEC2000)   := aaec2000
- machine-$(CONFIG_ARCH_REALVIEW)   := realview
- machine-$(CONFIG_ARCH_AT91)	   := at91
- machine-$(CONFIG_ARCH_EP93XX)	   := ep93xx
- machine-$(CONFIG_ARCH_PNX4008)	   := pnx4008
- machine-$(CONFIG_ARCH_NETX)	   := netx
- machine-$(CONFIG_ARCH_NS9XXX)	   := ns9xxx
- machine-$(CONFIG_ARCH_DAVINCI)	   := davinci
- machine-$(CONFIG_ARCH_KIRKWOOD)   := kirkwood
- machine-$(CONFIG_ARCH_KS8695)     := ks8695
-    plat-$(CONFIG_ARCH_MXC)	   := mxc
- machine-$(CONFIG_ARCH_MX2)	   := mx2
- machine-$(CONFIG_ARCH_MX3)	   := mx3
- machine-$(CONFIG_ARCH_MX1)	   := mx1
- machine-$(CONFIG_ARCH_ORION5X)	   := orion5x
-    plat-$(CONFIG_PLAT_ORION)	   := orion
- machine-$(CONFIG_ARCH_MSM)	   := msm
- machine-$(CONFIG_ARCH_LOKI)       := loki
- machine-$(CONFIG_ARCH_MV78XX0)    := mv78xx0
- machine-$(CONFIG_ARCH_W90X900)    := w90x900
+
+# Machine directory name.  This list is sorted alphanumerically
+# by CONFIG_* macro name.
+machine-$(CONFIG_ARCH_AAEC2000)		:= aaec2000
+machine-$(CONFIG_ARCH_AT91)		:= at91
+machine-$(CONFIG_ARCH_CLPS711X)		:= clps711x
+machine-$(CONFIG_ARCH_DAVINCI)		:= davinci
+machine-$(CONFIG_ARCH_EBSA110)		:= ebsa110
+machine-$(CONFIG_ARCH_EP93XX)		:= ep93xx
+machine-$(CONFIG_ARCH_GEMINI)		:= gemini
+machine-$(CONFIG_ARCH_H720X)		:= h720x
+machine-$(CONFIG_ARCH_IMX)		:= imx
+machine-$(CONFIG_ARCH_INTEGRATOR)	:= integrator
+machine-$(CONFIG_ARCH_IOP13XX)		:= iop13xx
+machine-$(CONFIG_ARCH_IOP32X)		:= iop32x
+machine-$(CONFIG_ARCH_IOP33X)		:= iop33x
+machine-$(CONFIG_ARCH_IXP2000)		:= ixp2000
+machine-$(CONFIG_ARCH_IXP23XX)		:= ixp23xx
+machine-$(CONFIG_ARCH_IXP4XX)		:= ixp4xx
+machine-$(CONFIG_ARCH_KIRKWOOD)		:= kirkwood
+machine-$(CONFIG_ARCH_KS8695)		:= ks8695
+machine-$(CONFIG_ARCH_L7200)		:= l7200
+machine-$(CONFIG_ARCH_LH7A40X)		:= lh7a40x
+machine-$(CONFIG_ARCH_LOKI) 		:= loki
+machine-$(CONFIG_ARCH_MMP)		:= mmp
+machine-$(CONFIG_ARCH_MSM)		:= msm
+machine-$(CONFIG_ARCH_MV78XX0)		:= mv78xx0
+machine-$(CONFIG_ARCH_MX1)		:= mx1
+machine-$(CONFIG_ARCH_MX2)		:= mx2
+machine-$(CONFIG_ARCH_MX3)		:= mx3
+machine-$(CONFIG_ARCH_NETX)		:= netx
+machine-$(CONFIG_ARCH_NS9XXX)		:= ns9xxx
+machine-$(CONFIG_ARCH_OMAP1)		:= omap1
+machine-$(CONFIG_ARCH_OMAP2)		:= omap2
+machine-$(CONFIG_ARCH_OMAP3)		:= omap2
+machine-$(CONFIG_ARCH_ORION5X)		:= orion5x
+machine-$(CONFIG_ARCH_PNX4008)		:= pnx4008
+machine-$(CONFIG_ARCH_PXA)		:= pxa
+machine-$(CONFIG_ARCH_REALVIEW)		:= realview
+machine-$(CONFIG_ARCH_RPC)		:= rpc
+machine-$(CONFIG_ARCH_S3C2410)		:= s3c2410 s3c2400 s3c2412 s3c2440 s3c2442 s3c2443
+machine-$(CONFIG_ARCH_S3C24A0)		:= s3c24a0
+machine-$(CONFIG_ARCH_S3C64XX)		:= s3c6400 s3c6410
+machine-$(CONFIG_ARCH_SA1100)		:= sa1100
+machine-$(CONFIG_ARCH_SHARK)		:= shark
+machine-$(CONFIG_ARCH_VERSATILE)	:= versatile
+machine-$(CONFIG_ARCH_W90X900)		:= w90x900
+machine-$(CONFIG_FOOTBRIDGE)		:= footbridge
+
+# Platform directory name.  This list is sorted alphanumerically
+# by CONFIG_* macro name.
+plat-$(CONFIG_ARCH_MXC)		:= mxc
+plat-$(CONFIG_ARCH_OMAP)	:= omap
+plat-$(CONFIG_PLAT_IOP)		:= iop
+plat-$(CONFIG_PLAT_ORION)	:= orion
+plat-$(CONFIG_PLAT_PXA)		:= pxa
+plat-$(CONFIG_PLAT_S3C24XX)	:= s3c24xx s3c
+plat-$(CONFIG_PLAT_S3C64XX)	:= s3c64xx s3c
 
 ifeq ($(CONFIG_ARCH_EBSA110),y)
 # This is what happens if you forget the IOCS16 line.
-- 
cgit v0.10.2


From 1e3dd535d641a856e913dd8a17a75bd3c36c49e0 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Wed, 22 Apr 2009 23:52:45 +0100
Subject: [ARM] 5469/1: Freescale STMP platform support [2/10]

Headers for STMP378x boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp378x/include/mach/entry-macro.S b/arch/arm/mach-stmp378x/include/mach/entry-macro.S
new file mode 100644
index 0000000..731a922
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/entry-macro.S
@@ -0,0 +1,35 @@
+/*
+ * Low-level IRQ helper macros for Freescale STMP378X
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+		.macro	disable_fiq
+		.endm
+
+		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
+
+	        mov	\base, #0xf0000000	@ vm address of IRQ controller
+		ldr	\irqnr, [\base, #0x70]	@ HW_ICOLL_STAT
+		cmp	\irqnr, #0x7f
+		moveqs	\irqnr, #0		@ Zero flag set for no IRQ
+
+		.endm
+
+                .macro  get_irqnr_preamble, base, tmp
+                .endm
+
+                .macro  arch_ret_to_user, tmp1, tmp2
+                .endm
diff --git a/arch/arm/mach-stmp378x/include/mach/irqs.h b/arch/arm/mach-stmp378x/include/mach/irqs.h
new file mode 100644
index 0000000..cc59673
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/irqs.h
@@ -0,0 +1,95 @@
+/*
+ * Freescale STMP378X interrupts
+ *
+ * Copyright (C) 2005 Sigmatel Inc
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#define IRQ_DEBUG_UART			0
+#define IRQ_COMMS_RX			1
+#define IRQ_COMMS_TX			1
+#define IRQ_SSP2_ERROR			2
+#define IRQ_VDD5V			3
+#define IRQ_HEADPHONE_SHORT		4
+#define IRQ_DAC_DMA			5
+#define IRQ_DAC_ERROR			6
+#define IRQ_ADC_DMA			7
+#define IRQ_ADC_ERROR			8
+#define IRQ_SPDIF_DMA			9
+#define IRQ_SAIF2_DMA			9
+#define IRQ_SPDIF_ERROR			10
+#define IRQ_SAIF1_IRQ			10
+#define IRQ_SAIF2_IRQ			10
+#define IRQ_USB_CTRL			11
+#define IRQ_USB_WAKEUP			12
+#define IRQ_GPMI_DMA			13
+#define IRQ_SSP1_DMA			14
+#define IRQ_SSP_ERROR			15
+#define IRQ_GPIO0			16
+#define IRQ_GPIO1			17
+#define IRQ_GPIO2			18
+#define IRQ_SAIF1_DMA			19
+#define IRQ_SSP2_DMA			20
+#define IRQ_ECC8_IRQ			21
+#define IRQ_RTC_ALARM			22
+#define IRQ_UARTAPP_TX_DMA		23
+#define IRQ_UARTAPP_INTERNAL		24
+#define IRQ_UARTAPP_RX_DMA		25
+#define IRQ_I2C_DMA			26
+#define IRQ_I2C_ERROR			27
+#define IRQ_TIMER0			28
+#define IRQ_TIMER1			29
+#define IRQ_TIMER2			30
+#define IRQ_TIMER3			31
+#define IRQ_BATT_BRNOUT			32
+#define IRQ_VDDD_BRNOUT			33
+#define IRQ_VDDIO_BRNOUT		34
+#define IRQ_VDD18_BRNOUT		35
+#define IRQ_TOUCH_DETECT		36
+#define IRQ_LRADC_CH0			37
+#define IRQ_LRADC_CH1			38
+#define IRQ_LRADC_CH2			39
+#define IRQ_LRADC_CH3			40
+#define IRQ_LRADC_CH4			41
+#define IRQ_LRADC_CH5			42
+#define IRQ_LRADC_CH6			43
+#define IRQ_LRADC_CH7			44
+#define IRQ_LCDIF_DMA			45
+#define IRQ_LCDIF_ERROR			46
+#define IRQ_DIGCTL_DEBUG_TRAP		47
+#define IRQ_RTC_1MSEC			48
+#define IRQ_DRI_DMA			49
+#define IRQ_DRI_ATTENTION		50
+#define IRQ_GPMI_ATTENTION		51
+#define IRQ_IR				52
+#define IRQ_DCP_VMI			53
+#define IRQ_DCP				54
+#define IRQ_BCH				56
+#define IRQ_PXP				57
+#define IRQ_UARTAPP2_TX_DMA		58
+#define IRQ_UARTAPP2_INTERNAL		59
+#define IRQ_UARTAPP2_RX_DMA		60
+#define IRQ_VDAC_DETECT			61
+#define IRQ_VDD5V_DROOP			64
+#define IRQ_DCDC4P2_BO			65
+
+
+#define NR_REAL_IRQS	128
+#define NR_IRQS		(NR_REAL_IRQS + 32 * 3)
+
+/* All interrupts are FIQ capable */
+#define FIQ_START		IRQ_DEBUG_UART
+
+/* Hard disk IRQ is a GPMI attention IRQ */
+#define IRQ_HARDDISK		IRQ_GPMI_ATTENTION
diff --git a/arch/arm/mach-stmp378x/include/mach/pins.h b/arch/arm/mach-stmp378x/include/mach/pins.h
new file mode 100644
index 0000000..93f952d
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/pins.h
@@ -0,0 +1,151 @@
+/*
+ * Freescale STMP378X SoC pin multiplexing
+ *
+ * Author: Vladislav Buzov <vbuzov@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_ARCH_PINS_H
+#define __ASM_ARCH_PINS_H
+
+/*
+ * Define all STMP378x pins, a pin name corresponds to a STMP378x hardware
+ * interface  this pin belongs to.
+ */
+
+/* Bank 0 */
+#define PINID_GPMI_D00		STMP3XXX_PINID(0, 0)
+#define PINID_GPMI_D01		STMP3XXX_PINID(0, 1)
+#define PINID_GPMI_D02		STMP3XXX_PINID(0, 2)
+#define PINID_GPMI_D03		STMP3XXX_PINID(0, 3)
+#define PINID_GPMI_D04		STMP3XXX_PINID(0, 4)
+#define PINID_GPMI_D05		STMP3XXX_PINID(0, 5)
+#define PINID_GPMI_D06		STMP3XXX_PINID(0, 6)
+#define PINID_GPMI_D07		STMP3XXX_PINID(0, 7)
+#define PINID_GPMI_D08		STMP3XXX_PINID(0, 8)
+#define PINID_GPMI_D09		STMP3XXX_PINID(0, 9)
+#define PINID_GPMI_D10		STMP3XXX_PINID(0, 10)
+#define PINID_GPMI_D11		STMP3XXX_PINID(0, 11)
+#define PINID_GPMI_D12		STMP3XXX_PINID(0, 12)
+#define PINID_GPMI_D13		STMP3XXX_PINID(0, 13)
+#define PINID_GPMI_D14		STMP3XXX_PINID(0, 14)
+#define PINID_GPMI_D15		STMP3XXX_PINID(0, 15)
+#define PINID_GPMI_CLE		STMP3XXX_PINID(0, 16)
+#define PINID_GPMI_ALE		STMP3XXX_PINID(0, 17)
+#define PINID_GMPI_CE2N		STMP3XXX_PINID(0, 18)
+#define PINID_GPMI_RDY0		STMP3XXX_PINID(0, 19)
+#define PINID_GPMI_RDY1		STMP3XXX_PINID(0, 20)
+#define PINID_GPMI_RDY2		STMP3XXX_PINID(0, 21)
+#define PINID_GPMI_RDY3		STMP3XXX_PINID(0, 22)
+#define PINID_GPMI_WPN		STMP3XXX_PINID(0, 23)
+#define PINID_GPMI_WRN		STMP3XXX_PINID(0, 24)
+#define PINID_GPMI_RDN		STMP3XXX_PINID(0, 25)
+#define PINID_AUART1_CTS	STMP3XXX_PINID(0, 26)
+#define PINID_AUART1_RTS	STMP3XXX_PINID(0, 27)
+#define PINID_AUART1_RX		STMP3XXX_PINID(0, 28)
+#define PINID_AUART1_TX		STMP3XXX_PINID(0, 29)
+#define PINID_I2C_SCL		STMP3XXX_PINID(0, 30)
+#define PINID_I2C_SDA		STMP3XXX_PINID(0, 31)
+
+/* Bank 1 */
+#define PINID_LCD_D00		STMP3XXX_PINID(1, 0)
+#define PINID_LCD_D01		STMP3XXX_PINID(1, 1)
+#define PINID_LCD_D02		STMP3XXX_PINID(1, 2)
+#define PINID_LCD_D03		STMP3XXX_PINID(1, 3)
+#define PINID_LCD_D04		STMP3XXX_PINID(1, 4)
+#define PINID_LCD_D05		STMP3XXX_PINID(1, 5)
+#define PINID_LCD_D06		STMP3XXX_PINID(1, 6)
+#define PINID_LCD_D07		STMP3XXX_PINID(1, 7)
+#define PINID_LCD_D08		STMP3XXX_PINID(1, 8)
+#define PINID_LCD_D09		STMP3XXX_PINID(1, 9)
+#define PINID_LCD_D10		STMP3XXX_PINID(1, 10)
+#define PINID_LCD_D11		STMP3XXX_PINID(1, 11)
+#define PINID_LCD_D12		STMP3XXX_PINID(1, 12)
+#define PINID_LCD_D13		STMP3XXX_PINID(1, 13)
+#define PINID_LCD_D14		STMP3XXX_PINID(1, 14)
+#define PINID_LCD_D15		STMP3XXX_PINID(1, 15)
+#define PINID_LCD_D16		STMP3XXX_PINID(1, 16)
+#define PINID_LCD_D17		STMP3XXX_PINID(1, 17)
+#define PINID_LCD_RESET		STMP3XXX_PINID(1, 18)
+#define PINID_LCD_RS		STMP3XXX_PINID(1, 19)
+#define PINID_LCD_WR		STMP3XXX_PINID(1, 20)
+#define PINID_LCD_CS		STMP3XXX_PINID(1, 21)
+#define PINID_LCD_DOTCK		STMP3XXX_PINID(1, 22)
+#define PINID_LCD_ENABLE	STMP3XXX_PINID(1, 23)
+#define PINID_LCD_HSYNC		STMP3XXX_PINID(1, 24)
+#define PINID_LCD_VSYNC		STMP3XXX_PINID(1, 25)
+#define PINID_PWM0		STMP3XXX_PINID(1, 26)
+#define PINID_PWM1		STMP3XXX_PINID(1, 27)
+#define PINID_PWM2		STMP3XXX_PINID(1, 28)
+#define PINID_PWM3		STMP3XXX_PINID(1, 29)
+#define PINID_PWM4		STMP3XXX_PINID(1, 30)
+
+/* Bank 2 */
+#define PINID_SSP1_CMD		STMP3XXX_PINID(2, 0)
+#define PINID_SSP1_DETECT	STMP3XXX_PINID(2, 1)
+#define PINID_SSP1_DATA0	STMP3XXX_PINID(2, 2)
+#define PINID_SSP1_DATA1	STMP3XXX_PINID(2, 3)
+#define PINID_SSP1_DATA2	STMP3XXX_PINID(2, 4)
+#define PINID_SSP1_DATA3	STMP3XXX_PINID(2, 5)
+#define PINID_SSP1_SCK		STMP3XXX_PINID(2, 6)
+#define PINID_ROTARYA		STMP3XXX_PINID(2, 7)
+#define PINID_ROTARYB		STMP3XXX_PINID(2, 8)
+#define PINID_EMI_A00		STMP3XXX_PINID(2, 9)
+#define PINID_EMI_A01		STMP3XXX_PINID(2, 10)
+#define PINID_EMI_A02		STMP3XXX_PINID(2, 11)
+#define PINID_EMI_A03		STMP3XXX_PINID(2, 12)
+#define PINID_EMI_A04		STMP3XXX_PINID(2, 13)
+#define PINID_EMI_A05		STMP3XXX_PINID(2, 14)
+#define PINID_EMI_A06		STMP3XXX_PINID(2, 15)
+#define PINID_EMI_A07		STMP3XXX_PINID(2, 16)
+#define PINID_EMI_A08		STMP3XXX_PINID(2, 17)
+#define PINID_EMI_A09		STMP3XXX_PINID(2, 18)
+#define PINID_EMI_A10		STMP3XXX_PINID(2, 19)
+#define PINID_EMI_A11		STMP3XXX_PINID(2, 20)
+#define PINID_EMI_A12		STMP3XXX_PINID(2, 21)
+#define PINID_EMI_BA0		STMP3XXX_PINID(2, 22)
+#define PINID_EMI_BA1		STMP3XXX_PINID(2, 23)
+#define PINID_EMI_CASN		STMP3XXX_PINID(2, 24)
+#define PINID_EMI_CE0N		STMP3XXX_PINID(2, 25)
+#define PINID_EMI_CE1N		STMP3XXX_PINID(2, 26)
+#define PINID_GPMI_CE1N		STMP3XXX_PINID(2, 27)
+#define PINID_GPMI_CE0N		STMP3XXX_PINID(2, 28)
+#define PINID_EMI_CKE		STMP3XXX_PINID(2, 29)
+#define PINID_EMI_RASN		STMP3XXX_PINID(2, 30)
+#define PINID_EMI_WEN		STMP3XXX_PINID(2, 31)
+
+/* Bank 3 */
+#define PINID_EMI_D00		STMP3XXX_PINID(3, 0)
+#define PINID_EMI_D01		STMP3XXX_PINID(3, 1)
+#define PINID_EMI_D02		STMP3XXX_PINID(3, 2)
+#define PINID_EMI_D03		STMP3XXX_PINID(3, 3)
+#define PINID_EMI_D04		STMP3XXX_PINID(3, 4)
+#define PINID_EMI_D05		STMP3XXX_PINID(3, 5)
+#define PINID_EMI_D06		STMP3XXX_PINID(3, 6)
+#define PINID_EMI_D07		STMP3XXX_PINID(3, 7)
+#define PINID_EMI_D08		STMP3XXX_PINID(3, 8)
+#define PINID_EMI_D09		STMP3XXX_PINID(3, 9)
+#define PINID_EMI_D10		STMP3XXX_PINID(3, 10)
+#define PINID_EMI_D11		STMP3XXX_PINID(3, 11)
+#define PINID_EMI_D12		STMP3XXX_PINID(3, 12)
+#define PINID_EMI_D13		STMP3XXX_PINID(3, 13)
+#define PINID_EMI_D14		STMP3XXX_PINID(3, 14)
+#define PINID_EMI_D15		STMP3XXX_PINID(3, 15)
+#define PINID_EMI_DQM0		STMP3XXX_PINID(3, 16)
+#define PINID_EMI_DQM1		STMP3XXX_PINID(3, 17)
+#define PINID_EMI_DQS0		STMP3XXX_PINID(3, 18)
+#define PINID_EMI_DQS1		STMP3XXX_PINID(3, 19)
+#define PINID_EMI_CLK		STMP3XXX_PINID(3, 20)
+#define PINID_EMI_CLKN		STMP3XXX_PINID(3, 21)
+
+#endif /* __ASM_ARCH_PINS_H */
-- 
cgit v0.10.2


From 34acb09025a132943555d0f0ffca6cb05c698cd4 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Wed, 22 Apr 2009 23:54:05 +0100
Subject: [ARM] 5468/1: Freescale STMP platform support [3/10]

Minimal definition of register set for 37xx boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h b/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h
new file mode 100644
index 0000000..3044c20
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h
@@ -0,0 +1,102 @@
+/*
+ * STMP APBH Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#ifndef _INCLUDE_ASM_ARCH_REGS_APBH_H
+#define _INCLUDE_ASM_ARCH_REGS_APBH_H
+
+#include <mach/stmp3xxx_regs.h>
+
+#ifndef REGS_APBH_BASE
+#define REGS_APBH_BASE (REGS_BASE + 0x00004000)
+#endif
+
+HW_REGISTER(HW_APBH_CTRL0, REGS_APBH_BASE, 0x00)
+#define BP_APBH_CTRL0_SFTRST      31
+#define BM_APBH_CTRL0_SFTRST      0x80000000
+#define BP_APBH_CTRL0_CLKGATE      30
+#define BM_APBH_CTRL0_CLKGATE      0x40000000
+#define BP_APBH_CTRL0_RESET_CHANNEL      16
+#define BM_APBH_CTRL0_RESET_CHANNEL      0x00FF0000
+#define BF_APBH_CTRL0_RESET_CHANNEL(v)   \
+	(((v) << BP_APBH_CTRL0_RESET_CHANNEL) & BM_APBH_CTRL0_RESET_CHANNEL)
+HW_REGISTER(HW_APBH_CTRL1, REGS_APBH_BASE, 0x10)
+#define BP_APBH_CTRL1_CH1_CMDCMPLT_IRQ_EN      9
+#define BM_APBH_CTRL1_CH1_CMDCMPLT_IRQ_EN      0x00000200
+#define BP_APBH_CTRL1_CH0_CMDCMPLT_IRQ_EN      8
+#define BM_APBH_CTRL1_CH0_CMDCMPLT_IRQ_EN      0x00000100
+#define BP_APBH_CTRL1_CH7_CMDCMPLT_IRQ      7
+#define BM_APBH_CTRL1_CH7_CMDCMPLT_IRQ      0x00000080
+#define BP_APBH_CTRL1_CH1_CMDCMPLT_IRQ      1
+#define BM_APBH_CTRL1_CH1_CMDCMPLT_IRQ      0x00000002
+#define BP_APBH_CTRL1_CH0_CMDCMPLT_IRQ      0
+#define BM_APBH_CTRL1_CH0_CMDCMPLT_IRQ      0x00000001
+#define BP_APBH_CTRL1_CH1_ERR_IRQ	    17
+#define BM_APBH_CTRL1_CH1_ERR_IRQ	   0x00020000
+HW_REGISTER_0(HW_APBH_DEVSEL, REGS_APBH_BASE, 0x20)
+HW_REGISTER_RO_INDEXED(HW_APBH_CHn_CURCMDAR, REGS_APBH_BASE, 0x40, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_NXTCMDAR, REGS_APBH_BASE, 0x50, 0x70)
+#define BP_APBH_CHn_NXTCMDAR_CMD_ADDR      0
+#define BM_APBH_CHn_NXTCMDAR_CMD_ADDR      0xFFFFFFFF
+#define BF_APBH_CHn_NXTCMDAR_CMD_ADDR(v)   ((u32) v)
+HW_REGISTER_RO_INDEXED(HW_APBH_CHn_CMD, REGS_APBH_BASE, 0x60, 0x70)
+#define BM_APBH_CHn_CMD_XFER_COUNT		0xFFFF0000
+#define BP_APBH_CHn_CMD_XFER_COUNT		16
+#define BF_APBH_CHn_CMD_XFER_COUNT(v)		\
+	(((v) << BP_APBH_CHn_CMD_XFER_COUNT) & BM_APBH_CHn_CMD_XFER_COUNT)
+#define BM_APBH_CHn_CMD_CMDWORDS		0x0000F000
+#define BP_APBH_CHn_CMD_CMDWORDS		12
+#define BF_APBH_CHn_CMD_CMDWORDS(v)		\
+	(((v) << BP_APBH_CHn_CMD_CMDWORDS) & BM_APBH_CHn_CMD_CMDWORDS)
+#define BM_APBH_CHn_CMD_WAIT4ENDCMD		0x00000080
+#define BM_APBH_CHn_CMD_SEMAPHORE		0x00000040
+#define BP_APBH_CHn_CMD_SEMAPHORE		6
+#define BF_APBH_CHn_CMD_SEMAPHORE(v)		\
+	(((v) << BP_APBH_CHn_CMD_SEMAPHORE) & BM_APBH_CHn_CMD_SEMAPHORE)
+#define BM_APBH_CHn_CMD_NANDWAIT4READY	 0x00000020
+#define BP_APBH_CHn_CMD_NANDLOCK		4
+#define BM_APBH_CHn_CMD_NANDLOCK		0x00000010
+#define BF_APBH_CHn_CMD_NANDLOCK(v)		\
+	(((v) << BP_APBH_CHn_CMD_NANDLOCK) & BM_APBH_CHn_CMD_NANDLOCK)
+#define BM_APBH_CHn_CMD_IRQONCMPLT		0x00000008
+#define BM_APBH_CHn_CMD_CHAIN		0x00000004
+#define BM_APBH_CHn_CMD_DMA_READ		0x00000003
+#define BP_APBH_CHn_CMD_DMA_READ		0
+#define BF_APBH_CHn_CMD_DMA_READ(v)		\
+	(((v) << BP_APBH_CHn_CMD_DMA_READ) & BM_APBH_CHn_CMD_DMA_READ)
+#define BF_APBH_CHn_CMD_COMMAND(v)		\
+	(((v) << BP_APBH_CHn_CMD_DMA_READ) & BM_APBH_CHn_CMD_DMA_READ)
+#define BV_APBH_CHn_CMD_COMMAND__NO_DMA_XFER  0x0
+#define BV_APBH_CHn_CMD_COMMAND__DMA_WRITE    0x1
+#define BV_APBH_CHn_CMD_COMMAND__DMA_READ     0x2
+#define BV_APBH_CHn_CMD_COMMAND__DMA_SENSE    0x3
+HW_REGISTER_INDEXED(HW_APBH_CHn_BAR, REGS_APBH_BASE, 0x70, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_SEMA, REGS_APBH_BASE, 0x80, 0x70)
+#define BP_APBH_CHn_SEMA_INCREMENT_SEMA      0
+#define BM_APBH_CHn_SEMA_INCREMENT_SEMA      0x000000FF
+#define BF_APBH_CHn_SEMA_INCREMENT_SEMA(v)   \
+	(((v) << BP_APBH_CHn_SEMA_INCREMENT_SEMA) & \
+	BM_APBH_CHn_SEMA_INCREMENT_SEMA)
+#define BP_APBH_CHn_SEMA_PHORE      16
+#define BM_APBH_CHn_SEMA_PHORE      0x00FF0000
+HW_REGISTER_RO_INDEXED(HW_APBH_CHn_DEBUG1, REGS_APBH_BASE, 0x90, 0x70)
+HW_REGISTER_RO_INDEXED(HW_APBH_CHn_DEBUG2, REGS_APBH_BASE, 0xA0, 0x70)
+HW_REGISTER_RO(HW_APBH_VERSION, REGS_APBH_BASE, 0x3F0)
+
+#endif /* _INCLUDE_ASM_ARCH_REGS_APBH_H */
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h b/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h
new file mode 100644
index 0000000..a14ddb9
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h
@@ -0,0 +1,109 @@
+/*
+ * STMP APBX Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#ifndef _INCLUDE_ASM_ARCH_REGS_APBX_H
+#define _INCLUDE_ASM_ARCH_REGS_APBX_H
+
+#include <mach/stmp3xxx_regs.h>
+
+#ifndef REGS_APBX_BASE
+#define REGS_APBX_BASE (REGS_BASE + 0x00024000)
+#endif
+
+HW_REGISTER(HW_APBX_CTRL0, REGS_APBX_BASE, 0x00)
+#define BP_APBX_CTRL0_SFTRST      31
+#define BM_APBX_CTRL0_SFTRST      0x80000000
+#define BP_APBX_CTRL0_CLKGATE      30
+#define BM_APBX_CTRL0_CLKGATE      0x40000000
+#define BP_APBX_CTRL0_RESET_CHANNEL      16
+#define BM_APBX_CTRL0_RESET_CHANNEL      0x00FF0000
+#define BF_APBX_CTRL0_RESET_CHANNEL(v)   \
+	(((v) << BP_APBX_CTRL0_RESET_CHANNEL) & BM_APBX_CTRL0_RESET_CHANNEL)
+HW_REGISTER(HW_APBX_CTRL1, REGS_APBX_BASE, 0x10)
+HW_REGISTER_0(HW_APBX_DEVSEL, REGS_APBX_BASE, 0x20)
+#define BP_APBX_DEVSEL_CH7      28
+#define BM_APBX_DEVSEL_CH7      0xF0000000
+#define BF_APBX_DEVSEL_CH7(v)   \
+	(((v) << BP_APBX_DEVSEL_CH7) & BM_APBX_DEVSEL_CH7)
+#define BV_APBX_DEVSEL_CH7__USE_UART  0x0
+#define BV_APBX_DEVSEL_CH7__USE_IRDA  0x1
+#define BP_APBX_DEVSEL_CH6	24
+#define BM_APBX_DEVSEL_CH6      0x0F000000
+#define BF_APBX_DEVSEL_CH6(v)   \
+	(((v) << BP_APBX_DEVSEL_CH6) & BM_APBX_DEVSEL_CH6)
+#define BV_APBX_DEVSEL_CH6__USE_UART  0x0
+#define BV_APBX_DEVSEL_CH6__USE_IRDA  0x1
+#define BP_APBX_CTRL1_CH7_AHB_ERROR_IRQ      23
+#define BM_APBX_CTRL1_CH7_AHB_ERROR_IRQ      0x00800000
+#define BP_APBX_CTRL1_CH6_AHB_ERROR_IRQ      22
+#define BM_APBX_CTRL1_CH6_AHB_ERROR_IRQ      0x00400000
+#define BP_APBX_CTRL1_CH7_CMDCMPLT_IRQ_EN      15
+#define BM_APBX_CTRL1_CH7_CMDCMPLT_IRQ_EN      0x00008000
+#define BP_APBX_CTRL1_CH6_CMDCMPLT_IRQ_EN      14
+#define BM_APBX_CTRL1_CH6_CMDCMPLT_IRQ_EN      0x00004000
+
+HW_REGISTER_RO_INDEXED(HW_APBX_CHn_CURCMDAR, REGS_APBX_BASE, 0x40, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_NXTCMDAR, REGS_APBX_BASE, 0x50, 0x70)
+#define BP_APBX_CHn_NXTCMDAR_CMD_ADDR      0
+#define BM_APBX_CHn_NXTCMDAR_CMD_ADDR      0xFFFFFFFF
+#define BF_APBX_CHn_NXTCMDAR_CMD_ADDR(v)   ((u32) v)
+HW_REGISTER_RO_INDEXED(HW_APBX_CHn_CMD, REGS_APBX_BASE, 0x60, 0x70)
+#define BP_APBX_CHn_CMD_XFER_COUNT      16
+#define BM_APBX_CHn_CMD_XFER_COUNT      0xFFFF0000
+#define BF_APBX_CHn_CMD_XFER_COUNT(v)   \
+	(((v) << BP_APBX_CHn_CMD_XFER_COUNT) & BM_APBX_CHn_CMD_XFER_COUNT)
+#define BP_APBX_CHn_CMD_CMDWORDS      12
+#define BM_APBX_CHn_CMD_CMDWORDS      0x0000F000
+#define BF_APBX_CHn_CMD_CMDWORDS(v)   \
+	(((v) << BP_APBX_CHn_CMD_CMDWORDS) & BM_APBX_CHn_CMD_CMDWORDS)
+#define BP_APBX_CHn_CMD_WAIT4ENDCMD      7
+#define BM_APBX_CHn_CMD_WAIT4ENDCMD      0x00000080
+#define BP_APBX_CHn_CMD_SEMAPHORE      6
+#define BM_APBX_CHn_CMD_SEMAPHORE      0x00000040
+#define BP_APBX_CHn_CMD_IRQONCMPLT      3
+#define BM_APBX_CHn_CMD_IRQONCMPLT      0x00000008
+#define BP_APBX_CHn_CMD_CHAIN      2
+#define BM_APBX_CHn_CMD_CHAIN      0x00000004
+#define BM_APBX_CHn_CMD_DMA_READ		0x00000003
+#define BP_APBX_CHn_CMD_DMA_READ		0
+#define BF_APBX_CHn_CMD_DMA_READ(v)		\
+	(((v) << BP_APBX_CHn_CMD_DMA_READ) & BM_APBX_CHn_CMD_DMA_READ)
+#define BP_APBX_CHn_CMD_COMMAND      0
+#define BM_APBX_CHn_CMD_COMMAND      0x00000003
+#define BF_APBX_CHn_CMD_COMMAND(v)   \
+	(((v) << BP_APBX_CHn_CMD_COMMAND) & BM_APBX_CHn_CMD_COMMAND)
+#define BV_APBX_CHn_CMD_COMMAND__NO_DMA_XFER  0x0
+#define BV_APBX_CHn_CMD_COMMAND__DMA_WRITE    0x1
+#define BV_APBX_CHn_CMD_COMMAND__DMA_READ     0x2
+
+HW_REGISTER_RO_INDEXED(HW_APBX_CHn_BAR, REGS_APBX_BASE, 0x70, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_SEMA, REGS_APBX_BASE, 0x80, 0x70)
+#define BP_APBX_CHn_SEMA_INCREMENT_SEMA      0
+#define BM_APBX_CHn_SEMA_INCREMENT_SEMA      0x000000FF
+#define BF_APBX_CHn_SEMA_INCREMENT_SEMA(v)   \
+	(((v) << BP_APBX_CHn_SEMA_INCREMENT_SEMA) & \
+	BM_APBX_CHn_SEMA_INCREMENT_SEMA)
+#define BP_APBX_CHn_SEMA_PHORE      16
+#define BM_APBX_CHn_SEMA_PHORE      0x00FF0000
+HW_REGISTER_RO_INDEXED(HW_APBX_CHn_DEBUG1, REGS_APBX_BASE, 0x90, 0x70)
+HW_REGISTER_RO_INDEXED(HW_APBX_CHn_DEBUG2, REGS_APBX_BASE, 0xA0, 0x70)
+HW_REGISTER_RO(HW_APBX_VERSION, REGS_APBX_BASE, 0x3F0)
+
+#endif /* _INCLUDE_ASM_ARCH_REGS_APBX_H */
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h b/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h
new file mode 100644
index 0000000..229ee75
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h
@@ -0,0 +1,85 @@
+#ifndef _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H
+#define _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_CLKCTRL_BASE (REGS_BASE + 0x00040000)
+
+#define HW_CLKCTRL_PLLCTRL0_ADDR	(REGS_CLKCTRL_BASE + 0x00)
+HW_REGISTER(HW_CLKCTRL_PLLCTRL0, REGS_CLKCTRL_BASE, 0x00)
+#define BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS	0x00040000
+#define HW_CLKCTRL_PLLCTRL1_ADDR	(REGS_CLKCTRL_BASE + 0x10)
+HW_REGISTER(HW_CLKCTRL_PLLCTRL1, REGS_CLKCTRL_BASE, 0x10)
+
+#define HW_CLKCTRL_CPU_ADDR		(REGS_CLKCTRL_BASE + 0x20)
+HW_REGISTER(HW_CLKCTRL_CPU, REGS_CLKCTRL_BASE, 0x20)
+#define BM_CLKCTRL_CPU_DIV_CPU 0x0000003F
+#define BF_CLKCTRL_CPU_DIV_CPU(v)  \
+	(((v) << 0) & BM_CLKCTRL_CPU_DIV_CPU)
+
+#define HW_CLKCTRL_HBUS_ADDR		(REGS_CLKCTRL_BASE + 0x30)
+HW_REGISTER(HW_CLKCTRL_HBUS, REGS_CLKCTRL_BASE, 0x30)
+#define BM_CLKCTRL_HBUS_DIV_FRAC_EN 	0	/* for compatitibility */
+#define BM_CLKCTRL_HBUS_DIV 0x0000001F
+#define BF_CLKCTRL_HBUS_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_HBUS_DIV)
+#define HW_CLKCTRL_XBUS_ADDR		(REGS_CLKCTRL_BASE + 0x40)
+HW_REGISTER(HW_CLKCTRL_XBUS, REGS_CLKCTRL_BASE, 0x40)
+#define HW_CLKCTRL_XTAL_ADDR		(REGS_CLKCTRL_BASE + 0x50)
+HW_REGISTER(HW_CLKCTRL_XTAL, REGS_CLKCTRL_BASE, 0x50)
+#define HW_CLKCTRL_PIX_ADDR		(REGS_CLKCTRL_BASE + 0x60)
+HW_REGISTER(HW_CLKCTRL_PIX, REGS_CLKCTRL_BASE, 0x60)
+#define BM_CLKCTRL_PIX_CLKGATE		0x80000000
+#define BM_CLKCTRL_PIX_BUSY		0x20000000
+#define BM_CLKCTRL_PIX_DIV		0x00007FFF
+#define BP_CLKCTRL_PIX_DIV		0
+#define BF_CLKCTRL_PIX_DIV(v)	\
+	(((v) << BP_CLKCTRL_PIX_DIV) & BM_CLKCTRL_PIX_DIV)
+#define HW_CLKCTRL_SSP_ADDR		(REGS_CLKCTRL_BASE + 0x70)
+HW_REGISTER(HW_CLKCTRL_SSP, REGS_CLKCTRL_BASE, 0x70)
+#define HW_CLKCTRL_GPMI_ADDR		(REGS_CLKCTRL_BASE + 0x80)
+HW_REGISTER(HW_CLKCTRL_GPMI, REGS_CLKCTRL_BASE, 0x80)
+#define HW_CLKCTRL_SPDIF_ADDR		(REGS_CLKCTRL_BASE + 0x90)
+HW_REGISTER(HW_CLKCTRL_SPDIF, REGS_CLKCTRL_BASE, 0x90)
+#define HW_CLKCTRL_EMI_ADDR		(REGS_CLKCTRL_BASE + 0xA0)
+HW_REGISTER(HW_CLKCTRL_EMI, REGS_CLKCTRL_BASE, 0xA0)
+#define HW_CLKCTRL_IR_ADDR		(REGS_CLKCTRL_BASE + 0xB0)
+HW_REGISTER(HW_CLKCTRL_IR, REGS_CLKCTRL_BASE, 0xB0)
+#define HW_CLKCTRL_SAIF_ADDR		(REGS_CLKCTRL_BASE + 0xC0)
+HW_REGISTER(HW_CLKCTRL_SAIF, REGS_CLKCTRL_BASE, 0xC0)
+#define HW_CLKCTRL_FRAC_ADDR		(REGS_CLKCTRL_BASE + 0xD0)
+HW_REGISTER(HW_CLKCTRL_FRAC, REGS_CLKCTRL_BASE, 0xD0)
+#define BM_CLKCTRL_FRAC_CLKGATEIO	0x80000000
+#define BM_CLKCTRL_FRAC_IO_STABLE	0x40000000
+#define BM_CLKCTRL_FRAC_IOFRAC		0x3F000000
+#define BP_CLKCTRL_FRAC_IOFRAC		24
+#define BF_CLKCTRL_FRAC_IOFRAC(v)	\
+	(((v) << BP_CLKCTRL_FRAC_IOFRAC) & BM_CLKCTRL_FRAC_IOFRAC)
+#define BM_CLKCTRL_FRAC_CLKGATEPIX	0x00800000
+#define BM_CLKCTRL_FRAC_PIXFRAC		0x003F0000
+#define BP_CLKCTRL_FRAC_PIXFRAC		16
+#define BF_CLKCTRL_FRAC_PIXFRAC(v)	\
+	(((v) << BP_CLKCTRL_FRAC_PIXFRAC) & BM_CLKCTRL_FRAC_PIXFRAC)
+#define BM_CLKCTRL_FRAC_CLKGATEEMI	0x00008000
+#define BM_CLKCTRL_FRAC_EMIFRAC		0x00003F00
+#define BP_CLKCTRL_FRAC_EMIFRAC		8
+#define BF_CLKCTRL_FRAC_EMIFRAC(v)	\
+	(((v) << BP_CLKCTRL_FRAC_EMIFRAC) & BM_CLKCTRL_FRAC_EMIFRAC)
+#define BM_CLKCTRL_FRAC_CLKGATECPU	0x00000080
+#define BM_CLKCTRL_FRAC_CPUFRAC		0x0000003F
+#define BP_CLKCTRL_FRAC_CPUFRAC		0
+#define BF_CLKCTRL_FRAC_CPUFRAC(v)	\
+	(((v) << BP_CLKCTRL_FRAC_CPUFRAC) & BM_CLKCTRL_FRAC_CPUFRAC)
+#define HW_CLKCTRL_CLKSEQ_ADDR		(REGS_CLKCTRL_BASE + 0xE0)
+HW_REGISTER(HW_CLKCTRL_CLKSEQ, REGS_CLKCTRL_BASE, 0xE0)
+#define BM_CLKCTRL_CLKSEQ_BYPASS_CPU		0x00000080
+#define BM_CLKCTRL_CLKSEQ_BYPASS_EMI		0x00000040
+#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP		0x00000020
+#define BM_CLKCTRL_CLKSEQ_BYPASS_GPMI		0x00000010
+#define BM_CLKCTRL_CLKSEQ_BYPASS_IR		0x00000008
+#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX		0x00000002
+#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF		0x00000001
+HW_REGISTER_WO(HW_CLKCTRL_RESET, REGS_CLKCTRL_BASE, 0xF0)
+#define BM_CLKCTRL_RESET_CHIP      0x00000002
+#define BM_CLKCTRL_RESET_DIG	   0x00000001
+#endif /* _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H */
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h b/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h
new file mode 100644
index 0000000..8a92f92
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h
@@ -0,0 +1,36 @@
+/*
+ * Freescale STMP378X: clock registers definitions
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#ifndef _INCLUDE_ASM_ARCH_REGS_ICOLL_H
+#define _INCLUDE_ASM_ARCH_REGS_ICOLL_H
+
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_ICOLL_BASE (REGS_BASE + 0x00000000)
+
+HW_REGISTER(HW_ICOLL_VECTOR, REGS_ICOLL_BASE, 0x00)
+HW_REGISTER_WO(HW_ICOLL_LEVELACK, REGS_ICOLL_BASE, 0x10)
+HW_REGISTER(HW_ICOLL_CTRL, REGS_ICOLL_BASE, 0x20)
+#define BM_ICOLL_CTRL_CLKGATE	  0x40000000
+#define BM_ICOLL_CTRL_SFTRST	   0x80000000
+HW_REGISTER_RO(HW_ICOLL_STAT, REGS_ICOLL_BASE, 0x30)
+
+HW_REGISTER_INDEXED(HW_ICOLL_PRIORITYn, REGS_ICOLL_BASE, 0x60, 0x10)
+
+#endif /* _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H */
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h b/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h
new file mode 100644
index 0000000..b114ecd
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h
@@ -0,0 +1,159 @@
+/*
+ * STMP pinmux register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#ifndef _INCLUDE_ASM_ARCH_REGS_PINCTRL_H
+#define _INCLUDE_ASM_ARCH_REGS_PINCTRL_H
+
+#include <mach/stmp3xxx_regs.h>
+
+#ifndef REGS_PINCTRL_BASE
+#define REGS_PINCTRL_BASE (REGS_BASE + 0x00018000)
+#endif /* REGS_PINCTRL_BASE */
+
+HW_REGISTER(HW_PINCTRL_CTRL, REGS_PINCTRL_BASE, 0)
+
+#define HW_PINCTRL_MUXSEL0_ADDR (REGS_PINCTRL_BASE + 0x100)
+HW_REGISTER(HW_PINCTRL_MUXSEL0, REGS_PINCTRL_BASE, 0x100)
+#define HW_PINCTRL_MUXSEL1_ADDR (REGS_PINCTRL_BASE + 0x110)
+HW_REGISTER(HW_PINCTRL_MUXSEL1, REGS_PINCTRL_BASE, 0x110)
+#define HW_PINCTRL_MUXSEL2_ADDR (REGS_PINCTRL_BASE + 0x120)
+HW_REGISTER(HW_PINCTRL_MUXSEL2, REGS_PINCTRL_BASE, 0x120)
+#define HW_PINCTRL_MUXSEL3_ADDR (REGS_PINCTRL_BASE + 0x130)
+HW_REGISTER(HW_PINCTRL_MUXSEL3, REGS_PINCTRL_BASE, 0x130)
+#define BM_PINCTRL_MUXSEL3_BANK1_PIN28      0x03000000
+#define HW_PINCTRL_MUXSEL4_ADDR (REGS_PINCTRL_BASE + 0x140)
+HW_REGISTER(HW_PINCTRL_MUXSEL4, REGS_PINCTRL_BASE, 0x140)
+#define BM_PINCTRL_MUXSEL4_BANK2_PIN03      0x000000C0
+#define BM_PINCTRL_MUXSEL4_BANK2_PIN04      0x00000300
+#define HW_PINCTRL_MUXSEL5_ADDR (REGS_PINCTRL_BASE + 0x150)
+HW_REGISTER(HW_PINCTRL_MUXSEL5, REGS_PINCTRL_BASE, 0x150)
+#define HW_PINCTRL_MUXSEL6_ADDR (REGS_PINCTRL_BASE + 0x160)
+HW_REGISTER(HW_PINCTRL_MUXSEL6, REGS_PINCTRL_BASE, 0x160)
+#define HW_PINCTRL_MUXSEL7_ADDR (REGS_PINCTRL_BASE + 0x170)
+HW_REGISTER(HW_PINCTRL_MUXSEL7, REGS_PINCTRL_BASE, 0x170)
+
+HW_REGISTER(HW_PINCTRL_DRIVE0, REGS_PINCTRL_BASE, 0x200)
+#define HW_PINCTRL_DRIVE0_ADDR (REGS_PINCTRL_BASE + 0x200)
+HW_REGISTER(HW_PINCTRL_DRIVE1, REGS_PINCTRL_BASE, 0x210)
+#define HW_PINCTRL_DRIVE1_ADDR (REGS_PINCTRL_BASE + 0x210)
+HW_REGISTER(HW_PINCTRL_DRIVE2, REGS_PINCTRL_BASE, 0x220)
+#define HW_PINCTRL_DRIVE2_ADDR (REGS_PINCTRL_BASE + 0x220)
+HW_REGISTER(HW_PINCTRL_DRIVE3, REGS_PINCTRL_BASE, 0x230)
+#define HW_PINCTRL_DRIVE3_ADDR (REGS_PINCTRL_BASE + 0x230)
+HW_REGISTER(HW_PINCTRL_DRIVE4, REGS_PINCTRL_BASE, 0x240)
+#define HW_PINCTRL_DRIVE4_ADDR (REGS_PINCTRL_BASE + 0x240)
+HW_REGISTER(HW_PINCTRL_DRIVE5, REGS_PINCTRL_BASE, 0x250)
+#define HW_PINCTRL_DRIVE5_ADDR (REGS_PINCTRL_BASE + 0x250)
+HW_REGISTER(HW_PINCTRL_DRIVE6, REGS_PINCTRL_BASE, 0x260)
+#define HW_PINCTRL_DRIVE6_ADDR (REGS_PINCTRL_BASE + 0x260)
+HW_REGISTER(HW_PINCTRL_DRIVE7, REGS_PINCTRL_BASE, 0x270)
+#define HW_PINCTRL_DRIVE7_ADDR (REGS_PINCTRL_BASE + 0x270)
+HW_REGISTER(HW_PINCTRL_DRIVE8, REGS_PINCTRL_BASE, 0x280)
+#define HW_PINCTRL_DRIVE8_ADDR (REGS_PINCTRL_BASE + 0x280)
+HW_REGISTER(HW_PINCTRL_DRIVE9, REGS_PINCTRL_BASE, 0x290)
+#define HW_PINCTRL_DRIVE9_ADDR (REGS_PINCTRL_BASE + 0x290)
+HW_REGISTER(HW_PINCTRL_DRIVE10, REGS_PINCTRL_BASE, 0x2a0)
+#define HW_PINCTRL_DRIVE10_ADDR (REGS_PINCTRL_BASE + 0x2a0)
+HW_REGISTER(HW_PINCTRL_DRIVE11, REGS_PINCTRL_BASE, 0x2b0)
+#define HW_PINCTRL_DRIVE11_ADDR (REGS_PINCTRL_BASE + 0x2b0)
+HW_REGISTER(HW_PINCTRL_DRIVE12, REGS_PINCTRL_BASE, 0x2c0)
+#define HW_PINCTRL_DRIVE12_ADDR (REGS_PINCTRL_BASE + 0x2c0)
+HW_REGISTER(HW_PINCTRL_DRIVE13, REGS_PINCTRL_BASE, 0x2d0)
+#define HW_PINCTRL_DRIVE13_ADDR (REGS_PINCTRL_BASE + 0x2d0)
+HW_REGISTER(HW_PINCTRL_DRIVE14, REGS_PINCTRL_BASE, 0x2e0)
+#define HW_PINCTRL_DRIVE14_ADDR (REGS_PINCTRL_BASE + 0x2e0)
+
+
+HW_REGISTER(HW_PINCTRL_PULL0, REGS_PINCTRL_BASE, 0x300)
+#define HW_PINCTRL_PULL0_ADDR (REGS_PINCTRL_BASE + 0x300)
+#define BM_PINCTRL_PULL0_BANK0_PIN01      0x00000002
+#define BM_PINCTRL_PULL0_BANK0_PIN02      0x00000004
+#define BM_PINCTRL_PULL0_BANK0_PIN03      0x00000008
+#define BM_PINCTRL_PULL0_BANK0_PIN04      0x00000010
+#define BM_PINCTRL_PULL0_BANK0_PIN20      0x00100000
+HW_REGISTER(HW_PINCTRL_PULL1, REGS_PINCTRL_BASE, 0x310)
+#define HW_PINCTRL_PULL1_ADDR (REGS_PINCTRL_BASE + 0x310)
+#define BM_PINCTRL_PULL1_BANK1_PIN22      0x00400000
+#define BM_PINCTRL_PULL1_BANK1_PIN24      0x01000000
+#define BM_PINCTRL_PULL1_BANK1_PIN25      0x02000000
+#define BM_PINCTRL_PULL1_BANK1_PIN26      0x04000000
+#define BM_PINCTRL_PULL1_BANK1_PIN27      0x08000000
+HW_REGISTER(HW_PINCTRL_PULL2, REGS_PINCTRL_BASE, 0x320)
+#define HW_PINCTRL_PULL2_ADDR (REGS_PINCTRL_BASE + 0x320)
+HW_REGISTER(HW_PINCTRL_PULL3, REGS_PINCTRL_BASE, 0x330)
+#define HW_PINCTRL_PULL3_ADDR (REGS_PINCTRL_BASE + 0x330)
+
+#define HW_PINCTRL_DOUT0_ADDR (REGS_PINCTRL_BASE + 0x400)
+HW_REGISTER(HW_PINCTRL_DOUT0, REGS_PINCTRL_BASE, 0x400)
+#define HW_PINCTRL_DOUT1_ADDR (REGS_PINCTRL_BASE + 0x410)
+HW_REGISTER(HW_PINCTRL_DOUT1, REGS_PINCTRL_BASE, 0x410)
+#define HW_PINCTRL_DOUT2_ADDR (REGS_PINCTRL_BASE + 0x420)
+HW_REGISTER(HW_PINCTRL_DOUT2, REGS_PINCTRL_BASE, 0x420)
+
+#define HW_PINCTRL_DIN0_ADDR (REGS_PINCTRL_BASE + 0x500)
+HW_REGISTER_RO(HW_PINCTRL_DIN0, REGS_PINCTRL_BASE, 0x500)
+#define HW_PINCTRL_DIN1_ADDR (REGS_PINCTRL_BASE + 0x510)
+HW_REGISTER_RO(HW_PINCTRL_DIN1, REGS_PINCTRL_BASE, 0x510)
+#define HW_PINCTRL_DIN2_ADDR (REGS_PINCTRL_BASE + 0x520)
+HW_REGISTER_RO(HW_PINCTRL_DIN2, REGS_PINCTRL_BASE, 0x520)
+
+#define HW_PINCTRL_DOE0_ADDR (REGS_PINCTRL_BASE + 0x600)
+HW_REGISTER(HW_PINCTRL_DOE0, REGS_PINCTRL_BASE, 0x600)
+#define HW_PINCTRL_DOE1_ADDR (REGS_PINCTRL_BASE + 0x610)
+HW_REGISTER(HW_PINCTRL_DOE1, REGS_PINCTRL_BASE, 0x610)
+#define HW_PINCTRL_DOE2_ADDR (REGS_PINCTRL_BASE + 0x620)
+HW_REGISTER(HW_PINCTRL_DOE2, REGS_PINCTRL_BASE, 0x620)
+
+HW_REGISTER(HW_PINCTRL_PIN2IRQ0, REGS_PINCTRL_BASE, 0x700)
+#define HW_PINCTRL_PIN2IRQ0_ADDR (REGS_PINCTRL_BASE + 0x700)
+HW_REGISTER(HW_PINCTRL_PIN2IRQ1, REGS_PINCTRL_BASE, 0x710)
+#define HW_PINCTRL_PIN2IRQ1_ADDR (REGS_PINCTRL_BASE + 0x710)
+HW_REGISTER(HW_PINCTRL_PIN2IRQ2, REGS_PINCTRL_BASE, 0x720)
+#define HW_PINCTRL_PIN2IRQ2_ADDR (REGS_PINCTRL_BASE + 0x720)
+
+HW_REGISTER(HW_PINCTRL_IRQEN0, REGS_PINCTRL_BASE, 0x800)
+#define HW_PINCTRL_IRQEN0_ADDR (REGS_PINCTRL_BASE + 0x800)
+HW_REGISTER(HW_PINCTRL_IRQEN1, REGS_PINCTRL_BASE, 0x810)
+#define HW_PINCTRL_IRQEN1_ADDR (REGS_PINCTRL_BASE + 0x810)
+HW_REGISTER(HW_PINCTRL_IRQEN2, REGS_PINCTRL_BASE, 0x820)
+#define HW_PINCTRL_IRQEN2_ADDR (REGS_PINCTRL_BASE + 0x820)
+
+HW_REGISTER(HW_PINCTRL_IRQLEVEL0, REGS_PINCTRL_BASE, 0x900)
+#define HW_PINCTRL_IRQLEVEL0_ADDR (REGS_PINCTRL_BASE + 0x900)
+HW_REGISTER(HW_PINCTRL_IRQLEVEL1, REGS_PINCTRL_BASE, 0x910)
+#define HW_PINCTRL_IRQLEVEL1_ADDR (REGS_PINCTRL_BASE + 0x910)
+HW_REGISTER(HW_PINCTRL_IRQLEVEL2, REGS_PINCTRL_BASE, 0x920)
+#define HW_PINCTRL_IRQLEVEL2_ADDR (REGS_PINCTRL_BASE + 0x920)
+
+HW_REGISTER(HW_PINCTRL_IRQPOL0, REGS_PINCTRL_BASE, 0xA00)
+#define HW_PINCTRL_IRQPOL0_ADDR (REGS_PINCTRL_BASE + 0xa00)
+HW_REGISTER(HW_PINCTRL_IRQPOL1, REGS_PINCTRL_BASE, 0xA10)
+#define HW_PINCTRL_IRQPOL1_ADDR (REGS_PINCTRL_BASE + 0xa10)
+HW_REGISTER(HW_PINCTRL_IRQPOL2, REGS_PINCTRL_BASE, 0xA20)
+#define HW_PINCTRL_IRQPOL2_ADDR (REGS_PINCTRL_BASE + 0xa20)
+
+HW_REGISTER(HW_PINCTRL_IRQSTAT0, REGS_PINCTRL_BASE, 0xB00)
+#define HW_PINCTRL_IRQSTAT0_ADDR (REGS_PINCTRL_BASE + 0xb00)
+HW_REGISTER(HW_PINCTRL_IRQSTAT1, REGS_PINCTRL_BASE, 0xB10)
+#define HW_PINCTRL_IRQSTAT1_ADDR (REGS_PINCTRL_BASE + 0xb10)
+HW_REGISTER(HW_PINCTRL_IRQSTAT2, REGS_PINCTRL_BASE, 0xB20)
+#define HW_PINCTRL_IRQSTAT2_ADDR (REGS_PINCTRL_BASE + 0xb20)
+
+#endif /* _INCLUDE_ASM_ARCH_REGS_PINCTRL_H */
+
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-power.h b/arch/arm/mach-stmp37xx/include/mach/regs-power.h
new file mode 100644
index 0000000..d15cd66
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-power.h
@@ -0,0 +1,31 @@
+/*
+ * STMP POWER Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __ARCH_ARM___POWER_H
+#define __ARCH_ARM___POWER_H  1
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_POWER_BASE (void __iomem *)(REGS_BASE + 0x44000)
+#define REGS_POWER_BASE_PHYS (0x80044000)
+#define REGS_POWER_SIZE 0x00002000
+HW_REGISTER(HW_POWER_MINPWR, REGS_POWER_BASE, 0x00000020)
+HW_REGISTER(HW_POWER_CHARGE, REGS_POWER_BASE, 0x00000030)
+#endif /* __ARCH_ARM___POWER_H */
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h b/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h
new file mode 100644
index 0000000..7f00030
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h
@@ -0,0 +1,52 @@
+/*
+ * include/asm-arm/arch-stmp3xxx/regstimer.h
+ *
+ * Copyright (c) 2008 SigmaTel Inc
+ * Copyright (c) 2008 Embedded Alley Solutions, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef __ARCH_ARM_REGS_TIMROT_H
+#define __ARCH_ARM_REGS_TIMROT_H
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_TIMROT_BASE (REGS_BASE + 0x00068000)
+
+HW_REGISTER(HW_TIMROT_ROTCTRL, REGS_TIMROT_BASE, 0)
+#define BM_TIMROT_ROTCTRL_SFTRST      0x80000000
+#define BM_TIMROT_ROTCTRL_CLKGATE      0x40000000
+
+HW_REGISTER_INDEXED(HW_TIMROT_TIMCTRLn, REGS_TIMROT_BASE, 0x20, 0x20)
+#define BM_TIMROT_TIMCTRLn_SELECT      0x0000000F
+#define BF_TIMROT_TIMCTRLn_SELECT(v)   (((v) << 0) & BM_TIMROT_TIMCTRLn_SELECT)
+#define BM_TIMROT_TIMCTRLn_PRESCALE      0x00000030
+#define BF_TIMROT_TIMCTRLn_PRESCALE(v)   \
+	(((v) << 4) & BM_TIMROT_TIMCTRLn_PRESCALE)
+#define BM_TIMROT_TIMCTRLn_RELOAD      0x00000040
+#define BF_TIMROT_TIMCTRLn_RELOAD(v)   (((v) << 6) & BM_TIMROT_TIMCTRLn_RELOAD)
+#define BM_TIMROT_TIMCTRLn_UPDATE      0x00000080
+#define BF_TIMROT_TIMCTRLn_UPDATE(v)   (((v) << 7) & BM_TIMROT_TIMCTRLn_UPDATE)
+#define BM_TIMROT_TIMCTRLn_POLARITY      0x00000100
+#define BF_TIMROT_TIMCTRLn_POLARITY(v)   \
+	(((v) << 8) & BM_TIMROT_TIMCTRLn_POLARITY)
+#define BM_TIMROT_TIMCTRLn_IRQ_EN      0x00004000
+#define BF_TIMROT_TIMCTRLn_IRQ_EN(v)   \
+	(((v) << 14) & BM_TIMROT_TIMCTRLn_IRQ_EN)
+#define BM_TIMROT_TIMCTRLn_IRQ      0x00008000
+#define BF_TIMROT_TIMCTRLn_IRQ(v)   (((v) << 15) & BM_TIMROT_TIMCTRLn_IRQ)
+HW_REGISTER_0_INDEXED(HW_TIMROT_TIMCOUNTn, REGS_TIMROT_BASE, 0x30, 0x20)
+
+#endif /* __ARCH_ARM_REGSTIMER_H */
-- 
cgit v0.10.2


From 07d9714365bcab286389d679f73512e35796847c Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Wed, 22 Apr 2009 23:54:42 +0100
Subject: [ARM] 5467/1: Freescale STMP platform support [4/10]

Minimal definition of register set for 378x boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp378x/include/mach/regs-apbh.h b/arch/arm/mach-stmp378x/include/mach/regs-apbh.h
new file mode 100644
index 0000000..db63b04
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-apbh.h
@@ -0,0 +1,88 @@
+/*
+ * STMP APBH Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __ARCH_ARM___APBH_H
+#define __ARCH_ARM___APBH_H  1
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_APBH_BASE (REGS_BASE + 0x4000)
+#define REGS_APBH_BASE_PHYS (0x80004000)
+#define REGS_APBH_SIZE 0x00002000
+HW_REGISTER(HW_APBH_CTRL0, REGS_APBH_BASE, 0x00000000)
+#define HW_APBH_CTRL0_ADDR (REGS_APBH_BASE + 0x00000000)
+#define BM_APBH_CTRL0_SFTRST 0x80000000
+#define BM_APBH_CTRL0_CLKGATE 0x40000000
+#define BM_APBH_CTRL0_AHB_BURST8_EN 0x20000000
+#define BM_APBH_CTRL0_APB_BURST4_EN 0x10000000
+#define BP_APBH_CTRL0_RESET_CHANNEL      16
+#define BM_APBH_CTRL0_RESET_CHANNEL 0x00FF0000
+#define BF_APBH_CTRL0_RESET_CHANNEL(v)  \
+	(((v) << 16) & BM_APBH_CTRL0_RESET_CHANNEL)
+HW_REGISTER(HW_APBH_CTRL1, REGS_APBH_BASE, 0x00000010)
+#define HW_APBH_CTRL1_ADDR (REGS_APBH_BASE + 0x00000010)
+HW_REGISTER(HW_APBH_CTRL2, REGS_APBH_BASE, 0x00000020)
+HW_REGISTER_0(HW_APBH_DEVSEL, REGS_APBH_BASE, 0x00000030)
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_CURCMDAR, REGS_APBH_BASE, 0x00000040, 0x70)
+#define BP_APBH_CHn_CURCMDAR_CMD_ADDR      0
+#define BM_APBH_CHn_CURCMDAR_CMD_ADDR 0xFFFFFFFF
+#define BF_APBH_CHn_CURCMDAR_CMD_ADDR(v)   (v)
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_NXTCMDAR, REGS_APBH_BASE, 0x00000050, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_CMD, REGS_APBH_BASE, 0x00000060, 0x70)
+#define BP_APBH_CHn_CMD_XFER_COUNT      16
+#define BM_APBH_CHn_CMD_XFER_COUNT 0xFFFF0000
+#define BF_APBH_CHn_CMD_XFER_COUNT(v) \
+	(((v) << 16) & BM_APBH_CHn_CMD_XFER_COUNT)
+#define BP_APBH_CHn_CMD_CMDWORDS      12
+#define BM_APBH_CHn_CMD_CMDWORDS 0x0000F000
+#define BF_APBH_CHn_CMD_CMDWORDS(v)  \
+	(((v) << 12) & BM_APBH_CHn_CMD_CMDWORDS)
+#define BM_APBH_CHn_CMD_HALTONTERMINATE 0x00000100
+#define BM_APBH_CHn_CMD_WAIT4ENDCMD 0x00000080
+#define BM_APBH_CHn_CMD_SEMAPHORE 0x00000040
+#define BM_APBH_CHn_CMD_NANDWAIT4READY 0x00000020
+#define BM_APBH_CHn_CMD_NANDLOCK 0x00000010
+#define BM_APBH_CHn_CMD_IRQONCMPLT 0x00000008
+#define BM_APBH_CHn_CMD_CHAIN 0x00000004
+#define BP_APBH_CHn_CMD_COMMAND      0
+#define BM_APBH_CHn_CMD_COMMAND 0x00000003
+#define BF_APBH_CHn_CMD_COMMAND(v)  \
+	(((v) << 0) & BM_APBH_CHn_CMD_COMMAND)
+#define BV_APBH_CHn_CMD_COMMAND__NO_DMA_XFER 0x0
+#define BV_APBH_CHn_CMD_COMMAND__DMA_WRITE   0x1
+#define BV_APBH_CHn_CMD_COMMAND__DMA_READ    0x2
+#define BV_APBH_CHn_CMD_COMMAND__DMA_SENSE   0x3
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_BAR, REGS_APBH_BASE, 0x00000070, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_SEMA, REGS_APBH_BASE, 0x00000080, 0x70)
+#define BP_APBH_CHn_SEMA_PHORE      16
+#define BM_APBH_CHn_SEMA_PHORE 0x00FF0000
+#define BF_APBH_CHn_SEMA_PHORE(v)  \
+	(((v) << 16) & BM_APBH_CHn_SEMA_PHORE)
+#define BP_APBH_CHn_SEMA_INCREMENT_SEMA      0
+#define BM_APBH_CHn_SEMA_INCREMENT_SEMA 0x000000FF
+#define BF_APBH_CHn_SEMA_INCREMENT_SEMA(v)  \
+	(((v) << 0) & BM_APBH_CHn_SEMA_INCREMENT_SEMA)
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_DEBUG1, REGS_APBH_BASE, 0x00000090, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBH_CHn_DEBUG2, REGS_APBH_BASE, 0x000000a0, 0x70)
+HW_REGISTER_0(HW_APBH_VERSION, REGS_APBH_BASE, 0x000003f0)
+#endif /* __ARCH_ARM___APBH_H */
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-apbx.h b/arch/arm/mach-stmp378x/include/mach/regs-apbx.h
new file mode 100644
index 0000000..d0e8e9f
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-apbx.h
@@ -0,0 +1,79 @@
+/*
+ * STMP APBX Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#ifndef __ARCH_ARM___APBX_H
+#define __ARCH_ARM___APBX_H  1
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_APBX_BASE (REGS_BASE + 0x24000)
+#define REGS_APBX_BASE_PHYS (0x80024000)
+#define REGS_APBX_SIZE 0x00002000
+HW_REGISTER(HW_APBX_CTRL0, REGS_APBX_BASE, 0x00000000)
+#define HW_APBX_CTRL0_ADDR (REGS_APBX_BASE + 0x00000000)
+#define BM_APBX_CTRL0_SFTRST 0x80000000
+#define BM_APBX_CTRL0_CLKGATE 0x40000000
+HW_REGISTER(HW_APBX_CTRL1, REGS_APBX_BASE, 0x00000010)
+HW_REGISTER(HW_APBX_CTRL2, REGS_APBX_BASE, 0x00000020)
+HW_REGISTER(HW_APBX_CHANNEL_CTRL, REGS_APBX_BASE, 0x00000030)
+#define BP_APBX_CHANNEL_CTRL_RESET_CHANNEL      16
+#define BM_APBX_CHANNEL_CTRL_RESET_CHANNEL      0xFFFF0000
+#define BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(v)   \
+	(((v) << BP_APBX_CHANNEL_CTRL_RESET_CHANNEL) & \
+	 BM_APBX_CHANNEL_CTRL_RESET_CHANNEL)
+HW_REGISTER_0(HW_APBX_DEVSEL, REGS_APBX_BASE, 0x00000040)
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_CURCMDAR, REGS_APBX_BASE, 0x00000100, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_NXTCMDAR, REGS_APBX_BASE, 0x00000110, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_CMD, REGS_APBX_BASE, 0x00000120, 0x70)
+#define BP_APBX_CHn_CMD_XFER_COUNT      16
+#define BM_APBX_CHn_CMD_XFER_COUNT 0xFFFF0000
+#define BF_APBX_CHn_CMD_XFER_COUNT(v) \
+	(((v) << 16) & BM_APBX_CHn_CMD_XFER_COUNT)
+#define BP_APBX_CHn_CMD_CMDWORDS      12
+#define BM_APBX_CHn_CMD_CMDWORDS 0x0000F000
+#define BF_APBX_CHn_CMD_CMDWORDS(v)  \
+	(((v) << 12) & BM_APBX_CHn_CMD_CMDWORDS)
+#define BM_APBX_CHn_CMD_HALTONTERMINATE 0x00000100
+#define BM_APBX_CHn_CMD_WAIT4ENDCMD 0x00000080
+#define BM_APBX_CHn_CMD_SEMAPHORE 0x00000040
+#define BM_APBX_CHn_CMD_IRQONCMPLT 0x00000008
+#define BM_APBX_CHn_CMD_CHAIN 0x00000004
+#define BP_APBX_CHn_CMD_COMMAND      0
+#define BM_APBX_CHn_CMD_COMMAND 0x00000003
+#define BF_APBX_CHn_CMD_COMMAND(v)  \
+	(((v) << 0) & BM_APBX_CHn_CMD_COMMAND)
+#define BV_APBX_CHn_CMD_COMMAND__NO_DMA_XFER 0x0
+#define BV_APBX_CHn_CMD_COMMAND__DMA_WRITE   0x1
+#define BV_APBX_CHn_CMD_COMMAND__DMA_READ    0x2
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_BAR, REGS_APBX_BASE, 0x00000130, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_SEMA, REGS_APBX_BASE, 0x00000140, 0x70)
+#define BP_APBX_CHn_SEMA_PHORE      16
+#define BM_APBX_CHn_SEMA_PHORE 0x00FF0000
+#define BF_APBX_CHn_SEMA_PHORE(v)  \
+	(((v) << 16) & BM_APBX_CHn_SEMA_PHORE)
+#define BP_APBX_CHn_SEMA_INCREMENT_SEMA      0
+#define BM_APBX_CHn_SEMA_INCREMENT_SEMA 0x000000FF
+#define BF_APBX_CHn_SEMA_INCREMENT_SEMA(v)  \
+	(((v) << 0) & BM_APBX_CHn_SEMA_INCREMENT_SEMA)
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_DEBUG1, REGS_APBX_BASE, 0x00000150, 0x70)
+HW_REGISTER_0_INDEXED(HW_APBX_CHn_DEBUG2, REGS_APBX_BASE, 0x00000160, 0x70)
+HW_REGISTER_0(HW_APBX_VERSION, REGS_APBX_BASE, 0x00000800)
+#endif /* __ARCH_ARM___APBX_H */
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h b/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h
new file mode 100644
index 0000000..a421d9e
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h
@@ -0,0 +1,276 @@
+/*
+ * STMP CLKCTRL Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __ARCH_ARM___CLKCTRL_H
+#define __ARCH_ARM___CLKCTRL_H  1
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_CLKCTRL_BASE (REGS_BASE + 0x40000)
+#define REGS_CLKCTRL_BASE_PHYS (0x80040000)
+#define REGS_CLKCTRL_SIZE 0x00002000
+HW_REGISTER(HW_CLKCTRL_PLLCTRL0, REGS_CLKCTRL_BASE, 0x00000000)
+#define HW_CLKCTRL_PLLCTRL0_ADDR (REGS_CLKCTRL_BASE + 0x00000000)
+#define BP_CLKCTRL_PLLCTRL0_LFR_SEL      28
+#define BM_CLKCTRL_PLLCTRL0_LFR_SEL 0x30000000
+#define BF_CLKCTRL_PLLCTRL0_LFR_SEL(v)  \
+	(((v) << 28) & BM_CLKCTRL_PLLCTRL0_LFR_SEL)
+#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__DEFAULT   0x0
+#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__TIMES_2   0x1
+#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__TIMES_05  0x2
+#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__UNDEFINED 0x3
+#define BP_CLKCTRL_PLLCTRL0_CP_SEL      24
+#define BM_CLKCTRL_PLLCTRL0_CP_SEL 0x03000000
+#define BF_CLKCTRL_PLLCTRL0_CP_SEL(v)  \
+	(((v) << 24) & BM_CLKCTRL_PLLCTRL0_CP_SEL)
+#define BV_CLKCTRL_PLLCTRL0_CP_SEL__DEFAULT   0x0
+#define BV_CLKCTRL_PLLCTRL0_CP_SEL__TIMES_2   0x1
+#define BV_CLKCTRL_PLLCTRL0_CP_SEL__TIMES_05  0x2
+#define BV_CLKCTRL_PLLCTRL0_CP_SEL__UNDEFINED 0x3
+#define BP_CLKCTRL_PLLCTRL0_DIV_SEL      20
+#define BM_CLKCTRL_PLLCTRL0_DIV_SEL 0x00300000
+#define BF_CLKCTRL_PLLCTRL0_DIV_SEL(v)  \
+	(((v) << 20) & BM_CLKCTRL_PLLCTRL0_DIV_SEL)
+#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__DEFAULT   0x0
+#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__LOWER     0x1
+#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__LOWEST    0x2
+#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__UNDEFINED 0x3
+#define BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS 0x00040000
+#define BM_CLKCTRL_PLLCTRL0_POWER 0x00010000
+HW_REGISTER_0(HW_CLKCTRL_PLLCTRL1, REGS_CLKCTRL_BASE, 0x00000010)
+#define HW_CLKCTRL_PLLCTRL1_ADDR (REGS_CLKCTRL_BASE + 0x00000010)
+#define BM_CLKCTRL_PLLCTRL1_LOCK 0x80000000
+#define BM_CLKCTRL_PLLCTRL1_FORCE_LOCK 0x40000000
+#define BP_CLKCTRL_PLLCTRL1_LOCK_COUNT      0
+#define BM_CLKCTRL_PLLCTRL1_LOCK_COUNT 0x0000FFFF
+#define BF_CLKCTRL_PLLCTRL1_LOCK_COUNT(v)  \
+	(((v) << 0) & BM_CLKCTRL_PLLCTRL1_LOCK_COUNT)
+HW_REGISTER(HW_CLKCTRL_CPU, REGS_CLKCTRL_BASE, 0x00000020)
+#define HW_CLKCTRL_CPU_ADDR (REGS_CLKCTRL_BASE + 0x00000020)
+#define BM_CLKCTRL_CPU_BUSY_REF_XTAL 0x20000000
+#define BM_CLKCTRL_CPU_BUSY_REF_CPU 0x10000000
+#define BM_CLKCTRL_CPU_DIV_XTAL_FRAC_EN 0x04000000
+#define BP_CLKCTRL_CPU_DIV_XTAL      16
+#define BM_CLKCTRL_CPU_DIV_XTAL 0x03FF0000
+#define BF_CLKCTRL_CPU_DIV_XTAL(v)  \
+	(((v) << 16) & BM_CLKCTRL_CPU_DIV_XTAL)
+#define BM_CLKCTRL_CPU_INTERRUPT_WAIT 0x00001000
+#define BM_CLKCTRL_CPU_DIV_CPU_FRAC_EN 0x00000400
+#define BP_CLKCTRL_CPU_DIV_CPU      0
+#define BM_CLKCTRL_CPU_DIV_CPU 0x0000003F
+#define BF_CLKCTRL_CPU_DIV_CPU(v)  \
+	(((v) << 0) & BM_CLKCTRL_CPU_DIV_CPU)
+HW_REGISTER(HW_CLKCTRL_HBUS, REGS_CLKCTRL_BASE, 0x00000030)
+#define HW_CLKCTRL_HBUS_ADDR (REGS_CLKCTRL_BASE + 0x00000030)
+#define BM_CLKCTRL_HBUS_BUSY 0x20000000
+#define BM_CLKCTRL_HBUS_DCP_AS_ENABLE 0x10000000
+#define BM_CLKCTRL_HBUS_PXP_AS_ENABLE 0x08000000
+#define BM_CLKCTRL_HBUS_APBHDMA_AS_ENABLE 0x04000000
+#define BM_CLKCTRL_HBUS_APBXDMA_AS_ENABLE 0x02000000
+#define BM_CLKCTRL_HBUS_TRAFFIC_JAM_AS_ENABLE 0x01000000
+#define BM_CLKCTRL_HBUS_TRAFFIC_AS_ENABLE 0x00800000
+#define BM_CLKCTRL_HBUS_CPU_DATA_AS_ENABLE 0x00400000
+#define BM_CLKCTRL_HBUS_CPU_INSTR_AS_ENABLE 0x00200000
+#define BM_CLKCTRL_HBUS_AUTO_SLOW_MODE 0x00100000
+#define BP_CLKCTRL_HBUS_SLOW_DIV      16
+#define BM_CLKCTRL_HBUS_SLOW_DIV 0x00070000
+#define BF_CLKCTRL_HBUS_SLOW_DIV(v)  \
+	(((v) << 16) & BM_CLKCTRL_HBUS_SLOW_DIV)
+#define BV_CLKCTRL_HBUS_SLOW_DIV__BY1  0x0
+#define BV_CLKCTRL_HBUS_SLOW_DIV__BY2  0x1
+#define BV_CLKCTRL_HBUS_SLOW_DIV__BY4  0x2
+#define BV_CLKCTRL_HBUS_SLOW_DIV__BY8  0x3
+#define BV_CLKCTRL_HBUS_SLOW_DIV__BY16 0x4
+#define BV_CLKCTRL_HBUS_SLOW_DIV__BY32 0x5
+#define BM_CLKCTRL_HBUS_DIV_FRAC_EN 0x00000020
+#define BP_CLKCTRL_HBUS_DIV      0
+#define BM_CLKCTRL_HBUS_DIV 0x0000001F
+#define BF_CLKCTRL_HBUS_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_HBUS_DIV)
+HW_REGISTER_0(HW_CLKCTRL_XBUS, REGS_CLKCTRL_BASE, 0x00000040)
+#define HW_CLKCTRL_XBUS_ADDR (REGS_CLKCTRL_BASE + 0x00000040)
+#define BM_CLKCTRL_XBUS_BUSY 0x80000000
+#define BM_CLKCTRL_XBUS_DIV_FRAC_EN 0x00000400
+#define BP_CLKCTRL_XBUS_DIV      0
+#define BM_CLKCTRL_XBUS_DIV 0x000003FF
+#define BF_CLKCTRL_XBUS_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_XBUS_DIV)
+HW_REGISTER(HW_CLKCTRL_XTAL, REGS_CLKCTRL_BASE, 0x00000050)
+#define HW_CLKCTRL_XTAL_ADDR (REGS_CLKCTRL_BASE + 0x00000050)
+#define BM_CLKCTRL_XTAL_UART_CLK_GATE 0x80000000
+#define BM_CLKCTRL_XTAL_FILT_CLK24M_GATE 0x40000000
+#define BM_CLKCTRL_XTAL_PWM_CLK24M_GATE 0x20000000
+#define BM_CLKCTRL_XTAL_DRI_CLK24M_GATE 0x10000000
+#define BM_CLKCTRL_XTAL_DIGCTRL_CLK1M_GATE 0x08000000
+#define BM_CLKCTRL_XTAL_TIMROT_CLK32K_GATE 0x04000000
+#define BP_CLKCTRL_XTAL_DIV_UART      0
+#define BM_CLKCTRL_XTAL_DIV_UART 0x00000003
+#define BF_CLKCTRL_XTAL_DIV_UART(v)  \
+	(((v) << 0) & BM_CLKCTRL_XTAL_DIV_UART)
+HW_REGISTER_0(HW_CLKCTRL_PIX, REGS_CLKCTRL_BASE, 0x00000060)
+#define HW_CLKCTRL_PIX_ADDR (REGS_CLKCTRL_BASE + 0x00000060)
+#define BM_CLKCTRL_PIX_CLKGATE 0x80000000
+#define BM_CLKCTRL_PIX_BUSY 0x20000000
+#define BM_CLKCTRL_PIX_DIV_FRAC_EN 0x00001000
+#define BP_CLKCTRL_PIX_DIV      0
+#define BM_CLKCTRL_PIX_DIV 0x00000FFF
+#define BF_CLKCTRL_PIX_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_PIX_DIV)
+HW_REGISTER_0(HW_CLKCTRL_SSP, REGS_CLKCTRL_BASE, 0x00000070)
+#define HW_CLKCTRL_SSP_ADDR (REGS_CLKCTRL_BASE + 0x00000070)
+#define BM_CLKCTRL_SSP_CLKGATE 0x80000000
+#define BM_CLKCTRL_SSP_BUSY 0x20000000
+#define BM_CLKCTRL_SSP_DIV_FRAC_EN 0x00000200
+#define BP_CLKCTRL_SSP_DIV      0
+#define BM_CLKCTRL_SSP_DIV 0x000001FF
+#define BF_CLKCTRL_SSP_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_SSP_DIV)
+HW_REGISTER_0(HW_CLKCTRL_GPMI, REGS_CLKCTRL_BASE, 0x00000080)
+#define HW_CLKCTRL_GPMI_ADDR (REGS_CLKCTRL_BASE + 0x00000080)
+#define BM_CLKCTRL_GPMI_CLKGATE 0x80000000
+#define BM_CLKCTRL_GPMI_BUSY 0x20000000
+#define BM_CLKCTRL_GPMI_DIV_FRAC_EN 0x00000400
+#define BP_CLKCTRL_GPMI_DIV      0
+#define BM_CLKCTRL_GPMI_DIV 0x000003FF
+#define BF_CLKCTRL_GPMI_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_GPMI_DIV)
+HW_REGISTER_0(HW_CLKCTRL_SPDIF, REGS_CLKCTRL_BASE, 0x00000090)
+#define HW_CLKCTRL_SPDIF_ADDR (REGS_CLKCTRL_BASE + 0x00000090)
+#define BM_CLKCTRL_SPDIF_CLKGATE 0x80000000
+HW_REGISTER_0(HW_CLKCTRL_EMI, REGS_CLKCTRL_BASE, 0x000000a0)
+#define HW_CLKCTRL_EMI_ADDR (REGS_CLKCTRL_BASE + 0x000000a0)
+#define BM_CLKCTRL_EMI_CLKGATE 0x80000000
+#define BM_CLKCTRL_EMI_SYNC_MODE_EN 0x40000000
+#define BM_CLKCTRL_EMI_BUSY_REF_XTAL 0x20000000
+#define BM_CLKCTRL_EMI_BUSY_REF_EMI 0x10000000
+#define BM_CLKCTRL_EMI_BUSY_REF_CPU 0x08000000
+#define BM_CLKCTRL_EMI_BUSY_SYNC_MODE 0x04000000
+#define BM_CLKCTRL_EMI_BUSY_DCC_RESYNC 0x00020000
+#define BM_CLKCTRL_EMI_DCC_RESYNC_ENABLE 0x00010000
+#define BP_CLKCTRL_EMI_DIV_XTAL      8
+#define BM_CLKCTRL_EMI_DIV_XTAL 0x00000F00
+#define BF_CLKCTRL_EMI_DIV_XTAL(v)  \
+	(((v) << 8) & BM_CLKCTRL_EMI_DIV_XTAL)
+#define BP_CLKCTRL_EMI_DIV_EMI      0
+#define BM_CLKCTRL_EMI_DIV_EMI 0x0000003F
+#define BF_CLKCTRL_EMI_DIV_EMI(v)  \
+	(((v) << 0) & BM_CLKCTRL_EMI_DIV_EMI)
+HW_REGISTER_0(HW_CLKCTRL_IR, REGS_CLKCTRL_BASE, 0x000000b0)
+#define HW_CLKCTRL_IR_ADDR (REGS_CLKCTRL_BASE + 0x000000b0)
+#define BM_CLKCTRL_IR_CLKGATE 0x80000000
+#define BM_CLKCTRL_IR_AUTO_DIV 0x20000000
+#define BM_CLKCTRL_IR_IR_BUSY 0x10000000
+#define BM_CLKCTRL_IR_IROV_BUSY 0x08000000
+#define BP_CLKCTRL_IR_IROV_DIV      16
+#define BM_CLKCTRL_IR_IROV_DIV 0x01FF0000
+#define BF_CLKCTRL_IR_IROV_DIV(v)  \
+	(((v) << 16) & BM_CLKCTRL_IR_IROV_DIV)
+#define BP_CLKCTRL_IR_IR_DIV      0
+#define BM_CLKCTRL_IR_IR_DIV 0x000003FF
+#define BF_CLKCTRL_IR_IR_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_IR_IR_DIV)
+HW_REGISTER_0(HW_CLKCTRL_SAIF, REGS_CLKCTRL_BASE, 0x000000c0)
+#define HW_CLKCTRL_SAIF_ADDR (REGS_CLKCTRL_BASE + 0x000000c0)
+#define BM_CLKCTRL_SAIF_CLKGATE 0x80000000
+#define BM_CLKCTRL_SAIF_BUSY 0x20000000
+#define BM_CLKCTRL_SAIF_DIV_FRAC_EN 0x00010000
+#define BP_CLKCTRL_SAIF_DIV      0
+#define BM_CLKCTRL_SAIF_DIV 0x0000FFFF
+#define BF_CLKCTRL_SAIF_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_SAIF_DIV)
+HW_REGISTER_0(HW_CLKCTRL_TV, REGS_CLKCTRL_BASE, 0x000000d0)
+#define HW_CLKCTRL_TV_ADDR (REGS_CLKCTRL_BASE + 0x000000d0)
+#define BM_CLKCTRL_TV_CLK_TV108M_GATE 0x80000000
+#define BM_CLKCTRL_TV_CLK_TV_GATE 0x40000000
+HW_REGISTER_0(HW_CLKCTRL_ETM, REGS_CLKCTRL_BASE, 0x000000e0)
+#define HW_CLKCTRL_ETM_ADDR (REGS_CLKCTRL_BASE + 0x000000e0)
+#define BM_CLKCTRL_ETM_CLKGATE 0x80000000
+#define BM_CLKCTRL_ETM_BUSY 0x20000000
+#define BM_CLKCTRL_ETM_DIV_FRAC_EN 0x00000040
+#define BP_CLKCTRL_ETM_DIV      0
+#define BM_CLKCTRL_ETM_DIV 0x0000003F
+#define BF_CLKCTRL_ETM_DIV(v)  \
+	(((v) << 0) & BM_CLKCTRL_ETM_DIV)
+HW_REGISTER(HW_CLKCTRL_FRAC, REGS_CLKCTRL_BASE, 0x000000f0)
+#define HW_CLKCTRL_FRAC_ADDR (REGS_CLKCTRL_BASE + 0x000000f0)
+#define BM_CLKCTRL_FRAC_CLKGATEIO 0x80000000
+#define BM_CLKCTRL_FRAC_IO_STABLE 0x40000000
+#define BP_CLKCTRL_FRAC_IOFRAC      24
+#define BM_CLKCTRL_FRAC_IOFRAC 0x3F000000
+#define BF_CLKCTRL_FRAC_IOFRAC(v)  \
+	(((v) << 24) & BM_CLKCTRL_FRAC_IOFRAC)
+#define BM_CLKCTRL_FRAC_CLKGATEPIX 0x00800000
+#define BM_CLKCTRL_FRAC_PIX_STABLE 0x00400000
+#define BP_CLKCTRL_FRAC_PIXFRAC      16
+#define BM_CLKCTRL_FRAC_PIXFRAC 0x003F0000
+#define BF_CLKCTRL_FRAC_PIXFRAC(v)  \
+	(((v) << 16) & BM_CLKCTRL_FRAC_PIXFRAC)
+#define BM_CLKCTRL_FRAC_CLKGATEEMI 0x00008000
+#define BM_CLKCTRL_FRAC_EMI_STABLE 0x00004000
+#define BP_CLKCTRL_FRAC_EMIFRAC      8
+#define BM_CLKCTRL_FRAC_EMIFRAC 0x00003F00
+#define BF_CLKCTRL_FRAC_EMIFRAC(v)  \
+	(((v) << 8) & BM_CLKCTRL_FRAC_EMIFRAC)
+#define BM_CLKCTRL_FRAC_CLKGATECPU 0x00000080
+#define BM_CLKCTRL_FRAC_CPU_STABLE 0x00000040
+#define BP_CLKCTRL_FRAC_CPUFRAC      0
+#define BM_CLKCTRL_FRAC_CPUFRAC 0x0000003F
+#define BF_CLKCTRL_FRAC_CPUFRAC(v)  \
+	(((v) << 0) & BM_CLKCTRL_FRAC_CPUFRAC)
+HW_REGISTER(HW_CLKCTRL_FRAC1, REGS_CLKCTRL_BASE, 0x00000100)
+#define HW_CLKCTRL_FRAC1_ADDR (REGS_CLKCTRL_BASE + 0x00000100)
+#define BM_CLKCTRL_FRAC1_CLKGATEVID 0x80000000
+#define BM_CLKCTRL_FRAC1_VID_STABLE 0x40000000
+HW_REGISTER(HW_CLKCTRL_CLKSEQ, REGS_CLKCTRL_BASE, 0x00000110)
+#define HW_CLKCTRL_CLKSEQ_ADDR (REGS_CLKCTRL_BASE + 0x00000110)
+#define BM_CLKCTRL_CLKSEQ_BYPASS_ETM 0x00000100
+#define BM_CLKCTRL_CLKSEQ_BYPASS_CPU 0x00000080
+#define BM_CLKCTRL_CLKSEQ_BYPASS_EMI 0x00000040
+#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP 0x00000020
+#define BM_CLKCTRL_CLKSEQ_BYPASS_GPMI 0x00000010
+#define BM_CLKCTRL_CLKSEQ_BYPASS_IR 0x00000008
+#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX 0x00000002
+#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF 0x00000001
+HW_REGISTER_0(HW_CLKCTRL_RESET, REGS_CLKCTRL_BASE, 0x00000120)
+#define HW_CLKCTRL_RESET_ADDR (REGS_CLKCTRL_BASE + 0x00000120)
+#define BM_CLKCTRL_RESET_CHIP 0x00000002
+#define BM_CLKCTRL_RESET_DIG 0x00000001
+HW_REGISTER_0(HW_CLKCTRL_STATUS, REGS_CLKCTRL_BASE, 0x00000130)
+#define HW_CLKCTRL_STATUS_ADDR (REGS_CLKCTRL_BASE + 0x00000130)
+#define BP_CLKCTRL_STATUS_CPU_LIMIT      30
+#define BM_CLKCTRL_STATUS_CPU_LIMIT 0xC0000000
+#define BF_CLKCTRL_STATUS_CPU_LIMIT(v) \
+	(((v) << 30) & BM_CLKCTRL_STATUS_CPU_LIMIT)
+HW_REGISTER_0(HW_CLKCTRL_VERSION, REGS_CLKCTRL_BASE, 0x00000140)
+#define HW_CLKCTRL_VERSION_ADDR (REGS_CLKCTRL_BASE + 0x00000140)
+#define BP_CLKCTRL_VERSION_MAJOR      24
+#define BM_CLKCTRL_VERSION_MAJOR 0xFF000000
+#define BF_CLKCTRL_VERSION_MAJOR(v) \
+	(((v) << 24) & BM_CLKCTRL_VERSION_MAJOR)
+#define BP_CLKCTRL_VERSION_MINOR      16
+#define BM_CLKCTRL_VERSION_MINOR 0x00FF0000
+#define BF_CLKCTRL_VERSION_MINOR(v)  \
+	(((v) << 16) & BM_CLKCTRL_VERSION_MINOR)
+#define BP_CLKCTRL_VERSION_STEP      0
+#define BM_CLKCTRL_VERSION_STEP 0x0000FFFF
+#define BF_CLKCTRL_VERSION_STEP(v)  \
+	(((v) << 0) & BM_CLKCTRL_VERSION_STEP)
+#endif /* __ARCH_ARM___CLKCTRL_H */
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-icoll.h b/arch/arm/mach-stmp378x/include/mach/regs-icoll.h
new file mode 100644
index 0000000..a5a530c
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-icoll.h
@@ -0,0 +1,213 @@
+/*
+ * STMP ICOLL Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __ARCH_ARM___ICOLL_H
+#define __ARCH_ARM___ICOLL_H  1
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_ICOLL_BASE (REGS_BASE + 0x0)
+#define REGS_ICOLL_BASE_PHYS (0x80000000)
+#define REGS_ICOLL_SIZE 0x00002000
+HW_REGISTER(HW_ICOLL_VECTOR, REGS_ICOLL_BASE, 0x00000000)
+#define HW_ICOLL_VECTOR_ADDR (REGS_ICOLL_BASE + 0x00000000)
+#define BP_ICOLL_VECTOR_IRQVECTOR      2
+#define BM_ICOLL_VECTOR_IRQVECTOR 0xFFFFFFFC
+#define BF_ICOLL_VECTOR_IRQVECTOR(v) \
+	(((v) << 2) & BM_ICOLL_VECTOR_IRQVECTOR)
+HW_REGISTER_0(HW_ICOLL_LEVELACK, REGS_ICOLL_BASE, 0x00000010)
+#define HW_ICOLL_LEVELACK_ADDR (REGS_ICOLL_BASE + 0x00000010)
+#define BP_ICOLL_LEVELACK_IRQLEVELACK      0
+#define BM_ICOLL_LEVELACK_IRQLEVELACK 0x0000000F
+#define BF_ICOLL_LEVELACK_IRQLEVELACK(v)  \
+	(((v) << 0) & BM_ICOLL_LEVELACK_IRQLEVELACK)
+#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0 0x1
+#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL1 0x2
+#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL2 0x4
+#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL3 0x8
+HW_REGISTER(HW_ICOLL_CTRL, REGS_ICOLL_BASE, 0x00000020)
+#define HW_ICOLL_CTRL_ADDR (REGS_ICOLL_BASE + 0x00000020)
+#define BM_ICOLL_CTRL_SFTRST 0x80000000
+#define BV_ICOLL_CTRL_SFTRST__RUN      0x0
+#define BV_ICOLL_CTRL_SFTRST__IN_RESET 0x1
+#define BM_ICOLL_CTRL_CLKGATE 0x40000000
+#define BV_ICOLL_CTRL_CLKGATE__RUN       0x0
+#define BV_ICOLL_CTRL_CLKGATE__NO_CLOCKS 0x1
+#define BP_ICOLL_CTRL_VECTOR_PITCH      21
+#define BM_ICOLL_CTRL_VECTOR_PITCH 0x00E00000
+#define BF_ICOLL_CTRL_VECTOR_PITCH(v)  \
+	(((v) << 21) & BM_ICOLL_CTRL_VECTOR_PITCH)
+#define BV_ICOLL_CTRL_VECTOR_PITCH__DEFAULT_BY4 0x0
+#define BV_ICOLL_CTRL_VECTOR_PITCH__BY4	 0x1
+#define BV_ICOLL_CTRL_VECTOR_PITCH__BY8	 0x2
+#define BV_ICOLL_CTRL_VECTOR_PITCH__BY12	0x3
+#define BV_ICOLL_CTRL_VECTOR_PITCH__BY16	0x4
+#define BV_ICOLL_CTRL_VECTOR_PITCH__BY20	0x5
+#define BV_ICOLL_CTRL_VECTOR_PITCH__BY24	0x6
+#define BV_ICOLL_CTRL_VECTOR_PITCH__BY28	0x7
+#define BM_ICOLL_CTRL_BYPASS_FSM 0x00100000
+#define BV_ICOLL_CTRL_BYPASS_FSM__NORMAL 0x0
+#define BV_ICOLL_CTRL_BYPASS_FSM__BYPASS 0x1
+#define BM_ICOLL_CTRL_NO_NESTING 0x00080000
+#define BV_ICOLL_CTRL_NO_NESTING__NORMAL  0x0
+#define BV_ICOLL_CTRL_NO_NESTING__NO_NEST 0x1
+#define BM_ICOLL_CTRL_ARM_RSE_MODE 0x00040000
+#define BM_ICOLL_CTRL_FIQ_FINAL_ENABLE 0x00020000
+#define BV_ICOLL_CTRL_FIQ_FINAL_ENABLE__DISABLE 0x0
+#define BV_ICOLL_CTRL_FIQ_FINAL_ENABLE__ENABLE  0x1
+#define BM_ICOLL_CTRL_IRQ_FINAL_ENABLE 0x00010000
+#define BV_ICOLL_CTRL_IRQ_FINAL_ENABLE__DISABLE 0x0
+#define BV_ICOLL_CTRL_IRQ_FINAL_ENABLE__ENABLE  0x1
+HW_REGISTER(HW_ICOLL_VBASE, REGS_ICOLL_BASE, 0x00000040)
+#define HW_ICOLL_VBASE_ADDR (REGS_ICOLL_BASE + 0x00000040)
+#define BP_ICOLL_VBASE_TABLE_ADDRESS      2
+#define BM_ICOLL_VBASE_TABLE_ADDRESS 0xFFFFFFFC
+#define BF_ICOLL_VBASE_TABLE_ADDRESS(v) \
+	(((v) << 2) & BM_ICOLL_VBASE_TABLE_ADDRESS)
+HW_REGISTER_0(HW_ICOLL_STAT, REGS_ICOLL_BASE, 0x00000070)
+#define HW_ICOLL_STAT_ADDR (REGS_ICOLL_BASE + 0x00000070)
+#define BP_ICOLL_STAT_VECTOR_NUMBER      0
+#define BM_ICOLL_STAT_VECTOR_NUMBER 0x0000007F
+#define BF_ICOLL_STAT_VECTOR_NUMBER(v)  \
+	(((v) << 0) & BM_ICOLL_STAT_VECTOR_NUMBER)
+/*
+ *  multi-register-define name HW_ICOLL_RAWn
+ *	      base 0x000000A0
+ *	      count 4
+ *	      offset 0x10
+ */
+HW_REGISTER_0_INDEXED(HW_ICOLL_RAWn, REGS_ICOLL_BASE, 0x000000a0, 0x10)
+#define BP_ICOLL_RAWn_RAW_IRQS      0
+#define BM_ICOLL_RAWn_RAW_IRQS 0xFFFFFFFF
+#define BF_ICOLL_RAWn_RAW_IRQS(v)   (v)
+/*
+ *  multi-register-define name HW_ICOLL_INTERRUPTn
+ *	      base 0x00000120
+ *	      count 128
+ *	      offset 0x10
+ */
+HW_REGISTER_INDEXED(HW_ICOLL_INTERRUPTn, REGS_ICOLL_BASE, 0x00000120, 0x10)
+#define BM_ICOLL_INTERRUPTn_ENFIQ 0x00000010
+#define BV_ICOLL_INTERRUPTn_ENFIQ__DISABLE 0x0
+#define BV_ICOLL_INTERRUPTn_ENFIQ__ENABLE  0x1
+#define BM_ICOLL_INTERRUPTn_SOFTIRQ 0x00000008
+#define BV_ICOLL_INTERRUPTn_SOFTIRQ__NO_INTERRUPT    0x0
+#define BV_ICOLL_INTERRUPTn_SOFTIRQ__FORCE_INTERRUPT 0x1
+#define BM_ICOLL_INTERRUPTn_ENABLE 0x00000004
+#define BV_ICOLL_INTERRUPTn_ENABLE__DISABLE 0x0
+#define BV_ICOLL_INTERRUPTn_ENABLE__ENABLE  0x1
+#define BP_ICOLL_INTERRUPTn_PRIORITY      0
+#define BM_ICOLL_INTERRUPTn_PRIORITY 0x00000003
+#define BF_ICOLL_INTERRUPTn_PRIORITY(v)  \
+	(((v) << 0) & BM_ICOLL_INTERRUPTn_PRIORITY)
+#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL0 0x0
+#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL1 0x1
+#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL2 0x2
+#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL3 0x3
+HW_REGISTER(HW_ICOLL_DEBUG, REGS_ICOLL_BASE, 0x00001120)
+#define HW_ICOLL_DEBUG_ADDR (REGS_ICOLL_BASE + 0x00001120)
+#define BP_ICOLL_DEBUG_INSERVICE      28
+#define BM_ICOLL_DEBUG_INSERVICE 0xF0000000
+#define BF_ICOLL_DEBUG_INSERVICE(v) \
+	(((v) << 28) & BM_ICOLL_DEBUG_INSERVICE)
+#define BV_ICOLL_DEBUG_INSERVICE__LEVEL0 0x1
+#define BV_ICOLL_DEBUG_INSERVICE__LEVEL1 0x2
+#define BV_ICOLL_DEBUG_INSERVICE__LEVEL2 0x4
+#define BV_ICOLL_DEBUG_INSERVICE__LEVEL3 0x8
+#define BP_ICOLL_DEBUG_LEVEL_REQUESTS      24
+#define BM_ICOLL_DEBUG_LEVEL_REQUESTS 0x0F000000
+#define BF_ICOLL_DEBUG_LEVEL_REQUESTS(v)  \
+	(((v) << 24) & BM_ICOLL_DEBUG_LEVEL_REQUESTS)
+#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL0 0x1
+#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL1 0x2
+#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL2 0x4
+#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL3 0x8
+#define BP_ICOLL_DEBUG_REQUESTS_BY_LEVEL      20
+#define BM_ICOLL_DEBUG_REQUESTS_BY_LEVEL 0x00F00000
+#define BF_ICOLL_DEBUG_REQUESTS_BY_LEVEL(v)  \
+	(((v) << 20) & BM_ICOLL_DEBUG_REQUESTS_BY_LEVEL)
+#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL0 0x1
+#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL1 0x2
+#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL2 0x4
+#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL3 0x8
+#define BM_ICOLL_DEBUG_FIQ 0x00020000
+#define BV_ICOLL_DEBUG_FIQ__NO_FIQ_REQUESTED 0x0
+#define BV_ICOLL_DEBUG_FIQ__FIQ_REQUESTED    0x1
+#define BM_ICOLL_DEBUG_IRQ 0x00010000
+#define BV_ICOLL_DEBUG_IRQ__NO_IRQ_REQUESTED 0x0
+#define BV_ICOLL_DEBUG_IRQ__IRQ_REQUESTED    0x1
+#define BP_ICOLL_DEBUG_VECTOR_FSM      0
+#define BM_ICOLL_DEBUG_VECTOR_FSM 0x000003FF
+#define BF_ICOLL_DEBUG_VECTOR_FSM(v)  \
+	(((v) << 0) & BM_ICOLL_DEBUG_VECTOR_FSM)
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_IDLE	 0x000
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE1  0x001
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE2  0x002
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_PENDING      0x004
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE3  0x008
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE4  0x010
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING1 0x020
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING2 0x040
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING3 0x080
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE5  0x100
+#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE6  0x200
+HW_REGISTER(HW_ICOLL_DBGREAD0, REGS_ICOLL_BASE, 0x00001130)
+#define HW_ICOLL_DBGREAD0_ADDR (REGS_ICOLL_BASE + 0x00001130)
+#define BP_ICOLL_DBGREAD0_VALUE      0
+#define BM_ICOLL_DBGREAD0_VALUE 0xFFFFFFFF
+#define BF_ICOLL_DBGREAD0_VALUE(v)   (v)
+HW_REGISTER(HW_ICOLL_DBGREAD1, REGS_ICOLL_BASE, 0x00001140)
+#define HW_ICOLL_DBGREAD1_ADDR (REGS_ICOLL_BASE + 0x00001140)
+#define BP_ICOLL_DBGREAD1_VALUE      0
+#define BM_ICOLL_DBGREAD1_VALUE 0xFFFFFFFF
+#define BF_ICOLL_DBGREAD1_VALUE(v)   (v)
+HW_REGISTER(HW_ICOLL_DBGFLAG, REGS_ICOLL_BASE, 0x00001150)
+#define HW_ICOLL_DBGFLAG_ADDR (REGS_ICOLL_BASE + 0x00001150)
+#define BP_ICOLL_DBGFLAG_FLAG      0
+#define BM_ICOLL_DBGFLAG_FLAG 0x0000FFFF
+#define BF_ICOLL_DBGFLAG_FLAG(v)  \
+	(((v) << 0) & BM_ICOLL_DBGFLAG_FLAG)
+/*
+ *  multi-register-define name HW_ICOLL_DBGREQUESTn
+ *	      base 0x00001160
+ *	      count 4
+ *	      offset 0x10
+ */
+HW_REGISTER_0_INDEXED(HW_ICOLL_DBGREQUESTn, REGS_ICOLL_BASE, 0x00001160,
+		       0x10)
+#define BP_ICOLL_DBGREQUESTn_BITS      0
+#define BM_ICOLL_DBGREQUESTn_BITS 0xFFFFFFFF
+#define BF_ICOLL_DBGREQUESTn_BITS(v)   (v)
+HW_REGISTER_0(HW_ICOLL_VERSION, REGS_ICOLL_BASE, 0x000011e0)
+#define HW_ICOLL_VERSION_ADDR (REGS_ICOLL_BASE + 0x000011e0)
+#define BP_ICOLL_VERSION_MAJOR      24
+#define BM_ICOLL_VERSION_MAJOR 0xFF000000
+#define BF_ICOLL_VERSION_MAJOR(v) \
+	(((v) << 24) & BM_ICOLL_VERSION_MAJOR)
+#define BP_ICOLL_VERSION_MINOR      16
+#define BM_ICOLL_VERSION_MINOR 0x00FF0000
+#define BF_ICOLL_VERSION_MINOR(v)  \
+	(((v) << 16) & BM_ICOLL_VERSION_MINOR)
+#define BP_ICOLL_VERSION_STEP      0
+#define BM_ICOLL_VERSION_STEP 0x0000FFFF
+#define BF_ICOLL_VERSION_STEP(v)  \
+	(((v) << 0) & BM_ICOLL_VERSION_STEP)
+#endif /* __ARCH_ARM___ICOLL_H */
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h b/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h
new file mode 100644
index 0000000..6c42d2a
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h
@@ -0,0 +1,143 @@
+/*
+ * STMP PINCTRL Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __ARCH_ARM___PINCTRL_H
+#define __ARCH_ARM___PINCTRL_H  1
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_PINCTRL_BASE (REGS_BASE + 0x18000)
+#define REGS_PINCTRL_BASE_PHYS (0x80018000)
+#define REGS_PINCTRL_SIZE 0x00002000
+HW_REGISTER(HW_PINCTRL_CTRL, REGS_PINCTRL_BASE, 0x00000000)
+#define HW_PINCTRL_CTRL_ADDR (REGS_PINCTRL_BASE + 0x00000000)
+#define BM_PINCTRL_CTRL_SFTRST 0x80000000
+#define BM_PINCTRL_CTRL_CLKGATE 0x40000000
+#define BM_PINCTRL_CTRL_PRESENT3 0x08000000
+#define BM_PINCTRL_CTRL_PRESENT2 0x04000000
+#define BM_PINCTRL_CTRL_PRESENT1 0x02000000
+#define BM_PINCTRL_CTRL_PRESENT0 0x01000000
+#define BM_PINCTRL_CTRL_IRQOUT2 0x00000004
+#define BM_PINCTRL_CTRL_IRQOUT1 0x00000002
+#define BM_PINCTRL_CTRL_IRQOUT0 0x00000001
+HW_REGISTER(HW_PINCTRL_MUXSEL0, REGS_PINCTRL_BASE, 0x00000100)
+#define HW_PINCTRL_MUXSEL0_ADDR (REGS_PINCTRL_BASE + 0x00000100)
+HW_REGISTER(HW_PINCTRL_MUXSEL1, REGS_PINCTRL_BASE, 0x00000110)
+#define HW_PINCTRL_MUXSEL1_ADDR (REGS_PINCTRL_BASE + 0x00000110)
+HW_REGISTER(HW_PINCTRL_MUXSEL2, REGS_PINCTRL_BASE, 0x00000120)
+#define HW_PINCTRL_MUXSEL2_ADDR (REGS_PINCTRL_BASE + 0x00000120)
+HW_REGISTER(HW_PINCTRL_MUXSEL3, REGS_PINCTRL_BASE, 0x00000130)
+#define HW_PINCTRL_MUXSEL3_ADDR (REGS_PINCTRL_BASE + 0x00000130)
+HW_REGISTER(HW_PINCTRL_MUXSEL4, REGS_PINCTRL_BASE, 0x00000140)
+#define HW_PINCTRL_MUXSEL4_ADDR (REGS_PINCTRL_BASE + 0x00000140)
+HW_REGISTER(HW_PINCTRL_MUXSEL5, REGS_PINCTRL_BASE, 0x00000150)
+#define HW_PINCTRL_MUXSEL5_ADDR (REGS_PINCTRL_BASE + 0x00000150)
+HW_REGISTER(HW_PINCTRL_MUXSEL6, REGS_PINCTRL_BASE, 0x00000160)
+#define HW_PINCTRL_MUXSEL6_ADDR (REGS_PINCTRL_BASE + 0x00000160)
+HW_REGISTER(HW_PINCTRL_MUXSEL7, REGS_PINCTRL_BASE, 0x00000170)
+#define HW_PINCTRL_MUXSEL7_ADDR (REGS_PINCTRL_BASE + 0x00000170)
+HW_REGISTER(HW_PINCTRL_DRIVE0, REGS_PINCTRL_BASE, 0x00000200)
+#define HW_PINCTRL_DRIVE0_ADDR (REGS_PINCTRL_BASE + 0x00000200)
+HW_REGISTER(HW_PINCTRL_DRIVE1, REGS_PINCTRL_BASE, 0x00000210)
+#define HW_PINCTRL_DRIVE1_ADDR (REGS_PINCTRL_BASE + 0x00000210)
+HW_REGISTER(HW_PINCTRL_DRIVE2, REGS_PINCTRL_BASE, 0x00000220)
+#define HW_PINCTRL_DRIVE2_ADDR (REGS_PINCTRL_BASE + 0x00000220)
+HW_REGISTER(HW_PINCTRL_DRIVE3, REGS_PINCTRL_BASE, 0x00000230)
+#define HW_PINCTRL_DRIVE3_ADDR (REGS_PINCTRL_BASE + 0x00000230)
+HW_REGISTER(HW_PINCTRL_DRIVE4, REGS_PINCTRL_BASE, 0x00000240)
+#define HW_PINCTRL_DRIVE4_ADDR (REGS_PINCTRL_BASE + 0x00000240)
+HW_REGISTER(HW_PINCTRL_DRIVE5, REGS_PINCTRL_BASE, 0x00000250)
+#define HW_PINCTRL_DRIVE5_ADDR (REGS_PINCTRL_BASE + 0x00000250)
+HW_REGISTER(HW_PINCTRL_DRIVE6, REGS_PINCTRL_BASE, 0x00000260)
+#define HW_PINCTRL_DRIVE6_ADDR (REGS_PINCTRL_BASE + 0x00000260)
+HW_REGISTER(HW_PINCTRL_DRIVE7, REGS_PINCTRL_BASE, 0x00000270)
+#define HW_PINCTRL_DRIVE7_ADDR (REGS_PINCTRL_BASE + 0x00000270)
+HW_REGISTER(HW_PINCTRL_DRIVE8, REGS_PINCTRL_BASE, 0x00000280)
+#define HW_PINCTRL_DRIVE8_ADDR (REGS_PINCTRL_BASE + 0x00000280)
+HW_REGISTER(HW_PINCTRL_DRIVE9, REGS_PINCTRL_BASE, 0x00000290)
+#define HW_PINCTRL_DRIVE9_ADDR (REGS_PINCTRL_BASE + 0x00000290)
+HW_REGISTER(HW_PINCTRL_DRIVE10, REGS_PINCTRL_BASE, 0x000002a0)
+#define HW_PINCTRL_DRIVE10_ADDR (REGS_PINCTRL_BASE + 0x000002a0)
+HW_REGISTER(HW_PINCTRL_DRIVE11, REGS_PINCTRL_BASE, 0x000002b0)
+#define HW_PINCTRL_DRIVE11_ADDR (REGS_PINCTRL_BASE + 0x000002b0)
+HW_REGISTER(HW_PINCTRL_DRIVE12, REGS_PINCTRL_BASE, 0x000002c0)
+#define HW_PINCTRL_DRIVE12_ADDR (REGS_PINCTRL_BASE + 0x000002c0)
+HW_REGISTER(HW_PINCTRL_DRIVE13, REGS_PINCTRL_BASE, 0x000002d0)
+#define HW_PINCTRL_DRIVE13_ADDR (REGS_PINCTRL_BASE + 0x000002d0)
+HW_REGISTER(HW_PINCTRL_DRIVE14, REGS_PINCTRL_BASE, 0x000002e0)
+#define HW_PINCTRL_DRIVE14_ADDR (REGS_PINCTRL_BASE + 0x000002e0)
+HW_REGISTER(HW_PINCTRL_PULL0, REGS_PINCTRL_BASE, 0x00000400)
+#define HW_PINCTRL_PULL0_ADDR (REGS_PINCTRL_BASE + 0x00000400)
+HW_REGISTER(HW_PINCTRL_PULL1, REGS_PINCTRL_BASE, 0x00000410)
+#define HW_PINCTRL_PULL1_ADDR (REGS_PINCTRL_BASE + 0x00000410)
+HW_REGISTER(HW_PINCTRL_PULL2, REGS_PINCTRL_BASE, 0x00000420)
+#define HW_PINCTRL_PULL2_ADDR (REGS_PINCTRL_BASE + 0x00000420)
+HW_REGISTER(HW_PINCTRL_PULL3, REGS_PINCTRL_BASE, 0x00000430)
+#define HW_PINCTRL_PULL3_ADDR (REGS_PINCTRL_BASE + 0x00000430)
+HW_REGISTER(HW_PINCTRL_DOUT0, REGS_PINCTRL_BASE, 0x00000500)
+#define HW_PINCTRL_DOUT0_ADDR (REGS_PINCTRL_BASE + 0x00000500)
+HW_REGISTER(HW_PINCTRL_DOUT1, REGS_PINCTRL_BASE, 0x00000510)
+#define HW_PINCTRL_DOUT1_ADDR (REGS_PINCTRL_BASE + 0x00000510)
+HW_REGISTER(HW_PINCTRL_DOUT2, REGS_PINCTRL_BASE, 0x00000520)
+#define HW_PINCTRL_DOUT2_ADDR (REGS_PINCTRL_BASE + 0x00000520)
+HW_REGISTER(HW_PINCTRL_DIN0, REGS_PINCTRL_BASE, 0x00000600)
+#define HW_PINCTRL_DIN0_ADDR (REGS_PINCTRL_BASE + 0x00000600)
+HW_REGISTER(HW_PINCTRL_DIN1, REGS_PINCTRL_BASE, 0x00000610)
+#define HW_PINCTRL_DIN1_ADDR (REGS_PINCTRL_BASE + 0x00000610)
+HW_REGISTER(HW_PINCTRL_DIN2, REGS_PINCTRL_BASE, 0x00000620)
+#define HW_PINCTRL_DIN2_ADDR (REGS_PINCTRL_BASE + 0x00000620)
+HW_REGISTER(HW_PINCTRL_DOE0, REGS_PINCTRL_BASE, 0x00000700)
+#define HW_PINCTRL_DOE0_ADDR (REGS_PINCTRL_BASE + 0x00000700)
+HW_REGISTER(HW_PINCTRL_DOE1, REGS_PINCTRL_BASE, 0x00000710)
+#define HW_PINCTRL_DOE1_ADDR (REGS_PINCTRL_BASE + 0x00000710)
+HW_REGISTER(HW_PINCTRL_DOE2, REGS_PINCTRL_BASE, 0x00000720)
+#define HW_PINCTRL_DOE2_ADDR (REGS_PINCTRL_BASE + 0x00000720)
+HW_REGISTER(HW_PINCTRL_PIN2IRQ0, REGS_PINCTRL_BASE, 0x00000800)
+#define HW_PINCTRL_PIN2IRQ0_ADDR (REGS_PINCTRL_BASE + 0x00000800)
+HW_REGISTER(HW_PINCTRL_PIN2IRQ1, REGS_PINCTRL_BASE, 0x00000810)
+#define HW_PINCTRL_PIN2IRQ1_ADDR (REGS_PINCTRL_BASE + 0x00000810)
+HW_REGISTER(HW_PINCTRL_PIN2IRQ2, REGS_PINCTRL_BASE, 0x00000820)
+#define HW_PINCTRL_PIN2IRQ2_ADDR (REGS_PINCTRL_BASE + 0x00000820)
+HW_REGISTER(HW_PINCTRL_IRQEN0, REGS_PINCTRL_BASE, 0x00000900)
+#define HW_PINCTRL_IRQEN0_ADDR (REGS_PINCTRL_BASE + 0x00000900)
+HW_REGISTER(HW_PINCTRL_IRQEN1, REGS_PINCTRL_BASE, 0x00000910)
+#define HW_PINCTRL_IRQEN1_ADDR (REGS_PINCTRL_BASE + 0x00000910)
+HW_REGISTER(HW_PINCTRL_IRQEN2, REGS_PINCTRL_BASE, 0x00000920)
+#define HW_PINCTRL_IRQEN2_ADDR (REGS_PINCTRL_BASE + 0x00000920)
+HW_REGISTER(HW_PINCTRL_IRQLEVEL0, REGS_PINCTRL_BASE, 0x00000a00)
+#define HW_PINCTRL_IRQLEVEL0_ADDR (REGS_PINCTRL_BASE + 0x00000a00)
+HW_REGISTER(HW_PINCTRL_IRQLEVEL1, REGS_PINCTRL_BASE, 0x00000a10)
+#define HW_PINCTRL_IRQLEVEL1_ADDR (REGS_PINCTRL_BASE + 0x00000a10)
+HW_REGISTER(HW_PINCTRL_IRQLEVEL2, REGS_PINCTRL_BASE, 0x00000a20)
+#define HW_PINCTRL_IRQLEVEL2_ADDR (REGS_PINCTRL_BASE + 0x00000a20)
+HW_REGISTER(HW_PINCTRL_IRQPOL0, REGS_PINCTRL_BASE, 0x00000b00)
+#define HW_PINCTRL_IRQPOL0_ADDR (REGS_PINCTRL_BASE + 0x00000b00)
+HW_REGISTER(HW_PINCTRL_IRQPOL1, REGS_PINCTRL_BASE, 0x00000b10)
+#define HW_PINCTRL_IRQPOL1_ADDR (REGS_PINCTRL_BASE + 0x00000b10)
+HW_REGISTER(HW_PINCTRL_IRQPOL2, REGS_PINCTRL_BASE, 0x00000b20)
+#define HW_PINCTRL_IRQPOL2_ADDR (REGS_PINCTRL_BASE + 0x00000b20)
+HW_REGISTER(HW_PINCTRL_IRQSTAT0, REGS_PINCTRL_BASE, 0x00000c00)
+#define HW_PINCTRL_IRQSTAT0_ADDR (REGS_PINCTRL_BASE + 0x00000c00)
+HW_REGISTER(HW_PINCTRL_IRQSTAT1, REGS_PINCTRL_BASE, 0x00000c10)
+#define HW_PINCTRL_IRQSTAT1_ADDR (REGS_PINCTRL_BASE + 0x00000c10)
+HW_REGISTER(HW_PINCTRL_IRQSTAT2, REGS_PINCTRL_BASE, 0x00000c20)
+#define HW_PINCTRL_IRQSTAT2_ADDR (REGS_PINCTRL_BASE + 0x00000c20)
+#endif /* __ARCH_ARM___PINCTRL_H */
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-power.h b/arch/arm/mach-stmp378x/include/mach/regs-power.h
new file mode 100644
index 0000000..1c81afe
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-power.h
@@ -0,0 +1,32 @@
+/*
+ * STMP POWER Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __ARCH_ARM___POWER_H
+#define __ARCH_ARM___POWER_H  1
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_POWER_BASE (void __iomem *)(REGS_BASE + 0x44000)
+#define REGS_POWER_BASE_PHYS (0x80044000)
+#define REGS_POWER_SIZE 0x00002000
+HW_REGISTER(HW_POWER_MINPWR, REGS_POWER_BASE, 0x00000020)
+HW_REGISTER(HW_POWER_CHARGE, REGS_POWER_BASE, 0x00000030)
+#endif /* __ARCH_ARM___POWER_H */
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-timrot.h b/arch/arm/mach-stmp378x/include/mach/regs-timrot.h
new file mode 100644
index 0000000..bb6355a
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-timrot.h
@@ -0,0 +1,216 @@
+/*
+ * STMP TIMROT Register Definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __ARCH_ARM___TIMROT_H
+#define __ARCH_ARM___TIMROT_H  1
+
+#include <mach/stmp3xxx_regs.h>
+
+#define REGS_TIMROT_BASE (REGS_BASE + 0x68000)
+#define REGS_TIMROT_BASE_PHYS (0x80068000)
+#define REGS_TIMROT_SIZE 0x00002000
+HW_REGISTER(HW_TIMROT_ROTCTRL, REGS_TIMROT_BASE, 0x00000000)
+#define HW_TIMROT_ROTCTRL_ADDR (REGS_TIMROT_BASE + 0x00000000)
+#define BM_TIMROT_ROTCTRL_SFTRST 0x80000000
+#define BM_TIMROT_ROTCTRL_CLKGATE 0x40000000
+#define BM_TIMROT_ROTCTRL_ROTARY_PRESENT 0x20000000
+#define BM_TIMROT_ROTCTRL_TIM3_PRESENT 0x10000000
+#define BM_TIMROT_ROTCTRL_TIM2_PRESENT 0x08000000
+#define BM_TIMROT_ROTCTRL_TIM1_PRESENT 0x04000000
+#define BM_TIMROT_ROTCTRL_TIM0_PRESENT 0x02000000
+#define BP_TIMROT_ROTCTRL_STATE      22
+#define BM_TIMROT_ROTCTRL_STATE 0x01C00000
+#define BF_TIMROT_ROTCTRL_STATE(v)  \
+	(((v) << 22) & BM_TIMROT_ROTCTRL_STATE)
+#define BP_TIMROT_ROTCTRL_DIVIDER      16
+#define BM_TIMROT_ROTCTRL_DIVIDER 0x003F0000
+#define BF_TIMROT_ROTCTRL_DIVIDER(v)  \
+	(((v) << 16) & BM_TIMROT_ROTCTRL_DIVIDER)
+#define BM_TIMROT_ROTCTRL_RELATIVE 0x00001000
+#define BP_TIMROT_ROTCTRL_OVERSAMPLE      10
+#define BM_TIMROT_ROTCTRL_OVERSAMPLE 0x00000C00
+#define BF_TIMROT_ROTCTRL_OVERSAMPLE(v)  \
+	(((v) << 10) & BM_TIMROT_ROTCTRL_OVERSAMPLE)
+#define BV_TIMROT_ROTCTRL_OVERSAMPLE__8X 0x0
+#define BV_TIMROT_ROTCTRL_OVERSAMPLE__4X 0x1
+#define BV_TIMROT_ROTCTRL_OVERSAMPLE__2X 0x2
+#define BV_TIMROT_ROTCTRL_OVERSAMPLE__1X 0x3
+#define BM_TIMROT_ROTCTRL_POLARITY_B 0x00000200
+#define BM_TIMROT_ROTCTRL_POLARITY_A 0x00000100
+#define BP_TIMROT_ROTCTRL_SELECT_B      4
+#define BM_TIMROT_ROTCTRL_SELECT_B 0x00000070
+#define BF_TIMROT_ROTCTRL_SELECT_B(v)  \
+	(((v) << 4) & BM_TIMROT_ROTCTRL_SELECT_B)
+#define BV_TIMROT_ROTCTRL_SELECT_B__NEVER_TICK 0x0
+#define BV_TIMROT_ROTCTRL_SELECT_B__PWM0       0x1
+#define BV_TIMROT_ROTCTRL_SELECT_B__PWM1       0x2
+#define BV_TIMROT_ROTCTRL_SELECT_B__PWM2       0x3
+#define BV_TIMROT_ROTCTRL_SELECT_B__PWM3       0x4
+#define BV_TIMROT_ROTCTRL_SELECT_B__PWM4       0x5
+#define BV_TIMROT_ROTCTRL_SELECT_B__ROTARYA    0x6
+#define BV_TIMROT_ROTCTRL_SELECT_B__ROTARYB    0x7
+#define BP_TIMROT_ROTCTRL_SELECT_A      0
+#define BM_TIMROT_ROTCTRL_SELECT_A 0x00000007
+#define BF_TIMROT_ROTCTRL_SELECT_A(v)  \
+	(((v) << 0) & BM_TIMROT_ROTCTRL_SELECT_A)
+#define BV_TIMROT_ROTCTRL_SELECT_A__NEVER_TICK 0x0
+#define BV_TIMROT_ROTCTRL_SELECT_A__PWM0       0x1
+#define BV_TIMROT_ROTCTRL_SELECT_A__PWM1       0x2
+#define BV_TIMROT_ROTCTRL_SELECT_A__PWM2       0x3
+#define BV_TIMROT_ROTCTRL_SELECT_A__PWM3       0x4
+#define BV_TIMROT_ROTCTRL_SELECT_A__PWM4       0x5
+#define BV_TIMROT_ROTCTRL_SELECT_A__ROTARYA    0x6
+#define BV_TIMROT_ROTCTRL_SELECT_A__ROTARYB    0x7
+HW_REGISTER_0(HW_TIMROT_ROTCOUNT, REGS_TIMROT_BASE, 0x00000010)
+#define HW_TIMROT_ROTCOUNT_ADDR (REGS_TIMROT_BASE + 0x00000010)
+#define BP_TIMROT_ROTCOUNT_UPDOWN      0
+#define BM_TIMROT_ROTCOUNT_UPDOWN 0x0000FFFF
+#define BF_TIMROT_ROTCOUNT_UPDOWN(v)  \
+	(((v) << 0) & BM_TIMROT_ROTCOUNT_UPDOWN)
+/*
+ *  multi-register-define name HW_TIMROT_TIMCTRLn
+ *	      base 0x00000020
+ *	      count 3
+ *	      offset 0x20
+ */
+HW_REGISTER_INDEXED(HW_TIMROT_TIMCTRLn, REGS_TIMROT_BASE, 0x00000020, 0x20)
+#define BM_TIMROT_TIMCTRLn_IRQ 0x00008000
+#define BM_TIMROT_TIMCTRLn_IRQ_EN 0x00004000
+#define BM_TIMROT_TIMCTRLn_POLARITY 0x00000100
+#define BM_TIMROT_TIMCTRLn_UPDATE 0x00000080
+#define BM_TIMROT_TIMCTRLn_RELOAD 0x00000040
+#define BP_TIMROT_TIMCTRLn_PRESCALE      4
+#define BM_TIMROT_TIMCTRLn_PRESCALE 0x00000030
+#define BF_TIMROT_TIMCTRLn_PRESCALE(v)  \
+	(((v) << 4) & BM_TIMROT_TIMCTRLn_PRESCALE)
+#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_1 0x0
+#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_2 0x1
+#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_4 0x2
+#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_8 0x3
+#define BP_TIMROT_TIMCTRLn_SELECT      0
+#define BM_TIMROT_TIMCTRLn_SELECT 0x0000000F
+#define BF_TIMROT_TIMCTRLn_SELECT(v)  \
+	(((v) << 0) & BM_TIMROT_TIMCTRLn_SELECT)
+#define BV_TIMROT_TIMCTRLn_SELECT__NEVER_TICK  0x0
+#define BV_TIMROT_TIMCTRLn_SELECT__PWM0	0x1
+#define BV_TIMROT_TIMCTRLn_SELECT__PWM1	0x2
+#define BV_TIMROT_TIMCTRLn_SELECT__PWM2	0x3
+#define BV_TIMROT_TIMCTRLn_SELECT__PWM3	0x4
+#define BV_TIMROT_TIMCTRLn_SELECT__PWM4	0x5
+#define BV_TIMROT_TIMCTRLn_SELECT__ROTARYA     0x6
+#define BV_TIMROT_TIMCTRLn_SELECT__ROTARYB     0x7
+#define BV_TIMROT_TIMCTRLn_SELECT__32KHZ_XTAL  0x8
+#define BV_TIMROT_TIMCTRLn_SELECT__8KHZ_XTAL   0x9
+#define BV_TIMROT_TIMCTRLn_SELECT__4KHZ_XTAL   0xA
+#define BV_TIMROT_TIMCTRLn_SELECT__1KHZ_XTAL   0xB
+#define BV_TIMROT_TIMCTRLn_SELECT__TICK_ALWAYS 0xC
+/*
+ *  multi-register-define name HW_TIMROT_TIMCOUNTn
+ *	      base 0x00000030
+ *	      count 3
+ *	      offset 0x20
+ */
+HW_REGISTER_0_INDEXED(HW_TIMROT_TIMCOUNTn, REGS_TIMROT_BASE, 0x00000030,
+		       0x20)
+#define BP_TIMROT_TIMCOUNTn_RUNNING_COUNT      16
+#define BM_TIMROT_TIMCOUNTn_RUNNING_COUNT 0xFFFF0000
+#define BF_TIMROT_TIMCOUNTn_RUNNING_COUNT(v) \
+	(((v) << 16) & BM_TIMROT_TIMCOUNTn_RUNNING_COUNT)
+#define BP_TIMROT_TIMCOUNTn_FIXED_COUNT      0
+#define BM_TIMROT_TIMCOUNTn_FIXED_COUNT 0x0000FFFF
+#define BF_TIMROT_TIMCOUNTn_FIXED_COUNT(v)  \
+	(((v) << 0) & BM_TIMROT_TIMCOUNTn_FIXED_COUNT)
+HW_REGISTER(HW_TIMROT_TIMCTRL3, REGS_TIMROT_BASE, 0x00000080)
+#define HW_TIMROT_TIMCTRL3_ADDR (REGS_TIMROT_BASE + 0x00000080)
+#define BP_TIMROT_TIMCTRL3_TEST_SIGNAL      16
+#define BM_TIMROT_TIMCTRL3_TEST_SIGNAL 0x000F0000
+#define BF_TIMROT_TIMCTRL3_TEST_SIGNAL(v)  \
+	(((v) << 16) & BM_TIMROT_TIMCTRL3_TEST_SIGNAL)
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__NEVER_TICK  0x0
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM0	0x1
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM1	0x2
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM2	0x3
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM3	0x4
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM4	0x5
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__ROTARYA     0x6
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__ROTARYB     0x7
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__32KHZ_XTAL  0x8
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__8KHZ_XTAL   0x9
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__4KHZ_XTAL   0xA
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__1KHZ_XTAL   0xB
+#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__TICK_ALWAYS 0xC
+#define BM_TIMROT_TIMCTRL3_IRQ 0x00008000
+#define BM_TIMROT_TIMCTRL3_IRQ_EN 0x00004000
+#define BM_TIMROT_TIMCTRL3_DUTY_VALID 0x00000400
+#define BM_TIMROT_TIMCTRL3_DUTY_CYCLE 0x00000200
+#define BM_TIMROT_TIMCTRL3_POLARITY 0x00000100
+#define BM_TIMROT_TIMCTRL3_UPDATE 0x00000080
+#define BM_TIMROT_TIMCTRL3_RELOAD 0x00000040
+#define BP_TIMROT_TIMCTRL3_PRESCALE      4
+#define BM_TIMROT_TIMCTRL3_PRESCALE 0x00000030
+#define BF_TIMROT_TIMCTRL3_PRESCALE(v)  \
+	(((v) << 4) & BM_TIMROT_TIMCTRL3_PRESCALE)
+#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_1 0x0
+#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_2 0x1
+#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_4 0x2
+#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_8 0x3
+#define BP_TIMROT_TIMCTRL3_SELECT      0
+#define BM_TIMROT_TIMCTRL3_SELECT 0x0000000F
+#define BF_TIMROT_TIMCTRL3_SELECT(v)  \
+	(((v) << 0) & BM_TIMROT_TIMCTRL3_SELECT)
+#define BV_TIMROT_TIMCTRL3_SELECT__NEVER_TICK  0x0
+#define BV_TIMROT_TIMCTRL3_SELECT__PWM0	0x1
+#define BV_TIMROT_TIMCTRL3_SELECT__PWM1	0x2
+#define BV_TIMROT_TIMCTRL3_SELECT__PWM2	0x3
+#define BV_TIMROT_TIMCTRL3_SELECT__PWM3	0x4
+#define BV_TIMROT_TIMCTRL3_SELECT__PWM4	0x5
+#define BV_TIMROT_TIMCTRL3_SELECT__ROTARYA     0x6
+#define BV_TIMROT_TIMCTRL3_SELECT__ROTARYB     0x7
+#define BV_TIMROT_TIMCTRL3_SELECT__32KHZ_XTAL  0x8
+#define BV_TIMROT_TIMCTRL3_SELECT__8KHZ_XTAL   0x9
+#define BV_TIMROT_TIMCTRL3_SELECT__4KHZ_XTAL   0xA
+#define BV_TIMROT_TIMCTRL3_SELECT__1KHZ_XTAL   0xB
+#define BV_TIMROT_TIMCTRL3_SELECT__TICK_ALWAYS 0xC
+HW_REGISTER_0(HW_TIMROT_TIMCOUNT3, REGS_TIMROT_BASE, 0x00000090)
+#define HW_TIMROT_TIMCOUNT3_ADDR (REGS_TIMROT_BASE + 0x00000090)
+#define BP_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT      16
+#define BM_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT 0xFFFF0000
+#define BF_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT(v) \
+	(((v) << 16) & BM_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT)
+#define BP_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT      0
+#define BM_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT 0x0000FFFF
+#define BF_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT(v)  \
+	(((v) << 0) & BM_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT)
+HW_REGISTER_0(HW_TIMROT_VERSION, REGS_TIMROT_BASE, 0x000000a0)
+#define HW_TIMROT_VERSION_ADDR (REGS_TIMROT_BASE + 0x000000a0)
+#define BP_TIMROT_VERSION_MAJOR      24
+#define BM_TIMROT_VERSION_MAJOR 0xFF000000
+#define BF_TIMROT_VERSION_MAJOR(v) \
+	(((v) << 24) & BM_TIMROT_VERSION_MAJOR)
+#define BP_TIMROT_VERSION_MINOR      16
+#define BM_TIMROT_VERSION_MINOR 0x00FF0000
+#define BF_TIMROT_VERSION_MINOR(v)  \
+	(((v) << 16) & BM_TIMROT_VERSION_MINOR)
+#define BP_TIMROT_VERSION_STEP      0
+#define BM_TIMROT_VERSION_STEP 0x0000FFFF
+#define BF_TIMROT_VERSION_STEP(v)  \
+	(((v) << 0) & BM_TIMROT_VERSION_STEP)
+#endif /* __ARCH_ARM___TIMROT_H */
-- 
cgit v0.10.2


From e317872ac532fd845c597e55ceb5a9bceee878c1 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Wed, 22 Apr 2009 23:55:23 +0100
Subject: [ARM] 5466/1: Freescale STMP platform support [5/10]

Shared (platform) headers

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/plat-stmp3xxx/include/mach/clkdev.h b/arch/arm/plat-stmp3xxx/include/mach/clkdev.h
new file mode 100644
index 0000000..f9c3977
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/clkdev.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_MACH_CLKDEV_H
+#define __ASM_MACH_CLKDEV_H
+
+#define __clk_get(clk) ({ 1; })
+#define __clk_put(clk) do { } while (0)
+
+#endif
diff --git a/arch/arm/plat-stmp3xxx/include/mach/cputype.h b/arch/arm/plat-stmp3xxx/include/mach/cputype.h
new file mode 100644
index 0000000..b4e205b
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/cputype.h
@@ -0,0 +1,33 @@
+/*
+ * Freescale STMP37XX/STMP378X CPU type detection
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_PLAT_CPU_H
+#define __ASM_PLAT_CPU_H
+
+#ifdef CONFIG_ARCH_STMP37XX
+#define cpu_is_stmp37xx()	(1)
+#else
+#define cpu_is_stmp37xx()	(0)
+#endif
+
+#ifdef CONFIG_ARCH_STMP378X
+#define cpu_is_stmp378x()	(1)
+#else
+#define cpu_is_stmp378x()	(0)
+#endif
+
+#endif /* __ASM_PLAT_CPU_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/debug-macro.S b/arch/arm/plat-stmp3xxx/include/mach/debug-macro.S
new file mode 100644
index 0000000..fb3b969
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/debug-macro.S
@@ -0,0 +1,42 @@
+/*
+ * Debugging macro include header
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+		.macro	addruart,rx
+		mrc	p15, 0, \rx, c1, c0
+		tst	\rx, #1			@ MMU enabled?
+		moveq	\rx, #0x80000000	@ physical base address
+		addeq	\rx, \rx, #0x00070000
+		movne	\rx, #0xf0000000	@ virtual base
+		addne	\rx, \rx, #0x00070000
+		.endm
+
+		.macro	senduart,rd,rx
+		strb	\rd, [\rx, #0]		@ data register at 0
+		.endm
+
+		.macro	waituart,rd,rx
+1001:		ldr	\rd, [\rx, #0x18]	@ UARTFLG
+		tst	\rd, #1 << 5		@ UARTFLGUTXFF - 1 when full
+		bne	1001b
+		.endm
+
+		.macro	busyuart,rd,rx
+1001:		ldr	\rd, [\rx, #0x18]	@ UARTFLG
+		tst	\rd, #1 << 3		@ UARTFLGUBUSY - 1 when busy
+		bne	1001b
+		.endm
diff --git a/arch/arm/plat-stmp3xxx/include/mach/dma.h b/arch/arm/plat-stmp3xxx/include/mach/dma.h
new file mode 100644
index 0000000..1e305b2
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/dma.h
@@ -0,0 +1,155 @@
+/*
+ * Freescale STMP37XX/STMP378X DMA helper interface
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_PLAT_STMP3XXX_DMA_H
+#define __ASM_PLAT_STMP3XXX_DMA_H
+
+#include <linux/platform_device.h>
+#include <linux/dmapool.h>
+
+#if !defined(MAX_PIO_WORDS)
+#define MAX_PIO_WORDS   (15)
+#endif
+
+#define STMP3XXX_BUS_APBH	0
+#define STMP3XXX_BUS_APBX	1
+#define STMP3XXX_DMA_MAX_CHANNEL	16
+
+
+#define STMP3xxx_DMA(channel, bus)  ((bus) * 16 + (channel))
+
+#define MAX_DMA_ADDRESS		0xffffffff
+
+#define MAX_DMA_CHANNELS	32
+
+struct stmp3xxx_dma_command {
+	u32 next;
+	u32 cmd;
+	union {
+		u32 buf_ptr;
+		u32 alternate;
+	};
+	u32 pio_words[MAX_PIO_WORDS];
+};
+
+struct stmp3xxx_dma_descriptor {
+	struct stmp3xxx_dma_command *command;
+	dma_addr_t handle;
+
+	/* The virtual address of the buffer pointer */
+	void *virtual_buf_ptr;
+	/* The next descriptor in a the DMA chain (optional) */
+	struct stmp3xxx_dma_descriptor *next_descr;
+};
+
+struct stmp37xx_circ_dma_chain {
+	unsigned total_count;
+	struct stmp3xxx_dma_descriptor *chain;
+
+	unsigned free_index;
+	unsigned free_count;
+	unsigned active_index;
+	unsigned active_count;
+	unsigned cooked_index;
+	unsigned cooked_count;
+
+	int bus;
+	unsigned channel;
+};
+
+static inline struct stmp3xxx_dma_descriptor
+    *stmp3xxx_dma_circ_get_free_head(struct stmp37xx_circ_dma_chain *chain)
+{
+	return &(chain->chain[chain->free_index]);
+}
+
+static inline struct stmp3xxx_dma_descriptor
+    *stmp3xxx_dma_circ_get_cooked_head(struct stmp37xx_circ_dma_chain *chain)
+{
+	return &(chain->chain[chain->cooked_index]);
+}
+
+int stmp3xxx_dma_request(int ch, struct device *dev, const char *name);
+int stmp3xxx_dma_release(int ch);
+int stmp3xxx_dma_allocate_command(int ch,
+				  struct stmp3xxx_dma_descriptor *descriptor);
+int stmp3xxx_dma_free_command(int ch,
+			      struct stmp3xxx_dma_descriptor *descriptor);
+void stmp3xxx_dma_continue(int channel, u32 semaphore);
+void stmp3xxx_dma_go(int ch, struct stmp3xxx_dma_descriptor *head,
+		     u32 semaphore);
+int stmp3xxx_dma_running(int ch);
+int stmp3xxx_dma_make_chain(int ch, struct stmp37xx_circ_dma_chain *chain,
+			    struct stmp3xxx_dma_descriptor descriptors[],
+			    unsigned items);
+void stmp3xxx_dma_free_chain(struct stmp37xx_circ_dma_chain *chain);
+void stmp37xx_circ_clear_chain(struct stmp37xx_circ_dma_chain *chain);
+void stmp37xx_circ_advance_free(struct stmp37xx_circ_dma_chain *chain,
+		unsigned count);
+void stmp37xx_circ_advance_active(struct stmp37xx_circ_dma_chain *chain,
+		unsigned count);
+unsigned stmp37xx_circ_advance_cooked(struct stmp37xx_circ_dma_chain *chain);
+int stmp3xxx_dma_read_semaphore(int ch);
+void stmp3xxx_dma_init(void);
+void stmp3xxx_dma_set_alt_target(int ch, int target);
+void stmp3xxx_dma_suspend(void);
+void stmp3xxx_dma_resume(void);
+
+/*
+ * STMP37xx and STMP378x have different DMA control
+ * registers layout
+ */
+
+void stmp3xxx_arch_dma_freeze(int ch);
+void stmp3xxx_arch_dma_unfreeze(int ch);
+void stmp3xxx_arch_dma_reset_channel(int ch);
+void stmp3xxx_arch_dma_enable_interrupt(int ch);
+void stmp3xxx_arch_dma_clear_interrupt(int ch);
+int stmp3xxx_arch_dma_is_interrupt(int ch);
+
+static inline void stmp3xxx_dma_reset_channel(int ch)
+{
+	stmp3xxx_arch_dma_reset_channel(ch);
+}
+
+
+static inline void stmp3xxx_dma_freeze(int ch)
+{
+	stmp3xxx_arch_dma_freeze(ch);
+}
+
+static inline void stmp3xxx_dma_unfreeze(int ch)
+{
+	stmp3xxx_arch_dma_unfreeze(ch);
+}
+
+static inline void stmp3xxx_dma_enable_interrupt(int ch)
+{
+	stmp3xxx_arch_dma_enable_interrupt(ch);
+}
+
+static inline void stmp3xxx_dma_clear_interrupt(int ch)
+{
+	stmp3xxx_arch_dma_clear_interrupt(ch);
+}
+
+static inline int stmp3xxx_dma_is_interrupt(int ch)
+{
+	return stmp3xxx_arch_dma_is_interrupt(ch);
+}
+
+#endif /* __ASM_PLAT_STMP3XXX_DMA_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/gpio.h b/arch/arm/plat-stmp3xxx/include/mach/gpio.h
new file mode 100644
index 0000000..a8b5792
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/gpio.h
@@ -0,0 +1,28 @@
+/*
+ * Freescale STMP37XX/STMP378X GPIO interface
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_PLAT_GPIO_H
+#define __ASM_PLAT_GPIO_H
+
+#define ARCH_NR_GPIOS	(32 * 3)
+#define gpio_to_irq(gpio) __gpio_to_irq(gpio)
+#define gpio_get_value(gpio) __gpio_get_value(gpio)
+#define gpio_set_value(gpio, value) __gpio_set_value(gpio, value)
+
+#include <asm-generic/gpio.h>
+
+#endif /* __ASM_PLAT_GPIO_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/hardware.h b/arch/arm/plat-stmp3xxx/include/mach/hardware.h
new file mode 100644
index 0000000..47b8978
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/hardware.h
@@ -0,0 +1,32 @@
+/*
+ * This file contains the hardware definitions of the Freescale STMP3XXX
+ *
+ * Copyright (C) 2005 Sigmatel Inc
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_ARCH_HARDWARE_H
+#define __ASM_ARCH_HARDWARE_H
+
+/*
+ * Where in virtual memory the IO devices (timers, system controllers
+ * and so on)
+ */
+#define IO_BASE			0xF0000000                 /* VA of IO  */
+#define IO_SIZE			0x00100000                 /* How much? */
+#define IO_START		0x80000000                 /* PA of IO  */
+
+/* macro to get at IO space when running virtually */
+#define IO_ADDRESS(x) (((x) & 0x000fffff) | IO_BASE)
+
+#endif
diff --git a/arch/arm/plat-stmp3xxx/include/mach/io.h b/arch/arm/plat-stmp3xxx/include/mach/io.h
new file mode 100644
index 0000000..d08b1b7
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/io.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2005 Sigmatel Inc
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_ARM_ARCH_IO_H
+#define __ASM_ARM_ARCH_IO_H
+
+#define IO_SPACE_LIMIT 0xffffffff
+
+#define __io(a) 	__typesafe_io(a)
+#define __mem_pci(a)	(a)
+#define __mem_isa(a)	(a)
+
+#endif
diff --git a/arch/arm/plat-stmp3xxx/include/mach/memory.h b/arch/arm/plat-stmp3xxx/include/mach/memory.h
new file mode 100644
index 0000000..7b875a0
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/memory.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_ARCH_MEMORY_H
+#define __ASM_ARCH_MEMORY_H
+
+/*
+ * Physical DRAM offset.
+ */
+#define PHYS_OFFSET	UL(0x40000000)
+
+#endif
diff --git a/arch/arm/plat-stmp3xxx/include/mach/pinmux.h b/arch/arm/plat-stmp3xxx/include/mach/pinmux.h
new file mode 100644
index 0000000..526c068
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/pinmux.h
@@ -0,0 +1,158 @@
+/*
+ * Freescale STMP37XX/STMP378X Pin Multiplexing
+ *
+ * Author: Vladislav Buzov <vbuzov@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __PINMUX_H
+#define __PINMUX_H
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/gpio.h>
+#include <asm-generic/gpio.h>
+
+/* Pin definitions */
+#include "pins.h"
+#include <mach/pins.h>
+
+/*
+ * Each pin may be routed up to four different HW interfaces
+ * including GPIO
+ */
+enum pin_fun {
+	PIN_FUN1 = 0,
+	PIN_FUN2,
+	PIN_FUN3,
+	PIN_GPIO,
+};
+
+/*
+ * Each pin may have different output drive strength in range from
+ * 4mA to 20mA. The most common case is 4, 8 and 12 mA strengths.
+ */
+enum pin_strength {
+	PIN_4MA = 0,
+	PIN_8MA,
+	PIN_12MA,
+	PIN_16MA,
+	PIN_20MA,
+};
+
+/*
+ * Each pin can be programmed for 1.8V or 3.3V
+ */
+enum pin_voltage {
+	PIN_1_8V = 0,
+	PIN_3_3V,
+};
+
+/*
+ * Structure to define a group of pins and their parameters
+ */
+struct pin_desc {
+	unsigned id;
+	enum pin_fun fun;
+	enum pin_strength strength;
+	enum pin_voltage voltage;
+	unsigned pullup:1;
+};
+
+struct pin_group {
+	struct pin_desc *pins;
+	int nr_pins;
+};
+
+/* Set pin drive strength */
+void stmp3xxx_pin_strength(unsigned id, enum pin_strength strength,
+			   const char *label);
+
+/* Set pin voltage */
+void stmp3xxx_pin_voltage(unsigned id, enum pin_voltage voltage,
+			   const char *label);
+
+/* Enable pull-up resistor for a pin */
+void stmp3xxx_pin_pullup(unsigned id, int enable, const char *label);
+
+/*
+ * Request a pin ownership, only one module (identified by @label)
+ * may own a pin.
+ */
+int stmp3xxx_request_pin(unsigned id, enum pin_fun fun, const char *label);
+
+/* Release pin */
+void stmp3xxx_release_pin(unsigned id, const char *label);
+
+void stmp3xxx_set_pin_type(unsigned id, enum pin_fun fun);
+
+/*
+ * Each bank is associated with a number of registers to control
+ * pin function, drive strength, voltage and pull-up reigster. The
+ * number of registers of a given type depends on the number of bits
+ * describin particular pin.
+ */
+#define HW_MUXSEL_NUM		2	/* registers per bank */
+#define HW_MUXSEL_PIN_LEN	2	/* bits per pin */
+#define HW_MUXSEL_PIN_NUM	16	/* pins per register */
+#define HW_MUXSEL_PINFUN_MASK	0x3	/* pin function mask */
+#define HW_MUXSEL_PINFUN_NUM	4	/* four options for a pin */
+
+#define HW_DRIVE_NUM		4	/* registers per bank */
+#define HW_DRIVE_PIN_LEN	4	/* bits per pin */
+#define HW_DRIVE_PIN_NUM	8	/* pins per register */
+#define HW_DRIVE_PINDRV_MASK	0x3	/* pin strength mask - 2 bits */
+#define HW_DRIVE_PINDRV_NUM	5	/* five possible strength values */
+#define HW_DRIVE_PINV_MASK	0x4	/* pin voltage mask - 1 bit */
+
+
+struct stmp3xxx_pinmux_bank {
+	struct gpio_chip chip;
+
+	/* Pins allocation map */
+	unsigned long pin_map;
+
+	/* Pin owner names */
+	const char *pin_labels[32];
+
+	/* Bank registers */
+	void __iomem *hw_muxsel[HW_MUXSEL_NUM];
+	void __iomem *hw_drive[HW_DRIVE_NUM];
+	void __iomem *hw_pull;
+
+	void __iomem *pin2irq,
+		*irqlevel,
+		*irqpolarity,
+		*irqen,
+		*irqstat;
+
+	/* HW MUXSEL register function bit values */
+	u8 functions[HW_MUXSEL_PINFUN_NUM];
+
+	/*
+	 * HW DRIVE register strength bit values:
+	 * 0xff - requested strength is not supported for this bank
+	 */
+	u8 strengths[HW_DRIVE_PINDRV_NUM];
+
+	/* GPIO things */
+	void __iomem *hw_gpio_read,
+	     *hw_gpio_set,
+	     *hw_gpio_clr,
+	     *hw_gpio_doe;
+	int irq, virq;
+};
+
+int __init stmp3xxx_pinmux_init(int virtual_irq_start);
+
+#endif /* __PINMUX_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/pins.h b/arch/arm/plat-stmp3xxx/include/mach/pins.h
new file mode 100644
index 0000000..c573318
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/pins.h
@@ -0,0 +1,30 @@
+/*
+ * Freescale STMP37XX/STMP378X Pin multiplexing interface definitions
+ *
+ * Author: Vladislav Buzov <vbuzov@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_PLAT_PINS_H
+#define __ASM_PLAT_PINS_H
+
+#define STMP3XXX_PINID(bank, pin)	(bank * 32 + pin)
+#define STMP3XXX_PINID_TO_BANK(pinid)	(pinid / 32)
+#define STMP3XXX_PINID_TO_PINNUM(pinid)	(pinid % 32)
+
+/*
+ * Special invalid pin identificator to show a pin doesn't exist
+ */
+#define PINID_NO_PIN	STMP3XXX_PINID(0xFF, 0xFF)
+
+#endif /* __ASM_PLAT_PINS_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/platform.h b/arch/arm/plat-stmp3xxx/include/mach/platform.h
new file mode 100644
index 0000000..525c413
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/platform.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_PLAT_PLATFORM_H
+#define __ASM_PLAT_PLATFORM_H
+
+#include <asm/sizes.h>
+
+/* Virtual address where registers are mapped */
+#define STMP3XXX_REGS_PHBASE	0x80000000
+#ifdef __ASSEMBLER__
+#define STMP3XXX_REGS_BASE	0xF0000000
+#else
+#define STMP3XXX_REGS_BASE	(void __iomem *)0xF0000000
+#endif
+#define STMP3XXX_REGS_SIZE	SZ_1M
+
+/* Virtual address where OCRAM is mapped */
+#define STMP3XXX_OCRAM_PHBASE	0x00000000
+#ifdef __ASSEMBLER__
+#define STMP3XXX_OCRAM_BASE	0xf1000000
+#else
+#define STMP3XXX_OCRAM_BASE	(void __iomem *)0xf1000000
+#endif
+#define STMP3XXX_OCRAM_SIZE	(32 * SZ_1K)
+
+#ifdef CONFIG_ARCH_STMP37XX
+#define IRQ_PRIORITY_REG_RD	HW_ICOLL_PRIORITYn_RD
+#define IRQ_PRIORITY_REG_WR	HW_ICOLL_PRIORITYn_WR
+#endif
+
+#ifdef CONFIG_ARCH_STMP378X
+#define IRQ_PRIORITY_REG_RD	HW_ICOLL_INTERRUPTn_RD
+#define IRQ_PRIORITY_REG_WR	HW_ICOLL_INTERRUPTn_WR
+#endif
+
+#endif /* __ASM_ARCH_PLATFORM_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h
new file mode 100644
index 0000000..78cf1be
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h
@@ -0,0 +1,34 @@
+/*
+ * Freescale STMP37XX/STMP378X core structure and function declarations
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_PLAT_STMP3XXX_H
+#define __ASM_PLAT_STMP3XXX_H
+
+#include <linux/irq.h>
+
+extern struct sys_timer stmp3xxx_timer;
+
+void stmp3xxx_init_irq(struct irq_chip *chip);
+void stmp3xxx_init(void);
+int stmp3xxx_reset_block(void __iomem *hwreg, int just_enable);
+extern struct platform_device stmp3xxx_dbguart;
+
+struct pin_group;
+void stmp3xxx_release_pin_group(struct pin_group *pin_group, const char *label);
+int stmp3xxx_request_pin_group(struct pin_group *pin_group, const char *label);
+
+#endif /* __ASM_PLAT_STMP3XXX_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h
new file mode 100644
index 0000000..47797b2
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h
@@ -0,0 +1,195 @@
+/*
+ * Freescale STMP37XX/STMP378X SoC register access interfaces
+ *
+ * The SoC registers may be accessed via:
+ *
+ * - single 32 bit address, or
+ * - four 32 bit addresses - general purpose, set, clear and toggle bits
+ *
+ * Multiple IP blocks (e.g. SSP, UART) provide identical register sets per
+ * each module
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_PLAT_STMP3XXX_REGS_H
+#define __ASM_PLAT_STMP3XXX_REGS_H
+
+#ifndef __ASSEMBLER__
+#include <linux/io.h>
+#endif
+
+#include "platform.h"
+
+#define REGS_BASE STMP3XXX_REGS_BASE
+
+#define HW_STMP3xxx_SET		0x04
+#define HW_STMP3xxx_CLR		0x08
+#define HW_STMP3xxx_TOG		0x0c
+
+#ifndef __ASSEMBLER__
+#define HW_REGISTER_FUNCS(id, base, offset, regset, rd, wr)		\
+	static const u32 id##_OFFSET = offset;				\
+	static inline u32 id##_RD_NB(const void __iomem *regbase) {	\
+		if (!rd)						\
+			printk(KERN_ERR"%s: cannot READ at %p+%x\n",	\
+				#id, regbase, offset);			\
+		return __raw_readl(regbase + offset);			\
+	}								\
+	static inline void id##_WR_NB(void __iomem *regbase, u32 v) {	\
+		if (!wr)						\
+			printk(KERN_ERR"%s: cannot WRITE at %p+%x\n",	\
+				#id, regbase, offset);			\
+		__raw_writel(v, regbase + offset);			\
+	}								\
+	static inline void id##_SET_NB(void __iomem *regbase, u32 v) {	\
+		if (!wr)						\
+			printk(KERN_ERR"%s: cannot SET at %p+%x\n",	\
+				#id, regbase, offset);			\
+		if (regset)						\
+			__raw_writel(v, regbase + 			\
+					offset + HW_STMP3xxx_SET);	\
+		else							\
+			__raw_writel(v | __raw_readl(regbase + offset),	\
+				regbase + offset);			\
+	}								\
+	static inline void id##_CLR_NB(void __iomem *regbase, u32 v) {	\
+		if (!wr) 						\
+			printk(KERN_ERR"%s: cannot CLR at %p+%x\n",	\
+				#id, regbase, offset);			\
+		if (regset)						\
+			__raw_writel(v, regbase + 			\
+					offset + HW_STMP3xxx_CLR);	\
+		else							\
+			__raw_writel(					\
+				~v & __raw_readl(regbase + offset),	\
+				regbase + offset);			\
+	}								\
+	static inline void id##_TOG_NB(void __iomem *regbase, u32 v) {	\
+		if (!wr) 						\
+			printk(KERN_ERR"%s: cannot TOG at %p+%x\n",	\
+				#id, regbase, offset);			\
+		if (regset)						\
+			__raw_writel(v, regbase + 			\
+					offset + HW_STMP3xxx_TOG);	\
+		else							\
+			__raw_writel(v ^ __raw_readl(regbase + offset),	\
+				regbase + offset);			\
+	}								\
+	static inline u32 id##_RD(void) { return id##_RD_NB(base); }	\
+	static inline void id##_WR(u32 v) { id##_WR_NB(base, v); }	\
+	static inline void id##_SET(u32 v) { id##_SET_NB(base, v); }	\
+	static inline void id##_CLR(u32 v) { id##_CLR_NB(base, v); }	\
+	static inline void id##_TOG(u32 v) { id##_TOG_NB(base, v); }
+
+#define HW_REGISTER_FUNCS_INDEXED(id, base, offset, regset, rd, wr, step)\
+	static inline u32 id##_OFFSET(int i) {				\
+		return offset + i * step;				\
+	}								\
+	static inline u32 id##_RD_NB(const void __iomem *regbase, int i) {\
+		if (!rd) 						\
+			printk(KERN_ERR"%s(%d): can't READ at %p+%x\n",	\
+				#id, i, regbase, offset + i * step);	\
+		return __raw_readl(regbase + offset + i * step);	\
+	}								\
+	static inline void id##_WR_NB(void __iomem *regbase, int i, u32 v) {\
+		if (!wr) 						\
+			printk(KERN_ERR"%s(%d): can't WRITE at %p+%x\n",\
+				#id, i, regbase, offset + i * step);	\
+		__raw_writel(v, regbase + offset + i * step);		\
+	}								\
+	static inline void id##_SET_NB(void __iomem *regbase, int i, u32 v) {\
+		if (!wr)						\
+			printk(KERN_ERR"%s(%d): can't SET at %p+%x\n",	\
+				#id, i, regbase, offset + i * step);	\
+		if (regset)						\
+			__raw_writel(v, regbase + offset + 		\
+					i * step + HW_STMP3xxx_SET);	\
+		else							\
+			__raw_writel(v | __raw_readl(regbase + 		\
+						offset + i * step),	\
+				regbase + offset + i * step);		\
+	}								\
+	static inline void id##_CLR_NB(void __iomem *regbase, int i, u32 v) {\
+		if (!wr) 						\
+			printk(KERN_ERR"%s(%d): cannot CLR at %p+%x\n",	\
+				#id, i, regbase, offset + i * step);	\
+		if (regset)						\
+			__raw_writel(v, regbase + offset + 		\
+					i * step + HW_STMP3xxx_CLR);	\
+		else							\
+			__raw_writel(~v & __raw_readl(regbase + 	\
+						offset + i * step),	\
+				regbase + offset + i * step);		\
+	}								\
+	static inline void id##_TOG_NB(void __iomem *regbase, int i, u32 v) {\
+		if (!wr) 						\
+			printk(KERN_ERR"%s(%d): cannot TOG at %p+%x\n",	\
+				#id, i, regbase, offset + i * step);	\
+		if (regset)						\
+			__raw_writel(v, regbase + offset + 		\
+					i * step + HW_STMP3xxx_TOG);	\
+		else							\
+			__raw_writel(v ^ __raw_readl(regbase + offset 	\
+						+ i * step),		\
+				regbase + offset + i * step);		\
+	}								\
+	static inline u32 id##_RD(int i) 				\
+	{ 								\
+		return id##_RD_NB(base, i); 				\
+	}								\
+	static inline void id##_WR(int i, u32 v) 			\
+	{ 								\
+		id##_WR_NB(base, i, v); 				\
+	}								\
+	static inline void id##_SET(int i, u32 v) 			\
+	{								\
+		id##_SET_NB(base, i, v); 				\
+	}								\
+	static inline void id##_CLR(int i, u32 v) 			\
+	{ 								\
+		id##_CLR_NB(base, i, v);				\
+	}								\
+	static inline void id##_TOG(int i, u32 v) 			\
+	{ 								\
+		id##_TOG_NB(base, i, v); 				\
+	}
+
+#define HW_REGISTER_WO(id, base, offset)\
+	HW_REGISTER_FUNCS(id, base, offset, 1,  0, 1)
+#define HW_REGISTER_RO(id, base, offset)\
+	HW_REGISTER_FUNCS(id, base, offset, 1,  1, 0)
+#define HW_REGISTER(id, base, offset)	\
+	HW_REGISTER_FUNCS(id, base, offset, 1,	1, 1)
+#define HW_REGISTER_0(id, base, offset)	\
+	HW_REGISTER_FUNCS(id, base, offset, 0,  1, 1)
+#define HW_REGISTER_INDEXED(id, base, offset, step)	\
+	HW_REGISTER_FUNCS_INDEXED(id, base, offset, 1,  1, 1, step)
+#define HW_REGISTER_RO_INDEXED(id, base, offset, step)	\
+	HW_REGISTER_FUNCS_INDEXED(id, base, offset, 1,  1, 0, step)
+#define HW_REGISTER_0_INDEXED(id, base, offset, step)	\
+	HW_REGISTER_FUNCS_INDEXED(id, base, offset, 0,  1, 1, step)
+#else /* __ASSEMBLER__ */
+#define HW_REGISTER_FUNCS(id, base, offset, regset, rd, wr)
+#define HW_REGISTER_FUNCS_INDEXED(id, base, offset, regset, rd, wr, step)
+#define HW_REGISTER_WO(id, base, offset)
+#define HW_REGISTER_RO(id, base, offset)
+#define HW_REGISTER(id, base, offset)
+#define HW_REGISTER_0(id, base, offset)
+#define HW_REGISTER_INDEXED(id, base, offset, step)
+#define HW_REGISTER_RO_INDEXED(id, base, offset, step)
+#define HW_REGISTER_0_INDEXED(id, base, offset, step)
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_PLAT_STMP3XXX_REGS_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/system.h b/arch/arm/plat-stmp3xxx/include/mach/system.h
new file mode 100644
index 0000000..dac48d2
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/system.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2005 Sigmatel Inc
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_ARCH_SYSTEM_H
+#define __ASM_ARCH_SYSTEM_H
+
+#include <asm/proc-fns.h>
+#include <mach/regs-clkctrl.h>
+#include <mach/regs-power.h>
+
+static inline void arch_idle(void)
+{
+	/*
+	 * This should do all the clock switching
+	 * and wait for interrupt tricks
+	 */
+
+	cpu_do_idle();
+}
+
+static inline void arch_reset(char mode, const char *cmd)
+{
+	/* Set BATTCHRG to default value */
+	HW_POWER_CHARGE_WR(0x00010000);
+
+	/* Set MINPWR to default value   */
+	HW_POWER_MINPWR_WR(0);
+
+	/* Reset digital side of chip (but not power or RTC) */
+	HW_CLKCTRL_RESET_WR(BM_CLKCTRL_RESET_DIG);
+
+	/* Should not return */
+}
+
+#endif
diff --git a/arch/arm/plat-stmp3xxx/include/mach/timex.h b/arch/arm/plat-stmp3xxx/include/mach/timex.h
new file mode 100644
index 0000000..3373985
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/timex.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 1999 ARM Limited
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/*
+ * System time clock is sourced from the 32k clock
+ */
+#define CLOCK_TICK_RATE		(32768)
diff --git a/arch/arm/plat-stmp3xxx/include/mach/uncompress.h b/arch/arm/plat-stmp3xxx/include/mach/uncompress.h
new file mode 100644
index 0000000..f79f5ee
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/uncompress.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ASM_PLAT_UNCOMPRESS_H
+#define __ASM_PLAT_UNCOMPRESS_H
+
+/*
+ * Register includes are for when the MMU enabled; we need to define our
+ * own stuff here for pre-MMU use
+ */
+#define UARTDBG_BASE		0x80070000
+#define UART(c)			(((volatile unsigned *)UARTDBG_BASE)[c])
+
+/*
+ * This does not append a newline
+ */
+static void putc(char c)
+{
+	/* Wait for TX fifo empty */
+	while ((UART(6) & (1<<7)) == 0)
+		continue;
+
+	/* Write byte */
+	UART(0) = c;
+
+	/* Wait for last bit to exit the UART */
+	while (UART(6) & (1<<3))
+		continue;
+}
+
+static void flush(void)
+{
+}
+
+/*
+ * nothing to do
+ */
+#define arch_decomp_setup()
+
+#define arch_decomp_wdog()
+
+#endif /* __ASM_PLAT_UNCOMPRESS_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/vmalloc.h b/arch/arm/plat-stmp3xxx/include/mach/vmalloc.h
new file mode 100644
index 0000000..541b880
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/vmalloc.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#define VMALLOC_END       (0xF0000000)
-- 
cgit v0.10.2


From 5cccd37ea15970846a93b4b01fafd6e043bafe8e Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Thu, 23 Apr 2009 12:24:13 +0100
Subject: [ARM] 5477/1: Freescale STMP platform support [6/10]

Sources: common STMP3xxx platform support

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/plat-stmp3xxx/Kconfig b/arch/arm/plat-stmp3xxx/Kconfig
new file mode 100644
index 0000000..2cf37c3
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/Kconfig
@@ -0,0 +1,37 @@
+if ARCH_STMP3XXX
+
+menu "Freescale STMP3xxx implementations"
+
+choice
+	prompt "Select STMP3xxx chip family"
+
+config ARCH_STMP37XX
+	bool "Freescale SMTP37xx"
+	select CPU_ARM926T
+	---help---
+	 STMP37xx refers to 3700 through 3769 chips
+
+config ARCH_STMP378X
+	bool "Freescale STMP378x"
+	select CPU_ARM926T
+	---help---
+	 STMP378x refers to 3780 through 3789 chips
+
+endchoice
+
+choice
+	prompt "Select STMP3xxx board type"
+
+config MACH_STMP37XX
+	depends on ARCH_STMP37XX
+	bool "Freescale STMP37xx development board"
+
+config MACH_STMP378X
+	depends on ARCH_STMP378X
+	bool "Freescale STMP378x development board"
+
+endchoice
+
+endmenu
+
+endif
diff --git a/arch/arm/plat-stmp3xxx/Makefile b/arch/arm/plat-stmp3xxx/Makefile
new file mode 100644
index 0000000..b634800
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the linux kernel.
+#
+# Object file lists.
+obj-y += core.o timer.o irq.o dma.o clock.o pinmux.o
diff --git a/arch/arm/plat-stmp3xxx/clock.c b/arch/arm/plat-stmp3xxx/clock.c
new file mode 100644
index 0000000..9a1d46b
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/clock.c
@@ -0,0 +1,1112 @@
+/*
+ * Clock manipulation routines for Freescale STMP37XX/STMP378X
+ *
+ * Author: Vitaly Wool <vital@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <asm/mach-types.h>
+#include <asm/clkdev.h>
+#include <mach/regs-clkctrl.h>
+
+#include "clock.h"
+
+static DEFINE_SPINLOCK(clocks_lock);
+
+static struct clk osc_24M;
+static struct clk pll_clk;
+static struct clk cpu_clk;
+static struct clk hclk;
+
+static int propagate_rate(struct clk *);
+
+static inline int clk_is_busy(struct clk *clk)
+{
+	return __raw_readl(clk->busy_reg) & (1 << clk->busy_bit);
+}
+
+static inline int clk_good(struct clk *clk)
+{
+	return clk && !IS_ERR(clk) && clk->ops;
+}
+
+static int std_clk_enable(struct clk *clk)
+{
+	if (clk->enable_reg) {
+		u32 clk_reg = __raw_readl(clk->enable_reg);
+		if (clk->enable_negate)
+			clk_reg &= ~(1 << clk->enable_shift);
+		else
+			clk_reg |= (1 << clk->enable_shift);
+		__raw_writel(clk_reg, clk->enable_reg);
+		if (clk->enable_wait)
+			udelay(clk->enable_wait);
+		return 0;
+	} else
+		return -EINVAL;
+}
+
+static int std_clk_disable(struct clk *clk)
+{
+	if (clk->enable_reg) {
+		u32 clk_reg = __raw_readl(clk->enable_reg);
+		if (clk->enable_negate)
+			clk_reg |= (1 << clk->enable_shift);
+		else
+			clk_reg &= ~(1 << clk->enable_shift);
+		__raw_writel(clk_reg, clk->enable_reg);
+		return 0;
+	} else
+		return -EINVAL;
+}
+
+static int io_set_rate(struct clk *clk, u32 rate)
+{
+	u32 reg_frac, clkctrl_frac;
+	int i, ret = 0, mask = 0x1f;
+
+	clkctrl_frac = (clk->parent->rate * 18 + rate - 1) / rate;
+
+	if (clkctrl_frac < 18 || clkctrl_frac > 35) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	reg_frac = __raw_readl(clk->scale_reg);
+	reg_frac &= ~(mask << clk->scale_shift);
+	__raw_writel(reg_frac | (clkctrl_frac << clk->scale_shift),
+				clk->scale_reg);
+	if (clk->busy_reg) {
+		for (i = 10000; i; i--)
+			if (!clk_is_busy(clk))
+				break;
+		if (!i)
+			ret = -ETIMEDOUT;
+		else
+			ret = 0;
+	}
+out:
+	return ret;
+}
+
+static long io_get_rate(struct clk *clk)
+{
+	long rate = clk->parent->rate * 18;
+	int mask = 0x1f;
+
+	rate /= (__raw_readl(clk->scale_reg) >> clk->scale_shift) & mask;
+	clk->rate = rate;
+
+	return rate;
+}
+
+static long per_get_rate(struct clk *clk)
+{
+	long rate = clk->parent->rate;
+	long div;
+	const int mask = 0xff;
+
+	if (clk->enable_reg &&
+			!(__raw_readl(clk->enable_reg) & clk->enable_shift))
+		clk->rate = 0;
+	else {
+		div = (__raw_readl(clk->scale_reg) >> clk->scale_shift) & mask;
+		if (div)
+			rate /= div;
+		clk->rate = rate;
+	}
+
+	return clk->rate;
+}
+
+static int per_set_rate(struct clk *clk, u32 rate)
+{
+	int ret = -EINVAL;
+	int div = (clk->parent->rate + rate - 1) / rate;
+	u32 reg_frac;
+	const int mask = 0xff;
+	int try = 10;
+	int i = -1;
+
+	if (div == 0 || div > mask)
+		goto out;
+
+	reg_frac = __raw_readl(clk->scale_reg);
+	reg_frac &= ~(mask << clk->scale_shift);
+
+	while (try--) {
+		__raw_writel(reg_frac | (div << clk->scale_shift),
+				clk->scale_reg);
+
+		if (clk->busy_reg) {
+			for (i = 10000; i; i--)
+				if (!clk_is_busy(clk))
+					break;
+		}
+		if (i)
+			break;
+	}
+
+	if (!i)
+		ret = -ETIMEDOUT;
+	else
+		ret = 0;
+
+out:
+	if (ret != 0)
+		printk(KERN_ERR "%s: error %d\n", __func__, ret);
+	return ret;
+}
+
+static long lcdif_get_rate(struct clk *clk)
+{
+	long rate = clk->parent->rate;
+	long div;
+	const int mask = 0xff;
+
+	div = (__raw_readl(clk->scale_reg) >> clk->scale_shift) & mask;
+	if (div) {
+		rate /= div;
+		div = (HW_CLKCTRL_FRAC_RD() & BM_CLKCTRL_FRAC_PIXFRAC) >>
+				BP_CLKCTRL_FRAC_PIXFRAC;
+		rate /= div;
+	}
+	clk->rate = rate;
+
+	return rate;
+}
+
+static int lcdif_set_rate(struct clk *clk, u32 rate)
+{
+	int ret = 0;
+	/*
+	 * On 3700, we can get most timings exact by modifying ref_pix
+	 * and the divider, but keeping the phase timings at 1 (2
+	 * phases per cycle).
+	 *
+	 * ref_pix can be between 480e6*18/35=246.9MHz and 480e6*18/18=480MHz,
+	 * which is between 18/(18*480e6)=2.084ns and 35/(18*480e6)=4.050ns.
+	 *
+	 * ns_cycle >= 2*18e3/(18*480) = 25/6
+	 * ns_cycle <= 2*35e3/(18*480) = 875/108
+	 *
+	 * Multiply the ns_cycle by 'div' to lengthen it until it fits the
+	 * bounds. This is the divider we'll use after ref_pix.
+	 *
+	 * 6 * ns_cycle >= 25 * div
+	 * 108 * ns_cycle <= 875 * div
+	 */
+	u32 ns_cycle = 1000000 / rate;
+	u32 div, reg_val;
+	u32 lowest_result = (u32) -1;
+	u32 lowest_div = 0, lowest_fracdiv = 0;
+
+	for (div = 1; div < 256; ++div) {
+		u32 fracdiv;
+		u32 ps_result;
+		int lower_bound = 6 * ns_cycle >= 25 * div;
+		int upper_bound = 108 * ns_cycle <= 875 * div;
+		if (!lower_bound)
+			break;
+		if (!upper_bound)
+			continue;
+		/*
+		 * Found a matching div. Calculate fractional divider needed,
+		 * rounded up.
+		 */
+		fracdiv = ((clk->parent->rate / 1000 * 18 / 2) *
+				ns_cycle + 1000 * div - 1) /
+				(1000 * div);
+		if (fracdiv < 18 || fracdiv > 35) {
+			ret = -EINVAL;
+			goto out;
+		}
+		/* Calculate the actual cycle time this results in */
+		ps_result = 6250 * div * fracdiv / 27;
+
+		/* Use the fastest result that doesn't break ns_cycle */
+		if (ps_result <= lowest_result) {
+			lowest_result = ps_result;
+			lowest_div = div;
+			lowest_fracdiv = fracdiv;
+		}
+	}
+
+	if (div >= 256 || lowest_result == (u32) -1) {
+		ret = -EINVAL;
+		goto out;
+	}
+	pr_debug("Programming PFD=%u,DIV=%u ref_pix=%uMHz "
+			"PIXCLK=%uMHz cycle=%u.%03uns\n",
+			lowest_fracdiv, lowest_div,
+			480*18/lowest_fracdiv, 480*18/lowest_fracdiv/lowest_div,
+			lowest_result / 1000, lowest_result % 1000);
+
+	/* Program ref_pix phase fractional divider */
+	HW_CLKCTRL_FRAC_WR((HW_CLKCTRL_FRAC_RD() & ~BM_CLKCTRL_FRAC_PIXFRAC) |
+			   BF_CLKCTRL_FRAC_PIXFRAC(lowest_fracdiv));
+	/* Ungate PFD */
+	HW_CLKCTRL_FRAC_CLR(BM_CLKCTRL_FRAC_CLKGATEPIX);
+
+	/* Program pix divider */
+	reg_val = __raw_readl(clk->scale_reg);
+	reg_val &= ~(BM_CLKCTRL_PIX_DIV | BM_CLKCTRL_PIX_CLKGATE);
+	reg_val |= BF_CLKCTRL_PIX_DIV(lowest_div);
+	__raw_writel(reg_val, clk->scale_reg);
+
+	/* Wait for divider update */
+	if (clk->busy_reg) {
+		int i;
+		for (i = 10000; i; i--)
+			if (!clk_is_busy(clk))
+				break;
+		if (!i) {
+			ret = -ETIMEDOUT;
+			goto out;
+		}
+	}
+
+	/* Switch to ref_pix source */
+	HW_CLKCTRL_CLKSEQ_CLR(BM_CLKCTRL_CLKSEQ_BYPASS_PIX);
+
+out:
+	return ret;
+}
+
+
+static int cpu_set_rate(struct clk *clk, u32 rate)
+{
+	if (rate < 24000)
+		return -EINVAL;
+	else if (rate == 24000) {
+		/* switch to the 24M source */
+		clk_set_parent(clk, &osc_24M);
+	} else {
+		int i;
+		u32 clkctrl_cpu = 1;
+		u32 c = clkctrl_cpu;
+		u32 clkctrl_frac = 1;
+		u32 val;
+		for ( ; c < 0x40; c++) {
+			u32 f = (pll_clk.rate*18/c + rate/2) / rate;
+			int s1, s2;
+
+			if (f < 18 || f > 35)
+				continue;
+			s1 = pll_clk.rate*18/clkctrl_frac/clkctrl_cpu - rate;
+			s2 = pll_clk.rate*18/c/f - rate;
+			pr_debug("%s: s1 %d, s2 %d\n", __func__, s1, s2);
+			if (abs(s1) > abs(s2)) {
+				clkctrl_cpu = c;
+				clkctrl_frac = f;
+			}
+			if (s2 == 0)
+				break;
+		};
+		pr_debug("%s: clkctrl_cpu %d, clkctrl_frac %d\n", __func__,
+				clkctrl_cpu, clkctrl_frac);
+		if (c == 0x40) {
+			int  d = pll_clk.rate*18/clkctrl_frac/clkctrl_cpu -
+				rate;
+			if (abs(d) > 100 ||
+			    clkctrl_frac < 18 || clkctrl_frac > 35)
+				return -EINVAL;
+		}
+
+		/* 4.6.2 */
+		val = __raw_readl(clk->scale_reg);
+		val &= ~(0x3f << clk->scale_shift);
+		val |= clkctrl_frac;
+		clk_set_parent(clk, &osc_24M);
+		udelay(10);
+		__raw_writel(val, clk->scale_reg);
+		/* ungate */
+		__raw_writel(1<<7, clk->scale_reg + 8);
+		/* write clkctrl_cpu */
+		clk->saved_div = clkctrl_cpu;
+		HW_CLKCTRL_CPU_WR((HW_CLKCTRL_CPU_RD() & ~0x3f) | clkctrl_cpu);
+		for (i = 10000; i; i--)
+			if (!clk_is_busy(clk))
+				break;
+		if (!i) {
+			printk(KERN_ERR "couldn't set up CPU divisor\n");
+			return -ETIMEDOUT;
+		}
+		clk_set_parent(clk, &pll_clk);
+		clk->saved_div = 0;
+		udelay(10);
+	}
+	return 0;
+}
+
+static long cpu_get_rate(struct clk *clk)
+{
+	long rate = clk->parent->rate * 18;
+
+	rate /= (__raw_readl(clk->scale_reg) >> clk->scale_shift) & 0x3f;
+	rate /= HW_CLKCTRL_CPU_RD() & 0x3f;
+	rate = ((rate + 9) / 10) * 10;
+	clk->rate = rate;
+
+	return rate;
+}
+
+static long cpu_round_rate(struct clk *clk, u32 rate)
+{
+	unsigned long r = 0;
+
+	if (rate <= 24000)
+		r = 24000;
+	else {
+		u32 clkctrl_cpu = 1;
+		u32 clkctrl_frac;
+		do {
+			clkctrl_frac =
+				(pll_clk.rate*18 / clkctrl_cpu + rate/2) / rate;
+			if (clkctrl_frac > 35)
+				continue;
+			if (pll_clk.rate*18 / clkctrl_frac / clkctrl_cpu/10 ==
+			    rate / 10)
+				break;
+		} while (pll_clk.rate / 2  >= clkctrl_cpu++ * rate);
+		if (pll_clk.rate / 2 < (clkctrl_cpu - 1) * rate)
+			clkctrl_cpu--;
+		pr_debug("%s: clkctrl_cpu %d, clkctrl_frac %d\n", __func__,
+				clkctrl_cpu, clkctrl_frac);
+		if (clkctrl_frac < 18)
+			clkctrl_frac = 18;
+		if (clkctrl_frac > 35)
+			clkctrl_frac = 35;
+
+		r = pll_clk.rate * 18;
+		r /= clkctrl_frac;
+		r /= clkctrl_cpu;
+		r = 10 * ((r + 9) / 10);
+	}
+	return r;
+}
+
+static long emi_get_rate(struct clk *clk)
+{
+	long rate = clk->parent->rate * 18;
+
+	rate /= (__raw_readl(clk->scale_reg) >> clk->scale_shift) & 0x3f;
+	rate /= HW_CLKCTRL_EMI_RD() & 0x3f;
+	clk->rate = rate;
+
+	return rate;
+}
+
+static int clkseq_set_parent(struct clk *clk, struct clk *parent)
+{
+	int ret = -EINVAL;
+	int shift = 8;
+
+	/* bypass? */
+	if (parent == &osc_24M)
+		shift = 4;
+
+	if (clk->bypass_reg) {
+		u32 hbus_mask = BM_CLKCTRL_HBUS_DIV_FRAC_EN |
+				BM_CLKCTRL_HBUS_DIV;
+
+		if (clk == &cpu_clk && shift == 4) {
+			u32 hbus_val = HW_CLKCTRL_HBUS_RD();
+			u32 cpu_val = HW_CLKCTRL_CPU_RD();
+			hbus_val &= ~hbus_mask;
+			hbus_val |= 1;
+			clk->saved_div = cpu_val & BM_CLKCTRL_CPU_DIV_CPU;
+			cpu_val &= ~BM_CLKCTRL_CPU_DIV_CPU;
+			cpu_val |= 1;
+			__raw_writel(1 << clk->bypass_shift,
+					clk->bypass_reg + shift);
+			if (machine_is_stmp378x()) {
+				HW_CLKCTRL_HBUS_WR(hbus_val);
+				HW_CLKCTRL_CPU_WR(cpu_val);
+				hclk.rate = 0;
+			}
+		} else if (clk == &cpu_clk && shift == 8) {
+			u32 hbus_val = HW_CLKCTRL_HBUS_RD();
+			u32 cpu_val = HW_CLKCTRL_CPU_RD();
+			hbus_val &= ~hbus_mask;
+			hbus_val |= 2;
+			cpu_val &= ~BM_CLKCTRL_CPU_DIV_CPU;
+			if (clk->saved_div)
+				cpu_val |= clk->saved_div;
+			else
+				cpu_val |= 2;
+			if (machine_is_stmp378x()) {
+				HW_CLKCTRL_HBUS_WR(hbus_val);
+				HW_CLKCTRL_CPU_WR(cpu_val);
+				hclk.rate = 0;
+			}
+			__raw_writel(1 << clk->bypass_shift,
+					clk->bypass_reg + shift);
+		} else
+			__raw_writel(1 << clk->bypass_shift,
+					clk->bypass_reg + shift);
+
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static int hbus_set_rate(struct clk *clk, u32 rate)
+{
+	u8 div = 0;
+	int is_frac = 0;
+	u32 clkctrl_hbus;
+	struct clk *parent = clk->parent;
+
+	pr_debug("%s: rate %d, parent rate %d\n", __func__, rate,
+			parent->rate);
+
+	if (rate > parent->rate)
+		return -EINVAL;
+
+	if (((parent->rate + rate/2) / rate) * rate != parent->rate &&
+	    parent->rate / rate < 32) {
+		pr_debug("%s: switching to fractional mode\n", __func__);
+		is_frac = 1;
+	}
+
+	if (is_frac)
+		div = (32 * rate + parent->rate / 2) / parent->rate;
+	else
+		div = (parent->rate + rate - 1) / rate;
+	pr_debug("%s: div calculated is %d\n", __func__, div);
+	if (!div || div > 0x1f)
+		return -EINVAL;
+
+	clk_set_parent(&cpu_clk, &osc_24M);
+	udelay(10);
+	clkctrl_hbus = __raw_readl(clk->scale_reg);
+	clkctrl_hbus &= ~0x3f;
+	clkctrl_hbus |= div;
+	clkctrl_hbus |= (is_frac << 5);
+
+	__raw_writel(clkctrl_hbus, clk->scale_reg);
+	if (clk->busy_reg) {
+		int i;
+		for (i = 10000; i; i--)
+			if (!clk_is_busy(clk))
+				break;
+		if (!i) {
+			printk(KERN_ERR "couldn't set up CPU divisor\n");
+			return -ETIMEDOUT;
+		}
+	}
+	clk_set_parent(&cpu_clk, &pll_clk);
+	__raw_writel(clkctrl_hbus, clk->scale_reg);
+	udelay(10);
+	return 0;
+}
+
+static long hbus_get_rate(struct clk *clk)
+{
+	long rate = clk->parent->rate;
+
+	if (__raw_readl(clk->scale_reg) & 0x20) {
+		rate *= __raw_readl(clk->scale_reg) & 0x1f;
+		rate /= 32;
+	} else
+		rate /= __raw_readl(clk->scale_reg) & 0x1f;
+	clk->rate = rate;
+
+	return rate;
+}
+
+static int xbus_set_rate(struct clk *clk, u32 rate)
+{
+	u16 div = 0;
+	u32 clkctrl_xbus;
+
+	pr_debug("%s: rate %d, parent rate %d\n", __func__, rate,
+			clk->parent->rate);
+
+	div = (clk->parent->rate + rate - 1) / rate;
+	pr_debug("%s: div calculated is %d\n", __func__, div);
+	if (!div || div > 0x3ff)
+		return -EINVAL;
+
+	clkctrl_xbus = __raw_readl(clk->scale_reg);
+	clkctrl_xbus &= ~0x3ff;
+	clkctrl_xbus |= div;
+	__raw_writel(clkctrl_xbus, clk->scale_reg);
+	if (clk->busy_reg) {
+		int i;
+		for (i = 10000; i; i--)
+			if (!clk_is_busy(clk))
+				break;
+		if (!i) {
+			printk(KERN_ERR "couldn't set up xbus divisor\n");
+			return -ETIMEDOUT;
+		}
+	}
+	return 0;
+}
+
+static long xbus_get_rate(struct clk *clk)
+{
+	long rate = clk->parent->rate;
+
+	rate /= __raw_readl(clk->scale_reg) & 0x3ff;
+	clk->rate = rate;
+
+	return rate;
+}
+
+
+/* Clock ops */
+
+static struct clk_ops std_ops = {
+	.enable		= std_clk_enable,
+	.disable	= std_clk_disable,
+	.get_rate	= per_get_rate,
+	.set_rate	= per_set_rate,
+	.set_parent	= clkseq_set_parent,
+};
+
+static struct clk_ops min_ops = {
+	.enable		= std_clk_enable,
+	.disable	= std_clk_disable,
+};
+
+static struct clk_ops cpu_ops = {
+	.enable		= std_clk_enable,
+	.disable	= std_clk_disable,
+	.get_rate	= cpu_get_rate,
+	.set_rate	= cpu_set_rate,
+	.round_rate	= cpu_round_rate,
+	.set_parent	= clkseq_set_parent,
+};
+
+static struct clk_ops io_ops = {
+	.enable		= std_clk_enable,
+	.disable	= std_clk_disable,
+	.get_rate	= io_get_rate,
+	.set_rate	= io_set_rate,
+};
+
+static struct clk_ops hbus_ops = {
+	.get_rate	= hbus_get_rate,
+	.set_rate	= hbus_set_rate,
+};
+
+static struct clk_ops xbus_ops = {
+	.get_rate	= xbus_get_rate,
+	.set_rate	= xbus_set_rate,
+};
+
+static struct clk_ops lcdif_ops = {
+	.enable		= std_clk_enable,
+	.disable	= std_clk_disable,
+	.get_rate	= lcdif_get_rate,
+	.set_rate	= lcdif_set_rate,
+	.set_parent	= clkseq_set_parent,
+};
+
+static struct clk_ops emi_ops = {
+	.get_rate	= emi_get_rate,
+};
+
+/* List of on-chip clocks */
+
+static struct clk osc_24M = {
+	.flags		= FIXED_RATE | ENABLED,
+	.rate		= 24000,
+};
+
+static struct clk pll_clk = {
+	.parent		= &osc_24M,
+	.enable_reg	= HW_CLKCTRL_PLLCTRL0_ADDR,
+	.enable_shift	= 16,
+	.enable_wait	= 10,
+	.flags		= FIXED_RATE | ENABLED,
+	.rate		= 480000,
+	.ops		= &min_ops,
+};
+
+static struct clk cpu_clk = {
+	.parent		= &pll_clk,
+	.scale_reg	= HW_CLKCTRL_FRAC_ADDR,
+	.scale_shift	= 0,
+	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_shift	= 7,
+	.busy_reg	= HW_CLKCTRL_CPU_ADDR,
+	.busy_bit	= 28,
+	.flags		= RATE_PROPAGATES | ENABLED,
+	.ops		= &cpu_ops,
+};
+
+static struct clk io_clk = {
+	.parent		= &pll_clk,
+	.enable_reg	= HW_CLKCTRL_FRAC_ADDR,
+	.enable_shift	= 31,
+	.enable_negate	= 1,
+	.scale_reg	= HW_CLKCTRL_FRAC_ADDR,
+	.scale_shift	= 24,
+	.flags		= RATE_PROPAGATES | ENABLED,
+	.ops		= &io_ops,
+};
+
+static struct clk hclk = {
+	.parent		= &cpu_clk,
+	.scale_reg	= HW_CLKCTRL_HBUS_ADDR,
+	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_shift	= 7,
+	.busy_reg	= HW_CLKCTRL_HBUS_ADDR,
+	.busy_bit	= 29,
+	.flags		= RATE_PROPAGATES | ENABLED,
+	.ops		= &hbus_ops,
+};
+
+static struct clk xclk = {
+	.parent		= &osc_24M,
+	.scale_reg	= HW_CLKCTRL_XBUS_ADDR,
+	.busy_reg	= HW_CLKCTRL_XBUS_ADDR,
+	.busy_bit	= 31,
+	.flags		= RATE_PROPAGATES | ENABLED,
+	.ops		= &xbus_ops,
+};
+
+static struct clk uart_clk = {
+	.parent		= &xclk,
+	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_shift	= 31,
+	.enable_negate	= 1,
+	.flags		= ENABLED,
+	.ops		= &min_ops,
+};
+
+static struct clk audio_clk = {
+	.parent		= &xclk,
+	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_shift	= 30,
+	.enable_negate	= 1,
+	.ops		= &min_ops,
+};
+
+static struct clk pwm_clk = {
+	.parent		= &xclk,
+	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_shift	= 29,
+	.enable_negate	= 1,
+	.ops		= &min_ops,
+};
+
+static struct clk dri_clk = {
+	.parent		= &xclk,
+	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_shift	= 28,
+	.enable_negate	= 1,
+	.ops		= &min_ops,
+};
+
+static struct clk digctl_clk = {
+	.parent		= &xclk,
+	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_shift	= 27,
+	.enable_negate	= 1,
+	.ops		= &min_ops,
+};
+
+static struct clk timer_clk = {
+	.parent		= &xclk,
+	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_shift	= 26,
+	.enable_negate	= 1,
+	.flags		= ENABLED,
+	.ops		= &min_ops,
+};
+
+static struct clk lcdif_clk = {
+	.parent		= &pll_clk,
+	.scale_reg	= HW_CLKCTRL_PIX_ADDR,
+	.busy_reg	= HW_CLKCTRL_PIX_ADDR,
+	.busy_bit	= 29,
+	.enable_reg	= HW_CLKCTRL_PIX_ADDR,
+	.enable_shift	= 31,
+	.enable_negate	= 1,
+	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_shift	= 1,
+	.flags		= NEEDS_SET_PARENT,
+	.ops		= &lcdif_ops,
+};
+
+static struct clk ssp_clk = {
+	.parent		= &io_clk,
+	.scale_reg	= HW_CLKCTRL_SSP_ADDR,
+	.busy_reg	= HW_CLKCTRL_SSP_ADDR,
+	.busy_bit	= 29,
+	.enable_reg	= HW_CLKCTRL_SSP_ADDR,
+	.enable_shift	= 31,
+	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_shift	= 5,
+	.enable_negate	= 1,
+	.flags		= NEEDS_SET_PARENT,
+	.ops		= &std_ops,
+};
+
+static struct clk gpmi_clk = {
+	.parent		= &io_clk,
+	.scale_reg	= HW_CLKCTRL_GPMI_ADDR,
+	.busy_reg	= HW_CLKCTRL_GPMI_ADDR,
+	.busy_bit	= 29,
+	.enable_reg	= HW_CLKCTRL_GPMI_ADDR,
+	.enable_shift	= 31,
+	.enable_negate	= 1,
+	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_shift	= 4,
+	.flags		= NEEDS_SET_PARENT,
+	.ops		= &std_ops,
+};
+
+static struct clk spdif_clk = {
+	.parent		= &pll_clk,
+	.enable_reg	= HW_CLKCTRL_SPDIF_ADDR,
+	.enable_shift	= 31,
+	.enable_negate	= 1,
+	.ops		= &min_ops,
+};
+
+static struct clk emi_clk = {
+	.parent		= &pll_clk,
+	.enable_reg	= HW_CLKCTRL_EMI_ADDR,
+	.enable_shift	= 31,
+	.enable_negate	= 1,
+	.scale_reg	= HW_CLKCTRL_FRAC_ADDR,
+	.scale_shift	= 8,
+	.busy_reg	= HW_CLKCTRL_EMI_ADDR,
+	.busy_bit	= 28,
+	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_shift	= 6,
+	.flags		= ENABLED,
+	.ops		= &emi_ops,
+};
+
+static struct clk ir_clk = {
+	.parent		= &io_clk,
+	.enable_reg	= HW_CLKCTRL_IR_ADDR,
+	.enable_shift	= 31,
+	.enable_negate	= 1,
+	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_shift	= 3,
+	.ops		= &min_ops,
+};
+
+static struct clk saif_clk = {
+	.parent		= &pll_clk,
+	.scale_reg	= HW_CLKCTRL_SAIF_ADDR,
+	.busy_reg	= HW_CLKCTRL_SAIF_ADDR,
+	.busy_bit	= 29,
+	.enable_reg	= HW_CLKCTRL_SAIF_ADDR,
+	.enable_shift	= 31,
+	.enable_negate	= 1,
+	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_shift	= 0,
+	.ops		= &std_ops,
+};
+
+static struct clk usb_clk = {
+	.parent		= &pll_clk,
+	.enable_reg	= HW_CLKCTRL_PLLCTRL0_ADDR,
+	.enable_shift	= 18,
+	.enable_negate	= 1,
+	.ops		= &min_ops,
+};
+
+/* list of all the clocks */
+static __initdata struct clk_lookup onchip_clks[] = {
+	{
+		.con_id = "osc_24M",
+		.clk = &osc_24M,
+	}, {
+		.con_id = "pll",
+		.clk = &pll_clk,
+	}, {
+		.con_id = "cpu",
+		.clk = &cpu_clk,
+	}, {
+		.con_id = "hclk",
+		.clk = &hclk,
+	}, {
+		.con_id = "xclk",
+		.clk = &xclk,
+	}, {
+		.con_id = "io",
+		.clk = &io_clk,
+	}, {
+		.con_id = "uart",
+		.clk = &uart_clk,
+	}, {
+		.con_id = "audio",
+		.clk = &audio_clk,
+	}, {
+		.con_id = "pwm",
+		.clk = &pwm_clk,
+	}, {
+		.con_id = "dri",
+		.clk = &dri_clk,
+	}, {
+		.con_id = "digctl",
+		.clk = &digctl_clk,
+	}, {
+		.con_id = "timer",
+		.clk = &timer_clk,
+	}, {
+		.con_id = "lcdif",
+		.clk = &lcdif_clk,
+	}, {
+		.con_id = "ssp",
+		.clk = &ssp_clk,
+	}, {
+		.con_id = "gpmi",
+		.clk = &gpmi_clk,
+	}, {
+		.con_id = "spdif",
+		.clk = &spdif_clk,
+	}, {
+		.con_id = "emi",
+		.clk = &emi_clk,
+	}, {
+		.con_id = "ir",
+		.clk = &ir_clk,
+	}, {
+		.con_id = "saif",
+		.clk = &saif_clk,
+	}, {
+		.con_id = "usb",
+		.clk = &usb_clk,
+	},
+};
+
+static int __init propagate_rate(struct clk *clk)
+{
+	struct clk_lookup *cl;
+
+	for (cl = onchip_clks; cl < onchip_clks + ARRAY_SIZE(onchip_clks);
+	     cl++) {
+		if (unlikely(!clk_good(cl->clk)))
+			continue;
+		if (cl->clk->parent == clk && cl->clk->ops->get_rate) {
+			cl->clk->ops->get_rate(cl->clk);
+			if (cl->clk->flags & RATE_PROPAGATES)
+				propagate_rate(cl->clk);
+		}
+	}
+
+	return 0;
+}
+
+/* Exported API */
+unsigned long clk_get_rate(struct clk *clk)
+{
+	if (unlikely(!clk_good(clk)))
+		return 0;
+
+	if (clk->rate != 0)
+		return clk->rate;
+
+	if (clk->ops->get_rate != NULL)
+		return clk->ops->get_rate(clk);
+
+	return clk_get_rate(clk->parent);
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+	if (unlikely(!clk_good(clk)))
+		return 0;
+
+	if (clk->ops->round_rate)
+		return clk->ops->round_rate(clk, rate);
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
+static inline int close_enough(long rate1, long rate2)
+{
+	return rate1 && !((rate2 - rate1) * 1000 / rate1);
+}
+
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+	int ret = -EINVAL;
+
+	if (unlikely(!clk_good(clk)))
+		goto out;
+
+	if (clk->flags & FIXED_RATE || !clk->ops->set_rate)
+		goto out;
+
+	else if (!close_enough(clk->rate, rate)) {
+		ret = clk->ops->set_rate(clk, rate);
+		if (ret < 0)
+			goto out;
+		clk->rate = rate;
+		if (clk->flags & RATE_PROPAGATES)
+			propagate_rate(clk);
+	} else
+		ret = 0;
+
+out:
+	return ret;
+}
+EXPORT_SYMBOL(clk_set_rate);
+
+int clk_enable(struct clk *clk)
+{
+	unsigned long clocks_flags;
+
+	if (unlikely(!clk_good(clk)))
+		return -EINVAL;
+
+	if (clk->parent)
+		clk_enable(clk->parent);
+
+	spin_lock_irqsave(&clocks_lock, clocks_flags);
+
+	clk->usage++;
+	if (clk->ops && clk->ops->enable)
+		clk->ops->enable(clk);
+
+	spin_unlock_irqrestore(&clocks_lock, clocks_flags);
+	return 0;
+}
+EXPORT_SYMBOL(clk_enable);
+
+static void local_clk_disable(struct clk *clk)
+{
+	if (unlikely(!clk_good(clk)))
+		return;
+
+	if (clk->usage == 0 && clk->ops->disable)
+		clk->ops->disable(clk);
+
+	if (clk->parent)
+		local_clk_disable(clk->parent);
+}
+
+void clk_disable(struct clk *clk)
+{
+	unsigned long clocks_flags;
+
+	if (unlikely(!clk_good(clk)))
+		return;
+
+	spin_lock_irqsave(&clocks_lock, clocks_flags);
+
+	if ((--clk->usage) == 0 && clk->ops->disable)
+		clk->ops->disable(clk);
+
+	spin_unlock_irqrestore(&clocks_lock, clocks_flags);
+	if (clk->parent)
+		clk_disable(clk->parent);
+}
+EXPORT_SYMBOL(clk_disable);
+
+/* Some additional API */
+int clk_set_parent(struct clk *clk, struct clk *parent)
+{
+	int ret = -ENODEV;
+	unsigned long clocks_flags;
+
+	if (unlikely(!clk_good(clk)))
+		goto out;
+
+	if (!clk->ops->set_parent)
+		goto out;
+
+	spin_lock_irqsave(&clocks_lock, clocks_flags);
+
+	ret = clk->ops->set_parent(clk, parent);
+	if (!ret) {
+		/* disable if usage count is 0 */
+		local_clk_disable(parent);
+
+		parent->usage += clk->usage;
+		clk->parent->usage -= clk->usage;
+
+		/* disable if new usage count is 0 */
+		local_clk_disable(clk->parent);
+
+		clk->parent = parent;
+	}
+	spin_unlock_irqrestore(&clocks_lock, clocks_flags);
+
+out:
+	return ret;
+}
+EXPORT_SYMBOL(clk_set_parent);
+
+struct clk *clk_get_parent(struct clk *clk)
+{
+	if (unlikely(!clk_good(clk)))
+		return NULL;
+	return clk->parent;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
+static int __init clk_init(void)
+{
+	struct clk_lookup *cl;
+	struct clk_ops *ops;
+
+	spin_lock_init(&clocks_lock);
+
+	for (cl = onchip_clks; cl < onchip_clks + ARRAY_SIZE(onchip_clks);
+	     cl++) {
+		if (cl->clk->flags & ENABLED)
+			clk_enable(cl->clk);
+		else
+			local_clk_disable(cl->clk);
+
+		ops = cl->clk->ops;
+
+		if ((cl->clk->flags & NEEDS_INITIALIZATION) &&
+				ops && ops->set_rate)
+			ops->set_rate(cl->clk, cl->clk->rate);
+
+		if (cl->clk->flags & FIXED_RATE) {
+			if (cl->clk->flags & RATE_PROPAGATES)
+				propagate_rate(cl->clk);
+		} else {
+			if (ops && ops->get_rate)
+				ops->get_rate(cl->clk);
+		}
+
+		if (cl->clk->flags & NEEDS_SET_PARENT) {
+			if (ops && ops->set_parent)
+				ops->set_parent(cl->clk, cl->clk->parent);
+		}
+
+		clkdev_add(cl);
+	}
+	return 0;
+}
+
+arch_initcall(clk_init);
diff --git a/arch/arm/plat-stmp3xxx/clock.h b/arch/arm/plat-stmp3xxx/clock.h
new file mode 100644
index 0000000..a6611e1
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/clock.h
@@ -0,0 +1,61 @@
+/*
+ * Clock control driver for Freescale STMP37XX/STMP378X - internal header file
+ *
+ * Author: Vitaly Wool <vital@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __ARCH_ARM_STMX3XXX_CLOCK_H__
+#define __ARCH_ARM_STMX3XXX_CLOCK_H__
+
+#ifndef __ASSEMBLER__
+
+struct clk_ops {
+	int (*enable) (struct clk *);
+	int (*disable) (struct clk *);
+	long (*get_rate) (struct clk *);
+	long (*round_rate) (struct clk *, u32);
+	int (*set_rate) (struct clk *, u32);
+	int (*set_parent) (struct clk *, struct clk *);
+};
+
+struct clk {
+	struct clk *parent;
+	u32 rate;
+	u32 flags;
+	u8 scale_shift;
+	u8 enable_shift;
+	u8 bypass_shift;
+	u8 busy_bit;
+	s8 usage;
+	int enable_wait;
+	int enable_negate;
+	u32 saved_div;
+	void __iomem *enable_reg;
+	void __iomem *scale_reg;
+	void __iomem *bypass_reg;
+	void __iomem *busy_reg;
+	struct clk_ops *ops;
+};
+
+#endif /* __ASSEMBLER__ */
+
+/* Flags */
+#define RATE_PROPAGATES      (1<<0)
+#define NEEDS_INITIALIZATION (1<<1)
+#define PARENT_SET_RATE      (1<<2)
+#define FIXED_RATE           (1<<3)
+#define ENABLED	             (1<<4)
+#define NEEDS_SET_PARENT     (1<<5)
+
+#endif
diff --git a/arch/arm/plat-stmp3xxx/core.c b/arch/arm/plat-stmp3xxx/core.c
new file mode 100644
index 0000000..6e2fef16
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/core.c
@@ -0,0 +1,127 @@
+/*
+ * Freescale STMP37XX/STMP378X core routines
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+
+#include <mach/stmp3xxx.h>
+#include <mach/dma.h>
+#include <mach/regs-clkctrl.h>
+
+static int __stmp3xxx_reset_block(void __iomem *hwreg, int just_enable)
+{
+	u32 c;
+	int timeout;
+
+	/* the process of software reset of IP block is done
+	   in several steps:
+
+	   - clear SFTRST and wait for block is enabled;
+	   - clear clock gating (CLKGATE bit);
+	   - set the SFTRST again and wait for block is in reset;
+	   - clear SFTRST and wait for reset completion.
+	*/
+	c = __raw_readl(hwreg);
+	c &= ~(1<<31);		/* clear SFTRST */
+	__raw_writel(c, hwreg);
+	for (timeout = 1000000; timeout > 0; timeout--)
+		/* still in SFTRST state ? */
+		if ((__raw_readl(hwreg) & (1<<31)) == 0)
+			break;
+	if (timeout <= 0) {
+		printk(KERN_ERR"%s(%p): timeout when enabling\n",
+				__func__, hwreg);
+		return -ETIME;
+	}
+
+	c = __raw_readl(hwreg);
+	c &= ~(1<<30);		/* clear CLKGATE */
+	__raw_writel(c, hwreg);
+
+	if (!just_enable) {
+		c = __raw_readl(hwreg);
+		c |= (1<<31);		/* now again set SFTRST */
+		__raw_writel(c, hwreg);
+		for (timeout = 1000000; timeout > 0; timeout--)
+			/* poll until CLKGATE set */
+			if (__raw_readl(hwreg) & (1<<30))
+				break;
+		if (timeout <= 0) {
+			printk(KERN_ERR"%s(%p): timeout when resetting\n",
+					__func__, hwreg);
+			return -ETIME;
+		}
+
+		c = __raw_readl(hwreg);
+		c &= ~(1<<31);		/* clear SFTRST */
+		__raw_writel(c, hwreg);
+		for (timeout = 1000000; timeout > 0; timeout--)
+			/* still in SFTRST state ? */
+			if ((__raw_readl(hwreg) & (1<<31)) == 0)
+				break;
+		if (timeout <= 0) {
+			printk(KERN_ERR"%s(%p): timeout when enabling "
+					"after reset\n", __func__, hwreg);
+			return -ETIME;
+		}
+
+		c = __raw_readl(hwreg);
+		c &= ~(1<<30);		/* clear CLKGATE */
+		__raw_writel(c, hwreg);
+	}
+	for (timeout = 1000000; timeout > 0; timeout--)
+		/* still in SFTRST state ? */
+		if ((__raw_readl(hwreg) & (1<<30)) == 0)
+			break;
+
+	if (timeout <= 0) {
+		printk(KERN_ERR"%s(%p): timeout when unclockgating\n",
+				__func__, hwreg);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+int stmp3xxx_reset_block(void __iomem *hwreg, int just_enable)
+{
+	int try = 10;
+	int r;
+
+	while (try--) {
+		r = __stmp3xxx_reset_block(hwreg, just_enable);
+		if (!r)
+			break;
+		pr_debug("%s: try %d failed\n", __func__, 10 - try);
+	}
+	return r;
+}
+EXPORT_SYMBOL(stmp3xxx_reset_block);
+
+struct platform_device stmp3xxx_dbguart = {
+	.name = "stmp3xxx-dbguart",
+	.id = -1,
+};
+
+void __init stmp3xxx_init(void)
+{
+	/* Turn off auto-slow and other tricks */
+	HW_CLKCTRL_HBUS_CLR(0x07f00000U);
+
+	stmp3xxx_dma_init();
+}
diff --git a/arch/arm/plat-stmp3xxx/dma.c b/arch/arm/plat-stmp3xxx/dma.c
new file mode 100644
index 0000000..cf42de0
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/dma.c
@@ -0,0 +1,462 @@
+/*
+ * DMA helper routines for Freescale STMP37XX/STMP378X
+ *
+ * Author: dmitry pervushin <dpervushin@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/sysdev.h>
+#include <linux/cpufreq.h>
+
+#include <asm/page.h>
+
+#include <mach/dma.h>
+#include <mach/regs-apbx.h>
+#include <mach/regs-apbh.h>
+
+static const size_t pool_item_size = sizeof(struct stmp3xxx_dma_command);
+static const size_t pool_alignment = 8;
+static struct stmp3xxx_dma_user {
+	void *pool;
+	int inuse;
+	const char *name;
+} channels[MAX_DMA_CHANNELS];
+
+static inline int dmach(int ch)
+{
+	return ch % 16;
+}
+
+static inline int dmabus(int ch)
+{
+	return ch / 16;
+}
+
+#define IS_VALID_CHANNEL(ch) ((ch) >= 0 && (ch) < MAX_DMA_CHANNELS)
+#define IS_USED(ch) (channels[ch].inuse)
+
+int stmp3xxx_dma_request(int ch, struct device *dev, const char *name)
+{
+	struct stmp3xxx_dma_user *user;
+	int err = 0;
+
+	user = channels + ch;
+	if (!IS_VALID_CHANNEL(ch)) {
+		err = -ENODEV;
+		goto out;
+	}
+	if (IS_USED(ch)) {
+		err = -EBUSY;
+		goto out;
+	}
+	/* Create a pool to allocate dma commands from */
+	user->pool = dma_pool_create(name, dev, pool_item_size,
+				     pool_alignment, PAGE_SIZE);
+	if (user->pool == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+	user->name = name;
+	user->inuse++;
+out:
+	return err;
+}
+EXPORT_SYMBOL(stmp3xxx_dma_request);
+
+int stmp3xxx_dma_release(int ch)
+{
+	struct stmp3xxx_dma_user *user = channels + ch;
+	int err = 0;
+
+	if (!IS_VALID_CHANNEL(ch)) {
+		err = -ENODEV;
+		goto out;
+	}
+	if (!IS_USED(ch)) {
+		err = -EBUSY;
+		goto out;
+	}
+	BUG_ON(user->pool == NULL);
+	dma_pool_destroy(user->pool);
+	user->inuse--;
+out:
+	return err;
+}
+EXPORT_SYMBOL(stmp3xxx_dma_release);
+
+int stmp3xxx_dma_read_semaphore(int channel)
+{
+	int sem = -1;
+
+	switch (dmabus(channel)) {
+	case STMP3XXX_BUS_APBH:
+		sem =
+		    (HW_APBH_CHn_SEMA_RD(dmach(channel)) &
+		     BM_APBH_CHn_SEMA_PHORE) >> BP_APBH_CHn_SEMA_PHORE;
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		sem =
+		    (HW_APBX_CHn_SEMA_RD(dmach(channel)) &
+		     BM_APBX_CHn_SEMA_PHORE) >> BP_APBX_CHn_SEMA_PHORE;
+		break;
+	default:
+		BUG();
+	}
+	return sem;
+}
+EXPORT_SYMBOL(stmp3xxx_dma_read_semaphore);
+
+int stmp3xxx_dma_allocate_command(int channel,
+				  struct stmp3xxx_dma_descriptor *descriptor)
+{
+	struct stmp3xxx_dma_user *user = channels + channel;
+	int err = 0;
+
+	if (!IS_VALID_CHANNEL(channel)) {
+		err = -ENODEV;
+		goto out;
+	}
+	if (!IS_USED(channel)) {
+		err = -EBUSY;
+		goto out;
+	}
+	if (descriptor == NULL) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Allocate memory for a command from the buffer */
+	descriptor->command =
+	    dma_pool_alloc(user->pool, GFP_KERNEL, &descriptor->handle);
+
+	/* Check it worked */
+	if (!descriptor->command) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	memset(descriptor->command, 0, pool_item_size);
+out:
+	WARN_ON(err);
+	return err;
+}
+EXPORT_SYMBOL(stmp3xxx_dma_allocate_command);
+
+int stmp3xxx_dma_free_command(int channel,
+			      struct stmp3xxx_dma_descriptor *descriptor)
+{
+	int err = 0;
+
+	if (!IS_VALID_CHANNEL(channel)) {
+		err = -ENODEV;
+		goto out;
+	}
+	if (!IS_USED(channel)) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	/* Return the command memory to the pool */
+	dma_pool_free(channels[channel].pool, descriptor->command,
+		      descriptor->handle);
+
+	/* Initialise descriptor so we're not tempted to use it */
+	descriptor->command = NULL;
+	descriptor->handle = 0;
+	descriptor->virtual_buf_ptr = NULL;
+	descriptor->next_descr = NULL;
+
+	WARN_ON(err);
+out:
+	return err;
+}
+EXPORT_SYMBOL(stmp3xxx_dma_free_command);
+
+void stmp3xxx_dma_go(int channel,
+		     struct stmp3xxx_dma_descriptor *head, u32 semaphore)
+{
+	int ch = dmach(channel);
+
+	switch (dmabus(channel)) {
+	case STMP3XXX_BUS_APBH:
+		/* Set next command */
+		HW_APBH_CHn_NXTCMDAR_WR(ch, head->handle);
+		/* Set counting semaphore (kicks off transfer). Assumes
+		   peripheral has been set up correctly */
+		HW_APBH_CHn_SEMA_WR(ch, semaphore);
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		/* Set next command */
+		HW_APBX_CHn_NXTCMDAR_WR(ch, head->handle);
+		/* Set counting semaphore (kicks off transfer). Assumes
+		   peripheral has been set up correctly */
+		HW_APBX_CHn_SEMA_WR(ch, semaphore);
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_dma_go);
+
+int stmp3xxx_dma_running(int channel)
+{
+	switch (dmabus(channel)) {
+	case STMP3XXX_BUS_APBH:
+		return HW_APBH_CHn_SEMA_RD(dmach(channel)) &
+		    BM_APBH_CHn_SEMA_PHORE;
+
+	case STMP3XXX_BUS_APBX:
+		return HW_APBX_CHn_SEMA_RD(dmach(channel)) &
+		    BM_APBX_CHn_SEMA_PHORE;
+
+	default:
+		BUG();
+		return 0;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_dma_running);
+
+/*
+ * Circular dma chain management
+ */
+void stmp3xxx_dma_free_chain(struct stmp37xx_circ_dma_chain *chain)
+{
+	int i;
+
+	for (i = 0; i < chain->total_count; i++)
+		stmp3xxx_dma_free_command(
+			STMP3xxx_DMA(chain->channel, chain->bus),
+			&chain->chain[i]);
+}
+EXPORT_SYMBOL(stmp3xxx_dma_free_chain);
+
+int stmp3xxx_dma_make_chain(int ch, struct stmp37xx_circ_dma_chain *chain,
+			    struct stmp3xxx_dma_descriptor descriptors[],
+			    unsigned items)
+{
+	int i;
+	int err = 0;
+
+	if (items == 0)
+		return err;
+
+	for (i = 0; i < items; i++) {
+		err = stmp3xxx_dma_allocate_command(ch, &descriptors[i]);
+		if (err) {
+			WARN_ON(err);
+			/*
+			 * Couldn't allocate the whole chain.
+			 * deallocate what has been allocated
+			 */
+			if (i) {
+				do {
+					stmp3xxx_dma_free_command(ch,
+								  &descriptors
+								  [i]);
+				} while (i-- >= 0);
+			}
+			return err;
+		}
+
+		/* link them! */
+		if (i > 0) {
+			descriptors[i - 1].next_descr = &descriptors[i];
+			descriptors[i - 1].command->next =
+						descriptors[i].handle;
+		}
+	}
+
+	/* make list circular */
+	descriptors[items - 1].next_descr = &descriptors[0];
+	descriptors[items - 1].command->next = descriptors[0].handle;
+
+	chain->total_count = items;
+	chain->chain = descriptors;
+	chain->free_index = 0;
+	chain->active_index = 0;
+	chain->cooked_index = 0;
+	chain->free_count = items;
+	chain->active_count = 0;
+	chain->cooked_count = 0;
+	chain->bus = dmabus(ch);
+	chain->channel = dmach(ch);
+	return err;
+}
+EXPORT_SYMBOL(stmp3xxx_dma_make_chain);
+
+void stmp37xx_circ_clear_chain(struct stmp37xx_circ_dma_chain *chain)
+{
+	BUG_ON(stmp3xxx_dma_running(STMP3xxx_DMA(chain->channel, chain->bus)) >
+	       0);
+	chain->free_index = 0;
+	chain->active_index = 0;
+	chain->cooked_index = 0;
+	chain->free_count = chain->total_count;
+	chain->active_count = 0;
+	chain->cooked_count = 0;
+}
+EXPORT_SYMBOL(stmp37xx_circ_clear_chain);
+
+void stmp37xx_circ_advance_free(struct stmp37xx_circ_dma_chain *chain,
+		unsigned count)
+{
+	BUG_ON(chain->cooked_count < count);
+
+	chain->cooked_count -= count;
+	chain->cooked_index += count;
+	chain->cooked_index %= chain->total_count;
+	chain->free_count += count;
+}
+EXPORT_SYMBOL(stmp37xx_circ_advance_free);
+
+void stmp37xx_circ_advance_active(struct stmp37xx_circ_dma_chain *chain,
+		unsigned count)
+{
+	BUG_ON(chain->free_count < count);
+
+	chain->free_count -= count;
+	chain->free_index += count;
+	chain->free_index %= chain->total_count;
+	chain->active_count += count;
+
+	switch (chain->bus) {
+	case STMP3XXX_BUS_APBH:
+		/* Set counting semaphore (kicks off transfer). Assumes
+		   peripheral has been set up correctly */
+		HW_APBH_CHn_SEMA_CLR(chain->channel,
+				     BM_APBH_CHn_SEMA_INCREMENT_SEMA);
+		HW_APBH_CHn_SEMA_SET(chain->channel,
+				     BF_APBH_CHn_SEMA_INCREMENT_SEMA(count));
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		/* Set counting semaphore (kicks off transfer). Assumes
+		   peripheral has been set up correctly */
+		HW_APBX_CHn_SEMA_CLR(chain->channel,
+				     BM_APBX_CHn_SEMA_INCREMENT_SEMA);
+		HW_APBX_CHn_SEMA_SET(chain->channel,
+				     BF_APBX_CHn_SEMA_INCREMENT_SEMA(count));
+		break;
+
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(stmp37xx_circ_advance_active);
+
+unsigned stmp37xx_circ_advance_cooked(struct stmp37xx_circ_dma_chain *chain)
+{
+	unsigned cooked;
+
+	cooked = chain->active_count -
+	  stmp3xxx_dma_read_semaphore(STMP3xxx_DMA(chain->channel, chain->bus));
+
+	chain->active_count -= cooked;
+	chain->active_index += cooked;
+	chain->active_index %= chain->total_count;
+
+	chain->cooked_count += cooked;
+
+	return cooked;
+}
+EXPORT_SYMBOL(stmp37xx_circ_advance_cooked);
+
+void stmp3xxx_dma_set_alt_target(int channel, int function)
+{
+#if defined(CONFIG_ARCH_STMP37XX)
+	unsigned bits = 4;
+#elif defined(CONFIG_ARCH_STMP378X)
+	unsigned bits = 2;
+#else
+#error wrong arch
+#endif
+	int shift = dmach(channel) * bits;
+	unsigned mask = (1<<bits) - 1;
+
+	BUG_ON(function < 0 || function >= (1<<bits));
+	pr_debug("%s: channel = %d, using mask %x, "
+		 "shift = %d\n", __func__, channel, mask, shift);
+
+	switch (dmabus(channel)) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_DEVSEL_CLR(mask<<shift);
+		HW_APBH_DEVSEL_SET(function<<shift);
+		break;
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_DEVSEL_CLR(mask<<shift);
+		HW_APBX_DEVSEL_SET(function<<shift);
+		break;
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_dma_set_alt_target);
+
+void stmp3xxx_dma_suspend(void)
+{
+	HW_APBH_CTRL0_SET(BM_APBH_CTRL0_CLKGATE);
+	HW_APBX_CTRL0_SET(BM_APBX_CTRL0_CLKGATE);
+}
+
+void stmp3xxx_dma_resume(void)
+{
+	HW_APBH_CTRL0_CLR(BM_APBH_CTRL0_CLKGATE | BM_APBH_CTRL0_SFTRST);
+	HW_APBX_CTRL0_CLR(BM_APBX_CTRL0_CLKGATE | BM_APBX_CTRL0_SFTRST);
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+struct dma_notifier_block {
+	struct notifier_block nb;
+	void *data;
+};
+
+static int dma_cpufreq_notifier(struct notifier_block *self,
+				unsigned long phase, void *p)
+{
+	switch (phase) {
+	case CPUFREQ_POSTCHANGE:
+		stmp3xxx_dma_resume();
+		break;
+
+	case CPUFREQ_PRECHANGE:
+		stmp3xxx_dma_suspend();
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct dma_notifier_block dma_cpufreq_nb = {
+	.nb = {
+		.notifier_call = dma_cpufreq_notifier,
+	},
+};
+#endif /* CONFIG_CPU_FREQ */
+
+void __init stmp3xxx_dma_init(void)
+{
+	HW_APBH_CTRL0_CLR(BM_APBH_CTRL0_CLKGATE | BM_APBH_CTRL0_SFTRST);
+	HW_APBX_CTRL0_CLR(BM_APBX_CTRL0_CLKGATE | BM_APBX_CTRL0_SFTRST);
+#ifdef CONFIG_CPU_FREQ
+	cpufreq_register_notifier(&dma_cpufreq_nb.nb,
+				CPUFREQ_TRANSITION_NOTIFIER);
+#endif /* CONFIG_CPU_FREQ */
+
+}
diff --git a/arch/arm/plat-stmp3xxx/irq.c b/arch/arm/plat-stmp3xxx/irq.c
new file mode 100644
index 0000000..cb36590
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/irq.c
@@ -0,0 +1,59 @@
+/*
+ * Freescale STMP37XX/STMP378X common interrupt handling code
+ *
+ * Author: Vladislav Buzov <vbuzov@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/sysdev.h>
+
+#include <mach/stmp3xxx.h>
+#include <mach/regs-icoll.h>
+
+void __init stmp3xxx_init_irq(struct irq_chip *chip)
+{
+	unsigned int i;
+
+	/* Reset the interrupt controller */
+	HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_CLKGATE);
+	udelay(10);
+	HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_SFTRST);
+	udelay(10);
+	HW_ICOLL_CTRL_SET(BM_ICOLL_CTRL_SFTRST);
+	while (!(HW_ICOLL_CTRL_RD() & BM_ICOLL_CTRL_CLKGATE))
+		continue;
+	HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_SFTRST | BM_ICOLL_CTRL_CLKGATE);
+
+	/* Disable all interrupts initially */
+	for (i = 0; i < NR_REAL_IRQS; i++) {
+		chip->mask(i);
+		set_irq_chip(i, chip);
+		set_irq_handler(i, handle_level_irq);
+		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
+	}
+
+	/* Ensure vector is cleared */
+	HW_ICOLL_LEVELACK_WR(1);
+	HW_ICOLL_LEVELACK_WR(2);
+	HW_ICOLL_LEVELACK_WR(4);
+	HW_ICOLL_LEVELACK_WR(8);
+
+	HW_ICOLL_VECTOR_WR(0);
+	/* Barrier */
+	(void) HW_ICOLL_STAT_RD();
+}
+
diff --git a/arch/arm/plat-stmp3xxx/pinmux.c b/arch/arm/plat-stmp3xxx/pinmux.c
new file mode 100644
index 0000000..9b28cc8
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/pinmux.c
@@ -0,0 +1,545 @@
+/*
+ * Freescale STMP378X/STMP378X Pin Multiplexing
+ *
+ * Author: Vladislav Buzov <vbuzov@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/sysdev.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <linux/sysdev.h>
+#include <linux/irq.h>
+
+#include <mach/hardware.h>
+#include <mach/regs-pinctrl.h>
+#include <mach/pins.h>
+#include <mach/pinmux.h>
+
+#define NR_BANKS ARRAY_SIZE(pinmux_banks)
+static struct stmp3xxx_pinmux_bank pinmux_banks[] = {
+	[0] = {
+		.hw_muxsel = {
+			HW_PINCTRL_MUXSEL0_ADDR,
+			HW_PINCTRL_MUXSEL1_ADDR
+		},
+		.hw_drive = {
+			HW_PINCTRL_DRIVE0_ADDR,
+			HW_PINCTRL_DRIVE1_ADDR,
+			HW_PINCTRL_DRIVE2_ADDR,
+			HW_PINCTRL_DRIVE3_ADDR
+		},
+		.hw_pull = HW_PINCTRL_PULL0_ADDR,
+		.functions = { 0x0, 0x1, 0x2, 0x3 },
+		.strengths = { 0x0, 0x1, 0x2, 0x3, 0xff },
+
+		.hw_gpio_read = HW_PINCTRL_DIN0_ADDR,
+		.hw_gpio_set = HW_PINCTRL_DOUT0_ADDR + HW_STMP3xxx_SET,
+		.hw_gpio_clr = HW_PINCTRL_DOUT0_ADDR + HW_STMP3xxx_CLR,
+		.hw_gpio_doe = HW_PINCTRL_DOE0_ADDR,
+		.irq = IRQ_GPIO0,
+
+		.pin2irq = HW_PINCTRL_PIN2IRQ0_ADDR,
+		.irqstat = HW_PINCTRL_IRQSTAT0_ADDR,
+		.irqlevel = HW_PINCTRL_IRQLEVEL0_ADDR,
+		.irqpolarity = HW_PINCTRL_IRQPOL0_ADDR,
+		.irqen = HW_PINCTRL_IRQEN0_ADDR,
+	},
+	[1] = {
+		.hw_muxsel = {
+			HW_PINCTRL_MUXSEL2_ADDR,
+			HW_PINCTRL_MUXSEL3_ADDR
+		},
+		.hw_drive = {
+			HW_PINCTRL_DRIVE4_ADDR,
+			HW_PINCTRL_DRIVE5_ADDR,
+			HW_PINCTRL_DRIVE6_ADDR,
+			HW_PINCTRL_DRIVE7_ADDR
+		},
+		.hw_pull = HW_PINCTRL_PULL1_ADDR,
+		.functions = { 0x0, 0x1, 0x2, 0x3 },
+		.strengths = { 0x0, 0x1, 0x2, 0x3, 0xff },
+
+		.hw_gpio_read = HW_PINCTRL_DIN1_ADDR,
+		.hw_gpio_set = HW_PINCTRL_DOUT1_ADDR + HW_STMP3xxx_SET,
+		.hw_gpio_clr = HW_PINCTRL_DOUT1_ADDR + HW_STMP3xxx_CLR,
+		.hw_gpio_doe = HW_PINCTRL_DOE1_ADDR,
+		.irq = IRQ_GPIO1,
+
+		.pin2irq = HW_PINCTRL_PIN2IRQ1_ADDR,
+		.irqstat = HW_PINCTRL_IRQSTAT1_ADDR,
+		.irqlevel = HW_PINCTRL_IRQLEVEL1_ADDR,
+		.irqpolarity = HW_PINCTRL_IRQPOL1_ADDR,
+		.irqen = HW_PINCTRL_IRQEN1_ADDR,
+	},
+	[2] = {
+	       .hw_muxsel = {
+			HW_PINCTRL_MUXSEL4_ADDR,
+			HW_PINCTRL_MUXSEL5_ADDR,
+		},
+		.hw_drive = {
+			HW_PINCTRL_DRIVE8_ADDR,
+			HW_PINCTRL_DRIVE9_ADDR,
+			HW_PINCTRL_DRIVE10_ADDR,
+			HW_PINCTRL_DRIVE11_ADDR,
+		},
+		.hw_pull = HW_PINCTRL_PULL2_ADDR,
+		.functions = { 0x0, 0x1, 0x2, 0x3 },
+		.strengths = { 0x0, 0x1, 0x2, 0x1, 0x2 },
+
+		.hw_gpio_read = HW_PINCTRL_DIN2_ADDR,
+		.hw_gpio_set = HW_PINCTRL_DOUT2_ADDR + HW_STMP3xxx_SET,
+		.hw_gpio_clr = HW_PINCTRL_DOUT2_ADDR + HW_STMP3xxx_CLR,
+		.hw_gpio_doe = HW_PINCTRL_DOE2_ADDR,
+		.irq = IRQ_GPIO2,
+
+		.pin2irq = HW_PINCTRL_PIN2IRQ2_ADDR,
+		.irqstat = HW_PINCTRL_IRQSTAT2_ADDR,
+		.irqlevel = HW_PINCTRL_IRQLEVEL2_ADDR,
+		.irqpolarity = HW_PINCTRL_IRQPOL2_ADDR,
+		.irqen = HW_PINCTRL_IRQEN2_ADDR,
+	},
+	[3] = {
+	       .hw_muxsel = {
+		       HW_PINCTRL_MUXSEL6_ADDR,
+		       HW_PINCTRL_MUXSEL7_ADDR,
+	       },
+	       .hw_drive = {
+			HW_PINCTRL_DRIVE12_ADDR,
+			HW_PINCTRL_DRIVE13_ADDR,
+			HW_PINCTRL_DRIVE14_ADDR,
+			NULL,
+	       },
+	       .hw_pull = HW_PINCTRL_PULL3_ADDR,
+	       .functions = {0x0, 0x1, 0x2, 0x3},
+	       .strengths = {0x0, 0x1, 0x2, 0x3, 0xff},
+	},
+};
+
+static inline struct stmp3xxx_pinmux_bank *
+stmp3xxx_pinmux_bank(unsigned id, unsigned *bank, unsigned *pin)
+{
+	unsigned b, p;
+
+	b = STMP3XXX_PINID_TO_BANK(id);
+	p = STMP3XXX_PINID_TO_PINNUM(id);
+	BUG_ON(b >= NR_BANKS);
+	if (bank)
+		*bank = b;
+	if (pin)
+		*pin = p;
+	return &pinmux_banks[b];
+}
+
+/* Check if requested pin is owned by caller */
+static int stmp3xxx_check_pin(unsigned id, const char *label)
+{
+	unsigned pin;
+	struct stmp3xxx_pinmux_bank *pm = stmp3xxx_pinmux_bank(id, NULL, &pin);
+
+	if (!test_bit(pin, &pm->pin_map)) {
+		printk(KERN_WARNING
+		       "%s: Accessing free pin %x, caller %s\n",
+		       __func__, id, label);
+
+		return -EINVAL;
+	}
+
+	if (label && pm->pin_labels[pin] &&
+	    strcmp(label, pm->pin_labels[pin])) {
+		printk(KERN_WARNING
+		       "%s: Wrong pin owner %x, caller %s owner %s\n",
+		       __func__, id, label, pm->pin_labels[pin]);
+
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void stmp3xxx_pin_strength(unsigned id, enum pin_strength strength,
+		const char *label)
+{
+	struct stmp3xxx_pinmux_bank *pbank;
+	void __iomem *hwdrive;
+	u32 shift, val;
+	u32 bank, pin;
+
+	pbank = stmp3xxx_pinmux_bank(id, &bank, &pin);
+	pr_debug("%s: label %s bank %d pin %d strength %d\n", __func__, label,
+		 bank, pin, strength);
+
+	hwdrive = pbank->hw_drive[pin / HW_DRIVE_PIN_NUM];
+	shift = (pin % HW_DRIVE_PIN_NUM) * HW_DRIVE_PIN_LEN;
+	val = pbank->strengths[strength];
+	if (val == 0xff) {
+		printk(KERN_WARNING
+		       "%s: strength is not supported for bank %d, caller %s",
+		       __func__, bank, label);
+		return;
+	}
+
+	if (stmp3xxx_check_pin(id, label))
+		return;
+
+	pr_debug("%s: writing 0x%x to 0x%p register\n", __func__,
+			val << shift, hwdrive);
+	__raw_writel(HW_DRIVE_PINDRV_MASK << shift, hwdrive + HW_STMP3xxx_CLR);
+	__raw_writel(val << shift, hwdrive + HW_STMP3xxx_SET);
+}
+
+void stmp3xxx_pin_voltage(unsigned id, enum pin_voltage voltage,
+			  const char *label)
+{
+	struct stmp3xxx_pinmux_bank *pbank;
+	void __iomem *hwdrive;
+	u32 shift;
+	u32 bank, pin;
+
+	pbank = stmp3xxx_pinmux_bank(id, &bank, &pin);
+	pr_debug("%s: label %s bank %d pin %d voltage %d\n", __func__, label,
+		 bank, pin, voltage);
+
+	hwdrive = pbank->hw_drive[pin / HW_DRIVE_PIN_NUM];
+	shift = (pin % HW_DRIVE_PIN_NUM) * HW_DRIVE_PIN_LEN;
+
+	if (stmp3xxx_check_pin(id, label))
+		return;
+
+	pr_debug("%s: changing 0x%x bit in 0x%p register\n",
+			__func__, HW_DRIVE_PINV_MASK << shift, hwdrive);
+	if (voltage == PIN_1_8V)
+		__raw_writel(HW_DRIVE_PINV_MASK << shift,
+			     hwdrive + HW_STMP3xxx_CLR);
+	else
+		__raw_writel(HW_DRIVE_PINV_MASK << shift,
+			     hwdrive + HW_STMP3xxx_SET);
+}
+
+void stmp3xxx_pin_pullup(unsigned id, int enable, const char *label)
+{
+	struct stmp3xxx_pinmux_bank *pbank;
+	void __iomem *hwpull;
+	u32 bank, pin;
+
+	pbank = stmp3xxx_pinmux_bank(id, &bank, &pin);
+	pr_debug("%s: label %s bank %d pin %d enable %d\n", __func__, label,
+		 bank, pin, enable);
+
+	hwpull = pbank->hw_pull;
+
+	if (stmp3xxx_check_pin(id, label))
+		return;
+
+	pr_debug("%s: changing 0x%x bit in 0x%p register\n",
+			__func__, 1 << pin, hwpull);
+	__raw_writel(1 << pin,
+		     hwpull + (enable ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR));
+}
+
+int stmp3xxx_request_pin(unsigned id, enum pin_fun fun, const char *label)
+{
+	struct stmp3xxx_pinmux_bank *pbank;
+	u32 bank, pin;
+	int ret = 0;
+
+	pbank = stmp3xxx_pinmux_bank(id, &bank, &pin);
+	pr_debug("%s: label %s bank %d pin %d fun %d\n", __func__, label,
+		 bank, pin, fun);
+
+	if (test_bit(pin, &pbank->pin_map)) {
+		printk(KERN_WARNING
+		       "%s: CONFLICT DETECTED pin %d:%d caller %s owner %s\n",
+		       __func__, bank, pin, label, pbank->pin_labels[pin]);
+		return -EBUSY;
+	}
+
+	set_bit(pin, &pbank->pin_map);
+	pbank->pin_labels[pin] = label;
+
+	stmp3xxx_set_pin_type(id, fun);
+
+	return ret;
+}
+
+void stmp3xxx_set_pin_type(unsigned id, enum pin_fun fun)
+{
+	struct stmp3xxx_pinmux_bank *pbank;
+	void __iomem *hwmux;
+	u32 shift, val;
+	u32 bank, pin;
+
+	pbank = stmp3xxx_pinmux_bank(id, &bank, &pin);
+
+	hwmux = pbank->hw_muxsel[pin / HW_MUXSEL_PIN_NUM];
+	shift = (pin % HW_MUXSEL_PIN_NUM) * HW_MUXSEL_PIN_LEN;
+
+	val = pbank->functions[fun];
+	shift = (pin % HW_MUXSEL_PIN_NUM) * HW_MUXSEL_PIN_LEN;
+	pr_debug("%s: writing 0x%x to 0x%p register\n",
+			__func__, val << shift, hwmux);
+	__raw_writel(HW_MUXSEL_PINFUN_MASK << shift, hwmux + HW_STMP3xxx_CLR);
+	__raw_writel(val << shift, hwmux + HW_STMP3xxx_SET);
+}
+
+void stmp3xxx_release_pin(unsigned id, const char *label)
+{
+	struct stmp3xxx_pinmux_bank *pbank;
+	u32 bank, pin;
+
+	pbank = stmp3xxx_pinmux_bank(id, &bank, &pin);
+	pr_debug("%s: label %s bank %d pin %d\n", __func__, label, bank, pin);
+
+	if (stmp3xxx_check_pin(id, label))
+		return;
+
+	clear_bit(pin, &pbank->pin_map);
+	pbank->pin_labels[pin] = NULL;
+}
+
+int stmp3xxx_request_pin_group(struct pin_group *pin_group, const char *label)
+{
+	struct pin_desc *pin;
+	int p;
+	int err = 0;
+
+	/* Allocate and configure pins */
+	for (p = 0; p < pin_group->nr_pins; p++) {
+		pr_debug("%s: #%d\n", __func__, p);
+		pin = &pin_group->pins[p];
+
+		err = stmp3xxx_request_pin(pin->id, pin->fun, label);
+		if (err)
+			goto out_err;
+
+		stmp3xxx_pin_strength(pin->id, pin->strength, label);
+		stmp3xxx_pin_voltage(pin->id, pin->voltage, label);
+		stmp3xxx_pin_pullup(pin->id, pin->pullup, label);
+	}
+
+	return 0;
+
+out_err:
+	/* Release allocated pins in case of error */
+	while (--p >= 0) {
+		pr_debug("%s: releasing #%d\n", __func__, p);
+		stmp3xxx_release_pin(pin_group->pins[p].id, label);
+	}
+	return err;
+}
+EXPORT_SYMBOL(stmp3xxx_request_pin_group);
+
+void stmp3xxx_release_pin_group(struct pin_group *pin_group, const char *label)
+{
+	struct pin_desc *pin;
+	int p;
+
+	for (p = 0; p < pin_group->nr_pins; p++) {
+		pin = &pin_group->pins[p];
+		stmp3xxx_release_pin(pin->id, label);
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_release_pin_group);
+
+static int stmp3xxx_irq_to_gpio(int irq,
+	struct stmp3xxx_pinmux_bank **bank, unsigned *gpio)
+{
+	struct stmp3xxx_pinmux_bank *pm;
+
+	for (pm = pinmux_banks; pm < pinmux_banks + NR_BANKS; pm++)
+		if (pm->virq <= irq && irq < pm->virq + 32) {
+			*bank = pm;
+			*gpio = irq - pm->virq;
+			return 0;
+		}
+	return -ENOENT;
+}
+
+static int stmp3xxx_set_irqtype(unsigned irq, unsigned type)
+{
+	struct stmp3xxx_pinmux_bank *pm;
+	unsigned gpio;
+	int l, p;
+
+	stmp3xxx_irq_to_gpio(irq, &pm, &gpio);
+	switch (type) {
+	case IRQ_TYPE_EDGE_RISING:
+		l = 0; p = 1; break;
+	case IRQ_TYPE_EDGE_FALLING:
+		l = 0; p = 0; break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		l = 1; p = 1; break;
+	case IRQ_TYPE_LEVEL_LOW:
+		l = 1; p = 0; break;
+	default:
+		pr_debug("%s: Incorrect GPIO interrupt type 0x%x\n",
+				__func__, type);
+		return -ENXIO;
+	}
+	__raw_writel(1 << gpio,
+		pm->irqlevel + (l ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR));
+	__raw_writel(1 << gpio,
+		pm->irqpolarity + (p ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR));
+	return 0;
+}
+
+static void stmp3xxx_pin_ack_irq(unsigned irq)
+{
+	u32 stat;
+	struct stmp3xxx_pinmux_bank *pm;
+	unsigned gpio;
+
+	stmp3xxx_irq_to_gpio(irq, &pm, &gpio);
+	stat = __raw_readl(pm->irqstat) & (1<<gpio);
+	__raw_writel(stat, pm->irqstat + HW_STMP3xxx_CLR);
+}
+
+static void stmp3xxx_pin_mask_irq(unsigned irq)
+{
+	struct stmp3xxx_pinmux_bank *pm;
+	unsigned gpio;
+
+	stmp3xxx_irq_to_gpio(irq, &pm, &gpio);
+	__raw_writel(1 << gpio, pm->irqen + HW_STMP3xxx_CLR);
+	__raw_writel(1 << gpio, pm->pin2irq + HW_STMP3xxx_CLR);
+}
+
+static void stmp3xxx_pin_unmask_irq(unsigned irq)
+{
+	struct stmp3xxx_pinmux_bank *pm;
+	unsigned gpio;
+
+	stmp3xxx_irq_to_gpio(irq, &pm, &gpio);
+	__raw_writel(1 << gpio, pm->irqen + HW_STMP3xxx_SET);
+	__raw_writel(1 << gpio, pm->pin2irq + HW_STMP3xxx_SET);
+}
+
+static inline
+struct stmp3xxx_pinmux_bank *to_pinmux_bank(struct gpio_chip *chip)
+{
+	return container_of(chip, struct stmp3xxx_pinmux_bank, chip);
+}
+
+static int stmp3xxx_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
+	return pm->virq + offset;
+}
+
+static int stmp3xxx_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
+	unsigned v;
+
+	v = __raw_readl(pm->hw_gpio_read) & (1 << offset);
+	return v ? 1 : 0;
+}
+
+static void stmp3xxx_gpio_set(struct gpio_chip *chip, unsigned offset, int v)
+{
+	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
+
+	__raw_writel(1 << offset, v ? pm->hw_gpio_set : pm->hw_gpio_clr);
+}
+
+static int stmp3xxx_gpio_output(struct gpio_chip *chip, unsigned offset, int v)
+{
+	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
+
+	__raw_writel(1 << offset, pm->hw_gpio_doe + HW_STMP3xxx_SET);
+	stmp3xxx_gpio_set(chip, offset, v);
+	return 0;
+}
+
+static int stmp3xxx_gpio_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
+
+	__raw_writel(1 << offset, pm->hw_gpio_doe + HW_STMP3xxx_CLR);
+	return 0;
+}
+
+static int stmp3xxx_gpio_request(struct gpio_chip *chip, unsigned offset)
+{
+	return stmp3xxx_request_pin(chip->base + offset, PIN_GPIO, "gpio");
+}
+
+static void stmp3xxx_gpio_free(struct gpio_chip *chip, unsigned offset)
+{
+	stmp3xxx_release_pin(chip->base + offset, "gpio");
+}
+
+static void stmp3xxx_gpio_irq(u32 irq, struct irq_desc *desc)
+{
+	struct stmp3xxx_pinmux_bank *pm = get_irq_data(irq);
+	int gpio_irq = pm->virq;
+	u32 stat = __raw_readl(pm->irqstat);
+
+	while (stat) {
+		if (stat & 1)
+			irq_desc[gpio_irq].handle_irq(gpio_irq,
+				&irq_desc[gpio_irq]);
+		gpio_irq++;
+		stat >>= 1;
+	}
+}
+
+static struct irq_chip gpio_irq_chip = {
+	.ack	= stmp3xxx_pin_ack_irq,
+	.mask	= stmp3xxx_pin_mask_irq,
+	.unmask	= stmp3xxx_pin_unmask_irq,
+	.set_type = stmp3xxx_set_irqtype,
+};
+
+int __init stmp3xxx_pinmux_init(int virtual_irq_start)
+{
+	int b, r = 0;
+	struct stmp3xxx_pinmux_bank *pm;
+	int virq;
+
+	for (b = 0; b < 3; b++) {
+		/* only banks 0,1,2 are allowed to GPIO */
+		pm = pinmux_banks + b;
+		pm->chip.base = 32 * b;
+		pm->chip.ngpio = 32;
+		pm->chip.owner = THIS_MODULE;
+		pm->chip.can_sleep = 1;
+		pm->chip.exported = 1;
+		pm->chip.to_irq = stmp3xxx_gpio_to_irq;
+		pm->chip.direction_input = stmp3xxx_gpio_input;
+		pm->chip.direction_output = stmp3xxx_gpio_output;
+		pm->chip.get = stmp3xxx_gpio_get;
+		pm->chip.set = stmp3xxx_gpio_set;
+		pm->chip.request = stmp3xxx_gpio_request;
+		pm->chip.free = stmp3xxx_gpio_free;
+		pm->virq = virtual_irq_start + b * 32;
+
+		for (virq = pm->virq; virq < pm->virq; virq++) {
+			gpio_irq_chip.mask(virq);
+			set_irq_chip(virq, &gpio_irq_chip);
+			set_irq_handler(virq, handle_level_irq);
+			set_irq_flags(virq, IRQF_VALID);
+		}
+		r = gpiochip_add(&pm->chip);
+		if (r < 0)
+			break;
+		set_irq_chained_handler(pm->irq, stmp3xxx_gpio_irq);
+		set_irq_data(pm->irq, pm);
+	}
+	return r;
+}
+
+MODULE_AUTHOR("Vladislav Buzov");
+MODULE_LICENSE("GPL");
diff --git a/arch/arm/plat-stmp3xxx/timer.c b/arch/arm/plat-stmp3xxx/timer.c
new file mode 100644
index 0000000..c916068
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/timer.c
@@ -0,0 +1,172 @@
+/*
+ * System timer for Freescale STMP37XX/STMP378X
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+#include <asm/mach/time.h>
+#include <mach/stmp3xxx.h>
+#include <mach/regs-timrot.h>
+
+static irqreturn_t
+stmp3xxx_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *c = dev_id;
+
+	if (HW_TIMROT_TIMCTRLn_RD(0) & (1<<15)) {
+		HW_TIMROT_TIMCTRLn_CLR(0, (1<<15));
+		c->event_handler(c);
+	} else if (HW_TIMROT_TIMCTRLn_RD(1) & (1<<15)) {
+		HW_TIMROT_TIMCTRLn_CLR(1, (1<<15));
+		HW_TIMROT_TIMCTRLn_CLR(1, BM_TIMROT_TIMCTRLn_IRQ_EN);
+		HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static cycle_t stmp3xxx_clock_read(void)
+{
+	return ~((HW_TIMROT_TIMCOUNTn_RD(1) & 0xFFFF0000) >> 16);
+}
+
+static int
+stmp3xxx_timrot_set_next_event(unsigned long delta,
+		struct clock_event_device *dev)
+{
+	HW_TIMROT_TIMCOUNTn_WR(0, delta); /* reload */
+	return 0;
+}
+
+static void
+stmp3xxx_timrot_set_mode(enum clock_event_mode mode,
+		struct clock_event_device *dev)
+{
+}
+
+static struct clock_event_device ckevt_timrot = {
+	.name		= "timrot",
+	.features	= CLOCK_EVT_FEAT_ONESHOT,
+	.shift		= 32,
+	.set_next_event	= stmp3xxx_timrot_set_next_event,
+	.set_mode	= stmp3xxx_timrot_set_mode,
+};
+
+static struct clocksource cksrc_stmp3xxx = {
+	.name           = "cksrc_stmp3xxx",
+	.rating         = 250,
+	.read           = stmp3xxx_clock_read,
+	.mask           = CLOCKSOURCE_MASK(16),
+	.shift          = 10,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static struct irqaction stmp3xxx_timer_irq = {
+	.name		= "stmp3xxx_timer",
+	.flags		= IRQF_DISABLED | IRQF_TIMER,
+	.handler	= stmp3xxx_timer_interrupt,
+	.dev_id		= &ckevt_timrot,
+};
+
+
+/*
+ * Set up timer interrupt, and return the current time in seconds.
+ */
+static void __init stmp3xxx_init_timer(void)
+{
+	cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
+				cksrc_stmp3xxx.shift);
+	ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
+				ckevt_timrot.shift);
+	ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot);
+	ckevt_timrot.max_delta_ns = clockevent_delta2ns(0xFFF, &ckevt_timrot);
+	ckevt_timrot.cpumask = cpumask_of(0);
+
+	HW_TIMROT_ROTCTRL_CLR(BM_TIMROT_ROTCTRL_SFTRST |
+				BM_TIMROT_ROTCTRL_CLKGATE);
+	HW_TIMROT_TIMCOUNTn_WR(0, 0);
+	HW_TIMROT_TIMCOUNTn_WR(1, 0);
+
+	HW_TIMROT_TIMCTRLn_WR(0,
+			      (BF_TIMROT_TIMCTRLn_SELECT(8) |  /* 32 kHz */
+			       BF_TIMROT_TIMCTRLn_PRESCALE(0) |
+			       BM_TIMROT_TIMCTRLn_RELOAD |
+			       BM_TIMROT_TIMCTRLn_UPDATE |
+			       BM_TIMROT_TIMCTRLn_IRQ_EN));
+	HW_TIMROT_TIMCTRLn_WR(1,
+			      (BF_TIMROT_TIMCTRLn_SELECT(8) |  /* 32 kHz */
+			       BF_TIMROT_TIMCTRLn_PRESCALE(0) |
+			       BM_TIMROT_TIMCTRLn_RELOAD |
+			       BM_TIMROT_TIMCTRLn_UPDATE));
+
+	HW_TIMROT_TIMCOUNTn_WR(0, CLOCK_TICK_RATE / HZ - 1);
+	HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF); /* reload */
+
+	setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq);
+
+	clocksource_register(&cksrc_stmp3xxx);
+	clockevents_register_device(&ckevt_timrot);
+}
+
+#ifdef CONFIG_PM
+
+void stmp3xxx_suspend_timer(void)
+{
+	HW_TIMROT_TIMCTRLn_CLR(0, BM_TIMROT_TIMCTRLn_IRQ_EN);
+	HW_TIMROT_TIMCTRLn_CLR(0, (1<<15));
+	HW_TIMROT_ROTCTRL_SET(BM_TIMROT_ROTCTRL_CLKGATE);
+}
+
+void stmp3xxx_resume_timer(void)
+{
+	HW_TIMROT_ROTCTRL_CLR(BM_TIMROT_ROTCTRL_SFTRST |
+				BM_TIMROT_ROTCTRL_CLKGATE);
+
+
+	HW_TIMROT_TIMCTRLn_WR(0,
+			      (BF_TIMROT_TIMCTRLn_SELECT(8) |  /* 32 kHz */
+			       BF_TIMROT_TIMCTRLn_PRESCALE(0) |
+			       BM_TIMROT_TIMCTRLn_UPDATE |
+			       BM_TIMROT_TIMCTRLn_IRQ_EN));
+	HW_TIMROT_TIMCTRLn_WR(1,
+			      (BF_TIMROT_TIMCTRLn_SELECT(8) |  /* 32 kHz */
+			       BF_TIMROT_TIMCTRLn_PRESCALE(0) |
+			       BM_TIMROT_TIMCTRLn_RELOAD |
+			       BM_TIMROT_TIMCTRLn_UPDATE));
+
+	HW_TIMROT_TIMCOUNTn_WR(0, CLOCK_TICK_RATE / HZ - 1);
+	HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF); /* reload */
+}
+
+#else
+
+#define stmp3xxx_suspend_timer	NULL
+#define	stmp3xxx_resume_timer	NULL
+
+#endif	/* CONFIG_PM */
+
+struct sys_timer stmp3xxx_timer = {
+	.init		= stmp3xxx_init_timer,
+	.suspend	= stmp3xxx_suspend_timer,
+	.resume		= stmp3xxx_resume_timer,
+};
-- 
cgit v0.10.2


From 45d9108011b9dfb4fccd6c258290d2185145709b Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Wed, 22 Apr 2009 23:57:05 +0100
Subject: [ARM] 5465/1: Freescale STMP platform support [7/10]

Sources: support for 37xx boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp37xx/Makefile b/arch/arm/mach-stmp37xx/Makefile
new file mode 100644
index 0000000..57deffd
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_ARCH_STMP37XX) += stmp37xx.o
+obj-$(CONFIG_MACH_STMP37XX) += stmp37xx_devb.o
diff --git a/arch/arm/mach-stmp37xx/Makefile.boot b/arch/arm/mach-stmp37xx/Makefile.boot
new file mode 100644
index 0000000..1568ad4
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/Makefile.boot
@@ -0,0 +1,3 @@
+   zreladdr-y	:= 0x40008000
+params_phys-y	:= 0x40000100
+initrd_phys-y	:= 0x40800000
diff --git a/arch/arm/mach-stmp37xx/stmp37xx.c b/arch/arm/mach-stmp37xx/stmp37xx.c
new file mode 100644
index 0000000..83a41c9
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/stmp37xx.c
@@ -0,0 +1,217 @@
+/*
+ * Freescale STMP37XX platform support
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/map.h>
+#include <asm/mach/time.h>
+
+#include <mach/stmp3xxx.h>
+#include <mach/dma.h>
+
+#include <mach/regs-icoll.h>
+#include <mach/regs-apbh.h>
+#include <mach/regs-apbx.h>
+#include "stmp37xx.h"
+
+/*
+ * IRQ handling
+ */
+static void stmp37xx_ack_irq(unsigned int irq)
+{
+	/* Disable IRQ */
+	HW_ICOLL_PRIORITYn_CLR(irq / 4, 0x04 << ((irq % 4) * 8));
+
+	/* ACK current interrupt */
+	HW_ICOLL_LEVELACK_WR(1);
+
+	/* Barrier */
+	(void) HW_ICOLL_STAT_RD();
+}
+
+static void stmp37xx_mask_irq(unsigned int irq)
+{
+	/* IRQ disable */
+	HW_ICOLL_PRIORITYn_CLR(irq / 4, 0x04 << ((irq % 4) * 8));
+}
+
+static void stmp37xx_unmask_irq(unsigned int irq)
+{
+	/* IRQ enable */
+	HW_ICOLL_PRIORITYn_SET(irq / 4, 0x04 << ((irq % 4) * 8));
+}
+
+static struct irq_chip stmp37xx_chip = {
+	.ack	= stmp37xx_ack_irq,
+	.mask	= stmp37xx_mask_irq,
+	.unmask = stmp37xx_unmask_irq,
+};
+
+void __init stmp37xx_init_irq(void)
+{
+	stmp3xxx_init_irq(&stmp37xx_chip);
+}
+
+/*
+ * DMA interrupt handling
+ */
+void stmp3xxx_arch_dma_enable_interrupt(int channel)
+{
+	int dmabus = channel / 16;
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_CTRL1_SET(1 << (8 + (channel % 16)));
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_CTRL1_SET(1 << (8 + (channel % 16)));
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_enable_interrupt);
+
+void stmp3xxx_arch_dma_clear_interrupt(int channel)
+{
+	int dmabus = channel / 16;
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_CTRL1_CLR(1 << (channel % 16));
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_CTRL1_CLR(1 << (channel % 16));
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_clear_interrupt);
+
+int stmp3xxx_arch_dma_is_interrupt(int channel)
+{
+	int r = 0;
+
+	int dmabus = channel / 16;
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		r = HW_APBH_CTRL1_RD() & (1 << (channel % 16));
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		r = HW_APBX_CTRL1_RD() & (1 << (channel % 16));
+		break;
+	}
+	return r;
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_is_interrupt);
+
+void stmp3xxx_arch_dma_reset_channel(int channel)
+{
+	int dmabus = channel / 16;
+	unsigned chbit = 1 << (channel % 16);
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		/* Reset channel and wait for it to complete */
+		HW_APBH_CTRL0_SET(chbit << BP_APBH_CTRL0_RESET_CHANNEL);
+		while (HW_APBH_CTRL0_RD() &
+		       (chbit << BP_APBH_CTRL0_RESET_CHANNEL))
+			continue;
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		/* Reset channel and wait for it to complete */
+		HW_APBX_CTRL0_SET(chbit << BP_APBX_CTRL0_RESET_CHANNEL);
+		while (HW_APBX_CTRL0_RD() &
+		       (chbit << BP_APBX_CTRL0_RESET_CHANNEL))
+			continue;
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_reset_channel);
+
+void stmp3xxx_arch_dma_freeze(int channel)
+{
+	int dmabus = channel / 16;
+	unsigned chbit = 1 << (channel % 16);
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_CTRL0_SET(1<<chbit);
+		break;
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_CTRL0_SET(1<<chbit);
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_freeze);
+
+void stmp3xxx_arch_dma_unfreeze(int channel)
+{
+	int dmabus = channel / 16;
+	unsigned chbit = 1 << (channel % 16);
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_CTRL0_CLR(1<<chbit);
+		break;
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_CTRL0_CLR(1<<chbit);
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_unfreeze);
+
+/*
+ * The registers are all very closely mapped, so we might as well map them all
+ * with a single mapping
+ *
+ * Logical      Physical
+ * f0000000	80000000	On-chip registers
+ * f1000000	00000000	256k on-chip SRAM
+ */
+static struct map_desc stmp37xx_io_desc[] __initdata = {
+	{
+		.virtual	= (u32)STMP3XXX_REGS_BASE,
+		.pfn		= __phys_to_pfn(STMP3XXX_REGS_PHBASE),
+		.length		= SZ_1M,
+		.type		= MT_DEVICE
+	},
+	{
+		.virtual	= (u32)STMP3XXX_OCRAM_BASE,
+		.pfn		= __phys_to_pfn(STMP3XXX_OCRAM_PHBASE),
+		.length		= STMP3XXX_OCRAM_SIZE,
+		.type		= MT_DEVICE,
+	},
+};
+
+void __init stmp37xx_map_io(void)
+{
+	iotable_init(stmp37xx_io_desc, ARRAY_SIZE(stmp37xx_io_desc));
+}
diff --git a/arch/arm/mach-stmp37xx/stmp37xx.h b/arch/arm/mach-stmp37xx/stmp37xx.h
new file mode 100644
index 0000000..0b75fb7
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/stmp37xx.h
@@ -0,0 +1,24 @@
+/*
+ * Freescale STMP37XX/STMP378X internal functions and data declarations
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __MACH_STMP37XX_H
+#define __MACH_STMP37XX_H
+
+void stmp37xx_map_io(void);
+void stmp37xx_init_irq(void);
+
+#endif /* __MACH_STMP37XX_H */
diff --git a/arch/arm/mach-stmp37xx/stmp37xx_devb.c b/arch/arm/mach-stmp37xx/stmp37xx_devb.c
new file mode 100644
index 0000000..adfbdc7
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/stmp37xx_devb.c
@@ -0,0 +1,83 @@
+/*
+ * Freescale STMP37XX development board support
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+
+#include <mach/stmp3xxx.h>
+#include <mach/pins.h>
+#include <mach/pinmux.h>
+#include "stmp37xx.h"
+
+/*
+ * List of STMP37xx development board specific devices
+ */
+static struct platform_device *stmp37xx_devb_devices[] = {
+	&stmp3xxx_dbguart,
+};
+
+static struct pin_desc dbguart_pins_0[] = {
+	{ PINID_PWM0, PIN_FUN3, },
+	{ PINID_PWM1, PIN_FUN3, },
+};
+
+static struct pin_group dbguart_pins[] = {
+	[0] = {
+		.pins		= dbguart_pins_0,
+		.nr_pins	= ARRAY_SIZE(dbguart_pins_0),
+	},
+};
+
+static int dbguart_pins_control(int id, int request)
+{
+	int r = 0;
+
+	if (request)
+		r = stmp3xxx_request_pin_group(&dbguart_pins[id], "debug uart");
+	else
+		stmp3xxx_release_pin_group(&dbguart_pins[id], "debug uart");
+	return r;
+}
+
+
+static void __init stmp37xx_devb_init(void)
+{
+	stmp3xxx_pinmux_init(NR_REAL_IRQS);
+
+	/* Init STMP3xxx platform */
+	stmp3xxx_init();
+
+	stmp3xxx_dbguart.dev.platform_data = dbguart_pins_control;
+	/* Add STMP37xx development board devices */
+	platform_add_devices(stmp37xx_devb_devices,
+			ARRAY_SIZE(stmp37xx_devb_devices));
+}
+
+MACHINE_START(STMP37XX, "STMP37XX")
+	.phys_io	= 0x80000000,
+	.io_pg_offst	= ((0xf0000000) >> 18) & 0xfffc,
+	.boot_params	= 0x40000100,
+	.map_io		= stmp37xx_map_io,
+	.init_irq	= stmp37xx_init_irq,
+	.timer		= &stmp3xxx_timer,
+	.init_machine	= stmp37xx_devb_init,
+MACHINE_END
-- 
cgit v0.10.2


From bc19d892a14cbb31d838813b2225e262a6c01341 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Wed, 22 Apr 2009 23:57:28 +0100
Subject: [ARM] 5464/1: Freescale STMP platform support [7/10]

Sources: support for 378x boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp378x/Makefile b/arch/arm/mach-stmp378x/Makefile
new file mode 100644
index 0000000..d156f76
--- /dev/null
+++ b/arch/arm/mach-stmp378x/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_ARCH_STMP378X) += stmp378x.o
+obj-$(CONFIG_MACH_STMP378X) += stmp378x_devb.o
diff --git a/arch/arm/mach-stmp378x/Makefile.boot b/arch/arm/mach-stmp378x/Makefile.boot
new file mode 100644
index 0000000..1568ad4
--- /dev/null
+++ b/arch/arm/mach-stmp378x/Makefile.boot
@@ -0,0 +1,3 @@
+   zreladdr-y	:= 0x40008000
+params_phys-y	:= 0x40000100
+initrd_phys-y	:= 0x40800000
diff --git a/arch/arm/mach-stmp378x/stmp378x.c b/arch/arm/mach-stmp378x/stmp378x.c
new file mode 100644
index 0000000..f156ec7
--- /dev/null
+++ b/arch/arm/mach-stmp378x/stmp378x.c
@@ -0,0 +1,225 @@
+/*
+ * Freescale STMP378X platform support
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+
+#include <asm/dma.h>
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/map.h>
+#include <asm/mach/time.h>
+
+#include <mach/pins.h>
+#include <mach/pinmux.h>
+#include <mach/dma.h>
+#include <mach/hardware.h>
+#include <mach/system.h>
+#include <mach/platform.h>
+#include <mach/stmp3xxx.h>
+#include <mach/regs-icoll.h>
+#include <mach/regs-apbh.h>
+#include <mach/regs-apbx.h>
+
+#include "stmp378x.h"
+/*
+ * IRQ handling
+ */
+static void stmp378x_ack_irq(unsigned int irq)
+{
+	/* Tell ICOLL to release IRQ line */
+	HW_ICOLL_VECTOR_WR(0x0);
+
+	/* ACK current interrupt */
+	HW_ICOLL_LEVELACK_WR(BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0);
+
+	/* Barrier */
+	(void) HW_ICOLL_STAT_RD();
+}
+
+static void stmp378x_mask_irq(unsigned int irq)
+{
+	/* IRQ disable */
+	HW_ICOLL_INTERRUPTn_CLR(irq, BM_ICOLL_INTERRUPTn_ENABLE);
+}
+
+static void stmp378x_unmask_irq(unsigned int irq)
+{
+	/* IRQ enable */
+	HW_ICOLL_INTERRUPTn_SET(irq, BM_ICOLL_INTERRUPTn_ENABLE);
+}
+
+static struct irq_chip stmp378x_chip = {
+	.ack	= stmp378x_ack_irq,
+	.mask	= stmp378x_mask_irq,
+	.unmask = stmp378x_unmask_irq,
+};
+
+void __init stmp378x_init_irq(void)
+{
+	stmp3xxx_init_irq(&stmp378x_chip);
+}
+
+/*
+ * DMA interrupt handling
+ */
+void stmp3xxx_arch_dma_enable_interrupt(int channel)
+{
+	int dmabus = channel / 16;
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_CTRL1_SET(1 << (16 + (channel % 16)));
+		HW_APBH_CTRL2_SET(1 << (16 + (channel % 16)));
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_CTRL1_SET(1 << (16 + (channel % 16)));
+		HW_APBX_CTRL2_SET(1 << (16 + (channel % 16)));
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_enable_interrupt);
+
+void stmp3xxx_arch_dma_clear_interrupt(int channel)
+{
+	int dmabus = channel / 16;
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_CTRL1_CLR(1 << (channel % 16));
+		HW_APBH_CTRL2_CLR(1 << (channel % 16));
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_CTRL1_CLR(1 << (channel % 16));
+		HW_APBX_CTRL2_CLR(1 << (channel % 16));
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_clear_interrupt);
+
+int stmp3xxx_arch_dma_is_interrupt(int channel)
+{
+	int dmabus = channel / 16;
+	int r = 0;
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		r = HW_APBH_CTRL1_RD() & (1 << (channel % 16));
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		r = HW_APBX_CTRL1_RD() & (1 << (channel % 16));
+		break;
+	}
+	return r;
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_is_interrupt);
+
+void stmp3xxx_arch_dma_reset_channel(int channel)
+{
+	int dmabus = channel / 16;
+	unsigned chbit = 1 << (channel % 16);
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		/* Reset channel and wait for it to complete */
+		HW_APBH_CTRL0_SET(chbit <<
+				 BP_APBH_CTRL0_RESET_CHANNEL);
+		while (HW_APBH_CTRL0_RD() &
+		       (chbit << BP_APBH_CTRL0_RESET_CHANNEL))
+			continue;
+		break;
+
+	case STMP3XXX_BUS_APBX:
+		/* Reset channel and wait for it to complete */
+		HW_APBX_CHANNEL_CTRL_SET(
+			BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(chbit));
+		while (HW_APBX_CHANNEL_CTRL_RD() &
+			BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(chbit))
+			continue;
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_reset_channel);
+
+void stmp3xxx_arch_dma_freeze(int channel)
+{
+	int dmabus = channel / 16;
+	unsigned chbit = 1 << (channel % 16);
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_CTRL0_SET(1<<chbit);
+		break;
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_CHANNEL_CTRL_SET(1<<chbit);
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_freeze);
+
+void stmp3xxx_arch_dma_unfreeze(int channel)
+{
+	int dmabus = channel / 16;
+	unsigned chbit = 1 << (channel % 16);
+
+	switch (dmabus) {
+	case STMP3XXX_BUS_APBH:
+		HW_APBH_CTRL0_CLR(1<<chbit);
+		break;
+	case STMP3XXX_BUS_APBX:
+		HW_APBX_CHANNEL_CTRL_CLR(1<<chbit);
+		break;
+	}
+}
+EXPORT_SYMBOL(stmp3xxx_arch_dma_unfreeze);
+
+/*
+ * The registers are all very closely mapped, so we might as well map them all
+ * with a single mapping
+ *
+ * Logical      Physical
+ * f0000000	80000000	On-chip registers
+ * f1000000	00000000	256k on-chip SRAM
+ */
+
+static struct map_desc stmp378x_io_desc[] __initdata = {
+	{
+		.virtual	= (u32)STMP3XXX_REGS_BASE,
+		.pfn		= __phys_to_pfn(STMP3XXX_REGS_PHBASE),
+		.length		= STMP3XXX_REGS_SIZE,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= (u32)STMP3XXX_OCRAM_BASE,
+		.pfn		= __phys_to_pfn(STMP3XXX_OCRAM_PHBASE),
+		.length		= STMP3XXX_OCRAM_SIZE,
+		.type		= MT_DEVICE,
+	},
+};
+
+void __init stmp378x_map_io(void)
+{
+	iotable_init(stmp378x_io_desc, ARRAY_SIZE(stmp378x_io_desc));
+}
diff --git a/arch/arm/mach-stmp378x/stmp378x.h b/arch/arm/mach-stmp378x/stmp378x.h
new file mode 100644
index 0000000..473de64
--- /dev/null
+++ b/arch/arm/mach-stmp378x/stmp378x.h
@@ -0,0 +1,24 @@
+/*
+ * Freescale STMP37XX/STMP378X internal functions and data declarations
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#ifndef __MACH_STMP378X_H
+#define __MACH_STMP378X_H
+
+void stmp378x_map_io(void);
+void stmp378x_init_irq(void);
+
+#endif /* __MACH_STMP378X_COMMON_H */
diff --git a/arch/arm/mach-stmp378x/stmp378x_devb.c b/arch/arm/mach-stmp378x/stmp378x_devb.c
new file mode 100644
index 0000000..bc643f6
--- /dev/null
+++ b/arch/arm/mach-stmp378x/stmp378x_devb.c
@@ -0,0 +1,80 @@
+/*
+ * Freescale STMP378X development board support
+ *
+ * Embedded Alley Solutions, Inc <source@embeddedalley.com>
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+
+#include <mach/pins.h>
+#include <mach/pinmux.h>
+#include <mach/stmp3xxx.h>
+
+#include "stmp378x.h"
+
+static struct platform_device *devices[] = {
+	&stmp3xxx_dbguart,
+};
+
+static struct pin_desc dbguart_pins_0[] = {
+	{ PINID_PWM0, PIN_FUN3, },
+	{ PINID_PWM1, PIN_FUN3, },
+};
+
+static struct pin_group dbguart_pins[] = {
+	[0] = {
+		.pins		= dbguart_pins_0,
+		.nr_pins	= ARRAY_SIZE(dbguart_pins_0),
+	},
+};
+
+static int dbguart_pins_control(int id, int request)
+{
+	int r = 0;
+
+	if (request)
+		r = stmp3xxx_request_pin_group(&dbguart_pins[id], "debug uart");
+	else
+		stmp3xxx_release_pin_group(&dbguart_pins[id], "debug uart");
+	return r;
+}
+
+static void __init stmp378x_devb_init(void)
+{
+	stmp3xxx_pinmux_init(NR_REAL_IRQS);
+
+	/* init stmp3xxx platform */
+	stmp3xxx_init();
+
+	stmp3xxx_dbguart.dev.platform_data = dbguart_pins_control;
+
+	/* add board's devices */
+	platform_add_devices(devices, ARRAY_SIZE(devices));
+}
+
+MACHINE_START(STMP378X, "STMP378X")
+	.phys_io	= 0x80000000,
+	.io_pg_offst	= ((0xf0000000) >> 18) & 0xfffc,
+	.boot_params	= 0x40000100,
+	.map_io		= stmp378x_map_io,
+	.init_irq	= stmp378x_init_irq,
+	.timer		= &stmp3xxx_timer,
+	.init_machine	= stmp378x_devb_init,
+MACHINE_END
-- 
cgit v0.10.2


From bf155f8defdb5be5da0653279327bbd3801a3ca9 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Thu, 23 Apr 2009 00:00:01 +0100
Subject: [ARM] 5463/1: Freescale STMP platform support [10/10]

Default configs for STMP3xxx boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/configs/stmp378x_defconfig b/arch/arm/configs/stmp378x_defconfig
new file mode 100644
index 0000000..44461f1
--- /dev/null
+++ b/arch/arm/configs/stmp378x_defconfig
@@ -0,0 +1,1141 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc2
+# Thu Apr 23 02:44:13 2009
+#
+CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION="-default"
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_RELAY=y
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_INITRAMFS_ROOT_UID=0
+CONFIG_INITRAMFS_ROOT_GID=0
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_INITRAMFS_COMPRESSION_NONE is not set
+CONFIG_INITRAMFS_COMPRESSION_GZIP=y
+# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set
+# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+CONFIG_TRACEPOINTS=y
+CONFIG_MARKERS=y
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_CLK=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLOCK=y
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_FREEZER is not set
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_GEMINI is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_PNX4008 is not set
+# CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_MMP is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_DAVINCI is not set
+# CONFIG_ARCH_OMAP is not set
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_W90X900 is not set
+CONFIG_ARCH_STMP3XXX=y
+
+#
+# Freescale STMP3xxx implementations
+#
+# CONFIG_ARCH_STMP37XX is not set
+CONFIG_ARCH_STMP378X=y
+# CONFIG_MACH_STMP37XX is not set
+CONFIG_MACH_STMP378X=y
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_ARM926T=y
+CONFIG_CPU_32v5=y
+CONFIG_CPU_ABRT_EV5TJ=y
+CONFIG_CPU_PABRT_NOIFAR=y
+CONFIG_CPU_CACHE_VIVT=y
+CONFIG_CPU_COPY_V4WB=y
+CONFIG_CPU_TLB_V4WBI=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+
+#
+# Processor Features
+#
+CONFIG_ARM_THUMB=y
+# CONFIG_CPU_ICACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_WRITETHROUGH is not set
+# CONFIG_CPU_CACHE_ROUND_ROBIN is not set
+# CONFIG_OUTER_CACHE is not set
+CONFIG_COMMON_CLKDEV=y
+
+#
+# Bus support
+#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_PREEMPT=y
+CONFIG_HZ=100
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+CONFIG_HIGHMEM=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+CONFIG_ALIGNMENT_TRAP=y
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="console=ttySDBG0,115200 mem=32M"
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+# CONFIG_CPU_IDLE is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+# CONFIG_FPE_NWFPE is not set
+# CONFIG_FPE_FASTFPE is not set
+# CONFIG_VFP is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETLABEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+# CONFIG_NET_SCH_CBQ is not set
+# CONFIG_NET_SCH_HTB is not set
+# CONFIG_NET_SCH_HFSC is not set
+# CONFIG_NET_SCH_PRIO is not set
+# CONFIG_NET_SCH_MULTIQ is not set
+# CONFIG_NET_SCH_RED is not set
+# CONFIG_NET_SCH_SFQ is not set
+# CONFIG_NET_SCH_TEQL is not set
+# CONFIG_NET_SCH_TBF is not set
+# CONFIG_NET_SCH_GRED is not set
+# CONFIG_NET_SCH_DSMARK is not set
+# CONFIG_NET_SCH_NETEM is not set
+# CONFIG_NET_SCH_DRR is not set
+
+#
+# Classification
+#
+# CONFIG_NET_CLS_BASIC is not set
+# CONFIG_NET_CLS_TCINDEX is not set
+# CONFIG_NET_CLS_ROUTE4 is not set
+# CONFIG_NET_CLS_FW is not set
+# CONFIG_NET_CLS_U32 is not set
+# CONFIG_NET_CLS_RSVP is not set
+# CONFIG_NET_CLS_RSVP6 is not set
+# CONFIG_NET_CLS_FLOW is not set
+# CONFIG_NET_EMATCH is not set
+# CONFIG_NET_CLS_ACT is not set
+CONFIG_NET_SCH_FIFO=y
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_DROP_MONITOR is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+CONFIG_FIB_RULES=y
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_STANDALONE is not set
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+# CONFIG_MTD_PARTITIONS is not set
+# CONFIG_MTD_TESTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+# CONFIG_MTD_BLKDEVS is not set
+# CONFIG_MTD_BLOCK is not set
+# CONFIG_MTD_BLOCK_RO is not set
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+# CONFIG_MTD_CFI is not set
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+CONFIG_MTD_NAND=y
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+# CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+# CONFIG_MTD_NAND_GPIO is not set
+CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_WL_THRESHOLD=4096
+CONFIG_MTD_UBI_BEB_RESERVE=1
+CONFIG_MTD_UBI_GLUEBI=y
+
+#
+# UBI debugging options
+#
+# CONFIG_MTD_UBI_DEBUG is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=y
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=4
+CONFIG_BLK_DEV_RAM_SIZE=6144
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+CONFIG_CHR_DEV_SG=y
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+# CONFIG_SCSI_LOWLEVEL is not set
+# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_NETDEVICES is not set
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+CONFIG_INPUT_POLLDEV=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=320
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=240
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879 is not set
+# CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_GUNZE is not set
+# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
+# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
+# CONFIG_TOUCHSCREEN_MK712 is not set
+# CONFIG_TOUCHSCREEN_PENMOUNT is not set
+# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
+# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+CONFIG_INPUT_MISC=y
+# CONFIG_INPUT_UINPUT is not set
+# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_LIBPS2 is not set
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_DEBUG_GPIO=y
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+CONFIG_VIDEO_DEV=y
+CONFIG_VIDEO_V4L2_COMMON=y
+# CONFIG_VIDEO_ALLOW_V4L1 is not set
+# CONFIG_VIDEO_V4L1_COMPAT is not set
+# CONFIG_DVB_CORE is not set
+CONFIG_VIDEO_MEDIA=y
+
+#
+# Multimedia drivers
+#
+# CONFIG_MEDIA_ATTACH is not set
+CONFIG_VIDEO_V4L2=y
+CONFIG_VIDEO_CAPTURE_DRIVERS=y
+# CONFIG_VIDEO_ADV_DEBUG is not set
+# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set
+# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set
+
+#
+# Encoders/decoders and other helper chips
+#
+
+#
+# Audio decoders
+#
+
+#
+# RDS decoders
+#
+
+#
+# Video decoders
+#
+
+#
+# Video and audio decoders
+#
+
+#
+# MPEG video encoders
+#
+# CONFIG_VIDEO_CX2341X is not set
+
+#
+# Video encoders
+#
+
+#
+# Video improvement chips
+#
+# CONFIG_VIDEO_VIVI is not set
+# CONFIG_SOC_CAMERA is not set
+# CONFIG_RADIO_ADAPTERS is not set
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+# CONFIG_FB_CFB_FILLRECT is not set
+# CONFIG_FB_CFB_COPYAREA is not set
+# CONFIG_FB_CFB_IMAGEBLIT is not set
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LCD_CLASS_DEVICE=y
+# CONFIG_LCD_ILI9320 is not set
+# CONFIG_LCD_PLATFORM is not set
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BACKLIGHT_GENERIC=y
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+CONFIG_LOGO_LINUX_MONO=y
+CONFIG_LOGO_LINUX_VGA16=y
+CONFIG_LOGO_LINUX_CLUT224=y
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_NEW_LEDS is not set
+CONFIG_RTC_LIB=y
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_REGULATOR is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_CONFIGFS_FS=m
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_SELFTEST=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_DEBUG_SLAB=y
+CONFIG_DEBUG_SLAB_LEAK=y
+CONFIG_DEBUG_PREEMPT=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_PI_LIST=y
+# CONFIG_RT_MUTEX_TESTER is not set
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP=y
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_LOCKDEP is not set
+CONFIG_TRACE_IRQFLAGS=y
+CONFIG_DEBUG_SPINLOCK_SLEEP=y
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
+CONFIG_DEBUG_KOBJECT=y
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+CONFIG_FRAME_POINTER=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_PAGE_POISONING is not set
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+CONFIG_FUNCTION_TRACER=y
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+CONFIG_CONTEXT_SWITCH_TRACER=y
+# CONFIG_EVENT_TRACER is not set
+CONFIG_BOOT_TRACER=y
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+CONFIG_STACK_TRACER=y
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_ARM_UNWIND=y
+# CONFIG_DEBUG_USER is not set
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_LL is not set
+
+#
+# Security options
+#
+CONFIG_KEYS=y
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_NETWORK is not set
+# CONFIG_SECURITY_PATH is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0
+# CONFIG_SECURITY_TOMOYO is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+CONFIG_CRYPTO_TEST=m
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=y
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+CONFIG_CRYPTO_HMAC=y
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+CONFIG_CRYPTO_SHA1=m
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+CONFIG_CRYPTO_AES=m
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+CONFIG_CRYPTO_DEFLATE=y
+# CONFIG_CRYPTO_ZLIB is not set
+CONFIG_CRYPTO_LZO=y
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+CONFIG_BINARY_PRINTF=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+CONFIG_CRC_CCITT=m
+CONFIG_CRC16=y
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_LZO_COMPRESS=y
+CONFIG_LZO_DECOMPRESS=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/arm/configs/stmp37xx_defconfig b/arch/arm/configs/stmp37xx_defconfig
new file mode 100644
index 0000000..401279d
--- /dev/null
+++ b/arch/arm/configs/stmp37xx_defconfig
@@ -0,0 +1,1002 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.29.1
+# Mon Apr 20 04:41:26 2009
+#
+CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION="-default"
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_RELAY=y
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_INITRAMFS_ROOT_UID=0
+CONFIG_INITRAMFS_ROOT_GID=0
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+CONFIG_TRACEPOINTS=y
+CONFIG_MARKERS=y
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLOCK=y
+CONFIG_LBD=y
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_FREEZER is not set
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_PNX4008 is not set
+# CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_DAVINCI is not set
+# CONFIG_ARCH_OMAP is not set
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_W90X900 is not set
+CONFIG_ARCH_STMP3XXX=y
+
+#
+# Freescale STMP3xxx implementations
+#
+CONFIG_ARCH_STMP37XX=y
+# CONFIG_ARCH_STMP378X is not set
+CONFIG_MACH_STMP37XX=y
+# CONFIG_MACH_STMP378X is not set
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_ARM926T=y
+CONFIG_CPU_32v5=y
+CONFIG_CPU_ABRT_EV5TJ=y
+CONFIG_CPU_PABRT_NOIFAR=y
+CONFIG_CPU_CACHE_VIVT=y
+CONFIG_CPU_COPY_V4WB=y
+CONFIG_CPU_TLB_V4WBI=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+
+#
+# Processor Features
+#
+CONFIG_ARM_THUMB=y
+# CONFIG_CPU_ICACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_WRITETHROUGH is not set
+# CONFIG_CPU_CACHE_ROUND_ROBIN is not set
+# CONFIG_OUTER_CACHE is not set
+CONFIG_COMMON_CLKDEV=y
+
+#
+# Bus support
+#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_PREEMPT=y
+CONFIG_HZ=100
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_ALIGNMENT_TRAP=y
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="console=ttySDBG0,115200 mem=32M lcd_panel=lms350 rdinit=/bin/sh ignore_loglevel"
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+# CONFIG_CPU_IDLE is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+# CONFIG_FPE_NWFPE is not set
+# CONFIG_FPE_FASTFPE is not set
+# CONFIG_VFP is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_COMPAT_NET_DEV_OPS=y
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETLABEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+# CONFIG_NET_SCH_CBQ is not set
+# CONFIG_NET_SCH_HTB is not set
+# CONFIG_NET_SCH_HFSC is not set
+# CONFIG_NET_SCH_PRIO is not set
+# CONFIG_NET_SCH_MULTIQ is not set
+# CONFIG_NET_SCH_RED is not set
+# CONFIG_NET_SCH_SFQ is not set
+# CONFIG_NET_SCH_TEQL is not set
+# CONFIG_NET_SCH_TBF is not set
+# CONFIG_NET_SCH_GRED is not set
+# CONFIG_NET_SCH_DSMARK is not set
+# CONFIG_NET_SCH_NETEM is not set
+# CONFIG_NET_SCH_DRR is not set
+
+#
+# Classification
+#
+# CONFIG_NET_CLS_BASIC is not set
+# CONFIG_NET_CLS_TCINDEX is not set
+# CONFIG_NET_CLS_ROUTE4 is not set
+# CONFIG_NET_CLS_FW is not set
+# CONFIG_NET_CLS_U32 is not set
+# CONFIG_NET_CLS_RSVP is not set
+# CONFIG_NET_CLS_RSVP6 is not set
+# CONFIG_NET_CLS_FLOW is not set
+# CONFIG_NET_EMATCH is not set
+# CONFIG_NET_CLS_ACT is not set
+CONFIG_NET_SCH_FIFO=y
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_PHONET is not set
+CONFIG_FIB_RULES=y
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_STANDALONE is not set
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=y
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=4
+CONFIG_BLK_DEV_RAM_SIZE=6144
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+CONFIG_CHR_DEV_SG=y
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+# CONFIG_SCSI_LOWLEVEL is not set
+# CONFIG_SCSI_DH is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_NETDEVICES is not set
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+CONFIG_INPUT_POLLDEV=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=320
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=240
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_GUNZE is not set
+# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
+# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
+# CONFIG_TOUCHSCREEN_MK712 is not set
+# CONFIG_TOUCHSCREEN_PENMOUNT is not set
+# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
+# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+CONFIG_INPUT_MISC=y
+# CONFIG_INPUT_UINPUT is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_LIBPS2 is not set
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_STMP_DBG is not set
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_DEBUG_GPIO=y
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+CONFIG_VIDEO_DEV=y
+CONFIG_VIDEO_V4L2_COMMON=y
+# CONFIG_VIDEO_ALLOW_V4L1 is not set
+# CONFIG_VIDEO_V4L1_COMPAT is not set
+# CONFIG_DVB_CORE is not set
+CONFIG_VIDEO_MEDIA=y
+
+#
+# Multimedia drivers
+#
+# CONFIG_MEDIA_ATTACH is not set
+CONFIG_VIDEO_V4L2=y
+CONFIG_VIDEO_CAPTURE_DRIVERS=y
+# CONFIG_VIDEO_ADV_DEBUG is not set
+# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set
+# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set
+
+#
+# Encoders/decoders and other helper chips
+#
+
+#
+# Audio decoders
+#
+
+#
+# Video decoders
+#
+
+#
+# Video and audio decoders
+#
+
+#
+# MPEG video encoders
+#
+# CONFIG_VIDEO_CX2341X is not set
+
+#
+# Video encoders
+#
+
+#
+# Video improvement chips
+#
+# CONFIG_VIDEO_VIVI is not set
+# CONFIG_SOC_CAMERA is not set
+# CONFIG_RADIO_ADAPTERS is not set
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+# CONFIG_FB_CFB_FILLRECT is not set
+# CONFIG_FB_CFB_COPYAREA is not set
+# CONFIG_FB_CFB_IMAGEBLIT is not set
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LCD_CLASS_DEVICE=y
+# CONFIG_LCD_ILI9320 is not set
+# CONFIG_LCD_PLATFORM is not set
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BACKLIGHT_GENERIC=y
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+CONFIG_LOGO_LINUX_MONO=y
+CONFIG_LOGO_LINUX_VGA16=y
+CONFIG_LOGO_LINUX_CLUT224=y
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_NEW_LEDS is not set
+CONFIG_RTC_LIB=y
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_REGULATOR is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_CONFIGFS_FS=m
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_DEBUG_PREEMPT=y
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+CONFIG_FRAME_POINTER=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+
+#
+# Tracers
+#
+CONFIG_FUNCTION_TRACER=y
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+CONFIG_CONTEXT_SWITCH_TRACER=y
+CONFIG_BOOT_TRACER=y
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+CONFIG_STACK_TRACER=y
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+# CONFIG_DEBUG_USER is not set
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_DEBUG_LL=y
+# CONFIG_DEBUG_ICEDCC is not set
+
+#
+# Security options
+#
+CONFIG_KEYS=y
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_NETWORK is not set
+# CONFIG_SECURITY_PATH is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+CONFIG_CRYPTO_TEST=m
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=y
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+CONFIG_CRYPTO_HMAC=y
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+CONFIG_CRYPTO_SHA1=m
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+CONFIG_CRYPTO_AES=m
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+CONFIG_CRYPTO_DEFLATE=y
+CONFIG_CRYPTO_LZO=y
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+CONFIG_CRC_CCITT=m
+CONFIG_CRC16=y
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_LZO_COMPRESS=y
+CONFIG_LZO_DECOMPRESS=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
-- 
cgit v0.10.2


From 7bd0f2f5fc5f51d351228488dfef5e33d28e8d0c Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Mon, 27 Apr 2009 10:35:04 +0100
Subject: [ARM] 5483/1: Freescale STMP: add Kconfig/Makefile entries

Added Kconfig/Makefile entries for STMP platform

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a930e5c..d17c801 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -302,6 +302,19 @@ config ARCH_MXC
 	help
 	  Support for Freescale MXC/iMX-based family of processors
 
+config ARCH_STMP3XXX
+	bool "Freescale STMP3xxx"
+	select CPU_ARM926T
+	select HAVE_CLK
+	select COMMON_CLKDEV
+	select ARCH_REQUIRE_GPIOLIB
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_GPIO
+	select USB_ARCH_HAS_EHCI
+	help
+	  Support for systems based on the Freescale 3xxx CPUs.
+
 config ARCH_NETX
 	bool "Hilscher NetX based"
 	select CPU_ARM926T
@@ -679,6 +692,8 @@ source "arch/arm/mach-s3c6400/Kconfig"
 source "arch/arm/mach-s3c6410/Kconfig"
 endif
 
+source "arch/arm/plat-stmp3xxx/Kconfig"
+
 source "arch/arm/mach-lh7a40x/Kconfig"
 
 source "arch/arm/mach-imx/Kconfig"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 885a837..9a0af47 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -149,6 +149,8 @@ machine-$(CONFIG_ARCH_S3C24A0)		:= s3c24a0
 machine-$(CONFIG_ARCH_S3C64XX)		:= s3c6400 s3c6410
 machine-$(CONFIG_ARCH_SA1100)		:= sa1100
 machine-$(CONFIG_ARCH_SHARK)		:= shark
+machine-$(CONFIG_ARCH_STMP378X)		:= stmp378x
+machine-$(CONFIG_ARCH_STMP37XX)		:= stmp37xx
 machine-$(CONFIG_ARCH_VERSATILE)	:= versatile
 machine-$(CONFIG_ARCH_W90X900)		:= w90x900
 machine-$(CONFIG_FOOTBRIDGE)		:= footbridge
@@ -162,6 +164,7 @@ plat-$(CONFIG_PLAT_ORION)	:= orion
 plat-$(CONFIG_PLAT_PXA)		:= pxa
 plat-$(CONFIG_PLAT_S3C24XX)	:= s3c24xx s3c
 plat-$(CONFIG_PLAT_S3C64XX)	:= s3c64xx s3c
+plat-$(CONFIG_ARCH_STMP3XXX)	:= stmp3xxx
 
 ifeq ($(CONFIG_ARCH_EBSA110),y)
 # This is what happens if you forget the IOCS16 line.
-- 
cgit v0.10.2


From 4a79ba34cada6a5a4ee86ed53aa8a73ba1e6fc51 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 22 Apr 2009 16:31:35 +0200
Subject: ALSA: hda - Add amp initialization for realtek auto mode

In the realtek auto-probing mode, the initialization of amp with
some magic COEF or EAPD verbs is applied only when the codec SSID
has valid values to satisfy the realtek's definition.
However, many devices don't provide in that way, thus the device
doesn't work as is.

This patch allows the same initialization code even if the SSID
doesn't pass the bit test.  Also, alc_subsystem_id() is changed
just to check and define the type, so that it's called in the
parser, instead of the initializer.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 583603f..3a63063 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -253,6 +253,15 @@ enum {
 /* for GPIO Poll */
 #define GPIO_MASK	0x03
 
+/* extra amp-initialization sequence types */
+enum {
+	ALC_INIT_NONE,
+	ALC_INIT_DEFAULT,
+	ALC_INIT_GPIO1,
+	ALC_INIT_GPIO2,
+	ALC_INIT_GPIO3,
+};
+
 struct alc_spec {
 	/* codec parameterization */
 	struct snd_kcontrol_new *mixers[5];	/* mixer arrays */
@@ -322,6 +331,7 @@ struct alc_spec {
 
 	/* other flags */
 	unsigned int no_analog :1; /* digital I/O only */
+	int init_amp;
 
 	/* for virtual master */
 	hda_nid_t vmaster_nid;
@@ -994,74 +1004,21 @@ static void alc888_coef_init(struct hda_codec *codec)
 				    AC_VERB_SET_PROC_COEF, 0x3030);
 }
 
-/* 32-bit subsystem ID for BIOS loading in HD Audio codec.
- *	31 ~ 16 :	Manufacture ID
- *	15 ~ 8	:	SKU ID
- *	7  ~ 0	:	Assembly ID
- *	port-A --> pin 39/41, port-E --> pin 14/15, port-D --> pin 35/36
- */
-static void alc_subsystem_id(struct hda_codec *codec,
-			     unsigned int porta, unsigned int porte,
-			     unsigned int portd)
+static void alc_auto_init_amp(struct hda_codec *codec, int type)
 {
-	unsigned int ass, tmp, i;
-	unsigned nid;
-	struct alc_spec *spec = codec->spec;
-
-	ass = codec->subsystem_id & 0xffff;
-	if ((ass != codec->bus->pci->subsystem_device) && (ass & 1))
-		goto do_sku;
-
-	/*
-	 * 31~30	: port conetcivity
-	 * 29~21	: reserve
-	 * 20		: PCBEEP input
-	 * 19~16	: Check sum (15:1)
-	 * 15~1		: Custom
-	 * 0		: override
-	*/
-	nid = 0x1d;
-	if (codec->vendor_id == 0x10ec0260)
-		nid = 0x17;
-	ass = snd_hda_codec_get_pincfg(codec, nid);
-	snd_printd("realtek: No valid SSID, "
-		   "checking pincfg 0x%08x for NID 0x%x\n",
-		   ass, nid);
-	if (!(ass & 1) && !(ass & 0x100000))
-		return;
-	if ((ass >> 30) != 1)	/* no physical connection */
-		return;
+	unsigned int tmp;
 
-	/* check sum */
-	tmp = 0;
-	for (i = 1; i < 16; i++) {
-		if ((ass >> i) & 1)
-			tmp++;
-	}
-	if (((ass >> 16) & 0xf) != tmp)
-		return;
-do_sku:
-	snd_printd("realtek: Enabling init ASM_ID=0x%04x CODEC_ID=%08x\n",
-		   ass & 0xffff, codec->vendor_id);
-	/*
-	 * 0 : override
-	 * 1 :	Swap Jack
-	 * 2 : 0 --> Desktop, 1 --> Laptop
-	 * 3~5 : External Amplifier control
-	 * 7~6 : Reserved
-	*/
-	tmp = (ass & 0x38) >> 3;	/* external Amp control */
-	switch (tmp) {
-	case 1:
+	switch (type) {
+	case ALC_INIT_GPIO1:
 		snd_hda_sequence_write(codec, alc_gpio1_init_verbs);
 		break;
-	case 3:
+	case ALC_INIT_GPIO2:
 		snd_hda_sequence_write(codec, alc_gpio2_init_verbs);
 		break;
-	case 7:
+	case ALC_INIT_GPIO3:
 		snd_hda_sequence_write(codec, alc_gpio3_init_verbs);
 		break;
-	case 5:	/* set EAPD output high */
+	case ALC_INIT_DEFAULT:
 		switch (codec->vendor_id) {
 		case 0x10ec0260:
 			snd_hda_codec_write(codec, 0x0f, 0,
@@ -1115,7 +1072,7 @@ do_sku:
 					    tmp | 0x2010);
 			break;
 		case 0x10ec0888:
-			/*alc888_coef_init(codec);*/ /* called in alc_init() */
+			alc888_coef_init(codec);
 			break;
 		case 0x10ec0267:
 		case 0x10ec0268:
@@ -1130,7 +1087,104 @@ do_sku:
 					    tmp | 0x3000);
 			break;
 		}
-	default:
+		break;
+	}
+}
+
+static void alc_init_auto_hp(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	if (!spec->autocfg.hp_pins[0])
+		return;
+
+	if (!spec->autocfg.speaker_pins[0]) {
+		if (spec->autocfg.line_out_pins[0])
+			spec->autocfg.speaker_pins[0] =
+				spec->autocfg.line_out_pins[0];
+		else
+			return;
+	}
+
+	snd_hda_codec_write_cache(codec, spec->autocfg.hp_pins[0], 0,
+				  AC_VERB_SET_UNSOLICITED_ENABLE,
+				  AC_USRSP_EN | ALC880_HP_EVENT);
+	spec->unsol_event = alc_sku_unsol_event;
+}
+
+/* check subsystem ID and set up device-specific initialization;
+ * return 1 if initialized, 0 if invalid SSID
+ */
+/* 32-bit subsystem ID for BIOS loading in HD Audio codec.
+ *	31 ~ 16 :	Manufacture ID
+ *	15 ~ 8	:	SKU ID
+ *	7  ~ 0	:	Assembly ID
+ *	port-A --> pin 39/41, port-E --> pin 14/15, port-D --> pin 35/36
+ */
+static int alc_subsystem_id(struct hda_codec *codec,
+			    hda_nid_t porta, hda_nid_t porte,
+			    hda_nid_t portd)
+{
+	unsigned int ass, tmp, i;
+	unsigned nid;
+	struct alc_spec *spec = codec->spec;
+
+	ass = codec->subsystem_id & 0xffff;
+	if ((ass != codec->bus->pci->subsystem_device) && (ass & 1))
+		goto do_sku;
+
+	/* invalid SSID, check the special NID pin defcfg instead */
+	/*
+	 * 31~30	: port conetcivity
+	 * 29~21	: reserve
+	 * 20		: PCBEEP input
+	 * 19~16	: Check sum (15:1)
+	 * 15~1		: Custom
+	 * 0		: override
+	*/
+	nid = 0x1d;
+	if (codec->vendor_id == 0x10ec0260)
+		nid = 0x17;
+	ass = snd_hda_codec_get_pincfg(codec, nid);
+	snd_printd("realtek: No valid SSID, "
+		   "checking pincfg 0x%08x for NID 0x%x\n",
+		   nid, ass);
+	if (!(ass & 1) && !(ass & 0x100000))
+		return 0;
+	if ((ass >> 30) != 1)	/* no physical connection */
+		return 0;
+
+	/* check sum */
+	tmp = 0;
+	for (i = 1; i < 16; i++) {
+		if ((ass >> i) & 1)
+			tmp++;
+	}
+	if (((ass >> 16) & 0xf) != tmp)
+		return 0;
+do_sku:
+	snd_printd("realtek: Enabling init ASM_ID=0x%04x CODEC_ID=%08x\n",
+		   ass & 0xffff, codec->vendor_id);
+	/*
+	 * 0 : override
+	 * 1 :	Swap Jack
+	 * 2 : 0 --> Desktop, 1 --> Laptop
+	 * 3~5 : External Amplifier control
+	 * 7~6 : Reserved
+	*/
+	tmp = (ass & 0x38) >> 3;	/* external Amp control */
+	switch (tmp) {
+	case 1:
+		spec->init_amp = ALC_INIT_GPIO1;
+		break;
+	case 3:
+		spec->init_amp = ALC_INIT_GPIO2;
+		break;
+	case 7:
+		spec->init_amp = ALC_INIT_GPIO3;
+		break;
+	case 5:
+		spec->init_amp = ALC_INIT_DEFAULT;
 		break;
 	}
 
@@ -1138,7 +1192,7 @@ do_sku:
 	 * when the external headphone out jack is plugged"
 	 */
 	if (!(ass & 0x8000))
-		return;
+		return 1;
 	/*
 	 * 10~8 : Jack location
 	 * 12~11: Headphone out -> 00: PortA, 01: PortE, 02: PortD, 03: Resvered
@@ -1146,14 +1200,6 @@ do_sku:
 	 * 15   : 1 --> enable the function "Mute internal speaker
 	 *	        when the external headphone out jack is plugged"
 	 */
-	if (!spec->autocfg.speaker_pins[0]) {
-		if (spec->autocfg.line_out_pins[0])
-			spec->autocfg.speaker_pins[0] =
-				spec->autocfg.line_out_pins[0];
-		else
-			return;
-	}
-
 	if (!spec->autocfg.hp_pins[0]) {
 		tmp = (ass >> 11) & 0x3;	/* HP to chassis */
 		if (tmp == 0)
@@ -1163,23 +1209,23 @@ do_sku:
 		else if (tmp == 2)
 			spec->autocfg.hp_pins[0] = portd;
 		else
-			return;
+			return 1;
 	}
-	if (spec->autocfg.hp_pins[0])
-		snd_hda_codec_write(codec, spec->autocfg.hp_pins[0], 0,
-			AC_VERB_SET_UNSOLICITED_ENABLE,
-			AC_USRSP_EN | ALC880_HP_EVENT);
 
-#if 0 /* it's broken in some acses -- temporarily disabled */
-	if (spec->autocfg.input_pins[AUTO_PIN_MIC] &&
-		spec->autocfg.input_pins[AUTO_PIN_FRONT_MIC])
-		snd_hda_codec_write(codec,
-			spec->autocfg.input_pins[AUTO_PIN_MIC], 0,
-			AC_VERB_SET_UNSOLICITED_ENABLE,
-			AC_USRSP_EN | ALC880_MIC_EVENT);
-#endif /* disabled */
+	alc_init_auto_hp(codec);
+	return 1;
+}
 
-	spec->unsol_event = alc_sku_unsol_event;
+static void alc_ssid_check(struct hda_codec *codec,
+			   hda_nid_t porta, hda_nid_t porte, hda_nid_t portd)
+{
+	if (!alc_subsystem_id(codec, porta, porte, portd)) {
+		struct alc_spec *spec = codec->spec;
+		snd_printd("realtek: "
+			   "Enable default setup for auto mode as fallback\n");
+		spec->init_amp = ALC_INIT_DEFAULT;
+		alc_init_auto_hp(codec);
+	}
 }
 
 /*
@@ -2923,8 +2969,7 @@ static int alc_init(struct hda_codec *codec)
 	unsigned int i;
 
 	alc_fix_pll(codec);
-	if (codec->vendor_id == 0x10ec0888)
-		alc888_coef_init(codec);
+	alc_auto_init_amp(codec, spec->init_amp);
 
 	for (i = 0; i < spec->num_init_verbs; i++)
 		snd_hda_sequence_write(codec, spec->init_verbs[i]);
@@ -4198,7 +4243,6 @@ static void alc880_auto_init_multi_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i < spec->autocfg.line_outs; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -4303,6 +4347,8 @@ static int alc880_parse_auto_config(struct hda_codec *codec)
 	spec->num_mux_defs = 1;
 	spec->input_mux = &spec->private_imux[0];
 
+	alc_ssid_check(codec, 0x15, 0x1b, 0x14);
+
 	return 1;
 }
 
@@ -5678,7 +5724,6 @@ static void alc260_auto_init_multi_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	hda_nid_t nid;
 
-	alc_subsystem_id(codec, 0x10, 0x15, 0x0f);
 	nid = spec->autocfg.line_out_pins[0];
 	if (nid) {
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -5788,6 +5833,8 @@ static int alc260_parse_auto_config(struct hda_codec *codec)
 	spec->num_mux_defs = 1;
 	spec->input_mux = &spec->private_imux[0];
 
+	alc_ssid_check(codec, 0x10, 0x15, 0x0f);
+
 	return 1;
 }
 
@@ -7013,7 +7060,6 @@ static void alc882_auto_init_multi_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i <= HDA_SIDE; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -9154,7 +9200,6 @@ static void alc883_auto_init_multi_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i <= HDA_SIDE; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -9317,6 +9362,7 @@ static int patch_alc883(struct hda_codec *codec)
 		if (!spec->capsrc_nids)
 			spec->capsrc_nids = alc883_capsrc_nids;
 		spec->capture_style = CAPT_MIX; /* matrix-style capture */
+		spec->init_amp = ALC_INIT_DEFAULT; /* always initialize */
 		break;
 	case 0x10ec0889:
 		spec->stream_name_analog = "ALC889 Analog";
@@ -10842,6 +10888,8 @@ static int alc262_parse_auto_config(struct hda_codec *codec)
 	if (err < 0)
 		return err;
 
+	alc_ssid_check(codec, 0x15, 0x14, 0x1b);
+
 	return 1;
 }
 
@@ -13925,7 +13973,6 @@ static void alc861_auto_init_multi_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x0e, 0x0f, 0x0b);
 	for (i = 0; i < spec->autocfg.line_outs; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -14008,6 +14055,8 @@ static int alc861_parse_auto_config(struct hda_codec *codec)
 	spec->num_adc_nids = ARRAY_SIZE(alc861_adc_nids);
 	set_capture_mixer(spec);
 
+	alc_ssid_check(codec, 0x0e, 0x0f, 0x0b);
+
 	return 1;
 }
 
@@ -14889,7 +14938,6 @@ static void alc861vd_auto_init_multi_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i <= HDA_SIDE; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -15107,6 +15155,8 @@ static int alc861vd_parse_auto_config(struct hda_codec *codec)
 	if (err < 0)
 		return err;
 
+	alc_ssid_check(codec, 0x15, 0x1b, 0x14);
+
 	return 1;
 }
 
@@ -16931,7 +16981,6 @@ static void alc662_auto_init_multi_out(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 	int i;
 
-	alc_subsystem_id(codec, 0x15, 0x1b, 0x14);
 	for (i = 0; i <= HDA_SIDE; i++) {
 		hda_nid_t nid = spec->autocfg.line_out_pins[i];
 		int pin_type = get_pin_type(spec->autocfg.line_out_type);
@@ -17028,6 +17077,8 @@ static int alc662_parse_auto_config(struct hda_codec *codec)
 	if (err < 0)
 		return err;
 
+	alc_ssid_check(codec, 0x15, 0x1b, 0x14);
+
 	return 1;
 }
 
-- 
cgit v0.10.2


From 4bc4d8998a472cd64aa66a4abad3d833be901028 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 27 Apr 2009 14:28:44 +0100
Subject: ASoC: Enforce symmetric rates for S3C64xx I2S interface

There is only one LRCLK pin on each interface.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c
index a84c4be..c335248 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.c
@@ -171,6 +171,7 @@ struct snd_soc_dai s3c64xx_i2s_dai[] = {
 			 .formats	= S3C64XX_I2S_FMTS,
 		 },
 		.ops = &s3c64xx_i2s_dai_ops,
+		.symmetric_rates = 1,
 	},
 	{
 		.name		= "s3c64xx-i2s",
@@ -189,6 +190,7 @@ struct snd_soc_dai s3c64xx_i2s_dai[] = {
 			 .formats	= S3C64XX_I2S_FMTS,
 		 },
 		.ops = &s3c64xx_i2s_dai_ops,
+		.symmetric_rates = 1,
 	},
 };
 EXPORT_SYMBOL_GPL(s3c64xx_i2s_dai);
-- 
cgit v0.10.2


From 008db442efa542357314593c71ab9966be909855 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 27 Apr 2009 19:17:08 +0100
Subject: ASoC: Include WM8350 register definitions in CODEC header

It's expected behaviour for the CODEC header to provide them but the
WM8350 doesn't due to having all the registers together under drivers/mfd.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8350.h b/sound/soc/codecs/wm8350.h
index d11bd92..d088eb4 100644
--- a/sound/soc/codecs/wm8350.h
+++ b/sound/soc/codecs/wm8350.h
@@ -13,6 +13,7 @@
 #define _WM8350_H
 
 #include <sound/soc.h>
+#include <linux/mfd/wm8350/audio.h>
 
 extern struct snd_soc_dai wm8350_dai;
 extern struct snd_soc_codec_device soc_codec_dev_wm8350;
-- 
cgit v0.10.2


From 5c556a6e190897a0f1ff14e13722591828412031 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 27 Apr 2009 20:23:19 +0100
Subject: ASoC: s3c-i2s-v2 diagnostic improvements

Say what invalid values we're seeing when we see an invalid value and
ensure that errors are displayed by default.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index ab680aa..aeea49c 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -105,7 +105,9 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "TXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "TXEN: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
+			break;
 		}
 
 		writel(con, regs + S3C2412_IISCON);
@@ -132,7 +134,9 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "TXDIS: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "TXDIS: Invalid MODE %xin IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
+			break;
 		}
 
 		writel(mod, regs + S3C2412_IISMOD);
@@ -175,7 +179,8 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
 		}
 
 		writel(mod, regs + S3C2412_IISMOD);
@@ -199,7 +204,8 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
 		}
 
 		writel(con, regs + S3C2412_IISCON);
@@ -281,7 +287,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
 		iismod |= IISMOD_MASTER;
 		break;
 	default:
-		pr_debug("unknwon master/slave format\n");
+		pr_err("unknwon master/slave format\n");
 		return -EINVAL;
 	}
 
@@ -298,7 +304,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
 		iismod |= S3C2412_IISMOD_SDF_IIS;
 		break;
 	default:
-		pr_debug("Unknown data format\n");
+		pr_err("Unknown data format\n");
 		return -EINVAL;
 	}
 
-- 
cgit v0.10.2


From a7be4d92d989fc53d840d24cba2ebea9e5ad8480 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 27 Apr 2009 20:24:15 +0100
Subject: ASoC: Use our registration function for S3C64xx

Make sure we get the DAI operations initialised.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index aeea49c..ab680aa 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -105,9 +105,7 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "TXEN: Invalid MODE %x in IISMOD\n",
-				mod & S3C2412_IISMOD_MODE_MASK);
-			break;
+			dev_err(i2s->dev, "TXEN: Invalid MODE in IISMOD\n");
 		}
 
 		writel(con, regs + S3C2412_IISCON);
@@ -134,9 +132,7 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "TXDIS: Invalid MODE %xin IISMOD\n",
-				mod & S3C2412_IISMOD_MODE_MASK);
-			break;
+			dev_err(i2s->dev, "TXDIS: Invalid MODE in IISMOD\n");
 		}
 
 		writel(mod, regs + S3C2412_IISMOD);
@@ -179,8 +175,7 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n",
-				mod & S3C2412_IISMOD_MODE_MASK);
+			dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n");
 		}
 
 		writel(mod, regs + S3C2412_IISMOD);
@@ -204,8 +199,7 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n",
-				mod & S3C2412_IISMOD_MODE_MASK);
+			dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n");
 		}
 
 		writel(con, regs + S3C2412_IISCON);
@@ -287,7 +281,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
 		iismod |= IISMOD_MASTER;
 		break;
 	default:
-		pr_err("unknwon master/slave format\n");
+		pr_debug("unknwon master/slave format\n");
 		return -EINVAL;
 	}
 
@@ -304,7 +298,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
 		iismod |= S3C2412_IISMOD_SDF_IIS;
 		break;
 	default:
-		pr_err("Unknown data format\n");
+		pr_debug("Unknown data format\n");
 		return -EINVAL;
 	}
 
diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c
index c335248..1345fbd 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.c
@@ -225,7 +225,7 @@ static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_clk;
 
-	ret = snd_soc_register_dai(dai);
+	ret = s3c_i2sv2_register_dai(dai);
 	if (ret != 0)
 		goto err_i2sv2;
 
-- 
cgit v0.10.2


From 0b5e92c5e020ee7437fa5304a8451d6dd08d1a26 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Mon, 27 Apr 2009 13:49:44 +0000
Subject: ASoC WM8940 Driver

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 121d63f..1c19ad5 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -35,6 +35,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_WM8753 if SND_SOC_I2C_AND_SPI
 	select SND_SOC_WM8900 if I2C
 	select SND_SOC_WM8903 if I2C
+	select SND_SOC_WM8940 if I2C
 	select SND_SOC_WM8960 if I2C
 	select SND_SOC_WM8971 if I2C
 	select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI
@@ -140,6 +141,9 @@ config SND_SOC_WM8900
 config SND_SOC_WM8903
 	tristate
 
+config SND_SOC_WM8940
+        tristate
+
 config SND_SOC_WM8960
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 8116968..3d31b6b 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -23,6 +23,7 @@ snd-soc-wm8750-objs := wm8750.o
 snd-soc-wm8753-objs := wm8753.o
 snd-soc-wm8900-objs := wm8900.o
 snd-soc-wm8903-objs := wm8903.o
+snd-soc-wm8940-objs := wm8940.o
 snd-soc-wm8960-objs := wm8960.o
 snd-soc-wm8971-objs := wm8971.o
 snd-soc-wm8988-objs := wm8988.o
@@ -57,6 +58,7 @@ obj-$(CONFIG_SND_SOC_WM8753)	+= snd-soc-wm8753.o
 obj-$(CONFIG_SND_SOC_WM8900)	+= snd-soc-wm8900.o
 obj-$(CONFIG_SND_SOC_WM8903)	+= snd-soc-wm8903.o
 obj-$(CONFIG_SND_SOC_WM8971)	+= snd-soc-wm8971.o
+obj-$(CONFIG_SND_SOC_WM8940)	+= snd-soc-wm8940.o
 obj-$(CONFIG_SND_SOC_WM8960)	+= snd-soc-wm8960.o
 obj-$(CONFIG_SND_SOC_WM8988)	+= snd-soc-wm8988.o
 obj-$(CONFIG_SND_SOC_WM8990)	+= snd-soc-wm8990.o
diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c
new file mode 100644
index 0000000..26987dc
--- /dev/null
+++ b/sound/soc/codecs/wm8940.c
@@ -0,0 +1,955 @@
+/*
+ * wm8940.c  --  WM8940 ALSA Soc Audio driver
+ *
+ * Author: Jonathan Cameron <jic23@cam.ac.uk>
+ *
+ * Based on wm8510.c
+ *    Copyright  2006 Wolfson Microelectronics PLC.
+ *    Author:  Liam Girdwood <lrg@slimlogic.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Not currently handled:
+ * Notch filter control
+ * AUXMode (inverting vs mixer)
+ * No means to obtain current gain if alc enabled.
+ * No use made of gpio
+ * Fast VMID discharge for power down
+ * Soft Start
+ * DLR and ALR Swaps not enabled
+ * Digital Sidetone not supported
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include "wm8940.h"
+
+struct wm8940_priv {
+	unsigned int sysclk;
+	u16 reg_cache[WM8940_CACHEREGNUM];
+	struct snd_soc_codec codec;
+};
+
+static u16 wm8940_reg_defaults[] = {
+	0x8940, /* Soft Reset */
+	0x0000, /* Power 1 */
+	0x0000, /* Power 2 */
+	0x0000, /* Power 3 */
+	0x0010, /* Interface Control */
+	0x0000, /* Companding Control */
+	0x0140, /* Clock Control */
+	0x0000, /* Additional Controls */
+	0x0000, /* GPIO Control */
+	0x0002, /* Auto Increment Control */
+	0x0000, /* DAC Control */
+	0x00FF, /* DAC Volume */
+	0,
+	0,
+	0x0100, /* ADC Control */
+	0x00FF, /* ADC Volume */
+	0x0000, /* Notch Filter 1 Control 1 */
+	0x0000, /* Notch Filter 1 Control 2 */
+	0x0000, /* Notch Filter 2 Control 1 */
+	0x0000, /* Notch Filter 2 Control 2 */
+	0x0000, /* Notch Filter 3 Control 1 */
+	0x0000, /* Notch Filter 3 Control 2 */
+	0x0000, /* Notch Filter 4 Control 1 */
+	0x0000, /* Notch Filter 4 Control 2 */
+	0x0032, /* DAC Limit Control 1 */
+	0x0000, /* DAC Limit Control 2 */
+	0,
+	0,
+	0,
+	0,
+	0,
+	0,
+	0x0038, /* ALC Control 1 */
+	0x000B, /* ALC Control 2 */
+	0x0032, /* ALC Control 3 */
+	0x0000, /* Noise Gate */
+	0x0041, /* PLLN */
+	0x000C, /* PLLK1 */
+	0x0093, /* PLLK2 */
+	0x00E9, /* PLLK3 */
+	0,
+	0,
+	0x0030, /* ALC Control 4 */
+	0,
+	0x0002, /* Input Control */
+	0x0050, /* PGA Gain */
+	0,
+	0x0002, /* ADC Boost Control */
+	0,
+	0x0002, /* Output Control */
+	0x0000, /* Speaker Mixer Control */
+	0,
+	0,
+	0,
+	0x0079, /* Speaker Volume */
+	0,
+	0x0000, /* Mono Mixer Control */
+};
+
+static inline unsigned int wm8940_read_reg_cache(struct snd_soc_codec *codec,
+						 unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+
+	if (reg >= ARRAY_SIZE(wm8940_reg_defaults))
+		return -1;
+
+	return cache[reg];
+}
+
+static inline int wm8940_write_reg_cache(struct snd_soc_codec *codec,
+					  u16 reg, unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+
+	if (reg >= ARRAY_SIZE(wm8940_reg_defaults))
+		return -1;
+
+	cache[reg] = value;
+
+	return 0;
+}
+
+static int wm8940_write(struct snd_soc_codec *codec, unsigned int reg,
+			unsigned int value)
+{
+	int ret;
+	u8 data[3] = { reg,
+		       (value & 0xff00) >> 8,
+		       (value & 0x00ff)
+	};
+
+	wm8940_write_reg_cache(codec, reg, value);
+
+	ret = codec->hw_write(codec->control_data, data, 3);
+
+	if (ret < 0)
+		return ret;
+	else if (ret != 3)
+		return -EIO;
+	return 0;
+}
+
+static const char *wm8940_companding[] = { "Off", "NC", "u-law", "A-law" };
+static const struct soc_enum wm8940_adc_companding_enum
+= SOC_ENUM_SINGLE(WM8940_COMPANDINGCTL, 1, 4, wm8940_companding);
+static const struct soc_enum wm8940_dac_companding_enum
+= SOC_ENUM_SINGLE(WM8940_COMPANDINGCTL, 3, 4, wm8940_companding);
+
+static const char *wm8940_alc_mode_text[] = {"ALC", "Limiter"};
+static const struct soc_enum wm8940_alc_mode_enum
+= SOC_ENUM_SINGLE(WM8940_ALC3, 8, 2, wm8940_alc_mode_text);
+
+static const char *wm8940_mic_bias_level_text[] = {"0.9", "0.65"};
+static const struct soc_enum wm8940_mic_bias_level_enum
+= SOC_ENUM_SINGLE(WM8940_INPUTCTL, 8, 2, wm8940_mic_bias_level_text);
+
+static const char *wm8940_filter_mode_text[] = {"Audio", "Application"};
+static const struct soc_enum wm8940_filter_mode_enum
+= SOC_ENUM_SINGLE(WM8940_ADC, 7, 2, wm8940_filter_mode_text);
+
+DECLARE_TLV_DB_SCALE(wm8940_spk_vol_tlv, -5700, 100, 1);
+DECLARE_TLV_DB_SCALE(wm8940_att_tlv, -1000, 1000, 0);
+DECLARE_TLV_DB_SCALE(wm8940_pga_vol_tlv, -1200, 75, 0);
+DECLARE_TLV_DB_SCALE(wm8940_alc_min_tlv, -1200, 600, 0);
+DECLARE_TLV_DB_SCALE(wm8940_alc_max_tlv, 675, 600, 0);
+DECLARE_TLV_DB_SCALE(wm8940_alc_tar_tlv, -2250, 50, 0);
+DECLARE_TLV_DB_SCALE(wm8940_lim_boost_tlv, 0, 100, 0);
+DECLARE_TLV_DB_SCALE(wm8940_lim_thresh_tlv, -600, 100, 0);
+DECLARE_TLV_DB_SCALE(wm8940_adc_tlv, -12750, 50, 1);
+DECLARE_TLV_DB_SCALE(wm8940_capture_boost_vol_tlv, 0, 2000, 0);
+
+static const struct snd_kcontrol_new wm8940_snd_controls[] = {
+	SOC_SINGLE("Digital Loopback Switch", WM8940_COMPANDINGCTL,
+		   6, 1, 0),
+	SOC_ENUM("DAC Companding", wm8940_dac_companding_enum),
+	SOC_ENUM("ADC Companding", wm8940_adc_companding_enum),
+
+	SOC_ENUM("ALC Mode", wm8940_alc_mode_enum),
+	SOC_SINGLE("ALC Switch", WM8940_ALC1, 8, 1, 0),
+	SOC_SINGLE_TLV("ALC Capture Max Gain", WM8940_ALC1,
+		       3, 7, 1, wm8940_alc_max_tlv),
+	SOC_SINGLE_TLV("ALC Capture Min Gain", WM8940_ALC1,
+		       0, 7, 0, wm8940_alc_min_tlv),
+	SOC_SINGLE_TLV("ALC Capture Target", WM8940_ALC2,
+		       0, 14, 0, wm8940_alc_tar_tlv),
+	SOC_SINGLE("ALC Capture Hold", WM8940_ALC2, 4, 10, 0),
+	SOC_SINGLE("ALC Capture Decay", WM8940_ALC3, 4, 10, 0),
+	SOC_SINGLE("ALC Capture Attach", WM8940_ALC3, 0, 10, 0),
+	SOC_SINGLE("ALC ZC Switch", WM8940_ALC4, 1, 1, 0),
+	SOC_SINGLE("ALC Capture Noise Gate Switch", WM8940_NOISEGATE,
+		   3, 1, 0),
+	SOC_SINGLE("ALC Capture Noise Gate Threshold", WM8940_NOISEGATE,
+		   0, 7, 0),
+
+	SOC_SINGLE("DAC Playback Limiter Switch", WM8940_DACLIM1, 8, 1, 0),
+	SOC_SINGLE("DAC Playback Limiter Attack", WM8940_DACLIM1, 0, 9, 0),
+	SOC_SINGLE("DAC Playback Limiter Decay", WM8940_DACLIM1, 4, 11, 0),
+	SOC_SINGLE_TLV("DAC Playback Limiter Threshold", WM8940_DACLIM2,
+		       4, 9, 1, wm8940_lim_thresh_tlv),
+	SOC_SINGLE_TLV("DAC Playback Limiter Boost", WM8940_DACLIM2,
+		       0, 12, 0, wm8940_lim_boost_tlv),
+
+	SOC_SINGLE("Capture PGA ZC Switch", WM8940_PGAGAIN, 7, 1, 0),
+	SOC_SINGLE_TLV("Capture PGA Volume", WM8940_PGAGAIN,
+		       0, 63, 0, wm8940_pga_vol_tlv),
+	SOC_SINGLE_TLV("Digital Playback Volume", WM8940_DACVOL,
+		       0, 255, 0, wm8940_adc_tlv),
+	SOC_SINGLE_TLV("Digital Capture Volume", WM8940_ADCVOL,
+		       0, 255, 0, wm8940_adc_tlv),
+	SOC_ENUM("Mic Bias Level", wm8940_mic_bias_level_enum),
+	SOC_SINGLE_TLV("Capture Boost Volue", WM8940_ADCBOOST,
+		       8, 1, 0, wm8940_capture_boost_vol_tlv),
+	SOC_SINGLE_TLV("Speaker Playback Volume", WM8940_SPKVOL,
+		       0, 63, 0, wm8940_spk_vol_tlv),
+	SOC_SINGLE("Speaker Playback Switch", WM8940_SPKVOL,  6, 1, 1),
+
+	SOC_SINGLE_TLV("Speaker Mixer Line Bypass Volume", WM8940_SPKVOL,
+		       8, 1, 1, wm8940_att_tlv),
+	SOC_SINGLE("Speaker Playback ZC Switch", WM8940_SPKVOL, 7, 1, 0),
+
+	SOC_SINGLE("Mono Out Switch", WM8940_MONOMIX, 6, 1, 1),
+	SOC_SINGLE_TLV("Mono Mixer Line Bypass Volume", WM8940_MONOMIX,
+		       7, 1, 1, wm8940_att_tlv),
+
+	SOC_SINGLE("High Pass Filter Switch", WM8940_ADC, 8, 1, 0),
+	SOC_ENUM("High Pass Filter Mode", wm8940_filter_mode_enum),
+	SOC_SINGLE("High Pass Filter Cut Off", WM8940_ADC, 4, 7, 0),
+	SOC_SINGLE("ADC Inversion Switch", WM8940_ADC, 0, 1, 0),
+	SOC_SINGLE("DAC Inversion Switch", WM8940_DAC, 0, 1, 0),
+	SOC_SINGLE("DAC Auto Mute Switch", WM8940_DAC, 2, 1, 0),
+	SOC_SINGLE("ZC Timeout Clock Switch", WM8940_ADDCNTRL, 0, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8940_speaker_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Line Bypass Switch", WM8940_SPKMIX, 1, 1, 0),
+	SOC_DAPM_SINGLE("Aux Playback Switch", WM8940_SPKMIX, 5, 1, 0),
+	SOC_DAPM_SINGLE("PCM Playback Switch", WM8940_SPKMIX, 0, 1, 0),
+};
+
+static const struct snd_kcontrol_new wm8940_mono_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Line Bypass Switch", WM8940_MONOMIX, 1, 1, 0),
+	SOC_DAPM_SINGLE("Aux Playback Switch", WM8940_MONOMIX, 2, 1, 0),
+	SOC_DAPM_SINGLE("PCM Playback Switch", WM8940_MONOMIX, 0, 1, 0),
+};
+
+DECLARE_TLV_DB_SCALE(wm8940_boost_vol_tlv, -1500, 300, 1);
+static const struct snd_kcontrol_new wm8940_input_boost_controls[] = {
+	SOC_DAPM_SINGLE("Mic PGA Switch", WM8940_PGAGAIN, 6, 1, 1),
+	SOC_DAPM_SINGLE_TLV("Aux Volume", WM8940_ADCBOOST,
+			    0, 7, 0, wm8940_boost_vol_tlv),
+	SOC_DAPM_SINGLE_TLV("Mic Volume", WM8940_ADCBOOST,
+			    4, 7, 0, wm8940_boost_vol_tlv),
+};
+
+static const struct snd_kcontrol_new wm8940_micpga_controls[] = {
+	SOC_DAPM_SINGLE("AUX Switch", WM8940_INPUTCTL, 2, 1, 0),
+	SOC_DAPM_SINGLE("MICP Switch", WM8940_INPUTCTL, 0, 1, 0),
+	SOC_DAPM_SINGLE("MICN Switch", WM8940_INPUTCTL, 1, 1, 0),
+};
+
+static const struct snd_soc_dapm_widget wm8940_dapm_widgets[] = {
+	SND_SOC_DAPM_MIXER("Speaker Mixer", WM8940_POWER3, 2, 0,
+			   &wm8940_speaker_mixer_controls[0],
+			   ARRAY_SIZE(wm8940_speaker_mixer_controls)),
+	SND_SOC_DAPM_MIXER("Mono Mixer", WM8940_POWER3, 3, 0,
+			   &wm8940_mono_mixer_controls[0],
+			   ARRAY_SIZE(wm8940_mono_mixer_controls)),
+	SND_SOC_DAPM_DAC("DAC", "HiFi Playback", WM8940_POWER3, 0, 0),
+
+	SND_SOC_DAPM_PGA("SpkN Out", WM8940_POWER3, 5, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("SpkP Out", WM8940_POWER3, 6, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Mono Out", WM8940_POWER3, 7, 0, NULL, 0),
+	SND_SOC_DAPM_OUTPUT("MONOOUT"),
+	SND_SOC_DAPM_OUTPUT("SPKOUTP"),
+	SND_SOC_DAPM_OUTPUT("SPKOUTN"),
+
+	SND_SOC_DAPM_PGA("Aux Input", WM8940_POWER1, 6, 0, NULL, 0),
+	SND_SOC_DAPM_ADC("ADC", "HiFi Capture", WM8940_POWER2, 0, 0),
+	SND_SOC_DAPM_MIXER("Mic PGA", WM8940_POWER2, 2, 0,
+			   &wm8940_micpga_controls[0],
+			   ARRAY_SIZE(wm8940_micpga_controls)),
+	SND_SOC_DAPM_MIXER("Boost Mixer", WM8940_POWER2, 4, 0,
+			   &wm8940_input_boost_controls[0],
+			   ARRAY_SIZE(wm8940_input_boost_controls)),
+	SND_SOC_DAPM_MICBIAS("Mic Bias", WM8940_POWER1, 4, 0),
+
+	SND_SOC_DAPM_INPUT("MICN"),
+	SND_SOC_DAPM_INPUT("MICP"),
+	SND_SOC_DAPM_INPUT("AUX"),
+};
+
+static const struct snd_soc_dapm_route audio_map[] = {
+	/* Mono output mixer */
+	{"Mono Mixer", "PCM Playback Switch", "DAC"},
+	{"Mono Mixer", "Aux Playback Switch", "Aux Input"},
+	{"Mono Mixer", "Line Bypass Switch", "Boost Mixer"},
+
+	/* Speaker output mixer */
+	{"Speaker Mixer", "PCM Playback Switch", "DAC"},
+	{"Speaker Mixer", "Aux Playback Switch", "Aux Input"},
+	{"Speaker Mixer", "Line Bypass Switch", "Boost Mixer"},
+
+	/* Outputs */
+	{"Mono Out", NULL, "Mono Mixer"},
+	{"MONOOUT", NULL, "Mono Out"},
+	{"SpkN Out", NULL, "Speaker Mixer"},
+	{"SpkP Out", NULL, "Speaker Mixer"},
+	{"SPKOUTN", NULL, "SpkN Out"},
+	{"SPKOUTP", NULL, "SpkP Out"},
+
+	/*  Microphone PGA */
+	{"Mic PGA", "MICN Switch", "MICN"},
+	{"Mic PGA", "MICP Switch", "MICP"},
+	{"Mic PGA", "AUX Switch", "AUX"},
+
+	/* Boost Mixer */
+	{"Boost Mixer", "Mic PGA Switch", "Mic PGA"},
+	{"Boost Mixer", "Mic Volume",  "MICP"},
+	{"Boost Mixer", "Aux Volume", "Aux Input"},
+
+	{"ADC", NULL, "Boost Mixer"},
+};
+
+static int wm8940_add_widgets(struct snd_soc_codec *codec)
+{
+	int ret;
+
+	ret = snd_soc_dapm_new_controls(codec, wm8940_dapm_widgets,
+					ARRAY_SIZE(wm8940_dapm_widgets));
+	if (ret)
+		goto error_ret;
+	ret = snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+	if (ret)
+		goto error_ret;
+	ret = snd_soc_dapm_new_widgets(codec);
+
+error_ret:
+	return ret;
+}
+
+#define wm8940_reset(c) wm8940_write(c, WM8940_SOFTRESET, 0);
+
+static int wm8940_set_dai_fmt(struct snd_soc_dai *codec_dai,
+			      unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 iface = wm8940_read_reg_cache(codec, WM8940_IFACE) & 0xFE67;
+	u16 clk = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0x1fe;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		clk |= 1;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		break;
+	default:
+		return -EINVAL;
+	}
+	wm8940_write(codec, WM8940_CLOCK, clk);
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		iface |= (2 << 3);
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		iface |= (1 << 3);
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface |= (3 << 3);
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		iface |= (3 << 3) | (1 << 7);
+		break;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		iface |= (1 << 7);
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		iface |= (1 << 8);
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		iface |= (1 << 8) | (1 << 7);
+		break;
+	}
+
+	wm8940_write(codec, WM8940_IFACE, iface);
+
+	return 0;
+}
+
+static int wm8940_i2s_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 iface = wm8940_read_reg_cache(codec, WM8940_IFACE) & 0xFD9F;
+	u16 addcntrl = wm8940_read_reg_cache(codec, WM8940_ADDCNTRL) & 0xFFF1;
+	u16 companding =  wm8940_read_reg_cache(codec,
+						WM8940_COMPANDINGCTL) & 0xFFDF;
+	int ret;
+
+	/* LoutR control */
+	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE
+	    && params_channels(params) == 2)
+		iface |= (1 << 9);
+
+	switch (params_rate(params)) {
+	case SNDRV_PCM_RATE_8000:
+		addcntrl |= (0x5 << 1);
+		break;
+	case SNDRV_PCM_RATE_11025:
+		addcntrl |= (0x4 << 1);
+		break;
+	case SNDRV_PCM_RATE_16000:
+		addcntrl |= (0x3 << 1);
+		break;
+	case SNDRV_PCM_RATE_22050:
+		addcntrl |= (0x2 << 1);
+		break;
+	case SNDRV_PCM_RATE_32000:
+		addcntrl |= (0x1 << 1);
+		break;
+	case SNDRV_PCM_RATE_44100:
+	case SNDRV_PCM_RATE_48000:
+		break;
+	}
+	ret = wm8940_write(codec, WM8940_ADDCNTRL, addcntrl);
+	if (ret)
+		goto error_ret;
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S8:
+		companding = companding | (1 << 5);
+		break;
+	case SNDRV_PCM_FORMAT_S16_LE:
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		iface |= (1 << 5);
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		iface |= (2 << 5);
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		iface |= (3 << 5);
+		break;
+	}
+	ret = wm8940_write(codec, WM8940_COMPANDINGCTL, companding);
+	if (ret)
+		goto error_ret;
+	ret = wm8940_write(codec, WM8940_IFACE, iface);
+
+error_ret:
+	return ret;
+}
+
+static int wm8940_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	u16 mute_reg = wm8940_read_reg_cache(codec, WM8940_DAC) & 0xffbf;
+
+	if (mute)
+		mute_reg |= 0x40;
+
+	return wm8940_write(codec, WM8940_DAC, mute_reg);
+}
+
+static int wm8940_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	u16 val;
+	u16 pwr_reg = wm8940_read_reg_cache(codec, WM8940_POWER1) & 0x1F0;
+	int ret = 0;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		/* ensure bufioen and biasen */
+		pwr_reg |= (1 << 2) | (1 << 3);
+		/* Enable thermal shutdown */
+		val = wm8940_read_reg_cache(codec, WM8940_OUTPUTCTL);
+		ret = wm8940_write(codec, WM8940_OUTPUTCTL, val | 0x2);
+		if (ret)
+			break;
+		/* set vmid to 75k */
+		ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x1);
+		break;
+	case SND_SOC_BIAS_PREPARE:
+		/* ensure bufioen and biasen */
+		pwr_reg |= (1 << 2) | (1 << 3);
+		ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x1);
+		break;
+	case SND_SOC_BIAS_STANDBY:
+		/* ensure bufioen and biasen */
+		pwr_reg |= (1 << 2) | (1 << 3);
+		/* set vmid to 300k for standby */
+		ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x2);
+		break;
+	case SND_SOC_BIAS_OFF:
+		ret = wm8940_write(codec, WM8940_POWER1, pwr_reg);
+		break;
+	}
+
+	return ret;
+}
+
+struct pll_ {
+	unsigned int pre_scale:2;
+	unsigned int n:4;
+	unsigned int k;
+};
+
+static struct pll_ pll_div;
+
+/* The size in bits of the pll divide multiplied by 10
+ * to allow rounding later */
+#define FIXED_PLL_SIZE ((1 << 24) * 10)
+static void pll_factors(unsigned int target, unsigned int source)
+{
+	unsigned long long Kpart;
+	unsigned int K, Ndiv, Nmod;
+	/* The left shift ist to avoid accuracy loss when right shifting */
+	Ndiv = target / source;
+
+	if (Ndiv > 12) {
+		source <<= 1;
+		/* Multiply by 2 */
+		pll_div.pre_scale = 0;
+		Ndiv = target / source;
+	} else if (Ndiv < 3) {
+		source >>= 2;
+		/* Divide by 4 */
+		pll_div.pre_scale = 3;
+		Ndiv = target / source;
+	} else if (Ndiv < 6) {
+		source >>= 1;
+		/* divide by 2 */
+		pll_div.pre_scale = 2;
+		Ndiv = target / source;
+	} else
+		pll_div.pre_scale = 1;
+
+	if ((Ndiv < 6) || (Ndiv > 12))
+		printk(KERN_WARNING
+			"WM8940 N value %d outwith recommended range!d\n",
+			Ndiv);
+
+	pll_div.n = Ndiv;
+	Nmod = target % source;
+	Kpart = FIXED_PLL_SIZE * (long long)Nmod;
+
+	do_div(Kpart, source);
+
+	K = Kpart & 0xFFFFFFFF;
+
+	/* Check if we need to round */
+	if ((K % 10) >= 5)
+		K += 5;
+
+	/* Move down to proper range now rounding is done */
+	K /= 10;
+
+	pll_div.k = K;
+}
+
+/* Untested at the moment */
+static int wm8940_set_dai_pll(struct snd_soc_dai *codec_dai,
+		int pll_id, unsigned int freq_in, unsigned int freq_out)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 reg;
+
+	/* Turn off PLL */
+	reg = wm8940_read_reg_cache(codec, WM8940_POWER1);
+	wm8940_write(codec, WM8940_POWER1, reg & 0x1df);
+
+	if (freq_in == 0 || freq_out == 0) {
+		/* Clock CODEC directly from MCLK */
+		reg = wm8940_read_reg_cache(codec, WM8940_CLOCK);
+		wm8940_write(codec, WM8940_CLOCK, reg & 0x0ff);
+		/* Pll power down */
+		wm8940_write(codec, WM8940_PLLN, (1 << 7));
+		return 0;
+	}
+
+	/* Pll is followed by a frequency divide by 4 */
+	pll_factors(freq_out*4, freq_in);
+	if (pll_div.k)
+		wm8940_write(codec, WM8940_PLLN,
+			     (pll_div.pre_scale << 4) | pll_div.n | (1 << 6));
+	else /* No factional component */
+		wm8940_write(codec, WM8940_PLLN,
+			     (pll_div.pre_scale << 4) | pll_div.n);
+	wm8940_write(codec, WM8940_PLLK1, pll_div.k >> 18);
+	wm8940_write(codec, WM8940_PLLK2, (pll_div.k >> 9) & 0x1ff);
+	wm8940_write(codec, WM8940_PLLK3, pll_div.k & 0x1ff);
+	/* Enable the PLL */
+	reg = wm8940_read_reg_cache(codec, WM8940_POWER1);
+	wm8940_write(codec, WM8940_POWER1, reg | 0x020);
+
+	/* Run CODEC from PLL instead of MCLK */
+	reg = wm8940_read_reg_cache(codec, WM8940_CLOCK);
+	wm8940_write(codec, WM8940_CLOCK, reg | 0x100);
+
+	return 0;
+}
+
+static int wm8940_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+				 int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm8940_priv *wm8940 = codec->private_data;
+
+	switch (freq) {
+	case 11289600:
+	case 12000000:
+	case 12288000:
+	case 16934400:
+	case 18432000:
+		wm8940->sysclk = freq;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int wm8940_set_dai_clkdiv(struct snd_soc_dai *codec_dai,
+				 int div_id, int div)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 reg;
+	int ret = 0;
+
+	switch (div_id) {
+	case WM8940_BCLKDIV:
+		reg = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0xFFEF3;
+		ret = wm8940_write(codec, WM8940_CLOCK, reg | (div << 2));
+		break;
+	case WM8940_MCLKDIV:
+		reg = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0xFF1F;
+		ret = wm8940_write(codec, WM8940_CLOCK, reg | (div << 5));
+		break;
+	case WM8940_OPCLKDIV:
+		reg = wm8940_read_reg_cache(codec, WM8940_ADDCNTRL) & 0xFFCF;
+		ret = wm8940_write(codec, WM8940_ADDCNTRL, reg | (div << 4));
+		break;
+	}
+	return ret;
+}
+
+#define WM8940_RATES SNDRV_PCM_RATE_8000_48000
+
+#define WM8940_FORMATS (SNDRV_PCM_FMTBIT_S8 |				\
+			SNDRV_PCM_FMTBIT_S16_LE |			\
+			SNDRV_PCM_FMTBIT_S20_3LE |			\
+			SNDRV_PCM_FMTBIT_S24_LE |			\
+			SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_ops wm8940_dai_ops = {
+	.hw_params = wm8940_i2s_hw_params,
+	.set_sysclk = wm8940_set_dai_sysclk,
+	.digital_mute = wm8940_mute,
+	.set_fmt = wm8940_set_dai_fmt,
+	.set_clkdiv = wm8940_set_dai_clkdiv,
+	.set_pll = wm8940_set_dai_pll,
+};
+
+struct snd_soc_dai wm8940_dai = {
+	.name = "WM8940",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8940_RATES,
+		.formats = WM8940_FORMATS,
+	},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8940_RATES,
+		.formats = WM8940_FORMATS,
+	},
+	.ops = &wm8940_dai_ops,
+	.symmetric_rates = 1,
+};
+EXPORT_SYMBOL_GPL(wm8940_dai);
+
+static int wm8940_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	return wm8940_set_bias_level(codec, SND_SOC_BIAS_OFF);
+}
+
+static int wm8940_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int i;
+	int ret;
+	u8 data[3];
+	u16 *cache = codec->reg_cache;
+
+	/* Sync reg_cache with the hardware
+	 * Could use auto incremented writes to speed this up
+	 */
+	for (i = 0; i < ARRAY_SIZE(wm8940_reg_defaults); i++) {
+		data[0] = i;
+		data[1] = (cache[i] & 0xFF00) >> 8;
+		data[2] = cache[i] & 0x00FF;
+		ret = codec->hw_write(codec->control_data, data, 3);
+		if (ret < 0)
+			goto error_ret;
+		else if (ret != 3) {
+			ret = -EIO;
+			goto error_ret;
+		}
+	}
+	ret = wm8940_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+	if (ret)
+		goto error_ret;
+	ret = wm8940_set_bias_level(codec, codec->suspend_bias_level);
+
+error_ret:
+	return ret;
+}
+
+static struct snd_soc_codec *wm8940_codec;
+
+static int wm8940_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+
+	int ret = 0;
+
+	if (wm8940_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm8940_codec;
+	codec = wm8940_codec;
+
+	mutex_init(&codec->mutex);
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	ret = snd_soc_add_controls(codec, wm8940_snd_controls,
+			     ARRAY_SIZE(wm8940_snd_controls));
+	if (ret)
+		goto error_free_pcms;
+	ret = wm8940_add_widgets(codec);
+	if (ret)
+		goto error_free_pcms;
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto error_free_pcms;
+	}
+
+	return ret;
+
+error_free_pcms:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+static int wm8940_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+struct snd_soc_codec_device soc_codec_dev_wm8940 = {
+	.probe = wm8940_probe,
+	.remove = wm8940_remove,
+	.suspend = wm8940_suspend,
+	.resume = wm8940_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm8940);
+
+static int wm8940_register(struct wm8940_priv *wm8940)
+{
+	struct wm8940_setup_data *pdata = wm8940->codec.dev->platform_data;
+	struct snd_soc_codec *codec = &wm8940->codec;
+	int ret;
+	u16 reg;
+	if (wm8940_codec) {
+		dev_err(codec->dev, "Another WM8940 is registered\n");
+		return -EINVAL;
+	}
+
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm8940;
+	codec->name = "WM8940";
+	codec->owner = THIS_MODULE;
+	codec->read = wm8940_read_reg_cache;
+	codec->write = wm8940_write;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm8940_set_bias_level;
+	codec->dai = &wm8940_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = ARRAY_SIZE(wm8940_reg_defaults);
+	codec->reg_cache = &wm8940->reg_cache;
+
+	memcpy(codec->reg_cache, wm8940_reg_defaults,
+	       sizeof(wm8940_reg_defaults));
+
+	ret = wm8940_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	wm8940_dai.dev = codec->dev;
+
+	wm8940_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	ret = wm8940_write(codec, WM8940_POWER1, 0x180);
+	if (ret < 0)
+		return ret;
+
+	if (!pdata)
+		dev_warn(codec->dev, "No platform data supplied\n");
+	else {
+		reg = wm8940_read_reg_cache(codec, WM8940_OUTPUTCTL);
+		ret = wm8940_write(codec, WM8940_OUTPUTCTL, reg | pdata->vroi);
+		if (ret < 0)
+			return ret;
+	}
+
+
+	wm8940_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm8940_dai);
+	if (ret) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void wm8940_unregister(struct wm8940_priv *wm8940)
+{
+	wm8940_set_bias_level(&wm8940->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm8940_dai);
+	snd_soc_unregister_codec(&wm8940->codec);
+	kfree(wm8940);
+	wm8940_codec = NULL;
+}
+
+static int wm8940_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct wm8940_priv *wm8940;
+	struct snd_soc_codec *codec;
+
+	wm8940 = kzalloc(sizeof *wm8940, GFP_KERNEL);
+	if (wm8940 == NULL)
+		return -ENOMEM;
+
+	codec = &wm8940->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+	i2c_set_clientdata(i2c, wm8940);
+	codec->control_data = i2c;
+	codec->dev = &i2c->dev;
+
+	return wm8940_register(wm8940);
+}
+
+static int wm8940_i2c_remove(struct i2c_client *client)
+{
+	struct wm8940_priv *wm8940 = i2c_get_clientdata(client);
+
+	wm8940_unregister(wm8940);
+
+	return 0;
+}
+
+static const struct i2c_device_id wm8940_i2c_id[] = {
+	{ "wm8940", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm8940_i2c_id);
+
+static struct i2c_driver wm8940_i2c_driver = {
+	.driver = {
+		.name = "WM8940 I2C Codec",
+		.owner = THIS_MODULE,
+	},
+	.probe = wm8940_i2c_probe,
+	.remove = __devexit_p(wm8940_i2c_remove),
+	.id_table = wm8940_i2c_id,
+};
+
+static int __init wm8940_modinit(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&wm8940_i2c_driver);
+	if (ret)
+		printk(KERN_ERR "Failed to register WM8940 I2C driver: %d\n",
+		       ret);
+	return ret;
+}
+module_init(wm8940_modinit);
+
+static void __exit wm8940_exit(void)
+{
+	i2c_del_driver(&wm8940_i2c_driver);
+}
+module_exit(wm8940_exit);
+
+MODULE_DESCRIPTION("ASoC WM8940 driver");
+MODULE_AUTHOR("Jonathan Cameron");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm8940.h b/sound/soc/codecs/wm8940.h
new file mode 100644
index 0000000..8410eed
--- /dev/null
+++ b/sound/soc/codecs/wm8940.h
@@ -0,0 +1,104 @@
+/*
+ * wm8940.h -- WM8940 Soc Audio driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _WM8940_H
+#define _WM8940_H
+
+struct wm8940_setup_data {
+	/* Vref to analogue output resistance */
+#define WM8940_VROI_1K 0
+#define WM8940_VROI_30K 1
+	unsigned int vroi:1;
+};
+extern struct snd_soc_dai wm8940_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm8940;
+
+/* WM8940 register space */
+#define WM8940_SOFTRESET	0x00
+#define WM8940_POWER1		0x01
+#define WM8940_POWER2		0x02
+#define WM8940_POWER3		0x03
+#define WM8940_IFACE		0x04
+#define WM8940_COMPANDINGCTL	0x05
+#define WM8940_CLOCK		0x06
+#define WM8940_ADDCNTRL		0x07
+#define WM8940_GPIO		0x08
+#define WM8940_CTLINT		0x09
+#define WM8940_DAC		0x0A
+#define WM8940_DACVOL		0x0B
+
+#define WM8940_ADC		0x0E
+#define WM8940_ADCVOL		0x0F
+#define WM8940_NOTCH1		0x10
+#define WM8940_NOTCH2		0x11
+#define WM8940_NOTCH3		0x12
+#define WM8940_NOTCH4		0x13
+#define WM8940_NOTCH5		0x14
+#define WM8940_NOTCH6		0x15
+#define WM8940_NOTCH7		0x16
+#define WM8940_NOTCH8		0x17
+#define WM8940_DACLIM1		0x18
+#define WM8940_DACLIM2		0x19
+
+#define WM8940_ALC1		0x20
+#define WM8940_ALC2		0x21
+#define WM8940_ALC3		0x22
+#define WM8940_NOISEGATE	0x23
+#define WM8940_PLLN		0x24
+#define WM8940_PLLK1		0x25
+#define WM8940_PLLK2		0x26
+#define WM8940_PLLK3		0x27
+
+#define WM8940_ALC4		0x2A
+
+#define WM8940_INPUTCTL		0x2C
+#define WM8940_PGAGAIN		0x2D
+
+#define WM8940_ADCBOOST		0x2F
+
+#define WM8940_OUTPUTCTL	0x31
+#define WM8940_SPKMIX		0x32
+
+#define WM8940_SPKVOL		0x36
+
+#define WM8940_MONOMIX		0x38
+
+#define WM8940_CACHEREGNUM  0x57
+
+
+/* Clock divider Id's */
+#define WM8940_BCLKDIV 0
+#define WM8940_MCLKDIV 1
+#define WM8940_OPCLKDIV 2
+
+/* MCLK clock dividers */
+#define WM8940_MCLKDIV_1	0
+#define WM8940_MCLKDIV_1_5	1
+#define WM8940_MCLKDIV_2	2
+#define WM8940_MCLKDIV_3	3
+#define WM8940_MCLKDIV_4	4
+#define WM8940_MCLKDIV_6	5
+#define WM8940_MCLKDIV_8	6
+#define WM8940_MCLKDIV_12	7
+
+/* BCLK clock dividers */
+#define WM8940_BCLKDIV_1 0
+#define WM8940_BCLKDIV_2 1
+#define WM8940_BCLKDIV_4 2
+#define WM8940_BCLKDIV_8 3
+#define WM8940_BCLKDIV_16 4
+#define WM8940_BCLKDIV_32 5
+
+/* PLL Out Dividers */
+#define WM8940_OPCLKDIV_1 0
+#define WM8940_OPCLKDIV_2 1
+#define WM8940_OPCLKDIV_3 2
+#define WM8940_OPCLKDIV_4 3
+
+#endif /* _WM8940_H */
+
-- 
cgit v0.10.2


From 9c935386512a3faa1be1c3d81cba38b7259a43f5 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Fri, 24 Apr 2009 15:00:25 +0200
Subject: ASoC: cs4270: fix Master Capture Switch polarity

The control modifies the MUTE register, hence the polarity must be
inverted.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-By: Timur Tabi <timur@freescale.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c
index 7fa09a3..3c34fe6 100644
--- a/sound/soc/codecs/cs4270.c
+++ b/sound/soc/codecs/cs4270.c
@@ -486,7 +486,7 @@ static const struct snd_kcontrol_new cs4270_snd_controls[] = {
 	SOC_SINGLE("Zero Cross Switch", CS4270_TRANS, 5, 1, 0),
 	SOC_SINGLE("Popguard Switch", CS4270_MODE, 0, 1, 1),
 	SOC_SINGLE("Auto-Mute Switch", CS4270_MUTE, 5, 1, 0),
-	SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 0)
+	SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 1)
 };
 
 /*
-- 
cgit v0.10.2


From 1a4ba05ec8369d62c10155a8931e81267bfbd31c Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Fri, 24 Apr 2009 16:37:45 +0200
Subject: ASoC: cs4270: add Master Playback Switch

This adds a new control named 'Master Playback Switch' for cs4270
codecs. It is implemented using the new SOC_DOUBLE_EXT macro to catch
the put function and store the information about manually set mute
controls from userspace. When a manual mute is set, we don't want the
soc core to un-mute the outputs.

Renamed cs4270_mute() to cs4270_dai_mute() to avoid confusion.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c
index 3c34fe6..ece6ed6 100644
--- a/sound/soc/codecs/cs4270.c
+++ b/sound/soc/codecs/cs4270.c
@@ -109,6 +109,7 @@ struct cs4270_private {
 	unsigned int mclk; /* Input frequency of the MCLK pin */
 	unsigned int mode; /* The mode (I2S or left-justified) */
 	unsigned int slave_mode;
+	unsigned int manual_mute;
 };
 
 /**
@@ -453,7 +454,7 @@ static int cs4270_hw_params(struct snd_pcm_substream *substream,
 }
 
 /**
- * cs4270_mute - enable/disable the CS4270 external mute
+ * cs4270_dai_mute - enable/disable the CS4270 external mute
  * @dai: the SOC DAI
  * @mute: 0 = disable mute, 1 = enable mute
  *
@@ -462,21 +463,52 @@ static int cs4270_hw_params(struct snd_pcm_substream *substream,
  * board does not have the MUTEA or MUTEB pins connected to such circuitry,
  * then this function will do nothing.
  */
-static int cs4270_mute(struct snd_soc_dai *dai, int mute)
+static int cs4270_dai_mute(struct snd_soc_dai *dai, int mute)
 {
 	struct snd_soc_codec *codec = dai->codec;
+	struct cs4270_private *cs4270 = codec->private_data;
 	int reg6;
 
 	reg6 = snd_soc_read(codec, CS4270_MUTE);
 
 	if (mute)
 		reg6 |= CS4270_MUTE_DAC_A | CS4270_MUTE_DAC_B;
-	else
+	else {
 		reg6 &= ~(CS4270_MUTE_DAC_A | CS4270_MUTE_DAC_B);
+		reg6 |= cs4270->manual_mute;
+	}
 
 	return snd_soc_write(codec, CS4270_MUTE, reg6);
 }
 
+/**
+ * cs4270_soc_put_mute - put callback for the 'Master Playback switch'
+ * 			 alsa control.
+ * @kcontrol: mixer control
+ * @ucontrol: control element information
+ *
+ * This function basically passes the arguments on to the generic
+ * snd_soc_put_volsw() function and saves the mute information in
+ * our private data structure. This is because we want to prevent
+ * cs4270_dai_mute() neglecting the user's decision to manually
+ * mute the codec's output.
+ *
+ * Returns 0 for success.
+ */
+static int cs4270_soc_put_mute(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct cs4270_private *cs4270 = codec->private_data;
+	int left = !ucontrol->value.integer.value[0];
+	int right = !ucontrol->value.integer.value[1];
+
+	cs4270->manual_mute = (left ? CS4270_MUTE_DAC_A : 0) |
+			      (right ? CS4270_MUTE_DAC_B : 0);
+
+	return snd_soc_put_volsw(kcontrol, ucontrol);
+}
+
 /* A list of non-DAPM controls that the CS4270 supports */
 static const struct snd_kcontrol_new cs4270_snd_controls[] = {
 	SOC_DOUBLE_R("Master Playback Volume",
@@ -486,7 +518,9 @@ static const struct snd_kcontrol_new cs4270_snd_controls[] = {
 	SOC_SINGLE("Zero Cross Switch", CS4270_TRANS, 5, 1, 0),
 	SOC_SINGLE("Popguard Switch", CS4270_MODE, 0, 1, 1),
 	SOC_SINGLE("Auto-Mute Switch", CS4270_MUTE, 5, 1, 0),
-	SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 1)
+	SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 1),
+	SOC_DOUBLE_EXT("Master Playback Switch", CS4270_MUTE, 0, 1, 1, 1,
+		snd_soc_get_volsw, cs4270_soc_put_mute),
 };
 
 /*
@@ -506,7 +540,7 @@ static struct snd_soc_dai_ops cs4270_dai_ops = {
 	.hw_params	= cs4270_hw_params,
 	.set_sysclk	= cs4270_set_dai_sysclk,
 	.set_fmt	= cs4270_set_dai_fmt,
-	.digital_mute	= cs4270_mute,
+	.digital_mute	= cs4270_dai_mute,
 };
 
 struct snd_soc_dai cs4270_dai = {
-- 
cgit v0.10.2


From 6be01cfb854818298753bfce65543dbc81d51d5a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 27 Apr 2009 20:57:42 +0100
Subject: ASoC: Staticise TLV values in WM8940

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c
index 26987dc..a66dacc 100644
--- a/sound/soc/codecs/wm8940.c
+++ b/sound/soc/codecs/wm8940.c
@@ -168,16 +168,16 @@ static const char *wm8940_filter_mode_text[] = {"Audio", "Application"};
 static const struct soc_enum wm8940_filter_mode_enum
 = SOC_ENUM_SINGLE(WM8940_ADC, 7, 2, wm8940_filter_mode_text);
 
-DECLARE_TLV_DB_SCALE(wm8940_spk_vol_tlv, -5700, 100, 1);
-DECLARE_TLV_DB_SCALE(wm8940_att_tlv, -1000, 1000, 0);
-DECLARE_TLV_DB_SCALE(wm8940_pga_vol_tlv, -1200, 75, 0);
-DECLARE_TLV_DB_SCALE(wm8940_alc_min_tlv, -1200, 600, 0);
-DECLARE_TLV_DB_SCALE(wm8940_alc_max_tlv, 675, 600, 0);
-DECLARE_TLV_DB_SCALE(wm8940_alc_tar_tlv, -2250, 50, 0);
-DECLARE_TLV_DB_SCALE(wm8940_lim_boost_tlv, 0, 100, 0);
-DECLARE_TLV_DB_SCALE(wm8940_lim_thresh_tlv, -600, 100, 0);
-DECLARE_TLV_DB_SCALE(wm8940_adc_tlv, -12750, 50, 1);
-DECLARE_TLV_DB_SCALE(wm8940_capture_boost_vol_tlv, 0, 2000, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_spk_vol_tlv, -5700, 100, 1);
+static DECLARE_TLV_DB_SCALE(wm8940_att_tlv, -1000, 1000, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_pga_vol_tlv, -1200, 75, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_alc_min_tlv, -1200, 600, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_alc_max_tlv, 675, 600, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_alc_tar_tlv, -2250, 50, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_lim_boost_tlv, 0, 100, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_lim_thresh_tlv, -600, 100, 0);
+static DECLARE_TLV_DB_SCALE(wm8940_adc_tlv, -12750, 50, 1);
+static DECLARE_TLV_DB_SCALE(wm8940_capture_boost_vol_tlv, 0, 2000, 0);
 
 static const struct snd_kcontrol_new wm8940_snd_controls[] = {
 	SOC_SINGLE("Digital Loopback Switch", WM8940_COMPANDINGCTL,
@@ -253,7 +253,7 @@ static const struct snd_kcontrol_new wm8940_mono_mixer_controls[] = {
 	SOC_DAPM_SINGLE("PCM Playback Switch", WM8940_MONOMIX, 0, 1, 0),
 };
 
-DECLARE_TLV_DB_SCALE(wm8940_boost_vol_tlv, -1500, 300, 1);
+static DECLARE_TLV_DB_SCALE(wm8940_boost_vol_tlv, -1500, 300, 1);
 static const struct snd_kcontrol_new wm8940_input_boost_controls[] = {
 	SOC_DAPM_SINGLE("Mic PGA Switch", WM8940_PGAGAIN, 6, 1, 1),
 	SOC_DAPM_SINGLE_TLV("Aux Volume", WM8940_ADCBOOST,
-- 
cgit v0.10.2


From ac2ff946a53e7bd0ae98f4e5d1d6c1b1dced82e5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 12:38:32 +0900
Subject: mg_disk: fix locking

IRQ and timeout handlers call functions which expect locked queue lock
without locking it.  Fix it.

While at it, convert 0s used as null pointer constant to NULLs.

[ Impact: fix locking, cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: unsik Kim <donari75@gmail.com>

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index fb39d9a..d3e72ad 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -160,11 +160,16 @@ static irqreturn_t mg_irq(int irq, void *dev_id)
 	struct mg_host *host = dev_id;
 	void (*handler)(struct mg_host *) = host->mg_do_intr;
 
-	host->mg_do_intr = 0;
+	spin_lock(&host->lock);
+
+	host->mg_do_intr = NULL;
 	del_timer(&host->timer);
 	if (!handler)
 		handler = mg_unexpected_intr;
 	handler(host);
+
+	spin_unlock(&host->lock);
+
 	return IRQ_HANDLED;
 }
 
@@ -319,7 +324,7 @@ static void mg_read(struct request *req)
 
 	remains = req->nr_sectors;
 
-	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, 0) !=
+	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, NULL) !=
 			MG_ERR_NONE)
 		mg_bad_rw_intr(host);
 
@@ -363,7 +368,7 @@ static void mg_write(struct request *req)
 
 	remains = req->nr_sectors;
 
-	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, 0) !=
+	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, NULL) !=
 			MG_ERR_NONE) {
 		mg_bad_rw_intr(host);
 		return;
@@ -521,9 +526,11 @@ void mg_times_out(unsigned long data)
 	char *name;
 	struct request *req;
 
+	spin_lock_irq(&host->lock);
+
 	req = elv_next_request(host->breq);
 	if (!req)
-		return;
+		goto out_unlock;
 
 	host->mg_do_intr = NULL;
 
@@ -534,6 +541,8 @@ void mg_times_out(unsigned long data)
 	mg_bad_rw_intr(host);
 
 	mg_request(host->breq);
+out_unlock:
+	spin_unlock_irq(&host->lock);
 }
 
 static void mg_request_poll(struct request_queue *q)
-- 
cgit v0.10.2


From 7090a0a97f55cbf47547a140fcc5a349f32c598c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 28 Apr 2009 12:38:33 +0900
Subject: mg_disk: fix CONFIG_LBD=y warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/block/mg_disk.c: In function ‘mg_dump_status’:
drivers/block/mg_disk.c:265: warning: format ‘%ld’ expects type ‘long int’, but
argument 2 has type ‘sector_t’

[ Impact: kill build warning ]

Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index d3e72ad..f389835 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -79,7 +79,7 @@ static void mg_dump_status(const char *msg, unsigned int stat,
 			if (host->breq) {
 				req = elv_next_request(host->breq);
 				if (req)
-					printk(", sector=%ld", req->sector);
+					printk(", sector=%u", (u32)req->sector);
 			}
 
 		}
-- 
cgit v0.10.2


From e93b9fb7d85da4fd9d5171649e5ddcac1dd572bf Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 12:38:33 +0900
Subject: hd: fix locking

hd dance around local irq and HD_IRQ enable without achieving much.
It ends up transferring data from irq handler with both local irq and
HD_IRQ disabled.  The only place it actually does something is while
transferring the first block of a request which it does with HD_IRQ
disabled but local irq enabled.

Unfortunately, the dancing is horribly broken from locking POV.  IRQ
and timeout handlers access block queue without grabbing the queue
lock and running the driver in SMP configuration crashes the whole
machine pretty quickly.

Remove meaningless irq enable/disable dancing and add proper locking
in issue, irq and timeout paths.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 3c11f06..baaa9e4 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -509,7 +509,6 @@ ok_to_write:
 	if (i > 0) {
 		SET_HANDLER(&write_intr);
 		outsw(HD_DATA, req->buffer, 256);
-		local_irq_enable();
 	} else {
 #if (HD_DELAY > 0)
 		last_req = read_timer();
@@ -541,8 +540,7 @@ static void hd_times_out(unsigned long dummy)
 	if (!CURRENT)
 		return;
 
-	disable_irq(HD_IRQ);
-	local_irq_enable();
+	spin_lock_irq(hd_queue->queue_lock);
 	reset = 1;
 	name = CURRENT->rq_disk->disk_name;
 	printk("%s: timeout\n", name);
@@ -552,9 +550,8 @@ static void hd_times_out(unsigned long dummy)
 #endif
 		end_request(CURRENT, 0);
 	}
-	local_irq_disable();
 	hd_request();
-	enable_irq(HD_IRQ);
+	spin_unlock_irq(hd_queue->queue_lock);
 }
 
 static int do_special_op(struct hd_i_struct *disk, struct request *req)
@@ -592,7 +589,6 @@ static void hd_request(void)
 		return;
 repeat:
 	del_timer(&device_timer);
-	local_irq_enable();
 
 	req = CURRENT;
 	if (!req) {
@@ -601,7 +597,6 @@ repeat:
 	}
 
 	if (reset) {
-		local_irq_disable();
 		reset_hd();
 		return;
 	}
@@ -660,9 +655,7 @@ repeat:
 
 static void do_hd_request(struct request_queue *q)
 {
-	disable_irq(HD_IRQ);
 	hd_request();
-	enable_irq(HD_IRQ);
 }
 
 static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -684,12 +677,16 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id)
 {
 	void (*handler)(void) = do_hd;
 
+	spin_lock(hd_queue->queue_lock);
+
 	do_hd = NULL;
 	del_timer(&device_timer);
 	if (!handler)
 		handler = unexpected_hd_interrupt;
 	handler();
-	local_irq_enable();
+
+	spin_unlock(hd_queue->queue_lock);
+
 	return IRQ_HANDLED;
 }
 
-- 
cgit v0.10.2


From e686307fdc84f249490e6c9da92fcb2424491f14 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 17 Apr 2009 08:41:21 +0200
Subject: loop: use BIO list management functions

Now that the bio list management stuff is generic, convert loop to use
bio lists instead of its own private bio list implementation.

Cc:  Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ddae808..9ca4bb0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -511,11 +511,7 @@ out:
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
-	if (lo->lo_biotail) {
-		lo->lo_biotail->bi_next = bio;
-		lo->lo_biotail = bio;
-	} else
-		lo->lo_bio = lo->lo_biotail = bio;
+	bio_list_add(&lo->lo_bio_list, bio);
 }
 
 /*
@@ -523,16 +519,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  */
 static struct bio *loop_get_bio(struct loop_device *lo)
 {
-	struct bio *bio;
-
-	if ((bio = lo->lo_bio)) {
-		if (bio == lo->lo_biotail)
-			lo->lo_biotail = NULL;
-		lo->lo_bio = bio->bi_next;
-		bio->bi_next = NULL;
-	}
-
-	return bio;
+	return bio_list_pop(&lo->lo_bio_list);
 }
 
 static int loop_make_request(struct request_queue *q, struct bio *old_bio)
@@ -609,12 +596,13 @@ static int loop_thread(void *data)
 
 	set_user_nice(current, -20);
 
-	while (!kthread_should_stop() || lo->lo_bio) {
+	while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
 
 		wait_event_interruptible(lo->lo_event,
-				lo->lo_bio || kthread_should_stop());
+				!bio_list_empty(&lo->lo_bio_list) ||
+				kthread_should_stop());
 
-		if (!lo->lo_bio)
+		if (bio_list_empty(&lo->lo_bio_list))
 			continue;
 		spin_lock_irq(&lo->lo_lock);
 		bio = loop_get_bio(lo);
@@ -841,7 +829,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
-	lo->lo_bio = lo->lo_biotail = NULL;
+	bio_list_init(&lo->lo_bio_list);
 
 	/*
 	 * set queue make_request_fn, and add limits based on lower level
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7b214fd..f37ca8c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -506,7 +506,7 @@ static inline int bio_has_data(struct bio *bio)
 }
 
 /*
- * BIO list managment for use by remapping drivers (e.g. DM or MD).
+ * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
  *
  * A bio_list anchors a singly-linked list of bios chained through the bi_next
  * member of the bio.  The bio_list also caches the last list member to allow
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 4072544..66c194e 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -56,8 +56,7 @@ struct loop_device {
 	gfp_t		old_gfp_mask;
 
 	spinlock_t		lo_lock;
-	struct bio 		*lo_bio;
-	struct bio		*lo_biotail;
+	struct bio_list		lo_bio_list;
 	int			lo_state;
 	struct mutex		lo_ctl_mutex;
 	struct task_struct	*lo_thread;
-- 
cgit v0.10.2


From 924cec7789f65ab7f022256f6533ecba0747b5f3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: block: clear req->errors on bio completion only for fs requests

Impact: subtle behavior change

For fs requests, rq is only carrier of bios and rq error status as a
whole doesn't mean much.  This is the reason why rq->errors is being
cleared on each partial completion of a request as on each partial
completion the error status is transferred to the respective bios.

For pc requests, rq->errors is used to carry error status to the
issuer and thus __end_that_request_first() doesn't clear it on such
cases.

The condition was fine till now as only fs and pc requests have used
bio and thus the bio completion path.  However, future changes will
unify data accesses to bio and all non fs users care about rq error
status.  Clear rq->errors on bio completion only for fs requests.

In general, the implicit clearing is a bit too subtle especially as
the meaning of rq->errors is completely dependent on low level
drivers.  Unifying / cleaning up rq->errors usage and letting llds
manage it would be better.  TODO comment added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Jens Axboe <axboe@kernel.dk>

diff --git a/block/blk-core.c b/block/blk-core.c
index 2998fe3..41bc0ff 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1741,10 +1741,14 @@ static int __end_that_request_first(struct request *req, int error,
 	trace_block_rq_complete(req->q, req);
 
 	/*
-	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
-	 * sense key with us all the way through
+	 * For fs requests, rq is just carrier of independent bio's
+	 * and each partial completion should be handled separately.
+	 * Reset per-request error on each partial completion.
+	 *
+	 * TODO: tj: This is too subtle.  It would be better to let
+	 * low level drivers do what they see fit.
 	 */
-	if (!blk_pc_request(req))
+	if (blk_fs_request(req))
 		req->errors = 0;
 
 	if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
-- 
cgit v0.10.2


From 0de57fb93b1daaeaecb658a98b3299ae460c02e9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide-tape: remove back-to-back REQUEST_SENSE detection

Impact: fix an oops which always triggers

ide_tape_issue_pc() assumed drive->pc isn't NULL on invocation when
checking for back-to-back request sense issues but drive->pc can be
NULL and even when it's not NULL, it's not safe to dereference it once
the previous command is complete because pc could have been freed or
was on stack.  Kill back-to-back REQUEST_SENSE detection.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index cb942a9..3a53e08 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -614,12 +614,6 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 {
 	idetape_tape_t *tape = drive->driver_data;
 
-	if (drive->pc->c[0] == REQUEST_SENSE &&
-	    pc->c[0] == REQUEST_SENSE) {
-		printk(KERN_ERR "ide-tape: possible ide-tape.c bug - "
-			"Two request sense in serial were issued\n");
-	}
-
 	if (drive->failed_pc == NULL && pc->c[0] != REQUEST_SENSE)
 		drive->failed_pc = pc;
 
-- 
cgit v0.10.2


From 220d06b5531e7b8a6226b2fdfb21198c3ccc4f76 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide: use blk_run_queue() instead of blk_start_queueing()

blk_start_queueing() is being phased out in favor of
[__]blk_run_queue().  Switch.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 310d03f..a914023 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -24,11 +24,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 			start_queue = 1;
 		spin_unlock_irq(&hwif->lock);
 
-		if (start_queue) {
-			spin_lock_irq(q->queue_lock);
-			blk_start_queueing(q);
-			spin_unlock_irq(q->queue_lock);
-		}
+		if (start_queue)
+			blk_run_queue(q);
 		return;
 	}
 	spin_unlock_irq(&hwif->lock);
-- 
cgit v0.10.2


From b2963ac1738542d30305d7e12c8c078a383a425c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide: don't set REQ_SOFTBARRIER

ide doesn't have to worry about REQ_SOFTBARRIER.  Don't set it.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index a9fbe2c..c243880 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -411,7 +411,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 	cmd->protocol = ATA_PROT_NODATA;
 
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->special = cmd;
 }
 
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index c1c25eb..5991b23 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -231,7 +231,6 @@ static int generic_drive_reset(ide_drive_t *drive)
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd_len = 1;
 	rq->cmd[0] = REQ_DRIVE_RESET;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 	if (blk_execute_rq(drive->queue, NULL, rq, 1))
 		ret = rq->errors;
 	blk_put_request(rq);
-- 
cgit v0.10.2


From 214ae19104141404f18ad6651752eb38e3dd584e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide kill unused ide_cmd->special

Impact: removal of unused field

No one uses ide_cmd->special anymore.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/include/linux/ide.h b/include/linux/ide.h
index ff65fff..846a1e1 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -324,7 +324,6 @@ struct ide_cmd {
 	unsigned int		cursg_ofs;
 
 	struct request		*rq;		/* copy of request */
-	void			*special;	/* valid_t generally */
 };
 
 /* ATAPI packet command flags */
-- 
cgit v0.10.2


From 59a4f6f355fc718581ddcf1bb45a469d4756c035 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide-cd: clear sense buffer before issuing request sense

Impact: code simplification

ide_cd_request_sense_fixup() clears the tail of the sense buffer if
the device didn't completely fill it.  This patch makes
cdrom_queue_request_sense() clear the sense buffer before issuing the
command instead of clearing it afterwards.  This simplifies code and
eases future changes.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 3d4e099..b6a0d12 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -217,6 +217,8 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 	if (sense == NULL)
 		sense = &info->sense_data;
 
+	memset(sense, 0, 18);
+
 	/* stuff the sense request in front of our current request */
 	blk_rq_init(NULL, rq);
 	rq->cmd_type = REQ_TYPE_ATA_PC;
@@ -504,14 +506,8 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
 	 * and some drives don't send them.  Sigh.
 	 */
 	if (rq->cmd[0] == GPCMD_REQUEST_SENSE &&
-	    cmd->nleft > 0 && cmd->nleft <= 5) {
-		unsigned int ofs = cmd->nbytes - cmd->nleft;
-
-		while (cmd->nleft > 0) {
-			*((u8 *)rq->data + ofs++) = 0;
-			cmd->nleft--;
-		}
-	}
+	    cmd->nleft > 0 && cmd->nleft <= 5)
+		cmd->nleft = 0;
 }
 
 int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
-- 
cgit v0.10.2


From 8968932e54db35cf9d69cfbbd50c26dfaaa586c7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-floppy: block pc always uses bio

Impact: remove unnecessary code path

Block pc requests always use bio and rq->data is always NULL.  No need
to worry about !rq->bio cases in idefloppy_block_pc_cmd().  Note that
ide-atapi uses ide_pio_bytes() for bio PIO transfer which handle sg
fine.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 2b4868d..3b22e06 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -216,15 +216,13 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
 	ide_init_pc(pc);
 	memcpy(pc->c, rq->cmd, sizeof(pc->c));
 	pc->rq = rq;
-	if (rq->data_len && rq_data_dir(rq) == WRITE)
-		pc->flags |= PC_FLAG_WRITING;
-	pc->buf = rq->data;
-	if (rq->bio)
+	if (rq->data_len) {
 		pc->flags |= PC_FLAG_DMA_OK;
-	/*
-	 * possibly problematic, doesn't look like ide-floppy correctly
-	 * handled scattered requests if dma fails...
-	 */
+		if (rq_data_dir(rq) == WRITE)
+			pc->flags |= PC_FLAG_WRITING;
+	}
+	/* pio will be performed by ide_pio_bytes() which handles sg fine */
+	pc->buf = NULL;
 	pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
-- 
cgit v0.10.2


From d868ca24302e99a0e8a86071ca2c66273edf97d9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-taskfile: don't abuse rq->buffer

Impact: rq->buffer usage cleanup

ide_raw_taskfile() directly uses rq->buffer to carry pointer to the
data buffer.  This complicates both block interface and ide backend
request handling.  Use blk_rq_map_kern() instead and drop special
handling for REQ_TYPE_ATA_TASKFILE from ide_map_sg().

Note that REQ_RW setting is moved upwards as blk_rq_map_kern() uses it
to initialize bio rw flag.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 35dc38d..9b9e8b1 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -248,10 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
 	struct scatterlist *sg = hwif->sg_table;
 	struct request *rq = cmd->rq;
 
-	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
-		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
-		cmd->sg_nents = 1;
-	} else if (!rq->bio) {
+	if (!rq->bio) {
 		sg_init_one(sg, rq->data, rq->data_len);
 		cmd->sg_nents = 1;
 	} else
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 4aa6223..f400eb4 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -424,7 +424,9 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-	rq->buffer = buf;
+
+	if (cmd->tf_flags & IDE_TFLAG_WRITE)
+		rq->cmd_flags |= REQ_RW;
 
 	/*
 	 * (ks) We transfer currently only whole sectors.
@@ -432,18 +434,20 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	 * if we would find a solution to transfer any size.
 	 * To support special commands like READ LONG.
 	 */
-	rq->hard_nr_sectors = rq->nr_sectors = nsect;
-	rq->hard_cur_sectors = rq->current_nr_sectors = nsect;
-
-	if (cmd->tf_flags & IDE_TFLAG_WRITE)
-		rq->cmd_flags |= REQ_RW;
+	if (nsect) {
+		error = blk_rq_map_kern(drive->queue, rq, buf,
+					nsect * SECTOR_SIZE, __GFP_WAIT);
+		if (error)
+			goto put_req;
+	}
 
 	rq->special = cmd;
 	cmd->rq = rq;
 
 	error = blk_execute_rq(drive->queue, NULL, rq, 0);
-	blk_put_request(rq);
 
+put_req:
+	blk_put_request(rq);
 	return error;
 }
 
-- 
cgit v0.10.2


From ac0b0113ddbab3ed2388132d368c97292f9f3c84 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-atapi: don't abuse rq->buffer

Impact: rq->buffer usage cleanup

ide-atapi uses rq->buffer as private opaque value for internal special
requests.  rq->special isn't used for these cases (the only case where
rq->special is used is for ide-tape rw requests).  Use rq->special
instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 7201b17..2894577 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -90,7 +90,7 @@ static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
 	blk_rq_init(NULL, rq);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd_flags |= REQ_PREEMPT;
-	rq->buffer = (char *)pc;
+	rq->special = (char *)pc;
 	rq->rq_disk = disk;
 
 	if (pc->req_xfer) {
@@ -119,7 +119,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->buffer = (char *)pc;
+	rq->special = (char *)pc;
 
 	if (pc->req_xfer) {
 		rq->data = pc->buf;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 3b22e06..9460033 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -264,7 +264,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
 	} else if (blk_special_request(rq)) {
-		pc = (struct ide_atapi_pc *) rq->buffer;
+		pc = (struct ide_atapi_pc *)rq->special;
 	} else if (blk_pc_request(rq)) {
 		pc = &floppy->queued_pc;
 		idefloppy_blockpc_cmd(floppy, pc, rq);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 3a53e08..aadf53c 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -828,7 +828,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 		goto out;
 	}
 	if (rq->cmd[13] & REQ_IDETAPE_PC1) {
-		pc = (struct ide_atapi_pc *) rq->buffer;
+		pc = (struct ide_atapi_pc *)rq->special;
 		rq->cmd[13] &= ~(REQ_IDETAPE_PC1);
 		rq->cmd[13] |= REQ_IDETAPE_PC2;
 		goto out;
-- 
cgit v0.10.2


From 1f181d2b1569dfb88a584a6e1847e9e1c7645951 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-cd: don't abuse rq->buffer

Impact: rq->buffer usage cleanup

ide-cd uses rq->buffer to carry pointer to the original request when
issuing REQUEST_SENSE.  Use rq->special instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index b6a0d12..bb77c79 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -232,8 +232,8 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 	rq->cmd_type = REQ_TYPE_SENSE;
 	rq->cmd_flags |= REQ_PREEMPT;
 
-	/* NOTE! Save the failed command in "rq->buffer" */
-	rq->buffer = (void *) failed_command;
+	/* NOTE! Save the failed command in "rq->special" */
+	rq->special = (void *)failed_command;
 
 	if (failed_command)
 		ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x",
@@ -247,10 +247,10 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
-	 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
+	 * For REQ_TYPE_SENSE, "rq->special" points to the original
 	 * failed request
 	 */
-	struct request *failed = (struct request *)rq->buffer;
+	struct request *failed = (struct request *)rq->special;
 	struct cdrom_info *info = drive->driver_data;
 	void *sense = &info->sense_data;
 
-- 
cgit v0.10.2


From e69d800f7e8797a8e3423380ee9d8ca1cb90c388 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide: add helpers for preparing sense requests

This is in preparation of removing the queueing of a sense request out
of the IRQ handler path.

Use struct request_sense as a general sense buffer for all ATAPI
devices ide-{floppy,tape,cd}.

tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as
      it can cause deadlock.  Converted to use inline struct request
      and blk_rq_init().

    * Added xfer / cdb len selection depending on device type.

    * All sense prep logics folded into ide_prep_sense() which never
      fails.

    * hwif->rq clearing and sense_rq used handling moved into
      ide_queue_sense_rq().

    * blk_rq_map_kern() conversion is moved to later patch.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2894577..c6e0348 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -191,6 +191,67 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc)
 }
 EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq)
+{
+	struct request_sense *sense = &drive->sense_data;
+	struct request *sense_rq = &drive->sense_rq;
+	unsigned int cmd_len, sense_len;
+
+	debug_log("%s: enter\n", __func__);
+
+	switch (drive->media) {
+	case ide_floppy:
+		cmd_len = 255;
+		sense_len = 18;
+		break;
+	case ide_tape:
+		cmd_len = 20;
+		sense_len = 20;
+		break;
+	default:
+		cmd_len = 18;
+		sense_len = 18;
+	}
+
+	BUG_ON(sense_len > sizeof(*sense));
+
+	if (blk_sense_request(rq) || drive->sense_rq_armed)
+		return;
+
+	memset(sense, 0, sizeof(*sense));
+
+	blk_rq_init(rq->q, sense_rq);
+	sense_rq->rq_disk = rq->rq_disk;
+
+	sense_rq->data = sense;
+	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
+	sense_rq->cmd[4] = cmd_len;
+	sense_rq->data_len = sense_len;
+
+	sense_rq->cmd_type = REQ_TYPE_SENSE;
+	sense_rq->cmd_flags |= REQ_PREEMPT;
+
+	if (drive->media == ide_tape)
+		sense_rq->cmd[13] = REQ_IDETAPE_PC1;
+
+	drive->sense_rq_armed = true;
+}
+EXPORT_SYMBOL_GPL(ide_prep_sense);
+
+void ide_queue_sense_rq(ide_drive_t *drive, void *special)
+{
+	BUG_ON(!drive->sense_rq_armed);
+
+	drive->sense_rq.special = special;
+	drive->sense_rq_armed = false;
+
+	drive->hwif->rq = NULL;
+
+	elv_add_request(drive->queue, &drive->sense_rq,
+			ELEVATOR_INSERT_FRONT, 0);
+}
+EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
+
 /*
  * Called when an error was detected during the last packet command.
  * We queue a request sense packet command in the head of the request list.
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 846a1e1..a69ccac 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -26,6 +26,9 @@
 #include <asm/io.h>
 #include <asm/mutex.h>
 
+/* for request_sense */
+#include <linux/cdrom.h>
+
 #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300)
 # define SUPPORT_VLB_SYNC 0
 #else
@@ -602,6 +605,11 @@ struct ide_drive_s {
 
 	struct ide_atapi_pc request_sense_pc;
 	struct request request_sense_rq;
+
+	/* current sense rq and buffer */
+	bool sense_rq_armed;
+	struct request sense_rq;
+	struct request_sense sense_data;
 };
 
 typedef struct ide_drive_s ide_drive_t;
@@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *, struct gendisk *);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq);
+void ide_queue_sense_rq(ide_drive_t *drive, void *special);
+
 int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
-- 
cgit v0.10.2


From c457ce874a0f3dfa3d5e9f2309789f6f34e24325 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-cd: convert to using generic sense request

Preallocate a sense request in the ->do_request method and reinitialize
it only on demand, in case it's been consumed in the IRQ handler path.
The reason for this is that we don't want to be mapping rq to bio in
the IRQ path and introduce all kinds of unnecessary hacks to the block
layer.

tj: * Both user and kernel PC requests expect sense data to be stored
      in separate storage other than drive->sense_data.  Copy sense
      data to rq->sense on completion if rq->sense is not NULL.  This
      fixes bogus sense data on PC requests.

As a result, remove cdrom_queue_request_sense.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index bb77c79..061d7bb 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -206,44 +206,6 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
 	ide_cd_log_error(drive->name, failed_command, sense);
 }
 
-static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
-				      struct request *failed_command)
-{
-	struct cdrom_info *info		= drive->driver_data;
-	struct request *rq		= &drive->request_sense_rq;
-
-	ide_debug_log(IDE_DBG_SENSE, "enter");
-
-	if (sense == NULL)
-		sense = &info->sense_data;
-
-	memset(sense, 0, 18);
-
-	/* stuff the sense request in front of our current request */
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_ATA_PC;
-	rq->rq_disk = info->disk;
-
-	rq->data = sense;
-	rq->cmd[0] = GPCMD_REQUEST_SENSE;
-	rq->cmd[4] = 18;
-	rq->data_len = 18;
-
-	rq->cmd_type = REQ_TYPE_SENSE;
-	rq->cmd_flags |= REQ_PREEMPT;
-
-	/* NOTE! Save the failed command in "rq->special" */
-	rq->special = (void *)failed_command;
-
-	if (failed_command)
-		ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x",
-					     failed_command->cmd[0]);
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
 static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
@@ -251,11 +213,16 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 	 * failed request
 	 */
 	struct request *failed = (struct request *)rq->special;
-	struct cdrom_info *info = drive->driver_data;
-	void *sense = &info->sense_data;
+	struct request_sense *sense = &drive->sense_data;
 
 	if (failed) {
 		if (failed->sense) {
+			/*
+			 * Sense is always read into drive->sense_data.
+			 * Copy back if the failed request has its
+			 * sense pointer set.
+			 */
+			memcpy(failed->sense, sense, 18);
 			sense = failed->sense;
 			failed->sense_len = rq->sense_len;
 		}
@@ -431,7 +398,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
 	/* if we got a CHECK_CONDITION status, queue a request sense command */
 	if (stat & ATA_ERR)
-		cdrom_queue_request_sense(drive, NULL, NULL);
+		ide_queue_sense_rq(drive, NULL);
 	return 1;
 
 end_request:
@@ -445,7 +412,7 @@ end_request:
 
 		hwif->rq = NULL;
 
-		cdrom_queue_request_sense(drive, rq->sense, rq);
+		ide_queue_sense_rq(drive, rq);
 		return 1;
 	} else
 		return 2;
@@ -893,6 +860,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		goto out_end;
 	}
 
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 1d97101..93a3cf1b 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -87,10 +87,6 @@ struct cdrom_info {
 
 	struct atapi_toc *toc;
 
-	/* The result of the last successful request sense command
-	   on this device. */
-	struct request_sense sense_data;
-
 	u8 max_speed;		/* Max speed of the drive. */
 	u8 current_speed;	/* Current speed of the drive. */
 
-- 
cgit v0.10.2


From 068753203e6cd085664a62e0fc0636e19b148a12 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense
 buffer

Since we're issuing REQ_TYPE_SENSE now we need to allow those types of
rqs in the ->do_request callbacks. As a future improvement, sense_len
assignment might be unified across all ATAPI devices. Borislav to
check with specs and test.

As a result, get rid of ide_queue_pc_head() and
drive->request_sense_rq.

tj: * Init request sense ide_atapi_pc from sense request.  In the
      longer timer, it would probably better to fold
      ide_create_request_sense_cmd() into its only current user -
      ide_floppy_get_format_progress().

    * ide_retry_pc() no longer takes @disk.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index c6e0348..972c522 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -80,34 +80,6 @@ void ide_init_pc(struct ide_atapi_pc *pc)
 EXPORT_SYMBOL_GPL(ide_init_pc);
 
 /*
- * Generate a new packet command request in front of the request queue, before
- * the current request, so that it will be processed immediately, on the next
- * pass through the driver.
- */
-static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
-			      struct ide_atapi_pc *pc, struct request *rq)
-{
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = (char *)pc;
-	rq->rq_disk = disk;
-
-	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
-	}
-
-	memcpy(rq->cmd, pc->c, 12);
-	if (drive->media == ide_tape)
-		rq->cmd[13] = REQ_IDETAPE_PC1;
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
-/*
  * Add a special packet command request to the tail of the request queue,
  * and wait for it to be serviced.
  */
@@ -254,18 +226,26 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 
 /*
  * Called when an error was detected during the last packet command.
- * We queue a request sense packet command in the head of the request list.
+ * We queue a request sense packet command at the head of the request
+ * queue.
  */
-void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
+void ide_retry_pc(ide_drive_t *drive)
 {
-	struct request *rq = &drive->request_sense_rq;
+	struct request *sense_rq = &drive->sense_rq;
 	struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
 	(void)ide_read_error(drive);
-	ide_create_request_sense_cmd(drive, pc);
+
+	/* init pc from sense_rq */
+	ide_init_pc(pc);
+	memcpy(pc->c, sense_rq->cmd, 12);
+	pc->buf = sense_rq->data;
+	pc->req_xfer = sense_rq->data_len;
+
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
-	ide_queue_pc_head(drive, disk, pc, rq);
+
+	ide_queue_sense_rq(drive, pc);
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -404,7 +384,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			debug_log("[cmd %x]: check condition\n", rq->cmd[0]);
 
 			/* Retry operation */
-			ide_retry_pc(drive, rq->rq_disk);
+			ide_retry_pc(drive);
 
 			/* queued, but not started */
 			return ide_stopped;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 9460033..d3302cc 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -263,7 +263,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		}
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-	} else if (blk_special_request(rq)) {
+	} else if (blk_special_request(rq) || blk_sense_request(rq)) {
 		pc = (struct ide_atapi_pc *)rq->special;
 	} else if (blk_pc_request(rq)) {
 		pc = &floppy->queued_pc;
@@ -273,6 +273,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		goto out_end;
 	}
 
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index aadf53c..8324dfa 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -695,7 +695,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
 				printk(KERN_ERR "ide-tape: %s: I/O error, ",
 						tape->name);
 			/* Retry operation */
-			ide_retry_pc(drive, tape->disk);
+			ide_retry_pc(drive);
 			return ide_stopped;
 		}
 		pc->error = 0;
@@ -752,7 +752,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			(unsigned long long)rq->sector, rq->nr_sectors,
 			rq->current_nr_sectors);
 
-	if (!blk_special_request(rq)) {
+	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
 			"request queue (%d)\n", drive->name, rq->cmd_type);
@@ -840,6 +840,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	BUG();
 
 out:
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a69ccac..9e67cca 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -604,7 +604,6 @@ struct ide_drive_s {
 	unsigned long atapi_flags;
 
 	struct ide_atapi_pc request_sense_pc;
-	struct request request_sense_rq;
 
 	/* current sense rq and buffer */
 	bool sense_rq_armed;
@@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
 int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
-void ide_retry_pc(ide_drive_t *, struct gendisk *);
+void ide_retry_pc(ide_drive_t *drive);
 
 void ide_prep_sense(ide_drive_t *drive, struct request *rq);
 void ide_queue_sense_rq(ide_drive_t *drive, void *special);
-- 
cgit v0.10.2


From 02e7cf8f848841ca21864ccd019e480b73c323b7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-cd,atapi: use bio for internal commands

Impact: unify request data buffer handling

rq->data is used mostly to pass kernel buffer through request queue
without using bio.  There are only a couple of places which still do
this in kernel and converting to bio isn't difficult.

This patch converts ide-cd and atapi to use bio instead of rq->data
for request sense and internal pc commands.  With previous change to
unify sense request handling, this is relatively easily achieved by
adding blk_rq_map_kern() during sense_rq prep and PC issue.

If blk_rq_map_kern() fails for sense, the error is deferred till sense
issue and aborts the failed command which triggered the sense.  Note
that this is a slim possibility as sense prep is done on each command
issue, so for the above condition to actually trigger, all preps since
the last sense issue till the issue of the request which would require
a sense should fail.

* do_request functions might sleep now.  This should be okay as ide
  request_fn - do_ide_request() - is invoked only from make_request
  and plug work.  Make sure this is the case by adding might_sleep()
  to do_ide_request().

* Functions which access the read sense data before the sense request
  is complete now should access bio_data(sense_rq->bio) as the sense
  buffer might have been copied during blk_rq_map_kern().

* ide-tape updated to map sg.

* cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC
  special case.  Simplified.

* tp_ops->output/input_data path dropped from ide_pc_intr().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 972c522..5cefe12 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -94,16 +94,18 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	rq->special = (char *)pc;
 
 	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
+		error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
+					GFP_NOIO);
+		if (error)
+			goto put_req;
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
 	if (drive->media == ide_tape)
 		rq->cmd[13] = REQ_IDETAPE_PC1;
 	error = blk_execute_rq(drive->queue, disk, rq, 0);
+put_req:
 	blk_put_request(rq);
-
 	return error;
 }
 EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
@@ -168,6 +170,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	struct request_sense *sense = &drive->sense_data;
 	struct request *sense_rq = &drive->sense_rq;
 	unsigned int cmd_len, sense_len;
+	int err;
 
 	debug_log("%s: enter\n", __func__);
 
@@ -193,13 +196,19 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	memset(sense, 0, sizeof(*sense));
 
 	blk_rq_init(rq->q, sense_rq);
-	sense_rq->rq_disk = rq->rq_disk;
 
-	sense_rq->data = sense;
+	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
+			      GFP_NOIO);
+	if (unlikely(err)) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "%s: failed to map sense buffer\n",
+			       drive->name);
+		return;
+	}
+
+	sense_rq->rq_disk = rq->rq_disk;
 	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	sense_rq->cmd[4] = cmd_len;
-	sense_rq->data_len = sense_len;
-
 	sense_rq->cmd_type = REQ_TYPE_SENSE;
 	sense_rq->cmd_flags |= REQ_PREEMPT;
 
@@ -210,9 +219,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(ide_prep_sense);
 
-void ide_queue_sense_rq(ide_drive_t *drive, void *special)
+int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 {
-	BUG_ON(!drive->sense_rq_armed);
+	/* deferred failure from ide_prep_sense() */
+	if (!drive->sense_rq_armed) {
+		printk(KERN_WARNING "%s: failed queue sense request\n",
+		       drive->name);
+		return -ENOMEM;
+	}
 
 	drive->sense_rq.special = special;
 	drive->sense_rq_armed = false;
@@ -221,6 +235,7 @@ void ide_queue_sense_rq(ide_drive_t *drive, void *special)
 
 	elv_add_request(drive->queue, &drive->sense_rq,
 			ELEVATOR_INSERT_FRONT, 0);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 
@@ -239,13 +254,14 @@ void ide_retry_pc(ide_drive_t *drive)
 	/* init pc from sense_rq */
 	ide_init_pc(pc);
 	memcpy(pc->c, sense_rq->cmd, 12);
-	pc->buf = sense_rq->data;
+	pc->buf = bio_data(sense_rq->bio);	/* pointer to mapped address */
 	pc->req_xfer = sense_rq->data_len;
 
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 
-	ide_queue_sense_rq(drive, pc);
+	if (ide_queue_sense_rq(drive, pc))
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -317,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	struct ide_cmd *cmd = &hwif->cmd;
 	struct request *rq = hwif->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	xfer_func_t *xferfunc;
 	unsigned int timeout, done;
 	u16 bcount;
 	u8 stat, ireason, dsc = 0;
@@ -411,7 +426,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					rq->errors = -EIO;
 			}
 
-			if (drive->media == ide_tape)
+			if (drive->media == ide_tape && !rq->bio)
 				done = ide_rq_bytes(rq); /* FIXME */
 			else
 				done = blk_rq_bytes(rq);
@@ -448,16 +463,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		return ide_do_reset(drive);
 	}
 
-	xferfunc = write ? tp_ops->output_data : tp_ops->input_data;
-
-	if (drive->media == ide_floppy && pc->buf == NULL) {
+	if (drive->media == ide_tape && pc->bh)
+		done = drive->pc_io_buffers(drive, pc, bcount, write);
+	else {
 		done = min_t(unsigned int, bcount, cmd->nleft);
 		ide_pio_bytes(drive, cmd, write, done);
-	} else if (drive->media == ide_tape && pc->bh) {
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	} else {
-		done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred);
-		xferfunc(drive, NULL, pc->cur_pos, done);
 	}
 
 	/* Update the current position */
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 061d7bb..6736287 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -210,10 +210,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
 	 * For REQ_TYPE_SENSE, "rq->special" points to the original
-	 * failed request
+	 * failed request.  Also, the sense data should be read
+	 * directly from rq which might be different from the original
+	 * sense buffer if it got copied during mapping.
 	 */
 	struct request *failed = (struct request *)rq->special;
-	struct request_sense *sense = &drive->sense_data;
+	void *sense = bio_data(rq->bio);
 
 	if (failed) {
 		if (failed->sense) {
@@ -398,7 +400,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
 	/* if we got a CHECK_CONDITION status, queue a request sense command */
 	if (stat & ATA_ERR)
-		ide_queue_sense_rq(drive, NULL);
+		return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
 	return 1;
 
 end_request:
@@ -412,8 +414,7 @@ end_request:
 
 		hwif->rq = NULL;
 
-		ide_queue_sense_rq(drive, rq);
-		return 1;
+		return ide_queue_sense_rq(drive, rq) ? 2 : 1;
 	} else
 		return 2;
 }
@@ -507,8 +508,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		rq->cmd_flags |= cmd_flags;
 		rq->timeout = timeout;
 		if (buffer) {
-			rq->data = buffer;
-			rq->data_len = *bufflen;
+			error = blk_rq_map_kern(drive->queue, rq, buffer,
+						*bufflen, GFP_NOIO);
+			if (error) {
+				blk_put_request(rq);
+				return error;
+			}
 		}
 
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
@@ -802,15 +807,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 	drive->dma = 0;
 
 	/* sg request */
-	if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
+	if (rq->bio) {
 		struct request_queue *q = drive->queue;
+		char *buf = bio_data(rq->bio);
 		unsigned int alignment;
-		char *buf;
-
-		if (rq->bio)
-			buf = bio_data(rq->bio);
-		else
-			buf = rq->data;
 
 		drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 9b9e8b1..3245c2d 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -481,6 +481,9 @@ void do_ide_request(struct request_queue *q)
 
 	spin_unlock_irq(q->queue_lock);
 
+	/* HLD do_request() callback might sleep, make sure it's okay */
+	might_sleep();
+
 	if (ide_lock_host(host, hwif))
 		goto plug_device_2;
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 8324dfa..9b762a2 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -850,6 +850,9 @@ out:
 
 	cmd.rq = rq;
 
+	ide_init_sg_cmd(&cmd, pc->req_xfer);
+	ide_map_sg(drive, &cmd);
+
 	return ide_tape_issue_pc(drive, &cmd, pc);
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9e67cca..1957461 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *drive);
 
 void ide_prep_sense(ide_drive_t *drive, struct request *rq);
-void ide_queue_sense_rq(ide_drive_t *drive, void *special);
+int ide_queue_sense_rq(ide_drive_t *drive, void *special);
 
 int ide_cd_expiry(ide_drive_t *);
 
-- 
cgit v0.10.2


From 765139ef5f1a4b1d5cb1f1a7a12de7ee61f6500f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:43 +0900
Subject: ide-pm: don't abuse rq->data

Impact: cleanup rq->data usage

ide-pm uses rq->data to carry pointer to struct request_pm_state
through request queue and rq->special is used to carray pointer to
local struct ide_cmd, which isn't necessary.  Use rq->special for
request_pm_state instead and use local ide_cmd in
ide_start_power_step().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 3245c2d..6e3094e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -368,7 +368,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 		if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
-			struct request_pm_state *pm = rq->data;
+			struct request_pm_state *pm = rq->special;
 #ifdef DEBUG_PM
 			printk("%s: start_power_step(step: %d)\n",
 				drive->name, pm->pm_step);
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0d8a151..ba1488b 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -7,7 +7,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
-	struct ide_cmd cmd;
 	int ret;
 
 	/* call ACPI _GTM only once */
@@ -15,11 +14,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 		ide_acpi_get_timing(hwif);
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&cmd, 0, sizeof(cmd));
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
-	rq->special = &cmd;
-	rq->data = &rqpm;
+	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
 	if (mesg.event == PM_EVENT_PRETHAW)
 		mesg.event = PM_EVENT_FREEZE;
@@ -41,7 +38,6 @@ int generic_ide_resume(struct device *dev)
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
-	struct ide_cmd cmd;
 	int err;
 
 	/* call ACPI _PS0 / _STM only once */
@@ -53,12 +49,10 @@ int generic_ide_resume(struct device *dev)
 	ide_acpi_exec_tfs(drive);
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&cmd, 0, sizeof(cmd));
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_PM_RESUME;
 	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = &cmd;
-	rq->data = &rqpm;
+	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;
 	rqpm.pm_state = PM_EVENT_ON;
 
@@ -77,7 +71,7 @@ int generic_ide_resume(struct device *dev)
 
 void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 
 #ifdef DEBUG_PM
 	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -107,10 +101,8 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
 
 ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
-	struct ide_cmd *cmd = rq->special;
-
-	memset(cmd, 0, sizeof(*cmd));
+	struct request_pm_state *pm = rq->special;
+	struct ide_cmd cmd = { };
 
 	switch (pm->pm_step) {
 	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
@@ -123,12 +115,12 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 			return ide_stopped;
 		}
 		if (ata_id_flush_ext_enabled(drive->id))
-			cmd->tf.command = ATA_CMD_FLUSH_EXT;
+			cmd.tf.command = ATA_CMD_FLUSH_EXT;
 		else
-			cmd->tf.command = ATA_CMD_FLUSH;
+			cmd.tf.command = ATA_CMD_FLUSH;
 		goto out_do_tf;
 	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		cmd->tf.command = ATA_CMD_STANDBYNOW1;
+		cmd.tf.command = ATA_CMD_STANDBYNOW1;
 		goto out_do_tf;
 	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
 		ide_set_max_pio(drive);
@@ -141,7 +133,7 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 			ide_complete_power_step(drive, rq);
 		return ide_stopped;
 	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
-		cmd->tf.command = ATA_CMD_IDLEIMMEDIATE;
+		cmd.tf.command = ATA_CMD_IDLEIMMEDIATE;
 		goto out_do_tf;
 	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
 		/*
@@ -163,11 +155,11 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 	return ide_stopped;
 
 out_do_tf:
-	cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd->valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	cmd->protocol = ATA_PROT_NODATA;
+	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
+	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
+	cmd.protocol = ATA_PROT_NODATA;
 
-	return do_rw_taskfile(drive, cmd);
+	return do_rw_taskfile(drive, &cmd);
 }
 
 /**
@@ -181,7 +173,7 @@ out_do_tf:
 void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 {
 	struct request_queue *q = drive->queue;
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 	unsigned long flags;
 
 	ide_complete_power_step(drive, rq);
@@ -207,7 +199,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 
 void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->data;
+	struct request_pm_state *pm = rq->special;
 
 	if (blk_pm_suspend_request(rq) &&
 	    pm->pm_step == IDE_PM_START_SUSPEND)
-- 
cgit v0.10.2


From 08f370f0a2fb223bf48d0bfa2a173be0393c19dc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape,floppy: fix failed command completion after request sense

Impact: fix infinite retry loop

After a command failed, ide-tape and floppy inserts REQUEST_SENSE in
front of the failed command and according to the result, sets
pc->retries, flags and errors.  After REQUEST_SENSE is complete, the
failed command is again at the front of the queue and if the verdict
was to terminate the request, the issue functions tries to complete it
directly by calling drive->pc_callback() and returning ide_stopped.

However, drive->pc_callback() doesn't complete a request.  It only
prepares for completion of the request.  As a result, this creates an
infinite loop where the failed request is retried perpetually.

Fix it by actually ending the request by calling ide_complete_rq().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index d3302cc..d20704a 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -141,6 +141,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 9b762a2..2b9a136 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -643,6 +643,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 		}
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 		return ide_stopped;
 	}
 	debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
-- 
cgit v0.10.2


From eb6a61bb9543aa54d62595e27206b3d3c0293bcc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-atapi,tape,floppy: allow ->pc_callback() to change rq->data_len

Impact: allow residual count implementation in ->pc_callback()

rq->data_len has two duties - carrying the number of input bytes on
issue and carrying residual count back to the issuer on completion.
ide-atapi completion callback ->pc_callback() is the right place to do
this but currently ide-atapi depends on rq->data_len carrying the
original request size after calling ->pc_callback() to complete the pc
request.

This patch makes ide_pc_intr(), ide_tape_issue_pc() and
ide_floppy_issue_pc() cache length to complete before calling
->pc_callback() so that it can modify rq->data_len as necessary.

Note: As using rq->data_len for two purposes can make cases like this
      incorrect in subtle ways, future changes will introduce separate
      field for residual count.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5cefe12..3df5442 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -409,6 +409,16 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
 			dsc = 1;
 
+		/*
+		 * ->pc_callback() might change rq->data_len for
+		 * residual count, cache total length.
+		 */
+		if (!blk_special_request(rq) &&
+		    (drive->media == ide_tape && !rq->bio))
+			done = ide_rq_bytes(rq);        /* FIXME */
+		else
+			done = blk_rq_bytes(rq);
+
 		/* Command finished - Call the callback function */
 		uptodate = drive->pc_callback(drive, dsc);
 
@@ -417,7 +427,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 		if (blk_special_request(rq)) {
 			rq->errors = 0;
-			done = blk_rq_bytes(rq);
 			error = 0;
 		} else {
 
@@ -426,11 +435,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					rq->errors = -EIO;
 			}
 
-			if (drive->media == ide_tape && !rq->bio)
-				done = ide_rq_bytes(rq); /* FIXME */
-			else
-				done = blk_rq_bytes(rq);
-
 			error = uptodate ? 0 : -EIO;
 		}
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index d20704a..537b7c5 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -134,14 +134,17 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
 	drive->pc = pc;
 
 	if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) {
+		unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
 		if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR))
 			ide_floppy_report_error(floppy, pc);
+
 		/* Giving up */
 		pc->error = IDE_DRV_ERROR_GENERAL;
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
+		ide_complete_rq(drive, -EIO, done);
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 2b9a136..8226d52 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -622,6 +622,8 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 
 	if (pc->retries > IDETAPE_MAX_PC_RETRIES ||
 		(pc->flags & PC_FLAG_ABORT)) {
+		unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
 		/*
 		 * We will "abort" retrying a packet command in case legitimate
 		 * error code was received (crossing a filemark, or end of the
@@ -641,9 +643,10 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 			/* Giving up */
 			pc->error = IDE_DRV_ERROR_GENERAL;
 		}
+
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
+		ide_complete_rq(drive, -EIO, done);
 		return ide_stopped;
 	}
 	debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
-- 
cgit v0.10.2


From 7b13354eeaabaf6283b8c669a7d67d104ce7c638 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape: use single continuous buffer

Impact: simpler buffer allocation and handling, kills OOM, fix DMA transfers

ide-tape has its own multiple buffer mechanism using struct
idetape_bh.  It allocates buffer with decreasing order-of-two
allocations so that it results in minimum number of segments.
However, the implementation is quite complex and works in a way that
no other block or ide driver works necessitating a lot of special case
handling.

The benefit this complex allocation scheme brings is questionable as
PIO or DMA the number of segments (16 maximum) doesn't make any
noticeable difference and it also doesn't negate the need for multiple
order allocation which can fail under memory pressure or high
fragmentation although it does lower the highest order necessary by
one when the buffer size isn't power of two.

As the first step to remove the custom buffer management, this patch
makes ide-tape allocate single continous buffer.  The maximum order is
four.  I doubt the change would cause any trouble but if it ever
matters, it should be converted to regular sg mechanism like everyone
else and even in that case dropping custom buffer handling and moving
to standard mechanism first make sense as an intermediate step.

This patch makes the first bh to contain the whole buffer and drops
multi bh handling code.  Following patches will make further changes.

This patch has the side effect of killing OOM triggered by allocation
path and fixing DMA transfers.  Previously, bug in alloc path
triggered OOM on command issue and commands were passed to DMA engine
without DMA-mapping all the segments.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 8226d52..b2afbc7 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -134,7 +134,6 @@ enum {
 struct idetape_bh {
 	u32 b_size;
 	atomic_t b_count;
-	struct idetape_bh *b_reqnext;
 	char *b_data;
 };
 
@@ -228,10 +227,6 @@ typedef struct ide_tape_obj {
 	char *b_data;
 	int b_count;
 
-	int pages_per_buffer;
-	/* Wasted space in each stage */
-	int excess_bh_size;
-
 	/* Measures average tape speed */
 	unsigned long avg_time;
 	int avg_size;
@@ -303,9 +298,7 @@ static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 	struct idetape_bh *bh = pc->bh;
 	int count;
 
-	while (bcount) {
-		if (bh == NULL)
-			break;
+	if (bcount && bh) {
 		count = min(
 			(unsigned int)(bh->b_size - atomic_read(&bh->b_count)),
 			bcount);
@@ -313,15 +306,10 @@ static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 					atomic_read(&bh->b_count), count);
 		bcount -= count;
 		atomic_add(count, &bh->b_count);
-		if (atomic_read(&bh->b_count) == bh->b_size) {
-			bh = bh->b_reqnext;
-			if (bh)
-				atomic_set(&bh->b_count, 0);
-		}
+		if (atomic_read(&bh->b_count) == bh->b_size)
+			pc->bh = NULL;
 	}
 
-	pc->bh = bh;
-
 	return bcount;
 }
 
@@ -331,22 +319,14 @@ static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 	struct idetape_bh *bh = pc->bh;
 	int count;
 
-	while (bcount) {
-		if (bh == NULL)
-			break;
+	if (bcount && bh) {
 		count = min((unsigned int)pc->b_count, (unsigned int)bcount);
 		drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count);
 		bcount -= count;
 		pc->b_data += count;
 		pc->b_count -= count;
-		if (!pc->b_count) {
-			bh = bh->b_reqnext;
-			pc->bh = bh;
-			if (bh) {
-				pc->b_data = bh->b_data;
-				pc->b_count = atomic_read(&bh->b_count);
-			}
-		}
+		if (!pc->b_count)
+			pc->bh = NULL;
 	}
 
 	return bcount;
@@ -355,24 +335,20 @@ static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
 static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
 {
 	struct idetape_bh *bh = pc->bh;
-	int count;
 	unsigned int bcount = pc->xferred;
 
 	if (pc->flags & PC_FLAG_WRITING)
 		return;
-	while (bcount) {
-		if (bh == NULL) {
+	if (bcount) {
+		if (bh == NULL || bcount > bh->b_size) {
 			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
 					__func__);
 			return;
 		}
-		count = min((unsigned int)bh->b_size, (unsigned int)bcount);
-		atomic_set(&bh->b_count, count);
+		atomic_set(&bh->b_count, bcount);
 		if (atomic_read(&bh->b_count) == bh->b_size)
-			bh = bh->b_reqnext;
-		bcount -= count;
+			pc->bh = NULL;
 	}
-	pc->bh = bh;
 }
 
 /*
@@ -439,24 +415,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
 /* Free data buffers completely. */
 static void ide_tape_kfree_buffer(idetape_tape_t *tape)
 {
-	struct idetape_bh *prev_bh, *bh = tape->merge_bh;
-
-	while (bh) {
-		u32 size = bh->b_size;
-
-		while (size) {
-			unsigned int order = fls(size >> PAGE_SHIFT)-1;
-
-			if (bh->b_data)
-				free_pages((unsigned long)bh->b_data, order);
+	struct idetape_bh *bh = tape->merge_bh;
 
-			size &= (order-1);
-			bh->b_data += (1 << order) * PAGE_SIZE;
-		}
-		prev_bh = bh;
-		bh = bh->b_reqnext;
-		kfree(prev_bh);
-	}
+	kfree(bh->b_data);
+	kfree(bh);
 }
 
 static void ide_tape_handle_dsc(ide_drive_t *);
@@ -861,117 +823,50 @@ out:
 }
 
 /*
- * The function below uses __get_free_pages to allocate a data buffer of size
- * tape->buffer_size (or a bit more). We attempt to combine sequential pages as
- * much as possible.
- *
- * It returns a pointer to the newly allocated buffer, or NULL in case of
- * failure.
+ * It returns a pointer to the newly allocated buffer, or NULL in case
+ * of failure.
  */
 static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape,
-						  int full, int clear)
-{
-	struct idetape_bh *prev_bh, *bh, *merge_bh;
-	int pages = tape->pages_per_buffer;
-	unsigned int order, b_allocd;
-	char *b_data = NULL;
-
-	merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-	bh = merge_bh;
-	if (bh == NULL)
-		goto abort;
-
-	order = fls(pages) - 1;
-	bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-	if (!bh->b_data)
-		goto abort;
-	b_allocd = (1 << order) * PAGE_SIZE;
-	pages &= (order-1);
-
-	if (clear)
-		memset(bh->b_data, 0, b_allocd);
-	bh->b_reqnext = NULL;
-	bh->b_size = b_allocd;
-	atomic_set(&bh->b_count, full ? bh->b_size : 0);
+						  int full)
+{
+	struct idetape_bh *bh;
 
-	while (pages) {
-		order = fls(pages) - 1;
-		b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-		if (!b_data)
-			goto abort;
-		b_allocd = (1 << order) * PAGE_SIZE;
-
-		if (clear)
-			memset(b_data, 0, b_allocd);
-
-		/* newly allocated page frames below buffer header or ...*/
-		if (bh->b_data == b_data + b_allocd) {
-			bh->b_size += b_allocd;
-			bh->b_data -= b_allocd;
-			if (full)
-				atomic_add(b_allocd, &bh->b_count);
-			continue;
-		}
-		/* they are above the header */
-		if (b_data == bh->b_data + bh->b_size) {
-			bh->b_size += b_allocd;
-			if (full)
-				atomic_add(b_allocd, &bh->b_count);
-			continue;
-		}
-		prev_bh = bh;
-		bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-		if (!bh) {
-			free_pages((unsigned long) b_data, order);
-			goto abort;
-		}
-		bh->b_reqnext = NULL;
-		bh->b_data = b_data;
-		bh->b_size = b_allocd;
-		atomic_set(&bh->b_count, full ? bh->b_size : 0);
-		prev_bh->b_reqnext = bh;
+	bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
+	if (!bh)
+		return NULL;
 
-		pages &= (order-1);
+	bh->b_data = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (!bh->b_data) {
+		kfree(bh);
+		return NULL;
 	}
 
-	bh->b_size -= tape->excess_bh_size;
-	if (full)
-		atomic_sub(tape->excess_bh_size, &bh->b_count);
-	return merge_bh;
-abort:
-	ide_tape_kfree_buffer(tape);
-	return NULL;
+	bh->b_size = tape->buffer_size;
+	atomic_set(&bh->b_count, full ? bh->b_size : 0);
+
+	return bh;
 }
 
 static int idetape_copy_stage_from_user(idetape_tape_t *tape,
 					const char __user *buf, int n)
 {
 	struct idetape_bh *bh = tape->bh;
-	int count;
 	int ret = 0;
 
-	while (n) {
-		if (bh == NULL) {
+	if (n) {
+		if (bh == NULL || n > bh->b_size - atomic_read(&bh->b_count)) {
 			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
 					__func__);
 			return 1;
 		}
-		count = min((unsigned int)
-				(bh->b_size - atomic_read(&bh->b_count)),
-				(unsigned int)n);
 		if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf,
-				count))
+				   n))
 			ret = 1;
-		n -= count;
-		atomic_add(count, &bh->b_count);
-		buf += count;
-		if (atomic_read(&bh->b_count) == bh->b_size) {
-			bh = bh->b_reqnext;
-			if (bh)
-				atomic_set(&bh->b_count, 0);
-		}
+		atomic_add(n, &bh->b_count);
+		if (atomic_read(&bh->b_count) == bh->b_size)
+			tape->bh = NULL;
 	}
-	tape->bh = bh;
+
 	return ret;
 }
 
@@ -979,30 +874,20 @@ static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf,
 				      int n)
 {
 	struct idetape_bh *bh = tape->bh;
-	int count;
 	int ret = 0;
 
-	while (n) {
-		if (bh == NULL) {
+	if (n) {
+		if (bh == NULL || n > tape->b_count) {
 			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
 					__func__);
 			return 1;
 		}
-		count = min(tape->b_count, n);
-		if  (copy_to_user(buf, tape->b_data, count))
+		if (copy_to_user(buf, tape->b_data, n))
 			ret = 1;
-		n -= count;
-		tape->b_data += count;
-		tape->b_count -= count;
-		buf += count;
-		if (!tape->b_count) {
-			bh = bh->b_reqnext;
-			tape->bh = bh;
-			if (bh) {
-				tape->b_data = bh->b_data;
-				tape->b_count = atomic_read(&bh->b_count);
-			}
-		}
+		tape->b_data += n;
+		tape->b_count -= n;
+		if (!tape->b_count)
+			tape->bh = NULL;
 	}
 	return ret;
 }
@@ -1254,7 +1139,7 @@ static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int blocks, min;
+	int blocks;
 	struct idetape_bh *bh;
 
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
@@ -1269,31 +1154,16 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 	if (tape->merge_bh_size) {
 		blocks = tape->merge_bh_size / tape->blk_size;
 		if (tape->merge_bh_size % tape->blk_size) {
-			unsigned int i;
-
+			unsigned int i = tape->blk_size -
+				tape->merge_bh_size % tape->blk_size;
 			blocks++;
-			i = tape->blk_size - tape->merge_bh_size %
-				tape->blk_size;
-			bh = tape->bh->b_reqnext;
-			while (bh) {
-				atomic_set(&bh->b_count, 0);
-				bh = bh->b_reqnext;
-			}
 			bh = tape->bh;
-			while (i) {
-				if (bh == NULL) {
-					printk(KERN_INFO "ide-tape: bug,"
-							 " bh NULL\n");
-					break;
-				}
-				min = min(i, (unsigned int)(bh->b_size -
-						atomic_read(&bh->b_count)));
+			if (bh) {
 				memset(bh->b_data + atomic_read(&bh->b_count),
-						0, min);
-				atomic_add(min, &bh->b_count);
-				i -= min;
-				bh = bh->b_reqnext;
-			}
+				       0, i);
+				atomic_add(i, &bh->b_count);
+			} else
+				printk(KERN_INFO "ide-tape: bug, bh NULL\n");
 		}
 		(void) idetape_add_chrdev_write_request(drive, blocks);
 		tape->merge_bh_size = 0;
@@ -1321,7 +1191,7 @@ static int idetape_init_read(ide_drive_t *drive)
 					 " 0 now\n");
 			tape->merge_bh_size = 0;
 		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
+		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0);
 		if (!tape->merge_bh)
 			return -ENOMEM;
 		tape->chrdev_dir = IDETAPE_DIR_READ;
@@ -1368,23 +1238,18 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	struct idetape_bh *bh;
+	struct idetape_bh *bh = tape->merge_bh;
 	int blocks;
 
 	while (bcount) {
 		unsigned int count;
 
-		bh = tape->merge_bh;
 		count = min(tape->buffer_size, bcount);
 		bcount -= count;
 		blocks = count / tape->blk_size;
-		while (count) {
-			atomic_set(&bh->b_count,
-				   min(count, (unsigned int)bh->b_size));
-			memset(bh->b_data, 0, atomic_read(&bh->b_count));
-			count -= atomic_read(&bh->b_count);
-			bh = bh->b_reqnext;
-		}
+		atomic_set(&bh->b_count, count);
+		memset(bh->b_data, 0, atomic_read(&bh->b_count));
+
 		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks,
 				      tape->merge_bh);
 	}
@@ -1596,7 +1461,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 				"should be 0 now\n");
 			tape->merge_bh_size = 0;
 		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
+		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0);
 		if (!tape->merge_bh)
 			return -ENOMEM;
 		tape->chrdev_dir = IDETAPE_DIR_WRITE;
@@ -1970,7 +1835,7 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor)
 	idetape_tape_t *tape = drive->driver_data;
 
 	ide_tape_flush_merge_buffer(drive);
-	tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0);
+	tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1);
 	if (tape->merge_bh != NULL) {
 		idetape_pad_zeros(drive, tape->blk_size *
 				(tape->user_bs_factor - 1));
@@ -2201,11 +2066,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
 		tape->buffer_size = *ctl * tape->blk_size;
 	}
 	buffer_size = tape->buffer_size;
-	tape->pages_per_buffer = buffer_size / PAGE_SIZE;
-	if (buffer_size % PAGE_SIZE) {
-		tape->pages_per_buffer++;
-		tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE;
-	}
 
 	/* select the "best" DSC read/write polling freq */
 	speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]);
-- 
cgit v0.10.2


From e998f30b45efb99a3c3ce7b5483f76317a17abed Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape: use standard data transfer mechanism

Impact: use standard way to transfer data

ide-tape uses rq in an interesting way.  For r/w requests, rq->special
is used to carry a private buffer management structure idetape_bh and
rq->nr_sectors and current_nr_sectors are initialized to the number of
idetape blocks which isn't necessary 512 bytes.  Also,
rq->current_nr_sectors is used to report back the residual count in
units of idetape blocks.

This peculiarity taxes both block layer and ide.  ide-atapi has
different paths and hooks to accomodate it and what a rq means becomes
quite confusing and making changes at the block layer becomes quite
difficult and error-prone.

This patch makes ide-tape use bio instead.  With the previous patch,
ide-tape currently is using single contiguos buffer so replacing it
isn't difficult.  Data buffer is mapped into bio using
blk_rq_map_kern() in idetape_queue_rw_tail().  idetape_io_buffers()
and idetape_update_buffers() are dropped and pc->bh is set to null to
tell ide-atapi to use standard data transfer mechanism and idetape_bh
byte counts are updated by the issuer on completion using the residual
count.

This change also nicely removes the FIXME in ide_pc_intr() where
ide-tape rqs need to be completed using ide_rq_bytes() instead of
blk_rq_bytes() (although this didn't really matter as the request
didn't have bio).

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 3df5442..b9dd450 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -413,11 +413,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		 * ->pc_callback() might change rq->data_len for
 		 * residual count, cache total length.
 		 */
-		if (!blk_special_request(rq) &&
-		    (drive->media == ide_tape && !rq->bio))
-			done = ide_rq_bytes(rq);        /* FIXME */
-		else
-			done = blk_rq_bytes(rq);
+		done = blk_rq_bytes(rq);
 
 		/* Command finished - Call the callback function */
 		uptodate = drive->pc_callback(drive, dsc);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index b2afbc7..b373ac8 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -292,65 +292,6 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i)
 	return tape;
 }
 
-static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				  unsigned int bcount)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-
-	if (bcount && bh) {
-		count = min(
-			(unsigned int)(bh->b_size - atomic_read(&bh->b_count)),
-			bcount);
-		drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data +
-					atomic_read(&bh->b_count), count);
-		bcount -= count;
-		atomic_add(count, &bh->b_count);
-		if (atomic_read(&bh->b_count) == bh->b_size)
-			pc->bh = NULL;
-	}
-
-	return bcount;
-}
-
-static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				   unsigned int bcount)
-{
-	struct idetape_bh *bh = pc->bh;
-	int count;
-
-	if (bcount && bh) {
-		count = min((unsigned int)pc->b_count, (unsigned int)bcount);
-		drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count);
-		bcount -= count;
-		pc->b_data += count;
-		pc->b_count -= count;
-		if (!pc->b_count)
-			pc->bh = NULL;
-	}
-
-	return bcount;
-}
-
-static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-	struct idetape_bh *bh = pc->bh;
-	unsigned int bcount = pc->xferred;
-
-	if (pc->flags & PC_FLAG_WRITING)
-		return;
-	if (bcount) {
-		if (bh == NULL || bcount > bh->b_size) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return;
-		}
-		atomic_set(&bh->b_count, bcount);
-		if (atomic_read(&bh->b_count) == bh->b_size)
-			pc->bh = NULL;
-	}
-}
-
 /*
  * called on each failed packet command retry to analyze the request sense. We
  * currently do not utilize this information.
@@ -368,12 +309,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
 		 pc->c[0], tape->sense_key, tape->asc, tape->ascq);
 
 	/* Correct pc->xferred by asking the tape.	 */
-	if (pc->flags & PC_FLAG_DMA_ERROR) {
+	if (pc->flags & PC_FLAG_DMA_ERROR)
 		pc->xferred = pc->req_xfer -
 			tape->blk_size *
 			get_unaligned_be32(&sense[3]);
-		idetape_update_buffers(drive, pc);
-	}
 
 	/*
 	 * If error was the result of a zero-length read or write command,
@@ -458,7 +397,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		}
 
 		tape->first_frame += blocks;
-		rq->current_nr_sectors -= blocks;
+		rq->data_len -= blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
@@ -520,19 +459,6 @@ static void ide_tape_handle_dsc(ide_drive_t *drive)
 	idetape_postpone_request(drive);
 }
 
-static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				unsigned int bcount, int write)
-{
-	unsigned int bleft;
-
-	if (write)
-		bleft = idetape_output_buffers(drive, pc, bcount);
-	else
-		bleft = idetape_input_buffers(drive, pc, bcount);
-
-	return bcount - bleft;
-}
-
 /*
  * Packet Command Interface
  *
@@ -683,7 +609,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->bh = bh;
+	pc->bh = NULL;
 	pc->buf = NULL;
 	pc->buf_size = length * tape->blk_size;
 	pc->req_xfer = pc->buf_size;
@@ -1063,10 +989,12 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
 				 struct idetape_bh *bh)
 {
 	idetape_tape_t *tape = drive->driver_data;
+	size_t size = blocks * tape->blk_size;
 	struct request *rq;
-	int ret, errors;
+	int ret;
 
 	debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
+	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
@@ -1074,21 +1002,29 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
 	rq->rq_disk = tape->disk;
 	rq->special = (void *)bh;
 	rq->sector = tape->first_frame;
-	rq->nr_sectors = blocks;
-	rq->current_nr_sectors = blocks;
+
+	if (size) {
+		ret = blk_rq_map_kern(drive->queue, rq, bh->b_data, size,
+				      __GFP_WAIT);
+		if (ret)
+			goto out_put;
+	}
+
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
-	errors = rq->errors;
-	ret = tape->blk_size * (blocks - rq->current_nr_sectors);
-	blk_put_request(rq);
+	/* calculate the number of transferred bytes and update bh */
+	size -= rq->data_len;
+	if (cmd == REQ_IDETAPE_READ)
+		atomic_add(size, &bh->b_count);
 
-	if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0)
-		return 0;
+	ret = size;
+	if (rq->errors == IDE_DRV_ERROR_GENERAL)
+		ret = -EIO;
 
 	if (tape->merge_bh)
 		idetape_init_merge_buffer(tape);
-	if (errors == IDE_DRV_ERROR_GENERAL)
-		return -EIO;
+out_put:
+	blk_put_request(rq);
 	return ret;
 }
 
@@ -2034,8 +1970,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
 	u16 *ctl = (u16 *)&tape->caps[12];
 
 	drive->pc_callback	 = ide_tape_callback;
-	drive->pc_update_buffers = idetape_update_buffers;
-	drive->pc_io_buffers	 = ide_tape_io_buffers;
 
 	drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP;
 
-- 
cgit v0.10.2


From 6cf3d545f7d71b183e5b89960d4cc850a42c410d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape: kill idetape_bh

Impact: kill now unnecessary idetape_bh

With everything using standard mechanisms, there is no need for
idetape_bh anymore.  Kill it and use tape->buf, cur and valid to
describe data buffer instead.

Changes worth mentioning are...

* idetape_queue_rq_tail() now always queue tape->buf and and adjusts
  buffer state properly before completion.

* idetape_pad_zeros() clears the buffer only once.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index b373ac8..d2e9c09 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -131,12 +131,6 @@ enum {
 	IDETAPE_DIR_WRITE = (1 << 2),
 };
 
-struct idetape_bh {
-	u32 b_size;
-	atomic_t b_count;
-	char *b_data;
-};
-
 /* Tape door status */
 #define DOOR_UNLOCKED			0
 #define DOOR_LOCKED			1
@@ -218,14 +212,12 @@ typedef struct ide_tape_obj {
 
 	/* Data buffer size chosen based on the tape's recommendation */
 	int buffer_size;
-	/* merge buffer */
-	struct idetape_bh *merge_bh;
-	/* size of the merge buffer */
-	int merge_bh_size;
-	/* pointer to current buffer head within the merge buffer */
-	struct idetape_bh *bh;
-	char *b_data;
-	int b_count;
+	/* Staging buffer of buffer_size bytes */
+	void *buf;
+	/* The read/write cursor */
+	void *cur;
+	/* The number of valid bytes in buf */
+	size_t valid;
 
 	/* Measures average tape speed */
 	unsigned long avg_time;
@@ -351,15 +343,6 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
 	}
 }
 
-/* Free data buffers completely. */
-static void ide_tape_kfree_buffer(idetape_tape_t *tape)
-{
-	struct idetape_bh *bh = tape->merge_bh;
-
-	kfree(bh->b_data);
-	kfree(bh);
-}
-
 static void ide_tape_handle_dsc(ide_drive_t *);
 
 static int ide_tape_callback(ide_drive_t *drive, int dsc)
@@ -603,8 +586,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 				   struct ide_atapi_pc *pc, struct request *rq,
 				   u8 opcode)
 {
-	struct idetape_bh *bh = (struct idetape_bh *)rq->special;
-	unsigned int length = rq->current_nr_sectors;
+	unsigned int length = rq->nr_sectors;
 
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
@@ -616,14 +598,11 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	if (pc->req_xfer == tape->buffer_size)
 		pc->flags |= PC_FLAG_DMA_OK;
 
-	if (opcode == READ_6) {
+	if (opcode == READ_6)
 		pc->c[0] = READ_6;
-		atomic_set(&bh->b_count, 0);
-	} else if (opcode == WRITE_6) {
+	else if (opcode == WRITE_6) {
 		pc->c[0] = WRITE_6;
 		pc->flags |= PC_FLAG_WRITING;
-		pc->b_data = bh->b_data;
-		pc->b_count = atomic_read(&bh->b_count);
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
@@ -639,10 +618,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	struct ide_cmd cmd;
 	u8 stat;
 
-	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu,"
-			" current_nr_sectors: %u\n",
-			(unsigned long long)rq->sector, rq->nr_sectors,
-			rq->current_nr_sectors);
+	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n"
+		  (unsigned long long)rq->sector, rq->nr_sectors);
 
 	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
@@ -749,89 +726,6 @@ out:
 }
 
 /*
- * It returns a pointer to the newly allocated buffer, or NULL in case
- * of failure.
- */
-static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape,
-						  int full)
-{
-	struct idetape_bh *bh;
-
-	bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-	if (!bh)
-		return NULL;
-
-	bh->b_data = kmalloc(tape->buffer_size, GFP_KERNEL);
-	if (!bh->b_data) {
-		kfree(bh);
-		return NULL;
-	}
-
-	bh->b_size = tape->buffer_size;
-	atomic_set(&bh->b_count, full ? bh->b_size : 0);
-
-	return bh;
-}
-
-static int idetape_copy_stage_from_user(idetape_tape_t *tape,
-					const char __user *buf, int n)
-{
-	struct idetape_bh *bh = tape->bh;
-	int ret = 0;
-
-	if (n) {
-		if (bh == NULL || n > bh->b_size - atomic_read(&bh->b_count)) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return 1;
-		}
-		if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf,
-				   n))
-			ret = 1;
-		atomic_add(n, &bh->b_count);
-		if (atomic_read(&bh->b_count) == bh->b_size)
-			tape->bh = NULL;
-	}
-
-	return ret;
-}
-
-static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf,
-				      int n)
-{
-	struct idetape_bh *bh = tape->bh;
-	int ret = 0;
-
-	if (n) {
-		if (bh == NULL || n > tape->b_count) {
-			printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-					__func__);
-			return 1;
-		}
-		if (copy_to_user(buf, tape->b_data, n))
-			ret = 1;
-		tape->b_data += n;
-		tape->b_count -= n;
-		if (!tape->b_count)
-			tape->bh = NULL;
-	}
-	return ret;
-}
-
-static void idetape_init_merge_buffer(idetape_tape_t *tape)
-{
-	struct idetape_bh *bh = tape->merge_bh;
-	tape->bh = tape->merge_bh;
-
-	if (tape->chrdev_dir == IDETAPE_DIR_WRITE)
-		atomic_set(&bh->b_count, 0);
-	else {
-		tape->b_data = bh->b_data;
-		tape->b_count = atomic_read(&bh->b_count);
-	}
-}
-
-/*
  * Write a filemark if write_filemark=1. Flush the device buffers without
  * writing a filemark otherwise.
  */
@@ -928,10 +822,10 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
 		return;
 
 	clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags);
-	tape->merge_bh_size = 0;
-	if (tape->merge_bh != NULL) {
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+	tape->valid = 0;
+	if (tape->buf != NULL) {
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
@@ -985,8 +879,7 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
  * Generate a read/write request for the block device interface and wait for it
  * to be serviced.
  */
-static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
-				 struct idetape_bh *bh)
+static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks)
 {
 	idetape_tape_t *tape = drive->driver_data;
 	size_t size = blocks * tape->blk_size;
@@ -1000,11 +893,10 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
-	rq->special = (void *)bh;
 	rq->sector = tape->first_frame;
 
 	if (size) {
-		ret = blk_rq_map_kern(drive->queue, rq, bh->b_data, size,
+		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
 				      __GFP_WAIT);
 		if (ret)
 			goto out_put;
@@ -1012,17 +904,17 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
 
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
-	/* calculate the number of transferred bytes and update bh */
+	/* calculate the number of transferred bytes and update buffer state */
 	size -= rq->data_len;
+	tape->cur = tape->buf;
 	if (cmd == REQ_IDETAPE_READ)
-		atomic_add(size, &bh->b_count);
+		tape->valid = size;
+	else
+		tape->valid = 0;
 
 	ret = size;
 	if (rq->errors == IDE_DRV_ERROR_GENERAL)
 		ret = -EIO;
-
-	if (tape->merge_bh)
-		idetape_init_merge_buffer(tape);
 out_put:
 	blk_put_request(rq);
 	return ret;
@@ -1064,49 +956,33 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
 /* Queue up a character device originated write request. */
 static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
 {
-	idetape_tape_t *tape = drive->driver_data;
-
 	debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
 
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
-				     blocks, tape->merge_bh);
+	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks);
 }
 
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
 	int blocks;
-	struct idetape_bh *bh;
 
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
 		printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
 				" but we are not writing.\n");
 		return;
 	}
-	if (tape->merge_bh_size > tape->buffer_size) {
-		printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n");
-		tape->merge_bh_size = tape->buffer_size;
-	}
-	if (tape->merge_bh_size) {
-		blocks = tape->merge_bh_size / tape->blk_size;
-		if (tape->merge_bh_size % tape->blk_size) {
-			unsigned int i = tape->blk_size -
-				tape->merge_bh_size % tape->blk_size;
+	if (tape->buf) {
+		blocks = tape->valid / tape->blk_size;
+		if (tape->valid % tape->blk_size) {
 			blocks++;
-			bh = tape->bh;
-			if (bh) {
-				memset(bh->b_data + atomic_read(&bh->b_count),
-				       0, i);
-				atomic_add(i, &bh->b_count);
-			} else
-				printk(KERN_INFO "ide-tape: bug, bh NULL\n");
+			memset(tape->buf + tape->valid, 0,
+			       tape->blk_size - tape->valid % tape->blk_size);
 		}
 		(void) idetape_add_chrdev_write_request(drive, blocks);
-		tape->merge_bh_size = 0;
 	}
-	if (tape->merge_bh != NULL) {
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+	if (tape->buf != NULL) {
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
 }
@@ -1122,15 +998,15 @@ static int idetape_init_read(ide_drive_t *drive)
 			ide_tape_flush_merge_buffer(drive);
 			idetape_flush_tape_buffers(drive);
 		}
-		if (tape->merge_bh || tape->merge_bh_size) {
-			printk(KERN_ERR "ide-tape: merge_bh_size should be"
-					 " 0 now\n");
-			tape->merge_bh_size = 0;
+		if (tape->buf || tape->valid) {
+			printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+			tape->valid = 0;
 		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0);
-		if (!tape->merge_bh)
+		tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+		if (!tape->buf)
 			return -ENOMEM;
 		tape->chrdev_dir = IDETAPE_DIR_READ;
+		tape->cur = tape->buf;
 
 		/*
 		 * Issue a read 0 command to ensure that DSC handshake is
@@ -1140,11 +1016,10 @@ static int idetape_init_read(ide_drive_t *drive)
 		 */
 		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
 			bytes_read = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_READ, 0,
-							tape->merge_bh);
+							REQ_IDETAPE_READ, 0);
 			if (bytes_read < 0) {
-				ide_tape_kfree_buffer(tape);
-				tape->merge_bh = NULL;
+				kfree(tape->buf);
+				tape->buf = NULL;
 				tape->chrdev_dir = IDETAPE_DIR_NONE;
 				return bytes_read;
 			}
@@ -1157,8 +1032,6 @@ static int idetape_init_read(ide_drive_t *drive)
 /* called from idetape_chrdev_read() to service a chrdev read request. */
 static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 {
-	idetape_tape_t *tape = drive->driver_data;
-
 	debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks);
 
 	/* If we are at a filemark, return a read length of 0 */
@@ -1167,27 +1040,21 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 
 	idetape_init_read(drive);
 
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks,
-				     tape->merge_bh);
+	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks);
 }
 
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	struct idetape_bh *bh = tape->merge_bh;
-	int blocks;
+
+	memset(tape->buf, 0, tape->buffer_size);
 
 	while (bcount) {
-		unsigned int count;
+		unsigned int count = min(tape->buffer_size, bcount);
 
-		count = min(tape->buffer_size, bcount);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
+				      count / tape->blk_size);
 		bcount -= count;
-		blocks = count / tape->blk_size;
-		atomic_set(&bh->b_count, count);
-		memset(bh->b_data, 0, atomic_read(&bh->b_count));
-
-		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks,
-				      tape->merge_bh);
 	}
 }
 
@@ -1267,7 +1134,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
 	}
 
 	if (tape->chrdev_dir == IDETAPE_DIR_READ) {
-		tape->merge_bh_size = 0;
+		tape->valid = 0;
 		if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
 			++count;
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1333,20 +1200,20 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		return rc;
 	if (count == 0)
 		return (0);
-	if (tape->merge_bh_size) {
-		actually_read = min((unsigned int)(tape->merge_bh_size),
-				    (unsigned int)count);
-		if (idetape_copy_stage_to_user(tape, buf, actually_read))
+	if (tape->valid) {
+		actually_read = min_t(unsigned int, tape->valid, count);
+		if (copy_to_user(buf, tape->cur, actually_read))
 			ret = -EFAULT;
 		buf += actually_read;
-		tape->merge_bh_size -= actually_read;
 		count -= actually_read;
+		tape->cur += actually_read;
+		tape->valid -= actually_read;
 	}
 	while (count >= tape->buffer_size) {
 		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
 		if (bytes_read <= 0)
 			goto finish;
-		if (idetape_copy_stage_to_user(tape, buf, bytes_read))
+		if (copy_to_user(buf, tape->cur, bytes_read))
 			ret = -EFAULT;
 		buf += bytes_read;
 		count -= bytes_read;
@@ -1357,10 +1224,11 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		if (bytes_read <= 0)
 			goto finish;
 		temp = min((unsigned long)count, (unsigned long)bytes_read);
-		if (idetape_copy_stage_to_user(tape, buf, temp))
+		if (copy_to_user(buf, tape->cur, temp))
 			ret = -EFAULT;
 		actually_read += temp;
-		tape->merge_bh_size = bytes_read-temp;
+		tape->cur += temp;
+		tape->valid -= temp;
 	}
 finish:
 	if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
@@ -1392,16 +1260,15 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
 		if (tape->chrdev_dir == IDETAPE_DIR_READ)
 			ide_tape_discard_merge_buffer(drive, 1);
-		if (tape->merge_bh || tape->merge_bh_size) {
-			printk(KERN_ERR "ide-tape: merge_bh_size "
-				"should be 0 now\n");
-			tape->merge_bh_size = 0;
+		if (tape->buf || tape->valid) {
+			printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+			tape->valid = 0;
 		}
-		tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0);
-		if (!tape->merge_bh)
+		tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+		if (!tape->buf)
 			return -ENOMEM;
 		tape->chrdev_dir = IDETAPE_DIR_WRITE;
-		idetape_init_merge_buffer(tape);
+		tape->cur = tape->buf;
 
 		/*
 		 * Issue a write 0 command to ensure that DSC handshake is
@@ -1411,11 +1278,10 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 		 */
 		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
 			ssize_t retval = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_WRITE, 0,
-							tape->merge_bh);
+							REQ_IDETAPE_WRITE, 0);
 			if (retval < 0) {
-				ide_tape_kfree_buffer(tape);
-				tape->merge_bh = NULL;
+				kfree(tape->buf);
+				tape->buf = NULL;
 				tape->chrdev_dir = IDETAPE_DIR_NONE;
 				return retval;
 			}
@@ -1423,23 +1289,19 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	}
 	if (count == 0)
 		return (0);
-	if (tape->merge_bh_size) {
-		if (tape->merge_bh_size >= tape->buffer_size) {
-			printk(KERN_ERR "ide-tape: bug: merge buf too big\n");
-			tape->merge_bh_size = 0;
-		}
-		actually_written = min((unsigned int)
-				(tape->buffer_size - tape->merge_bh_size),
-				(unsigned int)count);
-		if (idetape_copy_stage_from_user(tape, buf, actually_written))
-				ret = -EFAULT;
+	if (tape->valid < tape->buffer_size) {
+		actually_written = min_t(unsigned int,
+					 tape->buffer_size - tape->valid,
+					 count);
+		if (copy_from_user(tape->cur, buf, actually_written))
+			ret = -EFAULT;
 		buf += actually_written;
-		tape->merge_bh_size += actually_written;
 		count -= actually_written;
+		tape->cur += actually_written;
+		tape->valid += actually_written;
 
-		if (tape->merge_bh_size == tape->buffer_size) {
+		if (tape->valid == tape->buffer_size) {
 			ssize_t retval;
-			tape->merge_bh_size = 0;
 			retval = idetape_add_chrdev_write_request(drive, ctl);
 			if (retval <= 0)
 				return (retval);
@@ -1447,7 +1309,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	}
 	while (count >= tape->buffer_size) {
 		ssize_t retval;
-		if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size))
+		if (copy_from_user(tape->cur, buf, tape->buffer_size))
 			ret = -EFAULT;
 		buf += tape->buffer_size;
 		count -= tape->buffer_size;
@@ -1458,9 +1320,10 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	}
 	if (count) {
 		actually_written += count;
-		if (idetape_copy_stage_from_user(tape, buf, count))
+		if (copy_from_user(tape->cur, buf, count))
 			ret = -EFAULT;
-		tape->merge_bh_size += count;
+		tape->cur += count;
+		tape->valid += count;
 	}
 	return ret ? ret : actually_written;
 }
@@ -1623,7 +1486,7 @@ static int idetape_chrdev_ioctl(struct inode *inode, struct file *file,
 		idetape_flush_tape_buffers(drive);
 	}
 	if (cmd == MTIOCGET || cmd == MTIOCPOS) {
-		block_offset = tape->merge_bh_size /
+		block_offset = tape->valid /
 			(tape->blk_size * tape->user_bs_factor);
 		position = idetape_read_position(drive);
 		if (position < 0)
@@ -1771,12 +1634,12 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor)
 	idetape_tape_t *tape = drive->driver_data;
 
 	ide_tape_flush_merge_buffer(drive);
-	tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1);
-	if (tape->merge_bh != NULL) {
+	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (tape->buf != NULL) {
 		idetape_pad_zeros(drive, tape->blk_size *
 				(tape->user_bs_factor - 1));
-		ide_tape_kfree_buffer(tape);
-		tape->merge_bh = NULL;
+		kfree(tape->buf);
+		tape->buf = NULL;
 	}
 	idetape_write_filemark(drive);
 	idetape_flush_tape_buffers(drive);
@@ -2042,7 +1905,7 @@ static void ide_tape_release(struct device *dev)
 	ide_drive_t *drive = tape->drive;
 	struct gendisk *g = tape->disk;
 
-	BUG_ON(tape->merge_bh_size);
+	BUG_ON(tape->valid);
 
 	drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
 	drive->driver_data = NULL;
-- 
cgit v0.10.2


From 3596b66452491a3cff26256a5e6e6061a66c4142 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:02 +0900
Subject: ide-tape: unify r/w init paths

Impact: cleanup

Read and write init paths are almost identical.  Unify them into
idetape_init_rw().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d2e9c09..4da7fa9 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -987,42 +987,50 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 	tape->chrdev_dir = IDETAPE_DIR_NONE;
 }
 
-static int idetape_init_read(ide_drive_t *drive)
+static int idetape_init_rw(ide_drive_t *drive, int dir)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int bytes_read;
+	int rc;
 
-	/* Initialize read operation */
-	if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-		if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
-			ide_tape_flush_merge_buffer(drive);
-			idetape_flush_tape_buffers(drive);
-		}
-		if (tape->buf || tape->valid) {
-			printk(KERN_ERR "ide-tape: valid should be 0 now\n");
-			tape->valid = 0;
-		}
-		tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
-		if (!tape->buf)
-			return -ENOMEM;
-		tape->chrdev_dir = IDETAPE_DIR_READ;
-		tape->cur = tape->buf;
+	BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE);
 
-		/*
-		 * Issue a read 0 command to ensure that DSC handshake is
-		 * switched from completion mode to buffer available mode.
-		 * No point in issuing this if DSC overlap isn't supported, some
-		 * drives (Seagate STT3401A) will return an error.
-		 */
-		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-			bytes_read = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_READ, 0);
-			if (bytes_read < 0) {
-				kfree(tape->buf);
-				tape->buf = NULL;
-				tape->chrdev_dir = IDETAPE_DIR_NONE;
-				return bytes_read;
-			}
+	if (tape->chrdev_dir == dir)
+		return 0;
+
+	if (tape->chrdev_dir == IDETAPE_DIR_READ)
+		ide_tape_discard_merge_buffer(drive, 1);
+	else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
+		ide_tape_flush_merge_buffer(drive);
+		idetape_flush_tape_buffers(drive);
+	}
+
+	if (tape->buf || tape->valid) {
+		printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+		tape->valid = 0;
+	}
+
+	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+	if (!tape->buf)
+		return -ENOMEM;
+	tape->chrdev_dir = dir;
+	tape->cur = tape->buf;
+
+	/*
+	 * Issue a 0 rw command to ensure that DSC handshake is
+	 * switched from completion mode to buffer available mode.  No
+	 * point in issuing this if DSC overlap isn't supported, some
+	 * drives (Seagate STT3401A) will return an error.
+	 */
+	if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
+		int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ
+						  : REQ_IDETAPE_WRITE;
+
+		rc = idetape_queue_rw_tail(drive, cmd, 0);
+		if (rc < 0) {
+			kfree(tape->buf);
+			tape->buf = NULL;
+			tape->chrdev_dir = IDETAPE_DIR_NONE;
+			return rc;
 		}
 	}
 
@@ -1038,7 +1046,7 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 	if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
 		return 0;
 
-	idetape_init_read(drive);
+	idetape_init_rw(drive, IDETAPE_DIR_READ);
 
 	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks);
 }
@@ -1195,7 +1203,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 			    (count % tape->blk_size) == 0)
 				tape->user_bs_factor = count / tape->blk_size;
 	}
-	rc = idetape_init_read(drive);
+	rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
 	if (rc < 0)
 		return rc;
 	if (count == 0)
@@ -1249,6 +1257,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	ssize_t actually_written = 0;
 	ssize_t ret = 0;
 	u16 ctl = *(u16 *)&tape->caps[12];
+	int rc;
 
 	/* The drive is write protected. */
 	if (tape->write_prot)
@@ -1257,36 +1266,9 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
 	/* Initialize write operation */
-	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
-		if (tape->chrdev_dir == IDETAPE_DIR_READ)
-			ide_tape_discard_merge_buffer(drive, 1);
-		if (tape->buf || tape->valid) {
-			printk(KERN_ERR "ide-tape: valid should be 0 now\n");
-			tape->valid = 0;
-		}
-		tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
-		if (!tape->buf)
-			return -ENOMEM;
-		tape->chrdev_dir = IDETAPE_DIR_WRITE;
-		tape->cur = tape->buf;
-
-		/*
-		 * Issue a write 0 command to ensure that DSC handshake is
-		 * switched from completion mode to buffer available mode. No
-		 * point in issuing this if DSC overlap isn't supported, some
-		 * drives (Seagate STT3401A) will return an error.
-		 */
-		if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-			ssize_t retval = idetape_queue_rw_tail(drive,
-							REQ_IDETAPE_WRITE, 0);
-			if (retval < 0) {
-				kfree(tape->buf);
-				tape->buf = NULL;
-				tape->chrdev_dir = IDETAPE_DIR_NONE;
-				return retval;
-			}
-		}
-	}
+	rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
+	if (rc < 0)
+		return rc;
 	if (count == 0)
 		return (0);
 	if (tape->valid < tape->buffer_size) {
-- 
cgit v0.10.2


From 71294cf93d22ceaa75448cc6ebee2c65897be8c2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide-tape: use byte size instead of sectors on rw issue functions

Impact: cleanup

Byte size is what most issue functions deal with, make
idetape_queue_rw_tail() and its wrappers take byte size instead of
sector counts.  idetape_chrdev_read() and write() functions are
converted to use tape->buffer_size instead of ctl from tape->cap.

This cleans up code a little bit and will ease the next r/w
reimplementation.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 4da7fa9..d5e9bb2 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -879,15 +879,15 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
  * Generate a read/write request for the block device interface and wait for it
  * to be serviced.
  */
-static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks)
+static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	size_t size = blocks * tape->blk_size;
 	struct request *rq;
 	int ret;
 
 	debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
 	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
+	BUG_ON(size < 0 || size % tape->blk_size);
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
@@ -954,17 +954,16 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
 }
 
 /* Queue up a character device originated write request. */
-static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
+static int idetape_add_chrdev_write_request(ide_drive_t *drive, int size)
 {
 	debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
 
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks);
+	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, size);
 }
 
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
-	int blocks;
 
 	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
 		printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
@@ -972,15 +971,10 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 		return;
 	}
 	if (tape->buf) {
-		blocks = tape->valid / tape->blk_size;
-		if (tape->valid % tape->blk_size) {
-			blocks++;
-			memset(tape->buf + tape->valid, 0,
-			       tape->blk_size - tape->valid % tape->blk_size);
-		}
-		(void) idetape_add_chrdev_write_request(drive, blocks);
-	}
-	if (tape->buf != NULL) {
+		size_t aligned = roundup(tape->valid, tape->blk_size);
+
+		memset(tape->cur, 0, aligned - tape->valid);
+		idetape_add_chrdev_write_request(drive, aligned);
 		kfree(tape->buf);
 		tape->buf = NULL;
 	}
@@ -1038,9 +1032,9 @@ static int idetape_init_rw(ide_drive_t *drive, int dir)
 }
 
 /* called from idetape_chrdev_read() to service a chrdev read request. */
-static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
+static int idetape_add_chrdev_read_request(ide_drive_t *drive, int size)
 {
-	debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks);
+	debug_log(DBG_PROCS, "Enter %s, %d bytes\n", __func__, size);
 
 	/* If we are at a filemark, return a read length of 0 */
 	if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
@@ -1048,7 +1042,7 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
 
 	idetape_init_rw(drive, IDETAPE_DIR_READ);
 
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks);
+	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, size);
 }
 
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
@@ -1060,8 +1054,7 @@ static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 	while (bcount) {
 		unsigned int count = min(tape->buffer_size, bcount);
 
-		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
-				      count / tape->blk_size);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count);
 		bcount -= count;
 	}
 }
@@ -1193,7 +1186,6 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 	ide_drive_t *drive = tape->drive;
 	ssize_t bytes_read, temp, actually_read = 0, rc;
 	ssize_t ret = 0;
-	u16 ctl = *(u16 *)&tape->caps[12];
 
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
@@ -1218,7 +1210,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		tape->valid -= actually_read;
 	}
 	while (count >= tape->buffer_size) {
-		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
+		bytes_read = idetape_add_chrdev_read_request(drive,
+							     tape->buffer_size);
 		if (bytes_read <= 0)
 			goto finish;
 		if (copy_to_user(buf, tape->cur, bytes_read))
@@ -1228,7 +1221,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		actually_read += bytes_read;
 	}
 	if (count) {
-		bytes_read = idetape_add_chrdev_read_request(drive, ctl);
+		bytes_read = idetape_add_chrdev_read_request(drive,
+							     tape->buffer_size);
 		if (bytes_read <= 0)
 			goto finish;
 		temp = min((unsigned long)count, (unsigned long)bytes_read);
@@ -1256,7 +1250,6 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	ide_drive_t *drive = tape->drive;
 	ssize_t actually_written = 0;
 	ssize_t ret = 0;
-	u16 ctl = *(u16 *)&tape->caps[12];
 	int rc;
 
 	/* The drive is write protected. */
@@ -1284,7 +1277,8 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 
 		if (tape->valid == tape->buffer_size) {
 			ssize_t retval;
-			retval = idetape_add_chrdev_write_request(drive, ctl);
+			retval = idetape_add_chrdev_write_request(drive,
+							tape->buffer_size);
 			if (retval <= 0)
 				return (retval);
 		}
@@ -1295,7 +1289,8 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 			ret = -EFAULT;
 		buf += tape->buffer_size;
 		count -= tape->buffer_size;
-		retval = idetape_add_chrdev_write_request(drive, ctl);
+		retval = idetape_add_chrdev_write_request(drive,
+							  tape->buffer_size);
 		actually_written += tape->buffer_size;
 		if (retval <= 0)
 			return (retval);
-- 
cgit v0.10.2


From 4344d07fb8dbf0cbfec1f7d7c1afeccaceaaa120 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide-tape: simplify read/write functions

Impact: cleanup

idetape_chrdev_read/write() functions are unnecessarily complex when
everything can be handled in a single loop.  Collapse
idetape_add_chrdev_read/write_request() into the rw functions and
simplify the implementation.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d5e9bb2..2599579 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -953,14 +953,6 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
 	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
 }
 
-/* Queue up a character device originated write request. */
-static int idetape_add_chrdev_write_request(ide_drive_t *drive, int size)
-{
-	debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
-
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, size);
-}
-
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
@@ -974,7 +966,7 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 		size_t aligned = roundup(tape->valid, tape->blk_size);
 
 		memset(tape->cur, 0, aligned - tape->valid);
-		idetape_add_chrdev_write_request(drive, aligned);
+		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned);
 		kfree(tape->buf);
 		tape->buf = NULL;
 	}
@@ -1031,20 +1023,6 @@ static int idetape_init_rw(ide_drive_t *drive, int dir)
 	return 0;
 }
 
-/* called from idetape_chrdev_read() to service a chrdev read request. */
-static int idetape_add_chrdev_read_request(ide_drive_t *drive, int size)
-{
-	debug_log(DBG_PROCS, "Enter %s, %d bytes\n", __func__, size);
-
-	/* If we are at a filemark, return a read length of 0 */
-	if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
-		return 0;
-
-	idetape_init_rw(drive, IDETAPE_DIR_READ);
-
-	return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, size);
-}
-
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
 	idetape_tape_t *tape = drive->driver_data;
@@ -1184,8 +1162,9 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 {
 	struct ide_tape_obj *tape = file->private_data;
 	ide_drive_t *drive = tape->drive;
-	ssize_t bytes_read, temp, actually_read = 0, rc;
+	size_t done = 0;
 	ssize_t ret = 0;
+	int rc;
 
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
@@ -1195,52 +1174,43 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 			    (count % tape->blk_size) == 0)
 				tape->user_bs_factor = count / tape->blk_size;
 	}
+
 	rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
 	if (rc < 0)
 		return rc;
-	if (count == 0)
-		return (0);
-	if (tape->valid) {
-		actually_read = min_t(unsigned int, tape->valid, count);
-		if (copy_to_user(buf, tape->cur, actually_read))
-			ret = -EFAULT;
-		buf += actually_read;
-		count -= actually_read;
-		tape->cur += actually_read;
-		tape->valid -= actually_read;
-	}
-	while (count >= tape->buffer_size) {
-		bytes_read = idetape_add_chrdev_read_request(drive,
-							     tape->buffer_size);
-		if (bytes_read <= 0)
-			goto finish;
-		if (copy_to_user(buf, tape->cur, bytes_read))
-			ret = -EFAULT;
-		buf += bytes_read;
-		count -= bytes_read;
-		actually_read += bytes_read;
-	}
-	if (count) {
-		bytes_read = idetape_add_chrdev_read_request(drive,
-							     tape->buffer_size);
-		if (bytes_read <= 0)
-			goto finish;
-		temp = min((unsigned long)count, (unsigned long)bytes_read);
-		if (copy_to_user(buf, tape->cur, temp))
+
+	while (done < count) {
+		size_t todo;
+
+		/* refill if staging buffer is empty */
+		if (!tape->valid) {
+			/* If we are at a filemark, nothing more to read */
+			if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
+				break;
+			/* read */
+			if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
+						  tape->buffer_size) <= 0)
+				break;
+		}
+
+		/* copy out */
+		todo = min_t(size_t, count - done, tape->valid);
+		if (copy_to_user(buf + done, tape->cur, todo))
 			ret = -EFAULT;
-		actually_read += temp;
-		tape->cur += temp;
-		tape->valid -= temp;
+
+		tape->cur += todo;
+		tape->valid -= todo;
+		done += todo;
 	}
-finish:
-	if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
+
+	if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
 		debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
 
 		idetape_space_over_filemarks(drive, MTFSF, 1);
 		return 0;
 	}
 
-	return ret ? ret : actually_read;
+	return ret ? ret : done;
 }
 
 static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
@@ -1248,7 +1218,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 {
 	struct ide_tape_obj *tape = file->private_data;
 	ide_drive_t *drive = tape->drive;
-	ssize_t actually_written = 0;
+	size_t done = 0;
 	ssize_t ret = 0;
 	int rc;
 
@@ -1262,47 +1232,28 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 	rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
 	if (rc < 0)
 		return rc;
-	if (count == 0)
-		return (0);
-	if (tape->valid < tape->buffer_size) {
-		actually_written = min_t(unsigned int,
-					 tape->buffer_size - tape->valid,
-					 count);
-		if (copy_from_user(tape->cur, buf, actually_written))
-			ret = -EFAULT;
-		buf += actually_written;
-		count -= actually_written;
-		tape->cur += actually_written;
-		tape->valid += actually_written;
-
-		if (tape->valid == tape->buffer_size) {
-			ssize_t retval;
-			retval = idetape_add_chrdev_write_request(drive,
-							tape->buffer_size);
-			if (retval <= 0)
-				return (retval);
-		}
-	}
-	while (count >= tape->buffer_size) {
-		ssize_t retval;
-		if (copy_from_user(tape->cur, buf, tape->buffer_size))
-			ret = -EFAULT;
-		buf += tape->buffer_size;
-		count -= tape->buffer_size;
-		retval = idetape_add_chrdev_write_request(drive,
-							  tape->buffer_size);
-		actually_written += tape->buffer_size;
-		if (retval <= 0)
-			return (retval);
-	}
-	if (count) {
-		actually_written += count;
-		if (copy_from_user(tape->cur, buf, count))
+
+	while (done < count) {
+		size_t todo;
+
+		/* flush if staging buffer is full */
+		if (tape->valid == tape->buffer_size &&
+		    idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
+					  tape->buffer_size) <= 0)
+			return rc;
+
+		/* copy in */
+		todo = min_t(size_t, count - done,
+			     tape->buffer_size - tape->valid);
+		if (copy_from_user(tape->cur, buf + done, todo))
 			ret = -EFAULT;
-		tape->cur += count;
-		tape->valid += count;
+
+		tape->cur += todo;
+		tape->valid += todo;
+		done += todo;
 	}
-	return ret ? ret : actually_written;
+
+	return ret ? ret : done;
 }
 
 static int idetape_write_filemark(ide_drive_t *drive)
-- 
cgit v0.10.2


From 29d1a4371035e01b0d079bc5aa88b50f5af7a566 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide-atapi: kill unused fields and callbacks

Impact: remove fields and code paths which are no longer necessary

Now that ide-tape uses standard mechanisms to transfer data, special
case handling for bh handling can be dropped from ide-atapi.  Drop the
followings.

* pc->cur_pos, b_count, bh and b_data
* drive->pc_update_buffers() and pc_io_buffers().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index b9dd450..afe5a43 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -359,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
-		} else {
+		} else
 			pc->xferred = pc->req_xfer;
-			if (drive->pc_update_buffers)
-				drive->pc_update_buffers(drive, pc);
-		}
 		debug_log("%s: DMA finished\n", drive->name);
 	}
 
@@ -463,16 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		return ide_do_reset(drive);
 	}
 
-	if (drive->media == ide_tape && pc->bh)
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	else {
-		done = min_t(unsigned int, bcount, cmd->nleft);
-		ide_pio_bytes(drive, cmd, write, done);
-	}
+	done = min_t(unsigned int, bcount, cmd->nleft);
+	ide_pio_bytes(drive, cmd, write, done);
 
-	/* Update the current position */
+	/* Update transferred byte count */
 	pc->xferred += done;
-	pc->cur_pos += done;
 
 	bcount -= done;
 
@@ -650,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
 
 		/* We haven't transferred any data yet */
 		pc->xferred = 0;
-		pc->cur_pos = pc->buf;
 
 		valid_tf = IDE_VALID_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 2599579..8dfc688 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -591,7 +591,6 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->bh = NULL;
 	pc->buf = NULL;
 	pc->buf_size = length * tape->blk_size;
 	pc->req_xfer = pc->buf_size;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1957461..34c128f 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -362,11 +362,7 @@ struct ide_atapi_pc {
 
 	/* data buffer */
 	u8 *buf;
-	/* current buffer position */
-	u8 *cur_pos;
 	int buf_size;
-	/* missing/available data on the current buffer */
-	int b_count;
 
 	/* the corresponding request */
 	struct request *rq;
@@ -379,10 +375,6 @@ struct ide_atapi_pc {
 	 */
 	u8 pc_buf[IDE_PC_BUFFER_SIZE];
 
-	/* idetape only */
-	struct idetape_bh *bh;
-	char *b_data;
-
 	unsigned long timeout;
 };
 
@@ -595,10 +587,6 @@ struct ide_drive_s {
 	/* callback for packet commands */
 	int  (*pc_callback)(struct ide_drive_s *, int);
 
-	void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *);
-	int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
-			      unsigned int, int);
-
 	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
 
 	unsigned long atapi_flags;
-- 
cgit v0.10.2


From 5ad960fe8d0e4f99fe2b8dded45e8251137293c9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide: drop rq->data handling from ide_map_sg()

Impact: remove code path which is no longer necessary

All IDE data transfers now use rq->bio.  Simplify ide_map_sg()
accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 6e3094e..a0309ea 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -248,11 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
 	struct scatterlist *sg = hwif->sg_table;
 	struct request *rq = cmd->rq;
 
-	if (!rq->bio) {
-		sg_init_one(sg, rq->data, rq->data_len);
-		cmd->sg_nents = 1;
-	} else
-		cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
+	cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 }
 EXPORT_SYMBOL_GPL(ide_map_sg);
 
-- 
cgit v0.10.2


From 586cf2681f527ce8b85b9bd57c8b9f7945fbe051 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 21 Apr 2009 12:16:56 +0900
Subject: ide-dma: don't reset request fields on dma_timeout_retry()

Impact: drop unnecessary code

Now that everything uses bio and block operations, there is no need to
reset request fields manually when retrying a request.  Every field is
guaranteed to be always valid.  Drop unnecessary request field
resetting from ide_dma_timeout_retry().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index a0b8cab1..d9123ec 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -510,23 +510,11 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
 	/*
 	 * un-busy drive etc and make sure request is sane
 	 */
-
 	rq = hwif->rq;
-	if (!rq)
-		goto out;
-
-	hwif->rq = NULL;
-
-	rq->errors = 0;
-
-	if (!rq->bio)
-		goto out;
-
-	rq->sector = rq->bio->bi_sector;
-	rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9;
-	rq->hard_cur_sectors = rq->current_nr_sectors;
-	rq->buffer = bio_data(rq->bio);
-out:
+	if (rq) {
+		hwif->rq = NULL;
+		rq->errors = 0;
+	}
 	return ret;
 }
 
-- 
cgit v0.10.2


From db29a6b49674085f136331014ba0eee249c16a2c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 21 Apr 2009 09:27:03 +0200
Subject: block: enable by default support for large devices and files on
 32-bit archs

Enable by default support for large devices and files (CONFIG_LBD):

- With 1TB disks being a commodity hardware it is quite easy to hit 2TB
  limitation while building RAIDs etc. and many distros have been using
  CONFIG_LBD=y by default already (at least Fedora 10 and openSUSE 11.1).

- This should also prevent a subtle ext4 filesystem compatibility issue:
  mke2fs.ext4 defaults to creating filesystems with huge_files feature
  enabled and such filesystems cannot be later mounted read-write on
  machines with CONFIG_LBD=n (it should be quite easy to hit this issue
  when trying to use filesystem created using distro kernel on system
  running the self-build kernel, think about USB disk enclosures & co.).

While at it:

- Clarify config option help text w.r.t. mounting ext4 filesystems
  (they can be mounted with CONFIG_LBD=n but in the read-only mode).

Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/Kconfig b/block/Kconfig
index e7d1278..2c39527 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,6 +26,7 @@ if BLOCK
 config LBD
 	bool "Support for large block devices and files"
 	depends on !64BIT
+	default y
 	help
 	  Enable block devices or files of size 2TB and larger.
 
@@ -38,11 +39,13 @@ config LBD
 
 	  The ext4 filesystem requires that this feature be enabled in
 	  order to support filesystems that have the huge_file feature
-	  enabled.    Otherwise, it will refuse to mount any filesystems
-	  that use the huge_file feature, which is enabled by default
-	  by mke2fs.ext4.   The GFS2 filesystem also requires this feature.
+	  enabled.  Otherwise, it will refuse to mount in the read-write
+	  mode any filesystems that use the huge_file feature, which is
+	  enabled by default by mke2fs.ext4.
 
-	  If unsure, say N.
+	  The GFS2 filesystem also requires this feature.
+
+	  If unsure, say Y.
 
 config BLK_DEV_BSG
 	bool "Block layer SG support v4 (EXPERIMENTAL)"
-- 
cgit v0.10.2


From db2dbb12dc47a50c7a4c5678f526014063e486f6 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 22 Apr 2009 14:08:13 +0200
Subject: block: implement blkdev_readpages

Doing a proper block dev ->readpages() speeds up the crazy dump(8)
approach of using interleaved process IO.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index f45dbc1..a85fe31 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -331,6 +331,12 @@ static int blkdev_readpage(struct file * file, struct page * page)
 	return block_read_full_page(page, blkdev_get_block);
 }
 
+static int blkdev_readpages(struct file *file, struct address_space *mapping,
+			struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
+}
+
 static int blkdev_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -1399,6 +1405,7 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 
 static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
+	.readpages	= blkdev_readpages,
 	.writepage	= blkdev_writepage,
 	.sync_page	= block_sync_page,
 	.write_begin	= blkdev_write_begin,
-- 
cgit v0.10.2


From a538cd03be6f363d039daa94199c28cfbd508455 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:17 +0900
Subject: block: merge blk_invoke_request_fn() into __blk_run_queue()

__blk_run_queue wraps blk_invoke_request_fn() such that it
additionally removes plug and bails out early if the queue is empty.
Both extra operations have their own pending mechanisms and don't
cause any harm correctness-wise when they are done superflously.

The only user of blk_invoke_request_fn() being blk_start_queue(),
there isn't much reason to keep both functions around.  Merge
blk_invoke_request_fn() into __blk_run_queue() and make
blk_start_queue() use __blk_run_queue() instead.

[ Impact: merge two subtly different internal functions ]

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index 41bc0ff..02f53bc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -333,24 +333,6 @@ void blk_unplug(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_unplug);
 
-static void blk_invoke_request_fn(struct request_queue *q)
-{
-	if (unlikely(blk_queue_stopped(q)))
-		return;
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
-		kblockd_schedule_work(q, &q->unplug_work);
-	}
-}
-
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -365,7 +347,7 @@ void blk_start_queue(struct request_queue *q)
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-	blk_invoke_request_fn(q);
+	__blk_run_queue(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -425,12 +407,23 @@ void __blk_run_queue(struct request_queue *q)
 {
 	blk_remove_plug(q);
 
+	if (unlikely(blk_queue_stopped(q)))
+		return;
+
+	if (elv_queue_empty(q))
+		return;
+
 	/*
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q))
-		blk_invoke_request_fn(q);
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
-- 
cgit v0.10.2


From a7f557923441186a3cdbabc54f1bcacf42b63bf5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:17 +0900
Subject: block: kill blk_start_queueing()

blk_start_queueing() is identical to __blk_run_queue() except that it
doesn't check for recursion.  None of the current users depends on
blk_start_queueing() running request_fn directly.  Replace usages of
blk_start_queueing() with [__]blk_run_queue() and kill it.

[ Impact: removal of mostly duplicate interface function ]

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/as-iosched.c b/block/as-iosched.c
index c48fa67..45bd070 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1312,12 +1312,8 @@ static void as_merged_requests(struct request_queue *q, struct request *req,
 static void as_work_handler(struct work_struct *work)
 {
 	struct as_data *ad = container_of(work, struct as_data, antic_work);
-	struct request_queue *q = ad->q;
-	unsigned long flags;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_start_queueing(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	blk_run_queue(ad->q);
 }
 
 static int as_may_queue(struct request_queue *q, int rw)
diff --git a/block/blk-core.c b/block/blk-core.c
index 02f53bc..8b4a0af 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -433,9 +433,7 @@ EXPORT_SYMBOL(__blk_run_queue);
  *
  * Description:
  *    Invoke request handling on this queue, if it has pending work to do.
- *    May be used to restart queueing when a request has completed. Also
- *    See @blk_start_queueing.
- *
+ *    May be used to restart queueing when a request has completed.
  */
 void blk_run_queue(struct request_queue *q)
 {
@@ -895,28 +893,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 EXPORT_SYMBOL(blk_get_request);
 
 /**
- * blk_start_queueing - initiate dispatch of requests to device
- * @q:		request queue to kick into gear
- *
- * This is basically a helper to remove the need to know whether a queue
- * is plugged or not if someone just wants to initiate dispatch of requests
- * for this queue. Should be used to start queueing on a device outside
- * of ->request_fn() context. Also see @blk_run_queue.
- *
- * The queue lock must be held with interrupts disabled.
- */
-void blk_start_queueing(struct request_queue *q)
-{
-	if (!blk_queue_plugged(q)) {
-		if (unlikely(blk_queue_stopped(q)))
-			return;
-		q->request_fn(q);
-	} else
-		__generic_unplug_device(q);
-}
-EXPORT_SYMBOL(blk_start_queueing);
-
-/**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
@@ -984,7 +960,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 
 	drive_stat_acct(rq, 1);
 	__elv_add_request(q, rq, where, 0);
-	blk_start_queueing(q);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a55a9bd..def0c69 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2088,7 +2088,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
 			    cfqd->busy_queues > 1) {
 				del_timer(&cfqd->idle_slice_timer);
-				blk_start_queueing(cfqd->queue);
+			__blk_run_queue(cfqd->queue);
 			}
 			cfq_mark_cfqq_must_dispatch(cfqq);
 		}
@@ -2100,7 +2100,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		blk_start_queueing(cfqd->queue);
+		__blk_run_queue(cfqd->queue);
 	}
 }
 
@@ -2345,7 +2345,7 @@ static void cfq_kick_queue(struct work_struct *work)
 	struct request_queue *q = cfqd->queue;
 
 	spin_lock_irq(q->queue_lock);
-	blk_start_queueing(q);
+	__blk_run_queue(cfqd->queue);
 	spin_unlock_irq(q->queue_lock);
 }
 
diff --git a/block/elevator.c b/block/elevator.c
index 7073a90..2e0fb21 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -599,7 +599,7 @@ void elv_quiesce_start(struct request_queue *q)
 	 */
 	elv_drain_elevator(q);
 	while (q->rq.elvpriv) {
-		blk_start_queueing(q);
+		__blk_run_queue(q);
 		spin_unlock_irq(q->queue_lock);
 		msleep(10);
 		spin_lock_irq(q->queue_lock);
@@ -643,8 +643,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		 *   with anything.  There's no point in delaying queue
 		 *   processing.
 		 */
-		blk_remove_plug(q);
-		blk_start_queueing(q);
+		__blk_run_queue(q);
 		break;
 
 	case ELEVATOR_INSERT_SORT:
@@ -971,7 +970,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
 		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
-			blk_start_queueing(q);
+			__blk_run_queue(q);
 		}
 	}
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2755d5c..12e20de 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -797,7 +797,6 @@ extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
 extern void __blk_run_queue(struct request_queue *);
 extern void blk_run_queue(struct request_queue *);
-extern void blk_start_queueing(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
-- 
cgit v0.10.2


From e4025f6c21f1389696c069be2dc647f364925c45 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:17 +0900
Subject: block: don't set REQ_NOMERGE unnecessarily

RQ_NOMERGE_FLAGS already clears defines which REQ flags aren't
mergeable.  There is no reason to specify it superflously.  It only
adds to confusion.  Don't set REQ_NOMERGE for barriers and requests
with specific queueing directive.  REQ_NOMERGE is now exclusively used
by the merging code.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index 8b4a0af..7e0fab5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1082,16 +1082,13 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	if (bio_failfast_driver(bio))
 		req->cmd_flags |= REQ_FAILFAST_DRIVER;
 
-	/*
-	 * REQ_BARRIER implies no merging, but lets make it explicit
-	 */
 	if (unlikely(bio_discard(bio))) {
 		req->cmd_flags |= REQ_DISCARD;
 		if (bio_barrier(bio))
 			req->cmd_flags |= REQ_SOFTBARRIER;
 		req->q->prepare_discard_fn(req->q, req);
 	} else if (unlikely(bio_barrier(bio)))
-		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+		req->cmd_flags |= REQ_HARDBARRIER;
 
 	if (bio_sync(bio))
 		req->cmd_flags |= REQ_RW_SYNC;
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 6af716d..49557e9 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -51,7 +51,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
 	rq->rq_disk = bd_disk;
-	rq->cmd_flags |= REQ_NOMERGE;
 	rq->end_io = done;
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
-- 
cgit v0.10.2


From 10732f5661fb7adf62e20733b0030fc0fc93b0c4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: cleanup REQ_SOFTBARRIER usages

blk_insert_request() doesn't need to worry about REQ_SOFTBARRIER.
Don't set it.  Combined with recent ide updates, REQ_SOFTBARRIER is
now only used in elevator proper and for discard requests.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index 7e0fab5..cf10dfc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -946,7 +946,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 	 * barrier
 	 */
 	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->cmd_flags |= REQ_SOFTBARRIER;
 
 	rq->special = data;
 
-- 
cgit v0.10.2


From 2eef33e439ba9ae387cdc3f1abcef2f3f6c4e7a8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: clean up misc stuff after block layer timeout conversion

* In blk_rq_timed_out_timer(), else { if } to else if

* In blk_add_timer(), simplify if/else block

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 1ec0d50..1ba7e0a 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -122,10 +122,8 @@ void blk_rq_timed_out_timer(unsigned long data)
 			if (blk_mark_rq_complete(rq))
 				continue;
 			blk_rq_timed_out(rq);
-		} else {
-			if (!next || time_after(next, rq->deadline))
-				next = rq->deadline;
-		}
+		} else if (!next || time_after(next, rq->deadline))
+			next = rq->deadline;
 	}
 
 	/*
@@ -176,16 +174,14 @@ void blk_add_timer(struct request *req)
 	BUG_ON(!list_empty(&req->timeout_list));
 	BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
 
-	if (req->timeout)
-		req->deadline = jiffies + req->timeout;
-	else {
-		req->deadline = jiffies + q->rq_timeout;
-		/*
-		 * Some LLDs, like scsi, peek at the timeout to prevent
-		 * a command from being retried forever.
-		 */
+	/*
+	 * Some LLDs, like scsi, peek at the timeout to prevent a
+	 * command from being retried forever.
+	 */
+	if (!req->timeout)
 		req->timeout = q->rq_timeout;
-	}
+
+	req->deadline = jiffies + req->timeout;
 	list_add_tail(&req->timeout_list, &q->timeout_list);
 
 	/*
-- 
cgit v0.10.2


From 5efccd17ceb0fc43837a331297c2c407969d7201 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: reorder request completion functions

Reorder request completion functions such that

* All request completion functions are located together.

* Functions which are used by only one caller is put right above the
  caller.

* end_request() is put after other completion functions but before
  blk_update_request().

This change is for completion function cleanup which will follow.

[ Impact: cleanup, code reorganization ]

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index cf10dfc..406a93e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1684,6 +1684,35 @@ static void blk_account_io_done(struct request *req)
 }
 
 /**
+ * blk_rq_bytes - Returns bytes left to complete in the entire request
+ * @rq: the request being processed
+ **/
+unsigned int blk_rq_bytes(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return rq->hard_nr_sectors << 9;
+
+	return rq->data_len;
+}
+EXPORT_SYMBOL_GPL(blk_rq_bytes);
+
+/**
+ * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
+ * @rq: the request being processed
+ **/
+unsigned int blk_rq_cur_bytes(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return rq->current_nr_sectors << 9;
+
+	if (rq->bio)
+		return rq->bio->bi_size;
+
+	return rq->data_len;
+}
+EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
+
+/**
  * __end_that_request_first - end I/O on a request
  * @req:      the request being processed
  * @error:    %0 for success, < %0 for error
@@ -1797,6 +1826,22 @@ static int __end_that_request_first(struct request *req, int error,
 	return 1;
 }
 
+static int end_that_request_data(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
+{
+	if (rq->bio) {
+		if (__end_that_request_first(rq, error, nr_bytes))
+			return 1;
+
+		/* Bidi request must be completed as a whole */
+		if (blk_bidi_rq(rq) &&
+		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * queue lock must be held
  */
@@ -1826,78 +1871,6 @@ static void end_that_request_last(struct request *req, int error)
 }
 
 /**
- * blk_rq_bytes - Returns bytes left to complete in the entire request
- * @rq: the request being processed
- **/
-unsigned int blk_rq_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return rq->hard_nr_sectors << 9;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_bytes);
-
-/**
- * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
- * @rq: the request being processed
- **/
-unsigned int blk_rq_cur_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return rq->current_nr_sectors << 9;
-
-	if (rq->bio)
-		return rq->bio->bi_size;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
-
-/**
- * end_request - end I/O on the current segment of the request
- * @req:	the request being processed
- * @uptodate:	error value or %0/%1 uptodate flag
- *
- * Description:
- *     Ends I/O on the current segment of a request. If that is the only
- *     remaining segment, the request is also completed and freed.
- *
- *     This is a remnant of how older block drivers handled I/O completions.
- *     Modern drivers typically end I/O on the full request in one go, unless
- *     they have a residual value to account for. For that case this function
- *     isn't really useful, unless the residual just happens to be the
- *     full current segment. In other words, don't use this function in new
- *     code. Use blk_end_request() or __blk_end_request() to end a request.
- **/
-void end_request(struct request *req, int uptodate)
-{
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_request(req, error, req->hard_cur_sectors << 9);
-}
-EXPORT_SYMBOL(end_request);
-
-static int end_that_request_data(struct request *rq, int error,
-				 unsigned int nr_bytes, unsigned int bidi_bytes)
-{
-	if (rq->bio) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-
-		/* Bidi request must be completed as a whole */
-		if (blk_bidi_rq(rq) &&
-		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-			return 1;
-	}
-
-	return 0;
-}
-
-/**
  * blk_end_io - Generic end_io function to complete a request.
  * @rq:           the request being processed
  * @error:        %0 for success, < %0 for error
@@ -2007,6 +1980,33 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
 /**
+ * end_request - end I/O on the current segment of the request
+ * @req:	the request being processed
+ * @uptodate:	error value or %0/%1 uptodate flag
+ *
+ * Description:
+ *     Ends I/O on the current segment of a request. If that is the only
+ *     remaining segment, the request is also completed and freed.
+ *
+ *     This is a remnant of how older block drivers handled I/O completions.
+ *     Modern drivers typically end I/O on the full request in one go, unless
+ *     they have a residual value to account for. For that case this function
+ *     isn't really useful, unless the residual just happens to be the
+ *     full current segment. In other words, don't use this function in new
+ *     code. Use blk_end_request() or __blk_end_request() to end a request.
+ **/
+void end_request(struct request *req, int uptodate)
+{
+	int error = 0;
+
+	if (uptodate <= 0)
+		error = uptodate ? uptodate : -EIO;
+
+	__blk_end_request(req, error, req->hard_cur_sectors << 9);
+}
+EXPORT_SYMBOL(end_request);
+
+/**
  * blk_update_request - Special helper function for request stacking drivers
  * @rq:           the request being processed
  * @error:        %0 for success, < %0 for error
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 12e20de..156ffd9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -832,6 +832,14 @@ static inline void blk_run_address_space(struct address_space *mapping)
 extern void blkdev_dequeue_request(struct request *req);
 
 /*
+ * blk_end_request() takes bytes instead of sectors as a complete size.
+ * blk_rq_bytes() returns bytes left to complete in the entire request.
+ * blk_rq_cur_bytes() returns bytes left to complete in the current segment.
+ */
+extern unsigned int blk_rq_bytes(struct request *rq);
+extern unsigned int blk_rq_cur_bytes(struct request *rq);
+
+/*
  * blk_end_request() and friends.
  * __blk_end_request() and end_request() must be called with
  * the request queue spinlock acquired.
@@ -858,14 +866,6 @@ extern void blk_update_request(struct request *rq, int error,
 			       unsigned int nr_bytes);
 
 /*
- * blk_end_request() takes bytes instead of sectors as a complete size.
- * blk_rq_bytes() returns bytes left to complete in the entire request.
- * blk_rq_cur_bytes() returns bytes left to complete in the current segment.
- */
-extern unsigned int blk_rq_bytes(struct request *rq);
-extern unsigned int blk_rq_cur_bytes(struct request *rq);
-
-/*
  * Access functions for manipulating queue properties
  */
 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
-- 
cgit v0.10.2


From 158dbda0068e63c7cce7bd47c123bd1dfa5a902c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: reorganize request fetching functions

Impact: code reorganization

elv_next_request() and elv_dequeue_request() are public block layer
interface than actual elevator implementation.  They mostly deal with
how requests interact with block layer and low level drivers at the
beginning of rqeuest processing whereas __elv_next_request() is the
actual eleveator request fetching interface.

Move the two functions to blk-core.c.  This prepares for further
interface cleanup.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index 406a93e..678ede2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1712,6 +1712,101 @@ unsigned int blk_rq_cur_bytes(struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
 
+struct request *elv_next_request(struct request_queue *q)
+{
+	struct request *rq;
+	int ret;
+
+	while ((rq = __elv_next_request(q)) != NULL) {
+		if (!(rq->cmd_flags & REQ_STARTED)) {
+			/*
+			 * This is the first time the device driver
+			 * sees this request (possibly after
+			 * requeueing).  Notify IO scheduler.
+			 */
+			if (blk_sorted_rq(rq))
+				elv_activate_rq(q, rq);
+
+			/*
+			 * just mark as started even if we don't start
+			 * it, a request that has been delayed should
+			 * not be passed by new incoming requests
+			 */
+			rq->cmd_flags |= REQ_STARTED;
+			trace_block_rq_issue(q, rq);
+		}
+
+		if (!q->boundary_rq || q->boundary_rq == rq) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = NULL;
+		}
+
+		if (rq->cmd_flags & REQ_DONTPREP)
+			break;
+
+		if (q->dma_drain_size && rq->data_len) {
+			/*
+			 * make sure space for the drain appears we
+			 * know we can do this because max_hw_segments
+			 * has been adjusted to be one fewer than the
+			 * device can handle
+			 */
+			rq->nr_phys_segments++;
+		}
+
+		if (!q->prep_rq_fn)
+			break;
+
+		ret = q->prep_rq_fn(q, rq);
+		if (ret == BLKPREP_OK) {
+			break;
+		} else if (ret == BLKPREP_DEFER) {
+			/*
+			 * the request may have been (partially) prepped.
+			 * we need to keep this request in the front to
+			 * avoid resource deadlock.  REQ_STARTED will
+			 * prevent other fs requests from passing this one.
+			 */
+			if (q->dma_drain_size && rq->data_len &&
+			    !(rq->cmd_flags & REQ_DONTPREP)) {
+				/*
+				 * remove the space for the drain we added
+				 * so that we don't add it again
+				 */
+				--rq->nr_phys_segments;
+			}
+
+			rq = NULL;
+			break;
+		} else if (ret == BLKPREP_KILL) {
+			rq->cmd_flags |= REQ_QUIET;
+			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+		} else {
+			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
+			break;
+		}
+	}
+
+	return rq;
+}
+EXPORT_SYMBOL(elv_next_request);
+
+void elv_dequeue_request(struct request_queue *q, struct request *rq)
+{
+	BUG_ON(list_empty(&rq->queuelist));
+	BUG_ON(ELV_ON_HASH(rq));
+
+	list_del_init(&rq->queuelist);
+
+	/*
+	 * the time frame between a request being removed from the lists
+	 * and to it is freed is accounted as io that is in progress at
+	 * the driver side.
+	 */
+	if (blk_account_rq(rq))
+		q->in_flight++;
+}
+
 /**
  * __end_that_request_first - end I/O on a request
  * @req:      the request being processed
diff --git a/block/blk.h b/block/blk.h
index 79c85f7..9b2c324 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -43,6 +43,43 @@ static inline void blk_clear_rq_complete(struct request *rq)
 	clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
 }
 
+/*
+ * Internal elevator interface
+ */
+#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
+
+static inline struct request *__elv_next_request(struct request_queue *q)
+{
+	struct request *rq;
+
+	while (1) {
+		while (!list_empty(&q->queue_head)) {
+			rq = list_entry_rq(q->queue_head.next);
+			if (blk_do_ordered(q, &rq))
+				return rq;
+		}
+
+		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+			return NULL;
+	}
+}
+
+static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e->ops->elevator_activate_req_fn)
+		e->ops->elevator_activate_req_fn(q, rq);
+}
+
+static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e->ops->elevator_deactivate_req_fn)
+		e->ops->elevator_deactivate_req_fn(q, rq);
+}
+
 #ifdef CONFIG_FAIL_IO_TIMEOUT
 int blk_should_fake_timeout(struct request_queue *);
 ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
diff --git a/block/elevator.c b/block/elevator.c
index 2e0fb21..b03b875 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -53,7 +53,6 @@ static const int elv_hash_shift = 6;
 		(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
 #define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
 #define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
 DEFINE_TRACE(block_rq_insert);
 DEFINE_TRACE(block_rq_issue);
@@ -310,22 +309,6 @@ void elevator_exit(struct elevator_queue *e)
 }
 EXPORT_SYMBOL(elevator_exit);
 
-static void elv_activate_rq(struct request_queue *q, struct request *rq)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (e->ops->elevator_activate_req_fn)
-		e->ops->elevator_activate_req_fn(q, rq);
-}
-
-static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (e->ops->elevator_deactivate_req_fn)
-		e->ops->elevator_deactivate_req_fn(q, rq);
-}
-
 static inline void __elv_rqhash_del(struct request *rq)
 {
 	hlist_del_init(&rq->hash);
@@ -758,117 +741,6 @@ void elv_add_request(struct request_queue *q, struct request *rq, int where,
 }
 EXPORT_SYMBOL(elv_add_request);
 
-static inline struct request *__elv_next_request(struct request_queue *q)
-{
-	struct request *rq;
-
-	while (1) {
-		while (!list_empty(&q->queue_head)) {
-			rq = list_entry_rq(q->queue_head.next);
-			if (blk_do_ordered(q, &rq))
-				return rq;
-		}
-
-		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
-			return NULL;
-	}
-}
-
-struct request *elv_next_request(struct request_queue *q)
-{
-	struct request *rq;
-	int ret;
-
-	while ((rq = __elv_next_request(q)) != NULL) {
-		if (!(rq->cmd_flags & REQ_STARTED)) {
-			/*
-			 * This is the first time the device driver
-			 * sees this request (possibly after
-			 * requeueing).  Notify IO scheduler.
-			 */
-			if (blk_sorted_rq(rq))
-				elv_activate_rq(q, rq);
-
-			/*
-			 * just mark as started even if we don't start
-			 * it, a request that has been delayed should
-			 * not be passed by new incoming requests
-			 */
-			rq->cmd_flags |= REQ_STARTED;
-			trace_block_rq_issue(q, rq);
-		}
-
-		if (!q->boundary_rq || q->boundary_rq == rq) {
-			q->end_sector = rq_end_sector(rq);
-			q->boundary_rq = NULL;
-		}
-
-		if (rq->cmd_flags & REQ_DONTPREP)
-			break;
-
-		if (q->dma_drain_size && rq->data_len) {
-			/*
-			 * make sure space for the drain appears we
-			 * know we can do this because max_hw_segments
-			 * has been adjusted to be one fewer than the
-			 * device can handle
-			 */
-			rq->nr_phys_segments++;
-		}
-
-		if (!q->prep_rq_fn)
-			break;
-
-		ret = q->prep_rq_fn(q, rq);
-		if (ret == BLKPREP_OK) {
-			break;
-		} else if (ret == BLKPREP_DEFER) {
-			/*
-			 * the request may have been (partially) prepped.
-			 * we need to keep this request in the front to
-			 * avoid resource deadlock.  REQ_STARTED will
-			 * prevent other fs requests from passing this one.
-			 */
-			if (q->dma_drain_size && rq->data_len &&
-			    !(rq->cmd_flags & REQ_DONTPREP)) {
-				/*
-				 * remove the space for the drain we added
-				 * so that we don't add it again
-				 */
-				--rq->nr_phys_segments;
-			}
-
-			rq = NULL;
-			break;
-		} else if (ret == BLKPREP_KILL) {
-			rq->cmd_flags |= REQ_QUIET;
-			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
-		} else {
-			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
-			break;
-		}
-	}
-
-	return rq;
-}
-EXPORT_SYMBOL(elv_next_request);
-
-void elv_dequeue_request(struct request_queue *q, struct request *rq)
-{
-	BUG_ON(list_empty(&rq->queuelist));
-	BUG_ON(ELV_ON_HASH(rq));
-
-	list_del_init(&rq->queuelist);
-
-	/*
-	 * the time frame between a request being removed from the lists
-	 * and to it is freed is accounted as io that is in progress at
-	 * the driver side.
-	 */
-	if (blk_account_rq(rq))
-		q->in_flight++;
-}
-
 int elv_queue_empty(struct request_queue *q)
 {
 	struct elevator_queue *e = q->elevator;
-- 
cgit v0.10.2


From 0b302d5aa7975006fa2ec3d66386610b9b36c669 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: kill blk_end_request_callback()

With recent IDE updates, blk_end_request_callback() doesn't have any
user now.  Kill it.

[ Impact: removal of unused convoluted interface ]

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index 678ede2..2f277ea 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1971,10 +1971,6 @@ static void end_that_request_last(struct request *req, int error)
  * @error:        %0 for success, < %0 for error
  * @nr_bytes:     number of bytes to complete @rq
  * @bidi_bytes:   number of bytes to complete @rq->next_rq
- * @drv_callback: function called between completion of bios in the request
- *                and completion of the request.
- *                If the callback returns non %0, this helper returns without
- *                completion of the request.
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
@@ -1985,8 +1981,7 @@ static void end_that_request_last(struct request *req, int error)
  *     %1 - this request is not freed yet, it still has pending buffers.
  **/
 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
-		      unsigned int bidi_bytes,
-		      int (drv_callback)(struct request *))
+		      unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags = 0UL;
@@ -1994,10 +1989,6 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
 	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
 		return 1;
 
-	/* Special feature for tricky drivers */
-	if (drv_callback && drv_callback(rq))
-		return 1;
-
 	add_disk_randomness(rq->rq_disk);
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -2023,7 +2014,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
  **/
 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	return blk_end_io(rq, error, nr_bytes, 0, NULL);
+	return blk_end_io(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL_GPL(blk_end_request);
 
@@ -2070,7 +2061,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 			 unsigned int bidi_bytes)
 {
-	return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL);
+	return blk_end_io(rq, error, nr_bytes, bidi_bytes);
 }
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
@@ -2131,39 +2122,6 @@ void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
 
-/**
- * blk_end_request_callback - Special helper function for tricky drivers
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete
- * @drv_callback: function called between completion of bios in the request
- *                and completion of the request.
- *                If the callback returns non %0, this helper returns without
- *                completion of the request.
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- *     This special helper function is used only for existing tricky drivers.
- *     (e.g. cdrom_newpc_intr() of ide-cd)
- *     This interface will be removed when such drivers are rewritten.
- *     Don't use this interface in other places anymore.
- *
- * Return:
- *     %0 - we are done with this request
- *     %1 - this request is not freed yet.
- *          this request still has pending buffers or
- *          the driver doesn't want to finish this request yet.
- **/
-int blk_end_request_callback(struct request *rq, int error,
-			     unsigned int nr_bytes,
-			     int (drv_callback)(struct request *))
-{
-	return blk_end_io(rq, error, nr_bytes, 0, drv_callback);
-}
-EXPORT_SYMBOL_GPL(blk_end_request_callback);
-
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 156ffd9..1fa9dcf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -855,9 +855,6 @@ extern int __blk_end_request(struct request *rq, int error,
 extern int blk_end_bidi_request(struct request *rq, int error,
 				unsigned int nr_bytes, unsigned int bidi_bytes);
 extern void end_request(struct request *, int);
-extern int blk_end_request_callback(struct request *rq, int error,
-				unsigned int nr_bytes,
-				int (drv_callback)(struct request *));
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
-- 
cgit v0.10.2


From 2e60e02297cf54e367567f2d85b2ca56b1c4a906 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: clean up request completion API

Request completion has gone through several changes and became a bit
messy over the time.  Clean it up.

1. end_that_request_data() is a thin wrapper around
   end_that_request_data_first() which checks whether bio is NULL
   before doing anything and handles bidi completion.
   blk_update_request() is a thin wrapper around
   end_that_request_data() which clears nr_sectors on the last
   iteration but doesn't use the bidi completion.

   Clean it up by moving the initial bio NULL check and nr_sectors
   clearing on the last iteration into end_that_request_data() and
   renaming it to blk_update_request(), which makes blk_end_io() the
   only user of end_that_request_data().  Collapse
   end_that_request_data() into blk_end_io().

2. There are four visible completion variants - blk_end_request(),
   __blk_end_request(), blk_end_bidi_request() and end_request().
   blk_end_request() and blk_end_bidi_request() uses blk_end_request()
   as the backend but __blk_end_request() and end_request() use
   separate implementation in __blk_end_request() due to different
   locking rules.

   blk_end_bidi_request() is identical to blk_end_io().  Collapse
   blk_end_io() into blk_end_bidi_request(), separate out request
   update into internal helper blk_update_bidi_request() and add
   __blk_end_bidi_request().  Redefine [__]blk_end_request() as thin
   inline wrappers around [__]blk_end_bidi_request().

3. As the whole request issue/completion usages are about to be
   modified and audited, it's a good chance to convert completion
   functions return bool which better indicates the intended meaning
   of return values.

4. The function name end_that_request_last() is from the days when it
   was a public interface and slighly confusing.  Give it a proper
   internal name - blk_finish_request().

5. Add description explaning that blk_end_bidi_request() can be safely
   used for uni requests as suggested by Boaz Harrosh.

The only visible behavior change is from #1.  nr_sectors counts are
cleared after the final iteration no matter which function is used to
complete the request.  I couldn't find any place where the code
assumes those nr_sectors counters contain the values for the last
segment and this change is good as it makes the API much more
consistent as the end result is now same whether a request is
completed using [__]blk_end_request() alone or in combination with
blk_update_request().

API further cleaned up per Christoph's suggestion.

[ Impact: cleanup, rq->*nr_sectors always updated after req completion ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Christoph Hellwig <hch@infradead.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index 2f277ea..89cc05d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1808,25 +1808,35 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
 }
 
 /**
- * __end_that_request_first - end I/O on a request
- * @req:      the request being processed
+ * blk_update_request - Special helper function for request stacking drivers
+ * @rq:	      the request being processed
  * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
+ * @nr_bytes: number of bytes to complete @rq
  *
  * Description:
- *     Ends I/O on a number of bytes attached to @req, and sets it up
- *     for the next range of segments (if any) in the cluster.
+ *     Ends I/O on a number of bytes attached to @rq, but doesn't complete
+ *     the request structure even if @rq doesn't have leftover.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ *     This special helper function is only for request stacking drivers
+ *     (e.g. request-based dm) so that they can handle partial completion.
+ *     Actual device drivers should use blk_end_request instead.
+ *
+ *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
+ *     %false return from this function.
  *
  * Return:
- *     %0 - we are done with this request, call end_that_request_last()
- *     %1 - still buffers pending for this request
+ *     %false - this request doesn't have any more data
+ *     %true  - this request has more data
  **/
-static int __end_that_request_first(struct request *req, int error,
-				    int nr_bytes)
+bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
 	int total_bytes, bio_nbytes, next_idx = 0;
 	struct bio *bio;
 
+	if (!req->bio)
+		return false;
+
 	trace_block_rq_complete(req->q, req);
 
 	/*
@@ -1903,8 +1913,16 @@ static int __end_that_request_first(struct request *req, int error,
 	/*
 	 * completely done
 	 */
-	if (!req->bio)
-		return 0;
+	if (!req->bio) {
+		/*
+		 * Reset counters so that the request stacking driver
+		 * can find how many bytes remain in the request
+		 * later.
+		 */
+		req->nr_sectors = req->hard_nr_sectors = 0;
+		req->current_nr_sectors = req->hard_cur_sectors = 0;
+		return false;
+	}
 
 	/*
 	 * if the request wasn't completed, update state
@@ -1918,29 +1936,31 @@ static int __end_that_request_first(struct request *req, int error,
 
 	blk_recalc_rq_sectors(req, total_bytes >> 9);
 	blk_recalc_rq_segments(req);
-	return 1;
+	return true;
 }
+EXPORT_SYMBOL_GPL(blk_update_request);
 
-static int end_that_request_data(struct request *rq, int error,
-				 unsigned int nr_bytes, unsigned int bidi_bytes)
+static bool blk_update_bidi_request(struct request *rq, int error,
+				    unsigned int nr_bytes,
+				    unsigned int bidi_bytes)
 {
-	if (rq->bio) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
+	if (blk_update_request(rq, error, nr_bytes))
+		return true;
 
-		/* Bidi request must be completed as a whole */
-		if (blk_bidi_rq(rq) &&
-		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-			return 1;
-	}
+	/* Bidi request must be completed as a whole */
+	if (unlikely(blk_bidi_rq(rq)) &&
+	    blk_update_request(rq->next_rq, error, bidi_bytes))
+		return true;
 
-	return 0;
+	add_disk_randomness(rq->rq_disk);
+
+	return false;
 }
 
 /*
  * queue lock must be held
  */
-static void end_that_request_last(struct request *req, int error)
+static void blk_finish_request(struct request *req, int error)
 {
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
@@ -1966,161 +1986,65 @@ static void end_that_request_last(struct request *req, int error)
 }
 
 /**
- * blk_end_io - Generic end_io function to complete a request.
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete @rq
- * @bidi_bytes:   number of bytes to complete @rq->next_rq
+ * blk_end_bidi_request - Complete a bidi request
+ * @rq:         the request to complete
+ * @error:      %0 for success, < %0 for error
+ * @nr_bytes:   number of bytes to complete @rq
+ * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
- *     If @rq has leftover, sets it up for the next range of segments.
+ *     Drivers that supports bidi can safely call this member for any
+ *     type of request, bidi or uni.  In the later case @bidi_bytes is
+ *     just ignored.
  *
  * Return:
- *     %0 - we are done with this request
- *     %1 - this request is not freed yet, it still has pending buffers.
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  **/
-static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
-		      unsigned int bidi_bytes)
+bool blk_end_bidi_request(struct request *rq, int error,
+			  unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
-	unsigned long flags = 0UL;
-
-	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
-		return 1;
+	unsigned long flags;
 
-	add_disk_randomness(rq->rq_disk);
+	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
+		return true;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	end_that_request_last(rq, error);
+	blk_finish_request(rq, error);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	return 0;
-}
-
-/**
- * blk_end_request - Helper function for drivers to complete the request.
- * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
- **/
-int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
-{
-	return blk_end_io(rq, error, nr_bytes, 0);
-}
-EXPORT_SYMBOL_GPL(blk_end_request);
-
-/**
- * __blk_end_request - Helper function for drivers to complete the request.
- * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- *     Must be called with queue lock held unlike blk_end_request().
- *
- * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
- **/
-int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
-{
-	if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
-		return 1;
-
-	add_disk_randomness(rq->rq_disk);
-
-	end_that_request_last(rq, error);
-
-	return 0;
+	return false;
 }
-EXPORT_SYMBOL_GPL(__blk_end_request);
+EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
 /**
- * blk_end_bidi_request - Helper function for drivers to complete bidi request.
- * @rq:         the bidi request being processed
+ * __blk_end_bidi_request - Complete a bidi request with queue lock held
+ * @rq:         the request to complete
  * @error:      %0 for success, < %0 for error
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
  * Description:
- *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
+ *     Identical to blk_end_bidi_request() except that queue lock is
+ *     assumed to be locked on entry and remains so on return.
  *
  * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
- **/
-int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
-			 unsigned int bidi_bytes)
-{
-	return blk_end_io(rq, error, nr_bytes, bidi_bytes);
-}
-EXPORT_SYMBOL_GPL(blk_end_bidi_request);
-
-/**
- * end_request - end I/O on the current segment of the request
- * @req:	the request being processed
- * @uptodate:	error value or %0/%1 uptodate flag
- *
- * Description:
- *     Ends I/O on the current segment of a request. If that is the only
- *     remaining segment, the request is also completed and freed.
- *
- *     This is a remnant of how older block drivers handled I/O completions.
- *     Modern drivers typically end I/O on the full request in one go, unless
- *     they have a residual value to account for. For that case this function
- *     isn't really useful, unless the residual just happens to be the
- *     full current segment. In other words, don't use this function in new
- *     code. Use blk_end_request() or __blk_end_request() to end a request.
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  **/
-void end_request(struct request *req, int uptodate)
+bool __blk_end_bidi_request(struct request *rq, int error,
+			    unsigned int nr_bytes, unsigned int bidi_bytes)
 {
-	int error = 0;
+	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
+		return true;
 
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
+	blk_finish_request(rq, error);
 
-	__blk_end_request(req, error, req->hard_cur_sectors << 9);
-}
-EXPORT_SYMBOL(end_request);
-
-/**
- * blk_update_request - Special helper function for request stacking drivers
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete @rq
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq, but doesn't complete
- *     the request structure even if @rq doesn't have leftover.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- *     This special helper function is only for request stacking drivers
- *     (e.g. request-based dm) so that they can handle partial completion.
- *     Actual device drivers should use blk_end_request instead.
- */
-void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
-{
-	if (!end_that_request_data(rq, error, nr_bytes, 0)) {
-		/*
-		 * These members are not updated in end_that_request_data()
-		 * when all bios are completed.
-		 * Update them so that the request stacking driver can find
-		 * how many bytes remain in the request later.
-		 */
-		rq->nr_sectors = rq->hard_nr_sectors = 0;
-		rq->current_nr_sectors = rq->hard_cur_sectors = 0;
-	}
+	return false;
 }
-EXPORT_SYMBOL_GPL(blk_update_request);
+EXPORT_SYMBOL_GPL(__blk_end_bidi_request);
 
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1fa9dcf..501f684 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -840,27 +840,97 @@ extern unsigned int blk_rq_bytes(struct request *rq);
 extern unsigned int blk_rq_cur_bytes(struct request *rq);
 
 /*
- * blk_end_request() and friends.
- * __blk_end_request() and end_request() must be called with
- * the request queue spinlock acquired.
+ * Request completion related functions.
+ *
+ * blk_update_request() completes given number of bytes and updates
+ * the request without completing it.
+ *
+ * blk_end_request() and friends.  __blk_end_request() and
+ * end_request() must be called with the request queue spinlock
+ * acquired.
  *
  * Several drivers define their own end_request and call
  * blk_end_request() for parts of the original function.
  * This prevents code duplication in drivers.
  */
-extern int blk_end_request(struct request *rq, int error,
-				unsigned int nr_bytes);
-extern int __blk_end_request(struct request *rq, int error,
-				unsigned int nr_bytes);
-extern int blk_end_bidi_request(struct request *rq, int error,
-				unsigned int nr_bytes, unsigned int bidi_bytes);
-extern void end_request(struct request *, int);
+extern bool blk_update_request(struct request *rq, int error,
+			       unsigned int nr_bytes);
+extern bool blk_end_bidi_request(struct request *rq, int error,
+				 unsigned int nr_bytes,
+				 unsigned int bidi_bytes);
+extern bool __blk_end_bidi_request(struct request *rq, int error,
+				   unsigned int nr_bytes,
+				   unsigned int bidi_bytes);
+
+/**
+ * blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+static inline bool blk_end_request(struct request *rq, int error,
+				   unsigned int nr_bytes)
+{
+	return blk_end_bidi_request(rq, error, nr_bytes, 0);
+}
+
+/**
+ * __blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Must be called with queue lock held unlike blk_end_request().
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+static inline bool __blk_end_request(struct request *rq, int error,
+				     unsigned int nr_bytes)
+{
+	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
+}
+
+/**
+ * end_request - end I/O on the current segment of the request
+ * @rq:		the request being processed
+ * @uptodate:	error value or %0/%1 uptodate flag
+ *
+ * Description:
+ *     Ends I/O on the current segment of a request. If that is the only
+ *     remaining segment, the request is also completed and freed.
+ *
+ *     This is a remnant of how older block drivers handled I/O completions.
+ *     Modern drivers typically end I/O on the full request in one go, unless
+ *     they have a residual value to account for. For that case this function
+ *     isn't really useful, unless the residual just happens to be the
+ *     full current segment. In other words, don't use this function in new
+ *     code. Use blk_end_request() or __blk_end_request() to end a request.
+ **/
+static inline void end_request(struct request *rq, int uptodate)
+{
+	int error = 0;
+
+	if (uptodate <= 0)
+		error = uptodate ? uptodate : -EIO;
+
+	__blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0);
+}
+
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 extern void blk_abort_queue(struct request_queue *);
-extern void blk_update_request(struct request *rq, int error,
-			       unsigned int nr_bytes);
 
 /*
  * Access functions for manipulating queue properties
-- 
cgit v0.10.2


From b243ddcbe9be146172baa544dadecebf156eda0e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: move rq->start_time initialization to blk_rq_init()

rq->start_time was initialized in init_request_from_bio() so special
requests didn't have start_time set.  This has been okay as start_time
has been used only for fs requests; however, there is no indication of
this actually is the case or not.  Set rq->start_time in blk_rq_init()
and guarantee that all initialized rq's have its start_time set.  This
improves consistency at virtually no cost and future changes will make
use of the timestamp for !bio requests.

[ Impact: rq->start_time is valid for all requests ]

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index 89cc05d..b84250d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -134,6 +134,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->cmd_len = BLK_MAX_CDB;
 	rq->tag = -1;
 	rq->ref_count = 1;
+	rq->start_time = jiffies;
 }
 EXPORT_SYMBOL(blk_rq_init);
 
@@ -1099,7 +1100,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	req->errors = 0;
 	req->hard_sector = req->sector = bio->bi_sector;
 	req->ioprio = bio_prio(bio);
-	req->start_time = jiffies;
 	blk_rq_bio_prep(req->q, req, bio);
 }
 
-- 
cgit v0.10.2


From 40cbbb781d3eba5d6ac0860db078af490e5c7c6b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:19 +0900
Subject: block: implement and use [__]blk_end_request_all()

There are many [__]blk_end_request() call sites which call it with
full request length and expect full completion.  Many of them ensure
that the request actually completes by doing BUG_ON() the return
value, which is awkward and error-prone.

This patch adds [__]blk_end_request_all() which takes @rq and @error
and fully completes the request.  BUG_ON() is added to to ensure that
this actually happens.

Most conversions are simple but there are a few noteworthy ones.

* cdrom/viocd: viocd_end_request() replaced with direct calls to
  __blk_end_request_all().

* s390/block/dasd: dasd_end_request() replaced with direct calls to
  __blk_end_request_all().

* s390/char/tape_block: tapeblock_end_request() replaced with direct
  calls to blk_end_request_all().

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Mike Miller <mike.miller@hp.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Alex Dubov <oakad@yahoo.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 0abfbaa..cf81bad 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -192,8 +192,7 @@ static void mbox_tx_work(struct work_struct *work)
 		}
 
 		spin_lock(q->queue_lock);
-		if (__blk_end_request(rq, 0, 0))
-			BUG();
+		__blk_end_request_all(rq, 0);
 		spin_unlock(q->queue_lock);
 	}
 }
@@ -224,10 +223,7 @@ static void mbox_rx_work(struct work_struct *work)
 			break;
 
 		msg = (mbox_msg_t) rq->data;
-
-		if (blk_end_request(rq, 0, 0))
-			BUG();
-
+		blk_end_request_all(rq, 0);
 		mbox->rxq->callback((void *)msg);
 	}
 }
@@ -337,8 +333,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf)
 
 		*p = (mbox_msg_t) rq->data;
 
-		if (blk_end_request(rq, 0, 0))
-			BUG();
+		blk_end_request_all(rq, 0);
 
 		if (unlikely(mbox_seq_test(mbox, *p))) {
 			pr_info("mbox: Illegal seq bit!(%08x) ignored\n", *p);
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 20b4111..c8d0876 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -106,10 +106,7 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	 */
 	q->ordseq = 0;
 	rq = q->orig_bar_rq;
-
-	if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
-		BUG();
-
+	__blk_end_request_all(rq, q->orderr);
 	return true;
 }
 
@@ -252,9 +249,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 			 * with prejudice.
 			 */
 			elv_dequeue_request(q, rq);
-			if (__blk_end_request(rq, -EOPNOTSUPP,
-					      blk_rq_bytes(rq)))
-				BUG();
+			__blk_end_request_all(rq, -EOPNOTSUPP);
 			*rqp = NULL;
 			return false;
 		}
diff --git a/block/blk-core.c b/block/blk-core.c
index b84250d..0520cc7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1780,7 +1780,7 @@ struct request *elv_next_request(struct request_queue *q)
 			break;
 		} else if (ret == BLKPREP_KILL) {
 			rq->cmd_flags |= REQ_QUIET;
-			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+			__blk_end_request_all(rq, -EIO);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
diff --git a/block/elevator.c b/block/elevator.c
index b03b875..1af5d9f 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -810,7 +810,7 @@ void elv_abort_queue(struct request_queue *q)
 		rq = list_entry_rq(q->queue_head.next);
 		rq->cmd_flags |= REQ_QUIET;
 		trace_block_rq_abort(q, rq);
-		__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+		__blk_end_request_all(rq, -EIO);
 	}
 }
 EXPORT_SYMBOL(elv_abort_queue);
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index ca268ca..488a8f4 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -1024,8 +1024,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout)
 				cmd->req.sg[i].size, ddir);
 
 	DBGPX(printk("Done with %p\n", rq););
-	if (__blk_end_request(rq, error, blk_rq_bytes(rq)))
-		BUG();
+	__blk_end_request_all(rq, error);
 }
 
 /*
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index ff0448e..60e85bb 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -749,8 +749,7 @@ static inline void carm_end_request_queued(struct carm_host *host,
 	struct request *req = crq->rq;
 	int rc;
 
-	rc = __blk_end_request(req, error, blk_rq_bytes(req));
-	assert(rc == 0);
+	__blk_end_request_all(req, error);
 
 	rc = carm_put_request(host, crq);
 	assert(rc == 0);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5d34764..50745e6 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -62,7 +62,7 @@ static void blk_done(struct virtqueue *vq)
 			break;
 		}
 
-		__blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req));
+		__blk_end_request_all(vbr->req, error);
 		list_del(&vbr->list);
 		mempool_free(vbr, vblk->pool);
 	}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8f90508..cd6cfe3 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -551,7 +551,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 	for (i = info->ring.rsp_cons; i != rp; i++) {
 		unsigned long id;
-		int ret;
 
 		bret = RING_GET_RESPONSE(&info->ring, i);
 		id   = bret->id;
@@ -578,8 +577,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
 					"request: %x\n", bret->status);
 
-			ret = __blk_end_request(req, error, blk_rq_bytes(req));
-			BUG_ON(ret);
+			__blk_end_request_all(req, error);
 			break;
 		default:
 			BUG();
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 2eecb77..fee9a9e 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -632,7 +632,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 		* before handling ending the request */
 		spin_lock(&gdrom_lock);
 		list_del_init(&req->queuelist);
-		__blk_end_request(req, err, blk_rq_bytes(req));
+		__blk_end_request_all(req, err);
 	}
 	spin_unlock(&gdrom_lock);
 	kfree(read_command);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 1392935..cc3efa0 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -291,23 +291,6 @@ static int send_request(struct request *req)
 	return 0;
 }
 
-static void viocd_end_request(struct request *req, int error)
-{
-	int nsectors = req->hard_nr_sectors;
-
-	/*
-	 * Make sure it's fully ended, and ensure that we process
-	 * at least one sector.
-	 */
-	if (blk_pc_request(req))
-		nsectors = (req->data_len + 511) >> 9;
-	if (!nsectors)
-		nsectors = 1;
-
-	if (__blk_end_request(req, error, nsectors << 9))
-		BUG();
-}
-
 static int rwreq;
 
 static void do_viocd_request(struct request_queue *q)
@@ -316,11 +299,11 @@ static void do_viocd_request(struct request_queue *q)
 
 	while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) {
 		if (!blk_fs_request(req))
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		else if (send_request(req) < 0) {
 			printk(VIOCD_KERN_WARNING
 					"unable to send message to OS/400!");
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		} else
 			rwreq++;
 	}
@@ -531,9 +514,9 @@ return_complete:
 					"with rc %d:0x%04X: %s\n",
 					req, event->xRc,
 					bevent->sub_result, err->msg);
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		} else
-			viocd_end_request(req, 0);
+			__blk_end_request_all(req, 0);
 
 		/* restart handling of incoming requests */
 		spin_unlock_irqrestore(&viocd_reqlock, flags);
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index de143de..a416346 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -826,7 +826,7 @@ static void mspro_block_submit_req(struct request_queue *q)
 
 	if (msb->eject) {
 		while ((req = elv_next_request(q)) != NULL)
-			__blk_end_request(req, -ENODEV, blk_rq_bytes(req));
+			__blk_end_request_all(req, -ENODEV);
 
 		return;
 	}
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index d181527..fabec95 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1614,15 +1614,6 @@ void dasd_block_clear_timer(struct dasd_block *block)
 }
 
 /*
- * posts the buffer_cache about a finalized request
- */
-static inline void dasd_end_request(struct request *req, int error)
-{
-	if (__blk_end_request(req, error, blk_rq_bytes(req)))
-		BUG();
-}
-
-/*
  * Process finished error recovery ccw.
  */
 static inline void __dasd_block_process_erp(struct dasd_block *block,
@@ -1676,7 +1667,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "Rejecting write request %p",
 				      req);
 			blkdev_dequeue_request(req);
-			dasd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 		cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -1705,7 +1696,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "on request %p",
 				      PTR_ERR(cqr), req);
 			blkdev_dequeue_request(req);
-			dasd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 		/*
@@ -1731,7 +1722,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
 	status = cqr->block->base->discipline->free_cp(cqr, req);
 	if (status <= 0)
 		error = status ? status : -EIO;
-	dasd_end_request(req, error);
+	__blk_end_request_all(req, error);
 }
 
 /*
@@ -2040,7 +2031,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 	spin_lock_irq(&block->request_queue_lock);
 	while ((req = elv_next_request(block->request_queue))) {
 		blkdev_dequeue_request(req);
-		dasd_end_request(req, -EIO);
+		__blk_end_request_all(req, -EIO);
 	}
 	spin_unlock_irq(&block->request_queue_lock);
 }
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index f32e89e..86596d3 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -74,13 +74,6 @@ tapeblock_trigger_requeue(struct tape_device *device)
  * Post finished request.
  */
 static void
-tapeblock_end_request(struct request *req, int error)
-{
-	if (blk_end_request(req, error, blk_rq_bytes(req)))
-		BUG();
-}
-
-static void
 __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 {
 	struct tape_device *device;
@@ -90,7 +83,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 
 	device = ccw_req->device;
 	req = (struct request *) data;
-	tapeblock_end_request(req, (ccw_req->rc == 0) ? 0 : -EIO);
+	blk_end_request_all(req, (ccw_req->rc == 0) ? 0 : -EIO);
 	if (ccw_req->rc == 0)
 		/* Update position. */
 		device->blk_data.block_position =
@@ -118,7 +111,7 @@ tapeblock_start_request(struct tape_device *device, struct request *req)
 	ccw_req = device->discipline->bread(device, req);
 	if (IS_ERR(ccw_req)) {
 		DBF_EVENT(1, "TBLOCK: bread failed\n");
-		tapeblock_end_request(req, -EIO);
+		blk_end_request_all(req, -EIO);
 		return PTR_ERR(ccw_req);
 	}
 	ccw_req->callback = __tapeblock_end_request;
@@ -131,7 +124,7 @@ tapeblock_start_request(struct tape_device *device, struct request *req)
 		 * Start/enqueueing failed. No retries in
 		 * this case.
 		 */
-		tapeblock_end_request(req, -EIO);
+		blk_end_request_all(req, -EIO);
 		device->discipline->free_bread(ccw_req);
 	}
 
@@ -177,7 +170,7 @@ tapeblock_requeue(struct work_struct *work) {
 			DBF_EVENT(1, "TBLOCK: Rejecting write request\n");
 			blkdev_dequeue_request(req);
 			spin_unlock_irq(&device->blk_data.request_queue_lock);
-			tapeblock_end_request(req, -EIO);
+			blk_end_request_all(req, -EIO);
 			spin_lock_irq(&device->blk_data.request_queue_lock);
 			continue;
 		}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d1cb64a..756ac7c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -922,7 +922,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			if (driver_byte(result) & DRIVER_SENSE)
 				scsi_print_sense("", cmd);
 		}
-		blk_end_request(req, -EIO, blk_rq_bytes(req));
+		blk_end_request_all(req, -EIO);
 		scsi_next_command(cmd);
 		break;
 	case ACTION_REPREP:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 501f684..e33c835 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -883,6 +883,22 @@ static inline bool blk_end_request(struct request *rq, int error,
 }
 
 /**
+ * blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.
+ */
+static inline void blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+
+	pending = blk_end_request(rq, error, blk_rq_bytes(rq));
+	BUG_ON(pending);
+}
+
+/**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
  * @error:    %0 for success, < %0 for error
@@ -902,6 +918,22 @@ static inline bool __blk_end_request(struct request *rq, int error,
 }
 
 /**
+ * __blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.  Must be called with queue lock held.
+ */
+static inline void __blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+
+	pending = __blk_end_request(rq, error, blk_rq_bytes(rq));
+	BUG_ON(pending);
+}
+
+/**
  * end_request - end I/O on the current segment of the request
  * @rq:		the request being processed
  * @uptodate:	error value or %0/%1 uptodate flag
-- 
cgit v0.10.2


From f06d9a2b52e246a66b606130cea3f0d7b7be17a7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:19 +0900
Subject: block: replace end_request() with [__]blk_end_request_cur()

end_request() has been kept around for backward compatibility;
however, it's about time for it to go away.

* There aren't too many users left.

* Its use of @updtodate is pretty confusing.

* In some cases, newer code ends up using mixture of end_request() and
  [__]blk_end_request[_all](), which is way too confusing.

So, add [__]blk_end_request_cur() and replace end_request() with it.
Most conversions are straightforward.  Noteworthy ones are...

* paride/pcd: next_request() updated to take 0/-errno instead of 1/0.

* paride/pf: pf_end_request() and next_request() updated to take
  0/-errno instead of 1/0.

* xd: xd_readwrite() updated to return 0/-errno instead of 1/0.

* mtd/mtd_blkdevs: blktrans_discard_request() updated to return
  0/-errno instead of 1/0.  Unnecessary local variable res
  initialization removed from mtd_blktrans_thread().

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Joerg Dorchain <joerg@dorchain.net>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Laurent Vivier <Laurent@lvivier.info>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: unsik Kim <donari75@gmail.com>

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 8df436f..b99a2a6 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1359,7 +1359,7 @@ static void redo_fd_request(void)
 #endif
 		block = CURRENT->sector + cnt;
 		if ((int)block > floppy->blocks) {
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 
@@ -1373,11 +1373,11 @@ static void redo_fd_request(void)
 
 		if ((rq_data_dir(CURRENT) != READ) && (rq_data_dir(CURRENT) != WRITE)) {
 			printk(KERN_WARNING "do_fd_request: unknown command\n");
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 		if (get_track(drive, track) == -1) {
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 
@@ -1391,7 +1391,7 @@ static void redo_fd_request(void)
 
 			/* keep the drive spinning while writes are scheduled */
 			if (!fd_motor_on(drive)) {
-				end_request(CURRENT, 0);
+				__blk_end_request_cur(CURRENT, -EIO);
 				goto repeat;
 			}
 			/*
@@ -1410,7 +1410,7 @@ static void redo_fd_request(void)
 	CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
 	CURRENT->sector += CURRENT->current_nr_sectors;
 
-	end_request(CURRENT, 1);
+	__blk_end_request_cur(CURRENT, 0);
 	goto repeat;
 }
 
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 4234c11..44a8702 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -612,7 +612,7 @@ static void fd_error( void )
 	CURRENT->errors++;
 	if (CURRENT->errors >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
-		end_request(CURRENT, 0);
+		__blk_end_request_cur(CURRENT, -EIO);
 	}
 	else if (CURRENT->errors == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -734,7 +734,7 @@ static void do_fd_action( int drive )
 			/* all sectors finished */
 			CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
 			CURRENT->sector += CURRENT->current_nr_sectors;
-			end_request(CURRENT, 1);
+			__blk_end_request_cur(CURRENT, 0);
 			redo_fd_request();
 			return;
 		    }
@@ -1141,7 +1141,7 @@ static void fd_rwsec_done1(int status)
 		/* all sectors finished */
 		CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
 		CURRENT->sector += CURRENT->current_nr_sectors;
-		end_request(CURRENT, 1);
+		__blk_end_request_cur(CURRENT, 0);
 		redo_fd_request();
 	}
 	return;
@@ -1414,7 +1414,7 @@ repeat:
 	if (!UD.connected) {
 		/* drive not connected */
 		printk(KERN_ERR "Unknown Device: fd%d\n", drive );
-		end_request(CURRENT, 0);
+		__blk_end_request_cur(CURRENT, -EIO);
 		goto repeat;
 	}
 		
@@ -1430,12 +1430,12 @@ repeat:
 		/* user supplied disk type */
 		if (--type >= NUM_DISK_MINORS) {
 			printk(KERN_WARNING "fd%d: invalid disk format", drive );
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 		if (minor2disktype[type].drive_types > DriveType)  {
 			printk(KERN_WARNING "fd%d: unsupported disk format", drive );
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 		type = minor2disktype[type].index;
@@ -1445,7 +1445,7 @@ repeat:
 	}
 	
 	if (CURRENT->sector + 1 > UDT->blocks) {
-		end_request(CURRENT, 0);
+		__blk_end_request_cur(CURRENT, -EIO);
 		goto repeat;
 	}
 
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index baaa9e4..5cb300b 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -410,7 +410,7 @@ static void bad_rw_intr(void)
 	if (req != NULL) {
 		struct hd_i_struct *disk = req->rq_disk->private_data;
 		if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			disk->special_op = disk->recalibrate = 1;
 		} else if (req->errors % RESET_FREQ == 0)
 			reset = 1;
@@ -466,7 +466,7 @@ ok_to_read:
 		req->buffer+512);
 #endif
 	if (req->current_nr_sectors <= 0)
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 	if (i > 0) {
 		SET_HANDLER(&read_intr);
 		return;
@@ -505,7 +505,7 @@ ok_to_write:
 	--req->current_nr_sectors;
 	req->buffer += 512;
 	if (!i || (req->bio && req->current_nr_sectors <= 0))
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 	if (i > 0) {
 		SET_HANDLER(&write_intr);
 		outsw(HD_DATA, req->buffer, 256);
@@ -548,7 +548,7 @@ static void hd_times_out(unsigned long dummy)
 #ifdef DEBUG
 		printk("%s: too many errors\n", name);
 #endif
-		end_request(CURRENT, 0);
+		__blk_end_request_cur(CURRENT, -EIO);
 	}
 	hd_request();
 	spin_unlock_irq(hd_queue->queue_lock);
@@ -563,7 +563,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req)
 	}
 	if (disk->head > 16) {
 		printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 	}
 	disk->special_op = 0;
 	return 1;
@@ -607,7 +607,7 @@ repeat:
 	    ((block+nsect) > get_capacity(req->rq_disk))) {
 		printk("%s: bad access: block=%d, count=%d\n",
 			req->rq_disk->disk_name, block, nsect);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 		goto repeat;
 	}
 
@@ -647,7 +647,7 @@ repeat:
 			break;
 		default:
 			printk("unknown hd-command\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			break;
 		}
 	}
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index f389835..408c2bd 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -285,7 +285,7 @@ static void mg_bad_rw_intr(struct mg_host *host)
 	if (req != NULL)
 		if (++req->errors >= MG_MAX_ERRORS ||
 				host->error == MG_ERR_TIMEOUT)
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 }
 
 static unsigned int mg_out(struct mg_host *host,
@@ -351,7 +351,7 @@ static void mg_read(struct request *req)
 
 		if (req->current_nr_sectors <= 0) {
 			MG_DBG("remain : %d sects\n", remains);
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			if (remains > 0)
 				req = elv_next_request(host->breq);
 		}
@@ -395,7 +395,7 @@ static void mg_write(struct request *req)
 
 		if (req->current_nr_sectors <= 0) {
 			MG_DBG("remain : %d sects\n", remains);
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			if (remains > 0)
 				req = elv_next_request(host->breq);
 		}
@@ -448,7 +448,7 @@ ok_to_read:
 
 	/* let know if current segment done */
 	if (req->current_nr_sectors <= 0)
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 
 	/* set handler if read remains */
 	if (i > 0) {
@@ -497,7 +497,7 @@ ok_to_write:
 
 	/* let know if current segment or all done */
 	if (!i || (req->bio && req->current_nr_sectors <= 0))
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 
 	/* write 1 sector and set handler if remains */
 	if (i > 0) {
@@ -563,7 +563,7 @@ static void mg_request_poll(struct request_queue *q)
 			default:
 				printk(KERN_WARNING "%s:%d unknown command\n",
 						__func__, __LINE__);
-				end_request(req, 0);
+				__blk_end_request_cur(req, -EIO);
 				break;
 			}
 		}
@@ -617,7 +617,7 @@ static unsigned int mg_issue_req(struct request *req,
 	default:
 		printk(KERN_WARNING "%s:%d unknown command\n",
 				__func__, __LINE__);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 		break;
 	}
 	return MG_ERR_NONE;
@@ -655,7 +655,7 @@ static void mg_request(struct request_queue *q)
 					"%s: bad access: sector=%d, count=%d\n",
 					req->rq_disk->disk_name,
 					sect_num, sect_cnt);
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index e91d4b4..9fd57c2 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -735,16 +735,16 @@ static void do_pcd_request(struct request_queue * q)
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
 			return;
 		} else
-			end_request(pcd_req, 0);
+			__blk_end_request_cur(pcd_req, -EIO);
 	}
 }
 
-static inline void next_request(int success)
+static inline void next_request(int err)
 {
 	unsigned long saved_flags;
 
 	spin_lock_irqsave(&pcd_lock, saved_flags);
-	end_request(pcd_req, success);
+	__blk_end_request_cur(pcd_req, err);
 	pcd_busy = 0;
 	do_pcd_request(pcd_queue);
 	spin_unlock_irqrestore(&pcd_lock, saved_flags);
@@ -781,7 +781,7 @@ static void pcd_start(void)
 
 	if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
 		pcd_bufblk = -1;
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
@@ -796,7 +796,7 @@ static void do_pcd_read(void)
 	pcd_retries = 0;
 	pcd_transfer();
 	if (!pcd_count) {
-		next_request(1);
+		next_request(0);
 		return;
 	}
 
@@ -815,7 +815,7 @@ static void do_pcd_read_drq(void)
 			return;
 		}
 		pcd_bufblk = -1;
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 9299455..0732df4 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -410,7 +410,8 @@ static void run_fsm(void)
 				pd_claimed = 0;
 				phase = NULL;
 				spin_lock_irqsave(&pd_lock, saved_flags);
-				end_request(pd_req, res);
+				__blk_end_request_cur(pd_req,
+						      res == Ok ? 0 : -EIO);
 				pd_req = elv_next_request(pd_queue);
 				if (!pd_req)
 					stop = 1;
@@ -477,7 +478,7 @@ static int pd_next_buf(void)
 	if (pd_count)
 		return 0;
 	spin_lock_irqsave(&pd_lock, saved_flags);
-	end_request(pd_req, 1);
+	__blk_end_request_cur(pd_req, 0);
 	pd_count = pd_req->current_nr_sectors;
 	pd_buf = pd_req->buffer;
 	spin_unlock_irqrestore(&pd_lock, saved_flags);
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index bef3b99..3871e35 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -750,10 +750,10 @@ static int pf_ready(void)
 
 static struct request_queue *pf_queue;
 
-static void pf_end_request(int uptodate)
+static void pf_end_request(int err)
 {
 	if (pf_req) {
-		end_request(pf_req, uptodate);
+		__blk_end_request_cur(pf_req, err);
 		pf_req = NULL;
 	}
 }
@@ -773,7 +773,7 @@ repeat:
 	pf_count = pf_req->current_nr_sectors;
 
 	if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
-		pf_end_request(0);
+		pf_end_request(-EIO);
 		goto repeat;
 	}
 
@@ -788,7 +788,7 @@ repeat:
 		pi_do_claimed(pf_current->pi, do_pf_write);
 	else {
 		pf_busy = 0;
-		pf_end_request(0);
+		pf_end_request(-EIO);
 		goto repeat;
 	}
 }
@@ -805,7 +805,7 @@ static int pf_next_buf(void)
 		return 1;
 	if (!pf_count) {
 		spin_lock_irqsave(&pf_spin_lock, saved_flags);
-		pf_end_request(1);
+		pf_end_request(0);
 		pf_req = elv_next_request(pf_queue);
 		spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 		if (!pf_req)
@@ -816,12 +816,12 @@ static int pf_next_buf(void)
 	return 0;
 }
 
-static inline void next_request(int success)
+static inline void next_request(int err)
 {
 	unsigned long saved_flags;
 
 	spin_lock_irqsave(&pf_spin_lock, saved_flags);
-	pf_end_request(success);
+	pf_end_request(err);
 	pf_busy = 0;
 	do_pf_request(pf_queue);
 	spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
@@ -844,7 +844,7 @@ static void do_pf_read_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_read_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 	pf_mask = STAT_DRQ;
@@ -863,7 +863,7 @@ static void do_pf_read_drq(void)
 				pi_do_claimed(pf_current->pi, do_pf_read_start);
 				return;
 			}
-			next_request(0);
+			next_request(-EIO);
 			return;
 		}
 		pi_read_block(pf_current->pi, pf_buf, 512);
@@ -871,7 +871,7 @@ static void do_pf_read_drq(void)
 			break;
 	}
 	pi_disconnect(pf_current->pi);
-	next_request(1);
+	next_request(0);
 }
 
 static void do_pf_write(void)
@@ -890,7 +890,7 @@ static void do_pf_write_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
@@ -903,7 +903,7 @@ static void do_pf_write_start(void)
 				pi_do_claimed(pf_current->pi, do_pf_write_start);
 				return;
 			}
-			next_request(0);
+			next_request(-EIO);
 			return;
 		}
 		pi_write_block(pf_current->pi, pf_buf, 512);
@@ -923,11 +923,11 @@ static void do_pf_write_done(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 	pi_disconnect(pf_current->pi);
-	next_request(1);
+	next_request(0);
 }
 
 static int __init pf_init(void)
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index bccc42b..d23b54b 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
 			__LINE__, op, res);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 		return 0;
 	}
 
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
 			__func__, __LINE__, res);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 		return 0;
 	}
 
@@ -205,7 +205,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 				break;
 		} else {
 			blk_dump_rq_flags(req, DEVICE_NAME " bad request");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 	}
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index d22cc38..6544a7b 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -532,39 +532,39 @@ static void redo_fd_request(struct request_queue *q)
 
 		fs = req->rq_disk->private_data;
 		if (req->sector < 0 || req->sector >= fs->total_secs) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (req->current_nr_sectors == 0) {
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			continue;
 		}
 		if (!fs->disk_in) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (rq_data_dir(req) == WRITE) {
 			if (fs->write_protected) {
-				end_request(req, 0);
+				__blk_end_request_cur(req, -EIO);
 				continue;
 			}
 		}
 		switch (rq_data_dir(req)) {
 		case WRITE:
 			/* NOT IMPLEMENTED */
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			break;
 		case READ:
 			if (floppy_read_sectors(fs, req->sector,
 						req->current_nr_sectors,
 						req->buffer)) {
-				end_request(req, 0);
+				__blk_end_request_cur(req, -EIO);
 				continue;
 			}
 			req->nr_sectors -= req->current_nr_sectors;
 			req->sector += req->current_nr_sectors;
 			req->buffer += req->current_nr_sectors * 512;
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			break;
 		}
 	}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 6129653..5904f7b 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -320,15 +320,15 @@ static void start_request(struct floppy_state *fs)
 #endif
 
 		if (req->sector < 0 || req->sector >= fs->total_secs) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (req->current_nr_sectors == 0) {
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			continue;
 		}
 		if (fs->ejected) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
@@ -336,7 +336,7 @@ static void start_request(struct floppy_state *fs)
 			if (fs->write_prot < 0)
 				fs->write_prot = swim3_readbit(fs, WRITE_PROT);
 			if (fs->write_prot) {
-				end_request(req, 0);
+				__blk_end_request_cur(req, -EIO);
 				continue;
 			}
 		}
@@ -508,7 +508,7 @@ static void act(struct floppy_state *fs)
 		case do_transfer:
 			if (fs->cur_cyl != fs->req_cyl) {
 				if (fs->retries > 5) {
-					end_request(fd_req, 0);
+					__blk_end_request_cur(fd_req, -EIO);
 					fs->state = idle;
 					return;
 				}
@@ -540,7 +540,7 @@ static void scan_timeout(unsigned long data)
 	out_8(&sw->intr_enable, 0);
 	fs->cur_cyl = -1;
 	if (fs->retries > 5) {
-		end_request(fd_req, 0);
+		__blk_end_request_cur(fd_req, -EIO);
 		fs->state = idle;
 		start_request(fs);
 	} else {
@@ -559,7 +559,7 @@ static void seek_timeout(unsigned long data)
 	out_8(&sw->select, RELAX);
 	out_8(&sw->intr_enable, 0);
 	printk(KERN_ERR "swim3: seek timeout\n");
-	end_request(fd_req, 0);
+	__blk_end_request_cur(fd_req, -EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -583,7 +583,7 @@ static void settle_timeout(unsigned long data)
 		return;
 	}
 	printk(KERN_ERR "swim3: seek settle timeout\n");
-	end_request(fd_req, 0);
+	__blk_end_request_cur(fd_req, -EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -615,7 +615,7 @@ static void xfer_timeout(unsigned long data)
 	fd_req->current_nr_sectors -= s;
 	printk(KERN_ERR "swim3: timeout %sing sector %ld\n",
 	       (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector);
-	end_request(fd_req, 0);
+	__blk_end_request_cur(fd_req, -EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -646,7 +646,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk(KERN_ERR "swim3: seen sector but cyl=ff?\n");
 				fs->cur_cyl = -1;
 				if (fs->retries > 5) {
-					end_request(fd_req, 0);
+					__blk_end_request_cur(fd_req, -EIO);
 					fs->state = idle;
 					start_request(fs);
 				} else {
@@ -731,7 +731,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk("swim3: error %sing block %ld (err=%x)\n",
 				       rq_data_dir(fd_req) == WRITE? "writ": "read",
 				       (long)fd_req->sector, err);
-				end_request(fd_req, 0);
+				__blk_end_request_cur(fd_req, -EIO);
 				fs->state = idle;
 			}
 		} else {
@@ -740,7 +740,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk(KERN_ERR "swim3: fd dma: stat=%x resid=%d\n", stat, resid);
 				printk(KERN_ERR "  state=%d, dir=%x, intr=%x, err=%x\n",
 				       fs->state, rq_data_dir(fd_req), intr, err);
-				end_request(fd_req, 0);
+				__blk_end_request_cur(fd_req, -EIO);
 				fs->state = idle;
 				start_request(fs);
 				break;
@@ -749,7 +749,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 			fd_req->current_nr_sectors -= fs->scount;
 			fd_req->buffer += fs->scount * 512;
 			if (fd_req->current_nr_sectors <= 0) {
-				end_request(fd_req, 1);
+				__blk_end_request_cur(fd_req, 0);
 				fs->state = idle;
 			} else {
 				fs->req_sector += fs->scount;
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 64b496f..6f6ad82 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -314,21 +314,22 @@ static void do_xd_request (struct request_queue * q)
 		int retry;
 
 		if (!blk_fs_request(req)) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (block + count > get_capacity(req->rq_disk)) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (rw != READ && rw != WRITE) {
 			printk("do_xd_request: unknown request\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		for (retry = 0; (retry < XD_RETRIES) && !res; retry++)
 			res = xd_readwrite(rw, disk, req->buffer, block, count);
-		end_request(req, res);	/* wrap up, 0 = fail, 1 = success */
+		/* wrap up, 0 = success, -errno = fail */
+		__blk_end_request_cur(req, res);
 	}
 }
 
@@ -418,7 +419,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_
 				printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write"));
 				xd_recalibrate(drive);
 				spin_lock_irq(&xd_lock);
-				return (0);
+				return -EIO;
 			case 2:
 				if (sense[0] & 0x30) {
 					printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing"));
@@ -439,7 +440,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_
 				else
 					printk(" - no valid disk address\n");
 				spin_lock_irq(&xd_lock);
-				return (0);
+				return -EIO;
 		}
 		if (xd_dma_buffer)
 			for (i=0; i < (temp * 0x200); i++)
@@ -448,7 +449,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_
 		count -= temp, buffer += temp * 0x200, block += temp;
 	}
 	spin_lock_irq(&xd_lock);
-	return (1);
+	return 0;
 }
 
 /* xd_recalibrate: recalibrate a given drive and reset controller if necessary */
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index cd6cfe3..b456447 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -302,7 +302,7 @@ static void do_blkif_request(struct request_queue *rq)
 	while ((req = elv_next_request(rq)) != NULL) {
 		info = req->rq_disk->private_data;
 		if (!blk_fs_request(req)) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 4aecf5d..b1e1d7e 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -466,7 +466,7 @@ struct request *ace_get_next_request(struct request_queue * q)
 	while ((req = elv_next_request(q)) != NULL) {
 		if (blk_fs_request(req))
 			break;
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 	}
 	return req;
 }
@@ -494,7 +494,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 
 		/* Drop all pending requests */
 		while ((req = elv_next_request(ace->queue)) != NULL)
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 80754cd..b66ad58 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -77,7 +77,7 @@ static void do_z2_request(struct request_queue *q)
 		if (start + len > z2ram_size) {
 			printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n",
 				req->sector, req->current_nr_sectors);
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		while (len) {
@@ -93,7 +93,7 @@ static void do_z2_request(struct request_queue *q)
 			start += size;
 			len -= size;
 		}
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 	}
 }
 
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index fee9a9e..cab2b1f 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -654,17 +654,17 @@ static void gdrom_request(struct request_queue *rq)
 	while ((req = elv_next_request(rq)) != NULL) {
 		if (!blk_fs_request(req)) {
 			printk(KERN_DEBUG "GDROM: Non-fs request ignored\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 		}
 		if (rq_data_dir(req) != READ) {
 			printk(KERN_NOTICE "GDROM: Read only device -");
 			printk(" write request ignored\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 		}
 		if (req->nr_sectors)
 			gdrom_request_handler_dma(req);
 		else
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 	}
 }
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index a443e13..221317e 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -923,7 +923,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 				break;
 			}
 		} else
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 	}
 };
 
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index a49a9c8..76c4c8d 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -54,33 +54,33 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
 	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
 	    req->cmd[0] == REQ_LB_OP_DISCARD)
-		return !tr->discard(dev, block, nsect);
+		return tr->discard(dev, block, nsect);
 
 	if (!blk_fs_request(req))
-		return 0;
+		return -EIO;
 
 	if (req->sector + req->current_nr_sectors > get_capacity(req->rq_disk))
-		return 0;
+		return -EIO;
 
 	switch(rq_data_dir(req)) {
 	case READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->readsect(dev, block, buf))
-				return 0;
-		return 1;
+				return -EIO;
+		return 0;
 
 	case WRITE:
 		if (!tr->writesect)
-			return 0;
+			return -EIO;
 
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->writesect(dev, block, buf))
-				return 0;
-		return 1;
+				return -EIO;
+		return 0;
 
 	default:
 		printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
-		return 0;
+		return -EIO;
 	}
 }
 
@@ -96,7 +96,7 @@ static int mtd_blktrans_thread(void *arg)
 	while (!kthread_should_stop()) {
 		struct request *req;
 		struct mtd_blktrans_dev *dev;
-		int res = 0;
+		int res;
 
 		req = elv_next_request(rq);
 
@@ -119,7 +119,7 @@ static int mtd_blktrans_thread(void *arg)
 
 		spin_lock_irq(rq->queue_lock);
 
-		end_request(req, res);
+		__blk_end_request_cur(req, res);
 	}
 	spin_unlock_irq(rq->queue_lock);
 
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index a85ad05..0961788 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -192,25 +192,25 @@ static void jsfd_do_request(struct request_queue *q)
 		size_t len = req->current_nr_sectors << 9;
 
 		if ((offset + len) > jdp->dsize) {
-               		end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
 		if (rq_data_dir(req) != READ) {
 			printk(KERN_ERR "jsfd: write\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
 		if ((jdp->dbase & 0xff000000) != 0x20000000) {
 			printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase);
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
 		jsfd_read(req->buffer, jdp->dbase + offset, len);
 
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 	}
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e33c835..cfeb3c2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -845,9 +845,8 @@ extern unsigned int blk_rq_cur_bytes(struct request *rq);
  * blk_update_request() completes given number of bytes and updates
  * the request without completing it.
  *
- * blk_end_request() and friends.  __blk_end_request() and
- * end_request() must be called with the request queue spinlock
- * acquired.
+ * blk_end_request() and friends.  __blk_end_request() must be called
+ * with the request queue spinlock acquired.
  *
  * Several drivers define their own end_request and call
  * blk_end_request() for parts of the original function.
@@ -899,6 +898,19 @@ static inline void blk_end_request_all(struct request *rq, int error)
 }
 
 /**
+ * blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Complete the current consecutively mapped chunk from @rq.
+ */
+static inline void blk_end_request_cur(struct request *rq, int error)
+{
+	blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+}
+
+/**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
  * @error:    %0 for success, < %0 for error
@@ -934,29 +946,17 @@ static inline void __blk_end_request_all(struct request *rq, int error)
 }
 
 /**
- * end_request - end I/O on the current segment of the request
- * @rq:		the request being processed
- * @uptodate:	error value or %0/%1 uptodate flag
+ * __blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @err: %0 for success, < %0 for error
  *
  * Description:
- *     Ends I/O on the current segment of a request. If that is the only
- *     remaining segment, the request is also completed and freed.
- *
- *     This is a remnant of how older block drivers handled I/O completions.
- *     Modern drivers typically end I/O on the full request in one go, unless
- *     they have a residual value to account for. For that case this function
- *     isn't really useful, unless the residual just happens to be the
- *     full current segment. In other words, don't use this function in new
- *     code. Use blk_end_request() or __blk_end_request() to end a request.
- **/
-static inline void end_request(struct request *rq, int uptodate)
+ *     Complete the current consecutively mapped chunk from @rq.  Must
+ *     be called with queue lock held.
+ */
+static inline void __blk_end_request_cur(struct request *rq, int error)
 {
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0);
+	__blk_end_request(rq, error, rq->hard_cur_sectors << 9);
 }
 
 extern void blk_complete_request(struct request *);
-- 
cgit v0.10.2


From ec24751a6b57e1373a12361e581b2458bc9bb791 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:20 +0900
Subject: arm-omap: don't abuse rq->data

omap mailbox uses rq->data as the second opaque pointer to carry
mbox_msg_t and rq->special message argument which is needed only for
tx.  Add and use omap_msg_tx_data struct for tx and use rq->special
for mbox_msg_t for rx such that only rq->special is used as opaque
pointer.

[ Impact: cleanup rq->data usage, extra kmalloc in msg_send ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Russell King <rmk@arm.linux.org.uk>

diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index cf81bad..538ba75 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -147,24 +147,40 @@ static int __mbox_msg_send(struct omap_mbox *mbox, mbox_msg_t msg, void *arg)
 	return ret;
 }
 
+struct omap_msg_tx_data {
+	mbox_msg_t	msg;
+	void		*arg;
+};
+
+static void omap_msg_tx_end_io(struct request *rq, int error)
+{
+	kfree(rq->special);
+	__blk_put_request(rq->q, rq);
+}
+
 int omap_mbox_msg_send(struct omap_mbox *mbox, mbox_msg_t msg, void* arg)
 {
+	struct omap_msg_tx_data *tx_data;
 	struct request *rq;
 	struct request_queue *q = mbox->txq->queue;
-	int ret = 0;
+
+	tx_data = kmalloc(sizeof(*tx_data), GFP_ATOMIC);
+	if (unlikely(!tx_data))
+		return -ENOMEM;
 
 	rq = blk_get_request(q, WRITE, GFP_ATOMIC);
 	if (unlikely(!rq)) {
-		ret = -ENOMEM;
-		goto fail;
+		kfree(tx_data);
+		return -ENOMEM;
 	}
 
-	rq->data = (void *)msg;
-	blk_insert_request(q, rq, 0, arg);
+	tx_data->msg = msg;
+	tx_data->arg = arg;
+	rq->end_io = omap_msg_tx_end_io;
+	blk_insert_request(q, rq, 0, tx_data);
 
 	schedule_work(&mbox->txq->work);
- fail:
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(omap_mbox_msg_send);
 
@@ -178,6 +194,8 @@ static void mbox_tx_work(struct work_struct *work)
 	struct request_queue *q = mbox->txq->queue;
 
 	while (1) {
+		struct omap_msg_tx_data *tx_data;
+
 		spin_lock(q->queue_lock);
 		rq = elv_next_request(q);
 		spin_unlock(q->queue_lock);
@@ -185,7 +203,9 @@ static void mbox_tx_work(struct work_struct *work)
 		if (!rq)
 			break;
 
-		ret = __mbox_msg_send(mbox, (mbox_msg_t) rq->data, rq->special);
+		tx_data = rq->special;
+
+		ret = __mbox_msg_send(mbox, tx_data->msg, tx_data->arg);
 		if (ret) {
 			enable_mbox_irq(mbox, IRQ_TX);
 			return;
@@ -222,7 +242,7 @@ static void mbox_rx_work(struct work_struct *work)
 		if (!rq)
 			break;
 
-		msg = (mbox_msg_t) rq->data;
+		msg = (mbox_msg_t)rq->special;
 		blk_end_request_all(rq, 0);
 		mbox->rxq->callback((void *)msg);
 	}
@@ -260,7 +280,6 @@ static void __mbox_rx_interrupt(struct omap_mbox *mbox)
 			goto nomem;
 
 		msg = mbox_fifo_read(mbox);
-		rq->data = (void *)msg;
 
 		if (unlikely(mbox_seq_test(mbox, msg))) {
 			pr_info("mbox: Illegal seq bit!(%08x)\n", msg);
@@ -268,7 +287,7 @@ static void __mbox_rx_interrupt(struct omap_mbox *mbox)
 				mbox->err_notify();
 		}
 
-		blk_insert_request(q, rq, 0, NULL);
+		blk_insert_request(q, rq, 0, (void *)msg);
 		if (mbox->ops->type == OMAP_MBOX_TYPE1)
 			break;
 	}
@@ -331,7 +350,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf)
 		if (!rq)
 			break;
 
-		*p = (mbox_msg_t) rq->data;
+		*p = (mbox_msg_t)rq->special;
 
 		blk_end_request_all(rq, 0);
 
-- 
cgit v0.10.2


From 731ec497e5888c6792ad62613ae9be97eebcd7ca Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:20 +0900
Subject: block: kill rq->data

Now that all block request data transfer is done via bio, rq->data
isn't used.  Kill it.

While at it, make the roles of rq->special and buffer clear.

[ Impact: drop now unncessary field from struct request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Boaz Harrosh <bharrosh@panasas.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 0520cc7..6dd180c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -189,10 +189,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 						(unsigned long long)rq->sector,
 						rq->nr_sectors,
 						rq->current_nr_sectors);
-	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, data %p, len %u\n",
+	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
 						rq->bio, rq->biotail,
-						rq->buffer, rq->data,
-						rq->data_len);
+						rq->buffer, rq->data_len);
 
 	if (blk_pc_request(rq)) {
 		printk(KERN_INFO "  cdb: ");
diff --git a/block/blk-map.c b/block/blk-map.c
index f103729..694fefa 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -156,7 +156,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
 		rq->cmd_flags |= REQ_COPY_USER;
 
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 unmap_rq:
 	blk_rq_unmap_user(bio);
@@ -235,7 +235,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	blk_queue_bounce(q, &bio);
 	bio_get(bio);
 	blk_rq_bio_prep(q, rq, bio);
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
@@ -313,7 +313,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 
 	blk_rq_bio_prep(q, rq, bio);
 	blk_queue_bounce(q, &rq->bio);
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 82a0ca2..bb9aa43 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -500,7 +500,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
-	rq->data = NULL;
 	rq->data_len = 0;
 	rq->extra_len = 0;
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 756ac7c..aa9fc57 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1088,7 +1088,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 			return ret;
 	} else {
 		BUG_ON(req->data_len);
-		BUG_ON(req->data);
 
 		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 		req->buffer = NULL;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cfeb3c2..12c545e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -211,8 +211,8 @@ struct request {
 
 	unsigned short ioprio;
 
-	void *special;
-	char *buffer;
+	void *special;		/* opaque pointer available for LLD use */
+	char *buffer;		/* kaddr of the current segment if available */
 
 	int tag;
 	int errors;
@@ -229,7 +229,6 @@ struct request {
 	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
 	unsigned int sense_len;
-	void *data;
 	void *sense;
 
 	unsigned long deadline;
-- 
cgit v0.10.2


From c2553b5844b06910435e40cfab9e6f384840cb97 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 24 Apr 2009 08:10:11 +0200
Subject: block: make blk_do_io_stat() do the full "is this rq accountable"
 checks

We currently check for file system requests outside of blk_do_io_stat(rq),
but we may as well just include it.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 6dd180c..1e3b97f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -68,7 +68,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
 	int rw = rq_data_dir(rq);
 	int cpu;
 
-	if (!blk_fs_request(rq) || !blk_do_io_stat(rq))
+	if (!blk_do_io_stat(rq))
 		return;
 
 	cpu = part_stat_lock();
@@ -1639,10 +1639,7 @@ EXPORT_SYMBOL(blkdev_dequeue_request);
 
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
-	if (!blk_do_io_stat(req))
-		return;
-
-	if (blk_fs_request(req)) {
+	if (blk_do_io_stat(req)) {
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
 		int cpu;
@@ -1656,15 +1653,12 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 
 static void blk_account_io_done(struct request *req)
 {
-	if (!blk_do_io_stat(req))
-		return;
-
 	/*
 	 * Account IO completion.  bar_rq isn't accounted as a normal
 	 * IO on queueing nor completion.  Accounting the containing
 	 * request is enough.
 	 */
-	if (blk_fs_request(req) && req != &req->q->bar_rq) {
+	if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
diff --git a/block/blk.h b/block/blk.h
index 9b2c324..404c10b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -149,9 +149,16 @@ static inline int blk_cpu_to_group(int cpu)
 #endif
 }
 
+/*
+ * Contribute to IO statistics IFF:
+ *
+ *	a) it's attached to a gendisk, and
+ *	b) the queue had IO stats enabled when this request was started, and
+ *	c) it's a file system request
+ */
 static inline int blk_do_io_stat(struct request *rq)
 {
-	return rq->rq_disk && blk_rq_io_stat(rq);
+	return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq);
 }
 
 #endif
-- 
cgit v0.10.2


From c69d48540c201394d08cb4d48b905e001313d9b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 24 Apr 2009 08:12:19 +0200
Subject: block: include discard requests in IO accounting

We currently don't do merging on discard requests, but we potentially
could. If we do, then we need to include discard requests in the IO
accounting, or merging would end up decrementing in_flight IO counters
for an IO which never incremented them.

So enable accounting for discard requests.

Problem found by Nikanth Karthikesan <knikanth@suse.de>

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk.h b/block/blk.h
index 404c10b..5111559 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -158,7 +158,8 @@ static inline int blk_cpu_to_group(int cpu)
  */
 static inline int blk_do_io_stat(struct request *rq)
 {
-	return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq);
+	return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq) &&
+		blk_discard_rq(rq);
 }
 
 #endif
-- 
cgit v0.10.2


From 9eb55b030c4b3227334ee4482402096cd1d1a6fe Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Mon, 27 Apr 2009 14:53:54 +0200
Subject: block: catch trying to use more bits than request->cmd_flags has

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 1e3b97f..394c5bd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2097,6 +2097,9 @@ EXPORT_SYMBOL(kblockd_schedule_work);
 
 int __init blk_dev_init(void)
 {
+	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
+			sizeof(((struct request *)0)->cmd_flags));
+
 	kblockd_workqueue = create_workqueue("kblockd");
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
-- 
cgit v0.10.2


From 9fd8d0e1bcb848257968d9a7d73ca4d890ea8bd1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:04 +0900
Subject: block: make blk_end_request_cur() return bool

In the process of mindlessly copying [__]blk_end_request_all(),
[__]blk_end_request_cur() ended up returning void even though they're
partial completion functions.  Fix it.

[ Impact: fix braindead API ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 12c545e..3a5b1bd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -903,10 +903,14 @@ static inline void blk_end_request_all(struct request *rq, int error)
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  */
-static inline void blk_end_request_cur(struct request *rq, int error)
+static inline bool blk_end_request_cur(struct request *rq, int error)
 {
-	blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+	return blk_end_request(rq, error, rq->hard_cur_sectors << 9);
 }
 
 /**
@@ -952,10 +956,14 @@ static inline void __blk_end_request_all(struct request *rq, int error)
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.  Must
  *     be called with queue lock held.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  */
-static inline void __blk_end_request_cur(struct request *rq, int error)
+static inline bool __blk_end_request_cur(struct request *rq, int error)
 {
-	__blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+	return __blk_end_request(rq, error, rq->hard_cur_sectors << 9);
 }
 
 extern void blk_complete_request(struct request *);
-- 
cgit v0.10.2


From 4c94dece1baf320d925cedb231489c4e0358ac5a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:05 +0900
Subject: block: don't init rq fields unnecessarily

blk_get_request() always returns properly zeroed requests.  Don't set
fields to zero/NULL unnecessarily.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index bb9aa43..58cf456 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -500,8 +500,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
-	rq->data_len = 0;
-	rq->extra_len = 0;
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	rq->cmd[0] = cmd;
 	rq->cmd[4] = data;
-- 
cgit v0.10.2


From 5b5c5d12b91cb6b2a2967f06aef35d59008dc2e7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:06 +0900
Subject: amiflop,ataflop,xd,mg_disk: clean up unnecessary stuff from block
 drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rq_data_dir() can only be READ or WRITE and rq->sector and nr_sectors
are always automatically updated after partial request completion.
Don't worry about rq_data_dir() not being either READ or WRITE or
manually update sector and nr_sectors.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jörg Dorchain <joerg@dorchain.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index b99a2a6..8ff95f2 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1371,11 +1371,6 @@ static void redo_fd_request(void)
 		       "0x%08lx\n", track, sector, data);
 #endif
 
-		if ((rq_data_dir(CURRENT) != READ) && (rq_data_dir(CURRENT) != WRITE)) {
-			printk(KERN_WARNING "do_fd_request: unknown command\n");
-			__blk_end_request_cur(CURRENT, -EIO);
-			goto repeat;
-		}
 		if (get_track(drive, track) == -1) {
 			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
@@ -1407,8 +1402,6 @@ static void redo_fd_request(void)
 			break;
 		}
 	}
-	CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
-	CURRENT->sector += CURRENT->current_nr_sectors;
 
 	__blk_end_request_cur(CURRENT, 0);
 	goto repeat;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 44a8702..2506728 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -732,8 +732,6 @@ static void do_fd_action( int drive )
 		    }
 		    else {
 			/* all sectors finished */
-			CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
-			CURRENT->sector += CURRENT->current_nr_sectors;
 			__blk_end_request_cur(CURRENT, 0);
 			redo_fd_request();
 			return;
@@ -1139,8 +1137,6 @@ static void fd_rwsec_done1(int status)
 	}
 	else {
 		/* all sectors finished */
-		CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
-		CURRENT->sector += CURRENT->current_nr_sectors;
 		__blk_end_request_cur(CURRENT, 0);
 		redo_fd_request();
 	}
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 408c2bd..2c00ad9 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -560,11 +560,6 @@ static void mg_request_poll(struct request_queue *q)
 			case WRITE:
 				mg_write(req);
 				break;
-			default:
-				printk(KERN_WARNING "%s:%d unknown command\n",
-						__func__, __LINE__);
-				__blk_end_request_cur(req, -EIO);
-				break;
 			}
 		}
 	}
@@ -614,11 +609,6 @@ static unsigned int mg_issue_req(struct request *req,
 		outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
 				MG_REG_COMMAND);
 		break;
-	default:
-		printk(KERN_WARNING "%s:%d unknown command\n",
-				__func__, __LINE__);
-		__blk_end_request_cur(req, -EIO);
-		break;
 	}
 	return MG_ERR_NONE;
 }
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 6f6ad82..14be4c1 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -308,7 +308,6 @@ static void do_xd_request (struct request_queue * q)
 	while ((req = elv_next_request(q)) != NULL) {
 		unsigned block = req->sector;
 		unsigned count = req->nr_sectors;
-		int rw = rq_data_dir(req);
 		XD_INFO *disk = req->rq_disk->private_data;
 		int res = 0;
 		int retry;
@@ -321,13 +320,9 @@ static void do_xd_request (struct request_queue * q)
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
-		if (rw != READ && rw != WRITE) {
-			printk("do_xd_request: unknown request\n");
-			__blk_end_request_cur(req, -EIO);
-			continue;
-		}
 		for (retry = 0; (retry < XD_RETRIES) && !res; retry++)
-			res = xd_readwrite(rw, disk, req->buffer, block, count);
+			res = xd_readwrite(rq_data_dir(req), disk, req->buffer,
+					   block, count);
 		/* wrap up, 0 = success, -errno = fail */
 		__blk_end_request_cur(req, res);
 	}
-- 
cgit v0.10.2


From cd4c34ebec155e5c10f897d9bebf618248121e70 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:07 +0900
Subject: ps3disk: simplify request completion

ps3disk_interrupt() always completes requests fully but it uses
rq->hard_cur_sectors for FLUSH requests for some reason.  Drop them
and simply use __blk_end_request_all().

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index d23b54b..f6586e4 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -231,7 +231,6 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	struct request *req;
 	int res, read, error;
 	u64 tag, status;
-	unsigned long num_sectors;
 	const char *op;
 
 	res = lv1_storage_get_async_status(dev->sbd.dev_id, &tag, &status);
@@ -261,11 +260,9 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
 	    req->cmd[0] == REQ_LB_OP_FLUSH) {
 		read = 0;
-		num_sectors = req->hard_cur_sectors;
 		op = "flush";
 	} else {
 		read = !rq_data_dir(req);
-		num_sectors = req->nr_sectors;
 		op = read ? "read" : "write";
 	}
 	if (status) {
@@ -281,7 +278,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	}
 
 	spin_lock(&priv->lock);
-	__blk_end_request(req, error, num_sectors << 9);
+	__blk_end_request_all(req, error);
 	priv->req = NULL;
 	ps3disk_do_request(dev, priv->queue);
 	spin_unlock(&priv->lock);
-- 
cgit v0.10.2


From 044208506d35bd62396c4673176e2c12393905b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:08 +0900
Subject: sunvdc: kill vdc_end_request()

vdc_end_request() is a thin silly wrapper on top of
__blk_end_request().  Kill it.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5861e33..f59887c 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -212,11 +212,6 @@ static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
 	vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
 }
 
-static void vdc_end_request(struct request *req, int error, int num_sectors)
-{
-	__blk_end_request(req, error, num_sectors << 9);
-}
-
 static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 			unsigned int index)
 {
@@ -239,7 +234,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 
 	rqe->req = NULL;
 
-	vdc_end_request(req, (desc->status ? -EIO : 0), desc->size >> 9);
+	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
 
 	if (blk_queue_stopped(port->disk->queue))
 		blk_start_queue(port->disk->queue);
@@ -453,7 +448,7 @@ static void do_vdc_request(struct request_queue *q)
 
 		blkdev_dequeue_request(req);
 		if (__send_request(req) < 0)
-			vdc_end_request(req, -EIO, req->hard_nr_sectors);
+			__blk_end_request_all(req, -EIO);
 	}
 }
 
-- 
cgit v0.10.2


From 4d6c84d91d1a539ebc47d1a36a35e9390ba11fdc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:09 +0900
Subject: ubd: cleanup completion path

ubd had its own block request partial completion mechanism, which is
unnecessary as block layer already does it.  Kill ubd_end_request()
and ubd_finish() and replace them with direct call to
blk_end_request().

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index f934225..36ca9fa 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -451,23 +451,6 @@ static void do_ubd_request(struct request_queue * q);
 
 /* Only changed by ubd_init, which is an initcall. */
 static int thread_fd = -1;
-
-static void ubd_end_request(struct request *req, int bytes, int error)
-{
-	blk_end_request(req, error, bytes);
-}
-
-/* Callable only from interrupt context - otherwise you need to do
- * spin_lock_irq()/spin_lock_irqsave() */
-static inline void ubd_finish(struct request *req, int bytes)
-{
-	if(bytes < 0){
-		ubd_end_request(req, 0, -EIO);
-		return;
-	}
-	ubd_end_request(req, bytes, 0);
-}
-
 static LIST_HEAD(restart);
 
 /* XXX - move this inside ubd_intr. */
@@ -475,7 +458,6 @@ static LIST_HEAD(restart);
 static void ubd_handler(void)
 {
 	struct io_thread_req *req;
-	struct request *rq;
 	struct ubd *ubd;
 	struct list_head *list, *next_ele;
 	unsigned long flags;
@@ -492,10 +474,7 @@ static void ubd_handler(void)
 			return;
 		}
 
-		rq = req->req;
-		rq->nr_sectors -= req->length >> 9;
-		if(rq->nr_sectors == 0)
-			ubd_finish(rq, rq->hard_nr_sectors << 9);
+		blk_end_request(req->req, 0, req->length);
 		kfree(req);
 	}
 	reactivate_fd(thread_fd, UBD_IRQ);
-- 
cgit v0.10.2


From f81f2f7c9fee307e371f37424577d46f9eaf8692 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:10 +0900
Subject: ubd: drop unnecessary rq->sector manipulation

ubd curiously updates rq->sector while issuing the request in multiple
pieces.  Don't do it and simply use local copy of sector.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 36ca9fa..4330127 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1222,7 +1222,8 @@ static void do_ubd_request(struct request_queue *q)
 {
 	struct io_thread_req *io_req;
 	struct request *req;
-	int n, last_sectors;
+	sector_t sector;
+	int n;
 
 	while(1){
 		struct ubd *dev = q->queuedata;
@@ -1238,11 +1239,10 @@ static void do_ubd_request(struct request_queue *q)
 		}
 
 		req = dev->request;
-		last_sectors = 0;
+		sector = req->sector;
 		while(dev->start_sg < dev->end_sg){
 			struct scatterlist *sg = &dev->sg[dev->start_sg];
 
-			req->sector += last_sectors;
 			io_req = kmalloc(sizeof(struct io_thread_req),
 					 GFP_ATOMIC);
 			if(io_req == NULL){
@@ -1251,10 +1251,10 @@ static void do_ubd_request(struct request_queue *q)
 				return;
 			}
 			prepare_request(req, io_req,
-					(unsigned long long) req->sector << 9,
+					(unsigned long long)sector << 9,
 					sg->offset, sg->length, sg_page(sg));
 
-			last_sectors = sg->length >> 9;
+			sector += sg->length >> 9;
 			n = os_write_file(thread_fd, &io_req,
 					  sizeof(struct io_thread_req *));
 			if(n != sizeof(struct io_thread_req *)){
-- 
cgit v0.10.2


From e091eb67af957bac4e4f7410c5d1aa263ee483a4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:11 +0900
Subject: hd: clean up request completion paths

hd read/write_intr() functions manually manipulate request to
incrementally complete it, which block layer already supports.  Simply
use block layer completion routines instead of manual partial
completion.

While at it, clear unnecessary elv_next_request() check at the tail of
read_intr().  This also makes read and write_intr() more consistent.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 5cb300b..75b9ca9 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -452,32 +452,25 @@ static void read_intr(void)
 	bad_rw_intr();
 	hd_request();
 	return;
+
 ok_to_read:
 	req = CURRENT;
 	insw(HD_DATA, req->buffer, 256);
-	req->sector++;
-	req->buffer += 512;
-	req->errors = 0;
-	i = --req->nr_sectors;
-	--req->current_nr_sectors;
 #ifdef DEBUG
 	printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n",
-		req->rq_disk->disk_name, req->sector, req->nr_sectors,
+		req->rq_disk->disk_name, req->sector + 1, req->nr_sectors - 1,
 		req->buffer+512);
 #endif
-	if (req->current_nr_sectors <= 0)
-		__blk_end_request_cur(req, 0);
-	if (i > 0) {
+	if (__blk_end_request(req, 0, 512)) {
 		SET_HANDLER(&read_intr);
 		return;
 	}
+
 	(void) inb_p(HD_STATUS);
 #if (HD_DELAY > 0)
 	last_req = read_timer();
 #endif
-	if (elv_next_request(QUEUE))
-		hd_request();
-	return;
+	hd_request();
 }
 
 static void write_intr(void)
@@ -499,23 +492,18 @@ static void write_intr(void)
 	bad_rw_intr();
 	hd_request();
 	return;
+
 ok_to_write:
-	req->sector++;
-	i = --req->nr_sectors;
-	--req->current_nr_sectors;
-	req->buffer += 512;
-	if (!i || (req->bio && req->current_nr_sectors <= 0))
-		__blk_end_request_cur(req, 0);
-	if (i > 0) {
+	if (__blk_end_request(req, 0, 512)) {
 		SET_HANDLER(&write_intr);
 		outsw(HD_DATA, req->buffer, 256);
-	} else {
+		return;
+	}
+
 #if (HD_DELAY > 0)
-		last_req = read_timer();
+	last_req = read_timer();
 #endif
-		hd_request();
-	}
-	return;
+	hd_request();
 }
 
 static void recal_intr(void)
-- 
cgit v0.10.2


From 467ca759fc83fc35cb7d15aec0d74c62cffc4481 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:12 +0900
Subject: swim3: clean up request completion paths

swim3 curiously tries to update request parameters before calling
__blk_end_request() when __blk_end_request() will do it anyway, and it
updates request for partial completion manually instead of using
blk_update_request().  Also, it does some spurious checks on rq such
as testing whether rq->sector is negative or current_nr_sectors is
zero right after fetching.

Drop unnecessary stuff and use standard block layer mechanisms.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 5904f7b..4248559 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -319,14 +319,10 @@ static void start_request(struct floppy_state *fs)
 		       req->errors, req->current_nr_sectors);
 #endif
 
-		if (req->sector < 0 || req->sector >= fs->total_secs) {
+		if (req->sector >= fs->total_secs) {
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
-		if (req->current_nr_sectors == 0) {
-			__blk_end_request_cur(req, 0);
-			continue;
-		}
 		if (fs->ejected) {
 			__blk_end_request_cur(req, -EIO);
 			continue;
@@ -593,8 +589,6 @@ static void xfer_timeout(unsigned long data)
 	struct floppy_state *fs = (struct floppy_state *) data;
 	struct swim3 __iomem *sw = fs->swim3;
 	struct dbdma_regs __iomem *dr = fs->dma;
-	struct dbdma_cmd *cp = fs->dma_cmd;
-	unsigned long s;
 	int n;
 
 	fs->timeout_pending = 0;
@@ -605,14 +599,6 @@ static void xfer_timeout(unsigned long data)
 	out_8(&sw->intr_enable, 0);
 	out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION);
 	out_8(&sw->select, RELAX);
-	if (rq_data_dir(fd_req) == WRITE)
-		++cp;
-	if (ld_le16(&cp->xfer_status) != 0)
-		s = fs->scount - ((ld_le16(&cp->res_count) + 511) >> 9);
-	else
-		s = 0;
-	fd_req->sector += s;
-	fd_req->current_nr_sectors -= s;
 	printk(KERN_ERR "swim3: timeout %sing sector %ld\n",
 	       (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector);
 	__blk_end_request_cur(fd_req, -EIO);
@@ -719,9 +705,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 		if (intr & ERROR_INTR) {
 			n = fs->scount - 1 - resid / 512;
 			if (n > 0) {
-				fd_req->sector += n;
-				fd_req->current_nr_sectors -= n;
-				fd_req->buffer += n * 512;
+				blk_update_request(fd_req, 0, n << 9);
 				fs->req_sector += n;
 			}
 			if (fs->retries < 5) {
@@ -745,13 +729,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				start_request(fs);
 				break;
 			}
-			fd_req->sector += fs->scount;
-			fd_req->current_nr_sectors -= fs->scount;
-			fd_req->buffer += fs->scount * 512;
-			if (fd_req->current_nr_sectors <= 0) {
-				__blk_end_request_cur(fd_req, 0);
-				fs->state = idle;
-			} else {
+			if (__blk_end_request(fd_req, 0, fs->scount << 9)) {
 				fs->req_sector += fs->scount;
 				if (fs->req_sector > fs->secpertrack) {
 					fs->req_sector -= fs->secpertrack;
@@ -761,7 +739,8 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 					}
 				}
 				act(fs);
-			}
+			} else
+				fs->state = idle;
 		}
 		if (fs->state == idle)
 			start_request(fs);
-- 
cgit v0.10.2


From e138b4e08ef65771000fbe6d93d67e3960ff862b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:13 +0900
Subject: swim: clean up request completion paths

swim curiously tries to update request parameters before calling
__blk_end_request() when __blk_end_request() will do it anyway and
unnecessarily checks whether current_nr_sectors is zero right after
fetching.

Drop unnecessary stuff and use standard block layer mechanisms.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Laurent Vivier <Laurent@lvivier.info>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 6544a7b..97ef426 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -535,10 +535,6 @@ static void redo_fd_request(struct request_queue *q)
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
-		if (req->current_nr_sectors == 0) {
-			__blk_end_request_cur(req, 0);
-			continue;
-		}
 		if (!fs->disk_in) {
 			__blk_end_request_cur(req, -EIO);
 			continue;
@@ -561,9 +557,6 @@ static void redo_fd_request(struct request_queue *q)
 				__blk_end_request_cur(req, -EIO);
 				continue;
 			}
-			req->nr_sectors -= req->current_nr_sectors;
-			req->sector += req->current_nr_sectors;
-			req->buffer += req->current_nr_sectors * 512;
 			__blk_end_request_cur(req, 0);
 			break;
 		}
-- 
cgit v0.10.2


From eec9462088a26c046d4db3100796a340a50890b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:14 +0900
Subject: mg_disk: fold mg_disk.h into mg_disk.c

include/linux/mg_disk.h is used only by drivers/block/mg_disk.c.  No
reason to put it in a separate header.  Fold it into mg_disk.c.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 2c00ad9..2c6127e 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -22,10 +22,194 @@
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
-#include <linux/mg_disk.h>
 
 #define MG_RES_SEC (CONFIG_MG_DISK_RES << 1)
 
+/* name for block device */
+#define MG_DISK_NAME "mgd"
+/* name for platform device */
+#define MG_DEV_NAME "mg_disk"
+
+#define MG_DISK_MAJ 0
+#define MG_DISK_MAX_PART 16
+#define MG_SECTOR_SIZE 512
+#define MG_MAX_SECTS 256
+
+/* Register offsets */
+#define MG_BUFF_OFFSET			0x8000
+#define MG_STORAGE_BUFFER_SIZE		0x200
+#define MG_REG_OFFSET			0xC000
+#define MG_REG_FEATURE			(MG_REG_OFFSET + 2)	/* write case */
+#define MG_REG_ERROR			(MG_REG_OFFSET + 2)	/* read case */
+#define MG_REG_SECT_CNT			(MG_REG_OFFSET + 4)
+#define MG_REG_SECT_NUM			(MG_REG_OFFSET + 6)
+#define MG_REG_CYL_LOW			(MG_REG_OFFSET + 8)
+#define MG_REG_CYL_HIGH			(MG_REG_OFFSET + 0xA)
+#define MG_REG_DRV_HEAD			(MG_REG_OFFSET + 0xC)
+#define MG_REG_COMMAND			(MG_REG_OFFSET + 0xE)	/* write case */
+#define MG_REG_STATUS			(MG_REG_OFFSET + 0xE)	/* read  case */
+#define MG_REG_DRV_CTRL			(MG_REG_OFFSET + 0x10)
+#define MG_REG_BURST_CTRL		(MG_REG_OFFSET + 0x12)
+
+/* "Drive Select/Head Register" bit values */
+#define MG_REG_HEAD_MUST_BE_ON		0xA0 /* These 2 bits are always on */
+#define MG_REG_HEAD_DRIVE_MASTER	(0x00 | MG_REG_HEAD_MUST_BE_ON)
+#define MG_REG_HEAD_DRIVE_SLAVE		(0x10 | MG_REG_HEAD_MUST_BE_ON)
+#define MG_REG_HEAD_LBA_MODE		(0x40 | MG_REG_HEAD_MUST_BE_ON)
+
+
+/* "Device Control Register" bit values */
+#define MG_REG_CTRL_INTR_ENABLE			0x0
+#define MG_REG_CTRL_INTR_DISABLE		(0x1<<1)
+#define MG_REG_CTRL_RESET			(0x1<<2)
+#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH	0x0
+#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW	(0x1<<4)
+#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW		0x0
+#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH	(0x1<<5)
+#define MG_REG_CTRL_DPD_DISABLE			0x0
+#define MG_REG_CTRL_DPD_ENABLE			(0x1<<6)
+
+/* Status register bit */
+/* error bit in status register */
+#define MG_REG_STATUS_BIT_ERROR			0x01
+/* corrected error in status register */
+#define MG_REG_STATUS_BIT_CORRECTED_ERROR	0x04
+/* data request bit in status register */
+#define MG_REG_STATUS_BIT_DATA_REQ		0x08
+/* DSC - Drive Seek Complete */
+#define MG_REG_STATUS_BIT_SEEK_DONE		0x10
+/* DWF - Drive Write Fault */
+#define MG_REG_STATUS_BIT_WRITE_FAULT		0x20
+#define MG_REG_STATUS_BIT_READY			0x40
+#define MG_REG_STATUS_BIT_BUSY			0x80
+
+/* handy status */
+#define MG_STAT_READY	(MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE)
+#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | \
+				(MG_REG_STATUS_BIT_BUSY | \
+				 MG_REG_STATUS_BIT_WRITE_FAULT | \
+				 MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY)
+
+/* Error register */
+#define MG_REG_ERR_AMNF		0x01
+#define MG_REG_ERR_ABRT		0x04
+#define MG_REG_ERR_IDNF		0x10
+#define MG_REG_ERR_UNC		0x40
+#define MG_REG_ERR_BBK		0x80
+
+/* error code for others */
+#define MG_ERR_NONE		0
+#define MG_ERR_TIMEOUT		0x100
+#define MG_ERR_INIT_STAT	0x101
+#define MG_ERR_TRANSLATION	0x102
+#define MG_ERR_CTRL_RST		0x103
+#define MG_ERR_INV_STAT		0x104
+#define MG_ERR_RSTOUT		0x105
+
+#define MG_MAX_ERRORS	6	/* Max read/write errors */
+
+/* command */
+#define MG_CMD_RD 0x20
+#define MG_CMD_WR 0x30
+#define MG_CMD_SLEEP 0x99
+#define MG_CMD_WAKEUP 0xC3
+#define MG_CMD_ID 0xEC
+#define MG_CMD_WR_CONF 0x3C
+#define MG_CMD_RD_CONF 0x40
+
+/* operation mode */
+#define MG_OP_CASCADE (1 << 0)
+#define MG_OP_CASCADE_SYNC_RD (1 << 1)
+#define MG_OP_CASCADE_SYNC_WR (1 << 2)
+#define MG_OP_INTERLEAVE (1 << 3)
+
+/* synchronous */
+#define MG_BURST_LAT_4 (3 << 4)
+#define MG_BURST_LAT_5 (4 << 4)
+#define MG_BURST_LAT_6 (5 << 4)
+#define MG_BURST_LAT_7 (6 << 4)
+#define MG_BURST_LAT_8 (7 << 4)
+#define MG_BURST_LEN_4 (1 << 1)
+#define MG_BURST_LEN_8 (2 << 1)
+#define MG_BURST_LEN_16 (3 << 1)
+#define MG_BURST_LEN_32 (4 << 1)
+#define MG_BURST_LEN_CONT (0 << 1)
+
+/* timeout value (unit: ms) */
+#define MG_TMAX_CONF_TO_CMD	1
+#define MG_TMAX_WAIT_RD_DRQ	10
+#define MG_TMAX_WAIT_WR_DRQ	500
+#define MG_TMAX_RST_TO_BUSY	10
+#define MG_TMAX_HDRST_TO_RDY	500
+#define MG_TMAX_SWRST_TO_RDY	500
+#define MG_TMAX_RSTOUT		3000
+
+/* device attribution */
+/* use mflash as boot device */
+#define MG_BOOT_DEV		(1 << 0)
+/* use mflash as storage device */
+#define MG_STORAGE_DEV		(1 << 1)
+/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
+#define MG_STORAGE_DEV_SKIP_RST	(1 << 2)
+
+#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
+
+/* names of GPIO resource */
+#define MG_RST_PIN	"mg_rst"
+/* except MG_BOOT_DEV, reset-out pin should be assigned */
+#define MG_RSTOUT_PIN	"mg_rstout"
+
+/* private driver data */
+struct mg_drv_data {
+	/* disk resource */
+	u32 use_polling;
+
+	/* device attribution */
+	u32 dev_attr;
+
+	/* internally used */
+	struct mg_host *host;
+};
+
+/* main structure for mflash driver */
+struct mg_host {
+	struct device *dev;
+
+	struct request_queue *breq;
+	spinlock_t lock;
+	struct gendisk *gd;
+
+	struct timer_list timer;
+	void (*mg_do_intr) (struct mg_host *);
+
+	u16 id[ATA_ID_WORDS];
+
+	u16 cyls;
+	u16 heads;
+	u16 sectors;
+	u32 n_sectors;
+	u32 nres_sectors;
+
+	void __iomem *dev_base;
+	unsigned int irq;
+	unsigned int rst;
+	unsigned int rstout;
+
+	u32 major;
+	u32 error;
+};
+
+/*
+ * Debugging macro and defines
+ */
+#undef DO_MG_DEBUG
+#ifdef DO_MG_DEBUG
+#  define MG_DBG(fmt, args...) \
+	printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
+#else /* CONFIG_MG_DEBUG */
+#  define MG_DBG(fmt, args...) do { } while (0)
+#endif /* CONFIG_MG_DEBUG */
+
 static void mg_request(struct request_queue *);
 
 static void mg_dump_status(const char *msg, unsigned int stat,
diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h
deleted file mode 100644
index 1f76b1e..0000000
--- a/include/linux/mg_disk.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- *  include/linux/mg_disk.c
- *
- *  Support for the mGine m[g]flash IO mode.
- *  Based on legacy hd.c
- *
- * (c) 2008 mGine Co.,LTD
- * (c) 2008 unsik Kim <donari75@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#ifndef __MG_DISK_H__
-#define __MG_DISK_H__
-
-#include <linux/blkdev.h>
-#include <linux/ata.h>
-
-/* name for block device */
-#define MG_DISK_NAME "mgd"
-/* name for platform device */
-#define MG_DEV_NAME "mg_disk"
-
-#define MG_DISK_MAJ 0
-#define MG_DISK_MAX_PART 16
-#define MG_SECTOR_SIZE 512
-#define MG_MAX_SECTS 256
-
-/* Register offsets */
-#define MG_BUFF_OFFSET			0x8000
-#define MG_STORAGE_BUFFER_SIZE		0x200
-#define MG_REG_OFFSET			0xC000
-#define MG_REG_FEATURE			(MG_REG_OFFSET + 2)	/* write case */
-#define MG_REG_ERROR			(MG_REG_OFFSET + 2)	/* read case */
-#define MG_REG_SECT_CNT			(MG_REG_OFFSET + 4)
-#define MG_REG_SECT_NUM			(MG_REG_OFFSET + 6)
-#define MG_REG_CYL_LOW			(MG_REG_OFFSET + 8)
-#define MG_REG_CYL_HIGH			(MG_REG_OFFSET + 0xA)
-#define MG_REG_DRV_HEAD			(MG_REG_OFFSET + 0xC)
-#define MG_REG_COMMAND			(MG_REG_OFFSET + 0xE)	/* write case */
-#define MG_REG_STATUS			(MG_REG_OFFSET + 0xE)	/* read  case */
-#define MG_REG_DRV_CTRL			(MG_REG_OFFSET + 0x10)
-#define MG_REG_BURST_CTRL		(MG_REG_OFFSET + 0x12)
-
-/* "Drive Select/Head Register" bit values */
-#define MG_REG_HEAD_MUST_BE_ON		0xA0 /* These 2 bits are always on */
-#define MG_REG_HEAD_DRIVE_MASTER	(0x00 | MG_REG_HEAD_MUST_BE_ON)
-#define MG_REG_HEAD_DRIVE_SLAVE		(0x10 | MG_REG_HEAD_MUST_BE_ON)
-#define MG_REG_HEAD_LBA_MODE		(0x40 | MG_REG_HEAD_MUST_BE_ON)
-
-
-/* "Device Control Register" bit values */
-#define MG_REG_CTRL_INTR_ENABLE			0x0
-#define MG_REG_CTRL_INTR_DISABLE		(0x1<<1)
-#define MG_REG_CTRL_RESET			(0x1<<2)
-#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH	0x0
-#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW	(0x1<<4)
-#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW		0x0
-#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH	(0x1<<5)
-#define MG_REG_CTRL_DPD_DISABLE			0x0
-#define MG_REG_CTRL_DPD_ENABLE			(0x1<<6)
-
-/* Status register bit */
-/* error bit in status register */
-#define MG_REG_STATUS_BIT_ERROR			0x01
-/* corrected error in status register */
-#define MG_REG_STATUS_BIT_CORRECTED_ERROR	0x04
-/* data request bit in status register */
-#define MG_REG_STATUS_BIT_DATA_REQ		0x08
-/* DSC - Drive Seek Complete */
-#define MG_REG_STATUS_BIT_SEEK_DONE		0x10
-/* DWF - Drive Write Fault */
-#define MG_REG_STATUS_BIT_WRITE_FAULT		0x20
-#define MG_REG_STATUS_BIT_READY			0x40
-#define MG_REG_STATUS_BIT_BUSY			0x80
-
-/* handy status */
-#define MG_STAT_READY	(MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE)
-#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | \
-				(MG_REG_STATUS_BIT_BUSY | \
-				 MG_REG_STATUS_BIT_WRITE_FAULT | \
-				 MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY)
-
-/* Error register */
-#define MG_REG_ERR_AMNF		0x01
-#define MG_REG_ERR_ABRT		0x04
-#define MG_REG_ERR_IDNF		0x10
-#define MG_REG_ERR_UNC		0x40
-#define MG_REG_ERR_BBK		0x80
-
-/* error code for others */
-#define MG_ERR_NONE		0
-#define MG_ERR_TIMEOUT		0x100
-#define MG_ERR_INIT_STAT	0x101
-#define MG_ERR_TRANSLATION	0x102
-#define MG_ERR_CTRL_RST		0x103
-#define MG_ERR_INV_STAT		0x104
-#define MG_ERR_RSTOUT		0x105
-
-#define MG_MAX_ERRORS	6	/* Max read/write errors */
-
-/* command */
-#define MG_CMD_RD 0x20
-#define MG_CMD_WR 0x30
-#define MG_CMD_SLEEP 0x99
-#define MG_CMD_WAKEUP 0xC3
-#define MG_CMD_ID 0xEC
-#define MG_CMD_WR_CONF 0x3C
-#define MG_CMD_RD_CONF 0x40
-
-/* operation mode */
-#define MG_OP_CASCADE (1 << 0)
-#define MG_OP_CASCADE_SYNC_RD (1 << 1)
-#define MG_OP_CASCADE_SYNC_WR (1 << 2)
-#define MG_OP_INTERLEAVE (1 << 3)
-
-/* synchronous */
-#define MG_BURST_LAT_4 (3 << 4)
-#define MG_BURST_LAT_5 (4 << 4)
-#define MG_BURST_LAT_6 (5 << 4)
-#define MG_BURST_LAT_7 (6 << 4)
-#define MG_BURST_LAT_8 (7 << 4)
-#define MG_BURST_LEN_4 (1 << 1)
-#define MG_BURST_LEN_8 (2 << 1)
-#define MG_BURST_LEN_16 (3 << 1)
-#define MG_BURST_LEN_32 (4 << 1)
-#define MG_BURST_LEN_CONT (0 << 1)
-
-/* timeout value (unit: ms) */
-#define MG_TMAX_CONF_TO_CMD	1
-#define MG_TMAX_WAIT_RD_DRQ	10
-#define MG_TMAX_WAIT_WR_DRQ	500
-#define MG_TMAX_RST_TO_BUSY	10
-#define MG_TMAX_HDRST_TO_RDY	500
-#define MG_TMAX_SWRST_TO_RDY	500
-#define MG_TMAX_RSTOUT		3000
-
-/* device attribution */
-/* use mflash as boot device */
-#define MG_BOOT_DEV		(1 << 0)
-/* use mflash as storage device */
-#define MG_STORAGE_DEV		(1 << 1)
-/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
-#define MG_STORAGE_DEV_SKIP_RST	(1 << 2)
-
-#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
-
-/* names of GPIO resource */
-#define MG_RST_PIN	"mg_rst"
-/* except MG_BOOT_DEV, reset-out pin should be assigned */
-#define MG_RSTOUT_PIN	"mg_rstout"
-
-/* private driver data */
-struct mg_drv_data {
-	/* disk resource */
-	u32 use_polling;
-
-	/* device attribution */
-	u32 dev_attr;
-
-	/* internally used */
-	struct mg_host *host;
-};
-
-/* main structure for mflash driver */
-struct mg_host {
-	struct device *dev;
-
-	struct request_queue *breq;
-	spinlock_t lock;
-	struct gendisk *gd;
-
-	struct timer_list timer;
-	void (*mg_do_intr) (struct mg_host *);
-
-	u16 id[ATA_ID_WORDS];
-
-	u16 cyls;
-	u16 heads;
-	u16 sectors;
-	u32 n_sectors;
-	u32 nres_sectors;
-
-	void __iomem *dev_base;
-	unsigned int irq;
-	unsigned int rst;
-	unsigned int rstout;
-
-	u32 major;
-	u32 error;
-};
-
-/*
- * Debugging macro and defines
- */
-#undef DO_MG_DEBUG
-#ifdef DO_MG_DEBUG
-#  define MG_DBG(fmt, args...) \
-	printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
-#else /* CONFIG_MG_DEBUG */
-#  define MG_DBG(fmt, args...) do { } while (0)
-#endif /* CONFIG_MG_DEBUG */
-
-#endif
-- 
cgit v0.10.2


From a03bb5a32fff4aa23f081a3cff7e98d4084104cd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:15 +0900
Subject: mg_disk: clean up request completion paths

mg_disk implements its own partial completion.  Convert to standard
block layer partial completion.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 2c6127e..1a4cc96 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -503,95 +503,68 @@ static unsigned int mg_out(struct mg_host *host,
 
 static void mg_read(struct request *req)
 {
-	u32 remains, j;
+	u32 j;
 	struct mg_host *host = req->rq_disk->private_data;
 
-	remains = req->nr_sectors;
-
 	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, NULL) !=
 			MG_ERR_NONE)
 		mg_bad_rw_intr(host);
 
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-			remains, req->sector, req->buffer);
+	       req->nr_sectors, req->sector, req->buffer);
+
+	do {
+		u16 *buff = (u16 *)req->buffer;
 
-	while (remains) {
 		if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ,
 					MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) {
 			mg_bad_rw_intr(host);
 			return;
 		}
-		for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) {
-			*(u16 *)req->buffer =
-				inw((unsigned long)host->dev_base +
-						MG_BUFF_OFFSET + (j << 1));
-			req->buffer += 2;
-		}
-
-		req->sector++;
-		req->errors = 0;
-		remains = --req->nr_sectors;
-		--req->current_nr_sectors;
-
-		if (req->current_nr_sectors <= 0) {
-			MG_DBG("remain : %d sects\n", remains);
-			__blk_end_request_cur(req, 0);
-			if (remains > 0)
-				req = elv_next_request(host->breq);
-		}
+		for (j = 0; j < MG_SECTOR_SIZE >> 1; j++)
+			*buff++ = inw((unsigned long)host->dev_base +
+				      MG_BUFF_OFFSET + (j << 1));
 
 		outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base +
 				MG_REG_COMMAND);
-	}
+	} while (__blk_end_request(req, 0, MG_SECTOR_SIZE));
 }
 
 static void mg_write(struct request *req)
 {
-	u32 remains, j;
+	u32 j;
 	struct mg_host *host = req->rq_disk->private_data;
 
-	remains = req->nr_sectors;
-
 	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, NULL) !=
 			MG_ERR_NONE) {
 		mg_bad_rw_intr(host);
 		return;
 	}
 
-
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-			remains, req->sector, req->buffer);
-	while (remains) {
+	       req->nr_sectors, req->sector, req->buffer);
+
+	do {
+		u16 *buff = (u16 *)req->buffer;
+
 		if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ,
 					MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
 			mg_bad_rw_intr(host);
 			return;
 		}
-		for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) {
-			outw(*(u16 *)req->buffer,
-					(unsigned long)host->dev_base +
-					MG_BUFF_OFFSET + (j << 1));
-			req->buffer += 2;
-		}
-		req->sector++;
-		remains = --req->nr_sectors;
-		--req->current_nr_sectors;
-
-		if (req->current_nr_sectors <= 0) {
-			MG_DBG("remain : %d sects\n", remains);
-			__blk_end_request_cur(req, 0);
-			if (remains > 0)
-				req = elv_next_request(host->breq);
-		}
+		for (j = 0; j < MG_SECTOR_SIZE >> 1; j++)
+			outw(*buff++, (unsigned long)host->dev_base +
+				      MG_BUFF_OFFSET + (j << 1));
 
 		outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
 				MG_REG_COMMAND);
-	}
+	} while (__blk_end_request(req, 0, MG_SECTOR_SIZE));
 }
 
 static void mg_read_intr(struct mg_host *host)
 {
 	u32 i;
+	u16 *buff;
 	struct request *req;
 
 	/* check status */
@@ -612,39 +585,24 @@ static void mg_read_intr(struct mg_host *host)
 ok_to_read:
 	/* get current segment of request */
 	req = elv_next_request(host->breq);
+	buff = (u16 *)req->buffer;
 
 	/* read 1 sector */
-	for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) {
-		*(u16 *)req->buffer =
-			inw((unsigned long)host->dev_base + MG_BUFF_OFFSET +
-					(i << 1));
-		req->buffer += 2;
-	}
+	for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
+		*buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET +
+			      (i << 1));
 
-	/* manipulate request */
 	MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
 			req->sector, req->nr_sectors - 1, req->buffer);
 
-	req->sector++;
-	req->errors = 0;
-	i = --req->nr_sectors;
-	--req->current_nr_sectors;
-
-	/* let know if current segment done */
-	if (req->current_nr_sectors <= 0)
-		__blk_end_request_cur(req, 0);
-
-	/* set handler if read remains */
-	if (i > 0) {
-		host->mg_do_intr = mg_read_intr;
-		mod_timer(&host->timer, jiffies + 3 * HZ);
-	}
-
 	/* send read confirm */
 	outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
 
-	/* goto next request */
-	if (!i)
+	if (__blk_end_request(req, 0, MG_SECTOR_SIZE)) {
+		/* set handler if read remains */
+		host->mg_do_intr = mg_read_intr;
+		mod_timer(&host->timer, jiffies + 3 * HZ);
+	} else /* goto next request */
 		mg_request(host->breq);
 }
 
@@ -653,6 +611,7 @@ static void mg_write_intr(struct mg_host *host)
 	u32 i, j;
 	u16 *buff;
 	struct request *req;
+	bool rem;
 
 	/* get current segment of request */
 	req = elv_next_request(host->breq);
@@ -673,18 +632,8 @@ static void mg_write_intr(struct mg_host *host)
 	return;
 
 ok_to_write:
-	/* manipulate request */
-	req->sector++;
-	i = --req->nr_sectors;
-	--req->current_nr_sectors;
-	req->buffer += MG_SECTOR_SIZE;
-
-	/* let know if current segment or all done */
-	if (!i || (req->bio && req->current_nr_sectors <= 0))
-		__blk_end_request_cur(req, 0);
-
-	/* write 1 sector and set handler if remains */
-	if (i > 0) {
+	if ((rem = __blk_end_request(req, 0, MG_SECTOR_SIZE))) {
+		/* write 1 sector and set handler if remains */
 		buff = (u16 *)req->buffer;
 		for (j = 0; j < MG_STORAGE_BUFFER_SIZE >> 1; j++) {
 			outw(*buff, (unsigned long)host->dev_base +
@@ -700,7 +649,7 @@ ok_to_write:
 	/* send write confirm */
 	outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
 
-	if (!i)
+	if (!rem)
 		mg_request(host->breq);
 }
 
-- 
cgit v0.10.2


From 8a11a789c39cd6f61de4243234cf44a46c23ffd0 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 28 Apr 2009 13:06:16 +0900
Subject: mg_disk: fix dependency on libata

Add local copies of ata_id_string() and ata_id_c_string() to mg_disk
so there is no need for the driver to depend on ATA and SCSI.

[ Impact: break dependency on libata by copying ata id string functions ]

Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index ddea8e4..f42fa50 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -412,7 +412,7 @@ config ATA_OVER_ETH
 
 config MG_DISK
 	tristate "mGine mflash, gflash support"
-	depends on ARM && ATA && GPIOLIB
+	depends on ARM && GPIOLIB
 	help
 	  mGine mFlash(gFlash) block device driver
 
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 1a4cc96..8e53cae 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -17,7 +17,7 @@
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
-#include <linux/libata.h>
+#include <linux/ata.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
@@ -357,6 +357,42 @@ static irqreturn_t mg_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+/* local copy of ata_id_string() */
+static void mg_id_string(const u16 *id, unsigned char *s,
+			 unsigned int ofs, unsigned int len)
+{
+	unsigned int c;
+
+	BUG_ON(len & 1);
+
+	while (len > 0) {
+		c = id[ofs] >> 8;
+		*s = c;
+		s++;
+
+		c = id[ofs] & 0xff;
+		*s = c;
+		s++;
+
+		ofs++;
+		len -= 2;
+	}
+}
+
+/* local copy of ata_id_c_string() */
+static void mg_id_c_string(const u16 *id, unsigned char *s,
+			   unsigned int ofs, unsigned int len)
+{
+	unsigned char *p;
+
+	mg_id_string(id, s, ofs, len - 1);
+
+	p = s + strnlen(s, len - 1);
+	while (p > s && p[-1] == ' ')
+		p--;
+	*p = '\0';
+}
+
 static int mg_get_disk_id(struct mg_host *host)
 {
 	u32 i;
@@ -403,9 +439,9 @@ static int mg_get_disk_id(struct mg_host *host)
 		host->n_sectors -= host->nres_sectors;
 	}
 
-	ata_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev));
-	ata_id_c_string(id, model, ATA_ID_PROD, sizeof(model));
-	ata_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial));
+	mg_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev));
+	mg_id_c_string(id, model, ATA_ID_PROD, sizeof(model));
+	mg_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial));
 	printk(KERN_INFO "mg_disk: model: %s\n", model);
 	printk(KERN_INFO "mg_disk: firm: %.8s\n", fwrev);
 	printk(KERN_INFO "mg_disk: serial: %s\n", serial);
-- 
cgit v0.10.2


From f68adec3c7155a8bbf32a90cb4c4d0737df045d9 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 28 Apr 2009 13:06:17 +0900
Subject: mg_disk: use defines from <linux/ata.h>

While at it:
- remove MG_REG_HEAD_MUST_BE_ON define
- remove MG_REG_CTRL_INTR_ENABLE define
- remove MG_REG_HEAD_LBA_MODE define
- remove unused defines

Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 8e53cae..71e56cc 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -51,51 +51,10 @@
 #define MG_REG_DRV_CTRL			(MG_REG_OFFSET + 0x10)
 #define MG_REG_BURST_CTRL		(MG_REG_OFFSET + 0x12)
 
-/* "Drive Select/Head Register" bit values */
-#define MG_REG_HEAD_MUST_BE_ON		0xA0 /* These 2 bits are always on */
-#define MG_REG_HEAD_DRIVE_MASTER	(0x00 | MG_REG_HEAD_MUST_BE_ON)
-#define MG_REG_HEAD_DRIVE_SLAVE		(0x10 | MG_REG_HEAD_MUST_BE_ON)
-#define MG_REG_HEAD_LBA_MODE		(0x40 | MG_REG_HEAD_MUST_BE_ON)
-
-
-/* "Device Control Register" bit values */
-#define MG_REG_CTRL_INTR_ENABLE			0x0
-#define MG_REG_CTRL_INTR_DISABLE		(0x1<<1)
-#define MG_REG_CTRL_RESET			(0x1<<2)
-#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH	0x0
-#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW	(0x1<<4)
-#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW		0x0
-#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH	(0x1<<5)
-#define MG_REG_CTRL_DPD_DISABLE			0x0
-#define MG_REG_CTRL_DPD_ENABLE			(0x1<<6)
-
-/* Status register bit */
-/* error bit in status register */
-#define MG_REG_STATUS_BIT_ERROR			0x01
-/* corrected error in status register */
-#define MG_REG_STATUS_BIT_CORRECTED_ERROR	0x04
-/* data request bit in status register */
-#define MG_REG_STATUS_BIT_DATA_REQ		0x08
-/* DSC - Drive Seek Complete */
-#define MG_REG_STATUS_BIT_SEEK_DONE		0x10
-/* DWF - Drive Write Fault */
-#define MG_REG_STATUS_BIT_WRITE_FAULT		0x20
-#define MG_REG_STATUS_BIT_READY			0x40
-#define MG_REG_STATUS_BIT_BUSY			0x80
-
 /* handy status */
-#define MG_STAT_READY	(MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE)
-#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | \
-				(MG_REG_STATUS_BIT_BUSY | \
-				 MG_REG_STATUS_BIT_WRITE_FAULT | \
-				 MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY)
-
-/* Error register */
-#define MG_REG_ERR_AMNF		0x01
-#define MG_REG_ERR_ABRT		0x04
-#define MG_REG_ERR_IDNF		0x10
-#define MG_REG_ERR_UNC		0x40
-#define MG_REG_ERR_BBK		0x80
+#define MG_STAT_READY	(ATA_DRDY | ATA_DSC)
+#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | (ATA_BUSY | ATA_DF | \
+				 ATA_ERR))) == MG_STAT_READY)
 
 /* error code for others */
 #define MG_ERR_NONE		0
@@ -225,41 +184,39 @@ static void mg_dump_status(const char *msg, unsigned int stat,
 	}
 
 	printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
-	if (stat & MG_REG_STATUS_BIT_BUSY)
+	if (stat & ATA_BUSY)
 		printk("Busy ");
-	if (stat & MG_REG_STATUS_BIT_READY)
+	if (stat & ATA_DRDY)
 		printk("DriveReady ");
-	if (stat & MG_REG_STATUS_BIT_WRITE_FAULT)
+	if (stat & ATA_DF)
 		printk("WriteFault ");
-	if (stat & MG_REG_STATUS_BIT_SEEK_DONE)
+	if (stat & ATA_DSC)
 		printk("SeekComplete ");
-	if (stat & MG_REG_STATUS_BIT_DATA_REQ)
+	if (stat & ATA_DRQ)
 		printk("DataRequest ");
-	if (stat & MG_REG_STATUS_BIT_CORRECTED_ERROR)
+	if (stat & ATA_CORR)
 		printk("CorrectedError ");
-	if (stat & MG_REG_STATUS_BIT_ERROR)
+	if (stat & ATA_ERR)
 		printk("Error ");
 	printk("}\n");
-	if ((stat & MG_REG_STATUS_BIT_ERROR) == 0) {
+	if ((stat & ATA_ERR) == 0) {
 		host->error = 0;
 	} else {
 		host->error = inb((unsigned long)host->dev_base + MG_REG_ERROR);
 		printk(KERN_ERR "%s: %s: error=0x%02x { ", name, msg,
 				host->error & 0xff);
-		if (host->error & MG_REG_ERR_BBK)
+		if (host->error & ATA_BBK)
 			printk("BadSector ");
-		if (host->error & MG_REG_ERR_UNC)
+		if (host->error & ATA_UNC)
 			printk("UncorrectableError ");
-		if (host->error & MG_REG_ERR_IDNF)
+		if (host->error & ATA_IDNF)
 			printk("SectorIdNotFound ");
-		if (host->error & MG_REG_ERR_ABRT)
+		if (host->error & ATA_ABORTED)
 			printk("DriveStatusError ");
-		if (host->error & MG_REG_ERR_AMNF)
+		if (host->error & ATA_AMNF)
 			printk("AddrMarkNotFound ");
 		printk("}");
-		if (host->error &
-				(MG_REG_ERR_BBK | MG_REG_ERR_UNC |
-				 MG_REG_ERR_IDNF | MG_REG_ERR_AMNF)) {
+		if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) {
 			if (host->breq) {
 				req = elv_next_request(host->breq);
 				if (req)
@@ -284,12 +241,12 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec)
 
 	do {
 		cur_jiffies = jiffies;
-		if (status & MG_REG_STATUS_BIT_BUSY) {
-			if (expect == MG_REG_STATUS_BIT_BUSY)
+		if (status & ATA_BUSY) {
+			if (expect == ATA_BUSY)
 				break;
 		} else {
 			/* Check the error condition! */
-			if (status & MG_REG_STATUS_BIT_ERROR) {
+			if (status & ATA_ERR) {
 				mg_dump_status("mg_wait", status, host);
 				break;
 			}
@@ -298,8 +255,8 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec)
 				if (MG_READY_OK(status))
 					break;
 
-			if (expect == MG_REG_STATUS_BIT_DATA_REQ)
-				if (status & MG_REG_STATUS_BIT_DATA_REQ)
+			if (expect == ATA_DRQ)
+				if (status & ATA_DRQ)
 					break;
 		}
 		if (!msec) {
@@ -404,12 +361,10 @@ static int mg_get_disk_id(struct mg_host *host)
 	char serial[ATA_ID_SERNO_LEN + 1];
 
 	if (!prv_data->use_polling)
-		outb(MG_REG_CTRL_INTR_DISABLE,
-				(unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 
 	outb(MG_CMD_ID, (unsigned long)host->dev_base + MG_REG_COMMAND);
-	err = mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, MG_TMAX_WAIT_RD_DRQ);
+	err = mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ);
 	if (err)
 		return err;
 
@@ -449,8 +404,7 @@ static int mg_get_disk_id(struct mg_host *host)
 			host->n_sectors, host->nres_sectors);
 
 	if (!prv_data->use_polling)
-		outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 
 	return err;
 }
@@ -464,7 +418,7 @@ static int mg_disk_init(struct mg_host *host)
 
 	/* hdd rst low */
 	gpio_set_value(host->rst, 0);
-	err = mg_wait(host, MG_REG_STATUS_BIT_BUSY, MG_TMAX_RST_TO_BUSY);
+	err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
 	if (err)
 		return err;
 
@@ -475,17 +429,14 @@ static int mg_disk_init(struct mg_host *host)
 		return err;
 
 	/* soft reset on */
-	outb(MG_REG_CTRL_RESET |
-			(prv_data->use_polling ? MG_REG_CTRL_INTR_DISABLE :
-			 MG_REG_CTRL_INTR_ENABLE),
+	outb(ATA_SRST | (prv_data->use_polling ? ATA_NIEN : 0),
 			(unsigned long)host->dev_base + MG_REG_DRV_CTRL);
-	err = mg_wait(host, MG_REG_STATUS_BIT_BUSY, MG_TMAX_RST_TO_BUSY);
+	err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
 	if (err)
 		return err;
 
 	/* soft reset off */
-	outb(prv_data->use_polling ? MG_REG_CTRL_INTR_DISABLE :
-			MG_REG_CTRL_INTR_ENABLE,
+	outb(prv_data->use_polling ? ATA_NIEN : 0,
 			(unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 	err = mg_wait(host, MG_STAT_READY, MG_TMAX_SWRST_TO_RDY);
 	if (err)
@@ -531,7 +482,7 @@ static unsigned int mg_out(struct mg_host *host,
 			MG_REG_CYL_LOW);
 	outb((u8)(sect_num >> 16), (unsigned long)host->dev_base +
 			MG_REG_CYL_HIGH);
-	outb((u8)((sect_num >> 24) | MG_REG_HEAD_LBA_MODE),
+	outb((u8)((sect_num >> 24) | ATA_LBA | ATA_DEVICE_OBS),
 			(unsigned long)host->dev_base + MG_REG_DRV_HEAD);
 	outb(cmd, (unsigned long)host->dev_base + MG_REG_COMMAND);
 	return MG_ERR_NONE;
@@ -552,8 +503,8 @@ static void mg_read(struct request *req)
 	do {
 		u16 *buff = (u16 *)req->buffer;
 
-		if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ,
-					MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) {
+		if (mg_wait(host, ATA_DRQ,
+			    MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) {
 			mg_bad_rw_intr(host);
 			return;
 		}
@@ -583,8 +534,7 @@ static void mg_write(struct request *req)
 	do {
 		u16 *buff = (u16 *)req->buffer;
 
-		if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ,
-					MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
+	if (mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
 			mg_bad_rw_intr(host);
 			return;
 		}
@@ -606,11 +556,11 @@ static void mg_read_intr(struct mg_host *host)
 	/* check status */
 	do {
 		i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-		if (i & MG_REG_STATUS_BIT_BUSY)
+		if (i & ATA_BUSY)
 			break;
 		if (!MG_READY_OK(i))
 			break;
-		if (i & MG_REG_STATUS_BIT_DATA_REQ)
+		if (i & ATA_DRQ)
 			goto ok_to_read;
 	} while (0);
 	mg_dump_status("mg_read_intr", i, host);
@@ -655,11 +605,11 @@ static void mg_write_intr(struct mg_host *host)
 	/* check status */
 	do {
 		i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
-		if (i & MG_REG_STATUS_BIT_BUSY)
+		if (i & ATA_BUSY)
 			break;
 		if (!MG_READY_OK(i))
 			break;
-		if ((req->nr_sectors <= 1) || (i & MG_REG_STATUS_BIT_DATA_REQ))
+		if ((req->nr_sectors <= 1) || (i & ATA_DRQ))
 			goto ok_to_write;
 	} while (0);
 	mg_dump_status("mg_write_intr", i, host);
@@ -752,18 +702,15 @@ static unsigned int mg_issue_req(struct request *req,
 		break;
 	case WRITE:
 		/* TODO : handler */
-		outb(MG_REG_CTRL_INTR_DISABLE,
-				(unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 		if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr)
 				!= MG_ERR_NONE) {
 			mg_bad_rw_intr(host);
 			return host->error;
 		}
 		del_timer(&host->timer);
-		mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, MG_TMAX_WAIT_WR_DRQ);
-		outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ);
+		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 		if (host->error) {
 			mg_bad_rw_intr(host);
 			return host->error;
@@ -849,9 +796,7 @@ static int mg_suspend(struct platform_device *plat_dev, pm_message_t state)
 		return -EIO;
 
 	if (!prv_data->use_polling)
-		outb(MG_REG_CTRL_INTR_DISABLE,
-				(unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 
 	outb(MG_CMD_SLEEP, (unsigned long)host->dev_base + MG_REG_COMMAND);
 	/* wait until mflash deep sleep */
@@ -859,9 +804,7 @@ static int mg_suspend(struct platform_device *plat_dev, pm_message_t state)
 
 	if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) {
 		if (!prv_data->use_polling)
-			outb(MG_REG_CTRL_INTR_ENABLE,
-					(unsigned long)host->dev_base +
-					MG_REG_DRV_CTRL);
+			outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 		return -EIO;
 	}
 
@@ -884,8 +827,7 @@ static int mg_resume(struct platform_device *plat_dev)
 		return -EIO;
 
 	if (!prv_data->use_polling)
-		outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base +
-				MG_REG_DRV_CTRL);
+		outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 5b644c7a218702668d7b610994e7dcbc3d4705d3 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 28 Apr 2009 08:17:54 +0000
Subject: clocksource: improve sh_cmt clocksource overflow handling

This patch improves the sh_cmt clocksource handling.

Currently the counter value is ignored in the case of
overflow. With this patch the overflow flag is read
before and after reading the counter, removing any
counter value and overflow flag mismatch issues.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index c247564..d607ac2 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -111,16 +111,21 @@ static unsigned long sh_cmt_get_counter(struct sh_cmt_priv *p,
 					int *has_wrapped)
 {
 	unsigned long v1, v2, v3;
+	int o1, o2;
+
+	o1 = sh_cmt_read(p, CMCSR) & p->overflow_bit;
 
 	/* Make sure the timer value is stable. Stolen from acpi_pm.c */
 	do {
+		o2 = o1;
 		v1 = sh_cmt_read(p, CMCNT);
 		v2 = sh_cmt_read(p, CMCNT);
 		v3 = sh_cmt_read(p, CMCNT);
-	} while (unlikely((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
-			  || (v3 > v1 && v3 < v2)));
+		o1 = sh_cmt_read(p, CMCSR) & p->overflow_bit;
+	} while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3)
+			  || (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2)));
 
-	*has_wrapped = sh_cmt_read(p, CMCSR) & p->overflow_bit;
+	*has_wrapped = o1;
 	return v2;
 }
 
@@ -394,7 +399,7 @@ static cycle_t sh_cmt_clocksource_read(struct clocksource *cs)
 	raw = sh_cmt_get_counter(p, &has_wrapped);
 
 	if (unlikely(has_wrapped))
-		raw = p->match_value;
+		raw += p->match_value;
 	spin_unlock_irqrestore(&p->lock, flags);
 
 	return value + raw;
-- 
cgit v0.10.2


From 8e0b842948156e3463879caed12b4ce51bed772e Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 28 Apr 2009 08:19:50 +0000
Subject: sh: setup timers in late_time_init()

This patch moves the SuperH timer setup code from time_init()
to late_time_init(). Good things about this change:
 - interrupts: they are enabled at late_time_init()
 - mm: regular kmalloc() can be used at late_time_init()

Together with moving to late_time_init() this patch changes
the sh_cmt driver to always allocate with kmalloc(). This
simplifies the code a bit and also fixes section mismatches.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index 109409f..a2458bf 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -237,21 +237,8 @@ unsigned long long sched_clock(void)
 }
 #endif
 
-void __init time_init(void)
+static void __init sh_late_time_init(void)
 {
-	if (board_time_init)
-		board_time_init();
-
-	clk_init();
-
-	rtc_sh_get_time(&xtime);
-	set_normalized_timespec(&wall_to_monotonic,
-				-xtime.tv_sec, -xtime.tv_nsec);
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	local_timer_setup(smp_processor_id());
-#endif
-
 	/*
 	 * Make sure all compiled-in early timers register themselves.
 	 * Run probe() for one "earlytimer" device.
@@ -270,3 +257,22 @@ void __init time_init(void)
 
 	printk(KERN_INFO "Using %s for system timer\n", sys_timer->name);
 }
+
+void __init time_init(void)
+{
+	if (board_time_init)
+		board_time_init();
+
+	clk_init();
+
+	rtc_sh_get_time(&xtime);
+	set_normalized_timespec(&wall_to_monotonic,
+				-xtime.tv_sec, -xtime.tv_nsec);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	local_timer_setup(smp_processor_id());
+#endif
+
+	late_time_init = sh_late_time_init;
+}
+
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index d607ac2..bf3e4c1 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -18,7 +18,6 @@
  */
 
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
@@ -645,11 +644,7 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev)
 		return 0;
 	}
 
-	if (is_early_platform_device(pdev))
-		p = alloc_bootmem(sizeof(*p));
-	else
-		p = kmalloc(sizeof(*p), GFP_KERNEL);
-
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
 	if (p == NULL) {
 		dev_err(&pdev->dev, "failed to allocate driver data\n");
 		return -ENOMEM;
@@ -657,11 +652,7 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev)
 
 	ret = sh_cmt_setup(p, pdev);
 	if (ret) {
-		if (is_early_platform_device(pdev))
-			free_bootmem(__pa(p), sizeof(*p));
-		else
-			kfree(p);
-
+		kfree(p);
 		platform_set_drvdata(pdev, NULL);
 	}
 	return ret;
-- 
cgit v0.10.2


From 9ec4fa271faf2db3b8e1419c998da1ca6b094eb6 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 17:57:18 -0700
Subject: irq, cpumask: correct CPUMASKS_OFFSTACK typo and fix fallout

CPUMASKS_OFFSTACK is not defined anywhere (it is CPUMASK_OFFSTACK).
It is a typo and init_allocate_desc_masks() is called before it set
affinity to all cpus...

Split init_alloc_desc_masks() into all_desc_masks() and init_desc_masks().

Also use CPUMASK_OFFSTACK in alloc_desc_masks().

[ Impact: fix smp_affinity copying/setup when moving irq_desc between CPUs ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
LKML-Reference: <49F6546E.3040406@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b7cbeed..c4953cf 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -424,27 +424,25 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
 #ifdef CONFIG_SMP
 /**
- * init_alloc_desc_masks - allocate cpumasks for irq_desc
+ * alloc_desc_masks - allocate cpumasks for irq_desc
  * @desc:	pointer to irq_desc struct
  * @cpu:	cpu which will be handling the cpumasks
  * @boot:	true if need bootmem
  *
  * Allocates affinity and pending_mask cpumask if required.
  * Returns true if successful (or not required).
- * Side effect: affinity has all bits set, pending_mask has all bits clear.
  */
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
 								bool boot)
 {
+#ifdef CONFIG_CPUMASK_OFFSTACK
 	int node;
 
 	if (boot) {
 		alloc_bootmem_cpumask_var(&desc->affinity);
-		cpumask_setall(desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 		alloc_bootmem_cpumask_var(&desc->pending_mask);
-		cpumask_clear(desc->pending_mask);
 #endif
 		return true;
 	}
@@ -453,18 +451,25 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
 
 	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
 		return false;
-	cpumask_setall(desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
 		free_cpumask_var(desc->affinity);
 		return false;
 	}
-	cpumask_clear(desc->pending_mask);
+#endif
 #endif
 	return true;
 }
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+	cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_clear(desc->pending_mask);
+#endif
+}
+
 /**
  * init_copy_desc_masks - copy cpumasks for irq_desc
  * @old_desc:	pointer to old irq_desc struct
@@ -478,7 +483,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
 static inline void init_copy_desc_masks(struct irq_desc *old_desc,
 					struct irq_desc *new_desc)
 {
-#ifdef CONFIG_CPUMASKS_OFFSTACK
+#ifdef CONFIG_CPUMASK_OFFSTACK
 	cpumask_copy(new_desc->affinity, old_desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -499,12 +504,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc,
 
 #else /* !CONFIG_SMP */
 
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
 								bool boot)
 {
 	return true;
 }
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+}
+
 static inline void init_copy_desc_masks(struct irq_desc *old_desc,
 					struct irq_desc *new_desc)
 {
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d82142b..882c798 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -115,10 +115,11 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
 		printk(KERN_ERR "can not alloc kstat_irqs\n");
 		BUG_ON(1);
 	}
-	if (!init_alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, cpu, false)) {
 		printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
 		BUG_ON(1);
 	}
+	init_desc_masks(desc);
 	arch_init_chip_data(desc, cpu);
 }
 
@@ -169,7 +170,8 @@ int __init early_irq_init(void)
 		desc[i].irq = i;
 		desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
-		init_alloc_desc_masks(&desc[i], 0, true);
+		alloc_desc_masks(&desc[i], 0, true);
+		init_desc_masks(&desc[i]);
 		irq_desc_ptrs[i] = desc + i;
 	}
 
@@ -256,7 +258,8 @@ int __init early_irq_init(void)
 
 	for (i = 0; i < count; i++) {
 		desc[i].irq = i;
-		init_alloc_desc_masks(&desc[i], 0, true);
+		alloc_desc_masks(&desc[i], 0, true);
+		init_desc_masks(&desc[i]);
 		desc[i].kstat_irqs = kstat_irqs_all[i];
 	}
 	return arch_early_irq_init();
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 44bbdcb..5760d72 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -37,7 +37,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
 		 struct irq_desc *desc, int cpu)
 {
 	memcpy(desc, old_desc, sizeof(struct irq_desc));
-	if (!init_alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, cpu, false)) {
 		printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
 				"for migration.\n", irq);
 		return false;
-- 
cgit v0.10.2


From fcef5911c7ea89b80d5bfc727f402f37c9eefd57 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 17:58:23 -0700
Subject: x86/irq: remove leftover code from NUMA_MIGRATE_IRQ_DESC

The original feature of migrating irq_desc dynamic was too fragile
and was causing problems: it caused crashes on systems with lots of
cards with MSI-X when user-space irq-balancer was enabled.

We now have new patches that create irq_desc according to device
numa node. This patch removes the leftover bits of the dynamic balancer.

[ Impact: remove dead code ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F654AF.8000808@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index df9e885..e1b2543 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -274,16 +274,6 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say N.
 
-config NUMA_MIGRATE_IRQ_DESC
-	bool "Move irq desc when changing irq smp_affinity"
-	depends on SPARSE_IRQ && NUMA
-	depends on BROKEN
-	default n
-	---help---
-	  This enables moving irq_desc to cpu/node that irq will use handled.
-
-	  If you don't know what to do here, say N.
-
 config X86_MPPARSE
 	bool "Enable MPS table" if ACPI
 	default y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9fe5d21..27b8ce0 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -195,7 +195,6 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
 CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
 # CONFIG_X86_ELAN is not set
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 30da617..9fbf0f7 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -148,9 +148,6 @@ struct irq_cfg {
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-	u8 move_desc_pending : 1;
-#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -254,8 +251,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
 	return 0;
 }
 
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
 static void
 init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
 {
@@ -356,19 +352,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
 		old_desc->chip_data = NULL;
 	}
 }
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg = desc->chip_data;
-
-	if (!cfg->move_in_progress) {
-		/* it means that domain is not changed */
-		if (!cpumask_intersects(desc->affinity, mask))
-			cfg->move_desc_pending = 1;
-	}
-}
-#endif
+/* end for move_irq_desc */
 
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -378,13 +362,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -592,9 +569,6 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 	if (assign_irq_vector(irq, cfg, mask))
 		return BAD_APICID;
 
-	/* check that before desc->addinity get updated */
-	set_extra_move_desc(desc, mask);
-
 	cpumask_copy(desc->affinity, mask);
 
 	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
@@ -2393,8 +2367,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, mask);
-
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
 	irte.vector = cfg->vector;
@@ -2491,34 +2463,14 @@ static void irq_complete_move(struct irq_desc **descp)
 	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		if (likely(!cfg->move_desc_pending))
-			return;
-
-		/* domain has not changed, but affinity did */
-		me = smp_processor_id();
-		if (cpumask_test_cpu(me, desc->affinity)) {
-			*descp = desc = move_irq_desc(desc, me);
-			/* get the new one */
-			cfg = desc->chip_data;
-			cfg->move_desc_pending = 0;
-		}
-#endif
+	if (likely(!cfg->move_in_progress))
 		return;
-	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		*descp = desc = move_irq_desc(desc, me);
-		/* get the new one */
-		cfg = desc->chip_data;
-#endif
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 		send_cleanup_vector(cfg);
-	}
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c4953cf..2a34cd6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -212,16 +212,6 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
 
 extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
 
-static inline struct irq_desc *
-irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
-{
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-	return irq_to_desc(irq);
-#else
-	return desc;
-#endif
-}
-
 /*
  * Migration helpers for obsolete names, they will go away:
  */
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 3394f8f..2f06527 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
-obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+obj-$(CONFIG_SPARSE_IRQ) += numa_migrate.o
 obj-$(CONFIG_PM_SLEEP) += pm.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c687ba4..13c68e7 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,7 +359,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 
 	spin_lock(&desc->lock);
 	mask_ack_irq(desc, irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -438,7 +437,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 	desc->status &= ~IRQ_INPROGRESS;
 out:
 	desc->chip->eoi(irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	spin_unlock(&desc->lock);
 }
@@ -475,7 +473,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		    !desc->action)) {
 		desc->status |= (IRQ_PENDING | IRQ_MASKED);
 		mask_ack_irq(desc, irq);
-		desc = irq_remap_to_desc(irq, desc);
 		goto out_unlock;
 	}
 	kstat_incr_irqs_this_cpu(irq, desc);
@@ -483,7 +480,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 	/* Start handling the irq */
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	/* Mark the IRQ currently in progress.*/
 	desc->status |= IRQ_INPROGRESS;
@@ -544,10 +540,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	if (desc->chip->eoi) {
+	if (desc->chip->eoi)
 		desc->chip->eoi(irq);
-		desc = irq_remap_to_desc(irq, desc);
-	}
 }
 
 void
@@ -582,10 +576,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 
 	/* Uninstall? */
 	if (handle == handle_bad_irq) {
-		if (desc->chip != &no_irq_chip) {
+		if (desc->chip != &no_irq_chip)
 			mask_ack_irq(desc, irq);
-			desc = irq_remap_to_desc(irq, desc);
-		}
 		desc->status |= IRQ_DISABLED;
 		desc->depth = 1;
 	}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 882c798..3e0cbc4 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -458,11 +458,8 @@ unsigned int __do_IRQ(unsigned int irq)
 		/*
 		 * No locking required for CPU-local interrupts:
 		 */
-		if (desc->chip->ack) {
+		if (desc->chip->ack)
 			desc->chip->ack(irq);
-			/* get new one */
-			desc = irq_remap_to_desc(irq, desc);
-		}
 		if (likely(!(desc->status & IRQ_DISABLED))) {
 			action_ret = handle_IRQ_event(irq, desc->action);
 			if (!noirqdebug)
@@ -473,10 +470,8 @@ unsigned int __do_IRQ(unsigned int irq)
 	}
 
 	spin_lock(&desc->lock);
-	if (desc->chip->ack) {
+	if (desc->chip->ack)
 		desc->chip->ack(irq);
-		desc = irq_remap_to_desc(irq, desc);
-	}
 	/*
 	 * REPLAY is when Linux resends an IRQ that was dropped earlier
 	 * WAITING is used by probe to mark irqs that are being tested
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 5760d72..ce72bc3 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -97,9 +97,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 
 	/* free the old one */
 	free_one_irq_desc(old_desc, desc);
-	spin_unlock(&old_desc->lock);
 	kfree(old_desc);
-	spin_lock(&desc->lock);
 
 	return desc;
 
-- 
cgit v0.10.2


From d5dedd4507d307eb3f35f21b6e16f336fdc0d82a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 17:59:21 -0700
Subject: irq: change ->set_affinity() to return status

according to Ingo, change set_affinity() in irq_chip should return int,
because that way we can handle failure cases in a much cleaner way, in
the genirq layer.

v2: fix two typos

[ Impact: extend API ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-arch@vger.kernel.org
LKML-Reference: <49F654E9.4070809@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 9c9d1fd..5bd5259 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -176,22 +176,26 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 	}
 }
 
-static void
+static int
 dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
 	cpu_set_irq_affinity(irq, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
+
+	return 0;
 }
 
-static void
+static int
 clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
 	cpu_set_irq_affinity(irq - 16, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
+
+	return 0;
 }
 
 static struct hw_interrupt_type dp264_irq_type = {
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 27f840a..8dd239e 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -157,13 +157,15 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 
 }
 
-static void
+static int
 titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&titan_irq_lock);
 	titan_cpu_set_irq_affinity(irq - 16, *affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
+
+	return 0;
 }
 
 static void
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index c6884ba..90f6b7f 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,7 +109,7 @@ static void gic_unmask_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
+static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
@@ -122,6 +122,8 @@ static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 	val |= 1 << (cpu + shift);
 	writel(val, reg);
 	spin_unlock(&irq_controller_lock);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index df3925c..d70b445 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,12 +325,14 @@ static void end_crisv32_irq(unsigned int irq)
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
+int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&irq_lock, flags);
 	irq_allocations[irq - FIRST_IRQ].mask = *dest;
 	spin_unlock_irqrestore(&irq_lock, flags);
+
+	return 0;
 }
 
 static struct irq_chip crisv32_irq_type = {
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index cc0a318..acb5047 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -21,9 +21,10 @@ hpsim_irq_noop (unsigned int irq)
 {
 }
 
-static void
+static int
 hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
 {
+	return 0;
 }
 
 static struct hw_interrupt_type irq_type_hp_sim = {
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 166e0d8..f92cef4 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -329,7 +329,7 @@ unmask_irq (unsigned int irq)
 }
 
 
-static void
+static int
 iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
@@ -343,15 +343,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	cpu = cpumask_first_and(cpu_online_mask, mask);
 	if (cpu >= nr_cpu_ids)
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	dest = cpu_physical_id(cpu);
 
 	if (!iosapic_intr_info[irq].count)
-		return;			/* not an IOSAPIC interrupt */
+		return -1;			/* not an IOSAPIC interrupt */
 
 	set_irq_affinity_info(irq, dest, redir);
 
@@ -376,7 +376,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 		iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
 		iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
 	}
+
 #endif
+	return 0;
 }
 
 /*
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 2b15e23..0f8ade9 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -12,7 +12,7 @@
 static struct irq_chip	ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq,
+static int ia64_set_msi_irq_affinity(unsigned int irq,
 				      const cpumask_t *cpu_mask)
 {
 	struct msi_msg msg;
@@ -20,10 +20,10 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
 	int cpu = first_cpu(*cpu_mask);
 
 	if (!cpu_online(cpu))
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	read_msi_msg(irq, &msg);
 
@@ -39,6 +39,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
 
 	write_msi_msg(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
@@ -130,17 +132,17 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	struct msi_msg msg;
 	int cpu = cpumask_first(mask);
 
 	if (!cpu_online(cpu))
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	dmar_msi_read(irq, &msg);
 
@@ -151,6 +153,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dmar_msi_write(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, mask);
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 66fd705..764f26a 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,7 +227,7 @@ finish_up:
 	return new_irq_info;
 }
 
-static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
+static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
 	nasid_t nasid;
@@ -239,6 +239,8 @@ static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
 				 sn_irq_lh[irq], list)
 		(void)sn_retarget_vector(sn_irq_info, nasid, slice);
+
+	return 0;
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 81e4289..fbbfb97 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
 }
 
 #ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq,
+static int sn_set_msi_irq_affinity(unsigned int irq,
 				    const struct cpumask *cpu_mask)
 {
 	struct msi_msg msg;
@@ -168,7 +168,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
 	cpu = cpumask_first(cpu_mask);
 	sn_irq_info = sn_msi_info[irq].sn_irq_info;
 	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
-		return;
+		return -1;
 
 	/*
 	 * Release XIO resources for the old MSI PCI address
@@ -189,7 +189,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
 	new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
 	sn_msi_info[irq].sn_irq_info = new_irq_info;
 	if (new_irq_info == NULL)
-		return;
+		return -1;
 
 	/*
 	 * Map the xio address into bus space
@@ -206,6 +206,8 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
 
 	write_msi_msg(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, cpu_mask);
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 1c19af8..d3a0c81 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -177,7 +177,7 @@ static void octeon_irq_ciu0_disable(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu;
 	int bit = irq - OCTEON_IRQ_WORKQ0;	/* Bit 0-63 of EN0 */
@@ -199,6 +199,8 @@ static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask
 	 */
 	cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2));
 	write_unlock(&octeon_irq_ciu0_rwlock);
+
+	return 0;
 }
 #endif
 
@@ -292,7 +294,7 @@ static void octeon_irq_ciu1_disable(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu;
 	int bit = irq - OCTEON_IRQ_WDOG0;	/* Bit 0-63 of EN1 */
@@ -315,6 +317,8 @@ static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask
 	 */
 	cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1));
 	write_unlock(&octeon_irq_ciu1_rwlock);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 3214ade..4f1eed1 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq)
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
 #include <linux/cpumask.h>
 
-extern void plat_set_irq_affinity(unsigned int irq,
+extern int plat_set_irq_affinity(unsigned int irq,
 				  const struct cpumask *affinity);
 extern void smtc_forward_irq(unsigned int irq);
 
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 87deb8f..3f43c2e 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
 
 static DEFINE_SPINLOCK(gic_lock);
 
-static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	cpumask_t	tmp = CPU_MASK_NONE;
 	unsigned long	flags;
@@ -166,7 +166,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 
 	cpumask_and(&tmp, cpumask, cpu_online_mask);
 	if (cpus_empty(tmp))
-		return;
+		return -1;
 
 	/* Assumption : cpumask refers to a single CPU */
 	spin_lock_irqsave(&gic_lock, flags);
@@ -190,6 +190,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	cpumask_copy(irq_desc[irq].affinity, cpumask);
 	spin_unlock_irqrestore(&gic_lock, flags);
 
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ba3188..499ffe5 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,7 +114,7 @@ struct plat_smp_ops msmtc_smp_ops = {
  */
 
 
-void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 {
 	cpumask_t tmask;
 	int cpu = 0;
@@ -156,5 +156,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 
 	/* Do any generic SMTC IRQ affinity setup */
 	smtc_set_irq_affinity(irq, tmask);
+
+	return 0;
 }
 #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index 352352b..4f256a1 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
 static void disable_bcm1480_irq(unsigned int irq);
 static void ack_bcm1480_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on, k;
 	u64 cur_ints;
@@ -119,7 +119,7 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	if (cpumask_weight(mask) != 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
-		return;
+		return -1;
 	}
 	i = cpumask_first(mask);
 
@@ -155,6 +155,8 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 	}
 	spin_unlock(&bcm1480_imr_lock);
 	spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index c08ff58..e389507 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
 static void disable_sb1250_irq(unsigned int irq);
 static void ack_sb1250_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,7 +103,7 @@ void sb1250_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on;
 	u64 cur_ints;
@@ -114,7 +114,7 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	if (cpumask_weight(mask) > 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
-		return;
+		return -1;
 	}
 
 	/* Convert logical CPU to physical CPU */
@@ -146,6 +146,8 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 	}
 	spin_unlock(&sb1250_imr_lock);
 	spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 4ea4229..8007f1e 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -130,15 +130,17 @@ int cpu_check_affinity(unsigned int irq, const struct cpumask *dest)
 	return cpu_dest;
 }
 
-static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu_dest;
 
 	cpu_dest = cpu_check_affinity(irq, dest);
 	if (cpu_dest < 0)
-		return;
+		return -1;
 
 	cpumask_copy(&irq_desc[irq].affinity, dest);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 80b5134..be3581a 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq)
 	lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
+static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
 	unsigned int irq;
 	int status;
@@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 
 	irq = (unsigned int)irq_map[virq].hwirq;
 	if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
-		return;
+		return -1;
 
 	status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
 
 	if (status) {
 		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
 			__func__, irq, status);
-		return;
+		return -1;
 	}
 
 	/*
@@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 		printk(KERN_WARNING
 			"%s: No online cpus in the mask %s for irq %d\n",
 			__func__, cpulist, virq);
-		return;
+		return -1;
 	}
 
 	status = rtas_call(ibm_set_xive, 3, 1, NULL,
@@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 	if (status) {
 		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
 			__func__, irq, status);
-		return;
+		return -1;
 	}
+
+	return 0;
 }
 
 static struct irq_chip xics_pic_direct = {
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 21b9567..f4cbd15 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq)
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
@@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
 			       mpic_physmask(cpus_addr(tmp)[0]));
 	}
+
+	return 0;
 }
 
 static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 5deabe9..e5e78f9 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 	}
 }
 
-static void sun4u_set_affinity(unsigned int virt_irq,
+static int sun4u_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	sun4u_irq_enable(virt_irq);
+
+	return 0;
 }
 
 /* Don't do anything.  The desc->status check for IRQ_DISABLED in
@@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq)
 		       ino, err);
 }
 
-static void sun4v_set_affinity(unsigned int virt_irq,
+static int sun4v_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
@@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq,
 	if (err != HV_EOK)
 		printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
 		       "err(%d)\n", ino, cpuid, err);
+
+	return 0;
 }
 
 static void sun4v_irq_disable(unsigned int virt_irq)
@@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq)
 		       dev_handle, dev_ino, err);
 }
 
-static void sun4v_virt_set_affinity(unsigned int virt_irq,
+static int sun4v_virt_set_affinity(unsigned int virt_irq,
 				    const struct cpumask *mask)
 {
 	unsigned long cpuid, dev_handle, dev_ino;
@@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq,
 		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
 		       "err(%d)\n",
 		       dev_handle, dev_ino, cpuid, err);
+
+	return 0;
 }
 
 static void sun4v_virq_disable(unsigned int virt_irq)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9fbf0f7..5c7630b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -574,13 +574,14 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
 }
 
-static void
+static int
 set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	unsigned int irq;
+	int ret = -1;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
@@ -591,18 +592,21 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
 		__target_IO_APIC_irq(irq, dest, cfg);
+		ret = 0;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return ret;
 }
 
-static void
+static int
 set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc;
 
 	desc = irq_to_desc(irq);
 
-	set_ioapic_affinity_irq_desc(desc, mask);
+	return set_ioapic_affinity_irq_desc(desc, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -2348,24 +2352,25 @@ static int ioapic_retrigger_irq(unsigned int irq)
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
  */
-static void
+static int
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
 	unsigned int dest;
 	unsigned int irq;
+	int ret = -1;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
+		return ret;
 
 	irq = desc->irq;
 	if (get_irte(irq, &irte))
-		return;
+		return ret;
 
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return;
+		return ret;
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2381,27 +2386,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 		send_cleanup_vector(cfg);
 
 	cpumask_copy(desc->affinity, mask);
+
+	return 0;
 }
 
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 					    const struct cpumask *mask)
 {
-	migrate_ioapic_irq_desc(desc, mask);
+	return migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	set_ir_ioapic_affinity_irq_desc(desc, mask);
+	return set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 						   const struct cpumask *mask)
 {
+	return 0;
 }
 #endif
 
@@ -3318,7 +3326,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3327,7 +3335,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3339,13 +3347,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg_desc(desc, &msg);
+
+	return 0;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void
+static int
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -3354,11 +3364,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	struct irte irte;
 
 	if (get_irte(irq, &irte))
-		return;
+		return -1;
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3375,6 +3385,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	 */
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
+
+	return 0;
 }
 
 #endif
@@ -3528,7 +3540,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3537,7 +3549,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3549,6 +3561,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3582,7 +3596,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3591,7 +3605,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3603,6 +3617,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3659,7 +3675,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3667,11 +3683,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
 	target_ht_irq(irq, dest, cfg->vector);
+
+	return 0;
 }
 
 #endif
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 73348c4..4a9cc92 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -702,7 +702,7 @@ static unsigned int iosapic_startup_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void iosapic_set_affinity_irq(unsigned int irq,
+static int iosapic_set_affinity_irq(unsigned int irq,
 				     const struct cpumask *dest)
 {
 	struct vector_info *vi = iosapic_get_vector(irq);
@@ -712,7 +712,7 @@ static void iosapic_set_affinity_irq(unsigned int irq,
 
 	dest_cpu = cpu_check_affinity(irq, dest);
 	if (dest_cpu < 0)
-		return;
+		return -1;
 
 	cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu));
 	vi->txn_addr = txn_affinity_addr(irq, dest_cpu);
@@ -724,6 +724,8 @@ static void iosapic_set_affinity_irq(unsigned int irq,
 	iosapic_set_irt_data(vi, &dummy_d0, &d1);
 	iosapic_wr_irt_entry(vi, d0, d1);
 	spin_unlock_irqrestore(&iosapic_lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af..3338988 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -688,13 +688,13 @@ void rebind_evtchn_irq(int evtchn, int irq)
 }
 
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
-static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 {
 	struct evtchn_bind_vcpu bind_vcpu;
 	int evtchn = evtchn_from_irq(irq);
 
 	if (!VALID_EVTCHN(evtchn))
-		return;
+		return -1;
 
 	/* Send future instances of this interrupt to other vcpu. */
 	bind_vcpu.port = evtchn;
@@ -707,13 +707,15 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 	 */
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
 		bind_evtchn_to_cpu(evtchn, tcpu);
-}
 
+	return 0;
+}
 
-static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
+static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
 {
 	unsigned tcpu = cpumask_first(dest);
-	rebind_irq_to_cpu(irq, tcpu);
+
+	return rebind_irq_to_cpu(irq, tcpu);
 }
 
 int resend_irq_on_evtchn(unsigned int irq)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2a34cd6..8e4c18b 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -117,7 +117,7 @@ struct irq_chip {
 	void		(*eoi)(unsigned int irq);
 
 	void		(*end)(unsigned int irq);
-	void		(*set_affinity)(unsigned int irq,
+	int		(*set_affinity)(unsigned int irq,
 					const struct cpumask *dest);
 	int		(*retrigger)(unsigned int irq);
 	int		(*set_type)(unsigned int irq, unsigned int flow_type);
-- 
cgit v0.10.2


From 57b150cce8e004ddd36330490a68bfb59b7271e9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 17:59:53 -0700
Subject: irq: only update affinity if ->set_affinity() is sucessfull

irq_set_affinity() and move_masked_irq() try to assign affinity
before calling chip set_affinity(). Some archs are assigning it
in ->set_affinity() again.

We do something like:

 cpumask_cpy(desc->affinity, mask);
 desc->chip->set_affinity(mask);

But in the failure path, affinity should not be touched - otherwise
we'll end up with a different affinity mask despite the failure to
migrate the IRQ.

So try to update the afffinity only if set_affinity returns with 0.
Also call irq_set_thread_affinity accordingly.

v2: update after "irq, x86: Remove IRQ_DISABLED check in process context IRQ move"
v3: according to Ingo, change set_affinity() in irq_chip should return int.
v4: update comments by removing moving irq_desc code.

[ Impact: fix /proc/irq/*/smp_affinity setting corner case bug ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F65509.60307@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 01ce20e..de5f412 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -42,6 +42,9 @@ static inline void unregister_handler_proc(unsigned int irq,
 
 extern int irq_select_affinity_usr(unsigned int irq);
 
+extern void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask);
+
 /*
  * Debugging printout:
  */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2734eca..aaf5c9d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -80,7 +80,7 @@ int irq_can_set_affinity(unsigned int irq)
 	return 1;
 }
 
-static void
+void
 irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
 {
 	struct irqaction *action = desc->action;
@@ -109,17 +109,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	spin_lock_irqsave(&desc->lock, flags);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (desc->status & IRQ_MOVE_PCNTXT)
-		desc->chip->set_affinity(irq, cpumask);
+	if (desc->status & IRQ_MOVE_PCNTXT) {
+		if (!desc->chip->set_affinity(irq, cpumask)) {
+			cpumask_copy(desc->affinity, cpumask);
+			irq_set_thread_affinity(desc, cpumask);
+		}
+	}
 	else {
 		desc->status |= IRQ_MOVE_PENDING;
 		cpumask_copy(desc->pending_mask, cpumask);
 	}
 #else
-	cpumask_copy(desc->affinity, cpumask);
-	desc->chip->set_affinity(irq, cpumask);
+	if (!desc->chip->set_affinity(irq, cpumask)) {
+		cpumask_copy(desc->affinity, cpumask);
+		irq_set_thread_affinity(desc, cpumask);
+	}
 #endif
-	irq_set_thread_affinity(desc, cpumask);
 	desc->status |= IRQ_AFFINITY_SET;
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index e05ad9b..cfe767c 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -1,5 +1,8 @@
 
 #include <linux/irq.h>
+#include <linux/interrupt.h>
+
+#include "internals.h"
 
 void move_masked_irq(int irq)
 {
@@ -39,11 +42,12 @@ void move_masked_irq(int irq)
 	 * masking the irqs.
 	 */
 	if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
-		   < nr_cpu_ids)) {
-		cpumask_and(desc->affinity,
-			    desc->pending_mask, cpu_online_mask);
-		desc->chip->set_affinity(irq, desc->affinity);
-	}
+		   < nr_cpu_ids))
+		if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
+			cpumask_copy(desc->affinity, desc->pending_mask);
+			irq_set_thread_affinity(desc, desc->pending_mask);
+		}
+
 	cpumask_clear(desc->pending_mask);
 }
 
-- 
cgit v0.10.2


From 85ac16d033370caf6f48d743c8dc8103700f5cc5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 18:00:38 -0700
Subject: x86/irq: change irq_desc_alloc() to take node instead of cpu

This simplifies the node awareness of the code. All our allocators
only deal with a NUMA node ID locality not with CPU ids anyway - so
there's no need to maintain (and transform) a CPU id all across the
IRq layer.

v2: keep move_irq_desc related

[ Impact: cleanup, prepare IRQ code to be NUMA-aware ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
LKML-Reference: <49F65536.2020300@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c7630b..560b887 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -129,12 +129,9 @@ struct irq_pin_list {
 	struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
 	struct irq_pin_list *pin;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
@@ -209,12 +206,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 	return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
 	struct irq_cfg *cfg;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
@@ -235,13 +229,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 	return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 
 	cfg = desc->chip_data;
 	if (!cfg) {
-		desc->chip_data = get_one_free_irq_cfg(cpu);
+		desc->chip_data = get_one_free_irq_cfg(node);
 		if (!desc->chip_data) {
 			printk(KERN_ERR "can not alloc irq_cfg\n");
 			BUG_ON(1);
@@ -253,7 +247,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
 
 /* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
 	struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -262,7 +256,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
 	if (!old_entry)
 		return;
 
-	entry = get_one_free_irq_2_pin(cpu);
+	entry = get_one_free_irq_2_pin(node);
 	if (!entry)
 		return;
 
@@ -272,7 +266,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
 	tail		= entry;
 	old_entry	= old_entry->next;
 	while (old_entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			entry = head;
 			while (entry) {
@@ -312,12 +306,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-				 struct irq_desc *desc, int cpu)
+				 struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 	struct irq_cfg *old_cfg;
 
-	cfg = get_one_free_irq_cfg(cpu);
+	cfg = get_one_free_irq_cfg(node);
 
 	if (!cfg)
 		return;
@@ -328,7 +322,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
 
 	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+	init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -615,13 +609,13 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
 	struct irq_pin_list *entry;
 
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
 					apic, pin);
@@ -641,7 +635,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin(cpu);
+	entry->next = get_one_free_irq_2_pin(node);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -650,7 +644,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
@@ -670,7 +664,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+		add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -1612,7 +1606,7 @@ static void __init setup_IO_APIC_irqs(void)
 	int notcon = 0;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
@@ -1647,13 +1641,13 @@ static void __init setup_IO_APIC_irqs(void)
 					apic->multi_timer_check(apic_id, irq))
 				continue;
 
-			desc = irq_to_desc_alloc_cpu(irq, cpu);
+			desc = irq_to_desc_alloc_node(irq, node);
 			if (!desc) {
 				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 				continue;
 			}
 			cfg = desc->chip_data;
-			add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+			add_pin_to_irq_node(cfg, node, apic_id, pin);
 
 			setup_IO_APIC_irq(apic_id, pin, irq, desc,
 					irq_trigger(idx), irq_polarity(idx));
@@ -2863,7 +2857,7 @@ static inline void __init check_timer(void)
 {
 	struct irq_desc *desc = irq_to_desc(0);
 	struct irq_cfg *cfg = desc->chip_data;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	int no_pin1 = 0;
@@ -2929,7 +2923,7 @@ static inline void __init check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+			add_pin_to_irq_node(cfg, node, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		} else {
 			/* for edge trigger, setup_IO_APIC_irq already
@@ -2966,7 +2960,7 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
@@ -3185,7 +3179,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new = NULL;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
@@ -3194,7 +3188,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new < nr_irqs; new++) {
-		desc_new = irq_to_desc_alloc_cpu(new, cpu);
+		desc_new = irq_to_desc_alloc_node(new, node);
 		if (!desc_new) {
 			printk(KERN_INFO "can not get irq_desc for %d\n", new);
 			continue;
@@ -3968,7 +3962,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 {
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -3976,7 +3970,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 		return -EINVAL;
 	}
 
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	desc = irq_to_desc_alloc_node(irq, node);
 	if (!desc) {
 		printk(KERN_INFO "can not get irq_desc %d\n", irq);
 		return 0;
@@ -3987,7 +3981,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 	 */
 	if (irq >= NR_IRQS_LEGACY) {
 		cfg = desc->chip_data;
-		add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+		add_pin_to_irq_node(cfg, node, ioapic, pin);
 	}
 
 	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ca7ec44..45acbcf 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -636,7 +636,7 @@ static void __init lguest_init_IRQ(void)
 
 void lguest_setup_irq(unsigned int irq)
 {
-	irq_to_desc_alloc_cpu(irq, 0);
+	irq_to_desc_alloc_node(irq, 0);
 	set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
 				      handle_level_irq, "level");
 }
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f5e0ea7..9eff36a 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -23,15 +23,12 @@ struct irq_2_iommu {
 };
 
 #ifdef CONFIG_GENERIC_HARDIRQS
-static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int node)
 {
 	struct irq_2_iommu *iommu;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node);
+	printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node);
 
 	return iommu;
 }
@@ -48,7 +45,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 	return desc->irq_2_iommu;
 }
 
-static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
+static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
 	struct irq_2_iommu *irq_iommu;
@@ -56,7 +53,7 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
 	/*
 	 * alloc irq desc if not allocated already.
 	 */
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	desc = irq_to_desc_alloc_node(irq, node);
 	if (!desc) {
 		printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 		return NULL;
@@ -65,14 +62,14 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
 	irq_iommu = desc->irq_2_iommu;
 
 	if (!irq_iommu)
-		desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu);
+		desc->irq_2_iommu = get_one_free_irq_2_iommu(node);
 
 	return desc->irq_2_iommu;
 }
 
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
-	return irq_2_iommu_alloc_cpu(irq, boot_cpu_id);
+	return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id));
 }
 
 #else /* !CONFIG_SPARSE_IRQ */
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 3338988..be437c2 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -335,7 +335,7 @@ static int find_unbound_irq(void)
 	if (irq == nr_irqs)
 		panic("No available IRQ to bind to: increase nr_irqs!\n");
 
-	desc = irq_to_desc_alloc_cpu(irq, 0);
+	desc = irq_to_desc_alloc_node(irq, 0);
 	if (WARN_ON(desc == NULL))
 		return -1;
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 91bb76f..ff374ce 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -566,6 +566,6 @@ struct irq_desc;
 extern int early_irq_init(void);
 extern int arch_probe_nr_irqs(void);
 extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern int arch_init_chip_data(struct irq_desc *desc, int node);
 
 #endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8e4c18b..a09baf8 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -187,7 +187,7 @@ struct irq_desc {
 	spinlock_t		lock;
 #ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
-	unsigned int		cpu;
+	unsigned int		node;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
 #endif
@@ -201,16 +201,16 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
-					struct irq_desc *desc, int cpu);
+					struct irq_desc *desc, int node);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
 #else /* CONFIG_SPARSE_IRQ */
-extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
+extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node);
 #endif /* CONFIG_SPARSE_IRQ */
 
-extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
 
 /*
  * Migration helpers for obsolete names, they will go away:
@@ -422,12 +422,10 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
  * Allocates affinity and pending_mask cpumask if required.
  * Returns true if successful (or not required).
  */
-static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
 								bool boot)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
-	int node;
-
 	if (boot) {
 		alloc_bootmem_cpumask_var(&desc->affinity);
 
@@ -437,8 +435,6 @@ static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
 		return true;
 	}
 
-	node = cpu_to_node(cpu);
-
 	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
 		return false;
 
@@ -494,7 +490,7 @@ static inline void free_desc_masks(struct irq_desc *old_desc,
 
 #else /* !CONFIG_SMP */
 
-static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
 								bool boot)
 {
 	return true;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3e0cbc4..a6368db 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -81,12 +81,10 @@ static struct irq_desc irq_desc_init = {
 	.lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
 };
 
-void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+void init_kstat_irqs(struct irq_desc *desc, int node, int nr)
 {
-	int node;
 	void *ptr;
 
-	node = cpu_to_node(cpu);
 	ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
 
 	/*
@@ -94,33 +92,32 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 	 * init_copy_kstat_irqs() could still use old one
 	 */
 	if (ptr) {
-		printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n",
-			 cpu, node);
+		printk(KERN_DEBUG "  alloc kstat_irqs on node %d\n", node);
 		desc->kstat_irqs = ptr;
 	}
 }
 
-static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
 {
 	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
 
 	spin_lock_init(&desc->lock);
 	desc->irq = irq;
 #ifdef CONFIG_SMP
-	desc->cpu = cpu;
+	desc->node = node;
 #endif
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-	init_kstat_irqs(desc, cpu, nr_cpu_ids);
+	init_kstat_irqs(desc, node, nr_cpu_ids);
 	if (!desc->kstat_irqs) {
 		printk(KERN_ERR "can not alloc kstat_irqs\n");
 		BUG_ON(1);
 	}
-	if (!alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, node, false)) {
 		printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
 		BUG_ON(1);
 	}
 	init_desc_masks(desc);
-	arch_init_chip_data(desc, cpu);
+	arch_init_chip_data(desc, node);
 }
 
 /*
@@ -189,11 +186,10 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	return NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
 	unsigned long flags;
-	int node;
 
 	if (irq >= nr_irqs) {
 		WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
@@ -212,15 +208,13 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
 	if (desc)
 		goto out_unlock;
 
-	node = cpu_to_node(cpu);
 	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "  alloc irq_desc for %d on cpu %d node %d\n",
-		 irq, cpu, node);
+	printk(KERN_DEBUG "  alloc irq_desc for %d on node %d\n", irq, node);
 	if (!desc) {
 		printk(KERN_ERR "can not alloc irq_desc\n");
 		BUG_ON(1);
 	}
-	init_one_irq_desc(irq, desc, cpu);
+	init_one_irq_desc(irq, desc, node);
 
 	irq_desc_ptrs[irq] = desc;
 
@@ -270,7 +264,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	return irq_to_desc(irq);
 }
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index de5f412..7346825 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -16,7 +16,7 @@ extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
 extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 
 extern struct lock_class_key irq_desc_lock_class;
-extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
+extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
 extern void clear_kstat_irqs(struct irq_desc *desc);
 extern spinlock_t sparse_irq_lock;
 
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index ce72bc3..2f69bee 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -15,9 +15,9 @@
 
 static void init_copy_kstat_irqs(struct irq_desc *old_desc,
 				 struct irq_desc *desc,
-				 int cpu, int nr)
+				 int node, int nr)
 {
-	init_kstat_irqs(desc, cpu, nr);
+	init_kstat_irqs(desc, node, nr);
 
 	if (desc->kstat_irqs != old_desc->kstat_irqs)
 		memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
@@ -34,20 +34,20 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
 }
 
 static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
-		 struct irq_desc *desc, int cpu)
+		 struct irq_desc *desc, int node)
 {
 	memcpy(desc, old_desc, sizeof(struct irq_desc));
-	if (!alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, node, false)) {
 		printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
 				"for migration.\n", irq);
 		return false;
 	}
 	spin_lock_init(&desc->lock);
-	desc->cpu = cpu;
+	desc->node = node;
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-	init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+	init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
 	init_copy_desc_masks(old_desc, desc);
-	arch_init_copy_chip_data(old_desc, desc, cpu);
+	arch_init_copy_chip_data(old_desc, desc, node);
 	return true;
 }
 
@@ -59,12 +59,11 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
 }
 
 static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
-						int cpu)
+						int node)
 {
 	struct irq_desc *desc;
 	unsigned int irq;
 	unsigned long flags;
-	int node;
 
 	irq = old_desc->irq;
 
@@ -76,7 +75,6 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 	if (desc && old_desc != desc)
 		goto out_unlock;
 
-	node = cpu_to_node(cpu);
 	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
 	if (!desc) {
 		printk(KERN_ERR "irq %d: can not get new irq_desc "
@@ -85,7 +83,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 		desc = old_desc;
 		goto out_unlock;
 	}
-	if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
+	if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) {
 		/* still use old one */
 		kfree(desc);
 		desc = old_desc;
@@ -107,24 +105,14 @@ out_unlock:
 	return desc;
 }
 
-struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
 {
-	int old_cpu;
-	int node, old_node;
-
 	/* those all static, do move them */
 	if (desc->irq < NR_IRQS_LEGACY)
 		return desc;
 
-	old_cpu = desc->cpu;
-	if (old_cpu != cpu) {
-		node = cpu_to_node(cpu);
-		old_node = cpu_to_node(old_cpu);
-		if (old_node != node)
-			desc = __real_move_irq_desc(desc, cpu);
-		else
-			desc->cpu = cpu;
-	}
+	if (desc->node != node)
+		desc = __real_move_irq_desc(desc, node);
 
 	return desc;
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b525dd3..f674f33 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -828,7 +828,7 @@ int __init __weak arch_early_irq_init(void)
 	return 0;
 }
 
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+int __weak arch_init_chip_data(struct irq_desc *desc, int node)
 {
 	return 0;
 }
-- 
cgit v0.10.2


From a2f809b08ae4dddc1015c7dcd8659e5729e45b3e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 18:01:20 -0700
Subject: irq: change ACPI GSI APIs to also take a device argument

We want to use dev_to_node() later on, to be aware of the 'home node'
of the GSI in question.

[ Impact: cleanup, prepare the IRQ code to be more NUMA aware ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Len Brown <lenb@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Len Brown <lenb@kernel.org>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-acpi@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
LKML-Reference: <49F65560.20904@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 5510317..baec6f0 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -636,7 +636,7 @@ void __init acpi_numa_arch_fixup(void)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
 		return gsi;
@@ -678,7 +678,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 
 	fadt = (struct acpi_table_fadt *)fadt_header;
 
-	acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+	acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+				 ACPI_ACTIVE_LOW);
 	return 0;
 }
 
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9d826e4..07f2913 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -154,8 +154,8 @@ extern int timer_through_8259;
 extern int io_apic_get_unique_id(int ioapic, int apic_id);
 extern int io_apic_get_version(int ioapic);
 extern int io_apic_get_redir_entries(int ioapic);
-extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
-				   int edge_level, int active_high_low);
+extern int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin,
+				  int irq, int edge_level, int active_high_low);
 #endif /* CONFIG_ACPI */
 
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 642fc7f..3ea1f53 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -72,7 +72,9 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
 extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
-extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+struct device;
+extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
+				 int active_high_low);
 extern int acpi_probe_gsi(void);
 #ifdef CONFIG_X86_IO_APIC
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 723989d..6ee96b5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -522,7 +522,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	unsigned int irq;
 	unsigned int plat_gsi = gsi;
@@ -539,7 +539,7 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
 
 #ifdef CONFIG_X86_IO_APIC
 	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
-		plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+		plat_gsi = mp_register_gsi(dev, gsi, triggering, polarity);
 	}
 #endif
 	acpi_gsi_to_irq(plat_gsi, &irq);
@@ -1158,7 +1158,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	}
 }
 
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	int ioapic;
 	int ioapic_pin;
@@ -1253,7 +1253,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 		}
 	}
 #endif
-	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+	io_apic_set_pci_routing(dev, ioapic, ioapic_pin, gsi,
 				triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
 				polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
 	return gsi;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 560b887..d934662 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3958,7 +3958,8 @@ int __init io_apic_get_version(int ioapic)
 }
 #endif
 
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
+int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
+				 int triggering, int polarity)
 {
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 51b9f82..2faa9e2 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -401,7 +401,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		/* Interrupt Line values above 0xF are forbidden */
 		if (dev->irq > 0 && (dev->irq <= 0xF)) {
 			printk(" - using IRQ %d\n", dev->irq);
-			acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE,
+			acpi_register_gsi(&dev->dev, dev->irq,
+					  ACPI_LEVEL_SENSITIVE,
 					  ACPI_ACTIVE_LOW);
 			return 0;
 		} else {
@@ -410,7 +411,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		}
 	}
 
-	rc = acpi_register_gsi(gsi, triggering, polarity);
+	rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
 	if (rc < 0) {
 		dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n",
 			 pin_name(pin));
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 340ba4f..4a9f349 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -224,7 +224,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
 			break;
 		}
 
-		gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE,
+		gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE,
 					ACPI_ACTIVE_LOW);
 		if (gsi > 0)
 			break;
@@ -939,7 +939,7 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 		irqp = &res->data.extended_irq;
 
 		for (i = 0; i < irqp->interrupt_count; i++) {
-			irq = acpi_register_gsi(irqp->interrupts[i],
+			irq = acpi_register_gsi(NULL, irqp->interrupts[i],
 				      irqp->triggering, irqp->polarity);
 			if (irq < 0)
 				return AE_ERROR;
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index adf1785..7f207f3 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -123,7 +123,7 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev,
 	}
 
 	flags = irq_flags(triggering, polarity, shareable);
-	irq = acpi_register_gsi(gsi, triggering, polarity);
+	irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
 	if (irq >= 0)
 		pcibios_penalize_isa_irq(irq, 1);
 	else
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 88be890..51b4b0a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num;
 extern int sbf_port;
 extern unsigned long acpi_realmode_flags;
 
-int acpi_register_gsi (u32 gsi, int triggering, int polarity);
+int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity);
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 
 #ifdef CONFIG_X86_IO_APIC
-- 
cgit v0.10.2


From 024154cfdd802654cb236a18c78b6e37351e2c49 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 18:01:50 -0700
Subject: irq: change io_apic_set_pci_routing() to use device parameter

Make actual use of the device parameter passed down to
io_apic_set_pci_routing() - to have the IRQ descriptor
on the home node of the device.

If no device has been passed down, we assume it's a platform
device and use the boot node ID for the IRQ descriptor.

[ Impact: optimization, make IO-APIC code more NUMA aware ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F6557E.3080101@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d934662..82376e0 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3963,7 +3963,7 @@ int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
 {
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	int node = cpu_to_node(boot_cpu_id);
+	int node;
 
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -3971,6 +3971,11 @@ int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
 		return -EINVAL;
 	}
 
+	if (dev)
+		node = dev_to_node(dev);
+	else
+		node = cpu_to_node(boot_cpu_id);
+
 	desc = irq_to_desc_alloc_node(irq, node);
 	if (!desc) {
 		printk(KERN_INFO "can not get irq_desc %d\n", irq);
-- 
cgit v0.10.2


From d047f53a2ecce37e3bdf79eac5a326fbaadb3628 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 18:02:23 -0700
Subject: x86/irq: change MSI irq_desc to be more numa aware

Try to get irq_desc on the home node in create_irq_nr().

v2: don't check if we can move it when sparse_irq is not used
v3: use move_irq_des, if that node is not what we want

[ Impact: optimization, make MSI IRQ descriptors more NUMA aware ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F6559F.7070005@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 82376e0..9cd4806 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3172,14 +3172,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
 	/* Allocate an unused irq */
 	unsigned int irq;
 	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new = NULL;
-	int node = cpu_to_node(boot_cpu_id);
 	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
@@ -3197,6 +3196,13 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 		if (cfg_new->vector != 0)
 			continue;
+
+#ifdef CONFIG_NUMA_IRQ_DESC
+		/* different node ?*/
+		if (desc_new->node != node)
+			desc = move_irq_desc(desc, node);
+#endif
+
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;
 		break;
@@ -3214,11 +3220,12 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 int create_irq(void)
 {
+	int node = cpu_to_node(boot_cpu_id);
 	unsigned int irq_want;
 	int irq;
 
 	irq_want = nr_irqs_gsi;
-	irq = create_irq_nr(irq_want);
+	irq = create_irq_nr(irq_want, node);
 
 	if (irq == 0)
 		irq = -1;
@@ -3476,15 +3483,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	unsigned int irq_want;
 	struct intel_iommu *iommu = NULL;
 	int index = 0;
+	int node;
 
 	/* x86 doesn't support multiple MSI yet */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
 
+	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want);
+		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
 			return -1;
 		irq_want = irq + 1;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index a09baf8..4b95ddb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -376,7 +376,7 @@ extern void set_irq_noprobe(unsigned int irq);
 extern void set_irq_probe(unsigned int irq);
 
 /* Handle dynamic irq creation and destruction */
-extern unsigned int create_irq_nr(unsigned int irq_want);
+extern unsigned int create_irq_nr(unsigned int irq_want, int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
-- 
cgit v0.10.2


From 56b581ea9591b5767b1e0204c6a06c7d0c49396e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 18:02:46 -0700
Subject: irq: make ht irq_desc more numa aware

Try to get irq_desc on the same node as create_irq_nr().

[ Impact: optimization, make HT IRQs more NUMA-aware ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F655B6.8020109@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index bf7d6ce..4e9dd0f 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -98,6 +98,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	int max_irq;
 	int pos;
 	int irq;
+	int node;
 
 	pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
 	if (!pos)
@@ -125,7 +126,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
 	cfg->msg.address_lo = 0xffffffff;
 	cfg->msg.address_hi = 0xffffffff;
 
-	irq = create_irq();
+	node = dev_to_node(&dev->dev);
+	irq = create_irq_nr(0, node);
 
 	if (irq <= 0) {
 		kfree(cfg);
-- 
cgit v0.10.2


From 2a2ed0dfc9ec44a899c7d4672f73f2c045099118 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 28 Apr 2009 13:01:26 +0200
Subject: ALSA: hda - Don't enable auto-mute but for speakers in
 patch_realtek.c

Enable auto-muting in model=auto only for devices with HP and speakers.
For devices with HP and line-outs, don't enable the auto-muting.

Also, add a debug print to show the auto-mute feature.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3a63063..96475dc 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1099,13 +1099,16 @@ static void alc_init_auto_hp(struct hda_codec *codec)
 		return;
 
 	if (!spec->autocfg.speaker_pins[0]) {
-		if (spec->autocfg.line_out_pins[0])
+		if (spec->autocfg.line_out_pins[0] &&
+		    spec->autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT)
 			spec->autocfg.speaker_pins[0] =
 				spec->autocfg.line_out_pins[0];
 		else
 			return;
 	}
 
+	snd_printdd("realtek: Enable HP auto-muting on NID 0x%x\n",
+		    spec->autocfg.hp_pins[0]);
 	snd_hda_codec_write_cache(codec, spec->autocfg.hp_pins[0], 0,
 				  AC_VERB_SET_UNSOLICITED_ENABLE,
 				  AC_USRSP_EN | ALC880_HP_EVENT);
-- 
cgit v0.10.2


From cb6605c1e4d2a2eaffdde433fbfe1567ca688458 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 28 Apr 2009 13:03:19 +0200
Subject: ALSA: hda - Fix a typo in patch_realtek.c again

The commmit dfed0ef9b3ff9e37903920b6938ed33344ad0b3d was reverted
accidentally by the merge of auto-detection fix patch.

Fixed again now.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 96475dc..3e7207b 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1151,7 +1151,7 @@ static int alc_subsystem_id(struct hda_codec *codec,
 	ass = snd_hda_codec_get_pincfg(codec, nid);
 	snd_printd("realtek: No valid SSID, "
 		   "checking pincfg 0x%08x for NID 0x%x\n",
-		   nid, ass);
+		   ass, nid);
 	if (!(ass & 1) && !(ass & 0x100000))
 		return 0;
 	if ((ass >> 30) != 1)	/* no physical connection */
-- 
cgit v0.10.2


From 51e95bed6263f6a2e558ca04fc4ff877f3b0cca3 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 24 Apr 2009 16:26:25 -0700
Subject: slub: add Documentation/ABI/testing/sysfs-kernel-slab

Adds documentation for the slub ABI.

This is placed in the `testing' directory since the meanings of these
files are still subject to change as slub is developed.

Acked-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
new file mode 100644
index 0000000..6dcf75e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -0,0 +1,479 @@
+What:		/sys/kernel/slab
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The /sys/kernel/slab directory contains a snapshot of the
+		internal state of the SLUB allocator for each cache.  Certain
+		files may be modified to change the behavior of the cache (and
+		any cache it aliases, if any).
+Users:		kernel memory tuning tools
+
+What:		/sys/kernel/slab/cache/aliases
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The aliases file is read-only and specifies how many caches
+		have merged into this cache.
+
+What:		/sys/kernel/slab/cache/align
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The align file is read-only and specifies the cache's object
+		alignment in bytes.
+
+What:		/sys/kernel/slab/cache/alloc_calls
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The alloc_calls file is read-only and lists the kernel code
+		locations from which allocations for this cache were performed.
+		The alloc_calls file only contains information if debugging is
+		enabled for that cache (see Documentation/vm/slub.txt).
+
+What:		/sys/kernel/slab/cache/alloc_fastpath
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The alloc_fastpath file is read-only and specifies how many
+		objects have been allocated using the fast path.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/alloc_from_partial
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The alloc_from_partial file is read-only and specifies how
+		many times a cpu slab has been full and it has been refilled
+		by using a slab from the list of partially used slabs.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/alloc_refill
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The alloc_refill file is read-only and specifies how many
+		times the per-cpu freelist was empty but there were objects
+		available as the result of remote cpu frees.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/alloc_slab
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The alloc_slab file is read-only and specifies how many times
+		a new slab had to be allocated from the page allocator.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/alloc_slowpath
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The alloc_slowpath file is read-only and specifies how many
+		objects have been allocated using the slow path because of a
+		refill or allocation from a partial or new slab.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/cache_dma
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The cache_dma file is read-only and specifies whether objects
+		are from ZONE_DMA.
+		Available when CONFIG_ZONE_DMA is enabled.
+
+What:		/sys/kernel/slab/cache/cpu_slabs
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The cpu_slabs file is read-only and displays how many cpu slabs
+		are active and their NUMA locality.
+
+What:		/sys/kernel/slab/cache/cpuslab_flush
+Date:		April 2009
+KernelVersion:	2.6.31
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file cpuslab_flush is read-only and specifies how many
+		times a cache's cpu slabs have been flushed as the result of
+		destroying or shrinking a cache, a cpu going offline, or as
+		the result of forcing an allocation from a certain node.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/ctor
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The ctor file is read-only and specifies the cache's object
+		constructor function, which is invoked for each object when a
+		new slab is allocated.
+
+What:		/sys/kernel/slab/cache/deactivate_empty
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file deactivate_empty is read-only and specifies how many
+		times an empty cpu slab was deactivated.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/deactivate_full
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file deactivate_full is read-only and specifies how many
+		times a full cpu slab was deactivated.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/deactivate_remote_frees
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file deactivate_remote_frees is read-only and specifies how
+		many times a cpu slab has been deactivated and contained free
+		objects that were freed remotely.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/deactivate_to_head
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file deactivate_to_head is read-only and specifies how
+		many times a partial cpu slab was deactivated and added to the
+		head of its node's partial list.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/deactivate_to_tail
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file deactivate_to_tail is read-only and specifies how
+		many times a partial cpu slab was deactivated and added to the
+		tail of its node's partial list.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/destroy_by_rcu
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The destroy_by_rcu file is read-only and specifies whether
+		slabs (not objects) are freed by rcu.
+
+What:		/sys/kernel/slab/cache/free_add_partial
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file free_add_partial is read-only and specifies how many
+		times an object has been freed in a full slab so that it had to
+		added to its node's partial list.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/free_calls
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The free_calls file is read-only and lists the locations of
+		object frees if slab debugging is enabled (see
+		Documentation/vm/slub.txt).
+
+What:		/sys/kernel/slab/cache/free_fastpath
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The free_fastpath file is read-only and specifies how many
+		objects have been freed using the fast path because it was an
+		object from the cpu slab.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/free_frozen
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The free_frozen file is read-only and specifies how many
+		objects have been freed to a frozen slab (i.e. a remote cpu
+		slab).
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/free_remove_partial
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file free_remove_partial is read-only and specifies how
+		many times an object has been freed to a now-empty slab so
+		that it had to be removed from its node's partial list.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/free_slab
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The free_slab file is read-only and specifies how many times an
+		empty slab has been freed back to the page allocator.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/free_slowpath
+Date:		February 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The free_slowpath file is read-only and specifies how many
+		objects have been freed using the slow path (i.e. to a full or
+		partial slab).
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/hwcache_align
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The hwcache_align file is read-only and specifies whether
+		objects are aligned on cachelines.
+
+What:		/sys/kernel/slab/cache/min_partial
+Date:		February 2009
+KernelVersion:	2.6.30
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		David Rientjes <rientjes@google.com>
+Description:
+		The min_partial file specifies how many empty slabs shall
+		remain on a node's partial list to avoid the overhead of
+		allocating new slabs.  Such slabs may be reclaimed by utilizing
+		the shrink file.
+
+What:		/sys/kernel/slab/cache/object_size
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The object_size file is read-only and specifies the cache's
+		object size.
+
+What:		/sys/kernel/slab/cache/objects
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The objects file is read-only and displays how many objects are
+		active and from which nodes they are from.
+
+What:		/sys/kernel/slab/cache/objects_partial
+Date:		April 2008
+KernelVersion:	2.6.26
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The objects_partial file is read-only and displays how many
+		objects are on partial slabs and from which nodes they are
+		from.
+
+What:		/sys/kernel/slab/cache/objs_per_slab
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file objs_per_slab is read-only and specifies how many
+		objects may be allocated from a single slab of the order
+		specified in /sys/kernel/slab/cache/order.
+
+What:		/sys/kernel/slab/cache/order
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The order file specifies the page order at which new slabs are
+		allocated.  It is writable and can be changed to increase the
+		number of objects per slab.  If a slab cannot be allocated
+		because of fragmentation, SLUB will retry with the minimum order
+		possible depending on its characteristics.
+
+What:		/sys/kernel/slab/cache/order_fallback
+Date:		April 2008
+KernelVersion:	2.6.26
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file order_fallback is read-only and specifies how many
+		times an allocation of a new slab has not been possible at the
+		cache's order and instead fallen back to its minimum possible
+		order.
+		Available when CONFIG_SLUB_STATS is enabled.
+
+What:		/sys/kernel/slab/cache/partial
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The partial file is read-only and displays how long many
+		partial slabs there are and how long each node's list is.
+
+What:		/sys/kernel/slab/cache/poison
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The poison file specifies whether objects should be poisoned
+		when a new slab is allocated.
+
+What:		/sys/kernel/slab/cache/reclaim_account
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The reclaim_account file specifies whether the cache's objects
+		are reclaimable (and grouped by their mobility).
+
+What:		/sys/kernel/slab/cache/red_zone
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The red_zone file specifies whether the cache's objects are red
+		zoned.
+
+What:		/sys/kernel/slab/cache/remote_node_defrag_ratio
+Date:		January 2008
+KernelVersion:	2.6.25
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The file remote_node_defrag_ratio specifies the percentage of
+		times SLUB will attempt to refill the cpu slab with a partial
+		slab from a remote node as opposed to allocating a new slab on
+		the local node.  This reduces the amount of wasted memory over
+		the entire system but can be expensive.
+		Available when CONFIG_NUMA is enabled.
+
+What:		/sys/kernel/slab/cache/sanity_checks
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The sanity_checks file specifies whether expensive checks
+		should be performed on free and, at minimum, enables double free
+		checks.  Caches that enable sanity_checks cannot be merged with
+		caches that do not.
+
+What:		/sys/kernel/slab/cache/shrink
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The shrink file is written when memory should be reclaimed from
+		a cache.  Empty partial slabs are freed and the partial list is
+		sorted so the slabs with the fewest available objects are used
+		first.
+
+What:		/sys/kernel/slab/cache/slab_size
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The slab_size file is read-only and specifies the object size
+		with metadata (debugging information and alignment) in bytes.
+
+What:		/sys/kernel/slab/cache/slabs
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The slabs file is read-only and displays how long many slabs
+		there are (both cpu and partial) and from which nodes they are
+		from.
+
+What:		/sys/kernel/slab/cache/store_user
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The store_user file specifies whether the location of
+		allocation or free should be tracked for a cache.
+
+What:		/sys/kernel/slab/cache/total_objects
+Date:		April 2008
+KernelVersion:	2.6.26
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The total_objects file is read-only and displays how many total
+		objects a cache has and from which nodes they are from.
+
+What:		/sys/kernel/slab/cache/trace
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		The trace file specifies whether object allocations and frees
+		should be traced.
+
+What:		/sys/kernel/slab/cache/validate
+Date:		May 2007
+KernelVersion:	2.6.22
+Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
+		Christoph Lameter <cl@linux-foundation.org>
+Description:
+		Writing to the validate file causes SLUB to traverse all of its
+		cache's objects and check the validity of metadata.
-- 
cgit v0.10.2


From 4278600644dee621bd50d7498e244b135612e0f6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 28 Apr 2009 23:12:10 +0900
Subject: sh: register the rtc-generic platform device properly.

The device registration was accidentally omitted, add it back in. Do some
basic device probing as well, so this doesn't show up for platforms that
tie in to the RTC interface properly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index a2458bf..4573321 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -3,7 +3,7 @@
  *
  *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
  *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
- *  Copyright (C) 2002 - 2008  Paul Mundt
+ *  Copyright (C) 2002 - 2009  Paul Mundt
  *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
  *
  *  Some code taken from i386 version.
@@ -68,6 +68,21 @@ int set_rtc_time(struct rtc_time *tm)
 }
 EXPORT_SYMBOL(set_rtc_time);
 
+static int __init rtc_generic_init(void)
+{
+	struct platform_device *pdev;
+
+	if (rtc_sh_get_time == null_rtc_get_time)
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
+}
+module_init(rtc_generic_init);
+
 #ifndef CONFIG_GENERIC_TIME
 void do_gettimeofday(struct timeval *tv)
 {
-- 
cgit v0.10.2


From 1130b0296184bc21806225fd06d533515a99d2db Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 28 Apr 2009 14:56:18 +0200
Subject: perf_counter tools: fix Documentation/perf_counter build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mike Galbraith reported:

> marge:..Documentation/perf_counter # make
>     CC builtin-stat.o
> In file included from builtin-stat.c:71:
> /usr/include/ctype.h:102: error: expected expression before ‘]’ token

Remove the ctype.h include.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index d7ace63..112b94e 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -68,7 +68,6 @@
 #include <fcntl.h>
 #include <stdio.h>
 #include <errno.h>
-#include <ctype.h>
 #include <time.h>
 #include <sched.h>
 #include <pthread.h>
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index dea016f..6a276d2 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -49,7 +49,6 @@
 #include <fcntl.h>
 #include <stdio.h>
 #include <errno.h>
-#include <ctype.h>
 #include <time.h>
 #include <sched.h>
 #include <pthread.h>
-- 
cgit v0.10.2


From 6b2db28a7a2da1064df9e179d9b6d07b0bfe156a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 14 Apr 2009 10:54:49 +0900
Subject: fuse: misc cleanups

* fuse_file_alloc() was structured in weird way.  The success path was
  split between else block and code following the block.  Restructure
  the code such that it's easier to read and modify.

* Unindent success path of fuse_release_common() to ease future
  changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 06f30e9..028e17d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -49,22 +49,26 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 {
 	struct fuse_file *ff;
+
 	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
-	if (ff) {
-		ff->reserved_req = fuse_request_alloc();
-		if (!ff->reserved_req) {
-			kfree(ff);
-			return NULL;
-		} else {
-			INIT_LIST_HEAD(&ff->write_entry);
-			atomic_set(&ff->count, 0);
-			spin_lock(&fc->lock);
-			ff->kh = ++fc->khctr;
-			spin_unlock(&fc->lock);
-		}
-		RB_CLEAR_NODE(&ff->polled_node);
-		init_waitqueue_head(&ff->poll_wait);
+	if (unlikely(!ff))
+		return NULL;
+
+	ff->reserved_req = fuse_request_alloc();
+	if (unlikely(!ff->reserved_req)) {
+		kfree(ff);
+		return NULL;
 	}
+
+	INIT_LIST_HEAD(&ff->write_entry);
+	atomic_set(&ff->count, 0);
+	RB_CLEAR_NODE(&ff->polled_node);
+	init_waitqueue_head(&ff->poll_wait);
+
+	spin_lock(&fc->lock);
+	ff->kh = ++fc->khctr;
+	spin_unlock(&fc->lock);
+
 	return ff;
 }
 
@@ -158,34 +162,37 @@ void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode)
 
 int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 {
-	struct fuse_file *ff = file->private_data;
-	if (ff) {
-		struct fuse_conn *fc = get_fuse_conn(inode);
-		struct fuse_req *req = ff->reserved_req;
+	struct fuse_conn *fc;
+	struct fuse_file *ff;
+	struct fuse_req *req;
 
-		fuse_release_fill(ff, get_node_id(inode), file->f_flags,
-				  isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
+	ff = file->private_data;
+	if (unlikely(!ff))
+		return 0;	/* return value is ignored by VFS */
 
-		/* Hold vfsmount and dentry until release is finished */
-		req->misc.release.vfsmount = mntget(file->f_path.mnt);
-		req->misc.release.dentry = dget(file->f_path.dentry);
+	fc = get_fuse_conn(inode);
+	req = ff->reserved_req;
 
-		spin_lock(&fc->lock);
-		list_del(&ff->write_entry);
-		if (!RB_EMPTY_NODE(&ff->polled_node))
-			rb_erase(&ff->polled_node, &fc->polled_files);
-		spin_unlock(&fc->lock);
+	fuse_release_fill(ff, get_node_id(inode), file->f_flags,
+			  isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
 
-		wake_up_interruptible_sync(&ff->poll_wait);
-		/*
-		 * Normally this will send the RELEASE request,
-		 * however if some asynchronous READ or WRITE requests
-		 * are outstanding, the sending will be delayed
-		 */
-		fuse_file_put(ff);
-	}
+	/* Hold vfsmount and dentry until release is finished */
+	req->misc.release.vfsmount = mntget(file->f_path.mnt);
+	req->misc.release.dentry = dget(file->f_path.dentry);
+
+	spin_lock(&fc->lock);
+	list_del(&ff->write_entry);
+	if (!RB_EMPTY_NODE(&ff->polled_node))
+		rb_erase(&ff->polled_node, &fc->polled_files);
+	spin_unlock(&fc->lock);
 
-	/* Return value is ignored by VFS */
+	wake_up_interruptible_sync(&ff->poll_wait);
+	/*
+	 * Normally this will send the RELEASE request, however if
+	 * some asynchronous READ or WRITE requests are outstanding,
+	 * the sending will be delayed.
+	 */
+	fuse_file_put(ff);
 	return 0;
 }
 
-- 
cgit v0.10.2


From b0be46ebf72ca7478c1c4bd0153c42f90e768a03 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:36 +0200
Subject: fuse: use struct path in release structure

Use struct path instead of separate dentry and vfsmount in
req->misc.release.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 028e17d..3c8fa93 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,15 +86,14 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
 
 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-	dput(req->misc.release.dentry);
-	mntput(req->misc.release.vfsmount);
+	path_put(&req->misc.release.path);
 }
 
 static void fuse_file_put(struct fuse_file *ff)
 {
 	if (atomic_dec_and_test(&ff->count)) {
 		struct fuse_req *req = ff->reserved_req;
-		struct inode *inode = req->misc.release.dentry->d_inode;
+		struct inode *inode = req->misc.release.path.dentry->d_inode;
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		req->end = fuse_release_end;
 		fuse_request_send_background(fc, req);
@@ -177,8 +176,8 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 			  isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
 
 	/* Hold vfsmount and dentry until release is finished */
-	req->misc.release.vfsmount = mntget(file->f_path.mnt);
-	req->misc.release.dentry = dget(file->f_path.dentry);
+	path_get(&file->f_path);
+	req->misc.release.path = file->f_path;
 
 	spin_lock(&fc->lock);
 	list_del(&ff->write_entry);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6fc5aed..146317f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -248,8 +248,7 @@ struct fuse_req {
 		struct fuse_forget_in forget_in;
 		struct {
 			struct fuse_release_in in;
-			struct vfsmount *vfsmount;
-			struct dentry *dentry;
+			struct path path;
 		} release;
 		struct fuse_init_in init_in;
 		struct fuse_init_out init_out;
-- 
cgit v0.10.2


From 2d698b070382442f817813b9dd0103034e5bca81 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:36 +0200
Subject: fuse: clean up fuse_write_fill()

Move out code from fuse_write_fill() which is not common to all
callers.  Remove two function arguments which become unnecessary.

Also remove unnecessary memset(), the request is already initialized
to zero.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 3c8fa93..823f84a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -613,20 +613,16 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	return generic_file_aio_read(iocb, iov, nr_segs, pos);
 }
 
-static void fuse_write_fill(struct fuse_req *req, struct file *file,
-			    struct fuse_file *ff, struct inode *inode,
-			    loff_t pos, size_t count, int writepage)
+static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
+			    struct inode *inode, loff_t pos, size_t count)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_write_in *inarg = &req->misc.write.in;
 	struct fuse_write_out *outarg = &req->misc.write.out;
 
-	memset(inarg, 0, sizeof(struct fuse_write_in));
 	inarg->fh = ff->fh;
 	inarg->offset = pos;
 	inarg->size = count;
-	inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
-	inarg->flags = file ? file->f_flags : 0;
 	req->in.h.opcode = FUSE_WRITE;
 	req->in.h.nodeid = get_node_id(inode);
 	req->in.numargs = 2;
@@ -646,9 +642,11 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
 			      fl_owner_t owner)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
+	struct fuse_write_in *inarg = &req->misc.write.in;
+
+	fuse_write_fill(req, file->private_data, inode, pos, count);
+	inarg->flags = file->f_flags;
 	if (owner != NULL) {
-		struct fuse_write_in *inarg = &req->misc.write.in;
 		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
 		inarg->lock_owner = fuse_lock_owner_id(fc, owner);
 	}
@@ -1183,9 +1181,10 @@ static int fuse_writepage_locked(struct page *page)
 	req->ff = fuse_file_get(ff);
 	spin_unlock(&fc->lock);
 
-	fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
+	fuse_write_fill(req, ff, inode, page_offset(page), 0);
 
 	copy_highpage(tmp_page, page);
+	req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
 	req->in.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = tmp_page;
-- 
cgit v0.10.2


From d09cb9d7f6e4cb1dd0cf12ee0d352440291c74cf Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:36 +0200
Subject: fuse: prepare fuse_direct_io() for CUSE

Move code operating on the inode out from fuse_direct_io().

This prepares this function for use by CUSE, where the inode is not
owned by a fuse filesystem.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 823f84a..b83d7d8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -993,9 +993,6 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 	ssize_t res = 0;
 	struct fuse_req *req;
 
-	if (is_bad_inode(inode))
-		return -EIO;
-
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -1038,12 +1035,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 		}
 	}
 	fuse_put_request(fc, req);
-	if (res > 0) {
-		if (write)
-			fuse_write_update_size(inode, pos);
+	if (res > 0)
 		*ppos = pos;
-	}
-	fuse_invalidate_attr(inode);
 
 	return res;
 }
@@ -1051,7 +1044,17 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 static ssize_t fuse_direct_read(struct file *file, char __user *buf,
 				     size_t count, loff_t *ppos)
 {
-	return fuse_direct_io(file, buf, count, ppos, 0);
+	ssize_t res;
+	struct inode *inode = file->f_path.dentry->d_inode;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	res = fuse_direct_io(file, buf, count, ppos, 0);
+
+	fuse_invalidate_attr(inode);
+
+	return res;
 }
 
 static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
@@ -1059,12 +1062,22 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	ssize_t res;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
 	/* Don't allow parallel writes to the same file */
 	mutex_lock(&inode->i_mutex);
 	res = generic_write_checks(file, ppos, &count, 0);
-	if (!res)
+	if (!res) {
 		res = fuse_direct_io(file, buf, count, ppos, 1);
+		if (res > 0)
+			fuse_write_update_size(inode, *ppos);
+	}
 	mutex_unlock(&inode->i_mutex);
+
+	fuse_invalidate_attr(inode);
+
 	return res;
 }
 
-- 
cgit v0.10.2


From da5e4714578ff323f7a61af490fc3539e68f188b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:36 +0200
Subject: fuse: add members to struct fuse_file

Add new members ->fc and ->nodeid to struct fuse_file.  This will aid
in converting functions for use by CUSE, where the inode is not owned
by a fuse filesystem.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b83d7d8..3be0301 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -54,6 +54,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 	if (unlikely(!ff))
 		return NULL;
 
+	ff->fc = fc;
 	ff->reserved_req = fuse_request_alloc();
 	if (unlikely(!ff->reserved_req)) {
 		kfree(ff);
@@ -111,6 +112,7 @@ void fuse_finish_open(struct inode *inode, struct file *file,
 	if (outarg->open_flags & FOPEN_NONSEEKABLE)
 		nonseekable_open(inode, file);
 	ff->fh = outarg->fh;
+	ff->nodeid = get_node_id(inode);
 	file->private_data = fuse_file_get(ff);
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 146317f..4469d99 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -97,8 +97,13 @@ struct fuse_inode {
 	struct list_head writepages;
 };
 
+struct fuse_conn;
+
 /** FUSE specific file data */
 struct fuse_file {
+	/** Fuse connection for this file */
+	struct fuse_conn *fc;
+
 	/** Request reserved for flush and release */
 	struct fuse_req *reserved_req;
 
@@ -108,6 +113,9 @@ struct fuse_file {
 	/** File handle used by userspace */
 	u64 fh;
 
+	/** Node id of this file */
+	u64 nodeid;
+
 	/** Refcount */
 	atomic_t count;
 
@@ -185,8 +193,6 @@ enum fuse_req_state {
 	FUSE_REQ_FINISHED
 };
 
-struct fuse_conn;
-
 /**
  * A request to the client
  */
-- 
cgit v0.10.2


From 2106cb18930312af9325d3418e138569c5b903cc Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:37 +0200
Subject: fuse: don't use inode in helpers called by fuse_direct_io()

Use ff->fc and ff->nodeid instead of passing down the inode.

This prepares this function for use by CUSE, where the inode is not
owned by a fuse filesystem.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8b8eebc..222584b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1035,7 +1035,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 	req->out.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
-	fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+	fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
 	fuse_request_send(fc, req);
 	nbytes = req->out.args[0].size;
 	err = req->out.h.error;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 3be0301..aa4a387 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -379,8 +379,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
 	return fuse_fsync_common(file, de, datasync, 0);
 }
 
-void fuse_read_fill(struct fuse_req *req, struct file *file,
-		    struct inode *inode, loff_t pos, size_t count, int opcode)
+void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
+		    size_t count, int opcode)
 {
 	struct fuse_read_in *inarg = &req->misc.read.in;
 	struct fuse_file *ff = file->private_data;
@@ -390,7 +390,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
 	inarg->size = count;
 	inarg->flags = file->f_flags;
 	req->in.h.opcode = opcode;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(struct fuse_read_in);
 	req->in.args[0].value = inarg;
@@ -400,12 +400,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
 }
 
 static size_t fuse_send_read(struct fuse_req *req, struct file *file,
-			     struct inode *inode, loff_t pos, size_t count,
-			     fl_owner_t owner)
+			     loff_t pos, size_t count, fl_owner_t owner)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
 
-	fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+	fuse_read_fill(req, file, pos, count, FUSE_READ);
 	if (owner != NULL) {
 		struct fuse_read_in *inarg = &req->misc.read.in;
 
@@ -463,7 +463,7 @@ static int fuse_readpage(struct file *file, struct page *page)
 	req->out.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
-	num_read = fuse_send_read(req, file, inode, pos, count, NULL);
+	num_read = fuse_send_read(req, file, pos, count, NULL);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 
@@ -512,19 +512,18 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
 		fuse_file_put(req->ff);
 }
 
-static void fuse_send_readpages(struct fuse_req *req, struct file *file,
-				struct inode *inode)
+static void fuse_send_readpages(struct fuse_req *req, struct file *file)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
 	loff_t pos = page_offset(req->pages[0]);
 	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
 
 	req->out.argpages = 1;
 	req->out.page_zeroing = 1;
-	fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+	fuse_read_fill(req, file, pos, count, FUSE_READ);
 	req->misc.read.attr_ver = fuse_get_attr_version(fc);
 	if (fc->async_read) {
-		struct fuse_file *ff = file->private_data;
 		req->ff = fuse_file_get(ff);
 		req->end = fuse_readpages_end;
 		fuse_request_send_background(fc, req);
@@ -554,7 +553,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
 	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
 	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
 	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
-		fuse_send_readpages(req, data->file, inode);
+		fuse_send_readpages(req, data->file);
 		data->req = req = fuse_get_req(fc);
 		if (IS_ERR(req)) {
 			unlock_page(page);
@@ -588,7 +587,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
 	if (!err) {
 		if (data.req->num_pages)
-			fuse_send_readpages(data.req, file, inode);
+			fuse_send_readpages(data.req, file);
 		else
 			fuse_put_request(fc, data.req);
 	}
@@ -616,9 +615,8 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 }
 
 static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
-			    struct inode *inode, loff_t pos, size_t count)
+			    loff_t pos, size_t count)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_write_in *inarg = &req->misc.write.in;
 	struct fuse_write_out *outarg = &req->misc.write.out;
 
@@ -626,9 +624,9 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
 	inarg->offset = pos;
 	inarg->size = count;
 	req->in.h.opcode = FUSE_WRITE;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 2;
-	if (fc->minor < 9)
+	if (ff->fc->minor < 9)
 		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
 	else
 		req->in.args[0].size = sizeof(struct fuse_write_in);
@@ -640,13 +638,13 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
 }
 
 static size_t fuse_send_write(struct fuse_req *req, struct file *file,
-			      struct inode *inode, loff_t pos, size_t count,
-			      fl_owner_t owner)
+			      loff_t pos, size_t count, fl_owner_t owner)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
 	struct fuse_write_in *inarg = &req->misc.write.in;
 
-	fuse_write_fill(req, file->private_data, inode, pos, count);
+	fuse_write_fill(req, ff, pos, count);
 	inarg->flags = file->f_flags;
 	if (owner != NULL) {
 		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
@@ -706,7 +704,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
 	req->num_pages = 1;
 	req->pages[0] = page;
 	req->page_offset = offset;
-	nres = fuse_send_write(req, file, inode, pos, count, NULL);
+	nres = fuse_send_write(req, file, pos, count, NULL);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
 	if (!err && !nres)
@@ -747,7 +745,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
 	for (i = 0; i < req->num_pages; i++)
 		fuse_wait_on_page_writeback(inode, req->pages[i]->index);
 
-	res = fuse_send_write(req, file, inode, pos, count, NULL);
+	res = fuse_send_write(req, file, pos, count, NULL);
 
 	offset = req->page_offset;
 	count = res;
@@ -988,8 +986,8 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
 static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 			      size_t count, loff_t *ppos, int write)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
 	size_t nmax = write ? fc->max_write : fc->max_read;
 	loff_t pos = *ppos;
 	ssize_t res = 0;
@@ -1001,6 +999,7 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 
 	while (count) {
 		size_t nres;
+		fl_owner_t owner = current->files;
 		size_t nbytes = min(count, nmax);
 		int err = fuse_get_user_pages(req, buf, &nbytes, write);
 		if (err) {
@@ -1009,11 +1008,10 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 		}
 
 		if (write)
-			nres = fuse_send_write(req, file, inode, pos, nbytes,
-					       current->files);
+			nres = fuse_send_write(req, file, pos, nbytes, owner);
 		else
-			nres = fuse_send_read(req, file, inode, pos, nbytes,
-					      current->files);
+			nres = fuse_send_read(req, file, pos, nbytes, owner);
+
 		fuse_release_user_pages(req, !write);
 		if (req->out.h.error) {
 			if (!res)
@@ -1196,7 +1194,7 @@ static int fuse_writepage_locked(struct page *page)
 	req->ff = fuse_file_get(ff);
 	spin_unlock(&fc->lock);
 
-	fuse_write_fill(req, ff, inode, page_offset(page), 0);
+	fuse_write_fill(req, ff, page_offset(page), 0);
 
 	copy_highpage(tmp_page, page);
 	req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 4469d99..a51f63c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -520,7 +520,7 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
  * Initialize READ or READDIR request
  */
 void fuse_read_fill(struct fuse_req *req, struct file *file,
-		    struct inode *inode, loff_t pos, size_t count, int opcode);
+		    loff_t pos, size_t count, int opcode);
 
 /**
  * Send OPEN or OPENDIR request
-- 
cgit v0.10.2


From c7b7143c6342b8751d47b03a025ac5c0ac1ae809 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:37 +0200
Subject: fuse: clean up args in fuse_finish_open() and fuse_release_fill()

Move setting ff->fh, ff->nodeid and file->private_data outside
fuse_finish_open().  Add ->open_flags member to struct fuse_file.

This simplifies the argument passing to fuse_finish_open() and
fuse_release_fill(), and paves the way for creating an open helper
that doesn't need an inode pointer.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 222584b..da87a3d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -365,9 +365,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
  * Synchronous release for the case when something goes wrong in CREATE_OPEN
  */
 static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
-			      u64 nodeid, int flags)
+			      int flags)
 {
-	fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
+	fuse_release_fill(ff, flags, FUSE_RELEASE);
 	ff->reserved_req->force = 1;
 	fuse_request_send(fc, ff->reserved_req);
 	fuse_put_request(fc, ff->reserved_req);
@@ -445,12 +445,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 		goto out_free_ff;
 
 	fuse_put_request(fc, req);
+	ff->fh = outopen.fh;
+	ff->nodeid = outentry.nodeid;
+	ff->open_flags = outopen.open_flags;
 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
 	if (!inode) {
 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
-		ff->fh = outopen.fh;
-		fuse_sync_release(fc, ff, outentry.nodeid, flags);
+		fuse_sync_release(fc, ff, flags);
 		fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
 		return -ENOMEM;
 	}
@@ -460,11 +462,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	fuse_invalidate_attr(dir);
 	file = lookup_instantiate_filp(nd, entry, generic_file_open);
 	if (IS_ERR(file)) {
-		ff->fh = outopen.fh;
-		fuse_sync_release(fc, ff, outentry.nodeid, flags);
+		fuse_sync_release(fc, ff, flags);
 		return PTR_ERR(file);
 	}
-	fuse_finish_open(inode, file, ff, &outopen);
+	file->private_data = fuse_file_get(ff);
+	fuse_finish_open(inode, file);
 	return 0;
 
  out_free_ff:
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index aa4a387..d9458ca 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -79,7 +79,7 @@ void fuse_file_free(struct fuse_file *ff)
 	kfree(ff);
 }
 
-static struct fuse_file *fuse_file_get(struct fuse_file *ff)
+struct fuse_file *fuse_file_get(struct fuse_file *ff)
 {
 	atomic_inc(&ff->count);
 	return ff;
@@ -102,18 +102,16 @@ static void fuse_file_put(struct fuse_file *ff)
 	}
 }
 
-void fuse_finish_open(struct inode *inode, struct file *file,
-		      struct fuse_file *ff, struct fuse_open_out *outarg)
+void fuse_finish_open(struct inode *inode, struct file *file)
 {
-	if (outarg->open_flags & FOPEN_DIRECT_IO)
+	struct fuse_file *ff = file->private_data;
+
+	if (ff->open_flags & FOPEN_DIRECT_IO)
 		file->f_op = &fuse_direct_io_file_operations;
-	if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
+	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
 		invalidate_inode_pages2(inode->i_mapping);
-	if (outarg->open_flags & FOPEN_NONSEEKABLE)
+	if (ff->open_flags & FOPEN_NONSEEKABLE)
 		nonseekable_open(inode, file);
-	ff->fh = outarg->fh;
-	ff->nodeid = get_node_id(inode);
-	file->private_data = fuse_file_get(ff);
 }
 
 int fuse_open_common(struct inode *inode, struct file *file, int isdir)
@@ -141,13 +139,17 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 	else {
 		if (isdir)
 			outarg.open_flags &= ~FOPEN_DIRECT_IO;
-		fuse_finish_open(inode, file, ff, &outarg);
+		ff->fh = outarg.fh;
+		ff->nodeid = get_node_id(inode);
+		ff->open_flags = outarg.open_flags;
+		file->private_data = fuse_file_get(ff);
+		fuse_finish_open(inode, file);
 	}
 
 	return err;
 }
 
-void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode)
+void fuse_release_fill(struct fuse_file *ff, int flags, int opcode)
 {
 	struct fuse_req *req = ff->reserved_req;
 	struct fuse_release_in *inarg = &req->misc.release.in;
@@ -155,7 +157,7 @@ void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode)
 	inarg->fh = ff->fh;
 	inarg->flags = flags;
 	req->in.h.opcode = opcode;
-	req->in.h.nodeid = nodeid;
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(struct fuse_release_in);
 	req->in.args[0].value = inarg;
@@ -174,7 +176,7 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 	fc = get_fuse_conn(inode);
 	req = ff->reserved_req;
 
-	fuse_release_fill(ff, get_node_id(inode), file->f_flags,
+	fuse_release_fill(ff, file->f_flags,
 			  isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
 
 	/* Hold vfsmount and dentry until release is finished */
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a51f63c..ce46c12 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -119,6 +119,9 @@ struct fuse_file {
 	/** Refcount */
 	atomic_t count;
 
+	/** FOPEN_* flags returned by open */
+	u32 open_flags;
+
 	/** Entry on inode's write_files list */
 	struct list_head write_entry;
 
@@ -528,12 +531,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
 int fuse_open_common(struct inode *inode, struct file *file, int isdir);
 
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
+struct fuse_file *fuse_file_get(struct fuse_file *ff);
 void fuse_file_free(struct fuse_file *ff);
-void fuse_finish_open(struct inode *inode, struct file *file,
-		      struct fuse_file *ff, struct fuse_open_out *outarg);
+void fuse_finish_open(struct inode *inode, struct file *file);
 
 /** Fill in ff->reserved_req with a RELEASE request */
-void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode);
+void fuse_release_fill(struct fuse_file *ff, int flags, int opcode);
 
 /**
  * Send RELEASE or RELEASEDIR request
-- 
cgit v0.10.2


From 91fe96b403f8a0a4a8a045a39b1bd549b0da7941 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:37 +0200
Subject: fuse: create fuse_do_open() helper for CUSE

Create a helper for sending an OPEN request that doesn't need a struct
inode pointer.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index da87a3d..faa3b2f 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1103,7 +1103,7 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
 
 static int fuse_dir_open(struct inode *inode, struct file *file)
 {
-	return fuse_open_common(inode, file, 1);
+	return fuse_open_common(inode, file, true);
 }
 
 static int fuse_dir_release(struct inode *inode, struct file *file)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d9458ca..6bdaa55 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,10 +15,9 @@
 
 static const struct file_operations fuse_direct_io_file_operations;
 
-static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
-			  struct fuse_open_out *outargp)
+static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+			  int opcode, struct fuse_open_out *outargp)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_open_in inarg;
 	struct fuse_req *req;
 	int err;
@@ -31,8 +30,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
 	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
 	if (!fc->atomic_o_trunc)
 		inarg.flags &= ~O_TRUNC;
-	req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.opcode = opcode;
+	req->in.h.nodeid = nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
@@ -102,6 +101,35 @@ static void fuse_file_put(struct fuse_file *ff)
 	}
 }
 
+static int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+			bool isdir)
+{
+	struct fuse_open_out outarg;
+	struct fuse_file *ff;
+	int err;
+	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+
+	ff = fuse_file_alloc(fc);
+	if (!ff)
+		return -ENOMEM;
+
+	err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+	if (err) {
+		fuse_file_free(ff);
+		return err;
+	}
+
+	if (isdir)
+		outarg.open_flags &= ~FOPEN_DIRECT_IO;
+
+	ff->fh = outarg.fh;
+	ff->nodeid = nodeid;
+	ff->open_flags = outarg.open_flags;
+	file->private_data = fuse_file_get(ff);
+
+	return 0;
+}
+
 void fuse_finish_open(struct inode *inode, struct file *file)
 {
 	struct fuse_file *ff = file->private_data;
@@ -114,11 +142,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
 		nonseekable_open(inode, file);
 }
 
-int fuse_open_common(struct inode *inode, struct file *file, int isdir)
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_open_out outarg;
-	struct fuse_file *ff;
 	int err;
 
 	/* VFS checks this, but only _after_ ->open() */
@@ -129,24 +155,13 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 	if (err)
 		return err;
 
-	ff = fuse_file_alloc(fc);
-	if (!ff)
-		return -ENOMEM;
-
-	err = fuse_send_open(inode, file, isdir, &outarg);
+	err = fuse_do_open(fc, get_node_id(inode), file, isdir);
 	if (err)
-		fuse_file_free(ff);
-	else {
-		if (isdir)
-			outarg.open_flags &= ~FOPEN_DIRECT_IO;
-		ff->fh = outarg.fh;
-		ff->nodeid = get_node_id(inode);
-		ff->open_flags = outarg.open_flags;
-		file->private_data = fuse_file_get(ff);
-		fuse_finish_open(inode, file);
-	}
+		return err;
 
-	return err;
+	fuse_finish_open(inode, file);
+
+	return 0;
 }
 
 void fuse_release_fill(struct fuse_file *ff, int flags, int opcode)
@@ -201,7 +216,7 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 
 static int fuse_open(struct inode *inode, struct file *file)
 {
-	return fuse_open_common(inode, file, 0);
+	return fuse_open_common(inode, file, false);
 }
 
 static int fuse_release(struct inode *inode, struct file *file)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ce46c12..429e669 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -528,7 +528,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
 /**
  * Send OPEN or OPENDIR request
  */
-int fuse_open_common(struct inode *inode, struct file *file, int isdir);
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
 
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
 struct fuse_file *fuse_file_get(struct fuse_file *ff);
-- 
cgit v0.10.2


From 8b0797a4984de4406de25808e1a424344de543e4 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:39 +0200
Subject: fuse: don't use inode in fuse_sync_release()

Make fuse_sync_release() a generic helper function that doesn't need a
struct inode pointer.  This makes it suitable for use by CUSE.

Change return value of fuse_release_common() from int to void.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index faa3b2f..b3089a0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -362,19 +362,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 }
 
 /*
- * Synchronous release for the case when something goes wrong in CREATE_OPEN
- */
-static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
-			      int flags)
-{
-	fuse_release_fill(ff, flags, FUSE_RELEASE);
-	ff->reserved_req->force = 1;
-	fuse_request_send(fc, ff->reserved_req);
-	fuse_put_request(fc, ff->reserved_req);
-	kfree(ff);
-}
-
-/*
  * Atomic create+open operation
  *
  * If the filesystem doesn't support this, then fall back to separate
@@ -452,7 +439,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
 	if (!inode) {
 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
-		fuse_sync_release(fc, ff, flags);
+		fuse_sync_release(ff, flags);
 		fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
 		return -ENOMEM;
 	}
@@ -462,7 +449,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	fuse_invalidate_attr(dir);
 	file = lookup_instantiate_filp(nd, entry, generic_file_open);
 	if (IS_ERR(file)) {
-		fuse_sync_release(fc, ff, flags);
+		fuse_sync_release(ff, flags);
 		return PTR_ERR(file);
 	}
 	file->private_data = fuse_file_get(ff);
@@ -1108,7 +1095,9 @@ static int fuse_dir_open(struct inode *inode, struct file *file)
 
 static int fuse_dir_release(struct inode *inode, struct file *file)
 {
-	return fuse_release_common(inode, file, 1);
+	fuse_release_common(file, FUSE_RELEASEDIR);
+
+	return 0;
 }
 
 static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6bdaa55..ab627b4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -93,10 +93,9 @@ static void fuse_file_put(struct fuse_file *ff)
 {
 	if (atomic_dec_and_test(&ff->count)) {
 		struct fuse_req *req = ff->reserved_req;
-		struct inode *inode = req->misc.release.path.dentry->d_inode;
-		struct fuse_conn *fc = get_fuse_conn(inode);
+
 		req->end = fuse_release_end;
-		fuse_request_send_background(fc, req);
+		fuse_request_send_background(ff->fc, req);
 		kfree(ff);
 	}
 }
@@ -164,11 +163,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 	return 0;
 }
 
-void fuse_release_fill(struct fuse_file *ff, int flags, int opcode)
+static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
 {
+	struct fuse_conn *fc = ff->fc;
 	struct fuse_req *req = ff->reserved_req;
 	struct fuse_release_in *inarg = &req->misc.release.in;
 
+	spin_lock(&fc->lock);
+	list_del(&ff->write_entry);
+	if (!RB_EMPTY_NODE(&ff->polled_node))
+		rb_erase(&ff->polled_node, &fc->polled_files);
+	spin_unlock(&fc->lock);
+
+	wake_up_interruptible_sync(&ff->poll_wait);
+
 	inarg->fh = ff->fh;
 	inarg->flags = flags;
 	req->in.h.opcode = opcode;
@@ -178,40 +186,28 @@ void fuse_release_fill(struct fuse_file *ff, int flags, int opcode)
 	req->in.args[0].value = inarg;
 }
 
-int fuse_release_common(struct inode *inode, struct file *file, int isdir)
+void fuse_release_common(struct file *file, int opcode)
 {
-	struct fuse_conn *fc;
 	struct fuse_file *ff;
 	struct fuse_req *req;
 
 	ff = file->private_data;
 	if (unlikely(!ff))
-		return 0;	/* return value is ignored by VFS */
+		return;
 
-	fc = get_fuse_conn(inode);
 	req = ff->reserved_req;
-
-	fuse_release_fill(ff, file->f_flags,
-			  isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
+	fuse_prepare_release(ff, file->f_flags, opcode);
 
 	/* Hold vfsmount and dentry until release is finished */
 	path_get(&file->f_path);
 	req->misc.release.path = file->f_path;
 
-	spin_lock(&fc->lock);
-	list_del(&ff->write_entry);
-	if (!RB_EMPTY_NODE(&ff->polled_node))
-		rb_erase(&ff->polled_node, &fc->polled_files);
-	spin_unlock(&fc->lock);
-
-	wake_up_interruptible_sync(&ff->poll_wait);
 	/*
 	 * Normally this will send the RELEASE request, however if
 	 * some asynchronous READ or WRITE requests are outstanding,
 	 * the sending will be delayed.
 	 */
 	fuse_file_put(ff);
-	return 0;
 }
 
 static int fuse_open(struct inode *inode, struct file *file)
@@ -221,7 +217,20 @@ static int fuse_open(struct inode *inode, struct file *file)
 
 static int fuse_release(struct inode *inode, struct file *file)
 {
-	return fuse_release_common(inode, file, 0);
+	fuse_release_common(file, FUSE_RELEASE);
+
+	/* return value is ignored by VFS */
+	return 0;
+}
+
+void fuse_sync_release(struct fuse_file *ff, int flags)
+{
+	WARN_ON(atomic_read(&ff->count) > 1);
+	fuse_prepare_release(ff, flags, FUSE_RELEASE);
+	ff->reserved_req->force = 1;
+	fuse_request_send(ff->fc, ff->reserved_req);
+	fuse_put_request(ff->fc, ff->reserved_req);
+	kfree(ff);
 }
 
 /*
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 429e669..ef2e1f3 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -535,13 +535,12 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff);
 void fuse_file_free(struct fuse_file *ff);
 void fuse_finish_open(struct inode *inode, struct file *file);
 
-/** Fill in ff->reserved_req with a RELEASE request */
-void fuse_release_fill(struct fuse_file *ff, int flags, int opcode);
+void fuse_sync_release(struct fuse_file *ff, int flags);
 
 /**
  * Send RELEASE or RELEASEDIR request
  */
-int fuse_release_common(struct inode *inode, struct file *file, int isdir);
+void fuse_release_common(struct file *file, int opcode);
 
 /**
  * Send FSYNC or FSYNCDIR request
-- 
cgit v0.10.2


From d36f248710c05714f37d921258b630bd1456b99f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:39 +0200
Subject: fuse: don't use inode in fuse_do_ioctl() helper

Create a helper for sending an IOCTL request that doesn't use a struct
inode.

This prepares this function for use by CUSE, where the inode is not
owned by a fuse filesystem.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ab627b4..9f0ade0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1647,12 +1647,11 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
  * limits ioctl data transfers to well-formed ioctls and is the forced
  * behavior for all FUSE servers.
  */
-static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
-			       unsigned long arg, unsigned int flags)
+static long fuse_do_ioctl(struct file *file, unsigned int cmd,
+			  unsigned long arg, unsigned int flags)
 {
-	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_conn *fc = ff->fc;
 	struct fuse_ioctl_in inarg = {
 		.fh = ff->fh,
 		.cmd = cmd,
@@ -1671,13 +1670,6 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
 	/* assume all the iovs returned by client always fits in a page */
 	BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
-	if (!fuse_allow_task(fc, current))
-		return -EACCES;
-
-	err = -EIO;
-	if (is_bad_inode(inode))
-		goto out;
-
 	err = -ENOMEM;
 	pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
 	iov_page = alloc_page(GFP_KERNEL);
@@ -1738,7 +1730,7 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
 
 	/* okay, let's send it to the client */
 	req->in.h.opcode = FUSE_IOCTL;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
@@ -1822,16 +1814,31 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
 	return err ? err : outarg.result;
 }
 
+static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
+				   unsigned long arg, unsigned int flags)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+
+	if (!fuse_allow_task(fc, current))
+		return -EACCES;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
 static long fuse_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
-	return fuse_file_do_ioctl(file, cmd, arg, 0);
+	return fuse_file_ioctl_common(file, cmd, arg, 0);
 }
 
 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 				   unsigned long arg)
 {
-	return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
+	return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
 }
 
 /*
-- 
cgit v0.10.2


From 797759aaf31351a1ab1b9130c4f180ce496f46c5 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 28 Apr 2009 16:56:41 +0200
Subject: fuse: don't use inode in fuse_file_poll

Use ff->fc and ff->nodeid instead of file->f_dentry->d_inode in the
fuse_file_poll() implementation.

This prepares this function for use by CUSE, where the inode is not
owned by a fuse filesystem.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9f0ade0..c5de60e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1894,9 +1894,8 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
 
 static unsigned fuse_file_poll(struct file *file, poll_table *wait)
 {
-	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_file *ff = file->private_data;
-	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_conn *fc = ff->fc;
 	struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
 	struct fuse_poll_out outarg;
 	struct fuse_req *req;
@@ -1921,7 +1920,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait)
 		return PTR_ERR(req);
 
 	req->in.h.opcode = FUSE_POLL;
-	req->in.h.nodeid = get_node_id(inode);
+	req->in.h.nodeid = ff->nodeid;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(inarg);
 	req->in.args[0].value = &inarg;
-- 
cgit v0.10.2


From a325f9b92273d6c64ec56167905b951b9827ec33 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 14 Apr 2009 10:54:52 +0900
Subject: fuse: update fuse_conn_init() and separate out fuse_conn_kill()

Update fuse_conn_init() such that it doesn't take @sb and move bdi
registration into a separate function.  Also separate out
fuse_conn_kill() from fuse_put_super().

These will be used to implement cuse.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ba76b68..368189f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -282,7 +282,7 @@ __releases(&fc->lock)
 			wake_up_all(&fc->blocked_waitq);
 		}
 		if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
-		    fc->connected) {
+		    fc->connected && fc->bdi_initialized) {
 			clear_bdi_congested(&fc->bdi, READ);
 			clear_bdi_congested(&fc->bdi, WRITE);
 		}
@@ -408,7 +408,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 	fc->num_background++;
 	if (fc->num_background == FUSE_MAX_BACKGROUND)
 		fc->blocked = 1;
-	if (fc->num_background == FUSE_CONGESTION_THRESHOLD) {
+	if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
+	    fc->bdi_initialized) {
 		set_bdi_congested(&fc->bdi, READ);
 		set_bdi_congested(&fc->bdi, WRITE);
 	}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ef2e1f3..2efcf12 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -394,6 +394,9 @@ struct fuse_conn {
 	/** Filesystem supports NFS exporting.  Only set in INIT */
 	unsigned export_support:1;
 
+	/** Set if bdi is valid */
+	unsigned bdi_initialized:1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
@@ -662,7 +665,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 /**
  * Initialize fuse_conn
  */
-int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb);
+void fuse_conn_init(struct fuse_conn *fc);
 
 /**
  * Release reference to fuse_conn
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 75ca5ac..fea7c10 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -274,11 +274,14 @@ static void fuse_send_destroy(struct fuse_conn *fc)
 	}
 }
 
-static void fuse_put_super(struct super_block *sb)
+static void fuse_bdi_destroy(struct fuse_conn *fc)
 {
-	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	if (fc->bdi_initialized)
+		bdi_destroy(&fc->bdi);
+}
 
-	fuse_send_destroy(fc);
+static void fuse_conn_kill(struct fuse_conn *fc)
+{
 	spin_lock(&fc->lock);
 	fc->connected = 0;
 	fc->blocked = 0;
@@ -292,7 +295,15 @@ static void fuse_put_super(struct super_block *sb)
 	list_del(&fc->entry);
 	fuse_ctl_remove_conn(fc);
 	mutex_unlock(&fuse_mutex);
-	bdi_destroy(&fc->bdi);
+	fuse_bdi_destroy(fc);
+}
+
+static void fuse_put_super(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+	fuse_send_destroy(fc);
+	fuse_conn_kill(fc);
 	fuse_conn_put(fc);
 }
 
@@ -463,10 +474,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
 	return 0;
 }
 
-int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
+void fuse_conn_init(struct fuse_conn *fc)
 {
-	int err;
-
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
 	mutex_init(&fc->inst_mutex);
@@ -481,49 +490,12 @@ int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
 	INIT_LIST_HEAD(&fc->bg_queue);
 	INIT_LIST_HEAD(&fc->entry);
 	atomic_set(&fc->num_waiting, 0);
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-	fc->bdi.unplug_io_fn = default_unplug_io_fn;
-	/* fuse does it's own writeback accounting */
-	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
 	fc->khctr = 0;
 	fc->polled_files = RB_ROOT;
-	fc->dev = sb->s_dev;
-	err = bdi_init(&fc->bdi);
-	if (err)
-		goto error_mutex_destroy;
-	if (sb->s_bdev) {
-		err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
-				   MAJOR(fc->dev), MINOR(fc->dev));
-	} else {
-		err = bdi_register_dev(&fc->bdi, fc->dev);
-	}
-	if (err)
-		goto error_bdi_destroy;
-	/*
-	 * For a single fuse filesystem use max 1% of dirty +
-	 * writeback threshold.
-	 *
-	 * This gives about 1M of write buffer for memory maps on a
-	 * machine with 1G and 10% dirty_ratio, which should be more
-	 * than enough.
-	 *
-	 * Privileged users can raise it by writing to
-	 *
-	 *    /sys/class/bdi/<bdi>/max_ratio
-	 */
-	bdi_set_max_ratio(&fc->bdi, 1);
 	fc->reqctr = 0;
 	fc->blocked = 1;
 	fc->attr_version = 1;
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
-
-	return 0;
-
- error_bdi_destroy:
-	bdi_destroy(&fc->bdi);
- error_mutex_destroy:
-	mutex_destroy(&fc->inst_mutex);
-	return err;
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -794,6 +766,48 @@ static void fuse_free_conn(struct fuse_conn *fc)
 	kfree(fc);
 }
 
+static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
+{
+	int err;
+
+	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.unplug_io_fn = default_unplug_io_fn;
+	/* fuse does it's own writeback accounting */
+	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+
+	err = bdi_init(&fc->bdi);
+	if (err)
+		return err;
+
+	fc->bdi_initialized = 1;
+
+	if (sb->s_bdev) {
+		err =  bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
+				    MAJOR(fc->dev), MINOR(fc->dev));
+	} else {
+		err = bdi_register_dev(&fc->bdi, fc->dev);
+	}
+
+	if (err)
+		return err;
+
+	/*
+	 * For a single fuse filesystem use max 1% of dirty +
+	 * writeback threshold.
+	 *
+	 * This gives about 1M of write buffer for memory maps on a
+	 * machine with 1G and 10% dirty_ratio, which should be more
+	 * than enough.
+	 *
+	 * Privileged users can raise it by writing to
+	 *
+	 *    /sys/class/bdi/<bdi>/max_ratio
+	 */
+	bdi_set_max_ratio(&fc->bdi, 1);
+
+	return 0;
+}
+
 static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct fuse_conn *fc;
@@ -840,11 +854,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (!fc)
 		goto err_fput;
 
-	err = fuse_conn_init(fc, sb);
-	if (err) {
-		kfree(fc);
-		goto err_fput;
-	}
+	fuse_conn_init(fc);
+
+	fc->dev = sb->s_dev;
+	err = fuse_bdi_init(fc, sb);
+	if (err)
+		goto err_put_conn;
 
 	fc->release = fuse_free_conn;
 	fc->flags = d.flags;
@@ -908,7 +923,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
  err_put_root:
 	dput(root_dentry);
  err_put_conn:
-	bdi_destroy(&fc->bdi);
+	fuse_bdi_destroy(fc);
 	fuse_conn_put(fc);
  err_fput:
 	fput(file);
-- 
cgit v0.10.2


From 08cbf542bf24fb0481a54526b177347ae4046f5e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 14 Apr 2009 10:54:53 +0900
Subject: fuse: export symbols to be used by CUSE

Export the following symbols for CUSE.

fuse_conn_put()
fuse_conn_get()
fuse_conn_kill()
fuse_send_init()
fuse_do_open()
fuse_sync_release()
fuse_direct_io()
fuse_do_ioctl()
fuse_file_poll()
fuse_request_alloc()
fuse_get_req()
fuse_put_request()
fuse_request_send()
fuse_abort_conn()
fuse_dev_release()
fuse_dev_operations

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 368189f..8fed2ed 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void)
 		fuse_request_init(req);
 	return req;
 }
+EXPORT_SYMBOL_GPL(fuse_request_alloc);
 
 struct fuse_req *fuse_request_alloc_nofs(void)
 {
@@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
 	atomic_dec(&fc->num_waiting);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL_GPL(fuse_get_req);
 
 /*
  * Return request in fuse_file->reserved_req.  However that may
@@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 			fuse_request_free(req);
 	}
 }
+EXPORT_SYMBOL_GPL(fuse_put_request);
 
 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 {
@@ -400,6 +403,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 	}
 	spin_unlock(&fc->lock);
 }
+EXPORT_SYMBOL_GPL(fuse_request_send);
 
 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 					    struct fuse_req *req)
@@ -440,6 +444,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 	req->isreply = 1;
 	fuse_request_send_nowait(fc, req);
 }
+EXPORT_SYMBOL_GPL(fuse_request_send_background);
 
 /*
  * Called under fc->lock
@@ -1106,8 +1111,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
 	}
 	spin_unlock(&fc->lock);
 }
+EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
-static int fuse_dev_release(struct inode *inode, struct file *file)
+int fuse_dev_release(struct inode *inode, struct file *file)
 {
 	struct fuse_conn *fc = fuse_get_conn(file);
 	if (fc) {
@@ -1121,6 +1127,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(fuse_dev_release);
 
 static int fuse_dev_fasync(int fd, struct file *file, int on)
 {
@@ -1143,6 +1150,7 @@ const struct file_operations fuse_dev_operations = {
 	.release	= fuse_dev_release,
 	.fasync		= fuse_dev_fasync,
 };
+EXPORT_SYMBOL_GPL(fuse_dev_operations);
 
 static struct miscdevice fuse_miscdevice = {
 	.minor = FUSE_MINOR,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c5de60e..fce6ce6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/module.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -100,8 +101,8 @@ static void fuse_file_put(struct fuse_file *ff)
 	}
 }
 
-static int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
-			bool isdir)
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+		 bool isdir)
 {
 	struct fuse_open_out outarg;
 	struct fuse_file *ff;
@@ -128,6 +129,7 @@ static int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(fuse_do_open);
 
 void fuse_finish_open(struct inode *inode, struct file *file)
 {
@@ -232,6 +234,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags)
 	fuse_put_request(ff->fc, ff->reserved_req);
 	kfree(ff);
 }
+EXPORT_SYMBOL_GPL(fuse_sync_release);
 
 /*
  * Scramble the ID space with XTEA, so that the value of the files_struct
@@ -1009,8 +1012,8 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
 	return 0;
 }
 
-static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
-			      size_t count, loff_t *ppos, int write)
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+		       size_t count, loff_t *ppos, int write)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = ff->fc;
@@ -1066,6 +1069,7 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(fuse_direct_io);
 
 static ssize_t fuse_direct_read(struct file *file, char __user *buf,
 				     size_t count, loff_t *ppos)
@@ -1647,8 +1651,8 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
  * limits ioctl data transfers to well-formed ioctls and is the forced
  * behavior for all FUSE servers.
  */
-static long fuse_do_ioctl(struct file *file, unsigned int cmd,
-			  unsigned long arg, unsigned int flags)
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+		   unsigned int flags)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = ff->fc;
@@ -1813,6 +1817,7 @@ static long fuse_do_ioctl(struct file *file, unsigned int cmd,
 
 	return err ? err : outarg.result;
 }
+EXPORT_SYMBOL_GPL(fuse_do_ioctl);
 
 static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
 				   unsigned long arg, unsigned int flags)
@@ -1892,7 +1897,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
 	spin_unlock(&fc->lock);
 }
 
-static unsigned fuse_file_poll(struct file *file, poll_table *wait)
+unsigned fuse_file_poll(struct file *file, poll_table *wait)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = ff->fc;
@@ -1939,6 +1944,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait)
 	}
 	return POLLERR;
 }
+EXPORT_SYMBOL_GPL(fuse_file_poll);
 
 /*
  * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 2efcf12..aaf2f9f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -261,6 +261,8 @@ struct fuse_req {
 		} release;
 		struct fuse_init_in init_in;
 		struct fuse_init_out init_out;
+		struct cuse_init_in cuse_init_in;
+		struct cuse_init_out cuse_init_out;
 		struct {
 			struct fuse_read_in in;
 			u64 attr_ver;
@@ -662,6 +664,8 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
  */
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 
+void fuse_conn_kill(struct fuse_conn *fc);
+
 /**
  * Initialize fuse_conn
  */
@@ -704,4 +708,13 @@ void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
 
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+		 bool isdir);
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+		       size_t count, loff_t *ppos, int write);
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+		   unsigned int flags);
+unsigned fuse_file_poll(struct file *file, poll_table *wait);
+int fuse_dev_release(struct inode *inode, struct file *file);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fea7c10..d8673cc 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -280,7 +280,7 @@ static void fuse_bdi_destroy(struct fuse_conn *fc)
 		bdi_destroy(&fc->bdi);
 }
 
-static void fuse_conn_kill(struct fuse_conn *fc)
+void fuse_conn_kill(struct fuse_conn *fc)
 {
 	spin_lock(&fc->lock);
 	fc->connected = 0;
@@ -297,6 +297,7 @@ static void fuse_conn_kill(struct fuse_conn *fc)
 	mutex_unlock(&fuse_mutex);
 	fuse_bdi_destroy(fc);
 }
+EXPORT_SYMBOL_GPL(fuse_conn_kill);
 
 static void fuse_put_super(struct super_block *sb)
 {
@@ -508,12 +509,14 @@ void fuse_conn_put(struct fuse_conn *fc)
 		fc->release(fc);
 	}
 }
+EXPORT_SYMBOL_GPL(fuse_conn_put);
 
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
 {
 	atomic_inc(&fc->count);
 	return fc;
 }
+EXPORT_SYMBOL_GPL(fuse_conn_get);
 
 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
 {
-- 
cgit v0.10.2


From cd891ae0305601bdb4d2e7e85282961c4ff256cd Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 28 Apr 2009 11:39:34 -0400
Subject: tracing: convert ftrace_dump spinlocks to raw

ftrace_dump is used for printing out the contents of the ftrace ring buffer
to the console on failure. Currently it uses a spinlock to synchronize
the output from multiple failures on different CPUs. This spin lock
currently is a normal spinlock and can cause issues with lockdep and
lock tracing.

This patch converts it to raw since it is for error handling only.
The lock is local to the ftrace_dump and is not used by any other
infrastructure.

[ Impact: prevent ftrace_dump from locking up by internal tracing ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b6183bc..5d704a4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4114,7 +4114,8 @@ trace_printk_seq(struct trace_seq *s)
 
 static void __ftrace_dump(bool disable_tracing)
 {
-	static DEFINE_SPINLOCK(ftrace_dump_lock);
+	static raw_spinlock_t ftrace_dump_lock =
+		(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	/* use static because iter can be a bit big for the stack */
 	static struct trace_iterator iter;
 	unsigned int old_userobj;
@@ -4123,7 +4124,8 @@ static void __ftrace_dump(bool disable_tracing)
 	int cnt = 0, cpu;
 
 	/* only one dump */
-	spin_lock_irqsave(&ftrace_dump_lock, flags);
+	local_irq_save(flags);
+	__raw_spin_lock(&ftrace_dump_lock);
 	if (dump_ran)
 		goto out;
 
@@ -4195,7 +4197,8 @@ static void __ftrace_dump(bool disable_tracing)
 	}
 
  out:
-	spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+	__raw_spin_unlock(&ftrace_dump_lock);
+	local_irq_restore(flags);
 }
 
 /* By default: disable tracing after the dump */
-- 
cgit v0.10.2


From edc953fa4ebc0265ef3b1754fe116a9fd4264e15 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Tue, 28 Apr 2009 11:13:46 -0400
Subject: x86: clean up alternative.h

Alternative header duplicates assembly that could be merged in
one single macro.  Merging this into this macro also allows to
directly declare ALTERNATIVE() statements within assembly code.

Uses a __stringify() of the feature bits rather than passing a
"i" operand.  Leave the old %0 operand as-is (set to 0), unused
to stay compatible with API.

(v2: tab alignment fixes)

[ Impact: cleanup ]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
LKML-Reference: <20090428151346.GA31212@Krystal>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f6aa18e..1a37bcd 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/stddef.h>
+#include <linux/stringify.h>
 #include <asm/asm.h>
 
 /*
@@ -74,6 +75,22 @@ static inline void alternatives_smp_switch(int smp) {}
 
 const unsigned char *const *find_nop_table(void);
 
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+									\
+      "661:\n\t" oldinstr "\n662:\n"					\
+      ".section .altinstructions,\"a\"\n"				\
+      _ASM_ALIGN "\n"							\
+      _ASM_PTR "661b\n"				/* label           */	\
+      _ASM_PTR "663f\n"				/* new instruction */	\
+      "	 .byte " __stringify(feature) "\n"	/* feature bit     */	\
+      "	 .byte 662b-661b\n"			/* sourcelen       */	\
+      "	 .byte 664f-663f\n"			/* replacementlen  */	\
+      ".previous\n"							\
+      ".section .altinstr_replacement, \"ax\"\n"			\
+      "663:\n\t" newinstr "\n664:\n"		/* replacement     */	\
+      ".previous"
+
 /*
  * Alternative instructions for different CPU types or capabilities.
  *
@@ -87,18 +104,7 @@ const unsigned char *const *find_nop_table(void);
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, newinstr, feature)			\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c0\n"		/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
-		      ".previous" :: "i" (feature) : "memory")
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
 /*
  * Alternative inline assembly with input.
@@ -109,35 +115,16 @@ const unsigned char *const *find_nop_table(void);
  * Best is to use constraints that are fixed size (like (%1) ... "r")
  * If you use variable sized constraints like "m" or "g" in the
  * replacement make sure to pad to the worst case length.
+ * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c0\n"		/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */	\
-		      ".previous" :: "i" (feature), ##input)
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
+		: : "i" (0), ## input)
 
 /* Like alternative_input, but with a single output argument */
 #define alternative_io(oldinstr, newinstr, feature, output, input...)	\
-	asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
-		      ".section .altinstructions,\"a\"\n"		\
-		      _ASM_ALIGN "\n"					\
-		      _ASM_PTR "661b\n"		/* label */		\
-		      _ASM_PTR "663f\n"		/* new instruction */	\
-		      "	 .byte %c[feat]\n"	/* feature bit */	\
-		      "	 .byte 662b-661b\n"	/* sourcelen */		\
-		      "	 .byte 664f-663f\n"	/* replacementlen */	\
-		      ".previous\n"					\
-		      ".section .altinstr_replacement,\"ax\"\n"		\
-		      "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
-		      ".previous" : output : [feat] "i" (feature), ##input)
+	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
+		: output : "i" (0), ## input)
 
 /*
  * use this macro(s) if you need more than one output parameter
-- 
cgit v0.10.2


From fa59440d0c7b5a2bcdc9e35f25fdac693e54c86a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Thu, 23 Apr 2009 10:19:58 +0100
Subject: [ARM] 5470/1: U300 register definitions

This adds registers, interrupt numbers and IO mappings
for the U300 series platforms core support, including
basic block offsets and registers definitions for the
system controller.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-u300/include/mach/hardware.h b/arch/arm/mach-u300/include/mach/hardware.h
new file mode 100644
index 0000000..b99d4ce
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/hardware.h
@@ -0,0 +1,5 @@
+/*
+ * arch/arm/mach-u300/include/mach/hardware.h
+ */
+#include <asm/sizes.h>
+#include <mach/u300-regs.h>
diff --git a/arch/arm/mach-u300/include/mach/io.h b/arch/arm/mach-u300/include/mach/io.h
new file mode 100644
index 0000000..5d6b4c1
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/io.h
@@ -0,0 +1,20 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/io.h
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Dummy IO map for being able to use writew()/readw(),
+ * writel()/readw() and similar accessor functions.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#ifndef __MACH_IO_H
+#define __MACH_IO_H
+
+#define IO_SPACE_LIMIT 0xffffffff
+
+#define __io(a)		__typesafe_io(a)
+#define __mem_pci(a)	(a)
+
+#endif
diff --git a/arch/arm/mach-u300/include/mach/irqs.h b/arch/arm/mach-u300/include/mach/irqs.h
new file mode 100644
index 0000000..a6867b1
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/irqs.h
@@ -0,0 +1,114 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/irqs.h
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * IRQ channel definitions for the U300 platforms.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+
+#ifndef __MACH_IRQS_H
+#define __MACH_IRQS_H
+
+#define IRQ_U300_INTCON0_START		0
+#define IRQ_U300_INTCON1_START		32
+/* These are on INTCON0 - 30 lines */
+#define IRQ_U300_IRQ0_EXT		0
+#define IRQ_U300_IRQ1_EXT		1
+#define IRQ_U300_DMA			2
+#define IRQ_U300_VIDEO_ENC_0		3
+#define IRQ_U300_VIDEO_ENC_1		4
+#define IRQ_U300_AAIF_RX		5
+#define IRQ_U300_AAIF_TX		6
+#define IRQ_U300_AAIF_VGPIO		7
+#define IRQ_U300_AAIF_WAKEUP		8
+#define IRQ_U300_PCM_I2S0_FRAME		9
+#define IRQ_U300_PCM_I2S0_FIFO		10
+#define IRQ_U300_PCM_I2S1_FRAME		11
+#define IRQ_U300_PCM_I2S1_FIFO		12
+#define IRQ_U300_XGAM_GAMCON		13
+#define IRQ_U300_XGAM_CDI		14
+#define IRQ_U300_XGAM_CDICON		15
+#if defined(CONFIG_MACH_U300_BS2X) || defined(CONFIG_MACH_U300_BS330)
+/* MMIACC not used on the DB3210 or DB3350 chips */
+#define IRQ_U300_XGAM_MMIACC		16
+#endif
+#define IRQ_U300_XGAM_PDI		17
+#define IRQ_U300_XGAM_PDICON		18
+#define IRQ_U300_XGAM_GAMEACC		19
+#define IRQ_U300_XGAM_MCIDCT		20
+#define IRQ_U300_APEX			21
+#define IRQ_U300_UART0			22
+#define IRQ_U300_SPI			23
+#define IRQ_U300_TIMER_APP_OS		24
+#define IRQ_U300_TIMER_APP_DD		25
+#define IRQ_U300_TIMER_APP_GP1		26
+#define IRQ_U300_TIMER_APP_GP2		27
+#define IRQ_U300_TIMER_OS		28
+#define IRQ_U300_TIMER_MS		29
+#define IRQ_U300_KEYPAD_KEYBF		30
+#define IRQ_U300_KEYPAD_KEYBR		31
+/* These are on INTCON1 - 32 lines */
+#define IRQ_U300_GPIO_PORT0		32
+#define IRQ_U300_GPIO_PORT1		33
+#define IRQ_U300_GPIO_PORT2		34
+
+#if defined(CONFIG_MACH_U300_BS2X) || defined(CONFIG_MACH_U300_BS330) || \
+    defined(CONFIG_MACH_U300_BS335)
+/* These are for DB3150, DB3200 and DB3350 */
+#define IRQ_U300_WDOG			35
+#define IRQ_U300_EVHIST			36
+#define IRQ_U300_MSPRO			37
+#define IRQ_U300_MMCSD_MCIINTR0		38
+#define IRQ_U300_MMCSD_MCIINTR1		39
+#define IRQ_U300_I2C0			40
+#define IRQ_U300_I2C1			41
+#define IRQ_U300_RTC			42
+#define IRQ_U300_NFIF			43
+#define IRQ_U300_NFIF2			44
+#endif
+
+/* DB3150 and DB3200 have only 45 IRQs */
+#if defined(CONFIG_MACH_U300_BS2X) || defined(CONFIG_MACH_U300_BS330)
+#define U300_NR_IRQS			45
+#endif
+
+/* The DB3350-specific interrupt lines */
+#ifdef CONFIG_MACH_U300_BS335
+#define IRQ_U300_ISP_F0			45
+#define IRQ_U300_ISP_F1			46
+#define IRQ_U300_ISP_F2			47
+#define IRQ_U300_ISP_F3			48
+#define IRQ_U300_ISP_F4			49
+#define IRQ_U300_GPIO_PORT3		50
+#define IRQ_U300_SYSCON_PLL_LOCK	51
+#define IRQ_U300_UART1			52
+#define IRQ_U300_GPIO_PORT4		53
+#define IRQ_U300_GPIO_PORT5		54
+#define IRQ_U300_GPIO_PORT6		55
+#define U300_NR_IRQS			56
+#endif
+
+/* The DB3210-specific interrupt lines */
+#ifdef CONFIG_MACH_U300_BS365
+#define IRQ_U300_GPIO_PORT3		35
+#define IRQ_U300_GPIO_PORT4		36
+#define IRQ_U300_WDOG			37
+#define IRQ_U300_EVHIST			38
+#define IRQ_U300_MSPRO			39
+#define IRQ_U300_MMCSD_MCIINTR0		40
+#define IRQ_U300_MMCSD_MCIINTR1		41
+#define IRQ_U300_I2C0			42
+#define IRQ_U300_I2C1			43
+#define IRQ_U300_RTC			44
+#define IRQ_U300_NFIF			45
+#define IRQ_U300_NFIF2			46
+#define IRQ_U300_SYSCON_PLL_LOCK	47
+#define U300_NR_IRQS			48
+#endif
+
+#define NR_IRQS U300_NR_IRQS
+
+#endif
diff --git a/arch/arm/mach-u300/include/mach/syscon.h b/arch/arm/mach-u300/include/mach/syscon.h
new file mode 100644
index 0000000..1c90d1b
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/syscon.h
@@ -0,0 +1,644 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/syscon.h
+ *
+ *
+ * Copyright (C) 2008 ST-Ericsson AB
+ *
+ * Author: Rickard Andersson <rickard.andersson@stericsson.com>
+ */
+
+#ifndef __MACH_SYSCON_H
+#define __MACH_SYSCON_H
+
+/*
+ * All register defines for SYSCON registers that concerns individual
+ * block clocks and reset lines are registered here. This is because
+ * we don't want any other file to try to fool around with this stuff.
+ */
+
+/* APP side SYSCON registers */
+/* TODO: this is incomplete. Add all from asic_syscon_map.h eventually. */
+/* CLK Control Register 16bit (R/W) */
+#define U300_SYSCON_CCR						(0x0000)
+#define U300_SYSCON_CCR_I2S1_USE_VCXO				(0x0040)
+#define U300_SYSCON_CCR_I2S0_USE_VCXO				(0x0020)
+#define U300_SYSCON_CCR_TURN_VCXO_ON				(0x0008)
+#define U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK			(0x0007)
+#define U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER		(0x04)
+#define U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW			(0x03)
+#define U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE		(0x02)
+#define U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH			(0x01)
+#define U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST			(0x00)
+/* CLK Status Register 16bit (R/W) */
+#define U300_SYSCON_CSR						(0x0004)
+#define U300_SYSCON_CSR_PLL208_LOCK_IND				(0x0002)
+#define U300_SYSCON_CSR_PLL13_LOCK_IND				(0x0001)
+/* Reset lines for SLOW devices 16bit (R/W) */
+#define U300_SYSCON_RSR						(0x0014)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_RSR_PPM_RESET_EN				(0x0200)
+#endif
+#define U300_SYSCON_RSR_ACC_TMR_RESET_EN			(0x0100)
+#define U300_SYSCON_RSR_APP_TMR_RESET_EN			(0x0080)
+#define U300_SYSCON_RSR_RTC_RESET_EN				(0x0040)
+#define U300_SYSCON_RSR_KEYPAD_RESET_EN				(0x0020)
+#define U300_SYSCON_RSR_GPIO_RESET_EN				(0x0010)
+#define U300_SYSCON_RSR_EH_RESET_EN				(0x0008)
+#define U300_SYSCON_RSR_BTR_RESET_EN				(0x0004)
+#define U300_SYSCON_RSR_UART_RESET_EN				(0x0002)
+#define U300_SYSCON_RSR_SLOW_BRIDGE_RESET_EN			(0x0001)
+/* Reset lines for FAST devices 16bit (R/W) */
+#define U300_SYSCON_RFR						(0x0018)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_RFR_UART1_RESET_ENABLE			(0x0080)
+#endif
+#define U300_SYSCON_RFR_SPI_RESET_ENABLE			(0x0040)
+#define U300_SYSCON_RFR_MMC_RESET_ENABLE			(0x0020)
+#define U300_SYSCON_RFR_PCM_I2S1_RESET_ENABLE			(0x0010)
+#define U300_SYSCON_RFR_PCM_I2S0_RESET_ENABLE			(0x0008)
+#define U300_SYSCON_RFR_I2C1_RESET_ENABLE			(0x0004)
+#define U300_SYSCON_RFR_I2C0_RESET_ENABLE			(0x0002)
+#define U300_SYSCON_RFR_FAST_BRIDGE_RESET_ENABLE		(0x0001)
+/* Reset lines for the rest of the peripherals 16bit (R/W) */
+#define U300_SYSCON_RRR						(0x001c)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_RRR_CDS_RESET_EN				(0x4000)
+#define U300_SYSCON_RRR_ISP_RESET_EN				(0x2000)
+#endif
+#define U300_SYSCON_RRR_INTCON_RESET_EN				(0x1000)
+#define U300_SYSCON_RRR_MSPRO_RESET_EN				(0x0800)
+#define U300_SYSCON_RRR_XGAM_RESET_EN				(0x0100)
+#define U300_SYSCON_RRR_XGAM_VC_SYNC_RESET_EN			(0x0080)
+#define U300_SYSCON_RRR_NANDIF_RESET_EN				(0x0040)
+#define U300_SYSCON_RRR_EMIF_RESET_EN				(0x0020)
+#define U300_SYSCON_RRR_DMAC_RESET_EN				(0x0010)
+#define U300_SYSCON_RRR_CPU_RESET_EN				(0x0008)
+#define U300_SYSCON_RRR_APEX_RESET_EN				(0x0004)
+#define U300_SYSCON_RRR_AHB_RESET_EN				(0x0002)
+#define U300_SYSCON_RRR_AAIF_RESET_EN				(0x0001)
+/* Clock enable for SLOW peripherals 16bit (R/W) */
+#define U300_SYSCON_CESR					(0x0020)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_CESR_PPM_CLK_EN				(0x0200)
+#endif
+#define U300_SYSCON_CESR_ACC_TMR_CLK_EN				(0x0100)
+#define U300_SYSCON_CESR_APP_TMR_CLK_EN				(0x0080)
+#define U300_SYSCON_CESR_KEYPAD_CLK_EN				(0x0040)
+#define U300_SYSCON_CESR_GPIO_CLK_EN				(0x0010)
+#define U300_SYSCON_CESR_EH_CLK_EN				(0x0008)
+#define U300_SYSCON_CESR_BTR_CLK_EN				(0x0004)
+#define U300_SYSCON_CESR_UART_CLK_EN				(0x0002)
+#define U300_SYSCON_CESR_SLOW_BRIDGE_CLK_EN			(0x0001)
+/* Clock enable for FAST peripherals 16bit (R/W) */
+#define U300_SYSCON_CEFR					(0x0024)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_CEFR_UART1_CLK_EN				(0x0200)
+#endif
+#define U300_SYSCON_CEFR_I2S1_CORE_CLK_EN			(0x0100)
+#define U300_SYSCON_CEFR_I2S0_CORE_CLK_EN			(0x0080)
+#define U300_SYSCON_CEFR_SPI_CLK_EN				(0x0040)
+#define U300_SYSCON_CEFR_MMC_CLK_EN				(0x0020)
+#define U300_SYSCON_CEFR_I2S1_CLK_EN    			(0x0010)
+#define U300_SYSCON_CEFR_I2S0_CLK_EN     			(0x0008)
+#define U300_SYSCON_CEFR_I2C1_CLK_EN     			(0x0004)
+#define U300_SYSCON_CEFR_I2C0_CLK_EN     			(0x0002)
+#define U300_SYSCON_CEFR_FAST_BRIDGE_CLK_EN			(0x0001)
+/* Clock enable for the rest of the peripherals 16bit (R/W) */
+#define U300_SYSCON_CERR					(0x0028)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_CERR_CDS_CLK_EN				(0x2000)
+#define U300_SYSCON_CERR_ISP_CLK_EN				(0x1000)
+#endif
+#define U300_SYSCON_CERR_MSPRO_CLK_EN				(0x0800)
+#define U300_SYSCON_CERR_AHB_SUBSYS_BRIDGE_CLK_EN		(0x0400)
+#define U300_SYSCON_CERR_SEMI_CLK_EN				(0x0200)
+#define U300_SYSCON_CERR_XGAM_CLK_EN				(0x0100)
+#define U300_SYSCON_CERR_VIDEO_ENC_CLK_EN			(0x0080)
+#define U300_SYSCON_CERR_NANDIF_CLK_EN				(0x0040)
+#define U300_SYSCON_CERR_EMIF_CLK_EN				(0x0020)
+#define U300_SYSCON_CERR_DMAC_CLK_EN				(0x0010)
+#define U300_SYSCON_CERR_CPU_CLK_EN				(0x0008)
+#define U300_SYSCON_CERR_APEX_CLK_EN				(0x0004)
+#define U300_SYSCON_CERR_AHB_CLK_EN				(0x0002)
+#define U300_SYSCON_CERR_AAIF_CLK_EN				(0x0001)
+/* Single block clock enable 16bit (-/W) */
+#define U300_SYSCON_SBCER					(0x002c)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_SBCER_PPM_CLK_EN				(0x0009)
+#endif
+#define U300_SYSCON_SBCER_ACC_TMR_CLK_EN			(0x0008)
+#define U300_SYSCON_SBCER_APP_TMR_CLK_EN			(0x0007)
+#define U300_SYSCON_SBCER_KEYPAD_CLK_EN				(0x0006)
+#define U300_SYSCON_SBCER_GPIO_CLK_EN				(0x0004)
+#define U300_SYSCON_SBCER_EH_CLK_EN				(0x0003)
+#define U300_SYSCON_SBCER_BTR_CLK_EN				(0x0002)
+#define U300_SYSCON_SBCER_UART_CLK_EN				(0x0001)
+#define U300_SYSCON_SBCER_SLOW_BRIDGE_CLK_EN			(0x0000)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_SBCER_UART1_CLK_EN				(0x0019)
+#endif
+#define U300_SYSCON_SBCER_I2S1_CORE_CLK_EN			(0x0018)
+#define U300_SYSCON_SBCER_I2S0_CORE_CLK_EN			(0x0017)
+#define U300_SYSCON_SBCER_SPI_CLK_EN				(0x0016)
+#define U300_SYSCON_SBCER_MMC_CLK_EN				(0x0015)
+#define U300_SYSCON_SBCER_I2S1_CLK_EN				(0x0014)
+#define U300_SYSCON_SBCER_I2S0_CLK_EN				(0x0013)
+#define U300_SYSCON_SBCER_I2C1_CLK_EN				(0x0012)
+#define U300_SYSCON_SBCER_I2C0_CLK_EN				(0x0011)
+#define U300_SYSCON_SBCER_FAST_BRIDGE_CLK_EN			(0x0010)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_SBCER_CDS_CLK_EN				(0x002D)
+#define U300_SYSCON_SBCER_ISP_CLK_EN				(0x002C)
+#endif
+#define U300_SYSCON_SBCER_MSPRO_CLK_EN				(0x002B)
+#define U300_SYSCON_SBCER_AHB_SUBSYS_BRIDGE_CLK_EN		(0x002A)
+#define U300_SYSCON_SBCER_SEMI_CLK_EN				(0x0029)
+#define U300_SYSCON_SBCER_XGAM_CLK_EN				(0x0028)
+#define U300_SYSCON_SBCER_VIDEO_ENC_CLK_EN			(0x0027)
+#define U300_SYSCON_SBCER_NANDIF_CLK_EN				(0x0026)
+#define U300_SYSCON_SBCER_EMIF_CLK_EN				(0x0025)
+#define U300_SYSCON_SBCER_DMAC_CLK_EN				(0x0024)
+#define U300_SYSCON_SBCER_CPU_CLK_EN				(0x0023)
+#define U300_SYSCON_SBCER_APEX_CLK_EN				(0x0022)
+#define U300_SYSCON_SBCER_AHB_CLK_EN				(0x0021)
+#define U300_SYSCON_SBCER_AAIF_CLK_EN				(0x0020)
+/* Single block clock disable 16bit (-/W) */
+#define U300_SYSCON_SBCDR					(0x0030)
+/* Same values as above for SBCER */
+/* Clock force SLOW peripherals 16bit (R/W) */
+#define U300_SYSCON_CFSR					(0x003c)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_CFSR_PPM_CLK_FORCE_EN			(0x0200)
+#endif
+#define U300_SYSCON_CFSR_ACC_TMR_CLK_FORCE_EN			(0x0100)
+#define U300_SYSCON_CFSR_APP_TMR_CLK_FORCE_EN			(0x0080)
+#define U300_SYSCON_CFSR_KEYPAD_CLK_FORCE_EN			(0x0020)
+#define U300_SYSCON_CFSR_GPIO_CLK_FORCE_EN			(0x0010)
+#define U300_SYSCON_CFSR_EH_CLK_FORCE_EN			(0x0008)
+#define U300_SYSCON_CFSR_BTR_CLK_FORCE_EN			(0x0004)
+#define U300_SYSCON_CFSR_UART_CLK_FORCE_EN			(0x0002)
+#define U300_SYSCON_CFSR_SLOW_BRIDGE_CLK_FORCE_EN		(0x0001)
+/* Clock force FAST peripherals 16bit (R/W) */
+#define U300_SYSCON_CFFR					(0x40)
+/* Values not defined. Define if you want to use them. */
+/* Clock force the rest of the peripherals 16bit (R/W) */
+#define U300_SYSCON_CFRR					(0x44)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_CFRR_CDS_CLK_FORCE_EN			(0x2000)
+#define U300_SYSCON_CFRR_ISP_CLK_FORCE_EN			(0x1000)
+#endif
+#define U300_SYSCON_CFRR_MSPRO_CLK_FORCE_EN			(0x0800)
+#define U300_SYSCON_CFRR_AHB_SUBSYS_BRIDGE_CLK_FORCE_EN		(0x0400)
+#define U300_SYSCON_CFRR_SEMI_CLK_FORCE_EN			(0x0200)
+#define U300_SYSCON_CFRR_XGAM_CLK_FORCE_EN			(0x0100)
+#define U300_SYSCON_CFRR_VIDEO_ENC_CLK_FORCE_EN			(0x0080)
+#define U300_SYSCON_CFRR_NANDIF_CLK_FORCE_EN			(0x0040)
+#define U300_SYSCON_CFRR_EMIF_CLK_FORCE_EN			(0x0020)
+#define U300_SYSCON_CFRR_DMAC_CLK_FORCE_EN			(0x0010)
+#define U300_SYSCON_CFRR_CPU_CLK_FORCE_EN			(0x0008)
+#define U300_SYSCON_CFRR_APEX_CLK_FORCE_EN			(0x0004)
+#define U300_SYSCON_CFRR_AHB_CLK_FORCE_EN			(0x0002)
+#define U300_SYSCON_CFRR_AAIF_CLK_FORCE_EN			(0x0001)
+/* PLL208 Frequency Control 16bit (R/W) */
+#define U300_SYSCON_PFCR					(0x48)
+#define U300_SYSCON_PFCR_DPLL_MULT_NUM				(0x000F)
+/* Power Management Control 16bit (R/W) */
+#define U300_SYSCON_PMCR					(0x50)
+#define U300_SYSCON_PMCR_DCON_ENABLE				(0x0002)
+#define U300_SYSCON_PMCR_PWR_MGNT_ENABLE			(0x0001)
+/*
+ * All other clocking registers moved to clock.c!
+ */
+/* Reset Out 16bit (R/W) */
+#define U300_SYSCON_RCR						(0x6c)
+#define U300_SYSCON_RCR_RESOUT0_RST_N_DISABLE			(0x0001)
+/* EMIF Slew Rate Control 16bit (R/W) */
+#define U300_SYSCON_SRCLR					(0x70)
+#define U300_SYSCON_SRCLR_MASK					(0x03FF)
+#define U300_SYSCON_SRCLR_VALUE					(0x03FF)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_5_B			(0x0200)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_5_A			(0x0100)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_4_B			(0x0080)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_4_A			(0x0040)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_3_B			(0x0020)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_3_A			(0x0010)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_2_B			(0x0008)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_2_A			(0x0004)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_1_B			(0x0002)
+#define U300_SYSCON_SRCLR_EMIF_1_SLRC_1_A			(0x0001)
+/* EMIF Clock Control Register 16bit (R/W) */
+#define U300_SYSCON_ECCR					(0x0078)
+#define U300_SYSCON_ECCR_MASK					(0x000F)
+#define U300_SYSCON_ECCR_EMIF_1_STATIC_CLK_EN_N_DISABLE		(0x0008)
+#define U300_SYSCON_ECCR_EMIF_1_RET_OUT_CLK_EN_N_DISABLE	(0x0004)
+#define U300_SYSCON_ECCR_EMIF_MEMCLK_RET_EN_N_DISABLE		(0x0002)
+#define U300_SYSCON_ECCR_EMIF_SDRCLK_RET_EN_N_DISABLE		(0x0001)
+/* PAD MUX Control register 1 (LOW) 16bit (R/W) */
+#define U300_SYSCON_PMC1LR					(0x007C)
+#define U300_SYSCON_PMC1LR_MASK					(0xFFFF)
+#define U300_SYSCON_PMC1LR_CDI_MASK				(0xC000)
+#define U300_SYSCON_PMC1LR_CDI_CDI				(0x0000)
+#define U300_SYSCON_PMC1LR_CDI_EMIF				(0x4000)
+#define U300_SYSCON_PMC1LR_CDI_GPIO				(0x8000)
+#define U300_SYSCON_PMC1LR_CDI_WCDMA				(0xC000)
+#define U300_SYSCON_PMC1LR_PDI_MASK				(0x3000)
+#define U300_SYSCON_PMC1LR_PDI_PDI				(0x0000)
+#define U300_SYSCON_PMC1LR_PDI_EGG				(0x1000)
+#define U300_SYSCON_PMC1LR_PDI_WCDMA				(0x3000)
+#define U300_SYSCON_PMC1LR_MMCSD_MASK				(0x0C00)
+#define U300_SYSCON_PMC1LR_MMCSD_MMCSD				(0x0000)
+#define U300_SYSCON_PMC1LR_MMCSD_MSPRO				(0x0400)
+#define U300_SYSCON_PMC1LR_MMCSD_DSP				(0x0800)
+#define U300_SYSCON_PMC1LR_MMCSD_WCDMA				(0x0C00)
+#define U300_SYSCON_PMC1LR_ETM_MASK				(0x0300)
+#define U300_SYSCON_PMC1LR_ETM_ACC				(0x0000)
+#define U300_SYSCON_PMC1LR_ETM_APP				(0x0100)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS2_MASK			(0x00C0)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS2_STATIC			(0x0000)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS2_NFIF			(0x0040)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS2_SDRAM			(0x0080)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS2_STATIC_2GB		(0x00C0)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS1_MASK			(0x0030)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS1_STATIC			(0x0000)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS1_NFIF			(0x0010)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS1_SDRAM			(0x0020)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS1_SEMI			(0x0030)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS0_MASK			(0x000C)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS0_STATIC			(0x0000)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS0_NFIF			(0x0004)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS0_SDRAM			(0x0008)
+#define U300_SYSCON_PMC1LR_EMIF_1_CS0_SEMI			(0x000C)
+#define U300_SYSCON_PMC1LR_EMIF_1_MASK				(0x0003)
+#define U300_SYSCON_PMC1LR_EMIF_1_STATIC			(0x0000)
+#define U300_SYSCON_PMC1LR_EMIF_1_SDRAM0			(0x0001)
+#define U300_SYSCON_PMC1LR_EMIF_1_SDRAM1			(0x0002)
+#define U300_SYSCON_PMC1LR_EMIF_1				(0x0003)
+/* PAD MUX Control register 2 (HIGH) 16bit (R/W) */
+#define U300_SYSCON_PMC1HR					(0x007E)
+#define U300_SYSCON_PMC1HR_MASK					(0xFFFF)
+#define U300_SYSCON_PMC1HR_MISC_2_MASK				(0xC000)
+#define U300_SYSCON_PMC1HR_MISC_2_APP_GPIO			(0x0000)
+#define U300_SYSCON_PMC1HR_MISC_2_MSPRO				(0x4000)
+#define U300_SYSCON_PMC1HR_MISC_2_DSP				(0x8000)
+#define U300_SYSCON_PMC1HR_MISC_2_AAIF				(0xC000)
+#define U300_SYSCON_PMC1HR_APP_GPIO_2_MASK			(0x3000)
+#define U300_SYSCON_PMC1HR_APP_GPIO_2_APP_GPIO			(0x0000)
+#define U300_SYSCON_PMC1HR_APP_GPIO_2_NFIF			(0x1000)
+#define U300_SYSCON_PMC1HR_APP_GPIO_2_DSP			(0x2000)
+#define U300_SYSCON_PMC1HR_APP_GPIO_2_AAIF			(0x3000)
+#define U300_SYSCON_PMC1HR_APP_GPIO_1_MASK			(0x0C00)
+#define U300_SYSCON_PMC1HR_APP_GPIO_1_APP_GPIO			(0x0000)
+#define U300_SYSCON_PMC1HR_APP_GPIO_1_MMC			(0x0400)
+#define U300_SYSCON_PMC1HR_APP_GPIO_1_DSP			(0x0800)
+#define U300_SYSCON_PMC1HR_APP_GPIO_1_AAIF			(0x0C00)
+#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK			(0x0300)
+#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_APP_GPIO		(0x0000)
+#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI			(0x0100)
+#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_AAIF			(0x0300)
+#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK			(0x00C0)
+#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_APP_GPIO		(0x0000)
+#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI			(0x0040)
+#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_AAIF			(0x00C0)
+#define U300_SYSCON_PMC1HR_APP_SPI_2_MASK			(0x0030)
+#define U300_SYSCON_PMC1HR_APP_SPI_2_APP_GPIO			(0x0000)
+#define U300_SYSCON_PMC1HR_APP_SPI_2_SPI			(0x0010)
+#define U300_SYSCON_PMC1HR_APP_SPI_2_DSP			(0x0020)
+#define U300_SYSCON_PMC1HR_APP_SPI_2_AAIF			(0x0030)
+#define U300_SYSCON_PMC1HR_APP_UART0_2_MASK			(0x000C)
+#define U300_SYSCON_PMC1HR_APP_UART0_2_APP_GPIO			(0x0000)
+#define U300_SYSCON_PMC1HR_APP_UART0_2_UART0			(0x0004)
+#define U300_SYSCON_PMC1HR_APP_UART0_2_NFIF_CS			(0x0008)
+#define U300_SYSCON_PMC1HR_APP_UART0_2_AAIF			(0x000C)
+#define U300_SYSCON_PMC1HR_APP_UART0_1_MASK			(0x0003)
+#define U300_SYSCON_PMC1HR_APP_UART0_1_APP_GPIO			(0x0000)
+#define U300_SYSCON_PMC1HR_APP_UART0_1_UART0			(0x0001)
+#define U300_SYSCON_PMC1HR_APP_UART0_1_AAIF			(0x0003)
+/* Step one for killing the applications system 16bit (-/W) */
+#define U300_SYSCON_KA1R					(0x0080)
+#define U300_SYSCON_KA1R_MASK					(0xFFFF)
+#define U300_SYSCON_KA1R_VALUE					(0xFFFF)
+/* Step two for killing the application system 16bit (-/W) */
+#define U300_SYSCON_KA2R					(0x0084)
+#define U300_SYSCON_KA2R_MASK					(0xFFFF)
+#define U300_SYSCON_KA2R_VALUE					(0xFFFF)
+/* MMC/MSPRO frequency divider register 0 16bit (R/W) */
+#define U300_SYSCON_MMF0R					(0x90)
+#define U300_SYSCON_MMF0R_MASK					(0x00FF)
+#define U300_SYSCON_MMF0R_FREQ_0_HIGH_MASK			(0x00F0)
+#define U300_SYSCON_MMF0R_FREQ_0_LOW_MASK			(0x000F)
+/* MMC/MSPRO frequency divider register 1 16bit (R/W) */
+#define U300_SYSCON_MMF1R					(0x94)
+#define U300_SYSCON_MMF1R_MASK					(0x00FF)
+#define U300_SYSCON_MMF1R_FREQ_1_HIGH_MASK			(0x00F0)
+#define U300_SYSCON_MMF1R_FREQ_1_LOW_MASK			(0x000F)
+/* AAIF control register 16 bit (R/W) */
+#define U300_SYSCON_AAIFCR					(0x98)
+#define U300_SYSCON_AAIFCR_MASK					(0x0003)
+#define U300_SYSCON_AAIFCR_AASW_CTRL_MASK			(0x0003)
+#define U300_SYSCON_AAIFCR_AASW_CTRL_FUNCTIONAL			(0x0000)
+#define U300_SYSCON_AAIFCR_AASW_CTRL_MONITORING			(0x0001)
+#define U300_SYSCON_AAIFCR_AASW_CTRL_ACC_TO_EXT			(0x0002)
+#define U300_SYSCON_AAIFCR_AASW_CTRL_APP_TO_EXT			(0x0003)
+/* Clock control for the MMC and MSPRO blocks 16bit (R/W) */
+#define U300_SYSCON_MMCR					(0x9C)
+#define U300_SYSCON_MMCR_MASK					(0x0003)
+#define U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE			(0x0002)
+#define U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE			(0x0001)
+
+/* TODO: More SYSCON registers missing */
+#define U300_SYSCON_PMC3R					(0x10c)
+#define U300_SYSCON_PMC3R_APP_MISC_11_MASK			(0xc000)
+#define U300_SYSCON_PMC3R_APP_MISC_11_SPI			(0x4000)
+#define U300_SYSCON_PMC3R_APP_MISC_10_MASK			(0x3000)
+#define U300_SYSCON_PMC3R_APP_MISC_10_SPI			(0x1000)
+/* TODO: Missing other configs, I just added the SPI stuff */
+
+/* SYS_0_CLK_CONTROL first clock control 16bit (R/W) */
+#define U300_SYSCON_S0CCR					(0x120)
+#define U300_SYSCON_S0CCR_FIELD_MASK				(0x43FF)
+#define U300_SYSCON_S0CCR_CLOCK_REQ				(0x4000)
+#define U300_SYSCON_S0CCR_CLOCK_INV				(0x0200)
+#define U300_SYSCON_S0CCR_CLOCK_FREQ_MASK			(0x01E0)
+#define U300_SYSCON_S0CCR_CLOCK_SELECT_MASK			(0x001E)
+#define U300_SYSCON_S0CCR_CLOCK_ENABLE				(0x0001)
+#define U300_SYSCON_S0CCR_SEL_MCLK				(0x8<<1)
+#define U300_SYSCON_S0CCR_SEL_ACC_FSM_CLK			(0xA<<1)
+#define U300_SYSCON_S0CCR_SEL_PLL60_48_CLK			(0xC<<1)
+#define U300_SYSCON_S0CCR_SEL_PLL60_60_CLK			(0xD<<1)
+#define U300_SYSCON_S0CCR_SEL_ACC_PLL208_CLK			(0xE<<1)
+#define U300_SYSCON_S0CCR_SEL_APP_PLL13_CLK			(0x0<<1)
+#define U300_SYSCON_S0CCR_SEL_APP_FSM_CLK			(0x2<<1)
+#define U300_SYSCON_S0CCR_SEL_RTC_CLK				(0x4<<1)
+#define U300_SYSCON_S0CCR_SEL_APP_PLL208_CLK			(0x6<<1)
+/* SYS_1_CLK_CONTROL second clock control 16 bit (R/W) */
+#define U300_SYSCON_S1CCR					(0x124)
+#define U300_SYSCON_S1CCR_FIELD_MASK				(0x43FF)
+#define U300_SYSCON_S1CCR_CLOCK_REQ				(0x4000)
+#define U300_SYSCON_S1CCR_CLOCK_INV				(0x0200)
+#define U300_SYSCON_S1CCR_CLOCK_FREQ_MASK			(0x01E0)
+#define U300_SYSCON_S1CCR_CLOCK_SELECT_MASK			(0x001E)
+#define U300_SYSCON_S1CCR_CLOCK_ENABLE				(0x0001)
+#define U300_SYSCON_S1CCR_SEL_MCLK				(0x8<<1)
+#define U300_SYSCON_S1CCR_SEL_ACC_FSM_CLK			(0xA<<1)
+#define U300_SYSCON_S1CCR_SEL_PLL60_48_CLK			(0xC<<1)
+#define U300_SYSCON_S1CCR_SEL_PLL60_60_CLK			(0xD<<1)
+#define U300_SYSCON_S1CCR_SEL_ACC_PLL208_CLK			(0xE<<1)
+#define U300_SYSCON_S1CCR_SEL_ACC_PLL13_CLK			(0x0<<1)
+#define U300_SYSCON_S1CCR_SEL_APP_FSM_CLK			(0x2<<1)
+#define U300_SYSCON_S1CCR_SEL_RTC_CLK				(0x4<<1)
+#define U300_SYSCON_S1CCR_SEL_APP_PLL208_CLK			(0x6<<1)
+/* SYS_2_CLK_CONTROL third clock contol 16 bit (R/W) */
+#define U300_SYSCON_S2CCR					(0x128)
+#define U300_SYSCON_S2CCR_FIELD_MASK				(0xC3FF)
+#define U300_SYSCON_S2CCR_CLK_STEAL				(0x8000)
+#define U300_SYSCON_S2CCR_CLOCK_REQ				(0x4000)
+#define U300_SYSCON_S2CCR_CLOCK_INV				(0x0200)
+#define U300_SYSCON_S2CCR_CLOCK_FREQ_MASK			(0x01E0)
+#define U300_SYSCON_S2CCR_CLOCK_SELECT_MASK			(0x001E)
+#define U300_SYSCON_S2CCR_CLOCK_ENABLE				(0x0001)
+#define U300_SYSCON_S2CCR_SEL_MCLK				(0x8<<1)
+#define U300_SYSCON_S2CCR_SEL_ACC_FSM_CLK			(0xA<<1)
+#define U300_SYSCON_S2CCR_SEL_PLL60_48_CLK			(0xC<<1)
+#define U300_SYSCON_S2CCR_SEL_PLL60_60_CLK			(0xD<<1)
+#define U300_SYSCON_S2CCR_SEL_ACC_PLL208_CLK			(0xE<<1)
+#define U300_SYSCON_S2CCR_SEL_ACC_PLL13_CLK			(0x0<<1)
+#define U300_SYSCON_S2CCR_SEL_APP_FSM_CLK			(0x2<<1)
+#define U300_SYSCON_S2CCR_SEL_RTC_CLK				(0x4<<1)
+#define U300_SYSCON_S2CCR_SEL_APP_PLL208_CLK			(0x6<<1)
+/* SYS_MISC_CONTROL, miscellaneous 16bit (R/W) */
+#define U300_SYSCON_MCR						(0x12c)
+#define U300_SYSCON_MCR_FIELD_MASK				(0x00FF)
+#define U300_SYSCON_MCR_PMGEN_CR_4_MASK				(0x00C0)
+#define U300_SYSCON_MCR_PMGEN_CR_4_GPIO				(0x0000)
+#define U300_SYSCON_MCR_PMGEN_CR_4_SPI				(0x0040)
+#define U300_SYSCON_MCR_PMGEN_CR_4_AAIF				(0x00C0)
+#define U300_SYSCON_MCR_PMGEN_CR_2_MASK				(0x0030)
+#define U300_SYSCON_MCR_PMGEN_CR_2_GPIO				(0x0000)
+#define U300_SYSCON_MCR_PMGEN_CR_2_EMIF_1_STATIC		(0x0010)
+#define U300_SYSCON_MCR_PMGEN_CR_2_DSP				(0x0020)
+#define U300_SYSCON_MCR_PMGEN_CR_2_AAIF				(0x0030)
+#define U300_SYSCON_MCR_PMGEN_CR_0_MASK				(0x000C)
+#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_1_SDRAM_M1		(0x0000)
+#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_1_SDRAM_M2		(0x0004)
+#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_1_SDRAM_M3		(0x0008)
+#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_0_SDRAM			(0x000C)
+#define U300_SYSCON_MCR_PM1G_MODE_ENABLE			(0x0002)
+#define U300_SYSCON_MCR_PMTG5_MODE_ENABLE			(0x0001)
+/* Clock activity observability register 0 */
+#define U300_SYSCON_C0OAR					(0x140)
+#define U300_SYSCON_C0OAR_MASK					(0xFFFF)
+#define U300_SYSCON_C0OAR_VALUE					(0xFFFF)
+#define U300_SYSCON_C0OAR_BT_H_CLK				(0x8000)
+#define U300_SYSCON_C0OAR_ASPB_P_CLK				(0x4000)
+#define U300_SYSCON_C0OAR_APP_SEMI_H_CLK			(0x2000)
+#define U300_SYSCON_C0OAR_APP_SEMI_CLK				(0x1000)
+#define U300_SYSCON_C0OAR_APP_MMC_MSPRO_CLK			(0x0800)
+#define U300_SYSCON_C0OAR_APP_I2S1_CLK				(0x0400)
+#define U300_SYSCON_C0OAR_APP_I2S0_CLK				(0x0200)
+#define U300_SYSCON_C0OAR_APP_CPU_CLK				(0x0100)
+#define U300_SYSCON_C0OAR_APP_52_CLK				(0x0080)
+#define U300_SYSCON_C0OAR_APP_208_CLK				(0x0040)
+#define U300_SYSCON_C0OAR_APP_104_CLK				(0x0020)
+#define U300_SYSCON_C0OAR_APEX_CLK				(0x0010)
+#define U300_SYSCON_C0OAR_AHPB_M_H_CLK				(0x0008)
+#define U300_SYSCON_C0OAR_AHB_CLK				(0x0004)
+#define U300_SYSCON_C0OAR_AFPB_P_CLK				(0x0002)
+#define U300_SYSCON_C0OAR_AAIF_CLK				(0x0001)
+/* Clock activity observability register 1 */
+#define U300_SYSCON_C1OAR					(0x144)
+#define U300_SYSCON_C1OAR_MASK					(0x3FFE)
+#define U300_SYSCON_C1OAR_VALUE					(0x3FFE)
+#define U300_SYSCON_C1OAR_NFIF_F_CLK				(0x2000)
+#define U300_SYSCON_C1OAR_MSPRO_CLK				(0x1000)
+#define U300_SYSCON_C1OAR_MMC_P_CLK				(0x0800)
+#define U300_SYSCON_C1OAR_MMC_CLK				(0x0400)
+#define U300_SYSCON_C1OAR_KP_P_CLK				(0x0200)
+#define U300_SYSCON_C1OAR_I2C1_P_CLK				(0x0100)
+#define U300_SYSCON_C1OAR_I2C0_P_CLK				(0x0080)
+#define U300_SYSCON_C1OAR_GPIO_CLK				(0x0040)
+#define U300_SYSCON_C1OAR_EMIF_MPMC_CLK				(0x0020)
+#define U300_SYSCON_C1OAR_EMIF_H_CLK				(0x0010)
+#define U300_SYSCON_C1OAR_EVHIST_CLK				(0x0008)
+#define U300_SYSCON_C1OAR_PPM_CLK				(0x0004)
+#define U300_SYSCON_C1OAR_DMA_CLK				(0x0002)
+/* Clock activity observability register 2 */
+#define U300_SYSCON_C2OAR					(0x148)
+#define U300_SYSCON_C2OAR_MASK					(0x0FFF)
+#define U300_SYSCON_C2OAR_VALUE					(0x0FFF)
+#define U300_SYSCON_C2OAR_XGAM_CDI_CLK				(0x0800)
+#define U300_SYSCON_C2OAR_XGAM_CLK				(0x0400)
+#define U300_SYSCON_C2OAR_VC_H_CLK				(0x0200)
+#define U300_SYSCON_C2OAR_VC_CLK				(0x0100)
+#define U300_SYSCON_C2OAR_UA_P_CLK				(0x0080)
+#define U300_SYSCON_C2OAR_TMR1_CLK				(0x0040)
+#define U300_SYSCON_C2OAR_TMR0_CLK				(0x0020)
+#define U300_SYSCON_C2OAR_SPI_P_CLK				(0x0010)
+#define U300_SYSCON_C2OAR_PCM_I2S1_CORE_CLK			(0x0008)
+#define U300_SYSCON_C2OAR_PCM_I2S1_CLK				(0x0004)
+#define U300_SYSCON_C2OAR_PCM_I2S0_CORE_CLK			(0x0002)
+#define U300_SYSCON_C2OAR_PCM_I2S0_CLK				(0x0001)
+
+/* Chip ID register 16bit (R/-) */
+#define U300_SYSCON_CIDR					(0x400)
+/* Video IRQ clear 16bit (R/W) */
+#define U300_SYSCON_VICR					(0x404)
+#define U300_SYSCON_VICR_VIDEO1_IRQ_CLEAR_ENABLE		(0x0002)
+#define U300_SYSCON_VICR_VIDEO0_IRQ_CLEAR_ENABLE		(0x0001)
+/* SMCR */
+#define U300_SYSCON_SMCR					(0x4d0)
+#define U300_SYSCON_SMCR_FIELD_MASK				(0x000e)
+#define U300_SYSCON_SMCR_SEMI_SREFACK_IND			(0x0008)
+#define U300_SYSCON_SMCR_SEMI_SREFREQ_ENABLE			(0x0004)
+#define U300_SYSCON_SMCR_SEMI_EXT_BOOT_MODE_ENABLE		(0x0002)
+/* CPU_SW_DBGEN Software Debug Enable 16bit (R/W) */
+#define U300_SYSCON_CSDR					(0x4f0)
+#define U300_SYSCON_CSDR_SW_DEBUG_ENABLE			(0x0001)
+/* PRINT_CONTROL Print Control 16bit (R/-) */
+#define U300_SYSCON_PCR						(0x4f8)
+#define U300_SYSCON_PCR_SERV_IND				(0x0001)
+/* BOOT_CONTROL 16bit (R/-) */
+#define U300_SYSCON_BCR						(0x4fc)
+#define U300_SYSCON_BCR_ACC_CPU_SUBSYS_VINITHI_IND		(0x0400)
+#define U300_SYSCON_BCR_APP_CPU_SUBSYS_VINITHI_IND		(0x0200)
+#define U300_SYSCON_BCR_EXTRA_BOOT_OPTION_MASK			(0x01FC)
+#define U300_SYSCON_BCR_APP_BOOT_SERV_MASK			(0x0003)
+
+
+/* CPU clock defines */
+/**
+ * CPU high frequency in MHz
+ */
+#define SYSCON_CPU_CLOCK_HIGH    208
+/**
+ * CPU medium frequency in MHz
+ */
+#define SYSCON_CPU_CLOCK_MEDIUM  104
+/**
+ * CPU low frequency in MHz
+ */
+#define SYSCON_CPU_CLOCK_LOW      13
+
+/* EMIF clock defines */
+/**
+ * EMIF high frequency in MHz
+ */
+#define SYSCON_EMIF_CLOCK_HIGH   104
+/**
+ * EMIF medium frequency in MHz
+ */
+#define SYSCON_EMIF_CLOCK_MEDIUM 104
+/**
+ * EMIF low frequency in MHz
+ */
+#define SYSCON_EMIF_CLOCK_LOW     13
+
+/* AHB clock defines */
+/**
+ * AHB high frequency in MHz
+ */
+#define SYSCON_AHB_CLOCK_HIGH     52
+/**
+ * AHB medium frequency in MHz
+ */
+#define SYSCON_AHB_CLOCK_MEDIUM   52
+/**
+ * AHB low frequency in MHz
+ */
+#define SYSCON_AHB_CLOCK_LOW       7  /* i.e 13/2=6.5MHz */
+
+enum syscon_busmaster {
+  SYSCON_BM_DMAC,
+  SYSCON_BM_XGAM,
+  SYSCON_BM_VIDEO_ENC
+};
+
+/*
+ * Note that this array must match the order of the array "clk_reg"
+ * in syscon.c
+ */
+enum syscon_clk {
+  SYSCON_CLKCONTROL_SLOW_BRIDGE,
+  SYSCON_CLKCONTROL_UART,
+  SYSCON_CLKCONTROL_BTR,
+  SYSCON_CLKCONTROL_EH,
+  SYSCON_CLKCONTROL_GPIO,
+  SYSCON_CLKCONTROL_KEYPAD,
+  SYSCON_CLKCONTROL_APP_TIMER,
+  SYSCON_CLKCONTROL_ACC_TIMER,
+  SYSCON_CLKCONTROL_FAST_BRIDGE,
+  SYSCON_CLKCONTROL_I2C0,
+  SYSCON_CLKCONTROL_I2C1,
+  SYSCON_CLKCONTROL_I2S0,
+  SYSCON_CLKCONTROL_I2S1,
+  SYSCON_CLKCONTROL_MMC,
+  SYSCON_CLKCONTROL_SPI,
+  SYSCON_CLKCONTROL_I2S0_CORE,
+  SYSCON_CLKCONTROL_I2S1_CORE,
+  SYSCON_CLKCONTROL_AAIF,
+  SYSCON_CLKCONTROL_AHB,
+  SYSCON_CLKCONTROL_APEX,
+  SYSCON_CLKCONTROL_CPU,
+  SYSCON_CLKCONTROL_DMA,
+  SYSCON_CLKCONTROL_EMIF,
+  SYSCON_CLKCONTROL_NAND_IF,
+  SYSCON_CLKCONTROL_VIDEO_ENC,
+  SYSCON_CLKCONTROL_XGAM,
+  SYSCON_CLKCONTROL_SEMI,
+  SYSCON_CLKCONTROL_AHB_SUBSYS,
+  SYSCON_CLKCONTROL_MSPRO
+};
+
+enum syscon_sysclk_mode {
+  SYSCON_SYSCLK_DISABLED,
+  SYSCON_SYSCLK_M_CLK,
+  SYSCON_SYSCLK_ACC_FSM,
+  SYSCON_SYSCLK_PLL60_48,
+  SYSCON_SYSCLK_PLL60_60,
+  SYSCON_SYSCLK_ACC_PLL208,
+  SYSCON_SYSCLK_APP_PLL13,
+  SYSCON_SYSCLK_APP_FSM,
+  SYSCON_SYSCLK_RTC,
+  SYSCON_SYSCLK_APP_PLL208
+};
+
+enum syscon_sysclk_req {
+  SYSCON_SYSCLKREQ_DISABLED,
+  SYSCON_SYSCLKREQ_ACTIVE_LOW
+};
+
+enum syscon_clk_mode {
+  SYSCON_CLKMODE_OFF,
+  SYSCON_CLKMODE_DEFAULT,
+  SYSCON_CLKMODE_LOW,
+  SYSCON_CLKMODE_MEDIUM,
+  SYSCON_CLKMODE_HIGH,
+  SYSCON_CLKMODE_PERMANENT,
+  SYSCON_CLKMODE_ON,
+};
+
+enum syscon_call_mode {
+  SYSCON_CLKCALL_NOWAIT,
+  SYSCON_CLKCALL_WAIT,
+};
+
+int syscon_dc_on(bool keep_power_on);
+int syscon_set_busmaster_active_state(enum syscon_busmaster busmaster,
+				      bool active);
+bool syscon_get_busmaster_active_state(void);
+int syscon_set_sleep_mask(enum syscon_clk,
+			  bool sleep_ctrl);
+int syscon_config_sysclk(u32 sysclk,
+			 enum syscon_sysclk_mode sysclkmode,
+			 bool inverse,
+			 u32 divisor,
+			 enum syscon_sysclk_req sysclkreq);
+bool syscon_can_turn_off_semi_clock(void);
+
+/* This function is restricted to core.c */
+int syscon_request_normal_power(bool req);
+
+/* This function is restricted to be used by platform_speed.c */
+int syscon_speed_request(enum syscon_call_mode wait_mode,
+			 enum syscon_clk_mode req_clk_mode);
+#endif /* __MACH_SYSCON_H */
diff --git a/arch/arm/mach-u300/include/mach/u300-regs.h b/arch/arm/mach-u300/include/mach/u300-regs.h
new file mode 100644
index 0000000..88333df
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/u300-regs.h
@@ -0,0 +1,187 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/u300-regs.h
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Basic register address definitions in physical memory and
+ * some block defintions for core devices like the timer.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+
+#ifndef __MACH_U300_REGS_H
+#define __MACH_U300_REGS_H
+
+/*
+ * These are the large blocks of memory allocated for I/O.
+ * the defines are used for setting up the I/O memory mapping.
+ */
+
+/* NAND Flash CS0 */
+#define U300_NAND_CS0_PHYS_BASE		0x80000000
+#define U300_NAND_CS0_VIRT_BASE		0xff040000
+
+/* NFIF */
+#define U300_NAND_IF_PHYS_BASE		0x9f800000
+#define U300_NAND_IF_VIRT_BASE		0xff030000
+
+/* AHB Peripherals */
+#define U300_AHB_PER_PHYS_BASE		0xa0000000
+#define U300_AHB_PER_VIRT_BASE		0xff010000
+
+/* FAST Peripherals */
+#define U300_FAST_PER_PHYS_BASE		0xc0000000
+#define U300_FAST_PER_VIRT_BASE		0xff020000
+
+/* SLOW Peripherals */
+#define U300_SLOW_PER_PHYS_BASE		0xc0010000
+#define U300_SLOW_PER_VIRT_BASE		0xff000000
+
+/* Boot ROM */
+#define U300_BOOTROM_PHYS_BASE		0xffff0000
+#define U300_BOOTROM_VIRT_BASE		0xffff0000
+
+/* SEMI config base */
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SEMI_CONFIG_BASE		0x2FFE0000
+#else
+#define U300_SEMI_CONFIG_BASE		0x30000000
+#endif
+
+/*
+ * All the following peripherals are specified at their PHYSICAL address,
+ * so if you need to access them (in the kernel), you MUST use the macros
+ * defined in <asm/io.h> to map to the IO_ADDRESS_AHB() IO_ADDRESS_FAST()
+ * etc.
+ */
+
+/*
+ * AHB peripherals
+ */
+
+/* AHB Peripherals Bridge Controller */
+#define U300_AHB_BRIDGE_BASE		(U300_AHB_PER_PHYS_BASE+0x0000)
+
+/* Vectored Interrupt Controller 0, servicing 32 interrupts */
+#define U300_INTCON0_BASE		(U300_AHB_PER_PHYS_BASE+0x1000)
+#define U300_INTCON0_VBASE		(U300_AHB_PER_VIRT_BASE+0x1000)
+
+/* Vectored Interrupt Controller 1, servicing 32 interrupts */
+#define U300_INTCON1_BASE		(U300_AHB_PER_PHYS_BASE+0x2000)
+#define U300_INTCON1_VBASE		(U300_AHB_PER_VIRT_BASE+0x2000)
+
+/* Memory Stick Pro (MSPRO) controller */
+#define U300_MSPRO_BASE			(U300_AHB_PER_PHYS_BASE+0x3000)
+
+/* EMIF Configuration Area */
+#define U300_EMIF_CFG_BASE		(U300_AHB_PER_PHYS_BASE+0x4000)
+
+
+/*
+ * FAST peripherals
+ */
+
+/* FAST bridge control */
+#define U300_FAST_BRIDGE_BASE		(U300_FAST_PER_PHYS_BASE+0x0000)
+
+/* MMC/SD controller */
+#define U300_MMCSD_BASE			(U300_FAST_PER_PHYS_BASE+0x1000)
+
+/* PCM I2S0 controller */
+#define U300_PCM_I2S0_BASE		(U300_FAST_PER_PHYS_BASE+0x2000)
+
+/* PCM I2S1 controller */
+#define U300_PCM_I2S1_BASE		(U300_FAST_PER_PHYS_BASE+0x3000)
+
+/* I2C0 controller */
+#define U300_I2C0_BASE			(U300_FAST_PER_PHYS_BASE+0x4000)
+
+/* I2C1 controller */
+#define U300_I2C1_BASE			(U300_FAST_PER_PHYS_BASE+0x5000)
+
+/* SPI controller */
+#define U300_SPI_BASE			(U300_FAST_PER_PHYS_BASE+0x6000)
+
+#ifdef CONFIG_MACH_U300_BS335
+/* Fast UART1 on U335 only */
+#define U300_UART1_BASE			(U300_SLOW_PER_PHYS_BASE+0x7000)
+#endif
+
+/*
+ * SLOW peripherals
+ */
+
+/* SLOW bridge control */
+#define U300_SLOW_BRIDGE_BASE		(U300_SLOW_PER_PHYS_BASE)
+
+/* SYSCON */
+#define U300_SYSCON_BASE		(U300_SLOW_PER_PHYS_BASE+0x1000)
+#define U300_SYSCON_VBASE		(U300_SLOW_PER_VIRT_BASE+0x1000)
+
+/* Watchdog */
+#define U300_WDOG_BASE			(U300_SLOW_PER_PHYS_BASE+0x2000)
+
+/* UART0 */
+#define U300_UART0_BASE			(U300_SLOW_PER_PHYS_BASE+0x3000)
+
+/* APP side special timer */
+#define U300_TIMER_APP_BASE		(U300_SLOW_PER_PHYS_BASE+0x4000)
+#define U300_TIMER_APP_VBASE		(U300_SLOW_PER_VIRT_BASE+0x4000)
+
+/* Keypad */
+#define U300_KEYPAD_BASE		(U300_SLOW_PER_PHYS_BASE+0x5000)
+
+/* GPIO */
+#define U300_GPIO_BASE			(U300_SLOW_PER_PHYS_BASE+0x6000)
+
+/* RTC */
+#define U300_RTC_BASE			(U300_SLOW_PER_PHYS_BASE+0x7000)
+
+/* Bus tracer */
+#define U300_BUSTR_BASE			(U300_SLOW_PER_PHYS_BASE+0x8000)
+
+/* Event handler (hardware queue) */
+#define U300_EVHIST_BASE		(U300_SLOW_PER_PHYS_BASE+0x9000)
+
+/* Genric Timer */
+#define U300_TIMER_BASE			(U300_SLOW_PER_PHYS_BASE+0xa000)
+
+/* PPM */
+#define U300_PPM_BASE			(U300_SLOW_PER_PHYS_BASE+0xb000)
+
+
+/*
+ * REST peripherals
+ */
+
+/* ISP (image signal processor) is only available in U335 */
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_ISP_BASE			(0xA0008000)
+#endif
+
+/* DMA Controller base */
+#define U300_DMAC_BASE			(0xC0020000)
+
+/* MSL Base */
+#define U300_MSL_BASE			(0xc0022000)
+
+/* APEX Base */
+#define U300_APEX_BASE			(0xc0030000)
+
+/* Video Encoder Base */
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_VIDEOENC_BASE		(0xc0080000)
+#else
+#define U300_VIDEOENC_BASE		(0xc0040000)
+#endif
+
+/* XGAM Base */
+#define U300_XGAM_BASE			(0xd0000000)
+
+/*
+ * Virtual accessor macros for static devices
+ */
+
+
+#endif
-- 
cgit v0.10.2


From bd41b99d4661e775ff152f2842782c43dbb30a59 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Thu, 23 Apr 2009 21:15:04 +0100
Subject: [ARM] 5471/2: U300 GPIO and PADMUX support

This adds GPIO and PADMUX headers and implementation for
the U300 platform. This is an implementation in isolation
that depend on later patches in this series to plug into
the framework.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-u300/gpio.c b/arch/arm/mach-u300/gpio.c
new file mode 100644
index 0000000..2d5ae8e
--- /dev/null
+++ b/arch/arm/mach-u300/gpio.c
@@ -0,0 +1,701 @@
+/*
+ *
+ * arch/arm/mach-u300/gpio.c
+ *
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * U300 GPIO module.
+ * This can driver either of the two basic GPIO cores
+ * available in the U300 platforms:
+ * COH 901 335   - Used in DB3150 (U300 1.0) and DB3200 (U330 1.0)
+ * COH 901 571/3 - Used in DB3210 (U365 2.0) and DB3350 (U335 1.0)
+ * Notice that you also have inline macros in <asm-arch/gpio.h>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+
+/* Need access to SYSCON registers for PADmuxing */
+#include <mach/syscon.h>
+
+#include "padmux.h"
+
+/* Reference to GPIO block clock */
+static struct clk *clk;
+
+/* Memory resource */
+static struct resource *memres;
+static void __iomem *virtbase;
+
+struct u300_gpio_port {
+	const char *name;
+	int irq;
+	int number;
+};
+
+
+static struct u300_gpio_port gpio_ports[] = {
+	{
+		.name = "gpio0",
+		.number = 0,
+	},
+	{
+		.name = "gpio1",
+		.number = 1,
+	},
+	{
+		.name = "gpio2",
+		.number = 2,
+	},
+#ifdef U300_COH901571_3
+	{
+		.name = "gpio3",
+		.number = 3,
+	},
+	{
+		.name = "gpio4",
+		.number = 4,
+	},
+#ifdef CONFIG_MACH_U300_BS335
+	{
+		.name = "gpio5",
+		.number = 5,
+	},
+	{
+		.name = "gpio6",
+		.number = 6,
+	},
+#endif
+#endif
+
+};
+
+
+#ifdef U300_COH901571_3
+
+/* Default input value */
+#define DEFAULT_OUTPUT_LOW   0
+#define DEFAULT_OUTPUT_HIGH  1
+
+/* GPIO Pull-Up status */
+#define DISABLE_PULL_UP  0
+#define ENABLE_PULL_UP  1
+
+#define GPIO_NOT_USED 0
+#define GPIO_IN       1
+#define GPIO_OUT      2
+
+struct u300_gpio_configuration_data {
+	unsigned char pin_usage;
+	unsigned char default_output_value;
+	unsigned char pull_up;
+};
+
+/* Initial configuration */
+const struct u300_gpio_configuration_data
+u300_gpio_config[U300_GPIO_NUM_PORTS][U300_GPIO_PINS_PER_PORT] = {
+#ifdef CONFIG_MACH_U300_BS335
+	/* Port 0, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_HIGH,  DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP}
+	},
+	/* Port 1, pins 0-7 */
+	{
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_HIGH,  DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP}
+	},
+	/* Port 2, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP}
+	},
+	/* Port 3, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP}
+	},
+	/* Port 4, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP}
+	},
+	/* Port 5, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP}
+	},
+	/* Port 6, pind 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP}
+	}
+#endif
+
+#ifdef CONFIG_MACH_U300_BS365
+	/* Port 0, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP}
+	},
+	/* Port 1, pins 0-7 */
+	{
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_HIGH,  DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP}
+	},
+	/* Port 2, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,   DISABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP}
+	},
+	/* Port 3, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP}
+	},
+	/* Port 4, pins 0-7 */
+	{
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_IN,  DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		/* These 4 pins doesn't exist on DB3210 */
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP},
+		{GPIO_OUT, DEFAULT_OUTPUT_LOW,    ENABLE_PULL_UP}
+	}
+#endif
+};
+#endif
+
+
+/* No users == we can power down GPIO */
+static int gpio_users;
+
+struct gpio_struct {
+	int (*callback)(void *);
+	void *data;
+	int users;
+};
+
+static struct gpio_struct gpio_pin[U300_GPIO_MAX];
+
+/*
+ * Let drivers register callback in order to get notified when there is
+ * an interrupt on the gpio pin
+ */
+int gpio_register_callback(unsigned gpio, int (*func)(void *arg), void *data)
+{
+	if (gpio_pin[gpio].callback)
+		printk(KERN_WARNING "GPIO: %s: WARNING: callback already " \
+		       "registered for gpio pin#%d\n", __func__, gpio);
+	gpio_pin[gpio].callback = func;
+	gpio_pin[gpio].data = data;
+
+	return 0;
+}
+EXPORT_SYMBOL(gpio_register_callback);
+
+int gpio_unregister_callback(unsigned gpio)
+{
+	if (!gpio_pin[gpio].callback)
+		printk(KERN_WARNING "GPIO: %s: WARNING: callback already " \
+		       "unregistered for gpio pin#%d\n", __func__, gpio);
+	gpio_pin[gpio].callback = NULL;
+	gpio_pin[gpio].data = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(gpio_unregister_callback);
+
+int gpio_request(unsigned gpio, const char *label)
+{
+	if (gpio_pin[gpio].users)
+		return -EINVAL;
+	else
+		gpio_pin[gpio].users++;
+
+	gpio_users++;
+
+	return 0;
+}
+EXPORT_SYMBOL(gpio_request);
+
+void gpio_free(unsigned gpio)
+{
+	gpio_users--;
+	gpio_pin[gpio].users--;
+	if (unlikely(gpio_pin[gpio].users < 0)) {
+		printk(KERN_WARNING "GPIO: Warning: gpio#%d release mismatch\n",
+				gpio);
+		gpio_pin[gpio].users = 0;
+	}
+
+	return;
+}
+EXPORT_SYMBOL(gpio_free);
+
+/* This returns zero or nonzero */
+int gpio_get_value(unsigned gpio)
+{
+	return readl(virtbase + U300_GPIO_PXPDIR +
+	  PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING) & (1 << (gpio & 0x07));
+}
+EXPORT_SYMBOL(gpio_get_value);
+
+/*
+ * We hope that the compiler will optimize away the unused branch
+ * in case "value" is a constant
+ */
+void gpio_set_value(unsigned gpio, int value)
+{
+	u32 val;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (value) {
+		/* set */
+		val = readl(virtbase + U300_GPIO_PXPDOR +
+		  PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING)
+		  & (1 << (gpio & 0x07));
+		writel(val | (1 << (gpio & 0x07)), virtbase +
+		  U300_GPIO_PXPDOR +
+		  PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING);
+	} else {
+		/* clear */
+		val = readl(virtbase + U300_GPIO_PXPDOR +
+		  PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING)
+		  & (1 << (gpio & 0x07));
+		writel(val & ~(1 << (gpio & 0x07)), virtbase +
+		  U300_GPIO_PXPDOR +
+		  PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING);
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(gpio_set_value);
+
+int gpio_direction_input(unsigned gpio)
+{
+	unsigned long flags;
+	u32 val;
+
+	if (gpio > U300_GPIO_MAX)
+		return -EINVAL;
+
+	local_irq_save(flags);
+	val = readl(virtbase + U300_GPIO_PXPCR + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	/* Mask out this pin*/
+	val &= ~(U300_GPIO_PXPCR_PIN_MODE_MASK << ((gpio & 0x07) << 1));
+	/* This is not needed since it sets the bits to zero.*/
+	/* val |= (U300_GPIO_PXPCR_PIN_MODE_INPUT << (gpio*2)); */
+	writel(val, virtbase + U300_GPIO_PXPCR + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	local_irq_restore(flags);
+	return 0;
+}
+EXPORT_SYMBOL(gpio_direction_input);
+
+int gpio_direction_output(unsigned gpio, int value)
+{
+	unsigned long flags;
+	u32 val;
+
+	if (gpio > U300_GPIO_MAX)
+		return -EINVAL;
+
+	local_irq_save(flags);
+	val = readl(virtbase + U300_GPIO_PXPCR + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	/* Mask out this pin */
+	val &= ~(U300_GPIO_PXPCR_PIN_MODE_MASK << ((gpio & 0x07) << 1));
+	/*
+	 * FIXME: configure for push/pull, open drain or open source per pin
+	 * in setup. The current driver will only support push/pull.
+	 */
+	val |= (U300_GPIO_PXPCR_PIN_MODE_OUTPUT_PUSH_PULL
+			<< ((gpio & 0x07) << 1));
+	writel(val, virtbase + U300_GPIO_PXPCR + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	gpio_set_value(gpio, value);
+	local_irq_restore(flags);
+	return 0;
+}
+EXPORT_SYMBOL(gpio_direction_output);
+
+/*
+ * Enable an IRQ, edge is rising edge (!= 0) or falling edge (==0).
+ */
+void enable_irq_on_gpio_pin(unsigned gpio, int edge)
+{
+	u32 val;
+	unsigned long flags;
+	local_irq_save(flags);
+
+	val = readl(virtbase + U300_GPIO_PXIEN + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	val |= (1 << (gpio & 0x07));
+	writel(val, virtbase + U300_GPIO_PXIEN + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	val = readl(virtbase + U300_GPIO_PXICR + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	if (edge)
+		val |= (1 << (gpio & 0x07));
+	else
+		val &= ~(1 << (gpio & 0x07));
+	writel(val, virtbase + U300_GPIO_PXICR + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(enable_irq_on_gpio_pin);
+
+void disable_irq_on_gpio_pin(unsigned gpio)
+{
+	u32 val;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	val = readl(virtbase + U300_GPIO_PXIEN + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	val &= ~(1 << (gpio & 0x07));
+	writel(val, virtbase + U300_GPIO_PXIEN + PIN_TO_PORT(gpio) *
+				U300_GPIO_PORTX_SPACING);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(disable_irq_on_gpio_pin);
+
+/* Enable (value == 0) or disable (value == 1) internal pullup */
+void gpio_pullup(unsigned gpio, int value)
+{
+	u32 val;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (value) {
+		val = readl(virtbase + U300_GPIO_PXPER + PIN_TO_PORT(gpio) *
+					U300_GPIO_PORTX_SPACING);
+		writel(val | (1 << (gpio & 0x07)), virtbase + U300_GPIO_PXPER +
+				PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING);
+	} else {
+		val = readl(virtbase + U300_GPIO_PXPER + PIN_TO_PORT(gpio) *
+					U300_GPIO_PORTX_SPACING);
+		writel(val & ~(1 << (gpio & 0x07)), virtbase + U300_GPIO_PXPER +
+				PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING);
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(gpio_pullup);
+
+static irqreturn_t gpio_irq_handler(int irq, void *dev_id)
+{
+	struct u300_gpio_port *port = dev_id;
+	u32 val;
+	int pin;
+
+	/* Read event register */
+	val = readl(virtbase + U300_GPIO_PXIEV + port->number *
+				U300_GPIO_PORTX_SPACING);
+	/* Mask with enable register */
+	val &= readl(virtbase + U300_GPIO_PXIEV + port->number *
+				U300_GPIO_PORTX_SPACING);
+	/* Mask relevant bits */
+	val &= U300_GPIO_PXIEV_ALL_IRQ_EVENT_MASK;
+	/* ACK IRQ (clear event) */
+	writel(val, virtbase + U300_GPIO_PXIEV + port->number *
+				U300_GPIO_PORTX_SPACING);
+	/* Print message */
+	while (val != 0) {
+		unsigned gpio;
+
+		pin = __ffs(val);
+		/* mask off this pin */
+		val &= ~(1 << pin);
+		gpio = (port->number << 3) + pin;
+
+		if (gpio_pin[gpio].callback)
+			(void)gpio_pin[gpio].callback(gpio_pin[gpio].data);
+		else
+			printk(KERN_DEBUG "GPIO: Stray GPIO IRQ on line %d\n",
+			       gpio);
+	}
+	return IRQ_HANDLED;
+}
+
+static void gpio_set_initial_values(void)
+{
+#ifdef U300_COH901571_3
+	int i, j;
+	unsigned long flags;
+	u32 val;
+
+	/* Write default values to all pins */
+	for (i = 0; i < U300_GPIO_NUM_PORTS; i++) {
+		val = 0;
+		for (j = 0; j < 8; j++)
+			val |= (u32) (u300_gpio_config[i][j].default_output_value != DEFAULT_OUTPUT_LOW) << j;
+		local_irq_save(flags);
+		writel(val, virtbase + U300_GPIO_PXPDOR + i * U300_GPIO_PORTX_SPACING);
+		local_irq_restore(flags);
+	}
+
+	/*
+	 * Put all pins that are set to either 'GPIO_OUT' or 'GPIO_NOT_USED'
+	 * to output and 'GPIO_IN' to input for each port. And initalize
+	 * default value on outputs.
+	 */
+	for (i = 0; i < U300_GPIO_NUM_PORTS; i++) {
+		for (j = 0; j < U300_GPIO_PINS_PER_PORT; j++) {
+			local_irq_save(flags);
+			val = readl(virtbase + U300_GPIO_PXPCR +
+					 i * U300_GPIO_PORTX_SPACING);
+			/* Mask out this pin */
+			val &= ~(U300_GPIO_PXPCR_PIN_MODE_MASK << (j << 1));
+
+			if (u300_gpio_config[i][j].pin_usage != GPIO_IN)
+				val |= (U300_GPIO_PXPCR_PIN_MODE_OUTPUT_PUSH_PULL << (j << 1));
+			writel(val, virtbase + U300_GPIO_PXPCR +
+					 i * U300_GPIO_PORTX_SPACING);
+			local_irq_restore(flags);
+		}
+	}
+
+	/* Enable or disable the internal pull-ups in the GPIO ASIC block */
+	for (i = 0; i < U300_GPIO_MAX; i++) {
+		val = 0;
+		for (j = 0; j < 8; j++)
+			val |= (u32)((u300_gpio_config[i][j].pull_up == DISABLE_PULL_UP)) << j;
+		local_irq_save(flags);
+		writel(val, virtbase + U300_GPIO_PXPER + i * U300_GPIO_PORTX_SPACING);
+		local_irq_restore(flags);
+	}
+#endif
+}
+
+static int __devinit gpio_probe(struct platform_device *pdev)
+{
+	u32 val;
+	int err = 0;
+	int i;
+	int num_irqs;
+
+	memset(gpio_pin, 0, sizeof(gpio_pin));
+
+	/* Get GPIO clock */
+	clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		err = PTR_ERR(clk);
+		printk(KERN_ERR "GPIO: could not get GPIO clock\n");
+		goto err_no_clk;
+	}
+	err = clk_enable(clk);
+	if (err) {
+		printk(KERN_ERR "GPIO: could not enable GPIO clock\n");
+		goto err_no_clk_enable;
+	}
+
+	memres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!memres)
+		goto err_no_resource;
+
+	if (request_mem_region(memres->start, memres->end - memres->start, "GPIO Controller")
+	    == NULL) {
+		err = -ENODEV;
+		goto err_no_ioregion;
+	}
+
+	virtbase = ioremap(memres->start, memres->end - memres->start + 1);
+	if (!virtbase) {
+		err = -ENOMEM;
+		goto err_no_ioremap;
+	}
+
+#ifdef U300_COH901335
+	printk(KERN_INFO "GPIO: Initializing GPIO Controller COH 901 335\n");
+	/* Turn on the GPIO block */
+	writel(U300_GPIO_CR_BLOCK_CLOCK_ENABLE, virtbase + U300_GPIO_CR);
+#endif
+
+#ifdef U300_COH901571_3
+	printk(KERN_INFO "GPIO: Initializing GPIO Controller COH 901 571/3\n");
+	val = readl(virtbase + U300_GPIO_CR);
+	printk(KERN_INFO "GPIO: COH901571/3 block version: %d, " \
+	       "number of cores: %d\n",
+	       ((val & 0x0000FE00) >> 9),
+	       ((val & 0x000001FC) >> 2));
+	writel(U300_GPIO_CR_BLOCK_CLKRQ_ENABLE, virtbase + U300_GPIO_CR);
+#endif
+
+	/* Set up some padmuxing here */
+#ifdef CONFIG_MMC
+	pmx_set_mission_mode_mmc();
+#endif
+#ifdef CONFIG_SPI_PL022
+	pmx_set_mission_mode_spi();
+#endif
+
+	gpio_set_initial_values();
+
+	for (num_irqs = 0 ; num_irqs < U300_GPIO_NUM_PORTS; num_irqs++) {
+
+		gpio_ports[num_irqs].irq =
+			platform_get_irq_byname(pdev,
+						gpio_ports[num_irqs].name);
+
+		err = request_irq(gpio_ports[num_irqs].irq,
+				  gpio_irq_handler, IRQF_DISABLED,
+				  gpio_ports[num_irqs].name,
+				  &gpio_ports[num_irqs]);
+		if (err) {
+			printk(KERN_CRIT "GPIO: Cannot allocate IRQ for %s!\n",
+			       gpio_ports[num_irqs].name);
+			goto err_no_irq;
+		}
+		/* Turns off PortX_irq_force */
+		writel(0x0, virtbase + U300_GPIO_PXIFR +
+				 num_irqs * U300_GPIO_PORTX_SPACING);
+	}
+	printk(KERN_INFO "GPIO: U300 gpio module loaded\n");
+
+	return 0;
+
+ err_no_irq:
+	for (i = 0; i < num_irqs; i++)
+		free_irq(gpio_ports[i].irq, &gpio_ports[i]);
+	iounmap(virtbase);
+ err_no_ioremap:
+	release_mem_region(memres->start, memres->end - memres->start);
+ err_no_ioregion:
+ err_no_resource:
+	clk_disable(clk);
+ err_no_clk_enable:
+	clk_put(clk);
+ err_no_clk:
+	printk(KERN_INFO "GPIO: module ERROR:%d\n", err);
+	return err;
+}
+
+static int __devexit gpio_remove(struct platform_device *pdev)
+{
+	int i;
+
+	/* Turn off the GPIO block */
+	writel(0x00000000U, virtbase + U300_GPIO_CR);
+	for (i = 0 ; i < U300_GPIO_NUM_PORTS; i++)
+		free_irq(gpio_ports[i].irq, &gpio_ports[i]);
+	iounmap(virtbase);
+	release_mem_region(memres->start, memres->end - memres->start);
+	clk_disable(clk);
+	clk_put(clk);
+	return 0;
+}
+
+static struct platform_driver gpio_driver = {
+	.driver		= {
+		.name	= "u300-gpio",
+	},
+	.probe		= gpio_probe,
+	.remove		= __devexit_p(gpio_remove),
+};
+
+
+static int __init u300_gpio_init(void)
+{
+	return platform_driver_register(&gpio_driver);
+}
+
+static void __exit u300_gpio_exit(void)
+{
+	platform_driver_unregister(&gpio_driver);
+}
+
+arch_initcall(u300_gpio_init);
+module_exit(u300_gpio_exit);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
+
+#ifdef U300_COH901571_3
+MODULE_DESCRIPTION("ST-Ericsson AB COH 901 571/3 GPIO driver");
+#endif
+
+#ifdef U300_COH901335
+MODULE_DESCRIPTION("ST-Ericsson AB COH 901 335 GPIO driver");
+#endif
+
+MODULE_LICENSE("GPL");
diff --git a/arch/arm/mach-u300/include/mach/gpio.h b/arch/arm/mach-u300/include/mach/gpio.h
new file mode 100644
index 0000000..c817412
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/gpio.h
@@ -0,0 +1,290 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/gpio.h
+ *
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * GPIO block resgister definitions and inline macros for
+ * U300 GPIO COH 901 335 or COH 901 571/3
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+
+#ifndef __MACH_U300_GPIO_H
+#define __MACH_U300_GPIO_H
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <mach/hardware.h>
+#include <asm/irq.h>
+
+/* Switch type depending on platform/chip variant */
+#if defined(CONFIG_MACH_U300_BS2X) || defined(CONFIG_MACH_U300_BS330)
+#define U300_COH901335
+#endif
+#if defined(CONFIG_MACH_U300_BS365) || defined(CONFIG_MACH_U300_BS335)
+#define U300_COH901571_3
+#endif
+
+/* Get base address for regs here */
+#include "u300-regs.h"
+/* IRQ numbers */
+#include "irqs.h"
+
+/*
+ * This is the GPIO block definitions. GPIO (General Purpose I/O) can be
+ * used for anything, and often is. The event/enable etc figures are for
+ * the lowermost pin (pin 0 on each port), shift this left to match your
+ * pin if you're gonna use these values.
+ */
+#ifdef U300_COH901335
+#define U300_GPIO_PORTX_SPACING				(0x1C)
+/* Port X Pin Data Register 32bit, this is both input and output (R/W) */
+#define U300_GPIO_PXPDIR				(0x00)
+#define U300_GPIO_PXPDOR				(0x00)
+/* Port X Pin Config Register 32bit (R/W) */
+#define U300_GPIO_PXPCR					(0x04)
+#define U300_GPIO_PXPCR_ALL_PINS_MODE_MASK		(0x0000FFFFUL)
+#define U300_GPIO_PXPCR_PIN_MODE_MASK			(0x00000003UL)
+#define U300_GPIO_PXPCR_PIN_MODE_SHIFT			(0x00000002UL)
+#define U300_GPIO_PXPCR_PIN_MODE_INPUT			(0x00000000UL)
+#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_PUSH_PULL	(0x00000001UL)
+#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_OPEN_DRAIN	(0x00000002UL)
+#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_OPEN_SOURCE	(0x00000003UL)
+/* Port X Interrupt Event Register 32bit (R/W) */
+#define U300_GPIO_PXIEV					(0x08)
+#define U300_GPIO_PXIEV_ALL_IRQ_EVENT_MASK		(0x000000FFUL)
+#define U300_GPIO_PXIEV_IRQ_EVENT			(0x00000001UL)
+/* Port X Interrupt Enable Register 32bit (R/W) */
+#define U300_GPIO_PXIEN					(0x0C)
+#define U300_GPIO_PXIEN_ALL_IRQ_ENABLE_MASK		(0x000000FFUL)
+#define U300_GPIO_PXIEN_IRQ_ENABLE			(0x00000001UL)
+/* Port X Interrupt Force Register 32bit (R/W) */
+#define U300_GPIO_PXIFR					(0x10)
+#define U300_GPIO_PXIFR_ALL_IRQ_FORCE_MASK		(0x000000FFUL)
+#define U300_GPIO_PXIFR_IRQ_FORCE			(0x00000001UL)
+/* Port X Interrupt Config Register 32bit (R/W) */
+#define U300_GPIO_PXICR					(0x14)
+#define U300_GPIO_PXICR_ALL_IRQ_CONFIG_MASK		(0x000000FFUL)
+#define U300_GPIO_PXICR_IRQ_CONFIG_MASK			(0x00000001UL)
+#define U300_GPIO_PXICR_IRQ_CONFIG_FALLING_EDGE		(0x00000000UL)
+#define U300_GPIO_PXICR_IRQ_CONFIG_RISING_EDGE		(0x00000001UL)
+/* Port X Pull-up Enable Register 32bit (R/W) */
+#define U300_GPIO_PXPER					(0x18)
+#define U300_GPIO_PXPER_ALL_PULL_UP_DISABLE_MASK	(0x000000FFUL)
+#define U300_GPIO_PXPER_PULL_UP_DISABLE			(0x00000001UL)
+/* Control Register 32bit (R/W) */
+#define U300_GPIO_CR					(0x54)
+#define U300_GPIO_CR_BLOCK_CLOCK_ENABLE			(0x00000001UL)
+/* three ports of 8 bits each = GPIO pins 0..23 */
+#define U300_GPIO_NUM_PORTS 3
+#define U300_GPIO_PINS_PER_PORT 8
+#define U300_GPIO_MAX	(U300_GPIO_PINS_PER_PORT * U300_GPIO_NUM_PORTS - 1)
+#endif
+
+#ifdef U300_COH901571_3
+/*
+ * Control Register 32bit (R/W)
+ * bit 15-9 (mask 0x0000FE00) contains the number of cores. 8*cores
+ * gives the number of GPIO pins.
+ * bit 8-2  (mask 0x000001FC) contains the core version ID.
+ */
+#define U300_GPIO_CR					(0x00)
+#define U300_GPIO_CR_SYNC_SEL_ENABLE			(0x00000002UL)
+#define U300_GPIO_CR_BLOCK_CLKRQ_ENABLE			(0x00000001UL)
+#define U300_GPIO_PORTX_SPACING				(0x30)
+/* Port X Pin Data INPUT Register 32bit (R/W) */
+#define U300_GPIO_PXPDIR				(0x04)
+/* Port X Pin Data OUTPUT Register 32bit (R/W) */
+#define U300_GPIO_PXPDOR				(0x08)
+/* Port X Pin Config Register 32bit (R/W) */
+#define U300_GPIO_PXPCR					(0x0C)
+#define U300_GPIO_PXPCR_ALL_PINS_MODE_MASK		(0x0000FFFFUL)
+#define U300_GPIO_PXPCR_PIN_MODE_MASK			(0x00000003UL)
+#define U300_GPIO_PXPCR_PIN_MODE_SHIFT			(0x00000002UL)
+#define U300_GPIO_PXPCR_PIN_MODE_INPUT			(0x00000000UL)
+#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_PUSH_PULL	(0x00000001UL)
+#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_OPEN_DRAIN	(0x00000002UL)
+#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_OPEN_SOURCE	(0x00000003UL)
+/* Port X Pull-up Enable Register 32bit (R/W) */
+#define U300_GPIO_PXPER					(0x10)
+#define U300_GPIO_PXPER_ALL_PULL_UP_DISABLE_MASK	(0x000000FFUL)
+#define U300_GPIO_PXPER_PULL_UP_DISABLE			(0x00000001UL)
+/* Port X Interrupt Event Register 32bit (R/W) */
+#define U300_GPIO_PXIEV					(0x14)
+#define U300_GPIO_PXIEV_ALL_IRQ_EVENT_MASK		(0x000000FFUL)
+#define U300_GPIO_PXIEV_IRQ_EVENT			(0x00000001UL)
+/* Port X Interrupt Enable Register 32bit (R/W) */
+#define U300_GPIO_PXIEN					(0x18)
+#define U300_GPIO_PXIEN_ALL_IRQ_ENABLE_MASK		(0x000000FFUL)
+#define U300_GPIO_PXIEN_IRQ_ENABLE			(0x00000001UL)
+/* Port X Interrupt Force Register 32bit (R/W) */
+#define U300_GPIO_PXIFR					(0x1C)
+#define U300_GPIO_PXIFR_ALL_IRQ_FORCE_MASK		(0x000000FFUL)
+#define U300_GPIO_PXIFR_IRQ_FORCE			(0x00000001UL)
+/* Port X Interrupt Config Register 32bit (R/W) */
+#define U300_GPIO_PXICR					(0x20)
+#define U300_GPIO_PXICR_ALL_IRQ_CONFIG_MASK		(0x000000FFUL)
+#define U300_GPIO_PXICR_IRQ_CONFIG_MASK			(0x00000001UL)
+#define U300_GPIO_PXICR_IRQ_CONFIG_FALLING_EDGE		(0x00000000UL)
+#define U300_GPIO_PXICR_IRQ_CONFIG_RISING_EDGE		(0x00000001UL)
+#ifdef CONFIG_MACH_U300_BS335
+/* seven ports of 8 bits each = GPIO pins 0..55 */
+#define U300_GPIO_NUM_PORTS 7
+#else
+/* five ports of 8 bits each = GPIO pins 0..39 */
+#define U300_GPIO_NUM_PORTS 5
+#endif
+#define U300_GPIO_PINS_PER_PORT 8
+#define U300_GPIO_MAX (U300_GPIO_PINS_PER_PORT * U300_GPIO_NUM_PORTS - 1)
+#endif
+
+/*
+ * Individual pin assignments for the B26/S26. Notice that the
+ * actual usage of these pins depends on the PAD MUX settings, that
+ * is why the same number can potentially appear several times.
+ * In the reference design each pin is only used for one purpose.
+ * These were determined by inspecting the B26/S26 schematic:
+ * 2/1911-ROA 128 1603
+ */
+#ifdef CONFIG_MACH_U300_BS2X
+#define U300_GPIO_PIN_UART_RX		0
+#define U300_GPIO_PIN_UART_TX		1
+#define U300_GPIO_PIN_GPIO02		2  /* Unrouted */
+#define U300_GPIO_PIN_GPIO03		3  /* Unrouted */
+#define U300_GPIO_PIN_CAM_SLEEP		4
+#define U300_GPIO_PIN_CAM_REG_EN	5
+#define U300_GPIO_PIN_GPIO06		6  /* Unrouted */
+#define U300_GPIO_PIN_GPIO07		7  /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO08		8  /* Service point SP2321 */
+#define U300_GPIO_PIN_GPIO09		9  /* Service point SP2322 */
+#define U300_GPIO_PIN_PHFSENSE		10 /* Headphone jack sensing */
+#define U300_GPIO_PIN_MMC_CLKRET	11 /* Clock return from MMC/SD card */
+#define U300_GPIO_PIN_MMC_CD		12 /* MMC Card insertion detection */
+#define U300_GPIO_PIN_FLIPSENSE		13 /* Mechanical flip sensing */
+#define U300_GPIO_PIN_GPIO14		14 /* DSP JTAG Port RTCK */
+#define U300_GPIO_PIN_GPIO15		15 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO16		16 /* Unrouted */
+#define U300_GPIO_PIN_GPIO17		17 /* Unrouted */
+#define U300_GPIO_PIN_GPIO18		18 /* Unrouted */
+#define U300_GPIO_PIN_GPIO19		19 /* Unrouted */
+#define U300_GPIO_PIN_GPIO20		20 /* Unrouted */
+#define U300_GPIO_PIN_GPIO21		21 /* Unrouted */
+#define U300_GPIO_PIN_GPIO22		22 /* Unrouted */
+#define U300_GPIO_PIN_GPIO23		23 /* Unrouted */
+#endif
+
+/*
+ * Individual pin assignments for the B330/S330 and B365/S365.
+ * Notice that the actual usage of these pins depends on the
+ * PAD MUX settings, that is why the same number can potentially
+ * appear several times. In the reference design each pin is only
+ * used for one purpose. These were determined by inspecting the
+ * S365 schematic.
+ */
+#if defined(CONFIG_MACH_U300_BS330) || defined(CONFIG_MACH_U300_BS365) || \
+    defined(CONFIG_MACH_U300_BS335)
+#define U300_GPIO_PIN_UART_RX		0
+#define U300_GPIO_PIN_UART_TX		1
+#define U300_GPIO_PIN_UART_CTS		2
+#define U300_GPIO_PIN_UART_RTS		3
+#define U300_GPIO_PIN_CAM_MAIN_STANDBY	4 /* Camera MAIN standby */
+#define U300_GPIO_PIN_GPIO05		5 /* Unrouted */
+#define U300_GPIO_PIN_MS_CD		6 /* Memory Stick Card insertion */
+#define U300_GPIO_PIN_GPIO07		7 /* Test point TP2430 */
+
+#define U300_GPIO_PIN_GPIO08		8 /* Test point TP2437 */
+#define U300_GPIO_PIN_GPIO09		9 /* Test point TP2431 */
+#define U300_GPIO_PIN_GPIO10		10 /* Test point TP2432 */
+#define U300_GPIO_PIN_MMC_CLKRET	11 /* Clock return from MMC/SD card */
+#define U300_GPIO_PIN_MMC_CD		12 /* MMC Card insertion detection */
+#define U300_GPIO_PIN_CAM_SUB_STANDBY	13 /* Camera SUB standby */
+#define U300_GPIO_PIN_GPIO14		14 /* Test point TP2436 */
+#define U300_GPIO_PIN_GPIO15		15 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO16		16 /* Test point TP2438 */
+#define U300_GPIO_PIN_PHFSENSE		17 /* Headphone jack sensing */
+#define U300_GPIO_PIN_GPIO18		18 /* Test point TP2439 */
+#define U300_GPIO_PIN_GPIO19		19 /* Routed somewhere */
+#define U300_GPIO_PIN_GPIO20		20 /* Unrouted */
+#define U300_GPIO_PIN_GPIO21		21 /* Unrouted */
+#define U300_GPIO_PIN_GPIO22		22 /* Unrouted */
+#define U300_GPIO_PIN_GPIO23		23 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO24		24 /* Unrouted */
+#define U300_GPIO_PIN_GPIO25		25 /* Unrouted */
+#define U300_GPIO_PIN_GPIO26		26 /* Unrouted */
+#define U300_GPIO_PIN_GPIO27		27 /* Unrouted */
+#define U300_GPIO_PIN_GPIO28		28 /* Unrouted */
+#define U300_GPIO_PIN_GPIO29		29 /* Unrouted */
+#define U300_GPIO_PIN_GPIO30		30 /* Unrouted */
+#define U300_GPIO_PIN_GPIO31		31 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO32		32 /* Unrouted */
+#define U300_GPIO_PIN_GPIO33		33 /* Unrouted */
+#define U300_GPIO_PIN_GPIO34		34 /* Unrouted */
+#define U300_GPIO_PIN_GPIO35		35 /* Unrouted */
+#define U300_GPIO_PIN_GPIO36		36 /* Unrouted */
+#define U300_GPIO_PIN_GPIO37		37 /* Unrouted */
+#define U300_GPIO_PIN_GPIO38		38 /* Unrouted */
+#define U300_GPIO_PIN_GPIO39		39 /* Unrouted */
+
+#ifdef CONFIG_MACH_U300_BS335
+
+#define U300_GPIO_PIN_GPIO40		40 /* Unrouted */
+#define U300_GPIO_PIN_GPIO41		41 /* Unrouted */
+#define U300_GPIO_PIN_GPIO42		42 /* Unrouted */
+#define U300_GPIO_PIN_GPIO43		43 /* Unrouted */
+#define U300_GPIO_PIN_GPIO44		44 /* Unrouted */
+#define U300_GPIO_PIN_GPIO45		45 /* Unrouted */
+#define U300_GPIO_PIN_GPIO46		46 /* Unrouted */
+#define U300_GPIO_PIN_GPIO47		47 /* Unrouted */
+
+#define U300_GPIO_PIN_GPIO48		48 /* Unrouted */
+#define U300_GPIO_PIN_GPIO49		49 /* Unrouted */
+#define U300_GPIO_PIN_GPIO50		50 /* Unrouted */
+#define U300_GPIO_PIN_GPIO51		51 /* Unrouted */
+#define U300_GPIO_PIN_GPIO52		52 /* Unrouted */
+#define U300_GPIO_PIN_GPIO53		53 /* Unrouted */
+#define U300_GPIO_PIN_GPIO54		54 /* Unrouted */
+#define U300_GPIO_PIN_GPIO55		55 /* Unrouted */
+#endif
+
+#endif
+
+/* translates a pin number to a port number */
+#define PIN_TO_PORT(val) (val >> 3)
+
+/* These can be found in arch/arm/mach-u300/gpio.c */
+extern int gpio_request(unsigned gpio, const char *label);
+extern void gpio_free(unsigned gpio);
+extern int gpio_direction_input(unsigned gpio);
+extern int gpio_direction_output(unsigned gpio, int value);
+extern int gpio_register_callback(unsigned gpio,
+				  int (*func)(void *arg),
+				  void *);
+extern int gpio_unregister_callback(unsigned gpio);
+extern void enable_irq_on_gpio_pin(unsigned gpio, int edge);
+extern void disable_irq_on_gpio_pin(unsigned gpio);
+extern void gpio_pullup(unsigned gpio, int value);
+extern int gpio_get_value(unsigned gpio);
+extern void gpio_set_value(unsigned gpio, int value);
+
+/* wrappers to sleep-enable the previous two functions */
+static inline unsigned gpio_to_irq(unsigned gpio)
+{
+	return PIN_TO_PORT(gpio) + IRQ_U300_GPIO_PORT0;
+}
+
+static inline unsigned irq_to_gpio(unsigned irq)
+{
+	/*
+	 * FIXME: This is no 1-1 mapping at all, it points to the
+	 * whole block of 8 pins.
+	 */
+	return (irq - IRQ_U300_GPIO_PORT0) << 3;
+}
+
+#endif
diff --git a/arch/arm/mach-u300/padmux.c b/arch/arm/mach-u300/padmux.c
new file mode 100644
index 0000000..f366456
--- /dev/null
+++ b/arch/arm/mach-u300/padmux.c
@@ -0,0 +1,58 @@
+/*
+ *
+ * arch/arm/mach-u300/padmux.c
+ *
+ *
+ * Copyright (C) 2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * U300 PADMUX functions
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ */
+#include <linux/io.h>
+#include <linux/err.h>
+#include <mach/u300-regs.h>
+#include <mach/syscon.h>
+
+#include "padmux.h"
+
+/* Set the PAD MUX to route the MMC reader correctly to GPIO0. */
+void pmx_set_mission_mode_mmc(void)
+{
+	u16 val;
+
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1LR);
+	val &= ~U300_SYSCON_PMC1LR_MMCSD_MASK;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1LR);
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR);
+	val &= ~U300_SYSCON_PMC1HR_APP_GPIO_1_MASK;
+	val |= U300_SYSCON_PMC1HR_APP_GPIO_1_MMC;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR);
+}
+
+void pmx_set_mission_mode_spi(void)
+{
+	u16 val;
+
+	/* Set up padmuxing so the SPI port and its chipselects are active */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR);
+	/*
+	 * Activate the SPI port (disable the use of these pins for generic
+	 * GPIO, DSP, AAIF
+	 */
+	val &= ~U300_SYSCON_PMC1HR_APP_SPI_2_MASK;
+	val |= U300_SYSCON_PMC1HR_APP_SPI_2_SPI;
+	/*
+	 * Use GPIO pin SPI CS1 for CS1 actually (it can be used for other
+	 * things also)
+	 */
+	val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK;
+	val |= U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI;
+	/*
+	 * Use GPIO pin SPI CS2 for CS2 actually (it can be used for other
+	 * things also)
+	 */
+	val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK;
+	val |= U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR);
+}
diff --git a/arch/arm/mach-u300/padmux.h b/arch/arm/mach-u300/padmux.h
new file mode 100644
index 0000000..8c2099a
--- /dev/null
+++ b/arch/arm/mach-u300/padmux.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * arch/arm/mach-u300/padmux.h
+ *
+ *
+ * Copyright (C) 2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * U300 PADMUX API
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ */
+
+#ifndef __MACH_U300_PADMUX_H
+#define __MACH_U300_PADMUX_H
+
+void pmx_set_mission_mode_mmc(void);
+void pmx_set_mission_mode_spi(void);
+
+#endif
-- 
cgit v0.10.2


From cd27e485410aa7e7464b0126d968fe8c2a5c045b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Thu, 23 Apr 2009 10:21:30 +0100
Subject: [ARM] 5474/1: U300 clocking framework

This adds the clocking framework and hooks into the clkdevice
for U300 series platforms.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-u300/clock.c b/arch/arm/mach-u300/clock.c
new file mode 100644
index 0000000..d6f8232
--- /dev/null
+++ b/arch/arm/mach-u300/clock.c
@@ -0,0 +1,1487 @@
+/*
+ *
+ * arch/arm/mach-u300/clock.c
+ *
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Define clocks in the app platform.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/clk.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/timer.h>
+#include <linux/io.h>
+
+#include <asm/clkdev.h>
+#include <mach/hardware.h>
+#include <mach/syscon.h>
+
+#include "clock.h"
+
+/*
+ * TODO:
+ * - move all handling of the CCR register into this file and create
+ *   a spinlock for the CCR register
+ * - switch to the clkdevice lookup mechanism that maps clocks to
+ *   device ID:s instead when it becomes available in kernel 2.6.29.
+ * - implement rate get/set for all clocks that need it.
+ */
+
+/*
+ * Syscon clock I/O registers lock so clock requests don't collide
+ * NOTE: this is a local lock only used to lock access to clock and
+ * reset registers in syscon.
+ */
+static DEFINE_SPINLOCK(syscon_clkreg_lock);
+static DEFINE_SPINLOCK(syscon_resetreg_lock);
+
+/*
+ * The clocking hierarchy currently looks like this.
+ * NOTE: the idea is NOT to show how the clocks are routed on the chip!
+ * The ideas is to show dependencies, so a clock higher up in the
+ * hierarchy has to be on in order for another clock to be on. Now,
+ * both CPU and DMA can actually be on top of the hierarchy, and that
+ * is not modeled currently. Instead we have the backbone AMBA bus on
+ * top. This bus cannot be programmed in any way but conceptually it
+ * needs to be active for the bridges and devices to transport data.
+ *
+ * Please be aware that a few clocks are hw controlled, which mean that
+ * the hw itself can turn on/off or change the rate of the clock when
+ * needed!
+ *
+ *  AMBA bus
+ *  |
+ *  +- CPU
+ *  +- NANDIF NAND Flash interface
+ *  +- SEMI Shared Memory interface
+ *  +- ISP Image Signal Processor (U335 only)
+ *  +- CDS (U335 only)
+ *  +- DMA Direct Memory Access Controller
+ *  +- AAIF APP/ACC Inteface (Mobile Scalable Link, MSL)
+ *  +- APEX
+ *  +- VIDEO_ENC AVE2/3 Video Encoder
+ *  +- XGAM Graphics Accelerator Controller
+ *  +- AHB
+ *  |
+ *  +- ahb:0 AHB Bridge
+ *  |  |
+ *  |  +- ahb:1 INTCON Interrupt controller
+ *  |  +- ahb:3 MSPRO  Memory Stick Pro controller
+ *  |  +- ahb:4 EMIF   External Memory interface
+ *  |
+ *  +- fast:0 FAST bridge
+ *  |  |
+ *  |  +- fast:1 MMCSD MMC/SD card reader controller
+ *  |  +- fast:2 I2S0  PCM I2S channel 0 controller
+ *  |  +- fast:3 I2S1  PCM I2S channel 1 controller
+ *  |  +- fast:4 I2C0  I2C channel 0 controller
+ *  |  +- fast:5 I2C1  I2C channel 1 controller
+ *  |  +- fast:6 SPI   SPI controller
+ *  |  +- fast:7 UART1 Secondary UART (U335 only)
+ *  |
+ *  +- slow:0 SLOW bridge
+ *     |
+ *     +- slow:1 SYSCON (not possible to control)
+ *     +- slow:2 WDOG Watchdog
+ *     +- slow:3 UART0 primary UART
+ *     +- slow:4 TIMER_APP Application timer - used in Linux
+ *     +- slow:5 KEYPAD controller
+ *     +- slow:6 GPIO controller
+ *     +- slow:7 RTC controller
+ *     +- slow:8 BT Bus Tracer (not used currently)
+ *     +- slow:9 EH Event Handler (not used currently)
+ *     +- slow:a TIMER_ACC Access style timer (not used currently)
+ *     +- slow:b PPM (U335 only, what is that?)
+ */
+
+/*
+ * Reset control functions. We remember if a block has been
+ * taken out of reset and don't remove the reset assertion again
+ * and vice versa. Currently we only remove resets so the
+ * enablement function is defined out.
+ */
+static void syscon_block_reset_enable(struct clk *clk)
+{
+	u16 val;
+	unsigned long iflags;
+
+	/* Not all blocks support resetting */
+	if (!clk->res_reg || !clk->res_mask)
+		return;
+	spin_lock_irqsave(&syscon_resetreg_lock, iflags);
+	val = readw(clk->res_reg);
+	val |= clk->res_mask;
+	writew(val, clk->res_reg);
+	spin_unlock_irqrestore(&syscon_resetreg_lock, iflags);
+	clk->reset = true;
+}
+
+static void syscon_block_reset_disable(struct clk *clk)
+{
+	u16 val;
+	unsigned long iflags;
+
+	/* Not all blocks support resetting */
+	if (!clk->res_reg || !clk->res_mask)
+		return;
+	spin_lock_irqsave(&syscon_resetreg_lock, iflags);
+	val = readw(clk->res_reg);
+	val &= ~clk->res_mask;
+	writew(val, clk->res_reg);
+	spin_unlock_irqrestore(&syscon_resetreg_lock, iflags);
+	clk->reset = false;
+}
+
+int __clk_get(struct clk *clk)
+{
+	u16 val;
+
+	/* The MMC and MSPRO clocks need some special set-up */
+	if (!strcmp(clk->name, "MCLK")) {
+		/* Set default MMC clock divisor to 18.9 MHz */
+		writew(0x0054U, U300_SYSCON_VBASE + U300_SYSCON_MMF0R);
+		val = readw(U300_SYSCON_VBASE + U300_SYSCON_MMCR);
+		/* Disable the MMC feedback clock */
+		val &= ~U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE;
+		/* Disable MSPRO frequency */
+		val &= ~U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE;
+		writew(val, U300_SYSCON_VBASE + U300_SYSCON_MMCR);
+	}
+	if (!strcmp(clk->name, "MSPRO")) {
+		val = readw(U300_SYSCON_VBASE + U300_SYSCON_MMCR);
+		/* Disable the MMC feedback clock */
+		val &= ~U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE;
+		/* Enable MSPRO frequency */
+		val |= U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE;
+		writew(val, U300_SYSCON_VBASE + U300_SYSCON_MMCR);
+	}
+	return 1;
+}
+EXPORT_SYMBOL(__clk_get);
+
+void __clk_put(struct clk *clk)
+{
+}
+EXPORT_SYMBOL(__clk_put);
+
+static void syscon_clk_disable(struct clk *clk)
+{
+	unsigned long iflags;
+
+	/* Don't touch the hardware controlled clocks */
+	if (clk->hw_ctrld)
+		return;
+
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	writew(clk->clk_val, U300_SYSCON_VBASE + U300_SYSCON_SBCDR);
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+}
+
+static void syscon_clk_enable(struct clk *clk)
+{
+	unsigned long iflags;
+
+	/* Don't touch the hardware controlled clocks */
+	if (clk->hw_ctrld)
+		return;
+
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	writew(clk->clk_val, U300_SYSCON_VBASE + U300_SYSCON_SBCER);
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+}
+
+static u16 syscon_clk_get_rate(void)
+{
+	u16 val;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val &= U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK;
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+	return val;
+}
+
+#ifdef CONFIG_MACH_U300_USE_I2S_AS_MASTER
+static void enable_i2s0_vcxo(void)
+{
+	u16 val;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	/* Set I2S0 to use the VCXO 26 MHz clock */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val |= U300_SYSCON_CCR_TURN_VCXO_ON;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val |= U300_SYSCON_CCR_I2S0_USE_VCXO;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CEFR);
+	val |= U300_SYSCON_CEFR_I2S0_CLK_EN;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CEFR);
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+}
+
+static void enable_i2s1_vcxo(void)
+{
+	u16 val;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	/* Set I2S1 to use the VCXO 26 MHz clock */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val |= U300_SYSCON_CCR_TURN_VCXO_ON;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val |= U300_SYSCON_CCR_I2S1_USE_VCXO;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CEFR);
+	val |= U300_SYSCON_CEFR_I2S1_CLK_EN;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CEFR);
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+}
+
+static void disable_i2s0_vcxo(void)
+{
+	u16 val;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	/* Disable I2S0 use of the VCXO 26 MHz clock */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val &= ~U300_SYSCON_CCR_I2S0_USE_VCXO;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	/* Deactivate VCXO if noone else is using VCXO */
+	if (!(val & U300_SYSCON_CCR_I2S1_USE_VCXO))
+		val &= ~U300_SYSCON_CCR_TURN_VCXO_ON;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CEFR);
+	val &= ~U300_SYSCON_CEFR_I2S0_CLK_EN;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CEFR);
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+}
+
+static void disable_i2s1_vcxo(void)
+{
+	u16 val;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	/* Disable I2S1 use of the VCXO 26 MHz clock */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val &= ~U300_SYSCON_CCR_I2S1_USE_VCXO;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	/* Deactivate VCXO if noone else is using VCXO */
+	if (!(val & U300_SYSCON_CCR_I2S0_USE_VCXO))
+		val &= ~U300_SYSCON_CCR_TURN_VCXO_ON;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CEFR);
+	val &= ~U300_SYSCON_CEFR_I2S0_CLK_EN;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CEFR);
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+}
+#endif /* CONFIG_MACH_U300_USE_I2S_AS_MASTER */
+
+
+static void syscon_clk_rate_set_mclk(unsigned long rate)
+{
+	u16 val;
+	u32 reg;
+	unsigned long iflags;
+
+	switch (rate) {
+	case 18900000:
+		val = 0x0054;
+		break;
+	case 20800000:
+		val = 0x0044;
+		break;
+	case 23100000:
+		val = 0x0043;
+		break;
+	case 26000000:
+		val = 0x0033;
+		break;
+	case 29700000:
+		val = 0x0032;
+		break;
+	case 34700000:
+		val = 0x0022;
+		break;
+	case 41600000:
+		val = 0x0021;
+		break;
+	case 52000000:
+		val = 0x0011;
+		break;
+	case 104000000:
+		val = 0x0000;
+		break;
+	default:
+		printk(KERN_ERR "Trying to set MCLK to unknown speed! %ld\n",
+		       rate);
+		return;
+	}
+
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	reg = readw(U300_SYSCON_VBASE + U300_SYSCON_MMF0R) &
+		~U300_SYSCON_MMF0R_MASK;
+	writew(reg | val, U300_SYSCON_VBASE + U300_SYSCON_MMF0R);
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+}
+
+void syscon_clk_rate_set_cpuclk(unsigned long rate)
+{
+	u16 val;
+	unsigned long iflags;
+
+	switch (rate) {
+	case 13000000:
+		val = U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER;
+		break;
+	case 52000000:
+		val = U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE;
+		break;
+	case 104000000:
+		val = U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH;
+		break;
+	case 208000000:
+		val = U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST;
+		break;
+	default:
+		return;
+	}
+	spin_lock_irqsave(&syscon_clkreg_lock, iflags);
+	val |= readw(U300_SYSCON_VBASE + U300_SYSCON_CCR) &
+		~U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK ;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	spin_unlock_irqrestore(&syscon_clkreg_lock, iflags);
+}
+EXPORT_SYMBOL(syscon_clk_rate_set_cpuclk);
+
+void clk_disable(struct clk *clk)
+{
+	unsigned long iflags;
+
+	spin_lock_irqsave(&clk->lock, iflags);
+	if (clk->usecount > 0 && !(--clk->usecount)) {
+		/* some blocks lack clocking registers and cannot be disabled */
+		if (clk->disable)
+			clk->disable(clk);
+		if (likely((u32)clk->parent))
+			clk_disable(clk->parent);
+	}
+#ifdef CONFIG_MACH_U300_USE_I2S_AS_MASTER
+	if (unlikely(!strcmp(clk->name, "I2S0")))
+		disable_i2s0_vcxo();
+	if (unlikely(!strcmp(clk->name, "I2S1")))
+		disable_i2s1_vcxo();
+#endif
+	spin_unlock_irqrestore(&clk->lock, iflags);
+}
+EXPORT_SYMBOL(clk_disable);
+
+int clk_enable(struct clk *clk)
+{
+	int ret = 0;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&clk->lock, iflags);
+	if (clk->usecount++ == 0) {
+		if (likely((u32)clk->parent))
+			ret = clk_enable(clk->parent);
+
+		if (unlikely(ret != 0))
+			clk->usecount--;
+		else {
+			/* remove reset line (we never enable reset again) */
+			syscon_block_reset_disable(clk);
+			/* clocks without enable function are always on */
+			if (clk->enable)
+				clk->enable(clk);
+#ifdef CONFIG_MACH_U300_USE_I2S_AS_MASTER
+			if (unlikely(!strcmp(clk->name, "I2S0")))
+				enable_i2s0_vcxo();
+			if (unlikely(!strcmp(clk->name, "I2S1")))
+				enable_i2s1_vcxo();
+#endif
+		}
+	}
+	spin_unlock_irqrestore(&clk->lock, iflags);
+	return ret;
+
+}
+EXPORT_SYMBOL(clk_enable);
+
+/* Returns the clock rate in Hz */
+static unsigned long clk_get_rate_cpuclk(struct clk *clk)
+{
+	u16 val;
+
+	val = syscon_clk_get_rate();
+
+	switch (val) {
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW:
+		return 13000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE:
+		return 52000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH:
+		return 104000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST:
+		return 208000000;
+	default:
+		break;
+	}
+	return clk->rate;
+}
+
+static unsigned long clk_get_rate_ahb_clk(struct clk *clk)
+{
+	u16 val;
+
+	val = syscon_clk_get_rate();
+
+	switch (val) {
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW:
+		return 6500000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE:
+		return 26000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST:
+		return 52000000;
+	default:
+		break;
+	}
+	return clk->rate;
+
+}
+
+static unsigned long clk_get_rate_emif_clk(struct clk *clk)
+{
+	u16 val;
+
+	val = syscon_clk_get_rate();
+
+	switch (val) {
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW:
+		return 13000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE:
+		return 52000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST:
+		return 104000000;
+	default:
+		break;
+	}
+	return clk->rate;
+
+}
+
+static unsigned long clk_get_rate_xgamclk(struct clk *clk)
+{
+	u16 val;
+
+	val = syscon_clk_get_rate();
+
+	switch (val) {
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW:
+		return 6500000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE:
+		return 26000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST:
+		return 52000000;
+	default:
+		break;
+	}
+
+	return clk->rate;
+}
+
+static unsigned long clk_get_rate_mclk(struct clk *clk)
+{
+	u16 val;
+
+	val = syscon_clk_get_rate();
+
+	switch (val) {
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER:
+		/*
+		 * Here, the 208 MHz PLL gets shut down and the always
+		 * on 13 MHz PLL used for RTC etc kicks into use
+		 * instead.
+		 */
+		return 13000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST:
+	{
+		/*
+		 * This clock is under program control. The register is
+		 * divided in two nybbles, bit 7-4 gives cycles-1 to count
+		 * high, bit 3-0 gives cycles-1 to count low. Distribute
+		 * these with no more than 1 cycle difference between
+		 * low and high and add low and high to get the actual
+		 * divisor. The base PLL is 208 MHz. Writing 0x00 will
+		 * divide by 1 and 1 so the highest frequency possible
+		 * is 104 MHz.
+		 *
+		 * e.g. 0x54 =>
+		 * f = 208 / ((5+1) + (4+1)) = 208 / 11 = 18.9 MHz
+		 */
+		u16 val = readw(U300_SYSCON_VBASE + U300_SYSCON_MMF0R) &
+			U300_SYSCON_MMF0R_MASK;
+		switch (val) {
+		case 0x0054:
+			return 18900000;
+		case 0x0044:
+			return 20800000;
+		case 0x0043:
+			return 23100000;
+		case 0x0033:
+			return 26000000;
+		case 0x0032:
+			return 29700000;
+		case 0x0022:
+			return 34700000;
+		case 0x0021:
+			return 41600000;
+		case 0x0011:
+			return 52000000;
+		case 0x0000:
+			return 104000000;
+		default:
+			break;
+		}
+	}
+	default:
+		break;
+	}
+
+	return clk->rate;
+}
+
+static unsigned long clk_get_rate_i2s_i2c_spi(struct clk *clk)
+{
+	u16 val;
+
+	val = syscon_clk_get_rate();
+
+	switch (val) {
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW:
+		return 13000000;
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH:
+	case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST:
+		return 26000000;
+	default:
+		break;
+	}
+
+	return clk->rate;
+}
+
+unsigned long clk_get_rate(struct clk *clk)
+{
+	if (clk->get_rate)
+		return clk->get_rate(clk);
+	else
+		return clk->rate;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+static unsigned long clk_round_rate_mclk(struct clk *clk, unsigned long rate)
+{
+	if (rate >= 18900000)
+		return 18900000;
+	if (rate >= 20800000)
+		return 20800000;
+	if (rate >= 23100000)
+		return 23100000;
+	if (rate >= 26000000)
+		return 26000000;
+	if (rate >= 29700000)
+		return 29700000;
+	if (rate >= 34700000)
+		return 34700000;
+	if (rate >= 41600000)
+		return 41600000;
+	if (rate >= 52000000)
+		return 52000000;
+	return -EINVAL;
+}
+
+static unsigned long clk_round_rate_cpuclk(struct clk *clk, unsigned long rate)
+{
+	if (rate >= 13000000)
+		return 13000000;
+	if (rate >= 52000000)
+		return 52000000;
+	if (rate >= 104000000)
+		return 104000000;
+	if (rate >= 208000000)
+		return 208000000;
+	return -EINVAL;
+}
+
+/*
+ * This adjusts a requested rate to the closest exact rate
+ * a certain clock can provide. For a fixed clock it's
+ * mostly clk->rate.
+ */
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+	/* TODO: get apropriate switches for EMIFCLK, AHBCLK and MCLK */
+	/* Else default to fixed value */
+
+	if (clk->round_rate) {
+		return (long) clk->round_rate(clk, rate);
+	} else {
+		printk(KERN_ERR "clock: Failed to round rate of %s\n",
+		       clk->name);
+	}
+	return (long) clk->rate;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
+static int clk_set_rate_mclk(struct clk *clk, unsigned long rate)
+{
+	syscon_clk_rate_set_mclk(clk_round_rate(clk, rate));
+	return 0;
+}
+
+static int clk_set_rate_cpuclk(struct clk *clk, unsigned long rate)
+{
+	syscon_clk_rate_set_cpuclk(clk_round_rate(clk, rate));
+	return 0;
+}
+
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+	/* TODO: set for EMIFCLK and AHBCLK */
+	/* Else assume the clock is fixed and fail */
+	if (clk->set_rate) {
+		return clk->set_rate(clk, rate);
+	} else {
+		printk(KERN_ERR "clock: Failed to set %s to %ld hz\n",
+		       clk->name, rate);
+		return -1;
+	}
+}
+EXPORT_SYMBOL(clk_set_rate);
+
+/*
+ * Clock definitions. The clock parents are set to respective
+ * bridge and the clock framework makes sure that the clocks have
+ * parents activated and are brought out of reset when in use.
+ *
+ * Clocks that have hw_ctrld = true are hw controlled, and the hw
+ * can by itself turn these clocks on and off.
+ * So in other words, we don't really have to care about them.
+ */
+
+static struct clk amba_clk = {
+	.name	    = "AMBA",
+	.rate	    = 52000000, /* this varies! */
+	.hw_ctrld   = true,
+	.reset	    = false,
+};
+
+/*
+ * These blocks are connected directly to the AMBA bus
+ * with no bridge.
+ */
+
+static struct clk cpu_clk = {
+	.name	    = "CPU",
+	.parent	    = &amba_clk,
+	.rate	    = 208000000, /* this varies! */
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_CPU_RESET_EN,
+	.set_rate   = clk_set_rate_cpuclk,
+	.get_rate   = clk_get_rate_cpuclk,
+	.round_rate = clk_round_rate_cpuclk,
+};
+
+static struct clk nandif_clk = {
+	.name       = "NANDIF",
+	.parent	    = &amba_clk,
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_NANDIF_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_NANDIF_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk semi_clk = {
+	.name       = "SEMI",
+	.parent	    = &amba_clk,
+	.rate       = 0, /* FIXME */
+	/* It is not possible to reset SEMI */
+	.hw_ctrld   = false,
+	.reset	    = false,
+	.clk_val    = U300_SYSCON_SBCER_SEMI_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+#ifdef CONFIG_MACH_U300_BS335
+static struct clk isp_clk = {
+	.name	    = "ISP",
+	.parent	    = &amba_clk,
+	.rate	    = 0, /* FIXME */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_ISP_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_ISP_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk cds_clk = {
+	.name	    = "CDS",
+	.parent	    = &amba_clk,
+	.rate	    = 0, /* FIXME */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_CDS_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_CDS_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+#endif
+
+static struct clk dma_clk = {
+	.name       = "DMA",
+	.parent	    = &amba_clk,
+	.rate       = 52000000, /* this varies! */
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_DMAC_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_DMAC_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk aaif_clk = {
+	.name       = "AAIF",
+	.parent	    = &amba_clk,
+	.rate       = 52000000, /* this varies! */
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_AAIF_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_AAIF_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk apex_clk = {
+	.name       = "APEX",
+	.parent	    = &amba_clk,
+	.rate       = 0, /* FIXME */
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_APEX_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_APEX_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk video_enc_clk = {
+	.name       = "VIDEO_ENC",
+	.parent	    = &amba_clk,
+	.rate       = 208000000, /* this varies! */
+	.hw_ctrld   = false,
+	.reset	    = false,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	/* This has XGAM in the name but refers to the video encoder */
+	.res_mask   = U300_SYSCON_RRR_XGAM_VC_SYNC_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_VIDEO_ENC_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk xgam_clk = {
+	.name       = "XGAMCLK",
+	.parent	    = &amba_clk,
+	.rate       = 52000000, /* this varies! */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_XGAM_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_XGAM_CLK_EN,
+	.get_rate   = clk_get_rate_xgamclk,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+/* This clock is used to activate the video encoder */
+static struct clk ahb_clk = {
+	.name	    = "AHB",
+	.parent	    = &amba_clk,
+	.rate	    = 52000000, /* this varies! */
+	.hw_ctrld   = false, /* This one is set to false due to HW bug */
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_AHB_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_AHB_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+	.get_rate   = clk_get_rate_ahb_clk,
+};
+
+
+/*
+ * Clocks on the AHB bridge
+ */
+
+static struct clk ahb_subsys_clk = {
+	.name	    = "AHB_SUBSYS",
+	.parent	    = &amba_clk,
+	.rate	    = 52000000, /* this varies! */
+	.hw_ctrld   = true,
+	.reset	    = false,
+	.clk_val    = U300_SYSCON_SBCER_AHB_SUBSYS_BRIDGE_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+	.get_rate   = clk_get_rate_ahb_clk,
+};
+
+static struct clk intcon_clk = {
+	.name	    = "INTCON",
+	.parent	    = &ahb_subsys_clk,
+	.rate	    = 52000000, /* this varies! */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_INTCON_RESET_EN,
+	/* INTCON can be reset but not clock-gated */
+};
+
+static struct clk mspro_clk = {
+	.name       = "MSPRO",
+	.parent	    = &ahb_subsys_clk,
+	.rate       = 0, /* FIXME */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_MSPRO_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_MSPRO_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk emif_clk = {
+	.name	    = "EMIF",
+	.parent	    = &ahb_subsys_clk,
+	.rate	    = 104000000, /* this varies! */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RRR,
+	.res_mask   = U300_SYSCON_RRR_EMIF_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_EMIF_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+	.get_rate   = clk_get_rate_emif_clk,
+};
+
+
+/*
+ * Clocks on the FAST bridge
+ */
+static struct clk fast_clk = {
+	.name	    = "FAST_BRIDGE",
+	.parent	    = &amba_clk,
+	.rate	    = 13000000, /* this varies! */
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RFR,
+	.res_mask   = U300_SYSCON_RFR_FAST_BRIDGE_RESET_ENABLE,
+	.clk_val    = U300_SYSCON_SBCER_FAST_BRIDGE_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk mmcsd_clk = {
+	.name       = "MCLK",
+	.parent	    = &fast_clk,
+	.rate       = 18900000, /* this varies! */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RFR,
+	.res_mask   = U300_SYSCON_RFR_MMC_RESET_ENABLE,
+	.clk_val    = U300_SYSCON_SBCER_MMC_CLK_EN,
+	.get_rate   = clk_get_rate_mclk,
+	.set_rate   = clk_set_rate_mclk,
+	.round_rate = clk_round_rate_mclk,
+	.disable    = syscon_clk_disable,
+	.enable     = syscon_clk_enable,
+};
+
+static struct clk i2s0_clk = {
+	.name       = "i2s0",
+	.parent	    = &fast_clk,
+	.rate       = 26000000, /* this varies! */
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RFR,
+	.res_mask   = U300_SYSCON_RFR_PCM_I2S0_RESET_ENABLE,
+	.clk_val    = U300_SYSCON_SBCER_I2S0_CORE_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+	.get_rate   = clk_get_rate_i2s_i2c_spi,
+};
+
+static struct clk i2s1_clk = {
+	.name       = "i2s1",
+	.parent	    = &fast_clk,
+	.rate       = 26000000, /* this varies! */
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RFR,
+	.res_mask   = U300_SYSCON_RFR_PCM_I2S1_RESET_ENABLE,
+	.clk_val    = U300_SYSCON_SBCER_I2S1_CORE_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+	.get_rate   = clk_get_rate_i2s_i2c_spi,
+};
+
+static struct clk i2c0_clk = {
+	.name       = "I2C0",
+	.parent	    = &fast_clk,
+	.rate       = 26000000, /* this varies! */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RFR,
+	.res_mask   = U300_SYSCON_RFR_I2C0_RESET_ENABLE,
+	.clk_val    = U300_SYSCON_SBCER_I2C0_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+	.get_rate   = clk_get_rate_i2s_i2c_spi,
+};
+
+static struct clk i2c1_clk = {
+	.name       = "I2C1",
+	.parent	    = &fast_clk,
+	.rate       = 26000000, /* this varies! */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RFR,
+	.res_mask   = U300_SYSCON_RFR_I2C1_RESET_ENABLE,
+	.clk_val    = U300_SYSCON_SBCER_I2C1_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+	.get_rate   = clk_get_rate_i2s_i2c_spi,
+};
+
+static struct clk spi_clk = {
+	.name       = "SPI",
+	.parent	    = &fast_clk,
+	.rate       = 26000000, /* this varies! */
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RFR,
+	.res_mask   = U300_SYSCON_RFR_SPI_RESET_ENABLE,
+	.clk_val    = U300_SYSCON_SBCER_SPI_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+	.get_rate   = clk_get_rate_i2s_i2c_spi,
+};
+
+#ifdef CONFIG_MACH_U300_BS335
+static struct clk uart1_clk = {
+	.name	    = "UART1",
+	.parent	    = &fast_clk,
+	.rate	    = 13000000,
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RFR,
+	.res_mask   = U300_SYSCON_RFR_UART1_RESET_ENABLE,
+	.clk_val    = U300_SYSCON_SBCER_UART1_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+#endif
+
+
+/*
+ * Clocks on the SLOW bridge
+ */
+static struct clk slow_clk = {
+	.name	    = "SLOW_BRIDGE",
+	.parent	    = &amba_clk,
+	.rate	    = 13000000,
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_SLOW_BRIDGE_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_SLOW_BRIDGE_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+/* TODO: implement SYSCON clock? */
+
+static struct clk wdog_clk = {
+	.name	    = "WDOG",
+	.parent	    = &slow_clk,
+	.hw_ctrld   = false,
+	.rate	    = 32768,
+	.reset	    = false,
+	/* This is always on, cannot be enabled/disabled or reset */
+};
+
+/* This one is hardwired to PLL13 */
+static struct clk uart_clk = {
+	.name	    = "UARTCLK",
+	.parent	    = &slow_clk,
+	.rate	    = 13000000,
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_UART_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_UART_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk keypad_clk = {
+	.name       = "KEYPAD",
+	.parent	    = &slow_clk,
+	.rate       = 32768,
+	.hw_ctrld   = false,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_KEYPAD_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_KEYPAD_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk gpio_clk = {
+	.name       = "GPIO",
+	.parent	    = &slow_clk,
+	.rate       = 13000000,
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_GPIO_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_GPIO_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk rtc_clk = {
+	.name	    = "RTC",
+	.parent	    = &slow_clk,
+	.rate	    = 32768,
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_RTC_RESET_EN,
+	/* This clock is always on, cannot be enabled/disabled */
+};
+
+static struct clk bustr_clk = {
+	.name       = "BUSTR",
+	.parent	    = &slow_clk,
+	.rate       = 13000000,
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_BTR_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_BTR_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk evhist_clk = {
+	.name       = "EVHIST",
+	.parent	    = &slow_clk,
+	.rate       = 13000000,
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_EH_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_EH_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk timer_clk = {
+	.name       = "TIMER",
+	.parent	    = &slow_clk,
+	.rate       = 13000000,
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_ACC_TMR_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_ACC_TMR_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+static struct clk app_timer_clk = {
+	.name       = "TIMER_APP",
+	.parent	    = &slow_clk,
+	.rate       = 13000000,
+	.hw_ctrld   = true,
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_APP_TMR_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_APP_TMR_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+
+#ifdef CONFIG_MACH_U300_BS335
+static struct clk ppm_clk = {
+	.name	    = "PPM",
+	.parent	    = &slow_clk,
+	.rate	    = 0, /* FIXME */
+	.hw_ctrld   = true, /* TODO: Look up if it is hw ctrld or not */
+	.reset	    = true,
+	.res_reg    = U300_SYSCON_VBASE + U300_SYSCON_RSR,
+	.res_mask   = U300_SYSCON_RSR_PPM_RESET_EN,
+	.clk_val    = U300_SYSCON_SBCER_PPM_CLK_EN,
+	.enable     = syscon_clk_enable,
+	.disable    = syscon_clk_disable,
+};
+#endif
+
+#define DEF_LOOKUP(devid, clkref)		\
+	{					\
+	.dev_id = devid,			\
+	.clk = clkref,				\
+	}
+
+/*
+ * Here we only define clocks that are meaningful to
+ * look up through clockdevice.
+ */
+static struct clk_lookup lookups[] = {
+	/* Connected directly to the AMBA bus */
+	DEF_LOOKUP("amba", &amba_clk),
+	DEF_LOOKUP("cpu", &cpu_clk),
+	DEF_LOOKUP("nandif", &nandif_clk),
+	DEF_LOOKUP("semi", &semi_clk),
+#ifdef CONFIG_MACH_U300_BS335
+	DEF_LOOKUP("isp", &isp_clk),
+	DEF_LOOKUP("cds", &cds_clk),
+#endif
+	DEF_LOOKUP("dma", &dma_clk),
+	DEF_LOOKUP("aaif", &aaif_clk),
+	DEF_LOOKUP("apex", &apex_clk),
+	DEF_LOOKUP("video_enc", &video_enc_clk),
+	DEF_LOOKUP("xgam", &xgam_clk),
+	DEF_LOOKUP("ahb", &ahb_clk),
+	/* AHB bridge clocks */
+	DEF_LOOKUP("ahb", &ahb_subsys_clk),
+	DEF_LOOKUP("intcon", &intcon_clk),
+	DEF_LOOKUP("mspro", &mspro_clk),
+	DEF_LOOKUP("pl172", &emif_clk),
+	/* FAST bridge clocks */
+	DEF_LOOKUP("fast", &fast_clk),
+	DEF_LOOKUP("mmci", &mmcsd_clk),
+	/*
+	 * The .0 and .1 identifiers on these comes from the platform device
+	 * .id field and are assigned when the platform devices are registered.
+	 */
+	DEF_LOOKUP("i2s.0", &i2s0_clk),
+	DEF_LOOKUP("i2s.1", &i2s1_clk),
+	DEF_LOOKUP("stddci2c.0", &i2c0_clk),
+	DEF_LOOKUP("stddci2c.1", &i2c1_clk),
+	DEF_LOOKUP("pl022", &spi_clk),
+#ifdef CONFIG_MACH_U300_BS335
+	DEF_LOOKUP("uart1", &uart1_clk),
+#endif
+	/* SLOW bridge clocks */
+	DEF_LOOKUP("slow", &slow_clk),
+	DEF_LOOKUP("wdog", &wdog_clk),
+	DEF_LOOKUP("uart0", &uart_clk),
+	DEF_LOOKUP("apptimer", &app_timer_clk),
+	DEF_LOOKUP("keypad", &keypad_clk),
+	DEF_LOOKUP("u300-gpio", &gpio_clk),
+	DEF_LOOKUP("rtc0", &rtc_clk),
+	DEF_LOOKUP("bustr", &bustr_clk),
+	DEF_LOOKUP("evhist", &evhist_clk),
+	DEF_LOOKUP("timer", &timer_clk),
+#ifdef CONFIG_MACH_U300_BS335
+	DEF_LOOKUP("ppm", &ppm_clk),
+#endif
+};
+
+static void __init clk_register(void)
+{
+	int i;
+
+	/* Register the lookups */
+	for (i = 0; i < ARRAY_SIZE(lookups); i++)
+		clkdev_add(&lookups[i]);
+}
+
+/*
+ * These are the clocks for cells registered as primecell drivers
+ * on the AMBA bus. These must be on during AMBA device registration
+ * since the bus probe will attempt to read magic configuration
+ * registers for these devices. If they are deactivated these probes
+ * will fail.
+ *
+ *
+ * Please note that on emif, both RAM and NAND is connected in dual
+ * RAM phones. On single RAM phones, ram is on semi and NAND on emif.
+ *
+ */
+void u300_clock_primecells(void)
+{
+	clk_enable(&intcon_clk);
+	clk_enable(&uart_clk);
+#ifdef CONFIG_MACH_U300_BS335
+	clk_enable(&uart1_clk);
+#endif
+	clk_enable(&spi_clk);
+
+	clk_enable(&mmcsd_clk);
+
+}
+EXPORT_SYMBOL(u300_clock_primecells);
+
+void u300_unclock_primecells(void)
+{
+
+	clk_disable(&intcon_clk);
+	clk_disable(&uart_clk);
+#ifdef CONFIG_MACH_U300_BS335
+	clk_disable(&uart1_clk);
+#endif
+	clk_disable(&spi_clk);
+	clk_disable(&mmcsd_clk);
+
+}
+EXPORT_SYMBOL(u300_unclock_primecells);
+
+/*
+ * The interrupt controller is enabled before the clock API is registered.
+ */
+void u300_enable_intcon_clock(void)
+{
+	clk_enable(&intcon_clk);
+}
+EXPORT_SYMBOL(u300_enable_intcon_clock);
+
+/*
+ * The timer is enabled before the clock API is registered.
+ */
+void u300_enable_timer_clock(void)
+{
+	clk_enable(&app_timer_clk);
+}
+EXPORT_SYMBOL(u300_enable_timer_clock);
+
+#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG))
+/*
+ * The following makes it possible to view the status (especially
+ * reference count and reset status) for the clocks in the platform
+ * by looking into the special file <debugfs>/u300_clocks
+ */
+
+/* A list of all clocks in the platform */
+static struct clk *clks[] = {
+	/* Top node clock for the AMBA bus */
+	&amba_clk,
+	/* Connected directly to the AMBA bus */
+	&cpu_clk,
+	&nandif_clk,
+	&semi_clk,
+#ifdef CONFIG_MACH_U300_BS335
+	&isp_clk,
+	&cds_clk,
+#endif
+	&dma_clk,
+	&aaif_clk,
+	&apex_clk,
+	&video_enc_clk,
+	&xgam_clk,
+	&ahb_clk,
+
+	/* AHB bridge clocks */
+	&ahb_subsys_clk,
+	&intcon_clk,
+	&mspro_clk,
+	&emif_clk,
+	/* FAST bridge clocks */
+	&fast_clk,
+	&mmcsd_clk,
+	&i2s0_clk,
+	&i2s1_clk,
+	&i2c0_clk,
+	&i2c1_clk,
+	&spi_clk,
+#ifdef CONFIG_MACH_U300_BS335
+	&uart1_clk,
+#endif
+	/* SLOW bridge clocks */
+	&slow_clk,
+	&wdog_clk,
+	&uart_clk,
+	&app_timer_clk,
+	&keypad_clk,
+	&gpio_clk,
+	&rtc_clk,
+	&bustr_clk,
+	&evhist_clk,
+	&timer_clk,
+#ifdef CONFIG_MACH_U300_BS335
+	&ppm_clk,
+#endif
+};
+
+static int u300_clocks_show(struct seq_file *s, void *data)
+{
+	struct clk *clk;
+	int i;
+
+	seq_printf(s, "CLOCK           DEVICE          RESET STATE\t" \
+		   "ACTIVE\tUSERS\tHW CTRL FREQ\n");
+	seq_printf(s, "---------------------------------------------" \
+		   "-----------------------------------------\n");
+	for (i = 0; i < ARRAY_SIZE(clks); i++) {
+		clk = clks[i];
+		if (clk != ERR_PTR(-ENOENT)) {
+			/* Format clock and device name nicely */
+			char cdp[33];
+			int chars;
+
+			chars = snprintf(&cdp[0], 17, "%s", clk->name);
+			while (chars < 16) {
+				cdp[chars] = ' ';
+				chars++;
+			}
+			chars = snprintf(&cdp[16], 17, "%s", clk->dev ?
+					 dev_name(clk->dev) : "N/A");
+			while (chars < 16) {
+				cdp[chars+16] = ' ';
+				chars++;
+			}
+			cdp[32] = '\0';
+			if (clk->get_rate)
+				seq_printf(s,
+					   "%s%s\t%s\t%d\t%s\t%lu Hz\n",
+					   &cdp[0],
+					   clk->reset ?
+					   "ASSERTED" : "RELEASED",
+					   clk->usecount ? "ON" : "OFF",
+					   clk->usecount,
+					   clk->hw_ctrld  ? "YES" : "NO ",
+					   clk->get_rate(clk));
+			else
+				seq_printf(s,
+					   "%s%s\t%s\t%d\t%s\t" \
+					   "(unknown rate)\n",
+					   &cdp[0],
+					   clk->reset ?
+					   "ASSERTED" : "RELEASED",
+					   clk->usecount ? "ON" : "OFF",
+					   clk->usecount,
+					   clk->hw_ctrld  ? "YES" : "NO ");
+		}
+	}
+	return 0;
+}
+
+static int u300_clocks_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, u300_clocks_show, NULL);
+}
+
+static const struct file_operations u300_clocks_operations = {
+	.open		= u300_clocks_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void init_clk_read_procfs(void)
+{
+	/* Expose a simple debugfs interface to view all clocks */
+	(void) debugfs_create_file("u300_clocks", S_IFREG | S_IRUGO,
+				   NULL, NULL, &u300_clocks_operations);
+}
+#else
+static inline void init_clk_read_procfs(void)
+{
+}
+#endif
+
+static int __init u300_clock_init(void)
+{
+	u16 val;
+
+	/*
+	 * FIXME: shall all this powermanagement stuff really live here???
+	 */
+
+	/* Set system to run at PLL208, max performance, a known state. */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val &= ~U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	/* Wait for the PLL208 to lock if not locked in yet */
+	while (!(readw(U300_SYSCON_VBASE + U300_SYSCON_CSR) &
+		 U300_SYSCON_CSR_PLL208_LOCK_IND));
+
+	/* Power management enable */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMCR);
+	val |= U300_SYSCON_PMCR_PWR_MGNT_ENABLE;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMCR);
+
+	clk_register();
+
+	init_clk_read_procfs();
+
+	/*
+	 * Some of these may be on when we boot the system so make sure they
+	 * are turned OFF.
+	 */
+	syscon_block_reset_enable(&timer_clk);
+	timer_clk.disable(&timer_clk);
+
+	/*
+	 * These shall be turned on by default when we boot the system
+	 * so make sure they are ON. (Adding CPU here is a bit too much.)
+	 * These clocks will be claimed by drivers later.
+	 */
+	syscon_block_reset_disable(&semi_clk);
+	syscon_block_reset_disable(&emif_clk);
+	semi_clk.enable(&semi_clk);
+	emif_clk.enable(&emif_clk);
+
+	return 0;
+}
+/* initialize clocking early to be available later in the boot */
+core_initcall(u300_clock_init);
diff --git a/arch/arm/mach-u300/clock.h b/arch/arm/mach-u300/clock.h
new file mode 100644
index 0000000..fc6d9cc
--- /dev/null
+++ b/arch/arm/mach-u300/clock.h
@@ -0,0 +1,53 @@
+/*
+ * arch/arm/mach-u300/include/mach/clock.h
+ *
+ * Copyright (C) 2004 - 2005 Nokia corporation
+ * Written by Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>
+ * Based on clocks.h by Tony Lindgren, Gordon McNutt and RidgeRun, Inc
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * Adopted to ST-Ericsson U300 platforms by
+ * Jonas Aaberg <jonas.aberg@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __MACH_CLOCK_H
+#define __MACH_CLOCK_H
+
+#include <linux/clk.h>
+
+struct clk {
+	struct list_head node;
+	struct module *owner;
+	struct device *dev;
+	const char *name;
+	struct clk *parent;
+
+	spinlock_t lock;
+	unsigned long rate;
+	bool reset;
+	__u16 clk_val;
+	__s8 usecount;
+	__u32 res_reg;
+	__u16 res_mask;
+
+	bool hw_ctrld;
+
+	void (*recalc) (struct clk *);
+	int (*set_rate) (struct clk *, unsigned long);
+	unsigned long (*get_rate) (struct clk *);
+	unsigned long (*round_rate) (struct clk *, unsigned long);
+	void (*init) (struct clk *);
+	void (*enable) (struct clk *);
+	void (*disable) (struct clk *);
+};
+
+void u300_clock_primecells(void);
+void u300_unclock_primecells(void);
+void u300_enable_intcon_clock(void);
+void u300_enable_timer_clock(void);
+
+#endif
diff --git a/arch/arm/mach-u300/include/mach/clkdev.h b/arch/arm/mach-u300/include/mach/clkdev.h
new file mode 100644
index 0000000..92e3cc8
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/clkdev.h
@@ -0,0 +1,7 @@
+#ifndef __MACH_CLKDEV_H
+#define __MACH_CLKDEV_H
+
+int __clk_get(struct clk *clk);
+void __clk_put(struct clk *clk);
+
+#endif
-- 
cgit v0.10.2


From bb3cee2b35d2b9edab71997bd06040ff37483e08 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Thu, 23 Apr 2009 10:22:13 +0100
Subject: [ARM] 5473/1: U300 core machine support

This adds core support for the ST-Ericsson U300 series
platforms: U300, U330, U335 and U365. Supports memory
mappings, interrupt controller, system timer (clocksource
and clockevents), and binds to the existing drivers for
the PrimeCells used in this design: PL190 (VIC), PL180
(MMC/SD host) and PL011 (UART). This is intented to serve
as starting point for our mainling work, more patches to
follow.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig
new file mode 100644
index 0000000..337b9aa
--- /dev/null
+++ b/arch/arm/mach-u300/Kconfig
@@ -0,0 +1,105 @@
+if ARCH_U300
+
+menu "ST-Ericsson AB U300/U330/U335/U365 Platform"
+
+comment "ST-Ericsson Mobile Platform Products"
+
+config MACH_U300
+	bool "U300"
+
+comment "ST-Ericsson U300/U330/U335/U365 Feature Selections"
+
+choice
+	prompt "U300/U330/U335/U365 system type"
+	default MACH_U300_BS2X
+	---help---
+	You need to select the target system, i.e. the
+	U300/U330/U335/U365 board that you want to compile your kernel
+	for.
+
+config MACH_U300_BS2X
+	bool "S26/S26/B25/B26 Test Products"
+	depends on MACH_U300
+	help
+		Select this if you're developing on the
+		S26/S25 test products. (Also works on
+		B26/B25 big boards.)
+
+config MACH_U300_BS330
+	bool "S330/B330 Test Products"
+	depends on MACH_U300
+	help
+		Select this if you're developing on the
+		S330/B330 test products.
+
+config MACH_U300_BS335
+	bool "S335/B335 Test Products"
+	depends on MACH_U300
+	help
+		Select this if you're developing on the
+		S335/B335 test products.
+
+config MACH_U300_BS365
+	bool "S365/B365 Test Products"
+	depends on MACH_U300
+	help
+		Select this if you're developing on the
+		S365/B365 test products.
+
+endchoice
+
+choice
+	prompt "Memory configuration"
+	default MACH_U300_SINGLE_RAM
+	---help---
+	You have to config the kernel according to the physical memory
+	configuration.
+
+config MACH_U300_SINGLE_RAM
+	bool "Single RAM"
+	help
+		Select this if you want support for Single RAM phones.
+
+config MACH_U300_DUAL_RAM
+	bool "Dual RAM"
+	help
+		Select this if you want support for Dual RAM phones.
+		This is two RAM memorys on different EMIFs.
+endchoice
+
+config U300_DEBUG
+	bool "Debug support for U300"
+	depends on PM
+	help
+		Debug support for U300 in sysfs, procfs etc.
+
+config MACH_U300_SEMI_IS_SHARED
+	bool "The SEMI is used by both the access and application side"
+	depends on MACH_U300
+	help
+		This makes it possible to use the SEMI (Shared External
+		Memory Interface) from both from access and application
+		side.
+
+comment "All the settings below must match the bootloader's settings"
+
+config MACH_U300_ACCESS_MEM_SIZE
+       int "Access CPU memory allocation"
+       range 7 25
+       depends on MACH_U300_SINGLE_RAM
+       default 13
+       help
+		How much memory in MiB that the Access side CPU has allocated
+
+config MACH_U300_2MB_ALIGNMENT_FIX
+       bool "2MiB alignment fix"
+       depends on MACH_U300_SINGLE_RAM
+       default y
+       help
+		If yes and the Access side CPU has allocated an odd size in
+		MiB, this fix gives you one MiB extra that would otherwise be
+		lost due to Linux 2 MiB alignment policy.
+
+endmenu
+
+endif
diff --git a/arch/arm/mach-u300/Makefile b/arch/arm/mach-u300/Makefile
new file mode 100644
index 0000000..24950e0
--- /dev/null
+++ b/arch/arm/mach-u300/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the linux kernel, U300 machine.
+#
+
+obj-y		:= core.o clock.o timer.o gpio.o padmux.o
+obj-m		:=
+obj-n		:=
+obj-		:=
+
+obj-$(CONFIG_ARCH_U300)	          += u300.o
+obj-$(CONFIG_MMC)                 += mmc.o
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
new file mode 100644
index 0000000..1f2ed21
--- /dev/null
+++ b/arch/arm/mach-u300/core.c
@@ -0,0 +1,649 @@
+/*
+ *
+ * arch/arm/mach-u300/core.c
+ *
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Core platform support, IRQ handling and device definitions.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/termios.h>
+#include <linux/amba/bus.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+
+#include <asm/types.h>
+#include <asm/setup.h>
+#include <asm/memory.h>
+#include <asm/hardware/vic.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+
+#include <mach/hardware.h>
+#include <mach/syscon.h>
+
+#include "clock.h"
+#include "mmc.h"
+
+/*
+ * Static I/O mappings that are needed for booting the U300 platforms. The
+ * only things we need are the areas where we find the timer, syscon and
+ * intcon, since the remaining device drivers will map their own memory
+ * physical to virtual as the need arise.
+ */
+static struct map_desc u300_io_desc[] __initdata = {
+	{
+		.virtual	= U300_SLOW_PER_VIRT_BASE,
+		.pfn		= __phys_to_pfn(U300_SLOW_PER_PHYS_BASE),
+		.length		= SZ_64K,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= U300_AHB_PER_VIRT_BASE,
+		.pfn		= __phys_to_pfn(U300_AHB_PER_PHYS_BASE),
+		.length		= SZ_32K,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= U300_FAST_PER_VIRT_BASE,
+		.pfn		= __phys_to_pfn(U300_FAST_PER_PHYS_BASE),
+		.length		= SZ_32K,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= 0xffff2000, /* TCM memory */
+		.pfn		= __phys_to_pfn(0xffff2000),
+		.length		= SZ_16K,
+		.type		= MT_DEVICE,
+	},
+
+	/*
+	 * This overlaps with the IRQ vectors etc at 0xffff0000, so these
+	 * may have to be moved to 0x00000000 in order to use the ROM.
+	 */
+	/*
+	{
+		.virtual	= U300_BOOTROM_VIRT_BASE,
+		.pfn		= __phys_to_pfn(U300_BOOTROM_PHYS_BASE),
+		.length		= SZ_64K,
+		.type		= MT_ROM,
+	},
+	*/
+};
+
+void __init u300_map_io(void)
+{
+	iotable_init(u300_io_desc, ARRAY_SIZE(u300_io_desc));
+}
+
+/*
+ * Declaration of devices found on the U300 board and
+ * their respective memory locations.
+ */
+static struct amba_device uart0_device = {
+	.dev = {
+		.init_name = "uart0", /* Slow device at 0x3000 offset */
+		.platform_data = NULL,
+	},
+	.res = {
+		.start = U300_UART0_BASE,
+		.end   = U300_UART0_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	.irq = { IRQ_U300_UART0, NO_IRQ },
+};
+
+/* The U335 have an additional UART1 on the APP CPU */
+#ifdef CONFIG_MACH_U300_BS335
+static struct amba_device uart1_device = {
+	.dev = {
+		.init_name = "uart1", /* Fast device at 0x7000 offset */
+		.platform_data = NULL,
+	},
+	.res = {
+		.start = U300_UART1_BASE,
+		.end   = U300_UART1_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	.irq = { IRQ_U300_UART1, NO_IRQ },
+};
+#endif
+
+static struct amba_device pl172_device = {
+	.dev = {
+		.init_name = "pl172", /* AHB device at 0x4000 offset */
+		.platform_data = NULL,
+	},
+	.res = {
+		.start = U300_EMIF_CFG_BASE,
+		.end   = U300_EMIF_CFG_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+};
+
+
+/*
+ * Everything within this next ifdef deals with external devices connected to
+ * the APP SPI bus.
+ */
+static struct amba_device pl022_device = {
+	.dev = {
+		.coherent_dma_mask = ~0,
+		.init_name = "pl022", /* Fast device at 0x6000 offset */
+	},
+	.res = {
+		.start = U300_SPI_BASE,
+		.end   = U300_SPI_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	.irq = {IRQ_U300_SPI, NO_IRQ },
+	/*
+	 * This device has a DMA channel but the Linux driver does not use
+	 * it currently.
+	 */
+};
+
+static struct amba_device mmcsd_device = {
+	.dev = {
+		.init_name = "mmci", /* Fast device at 0x1000 offset */
+		.platform_data = NULL, /* Added later */
+	},
+	.res = {
+		.start = U300_MMCSD_BASE,
+		.end   = U300_MMCSD_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	.irq = {IRQ_U300_MMCSD_MCIINTR0, IRQ_U300_MMCSD_MCIINTR1 },
+	/*
+	 * This device has a DMA channel but the Linux driver does not use
+	 * it currently.
+	 */
+};
+
+/*
+ * The order of device declaration may be important, since some devices
+ * have dependencies on other devices being initialized first.
+ */
+static struct amba_device *amba_devs[] __initdata = {
+	&uart0_device,
+#ifdef CONFIG_MACH_U300_BS335
+	&uart1_device,
+#endif
+	&pl022_device,
+	&pl172_device,
+	&mmcsd_device,
+};
+
+/* Here follows a list of all hw resources that the platform devices
+ * allocate. Note, clock dependencies are not included
+ */
+
+static struct resource gpio_resources[] = {
+	{
+		.start = U300_GPIO_BASE,
+		.end   = (U300_GPIO_BASE + SZ_4K - 1),
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "gpio0",
+		.start = IRQ_U300_GPIO_PORT0,
+		.end   = IRQ_U300_GPIO_PORT0,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "gpio1",
+		.start = IRQ_U300_GPIO_PORT1,
+		.end   = IRQ_U300_GPIO_PORT1,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "gpio2",
+		.start = IRQ_U300_GPIO_PORT2,
+		.end   = IRQ_U300_GPIO_PORT2,
+		.flags = IORESOURCE_IRQ,
+	},
+#ifdef U300_COH901571_3
+	{
+		.name  = "gpio3",
+		.start = IRQ_U300_GPIO_PORT3,
+		.end   = IRQ_U300_GPIO_PORT3,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "gpio4",
+		.start = IRQ_U300_GPIO_PORT4,
+		.end   = IRQ_U300_GPIO_PORT4,
+		.flags = IORESOURCE_IRQ,
+	},
+#ifdef CONFIG_MACH_U300_BS335
+	{
+		.name  = "gpio5",
+		.start = IRQ_U300_GPIO_PORT5,
+		.end   = IRQ_U300_GPIO_PORT5,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "gpio6",
+		.start = IRQ_U300_GPIO_PORT6,
+		.end   = IRQ_U300_GPIO_PORT6,
+		.flags = IORESOURCE_IRQ,
+	},
+#endif /* CONFIG_MACH_U300_BS335 */
+#endif /* U300_COH901571_3 */
+};
+
+static struct resource keypad_resources[] = {
+	{
+		.start = U300_KEYPAD_BASE,
+		.end   = U300_KEYPAD_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "coh901461-press",
+		.start = IRQ_U300_KEYPAD_KEYBF,
+		.end   = IRQ_U300_KEYPAD_KEYBF,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "coh901461-release",
+		.start = IRQ_U300_KEYPAD_KEYBR,
+		.end   = IRQ_U300_KEYPAD_KEYBR,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct resource rtc_resources[] = {
+	{
+		.start = U300_RTC_BASE,
+		.end   = U300_RTC_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = IRQ_U300_RTC,
+		.end   = IRQ_U300_RTC,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+/*
+ * Fsmc does have IRQs: #43 and #44 (NFIF and NFIF2)
+ * but these are not yet used by the driver.
+ */
+static struct resource fsmc_resources[] = {
+	{
+		.start = U300_NAND_IF_PHYS_BASE,
+		.end   = U300_NAND_IF_PHYS_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+};
+
+static struct resource i2c0_resources[] = {
+	{
+		.start = U300_I2C0_BASE,
+		.end   = U300_I2C0_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = IRQ_U300_I2C0,
+		.end   = IRQ_U300_I2C0,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct resource i2c1_resources[] = {
+	{
+		.start = U300_I2C1_BASE,
+		.end   = U300_I2C1_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = IRQ_U300_I2C1,
+		.end   = IRQ_U300_I2C1,
+		.flags = IORESOURCE_IRQ,
+	},
+
+};
+
+static struct resource wdog_resources[] = {
+	{
+		.start = U300_WDOG_BASE,
+		.end   = U300_WDOG_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = IRQ_U300_WDOG,
+		.end   = IRQ_U300_WDOG,
+		.flags = IORESOURCE_IRQ,
+	}
+};
+
+/* TODO: These should be protected by suitable #ifdef's */
+static struct resource ave_resources[] = {
+	{
+		.name  = "AVE3e I/O Area",
+		.start = U300_VIDEOENC_BASE,
+		.end   = U300_VIDEOENC_BASE + SZ_512K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "AVE3e IRQ0",
+		.start = IRQ_U300_VIDEO_ENC_0,
+		.end   = IRQ_U300_VIDEO_ENC_0,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "AVE3e IRQ1",
+		.start = IRQ_U300_VIDEO_ENC_1,
+		.end   = IRQ_U300_VIDEO_ENC_1,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "AVE3e Physmem Area",
+		.start = 0, /* 0 will be remapped to reserved memory */
+		.end   = SZ_1M - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	/*
+	 * The AVE3e requires two regions of 256MB that it considers
+	 * "invisible". The hardware will not be able to access these
+	 * adresses, so they should never point to system RAM.
+	 */
+	{
+		.name  = "AVE3e Reserved 0",
+		.start = 0xd0000000,
+		.end   = 0xd0000000 + SZ_256M - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "AVE3e Reserved 1",
+		.start = 0xe0000000,
+		.end   = 0xe0000000 + SZ_256M - 1,
+		.flags = IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device wdog_device = {
+	.name = "wdog",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(wdog_resources),
+	.resource = wdog_resources,
+};
+
+static struct platform_device i2c0_device = {
+	.name = "stddci2c",
+	.id = 0,
+	.num_resources = ARRAY_SIZE(i2c0_resources),
+	.resource = i2c0_resources,
+};
+
+static struct platform_device i2c1_device = {
+	.name = "stddci2c",
+	.id = 1,
+	.num_resources = ARRAY_SIZE(i2c1_resources),
+	.resource = i2c1_resources,
+};
+
+static struct platform_device gpio_device = {
+	.name = "u300-gpio",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(gpio_resources),
+	.resource = gpio_resources,
+};
+
+static struct platform_device keypad_device = {
+	.name = "keypad",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(keypad_resources),
+	.resource = keypad_resources,
+};
+
+static struct platform_device rtc_device = {
+	.name = "rtc0",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(rtc_resources),
+	.resource = rtc_resources,
+};
+
+static struct platform_device fsmc_device = {
+	.name = "nandif",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(fsmc_resources),
+	.resource = fsmc_resources,
+};
+
+static struct platform_device ave_device = {
+	.name = "video_enc",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(ave_resources),
+	.resource = ave_resources,
+};
+
+/*
+ * Notice that AMBA devices are initialized before platform devices.
+ *
+ */
+static struct platform_device *platform_devs[] __initdata = {
+	&i2c0_device,
+	&i2c1_device,
+	&keypad_device,
+	&rtc_device,
+	&gpio_device,
+	&fsmc_device,
+	&wdog_device,
+	&ave_device
+};
+
+
+/*
+ * Interrupts: the U300 platforms have two pl190 ARM PrimeCells connected
+ * together so some interrupts are connected to the first one and some
+ * to the second one.
+ */
+void __init u300_init_irq(void)
+{
+	u32 mask[2] = {0, 0};
+	int i;
+
+	for (i = 0; i < NR_IRQS; i++)
+		set_bit(i, (unsigned long *) &mask[0]);
+	u300_enable_intcon_clock();
+	vic_init((void __iomem *) U300_INTCON0_VBASE, 0, mask[0]);
+	vic_init((void __iomem *) U300_INTCON1_VBASE, 32, mask[1]);
+}
+
+
+/*
+ * U300 platforms peripheral handling
+ */
+struct db_chip {
+	u16 chipid;
+	const char *name;
+};
+
+/*
+ * This is a list of the Digital Baseband chips used in the U300 platform.
+ */
+static struct db_chip db_chips[] __initdata = {
+	{
+		.chipid = 0xb800,
+		.name = "DB3000",
+	},
+	{
+		.chipid = 0xc000,
+		.name = "DB3100",
+	},
+	{
+		.chipid = 0xc800,
+		.name = "DB3150",
+	},
+	{
+		.chipid = 0xd800,
+		.name = "DB3200",
+	},
+	{
+		.chipid = 0xe000,
+		.name = "DB3250",
+	},
+	{
+		.chipid = 0xe800,
+		.name = "DB3210",
+	},
+	{
+		.chipid = 0xf000,
+		.name = "DB3350 P1x",
+	},
+	{
+		.chipid = 0xf100,
+		.name = "DB3350 P2x",
+	},
+	{
+		.chipid = 0x0000, /* List terminator */
+		.name = NULL,
+	}
+};
+
+static void u300_init_check_chip(void)
+{
+
+	u16 val;
+	struct db_chip *chip;
+	const char *chipname;
+	const char unknown[] = "UNKNOWN";
+
+	/* Read out and print chip ID */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CIDR);
+	/* This is in funky bigendian order... */
+	val = (val & 0xFFU) << 8 | (val >> 8);
+	chip = db_chips;
+	chipname = unknown;
+
+	for ( ; chip->chipid; chip++) {
+		if (chip->chipid == (val & 0xFF00U)) {
+			chipname = chip->name;
+			break;
+		}
+	}
+	printk(KERN_INFO "Initializing U300 system on %s baseband chip " \
+	       "(chip ID 0x%04x)\n", chipname, val);
+
+#ifdef CONFIG_MACH_U300_BS26
+	if ((val & 0xFF00U) != 0xc800) {
+		printk(KERN_ERR "Platform configured for BS25/BS26 " \
+		       "with DB3150 but %s detected, expect problems!",
+		       chipname);
+	}
+#endif
+#ifdef CONFIG_MACH_U300_BS330
+	if ((val & 0xFF00U) != 0xd800) {
+		printk(KERN_ERR "Platform configured for BS330 " \
+		       "with DB3200 but %s detected, expect problems!",
+		       chipname);
+	}
+#endif
+#ifdef CONFIG_MACH_U300_BS335
+	if ((val & 0xFF00U) != 0xf000 && (val & 0xFF00U) != 0xf100) {
+		printk(KERN_ERR "Platform configured for BS365 " \
+		       " with DB3350 but %s detected, expect problems!",
+		       chipname);
+	}
+#endif
+#ifdef CONFIG_MACH_U300_BS365
+	if ((val & 0xFF00U) != 0xe800) {
+		printk(KERN_ERR "Platform configured for BS365 " \
+		       "with DB3210 but %s detected, expect problems!",
+		       chipname);
+	}
+#endif
+
+
+}
+
+/*
+ * Some devices and their resources require reserved physical memory from
+ * the end of the available RAM. This function traverses the list of devices
+ * and assigns actual adresses to these.
+ */
+static void __init u300_assign_physmem(void)
+{
+	unsigned long curr_start = __pa(high_memory);
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(platform_devs); i++) {
+		for (j = 0; j < platform_devs[i]->num_resources; j++) {
+			struct resource *const res =
+			  &platform_devs[i]->resource[j];
+
+			if (IORESOURCE_MEM == res->flags &&
+				     0 == res->start) {
+				res->start  = curr_start;
+				res->end   += curr_start;
+				curr_start += (res->end - res->start + 1);
+
+				printk(KERN_INFO "core.c: Mapping RAM " \
+				       "%#x-%#x to device %s:%s\n",
+					res->start, res->end,
+				       platform_devs[i]->name, res->name);
+			}
+		}
+	}
+}
+
+void __init u300_init_devices(void)
+{
+	int i;
+	u16 val;
+
+	/* Check what platform we run and print some status information */
+	u300_init_check_chip();
+
+	/* Set system to run at PLL208, max performance, a known state. */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	val &= ~U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR);
+	/* Wait for the PLL208 to lock if not locked in yet */
+	while (!(readw(U300_SYSCON_VBASE + U300_SYSCON_CSR) &
+		 U300_SYSCON_CSR_PLL208_LOCK_IND));
+
+	/* Register the AMBA devices in the AMBA bus abstraction layer */
+	u300_clock_primecells();
+	for (i = 0; i < ARRAY_SIZE(amba_devs); i++) {
+		struct amba_device *d = amba_devs[i];
+		amba_device_register(d, &iomem_resource);
+	}
+	u300_unclock_primecells();
+
+	u300_assign_physmem();
+
+	/* Register the platform devices */
+	platform_add_devices(platform_devs, ARRAY_SIZE(platform_devs));
+
+#ifndef CONFIG_MACH_U300_SEMI_IS_SHARED
+	/*
+	 * Enable SEMI self refresh. Self-refresh of the SDRAM is entered when
+	 * both subsystems are requesting this mode.
+	 * If we not share the Acc SDRAM, this is never the case. Therefore
+	 * enable it here from the App side.
+	 */
+	val = readw(U300_SYSCON_VBASE + U300_SYSCON_SMCR) |
+		U300_SYSCON_SMCR_SEMI_SREFREQ_ENABLE;
+	writew(val, U300_SYSCON_VBASE + U300_SYSCON_SMCR);
+#endif /* CONFIG_MACH_U300_SEMI_IS_SHARED */
+}
+
+static int core_module_init(void)
+{
+	/*
+	 * This needs to be initialized later: it needs the input framework
+	 * to be initialized first.
+	 */
+	return mmc_init(&mmcsd_device);
+}
+module_init(core_module_init);
diff --git a/arch/arm/mach-u300/include/mach/debug-macro.S b/arch/arm/mach-u300/include/mach/debug-macro.S
new file mode 100644
index 0000000..f3a1cbb
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/debug-macro.S
@@ -0,0 +1,22 @@
+/*
+ *
+ * arch-arm/mach-u300/include/mach/debug-macro.S
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Debugging macro include header.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <mach/hardware.h>
+
+	.macro	addruart,rx
+	/* If we move the adress using MMU, use this. */
+	mrc	p15, 0, \rx, c1, c0
+	tst	\rx, #1			@ MMU enabled?
+	ldreq	\rx,	  = U300_SLOW_PER_PHYS_BASE @ MMU off, physical address
+	ldrne	\rx,	  = U300_SLOW_PER_VIRT_BASE @ MMU on, virtual address
+	orr	\rx, \rx, #0x00003000
+	.endm
+
+#include <asm/hardware/debug-pl01x.S>
diff --git a/arch/arm/mach-u300/include/mach/entry-macro.S b/arch/arm/mach-u300/include/mach/entry-macro.S
new file mode 100644
index 0000000..20731ae
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/entry-macro.S
@@ -0,0 +1,40 @@
+/*
+ *
+ * arch-arm/mach-u300/include/mach/entry-macro.S
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Low-level IRQ helper macros for ST-Ericsson U300
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <mach/hardware.h>
+#include <asm/hardware/vic.h>
+
+	.macro	disable_fiq
+	.endm
+
+	.macro  get_irqnr_preamble, base, tmp
+	.endm
+
+	.macro  arch_ret_to_user, tmp1, tmp2
+	.endm
+
+	.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
+	ldr	\base, = U300_AHB_PER_VIRT_BASE-U300_AHB_PER_PHYS_BASE+U300_INTCON0_BASE
+	ldr	\irqstat, [\base, #VIC_IRQ_STATUS] @ get masked status
+	mov	\irqnr, #0
+	teq	\irqstat, #0
+	bne	1002f
+1001:	ldr	\base, = U300_AHB_PER_VIRT_BASE-U300_AHB_PER_PHYS_BASE+U300_INTCON1_BASE
+	ldr	\irqstat, [\base, #VIC_IRQ_STATUS] @ get masked status
+	mov	\irqnr, #32
+	teq	\irqstat, #0
+	beq	1003f
+1002:	tst	\irqstat, #1
+	bne	1003f
+	add	\irqnr, \irqnr, #1
+	movs	\irqstat, \irqstat, lsr #1
+	bne	1002b
+1003:		/* EQ will be set if no irqs pending */
+	.endm
diff --git a/arch/arm/mach-u300/include/mach/memory.h b/arch/arm/mach-u300/include/mach/memory.h
new file mode 100644
index 0000000..bf134bc
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/memory.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/memory.h
+ *
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Memory virtual/physical mapping constants.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
+ */
+
+#ifndef __MACH_MEMORY_H
+#define __MACH_MEMORY_H
+
+#ifdef CONFIG_MACH_U300_DUAL_RAM
+
+#define PHYS_OFFSET		UL(0x48000000)
+#define BOOT_PARAMS_OFFSET	(PHYS_OFFSET + 0x100)
+
+#else
+
+#ifdef CONFIG_MACH_U300_2MB_ALIGNMENT_FIX
+#define PHYS_OFFSET (0x28000000 + \
+	     (CONFIG_MACH_U300_ACCESS_MEM_SIZE - \
+	     (CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1))*1024*1024)
+#else
+#define PHYS_OFFSET (0x28000000 + \
+	     (CONFIG_MACH_U300_ACCESS_MEM_SIZE +	\
+	     (CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1))*1024*1024)
+#endif
+#define BOOT_PARAMS_OFFSET (0x28000000 + \
+	    (CONFIG_MACH_U300_ACCESS_MEM_SIZE +		\
+	    (CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1))*1024*1024 + 0x100)
+#endif
+
+/*
+ * We enable a real big DMA buffer if need be.
+ */
+#define CONSISTENT_DMA_SIZE SZ_4M
+
+#endif
diff --git a/arch/arm/mach-u300/include/mach/platform.h b/arch/arm/mach-u300/include/mach/platform.h
new file mode 100644
index 0000000..77d9210
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/platform.h
@@ -0,0 +1,19 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/platform.h
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Basic platform init and mapping functions.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+
+#ifndef __ASSEMBLY__
+
+void u300_map_io(void);
+void u300_init_irq(void);
+void u300_init_devices(void);
+extern struct sys_timer u300_timer;
+
+#endif
diff --git a/arch/arm/mach-u300/include/mach/system.h b/arch/arm/mach-u300/include/mach/system.h
new file mode 100644
index 0000000..8daf136
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/system.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/system.h
+ *
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * System shutdown and reset functions.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <mach/hardware.h>
+#include <asm/io.h>
+#include <asm/hardware/vic.h>
+#include <asm/irq.h>
+
+/* Forward declare this function from the watchdog */
+void coh901327_watchdog_reset(void);
+
+static inline void arch_idle(void)
+{
+	cpu_do_idle();
+}
+
+static void arch_reset(char mode, const char *cmd)
+{
+	switch (mode) {
+	case 's':
+	case 'h':
+		printk(KERN_CRIT "RESET: shutting down/rebooting system\n");
+		/* Disable interrupts */
+		local_irq_disable();
+#ifdef CONFIG_COH901327_WATCHDOG
+		coh901327_watchdog_reset();
+#endif
+		break;
+	default:
+		/* Do nothing */
+		break;
+	}
+	/* Wait for system do die/reset. */
+	while (1);
+}
diff --git a/arch/arm/mach-u300/include/mach/timex.h b/arch/arm/mach-u300/include/mach/timex.h
new file mode 100644
index 0000000..f233b72
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/timex.h
@@ -0,0 +1,17 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/timex.h
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Platform tick rate definition.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#ifndef __MACH_TIMEX_H
+#define __MACH_TIMEX_H
+
+/* This is for the APP OS GP1 (General Purpose 1) timer */
+#define CLOCK_TICK_RATE		1000000
+
+#endif
diff --git a/arch/arm/mach-u300/include/mach/uncompress.h b/arch/arm/mach-u300/include/mach/uncompress.h
new file mode 100644
index 0000000..29acb71
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/uncompress.h
@@ -0,0 +1,46 @@
+/*
+ * arch/arm/mach-u300/include/mach/uncompress.h
+ *
+ * Copyright (C) 2003 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#define AMBA_UART_DR	(*(volatile unsigned char *)0xc0013000)
+#define AMBA_UART_LCRH	(*(volatile unsigned char *)0xc001302C)
+#define AMBA_UART_CR	(*(volatile unsigned char *)0xc0013030)
+#define AMBA_UART_FR	(*(volatile unsigned char *)0xc0013018)
+
+/*
+ * This does not append a newline
+ */
+static inline void putc(int c)
+{
+	while (AMBA_UART_FR & (1 << 5))
+		barrier();
+
+	AMBA_UART_DR = c;
+}
+
+static inline void flush(void)
+{
+	while (AMBA_UART_FR & (1 << 3))
+		barrier();
+}
+
+/*
+ * nothing to do
+ */
+#define arch_decomp_setup()
+#define arch_decomp_wdog()
diff --git a/arch/arm/mach-u300/include/mach/vmalloc.h b/arch/arm/mach-u300/include/mach/vmalloc.h
new file mode 100644
index 0000000..b00c51a
--- /dev/null
+++ b/arch/arm/mach-u300/include/mach/vmalloc.h
@@ -0,0 +1,12 @@
+/*
+ *
+ * arch/arm/mach-u300/include/mach/vmalloc.h
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Virtual memory allocations
+ * End must be above the I/O registers and on an even 2MiB boundary.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#define VMALLOC_END	0xfe800000
diff --git a/arch/arm/mach-u300/mmc.c b/arch/arm/mach-u300/mmc.c
new file mode 100644
index 0000000..3138d39
--- /dev/null
+++ b/arch/arm/mach-u300/mmc.c
@@ -0,0 +1,216 @@
+/*
+ *
+ * arch/arm/mach-u300/mmc.c
+ *
+ *
+ * Copyright (C) 2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Johan Lundin <johan.lundin@stericsson.com>
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
+ */
+#include <linux/device.h>
+#include <linux/amba/bus.h>
+#include <linux/mmc/host.h>
+#include <linux/input.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/machine.h>
+#include <linux/gpio.h>
+
+#include <asm/mach/mmc.h>
+#include "mmc.h"
+
+struct mmci_card_event {
+	struct input_dev *mmc_input;
+	int mmc_inserted;
+	struct work_struct workq;
+	struct mmc_platform_data mmc0_plat_data;
+};
+
+static unsigned int mmc_status(struct device *dev)
+{
+	struct mmci_card_event *mmci_card = container_of(
+		dev->platform_data,
+		struct mmci_card_event, mmc0_plat_data);
+
+	return mmci_card->mmc_inserted;
+}
+
+/*
+ * Here follows a large chunk of code which will only be enabled if you
+ * have both the AB3100 chip mounted and the MMC subsystem activated.
+ */
+
+static u32 mmc_translate_vdd(struct device *dev, unsigned int voltage)
+{
+	int v;
+
+	/*
+	 * MMC Spec:
+	 * bit 7:	1.70 - 1.95V
+	 * bit 8 - 14:	2.0 - 2.6V
+	 * bit 15 - 23:	2.7 - 3.6V
+	 *
+	 * ab3100 voltages:
+	 * 000 - 2.85V
+	 * 001 - 2.75V
+	 * 010 - 1.8V
+	 * 011 - 1.5V
+	 */
+	switch (voltage) {
+	case 8:
+		v = 3;
+		break;
+	case 9:
+	case 10:
+	case 11:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+		v = 1;
+		break;
+	case 16:
+		v = 1;
+		break;
+	case 17:
+	case 18:
+	case 19:
+	case 20:
+	case 21:
+	case 22:
+	case 23:
+	case 24:
+		v = 0;
+		break;
+	default:
+		v = 0;
+		break;
+	}
+
+	/* PL180 voltage register bits */
+	return v << 2;
+}
+
+
+
+static int mmci_callback(void *data)
+{
+	struct mmci_card_event *mmci_card = data;
+
+	disable_irq_on_gpio_pin(U300_GPIO_PIN_MMC_CD);
+	schedule_work(&mmci_card->workq);
+
+	return 0;
+}
+
+
+static ssize_t gpio_show(struct device *dev, struct device_attribute *attr,
+		  char *buf)
+{
+	struct mmci_card_event *mmci_card = container_of(
+		dev->platform_data,
+		struct mmci_card_event, mmc0_plat_data);
+
+
+	return sprintf(buf, "%d\n", !mmci_card->mmc_inserted);
+}
+
+static DEVICE_ATTR(mmc_inserted, S_IRUGO, gpio_show, NULL);
+
+static void _mmci_callback(struct work_struct *ws)
+{
+
+	struct mmci_card_event *mmci_card = container_of(
+		ws,
+		struct mmci_card_event, workq);
+
+	mdelay(20);
+
+	mmci_card->mmc_inserted = !!gpio_get_value(U300_GPIO_PIN_MMC_CD);
+
+	input_report_switch(mmci_card->mmc_input, KEY_INSERT,
+			    !mmci_card->mmc_inserted);
+	input_sync(mmci_card->mmc_input);
+
+	pr_debug("MMC/SD card was %s\n",
+		 mmci_card->mmc_inserted ? "removed" : "inserted");
+
+	enable_irq_on_gpio_pin(U300_GPIO_PIN_MMC_CD, !mmci_card->mmc_inserted);
+}
+
+int __devinit mmc_init(struct amba_device *adev)
+{
+	struct mmci_card_event *mmci_card;
+	struct device *mmcsd_device = &adev->dev;
+	int ret = 0;
+
+	mmci_card = kzalloc(sizeof(struct mmci_card_event), GFP_KERNEL);
+	if (!mmci_card)
+		return -ENOMEM;
+
+	/* Nominally 2.85V on our platform */
+	mmci_card->mmc0_plat_data.ocr_mask = MMC_VDD_28_29;
+	mmci_card->mmc0_plat_data.translate_vdd = mmc_translate_vdd;
+	mmci_card->mmc0_plat_data.status = mmc_status;
+
+	mmcsd_device->platform_data = (void *) &mmci_card->mmc0_plat_data;
+
+	INIT_WORK(&mmci_card->workq, _mmci_callback);
+
+	ret = gpio_request(U300_GPIO_PIN_MMC_CD, "MMC card detection");
+	if (ret) {
+		printk(KERN_CRIT "Could not allocate MMC card detection " \
+		       "GPIO pin\n");
+		goto out;
+	}
+
+	ret = gpio_direction_input(U300_GPIO_PIN_MMC_CD);
+	if (ret) {
+		printk(KERN_CRIT "Invalid GPIO pin requested\n");
+		goto out;
+	}
+
+	ret = sysfs_create_file(&mmcsd_device->kobj,
+			       &dev_attr_mmc_inserted.attr);
+	if (ret)
+		goto out;
+
+	mmci_card->mmc_input = input_allocate_device();
+	if (!mmci_card->mmc_input) {
+		printk(KERN_CRIT "Could not allocate MMC input device\n");
+		return -ENOMEM;
+	}
+
+	mmci_card->mmc_input->name = "MMC insert notification";
+	mmci_card->mmc_input->id.bustype = BUS_HOST;
+	mmci_card->mmc_input->id.vendor = 0;
+	mmci_card->mmc_input->id.product = 0;
+	mmci_card->mmc_input->id.version = 0x0100;
+	mmci_card->mmc_input->dev.parent = mmcsd_device;
+	input_set_capability(mmci_card->mmc_input, EV_SW, KEY_INSERT);
+
+	/*
+	 * Since this must always be compiled into the kernel, this input
+	 * is never unregistered or free:ed.
+	 */
+	ret = input_register_device(mmci_card->mmc_input);
+	if (ret) {
+		input_free_device(mmci_card->mmc_input);
+		goto out;
+	}
+
+	input_set_drvdata(mmci_card->mmc_input, mmci_card);
+
+	ret = gpio_register_callback(U300_GPIO_PIN_MMC_CD, mmci_callback,
+				     mmci_card);
+
+	schedule_work(&mmci_card->workq);
+
+	printk(KERN_INFO "Registered MMC insert/remove notification\n");
+out:
+	return ret;
+}
diff --git a/arch/arm/mach-u300/mmc.h b/arch/arm/mach-u300/mmc.h
new file mode 100644
index 0000000..92b8512
--- /dev/null
+++ b/arch/arm/mach-u300/mmc.h
@@ -0,0 +1,18 @@
+/*
+ *
+ * arch/arm/mach-u300/mmc.h
+ *
+ *
+ * Copyright (C) 2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com>
+ */
+#ifndef MMC_H
+#define MMC_H
+
+#include <linux/amba/bus.h>
+
+int __devinit mmc_init(struct amba_device *adev);
+
+#endif
diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c
new file mode 100644
index 0000000..57b5351
--- /dev/null
+++ b/arch/arm/mach-u300/timer.c
@@ -0,0 +1,422 @@
+/*
+ *
+ * arch/arm/mach-u300/timer.c
+ *
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Timer COH 901 328, runs the OS timer interrupt.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+#include <mach/hardware.h>
+
+/* Generic stuff */
+#include <asm/mach/map.h>
+#include <asm/mach/time.h>
+#include <asm/mach/irq.h>
+
+#include "clock.h"
+
+/*
+ * APP side special timer registers
+ * This timer contains four timers which can fire an interrupt each.
+ * OS (operating system) timer @ 32768 Hz
+ * DD (device driver) timer @ 1 kHz
+ * GP1 (general purpose 1) timer @ 1MHz
+ * GP2 (general purpose 2) timer @ 1MHz
+ */
+
+/* Reset OS Timer 32bit (-/W) */
+#define U300_TIMER_APP_ROST					(0x0000)
+#define U300_TIMER_APP_ROST_TIMER_RESET				(0x00000000)
+/* Enable OS Timer 32bit (-/W) */
+#define U300_TIMER_APP_EOST					(0x0004)
+#define U300_TIMER_APP_EOST_TIMER_ENABLE			(0x00000000)
+/* Disable OS Timer 32bit (-/W) */
+#define U300_TIMER_APP_DOST					(0x0008)
+#define U300_TIMER_APP_DOST_TIMER_DISABLE			(0x00000000)
+/* OS Timer Mode Register 32bit (-/W) */
+#define U300_TIMER_APP_SOSTM					(0x000c)
+#define U300_TIMER_APP_SOSTM_MODE_CONTINUOUS			(0x00000000)
+#define U300_TIMER_APP_SOSTM_MODE_ONE_SHOT			(0x00000001)
+/* OS Timer Status Register 32bit (R/-) */
+#define U300_TIMER_APP_OSTS					(0x0010)
+#define U300_TIMER_APP_OSTS_TIMER_STATE_MASK			(0x0000000F)
+#define U300_TIMER_APP_OSTS_TIMER_STATE_IDLE			(0x00000001)
+#define U300_TIMER_APP_OSTS_TIMER_STATE_ACTIVE			(0x00000002)
+#define U300_TIMER_APP_OSTS_ENABLE_IND				(0x00000010)
+#define U300_TIMER_APP_OSTS_MODE_MASK				(0x00000020)
+#define U300_TIMER_APP_OSTS_MODE_CONTINUOUS			(0x00000000)
+#define U300_TIMER_APP_OSTS_MODE_ONE_SHOT			(0x00000020)
+#define U300_TIMER_APP_OSTS_IRQ_ENABLED_IND			(0x00000040)
+#define U300_TIMER_APP_OSTS_IRQ_PENDING_IND			(0x00000080)
+/* OS Timer Current Count Register 32bit (R/-) */
+#define U300_TIMER_APP_OSTCC					(0x0014)
+/* OS Timer Terminal Count Register 32bit (R/W) */
+#define U300_TIMER_APP_OSTTC					(0x0018)
+/* OS Timer Interrupt Enable Register 32bit (-/W) */
+#define U300_TIMER_APP_OSTIE					(0x001c)
+#define U300_TIMER_APP_OSTIE_IRQ_DISABLE			(0x00000000)
+#define U300_TIMER_APP_OSTIE_IRQ_ENABLE				(0x00000001)
+/* OS Timer Interrupt Acknowledge Register 32bit (-/W) */
+#define U300_TIMER_APP_OSTIA					(0x0020)
+#define U300_TIMER_APP_OSTIA_IRQ_ACK				(0x00000080)
+
+/* Reset DD Timer 32bit (-/W) */
+#define U300_TIMER_APP_RDDT					(0x0040)
+#define U300_TIMER_APP_RDDT_TIMER_RESET				(0x00000000)
+/* Enable DD Timer 32bit (-/W) */
+#define U300_TIMER_APP_EDDT					(0x0044)
+#define U300_TIMER_APP_EDDT_TIMER_ENABLE			(0x00000000)
+/* Disable DD Timer 32bit (-/W) */
+#define U300_TIMER_APP_DDDT					(0x0048)
+#define U300_TIMER_APP_DDDT_TIMER_DISABLE			(0x00000000)
+/* DD Timer Mode Register 32bit (-/W) */
+#define U300_TIMER_APP_SDDTM					(0x004c)
+#define U300_TIMER_APP_SDDTM_MODE_CONTINUOUS			(0x00000000)
+#define U300_TIMER_APP_SDDTM_MODE_ONE_SHOT			(0x00000001)
+/* DD Timer Status Register 32bit (R/-) */
+#define U300_TIMER_APP_DDTS					(0x0050)
+#define U300_TIMER_APP_DDTS_TIMER_STATE_MASK			(0x0000000F)
+#define U300_TIMER_APP_DDTS_TIMER_STATE_IDLE			(0x00000001)
+#define U300_TIMER_APP_DDTS_TIMER_STATE_ACTIVE			(0x00000002)
+#define U300_TIMER_APP_DDTS_ENABLE_IND				(0x00000010)
+#define U300_TIMER_APP_DDTS_MODE_MASK				(0x00000020)
+#define U300_TIMER_APP_DDTS_MODE_CONTINUOUS			(0x00000000)
+#define U300_TIMER_APP_DDTS_MODE_ONE_SHOT			(0x00000020)
+#define U300_TIMER_APP_DDTS_IRQ_ENABLED_IND			(0x00000040)
+#define U300_TIMER_APP_DDTS_IRQ_PENDING_IND			(0x00000080)
+/* DD Timer Current Count Register 32bit (R/-) */
+#define U300_TIMER_APP_DDTCC					(0x0054)
+/* DD Timer Terminal Count Register 32bit (R/W) */
+#define U300_TIMER_APP_DDTTC					(0x0058)
+/* DD Timer Interrupt Enable Register 32bit (-/W) */
+#define U300_TIMER_APP_DDTIE					(0x005c)
+#define U300_TIMER_APP_DDTIE_IRQ_DISABLE			(0x00000000)
+#define U300_TIMER_APP_DDTIE_IRQ_ENABLE				(0x00000001)
+/* DD Timer Interrupt Acknowledge Register 32bit (-/W) */
+#define U300_TIMER_APP_DDTIA					(0x0060)
+#define U300_TIMER_APP_DDTIA_IRQ_ACK				(0x00000080)
+
+/* Reset GP1 Timer 32bit (-/W) */
+#define U300_TIMER_APP_RGPT1					(0x0080)
+#define U300_TIMER_APP_RGPT1_TIMER_RESET			(0x00000000)
+/* Enable GP1 Timer 32bit (-/W) */
+#define U300_TIMER_APP_EGPT1					(0x0084)
+#define U300_TIMER_APP_EGPT1_TIMER_ENABLE			(0x00000000)
+/* Disable GP1 Timer 32bit (-/W) */
+#define U300_TIMER_APP_DGPT1					(0x0088)
+#define U300_TIMER_APP_DGPT1_TIMER_DISABLE			(0x00000000)
+/* GP1 Timer Mode Register 32bit (-/W) */
+#define U300_TIMER_APP_SGPT1M					(0x008c)
+#define U300_TIMER_APP_SGPT1M_MODE_CONTINUOUS			(0x00000000)
+#define U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT			(0x00000001)
+/* GP1 Timer Status Register 32bit (R/-) */
+#define U300_TIMER_APP_GPT1S					(0x0090)
+#define U300_TIMER_APP_GPT1S_TIMER_STATE_MASK			(0x0000000F)
+#define U300_TIMER_APP_GPT1S_TIMER_STATE_IDLE			(0x00000001)
+#define U300_TIMER_APP_GPT1S_TIMER_STATE_ACTIVE			(0x00000002)
+#define U300_TIMER_APP_GPT1S_ENABLE_IND				(0x00000010)
+#define U300_TIMER_APP_GPT1S_MODE_MASK				(0x00000020)
+#define U300_TIMER_APP_GPT1S_MODE_CONTINUOUS			(0x00000000)
+#define U300_TIMER_APP_GPT1S_MODE_ONE_SHOT			(0x00000020)
+#define U300_TIMER_APP_GPT1S_IRQ_ENABLED_IND			(0x00000040)
+#define U300_TIMER_APP_GPT1S_IRQ_PENDING_IND			(0x00000080)
+/* GP1 Timer Current Count Register 32bit (R/-) */
+#define U300_TIMER_APP_GPT1CC					(0x0094)
+/* GP1 Timer Terminal Count Register 32bit (R/W) */
+#define U300_TIMER_APP_GPT1TC					(0x0098)
+/* GP1 Timer Interrupt Enable Register 32bit (-/W) */
+#define U300_TIMER_APP_GPT1IE					(0x009c)
+#define U300_TIMER_APP_GPT1IE_IRQ_DISABLE			(0x00000000)
+#define U300_TIMER_APP_GPT1IE_IRQ_ENABLE			(0x00000001)
+/* GP1 Timer Interrupt Acknowledge Register 32bit (-/W) */
+#define U300_TIMER_APP_GPT1IA					(0x00a0)
+#define U300_TIMER_APP_GPT1IA_IRQ_ACK				(0x00000080)
+
+/* Reset GP2 Timer 32bit (-/W) */
+#define U300_TIMER_APP_RGPT2					(0x00c0)
+#define U300_TIMER_APP_RGPT2_TIMER_RESET			(0x00000000)
+/* Enable GP2 Timer 32bit (-/W) */
+#define U300_TIMER_APP_EGPT2					(0x00c4)
+#define U300_TIMER_APP_EGPT2_TIMER_ENABLE			(0x00000000)
+/* Disable GP2 Timer 32bit (-/W) */
+#define U300_TIMER_APP_DGPT2					(0x00c8)
+#define U300_TIMER_APP_DGPT2_TIMER_DISABLE			(0x00000000)
+/* GP2 Timer Mode Register 32bit (-/W) */
+#define U300_TIMER_APP_SGPT2M					(0x00cc)
+#define U300_TIMER_APP_SGPT2M_MODE_CONTINUOUS			(0x00000000)
+#define U300_TIMER_APP_SGPT2M_MODE_ONE_SHOT			(0x00000001)
+/* GP2 Timer Status Register 32bit (R/-) */
+#define U300_TIMER_APP_GPT2S					(0x00d0)
+#define U300_TIMER_APP_GPT2S_TIMER_STATE_MASK			(0x0000000F)
+#define U300_TIMER_APP_GPT2S_TIMER_STATE_IDLE			(0x00000001)
+#define U300_TIMER_APP_GPT2S_TIMER_STATE_ACTIVE			(0x00000002)
+#define U300_TIMER_APP_GPT2S_ENABLE_IND				(0x00000010)
+#define U300_TIMER_APP_GPT2S_MODE_MASK				(0x00000020)
+#define U300_TIMER_APP_GPT2S_MODE_CONTINUOUS			(0x00000000)
+#define U300_TIMER_APP_GPT2S_MODE_ONE_SHOT			(0x00000020)
+#define U300_TIMER_APP_GPT2S_IRQ_ENABLED_IND			(0x00000040)
+#define U300_TIMER_APP_GPT2S_IRQ_PENDING_IND			(0x00000080)
+/* GP2 Timer Current Count Register 32bit (R/-) */
+#define U300_TIMER_APP_GPT2CC					(0x00d4)
+/* GP2 Timer Terminal Count Register 32bit (R/W) */
+#define U300_TIMER_APP_GPT2TC					(0x00d8)
+/* GP2 Timer Interrupt Enable Register 32bit (-/W) */
+#define U300_TIMER_APP_GPT2IE					(0x00dc)
+#define U300_TIMER_APP_GPT2IE_IRQ_DISABLE			(0x00000000)
+#define U300_TIMER_APP_GPT2IE_IRQ_ENABLE			(0x00000001)
+/* GP2 Timer Interrupt Acknowledge Register 32bit (-/W) */
+#define U300_TIMER_APP_GPT2IA					(0x00e0)
+#define U300_TIMER_APP_GPT2IA_IRQ_ACK				(0x00000080)
+
+/* Clock request control register - all four timers */
+#define U300_TIMER_APP_CRC					(0x100)
+#define U300_TIMER_APP_CRC_CLOCK_REQUEST_ENABLE			(0x00000001)
+
+#define TICKS_PER_JIFFY ((CLOCK_TICK_RATE + (HZ/2)) / HZ)
+#define US_PER_TICK ((1000000 + (HZ/2)) / HZ)
+
+/*
+ * The u300_set_mode() function is always called first, if we
+ * have oneshot timer active, the oneshot scheduling function
+ * u300_set_next_event() is called immediately after.
+ */
+static void u300_set_mode(enum clock_event_mode mode,
+			  struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		/* Disable interrupts on GPT1 */
+		writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE);
+		/* Disable GP1 while we're reprogramming it. */
+		writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_DGPT1);
+		/*
+		 * Set the periodic mode to a certain number of ticks per
+		 * jiffy.
+		 */
+		writel(TICKS_PER_JIFFY,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1TC);
+		/*
+		 * Set continuous mode, so the timer keeps triggering
+		 * interrupts.
+		 */
+		writel(U300_TIMER_APP_SGPT1M_MODE_CONTINUOUS,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_SGPT1M);
+		/* Enable timer interrupts */
+		writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE);
+		/* Then enable the OS timer again */
+		writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT1);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* Just break; here? */
+		/*
+		 * The actual event will be programmed by the next event hook,
+		 * so we just set a dummy value somewhere at the end of the
+		 * universe here.
+		 */
+		/* Disable interrupts on GPT1 */
+		writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE);
+		/* Disable GP1 while we're reprogramming it. */
+		writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_DGPT1);
+		/*
+		 * Expire far in the future, u300_set_next_event() will be
+		 * called soon...
+		 */
+		writel(0xFFFFFFFF, U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1TC);
+		/* We run one shot per tick here! */
+		writel(U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_SGPT1M);
+		/* Enable interrupts for this timer */
+		writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE);
+		/* Enable timer */
+		writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT1);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		/* Disable interrupts on GP1 */
+		writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE);
+		/* Disable GP1 */
+		writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE,
+		       U300_TIMER_APP_VBASE + U300_TIMER_APP_DGPT1);
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		/* Ignore this call */
+		break;
+	}
+}
+
+/*
+ * The app timer in one shot mode obviously has to be reprogrammed
+ * in EXACTLY this sequence to work properly. Do NOT try to e.g. replace
+ * the interrupt disable + timer disable commands with a reset command,
+ * it will fail miserably. Apparently (and I found this the hard way)
+ * the timer is very sensitive to the instruction order, though you don't
+ * get that impression from the data sheet.
+ */
+static int u300_set_next_event(unsigned long cycles,
+			       struct clock_event_device *evt)
+
+{
+	/* Disable interrupts on GPT1 */
+	writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE,
+	       U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE);
+	/* Disable GP1 while we're reprogramming it. */
+	writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE,
+	       U300_TIMER_APP_VBASE + U300_TIMER_APP_DGPT1);
+	/* Reset the General Purpose timer 1. */
+	writel(U300_TIMER_APP_RGPT1_TIMER_RESET,
+	       U300_TIMER_APP_VBASE + U300_TIMER_APP_RGPT1);
+	/* IRQ in n * cycles */
+	writel(cycles, U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1TC);
+	/*
+	 * We run one shot per tick here! (This is necessary to reconfigure,
+	 * the timer will tilt if you don't!)
+	 */
+	writel(U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT,
+	       U300_TIMER_APP_VBASE + U300_TIMER_APP_SGPT1M);
+	/* Enable timer interrupts */
+	writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE,
+	       U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE);
+	/* Then enable the OS timer again */
+	writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE,
+	       U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT1);
+	return 0;
+}
+
+
+/* Use general purpose timer 1 as clock event */
+static struct clock_event_device clockevent_u300_1mhz = {
+	.name           = "GPT1",
+	.rating         = 300, /* Reasonably fast and accurate clock event */
+	.features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	/* 22 calculated using the algorithm in arch/mips/kernel/time.c */
+	.shift          = 22,
+	.set_next_event = u300_set_next_event,
+	.set_mode       = u300_set_mode,
+};
+
+/* Clock event timer interrupt handler */
+static irqreturn_t u300_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = &clockevent_u300_1mhz;
+	/* ACK/Clear timer IRQ for the APP GPT1 Timer */
+	writel(U300_TIMER_APP_GPT1IA_IRQ_ACK,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IA);
+	evt->event_handler(evt);
+	return IRQ_HANDLED;
+}
+
+static struct irqaction u300_timer_irq = {
+	.name           = "U300 Timer Tick",
+	.flags          = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.handler        = u300_timer_interrupt,
+};
+
+/* Use general purpose timer 2 as clock source */
+static cycle_t u300_get_cycles(void)
+{
+	return (cycles_t) readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
+}
+
+static struct clocksource clocksource_u300_1mhz = {
+	.name           = "GPT2",
+	.rating         = 300, /* Reasonably fast and accurate clock source */
+	.read           = u300_get_cycles,
+	.mask           = CLOCKSOURCE_MASK(32), /* 32 bits */
+	/* 22 calculated using the algorithm in arch/mips/kernel/time.c */
+	.shift          = 22,
+	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+
+/*
+ * This sets up the system timers, clock source and clock event.
+ */
+static void __init u300_timer_init(void)
+{
+	u300_enable_timer_clock();
+	/*
+	 * Disable the "OS" and "DD" timers - these are designed for Symbian!
+	 * Example usage in cnh1601578 cpu subsystem pd_timer_app.c
+	 */
+	writel(U300_TIMER_APP_CRC_CLOCK_REQUEST_ENABLE,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_CRC);
+	writel(U300_TIMER_APP_ROST_TIMER_RESET,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_ROST);
+	writel(U300_TIMER_APP_DOST_TIMER_DISABLE,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_DOST);
+	writel(U300_TIMER_APP_RDDT_TIMER_RESET,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_RDDT);
+	writel(U300_TIMER_APP_DDDT_TIMER_DISABLE,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_DDDT);
+
+	/* Reset the General Purpose timer 1. */
+	writel(U300_TIMER_APP_RGPT1_TIMER_RESET,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_RGPT1);
+
+	/* Set up the IRQ handler */
+	setup_irq(IRQ_U300_TIMER_APP_GP1, &u300_timer_irq);
+
+	/* Reset the General Purpose timer 2 */
+	writel(U300_TIMER_APP_RGPT2_TIMER_RESET,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_RGPT2);
+	/* Set this timer to run around forever */
+	writel(0xFFFFFFFFU, U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2TC);
+	/* Set continuous mode so it wraps around */
+	writel(U300_TIMER_APP_SGPT2M_MODE_CONTINUOUS,
+	       U300_TIMER_APP_VBASE + U300_TIMER_APP_SGPT2M);
+	/* Disable timer interrupts */
+	writel(U300_TIMER_APP_GPT2IE_IRQ_DISABLE,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2IE);
+	/* Then enable the GP2 timer to use as a free running us counter */
+	writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE,
+		U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2);
+
+	/* This is a pure microsecond clock source */
+	clocksource_u300_1mhz.mult =
+		clocksource_khz2mult(1000, clocksource_u300_1mhz.shift);
+	if (clocksource_register(&clocksource_u300_1mhz))
+		printk(KERN_ERR "timer: failed to initialize clock "
+		       "source %s\n", clocksource_u300_1mhz.name);
+
+	clockevent_u300_1mhz.mult =
+		div_sc(1000000, NSEC_PER_SEC, clockevent_u300_1mhz.shift);
+	/* 32bit counter, so 32bits delta is max */
+	clockevent_u300_1mhz.max_delta_ns =
+		clockevent_delta2ns(0xffffffff, &clockevent_u300_1mhz);
+	/* This timer is slow enough to set for 1 cycle == 1 MHz */
+	clockevent_u300_1mhz.min_delta_ns =
+		clockevent_delta2ns(1, &clockevent_u300_1mhz);
+	clockevent_u300_1mhz.cpumask = cpumask_of(0);
+	clockevents_register_device(&clockevent_u300_1mhz);
+	/*
+	 * TODO: init and register the rest of the timers too, they can be
+	 * used by hrtimers!
+	 */
+}
+
+/*
+ * Very simple system timer that only register the clock event and
+ * clock source.
+ */
+struct sys_timer u300_timer = {
+	.init		= u300_timer_init,
+};
diff --git a/arch/arm/mach-u300/u300.c b/arch/arm/mach-u300/u300.c
new file mode 100644
index 0000000..d2a0b88
--- /dev/null
+++ b/arch/arm/mach-u300/u300.c
@@ -0,0 +1,55 @@
+/*
+ *
+ * arch/arm/mach-u300/u300.c
+ *
+ *
+ * Copyright (C) 2006-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * Platform machine definition.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <mach/hardware.h>
+#include <mach/platform.h>
+#include <mach/memory.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+
+static void __init u300_init_machine(void)
+{
+	u300_init_devices();
+}
+
+#ifdef CONFIG_MACH_U300_BS2X
+#define MACH_U300_STRING "Ericsson AB U300 S25/S26/B25/B26 Prototype Board"
+#endif
+
+#ifdef CONFIG_MACH_U300_BS330
+#define MACH_U300_STRING "Ericsson AB U330 S330/B330 Prototype Board"
+#endif
+
+#ifdef CONFIG_MACH_U300_BS335
+#define MACH_U300_STRING "Ericsson AB U335 S335/B335 Prototype Board"
+#endif
+
+#ifdef CONFIG_MACH_U300_BS365
+#define MACH_U300_STRING "Ericsson AB U365 S365/B365 Prototype Board"
+#endif
+
+MACHINE_START(U300, MACH_U300_STRING)
+	/* Maintainer: Linus Walleij <linus.walleij@stericsson.com> */
+	.phys_io	= U300_AHB_PER_PHYS_BASE,
+	.io_pg_offst	= ((U300_AHB_PER_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= BOOT_PARAMS_OFFSET,
+	.map_io		= u300_map_io,
+	.init_irq	= u300_init_irq,
+	.timer		= &u300_timer,
+	.init_machine	= u300_init_machine,
+MACHINE_END
-- 
cgit v0.10.2


From d98aac7592114241f378bc8d5b3e424cced7ded2 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 27 Apr 2009 10:21:46 +0100
Subject: [ARM] 5480/1: U300-v5 integrate into the ARM architecture

This hooks the U300 support into Kbuild and makes a small hook
in mmu.c for supporting an odd memory alignment with shared memory
on these systems.

This is rebased to RMK:s GIT HEAD. This patch tries to add the
Kconfig option in alphabetic order by option text and the Makefile
entry after config symbol.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a930e5c..44456e1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -594,6 +594,20 @@ config ARCH_LH7A40X
 	  core with a wide array of integrated devices for
 	  hand-held and low-power applications.
 
+config ARCH_U300
+	bool "ST-Ericsson U300 Series"
+	depends on MMU
+	select CPU_ARM926T
+	select ARM_AMBA
+	select ARM_VIC
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	select HAVE_CLK
+	select COMMON_CLKDEV
+	select GENERIC_GPIO
+	help
+	  Support for ST-Ericsson U300 series mobile platforms.
+
 config ARCH_DAVINCI
 	bool "TI DaVinci"
 	select CPU_ARM926T
@@ -705,6 +719,8 @@ source "arch/arm/mach-ks8695/Kconfig"
 
 source "arch/arm/mach-msm/Kconfig"
 
+source "arch/arm/mach-u300/Kconfig"
+
 source "arch/arm/mach-w90x900/Kconfig"
 
 # Definitions to make life easier
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 885a837..20084c5 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -149,6 +149,7 @@ machine-$(CONFIG_ARCH_S3C24A0)		:= s3c24a0
 machine-$(CONFIG_ARCH_S3C64XX)		:= s3c6400 s3c6410
 machine-$(CONFIG_ARCH_SA1100)		:= sa1100
 machine-$(CONFIG_ARCH_SHARK)		:= shark
+machine-$(CONFIG_ARCH_U300)		:= u300
 machine-$(CONFIG_ARCH_VERSATILE)	:= versatile
 machine-$(CONFIG_ARCH_W90X900)		:= w90x900
 machine-$(CONFIG_FOOTBRIDGE)		:= footbridge
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e6344ec..39fca4e 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -839,6 +839,20 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 		reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
 				BOOTMEM_EXCLUSIVE);
 
+	/*
+	 * U300 - This platform family can share physical memory
+	 * between two ARM cpus, one running Linux and the other
+	 * running another OS.
+	 */
+	if (machine_is_u300()) {
+#ifdef CONFIG_MACH_U300_SINGLE_RAM
+#if ((CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1) == 1) &&	\
+	CONFIG_MACH_U300_2MB_ALIGNMENT_FIX
+		res_size = 0x00100000;
+#endif
+#endif
+	}
+
 #ifdef CONFIG_SA1111
 	/*
 	 * Because of the SA1111 DMA bug, we want to preserve our
-- 
cgit v0.10.2


From 6d0485a99366d4e0e7e725f14995c74cb7ca4499 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Tue, 31 Mar 2009 17:13:15 +0100
Subject: [ARM] 5438/1: AT91: manage clock by functionality instead of CPUs

In clock.c file the clock management is grouped by cpu with cpu_is_xxx()
function. This lead to some kind of difficulties to read this file and
maintainability issues as the number of AT91 cpus & PLLs/clocks is growing.

In this patch, I try to group clock functionality together and match cpus with
this functionality set.
An update to at91_pmc.h is needed to cover some new PMC possibilities (and
some update in comments).

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Andrew Victor <avictor.za@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c
index e434510..bac578f 100644
--- a/arch/arm/mach-at91/clock.c
+++ b/arch/arm/mach-at91/clock.c
@@ -43,6 +43,25 @@
 #define clk_is_sys(x)		((x)->type & CLK_TYPE_SYSTEM)
 
 
+/*
+ * Chips have some kind of clocks : group them by functionality
+ */
+#define cpu_has_utmi()		(  cpu_is_at91cap9() \
+				|| cpu_is_at91sam9rl())
+
+#define cpu_has_800M_plla()	(cpu_is_at91sam9g20())
+
+#define cpu_has_pllb()		(!cpu_is_at91sam9rl())
+
+#define cpu_has_upll()		(0)
+
+/* USB host HS & FS */
+#define cpu_has_uhp()		(!cpu_is_at91sam9rl())
+
+/* USB device FS only */
+#define cpu_has_udpfs()		(!cpu_is_at91sam9rl())
+
+
 static LIST_HEAD(clocks);
 static DEFINE_SPINLOCK(clk_lock);
 
@@ -140,7 +159,7 @@ static struct clk utmi_clk = {
 };
 static struct clk uhpck = {
 	.name		= "uhpck",
-	.parent		= &pllb,
+	/*.parent		= ... we choose parent at runtime */
 	.mode		= pmc_sys_mode,
 };
 
@@ -173,7 +192,11 @@ static struct clk __init *at91_css_to_clk(unsigned long css)
 		case AT91_PMC_CSS_PLLA:
 			return &plla;
 		case AT91_PMC_CSS_PLLB:
-			return &pllb;
+			if (cpu_has_upll())
+				/* CSS_PLLB == CSS_UPLL */
+				return &utmi_clk;
+			else if (cpu_has_pllb())
+				return &pllb;
 	}
 
 	return NULL;
@@ -322,7 +345,7 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
 			u32	pckr;
 
 			pckr = at91_sys_read(AT91_PMC_PCKR(clk->id));
-			pckr &= AT91_PMC_CSS_PLLB;	/* clock selection */
+			pckr &= AT91_PMC_CSS;	/* clock selection */
 			pckr |= prescale << 2;
 			at91_sys_write(AT91_PMC_PCKR(clk->id), pckr);
 			clk->rate_hz = actual;
@@ -361,7 +384,7 @@ int clk_set_parent(struct clk *clk, struct clk *parent)
 }
 EXPORT_SYMBOL(clk_set_parent);
 
-/* establish PCK0..PCK3 parentage and rate */
+/* establish PCK0..PCKN parentage and rate */
 static void __init init_programmable_clock(struct clk *clk)
 {
 	struct clk	*parent;
@@ -389,11 +412,13 @@ static int at91_clk_show(struct seq_file *s, void *unused)
 	seq_printf(s, "MOR  = %8x\n", at91_sys_read(AT91_CKGR_MOR));
 	seq_printf(s, "MCFR = %8x\n", at91_sys_read(AT91_CKGR_MCFR));
 	seq_printf(s, "PLLA = %8x\n", at91_sys_read(AT91_CKGR_PLLAR));
-	if (!cpu_is_at91sam9rl())
+	if (cpu_has_pllb())
 		seq_printf(s, "PLLB = %8x\n", at91_sys_read(AT91_CKGR_PLLBR));
-	if (cpu_is_at91cap9() || cpu_is_at91sam9rl())
+	if (cpu_has_utmi())
 		seq_printf(s, "UCKR = %8x\n", uckr = at91_sys_read(AT91_CKGR_UCKR));
 	seq_printf(s, "MCKR = %8x\n", at91_sys_read(AT91_PMC_MCKR));
+	if (cpu_has_upll())
+		seq_printf(s, "USB  = %8x\n", at91_sys_read(AT91_PMC_USB));
 	seq_printf(s, "SR   = %8x\n", sr = at91_sys_read(AT91_PMC_SR));
 
 	seq_printf(s, "\n");
@@ -554,16 +579,60 @@ static struct clk *const standard_pmc_clocks[] __initdata = {
 	&clk32k,
 	&main_clk,
 	&plla,
-	&pllb,
-
-	/* PLLB children (USB) */
-	&udpck,
-	&uhpck,
 
 	/* MCK */
 	&mck
 };
 
+/* PLLB generated USB full speed clock init */
+static void __init at91_pllb_usbfs_clock_init(unsigned long main_clock)
+{
+	/*
+	 * USB clock init:  choose 48 MHz PLLB value,
+	 * disable 48MHz clock during usb peripheral suspend.
+	 *
+	 * REVISIT:  assumes MCK doesn't derive from PLLB!
+	 */
+	uhpck.parent = &pllb;
+
+	at91_pllb_usb_init = at91_pll_calc(main_clock, 48000000 * 2) | AT91_PMC_USB96M;
+	pllb.rate_hz = at91_pll_rate(&pllb, main_clock, at91_pllb_usb_init);
+	if (cpu_is_at91rm9200()) {
+		uhpck.pmc_mask = AT91RM9200_PMC_UHP;
+		udpck.pmc_mask = AT91RM9200_PMC_UDP;
+		at91_sys_write(AT91_PMC_SCER, AT91RM9200_PMC_MCKUDP);
+	} else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) {
+		uhpck.pmc_mask = AT91SAM926x_PMC_UHP;
+		udpck.pmc_mask = AT91SAM926x_PMC_UDP;
+	} else if (cpu_is_at91cap9()) {
+		uhpck.pmc_mask = AT91CAP9_PMC_UHP;
+	}
+	at91_sys_write(AT91_CKGR_PLLBR, 0);
+
+	udpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init);
+	uhpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init);
+}
+
+/* UPLL generated USB full speed clock init */
+static void __init at91_upll_usbfs_clock_init(unsigned long main_clock)
+{
+	/*
+	 * USB clock init: choose 480 MHz from UPLL,
+	 */
+	unsigned int usbr = AT91_PMC_USBS_UPLL;
+
+	/* Setup divider by 10 to reach 48 MHz */
+	usbr |= ((10 - 1) << 8) & AT91_PMC_OHCIUSBDIV;
+
+	at91_sys_write(AT91_PMC_USB, usbr);
+
+	/* Now set uhpck values */
+	uhpck.parent = &utmi_clk;
+	uhpck.pmc_mask = AT91SAM926x_PMC_UHP;
+	uhpck.rate_hz = utmi_clk.parent->rate_hz;
+	uhpck.rate_hz /= 1 + ((at91_sys_read(AT91_PMC_USB) & AT91_PMC_OHCIUSBDIV) >> 8);
+}
+
 int __init at91_clock_init(unsigned long main_clock)
 {
 	unsigned tmp, freq, mckr;
@@ -585,43 +654,37 @@ int __init at91_clock_init(unsigned long main_clock)
 
 	/* report if PLLA is more than mildly overclocked */
 	plla.rate_hz = at91_pll_rate(&plla, main_clock, at91_sys_read(AT91_CKGR_PLLAR));
-	if ((!cpu_is_at91sam9g20() && plla.rate_hz > 209000000)
-	   || (cpu_is_at91sam9g20() && plla.rate_hz > 800000000))
+	if ((!cpu_has_800M_plla() && plla.rate_hz > 209000000)
+	   || (cpu_has_800M_plla() && plla.rate_hz > 800000000))
 		pr_info("Clocks: PLLA overclocked, %ld MHz\n", plla.rate_hz / 1000000);
 
-	/*
-	 * USB clock init:  choose 48 MHz PLLB value,
-	 * disable 48MHz clock during usb peripheral suspend.
-	 *
-	 * REVISIT:  assumes MCK doesn't derive from PLLB!
-	 */
-	at91_pllb_usb_init = at91_pll_calc(main_clock, 48000000 * 2) | AT91_PMC_USB96M;
-	pllb.rate_hz = at91_pll_rate(&pllb, main_clock, at91_pllb_usb_init);
-	if (cpu_is_at91rm9200()) {
-		uhpck.pmc_mask = AT91RM9200_PMC_UHP;
-		udpck.pmc_mask = AT91RM9200_PMC_UDP;
-		at91_sys_write(AT91_PMC_SCER, AT91RM9200_PMC_MCKUDP);
-	} else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) {
-		uhpck.pmc_mask = AT91SAM926x_PMC_UHP;
-		udpck.pmc_mask = AT91SAM926x_PMC_UDP;
-	} else if (cpu_is_at91cap9()) {
-		uhpck.pmc_mask = AT91CAP9_PMC_UHP;
+
+	if (cpu_has_upll() && !cpu_has_pllb()) {
+		/* setup UTMI clock as the fourth primary clock
+		 * (instead of pllb) */
+		utmi_clk.type |= CLK_TYPE_PRIMARY;
+		utmi_clk.id = 3;
 	}
-	at91_sys_write(AT91_CKGR_PLLBR, 0);
 
-	udpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init);
-	uhpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init);
 
 	/*
 	 * USB HS clock init
 	 */
-	if (cpu_is_at91cap9() || cpu_is_at91sam9rl()) {
+	if (cpu_has_utmi())
 		/*
 		 * multiplier is hard-wired to 40
 		 * (obtain the USB High Speed 480 MHz when input is 12 MHz)
 		 */
 		utmi_clk.rate_hz = 40 * utmi_clk.parent->rate_hz;
-	}
+
+	/*
+	 * USB FS clock init
+	 */
+	if (cpu_has_pllb())
+		at91_pllb_usbfs_clock_init(main_clock);
+	if (cpu_has_upll())
+		/* assumes that we choose UPLL for USB and not PLLA */
+		at91_upll_usbfs_clock_init(main_clock);
 
 	/*
 	 * MCK and CPU derive from one of those primary clocks.
@@ -631,21 +694,31 @@ int __init at91_clock_init(unsigned long main_clock)
 	mck.parent = at91_css_to_clk(mckr & AT91_PMC_CSS);
 	freq = mck.parent->rate_hz;
 	freq /= (1 << ((mckr & AT91_PMC_PRES) >> 2));				/* prescale */
-	if (cpu_is_at91rm9200())
+	if (cpu_is_at91rm9200()) {
 		mck.rate_hz = freq / (1 + ((mckr & AT91_PMC_MDIV) >> 8));	/* mdiv */
-	else if (cpu_is_at91sam9g20()) {
+	} else if (cpu_is_at91sam9g20()) {
 		mck.rate_hz = (mckr & AT91_PMC_MDIV) ?
 			freq / ((mckr & AT91_PMC_MDIV) >> 7) : freq;	/* mdiv ; (x >> 7) = ((x >> 8) * 2) */
 		if (mckr & AT91_PMC_PDIV)
 			freq /= 2;		/* processor clock division */
-	} else
+	} else {
 		mck.rate_hz = freq / (1 << ((mckr & AT91_PMC_MDIV) >> 8));      /* mdiv */
+	}
 
 	/* Register the PMC's standard clocks */
 	for (i = 0; i < ARRAY_SIZE(standard_pmc_clocks); i++)
 		list_add_tail(&standard_pmc_clocks[i]->node, &clocks);
 
-	if (cpu_is_at91cap9() || cpu_is_at91sam9rl())
+	if (cpu_has_pllb())
+		list_add_tail(&pllb.node, &clocks);
+
+	if (cpu_has_uhp())
+		list_add_tail(&uhpck.node, &clocks);
+
+	if (cpu_has_udpfs())
+		list_add_tail(&udpck.node, &clocks);
+
+	if (cpu_has_utmi())
 		list_add_tail(&utmi_clk.node, &clocks);
 
 	/* MCK and CPU clock are "always on" */
diff --git a/arch/arm/mach-at91/include/mach/at91_pmc.h b/arch/arm/mach-at91/include/mach/at91_pmc.h
index 9561e33..64589ea 100644
--- a/arch/arm/mach-at91/include/mach/at91_pmc.h
+++ b/arch/arm/mach-at91/include/mach/at91_pmc.h
@@ -23,7 +23,7 @@
 #define		AT91_PMC_PCK		(1 <<  0)		/* Processor Clock */
 #define		AT91RM9200_PMC_UDP	(1 <<  1)		/* USB Devcice Port Clock [AT91RM9200 only] */
 #define		AT91RM9200_PMC_MCKUDP	(1 <<  2)		/* USB Device Port Master Clock Automatic Disable on Suspend [AT91RM9200 only] */
-#define		AT91CAP9_PMC_DDR	(1 <<  2)		/* DDR Clock [AT91CAP9 revC only] */
+#define		AT91CAP9_PMC_DDR	(1 <<  2)		/* DDR Clock [CAP9 revC & some SAM9 only] */
 #define		AT91RM9200_PMC_UHP	(1 <<  4)		/* USB Host Port Clock [AT91RM9200 only] */
 #define		AT91SAM926x_PMC_UHP	(1 <<  6)		/* USB Host Port Clock [AT91SAM926x only] */
 #define		AT91CAP9_PMC_UHP	(1 <<  6)		/* USB Host Port Clock [AT91CAP9 only] */
@@ -39,11 +39,11 @@
 #define	AT91_PMC_PCDR		(AT91_PMC + 0x14)	/* Peripheral Clock Disable Register */
 #define	AT91_PMC_PCSR		(AT91_PMC + 0x18)	/* Peripheral Clock Status Register */
 
-#define	AT91_CKGR_UCKR		(AT91_PMC + 0x1C)	/* UTMI Clock Register [SAM9RL, CAP9] */
+#define	AT91_CKGR_UCKR		(AT91_PMC + 0x1C)	/* UTMI Clock Register [some SAM9, CAP9] */
 #define		AT91_PMC_UPLLEN		(1   << 16)		/* UTMI PLL Enable */
 #define		AT91_PMC_UPLLCOUNT	(0xf << 20)		/* UTMI PLL Start-up Time */
 #define		AT91_PMC_BIASEN		(1   << 24)		/* UTMI BIAS Enable */
-#define		AT91_PMC_BIASCOUNT	(0xf << 28)		/* UTMI PLL Start-up Time */
+#define		AT91_PMC_BIASCOUNT	(0xf << 28)		/* UTMI BIAS Start-up Time */
 
 #define	AT91_CKGR_MOR		(AT91_PMC + 0x20)	/* Main Oscillator Register [not on SAM9RL] */
 #define		AT91_PMC_MOSCEN		(1    << 0)		/* Main Oscillator Enable */
@@ -72,6 +72,7 @@
 #define			AT91_PMC_CSS_MAIN		(1 << 0)
 #define			AT91_PMC_CSS_PLLA		(2 << 0)
 #define			AT91_PMC_CSS_PLLB		(3 << 0)
+#define			AT91_PMC_CSS_UPLL		(3 << 0)	/* [some SAM9 only] */
 #define		AT91_PMC_PRES		(7 <<  2)		/* Master Clock Prescaler */
 #define			AT91_PMC_PRES_1			(0 << 2)
 #define			AT91_PMC_PRES_2			(1 << 2)
@@ -88,12 +89,25 @@
 #define			AT91SAM9_PMC_MDIV_1		(0 << 8)	/* [SAM9,CAP9 only] */
 #define			AT91SAM9_PMC_MDIV_2		(1 << 8)
 #define			AT91SAM9_PMC_MDIV_4		(2 << 8)
-#define			AT91SAM9_PMC_MDIV_6		(3 << 8)
+#define			AT91SAM9_PMC_MDIV_6		(3 << 8)	/* [some SAM9 only] */
+#define			AT91SAM9_PMC_MDIV_3		(3 << 8)	/* [some SAM9 only] */
 #define		AT91_PMC_PDIV		(1 << 12)		/* Processor Clock Division [some SAM9 only] */
 #define			AT91_PMC_PDIV_1			(0 << 12)
 #define			AT91_PMC_PDIV_2			(1 << 12)
+#define		AT91_PMC_PLLADIV2	(1 << 12)		/* PLLA divisor by 2 [some SAM9 only] */
+#define			AT91_PMC_PLLADIV2_OFF		(0 << 12)
+#define			AT91_PMC_PLLADIV2_ON		(1 << 12)
 
-#define	AT91_PMC_PCKR(n)	(AT91_PMC + 0x40 + ((n) * 4))	/* Programmable Clock 0-3 Registers */
+#define	AT91_PMC_USB		(AT91_PMC + 0x38)	/* USB Clock Register [some SAM9 only] */
+#define		AT91_PMC_USBS		(0x1 <<  0)		/* USB OHCI Input clock selection */
+#define			AT91_PMC_USBS_PLLA		(0 << 0)
+#define			AT91_PMC_USBS_UPLL		(1 << 0)
+#define		AT91_PMC_OHCIUSBDIV	(0xF <<  8)		/* Divider for USB OHCI Clock */
+
+#define	AT91_PMC_PCKR(n)	(AT91_PMC + 0x40 + ((n) * 4))	/* Programmable Clock 0-N Registers */
+#define		AT91_PMC_CSSMCK		(0x1 <<  8)		/* CSS or Master Clock Selection */
+#define			AT91_PMC_CSSMCK_CSS		(0 << 8)
+#define			AT91_PMC_CSSMCK_MCK		(1 << 8)
 
 #define	AT91_PMC_IER		(AT91_PMC + 0x60)	/* Interrupt Enable Register */
 #define	AT91_PMC_IDR		(AT91_PMC + 0x64)	/* Interrupt Disable Register */
@@ -102,7 +116,7 @@
 #define		AT91_PMC_LOCKA		(1 <<  1)		/* PLLA Lock */
 #define		AT91_PMC_LOCKB		(1 <<  2)		/* PLLB Lock */
 #define		AT91_PMC_MCKRDY		(1 <<  3)		/* Master Clock */
-#define		AT91_PMC_LOCKU		(1 <<  6)		/* UPLL Lock [AT91CAP9 only] */
+#define		AT91_PMC_LOCKU		(1 <<  6)		/* UPLL Lock [some SAM9, AT91CAP9 only] */
 #define		AT91_PMC_OSCSEL		(1 <<  7)		/* Slow Clock Oscillator [AT91CAP9 revC only] */
 #define		AT91_PMC_PCK0RDY	(1 <<  8)		/* Programmable Clock 0 */
 #define		AT91_PMC_PCK1RDY	(1 <<  9)		/* Programmable Clock 1 */
-- 
cgit v0.10.2


From 5beae6efd1004b44c3e257dc96087978e4c763c1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 29 Apr 2009 00:16:21 -0400
Subject: tracing: fix ref count in splice pages

The pages allocated for the splice binary buffer did not initialize
the ref count correctly. This caused pages not to be freed and causes
a drastic memory leak.

Thanks to logdev I was able to trace the tracer to find where the leak
was.

[ Impact: stop memory leak when using splice ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5d704a4..9058240 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3531,6 +3531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		if (!ref)
 			break;
 
+		ref->ref = 1;
 		ref->buffer = info->tr->buffer;
 		ref->page = ring_buffer_alloc_read_page(ref->buffer);
 		if (!ref->page) {
-- 
cgit v0.10.2


From 93459c6cb9816c52200993d29dd18cea1daee335 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 29 Apr 2009 00:23:13 -0400
Subject: tracing: only add splice page if entries exist

The splice code allocates a page even when the ring buffer is empty.
It detects the ring buffer being empty when it it fails to copy
anything from the ring buffer into the page.

This patch adds a check to see if there is anything in the ring buffer
before allocating a page.

Thanks to logdev for letting me trace the tracer to find this.

[ Impact: speed up due to removing unnecessary allocation ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9058240..0aeb3b9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3508,7 +3508,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		.spd_release	= buffer_spd_release,
 	};
 	struct buffer_ref *ref;
-	int size, i;
+	int entries, size, i;
 	size_t ret;
 
 	if (*ppos & (PAGE_SIZE - 1)) {
@@ -3523,7 +3523,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		len &= PAGE_MASK;
 	}
 
-	for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
+	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+
+	for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
 		struct page *page;
 		int r;
 
@@ -3564,6 +3566,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		spd.partial[i].private = (unsigned long)ref;
 		spd.nr_pages++;
 		*ppos += PAGE_SIZE;
+
+		entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 	}
 
 	spd.nr_pages = i;
-- 
cgit v0.10.2


From f2957f1f196b0217644a17c1379855a118a37d72 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 29 Apr 2009 00:26:30 -0400
Subject: tracing: have splice only copy full pages

Splice works with pages, it is much more effecient to use an entire
page than to copy bits over several pages.

Using logdev to trace the internals of the splice mechanism, I was
able to see that splice can be very aggressive. When tracing is
occurring, and the reader caught up to the writer, and the writer
is on the reader page, the reader will copy what is there into the
splice page. Splice may iterate over several pages and if the
writer is still writing to the page, the reader will keep copying
bits to new pages to pass to userspace.

This patch changes it to only pass data to userspace if the page
is full (the writer has left the page). This has a small side effect
that splice can not read a partial page, and must wait for the
page to fill. This should not be an issue. If tracing has stopped,
then a use of "read" will still read all of the page.

[ Impact: better performance for ring buffer splice code ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0aeb3b9..f5427e0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3542,7 +3542,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		}
 
 		r = ring_buffer_read_page(ref->buffer, &ref->page,
-					  len, info->cpu, 0);
+					  len, info->cpu, 1);
 		if (r < 0) {
 			ring_buffer_free_read_page(ref->buffer,
 						   ref->page);
-- 
cgit v0.10.2


From 7d7d2b803159d4edeb051b0e5efbc1a8d9ef1c67 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 27 Apr 2009 12:37:49 -0400
Subject: ring-buffer: fix printk output

The warning output in trace_recursive_lock uses %d for a long when
it should be %ld.

[ Impact: fix compile warning ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9692f10..f4cc590 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1491,7 +1491,7 @@ static int trace_recursive_lock(void)
 	/* Disable all tracing before we do anything else */
 	tracing_off_permanent();
 
-	printk_once(KERN_WARNING "Tracing recursion: depth[%d]:"
+	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
 		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
 		    current->trace_recursion,
 		    hardirq_count() >> HARDIRQ_SHIFT,
-- 
cgit v0.10.2


From aee6a166a5401dcfcb17fcdc055e5edf2a4f4042 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:17 +0200
Subject: x86: beautify vmlinux_32.lds.S

Beautify vmlinux_32.lds.S:

 - Use tabs for indent
 - Located curly braces like in C code
 - Rearranged a few comments

To see actual differences use "git diff -b" which
ignore 'whitespace' changes.

The beautification is done to prepare a unification
of the _32 and _64 variants of the linker scripts.

[ Impact: cleanup ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-1-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 62ad500..fffa45a 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -22,196 +22,220 @@ ENTRY(phys_startup_32)
 jiffies = jiffies_64;
 
 PHDRS {
-	text PT_LOAD FLAGS(5);	/* R_E */
-	data PT_LOAD FLAGS(7);	/* RWE */
-	note PT_NOTE FLAGS(0);	/* ___ */
+	text PT_LOAD FLAGS(5);		/* R_E */
+	data PT_LOAD FLAGS(7);		/* RWE */
+	note PT_NOTE FLAGS(0);		/* ___ */
 }
 SECTIONS
 {
-  . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
-  phys_startup_32 = startup_32 - LOAD_OFFSET;
-
-  .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
-  	_text = .;			/* Text and read-only data */
-	*(.text.head)
-  } :text = 0x9090
-
-  /* read-only */
-  .text : AT(ADDR(.text) - LOAD_OFFSET) {
-	. = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */
-	*(.text.page_aligned)
-	TEXT_TEXT
-	SCHED_TEXT
-	LOCK_TEXT
-	KPROBES_TEXT
-	IRQENTRY_TEXT
-	*(.fixup)
-	*(.gnu.warning)
-  	_etext = .;			/* End of text section */
-  } :text = 0x9090
-
-  NOTES :text :note
-
-  . = ALIGN(16);		/* Exception table */
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-  	__start___ex_table = .;
-	 *(__ex_table)
-  	__stop___ex_table = .;
-  } :text = 0x9090
-
-  RODATA
-
-  /* writeable */
-  . = ALIGN(PAGE_SIZE);
-  .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
-	DATA_DATA
-	CONSTRUCTORS
+	. = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
+	phys_startup_32 = startup_32 - LOAD_OFFSET;
+
+	/* Text and read-only data */
+	.text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
+		_text = .;
+		*(.text.head)
+	} :text = 0x9090
+
+	/* read-only */
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		/* not really needed, already page aligned */
+		. = ALIGN(PAGE_SIZE);
+		*(.text.page_aligned)
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+		/* End of text section */
+		_etext = .;
+	} :text = 0x9090
+
+	NOTES :text :note
+
+	/* Exception table */
+	. = ALIGN(16);
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+		 *(__ex_table)
+		__stop___ex_table = .;
+	} :text = 0x9090
+
+	RODATA
+
+	/* writeable */
+	. = ALIGN(PAGE_SIZE);
+	/* Data */
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		DATA_DATA
+		CONSTRUCTORS
 	} :data
 
-  . = ALIGN(PAGE_SIZE);
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-  	__nosave_begin = .;
-	*(.data.nosave)
-  	. = ALIGN(PAGE_SIZE);
-  	__nosave_end = .;
-  }
-
-  . = ALIGN(PAGE_SIZE);
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-	*(.data.page_aligned)
-	*(.data.idt)
-  }
-
-  . = ALIGN(32);
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-	*(.data.cacheline_aligned)
-  }
-
-  /* rarely changed data like cpu maps */
-  . = ALIGN(32);
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-	*(.data.read_mostly)
-	_edata = .;		/* End of data section */
-  }
-
-  . = ALIGN(THREAD_SIZE);	/* init_task */
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-	*(.data.init_task)
-  }
-
-  /* might get freed after init */
-  . = ALIGN(PAGE_SIZE);
-  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
-  	__smp_locks = .;
-	*(.smp_locks)
-	__smp_locks_end = .;
-  }
-  /* will be freed after init
-   * Following ALIGN() is required to make sure no other data falls on the
-   * same page where __smp_alt_end is pointing as that page might be freed
-   * after boot. Always make sure that ALIGN() directive is present after
-   * the section which contains __smp_alt_end.
-   */
-  . = ALIGN(PAGE_SIZE);
-
-  /* will be freed after init */
-  . = ALIGN(PAGE_SIZE);		/* Init code and data */
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-  	__init_begin = .;
-	_sinittext = .;
-	INIT_TEXT
-	_einittext = .;
-  }
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-	INIT_DATA
-  }
-  . = ALIGN(16);
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-  	__setup_start = .;
-	*(.init.setup)
-  	__setup_end = .;
-   }
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-  	__initcall_start = .;
-	INITCALLS
-  	__initcall_end = .;
-  }
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-  	__con_initcall_start = .;
-	*(.con_initcall.init)
-  	__con_initcall_end = .;
-  }
-  .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-	__x86_cpu_dev_start = .;
-	*(.x86_cpu_dev.init)
-	__x86_cpu_dev_end = .;
-  }
-  SECURITY_INIT
-  . = ALIGN(4);
-  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
-  	__alt_instructions = .;
-	*(.altinstructions)
-	__alt_instructions_end = .;
-  }
-  .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-	*(.altinstr_replacement)
-  }
-  . = ALIGN(4);
-  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-  	__parainstructions = .;
-	*(.parainstructions)
-  	__parainstructions_end = .;
-  }
-  /* .exit.text is discard at runtime, not link time, to deal with references
-     from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-	EXIT_TEXT
-  }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
-	EXIT_DATA
-  }
+	. = ALIGN(PAGE_SIZE);
+	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+		*(.data.page_aligned)
+		*(.data.idt)
+	}
+
+	. = ALIGN(32);
+	.data.cacheline_aligned :
+		AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+		*(.data.cacheline_aligned)
+	}
+
+	/* rarely changed data like cpu maps */
+	. = ALIGN(32);
+	.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
+		*(.data.read_mostly)
+
+		/* End of data section */
+		_edata = .;
+	}
+
+	/* init_task */
+	. = ALIGN(THREAD_SIZE);
+	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+		*(.data.init_task)
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+		/* might get freed after init */
+		__smp_locks = .;
+		*(.smp_locks)
+		__smp_locks_end = .;
+	}
+	/* will be freed after init
+	 * Following ALIGN() is required to make sure no other data falls on the
+	 * same page where __smp_alt_end is pointing as that page might be freed
+	 * after boot. Always make sure that ALIGN() directive is present after
+	 * the section which contains __smp_alt_end.
+	 */
+	. = ALIGN(PAGE_SIZE);
+
+	/* Init code and data - will be freed after init */
+	. = ALIGN(PAGE_SIZE);
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		__init_begin = .;
+		_sinittext = .;
+		INIT_TEXT
+		_einittext = .;
+	}
+
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+		INIT_DATA
+	}
+
+	. = ALIGN(16);
+	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
+	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+		__initcall_start = .;
+		INITCALLS
+		__initcall_end = .;
+	}
+
+	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
+	}
+
+	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
+		__x86_cpu_dev_start = .;
+		*(.x86_cpu_dev.init)
+		__x86_cpu_dev_end = .;
+	}
+
+	SECURITY_INIT
+
+	. = ALIGN(4);
+	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+
+	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+		*(.altinstr_replacement)
+	}
+
+	. = ALIGN(4);
+	.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+		__parainstructions = .;
+		*(.parainstructions)
+		__parainstructions_end = .;
+	}
+
+	/*
+	 * .exit.text is discard at runtime, not link time, to deal with
+	 *  references from .altinstructions and .eh_frame
+	 */
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+		EXIT_TEXT
+	}
+
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+		EXIT_DATA
+	}
+
 #if defined(CONFIG_BLK_DEV_INITRD)
-  . = ALIGN(PAGE_SIZE);
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-	__initramfs_start = .;
-	*(.init.ramfs)
-	__initramfs_end = .;
-  }
+	. = ALIGN(PAGE_SIZE);
+	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
 #endif
-  PERCPU(PAGE_SIZE)
-  . = ALIGN(PAGE_SIZE);
-  /* freed after init ends here */
-
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-	__init_end = .;
-	__bss_start = .;		/* BSS */
-	*(.bss.page_aligned)
-	*(.bss)
-	. = ALIGN(4);
-	__bss_stop = .;
-  }
 
-  .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
+	PERCPU(PAGE_SIZE)
+
 	. = ALIGN(PAGE_SIZE);
-	__brk_base = . ;
- 	. += 64 * 1024 ;	/* 64k alignment slop space */
-	*(.brk_reservation)	/* areas brk users have reserved */
-	__brk_limit = . ;
-  }
+	/* freed after init ends here */
+
+	/* BSS */
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+		__init_end = .;
+		__bss_start = .;
+		*(.bss.page_aligned)
+		*(.bss)
+		. = ALIGN(4);
+		__bss_stop = .;
+	}
 
-  .end : AT(ADDR(.end) - LOAD_OFFSET) {
-	_end = . ;
-  }
+	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		__brk_base = .;
+		. += 64 * 1024;		/* 64k alignment slop space */
+		*(.brk_reservation)	/* areas brk users have reserved */
+		__brk_limit = .;
+	}
 
-  /* Sections to be discarded */
-  /DISCARD/ : {
-	*(.exitcall.exit)
-	*(.discard)
+	.end : AT(ADDR(.end) - LOAD_OFFSET) {
+		_end = . ;
 	}
 
-  STABS_DEBUG
+	/* Sections to be discarded */
+	/DISCARD/ : {
+		*(.exitcall.exit)
+		*(.discard)
+	}
 
-  DWARF_DEBUG
+	STABS_DEBUG
+	DWARF_DEBUG
 }
 
 /*
-- 
cgit v0.10.2


From 17ce265d6a1789eae5eb739a3bb7fcffdb3e87c5 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:18 +0200
Subject: x86, vmlinux.lds: unify header/footer

Merge everything except PHDRS and SECTIONS into
vmlinux.lds.S.

[ Impact: cleanup ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-2-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 849ee61..d113642 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -1,5 +1,82 @@
+/*
+ * ld script for the x86 kernel
+ *
+ * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Modernisation and unification done by Sam Ravnborg <sam@ravnborg.org>
+ *
+ *
+ * Don't define absolute symbols until and unless you know that symbol
+ * value is should remain constant even if kernel image is relocated
+ * at run time. Absolute symbols are not relocated. If symbol value should
+ * change if kernel is relocated, make the symbol section relative and
+ * put it inside the section definition.
+ */
+
+#ifdef CONFIG_X86_32
+#define LOAD_OFFSET __PAGE_OFFSET
+#else
+#define LOAD_OFFSET __START_KERNEL_map
+#endif
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page_types.h>
+#include <asm/cache.h>
+#include <asm/boot.h>
+
+#undef i386     /* in case the preprocessor is a 32bit one */
+
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+
+#ifdef CONFIG_X86_32
+OUTPUT_ARCH(i386)
+ENTRY(phys_startup_32)
+jiffies = jiffies_64;
+#else
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(phys_startup_64)
+jiffies_64 = jiffies;
+#endif
+
+
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
 #else
 # include "vmlinux_64.lds.S"
 #endif
+
+
+#ifdef CONFIG_X86_32
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+        "kernel image bigger than KERNEL_IMAGE_SIZE")
+#else
+/*
+ * Per-cpu symbols which need to be offset from __per_cpu_load
+ * for the boot processor.
+ */
+#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+INIT_PER_CPU(gdt_page);
+INIT_PER_CPU(irq_stack_union);
+
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
+	"kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+        "irq_stack_union is not at start of per-cpu area");
+#endif
+
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+       "kexec control code size is too big")
+#endif
+
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index fffa45a..4c985fc 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,26 +1,3 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- *
- * Don't define absolute symbols until and unless you know that symbol
- * value is should remain constant even if kernel image is relocated
- * at run time. Absolute symbols are not relocated. If symbol value should
- * change if kernel is relocated, make the symbol section relative and
- * put it inside the section definition.
- */
-
-#define LOAD_OFFSET __PAGE_OFFSET
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/thread_info.h>
-#include <asm/page_types.h>
-#include <asm/cache.h>
-#include <asm/boot.h>
-
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(phys_startup_32)
-jiffies = jiffies_64;
-
 PHDRS {
 	text PT_LOAD FLAGS(5);		/* R_E */
 	data PT_LOAD FLAGS(7);		/* RWE */
@@ -237,17 +214,3 @@ SECTIONS
 	STABS_DEBUG
 	DWARF_DEBUG
 }
-
-/*
- * Build-time check on the image size:
- */
-ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
-	"kernel image bigger than KERNEL_IMAGE_SIZE")
-
-#ifdef CONFIG_KEXEC
-/* Link time checks */
-#include <asm/kexec.h>
-
-ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
-       "kexec control code size is too big")
-#endif
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 6d5a5b0..7f1cc3d 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,19 +1,3 @@
-/* ld script to make x86-64 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- */
-
-#define LOAD_OFFSET __START_KERNEL_map
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/asm-offsets.h>
-#include <asm/page_types.h>
-
-#undef i386	/* in case the preprocessor is a 32bit one */
-
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
 PHDRS {
 	text PT_LOAD FLAGS(5);		/* R_E */
 	data PT_LOAD FLAGS(7);		/* RWE */
@@ -308,29 +292,3 @@ SECTIONS
 	STABS_DEBUG
 	DWARF_DEBUG
 }
-
-/*
- * Per-cpu symbols which need to be offset from __per_cpu_load
- * for the boot processor.
- */
-#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
-INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(irq_stack_union);
-
-/*
- * Build-time check on the image size:
- */
-ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
-	"kernel image bigger than KERNEL_IMAGE_SIZE")
-
-#ifdef CONFIG_SMP
-ASSERT((per_cpu__irq_stack_union == 0),
-        "irq_stack_union is not at start of per-cpu area");
-#endif
-
-#ifdef CONFIG_KEXEC
-#include <asm/kexec.h>
-
-ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
-       "kexec control code size is too big")
-#endif
-- 
cgit v0.10.2


From afb8095a7eab32e5760613fa73d2f80a39cc45bf Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:19 +0200
Subject: x86, vmlinux.lds: unify PHDRS

PHDRS are not equal for the two - so
use ifdefs to cover up for that.

On the assumption that they may become equal the ifdef
is inside the PHDRS definiton.

[ Impact: cleanup ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-3-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index d113642..1a1b303 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -40,6 +40,19 @@ ENTRY(phys_startup_64)
 jiffies_64 = jiffies;
 #endif
 
+PHDRS {
+	text PT_LOAD FLAGS(5);          /* R_E */
+	data PT_LOAD FLAGS(7);          /* RWE */
+#ifdef CONFIG_X86_64
+	user PT_LOAD FLAGS(7);          /* RWE */
+	data.init PT_LOAD FLAGS(7);     /* RWE */
+#ifdef CONFIG_SMP
+	percpu PT_LOAD FLAGS(7);        /* RWE */
+#endif
+	data.init2 PT_LOAD FLAGS(7);    /* RWE */
+#endif
+	note PT_NOTE FLAGS(0);          /* ___ */
+}
 
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 4c985fc..4fd40dc 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,8 +1,3 @@
-PHDRS {
-	text PT_LOAD FLAGS(5);		/* R_E */
-	data PT_LOAD FLAGS(7);		/* RWE */
-	note PT_NOTE FLAGS(0);		/* ___ */
-}
 SECTIONS
 {
 	. = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 7f1cc3d..6e7cbee 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,14 +1,3 @@
-PHDRS {
-	text PT_LOAD FLAGS(5);		/* R_E */
-	data PT_LOAD FLAGS(7);		/* RWE */
-	user PT_LOAD FLAGS(7);		/* RWE */
-	data.init PT_LOAD FLAGS(7);	/* RWE */
-#ifdef CONFIG_SMP
-	percpu PT_LOAD FLAGS(7);	/* RWE */
-#endif
-	data.init2 PT_LOAD FLAGS(7);	/* RWE */
-	note PT_NOTE FLAGS(0);		/* ___ */
-}
 SECTIONS
 {
 	. = __START_KERNEL;
-- 
cgit v0.10.2


From 444e0ae4831f99ba25062d9a5ccb7117c62841a0 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:20 +0200
Subject: x86, vmlinux.lds: unify start/end of SECTIONS

[ Impact: cleanup ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-4-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1a1b303..845776f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -54,12 +54,26 @@ PHDRS {
 	note PT_NOTE FLAGS(0);          /* ___ */
 }
 
+SECTIONS
+{
+#ifdef CONFIG_X86_32
+        . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
+        phys_startup_32 = startup_32 - LOAD_OFFSET;
+#else
+        . = __START_KERNEL;
+        phys_startup_64 = startup_64 - LOAD_OFFSET;
+#endif
+
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
 #else
 # include "vmlinux_64.lds.S"
 #endif
 
+        STABS_DEBUG
+        DWARF_DEBUG
+}
+
 
 #ifdef CONFIG_X86_32
 ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 4fd40dc..3d3d49c 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,8 +1,3 @@
-SECTIONS
-{
-	. = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
-	phys_startup_32 = startup_32 - LOAD_OFFSET;
-
 	/* Text and read-only data */
 	.text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
 		_text = .;
@@ -205,7 +200,3 @@ SECTIONS
 		*(.exitcall.exit)
 		*(.discard)
 	}
-
-	STABS_DEBUG
-	DWARF_DEBUG
-}
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 6e7cbee..2d7fa20 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,8 +1,3 @@
-SECTIONS
-{
-	. = __START_KERNEL;
-	phys_startup_64 = startup_64 - LOAD_OFFSET;
-
 	/* Text and read-only data */
 	.text :  AT(ADDR(.text) - LOAD_OFFSET) {
 		_text = .;
@@ -277,7 +272,3 @@ SECTIONS
 		*(.eh_frame)
 		*(.discard)
 	}
-
-	STABS_DEBUG
-	DWARF_DEBUG
-}
-- 
cgit v0.10.2


From dfc20895d944cfa81d8ff00809b68ecb8f72cbb0 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:21 +0200
Subject: x86, vmlinux.lds: unify .text output sections

32 bit x86 had a dedicated .text.head output section,
whereas 64 bit had it all in a single output section.

In the unified version the dedicated .text.head output section
was kept to have full control over the head code.

32 bit:

- Moved definition of _stext to the linker script.
  The definition is located _after_ .text.page_aligned as this
  is what 32 bit did before.

The ALIGN(8) was introduced so we hit the exact same address
(on the tested config) before and after the move.

I assume that it is a bug that _stext did not cover the
.text.page_aligned section - if this is true it can be fixed
in a follow-up patch (and the ugly ALIGN() can be dropped).

[ Impact: 64-bit: cleanup, 32-bit: use the 64-bit linker script ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-5-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 3068388..dc5ed4b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -608,13 +608,6 @@ ignore_int:
 ENTRY(initial_code)
 	.long i386_start_kernel
 
-.section .text
-/*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
 /*
  * BSS section
  */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 845776f..a7c88bb 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -64,6 +64,37 @@ SECTIONS
         phys_startup_64 = startup_64 - LOAD_OFFSET;
 #endif
 
+	/* Text and read-only data */
+
+	/* bootstrapping code */
+	.text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
+		_text = .;
+		*(.text.head)
+	} :text = 0x9090
+
+	/* The rest of the text */
+	.text :  AT(ADDR(.text) - LOAD_OFFSET) {
+#ifdef CONFIG_X86_32
+		/* not really needed, already page aligned */
+		. = ALIGN(PAGE_SIZE);
+		*(.text.page_aligned)
+#endif
+		. = ALIGN(8);
+		_stext = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+		/* End of text section */
+		_etext = .;
+	} :text = 0x9090
+
+	NOTES :text :note
+
+
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
 #else
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 3d3d49c..8540092 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,27 +1,3 @@
-	/* Text and read-only data */
-	.text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
-		_text = .;
-		*(.text.head)
-	} :text = 0x9090
-
-	/* read-only */
-	.text : AT(ADDR(.text) - LOAD_OFFSET) {
-		/* not really needed, already page aligned */
-		. = ALIGN(PAGE_SIZE);
-		*(.text.page_aligned)
-		TEXT_TEXT
-		SCHED_TEXT
-		LOCK_TEXT
-		KPROBES_TEXT
-		IRQENTRY_TEXT
-		*(.fixup)
-		*(.gnu.warning)
-		/* End of text section */
-		_etext = .;
-	} :text = 0x9090
-
-	NOTES :text :note
-
 	/* Exception table */
 	. = ALIGN(16);
 	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 2d7fa20..b5d4367 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,23 +1,3 @@
-	/* Text and read-only data */
-	.text :  AT(ADDR(.text) - LOAD_OFFSET) {
-		_text = .;
-		/* First the code that has to be first for bootstrapping */
-		*(.text.head)
-		_stext = .;
-		/* Then the rest */
-		TEXT_TEXT
-		SCHED_TEXT
-		LOCK_TEXT
-		KPROBES_TEXT
-		IRQENTRY_TEXT
-		*(.fixup)
-		*(.gnu.warning)
-		/* End of text section */
-		_etext = .;
-	} :text = 0x9090
-
-	NOTES :text :note
-
 	/* Exception table */
 	. = ALIGN(16);
 	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-- 
cgit v0.10.2


From 448bc3ab0d03e77fee8e4264de0d001fc87bc164 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:22 +0200
Subject: x86, vmlinux.lds: unify exception table

[ Impact: cleanup ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-6-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a7c88bb..67164f6 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -94,6 +94,16 @@ SECTIONS
 
 	NOTES :text :note
 
+	/* Exception table */
+	. = ALIGN(16);
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+	} :text = 0x9090
+
+	RODATA
+
 
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 8540092..920cc69 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,13 +1,3 @@
-	/* Exception table */
-	. = ALIGN(16);
-	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-		__start___ex_table = .;
-		 *(__ex_table)
-		__stop___ex_table = .;
-	} :text = 0x9090
-
-	RODATA
-
 	/* writeable */
 	. = ALIGN(PAGE_SIZE);
 	/* Data */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index b5d4367..641f3f9 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,13 +1,3 @@
-	/* Exception table */
-	. = ALIGN(16);
-	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-		__start___ex_table = .;
-		 *(__ex_table)
-		__stop___ex_table = .;
-	} :text = 0x9090
-
-	RODATA
-
 	/* Align data segment to page size boundary */
 	. = ALIGN(PAGE_SIZE);
 	/* Data */
-- 
cgit v0.10.2


From 1f6397bac55040cd520d9eaf299e155a7aa01d5f Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:23 +0200
Subject: x86, vmlinux.lds: unify data output sections

For 64 bit the following functional changes are introduced:

 - .data.page_aligned has moved
 - .data.cacheline_aligned has moved
 - .data.read_mostly has moved
 - ALIGN() moved out of output section for .data.cacheline_aligned
 - ALIGN() moved out of output section for .data.page_aligned

Notice that 32 bit and 64 bit has different location of _edata.
.data_nosave is 32 bit only as 64 bit is special due to PERCPU.

[ Impact: 32-bit: cleanup, 64-bit: use 32-bit linker script ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-7-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 67164f6..067bdb0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -104,6 +104,61 @@ SECTIONS
 
 	RODATA
 
+	/* Data */
+	. = ALIGN(PAGE_SIZE);
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		DATA_DATA
+		CONSTRUCTORS
+
+#ifdef CONFIG_X86_64
+		/* End of data section */
+		_edata = .;
+#endif
+	} :data
+
+#ifdef CONFIG_X86_32
+	/* 32 bit has nosave before _edata */
+	. = ALIGN(PAGE_SIZE);
+	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+	}
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+		*(.data.page_aligned)
+		*(.data.idt)
+	}
+
+#ifdef CONFIG_X86_32
+	. = ALIGN(32);
+#else
+	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+#endif
+	.data.cacheline_aligned :
+		AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+		*(.data.cacheline_aligned)
+	}
+
+	/* rarely changed data like cpu maps */
+#ifdef CONFIG_X86_32
+	. = ALIGN(32);
+#else
+	. = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
+#endif
+	.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
+		*(.data.read_mostly)
+
+#ifdef CONFIG_X86_32
+		/* End of data section */
+		_edata = .;
+#endif
+	}
+
 
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 920cc69..8ade846 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,40 +1,3 @@
-	/* writeable */
-	. = ALIGN(PAGE_SIZE);
-	/* Data */
-	.data : AT(ADDR(.data) - LOAD_OFFSET) {
-		DATA_DATA
-		CONSTRUCTORS
-	} :data
-
-	. = ALIGN(PAGE_SIZE);
-	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-		__nosave_begin = .;
-		*(.data.nosave)
-		. = ALIGN(PAGE_SIZE);
-		__nosave_end = .;
-	}
-
-	. = ALIGN(PAGE_SIZE);
-	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-		*(.data.page_aligned)
-		*(.data.idt)
-	}
-
-	. = ALIGN(32);
-	.data.cacheline_aligned :
-		AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-		*(.data.cacheline_aligned)
-	}
-
-	/* rarely changed data like cpu maps */
-	. = ALIGN(32);
-	.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-		*(.data.read_mostly)
-
-		/* End of data section */
-		_edata = .;
-	}
-
 	/* init_task */
 	. = ALIGN(THREAD_SIZE);
 	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 641f3f9..8262701 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,26 +1,3 @@
-	/* Align data segment to page size boundary */
-	. = ALIGN(PAGE_SIZE);
-	/* Data */
-	.data : AT(ADDR(.data) - LOAD_OFFSET) {
-		DATA_DATA
-		CONSTRUCTORS
-		/* End of data section */
-		_edata = .;
-	} :data
-
-
-	.data.cacheline_aligned :
-		AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-		. = ALIGN(PAGE_SIZE);
-		. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-		*(.data.cacheline_aligned)
-	}
-
-	. = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
-	.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-		*(.data.read_mostly)
-	}
-
 #define VSYSCALL_ADDR (-10*1024*1024)
 #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
                             SIZEOF(.data.read_mostly) + 4095) & ~(4095))
@@ -95,11 +72,6 @@
 		*(.data.init_task)
 	} :data.init
 
-	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-		. = ALIGN(PAGE_SIZE);
-		*(.data.page_aligned)
-	}
-
 	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
 		/* might get freed after init */
 		. = ALIGN(PAGE_SIZE);
-- 
cgit v0.10.2


From ff6f87e1626e10beef675084c9b5384a9477e3d5 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:24 +0200
Subject: x86, vmlinux.lds: move vsyscall output sections

[ Impact: cleanup ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-8-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 067bdb0..b3106c2 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -159,6 +159,77 @@ SECTIONS
 #endif
 	}
 
+#ifdef CONFIG_X86_64
+
+#define VSYSCALL_ADDR (-10*1024*1024)
+#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
+                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \
+                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+
+#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
+#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
+
+#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
+#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+
+	. = VSYSCALL_ADDR;
+	.vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
+		*(.vsyscall_0)
+	} :user
+
+	__vsyscall_0 = VSYSCALL_VIRT_ADDR;
+
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
+		*(.vsyscall_fn)
+	}
+
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
+		*(.vsyscall_gtod_data)
+	}
+
+	vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+	.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
+		*(.vsyscall_clock)
+	}
+	vsyscall_clock = VVIRT(.vsyscall_clock);
+
+
+	.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
+		*(.vsyscall_1)
+	}
+	.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
+		*(.vsyscall_2)
+	}
+
+	.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
+		*(.vgetcpu_mode)
+	}
+	vgetcpu_mode = VVIRT(.vgetcpu_mode);
+
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	.jiffies : AT(VLOAD(.jiffies)) {
+		*(.jiffies)
+	}
+	jiffies = VVIRT(.jiffies);
+
+	.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
+		*(.vsyscall_3)
+	}
+
+	. = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
+
+#undef VSYSCALL_ADDR
+#undef VSYSCALL_PHYS_ADDR
+#undef VSYSCALL_VIRT_ADDR
+#undef VLOAD_OFFSET
+#undef VLOAD
+#undef VVIRT_OFFSET
+#undef VVIRT
+
+#endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 8262701..013aa0e 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,71 +1,3 @@
-#define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
-                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \
-                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
-	. = VSYSCALL_ADDR;
-	.vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
-		*(.vsyscall_0)
-	} :user
-
-	__vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
-	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-	.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
-		*(.vsyscall_fn)
-	}
-
-	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
-		*(.vsyscall_gtod_data)
-	}
-
-	vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
-	.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
-		*(.vsyscall_clock)
-	}
-	vsyscall_clock = VVIRT(.vsyscall_clock);
-
-
-	.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
-		*(.vsyscall_1)
-	}
-	.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
-		*(.vsyscall_2)
-	}
-
-	.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
-		*(.vgetcpu_mode)
-	}
-	vgetcpu_mode = VVIRT(.vgetcpu_mode);
-
-	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-	.jiffies : AT(VLOAD(.jiffies)) {
-		*(.jiffies)
-	}
-	jiffies = VVIRT(.jiffies);
-
-	.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
-		*(.vsyscall_3)
-	}
-
-	. = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
-
-#undef VSYSCALL_ADDR
-#undef VSYSCALL_PHYS_ADDR
-#undef VSYSCALL_VIRT_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
 	/* init_task */
 	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
 		. = ALIGN(THREAD_SIZE);
-- 
cgit v0.10.2


From e58bdaa8f810332e5c1760ce496b01e07d51642c Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:25 +0200
Subject: x86, vmlinux.lds: unify first part of initdata

32-bit:

 - Move definition of __init_begin outside output_section
   because it covers more than one section
 - Move ALIGN() for end-of-section inside .smp_locks output section.
   Same effect but the intent is better documented that
   we need both start and end aligned.

64-bit:

 - Move ALIGN() outside output section in .init.setup
 - Deleted unused __smp_alt_* symbols

None of the above should result in any functional change.

[ Impact: refactor and unify linker script ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-9-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index b3106c2..8b203c4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -231,6 +231,67 @@ SECTIONS
 
 #endif /* CONFIG_X86_64 */
 
+	/* init_task */
+	. = ALIGN(THREAD_SIZE);
+	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+		*(.data.init_task)
+	}
+#ifdef CONFIG_X86_64
+	 :data.init
+#endif
+
+	/*
+	 * smp_locks might be freed after init
+	 * start/end must be page aligned
+	 */
+	. = ALIGN(PAGE_SIZE);
+	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+		__smp_locks = .;
+		*(.smp_locks)
+		__smp_locks_end = .;
+		. = ALIGN(PAGE_SIZE);
+	}
+
+	/* Init code and data - will be freed after init */
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .; /* paired with __init_end */
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		_sinittext = .;
+		INIT_TEXT
+		_einittext = .;
+	}
+
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+		INIT_DATA
+	}
+
+	. = ALIGN(16);
+	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
+	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+		__initcall_start = .;
+		INITCALLS
+		__initcall_end = .;
+	}
+
+	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
+	}
+
+	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
+		__x86_cpu_dev_start = .;
+		*(.x86_cpu_dev.init)
+		__x86_cpu_dev_end = .;
+	}
+
+	SECURITY_INIT
+
+
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
 #else
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 8ade846..d8ba539 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,63 +1,3 @@
-	/* init_task */
-	. = ALIGN(THREAD_SIZE);
-	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-		*(.data.init_task)
-	}
-
-	. = ALIGN(PAGE_SIZE);
-	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
-		/* might get freed after init */
-		__smp_locks = .;
-		*(.smp_locks)
-		__smp_locks_end = .;
-	}
-	/* will be freed after init
-	 * Following ALIGN() is required to make sure no other data falls on the
-	 * same page where __smp_alt_end is pointing as that page might be freed
-	 * after boot. Always make sure that ALIGN() directive is present after
-	 * the section which contains __smp_alt_end.
-	 */
-	. = ALIGN(PAGE_SIZE);
-
-	/* Init code and data - will be freed after init */
-	. = ALIGN(PAGE_SIZE);
-	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-		__init_begin = .;
-		_sinittext = .;
-		INIT_TEXT
-		_einittext = .;
-	}
-
-	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-		INIT_DATA
-	}
-
-	. = ALIGN(16);
-	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-		__setup_start = .;
-		*(.init.setup)
-		__setup_end = .;
-	}
-	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-		__initcall_start = .;
-		INITCALLS
-		__initcall_end = .;
-	}
-
-	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-		__con_initcall_start = .;
-		*(.con_initcall.init)
-		__con_initcall_end = .;
-	}
-
-	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-		__x86_cpu_dev_start = .;
-		*(.x86_cpu_dev.init)
-		__x86_cpu_dev_end = .;
-	}
-
-	SECURITY_INIT
-
 	. = ALIGN(4);
 	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
 		__alt_instructions = .;
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 013aa0e..0e8054e 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,62 +1,3 @@
-	/* init_task */
-	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-		. = ALIGN(THREAD_SIZE);
-		*(.data.init_task)
-	} :data.init
-
-	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
-		/* might get freed after init */
-		. = ALIGN(PAGE_SIZE);
-		__smp_alt_begin = .;
-		__smp_locks = .;
-		*(.smp_locks)
-		__smp_locks_end = .;
-		. = ALIGN(PAGE_SIZE);
-		__smp_alt_end = .;
-	}
-
-	/* Init code and data */
-	. = ALIGN(PAGE_SIZE);
-	__init_begin = .;	/* paired with __init_end */
-	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-		_sinittext = .;
-		INIT_TEXT
-		_einittext = .;
-	}
-
-	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-		__initdata_begin = .;
-		INIT_DATA
-		__initdata_end = .;
-	}
-
-	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-		. = ALIGN(16);
-		__setup_start = .;
-		*(.init.setup)
-		__setup_end = .;
-	}
-
-	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-		__initcall_start = .;
-		INITCALLS
-		__initcall_end = .;
-	}
-
-	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-		__con_initcall_start = .;
-		*(.con_initcall.init)
-		__con_initcall_end = .;
-	}
-
-	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-		__x86_cpu_dev_start = .;
-		*(.x86_cpu_dev.init)
-		__x86_cpu_dev_end = .;
-	}
-
-	SECURITY_INIT
-
 	. = ALIGN(8);
 	.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
 		__parainstructions = .;
-- 
cgit v0.10.2


From ae61836289a415351caa524d328110aaeae100d4 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:26 +0200
Subject: x86, vmlinux.lds: unify parainstructions

32 bit:

 - increase alignment from 4 to 8 for .parainstructions
 - increase alignment from 4 to 8 for .altinstructions

64 bit:

 - move ALIGN() outside output section for .altinstructions

None of the above should result in any functional change.

[ Impact: refactor and unify linker script ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-10-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8b203c4..c8dd71e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -291,6 +291,24 @@ SECTIONS
 
 	SECURITY_INIT
 
+	. = ALIGN(8);
+	.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+		__parainstructions = .;
+		*(.parainstructions)
+		__parainstructions_end = .;
+	}
+
+	. = ALIGN(8);
+	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+
+	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+		*(.altinstr_replacement)
+	}
+
 
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index d8ba539..5df9647 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,21 +1,3 @@
-	. = ALIGN(4);
-	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
-		__alt_instructions = .;
-		*(.altinstructions)
-		__alt_instructions_end = .;
-	}
-
-	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-		*(.altinstr_replacement)
-	}
-
-	. = ALIGN(4);
-	.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-		__parainstructions = .;
-		*(.parainstructions)
-		__parainstructions_end = .;
-	}
-
 	/*
 	 * .exit.text is discard at runtime, not link time, to deal with
 	 *  references from .altinstructions and .eh_frame
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 0e8054e..9ef7096 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,21 +1,3 @@
-	. = ALIGN(8);
-	.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-		__parainstructions = .;
-		*(.parainstructions)
-		__parainstructions_end = .;
-	}
-
-	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
-		. = ALIGN(8);
-		__alt_instructions = .;
-		*(.altinstructions)
-		__alt_instructions_end = .;
-	}
-
-	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-		*(.altinstr_replacement)
-	}
-
 	/*
 	 * .exit.text is discard at runtime, not link time, to deal with
 	 *  references from .altinstructions and .eh_frame
-- 
cgit v0.10.2


From bf6a57418d5445c98047edbec022c9e54d1526e6 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:27 +0200
Subject: x86, vmlinux.lds: unify .exit.* and .init.ramfs

[ Impact: cleanup ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-11-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index c8dd71e..1ab62a5 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -309,6 +309,26 @@ SECTIONS
 		*(.altinstr_replacement)
 	}
 
+	/*
+	 * .exit.text is discard at runtime, not link time, to deal with
+	 *  references from .altinstructions and .eh_frame
+	 */
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+		EXIT_TEXT
+	}
+
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+		EXIT_DATA
+	}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	. = ALIGN(PAGE_SIZE);
+	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
+#endif
 
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 5df9647..36c8fbd 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,24 +1,3 @@
-	/*
-	 * .exit.text is discard at runtime, not link time, to deal with
-	 *  references from .altinstructions and .eh_frame
-	 */
-	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-		EXIT_TEXT
-	}
-
-	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
-		EXIT_DATA
-	}
-
-#if defined(CONFIG_BLK_DEV_INITRD)
-	. = ALIGN(PAGE_SIZE);
-	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-		__initramfs_start = .;
-		*(.init.ramfs)
-		__initramfs_end = .;
-	}
-#endif
-
 	PERCPU(PAGE_SIZE)
 
 	. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 9ef7096..1aa5362 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,24 +1,3 @@
-	/*
-	 * .exit.text is discard at runtime, not link time, to deal with
-	 *  references from .altinstructions and .eh_frame
-	 */
-	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-		EXIT_TEXT
-	}
-
-	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
-		EXIT_DATA
-	}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	. = ALIGN(PAGE_SIZE);
-	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-		__initramfs_start = .;
-		*(.init.ramfs)
-		__initramfs_end = .;
-	}
-#endif
-
 #ifdef CONFIG_SMP
 	/*
 	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
-- 
cgit v0.10.2


From 9d16e78318f174fd4b07916a93e41749d5199267 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:28 +0200
Subject: x86, vmlinux.lds: unify percpu

32 bit:
- move __init_end outside the .bss output section
  It really did not belong in there

[ Impact: 64-bit: cleanup, 32-bit: refactor linker script ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-12-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1ab62a5..1ea2b85 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -330,6 +330,36 @@ SECTIONS
 	}
 #endif
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
+	/*
+	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
+	 * output PHDR, so the next output section - __data_nosave - should
+	 * start another section data.init2.  Also, pda should be at the head of
+	 * percpu area.  Preallocate it and define the percpu offset symbol
+	 * so that it can be accessed as a percpu variable.
+	 */
+	. = ALIGN(PAGE_SIZE);
+	PERCPU_VADDR(0, :percpu)
+#else
+	PERCPU(PAGE_SIZE)
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	/* freed after init ends here */
+	__init_end = .;
+
+#ifdef CONFIG_X86_64
+	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+	} :data.init2
+	/* use another section data.init2, see PERCPU_VADDR() above */
+#endif
+
+
 #ifdef CONFIG_X86_32
 # include "vmlinux_32.lds.S"
 #else
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 36c8fbd..d23ee2c 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -1,11 +1,5 @@
-	PERCPU(PAGE_SIZE)
-
-	. = ALIGN(PAGE_SIZE);
-	/* freed after init ends here */
-
 	/* BSS */
 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-		__init_end = .;
 		__bss_start = .;
 		*(.bss.page_aligned)
 		*(.bss)
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1aa5362..a539366 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -1,29 +1,3 @@
-#ifdef CONFIG_SMP
-	/*
-	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
-	 * output PHDR, so the next output section - __data_nosave - should
-	 * start another section data.init2.  Also, pda should be at the head of
-	 * percpu area.  Preallocate it and define the percpu offset symbol
-	 * so that it can be accessed as a percpu variable.
-	 */
-	. = ALIGN(PAGE_SIZE);
-	PERCPU_VADDR(0, :percpu)
-#else
-	PERCPU(PAGE_SIZE)
-#endif
-
-	. = ALIGN(PAGE_SIZE);
-	__init_end = .;
-
-	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-		. = ALIGN(PAGE_SIZE);
-		__nosave_begin = .;
-		*(.data.nosave)
-		. = ALIGN(PAGE_SIZE);
-		__nosave_end = .;
-	} :data.init2
-	/* use another section data.init2, see PERCPU_VADDR() above */
-
 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
 		. = ALIGN(PAGE_SIZE);
 		__bss_start = .;		/* BSS */
-- 
cgit v0.10.2


From 091e52c3551d3031343df24b573b770b4c6c72b6 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 29 Apr 2009 09:47:29 +0200
Subject: x86, vmlinux.lds: unify remaining parts

32 bit:
- explicit page align .bss
- move ALING() out of .brk output section
- discard *(.eh_frame)

64 bit:
- move ALIGN() out of .bss output section
- move ALIGN() out of .brk output section
- use a dedicated section to define _end

[ Impact: unify and fix section alignments in linker script ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-13-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1ea2b85..ef3e4f1 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -359,12 +359,34 @@ SECTIONS
 	/* use another section data.init2, see PERCPU_VADDR() above */
 #endif
 
+	/* BSS */
+	. = ALIGN(PAGE_SIZE);
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+		__bss_start = .;
+		*(.bss.page_aligned)
+		*(.bss)
+		. = ALIGN(4);
+		__bss_stop = .;
+	}
 
-#ifdef CONFIG_X86_32
-# include "vmlinux_32.lds.S"
-#else
-# include "vmlinux_64.lds.S"
-#endif
+	. = ALIGN(PAGE_SIZE);
+	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
+		__brk_base = .;
+		. += 64 * 1024;		/* 64k alignment slop space */
+		*(.brk_reservation)	/* areas brk users have reserved */
+		__brk_limit = .;
+	}
+
+	.end : AT(ADDR(.end) - LOAD_OFFSET) {
+		_end = .;
+	}
+
+	/* Sections to be discarded */
+	/DISCARD/ : {
+		*(.exitcall.exit)
+		*(.eh_frame)
+		*(.discard)
+	}
 
         STABS_DEBUG
         DWARF_DEBUG
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
deleted file mode 100644
index d23ee2c..0000000
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ /dev/null
@@ -1,26 +0,0 @@
-	/* BSS */
-	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-		__bss_start = .;
-		*(.bss.page_aligned)
-		*(.bss)
-		. = ALIGN(4);
-		__bss_stop = .;
-	}
-
-	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
-		. = ALIGN(PAGE_SIZE);
-		__brk_base = .;
-		. += 64 * 1024;		/* 64k alignment slop space */
-		*(.brk_reservation)	/* areas brk users have reserved */
-		__brk_limit = .;
-	}
-
-	.end : AT(ADDR(.end) - LOAD_OFFSET) {
-		_end = . ;
-	}
-
-	/* Sections to be discarded */
-	/DISCARD/ : {
-		*(.exitcall.exit)
-		*(.discard)
-	}
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
deleted file mode 100644
index a539366..0000000
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ /dev/null
@@ -1,24 +0,0 @@
-	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-		. = ALIGN(PAGE_SIZE);
-		__bss_start = .;		/* BSS */
-		*(.bss.page_aligned)
-		*(.bss)
-		__bss_stop = .;
-	}
-
-	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
-		. = ALIGN(PAGE_SIZE);
-		__brk_base = .;
-		. += 64 * 1024;		/* 64k alignment slop space */
-		*(.brk_reservation)	/* areas brk users have reserved */
-		__brk_limit = .;
-	}
-
-	_end = . ;
-
-	/* Sections to be discarded */
-	/DISCARD/ : {
-		*(.exitcall.exit)
-		*(.eh_frame)
-		*(.discard)
-	}
-- 
cgit v0.10.2


From 91fd7fe809bdf4d8aa56559d17b9f25a1a6fe732 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 29 Apr 2009 10:58:38 +0200
Subject: x86, vmlinux.lds: add copyright

Acked-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1240991249-27117-2-git-send-email-sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index ef3e4f1..0bdbaa5 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -3,7 +3,8 @@
  *
  * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
  *
- * Modernisation and unification done by Sam Ravnborg <sam@ravnborg.org>
+ * Modernisation, unification and other changes and fixes:
+ *   Copyright (C) 2007-2009  Sam Ravnborg <sam@ravnborg.org>
  *
  *
  * Don't define absolute symbols until and unless you know that symbol
-- 
cgit v0.10.2


From 0492e1bb8fe7d122901c9f3af75e537d4129712e Mon Sep 17 00:00:00 2001
From: Stuart Bennett <stuart@freedesktop.org>
Date: Tue, 28 Apr 2009 20:17:49 +0100
Subject: tracing: x86, mmiotrace: code consistency/legibility improvement

kmmio_probe being *p and kmmio_fault_page being sometimes *f and
sometimes *p is not helpful.

[ Impact: cleanup ]

Signed-off-by: Stuart Bennett <stuart@freedesktop.org>
Acked-by: Pekka Paalanen <pq@iki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1240946271-7083-3-git-send-email-stuart@freedesktop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 4f115e0..869181a 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -97,13 +97,13 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
 	struct list_head *head;
-	struct kmmio_fault_page *p;
+	struct kmmio_fault_page *f;
 
 	page &= PAGE_MASK;
 	head = kmmio_page_list(page);
-	list_for_each_entry_rcu(p, head, list) {
-		if (p->page == page)
-			return p;
+	list_for_each_entry_rcu(f, head, list) {
+		if (f->page == page)
+			return f;
 	}
 	return NULL;
 }
@@ -439,12 +439,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
 						head,
 						struct kmmio_delayed_release,
 						rcu);
-	struct kmmio_fault_page *p = dr->release_list;
-	while (p) {
-		struct kmmio_fault_page *next = p->release_next;
-		BUG_ON(p->count);
-		kfree(p);
-		p = next;
+	struct kmmio_fault_page *f = dr->release_list;
+	while (f) {
+		struct kmmio_fault_page *next = f->release_next;
+		BUG_ON(f->count);
+		kfree(f);
+		f = next;
 	}
 	kfree(dr);
 }
@@ -453,19 +453,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
 {
 	struct kmmio_delayed_release *dr =
 		container_of(head, struct kmmio_delayed_release, rcu);
-	struct kmmio_fault_page *p = dr->release_list;
+	struct kmmio_fault_page *f = dr->release_list;
 	struct kmmio_fault_page **prevp = &dr->release_list;
 	unsigned long flags;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
-	while (p) {
-		if (!p->count) {
-			list_del_rcu(&p->list);
-			prevp = &p->release_next;
+	while (f) {
+		if (!f->count) {
+			list_del_rcu(&f->list);
+			prevp = &f->release_next;
 		} else {
-			*prevp = p->release_next;
+			*prevp = f->release_next;
 		}
-		p = p->release_next;
+		f = f->release_next;
 	}
 	spin_unlock_irqrestore(&kmmio_lock, flags);
 
-- 
cgit v0.10.2


From 46e91d00b1165b14b484aa33800e1bba0794ae1a Mon Sep 17 00:00:00 2001
From: Stuart Bennett <stuart@freedesktop.org>
Date: Tue, 28 Apr 2009 20:17:50 +0100
Subject: tracing: x86, mmiotrace: refactor clearing/restore of page presence

* change function names to clear_* from set_*: in reality we only clear
  and restore page presence, and never unconditionally set present.
  Using clear_*({true, false}, ...) is therefore more honest than
  set_*({false, true}, ...)

* upgrade presence storage to pteval_t: doing user-space tracing will
  require saving and manipulation of the _PAGE_PROTNONE bit, in addition
  to the existing _PAGE_PRESENT changes, and having multiple bools stored
  and passed around does not seem optimal

[ Impact: refactor, clean up mmiotrace code ]

Signed-off-by: Stuart Bennett <stuart@freedesktop.org>
Acked-by: Pekka Paalanen <pq@iki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1240946271-7083-4-git-send-email-stuart@freedesktop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 869181a..a769d1a 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,7 +32,7 @@ struct kmmio_fault_page {
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
 	unsigned long page; /* location of the fault page */
-	bool old_presence; /* page presence prior to arming */
+	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
 	/*
@@ -108,49 +108,51 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 	return NULL;
 }
 
-static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
 {
 	pmdval_t v = pmd_val(*pmd);
-	*old = !!(v & _PAGE_PRESENT);
-	v &= ~_PAGE_PRESENT;
-	if (present)
-		v |= _PAGE_PRESENT;
+	if (clear) {
+		*old = v & _PAGE_PRESENT;
+		v &= ~_PAGE_PRESENT;
+	} else	/* presume this has been called with clear==true previously */
+		v |= *old;
 	set_pmd(pmd, __pmd(v));
 }
 
-static void set_pte_presence(pte_t *pte, bool present, bool *old)
+static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 {
 	pteval_t v = pte_val(*pte);
-	*old = !!(v & _PAGE_PRESENT);
-	v &= ~_PAGE_PRESENT;
-	if (present)
-		v |= _PAGE_PRESENT;
+	if (clear) {
+		*old = v & _PAGE_PRESENT;
+		v &= ~_PAGE_PRESENT;
+	} else	/* presume this has been called with clear==true previously */
+		v |= *old;
 	set_pte_atomic(pte, __pte(v));
 }
 
-static int set_page_presence(unsigned long addr, bool present, bool *old)
+static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(addr, &level);
+	pte_t *pte = lookup_address(f->page, &level);
 
 	if (!pte) {
-		pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+		pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
 		return -1;
 	}
 
 	switch (level) {
 	case PG_LEVEL_2M:
-		set_pmd_presence((pmd_t *)pte, present, old);
+		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
 		break;
 	case PG_LEVEL_4K:
-		set_pte_presence(pte, present, old);
+		clear_pte_presence(pte, clear, &f->old_presence);
 		break;
 	default:
 		pr_err("kmmio: unexpected page level 0x%x.\n", level);
 		return -1;
 	}
 
-	__flush_tlb_one(addr);
+	__flush_tlb_one(f->page);
 	return 0;
 }
 
@@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 	WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
 	if (f->armed) {
 		pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
-					f->page, f->count, f->old_presence);
+					f->page, f->count, !!f->old_presence);
 	}
-	ret = set_page_presence(f->page, false, &f->old_presence);
+	ret = clear_page_presence(f, true);
 	WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
 	f->armed = true;
 	return ret;
@@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 /** Restore the given page to saved presence state. */
 static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
-	bool tmp;
-	int ret = set_page_presence(f->page, f->old_presence, &tmp);
+	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
 			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
 	f->armed = false;
-- 
cgit v0.10.2


From 0f9a623dd6c9b5b4dd00c232f29525bfc7a8ecf2 Mon Sep 17 00:00:00 2001
From: Stuart Bennett <stuart@freedesktop.org>
Date: Tue, 28 Apr 2009 20:17:51 +0100
Subject: tracing: x86, mmiotrace: only register for die notifier when tracer
 active

Follow up to afcfe024aebd74b0984a41af9a34e009cf5badaf in Linus' tree
("x86: mmiotrace: quieten spurious warning message")

Signed-off-by: Stuart Bennett <stuart@freedesktop.org>
Acked-by: Pekka Paalanen <pq@iki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1240946271-7083-5-git-send-email-stuart@freedesktop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index a769d1a..256ce64 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -311,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active) {
-		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+		/*
+		 * debug traps without an active context are due to either
+		 * something external causing them (f.e. using a debugger while
+		 * mmio tracing enabled), or erroneous behaviour
+		 */
+		pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
 							smp_processor_id());
 		goto out;
 	}
@@ -529,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 }
 EXPORT_SYMBOL(unregister_kmmio_probe);
 
-static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
-								void *args)
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 {
 	struct die_args *arg = args;
 
@@ -545,11 +550,23 @@ static struct notifier_block nb_die = {
 	.notifier_call = kmmio_die_notifier
 };
 
-static int __init init_kmmio(void)
+int kmmio_init(void)
 {
 	int i;
+
 	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
 		INIT_LIST_HEAD(&kmmio_page_table[i]);
+
 	return register_die_notifier(&nb_die);
 }
-fs_initcall(init_kmmio); /* should be before device_initcall() */
+
+void kmmio_cleanup(void)
+{
+	int i;
+
+	unregister_die_notifier(&nb_die);
+	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
+	}
+}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index c9342ed..132772a 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -451,6 +451,7 @@ void enable_mmiotrace(void)
 
 	if (nommiotrace)
 		pr_info(NAME "MMIO tracing disabled.\n");
+	kmmio_init();
 	enter_uniprocessor();
 	spin_lock_irq(&trace_lock);
 	atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@ void disable_mmiotrace(void)
 
 	clear_trace_list(); /* guarantees: no more kmmio callbacks */
 	leave_uniprocessor();
+	kmmio_cleanup();
 	pr_info(NAME "disabled.\n");
 out:
 	mutex_unlock(&mmiotrace_mutex);
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 3d1b7bd..97491f7 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -30,6 +30,8 @@ extern unsigned int kmmio_count;
 
 extern int register_kmmio_probe(struct kmmio_probe *p);
 extern void unregister_kmmio_probe(struct kmmio_probe *p);
+extern int kmmio_init(void);
+extern void kmmio_cleanup(void);
 
 #ifdef CONFIG_MMIOTRACE
 /* kmmio is active by some kmmio_probes? */
-- 
cgit v0.10.2


From fd0731944333db6e9e91b6954c6ef95f4b71ab04 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 29 Apr 2009 12:56:58 +0200
Subject: x86, vmlinux.lds: fix relocatable symbols

__init_begin/_end symbols should be inside sections as well,
otherwise the relocatable kernel gets confused when freeing
init sections in the wrong place.

[ Impact: fix bootup crash ]

Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@MIT.EDU>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <20090429105056.GA28720@uranus.ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0bdbaa5..4c85b2e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -255,8 +255,8 @@ SECTIONS
 
 	/* Init code and data - will be freed after init */
 	. = ALIGN(PAGE_SIZE);
-	__init_begin = .; /* paired with __init_end */
 	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		__init_begin = .; /* paired with __init_end */
 		_sinittext = .;
 		INIT_TEXT
 		_einittext = .;
@@ -346,8 +346,11 @@ SECTIONS
 #endif
 
 	. = ALIGN(PAGE_SIZE);
+
 	/* freed after init ends here */
-	__init_end = .;
+	.init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {
+		__init_end = .;
+	}
 
 #ifdef CONFIG_X86_64
 	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-- 
cgit v0.10.2


From 30e673b230f9d556eb81ef68a7b1a08c8b3b142c Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 28 Apr 2009 03:04:47 -0500
Subject: tracing/filters: move preds into event_filter object

Create a new event_filter object, and move the pred-related members
out of the call and subsystem objects and into the filter object - the
details of the filter implementation don't need to be exposed in the
call and subsystem in any case, and it will also help make the new
parser implementation a little cleaner.

[ Impact: refactor trace-filter code to prepare for new features ]

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
Cc: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <1240905887.6416.119.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 78a9ba2..46a27f2 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -101,8 +101,8 @@ struct ftrace_event_call {
 	int			(*show_format)(struct trace_seq *s);
 	int			(*define_fields)(void);
 	struct list_head	fields;
-	int			n_preds;
-	struct filter_pred	**preds;
+	int			filter_active;
+	void			*filter;
 	void			*mod;
 
 #ifdef CONFIG_EVENT_PROFILE
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7d55bcf..1fb7d6c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -731,12 +731,16 @@ struct ftrace_event_field {
 	int			size;
 };
 
+struct event_filter {
+	int			n_preds;
+	struct filter_pred	**preds;
+};
+
 struct event_subsystem {
 	struct list_head	list;
 	const char		*name;
 	struct dentry		*entry;
-	int			n_preds;
-	struct filter_pred	**preds;
+	void			*filter;
 };
 
 struct filter_pred;
@@ -774,7 +778,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 		     struct ring_buffer *buffer,
 		     struct ring_buffer_event *event)
 {
-	if (unlikely(call->n_preds) && !filter_match_preds(call, rec)) {
+	if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
 		ring_buffer_discard_commit(buffer, event);
 		return 1;
 	}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index be4d3a4..1cd1f37 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -757,8 +757,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
 	list_add(&system->list, &event_subsystems);
 
-	system->preds = NULL;
-	system->n_preds = 0;
+	system->filter = NULL;
 
 	entry = debugfs_create_file("filter", 0644, system->entry, system,
 				    &ftrace_subsystem_filter_fops);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 6541828..1e861ec 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -93,11 +93,12 @@ static int filter_pred_none(struct filter_pred *pred, void *event)
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct ftrace_event_call *call, void *rec)
 {
+	struct event_filter *filter = call->filter;
 	int i, matched, and_failed = 0;
 	struct filter_pred *pred;
 
-	for (i = 0; i < call->n_preds; i++) {
-		pred = call->preds[i];
+	for (i = 0; i < filter->n_preds; i++) {
+		pred = filter->preds[i];
 		if (and_failed && !pred->or)
 			continue;
 		matched = pred->fn(pred, rec);
@@ -115,20 +116,20 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec)
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
-static void __filter_print_preds(struct filter_pred **preds, int n_preds,
+static void __filter_print_preds(struct event_filter *filter,
 				 struct trace_seq *s)
 {
-	char *field_name;
 	struct filter_pred *pred;
+	char *field_name;
 	int i;
 
-	if (!n_preds) {
+	if (!filter || !filter->n_preds) {
 		trace_seq_printf(s, "none\n");
 		return;
 	}
 
-	for (i = 0; i < n_preds; i++) {
-		pred = preds[i];
+	for (i = 0; i < filter->n_preds; i++) {
+		pred = filter->preds[i];
 		field_name = pred->field_name;
 		if (i)
 			trace_seq_printf(s, pred->or ? "|| " : "&& ");
@@ -144,7 +145,7 @@ static void __filter_print_preds(struct filter_pred **preds, int n_preds,
 void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s)
 {
 	mutex_lock(&filter_mutex);
-	__filter_print_preds(call->preds, call->n_preds, s);
+	__filter_print_preds(call->filter, s);
 	mutex_unlock(&filter_mutex);
 }
 
@@ -152,7 +153,7 @@ void filter_print_subsystem_preds(struct event_subsystem *system,
 				  struct trace_seq *s)
 {
 	mutex_lock(&filter_mutex);
-	__filter_print_preds(system->preds, system->n_preds, s);
+	__filter_print_preds(system->filter, s);
 	mutex_unlock(&filter_mutex);
 }
 
@@ -200,12 +201,14 @@ static int filter_set_pred(struct filter_pred *dest,
 
 static void __filter_disable_preds(struct ftrace_event_call *call)
 {
+	struct event_filter *filter = call->filter;
 	int i;
 
-	call->n_preds = 0;
+	call->filter_active = 0;
+	filter->n_preds = 0;
 
 	for (i = 0; i < MAX_FILTER_PRED; i++)
-		call->preds[i]->fn = filter_pred_none;
+		filter->preds[i]->fn = filter_pred_none;
 }
 
 void filter_disable_preds(struct ftrace_event_call *call)
@@ -217,32 +220,39 @@ void filter_disable_preds(struct ftrace_event_call *call)
 
 int init_preds(struct ftrace_event_call *call)
 {
+	struct event_filter *filter;
 	struct filter_pred *pred;
 	int i;
 
-	call->n_preds = 0;
-
-	call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
-	if (!call->preds)
+	filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!call->filter)
 		return -ENOMEM;
 
+	call->filter_active = 0;
+	filter->n_preds = 0;
+
+	filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
+	if (!filter->preds)
+		goto oom;
+
 	for (i = 0; i < MAX_FILTER_PRED; i++) {
 		pred = kzalloc(sizeof(*pred), GFP_KERNEL);
 		if (!pred)
 			goto oom;
 		pred->fn = filter_pred_none;
-		call->preds[i] = pred;
+		filter->preds[i] = pred;
 	}
 
 	return 0;
 
 oom:
 	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (call->preds[i])
-			filter_free_pred(call->preds[i]);
+		if (filter->preds[i])
+			filter_free_pred(filter->preds[i]);
 	}
-	kfree(call->preds);
-	call->preds = NULL;
+	kfree(filter->preds);
+	kfree(call->filter);
+	call->filter = NULL;
 
 	return -ENOMEM;
 }
@@ -250,15 +260,16 @@ EXPORT_SYMBOL_GPL(init_preds);
 
 static void __filter_free_subsystem_preds(struct event_subsystem *system)
 {
+	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
 	int i;
 
-	if (system->n_preds) {
-		for (i = 0; i < system->n_preds; i++)
-			filter_free_pred(system->preds[i]);
-		kfree(system->preds);
-		system->preds = NULL;
-		system->n_preds = 0;
+	if (filter && filter->n_preds) {
+		for (i = 0; i < filter->n_preds; i++)
+			filter_free_pred(filter->preds[i]);
+		kfree(filter->preds);
+		kfree(filter);
+		system->filter = NULL;
 	}
 
 	list_for_each_entry(call, &ftrace_events, list) {
@@ -281,21 +292,23 @@ static int filter_add_pred_fn(struct ftrace_event_call *call,
 			      struct filter_pred *pred,
 			      filter_pred_fn_t fn)
 {
+	struct event_filter *filter = call->filter;
 	int idx, err;
 
-	if (call->n_preds && !pred->compound)
+	if (filter->n_preds && !pred->compound)
 		__filter_disable_preds(call);
 
-	if (call->n_preds == MAX_FILTER_PRED)
+	if (filter->n_preds == MAX_FILTER_PRED)
 		return -ENOSPC;
 
-	idx = call->n_preds;
-	filter_clear_pred(call->preds[idx]);
-	err = filter_set_pred(call->preds[idx], pred, fn);
+	idx = filter->n_preds;
+	filter_clear_pred(filter->preds[idx]);
+	err = filter_set_pred(filter->preds[idx], pred, fn);
 	if (err)
 		return err;
 
-	call->n_preds++;
+	filter->n_preds++;
+	call->filter_active = 1;
 
 	return 0;
 }
@@ -366,29 +379,41 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
 int filter_add_subsystem_pred(struct event_subsystem *system,
 			      struct filter_pred *pred)
 {
+	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
 
 	mutex_lock(&filter_mutex);
 
-	if (system->n_preds && !pred->compound)
+	if (filter && filter->n_preds && !pred->compound) {
 		__filter_free_subsystem_preds(system);
+		filter = NULL;
+	}
 
-	if (!system->n_preds) {
-		system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+	if (!filter) {
+		system->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+		if (!system->filter) {
+			mutex_unlock(&filter_mutex);
+			return -ENOMEM;
+		}
+		filter = system->filter;
+		filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
 					GFP_KERNEL);
-		if (!system->preds) {
+
+		if (!filter->preds) {
+			kfree(system->filter);
+			system->filter = NULL;
 			mutex_unlock(&filter_mutex);
 			return -ENOMEM;
 		}
 	}
 
-	if (system->n_preds == MAX_FILTER_PRED) {
+	if (filter->n_preds == MAX_FILTER_PRED) {
 		mutex_unlock(&filter_mutex);
 		return -ENOSPC;
 	}
 
-	system->preds[system->n_preds] = pred;
-	system->n_preds++;
+	filter->preds[filter->n_preds] = pred;
+	filter->n_preds++;
 
 	list_for_each_entry(call, &ftrace_events, list) {
 		int err;
@@ -401,8 +426,8 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 
 		err = __filter_add_pred(call, pred);
 		if (err == -ENOMEM) {
-			system->preds[system->n_preds] = NULL;
-			system->n_preds--;
+			filter->preds[filter->n_preds] = NULL;
+			filter->n_preds--;
 			mutex_unlock(&filter_mutex);
 			return err;
 		}
-- 
cgit v0.10.2


From a118e4d1402f1349fe3d953493e4168a300a752d Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 28 Apr 2009 03:04:53 -0500
Subject: tracing/filters: distinguish between signed and unsigned fields

The new filter comparison ops need to be able to distinguish between
signed and unsigned field types, so add an is_signed flag/param to the
event field struct/trace_define_fields().  Also define a simple macro,
is_signed_type() to determine the signedness at compile time, used in the
trace macros.  If the is_signed_type() macro won't work with a specific
type, a new slightly modified version of TRACE_FIELD() called
TRACE_FIELD_SIGN(), allows the signedness to be set explicitly.

[ Impact: extend trace-filter code for new feature ]

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
Cc: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <1240905893.6416.120.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 46a27f2..e61a740 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -122,8 +122,9 @@ extern int filter_current_check_discard(struct ftrace_event_call *call,
 					struct ring_buffer_event *event);
 
 extern int trace_define_field(struct ftrace_event_call *call, char *type,
-			      char *name, int offset, int size);
+			      char *name, int offset, int size, int is_signed);
 
+#define is_signed_type(type)	(((type)(-1)) < 0)
 
 /*
  * The double __builtin_constant_p is because gcc will give us an error
@@ -144,10 +145,10 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
-#define __common_field(type, item)					\
+#define __common_field(type, item, is_signed)				\
 	ret = trace_define_field(event_call, #type, "common_" #item,	\
 				 offsetof(typeof(field.ent), item),	\
-				 sizeof(field.ent.item));		\
+				 sizeof(field.ent.item), is_signed);	\
 	if (ret)							\
 		return ret;
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 1e68114..edb02bc 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -225,7 +225,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 #define __field(type, item)						\
 	ret = trace_define_field(event_call, #type, #item,		\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
+				 sizeof(field.item), is_signed_type(type));	\
 	if (ret)							\
 		return ret;
 
@@ -234,7 +234,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
+				 sizeof(field.item), 0);		\
 	if (ret)							\
 		return ret;
 
@@ -242,7 +242,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 #define __string(item, src)						       \
 	ret = trace_define_field(event_call, "__str_loc", #item,	       \
 				offsetof(typeof(field), __str_loc_##item),     \
-				sizeof(field.__str_loc_##item));
+				 sizeof(field.__str_loc_##item), 0);
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
@@ -253,11 +253,11 @@ ftrace_define_fields_##call(void)					\
 	struct ftrace_event_call *event_call = &event_##call;		\
 	int ret;							\
 									\
-	__common_field(int, type);					\
-	__common_field(unsigned char, flags);				\
-	__common_field(unsigned char, preempt_count);			\
-	__common_field(int, pid);					\
-	__common_field(int, tgid);					\
+	__common_field(int, type, 1);					\
+	__common_field(unsigned char, flags, 0);			\
+	__common_field(unsigned char, preempt_count, 0);		\
+	__common_field(int, pid, 1);					\
+	__common_field(int, tgid, 1);					\
 									\
 	tstruct;							\
 									\
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1fb7d6c..866d010 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -729,6 +729,7 @@ struct ftrace_event_field {
 	char			*type;
 	int			offset;
 	int			size;
+	int			is_signed;
 };
 
 struct event_filter {
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index cfcecc4..5e32e37 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -141,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
 
 TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
 	TRACE_STRUCT(
-		TRACE_FIELD(ktime_t, state_data.stamp, stamp)
-		TRACE_FIELD(ktime_t, state_data.end, end)
+		TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
+		TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
 		TRACE_FIELD(int, state_data.type, type)
 		TRACE_FIELD(int, state_data.state, state)
 	),
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1cd1f37..bbbea74 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -26,7 +26,7 @@ static DEFINE_MUTEX(event_mutex);
 LIST_HEAD(ftrace_events);
 
 int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size)
+		       char *name, int offset, int size, int is_signed)
 {
 	struct ftrace_event_field *field;
 
@@ -44,6 +44,7 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
 
 	field->offset = offset;
 	field->size = size;
+	field->is_signed = is_signed;
 	list_add(&field->link, &call->fields);
 
 	return 0;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 0cb1a14..d06cf89 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -50,6 +50,9 @@ extern void __bad_type_size(void);
 	if (!ret)							\
 		return 0;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)	\
+	TRACE_FIELD(type, item, assign)
 
 #undef TP_RAW_FMT
 #define TP_RAW_FMT(args...) args
@@ -98,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s)				\
 #define TRACE_FIELD(type, item, assign)\
 	entry->item = assign;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)	\
+	TRACE_FIELD(type, item, assign)
+
 #undef TP_CMD
 #define TP_CMD(cmd...)	cmd
 
@@ -149,7 +156,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #define TRACE_FIELD(type, item, assign)					\
 	ret = trace_define_field(event_call, #type, #item,		\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
+				 sizeof(field.item), is_signed_type(type));	\
 	if (ret)							\
 		return ret;
 
@@ -157,7 +164,15 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #define TRACE_FIELD_SPECIAL(type, item, len, cmd)			\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
+				 sizeof(field.item), 0);		\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)			\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), is_signed);	\
 	if (ret)							\
 		return ret;
 
@@ -173,11 +188,11 @@ ftrace_define_fields_##call(void)					\
 	struct args field;						\
 	int ret;							\
 									\
-	__common_field(unsigned char, type);				\
-	__common_field(unsigned char, flags);				\
-	__common_field(unsigned char, preempt_count);			\
-	__common_field(int, pid);					\
-	__common_field(int, tgid);					\
+	__common_field(unsigned char, type, 0);				\
+	__common_field(unsigned char, flags, 0);			\
+	__common_field(unsigned char, preempt_count, 0);		\
+	__common_field(int, pid, 1);					\
+	__common_field(int, tgid, 1);					\
 									\
 	tstruct;							\
 									\
-- 
cgit v0.10.2


From 8b3725621074040d380664964ffbc40610aef8c6 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 28 Apr 2009 03:04:59 -0500
Subject: tracing/filters: a better event parser

Replace the current event parser hack with a better one.  Filters are
no longer specified predicate by predicate, but all at once and can
use parens and any of the following operators:

numeric fields:

==, !=, <, <=, >, >=

string fields:

==, !=

predicates can be combined with the logical operators:

&&, ||

examples:

"common_preempt_count > 4" > filter

"((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter

If there was an error, the erroneous string along with an error
message can be seen by looking at the filter e.g.:

((sig >= 10 && sig < 15) || dsig == 17) && comm != bash
^
parse_error: Field not found

Currently the caret for an error always appears at the beginning of
the filter; a real position should be used, but the error message
should be useful even without it.

To clear a filter, '0' can be written to the filter file.

Filters can also be set or cleared for a complete subsystem by writing
the same filter as would be written to an individual event to the
filter file at the root of the subsytem.  Note however, that if any
event in the subsystem lacks a field specified in the filter being
set, the set will fail and all filters in the subsytem are
automatically cleared.  This change from the previous version was made
because using only the fields that happen to exist for a given event
would most likely result in a meaningless filter.

Because the logical operators are now implemented as predicates, the
maximum number of predicates in a filter was increased from 8 to 16.

[ Impact: add new, extended trace-filter implementation ]

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
Cc: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <1240905899.6416.121.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index e61a740..5fff40c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -112,7 +112,7 @@ struct ftrace_event_call {
 #endif
 };
 
-#define MAX_FILTER_PRED		8
+#define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	128
 
 extern int init_preds(struct ftrace_event_call *call);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 866d010..7736fe8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -735,6 +735,7 @@ struct ftrace_event_field {
 struct event_filter {
 	int			n_preds;
 	struct filter_pred	**preds;
+	char			*filter_string;
 };
 
 struct event_subsystem {
@@ -746,7 +747,8 @@ struct event_subsystem {
 
 struct filter_pred;
 
-typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
+				 int val1, int val2);
 
 struct filter_pred {
 	filter_pred_fn_t fn;
@@ -756,23 +758,18 @@ struct filter_pred {
 	char *field_name;
 	int offset;
 	int not;
-	int or;
-	int compound;
-	int clear;
+	int op;
+	int pop_n;
 };
 
-extern void filter_free_pred(struct filter_pred *pred);
-extern void filter_print_preds(struct ftrace_event_call *call,
+extern void print_event_filter(struct ftrace_event_call *call,
 			       struct trace_seq *s);
-extern int filter_parse(char **pbuf, struct filter_pred *pred);
-extern int filter_add_pred(struct ftrace_event_call *call,
-			   struct filter_pred *pred);
-extern void filter_disable_preds(struct ftrace_event_call *call);
-extern void filter_free_subsystem_preds(struct event_subsystem *system);
-extern void filter_print_subsystem_preds(struct event_subsystem *system,
+extern int apply_event_filter(struct ftrace_event_call *call,
+			      char *filter_string);
+extern int apply_subsystem_event_filter(struct event_subsystem *system,
+					char *filter_string);
+extern void print_subsystem_event_filter(struct event_subsystem *system,
 					 struct trace_seq *s);
-extern int filter_add_subsystem_pred(struct event_subsystem *system,
-				     struct filter_pred *pred);
 
 static inline int
 filter_check_discard(struct ftrace_event_call *call, void *rec,
@@ -787,6 +784,47 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 	return 0;
 }
 
+#define DEFINE_COMPARISON_PRED(type)					\
+static int filter_pred_##type(struct filter_pred *pred, void *event,	\
+			      int val1, int val2)			\
+{									\
+	type *addr = (type *)(event + pred->offset);			\
+	type val = (type)pred->val;					\
+	int match = 0;							\
+									\
+	switch (pred->op) {						\
+	case OP_LT:							\
+		match = (*addr < val);					\
+		break;							\
+	case OP_LE:							\
+		match = (*addr <= val);					\
+		break;							\
+	case OP_GT:							\
+		match = (*addr > val);					\
+		break;							\
+	case OP_GE:							\
+		match = (*addr >= val);					\
+		break;							\
+	default:							\
+		break;							\
+	}								\
+									\
+	return match;							\
+}
+
+#define DEFINE_EQUALITY_PRED(size)					\
+static int filter_pred_##size(struct filter_pred *pred, void *event,	\
+			      int val1, int val2)			\
+{									\
+	u##size *addr = (u##size *)(event + pred->offset);		\
+	u##size val = (u##size)pred->val;				\
+	int match;							\
+									\
+	match = (val == *addr) ^ pred->not;				\
+									\
+	return match;							\
+}
+
 extern struct list_head ftrace_events;
 
 extern const char *__start___trace_bprintk_fmt[];
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index bbbea74..f789ca5 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -492,7 +492,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	trace_seq_init(s);
 
-	filter_print_preds(call, s);
+	print_event_filter(call, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -505,40 +505,26 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		   loff_t *ppos)
 {
 	struct ftrace_event_call *call = filp->private_data;
-	char buf[64], *pbuf = buf;
-	struct filter_pred *pred;
+	char *buf;
 	int err;
 
-	if (cnt >= sizeof(buf))
+	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-	buf[cnt] = '\0';
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return -ENOMEM;
 
-	err = filter_parse(&pbuf, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
-		return err;
-	}
-
-	if (pred->clear) {
-		filter_disable_preds(call);
-		filter_free_pred(pred);
-		return cnt;
+	if (copy_from_user(buf, ubuf, cnt)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
 	}
+	buf[cnt] = '\0';
 
-	err = filter_add_pred(call, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
+	err = apply_event_filter(call, buf);
+	free_page((unsigned long) buf);
+	if (err < 0)
 		return err;
-	}
-
-	filter_free_pred(pred);
 
 	*ppos += cnt;
 
@@ -562,7 +548,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	trace_seq_init(s);
 
-	filter_print_subsystem_preds(system, s);
+	print_subsystem_event_filter(system, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -575,38 +561,26 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		       loff_t *ppos)
 {
 	struct event_subsystem *system = filp->private_data;
-	char buf[64], *pbuf = buf;
-	struct filter_pred *pred;
+	char *buf;
 	int err;
 
-	if (cnt >= sizeof(buf))
+	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-	buf[cnt] = '\0';
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return -ENOMEM;
 
-	err = filter_parse(&pbuf, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
-		return err;
-	}
-
-	if (pred->clear) {
-		filter_free_subsystem_preds(system);
-		filter_free_pred(pred);
-		return cnt;
+	if (copy_from_user(buf, ubuf, cnt)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
 	}
+	buf[cnt] = '\0';
 
-	err = filter_add_subsystem_pred(system, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
+	err = apply_subsystem_event_filter(system, buf);
+	free_page((unsigned long) buf);
+	if (err < 0)
 		return err;
-	}
 
 	*ppos += cnt;
 
@@ -760,11 +734,21 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
 	system->filter = NULL;
 
+	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
+	if (!system->filter) {
+		pr_warning("Could not allocate filter for subsystem "
+			   "'%s'\n", name);
+		return system->entry;
+	}
+
 	entry = debugfs_create_file("filter", 0644, system->entry, system,
 				    &ftrace_subsystem_filter_fops);
-	if (!entry)
+	if (!entry) {
+		kfree(system->filter);
+		system->filter = NULL;
 		pr_warning("Could not create debugfs "
 			   "'%s/filter' entry\n", name);
+	}
 
 	return system->entry;
 }
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 1e861ec..f494866 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -29,51 +29,130 @@
 
 static DEFINE_MUTEX(filter_mutex);
 
-static int filter_pred_64(struct filter_pred *pred, void *event)
+enum filter_op_ids
 {
-	u64 *addr = (u64 *)(event + pred->offset);
-	u64 val = (u64)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
-}
-
-static int filter_pred_32(struct filter_pred *pred, void *event)
-{
-	u32 *addr = (u32 *)(event + pred->offset);
-	u32 val = (u32)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
-}
-
-static int filter_pred_16(struct filter_pred *pred, void *event)
+	OP_OR,
+	OP_AND,
+	OP_NE,
+	OP_EQ,
+	OP_LT,
+	OP_LE,
+	OP_GT,
+	OP_GE,
+	OP_NONE,
+	OP_OPEN_PAREN,
+};
+
+struct filter_op {
+	int id;
+	char *string;
+	int precedence;
+};
+
+static struct filter_op filter_ops[] = {
+	{ OP_OR, "||", 1 },
+	{ OP_AND, "&&", 2 },
+	{ OP_NE, "!=", 4 },
+	{ OP_EQ, "==", 4 },
+	{ OP_LT, "<", 5 },
+	{ OP_LE, "<=", 5 },
+	{ OP_GT, ">", 5 },
+	{ OP_GE, ">=", 5 },
+	{ OP_NONE, "OP_NONE", 0 },
+	{ OP_OPEN_PAREN, "(", 0 },
+};
+
+enum {
+	FILT_ERR_NONE,
+	FILT_ERR_INVALID_OP,
+	FILT_ERR_UNBALANCED_PAREN,
+	FILT_ERR_TOO_MANY_OPERANDS,
+	FILT_ERR_OPERAND_TOO_LONG,
+	FILT_ERR_FIELD_NOT_FOUND,
+	FILT_ERR_ILLEGAL_FIELD_OP,
+	FILT_ERR_ILLEGAL_INTVAL,
+	FILT_ERR_BAD_SUBSYS_FILTER,
+	FILT_ERR_TOO_MANY_PREDS,
+	FILT_ERR_MISSING_FIELD,
+	FILT_ERR_INVALID_FILTER,
+};
+
+static char *err_text[] = {
+	"No error",
+	"Invalid operator",
+	"Unbalanced parens",
+	"Too many operands",
+	"Operand too long",
+	"Field not found",
+	"Illegal operation for field type",
+	"Illegal integer value",
+	"Couldn't find or set field in one of a subsystem's events",
+	"Too many terms in predicate expression",
+	"Missing field name and/or value",
+	"Meaningless filter expression",
+};
+
+struct opstack_op {
+	int op;
+	struct list_head list;
+};
+
+struct postfix_elt {
+	int op;
+	char *operand;
+	struct list_head list;
+};
+
+struct filter_parse_state {
+	struct filter_op *ops;
+	struct list_head opstack;
+	struct list_head postfix;
+	int lasterr;
+	int lasterr_pos;
+
+	struct {
+		char *string;
+		unsigned int cnt;
+		unsigned int tail;
+	} infix;
+
+	struct {
+		char string[MAX_FILTER_STR_VAL];
+		int pos;
+		unsigned int tail;
+	} operand;
+};
+
+DEFINE_COMPARISON_PRED(s64);
+DEFINE_COMPARISON_PRED(u64);
+DEFINE_COMPARISON_PRED(s32);
+DEFINE_COMPARISON_PRED(u32);
+DEFINE_COMPARISON_PRED(s16);
+DEFINE_COMPARISON_PRED(u16);
+DEFINE_COMPARISON_PRED(s8);
+DEFINE_COMPARISON_PRED(u8);
+
+DEFINE_EQUALITY_PRED(64);
+DEFINE_EQUALITY_PRED(32);
+DEFINE_EQUALITY_PRED(16);
+DEFINE_EQUALITY_PRED(8);
+
+static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
+			   void *event __attribute((unused)),
+			   int val1, int val2)
 {
-	u16 *addr = (u16 *)(event + pred->offset);
-	u16 val = (u16)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
+	return val1 && val2;
 }
 
-static int filter_pred_8(struct filter_pred *pred, void *event)
+static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
+			  void *event __attribute((unused)),
+			  int val1, int val2)
 {
-	u8 *addr = (u8 *)(event + pred->offset);
-	u8 val = (u8)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
+	return val1 || val2;
 }
 
-static int filter_pred_string(struct filter_pred *pred, void *event)
+static int filter_pred_string(struct filter_pred *pred, void *event,
+			      int val1, int val2)
 {
 	char *addr = (char *)(event + pred->offset);
 	int cmp, match;
@@ -85,7 +164,8 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
 	return match;
 }
 
-static int filter_pred_none(struct filter_pred *pred, void *event)
+static int filter_pred_none(struct filter_pred *pred, void *event,
+			    int val1, int val2)
 {
 	return 0;
 }
@@ -94,66 +174,119 @@ static int filter_pred_none(struct filter_pred *pred, void *event)
 int filter_match_preds(struct ftrace_event_call *call, void *rec)
 {
 	struct event_filter *filter = call->filter;
-	int i, matched, and_failed = 0;
+	int match, top = 0, val1 = 0, val2 = 0;
+	int stack[MAX_FILTER_PRED];
 	struct filter_pred *pred;
+	int i;
 
 	for (i = 0; i < filter->n_preds; i++) {
 		pred = filter->preds[i];
-		if (and_failed && !pred->or)
+		if (!pred->pop_n) {
+			match = pred->fn(pred, rec, val1, val2);
+			stack[top++] = match;
 			continue;
-		matched = pred->fn(pred, rec);
-		if (!matched && !pred->or) {
-			and_failed = 1;
-			continue;
-		} else if (matched && pred->or)
-			return 1;
+		}
+		if (pred->pop_n > top) {
+			WARN_ON_ONCE(1);
+			return 0;
+		}
+		val1 = stack[--top];
+		val2 = stack[--top];
+		match = pred->fn(pred, rec, val1, val2);
+		stack[top++] = match;
 	}
 
-	if (and_failed)
-		return 0;
-
-	return 1;
+	return stack[--top];
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
-static void __filter_print_preds(struct event_filter *filter,
-				 struct trace_seq *s)
+static void parse_error(struct filter_parse_state *ps, int err, int pos)
 {
-	struct filter_pred *pred;
-	char *field_name;
-	int i;
+	ps->lasterr = err;
+	ps->lasterr_pos = pos;
+}
 
-	if (!filter || !filter->n_preds) {
-		trace_seq_printf(s, "none\n");
+static void remove_filter_string(struct event_filter *filter)
+{
+	kfree(filter->filter_string);
+	filter->filter_string = NULL;
+}
+
+static int replace_filter_string(struct event_filter *filter,
+				 char *filter_string)
+{
+	kfree(filter->filter_string);
+	filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+	if (!filter->filter_string)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int append_filter_string(struct event_filter *filter,
+				char *string)
+{
+	int newlen;
+	char *new_filter_string;
+
+	BUG_ON(!filter->filter_string);
+	newlen = strlen(filter->filter_string) + strlen(string) + 1;
+	new_filter_string = kmalloc(newlen, GFP_KERNEL);
+	if (!new_filter_string)
+		return -ENOMEM;
+
+	strcpy(new_filter_string, filter->filter_string);
+	strcat(new_filter_string, string);
+	kfree(filter->filter_string);
+	filter->filter_string = new_filter_string;
+
+	return 0;
+}
+
+static void append_filter_err(struct filter_parse_state *ps,
+			      struct event_filter *filter)
+{
+	int pos = ps->lasterr_pos;
+	char *buf, *pbuf;
+
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return;
-	}
 
-	for (i = 0; i < filter->n_preds; i++) {
-		pred = filter->preds[i];
-		field_name = pred->field_name;
-		if (i)
-			trace_seq_printf(s, pred->or ? "|| " : "&& ");
-		trace_seq_printf(s, "%s ", field_name);
-		trace_seq_printf(s, pred->not ? "!= " : "== ");
-		if (pred->str_len)
-			trace_seq_printf(s, "%s\n", pred->str_val);
-		else
-			trace_seq_printf(s, "%llu\n", pred->val);
-	}
+	append_filter_string(filter, "\n");
+	memset(buf, ' ', PAGE_SIZE);
+	if (pos > PAGE_SIZE - 128)
+		pos = 0;
+	buf[pos] = '^';
+	pbuf = &buf[pos] + 1;
+
+	sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
+	append_filter_string(filter, buf);
+	free_page((unsigned long) buf);
 }
 
-void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s)
+void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 {
+	struct event_filter *filter = call->filter;
+
 	mutex_lock(&filter_mutex);
-	__filter_print_preds(call->filter, s);
+	if (filter->filter_string)
+		trace_seq_printf(s, "%s\n", filter->filter_string);
+	else
+		trace_seq_printf(s, "none\n");
 	mutex_unlock(&filter_mutex);
 }
 
-void filter_print_subsystem_preds(struct event_subsystem *system,
+void print_subsystem_event_filter(struct event_subsystem *system,
 				  struct trace_seq *s)
 {
+	struct event_filter *filter = system->filter;
+
 	mutex_lock(&filter_mutex);
-	__filter_print_preds(system->filter, s);
+	if (filter->filter_string)
+		trace_seq_printf(s, "%s\n", filter->filter_string);
+	else
+		trace_seq_printf(s, "none\n");
 	mutex_unlock(&filter_mutex);
 }
 
@@ -170,7 +303,7 @@ find_event_field(struct ftrace_event_call *call, char *name)
 	return NULL;
 }
 
-void filter_free_pred(struct filter_pred *pred)
+static void filter_free_pred(struct filter_pred *pred)
 {
 	if (!pred)
 		return;
@@ -191,15 +324,17 @@ static int filter_set_pred(struct filter_pred *dest,
 			   filter_pred_fn_t fn)
 {
 	*dest = *src;
-	dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
-	if (!dest->field_name)
-		return -ENOMEM;
+	if (src->field_name) {
+		dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
+		if (!dest->field_name)
+			return -ENOMEM;
+	}
 	dest->fn = fn;
 
 	return 0;
 }
 
-static void __filter_disable_preds(struct ftrace_event_call *call)
+static void filter_disable_preds(struct ftrace_event_call *call)
 {
 	struct event_filter *filter = call->filter;
 	int i;
@@ -211,13 +346,6 @@ static void __filter_disable_preds(struct ftrace_event_call *call)
 		filter->preds[i]->fn = filter_pred_none;
 }
 
-void filter_disable_preds(struct ftrace_event_call *call)
-{
-	mutex_lock(&filter_mutex);
-	__filter_disable_preds(call);
-	mutex_unlock(&filter_mutex);
-}
-
 int init_preds(struct ftrace_event_call *call)
 {
 	struct event_filter *filter;
@@ -258,48 +386,43 @@ oom:
 }
 EXPORT_SYMBOL_GPL(init_preds);
 
-static void __filter_free_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
 {
 	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
 	int i;
 
-	if (filter && filter->n_preds) {
+	if (filter->n_preds) {
 		for (i = 0; i < filter->n_preds; i++)
 			filter_free_pred(filter->preds[i]);
 		kfree(filter->preds);
-		kfree(filter);
-		system->filter = NULL;
+		filter->preds = NULL;
+		filter->n_preds = 0;
 	}
 
 	list_for_each_entry(call, &ftrace_events, list) {
 		if (!call->define_fields)
 			continue;
 
-		if (!strcmp(call->system, system->name))
-			__filter_disable_preds(call);
+		if (!strcmp(call->system, system->name)) {
+			filter_disable_preds(call);
+			remove_filter_string(call->filter);
+		}
 	}
 }
 
-void filter_free_subsystem_preds(struct event_subsystem *system)
-{
-	mutex_lock(&filter_mutex);
-	__filter_free_subsystem_preds(system);
-	mutex_unlock(&filter_mutex);
-}
-
-static int filter_add_pred_fn(struct ftrace_event_call *call,
+static int filter_add_pred_fn(struct filter_parse_state *ps,
+			      struct ftrace_event_call *call,
 			      struct filter_pred *pred,
 			      filter_pred_fn_t fn)
 {
 	struct event_filter *filter = call->filter;
 	int idx, err;
 
-	if (filter->n_preds && !pred->compound)
-		__filter_disable_preds(call);
-
-	if (filter->n_preds == MAX_FILTER_PRED)
+	if (filter->n_preds == MAX_FILTER_PRED) {
+		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
 		return -ENOSPC;
+	}
 
 	idx = filter->n_preds;
 	filter_clear_pred(filter->preds[idx]);
@@ -321,94 +444,132 @@ static int is_string_field(const char *type)
 	return 0;
 }
 
-static int __filter_add_pred(struct ftrace_event_call *call,
-			     struct filter_pred *pred)
+static int is_legal_op(struct ftrace_event_field *field, int op)
+{
+	if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE))
+		return 0;
+
+	return 1;
+}
+
+static filter_pred_fn_t select_comparison_fn(int op, int field_size,
+					     int field_is_signed)
+{
+	filter_pred_fn_t fn = NULL;
+
+	switch (field_size) {
+	case 8:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_64;
+		else if (field_is_signed)
+			fn = filter_pred_s64;
+		else
+			fn = filter_pred_u64;
+		break;
+	case 4:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_32;
+		else if (field_is_signed)
+			fn = filter_pred_s32;
+		else
+			fn = filter_pred_u32;
+		break;
+	case 2:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_16;
+		else if (field_is_signed)
+			fn = filter_pred_s16;
+		else
+			fn = filter_pred_u16;
+		break;
+	case 1:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_8;
+		else if (field_is_signed)
+			fn = filter_pred_s8;
+		else
+			fn = filter_pred_u8;
+		break;
+	}
+
+	return fn;
+}
+
+static int filter_add_pred(struct filter_parse_state *ps,
+			   struct ftrace_event_call *call,
+			   struct filter_pred *pred)
 {
 	struct ftrace_event_field *field;
 	filter_pred_fn_t fn;
 	unsigned long long val;
 
+	pred->fn = filter_pred_none;
+
+	if (pred->op == OP_AND) {
+		pred->pop_n = 2;
+		return filter_add_pred_fn(ps, call, pred, filter_pred_and);
+	} else if (pred->op == OP_OR) {
+		pred->pop_n = 2;
+		return filter_add_pred_fn(ps, call, pred, filter_pred_or);
+	}
+
 	field = find_event_field(call, pred->field_name);
-	if (!field)
+	if (!field) {
+		parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
 		return -EINVAL;
+	}
 
-	pred->fn = filter_pred_none;
 	pred->offset = field->offset;
 
+	if (!is_legal_op(field, pred->op)) {
+		parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
+		return -EINVAL;
+	}
+
 	if (is_string_field(field->type)) {
 		fn = filter_pred_string;
 		pred->str_len = field->size;
-		return filter_add_pred_fn(call, pred, fn);
+		if (pred->op == OP_NE)
+			pred->not = 1;
+		return filter_add_pred_fn(ps, call, pred, fn);
 	} else {
-		if (strict_strtoull(pred->str_val, 0, &val))
+		if (strict_strtoull(pred->str_val, 0, &val)) {
+			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
 			return -EINVAL;
+		}
 		pred->val = val;
 	}
 
-	switch (field->size) {
-	case 8:
-		fn = filter_pred_64;
-		break;
-	case 4:
-		fn = filter_pred_32;
-		break;
-	case 2:
-		fn = filter_pred_16;
-		break;
-	case 1:
-		fn = filter_pred_8;
-		break;
-	default:
+	fn = select_comparison_fn(pred->op, field->size, field->is_signed);
+	if (!fn) {
+		parse_error(ps, FILT_ERR_INVALID_OP, 0);
 		return -EINVAL;
 	}
 
-	return filter_add_pred_fn(call, pred, fn);
-}
-
-int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
-{
-	int err;
-
-	mutex_lock(&filter_mutex);
-	err = __filter_add_pred(call, pred);
-	mutex_unlock(&filter_mutex);
+	if (pred->op == OP_NE)
+		pred->not = 1;
 
-	return err;
+	return filter_add_pred_fn(ps, call, pred, fn);
 }
 
-int filter_add_subsystem_pred(struct event_subsystem *system,
-			      struct filter_pred *pred)
+static int filter_add_subsystem_pred(struct filter_parse_state *ps,
+				     struct event_subsystem *system,
+				     struct filter_pred *pred,
+				     char *filter_string)
 {
 	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
 
-	mutex_lock(&filter_mutex);
-
-	if (filter && filter->n_preds && !pred->compound) {
-		__filter_free_subsystem_preds(system);
-		filter = NULL;
-	}
-
-	if (!filter) {
-		system->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-		if (!system->filter) {
-			mutex_unlock(&filter_mutex);
-			return -ENOMEM;
-		}
-		filter = system->filter;
+	if (!filter->preds) {
 		filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
 					GFP_KERNEL);
 
-		if (!filter->preds) {
-			kfree(system->filter);
-			system->filter = NULL;
-			mutex_unlock(&filter_mutex);
+		if (!filter->preds)
 			return -ENOMEM;
-		}
 	}
 
 	if (filter->n_preds == MAX_FILTER_PRED) {
-		mutex_unlock(&filter_mutex);
+		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
 		return -ENOSPC;
 	}
 
@@ -424,97 +585,508 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 		if (strcmp(call->system, system->name))
 			continue;
 
-		err = __filter_add_pred(call, pred);
-		if (err == -ENOMEM) {
-			filter->preds[filter->n_preds] = NULL;
-			filter->n_preds--;
-			mutex_unlock(&filter_mutex);
+		err = filter_add_pred(ps, call, pred);
+		if (err) {
+			filter_free_subsystem_preds(system);
+			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
 			return err;
 		}
+		replace_filter_string(call->filter, filter_string);
 	}
 
-	mutex_unlock(&filter_mutex);
+	return 0;
+}
+
+static void parse_init(struct filter_parse_state *ps,
+		       struct filter_op *ops,
+		       char *infix_string)
+{
+	memset(ps, '\0', sizeof(*ps));
+
+	ps->infix.string = infix_string;
+	ps->infix.cnt = strlen(infix_string);
+	ps->ops = ops;
+
+	INIT_LIST_HEAD(&ps->opstack);
+	INIT_LIST_HEAD(&ps->postfix);
+}
+
+static char infix_next(struct filter_parse_state *ps)
+{
+	ps->infix.cnt--;
+
+	return ps->infix.string[ps->infix.tail++];
+}
+
+static char infix_peek(struct filter_parse_state *ps)
+{
+	if (ps->infix.tail == strlen(ps->infix.string))
+		return 0;
+
+	return ps->infix.string[ps->infix.tail];
+}
+
+static void infix_advance(struct filter_parse_state *ps)
+{
+	ps->infix.cnt--;
+	ps->infix.tail++;
+}
+
+static inline int is_precedence_lower(struct filter_parse_state *ps,
+				      int a, int b)
+{
+	return ps->ops[a].precedence < ps->ops[b].precedence;
+}
+
+static inline int is_op_char(struct filter_parse_state *ps, char c)
+{
+	int i;
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (ps->ops[i].string[0] == c)
+			return 1;
+	}
 
 	return 0;
 }
 
-/*
- * The filter format can be
- *   - 0, which means remove all filter preds
- *   - [||/&&] <field> ==/!= <val>
- */
-int filter_parse(char **pbuf, struct filter_pred *pred)
-{
-	char *tok, *val_str = NULL;
-	int tok_n = 0;
-
-	while ((tok = strsep(pbuf, " \n"))) {
-		if (tok_n == 0) {
-			if (!strcmp(tok, "0")) {
-				pred->clear = 1;
-				return 0;
-			} else if (!strcmp(tok, "&&")) {
-				pred->or = 0;
-				pred->compound = 1;
-			} else if (!strcmp(tok, "||")) {
-				pred->or = 1;
-				pred->compound = 1;
-			} else
-				pred->field_name = tok;
-			tok_n = 1;
-			continue;
+static int infix_get_op(struct filter_parse_state *ps, char firstc)
+{
+	char nextc = infix_peek(ps);
+	char opstr[3];
+	int i;
+
+	opstr[0] = firstc;
+	opstr[1] = nextc;
+	opstr[2] = '\0';
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (!strcmp(opstr, ps->ops[i].string)) {
+			infix_advance(ps);
+			return ps->ops[i].id;
 		}
-		if (tok_n == 1) {
-			if (!pred->field_name)
-				pred->field_name = tok;
-			else if (!strcmp(tok, "!="))
-				pred->not = 1;
-			else if (!strcmp(tok, "=="))
-				pred->not = 0;
-			else {
-				pred->field_name = NULL;
+	}
+
+	opstr[1] = '\0';
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (!strcmp(opstr, ps->ops[i].string))
+			return ps->ops[i].id;
+	}
+
+	return OP_NONE;
+}
+
+static inline void clear_operand_string(struct filter_parse_state *ps)
+{
+	memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
+	ps->operand.tail = 0;
+}
+
+static inline int append_operand_char(struct filter_parse_state *ps, char c)
+{
+	if (ps->operand.tail == MAX_FILTER_STR_VAL)
+		return -EINVAL;
+
+	ps->operand.string[ps->operand.tail++] = c;
+
+	return 0;
+}
+
+static int filter_opstack_push(struct filter_parse_state *ps, int op)
+{
+	struct opstack_op *opstack_op;
+
+	opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
+	if (!opstack_op)
+		return -ENOMEM;
+
+	opstack_op->op = op;
+	list_add(&opstack_op->list, &ps->opstack);
+
+	return 0;
+}
+
+static int filter_opstack_empty(struct filter_parse_state *ps)
+{
+	return list_empty(&ps->opstack);
+}
+
+static int filter_opstack_top(struct filter_parse_state *ps)
+{
+	struct opstack_op *opstack_op;
+
+	if (filter_opstack_empty(ps))
+		return OP_NONE;
+
+	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+
+	return opstack_op->op;
+}
+
+static int filter_opstack_pop(struct filter_parse_state *ps)
+{
+	struct opstack_op *opstack_op;
+	int op;
+
+	if (filter_opstack_empty(ps))
+		return OP_NONE;
+
+	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+	op = opstack_op->op;
+	list_del(&opstack_op->list);
+
+	kfree(opstack_op);
+
+	return op;
+}
+
+static void filter_opstack_clear(struct filter_parse_state *ps)
+{
+	while (!filter_opstack_empty(ps))
+		filter_opstack_pop(ps);
+}
+
+static char *curr_operand(struct filter_parse_state *ps)
+{
+	return ps->operand.string;
+}
+
+static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
+{
+	struct postfix_elt *elt;
+
+	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
+	if (!elt)
+		return -ENOMEM;
+
+	elt->op = OP_NONE;
+	elt->operand = kstrdup(operand, GFP_KERNEL);
+	if (!elt->operand) {
+		kfree(elt);
+		return -ENOMEM;
+	}
+
+	list_add_tail(&elt->list, &ps->postfix);
+
+	return 0;
+}
+
+static int postfix_append_op(struct filter_parse_state *ps, int op)
+{
+	struct postfix_elt *elt;
+
+	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
+	if (!elt)
+		return -ENOMEM;
+
+	elt->op = op;
+	elt->operand = NULL;
+
+	list_add_tail(&elt->list, &ps->postfix);
+
+	return 0;
+}
+
+static void postfix_clear(struct filter_parse_state *ps)
+{
+	struct postfix_elt *elt;
+
+	while (!list_empty(&ps->postfix)) {
+		elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
+		kfree(elt->operand);
+		list_del(&elt->list);
+	}
+}
+
+static int filter_parse(struct filter_parse_state *ps)
+{
+	int op, top_op;
+	char ch;
+
+	while ((ch = infix_next(ps))) {
+		if (isspace(ch))
+			continue;
+
+		if (is_op_char(ps, ch)) {
+			op = infix_get_op(ps, ch);
+			if (op == OP_NONE) {
+				parse_error(ps, FILT_ERR_INVALID_OP, 0);
 				return -EINVAL;
 			}
-			tok_n = 2;
+
+			if (strlen(curr_operand(ps))) {
+				postfix_append_operand(ps, curr_operand(ps));
+				clear_operand_string(ps);
+			}
+
+			while (!filter_opstack_empty(ps)) {
+				top_op = filter_opstack_top(ps);
+				if (!is_precedence_lower(ps, top_op, op)) {
+					top_op = filter_opstack_pop(ps);
+					postfix_append_op(ps, top_op);
+					continue;
+				}
+				break;
+			}
+
+			filter_opstack_push(ps, op);
 			continue;
 		}
-		if (tok_n == 2) {
-			if (pred->compound) {
-				if (!strcmp(tok, "!="))
-					pred->not = 1;
-				else if (!strcmp(tok, "=="))
-					pred->not = 0;
-				else {
-					pred->field_name = NULL;
-					return -EINVAL;
-				}
-			} else {
-				val_str = tok;
-				break; /* done */
+
+		if (ch == '(') {
+			filter_opstack_push(ps, OP_OPEN_PAREN);
+			continue;
+		}
+
+		if (ch == ')') {
+			if (strlen(curr_operand(ps))) {
+				postfix_append_operand(ps, curr_operand(ps));
+				clear_operand_string(ps);
+			}
+
+			top_op = filter_opstack_pop(ps);
+			while (top_op != OP_NONE) {
+				if (top_op == OP_OPEN_PAREN)
+					break;
+				postfix_append_op(ps, top_op);
+				top_op = filter_opstack_pop(ps);
+			}
+			if (top_op == OP_NONE) {
+				parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+				return -EINVAL;
 			}
-			tok_n = 3;
 			continue;
 		}
-		if (tok_n == 3) {
-			val_str = tok;
-			break; /* done */
+		if (append_operand_char(ps, ch)) {
+			parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
+			return -EINVAL;
+		}
+	}
+
+	if (strlen(curr_operand(ps)))
+		postfix_append_operand(ps, curr_operand(ps));
+
+	while (!filter_opstack_empty(ps)) {
+		top_op = filter_opstack_pop(ps);
+		if (top_op == OP_NONE)
+			break;
+		if (top_op == OP_OPEN_PAREN) {
+			parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+			return -EINVAL;
+		}
+		postfix_append_op(ps, top_op);
+	}
+
+	return 0;
+}
+
+static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
+{
+	struct filter_pred *pred;
+
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
+		return NULL;
+
+	pred->field_name = kstrdup(operand1, GFP_KERNEL);
+	if (!pred->field_name) {
+		kfree(pred);
+		return NULL;
+	}
+
+	strcpy(pred->str_val, operand2);
+	pred->str_len = strlen(operand2);
+
+	pred->op = op;
+
+	return pred;
+}
+
+static struct filter_pred *create_logical_pred(int op)
+{
+	struct filter_pred *pred;
+
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
+		return NULL;
+
+	pred->op = op;
+
+	return pred;
+}
+
+static int check_preds(struct filter_parse_state *ps)
+{
+	int n_normal_preds = 0, n_logical_preds = 0;
+	struct postfix_elt *elt;
+
+	list_for_each_entry(elt, &ps->postfix, list) {
+		if (elt->op == OP_NONE)
+			continue;
+
+		if (elt->op == OP_AND || elt->op == OP_OR) {
+			n_logical_preds++;
+			continue;
 		}
+		n_normal_preds++;
 	}
 
-	if (!val_str || !strlen(val_str)
-	    || strlen(val_str) >= MAX_FILTER_STR_VAL) {
-		pred->field_name = NULL;
+	if (!n_normal_preds || n_logical_preds >= n_normal_preds) {
+		parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
 		return -EINVAL;
 	}
 
-	strcpy(pred->str_val, val_str);
-	pred->str_len = strlen(val_str);
+	return 0;
+}
 
-	pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-	if (!pred->field_name)
-		return -ENOMEM;
+static int replace_preds(struct event_subsystem *system,
+			 struct ftrace_event_call *call,
+			 struct filter_parse_state *ps,
+			 char *filter_string)
+{
+	char *operand1 = NULL, *operand2 = NULL;
+	struct filter_pred *pred;
+	struct postfix_elt *elt;
+	int err;
+
+	err = check_preds(ps);
+	if (err)
+		return err;
+
+	list_for_each_entry(elt, &ps->postfix, list) {
+		if (elt->op == OP_NONE) {
+			if (!operand1)
+				operand1 = elt->operand;
+			else if (!operand2)
+				operand2 = elt->operand;
+			else {
+				parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
+				return -EINVAL;
+			}
+			continue;
+		}
+
+		if (elt->op == OP_AND || elt->op == OP_OR) {
+			pred = create_logical_pred(elt->op);
+			if (call) {
+				err = filter_add_pred(ps, call, pred);
+				filter_free_pred(pred);
+			} else
+				err = filter_add_subsystem_pred(ps, system,
+							pred, filter_string);
+			if (err)
+				return err;
+
+			operand1 = operand2 = NULL;
+			continue;
+		}
+
+		if (!operand1 || !operand2) {
+			parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
+			return -EINVAL;
+		}
+
+		pred = create_pred(elt->op, operand1, operand2);
+		if (call) {
+			err = filter_add_pred(ps, call, pred);
+			filter_free_pred(pred);
+		} else
+			err = filter_add_subsystem_pred(ps, system, pred,
+							filter_string);
+		if (err)
+			return err;
+
+		operand1 = operand2 = NULL;
+	}
 
 	return 0;
 }
 
+int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+{
+	int err;
+
+	struct filter_parse_state *ps;
+
+	mutex_lock(&filter_mutex);
+
+	if (!strcmp(strstrip(filter_string), "0")) {
+		filter_disable_preds(call);
+		remove_filter_string(call->filter);
+		mutex_unlock(&filter_mutex);
+		return 0;
+	}
+
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		return -ENOMEM;
+
+	filter_disable_preds(call);
+	replace_filter_string(call->filter, filter_string);
+
+	parse_init(ps, filter_ops, filter_string);
+	err = filter_parse(ps);
+	if (err) {
+		append_filter_err(ps, call->filter);
+		goto out;
+	}
+
+	err = replace_preds(NULL, call, ps, filter_string);
+	if (err)
+		append_filter_err(ps, call->filter);
+
+out:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+
+	mutex_unlock(&filter_mutex);
+
+	return err;
+}
+
+int apply_subsystem_event_filter(struct event_subsystem *system,
+				 char *filter_string)
+{
+	int err;
+
+	struct filter_parse_state *ps;
+
+	mutex_lock(&filter_mutex);
+
+	if (!strcmp(strstrip(filter_string), "0")) {
+		filter_free_subsystem_preds(system);
+		remove_filter_string(system->filter);
+		mutex_unlock(&filter_mutex);
+		return 0;
+	}
+
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		return -ENOMEM;
+
+	filter_free_subsystem_preds(system);
+	replace_filter_string(system->filter, filter_string);
+
+	parse_init(ps, filter_ops, filter_string);
+	err = filter_parse(ps);
+	if (err) {
+		append_filter_err(ps, system->filter);
+		goto out;
+	}
+
+	err = replace_preds(system, NULL, ps, filter_string);
+	if (err)
+		append_filter_err(ps, system->filter);
+
+out:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+
+	mutex_unlock(&filter_mutex);
+
+	return err;
+}
 
-- 
cgit v0.10.2


From a0e39ed378fb6ba916522764cd508fa7d42ad495 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 29 Apr 2009 13:51:39 +0200
Subject: tracing: fix build failure on s390

"tracing: create automated trace defines" causes this compile error on s390,
as reported by Sachin Sant against linux-next:

 kernel/built-in.o: In function `__do_softirq':
 (.text+0x1c680): undefined reference to `__tracepoint_softirq_entry'

This happens because the definitions of the softirq tracepoints were moved
from kernel/softirq.c to kernel/irq/handle.c. Since s390 doesn't support
generic hardirqs handle.c doesn't get compiled and the definitions are
missing.

So move the tracepoints to softirq.c again.

[ Impact: fix build failure on s390 ]

Reported-by: Sachin Sant <sachinp@in.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
LKML-Reference: <20090429135139.5fac79b8@osiris.boeblingen.de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 37c6363..e68bb5a 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,8 +18,6 @@
 #include <linux/rculist.h>
 #include <linux/hash.h>
 #include <linux/bootmem.h>
-
-#define CREATE_TRACE_POINTS
 #include <trace/events/irq.h>
 
 #include "internals.h"
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7ab9dfd..d4ba347 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -24,6 +24,8 @@
 #include <linux/ftrace.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
+
+#define CREATE_TRACE_POINTS
 #include <trace/events/irq.h>
 
 #include <asm/irq.h>
-- 
cgit v0.10.2


From 50fa610a3b6ba7cf91d7a92229177dfaff2b81a1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 28 Apr 2009 15:01:38 +0100
Subject: sched: Document memory barriers implied by sleep/wake-up primitives

Add a section to the memory barriers document to note the implied
memory barriers of sleep primitives (set_current_state() and wrappers)
and wake-up primitives (wake_up() and co.).

Also extend the in-code comments on the wake_up() functions to note
these implied barriers.

[ Impact: add documentation ]

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20090428140138.1192.94723.stgit@warthog.procyon.org.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f5b7127..7f5809e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -31,6 +31,7 @@ Contents:
 
      - Locking functions.
      - Interrupt disabling functions.
+     - Sleep and wake-up functions.
      - Miscellaneous functions.
 
  (*) Inter-CPU locking barrier effects.
@@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some
 other means.
 
 
+SLEEP AND WAKE-UP FUNCTIONS
+---------------------------
+
+Sleeping and waking on an event flagged in global data can be viewed as an
+interaction between two pieces of data: the task state of the task waiting for
+the event and the global data used to indicate the event.  To make sure that
+these appear to happen in the right order, the primitives to begin the process
+of going to sleep, and the primitives to initiate a wake up imply certain
+barriers.
+
+Firstly, the sleeper normally follows something like this sequence of events:
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (event_indicated)
+			break;
+		schedule();
+	}
+
+A general memory barrier is interpolated automatically by set_current_state()
+after it has altered the task state:
+
+	CPU 1
+	===============================
+	set_current_state();
+	  set_mb();
+	    STORE current->state
+	    <general barrier>
+	LOAD event_indicated
+
+set_current_state() may be wrapped by:
+
+	prepare_to_wait();
+	prepare_to_wait_exclusive();
+
+which therefore also imply a general memory barrier after setting the state.
+The whole sequence above is available in various canned forms, all of which
+interpolate the memory barrier in the right place:
+
+	wait_event();
+	wait_event_interruptible();
+	wait_event_interruptible_exclusive();
+	wait_event_interruptible_timeout();
+	wait_event_killable();
+	wait_event_timeout();
+	wait_on_bit();
+	wait_on_bit_lock();
+
+
+Secondly, code that performs a wake up normally follows something like this:
+
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+or:
+
+	event_indicated = 1;
+	wake_up_process(event_daemon);
+
+A write memory barrier is implied by wake_up() and co. if and only if they wake
+something up.  The barrier occurs before the task state is cleared, and so sits
+between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+
+	CPU 1				CPU 2
+	===============================	===============================
+	set_current_state();		STORE event_indicated
+	  set_mb();			wake_up();
+	    STORE current->state	  <write barrier>
+	    <general barrier>		  STORE current->state
+	LOAD event_indicated
+
+The available waker functions include:
+
+	complete();
+	wake_up();
+	wake_up_all();
+	wake_up_bit();
+	wake_up_interruptible();
+	wake_up_interruptible_all();
+	wake_up_interruptible_nr();
+	wake_up_interruptible_poll();
+	wake_up_interruptible_sync();
+	wake_up_interruptible_sync_poll();
+	wake_up_locked();
+	wake_up_locked_poll();
+	wake_up_nr();
+	wake_up_poll();
+	wake_up_process();
+
+
+[!] Note that the memory barriers implied by the sleeper and the waker do _not_
+order multiple stores before the wake-up with respect to loads of those stored
+values after the sleeper has called set_current_state().  For instance, if the
+sleeper does:
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (event_indicated)
+		break;
+	__set_current_state(TASK_RUNNING);
+	do_something(my_data);
+
+and the waker does:
+
+	my_data = value;
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+there's no guarantee that the change to event_indicated will be perceived by
+the sleeper as coming after the change to my_data.  In such a circumstance, the
+code on both sides must interpolate its own memory barriers between the
+separate data accesses.  Thus the above sleeper ought to do:
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (event_indicated) {
+		smp_rmb();
+		do_something(my_data);
+	}
+
+and the waker should do:
+
+	my_data = value;
+	smp_wmb();
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+
 MISCELLANEOUS FUNCTIONS
 -----------------------
 
@@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED?
 
 Under normal operation, memory operation reordering is generally not going to
 be a problem as a single-threaded linear piece of code will still appear to
-work correctly, even if it's in an SMP kernel.  There are, however, three
+work correctly, even if it's in an SMP kernel.  There are, however, four
 circumstances in which reordering definitely _could_ be a problem:
 
  (*) Interprocessor interaction.
diff --git a/kernel/sched.c b/kernel/sched.c
index b902e58..fd0c2ce 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2458,6 +2458,17 @@ out:
 	return success;
 }
 
+/**
+ * wake_up_process - Wake up a specific process
+ * @p: The process to be woken up.
+ *
+ * Attempt to wake up the nominated process and move it to the set of runnable
+ * processes.  Returns 1 if the process was woken up, 0 if it was already
+ * running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
 int wake_up_process(struct task_struct *p)
 {
 	return try_to_wake_up(p, TASK_ALL, 0);
@@ -5241,6 +5252,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5279,6 +5293,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
  * with each other. This can prevent needless bouncing between CPUs.
  *
  * On UP it can prevent extra preemption.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5315,6 +5332,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * awakened in the same order in which they were queued.
  *
  * See also complete_all(), wait_for_completion() and related routines.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete(struct completion *x)
 {
@@ -5332,6 +5352,9 @@ EXPORT_SYMBOL(complete);
  * @x:  holds the state of this particular completion
  *
  * This will wake up all threads waiting on this particular completion event.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete_all(struct completion *x)
 {
-- 
cgit v0.10.2


From da1a776be1ac7f78bb30ececbec4c1383163b079 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:46:58 +0200
Subject: perf_counter, x86: remove X86_FEATURE_ARCH_PERFMON flag for AMD cpus

X86_FEATURE_ARCH_PERFMON is an Intel hardware feature that does not
work on AMD CPUs. The flag is now only used in Intel specific code
(especially initialization).

[ Impact: refactor code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1241002046-8832-2-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fd69c51..7e4a459 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -420,10 +420,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 >= 6)
 		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
 
-	/* Enable Performance counter for K7 and later */
-	if (c->x86 > 6 && c->x86 <= 0x11)
-		set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
-
 	if (!c->x86_model_id[0]) {
 		switch (c->x86) {
 		case 0xf:
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 0fcbaab..7d0f81d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -949,6 +949,9 @@ static struct pmc_x86_ops *pmc_intel_init(void)
 	unsigned int unused;
 	unsigned int ebx;
 
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+		return NULL;
+
 	/*
 	 * Check whether the Architectural PerfMon supports
 	 * Branch Misses Retired Event or not.
@@ -987,9 +990,6 @@ static struct pmc_x86_ops *pmc_amd_init(void)
 
 void __init init_hw_perf_counters(void)
 {
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
-		return;
-
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_INTEL:
 		pmc_ops = pmc_intel_init();
-- 
cgit v0.10.2


From 829b42dd395c5801f6ae87da87ecbdcfd5ef1a6c Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:46:59 +0200
Subject: perf_counter, x86: declare perf_max_counters only for
 CONFIG_PERF_COUNTERS

This is only needed for CONFIG_PERF_COUNTERS enabled.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1241002046-8832-3-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9814328..be10b3f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -512,12 +512,13 @@ struct perf_cpu_context {
 	int			recursion[4];
 };
 
+#ifdef CONFIG_PERF_COUNTERS
+
 /*
  * Set by architecture code:
  */
 extern int perf_max_counters;
 
-#ifdef CONFIG_PERF_COUNTERS
 extern const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter);
 
-- 
cgit v0.10.2


From 4138960a9251a265002b5cf07e671a49f8495381 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:00 +0200
Subject: perf_counter, x86: add default path to cpu detection

This quits hw counter initialization immediately if no cpu is
detected.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-4-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7d0f81d..d6d6529 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -997,6 +997,8 @@ void __init init_hw_perf_counters(void)
 	case X86_VENDOR_AMD:
 		pmc_ops = pmc_amd_init();
 		break;
+	default:
+		return;
 	}
 	if (!pmc_ops)
 		return;
-- 
cgit v0.10.2


From 4295ee62660b13ddb87d41539f49b239e6e7d56f Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:01 +0200
Subject: perf_counter, x86: rework pmc_amd_save_disable_all() and
 pmc_amd_restore_all()

MSR reads and writes are expensive. This patch adds checks to avoid
its usage where possible.

[ Impact: micro-optimization on AMD CPUs ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-5-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d6d6529..75a0903 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -334,11 +334,13 @@ static u64 pmc_amd_save_disable_all(void)
 	for (idx = 0; idx < nr_counters_generic; idx++) {
 		u64 val;
 
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
 		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
-		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
-			val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-			wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
-		}
+		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
+			continue;
+		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
 	}
 
 	return enabled;
@@ -372,13 +374,15 @@ static void pmc_amd_restore_all(u64 ctrl)
 		return;
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
-		if (test_bit(idx, cpuc->active_mask)) {
-			u64 val;
+		u64 val;
 
-			rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
-			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-			wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
-		}
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			continue;
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
 	}
 }
 
-- 
cgit v0.10.2


From 527e26af3741a2168986d8b82653ffe173891324 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:02 +0200
Subject: perf_counter, x86: protect per-cpu variables with compile barriers
 only

Per-cpu variables needn't to be protected with cpu barriers
(smp_wmb()). Protection is only needed for preemption on the same cpu
(rescheduling or the nmi handler). This can be done using a compiler
barrier only.

[ Impact: micro-optimization ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-6-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 75a0903..ad663d5 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -673,7 +673,7 @@ try_generic:
 	/*
 	 * Make it visible before enabling the hw:
 	 */
-	smp_wmb();
+	barrier();
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
 	__pmc_generic_enable(counter, hwc, idx);
@@ -745,7 +745,7 @@ static void pmc_generic_disable(struct perf_counter *counter)
 	 * Make sure the cleared pointer becomes visible before we
 	 * (potentially) free the counter:
 	 */
-	smp_wmb();
+	barrier();
 
 	/*
 	 * Drain the remaining delta count out of a counter
-- 
cgit v0.10.2


From 4aeb0b4239bb3b67ed402cb9cef3e000c892cadf Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:03 +0200
Subject: perfcounters: rename struct hw_perf_counter_ops into struct pmu

This patch renames struct hw_perf_counter_ops into struct pmu. It
introduces a structure to describe a cpu specific pmu (performance
monitoring unit). It may contain ops and data. The new name of the
structure fits better, is shorter, and thus better to handle. Where it
was appropriate, names of function and variable have been changed too.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-7-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bd76d0f..d9bbe5e 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -256,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
 	return 0;
 }
 
-static void power_perf_read(struct perf_counter *counter)
+static void power_pmu_read(struct perf_counter *counter)
 {
 	long val, delta, prev;
 
@@ -405,7 +405,7 @@ void hw_perf_restore(u64 disable)
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
 		if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
-			power_perf_read(counter);
+			power_pmu_read(counter);
 			write_pmc(counter->hw.idx, 0);
 			counter->hw.idx = 0;
 		}
@@ -477,7 +477,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 	counter->oncpu = cpu;
 	counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
 	if (is_software_counter(counter))
-		counter->hw_ops->enable(counter);
+		counter->pmu->enable(counter);
 }
 
 /*
@@ -533,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
  * re-enable the PMU in order to get hw_perf_restore to do the
  * actual work of reconfiguring the PMU.
  */
-static int power_perf_enable(struct perf_counter *counter)
+static int power_pmu_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuhw;
 	unsigned long flags;
@@ -573,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter)
 /*
  * Remove a counter from the PMU.
  */
-static void power_perf_disable(struct perf_counter *counter)
+static void power_pmu_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuhw;
 	long i;
@@ -583,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter)
 	local_irq_save(flags);
 	pmudis = hw_perf_save_disable();
 
-	power_perf_read(counter);
+	power_pmu_read(counter);
 
 	cpuhw = &__get_cpu_var(cpu_hw_counters);
 	for (i = 0; i < cpuhw->n_counters; ++i) {
@@ -607,10 +607,10 @@ static void power_perf_disable(struct perf_counter *counter)
 	local_irq_restore(flags);
 }
 
-struct hw_perf_counter_ops power_perf_ops = {
-	.enable = power_perf_enable,
-	.disable = power_perf_disable,
-	.read = power_perf_read
+struct pmu power_pmu = {
+	.enable		= power_pmu_enable,
+	.disable	= power_pmu_disable,
+	.read		= power_pmu_read,
 };
 
 /* Number of perf_counters counting hardware events */
@@ -631,8 +631,7 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
 	}
 }
 
-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
 	unsigned long ev;
 	struct perf_counter *ctrs[MAX_HWCOUNTERS];
@@ -705,7 +704,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 
 	if (err)
 		return ERR_PTR(err);
-	return &power_perf_ops;
+	return &power_pmu;
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ad663d5..95de980 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -515,8 +515,8 @@ __pmc_fixed_disable(struct perf_counter *counter,
 }
 
 static inline void
-__pmc_generic_disable(struct perf_counter *counter,
-			   struct hw_perf_counter *hwc, unsigned int idx)
+__x86_pmu_disable(struct perf_counter *counter,
+		  struct hw_perf_counter *hwc, unsigned int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
 		__pmc_fixed_disable(counter, hwc, idx);
@@ -591,8 +591,8 @@ __pmc_fixed_enable(struct perf_counter *counter,
 }
 
 static void
-__pmc_generic_enable(struct perf_counter *counter,
-			  struct hw_perf_counter *hwc, int idx)
+__x86_pmu_enable(struct perf_counter *counter,
+		 struct hw_perf_counter *hwc, int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
 		__pmc_fixed_enable(counter, hwc, idx);
@@ -626,7 +626,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 /*
  * Find a PMC slot for the freshly enabled / scheduled in counter:
  */
-static int pmc_generic_enable(struct perf_counter *counter)
+static int x86_pmu_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -667,7 +667,7 @@ try_generic:
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__pmc_generic_disable(counter, hwc, idx);
+	__x86_pmu_disable(counter, hwc, idx);
 
 	cpuc->counters[idx] = counter;
 	/*
@@ -676,7 +676,7 @@ try_generic:
 	barrier();
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
-	__pmc_generic_enable(counter, hwc, idx);
+	__x86_pmu_enable(counter, hwc, idx);
 
 	return 0;
 }
@@ -731,13 +731,13 @@ void perf_counter_print_debug(void)
 	local_irq_enable();
 }
 
-static void pmc_generic_disable(struct perf_counter *counter)
+static void x86_pmu_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__pmc_generic_disable(counter, hwc, idx);
+	__x86_pmu_disable(counter, hwc, idx);
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
@@ -767,7 +767,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
 	__hw_perf_counter_set_period(counter, hwc, idx);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
-		__pmc_generic_enable(counter, hwc, idx);
+		__x86_pmu_enable(counter, hwc, idx);
 }
 
 /*
@@ -805,7 +805,7 @@ again:
 
 		perf_save_and_restart(counter);
 		if (perf_counter_overflow(counter, nmi, regs, 0))
-			__pmc_generic_disable(counter, &counter->hw, bit);
+			__x86_pmu_disable(counter, &counter->hw, bit);
 	}
 
 	hw_perf_ack_status(ack);
@@ -1034,19 +1034,18 @@ void __init init_hw_perf_counters(void)
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
 
-static void pmc_generic_read(struct perf_counter *counter)
+static void x86_pmu_read(struct perf_counter *counter)
 {
 	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 }
 
-static const struct hw_perf_counter_ops x86_perf_counter_ops = {
-	.enable		= pmc_generic_enable,
-	.disable	= pmc_generic_disable,
-	.read		= pmc_generic_read,
+static const struct pmu pmu = {
+	.enable		= x86_pmu_enable,
+	.disable	= x86_pmu_disable,
+	.read		= x86_pmu_read,
 };
 
-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
 	int err;
 
@@ -1054,7 +1053,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 	if (err)
 		return ERR_PTR(err);
 
-	return &x86_perf_counter_ops;
+	return &pmu;
 }
 
 /*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index be10b3f..c3db52d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -334,9 +334,9 @@ struct hw_perf_counter {
 struct perf_counter;
 
 /**
- * struct hw_perf_counter_ops - performance counter hw ops
+ * struct pmu - generic performance monitoring unit
  */
-struct hw_perf_counter_ops {
+struct pmu {
 	int (*enable)			(struct perf_counter *counter);
 	void (*disable)			(struct perf_counter *counter);
 	void (*read)			(struct perf_counter *counter);
@@ -381,7 +381,7 @@ struct perf_counter {
 	struct list_head		sibling_list;
 	int 				nr_siblings;
 	struct perf_counter		*group_leader;
-	const struct hw_perf_counter_ops *hw_ops;
+	const struct pmu		*pmu;
 
 	enum perf_counter_active_state	state;
 	enum perf_counter_active_state	prev_state;
@@ -519,8 +519,7 @@ struct perf_cpu_context {
  */
 extern int perf_max_counters;
 
-extern const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter);
+extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
 
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
 extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0939609..582108a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -52,8 +52,7 @@ static DEFINE_MUTEX(perf_resource_mutex);
 /*
  * Architecture provided APIs - weak aliases:
  */
-extern __weak const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
 	return NULL;
 }
@@ -124,7 +123,7 @@ counter_sched_out(struct perf_counter *counter,
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_stopped = ctx->time;
-	counter->hw_ops->disable(counter);
+	counter->pmu->disable(counter);
 	counter->oncpu = -1;
 
 	if (!is_software_counter(counter))
@@ -417,7 +416,7 @@ counter_sched_in(struct perf_counter *counter,
 	 */
 	smp_wmb();
 
-	if (counter->hw_ops->enable(counter)) {
+	if (counter->pmu->enable(counter)) {
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->oncpu = -1;
 		return -EAGAIN;
@@ -1096,7 +1095,7 @@ static void __read(void *info)
 	local_irq_save(flags);
 	if (ctx->is_active)
 		update_context_time(ctx);
-	counter->hw_ops->read(counter);
+	counter->pmu->read(counter);
 	update_counter_times(counter);
 	local_irq_restore(flags);
 }
@@ -1922,7 +1921,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		leader = counter->group_leader;
 		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
 			if (sub != counter)
-				sub->hw_ops->read(sub);
+				sub->pmu->read(sub);
 
 			group_entry.event = sub->hw_event.config;
 			group_entry.counter = atomic64_read(&sub->count);
@@ -2264,7 +2263,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	struct pt_regs *regs;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
-	counter->hw_ops->read(counter);
+	counter->pmu->read(counter);
 
 	regs = get_irq_regs();
 	/*
@@ -2410,7 +2409,7 @@ static void perf_swcounter_disable(struct perf_counter *counter)
 	perf_swcounter_update(counter);
 }
 
-static const struct hw_perf_counter_ops perf_ops_generic = {
+static const struct pmu perf_ops_generic = {
 	.enable		= perf_swcounter_enable,
 	.disable	= perf_swcounter_disable,
 	.read		= perf_swcounter_read,
@@ -2460,7 +2459,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter)
 	cpu_clock_perf_counter_update(counter);
 }
 
-static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
+static const struct pmu perf_ops_cpu_clock = {
 	.enable		= cpu_clock_perf_counter_enable,
 	.disable	= cpu_clock_perf_counter_disable,
 	.read		= cpu_clock_perf_counter_read,
@@ -2522,7 +2521,7 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 	task_clock_perf_counter_update(counter, time);
 }
 
-static const struct hw_perf_counter_ops perf_ops_task_clock = {
+static const struct pmu perf_ops_task_clock = {
 	.enable		= task_clock_perf_counter_enable,
 	.disable	= task_clock_perf_counter_disable,
 	.read		= task_clock_perf_counter_read,
@@ -2574,7 +2573,7 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
 	cpu_migrations_perf_counter_update(counter);
 }
 
-static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
+static const struct pmu perf_ops_cpu_migrations = {
 	.enable		= cpu_migrations_perf_counter_enable,
 	.disable	= cpu_migrations_perf_counter_disable,
 	.read		= cpu_migrations_perf_counter_read,
@@ -2600,8 +2599,7 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
 	ftrace_profile_disable(perf_event_id(&counter->hw_event));
 }
 
-static const struct hw_perf_counter_ops *
-tp_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 {
 	int event_id = perf_event_id(&counter->hw_event);
 	int ret;
@@ -2616,18 +2614,16 @@ tp_perf_counter_init(struct perf_counter *counter)
 	return &perf_ops_generic;
 }
 #else
-static const struct hw_perf_counter_ops *
-tp_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 {
 	return NULL;
 }
 #endif
 
-static const struct hw_perf_counter_ops *
-sw_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
 	struct perf_counter_hw_event *hw_event = &counter->hw_event;
-	const struct hw_perf_counter_ops *hw_ops = NULL;
+	const struct pmu *pmu = NULL;
 	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
@@ -2639,7 +2635,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 */
 	switch (perf_event_id(&counter->hw_event)) {
 	case PERF_COUNT_CPU_CLOCK:
-		hw_ops = &perf_ops_cpu_clock;
+		pmu = &perf_ops_cpu_clock;
 
 		if (hw_event->irq_period && hw_event->irq_period < 10000)
 			hw_event->irq_period = 10000;
@@ -2650,9 +2646,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 		 * use the cpu_clock counter instead.
 		 */
 		if (counter->ctx->task)
-			hw_ops = &perf_ops_task_clock;
+			pmu = &perf_ops_task_clock;
 		else
-			hw_ops = &perf_ops_cpu_clock;
+			pmu = &perf_ops_cpu_clock;
 
 		if (hw_event->irq_period && hw_event->irq_period < 10000)
 			hw_event->irq_period = 10000;
@@ -2661,18 +2657,18 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_PAGE_FAULTS_MIN:
 	case PERF_COUNT_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		hw_ops = &perf_ops_generic;
+		pmu = &perf_ops_generic;
 		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
 		if (!counter->hw_event.exclude_kernel)
-			hw_ops = &perf_ops_cpu_migrations;
+			pmu = &perf_ops_cpu_migrations;
 		break;
 	}
 
-	if (hw_ops)
+	if (pmu)
 		hwc->irq_period = hw_event->irq_period;
 
-	return hw_ops;
+	return pmu;
 }
 
 /*
@@ -2685,7 +2681,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   struct perf_counter *group_leader,
 		   gfp_t gfpflags)
 {
-	const struct hw_perf_counter_ops *hw_ops;
+	const struct pmu *pmu;
 	struct perf_counter *counter;
 	long err;
 
@@ -2713,46 +2709,46 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
 	counter->group_leader		= group_leader;
-	counter->hw_ops			= NULL;
+	counter->pmu			= NULL;
 	counter->ctx			= ctx;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
-	hw_ops = NULL;
+	pmu = NULL;
 
 	if (perf_event_raw(hw_event)) {
-		hw_ops = hw_perf_counter_init(counter);
+		pmu = hw_perf_counter_init(counter);
 		goto done;
 	}
 
 	switch (perf_event_type(hw_event)) {
 	case PERF_TYPE_HARDWARE:
-		hw_ops = hw_perf_counter_init(counter);
+		pmu = hw_perf_counter_init(counter);
 		break;
 
 	case PERF_TYPE_SOFTWARE:
-		hw_ops = sw_perf_counter_init(counter);
+		pmu = sw_perf_counter_init(counter);
 		break;
 
 	case PERF_TYPE_TRACEPOINT:
-		hw_ops = tp_perf_counter_init(counter);
+		pmu = tp_perf_counter_init(counter);
 		break;
 	}
 done:
 	err = 0;
-	if (!hw_ops)
+	if (!pmu)
 		err = -EINVAL;
-	else if (IS_ERR(hw_ops))
-		err = PTR_ERR(hw_ops);
+	else if (IS_ERR(pmu))
+		err = PTR_ERR(pmu);
 
 	if (err) {
 		kfree(counter);
 		return ERR_PTR(err);
 	}
 
-	counter->hw_ops = hw_ops;
+	counter->pmu = pmu;
 
 	if (counter->hw_event.mmap)
 		atomic_inc(&nr_mmap_tracking);
-- 
cgit v0.10.2


From 5f4ec28ffe77c840354cce1820a3436106e9e0f1 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:04 +0200
Subject: perf_counter, x86: rename struct pmc_x86_ops into struct x86_pmu

This patch renames struct pmc_x86_ops into struct x86_pmu. It
introduces a structure to describe an x86 model specific pmu
(performance monitoring unit). It may contain ops and data. The new
name of the structure fits better, is shorter, and thus better to
handle. Where it was appropriate, names of function and variable have
been changed too.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-8-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 95de980..808a1a1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -44,9 +44,9 @@ struct cpu_hw_counters {
 };
 
 /*
- * struct pmc_x86_ops - performance counter x86 ops
+ * struct x86_pmu - generic x86 pmu
  */
-struct pmc_x86_ops {
+struct x86_pmu {
 	u64		(*save_disable_all)(void);
 	void		(*restore_all)(u64);
 	u64		(*get_status)(u64);
@@ -60,7 +60,7 @@ struct pmc_x86_ops {
 	int		max_events;
 };
 
-static struct pmc_x86_ops *pmc_ops __read_mostly;
+static struct x86_pmu *x86_pmu __read_mostly;
 
 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
 	.enabled = 1,
@@ -82,12 +82,12 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_BUS_CYCLES]		= 0x013c,
 };
 
-static u64 pmc_intel_event_map(int event)
+static u64 intel_pmu_event_map(int event)
 {
 	return intel_perfmon_event_map[event];
 }
 
-static u64 pmc_intel_raw_event(u64 event)
+static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
 #define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
@@ -114,12 +114,12 @@ static const u64 amd_perfmon_event_map[] =
   [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
 };
 
-static u64 pmc_amd_event_map(int event)
+static u64 amd_pmu_event_map(int event)
 {
 	return amd_perfmon_event_map[event];
 }
 
-static u64 pmc_amd_raw_event(u64 event)
+static u64 amd_pmu_raw_event(u64 event)
 {
 #define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
 #define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
@@ -184,12 +184,12 @@ static bool reserve_pmc_hardware(void)
 		disable_lapic_nmi_watchdog();
 
 	for (i = 0; i < nr_counters_generic; i++) {
-		if (!reserve_perfctr_nmi(pmc_ops->perfctr + i))
+		if (!reserve_perfctr_nmi(x86_pmu->perfctr + i))
 			goto perfctr_fail;
 	}
 
 	for (i = 0; i < nr_counters_generic; i++) {
-		if (!reserve_evntsel_nmi(pmc_ops->eventsel + i))
+		if (!reserve_evntsel_nmi(x86_pmu->eventsel + i))
 			goto eventsel_fail;
 	}
 
@@ -197,13 +197,13 @@ static bool reserve_pmc_hardware(void)
 
 eventsel_fail:
 	for (i--; i >= 0; i--)
-		release_evntsel_nmi(pmc_ops->eventsel + i);
+		release_evntsel_nmi(x86_pmu->eventsel + i);
 
 	i = nr_counters_generic;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
-		release_perfctr_nmi(pmc_ops->perfctr + i);
+		release_perfctr_nmi(x86_pmu->perfctr + i);
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		enable_lapic_nmi_watchdog();
@@ -216,8 +216,8 @@ static void release_pmc_hardware(void)
 	int i;
 
 	for (i = 0; i < nr_counters_generic; i++) {
-		release_perfctr_nmi(pmc_ops->perfctr + i);
-		release_evntsel_nmi(pmc_ops->eventsel + i);
+		release_perfctr_nmi(x86_pmu->perfctr + i);
+		release_evntsel_nmi(x86_pmu->eventsel + i);
 	}
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -293,14 +293,14 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	 * Raw event type provide the config in the event structure
 	 */
 	if (perf_event_raw(hw_event)) {
-		hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event));
+		hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event));
 	} else {
-		if (perf_event_id(hw_event) >= pmc_ops->max_events)
+		if (perf_event_id(hw_event) >= x86_pmu->max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
+		hwc->config |= x86_pmu->event_map(perf_event_id(hw_event));
 	}
 
 	counter->destroy = hw_perf_counter_destroy;
@@ -308,7 +308,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	return 0;
 }
 
-static u64 pmc_intel_save_disable_all(void)
+static u64 intel_pmu_save_disable_all(void)
 {
 	u64 ctrl;
 
@@ -318,7 +318,7 @@ static u64 pmc_intel_save_disable_all(void)
 	return ctrl;
 }
 
-static u64 pmc_amd_save_disable_all(void)
+static u64 amd_pmu_save_disable_all(void)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	int enabled, idx;
@@ -327,7 +327,8 @@ static u64 pmc_amd_save_disable_all(void)
 	cpuc->enabled = 0;
 	/*
 	 * ensure we write the disable before we start disabling the
-	 * counters proper, so that pcm_amd_enable() does the right thing.
+	 * counters proper, so that amd_pmu_enable_counter() does the
+	 * right thing.
 	 */
 	barrier();
 
@@ -351,19 +352,19 @@ u64 hw_perf_save_disable(void)
 	if (unlikely(!perf_counters_initialized))
 		return 0;
 
-	return pmc_ops->save_disable_all();
+	return x86_pmu->save_disable_all();
 }
 /*
  * Exported because of ACPI idle
  */
 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
-static void pmc_intel_restore_all(u64 ctrl)
+static void intel_pmu_restore_all(u64 ctrl)
 {
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 }
 
-static void pmc_amd_restore_all(u64 ctrl)
+static void amd_pmu_restore_all(u64 ctrl)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	int idx;
@@ -391,14 +392,14 @@ void hw_perf_restore(u64 ctrl)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	pmc_ops->restore_all(ctrl);
+	x86_pmu->restore_all(ctrl);
 }
 /*
  * Exported because of ACPI idle
  */
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
-static u64 pmc_intel_get_status(u64 mask)
+static u64 intel_pmu_get_status(u64 mask)
 {
 	u64 status;
 
@@ -407,7 +408,7 @@ static u64 pmc_intel_get_status(u64 mask)
 	return status;
 }
 
-static u64 pmc_amd_get_status(u64 mask)
+static u64 amd_pmu_get_status(u64 mask)
 {
 	u64 status = 0;
 	int idx;
@@ -432,15 +433,15 @@ static u64 hw_perf_get_status(u64 mask)
 	if (unlikely(!perf_counters_initialized))
 		return 0;
 
-	return pmc_ops->get_status(mask);
+	return x86_pmu->get_status(mask);
 }
 
-static void pmc_intel_ack_status(u64 ack)
+static void intel_pmu_ack_status(u64 ack)
 {
 	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
-static void pmc_amd_ack_status(u64 ack)
+static void amd_pmu_ack_status(u64 ack)
 {
 }
 
@@ -449,16 +450,16 @@ static void hw_perf_ack_status(u64 ack)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	pmc_ops->ack_status(ack);
+	x86_pmu->ack_status(ack);
 }
 
-static void pmc_intel_enable(int idx, u64 config)
+static void intel_pmu_enable_counter(int idx, u64 config)
 {
 	wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
 			config | ARCH_PERFMON_EVENTSEL0_ENABLE);
 }
 
-static void pmc_amd_enable(int idx, u64 config)
+static void amd_pmu_enable_counter(int idx, u64 config)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 
@@ -474,15 +475,15 @@ static void hw_perf_enable(int idx, u64 config)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	pmc_ops->enable(idx, config);
+	x86_pmu->enable(idx, config);
 }
 
-static void pmc_intel_disable(int idx, u64 config)
+static void intel_pmu_disable_counter(int idx, u64 config)
 {
 	wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
 }
 
-static void pmc_amd_disable(int idx, u64 config)
+static void amd_pmu_disable_counter(int idx, u64 config)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 
@@ -496,7 +497,7 @@ static void hw_perf_disable(int idx, u64 config)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	pmc_ops->disable(idx, config);
+	x86_pmu->disable(idx, config);
 }
 
 static inline void
@@ -613,11 +614,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
-	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
+	if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
+	if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES)))
 		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
+	if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES)))
 		return X86_PMC_IDX_FIXED_BUS_CYCLES;
 
 	return -1;
@@ -661,8 +662,8 @@ try_generic:
 			set_bit(idx, cpuc->used);
 			hwc->idx = idx;
 		}
-		hwc->config_base  = pmc_ops->eventsel;
-		hwc->counter_base = pmc_ops->perfctr;
+		hwc->config_base  = x86_pmu->eventsel;
+		hwc->counter_base = x86_pmu->perfctr;
 	}
 
 	perf_counters_lapic_init(hwc->nmi);
@@ -710,8 +711,8 @@ void perf_counter_print_debug(void)
 	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
-		rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
-		rdmsrl(pmc_ops->perfctr  + idx, pmc_count);
+		rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl);
+		rdmsrl(x86_pmu->perfctr  + idx, pmc_count);
 
 		prev_left = per_cpu(prev_left[idx], cpu);
 
@@ -918,35 +919,35 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 	.priority		= 1
 };
 
-static struct pmc_x86_ops pmc_intel_ops = {
-	.save_disable_all	= pmc_intel_save_disable_all,
-	.restore_all		= pmc_intel_restore_all,
-	.get_status		= pmc_intel_get_status,
-	.ack_status		= pmc_intel_ack_status,
-	.enable			= pmc_intel_enable,
-	.disable		= pmc_intel_disable,
+static struct x86_pmu intel_pmu = {
+	.save_disable_all	= intel_pmu_save_disable_all,
+	.restore_all		= intel_pmu_restore_all,
+	.get_status		= intel_pmu_get_status,
+	.ack_status		= intel_pmu_ack_status,
+	.enable			= intel_pmu_enable_counter,
+	.disable		= intel_pmu_disable_counter,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-	.event_map		= pmc_intel_event_map,
-	.raw_event		= pmc_intel_raw_event,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 };
 
-static struct pmc_x86_ops pmc_amd_ops = {
-	.save_disable_all	= pmc_amd_save_disable_all,
-	.restore_all		= pmc_amd_restore_all,
-	.get_status		= pmc_amd_get_status,
-	.ack_status		= pmc_amd_ack_status,
-	.enable			= pmc_amd_enable,
-	.disable		= pmc_amd_disable,
+static struct x86_pmu amd_pmu = {
+	.save_disable_all	= amd_pmu_save_disable_all,
+	.restore_all		= amd_pmu_restore_all,
+	.get_status		= amd_pmu_get_status,
+	.ack_status		= amd_pmu_ack_status,
+	.enable			= amd_pmu_enable_counter,
+	.disable		= amd_pmu_disable_counter,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
-	.event_map		= pmc_amd_event_map,
-	.raw_event		= pmc_amd_raw_event,
+	.event_map		= amd_pmu_event_map,
+	.raw_event		= amd_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
 };
 
-static struct pmc_x86_ops *pmc_intel_init(void)
+static struct x86_pmu *intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
@@ -977,10 +978,10 @@ static struct pmc_x86_ops *pmc_intel_init(void)
 	nr_counters_fixed = edx.split.num_counters_fixed;
 	counter_value_mask = (1ULL << eax.split.bit_width) - 1;
 
-	return &pmc_intel_ops;
+	return &intel_pmu;
 }
 
-static struct pmc_x86_ops *pmc_amd_init(void)
+static struct x86_pmu *amd_pmu_init(void)
 {
 	nr_counters_generic = 4;
 	nr_counters_fixed = 0;
@@ -989,22 +990,22 @@ static struct pmc_x86_ops *pmc_amd_init(void)
 
 	pr_info("AMD Performance Monitoring support detected.\n");
 
-	return &pmc_amd_ops;
+	return &amd_pmu;
 }
 
 void __init init_hw_perf_counters(void)
 {
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_INTEL:
-		pmc_ops = pmc_intel_init();
+		x86_pmu = intel_pmu_init();
 		break;
 	case X86_VENDOR_AMD:
-		pmc_ops = pmc_amd_init();
+		x86_pmu = amd_pmu_init();
 		break;
 	default:
 		return;
 	}
-	if (!pmc_ops)
+	if (!x86_pmu)
 		return;
 
 	pr_info("... num counters:    %d\n", nr_counters_generic);
-- 
cgit v0.10.2


From 39d81eab2374d71b2d9c82f66258a1a4f57ddd2e Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:05 +0200
Subject: perf_counter, x86: make interrupt handler model specific

This separates the perfcounter interrupt handler for AMD and Intel
cpus. The AMD interrupt handler implementation is a follow-on patch.

[ Impact: refactor and clean up code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-9-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 808a1a1..9d90de0 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -4,6 +4,7 @@
  *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
  *  Copyright(C) 2009 Jaswinder Singh Rajput
+ *  Copyright(C) 2009 Advanced Micro Devices, Inc., Robert Richter
  *
  *  For licencing details see kernel-base/COPYING
  */
@@ -47,6 +48,7 @@ struct cpu_hw_counters {
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	int		(*handle_irq)(struct pt_regs *, int);
 	u64		(*save_disable_all)(void);
 	void		(*restore_all)(u64);
 	u64		(*get_status)(u64);
@@ -241,6 +243,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	int err;
 
+	/* disable temporarily */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return -ENOSYS;
+
 	if (unlikely(!perf_counters_initialized))
 		return -EINVAL;
 
@@ -780,7 +786,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
  */
-static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
+static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 {
 	int bit, cpu = smp_processor_id();
 	u64 ack, status;
@@ -827,6 +833,8 @@ out:
 	return ret;
 }
 
+static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; }
+
 void perf_counter_unthrottle(void)
 {
 	struct cpu_hw_counters *cpuc;
@@ -851,7 +859,7 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
 	irq_enter();
 	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
 	ack_APIC_irq();
-	__smp_perf_counter_interrupt(regs, 0);
+	x86_pmu->handle_irq(regs, 0);
 	irq_exit();
 }
 
@@ -908,7 +916,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
 	regs = args->regs;
 
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	ret = __smp_perf_counter_interrupt(regs, 1);
+	ret = x86_pmu->handle_irq(regs, 1);
 
 	return ret ? NOTIFY_STOP : NOTIFY_OK;
 }
@@ -920,6 +928,7 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 };
 
 static struct x86_pmu intel_pmu = {
+	.handle_irq		= intel_pmu_handle_irq,
 	.save_disable_all	= intel_pmu_save_disable_all,
 	.restore_all		= intel_pmu_restore_all,
 	.get_status		= intel_pmu_get_status,
@@ -934,6 +943,7 @@ static struct x86_pmu intel_pmu = {
 };
 
 static struct x86_pmu amd_pmu = {
+	.handle_irq		= amd_pmu_handle_irq,
 	.save_disable_all	= amd_pmu_save_disable_all,
 	.restore_all		= amd_pmu_restore_all,
 	.get_status		= amd_pmu_get_status,
-- 
cgit v0.10.2


From b7f8859a8ed1937e2139c17b84878f1d413fa659 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:06 +0200
Subject: perf_counter, x86: remove get_status() from struct x86_pmu

This function is Intel only and not necessary for AMD cpus.

[ Impact: simplify code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-10-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9d90de0..d0bb029 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -51,7 +51,6 @@ struct x86_pmu {
 	int		(*handle_irq)(struct pt_regs *, int);
 	u64		(*save_disable_all)(void);
 	void		(*restore_all)(u64);
-	u64		(*get_status)(u64);
 	void		(*ack_status)(u64);
 	void		(*enable)(int, u64);
 	void		(*disable)(int, u64);
@@ -405,41 +404,15 @@ void hw_perf_restore(u64 ctrl)
  */
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
-static u64 intel_pmu_get_status(u64 mask)
+static inline u64 intel_pmu_get_status(u64 mask)
 {
 	u64 status;
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-
-	return status;
-}
-
-static u64 amd_pmu_get_status(u64 mask)
-{
-	u64 status = 0;
-	int idx;
-
-	for (idx = 0; idx < nr_counters_generic; idx++) {
-		s64 val;
-
-		if (!(mask & (1 << idx)))
-			continue;
-
-		rdmsrl(MSR_K7_PERFCTR0 + idx, val);
-		val <<= (64 - counter_value_bits);
-		if (val >= 0)
-			status |= (1 << idx);
-	}
-
-	return status;
-}
-
-static u64 hw_perf_get_status(u64 mask)
-{
 	if (unlikely(!perf_counters_initialized))
 		return 0;
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 
-	return x86_pmu->get_status(mask);
+	return status;
 }
 
 static void intel_pmu_ack_status(u64 ack)
@@ -795,7 +768,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 
 	cpuc->throttle_ctrl = hw_perf_save_disable();
 
-	status = hw_perf_get_status(cpuc->throttle_ctrl);
+	status = intel_pmu_get_status(cpuc->throttle_ctrl);
 	if (!status)
 		goto out;
 
@@ -820,7 +793,7 @@ again:
 	/*
 	 * Repeat if there is more work to be done:
 	 */
-	status = hw_perf_get_status(cpuc->throttle_ctrl);
+	status = intel_pmu_get_status(cpuc->throttle_ctrl);
 	if (status)
 		goto again;
 out:
@@ -931,7 +904,6 @@ static struct x86_pmu intel_pmu = {
 	.handle_irq		= intel_pmu_handle_irq,
 	.save_disable_all	= intel_pmu_save_disable_all,
 	.restore_all		= intel_pmu_restore_all,
-	.get_status		= intel_pmu_get_status,
 	.ack_status		= intel_pmu_ack_status,
 	.enable			= intel_pmu_enable_counter,
 	.disable		= intel_pmu_disable_counter,
@@ -946,7 +918,6 @@ static struct x86_pmu amd_pmu = {
 	.handle_irq		= amd_pmu_handle_irq,
 	.save_disable_all	= amd_pmu_save_disable_all,
 	.restore_all		= amd_pmu_restore_all,
-	.get_status		= amd_pmu_get_status,
 	.ack_status		= amd_pmu_ack_status,
 	.enable			= amd_pmu_enable_counter,
 	.disable		= amd_pmu_disable_counter,
-- 
cgit v0.10.2


From dee5d9067ca78b317538fd67930be4e09a83dbc5 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:07 +0200
Subject: perf_counter, x86: remove ack_status() from struct x86_pmu

This function is Intel only and not necessary for AMD cpus.

[ Impact: simplify code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-11-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d0bb029..6bbdc16 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -51,7 +51,6 @@ struct x86_pmu {
 	int		(*handle_irq)(struct pt_regs *, int);
 	u64		(*save_disable_all)(void);
 	void		(*restore_all)(u64);
-	void		(*ack_status)(u64);
 	void		(*enable)(int, u64);
 	void		(*disable)(int, u64);
 	unsigned	eventsel;
@@ -415,23 +414,11 @@ static inline u64 intel_pmu_get_status(u64 mask)
 	return status;
 }
 
-static void intel_pmu_ack_status(u64 ack)
+static inline void intel_pmu_ack_status(u64 ack)
 {
 	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
-static void amd_pmu_ack_status(u64 ack)
-{
-}
-
-static void hw_perf_ack_status(u64 ack)
-{
-	if (unlikely(!perf_counters_initialized))
-		return;
-
-	x86_pmu->ack_status(ack);
-}
-
 static void intel_pmu_enable_counter(int idx, u64 config)
 {
 	wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
@@ -788,7 +775,7 @@ again:
 			__x86_pmu_disable(counter, &counter->hw, bit);
 	}
 
-	hw_perf_ack_status(ack);
+	intel_pmu_ack_status(ack);
 
 	/*
 	 * Repeat if there is more work to be done:
@@ -904,7 +891,6 @@ static struct x86_pmu intel_pmu = {
 	.handle_irq		= intel_pmu_handle_irq,
 	.save_disable_all	= intel_pmu_save_disable_all,
 	.restore_all		= intel_pmu_restore_all,
-	.ack_status		= intel_pmu_ack_status,
 	.enable			= intel_pmu_enable_counter,
 	.disable		= intel_pmu_disable_counter,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
@@ -918,7 +904,6 @@ static struct x86_pmu amd_pmu = {
 	.handle_irq		= amd_pmu_handle_irq,
 	.save_disable_all	= amd_pmu_save_disable_all,
 	.restore_all		= amd_pmu_restore_all,
-	.ack_status		= amd_pmu_ack_status,
 	.enable			= amd_pmu_enable_counter,
 	.disable		= amd_pmu_disable_counter,
 	.eventsel		= MSR_K7_EVNTSEL0,
-- 
cgit v0.10.2


From 26816c287e13eedc67bc4ed0cd40c138314b7c7d Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:08 +0200
Subject: perf_counter, x86: rename __hw_perf_counter_set_period into
 x86_perf_counter_set_period

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-12-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6bbdc16..fa6541d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -498,7 +498,7 @@ static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
  * To be called with the counter disabled in hw:
  */
 static void
-__hw_perf_counter_set_period(struct perf_counter *counter,
+x86_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
@@ -642,7 +642,7 @@ try_generic:
 	 */
 	barrier();
 
-	__hw_perf_counter_set_period(counter, hwc, idx);
+	x86_perf_counter_set_period(counter, hwc, idx);
 	__x86_pmu_enable(counter, hwc, idx);
 
 	return 0;
@@ -731,7 +731,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
 	int idx = hwc->idx;
 
 	x86_perf_counter_update(counter, hwc, idx);
-	__hw_perf_counter_set_period(counter, hwc, idx);
+	x86_perf_counter_set_period(counter, hwc, idx);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		__x86_pmu_enable(counter, hwc, idx);
-- 
cgit v0.10.2


From 55de0f2e57994b525324bf0d04d242d9358a2417 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:09 +0200
Subject: perf_counter, x86: rename intel only functions

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-13-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index fa6541d..5a52d73 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -725,7 +725,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
  * Save and restart an expired counter. Called by NMI contexts,
  * so it has to be careful about preempting normal counter ops:
  */
-static void perf_save_and_restart(struct perf_counter *counter)
+static void intel_pmu_save_and_restart(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 	int idx = hwc->idx;
@@ -753,7 +753,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
 	int ret = 0;
 
-	cpuc->throttle_ctrl = hw_perf_save_disable();
+	cpuc->throttle_ctrl = intel_pmu_save_disable_all();
 
 	status = intel_pmu_get_status(cpuc->throttle_ctrl);
 	if (!status)
@@ -770,7 +770,7 @@ again:
 		if (!counter)
 			continue;
 
-		perf_save_and_restart(counter);
+		intel_pmu_save_and_restart(counter);
 		if (perf_counter_overflow(counter, nmi, regs, 0))
 			__x86_pmu_disable(counter, &counter->hw, bit);
 	}
@@ -788,7 +788,7 @@ out:
 	 * Restore - do not reenable when global enable is off or throttled:
 	 */
 	if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
-		hw_perf_restore(cpuc->throttle_ctrl);
+		intel_pmu_restore_all(cpuc->throttle_ctrl);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 72eae04d3a3075c26d39e1e685acfc8e8c29db64 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:10 +0200
Subject: perf_counter, x86: modify initialization of struct x86_pmu

This patch adds an error handler and changes initialization of struct
x86_pmu. No functional changes. Needed for follow-on patches.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-14-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a52d73..7c72a94 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -913,7 +913,7 @@ static struct x86_pmu amd_pmu = {
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
 };
 
-static struct x86_pmu *intel_pmu_init(void)
+static int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
@@ -921,7 +921,7 @@ static struct x86_pmu *intel_pmu_init(void)
 	unsigned int ebx;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
-		return NULL;
+		return -ENODEV;
 
 	/*
 	 * Check whether the Architectural PerfMon supports
@@ -929,49 +929,54 @@ static struct x86_pmu *intel_pmu_init(void)
 	 */
 	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
 	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
-		return NULL;
+		return -ENODEV;
 
 	intel_perfmon_version = eax.split.version_id;
 	if (intel_perfmon_version < 2)
-		return NULL;
+		return -ENODEV;
 
 	pr_info("Intel Performance Monitoring support detected.\n");
 	pr_info("... version:         %d\n", intel_perfmon_version);
 	pr_info("... bit width:       %d\n", eax.split.bit_width);
 	pr_info("... mask length:     %d\n", eax.split.mask_length);
 
+	x86_pmu = &intel_pmu;
+
 	nr_counters_generic = eax.split.num_counters;
 	nr_counters_fixed = edx.split.num_counters_fixed;
 	counter_value_mask = (1ULL << eax.split.bit_width) - 1;
 
-	return &intel_pmu;
+	return 0;
 }
 
-static struct x86_pmu *amd_pmu_init(void)
+static int amd_pmu_init(void)
 {
+	x86_pmu = &amd_pmu;
+
 	nr_counters_generic = 4;
 	nr_counters_fixed = 0;
 	counter_value_mask = 0x0000FFFFFFFFFFFFULL;
 	counter_value_bits = 48;
 
 	pr_info("AMD Performance Monitoring support detected.\n");
-
-	return &amd_pmu;
+	return 0;
 }
 
 void __init init_hw_perf_counters(void)
 {
+	int err;
+
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_INTEL:
-		x86_pmu = intel_pmu_init();
+		err = intel_pmu_init();
 		break;
 	case X86_VENDOR_AMD:
-		x86_pmu = amd_pmu_init();
+		err = amd_pmu_init();
 		break;
 	default:
 		return;
 	}
-	if (!x86_pmu)
+	if (err != 0)
 		return;
 
 	pr_info("... num counters:    %d\n", nr_counters_generic);
-- 
cgit v0.10.2


From 4a06bd8508f65ad1dd5cd2046b85694813fa36a2 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:11 +0200
Subject: perf_counter, x86: make x86_pmu data a static struct

Instead of using a pointer to reference to the x86 pmu we now have one
single data structure that is initialized at the beginning. This saves
the pointer access when using this memory.

[ Impact: micro-optimization ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-15-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7c72a94..68597d7 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -60,7 +60,7 @@ struct x86_pmu {
 	int		max_events;
 };
 
-static struct x86_pmu *x86_pmu __read_mostly;
+static struct x86_pmu x86_pmu __read_mostly;
 
 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
 	.enabled = 1,
@@ -184,12 +184,12 @@ static bool reserve_pmc_hardware(void)
 		disable_lapic_nmi_watchdog();
 
 	for (i = 0; i < nr_counters_generic; i++) {
-		if (!reserve_perfctr_nmi(x86_pmu->perfctr + i))
+		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
 	for (i = 0; i < nr_counters_generic; i++) {
-		if (!reserve_evntsel_nmi(x86_pmu->eventsel + i))
+		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
 
@@ -197,13 +197,13 @@ static bool reserve_pmc_hardware(void)
 
 eventsel_fail:
 	for (i--; i >= 0; i--)
-		release_evntsel_nmi(x86_pmu->eventsel + i);
+		release_evntsel_nmi(x86_pmu.eventsel + i);
 
 	i = nr_counters_generic;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
-		release_perfctr_nmi(x86_pmu->perfctr + i);
+		release_perfctr_nmi(x86_pmu.perfctr + i);
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		enable_lapic_nmi_watchdog();
@@ -216,8 +216,8 @@ static void release_pmc_hardware(void)
 	int i;
 
 	for (i = 0; i < nr_counters_generic; i++) {
-		release_perfctr_nmi(x86_pmu->perfctr + i);
-		release_evntsel_nmi(x86_pmu->eventsel + i);
+		release_perfctr_nmi(x86_pmu.perfctr + i);
+		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -297,14 +297,14 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	 * Raw event type provide the config in the event structure
 	 */
 	if (perf_event_raw(hw_event)) {
-		hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event));
+		hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event));
 	} else {
-		if (perf_event_id(hw_event) >= x86_pmu->max_events)
+		if (perf_event_id(hw_event) >= x86_pmu.max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= x86_pmu->event_map(perf_event_id(hw_event));
+		hwc->config |= x86_pmu.event_map(perf_event_id(hw_event));
 	}
 
 	counter->destroy = hw_perf_counter_destroy;
@@ -356,7 +356,7 @@ u64 hw_perf_save_disable(void)
 	if (unlikely(!perf_counters_initialized))
 		return 0;
 
-	return x86_pmu->save_disable_all();
+	return x86_pmu.save_disable_all();
 }
 /*
  * Exported because of ACPI idle
@@ -396,7 +396,7 @@ void hw_perf_restore(u64 ctrl)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	x86_pmu->restore_all(ctrl);
+	x86_pmu.restore_all(ctrl);
 }
 /*
  * Exported because of ACPI idle
@@ -441,7 +441,7 @@ static void hw_perf_enable(int idx, u64 config)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	x86_pmu->enable(idx, config);
+	x86_pmu.enable(idx, config);
 }
 
 static void intel_pmu_disable_counter(int idx, u64 config)
@@ -463,7 +463,7 @@ static void hw_perf_disable(int idx, u64 config)
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	x86_pmu->disable(idx, config);
+	x86_pmu.disable(idx, config);
 }
 
 static inline void
@@ -580,11 +580,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
-	if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES)))
 		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES)))
 		return X86_PMC_IDX_FIXED_BUS_CYCLES;
 
 	return -1;
@@ -628,8 +628,8 @@ try_generic:
 			set_bit(idx, cpuc->used);
 			hwc->idx = idx;
 		}
-		hwc->config_base  = x86_pmu->eventsel;
-		hwc->counter_base = x86_pmu->perfctr;
+		hwc->config_base  = x86_pmu.eventsel;
+		hwc->counter_base = x86_pmu.perfctr;
 	}
 
 	perf_counters_lapic_init(hwc->nmi);
@@ -677,8 +677,8 @@ void perf_counter_print_debug(void)
 	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
 
 	for (idx = 0; idx < nr_counters_generic; idx++) {
-		rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl);
-		rdmsrl(x86_pmu->perfctr  + idx, pmc_count);
+		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
+		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
 
 		prev_left = per_cpu(prev_left[idx], cpu);
 
@@ -819,7 +819,7 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
 	irq_enter();
 	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
 	ack_APIC_irq();
-	x86_pmu->handle_irq(regs, 0);
+	x86_pmu.handle_irq(regs, 0);
 	irq_exit();
 }
 
@@ -876,7 +876,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
 	regs = args->regs;
 
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	ret = x86_pmu->handle_irq(regs, 1);
+	ret = x86_pmu.handle_irq(regs, 1);
 
 	return ret ? NOTIFY_STOP : NOTIFY_OK;
 }
@@ -940,7 +940,7 @@ static int intel_pmu_init(void)
 	pr_info("... bit width:       %d\n", eax.split.bit_width);
 	pr_info("... mask length:     %d\n", eax.split.mask_length);
 
-	x86_pmu = &intel_pmu;
+	x86_pmu = intel_pmu;
 
 	nr_counters_generic = eax.split.num_counters;
 	nr_counters_fixed = edx.split.num_counters_fixed;
@@ -951,7 +951,7 @@ static int intel_pmu_init(void)
 
 static int amd_pmu_init(void)
 {
-	x86_pmu = &amd_pmu;
+	x86_pmu = amd_pmu;
 
 	nr_counters_generic = 4;
 	nr_counters_fixed = 0;
-- 
cgit v0.10.2


From 0933e5c6a680ba8d8d786a6f7fa377b7ec0d1e49 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:12 +0200
Subject: perf_counter, x86: move counter parameters to struct x86_pmu

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-16-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 68597d7..75dbb1f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -24,16 +24,7 @@
 #include <asm/nmi.h>
 
 static bool perf_counters_initialized __read_mostly;
-
-/*
- * Number of (generic) HW counters:
- */
-static int nr_counters_generic __read_mostly;
 static u64 perf_counter_mask __read_mostly;
-static u64 counter_value_mask __read_mostly;
-static int counter_value_bits __read_mostly;
-
-static int nr_counters_fixed __read_mostly;
 
 struct cpu_hw_counters {
 	struct perf_counter	*counters[X86_PMC_IDX_MAX];
@@ -58,6 +49,10 @@ struct x86_pmu {
 	u64		(*event_map)(int);
 	u64		(*raw_event)(u64);
 	int		max_events;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		counter_bits;
+	u64		counter_mask;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -183,12 +178,12 @@ static bool reserve_pmc_hardware(void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		disable_lapic_nmi_watchdog();
 
-	for (i = 0; i < nr_counters_generic; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < nr_counters_generic; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
@@ -199,7 +194,7 @@ eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 
-	i = nr_counters_generic;
+	i = x86_pmu.num_counters;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
@@ -215,7 +210,7 @@ static void release_pmc_hardware(void)
 {
 	int i;
 
-	for (i = 0; i < nr_counters_generic; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
@@ -336,7 +331,7 @@ static u64 amd_pmu_save_disable_all(void)
 	 */
 	barrier();
 
-	for (idx = 0; idx < nr_counters_generic; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -378,7 +373,7 @@ static void amd_pmu_restore_all(u64 ctrl)
 	if (!ctrl)
 		return;
 
-	for (idx = 0; idx < nr_counters_generic; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -527,7 +522,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 	atomic64_set(&hwc->prev_count, (u64)-left);
 
 	err = checking_wrmsrl(hwc->counter_base + idx,
-			     (u64)(-left) & counter_value_mask);
+			     (u64)(-left) & x86_pmu.counter_mask);
 }
 
 static inline void
@@ -621,8 +616,9 @@ static int x86_pmu_enable(struct perf_counter *counter)
 		/* Try to get the previous generic counter again */
 		if (test_and_set_bit(idx, cpuc->used)) {
 try_generic:
-			idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
-			if (idx == nr_counters_generic)
+			idx = find_first_zero_bit(cpuc->used,
+						  x86_pmu.num_counters);
+			if (idx == x86_pmu.num_counters)
 				return -EAGAIN;
 
 			set_bit(idx, cpuc->used);
@@ -654,7 +650,7 @@ void perf_counter_print_debug(void)
 	struct cpu_hw_counters *cpuc;
 	int cpu, idx;
 
-	if (!nr_counters_generic)
+	if (!x86_pmu.num_counters)
 		return;
 
 	local_irq_disable();
@@ -676,7 +672,7 @@ void perf_counter_print_debug(void)
 	}
 	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
 
-	for (idx = 0; idx < nr_counters_generic; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
 		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
 
@@ -689,7 +685,7 @@ void perf_counter_print_debug(void)
 		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < nr_counters_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 
 		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -911,6 +907,9 @@ static struct x86_pmu amd_pmu = {
 	.event_map		= amd_pmu_event_map,
 	.raw_event		= amd_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+	.num_counters		= 4,
+	.counter_bits		= 48,
+	.counter_mask		= (1ULL << 48) - 1,
 };
 
 static int intel_pmu_init(void)
@@ -941,10 +940,10 @@ static int intel_pmu_init(void)
 	pr_info("... mask length:     %d\n", eax.split.mask_length);
 
 	x86_pmu = intel_pmu;
-
-	nr_counters_generic = eax.split.num_counters;
-	nr_counters_fixed = edx.split.num_counters_fixed;
-	counter_value_mask = (1ULL << eax.split.bit_width) - 1;
+	x86_pmu.num_counters = eax.split.num_counters;
+	x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
+	x86_pmu.counter_bits = eax.split.bit_width;
+	x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
 
 	return 0;
 }
@@ -952,12 +951,6 @@ static int intel_pmu_init(void)
 static int amd_pmu_init(void)
 {
 	x86_pmu = amd_pmu;
-
-	nr_counters_generic = 4;
-	nr_counters_fixed = 0;
-	counter_value_mask = 0x0000FFFFFFFFFFFFULL;
-	counter_value_bits = 48;
-
 	pr_info("AMD Performance Monitoring support detected.\n");
 	return 0;
 }
@@ -979,25 +972,26 @@ void __init init_hw_perf_counters(void)
 	if (err != 0)
 		return;
 
-	pr_info("... num counters:    %d\n", nr_counters_generic);
-	if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
-		nr_counters_generic = X86_PMC_MAX_GENERIC;
+	pr_info("... num counters:    %d\n", x86_pmu.num_counters);
+	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
+		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
-			nr_counters_generic, X86_PMC_MAX_GENERIC);
+		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
 	}
-	perf_counter_mask = (1 << nr_counters_generic) - 1;
-	perf_max_counters = nr_counters_generic;
+	perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
+	perf_max_counters = x86_pmu.num_counters;
 
-	pr_info("... value mask:      %016Lx\n", counter_value_mask);
+	pr_info("... value mask:      %016Lx\n", x86_pmu.counter_mask);
 
-	if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
-		nr_counters_fixed = X86_PMC_MAX_FIXED;
+	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
+		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
-			nr_counters_fixed, X86_PMC_MAX_FIXED);
+		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
 	}
-	pr_info("... fixed counters:  %d\n", nr_counters_fixed);
+	pr_info("... fixed counters:  %d\n", x86_pmu.num_counters_fixed);
 
-	perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+	perf_counter_mask |=
+		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
 	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
 	perf_counters_initialized = true;
-- 
cgit v0.10.2


From faa28ae018ed004a22aa4a7704e04ccdde4a941e Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:13 +0200
Subject: perf_counter, x86: make pmu version generic

This makes the use of the version variable generic. Also, some debug
messages have been generalized.

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-17-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 75dbb1f..15d2c03 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -39,6 +39,8 @@ struct cpu_hw_counters {
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	const char	*name;
+	int		version;
 	int		(*handle_irq)(struct pt_regs *, int);
 	u64		(*save_disable_all)(void);
 	void		(*restore_all)(u64);
@@ -61,8 +63,6 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
 	.enabled = 1,
 };
 
-static __read_mostly int intel_perfmon_version;
-
 /*
  * Intel PerfMon v3. Used on Core2 and later.
  */
@@ -658,7 +658,7 @@ void perf_counter_print_debug(void)
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
-	if (intel_perfmon_version >= 2) {
+	if (x86_pmu.version >= 2) {
 		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
@@ -884,6 +884,7 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 };
 
 static struct x86_pmu intel_pmu = {
+	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
 	.save_disable_all	= intel_pmu_save_disable_all,
 	.restore_all		= intel_pmu_restore_all,
@@ -897,6 +898,7 @@ static struct x86_pmu intel_pmu = {
 };
 
 static struct x86_pmu amd_pmu = {
+	.name			= "AMD",
 	.handle_irq		= amd_pmu_handle_irq,
 	.save_disable_all	= amd_pmu_save_disable_all,
 	.restore_all		= amd_pmu_restore_all,
@@ -918,6 +920,7 @@ static int intel_pmu_init(void)
 	union cpuid10_eax eax;
 	unsigned int unused;
 	unsigned int ebx;
+	int version;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
 		return -ENODEV;
@@ -930,16 +933,12 @@ static int intel_pmu_init(void)
 	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
 		return -ENODEV;
 
-	intel_perfmon_version = eax.split.version_id;
-	if (intel_perfmon_version < 2)
+	version = eax.split.version_id;
+	if (version < 2)
 		return -ENODEV;
 
-	pr_info("Intel Performance Monitoring support detected.\n");
-	pr_info("... version:         %d\n", intel_perfmon_version);
-	pr_info("... bit width:       %d\n", eax.split.bit_width);
-	pr_info("... mask length:     %d\n", eax.split.mask_length);
-
 	x86_pmu = intel_pmu;
+	x86_pmu.version = version;
 	x86_pmu.num_counters = eax.split.num_counters;
 	x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
 	x86_pmu.counter_bits = eax.split.bit_width;
@@ -951,7 +950,6 @@ static int intel_pmu_init(void)
 static int amd_pmu_init(void)
 {
 	x86_pmu = amd_pmu;
-	pr_info("AMD Performance Monitoring support detected.\n");
 	return 0;
 }
 
@@ -972,6 +970,10 @@ void __init init_hw_perf_counters(void)
 	if (err != 0)
 		return;
 
+	pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name);
+	pr_info("... version:         %d\n", x86_pmu.version);
+	pr_info("... bit width:       %d\n", x86_pmu.counter_bits);
+
 	pr_info("... num counters:    %d\n", x86_pmu.num_counters);
 	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
-- 
cgit v0.10.2


From bb775fc2d1dcd1aa6eafde37a8289ba2d80783aa Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:14 +0200
Subject: perf_counter, x86: make x86_pmu_read() static inline

[ Impact: micro-optimization ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-18-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 15d2c03..3f3ae47 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1002,7 +1002,7 @@ void __init init_hw_perf_counters(void)
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
 
-static void x86_pmu_read(struct perf_counter *counter)
+static inline void x86_pmu_read(struct perf_counter *counter)
 {
 	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 }
-- 
cgit v0.10.2


From 93904966934193204ad08e951f806d5631c29eb3 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:15 +0200
Subject: perf_counter, x86: rename cpuc->active_mask

This is to have a consistent naming scheme with cpuc->used.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-19-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3f3ae47..9ec51a6 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -29,9 +29,9 @@ static u64 perf_counter_mask __read_mostly;
 struct cpu_hw_counters {
 	struct perf_counter	*counters[X86_PMC_IDX_MAX];
 	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		active[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	u64			throttle_ctrl;
-	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	int			enabled;
 };
 
@@ -334,7 +334,7 @@ static u64 amd_pmu_save_disable_all(void)
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
-		if (!test_bit(idx, cpuc->active_mask))
+		if (!test_bit(idx, cpuc->active))
 			continue;
 		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
 		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
@@ -376,7 +376,7 @@ static void amd_pmu_restore_all(u64 ctrl)
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
-		if (!test_bit(idx, cpuc->active_mask))
+		if (!test_bit(idx, cpuc->active))
 			continue;
 		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
 		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
@@ -424,7 +424,7 @@ static void amd_pmu_enable_counter(int idx, u64 config)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 
-	set_bit(idx, cpuc->active_mask);
+	set_bit(idx, cpuc->active);
 	if (cpuc->enabled)
 		config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 
@@ -448,7 +448,7 @@ static void amd_pmu_disable_counter(int idx, u64 config)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 
-	clear_bit(idx, cpuc->active_mask);
+	clear_bit(idx, cpuc->active);
 	wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
 
 }
-- 
cgit v0.10.2


From 095342389e2ed8deed07b3076f990260ce3c7c9f Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:16 +0200
Subject: perf_counter, x86: generic use of cpuc->active

cpuc->active will now be used to indicate an enabled counter which
implies also valid pointers of cpuc->counters[]. In contrast,
cpuc->used only locks the counter, but it can be still uninitialized.

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-20-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9ec51a6..f7fd4a3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -424,7 +424,6 @@ static void amd_pmu_enable_counter(int idx, u64 config)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 
-	set_bit(idx, cpuc->active);
 	if (cpuc->enabled)
 		config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 
@@ -446,9 +445,6 @@ static void intel_pmu_disable_counter(int idx, u64 config)
 
 static void amd_pmu_disable_counter(int idx, u64 config)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	clear_bit(idx, cpuc->active);
 	wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
 
 }
@@ -633,10 +629,7 @@ try_generic:
 	__x86_pmu_disable(counter, hwc, idx);
 
 	cpuc->counters[idx] = counter;
-	/*
-	 * Make it visible before enabling the hw:
-	 */
-	barrier();
+	set_bit(idx, cpuc->active);
 
 	x86_perf_counter_set_period(counter, hwc, idx);
 	__x86_pmu_enable(counter, hwc, idx);
@@ -700,10 +693,13 @@ static void x86_pmu_disable(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
+	/*
+	 * Must be done before we disable, otherwise the nmi handler
+	 * could reenable again:
+	 */
+	clear_bit(idx, cpuc->active);
 	__x86_pmu_disable(counter, hwc, idx);
 
-	clear_bit(idx, cpuc->used);
-	cpuc->counters[idx] = NULL;
 	/*
 	 * Make sure the cleared pointer becomes visible before we
 	 * (potentially) free the counter:
@@ -715,6 +711,8 @@ static void x86_pmu_disable(struct perf_counter *counter)
 	 * that we are disabling:
 	 */
 	x86_perf_counter_update(counter, hwc, idx);
+	cpuc->counters[idx] = NULL;
+	clear_bit(idx, cpuc->used);
 }
 
 /*
@@ -763,7 +761,7 @@ again:
 		struct perf_counter *counter = cpuc->counters[bit];
 
 		clear_bit(bit, (unsigned long *) &status);
-		if (!counter)
+		if (!test_bit(bit, cpuc->active))
 			continue;
 
 		intel_pmu_save_and_restart(counter);
-- 
cgit v0.10.2


From 6f00cada07bb5da7f751929d3173494dcc5446cc Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:17 +0200
Subject: perf_counter, x86: consistent use of type int for counter index

The type of counter index is sometimes implemented as unsigned
int. This patch changes this to have a consistent usage of int.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-21-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f7fd4a3..d8beebe 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -459,7 +459,7 @@ static void hw_perf_disable(int idx, u64 config)
 
 static inline void
 __pmc_fixed_disable(struct perf_counter *counter,
-		    struct hw_perf_counter *hwc, unsigned int __idx)
+		    struct hw_perf_counter *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
@@ -474,7 +474,7 @@ __pmc_fixed_disable(struct perf_counter *counter,
 
 static inline void
 __x86_pmu_disable(struct perf_counter *counter,
-		  struct hw_perf_counter *hwc, unsigned int idx)
+		  struct hw_perf_counter *hwc, int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
 		__pmc_fixed_disable(counter, hwc, idx);
@@ -523,7 +523,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 
 static inline void
 __pmc_fixed_enable(struct perf_counter *counter,
-		   struct hw_perf_counter *hwc, unsigned int __idx)
+		   struct hw_perf_counter *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, bits, mask;
@@ -691,7 +691,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
-	unsigned int idx = hwc->idx;
+	int idx = hwc->idx;
 
 	/*
 	 * Must be done before we disable, otherwise the nmi handler
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c3db52d..41aed42 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -318,7 +318,7 @@ struct hw_perf_counter {
 			unsigned long			config_base;
 			unsigned long			counter_base;
 			int				nmi;
-			unsigned int			idx;
+			int				idx;
 		};
 		union { /* software */
 			atomic64_t			count;
-- 
cgit v0.10.2


From 7c90cc45f89af4dd4617f97d452740ad95b800d5 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:18 +0200
Subject: perf_counter, x86: rework counter enable functions

There is vendor specific code in generic x86 code, and there is vendor
specific code that could be generic. This patch introduces
x86_pmu_enable_counter() for x86 generic code. Fixed counter code for
Intel is moved to Intel only functions. In the end, checks and calls
via function pointers were reduced to the necessary. Also, the
internal function i/f changed.

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-22-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d8beebe..ae55933 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -44,7 +44,7 @@ struct x86_pmu {
 	int		(*handle_irq)(struct pt_regs *, int);
 	u64		(*save_disable_all)(void);
 	void		(*restore_all)(u64);
-	void		(*enable)(int, u64);
+	void		(*enable)(struct hw_perf_counter *, int);
 	void		(*disable)(int, u64);
 	unsigned	eventsel;
 	unsigned	perfctr;
@@ -414,28 +414,15 @@ static inline void intel_pmu_ack_status(u64 ack)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
-static void intel_pmu_enable_counter(int idx, u64 config)
+static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
-	wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
-			config | ARCH_PERFMON_EVENTSEL0_ENABLE);
-}
-
-static void amd_pmu_enable_counter(int idx, u64 config)
-{
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-
-	if (cpuc->enabled)
-		config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
-	wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
-}
+	int err;
 
-static void hw_perf_enable(int idx, u64 config)
-{
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	x86_pmu.enable(idx, config);
+	err = checking_wrmsrl(hwc->config_base + idx,
+			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
 }
 
 static void intel_pmu_disable_counter(int idx, u64 config)
@@ -522,8 +509,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 }
 
 static inline void
-__pmc_fixed_enable(struct perf_counter *counter,
-		   struct hw_perf_counter *hwc, int __idx)
+intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, bits, mask;
@@ -548,14 +534,24 @@ __pmc_fixed_enable(struct perf_counter *counter,
 	err = checking_wrmsrl(hwc->config_base, ctrl_val);
 }
 
-static void
-__x86_pmu_enable(struct perf_counter *counter,
-		 struct hw_perf_counter *hwc, int idx)
+static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
-		__pmc_fixed_enable(counter, hwc, idx);
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_enable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_enable_counter(hwc, idx);
+}
+
+static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+	if (cpuc->enabled)
+		x86_pmu_enable_counter(hwc, idx);
 	else
-		hw_perf_enable(idx, hwc->config);
+		amd_pmu_disable_counter(idx, hwc->config);
 }
 
 static int
@@ -632,7 +628,7 @@ try_generic:
 	set_bit(idx, cpuc->active);
 
 	x86_perf_counter_set_period(counter, hwc, idx);
-	__x86_pmu_enable(counter, hwc, idx);
+	x86_pmu.enable(hwc, idx);
 
 	return 0;
 }
@@ -728,7 +724,7 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
 	x86_perf_counter_set_period(counter, hwc, idx);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
-		__x86_pmu_enable(counter, hwc, idx);
+		intel_pmu_enable_counter(hwc, idx);
 }
 
 /*
-- 
cgit v0.10.2


From d43698918bd46c71d494555fb92195fbea1fcb6c Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:19 +0200
Subject: perf_counter, x86: rework counter disable functions

As for the enable function, this patch reworks the disable functions
and introduces x86_pmu_disable_counter(). The internal function i/f in
struct x86_pmu changed too.

[ Impact: refactor and generalize code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-23-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ae55933..df9012b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -45,7 +45,7 @@ struct x86_pmu {
 	u64		(*save_disable_all)(void);
 	void		(*restore_all)(u64);
 	void		(*enable)(struct hw_perf_counter *, int);
-	void		(*disable)(int, u64);
+	void		(*disable)(struct hw_perf_counter *, int);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
@@ -425,28 +425,19 @@ static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
 }
 
-static void intel_pmu_disable_counter(int idx, u64 config)
+static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
 {
-	wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
-}
-
-static void amd_pmu_disable_counter(int idx, u64 config)
-{
-	wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
-
-}
+	int err;
 
-static void hw_perf_disable(int idx, u64 config)
-{
 	if (unlikely(!perf_counters_initialized))
 		return;
 
-	x86_pmu.disable(idx, config);
+	err = checking_wrmsrl(hwc->config_base + idx,
+			      hwc->config);
 }
 
 static inline void
-__pmc_fixed_disable(struct perf_counter *counter,
-		    struct hw_perf_counter *hwc, int __idx)
+intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
@@ -460,13 +451,20 @@ __pmc_fixed_disable(struct perf_counter *counter,
 }
 
 static inline void
-__x86_pmu_disable(struct perf_counter *counter,
-		  struct hw_perf_counter *hwc, int idx)
+intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
 {
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
-		__pmc_fixed_disable(counter, hwc, idx);
-	else
-		hw_perf_disable(idx, hwc->config);
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_disable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_disable_counter(hwc, idx);
+}
+
+static inline void
+amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	x86_pmu_disable_counter(hwc, idx);
 }
 
 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
@@ -551,7 +549,7 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 	if (cpuc->enabled)
 		x86_pmu_enable_counter(hwc, idx);
 	else
-		amd_pmu_disable_counter(idx, hwc->config);
+		x86_pmu_disable_counter(hwc, idx);
 }
 
 static int
@@ -622,7 +620,7 @@ try_generic:
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__x86_pmu_disable(counter, hwc, idx);
+	x86_pmu.disable(hwc, idx);
 
 	cpuc->counters[idx] = counter;
 	set_bit(idx, cpuc->active);
@@ -694,7 +692,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
 	 * could reenable again:
 	 */
 	clear_bit(idx, cpuc->active);
-	__x86_pmu_disable(counter, hwc, idx);
+	x86_pmu.disable(hwc, idx);
 
 	/*
 	 * Make sure the cleared pointer becomes visible before we
@@ -762,7 +760,7 @@ again:
 
 		intel_pmu_save_and_restart(counter);
 		if (perf_counter_overflow(counter, nmi, regs, 0))
-			__x86_pmu_disable(counter, &counter->hw, bit);
+			intel_pmu_disable_counter(&counter->hw, bit);
 	}
 
 	intel_pmu_ack_status(ack);
-- 
cgit v0.10.2


From 85cf9dba92152bb4edec118b2f4f0be1ae7fdcab Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:20 +0200
Subject: perf_counter, x86: change and remove pmu initialization checks

Some functions are only called if the pmu was proper initialized. That
initalization checks can be removed. The way to check initialization
changed too. Now, the pointer to the interrupt handler is checked. If
it exists the pmu is initialized. This also removes a static variable
and uses struct x86_pmu as only data source for the check.

[ Impact: simplify code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-24-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index df9012b..2d3681b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -23,7 +23,6 @@
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 
-static bool perf_counters_initialized __read_mostly;
 static u64 perf_counter_mask __read_mostly;
 
 struct cpu_hw_counters {
@@ -227,6 +226,11 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
 	}
 }
 
+static inline int x86_pmu_initialized(void)
+{
+	return x86_pmu.handle_irq != NULL;
+}
+
 /*
  * Setup the hardware configuration for a given hw_event_type
  */
@@ -240,8 +244,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
 		return -ENOSYS;
 
-	if (unlikely(!perf_counters_initialized))
-		return -EINVAL;
+	if (!x86_pmu_initialized())
+		return -ENODEV;
 
 	err = 0;
 	if (atomic_inc_not_zero(&num_counters)) {
@@ -348,9 +352,8 @@ static u64 amd_pmu_save_disable_all(void)
 
 u64 hw_perf_save_disable(void)
 {
-	if (unlikely(!perf_counters_initialized))
+	if (!x86_pmu_initialized())
 		return 0;
-
 	return x86_pmu.save_disable_all();
 }
 /*
@@ -388,9 +391,8 @@ static void amd_pmu_restore_all(u64 ctrl)
 
 void hw_perf_restore(u64 ctrl)
 {
-	if (unlikely(!perf_counters_initialized))
+	if (!x86_pmu_initialized())
 		return;
-
 	x86_pmu.restore_all(ctrl);
 }
 /*
@@ -402,8 +404,6 @@ static inline u64 intel_pmu_get_status(u64 mask)
 {
 	u64 status;
 
-	if (unlikely(!perf_counters_initialized))
-		return 0;
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 
 	return status;
@@ -417,10 +417,6 @@ static inline void intel_pmu_ack_status(u64 ack)
 static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
 	int err;
-
-	if (unlikely(!perf_counters_initialized))
-		return;
-
 	err = checking_wrmsrl(hwc->config_base + idx,
 			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
 }
@@ -428,10 +424,6 @@ static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
 {
 	int err;
-
-	if (unlikely(!perf_counters_initialized))
-		return;
-
 	err = checking_wrmsrl(hwc->config_base + idx,
 			      hwc->config);
 }
@@ -787,10 +779,10 @@ void perf_counter_unthrottle(void)
 {
 	struct cpu_hw_counters *cpuc;
 
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+	if (!x86_pmu_initialized())
 		return;
 
-	if (unlikely(!perf_counters_initialized))
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
 		return;
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -829,8 +821,9 @@ void perf_counters_lapic_init(int nmi)
 {
 	u32 apic_val;
 
-	if (!perf_counters_initialized)
+	if (!x86_pmu_initialized())
 		return;
+
 	/*
 	 * Enable the performance counter vector in the APIC LVT:
 	 */
@@ -988,7 +981,6 @@ void __init init_hw_perf_counters(void)
 		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
 	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
-	perf_counters_initialized = true;
 
 	perf_counters_lapic_init(0);
 	register_die_notifier(&perf_counter_nmi_notifier);
-- 
cgit v0.10.2


From a29aa8a7ff93e4196d558036928597e68337dd8d Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:21 +0200
Subject: perf_counter, x86: implement the interrupt handler for AMD cpus

This patch implements the interrupt handler for AMD performance
counters. In difference to the Intel pmu, there is no single status
register and also there are no fixed counters. This makes the handler
very different and it is useful to make the handler vendor
specific. To check if a counter is overflowed the upper bit of the
counter is checked. Only counters where the active bit is set are
checked.

With this patch throttling is enabled for AMD performance counters.

This patch also reenables Linux performance counters on AMD cpus.

[ Impact: re-enable perfcounters on AMD CPUs ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-25-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2d3681b..f4d59d4 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -240,10 +240,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	int err;
 
-	/* disable temporarily */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return -ENOSYS;
-
 	if (!x86_pmu_initialized())
 		return -ENODEV;
 
@@ -773,7 +769,43 @@ out:
 	return ret;
 }
 
-static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; }
+static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
+{
+	int cpu = smp_processor_id();
+	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
+	u64 val;
+	int handled = 0;
+	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
+	int idx;
+
+	++cpuc->interrupts;
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active))
+			continue;
+		counter = cpuc->counters[idx];
+		hwc = &counter->hw;
+		x86_perf_counter_update(counter, hwc, idx);
+		val = atomic64_read(&hwc->prev_count);
+		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+			continue;
+		/* counter overflow */
+		x86_perf_counter_set_period(counter, hwc, idx);
+		handled = 1;
+		inc_irq_stat(apic_perf_irqs);
+		if (perf_counter_overflow(counter, nmi, regs, 0))
+			amd_pmu_disable_counter(hwc, idx);
+		else if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS)
+			/*
+			 * do not reenable when throttled, but reload
+			 * the register
+			 */
+			amd_pmu_disable_counter(hwc, idx);
+		else if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+			amd_pmu_enable_counter(hwc, idx);
+	}
+	return handled;
+}
 
 void perf_counter_unthrottle(void)
 {
@@ -782,9 +814,6 @@ void perf_counter_unthrottle(void)
 	if (!x86_pmu_initialized())
 		return;
 
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
-		return;
-
 	cpuc = &__get_cpu_var(cpu_hw_counters);
 	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
 		if (printk_ratelimit())
-- 
cgit v0.10.2


From 4b7bfd0d276da3a006d37e85d3cf900d7a14ae2a Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:22 +0200
Subject: perf_counter, x86: return raw count with x86_perf_counter_update()

To check on AMD cpus if a counter overflows, the upper bit of the raw
counter value must be checked. This value is already internally
available in x86_perf_counter_update(). Now, the value is returned so
that it can be used directly to check for overflows.

[ Impact: micro-optimization ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-26-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f4d59d4..a8a53ab 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -132,7 +132,7 @@ static u64 amd_pmu_raw_event(u64 event)
  * Can only be executed on the CPU where the counter is active.
  * Returns the delta events processed.
  */
-static void
+static u64
 x86_perf_counter_update(struct perf_counter *counter,
 			struct hw_perf_counter *hwc, int idx)
 {
@@ -165,6 +165,8 @@ again:
 
 	atomic64_add(delta, &counter->count);
 	atomic64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
 }
 
 static atomic_t num_counters;
@@ -785,8 +787,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 			continue;
 		counter = cpuc->counters[idx];
 		hwc = &counter->hw;
-		x86_perf_counter_update(counter, hwc, idx);
-		val = atomic64_read(&hwc->prev_count);
+		val = x86_perf_counter_update(counter, hwc, idx);
 		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
 			continue;
 		/* counter overflow */
-- 
cgit v0.10.2


From c619b8ffb1cec6a431687a35695dc6fd292a79e6 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:23 +0200
Subject: perf_counter, x86: introduce max_period variable

In x86 pmus the allowed counter period to programm differs. This
introduces a max_period value and allows the generic implementation
for all models to check the max period.

[ Impact: generalize code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-27-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a8a53ab..4b8715b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -54,6 +54,7 @@ struct x86_pmu {
 	int		num_counters_fixed;
 	int		counter_bits;
 	u64		counter_mask;
+	u64		max_period;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -279,14 +280,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->nmi = 1;
 
 	hwc->irq_period		= hw_event->irq_period;
-	/*
-	 * Intel PMCs cannot be accessed sanely above 32 bit width,
-	 * so we install an artificial 1<<31 period regardless of
-	 * the generic counter period:
-	 */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
-			hwc->irq_period = 0x7FFFFFFF;
+	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
+		hwc->irq_period = x86_pmu.max_period;
 
 	atomic64_set(&hwc->period_left, hwc->irq_period);
 
@@ -910,6 +905,12 @@ static struct x86_pmu intel_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.raw_event		= intel_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic counter period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
 };
 
 static struct x86_pmu amd_pmu = {
@@ -927,6 +928,8 @@ static struct x86_pmu amd_pmu = {
 	.num_counters		= 4,
 	.counter_bits		= 48,
 	.counter_mask		= (1ULL << 48) - 1,
+	/* use highest bit to detect overflow */
+	.max_period		= (1ULL << 47) - 1,
 };
 
 static int intel_pmu_init(void)
@@ -999,6 +1002,7 @@ void __init init_hw_perf_counters(void)
 	perf_max_counters = x86_pmu.num_counters;
 
 	pr_info("... value mask:      %016Lx\n", x86_pmu.counter_mask);
+	pr_info("... max period:      %016Lx\n", x86_pmu.max_period);
 
 	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
-- 
cgit v0.10.2


From ef7b3e09ffdcd5200aea9523f6b56d331d1c4fc0 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:24 +0200
Subject: perf_counter, x86: remove vendor check in fixed_mode_idx()

The function fixed_mode_idx() is used generically. Now it checks the
num_counters_fixed value instead of the vendor to decide if fixed
counters are present.

[ Impact: generalize code ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-28-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 4b8715b..d1c8036 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -542,7 +542,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 {
 	unsigned int event;
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+	if (!x86_pmu.num_counters_fixed)
 		return -1;
 
 	if (unlikely(hwc->nmi))
-- 
cgit v0.10.2


From 19d84dab55a383d75c885b5c1a618f5ead96f2f6 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:25 +0200
Subject: perf_counter, x86: remove unused function argument in
 intel_pmu_get_status()

The mask argument is unused and thus can be removed.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-29-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d1c8036..856b0b8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -393,7 +393,7 @@ void hw_perf_restore(u64 ctrl)
  */
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
-static inline u64 intel_pmu_get_status(u64 mask)
+static inline u64 intel_pmu_get_status(void)
 {
 	u64 status;
 
@@ -728,7 +728,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 
 	cpuc->throttle_ctrl = intel_pmu_save_disable_all();
 
-	status = intel_pmu_get_status(cpuc->throttle_ctrl);
+	status = intel_pmu_get_status();
 	if (!status)
 		goto out;
 
@@ -753,7 +753,7 @@ again:
 	/*
 	 * Repeat if there is more work to be done:
 	 */
-	status = intel_pmu_get_status(cpuc->throttle_ctrl);
+	status = intel_pmu_get_status();
 	if (status)
 		goto again;
 out:
-- 
cgit v0.10.2


From 38105f0234d4795c77c7c6845916caf3a395c451 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:26 +0200
Subject: perf_counter: update 'perf top' documentation

The documentation about the perf-top build was outdated after
perfstat has been implemented. This updates it.

[ Impact: update documentation ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-30-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 6a276d2..8d28864 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -3,7 +3,7 @@
 
    Build with:
 
-     cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
+     make -C Documentation/perf_counter/
 
    Sample output:
 
-- 
cgit v0.10.2


From 98144511427c192e4249ff66a3f9debc55c59411 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 29 Apr 2009 14:52:50 +0200
Subject: perf_counter: add/update copyrights

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 856b0b8..47e563b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1,10 +1,11 @@
 /*
  * Performance counter x86 architecture code
  *
- *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
- *  Copyright(C) 2009 Jaswinder Singh Rajput
- *  Copyright(C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *
  *  For licencing details see kernel-base/COPYING
  */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 582108a..a95a171 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1,9 +1,9 @@
 /*
  * Performance counter core code
  *
- *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
- *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *
  *  For licensing details see kernel-base/COPYING
  */
-- 
cgit v0.10.2


From ab7ef2e50a557af92f4f90689f51fadadafc16b2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 29 Apr 2009 22:38:51 +1000
Subject: perf_counter: powerpc: allow use of limited-function counters

POWER5+ and POWER6 have two hardware counters with limited functionality:
PMC5 counts instructions completed in run state and PMC6 counts cycles
in run state.  (Run state is the state when a hardware RUN bit is 1;
the idle task clears RUN while waiting for work to do and sets it when
there is work to do.)

These counters can't be written to by the kernel, can't generate
interrupts, and don't obey the freeze conditions.  That means we can
only use them for per-task counters (where we know we'll always be in
run state; we can't put a per-task counter on an idle task), and only
if we don't want interrupts and we do want to count in all processor
modes.

Obviously some counters can't go on a limited hardware counter, but there
are also situations where we can only put a counter on a limited hardware
counter - if there are already counters on that exclude some processor
modes and we want to put on a per-task cycle or instruction counter that
doesn't exclude any processor mode, it could go on if it can use a
limited hardware counter.

To keep track of these constraints, this adds a flags argument to the
processor-specific get_alternatives() functions, with three bits defined:
one to say that we can accept alternative event codes that go on limited
counters, one to say we only want alternatives on limited counters, and
one to say that this is a per-task counter and therefore events that are
gated by run state are equivalent to those that aren't (e.g. a "cycles"
event is equivalent to a "cycles in run state" event).  These flags
are computed for each counter and stored in the counter->hw.counter_base
field (slightly wonky name for what it does, but it was an existing
unused field).

Since the limited counters don't freeze when we freeze the other counters,
we need some special handling to avoid getting skew between things counted
on the limited counters and those counted on normal counters.  To minimize
this skew, if we are using any limited counters, we read PMC5 and PMC6
immediately after setting and clearing the freeze bit.  This is done in
a single asm in the new write_mmcr0() function.

The code here is specific to PMC5 and PMC6 being the limited hardware
counters.  Being more general (e.g. having a bitmap of limited hardware
counter numbers) would have meant more complex code to read the limited
counters when freezing and unfreezing the normal counters, with
conditional branches, which would have increased the skew.  Since it
isn't necessary for the code to be more general at this stage, it isn't.

This also extends the back-ends for POWER5+ and POWER6 to be able to
handle up to 6 counters rather than the 4 they previously handled.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <18936.19035.163066.892208@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index 9d7ff6d..56d66c3 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -12,6 +12,7 @@
 
 #define MAX_HWCOUNTERS		8
 #define MAX_EVENT_ALTERNATIVES	8
+#define MAX_LIMITED_HWCOUNTERS	2
 
 /*
  * This struct provides the constants and functions needed to
@@ -25,8 +26,11 @@ struct power_pmu {
 	int	(*compute_mmcr)(unsigned int events[], int n_ev,
 				unsigned int hwc[], u64 mmcr[]);
 	int	(*get_constraint)(unsigned int event, u64 *mskp, u64 *valp);
-	int	(*get_alternatives)(unsigned int event, unsigned int alt[]);
+	int	(*get_alternatives)(unsigned int event, unsigned int flags,
+				    unsigned int alt[]);
 	void	(*disable_pmc)(unsigned int pmc, u64 mmcr[]);
+	int	(*limited_pmc_event)(unsigned int event);
+	int	limited_pmc5_6;	/* PMC5 and PMC6 have limited function */
 	int	n_generic;
 	int	*generic_events;
 };
@@ -34,6 +38,13 @@ struct power_pmu {
 extern struct power_pmu *ppmu;
 
 /*
+ * Values for flags to get_alternatives()
+ */
+#define PPMU_LIMITED_PMC_OK	1	/* can put this on a limited PMC */
+#define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
+#define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
+
+/*
  * The power_pmu.get_constraint function returns a 64-bit value and
  * a 64-bit mask that express the constraints between this event and
  * other events.
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index d9bbe5e..15cdc8e 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -23,10 +23,14 @@ struct cpu_hw_counters {
 	int n_percpu;
 	int disabled;
 	int n_added;
+	int n_limited;
+	u8  pmcs_enabled;
 	struct perf_counter *counter[MAX_HWCOUNTERS];
 	unsigned int events[MAX_HWCOUNTERS];
+	unsigned int flags[MAX_HWCOUNTERS];
 	u64 mmcr[3];
-	u8 pmcs_enabled;
+	struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
+	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
 };
 DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
@@ -127,7 +131,8 @@ static void write_pmc(int idx, unsigned long val)
  * and see if any combination of alternative codes is feasible.
  * The feasible set is returned in event[].
  */
-static int power_check_constraints(unsigned int event[], int n_ev)
+static int power_check_constraints(unsigned int event[], unsigned int cflags[],
+				   int n_ev)
 {
 	u64 mask, value, nv;
 	unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
@@ -144,11 +149,15 @@ static int power_check_constraints(unsigned int event[], int n_ev)
 
 	/* First see if the events will go on as-is */
 	for (i = 0; i < n_ev; ++i) {
-		alternatives[i][0] = event[i];
+		if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
+		    && !ppmu->limited_pmc_event(event[i])) {
+			ppmu->get_alternatives(event[i], cflags[i],
+					       alternatives[i]);
+			event[i] = alternatives[i][0];
+		}
 		if (ppmu->get_constraint(event[i], &amasks[i][0],
 					 &avalues[i][0]))
 			return -1;
-		choice[i] = 0;
 	}
 	value = mask = 0;
 	for (i = 0; i < n_ev; ++i) {
@@ -166,7 +175,9 @@ static int power_check_constraints(unsigned int event[], int n_ev)
 	if (!ppmu->get_alternatives)
 		return -1;
 	for (i = 0; i < n_ev; ++i) {
-		n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
+		choice[i] = 0;
+		n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
+						  alternatives[i]);
 		for (j = 1; j < n_alt[i]; ++j)
 			ppmu->get_constraint(alternatives[i][j],
 					     &amasks[i][j], &avalues[i][j]);
@@ -231,28 +242,41 @@ static int power_check_constraints(unsigned int event[], int n_ev)
  * exclude_{user,kernel,hv} with each other and any previously
  * added counters.
  */
-static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
+			  int n_prev, int n_new)
 {
-	int eu, ek, eh;
-	int i, n;
+	int eu = 0, ek = 0, eh = 0;
+	int i, n, first;
 	struct perf_counter *counter;
 
 	n = n_prev + n_new;
 	if (n <= 1)
 		return 0;
 
-	eu = ctrs[0]->hw_event.exclude_user;
-	ek = ctrs[0]->hw_event.exclude_kernel;
-	eh = ctrs[0]->hw_event.exclude_hv;
-	if (n_prev == 0)
-		n_prev = 1;
-	for (i = n_prev; i < n; ++i) {
+	first = 1;
+	for (i = 0; i < n; ++i) {
+		if (cflags[i] & PPMU_LIMITED_PMC_OK) {
+			cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
+			continue;
+		}
 		counter = ctrs[i];
-		if (counter->hw_event.exclude_user != eu ||
-		    counter->hw_event.exclude_kernel != ek ||
-		    counter->hw_event.exclude_hv != eh)
+		if (first) {
+			eu = counter->hw_event.exclude_user;
+			ek = counter->hw_event.exclude_kernel;
+			eh = counter->hw_event.exclude_hv;
+			first = 0;
+		} else if (counter->hw_event.exclude_user != eu ||
+			   counter->hw_event.exclude_kernel != ek ||
+			   counter->hw_event.exclude_hv != eh) {
 			return -EAGAIN;
+		}
 	}
+
+	if (eu || ek || eh)
+		for (i = 0; i < n; ++i)
+			if (cflags[i] & PPMU_LIMITED_PMC_OK)
+				cflags[i] |= PPMU_LIMITED_PMC_REQD;
+
 	return 0;
 }
 
@@ -280,6 +304,85 @@ static void power_pmu_read(struct perf_counter *counter)
 }
 
 /*
+ * On some machines, PMC5 and PMC6 can't be written, don't respect
+ * the freeze conditions, and don't generate interrupts.  This tells
+ * us if `counter' is using such a PMC.
+ */
+static int is_limited_pmc(int pmcnum)
+{
+	return ppmu->limited_pmc5_6 && (pmcnum == 5 || pmcnum == 6);
+}
+
+static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
+				    unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_counter *counter;
+	u64 val, prev, delta;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		counter = cpuhw->limited_counter[i];
+		if (!counter->hw.idx)
+			continue;
+		val = (counter->hw.idx == 5) ? pmc5 : pmc6;
+		prev = atomic64_read(&counter->hw.prev_count);
+		counter->hw.idx = 0;
+		delta = (val - prev) & 0xfffffffful;
+		atomic64_add(delta, &counter->count);
+	}
+}
+
+static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
+				  unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_counter *counter;
+	u64 val;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		counter = cpuhw->limited_counter[i];
+		counter->hw.idx = cpuhw->limited_hwidx[i];
+		val = (counter->hw.idx == 5) ? pmc5 : pmc6;
+		atomic64_set(&counter->hw.prev_count, val);
+		perf_counter_update_userpage(counter);
+	}
+}
+
+/*
+ * Since limited counters don't respect the freeze conditions, we
+ * have to read them immediately after freezing or unfreezing the
+ * other counters.  We try to keep the values from the limited
+ * counters as consistent as possible by keeping the delay (in
+ * cycles and instructions) between freezing/unfreezing and reading
+ * the limited counters as small and consistent as possible.
+ * Therefore, if any limited counters are in use, we read them
+ * both, and always in the same order, to minimize variability,
+ * and do it inside the same asm that writes MMCR0.
+ */
+static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
+{
+	unsigned long pmc5, pmc6;
+
+	if (!cpuhw->n_limited) {
+		mtspr(SPRN_MMCR0, mmcr0);
+		return;
+	}
+
+	/*
+	 * Write MMCR0, then read PMC5 and PMC6 immediately.
+	 */
+	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
+		     : "=&r" (pmc5), "=&r" (pmc6)
+		     : "r" (mmcr0), "i" (SPRN_MMCR0),
+		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));
+
+	if (mmcr0 & MMCR0_FC)
+		freeze_limited_counters(cpuhw, pmc5, pmc6);
+	else
+		thaw_limited_counters(cpuhw, pmc5, pmc6);
+}
+
+/*
  * Disable all counters to prevent PMU interrupts and to allow
  * counters to be added or removed.
  */
@@ -321,7 +424,7 @@ u64 hw_perf_save_disable(void)
 		 * executed and the PMU has frozen the counters
 		 * before we return.
 		 */
-		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+		write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
 		mb();
 	}
 	local_irq_restore(flags);
@@ -342,6 +445,8 @@ void hw_perf_restore(u64 disable)
 	unsigned long val;
 	s64 left;
 	unsigned int hwc_index[MAX_HWCOUNTERS];
+	int n_lim;
+	int idx;
 
 	if (disable)
 		return;
@@ -414,10 +519,18 @@ void hw_perf_restore(u64 disable)
 	/*
 	 * Initialize the PMCs for all the new and moved counters.
 	 */
+	cpuhw->n_limited = n_lim = 0;
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
 		if (counter->hw.idx)
 			continue;
+		idx = hwc_index[i] + 1;
+		if (is_limited_pmc(idx)) {
+			cpuhw->limited_counter[n_lim] = counter;
+			cpuhw->limited_hwidx[n_lim] = idx;
+			++n_lim;
+			continue;
+		}
 		val = 0;
 		if (counter->hw_event.irq_period) {
 			left = atomic64_read(&counter->hw.period_left);
@@ -425,15 +538,16 @@ void hw_perf_restore(u64 disable)
 				val = 0x80000000L - left;
 		}
 		atomic64_set(&counter->hw.prev_count, val);
-		counter->hw.idx = hwc_index[i] + 1;
-		write_pmc(counter->hw.idx, val);
+		counter->hw.idx = idx;
+		write_pmc(idx, val);
 		perf_counter_update_userpage(counter);
 	}
+	cpuhw->n_limited = n_lim;
 	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
 
  out_enable:
 	mb();
-	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
 
 	/*
 	 * Enable instruction sampling if necessary
@@ -448,7 +562,8 @@ void hw_perf_restore(u64 disable)
 }
 
 static int collect_events(struct perf_counter *group, int max_count,
-			  struct perf_counter *ctrs[], unsigned int *events)
+			  struct perf_counter *ctrs[], unsigned int *events,
+			  unsigned int *flags)
 {
 	int n = 0;
 	struct perf_counter *counter;
@@ -457,6 +572,7 @@ static int collect_events(struct perf_counter *group, int max_count,
 		if (n >= max_count)
 			return -1;
 		ctrs[n] = group;
+		flags[n] = group->hw.counter_base;
 		events[n++] = group->hw.config;
 	}
 	list_for_each_entry(counter, &group->sibling_list, list_entry) {
@@ -465,6 +581,7 @@ static int collect_events(struct perf_counter *group, int max_count,
 			if (n >= max_count)
 				return -1;
 			ctrs[n] = counter;
+			flags[n] = counter->hw.counter_base;
 			events[n++] = counter->hw.config;
 		}
 	}
@@ -497,12 +614,14 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	cpuhw = &__get_cpu_var(cpu_hw_counters);
 	n0 = cpuhw->n_counters;
 	n = collect_events(group_leader, ppmu->n_counter - n0,
-			   &cpuhw->counter[n0], &cpuhw->events[n0]);
+			   &cpuhw->counter[n0], &cpuhw->events[n0],
+			   &cpuhw->flags[n0]);
 	if (n < 0)
 		return -EAGAIN;
-	if (check_excludes(cpuhw->counter, n0, n))
+	if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
 		return -EAGAIN;
-	if (power_check_constraints(cpuhw->events, n + n0))
+	i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
+	if (i < 0)
 		return -EAGAIN;
 	cpuhw->n_counters = n0 + n;
 	cpuhw->n_added += n;
@@ -554,9 +673,10 @@ static int power_pmu_enable(struct perf_counter *counter)
 		goto out;
 	cpuhw->counter[n0] = counter;
 	cpuhw->events[n0] = counter->hw.config;
-	if (check_excludes(cpuhw->counter, n0, 1))
+	cpuhw->flags[n0] = counter->hw.counter_base;
+	if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
 		goto out;
-	if (power_check_constraints(cpuhw->events, n0 + 1))
+	if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
 		goto out;
 
 	counter->hw.config = cpuhw->events[n0];
@@ -592,12 +712,24 @@ static void power_pmu_disable(struct perf_counter *counter)
 				cpuhw->counter[i-1] = cpuhw->counter[i];
 			--cpuhw->n_counters;
 			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
-			write_pmc(counter->hw.idx, 0);
-			counter->hw.idx = 0;
+			if (counter->hw.idx) {
+				write_pmc(counter->hw.idx, 0);
+				counter->hw.idx = 0;
+			}
 			perf_counter_update_userpage(counter);
 			break;
 		}
 	}
+	for (i = 0; i < cpuhw->n_limited; ++i)
+		if (counter == cpuhw->limited_counter[i])
+			break;
+	if (i < cpuhw->n_limited) {
+		while (++i < cpuhw->n_limited) {
+			cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
+			cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
+		}
+		--cpuhw->n_limited;
+	}
 	if (cpuhw->n_counters == 0) {
 		/* disable exceptions if no counters are running */
 		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
@@ -613,6 +745,61 @@ struct pmu power_pmu = {
 	.read		= power_pmu_read,
 };
 
+/*
+ * Return 1 if we might be able to put counter on a limited PMC,
+ * or 0 if not.
+ * A counter can only go on a limited PMC if it counts something
+ * that a limited PMC can count, doesn't require interrupts, and
+ * doesn't exclude any processor mode.
+ */
+static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev,
+				 unsigned int flags)
+{
+	int n;
+	unsigned int alt[MAX_EVENT_ALTERNATIVES];
+
+	if (counter->hw_event.exclude_user
+	    || counter->hw_event.exclude_kernel
+	    || counter->hw_event.exclude_hv
+	    || counter->hw_event.irq_period)
+		return 0;
+
+	if (ppmu->limited_pmc_event(ev))
+		return 1;
+
+	/*
+	 * The requested event isn't on a limited PMC already;
+	 * see if any alternative code goes on a limited PMC.
+	 */
+	if (!ppmu->get_alternatives)
+		return 0;
+
+	flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
+	n = ppmu->get_alternatives(ev, flags, alt);
+	if (n)
+		return alt[0];
+
+	return 0;
+}
+
+/*
+ * Find an alternative event that goes on a normal PMC, if possible,
+ * and return the event code, or 0 if there is no such alternative.
+ * (Note: event code 0 is "don't count" on all machines.)
+ */
+static unsigned long normal_pmc_alternative(unsigned long ev,
+					    unsigned long flags)
+{
+	unsigned int alt[MAX_EVENT_ALTERNATIVES];
+	int n;
+
+	flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
+	n = ppmu->get_alternatives(ev, flags, alt);
+	if (!n)
+		return 0;
+	return alt[0];
+}
+
 /* Number of perf_counters counting hardware events */
 static atomic_t num_counters;
 /* Used to avoid races in calling reserve/release_pmc_hardware */
@@ -633,9 +820,10 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
 
 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
-	unsigned long ev;
+	unsigned long ev, flags;
 	struct perf_counter *ctrs[MAX_HWCOUNTERS];
 	unsigned int events[MAX_HWCOUNTERS];
+	unsigned int cflags[MAX_HWCOUNTERS];
 	int n;
 	int err;
 
@@ -661,7 +849,36 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	 */
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		counter->hw_event.exclude_hv = 0;
-	
+
+	/*
+	 * If this is a per-task counter, then we can use
+	 * PM_RUN_* events interchangeably with their non RUN_*
+	 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
+	 * XXX we should check if the task is an idle task.
+	 */
+	flags = 0;
+	if (counter->ctx->task)
+		flags |= PPMU_ONLY_COUNT_RUN;
+
+	/*
+	 * If this machine has limited counters, check whether this
+	 * event could go on a limited counter.
+	 */
+	if (ppmu->limited_pmc5_6) {
+		if (can_go_on_limited_pmc(counter, ev, flags)) {
+			flags |= PPMU_LIMITED_PMC_OK;
+		} else if (ppmu->limited_pmc_event(ev)) {
+			/*
+			 * The requested event is on a limited PMC,
+			 * but we can't use a limited PMC; see if any
+			 * alternative goes on a normal PMC.
+			 */
+			ev = normal_pmc_alternative(ev, flags);
+			if (!ev)
+				return ERR_PTR(-EINVAL);
+		}
+	}
+
 	/*
 	 * If this is in a group, check if it can go on with all the
 	 * other hardware counters in the group.  We assume the counter
@@ -670,18 +887,20 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	n = 0;
 	if (counter->group_leader != counter) {
 		n = collect_events(counter->group_leader, ppmu->n_counter - 1,
-				   ctrs, events);
+				   ctrs, events, cflags);
 		if (n < 0)
 			return ERR_PTR(-EINVAL);
 	}
 	events[n] = ev;
 	ctrs[n] = counter;
-	if (check_excludes(ctrs, n, 1))
+	cflags[n] = flags;
+	if (check_excludes(ctrs, cflags, n, 1))
 		return ERR_PTR(-EINVAL);
-	if (power_check_constraints(events, n + 1))
+	if (power_check_constraints(events, cflags, n + 1))
 		return ERR_PTR(-EINVAL);
 
 	counter->hw.config = events[n];
+	counter->hw.counter_base = cflags[n];
 	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
 
 	/*
@@ -763,6 +982,10 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	int found = 0;
 	int nmi;
 
+	if (cpuhw->n_limited)
+		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
+					mfspr(SPRN_PMC6));
+
 	/*
 	 * If interrupts were soft-disabled when this PMU interrupt
 	 * occurred, treat it as an NMI.
@@ -775,6 +998,8 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
+		if (is_limited_pmc(counter->hw.idx))
+			continue;
 		val = read_pmc(counter->hw.idx);
 		if ((int)val < 0) {
 			/* counter has overflowed */
@@ -791,6 +1016,8 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	 */
 	if (!found) {
 		for (i = 0; i < ppmu->n_counter; ++i) {
+			if (is_limited_pmc(i + 1))
+				continue;
 			val = read_pmc(i + 1);
 			if ((int)val < 0)
 				write_pmc(i + 1, 0);
@@ -804,7 +1031,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	 * XXX might want to use MSR.PM to keep the counters frozen until
 	 * we get back out of this interrupt.
 	 */
-	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
 
 	if (nmi)
 		nmi_exit();
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 1407b19..744a275 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -320,7 +320,8 @@ static unsigned int ppc_inst_cmpl[] = {
 	0x1001, 0x4001, 0x6001, 0x7001, 0x8001
 };
 
-static int p4_get_alternatives(unsigned int event, unsigned int alt[])
+static int p4_get_alternatives(unsigned int event, unsigned int flags,
+			       unsigned int alt[])
 {
 	int i, j, na;
 
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 1222c8e..8154eaa 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -78,8 +78,8 @@
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
- *             [  ><><>< ><> <><>[  >      <  ><  ><  ><  ><><><><>
- *             NC  G0G1G2 G3 T0T1 UC        B0  B1  B2  B3 P4P3P2P1
+ *             [  ><><>< ><> <><>[  >  <  ><  ><  ><  ><><><><><><>
+ *             NC  G0G1G2 G3 T0T1 UC    B0  B1  B2  B3 P6P5P4P3P2P1
  *
  * NC - number of counters
  *     51: NC error 0x0008_0000_0000_0000
@@ -105,18 +105,18 @@
  *     30: IDU|GRS events needed 0x00_4000_0000
  *
  * B0
- *     20-23: Byte 0 event source 0x00f0_0000
+ *     24-27: Byte 0 event source 0x0f00_0000
  *	      Encoding as for the event code
  *
  * B1, B2, B3
- *     16-19, 12-15, 8-11: Byte 1, 2, 3 event sources
+ *     20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
  *
- * P4
- *     7: P1 error 0x80
- *     6-7: Count of events needing PMC4
+ * P6
+ *     11: P6 error 0x800
+ *     10-11: Count of events needing PMC6
  *
- * P1..P3
- *     0-6: Count of events needing PMC1..PMC3
+ * P1..P5
+ *     0-9: Count of events needing PMC1..PMC5
  */
 
 static const int grsel_shift[8] = {
@@ -143,11 +143,13 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 
 	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
 	if (pmc) {
-		if (pmc > 4)
+		if (pmc > 6)
 			return -1;
 		sh = (pmc - 1) * 2;
 		mask |= 2 << sh;
 		value |= 1 << sh;
+		if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
+			return -1;
 	}
 	if (event & PM_BUSEVENT_MSK) {
 		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
@@ -173,16 +175,26 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 			value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
 		}
 		/* Set byte lane select field */
-		mask  |= 0xfULL << (20 - 4 * byte);
-		value |= (u64)unit << (20 - 4 * byte);
+		mask  |= 0xfULL << (24 - 4 * byte);
+		value |= (u64)unit << (24 - 4 * byte);
+	}
+	if (pmc < 5) {
+		/* need a counter from PMC1-4 set */
+		mask  |= 0x8000000000000ull;
+		value |= 0x1000000000000ull;
 	}
-	mask  |= 0x8000000000000ull;
-	value |= 0x1000000000000ull;
 	*maskp = mask;
 	*valp = value;
 	return 0;
 }
 
+static int power5p_limited_pmc_event(unsigned int event)
+{
+	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+	return pmc == 5 || pmc == 6;
+}
+
 #define MAX_ALT	3	/* at most 3 alternatives for any event */
 
 static const unsigned int event_alternatives[][MAX_ALT] = {
@@ -193,6 +205,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
 	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
 	{ 0x800c4,  0xc20e0 },			/* PM_DTLB_MISS */
 	{ 0xc50c6,  0xc60e0 },			/* PM_MRK_DTLB_MISS */
+	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
 	{ 0x100009, 0x200009 },			/* PM_INST_CMPL */
 	{ 0x200015, 0x300015 },			/* PM_LSU_LMQ_SRQ_EMPTY_CYC */
 	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
@@ -260,24 +273,85 @@ static int find_alternative_bdecode(unsigned int event)
 	return -1;
 }
 
-static int power5p_get_alternatives(unsigned int event, unsigned int alt[])
+static int power5p_get_alternatives(unsigned int event, unsigned int flags,
+				    unsigned int alt[])
 {
 	int i, j, ae, nalt = 1;
+	int nlim;
 
 	alt[0] = event;
 	nalt = 1;
+	nlim = power5p_limited_pmc_event(event);
 	i = find_alternative(event);
 	if (i >= 0) {
 		for (j = 0; j < MAX_ALT; ++j) {
 			ae = event_alternatives[i][j];
 			if (ae && ae != event)
 				alt[nalt++] = ae;
+			nlim += power5p_limited_pmc_event(ae);
 		}
 	} else {
 		ae = find_alternative_bdecode(event);
 		if (ae > 0)
 			alt[nalt++] = ae;
 	}
+
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state,
+		 * so PM_CYC is equivalent to PM_RUN_CYC
+		 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
+		 * This doesn't include alternatives that don't provide
+		 * any extra flexibility in assigning PMCs (e.g.
+		 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
+		 * Note that even with these additional alternatives
+		 * we never end up with more than 3 alternatives for any event.
+		 */
+		j = nalt;
+		for (i = 0; i < nalt; ++i) {
+			switch (alt[i]) {
+			case 0xf:	/* PM_CYC */
+				alt[j++] = 0x600005;	/* PM_RUN_CYC */
+				++nlim;
+				break;
+			case 0x600005:	/* PM_RUN_CYC */
+				alt[j++] = 0xf;
+				break;
+			case 0x100009:	/* PM_INST_CMPL */
+				alt[j++] = 0x500009;	/* PM_RUN_INST_CMPL */
+				++nlim;
+				break;
+			case 0x500009:	/* PM_RUN_INST_CMPL */
+				alt[j++] = 0x100009;	/* PM_INST_CMPL */
+				alt[j++] = 0x200009;
+				break;
+			}
+		}
+		nalt = j;
+	}
+
+	if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+		/* remove the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (!power5p_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+		/* remove all but the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (power5p_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	}
+
 	return nalt;
 }
 
@@ -390,7 +464,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
 	unsigned char unituse[16];
 	int ttmuse;
 
-	if (n_ev > 4)
+	if (n_ev > 6)
 		return -1;
 
 	/* First pass to count resource use */
@@ -399,7 +473,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
 	for (i = 0; i < n_ev; ++i) {
 		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
 		if (pmc) {
-			if (pmc > 4)
+			if (pmc > 6)
 				return -1;
 			if (pmc_inuse & (1 << (pmc - 1)))
 				return -1;
@@ -488,13 +562,16 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
 			if (pmc >= 4)
 				return -1;
 			pmc_inuse |= 1 << pmc;
-		} else {
+		} else if (pmc <= 4) {
 			/* Direct event */
 			--pmc;
 			if (isbus && (byte & 2) &&
 			    (psel == 8 || psel == 0x10 || psel == 0x28))
 				/* add events on higher-numbered bus */
 				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+		} else {
+			/* Instructions or run cycles on PMC5/6 */
+			--pmc;
 		}
 		if (isbus && unit == PM_GRS) {
 			bit = psel & 7;
@@ -538,7 +615,7 @@ static int power5p_generic_events[] = {
 };
 
 struct power_pmu power5p_pmu = {
-	.n_counter = 4,
+	.n_counter = 6,
 	.max_alternatives = MAX_ALT,
 	.add_fields = 0x7000000000055ull,
 	.test_adder = 0x3000040000000ull,
@@ -548,4 +625,6 @@ struct power_pmu power5p_pmu = {
 	.disable_pmc = power5p_disable_pmc,
 	.n_generic = ARRAY_SIZE(power5p_generic_events),
 	.generic_events = power5p_generic_events,
+	.limited_pmc5_6 = 1,
+	.limited_pmc_event = power5p_limited_pmc_event,
 };
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 116c4bb..6e667dc 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -269,7 +269,8 @@ static int find_alternative_bdecode(unsigned int event)
 	return -1;
 }
 
-static int power5_get_alternatives(unsigned int event, unsigned int alt[])
+static int power5_get_alternatives(unsigned int event, unsigned int flags,
+				   unsigned int alt[])
 {
 	int i, j, ae, nalt = 1;
 
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index fce1fc2..d44049f 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -182,7 +182,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
 	unsigned int ttmset = 0;
 	unsigned int pmc_inuse = 0;
 
-	if (n_ev > 4)
+	if (n_ev > 6)
 		return -1;
 	for (i = 0; i < n_ev; ++i) {
 		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
@@ -202,6 +202,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
 			for (pmc = 0; pmc < 4; ++pmc)
 				if (!(pmc_inuse & (1 << pmc)))
 					break;
+			if (pmc >= 4)
+				return -1;
 			pmc_inuse |= 1 << pmc;
 		}
 		hwc[i] = pmc;
@@ -240,7 +242,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
 		}
 		if (power6_marked_instr_event(event[i]))
 			mmcra |= MMCRA_SAMPLE_ENABLE;
-		mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
+		if (pmc < 4)
+			mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
 	}
 	mmcr[0] = 0;
 	if (pmc_inuse & 1)
@@ -256,19 +259,20 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
  * Layout of constraint bits:
  *
  *	0-1	add field: number of uses of PMC1 (max 1)
- *	2-3, 4-5, 6-7: ditto for PMC2, 3, 4
- *	8-10	select field: nest (subunit) event selector
+ *	2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
+ *	12-15	add field: number of uses of PMC1-4 (max 4)
  *	16-19	select field: unit on byte 0 of event bus
  *	20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
+ *	32-34	select field: nest (subunit) event selector
  */
 static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 {
-	int pmc, byte, sh;
-	unsigned int mask = 0, value = 0;
+	int pmc, byte, sh, subunit;
+	u64 mask = 0, value = 0;
 
 	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
 	if (pmc) {
-		if (pmc > 4)
+		if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
 			return -1;
 		sh = (pmc - 1) * 2;
 		mask |= 2 << sh;
@@ -276,26 +280,38 @@ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 	}
 	if (event & PM_BUSEVENT_MSK) {
 		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
-		sh = byte * 4;
+		sh = byte * 4 + (16 - PM_UNIT_SH);
 		mask |= PM_UNIT_MSKS << sh;
-		value |= (event & PM_UNIT_MSKS) << sh;
+		value |= (u64)(event & PM_UNIT_MSKS) << sh;
 		if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
-			mask |= PM_SUBUNIT_MSKS;
-			value |= event & PM_SUBUNIT_MSKS;
+			subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+			mask  |= (u64)PM_SUBUNIT_MSK << 32;
+			value |= (u64)subunit << 32;
 		}
 	}
+	if (pmc <= 4) {
+		mask  |= 0x8000;	/* add field for count of PMC1-4 uses */
+		value |= 0x1000;
+	}
 	*maskp = mask;
 	*valp = value;
 	return 0;
 }
 
+static int p6_limited_pmc_event(unsigned int event)
+{
+	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+	return pmc == 5 || pmc == 6;
+}
+
 #define MAX_ALT	4	/* at most 4 alternatives for any event */
 
 static const unsigned int event_alternatives[][MAX_ALT] = {
 	{ 0x0130e8, 0x2000f6, 0x3000fc },	/* PM_PTEG_RELOAD_VALID */
 	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
 	{ 0x080088, 0x200054, 0x3000f0 },	/* PM_ST_MISS_L1 */
-	{ 0x10000a, 0x2000f4 },			/* PM_RUN_CYC */
+	{ 0x10000a, 0x2000f4, 0x600005 },	/* PM_RUN_CYC */
 	{ 0x10000b, 0x2000f5 },			/* PM_RUN_COUNT */
 	{ 0x10000e, 0x400010 },			/* PM_PURR */
 	{ 0x100010, 0x4000f8 },			/* PM_FLUSH */
@@ -340,13 +356,15 @@ static int find_alternatives_list(unsigned int event)
 	return -1;
 }
 
-static int p6_get_alternatives(unsigned int event, unsigned int alt[])
+static int p6_get_alternatives(unsigned int event, unsigned int flags,
+			       unsigned int alt[])
 {
-	int i, j;
+	int i, j, nlim;
 	unsigned int aevent, psel, pmc;
 	unsigned int nalt = 1;
 
 	alt[0] = event;
+	nlim = p6_limited_pmc_event(event);
 
 	/* check the alternatives table */
 	i = find_alternatives_list(event);
@@ -358,6 +376,7 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[])
 				break;
 			if (aevent != event)
 				alt[nalt++] = aevent;
+			nlim += p6_limited_pmc_event(aevent);
 		}
 
 	} else {
@@ -375,13 +394,75 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[])
 				((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
 	}
 
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state,
+		 * so PM_CYC is equivalent to PM_RUN_CYC,
+		 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
+		 * This doesn't include alternatives that don't provide
+		 * any extra flexibility in assigning PMCs (e.g.
+		 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
+		 * Note that even with these additional alternatives
+		 * we never end up with more than 4 alternatives for any event.
+		 */
+		j = nalt;
+		for (i = 0; i < nalt; ++i) {
+			switch (alt[i]) {
+			case 0x1e:	/* PM_CYC */
+				alt[j++] = 0x600005;	/* PM_RUN_CYC */
+				++nlim;
+				break;
+			case 0x10000a:	/* PM_RUN_CYC */
+				alt[j++] = 0x1e;	/* PM_CYC */
+				break;
+			case 2:		/* PM_INST_CMPL */
+				alt[j++] = 0x500009;	/* PM_RUN_INST_CMPL */
+				++nlim;
+				break;
+			case 0x500009:	/* PM_RUN_INST_CMPL */
+				alt[j++] = 2;		/* PM_INST_CMPL */
+				break;
+			case 0x10000e:	/* PM_PURR */
+				alt[j++] = 0x4000f4;	/* PM_RUN_PURR */
+				break;
+			case 0x4000f4:	/* PM_RUN_PURR */
+				alt[j++] = 0x10000e;	/* PM_PURR */
+				break;
+			}
+		}
+		nalt = j;
+	}
+
+	if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+		/* remove the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (!p6_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+		/* remove all but the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (p6_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	}
+
 	return nalt;
 }
 
 static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
 {
 	/* Set PMCxSEL to 0 to disable PMCx */
-	mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+	if (pmc <= 3)
+		mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
 }
 
 static int power6_generic_events[] = {
@@ -394,14 +475,16 @@ static int power6_generic_events[] = {
 };
 
 struct power_pmu power6_pmu = {
-	.n_counter = 4,
+	.n_counter = 6,
 	.max_alternatives = MAX_ALT,
-	.add_fields = 0x55,
-	.test_adder = 0,
+	.add_fields = 0x1555,
+	.test_adder = 0x3000,
 	.compute_mmcr = p6_compute_mmcr,
 	.get_constraint = p6_get_constraint,
 	.get_alternatives = p6_get_alternatives,
 	.disable_pmc = p6_disable_pmc,
 	.n_generic = ARRAY_SIZE(power6_generic_events),
 	.generic_events = power6_generic_events,
+	.limited_pmc5_6 = 1,
+	.limited_pmc_event = p6_limited_pmc_event,
 };
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index aed8ccd..af2d188 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -243,7 +243,8 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 	return 0;
 }
 
-static int p970_get_alternatives(unsigned int event, unsigned int alt[])
+static int p970_get_alternatives(unsigned int event, unsigned int flags,
+				 unsigned int alt[])
 {
 	alt[0] = event;
 
-- 
cgit v0.10.2


From f25181f598cf4a8ccc40a51d8b74f8b555ecddee Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lmcilroy@redhat.com>
Date: Thu, 23 Apr 2009 22:18:00 -0400
Subject: xfs_file_last_byte() needs to acquire ilock

We had some systems crash with this stack:

[<a00000010000cb20>] ia64_leave_kernel+0x0/0x280
[<a00000021291ca00>] xfs_bmbt_get_startoff+0x0/0x20 [xfs]
[<a0000002129080b0>] xfs_bmap_last_offset+0x210/0x280 [xfs]
[<a00000021295b010>] xfs_file_last_byte+0x70/0x1a0 [xfs]
[<a00000021295b200>] xfs_itruncate_start+0xc0/0x1a0 [xfs]
[<a0000002129935f0>] xfs_inactive_free_eofblocks+0x290/0x460 [xfs]
[<a000000212998fb0>] xfs_release+0x1b0/0x240 [xfs]
[<a0000002129ad930>] xfs_file_release+0x70/0xa0 [xfs]
[<a000000100162ea0>] __fput+0x1a0/0x420
[<a000000100163160>] fput+0x40/0x60

The problem here is that xfs_file_last_byte() does not acquire the
inode lock and can therefore race with another thread that is modifying
the extext list.  While xfs_bmap_last_offset() is trying to lookup
what was the last extent some extents were merged and the extent list
shrunk so the index we lookup is now beyond the end of the extent list
and potentially in a freed buffer.

Signed-off-by: Lachlan McIlroy <lmcilroy@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e7ae08d..123b20c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1258,8 +1258,10 @@ xfs_file_last_byte(
 	 * necessary.
 	 */
 	if (ip->i_df.if_flags & XFS_IFEXTENTS) {
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
 		error = xfs_bmap_last_offset(NULL, ip, &last_block,
 			XFS_DATA_FORK);
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		if (error) {
 			last_block = 0;
 		}
-- 
cgit v0.10.2


From 2ac00af7a6d2e65013e6f28bd1f37c0cd98ba134 Mon Sep 17 00:00:00 2001
From: Olaf Weber <olaf@sgi.com>
Date: Fri, 17 Apr 2009 16:12:45 -0500
Subject: xfs: add more checks to superblock validation

There had been reports where xfs filesystem was randomly
corrupted with fsfuzzer, and xfs failed to handle it
gracefully. This patch fixes couple of reported problem
by providing additional checks in the superblock
validation routine.

Signed-off-by: Olaf Weber <olaf@sgi.com>
Reviewed-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b101990..65a9972 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -291,14 +291,17 @@ xfs_mount_validate_sb(
 	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
 	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
 	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
+	    sbp->sb_sectsize != (1 << sbp->sb_sectlog)			||
 	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
 	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
 	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
 	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
+	    sbp->sb_blocksize != (1 << sbp->sb_blocklog)		||
 	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
 	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
 	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
 	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
+	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		||
 	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
-- 
cgit v0.10.2


From b1fca26631f76a5e8b18435a43f5d82b8734da4b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 23 Mar 2009 18:22:09 +0100
Subject: mutex: add atomic_dec_and_mutex_lock()

Much like the atomic_dec_and_lock() function in which we take an hold a
spin_lock if we drop the atomic to 0 this function takes and holds the
mutex if we dec the atomic to 0.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.410913479@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 3069ec7..93054fc 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
 
+/**
+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
+ * @cnt: the atomic which we are to dec
+ * @lock: the mutex to return holding if we dec to 0
+ *
+ * return true and hold lock if we dec to 0, return false otherwise
+ */
+static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
+{
+	/* dec if we can't possibly hit 0 */
+	if (atomic_add_unless(cnt, -1, 1))
+		return 0;
+	/* we might hit 0, so take the lock */
+	mutex_lock(lock);
+	if (!atomic_dec_and_test(cnt)) {
+		/* when we actually did the dec, we didn't hit 0 */
+		mutex_unlock(lock);
+		return 0;
+	}
+	/* we hit 0, and we hold the lock */
+	return 1;
+}
+
 #endif
-- 
cgit v0.10.2


From 4be4a00fb55879ef44b5914c189aecffa828deb4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 29 Apr 2009 10:50:48 -0400
Subject: xfs: a couple getbmap cleanups

 - reshuffle various conditionals for data vs attr fork to make the code
   more readable
 - do fine-grainded goto-based error handling
 - exit early from conditionals instead of keeping a long else branch around
 - allow kmem_alloc to fail

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3a6ed42..abe4244 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5880,7 +5880,7 @@ xfs_getbmap(
 	void			*arg)		/* formatter arg */
 {
 	__int64_t		bmvend;		/* last block requested */
-	int			error;		/* return value */
+	int			error = 0;	/* return value */
 	__int64_t		fixlen;		/* length for -1 case */
 	int			i;		/* extent number */
 	int			lock;		/* lock state */
@@ -5899,30 +5899,8 @@ xfs_getbmap(
 
 	mp = ip->i_mount;
 	iflags = bmv->bmv_iflags;
-
 	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
 
-	/*	If the BMV_IF_NO_DMAPI_READ interface bit specified, do not
-	 *	generate a DMAPI read event.  Otherwise, if the DM_EVENT_READ
-	 *	bit is set for the file, generate a read event in order
-	 *	that the DMAPI application may do its thing before we return
-	 *	the extents.  Usually this means restoring user file data to
-	 *	regions of the file that look like holes.
-	 *
-	 *	The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
-	 *	BMV_IF_NO_DMAPI_READ so that read events are generated.
-	 *	If this were not true, callers of ioctl( XFS_IOC_GETBMAP )
-	 *	could misinterpret holes in a DMAPI file as true holes,
-	 *	when in fact they may represent offline user data.
-	 */
-	if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 &&
-	    DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
-	    whichfork == XFS_DATA_FORK) {
-		error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
-		if (error)
-			return XFS_ERROR(error);
-	}
-
 	if (whichfork == XFS_ATTR_FORK) {
 		if (XFS_IFORK_Q(ip)) {
 			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
@@ -5936,11 +5914,37 @@ xfs_getbmap(
 					 ip->i_mount);
 			return XFS_ERROR(EFSCORRUPTED);
 		}
-	} else if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		   ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
-		   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
-		return XFS_ERROR(EINVAL);
-	if (whichfork == XFS_DATA_FORK) {
+
+		prealloced = 0;
+		fixlen = 1LL << 32;
+	} else {
+		/*
+		 * If the BMV_IF_NO_DMAPI_READ interface bit specified, do
+		 * not generate a DMAPI read event.  Otherwise, if the
+		 * DM_EVENT_READ bit is set for the file, generate a read
+		 * event in order that the DMAPI application may do its thing
+		 * before we return the extents.  Usually this means restoring
+		 * user file data to regions of the file that look like holes.
+		 *
+		 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
+		 * BMV_IF_NO_DMAPI_READ so that read events are generated.
+		 * If this were not true, callers of ioctl(XFS_IOC_GETBMAP)
+		 * could misinterpret holes in a DMAPI file as true holes,
+		 * when in fact they may represent offline user data.
+		 */
+		if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
+		    !(iflags & BMV_IF_NO_DMAPI_READ)) {
+			error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip,
+					      0, 0, 0, NULL);
+			if (error)
+				return XFS_ERROR(error);
+		}
+
+		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
+		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
+		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+			return XFS_ERROR(EINVAL);
+
 		if (xfs_get_extsz_hint(ip) ||
 		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
 			prealloced = 1;
@@ -5949,42 +5953,34 @@ xfs_getbmap(
 			prealloced = 0;
 			fixlen = ip->i_size;
 		}
-	} else {
-		prealloced = 0;
-		fixlen = 1LL << 32;
 	}
 
 	if (bmv->bmv_length == -1) {
 		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
-		bmv->bmv_length = MAX( (__int64_t)(fixlen - bmv->bmv_offset),
-					(__int64_t)0);
-	} else if (bmv->bmv_length < 0)
-		return XFS_ERROR(EINVAL);
-	if (bmv->bmv_length == 0) {
+		bmv->bmv_length =
+			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
+	} else if (bmv->bmv_length == 0) {
 		bmv->bmv_entries = 0;
 		return 0;
+	} else if (bmv->bmv_length < 0) {
+		return XFS_ERROR(EINVAL);
 	}
+
 	nex = bmv->bmv_count - 1;
 	if (nex <= 0)
 		return XFS_ERROR(EINVAL);
 	bmvend = bmv->bmv_offset + bmv->bmv_length;
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
-	if (((iflags & BMV_IF_DELALLOC) == 0) &&
-	    (whichfork == XFS_DATA_FORK) &&
-	    (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
-		/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
-		error = xfs_flush_pages(ip, (xfs_off_t)0,
-					       -1, 0, FI_REMAPF);
-		if (error) {
-			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-		return error;
+	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
+		if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
+			error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
+			if (error)
+				goto out_unlock_iolock;
 		}
-	}
 
-	ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) ||
-	       ip->i_delayed_blks == 0);
+		ASSERT(ip->i_delayed_blks == 0);
+	}
 
 	lock = xfs_ilock_map_shared(ip);
 
@@ -5995,23 +5991,25 @@ xfs_getbmap(
 	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
 		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
 
-	bmapi_flags = xfs_bmapi_aflag(whichfork) |
-			((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE);
+	bmapi_flags = xfs_bmapi_aflag(whichfork);
+	if (!(iflags & BMV_IF_PREALLOC))
+		bmapi_flags |= XFS_BMAPI_IGSTATE;
 
 	/*
 	 * Allocate enough space to handle "subnex" maps at a time.
 	 */
+	error = ENOMEM;
 	subnex = 16;
-	map = kmem_alloc(subnex * sizeof(*map), KM_SLEEP);
+	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL);
+	if (!map)
+		goto out_unlock_ilock;
 
 	bmv->bmv_entries = 0;
 
-	if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) {
-		if (((iflags & BMV_IF_DELALLOC) == 0) ||
-		    whichfork == XFS_ATTR_FORK) {
-			error = 0;
-			goto unlock_and_return;
-		}
+	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
+	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
+		error = 0;
+		goto out_free_map;
 	}
 
 	nexleft = nex;
@@ -6023,10 +6021,12 @@ xfs_getbmap(
 				  bmapi_flags, NULL, 0, map, &nmap,
 				  NULL, NULL);
 		if (error)
-			goto unlock_and_return;
+			goto out_free_map;
 		ASSERT(nmap <= subnex);
 
 		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
+			int full = 0;	/* user array is full */
+
 			out.bmv_oflags = 0;
 			if (map[i].br_state == XFS_EXT_UNWRITTEN)
 				out.bmv_oflags |= BMV_OF_PREALLOC;
@@ -6041,36 +6041,32 @@ xfs_getbmap(
 			    whichfork == XFS_ATTR_FORK) {
 				/* came to the end of attribute fork */
 				out.bmv_oflags |= BMV_OF_LAST;
-				goto unlock_and_return;
-			} else {
-				int full = 0;	/* user array is full */
-
-				if (!xfs_getbmapx_fix_eof_hole(ip, &out,
-							prealloced, bmvend,
-							map[i].br_startblock)) {
-					goto unlock_and_return;
-				}
-
-				/* format results & advance arg */
-				error = formatter(&arg, &out, &full);
-				if (error || full)
-					goto unlock_and_return;
-				nexleft--;
-				bmv->bmv_offset =
-					out.bmv_offset + out.bmv_length;
-				bmv->bmv_length = MAX((__int64_t)0,
-					(__int64_t)(bmvend - bmv->bmv_offset));
-				bmv->bmv_entries++;
+				goto out_free_map;
 			}
+
+			if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced,
+					bmvend, map[i].br_startblock))
+				goto out_free_map;
+
+			/* format results & advance arg */
+			error = formatter(&arg, &out, &full);
+			if (error || full)
+				goto out_free_map;
+			nexleft--;
+			bmv->bmv_offset =
+				out.bmv_offset + out.bmv_length;
+			bmv->bmv_length =
+				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+			bmv->bmv_entries++;
 		}
 	} while (nmap && nexleft && bmv->bmv_length);
 
-unlock_and_return:
+ out_free_map:
+	kmem_free(map);
+ out_unlock_ilock:
 	xfs_iunlock_map_shared(ip, lock);
+ out_unlock_iolock:
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-	kmem_free(map);
-
 	return error;
 }
 
-- 
cgit v0.10.2


From 6321e3ed2acf3ee9643cdd403e1c88605d7944ba Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 24 Feb 2009 08:39:02 -0500
Subject: xfs: fix getbmap vs mmap deadlock

xfs_getbmap (or rather the formatters called by it) copy out the getbmap
structures under the ilock, which can deadlock against mmap.  This has
been reported via bugzilla a while ago (#717) and has recently also
shown up via lockdep.

So allocate a temporary buffer to format the kernel getbmap structures
into and then copy them out after dropping the locks.

A little problem with this is that we limit the number of extents we
can copy out by the maximum allocation size, but I see no real way
around that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index abe4244..ca7c600 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5890,12 +5890,13 @@ xfs_getbmap(
 	int			nexleft;	/* # of user extents left */
 	int			subnex;		/* # of bmapi's can do */
 	int			nmap;		/* number of map entries */
-	struct getbmapx		out;		/* output structure */
+	struct getbmapx		*out;		/* output structure */
 	int			whichfork;	/* data or attr fork */
 	int			prealloced;	/* this is a file with
 						 * preallocated data space */
 	int			iflags;		/* interface flags */
 	int			bmapi_flags;	/* flags for xfs_bmapi */
+	int			cur_ext = 0;
 
 	mp = ip->i_mount;
 	iflags = bmv->bmv_iflags;
@@ -5971,6 +5972,13 @@ xfs_getbmap(
 		return XFS_ERROR(EINVAL);
 	bmvend = bmv->bmv_offset + bmv->bmv_length;
 
+
+	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
+		return XFS_ERROR(ENOMEM);
+	out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
+	if (!out)
+		return XFS_ERROR(ENOMEM);
+
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
 		if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
@@ -6025,39 +6033,39 @@ xfs_getbmap(
 		ASSERT(nmap <= subnex);
 
 		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
-			int full = 0;	/* user array is full */
-
-			out.bmv_oflags = 0;
+			out[cur_ext].bmv_oflags = 0;
 			if (map[i].br_state == XFS_EXT_UNWRITTEN)
-				out.bmv_oflags |= BMV_OF_PREALLOC;
+				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
 			else if (map[i].br_startblock == DELAYSTARTBLOCK)
-				out.bmv_oflags |= BMV_OF_DELALLOC;
-			out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff);
-			out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			out.bmv_unused1 = out.bmv_unused2 = 0;
+				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
+			out[cur_ext].bmv_offset =
+				XFS_FSB_TO_BB(mp, map[i].br_startoff);
+			out[cur_ext].bmv_length =
+				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
+			out[cur_ext].bmv_unused1 = 0;
+			out[cur_ext].bmv_unused2 = 0;
 			ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
 			      (map[i].br_startblock != DELAYSTARTBLOCK));
                         if (map[i].br_startblock == HOLESTARTBLOCK &&
 			    whichfork == XFS_ATTR_FORK) {
 				/* came to the end of attribute fork */
-				out.bmv_oflags |= BMV_OF_LAST;
+				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
 				goto out_free_map;
 			}
 
-			if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced,
-					bmvend, map[i].br_startblock))
+			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
+					prealloced, bmvend,
+					map[i].br_startblock))
 				goto out_free_map;
 
-			/* format results & advance arg */
-			error = formatter(&arg, &out, &full);
-			if (error || full)
-				goto out_free_map;
 			nexleft--;
 			bmv->bmv_offset =
-				out.bmv_offset + out.bmv_length;
+				out[cur_ext].bmv_offset +
+				out[cur_ext].bmv_length;
 			bmv->bmv_length =
 				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
 			bmv->bmv_entries++;
+			cur_ext++;
 		}
 	} while (nmap && nexleft && bmv->bmv_length);
 
@@ -6067,6 +6075,16 @@ xfs_getbmap(
 	xfs_iunlock_map_shared(ip, lock);
  out_unlock_iolock:
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	for (i = 0; i < cur_ext; i++) {
+		int full = 0;	/* user array is full */
+
+		/* format results & advance arg */
+		error = formatter(&arg, &out[i], &full);
+		if (error || full)
+			break;
+	}
+
 	return error;
 }
 
-- 
cgit v0.10.2


From 0c8b946e3ebb3846103486420ea7430a4b5e5b1b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 15 Apr 2009 17:48:18 +0200
Subject: vsprintf: introduce %pf format specifier

A printf format specifier which would allow us to print a pure
function name has been suggested by Andrew Morton a couple of
months ago.

The current %pF is very convenient to print a function symbol,
but often we only want to print the name of the function, without
its asm offset.

That's what  %pf does in this patch.  The lowecase f has been chosen
for its intuitive meaning of a 'weak kind of %pF'.

The support for this new format would be welcome by the tracing code
where the need to print pure function names is often needed. This is
also true for other parts of the kernel:

$ git-grep -E "kallsyms_lookup\(.+?\)"
arch/blackfin/kernel/traps.c:   symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf);
arch/powerpc/xmon/xmon.c:               name = kallsyms_lookup(pc, &size, &offset, NULL, tmpstr);
arch/sh/kernel/cpu/sh5/unwind.c:        sym = kallsyms_lookup(pc, NULL, &offset, NULL, namebuf);
arch/x86/kernel/ftrace.c:       kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
kernel/kprobes.c:               sym = kallsyms_lookup((unsigned long)p->addr, NULL,
kernel/lockdep.c:       return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str);
kernel/trace/ftrace.c:  kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
kernel/trace/ftrace.c:  kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
kernel/trace/ftrace.c:  kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
kernel/trace/ftrace.c:  kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
kernel/trace/ftrace.c:  kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
kernel/trace/ftrace.c:  kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
kernel/trace/ftrace.c:  kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
kernel/trace/trace_functions.c: kallsyms_lookup(ip, NULL, NULL, NULL, str);
kernel/trace/trace_output.c:    kallsyms_lookup(address, NULL, NULL, NULL, str);

Changes in v2:

- Add the explanation of the %pf role for vsnprintf() and bstr_printf()

- Change the comments by dropping the "asm offset" notion and only
  define the %pf against the actual function offset notion.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090415154817.GC5989@nowhere>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index b56f6d0..756ccaf 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -575,12 +575,15 @@ static char *string(char *buf, char *end, char *s, struct printf_spec spec)
 }
 
 static char *symbol_string(char *buf, char *end, void *ptr,
-				struct printf_spec spec)
+				struct printf_spec spec, char ext)
 {
 	unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
 	char sym[KSYM_SYMBOL_LEN];
-	sprint_symbol(sym, value);
+	if (ext != 'f')
+		sprint_symbol(sym, value);
+	else
+		kallsyms_lookup(value, NULL, NULL, NULL, sym);
 	return string(buf, end, sym, spec);
 #else
 	spec.field_width = 2*sizeof(void *);
@@ -692,7 +695,8 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr,
  *
  * Right now we handle:
  *
- * - 'F' For symbolic function descriptor pointers
+ * - 'F' For symbolic function descriptor pointers with offset
+ * - 'f' For simple symbolic function names without offset
  * - 'S' For symbolic direct pointers
  * - 'R' For a struct resource pointer, it prints the range of
  *       addresses (not the name nor the flags)
@@ -715,10 +719,11 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 
 	switch (*fmt) {
 	case 'F':
+	case 'f':
 		ptr = dereference_function_descriptor(ptr);
 		/* Fallthrough */
 	case 'S':
-		return symbol_string(buf, end, ptr, spec);
+		return symbol_string(buf, end, ptr, spec, *fmt);
 	case 'R':
 		return resource_string(buf, end, ptr, spec);
 	case 'm':
@@ -954,7 +959,8 @@ qualifier:
  *
  * This function follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
- * %pF output the name of a function pointer
+ * %pF output the name of a function pointer with its offset
+ * %pf output the name of a function pointer without its offset
  * %pR output the address range in a struct resource
  *
  * The return value is the number of characters which would
@@ -1412,7 +1418,8 @@ EXPORT_SYMBOL_GPL(vbin_printf);
  *
  * The format follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
- * %pF output the name of a function pointer
+ * %pF output the name of a function pointer with its offset
+ * %pf output the name of a function pointer without its offset
  * %pR output the address range in a struct resource
  * %n is ignored
  *
-- 
cgit v0.10.2


From 43f6201a22dbf1c5abe1cab96b49bd56fa9df8f4 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 16:55:56 +0200
Subject: perf_counter, x86: rename bitmasks to ->used_mask and ->active_mask

Standardize on explicitly mentioning '_mask' in fields that
are not plain flags but masks. This avoids typos like:

       if (cpuc->used)

(which could easily slip through review unnoticed), while if a
typo looks like this:

       if (cpuc->used_mask)

it might get noticed during review.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1241016956-24648-1-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 47e563b..fc06f4d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -28,8 +28,8 @@ static u64 perf_counter_mask __read_mostly;
 
 struct cpu_hw_counters {
 	struct perf_counter	*counters[X86_PMC_IDX_MAX];
-	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	unsigned long		active[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	u64			throttle_ctrl;
 	int			enabled;
@@ -332,7 +332,7 @@ static u64 amd_pmu_save_disable_all(void)
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
-		if (!test_bit(idx, cpuc->active))
+		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
 		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
@@ -373,7 +373,7 @@ static void amd_pmu_restore_all(u64 ctrl)
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
-		if (!test_bit(idx, cpuc->active))
+		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
 		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
@@ -576,7 +576,7 @@ static int x86_pmu_enable(struct perf_counter *counter)
 		 * Try to get the fixed counter, if that is already taken
 		 * then try to get a generic counter:
 		 */
-		if (test_and_set_bit(idx, cpuc->used))
+		if (test_and_set_bit(idx, cpuc->used_mask))
 			goto try_generic;
 
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
@@ -590,14 +590,14 @@ static int x86_pmu_enable(struct perf_counter *counter)
 	} else {
 		idx = hwc->idx;
 		/* Try to get the previous generic counter again */
-		if (test_and_set_bit(idx, cpuc->used)) {
+		if (test_and_set_bit(idx, cpuc->used_mask)) {
 try_generic:
-			idx = find_first_zero_bit(cpuc->used,
+			idx = find_first_zero_bit(cpuc->used_mask,
 						  x86_pmu.num_counters);
 			if (idx == x86_pmu.num_counters)
 				return -EAGAIN;
 
-			set_bit(idx, cpuc->used);
+			set_bit(idx, cpuc->used_mask);
 			hwc->idx = idx;
 		}
 		hwc->config_base  = x86_pmu.eventsel;
@@ -609,7 +609,7 @@ try_generic:
 	x86_pmu.disable(hwc, idx);
 
 	cpuc->counters[idx] = counter;
-	set_bit(idx, cpuc->active);
+	set_bit(idx, cpuc->active_mask);
 
 	x86_perf_counter_set_period(counter, hwc, idx);
 	x86_pmu.enable(hwc, idx);
@@ -643,7 +643,7 @@ void perf_counter_print_debug(void)
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 	}
-	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
+	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -677,7 +677,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
 	 * Must be done before we disable, otherwise the nmi handler
 	 * could reenable again:
 	 */
-	clear_bit(idx, cpuc->active);
+	clear_bit(idx, cpuc->active_mask);
 	x86_pmu.disable(hwc, idx);
 
 	/*
@@ -692,7 +692,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
 	 */
 	x86_perf_counter_update(counter, hwc, idx);
 	cpuc->counters[idx] = NULL;
-	clear_bit(idx, cpuc->used);
+	clear_bit(idx, cpuc->used_mask);
 }
 
 /*
@@ -741,7 +741,7 @@ again:
 		struct perf_counter *counter = cpuc->counters[bit];
 
 		clear_bit(bit, (unsigned long *) &status);
-		if (!test_bit(bit, cpuc->active))
+		if (!test_bit(bit, cpuc->active_mask))
 			continue;
 
 		intel_pmu_save_and_restart(counter);
@@ -779,7 +779,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 
 	++cpuc->interrupts;
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		if (!test_bit(idx, cpuc->active))
+		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 		counter = cpuc->counters[idx];
 		hwc = &counter->hw;
-- 
cgit v0.10.2


From 23b94b967f118bef941369238f33c8140be46539 Mon Sep 17 00:00:00 2001
From: Luis Henriques <henrix@sapo.pt>
Date: Wed, 29 Apr 2009 21:54:51 +0100
Subject: locking, rtmutex.c: Documentation cleanup

Two minor updates on functions documentation:
 - Updated documentation for function rt_mutex_unlock(), which contained an
   incorrect name
 - Removed extra '*' from comment in function rt_mutex_destroy()

[ Impact: cleanup ]

Signed-off-by: Luis Henriques <henrix@sapo.pt>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090429205451.GA23154@hades.domain.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 69d9cb9..013882e 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -864,9 +864,9 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
 /**
- * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible
- *				       the timeout structure is provided
- *				       by the caller
+ * rt_mutex_timed_lock - lock a rt_mutex interruptible
+ *			the timeout structure is provided
+ *			by the caller
  *
  * @lock: 		the rt_mutex to be locked
  * @timeout:		timeout structure or NULL (no timeout)
@@ -913,7 +913,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
 }
 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
 
-/***
+/**
  * rt_mutex_destroy - mark a mutex unusable
  * @lock: the mutex to be destroyed
  *
-- 
cgit v0.10.2


From 88c48db9788862d0290831d081bc3c64e13b592f Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 29 Apr 2009 14:00:25 -0400
Subject: SELinux: drop secondary_ops->sysctl

We are still calling secondary_ops->sysctl even though the capabilities
module does not define a sysctl operation.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ba808ef..dd19ba8 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1980,10 +1980,6 @@ static int selinux_sysctl(ctl_table *table, int op)
 	u32 tsid, sid;
 	int rc;
 
-	rc = secondary_ops->sysctl(table, op);
-	if (rc)
-		return rc;
-
 	sid = current_sid();
 
 	rc = selinux_sysctl_get_sid(table, (op == 0001) ?
-- 
cgit v0.10.2


From 3bcac0263f0b45e67a64034ebcb69eb9abb742f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 29 Apr 2009 13:45:05 +0100
Subject: SELinux: Don't flush inherited SIGKILL during execve()

Don't flush inherited SIGKILL during execve() in SELinux's post cred commit
hook.  This isn't really a security problem: if the SIGKILL came before the
credentials were changed, then we were right to receive it at the time, and
should honour it; if it came after the creds were changed, then we definitely
should honour it; and in any case, all that will happen is that the process
will be scrapped before it ever returns to userspace.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1d19c02..d3b787c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1875,6 +1875,7 @@ extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
 extern void flush_signals(struct task_struct *);
+extern void __flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
diff --git a/kernel/signal.c b/kernel/signal.c
index 1c88144..f93efec 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -238,14 +238,19 @@ void flush_sigqueue(struct sigpending *queue)
 /*
  * Flush all pending signals for a task.
  */
+void __flush_signals(struct task_struct *t)
+{
+	clear_tsk_thread_flag(t, TIF_SIGPENDING);
+	flush_sigqueue(&t->pending);
+	flush_sigqueue(&t->signal->shared_pending);
+}
+
 void flush_signals(struct task_struct *t)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&t->sighand->siglock, flags);
-	clear_tsk_thread_flag(t, TIF_SIGPENDING);
-	flush_sigqueue(&t->pending);
-	flush_sigqueue(&t->signal->shared_pending);
+	__flush_signals(t);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
 }
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index dd19ba8..5a34511 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2394,11 +2394,12 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
 		memset(&itimer, 0, sizeof itimer);
 		for (i = 0; i < 3; i++)
 			do_setitimer(i, &itimer, NULL);
-		flush_signals(current);
 		spin_lock_irq(&current->sighand->siglock);
-		flush_signal_handlers(current, 1);
-		sigemptyset(&current->blocked);
-		recalc_sigpending();
+		if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
+			__flush_signals(current);
+			flush_signal_handlers(current, 1);
+			sigemptyset(&current->blocked);
+		}
 		spin_unlock_irq(&current->sighand->siglock);
 	}
 
-- 
cgit v0.10.2


From ecd6de3c88e8cbcad175b2eab48ba05c2014f7b6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 29 Apr 2009 16:02:24 +0200
Subject: selinux: selinux_bprm_committed_creds() should wake up ->real_parent,
 not ->parent.

We shouldn't worry about the tracer if current is ptraced, exec() must not
succeed if the tracer has no rights to trace this task after cred changing.
But we should notify ->real_parent which is, well, real parent.

Also, we don't need _irq to take tasklist, and we don't need parent's
->siglock to wake_up_interruptible(real_parent->signal->wait_chldexit).
Since we hold tasklist, real_parent->signal must be stable. Otherwise
spin_lock(siglock) is not safe too and can't help anyway.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5a34511..39046dd 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2371,10 +2371,8 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
 {
 	const struct task_security_struct *tsec = current_security();
 	struct itimerval itimer;
-	struct sighand_struct *psig;
 	u32 osid, sid;
 	int rc, i;
-	unsigned long flags;
 
 	osid = tsec->osid;
 	sid = tsec->sid;
@@ -2405,12 +2403,9 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
 
 	/* Wake up the parent if it is waiting so that it can recheck
 	 * wait permission to the new task SID. */
-	read_lock_irq(&tasklist_lock);
-	psig = current->parent->sighand;
-	spin_lock_irqsave(&psig->siglock, flags);
-	wake_up_interruptible(&current->parent->signal->wait_chldexit);
-	spin_unlock_irqrestore(&psig->siglock, flags);
-	read_unlock_irq(&tasklist_lock);
+	read_lock(&tasklist_lock);
+	wake_up_interruptible(&current->real_parent->signal->wait_chldexit);
+	read_unlock(&tasklist_lock);
 }
 
 /* superblock security operations */
-- 
cgit v0.10.2


From e6be3a25861429166f945499c7ee616875bc3db9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 30 Apr 2009 12:56:37 +0900
Subject: sh: pass through ioremap() for non-mmu processors.

All 32-bit SuperH processors currently go through __ioremap_mode()
and check for IO_TRAPPED and directly mapped segments. With this
patch we simplify the MMU less case with a pass through version of
 __ioremap_mode() which just returns the physical address.

The effects of this is change are:
 - fix non-MMU ioremap() of high address hardware blocks (sh7203 CMT)
 - make sure IO_TRAPPED is not selected

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index dcc1af8..99e2d47 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -121,7 +121,7 @@ config SH_RTS7751R2D
 	bool "RTS7751R2D"
 	depends on CPU_SUBTYPE_SH7751R
 	select SYS_SUPPORTS_PCI
-	select IO_TRAPPED
+	select IO_TRAPPED if MMU
 	help
 	  Select RTS7751R2D if configuring for a Renesas Technology
 	  Sales SH-Graphics board.
@@ -145,13 +145,13 @@ config SH_HIGHLANDER
 	bool "Highlander"
 	depends on CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785
 	select SYS_SUPPORTS_PCI
-	select IO_TRAPPED
+	select IO_TRAPPED if MMU
 
 config SH_SH7785LCR
 	bool "SH7785LCR"
 	depends on CPU_SUBTYPE_SH7785
 	select SYS_SUPPORTS_PCI
-	select IO_TRAPPED
+	select IO_TRAPPED if MMU
 
 config SH_SH7785LCR_29BIT_PHYSMAPS
 	bool "SH7785LCR 29bit physmaps"
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 0454f8d..ef21734 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -228,12 +228,6 @@ void __iounmap(void __iomem *addr);
 unsigned long onchip_remap(unsigned long addr, unsigned long size,
 			   const char *name);
 extern void onchip_unmap(unsigned long vaddr);
-#else
-#define __ioremap(offset, size, flags)	((void __iomem *)(offset))
-#define __iounmap(addr)			do { } while (0)
-#define onchip_remap(addr, size, name)	(addr)
-#define onchip_unmap(addr)		do { } while (0)
-#endif /* CONFIG_MMU */
 
 static inline void __iomem *
 __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
@@ -268,6 +262,12 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 
 	return __ioremap(offset, size, flags);
 }
+#else
+#define onchip_remap(addr, size, name)		(addr)
+#define onchip_unmap(addr)			do { } while (0)
+#define __ioremap_mode(offset, size, flags)	((void __iomem *)(offset))
+#define __iounmap(addr)				do { } while (0)
+#endif /* CONFIG_MMU */
 
 #define ioremap(offset, size)				\
 	__ioremap_mode((offset), (size), 0)
-- 
cgit v0.10.2


From 3014f47460ecfb13d4169daae51f26a20bacfa17 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 29 Apr 2009 14:50:37 +0000
Subject: clocksource: sh_cmt 16-bit fixes

This patch contains various fixes for 16-bit cmt hardware.
With this applied periodic clockevents work fine on sh7203.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index bf3e4c1..4ff1508 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -158,16 +158,18 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate)
 		pr_err("sh_cmt: cannot enable clock \"%s\"\n", cfg->clk);
 		return ret;
 	}
-	*rate = clk_get_rate(p->clk) / 8;
 
 	/* make sure channel is disabled */
 	sh_cmt_start_stop_ch(p, 0);
 
 	/* configure channel, periodic mode and maximum timeout */
-	if (p->width == 16)
-		sh_cmt_write(p, CMCSR, 0);
-	else
+	if (p->width == 16) {
+		*rate = clk_get_rate(p->clk) / 512;
+		sh_cmt_write(p, CMCSR, 0x43);
+	} else {
+		*rate = clk_get_rate(p->clk) / 8;
 		sh_cmt_write(p, CMCSR, 0x01a4);
+	}
 
 	sh_cmt_write(p, CMCOR, 0xffffffff);
 	sh_cmt_write(p, CMCNT, 0);
@@ -615,7 +617,7 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 	if (resource_size(res) == 6) {
 		p->width = 16;
 		p->overflow_bit = 0x80;
-		p->clear_bits = ~0xc0;
+		p->clear_bits = ~0x80;
 	} else {
 		p->width = 32;
 		p->overflow_bit = 0x8000;
-- 
cgit v0.10.2


From 698aa99da5f5e2b4c666fd21ab77306f0225b8f5 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 30 Apr 2009 04:08:18 +0000
Subject: sh: sh2/sh2a 16-bit CMT platform data

This patch adds 16-bit cmt platform data for the following cpus:
 - sh7619 (2 channels)
 - sh7203/sh7263 (2 channels)
 - sh7206 (2 channels)

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index 0e32d8e..d70c263 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -12,6 +12,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_cmt.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -109,9 +111,75 @@ static struct platform_device eth_device = {
 	.resource = eth_resources,
 };
 
+static struct sh_cmt_config cmt0_platform_data = {
+	.name = "CMT0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt0_resources[] = {
+	[0] = {
+		.name	= "CMT0",
+		.start	= 0xf84a0072,
+		.end	= 0xf84a0077,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 86,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt0_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt0_platform_data,
+	},
+	.resource	= cmt0_resources,
+	.num_resources	= ARRAY_SIZE(cmt0_resources),
+};
+
+static struct sh_cmt_config cmt1_platform_data = {
+	.name = "CMT1",
+	.channel_offset = 0x08,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt1_resources[] = {
+	[0] = {
+		.name	= "CMT1",
+		.start	= 0xf84a0078,
+		.end	= 0xf84a007d,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 87,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt1_device = {
+	.name		= "sh_cmt",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &cmt1_platform_data,
+	},
+	.resource	= cmt1_resources,
+	.num_resources	= ARRAY_SIZE(cmt1_resources),
+};
+
 static struct platform_device *sh7619_devices[] __initdata = {
 	&sci_device,
 	&eth_device,
+	&cmt0_device,
+	&cmt1_device,
 };
 
 static int __init sh7619_devices_setup(void)
@@ -125,3 +193,19 @@ void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *sh7619_early_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+};
+
+#define STBCR3 0xf80a0000
+
+void __init plat_early_device_setup(void)
+{
+	/* enable CMT clock */
+	__raw_writeb(__raw_readb(STBCR3) & ~0x10, STBCR3);
+
+	early_platform_add_devices(sh7619_early_devices,
+				   ARRAY_SIZE(sh7619_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 820dfb2..0836ace 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -11,6 +11,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_cmt.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -205,6 +207,70 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_cmt_config cmt0_platform_data = {
+	.name = "CMT0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt0_resources[] = {
+	[0] = {
+		.name	= "CMT0",
+		.start	= 0xfffec002,
+		.end	= 0xfffec007,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 142,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt0_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt0_platform_data,
+	},
+	.resource	= cmt0_resources,
+	.num_resources	= ARRAY_SIZE(cmt0_resources),
+};
+
+static struct sh_cmt_config cmt1_platform_data = {
+	.name = "CMT1",
+	.channel_offset = 0x08,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt1_resources[] = {
+	[0] = {
+		.name	= "CMT1",
+		.start	= 0xfffec008,
+		.end	= 0xfffec00d,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 143,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt1_device = {
+	.name		= "sh_cmt",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &cmt1_platform_data,
+	},
+	.resource	= cmt1_resources,
+	.num_resources	= ARRAY_SIZE(cmt1_resources),
+};
+
 static struct resource rtc_resources[] = {
 	[0] = {
 		.start	= 0xffff2000,
@@ -227,6 +293,8 @@ static struct platform_device rtc_device = {
 
 static struct platform_device *sh7203_devices[] __initdata = {
 	&sci_device,
+	&cmt0_device,
+	&cmt1_device,
 	&rtc_device,
 };
 
@@ -241,3 +309,19 @@ void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *sh7203_early_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+};
+
+#define STBCR4 0xfffe040c
+
+void __init plat_early_device_setup(void)
+{
+	/* enable CMT clock */
+	__raw_writeb(__raw_readb(STBCR4) & ~0x04, STBCR4);
+
+	early_platform_add_devices(sh7203_early_devices,
+				   ARRAY_SIZE(sh7203_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index c46a835..f9606e3 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -12,6 +12,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_cmt.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -165,8 +167,74 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_cmt_config cmt0_platform_data = {
+	.name = "CMT0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt0_resources[] = {
+	[0] = {
+		.name	= "CMT0",
+		.start	= 0xfffec002,
+		.end	= 0xfffec007,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 140,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt0_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt0_platform_data,
+	},
+	.resource	= cmt0_resources,
+	.num_resources	= ARRAY_SIZE(cmt0_resources),
+};
+
+static struct sh_cmt_config cmt1_platform_data = {
+	.name = "CMT1",
+	.channel_offset = 0x08,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 0, /* disabled due to code generation issues */
+};
+
+static struct resource cmt1_resources[] = {
+	[0] = {
+		.name	= "CMT1",
+		.start	= 0xfffec008,
+		.end	= 0xfffec00d,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 144,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt1_device = {
+	.name		= "sh_cmt",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &cmt1_platform_data,
+	},
+	.resource	= cmt1_resources,
+	.num_resources	= ARRAY_SIZE(cmt1_resources),
+};
+
 static struct platform_device *sh7206_devices[] __initdata = {
 	&sci_device,
+	&cmt0_device,
+	&cmt1_device,
 };
 
 static int __init sh7206_devices_setup(void)
@@ -180,3 +248,19 @@ void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *sh7206_early_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+};
+
+#define STBCR4 0xfffe040c
+
+void __init plat_early_device_setup(void)
+{
+	/* enable CMT clock */
+	__raw_writeb(__raw_readb(STBCR4) & ~0x04, STBCR4);
+
+	early_platform_add_devices(sh7206_early_devices,
+				   ARRAY_SIZE(sh7206_early_devices));
+}
-- 
cgit v0.10.2


From f425752fc66acf1d4e47970ea704ed7d31c14173 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 30 Apr 2009 04:09:26 +0000
Subject: sh: remove old CMT driver

This patch removes the old CMT driver (CONFIG_SH_CMT/timer-cmt.c)

As replacement, select the sh_cmt driver with CONFIG_SH_TIMER_CMT
and configure timer channel using platform data.

If multiple CMT channels are enabled using platform data, use the
earlytimer parameter on the kernel command line to select channel.
For instance, use "earlytimer=sh_cmt.0" to select the first channel.

To verify which timer is being used, look at printouts or the timer
irq count in /proc/interrupts.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 9db02ce..7e96ade 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -462,22 +462,14 @@ config SH_TMU
 	help
 	  This enables the use of the TMU as the system timer.
 
-config SH_CMT
-	bool "CMT timer support"
-	depends on SYS_SUPPORTS_CMT && CPU_SH2
-	default y
-	help
-	  This enables the use of the CMT as the system timer.
-
-#
-# Support for the new-style CMT driver. This will replace SH_CMT
-# once its other dependencies are merged.
-#
 config SH_TIMER_CMT
-	bool "CMT clockevents driver"
-	depends on SYS_SUPPORTS_CMT && !SH_CMT
+	bool "CMT timer driver"
+	depends on SYS_SUPPORTS_CMT
+	default y
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_TIME
+	help
+	  This enables build of the CMT timer driver.
 
 config SH_MTU2
 	bool "MTU2 timer support"
diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h
index 4c3b66e3..8f80a55 100644
--- a/arch/sh/include/asm/timer.h
+++ b/arch/sh/include/asm/timer.h
@@ -23,7 +23,7 @@ struct sys_timer {
 
 #define TICK_SIZE (tick_nsec / 1000)
 
-extern struct sys_timer tmu_timer, cmt_timer, mtu2_timer;
+extern struct sys_timer tmu_timer, mtu2_timer;
 extern struct sys_timer *sys_timer;
 
 #ifndef CONFIG_GENERIC_TIME
diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile
index 0b7f857..1e9a104 100644
--- a/arch/sh/kernel/timers/Makefile
+++ b/arch/sh/kernel/timers/Makefile
@@ -6,6 +6,5 @@ obj-y	:= timer.o
 
 obj-$(CONFIG_SH_TMU)		+= timer-tmu.o
 obj-$(CONFIG_SH_MTU2)		+= timer-mtu2.o
-obj-$(CONFIG_SH_CMT)		+= timer-cmt.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= timer-broadcast.o
diff --git a/arch/sh/kernel/timers/timer-cmt.c b/arch/sh/kernel/timers/timer-cmt.c
deleted file mode 100644
index 9aa3486..0000000
--- a/arch/sh/kernel/timers/timer-cmt.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * arch/sh/kernel/timers/timer-cmt.c - CMT Timer Support
- *
- *  Copyright (C) 2005  Yoshinori Sato
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/seqlock.h>
-#include <asm/timer.h>
-#include <asm/rtc.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/clock.h>
-
-#if defined(CONFIG_CPU_SUBTYPE_SH7619)
-#define CMT_CMSTR	0xf84a0070
-#define CMT_CMCSR_0	0xf84a0072
-#define CMT_CMCNT_0	0xf84a0074
-#define CMT_CMCOR_0	0xf84a0076
-#define CMT_CMCSR_1	0xf84a0078
-#define CMT_CMCNT_1	0xf84a007a
-#define CMT_CMCOR_1	0xf84a007c
-
-#define STBCR3		0xf80a0000
-#define cmt_clock_enable() do {	ctrl_outb(ctrl_inb(STBCR3) & ~0x10, STBCR3); } while(0)
-#define CMT_CMCSR_INIT	0x0040
-#define CMT_CMCSR_CALIB	0x0000
-#elif defined(CONFIG_CPU_SUBTYPE_SH7203) || \
-      defined(CONFIG_CPU_SUBTYPE_SH7206) || \
-      defined(CONFIG_CPU_SUBTYPE_SH7263)
-#define CMT_CMSTR	0xfffec000
-#define CMT_CMCSR_0	0xfffec002
-#define CMT_CMCNT_0	0xfffec004
-#define CMT_CMCOR_0	0xfffec006
-
-#define STBCR4		0xfffe040c
-#define cmt_clock_enable() do {	ctrl_outb(ctrl_inb(STBCR4) & ~0x04, STBCR4); } while(0)
-#define CMT_CMCSR_INIT	0x0040
-#define CMT_CMCSR_CALIB	0x0000
-#else
-#error "Unknown CPU SUBTYPE"
-#endif
-
-static unsigned long cmt_timer_get_offset(void)
-{
-	int count;
-	static unsigned short count_p = 0xffff;    /* for the first call after boot */
-	static unsigned long jiffies_p = 0;
-
-	/*
-	 * cache volatile jiffies temporarily; we have IRQs turned off.
-	 */
-	unsigned long jiffies_t;
-
-	/* timer count may underflow right here */
-	count =  ctrl_inw(CMT_CMCOR_0);
-	count -= ctrl_inw(CMT_CMCNT_0);
-
-	jiffies_t = jiffies;
-
-	/*
-	 * avoiding timer inconsistencies (they are rare, but they happen)...
-	 * there is one kind of problem that must be avoided here:
-	 *  1. the timer counter underflows
-	 */
-
-	if (jiffies_t == jiffies_p) {
-		if (count > count_p) {
-			/* the nutcase */
-			if (ctrl_inw(CMT_CMCSR_0) & 0x80) { /* Check CMF bit */
-				count -= LATCH;
-			} else {
-				printk("%s (): hardware timer problem?\n",
-				       __func__);
-			}
-		}
-	} else
-		jiffies_p = jiffies_t;
-
-	count_p = count;
-
-	count = ((LATCH-1) - count) * TICK_SIZE;
-	count = (count + LATCH/2) / LATCH;
-
-	return count;
-}
-
-static irqreturn_t cmt_timer_interrupt(int irq, void *dev_id)
-{
-	unsigned long timer_status;
-
-	/* Clear CMF bit */
-	timer_status = ctrl_inw(CMT_CMCSR_0);
-	timer_status &= ~0x80;
-	ctrl_outw(timer_status, CMT_CMCSR_0);
-
-	handle_timer_tick();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction cmt_irq = {
-	.name		= "timer",
-	.handler	= cmt_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-};
-
-static void cmt_clk_init(struct clk *clk)
-{
-	u8 divisor = CMT_CMCSR_INIT & 0x3;
-	ctrl_inw(CMT_CMCSR_0);
-	ctrl_outw(CMT_CMCSR_INIT, CMT_CMCSR_0);
-	clk->parent = clk_get(NULL, "module_clk");
-	clk->rate = clk->parent->rate / (8 << (divisor << 1));
-}
-
-static void cmt_clk_recalc(struct clk *clk)
-{
-	u8 divisor = ctrl_inw(CMT_CMCSR_0) & 0x3;
-	clk->rate = clk->parent->rate / (8 << (divisor << 1));
-}
-
-static struct clk_ops cmt_clk_ops = {
-	.init		= cmt_clk_init,
-	.recalc		= cmt_clk_recalc,
-};
-
-static struct clk cmt0_clk = {
-	.name		= "cmt0_clk",
-	.ops		= &cmt_clk_ops,
-};
-
-static int cmt_timer_start(void)
-{
-	ctrl_outw(ctrl_inw(CMT_CMSTR) | 0x01, CMT_CMSTR);
-	return 0;
-}
-
-static int cmt_timer_stop(void)
-{
-	ctrl_outw(ctrl_inw(CMT_CMSTR) & ~0x01, CMT_CMSTR);
-	return 0;
-}
-
-static int cmt_timer_init(void)
-{
-	unsigned long interval;
-
-	cmt_clock_enable();
-
-	setup_irq(CONFIG_SH_TIMER_IRQ, &cmt_irq);
-
-	cmt0_clk.parent = clk_get(NULL, "module_clk");
-
-	cmt_timer_stop();
-
-	interval = cmt0_clk.parent->rate / 8 / HZ;
-	printk(KERN_INFO "Interval = %ld\n", interval);
-
-	ctrl_outw(interval, CMT_CMCOR_0);
-
-	clk_register(&cmt0_clk);
-	clk_enable(&cmt0_clk);
-
-	cmt_timer_start();
-
-	return 0;
-}
-
-static struct sys_timer_ops cmt_timer_ops = {
-	.init		= cmt_timer_init,
-	.start		= cmt_timer_start,
-	.stop		= cmt_timer_stop,
-#ifndef CONFIG_GENERIC_TIME
-	.get_offset	= cmt_timer_get_offset,
-#endif
-};
-
-struct sys_timer cmt_timer = {
-	.name	= "cmt",
-	.ops	= &cmt_timer_ops,
-};
diff --git a/arch/sh/kernel/timers/timer.c b/arch/sh/kernel/timers/timer.c
index 4e7e747..f3bd141 100644
--- a/arch/sh/kernel/timers/timer.c
+++ b/arch/sh/kernel/timers/timer.c
@@ -20,9 +20,6 @@ static struct sys_timer *sys_timers[] = {
 #ifdef CONFIG_SH_MTU2
 	&mtu2_timer,
 #endif
-#ifdef CONFIG_SH_CMT
-	&cmt_timer,
-#endif
 	NULL,
 };
 
-- 
cgit v0.10.2


From c5dd016cdf0a040e1de0b691e274fbfe642b2cdc Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 30 Apr 2009 09:48:16 +1000
Subject: perf_counter: update copyright notice

This adds my name to the list of copyright holders on the core
perf_counter.c, since I have contributed a significant amount of the
code in there.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <18936.59200.888049.746658@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a95a171..75f2b6c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -4,6 +4,7 @@
  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright  �  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
  *  For licensing details see kernel-base/COPYING
  */
-- 
cgit v0.10.2


From a511e3f968c462a55ef58697257f5347c73d306e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 29 Apr 2009 15:59:58 -0700
Subject: mutex: add atomic_dec_and_mutex_lock(), fix

include/linux/mutex.h:136: warning: 'mutex_lock' declared inline after being called
 include/linux/mutex.h:136: warning: previous declaration of 'mutex_lock' was here

uninline it.

[ Impact: clean up and uninline, address compiler warning ]

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Eric Paris <eparis@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <200904292318.n3TNIsi6028340@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 93054fc..878cab4 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -150,28 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
  */
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
-
-/**
- * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
- * @cnt: the atomic which we are to dec
- * @lock: the mutex to return holding if we dec to 0
- *
- * return true and hold lock if we dec to 0, return false otherwise
- */
-static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
-{
-	/* dec if we can't possibly hit 0 */
-	if (atomic_add_unless(cnt, -1, 1))
-		return 0;
-	/* we might hit 0, so take the lock */
-	mutex_lock(lock);
-	if (!atomic_dec_and_test(cnt)) {
-		/* when we actually did the dec, we didn't hit 0 */
-		mutex_unlock(lock);
-		return 0;
-	}
-	/* we hit 0, and we hold the lock */
-	return 1;
-}
+extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
 #endif
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 507cf2b..e2d25e9 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -471,5 +471,28 @@ int __sched mutex_trylock(struct mutex *lock)
 
 	return ret;
 }
-
 EXPORT_SYMBOL(mutex_trylock);
+
+/**
+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
+ * @cnt: the atomic which we are to dec
+ * @lock: the mutex to return holding if we dec to 0
+ *
+ * return true and hold lock if we dec to 0, return false otherwise
+ */
+int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
+{
+	/* dec if we can't possibly hit 0 */
+	if (atomic_add_unless(cnt, -1, 1))
+		return 0;
+	/* we might hit 0, so take the lock */
+	mutex_lock(lock);
+	if (!atomic_dec_and_test(cnt)) {
+		/* when we actually did the dec, we didn't hit 0 */
+		mutex_unlock(lock);
+		return 0;
+	}
+	/* we hit 0, and we hold the lock */
+	return 1;
+}
+EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
-- 
cgit v0.10.2


From 2b72394e4089643f11669d9610907a1442fe044a Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Tue, 28 Apr 2009 16:00:49 +0300
Subject: x86: move max_pfn_mapped and max_low_pfn_mapped to setup.c

This patch moves the max_pfn_mapped and max_low_pfn_mapped global
variables to kernel/setup.c where they're initialized.

[ Impact: cleanup ]

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
LKML-Reference: <1240923649.1982.21.camel@penberg-laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b415843..0d77e56 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,14 @@
 #define ARCH_SETUP
 #endif
 
+/*
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+unsigned long max_low_pfn_mapped;
+unsigned long max_pfn_mapped;
+
 RESERVE_BRK(dmi_alloc, 65536);
 
 unsigned int boot_cpu_id __read_mostly;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2b27120..a640a7f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,11 +49,9 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/page_types.h>
 #include <asm/init.h>
 
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a4e7846..1016ea0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -50,14 +50,6 @@
 #include <asm/cacheflush.h>
 #include <asm/init.h>
 
-/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
 static unsigned long dma_reserve __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-- 
cgit v0.10.2


From 9518e0e4350a5ea8ca200ce320b28d6284a7b0ce Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Tue, 28 Apr 2009 16:00:50 +0300
Subject: x86: move per-cpu mmu_gathers to mm/init.c

[ Impact: cleanup ]

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
LKML-Reference: <1240923650.1982.22.camel@penberg-laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fedde53..4d67c33 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -9,6 +9,9 @@
 #include <asm/sections.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 unsigned long __initdata e820_table_start;
 unsigned long __meminitdata e820_table_end;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index a640a7f..fef1d90 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -52,7 +52,6 @@
 #include <asm/page_types.h>
 #include <asm/init.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
 static noinline int do_test_wp_bit(void);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1016ea0..6a1a573 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -52,8 +52,6 @@
 
 static unsigned long dma_reserve __initdata;
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 static int __init parse_direct_gbpages_off(char *arg)
 {
 	direct_gbpages = 0;
-- 
cgit v0.10.2


From ba9c22f2c01cf5c88beed5a6b9e07d42e10bd358 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Mon, 20 Apr 2009 22:22:22 -0700
Subject: futex: remove FUTEX_REQUEUE_PI (non CMP)

The new requeue PI futex op codes were modeled after the existing
FUTEX_REQUEUE and FUTEX_CMP_REQUEUE calls.  I was unaware at the time
that FUTEX_REQUEUE was only around for compatibility reasons and
shouldn't be used in new code.  Ulrich Drepper elaborates on this in his
Futexes are Tricky paper: http://people.redhat.com/drepper/futex.pdf.
The deprecated call doesn't catch changes to the futex corresponding to
the destination futex which can lead to deadlock.

Therefor, I feel it best to remove FUTEX_REQUEUE_PI and leave only
FUTEX_CMP_REQUEUE_PI as there are not yet any existing users of the API.
This patch does change the OP code value of FUTEX_CMP_REQUEUE_PI to 12
from 13.  Since my test case is the only known user of this API, I felt
this was the right thing to do, rather than leave a hole in the
enumeration.

I chose to continue using the _CMP_ modifier in the OP code to make it
explicit to the user that the test is being done.

Builds, boots, and ran several hundred iterations requeue_pi.c.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
LKML-Reference: <49ED580E.1050502@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/futex.h b/include/linux/futex.h
index b05519c..34956c8 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -24,8 +24,7 @@ union ktime;
 #define FUTEX_WAIT_BITSET	9
 #define FUTEX_WAKE_BITSET	10
 #define FUTEX_WAIT_REQUEUE_PI	11
-#define FUTEX_REQUEUE_PI	12
-#define FUTEX_CMP_REQUEUE_PI	13
+#define FUTEX_CMP_REQUEUE_PI	12
 
 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
@@ -43,7 +42,6 @@ union ktime;
 #define FUTEX_WAKE_BITSET_PRIVATE	(FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAIT_REQUEUE_PI_PRIVATE	(FUTEX_WAIT_REQUEUE_PI | \
 					 FUTEX_PRIVATE_FLAG)
-#define FUTEX_REQUEUE_PI_PRIVATE	(FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
 					 FUTEX_PRIVATE_FLAG)
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 6d2daa4..aec8bf8 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2555,9 +2555,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
 					    clockrt, uaddr2);
 		break;
-	case FUTEX_REQUEUE_PI:
-		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1);
-		break;
 	case FUTEX_CMP_REQUEUE_PI:
 		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
 				    1);
@@ -2596,8 +2593,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
 	 */
 	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
-	    cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI ||
-	    cmd == FUTEX_WAKE_OP)
+	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
 		val2 = (u32) (unsigned long) utime;
 
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
-- 
cgit v0.10.2


From 83c4832683bc8ebcd1687b3c0bf3ba1ab253dd4f Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Thu, 30 Apr 2009 12:03:16 +0200
Subject: x86: boot/compressed/vmlinux.lds.S: fix build of bzImage with 64 bit
 compiler

Jesper reported that he saw following build issue:

 > ld:arch/x86/boot/compressed/vmlinux.lds:9: syntax error
 > make[2]: *** [arch/x86/boot/compressed/vmlinux] Error 1
 > make[1]: *** [arch/x86/boot/compressed/vmlinux] Error 2
 > make: *** [bzImage] Error 2

CPP defines the symbol "i386" to "1".
Undefine this to fix it.

[ Impact: build fix with certain tool chains ]

Reported-by: Jesper Dangaard Brouer <jdb@comx.dk>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.0904260958190.3101@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index ffcb1913..0d26c92 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,5 +1,7 @@
 OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
 
+#undef i386
+
 #ifdef CONFIG_X86_64
 OUTPUT_ARCH(i386:x86-64)
 ENTRY(startup_64)
-- 
cgit v0.10.2


From aac3f3c2c41ce49a6dbb98d9145265c00a964dc2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 30 Apr 2009 13:52:19 +0200
Subject: perf_counter tools: add perf-report to the Makefile

Build it explicitly until it's a proper builtin command.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 543ccf2..877cf5d 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -228,7 +228,7 @@ COMPAT_CFLAGS =
 COMPAT_OBJS =
 LIB_H =
 LIB_OBJS =
-PROGRAMS =
+PROGRAMS = perf-report
 SCRIPT_PERL =
 SCRIPT_SH =
 TEST_PROGRAMS =
@@ -808,6 +808,10 @@ clean:
 	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
 	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS
 
+# temporary hack:
+perf-report: perf-report.cc ../../include/linux/perf_counter.h Makefile
+	g++ -g -O2 -Wall -lrt -o $@ $<
+
 .PHONY: all install clean strip
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
-- 
cgit v0.10.2


From 66cf782996f3d57d3cc199f0a2d47a54e2aa5991 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 30 Apr 2009 13:53:33 +0200
Subject: perf_counter tools: perf stat: make -l default-on

Turn on scaling display by default - this is less confusing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 112b94e..1fde127 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -171,7 +171,7 @@ static unsigned int		page_size;
 
 static int			zero;
 
-static int			scale;
+static int			scale				=  1;
 
 static const unsigned int default_count[] = {
 	1000000,
-- 
cgit v0.10.2


From bad760089c1ef7fe525c0f268a4078b9cb483903 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 30 Apr 2009 14:14:37 +0200
Subject: perf_counter tools: fix infinite loop in perf-report on zeroed event
 records

Bail out early if a record has zero size - we have no chance to make
reliable progress in that case. Print out the offset where this happens,
and print the number of bytes we missed out on.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc
index 1727317..933a075 100644
--- a/Documentation/perf_counter/perf-report.cc
+++ b/Documentation/perf_counter/perf-report.cc
@@ -13,6 +13,7 @@
 #include <ctype.h>
 #include <time.h>
 #include <getopt.h>
+#include <assert.h>
 
 #include <sys/ioctl.h>
 #include <sys/poll.h>
@@ -226,7 +227,7 @@ void load_kallsyms(void)
 	while (!feof(file)) {
 		uint64_t start;
 		char c;
-		char sym[1024];
+		char sym[1024000];
 
 		if (getline(&line, &n, file) < 0)
 			break;
@@ -416,12 +417,23 @@ more:
 
 	if (head + event->header.size >= page_size * mmap_window) {
 		unsigned long shift = page_size * (head / page_size);
+		int ret;
+
+		ret = munmap(buf, page_size * mmap_window);
+		assert(ret == 0);
 
-		munmap(buf, page_size * mmap_window);
 		offset += shift;
 		head -= shift;
 		goto remap;
 	}
+
+
+	if (!event->header.size) {
+		fprintf(stderr, "zero-sized event at file offset %ld\n", offset + head);
+		fprintf(stderr, "skipping %ld bytes of events.\n", stat.st_size - offset - head);
+		goto done;
+	}
+
 	head += event->header.size;
 
 	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
@@ -458,6 +470,8 @@ more:
 	if (offset + head < stat.st_size)
 		goto more;
 
+done:
+
 	close(input);
 
 	std::map<std::string, int>::iterator hi = hist.begin();
-- 
cgit v0.10.2


From 09aa60df92a9c5ff00e156c0dbc79f166d406a7f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 29 Apr 2009 18:51:48 +0100
Subject: ASoC: Fix error message formatting in s3c64xx-i2s driver

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c
index 1345fbd..7679f7b 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.c
@@ -215,7 +215,7 @@ static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev)
 
 	i2s->iis_cclk = clk_get(&pdev->dev, "audio-bus");
 	if (IS_ERR(i2s->iis_cclk)) {
-		dev_err(&pdev->dev, "failed to get audio-bus");
+		dev_err(&pdev->dev, "failed to get audio-bus\n");
 		ret = PTR_ERR(i2s->iis_cclk);
 		goto err;
 	}
-- 
cgit v0.10.2


From 8a0f62b842e2f189e36d9f4c575ee15da9c605ff Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 29 Apr 2009 20:28:47 +0100
Subject: ASoC: Check for supported CPUs when building s3c-i2s-v2

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index ab680aa..3b9201c 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -37,6 +37,20 @@
 
 #include "s3c-i2s-v2.h"
 
+#undef S3C_IIS_V2_SUPPORTED
+
+#if defined(CONFIG_CPU_S3C2412) || defined(CONFIG_CPU_S3C2413)
+#define S3C_IIS_V2_SUPPORTED
+#endif
+
+#ifdef CONFIG_PLAT_S3C64XX
+#define S3C_IIS_V2_SUPPORTED
+#endif
+
+#ifndef S3C_IIS_V2_SUPPORTED
+#error Unsupported CPU model
+#endif
+
 #define S3C2412_I2S_DEBUG_CON 0
 
 static inline struct s3c_i2sv2_info *to_info(struct snd_soc_dai *cpu_dai)
-- 
cgit v0.10.2


From 51438449e717db54550b4676f38208092eb654da Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 29 Apr 2009 20:30:39 +0100
Subject: ASoC: Make S3C64xx clock export function to return struct clk

This makes the interface usable with the s3c-iis-v2 rate calculator
and consistent with S3C2412.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c
index 7679f7b..cb11f78 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.c
@@ -108,14 +108,13 @@ static int s3c64xx_i2s_set_sysclk(struct snd_soc_dai *cpu_dai,
 	return 0;
 }
 
-
-unsigned long s3c64xx_i2s_get_clockrate(struct snd_soc_dai *dai)
+struct clk *s3c64xx_i2s_get_clock(struct snd_soc_dai *dai)
 {
 	struct s3c_i2sv2_info *i2s = to_info(dai);
 
-	return clk_get_rate(i2s->iis_cclk);
+	return i2s->iis_cclk;
 }
-EXPORT_SYMBOL_GPL(s3c64xx_i2s_get_clockrate);
+EXPORT_SYMBOL_GPL(s3c64xx_i2s_get_clock);
 
 static int s3c64xx_i2s_probe(struct platform_device *pdev,
 			     struct snd_soc_dai *dai)
diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.h b/sound/soc/s3c24xx/s3c64xx-i2s.h
index 597822a..02148ce 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.h
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.h
@@ -15,6 +15,8 @@
 #ifndef __SND_SOC_S3C24XX_S3C64XX_I2S_H
 #define __SND_SOC_S3C24XX_S3C64XX_I2S_H __FILE__
 
+struct clk;
+
 #include "s3c-i2s-v2.h"
 
 #define S3C64XX_DIV_BCLK	S3C_I2SV2_DIV_BCLK
@@ -26,6 +28,6 @@
 
 extern struct snd_soc_dai s3c64xx_i2s_dai[];
 
-extern unsigned long s3c64xx_i2s_get_clockrate(struct snd_soc_dai *cpu_dai);
+extern struct clk *s3c64xx_i2s_get_clock(struct snd_soc_dai *dai);
 
 #endif /* __SND_SOC_S3C24XX_S3C64XX_I2S_H */
-- 
cgit v0.10.2


From 553b1dd58c5cf1abd6d0965041169400a3cff1ad Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 29 Apr 2009 20:29:25 +0100
Subject: ASoC: Fix data format configuration for S3C64xx IISv2 and add 24 bit

The data format configuration for S3C64xx IISv2 is completely different
to that for S3C24xx. Instead of a single bit configuration in bit 0 of
IISMOD we have format selection in bits 13 and 14 and bit clock rate
selection in bits 1 and 2. While we're here add support for 24 bit
samples in S3C64xx.

At some point it may be desirable to expose the bit clock rate selection
to users but given the limited configuration options that may not be
required.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index 3b9201c..54f4119 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -280,7 +280,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
  */
 #define IISMOD_MASTER_MASK (1 << 11)
 #define IISMOD_SLAVE (1 << 11)
-#define IISMOD_MASTER (0x0)
+#define IISMOD_MASTER (0 << 11)
 #endif
 
 	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
@@ -341,6 +341,7 @@ static int s3c2412_i2s_hw_params(struct snd_pcm_substream *substream,
 	iismod = readl(i2s->regs + S3C2412_IISMOD);
 	pr_debug("%s: r: IISMOD: %x\n", __func__, iismod);
 
+#if defined(CONFIG_CPU_S3C2412) || defined(CONFIG_CPU_S3C2413)
 	switch (params_format(params)) {
 	case SNDRV_PCM_FORMAT_S8:
 		iismod |= S3C2412_IISMOD_8BIT;
@@ -349,6 +350,25 @@ static int s3c2412_i2s_hw_params(struct snd_pcm_substream *substream,
 		iismod &= ~S3C2412_IISMOD_8BIT;
 		break;
 	}
+#endif
+
+#ifdef CONFIG_PLAT_S3C64XX
+	iismod &= ~0x606;
+	/* Sample size */
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S8:
+		/* 8 bit sample, 16fs BCLK */
+		iismod |= 0x2004;
+		break;
+	case SNDRV_PCM_FORMAT_S16_LE:
+		/* 16 bit sample, 32fs BCLK */
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		/* 24 bit sample, 48fs BCLK */
+		iismod |= 0x4002;
+		break;
+	}
+#endif
 
 	writel(iismod, i2s->regs + S3C2412_IISMOD);
 	pr_debug("%s: w: IISMOD: %x\n", __func__, iismod);
diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c
index cb11f78..e0f4a16 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.c
@@ -146,7 +146,8 @@ static int s3c64xx_i2s_probe(struct platform_device *pdev,
 	SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000)
 
 #define S3C64XX_I2S_FMTS \
-	(SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE)
+	(SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE |\
+	 SNDRV_PCM_FMTBIT_S24_LE)
 
 static struct snd_soc_dai_ops s3c64xx_i2s_dai_ops = {
 	.set_sysclk	= s3c64xx_i2s_set_sysclk,	
-- 
cgit v0.10.2


From 07736d48051869c37838635b41850618aa63b9a7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 30 Apr 2009 13:13:14 +0100
Subject: ASoC: Fix boot warnings from S3C IISv2

On startup we try to make sure that the port is quiesced but if the
port is already stopped then this will generate a warning about the
RX/TX mode configuration. Configure the mode before doing the teardown
to suppress these warnings.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index 54f4119..34142c8 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -573,6 +573,7 @@ int s3c_i2sv2_probe(struct platform_device *pdev,
 		    unsigned long base)
 {
 	struct device *dev = &pdev->dev;
+	unsigned int iismod;
 
 	i2s->dev = dev;
 
@@ -594,12 +595,16 @@ int s3c_i2sv2_probe(struct platform_device *pdev,
 
 	clk_enable(i2s->iis_pclk);
 
+	/* Mark ourselves as in TXRX mode so we can run through our cleanup
+	 * process without warnings. */
+	iismod = readl(i2s->regs + S3C2412_IISMOD);
+	iismod |= S3C2412_IISMOD_MODE_TXRX;
+	writel(iismod, i2s->regs + S3C2412_IISMOD);
 	s3c2412_snd_txctrl(i2s, 0);
 	s3c2412_snd_rxctrl(i2s, 0);
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(s3c_i2sv2_probe);
 
 #ifdef CONFIG_PM
-- 
cgit v0.10.2


From c86bde54062a4d02c1b58203b7802797e4007a8a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 30 Apr 2009 13:09:33 +0100
Subject: ASoC: Allow use of resource from the platform device for S3C IISv2

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index 34142c8..cb85498 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -580,6 +580,24 @@ int s3c_i2sv2_probe(struct platform_device *pdev,
 	/* record our i2s structure for later use in the callbacks */
 	dai->private_data = i2s;
 
+	if (!base) {
+		struct resource *res = platform_get_resource(pdev,
+							     IORESOURCE_MEM,
+							     0);
+		if (!res) {
+			dev_err(dev, "Unable to get register resource\n");
+			return -ENXIO;
+		}
+
+		if (!request_mem_region(res->start, resource_size(res),
+					"s3c64xx-i2s-v4")) {
+			dev_err(dev, "Unable to request register region\n");
+			return -EBUSY;
+		}
+
+		base = res->start;
+	}
+
 	i2s->regs = ioremap(base, 0x100);
 	if (i2s->regs == NULL) {
 		dev_err(dev, "cannot ioremap registers\n");
-- 
cgit v0.10.2


From af3ea7bdc77be000f69a41e7c41060f72b5a7111 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 30 Apr 2009 13:13:55 +0100
Subject: ASoC: Display the clock rate used as the basis for rate calculation

Aids debugging.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index cb85498..ad690b2 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -523,6 +523,8 @@ int s3c_i2sv2_iis_calc_rate(struct s3c_i2sv2_rate_calc *info,
 	unsigned int best_rate = 0;
 	unsigned int best_deviation = INT_MAX;
 
+	pr_debug("Input clock rate %ldHz\n", clkrate);
+
 	if (fstab == NULL)
 		fstab = iis_fs_tab;
 
-- 
cgit v0.10.2


From 38e43c81a07de8ee8a757a9c93dd3a4937dd35e0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 30 Apr 2009 13:14:38 +0100
Subject: ASoC: Display S3C IISv2 mode and MS errors by default

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index ad690b2..bc4e504 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -295,7 +295,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
 		iismod |= IISMOD_MASTER;
 		break;
 	default:
-		pr_debug("unknwon master/slave format\n");
+		pr_err("unknwon master/slave format\n");
 		return -EINVAL;
 	}
 
@@ -312,7 +312,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
 		iismod |= S3C2412_IISMOD_SDF_IIS;
 		break;
 	default:
-		pr_debug("Unknown data format\n");
+		pr_err("Unknown data format\n");
 		return -EINVAL;
 	}
 
-- 
cgit v0.10.2


From abbc82466967064e4eaafa367fc225a8c803569c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 30 Apr 2009 13:21:52 +0100
Subject: ASoC: Staticise txctrl and rxctrl for S3C IISv2

They aren't used by anything external and aren't prototyped; if any
users appear they can be exported again for them.

Also report what modes we have a problem with when we encounter invalid
mode configurations.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index bc4e504..972c276 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -89,7 +89,7 @@ static inline void dbg_showcon(const char *fn, u32 con)
 
 
 /* Turn on or off the transmission path. */
-void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
+static void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 {
 	void __iomem *regs = i2s->regs;
 	u32 fic, con, mod;
@@ -119,7 +119,9 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "TXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "TXEN: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
+			break;
 		}
 
 		writel(con, regs + S3C2412_IISCON);
@@ -146,7 +148,9 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "TXDIS: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "TXDIS: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
+			break;
 		}
 
 		writel(mod, regs + S3C2412_IISMOD);
@@ -157,9 +161,8 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on)
 	dbg_showcon(__func__, con);
 	pr_debug("%s: IIS: CON=%x MOD=%x FIC=%x\n", __func__, con, mod, fic);
 }
-EXPORT_SYMBOL_GPL(s3c2412_snd_txctrl);
 
-void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
+static void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 {
 	void __iomem *regs = i2s->regs;
 	u32 fic, con, mod;
@@ -189,7 +192,8 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
 		}
 
 		writel(mod, regs + S3C2412_IISMOD);
@@ -213,7 +217,8 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 			break;
 
 		default:
-			dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n");
+			dev_err(i2s->dev, "RXDIS: Invalid MODE %x in IISMOD\n",
+				mod & S3C2412_IISMOD_MODE_MASK);
 		}
 
 		writel(con, regs + S3C2412_IISCON);
@@ -223,7 +228,6 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on)
 	fic = readl(regs + S3C2412_IISFIC);
 	pr_debug("%s: IIS: CON=%x MOD=%x FIC=%x\n", __func__, con, mod, fic);
 }
-EXPORT_SYMBOL_GPL(s3c2412_snd_rxctrl);
 
 /*
  * Wait for the LR signal to allow synchronisation to the L/R clock
-- 
cgit v0.10.2


From 71437552f2564c0d0c5cc4995045683051c5fe62 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 30 Apr 2009 13:42:04 +0100
Subject: ASoC: Use platform device resource for S3C64xx IISv2

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c
index e0f4a16..3c06c40 100644
--- a/sound/soc/s3c24xx/s3c64xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c64xx-i2s.c
@@ -220,8 +220,7 @@ static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ret = s3c_i2sv2_probe(pdev, dai, i2s,
-			      dai->id ? S3C64XX_PA_IIS1 : S3C64XX_PA_IIS0);
+	ret = s3c_i2sv2_probe(pdev, dai, i2s, 0);
 	if (ret)
 		goto err_clk;
 
-- 
cgit v0.10.2


From 03682411b1ccd38cbde2e9a6ab43884ff34fbefc Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Thu, 30 Apr 2009 18:38:01 +0200
Subject: alim15x3: Remove historical hacks, re-enable init_hwif for PowerPC

Some time ago we had to disable init_hwif callback for PowerPC builds.
That was because of a historical IRQ overwrite in the driver, which
was causing IDE malfunction on the MPC8610HPCD PowerPC boards.

It's unclear whether this overwrite is still useful, but it is proven
to cause a bit of harm, and today some PowerPC targets (Xilinx ML510,
as reported by Roderick Colenbrander) need the init_hwif, so we have
to re-enable it and remove the overwrite.

Reported-by: Roderick Colenbrander <thunderbird2k@gmail.com>
Suggested-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 537da1c..e59b6de 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -402,27 +402,23 @@ static u8 ali_cable_detect(ide_hwif_t *hwif)
 	return cbl;
 }
 
-#if !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC)
+#ifndef CONFIG_SPARC64
 /**
  *	init_hwif_ali15x3	-	Initialize the ALI IDE x86 stuff
  *	@hwif: interface to configure
  *
  *	Obtain the IRQ tables for an ALi based IDE solution on the PC
  *	class platforms. This part of the code isn't applicable to the
- *	Sparc and PowerPC systems.
+ *	Sparc systems.
  */
 
 static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif)
 {
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u8 ideic, inmir;
 	s8 irq_routing_table[] = { -1,  9, 3, 10, 4,  5, 7,  6,
 				      1, 11, 0, 12, 0, 14, 0, 15 };
 	int irq = -1;
 
-	if (dev->device == PCI_DEVICE_ID_AL_M5229)
-		hwif->irq = hwif->channel ? 15 : 14;
-
 	if (isa_dev) {
 		/*
 		 * read IDE interface control
@@ -455,7 +451,7 @@ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif)
 }
 #else
 #define init_hwif_ali15x3 NULL
-#endif /* !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) */
+#endif /* CONFIG_SPARC64 */
 
 /**
  *	init_dma_ali15x3	-	set up DMA on ALi15x3
-- 
cgit v0.10.2


From 30b4ae8a4498543863501f707879b7220b649602 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 4 Apr 2009 21:01:01 +0000
Subject: signals: split do_tkill

Split out the code from do_tkill to make it reusable by the follow up
patch which implements sys_rt_tgsigqueueinfo

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>

diff --git a/kernel/signal.c b/kernel/signal.c
index d803473..56d27ac 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2278,24 +2278,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
 	return kill_something_info(sig, &info, pid);
 }
 
-static int do_tkill(pid_t tgid, pid_t pid, int sig)
+static int
+do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 {
-	int error;
-	struct siginfo info;
 	struct task_struct *p;
 	unsigned long flags;
-
-	error = -ESRCH;
-	info.si_signo = sig;
-	info.si_errno = 0;
-	info.si_code = SI_TKILL;
-	info.si_pid = task_tgid_vnr(current);
-	info.si_uid = current_uid();
+	int error = -ESRCH;
 
 	rcu_read_lock();
 	p = find_task_by_vpid(pid);
 	if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
-		error = check_kill_permission(sig, &info, p);
+		error = check_kill_permission(sig, info, p);
 		/*
 		 * The null signal is a permissions and process existence
 		 * probe.  No signal is actually delivered.
@@ -2305,7 +2298,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
 		 * signal is private anyway.
 		 */
 		if (!error && sig && lock_task_sighand(p, &flags)) {
-			error = specific_send_sig_info(sig, &info, p);
+			error = specific_send_sig_info(sig, info, p);
 			unlock_task_sighand(p, &flags);
 		}
 	}
@@ -2314,6 +2307,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
 	return error;
 }
 
+static int do_tkill(pid_t tgid, pid_t pid, int sig)
+{
+	struct siginfo info;
+
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_TKILL;
+	info.si_pid = task_tgid_vnr(current);
+	info.si_uid = current_uid();
+
+	return do_send_specific(tgid, pid, sig, &info);
+}
+
 /**
  *  sys_tgkill - send signal to one specific thread
  *  @tgid: the thread group ID of the thread
-- 
cgit v0.10.2


From 62ab4505e3efaf67784f84059e0fb9cedb1728ea Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 4 Apr 2009 21:01:06 +0000
Subject: signals: implement sys_rt_tgsigqueueinfo

sys_kill has the per thread counterpart sys_tgkill. sigqueueinfo is
missing a thread directed counterpart. Such an interface is important
for migrating applications from other OSes which have the per thread
delivery implemented.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Acked-by: Ulrich Drepper <drepper@redhat.com>

diff --git a/include/linux/compat.h b/include/linux/compat.h
index f2ded21..af931ee 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
+long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+				  struct compat_siginfo __user *uinfo);
 
 static inline int compat_timeval_compare(struct compat_timeval *lhs,
 					struct compat_timeval *rhs)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 84f997f..c755283 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig)
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
+				 siginfo_t *info);
 extern long do_sigpending(void __user *, unsigned long);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern int show_unhandled_signals;
diff --git a/kernel/compat.c b/kernel/compat.c
index 42d5654..f6c204f 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 
 }
 
+asmlinkage long
+compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+			     struct compat_siginfo __user *uinfo)
+{
+	siginfo_t info;
+
+	if (copy_siginfo_from_user32(&info, uinfo))
+		return -EFAULT;
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
 /* compat_time_t is a 32 bit "long" and needs to get converted. */
diff --git a/kernel/signal.c b/kernel/signal.c
index 56d27ac..f79b3b9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2369,6 +2369,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
 	return kill_proc_info(sig, &info, pid);
 }
 
+long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+{
+	/* This is only valid for single tasks */
+	if (pid <= 0 || tgid <= 0)
+		return -EINVAL;
+
+	/* Not even root can pretend to send signals from the kernel.
+	   Nor can they impersonate a kill(), which adds source info.  */
+	if (info->si_code >= 0)
+		return -EPERM;
+	info->si_signo = sig;
+
+	return do_send_specific(tgid, pid, sig, info);
+}
+
+SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
+		siginfo_t __user *, uinfo)
+{
+	siginfo_t info;
+
+	if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
+		return -EFAULT;
+
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct task_struct *t = current;
-- 
cgit v0.10.2


From 12d161147f828192b5bcc33166f468a827832767 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 4 Apr 2009 21:01:10 +0000
Subject: x86: hookup sys_rt_tgsigqueueinfo

Make the new sys_rt_tgsigqueueinfo available for x86.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202..dcef387 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -830,4 +830,5 @@ ia32_sys_call_table:
 	.quad sys_inotify_init1
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
+	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74..708dae6 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,7 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_rt_tgsigqueueinfo	335
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f818294..4e2b054 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 __SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev				296
 __SYSCALL(__NR_pwritev, sys_pwritev)
+#define __NR_rt_tgsigqueueinfo			297
+__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 
 
 #ifndef __NO_STUBS
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c873..734f92c 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,4 @@ ENTRY(sys_call_table)
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_rt_tgsigqueueinfo	/* 335 */
-- 
cgit v0.10.2


From bf293c17b26b8854241df08b9b63f7270cbde012 Mon Sep 17 00:00:00 2001
From: Remis Lima Baima <remis.developer@googlemail.com>
Date: Thu, 30 Apr 2009 18:36:23 +0200
Subject: x86: added 'ifndef _ASM_X86_IOMAP_H' to iomap.h

iomap.h misses the include guards.

[ Impact: cleanup ]

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
LKML-Reference: <200904301836.23885.arnd@arndb.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 86af260..0e9fe1d 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -1,3 +1,6 @@
+#ifndef _ASM_X86_IOMAP_H
+#define _ASM_X86_IOMAP_H
+
 /*
  * Copyright © 2008 Ingo Molnar
  *
@@ -31,3 +34,5 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 
 void
 iounmap_atomic(void *kvaddr, enum km_type type);
+
+#endif /* _ASM_X86_IOMAP_H */
-- 
cgit v0.10.2


From e74cc06df3b05e2b2c1611a043f6e6dcadaab1eb Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Wed, 28 Jan 2009 19:18:32 +0100
Subject: configfs: Silence lockdep on mkdir() and rmdir()

When attaching default groups (subdirs) of a new group (in mkdir() or
in configfs_register()), configfs recursively takes inode's mutexes
along the path from the parent of the new group to the default
subdirs. This is needed to ensure that the VFS will not race with
operations on these sub-dirs. This is safe for the following reasons:

- the VFS allows one to lock first an inode and second one of its
  children (The lock subclasses for this pattern are respectively
  I_MUTEX_PARENT and I_MUTEX_CHILD);
- from this rule any inode path can be recursively locked in
  descending order as long as it stays under a single mountpoint and
  does not follow symlinks.

Unfortunately lockdep does not know (yet?) how to handle such
recursion.

I've tried to use Peter Zijlstra's lock_set_subclass() helper to
upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know
that we might recursively lock some of their descendant, but this
usage does not seem to fit the purpose of lock_set_subclass() because
it leads to several i_mutex locked with subclass I_MUTEX_PARENT by
the same task.

>From inside configfs it is not possible to serialize those recursive
locking with a top-level one, because mkdir() and rmdir() are already
called with inodes locked by the VFS. So using some
mutex_lock_nest_lock() is not an option.

I am proposing two solutions:
1) one that wraps recursive mutex_lock()s with
   lockdep_off()/lockdep_on().
2) (as suggested earlier by Peter Zijlstra) one that puts the
   i_mutexes recursively locked in different classes based on their
   depth from the top-level config_group created. This
   induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the
   nesting of configfs default groups whenever lockdep is activated
   but this limit looks reasonably high. Unfortunately, this also
   isolates VFS operations on configfs default groups from the others
   and thus lowers the chances to detect locking issues.

Nobody likes solution 1), which I can understand.

This patch implements solution 2). However lockdep is still not happy with
configfs_depend_item(). Next patch reworks the locking of
configfs_depend_item() and finally makes lockdep happy.

[ Note: This hides a few locking interactions with the VFS from lockdep.
  That was my big concern, because we like lockdep's protection.  However,
  the current state always dumps a spurious warning.  The locking is
  correct, so I tell people to ignore the warning and that we'll keep
  our eyes on the locking to make sure it stays correct.  With this patch,
  we eliminate the warning.  We do lose some of the lockdep protections,
  but this only means that we still have to keep our eyes on the locking.
  We're going to do that anyway.  -- Joel ]

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 762d287..da6061a 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -39,6 +39,9 @@ struct configfs_dirent {
 	umode_t			s_mode;
 	struct dentry		* s_dentry;
 	struct iattr		* s_iattr;
+#ifdef CONFIG_LOCKDEP
+	int			s_depth;
+#endif
 };
 
 #define CONFIGFS_ROOT		0x0001
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 05373db..d4d871f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -78,6 +78,92 @@ static const struct dentry_operations configfs_dentry_ops = {
 	.d_delete	= configfs_d_delete,
 };
 
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * Helpers to make lockdep happy with our recursive locking of default groups'
+ * inodes (see configfs_attach_group() and configfs_detach_group()).
+ * We put default groups i_mutexes in separate classes according to their depth
+ * from the youngest non-default group ancestor.
+ *
+ * For a non-default group A having default groups A/B, A/C, and A/C/D, default
+ * groups A/B and A/C will have their inode's mutex in class
+ * default_group_class[0], and default group A/C/D will be in
+ * default_group_class[1].
+ *
+ * The lock classes are declared and assigned in inode.c, according to the
+ * s_depth value.
+ * The s_depth value is initialized to -1, adjusted to >= 0 when attaching
+ * default groups, and reset to -1 when all default groups are attached. During
+ * attachment, if configfs_create() sees s_depth > 0, the lock class of the new
+ * inode's mutex is set to default_group_class[s_depth - 1].
+ */
+
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+	sd->s_depth = -1;
+}
+
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+					  struct configfs_dirent *sd)
+{
+	int parent_depth = parent_sd->s_depth;
+
+	if (parent_depth >= 0)
+		sd->s_depth = parent_depth + 1;
+}
+
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+	/*
+	 * item's i_mutex class is already setup, so s_depth is now only
+	 * used to set new sub-directories s_depth, which is always done
+	 * with item's i_mutex locked.
+	 */
+	/*
+	 *  sd->s_depth == -1 iff we are a non default group.
+	 *  else (we are a default group) sd->s_depth > 0 (see
+	 *  create_dir()).
+	 */
+	if (sd->s_depth == -1)
+		/*
+		 * We are a non default group and we are going to create
+		 * default groups.
+		 */
+		sd->s_depth = 0;
+}
+
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+	/* We will not create default groups anymore. */
+	sd->s_depth = -1;
+}
+
+#else /* CONFIG_LOCKDEP */
+
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+}
+
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+					  struct configfs_dirent *sd)
+{
+}
+
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+}
+
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+}
+
+#endif /* CONFIG_LOCKDEP */
+
 /*
  * Allocates a new configfs_dirent and links it to the parent configfs_dirent
  */
@@ -94,6 +180,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 	INIT_LIST_HEAD(&sd->s_links);
 	INIT_LIST_HEAD(&sd->s_children);
 	sd->s_element = element;
+	configfs_init_dirent_depth(sd);
 	spin_lock(&configfs_dirent_lock);
 	if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
 		spin_unlock(&configfs_dirent_lock);
@@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
 		error = configfs_make_dirent(p->d_fsdata, d, k, mode,
 					     CONFIGFS_DIR | CONFIGFS_USET_CREATING);
 	if (!error) {
+		configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata);
 		error = configfs_create(d, mode, init_dir);
 		if (!error) {
 			inc_nlink(p->d_inode);
@@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item,
 		 * error, as rmdir() would.
 		 */
 		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+		configfs_adjust_dir_dirent_depth_before_populate(sd);
 		ret = populate_groups(to_config_group(item));
 		if (ret) {
 			configfs_detach_item(item);
 			dentry->d_inode->i_flags |= S_DEAD;
 		}
+		configfs_adjust_dir_dirent_depth_after_populate(sd);
 		mutex_unlock(&dentry->d_inode->i_mutex);
 		if (ret)
 			d_delete(dentry);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 5d349d3..4921e742 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -33,10 +33,15 @@
 #include <linux/backing-dev.h>
 #include <linux/capability.h>
 #include <linux/sched.h>
+#include <linux/lockdep.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
 
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
+#endif
+
 extern struct super_block * configfs_sb;
 
 static const struct address_space_operations configfs_aops = {
@@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
 	return inode;
 }
 
+#ifdef CONFIG_LOCKDEP
+
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+					  struct inode *inode)
+{
+	int depth = sd->s_depth;
+
+	if (depth > 0) {
+		if (depth <= ARRAY_SIZE(default_group_class)) {
+			lockdep_set_class(&inode->i_mutex,
+					  &default_group_class[depth - 1]);
+		} else {
+			/*
+			 * In practice the maximum level of locking depth is
+			 * already reached. Just inform about possible reasons.
+			 */
+			printk(KERN_INFO "configfs: Too many levels of inodes"
+			       " for the locking correctness validator.\n");
+			printk(KERN_INFO "Spurious warnings may appear.\n");
+		}
+	}
+}
+
+#else /* CONFIG_LOCKDEP */
+
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+					  struct inode *inode)
+{
+}
+
+#endif /* CONFIG_LOCKDEP */
+
 int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
 {
 	int error = 0;
@@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
 					struct inode *p_inode = dentry->d_parent->d_inode;
 					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
 				}
+				configfs_set_inode_lock_class(sd, inode);
 				goto Proceed;
 			}
 			else
-- 
cgit v0.10.2


From 420118caa32c8ccdf9fce5a623b9de3f951573c5 Mon Sep 17 00:00:00 2001
From: Louis Rilling <louis.rilling@kerlabs.com>
Date: Wed, 28 Jan 2009 19:18:33 +0100
Subject: configfs: Rework configfs_depend_item() locking and make lockdep
 happy

configfs_depend_item() recursively locks all inodes mutex from configfs root to
the target item, which makes lockdep unhappy. The purpose of this recursive
locking is to ensure that the item tree can be safely parsed and that the target
item, if found, is not about to leave.

This patch reworks configfs_depend_item() locking using configfs_dirent_lock.
Since configfs_dirent_lock protects all changes to the configfs_dirent tree, and
protects tagging of items to be removed, this lock can be used instead of the
inodes mutex lock chain.
This needs that the check for dependents be done atomically with
CONFIGFS_USET_DROPPING tagging.

Now lockdep looks happy with configfs.

[ Lifted the setting of s_type into configfs_new_dirent() to satisfy the
  atomic setting of CONFIGFS_USET_CREATING  -- Joel ]

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index d4d871f..8e48b52 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -167,8 +167,8 @@ configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
 /*
  * Allocates a new configfs_dirent and links it to the parent configfs_dirent
  */
-static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd,
-						void * element)
+static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd,
+						   void *element, int type)
 {
 	struct configfs_dirent * sd;
 
@@ -180,6 +180,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 	INIT_LIST_HEAD(&sd->s_links);
 	INIT_LIST_HEAD(&sd->s_children);
 	sd->s_element = element;
+	sd->s_type = type;
 	configfs_init_dirent_depth(sd);
 	spin_lock(&configfs_dirent_lock);
 	if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
@@ -225,12 +226,11 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
 {
 	struct configfs_dirent * sd;
 
-	sd = configfs_new_dirent(parent_sd, element);
+	sd = configfs_new_dirent(parent_sd, element, type);
 	if (IS_ERR(sd))
 		return PTR_ERR(sd);
 
 	sd->s_mode = mode;
-	sd->s_type = type;
 	sd->s_dentry = dentry;
 	if (dentry) {
 		dentry->d_fsdata = configfs_get(sd);
@@ -1006,11 +1006,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
  * Note, btw, that this can be called at *any* time, even when a configfs
  * subsystem isn't registered, or when configfs is loading or unloading.
  * Just like configfs_register_subsystem().  So we take the same
- * precautions.  We pin the filesystem.  We lock each i_mutex _in_order_
- * on our way down the tree.  If we can find the target item in the
+ * precautions.  We pin the filesystem.  We lock configfs_dirent_lock.
+ * If we can find the target item in the
  * configfs tree, it must be part of the subsystem tree as well, so we
- * do not need the subsystem semaphore.  Holding the i_mutex chain locks
- * out mkdir() and rmdir(), who might be racing us.
+ * do not need the subsystem semaphore.  Holding configfs_dirent_lock helps
+ * locking out mkdir() and rmdir(), who might be racing us.
  */
 
 /*
@@ -1023,17 +1023,21 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
  * do that so we can unlock it if we find nothing.
  *
  * Here we do a depth-first search of the dentry hierarchy looking for
- * our object.  We take i_mutex on each step of the way down.  IT IS
- * ESSENTIAL THAT i_mutex LOCKING IS ORDERED.  If we come back up a branch,
- * we'll drop the i_mutex.
+ * our object.
+ * We deliberately ignore items tagged as dropping since they are virtually
+ * dead, as well as items in the middle of attachment since they virtually
+ * do not exist yet. This completes the locking out of racing mkdir() and
+ * rmdir().
+ * Note: subdirectories in the middle of attachment start with s_type =
+ * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir().  When
+ * CONFIGFS_USET_CREATING is set, we ignore the item.  The actual set of
+ * s_type is in configfs_new_dirent(), which has configfs_dirent_lock.
  *
- * If the target is not found, -ENOENT is bubbled up and we have released
- * all locks.  If the target was found, the locks will be cleared by
- * configfs_depend_rollback().
+ * If the target is not found, -ENOENT is bubbled up.
  *
  * This adds a requirement that all config_items be unique!
  *
- * This is recursive because the locking traversal is tricky.  There isn't
+ * This is recursive.  There isn't
  * much on the stack, though, so folks that need this function - be careful
  * about your stack!  Patches will be accepted to make it iterative.
  */
@@ -1045,13 +1049,13 @@ static int configfs_depend_prep(struct dentry *origin,
 
 	BUG_ON(!origin || !sd);
 
-	/* Lock this guy on the way down */
-	mutex_lock(&sd->s_dentry->d_inode->i_mutex);
 	if (sd->s_element == target)  /* Boo-yah */
 		goto out;
 
 	list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
-		if (child_sd->s_type & CONFIGFS_DIR) {
+		if ((child_sd->s_type & CONFIGFS_DIR) &&
+		    !(child_sd->s_type & CONFIGFS_USET_DROPPING) &&
+		    !(child_sd->s_type & CONFIGFS_USET_CREATING)) {
 			ret = configfs_depend_prep(child_sd->s_dentry,
 						   target);
 			if (!ret)
@@ -1060,33 +1064,12 @@ static int configfs_depend_prep(struct dentry *origin,
 	}
 
 	/* We looped all our children and didn't find target */
-	mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
 	ret = -ENOENT;
 
 out:
 	return ret;
 }
 
-/*
- * This is ONLY called if configfs_depend_prep() did its job.  So we can
- * trust the entire path from item back up to origin.
- *
- * We walk backwards from item, unlocking each i_mutex.  We finish by
- * unlocking origin.
- */
-static void configfs_depend_rollback(struct dentry *origin,
-				     struct config_item *item)
-{
-	struct dentry *dentry = item->ci_dentry;
-
-	while (dentry != origin) {
-		mutex_unlock(&dentry->d_inode->i_mutex);
-		dentry = dentry->d_parent;
-	}
-
-	mutex_unlock(&origin->d_inode->i_mutex);
-}
-
 int configfs_depend_item(struct configfs_subsystem *subsys,
 			 struct config_item *target)
 {
@@ -1127,17 +1110,21 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
 
 	/* Ok, now we can trust subsys/s_item */
 
-	/* Scan the tree, locking i_mutex recursively, return 0 if found */
+	spin_lock(&configfs_dirent_lock);
+	/* Scan the tree, return 0 if found */
 	ret = configfs_depend_prep(subsys_sd->s_dentry, target);
 	if (ret)
-		goto out_unlock_fs;
+		goto out_unlock_dirent_lock;
 
-	/* We hold all i_mutexes from the subsystem down to the target */
+	/*
+	 * We are sure that the item is not about to be removed by rmdir(), and
+	 * not in the middle of attachment by mkdir().
+	 */
 	p = target->ci_dentry->d_fsdata;
 	p->s_dependent_count += 1;
 
-	configfs_depend_rollback(subsys_sd->s_dentry, target);
-
+out_unlock_dirent_lock:
+	spin_unlock(&configfs_dirent_lock);
 out_unlock_fs:
 	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
 
@@ -1162,10 +1149,10 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
 	struct configfs_dirent *sd;
 
 	/*
-	 * Since we can trust everything is pinned, we just need i_mutex
-	 * on the item.
+	 * Since we can trust everything is pinned, we just need
+	 * configfs_dirent_lock.
 	 */
-	mutex_lock(&target->ci_dentry->d_inode->i_mutex);
+	spin_lock(&configfs_dirent_lock);
 
 	sd = target->ci_dentry->d_fsdata;
 	BUG_ON(sd->s_dependent_count < 1);
@@ -1176,7 +1163,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
 	 * After this unlock, we cannot trust the item to stay alive!
 	 * DO NOT REFERENCE item after this unlock.
 	 */
-	mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
+	spin_unlock(&configfs_dirent_lock);
 }
 EXPORT_SYMBOL(configfs_undepend_item);
 
@@ -1376,13 +1363,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (sd->s_type & CONFIGFS_USET_DEFAULT)
 		return -EPERM;
 
-	/*
-	 * Here's where we check for dependents.  We're protected by
-	 * i_mutex.
-	 */
-	if (sd->s_dependent_count)
-		return -EBUSY;
-
 	/* Get a working ref until we have the child */
 	parent_item = configfs_get_config_item(dentry->d_parent);
 	subsys = to_config_group(parent_item)->cg_subsys;
@@ -1406,9 +1386,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 
 		mutex_lock(&configfs_symlink_mutex);
 		spin_lock(&configfs_dirent_lock);
-		ret = configfs_detach_prep(dentry, &wait_mutex);
-		if (ret)
-			configfs_detach_rollback(dentry);
+		/*
+		 * Here's where we check for dependents.  We're protected by
+		 * configfs_dirent_lock.
+		 * If no dependent, atomically tag the item as dropping.
+		 */
+		ret = sd->s_dependent_count ? -EBUSY : 0;
+		if (!ret) {
+			ret = configfs_detach_prep(dentry, &wait_mutex);
+			if (ret)
+				configfs_detach_rollback(dentry);
+		}
 		spin_unlock(&configfs_dirent_lock);
 		mutex_unlock(&configfs_symlink_mutex);
 
@@ -1519,7 +1507,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
 	 */
 	err = -ENOENT;
 	if (configfs_dirent_is_ready(parent_sd)) {
-		file->private_data = configfs_new_dirent(parent_sd, NULL);
+		file->private_data = configfs_new_dirent(parent_sd, NULL, 0);
 		if (IS_ERR(file->private_data))
 			err = PTR_ERR(file->private_data);
 		else
-- 
cgit v0.10.2


From 78a3d9d5654a7fd99cf8b2ab06b9497b9c7aad64 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 29 Apr 2009 18:01:23 +0200
Subject: do_wait: do take security_task_wait() into account

I was never able to understand what should we actually do when
security_task_wait() fails, but the current code doesn't look right.

If ->task_wait() returns the error, we update *notask_error correctly.
But then we either reap the child (despite the fact this was forbidden)
or clear *notask_error (and hide the securiy policy problems).

This patch assumes that "stolen by ptrace" doesn't matter. If selinux
denies the child we should ignore it but make sure we report -EACCESS
instead of -ECHLD if there are no other eligible children.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/kernel/exit.c b/kernel/exit.c
index 167e1e3..d2e8239 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1582,6 +1582,7 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
 		 */
 		if (*notask_error)
 			*notask_error = ret;
+		return 0;
 	}
 
 	if (likely(!ptrace) && unlikely(p->ptrace)) {
-- 
cgit v0.10.2


From d6662c351a827264706bf880a36cdd376808ba1c Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 12 Dec 2008 00:24:40 +0000
Subject: [ARM] SMDK6410: Ensure LCD settings are setup

Ensure that the LCD output type is RGB and that
the modem interface is not bypassing the LCD
block. This ensures the LCD interface output
gets to the pins in the correct format.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c
index 7f473e4..d0b9f09 100644
--- a/arch/arm/mach-s3c6410/mach-smdk6410.c
+++ b/arch/arm/mach-s3c6410/mach-smdk6410.c
@@ -39,6 +39,9 @@
 #include <asm/mach-types.h>
 
 #include <plat/regs-serial.h>
+#include <plat/regs-modem.h>
+#include <plat/regs-gpio.h>
+#include <plat/regs-sys.h>
 #include <plat/iic.h>
 #include <plat/fb.h>
 
@@ -155,9 +158,23 @@ static struct i2c_board_info i2c_devs1[] __initdata = {
 
 static void __init smdk6410_map_io(void)
 {
+	u32 tmp;
+
 	s3c64xx_init_io(smdk6410_iodesc, ARRAY_SIZE(smdk6410_iodesc));
 	s3c24xx_init_clocks(12000000);
 	s3c24xx_init_uarts(smdk6410_uartcfgs, ARRAY_SIZE(smdk6410_uartcfgs));
+
+	/* set the LCD type */
+
+	tmp = __raw_readl(S3C64XX_SPCON);
+	tmp &= ~S3C64XX_SPCON_LCD_SEL_MASK;
+	tmp |= S3C64XX_SPCON_LCD_SEL_RGB;
+	__raw_writel(tmp, S3C64XX_SPCON);
+
+	/* remove the lcd bypass */
+	tmp = __raw_readl(S3C64XX_MODEM_MIFPCON);
+	tmp &= ~MIFPCON_LCD_BYPASS;
+	__raw_writel(tmp, S3C64XX_MODEM_MIFPCON);
 }
 
 static void __init smdk6410_machine_init(void)
-- 
cgit v0.10.2


From 3056ea0afba83d19af5f1f3daf6ed7211f0717da Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 27 Jan 2009 16:18:01 +0000
Subject: [ARM] SMDK6410: Add support for SMSC9115 ethernet controller

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c
index d0b9f09..dfd84d0 100644
--- a/arch/arm/mach-s3c6410/mach-smdk6410.c
+++ b/arch/arm/mach-s3c6410/mach-smdk6410.c
@@ -24,6 +24,7 @@
 #include <linux/fb.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
+#include <linux/smsc911x.h>
 
 #include <video/platform_lcd.h>
 
@@ -44,6 +45,7 @@
 #include <plat/regs-sys.h>
 #include <plat/iic.h>
 #include <plat/fb.h>
+#include <plat/gpio-cfg.h>
 
 #include <plat/s3c6410.h>
 #include <plat/clock.h>
@@ -132,6 +134,37 @@ static struct s3c_fb_platdata smdk6410_lcd_pdata __initdata = {
 	.vidcon1	= VIDCON1_INV_HSYNC | VIDCON1_INV_VSYNC,
 };
 
+static struct resource smdk6410_smsc911x_resources[] = {
+	[0] = {
+		.start = 0x18000000,
+		.end   = 0x18000000 + SZ_64K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = S3C_EINT(10),
+		.end   = S3C_EINT(10),
+		.flags = IORESOURCE_IRQ | IRQ_TYPE_LEVEL_LOW,
+	},
+};
+
+static struct smsc911x_platform_config smdk6410_smsc911x_pdata = {
+	.irq_polarity  = SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type      = SMSC911X_IRQ_TYPE_OPEN_DRAIN,
+	.flags         = SMSC911X_USE_32BIT | SMSC911X_FORCE_INTERNAL_PHY,
+	.phy_interface = PHY_INTERFACE_MODE_MII,
+};
+
+
+static struct platform_device smdk6410_smsc911x = {
+	.name          = "smsc911x",
+	.id            = -1,
+	.num_resources = ARRAY_SIZE(smdk6410_smsc911x_resources),
+	.resource      = &smdk6410_smsc911x_resources[0],
+	.dev = {
+		.platform_data = &smdk6410_smsc911x_pdata,
+	},
+};
+
 static struct map_desc smdk6410_iodesc[] = {};
 
 static struct platform_device *smdk6410_devices[] __initdata = {
@@ -145,6 +178,8 @@ static struct platform_device *smdk6410_devices[] __initdata = {
 	&s3c_device_i2c1,
 	&s3c_device_fb,
 	&smdk6410_lcd_powerdev,
+
+	&smdk6410_smsc911x,
 };
 
 static struct i2c_board_info i2c_devs0[] __initdata = {
-- 
cgit v0.10.2


From ecc558acaba9ca1c6e2e7b54a1edb73ee391ae17 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 17 Feb 2009 15:59:38 +0000
Subject: [ARM] SMDK6410: Support WM1190-EV1 PMIC board

The SMDK6410 supports pluggable PMIC boards. One such board is the
Wolfson Microelectronics 1190-EV1 for the WM8350 PMICs. This patch
introduces initial support for this module. Further patches will
add additional integration with the system as support for the
S3C6410 and other devices on the system improves.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/Kconfig b/arch/arm/mach-s3c6410/Kconfig
index 1d50100..ffa4974 100644
--- a/arch/arm/mach-s3c6410/Kconfig
+++ b/arch/arm/mach-s3c6410/Kconfig
@@ -60,3 +60,20 @@ config SMDK6410_SD_CH1
 	  channels 0 and 1 are the same.
 
 endchoice
+
+config SMDK6410_WM1190_EV1
+	bool "Support Wolfson Microelectronics 1190-EV1 PMIC card"
+	depends on MACH_SMDK6410
+	select REGULATOR
+	select REGULATOR_WM8350
+	select MFD_WM8350_I2C
+	select MFD_WM8350_CONFIG_MODE_0
+	select MFD_WM8350_CONFIG_MODE_3
+	select MFD_WM8352_CONFIG_MODE_0
+	help
+	  The Wolfson Microelectronics 1190-EV1 is a WM835x based PMIC
+	  and audio daughtercard for the Samsung SMDK6410 reference
+	  platform.  Enabling this option will build support for this
+	  module into the kernel.  The presence of the module will be
+	  detected at runtime so the the resulting kernel can be used
+	  with or without the 1190-EV1 fitted.
diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c
index dfd84d0..466dee0 100644
--- a/arch/arm/mach-s3c6410/mach-smdk6410.c
+++ b/arch/arm/mach-s3c6410/mach-smdk6410.c
@@ -26,6 +26,11 @@
 #include <linux/delay.h>
 #include <linux/smsc911x.h>
 
+#ifdef CONFIG_SMDK6410_WM1190_EV1
+#include <linux/mfd/wm8350/core.h>
+#include <linux/mfd/wm8350/pmic.h>
+#endif
+
 #include <video/platform_lcd.h>
 
 #include <asm/mach/arch.h>
@@ -182,9 +187,135 @@ static struct platform_device *smdk6410_devices[] __initdata = {
 	&smdk6410_smsc911x,
 };
 
+#ifdef CONFIG_SMDK6410_WM1190_EV1
+/* S3C64xx internal logic & PLL */
+static struct regulator_init_data wm8350_dcdc1_data = {
+	.constraints = {
+		.name = "PVDD_INT/PVDD_PLL",
+		.min_uV = 1200000,
+		.max_uV = 1200000,
+		.always_on = 1,
+		.apply_uV = 1,
+	},
+};
+
+/* Memory */
+static struct regulator_init_data wm8350_dcdc3_data = {
+	.constraints = {
+		.name = "PVDD_MEM",
+		.min_uV = 1800000,
+		.max_uV = 1800000,
+		.always_on = 1,
+		.state_mem = {
+			 .uV = 1800000,
+			 .mode = REGULATOR_MODE_NORMAL,
+			 .enabled = 1,
+		 },
+		.initial_state = PM_SUSPEND_MEM,
+	},
+};
+
+/* USB, EXT, PCM, ADC/DAC, USB, MMC */
+static struct regulator_init_data wm8350_dcdc4_data = {
+	.constraints = {
+		.name = "PVDD_HI/PVDD_EXT/PVDD_SYS/PVCCM2MTV",
+		.min_uV = 3000000,
+		.max_uV = 3000000,
+		.always_on = 1,
+	},
+};
+
+/* ARM core */
+static struct regulator_init_data wm8350_dcdc6_data = {
+	.constraints = {
+		.name = "PVDD_ARM",
+		.min_uV = 1000000,
+		.max_uV = 1300000,
+		.always_on = 1,
+	},
+};
+
+/* Alive */
+static struct regulator_init_data wm8350_ldo1_data = {
+	.constraints = {
+		.name = "PVDD_ALIVE",
+		.min_uV = 1200000,
+		.max_uV = 1200000,
+		.always_on = 1,
+		.apply_uV = 1,
+	},
+};
+
+/* OTG */
+static struct regulator_init_data wm8350_ldo2_data = {
+	.constraints = {
+		.name = "PVDD_OTG",
+		.min_uV = 3300000,
+		.max_uV = 3300000,
+	},
+};
+
+/* LCD */
+static struct regulator_init_data wm8350_ldo3_data = {
+	.constraints = {
+		.name = "PVDD_LCD",
+		.min_uV = 3000000,
+		.max_uV = 3000000,
+	},
+};
+
+/* OTGi/1190-EV1 HPVDD & AVDD */
+static struct regulator_init_data wm8350_ldo4_data = {
+	.constraints = {
+		.name = "PVDD_OTGI/HPVDD/AVDD",
+		.min_uV = 1200000,
+		.max_uV = 1200000,
+		.apply_uV = 1,
+	},
+};
+
+static struct {
+	int regulator;
+	struct regulator_init_data *initdata;
+} wm1190_regulators[] = {
+	{ WM8350_DCDC_1, &wm8350_dcdc1_data },
+	{ WM8350_DCDC_3, &wm8350_dcdc3_data },
+	{ WM8350_DCDC_4, &wm8350_dcdc4_data },
+	{ WM8350_DCDC_6, &wm8350_dcdc6_data },
+	{ WM8350_LDO_1, &wm8350_ldo1_data },
+	{ WM8350_LDO_2, &wm8350_ldo2_data },
+	{ WM8350_LDO_3, &wm8350_ldo3_data },
+	{ WM8350_LDO_4, &wm8350_ldo4_data },
+};
+
+static int __init smdk6410_wm8350_init(struct wm8350 *wm8350)
+{
+	int i;
+
+	/* Instantiate the regulators */
+	for (i = 0; i < ARRAY_SIZE(wm1190_regulators); i++)
+		wm8350_register_regulator(wm8350,
+					  wm1190_regulators[i].regulator,
+					  wm1190_regulators[i].initdata);
+
+	return 0;
+}
+
+static struct wm8350_platform_data __initdata smdk6410_wm8350_pdata = {
+	.init = smdk6410_wm8350_init,
+};
+#endif
+
 static struct i2c_board_info i2c_devs0[] __initdata = {
 	{ I2C_BOARD_INFO("24c08", 0x50), },
 	{ I2C_BOARD_INFO("wm8580", 0x1b), },
+
+#ifdef CONFIG_SMDK6410_WM1190_EV1
+	{ I2C_BOARD_INFO("wm8350", 0x1a),
+	  .platform_data = &smdk6410_wm8350_pdata,
+	  .irq = S3C_EINT(12),
+	},
+#endif
 };
 
 static struct i2c_board_info i2c_devs1[] __initdata = {
-- 
cgit v0.10.2


From 87c4122f1714f96faa1b1c9449f762b9c56f77e4 Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Wed, 11 Mar 2009 11:05:55 +0900
Subject: [ARM] S3C6410: Basic support for NCP board (v2)

This is a basic support for NCP board based on s3c6410.

Only enables the serial. also remove empty i2c device.

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/Kconfig b/arch/arm/mach-s3c6410/Kconfig
index ffa4974..53350b4 100644
--- a/arch/arm/mach-s3c6410/Kconfig
+++ b/arch/arm/mach-s3c6410/Kconfig
@@ -77,3 +77,12 @@ config SMDK6410_WM1190_EV1
 	  module into the kernel.  The presence of the module will be
 	  detected at runtime so the the resulting kernel can be used
 	  with or without the 1190-EV1 fitted.
+
+config MACH_NCP
+	bool "NCP"
+	select CPU_S3C6410
+	select S3C_DEV_I2C1
+	select S3C_DEV_HSMMC1
+	select S3C64XX_SETUP_I2C1
+	help
+          Machine support for the Samsung NCP
diff --git a/arch/arm/mach-s3c6410/Makefile b/arch/arm/mach-s3c6410/Makefile
index 2cd4f18..03e9ea6 100644
--- a/arch/arm/mach-s3c6410/Makefile
+++ b/arch/arm/mach-s3c6410/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_S3C6410_SETUP_SDHCI)	+= setup-sdhci.o
 # machine support
 
 obj-$(CONFIG_MACH_SMDK6410)	+= mach-smdk6410.o
+obj-$(CONFIG_MACH_NCP)		+= mach-ncp.o
diff --git a/arch/arm/mach-s3c6410/mach-ncp.c b/arch/arm/mach-s3c6410/mach-ncp.c
new file mode 100644
index 0000000..6030636
--- /dev/null
+++ b/arch/arm/mach-s3c6410/mach-ncp.c
@@ -0,0 +1,107 @@
+/*
+ * linux/arch/arm/mach-s3c6410/mach-ncp.c
+ *
+ * Copyright (C) 2008-2009 Samsung Electronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/serial_core.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/i2c.h>
+#include <linux/fb.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+
+#include <video/platform_lcd.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+
+#include <mach/hardware.h>
+#include <mach/regs-fb.h>
+#include <mach/map.h>
+
+#include <asm/irq.h>
+#include <asm/mach-types.h>
+
+#include <plat/regs-serial.h>
+#include <plat/iic.h>
+#include <plat/fb.h>
+
+#include <plat/s3c6410.h>
+#include <plat/clock.h>
+#include <plat/devs.h>
+#include <plat/cpu.h>
+
+#define UCON S3C2410_UCON_DEFAULT
+#define ULCON S3C2410_LCON_CS8 | S3C2410_LCON_PNONE
+#define UFCON S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE
+
+static struct s3c2410_uartcfg ncp_uartcfgs[] __initdata = {
+	/* REVISIT: NCP uses only serial 1, 2 */
+	[0] = {
+		.hwport	     = 0,
+		.flags	     = 0,
+		.ucon	     = UCON,
+		.ulcon	     = ULCON,
+		.ufcon	     = UFCON,
+	},
+	[1] = {
+		.hwport	     = 1,
+		.flags	     = 0,
+		.ucon	     = UCON,
+		.ulcon	     = ULCON,
+		.ufcon	     = UFCON,
+	},
+	[2] = {
+		.hwport	     = 2,
+		.flags	     = 0,
+		.ucon	     = UCON,
+		.ulcon	     = ULCON,
+		.ufcon	     = UFCON,
+	},
+};
+
+static struct platform_device *ncp_devices[] __initdata = {
+	&s3c_device_hsmmc1,
+	&s3c_device_i2c0,
+};
+
+struct map_desc ncp_iodesc[] = {};
+
+static void __init ncp_map_io(void)
+{
+	s3c64xx_init_io(ncp_iodesc, ARRAY_SIZE(ncp_iodesc));
+	s3c24xx_init_clocks(12000000);
+	s3c24xx_init_uarts(ncp_uartcfgs, ARRAY_SIZE(ncp_uartcfgs));
+}
+
+static void __init ncp_machine_init(void)
+{
+	s3c_i2c0_set_platdata(NULL);
+
+	platform_add_devices(ncp_devices, ARRAY_SIZE(ncp_devices));
+}
+
+MACHINE_START(NCP, "NCP")
+	/* Maintainer: Samsung Electronics */
+	.phys_io	= S3C_PA_UART & 0xfff00000,
+	.io_pg_offst	= (((u32)S3C_VA_UART) >> 18) & 0xfffc,
+	.boot_params	= S3C64XX_PA_SDRAM + 0x100,
+	.init_irq	= s3c6410_init_irq,
+	.map_io		= ncp_map_io,
+	.init_machine	= ncp_machine_init,
+	.timer		= &s3c24xx_timer,
+MACHINE_END
-- 
cgit v0.10.2


From db9256f33c1b9ce72d2a126d51c7fe0de65f0f8c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 9 Apr 2009 19:00:19 +0100
Subject: [ARM] SMDK6410: Use active high IRQ for the WM8350 on the WM1190-EV1

Using an active high IRQ ensures that the WM8350 interrupt handling
does not spin when used with a SMDK6410 which has not had R20 removed
and R21 fitted to connect EINT12 to the PMIC module rather thant the
fixed regulators on the CPU module.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c
index 466dee0..15f9c7a 100644
--- a/arch/arm/mach-s3c6410/mach-smdk6410.c
+++ b/arch/arm/mach-s3c6410/mach-smdk6410.c
@@ -303,6 +303,7 @@ static int __init smdk6410_wm8350_init(struct wm8350 *wm8350)
 
 static struct wm8350_platform_data __initdata smdk6410_wm8350_pdata = {
 	.init = smdk6410_wm8350_init,
+	.irq_high = 1,
 };
 #endif
 
-- 
cgit v0.10.2


From f53aee29bef09070e4ab14488e43433b371844ce Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 9 Apr 2009 16:30:40 +0100
Subject: [ARM] SMDK6410: Mark all supplies as always_on

Since no consumers are currently configured for the SMDK6410 mark all
the supplies on the board as being always_on, ensuring interoperability
with future regulator API changes to disable unused regulators.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c
index 15f9c7a..697c046 100644
--- a/arch/arm/mach-s3c6410/mach-smdk6410.c
+++ b/arch/arm/mach-s3c6410/mach-smdk6410.c
@@ -252,6 +252,7 @@ static struct regulator_init_data wm8350_ldo2_data = {
 		.name = "PVDD_OTG",
 		.min_uV = 3300000,
 		.max_uV = 3300000,
+		.always_on = 1,
 	},
 };
 
@@ -261,6 +262,7 @@ static struct regulator_init_data wm8350_ldo3_data = {
 		.name = "PVDD_LCD",
 		.min_uV = 3000000,
 		.max_uV = 3000000,
+		.always_on = 1,
 	},
 };
 
@@ -271,6 +273,7 @@ static struct regulator_init_data wm8350_ldo4_data = {
 		.min_uV = 1200000,
 		.max_uV = 1200000,
 		.apply_uV = 1,
+		.always_on = 1,
 	},
 };
 
-- 
cgit v0.10.2


From 1f26a8a0fd6d067237f87a95d96ddfe263b96a94 Mon Sep 17 00:00:00 2001
From: Kwangwoo Lee <kwangwoo.lee@gmail.com>
Date: Tue, 28 Apr 2009 10:35:57 +0900
Subject: [PATCH] S3C64XX: Basic A&W6410 board support patch V2

A&W6410 board uses Samsung S3C6410 SoC and it is registered with 2183.
Framebuffer and ethernet devices are supported currently.

Unnecessary header file is removed.
Fix to use __raw_writel() and __raw_readl() for accessing mapped address.

Thank you for your comments.

Signed-off-by: Kwangwoo Lee <kwangwoo.lee@gmail.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/Kconfig b/arch/arm/mach-s3c6410/Kconfig
index 53350b4..e5a575f 100644
--- a/arch/arm/mach-s3c6410/Kconfig
+++ b/arch/arm/mach-s3c6410/Kconfig
@@ -19,6 +19,14 @@ config S3C6410_SETUP_SDHCI
 	help
 	  Internal helper functions for S3C6410 based SDHCI systems
 
+config MACH_ANW6410
+	bool "A&W6410"
+	select CPU_S3C6410
+	select S3C_DEV_FB
+	select S3C64XX_SETUP_FB_24BPP
+	help
+	  Machine support for the A&W6410
+
 config MACH_SMDK6410
 	bool "SMDK6410"
 	select CPU_S3C6410
diff --git a/arch/arm/mach-s3c6410/Makefile b/arch/arm/mach-s3c6410/Makefile
index 03e9ea6..6f9deac 100644
--- a/arch/arm/mach-s3c6410/Makefile
+++ b/arch/arm/mach-s3c6410/Makefile
@@ -20,5 +20,8 @@ obj-$(CONFIG_S3C6410_SETUP_SDHCI)	+= setup-sdhci.o
 
 # machine support
 
+obj-$(CONFIG_MACH_ANW6410)	+= mach-anw6410.o
 obj-$(CONFIG_MACH_SMDK6410)	+= mach-smdk6410.o
 obj-$(CONFIG_MACH_NCP)		+= mach-ncp.o
+
+
diff --git a/arch/arm/mach-s3c6410/mach-anw6410.c b/arch/arm/mach-s3c6410/mach-anw6410.c
new file mode 100644
index 0000000..661cca6
--- /dev/null
+++ b/arch/arm/mach-s3c6410/mach-anw6410.c
@@ -0,0 +1,245 @@
+/* linux/arch/arm/mach-s3c6410/mach-anw6410.c
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ * Copyright 2009 Kwangwoo Lee
+ * 	Kwangwoo Lee <kwangwoo.lee@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+*/
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/serial_core.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/i2c.h>
+#include <linux/fb.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/dm9000.h>
+
+#include <video/platform_lcd.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+
+#include <mach/hardware.h>
+#include <mach/regs-fb.h>
+#include <mach/map.h>
+
+#include <asm/irq.h>
+#include <asm/mach-types.h>
+
+#include <plat/regs-serial.h>
+#include <plat/iic.h>
+#include <plat/fb.h>
+
+#include <plat/s3c6410.h>
+#include <plat/clock.h>
+#include <plat/devs.h>
+#include <plat/cpu.h>
+#include <plat/regs-gpio.h>
+#include <plat/regs-modem.h>
+
+/* DM9000 */
+#define ANW6410_PA_DM9000	(0x18000000)
+
+/* A hardware buffer to control external devices is mapped at 0x30000000.
+ * It can not be read. So current status must be kept in anw6410_extdev_status.
+ */
+#define ANW6410_VA_EXTDEV	S3C_ADDR(0x02000000)
+#define ANW6410_PA_EXTDEV	(0x30000000)
+
+#define ANW6410_EN_DM9000	(1<<11)
+#define ANW6410_EN_LCD		(1<<14)
+
+static __u32 anw6410_extdev_status;
+
+static struct s3c2410_uartcfg anw6410_uartcfgs[] __initdata = {
+	[0] = {
+		.hwport	     = 0,
+		.flags	     = 0,
+		.ucon	     = 0x3c5,
+		.ulcon	     = 0x03,
+		.ufcon	     = 0x51,
+	},
+	[1] = {
+		.hwport	     = 1,
+		.flags	     = 0,
+		.ucon	     = 0x3c5,
+		.ulcon	     = 0x03,
+		.ufcon	     = 0x51,
+	},
+};
+
+/* framebuffer and LCD setup. */
+static void __init anw6410_lcd_mode_set(void)
+{
+	u32 tmp;
+
+	/* set the LCD type */
+	tmp = __raw_readl(S3C64XX_SPCON);
+	tmp &= ~S3C64XX_SPCON_LCD_SEL_MASK;
+	tmp |= S3C64XX_SPCON_LCD_SEL_RGB;
+	__raw_writel(tmp, S3C64XX_SPCON);
+
+	/* remove the LCD bypass */
+	tmp = __raw_readl(S3C64XX_MODEM_MIFPCON);
+	tmp &= ~MIFPCON_LCD_BYPASS;
+	__raw_writel(tmp, S3C64XX_MODEM_MIFPCON);
+}
+
+/* GPF1 = LCD panel power
+ * GPF4 = LCD backlight control
+ */
+static void anw6410_lcd_power_set(struct plat_lcd_data *pd,
+				   unsigned int power)
+{
+	if (power) {
+		anw6410_extdev_status |= (ANW6410_EN_LCD << 16);
+		__raw_writel(anw6410_extdev_status, ANW6410_VA_EXTDEV);
+
+		gpio_direction_output(S3C64XX_GPF(1), 1);
+		gpio_direction_output(S3C64XX_GPF(4), 1);
+	} else {
+		anw6410_extdev_status &= ~(ANW6410_EN_LCD << 16);
+		__raw_writel(anw6410_extdev_status, ANW6410_VA_EXTDEV);
+
+		gpio_direction_output(S3C64XX_GPF(1), 0);
+		gpio_direction_output(S3C64XX_GPF(4), 0);
+	}
+}
+
+static struct plat_lcd_data anw6410_lcd_power_data = {
+	.set_power	= anw6410_lcd_power_set,
+};
+
+static struct platform_device anw6410_lcd_powerdev = {
+	.name			= "platform-lcd",
+	.dev.parent		= &s3c_device_fb.dev,
+	.dev.platform_data	= &anw6410_lcd_power_data,
+};
+
+static struct s3c_fb_pd_win anw6410_fb_win0 = {
+	/* this is to ensure we use win0 */
+	.win_mode	= {
+		.pixclock	= 41094,
+		.left_margin	= 8,
+		.right_margin	= 13,
+		.upper_margin	= 7,
+		.lower_margin	= 5,
+		.hsync_len	= 3,
+		.vsync_len	= 1,
+		.xres		= 800,
+		.yres		= 480,
+	},
+	.max_bpp	= 32,
+	.default_bpp	= 16,
+};
+
+/* 405566 clocks per frame => 60Hz refresh requires 24333960Hz clock */
+static struct s3c_fb_platdata anw6410_lcd_pdata __initdata = {
+	.setup_gpio	= s3c64xx_fb_gpio_setup_24bpp,
+	.win[0]		= &anw6410_fb_win0,
+	.vidcon0	= VIDCON0_VIDOUT_RGB | VIDCON0_PNRMODE_RGB,
+	.vidcon1	= VIDCON1_INV_HSYNC | VIDCON1_INV_VSYNC,
+};
+
+/* DM9000AEP 10/100 ethernet controller */
+static void __init anw6410_dm9000_enable(void)
+{
+	anw6410_extdev_status |= (ANW6410_EN_DM9000 << 16);
+	__raw_writel(anw6410_extdev_status, ANW6410_VA_EXTDEV);
+}
+
+static struct resource anw6410_dm9000_resource[] = {
+	[0] = {
+		.start = ANW6410_PA_DM9000,
+		.end   = ANW6410_PA_DM9000 + 3,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = ANW6410_PA_DM9000 + 4,
+		.end   = ANW6410_PA_DM9000 + 4 + 500,
+		.flags = IORESOURCE_MEM,
+	},
+	[2] = {
+		.start = IRQ_EINT(15),
+		.end   = IRQ_EINT(15),
+		.flags = IORESOURCE_IRQ | IRQF_TRIGGER_HIGH,
+	},
+};
+
+static struct dm9000_plat_data anw6410_dm9000_pdata = {
+	.flags	  = (DM9000_PLATF_16BITONLY | DM9000_PLATF_NO_EEPROM),
+	/* dev_addr can be set to provide hwaddr. */
+};
+
+static struct platform_device anw6410_device_eth = {
+	.name	= "dm9000",
+	.id	= -1,
+	.num_resources	= ARRAY_SIZE(anw6410_dm9000_resource),
+	.resource	= anw6410_dm9000_resource,
+	.dev	= {
+		.platform_data  = &anw6410_dm9000_pdata,
+	},
+};
+
+static struct map_desc anw6410_iodesc[] __initdata = {
+	{
+		.virtual	= (unsigned long)ANW6410_VA_EXTDEV,
+		.pfn		= __phys_to_pfn(ANW6410_PA_EXTDEV),
+		.length		= SZ_64K,
+		.type		= MT_DEVICE,
+	},
+};
+
+static struct platform_device *anw6410_devices[] __initdata = {
+	&s3c_device_fb,
+	&anw6410_lcd_powerdev,
+	&anw6410_device_eth,
+};
+
+static void __init anw6410_map_io(void)
+{
+	s3c64xx_init_io(anw6410_iodesc, ARRAY_SIZE(anw6410_iodesc));
+	s3c24xx_init_clocks(12000000);
+	s3c24xx_init_uarts(anw6410_uartcfgs, ARRAY_SIZE(anw6410_uartcfgs));
+
+	anw6410_lcd_mode_set();
+}
+
+static void __init anw6410_machine_init(void)
+{
+	s3c_fb_set_platdata(&anw6410_lcd_pdata);
+
+	gpio_request(S3C64XX_GPF(1), "panel power");
+	gpio_request(S3C64XX_GPF(4), "LCD backlight");
+
+	anw6410_dm9000_enable();
+
+	platform_add_devices(anw6410_devices, ARRAY_SIZE(anw6410_devices));
+}
+
+MACHINE_START(ANW6410, "A&W6410")
+	/* Maintainer: Kwangwoo Lee <kwangwoo.lee@gmail.com> */
+	.phys_io	= S3C_PA_UART & 0xfff00000,
+	.io_pg_offst	= (((u32)S3C_VA_UART) >> 18) & 0xfffc,
+	.boot_params	= S3C64XX_PA_SDRAM + 0x100,
+
+	.init_irq	= s3c6410_init_irq,
+	.map_io		= anw6410_map_io,
+	.init_machine	= anw6410_machine_init,
+	.timer		= &s3c24xx_timer,
+MACHINE_END
-- 
cgit v0.10.2


From 5a9eb8da8b95fce21eb2dd39fe71b2b7882ce89a Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Thu, 19 Mar 2009 15:02:33 +0000
Subject: [ARM] S3C24XX: Move plat/dma.h

Move the platform dma.h to dma-plat.h to ensure it doen't get
confused with plat/dma.h

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index 440c014..6248f41 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -20,7 +20,7 @@
 #include <mach/dma.h>
 
 #include <plat/cpu.h>
-#include <plat/dma.h>
+#include <plat/dma-plat.h>
 
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
diff --git a/arch/arm/mach-s3c2412/dma.c b/arch/arm/mach-s3c2412/dma.c
index 9e34785..f888d4e 100644
--- a/arch/arm/mach-s3c2412/dma.c
+++ b/arch/arm/mach-s3c2412/dma.c
@@ -20,7 +20,7 @@
 
 #include <mach/dma.h>
 
-#include <plat/dma.h>
+#include <plat/dma-plat.h>
 #include <plat/cpu.h>
 
 #include <plat/regs-serial.h>
diff --git a/arch/arm/mach-s3c2440/dma.c b/arch/arm/mach-s3c2440/dma.c
index 69b6cf3..895783f 100644
--- a/arch/arm/mach-s3c2440/dma.c
+++ b/arch/arm/mach-s3c2440/dma.c
@@ -19,7 +19,7 @@
 
 #include <mach/dma.h>
 
-#include <plat/dma.h>
+#include <plat/dma-plat.h>
 #include <plat/cpu.h>
 
 #include <plat/regs-serial.h>
diff --git a/arch/arm/mach-s3c2443/dma.c b/arch/arm/mach-s3c2443/dma.c
index 8430e58..825b718 100644
--- a/arch/arm/mach-s3c2443/dma.c
+++ b/arch/arm/mach-s3c2443/dma.c
@@ -20,7 +20,7 @@
 
 #include <mach/dma.h>
 
-#include <plat/dma.h>
+#include <plat/dma-plat.h>
 #include <plat/cpu.h>
 
 #include <plat/regs-serial.h>
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index aee2aeb..d3d1375 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -34,7 +34,7 @@
 
 #include <mach/map.h>
 
-#include <plat/dma.h>
+#include <plat/dma-plat.h>
 
 /* io map for dma */
 static void __iomem *dma_base;
diff --git a/arch/arm/plat-s3c24xx/include/plat/dma-plat.h b/arch/arm/plat-s3c24xx/include/plat/dma-plat.h
new file mode 100644
index 0000000..fbe84af
--- /dev/null
+++ b/arch/arm/plat-s3c24xx/include/plat/dma-plat.h
@@ -0,0 +1,82 @@
+/* linux/arch/arm/plat-s3c24xx/include/plat/dma-plat.h
+ *
+ * Copyright (C) 2006 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * Samsung S3C24XX DMA support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+extern struct sysdev_class dma_sysclass;
+extern struct s3c2410_dma_chan s3c2410_chans[S3C2410_DMA_CHANNELS];
+
+#define DMA_CH_VALID		(1<<31)
+#define DMA_CH_NEVER		(1<<30)
+
+struct s3c24xx_dma_addr {
+	unsigned long		from;
+	unsigned long		to;
+};
+
+/* struct s3c24xx_dma_map
+ *
+ * this holds the mapping information for the channel selected
+ * to be connected to the specified device
+*/
+
+struct s3c24xx_dma_map {
+	const char		*name;
+	struct s3c24xx_dma_addr  hw_addr;
+
+	unsigned long		 channels[S3C2410_DMA_CHANNELS];
+	unsigned long		 channels_rx[S3C2410_DMA_CHANNELS];
+};
+
+struct s3c24xx_dma_selection {
+	struct s3c24xx_dma_map	*map;
+	unsigned long		 map_size;
+	unsigned long		 dcon_mask;
+
+	void	(*select)(struct s3c2410_dma_chan *chan,
+			  struct s3c24xx_dma_map *map);
+
+	void	(*direction)(struct s3c2410_dma_chan *chan,
+			     struct s3c24xx_dma_map *map,
+			     enum s3c2410_dmasrc dir);
+};
+
+extern int s3c24xx_dma_init_map(struct s3c24xx_dma_selection *sel);
+
+/* struct s3c24xx_dma_order_ch
+ *
+ * channel map for one of the `enum dma_ch` dma channels. the list
+ * entry contains a set of low-level channel numbers, orred with
+ * DMA_CH_VALID, which are checked in the order in the array.
+*/
+
+struct s3c24xx_dma_order_ch {
+	unsigned int	list[S3C2410_DMA_CHANNELS];	/* list of channels */
+	unsigned int	flags;				/* flags */
+};
+
+/* struct s3c24xx_dma_order
+ *
+ * information provided by either the core or the board to give the
+ * dma system a hint on how to allocate channels
+*/
+
+struct s3c24xx_dma_order {
+	struct s3c24xx_dma_order_ch	channels[DMACH_MAX];
+};
+
+extern int s3c24xx_dma_order_set(struct s3c24xx_dma_order *map);
+
+/* DMA init code, called from the cpu support code */
+
+extern int s3c2410_dma_init(void);
+
+extern int s3c24xx_dma_init(unsigned int channels, unsigned int irq,
+			    unsigned int stride);
diff --git a/arch/arm/plat-s3c24xx/include/plat/dma.h b/arch/arm/plat-s3c24xx/include/plat/dma.h
deleted file mode 100644
index c78efe3..0000000
--- a/arch/arm/plat-s3c24xx/include/plat/dma.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* linux/include/asm-arm/plat-s3c24xx/dma.h
- *
- * Copyright (C) 2006 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
- * Samsung S3C24XX DMA support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-extern struct sysdev_class dma_sysclass;
-extern struct s3c2410_dma_chan s3c2410_chans[S3C2410_DMA_CHANNELS];
-
-#define DMA_CH_VALID		(1<<31)
-#define DMA_CH_NEVER		(1<<30)
-
-struct s3c24xx_dma_addr {
-	unsigned long		from;
-	unsigned long		to;
-};
-
-/* struct s3c24xx_dma_map
- *
- * this holds the mapping information for the channel selected
- * to be connected to the specified device
-*/
-
-struct s3c24xx_dma_map {
-	const char		*name;
-	struct s3c24xx_dma_addr  hw_addr;
-
-	unsigned long		 channels[S3C2410_DMA_CHANNELS];
-	unsigned long		 channels_rx[S3C2410_DMA_CHANNELS];
-};
-
-struct s3c24xx_dma_selection {
-	struct s3c24xx_dma_map	*map;
-	unsigned long		 map_size;
-	unsigned long		 dcon_mask;
-
-	void	(*select)(struct s3c2410_dma_chan *chan,
-			  struct s3c24xx_dma_map *map);
-
-	void	(*direction)(struct s3c2410_dma_chan *chan,
-			     struct s3c24xx_dma_map *map,
-			     enum s3c2410_dmasrc dir);
-};
-
-extern int s3c24xx_dma_init_map(struct s3c24xx_dma_selection *sel);
-
-/* struct s3c24xx_dma_order_ch
- *
- * channel map for one of the `enum dma_ch` dma channels. the list
- * entry contains a set of low-level channel numbers, orred with
- * DMA_CH_VALID, which are checked in the order in the array.
-*/
-
-struct s3c24xx_dma_order_ch {
-	unsigned int	list[S3C2410_DMA_CHANNELS];	/* list of channels */
-	unsigned int	flags;				/* flags */
-};
-
-/* struct s3c24xx_dma_order
- *
- * information provided by either the core or the board to give the
- * dma system a hint on how to allocate channels
-*/
-
-struct s3c24xx_dma_order {
-	struct s3c24xx_dma_order_ch	channels[DMACH_MAX];
-};
-
-extern int s3c24xx_dma_order_set(struct s3c24xx_dma_order *map);
-
-/* DMA init code, called from the cpu support code */
-
-extern int s3c2410_dma_init(void);
-
-extern int s3c24xx_dma_init(unsigned int channels, unsigned int irq,
-			    unsigned int stride);
-- 
cgit v0.10.2


From 8970ef47d56fd3db28ee798b9d400caf08abd924 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Thu, 19 Mar 2009 15:02:34 +0000
Subject: [ARM] S3C24XX: Remove hardware specific registers from DMA calls

The S3C24XX DMA API channel configuration registers are being passed
values comprised of register values which makes it hard to move the
API to cover both the S3C24XX and S3C64XX.

These values can be calculated from knowing which device the channel
is connected to, so remove them from the two calls s3c2410_dma_config
and s3c2410_dma_devconfig.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/include/mach/dma.h b/arch/arm/mach-s3c2410/include/mach/dma.h
index 13358ce..acaef67 100644
--- a/arch/arm/mach-s3c2410/include/mach/dma.h
+++ b/arch/arm/mach-s3c2410/include/mach/dma.h
@@ -206,10 +206,10 @@ struct s3c2410_dma_chan {
 
 	/* channel configuration */
 	enum s3c2410_dmasrc	 source;
+	enum dma_ch		 req_ch;
 	unsigned long		 dev_addr;
 	unsigned long		 load_timeout;
 	unsigned int		 flags;		/* channel flags */
-	unsigned int		 hw_cfg;	/* last hw config */
 
 	struct s3c24xx_dma_map	*map;		/* channel hw maps */
 
@@ -290,7 +290,7 @@ extern int s3c2410_dma_enqueue(unsigned int channel, void *id,
  * configure the dma channel
 */
 
-extern int s3c2410_dma_config(unsigned int channel, int xferunit, int dcon);
+extern int s3c2410_dma_config(unsigned int channel, int xferunit);
 
 /* s3c2410_dma_devconfig
  *
@@ -298,7 +298,7 @@ extern int s3c2410_dma_config(unsigned int channel, int xferunit, int dcon);
 */
 
 extern int s3c2410_dma_devconfig(int channel, enum s3c2410_dmasrc source,
-				 int hwcfg, unsigned long devaddr);
+				 unsigned long devaddr);
 
 /* s3c2410_dma_getposition
  *
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index d3d1375..dc58a0d 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -1038,14 +1038,13 @@ EXPORT_SYMBOL(s3c2410_dma_ctrl);
 /* s3c2410_dma_config
  *
  * xfersize:     size of unit in bytes (1,2,4)
- * dcon:         base value of the DCONx register
 */
 
 int s3c2410_dma_config(unsigned int channel,
-		       int xferunit,
-		       int dcon)
+		       int xferunit)
 {
 	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
+	unsigned int dcon;
 
 	pr_debug("%s: chan=%d, xfer_unit=%d, dcon=%08x\n",
 		 __func__, channel, xferunit, dcon);
@@ -1055,10 +1054,33 @@ int s3c2410_dma_config(unsigned int channel,
 
 	pr_debug("%s: Initial dcon is %08x\n", __func__, dcon);
 
-	dcon |= chan->dcon & dma_sel.dcon_mask;
+	dcon = chan->dcon & dma_sel.dcon_mask;
 
 	pr_debug("%s: New dcon is %08x\n", __func__, dcon);
 
+	switch (chan->req_ch) {
+	case DMACH_I2S_IN:
+	case DMACH_I2S_OUT:
+	case DMACH_PCM_IN:
+	case DMACH_PCM_OUT:
+	case DMACH_MIC_IN:
+	default:
+		dcon |= S3C2410_DCON_HANDSHAKE;
+		dcon |= S3C2410_DCON_SYNC_PCLK;
+		break;
+
+	case DMACH_SDI:
+		/* note, ensure if need HANDSHAKE or not */
+		dcon |= S3C2410_DCON_SYNC_PCLK;
+		break;
+
+	case DMACH_XD0:
+	case DMACH_XD1:
+		dcon |= S3C2410_DCON_HANDSHAKE;
+		dcon |= S3C2410_DCON_SYNC_HCLK;
+		break;
+	}
+
 	switch (xferunit) {
 	case 1:
 		dcon |= S3C2410_DCON_BYTE;
@@ -1150,29 +1172,38 @@ EXPORT_SYMBOL(s3c2410_dma_set_buffdone_fn);
  * source:    S3C2410_DMASRC_HW: source is hardware
  *            S3C2410_DMASRC_MEM: source is memory
  *
- * hwcfg:     the value for xxxSTCn register,
- *            bit 0: 0=increment pointer, 1=leave pointer
- *            bit 1: 0=source is AHB, 1=source is APB
- *
  * devaddr:   physical address of the source
 */
 
 int s3c2410_dma_devconfig(int channel,
 			  enum s3c2410_dmasrc source,
-			  int hwcfg,
 			  unsigned long devaddr)
 {
 	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
+	unsigned int hwcfg;
 
 	if (chan == NULL)
 		return -EINVAL;
 
-	pr_debug("%s: source=%d, hwcfg=%08x, devaddr=%08lx\n",
-		 __func__, (int)source, hwcfg, devaddr);
+	pr_debug("%s: source=%d, devaddr=%08lx\n",
+		 __func__, (int)source, devaddr);
 
 	chan->source = source;
 	chan->dev_addr = devaddr;
-	chan->hw_cfg = hwcfg;
+
+	switch (chan->req_ch) {
+	case DMACH_XD0:
+	case DMACH_XD1:
+		hwcfg = 0; /* AHB */
+		break;
+
+	default:
+		hwcfg = S3C2410_DISRCC_APB;
+	}
+
+	/* always assume our peripheral desintation is a fixed
+	 * address in memory. */
+	 hwcfg |= S3C2410_DISRCC_INC;
 
 	switch (source) {
 	case S3C2410_DMASRC_HW:
@@ -1278,8 +1309,8 @@ static int s3c2410_dma_resume(struct sys_device *dev)
 
 	printk(KERN_INFO "dma%d: restoring configuration\n", cp->number);
 
-	s3c2410_dma_config(no, cp->xfer_unit, cp->dcon);
-	s3c2410_dma_devconfig(no, cp->source, cp->hw_cfg, cp->dev_addr);
+	s3c2410_dma_config(no, cp->xfer_unit);
+	s3c2410_dma_devconfig(no, cp->source, cp->dev_addr);
 
 	/* re-select the dma source for this channel */
 
@@ -1476,6 +1507,7 @@ static struct s3c2410_dma_chan *s3c2410_dma_map_channel(int channel)
  found:
 	dmach = &s3c2410_chans[ch];
 	dmach->map = ch_map;
+	dmach->req_ch = channel;
 	dma_chan_map[channel] = dmach;
 
 	/* select the channel */
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 2db166b..889f350 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -789,7 +789,7 @@ static void s3cmci_dma_setup(struct s3cmci_host *host,
 
 	last_source = source;
 
-	s3c2410_dma_devconfig(host->dma, source, 3,
+	s3c2410_dma_devconfig(host->dma, source,
 			      host->mem->start + host->sdidata);
 
 	if (!setup_ok) {
diff --git a/sound/soc/s3c24xx/s3c24xx-pcm.c b/sound/soc/s3c24xx/s3c24xx-pcm.c
index 169ddad..eecfa5e 100644
--- a/sound/soc/s3c24xx/s3c24xx-pcm.c
+++ b/sound/soc/s3c24xx/s3c24xx-pcm.c
@@ -218,24 +218,17 @@ static int s3c24xx_pcm_prepare(struct snd_pcm_substream *substream)
 	 * sync to pclk, half-word transfers to the IIS-FIFO. */
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		s3c2410_dma_devconfig(prtd->params->channel,
-				S3C2410_DMASRC_MEM, S3C2410_DISRCC_INC |
-				S3C2410_DISRCC_APB, prtd->params->dma_addr);
-
-		s3c2410_dma_config(prtd->params->channel,
-				prtd->params->dma_size,
-				S3C2410_DCON_SYNC_PCLK |
-				S3C2410_DCON_HANDSHAKE);
+				      S3C2410_DMASRC_MEM,
+				      prtd->params->dma_addr);
 	} else {
-		s3c2410_dma_config(prtd->params->channel,
-				prtd->params->dma_size,
-				S3C2410_DCON_HANDSHAKE |
-				S3C2410_DCON_SYNC_PCLK);
-
 		s3c2410_dma_devconfig(prtd->params->channel,
-					S3C2410_DMASRC_HW, 0x3,
-					prtd->params->dma_addr);
+				      S3C2410_DMASRC_HW,
+				      prtd->params->dma_addr);
 	}
 
+	s3c2410_dma_config(prtd->params->channel,
+			   prtd->params->dma_size);
+
 	/* flush the DMA channel */
 	s3c2410_dma_ctrl(prtd->params->channel, S3C2410_DMAOP_FLUSH);
 	prtd->dma_loaded = 0;
-- 
cgit v0.10.2


From 44dc94045f6ddbc07db3e0eb3448c2efc13ac2cf Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Thu, 19 Mar 2009 15:02:35 +0000
Subject: [ARM] S3C24XX: DMA: Split hardware regs out of <mach/dma.h>

The <mach/dma.h> is for the driver API for the DMA system and should
not have anything relying on the CPU specific registers.

Remove the registers to <plat/dma-regs.h> for the code that really
needs to know about them.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index 6248f41..aba159f 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -25,6 +25,7 @@
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
 #include <plat/regs-ac97.h>
+#include <plat/regs-dma.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
diff --git a/arch/arm/mach-s3c2410/include/mach/dma.h b/arch/arm/mach-s3c2410/include/mach/dma.h
index acaef67..6cefa56 100644
--- a/arch/arm/mach-s3c2410/include/mach/dma.h
+++ b/arch/arm/mach-s3c2410/include/mach/dma.h
@@ -3,7 +3,7 @@
  * Copyright (C) 2003,2004,2006 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
  *
- * Samsung S3C241XX DMA support
+ * Samsung S3C24XX DMA support
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -68,7 +68,6 @@ enum s3c2410_dma_state {
 	S3C2410_DMA_PAUSED
 };
 
-
 /* enum s3c2410_dma_loadst
  *
  * This represents the state of the DMA engine, wrt to the loaded / running
@@ -311,138 +310,4 @@ extern int s3c2410_dma_getposition(unsigned int channel,
 extern int s3c2410_dma_set_opfn(unsigned int, s3c2410_dma_opfn_t rtn);
 extern int s3c2410_dma_set_buffdone_fn(unsigned int, s3c2410_dma_cbfn_t rtn);
 
-/* DMA Register definitions */
-
-#define S3C2410_DMA_DISRC       (0x00)
-#define S3C2410_DMA_DISRCC      (0x04)
-#define S3C2410_DMA_DIDST       (0x08)
-#define S3C2410_DMA_DIDSTC      (0x0C)
-#define S3C2410_DMA_DCON        (0x10)
-#define S3C2410_DMA_DSTAT       (0x14)
-#define S3C2410_DMA_DCSRC       (0x18)
-#define S3C2410_DMA_DCDST       (0x1C)
-#define S3C2410_DMA_DMASKTRIG   (0x20)
-#define S3C2412_DMA_DMAREQSEL	(0x24)
-#define S3C2443_DMA_DMAREQSEL	(0x24)
-
-#define S3C2410_DISRCC_INC	(1<<0)
-#define S3C2410_DISRCC_APB	(1<<1)
-
-#define S3C2410_DMASKTRIG_STOP   (1<<2)
-#define S3C2410_DMASKTRIG_ON     (1<<1)
-#define S3C2410_DMASKTRIG_SWTRIG (1<<0)
-
-#define S3C2410_DCON_DEMAND     (0<<31)
-#define S3C2410_DCON_HANDSHAKE  (1<<31)
-#define S3C2410_DCON_SYNC_PCLK  (0<<30)
-#define S3C2410_DCON_SYNC_HCLK  (1<<30)
-
-#define S3C2410_DCON_INTREQ     (1<<29)
-
-#define S3C2410_DCON_CH0_XDREQ0	(0<<24)
-#define S3C2410_DCON_CH0_UART0	(1<<24)
-#define S3C2410_DCON_CH0_SDI	(2<<24)
-#define S3C2410_DCON_CH0_TIMER	(3<<24)
-#define S3C2410_DCON_CH0_USBEP1	(4<<24)
-
-#define S3C2410_DCON_CH1_XDREQ1	(0<<24)
-#define S3C2410_DCON_CH1_UART1	(1<<24)
-#define S3C2410_DCON_CH1_I2SSDI	(2<<24)
-#define S3C2410_DCON_CH1_SPI	(3<<24)
-#define S3C2410_DCON_CH1_USBEP2	(4<<24)
-
-#define S3C2410_DCON_CH2_I2SSDO	(0<<24)
-#define S3C2410_DCON_CH2_I2SSDI	(1<<24)
-#define S3C2410_DCON_CH2_SDI	(2<<24)
-#define S3C2410_DCON_CH2_TIMER	(3<<24)
-#define S3C2410_DCON_CH2_USBEP3	(4<<24)
-
-#define S3C2410_DCON_CH3_UART2	(0<<24)
-#define S3C2410_DCON_CH3_SDI	(1<<24)
-#define S3C2410_DCON_CH3_SPI	(2<<24)
-#define S3C2410_DCON_CH3_TIMER	(3<<24)
-#define S3C2410_DCON_CH3_USBEP4	(4<<24)
-
-#define S3C2410_DCON_SRCSHIFT   (24)
-#define S3C2410_DCON_SRCMASK	(7<<24)
-
-#define S3C2410_DCON_BYTE       (0<<20)
-#define S3C2410_DCON_HALFWORD   (1<<20)
-#define S3C2410_DCON_WORD       (2<<20)
-
-#define S3C2410_DCON_AUTORELOAD (0<<22)
-#define S3C2410_DCON_NORELOAD   (1<<22)
-#define S3C2410_DCON_HWTRIG     (1<<23)
-
-#ifdef CONFIG_CPU_S3C2440
-#define S3C2440_DIDSTC_CHKINT	(1<<2)
-
-#define S3C2440_DCON_CH0_I2SSDO	(5<<24)
-#define S3C2440_DCON_CH0_PCMIN	(6<<24)
-
-#define S3C2440_DCON_CH1_PCMOUT	(5<<24)
-#define S3C2440_DCON_CH1_SDI	(6<<24)
-
-#define S3C2440_DCON_CH2_PCMIN	(5<<24)
-#define S3C2440_DCON_CH2_MICIN	(6<<24)
-
-#define S3C2440_DCON_CH3_MICIN	(5<<24)
-#define S3C2440_DCON_CH3_PCMOUT	(6<<24)
-#endif
-
-#ifdef CONFIG_CPU_S3C2412
-
-#define S3C2412_DMAREQSEL_SRC(x)	((x)<<1)
-
-#define S3C2412_DMAREQSEL_HW		(1)
-
-#define S3C2412_DMAREQSEL_SPI0TX	S3C2412_DMAREQSEL_SRC(0)
-#define S3C2412_DMAREQSEL_SPI0RX	S3C2412_DMAREQSEL_SRC(1)
-#define S3C2412_DMAREQSEL_SPI1TX	S3C2412_DMAREQSEL_SRC(2)
-#define S3C2412_DMAREQSEL_SPI1RX	S3C2412_DMAREQSEL_SRC(3)
-#define S3C2412_DMAREQSEL_I2STX		S3C2412_DMAREQSEL_SRC(4)
-#define S3C2412_DMAREQSEL_I2SRX		S3C2412_DMAREQSEL_SRC(5)
-#define S3C2412_DMAREQSEL_TIMER		S3C2412_DMAREQSEL_SRC(9)
-#define S3C2412_DMAREQSEL_SDI		S3C2412_DMAREQSEL_SRC(10)
-#define S3C2412_DMAREQSEL_USBEP1	S3C2412_DMAREQSEL_SRC(13)
-#define S3C2412_DMAREQSEL_USBEP2	S3C2412_DMAREQSEL_SRC(14)
-#define S3C2412_DMAREQSEL_USBEP3	S3C2412_DMAREQSEL_SRC(15)
-#define S3C2412_DMAREQSEL_USBEP4	S3C2412_DMAREQSEL_SRC(16)
-#define S3C2412_DMAREQSEL_XDREQ0	S3C2412_DMAREQSEL_SRC(17)
-#define S3C2412_DMAREQSEL_XDREQ1	S3C2412_DMAREQSEL_SRC(18)
-#define S3C2412_DMAREQSEL_UART0_0	S3C2412_DMAREQSEL_SRC(19)
-#define S3C2412_DMAREQSEL_UART0_1	S3C2412_DMAREQSEL_SRC(20)
-#define S3C2412_DMAREQSEL_UART1_0	S3C2412_DMAREQSEL_SRC(21)
-#define S3C2412_DMAREQSEL_UART1_1	S3C2412_DMAREQSEL_SRC(22)
-#define S3C2412_DMAREQSEL_UART2_0	S3C2412_DMAREQSEL_SRC(23)
-#define S3C2412_DMAREQSEL_UART2_1	S3C2412_DMAREQSEL_SRC(24)
-
-#endif
-
-#define S3C2443_DMAREQSEL_SRC(x)	((x)<<1)
-
-#define S3C2443_DMAREQSEL_HW		(1)
-
-#define S3C2443_DMAREQSEL_SPI0TX	S3C2443_DMAREQSEL_SRC(0)
-#define S3C2443_DMAREQSEL_SPI0RX	S3C2443_DMAREQSEL_SRC(1)
-#define S3C2443_DMAREQSEL_SPI1TX	S3C2443_DMAREQSEL_SRC(2)
-#define S3C2443_DMAREQSEL_SPI1RX	S3C2443_DMAREQSEL_SRC(3)
-#define S3C2443_DMAREQSEL_I2STX		S3C2443_DMAREQSEL_SRC(4)
-#define S3C2443_DMAREQSEL_I2SRX		S3C2443_DMAREQSEL_SRC(5)
-#define S3C2443_DMAREQSEL_TIMER		S3C2443_DMAREQSEL_SRC(9)
-#define S3C2443_DMAREQSEL_SDI		S3C2443_DMAREQSEL_SRC(10)
-#define S3C2443_DMAREQSEL_XDREQ0	S3C2443_DMAREQSEL_SRC(17)
-#define S3C2443_DMAREQSEL_XDREQ1	S3C2443_DMAREQSEL_SRC(18)
-#define S3C2443_DMAREQSEL_UART0_0	S3C2443_DMAREQSEL_SRC(19)
-#define S3C2443_DMAREQSEL_UART0_1	S3C2443_DMAREQSEL_SRC(20)
-#define S3C2443_DMAREQSEL_UART1_0	S3C2443_DMAREQSEL_SRC(21)
-#define S3C2443_DMAREQSEL_UART1_1	S3C2443_DMAREQSEL_SRC(22)
-#define S3C2443_DMAREQSEL_UART2_0	S3C2443_DMAREQSEL_SRC(23)
-#define S3C2443_DMAREQSEL_UART2_1	S3C2443_DMAREQSEL_SRC(24)
-#define S3C2443_DMAREQSEL_UART3_0	S3C2443_DMAREQSEL_SRC(25)
-#define S3C2443_DMAREQSEL_UART3_1	S3C2443_DMAREQSEL_SRC(26)
-#define S3C2443_DMAREQSEL_PCMOUT	S3C2443_DMAREQSEL_SRC(27)
-#define S3C2443_DMAREQSEL_PCMIN 	S3C2443_DMAREQSEL_SRC(28)
-#define S3C2443_DMAREQSEL_MICIN		S3C2443_DMAREQSEL_SRC(29)
-
 #endif /* __ASM_ARCH_DMA_H */
diff --git a/arch/arm/mach-s3c2412/dma.c b/arch/arm/mach-s3c2412/dma.c
index f888d4e..f8d16fc 100644
--- a/arch/arm/mach-s3c2412/dma.c
+++ b/arch/arm/mach-s3c2412/dma.c
@@ -26,6 +26,7 @@
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
 #include <plat/regs-ac97.h>
+#include <plat/regs-dma.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
diff --git a/arch/arm/mach-s3c2440/dma.c b/arch/arm/mach-s3c2440/dma.c
index 895783f..e08e081 100644
--- a/arch/arm/mach-s3c2440/dma.c
+++ b/arch/arm/mach-s3c2440/dma.c
@@ -17,6 +17,7 @@
 #include <linux/sysdev.h>
 #include <linux/serial_core.h>
 
+#include <mach/map.h>
 #include <mach/dma.h>
 
 #include <plat/dma-plat.h>
@@ -25,6 +26,7 @@
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
 #include <plat/regs-ac97.h>
+#include <plat/regs-dma.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
diff --git a/arch/arm/mach-s3c2443/dma.c b/arch/arm/mach-s3c2443/dma.c
index 825b718..397f3b5 100644
--- a/arch/arm/mach-s3c2443/dma.c
+++ b/arch/arm/mach-s3c2443/dma.c
@@ -26,6 +26,7 @@
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
 #include <plat/regs-ac97.h>
+#include <plat/regs-dma.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index dc58a0d..7c37c9a 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -31,10 +31,10 @@
 #include <asm/irq.h>
 #include <mach/hardware.h>
 #include <mach/dma.h>
-
 #include <mach/map.h>
 
 #include <plat/dma-plat.h>
+#include <plat/regs-dma.h>
 
 /* io map for dma */
 static void __iomem *dma_base;
diff --git a/arch/arm/plat-s3c24xx/include/plat/regs-dma.h b/arch/arm/plat-s3c24xx/include/plat/regs-dma.h
new file mode 100644
index 0000000..d38e7b7
--- /dev/null
+++ b/arch/arm/plat-s3c24xx/include/plat/regs-dma.h
@@ -0,0 +1,145 @@
+/* arch/arm/mach-s3c2410/include/mach/dma.h
+ *
+ * Copyright (C) 2003,2004,2006 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * Samsung S3C24XX DMA support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+/* DMA Register definitions */
+
+#define S3C2410_DMA_DISRC       (0x00)
+#define S3C2410_DMA_DISRCC      (0x04)
+#define S3C2410_DMA_DIDST       (0x08)
+#define S3C2410_DMA_DIDSTC      (0x0C)
+#define S3C2410_DMA_DCON        (0x10)
+#define S3C2410_DMA_DSTAT       (0x14)
+#define S3C2410_DMA_DCSRC       (0x18)
+#define S3C2410_DMA_DCDST       (0x1C)
+#define S3C2410_DMA_DMASKTRIG   (0x20)
+#define S3C2412_DMA_DMAREQSEL	(0x24)
+#define S3C2443_DMA_DMAREQSEL	(0x24)
+
+#define S3C2410_DISRCC_INC	(1<<0)
+#define S3C2410_DISRCC_APB	(1<<1)
+
+#define S3C2410_DMASKTRIG_STOP   (1<<2)
+#define S3C2410_DMASKTRIG_ON     (1<<1)
+#define S3C2410_DMASKTRIG_SWTRIG (1<<0)
+
+#define S3C2410_DCON_DEMAND     (0<<31)
+#define S3C2410_DCON_HANDSHAKE  (1<<31)
+#define S3C2410_DCON_SYNC_PCLK  (0<<30)
+#define S3C2410_DCON_SYNC_HCLK  (1<<30)
+
+#define S3C2410_DCON_INTREQ     (1<<29)
+
+#define S3C2410_DCON_CH0_XDREQ0	(0<<24)
+#define S3C2410_DCON_CH0_UART0	(1<<24)
+#define S3C2410_DCON_CH0_SDI	(2<<24)
+#define S3C2410_DCON_CH0_TIMER	(3<<24)
+#define S3C2410_DCON_CH0_USBEP1	(4<<24)
+
+#define S3C2410_DCON_CH1_XDREQ1	(0<<24)
+#define S3C2410_DCON_CH1_UART1	(1<<24)
+#define S3C2410_DCON_CH1_I2SSDI	(2<<24)
+#define S3C2410_DCON_CH1_SPI	(3<<24)
+#define S3C2410_DCON_CH1_USBEP2	(4<<24)
+
+#define S3C2410_DCON_CH2_I2SSDO	(0<<24)
+#define S3C2410_DCON_CH2_I2SSDI	(1<<24)
+#define S3C2410_DCON_CH2_SDI	(2<<24)
+#define S3C2410_DCON_CH2_TIMER	(3<<24)
+#define S3C2410_DCON_CH2_USBEP3	(4<<24)
+
+#define S3C2410_DCON_CH3_UART2	(0<<24)
+#define S3C2410_DCON_CH3_SDI	(1<<24)
+#define S3C2410_DCON_CH3_SPI	(2<<24)
+#define S3C2410_DCON_CH3_TIMER	(3<<24)
+#define S3C2410_DCON_CH3_USBEP4	(4<<24)
+
+#define S3C2410_DCON_SRCSHIFT   (24)
+#define S3C2410_DCON_SRCMASK	(7<<24)
+
+#define S3C2410_DCON_BYTE       (0<<20)
+#define S3C2410_DCON_HALFWORD   (1<<20)
+#define S3C2410_DCON_WORD       (2<<20)
+
+#define S3C2410_DCON_AUTORELOAD (0<<22)
+#define S3C2410_DCON_NORELOAD   (1<<22)
+#define S3C2410_DCON_HWTRIG     (1<<23)
+
+#ifdef CONFIG_CPU_S3C2440
+#define S3C2440_DIDSTC_CHKINT	(1<<2)
+
+#define S3C2440_DCON_CH0_I2SSDO	(5<<24)
+#define S3C2440_DCON_CH0_PCMIN	(6<<24)
+
+#define S3C2440_DCON_CH1_PCMOUT	(5<<24)
+#define S3C2440_DCON_CH1_SDI	(6<<24)
+
+#define S3C2440_DCON_CH2_PCMIN	(5<<24)
+#define S3C2440_DCON_CH2_MICIN	(6<<24)
+
+#define S3C2440_DCON_CH3_MICIN	(5<<24)
+#define S3C2440_DCON_CH3_PCMOUT	(6<<24)
+#endif
+
+#ifdef CONFIG_CPU_S3C2412
+
+#define S3C2412_DMAREQSEL_SRC(x)	((x)<<1)
+
+#define S3C2412_DMAREQSEL_HW		(1)
+
+#define S3C2412_DMAREQSEL_SPI0TX	S3C2412_DMAREQSEL_SRC(0)
+#define S3C2412_DMAREQSEL_SPI0RX	S3C2412_DMAREQSEL_SRC(1)
+#define S3C2412_DMAREQSEL_SPI1TX	S3C2412_DMAREQSEL_SRC(2)
+#define S3C2412_DMAREQSEL_SPI1RX	S3C2412_DMAREQSEL_SRC(3)
+#define S3C2412_DMAREQSEL_I2STX		S3C2412_DMAREQSEL_SRC(4)
+#define S3C2412_DMAREQSEL_I2SRX		S3C2412_DMAREQSEL_SRC(5)
+#define S3C2412_DMAREQSEL_TIMER		S3C2412_DMAREQSEL_SRC(9)
+#define S3C2412_DMAREQSEL_SDI		S3C2412_DMAREQSEL_SRC(10)
+#define S3C2412_DMAREQSEL_USBEP1	S3C2412_DMAREQSEL_SRC(13)
+#define S3C2412_DMAREQSEL_USBEP2	S3C2412_DMAREQSEL_SRC(14)
+#define S3C2412_DMAREQSEL_USBEP3	S3C2412_DMAREQSEL_SRC(15)
+#define S3C2412_DMAREQSEL_USBEP4	S3C2412_DMAREQSEL_SRC(16)
+#define S3C2412_DMAREQSEL_XDREQ0	S3C2412_DMAREQSEL_SRC(17)
+#define S3C2412_DMAREQSEL_XDREQ1	S3C2412_DMAREQSEL_SRC(18)
+#define S3C2412_DMAREQSEL_UART0_0	S3C2412_DMAREQSEL_SRC(19)
+#define S3C2412_DMAREQSEL_UART0_1	S3C2412_DMAREQSEL_SRC(20)
+#define S3C2412_DMAREQSEL_UART1_0	S3C2412_DMAREQSEL_SRC(21)
+#define S3C2412_DMAREQSEL_UART1_1	S3C2412_DMAREQSEL_SRC(22)
+#define S3C2412_DMAREQSEL_UART2_0	S3C2412_DMAREQSEL_SRC(23)
+#define S3C2412_DMAREQSEL_UART2_1	S3C2412_DMAREQSEL_SRC(24)
+
+#endif
+
+#define S3C2443_DMAREQSEL_SRC(x)	((x)<<1)
+
+#define S3C2443_DMAREQSEL_HW		(1)
+
+#define S3C2443_DMAREQSEL_SPI0TX	S3C2443_DMAREQSEL_SRC(0)
+#define S3C2443_DMAREQSEL_SPI0RX	S3C2443_DMAREQSEL_SRC(1)
+#define S3C2443_DMAREQSEL_SPI1TX	S3C2443_DMAREQSEL_SRC(2)
+#define S3C2443_DMAREQSEL_SPI1RX	S3C2443_DMAREQSEL_SRC(3)
+#define S3C2443_DMAREQSEL_I2STX		S3C2443_DMAREQSEL_SRC(4)
+#define S3C2443_DMAREQSEL_I2SRX		S3C2443_DMAREQSEL_SRC(5)
+#define S3C2443_DMAREQSEL_TIMER		S3C2443_DMAREQSEL_SRC(9)
+#define S3C2443_DMAREQSEL_SDI		S3C2443_DMAREQSEL_SRC(10)
+#define S3C2443_DMAREQSEL_XDREQ0	S3C2443_DMAREQSEL_SRC(17)
+#define S3C2443_DMAREQSEL_XDREQ1	S3C2443_DMAREQSEL_SRC(18)
+#define S3C2443_DMAREQSEL_UART0_0	S3C2443_DMAREQSEL_SRC(19)
+#define S3C2443_DMAREQSEL_UART0_1	S3C2443_DMAREQSEL_SRC(20)
+#define S3C2443_DMAREQSEL_UART1_0	S3C2443_DMAREQSEL_SRC(21)
+#define S3C2443_DMAREQSEL_UART1_1	S3C2443_DMAREQSEL_SRC(22)
+#define S3C2443_DMAREQSEL_UART2_0	S3C2443_DMAREQSEL_SRC(23)
+#define S3C2443_DMAREQSEL_UART2_1	S3C2443_DMAREQSEL_SRC(24)
+#define S3C2443_DMAREQSEL_UART3_0	S3C2443_DMAREQSEL_SRC(25)
+#define S3C2443_DMAREQSEL_UART3_1	S3C2443_DMAREQSEL_SRC(26)
+#define S3C2443_DMAREQSEL_PCMOUT	S3C2443_DMAREQSEL_SRC(27)
+#define S3C2443_DMAREQSEL_PCMIN 	S3C2443_DMAREQSEL_SRC(28)
+#define S3C2443_DMAREQSEL_MICIN		S3C2443_DMAREQSEL_SRC(29)
-- 
cgit v0.10.2


From 023b40cd1018915beb5a519b55ea174683215f16 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Thu, 19 Mar 2009 15:02:36 +0000
Subject: [ARM] S3C24XX: Fix indentation in <plat/dma-regs.h>

The <plat/dma-regs.h> pre-date the invention of the TAB character,
so fix the indentation of the register defines.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c24xx/include/plat/regs-dma.h b/arch/arm/plat-s3c24xx/include/plat/regs-dma.h
index d38e7b7..3bc0a21 100644
--- a/arch/arm/plat-s3c24xx/include/plat/regs-dma.h
+++ b/arch/arm/plat-s3c24xx/include/plat/regs-dma.h
@@ -12,81 +12,81 @@
 
 /* DMA Register definitions */
 
-#define S3C2410_DMA_DISRC       (0x00)
-#define S3C2410_DMA_DISRCC      (0x04)
-#define S3C2410_DMA_DIDST       (0x08)
-#define S3C2410_DMA_DIDSTC      (0x0C)
-#define S3C2410_DMA_DCON        (0x10)
-#define S3C2410_DMA_DSTAT       (0x14)
-#define S3C2410_DMA_DCSRC       (0x18)
-#define S3C2410_DMA_DCDST       (0x1C)
-#define S3C2410_DMA_DMASKTRIG   (0x20)
-#define S3C2412_DMA_DMAREQSEL	(0x24)
-#define S3C2443_DMA_DMAREQSEL	(0x24)
-
-#define S3C2410_DISRCC_INC	(1<<0)
-#define S3C2410_DISRCC_APB	(1<<1)
-
-#define S3C2410_DMASKTRIG_STOP   (1<<2)
-#define S3C2410_DMASKTRIG_ON     (1<<1)
-#define S3C2410_DMASKTRIG_SWTRIG (1<<0)
-
-#define S3C2410_DCON_DEMAND     (0<<31)
-#define S3C2410_DCON_HANDSHAKE  (1<<31)
-#define S3C2410_DCON_SYNC_PCLK  (0<<30)
-#define S3C2410_DCON_SYNC_HCLK  (1<<30)
-
-#define S3C2410_DCON_INTREQ     (1<<29)
-
-#define S3C2410_DCON_CH0_XDREQ0	(0<<24)
-#define S3C2410_DCON_CH0_UART0	(1<<24)
-#define S3C2410_DCON_CH0_SDI	(2<<24)
-#define S3C2410_DCON_CH0_TIMER	(3<<24)
-#define S3C2410_DCON_CH0_USBEP1	(4<<24)
-
-#define S3C2410_DCON_CH1_XDREQ1	(0<<24)
-#define S3C2410_DCON_CH1_UART1	(1<<24)
-#define S3C2410_DCON_CH1_I2SSDI	(2<<24)
-#define S3C2410_DCON_CH1_SPI	(3<<24)
-#define S3C2410_DCON_CH1_USBEP2	(4<<24)
-
-#define S3C2410_DCON_CH2_I2SSDO	(0<<24)
-#define S3C2410_DCON_CH2_I2SSDI	(1<<24)
-#define S3C2410_DCON_CH2_SDI	(2<<24)
-#define S3C2410_DCON_CH2_TIMER	(3<<24)
-#define S3C2410_DCON_CH2_USBEP3	(4<<24)
-
-#define S3C2410_DCON_CH3_UART2	(0<<24)
-#define S3C2410_DCON_CH3_SDI	(1<<24)
-#define S3C2410_DCON_CH3_SPI	(2<<24)
-#define S3C2410_DCON_CH3_TIMER	(3<<24)
-#define S3C2410_DCON_CH3_USBEP4	(4<<24)
-
-#define S3C2410_DCON_SRCSHIFT   (24)
-#define S3C2410_DCON_SRCMASK	(7<<24)
-
-#define S3C2410_DCON_BYTE       (0<<20)
-#define S3C2410_DCON_HALFWORD   (1<<20)
-#define S3C2410_DCON_WORD       (2<<20)
-
-#define S3C2410_DCON_AUTORELOAD (0<<22)
-#define S3C2410_DCON_NORELOAD   (1<<22)
-#define S3C2410_DCON_HWTRIG     (1<<23)
+#define S3C2410_DMA_DISRC		(0x00)
+#define S3C2410_DMA_DISRCC		(0x04)
+#define S3C2410_DMA_DIDST		(0x08)
+#define S3C2410_DMA_DIDSTC		(0x0C)
+#define S3C2410_DMA_DCON		(0x10)
+#define S3C2410_DMA_DSTAT		(0x14)
+#define S3C2410_DMA_DCSRC		(0x18)
+#define S3C2410_DMA_DCDST		(0x1C)
+#define S3C2410_DMA_DMASKTRIG		(0x20)
+#define S3C2412_DMA_DMAREQSEL		(0x24)
+#define S3C2443_DMA_DMAREQSEL		(0x24)
+
+#define S3C2410_DISRCC_INC		(1<<0)
+#define S3C2410_DISRCC_APB		(1<<1)
+
+#define S3C2410_DMASKTRIG_STOP		(1<<2)
+#define S3C2410_DMASKTRIG_ON		(1<<1)
+#define S3C2410_DMASKTRIG_SWTRIG	(1<<0)
+
+#define S3C2410_DCON_DEMAND		(0<<31)
+#define S3C2410_DCON_HANDSHAKE		(1<<31)
+#define S3C2410_DCON_SYNC_PCLK		(0<<30)
+#define S3C2410_DCON_SYNC_HCLK		(1<<30)
+
+#define S3C2410_DCON_INTREQ		(1<<29)
+
+#define S3C2410_DCON_CH0_XDREQ0		(0<<24)
+#define S3C2410_DCON_CH0_UART0		(1<<24)
+#define S3C2410_DCON_CH0_SDI		(2<<24)
+#define S3C2410_DCON_CH0_TIMER		(3<<24)
+#define S3C2410_DCON_CH0_USBEP1		(4<<24)
+
+#define S3C2410_DCON_CH1_XDREQ1		(0<<24)
+#define S3C2410_DCON_CH1_UART1		(1<<24)
+#define S3C2410_DCON_CH1_I2SSDI		(2<<24)
+#define S3C2410_DCON_CH1_SPI		(3<<24)
+#define S3C2410_DCON_CH1_USBEP2		(4<<24)
+
+#define S3C2410_DCON_CH2_I2SSDO		(0<<24)
+#define S3C2410_DCON_CH2_I2SSDI		(1<<24)
+#define S3C2410_DCON_CH2_SDI		(2<<24)
+#define S3C2410_DCON_CH2_TIMER		(3<<24)
+#define S3C2410_DCON_CH2_USBEP3		(4<<24)
+
+#define S3C2410_DCON_CH3_UART2		(0<<24)
+#define S3C2410_DCON_CH3_SDI		(1<<24)
+#define S3C2410_DCON_CH3_SPI		(2<<24)
+#define S3C2410_DCON_CH3_TIMER		(3<<24)
+#define S3C2410_DCON_CH3_USBEP4		(4<<24)
+
+#define S3C2410_DCON_SRCSHIFT		(24)
+#define S3C2410_DCON_SRCMASK		(7<<24)
+
+#define S3C2410_DCON_BYTE		(0<<20)
+#define S3C2410_DCON_HALFWORD		(1<<20)
+#define S3C2410_DCON_WORD		(2<<20)
+
+#define S3C2410_DCON_AUTORELOAD		(0<<22)
+#define S3C2410_DCON_NORELOAD		(1<<22)
+#define S3C2410_DCON_HWTRIG		(1<<23)
 
 #ifdef CONFIG_CPU_S3C2440
-#define S3C2440_DIDSTC_CHKINT	(1<<2)
+#define S3C2440_DIDSTC_CHKINT		(1<<2)
 
-#define S3C2440_DCON_CH0_I2SSDO	(5<<24)
-#define S3C2440_DCON_CH0_PCMIN	(6<<24)
+#define S3C2440_DCON_CH0_I2SSDO		(5<<24)
+#define S3C2440_DCON_CH0_PCMIN		(6<<24)
 
-#define S3C2440_DCON_CH1_PCMOUT	(5<<24)
-#define S3C2440_DCON_CH1_SDI	(6<<24)
+#define S3C2440_DCON_CH1_PCMOUT		(5<<24)
+#define S3C2440_DCON_CH1_SDI		(6<<24)
 
-#define S3C2440_DCON_CH2_PCMIN	(5<<24)
-#define S3C2440_DCON_CH2_MICIN	(6<<24)
+#define S3C2440_DCON_CH2_PCMIN		(5<<24)
+#define S3C2440_DCON_CH2_MICIN		(6<<24)
 
-#define S3C2440_DCON_CH3_MICIN	(5<<24)
-#define S3C2440_DCON_CH3_PCMOUT	(6<<24)
+#define S3C2440_DCON_CH3_MICIN		(5<<24)
+#define S3C2440_DCON_CH3_PCMOUT		(6<<24)
 #endif
 
 #ifdef CONFIG_CPU_S3C2412
-- 
cgit v0.10.2


From c133c290118bfc99cd7a23b5a00b5252d2be90c8 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Thu, 19 Mar 2009 15:02:37 +0000
Subject: [ARM] S3C: Make common <plat/dma.h> header

Move the common information from the <mach/dma.h> header into a new
shareable <plat/dma.h> header for re-use on the s3c64xx.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/include/mach/dma.h b/arch/arm/mach-s3c2410/include/mach/dma.h
index 6cefa56..2fbabec 100644
--- a/arch/arm/mach-s3c2410/include/mach/dma.h
+++ b/arch/arm/mach-s3c2410/include/mach/dma.h
@@ -13,6 +13,7 @@
 #ifndef __ASM_ARCH_DMA_H
 #define __ASM_ARCH_DMA_H __FILE__
 
+#include <plat/dma.h>
 #include <linux/sysdev.h>
 #include <mach/hardware.h>
 
@@ -103,32 +104,6 @@ enum s3c2410_dma_loadst {
 	S3C2410_DMALOAD_1LOADED_1RUNNING,
 };
 
-enum s3c2410_dma_buffresult {
-	S3C2410_RES_OK,
-	S3C2410_RES_ERR,
-	S3C2410_RES_ABORT
-};
-
-enum s3c2410_dmasrc {
-	S3C2410_DMASRC_HW,		/* source is memory */
-	S3C2410_DMASRC_MEM		/* source is hardware */
-};
-
-/* enum s3c2410_chan_op
- *
- * operation codes passed to the DMA code by the user, and also used
- * to inform the current channel owner of any changes to the system state
-*/
-
-enum s3c2410_chan_op {
-	S3C2410_DMAOP_START,
-	S3C2410_DMAOP_STOP,
-	S3C2410_DMAOP_PAUSE,
-	S3C2410_DMAOP_RESUME,
-	S3C2410_DMAOP_FLUSH,
-	S3C2410_DMAOP_TIMEOUT,		/* internal signal to handler */
-	S3C2410_DMAOP_STARTED,		/* indicate channel started */
-};
 
 /* flags */
 
@@ -138,17 +113,14 @@ enum s3c2410_chan_op {
 
 /* dma buffer */
 
-struct s3c2410_dma_client {
-	char                *name;
-};
+struct s3c2410_dma_buf;
 
-/* s3c2410_dma_buf_s
+/* s3c2410_dma_buf
  *
  * internally used buffer structure to describe a queued or running
  * buffer.
 */
 
-struct s3c2410_dma_buf;
 struct s3c2410_dma_buf {
 	struct s3c2410_dma_buf	*next;
 	int			 magic;		/* magic */
@@ -160,20 +132,6 @@ struct s3c2410_dma_buf {
 
 /* [1] is this updated for both recv/send modes? */
 
-struct s3c2410_dma_chan;
-
-/* s3c2410_dma_cbfn_t
- *
- * buffer callback routine type
-*/
-
-typedef void (*s3c2410_dma_cbfn_t)(struct s3c2410_dma_chan *,
-				   void *buf, int size,
-				   enum s3c2410_dma_buffresult result);
-
-typedef int  (*s3c2410_dma_opfn_t)(struct s3c2410_dma_chan *,
-				   enum s3c2410_chan_op );
-
 struct s3c2410_dma_stats {
 	unsigned long		loads;
 	unsigned long		timeout_longest;
@@ -241,73 +199,4 @@ extern struct s3c2410_dma_chan s3c2410_chans[];
 /* note, we don't really use dma_device_t at the moment */
 typedef unsigned long dma_device_t;
 
-/* functions --------------------------------------------------------------- */
-
-/* s3c2410_dma_request
- *
- * request a dma channel exclusivley
-*/
-
-extern int s3c2410_dma_request(unsigned int channel,
-			       struct s3c2410_dma_client *, void *dev);
-
-
-/* s3c2410_dma_ctrl
- *
- * change the state of the dma channel
-*/
-
-extern int s3c2410_dma_ctrl(unsigned int channel, enum s3c2410_chan_op op);
-
-/* s3c2410_dma_setflags
- *
- * set the channel's flags to a given state
-*/
-
-extern int s3c2410_dma_setflags(unsigned int channel,
-				unsigned int flags);
-
-/* s3c2410_dma_free
- *
- * free the dma channel (will also abort any outstanding operations)
-*/
-
-extern int s3c2410_dma_free(unsigned int channel, struct s3c2410_dma_client *);
-
-/* s3c2410_dma_enqueue
- *
- * place the given buffer onto the queue of operations for the channel.
- * The buffer must be allocated from dma coherent memory, or the Dcache/WB
- * drained before the buffer is given to the DMA system.
-*/
-
-extern int s3c2410_dma_enqueue(unsigned int channel, void *id,
-			       dma_addr_t data, int size);
-
-/* s3c2410_dma_config
- *
- * configure the dma channel
-*/
-
-extern int s3c2410_dma_config(unsigned int channel, int xferunit);
-
-/* s3c2410_dma_devconfig
- *
- * configure the device we're talking to
-*/
-
-extern int s3c2410_dma_devconfig(int channel, enum s3c2410_dmasrc source,
-				 unsigned long devaddr);
-
-/* s3c2410_dma_getposition
- *
- * get the position that the dma transfer is currently at
-*/
-
-extern int s3c2410_dma_getposition(unsigned int channel,
-				   dma_addr_t *src, dma_addr_t *dest);
-
-extern int s3c2410_dma_set_opfn(unsigned int, s3c2410_dma_opfn_t rtn);
-extern int s3c2410_dma_set_buffdone_fn(unsigned int, s3c2410_dma_cbfn_t rtn);
-
 #endif /* __ASM_ARCH_DMA_H */
diff --git a/arch/arm/plat-s3c/include/plat/dma.h b/arch/arm/plat-s3c/include/plat/dma.h
new file mode 100644
index 0000000..34dba98
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/dma.h
@@ -0,0 +1,127 @@
+/* arch/arm/plat-s3c/include/plat/dma.h
+ *
+ * Copyright (C) 2003,2004,2006 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * Samsung S3C DMA support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+enum s3c2410_dma_buffresult {
+	S3C2410_RES_OK,
+	S3C2410_RES_ERR,
+	S3C2410_RES_ABORT
+};
+
+enum s3c2410_dmasrc {
+	S3C2410_DMASRC_HW,		/* source is memory */
+	S3C2410_DMASRC_MEM		/* source is hardware */
+};
+
+/* enum s3c2410_chan_op
+ *
+ * operation codes passed to the DMA code by the user, and also used
+ * to inform the current channel owner of any changes to the system state
+*/
+
+enum s3c2410_chan_op {
+	S3C2410_DMAOP_START,
+	S3C2410_DMAOP_STOP,
+	S3C2410_DMAOP_PAUSE,
+	S3C2410_DMAOP_RESUME,
+	S3C2410_DMAOP_FLUSH,
+	S3C2410_DMAOP_TIMEOUT,		/* internal signal to handler */
+	S3C2410_DMAOP_STARTED,		/* indicate channel started */
+};
+
+struct s3c2410_dma_client {
+	char                *name;
+};
+
+struct s3c2410_dma_chan;
+
+/* s3c2410_dma_cbfn_t
+ *
+ * buffer callback routine type
+*/
+
+typedef void (*s3c2410_dma_cbfn_t)(struct s3c2410_dma_chan *,
+				   void *buf, int size,
+				   enum s3c2410_dma_buffresult result);
+
+typedef int  (*s3c2410_dma_opfn_t)(struct s3c2410_dma_chan *,
+				   enum s3c2410_chan_op );
+
+
+
+/* s3c2410_dma_request
+ *
+ * request a dma channel exclusivley
+*/
+
+extern int s3c2410_dma_request(unsigned int channel,
+			       struct s3c2410_dma_client *, void *dev);
+
+
+/* s3c2410_dma_ctrl
+ *
+ * change the state of the dma channel
+*/
+
+extern int s3c2410_dma_ctrl(unsigned int channel, enum s3c2410_chan_op op);
+
+/* s3c2410_dma_setflags
+ *
+ * set the channel's flags to a given state
+*/
+
+extern int s3c2410_dma_setflags(unsigned int channel,
+				unsigned int flags);
+
+/* s3c2410_dma_free
+ *
+ * free the dma channel (will also abort any outstanding operations)
+*/
+
+extern int s3c2410_dma_free(unsigned int channel, struct s3c2410_dma_client *);
+
+/* s3c2410_dma_enqueue
+ *
+ * place the given buffer onto the queue of operations for the channel.
+ * The buffer must be allocated from dma coherent memory, or the Dcache/WB
+ * drained before the buffer is given to the DMA system.
+*/
+
+extern int s3c2410_dma_enqueue(unsigned int channel, void *id,
+			       dma_addr_t data, int size);
+
+/* s3c2410_dma_config
+ *
+ * configure the dma channel
+*/
+
+extern int s3c2410_dma_config(unsigned int channel, int xferunit);
+
+/* s3c2410_dma_devconfig
+ *
+ * configure the device we're talking to
+*/
+
+extern int s3c2410_dma_devconfig(int channel, enum s3c2410_dmasrc source,
+				 unsigned long devaddr);
+
+/* s3c2410_dma_getposition
+ *
+ * get the position that the dma transfer is currently at
+*/
+
+extern int s3c2410_dma_getposition(unsigned int channel,
+				   dma_addr_t *src, dma_addr_t *dest);
+
+extern int s3c2410_dma_set_opfn(unsigned int, s3c2410_dma_opfn_t rtn);
+extern int s3c2410_dma_set_buffdone_fn(unsigned int, s3c2410_dma_cbfn_t rtn);
+
+
-- 
cgit v0.10.2


From 20934cdbaae9c26a31d7f593c6a747c687ae79a1 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Thu, 19 Mar 2009 15:02:38 +0000
Subject: [ARM] S3C24XX: dma.h should not include <mach/hardware.h>

The dma.h header does not need to include <mach/hardware.h> as there
is nothing it depends on. Remove the include and fixup the couple of
places where <mach/map.h> was not explicitly included (dragged in
via <mach/hardware.h>)

Note, we have to fix arch/arm/plat-s3c24xx/pm.c which seems to rely
on having <mach/hardware.h> included by this method.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index aba159f..dbf96e6 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -17,6 +17,7 @@
 #include <linux/sysdev.h>
 #include <linux/serial_core.h>
 
+#include <mach/map.h>
 #include <mach/dma.h>
 
 #include <plat/cpu.h>
diff --git a/arch/arm/mach-s3c2410/include/mach/dma.h b/arch/arm/mach-s3c2410/include/mach/dma.h
index 2fbabec..1648311 100644
--- a/arch/arm/mach-s3c2410/include/mach/dma.h
+++ b/arch/arm/mach-s3c2410/include/mach/dma.h
@@ -15,7 +15,6 @@
 
 #include <plat/dma.h>
 #include <linux/sysdev.h>
-#include <mach/hardware.h>
 
 #define MAX_DMA_TRANSFER_SIZE   0x100000 /* Data Unit is half word  */
 
-- 
cgit v0.10.2


From 97c1b145231730e62dd71921ec653315a1da3aad Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Thu, 19 Mar 2009 15:02:39 +0000
Subject: [ARM] S3C: Move DMA channel management code to plat-s3c

Change the name of S3C2410_DMA_CHANNELS to S3C_DMA_CHANNELS in the process.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/include/mach/dma.h b/arch/arm/mach-s3c2410/include/mach/dma.h
index 1648311..c3a2629 100644
--- a/arch/arm/mach-s3c2410/include/mach/dma.h
+++ b/arch/arm/mach-s3c2410/include/mach/dma.h
@@ -55,9 +55,9 @@ enum dma_ch {
 
 /* we have 4 dma channels */
 #ifndef CONFIG_CPU_S3C2443
-#define S3C2410_DMA_CHANNELS		(4)
+#define S3C_DMA_CHANNELS		(4)
 #else
-#define S3C2410_DMA_CHANNELS		(6)
+#define S3C_DMA_CHANNELS		(6)
 #endif
 
 /* types */
@@ -192,10 +192,6 @@ struct s3c2410_dma_chan {
 	struct sys_device	dev;
 };
 
-/* the currently allocated channel information */
-extern struct s3c2410_dma_chan s3c2410_chans[];
-
-/* note, we don't really use dma_device_t at the moment */
 typedef unsigned long dma_device_t;
 
 #endif /* __ASM_ARCH_DMA_H */
diff --git a/arch/arm/plat-s3c/Kconfig b/arch/arm/plat-s3c/Kconfig
index de93838..d4fc89f 100644
--- a/arch/arm/plat-s3c/Kconfig
+++ b/arch/arm/plat-s3c/Kconfig
@@ -150,6 +150,13 @@ config S3C_GPIO_CFG_S3C64XX
 	  Internal configuration to enable S3C64XX style GPIO configuration
 	  functions.
 
+# DMA
+
+config S3C_DMA
+	bool
+	help
+	  Internal configuration for S3C DMA core
+
 # device definitions to compile in
 
 config S3C_DEV_HSMMC
diff --git a/arch/arm/plat-s3c/Makefile b/arch/arm/plat-s3c/Makefile
index 8d7815d..e8b23fc 100644
--- a/arch/arm/plat-s3c/Makefile
+++ b/arch/arm/plat-s3c/Makefile
@@ -18,6 +18,10 @@ obj-y				+= pwm-clock.o
 obj-y				+= gpio.o
 obj-y				+= gpio-config.o
 
+# DMA support
+
+obj-$(CONFIG_S3C_DMA)		+= dma.o
+
 # PM support
 
 obj-$(CONFIG_PM)		+= pm.o
diff --git a/arch/arm/plat-s3c/dma.c b/arch/arm/plat-s3c/dma.c
new file mode 100644
index 0000000..c9db75c
--- /dev/null
+++ b/arch/arm/plat-s3c/dma.c
@@ -0,0 +1,86 @@
+/* linux/arch/arm/plat-s3c/dma.c
+ *
+ * Copyright (c) 2003-2005,2006,2009 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C DMA core
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+struct s3c2410_dma_buf;
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+
+#include <mach/dma.h>
+#include <mach/irqs.h>
+
+#include <plat/dma-plat.h>
+
+/* dma channel state information */
+struct s3c2410_dma_chan s3c2410_chans[S3C_DMA_CHANNELS];
+struct s3c2410_dma_chan *s3c_dma_chan_map[DMACH_MAX];
+
+/* s3c_dma_lookup_channel
+ *
+ * change the dma channel number given into a real dma channel id
+*/
+
+struct s3c2410_dma_chan *s3c_dma_lookup_channel(unsigned int channel)
+{
+	if (channel & DMACH_LOW_LEVEL)
+		return &s3c2410_chans[channel & ~DMACH_LOW_LEVEL];
+	else
+		return s3c_dma_chan_map[channel];
+}
+
+/* do we need to protect the settings of the fields from
+ * irq?
+*/
+
+int s3c2410_dma_set_opfn(unsigned int channel, s3c2410_dma_opfn_t rtn)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+
+	if (chan == NULL)
+		return -EINVAL;
+
+	pr_debug("%s: chan=%p, op rtn=%p\n", __func__, chan, rtn);
+
+	chan->op_fn = rtn;
+
+	return 0;
+}
+EXPORT_SYMBOL(s3c2410_dma_set_opfn);
+
+int s3c2410_dma_set_buffdone_fn(unsigned int channel, s3c2410_dma_cbfn_t rtn)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+
+	if (chan == NULL)
+		return -EINVAL;
+
+	pr_debug("%s: chan=%p, callback rtn=%p\n", __func__, chan, rtn);
+
+	chan->callback_fn = rtn;
+
+	return 0;
+}
+EXPORT_SYMBOL(s3c2410_dma_set_buffdone_fn);
+
+int s3c2410_dma_setflags(unsigned int channel, unsigned int flags)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+
+	if (chan == NULL)
+		return -EINVAL;
+
+	chan->flags = flags;
+	return 0;
+}
+EXPORT_SYMBOL(s3c2410_dma_setflags);
diff --git a/arch/arm/plat-s3c/include/plat/dma-core.h b/arch/arm/plat-s3c/include/plat/dma-core.h
new file mode 100644
index 0000000..32ff2a9
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/dma-core.h
@@ -0,0 +1,22 @@
+/* arch/arm/plat-s3c/include/plat/dma.h
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * Samsung S3C DMA core support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+extern struct s3c2410_dma_chan *s3c_dma_lookup_channel(unsigned int channel);
+
+extern struct s3c2410_dma_chan *s3c_dma_chan_map[];
+
+/* the currently allocated channel information */
+extern struct s3c2410_dma_chan s3c2410_chans[];
+
+
diff --git a/arch/arm/plat-s3c24xx/Kconfig b/arch/arm/plat-s3c24xx/Kconfig
index 2c8a2f5..5b0bc91 100644
--- a/arch/arm/plat-s3c24xx/Kconfig
+++ b/arch/arm/plat-s3c24xx/Kconfig
@@ -71,6 +71,7 @@ config PM_SIMTEC
 config S3C2410_DMA
 	bool "S3C2410 DMA support"
 	depends on ARCH_S3C2410
+	select S3C_DMA
 	help
 	  S3C2410 DMA support. This is needed for drivers like sound which
 	  use the S3C2410's DMA system to move data to and from the
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index 7c37c9a..b07fec0 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -44,8 +44,6 @@ static int dma_channels;
 
 static struct s3c24xx_dma_selection dma_sel;
 
-/* dma channel state information */
-struct s3c2410_dma_chan s3c2410_chans[S3C2410_DMA_CHANNELS];
 
 /* debugging functions */
 
@@ -135,21 +133,6 @@ dmadbg_showregs(const char *fname, int line, struct s3c2410_dma_chan *chan)
 #define dbg_showchan(chan) do { } while(0)
 #endif /* CONFIG_S3C2410_DMA_DEBUG */
 
-static struct s3c2410_dma_chan *dma_chan_map[DMACH_MAX];
-
-/* lookup_dma_channel
- *
- * change the dma channel number given into a real dma channel id
-*/
-
-static struct s3c2410_dma_chan *lookup_dma_channel(unsigned int channel)
-{
-	if (channel & DMACH_LOW_LEVEL)
-		return &s3c2410_chans[channel & ~DMACH_LOW_LEVEL];
-	else
-		return dma_chan_map[channel];
-}
-
 /* s3c2410_dma_stats_timeout
  *
  * Update DMA stats from timeout info
@@ -214,8 +197,6 @@ s3c2410_dma_waitforload(struct s3c2410_dma_chan *chan, int line)
 	return 0;
 }
 
-
-
 /* s3c2410_dma_loadbuffer
  *
  * load a buffer, and update the channel state
@@ -453,7 +434,7 @@ s3c2410_dma_canload(struct s3c2410_dma_chan *chan)
 int s3c2410_dma_enqueue(unsigned int channel, void *id,
 			dma_addr_t data, int size)
 {
-	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 	struct s3c2410_dma_buf *buf;
 	unsigned long flags;
 
@@ -804,7 +785,7 @@ EXPORT_SYMBOL(s3c2410_dma_request);
 
 int s3c2410_dma_free(unsigned int channel, struct s3c2410_dma_client *client)
 {
-	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 	unsigned long flags;
 
 	if (chan == NULL)
@@ -836,7 +817,7 @@ int s3c2410_dma_free(unsigned int channel, struct s3c2410_dma_client *client)
 	chan->irq_claimed = 0;
 
 	if (!(channel & DMACH_LOW_LEVEL))
-		dma_chan_map[channel] = NULL;
+		s3c_dma_chan_map[channel] = NULL;
 
 	local_irq_restore(flags);
 
@@ -995,7 +976,7 @@ static int s3c2410_dma_started(struct s3c2410_dma_chan *chan)
 int
 s3c2410_dma_ctrl(unsigned int channel, enum s3c2410_chan_op op)
 {
-	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
 	if (chan == NULL)
 		return -EINVAL;
@@ -1043,7 +1024,7 @@ EXPORT_SYMBOL(s3c2410_dma_ctrl);
 int s3c2410_dma_config(unsigned int channel,
 		       int xferunit)
 {
-	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 	unsigned int dcon;
 
 	pr_debug("%s: chan=%d, xfer_unit=%d, dcon=%08x\n",
@@ -1112,58 +1093,6 @@ int s3c2410_dma_config(unsigned int channel,
 
 EXPORT_SYMBOL(s3c2410_dma_config);
 
-int s3c2410_dma_setflags(unsigned int channel, unsigned int flags)
-{
-	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
-
-	if (chan == NULL)
-		return -EINVAL;
-
-	pr_debug("%s: chan=%p, flags=%08x\n", __func__, chan, flags);
-
-	chan->flags = flags;
-
-	return 0;
-}
-
-EXPORT_SYMBOL(s3c2410_dma_setflags);
-
-
-/* do we need to protect the settings of the fields from
- * irq?
-*/
-
-int s3c2410_dma_set_opfn(unsigned int channel, s3c2410_dma_opfn_t rtn)
-{
-	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
-
-	if (chan == NULL)
-		return -EINVAL;
-
-	pr_debug("%s: chan=%p, op rtn=%p\n", __func__, chan, rtn);
-
-	chan->op_fn = rtn;
-
-	return 0;
-}
-
-EXPORT_SYMBOL(s3c2410_dma_set_opfn);
-
-int s3c2410_dma_set_buffdone_fn(unsigned int channel, s3c2410_dma_cbfn_t rtn)
-{
-	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
-
-	if (chan == NULL)
-		return -EINVAL;
-
-	pr_debug("%s: chan=%p, callback rtn=%p\n", __func__, chan, rtn);
-
-	chan->callback_fn = rtn;
-
-	return 0;
-}
-
-EXPORT_SYMBOL(s3c2410_dma_set_buffdone_fn);
 
 /* s3c2410_dma_devconfig
  *
@@ -1179,7 +1108,7 @@ int s3c2410_dma_devconfig(int channel,
 			  enum s3c2410_dmasrc source,
 			  unsigned long devaddr)
 {
-	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 	unsigned int hwcfg;
 
 	if (chan == NULL)
@@ -1250,7 +1179,7 @@ EXPORT_SYMBOL(s3c2410_dma_devconfig);
 
 int s3c2410_dma_getposition(unsigned int channel, dma_addr_t *src, dma_addr_t *dst)
 {
- 	struct s3c2410_dma_chan *chan = lookup_dma_channel(channel);
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
 
 	if (chan == NULL)
 		return -EINVAL;
@@ -1508,7 +1437,7 @@ static struct s3c2410_dma_chan *s3c2410_dma_map_channel(int channel)
 	dmach = &s3c2410_chans[ch];
 	dmach->map = ch_map;
 	dmach->req_ch = channel;
-	dma_chan_map[channel] = dmach;
+	s3c_dma_chan_map[channel] = dmach;
 
 	/* select the channel */
 
diff --git a/arch/arm/plat-s3c24xx/include/plat/dma-plat.h b/arch/arm/plat-s3c24xx/include/plat/dma-plat.h
index fbe84af..9565ead 100644
--- a/arch/arm/plat-s3c24xx/include/plat/dma-plat.h
+++ b/arch/arm/plat-s3c24xx/include/plat/dma-plat.h
@@ -10,8 +10,10 @@
  * published by the Free Software Foundation.
 */
 
+#include <plat/dma-core.h>
+
 extern struct sysdev_class dma_sysclass;
-extern struct s3c2410_dma_chan s3c2410_chans[S3C2410_DMA_CHANNELS];
+extern struct s3c2410_dma_chan s3c2410_chans[S3C_DMA_CHANNELS];
 
 #define DMA_CH_VALID		(1<<31)
 #define DMA_CH_NEVER		(1<<30)
@@ -31,8 +33,8 @@ struct s3c24xx_dma_map {
 	const char		*name;
 	struct s3c24xx_dma_addr  hw_addr;
 
-	unsigned long		 channels[S3C2410_DMA_CHANNELS];
-	unsigned long		 channels_rx[S3C2410_DMA_CHANNELS];
+	unsigned long		 channels[S3C_DMA_CHANNELS];
+	unsigned long		 channels_rx[S3C_DMA_CHANNELS];
 };
 
 struct s3c24xx_dma_selection {
@@ -58,7 +60,7 @@ extern int s3c24xx_dma_init_map(struct s3c24xx_dma_selection *sel);
 */
 
 struct s3c24xx_dma_order_ch {
-	unsigned int	list[S3C2410_DMA_CHANNELS];	/* list of channels */
+	unsigned int	list[S3C_DMA_CHANNELS];	/* list of channels */
 	unsigned int	flags;				/* flags */
 };
 
-- 
cgit v0.10.2


From c33a0bc4e41ef169d6e807d8abb9502544b518e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 1 May 2009 12:23:16 +0200
Subject: perf_counter: fix race in perf_output_*

When two (or more) contexts output to the same buffer, it is possible
to observe half written output.

Suppose we have CPU0 doing perf_counter_mmap(), CPU1 doing
perf_counter_overflow(). If CPU1 does a wakeup and exposes head to
user-space, then CPU2 can observe the data CPU0 is still writing.

[ Impact: fix occasionally corrupted profiling records ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090501102533.007821627@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 41aed42..f776851 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -358,10 +358,13 @@ struct perf_mmap_data {
 	struct rcu_head			rcu_head;
 	int				nr_pages;	/* nr of data pages  */
 
-	atomic_t			wakeup;		/* POLL_ for wakeups */
+	atomic_t			poll;		/* POLL_ for wakeups */
 	atomic_t			head;		/* write position    */
 	atomic_t			events;		/* event limit       */
 
+	atomic_t			wakeup_head;	/* completed head    */
+	atomic_t			lock;		/* concurrent writes */
+
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 75f2b6c..8660ae5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1279,14 +1279,12 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_counter *counter = file->private_data;
 	struct perf_mmap_data *data;
-	unsigned int events;
+	unsigned int events = POLL_HUP;
 
 	rcu_read_lock();
 	data = rcu_dereference(counter->data);
 	if (data)
-		events = atomic_xchg(&data->wakeup, 0);
-	else
-		events = POLL_HUP;
+		events = atomic_xchg(&data->poll, 0);
 	rcu_read_unlock();
 
 	poll_wait(file, &counter->waitq, wait);
@@ -1568,22 +1566,6 @@ static const struct file_operations perf_fops = {
 
 void perf_counter_wakeup(struct perf_counter *counter)
 {
-	struct perf_mmap_data *data;
-
-	rcu_read_lock();
-	data = rcu_dereference(counter->data);
-	if (data) {
-		atomic_set(&data->wakeup, POLL_IN);
-		/*
-		 * Ensure all data writes are issued before updating the
-		 * user-space data head information. The matching rmb()
-		 * will be in userspace after reading this value.
-		 */
-		smp_wmb();
-		data->user_page->data_head = atomic_read(&data->head);
-	}
-	rcu_read_unlock();
-
 	wake_up_all(&counter->waitq);
 
 	if (counter->pending_kill) {
@@ -1721,10 +1703,14 @@ struct perf_output_handle {
 	int			wakeup;
 	int			nmi;
 	int			overflow;
+	int			locked;
+	unsigned long		flags;
 };
 
-static inline void __perf_output_wakeup(struct perf_output_handle *handle)
+static void perf_output_wakeup(struct perf_output_handle *handle)
 {
+	atomic_set(&handle->data->poll, POLL_IN);
+
 	if (handle->nmi) {
 		handle->counter->pending_wakeup = 1;
 		perf_pending_queue(&handle->counter->pending,
@@ -1733,6 +1719,86 @@ static inline void __perf_output_wakeup(struct perf_output_handle *handle)
 		perf_counter_wakeup(handle->counter);
 }
 
+/*
+ * Curious locking construct.
+ *
+ * We need to ensure a later event doesn't publish a head when a former
+ * event isn't done writing. However since we need to deal with NMIs we
+ * cannot fully serialize things.
+ *
+ * What we do is serialize between CPUs so we only have to deal with NMI
+ * nesting on a single CPU.
+ *
+ * We only publish the head (and generate a wakeup) when the outer-most
+ * event completes.
+ */
+static void perf_output_lock(struct perf_output_handle *handle)
+{
+	struct perf_mmap_data *data = handle->data;
+	int cpu;
+
+	handle->locked = 0;
+
+	local_irq_save(handle->flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi() && atomic_read(&data->lock) == cpu)
+		return;
+
+	while (atomic_cmpxchg(&data->lock, 0, cpu) != 0)
+		cpu_relax();
+
+	handle->locked = 1;
+}
+
+static void perf_output_unlock(struct perf_output_handle *handle)
+{
+	struct perf_mmap_data *data = handle->data;
+	int head, cpu;
+
+	if (handle->wakeup)
+		data->wakeup_head = data->head;
+
+	if (!handle->locked)
+		goto out;
+
+again:
+	/*
+	 * The xchg implies a full barrier that ensures all writes are done
+	 * before we publish the new head, matched by a rmb() in userspace when
+	 * reading this position.
+	 */
+	while ((head = atomic_xchg(&data->wakeup_head, 0))) {
+		data->user_page->data_head = head;
+		handle->wakeup = 1;
+	}
+
+	/*
+	 * NMI can happen here, which means we can miss a wakeup_head update.
+	 */
+
+	cpu = atomic_xchg(&data->lock, 0);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
+	/*
+	 * Therefore we have to validate we did not indeed do so.
+	 */
+	if (unlikely(atomic_read(&data->wakeup_head))) {
+		/*
+		 * Since we had it locked, we can lock it again.
+		 */
+		while (atomic_cmpxchg(&data->lock, 0, cpu) != 0)
+			cpu_relax();
+
+		goto again;
+	}
+
+	if (handle->wakeup)
+		perf_output_wakeup(handle);
+out:
+	local_irq_restore(handle->flags);
+}
+
 static int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_counter *counter, unsigned int size,
 			     int nmi, int overflow)
@@ -1745,6 +1811,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if (!data)
 		goto out;
 
+	handle->data	 = data;
 	handle->counter	 = counter;
 	handle->nmi	 = nmi;
 	handle->overflow = overflow;
@@ -1752,12 +1819,13 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if (!data->nr_pages)
 		goto fail;
 
+	perf_output_lock(handle);
+
 	do {
 		offset = head = atomic_read(&data->head);
 		head += size;
 	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
 
-	handle->data	= data;
 	handle->offset	= offset;
 	handle->head	= head;
 	handle->wakeup	= (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
@@ -1765,7 +1833,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	return 0;
 
 fail:
-	__perf_output_wakeup(handle);
+	perf_output_wakeup(handle);
 out:
 	rcu_read_unlock();
 
@@ -1809,16 +1877,20 @@ static void perf_output_copy(struct perf_output_handle *handle,
 
 static void perf_output_end(struct perf_output_handle *handle)
 {
-	int wakeup_events = handle->counter->hw_event.wakeup_events;
+	struct perf_counter *counter = handle->counter;
+	struct perf_mmap_data *data = handle->data;
+
+	int wakeup_events = counter->hw_event.wakeup_events;
 
 	if (handle->overflow && wakeup_events) {
-		int events = atomic_inc_return(&handle->data->events);
+		int events = atomic_inc_return(&data->events);
 		if (events >= wakeup_events) {
-			atomic_sub(wakeup_events, &handle->data->events);
-			__perf_output_wakeup(handle);
+			atomic_sub(wakeup_events, &data->events);
+			handle->wakeup = 1;
 		}
-	} else if (handle->wakeup)
-		__perf_output_wakeup(handle);
+	}
+
+	perf_output_unlock(handle);
 	rcu_read_unlock();
 }
 
-- 
cgit v0.10.2


From 63a809a2dc53b91268dd915bbcbd425063893676 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 1 May 2009 12:23:17 +0200
Subject: perf_counter: fix nmi-watchdog interaction

When we don't have any perf-counters active, don't act like we know
what the NMI is for.

[ Impact: fix hard hang with nmi_watchdog=2 ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090501102533.109867793@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index fc06f4d..d4c0cc9 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -871,6 +871,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
 	struct pt_regs *regs;
 	int ret;
 
+	if (!atomic_read(&num_counters))
+		return NOTIFY_DONE;
+
 	switch (cmd) {
 	case DIE_NMI:
 	case DIE_NMI_IPI:
-- 
cgit v0.10.2


From 585e3374d9d29376c2c37d821c8b7637dd48ca95 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 1 May 2009 12:23:18 +0200
Subject: perf_counter: tool: handle 0-length data files

Avoid perf-report barfing on 0-length data files.

[ Impact: fix perf-report SIGBUS ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090501102533.196245693@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc
index 933a075..911d7f3 100644
--- a/Documentation/perf_counter/perf-report.cc
+++ b/Documentation/perf_counter/perf-report.cc
@@ -402,6 +402,11 @@ int main(int argc, char *argv[])
 		exit(-1);
 	}
 
+	if (!stat.st_size) {
+		fprintf(stderr, "zero-sized file, nothing to do!\n");
+		exit(0);
+	}
+
 	load_kallsyms();
 
 remap:
-- 
cgit v0.10.2


From e5791a808ae91a9e7e1b65ea9b8de0f96a043d88 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 1 May 2009 12:23:19 +0200
Subject: perf_counter: documentation update

Update the documentation to reflect the current state of affairs

[ Impact: documentation update ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090501102533.296727903@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt
index aaf105c..9930c4b 100644
--- a/Documentation/perf_counter/design.txt
+++ b/Documentation/perf_counter/design.txt
@@ -34,41 +34,47 @@ can be poll()ed.
 
 When creating a new counter fd, 'perf_counter_hw_event' is:
 
-/*
- * Event to monitor via a performance monitoring counter:
- */
 struct perf_counter_hw_event {
-	__u64			event_config;
-
-	__u64			irq_period;
-	__u64			record_type;
-	__u64			read_format;
-
-	__u64			disabled       :  1, /* off by default        */
-				nmi	       :  1, /* NMI sampling          */
-				inherit	       :  1, /* children inherit it   */
-				pinned	       :  1, /* must always be on PMU */
-				exclusive      :  1, /* only group on PMU     */
-				exclude_user   :  1, /* don't count user      */
-				exclude_kernel :  1, /* ditto kernel          */
-				exclude_hv     :  1, /* ditto hypervisor      */
-				exclude_idle   :  1, /* don't count when idle */
-
-				__reserved_1   : 55;
-
-	__u32			extra_config_len;
-
-	__u32			__reserved_4;
-	__u64			__reserved_2;
-	__u64			__reserved_3;
+        /*
+         * The MSB of the config word signifies if the rest contains cpu
+         * specific (raw) counter configuration data, if unset, the next
+         * 7 bits are an event type and the rest of the bits are the event
+         * identifier.
+         */
+        __u64                   config;
+
+        __u64                   irq_period;
+        __u32                   record_type;
+        __u32                   read_format;
+
+        __u64                   disabled       :  1, /* off by default        */
+                                nmi            :  1, /* NMI sampling          */
+                                inherit        :  1, /* children inherit it   */
+                                pinned         :  1, /* must always be on PMU */
+                                exclusive      :  1, /* only group on PMU     */
+                                exclude_user   :  1, /* don't count user      */
+                                exclude_kernel :  1, /* ditto kernel          */
+                                exclude_hv     :  1, /* ditto hypervisor      */
+                                exclude_idle   :  1, /* don't count when idle */
+                                mmap           :  1, /* include mmap data     */
+                                munmap         :  1, /* include munmap data   */
+                                comm           :  1, /* include comm data     */
+
+                                __reserved_1   : 52;
+
+        __u32                   extra_config_len;
+        __u32                   wakeup_events;  /* wakeup every n events */
+
+        __u64                   __reserved_2;
+        __u64                   __reserved_3;
 };
 
-The 'event_config' field specifies what the counter should count.  It
+The 'config' field specifies what the counter should count.  It
 is divided into 3 bit-fields:
 
-raw_type: 1 bit (most significant bit)		0x8000_0000_0000_0000
-type:	  7 bits (next most significant)	0x7f00_0000_0000_0000
-event_id: 56 bits (least significant)		0x00ff_0000_0000_0000
+raw_type: 1 bit   (most significant bit)	0x8000_0000_0000_0000
+type:	  7 bits  (next most significant)	0x7f00_0000_0000_0000
+event_id: 56 bits (least significant)		0x00ff_ffff_ffff_ffff
 
 If 'raw_type' is 1, then the counter will count a hardware event
 specified by the remaining 63 bits of event_config.  The encoding is
@@ -134,41 +140,56 @@ enum sw_event_ids {
 	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
 };
 
+Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
+tracer is available, and event_id values can be obtained from
+/debug/tracing/events/*/*/id
+
+
 Counters come in two flavours: counting counters and sampling
 counters.  A "counting" counter is one that is used for counting the
 number of events that occur, and is characterised by having
-irq_period = 0 and record_type = PERF_RECORD_SIMPLE.  A read() on a
-counting counter simply returns the current value of the counter as
-an 8-byte number.
+irq_period = 0.
+
+
+A read() on a counter returns the current value of the counter and possible
+additional values as specified by 'read_format', each value is a u64 (8 bytes)
+in size.
+
+/*
+ * Bits that can be set in hw_event.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_counter_read_format {
+        PERF_FORMAT_TOTAL_TIME_ENABLED  =  1,
+        PERF_FORMAT_TOTAL_TIME_RUNNING  =  2,
+};
+
+Using these additional values one can establish the overcommit ratio for a
+particular counter allowing one to take the round-robin scheduling effect
+into account.
+
 
 A "sampling" counter is one that is set up to generate an interrupt
 every N events, where N is given by 'irq_period'.  A sampling counter
-has irq_period > 0 and record_type != PERF_RECORD_SIMPLE.  The
-record_type controls what data is recorded on each interrupt, and the
-available values are currently:
+has irq_period > 0. The record_type controls what data is recorded on each
+interrupt:
 
 /*
- * IRQ-notification data record type:
+ * Bits that can be set in hw_event.record_type to request information
+ * in the overflow packets.
  */
-enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		= 0,
-	PERF_RECORD_IRQ			= 1,
-	PERF_RECORD_GROUP		= 2,
+enum perf_counter_record_format {
+        PERF_RECORD_IP          = 1U << 0,
+        PERF_RECORD_TID         = 1U << 1,
+        PERF_RECORD_TIME        = 1U << 2,
+        PERF_RECORD_ADDR        = 1U << 3,
+        PERF_RECORD_GROUP       = 1U << 4,
+        PERF_RECORD_CALLCHAIN   = 1U << 5,
 };
 
-A record_type value of PERF_RECORD_IRQ will record the instruction
-pointer (IP) at which the interrupt occurred.  A record_type value of
-PERF_RECORD_GROUP will record the event_config and counter value of
-all of the other counters in the group, and should only be used on a
-group leader (see below).  Currently these two values are mutually
-exclusive, but record_type will become a bit-mask in future and
-support other values.
-
-A sampling counter has an event queue, into which an event is placed
-on each interrupt.  A read() on a sampling counter will read the next
-event from the event queue.  If the queue is empty, the read() will
-either block or return an EAGAIN error, depending on whether the fd
-has been set to non-blocking mode or not.
+Such (and other) events will be recorded in a ring-buffer, which is
+available to user-space using mmap() (see below).
 
 The 'disabled' bit specifies whether the counter starts out disabled
 or enabled.  If it is initially disabled, it can be enabled by ioctl
@@ -206,6 +227,13 @@ The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
 way to request that counting of events be restricted to times when the
 CPU is in user, kernel and/or hypervisor mode.
 
+The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap
+operations, these can be used to relate userspace IP addresses to actual
+code, even after the mapping (or even the whole process) is gone,
+these events are recorded in the ring-buffer (see below).
+
+The 'comm' bit allows tracking of process comm data on process creation.
+This too is recorded in the ring-buffer (see below).
 
 The 'pid' parameter to the perf_counter_open() system call allows the
 counter to be specific to a task:
@@ -250,6 +278,138 @@ can be meaningfully compared, added, divided (to get ratios), etc.,
 with each other, since they have counted events for the same set of
 executed instructions.
 
+
+Like stated, asynchronous events, like counter overflow or PROT_EXEC mmap
+tracking are logged into a ring-buffer. This ring-buffer is created and
+accessed through mmap().
+
+The mmap size should be 1+2^n pages, where the first page is a meta-data page
+(struct perf_counter_mmap_page) that contains various bits of information such
+as where the ring-buffer head is.
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+        __u32   version;                /* version number of this structure */
+        __u32   compat_version;         /* lowest version this is compat with */
+
+        /*
+         * Bits needed to read the hw counters in user-space.
+         *
+         *   u32 seq;
+         *   s64 count;
+         *
+         *   do {
+         *     seq = pc->lock;
+         *
+         *     barrier()
+         *     if (pc->index) {
+         *       count = pmc_read(pc->index - 1);
+         *       count += pc->offset;
+         *     } else
+         *       goto regular_read;
+         *
+         *     barrier();
+         *   } while (pc->lock != seq);
+         *
+         * NOTE: for obvious reason this only works on self-monitoring
+         *       processes.
+         */
+        __u32   lock;                   /* seqlock for synchronization */
+        __u32   index;                  /* hardware counter identifier */
+        __s64   offset;                 /* add to hardware counter value */
+
+        /*
+         * Control data for the mmap() data buffer.
+         *
+         * User-space reading this value should issue an rmb(), on SMP capable
+         * platforms, after reading this value -- see perf_counter_wakeup().
+         */
+        __u32   data_head;              /* head in the data section */
+};
+
+NOTE: the hw-counter userspace bits are arch specific and are currently only
+      implemented on powerpc.
+
+The following 2^n pages are the ring-buffer which contains events of the form:
+
+#define PERF_EVENT_MISC_KERNEL          (1 << 0)
+#define PERF_EVENT_MISC_USER            (1 << 1)
+#define PERF_EVENT_MISC_OVERFLOW        (1 << 2)
+
+struct perf_event_header {
+        __u32   type;
+        __u16   misc;
+        __u16   size;
+};
+
+enum perf_event_type {
+
+        /*
+         * The MMAP events record the PROT_EXEC mappings so that we can
+         * correlate userspace IPs to code. They have the following structure:
+         *
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      u32                             pid, tid;
+         *      u64                             addr;
+         *      u64                             len;
+         *      u64                             pgoff;
+         *      char                            filename[];
+         * };
+         */
+        PERF_EVENT_MMAP                 = 1,
+        PERF_EVENT_MUNMAP               = 2,
+
+        /*
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      u32                             pid, tid;
+         *      char                            comm[];
+         * };
+         */
+        PERF_EVENT_COMM                 = 3,
+
+        /*
+         * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
+         * will be PERF_RECORD_*
+         *
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      { u64                   ip;       } && PERF_RECORD_IP
+         *      { u32                   pid, tid; } && PERF_RECORD_TID
+         *      { u64                   time;     } && PERF_RECORD_TIME
+         *      { u64                   addr;     } && PERF_RECORD_ADDR
+         *
+         *      { u64                   nr;
+         *        { u64 event, val; }   cnt[nr];  } && PERF_RECORD_GROUP
+         *
+         *      { u16                   nr,
+         *                              hv,
+         *                              kernel,
+         *                              user;
+         *        u64                   ips[nr];  } && PERF_RECORD_CALLCHAIN
+         * };
+         */
+};
+
+NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented
+      on x86.
+
+Notification of new events is possible through poll()/select()/epoll() and
+fcntl() managing signals.
+
+Normally a notification is generated for every page filled, however one can
+additionally set perf_counter_hw_event.wakeup_events to generate one every
+so many counter overflow events.
+
+Future work will include a splice() interface to the ring-buffer.
+
+
 Counters can be enabled and disabled in two ways: via ioctl and via
 prctl.  When a counter is disabled, it doesn't count or generate
 events but does continue to exist and maintain its count value.
@@ -269,6 +429,12 @@ group other than the leader only affects that counter - disabling an
 non-leader stops that counter from counting but doesn't affect any
 other counter.
 
+Additionally, non-inherited overflow counters can use
+
+	ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr);
+
+to enable a counter for 'nr' events, after which it gets disabled again.
+
 A process can enable or disable all the counter groups that are
 attached to it, using prctl:
 
-- 
cgit v0.10.2


From 56afb0f8823650f53a5f0e96d69a282e8892c61b Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 30 Apr 2009 13:29:36 -0400
Subject: kerneldoc, tracing: make kernel-doc understand TRACE_EVENT() macro
 (take #2)

Add support to kernel-doc for tracepoint comments above TRACE_EVENT()
macro definitions. Paves the way for tracepoint docbook.

[ Impact: extend DocBook infrastructure ]

Signed-off-by: Jason Baron <jbaron@redhat.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: akpm@linux-foundation.org
Cc: rostedt@goodmis.org
Cc: fweisbec@gmail.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: wcohen@redhat.com
LKML-Reference: <d80706b6797e277924d2f3ec9af176c6b2951f88.1241107197.git.jbaron@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 0f11870..2b53a55 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1827,6 +1827,25 @@ sub reset_state {
     $state = 0;
 }
 
+sub tracepoint_munge($) {
+	my $file = shift;
+	my $tracepointname = 0;
+	my $tracepointargs = 0;
+
+	if($prototype =~ m/TRACE_EVENT\((.*?),/) {
+		$tracepointname = $1;
+	}
+	if($prototype =~ m/TP_PROTO\((.*?)\)/) {
+		$tracepointargs = $1;
+	}
+	if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
+		print STDERR "Warning(${file}:$.): Unrecognized tracepoint format: \n".
+			     "$prototype\n";
+	} else {
+		$prototype = "static inline void trace_$tracepointname($tracepointargs)";
+	}
+}
+
 sub syscall_munge() {
 	my $void = 0;
 
@@ -1881,6 +1900,9 @@ sub process_state3_function($$) {
 	if ($prototype =~ /SYSCALL_DEFINE/) {
 		syscall_munge();
 	}
+	if ($prototype =~ /TRACE_EVENT/) {
+		tracepoint_munge($file);
+	}
 	dump_function($prototype, $file);
 	reset_state();
     }
-- 
cgit v0.10.2


From a76f8c6da1e48fd4ef025f42c736389532ff30ba Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 30 Apr 2009 13:29:42 -0400
Subject: tracing: add new tracepoints docbook

Add tracepoint docbook. This will help us document and understand
what tracepoints are in the kernel. Since there are multiple
macros, and files that contain tracepoints.

[ Impact: add documentation ]

Signed-off-by: Jason Baron <jbaron@redhat.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: akpm@linux-foundation.org
Cc: rostedt@goodmis.org
Cc: fweisbec@gmail.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: wcohen@redhat.com
LKML-Reference: <84160b6bd94aff02455da7e12bad054d34c579a0.1241107197.git.jbaron@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 8918a32..4c8f4d6 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -13,7 +13,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
 	    mac80211.xml debugobjects.xml sh.xml regulator.xml \
-	    alsa-driver-api.xml writing-an-alsa-driver.xml
+	    alsa-driver-api.xml writing-an-alsa-driver.xml \
+	    tracepoint.xml
 
 ###
 # The build process is as follows (targets):
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644
index 0000000..70891bc
--- /dev/null
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Tracepoints">
+ <bookinfo>
+  <title>The Linux Kernel Tracepoint API</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Jason</firstname>
+    <surname>Baron</surname>
+    <affiliation>
+     <address>
+      <email>jbaron@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+  <chapter id="intro">
+   <title>Introduction</title>
+   <para>
+     Tracepoints are static probe points that are located in strategic points
+     throughout the kernel. 'Probes' register/unregister with tracepoints
+     via a callback mechanism. The 'probes' are strictly typed functions that
+     are passed a unique set of parameters defined by each tracepoint.
+   </para>
+
+   <para>
+     From this simple callback mechanism, 'probes' can be used to profile, debug,
+     and understand kernel behavior. There are a number of tools that provide a
+     framework for using 'probes'. These tools include Systemtap, ftrace, and
+     LTTng.
+   </para>
+
+   <para>
+     Tracepoints are defined in a number of header files via various macros. Thus,
+     the purpose of this document is to provide a clear accounting of the available
+     tracepoints. The intention is to understand not only what tracepoints are
+     available but also to understand where future tracepoints might be added.
+   </para>
+
+   <para>
+     The API presented has functions of the form:
+     <function>trace_tracepointname(function parameters)</function>. These are the
+     tracepoints callbacks that are found throughout the code. Registering and
+     unregistering probes with these callback sites is covered in the
+     <filename>Documentation/trace/*</filename> directory.
+   </para>
+  </chapter>
+
+</book>
-- 
cgit v0.10.2


From 9ee1983c9aa18f12388ef660d0c76a23dc112959 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 30 Apr 2009 13:29:47 -0400
Subject: tracing: add irq tracepoint documentation

Document irqs for the newly created docbook.

[ Impact: add documentation ]

Signed-off-by: Jason Baron <jbaron@redhat.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: akpm@linux-foundation.org
Cc: rostedt@goodmis.org
Cc: fweisbec@gmail.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: wcohen@redhat.com
LKML-Reference: <73ff42be3420157667ec548e9b0e409c3cfad05f.1241107197.git.jbaron@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
index 70891bc..b0756d0 100644
--- a/Documentation/DocBook/tracepoint.tmpl
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -81,4 +81,9 @@
    </para>
   </chapter>
 
+  <chapter id="irq">
+   <title>IRQ</title>
+!Iinclude/trace/events/irq.h
+  </chapter>
+
 </book>
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 7686864..32a9f7e 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -7,8 +7,16 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM irq
 
-/*
- * Tracepoint for entry of interrupt handler:
+/**
+ * irq_handler_entry - called immediately before the irq action handler
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ *
+ * The struct irqaction pointed to by @action contains various
+ * information about the handler, including the device name,
+ * @action->name, and the device id, @action->dev_id. When used in
+ * conjunction with the irq_handler_exit tracepoint, we can figure
+ * out irq handler latencies.
  */
 TRACE_EVENT(irq_handler_entry,
 
@@ -29,8 +37,16 @@ TRACE_EVENT(irq_handler_entry,
 	TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
 );
 
-/*
- * Tracepoint for return of an interrupt handler:
+/**
+ * irq_handler_exit - called immediately after the irq action handler returns
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ * @ret: return value
+ *
+ * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
+ * @action->handler scuccessully handled this irq. Otherwise, the irq might be
+ * a shared irq line, or the irq was not handled successfully. Can be used in
+ * conjunction with the irq_handler_entry to understand irq handler latencies.
  */
 TRACE_EVENT(irq_handler_exit,
 
@@ -52,6 +68,17 @@ TRACE_EVENT(irq_handler_exit,
 		  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter, contains a pointer to the struct softirq_action
+ * which has a pointer to the action handler that is called. By subtracting
+ * the @vec pointer from the @h pointer, we can determine the softirq
+ * number. Also, when used in combination with the softirq_exit tracepoint
+ * we can determine the softirq latency.
+ */
 TRACE_EVENT(softirq_entry,
 
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
@@ -71,6 +98,17 @@ TRACE_EVENT(softirq_entry,
 	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
 );
 
+/**
+ * softirq_exit - called immediately after the softirq handler returns
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter contains a pointer to the struct softirq_action
+ * that has handled the softirq. By subtracting the @vec pointer from
+ * the @h pointer, we can determine the softirq number. Also, when used in
+ * combination with the softirq_entry tracepoint we can determine the softirq
+ * latency.
+ */
 TRACE_EVENT(softirq_exit,
 
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-- 
cgit v0.10.2


From e3980b6a03e4c81e0e8d2cfcd7ab18082bbe92a5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 15 Apr 2009 17:11:53 +0100
Subject: [ARM] SMDK6410: Hook regulator control of VDDARM up for WM1190-EV1

This allows the S3C CPUfreq driver to do DVFS.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c
index 697c046..d26f693 100644
--- a/arch/arm/mach-s3c6410/mach-smdk6410.c
+++ b/arch/arm/mach-s3c6410/mach-smdk6410.c
@@ -226,13 +226,22 @@ static struct regulator_init_data wm8350_dcdc4_data = {
 };
 
 /* ARM core */
+static struct regulator_consumer_supply dcdc6_consumers[] = {
+	{
+		.supply = "vddarm",
+	}
+};
+
 static struct regulator_init_data wm8350_dcdc6_data = {
 	.constraints = {
 		.name = "PVDD_ARM",
 		.min_uV = 1000000,
 		.max_uV = 1300000,
 		.always_on = 1,
+		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE,
 	},
+	.num_consumer_supplies = ARRAY_SIZE(dcdc6_consumers),
+	.consumer_supplies = dcdc6_consumers,
 };
 
 /* Alive */
-- 
cgit v0.10.2


From e0202f56a82cd1170c6f1c520db669431cf26ddc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 1 May 2009 16:51:44 +0200
Subject: perf_counter tools: fix x86 syscall numbers

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 4a50abf..9cff266 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -57,13 +57,13 @@
 #define asmlinkage
 
 #ifdef __x86_64__
-#define __NR_perf_counter_open 295
+#define __NR_perf_counter_open 298
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
 
 #ifdef __i386__
-#define __NR_perf_counter_open 333
+#define __NR_perf_counter_open 336
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 1fde127..9fbc661 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -108,13 +108,13 @@
 #define asmlinkage
 
 #ifdef __x86_64__
-#define __NR_perf_counter_open 295
+#define __NR_perf_counter_open 298
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
 
 #ifdef __i386__
-#define __NR_perf_counter_open 333
+#define __NR_perf_counter_open 336
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 8d28864..b6d989e 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -89,13 +89,13 @@
 #define asmlinkage
 
 #ifdef __x86_64__
-#define __NR_perf_counter_open 295
+#define __NR_perf_counter_open 298
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
 
 #ifdef __i386__
-#define __NR_perf_counter_open 333
+#define __NR_perf_counter_open 336
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
-- 
cgit v0.10.2


From 3666932bf2212a8fa77e344c5d946e86787bdbbe Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 1 May 2009 17:37:51 +0200
Subject: perf_counter tools: remove build generated files

These files are generated during the build process. No need to have
them in the git repository.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/Documentation/perf_counter/PERF-BUILD-OPTIONS b/Documentation/perf_counter/PERF-BUILD-OPTIONS
deleted file mode 100644
index 46d8d6c..0000000
--- a/Documentation/perf_counter/PERF-BUILD-OPTIONS
+++ /dev/null
@@ -1,4 +0,0 @@
-SHELL_PATH='/bin/sh'
-TAR='tar'
-NO_CURL=''
-NO_PERL=''
diff --git a/Documentation/perf_counter/PERF-CFLAGS b/Documentation/perf_counter/PERF-CFLAGS
deleted file mode 100644
index f24906c..0000000
--- a/Documentation/perf_counter/PERF-CFLAGS
+++ /dev/null
@@ -1 +0,0 @@
--g -O2 -Wall  -DSHA1_HEADER='<openssl/sha.h>' : /home/mingo/bin:libexec/perf-core:share/perf-core/templates:/home/mingo
diff --git a/Documentation/perf_counter/PERF-VERSION-FILE b/Documentation/perf_counter/PERF-VERSION-FILE
deleted file mode 100644
index 328e244..0000000
--- a/Documentation/perf_counter/PERF-VERSION-FILE
+++ /dev/null
@@ -1 +0,0 @@
-PERF_VERSION = 0.0.1.PERF
-- 
cgit v0.10.2


From 6eda5838bc5771578986429cde4a0870e1e5f5e1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 1 May 2009 18:29:57 +0200
Subject: perfcounter tools: move common defines ... to local header file

No change, move of duplicated stuff only.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 877cf5d..481e4c2 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -287,6 +287,7 @@ export PERL_PATH
 LIB_FILE=libperf.a
 
 LIB_H += ../../include/linux/perf_counter.h
+LIB_H += perf.h
 LIB_H += util/levenshtein.h
 LIB_H += util/parse-options.h
 LIB_H += util/quote.h
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 9cff266..59f1d87 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -1,6 +1,7 @@
 
 
-#define _GNU_SOURCE
+#include "util/util.h"
+
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/time.h>
@@ -32,66 +33,7 @@
 
 #include "../../include/linux/perf_counter.h"
 
-
-/*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_COUNTERS_DISABLE   31
-#define PR_TASK_PERF_COUNTERS_ENABLE    32
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define rdclock()                                       \
-({                                                      \
-        struct timespec ts;                             \
-                                                        \
-        clock_gettime(CLOCK_MONOTONIC, &ts);            \
-        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
-})
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-#ifdef __x86_64__
-#define __NR_perf_counter_open 298
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __i386__
-#define __NR_perf_counter_open 336
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __powerpc__
-#define __NR_perf_counter_open 319
-#define rmb() 		asm volatile ("sync" ::: "memory")
-#define cpu_relax()	asm volatile ("" ::: "memory");
-#endif
-
-#define unlikely(x)	__builtin_expect(!!(x), 0)
-#define min(x, y) ({				\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	(void) (&_min1 == &_min2);		\
-	_min1 < _min2 ? _min1 : _min2; })
-
-extern asmlinkage int sys_perf_counter_open(
-        struct perf_counter_hw_event    *hw_event_uptr          __user,
-        pid_t                           pid,
-        int                             cpu,
-        int                             group_fd,
-        unsigned long                   flags);
-
-#define MAX_COUNTERS			64
-#define MAX_NR_CPUS			256
-
-#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
+#include "perf.h"
 
 static int			nr_counters			=  0;
 static __u64			event_id[MAX_COUNTERS]		= { };
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 9fbc661..6de38d2 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -85,64 +85,7 @@
 
 #include "../../include/linux/perf_counter.h"
 
-
-/*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_COUNTERS_DISABLE   31
-#define PR_TASK_PERF_COUNTERS_ENABLE    32
-
-#define rdclock()                                       \
-({                                                      \
-        struct timespec ts;                             \
-                                                        \
-        clock_gettime(CLOCK_MONOTONIC, &ts);            \
-        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
-})
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-#ifdef __x86_64__
-#define __NR_perf_counter_open 298
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __i386__
-#define __NR_perf_counter_open 336
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __powerpc__
-#define __NR_perf_counter_open 319
-#define rmb() 		asm volatile ("sync" ::: "memory")
-#define cpu_relax()	asm volatile ("" ::: "memory");
-#endif
-
-#define unlikely(x)	__builtin_expect(!!(x), 0)
-#define min(x, y) ({				\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	(void) (&_min1 == &_min2);		\
-	_min1 < _min2 ? _min1 : _min2; })
-
-extern asmlinkage int sys_perf_counter_open(
-        struct perf_counter_hw_event    *hw_event_uptr          __user,
-        pid_t                           pid,
-        int                             cpu,
-        int                             group_fd,
-        unsigned long                   flags);
-
-#define MAX_COUNTERS			64
-#define MAX_NR_CPUS			256
-
-#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
+#include "perf.h"
 
 static int			system_wide			=  0;
 
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index b6d989e..cd6f61d 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -66,68 +66,7 @@
 
 #include "../../include/linux/perf_counter.h"
 
-
-/*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_COUNTERS_DISABLE   31
-#define PR_TASK_PERF_COUNTERS_ENABLE    32
-
-#define rdclock()                                       \
-({                                                      \
-        struct timespec ts;                             \
-                                                        \
-        clock_gettime(CLOCK_MONOTONIC, &ts);            \
-        ts.tv_sec * 1000000000ULL + ts.tv_nsec;         \
-})
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-#ifdef __x86_64__
-#define __NR_perf_counter_open 298
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __i386__
-#define __NR_perf_counter_open 336
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __powerpc__
-#define __NR_perf_counter_open 319
-#define rmb() 		asm volatile ("sync" ::: "memory")
-#define cpu_relax()	asm volatile ("" ::: "memory");
-#endif
-
-#define unlikely(x)	__builtin_expect(!!(x), 0)
-#define min(x, y) ({				\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	(void) (&_min1 == &_min2);		\
-	_min1 < _min2 ? _min1 : _min2; })
-
-asmlinkage int sys_perf_counter_open(
-        struct perf_counter_hw_event    *hw_event_uptr          __user,
-        pid_t                           pid,
-        int                             cpu,
-        int                             group_fd,
-        unsigned long                   flags)
-{
-        return syscall(
-                __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
-}
-
-#define MAX_COUNTERS			64
-#define MAX_NR_CPUS			256
-
-#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
+#include "perf.h"
 
 static int			system_wide			=  0;
 
diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
new file mode 100644
index 0000000..391fcc7
--- /dev/null
+++ b/Documentation/perf_counter/perf.h
@@ -0,0 +1,64 @@
+#ifndef _PERF_PERF_H
+#define _PERF_PERF_H
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE   31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#define rdclock()					\
+({							\
+	struct timespec ts;				\
+							\
+	clock_gettime(CLOCK_MONOTONIC, &ts);		\
+	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
+})
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+#ifdef __x86_64__
+#define __NR_perf_counter_open 298
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __i386__
+#define __NR_perf_counter_open 336
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __powerpc__
+#define __NR_perf_counter_open 319
+#define rmb()		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
+#endif
+
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+static inline int
+sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
+		      pid_t pid, int cpu, int group_fd,
+		      unsigned long flags)
+{
+	return syscall(__NR_perf_counter_open, hw_event_uptr, pid, cpu,
+		       group_fd, flags);
+}
+
+#define MAX_COUNTERS			64
+#define MAX_NR_CPUS			256
+
+#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
+
+#endif
-- 
cgit v0.10.2


From a92e70237c8abbd1c3241133bf72f2cd07c90eae Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 1 May 2009 18:39:47 +0200
Subject: perfcounter tools: make rdclock an inline function

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
index 391fcc7..fb14230 100644
--- a/Documentation/perf_counter/perf.h
+++ b/Documentation/perf_counter/perf.h
@@ -8,13 +8,17 @@
 #define PR_TASK_PERF_COUNTERS_DISABLE   31
 #define PR_TASK_PERF_COUNTERS_ENABLE    32
 
-#define rdclock()					\
-({							\
-	struct timespec ts;				\
-							\
-	clock_gettime(CLOCK_MONOTONIC, &ts);		\
-	ts.tv_sec * 1000000000ULL + ts.tv_nsec;		\
-})
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC			1000000000ULL
+#endif
+
+static inline unsigned long long rdclock(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
 
 /*
  * Pick up some kernel type conventions:
-- 
cgit v0.10.2


From 7bd5469cd938eec6a76b3135e6becd9b5e096e98 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 1 May 2009 18:42:47 +0200
Subject: perfcounter tools: fix pointer mismatch

Neither process_options nor execvp take an const **char as argument.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 59f1d87..3a3deb3 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -332,7 +332,7 @@ static void sigchld_handler(int sig)
 		done = 1;
 }
 
-int cmd_record(int argc, const char **argv)
+int cmd_record(int argc, char **argv)
 {
 	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
 	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
-- 
cgit v0.10.2


From 4ba67c1d48aeedcc31630bb40b6179fc7d360f90 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 1 May 2009 18:48:06 +0200
Subject: perfcounter tools: get the syscall number from
 arch/*/include/asm/unistd.h

Avoid further confusion during development

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
index fb14230..6fa3656 100644
--- a/Documentation/perf_counter/perf.h
+++ b/Documentation/perf_counter/perf.h
@@ -26,20 +26,14 @@ static inline unsigned long long rdclock(void)
 #define __user
 #define asmlinkage
 
-#ifdef __x86_64__
-#define __NR_perf_counter_open 298
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __i386__
-#define __NR_perf_counter_open 336
+#if defined(__x86_64__) || defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #endif
 
 #ifdef __powerpc__
-#define __NR_perf_counter_open 319
+#include "../../arch/powerpc/include/asm/unistd.h"
 #define rmb()		asm volatile ("sync" ::: "memory")
 #define cpu_relax()	asm volatile ("" ::: "memory");
 #endif
-- 
cgit v0.10.2


From 75507efb1372b6acf1aa6bf00ebd49ce196fd994 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 May 2009 12:58:36 -0400
Subject: ext4: Don't avoid using BLOCK_UNINIT block groups in mballoc

By avoiding the use of not-yet-used block groups (i.e., block groups
with the BLOCK_UNINIT flag), mballoc had a tendency to create large
files with large non-contiguous gaps.  In addition avoiding the use of
new block groups had a tendency to push regular file data into the
first block group in a flex_bg group, which slows down the speed of
e2fsck pass 2, since it has a tendency to seek much more.  For
example:

               Before Patch                       After Patch
              Time in seconds                   Time in seconds
            Real /  User/  Sys   MB/s      Real /  User/  Sys    MB/s
Pass 1      8.52 / 2.21 / 0.46  20.43      8.84 / 4.97 / 1.11   19.68
Pass 2     21.16 / 1.02 / 1.86  11.30      6.54 / 1.77 / 1.78   36.39
Pass 3      0.01 / 0.00 / 0.00 139.00      0.01 / 0.01 / 0.00  128.90
Pass 4      0.16 / 0.15 / 0.00   0.00      0.17 / 0.17 / 0.00    0.00
Pass 5      2.52 / 1.99 / 0.09   0.79      2.31 / 1.78 / 0.06    0.86
Total      32.40 / 5.11 / 2.49  12.81     17.99 / 8.75 / 2.98   23.01

This was on a sample 80 gig root filesystem which was approximately
50% full.  Note the improved e2fsck pass 2 performance, by over a
factor of 3, due to a decreased number of seeks.  (The total amount of
I/O in pass 2 was unchanged; the layout of the directory blocks was
simply much better from e2fsck's's perspective.)

Other changes as a result of this patch on this sample filesystem:

                             Before Patch    After Patch
# of non-contig files           762             779
# of non-contig directories     571             570
# of BLOCK_UNINIT bg's          307             293
# of INODE_UNINIT bg's          503             503

Out of 640 block groups, of which 333 were in use, this patch caused
an extra 14 block groups to be utilized.  The number of non-contiguous
files did go up slightly, but when measured against the 99.9% of the
files (603,154) which were contiguously allocated, this is pretty
insignificant.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andreas Dilger <adilger@sun.com>

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c3af9e6..dbd47ea 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1728,7 +1728,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	unsigned free, fragments;
 	unsigned i, bits;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
-	struct ext4_group_desc *desc;
 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
 	BUG_ON(cr < 0 || cr >= 4);
@@ -1744,10 +1743,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	switch (cr) {
 	case 0:
 		BUG_ON(ac->ac_2order == 0);
-		/* If this group is uninitialized, skip it initially */
-		desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
-		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
-			return 0;
 
 		/* Avoid using the first bg of a flexgroup for data files */
 		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -2067,9 +2062,7 @@ repeat:
 
 			ac->ac_groups_scanned++;
 			desc = ext4_get_group_desc(sb, group, NULL);
-			if (cr == 0 || (desc->bg_flags &
-					cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
-					ac->ac_2order != 0))
+			if (cr == 0)
 				ext4_mb_simple_scan_group(ac, &e4b);
 			else if (cr == 1 &&
 					ac->ac_g_ex.fe_len == sbi->s_stripe)
-- 
cgit v0.10.2


From 15e957d08dd4a841359cfec59ecb74041e0097aa Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 30 Apr 2009 01:17:50 -0700
Subject: x86/irq: use move_irq_desc() in create_irq_nr()

move_irq_desc() will try to move irq_desc to the home node if
the allocated one is not correct, in create_irq_nr().

( This can happen on devices that are on different nodes that
  are using MSI, when drivers are loaded and unloaded randomly. )

v2: fix non-smp build
v3: add NUMA_IRQ_DESC to eliminate #ifdefs

[ Impact: improve irq descriptor locality on NUMA systems ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F95EAE.2050903@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e1b2543..674e21e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -274,6 +274,10 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say N.
 
+config NUMA_IRQ_DESC
+	def_bool y
+	depends on SPARSE_IRQ && NUMA
+
 config X86_MPPARSE
 	bool "Enable MPS table" if ACPI
 	default y
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9cd4806..e583291 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3197,11 +3197,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
 		if (cfg_new->vector != 0)
 			continue;
 
-#ifdef CONFIG_NUMA_IRQ_DESC
-		/* different node ?*/
-		if (desc_new->node != node)
-			desc = move_irq_desc(desc, node);
-#endif
+		desc_new = move_irq_desc(desc_new, node);
 
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4b95ddb..eedbb8e 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -206,9 +206,16 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
-#else /* CONFIG_SPARSE_IRQ */
+#endif
+
+#ifdef CONFIG_NUMA_IRQ_DESC
 extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node);
-#endif /* CONFIG_SPARSE_IRQ */
+#else
+static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
+{
+	return desc;
+}
+#endif
 
 extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
 
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 2f06527..7d04780 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
-obj-$(CONFIG_SPARSE_IRQ) += numa_migrate.o
+obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o
 obj-$(CONFIG_PM_SLEEP) += pm.o
-- 
cgit v0.10.2


From d444c3c38189b3f18337a213855ac1c07af4e2d9 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 May 2009 13:44:33 -0400
Subject: ext4: Move the ext4_i.h header file into ext4.h

There is no longer a reason for a separate ext4_i.h header file, so
move it into ext4.h just to make life easier for developers to find
the relevant data structures and typedefs.  Should also speed up
compiles slightly, too.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 02ec44b..ba57d66 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -21,7 +21,10 @@
 #include <linux/magic.h>
 #include <linux/jbd2.h>
 #include <linux/quota.h>
-#include "ext4_i.h"
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
 
 /*
  * The fourth extended filesystem constants/structures
@@ -46,6 +49,19 @@
 #define ext4_debug(f, a...)	do {} while (0)
 #endif
 
+/* data type for block offset of block group */
+typedef int ext4_grpblk_t;
+
+/* data type for filesystem-wide blocks number */
+typedef unsigned long long ext4_fsblk_t;
+
+/* data type for file logical block number */
+typedef __u32 ext4_lblk_t;
+
+/* data type for block group number */
+typedef unsigned int ext4_group_t;
+
+
 /* prefer goal again. length */
 #define EXT4_MB_HINT_MERGE		1
 /* blocks already reserved */
@@ -516,6 +532,110 @@ do {									       \
 #endif /* defined(__KERNEL__) || defined(__linux__) */
 
 /*
+ * storage for cached extent
+ */
+struct ext4_ext_cache {
+	ext4_fsblk_t	ec_start;
+	ext4_lblk_t	ec_block;
+	__u32		ec_len; /* must be 32bit to return holes */
+	__u32		ec_type;
+};
+
+/*
+ * fourth extended file system inode data in memory
+ */
+struct ext4_inode_info {
+	__le32	i_data[15];	/* unconverted */
+	__u32	i_flags;
+	ext4_fsblk_t	i_file_acl;
+	__u32	i_dtime;
+
+	/*
+	 * i_block_group is the number of the block group which contains
+	 * this file's inode.  Constant across the lifetime of the inode,
+	 * it is ued for making block allocation decisions - we try to
+	 * place a file's data blocks near its inode block, and new inodes
+	 * near to their parent directory's inode.
+	 */
+	ext4_group_t	i_block_group;
+	__u32	i_state;		/* Dynamic state flags for ext4 */
+
+	ext4_lblk_t		i_dir_start_lookup;
+#ifdef CONFIG_EXT4_FS_XATTR
+	/*
+	 * Extended attributes can be read independently of the main file
+	 * data. Taking i_mutex even when reading would cause contention
+	 * between readers of EAs and writers of regular file data, so
+	 * instead we synchronize on xattr_sem when reading or changing
+	 * EAs.
+	 */
+	struct rw_semaphore xattr_sem;
+#endif
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+	struct posix_acl	*i_acl;
+	struct posix_acl	*i_default_acl;
+#endif
+
+	struct list_head i_orphan;	/* unlinked but open inodes */
+
+	/*
+	 * i_disksize keeps track of what the inode size is ON DISK, not
+	 * in memory.  During truncate, i_size is set to the new size by
+	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
+	 * set i_disksize to 0 until the truncate is actually under way.
+	 *
+	 * The intent is that i_disksize always represents the blocks which
+	 * are used by this file.  This allows recovery to restart truncate
+	 * on orphans if we crash during truncate.  We actually write i_disksize
+	 * into the on-disk inode when writing inodes out, instead of i_size.
+	 *
+	 * The only time when i_disksize and i_size may be different is when
+	 * a truncate is in progress.  The only things which change i_disksize
+	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
+	 */
+	loff_t	i_disksize;
+
+	/*
+	 * i_data_sem is for serialising ext4_truncate() against
+	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
+	 * data tree are chopped off during truncate. We can't do that in
+	 * ext4 because whenever we perform intermediate commits during
+	 * truncate, the inode and all the metadata blocks *must* be in a
+	 * consistent state which allows truncation of the orphans to restart
+	 * during recovery.  Hence we must fix the get_block-vs-truncate race
+	 * by other means, so we have i_data_sem.
+	 */
+	struct rw_semaphore i_data_sem;
+	struct inode vfs_inode;
+	struct jbd2_inode jinode;
+
+	struct ext4_ext_cache i_cached_extent;
+	/*
+	 * File creation time. Its function is same as that of
+	 * struct timespec i_{a,c,m}time in the generic inode.
+	 */
+	struct timespec i_crtime;
+
+	/* mballoc */
+	struct list_head i_prealloc_list;
+	spinlock_t i_prealloc_lock;
+
+	/* ialloc */
+	ext4_group_t	i_last_alloc_group;
+
+	/* allocation reservation info for delalloc */
+	unsigned int i_reserved_data_blocks;
+	unsigned int i_reserved_meta_blocks;
+	unsigned int i_allocated_meta_blocks;
+	unsigned short i_delalloc_reserved_flag;
+
+	/* on-disk additional length */
+	__u16 i_extra_isize;
+
+	spinlock_t i_block_reservation_lock;
+};
+
+/*
  * File system states
  */
 #define	EXT4_VALID_FS			0x0001	/* Unmounted cleanly */
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
deleted file mode 100644
index 4ce2187..0000000
--- a/fs/ext4/ext4_i.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- *  ext4_i.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_i.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-#ifndef _EXT4_I
-#define _EXT4_I
-
-#include <linux/rwsem.h>
-#include <linux/rbtree.h>
-#include <linux/seqlock.h>
-#include <linux/mutex.h>
-
-/* data type for block offset of block group */
-typedef int ext4_grpblk_t;
-
-/* data type for filesystem-wide blocks number */
-typedef unsigned long long ext4_fsblk_t;
-
-/* data type for file logical block number */
-typedef __u32 ext4_lblk_t;
-
-/* data type for block group number */
-typedef unsigned int ext4_group_t;
-
-/*
- * storage for cached extent
- */
-struct ext4_ext_cache {
-	ext4_fsblk_t	ec_start;
-	ext4_lblk_t	ec_block;
-	__u32		ec_len; /* must be 32bit to return holes */
-	__u32		ec_type;
-};
-
-/*
- * fourth extended file system inode data in memory
- */
-struct ext4_inode_info {
-	__le32	i_data[15];	/* unconverted */
-	__u32	i_flags;
-	ext4_fsblk_t	i_file_acl;
-	__u32	i_dtime;
-
-	/*
-	 * i_block_group is the number of the block group which contains
-	 * this file's inode.  Constant across the lifetime of the inode,
-	 * it is ued for making block allocation decisions - we try to
-	 * place a file's data blocks near its inode block, and new inodes
-	 * near to their parent directory's inode.
-	 */
-	ext4_group_t	i_block_group;
-	__u32	i_state;		/* Dynamic state flags for ext4 */
-
-	ext4_lblk_t		i_dir_start_lookup;
-#ifdef CONFIG_EXT4_FS_XATTR
-	/*
-	 * Extended attributes can be read independently of the main file
-	 * data. Taking i_mutex even when reading would cause contention
-	 * between readers of EAs and writers of regular file data, so
-	 * instead we synchronize on xattr_sem when reading or changing
-	 * EAs.
-	 */
-	struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
-
-	struct list_head i_orphan;	/* unlinked but open inodes */
-
-	/*
-	 * i_disksize keeps track of what the inode size is ON DISK, not
-	 * in memory.  During truncate, i_size is set to the new size by
-	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
-	 * set i_disksize to 0 until the truncate is actually under way.
-	 *
-	 * The intent is that i_disksize always represents the blocks which
-	 * are used by this file.  This allows recovery to restart truncate
-	 * on orphans if we crash during truncate.  We actually write i_disksize
-	 * into the on-disk inode when writing inodes out, instead of i_size.
-	 *
-	 * The only time when i_disksize and i_size may be different is when
-	 * a truncate is in progress.  The only things which change i_disksize
-	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
-	 */
-	loff_t	i_disksize;
-
-	/*
-	 * i_data_sem is for serialising ext4_truncate() against
-	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
-	 * data tree are chopped off during truncate. We can't do that in
-	 * ext4 because whenever we perform intermediate commits during
-	 * truncate, the inode and all the metadata blocks *must* be in a
-	 * consistent state which allows truncation of the orphans to restart
-	 * during recovery.  Hence we must fix the get_block-vs-truncate race
-	 * by other means, so we have i_data_sem.
-	 */
-	struct rw_semaphore i_data_sem;
-	struct inode vfs_inode;
-	struct jbd2_inode jinode;
-
-	struct ext4_ext_cache i_cached_extent;
-	/*
-	 * File creation time. Its function is same as that of
-	 * struct timespec i_{a,c,m}time in the generic inode.
-	 */
-	struct timespec i_crtime;
-
-	/* mballoc */
-	struct list_head i_prealloc_list;
-	spinlock_t i_prealloc_lock;
-
-	/* ialloc */
-	ext4_group_t	i_last_alloc_group;
-
-	/* allocation reservation info for delalloc */
-	unsigned int i_reserved_data_blocks;
-	unsigned int i_reserved_meta_blocks;
-	unsigned int i_allocated_meta_blocks;
-	unsigned short i_delalloc_reserved_flag;
-
-	/* on-disk additional length */
-	__u16 i_extra_isize;
-
-	spinlock_t i_block_reservation_lock;
-};
-
-#endif	/* _EXT4_I */
-- 
cgit v0.10.2


From ca0faba0e8ac844dc0279825eb8db876b5962ea5 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 3 May 2009 16:33:44 -0400
Subject: ext4: Move the ext4_sb.h header file into ext4.h

There is no longer a reason for a separate ext4_sb.h header file, so
move it into ext4.h just to make life easier for developers to find
the relevant data structures and typedefs.  Should also speed up
compiles slightly, too.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ba57d66..af3c906 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -25,6 +25,10 @@
 #include <linux/rbtree.h>
 #include <linux/seqlock.h>
 #include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/blockgroup_lock.h>
+#include <linux/percpu_counter.h>
 
 /*
  * The fourth extended filesystem constants/structures
@@ -195,9 +199,6 @@ struct flex_groups {
 #define EXT4_BG_BLOCK_UNINIT	0x0002 /* Block bitmap not in use */
 #define EXT4_BG_INODE_ZEROED	0x0004 /* On-disk itable initialized to zero */
 
-#ifdef __KERNEL__
-#include "ext4_sb.h"
-#endif
 /*
  * Macro-instructions used to manage group descriptors
  */
@@ -809,6 +810,136 @@ struct ext4_super_block {
 };
 
 #ifdef __KERNEL__
+/*
+ * fourth extended-fs super-block data in memory
+ */
+struct ext4_sb_info {
+	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
+	unsigned long s_inodes_per_block;/* Number of inodes per block */
+	unsigned long s_blocks_per_group;/* Number of blocks in a group */
+	unsigned long s_inodes_per_group;/* Number of inodes in a group */
+	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
+	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
+	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
+	ext4_group_t s_groups_count;	/* Number of groups in the fs */
+	unsigned long s_overhead_last;  /* Last calculated overhead */
+	unsigned long s_blocks_last;    /* Last seen block count */
+	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
+	struct buffer_head * s_sbh;	/* Buffer containing the super block */
+	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
+	struct buffer_head **s_group_desc;
+	unsigned long  s_mount_opt;
+	ext4_fsblk_t s_sb_block;
+	uid_t s_resuid;
+	gid_t s_resgid;
+	unsigned short s_mount_state;
+	unsigned short s_pad;
+	int s_addr_per_block_bits;
+	int s_desc_per_block_bits;
+	int s_inode_size;
+	int s_first_ino;
+	unsigned int s_inode_readahead_blks;
+	spinlock_t s_next_gen_lock;
+	u32 s_next_generation;
+	u32 s_hash_seed[4];
+	int s_def_hash_version;
+	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
+	struct percpu_counter s_freeblocks_counter;
+	struct percpu_counter s_freeinodes_counter;
+	struct percpu_counter s_dirs_counter;
+	struct percpu_counter s_dirtyblocks_counter;
+	struct blockgroup_lock *s_blockgroup_lock;
+	struct proc_dir_entry *s_proc;
+	struct kobject s_kobj;
+	struct completion s_kobj_unregister;
+
+	/* Journaling */
+	struct inode *s_journal_inode;
+	struct journal_s *s_journal;
+	struct list_head s_orphan;
+	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
+	unsigned long s_commit_interval;
+	u32 s_max_batch_time;
+	u32 s_min_batch_time;
+	struct block_device *journal_bdev;
+#ifdef CONFIG_JBD2_DEBUG
+	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
+	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
+#endif
+#ifdef CONFIG_QUOTA
+	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
+	int s_jquota_fmt;			/* Format of quota to use */
+#endif
+	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+
+#ifdef EXTENTS_STATS
+	/* ext4 extents stats */
+	unsigned long s_ext_min;
+	unsigned long s_ext_max;
+	unsigned long s_depth_max;
+	spinlock_t s_ext_stats_lock;
+	unsigned long s_ext_blocks;
+	unsigned long s_ext_extents;
+#endif
+
+	/* for buddy allocator */
+	struct ext4_group_info ***s_group_info;
+	struct inode *s_buddy_cache;
+	long s_blocks_reserved;
+	spinlock_t s_reserve_lock;
+	spinlock_t s_md_lock;
+	tid_t s_last_transaction;
+	unsigned short *s_mb_offsets;
+	unsigned int *s_mb_maxs;
+
+	/* tunables */
+	unsigned long s_stripe;
+	unsigned int s_mb_stream_request;
+	unsigned int s_mb_max_to_scan;
+	unsigned int s_mb_min_to_scan;
+	unsigned int s_mb_stats;
+	unsigned int s_mb_order2_reqs;
+	unsigned int s_mb_group_prealloc;
+	/* where last allocation was done - for stream allocation */
+	unsigned long s_mb_last_group;
+	unsigned long s_mb_last_start;
+
+	/* history to debug policy */
+	struct ext4_mb_history *s_mb_history;
+	int s_mb_history_cur;
+	int s_mb_history_max;
+	int s_mb_history_num;
+	spinlock_t s_mb_history_lock;
+	int s_mb_history_filter;
+
+	/* stats for buddy allocator */
+	spinlock_t s_mb_pa_lock;
+	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
+	atomic_t s_bal_success;	/* we found long enough chunks */
+	atomic_t s_bal_allocated;	/* in blocks */
+	atomic_t s_bal_ex_scanned;	/* total extents scanned */
+	atomic_t s_bal_goals;	/* goal hits */
+	atomic_t s_bal_breaks;	/* too long searches */
+	atomic_t s_bal_2orders;	/* 2^order hits */
+	spinlock_t s_bal_lock;
+	unsigned long s_mb_buddies_generated;
+	unsigned long long s_mb_generation_time;
+	atomic_t s_mb_lost_chunks;
+	atomic_t s_mb_preallocated;
+	atomic_t s_mb_discarded;
+
+	/* locality groups */
+	struct ext4_locality_group *s_locality_groups;
+
+	/* for write statistics */
+	unsigned long s_sectors_written_start;
+	u64 s_kbytes_written;
+
+	unsigned int s_log_groups_per_flex;
+	struct flex_groups *s_flex_groups;
+};
+
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
@@ -824,7 +955,6 @@ static inline struct timespec ext4_current_time(struct inode *inode)
 		current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
 }
 
-
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
 	return ino == EXT4_ROOT_INO ||
@@ -833,6 +963,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 		(ino >= EXT4_FIRST_INO(sb) &&
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
+
+static inline spinlock_t *
+sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
+{
+	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
+}
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
deleted file mode 100644
index 2d36223..0000000
--- a/fs/ext4/ext4_sb.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- *  ext4_sb.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_sb.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-#ifndef _EXT4_SB
-#define _EXT4_SB
-
-#ifdef __KERNEL__
-#include <linux/timer.h>
-#include <linux/wait.h>
-#include <linux/blockgroup_lock.h>
-#include <linux/percpu_counter.h>
-#endif
-#include <linux/rbtree.h>
-
-/*
- * fourth extended-fs super-block data in memory
- */
-struct ext4_sb_info {
-	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
-	unsigned long s_inodes_per_block;/* Number of inodes per block */
-	unsigned long s_blocks_per_group;/* Number of blocks in a group */
-	unsigned long s_inodes_per_group;/* Number of inodes in a group */
-	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
-	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
-	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
-	ext4_group_t s_groups_count;	/* Number of groups in the fs */
-	unsigned long s_overhead_last;  /* Last calculated overhead */
-	unsigned long s_blocks_last;    /* Last seen block count */
-	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
-	struct buffer_head * s_sbh;	/* Buffer containing the super block */
-	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
-	struct buffer_head **s_group_desc;
-	unsigned long  s_mount_opt;
-	ext4_fsblk_t s_sb_block;
-	uid_t s_resuid;
-	gid_t s_resgid;
-	unsigned short s_mount_state;
-	unsigned short s_pad;
-	int s_addr_per_block_bits;
-	int s_desc_per_block_bits;
-	int s_inode_size;
-	int s_first_ino;
-	unsigned int s_inode_readahead_blks;
-	spinlock_t s_next_gen_lock;
-	u32 s_next_generation;
-	u32 s_hash_seed[4];
-	int s_def_hash_version;
-	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
-	struct percpu_counter s_freeblocks_counter;
-	struct percpu_counter s_freeinodes_counter;
-	struct percpu_counter s_dirs_counter;
-	struct percpu_counter s_dirtyblocks_counter;
-	struct blockgroup_lock *s_blockgroup_lock;
-	struct proc_dir_entry *s_proc;
-	struct kobject s_kobj;
-	struct completion s_kobj_unregister;
-
-	/* Journaling */
-	struct inode *s_journal_inode;
-	struct journal_s *s_journal;
-	struct list_head s_orphan;
-	struct mutex s_orphan_lock;
-	struct mutex s_resize_lock;
-	unsigned long s_commit_interval;
-	u32 s_max_batch_time;
-	u32 s_min_batch_time;
-	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
-	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
-#endif
-#ifdef CONFIG_QUOTA
-	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
-	int s_jquota_fmt;			/* Format of quota to use */
-#endif
-	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
-
-#ifdef EXTENTS_STATS
-	/* ext4 extents stats */
-	unsigned long s_ext_min;
-	unsigned long s_ext_max;
-	unsigned long s_depth_max;
-	spinlock_t s_ext_stats_lock;
-	unsigned long s_ext_blocks;
-	unsigned long s_ext_extents;
-#endif
-
-	/* for buddy allocator */
-	struct ext4_group_info ***s_group_info;
-	struct inode *s_buddy_cache;
-	long s_blocks_reserved;
-	spinlock_t s_reserve_lock;
-	spinlock_t s_md_lock;
-	tid_t s_last_transaction;
-	unsigned short *s_mb_offsets;
-	unsigned int *s_mb_maxs;
-
-	/* tunables */
-	unsigned long s_stripe;
-	unsigned int s_mb_stream_request;
-	unsigned int s_mb_max_to_scan;
-	unsigned int s_mb_min_to_scan;
-	unsigned int s_mb_stats;
-	unsigned int s_mb_order2_reqs;
-	unsigned int s_mb_group_prealloc;
-	/* where last allocation was done - for stream allocation */
-	unsigned long s_mb_last_group;
-	unsigned long s_mb_last_start;
-
-	/* history to debug policy */
-	struct ext4_mb_history *s_mb_history;
-	int s_mb_history_cur;
-	int s_mb_history_max;
-	int s_mb_history_num;
-	spinlock_t s_mb_history_lock;
-	int s_mb_history_filter;
-
-	/* stats for buddy allocator */
-	spinlock_t s_mb_pa_lock;
-	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
-	atomic_t s_bal_success;	/* we found long enough chunks */
-	atomic_t s_bal_allocated;	/* in blocks */
-	atomic_t s_bal_ex_scanned;	/* total extents scanned */
-	atomic_t s_bal_goals;	/* goal hits */
-	atomic_t s_bal_breaks;	/* too long searches */
-	atomic_t s_bal_2orders;	/* 2^order hits */
-	spinlock_t s_bal_lock;
-	unsigned long s_mb_buddies_generated;
-	unsigned long long s_mb_generation_time;
-	atomic_t s_mb_lost_chunks;
-	atomic_t s_mb_preallocated;
-	atomic_t s_mb_discarded;
-
-	/* locality groups */
-	struct ext4_locality_group *s_locality_groups;
-
-	/* for write statistics */
-	unsigned long s_sectors_written_start;
-	u64 s_kbytes_written;
-
-	unsigned int s_log_groups_per_flex;
-	struct flex_groups *s_flex_groups;
-};
-
-static inline spinlock_t *
-sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
-{
-	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
-}
-
-#endif	/* _EXT4_SB */
-- 
cgit v0.10.2


From 596397b77c895d0fa3674f579c94ad5ea88ef01d Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 May 2009 13:49:15 -0400
Subject: ext4: Move fs/ext4/namei.h into ext4.h

The fs/ext4/namei.h header file had only a single function
declaration, and should have never been a standalone file.  Move it
into ext4.h, where should have been from the beginning.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index af3c906..d9c5251 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1594,6 +1594,7 @@ extern const struct file_operations ext4_file_operations;
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
+extern struct dentry *ext4_get_parent(struct dentry *child);
 
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8018e49..c9690b2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -37,7 +37,6 @@
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
-#include "namei.h"
 #include "xattr.h"
 #include "acl.h"
 
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
deleted file mode 100644
index 5e4dfff..0000000
--- a/fs/ext4/namei.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*  linux/fs/ext4/namei.h
- *
- * Copyright (C) 2005 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
-*/
-
-extern struct dentry *ext4_get_parent(struct dentry *child);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1fbf090..d79e1c4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -46,7 +46,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "namei.h"
 #include "group.h"
 
 struct proc_dir_entry *ext4_proc_root;
-- 
cgit v0.10.2


From 6f0aced639d346e5f54eea9fcb2784b633493d09 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Fri, 1 May 2009 23:54:25 +0400
Subject: x86, apic: use pr_ macro

Replace recenly appeared printk with pr_ macro
(the file already use a lot of them).

[ Impact: cleanup ]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090501195425.GB4633@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 28f747d..e258bed 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2191,7 +2191,7 @@ static int __cpuinit set_multi(const struct dmi_system_id *d)
 {
 	if (multi)
 		return 0;
-	printk(KERN_INFO "APIC: %s detected, Multi Chassis\n", d->ident);
+	pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
 	multi = 1;
 	return 0;
 }
-- 
cgit v0.10.2


From bb23c20a851a5038b255a3c0d0aa56093c1da3f8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 1 May 2009 19:44:44 -0400
Subject: ext4: Move fs/ext4/group.h into ext4.h

Move the function prototypes in group.h into ext4.h so they are all
defined in one place.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index a5ba039..92f557d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -19,7 +19,6 @@
 #include <linux/buffer_head.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
-#include "group.h"
 #include "mballoc.h"
 
 /*
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d9c5251..5973f32 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1270,6 +1270,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 						    ext4_group_t block_group,
 						    struct buffer_head ** bh);
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
+struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+				      ext4_group_t block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t group,
+				       struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc)			\
+		ext4_init_block_bitmap(sb, NULL, group, desc)
 
 /* dir.c */
 extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1294,6 +1302,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
 extern unsigned long ext4_count_dirs(struct super_block *);
 extern void ext4_check_inodes_bitmap(struct super_block *);
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t group,
+				       struct ext4_group_desc *desc);
+extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
 
 /* mballoc.c */
 extern long ext4_mb_stats;
@@ -1417,6 +1430,10 @@ extern void ext4_used_dirs_set(struct super_block *sb,
 				struct ext4_group_desc *bg, __u32 count);
 extern void ext4_itable_unused_set(struct super_block *sb,
 				   struct ext4_group_desc *bg, __u32 count);
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+				   struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+				       struct ext4_group_desc *gdp);
 
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
deleted file mode 100644
index c2c0a8d..0000000
--- a/fs/ext4/group.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- *  linux/fs/ext4/group.h
- *
- * Copyright (C) 2007 Cluster File Systems, Inc
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#ifndef _LINUX_EXT4_GROUP_H
-#define _LINUX_EXT4_GROUP_H
-
-extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
-				   struct ext4_group_desc *gdp);
-extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
-				       struct ext4_group_desc *gdp);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
-				      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)			\
-		ext4_init_block_bitmap(sb, NULL, group, desc)
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
-#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 55ba419..916d05c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -27,7 +27,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "group.h"
 
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index dd9e6cd..75e34f6 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,7 +23,6 @@
 #include <linux/mutex.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
-#include "group.h"
 
 /*
  * with AGGRESSIVE_CHECK allocator runs consistency checks over
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e8ded13..27eb289 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 
 #include "ext4_jbd2.h"
-#include "group.h"
 
 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d79e1c4..7903f20 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -46,7 +46,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "group.h"
 
 struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
-- 
cgit v0.10.2


From f40339031b04279c3fdde7ac5fe97db33b2a7694 Mon Sep 17 00:00:00 2001
From: Curt Wohlgemuth <curtw@google.com>
Date: Fri, 1 May 2009 20:27:20 -0400
Subject: ext4: Make the length of the mb_history file tunable

In memory-constrained systems with many partitions, the ~68K for each
partition for the mb_history buffer can be excessive.

This patch adds a new mount option, mb_history_length, as well as a
way of setting the default via a module parameter (or via a sysfs
parameter in /sys/module/ext4/parameter/default_mb_history_length).
If the mb_history_length is set to zero, the mb_history facility is
disabled entirely.

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dbd47ea..df75855 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2413,7 +2413,8 @@ static void ext4_mb_history_release(struct super_block *sb)
 
 	if (sbi->s_proc != NULL) {
 		remove_proc_entry("mb_groups", sbi->s_proc);
-		remove_proc_entry("mb_history", sbi->s_proc);
+		if (sbi->s_mb_history_max)
+			remove_proc_entry("mb_history", sbi->s_proc);
 	}
 	kfree(sbi->s_mb_history);
 }
@@ -2424,17 +2425,17 @@ static void ext4_mb_history_init(struct super_block *sb)
 	int i;
 
 	if (sbi->s_proc != NULL) {
-		proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
-				 &ext4_mb_seq_history_fops, sb);
+		if (sbi->s_mb_history_max)
+			proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
+					 &ext4_mb_seq_history_fops, sb);
 		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
 				 &ext4_mb_seq_groups_fops, sb);
 	}
 
-	sbi->s_mb_history_max = 1000;
 	sbi->s_mb_history_cur = 0;
 	spin_lock_init(&sbi->s_mb_history_lock);
 	i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-	sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
+	sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
 	/* if we can't allocate history, then we simple won't use it */
 }
 
@@ -2444,7 +2445,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_mb_history h;
 
-	if (unlikely(sbi->s_mb_history == NULL))
+	if (sbi->s_mb_history == NULL)
 		return;
 
 	if (!(ac->ac_op & sbi->s_mb_history_filter))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7903f20..39223a5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -47,6 +47,13 @@
 #include "xattr.h"
 #include "acl.h"
 
+static int default_mb_history_length = 1000;
+
+module_param_named(default_mb_history_length, default_mb_history_length,
+		   int, 0644);
+MODULE_PARM_DESC(default_mb_history_length,
+		 "Default number of entries saved for mb_history");
+
 struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
 
@@ -1042,7 +1049,7 @@ enum {
 	Opt_journal_update, Opt_journal_dev,
 	Opt_journal_checksum, Opt_journal_async_commit,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-	Opt_data_err_abort, Opt_data_err_ignore,
+	Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
@@ -1088,6 +1095,7 @@ static const match_table_t tokens = {
 	{Opt_data_writeback, "data=writeback"},
 	{Opt_data_err_abort, "data_err=abort"},
 	{Opt_data_err_ignore, "data_err=ignore"},
+	{Opt_mb_history_length, "mb_history_length=%u"},
 	{Opt_offusrjquota, "usrjquota="},
 	{Opt_usrjquota, "usrjquota=%s"},
 	{Opt_offgrpjquota, "grpjquota="},
@@ -1329,6 +1337,13 @@ static int parse_options(char *options, struct super_block *sb,
 		case Opt_data_err_ignore:
 			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
 			break;
+		case Opt_mb_history_length:
+			if (match_int(&args[0], &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			sbi->s_mb_history_max = option;
+			break;
 #ifdef CONFIG_QUOTA
 		case Opt_usrjquota:
 			qtype = USRQUOTA;
@@ -2345,6 +2360,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
 	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+	sbi->s_mb_history_max = default_mb_history_length;
 
 	set_opt(sbi->s_mount_opt, BARRIER);
 
-- 
cgit v0.10.2


From abc8746eb91fb01e8d411896f80f7687c0d8372e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sat, 2 May 2009 22:54:32 -0400
Subject: ext4: hook fiemap operation for directories

Add fiemap callback for directories

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index c9690b2..f2bc160 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2534,6 +2534,7 @@ const struct inode_operations ext4_dir_inode_operations = {
 	.removexattr	= generic_removexattr,
 #endif
 	.permission	= ext4_permission,
+	.fiemap         = ext4_fiemap,
 };
 
 const struct inode_operations ext4_special_inode_operations = {
-- 
cgit v0.10.2


From 19ba0559f9ce104171ab16706893ce01f03ef116 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 13 May 2009 18:12:05 -0400
Subject: vfs: Enable FS_IOC_FIEMAP and FIGETBSZ for all filetypes

The fiemap and get_blk_size ioctls should be enabled even for
directories.  So move it outisde file_ioctl.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 82d9c42..286f38d 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
 	switch (cmd) {
 	case FIBMAP:
 		return ioctl_fibmap(filp, p);
-	case FS_IOC_FIEMAP:
-		return ioctl_fiemap(filp, arg);
-	case FIGETBSZ:
-		return put_user(inode->i_sb->s_blocksize, p);
 	case FIONREAD:
 		return put_user(i_size_read(inode) - filp->f_pos, p);
 	}
@@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		error = ioctl_fsthaw(filp);
 		break;
 
+	case FS_IOC_FIEMAP:
+		return ioctl_fiemap(filp, arg);
+
+	case FIGETBSZ:
+	{
+		struct inode *inode = filp->f_path.dentry->d_inode;
+		int __user *p = (int __user *)arg;
+		return put_user(inode->i_sb->s_blocksize, p);
+	}
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
-- 
cgit v0.10.2


From c9877b205f6ce7943bb95281342f4001cc1c00ec Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Fri, 1 May 2009 23:32:06 -0400
Subject: ext4: fix for fiemap last-block test

Carl Henrik Lunde reported and debugged this; the test for the
last allocated block was comparing bytes to blocks in this test:

	if (logical + length - 1 == EXT_MAX_BLOCK ||
	    ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
		flags |= FIEMAP_EXTENT_LAST;

so any extent which ended right at 4G was stopping the extent
walk.  Just replacing these values with the extent block &
length should fix it.

Also give blksize_bits a saner type, and reverse the order
of the tests to make the more likely case tested first.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reported-by: Carl Henrik Lunde <chlunde@ping.uio.no>
Tested-by: Carl Henrik Lunde <chlunde@ping.uio.no>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ea5c476..5f72952 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3196,7 +3196,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 		       void *data)
 {
 	struct fiemap_extent_info *fieinfo = data;
-	unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 	__u64	logical;
 	__u64	physical;
 	__u64	length;
@@ -3243,8 +3243,8 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 	 *
 	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
 	 */
-	if (logical + length - 1 == EXT_MAX_BLOCK ||
-	    ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
+	if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
+	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK)
 		flags |= FIEMAP_EXTENT_LAST;
 
 	error = fiemap_fill_next_extent(fieinfo, logical, physical,
-- 
cgit v0.10.2


From a25cbd045a2ffc42787d4dbcbb9c7118f5f42732 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Fri, 1 May 2009 14:45:46 +0900
Subject: clocksource: setup mult_orig in clocksource_enable()

Setup clocksource mult_orig in clocksource_enable().

Clocksource drivers can save power by using keeping the
device clock disabled while the clocksource is unused.

In practice this means that the enable() and disable()
callbacks perform clk_enable() and clk_disable().

The enable() callback may also use clk_get_rate() to get
the clock rate from the clock framework. This information
can then be used to calculate the shift and mult variables.

Currently the mult_orig variable is setup from mult at
registration time only. This is conflicting with the above
case since the clock is disabled and the mult variable is
not yet calculated at the time of registration.

Moving the mult_orig setup code to clocksource_enable()
allows us to both handle the common case with no enable()
callback and the mult-changed-after-enable() case.

[ Impact: allow dynamic clock source usage ]

Signed-off-by: Magnus Damm <damm@igel.co.jp>
LKML-Reference: <20090501054546.8193.10688.sendpatchset@rx1.opensource.se>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 5a40d14..c56457c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -288,7 +288,15 @@ static inline cycle_t clocksource_read(struct clocksource *cs)
  */
 static inline int clocksource_enable(struct clocksource *cs)
 {
-	return cs->enable ? cs->enable(cs) : 0;
+	int ret = 0;
+
+	if (cs->enable)
+		ret = cs->enable(cs);
+
+	/* save mult_orig on enable */
+	cs->mult_orig = cs->mult;
+
+	return ret;
 }
 
 /**
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index ecfd7b5..80189f6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -402,9 +402,6 @@ int clocksource_register(struct clocksource *c)
 	unsigned long flags;
 	int ret;
 
-	/* save mult_orig on registration */
-	c->mult_orig = c->mult;
-
 	spin_lock_irqsave(&clocksource_lock, flags);
 	ret = clocksource_enqueue(c);
 	if (!ret)
-- 
cgit v0.10.2


From 7d27558c4138ac6b3684dea35c2f4379b940a7dd Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Fri, 1 May 2009 13:10:26 -0700
Subject: timekeeping: create arch_gettimeoffset infrastructure

Some arches don't supply their own clocksource. This is mainly the
case in architectures that get their inter-tick times by reading the
counter on their interval timer.  Since these timers wrap every tick,
they're not really useful as clocksources.  Wrapping them to act like
one is possible but not very efficient. So we provide a callout these
arches can implement for use with the jiffies clocksource to provide
finer then tick granular time.

[ Impact: ease the migration to generic time keeping ]

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/time.h b/include/linux/time.h
index 242f624..ea16c1a 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,6 +113,21 @@ struct timespec current_kernel_time(void);
 #define CURRENT_TIME		(current_kernel_time())
 #define CURRENT_TIME_SEC	((struct timespec) { get_seconds(), 0 })
 
+/* Some architectures do not supply their own clocksource.
+ * This is mainly the case in architectures that get their
+ * inter-tick times by reading the counter on their interval
+ * timer. Since these timers wrap every tick, they're not really
+ * useful as clocksources. Wrapping them to act like one is possible
+ * but not very efficient. So we provide a callout these arches
+ * can implement for use with the jiffies clocksource to provide
+ * finer then tick granular time.
+ */
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+extern u32 arch_gettimeoffset(void);
+#else
+static inline u32 arch_gettimeoffset(void) { return 0; }
+#endif
+
 extern void do_gettimeofday(struct timeval *tv);
 extern int do_settimeofday(struct timespec *tv);
 extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 687dff4..e97c50f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -77,6 +77,10 @@ static void clocksource_forward_now(void)
 	clock->cycle_last = cycle_now;
 
 	nsec = cyc2ns(clock, cycle_delta);
+
+	/* If arch requires, add in gettimeoffset() */
+	nsec += arch_gettimeoffset();
+
 	timespec_add_ns(&xtime, nsec);
 
 	nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
@@ -111,6 +115,9 @@ void getnstimeofday(struct timespec *ts)
 		/* convert to nanoseconds: */
 		nsecs = cyc2ns(clock, cycle_delta);
 
+		/* If arch requires, add in gettimeoffset() */
+		nsecs += arch_gettimeoffset();
+
 	} while (read_seqretry(&xtime_lock, seq));
 
 	timespec_add_ns(ts, nsecs);
-- 
cgit v0.10.2


From 33f503c96c976fd585dedb76514ca6cb286e60d9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 2 May 2009 12:24:55 +0100
Subject: ASoC: Use a shared define for AC97 CODEC data formats

The AC97 wire format is completely fixed so CODECs don't have any choice
about the formats they accept but controllers accept a variety of data
formats and render them down onto the bus.  Have a shared define so all
the CODEC drivers will interoperate with any of our controller drivers.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 22b729f..ea07b4b 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -96,6 +96,9 @@ struct snd_pcm_substream;
 #define SND_SOC_CLOCK_IN		0
 #define SND_SOC_CLOCK_OUT		1
 
+#define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\
+                               SNDRV_PCM_FMTBIT_S32_LE)
+
 struct snd_soc_dai_ops;
 struct snd_soc_dai;
 struct snd_ac97_bus_ops;
diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c
index b0d4af1..932299b 100644
--- a/sound/soc/codecs/ac97.c
+++ b/sound/soc/codecs/ac97.c
@@ -53,13 +53,13 @@ struct snd_soc_dai ac97_dai = {
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = STD_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "AC97 Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = STD_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &ac97_dai_ops,
 };
 EXPORT_SYMBOL_GPL(ac97_dai);
diff --git a/sound/soc/codecs/ad1980.c b/sound/soc/codecs/ad1980.c
index ddb3b08..d7440a9 100644
--- a/sound/soc/codecs/ad1980.c
+++ b/sound/soc/codecs/ad1980.c
@@ -137,13 +137,13 @@ struct snd_soc_dai ad1980_dai = {
 		.channels_min = 2,
 		.channels_max = 6,
 		.rates = SNDRV_PCM_RATE_48000,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE, },
+		.formats = SND_SOC_STD_AC97_FMTS, },
 	.capture = {
 		.stream_name = "Capture",
 		.channels_min = 2,
 		.channels_max = 2,
 		.rates = SNDRV_PCM_RATE_48000,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE, },
+		.formats = SND_SOC_STD_AC97_FMTS, },
 };
 EXPORT_SYMBOL_GPL(ad1980_dai);
 
diff --git a/sound/soc/codecs/wm9705.c b/sound/soc/codecs/wm9705.c
index c2d1a7a..fa88b46 100644
--- a/sound/soc/codecs/wm9705.c
+++ b/sound/soc/codecs/wm9705.c
@@ -282,14 +282,14 @@ struct snd_soc_dai wm9705_dai[] = {
 			.channels_min = 1,
 			.channels_max = 2,
 			.rates = WM9705_AC97_RATES,
-			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.formats = SND_SOC_STD_AC97_FMTS,
 		},
 		.capture = {
 			.stream_name = "HiFi Capture",
 			.channels_min = 1,
 			.channels_max = 2,
 			.rates = WM9705_AC97_RATES,
-			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.formats = SND_SOC_STD_AC97_FMTS,
 		},
 		.ops = &wm9705_dai_ops,
 	},
diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c
index 765cf1e..550c903 100644
--- a/sound/soc/codecs/wm9712.c
+++ b/sound/soc/codecs/wm9712.c
@@ -534,13 +534,13 @@ struct snd_soc_dai wm9712_dai[] = {
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "HiFi Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9712_dai_ops_hifi,
 },
 {
@@ -550,7 +550,7 @@ struct snd_soc_dai wm9712_dai[] = {
 		.channels_min = 1,
 		.channels_max = 1,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9712_dai_ops_aux,
 }
 };
diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index a6feb784..d1744e9 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -1040,13 +1040,13 @@ struct snd_soc_dai wm9713_dai[] = {
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "HiFi Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9713_dai_ops_hifi,
 	},
 	{
@@ -1056,7 +1056,7 @@ struct snd_soc_dai wm9713_dai[] = {
 		.channels_min = 1,
 		.channels_max = 1,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9713_dai_ops_aux,
 	},
 	{
-- 
cgit v0.10.2


From 4072604b9dd18f25a98cc0f4d3d4553ed1ad4152 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 2 May 2009 12:28:25 +0100
Subject: ASoC: Remove unused DAI format defines

The defines for TDM and synchronous clocks are not used - they are
mostly a legacy of the automatic clocking configuration.  TDM will
require configuration of the number of timeslots and which ones to use
so can't be fit into the DAI format and synchronous mode is handled by
symmetric_rates (and needs to be done by constraints rather than when
the DAI format is being configured).

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index ea07b4b..a997c2c 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -45,24 +45,6 @@ struct snd_pcm_substream;
 #define SND_SOC_DAIFMT_GATED		(1 << 4) /* clock is gated */
 
 /*
- * DAI Left/Right Clocks.
- *
- * Specifies whether the DAI can support different samples for similtanious
- * playback and capture. This usually requires a seperate physical frame
- * clock for playback and capture.
- */
-#define SND_SOC_DAIFMT_SYNC		(0 << 5) /* Tx FRM = Rx FRM */
-#define SND_SOC_DAIFMT_ASYNC		(1 << 5) /* Tx FRM ~ Rx FRM */
-
-/*
- * TDM
- *
- * Time Division Multiplexing. Allows PCM data to be multplexed with other
- * data on the DAI.
- */
-#define SND_SOC_DAIFMT_TDM		(1 << 6)
-
-/*
  * DAI hardware signal inversions.
  *
  * Specifies whether the DAI can also support inverted clocks for the specified
-- 
cgit v0.10.2


From eefd7f03b86b8a319890e7fac5a6fcc7f8694b76 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 2 May 2009 19:05:37 -0400
Subject: ext4: fix the length returned by fiemap for an unallocated extent

If the file's blocks have not yet been allocated because of delayed
allocation, the length of the extent returned by fiemap is incorrect.
This commit fixes this bug.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5f72952..4fec6b7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3244,8 +3244,15 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
 	 */
 	if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK)
+	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
+		loff_t size = i_size_read(inode);
+		loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
+
 		flags |= FIEMAP_EXTENT_LAST;
+		if ((flags & FIEMAP_EXTENT_DELALLOC) &&
+		    logical+length > size)
+			length = (size - logical + bs - 1) & ~(bs-1);
+	}
 
 	error = fiemap_fill_next_extent(fieinfo, logical, physical,
 					length, flags);
-- 
cgit v0.10.2


From 955ce5f5be67dfe0d1d096b543af33fe8a1ce3dd Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sat, 2 May 2009 20:35:09 -0400
Subject: ext4: Convert ext4_lock_group to use sb_bgl_lock

We have sb_bgl_lock() and ext4_group_info.bb_state
bit spinlock to protech group information. The later is only
used within mballoc code. Consolidate them to use sb_bgl_lock().
This makes the mballoc.c code much simpler and also avoid
confusion with two locks protecting same info.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 92f557d..e2126d7 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 	down_write(&grp->alloc_sem);
 	for (i = 0, blocks_freed = 0; i < count; i++) {
 		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
 						bit + i, bitmap_bh->b_data)) {
 			ext4_error(sb, __func__,
 				   "bit already cleared for block %llu",
@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 			blocks_freed++;
 		}
 	}
-	spin_lock(sb_bgl_lock(sbi, block_group));
+	ext4_lock_group(sb, block_group);
 	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
 	ext4_free_blks_set(sb, desc, blk_free_count);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	ext4_unlock_group(sb, block_group);
 	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
 
 	if (sbi->s_log_groups_per_flex) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5973f32..149e02d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -963,12 +963,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 		(ino >= EXT4_FIRST_INO(sb) &&
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
-
-static inline spinlock_t *
-sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
-{
-	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
-}
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
@@ -1568,33 +1562,31 @@ struct ext4_group_info {
 };
 
 #define EXT4_GROUP_INFO_NEED_INIT_BIT	0
-#define EXT4_GROUP_INFO_LOCKED_BIT	1
 
 #define EXT4_MB_GRP_NEED_INIT(grp)	\
 	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
 
-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
+					      ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+	return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
+}
 
-	bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+{
+	spin_lock(ext4_group_lock_ptr(sb, group));
 }
 
 static inline void ext4_unlock_group(struct super_block *sb,
 					ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+	spin_unlock(ext4_group_lock_ptr(sb, group));
 }
 
 static inline int ext4_is_group_locked(struct super_block *sb,
 					ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
-						&(grinfo->bb_state));
+	return spin_is_locked(ext4_group_lock_ptr(sb, group));
 }
 
 /*
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 916d05c..82f7d1d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -122,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
 		ext4_init_inode_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -246,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 		goto error_return;
 
 	/* Ok, now we can actually update the inode bitmaps.. */
-	spin_lock(sb_bgl_lock(sbi, block_group));
-	cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
+					bit, bitmap_bh->b_data);
 	if (!cleared)
 		ext4_error(sb, "ext4_free_inode",
 			   "bit already cleared for inode %lu", ino);
@@ -260,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 		if (fatal) goto error_return;
 
 		if (gdp) {
-			spin_lock(sb_bgl_lock(sbi, block_group));
+			ext4_lock_group(sb, block_group);
 			count = ext4_free_inodes_count(sb, gdp) + 1;
 			ext4_free_inodes_set(sb, gdp, count);
 			if (is_directory) {
@@ -276,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 			}
 			gdp->bg_checksum = ext4_group_desc_csum(sbi,
 							block_group, gdp);
-			spin_unlock(sb_bgl_lock(sbi, block_group));
+			ext4_unlock_group(sb, block_group);
 			percpu_counter_inc(&sbi->s_freeinodes_counter);
 			if (is_directory)
 				percpu_counter_dec(&sbi->s_dirs_counter);
@@ -707,10 +706,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 
 /*
  * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's sb_bgl_lock
+ * is uninit we need to take the groups's ext4_group_lock
  * and clear the uninit flag. The inode bitmap update
  * and group desc uninit flag clear should be done
- * after holding sb_bgl_lock so that ext4_read_inode_bitmap
+ * after holding ext4_group_lock so that ext4_read_inode_bitmap
  * doesn't race with the ext4_claim_inode
  */
 static int ext4_claim_inode(struct super_block *sb,
@@ -721,7 +720,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 
-	spin_lock(sb_bgl_lock(sbi, group));
+	ext4_lock_group(sb, group);
 	if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
 		/* not a free inode */
 		retval = 1;
@@ -730,7 +729,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	ino++;
 	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
 			ino > EXT4_INODES_PER_GROUP(sb)) {
-		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_unlock_group(sb, group);
 		ext4_error(sb, __func__,
 			   "reserved inode or inode > inodes count - "
 			   "block_group = %u, inode=%lu", group,
@@ -779,7 +778,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	}
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 err_ret:
-	spin_unlock(sb_bgl_lock(sbi, group));
+	ext4_unlock_group(sb, group);
 	return retval;
 }
 
@@ -935,7 +934,7 @@ got:
 		}
 
 		free = 0;
-		spin_lock(sb_bgl_lock(sbi, group));
+		ext4_lock_group(sb, group);
 		/* recheck and clear flag under lock if we still need to */
 		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			free = ext4_free_blocks_after_init(sb, group, gdp);
@@ -944,7 +943,7 @@ got:
 			gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
 								gdp);
 		}
-		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_unlock_group(sb, group);
 
 		/* Don't need to dirty bitmap block if we didn't change it */
 		if (free) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index df75855..e76459c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr)
 	ext4_set_bit(bit, addr);
 }
 
-static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_set_bit_atomic(lock, bit, addr);
-}
-
 static inline void mb_clear_bit(int bit, void *addr)
 {
 	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_clear_bit(bit, addr);
 }
 
-static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_clear_bit_atomic(lock, bit, addr);
-}
-
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
 	int fix = 0, ret, tmpmax;
@@ -803,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 			unlock_buffer(bh[i]);
 			continue;
 		}
-		spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+		ext4_lock_group(sb, first_group + i);
 		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			ext4_init_block_bitmap(sb, bh[i],
 						first_group + i, desc);
 			set_bitmap_uptodate(bh[i]);
 			set_buffer_uptodate(bh[i]);
-			spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+			ext4_unlock_group(sb, first_group + i);
 			unlock_buffer(bh[i]);
 			continue;
 		}
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+		ext4_unlock_group(sb, first_group + i);
 		if (buffer_uptodate(bh[i])) {
 			/*
 			 * if not uninit if bh is uptodate,
@@ -1080,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
 	return 0;
 }
 
-static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_clear_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1093,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_clear_bit_atomic(lock, cur, bm);
-		else
-			mb_clear_bit(cur, bm);
+		mb_clear_bit(cur, bm);
 		cur++;
 	}
 }
 
-static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_set_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1114,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_set_bit_atomic(lock, cur, bm);
-		else
-			mb_set_bit(cur, bm);
+		mb_set_bit(cur, bm);
 		cur++;
 	}
 }
@@ -1332,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
 		e4b->bd_info->bb_counters[ord]++;
 	}
 
-	mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
-			EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
 	mb_check_buddy(e4b);
 
 	return ret;
@@ -2756,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	return 0;
 }
 
-/* need to called with ext4 group lock (ext4_lock_group) */
+/* need to called with the ext4 group lock held */
 static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 {
 	struct ext4_prealloc_space *pa;
@@ -2993,14 +2974,17 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		 * Fix the bitmap and repeat the block allocation
 		 * We leak some of the blocks here.
 		 */
-		mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
-				bitmap_bh->b_data, ac->ac_b_ex.fe_start,
-				ac->ac_b_ex.fe_len);
+		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+			    ac->ac_b_ex.fe_len);
+		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
 		if (!err)
 			err = -EAGAIN;
 		goto out_err;
 	}
+
+	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
@@ -3010,9 +2994,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		}
 	}
 #endif
-	spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	mb_set_bits(NULL, bitmap_bh->b_data,
-				ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
+	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_blks_set(sb, gdp,
@@ -3022,7 +3004,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
 	ext4_free_blks_set(sb, gdp, len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+
+	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
 	/*
 	 * Now reduce the dirty block count also. Should not go negative
@@ -3455,7 +3438,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
  * the function goes through all block freed in the group
  * but not yet committed and marks them used in in-core bitmap.
  * buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with the ext4 group lock held
  */
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 						ext4_group_t group)
@@ -3469,9 +3452,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, node);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-				bitmap, entry->start_blk,
-				entry->count);
+		mb_set_bits(bitmap, entry->start_blk, entry->count);
 		n = rb_next(n);
 	}
 	return;
@@ -3480,7 +3461,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 /*
  * the function goes through all preallocation in this group and marks them
  * used in in-core bitmap. buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with ext4 group lock held
  */
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group)
@@ -3512,8 +3493,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 		if (unlikely(len == 0))
 			continue;
 		BUG_ON(groupnr != group);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-						bitmap, start, len);
+		mb_set_bits(bitmap, start, len);
 		preallocated += len;
 		count++;
 	}
@@ -4856,29 +4836,25 @@ do_more:
 		new_entry->group  = block_group;
 		new_entry->count = count;
 		new_entry->t_tid = handle->h_transaction->t_tid;
+
 		ext4_lock_group(sb, block_group);
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		ext4_mb_free_metadata(handle, &e4b, new_entry);
-		ext4_unlock_group(sb, block_group);
 	} else {
-		ext4_lock_group(sb, block_group);
 		/* need to update group_info->bb_free and bitmap
 		 * with group lock held. generate_buddy look at
 		 * them with group lock_held
 		 */
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		ext4_lock_group(sb, block_group);
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		mb_free_blocks(inode, &e4b, bit, count);
 		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
-		ext4_unlock_group(sb, block_group);
 	}
 
-	spin_lock(sb_bgl_lock(sbi, block_group));
 	ret = ext4_free_blks_count(sb, gdp) + count;
 	ext4_free_blks_set(sb, gdp, ret);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	ext4_unlock_group(sb, block_group);
 	percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
 	if (sbi->s_log_groups_per_flex) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 39223a5..dc34ed3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1784,18 +1784,18 @@ static int ext4_check_descriptors(struct super_block *sb)
 			       "(block %llu)!\n", i, inode_table);
 			return 0;
 		}
-		spin_lock(sb_bgl_lock(sbi, i));
+		ext4_lock_group(sb, i);
 		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
 			       "Checksum for group %u failed (%u!=%u)\n",
 			       i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
 			       gdp)), le16_to_cpu(gdp->bg_checksum));
 			if (!(sb->s_flags & MS_RDONLY)) {
-				spin_unlock(sb_bgl_lock(sbi, i));
+				ext4_unlock_group(sb, i);
 				return 0;
 			}
 		}
-		spin_unlock(sb_bgl_lock(sbi, i));
+		ext4_unlock_group(sb, i);
 		if (!flexbg_flag)
 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
 	}
-- 
cgit v0.10.2


From dab6f6a3401f596fe934f41fc5da3f401adfdfb1 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Sat, 2 May 2009 08:02:36 +0200
Subject: perf_counter tools: fix build error

ctype.h crawled out of the bit bucket :)

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 3a3deb3..ddfdcf8 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -15,7 +15,6 @@
 #include <fcntl.h>
 #include <stdio.h>
 #include <errno.h>
-#include <ctype.h>
 #include <time.h>
 #include <sched.h>
 #include <pthread.h>
-- 
cgit v0.10.2


From a454ab3110175d710f4f9a96226a26ce4d5d5de2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 3 May 2009 10:09:03 +0200
Subject: x86, mm: fault.c, use printk_once() in is_errata93()

Andrew pointed out that the 'once' variable has a needlessly
function-global scope. We can in fact eliminate it completely,
via the use of printk_once().

[ Impact: cleanup ]

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 24a36a6..b9ca6d7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -514,8 +514,6 @@ bad:
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_64
-	static int once;
-
 	if (address != regs->ip)
 		return 0;
 
@@ -525,10 +523,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 	address |= 0xffffffffUL << 32;
 	if ((address >= (u64)_stext && address <= (u64)_etext) ||
 	    (address >= MODULES_VADDR && address <= MODULES_END)) {
-		if (!once) {
-			printk(errata93_warning);
-			once = 1;
-		}
+		printk_once(errata93_warning);
 		regs->ip = address;
 		return 1;
 	}
-- 
cgit v0.10.2


From 0cd5f7b0c7fd96f9f00187ef2ad2328de28ae326 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 2 May 2009 20:00:44 +0000
Subject: sh: remove obsolete no_irq_type

Impact: cleanup

convert the last remaining users to no_irq_chip

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 528de29..2d868c6 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -19,13 +19,12 @@
 #include <asm/ftrace.h>
 
 extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
-extern struct hw_interrupt_type no_irq_type;
 
 /* platform dependent support */
 EXPORT_SYMBOL(dump_fpu);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(irq_desc);
-EXPORT_SYMBOL(no_irq_type);
+EXPORT_SYMBOL(no_irq_chip);
 
 EXPORT_SYMBOL(strlen);
 
-- 
cgit v0.10.2


From d80498398276ca8eee7ebdbe0d47e06d01317439 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 2 May 2009 20:01:20 +0000
Subject: sh: remove obsolete hw_interrupt_type

Impact: cleanup

Convert the last remaining users to struct irq_chip and remove the
define.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/irq/imask.c b/arch/sh/kernel/cpu/irq/imask.c
index 301b505..f43753b 100644
--- a/arch/sh/kernel/cpu/irq/imask.c
+++ b/arch/sh/kernel/cpu/irq/imask.c
@@ -39,7 +39,7 @@ static unsigned int startup_imask_irq(unsigned int irq)
 	return 0; /* never anything pending */
 }
 
-static struct hw_interrupt_type imask_irq_type = {
+static struct irq_chip imask_irq_type = {
 	.typename = "SR.IMASK",
 	.startup = startup_imask_irq,
 	.shutdown = shutdown_imask_irq,
diff --git a/arch/sh/kernel/cpu/irq/intc-sh5.c b/arch/sh/kernel/cpu/irq/intc-sh5.c
index 726f033..d8679a4 100644
--- a/arch/sh/kernel/cpu/irq/intc-sh5.c
+++ b/arch/sh/kernel/cpu/irq/intc-sh5.c
@@ -84,7 +84,7 @@ static void disable_intc_irq(unsigned int irq);
 static void mask_and_ack_intc(unsigned int);
 static void end_intc_irq(unsigned int irq);
 
-static struct hw_interrupt_type intc_irq_type = {
+static struct irq_chip intc_irq_type = {
 	.typename = "INTC",
 	.startup = startup_intc_irq,
 	.shutdown = shutdown_intc_irq,
-- 
cgit v0.10.2


From 7563431107f6debf57c1dbecfb9498cf31a1c036 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Fri, 1 May 2009 13:10:28 -0700
Subject: time: sh: convert to use arch_getoffset() infrastructure

Convert sh to use GENERIC_TIME via the arch_getoffset() infrastructure.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 7e96ade..6f91478 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -75,7 +75,7 @@ config GENERIC_IOMAP
 	bool
 
 config GENERIC_TIME
-	def_bool n
+	def_bool y
 
 config GENERIC_CLOCKEVENTS
 	def_bool n
diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h
index 8f80a55..9c968d1 100644
--- a/arch/sh/include/asm/timer.h
+++ b/arch/sh/include/asm/timer.h
@@ -9,7 +9,7 @@ struct sys_timer_ops {
 	int (*init)(void);
 	int (*start)(void);
 	int (*stop)(void);
-#ifndef CONFIG_GENERIC_TIME
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	unsigned long (*get_offset)(void);
 #endif
 };
@@ -26,7 +26,7 @@ struct sys_timer {
 extern struct sys_timer tmu_timer, mtu2_timer;
 extern struct sys_timer *sys_timer;
 
-#ifndef CONFIG_GENERIC_TIME
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 static inline unsigned long get_timer_offset(void)
 {
 	return sys_timer->ops->get_offset();
diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index 4573321..9d34dff 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -83,65 +83,12 @@ static int __init rtc_generic_init(void)
 }
 module_init(rtc_generic_init);
 
-#ifndef CONFIG_GENERIC_TIME
-void do_gettimeofday(struct timeval *tv)
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+u32 arch_gettimeoffset(void)
 {
-	unsigned long flags;
-	unsigned long seq;
-	unsigned long usec, sec;
-
-	do {
-		/*
-		 * Turn off IRQs when grabbing xtime_lock, so that
-		 * the sys_timer get_offset code doesn't have to handle it.
-		 */
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		usec = get_timer_offset();
-		sec = xtime.tv_sec;
-		usec += xtime.tv_nsec / NSEC_PER_USEC;
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time.  Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= get_timer_offset() * NSEC_PER_USEC;
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-
-	return 0;
+	return get_timer_offset() * 1000;
 }
-EXPORT_SYMBOL(do_settimeofday);
-#endif /* !CONFIG_GENERIC_TIME */
+#endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */
 
 /* last time the RTC clock got updated */
 static long last_rtc_update;
@@ -238,7 +185,7 @@ struct clocksource clocksource_sh = {
 	.name		= "SuperH",
 };
 
-#ifdef CONFIG_GENERIC_TIME
+#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
 unsigned long long sched_clock(void)
 {
 	unsigned long long cycles;
diff --git a/arch/sh/kernel/time_64.c b/arch/sh/kernel/time_64.c
index 988c77c..f4f5e8a 100644
--- a/arch/sh/kernel/time_64.c
+++ b/arch/sh/kernel/time_64.c
@@ -144,59 +144,10 @@ static unsigned long usecs_since_tick(void)
 	return result;
 }
 
-void do_gettimeofday(struct timeval *tv)
+u32 arch_gettimeoffset(void)
 {
-	unsigned long flags;
-	unsigned long seq;
-	unsigned long usec, sec;
-
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		usec = usecs_since_tick();
-		sec = xtime.tv_sec;
-		usec += xtime.tv_nsec / 1000;
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time.  Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= 1000 * usecs_since_tick();
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-
-	return 0;
+	return usecs_since_tick() * 1000;
 }
-EXPORT_SYMBOL(do_settimeofday);
 
 /* Dummy RTC ops */
 static void null_rtc_get_time(struct timespec *tv)
diff --git a/arch/sh/kernel/timers/timer-mtu2.c b/arch/sh/kernel/timers/timer-mtu2.c
index 9b0ef01..8a1dcc2 100644
--- a/arch/sh/kernel/timers/timer-mtu2.c
+++ b/arch/sh/kernel/timers/timer-mtu2.c
@@ -191,7 +191,7 @@ struct sys_timer_ops mtu2_timer_ops = {
 	.init		= mtu2_timer_init,
 	.start		= mtu2_timer_start,
 	.stop		= mtu2_timer_stop,
-#ifndef CONFIG_GENERIC_TIME
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	.get_offset	= mtu2_timer_get_offset,
 #endif
 };
-- 
cgit v0.10.2


From d5ed4c2e5ce9f5f6fd6a5a39ee1196a1f8a46eed Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 30 Apr 2009 07:02:49 +0000
Subject: clocksource: SuperH MTU2 Timer driver

This patch adds a MTU2 driver for the SuperH architecture.

The MTU2 driver is a platform driver with early platform
support to allow using a MTU2 channel as only clockevent
during system bootup.

Clocksource on sh2a is currently unsupported due to code
generation issues with 64-bit math, so at this point only
periodic clockevent support is in place.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6f91478..6d0dd37 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -113,6 +113,9 @@ config SYS_SUPPORTS_PCI
 config SYS_SUPPORTS_CMT
 	bool
 
+config SYS_SUPPORTS_MTU2
+	bool
+
 config STACKTRACE_SUPPORT
 	def_bool y
 
@@ -478,6 +481,14 @@ config SH_MTU2
 	help
 	  This enables the use of the MTU2 as the system timer.
 
+config SH_TIMER_MTU2
+	bool "MTU2 timer driver"
+	depends on SYS_SUPPORTS_MTU2 && !SH_MTU2
+	default y
+	select GENERIC_CLOCKEVENTS
+	help
+	  This enables build of the MTU2 timer driver.
+
 config SH_TIMER_IRQ
 	int
 	default "28" if CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 1efb287..9785586 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_X86_CYCLONE_TIMER)	+= cyclone.o
 obj-$(CONFIG_X86_PM_TIMER)	+= acpi_pm.o
 obj-$(CONFIG_SCx200HR_TIMER)	+= scx200_hrt.o
 obj-$(CONFIG_SH_TIMER_CMT)	+= sh_cmt.o
+obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
new file mode 100644
index 0000000..420566f
--- /dev/null
+++ b/drivers/clocksource/sh_mtu2.c
@@ -0,0 +1,357 @@
+/*
+ * SuperH Timer Support - MTU2
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/err.h>
+#include <linux/clockchips.h>
+#include <linux/sh_mtu2.h>
+
+struct sh_mtu2_priv {
+	void __iomem *mapbase;
+	struct clk *clk;
+	struct irqaction irqaction;
+	struct platform_device *pdev;
+	unsigned long rate;
+	unsigned long periodic;
+	struct clock_event_device ced;
+};
+
+static DEFINE_SPINLOCK(sh_mtu2_lock);
+
+#define TSTR -1 /* shared register */
+#define TCR  0 /* channel register */
+#define TMDR 1 /* channel register */
+#define TIOR 2 /* channel register */
+#define TIER 3 /* channel register */
+#define TSR  4 /* channel register */
+#define TCNT 5 /* channel register */
+#define TGR  6 /* channel register */
+
+static unsigned long mtu2_reg_offs[] = {
+	[TCR] = 0,
+	[TMDR] = 1,
+	[TIOR] = 2,
+	[TIER] = 4,
+	[TSR] = 5,
+	[TCNT] = 6,
+	[TGR] = 8,
+};
+
+static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr)
+{
+	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR)
+		return ioread8(base + cfg->channel_offset);
+
+	offs = mtu2_reg_offs[reg_nr];
+
+	if ((reg_nr == TCNT) || (reg_nr == TGR))
+		return ioread16(base + offs);
+	else
+		return ioread8(base + offs);
+}
+
+static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr,
+				unsigned long value)
+{
+	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR) {
+		iowrite8(value, base + cfg->channel_offset);
+		return;
+	}
+
+	offs = mtu2_reg_offs[reg_nr];
+
+	if ((reg_nr == TCNT) || (reg_nr == TGR))
+		iowrite16(value, base + offs);
+	else
+		iowrite8(value, base + offs);
+}
+
+static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start)
+{
+	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	unsigned long flags, value;
+
+	/* start stop register shared by multiple timer channels */
+	spin_lock_irqsave(&sh_mtu2_lock, flags);
+	value = sh_mtu2_read(p, TSTR);
+
+	if (start)
+		value |= 1 << cfg->timer_bit;
+	else
+		value &= ~(1 << cfg->timer_bit);
+
+	sh_mtu2_write(p, TSTR, value);
+	spin_unlock_irqrestore(&sh_mtu2_lock, flags);
+}
+
+static int sh_mtu2_enable(struct sh_mtu2_priv *p)
+{
+	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	int ret;
+
+	/* enable clock */
+	ret = clk_enable(p->clk);
+	if (ret) {
+		pr_err("sh_mtu2: cannot enable clock \"%s\"\n", cfg->clk);
+		return ret;
+	}
+
+	/* make sure channel is disabled */
+	sh_mtu2_start_stop_ch(p, 0);
+
+	p->rate = clk_get_rate(p->clk) / 64;
+	p->periodic = (p->rate + HZ/2) / HZ;
+
+	/* "Periodic Counter Operation" */
+	sh_mtu2_write(p, TCR, 0x23); /* TGRA clear, divide clock by 64 */
+	sh_mtu2_write(p, TIOR, 0);
+	sh_mtu2_write(p, TGR, p->periodic);
+	sh_mtu2_write(p, TCNT, 0);
+	sh_mtu2_write(p, TMDR, 0);
+	sh_mtu2_write(p, TIER, 0x01);
+
+	/* enable channel */
+	sh_mtu2_start_stop_ch(p, 1);
+
+	return 0;
+}
+
+static void sh_mtu2_disable(struct sh_mtu2_priv *p)
+{
+	/* disable channel */
+	sh_mtu2_start_stop_ch(p, 0);
+
+	/* stop clock */
+	clk_disable(p->clk);
+}
+
+static irqreturn_t sh_mtu2_interrupt(int irq, void *dev_id)
+{
+	struct sh_mtu2_priv *p = dev_id;
+
+	/* acknowledge interrupt */
+	sh_mtu2_read(p, TSR);
+	sh_mtu2_write(p, TSR, 0xfe);
+
+	/* notify clockevent layer */
+	p->ced.event_handler(&p->ced);
+	return IRQ_HANDLED;
+}
+
+static struct sh_mtu2_priv *ced_to_sh_mtu2(struct clock_event_device *ced)
+{
+	return container_of(ced, struct sh_mtu2_priv, ced);
+}
+
+static void sh_mtu2_clock_event_mode(enum clock_event_mode mode,
+				    struct clock_event_device *ced)
+{
+	struct sh_mtu2_priv *p = ced_to_sh_mtu2(ced);
+	int disabled = 0;
+
+	/* deal with old setting first */
+	switch (ced->mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		sh_mtu2_disable(p);
+		disabled = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		pr_info("sh_mtu2: %s used for periodic clock events\n",
+			ced->name);
+		sh_mtu2_enable(p);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+		if (!disabled)
+			sh_mtu2_disable(p);
+		break;
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		break;
+	}
+}
+
+static void sh_mtu2_register_clockevent(struct sh_mtu2_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clock_event_device *ced = &p->ced;
+	int ret;
+
+	memset(ced, 0, sizeof(*ced));
+
+	ced->name = name;
+	ced->features = CLOCK_EVT_FEAT_PERIODIC;
+	ced->rating = rating;
+	ced->cpumask = cpumask_of(0);
+	ced->set_mode = sh_mtu2_clock_event_mode;
+
+	ret = setup_irq(p->irqaction.irq, &p->irqaction);
+	if (ret) {
+		pr_err("sh_mtu2: failed to request irq %d\n",
+		       p->irqaction.irq);
+		return;
+	}
+
+	pr_info("sh_mtu2: %s used for clock events\n", ced->name);
+	clockevents_register_device(ced);
+}
+
+int sh_mtu2_register(struct sh_mtu2_priv *p, char *name,
+		     unsigned long clockevent_rating)
+{
+	if (clockevent_rating)
+		sh_mtu2_register_clockevent(p, name, clockevent_rating);
+
+	return 0;
+}
+
+static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev)
+{
+	struct sh_mtu2_config *cfg = pdev->dev.platform_data;
+	struct resource *res;
+	int irq, ret;
+	ret = -ENXIO;
+
+	memset(p, 0, sizeof(*p));
+	p->pdev = pdev;
+
+	if (!cfg) {
+		dev_err(&p->pdev->dev, "missing platform data\n");
+		goto err0;
+	}
+
+	platform_set_drvdata(pdev, p);
+
+	res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&p->pdev->dev, "failed to get I/O memory\n");
+		goto err0;
+	}
+
+	irq = platform_get_irq(p->pdev, 0);
+	if (irq < 0) {
+		dev_err(&p->pdev->dev, "failed to get irq\n");
+		goto err0;
+	}
+
+	/* map memory, let mapbase point to our channel */
+	p->mapbase = ioremap_nocache(res->start, resource_size(res));
+	if (p->mapbase == NULL) {
+		pr_err("sh_mtu2: failed to remap I/O memory\n");
+		goto err0;
+	}
+
+	/* setup data for setup_irq() (too early for request_irq()) */
+	p->irqaction.name = cfg->name;
+	p->irqaction.handler = sh_mtu2_interrupt;
+	p->irqaction.dev_id = p;
+	p->irqaction.irq = irq;
+	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL;
+	p->irqaction.mask = CPU_MASK_NONE;
+
+	/* get hold of clock */
+	p->clk = clk_get(&p->pdev->dev, cfg->clk);
+	if (IS_ERR(p->clk)) {
+		pr_err("sh_mtu2: cannot get clock \"%s\"\n", cfg->clk);
+		ret = PTR_ERR(p->clk);
+		goto err1;
+	}
+
+	return sh_mtu2_register(p, cfg->name, cfg->clockevent_rating);
+ err1:
+	iounmap(p->mapbase);
+ err0:
+	return ret;
+}
+
+static int __devinit sh_mtu2_probe(struct platform_device *pdev)
+{
+	struct sh_mtu2_priv *p = platform_get_drvdata(pdev);
+	struct sh_mtu2_config *cfg = pdev->dev.platform_data;
+	int ret;
+
+	if (p) {
+		pr_info("sh_mtu2: %s kept as earlytimer\n", cfg->name);
+		return 0;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL) {
+		dev_err(&pdev->dev, "failed to allocate driver data\n");
+		return -ENOMEM;
+	}
+
+	ret = sh_mtu2_setup(p, pdev);
+	if (ret) {
+		kfree(p);
+		platform_set_drvdata(pdev, NULL);
+	}
+	return ret;
+}
+
+static int __devexit sh_mtu2_remove(struct platform_device *pdev)
+{
+	return -EBUSY; /* cannot unregister clockevent */
+}
+
+static struct platform_driver sh_mtu2_device_driver = {
+	.probe		= sh_mtu2_probe,
+	.remove		= __devexit_p(sh_mtu2_remove),
+	.driver		= {
+		.name	= "sh_mtu2",
+	}
+};
+
+static int __init sh_mtu2_init(void)
+{
+	return platform_driver_register(&sh_mtu2_device_driver);
+}
+
+static void __exit sh_mtu2_exit(void)
+{
+	platform_driver_unregister(&sh_mtu2_device_driver);
+}
+
+early_platform_init("earlytimer", &sh_mtu2_device_driver);
+module_init(sh_mtu2_init);
+module_exit(sh_mtu2_exit);
+
+MODULE_AUTHOR("Magnus Damm");
+MODULE_DESCRIPTION("SuperH MTU2 Timer Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/sh_mtu2.h b/include/linux/sh_mtu2.h
new file mode 100644
index 0000000..a988763
--- /dev/null
+++ b/include/linux/sh_mtu2.h
@@ -0,0 +1,12 @@
+#ifndef __SH_MTU2_H__
+#define __SH_MTU2_H__
+
+struct sh_mtu2_config {
+	char *name;
+	int channel_offset;
+	int timer_bit;
+	char *clk;
+	unsigned long clockevent_rating;
+};
+
+#endif /* __SH_MTU2_H__ */
-- 
cgit v0.10.2


From da107c6ef919b3afd9c9b405a4f71e03b5725b04 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 30 Apr 2009 07:06:26 +0000
Subject: sh: sh2a MTU2 platform data

This patch adds MTU2 platform data for the following cpus:
 - sh7201 (3/5 channels)
 - sh7203/sh7263 (2/4 channels)
 - sh7206 (3/5 channels)
 - MXG (3/5 channels)

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6d0dd37..2061488 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -214,27 +214,32 @@ config CPU_SUBTYPE_SH7201
 	bool "Support SH7201 processor"
 	select CPU_SH2A
 	select CPU_HAS_FPU
+	select SYS_SUPPORTS_MTU2
  
 config CPU_SUBTYPE_SH7203
 	bool "Support SH7203 processor"
 	select CPU_SH2A
 	select CPU_HAS_FPU
 	select SYS_SUPPORTS_CMT
+	select SYS_SUPPORTS_MTU2
 
 config CPU_SUBTYPE_SH7206
 	bool "Support SH7206 processor"
 	select CPU_SH2A
 	select SYS_SUPPORTS_CMT
+	select SYS_SUPPORTS_MTU2
 
 config CPU_SUBTYPE_SH7263
 	bool "Support SH7263 processor"
 	select CPU_SH2A
 	select CPU_HAS_FPU
 	select SYS_SUPPORTS_CMT
+	select SYS_SUPPORTS_MTU2
 
 config CPU_SUBTYPE_MXG
 	bool "Support MX-G processor"
 	select CPU_SH2A
+	select SYS_SUPPORTS_MTU2
 	help
 	  Select MX-G if running on an R8A03022BG part.
 
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 8442937..870030a 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_mtu2.h>
 
 enum {
 	UNUSED = 0,
@@ -24,7 +25,7 @@ enum {
 
 	SCIF0, SCIF1,
 
-	MTU2_GROUP1, MTU2_GROUP2, MTU2_GROUP3, MTU2_GROUP4, MTU2_GROUP5
+	MTU2_GROUP1, MTU2_GROUP2, MTU2_GROUP3, MTU2_GROUP4, MTU2_GROUP5,
 	MTU2_TGI3B, MTU2_TGI3C,
 
 	/* interrupt groups */
@@ -113,6 +114,99 @@ static struct intc_mask_reg mask_registers[] __initdata = {
 static DECLARE_INTC_DESC(intc_desc, "mxg", vectors, groups,
 			 mask_registers, prio_registers, NULL);
 
+static struct sh_mtu2_config mtu2_0_platform_data = {
+	.name = "MTU2_0",
+	.channel_offset = -0x80,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_0_resources[] = {
+	[0] = {
+		.name	= "MTU2_0",
+		.start	= 0xff801300,
+		.end	= 0xff801326,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 228,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_0_device = {
+	.name		= "sh_mtu2",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &mtu2_0_platform_data,
+	},
+	.resource	= mtu2_0_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
+};
+
+static struct sh_mtu2_config mtu2_1_platform_data = {
+	.name = "MTU2_1",
+	.channel_offset = -0x100,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_1_resources[] = {
+	[0] = {
+		.name	= "MTU2_1",
+		.start	= 0xff801380,
+		.end	= 0xff801390,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 234,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_1_device = {
+	.name		= "sh_mtu2",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &mtu2_1_platform_data,
+	},
+	.resource	= mtu2_1_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
+};
+
+static struct sh_mtu2_config mtu2_2_platform_data = {
+	.name = "MTU2_2",
+	.channel_offset = 0x80,
+	.timer_bit = 2,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_2_resources[] = {
+	[0] = {
+		.name	= "MTU2_2",
+		.start	= 0xff801000,
+		.end	= 0xff80100a,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 240,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_2_device = {
+	.name		= "sh_mtu2",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &mtu2_2_platform_data,
+	},
+	.resource	= mtu2_2_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_2_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xff804000,
@@ -134,6 +228,9 @@ static struct platform_device sci_device = {
 
 static struct platform_device *mxg_devices[] __initdata = {
 	&sci_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
 };
 
 static int __init mxg_devices_setup(void)
@@ -147,3 +244,15 @@ void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *mxg_early_devices[] __initdata = {
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(mxg_early_devices,
+				   ARRAY_SIZE(mxg_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index 00f42f9..074aa90 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -12,6 +12,8 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_mtu2.h>
+#include <linux/io.h>
 
 enum {
 	UNUSED = 0,
@@ -249,9 +251,105 @@ static struct platform_device rtc_device = {
 	.resource	= rtc_resources,
 };
 
+static struct sh_mtu2_config mtu2_0_platform_data = {
+	.name = "MTU2_0",
+	.channel_offset = -0x80,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_0_resources[] = {
+	[0] = {
+		.name	= "MTU2_0",
+		.start	= 0xfffe4300,
+		.end	= 0xfffe4326,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 108,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_0_device = {
+	.name		= "sh_mtu2",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &mtu2_0_platform_data,
+	},
+	.resource	= mtu2_0_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
+};
+
+static struct sh_mtu2_config mtu2_1_platform_data = {
+	.name = "MTU2_1",
+	.channel_offset = -0x100,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_1_resources[] = {
+	[0] = {
+		.name	= "MTU2_1",
+		.start	= 0xfffe4380,
+		.end	= 0xfffe4390,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 116,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_1_device = {
+	.name		= "sh_mtu2",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &mtu2_1_platform_data,
+	},
+	.resource	= mtu2_1_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
+};
+
+static struct sh_mtu2_config mtu2_2_platform_data = {
+	.name = "MTU2_2",
+	.channel_offset = 0x80,
+	.timer_bit = 2,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_2_resources[] = {
+	[0] = {
+		.name	= "MTU2_2",
+		.start	= 0xfffe4000,
+		.end	= 0xfffe400a,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 124,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_2_device = {
+	.name		= "sh_mtu2",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &mtu2_2_platform_data,
+	},
+	.resource	= mtu2_2_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_2_resources),
+};
+
 static struct platform_device *sh7201_devices[] __initdata = {
 	&sci_device,
 	&rtc_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
 };
 
 static int __init sh7201_devices_setup(void)
@@ -265,3 +363,20 @@ void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct platform_device *sh7201_early_devices[] __initdata = {
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
+};
+
+#define STBCR3 0xfffe0408
+
+void __init plat_early_device_setup(void)
+{
+	/* enable MTU2 clock */
+	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
+
+	early_platform_add_devices(sh7201_early_devices,
+				   ARRAY_SIZE(sh7201_early_devices));
+}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 0836ace..3448164 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -12,6 +12,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/sh_cmt.h>
+#include <linux/sh_mtu2.h>
 #include <linux/io.h>
 
 enum {
@@ -271,6 +272,68 @@ static struct platform_device cmt1_device = {
 	.num_resources	= ARRAY_SIZE(cmt1_resources),
 };
 
+static struct sh_mtu2_config mtu2_0_platform_data = {
+	.name = "MTU2_0",
+	.channel_offset = -0x80,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_0_resources[] = {
+	[0] = {
+		.name	= "MTU2_0",
+		.start	= 0xfffe4300,
+		.end	= 0xfffe4326,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 146,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_0_device = {
+	.name		= "sh_mtu2",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &mtu2_0_platform_data,
+	},
+	.resource	= mtu2_0_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
+};
+
+static struct sh_mtu2_config mtu2_1_platform_data = {
+	.name = "MTU2_1",
+	.channel_offset = -0x100,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_1_resources[] = {
+	[0] = {
+		.name	= "MTU2_1",
+		.start	= 0xfffe4380,
+		.end	= 0xfffe4390,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 153,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_1_device = {
+	.name		= "sh_mtu2",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &mtu2_1_platform_data,
+	},
+	.resource	= mtu2_1_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
+};
+
 static struct resource rtc_resources[] = {
 	[0] = {
 		.start	= 0xffff2000,
@@ -295,6 +358,8 @@ static struct platform_device *sh7203_devices[] __initdata = {
 	&sci_device,
 	&cmt0_device,
 	&cmt1_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
 	&rtc_device,
 };
 
@@ -313,8 +378,11 @@ void __init plat_irq_setup(void)
 static struct platform_device *sh7203_early_devices[] __initdata = {
 	&cmt0_device,
 	&cmt1_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
 };
 
+#define STBCR3 0xfffe0408
 #define STBCR4 0xfffe040c
 
 void __init plat_early_device_setup(void)
@@ -322,6 +390,9 @@ void __init plat_early_device_setup(void)
 	/* enable CMT clock */
 	__raw_writeb(__raw_readb(STBCR4) & ~0x04, STBCR4);
 
+	/* enable MTU2 clock */
+	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
+
 	early_platform_add_devices(sh7203_early_devices,
 				   ARRAY_SIZE(sh7203_early_devices));
 }
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index f9606e3..e700559 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -13,6 +13,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/sh_cmt.h>
+#include <linux/sh_mtu2.h>
 #include <linux/io.h>
 
 enum {
@@ -231,10 +232,106 @@ static struct platform_device cmt1_device = {
 	.num_resources	= ARRAY_SIZE(cmt1_resources),
 };
 
+static struct sh_mtu2_config mtu2_0_platform_data = {
+	.name = "MTU2_0",
+	.channel_offset = -0x80,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_0_resources[] = {
+	[0] = {
+		.name	= "MTU2_0",
+		.start	= 0xfffe4300,
+		.end	= 0xfffe4326,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 156,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_0_device = {
+	.name		= "sh_mtu2",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &mtu2_0_platform_data,
+	},
+	.resource	= mtu2_0_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
+};
+
+static struct sh_mtu2_config mtu2_1_platform_data = {
+	.name = "MTU2_1",
+	.channel_offset = -0x100,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_1_resources[] = {
+	[0] = {
+		.name	= "MTU2_1",
+		.start	= 0xfffe4380,
+		.end	= 0xfffe4390,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 164,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_1_device = {
+	.name		= "sh_mtu2",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &mtu2_1_platform_data,
+	},
+	.resource	= mtu2_1_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
+};
+
+static struct sh_mtu2_config mtu2_2_platform_data = {
+	.name = "MTU2_2",
+	.channel_offset = 0x80,
+	.timer_bit = 2,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource mtu2_2_resources[] = {
+	[0] = {
+		.name	= "MTU2_2",
+		.start	= 0xfffe4000,
+		.end	= 0xfffe400a,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 180,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mtu2_2_device = {
+	.name		= "sh_mtu2",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &mtu2_2_platform_data,
+	},
+	.resource	= mtu2_2_resources,
+	.num_resources	= ARRAY_SIZE(mtu2_2_resources),
+};
+
 static struct platform_device *sh7206_devices[] __initdata = {
 	&sci_device,
 	&cmt0_device,
 	&cmt1_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
 };
 
 static int __init sh7206_devices_setup(void)
@@ -252,8 +349,12 @@ void __init plat_irq_setup(void)
 static struct platform_device *sh7206_early_devices[] __initdata = {
 	&cmt0_device,
 	&cmt1_device,
+	&mtu2_0_device,
+	&mtu2_1_device,
+	&mtu2_2_device,
 };
 
+#define STBCR3 0xfffe0408
 #define STBCR4 0xfffe040c
 
 void __init plat_early_device_setup(void)
@@ -261,6 +362,9 @@ void __init plat_early_device_setup(void)
 	/* enable CMT clock */
 	__raw_writeb(__raw_readb(STBCR4) & ~0x04, STBCR4);
 
+	/* enable MTU2 clock */
+	__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
+
 	early_platform_add_devices(sh7206_early_devices,
 				   ARRAY_SIZE(sh7206_early_devices));
 }
-- 
cgit v0.10.2


From 1cbac972ba28e706fa9ce4d4c81830040bc811ee Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sat, 2 May 2009 13:39:56 +0400
Subject: x86: uv io-apic - use BUILD_BUG_ON instead of BUG_ON

The expression is known to be true/false at compilation
time so we're allowed to use build-time instead of
run-time check. Also align 'entry' items assignment.

[ Impact: shrink kernel a bit, cleanup ]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Jack Steiner <steiner@sgi.com>
LKML-Reference: <20090502093956.GB4791@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8aef5f9..a80335b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3749,6 +3749,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	unsigned long flags;
 	int err;
 
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
 	cfg = irq_cfg(irq);
 
 	err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3762,15 +3764,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	entry->vector = cfg->vector;
-	entry->delivery_mode = apic->irq_delivery_mode;
-	entry->dest_mode = apic->irq_dest_mode;
-	entry->polarity = 0;
-	entry->trigger = 0;
-	entry->mask = 0;
-	entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3788,10 +3788,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode;
 
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
 	entry->mask = 1;
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
-- 
cgit v0.10.2


From 3280c8865e1b738604bacdea54738acef31e8c12 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 30 Apr 2009 07:12:09 +0000
Subject: sh: remove old MTU2 driver

This patch removes the old MTU2 driver (CONFIG_SH_MTU2/timer-mtu2.c)

As replacement, select the sh_cmt driver with CONFIG_SH_TIMER_MTU2
and configure timer channel using platform data.

If multiple MTU channels are enabled using platform data, use the
earlytimer parameter on the kernel command line to select channel.
For instance, use "earlytimer=sh_mtu2.0" to select the first channel.

To verify which timer is being used, look at printouts or the timer
irq count in /proc/interrupts.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 2061488..bda3dc1 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -479,16 +479,9 @@ config SH_TIMER_CMT
 	help
 	  This enables build of the CMT timer driver.
 
-config SH_MTU2
-	bool "MTU2 timer support"
-	depends on CPU_SH2A && !GENERIC_TIME
-	default y
-	help
-	  This enables the use of the MTU2 as the system timer.
-
 config SH_TIMER_MTU2
 	bool "MTU2 timer driver"
-	depends on SYS_SUPPORTS_MTU2 && !SH_MTU2
+	depends on SYS_SUPPORTS_MTU2
 	default y
 	select GENERIC_CLOCKEVENTS
 	help
diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h
index 9c968d1..581e3fe 100644
--- a/arch/sh/include/asm/timer.h
+++ b/arch/sh/include/asm/timer.h
@@ -23,7 +23,7 @@ struct sys_timer {
 
 #define TICK_SIZE (tick_nsec / 1000)
 
-extern struct sys_timer tmu_timer, mtu2_timer;
+extern struct sys_timer tmu_timer;
 extern struct sys_timer *sys_timer;
 
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile
index 1e9a104..44156df 100644
--- a/arch/sh/kernel/timers/Makefile
+++ b/arch/sh/kernel/timers/Makefile
@@ -5,6 +5,5 @@
 obj-y	:= timer.o
 
 obj-$(CONFIG_SH_TMU)		+= timer-tmu.o
-obj-$(CONFIG_SH_MTU2)		+= timer-mtu2.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= timer-broadcast.o
diff --git a/arch/sh/kernel/timers/timer-mtu2.c b/arch/sh/kernel/timers/timer-mtu2.c
deleted file mode 100644
index 8a1dcc2..0000000
--- a/arch/sh/kernel/timers/timer-mtu2.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * arch/sh/kernel/timers/timer-mtu2.c - MTU2 Timer Support
- *
- *  Copyright (C) 2005  Paul Mundt
- *
- * Based off of arch/sh/kernel/timers/timer-tmu.c
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/seqlock.h>
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/clock.h>
-
-/*
- * We use channel 1 for our lowly system timer. Channel 2 would be the other
- * likely candidate, but we leave it alone as it has higher divisors that
- * would be of more use to other more interesting applications.
- *
- * TODO: Presently we only implement a 16-bit single-channel system timer.
- * However, we can implement channel cascade if we go the overflow route and
- * get away with using 2 MTU2 channels as a 32-bit timer.
- */
-#define MTU2_TSTR	0xfffe4280
-#define MTU2_TCR_1	0xfffe4380
-#define MTU2_TMDR_1	0xfffe4381
-#define MTU2_TIOR_1	0xfffe4382
-#define MTU2_TIER_1	0xfffe4384
-#define MTU2_TSR_1	0xfffe4385
-#define MTU2_TCNT_1	0xfffe4386	/* 16-bit counter */
-
-#if defined(CONFIG_CPU_SUBTYPE_SH7201) || \
-    defined(CONFIG_CPU_SUBTYPE_SH7203)
-#define MTU2_TGRA_1	0xfffe4388
-#else
-#define MTU2_TGRA_1	0xfffe438a
-#endif
-
-#define STBCR3		0xfffe0408
-
-#define MTU2_TSTR_CST1	(1 << 1)	/* Counter Start 1 */
-
-#define MTU2_TSR_TGFA	(1 << 0)	/* GRA compare match */
-
-#define MTU2_TIER_TGIEA	(1 << 0)	/* GRA compare match  interrupt enable */
-
-#define MTU2_TCR_INIT	0x22
-
-#define MTU2_TCR_CALIB  0x00
-
-static unsigned long mtu2_timer_get_offset(void)
-{
-	int count;
-	static int count_p = 0x7fff;	/* for the first call after boot */
-	static unsigned long jiffies_p = 0;
-
-	/*
-	 * cache volatile jiffies temporarily; we have IRQs turned off.
-	 */
-	unsigned long jiffies_t;
-
-	/* timer count may underflow right here */
-	count = ctrl_inw(MTU2_TCNT_1);	/* read the latched count */
-
-	jiffies_t = jiffies;
-
-	/*
-	 * avoiding timer inconsistencies (they are rare, but they happen)...
-	 * there is one kind of problem that must be avoided here:
-	 *  1. the timer counter underflows
-	 */
-
-	if (jiffies_t == jiffies_p) {
-		if (count > count_p) {
-			if (ctrl_inb(MTU2_TSR_1) & MTU2_TSR_TGFA) {
-				count -= LATCH;
-			} else {
-				printk("%s (): hardware timer problem?\n",
-				       __func__);
-			}
-		}
-	} else
-		jiffies_p = jiffies_t;
-
-	count_p = count;
-
-	count = ((LATCH-1) - count) * TICK_SIZE;
-	count = (count + LATCH/2) / LATCH;
-
-	return count;
-}
-
-static irqreturn_t mtu2_timer_interrupt(int irq, void *dev_id)
-{
-	unsigned long timer_status;
-
-	/* Clear TGFA bit */
-	timer_status = ctrl_inb(MTU2_TSR_1);
-	timer_status &= ~MTU2_TSR_TGFA;
-	ctrl_outb(timer_status, MTU2_TSR_1);
-
-	/* Do timer tick */
-	handle_timer_tick();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction mtu2_irq = {
-	.name		= "timer",
-	.handler	= mtu2_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-};
-
-static unsigned int divisors[] = { 1, 4, 16, 64, 1, 1, 256 };
-
-static void mtu2_clk_init(struct clk *clk)
-{
-	u8 idx = MTU2_TCR_INIT & 0x7;
-
-	clk->rate = clk->parent->rate / divisors[idx];
-	/* Start TCNT counting */
-	ctrl_outb(ctrl_inb(MTU2_TSTR) | MTU2_TSTR_CST1, MTU2_TSTR);
-
-}
-
-static void mtu2_clk_recalc(struct clk *clk)
-{
-	u8 idx = ctrl_inb(MTU2_TCR_1) & 0x7;
-	clk->rate = clk->parent->rate / divisors[idx];
-}
-
-static struct clk_ops mtu2_clk_ops = {
-	.init		= mtu2_clk_init,
-	.recalc		= mtu2_clk_recalc,
-};
-
-static struct clk mtu2_clk1 = {
-	.name		= "mtu2_clk1",
-	.ops		= &mtu2_clk_ops,
-};
-
-static int mtu2_timer_start(void)
-{
-	ctrl_outb(ctrl_inb(MTU2_TSTR) | MTU2_TSTR_CST1, MTU2_TSTR);
-	return 0;
-}
-
-static int mtu2_timer_stop(void)
-{
-	ctrl_outb(ctrl_inb(MTU2_TSTR) & ~MTU2_TSTR_CST1, MTU2_TSTR);
-	return 0;
-}
-
-static int mtu2_timer_init(void)
-{
-	unsigned long interval;
-
-	setup_irq(CONFIG_SH_TIMER_IRQ, &mtu2_irq);
-
-	mtu2_clk1.parent = clk_get(NULL, "module_clk");
-
-	ctrl_outb(ctrl_inb(STBCR3) & (~0x20), STBCR3);
-
-	/* Normal operation */
-	ctrl_outb(0, MTU2_TMDR_1);
-	ctrl_outb(MTU2_TCR_INIT, MTU2_TCR_1);
-	ctrl_outb(0x01, MTU2_TIOR_1);
-
-	/* Enable underflow interrupt */
-	ctrl_outb(ctrl_inb(MTU2_TIER_1) | MTU2_TIER_TGIEA, MTU2_TIER_1);
-
-	interval = CONFIG_SH_PCLK_FREQ / 16 / HZ;
-	printk(KERN_INFO "Interval = %ld\n", interval);
-
-	ctrl_outw(interval, MTU2_TGRA_1);
-	ctrl_outw(0, MTU2_TCNT_1);
-
-	clk_register(&mtu2_clk1);
-	clk_enable(&mtu2_clk1);
-
-	return 0;
-}
-
-struct sys_timer_ops mtu2_timer_ops = {
-	.init		= mtu2_timer_init,
-	.start		= mtu2_timer_start,
-	.stop		= mtu2_timer_stop,
-#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-	.get_offset	= mtu2_timer_get_offset,
-#endif
-};
-
-struct sys_timer mtu2_timer = {
-	.name	= "mtu2",
-	.ops	= &mtu2_timer_ops,
-};
diff --git a/arch/sh/kernel/timers/timer.c b/arch/sh/kernel/timers/timer.c
index f3bd141..920891a 100644
--- a/arch/sh/kernel/timers/timer.c
+++ b/arch/sh/kernel/timers/timer.c
@@ -17,9 +17,6 @@ static struct sys_timer *sys_timers[] = {
 #ifdef CONFIG_SH_TMU
 	&tmu_timer,
 #endif
-#ifdef CONFIG_SH_MTU2
-	&mtu2_timer,
-#endif
 	NULL,
 };
 
-- 
cgit v0.10.2


From 3969c52d4d2fef5a4b9e3ab0e51b3901e1cc8b83 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sun, 3 May 2009 11:11:35 +0530
Subject: x86: cpufeature.h fix name for X86_FEATURE_MCE

X86_FEATURE_MCE = Machine Check Exception
X86_FEATURE_MCA = Machine Check Architecture

[ Impact: cleanup ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
LKML-Reference: <1241329295.6321.1.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb83b1c..ccc1061 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,7 +22,7 @@
 #define X86_FEATURE_TSC		(0*32+ 4) /* Time Stamp Counter */
 #define X86_FEATURE_MSR		(0*32+ 5) /* Model-Specific Registers */
 #define X86_FEATURE_PAE		(0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Exception */
 #define X86_FEATURE_CX8		(0*32+ 8) /* CMPXCHG8 instruction */
 #define X86_FEATURE_APIC	(0*32+ 9) /* Onboard APIC */
 #define X86_FEATURE_SEP		(0*32+11) /* SYSENTER/SYSEXIT */
-- 
cgit v0.10.2


From 9570ef20423b549757aa484ad388f9a7d5bdc4d9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 1 May 2009 06:51:00 +0000
Subject: clocksource: SuperH TMU Timer driver

This patch adds a TMU driver for the SuperH architecture.

The TMU driver is a platform driver with early platform
support to allow using a TMU channel as clockevent or
clocksource during system bootup or later.

Clocksource or clockevent can be selected.
Both periodic and oneshot clockevents are supported.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index bda3dc1..1f74737 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -116,6 +116,9 @@ config SYS_SUPPORTS_CMT
 config SYS_SUPPORTS_MTU2
 	bool
 
+config SYS_SUPPORTS_TMU
+	bool
+
 config STACKTRACE_SUPPORT
 	def_bool y
 
@@ -470,6 +473,15 @@ config SH_TMU
 	help
 	  This enables the use of the TMU as the system timer.
 
+config SH_TIMER_TMU
+	bool "TMU timer driver"
+	depends on !SH_TMU && SYS_SUPPORTS_TMU
+	default y
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	help
+	  This enables the build of the TMU timer driver.
+
 config SH_TIMER_CMT
 	bool "CMT timer driver"
 	depends on SYS_SUPPORTS_CMT
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 9785586..eef216f 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_X86_PM_TIMER)	+= acpi_pm.o
 obj-$(CONFIG_SCx200HR_TIMER)	+= scx200_hrt.o
 obj-$(CONFIG_SH_TIMER_CMT)	+= sh_cmt.o
 obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
+obj-$(CONFIG_SH_TIMER_TMU)	+= sh_tmu.o
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
new file mode 100644
index 0000000..21bd77a
--- /dev/null
+++ b/drivers/clocksource/sh_tmu.c
@@ -0,0 +1,461 @@
+/*
+ * SuperH Timer Support - TMU
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/err.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/sh_tmu.h>
+
+struct sh_tmu_priv {
+	void __iomem *mapbase;
+	struct clk *clk;
+	struct irqaction irqaction;
+	struct platform_device *pdev;
+	unsigned long rate;
+	unsigned long periodic;
+	struct clock_event_device ced;
+	struct clocksource cs;
+};
+
+static DEFINE_SPINLOCK(sh_tmu_lock);
+
+#define TSTR -1 /* shared register */
+#define TCOR  0 /* channel register */
+#define TCNT 1 /* channel register */
+#define TCR 2 /* channel register */
+
+static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr)
+{
+	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR)
+		return ioread8(base - cfg->channel_offset);
+
+	offs = reg_nr << 2;
+
+	if (reg_nr == TCR)
+		return ioread16(base + offs);
+	else
+		return ioread32(base + offs);
+}
+
+static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr,
+				unsigned long value)
+{
+	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR) {
+		iowrite8(value, base - cfg->channel_offset);
+		return;
+	}
+
+	offs = reg_nr << 2;
+
+	if (reg_nr == TCR)
+		iowrite16(value, base + offs);
+	else
+		iowrite32(value, base + offs);
+}
+
+static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start)
+{
+	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	unsigned long flags, value;
+
+	/* start stop register shared by multiple timer channels */
+	spin_lock_irqsave(&sh_tmu_lock, flags);
+	value = sh_tmu_read(p, TSTR);
+
+	if (start)
+		value |= 1 << cfg->timer_bit;
+	else
+		value &= ~(1 << cfg->timer_bit);
+
+	sh_tmu_write(p, TSTR, value);
+	spin_unlock_irqrestore(&sh_tmu_lock, flags);
+}
+
+static int sh_tmu_enable(struct sh_tmu_priv *p)
+{
+	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	int ret;
+
+	/* enable clock */
+	ret = clk_enable(p->clk);
+	if (ret) {
+		pr_err("sh_tmu: cannot enable clock \"%s\"\n", cfg->clk);
+		return ret;
+	}
+
+	/* make sure channel is disabled */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* maximum timeout */
+	sh_tmu_write(p, TCOR, 0xffffffff);
+	sh_tmu_write(p, TCNT, 0xffffffff);
+
+	/* configure channel to parent clock / 4, irq off */
+	p->rate = clk_get_rate(p->clk) / 4;
+	sh_tmu_write(p, TCR, 0x0000);
+
+	/* enable channel */
+	sh_tmu_start_stop_ch(p, 1);
+
+	return 0;
+}
+
+static void sh_tmu_disable(struct sh_tmu_priv *p)
+{
+	/* disable channel */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* stop clock */
+	clk_disable(p->clk);
+}
+
+static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta,
+			    int periodic)
+{
+	/* stop timer */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* acknowledge interrupt */
+	sh_tmu_read(p, TCR);
+
+	/* enable interrupt */
+	sh_tmu_write(p, TCR, 0x0020);
+
+	/* reload delta value in case of periodic timer */
+	if (periodic)
+		sh_tmu_write(p, TCOR, delta);
+	else
+		sh_tmu_write(p, TCOR, 0);
+
+	sh_tmu_write(p, TCNT, delta);
+
+	/* start timer */
+	sh_tmu_start_stop_ch(p, 1);
+}
+
+static irqreturn_t sh_tmu_interrupt(int irq, void *dev_id)
+{
+	struct sh_tmu_priv *p = dev_id;
+
+	/* disable or acknowledge interrupt */
+	if (p->ced.mode == CLOCK_EVT_MODE_ONESHOT)
+		sh_tmu_write(p, TCR, 0x0000);
+	else
+		sh_tmu_write(p, TCR, 0x0020);
+
+	/* notify clockevent layer */
+	p->ced.event_handler(&p->ced);
+	return IRQ_HANDLED;
+}
+
+static struct sh_tmu_priv *cs_to_sh_tmu(struct clocksource *cs)
+{
+	return container_of(cs, struct sh_tmu_priv, cs);
+}
+
+static cycle_t sh_tmu_clocksource_read(struct clocksource *cs)
+{
+	struct sh_tmu_priv *p = cs_to_sh_tmu(cs);
+
+	return sh_tmu_read(p, TCNT) ^ 0xffffffff;
+}
+
+static int sh_tmu_clocksource_enable(struct clocksource *cs)
+{
+	struct sh_tmu_priv *p = cs_to_sh_tmu(cs);
+	int ret;
+
+	ret = sh_tmu_enable(p);
+	if (ret)
+		return ret;
+
+	/* TODO: calculate good shift from rate and counter bit width */
+	cs->shift = 10;
+	cs->mult = clocksource_hz2mult(p->rate, cs->shift);
+	return 0;
+}
+
+static void sh_tmu_clocksource_disable(struct clocksource *cs)
+{
+	sh_tmu_disable(cs_to_sh_tmu(cs));
+}
+
+static int sh_tmu_register_clocksource(struct sh_tmu_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clocksource *cs = &p->cs;
+
+	cs->name = name;
+	cs->rating = rating;
+	cs->read = sh_tmu_clocksource_read;
+	cs->enable = sh_tmu_clocksource_enable;
+	cs->disable = sh_tmu_clocksource_disable;
+	cs->mask = CLOCKSOURCE_MASK(32);
+	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+	pr_info("sh_tmu: %s used as clock source\n", cs->name);
+	clocksource_register(cs);
+	return 0;
+}
+
+static struct sh_tmu_priv *ced_to_sh_tmu(struct clock_event_device *ced)
+{
+	return container_of(ced, struct sh_tmu_priv, ced);
+}
+
+static void sh_tmu_clock_event_start(struct sh_tmu_priv *p, int periodic)
+{
+	struct clock_event_device *ced = &p->ced;
+
+	sh_tmu_enable(p);
+
+	/* TODO: calculate good shift from rate and counter bit width */
+
+	ced->shift = 32;
+	ced->mult = div_sc(p->rate, NSEC_PER_SEC, ced->shift);
+	ced->max_delta_ns = clockevent_delta2ns(0xffffffff, ced);
+	ced->min_delta_ns = 5000;
+
+	if (periodic) {
+		p->periodic = (p->rate + HZ/2) / HZ;
+		sh_tmu_set_next(p, p->periodic, 1);
+	}
+}
+
+static void sh_tmu_clock_event_mode(enum clock_event_mode mode,
+				    struct clock_event_device *ced)
+{
+	struct sh_tmu_priv *p = ced_to_sh_tmu(ced);
+	int disabled = 0;
+
+	/* deal with old setting first */
+	switch (ced->mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_ONESHOT:
+		sh_tmu_disable(p);
+		disabled = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		pr_info("sh_tmu: %s used for periodic clock events\n",
+			ced->name);
+		sh_tmu_clock_event_start(p, 1);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		pr_info("sh_tmu: %s used for oneshot clock events\n",
+			ced->name);
+		sh_tmu_clock_event_start(p, 0);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+		if (!disabled)
+			sh_tmu_disable(p);
+		break;
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		break;
+	}
+}
+
+static int sh_tmu_clock_event_next(unsigned long delta,
+				   struct clock_event_device *ced)
+{
+	struct sh_tmu_priv *p = ced_to_sh_tmu(ced);
+
+	BUG_ON(ced->mode != CLOCK_EVT_MODE_ONESHOT);
+
+	/* program new delta value */
+	sh_tmu_set_next(p, delta, 0);
+	return 0;
+}
+
+static void sh_tmu_register_clockevent(struct sh_tmu_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clock_event_device *ced = &p->ced;
+	int ret;
+
+	memset(ced, 0, sizeof(*ced));
+
+	ced->name = name;
+	ced->features = CLOCK_EVT_FEAT_PERIODIC;
+	ced->features |= CLOCK_EVT_FEAT_ONESHOT;
+	ced->rating = rating;
+	ced->cpumask = cpumask_of(0);
+	ced->set_next_event = sh_tmu_clock_event_next;
+	ced->set_mode = sh_tmu_clock_event_mode;
+
+	ret = setup_irq(p->irqaction.irq, &p->irqaction);
+	if (ret) {
+		pr_err("sh_tmu: failed to request irq %d\n",
+		       p->irqaction.irq);
+		return;
+	}
+
+	pr_info("sh_tmu: %s used for clock events\n", ced->name);
+	clockevents_register_device(ced);
+}
+
+static int sh_tmu_register(struct sh_tmu_priv *p, char *name,
+		    unsigned long clockevent_rating,
+		    unsigned long clocksource_rating)
+{
+	if (clockevent_rating)
+		sh_tmu_register_clockevent(p, name, clockevent_rating);
+	else if (clocksource_rating)
+		sh_tmu_register_clocksource(p, name, clocksource_rating);
+
+	return 0;
+}
+
+static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev)
+{
+	struct sh_tmu_config *cfg = pdev->dev.platform_data;
+	struct resource *res;
+	int irq, ret;
+	ret = -ENXIO;
+
+	memset(p, 0, sizeof(*p));
+	p->pdev = pdev;
+
+	if (!cfg) {
+		dev_err(&p->pdev->dev, "missing platform data\n");
+		goto err0;
+	}
+
+	platform_set_drvdata(pdev, p);
+
+	res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&p->pdev->dev, "failed to get I/O memory\n");
+		goto err0;
+	}
+
+	irq = platform_get_irq(p->pdev, 0);
+	if (irq < 0) {
+		dev_err(&p->pdev->dev, "failed to get irq\n");
+		goto err0;
+	}
+
+	/* map memory, let mapbase point to our channel */
+	p->mapbase = ioremap_nocache(res->start, resource_size(res));
+	if (p->mapbase == NULL) {
+		pr_err("sh_tmu: failed to remap I/O memory\n");
+		goto err0;
+	}
+
+	/* setup data for setup_irq() (too early for request_irq()) */
+	p->irqaction.name = cfg->name;
+	p->irqaction.handler = sh_tmu_interrupt;
+	p->irqaction.dev_id = p;
+	p->irqaction.irq = irq;
+	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL;
+	p->irqaction.mask = CPU_MASK_NONE;
+
+	/* get hold of clock */
+	p->clk = clk_get(&p->pdev->dev, cfg->clk);
+	if (IS_ERR(p->clk)) {
+		pr_err("sh_tmu: cannot get clock \"%s\"\n", cfg->clk);
+		ret = PTR_ERR(p->clk);
+		goto err1;
+	}
+
+	return sh_tmu_register(p, cfg->name,
+			       cfg->clockevent_rating,
+			       cfg->clocksource_rating);
+ err1:
+	iounmap(p->mapbase);
+ err0:
+	return ret;
+}
+
+static int __devinit sh_tmu_probe(struct platform_device *pdev)
+{
+	struct sh_tmu_priv *p = platform_get_drvdata(pdev);
+	struct sh_tmu_config *cfg = pdev->dev.platform_data;
+	int ret;
+
+	if (p) {
+		pr_info("sh_tmu: %s kept as earlytimer\n", cfg->name);
+		return 0;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL) {
+		dev_err(&pdev->dev, "failed to allocate driver data\n");
+		return -ENOMEM;
+	}
+
+	ret = sh_tmu_setup(p, pdev);
+	if (ret) {
+		kfree(p);
+		platform_set_drvdata(pdev, NULL);
+	}
+	return ret;
+}
+
+static int __devexit sh_tmu_remove(struct platform_device *pdev)
+{
+	return -EBUSY; /* cannot unregister clockevent and clocksource */
+}
+
+static struct platform_driver sh_tmu_device_driver = {
+	.probe		= sh_tmu_probe,
+	.remove		= __devexit_p(sh_tmu_remove),
+	.driver		= {
+		.name	= "sh_tmu",
+	}
+};
+
+static int __init sh_tmu_init(void)
+{
+	return platform_driver_register(&sh_tmu_device_driver);
+}
+
+static void __exit sh_tmu_exit(void)
+{
+	platform_driver_unregister(&sh_tmu_device_driver);
+}
+
+early_platform_init("earlytimer", &sh_tmu_device_driver);
+module_init(sh_tmu_init);
+module_exit(sh_tmu_exit);
+
+MODULE_AUTHOR("Magnus Damm");
+MODULE_DESCRIPTION("SuperH TMU Timer Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/sh_tmu.h b/include/linux/sh_tmu.h
new file mode 100644
index 0000000..b1195e8
--- /dev/null
+++ b/include/linux/sh_tmu.h
@@ -0,0 +1,13 @@
+#ifndef __SH_TMU_H__
+#define __SH_TMU_H__
+
+struct sh_tmu_config {
+	char *name;
+	unsigned long channel_offset;
+	int timer_bit;
+	char *clk;
+	unsigned long clockevent_rating;
+	unsigned long clocksource_rating;
+};
+
+#endif /* __SH_TMU_H__ */
-- 
cgit v0.10.2


From d43a41bf8b504a1d9f0b4ce7e17d803f4ef39d84 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 1 May 2009 06:58:52 +0000
Subject: sh: TMU platform data for sh7722

This patch adds TMU platform data for sh7722. Only clockevent
mode is enabled for now, clocksource requires this patch:
"clocksource: setup mult_orig in clocksource_enable()"

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 1f74737..c3e9455 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -423,6 +423,7 @@ config CPU_SUBTYPE_SH7722
 	select ARCH_SPARSEMEM_ENABLE
 	select SYS_SUPPORTS_NUMA
 	select SYS_SUPPORTS_CMT
+	select SYS_SUPPORTS_TMU
 
 config CPU_SUBTYPE_SH7366
 	bool "Support SH7366 processor"
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 793c50d..512735c 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/uio_driver.h>
 #include <linux/sh_cmt.h>
+#include <linux/sh_tmu.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 
@@ -209,6 +210,98 @@ static struct platform_device cmt_device = {
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_tmu_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_tmu_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 0, /* disabled for now */
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_tmu_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffe00000,
@@ -243,6 +336,9 @@ static struct platform_device sci_device = {
 
 static struct platform_device *sh7722_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&rtc_device,
 	&usbf_device,
 	&iic_device,
@@ -271,6 +367,9 @@ __initcall(sh7722_devices_setup);
 
 static struct platform_device *sh7722_early_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 };
 
 void __init plat_early_device_setup(void)
-- 
cgit v0.10.2


From 9a8709d44139748fe2e0ab56d20d8c384c8b65ad Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 2 May 2009 00:25:11 +0400
Subject: x86: uv - prevent NULL dereference in uv_system_init()

We may reach NULL dereference oops if kmalloc failed.
Prevent it with explicit BUG_ON.

[ Impact: more controlled assert in 'impossible' scenario ]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Jack Steiner <steiner@sgi.com>
LKML-Reference: <20090501202511.GE4633@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 873bf71..9d9e228 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -569,15 +569,18 @@ void __init uv_system_init(void)
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
 	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_blade_info);
 
 	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
 
 	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
 	uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_node_to_blade);
 	memset(uv_node_to_blade, 255, bytes);
 
 	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
 	uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_cpu_to_blade);
 	memset(uv_cpu_to_blade, 255, bytes);
 
 	blade = 0;
-- 
cgit v0.10.2


From 46a12f7426d71cabc08972cf8d3ffdd441d26a3a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 3 May 2009 17:57:17 +0900
Subject: sh: Consolidate MTU2/CMT/TMU timer platform data.

All of the SH timers use a roughly identical structure for platform data,
which presently is broken out for each block. Consolidate all of these
definitions, as there is no reason for them to be broken out in the first
place.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index d70c263..94ac27f 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 
 enum {
@@ -111,7 +111,7 @@ static struct platform_device eth_device = {
 	.resource = eth_resources,
 };
 
-static struct sh_cmt_config cmt0_platform_data = {
+static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
@@ -143,7 +143,7 @@ static struct platform_device cmt0_device = {
 	.num_resources	= ARRAY_SIZE(cmt0_resources),
 };
 
-static struct sh_cmt_config cmt1_platform_data = {
+static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 870030a..a452d96 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 
 enum {
 	UNUSED = 0,
@@ -114,7 +114,7 @@ static struct intc_mask_reg mask_registers[] __initdata = {
 static DECLARE_INTC_DESC(intc_desc, "mxg", vectors, groups,
 			 mask_registers, prio_registers, NULL);
 
-static struct sh_mtu2_config mtu2_0_platform_data = {
+static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
@@ -145,7 +145,7 @@ static struct platform_device mtu2_0_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
 };
 
-static struct sh_mtu2_config mtu2_1_platform_data = {
+static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
@@ -176,7 +176,7 @@ static struct platform_device mtu2_1_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
 };
 
-static struct sh_mtu2_config mtu2_2_platform_data = {
+static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index 074aa90..772358b 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 
 enum {
@@ -251,7 +251,7 @@ static struct platform_device rtc_device = {
 	.resource	= rtc_resources,
 };
 
-static struct sh_mtu2_config mtu2_0_platform_data = {
+static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
@@ -282,7 +282,7 @@ static struct platform_device mtu2_0_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
 };
 
-static struct sh_mtu2_config mtu2_1_platform_data = {
+static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
@@ -313,7 +313,7 @@ static struct platform_device mtu2_1_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
 };
 
-static struct sh_mtu2_config mtu2_2_platform_data = {
+static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 3448164..d749341 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -11,8 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_cmt.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 
 enum {
@@ -208,7 +207,7 @@ static struct platform_device sci_device = {
 	},
 };
 
-static struct sh_cmt_config cmt0_platform_data = {
+static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
@@ -240,7 +239,7 @@ static struct platform_device cmt0_device = {
 	.num_resources	= ARRAY_SIZE(cmt0_resources),
 };
 
-static struct sh_cmt_config cmt1_platform_data = {
+static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
@@ -272,7 +271,7 @@ static struct platform_device cmt1_device = {
 	.num_resources	= ARRAY_SIZE(cmt1_resources),
 };
 
-static struct sh_mtu2_config mtu2_0_platform_data = {
+static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
@@ -303,7 +302,7 @@ static struct platform_device mtu2_0_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
 };
 
-static struct sh_mtu2_config mtu2_1_platform_data = {
+static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index e700559..2fc6bff 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -12,8 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_cmt.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 
 enum {
@@ -168,7 +167,7 @@ static struct platform_device sci_device = {
 	},
 };
 
-static struct sh_cmt_config cmt0_platform_data = {
+static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
@@ -200,7 +199,7 @@ static struct platform_device cmt0_device = {
 	.num_resources	= ARRAY_SIZE(cmt0_resources),
 };
 
-static struct sh_cmt_config cmt1_platform_data = {
+static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
@@ -232,7 +231,7 @@ static struct platform_device cmt1_device = {
 	.num_resources	= ARRAY_SIZE(cmt1_resources),
 };
 
-static struct sh_mtu2_config mtu2_0_platform_data = {
+static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
@@ -263,7 +262,7 @@ static struct platform_device mtu2_0_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
 };
 
-static struct sh_mtu2_config mtu2_1_platform_data = {
+static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
@@ -294,7 +293,7 @@ static struct platform_device mtu2_1_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
 };
 
-static struct sh_mtu2_config mtu2_2_platform_data = {
+static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index cb5b4db..f327b7e 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -12,7 +12,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 
 static struct resource iic0_resources[] = {
@@ -141,7 +141,7 @@ static struct platform_device jpu_device = {
 	.num_resources	= ARRAY_SIZE(jpu_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index 2a771f4..7361ea9 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -14,7 +14,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 
 static struct resource iic_resources[] = {
@@ -148,7 +148,7 @@ static struct platform_device veu1_device = {
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 512735c..624e8e5 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -13,8 +13,7 @@
 #include <linux/serial_sci.h>
 #include <linux/mm.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
-#include <linux/sh_tmu.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 
@@ -178,7 +177,7 @@ static struct platform_device jpu_device = {
 	.num_resources	= ARRAY_SIZE(jpu_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
@@ -210,7 +209,7 @@ static struct platform_device cmt_device = {
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
-static struct sh_tmu_config tmu0_platform_data = {
+static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
@@ -241,7 +240,7 @@ static struct platform_device tmu0_device = {
 	.num_resources	= ARRAY_SIZE(tmu0_resources),
 };
 
-static struct sh_tmu_config tmu1_platform_data = {
+static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
@@ -272,7 +271,7 @@ static struct platform_device tmu1_device = {
 	.num_resources	= ARRAY_SIZE(tmu1_resources),
 };
 
-static struct sh_tmu_config tmu2_platform_data = {
+static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index dbb4494..fb9b542 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -13,7 +13,7 @@
 #include <linux/mm.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 
@@ -101,7 +101,7 @@ static struct platform_device veu1_device = {
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 8429396..e074951 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -18,7 +18,7 @@
 #include <linux/mm.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
@@ -230,7 +230,7 @@ static struct platform_device veu1_device = {
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 4ff1508..aeb8c9b 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -28,7 +28,7 @@
 #include <linux/err.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 
 struct sh_cmt_priv {
 	void __iomem *mapbase;
@@ -59,7 +59,7 @@ static DEFINE_SPINLOCK(sh_cmt_lock);
 
 static inline unsigned long sh_cmt_read(struct sh_cmt_priv *p, int reg_nr)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -83,7 +83,7 @@ static inline unsigned long sh_cmt_read(struct sh_cmt_priv *p, int reg_nr)
 static inline void sh_cmt_write(struct sh_cmt_priv *p, int reg_nr,
 				unsigned long value)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -131,7 +131,7 @@ static unsigned long sh_cmt_get_counter(struct sh_cmt_priv *p,
 
 static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	unsigned long flags, value;
 
 	/* start stop register shared by multiple timer channels */
@@ -149,7 +149,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start)
 
 static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	int ret;
 
 	/* enable clock */
@@ -560,7 +560,7 @@ int sh_cmt_register(struct sh_cmt_priv *p, char *name,
 
 static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 {
-	struct sh_cmt_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	struct resource *res;
 	int irq, ret;
 	ret = -ENXIO;
@@ -638,7 +638,7 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 static int __devinit sh_cmt_probe(struct platform_device *pdev)
 {
 	struct sh_cmt_priv *p = platform_get_drvdata(pdev);
-	struct sh_cmt_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	int ret;
 
 	if (p) {
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index 420566f..ef02185 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -28,7 +28,7 @@
 #include <linux/irq.h>
 #include <linux/err.h>
 #include <linux/clockchips.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 
 struct sh_mtu2_priv {
 	void __iomem *mapbase;
@@ -63,7 +63,7 @@ static unsigned long mtu2_reg_offs[] = {
 
 static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr)
 {
-	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -81,7 +81,7 @@ static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr)
 static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr,
 				unsigned long value)
 {
-	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -100,7 +100,7 @@ static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr,
 
 static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start)
 {
-	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	unsigned long flags, value;
 
 	/* start stop register shared by multiple timer channels */
@@ -118,7 +118,7 @@ static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start)
 
 static int sh_mtu2_enable(struct sh_mtu2_priv *p)
 {
-	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	int ret;
 
 	/* enable clock */
@@ -243,7 +243,7 @@ int sh_mtu2_register(struct sh_mtu2_priv *p, char *name,
 
 static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev)
 {
-	struct sh_mtu2_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	struct resource *res;
 	int irq, ret;
 	ret = -ENXIO;
@@ -303,7 +303,7 @@ static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev)
 static int __devinit sh_mtu2_probe(struct platform_device *pdev)
 {
 	struct sh_mtu2_priv *p = platform_get_drvdata(pdev);
-	struct sh_mtu2_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	int ret;
 
 	if (p) {
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 21bd77a..d6ea439 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -29,7 +29,7 @@
 #include <linux/err.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/sh_tmu.h>
+#include <linux/sh_timer.h>
 
 struct sh_tmu_priv {
 	void __iomem *mapbase;
@@ -51,7 +51,7 @@ static DEFINE_SPINLOCK(sh_tmu_lock);
 
 static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr)
 {
-	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -69,7 +69,7 @@ static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr)
 static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr,
 				unsigned long value)
 {
-	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -88,7 +88,7 @@ static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr,
 
 static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start)
 {
-	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	unsigned long flags, value;
 
 	/* start stop register shared by multiple timer channels */
@@ -106,7 +106,7 @@ static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start)
 
 static int sh_tmu_enable(struct sh_tmu_priv *p)
 {
-	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	int ret;
 
 	/* enable clock */
@@ -345,7 +345,7 @@ static int sh_tmu_register(struct sh_tmu_priv *p, char *name,
 
 static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev)
 {
-	struct sh_tmu_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	struct resource *res;
 	int irq, ret;
 	ret = -ENXIO;
@@ -407,7 +407,7 @@ static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev)
 static int __devinit sh_tmu_probe(struct platform_device *pdev)
 {
 	struct sh_tmu_priv *p = platform_get_drvdata(pdev);
-	struct sh_tmu_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	int ret;
 
 	if (p) {
diff --git a/include/linux/sh_cmt.h b/include/linux/sh_cmt.h
deleted file mode 100644
index 68cacde..0000000
--- a/include/linux/sh_cmt.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __SH_CMT_H__
-#define __SH_CMT_H__
-
-struct sh_cmt_config {
-	char *name;
-	unsigned long channel_offset;
-	int timer_bit;
-	char *clk;
-	unsigned long clockevent_rating;
-	unsigned long clocksource_rating;
-};
-
-#endif /* __SH_CMT_H__ */
diff --git a/include/linux/sh_mtu2.h b/include/linux/sh_mtu2.h
deleted file mode 100644
index a988763..0000000
--- a/include/linux/sh_mtu2.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __SH_MTU2_H__
-#define __SH_MTU2_H__
-
-struct sh_mtu2_config {
-	char *name;
-	int channel_offset;
-	int timer_bit;
-	char *clk;
-	unsigned long clockevent_rating;
-};
-
-#endif /* __SH_MTU2_H__ */
diff --git a/include/linux/sh_timer.h b/include/linux/sh_timer.h
new file mode 100644
index 0000000..864bd56
--- /dev/null
+++ b/include/linux/sh_timer.h
@@ -0,0 +1,13 @@
+#ifndef __SH_TIMER_H__
+#define __SH_TIMER_H__
+
+struct sh_timer_config {
+	char *name;
+	long channel_offset;
+	int timer_bit;
+	char *clk;
+	unsigned long clockevent_rating;
+	unsigned long clocksource_rating;
+};
+
+#endif /* __SH_TIMER_H__ */
diff --git a/include/linux/sh_tmu.h b/include/linux/sh_tmu.h
deleted file mode 100644
index b1195e8..0000000
--- a/include/linux/sh_tmu.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __SH_TMU_H__
-#define __SH_TMU_H__
-
-struct sh_tmu_config {
-	char *name;
-	unsigned long channel_offset;
-	int timer_bit;
-	char *clk;
-	unsigned long clockevent_rating;
-	unsigned long clocksource_rating;
-};
-
-#endif /* __SH_TMU_H__ */
-- 
cgit v0.10.2


From d1fcc0a8db5e47c1abaa783a3e83dbf5f2184969 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 3 May 2009 18:05:42 +0900
Subject: clocksource: sh_mtu2/cmt_register() should be static.

Neither of these need to be exported, so just make them static.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index aeb8c9b..cf56a2a 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -537,9 +537,9 @@ static void sh_cmt_register_clockevent(struct sh_cmt_priv *p,
 	clockevents_register_device(ced);
 }
 
-int sh_cmt_register(struct sh_cmt_priv *p, char *name,
-		    unsigned long clockevent_rating,
-		    unsigned long clocksource_rating)
+static int sh_cmt_register(struct sh_cmt_priv *p, char *name,
+			   unsigned long clockevent_rating,
+			   unsigned long clocksource_rating)
 {
 	if (p->width == (sizeof(p->max_match_value) * 8))
 		p->max_match_value = ~0;
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index ef02185..d1ae754 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -232,8 +232,8 @@ static void sh_mtu2_register_clockevent(struct sh_mtu2_priv *p,
 	clockevents_register_device(ced);
 }
 
-int sh_mtu2_register(struct sh_mtu2_priv *p, char *name,
-		     unsigned long clockevent_rating)
+static int sh_mtu2_register(struct sh_mtu2_priv *p, char *name,
+			    unsigned long clockevent_rating)
 {
 	if (clockevent_rating)
 		sh_mtu2_register_clockevent(p, name, clockevent_rating);
-- 
cgit v0.10.2


From 938edae11ee3a7b20b6d754074a0f2c2edc4534b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 3 May 2009 18:12:26 +0900
Subject: sh: select both GENERIC_TIME and GENERIC_CLOCKEVENTS.

Now that the rest of the timers that didn't support clockevents have been
rewritten, both of these can be enabled by default.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index c3e9455..565f390 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -78,7 +78,7 @@ config GENERIC_TIME
 	def_bool y
 
 config GENERIC_CLOCKEVENTS
-	def_bool n
+	def_bool y
 
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
@@ -469,8 +469,6 @@ config SH_TMU
 	bool "TMU timer support"
 	depends on CPU_SH3 || CPU_SH4
 	default y
-	select GENERIC_TIME
-	select GENERIC_CLOCKEVENTS
 	help
 	  This enables the use of the TMU as the system timer.
 
@@ -478,8 +476,6 @@ config SH_TIMER_TMU
 	bool "TMU timer driver"
 	depends on !SH_TMU && SYS_SUPPORTS_TMU
 	default y
-	select GENERIC_TIME
-	select GENERIC_CLOCKEVENTS
 	help
 	  This enables the build of the TMU timer driver.
 
@@ -487,8 +483,6 @@ config SH_TIMER_CMT
 	bool "CMT timer driver"
 	depends on SYS_SUPPORTS_CMT
 	default y
-	select GENERIC_CLOCKEVENTS
-	select GENERIC_TIME
 	help
 	  This enables build of the CMT timer driver.
 
@@ -496,7 +490,6 @@ config SH_TIMER_MTU2
 	bool "MTU2 timer driver"
 	depends on SYS_SUPPORTS_MTU2
 	default y
-	select GENERIC_CLOCKEVENTS
 	help
 	  This enables build of the MTU2 timer driver.
 
-- 
cgit v0.10.2


From dec56e6312434b2536fedf9d7e9e73d666aaf0a8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 3 May 2009 18:18:14 +0900
Subject: sh: Kill off the now unused ARCH_USES_GETTIMEOFFSET code.

Now that the stragglers (MTU2/CMT/etc.) have been rewritten and we are
selecting both GENERIC_TIME and GENERIC_CLOCKEVENTS, the get_offset()
timer op is completely unused. As a result, we are now able to kill off
the ARCH_USES_GETTIMEOFFSET references.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h
index 581e3fe..cb16645 100644
--- a/arch/sh/include/asm/timer.h
+++ b/arch/sh/include/asm/timer.h
@@ -9,9 +9,6 @@ struct sys_timer_ops {
 	int (*init)(void);
 	int (*start)(void);
 	int (*stop)(void);
-#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-	unsigned long (*get_offset)(void);
-#endif
 };
 
 struct sys_timer {
@@ -26,13 +23,6 @@ struct sys_timer {
 extern struct sys_timer tmu_timer;
 extern struct sys_timer *sys_timer;
 
-#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-static inline unsigned long get_timer_offset(void)
-{
-	return sys_timer->ops->get_offset();
-}
-#endif
-
 /* arch/sh/kernel/timers/timer.c */
 struct sys_timer *get_sys_timer(void);
 
diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
index 9d34dff..d41ca4c 100644
--- a/arch/sh/kernel/time_32.c
+++ b/arch/sh/kernel/time_32.c
@@ -83,13 +83,6 @@ static int __init rtc_generic_init(void)
 }
 module_init(rtc_generic_init);
 
-#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-u32 arch_gettimeoffset(void)
-{
-	return get_timer_offset() * 1000;
-}
-#endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */
-
 /* last time the RTC clock got updated */
 static long last_rtc_update;
 
@@ -185,7 +178,6 @@ struct clocksource clocksource_sh = {
 	.name		= "SuperH",
 };
 
-#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
 unsigned long long sched_clock(void)
 {
 	unsigned long long cycles;
@@ -197,7 +189,6 @@ unsigned long long sched_clock(void)
 	cycles = clocksource_sh.read(&clocksource_sh);
 	return cyc2ns(&clocksource_sh, cycles);
 }
-#endif
 
 static void __init sh_late_time_init(void)
 {
-- 
cgit v0.10.2


From 25483efeb2e56521e418a59fa93401be156dc3bb Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 3 May 2009 18:29:27 +0900
Subject: sh: Move dummy clockevents broadcast timer to its new home.

The old arch/sh/kernel/timers/ directly will be going away completely
once the rest of the TMU users are migrated, so move the dummy broadcast
driver up a level in preparation.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 82a3a15..5b1d4d0 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -32,4 +32,6 @@ obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_DUMP_CODE)		+= disassemble.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o
 
+obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= localtimer.o
+
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index fe425d7..d256c77 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -17,4 +17,6 @@ obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
 obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 
+obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= localtimer.o
+
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/localtimer.c b/arch/sh/kernel/localtimer.c
new file mode 100644
index 0000000..96e8eae
--- /dev/null
+++ b/arch/sh/kernel/localtimer.c
@@ -0,0 +1,57 @@
+/*
+ * Dummy local timer
+ *
+ * Copyright (C) 2008  Paul Mundt
+ *
+ * cloned from:
+ *
+ *  linux/arch/arm/mach-realview/localtimer.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/smp.h>
+#include <linux/jiffies.h>
+#include <linux/percpu.h>
+#include <linux/clockchips.h>
+#include <linux/irq.h>
+
+static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
+
+/*
+ * Used on SMP for either the local timer or SMP_MSG_TIMER
+ */
+void local_timer_interrupt(void)
+{
+	struct clock_event_device *clk = &__get_cpu_var(local_clockevent);
+
+	clk->event_handler(clk);
+}
+
+static void dummy_timer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *clk)
+{
+}
+
+void __cpuinit local_timer_setup(unsigned int cpu)
+{
+	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+
+	clk->name		= "dummy_timer";
+	clk->features		= CLOCK_EVT_FEAT_DUMMY;
+	clk->rating		= 200;
+	clk->mult		= 1;
+	clk->set_mode		= dummy_timer_set_mode;
+	clk->broadcast		= smp_timer_broadcast;
+	clk->cpumask		= cpumask_of(cpu);
+
+	clockevents_register_device(clk);
+}
diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile
index 44156df..8bceba5 100644
--- a/arch/sh/kernel/timers/Makefile
+++ b/arch/sh/kernel/timers/Makefile
@@ -5,5 +5,3 @@
 obj-y	:= timer.o
 
 obj-$(CONFIG_SH_TMU)		+= timer-tmu.o
-
-obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= timer-broadcast.o
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c
deleted file mode 100644
index 96e8eae..0000000
--- a/arch/sh/kernel/timers/timer-broadcast.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Dummy local timer
- *
- * Copyright (C) 2008  Paul Mundt
- *
- * cloned from:
- *
- *  linux/arch/arm/mach-realview/localtimer.c
- *
- *  Copyright (C) 2002 ARM Ltd.
- *  All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/device.h>
-#include <linux/smp.h>
-#include <linux/jiffies.h>
-#include <linux/percpu.h>
-#include <linux/clockchips.h>
-#include <linux/irq.h>
-
-static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
-
-/*
- * Used on SMP for either the local timer or SMP_MSG_TIMER
- */
-void local_timer_interrupt(void)
-{
-	struct clock_event_device *clk = &__get_cpu_var(local_clockevent);
-
-	clk->event_handler(clk);
-}
-
-static void dummy_timer_set_mode(enum clock_event_mode mode,
-				 struct clock_event_device *clk)
-{
-}
-
-void __cpuinit local_timer_setup(unsigned int cpu)
-{
-	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
-
-	clk->name		= "dummy_timer";
-	clk->features		= CLOCK_EVT_FEAT_DUMMY;
-	clk->rating		= 200;
-	clk->mult		= 1;
-	clk->set_mode		= dummy_timer_set_mode;
-	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of(cpu);
-
-	clockevents_register_device(clk);
-}
-- 
cgit v0.10.2


From 514bf54cd8c7f172816d3c003a6d022e9165a29b Mon Sep 17 00:00:00 2001
From: James Gardiner <renidragsemaj@yahoo.com>
Date: Sun, 3 May 2009 04:00:44 -0400
Subject: ALSA: hda - Addition for HP dv4-1222nr laptop support

Signed-off-by: James Gardiner <renidragsemaj@yahoo.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 8eec05b..36c9712 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -347,6 +347,7 @@ STAC92HD71B*
   hp-m4		HP mini 1000
   hp-dv5	HP dv series
   hp-hdx	HP HDX series
+  hp-dv4-1222nr	HP dv4-1222nr (with LED support)
   auto		BIOS setup (default)
 
 STAC92HD73*
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 917bc5d..76487de 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -100,6 +100,7 @@ enum {
 	STAC_HP_M4,
 	STAC_HP_DV5,
 	STAC_HP_HDX,
+	STAC_HP_DV4_1222NR,
 	STAC_92HD71BXX_MODELS
 };
 
@@ -1836,6 +1837,7 @@ static unsigned int *stac92hd71bxx_brd_tbl[STAC_92HD71BXX_MODELS] = {
 	[STAC_HP_M4]		= NULL,
 	[STAC_HP_DV5]		= NULL,
 	[STAC_HP_HDX]           = NULL,
+	[STAC_HP_DV4_1222NR]	= NULL,
 };
 
 static const char *stac92hd71bxx_models[STAC_92HD71BXX_MODELS] = {
@@ -1847,6 +1849,7 @@ static const char *stac92hd71bxx_models[STAC_92HD71BXX_MODELS] = {
 	[STAC_HP_M4] = "hp-m4",
 	[STAC_HP_DV5] = "hp-dv5",
 	[STAC_HP_HDX] = "hp-hdx",
+	[STAC_HP_DV4_1222NR] = "hp-dv4-1222nr",
 };
 
 static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = {
@@ -1855,6 +1858,8 @@ static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = {
 		      "DFI LanParty", STAC_92HD71BXX_REF),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DFI, 0x3101,
 		      "DFI LanParty", STAC_92HD71BXX_REF),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x30fb,
+		      "HP dv4-1222nr", STAC_HP_DV4_1222NR),
 	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x3080,
 		      "HP", STAC_HP_DV5),
 	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x30f0,
@@ -4520,27 +4525,38 @@ static int stac92xx_resume(struct hda_codec *codec)
 	return 0;
 }
 
-
 /*
- * using power check for controlling mute led of HP HDX notebooks
+ * using power check for controlling mute led of HP notebooks
  * check for mute state only on Speakers (nid = 0x10)
  *
  * For this feature CONFIG_SND_HDA_POWER_SAVE is needed, otherwise
  * the LED is NOT working properly !
+ *
+ * Changed name to reflect that it now works for any designated
+ * model, not just HP HDX.
  */
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
-static int stac92xx_hp_hdx_check_power_status(struct hda_codec *codec,
+static int stac92xx_hp_check_power_status(struct hda_codec *codec,
 					      hda_nid_t nid)
 {
 	struct sigmatel_spec *spec = codec->spec;
+	unsigned int gpio_bit = 0; /* gets rid of compiler warning */
+
+	switch (spec->board_config) {
+	case STAC_HP_DV4_1222NR:
+		gpio_bit = 0x01;
+		break;
+	case STAC_HP_HDX:
+		gpio_bit = 0x08;
+	}
 
 	if (nid == 0x10) {
 		if (snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) &
 		    HDA_AMP_MUTE)
-			spec->gpio_data &= ~0x08;  /* orange */
+			spec->gpio_data &= ~gpio_bit;  /* orange */
 		else
-			spec->gpio_data |= 0x08;   /* white */
+			spec->gpio_data |= gpio_bit;   /* white */
 
 		stac_gpio_set(codec, spec->gpio_mask,
 			      spec->gpio_dir,
@@ -5219,6 +5235,22 @@ again:
 		spec->num_smuxes = 0;
 		spec->num_dmuxes = 1;
 		break;
+	case STAC_HP_DV4_1222NR:
+		spec->num_dmics = 1;
+		/* I don't know if it needs 1 or 2 smuxes - will wait for
+		 * bug reports to fix if needed
+		 */
+		spec->num_smuxes = 1;
+		spec->num_dmuxes = 1;
+#ifdef CONFIG_SND_HDA_POWER_SAVE
+		/* This controls MUTE LED */
+		spec->gpio_mask |= 0x01;
+		spec->gpio_dir  |= 0x01;
+		spec->gpio_data |= 0x01;
+		codec->patch_ops.check_power_status =
+		    stac92xx_hp_check_power_status;
+#endif
+		/* fallthrough */
 	case STAC_HP_DV5:
 		snd_hda_codec_set_pincfg(codec, 0x0d, 0x90170010);
 		stac92xx_auto_set_pinctl(codec, 0x0d, AC_PINCTL_OUT_EN);
@@ -5239,7 +5271,7 @@ again:
 
 		/* register check_power_status callback. */
 		codec->patch_ops.check_power_status =
-		    stac92xx_hp_hdx_check_power_status;
+		    stac92xx_hp_check_power_status;
 #endif	
 		break;
 	};
-- 
cgit v0.10.2


From b0ec3a30bc01c15cc6277b223fae136f7b71e90c Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Sun, 3 May 2009 10:39:19 +0200
Subject: ALSA: sc6000: enable joystick port

Add module parameter to enable or disable
joystick port (gameport) on the SC6600 and
later cards.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 012858d..11a80a9c71 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -1543,13 +1543,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
   Module snd-sc6000
   -----------------
 
-    Module for Gallant SC-6000 soundcard.
+    Module for Gallant SC-6000 soundcard and later models: SC-6600
+    and SC-7000.
 
     port	- Port # (0x220 or 0x240)
     mss_port	- MSS Port # (0x530 or 0xe80)
     irq		- IRQ # (5,7,9,10,11)
     mpu_irq	- MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq
     dma		- DMA # (1,3,0)
+    joystick	- Enable gameport - 0 = disable (default), 1 = enable
 
     This module supports multiple cards.
 
diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c
index c803b2e..9a8bbf6 100644
--- a/sound/isa/sc6000.c
+++ b/sound/isa/sc6000.c
@@ -56,6 +56,7 @@ static long mpu_port[SNDRV_CARDS] = SNDRV_DEFAULT_PORT;
 						/* 0x300, 0x310, 0x320, 0x330 */
 static int mpu_irq[SNDRV_CARDS] = SNDRV_DEFAULT_IRQ;	/* 5, 7, 9, 10, 0 */
 static int dma[SNDRV_CARDS] = SNDRV_DEFAULT_DMA;	/* 0, 1, 3 */
+static bool joystick[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = false };
 
 module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for sc-6000 based soundcard.");
@@ -75,6 +76,8 @@ module_param_array(mpu_irq, int, NULL, 0444);
 MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for sc-6000 driver.");
 module_param_array(dma, int, NULL, 0444);
 MODULE_PARM_DESC(dma, "DMA # for sc-6000 driver.");
+module_param_array(joystick, bool, NULL, 0444);
+MODULE_PARM_DESC(joystick, "Enable gameport.");
 
 /*
  * Commands of SC6000's DSP (SBPRO+special).
@@ -363,7 +366,7 @@ static int __devinit sc6000_init_mss(char __iomem *vport, int config,
 
 static void __devinit sc6000_hw_cfg_encode(char __iomem *vport, int *cfg,
 					   long xport, long xmpu,
-					   long xmss_port)
+					   long xmss_port, int joystick)
 {
 	cfg[0] = 0;
 	cfg[1] = 0;
@@ -376,6 +379,8 @@ static void __devinit sc6000_hw_cfg_encode(char __iomem *vport, int *cfg,
 	if (xmss_port == 0xe80)
 		cfg[0] |= 0x10;
 	cfg[0] |= 0x40;		/* always set */
+	if (!joystick)
+		cfg[0] |= 0x02;
 	cfg[1] |= 0x80;		/* enable WSS system */
 	cfg[1] &= ~0x40;	/* disable IDE */
 	snd_printd("hw cfg %x, %x\n", cfg[0], cfg[1]);
@@ -427,7 +432,7 @@ static int __devinit sc6000_init_board(char __iomem *vport,
 	if (!old) {
 		int cfg[2];
 		sc6000_hw_cfg_encode(vport, &cfg[0], port[dev], mpu_port[dev],
-				     mss_port[dev]);
+				     mss_port[dev], joystick[dev]);
 		if (sc6000_hw_cfg_write(vport, cfg) < 0) {
 			snd_printk(KERN_ERR "sc6000_hw_cfg_write: failed!\n");
 			return -EIO;
-- 
cgit v0.10.2


From b82914ce33146186d554b0f5c41e4e13693614ce Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 May 2009 18:54:32 +0200
Subject: perf_counter: round-robin per-CPU counters too

This used to be unstable when we had the rq->lock dependencies,
but now that they are that of the past we can turn on percpu
counter RR too.

[ Impact: handle counter over-commit for per-CPU counters too ]

LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8660ae5..b9679c3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1069,18 +1069,14 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
-	const int rotate_percpu = 0;
 
-	if (rotate_percpu)
-		perf_counter_cpu_sched_out(cpuctx);
+	perf_counter_cpu_sched_out(cpuctx);
 	perf_counter_task_sched_out(curr, cpu);
 
-	if (rotate_percpu)
-		rotate_ctx(&cpuctx->ctx);
+	rotate_ctx(&cpuctx->ctx);
 	rotate_ctx(ctx);
 
-	if (rotate_percpu)
-		perf_counter_cpu_sched_in(cpuctx, cpu);
+	perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
 }
 
-- 
cgit v0.10.2


From ba77813a2a22d631fe5bc0bf1ec0d11350544b70 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 4 May 2009 18:47:44 +0200
Subject: perf_counter: x86: fixup nmi_watchdog vs perf_counter boo-boo

Invert the atomic_inc_not_zero() test so that we will indeed detect the
first activation.

Also rename the global num_counters, since its easy to confuse with
x86_pmu.num_counters.

[ Impact: fix non-working perfcounters on AMD CPUs, cleanup ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241455664.7620.4938.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d4c0cc9..196b58f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -171,7 +171,7 @@ again:
 	return new_raw_count;
 }
 
-static atomic_t num_counters;
+static atomic_t active_counters;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
 static bool reserve_pmc_hardware(void)
@@ -224,7 +224,7 @@ static void release_pmc_hardware(void)
 
 static void hw_perf_counter_destroy(struct perf_counter *counter)
 {
-	if (atomic_dec_and_mutex_lock(&num_counters, &pmc_reserve_mutex)) {
+	if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
@@ -248,12 +248,12 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		return -ENODEV;
 
 	err = 0;
-	if (atomic_inc_not_zero(&num_counters)) {
+	if (!atomic_inc_not_zero(&active_counters)) {
 		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&num_counters) == 0 && !reserve_pmc_hardware())
+		if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
 			err = -EBUSY;
 		else
-			atomic_inc(&num_counters);
+			atomic_inc(&active_counters);
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 	if (err)
@@ -280,7 +280,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
 		hwc->nmi = 1;
 
-	hwc->irq_period		= hw_event->irq_period;
+	hwc->irq_period	= hw_event->irq_period;
 	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
 		hwc->irq_period = x86_pmu.max_period;
 
@@ -871,7 +871,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
 	struct pt_regs *regs;
 	int ret;
 
-	if (!atomic_read(&num_counters))
+	if (!atomic_read(&active_counters))
 		return NOTIFY_DONE;
 
 	switch (cmd) {
-- 
cgit v0.10.2


From 0d905bca23aca5c86a10ee101bcd3b1abbd40b25 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 May 2009 19:13:30 +0200
Subject: perf_counter: initialize the per-cpu context earlier

percpu scheduling for perfcounters wants to take the context lock,
but that lock first needs to be initialized. Currently it is an
early_initcall() - but that is too late, the task tick runs much
sooner than that.

Call it explicitly from the scheduler init sequence instead.

[ Impact: fix access-before-init crash ]

LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f776851..a356fa6 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -573,6 +573,8 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_priv;
 
+extern void perf_counter_init(void);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -600,9 +602,10 @@ perf_counter_mmap(unsigned long addr, unsigned long len,
 
 static inline void
 perf_counter_munmap(unsigned long addr, unsigned long len,
-		    unsigned long pgoff, struct file *file) 		{ }
+		    unsigned long pgoff, struct file *file)		{ }
 
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
+static inline void perf_counter_init(void)				{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b9679c3..fcdafa2 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3265,15 +3265,12 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = {
 	.notifier_call		= perf_cpu_notify,
 };
 
-static int __init perf_counter_init(void)
+void __init perf_counter_init(void)
 {
 	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
 			(void *)(long)smp_processor_id());
 	register_cpu_notifier(&perf_cpu_nb);
-
-	return 0;
 }
-early_initcall(perf_counter_init);
 
 static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
 {
diff --git a/kernel/sched.c b/kernel/sched.c
index 2f600e3..a728976 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -39,6 +39,7 @@
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
 #include <linux/debug_locks.h>
+#include <linux/perf_counter.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
@@ -8996,7 +8997,7 @@ void __init sched_init(void)
 		 * 1024) and two child groups A0 and A1 (of weight 1024 each),
 		 * then A0's share of the cpu resource is:
 		 *
-		 * 	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
+		 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
 		 *
 		 * We achieve this by letting init_task_group's tasks sit
 		 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
@@ -9097,6 +9098,8 @@ void __init sched_init(void)
 	alloc_bootmem_cpumask_var(&cpu_isolated_map);
 #endif /* SMP */
 
+	perf_counter_init();
+
 	scheduler_running = 1;
 }
 
-- 
cgit v0.10.2


From 1dce8d99b85aba6eddb8b8260baea944922e6fe7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 May 2009 19:23:18 +0200
Subject: perf_counter: convert perf_resource_mutex to a spinlock

Now percpu counters can be initialized very early. But the init
sequence uses mutex_lock(). Fortunately, perf_resource_mutex should
be a spinlock anyway, so convert it.

[ Impact: fix crash due to early init mutex use ]

LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fcdafa2..5f86a11 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,9 +46,9 @@ static atomic_t nr_comm_tracking __read_mostly;
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
 
 /*
- * Mutex for (sysadmin-configurable) counter reservations:
+ * Lock for (sysadmin-configurable) counter reservations:
  */
-static DEFINE_MUTEX(perf_resource_mutex);
+static DEFINE_SPINLOCK(perf_resource_lock);
 
 /*
  * Architecture provided APIs - weak aliases:
@@ -3207,9 +3207,9 @@ static void __cpuinit perf_counter_init_cpu(int cpu)
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	__perf_counter_init_context(&cpuctx->ctx, NULL);
 
-	mutex_lock(&perf_resource_mutex);
+	spin_lock(&perf_resource_lock);
 	cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
-	mutex_unlock(&perf_resource_mutex);
+	spin_unlock(&perf_resource_lock);
 
 	hw_perf_counter_setup(cpu);
 }
@@ -3292,7 +3292,7 @@ perf_set_reserve_percpu(struct sysdev_class *class,
 	if (val > perf_max_counters)
 		return -EINVAL;
 
-	mutex_lock(&perf_resource_mutex);
+	spin_lock(&perf_resource_lock);
 	perf_reserved_percpu = val;
 	for_each_online_cpu(cpu) {
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
@@ -3302,7 +3302,7 @@ perf_set_reserve_percpu(struct sysdev_class *class,
 		cpuctx->max_pertask = mpt;
 		spin_unlock_irq(&cpuctx->ctx.lock);
 	}
-	mutex_unlock(&perf_resource_mutex);
+	spin_unlock(&perf_resource_lock);
 
 	return count;
 }
@@ -3324,9 +3324,9 @@ perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
 	if (val > 1)
 		return -EINVAL;
 
-	mutex_lock(&perf_resource_mutex);
+	spin_lock(&perf_resource_lock);
 	perf_overcommit = val;
-	mutex_unlock(&perf_resource_mutex);
+	spin_unlock(&perf_resource_lock);
 
 	return count;
 }
-- 
cgit v0.10.2


From 066d7dea32c9bffe6decc0abe465627656cdd84e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 May 2009 19:04:09 +0200
Subject: perf_counter: fix fixed-purpose counter support on v2 Intel-PERFMON

Fixed-purpose counters stopped working in a simple 'perf stat ls' run:

   <not counted>  cache references
   <not counted>  cache misses

Due to:

  ef7b3e0: perf_counter, x86: remove vendor check in fixed_mode_idx()

Which made x86_pmu.num_counters_fixed matter: if it's nonzero, the
fixed-purpose counters are utilized.

But on v2 perfmon this field is not set (despite there being
fixed-purpose PMCs). So add a quirk to set the number of fixed-purpose
counters to at least three.

[ Impact: add quirk for three fixed-purpose counters on certain Intel CPUs ]

Cc: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-28-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 196b58f..a6878b0 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -962,7 +962,13 @@ static int intel_pmu_init(void)
 	x86_pmu = intel_pmu;
 	x86_pmu.version = version;
 	x86_pmu.num_counters = eax.split.num_counters;
-	x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
+
+	/*
+	 * Quirk: v2 perfmon does not report fixed-purpose counters, so
+	 * assume at least 3 counters:
+	 */
+	x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+
 	x86_pmu.counter_bits = eax.split.bit_width;
 	x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
 
-- 
cgit v0.10.2


From fcd274a345875b05c348ba19bc6b3dd48ecbb7d0 Mon Sep 17 00:00:00 2001
From: "Lopez Cruz, Misael" <x0052729@ti.com>
Date: Thu, 30 Apr 2009 21:47:22 -0500
Subject: ASoC: TWL4030: Add VDL analog bypass

This patch adds voice downlink analog bypass switch. It follows
the same approach as in other analog bypass switches.
DAC switch is moved from 'DAC Voice' to 'Analog Voice Playback Mixer',
that will also allow voice DAC to be powered in digital voice
loopback (sidetone).

Signed-off-by: Misael Lopez Cruz <x0052729@ti.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index efa1a80..efb371f 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -468,6 +468,10 @@ static const struct snd_kcontrol_new twl4030_dapm_abypassr2_control =
 static const struct snd_kcontrol_new twl4030_dapm_abypassl2_control =
 	SOC_DAPM_SINGLE("Switch", TWL4030_REG_ARXL2_APGA_CTL, 2, 1, 0);
 
+/* Analog bypass for Voice */
+static const struct snd_kcontrol_new twl4030_dapm_abypassv_control =
+	SOC_DAPM_SINGLE("Switch", TWL4030_REG_VDL_APGA_CTL, 2, 1, 0);
+
 /* Digital bypass gain, 0 mutes the bypass */
 static const unsigned int twl4030_dapm_dbypass_tlv[] = {
 	TLV_DB_RANGE_HEAD(2),
@@ -585,7 +589,7 @@ static int bypass_event(struct snd_soc_dapm_widget *w,
 	struct soc_mixer_control *m =
 		(struct soc_mixer_control *)w->kcontrols->private_value;
 	struct twl4030_priv *twl4030 = w->codec->private_data;
-	unsigned char reg;
+	unsigned char reg, misc;
 
 	reg = twl4030_read_reg_cache(w->codec, m->reg);
 
@@ -597,14 +601,28 @@ static int bypass_event(struct snd_soc_dapm_widget *w,
 		else
 			twl4030->bypass_state &=
 				~(1 << (m->reg - TWL4030_REG_ARXL1_APGA_CTL));
+	} else if (m->reg == TWL4030_REG_VDL_APGA_CTL) {
+		/* Analog voice bypass */
+		if (reg & (1 << m->shift))
+			twl4030->bypass_state |= (1 << 4);
+		else
+			twl4030->bypass_state &= ~(1 << 4);
 	} else {
 		/* Digital bypass */
 		if (reg & (0x7 << m->shift))
-			twl4030->bypass_state |= (1 << (m->shift ? 5 : 4));
+			twl4030->bypass_state |= (1 << (m->shift ? 6 : 5));
 		else
-			twl4030->bypass_state &= ~(1 << (m->shift ? 5 : 4));
+			twl4030->bypass_state &= ~(1 << (m->shift ? 6 : 5));
 	}
 
+	/* Enable master analog loopback mode if any analog switch is enabled*/
+	misc = twl4030_read_reg_cache(w->codec, TWL4030_REG_MISC_SET_1);
+	if (twl4030->bypass_state & 0x1F)
+		misc |= TWL4030_FMLOOP_EN;
+	else
+		misc &= ~TWL4030_FMLOOP_EN;
+	twl4030_write(w->codec, TWL4030_REG_MISC_SET_1, misc);
+
 	if (w->codec->bias_level == SND_SOC_BIAS_STANDBY) {
 		if (twl4030->bypass_state)
 			twl4030_codec_mute(w->codec, 0);
@@ -935,7 +953,7 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	SND_SOC_DAPM_DAC("DAC Left2", "Left Rear Playback",
 			SND_SOC_NOPM, 0, 0),
 	SND_SOC_DAPM_DAC("DAC Voice", "Voice Playback",
-			TWL4030_REG_AVDAC_CTL, 4, 0),
+			SND_SOC_NOPM, 0, 0),
 
 	/* Analog PGAs */
 	SND_SOC_DAPM_PGA("ARXR1_APGA", TWL4030_REG_ARXR1_APGA_CTL,
@@ -962,6 +980,9 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	SND_SOC_DAPM_SWITCH_E("Left2 Analog Loopback", SND_SOC_NOPM, 0, 0,
 			&twl4030_dapm_abypassl2_control,
 			bypass_event, SND_SOC_DAPM_POST_REG),
+	SND_SOC_DAPM_SWITCH_E("Voice Analog Loopback", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_abypassv_control,
+			bypass_event, SND_SOC_DAPM_POST_REG),
 
 	/* Digital bypasses */
 	SND_SOC_DAPM_SWITCH_E("Left Digital Loopback", SND_SOC_NOPM, 0, 0,
@@ -979,6 +1000,8 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 			2, 0, NULL, 0),
 	SND_SOC_DAPM_MIXER("Analog L2 Playback Mixer", TWL4030_REG_AVDAC_CTL,
 			3, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog Voice Playback Mixer", TWL4030_REG_AVDAC_CTL,
+			4, 0, NULL, 0),
 
 	/* Output MIXER controls */
 	/* Earpiece */
@@ -1067,13 +1090,13 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"Analog R1 Playback Mixer", NULL, "DAC Right1"},
 	{"Analog L2 Playback Mixer", NULL, "DAC Left2"},
 	{"Analog R2 Playback Mixer", NULL, "DAC Right2"},
+	{"Analog Voice Playback Mixer", NULL, "DAC Voice"},
 
 	{"ARXL1_APGA", NULL, "Analog L1 Playback Mixer"},
 	{"ARXR1_APGA", NULL, "Analog R1 Playback Mixer"},
 	{"ARXL2_APGA", NULL, "Analog L2 Playback Mixer"},
 	{"ARXR2_APGA", NULL, "Analog R2 Playback Mixer"},
-
-	{"VDL_APGA", NULL, "DAC Voice"},
+	{"VDL_APGA", NULL, "Analog Voice Playback Mixer"},
 
 	/* Internal playback routings */
 	/* Earpiece */
@@ -1169,11 +1192,13 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"Left1 Analog Loopback", "Switch", "Analog Left Capture Route"},
 	{"Right2 Analog Loopback", "Switch", "Analog Right Capture Route"},
 	{"Left2 Analog Loopback", "Switch", "Analog Left Capture Route"},
+	{"Voice Analog Loopback", "Switch", "Analog Left Capture Route"},
 
 	{"Analog R1 Playback Mixer", NULL, "Right1 Analog Loopback"},
 	{"Analog L1 Playback Mixer", NULL, "Left1 Analog Loopback"},
 	{"Analog R2 Playback Mixer", NULL, "Right2 Analog Loopback"},
 	{"Analog L2 Playback Mixer", NULL, "Left2 Analog Loopback"},
+	{"Analog Voice Playback Mixer", NULL, "Voice Analog Loopback"},
 
 	/* Digital bypass routes */
 	{"Right Digital Loopback", "Volume", "TX1 Capture Route"},
-- 
cgit v0.10.2


From ee8f6894f358b6a04d8190fd78990749de98a498 Mon Sep 17 00:00:00 2001
From: "Lopez Cruz, Misael" <x0052729@ti.com>
Date: Thu, 30 Apr 2009 21:48:08 -0500
Subject: ASoC: TWL4030: Add voice digital loopback: sidetone

This patch add voice digital loopback (sidetone) to the twl4030
driver. It mixes voice uplink attenuated (by sidetone gain) with
voice downlink when the codec is working in option2 (voice/audio
mode).

Signed-off-by: Misael Lopez Cruz <x0052729@ti.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index efb371f..23bae74 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -491,6 +491,18 @@ static const struct snd_kcontrol_new twl4030_dapm_dbypassr_control =
 			TWL4030_REG_ATX2ARXPGA, 0, 7, 0,
 			twl4030_dapm_dbypass_tlv);
 
+/*
+ * Voice Sidetone GAIN volume control:
+ * from -51 to -10 dB in 1 dB steps (mute instead of -51 dB)
+ */
+static DECLARE_TLV_DB_SCALE(twl4030_dapm_dbypassv_tlv, -5100, 100, 1);
+
+/* Digital bypass voice: sidetone (VUL -> VDL)*/
+static const struct snd_kcontrol_new twl4030_dapm_dbypassv_control =
+	SOC_DAPM_SINGLE_TLV("Volume",
+			TWL4030_REG_VSTPGA, 0, 0x29, 0,
+			twl4030_dapm_dbypassv_tlv);
+
 static int micpath_event(struct snd_soc_dapm_widget *w,
 	struct snd_kcontrol *kcontrol, int event)
 {
@@ -607,12 +619,18 @@ static int bypass_event(struct snd_soc_dapm_widget *w,
 			twl4030->bypass_state |= (1 << 4);
 		else
 			twl4030->bypass_state &= ~(1 << 4);
+	} else if (m->reg == TWL4030_REG_VSTPGA) {
+		/* Voice digital bypass */
+		if (reg)
+			twl4030->bypass_state |= (1 << 5);
+		else
+			twl4030->bypass_state &= ~(1 << 5);
 	} else {
 		/* Digital bypass */
 		if (reg & (0x7 << m->shift))
-			twl4030->bypass_state |= (1 << (m->shift ? 6 : 5));
+			twl4030->bypass_state |= (1 << (m->shift ? 7 : 6));
 		else
-			twl4030->bypass_state &= ~(1 << (m->shift ? 6 : 5));
+			twl4030->bypass_state &= ~(1 << (m->shift ? 7 : 6));
 	}
 
 	/* Enable master analog loopback mode if any analog switch is enabled*/
@@ -991,6 +1009,9 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	SND_SOC_DAPM_SWITCH_E("Right Digital Loopback", SND_SOC_NOPM, 0, 0,
 			&twl4030_dapm_dbypassr_control, bypass_event,
 			SND_SOC_DAPM_POST_REG),
+	SND_SOC_DAPM_SWITCH_E("Voice Digital Loopback", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_dbypassv_control, bypass_event,
+			SND_SOC_DAPM_POST_REG),
 
 	SND_SOC_DAPM_MIXER("Analog R1 Playback Mixer", TWL4030_REG_AVDAC_CTL,
 			0, 0, NULL, 0),
@@ -1203,9 +1224,11 @@ static const struct snd_soc_dapm_route intercon[] = {
 	/* Digital bypass routes */
 	{"Right Digital Loopback", "Volume", "TX1 Capture Route"},
 	{"Left Digital Loopback", "Volume", "TX1 Capture Route"},
+	{"Voice Digital Loopback", "Volume", "TX2 Capture Route"},
 
 	{"Analog R2 Playback Mixer", NULL, "Right Digital Loopback"},
 	{"Analog L2 Playback Mixer", NULL, "Left Digital Loopback"},
+	{"Analog Voice Playback Mixer", NULL, "Voice Digital Loopback"},
 
 };
 
-- 
cgit v0.10.2


From a195b51bc5abc745f12b7b2fe0e3422f55c1953f Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Mon, 4 May 2009 14:54:11 +0000
Subject: ASoC: IMote2 ASoC Support

This patch adds the ASoC side of the board support for the Crossbow
IMB400 daughter board.

Thanks to Crossbow for considerable assistance.

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig
index ad8a10f..eb75a1c 100644
--- a/sound/soc/pxa/Kconfig
+++ b/sound/soc/pxa/Kconfig
@@ -134,3 +134,12 @@ config SND_PXA2XX_SOC_MIOA701
         help
           Say Y if you want to add support for SoC audio on the
           MIO A701.
+
+config SND_PXA2XX_SOC_IMOTE2
+       tristate "SoC Audio support for IMote 2"
+       depends on SND_PXA2XX_SOC && MACH_INTELMOTE2
+       select SND_PXA2XX_SOC_I2S
+       select SND_SOC_WM8940
+       help
+         Say Y if you want to add support for SoC audio on the
+	 IMote 2.
diff --git a/sound/soc/pxa/Makefile b/sound/soc/pxa/Makefile
index 4b90c3c..6e096b4 100644
--- a/sound/soc/pxa/Makefile
+++ b/sound/soc/pxa/Makefile
@@ -22,6 +22,7 @@ snd-soc-palm27x-objs := palm27x.o
 snd-soc-zylonite-objs := zylonite.o
 snd-soc-magician-objs := magician.o
 snd-soc-mioa701-objs := mioa701_wm9713.o
+snd-soc-imote2-objs := imote2.o
 
 obj-$(CONFIG_SND_PXA2XX_SOC_CORGI) += snd-soc-corgi.o
 obj-$(CONFIG_SND_PXA2XX_SOC_POODLE) += snd-soc-poodle.o
@@ -35,3 +36,4 @@ obj-$(CONFIG_SND_PXA2XX_SOC_PALM27X) += snd-soc-palm27x.o
 obj-$(CONFIG_SND_PXA2XX_SOC_MAGICIAN) += snd-soc-magician.o
 obj-$(CONFIG_SND_PXA2XX_SOC_MIOA701) += snd-soc-mioa701.o
 obj-$(CONFIG_SND_SOC_ZYLONITE) += snd-soc-zylonite.o
+obj-$(CONFIG_SND_PXA2XX_SOC_IMOTE2) += snd-soc-imote2.o
diff --git a/sound/soc/pxa/imote2.c b/sound/soc/pxa/imote2.c
new file mode 100644
index 0000000..405587a
--- /dev/null
+++ b/sound/soc/pxa/imote2.c
@@ -0,0 +1,114 @@
+
+#include <linux/module.h>
+#include <sound/soc.h>
+
+#include <asm/mach-types.h>
+
+#include "../codecs/wm8940.h"
+#include "pxa2xx-i2s.h"
+#include "pxa2xx-pcm.h"
+
+static int imote2_asoc_hw_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	unsigned int clk = 0;
+	int ret;
+
+	switch (params_rate(params)) {
+	case 8000:
+	case 16000:
+	case 48000:
+	case 96000:
+		clk = 12288000;
+		break;
+	case 11025:
+	case 22050:
+	case 44100:
+		clk = 11289600;
+		break;
+	}
+
+	/* set codec DAI configuration */
+	ret = snd_soc_dai_set_fmt(codec_dai, SND_SOC_DAIFMT_I2S
+				  | SND_SOC_DAIFMT_NB_NF
+				  | SND_SOC_DAIFMT_CBS_CFS);
+	if (ret < 0)
+		return ret;
+
+	/* CPU should be clock master */
+	ret = snd_soc_dai_set_fmt(cpu_dai,  SND_SOC_DAIFMT_I2S
+				  | SND_SOC_DAIFMT_NB_NF
+				  | SND_SOC_DAIFMT_CBS_CFS);
+	if (ret < 0)
+		return ret;
+
+	ret = snd_soc_dai_set_sysclk(codec_dai, 0, clk,
+				     SND_SOC_CLOCK_IN);
+	if (ret < 0)
+		return ret;
+
+	/* set the I2S system clock as input (unused) */
+	ret = snd_soc_dai_set_sysclk(cpu_dai, PXA2XX_I2S_SYSCLK, clk,
+		SND_SOC_CLOCK_OUT);
+
+	return ret;
+}
+
+static struct snd_soc_ops imote2_asoc_ops = {
+	.hw_params = imote2_asoc_hw_params,
+};
+
+static struct snd_soc_dai_link imote2_dai = {
+	.name = "WM8940",
+	.stream_name = "WM8940",
+	.cpu_dai = &pxa_i2s_dai,
+	.codec_dai = &wm8940_dai,
+	.ops = &imote2_asoc_ops,
+};
+
+static struct snd_soc_card snd_soc_imote2 = {
+	.name = "Imote2",
+	.platform = &pxa2xx_soc_platform,
+	.dai_link = &imote2_dai,
+	.num_links = 1,
+};
+
+static struct snd_soc_device imote2_snd_devdata = {
+	.card = &snd_soc_imote2,
+	.codec_dev = &soc_codec_dev_wm8940,
+};
+
+static struct platform_device *imote2_snd_device;
+
+static int __init imote2_asoc_init(void)
+{
+	int ret;
+
+	if (!machine_is_intelmote2())
+		return -ENODEV;
+	imote2_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!imote2_snd_device)
+		return -ENOMEM;
+
+	platform_set_drvdata(imote2_snd_device, &imote2_snd_devdata);
+	imote2_snd_devdata.dev = &imote2_snd_device->dev;
+	ret = platform_device_add(imote2_snd_device);
+	if (ret)
+		platform_device_put(imote2_snd_device);
+
+	return ret;
+}
+module_init(imote2_asoc_init);
+
+static void __exit imote2_asoc_exit(void)
+{
+	platform_device_unregister(imote2_snd_device);
+}
+module_exit(imote2_asoc_exit);
+
+MODULE_AUTHOR("Jonathan Cameron");
+MODULE_DESCRIPTION("ALSA SoC Imote 2");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 376f7839b72ec526173cafb5d8eadfc61e2effdf Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Tue, 5 May 2009 08:55:47 +0300
Subject: ASoC: TWL4030: Add VIBRA output

This patch adds support for the VIBRA output on TWL4030 codec.
The VIBRA output can be driven with audio data or with
local vibrator driver.
Add the needed DAPM elements and routes for the VIBRA output and
controls for the VIBRA driver configuration.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 23bae74..1a00e4b 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -396,6 +396,31 @@ static const struct soc_enum twl4030_handsfreer_enum =
 static const struct snd_kcontrol_new twl4030_dapm_handsfreer_control =
 SOC_DAPM_ENUM("Route", twl4030_handsfreer_enum);
 
+/* Vibra */
+/* Vibra audio path selection */
+static const char *twl4030_vibra_texts[] =
+		{"AudioL1", "AudioR1", "AudioL2", "AudioR2"};
+
+static const struct soc_enum twl4030_vibra_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_VIBRA_CTL, 2,
+			ARRAY_SIZE(twl4030_vibra_texts),
+			twl4030_vibra_texts);
+
+static const struct snd_kcontrol_new twl4030_dapm_vibra_control =
+SOC_DAPM_ENUM("Route", twl4030_vibra_enum);
+
+/* Vibra path selection: local vibrator (PWM) or audio driven */
+static const char *twl4030_vibrapath_texts[] =
+		{"Local vibrator", "Audio"};
+
+static const struct soc_enum twl4030_vibrapath_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_VIBRA_CTL, 4,
+			ARRAY_SIZE(twl4030_vibrapath_texts),
+			twl4030_vibrapath_texts);
+
+static const struct snd_kcontrol_new twl4030_dapm_vibrapath_control =
+SOC_DAPM_ENUM("Route", twl4030_vibrapath_enum);
+
 /* Left analog microphone selection */
 static const char *twl4030_analoglmic_texts[] =
 		{"Off", "Main mic", "Headset mic", "AUXL", "Carkit mic"};
@@ -867,6 +892,26 @@ static const struct soc_enum twl4030_rampdelay_enum =
 			ARRAY_SIZE(twl4030_rampdelay_texts),
 			twl4030_rampdelay_texts);
 
+/* Vibra H-bridge direction mode */
+static const char *twl4030_vibradirmode_texts[] = {
+	"Vibra H-bridge direction", "Audio data MSB",
+};
+
+static const struct soc_enum twl4030_vibradirmode_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_VIBRA_CTL, 5,
+			ARRAY_SIZE(twl4030_vibradirmode_texts),
+			twl4030_vibradirmode_texts);
+
+/* Vibra H-bridge direction */
+static const char *twl4030_vibradir_texts[] = {
+	"Positive polarity", "Negative polarity",
+};
+
+static const struct soc_enum twl4030_vibradir_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_VIBRA_CTL, 1,
+			ARRAY_SIZE(twl4030_vibradir_texts),
+			twl4030_vibradir_texts);
+
 static const struct snd_kcontrol_new twl4030_snd_controls[] = {
 	/* Common playback gain controls */
 	SOC_DOUBLE_R_TLV("DAC1 Digital Fine Playback Volume",
@@ -933,6 +978,9 @@ static const struct snd_kcontrol_new twl4030_snd_controls[] = {
 		0, 3, 5, 0, input_gain_tlv),
 
 	SOC_ENUM("HS ramp delay", twl4030_rampdelay_enum),
+
+	SOC_ENUM("Vibra H-bridge mode", twl4030_vibradirmode_enum),
+	SOC_ENUM("Vibra H-bridge direction", twl4030_vibradir_enum),
 };
 
 static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
@@ -960,6 +1008,7 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	SND_SOC_DAPM_OUTPUT("CARKITR"),
 	SND_SOC_DAPM_OUTPUT("HFL"),
 	SND_SOC_DAPM_OUTPUT("HFR"),
+	SND_SOC_DAPM_OUTPUT("VIBRA"),
 
 	/* DACs */
 	SND_SOC_DAPM_DAC("DAC Right1", "Right Front Playback",
@@ -1060,6 +1109,11 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	SND_SOC_DAPM_MUX_E("HandsfreeR Mux", TWL4030_REG_HFR_CTL, 5, 0,
 		&twl4030_dapm_handsfreer_control, handsfree_event,
 		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	/* Vibra */
+	SND_SOC_DAPM_MUX("Vibra Mux", TWL4030_REG_VIBRA_CTL, 0, 0,
+		&twl4030_dapm_vibra_control),
+	SND_SOC_DAPM_MUX("Vibra Route", SND_SOC_NOPM, 0, 0,
+		&twl4030_dapm_vibrapath_control),
 
 	/* Introducing four virtual ADC, since TWL4030 have four channel for
 	   capture */
@@ -1161,6 +1215,11 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"HandsfreeR Mux", "AudioR1", "ARXR1_APGA"},
 	{"HandsfreeR Mux", "AudioR2", "ARXR2_APGA"},
 	{"HandsfreeR Mux", "AudioL2", "ARXL2_APGA"},
+	/* Vibra */
+	{"Vibra Mux", "AudioL1", "DAC Left1"},
+	{"Vibra Mux", "AudioR1", "DAC Right1"},
+	{"Vibra Mux", "AudioL2", "DAC Left2"},
+	{"Vibra Mux", "AudioR2", "DAC Right2"},
 
 	/* outputs */
 	{"OUTL", NULL, "ARXL2_APGA"},
@@ -1174,6 +1233,8 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"CARKITR", NULL, "CarkitR Mixer"},
 	{"HFL", NULL, "HandsfreeL Mux"},
 	{"HFR", NULL, "HandsfreeR Mux"},
+	{"Vibra Route", "Audio", "Vibra Mux"},
+	{"VIBRA", NULL, "Vibra Route"},
 
 	/* Capture path */
 	{"Analog Left Capture Route", "Main mic", "MAINMIC"},
-- 
cgit v0.10.2


From bbd993077d788589a86a718ba7a7895ba5e71a17 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 May 2009 10:27:38 +0100
Subject: ASoC: Remove redundant codec pointer from DAIs

The DAI structure has two pointers to the codec, one in the body of the
DAI and one in a union for a parent pointer.  Drop the parent pointer
version.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index a997c2c..496dc30 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -205,11 +205,8 @@ struct snd_soc_dai {
 	/* DAI private data */
 	void *private_data;
 
-	/* parent codec/platform */
-	union {
-		struct snd_soc_codec *codec;
-		struct snd_soc_platform *platform;
-	};
+	/* parent platform */
+	struct snd_soc_platform *platform;
 
 	struct list_head list;
 };
-- 
cgit v0.10.2


From a9dcad5e375800fcb07f7617dba23b3aade8f09d Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Date: Mon, 26 Jan 2009 15:13:40 +0200
Subject: omap iommu: tlb and pagetable primitives

This patch provides:

- iotlb_*()     : iommu tlb operations
- iopgtable_*() : iommu pagetable(twl) operations
- iommu_*()     : the other generic operations

and the entry points to register and acquire iommu object.

Signed-off-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>

diff --git a/arch/arm/plat-omap/include/mach/iommu.h b/arch/arm/plat-omap/include/mach/iommu.h
new file mode 100644
index 0000000..769b00b
--- /dev/null
+++ b/arch/arm/plat-omap/include/mach/iommu.h
@@ -0,0 +1,168 @@
+/*
+ * omap iommu: main structures
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MACH_IOMMU_H
+#define __MACH_IOMMU_H
+
+struct iotlb_entry {
+	u32 da;
+	u32 pa;
+	u32 pgsz, prsvd, valid;
+	union {
+		u16 ap;
+		struct {
+			u32 endian, elsz, mixed;
+		};
+	};
+};
+
+struct iommu {
+	const char	*name;
+	struct module	*owner;
+	struct clk	*clk;
+	void __iomem	*regbase;
+	struct device	*dev;
+
+	unsigned int	refcount;
+	struct mutex	iommu_lock;	/* global for this whole object */
+
+	/*
+	 * We don't change iopgd for a situation like pgd for a task,
+	 * but share it globally for each iommu.
+	 */
+	u32		*iopgd;
+	spinlock_t	page_table_lock; /* protect iopgd */
+
+	int		nr_tlb_entries;
+
+	struct list_head	mmap;
+	struct mutex		mmap_lock; /* protect mmap */
+
+	int (*isr)(struct iommu *obj);
+
+	void *ctx; /* iommu context: registres saved area */
+};
+
+struct cr_regs {
+	union {
+		struct {
+			u16 cam_l;
+			u16 cam_h;
+		};
+		u32 cam;
+	};
+	union {
+		struct {
+			u16 ram_l;
+			u16 ram_h;
+		};
+		u32 ram;
+	};
+};
+
+struct iotlb_lock {
+	short base;
+	short vict;
+};
+
+/* architecture specific functions */
+struct iommu_functions {
+	unsigned long	version;
+
+	int (*enable)(struct iommu *obj);
+	void (*disable)(struct iommu *obj);
+	u32 (*fault_isr)(struct iommu *obj, u32 *ra);
+
+	void (*tlb_read_cr)(struct iommu *obj, struct cr_regs *cr);
+	void (*tlb_load_cr)(struct iommu *obj, struct cr_regs *cr);
+
+	struct cr_regs *(*alloc_cr)(struct iommu *obj, struct iotlb_entry *e);
+	int (*cr_valid)(struct cr_regs *cr);
+	u32 (*cr_to_virt)(struct cr_regs *cr);
+	void (*cr_to_e)(struct cr_regs *cr, struct iotlb_entry *e);
+	ssize_t (*dump_cr)(struct iommu *obj, struct cr_regs *cr, char *buf);
+
+	u32 (*get_pte_attr)(struct iotlb_entry *e);
+
+	void (*save_ctx)(struct iommu *obj);
+	void (*restore_ctx)(struct iommu *obj);
+	ssize_t (*dump_ctx)(struct iommu *obj, char *buf);
+};
+
+struct iommu_platform_data {
+	const char *name;
+	const char *clk_name;
+	const int nr_tlb_entries;
+};
+
+#if defined(CONFIG_ARCH_OMAP1)
+#error "iommu for this processor not implemented yet"
+#else
+#include <mach/iommu2.h>
+#endif
+
+/*
+ * utilities for super page(16MB, 1MB, 64KB and 4KB)
+ */
+
+#define iopgsz_max(bytes)			\
+	(((bytes) >= SZ_16M) ? SZ_16M :		\
+	 ((bytes) >= SZ_1M)  ? SZ_1M  :		\
+	 ((bytes) >= SZ_64K) ? SZ_64K :		\
+	 ((bytes) >= SZ_4K)  ? SZ_4K  :	0)
+
+#define bytes_to_iopgsz(bytes)				\
+	(((bytes) == SZ_16M) ? MMU_CAM_PGSZ_16M :	\
+	 ((bytes) == SZ_1M)  ? MMU_CAM_PGSZ_1M  :	\
+	 ((bytes) == SZ_64K) ? MMU_CAM_PGSZ_64K :	\
+	 ((bytes) == SZ_4K)  ? MMU_CAM_PGSZ_4K  : -1)
+
+#define iopgsz_to_bytes(iopgsz)				\
+	(((iopgsz) == MMU_CAM_PGSZ_16M)	? SZ_16M :	\
+	 ((iopgsz) == MMU_CAM_PGSZ_1M)	? SZ_1M  :	\
+	 ((iopgsz) == MMU_CAM_PGSZ_64K)	? SZ_64K :	\
+	 ((iopgsz) == MMU_CAM_PGSZ_4K)	? SZ_4K  : 0)
+
+#define iopgsz_ok(bytes) (bytes_to_iopgsz(bytes) >= 0)
+
+/*
+ * global functions
+ */
+extern u32 iommu_arch_version(void);
+
+extern void iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e);
+extern u32 iotlb_cr_to_virt(struct cr_regs *cr);
+
+extern int load_iotlb_entry(struct iommu *obj, struct iotlb_entry *e);
+extern void flush_iotlb_page(struct iommu *obj, u32 da);
+extern void flush_iotlb_range(struct iommu *obj, u32 start, u32 end);
+extern void flush_iotlb_all(struct iommu *obj);
+
+extern int iopgtable_store_entry(struct iommu *obj, struct iotlb_entry *e);
+extern size_t iopgtable_clear_entry(struct iommu *obj, u32 iova);
+
+extern struct iommu *iommu_get(const char *name);
+extern void iommu_put(struct iommu *obj);
+
+extern void iommu_save_ctx(struct iommu *obj);
+extern void iommu_restore_ctx(struct iommu *obj);
+
+extern int install_iommu_arch(const struct iommu_functions *ops);
+extern void uninstall_iommu_arch(const struct iommu_functions *ops);
+
+extern int foreach_iommu_device(void *data,
+				int (*fn)(struct device *, void *));
+
+extern ssize_t iommu_dump_ctx(struct iommu *obj, char *buf);
+extern size_t dump_tlb_entries(struct iommu *obj, char *buf);
+
+#endif /* __MACH_IOMMU_H */
diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c
new file mode 100644
index 0000000..4cf449f
--- /dev/null
+++ b/arch/arm/plat-omap/iommu.c
@@ -0,0 +1,996 @@
+/*
+ * omap iommu: tlb and pagetable primitives
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
+ *		Paul Mundt and Toshihiro Kobayashi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+#include <asm/cacheflush.h>
+
+#include <mach/iommu.h>
+
+#include "iopgtable.h"
+
+/* accommodate the difference between omap1 and omap2/3 */
+static const struct iommu_functions *arch_iommu;
+
+static struct platform_driver omap_iommu_driver;
+static struct kmem_cache *iopte_cachep;
+
+/**
+ * install_iommu_arch - Install archtecure specific iommu functions
+ * @ops:	a pointer to architecture specific iommu functions
+ *
+ * There are several kind of iommu algorithm(tlb, pagetable) among
+ * omap series. This interface installs such an iommu algorighm.
+ **/
+int install_iommu_arch(const struct iommu_functions *ops)
+{
+	if (arch_iommu)
+		return -EBUSY;
+
+	arch_iommu = ops;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(install_iommu_arch);
+
+/**
+ * uninstall_iommu_arch - Uninstall archtecure specific iommu functions
+ * @ops:	a pointer to architecture specific iommu functions
+ *
+ * This interface uninstalls the iommu algorighm installed previously.
+ **/
+void uninstall_iommu_arch(const struct iommu_functions *ops)
+{
+	if (arch_iommu != ops)
+		pr_err("%s: not your arch\n", __func__);
+
+	arch_iommu = NULL;
+}
+EXPORT_SYMBOL_GPL(uninstall_iommu_arch);
+
+/**
+ * iommu_save_ctx - Save registers for pm off-mode support
+ * @obj:	target iommu
+ **/
+void iommu_save_ctx(struct iommu *obj)
+{
+	arch_iommu->save_ctx(obj);
+}
+EXPORT_SYMBOL_GPL(iommu_save_ctx);
+
+/**
+ * iommu_restore_ctx - Restore registers for pm off-mode support
+ * @obj:	target iommu
+ **/
+void iommu_restore_ctx(struct iommu *obj)
+{
+	arch_iommu->restore_ctx(obj);
+}
+EXPORT_SYMBOL_GPL(iommu_restore_ctx);
+
+/**
+ * iommu_arch_version - Return running iommu arch version
+ **/
+u32 iommu_arch_version(void)
+{
+	return arch_iommu->version;
+}
+EXPORT_SYMBOL_GPL(iommu_arch_version);
+
+static int iommu_enable(struct iommu *obj)
+{
+	int err;
+
+	if (!obj)
+		return -EINVAL;
+
+	clk_enable(obj->clk);
+
+	err = arch_iommu->enable(obj);
+
+	clk_disable(obj->clk);
+	return err;
+}
+
+static void iommu_disable(struct iommu *obj)
+{
+	if (!obj)
+		return;
+
+	clk_enable(obj->clk);
+
+	arch_iommu->disable(obj);
+
+	clk_disable(obj->clk);
+}
+
+/*
+ *	TLB operations
+ */
+void iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e)
+{
+	BUG_ON(!cr || !e);
+
+	arch_iommu->cr_to_e(cr, e);
+}
+EXPORT_SYMBOL_GPL(iotlb_cr_to_e);
+
+static inline int iotlb_cr_valid(struct cr_regs *cr)
+{
+	if (!cr)
+		return -EINVAL;
+
+	return arch_iommu->cr_valid(cr);
+}
+
+static inline struct cr_regs *iotlb_alloc_cr(struct iommu *obj,
+					     struct iotlb_entry *e)
+{
+	if (!e)
+		return NULL;
+
+	return arch_iommu->alloc_cr(obj, e);
+}
+
+u32 iotlb_cr_to_virt(struct cr_regs *cr)
+{
+	return arch_iommu->cr_to_virt(cr);
+}
+EXPORT_SYMBOL_GPL(iotlb_cr_to_virt);
+
+static u32 get_iopte_attr(struct iotlb_entry *e)
+{
+	return arch_iommu->get_pte_attr(e);
+}
+
+static u32 iommu_report_fault(struct iommu *obj, u32 *da)
+{
+	return arch_iommu->fault_isr(obj, da);
+}
+
+static void iotlb_lock_get(struct iommu *obj, struct iotlb_lock *l)
+{
+	u32 val;
+
+	val = iommu_read_reg(obj, MMU_LOCK);
+
+	l->base = MMU_LOCK_BASE(val);
+	l->vict = MMU_LOCK_VICT(val);
+
+	BUG_ON(l->base != 0); /* Currently no preservation is used */
+}
+
+static void iotlb_lock_set(struct iommu *obj, struct iotlb_lock *l)
+{
+	u32 val;
+
+	BUG_ON(l->base != 0); /* Currently no preservation is used */
+
+	val = (l->base << MMU_LOCK_BASE_SHIFT);
+	val |= (l->vict << MMU_LOCK_VICT_SHIFT);
+
+	iommu_write_reg(obj, val, MMU_LOCK);
+}
+
+static void iotlb_read_cr(struct iommu *obj, struct cr_regs *cr)
+{
+	arch_iommu->tlb_read_cr(obj, cr);
+}
+
+static void iotlb_load_cr(struct iommu *obj, struct cr_regs *cr)
+{
+	arch_iommu->tlb_load_cr(obj, cr);
+
+	iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
+	iommu_write_reg(obj, 1, MMU_LD_TLB);
+}
+
+/**
+ * iotlb_dump_cr - Dump an iommu tlb entry into buf
+ * @obj:	target iommu
+ * @cr:		contents of cam and ram register
+ * @buf:	output buffer
+ **/
+static inline ssize_t iotlb_dump_cr(struct iommu *obj, struct cr_regs *cr,
+				    char *buf)
+{
+	BUG_ON(!cr || !buf);
+
+	return arch_iommu->dump_cr(obj, cr, buf);
+}
+
+/**
+ * load_iotlb_entry - Set an iommu tlb entry
+ * @obj:	target iommu
+ * @e:		an iommu tlb entry info
+ **/
+int load_iotlb_entry(struct iommu *obj, struct iotlb_entry *e)
+{
+	int i;
+	int err = 0;
+	struct iotlb_lock l;
+	struct cr_regs *cr;
+
+	if (!obj || !obj->nr_tlb_entries || !e)
+		return -EINVAL;
+
+	clk_enable(obj->clk);
+
+	for (i = 0; i < obj->nr_tlb_entries; i++) {
+		struct cr_regs tmp;
+
+		iotlb_lock_get(obj, &l);
+		l.vict = i;
+		iotlb_lock_set(obj, &l);
+		iotlb_read_cr(obj, &tmp);
+		if (!iotlb_cr_valid(&tmp))
+			break;
+	}
+
+	if (i == obj->nr_tlb_entries) {
+		dev_dbg(obj->dev, "%s: full: no entry\n", __func__);
+		err = -EBUSY;
+		goto out;
+	}
+
+	cr = iotlb_alloc_cr(obj, e);
+	if (IS_ERR(cr)) {
+		clk_disable(obj->clk);
+		return PTR_ERR(cr);
+	}
+
+	iotlb_load_cr(obj, cr);
+	kfree(cr);
+
+	/* increment victim for next tlb load */
+	if (++l.vict == obj->nr_tlb_entries)
+		l.vict = 0;
+	iotlb_lock_set(obj, &l);
+out:
+	clk_disable(obj->clk);
+	return err;
+}
+EXPORT_SYMBOL_GPL(load_iotlb_entry);
+
+/**
+ * flush_iotlb_page - Clear an iommu tlb entry
+ * @obj:	target iommu
+ * @da:		iommu device virtual address
+ *
+ * Clear an iommu tlb entry which includes 'da' address.
+ **/
+void flush_iotlb_page(struct iommu *obj, u32 da)
+{
+	struct iotlb_lock l;
+	int i;
+
+	clk_enable(obj->clk);
+
+	for (i = 0; i < obj->nr_tlb_entries; i++) {
+		struct cr_regs cr;
+		u32 start;
+		size_t bytes;
+
+		iotlb_lock_get(obj, &l);
+		l.vict = i;
+		iotlb_lock_set(obj, &l);
+		iotlb_read_cr(obj, &cr);
+		if (!iotlb_cr_valid(&cr))
+			continue;
+
+		start = iotlb_cr_to_virt(&cr);
+		bytes = iopgsz_to_bytes(cr.cam & 3);
+
+		if ((start <= da) && (da < start + bytes)) {
+			dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n",
+				__func__, start, da, bytes);
+
+			iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
+		}
+	}
+	clk_disable(obj->clk);
+
+	if (i == obj->nr_tlb_entries)
+		dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da);
+}
+EXPORT_SYMBOL_GPL(flush_iotlb_page);
+
+/**
+ * flush_iotlb_range - Clear an iommu tlb entries
+ * @obj:	target iommu
+ * @start:	iommu device virtual address(start)
+ * @end:	iommu device virtual address(end)
+ *
+ * Clear an iommu tlb entry which includes 'da' address.
+ **/
+void flush_iotlb_range(struct iommu *obj, u32 start, u32 end)
+{
+	u32 da = start;
+
+	while (da < end) {
+		flush_iotlb_page(obj, da);
+		/* FIXME: Optimize for multiple page size */
+		da += IOPTE_SIZE;
+	}
+}
+EXPORT_SYMBOL_GPL(flush_iotlb_range);
+
+/**
+ * flush_iotlb_all - Clear all iommu tlb entries
+ * @obj:	target iommu
+ **/
+void flush_iotlb_all(struct iommu *obj)
+{
+	struct iotlb_lock l;
+
+	clk_enable(obj->clk);
+
+	l.base = 0;
+	l.vict = 0;
+	iotlb_lock_set(obj, &l);
+
+	iommu_write_reg(obj, 1, MMU_GFLUSH);
+
+	clk_disable(obj->clk);
+}
+EXPORT_SYMBOL_GPL(flush_iotlb_all);
+
+#if defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE)
+
+ssize_t iommu_dump_ctx(struct iommu *obj, char *buf)
+{
+	ssize_t bytes;
+
+	if (!obj || !buf)
+		return -EINVAL;
+
+	clk_enable(obj->clk);
+
+	bytes = arch_iommu->dump_ctx(obj, buf);
+
+	clk_disable(obj->clk);
+
+	return bytes;
+}
+EXPORT_SYMBOL_GPL(iommu_dump_ctx);
+
+static int __dump_tlb_entries(struct iommu *obj, struct cr_regs *crs)
+{
+	int i;
+	struct iotlb_lock saved, l;
+	struct cr_regs *p = crs;
+
+	clk_enable(obj->clk);
+
+	iotlb_lock_get(obj, &saved);
+	memcpy(&l, &saved, sizeof(saved));
+
+	for (i = 0; i < obj->nr_tlb_entries; i++) {
+		struct cr_regs tmp;
+
+		iotlb_lock_get(obj, &l);
+		l.vict = i;
+		iotlb_lock_set(obj, &l);
+		iotlb_read_cr(obj, &tmp);
+		if (!iotlb_cr_valid(&tmp))
+			continue;
+
+		*p++ = tmp;
+	}
+	iotlb_lock_set(obj, &saved);
+	clk_disable(obj->clk);
+
+	return  p - crs;
+}
+
+/**
+ * dump_tlb_entries - dump cr arrays to given buffer
+ * @obj:	target iommu
+ * @buf:	output buffer
+ **/
+size_t dump_tlb_entries(struct iommu *obj, char *buf)
+{
+	int i, n;
+	struct cr_regs *cr;
+	char *p = buf;
+
+	cr = kcalloc(obj->nr_tlb_entries, sizeof(*cr), GFP_KERNEL);
+	if (!cr)
+		return 0;
+
+	n = __dump_tlb_entries(obj, cr);
+	for (i = 0; i < n; i++)
+		p += iotlb_dump_cr(obj, cr + i, p);
+	kfree(cr);
+
+	return p - buf;
+}
+EXPORT_SYMBOL_GPL(dump_tlb_entries);
+
+int foreach_iommu_device(void *data, int (*fn)(struct device *, void *))
+{
+	return driver_for_each_device(&omap_iommu_driver.driver,
+				      NULL, data, fn);
+}
+EXPORT_SYMBOL_GPL(foreach_iommu_device);
+
+#endif /* CONFIG_OMAP_IOMMU_DEBUG_MODULE */
+
+/*
+ *	H/W pagetable operations
+ */
+static void flush_iopgd_range(u32 *first, u32 *last)
+{
+	/* FIXME: L2 cache should be taken care of if it exists */
+	do {
+		asm("mcr	p15, 0, %0, c7, c10, 1 @ flush_pgd"
+		    : : "r" (first));
+		first += L1_CACHE_BYTES / sizeof(*first);
+	} while (first <= last);
+}
+
+static void flush_iopte_range(u32 *first, u32 *last)
+{
+	/* FIXME: L2 cache should be taken care of if it exists */
+	do {
+		asm("mcr	p15, 0, %0, c7, c10, 1 @ flush_pte"
+		    : : "r" (first));
+		first += L1_CACHE_BYTES / sizeof(*first);
+	} while (first <= last);
+}
+
+static void iopte_free(u32 *iopte)
+{
+	/* Note: freed iopte's must be clean ready for re-use */
+	kmem_cache_free(iopte_cachep, iopte);
+}
+
+static u32 *iopte_alloc(struct iommu *obj, u32 *iopgd, u32 da)
+{
+	u32 *iopte;
+
+	/* a table has already existed */
+	if (*iopgd)
+		goto pte_ready;
+
+	/*
+	 * do the allocation outside the page table lock
+	 */
+	spin_unlock(&obj->page_table_lock);
+	iopte = kmem_cache_zalloc(iopte_cachep, GFP_KERNEL);
+	spin_lock(&obj->page_table_lock);
+
+	if (!*iopgd) {
+		if (!iopte)
+			return ERR_PTR(-ENOMEM);
+
+		*iopgd = virt_to_phys(iopte) | IOPGD_TABLE;
+		flush_iopgd_range(iopgd, iopgd);
+
+		dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte);
+	} else {
+		/* We raced, free the reduniovant table */
+		iopte_free(iopte);
+	}
+
+pte_ready:
+	iopte = iopte_offset(iopgd, da);
+
+	dev_vdbg(obj->dev,
+		 "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n",
+		 __func__, da, iopgd, *iopgd, iopte, *iopte);
+
+	return iopte;
+}
+
+static int iopgd_alloc_section(struct iommu *obj, u32 da, u32 pa, u32 prot)
+{
+	u32 *iopgd = iopgd_offset(obj, da);
+
+	*iopgd = (pa & IOSECTION_MASK) | prot | IOPGD_SECTION;
+	flush_iopgd_range(iopgd, iopgd);
+	return 0;
+}
+
+static int iopgd_alloc_super(struct iommu *obj, u32 da, u32 pa, u32 prot)
+{
+	u32 *iopgd = iopgd_offset(obj, da);
+	int i;
+
+	for (i = 0; i < 16; i++)
+		*(iopgd + i) = (pa & IOSUPER_MASK) | prot | IOPGD_SUPER;
+	flush_iopgd_range(iopgd, iopgd + 15);
+	return 0;
+}
+
+static int iopte_alloc_page(struct iommu *obj, u32 da, u32 pa, u32 prot)
+{
+	u32 *iopgd = iopgd_offset(obj, da);
+	u32 *iopte = iopte_alloc(obj, iopgd, da);
+
+	if (IS_ERR(iopte))
+		return PTR_ERR(iopte);
+
+	*iopte = (pa & IOPAGE_MASK) | prot | IOPTE_SMALL;
+	flush_iopte_range(iopte, iopte);
+
+	dev_vdbg(obj->dev, "%s: da:%08x pa:%08x pte:%p *pte:%08x\n",
+		 __func__, da, pa, iopte, *iopte);
+
+	return 0;
+}
+
+static int iopte_alloc_large(struct iommu *obj, u32 da, u32 pa, u32 prot)
+{
+	u32 *iopgd = iopgd_offset(obj, da);
+	u32 *iopte = iopte_alloc(obj, iopgd, da);
+	int i;
+
+	if (IS_ERR(iopte))
+		return PTR_ERR(iopte);
+
+	for (i = 0; i < 16; i++)
+		*(iopte + i) = (pa & IOLARGE_MASK) | prot | IOPTE_LARGE;
+	flush_iopte_range(iopte, iopte + 15);
+	return 0;
+}
+
+static int iopgtable_store_entry_core(struct iommu *obj, struct iotlb_entry *e)
+{
+	int (*fn)(struct iommu *, u32, u32, u32);
+	u32 prot;
+	int err;
+
+	if (!obj || !e)
+		return -EINVAL;
+
+	switch (e->pgsz) {
+	case MMU_CAM_PGSZ_16M:
+		fn = iopgd_alloc_super;
+		break;
+	case MMU_CAM_PGSZ_1M:
+		fn = iopgd_alloc_section;
+		break;
+	case MMU_CAM_PGSZ_64K:
+		fn = iopte_alloc_large;
+		break;
+	case MMU_CAM_PGSZ_4K:
+		fn = iopte_alloc_page;
+		break;
+	default:
+		fn = NULL;
+		BUG();
+		break;
+	}
+
+	prot = get_iopte_attr(e);
+
+	spin_lock(&obj->page_table_lock);
+	err = fn(obj, e->da, e->pa, prot);
+	spin_unlock(&obj->page_table_lock);
+
+	return err;
+}
+
+/**
+ * iopgtable_store_entry - Make an iommu pte entry
+ * @obj:	target iommu
+ * @e:		an iommu tlb entry info
+ **/
+int iopgtable_store_entry(struct iommu *obj, struct iotlb_entry *e)
+{
+	int err;
+
+	flush_iotlb_page(obj, e->da);
+	err = iopgtable_store_entry_core(obj, e);
+#ifdef PREFETCH_IOTLB
+	if (!err)
+		load_iotlb_entry(obj, e);
+#endif
+	return err;
+}
+EXPORT_SYMBOL_GPL(iopgtable_store_entry);
+
+/**
+ * iopgtable_lookup_entry - Lookup an iommu pte entry
+ * @obj:	target iommu
+ * @da:		iommu device virtual address
+ * @ppgd:	iommu pgd entry pointer to be returned
+ * @ppte:	iommu pte entry pointer to be returned
+ **/
+void iopgtable_lookup_entry(struct iommu *obj, u32 da, u32 **ppgd, u32 **ppte)
+{
+	u32 *iopgd, *iopte = NULL;
+
+	iopgd = iopgd_offset(obj, da);
+	if (!*iopgd)
+		goto out;
+
+	if (*iopgd & IOPGD_TABLE)
+		iopte = iopte_offset(iopgd, da);
+out:
+	*ppgd = iopgd;
+	*ppte = iopte;
+}
+EXPORT_SYMBOL_GPL(iopgtable_lookup_entry);
+
+static size_t iopgtable_clear_entry_core(struct iommu *obj, u32 da)
+{
+	size_t bytes;
+	u32 *iopgd = iopgd_offset(obj, da);
+	int nent = 1;
+
+	if (!*iopgd)
+		return 0;
+
+	if (*iopgd & IOPGD_TABLE) {
+		int i;
+		u32 *iopte = iopte_offset(iopgd, da);
+
+		bytes = IOPTE_SIZE;
+		if (*iopte & IOPTE_LARGE) {
+			nent *= 16;
+			/* rewind to the 1st entry */
+			iopte = (u32 *)((u32)iopte & IOLARGE_MASK);
+		}
+		bytes *= nent;
+		memset(iopte, 0, nent * sizeof(*iopte));
+		flush_iopte_range(iopte, iopte + (nent - 1) * sizeof(*iopte));
+
+		/*
+		 * do table walk to check if this table is necessary or not
+		 */
+		iopte = iopte_offset(iopgd, 0);
+		for (i = 0; i < PTRS_PER_IOPTE; i++)
+			if (iopte[i])
+				goto out;
+
+		iopte_free(iopte);
+		nent = 1; /* for the next L1 entry */
+	} else {
+		bytes = IOPGD_SIZE;
+		if (*iopgd & IOPGD_SUPER) {
+			nent *= 16;
+			/* rewind to the 1st entry */
+			iopgd = (u32 *)((u32)iopgd & IOSUPER_MASK);
+		}
+		bytes *= nent;
+	}
+	memset(iopgd, 0, nent * sizeof(*iopgd));
+	flush_iopgd_range(iopgd, iopgd + (nent - 1) * sizeof(*iopgd));
+out:
+	return bytes;
+}
+
+/**
+ * iopgtable_clear_entry - Remove an iommu pte entry
+ * @obj:	target iommu
+ * @da:		iommu device virtual address
+ **/
+size_t iopgtable_clear_entry(struct iommu *obj, u32 da)
+{
+	size_t bytes;
+
+	spin_lock(&obj->page_table_lock);
+
+	bytes = iopgtable_clear_entry_core(obj, da);
+	flush_iotlb_page(obj, da);
+
+	spin_unlock(&obj->page_table_lock);
+
+	return bytes;
+}
+EXPORT_SYMBOL_GPL(iopgtable_clear_entry);
+
+static void iopgtable_clear_entry_all(struct iommu *obj)
+{
+	int i;
+
+	spin_lock(&obj->page_table_lock);
+
+	for (i = 0; i < PTRS_PER_IOPGD; i++) {
+		u32 da;
+		u32 *iopgd;
+
+		da = i << IOPGD_SHIFT;
+		iopgd = iopgd_offset(obj, da);
+
+		if (!*iopgd)
+			continue;
+
+		if (*iopgd & IOPGD_TABLE)
+			iopte_free(iopte_offset(iopgd, 0));
+
+		*iopgd = 0;
+		flush_iopgd_range(iopgd, iopgd);
+	}
+
+	flush_iotlb_all(obj);
+
+	spin_unlock(&obj->page_table_lock);
+}
+
+/*
+ *	Device IOMMU generic operations
+ */
+static irqreturn_t iommu_fault_handler(int irq, void *data)
+{
+	u32 stat, da;
+	u32 *iopgd, *iopte;
+	int err = -EIO;
+	struct iommu *obj = data;
+
+	if (!obj->refcount)
+		return IRQ_NONE;
+
+	/* Dynamic loading TLB or PTE */
+	if (obj->isr)
+		err = obj->isr(obj);
+
+	if (!err)
+		return IRQ_HANDLED;
+
+	clk_enable(obj->clk);
+	stat = iommu_report_fault(obj, &da);
+	clk_disable(obj->clk);
+	if (!stat)
+		return IRQ_HANDLED;
+
+	iopgd = iopgd_offset(obj, da);
+
+	if (!(*iopgd & IOPGD_TABLE)) {
+		dev_err(obj->dev, "%s: da:%08x pgd:%p *pgd:%08x\n", __func__,
+			da, iopgd, *iopgd);
+		return IRQ_NONE;
+	}
+
+	iopte = iopte_offset(iopgd, da);
+
+	dev_err(obj->dev, "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n",
+		__func__, da, iopgd, *iopgd, iopte, *iopte);
+
+	return IRQ_NONE;
+}
+
+static int device_match_by_alias(struct device *dev, void *data)
+{
+	struct iommu *obj = to_iommu(dev);
+	const char *name = data;
+
+	pr_debug("%s: %s %s\n", __func__, obj->name, name);
+
+	return strcmp(obj->name, name) == 0;
+}
+
+/**
+ * iommu_get - Get iommu handler
+ * @name:	target iommu name
+ **/
+struct iommu *iommu_get(const char *name)
+{
+	int err = -ENOMEM;
+	struct device *dev;
+	struct iommu *obj;
+
+	dev = driver_find_device(&omap_iommu_driver.driver, NULL, (void *)name,
+				 device_match_by_alias);
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	obj = to_iommu(dev);
+
+	mutex_lock(&obj->iommu_lock);
+
+	if (obj->refcount++ == 0) {
+		err = iommu_enable(obj);
+		if (err)
+			goto err_enable;
+		flush_iotlb_all(obj);
+	}
+
+	if (!try_module_get(obj->owner))
+		goto err_module;
+
+	mutex_unlock(&obj->iommu_lock);
+
+	dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
+	return obj;
+
+err_module:
+	if (obj->refcount == 1)
+		iommu_disable(obj);
+err_enable:
+	obj->refcount--;
+	mutex_unlock(&obj->iommu_lock);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(iommu_get);
+
+/**
+ * iommu_put - Put back iommu handler
+ * @obj:	target iommu
+ **/
+void iommu_put(struct iommu *obj)
+{
+	if (!obj && IS_ERR(obj))
+		return;
+
+	mutex_lock(&obj->iommu_lock);
+
+	if (--obj->refcount == 0)
+		iommu_disable(obj);
+
+	module_put(obj->owner);
+
+	mutex_unlock(&obj->iommu_lock);
+
+	dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
+}
+EXPORT_SYMBOL_GPL(iommu_put);
+
+/*
+ *	OMAP Device MMU(IOMMU) detection
+ */
+static int __devinit omap_iommu_probe(struct platform_device *pdev)
+{
+	int err = -ENODEV;
+	void *p;
+	int irq;
+	struct iommu *obj;
+	struct resource *res;
+	struct iommu_platform_data *pdata = pdev->dev.platform_data;
+
+	if (pdev->num_resources != 2)
+		return -EINVAL;
+
+	obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	obj->clk = clk_get(&pdev->dev, pdata->clk_name);
+	if (IS_ERR(obj->clk))
+		goto err_clk;
+
+	obj->nr_tlb_entries = pdata->nr_tlb_entries;
+	obj->name = pdata->name;
+	obj->dev = &pdev->dev;
+	obj->ctx = (void *)obj + sizeof(*obj);
+
+	mutex_init(&obj->iommu_lock);
+	mutex_init(&obj->mmap_lock);
+	spin_lock_init(&obj->page_table_lock);
+	INIT_LIST_HEAD(&obj->mmap);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		err = -ENODEV;
+		goto err_mem;
+	}
+	obj->regbase = ioremap(res->start, resource_size(res));
+	if (!obj->regbase) {
+		err = -ENOMEM;
+		goto err_mem;
+	}
+
+	res = request_mem_region(res->start, resource_size(res),
+				 dev_name(&pdev->dev));
+	if (!res) {
+		err = -EIO;
+		goto err_mem;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		err = -ENODEV;
+		goto err_irq;
+	}
+	err = request_irq(irq, iommu_fault_handler, IRQF_SHARED,
+			  dev_name(&pdev->dev), obj);
+	if (err < 0)
+		goto err_irq;
+	platform_set_drvdata(pdev, obj);
+
+	p = (void *)__get_free_pages(GFP_KERNEL, get_order(IOPGD_TABLE_SIZE));
+	if (!p) {
+		err = -ENOMEM;
+		goto err_pgd;
+	}
+	memset(p, 0, IOPGD_TABLE_SIZE);
+	clean_dcache_area(p, IOPGD_TABLE_SIZE);
+	obj->iopgd = p;
+
+	BUG_ON(!IS_ALIGNED((unsigned long)obj->iopgd, IOPGD_TABLE_SIZE));
+
+	dev_info(&pdev->dev, "%s registered\n", obj->name);
+	return 0;
+
+err_pgd:
+	free_irq(irq, obj);
+err_irq:
+	release_mem_region(res->start, resource_size(res));
+	iounmap(obj->regbase);
+err_mem:
+	clk_put(obj->clk);
+err_clk:
+	kfree(obj);
+	return err;
+}
+
+static int __devexit omap_iommu_remove(struct platform_device *pdev)
+{
+	int irq;
+	struct resource *res;
+	struct iommu *obj = platform_get_drvdata(pdev);
+
+	platform_set_drvdata(pdev, NULL);
+
+	iopgtable_clear_entry_all(obj);
+	free_pages((unsigned long)obj->iopgd, get_order(IOPGD_TABLE_SIZE));
+
+	irq = platform_get_irq(pdev, 0);
+	free_irq(irq, obj);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(res->start, resource_size(res));
+	iounmap(obj->regbase);
+
+	clk_put(obj->clk);
+	dev_info(&pdev->dev, "%s removed\n", obj->name);
+	kfree(obj);
+	return 0;
+}
+
+static struct platform_driver omap_iommu_driver = {
+	.probe	= omap_iommu_probe,
+	.remove	= __devexit_p(omap_iommu_remove),
+	.driver	= {
+		.name	= "omap-iommu",
+	},
+};
+
+static void iopte_cachep_ctor(void *iopte)
+{
+	clean_dcache_area(iopte, IOPTE_TABLE_SIZE);
+}
+
+static int __init omap_iommu_init(void)
+{
+	struct kmem_cache *p;
+	const unsigned long flags = SLAB_HWCACHE_ALIGN;
+	size_t align = 1 << 10; /* L2 pagetable alignement */
+
+	p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags,
+			      iopte_cachep_ctor);
+	if (!p)
+		return -ENOMEM;
+	iopte_cachep = p;
+
+	return platform_driver_register(&omap_iommu_driver);
+}
+module_init(omap_iommu_init);
+
+static void __exit omap_iommu_exit(void)
+{
+	kmem_cache_destroy(iopte_cachep);
+
+	platform_driver_unregister(&omap_iommu_driver);
+}
+module_exit(omap_iommu_exit);
+
+MODULE_DESCRIPTION("omap iommu: tlb and pagetable primitives");
+MODULE_ALIAS("platform:omap-iommu");
+MODULE_AUTHOR("Hiroshi DOYU, Paul Mundt and Toshihiro Kobayashi");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/plat-omap/iopgtable.h b/arch/arm/plat-omap/iopgtable.h
new file mode 100644
index 0000000..37dac43
--- /dev/null
+++ b/arch/arm/plat-omap/iopgtable.h
@@ -0,0 +1,72 @@
+/*
+ * omap iommu: pagetable definitions
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PLAT_OMAP_IOMMU_H
+#define __PLAT_OMAP_IOMMU_H
+
+#define IOPGD_SHIFT		20
+#define IOPGD_SIZE		(1 << IOPGD_SHIFT)
+#define IOPGD_MASK		(~(IOPGD_SIZE - 1))
+#define IOSECTION_MASK		IOPGD_MASK
+#define PTRS_PER_IOPGD		(1 << (32 - IOPGD_SHIFT))
+#define IOPGD_TABLE_SIZE	(PTRS_PER_IOPGD * sizeof(u32))
+
+#define IOSUPER_SIZE		(IOPGD_SIZE << 4)
+#define IOSUPER_MASK		(~(IOSUPER_SIZE - 1))
+
+#define IOPTE_SHIFT		12
+#define IOPTE_SIZE		(1 << IOPTE_SHIFT)
+#define IOPTE_MASK		(~(IOPTE_SIZE - 1))
+#define IOPAGE_MASK		IOPTE_MASK
+#define PTRS_PER_IOPTE		(1 << (IOPGD_SHIFT - IOPTE_SHIFT))
+#define IOPTE_TABLE_SIZE	(PTRS_PER_IOPTE * sizeof(u32))
+
+#define IOLARGE_SIZE		(IOPTE_SIZE << 4)
+#define IOLARGE_MASK		(~(IOLARGE_SIZE - 1))
+
+#define IOPGD_TABLE		(1 << 0)
+#define IOPGD_SECTION		(2 << 0)
+#define IOPGD_SUPER		(1 << 18 | 2 << 0)
+
+#define IOPTE_SMALL		(2 << 0)
+#define IOPTE_LARGE		(1 << 0)
+
+#define iopgd_index(da)		(((da) >> IOPGD_SHIFT) & (PTRS_PER_IOPGD - 1))
+#define iopgd_offset(obj, da)	((obj)->iopgd + iopgd_index(da))
+
+#define iopte_paddr(iopgd)	(*iopgd & ~((1 << 10) - 1))
+#define iopte_vaddr(iopgd)	((u32 *)phys_to_virt(iopte_paddr(iopgd)))
+
+#define iopte_index(da)		(((da) >> IOPTE_SHIFT) & (PTRS_PER_IOPTE - 1))
+#define iopte_offset(iopgd, da)	(iopte_vaddr(iopgd) + iopte_index(da))
+
+static inline u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa,
+				   u32 flags)
+{
+	memset(e, 0, sizeof(*e));
+
+	e->da		= da;
+	e->pa		= pa;
+	e->valid	= 1;
+	/* FIXME: add OMAP1 support */
+	e->pgsz		= flags & MMU_CAM_PGSZ_MASK;
+	e->endian	= flags & MMU_RAM_ENDIAN_MASK;
+	e->elsz		= flags & MMU_RAM_ELSZ_MASK;
+	e->mixed	= flags & MMU_RAM_MIXED_MASK;
+
+	return iopgsz_to_bytes(e->pgsz);
+}
+
+#define to_iommu(dev)							\
+	(struct iommu *)platform_get_drvdata(to_platform_device(dev))
+
+#endif /* __PLAT_OMAP_IOMMU_H */
-- 
cgit v0.10.2


From 2bcb573343dbd7d913cb62b81463960644a3737f Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Date: Mon, 26 Jan 2009 15:13:45 +0200
Subject: omap iommu: omap2 architecture specific functions

The structure 'arch_mmu' accommodates the difference between omap1 and
omap2/3.

This patch provides omap2/3 specific functions

Signed-off-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>

diff --git a/arch/arm/mach-omap2/iommu2.c b/arch/arm/mach-omap2/iommu2.c
new file mode 100644
index 0000000..015f22a
--- /dev/null
+++ b/arch/arm/mach-omap2/iommu2.c
@@ -0,0 +1,323 @@
+/*
+ * omap iommu: omap2/3 architecture specific functions
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
+ *		Paul Mundt and Toshihiro Kobayashi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+
+#include <mach/iommu.h>
+
+/*
+ * omap2 architecture specific register bit definitions
+ */
+#define IOMMU_ARCH_VERSION	0x00000011
+
+/* SYSCONF */
+#define MMU_SYS_IDLE_SHIFT	3
+#define MMU_SYS_IDLE_FORCE	(0 << MMU_SYS_IDLE_SHIFT)
+#define MMU_SYS_IDLE_NONE	(1 << MMU_SYS_IDLE_SHIFT)
+#define MMU_SYS_IDLE_SMART	(2 << MMU_SYS_IDLE_SHIFT)
+#define MMU_SYS_IDLE_MASK	(3 << MMU_SYS_IDLE_SHIFT)
+
+#define MMU_SYS_SOFTRESET	(1 << 1)
+#define MMU_SYS_AUTOIDLE	1
+
+/* SYSSTATUS */
+#define MMU_SYS_RESETDONE	1
+
+/* IRQSTATUS & IRQENABLE */
+#define MMU_IRQ_MULTIHITFAULT	(1 << 4)
+#define MMU_IRQ_TABLEWALKFAULT	(1 << 3)
+#define MMU_IRQ_EMUMISS		(1 << 2)
+#define MMU_IRQ_TRANSLATIONFAULT	(1 << 1)
+#define MMU_IRQ_TLBMISS		(1 << 0)
+#define MMU_IRQ_MASK	\
+	(MMU_IRQ_MULTIHITFAULT | MMU_IRQ_TABLEWALKFAULT | MMU_IRQ_EMUMISS | \
+	 MMU_IRQ_TRANSLATIONFAULT)
+
+/* MMU_CNTL */
+#define MMU_CNTL_SHIFT		1
+#define MMU_CNTL_MASK		(7 << MMU_CNTL_SHIFT)
+#define MMU_CNTL_EML_TLB	(1 << 3)
+#define MMU_CNTL_TWL_EN		(1 << 2)
+#define MMU_CNTL_MMU_EN		(1 << 1)
+
+#define get_cam_va_mask(pgsz)				\
+	(((pgsz) == MMU_CAM_PGSZ_16M) ? 0xff000000 :	\
+	 ((pgsz) == MMU_CAM_PGSZ_1M)  ? 0xfff00000 :	\
+	 ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 :	\
+	 ((pgsz) == MMU_CAM_PGSZ_4K)  ? 0xfffff000 : 0)
+
+static int omap2_iommu_enable(struct iommu *obj)
+{
+	u32 l, pa;
+	unsigned long timeout;
+
+	if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd,  SZ_16K))
+		return -EINVAL;
+
+	pa = virt_to_phys(obj->iopgd);
+	if (!IS_ALIGNED(pa, SZ_16K))
+		return -EINVAL;
+
+	iommu_write_reg(obj, MMU_SYS_SOFTRESET, MMU_SYSCONFIG);
+
+	timeout = jiffies + msecs_to_jiffies(20);
+	do {
+		l = iommu_read_reg(obj, MMU_SYSSTATUS);
+		if (l & MMU_SYS_RESETDONE)
+			break;
+	} while (time_after(jiffies, timeout));
+
+	if (!(l & MMU_SYS_RESETDONE)) {
+		dev_err(obj->dev, "can't take mmu out of reset\n");
+		return -ENODEV;
+	}
+
+	l = iommu_read_reg(obj, MMU_REVISION);
+	dev_info(obj->dev, "%s: version %d.%d\n", obj->name,
+		 (l >> 4) & 0xf, l & 0xf);
+
+	l = iommu_read_reg(obj, MMU_SYSCONFIG);
+	l &= ~MMU_SYS_IDLE_MASK;
+	l |= (MMU_SYS_IDLE_SMART | MMU_SYS_AUTOIDLE);
+	iommu_write_reg(obj, l, MMU_SYSCONFIG);
+
+	iommu_write_reg(obj, MMU_IRQ_MASK, MMU_IRQENABLE);
+	iommu_write_reg(obj, pa, MMU_TTB);
+
+	l = iommu_read_reg(obj, MMU_CNTL);
+	l &= ~MMU_CNTL_MASK;
+	l |= (MMU_CNTL_MMU_EN | MMU_CNTL_TWL_EN);
+	iommu_write_reg(obj, l, MMU_CNTL);
+
+	return 0;
+}
+
+static void omap2_iommu_disable(struct iommu *obj)
+{
+	u32 l = iommu_read_reg(obj, MMU_CNTL);
+
+	l &= ~MMU_CNTL_MASK;
+	iommu_write_reg(obj, l, MMU_CNTL);
+	iommu_write_reg(obj, MMU_SYS_IDLE_FORCE, MMU_SYSCONFIG);
+
+	dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
+}
+
+static u32 omap2_iommu_fault_isr(struct iommu *obj, u32 *ra)
+{
+	int i;
+	u32 stat, da;
+	const char *err_msg[] =	{
+		"tlb miss",
+		"translation fault",
+		"emulation miss",
+		"table walk fault",
+		"multi hit fault",
+	};
+
+	stat = iommu_read_reg(obj, MMU_IRQSTATUS);
+	stat &= MMU_IRQ_MASK;
+	if (!stat)
+		return 0;
+
+	da = iommu_read_reg(obj, MMU_FAULT_AD);
+	*ra = da;
+
+	dev_err(obj->dev, "%s:\tda:%08x ", __func__, da);
+
+	for (i = 0; i < ARRAY_SIZE(err_msg); i++) {
+		if (stat & (1 << i))
+			printk("%s ", err_msg[i]);
+	}
+	printk("\n");
+
+	iommu_write_reg(obj, stat, MMU_IRQSTATUS);
+	return stat;
+}
+
+static void omap2_tlb_read_cr(struct iommu *obj, struct cr_regs *cr)
+{
+	cr->cam = iommu_read_reg(obj, MMU_READ_CAM);
+	cr->ram = iommu_read_reg(obj, MMU_READ_RAM);
+}
+
+static void omap2_tlb_load_cr(struct iommu *obj, struct cr_regs *cr)
+{
+	iommu_write_reg(obj, cr->cam | MMU_CAM_V, MMU_CAM);
+	iommu_write_reg(obj, cr->ram, MMU_RAM);
+}
+
+static u32 omap2_cr_to_virt(struct cr_regs *cr)
+{
+	u32 page_size = cr->cam & MMU_CAM_PGSZ_MASK;
+	u32 mask = get_cam_va_mask(cr->cam & page_size);
+
+	return cr->cam & mask;
+}
+
+static struct cr_regs *omap2_alloc_cr(struct iommu *obj, struct iotlb_entry *e)
+{
+	struct cr_regs *cr;
+
+	if (e->da & ~(get_cam_va_mask(e->pgsz))) {
+		dev_err(obj->dev, "%s:\twrong alignment: %08x\n", __func__,
+			e->da);
+		return ERR_PTR(-EINVAL);
+	}
+
+	cr = kmalloc(sizeof(*cr), GFP_KERNEL);
+	if (!cr)
+		return ERR_PTR(-ENOMEM);
+
+	cr->cam = (e->da & MMU_CAM_VATAG_MASK) | e->prsvd | e->pgsz;
+	cr->ram = e->pa | e->endian | e->elsz | e->mixed;
+
+	return cr;
+}
+
+static inline int omap2_cr_valid(struct cr_regs *cr)
+{
+	return cr->cam & MMU_CAM_V;
+}
+
+static u32 omap2_get_pte_attr(struct iotlb_entry *e)
+{
+	u32 attr;
+
+	attr = e->mixed << 5;
+	attr |= e->endian;
+	attr |= e->elsz >> 3;
+	attr <<= ((e->pgsz & MMU_CAM_PGSZ_4K) ? 0 : 6);
+
+	return attr;
+}
+
+static ssize_t omap2_dump_cr(struct iommu *obj, struct cr_regs *cr, char *buf)
+{
+	char *p = buf;
+
+	/* FIXME: Need more detail analysis of cam/ram */
+	p += sprintf(p, "%08x %08x\n", cr->cam, cr->ram);
+
+	return p - buf;
+}
+
+#define pr_reg(name)							\
+	p += sprintf(p, "%20s: %08x\n",					\
+		     __stringify(name), iommu_read_reg(obj, MMU_##name));
+
+static ssize_t omap2_iommu_dump_ctx(struct iommu *obj, char *buf)
+{
+	char *p = buf;
+
+	pr_reg(REVISION);
+	pr_reg(SYSCONFIG);
+	pr_reg(SYSSTATUS);
+	pr_reg(IRQSTATUS);
+	pr_reg(IRQENABLE);
+	pr_reg(WALKING_ST);
+	pr_reg(CNTL);
+	pr_reg(FAULT_AD);
+	pr_reg(TTB);
+	pr_reg(LOCK);
+	pr_reg(LD_TLB);
+	pr_reg(CAM);
+	pr_reg(RAM);
+	pr_reg(GFLUSH);
+	pr_reg(FLUSH_ENTRY);
+	pr_reg(READ_CAM);
+	pr_reg(READ_RAM);
+	pr_reg(EMU_FAULT_AD);
+
+	return p - buf;
+}
+
+static void omap2_iommu_save_ctx(struct iommu *obj)
+{
+	int i;
+	u32 *p = obj->ctx;
+
+	for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
+		p[i] = iommu_read_reg(obj, i * sizeof(u32));
+		dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
+	}
+
+	BUG_ON(p[0] != IOMMU_ARCH_VERSION);
+}
+
+static void omap2_iommu_restore_ctx(struct iommu *obj)
+{
+	int i;
+	u32 *p = obj->ctx;
+
+	for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
+		iommu_write_reg(obj, p[i], i * sizeof(u32));
+		dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
+	}
+
+	BUG_ON(p[0] != IOMMU_ARCH_VERSION);
+}
+
+static void omap2_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e)
+{
+	e->da		= cr->cam & MMU_CAM_VATAG_MASK;
+	e->pa		= cr->ram & MMU_RAM_PADDR_MASK;
+	e->valid	= cr->cam & MMU_CAM_V;
+	e->pgsz		= cr->cam & MMU_CAM_PGSZ_MASK;
+	e->endian	= cr->ram & MMU_RAM_ENDIAN_MASK;
+	e->elsz		= cr->ram & MMU_RAM_ELSZ_MASK;
+	e->mixed	= cr->ram & MMU_RAM_MIXED;
+}
+
+static const struct iommu_functions omap2_iommu_ops = {
+	.version	= IOMMU_ARCH_VERSION,
+
+	.enable		= omap2_iommu_enable,
+	.disable	= omap2_iommu_disable,
+	.fault_isr	= omap2_iommu_fault_isr,
+
+	.tlb_read_cr	= omap2_tlb_read_cr,
+	.tlb_load_cr	= omap2_tlb_load_cr,
+
+	.cr_to_e	= omap2_cr_to_e,
+	.cr_to_virt	= omap2_cr_to_virt,
+	.alloc_cr	= omap2_alloc_cr,
+	.cr_valid	= omap2_cr_valid,
+	.dump_cr	= omap2_dump_cr,
+
+	.get_pte_attr	= omap2_get_pte_attr,
+
+	.save_ctx	= omap2_iommu_save_ctx,
+	.restore_ctx	= omap2_iommu_restore_ctx,
+	.dump_ctx	= omap2_iommu_dump_ctx,
+};
+
+static int __init omap2_iommu_init(void)
+{
+	return install_iommu_arch(&omap2_iommu_ops);
+}
+module_init(omap2_iommu_init);
+
+static void __exit omap2_iommu_exit(void)
+{
+	uninstall_iommu_arch(&omap2_iommu_ops);
+}
+module_exit(omap2_iommu_exit);
+
+MODULE_AUTHOR("Hiroshi DOYU, Paul Mundt and Toshihiro Kobayashi");
+MODULE_DESCRIPTION("omap iommu: omap2/3 architecture specific functions");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/plat-omap/include/mach/iommu2.h b/arch/arm/plat-omap/include/mach/iommu2.h
new file mode 100644
index 0000000..10ad05f
--- /dev/null
+++ b/arch/arm/plat-omap/include/mach/iommu2.h
@@ -0,0 +1,96 @@
+/*
+ * omap iommu: omap2 architecture specific definitions
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MACH_IOMMU2_H
+#define __MACH_IOMMU2_H
+
+#include <linux/io.h>
+
+/*
+ * MMU Register offsets
+ */
+#define MMU_REVISION		0x00
+#define MMU_SYSCONFIG		0x10
+#define MMU_SYSSTATUS		0x14
+#define MMU_IRQSTATUS		0x18
+#define MMU_IRQENABLE		0x1c
+#define MMU_WALKING_ST		0x40
+#define MMU_CNTL		0x44
+#define MMU_FAULT_AD		0x48
+#define MMU_TTB			0x4c
+#define MMU_LOCK		0x50
+#define MMU_LD_TLB		0x54
+#define MMU_CAM			0x58
+#define MMU_RAM			0x5c
+#define MMU_GFLUSH		0x60
+#define MMU_FLUSH_ENTRY		0x64
+#define MMU_READ_CAM		0x68
+#define MMU_READ_RAM		0x6c
+#define MMU_EMU_FAULT_AD	0x70
+
+#define MMU_REG_SIZE		256
+
+/*
+ * MMU Register bit definitions
+ */
+#define MMU_LOCK_BASE_SHIFT	10
+#define MMU_LOCK_BASE_MASK	(0x1f << MMU_LOCK_BASE_SHIFT)
+#define MMU_LOCK_BASE(x)	\
+	((x & MMU_LOCK_BASE_MASK) >> MMU_LOCK_BASE_SHIFT)
+
+#define MMU_LOCK_VICT_SHIFT	4
+#define MMU_LOCK_VICT_MASK	(0x1f << MMU_LOCK_VICT_SHIFT)
+#define MMU_LOCK_VICT(x)	\
+	((x & MMU_LOCK_VICT_MASK) >> MMU_LOCK_VICT_SHIFT)
+
+#define MMU_CAM_VATAG_SHIFT	12
+#define MMU_CAM_VATAG_MASK \
+	((~0UL >> MMU_CAM_VATAG_SHIFT) << MMU_CAM_VATAG_SHIFT)
+#define MMU_CAM_P		(1 << 3)
+#define MMU_CAM_V		(1 << 2)
+#define MMU_CAM_PGSZ_MASK	3
+#define MMU_CAM_PGSZ_1M		(0 << 0)
+#define MMU_CAM_PGSZ_64K	(1 << 0)
+#define MMU_CAM_PGSZ_4K		(2 << 0)
+#define MMU_CAM_PGSZ_16M	(3 << 0)
+
+#define MMU_RAM_PADDR_SHIFT	12
+#define MMU_RAM_PADDR_MASK \
+	((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT)
+#define MMU_RAM_ENDIAN_SHIFT	9
+#define MMU_RAM_ENDIAN_MASK	(1 << MMU_RAM_ENDIAN_SHIFT)
+#define MMU_RAM_ENDIAN_BIG	(1 << MMU_RAM_ENDIAN_SHIFT)
+#define MMU_RAM_ENDIAN_LITTLE	(0 << MMU_RAM_ENDIAN_SHIFT)
+#define MMU_RAM_ELSZ_SHIFT	7
+#define MMU_RAM_ELSZ_MASK	(3 << MMU_RAM_ELSZ_SHIFT)
+#define MMU_RAM_ELSZ_8		(0 << MMU_RAM_ELSZ_SHIFT)
+#define MMU_RAM_ELSZ_16		(1 << MMU_RAM_ELSZ_SHIFT)
+#define MMU_RAM_ELSZ_32		(2 << MMU_RAM_ELSZ_SHIFT)
+#define MMU_RAM_ELSZ_NONE	(3 << MMU_RAM_ELSZ_SHIFT)
+#define MMU_RAM_MIXED_SHIFT	6
+#define MMU_RAM_MIXED_MASK	(1 << MMU_RAM_MIXED_SHIFT)
+#define MMU_RAM_MIXED		MMU_RAM_MIXED_MASK
+
+/*
+ * register accessors
+ */
+static inline u32 iommu_read_reg(struct iommu *obj, size_t offs)
+{
+	return __raw_readl(obj->regbase + offs);
+}
+
+static inline void iommu_write_reg(struct iommu *obj, u32 val, size_t offs)
+{
+	__raw_writel(val, obj->regbase + offs);
+}
+
+#endif /* __MACH_IOMMU2_H */
-- 
cgit v0.10.2


From 4bbe1ddf89a5ba3ec30fe5980912d8bda3a3cbb2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 13 Oct 2008 03:07:14 +0200
Subject: ALSA: Add extra delay count in PCM

Added runtime->delay field to adjust the delayed samples for snd_pcm_delay().
Typically a hardware FIFO length is stored in this field, so that the
extra delay between hwptr and applptr can be computed.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index c172968..267effd 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -270,6 +270,7 @@ struct snd_pcm_runtime {
 	snd_pcm_uframes_t hw_ptr_base;	/* Position at buffer restart */
 	snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */
 	unsigned long hw_ptr_jiffies;	/* Time when hw_ptr is updated */
+	snd_pcm_sframes_t delay;	/* extra delay; typically FIFO size */
 
 	/* -- HW params -- */
 	snd_pcm_access_t access;	/* access mode */
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index fc6f98e..cb769d4 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -587,14 +587,15 @@ int snd_pcm_status(struct snd_pcm_substream *substream,
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		status->avail = snd_pcm_playback_avail(runtime);
 		if (runtime->status->state == SNDRV_PCM_STATE_RUNNING ||
-		    runtime->status->state == SNDRV_PCM_STATE_DRAINING)
+		    runtime->status->state == SNDRV_PCM_STATE_DRAINING) {
 			status->delay = runtime->buffer_size - status->avail;
-		else
+			status->delay += runtime->delay;
+		} else
 			status->delay = 0;
 	} else {
 		status->avail = snd_pcm_capture_avail(runtime);
 		if (runtime->status->state == SNDRV_PCM_STATE_RUNNING)
-			status->delay = status->avail;
+			status->delay = status->avail + runtime->delay;
 		else
 			status->delay = 0;
 	}
@@ -2404,6 +2405,7 @@ static int snd_pcm_delay(struct snd_pcm_substream *substream,
 			n = snd_pcm_playback_hw_avail(runtime);
 		else
 			n = snd_pcm_capture_avail(runtime);
+		n += runtime->delay;
 		break;
 	case SNDRV_PCM_STATE_XRUN:
 		err = -EPIPE;
-- 
cgit v0.10.2


From ae1ec5e1e97f67d41e641a73380129e5905e41cc Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 13 Oct 2008 03:08:53 +0200
Subject: ALSA: usbaudio - Add delay account

Manage the PCM delay account based on the queued URBs.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 823296d..6c25fa2 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -627,6 +627,7 @@ static int prepare_playback_urb(struct snd_usb_substream *subs,
 	subs->hwptr_done += offs;
 	if (subs->hwptr_done >= runtime->buffer_size)
 		subs->hwptr_done -= runtime->buffer_size;
+	runtime->delay += offs;
 	spin_unlock_irqrestore(&subs->lock, flags);
 	urb->transfer_buffer_length = offs * stride;
 	if (period_elapsed)
@@ -636,12 +637,22 @@ static int prepare_playback_urb(struct snd_usb_substream *subs,
 
 /*
  * process after playback data complete
- * - nothing to do
+ * - decrease the delay count again
  */
 static int retire_playback_urb(struct snd_usb_substream *subs,
 			       struct snd_pcm_runtime *runtime,
 			       struct urb *urb)
 {
+	unsigned long flags;
+	int stride = runtime->frame_bits >> 3;
+	int processed = urb->transfer_buffer_length / stride;
+
+	spin_lock_irqsave(&subs->lock, flags);
+	if (processed > runtime->delay)
+		runtime->delay = 0;
+	else
+		runtime->delay -= processed;
+	spin_unlock_irqrestore(&subs->lock, flags);
 	return 0;
 }
 
@@ -1520,6 +1531,7 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream)
 	subs->hwptr_done = 0;
 	subs->transfer_done = 0;
 	subs->phase = 0;
+	runtime->delay = 0;
 
 	/* clear urbs (to be sure) */
 	deactivate_urbs(subs, 0, 1);
-- 
cgit v0.10.2


From e6e55122a54db87e22c67477de2a9978a3e4c81b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 May 2009 11:10:24 +0100
Subject: ASoC: Add headers to match patterns in MAINTAINERS

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index ef03abe..3cf4f0d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5277,6 +5277,7 @@ L:	alsa-devel@alsa-project.org (subscribers-only)
 W:	http://alsa-project.org/main/index.php/ASoC
 S:	Supported
 F:	sound/soc/
+F:	include/sound/soc*
 
 SPARC + UltraSPARC (sparc/sparc64)
 P:	David S. Miller
-- 
cgit v0.10.2


From d6ce96dabe2c4409fd009ec14250a1fdbab4b133 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 5 May 2009 01:15:24 -0400
Subject: ring-buffer: export symbols

I'm adding a module to do a series of tests on the ring buffer as well
as benchmarks. This module needs to have more of the ring buffer API
exported. There's nothing wrong with reading the ring buffer from a
module.

[ Impact: allow modules to read pages from the ring buffer ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f4cc590..3e86da9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2802,6 +2802,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
 
 	return bpage;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
 
 /**
  * ring_buffer_free_read_page - free an allocated read page
@@ -2814,6 +2815,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
 {
 	free_page((unsigned long)data);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
 
 /**
  * ring_buffer_read_page - extract a page from the ring buffer
@@ -2959,6 +2961,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
  out:
 	return ret;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
-- 
cgit v0.10.2


From f0d2c681ac0a85142fc8abe65fc33fcad35cb9b7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 29 Apr 2009 13:43:37 -0400
Subject: ring-buffer: add counters for commit overrun and nmi dropped entries

The WARN_ON in the ring buffer when a commit is preempted and the
buffer is filled by preceding writes can happen in normal operations.
The WARN_ON makes it look like a bug, not to mention, because
it does not stop tracing and calls printk which can also recurse, this
is prone to deadlock (the WARN_ON is not in a position to recurse).

This patch removes the WARN_ON and replaces it with a counter that
can be retrieved by a tracer. This counter is called commit_overrun.

While at it, I added a nmi_dropped counter to count any time an NMI entry
is dropped because the NMI could not take the spinlock.

[ Impact: prevent deadlock by printing normal case warning ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 1c2f809..f134582 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -153,6 +153,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3e86da9..26e1359 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -402,6 +402,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			nmi_dropped;
+	unsigned long			commit_overrun;
 	unsigned long			overrun;
 	unsigned long			entries;
 	u64				write_stamp;
@@ -1216,8 +1218,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 * simply fail.
 		 */
 		if (unlikely(in_nmi())) {
-			if (!__raw_spin_trylock(&cpu_buffer->lock))
+			if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+				cpu_buffer->nmi_dropped++;
 				goto out_reset;
+			}
 		} else
 			__raw_spin_lock(&cpu_buffer->lock);
 
@@ -1238,8 +1242,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 * about it.
 		 */
 		if (unlikely(next_page == commit_page)) {
-			/* This can easily happen on small ring buffers */
-			WARN_ON_ONCE(buffer->pages > 2);
+			cpu_buffer->commit_overrun++;
 			goto out_reset;
 		}
 
@@ -1926,6 +1929,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
+ * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->nmi_dropped;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
+
+/**
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->commit_overrun;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
@@ -2595,6 +2639,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
+	cpu_buffer->nmi_dropped = 0;
+	cpu_buffer->commit_overrun = 0;
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
 
-- 
cgit v0.10.2


From c8d771835e18c938dae8690611d65fe98ad30f58 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 29 Apr 2009 18:03:45 -0400
Subject: tracing: export stats of ring buffers to userspace

This patch adds stats to the ftrace ring buffers:

 # cat /debugfs/tracing/per_cpu/cpu0/stats
 entries: 42360
 overrun: 30509326
 commit overrun: 0
 nmi dropped: 0

Where entries are the total number of data entries in the buffer.

overrun is the number of entries not consumed and were overwritten by
the writer.

commit overrun is the number of entries dropped due to nested writers
wrapping the buffer before the initial writer finished the commit.

nmi dropped is the number of entries dropped due to the ring buffer
lock being held when an nmi was going to write to the ring buffer.
Note, this field will be meaningless and will go away when the ring
buffer becomes lockless.

[ Impact: let userspace know what is happening in the ring buffers ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f5427e0..74df029 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3595,6 +3595,45 @@ static const struct file_operations tracing_buffers_fops = {
 	.llseek		= no_llseek,
 };
 
+static ssize_t
+tracing_stats_read(struct file *filp, char __user *ubuf,
+		   size_t count, loff_t *ppos)
+{
+	unsigned long cpu = (unsigned long)filp->private_data;
+	struct trace_array *tr = &global_trace;
+	struct trace_seq *s;
+	unsigned long cnt;
+
+	s = kmalloc(sizeof(*s), GFP_ATOMIC);
+	if (!s)
+		return ENOMEM;
+
+	trace_seq_init(s);
+
+	cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "entries: %ld\n", cnt);
+
+	cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "overrun: %ld\n", cnt);
+
+	cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+
+	cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
+
+	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+	kfree(s);
+
+	return count;
+}
+
+static const struct file_operations tracing_stats_fops = {
+	.open		= tracing_open_generic,
+	.read		= tracing_stats_read,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3708,6 +3747,9 @@ static void tracing_init_debugfs_percpu(long cpu)
 
 	trace_create_file("trace_pipe_raw", 0444, d_cpu,
 			(void *) cpu, &tracing_buffers_fops);
+
+	trace_create_file("stats", 0444, d_cpu,
+			(void *) cpu, &tracing_stats_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
-- 
cgit v0.10.2


From 60aa605dfce2976e54fa76e805ab0f221372d4d9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:21 +0200
Subject: sched: rt: document the risk of small values in the bandwidth
 settings

Thomas noted that we should disallow sysctl_sched_rt_runtime == 0 for
(!RT_GROUP) since the root group always has some RT tasks in it.

Further, update the documentation to inspire clue.

[ Impact: exclude corner-case sysctl_sched_rt_runtime value ]

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090505155436.863098054@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 5ba4d3f..eb74b01 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -4,6 +4,7 @@
 CONTENTS
 ========
 
+0. WARNING
 1. Overview
   1.1 The problem
   1.2 The solution
@@ -14,6 +15,23 @@ CONTENTS
 3. Future plans
 
 
+0. WARNING
+==========
+
+ Fiddling with these settings can result in an unstable system, the knobs are
+ root only and assumes root knows what he is doing.
+
+Most notable:
+
+ * very small values in sched_rt_period_us can result in an unstable
+   system when the period is smaller than either the available hrtimer
+   resolution, or the time it takes to handle the budget refresh itself.
+
+ * very small values in sched_rt_runtime_us can result in an unstable
+   system when the runtime is so small the system has difficulty making
+   forward progress (NOTE: the migration thread and kstopmachine both
+   are real-time processes).
+
 1. Overview
 ===========
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 54d67b9..2a43a58 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9917,6 +9917,13 @@ static int sched_rt_global_constraints(void)
 	if (sysctl_sched_rt_period <= 0)
 		return -EINVAL;
 
+	/*
+	 * There's always some RT tasks in the root group
+	 * -- migration, kstopmachine etc..
+	 */
+	if (sysctl_sched_rt_runtime == 0)
+		return -EBUSY;
+
 	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
-- 
cgit v0.10.2


From 80ab8817bf9b740df1f0778c41875e93151409bf Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Tue, 5 May 2009 11:25:00 +0200
Subject: ASoC: cs4270: introduce CS4270_I2C_INCR

Replace the magic 0x80 value with a suitable macro definition.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c
index ece6ed6..153124b 100644
--- a/sound/soc/codecs/cs4270.c
+++ b/sound/soc/codecs/cs4270.c
@@ -56,6 +56,7 @@
 #define CS4270_FIRSTREG	0x01
 #define CS4270_LASTREG	0x08
 #define CS4270_NUMREGS	(CS4270_LASTREG - CS4270_FIRSTREG + 1)
+#define CS4270_I2C_INCR	0x80
 
 /* Bit masks for the CS4270 registers */
 #define CS4270_CHIPID_ID	0xF0
@@ -296,7 +297,7 @@ static int cs4270_fill_cache(struct snd_soc_codec *codec)
 	s32 length;
 
 	length = i2c_smbus_read_i2c_block_data(i2c_client,
-		CS4270_FIRSTREG | 0x80, CS4270_NUMREGS, cache);
+		CS4270_FIRSTREG | CS4270_I2C_INCR, CS4270_NUMREGS, cache);
 
 	if (length != CS4270_NUMREGS) {
 		dev_err(codec->dev, "i2c read failure, addr=0x%x\n",
-- 
cgit v0.10.2


From c66de4a5be7913247bd83d79168f8e4420c9cfbc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:22 +0200
Subject: perf_counter: uncouple data_head updates from wakeups

Keep data_head up-to-date irrespective of notifications. This fixes
the case where you disable a counter and don't get a notification for
the last few pending events, and it also allows polling usage.

[ Impact: increase precision of perfcounter mmap-ed fields ]

Suggested-by: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090505155436.925084300@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a356fa6..17b6310 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -362,9 +362,11 @@ struct perf_mmap_data {
 	atomic_t			head;		/* write position    */
 	atomic_t			events;		/* event limit       */
 
-	atomic_t			wakeup_head;	/* completed head    */
+	atomic_t			done_head;	/* completed head    */
 	atomic_t			lock;		/* concurrent writes */
 
+	atomic_t			wakeup;		/* needs a wakeup    */
+
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5f86a11..ba5e921 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1696,7 +1696,6 @@ struct perf_output_handle {
 	struct perf_mmap_data	*data;
 	unsigned int		offset;
 	unsigned int		head;
-	int			wakeup;
 	int			nmi;
 	int			overflow;
 	int			locked;
@@ -1752,8 +1751,7 @@ static void perf_output_unlock(struct perf_output_handle *handle)
 	struct perf_mmap_data *data = handle->data;
 	int head, cpu;
 
-	if (handle->wakeup)
-		data->wakeup_head = data->head;
+	data->done_head = data->head;
 
 	if (!handle->locked)
 		goto out;
@@ -1764,13 +1762,11 @@ again:
 	 * before we publish the new head, matched by a rmb() in userspace when
 	 * reading this position.
 	 */
-	while ((head = atomic_xchg(&data->wakeup_head, 0))) {
+	while ((head = atomic_xchg(&data->done_head, 0)))
 		data->user_page->data_head = head;
-		handle->wakeup = 1;
-	}
 
 	/*
-	 * NMI can happen here, which means we can miss a wakeup_head update.
+	 * NMI can happen here, which means we can miss a done_head update.
 	 */
 
 	cpu = atomic_xchg(&data->lock, 0);
@@ -1779,7 +1775,7 @@ again:
 	/*
 	 * Therefore we have to validate we did not indeed do so.
 	 */
-	if (unlikely(atomic_read(&data->wakeup_head))) {
+	if (unlikely(atomic_read(&data->done_head))) {
 		/*
 		 * Since we had it locked, we can lock it again.
 		 */
@@ -1789,7 +1785,7 @@ again:
 		goto again;
 	}
 
-	if (handle->wakeup)
+	if (atomic_xchg(&data->wakeup, 0))
 		perf_output_wakeup(handle);
 out:
 	local_irq_restore(handle->flags);
@@ -1824,7 +1820,9 @@ static int perf_output_begin(struct perf_output_handle *handle,
 
 	handle->offset	= offset;
 	handle->head	= head;
-	handle->wakeup	= (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
+
+	if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
+		atomic_set(&data->wakeup, 1);
 
 	return 0;
 
@@ -1882,7 +1880,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 		int events = atomic_inc_return(&data->events);
 		if (events >= wakeup_events) {
 			atomic_sub(wakeup_events, &data->events);
-			handle->wakeup = 1;
+			atomic_set(&data->wakeup, 1);
 		}
 	}
 
-- 
cgit v0.10.2


From 6de6a7b95705b859b61430fa3afa1403034eb3e6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:23 +0200
Subject: perf_counter: add ioctl(PERF_COUNTER_IOC_RESET)

Provide a way to reset an existing counter - this eases PAPI
libraries around perfcounters.

Similar to read() it doesn't collapse pending child counters.

[ Impact: new perfcounter fd ioctl method to reset counters ]

Suggested-by: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090505155437.022272933@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 17b6310..0fcbf34 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -160,6 +160,7 @@ struct perf_counter_hw_event {
 #define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
 #define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
 #define PERF_COUNTER_IOC_REFRESH	_IOW('$', 2, u32)
+#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
 
 /*
  * Structure of the page that can be mapped via mmap
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ba5e921..6e6834e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1288,6 +1288,11 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
+static void perf_counter_reset(struct perf_counter *counter)
+{
+	atomic_set(&counter->count, 0);
+}
+
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct perf_counter *counter = file->private_data;
@@ -1303,6 +1308,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_COUNTER_IOC_REFRESH:
 		perf_counter_refresh(counter, arg);
 		break;
+	case PERF_COUNTER_IOC_RESET:
+		perf_counter_reset(counter);
+		break;
 	default:
 		err = -ENOTTY;
 	}
-- 
cgit v0.10.2


From c5078f78b455fbf67ea71442c7e7ca8acf9ff095 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:24 +0200
Subject: perf_counter: provide an mlock threshold

Provide a threshold to relax the mlock accounting, increasing usability.

Each counter gets perf_counter_mlock_kb for free.

[ Impact: allow more mmap buffering ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090505155437.112113632@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0fcbf34..00081d8 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -358,6 +358,7 @@ struct file;
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
 	int				nr_pages;	/* nr of data pages  */
+	int				nr_locked;	/* nr pages mlocked  */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
 	atomic_t			head;		/* write position    */
@@ -575,6 +576,7 @@ struct perf_callchain_entry {
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_priv;
+extern int sysctl_perf_counter_mlock;
 
 extern void perf_counter_init(void);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 6e6834e..2d13427 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -44,6 +44,7 @@ static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
+int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1461,7 +1462,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
-		vma->vm_mm->locked_vm -= counter->data->nr_pages + 1;
+		vma->vm_mm->locked_vm -= counter->data->nr_locked;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
 	}
@@ -1480,6 +1481,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long nr_pages;
 	unsigned long locked, lock_limit;
 	int ret = 0;
+	long extra;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
@@ -1507,8 +1509,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 	}
 
-	locked = vma->vm_mm->locked_vm;
-	locked += nr_pages + 1;
+	extra = nr_pages /* + 1 only account the data pages */;
+	extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+	if (extra < 0)
+		extra = 0;
+
+	locked = vma->vm_mm->locked_vm + extra;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
@@ -1524,7 +1530,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 
 	atomic_set(&counter->mmap_count, 1);
-	vma->vm_mm->locked_vm += nr_pages + 1;
+	vma->vm_mm->locked_vm += extra;
+	counter->data->nr_locked = extra;
 unlock:
 	mutex_unlock(&counter->mmap_mutex);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8203d70..3b05c2b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -920,6 +920,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_mlock_kb",
+		.data		= &sysctl_perf_counter_mlock,
+		.maxlen		= sizeof(sysctl_perf_counter_mlock),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 /*
  * NOTE: do not add new entries to this table unless you have read
-- 
cgit v0.10.2


From 22c1558e51c210787c6cf75d8905246fc91ec030 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:25 +0200
Subject: perf_counter: fix the output lock

Use -1 instead of 0 as unlocked, since 0 is a valid cpu number.

( This is not an issue right now but will be once we allow multiple
  counters to output to the same mmap area. )

[ Impact: prepare code for multi-counter profile output ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090505155437.232686598@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2d13427..c881afe 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1409,6 +1409,7 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
 	}
 
 	data->nr_pages = nr_pages;
+	atomic_set(&data->lock, -1);
 
 	rcu_assign_pointer(counter->data, data);
 
@@ -1755,7 +1756,7 @@ static void perf_output_lock(struct perf_output_handle *handle)
 	if (in_nmi() && atomic_read(&data->lock) == cpu)
 		return;
 
-	while (atomic_cmpxchg(&data->lock, 0, cpu) != 0)
+	while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
 		cpu_relax();
 
 	handle->locked = 1;
@@ -1784,7 +1785,7 @@ again:
 	 * NMI can happen here, which means we can miss a done_head update.
 	 */
 
-	cpu = atomic_xchg(&data->lock, 0);
+	cpu = atomic_xchg(&data->lock, -1);
 	WARN_ON_ONCE(cpu != smp_processor_id());
 
 	/*
@@ -1794,7 +1795,7 @@ again:
 		/*
 		 * Since we had it locked, we can lock it again.
 		 */
-		while (atomic_cmpxchg(&data->lock, 0, cpu) != 0)
+		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
 			cpu_relax();
 
 		goto again;
-- 
cgit v0.10.2


From 2023b359214bbc5bad31571cf50d7fb83b535c0a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:26 +0200
Subject: perf_counter: inheritable sample counters

Redirect the output to the parent counter and put in some sanity checks.

[ Impact: new perfcounter feature - inherited sampling counters ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090505155437.331556171@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c881afe..60e55f0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -738,10 +738,18 @@ static void perf_counter_enable(struct perf_counter *counter)
 	spin_unlock_irq(&ctx->lock);
 }
 
-static void perf_counter_refresh(struct perf_counter *counter, int refresh)
+static int perf_counter_refresh(struct perf_counter *counter, int refresh)
 {
+	/*
+	 * not supported on inherited counters
+	 */
+	if (counter->hw_event.inherit)
+		return -EINVAL;
+
 	atomic_add(refresh, &counter->event_limit);
 	perf_counter_enable(counter);
+
+	return 0;
 }
 
 /*
@@ -1307,7 +1315,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		perf_counter_disable_family(counter);
 		break;
 	case PERF_COUNTER_IOC_REFRESH:
-		perf_counter_refresh(counter, arg);
+		err = perf_counter_refresh(counter, arg);
 		break;
 	case PERF_COUNTER_IOC_RESET:
 		perf_counter_reset(counter);
@@ -1814,6 +1822,12 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	struct perf_mmap_data *data;
 	unsigned int offset, head;
 
+	/*
+	 * For inherited counters we send all the output towards the parent.
+	 */
+	if (counter->parent)
+		counter = counter->parent;
+
 	rcu_read_lock();
 	data = rcu_dereference(counter->data);
 	if (!data)
@@ -1995,6 +2009,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_ADDR)
 		perf_output_put(&handle, addr);
 
+	/*
+	 * XXX PERF_RECORD_GROUP vs inherited counters seems difficult.
+	 */
 	if (record_type & PERF_RECORD_GROUP) {
 		struct perf_counter *leader, *sub;
 		u64 nr = counter->nr_siblings;
@@ -2281,6 +2298,11 @@ int perf_counter_overflow(struct perf_counter *counter,
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	/*
+	 * XXX event_limit might not quite work as expected on inherited
+	 * counters
+	 */
+
 	counter->pending_kill = POLL_IN;
 	if (events && atomic_dec_and_test(&counter->event_limit)) {
 		ret = 1;
@@ -2801,6 +2823,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	pmu = NULL;
 
+	/*
+	 * we currently do not support PERF_RECORD_GROUP on inherited counters
+	 */
+	if (hw_event->inherit && (hw_event->record_type & PERF_RECORD_GROUP))
+		goto done;
+
 	if (perf_event_raw(hw_event)) {
 		pmu = hw_perf_counter_init(counter);
 		goto done;
-- 
cgit v0.10.2


From 16c8a10932aef971292c9570eb5f60b5d4e83ed2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:27 +0200
Subject: perf_counter: tools: update the tools to support process and
 inherited counters

"perf record":
 - per task counter
 - inherit switch
 - nmi switch

"perf report":
 - userspace/kernel filter

"perf stat":
 - userspace/kernel filter

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090505155437.389163017@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index ddfdcf8..5f5e6df 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -45,7 +45,10 @@ static unsigned int		mmap_pages			= 16;
 static int			output;
 static char 			*output_name			= "output.perf";
 static int			group				= 0;
-static unsigned int		realtime_prio			=  0;
+static unsigned int		realtime_prio			= 0;
+static int			system_wide			= 0;
+static int			inherit				= 1;
+static int			nmi				= 1;
 
 const unsigned int default_count[] = {
 	1000000,
@@ -167,7 +170,7 @@ static void display_events_help(void)
 static void display_help(void)
 {
 	printf(
-	"Usage: perf-record [<options>]\n"
+	"Usage: perf-record [<options>] <cmd>\n"
 	"perf-record Options (up to %d event types can be specified at once):\n\n",
 		 MAX_COUNTERS);
 
@@ -178,12 +181,13 @@ static void display_help(void)
 	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
 	" -o file   --output=<file>      # output file\n"
 	" -r prio   --realtime=<prio>    # use RT prio\n"
+	" -s        --system             # system wide profiling\n"
 	);
 
 	exit(0);
 }
 
-static void process_options(int argc, char *argv[])
+static void process_options(int argc, const char *argv[])
 {
 	int error = 0, counter;
 
@@ -196,9 +200,12 @@ static void process_options(int argc, char *argv[])
 			{"mmap_pages",	required_argument,	NULL, 'm'},
 			{"output",	required_argument,	NULL, 'o'},
 			{"realtime",	required_argument,	NULL, 'r'},
+			{"system",	no_argument,		NULL, 's'},
+			{"inherit",	no_argument,		NULL, 'i'},
+			{"nmi",		no_argument,		NULL, 'n'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:c:e:m:o:r:",
+		int c = getopt_long(argc, argv, "+:c:e:m:o:r:sin",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -209,9 +216,16 @@ static void process_options(int argc, char *argv[])
 		case 'm': mmap_pages			=   atoi(optarg); break;
 		case 'o': output_name			= strdup(optarg); break;
 		case 'r': realtime_prio			=   atoi(optarg); break;
+		case 's': system_wide                   ^=             1; break;
+		case 'i': inherit			^=	       1; break;
+		case 'n': nmi				^=	       1; break;
 		default: error = 1; break;
 		}
 	}
+
+	if (argc - optind == 0)
+		error = 1;
+
 	if (error)
 		display_help();
 
@@ -325,18 +339,82 @@ static void mmap_read(struct mmap_data *md)
 
 static volatile int done = 0;
 
-static void sigchld_handler(int sig)
+static void sig_handler(int sig)
 {
-	if (sig == SIGCHLD)
-		done = 1;
+	done = 1;
 }
 
-int cmd_record(int argc, char **argv)
+static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
+static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+
+static int nr_poll;
+static int nr_cpu;
+
+static void open_counters(int cpu)
 {
-	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
-	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 	struct perf_counter_hw_event hw_event;
-	int i, counter, group_fd, nr_poll = 0;
+	int counter, group_fd;
+	int track = 1;
+	pid_t pid = -1;
+
+	if (cpu < 0)
+		pid = 0;
+
+	group_fd = -1;
+	for (counter = 0; counter < nr_counters; counter++) {
+
+		memset(&hw_event, 0, sizeof(hw_event));
+		hw_event.config		= event_id[counter];
+		hw_event.irq_period	= event_count[counter];
+		hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
+		hw_event.nmi		= nmi;
+		hw_event.mmap		= track;
+		hw_event.comm		= track;
+		hw_event.inherit	= (cpu < 0) && inherit;
+
+		track = 0; // only the first counter needs these
+
+		fd[nr_cpu][counter] =
+			sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
+
+		if (fd[nr_cpu][counter] < 0) {
+			int err = errno;
+			printf("kerneltop error: syscall returned with %d (%s)\n",
+					fd[nr_cpu][counter], strerror(err));
+			if (err == EPERM)
+				printf("Are you root?\n");
+			exit(-1);
+		}
+		assert(fd[nr_cpu][counter] >= 0);
+		fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
+
+		/*
+		 * First counter acts as the group leader:
+		 */
+		if (group && group_fd == -1)
+			group_fd = fd[nr_cpu][counter];
+
+		event_array[nr_poll].fd = fd[nr_cpu][counter];
+		event_array[nr_poll].events = POLLIN;
+		nr_poll++;
+
+		mmap_array[nr_cpu][counter].counter = counter;
+		mmap_array[nr_cpu][counter].prev = 0;
+		mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
+		mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+				PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
+		if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
+			printf("kerneltop error: failed to mmap with %d (%s)\n",
+					errno, strerror(errno));
+			exit(-1);
+		}
+	}
+	nr_cpu++;
+}
+
+int cmd_record(int argc, const char **argv)
+{
+	int i, counter;
 	pid_t pid;
 	int ret;
 
@@ -357,54 +435,13 @@ int cmd_record(int argc, char **argv)
 	argc -= optind;
 	argv += optind;
 
-	for (i = 0; i < nr_cpus; i++) {
-		group_fd = -1;
-		for (counter = 0; counter < nr_counters; counter++) {
-
-			memset(&hw_event, 0, sizeof(hw_event));
-			hw_event.config		= event_id[counter];
-			hw_event.irq_period	= event_count[counter];
-			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
-			hw_event.nmi		= 1;
-			hw_event.mmap		= 1;
-			hw_event.comm		= 1;
-
-			fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0);
-			if (fd[i][counter] < 0) {
-				int err = errno;
-				printf("kerneltop error: syscall returned with %d (%s)\n",
-					fd[i][counter], strerror(err));
-				if (err == EPERM)
-					printf("Are you root?\n");
-				exit(-1);
-			}
-			assert(fd[i][counter] >= 0);
-			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
-
-			/*
-			 * First counter acts as the group leader:
-			 */
-			if (group && group_fd == -1)
-				group_fd = fd[i][counter];
-
-			event_array[nr_poll].fd = fd[i][counter];
-			event_array[nr_poll].events = POLLIN;
-			nr_poll++;
-
-			mmap_array[i][counter].counter = counter;
-			mmap_array[i][counter].prev = 0;
-			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
-			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-					PROT_READ, MAP_SHARED, fd[i][counter], 0);
-			if (mmap_array[i][counter].base == MAP_FAILED) {
-				printf("kerneltop error: failed to mmap with %d (%s)\n",
-						errno, strerror(errno));
-				exit(-1);
-			}
-		}
-	}
+	if (!system_wide)
+		open_counters(-1);
+	else for (i = 0; i < nr_cpus; i++)
+		open_counters(i);
 
-	signal(SIGCHLD, sigchld_handler);
+	signal(SIGCHLD, sig_handler);
+	signal(SIGINT, sig_handler);
 
 	pid = fork();
 	if (pid < 0)
@@ -434,7 +471,7 @@ int cmd_record(int argc, char **argv)
 	while (!done) {
 		int hits = events;
 
-		for (i = 0; i < nr_cpus; i++) {
+		for (i = 0; i < nr_cpu; i++) {
 			for (counter = 0; counter < nr_counters; counter++)
 				mmap_read(&mmap_array[i][counter]);
 		}
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 6de38d2..e2fa117 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -87,6 +87,9 @@
 
 #include "perf.h"
 
+#define EVENT_MASK_KERNEL		1
+#define EVENT_MASK_USER			2
+
 static int			system_wide			=  0;
 
 static int			nr_counters			=  0;
@@ -104,6 +107,7 @@ static __u64			event_id[MAX_COUNTERS]		= {
 static int			default_interval = 100000;
 static int			event_count[MAX_COUNTERS];
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+static int			event_mask[MAX_COUNTERS];
 
 static int			tid				= -1;
 static int			profile_cpu			= -1;
@@ -258,12 +262,23 @@ static __u64 match_event_symbols(char *str)
 	__u64 config, id;
 	int type;
 	unsigned int i;
+	char mask_str[4];
 
 	if (sscanf(str, "r%llx", &config) == 1)
 		return config | PERF_COUNTER_RAW_MASK;
 
-	if (sscanf(str, "%d:%llu", &type, &id) == 2)
-		return EID(type, id);
+	switch (sscanf(str, "%d:%llu:%2s", &type, &id, mask_str)) {
+		case 3:
+			if (strchr(mask_str, 'u'))
+				event_mask[nr_counters] |= EVENT_MASK_USER;
+			if (strchr(mask_str, 'k'))
+				event_mask[nr_counters] |= EVENT_MASK_KERNEL;
+		case 2:
+			return EID(type, id);
+
+		default:
+			break;
+	}
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
 		if (!strncmp(str, event_symbols[i].symbol,
@@ -313,6 +328,11 @@ static void create_perfstat_counter(int counter)
 	hw_event.config		= event_id[counter];
 	hw_event.record_type	= 0;
 	hw_event.nmi		= 0;
+	hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
+	hw_event.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
+
+printf("exclude: %d\n", event_mask[counter]);
+
 	if (scale)
 		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
 					  PERF_FORMAT_TOTAL_TIME_RUNNING;
diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc
index 911d7f3..8855107 100644
--- a/Documentation/perf_counter/perf-report.cc
+++ b/Documentation/perf_counter/perf-report.cc
@@ -33,8 +33,13 @@
 #include <string>
 
 
+#define SHOW_KERNEL	1
+#define SHOW_USER	2
+#define SHOW_HV		4
+
 static char 		const *input_name = "output.perf";
 static int		input;
+static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
@@ -359,15 +364,21 @@ static void process_options(int argc, char *argv[])
 		/** Options for getopt */
 		static struct option long_options[] = {
 			{"input",	required_argument,	NULL, 'i'},
+			{"no-user",	no_argument,		NULL, 'u'},
+			{"no-kernel",	no_argument,		NULL, 'k'},
+			{"no-hv",	no_argument,		NULL, 'h'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:i:",
+		int c = getopt_long(argc, argv, "+:i:kuh",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
 
 		switch (c) {
 		case 'i': input_name			= strdup(optarg); break;
+		case 'k': show_mask &= ~SHOW_KERNEL; break;
+		case 'u': show_mask &= ~SHOW_USER; break;
+		case 'h': show_mask &= ~SHOW_HV; break;
 		default: error = 1; break;
 		}
 	}
@@ -443,22 +454,28 @@ more:
 
 	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
 		std::string comm, sym, level;
+		int show = 0;
 		char output[1024];
 
 		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+			show |= SHOW_KERNEL;
 			level = " [k] ";
 			sym = resolve_kernel_symbol(event->ip.ip);
 		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+			show |= SHOW_USER;
 			level = " [.] ";
 			sym = resolve_user_symbol(event->ip.pid, event->ip.ip);
 		} else {
+			show |= SHOW_HV;
 			level = " [H] ";
 		}
-		comm = resolve_comm(event->ip.pid);
 
-		snprintf(output, sizeof(output), "%16s %s %s",
-				comm.c_str(), level.c_str(), sym.c_str());
-		hist[output]++;
+		if (show & show_mask) {
+			comm = resolve_comm(event->ip.pid);
+			snprintf(output, sizeof(output), "%16s %s %s",
+					comm.c_str(), level.c_str(), sym.c_str());
+			hist[output]++;
+		}
 
 		total++;
 
-- 
cgit v0.10.2


From e4906eff9e6fbd2d311abcbcc53d5a531773c982 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 30 Apr 2009 20:49:44 -0400
Subject: ring-buffer: convert cpu buffer entries to local_t

The entries counter in cpu buffer is not atomic. It can be updated by
other interrupts or from another CPU (readers).

But making entries into "atomic_t" causes an atomic operation that can
hurt performance. Instead we convert it to a local_t that will increment
a counter with a local CPU atomic operation (if the arch supports it).

Instead of fighting with readers and overwrites that decrement the counter,
I added a "read" counter. Every time a reader reads an entry it is
incremented.

We already have a overrun counter and with that, the entries counter and
the read counter, we can calculate the total number of entries in the
buffer with:

  (entries - overrun) - read

As long as the total number of entries in the ring buffer is less than
the word size, this will work. But since the entries counter was previously
a long, this is no different than what we had before.

Thanks to Andrew Morton for pointing out in the first version that
atomic_t does not replace unsigned long. I switched to atomic_long_t
even though it is signed. A negative count is most likely a bug.

[ Impact: keep accurate count of cpu buffer entries ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 26e1359..c792ea8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -405,7 +405,8 @@ struct ring_buffer_per_cpu {
 	unsigned long			nmi_dropped;
 	unsigned long			commit_overrun;
 	unsigned long			overrun;
-	unsigned long			entries;
+	unsigned long			read;
+	local_t				entries;
 	u64				write_stamp;
 	u64				read_stamp;
 	atomic_t			record_disabled;
@@ -997,7 +998,6 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
 		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 			continue;
 		cpu_buffer->overrun++;
-		cpu_buffer->entries--;
 	}
 }
 
@@ -1588,7 +1588,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->entries++;
+	local_inc(&cpu_buffer->entries);
 
 	/* Only process further if we own the commit */
 	if (!rb_is_commit(cpu_buffer, event))
@@ -1722,7 +1722,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	 * The commit is still visible by the reader, so we
 	 * must increment entries.
 	 */
-	cpu_buffer->entries++;
+	local_inc(&cpu_buffer->entries);
  out:
 	/*
 	 * If a write came in and pushed the tail page
@@ -1902,7 +1902,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
-	ret = cpu_buffer->entries;
+	ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
+		- cpu_buffer->read;
 
 	return ret;
 }
@@ -1985,7 +1986,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
 	/* if you care about this being correct, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		entries += cpu_buffer->entries;
+		entries += (local_read(&cpu_buffer->entries) -
+			    cpu_buffer->overrun) - cpu_buffer->read;
 	}
 
 	return entries;
@@ -2225,7 +2227,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 
 	if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 			|| rb_discarded_event(event))
-		cpu_buffer->entries--;
+		cpu_buffer->read++;
 
 	rb_update_read_stamp(cpu_buffer, event);
 
@@ -2642,7 +2644,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->nmi_dropped = 0;
 	cpu_buffer->commit_overrun = 0;
 	cpu_buffer->overrun = 0;
-	cpu_buffer->entries = 0;
+	cpu_buffer->read = 0;
+	local_set(&cpu_buffer->entries, 0);
 
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
@@ -2813,7 +2816,7 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
 		/* Only count data entries */
 		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 			continue;
-		cpu_buffer->entries--;
+		cpu_buffer->read++;
 	}
 	__raw_spin_unlock(&cpu_buffer->lock);
 }
-- 
cgit v0.10.2


From 41c51c98f588edcdf6141cff1895df738e03ddd4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 3 May 2009 23:11:18 +0200
Subject: rcu: rcu_sched_grace_period(): kill the bogus flush_signals()

As a kernel thread, rcu_sched_grace_period() runs with all signals ignored.
It can never receive a signal even if it sleeps in TASK_INTERRUPTIBLE, it
needs the explicit allow_signal() to be visible for signals.

[ Impact: reduce kernel size, remove dead code ]

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20090503211118.GA22973@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ce97a4d..beb0e65 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1356,17 +1356,11 @@ static int rcu_sched_grace_period(void *arg)
 
 		rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
 		spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
-		ret = 0;
+		ret = 0; /* unused */
 		__wait_event_interruptible(rcu_ctrlblk.sched_wq,
 			rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
 			ret);
 
-		/*
-		 * Signals would prevent us from sleeping, and we cannot
-		 * do much with them in any case.  So flush them.
-		 */
-		if (ret)
-			flush_signals(current);
 		couldsleepnext = 0;
 
 	} while (!kthread_should_stop());
-- 
cgit v0.10.2


From 778c55d44eb4f5f658915ed631d68ed9d1ac3ad1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 1 May 2009 18:44:45 -0400
Subject: ring-buffer: record page entries in buffer page descriptor

Currently, when the ring buffer writer overflows the buffer and must
write over non consumed data, we increment the overrun counter by
reading the entries on the page we are about to overwrite. This reads
the entries one by one.

This is not very effecient. This patch adds another entry counter
into each buffer page descriptor that keeps track of the number of
entries on the page. Now on overwrite, the overrun counter simply
needs to add the number of entries that is on the page it is about
to overwrite.

[ Impact: speed up of ring buffer in overwrite mode ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c792ea8..342eacc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -321,9 +321,10 @@ struct buffer_data_page {
 };
 
 struct buffer_page {
+	struct list_head list;		/* list of buffer pages */
 	local_t		 write;		/* index for next write */
 	unsigned	 read;		/* index for next read */
-	struct list_head list;		/* list of free pages */
+	local_t		 entries;	/* entries on this page */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -977,30 +978,6 @@ static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
 	return rb_page_commit(cpu_buffer->head_page);
 }
 
-/*
- * When the tail hits the head and the buffer is in overwrite mode,
- * the head jumps to the next page and all content on the previous
- * page is discarded. But before doing so, we update the overrun
- * variable of the buffer.
- */
-static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	struct ring_buffer_event *event;
-	unsigned long head;
-
-	for (head = 0; head < rb_head_size(cpu_buffer);
-	     head += rb_event_length(event)) {
-
-		event = __rb_page_index(cpu_buffer->head_page, head);
-		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-			return;
-		/* Only count data entries */
-		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
-			continue;
-		cpu_buffer->overrun++;
-	}
-}
-
 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
 			       struct buffer_page **bpage)
 {
@@ -1253,7 +1230,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 			/* tail_page has not moved yet? */
 			if (tail_page == cpu_buffer->tail_page) {
 				/* count overflows */
-				rb_update_overflow(cpu_buffer);
+				cpu_buffer->overrun +=
+					local_read(&head_page->entries);
 
 				rb_inc_page(cpu_buffer, &head_page);
 				cpu_buffer->head_page = head_page;
@@ -1268,6 +1246,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 */
 		if (tail_page == cpu_buffer->tail_page) {
 			local_set(&next_page->write, 0);
+			local_set(&next_page->entries, 0);
 			local_set(&next_page->page->commit, 0);
 			cpu_buffer->tail_page = next_page;
 
@@ -1313,6 +1292,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	event = __rb_page_index(tail_page, tail);
 	rb_update_event(event, type, length);
 
+	/* The passed in type is zero for DATA */
+	if (likely(!type))
+		local_inc(&tail_page->entries);
+
 	/*
 	 * If this is a commit and the tail is zero, then update
 	 * this page's time stamp.
@@ -2183,6 +2166,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page->list.prev = reader->list.prev;
 
 	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 
 	/* Make the reader page now replace the head */
@@ -2629,6 +2613,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->head_page
 		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
 	local_set(&cpu_buffer->head_page->write, 0);
+	local_set(&cpu_buffer->head_page->entries, 0);
 	local_set(&cpu_buffer->head_page->page->commit, 0);
 
 	cpu_buffer->head_page->read = 0;
@@ -2638,6 +2623,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 	local_set(&cpu_buffer->reader_page->write, 0);
+	local_set(&cpu_buffer->reader_page->entries, 0);
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
@@ -2996,6 +2982,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		bpage = reader->page;
 		reader->page = *data_page;
 		local_set(&reader->write, 0);
+		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
 
-- 
cgit v0.10.2


From afbab76a62b69ea6197e19727d4b8a8aef8deb25 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 1 May 2009 19:40:05 -0400
Subject: ring-buffer: have read page swap increment counter with page entries

In the swap page ring buffer code that is used by the ftrace splice code,
we scan the page to increment the counter of entries read.

With the number of entries already in the page we simply need to add it.

[ Impact: speed up reading page from ring buffer ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 342eacc..9e42a74 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2785,28 +2785,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 
-static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
-			      struct buffer_data_page *bpage,
-			      unsigned int offset)
-{
-	struct ring_buffer_event *event;
-	unsigned long head;
-
-	__raw_spin_lock(&cpu_buffer->lock);
-	for (head = offset; head < local_read(&bpage->commit);
-	     head += rb_event_length(event)) {
-
-		event = __rb_data_page_index(bpage, head);
-		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-			return;
-		/* Only count data entries */
-		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
-			continue;
-		cpu_buffer->read++;
-	}
-	__raw_spin_unlock(&cpu_buffer->lock);
-}
-
 /**
  * ring_buffer_alloc_read_page - allocate a page to read from buffer
  * @buffer: the buffer to allocate for.
@@ -2977,6 +2955,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		/* we copied everything to the beginning */
 		read = 0;
 	} else {
+		/* update the entry counter */
+		cpu_buffer->read += local_read(&reader->entries);
+
 		/* swap the pages */
 		rb_init_page(bpage);
 		bpage = reader->page;
@@ -2985,9 +2966,6 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		local_set(&reader->entries, 0);
 		reader->read = 0;
 		*data_page = bpage;
-
-		/* update the entry counter */
-		rb_remove_entries(cpu_buffer, bpage, read);
 	}
 	ret = read;
 
-- 
cgit v0.10.2


From 41ede23eded40832c955d98d4b71bc244809abb3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 1 May 2009 20:26:54 -0400
Subject: ring-buffer: disable writers when resetting buffers

As a precaution, it is best to disable writing to the ring buffers
when reseting them.

[ Impact: prevent weird things if write happens during reset ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9e42a74..7876df0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2650,6 +2650,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
+	atomic_inc(&cpu_buffer->record_disabled);
+
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
 	__raw_spin_lock(&cpu_buffer->lock);
@@ -2659,6 +2661,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	__raw_spin_unlock(&cpu_buffer->lock);
 
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+	atomic_dec(&cpu_buffer->record_disabled);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
-- 
cgit v0.10.2


From 31b6e76e21b2ffd3cb2f6fe4149790a9fdadce2d Mon Sep 17 00:00:00 2001
From: Tim Abbott <tabbott@MIT.EDU>
Date: Thu, 30 Apr 2009 20:06:11 -0400
Subject: ftrace: use .sched.text, not .text.sched in recordmcount.pl

The only references in the kernel to the .text.sched section are in
recordmcount.pl.  Since the code it has is intended to be example code
it should refer to real kernel sections.  So change it to .sched.text
instead.

[ Impact: consistency in comments ]

Signed-off-by: Tim Abbott <tabbott@mit.edu>
LKML-Reference: <1241136371-10768-1-git-send-email-tabbott@mit.edu>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 409596e..0fae7da 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -26,7 +26,7 @@
 # which will also be the location of that section after final link.
 # e.g.
 #
-#  .section ".text.sched"
+#  .section ".sched.text", "ax"
 #  .globl my_func
 #  my_func:
 #        [...]
@@ -39,7 +39,7 @@
 #        [...]
 #
 # Both relocation offsets for the mcounts in the above example will be
-# offset from .text.sched. If we make another file called tmp.s with:
+# offset from .sched.text. If we make another file called tmp.s with:
 #
 #  .section __mcount_loc
 #  .quad  my_func + 0x5
@@ -51,7 +51,7 @@
 # But this gets hard if my_func is not globl (a static function).
 # In such a case we have:
 #
-#  .section ".text.sched"
+#  .section ".sched.text", "ax"
 #  my_func:
 #        [...]
 #        call mcount  (offset: 0x5)
-- 
cgit v0.10.2


From 94487d6d53af5acae10cf9fd52f74498994d46b1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 5 May 2009 19:22:53 -0400
Subject: tracing: use proper export symbol for tracing api

When adding the EXPORT_SYMBOL to some of the tracing API, I accidently
used EXPORT_SYMBOL instead of EXPORT_SYMBOL_GPL. This patch fixes
that mistake.

[ Impact: export the tracing code only for GPL modules ]

Reported-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 74df029..4164a34 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -887,21 +887,21 @@ trace_current_buffer_lock_reserve(int type, unsigned long len,
 	return trace_buffer_lock_reserve(&global_trace,
 					 type, len, flags, pc);
 }
-EXPORT_SYMBOL(trace_current_buffer_lock_reserve);
+EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
 
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
 	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
 }
-EXPORT_SYMBOL(trace_current_buffer_unlock_commit);
+EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
 
 void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
 	__trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
 }
-EXPORT_SYMBOL(trace_nowake_buffer_unlock_commit);
+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
 
 void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
 {
-- 
cgit v0.10.2


From 53fc0e2259f261602a2750dcc82b8d7bf04d3c35 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 5 May 2009 13:12:48 -0400
Subject: integrity: lsm audit rule matching fix

An audit subsystem change replaced AUDIT_EQUAL with Audit_equal.
Update calls to security_filter_rule_init()/match() to reflect
the change.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index b5291ad..b168c1d 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -96,7 +96,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule,
 	if ((rule->flags & IMA_UID) && rule->uid != tsk->cred->uid)
 		return false;
 	for (i = 0; i < MAX_LSM_RULES; i++) {
-		int rc;
+		int rc = 0;
 		u32 osid, sid;
 
 		if (!rule->lsm[i].rule)
@@ -109,7 +109,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule,
 			security_inode_getsecid(inode, &osid);
 			rc = security_filter_rule_match(osid,
 							rule->lsm[i].type,
-							AUDIT_EQUAL,
+							Audit_equal,
 							rule->lsm[i].rule,
 							NULL);
 			break;
@@ -119,7 +119,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule,
 			security_task_getsecid(tsk, &sid);
 			rc = security_filter_rule_match(sid,
 							rule->lsm[i].type,
-							AUDIT_EQUAL,
+							Audit_equal,
 							rule->lsm[i].rule,
 							NULL);
 		default:
@@ -227,7 +227,7 @@ static int ima_lsm_rule_init(struct ima_measure_rule_entry *entry,
 
 	entry->lsm[lsm_rule].type = audit_type;
 	result = security_filter_rule_init(entry->lsm[lsm_rule].type,
-					   AUDIT_EQUAL, args,
+					   Audit_equal, args,
 					   &entry->lsm[lsm_rule].rule);
 	return result;
 }
-- 
cgit v0.10.2


From e5e520a715dcea6b72f6b9417b203a4b1e813a8b Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 5 May 2009 13:13:00 -0400
Subject: integrity: use audit_log_string

Based on a request from Eric Paris to simplify parsing, replace
audit_log_format statements containing "%s" with audit_log_string().

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_audit.c b/security/integrity/ima/ima_audit.c
index 1e082bb..c146115 100644
--- a/security/integrity/ima/ima_audit.c
+++ b/security/integrity/ima/ima_audit.c
@@ -54,19 +54,10 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode,
 			 audit_get_loginuid(current),
 			 audit_get_sessionid(current));
 	audit_log_task_context(ab);
-	switch (audit_msgno) {
-	case AUDIT_INTEGRITY_DATA:
-	case AUDIT_INTEGRITY_METADATA:
-	case AUDIT_INTEGRITY_PCR:
-	case AUDIT_INTEGRITY_STATUS:
-		audit_log_format(ab, " op=%s cause=%s", op, cause);
-		break;
-	case AUDIT_INTEGRITY_HASH:
-		audit_log_format(ab, " op=%s hash=%s", op, cause);
-		break;
-	default:
-		audit_log_format(ab, " op=%s", op);
-	}
+	audit_log_format(ab, " op=");
+	audit_log_string(ab, op);
+	audit_log_format(ab, " cause=");
+	audit_log_string(ab, cause);
 	audit_log_format(ab, " comm=");
 	audit_log_untrustedstring(ab, current->comm);
 	if (fname) {
-- 
cgit v0.10.2


From 07ff7a0b187f3951788f64ae1f30e8109bc8e9eb Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 5 May 2009 13:13:10 -0400
Subject: integrity: remove __setup auditing msgs

Remove integrity audit messages from __setup()

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_audit.c b/security/integrity/ima/ima_audit.c
index c146115..b628eea 100644
--- a/security/integrity/ima/ima_audit.c
+++ b/security/integrity/ima/ima_audit.c
@@ -22,18 +22,9 @@ static int ima_audit;
 static int __init ima_audit_setup(char *str)
 {
 	unsigned long audit;
-	int rc, result = 0;
-	char *op = "ima_audit";
-	char *cause;
 
-	rc = strict_strtoul(str, 0, &audit);
-	if (rc || audit > 1)
-		result = 1;
-	else
-		ima_audit = audit;
-	cause = ima_audit ? "enabled" : "not_enabled";
-	integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL,
-			    op, cause, result, 0);
+	if (!strict_strtoul(str, 0, &audit))
+		ima_audit = audit ? 1 : 0;
 	return 1;
 }
 __setup("ima_audit=", ima_audit_setup);
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f4e7266..122f17f 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -29,20 +29,8 @@ int ima_initialized;
 char *ima_hash = "sha1";
 static int __init hash_setup(char *str)
 {
-	const char *op = "hash_setup";
-	const char *hash = "sha1";
-	int result = 0;
-	int audit_info = 0;
-
-	if (strncmp(str, "md5", 3) == 0) {
-		hash = "md5";
-		ima_hash = str;
-	} else if (strncmp(str, "sha1", 4) != 0) {
-		hash = "invalid_hash_type";
-		result = 1;
-	}
-	integrity_audit_msg(AUDIT_INTEGRITY_HASH, NULL, NULL, op, hash,
-			    result, audit_info);
+	if (strncmp(str, "md5", 3) == 0)
+		ima_hash = "md5";
 	return 1;
 }
 __setup("ima_hash=", hash_setup);
-- 
cgit v0.10.2


From aa20ae8444fc6c318272c643f856d8d8ad3e198d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 5 May 2009 21:16:11 -0400
Subject: ring-buffer: move big if statement down

In the hot path of the ring buffer "__rb_reserve_next" there's a big
if statement that does not even return back to the work flow.

	code;

	if (cross to next page) {

		[ lots of code ]

		return;
	}

	more code;

The condition is even the unlikely path, although we do not denote it
with an unlikely because gcc is fine with it. The condition is true when
the write crosses a page boundary, and we need to start at a new page.

Having this if statement makes it hard to read, but calling another
function to do the work is also not appropriate, because we are using a lot
of variables that were set before the if statement, and we do not want to
send them as parameters.

This patch changes it to a goto:

	code;

	if (cross to next page)
		goto next_page;

	more code;

	return;

next_page:

	[ lots of code]

This makes the code easier to understand, and a bit more obvious.

The output from gcc is practically identical. For some reason, gcc decided
to use different registers when I switched it to a goto. But other than that,
the logic is the same.

[ Impact: easier to read code ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7876df0..424129e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1159,6 +1159,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
 	struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
+	struct buffer_page *next_page;
 	unsigned long tail, write;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
@@ -1173,137 +1174,140 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	tail = write - length;
 
 	/* See if we shot pass the end of this buffer page */
-	if (write > BUF_PAGE_SIZE) {
-		struct buffer_page *next_page = tail_page;
+	if (write > BUF_PAGE_SIZE)
+		goto next_page;
 
-		local_irq_save(flags);
-		/*
-		 * Since the write to the buffer is still not
-		 * fully lockless, we must be careful with NMIs.
-		 * The locks in the writers are taken when a write
-		 * crosses to a new page. The locks protect against
-		 * races with the readers (this will soon be fixed
-		 * with a lockless solution).
-		 *
-		 * Because we can not protect against NMIs, and we
-		 * want to keep traces reentrant, we need to manage
-		 * what happens when we are in an NMI.
-		 *
-		 * NMIs can happen after we take the lock.
-		 * If we are in an NMI, only take the lock
-		 * if it is not already taken. Otherwise
-		 * simply fail.
-		 */
-		if (unlikely(in_nmi())) {
-			if (!__raw_spin_trylock(&cpu_buffer->lock)) {
-				cpu_buffer->nmi_dropped++;
-				goto out_reset;
-			}
-		} else
-			__raw_spin_lock(&cpu_buffer->lock);
-
-		lock_taken = true;
+	/* We reserved something on the buffer */
 
-		rb_inc_page(cpu_buffer, &next_page);
+	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
+		return NULL;
 
-		head_page = cpu_buffer->head_page;
-		reader_page = cpu_buffer->reader_page;
+	event = __rb_page_index(tail_page, tail);
+	rb_update_event(event, type, length);
 
-		/* we grabbed the lock before incrementing */
-		if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
-			goto out_reset;
+	/* The passed in type is zero for DATA */
+	if (likely(!type))
+		local_inc(&tail_page->entries);
 
-		/*
-		 * If for some reason, we had an interrupt storm that made
-		 * it all the way around the buffer, bail, and warn
-		 * about it.
-		 */
-		if (unlikely(next_page == commit_page)) {
-			cpu_buffer->commit_overrun++;
-			goto out_reset;
-		}
+	/*
+	 * If this is a commit and the tail is zero, then update
+	 * this page's time stamp.
+	 */
+	if (!tail && rb_is_commit(cpu_buffer, event))
+		cpu_buffer->commit_page->page->time_stamp = *ts;
 
-		if (next_page == head_page) {
-			if (!(buffer->flags & RB_FL_OVERWRITE))
-				goto out_reset;
+	return event;
 
-			/* tail_page has not moved yet? */
-			if (tail_page == cpu_buffer->tail_page) {
-				/* count overflows */
-				cpu_buffer->overrun +=
-					local_read(&head_page->entries);
+ next_page:
 
-				rb_inc_page(cpu_buffer, &head_page);
-				cpu_buffer->head_page = head_page;
-				cpu_buffer->head_page->read = 0;
-			}
-		}
+	next_page = tail_page;
 
-		/*
-		 * If the tail page is still the same as what we think
-		 * it is, then it is up to us to update the tail
-		 * pointer.
-		 */
-		if (tail_page == cpu_buffer->tail_page) {
-			local_set(&next_page->write, 0);
-			local_set(&next_page->entries, 0);
-			local_set(&next_page->page->commit, 0);
-			cpu_buffer->tail_page = next_page;
-
-			/* reread the time stamp */
-			*ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
-			cpu_buffer->tail_page->page->time_stamp = *ts;
+	local_irq_save(flags);
+	/*
+	 * Since the write to the buffer is still not
+	 * fully lockless, we must be careful with NMIs.
+	 * The locks in the writers are taken when a write
+	 * crosses to a new page. The locks protect against
+	 * races with the readers (this will soon be fixed
+	 * with a lockless solution).
+	 *
+	 * Because we can not protect against NMIs, and we
+	 * want to keep traces reentrant, we need to manage
+	 * what happens when we are in an NMI.
+	 *
+	 * NMIs can happen after we take the lock.
+	 * If we are in an NMI, only take the lock
+	 * if it is not already taken. Otherwise
+	 * simply fail.
+	 */
+	if (unlikely(in_nmi())) {
+		if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+			cpu_buffer->nmi_dropped++;
+			goto out_reset;
 		}
+	} else
+		__raw_spin_lock(&cpu_buffer->lock);
 
-		/*
-		 * The actual tail page has moved forward.
-		 */
-		if (tail < BUF_PAGE_SIZE) {
-			/* Mark the rest of the page with padding */
-			event = __rb_page_index(tail_page, tail);
-			rb_event_set_padding(event);
-		}
+	lock_taken = true;
 
-		if (tail <= BUF_PAGE_SIZE)
-			/* Set the write back to the previous setting */
-			local_set(&tail_page->write, tail);
+	rb_inc_page(cpu_buffer, &next_page);
 
-		/*
-		 * If this was a commit entry that failed,
-		 * increment that too
-		 */
-		if (tail_page == cpu_buffer->commit_page &&
-		    tail == rb_commit_index(cpu_buffer)) {
-			rb_set_commit_to_write(cpu_buffer);
-		}
+	head_page = cpu_buffer->head_page;
+	reader_page = cpu_buffer->reader_page;
 
-		__raw_spin_unlock(&cpu_buffer->lock);
-		local_irq_restore(flags);
+	/* we grabbed the lock before incrementing */
+	if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+		goto out_reset;
 
-		/* fail and let the caller try again */
-		return ERR_PTR(-EAGAIN);
+	/*
+	 * If for some reason, we had an interrupt storm that made
+	 * it all the way around the buffer, bail, and warn
+	 * about it.
+	 */
+	if (unlikely(next_page == commit_page)) {
+		cpu_buffer->commit_overrun++;
+		goto out_reset;
 	}
 
-	/* We reserved something on the buffer */
+	if (next_page == head_page) {
+		if (!(buffer->flags & RB_FL_OVERWRITE))
+			goto out_reset;
 
-	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
-		return NULL;
+		/* tail_page has not moved yet? */
+		if (tail_page == cpu_buffer->tail_page) {
+			/* count overflows */
+			cpu_buffer->overrun +=
+				local_read(&head_page->entries);
 
-	event = __rb_page_index(tail_page, tail);
-	rb_update_event(event, type, length);
+			rb_inc_page(cpu_buffer, &head_page);
+			cpu_buffer->head_page = head_page;
+			cpu_buffer->head_page->read = 0;
+		}
+	}
 
-	/* The passed in type is zero for DATA */
-	if (likely(!type))
-		local_inc(&tail_page->entries);
+	/*
+	 * If the tail page is still the same as what we think
+	 * it is, then it is up to us to update the tail
+	 * pointer.
+	 */
+	if (tail_page == cpu_buffer->tail_page) {
+		local_set(&next_page->write, 0);
+		local_set(&next_page->entries, 0);
+		local_set(&next_page->page->commit, 0);
+		cpu_buffer->tail_page = next_page;
+
+		/* reread the time stamp */
+		*ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
+		cpu_buffer->tail_page->page->time_stamp = *ts;
+	}
 
 	/*
-	 * If this is a commit and the tail is zero, then update
-	 * this page's time stamp.
+	 * The actual tail page has moved forward.
 	 */
-	if (!tail && rb_is_commit(cpu_buffer, event))
-		cpu_buffer->commit_page->page->time_stamp = *ts;
+	if (tail < BUF_PAGE_SIZE) {
+		/* Mark the rest of the page with padding */
+		event = __rb_page_index(tail_page, tail);
+		rb_event_set_padding(event);
+	}
 
-	return event;
+	if (tail <= BUF_PAGE_SIZE)
+		/* Set the write back to the previous setting */
+		local_set(&tail_page->write, tail);
+
+	/*
+	 * If this was a commit entry that failed,
+	 * increment that too
+	 */
+	if (tail_page == cpu_buffer->commit_page &&
+	    tail == rb_commit_index(cpu_buffer)) {
+		rb_set_commit_to_write(cpu_buffer);
+	}
+
+	__raw_spin_unlock(&cpu_buffer->lock);
+	local_irq_restore(flags);
+
+	/* fail and let the caller try again */
+	return ERR_PTR(-EAGAIN);
 
  out_reset:
 	/* reset write */
-- 
cgit v0.10.2


From c898faf91b3ec6b0f6efa35831b3984fa3331db0 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 5 May 2009 17:28:56 -0400
Subject: x86: 46 bit physical address support on 64 bits

Extend the maximum addressable memory on x86-64 from 2^44 to
2^46 bytes. This requires some shuffling around of the vmalloc
and virtual memmap memory areas, to keep them away from the
direct mapping of up to 64TB of physical memory.

This patch also introduces a guard hole between the vmalloc
area and the virtual memory map space.  There's really no
good reason why we wouldn't have a guard hole there.

[ Impact: future hardware enablement ]

Signed-off-by: Rik van Riel <riel@redhat.com>
LKML-Reference: <20090505172856.6820db22@cuia.bos.redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 29b52b1..5394132 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -6,10 +6,11 @@ Virtual memory map with 4 level page tables:
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
 hole caused by [48:63] sign extension
 ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
-ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory
-ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole
-ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space
-ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB)
+ffff880000000000 - ffffc8ffffffffff (=64 TB) direct mapping of all phys. memory
+ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
+ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
+ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
+ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
 ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 3f58718..6fadb02 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -47,7 +47,7 @@
 #define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
 #define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
 
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define __PHYSICAL_MASK_SHIFT	46
 #define __VIRTUAL_MASK_SHIFT	48
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index fbf42b8..766ea16 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -51,11 +51,11 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
-
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define MAXMEM		 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START    _AC(0xffffc20000000000, UL)
-#define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
+#define VMALLOC_START    _AC(0xffffc90000000000, UL)
+#define VMALLOC_END      _AC(0xffffe8ffffffffff, UL)
+#define VMEMMAP_START	 _AC(0xffffea0000000000, UL)
 #define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index e3cc3c0..4517d6b 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,7 +27,7 @@
 #else /* CONFIG_X86_32 */
 # define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
 # define MAX_PHYSADDR_BITS	44
-# define MAX_PHYSMEM_BITS	44 /* Can be max 45 bits */
+# define MAX_PHYSMEM_BITS	46
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
-- 
cgit v0.10.2


From 2feceeff1e771850e49f9074307f071964fd9e3e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 5 May 2009 19:07:07 -0700
Subject: x86: fix typo in address space documentation

Fix a trivial typo in Documentation/x86/x86_64/mm.txt.

[ Impact: documentation only ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Rik van Riel <riel@redhat.com>

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 5394132..d6498e3 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -6,7 +6,7 @@ Virtual memory map with 4 level page tables:
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
 hole caused by [48:63] sign extension
 ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
-ffff880000000000 - ffffc8ffffffffff (=64 TB) direct mapping of all phys. memory
+ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
 ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
 ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
 ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
-- 
cgit v0.10.2


From 5092dbc96f3acdac5433b27c06860352dc6d23b9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 5 May 2009 22:47:18 -0400
Subject: ring-buffer: add benchmark and tester

This patch adds code that can benchmark the ring buffer as well as
test it. This code can be compiled into the kernel (not recommended)
or as a module.

A separate ring buffer is used to not interfer with other users, like
ftrace. It creates a producer and a consumer (option to disable creation
of the consumer) and will run for 10 seconds, then sleep for 10 seconds
and then repeat.

While running, the producer will write 10 byte loads into the ring
buffer with just putting in the current CPU number. The reader will
continually try to read the buffer. The reader will alternate from reading
the buffer via event by event, or by full pages.

The output is a pr_info, thus it will fill up the syslogs.

  Starting ring buffer hammer
  End ring buffer hammer
  Time:     9000349 (usecs)
  Overruns: 12578640
  Read:     5358440  (by events)
  Entries:  0
  Total:    17937080
  Missed:   0
  Hit:      17937080
  Entries per millisec: 1993
  501 ns per entry
  Sleeping for 10 secs
  Starting ring buffer hammer
  End ring buffer hammer
  Time:     9936350 (usecs)
  Overruns: 0
  Read:     28146644  (by pages)
  Entries:  74
  Total:    28146718
  Missed:   0
  Hit:      28146718
  Entries per millisec: 2832
  353 ns per entry
  Sleeping for 10 secs

Time:      is the time the test ran
Overruns:  the number of events that were overwritten and not read
Read:      the number of events read (either by pages or events)
Entries:   the number of entries left in the buffer
                 (the by pages will only read full pages)
Total:     Entries + Read + Overruns
Missed:    the number of entries that failed to write
Hit:       the number of entries that were written

The above example shows that it takes ~353 nanosecs per entry when
there is a reader, reading by pages (and no overruns)

The event by event reader slowed the producer down to 501 nanosecs.

[ Impact: see how changes to the ring buffer affect stability and performance ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 450d3c2..50f62a2 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -471,6 +471,22 @@ config MMIOTRACE_TEST
 
 	  Say N, unless you absolutely know what you are doing.
 
+config RING_BUFFER_BENCHMARK
+	tristate "Ring buffer benchmark stress tester"
+	depends on RING_BUFFER
+	help
+	  This option creates a test to stress the ring buffer and bench mark it.
+	  It creates its own ring buffer such that it will not interfer with
+	  any other users of the ring buffer (such as ftrace). It then creates
+	  a producer and consumer that will run for 10 seconds and sleep for
+	  10 seconds. Each interval it will print out the number of events
+	  it recorded and give a rough estimate of how long each iteration took.
+
+	  It does not disable interrupts or raise its priority, so it may be
+	  affected by processes that are running.
+
+	  If unsure, say N
+
 endif # FTRACE
 
 endif # TRACING_SUPPORT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index fb9d7f9..7c34cbf 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -17,6 +17,7 @@ endif
 
 obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
+obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_TRACING) += trace_clock.o
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
new file mode 100644
index 0000000..747244a
--- /dev/null
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -0,0 +1,379 @@
+/*
+ * ring buffer tester and benchmark
+ *
+ * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/completion.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/time.h>
+
+struct rb_page {
+	u64		ts;
+	local_t		commit;
+	char		data[4080];
+};
+
+/* run time and sleep time in seconds */
+#define RUN_TIME	10
+#define SLEEP_TIME	10
+
+/* number of events for writer to wake up the reader */
+static int wakeup_interval = 100;
+
+static int reader_finish;
+static struct completion read_start;
+static struct completion read_done;
+
+static struct ring_buffer *buffer;
+static struct task_struct *producer;
+static struct task_struct *consumer;
+static unsigned long read;
+
+static int disable_reader;
+module_param(disable_reader, uint, 0644);
+MODULE_PARM_DESC(disable_reader, "only run producer");
+
+static int read_events;
+
+static int kill_test;
+
+#define KILL_TEST()				\
+	do {					\
+		if (!kill_test) {		\
+			kill_test = 1;		\
+			WARN_ON(1);		\
+		}				\
+	} while (0)
+
+enum event_status {
+	EVENT_FOUND,
+	EVENT_DROPPED,
+};
+
+static enum event_status read_event(int cpu)
+{
+	struct ring_buffer_event *event;
+	int *entry;
+	u64 ts;
+
+	event = ring_buffer_consume(buffer, cpu, &ts);
+	if (!event)
+		return EVENT_DROPPED;
+
+	entry = ring_buffer_event_data(event);
+	if (*entry != cpu) {
+		KILL_TEST();
+		return EVENT_DROPPED;
+	}
+
+	read++;
+	return EVENT_FOUND;
+}
+
+static enum event_status read_page(int cpu)
+{
+	struct ring_buffer_event *event;
+	struct rb_page *rpage;
+	unsigned long commit;
+	void *bpage;
+	int *entry;
+	int ret;
+	int inc;
+	int i;
+
+	bpage = ring_buffer_alloc_read_page(buffer);
+	ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
+	if (ret >= 0) {
+		rpage = bpage;
+		commit = local_read(&rpage->commit);
+		for (i = 0; i < commit && !kill_test; i += inc) {
+
+			if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
+				KILL_TEST();
+				break;
+			}
+
+			inc = -1;
+			event = (void *)&rpage->data[i];
+			switch (event->type_len) {
+			case RINGBUF_TYPE_PADDING:
+				/* We don't expect any padding */
+				KILL_TEST();
+				break;
+			case RINGBUF_TYPE_TIME_EXTEND:
+				inc = 8;
+				break;
+			case 0:
+				entry = ring_buffer_event_data(event);
+				if (*entry != cpu) {
+					KILL_TEST();
+					break;
+				}
+				read++;
+				if (!event->array[0]) {
+					KILL_TEST();
+					break;
+				}
+				inc = event->array[0];
+				break;
+			default:
+				entry = ring_buffer_event_data(event);
+				if (*entry != cpu) {
+					KILL_TEST();
+					break;
+				}
+				read++;
+				inc = ((event->type_len + 1) * 4);
+			}
+			if (kill_test)
+				break;
+
+			if (inc <= 0) {
+				KILL_TEST();
+				break;
+			}
+		}
+	}
+	ring_buffer_free_read_page(buffer, bpage);
+
+	if (ret < 0)
+		return EVENT_DROPPED;
+	return EVENT_FOUND;
+}
+
+static void ring_buffer_consumer(void)
+{
+	/* toggle between reading pages and events */
+	read_events ^= 1;
+
+	read = 0;
+	while (!reader_finish && !kill_test) {
+		int found;
+
+		do {
+			int cpu;
+
+			found = 0;
+			for_each_online_cpu(cpu) {
+				enum event_status stat;
+
+				if (read_events)
+					stat = read_event(cpu);
+				else
+					stat = read_page(cpu);
+
+				if (kill_test)
+					break;
+				if (stat == EVENT_FOUND)
+					found = 1;
+			}
+		} while (found && !kill_test);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (reader_finish)
+			break;
+
+		schedule();
+		__set_current_state(TASK_RUNNING);
+	}
+	reader_finish = 0;
+	complete(&read_done);
+}
+
+static void ring_buffer_producer(void)
+{
+	struct timeval start_tv;
+	struct timeval end_tv;
+	unsigned long long time;
+	unsigned long long entries;
+	unsigned long long overruns;
+	unsigned long missed = 0;
+	unsigned long hit = 0;
+	unsigned long avg;
+	int cnt = 0;
+
+	/*
+	 * Hammer the buffer for 10 secs (this may
+	 * make the system stall)
+	 */
+	pr_info("Starting ring buffer hammer\n");
+	do_gettimeofday(&start_tv);
+	do {
+		struct ring_buffer_event *event;
+		int *entry;
+
+		event = ring_buffer_lock_reserve(buffer, 10);
+		if (!event) {
+			missed++;
+		} else {
+			hit++;
+			entry = ring_buffer_event_data(event);
+			*entry = smp_processor_id();
+			ring_buffer_unlock_commit(buffer, event);
+		}
+		do_gettimeofday(&end_tv);
+
+		if (consumer && !(++cnt % wakeup_interval))
+			wake_up_process(consumer);
+
+	} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
+	pr_info("End ring buffer hammer\n");
+
+	if (consumer) {
+		/* Init both completions here to avoid races */
+		init_completion(&read_start);
+		init_completion(&read_done);
+		/* the completions must be visible before the finish var */
+		smp_wmb();
+		reader_finish = 1;
+		/* finish var visible before waking up the consumer */
+		smp_wmb();
+		wake_up_process(consumer);
+		wait_for_completion(&read_done);
+	}
+
+	time = end_tv.tv_sec - start_tv.tv_sec;
+	time *= 1000000;
+	time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
+
+	entries = ring_buffer_entries(buffer);
+	overruns = ring_buffer_overruns(buffer);
+
+	if (kill_test)
+		pr_info("ERROR!\n");
+	pr_info("Time:     %lld (usecs)\n", time);
+	pr_info("Overruns: %lld\n", overruns);
+	if (disable_reader)
+		pr_info("Read:     (reader disabled)\n");
+	else
+		pr_info("Read:     %ld  (by %s)\n", read,
+			read_events ? "events" : "pages");
+	pr_info("Entries:  %lld\n", entries);
+	pr_info("Total:    %lld\n", entries + overruns + read);
+	pr_info("Missed:   %ld\n", missed);
+	pr_info("Hit:      %ld\n", hit);
+
+	do_div(time, 1000);
+	if (time)
+		hit /= (long)time;
+	else
+		pr_info("TIME IS ZERO??\n");
+
+	pr_info("Entries per millisec: %ld\n", hit);
+
+	if (hit) {
+		avg = 1000000 / hit;
+		pr_info("%ld ns per entry\n", avg);
+	}
+}
+
+static void wait_to_die(void)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
+static int ring_buffer_consumer_thread(void *arg)
+{
+	while (!kthread_should_stop() && !kill_test) {
+		complete(&read_start);
+
+		ring_buffer_consumer();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop() || kill_test)
+			break;
+
+		schedule();
+		__set_current_state(TASK_RUNNING);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	if (kill_test)
+		wait_to_die();
+
+	return 0;
+}
+
+static int ring_buffer_producer_thread(void *arg)
+{
+	init_completion(&read_start);
+
+	while (!kthread_should_stop() && !kill_test) {
+		ring_buffer_reset(buffer);
+
+		if (consumer) {
+			smp_wmb();
+			wake_up_process(consumer);
+			wait_for_completion(&read_start);
+		}
+
+		ring_buffer_producer();
+
+		pr_info("Sleeping for 10 secs\n");
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ * SLEEP_TIME);
+		__set_current_state(TASK_RUNNING);
+	}
+
+	if (kill_test)
+		wait_to_die();
+
+	return 0;
+}
+
+static int __init ring_buffer_benchmark_init(void)
+{
+	int ret;
+
+	/* make a one meg buffer in overwite mode */
+	buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE);
+	if (!buffer)
+		return -ENOMEM;
+
+	if (!disable_reader) {
+		consumer = kthread_create(ring_buffer_consumer_thread,
+					  NULL, "rb_consumer");
+		ret = PTR_ERR(consumer);
+		if (IS_ERR(consumer))
+			goto out_fail;
+	}
+
+	producer = kthread_run(ring_buffer_producer_thread,
+			       NULL, "rb_producer");
+	ret = PTR_ERR(producer);
+
+	if (IS_ERR(producer))
+		goto out_kill;
+
+	return 0;
+
+ out_kill:
+	if (consumer)
+		kthread_stop(consumer);
+
+ out_fail:
+	ring_buffer_free(buffer);
+	return ret;
+}
+
+static void __exit ring_buffer_benchmark_exit(void)
+{
+	kthread_stop(producer);
+	if (consumer)
+		kthread_stop(consumer);
+	ring_buffer_free(buffer);
+}
+
+module_init(ring_buffer_benchmark_init);
+module_exit(ring_buffer_benchmark_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("ring_buffer_benchmark");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From b2e5d8588de0b5341eddad87dbe48d2185eaa3dd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 May 2009 07:55:33 +0200
Subject: irq: change ->set_affinity() to return status, fix

This build failure:

 arch/powerpc/sysdev/mpic.c:810: error: conflicting types for 'mpic_set_affinity'
 arch/powerpc/sysdev/mpic.h:39: error: previous declaration of 'mpic_set_affinity' was here
 make[2]: *** [arch/powerpc/sysdev/mpic.o] Error 1
 make[2]: *** Waiting for unfinished jobs....

Triggers because the function prototype was not updated when the
function call signature got changed by:

   d5dedd4: irq: change ->set_affinity() to return status

[ Impact: build fix on powerpc ]

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-arch@vger.kernel.org
LKML-Reference: <49F654E9.4070809@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 3cef2af..eff433c 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
 
 extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
 extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
+extern int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 #endif /* _POWERPC_SYSDEV_MPIC_H */
-- 
cgit v0.10.2


From 1eb5ac6466d4be7b15b38ce3ab709600f1bc891f Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 5 May 2009 19:13:44 +1000
Subject: mm: SLUB fix reclaim_state

SLUB does not correctly account reclaim_state.reclaimed_slab, so it will
break memory reclaim. Account it like SLAB does.

Cc: stable@kernel.org
Cc: linux-mm@kvack.org
Cc: Matt Mackall <mpm@selenic.com>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/slub.c b/mm/slub.c
index 7ab54ec..aa34913 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/swap.h> /* struct reclaim_state */
 #include <linux/module.h>
 #include <linux/bit_spinlock.h>
 #include <linux/interrupt.h>
@@ -1170,6 +1171,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 
 	__ClearPageSlab(page);
 	reset_page_mapcount(page);
+	if (current->reclaim_state)
+		current->reclaim_state->reclaimed_slab += pages;
 	__free_pages(page, order);
 }
 
-- 
cgit v0.10.2


From 1f0532eb617d28f65c93593a1491f662f14f7eac Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 5 May 2009 19:13:45 +1000
Subject: mm: SLOB fix reclaim_state

SLOB does not correctly account reclaim_state.reclaimed_slab, so it will
break memory reclaim. Account it like SLAB does.

Cc: stable@kernel.org
Cc: linux-mm@kvack.org
Acked-by: Matt Mackall <mpm@selenic.com>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/slob.c b/mm/slob.c
index a2d4ab3..f92e66d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -60,6 +60,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/swap.h> /* struct reclaim_state */
 #include <linux/cache.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -255,6 +256,8 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
 
 static void slob_free_pages(void *b, int order)
 {
+	if (current->reclaim_state)
+		current->reclaim_state->reclaimed_slab += 1 << order;
 	free_pages((unsigned long)b, order);
 }
 
@@ -407,7 +410,7 @@ static void slob_free(void *block, int size)
 		spin_unlock_irqrestore(&slob_lock, flags);
 		clear_slob_page(sp);
 		free_slob_page(sp);
-		free_page((unsigned long)b);
+		slob_free_pages(b, 0);
 		return;
 	}
 
-- 
cgit v0.10.2


From fd6da10a617f483348ee32bcfe53fd20c302eca1 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 6 May 2009 10:32:13 +0800
Subject: tracing/events: don't say hi when loading the trace event sample

The sample is useful for testing, and I'm using it. But after
loading the module, it keeps saying hi every 10 seconds, this may
be disturbing.

Also Steven said commenting out the "hi" helped in causing races. :)

[ Impact: make testing a bit easier ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4A00F6AD.2070008@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index f33b3ba..aabc4e9 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -16,10 +16,6 @@ static void simple_thread_func(int cnt)
 	set_current_state(TASK_INTERRUPTIBLE);
 	schedule_timeout(HZ);
 	trace_foo_bar("hello", cnt);
-
-	if (!(cnt % 10))
-		/* It is really important that I say "hi!" */
-		printk(KERN_EMERG "hi!\n");
 }
 
 static int simple_thread(void *arg)
-- 
cgit v0.10.2


From 96d17980fabeb757706d2d6db5a28580a6156bfc Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 6 May 2009 10:32:32 +0800
Subject: tracing/events: make SAMPLE_TRACE_EVENTS default to n

Normally a config should be default to n. This patch also makes the
sample module-only, like SAMPLE_MARKERS and SAMPLE_TRACEPOINTS.

[ Impact: don't build trace event sample by default ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4A00F6C0.8090803@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/samples/Kconfig b/samples/Kconfig
index 93f41c0..b75d28c 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -20,9 +20,8 @@ config SAMPLE_TRACEPOINTS
 	  This build tracepoints example modules.
 
 config SAMPLE_TRACE_EVENTS
-	tristate "Build trace_events examples"
-	depends on EVENT_TRACING
-	default m
+	tristate "Build trace_events examples -- loadable modules only"
+	depends on EVENT_TRACING && m
 	help
 	  This build trace event example modules.
 
-- 
cgit v0.10.2


From 2df75e415709ad12862028916c772c1f377f6a7c Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 6 May 2009 10:33:04 +0800
Subject: tracing/events: fix memory leak when unloading module

When unloading a module, memory allocated by init_preds() and
trace_define_field() is not freed.

[ Impact: fix memory leak ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4A00F6E0.3040503@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 5fff40c..662c1be 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -116,6 +116,7 @@ struct ftrace_event_call {
 #define MAX_FILTER_STR_VAL	128
 
 extern int init_preds(struct ftrace_event_call *call);
+extern void destroy_preds(struct ftrace_event_call *call);
 extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
 extern int filter_current_check_discard(struct ftrace_event_call *call,
 					void *rec,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f789ca5..f251a15 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -60,6 +60,22 @@ err:
 }
 EXPORT_SYMBOL_GPL(trace_define_field);
 
+#ifdef CONFIG_MODULES
+
+static void trace_destroy_fields(struct ftrace_event_call *call)
+{
+	struct ftrace_event_field *field, *next;
+
+	list_for_each_entry_safe(field, next, &call->fields, link) {
+		list_del(&field->link);
+		kfree(field->type);
+		kfree(field->name);
+		kfree(field);
+	}
+}
+
+#endif /* CONFIG_MODULES */
+
 static void ftrace_clear_events(void)
 {
 	struct ftrace_event_call *call;
@@ -925,6 +941,8 @@ static void trace_module_remove_events(struct module *mod)
 				unregister_ftrace_event(call->event);
 			debugfs_remove_recursive(call->dir);
 			list_del(&call->list);
+			trace_destroy_fields(call);
+			destroy_preds(call);
 		}
 	}
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f494866..ce07b81 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -346,6 +346,20 @@ static void filter_disable_preds(struct ftrace_event_call *call)
 		filter->preds[i]->fn = filter_pred_none;
 }
 
+void destroy_preds(struct ftrace_event_call *call)
+{
+	struct event_filter *filter = call->filter;
+	int i;
+
+	for (i = 0; i < MAX_FILTER_PRED; i++) {
+		if (filter->preds[i])
+			filter_free_pred(filter->preds[i]);
+	}
+	kfree(filter->preds);
+	kfree(filter);
+	call->filter = NULL;
+}
+
 int init_preds(struct ftrace_event_call *call)
 {
 	struct event_filter *filter;
@@ -374,13 +388,7 @@ int init_preds(struct ftrace_event_call *call)
 	return 0;
 
 oom:
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (filter->preds[i])
-			filter_free_pred(filter->preds[i]);
-	}
-	kfree(filter->preds);
-	kfree(call->filter);
-	call->filter = NULL;
+	destroy_preds(call);
 
 	return -ENOMEM;
 }
-- 
cgit v0.10.2


From 20c8928abe70e204bd077ab6cfe23002d7788983 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 6 May 2009 10:33:45 +0800
Subject: tracing/events: fix concurrent access to ftrace_events list

A module will add/remove its trace events when it gets loaded/unloaded, so
the ftrace_events list is not "const", and concurrent access needs to be
protected.

This patch thus fixes races between loading/unloding modules and read
'available_events' or read/write 'set_event', etc.

Below shows how to reproduce the race:

 # for ((; ;)) { cat /mnt/tracing/available_events; } > /dev/null &
 # for ((; ;)) { insmod trace-events-sample.ko; rmmod sample; } &

After a while:

BUG: unable to handle kernel paging request at 0010011c
IP: [<c1080f27>] t_next+0x1b/0x2d
...
Call Trace:
 [<c10c90e6>] ? seq_read+0x217/0x30d
 [<c10c8ecf>] ? seq_read+0x0/0x30d
 [<c10b4c19>] ? vfs_read+0x8f/0x136
 [<c10b4fc3>] ? sys_read+0x40/0x65
 [<c1002a68>] ? sysenter_do_call+0x12/0x36

[ Impact: fix races when concurrent accessing ftrace_events list ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4A00F709.3080800@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7736fe8..777c6c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -825,6 +825,7 @@ static int filter_pred_##size(struct filter_pred *pred, void *event,	\
 	return match;							\
 }
 
+extern struct mutex event_mutex;
 extern struct list_head ftrace_events;
 
 extern const char *__start___trace_bprintk_fmt[];
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 7bf2ad6..5b5895a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -10,21 +10,30 @@
 int ftrace_profile_enable(int event_id)
 {
 	struct ftrace_event_call *event;
+	int ret = -EINVAL;
 
+	mutex_lock(&event_mutex);
 	list_for_each_entry(event, &ftrace_events, list) {
-		if (event->id == event_id)
-			return event->profile_enable(event);
+		if (event->id == event_id) {
+			ret = event->profile_enable(event);
+			break;
+		}
 	}
+	mutex_unlock(&event_mutex);
 
-	return -EINVAL;
+	return ret;
 }
 
 void ftrace_profile_disable(int event_id)
 {
 	struct ftrace_event_call *event;
 
+	mutex_lock(&event_mutex);
 	list_for_each_entry(event, &ftrace_events, list) {
-		if (event->id == event_id)
-			return event->profile_disable(event);
+		if (event->id == event_id) {
+			event->profile_disable(event);
+			break;
+		}
 	}
+	mutex_unlock(&event_mutex);
 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f251a15..8d579ff 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -21,7 +21,7 @@
 
 #define TRACE_SYSTEM "TRACE_SYSTEM"
 
-static DEFINE_MUTEX(event_mutex);
+DEFINE_MUTEX(event_mutex);
 
 LIST_HEAD(ftrace_events);
 
@@ -80,6 +80,7 @@ static void ftrace_clear_events(void)
 {
 	struct ftrace_event_call *call;
 
+	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (call->enabled) {
@@ -87,6 +88,7 @@ static void ftrace_clear_events(void)
 			call->unregfunc();
 		}
 	}
+	mutex_unlock(&event_mutex);
 }
 
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
@@ -274,6 +276,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
+	mutex_lock(&event_mutex);
+	if (*pos == 0)
+		m->private = ftrace_events.next;
 	return t_next(m, NULL, pos);
 }
 
@@ -303,6 +308,9 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
+	mutex_lock(&event_mutex);
+	if (*pos == 0)
+		m->private = ftrace_events.next;
 	return s_next(m, NULL, pos);
 }
 
@@ -319,12 +327,12 @@ static int t_show(struct seq_file *m, void *v)
 
 static void t_stop(struct seq_file *m, void *p)
 {
+	mutex_unlock(&event_mutex);
 }
 
 static int
 ftrace_event_seq_open(struct inode *inode, struct file *file)
 {
-	int ret;
 	const struct seq_operations *seq_ops;
 
 	if ((file->f_mode & FMODE_WRITE) &&
@@ -332,13 +340,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
 		ftrace_clear_events();
 
 	seq_ops = inode->i_private;
-	ret = seq_open(file, seq_ops);
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-
-		m->private = ftrace_events.next;
-	}
-	return ret;
+	return seq_open(file, seq_ops);
 }
 
 static ssize_t
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index ce07b81..7ac6910 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -408,6 +408,7 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
 		filter->n_preds = 0;
 	}
 
+	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
 		if (!call->define_fields)
 			continue;
@@ -417,6 +418,7 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
 			remove_filter_string(call->filter);
 		}
 	}
+	mutex_unlock(&event_mutex);
 }
 
 static int filter_add_pred_fn(struct filter_parse_state *ps,
@@ -567,6 +569,7 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 {
 	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
+	int err = 0;
 
 	if (!filter->preds) {
 		filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
@@ -584,8 +587,8 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 	filter->preds[filter->n_preds] = pred;
 	filter->n_preds++;
 
+	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
-		int err;
 
 		if (!call->define_fields)
 			continue;
@@ -597,12 +600,13 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 		if (err) {
 			filter_free_subsystem_preds(system);
 			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
-			return err;
+			break;
 		}
 		replace_filter_string(call->filter, filter_string);
 	}
+	mutex_unlock(&event_mutex);
 
-	return 0;
+	return err;
 }
 
 static void parse_init(struct filter_parse_state *ps,
-- 
cgit v0.10.2


From de1d7286060430e79a1d50ad6e5fee8fe863c5f6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Tue, 5 May 2009 16:49:59 +0800
Subject: tracepoint: trace_sched_migrate_task(): remove parameter

The orig_cpu parameter in trace_sched_migrate_task() is not necessary,
it can be got by using task_cpu(p) in the probe.

[ Impact: micro-optimization ]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
[ modified from Mathieu's patch. The original patch is at:
  http://marc.info/?l=linux-kernel&m=123791201716239&w=2 ]
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: fweisbec@gmail.com
Cc: rostedt@goodmis.org
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: zhaolei@cn.fujitsu.com
Cc: laijs@cn.fujitsu.com
LKML-Reference: <49FFFDB7.1050402@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index ffa1cab..dd4033c 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -180,9 +180,9 @@ TRACE_EVENT(sched_switch,
  */
 TRACE_EVENT(sched_migrate_task,
 
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+	TP_PROTO(struct task_struct *p, int dest_cpu),
 
-	TP_ARGS(p, orig_cpu, dest_cpu),
+	TP_ARGS(p, dest_cpu),
 
 	TP_STRUCT__entry(
 		__array(	char,	comm,	TASK_COMM_LEN	)
@@ -196,7 +196,7 @@ TRACE_EVENT(sched_migrate_task,
 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 		__entry->pid		= p->pid;
 		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
+		__entry->orig_cpu	= task_cpu(p);
 		__entry->dest_cpu	= dest_cpu;
 	),
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 9f7ffd0..9cdedbd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1954,7 +1954,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 	clock_offset = old_rq->clock - new_rq->clock;
 
-	trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+	trace_sched_migrate_task(p, new_cpu);
 
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
-- 
cgit v0.10.2


From a42aaa3bbce85ac487ad4fad5db99e8e91b7aac1 Mon Sep 17 00:00:00 2001
From: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Date: Mon, 4 May 2009 16:27:26 -0400
Subject: blktrace: correct remap names

This attempts to clarify names utilized during block I/O remap
operations (partition, volume manager). It correctly matches up the
/from/ information for both device & sector. This takes in the concept
from Kosaki Motohiro and extends it to include better naming for the
"device_from" field.

[ Impact: cleanup ]

Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <49FF4FAE.3000301@hp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 62763c9..82b4636 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -116,9 +116,9 @@ struct blk_io_trace {
  * The remap event
  */
 struct blk_io_trace_remap {
-	__be32 device;
 	__be32 device_from;
-	__be64 sector;
+	__be32 device_to;
+	__be64 sector_from;
 };
 
 enum {
diff --git a/include/trace/block.h b/include/trace/block.h
index 25b7068..87f6456 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -70,7 +70,7 @@ DECLARE_TRACE(block_split,
 
 DECLARE_TRACE(block_remap,
 	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t from, sector_t to),
-	      TP_ARGS(q, bio, dev, from, to));
+		 sector_t to, sector_t from),
+	      TP_ARGS(q, bio, dev, to, from));
 
 #endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c32062b..f8d46d6 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -830,8 +830,8 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  * @q:		queue the io is for
  * @bio:	the source bio
  * @dev:	target device
- * @from:	source sector
  * @to:		target sector
+ * @from:	source sector
  *
  * Description:
  *     Device mapper or raid target sometimes need to split a bio because
@@ -839,7 +839,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  *
  **/
 static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t from, sector_t to)
+				       dev_t dev, sector_t to, sector_t from)
 {
 	struct blk_trace *bt = q->blk_trace;
 	struct blk_io_trace_remap r;
@@ -847,9 +847,9 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
 	if (likely(!bt))
 		return;
 
-	r.device = cpu_to_be32(dev);
-	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
-	r.sector = cpu_to_be64(to);
+	r.device_from = cpu_to_be32(dev);
+	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
+	r.sector_from = cpu_to_be64(from);
 
 	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
 			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
@@ -1028,11 +1028,11 @@ static void get_pdu_remap(const struct trace_entry *ent,
 			  struct blk_io_trace_remap *r)
 {
 	const struct blk_io_trace_remap *__r = pdu_start(ent);
-	__u64 sector = __r->sector;
+	__u64 sector_from = __r->sector_from;
 
-	r->device = be32_to_cpu(__r->device);
 	r->device_from = be32_to_cpu(__r->device_from);
-	r->sector = be64_to_cpu(sector);
+	r->device_to   = be32_to_cpu(__r->device_to);
+	r->sector_from = be64_to_cpu(sector_from);
 }
 
 typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
@@ -1148,13 +1148,13 @@ static int blk_log_with_error(struct trace_seq *s,
 
 static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
 {
-	struct blk_io_trace_remap r = { .device = 0, };
+	struct blk_io_trace_remap r = { .device_from = 0, };
 
 	get_pdu_remap(ent, &r);
 	return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
-			       t_sector(ent),
-			       t_sec(ent), MAJOR(r.device), MINOR(r.device),
-			       (unsigned long long)r.sector);
+				t_sector(ent), t_sec(ent),
+				MAJOR(r.device_from), MINOR(r.device_from),
+				(unsigned long long)r.sector_from);
 }
 
 static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
-- 
cgit v0.10.2


From 22a7c31a9659deaddafbbcec6562d44141e84474 Mon Sep 17 00:00:00 2001
From: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Date: Mon, 4 May 2009 16:35:08 -0400
Subject: blktrace: from-sector redundant in trace_block_remap

Remove redundant from-sector parameter: it's /always/ the bio's sector
passed in.

[ Impact: cleanup ]

Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <49FF517C.7000503@hp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/block/blk-core.c b/block/blk-core.c
index 07ab754..a5f747a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1275,7 +1275,7 @@ static inline void blk_partition_remap(struct bio *bio)
 		bio->bi_bdev = bdev->bd_contains;
 
 		trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-				    bdev->bd_dev, bio->bi_sector,
+				    bdev->bd_dev,
 				    bio->bi_sector - p->start_sect);
 	}
 }
@@ -1444,8 +1444,7 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 
 		if (old_sector != -1)
-			trace_block_remap(q, bio, old_dev, bio->bi_sector,
-					    old_sector);
+			trace_block_remap(q, bio, old_dev, old_sector);
 
 		trace_block_bio_queue(q, bio);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8a994be..b01514a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -657,8 +657,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
 		/* the bio has been remapped so dispatch it */
 
 		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
-				    tio->io->bio->bi_bdev->bd_dev,
-				    clone->bi_sector, sector);
+				    tio->io->bio->bi_bdev->bd_dev, sector);
 
 		generic_make_request(clone);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/include/trace/block.h b/include/trace/block.h
index 87f6456..8ac945b 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -70,7 +70,7 @@ DECLARE_TRACE(block_split,
 
 DECLARE_TRACE(block_remap,
 	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t to, sector_t from),
-	      TP_ARGS(q, bio, dev, to, from));
+		 sector_t to),
+	      TP_ARGS(q, bio, dev, to));
 
 #endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index f8d46d6..e099f8c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -830,7 +830,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  * @q:		queue the io is for
  * @bio:	the source bio
  * @dev:	target device
- * @to:		target sector
  * @from:	source sector
  *
  * Description:
@@ -839,7 +838,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  *
  **/
 static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t to, sector_t from)
+				       dev_t dev, sector_t from)
 {
 	struct blk_trace *bt = q->blk_trace;
 	struct blk_io_trace_remap r;
@@ -851,8 +850,9 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
 	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
 	r.sector_from = cpu_to_be64(from);
 
-	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
-			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+			BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
+			sizeof(r), &r);
 }
 
 /**
-- 
cgit v0.10.2


From 48dd0fed90e2b1f1ba87401439b85942181c6df3 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Wed, 6 May 2009 15:45:45 +0530
Subject: tracing: trace_output.c, fix false positive compiler warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This compiler warning:

  CC      kernel/trace/trace_output.o
 kernel/trace/trace_output.c: In function ‘register_ftrace_event’:
 kernel/trace/trace_output.c:544: warning: ‘list’ may be used uninitialized in this function

Is wrong as 'list' is always initialized - but GCC (4.3.2) does not
recognize this relationship properly.

Work around the warning by initializing the variable to NULL.

[ Impact: fix false positive compiler warning ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 5fc51f0..8bd9a2c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -541,7 +541,7 @@ int register_ftrace_event(struct trace_event *event)
 	INIT_LIST_HEAD(&event->list);
 
 	if (!event->type) {
-		struct list_head *list;
+		struct list_head *list = NULL;
 
 		if (next_event_type > FTRACE_MAX_EVENT) {
 
-- 
cgit v0.10.2


From 35cf723e99c0e26ddf51f037dffaa4ff2c2c9106 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 6 May 2009 12:33:38 +0200
Subject: tracing: small trave_events sample Makefile cleanup

Use -I$(src) to add the current directory the include path.

[ Impact: cleanup ]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
index 06c6dea..0d428dc 100644
--- a/samples/trace_events/Makefile
+++ b/samples/trace_events/Makefile
@@ -1,8 +1,6 @@
 # builds the trace events example kernel modules;
 # then to use one (as root):  insmod <module_name.ko>
 
-PWD := $(shell pwd)
-
-CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/
+CFLAGS_trace-events-sample.o := -I$(src)
 
 obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
-- 
cgit v0.10.2


From 92d71005e2f305d6dca126d8b8497e885b842dba Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 May 2009 17:18:34 +0200
Subject: ALSA: ice1724 - Check error in set_rate function

The set_rate might return error but the current code doesn't check it.
This patch adds a proper error check.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 128510e7..5c5ef7f 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -626,7 +626,7 @@ static unsigned char stdclock_set_mclk(struct snd_ice1712 *ice,
 	return 0;
 }
 
-static void snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
+static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
 				    int force)
 {
 	unsigned long flags;
@@ -634,17 +634,18 @@ static void snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
 	unsigned int i, old_rate;
 
 	if (rate > ice->hw_rates->list[ice->hw_rates->count - 1])
-		return;
+		return -EINVAL;
+
 	spin_lock_irqsave(&ice->reg_lock, flags);
 	if ((inb(ICEMT1724(ice, DMA_CONTROL)) & DMA_STARTS) ||
 	    (inb(ICEMT1724(ice, DMA_PAUSE)) & DMA_PAUSES)) {
 		/* running? we cannot change the rate now... */
 		spin_unlock_irqrestore(&ice->reg_lock, flags);
-		return;
+		return -EBUSY;
 	}
 	if (!force && is_pro_rate_locked(ice)) {
 		spin_unlock_irqrestore(&ice->reg_lock, flags);
-		return;
+		return (rate == ice->cur_rate) ? 0 : -EBUSY;
 	}
 
 	old_rate = ice->get_rate(ice);
@@ -652,7 +653,7 @@ static void snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
 		ice->set_rate(ice, rate);
 	else if (rate == ice->cur_rate) {
 		spin_unlock_irqrestore(&ice->reg_lock, flags);
-		return;
+		return 0;
 	}
 
 	ice->cur_rate = rate;
@@ -674,13 +675,15 @@ static void snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
 	}
 	if (ice->spdif.ops.setup_rate)
 		ice->spdif.ops.setup_rate(ice, rate);
+
+	return 0;
 }
 
 static int snd_vt1724_pcm_hw_params(struct snd_pcm_substream *substream,
 				    struct snd_pcm_hw_params *hw_params)
 {
 	struct snd_ice1712 *ice = snd_pcm_substream_chip(substream);
-	int i, chs;
+	int i, chs, err;
 
 	chs = params_channels(hw_params);
 	mutex_lock(&ice->open_mutex);
@@ -715,7 +718,11 @@ static int snd_vt1724_pcm_hw_params(struct snd_pcm_substream *substream,
 		}
 	}
 	mutex_unlock(&ice->open_mutex);
-	snd_vt1724_set_pro_rate(ice, params_rate(hw_params), 0);
+
+	err = snd_vt1724_set_pro_rate(ice, params_rate(hw_params), 0);
+	if (err < 0)
+		return err;
+
 	return snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params));
 }
 
-- 
cgit v0.10.2


From a5b7b5c1d05387ffeaf0487482806ec6c5968ac7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 May 2009 17:22:07 +0200
Subject: ALSA: ice1724 - Clean up definitions of DMA records

Rename some vt1724_pcm_reg records to more generic and consistent ones.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 5c5ef7f..8afa043 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -855,20 +855,39 @@ static snd_pcm_uframes_t snd_vt1724_pcm_pointer(struct snd_pcm_substream *substr
 #endif
 }
 
-static const struct vt1724_pcm_reg vt1724_playback_pro_reg = {
+static const struct vt1724_pcm_reg vt1724_pdma0_reg = {
 	.addr = VT1724_MT_PLAYBACK_ADDR,
 	.size = VT1724_MT_PLAYBACK_SIZE,
 	.count = VT1724_MT_PLAYBACK_COUNT,
 	.start = VT1724_PDMA0_START,
 };
 
-static const struct vt1724_pcm_reg vt1724_capture_pro_reg = {
+static const struct vt1724_pcm_reg vt1724_pdma4_reg = {
+	.addr = VT1724_MT_PDMA4_ADDR,
+	.size = VT1724_MT_PDMA4_SIZE,
+	.count = VT1724_MT_PDMA4_COUNT,
+	.start = VT1724_PDMA4_START,
+};
+
+static const struct vt1724_pcm_reg vt1724_rdma0_reg = {
 	.addr = VT1724_MT_CAPTURE_ADDR,
 	.size = VT1724_MT_CAPTURE_SIZE,
 	.count = VT1724_MT_CAPTURE_COUNT,
 	.start = VT1724_RDMA0_START,
 };
 
+static const struct vt1724_pcm_reg vt1724_rdma1_reg = {
+	.addr = VT1724_MT_RDMA1_ADDR,
+	.size = VT1724_MT_RDMA1_SIZE,
+	.count = VT1724_MT_RDMA1_COUNT,
+	.start = VT1724_RDMA1_START,
+};
+
+#define vt1724_playback_pro_reg vt1724_pdma0_reg
+#define vt1724_playback_spdif_reg vt1724_pdma4_reg
+#define vt1724_capture_pro_reg vt1724_rdma0_reg
+#define vt1724_capture_spdif_reg vt1724_rdma1_reg
+
 static const struct snd_pcm_hardware snd_vt1724_playback_pro = {
 	.info =			(SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED |
 				 SNDRV_PCM_INFO_BLOCK_TRANSFER |
@@ -1084,20 +1103,6 @@ static int __devinit snd_vt1724_pcm_profi(struct snd_ice1712 *ice, int device)
  * SPDIF PCM
  */
 
-static const struct vt1724_pcm_reg vt1724_playback_spdif_reg = {
-	.addr = VT1724_MT_PDMA4_ADDR,
-	.size = VT1724_MT_PDMA4_SIZE,
-	.count = VT1724_MT_PDMA4_COUNT,
-	.start = VT1724_PDMA4_START,
-};
-
-static const struct vt1724_pcm_reg vt1724_capture_spdif_reg = {
-	.addr = VT1724_MT_RDMA1_ADDR,
-	.size = VT1724_MT_RDMA1_SIZE,
-	.count = VT1724_MT_RDMA1_COUNT,
-	.start = VT1724_RDMA1_START,
-};
-
 /* update spdif control bits; call with reg_lock */
 static void update_spdif_bits(struct snd_ice1712 *ice, unsigned int val)
 {
-- 
cgit v0.10.2


From d82b64f4764755d765038cf95b1dbe7db039592a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 May 2009 17:25:42 +0200
Subject: ALSA: ice1724 - Add PCI postint to reset sequence

Add the PCI posting to ensure the reset sequence in snd_vt1724_chip_reset().

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 8afa043..1f05c59 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -2258,8 +2258,10 @@ static int __devinit snd_vt1724_read_eeprom(struct snd_ice1712 *ice,
 static void __devinit snd_vt1724_chip_reset(struct snd_ice1712 *ice)
 {
 	outb(VT1724_RESET , ICEREG1724(ice, CONTROL));
+	inb(ICEREG1724(ice, CONTROL)); /* pci posting flush */
 	msleep(10);
 	outb(0, ICEREG1724(ice, CONTROL));
+	inb(ICEREG1724(ice, CONTROL)); /* pci posting flush */
 	msleep(10);
 }
 
-- 
cgit v0.10.2


From 2bf864ac963c5126a9c7c3dce0958390fb612270 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 May 2009 17:30:17 +0200
Subject: ALSA: ice1724 - Allow spec driver to create own routing controls

Added a new flag, own_routing, to allow spec drivers to create own
routing controls.  Also, the basic get/put calls are changed to be
external for later use by maya44 driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ice1712/ice1712.h b/sound/pci/ice1712/ice1712.h
index fdae6de..adc909e 100644
--- a/sound/pci/ice1712/ice1712.h
+++ b/sound/pci/ice1712/ice1712.h
@@ -335,6 +335,7 @@ struct snd_ice1712 {
 	unsigned int force_rdma1:1;	/* VT1720/4 - RDMA1 as non-spdif */
 	unsigned int midi_output:1;	/* VT1720/4: MIDI output triggered */
 	unsigned int midi_input:1;	/* VT1720/4: MIDI input triggered */
+	unsigned int own_routing:1;	/* VT1720/4: use own routing ctls */
 	unsigned int num_total_dacs;	/* total DACs */
 	unsigned int num_total_adcs;	/* total ADCs */
 	unsigned int cur_rate;		/* current rate */
@@ -458,10 +459,17 @@ static inline int snd_ice1712_gpio_read_bits(struct snd_ice1712 *ice,
 	return  snd_ice1712_gpio_read(ice) & mask;
 }
 
+/* route access functions */
+int snd_ice1724_get_route_val(struct snd_ice1712 *ice, int shift);
+int snd_ice1724_put_route_val(struct snd_ice1712 *ice, unsigned int val,
+								int shift);
+
 int snd_ice1712_spdif_build_controls(struct snd_ice1712 *ice);
 
-int snd_ice1712_akm4xxx_init(struct snd_akm4xxx *ak, const struct snd_akm4xxx *template,
-			     const struct snd_ak4xxx_private *priv, struct snd_ice1712 *ice);
+int snd_ice1712_akm4xxx_init(struct snd_akm4xxx *ak,
+			     const struct snd_akm4xxx *template,
+			     const struct snd_ak4xxx_private *priv,
+			     struct snd_ice1712 *ice);
 void snd_ice1712_akm4xxx_free(struct snd_ice1712 *ice);
 int snd_ice1712_akm4xxx_build_controls(struct snd_ice1712 *ice);
 
diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 1f05c59..5a735ee 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -1975,7 +1975,7 @@ static inline int digital_route_shift(int idx)
 	return idx * 3;
 }
 
-static int get_route_val(struct snd_ice1712 *ice, int shift)
+int snd_ice1724_get_route_val(struct snd_ice1712 *ice, int shift)
 {
 	unsigned long val;
 	unsigned char eitem;
@@ -1994,7 +1994,8 @@ static int get_route_val(struct snd_ice1712 *ice, int shift)
 	return eitem;
 }
 
-static int put_route_val(struct snd_ice1712 *ice, unsigned int val, int shift)
+int snd_ice1724_put_route_val(struct snd_ice1712 *ice, unsigned int val,
+								int shift)
 {
 	unsigned int old_val, nval;
 	int change;
@@ -2022,7 +2023,7 @@ static int snd_vt1724_pro_route_analog_get(struct snd_kcontrol *kcontrol,
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
 	ucontrol->value.enumerated.item[0] =
-		get_route_val(ice, analog_route_shift(idx));
+		snd_ice1724_get_route_val(ice, analog_route_shift(idx));
 	return 0;
 }
 
@@ -2031,8 +2032,9 @@ static int snd_vt1724_pro_route_analog_put(struct snd_kcontrol *kcontrol,
 {
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
-	return put_route_val(ice, ucontrol->value.enumerated.item[0],
-			     analog_route_shift(idx));
+	return snd_ice1724_put_route_val(ice,
+					 ucontrol->value.enumerated.item[0],
+					 analog_route_shift(idx));
 }
 
 static int snd_vt1724_pro_route_spdif_get(struct snd_kcontrol *kcontrol,
@@ -2041,7 +2043,7 @@ static int snd_vt1724_pro_route_spdif_get(struct snd_kcontrol *kcontrol,
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
 	ucontrol->value.enumerated.item[0] =
-		get_route_val(ice, digital_route_shift(idx));
+		snd_ice1724_get_route_val(ice, digital_route_shift(idx));
 	return 0;
 }
 
@@ -2050,11 +2052,13 @@ static int snd_vt1724_pro_route_spdif_put(struct snd_kcontrol *kcontrol,
 {
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
-	return put_route_val(ice, ucontrol->value.enumerated.item[0],
-			     digital_route_shift(idx));
+	return snd_ice1724_put_route_val(ice,
+					 ucontrol->value.enumerated.item[0],
+					 digital_route_shift(idx));
 }
 
-static struct snd_kcontrol_new snd_vt1724_mixer_pro_analog_route __devinitdata = {
+static struct snd_kcontrol_new snd_vt1724_mixer_pro_analog_route __devinitdata =
+{
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "H/W Playback Route",
 	.info = snd_vt1724_pro_route_info,
@@ -2291,9 +2295,12 @@ static int __devinit snd_vt1724_spdif_build_controls(struct snd_ice1712 *ice)
 	if (snd_BUG_ON(!ice->pcm))
 		return -EIO;
 
-	err = snd_ctl_add(ice->card, snd_ctl_new1(&snd_vt1724_mixer_pro_spdif_route, ice));
-	if (err < 0)
-		return err;
+	if (!ice->own_routing) {
+		err = snd_ctl_add(ice->card,
+			snd_ctl_new1(&snd_vt1724_mixer_pro_spdif_route, ice));
+		if (err < 0)
+			return err;
+	}
 
 	err = snd_ctl_add(ice->card, snd_ctl_new1(&snd_vt1724_spdif_switch, ice));
 	if (err < 0)
@@ -2340,7 +2347,7 @@ static int __devinit snd_vt1724_build_controls(struct snd_ice1712 *ice)
 	if (err < 0)
 		return err;
 
-	if (ice->num_total_dacs > 0) {
+	if (!ice->own_routing && ice->num_total_dacs > 0) {
 		struct snd_kcontrol_new tmp = snd_vt1724_mixer_pro_analog_route;
 		tmp.count = ice->num_total_dacs;
 		if (ice->vt1720 && tmp.count > 2)
-- 
cgit v0.10.2


From 72cbfd45fac627de4bd38c203baaac40cd26477c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 6 May 2009 17:33:19 +0200
Subject: ALSA: ice1724 - Add ESI Maya44 support

Added the support for ESI Maya44 board to ice1724 driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 012858d..821a994 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -925,6 +925,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 			* Onkyo SE-90PCI
 			* Onkyo SE-200PCI
 			* ESI Juli@
+			* ESI Maya44
 			* Hercules Fortissimo IV
 			* EGO-SYS WaveTerminal 192M
 
@@ -933,7 +934,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 		  prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192,
 		  juli, aureon51, aureon71, universe, ap192, k8x800,
 		  phase22, phase28, ms300, av710, se200pci, se90pci,
-		  fortissimo4, sn25p, WT192M
+		  fortissimo4, sn25p, WT192M, maya44
 
     This module supports multiple cards and autoprobe.
 
diff --git a/Documentation/sound/alsa/README.maya44 b/Documentation/sound/alsa/README.maya44
new file mode 100644
index 0000000..0e41576
--- /dev/null
+++ b/Documentation/sound/alsa/README.maya44
@@ -0,0 +1,163 @@
+NOTE: The following is the original document of Rainer's patch that the
+current maya44 code based on.  Some contents might be obsoleted, but I
+keep here as reference -- tiwai
+
+----------------------------------------------------------------
+ 
+STATE OF DEVELOPMENT:
+
+This driver is being developed on the initiative of Piotr Makowski (oponek@gmail.com) and financed by Lars Bergmann.
+Development is carried out by Rainer Zimmermann (mail@lightshed.de).
+
+ESI provided a sample Maya44 card for the development work.
+
+However, unfortunately it has turned out difficult to get detailed programming information, so I (Rainer Zimmermann) had to find out some card-specific information by experiment and conjecture. Some information (in particular, several GPIO bits) is still missing.
+
+This is the first testing version of the Maya44 driver released to the alsa-devel mailing list (Feb 5, 2008).
+
+
+The following functions work, as tested by Rainer Zimmermann and Piotr Makowski:
+
+- playback and capture at all sampling rates
+- input/output level
+- crossmixing
+- line/mic switch
+- phantom power switch
+- analogue monitor a.k.a bypass
+
+
+The following functions *should* work, but are not fully tested:
+
+- Channel 3+4 analogue - S/PDIF input switching
+- S/PDIF output
+- all inputs/outputs on the M/IO/DIO extension card
+- internal/external clock selection
+
+
+*In particular, we would appreciate testing of these functions by anyone who has access to an M/IO/DIO extension card.*
+
+
+Things that do not seem to work:
+
+- The level meters ("multi track") in 'alsamixer' do not seem to react to signals in (if this is a bug, it would probably be in the existing ICE1724 code).
+
+- Ardour 2.1 seems to work only via JACK, not using ALSA directly or via OSS. This still needs to be tracked down.
+
+
+DRIVER DETAILS:
+
+the following files were added:
+
+pci/ice1724/maya44.c        - Maya44 specific code
+pci/ice1724/maya44.h
+pci/ice1724/ice1724.patch
+pci/ice1724/ice1724.h.patch - PROPOSED patch to ice1724.h (see SAMPLING RATES)
+i2c/other/wm8776.c  - low-level access routines for Wolfson WM8776 codecs 
+include/wm8776.h
+
+
+Note that the wm8776.c code is meant to be card-independent and does not actually register the codec with the ALSA infrastructure.
+This is done in maya44.c, mainly because some of the WM8776 controls are used in Maya44-specific ways, and should be named appropriately.
+
+
+the following files were created in pci/ice1724, simply #including the corresponding file from the alsa-kernel tree:
+
+wtm.h
+vt1720_mobo.h
+revo.h
+prodigy192.h
+pontis.h
+phase.h
+maya44.h
+juli.h
+aureon.h
+amp.h
+envy24ht.h
+se.h
+prodigy_hifi.h
+
+
+*I hope this is the correct way to do things.*
+
+
+SAMPLING RATES:
+
+The Maya44 card (or more exactly, the Wolfson WM8776 codecs) allow a maximum sampling rate of 192 kHz for playback and 92 kHz for capture.
+
+As the ICE1724 chip only allows one global sampling rate, this is handled as follows:
+
+* setting the sampling rate on any open PCM device on the maya44 card will always set the *global* sampling rate for all playback and capture channels.
+
+* In the current state of the driver, setting rates of up to 192 kHz is permitted even for capture devices.
+
+*AVOID CAPTURING AT RATES ABOVE 96kHz*, even though it may appear to work. The codec cannot actually capture at such rates, meaning poor quality.
+
+
+I propose some additional code for limiting the sampling rate when setting on a capture pcm device. However because of the global sampling rate, this logic would be somewhat problematic.
+
+The proposed code (currently deactivated) is in ice1712.h.patch, ice1724.c and maya44.c (in pci/ice1712).
+
+
+SOUND DEVICES:
+
+PCM devices correspond to inputs/outputs as follows (assuming Maya44 is card #0):
+
+hw:0,0 input - stereo, analog input 1+2
+hw:0,0 output - stereo, analog output 1+2
+hw:0,1 input - stereo, analog input 3+4 OR S/PDIF input
+hw:0,1 output - stereo, analog output 3+4 (and SPDIF out)
+
+
+NAMING OF MIXER CONTROLS:
+
+(for more information about the signal flow, please refer to the block diagram on p.24 of the ESI Maya44 manual, or in the ESI windows software).
+
+
+PCM: (digital) output level for channel 1+2
+PCM 1: same for channel 3+4
+
+Mic Phantom+48V: switch for +48V phantom power for electrostatic microphones on input 1/2.
+    Make sure this is not turned on while any other source is connected to input 1/2.
+    It might damage the source and/or the maya44 card.
+
+Mic/Line input: if switch is is on, input jack 1/2 is microphone input (mono), otherwise line input (stereo).
+
+Bypass: analogue bypass from ADC input to output for channel 1+2. Same as "Monitor" in the windows driver.
+Bypass 1: same for channel 3+4.
+
+Crossmix: cross-mixer from channels 1+2 to channels 3+4
+Crossmix 1: cross-mixer from channels 3+4 to channels 1+2
+
+IEC958 Output: switch for S/PDIF output.
+    This is not supported by the ESI windows driver.
+    S/PDIF should output the same signal as channel 3+4. [untested!]
+
+
+Digitial output selectors:
+
+    These switches allow a direct digital routing from the ADCs to the DACs.
+    Each switch determines where the digital input data to one of the DACs comes from.
+    They are not supported by the ESI windows driver.
+    For normal operation, they should all be set to "PCM out".
+
+H/W: Output source channel 1
+H/W 1: Output source channel 2
+H/W 2: Output source channel 3
+H/W 3: Output source channel 4
+
+H/W 4 ... H/W 9: unknown function, left in to enable testing.
+    Possibly some of these control S/PDIF output(s).
+    If these turn out to be unused, they will go away in later driver versions.
+
+Selectable values for each of the digital output selectors are:
+   "PCM out" -> DAC output of the corresponding channel (default setting)
+   "Input 1"...
+   "Input 4" -> direct routing from ADC output of the selected input channel
+
+
+--------
+
+Feb 14, 2008
+Rainer Zimmermann
+mail@lightshed.de
+
diff --git a/sound/pci/ice1712/Makefile b/sound/pci/ice1712/Makefile
index f99fe08..536eae2 100644
--- a/sound/pci/ice1712/Makefile
+++ b/sound/pci/ice1712/Makefile
@@ -5,7 +5,7 @@
 
 snd-ice17xx-ak4xxx-objs := ak4xxx.o
 snd-ice1712-objs := ice1712.o delta.o hoontech.o ews.o
-snd-ice1724-objs := ice1724.o amp.o revo.o aureon.o vt1720_mobo.o pontis.o prodigy192.o prodigy_hifi.o juli.o phase.o wtm.o se.o
+snd-ice1724-objs := ice1724.o amp.o revo.o aureon.o vt1720_mobo.o pontis.o prodigy192.o prodigy_hifi.o juli.o phase.o wtm.o se.o maya44.o
 
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_ICE1712) += snd-ice1712.o snd-ice17xx-ak4xxx.o
diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 5a735ee..36ade77 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -49,6 +49,7 @@
 #include "prodigy192.h"
 #include "prodigy_hifi.h"
 #include "juli.h"
+#include "maya44.h"
 #include "phase.h"
 #include "wtm.h"
 #include "se.h"
@@ -65,6 +66,7 @@ MODULE_SUPPORTED_DEVICE("{"
 	       PRODIGY192_DEVICE_DESC
 	       PRODIGY_HIFI_DEVICE_DESC
 	       JULI_DEVICE_DESC
+	       MAYA44_DEVICE_DESC
 	       PHASE_DEVICE_DESC
 	       WTM_DEVICE_DESC
 	       SE_DEVICE_DESC
@@ -2125,6 +2127,7 @@ static struct snd_ice1712_card_info *card_tables[] __devinitdata = {
 	snd_vt1724_prodigy_hifi_cards,
 	snd_vt1724_prodigy192_cards,
 	snd_vt1724_juli_cards,
+	snd_vt1724_maya44_cards,
 	snd_vt1724_phase_cards,
 	snd_vt1724_wtm_cards,
 	snd_vt1724_se_cards,
diff --git a/sound/pci/ice1712/maya44.c b/sound/pci/ice1712/maya44.c
new file mode 100644
index 0000000..3e1c20a
--- /dev/null
+++ b/sound/pci/ice1712/maya44.c
@@ -0,0 +1,779 @@
+/*
+ *   ALSA driver for ICEnsemble VT1724 (Envy24HT)
+ *
+ *   Lowlevel functions for ESI Maya44 cards
+ *
+ *	Copyright (c) 2009 Takashi Iwai <tiwai@suse.de>
+ *	Based on the patches by Rainer Zimmermann <mail@lightshed.de>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <sound/core.h>
+#include <sound/control.h>
+#include <sound/pcm.h>
+#include <sound/tlv.h>
+
+#include "ice1712.h"
+#include "envy24ht.h"
+#include "maya44.h"
+
+/* WM8776 register indexes */
+#define WM8776_REG_HEADPHONE_L		0x00
+#define WM8776_REG_HEADPHONE_R		0x01
+#define WM8776_REG_HEADPHONE_MASTER	0x02
+#define WM8776_REG_DAC_ATTEN_L		0x03
+#define WM8776_REG_DAC_ATTEN_R		0x04
+#define WM8776_REG_DAC_ATTEN_MASTER	0x05
+#define WM8776_REG_DAC_PHASE		0x06
+#define WM8776_REG_DAC_CONTROL		0x07
+#define WM8776_REG_DAC_MUTE		0x08
+#define WM8776_REG_DAC_DEEMPH		0x09
+#define WM8776_REG_DAC_IF_CONTROL	0x0a
+#define WM8776_REG_ADC_IF_CONTROL	0x0b
+#define WM8776_REG_MASTER_MODE_CONTROL	0x0c
+#define WM8776_REG_POWERDOWN		0x0d
+#define WM8776_REG_ADC_ATTEN_L		0x0e
+#define WM8776_REG_ADC_ATTEN_R		0x0f
+#define WM8776_REG_ADC_ALC1		0x10
+#define WM8776_REG_ADC_ALC2		0x11
+#define WM8776_REG_ADC_ALC3		0x12
+#define WM8776_REG_ADC_NOISE_GATE	0x13
+#define WM8776_REG_ADC_LIMITER		0x14
+#define WM8776_REG_ADC_MUX		0x15
+#define WM8776_REG_OUTPUT_MUX		0x16
+#define WM8776_REG_RESET		0x17
+
+#define WM8776_NUM_REGS			0x18
+
+/* clock ratio identifiers for snd_wm8776_set_rate() */
+#define WM8776_CLOCK_RATIO_128FS	0
+#define WM8776_CLOCK_RATIO_192FS	1
+#define WM8776_CLOCK_RATIO_256FS	2
+#define WM8776_CLOCK_RATIO_384FS	3
+#define WM8776_CLOCK_RATIO_512FS	4
+#define WM8776_CLOCK_RATIO_768FS	5
+
+enum { WM_VOL_HP, WM_VOL_DAC, WM_VOL_ADC, WM_NUM_VOLS };
+enum { WM_SW_DAC, WM_SW_BYPASS, WM_NUM_SWITCHES };
+
+struct snd_wm8776 {
+	unsigned char addr;
+	unsigned short regs[WM8776_NUM_REGS];
+	unsigned char volumes[WM_NUM_VOLS][2];
+	unsigned int switch_bits;
+};
+
+struct snd_maya44 {
+	struct snd_ice1712 *ice;
+	struct snd_wm8776 wm[2];
+	struct mutex mutex;
+};
+
+
+/* write the given register and save the data to the cache */
+static void wm8776_write(struct snd_ice1712 *ice, struct snd_wm8776 *wm,
+			 unsigned char reg, unsigned short val)
+{
+	/*
+	 * WM8776 registers are up to 9 bits wide, bit 8 is placed in the LSB
+	 * of the address field
+	 */
+	snd_vt1724_write_i2c(ice, wm->addr,
+			     (reg << 1) | ((val >> 8) & 1),
+			     val & 0xff);
+	wm->regs[reg] = val;
+}
+
+/*
+ * update the given register with and/or mask and save the data to the cache
+ */
+static int wm8776_write_bits(struct snd_ice1712 *ice, struct snd_wm8776 *wm,
+			     unsigned char reg,
+			     unsigned short mask, unsigned short val)
+{
+	val |= wm->regs[reg] & ~mask;
+	if (val != wm->regs[reg]) {
+		wm8776_write(ice, wm, reg, val);
+		return 1;
+	}
+	return 0;
+}
+
+
+/*
+ * WM8776 volume controls
+ */
+
+struct maya_vol_info {
+	unsigned int maxval;		/* volume range: 0..maxval */
+	unsigned char regs[2];		/* left and right registers */
+	unsigned short mask;		/* value mask */
+	unsigned short offset;		/* zero-value offset */
+	unsigned short mute;		/* mute bit */
+	unsigned short update;		/* update bits */
+	unsigned char mux_bits[2];	/* extra bits for ADC mute */
+};
+
+static struct maya_vol_info vol_info[WM_NUM_VOLS] = {
+	[WM_VOL_HP] = {
+		.maxval = 80,
+		.regs = { WM8776_REG_HEADPHONE_L, WM8776_REG_HEADPHONE_R },
+		.mask = 0x7f,
+		.offset = 0x30,
+		.mute = 0x00,
+		.update = 0x180,	/* update and zero-cross enable */
+	},
+	[WM_VOL_DAC] = {
+		.maxval = 255,
+		.regs = { WM8776_REG_DAC_ATTEN_L, WM8776_REG_DAC_ATTEN_R },
+		.mask = 0xff,
+		.offset = 0x01,
+		.mute = 0x00,
+		.update = 0x100,	/* zero-cross enable */
+	},
+	[WM_VOL_ADC] = {
+		.maxval = 91,
+		.regs = { WM8776_REG_ADC_ATTEN_L, WM8776_REG_ADC_ATTEN_R },
+		.mask = 0xff,
+		.offset = 0xa5,
+		.mute = 0xa5,
+		.update = 0x100,	/* update */
+		.mux_bits = { 0x80, 0x40 }, /* ADCMUX bits */
+	},
+};
+
+/*
+ * dB tables
+ */
+/* headphone output: mute, -73..+6db (1db step) */
+static const DECLARE_TLV_DB_SCALE(db_scale_hp, -7400, 100, 1);
+/* DAC output: mute, -127..0db (0.5db step) */
+static const DECLARE_TLV_DB_SCALE(db_scale_dac, -12750, 50, 1);
+/* ADC gain: mute, -21..+24db (0.5db step) */
+static const DECLARE_TLV_DB_SCALE(db_scale_adc, -2100, 50, 1);
+
+static int maya_vol_info(struct snd_kcontrol *kcontrol,
+			 struct snd_ctl_elem_info *uinfo)
+{
+	unsigned int idx = kcontrol->private_value;
+	struct maya_vol_info *vol = &vol_info[idx];
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 2;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = vol->maxval;
+	return 0;
+}
+
+static int maya_vol_get(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	struct snd_wm8776 *wm =
+		&chip->wm[snd_ctl_get_ioff(kcontrol, &ucontrol->id)];
+	unsigned int idx = kcontrol->private_value;
+
+	mutex_lock(&chip->mutex);
+	ucontrol->value.integer.value[0] = wm->volumes[idx][0];
+	ucontrol->value.integer.value[1] = wm->volumes[idx][1];
+	mutex_unlock(&chip->mutex);
+	return 0;
+}
+
+static int maya_vol_put(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	struct snd_wm8776 *wm =
+		&chip->wm[snd_ctl_get_ioff(kcontrol, &ucontrol->id)];
+	unsigned int idx = kcontrol->private_value;
+	struct maya_vol_info *vol = &vol_info[idx];
+	unsigned int val, data;
+	int ch, changed = 0;
+
+	mutex_lock(&chip->mutex);
+	for (ch = 0; ch < 2; ch++) {
+		val = ucontrol->value.integer.value[ch];
+		if (val > vol->maxval)
+			val = vol->maxval;
+		if (val == wm->volumes[idx][ch])
+			continue;
+		if (!val)
+			data = vol->mute;
+		else
+			data = (val - 1) + vol->offset;
+		data |= vol->update;
+		changed |= wm8776_write_bits(chip->ice, wm, vol->regs[ch],
+					     vol->mask | vol->update, data);
+		if (vol->mux_bits[ch])
+			wm8776_write_bits(chip->ice, wm, WM8776_REG_ADC_MUX,
+					  vol->mux_bits[ch],
+					  val ? 0 : vol->mux_bits[ch]);
+		wm->volumes[idx][ch] = val;
+	}
+	mutex_unlock(&chip->mutex);
+	return changed;
+}
+
+/*
+ * WM8776 switch controls
+ */
+
+#define COMPOSE_SW_VAL(idx, reg, mask)	((idx) | ((reg) << 8) | ((mask) << 16))
+#define GET_SW_VAL_IDX(val)	((val) & 0xff)
+#define GET_SW_VAL_REG(val)	(((val) >> 8) & 0xff)
+#define GET_SW_VAL_MASK(val)	(((val) >> 16) & 0xff)
+
+#define maya_sw_info	snd_ctl_boolean_mono_info
+
+static int maya_sw_get(struct snd_kcontrol *kcontrol,
+		       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	struct snd_wm8776 *wm =
+		&chip->wm[snd_ctl_get_ioff(kcontrol, &ucontrol->id)];
+	unsigned int idx = GET_SW_VAL_IDX(kcontrol->private_value);
+
+	ucontrol->value.integer.value[0] = (wm->switch_bits >> idx) & 1;
+	return 0;
+}
+
+static int maya_sw_put(struct snd_kcontrol *kcontrol,
+		       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	struct snd_wm8776 *wm =
+		&chip->wm[snd_ctl_get_ioff(kcontrol, &ucontrol->id)];
+	unsigned int idx = GET_SW_VAL_IDX(kcontrol->private_value);
+	unsigned int mask, val;
+	int changed;
+
+	mutex_lock(&chip->mutex);
+	mask = 1 << idx;
+	wm->switch_bits &= ~mask;
+	val = ucontrol->value.integer.value[0];
+	if (val)
+		wm->switch_bits |= mask;
+	mask = GET_SW_VAL_MASK(kcontrol->private_value);
+	changed = wm8776_write_bits(chip->ice, wm,
+				    GET_SW_VAL_REG(kcontrol->private_value),
+				    mask, val ? mask : 0);
+	mutex_unlock(&chip->mutex);
+	return changed;
+}
+
+/*
+ * GPIO pins (known ones for maya44)
+ */
+#define GPIO_PHANTOM_OFF	2
+#define GPIO_MIC_RELAY		4
+#define GPIO_SPDIF_IN_INV	5
+#define GPIO_MUST_BE_0		7
+
+/*
+ * GPIO switch controls
+ */
+
+#define COMPOSE_GPIO_VAL(shift, inv)	((shift) | ((inv) << 8))
+#define GET_GPIO_VAL_SHIFT(val)		((val) & 0xff)
+#define GET_GPIO_VAL_INV(val)		(((val) >> 8) & 1)
+
+static int maya_set_gpio_bits(struct snd_ice1712 *ice, unsigned int mask,
+			      unsigned int bits)
+{
+	unsigned int data;
+	data = snd_ice1712_gpio_read(ice);
+	if ((data & mask) == bits)
+		return 0;
+	snd_ice1712_gpio_write(ice, (data & ~mask) | bits);
+	return 1;
+}
+
+#define maya_gpio_sw_info	snd_ctl_boolean_mono_info
+
+static int maya_gpio_sw_get(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	unsigned int shift = GET_GPIO_VAL_SHIFT(kcontrol->private_value);
+	unsigned int val;
+
+	val = (snd_ice1712_gpio_read(chip->ice) >> shift) & 1;
+	if (GET_GPIO_VAL_INV(kcontrol->private_value))
+		val = !val;
+	ucontrol->value.integer.value[0] = val;
+	return 0;
+}
+
+static int maya_gpio_sw_put(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	unsigned int shift = GET_GPIO_VAL_SHIFT(kcontrol->private_value);
+	unsigned int val, mask;
+	int changed;
+
+	mutex_lock(&chip->mutex);
+	mask = 1 << shift;
+	val = ucontrol->value.integer.value[0];
+	if (GET_GPIO_VAL_INV(kcontrol->private_value))
+		val = !val;
+	val = val ? mask : 0;
+	changed = maya_set_gpio_bits(chip->ice, mask, val);
+	mutex_unlock(&chip->mutex);
+	return changed;
+}
+
+/*
+ * capture source selection
+ */
+
+/* known working input slots (0-4) */
+#define MAYA_LINE_IN	1	/* in-2 */
+#define MAYA_MIC_IN	4	/* in-5 */
+
+static void wm8776_select_input(struct snd_maya44 *chip, int idx, int line)
+{
+	wm8776_write_bits(chip->ice, &chip->wm[idx], WM8776_REG_ADC_MUX,
+			  0x1f, 1 << line);
+}
+
+static int maya_rec_src_info(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[] = { "Line", "Mic" };
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = ARRAY_SIZE(texts);
+	if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+		uinfo->value.enumerated.item =
+			uinfo->value.enumerated.items - 1;
+	strcpy(uinfo->value.enumerated.name,
+	       texts[uinfo->value.enumerated.item]);
+	return 0;
+}
+
+static int maya_rec_src_get(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	int sel;
+
+	if (snd_ice1712_gpio_read(chip->ice) & (1 << GPIO_MIC_RELAY))
+		sel = 1;
+	else
+		sel = 0;
+	ucontrol->value.enumerated.item[0] = sel;
+	return 0;
+}
+
+static int maya_rec_src_put(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	int sel = ucontrol->value.enumerated.item[0];
+	int changed;
+
+	mutex_lock(&chip->mutex);
+	changed = maya_set_gpio_bits(chip->ice, GPIO_MIC_RELAY,
+				     sel ? GPIO_MIC_RELAY : 0);
+	wm8776_select_input(chip, 0, sel ? MAYA_MIC_IN : MAYA_LINE_IN);
+	mutex_unlock(&chip->mutex);
+	return changed;
+}
+
+/*
+ * Maya44 routing switch settings have different meanings than the standard
+ * ice1724 switches as defined in snd_vt1724_pro_route_info (ice1724.c).
+ */
+static int maya_pb_route_info(struct snd_kcontrol *kcontrol,
+			      struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[] = {
+		"PCM Out", /* 0 */
+		"Input 1", "Input 2", "Input 3", "Input 4"
+	};
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = ARRAY_SIZE(texts);
+	if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+		uinfo->value.enumerated.item =
+			uinfo->value.enumerated.items - 1;
+	strcpy(uinfo->value.enumerated.name,
+	       texts[uinfo->value.enumerated.item]);
+	return 0;
+}
+
+static int maya_pb_route_shift(int idx)
+{
+	static const unsigned char shift[10] =
+		{ 8, 20, 0, 3, 11, 23, 14, 26, 17, 29 };
+	return shift[idx % 10];
+}
+
+static int maya_pb_route_get(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
+	ucontrol->value.enumerated.item[0] =
+		snd_ice1724_get_route_val(chip->ice, maya_pb_route_shift(idx));
+	return 0;
+}
+
+static int maya_pb_route_put(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_maya44 *chip = snd_kcontrol_chip(kcontrol);
+	int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
+	return snd_ice1724_put_route_val(chip->ice,
+					 ucontrol->value.enumerated.item[0],
+					 maya_pb_route_shift(idx));
+}
+
+
+/*
+ * controls to be added
+ */
+
+static struct snd_kcontrol_new maya_controls[] __devinitdata = {
+	{
+		.name = "Crossmix Playback Volume",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+		.info = maya_vol_info,
+		.get = maya_vol_get,
+		.put = maya_vol_put,
+		.tlv = { .p = db_scale_hp },
+		.private_value = WM_VOL_HP,
+		.count = 2,
+	},
+	{
+		.name = "PCM Playback Volume",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+		.info = maya_vol_info,
+		.get = maya_vol_get,
+		.put = maya_vol_put,
+		.tlv = { .p = db_scale_dac },
+		.private_value = WM_VOL_DAC,
+		.count = 2,
+	},
+	{
+		.name = "Line Capture Volume",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+		.info = maya_vol_info,
+		.get = maya_vol_get,
+		.put = maya_vol_put,
+		.tlv = { .p = db_scale_adc },
+		.private_value = WM_VOL_ADC,
+		.count = 2,
+	},
+	{
+		.name = "PCM Playback Switch",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_sw_info,
+		.get = maya_sw_get,
+		.put = maya_sw_put,
+		.private_value = COMPOSE_SW_VAL(WM_SW_DAC,
+						WM8776_REG_OUTPUT_MUX, 0x01),
+		.count = 2,
+	},
+	{
+		.name = "Bypass Playback Switch",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_sw_info,
+		.get = maya_sw_get,
+		.put = maya_sw_put,
+		.private_value = COMPOSE_SW_VAL(WM_SW_BYPASS,
+						WM8776_REG_OUTPUT_MUX, 0x04),
+		.count = 2,
+	},
+	{
+		.name = "Capture Source",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_rec_src_info,
+		.get = maya_rec_src_get,
+		.put = maya_rec_src_put,
+	},
+	{
+		.name = "Mic Phantom Power Switch",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_gpio_sw_info,
+		.get = maya_gpio_sw_get,
+		.put = maya_gpio_sw_put,
+		.private_value = COMPOSE_GPIO_VAL(GPIO_PHANTOM_OFF, 1),
+	},
+	{
+		.name = "SPDIF Capture Switch",
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.info = maya_gpio_sw_info,
+		.get = maya_gpio_sw_get,
+		.put = maya_gpio_sw_put,
+		.private_value = COMPOSE_GPIO_VAL(GPIO_SPDIF_IN_INV, 1),
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "H/W Playback Route",
+		.info = maya_pb_route_info,
+		.get = maya_pb_route_get,
+		.put = maya_pb_route_put,
+		.count = 4,  /* FIXME: do controls 5-9 have any meaning? */
+	},
+};
+
+static int __devinit maya44_add_controls(struct snd_ice1712 *ice)
+{
+	int err, i;
+
+	for (i = 0; i < ARRAY_SIZE(maya_controls); i++) {
+		err = snd_ctl_add(ice->card, snd_ctl_new1(&maya_controls[i],
+							  ice->spec));
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+
+/*
+ * initialize a wm8776 chip
+ */
+static void __devinit wm8776_init(struct snd_ice1712 *ice,
+				  struct snd_wm8776 *wm, unsigned int addr)
+{
+	static const unsigned short inits_wm8776[] = {
+		0x02, 0x100, /* R2: headphone L+R muted + update */
+		0x05, 0x100, /* R5: DAC output L+R muted + update */
+		0x06, 0x000, /* R6: DAC output phase normal */
+		0x07, 0x091, /* R7: DAC enable zero cross detection,
+				normal output */
+		0x08, 0x000, /* R8: DAC soft mute off */
+		0x09, 0x000, /* R9: no deemph, DAC zero detect disabled */
+		0x0a, 0x022, /* R10: DAC I2C mode, std polarities, 24bit */
+		0x0b, 0x022, /* R11: ADC I2C mode, std polarities, 24bit,
+				highpass filter enabled */
+		0x0c, 0x042, /* R12: ADC+DAC slave, ADC+DAC 44,1kHz */
+		0x0d, 0x000, /* R13: all power up */
+		0x0e, 0x100, /* R14: ADC left muted,
+				enable zero cross detection */
+		0x0f, 0x100, /* R15: ADC right muted,
+				enable zero cross detection */
+			     /* R16: ALC...*/
+		0x11, 0x000, /* R17: disable ALC */
+			     /* R18: ALC...*/
+			     /* R19: noise gate...*/
+		0x15, 0x000, /* R21: ADC input mux init, mute all inputs */
+		0x16, 0x001, /* R22: output mux, select DAC */
+		0xff, 0xff
+	};
+
+	const unsigned short *ptr;
+	unsigned char reg;
+	unsigned short data;
+
+	wm->addr = addr;
+	/* enable DAC output; mute bypass, aux & all inputs */
+	wm->switch_bits = (1 << WM_SW_DAC);
+
+	ptr = inits_wm8776;
+	while (*ptr != 0xff) {
+		reg = *ptr++;
+		data = *ptr++;
+		wm8776_write(ice, wm, reg, data);
+	}
+}
+
+
+/*
+ * change the rate on the WM8776 codecs.
+ * this assumes that the VT17xx's rate is changed by the calling function.
+ * NOTE: even though the WM8776's are running in slave mode and rate
+ * selection is automatic, we need to call snd_wm8776_set_rate() here
+ * to make sure some flags are set correctly.
+ */
+static void set_rate(struct snd_ice1712 *ice, unsigned int rate)
+{
+	struct snd_maya44 *chip = ice->spec;
+	unsigned int ratio, adc_ratio, val;
+	int i;
+
+	switch (rate) {
+	case 192000:
+		ratio = WM8776_CLOCK_RATIO_128FS;
+		break;
+	case 176400:
+		ratio = WM8776_CLOCK_RATIO_128FS;
+		break;
+	case 96000:
+		ratio = WM8776_CLOCK_RATIO_256FS;
+		break;
+	case 88200:
+		ratio = WM8776_CLOCK_RATIO_384FS;
+		break;
+	case 48000:
+		ratio = WM8776_CLOCK_RATIO_512FS;
+		break;
+	case 44100:
+		ratio = WM8776_CLOCK_RATIO_512FS;
+		break;
+	case 32000:
+		ratio = WM8776_CLOCK_RATIO_768FS;
+		break;
+	case 0:
+		/* no hint - S/PDIF input is master, simply return */
+		return;
+	default:
+		snd_BUG();
+		return;
+	}
+
+	/*
+	 * this currently sets the same rate for ADC and DAC, but limits
+	 * ADC rate to 256X (96kHz). For 256X mode (96kHz), this sets ADC
+	 * oversampling to 64x, as recommended by WM8776 datasheet.
+	 * Setting the rate is not really necessary in slave mode.
+	 */
+	adc_ratio = ratio;
+	if (adc_ratio < WM8776_CLOCK_RATIO_256FS)
+		adc_ratio = WM8776_CLOCK_RATIO_256FS;
+
+	val = adc_ratio;
+	if (adc_ratio == WM8776_CLOCK_RATIO_256FS)
+		val |= 8;
+	val |= ratio << 4;
+
+	mutex_lock(&chip->mutex);
+	for (i = 0; i < 2; i++)
+		wm8776_write_bits(ice, &chip->wm[i],
+				  WM8776_REG_MASTER_MODE_CONTROL,
+				  0x180, val);
+	mutex_unlock(&chip->mutex);
+}
+
+/*
+ * supported sample rates (to override the default one)
+ */
+
+static unsigned int rates[] = {
+	32000, 44100, 48000, 64000, 88200, 96000, 176400, 192000
+};
+
+/* playback rates: 32..192 kHz */
+static struct snd_pcm_hw_constraint_list dac_rates = {
+	.count = ARRAY_SIZE(rates),
+	.list = rates,
+	.mask = 0
+};
+
+
+/*
+ * chip addresses on I2C bus
+ */
+static unsigned char wm8776_addr[2] __devinitdata = {
+	0x34, 0x36, /* codec 0 & 1 */
+};
+
+/*
+ * initialize the chip
+ */
+static int __devinit maya44_init(struct snd_ice1712 *ice)
+{
+	int i;
+	struct snd_maya44 *chip;
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+	mutex_init(&chip->mutex);
+	chip->ice = ice;
+	ice->spec = chip;
+
+	/* initialise codecs */
+	ice->num_total_dacs = 4;
+	ice->num_total_adcs = 4;
+	ice->akm_codecs = 0;
+
+	for (i = 0; i < 2; i++) {
+		wm8776_init(ice, &chip->wm[i], wm8776_addr[i]);
+		wm8776_select_input(chip, i, MAYA_LINE_IN);
+	}
+
+	/* set card specific rates */
+	ice->hw_rates = &dac_rates;
+
+	/* register change rate notifier */
+	ice->gpio.set_pro_rate = set_rate;
+
+	/* RDMA1 (2nd input channel) is used for ADC by default */
+	ice->force_rdma1 = 1;
+
+	/* have an own routing control */
+	ice->own_routing = 1;
+
+	return 0;
+}
+
+
+/*
+ * Maya44 boards don't provide the EEPROM data except for the vendor IDs.
+ * hence the driver needs to sets up it properly.
+ */
+
+static unsigned char maya44_eeprom[] __devinitdata = {
+	[ICE_EEP2_SYSCONF]     = 0x45,
+		/* clock xin1=49.152MHz, mpu401, 2 stereo ADCs+DACs */
+	[ICE_EEP2_ACLINK]      = 0x80,
+		/* I2S */
+	[ICE_EEP2_I2S]         = 0xf8,
+		/* vol, 96k, 24bit, 192k */
+	[ICE_EEP2_SPDIF]       = 0xc3,
+		/* enable spdif out, spdif out supp, spdif-in, ext spdif out */
+	[ICE_EEP2_GPIO_DIR]    = 0xff,
+	[ICE_EEP2_GPIO_DIR1]   = 0xff,
+	[ICE_EEP2_GPIO_DIR2]   = 0xff,
+	[ICE_EEP2_GPIO_MASK]   = 0/*0x9f*/,
+	[ICE_EEP2_GPIO_MASK1]  = 0/*0xff*/,
+	[ICE_EEP2_GPIO_MASK2]  = 0/*0x7f*/,
+	[ICE_EEP2_GPIO_STATE]  = (1 << GPIO_PHANTOM_OFF) |
+			(1 << GPIO_SPDIF_IN_INV),
+	[ICE_EEP2_GPIO_STATE1] = 0x00,
+	[ICE_EEP2_GPIO_STATE2] = 0x00,
+};
+
+/* entry point */
+struct snd_ice1712_card_info snd_vt1724_maya44_cards[] __devinitdata = {
+	{
+		.subvendor = VT1724_SUBDEVICE_MAYA44,
+		.name = "ESI Maya44",
+		.model = "maya44",
+		.chip_init = maya44_init,
+		.build_controls = maya44_add_controls,
+		.eeprom_size = sizeof(maya44_eeprom),
+		.eeprom_data = maya44_eeprom,
+	},
+	{ } /* terminator */
+};
diff --git a/sound/pci/ice1712/maya44.h b/sound/pci/ice1712/maya44.h
new file mode 100644
index 0000000..eafd03a
--- /dev/null
+++ b/sound/pci/ice1712/maya44.h
@@ -0,0 +1,10 @@
+#ifndef __SOUND_MAYA44_H
+#define __SOUND_MAYA44_H
+
+#define MAYA44_DEVICE_DESC		"{ESI,Maya44},"
+
+#define VT1724_SUBDEVICE_MAYA44		0x34315441	/* Maya44 */
+
+extern struct snd_ice1712_card_info  snd_vt1724_maya44_cards[];
+
+#endif	/* __SOUND_MAYA44_H */
-- 
cgit v0.10.2


From 8e7abf1c62941ebb7a1416cbc62392c8a0902625 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 6 May 2009 10:26:45 -0400
Subject: ring-buffer: remove unneeded conditional in rb_reserve_next

The code in __rb_reserve_next checks on page overflow if it is the
original commiter and then resets the page back to the original
setting.  Although this is fine, and the code is correct, it is
a bit fragil. Some experimental work I did breaks it easily.

The better and more robust solution is to have all commiters that
overflow the page, simply subtract what they added.

[ Impact: more robust ring buffer account management ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 424129e..03ed52b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1290,9 +1290,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		rb_event_set_padding(event);
 	}
 
-	if (tail <= BUF_PAGE_SIZE)
-		/* Set the write back to the previous setting */
-		local_set(&tail_page->write, tail);
+	/* Set the write back to the previous setting */
+	local_sub(length, &tail_page->write);
 
 	/*
 	 * If this was a commit entry that failed,
@@ -1311,8 +1310,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
  out_reset:
 	/* reset write */
-	if (tail <= BUF_PAGE_SIZE)
-		local_set(&tail_page->write, tail);
+	local_sub(length, &tail_page->write);
 
 	if (likely(lock_taken))
 		__raw_spin_unlock(&cpu_buffer->lock);
-- 
cgit v0.10.2


From 00c81a58c5b4e0de14ee33bfbc3d71c90f69f9ea Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 6 May 2009 12:40:51 -0400
Subject: ring-buffer: check for failed allocation in ring buffer benchmark

The result of the allocation of the ring buffer read page in the
ring buffer bench mark does not check the return to see if a page
was actually allocated. This patch fixes that.

[ Impact: avoid NULL dereference ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 747244a..dcd75e9 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -84,6 +84,9 @@ static enum event_status read_page(int cpu)
 	int i;
 
 	bpage = ring_buffer_alloc_read_page(buffer);
+	if (!bpage)
+		return EVENT_DROPPED;
+
 	ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
 	if (ret >= 0) {
 		rpage = bpage;
-- 
cgit v0.10.2


From 6634ff26cce2da04e5c2a5481bcb8888e7d01786 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 6 May 2009 15:30:07 -0400
Subject: ring-buffer: make moving the tail page a separate function

Ingo Molnar thought the code would be cleaner if we used a function call
instead of a goto for moving the tail page. After implementing this,
it seems that gcc still inlines the result and the output is pretty much
the same. Since this is considered a cleaner approach, might as well
implement it.

[ Impact: code clean up ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 03ed52b..3ae5ccf 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1154,51 +1154,18 @@ static unsigned rb_calculate_event_length(unsigned length)
 	return length;
 }
 
+
 static struct ring_buffer_event *
-__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-		  unsigned type, unsigned long length, u64 *ts)
+rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
+	     unsigned long length, unsigned long tail,
+	     struct buffer_page *commit_page,
+	     struct buffer_page *tail_page, u64 *ts)
 {
-	struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
-	struct buffer_page *next_page;
-	unsigned long tail, write;
+	struct buffer_page *next_page, *head_page, *reader_page;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
-	unsigned long flags;
 	bool lock_taken = false;
-
-	commit_page = cpu_buffer->commit_page;
-	/* we just need to protect against interrupts */
-	barrier();
-	tail_page = cpu_buffer->tail_page;
-	write = local_add_return(length, &tail_page->write);
-	tail = write - length;
-
-	/* See if we shot pass the end of this buffer page */
-	if (write > BUF_PAGE_SIZE)
-		goto next_page;
-
-	/* We reserved something on the buffer */
-
-	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
-		return NULL;
-
-	event = __rb_page_index(tail_page, tail);
-	rb_update_event(event, type, length);
-
-	/* The passed in type is zero for DATA */
-	if (likely(!type))
-		local_inc(&tail_page->entries);
-
-	/*
-	 * If this is a commit and the tail is zero, then update
-	 * this page's time stamp.
-	 */
-	if (!tail && rb_is_commit(cpu_buffer, event))
-		cpu_buffer->commit_page->page->time_stamp = *ts;
-
-	return event;
-
- next_page:
+	unsigned long flags;
 
 	next_page = tail_page;
 
@@ -1318,6 +1285,48 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	return NULL;
 }
 
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+		  unsigned type, unsigned long length, u64 *ts)
+{
+	struct buffer_page *tail_page, *commit_page;
+	struct ring_buffer_event *event;
+	unsigned long tail, write;
+
+	commit_page = cpu_buffer->commit_page;
+	/* we just need to protect against interrupts */
+	barrier();
+	tail_page = cpu_buffer->tail_page;
+	write = local_add_return(length, &tail_page->write);
+	tail = write - length;
+
+	/* See if we shot pass the end of this buffer page */
+	if (write > BUF_PAGE_SIZE)
+		return rb_move_tail(cpu_buffer, length, tail,
+				    commit_page, tail_page, ts);
+
+	/* We reserved something on the buffer */
+
+	if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
+		return NULL;
+
+	event = __rb_page_index(tail_page, tail);
+	rb_update_event(event, type, length);
+
+	/* The passed in type is zero for DATA */
+	if (likely(!type))
+		local_inc(&tail_page->entries);
+
+	/*
+	 * If this is a commit and the tail is zero, then update
+	 * this page's time stamp.
+	 */
+	if (!tail && rb_is_commit(cpu_buffer, event))
+		cpu_buffer->commit_page->page->time_stamp = *ts;
+
+	return event;
+}
+
 static int
 rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  u64 *ts, u64 *delta)
-- 
cgit v0.10.2


From 3e07a4f680adc66dfa175aa5021aedf340251b12 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 6 May 2009 18:36:59 -0400
Subject: ring-buffer: change test to be more latency friendly

The ring buffer benchmark/test runs a producer for 10 seconds.
This is done with preemption and interrupts enabled. But if the kernel
is not compiled with CONFIG_PREEMPT, it basically stops everything
but interrupts for 10 seconds.

Although this is just a test and is not for production, this attribute
can be quite annoying. It can also spawn badness elsewhere.

This patch solves the issues by calling "cond_resched" when the system
is not compiled with CONFIG_PREEMPT. It also keeps track of the time
spent to call cond_resched such that it does not go against the
time calculations. That is, if the task schedules away, the time scheduled
out is removed from the test data. Note, this only works for non PREEMPT
because we do not know when the task is scheduled out if we have PREEMPT
enabled.

[ Impact: prevent test from stopping the world for 10 seconds ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index dcd75e9..a26fc67 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -185,6 +185,35 @@ static void ring_buffer_consumer(void)
 	complete(&read_done);
 }
 
+/*
+ * If we are a non preempt kernel, the 10 second run will
+ * stop everything while it runs. Instead, we will call cond_resched
+ * and also add any time that was lost by a rescedule.
+ */
+#ifdef CONFIG_PREEMPT
+static void sched_if_needed(struct timeval *start_tv, struct timeval *end_tv)
+{
+}
+#else
+static void sched_if_needed(struct timeval *start_tv, struct timeval *end_tv)
+{
+	struct timeval tv;
+
+	cond_resched();
+	do_gettimeofday(&tv);
+	if (tv.tv_usec < end_tv->tv_usec) {
+		tv.tv_usec += 1000000;
+		tv.tv_sec--;
+	}
+	start_tv->tv_sec += tv.tv_sec - end_tv->tv_sec;
+	start_tv->tv_usec += tv.tv_usec - end_tv->tv_usec;
+	if (start_tv->tv_usec > 1000000) {
+		start_tv->tv_usec -= 1000000;
+		start_tv->tv_sec++;
+	}
+}
+#endif
+
 static void ring_buffer_producer(void)
 {
 	struct timeval start_tv;
@@ -221,6 +250,8 @@ static void ring_buffer_producer(void)
 		if (consumer && !(++cnt % wakeup_interval))
 			wake_up_process(consumer);
 
+		sched_if_needed(&start_tv, &end_tv);
+
 	} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
 	pr_info("End ring buffer hammer\n");
 
-- 
cgit v0.10.2


From 71e1c8ac42ae4038ddb1367cce7097ab868dc532 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 6 May 2009 21:20:39 -0400
Subject: tracing: update sample with TRACE_INCLUDE_FILE

When creating trace events for ftrace, the header file with the TRACE_EVENT
macros must also have a macro called TRACE_SYSTEM. This macro describes
the name of the system the TRACE_EVENTS are defined for. It also doubles
as a way for the define_trace.h file to include the file that included
it.

For example:

in irq.h

 #define TRACE_SYSTEM irq

[...]

 #include <trace/define_trace.h>

The define_trace will use TRACE_SYSTEM to include irq.h. But if the name
of the trace system does not match the name of the trace header file,
one can override it with:

Which will change define_trace.h to inclued foo_trace.h instead of foo.h

The sample comments this, but people that use the sample code will more
likely use the code and not read the comments. This patch changes the
sample code to use the TRACE_INCLUDE_FILE to better show developers how to
use it.

[ Impact: make sample less confusing to developers ]

Reported-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index eab4644..128a897 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -31,7 +31,7 @@
  *
  */
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM trace-events-sample
+#define TRACE_SYSTEM sample
 
 /*
  * The TRACE_EVENT macro is broken up into 5 parts.
@@ -120,5 +120,10 @@ TRACE_EVENT(foo_bar,
  * result.
  */
 #undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
 #define TRACE_INCLUDE_PATH .
+/*
+ * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
+ */
+#define TRACE_INCLUDE_FILE trace-events-sample
 #include <trace/define_trace.h>
-- 
cgit v0.10.2


From 9456f0fa6d3cb944d3b9fc31c9a244e0362c26ea Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 6 May 2009 21:54:09 -0400
Subject: tracing: reset ring buffer when removing modules with events

Li Zefan found that there's a race using the event ids of events and
modules. When a module is loaded, an event id is incremented. We only
have 16 bits for event ids (65536) and there is a possible (but highly
unlikely) race that we could load and unload a module that registers
events so many times that the event id counter overflows.

When it overflows, it then restarts and goes looking for available
ids. An id is available if it was added by a module and released.

The race is if you have one module add an id, and then is removed.
Another module loaded can use that same event id. But if the old module
still had events in the ring buffer, the new module's call back would
get bogus data.  At best (and most likely) the output would just be
garbage. But if the module for some reason used pointers (not recommended)
then this could potentially crash.

The safest thing to do is just reset the ring buffer if a module that
registered events is removed.

[ Impact: prevent unpredictable results of event id overflows ]

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <49FEAFD0.30106@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4164a34..dd40d23 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -639,6 +639,16 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 		tracing_reset(tr, cpu);
 }
 
+void tracing_reset_current(int cpu)
+{
+	tracing_reset(&global_trace, cpu);
+}
+
+void tracing_reset_current_online_cpus(void)
+{
+	tracing_reset_online_cpus(&global_trace);
+}
+
 #define SAVED_CMDLINES 128
 #define NO_CMDLINE_MAP UINT_MAX
 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 777c6c3..ba25793 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -409,6 +409,8 @@ int tracing_is_enabled(void);
 void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
+void tracing_reset_current(int cpu);
+void tracing_reset_current_online_cpus(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *trace_create_file(const char *name,
 				 mode_t mode,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8d579ff..6d2c842 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -932,9 +932,11 @@ static void trace_module_remove_events(struct module *mod)
 {
 	struct ftrace_module_file_ops *file_ops;
 	struct ftrace_event_call *call, *p;
+	bool found = false;
 
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		if (call->mod == mod) {
+			found = true;
 			if (call->enabled) {
 				call->enabled = 0;
 				call->unregfunc();
@@ -957,6 +959,13 @@ static void trace_module_remove_events(struct module *mod)
 		list_del(&file_ops->list);
 		kfree(file_ops);
 	}
+
+	/*
+	 * It is safest to reset the ring buffer if the module being unloaded
+	 * registered any events.
+	 */
+	if (found)
+		tracing_reset_current_online_cpus();
 }
 
 static int trace_module_notify(struct notifier_block *self,
-- 
cgit v0.10.2


From 8ae79a138e88aceeeb07077bff2883245fb7c218 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 6 May 2009 22:52:15 -0400
Subject: tracing: add hierarchical enabling of events

With the current event directory, you can only enable individual events.
The file debugfs/tracing/set_event is used to be able to enable or
disable several events at once. But that can still be awkward.

This patch adds hierarchical enabling of events. That is, each directory
in debugfs/tracing/events has an "enable" file. This file can enable
or disable all events within the directory and below.

 # echo 1 > /debugfs/tracing/events/enable

will enable all events.

 # echo 1 > /debugfs/tracing/events/sched/enable

will enable all events in the sched subsystem.

 # echo 1 > /debugfs/tracing/events/enable
 # echo 0 > /debugfs/tracing/events/irq/enable

will enable all events, but then disable just the irq subsystem events.

When reading one of these enable files, there are four results:

 0 - all events this file affects are disabled
 1 - all events this file affects are enabled
 X - there is a mixture of events enabled and disabled
 ? - this file does not affect any event

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6d2c842..87feb01 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -400,6 +400,133 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	return cnt;
 }
 
+static ssize_t
+system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+		   loff_t *ppos)
+{
+	const char *system = filp->private_data;
+	struct ftrace_event_call *call;
+	char buf[2];
+	int set = -1;
+	int all = 0;
+	int ret;
+
+	if (system[0] == '*')
+		all = 1;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		if (!call->name || !call->regfunc)
+			continue;
+
+		if (!all && strcmp(call->system, system) != 0)
+			continue;
+
+		/*
+		 * We need to find out if all the events are set
+		 * or if all events or cleared, or if we have
+		 * a mixture.
+		 */
+		if (call->enabled) {
+			switch (set) {
+			case -1:
+				set = 1;
+				break;
+			case 0:
+				set = 2;
+				break;
+			}
+		} else {
+			switch (set) {
+			case -1:
+				set = 0;
+				break;
+			case 1:
+				set = 2;
+				break;
+			}
+		}
+		/*
+		 * If we have a mixture, no need to look further.
+		 */
+		if (set == 2)
+			break;
+	}
+	mutex_unlock(&event_mutex);
+
+	buf[1] = '\n';
+	switch (set) {
+	case 0:
+		buf[0] = '0';
+		break;
+	case 1:
+		buf[0] = '1';
+		break;
+	case 2:
+		buf[0] = 'X';
+		break;
+	default:
+		buf[0] = '?';
+	}
+
+	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
+
+	return ret;
+}
+
+static ssize_t
+system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		    loff_t *ppos)
+{
+	const char *system = filp->private_data;
+	unsigned long val;
+	char *command;
+	char buf[64];
+	ssize_t ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	switch (val) {
+	case 0:
+	case 1:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	command = kstrdup(system, GFP_KERNEL);
+	if (!command)
+		return -ENOMEM;
+
+	ret = ftrace_set_clr_event(command, val);
+	if (ret)
+		goto out_free;
+
+	ret = cnt;
+
+ out_free:
+	kfree(command);
+
+	*ppos += cnt;
+
+	return ret;
+}
+
 extern char *__bad_type_size(void);
 
 #undef FIELD
@@ -686,6 +813,12 @@ static const struct file_operations ftrace_subsystem_filter_fops = {
 	.write = subsystem_filter_write,
 };
 
+static const struct file_operations ftrace_system_enable_fops = {
+	.open = tracing_open_generic,
+	.read = system_enable_read,
+	.write = system_enable_write,
+};
+
 static const struct file_operations ftrace_show_header_fops = {
 	.open = tracing_open_generic,
 	.read = show_header,
@@ -768,6 +901,10 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 			   "'%s/filter' entry\n", name);
 	}
 
+	entry = trace_create_file("enable", 0644, system->entry,
+				  (void *)system->name,
+				  &ftrace_system_enable_fops);
+
 	return system->entry;
 }
 
@@ -1041,6 +1178,9 @@ static __init int event_trace_init(void)
 			  ring_buffer_print_entry_header,
 			  &ftrace_show_header_fops);
 
+	trace_create_file("enable", 0644, d_events,
+			  "*:*", &ftrace_system_enable_fops);
+
 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
 		if (!call->name)
-- 
cgit v0.10.2


From 975e5f45500dff6d15c0001bb662e9aac0ce0076 Mon Sep 17 00:00:00 2001
From: Samuel Bronson <naesten@gmail.com>
Date: Wed, 6 May 2009 22:27:55 -0400
Subject: x86: use symbolic name for VM86_SIGNAL when used as vm86 default
 return

This code has apparently used "0" and not VM86_SIGNAL since Linux
1.1.9, when Linus added VM86_SIGNAL to vm86.h. This patch changes the
code to use the symbolic name.

The magic 0 tripped me up in trying to extend the vm86(2) manpage to
actually explain vm86()'s interface -- my greps for VM86_SIGNAL came up
fruitless.

[ Impact: cleanup; no object code change ]

Signed-off-by: Samuel Bronson <naesten@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index d7ac84e..b8035a0 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -318,9 +318,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	}
 
 /*
- * Save old state, set default return value (%ax) to 0
+ * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
  */
-	info->regs32->ax = 0;
+	info->regs32->ax = VM86_SIGNAL;
 	tsk->thread.saved_sp0 = tsk->thread.sp0;
 	tsk->thread.saved_fs = info->regs32->fs;
 	tsk->thread.saved_gs = get_user_gs(info->regs32);
-- 
cgit v0.10.2


From 40c8bca76ecaa6b663d403d34f0fcd422bbdbffd Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 7 May 2009 15:24:36 +0900
Subject: sh: Flag IRQSTACKS as BROKEN for now.

There still seems to be some stack corruption to sort out here, so flag
this as BROKEN until this issue is sorted out.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 0d62681..fdb0493 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -92,7 +92,7 @@ config 4KSTACKS
 
 config IRQSTACKS
 	bool "Use separate kernel stacks when processing interrupts"
-	depends on DEBUG_KERNEL && SUPERH32
+	depends on DEBUG_KERNEL && SUPERH32 && BROKEN
 	help
 	  If you say Y here the kernel will use separate kernel stacks
 	  for handling hard and soft interrupts.  This can help avoid
-- 
cgit v0.10.2


From 643bec956544d376b7c2a80a3d5c3d0bf94da8d3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 7 May 2009 09:12:50 +0200
Subject: x86: clean up arch/x86/kernel/tsc_sync.c a bit

 - remove unused define
 - make the lock variable definition stand out some more
 - convert KERN_* to pr_info() / pr_warning()

[ Impact: cleanup ]

LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index bf36328..027b5b4 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -34,6 +34,7 @@ static __cpuinitdata atomic_t stop_count;
  * of a critical section, to be able to prove TSC time-warps:
  */
 static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+
 static __cpuinitdata cycles_t last_tsc;
 static __cpuinitdata cycles_t max_warp;
 static __cpuinitdata int nr_warps;
@@ -113,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
 		return;
 
 	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
-		printk(KERN_INFO
-		       "Skipping synchronization checks as TSC is reliable.\n");
+		pr_info("Skipping synchronization checks as TSC is reliable.\n");
 		return;
 	}
 
-	printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
-			  smp_processor_id(), cpu);
+	pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:",
+		smp_processor_id(), cpu);
 
 	/*
 	 * Reset it - in case this is a second bootup:
@@ -143,8 +143,8 @@ void __cpuinit check_tsc_sync_source(int cpu)
 
 	if (nr_warps) {
 		printk("\n");
-		printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
-				    " turning off TSC clock.\n", max_warp);
+		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
+			   "turning off TSC clock.\n", max_warp);
 		mark_tsc_unstable("check_tsc_sync_source failed");
 	} else {
 		printk(" passed.\n");
@@ -195,5 +195,3 @@ void __cpuinit check_tsc_sync_target(void)
 	while (atomic_read(&stop_count) != cpus)
 		cpu_relax();
 }
-#undef NR_LOOPS
-
-- 
cgit v0.10.2


From aa47b7e0f89b9998dad4d1667447e8cb7703ff4e Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 4 May 2009 01:38:05 -0700
Subject: sched: emit thread info flags with stack trace

When a thread is oom killed and fails to exit, it's helpful to know which
threads have access to memory reserves if the machine livelocks.  This is
done by testing for the TIF_MEMDIE thread info flag and should be
displayed alongside stack traces to identify tasks that have access to
such reserves but are still stuck allocating pages, for instance.

It would probably be helpful in other cases as well, so all thread info
flags are emitted when showing a task.

( v2: fix warning reported by Stephen Rothwell )

[ Impact: extend debug printout info ]

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
LKML-Reference: <alpine.DEB.2.00.0905040136390.15831@chino.kir.corp.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 2a43a58..5aa63f5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6610,8 +6610,9 @@ void sched_show_task(struct task_struct *p)
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	free = stack_not_used(p);
 #endif
-	printk(KERN_CONT "%5lu %5d %6d\n", free,
-		task_pid_nr(p), task_pid_nr(p->real_parent));
+	printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
+		task_pid_nr(p), task_pid_nr(p->real_parent),
+		(unsigned long)task_thread_info(p)->flags);
 
 	show_stack(p, NULL);
 }
-- 
cgit v0.10.2


From ee1acbfabd5270b40ce2cfdc202070b7ca91cdff Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 7 May 2009 16:38:16 +0900
Subject: sh: Handle shm_align_mask also for HAVE_ARCH_UNMAPPED_AREA_TOPDOWN.

Presently shm_align_mask is only looked at for the bottom up case, but we
still want this for proper colouring constraints in the topdown case.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index 09acbc3..4c5462d 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -75,7 +75,5 @@ extern void copy_from_user_page(struct vm_area_struct *vma,
 #define flush_cache_vmap(start, end)		flush_cache_all()
 #define flush_cache_vunmap(start, end)		flush_cache_all()
 
-#define HAVE_ARCH_UNMAPPED_AREA
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_CACHEFLUSH_H */
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index b517ae0..2a011b1 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -154,6 +154,10 @@ extern void kmap_coherent_init(void);
 #define kmap_coherent_init()	do { } while (0)
 #endif
 
+/* arch/sh/mm/mmap.c */
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
 #include <asm-generic/pgtable.h>
 
 #endif /* __ASM_SH_PGTABLE_H */
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index 931f4d0..1b5fdfb 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -1,7 +1,7 @@
 /*
  * arch/sh/mm/mmap.c
  *
- * Copyright (C) 2008  Paul Mundt
+ * Copyright (C) 2008 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -21,9 +21,26 @@ EXPORT_SYMBOL(shm_align_mask);
 /*
  * To avoid cache aliases, we map the shared page with same color.
  */
-#define COLOUR_ALIGN(addr, pgoff)				\
-	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
-	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+static inline unsigned long COLOUR_ALIGN(unsigned long addr,
+					 unsigned long pgoff)
+{
+	unsigned long base = (addr + shm_align_mask) & ~shm_align_mask;
+	unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
+
+	return base + off;
+}
+
+static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
+					      unsigned long pgoff)
+{
+	unsigned long base = addr & ~shm_align_mask;
+	unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
+
+	if (base + off <= addr)
+		return base + off;
+
+	return base - off;
+}
 
 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	unsigned long len, unsigned long pgoff, unsigned long flags)
@@ -103,6 +120,117 @@ full_search:
 			addr = COLOUR_ALIGN(addr, pgoff);
 	}
 }
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = addr0;
+	int do_colour_align;
+
+	if (flags & MAP_FIXED) {
+		/* We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (unlikely(len > TASK_SIZE))
+		return -ENOMEM;
+
+	do_colour_align = 0;
+	if (filp || (flags & MAP_SHARED))
+		do_colour_align = 1;
+
+	/* requesting a specific address */
+	if (addr) {
+		if (do_colour_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+	/* check if free_area_cache is useful for us */
+	if (len <= mm->cached_hole_size) {
+	        mm->cached_hole_size = 0;
+		mm->free_area_cache = mm->mmap_base;
+	}
+
+	/* either no address requested or can't fit in requested address hole */
+	addr = mm->free_area_cache;
+	if (do_colour_align) {
+		unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff);
+
+		addr = base + len;
+	}
+
+	/* make sure it can fit in the remaining address space */
+	if (likely(addr > len)) {
+		vma = find_vma(mm, addr-len);
+		if (!vma || addr <= vma->vm_start) {
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr-len);
+		}
+	}
+
+	if (unlikely(mm->mmap_base < len))
+		goto bottomup;
+
+	addr = mm->mmap_base-len;
+	if (do_colour_align)
+		addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+
+	do {
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * else if new region fits below vma->vm_start,
+		 * return with success:
+		 */
+		vma = find_vma(mm, addr);
+		if (likely(!vma || addr+len <= vma->vm_start)) {
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr);
+		}
+
+		/* remember the largest hole we saw so far */
+		if (addr + mm->cached_hole_size < vma->vm_start)
+		        mm->cached_hole_size = vma->vm_start - addr;
+
+		/* try just below the current vma->vm_start */
+		addr = vma->vm_start-len;
+		if (do_colour_align)
+			addr = COLOUR_ALIGN_DOWN(addr, pgoff);
+	} while (likely(len < vma->vm_start));
+
+bottomup:
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	mm->cached_hole_size = ~0UL;
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+	/*
+	 * Restore the topdown base:
+	 */
+	mm->free_area_cache = mm->mmap_base;
+	mm->cached_hole_size = ~0UL;
+
+	return addr;
+}
 #endif /* CONFIG_MMU */
 
 /*
-- 
cgit v0.10.2


From 5e7c03442574ed0376c0621bfb0c477d79c12c71 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 6 May 2009 01:26:01 +0200
Subject: ASoC: cs4270: add power management support

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c
index 153124b..a32b822 100644
--- a/sound/soc/codecs/cs4270.c
+++ b/sound/soc/codecs/cs4270.c
@@ -18,7 +18,7 @@
  * - The machine driver's 'startup' function must call
  *   cs4270_set_dai_sysclk() with the value of MCLK.
  * - Only I2S and left-justified modes are supported
- * - Power management is not supported
+ * - Power management is supported
  */
 
 #include <linux/module.h>
@@ -27,6 +27,7 @@
 #include <sound/soc.h>
 #include <sound/initval.h>
 #include <linux/i2c.h>
+#include <linux/delay.h>
 
 #include "cs4270.h"
 
@@ -65,6 +66,8 @@
 #define CS4270_PWRCTL_PDN_ADC	0x20
 #define CS4270_PWRCTL_PDN_DAC	0x02
 #define CS4270_PWRCTL_PDN	0x01
+#define CS4270_PWRCTL_PDN_ALL	\
+	(CS4270_PWRCTL_PDN_ADC | CS4270_PWRCTL_PDN_DAC | CS4270_PWRCTL_PDN)
 #define CS4270_MODE_SPEED_MASK	0x30
 #define CS4270_MODE_1X		0x00
 #define CS4270_MODE_2X		0x10
@@ -788,6 +791,57 @@ static struct i2c_device_id cs4270_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, cs4270_id);
 
+#ifdef CONFIG_PM
+
+/* This suspend/resume implementation can handle both - a simple standby
+ * where the codec remains powered, and a full suspend, where the voltage
+ * domain the codec is connected to is teared down and/or any other hardware
+ * reset condition is asserted.
+ *
+ * The codec's own power saving features are enabled in the suspend callback,
+ * and all registers are written back to the hardware when resuming.
+ */
+
+static int cs4270_i2c_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	struct cs4270_private *cs4270 = i2c_get_clientdata(client);
+	struct snd_soc_codec *codec = &cs4270->codec;
+	int reg = snd_soc_read(codec, CS4270_PWRCTL) | CS4270_PWRCTL_PDN_ALL;
+
+	return snd_soc_write(codec, CS4270_PWRCTL, reg);
+}
+
+static int cs4270_i2c_resume(struct i2c_client *client)
+{
+	struct cs4270_private *cs4270 = i2c_get_clientdata(client);
+	struct snd_soc_codec *codec = &cs4270->codec;
+	int reg;
+
+	/* In case the device was put to hard reset during sleep, we need to
+	 * wait 500ns here before any I2C communication. */
+	ndelay(500);
+
+	/* first restore the entire register cache ... */
+	for (reg = CS4270_FIRSTREG; reg <= CS4270_LASTREG; reg++) {
+		u8 val = snd_soc_read(codec, reg);
+
+		if (i2c_smbus_write_byte_data(client, reg, val)) {
+			dev_err(codec->dev, "i2c write failed\n");
+			return -EIO;
+		}
+	}
+
+	/* ... then disable the power-down bits */
+	reg = snd_soc_read(codec, CS4270_PWRCTL);
+	reg &= ~CS4270_PWRCTL_PDN_ALL;
+
+	return snd_soc_write(codec, CS4270_PWRCTL, reg);
+}
+#else
+#define cs4270_i2c_suspend	NULL
+#define cs4270_i2c_resume	NULL
+#endif /* CONFIG_PM */
+
 /*
  * cs4270_i2c_driver - I2C device identification
  *
@@ -802,6 +856,8 @@ static struct i2c_driver cs4270_i2c_driver = {
 	.id_table = cs4270_id,
 	.probe = cs4270_i2c_probe,
 	.remove = cs4270_i2c_remove,
+	.suspend = cs4270_i2c_suspend,
+	.resume = cs4270_i2c_resume,
 };
 
 /*
-- 
cgit v0.10.2


From e8808c1019b048a43686dbd25c188a035842c2e2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 3 May 2009 02:48:52 +0200
Subject: tracing/filters: support for filters of dynamic sized arrays

Currently the filtering infrastructure supports well the
numeric types and fixed sized array types.

But the recently added __string() field uses a specific
indirect offset mechanism which requires a specific
predicate. Until now it wasn't supported.

This patch adds this support and implies very few changes,
only a new predicate is needed, the management of this specific
field can be done through the usual string helpers in the
filtering infrastructure.

[ Impact: support all kinds of strings in the tracing filters ]

Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 7ac6910..01c76eb 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -151,6 +151,7 @@ static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
 	return val1 || val2;
 }
 
+/* Filter predicate for fixed sized arrays of characters */
 static int filter_pred_string(struct filter_pred *pred, void *event,
 			      int val1, int val2)
 {
@@ -164,6 +165,30 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
 	return match;
 }
 
+/*
+ * Filter predicate for dynamic sized arrays of characters.
+ * These are implemented through a list of strings at the end
+ * of the entry.
+ * Also each of these strings have a field in the entry which
+ * contains its offset from the beginning of the entry.
+ * We have then first to get this field, dereference it
+ * and add it to the address of the entry, and at last we have
+ * the address of the string.
+ */
+static int filter_pred_strloc(struct filter_pred *pred, void *event,
+			      int val1, int val2)
+{
+	int str_loc = *(int *)(event + pred->offset);
+	char *addr = (char *)(event + str_loc);
+	int cmp, match;
+
+	cmp = strncmp(addr, pred->str_val, pred->str_len);
+
+	match = (!cmp) ^ pred->not;
+
+	return match;
+}
+
 static int filter_pred_none(struct filter_pred *pred, void *event,
 			    int val1, int val2)
 {
@@ -446,10 +471,18 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
 	return 0;
 }
 
+enum {
+	FILTER_STATIC_STRING = 1,
+	FILTER_DYN_STRING
+};
+
 static int is_string_field(const char *type)
 {
 	if (strchr(type, '[') && strstr(type, "char"))
-		return 1;
+		return FILTER_STATIC_STRING;
+
+	if (!strcmp(type, "__str_loc"))
+		return FILTER_DYN_STRING;
 
 	return 0;
 }
@@ -512,6 +545,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 	struct ftrace_event_field *field;
 	filter_pred_fn_t fn;
 	unsigned long long val;
+	int string_type;
 
 	pred->fn = filter_pred_none;
 
@@ -536,8 +570,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
 		return -EINVAL;
 	}
 
-	if (is_string_field(field->type)) {
-		fn = filter_pred_string;
+	string_type = is_string_field(field->type);
+	if (string_type) {
+		if (string_type == FILTER_STATIC_STRING)
+			fn = filter_pred_string;
+		else
+			fn = filter_pred_strloc;
 		pred->str_len = field->size;
 		if (pred->op == OP_NE)
 			pred->not = 1;
-- 
cgit v0.10.2


From 5928c3cc0ffcb6894bbab6be591b7ae1786b2d87 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 3 May 2009 03:03:57 +0200
Subject: tracing/filters: support for operator reserved characters in strings

When we set a filter for an event, such as:

echo "name == my_lock_name" > \
	/debug/tracing/events/lockdep/lock_acquired/filter

then the following order of token type is parsed:

- space
- operator
- parentheses
- operand

Because the operators and parentheses have a higher precedence
than the operand characters, which is normal, then we can't
use any string containing such special characters:

()=<>!&|

To get this support and also avoid ambiguous intepretation from
the parser or the human, we can do it using double quotes so that
we keep the usual languages habits.

Then after this patch you can still declare string condition like
before:

echo name == myname

But if you want to compare against a string containing an operator
character, you can use double quotes:

echo 'name == "&myname"'

Don't forget to include the whole expression into single quotes or
the double ones will be eaten by echo.

[ Impact: support strings with special characters for tracing filters ]

Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 01c76eb..8c62e5b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -851,10 +851,19 @@ static void postfix_clear(struct filter_parse_state *ps)
 
 static int filter_parse(struct filter_parse_state *ps)
 {
+	int in_string = 0;
 	int op, top_op;
 	char ch;
 
 	while ((ch = infix_next(ps))) {
+		if (ch == '"') {
+			in_string ^= 1;
+			continue;
+		}
+
+		if (in_string)
+			goto parse_operand;
+
 		if (isspace(ch))
 			continue;
 
@@ -908,6 +917,7 @@ static int filter_parse(struct filter_parse_state *ps)
 			}
 			continue;
 		}
+parse_operand:
 		if (append_operand_char(ps, ch)) {
 			parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
 			return -EINVAL;
-- 
cgit v0.10.2


From d94fc523f3c35bd8013f04827e94756cbc0212f4 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 7 May 2009 15:11:15 +0800
Subject: tracing/events: fix concurrent access to ftrace_events list, fix

In filter_add_subsystem_pred() we should release event_mutex before
calling filter_free_subsystem_preds(), since both functions hold
event_mutex.

[ Impact: fix deadlock when writing invalid pred into subsystem filter ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: tzanussi@gmail.com
Cc: a.p.zijlstra@chello.nl
Cc: fweisbec@gmail.com
Cc: rostedt@goodmis.org
LKML-Reference: <4A028993.7020509@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 8c62e5b..85ad6a8 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -636,14 +636,15 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
 
 		err = filter_add_pred(ps, call, pred);
 		if (err) {
+			mutex_unlock(&event_mutex);
 			filter_free_subsystem_preds(system);
 			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
-			break;
+			goto out;
 		}
 		replace_filter_string(call->filter, filter_string);
 	}
 	mutex_unlock(&event_mutex);
-
+out:
 	return err;
 }
 
-- 
cgit v0.10.2


From ae318a148e4d255dfbc87d963fdd6031c2af9c46 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 7 May 2009 17:55:09 +0900
Subject: sh: sh64 still needs ARCH_USES_GETTIMEOFFSET temporarily.

sh64 is still using this, so re-enable it temporarily while SH-5 gets
converted to use the generic code.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 565f390..02688d7 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -141,6 +141,10 @@ config ARCH_NO_VIRT_TO_BUS
 config ARCH_HAS_DEFAULT_IDLE
 	def_bool y
 
+config ARCH_USES_GETTIMEOFFSET
+	def_bool y 
+	depends on SUPERH64
+
 config IO_TRAPPED
 	bool
 
-- 
cgit v0.10.2


From 0fb849b9d743a20056f2418cd955e5c650658663 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 7 May 2009 18:10:27 +0900
Subject: sh: Integrate the SH-5 onchip_remap() more coherently.

Presently this is special-cased for early initialization. While there are
situations where these static early initializations are still necessary,
with minor changes it is possible to use this for the regular ioremap
implementation as well. This allows us to kill off the special-casing for
the remap completely and to start tidying up all of the SH-5
special-casing in drivers.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c
index da62ad5..dbb00c9 100644
--- a/arch/sh/boards/mach-cayman/irq.c
+++ b/arch/sh/boards/mach-cayman/irq.c
@@ -161,7 +161,7 @@ void init_cayman_irq(void)
 {
 	int i;
 
-	epld_virt = onchip_remap(EPLD_BASE, 1024, "EPLD");
+	epld_virt = (unsigned long)ioremap_nocache(EPLD_BASE, 1024);
 	if (!epld_virt) {
 		printk(KERN_ERR "Cayman IRQ: Unable to remap EPLD\n");
 		return;
diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c
index e7f9cc5..7e8216a 100644
--- a/arch/sh/boards/mach-cayman/setup.c
+++ b/arch/sh/boards/mach-cayman/setup.c
@@ -102,7 +102,7 @@ static int __init smsc_superio_setup(void)
 {
 	unsigned char devid, devrev;
 
-	smsc_superio_virt = onchip_remap(SMSC_SUPERIO_BASE, 1024, "SMSC SuperIO");
+	smsc_superio_virt = (unsigned long)ioremap_nocache(SMSC_SUPERIO_BASE, 1024);
 	if (!smsc_superio_virt) {
 		panic("Unable to remap SMSC SuperIO\n");
 	}
diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c
index cf43185..873ed2b 100644
--- a/arch/sh/drivers/pci/pci-sh5.c
+++ b/arch/sh/drivers/pci/pci-sh5.c
@@ -119,12 +119,12 @@ static int __init sh5pci_init(void)
                 return -EINVAL;
         }
 
-	pcicr_virt = onchip_remap(SH5PCI_ICR_BASE, 1024, "PCICR");
+	pcicr_virt = (unsigned long)ioremap_nocache(SH5PCI_ICR_BASE, 1024);
 	if (!pcicr_virt) {
 		panic("Unable to remap PCICR\n");
 	}
 
-	PCI_IO_AREA = onchip_remap(SH5PCI_IO_BASE, 0x10000, "PCIIO");
+	PCI_IO_AREA = (unsigned long)ioremap_nocache(SH5PCI_IO_BASE, 0x10000);
 	if (!PCI_IO_AREA) {
 		panic("Unable to remap PCIIO\n");
 	}
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index ef21734..c7c360b 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -224,11 +224,6 @@ void __iomem *__ioremap(unsigned long offset, unsigned long size,
 			unsigned long flags);
 void __iounmap(void __iomem *addr);
 
-/* arch/sh/mm/ioremap_64.c */
-unsigned long onchip_remap(unsigned long addr, unsigned long size,
-			   const char *name);
-extern void onchip_unmap(unsigned long vaddr);
-
 static inline void __iomem *
 __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 {
@@ -263,8 +258,6 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 	return __ioremap(offset, size, flags);
 }
 #else
-#define onchip_remap(addr, size, name)		(addr)
-#define onchip_unmap(addr)			do { } while (0)
 #define __ioremap_mode(offset, size, flags)	((void __iomem *)(offset))
 #define __iounmap(addr)				do { } while (0)
 #endif /* CONFIG_MMU */
diff --git a/arch/sh/kernel/cpu/irq/intc-sh5.c b/arch/sh/kernel/cpu/irq/intc-sh5.c
index d8679a4..02d8137 100644
--- a/arch/sh/kernel/cpu/irq/intc-sh5.c
+++ b/arch/sh/kernel/cpu/irq/intc-sh5.c
@@ -188,7 +188,7 @@ void __init plat_irq_setup(void)
 	unsigned long reg;
 	int i;
 
-	intc_virt = onchip_remap(INTC_BASE, 1024, "INTC");
+	intc_virt = (unsigned long)ioremap_nocache(INTC_BASE, 1024);
 	if (!intc_virt) {
 		panic("Unable to remap INTC\n");
 	}
diff --git a/arch/sh/kernel/cpu/sh5/Makefile b/arch/sh/kernel/cpu/sh5/Makefile
index ce4602e..a184a31 100644
--- a/arch/sh/kernel/cpu/sh5/Makefile
+++ b/arch/sh/kernel/cpu/sh5/Makefile
@@ -6,6 +6,9 @@ obj-y := entry.o probe.o switchto.o
 obj-$(CONFIG_SH_FPU)		+= fpu.o
 obj-$(CONFIG_KALLSYMS)		+= unwind.o
 
+# CPU subtype setup
+obj-$(CONFIG_CPU_SH5)		+= setup-sh5.o
+
 # Primary on-chip clocks (common)
 clock-$(CONFIG_CPU_SH5)		:= clock-sh5.o
 
diff --git a/arch/sh/kernel/cpu/sh5/clock-sh5.c b/arch/sh/kernel/cpu/sh5/clock-sh5.c
index 52c4924..5486324 100644
--- a/arch/sh/kernel/cpu/sh5/clock-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/clock-sh5.c
@@ -71,7 +71,7 @@ static struct clk_ops *sh5_clk_ops[] = {
 
 void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
 {
-	cprc_base = onchip_remap(CPRC_BASE, 1024, "CPRC");
+	cprc_base = (unsigned long)ioremap_nocache(CPRC_BASE, 1024);
 	BUG_ON(!cprc_base);
 
 	if (idx < ARRAY_SIZE(sh5_clk_ops))
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index 7e49cb8..5162975 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -1410,8 +1410,8 @@ peek_real_address_q:
 	   r2(out) : result quadword
 
 	   This is provided as a cheapskate way of manipulating device
-	   registers for debugging (to avoid the need to onchip_remap the debug
-	   module, and to avoid the need to onchip_remap the watchpoint
+	   registers for debugging (to avoid the need to ioremap the debug
+	   module, and to avoid the need to ioremap the watchpoint
 	   controller in a way that identity maps sufficient bits to avoid the
 	   SH5-101 cut2 silicon defect).
 
@@ -1459,8 +1459,8 @@ poke_real_address_q:
 	   r3 : quadword value to write.
 
 	   This is provided as a cheapskate way of manipulating device
-	   registers for debugging (to avoid the need to onchip_remap the debug
-	   module, and to avoid the need to onchip_remap the watchpoint
+	   registers for debugging (to avoid the need to ioremap the debug
+	   module, and to avoid the need to ioremap the watchpoint
 	   controller in a way that identity maps sufficient bits to avoid the
 	   SH5-101 cut2 silicon defect).
 
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
new file mode 100644
index 0000000..d8d59fe
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -0,0 +1,46 @@
+/*
+ * SH5-101/SH5-103 CPU Setup
+ *
+ *  Copyright (C) 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/serial.h>
+#include <linux/serial_sci.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <asm/addrspace.h>
+
+static struct plat_sci_port sci_platform_data[] = {
+	{
+		.mapbase	= PHYS_PERIPHERAL_BLOCK + 0x01030000,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_IOREMAP,
+		.type		= PORT_SCIF,
+		.irqs		= { 39, 40, 42, 0 },
+	}, {
+		.flags = 0,
+	}
+};
+
+static struct platform_device sci_device = {
+	.name		= "sh-sci",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= sci_platform_data,
+	},
+};
+
+static struct platform_device *sh5_devices[] __initdata = {
+	&sci_device,
+};
+
+static int __init sh5_devices_setup(void)
+{
+	return platform_add_devices(sh5_devices,
+				    ARRAY_SIZE(sh5_devices));
+}
+__initcall(sh5_devices_setup);
diff --git a/arch/sh/kernel/time_64.c b/arch/sh/kernel/time_64.c
index f4f5e8a..7bfeaa0 100644
--- a/arch/sh/kernel/time_64.c
+++ b/arch/sh/kernel/time_64.c
@@ -243,12 +243,12 @@ void __init time_init(void)
 	unsigned long interval;
 	struct clk *clk;
 
-	tmu_base = onchip_remap(TMU_BASE, 1024, "TMU");
+	tmu_base = (unsigned long)ioremap_nocache(TMU_BASE, 1024);
 	if (!tmu_base) {
 		panic("Unable to remap TMU\n");
 	}
 
-	rtc_base = onchip_remap(RTC_BASE, 1024, "RTC");
+	rtc_base = (unsigned long)ioremap_nocache(RTC_BASE, 1024);
 	if (!rtc_base) {
 		panic("Unable to remap RTC\n");
 	}
diff --git a/arch/sh/mm/ioremap_64.c b/arch/sh/mm/ioremap_64.c
index 31e1bb5..2331229 100644
--- a/arch/sh/mm/ioremap_64.c
+++ b/arch/sh/mm/ioremap_64.c
@@ -27,88 +27,17 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu.h>
 
-static void shmedia_mapioaddr(unsigned long, unsigned long);
-static unsigned long shmedia_ioremap(struct resource *, u32, int);
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void *__ioremap(unsigned long phys_addr, unsigned long size,
-		unsigned long flags)
-{
-	void * addr;
-	struct vm_struct * area;
-	unsigned long offset, last_addr;
-	pgprot_t pgprot;
-
-	/* Don't allow wraparound or zero size */
-	last_addr = phys_addr + size - 1;
-	if (!size || last_addr < phys_addr)
-		return NULL;
-
-	pgprot = __pgprot(_PAGE_PRESENT  | _PAGE_READ   |
-			  _PAGE_WRITE    | _PAGE_DIRTY  |
-			  _PAGE_ACCESSED | _PAGE_SHARED | flags);
-
-	/*
-	 * Mappings have to be page-aligned
-	 */
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(last_addr + 1) - phys_addr;
-
-	/*
-	 * Ok, go for it..
-	 */
-	area = get_vm_area(size, VM_IOREMAP);
-	if (!area)
-		return NULL;
-	pr_debug("Get vm_area returns %p addr %p\n", area, area->addr);
-	area->phys_addr = phys_addr;
-	addr = area->addr;
-	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-			       phys_addr, pgprot)) {
-		vunmap(addr);
-		return NULL;
-	}
-	return (void *) (offset + (char *)addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-void __iounmap(void *addr)
-{
-	struct vm_struct *area;
-
-	vfree((void *) (PAGE_MASK & (unsigned long) addr));
-	area = remove_vm_area((void *) (PAGE_MASK & (unsigned long) addr));
-	if (!area) {
-		printk(KERN_ERR "iounmap: bad address %p\n", addr);
-		return;
-	}
-
-	kfree(area);
-}
-EXPORT_SYMBOL(__iounmap);
-
 static struct resource shmedia_iomap = {
 	.name	= "shmedia_iomap",
 	.start	= IOBASE_VADDR + PAGE_SIZE,
 	.end	= IOBASE_END - 1,
 };
 
-static void shmedia_mapioaddr(unsigned long pa, unsigned long va);
+static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
+			      unsigned long flags);
 static void shmedia_unmapioaddr(unsigned long vaddr);
-static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz);
+static void __iomem *shmedia_ioremap(struct resource *res, u32 pa,
+				     int sz, unsigned long flags);
 
 /*
  * We have the same problem as the SPARC, so lets have the same comment:
@@ -130,18 +59,18 @@ static struct xresource xresv[XNRES];
 
 static struct xresource *xres_alloc(void)
 {
-        struct xresource *xrp;
-        int n;
-
-        xrp = xresv;
-        for (n = 0; n < XNRES; n++) {
-                if (xrp->xflag == 0) {
-                        xrp->xflag = 1;
-                        return xrp;
-                }
-                xrp++;
-        }
-        return NULL;
+	struct xresource *xrp;
+	int n;
+
+	xrp = xresv;
+	for (n = 0; n < XNRES; n++) {
+		if (xrp->xflag == 0) {
+			xrp->xflag = 1;
+			return xrp;
+		}
+		xrp++;
+	}
+	return NULL;
 }
 
 static void xres_free(struct xresource *xrp)
@@ -161,76 +90,71 @@ static struct resource *shmedia_find_resource(struct resource *root,
 	return NULL;
 }
 
-static unsigned long shmedia_alloc_io(unsigned long phys, unsigned long size,
-				      const char *name)
+static void __iomem *shmedia_alloc_io(unsigned long phys, unsigned long size,
+				      const char *name, unsigned long flags)
 {
-        static int printed_full = 0;
-        struct xresource *xres;
-        struct resource *res;
-        char *tack;
-        int tlen;
-
-        if (name == NULL) name = "???";
-
-        if ((xres = xres_alloc()) != 0) {
-                tack = xres->xname;
-                res = &xres->xres;
-        } else {
-                if (!printed_full) {
-                        printk("%s: done with statics, switching to kmalloc\n",
-			       __func__);
-                        printed_full = 1;
-                }
-                tlen = strlen(name);
-                tack = kmalloc(sizeof (struct resource) + tlen + 1, GFP_KERNEL);
-                if (!tack)
-			return -ENOMEM;
-                memset(tack, 0, sizeof(struct resource));
-                res = (struct resource *) tack;
-                tack += sizeof (struct resource);
-        }
-
-        strncpy(tack, name, XNMLN);
-        tack[XNMLN] = 0;
-        res->name = tack;
-
-        return shmedia_ioremap(res, phys, size);
+	static int printed_full;
+	struct xresource *xres;
+	struct resource *res;
+	char *tack;
+	int tlen;
+
+	if (name == NULL)
+		name = "???";
+
+	xres = xres_alloc();
+	if (xres != 0) {
+		tack = xres->xname;
+		res = &xres->xres;
+	} else {
+		if (!printed_full) {
+			printk(KERN_NOTICE "%s: done with statics, "
+			       "switching to kmalloc\n", __func__);
+			printed_full = 1;
+		}
+		tlen = strlen(name);
+		tack = kmalloc(sizeof(struct resource) + tlen + 1, GFP_KERNEL);
+		if (!tack)
+			return NULL;
+		memset(tack, 0, sizeof(struct resource));
+		res = (struct resource *) tack;
+		tack += sizeof(struct resource);
+	}
+
+	strncpy(tack, name, XNMLN);
+	tack[XNMLN] = 0;
+	res->name = tack;
+
+	return shmedia_ioremap(res, phys, size, flags);
 }
 
-static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz)
+static void __iomem *shmedia_ioremap(struct resource *res, u32 pa, int sz,
+				     unsigned long flags)
 {
-        unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
+	unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
 	unsigned long round_sz = (offset + sz + PAGE_SIZE-1) & PAGE_MASK;
-        unsigned long va;
-        unsigned int psz;
+	unsigned long va;
+	unsigned int psz;
 
-        if (allocate_resource(&shmedia_iomap, res, round_sz,
+	if (allocate_resource(&shmedia_iomap, res, round_sz,
 			      shmedia_iomap.start, shmedia_iomap.end,
 			      PAGE_SIZE, NULL, NULL) != 0) {
-                panic("alloc_io_res(%s): cannot occupy\n",
-                    (res->name != NULL)? res->name: "???");
-        }
+		panic("alloc_io_res(%s): cannot occupy\n",
+		      (res->name != NULL) ? res->name : "???");
+	}
 
-        va = res->start;
-        pa &= PAGE_MASK;
+	va = res->start;
+	pa &= PAGE_MASK;
 
 	psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
 
-	/* log at boot time ... */
-	printk("mapioaddr: %6s  [%2d page%s]  va 0x%08lx   pa 0x%08x\n",
-	       ((res->name != NULL) ? res->name : "???"),
-	       psz, psz == 1 ? " " : "s", va, pa);
-
-        for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
-                shmedia_mapioaddr(pa, va);
-                va += PAGE_SIZE;
-                pa += PAGE_SIZE;
-        }
-
-        res->start += offset;
-        res->end = res->start + sz - 1;         /* not strictly necessary.. */
+	for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
+		shmedia_mapioaddr(pa, va, flags);
+		va += PAGE_SIZE;
+		pa += PAGE_SIZE;
+	}
 
-        return res->start;
+	return (void __iomem *)(unsigned long)(res->start + offset);
 }
 
 static void shmedia_free_io(struct resource *res)
@@ -249,14 +173,12 @@ static void shmedia_free_io(struct resource *res)
 
 static __init_refok void *sh64_get_page(void)
 {
-	extern int after_bootmem;
 	void *page;
 
-	if (after_bootmem) {
-		page = (void *)get_zeroed_page(GFP_ATOMIC);
-	} else {
+	if (after_bootmem)
+		page = (void *)get_zeroed_page(GFP_KERNEL);
+	else
 		page = alloc_bootmem_pages(PAGE_SIZE);
-	}
 
 	if (!page || ((unsigned long)page & ~PAGE_MASK))
 		panic("sh64_get_page: Out of memory already?\n");
@@ -264,17 +186,20 @@ static __init_refok void *sh64_get_page(void)
 	return page;
 }
 
-static void shmedia_mapioaddr(unsigned long pa, unsigned long va)
+static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
+			      unsigned long flags)
 {
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
 	pte_t *ptep, pte;
 	pgprot_t prot;
-	unsigned long flags = 1; /* 1 = CB0-1 device */
 
 	pr_debug("shmedia_mapiopage pa %08lx va %08lx\n",  pa, va);
 
+	if (!flags)
+		flags = 1; /* 1 = CB0-1 device */
+
 	pgdp = pgd_offset_k(va);
 	if (pgd_none(*pgdp) || !pgd_present(*pgdp)) {
 		pudp = (pud_t *)sh64_get_page();
@@ -288,7 +213,7 @@ static void shmedia_mapioaddr(unsigned long pa, unsigned long va)
 	}
 
 	pmdp = pmd_offset(pudp, va);
-	if (pmd_none(*pmdp) || !pmd_present(*pmdp) ) {
+	if (pmd_none(*pmdp) || !pmd_present(*pmdp)) {
 		ptep = (pte_t *)sh64_get_page();
 		set_pmd(pmdp, __pmd((unsigned long)ptep + _PAGE_TABLE));
 	}
@@ -336,17 +261,19 @@ static void shmedia_unmapioaddr(unsigned long vaddr)
 	pte_clear(&init_mm, vaddr, ptep);
 }
 
-unsigned long onchip_remap(unsigned long phys, unsigned long size, const char *name)
+void __iomem *__ioremap(unsigned long offset, unsigned long size,
+			unsigned long flags)
 {
-	if (size < PAGE_SIZE)
-		size = PAGE_SIZE;
+	char name[14];
 
-	return shmedia_alloc_io(phys, size, name);
+	sprintf(name, "phys_%08x", (u32)offset);
+	return shmedia_alloc_io(offset, size, name, flags);
 }
-EXPORT_SYMBOL(onchip_remap);
+EXPORT_SYMBOL(__ioremap);
 
-void onchip_unmap(unsigned long vaddr)
+void __iounmap(void __iomem *virtual)
 {
+	unsigned long vaddr = (unsigned long)virtual & PAGE_MASK;
 	struct resource *res;
 	unsigned int psz;
 
@@ -357,10 +284,7 @@ void onchip_unmap(unsigned long vaddr)
 		return;
 	}
 
-        psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
-
-        printk(KERN_DEBUG "unmapioaddr: %6s  [%2d page%s] freed\n",
-	       res->name, psz, psz == 1 ? " " : "s");
+	psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
 
 	shmedia_free_io(res);
 
@@ -371,9 +295,8 @@ void onchip_unmap(unsigned long vaddr)
 		kfree(res);
 	}
 }
-EXPORT_SYMBOL(onchip_unmap);
+EXPORT_SYMBOL(__iounmap);
 
-#ifdef CONFIG_PROC_FS
 static int
 ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
 		  void *data)
@@ -385,7 +308,10 @@ ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
 	for (r = ((struct resource *)data)->child; r != NULL; r = r->sibling) {
 		if (p + 32 >= e)        /* Better than nothing */
 			break;
-		if ((nm = r->name) == 0) nm = "???";
+		nm = r->name;
+		if (nm == NULL)
+			nm = "???";
+
 		p += sprintf(p, "%08lx-%08lx: %s\n",
 			     (unsigned long)r->start,
 			     (unsigned long)r->end, nm);
@@ -393,14 +319,11 @@ ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
 
 	return p-buf;
 }
-#endif /* CONFIG_PROC_FS */
 
 static int __init register_proc_onchip(void)
 {
-#ifdef CONFIG_PROC_FS
-	create_proc_read_entry("io_map",0,0, ioremap_proc_info, &shmedia_iomap);
-#endif
+	create_proc_read_entry("io_map", 0, 0, ioremap_proc_info,
+			       &shmedia_iomap);
 	return 0;
 }
-
-__initcall(register_proc_onchip);
+late_initcall(register_proc_onchip);
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index dbf5357..728d6a0 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -985,13 +985,7 @@ static void sci_config_port(struct uart_port *port, int flags)
 	port->type = s->type;
 
 	if (port->flags & UPF_IOREMAP && !port->membase) {
-#if defined(CONFIG_SUPERH64)
-		port->mapbase = onchip_remap(SCIF_ADDR_SH5, 1024, "SCIF");
-		port->membase = (void __iomem *)port->mapbase;
-#else
 		port->membase = ioremap_nocache(port->mapbase, 0x40);
-#endif
-
 		dev_err(port->dev, "can't remap port#%d\n", port->line);
 	}
 }
-- 
cgit v0.10.2


From c51279ec0d2d959e13831ae84b714301f0494f27 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 7 May 2009 18:17:20 +0900
Subject: sh: Kill off unused SH-5 irq_describe cruft.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c
index dbb00c9..33f7708 100644
--- a/arch/sh/boards/mach-cayman/irq.c
+++ b/arch/sh/boards/mach-cayman/irq.c
@@ -142,21 +142,6 @@ int cayman_irq_demux(int evt)
 	return irq;
 }
 
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_SYSCTL)
-int cayman_irq_describe(char* p, int irq)
-{
-	if (irq < NR_INTC_IRQS) {
-		return intc_irq_describe(p, irq);
-	} else if (irq < NR_INTC_IRQS + 8) {
-		return sprintf(p, "(SMSC %d)", irq - NR_INTC_IRQS);
-	} else if ((irq >= NR_INTC_IRQS + 24) && (irq < NR_INTC_IRQS + 32)) {
-		return sprintf(p, "(PCI2 %d)", irq - (NR_INTC_IRQS + 24));
-	}
-
-	return 0;
-}
-#endif
-
 void init_cayman_irq(void)
 {
 	int i;
diff --git a/arch/sh/include/cpu-sh5/cpu/irq.h b/arch/sh/include/cpu-sh5/cpu/irq.h
index f0f0756..0ccf257 100644
--- a/arch/sh/include/cpu-sh5/cpu/irq.h
+++ b/arch/sh/include/cpu-sh5/cpu/irq.h
@@ -111,7 +111,6 @@
 #define TOP_PRIORITY	15
 
 extern int intc_evt_to_irq[(0xE20/0x20)+1];
-int intc_irq_describe(char* p, int irq);
 extern int platform_int_priority[NR_INTC_IRQS];
 
 #endif /* __ASM_SH_CPU_SH5_IRQ_H */
diff --git a/arch/sh/kernel/cpu/irq/intc-sh5.c b/arch/sh/kernel/cpu/irq/intc-sh5.c
index 02d8137..a619eaf 100644
--- a/arch/sh/kernel/cpu/irq/intc-sh5.c
+++ b/arch/sh/kernel/cpu/irq/intc-sh5.c
@@ -160,28 +160,6 @@ void make_intc_irq(unsigned int irq)
 	disable_intc_irq(irq);
 }
 
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_SYSCTL)
-static int IRQ_to_vectorN[NR_INTC_IRQS] = {
-	0x12, 0x15, 0x18, 0x1B, 0x40, 0x41, 0x42, 0x43, /*  0- 7 */
-	  -1,   -1,   -1,   -1, 0x50, 0x51, 0x52, 0x53,	/*  8-15 */
-	0x54, 0x55, 0x32, 0x33, 0x34, 0x35, 0x36,   -1, /* 16-23 */
-	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1, /* 24-31 */
-	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x38,	/* 32-39 */
-        0x39, 0x3A, 0x3B,   -1,   -1,   -1,   -1,   -1, /* 40-47 */
-	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1, /* 48-55 */
-	  -1,   -1,   -1,   -1,   -1,   -1,   -1, 0x2B, /* 56-63 */
-
-};
-
-int intc_irq_describe(char* p, int irq)
-{
-	if (irq < NR_INTC_IRQS)
-		return sprintf(p, "(0x%3x)", IRQ_to_vectorN[irq]*0x20);
-	else
-		return 0;
-}
-#endif
-
 void __init plat_irq_setup(void)
 {
 	unsigned long long __dummy0, __dummy1=~0x00000000100000f0;
-- 
cgit v0.10.2


From eca8655ffa1ffb23c8f6f1485c1315a3087c8f38 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Fri, 6 Mar 2009 19:49:48 +0000
Subject: [ARM] S3C: Add common USB OHCI device definition

Add common definition for USB OHCI platform device, add a Kconfig
to selectively compile it and add update all the users.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/Kconfig b/arch/arm/mach-s3c2410/Kconfig
index 63a30d1..41bb65d 100644
--- a/arch/arm/mach-s3c2410/Kconfig
+++ b/arch/arm/mach-s3c2410/Kconfig
@@ -59,6 +59,7 @@ config ARCH_H1940
 	bool "IPAQ H1940"
 	select CPU_S3C2410
 	select PM_H1940 if PM
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using the HP IPAQ H1940
 
@@ -70,6 +71,7 @@ config PM_H1940
 config MACH_N30
 	bool "Acer N30 family"
 	select CPU_S3C2410
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you want suppt for the Acer N30, Acer N35,
 	  Navman PiN570, Yakumo AlphaX or Airis NC05 PDAs.
@@ -82,6 +84,7 @@ config ARCH_BAST
 	select MACH_BAST_IDE
 	select S3C24XX_DCLK
 	select ISA
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using the Simtec Electronics EB2410ITX
 	  development board (also known as BAST)
@@ -89,6 +92,7 @@ config ARCH_BAST
 config MACH_OTOM
  	bool "NexVision OTOM Board"
  	select CPU_S3C2410
+	select S3C_DEV_USB_HOST
 	help
  	  Say Y here if you are using the Nex Vision OTOM board
 
@@ -96,6 +100,7 @@ config MACH_AML_M5900
 	bool "AML M5900 Series"
 	select CPU_S3C2410
 	select PM_SIMTEC if PM
+	select S3C_DEV_USB_HOST
 	help
 	   Say Y here if you are using the American Microsystems M5900 Series
            <http://www.amltd.com>
@@ -111,6 +116,7 @@ config BAST_PC104_IRQ
 config MACH_TCT_HAMMER
 	bool "TCT Hammer Board"
 	select CPU_S3C2410
+	select S3C_DEV_USB_HOST
 	help
 	   Say Y here if you are using the TinCanTools Hammer Board
            <http://www.tincantools.com>
@@ -122,12 +128,14 @@ config MACH_VR1000
 	select SIMTEC_NOR
 	select MACH_BAST_IDE
 	select CPU_S3C2410
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using the Thorcom VR1000 board.
 
 config MACH_QT2410
 	bool "QT2410"
 	select CPU_S3C2410
+	select S3C_DEV_USB_HOST
 	help
 	   Say Y here if you are using the Armzone QT2410
 
diff --git a/arch/arm/mach-s3c2412/Kconfig b/arch/arm/mach-s3c2412/Kconfig
index ca99564..63586ff 100644
--- a/arch/arm/mach-s3c2412/Kconfig
+++ b/arch/arm/mach-s3c2412/Kconfig
@@ -38,6 +38,7 @@ menu "S3C2412 Machines"
 config MACH_JIVE
 	bool "Logitech Jive"
 	select CPU_S3C2412
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using the Logitech Jive.
 
@@ -50,6 +51,7 @@ config MACH_SMDK2413
 	select CPU_S3C2412
 	select MACH_S3C2413
 	select MACH_SMDK
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using an SMDK2413
 
@@ -72,6 +74,7 @@ config MACH_SMDK2412
 config MACH_VSTMS
 	bool "VMSTMS"
 	select CPU_S3C2412
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using an VSTMS board
 
diff --git a/arch/arm/mach-s3c2440/Kconfig b/arch/arm/mach-s3c2440/Kconfig
index cde5ae9..5df73cb 100644
--- a/arch/arm/mach-s3c2440/Kconfig
+++ b/arch/arm/mach-s3c2440/Kconfig
@@ -33,6 +33,7 @@ config MACH_ANUBIS
 	select PM_SIMTEC if PM
 	select HAVE_PATA_PLATFORM
 	select S3C24XX_GPIO_EXTRA64
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using the Simtec Electronics ANUBIS
 	  development system
@@ -43,6 +44,7 @@ config MACH_OSIRIS
 	select S3C24XX_DCLK
 	select PM_SIMTEC if PM
 	select S3C24XX_GPIO_EXTRA128
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using the Simtec IM2440D20 module, also
 	  known as the Osiris.
@@ -58,12 +60,14 @@ config ARCH_S3C2440
 	bool "SMDK2440"
 	select CPU_S3C2440
 	select MACH_SMDK
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using the SMDK2440.
 
 config MACH_NEXCODER_2440
  	bool "NexVision NEXCODER 2440 Light Board"
  	select CPU_S3C2440
+	select S3C_DEV_USB_HOST
 	help
  	  Say Y here if you are using the Nex Vision NEXCODER 2440 Light Board
 
@@ -76,6 +80,7 @@ config SMDK2440_CPU2440
 config MACH_AT2440EVB
 	bool "Avantech AT2440EVB development board"
 	select CPU_S3C2440
+	select S3C_DEV_USB_HOST
 	help
 	  Say Y here if you are using the AT2440EVB development board
 
diff --git a/arch/arm/plat-s3c/Kconfig b/arch/arm/plat-s3c/Kconfig
index de93838..042d34a 100644
--- a/arch/arm/plat-s3c/Kconfig
+++ b/arch/arm/plat-s3c/Kconfig
@@ -172,4 +172,9 @@ config S3C_DEV_FB
 	help
 	  Compile in platform device definition for framebuffer
 
+config S3C_DEV_USB_HOST
+	bool
+	help
+	  Compile in platform device definition for USB host.
+
 endif
diff --git a/arch/arm/plat-s3c/Makefile b/arch/arm/plat-s3c/Makefile
index 8d7815d..aa995a4 100644
--- a/arch/arm/plat-s3c/Makefile
+++ b/arch/arm/plat-s3c/Makefile
@@ -30,3 +30,4 @@ obj-$(CONFIG_S3C_DEV_HSMMC1)	+= dev-hsmmc1.o
 obj-y				+= dev-i2c0.o
 obj-$(CONFIG_S3C_DEV_I2C1)	+= dev-i2c1.o
 obj-$(CONFIG_S3C_DEV_FB)	+= dev-fb.o
+obj-$(CONFIG_S3C_DEV_USB_HOST)	+= dev-usb.o
diff --git a/arch/arm/plat-s3c/dev-usb.c b/arch/arm/plat-s3c/dev-usb.c
new file mode 100644
index 0000000..a997d95
--- /dev/null
+++ b/arch/arm/plat-s3c/dev-usb.c
@@ -0,0 +1,50 @@
+/* linux/arch/arm/plat-s3c/dev-usb.c
+ *
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C series device definition for USB host
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+
+#include <mach/irqs.h>
+#include <mach/map.h>
+
+#include <plat/devs.h>
+
+
+static struct resource s3c_usb_resource[] = {
+	[0] = {
+		.start = S3C24XX_PA_USBHOST,
+		.end   = S3C24XX_PA_USBHOST + S3C24XX_SZ_USBHOST - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_USBH,
+		.end   = IRQ_USBH,
+		.flags = IORESOURCE_IRQ,
+	}
+};
+
+static u64 s3c_device_usb_dmamask = 0xffffffffUL;
+
+struct platform_device s3c_device_usb = {
+	.name		  = "s3c2410-ohci",
+	.id		  = -1,
+	.num_resources	  = ARRAY_SIZE(s3c_usb_resource),
+	.resource	  = s3c_usb_resource,
+	.dev              = {
+		.dma_mask = &s3c_device_usb_dmamask,
+		.coherent_dma_mask = 0xffffffffUL
+	}
+};
+
+EXPORT_SYMBOL(s3c_device_usb);
diff --git a/arch/arm/plat-s3c24xx/devs.c b/arch/arm/plat-s3c24xx/devs.c
index 16ac01d..4eb378c 100644
--- a/arch/arm/plat-s3c24xx/devs.c
+++ b/arch/arm/plat-s3c24xx/devs.c
@@ -136,36 +136,6 @@ struct platform_device *s3c24xx_uart_src[4] = {
 struct platform_device *s3c24xx_uart_devs[4] = {
 };
 
-/* USB Host Controller */
-
-static struct resource s3c_usb_resource[] = {
-	[0] = {
-		.start = S3C24XX_PA_USBHOST,
-		.end   = S3C24XX_PA_USBHOST + S3C24XX_SZ_USBHOST - 1,
-		.flags = IORESOURCE_MEM,
-	},
-	[1] = {
-		.start = IRQ_USBH,
-		.end   = IRQ_USBH,
-		.flags = IORESOURCE_IRQ,
-	}
-};
-
-static u64 s3c_device_usb_dmamask = 0xffffffffUL;
-
-struct platform_device s3c_device_usb = {
-	.name		  = "s3c2410-ohci",
-	.id		  = -1,
-	.num_resources	  = ARRAY_SIZE(s3c_usb_resource),
-	.resource	  = s3c_usb_resource,
-	.dev              = {
-		.dma_mask = &s3c_device_usb_dmamask,
-		.coherent_dma_mask = 0xffffffffUL
-	}
-};
-
-EXPORT_SYMBOL(s3c_device_usb);
-
 /* LCD Controller */
 
 static struct resource s3c_lcd_resource[] = {
-- 
cgit v0.10.2


From 98fd63ba61450151a3b6d18a8264156996d86411 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Fri, 6 Mar 2009 19:49:49 +0000
Subject: [ARM] SMDK6410: Add USB OHCI host

Add USB OHCI host capability to the SMDK6410 for either USB OtG or
a single/double USB host port.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/Kconfig b/arch/arm/mach-s3c6410/Kconfig
index 1d50100..0c8df97 100644
--- a/arch/arm/mach-s3c6410/Kconfig
+++ b/arch/arm/mach-s3c6410/Kconfig
@@ -26,6 +26,7 @@ config MACH_SMDK6410
 	select S3C_DEV_HSMMC1
 	select S3C_DEV_I2C1
 	select S3C_DEV_FB
+	select S3C_DEV_USB_HOST
 	select S3C6410_SETUP_SDHCI
 	select S3C64XX_SETUP_I2C1
 	select S3C64XX_SETUP_FB_24BPP
diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c
index 7f473e4..678b728 100644
--- a/arch/arm/mach-s3c6410/mach-smdk6410.c
+++ b/arch/arm/mach-s3c6410/mach-smdk6410.c
@@ -141,6 +141,7 @@ static struct platform_device *smdk6410_devices[] __initdata = {
 	&s3c_device_i2c0,
 	&s3c_device_i2c1,
 	&s3c_device_fb,
+	&s3c_device_usb,
 	&smdk6410_lcd_powerdev,
 };
 
-- 
cgit v0.10.2


From e6a2a9ce69400a8109e01030aec0d5e1e2bc73c2 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Fri, 6 Mar 2009 19:51:50 +0000
Subject: [ARM] S3C: Rename S3C24XX_PA_USBHOST to S3C_PA_USBHOST

The USB host base address is available on both the S3C24XX and S3C64XX
ranges.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/include/mach/map.h b/arch/arm/mach-s3c2410/include/mach/map.h
index 255fdfe..e99b212 100644
--- a/arch/arm/mach-s3c2410/include/mach/map.h
+++ b/arch/arm/mach-s3c2410/include/mach/map.h
@@ -84,7 +84,6 @@
 
 #define S3C24XX_PA_IRQ      S3C2410_PA_IRQ
 #define S3C24XX_PA_MEMCTRL  S3C2410_PA_MEMCTRL
-#define S3C24XX_PA_USBHOST  S3C2410_PA_USBHOST
 #define S3C24XX_PA_DMA      S3C2410_PA_DMA
 #define S3C24XX_PA_CLKPWR   S3C2410_PA_CLKPWR
 #define S3C24XX_PA_LCD      S3C2410_PA_LCD
@@ -102,6 +101,7 @@
 
 #define S3C_PA_IIC          S3C2410_PA_IIC
 #define S3C_PA_UART	    S3C24XX_PA_UART
+#define S3C_PA_USBHOST	S3C2410_PA_USBHOST
 #define S3C_PA_HSMMC0	    S3C2443_PA_HSMMC
 
 #endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/plat-s3c/dev-usb.c b/arch/arm/plat-s3c/dev-usb.c
index a997d95..2ee85ab 100644
--- a/arch/arm/plat-s3c/dev-usb.c
+++ b/arch/arm/plat-s3c/dev-usb.c
@@ -23,8 +23,8 @@
 
 static struct resource s3c_usb_resource[] = {
 	[0] = {
-		.start = S3C24XX_PA_USBHOST,
-		.end   = S3C24XX_PA_USBHOST + S3C24XX_SZ_USBHOST - 1,
+		.start = S3C_PA_USBHOST,
+		.end   = S3C_PA_USBHOST + 0x100 - 1,
 		.flags = IORESOURCE_MEM,
 	},
 	[1] = {
diff --git a/arch/arm/plat-s3c24xx/include/plat/map.h b/arch/arm/plat-s3c24xx/include/plat/map.h
index eed8f78..c4d1334 100644
--- a/arch/arm/plat-s3c24xx/include/plat/map.h
+++ b/arch/arm/plat-s3c24xx/include/plat/map.h
@@ -58,7 +58,6 @@
 #define S3C24XX_SZ_SPI		SZ_1M
 #define S3C24XX_SZ_SDI		SZ_1M
 #define S3C24XX_SZ_NAND		SZ_1M
-#define S3C24XX_SZ_USBHOST	SZ_1M
 
 /* GPIO ports */
 
-- 
cgit v0.10.2


From 67b3e542ab1cf9f2fe3c1d4ae2aa16dd3b4a7e24 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Fri, 6 Mar 2009 19:51:51 +0000
Subject: [ARM] S3C64XX: Add USB OHCI support

Add USB OHCI host definitions.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/include/mach/map.h b/arch/arm/mach-s3c6400/include/mach/map.h
index baf1c0f..ea4223b 100644
--- a/arch/arm/mach-s3c6400/include/mach/map.h
+++ b/arch/arm/mach-s3c6400/include/mach/map.h
@@ -55,6 +55,8 @@
 #define S3C64XX_PA_MODEM	(0x74108000)
 #define S3C64XX_VA_MODEM	S3C_ADDR(0x00600000)
 
+#define S3C64XX_PA_USBHOST	(0x74300000)
+
 /* place VICs close together */
 #define S3C_VA_VIC0		(S3C_VA_IRQ + 0x00)
 #define S3C_VA_VIC1		(S3C_VA_IRQ + 0x10000)
@@ -67,5 +69,6 @@
 #define S3C_PA_IIC		S3C64XX_PA_IIC0
 #define S3C_PA_IIC1		S3C64XX_PA_IIC1
 #define S3C_PA_FB		S3C64XX_PA_FB
+#define S3C_PA_USBHOST		S3C64XX_PA_USBHOST
 
 #endif /* __ASM_ARCH_6400_MAP_H */
diff --git a/arch/arm/plat-s3c64xx/Kconfig b/arch/arm/plat-s3c64xx/Kconfig
index 54375a0..ad48fd73 100644
--- a/arch/arm/plat-s3c64xx/Kconfig
+++ b/arch/arm/plat-s3c64xx/Kconfig
@@ -19,6 +19,7 @@ config PLAT_S3C64XX
 	select S3C_GPIO_PULL_UPDOWN
 	select S3C_GPIO_CFG_S3C24XX
 	select S3C_GPIO_CFG_S3C64XX
+	select USB_ARCH_HAS_OHCI
 	help
 	  Base platform code for any Samsung S3C64XX device
 
-- 
cgit v0.10.2


From 57699e9adf1b44f281d808609b47816804763bfa Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 12 Dec 2008 00:24:23 +0000
Subject: [ARM] S3C: Add debug to UART save and a per-arch callback pre-restore

Add a simple debug message on saving the UART state and add a per-arch
pre-restore function to be used by the s3c64xx restore code to ensure
the UARTs control registers do not go through any illegal state changes.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/pm.c b/arch/arm/plat-s3c/pm.c
index 061182c..de4c4aa 100644
--- a/arch/arm/plat-s3c/pm.c
+++ b/arch/arm/plat-s3c/pm.c
@@ -83,6 +83,9 @@ static void s3c_pm_save_uart(unsigned int uart, struct pm_uart_save *save)
 	save->ufcon = __raw_readl(regs + S3C2410_UFCON);
 	save->umcon = __raw_readl(regs + S3C2410_UMCON);
 	save->ubrdiv = __raw_readl(regs + S3C2410_UBRDIV);
+
+	S3C_PMDBG("UART[%d]: ULCON=%04x, UCON=%04x, UFCON=%04x, UBRDIV=%04x\n",
+		  uart, save->ulcon, save->ucon, save->ufcon, save->ubrdiv);
 }
 
 static void s3c_pm_save_uarts(void)
@@ -98,6 +101,8 @@ static void s3c_pm_restore_uart(unsigned int uart, struct pm_uart_save *save)
 {
 	void __iomem *regs = S3C_VA_UARTx(uart);
 
+	s3c_pm_arch_update_uart(regs, save);
+
 	__raw_writel(save->ulcon, regs + S3C2410_ULCON);
 	__raw_writel(save->ucon,  regs + S3C2410_UCON);
 	__raw_writel(save->ufcon, regs + S3C2410_UFCON);
diff --git a/arch/arm/plat-s3c24xx/include/plat/pm-core.h b/arch/arm/plat-s3c24xx/include/plat/pm-core.h
index c758821..fb45dd9 100644
--- a/arch/arm/plat-s3c24xx/include/plat/pm-core.h
+++ b/arch/arm/plat-s3c24xx/include/plat/pm-core.h
@@ -57,3 +57,8 @@ static inline void s3c_pm_arch_show_resume_irqs(void)
 	s3c_pm_show_resume_irqs(IRQ_EINT4-4, __raw_readl(S3C2410_EINTPEND),
 				s3c_irqwake_eintmask);
 }
+
+static inline void s3c_pm_arch_update_uart(void __iomem *regs,
+					   struct pm_uart_save *save)
+{
+}
-- 
cgit v0.10.2


From 4b637dc231a96a151ea70c27d86b35c7891e2a7c Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 12 Dec 2008 00:24:24 +0000
Subject: [ARM] S3C: PM save UART UDIVSLOT if doing PM

Add the facility to save the UART UDIVSLOT register if the UART
state is being saved over suspend.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/include/plat/pm.h b/arch/arm/plat-s3c/include/plat/pm.h
index 3779775..5f8707e 100644
--- a/arch/arm/plat-s3c/include/plat/pm.h
+++ b/arch/arm/plat-s3c/include/plat/pm.h
@@ -44,6 +44,8 @@ extern void (*pm_cpu_sleep)(void);
 
 extern unsigned long s3c_pm_flags;
 
+extern unsigned char pm_uart_udivslot;  /* true to save UART UDIVSLOT */
+
 /* from sleep.S */
 
 extern int  s3c_cpu_save(unsigned long *saveblk);
@@ -88,6 +90,7 @@ struct pm_uart_save {
 	u32	ufcon;
 	u32	umcon;
 	u32	ubrdiv;
+	u32	udivslot;
 };
 
 /* helper functions to save/restore lists of registers. */
diff --git a/arch/arm/plat-s3c/pm.c b/arch/arm/plat-s3c/pm.c
index de4c4aa..9957b53 100644
--- a/arch/arm/plat-s3c/pm.c
+++ b/arch/arm/plat-s3c/pm.c
@@ -70,6 +70,8 @@ static inline void s3c_pm_debug_init(void)
 
 /* Save the UART configurations if we are configured for debug. */
 
+unsigned char pm_uart_udivslot;
+
 #ifdef CONFIG_S3C2410_PM_DEBUG
 
 struct pm_uart_save uart_save[CONFIG_SERIAL_SAMSUNG_UARTS];
@@ -84,6 +86,9 @@ static void s3c_pm_save_uart(unsigned int uart, struct pm_uart_save *save)
 	save->umcon = __raw_readl(regs + S3C2410_UMCON);
 	save->ubrdiv = __raw_readl(regs + S3C2410_UBRDIV);
 
+	if (pm_uart_udivslot)
+		save->udivslot = __raw_readl(regs + S3C2443_DIVSLOT);
+
 	S3C_PMDBG("UART[%d]: ULCON=%04x, UCON=%04x, UFCON=%04x, UBRDIV=%04x\n",
 		  uart, save->ulcon, save->ucon, save->ufcon, save->ubrdiv);
 }
@@ -108,6 +113,9 @@ static void s3c_pm_restore_uart(unsigned int uart, struct pm_uart_save *save)
 	__raw_writel(save->ufcon, regs + S3C2410_UFCON);
 	__raw_writel(save->umcon, regs + S3C2410_UMCON);
 	__raw_writel(save->ubrdiv, regs + S3C2410_UBRDIV);
+
+	if (pm_uart_udivslot)
+		__raw_writel(save->udivslot, regs + S3C2443_DIVSLOT);
 }
 
 static void s3c_pm_restore_uarts(void)
-- 
cgit v0.10.2


From bd117bd161ea99826494983aef8c8e236ac129bd Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 10 Mar 2009 18:19:35 +0000
Subject: [ARM] S3C64XX: Initial support for PM (suspend to RAM)

Add the initial support for the S3C64XX based systems to use
suspend-to-RAM to sleep.

Includes basic debugging for use with the SMDK6410 usign the
LEDs on the baseboard.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/include/mach/regs-clock.h b/arch/arm/mach-s3c6400/include/mach/regs-clock.h
new file mode 100644
index 0000000..a6c7f4e
--- /dev/null
+++ b/arch/arm/mach-s3c6400/include/mach/regs-clock.h
@@ -0,0 +1,16 @@
+/* linux/arch/arm/mach-s3c6400/include/mach/regs-clock.h
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *	http://armlinux.simtec.co.uk/
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * S3C64XX - clock register compatibility with s3c24xx
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <plat/regs-clock.h>
+
diff --git a/arch/arm/plat-s3c/Kconfig b/arch/arm/plat-s3c/Kconfig
index 042d34a..a4c5027 100644
--- a/arch/arm/plat-s3c/Kconfig
+++ b/arch/arm/plat-s3c/Kconfig
@@ -71,6 +71,15 @@ config S3C2410_PM_DEBUG
 	  Resume code. See <file:Documentation/arm/Samsung-S3C24XX/Suspend.txt>
 	  for more information.
 
+config S3C_PM_DEBUG_LED_SMDK
+       bool "SMDK LED suspend/resume debugging"
+       depends on PM && (MACH_SMDK6410)
+       help
+         Say Y here to enable the use of the SMDK LEDs on the baseboard
+	 for debugging of the state of the suspend and resume process.
+
+	 Note, this currently only works for S3C64XX based SMDK boards.
+
 config S3C2410_PM_CHECK
 	bool "S3C2410 PM Suspend Memory CRC"
 	depends on PM && CRC32
diff --git a/arch/arm/plat-s3c/include/plat/pm.h b/arch/arm/plat-s3c/include/plat/pm.h
index 5f8707e..7a79719 100644
--- a/arch/arm/plat-s3c/include/plat/pm.h
+++ b/arch/arm/plat-s3c/include/plat/pm.h
@@ -127,6 +127,18 @@ extern void s3c_pm_dbg(const char *msg, ...);
 #define S3C_PMDBG(fmt...) printk(KERN_DEBUG fmt)
 #endif
 
+#ifdef CONFIG_S3C_PM_DEBUG_LED_SMDK
+/**
+ * s3c_pm_debug_smdkled() - Debug PM suspend/resume via SMDK Board LEDs
+ * @set: set bits for the state of the LEDs
+ * @clear: clear bits for the state of the LEDs.
+ */
+extern void s3c_pm_debug_smdkled(u32 set, u32 clear);
+
+#else
+static inline void s3c_pm_debug_smdkled(u32 set, u32 clear) { }
+#endif /* CONFIG_S3C_PM_DEBUG_LED_SMDK */
+
 /* suspend memory checking */
 
 #ifdef CONFIG_S3C2410_PM_CHECK
diff --git a/arch/arm/plat-s3c/pm.c b/arch/arm/plat-s3c/pm.c
index 9957b53..8d97db2 100644
--- a/arch/arm/plat-s3c/pm.c
+++ b/arch/arm/plat-s3c/pm.c
@@ -21,11 +21,10 @@
 
 #include <asm/cacheflush.h>
 #include <mach/hardware.h>
+#include <mach/map.h>
 
 #include <plat/regs-serial.h>
 #include <mach/regs-clock.h>
-#include <mach/regs-gpio.h>
-#include <mach/regs-mem.h>
 #include <mach/regs-irq.h>
 #include <asm/irq.h>
 
@@ -326,6 +325,9 @@ static int s3c_pm_enter(suspend_state_t state)
 
 	S3C_PMDBG("%s: post sleep, preparing to return\n", __func__);
 
+	/* LEDs should now be 1110 */
+	s3c_pm_debug_smdkled(1 << 1, 0);
+
 	s3c_pm_check_restore();
 
 	/* ok, let's return from sleep */
diff --git a/arch/arm/plat-s3c64xx/Makefile b/arch/arm/plat-s3c64xx/Makefile
index 2e6d79b..b2bc2a9 100644
--- a/arch/arm/plat-s3c64xx/Makefile
+++ b/arch/arm/plat-s3c64xx/Makefile
@@ -24,6 +24,11 @@ obj-y				+= gpiolib.o
 obj-$(CONFIG_CPU_S3C6400_INIT)	+= s3c6400-init.o
 obj-$(CONFIG_CPU_S3C6400_CLOCK)	+= s3c6400-clock.o
 
+# PM support
+
+obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_PM)		+= sleep.o
+
 # Device setup
 
 obj-$(CONFIG_S3C64XX_SETUP_I2C0) += setup-i2c0.o
diff --git a/arch/arm/plat-s3c64xx/include/plat/irqs.h b/arch/arm/plat-s3c64xx/include/plat/irqs.h
index f865bf4..743a700 100644
--- a/arch/arm/plat-s3c64xx/include/plat/irqs.h
+++ b/arch/arm/plat-s3c64xx/include/plat/irqs.h
@@ -157,6 +157,7 @@
 
 #define S3C_EINT(x)		((x) + S3C_IRQ_EINT_BASE)
 #define IRQ_EINT(x)		S3C_EINT(x)
+#define IRQ_EINT_BIT(x)		((x) - S3C_EINT(0))
 
 /* Next the external interrupt groups. These are similar to the IRQ_EINT(x)
  * that they are sourced from the GPIO pins but with a different scheme for
diff --git a/arch/arm/plat-s3c64xx/include/plat/pm-core.h b/arch/arm/plat-s3c64xx/include/plat/pm-core.h
new file mode 100644
index 0000000..d347de3
--- /dev/null
+++ b/arch/arm/plat-s3c64xx/include/plat/pm-core.h
@@ -0,0 +1,98 @@
+/* linux/arch/arm/plat-s3c64xx/include/plat/pm-core.h
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *      Ben Dooks <ben@simtec.co.uk>
+ *      http://armlinux.simtec.co.uk/
+ *
+ * S3C64XX - PM core support for arch/arm/plat-s3c/pm.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <plat/regs-gpio.h>
+
+static inline void s3c_pm_debug_init_uart(void)
+{
+	u32 tmp = __raw_readl(S3C_PCLK_GATE);
+
+	/* As a note, since the S3C64XX UARTs generally have multiple
+	 * clock sources, we simply enable PCLK at the moment and hope
+	 * that the resume settings for the UART are suitable for the
+	 * use with PCLK.
+	 */
+
+	tmp |= S3C_CLKCON_PCLK_UART0;
+	tmp |= S3C_CLKCON_PCLK_UART1;
+	tmp |= S3C_CLKCON_PCLK_UART2;
+	tmp |= S3C_CLKCON_PCLK_UART3;
+
+	__raw_writel(tmp, S3C_PCLK_GATE);
+	udelay(10);
+}
+
+static inline void s3c_pm_arch_prepare_irqs(void)
+{
+	/* VIC should have already been taken care of */
+
+	/* clear any pending EINT0 interrupts */
+	__raw_writel(__raw_readl(S3C64XX_EINT0PEND), S3C64XX_EINT0PEND);
+}
+
+static inline void s3c_pm_arch_stop_clocks(void)
+{
+}
+
+static inline void s3c_pm_arch_show_resume_irqs(void)
+{
+}
+
+/* make these defines, we currently do not have any need to change
+ * the IRQ wake controls depending on the CPU we are running on */
+
+#define s3c_irqwake_eintallow	((1 << 28) - 1)
+#define s3c_irqwake_intallow	(0)
+
+static inline void s3c_pm_arch_update_uart(void __iomem *regs,
+					   struct pm_uart_save *save)
+{
+	u32 ucon = __raw_readl(regs + S3C2410_UCON);
+	u32 ucon_clk = ucon & S3C6400_UCON_CLKMASK;
+	u32 save_clk = save->ucon & S3C6400_UCON_CLKMASK;
+	u32 new_ucon;
+	u32 delta;
+
+	/* S3C64XX UART blocks only support level interrupts, so ensure that
+	 * when we restore unused UART blocks we force the level interrupt
+	 * settigs. */
+	save->ucon |= S3C2410_UCON_TXILEVEL | S3C2410_UCON_RXILEVEL;
+
+	/* We have a constraint on changing the clock type of the UART
+	 * between UCLKx and PCLK, so ensure that when we restore UCON
+	 * that the CLK field is correctly modified if the bootloader
+	 * has changed anything.
+	 */
+	if (ucon_clk != save_clk) {
+		new_ucon = save->ucon;
+		delta = ucon_clk ^ save_clk;
+
+		/* change from UCLKx => wrong PCLK,
+		 * either UCLK can be tested for by a bit-test
+		 * with UCLK0 */
+		if (ucon_clk & S3C6400_UCON_UCLK0 &&
+		    !(save_clk & S3C6400_UCON_UCLK0) &&
+		    delta & S3C6400_UCON_PCLK2) {
+			new_ucon &= ~S3C6400_UCON_UCLK0;
+		} else if (delta == S3C6400_UCON_PCLK2) {
+			/* as an precaution, don't change from
+			 * PCLK2 => PCLK or vice-versa */
+			new_ucon ^= S3C6400_UCON_PCLK2;
+		}
+
+		S3C_PMDBG("ucon change %04x => %04x (save=%04x)\n",
+			  ucon, new_ucon, save->ucon);
+		save->ucon = new_ucon;
+	}
+}
diff --git a/arch/arm/plat-s3c64xx/include/plat/regs-clock.h b/arch/arm/plat-s3c64xx/include/plat/regs-clock.h
index b1082c1..52836d4 100644
--- a/arch/arm/plat-s3c64xx/include/plat/regs-clock.h
+++ b/arch/arm/plat-s3c64xx/include/plat/regs-clock.h
@@ -32,6 +32,7 @@
 #define S3C_HCLK_GATE		S3C_CLKREG(0x30)
 #define S3C_PCLK_GATE		S3C_CLKREG(0x34)
 #define S3C_SCLK_GATE		S3C_CLKREG(0x38)
+#define S3C_MEM0_GATE		S3C_CLKREG(0x3C)
 
 /* CLKDIV0 */
 #define S3C6400_CLKDIV0_MFC_MASK	(0xf << 28)
diff --git a/arch/arm/plat-s3c64xx/irq-eint.c b/arch/arm/plat-s3c64xx/irq-eint.c
index 47e5155..f81b7b8 100644
--- a/arch/arm/plat-s3c64xx/irq-eint.c
+++ b/arch/arm/plat-s3c64xx/irq-eint.c
@@ -14,6 +14,7 @@
 
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/sysdev.h>
 #include <linux/gpio.h>
 #include <linux/irq.h>
 #include <linux/io.h>
@@ -26,6 +27,7 @@
 
 #include <mach/map.h>
 #include <plat/cpu.h>
+#include <plat/pm.h>
 
 #define eint_offset(irq)	((irq) - IRQ_EINT(0))
 #define eint_irq_to_bit(irq)	(1 << eint_offset(irq))
@@ -134,6 +136,7 @@ static struct irq_chip s3c_irq_eint = {
 	.mask_ack	= s3c_irq_eint_maskack,
 	.ack		= s3c_irq_eint_ack,
 	.set_type	= s3c_irq_eint_set_type,
+	.set_wake	= s3c_irqext_wake,
 };
 
 /* s3c_irq_demux_eint
diff --git a/arch/arm/plat-s3c64xx/pm.c b/arch/arm/plat-s3c64xx/pm.c
new file mode 100644
index 0000000..98190aa
--- /dev/null
+++ b/arch/arm/plat-s3c64xx/pm.c
@@ -0,0 +1,186 @@
+/* linux/arch/arm/plat-s3c64xx/pm.c
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C64XX CPU PM support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/init.h>
+#include <linux/suspend.h>
+#include <linux/serial_core.h>
+#include <linux/io.h>
+
+#include <mach/map.h>
+
+#include <plat/pm.h>
+#include <plat/regs-sys.h>
+#include <plat/regs-gpio.h>
+#include <plat/regs-clock.h>
+#include <plat/regs-syscon-power.h>
+#include <plat/regs-gpio-memport.h>
+
+#ifdef CONFIG_S3C_PM_DEBUG_LED_SMDK
+#include <plat/gpio-bank-n.h>
+
+void s3c_pm_debug_smdkled(u32 set, u32 clear)
+{
+	unsigned long flags;
+	u32 reg;
+
+	local_irq_save(flags);
+	reg = __raw_readl(S3C64XX_GPNCON);
+	reg &= ~(S3C64XX_GPN_CONMASK(12) | S3C64XX_GPN_CONMASK(13) |
+		 S3C64XX_GPN_CONMASK(14) | S3C64XX_GPN_CONMASK(15));
+	reg |= S3C64XX_GPN_OUTPUT(12) | S3C64XX_GPN_OUTPUT(13) |
+	       S3C64XX_GPN_OUTPUT(14) | S3C64XX_GPN_OUTPUT(15);
+	__raw_writel(reg, S3C64XX_GPNCON);
+
+	reg = __raw_readl(S3C64XX_GPNDAT);
+	reg &= ~(clear << 12);
+	reg |= set << 12;
+	__raw_writel(reg, S3C64XX_GPNDAT);
+
+	local_irq_restore(flags);
+}
+#endif
+
+static struct sleep_save core_save[] = {
+	SAVE_ITEM(S3C_APLL_LOCK),
+	SAVE_ITEM(S3C_MPLL_LOCK),
+	SAVE_ITEM(S3C_EPLL_LOCK),
+	SAVE_ITEM(S3C_CLK_SRC),
+	SAVE_ITEM(S3C_CLK_DIV0),
+	SAVE_ITEM(S3C_CLK_DIV1),
+	SAVE_ITEM(S3C_CLK_DIV2),
+	SAVE_ITEM(S3C_CLK_OUT),
+	SAVE_ITEM(S3C_HCLK_GATE),
+	SAVE_ITEM(S3C_PCLK_GATE),
+	SAVE_ITEM(S3C_SCLK_GATE),
+	SAVE_ITEM(S3C_MEM0_GATE),
+
+	SAVE_ITEM(S3C_EPLL_CON1),
+	SAVE_ITEM(S3C_EPLL_CON0),
+
+	SAVE_ITEM(S3C64XX_MEM0DRVCON),
+	SAVE_ITEM(S3C64XX_MEM1DRVCON),
+
+#ifndef CONFIG_CPU_FREQ
+	SAVE_ITEM(S3C_APLL_CON),
+	SAVE_ITEM(S3C_MPLL_CON),
+#endif
+};
+
+static struct sleep_save misc_save[] = {
+	SAVE_ITEM(S3C64XX_AHB_CON0),
+	SAVE_ITEM(S3C64XX_AHB_CON1),
+	SAVE_ITEM(S3C64XX_AHB_CON2),
+	
+	SAVE_ITEM(S3C64XX_SPCON),
+
+	SAVE_ITEM(S3C64XX_MEM0CONSTOP),
+	SAVE_ITEM(S3C64XX_MEM1CONSTOP),
+	SAVE_ITEM(S3C64XX_MEM0CONSLP0),
+	SAVE_ITEM(S3C64XX_MEM0CONSLP1),
+	SAVE_ITEM(S3C64XX_MEM1CONSLP),
+};
+
+void s3c_pm_configure_extint(void)
+{
+	__raw_writel(s3c_irqwake_eintmask, S3C64XX_EINT_MASK);
+}
+
+void s3c_pm_save_gpios(void)
+{
+	/* currently, unless the bootloader does something really stupid
+	 * the gpio blocks should be maintained over their sleep.
+	 */
+}
+
+void s3c_pm_restore_gpios(void)
+{
+}
+
+void s3c_pm_restore_core(void)
+{
+	__raw_writel(0, S3C64XX_EINT_MASK);
+
+	s3c_pm_debug_smdkled(1 << 2, 0);
+
+	s3c_pm_do_restore_core(core_save, ARRAY_SIZE(core_save));
+	s3c_pm_do_restore(misc_save, ARRAY_SIZE(misc_save));
+}
+
+void s3c_pm_save_core(void)
+{
+	s3c_pm_do_save(misc_save, ARRAY_SIZE(misc_save));
+	s3c_pm_do_save(core_save, ARRAY_SIZE(core_save));
+}
+
+/* since both s3c6400 and s3c6410 share the same sleep pm calls, we
+ * put the per-cpu code in here until any new cpu comes along and changes
+ * this.
+ */
+
+#include <plat/regs-gpio.h>
+
+static void s3c64xx_cpu_suspend(void)
+{
+	unsigned long tmp;
+
+	/* set our standby method to sleep */
+
+	tmp = __raw_readl(S3C64XX_PWR_CFG);
+	tmp &= ~S3C64XX_PWRCFG_CFG_WFI_MASK;
+	tmp |= S3C64XX_PWRCFG_CFG_WFI_SLEEP;
+	__raw_writel(tmp, S3C64XX_PWR_CFG);
+
+	/* clear any old wakeup */
+
+	__raw_writel(__raw_readl(S3C64XX_WAKEUP_STAT),
+		     S3C64XX_WAKEUP_STAT);
+
+	/* set the LED state to 0110 over sleep */
+	s3c_pm_debug_smdkled(3 << 1, 0xf);
+
+	/* issue the standby signal into the pm unit. Note, we
+	 * issue a write-buffer drain just in case */
+
+	tmp = 0;
+
+	asm("b 1f\n\t"
+	    ".align 5\n\t"
+	    "1:\n\t"
+	    "mcr p15, 0, %0, c7, c10, 5\n\t"
+	    "mcr p15, 0, %0, c7, c10, 4\n\t"
+	    "mcr p15, 0, %0, c7, c0, 4" :: "r" (tmp));
+
+	/* we should never get past here */
+
+	panic("sleep resumed to originator?");
+}
+
+static void s3c64xx_pm_prepare(void)
+{
+	/* store address of resume. */
+	__raw_writel(virt_to_phys(s3c_cpu_resume), S3C64XX_INFORM0);
+
+	/* ensure previous wakeup state is cleared before sleeping */
+	__raw_writel(__raw_readl(S3C64XX_WAKEUP_STAT), S3C64XX_WAKEUP_STAT);
+}
+
+static int s3c64xx_pm_init(void)
+{
+	pm_cpu_prep = s3c64xx_pm_prepare;
+	pm_cpu_sleep = s3c64xx_cpu_suspend;
+	pm_uart_udivslot = 1;
+	return 0;
+}
+
+arch_initcall(s3c64xx_pm_init);
diff --git a/arch/arm/plat-s3c64xx/sleep.S b/arch/arm/plat-s3c64xx/sleep.S
new file mode 100644
index 0000000..8e71fe9
--- /dev/null
+++ b/arch/arm/plat-s3c64xx/sleep.S
@@ -0,0 +1,144 @@
+/* linux/0arch/arm/plat-s3c64xx/sleep.S
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C64XX CPU sleep code
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <mach/map.h>
+
+#undef S3C64XX_VA_GPIO
+#define S3C64XX_VA_GPIO (0x0)
+
+#include <plat/regs-gpio.h>
+#include <plat/gpio-bank-n.h>
+
+#define LL_UART (S3C_PA_UART + (0x400 * CONFIG_S3C_LOWLEVEL_UART_PORT))
+
+	.text
+
+	/* s3c_cpu_save
+	 *
+	 * Save enough processor state to allow the restart of the pm.c
+	 * code after resume.
+	 *
+	 * entry:
+	 *	r0 = pointer to the save block
+	*/
+
+ENTRY(s3c_cpu_save)
+	stmfd	sp!, { r4 - r12, lr }
+
+	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r6, c2, c0, 0	@ Translation Table BASE0
+	mrc	p15, 0, r7, c2, c0, 1	@ Translation Table BASE1
+	mrc	p15, 0, r8, c2, c0, 2	@ Translation Table Control
+	mrc	p15, 0, r9, c1, c0, 0	@ Control register
+	mrc	p15, 0, r10, c1, c0, 1	@ Auxiliary control register
+	mrc	p15, 0, r11, c1, c0, 2	@ Co-processor access controls
+
+	stmia	r0, { r4 - r13 }	@ Save CP registers and SP
+
+	@@ save our state to ram
+	bl	s3c_pm_cb_flushcache
+
+	@@ call final suspend code
+	ldr	r0, =pm_cpu_sleep
+	ldr	pc, [r0]
+	
+	@@ return to the caller, after the MMU is turned on.
+	@@ restore the last bits of the stack and return.
+resume_with_mmu:
+	ldmfd	sp!, { r4 - r12, pc }	@ return, from sp from s3c_cpu_save
+
+	.data
+
+	/* the next bit is code, but it requires easy access to the
+	 * s3c_sleep_save_phys data before the MMU is switched on, so
+	 * we store the code that needs this variable in the .data where
+	 * the value can be written to (the .text segment is RO).
+	*/
+
+	.global	s3c_sleep_save_phys
+s3c_sleep_save_phys:
+	.word	0
+
+	/* Sleep magic, the word before the resume entry point so that the
+	 * bootloader can check for a resumeable image. */
+
+	.word	0x2bedf00d
+
+	/* s3c_cpu_reusme
+	 *
+	 * This is the entry point, stored by whatever method the bootloader
+	 * requires to get the kernel runnign again. This code expects to be
+	 * entered with no caches live and the MMU disabled. It will then
+	 * restore the MMU and other basic CP registers saved and restart
+	 * the kernel C code to finish the resume code.
+	*/
+
+ENTRY(s3c_cpu_resume)
+	msr	cpsr_c, #PSR_I_BIT | PSR_F_BIT | SVC_MODE
+	ldr	r2, =LL_UART		/* for debug */
+
+#ifdef CONFIG_S3C_PM_DEBUG_LED_SMDK
+	/* Initialise the GPIO state if we are debugging via the SMDK LEDs,
+	 * as the uboot version supplied resets these to inputs during the
+	 * resume checks.
+	*/
+
+	ldr	r3, =S3C64XX_PA_GPIO
+	ldr	r0, [ r3, #S3C64XX_GPNCON ]
+	bic	r0, r0, #(S3C64XX_GPN_CONMASK(12) | S3C64XX_GPN_CONMASK(13) | \
+			  S3C64XX_GPN_CONMASK(14) | S3C64XX_GPN_CONMASK(15))
+	orr	r0, r0, #(S3C64XX_GPN_OUTPUT(12) | S3C64XX_GPN_OUTPUT(13) | \
+			  S3C64XX_GPN_OUTPUT(14) | S3C64XX_GPN_OUTPUT(15))
+	str	r0, [ r3, #S3C64XX_GPNCON ]
+
+	ldr	r0, [ r3, #S3C64XX_GPNDAT ]
+	bic	r0, r0, #0xf << 12			@ GPN12..15
+	orr	r0, r0, #1 << 15			@ GPN15
+	str	r0, [ r3, #S3C64XX_GPNDAT ]
+#endif
+
+	/* __v6_setup from arch/arm/mm/proc-v6.S, ensure that the caches
+	 * are thoroughly cleaned just in case the bootloader didn't do it
+	 * for us. */
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c14, 0		@ clean+invalidate D cache
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c15, 0		@ clean+invalidate cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	@@mcr	p15, 0, r0, c8, c7, 0		@ invalidate I + D TLBs
+	@@mcr	p15, 0, r0, c7, c7, 0		@ Invalidate I + D caches
+
+	ldr	r0, s3c_sleep_save_phys
+	ldmia	r0, { r4 - r13 }
+
+	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mcr	p15, 0, r6, c2, c0, 0	@ Translation Table BASE0
+	mcr	p15, 0, r7, c2, c0, 1	@ Translation Table BASE1
+	mcr	p15, 0, r8, c2, c0, 2	@ Translation Table Control
+	mcr	p15, 0, r10, c1, c0, 1	@ Auxiliary control register
+
+	mov	r0, #0			@ restore copro access controls
+	mcr	p15, 0, r11, c1, c0, 2	@ Co-processor access controls
+	mcr 	p15, 0, r0, c7, c5, 4
+
+	ldr	r2, =resume_with_mmu
+	mcr	p15, 0, r9, c1, c0, 0		/* turn mmu back on */
+	nop
+	mov	pc, r2				/* jump back */
+
+	.end
-- 
cgit v0.10.2


From 0abfe9aab1867abd96b44a425ad27ce021891e4f Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 12 Dec 2008 00:24:28 +0000
Subject: [ARM] S3C64XX: Add s3c6410_sysclass export

Add definition for s3c6410_sysclass which was missing
from arch/arm/plat-s3c/include/plat/cpu.h.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/include/plat/cpu.h b/arch/arm/plat-s3c/include/plat/cpu.h
index e62ae0f..f878cf6 100644
--- a/arch/arm/plat-s3c/include/plat/cpu.h
+++ b/arch/arm/plat-s3c/include/plat/cpu.h
@@ -69,3 +69,4 @@ extern struct sysdev_class s3c2412_sysclass;
 extern struct sysdev_class s3c2440_sysclass;
 extern struct sysdev_class s3c2442_sysclass;
 extern struct sysdev_class s3c2443_sysclass;
+extern struct sysdev_class s3c6410_sysclass;
-- 
cgit v0.10.2


From 1deb507dc7612e6c81b7953e827d1c859c1d8c0b Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 12 Dec 2008 00:24:31 +0000
Subject: [ARM] S3C64XX: Add generic s3c64xx sys device.

Add an s3c64xx_sysclass and device for items that currently
want to bind to any s3c64xx processor. The first user of this
will be parts of the s3c64xx suspend support which need to
save device state over suspend/resume.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/include/plat/cpu.h b/arch/arm/plat-s3c/include/plat/cpu.h
index f878cf6..be541cb 100644
--- a/arch/arm/plat-s3c/include/plat/cpu.h
+++ b/arch/arm/plat-s3c/include/plat/cpu.h
@@ -70,3 +70,5 @@ extern struct sysdev_class s3c2440_sysclass;
 extern struct sysdev_class s3c2442_sysclass;
 extern struct sysdev_class s3c2443_sysclass;
 extern struct sysdev_class s3c6410_sysclass;
+extern struct sysdev_class s3c64xx_sysclass;
+
diff --git a/arch/arm/plat-s3c64xx/cpu.c b/arch/arm/plat-s3c64xx/cpu.c
index 91f49a3..f9fdaab 100644
--- a/arch/arm/plat-s3c64xx/cpu.c
+++ b/arch/arm/plat-s3c64xx/cpu.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
+#include <linux/sysdev.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
@@ -104,6 +105,16 @@ static struct map_desc s3c_iodesc[] __initdata = {
 	},
 };
 
+
+struct sysdev_class s3c64xx_sysclass = {
+	.name	= "s3c64xx-core",
+};
+
+static struct sys_device s3c64xx_sysdev = {
+	.cls	= &s3c64xx_sysclass,
+};
+
+
 /* read cpu identification code */
 
 void __init s3c64xx_init_io(struct map_desc *mach_desc, int size)
@@ -117,3 +128,11 @@ void __init s3c64xx_init_io(struct map_desc *mach_desc, int size)
 	idcode = __raw_readl(S3C_VA_SYS + 0x118);
 	s3c_init_cpu(idcode, cpu_ids, ARRAY_SIZE(cpu_ids));
 }
+
+static __init int s3c64xx_sysdev_init(void)
+{
+	sysdev_class_register(&s3c64xx_sysclass);
+	return sysdev_register(&s3c64xx_sysdev);
+}
+
+core_initcall(s3c64xx_sysdev_init);
-- 
cgit v0.10.2


From 966bcc14386000e8b4dc7bbb426910bcb55a8588 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 12 Dec 2008 00:24:32 +0000
Subject: [ARM] S3C64XX: Add IRQ PM code

Add support for saving the state of the IRQ registers over suspend.

This requires moving the S3C64XX UART registers into <plat/regs-serial.h>
and adding irq-pm.c which saves the state of all the IRQ registers.

The irq-pm.c saves all the IRQ registers, including the IRQ_EINT and
IRQ_EINT_GROUP registers as it was easier than adding three different
files. Also ensuring that all the registers are restored to the same
state as before suspend is considered to be the best thing to do.

Note, we do not suspend the VIC here, this is done by the VIC driver
itself.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/include/plat/regs-serial.h b/arch/arm/plat-s3c/include/plat/regs-serial.h
index 487d7d2..66af75a 100644
--- a/arch/arm/plat-s3c/include/plat/regs-serial.h
+++ b/arch/arm/plat-s3c/include/plat/regs-serial.h
@@ -189,6 +189,11 @@
 
 #define S3C2443_DIVSLOT		  (0x2C)
 
+/* S3C64XX interrupt registers. */
+#define S3C64XX_UINTP		0x30
+#define S3C64XX_UINTSP		0x34
+#define S3C64XX_UINTM		0x38
+
 #ifndef __ASSEMBLY__
 
 /* struct s3c24xx_uart_clksrc
diff --git a/arch/arm/plat-s3c64xx/Makefile b/arch/arm/plat-s3c64xx/Makefile
index b2bc2a9..12ac7a5 100644
--- a/arch/arm/plat-s3c64xx/Makefile
+++ b/arch/arm/plat-s3c64xx/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_CPU_S3C6400_CLOCK)	+= s3c6400-clock.o
 
 obj-$(CONFIG_PM)		+= pm.o
 obj-$(CONFIG_PM)		+= sleep.o
+obj-$(CONFIG_PM)		+= irq-pm.o
 
 # Device setup
 
diff --git a/arch/arm/plat-s3c64xx/irq-pm.c b/arch/arm/plat-s3c64xx/irq-pm.c
new file mode 100644
index 0000000..ca523b5
--- /dev/null
+++ b/arch/arm/plat-s3c64xx/irq-pm.c
@@ -0,0 +1,111 @@
+/* arch/arm/plat-s3c64xx/irq-pm.c
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *      Ben Dooks <ben@simtec.co.uk>
+ *      http://armlinux.simtec.co.uk/
+ *
+ * S3C64XX - Interrupt handling Power Management
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+#include <linux/serial_core.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include <mach/map.h>
+
+#include <plat/regs-serial.h>
+#include <plat/regs-timer.h>
+#include <plat/regs-gpio.h>
+#include <plat/cpu.h>
+#include <plat/pm.h>
+
+/* We handled all the IRQ types in this code, to save having to make several
+ * small files to handle each different type separately. Having the EINT_GRP
+ * code here shouldn't be as much bloat as the IRQ table space needed when
+ * they are enabled. The added benefit is we ensure that these registers are
+ * in the same state as we suspended.
+ */
+
+static struct sleep_save irq_save[] = {
+	SAVE_ITEM(S3C64XX_PRIORITY),
+	SAVE_ITEM(S3C64XX_EINT0CON0),
+	SAVE_ITEM(S3C64XX_EINT0CON1),
+	SAVE_ITEM(S3C64XX_EINT0FLTCON0),
+	SAVE_ITEM(S3C64XX_EINT0FLTCON1),
+	SAVE_ITEM(S3C64XX_EINT0FLTCON2),
+	SAVE_ITEM(S3C64XX_EINT0FLTCON3),
+	SAVE_ITEM(S3C64XX_EINT0MASK),
+	SAVE_ITEM(S3C64XX_TINT_CSTAT),
+};
+
+static struct irq_grp_save {
+	u32	fltcon;
+	u32	con;
+	u32	mask;
+} eint_grp_save[5];
+
+static u32 irq_uart_mask[CONFIG_SERIAL_SAMSUNG_UARTS];
+
+static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state)
+{
+	struct irq_grp_save *grp = eint_grp_save;
+	int i;
+
+	S3C_PMDBG("%s: suspending IRQs\n", __func__);
+
+	s3c_pm_do_save(irq_save, ARRAY_SIZE(irq_save));
+
+	for (i = 0; i < CONFIG_SERIAL_SAMSUNG_UARTS; i++)
+		irq_uart_mask[i] = __raw_readl(S3C_VA_UARTx(i) + S3C64XX_UINTM);
+
+	for (i = 0; i < ARRAY_SIZE(eint_grp_save); i++, grp++) {
+		grp->con = __raw_readl(S3C64XX_EINT12CON + (i * 4));
+		grp->mask = __raw_readl(S3C64XX_EINT12MASK + (i * 4));
+		grp->fltcon = __raw_readl(S3C64XX_EINT12FLTCON + (i * 4));
+	}
+
+	return 0;
+}
+
+static int s3c64xx_irq_pm_resume(struct sys_device *dev)
+{
+	struct irq_grp_save *grp = eint_grp_save;
+	int i;
+
+	S3C_PMDBG("%s: resuming IRQs\n", __func__);
+
+	s3c_pm_do_restore(irq_save, ARRAY_SIZE(irq_save));
+
+	for (i = 0; i < CONFIG_SERIAL_SAMSUNG_UARTS; i++)
+		__raw_writel(irq_uart_mask[i], S3C_VA_UARTx(i) + S3C64XX_UINTM);
+
+	for (i = 0; i < ARRAY_SIZE(eint_grp_save); i++, grp++) {
+		__raw_writel(grp->con, S3C64XX_EINT12CON + (i * 4));
+		__raw_writel(grp->mask, S3C64XX_EINT12MASK + (i * 4));
+		__raw_writel(grp->fltcon, S3C64XX_EINT12FLTCON + (i * 4));
+	}
+
+	S3C_PMDBG("%s: IRQ configuration restored\n", __func__);
+	return 0;
+}
+
+static struct sysdev_driver s3c64xx_irq_driver = {
+	.suspend = s3c64xx_irq_pm_suspend,
+	.resume	 = s3c64xx_irq_pm_resume,
+};
+
+static int __init s3c64xx_irq_pm_init(void)
+{
+	return sysdev_driver_register(&s3c64xx_sysclass, &s3c64xx_irq_driver);
+}
+
+arch_initcall(s3c64xx_irq_pm_init);
+
diff --git a/arch/arm/plat-s3c64xx/irq.c b/arch/arm/plat-s3c64xx/irq.c
index f22edf7..f4c4844 100644
--- a/arch/arm/plat-s3c64xx/irq.c
+++ b/arch/arm/plat-s3c64xx/irq.c
@@ -14,12 +14,14 @@
 
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/serial_core.h>
 #include <linux/irq.h>
 #include <linux/io.h>
 
 #include <asm/hardware/vic.h>
 
 #include <mach/map.h>
+#include <plat/regs-serial.h>
 #include <plat/regs-timer.h>
 #include <plat/cpu.h>
 
@@ -135,9 +137,6 @@ static inline unsigned int s3c_irq_uart_bit(unsigned int irq)
 }
 
 /* UART interrupt registers, not worth adding to seperate include header */
-#define S3C64XX_UINTP	0x30
-#define S3C64XX_UINTSP	0x34
-#define S3C64XX_UINTM	0x38
 
 static void s3c_irq_uart_mask(unsigned int irq)
 {
-- 
cgit v0.10.2


From d87964c46005ccb04754f6309df0fd8f67b08c6d Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 12 Dec 2008 00:24:30 +0000
Subject: [ARM] S3C: GPIO PM core GPIOlib integration

Move the GPIO suspend/resume support inline with the gpiolib support
so that it will work with both the S3C24XX and S3C64XX series.

The s3c_gpio_chip is extended to have a pm callback and a save block
to keep the state of the GPIO over suspend, and the code from the
s3c24xx implementation is added to a new common file.

The suspend process now uses the list of registered chips to go through
saving and restoring each one as appropriate, using the pm callback to
select the appropriate routine depending on the type of control register
present.

This change also means that any additional GPIO added should not require
changes to the PM.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/Makefile b/arch/arm/plat-s3c/Makefile
index aa995a4..bfc8f53 100644
--- a/arch/arm/plat-s3c/Makefile
+++ b/arch/arm/plat-s3c/Makefile
@@ -21,6 +21,7 @@ obj-y				+= gpio-config.o
 # PM support
 
 obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_PM)		+= pm-gpio.o
 obj-$(CONFIG_S3C2410_PM_CHECK)	+= pm-check.o
 
 # devices
diff --git a/arch/arm/plat-s3c/gpio.c b/arch/arm/plat-s3c/gpio.c
index d71dd6d..260fdc6 100644
--- a/arch/arm/plat-s3c/gpio.c
+++ b/arch/arm/plat-s3c/gpio.c
@@ -16,7 +16,7 @@
 #include <linux/io.h>
 #include <linux/gpio.h>
 
-#include <plat/gpio-core.h>
+#include <mach/gpio-core.h>
 
 #ifdef CONFIG_S3C_GPIO_TRACK
 struct s3c_gpio_chip *s3c_gpios[S3C_GPIO_END];
@@ -140,6 +140,15 @@ __init void s3c_gpiolib_add(struct s3c_gpio_chip *chip)
 	if (!gc->get)
 		gc->get = s3c_gpiolib_get;
 
+#ifdef CONFIG_PM
+	if (chip->pm != NULL) {
+		if (!chip->pm->save || !chip->pm->resume)
+			printk(KERN_ERR "gpio: %s has missing PM functions\n",
+			       gc->label);
+	} else
+		printk(KERN_ERR "gpio: %s has no PM function\n", gc->label);
+#endif
+
 	/* gpiochip_add() prints own failure message on error. */
 	ret = gpiochip_add(gc);
 	if (ret >= 0)
diff --git a/arch/arm/plat-s3c/include/plat/gpio-core.h b/arch/arm/plat-s3c/include/plat/gpio-core.h
index 2fc60a5..32af612 100644
--- a/arch/arm/plat-s3c/include/plat/gpio-core.h
+++ b/arch/arm/plat-s3c/include/plat/gpio-core.h
@@ -20,6 +20,18 @@
  * specific code.
 */
 
+struct s3c_gpio_chip;
+
+/**
+ * struct s3c_gpio_pm - power management (suspend/resume) information
+ * @save: Routine to save the state of the GPIO block
+ * @resume: Routine to resume the GPIO block.
+ */
+struct s3c_gpio_pm {
+	void (*save)(struct s3c_gpio_chip *chip);
+	void (*resume)(struct s3c_gpio_chip *chip);
+};
+
 struct s3c_gpio_cfg;
 
 /**
@@ -27,6 +39,7 @@ struct s3c_gpio_cfg;
  * @chip: The chip structure to be exported via gpiolib.
  * @base: The base pointer to the gpio configuration registers.
  * @config: special function and pull-resistor control information.
+ * @pm_save: Save information for suspend/resume support.
  *
  * This wrapper provides the necessary information for the Samsung
  * specific gpios being registered with gpiolib.
@@ -34,7 +47,11 @@ struct s3c_gpio_cfg;
 struct s3c_gpio_chip {
 	struct gpio_chip	chip;
 	struct s3c_gpio_cfg	*config;
+	struct s3c_gpio_pm	*pm;
 	void __iomem		*base;
+#ifdef CONFIG_PM
+	u32			pm_save[4];
+#endif
 };
 
 static inline struct s3c_gpio_chip *to_s3c_gpio(struct gpio_chip *gpc)
@@ -75,3 +92,16 @@ static inline struct s3c_gpio_chip *s3c_gpiolib_getchip(unsigned int chip)
 
 static inline void s3c_gpiolib_track(struct s3c_gpio_chip *chip) { }
 #endif
+
+#ifdef CONFIG_PM
+extern struct s3c_gpio_pm s3c_gpio_pm_1bit;
+extern struct s3c_gpio_pm s3c_gpio_pm_2bit;
+extern struct s3c_gpio_pm s3c_gpio_pm_4bit;
+#define __gpio_pm(x) x
+#else
+#define s3c_gpio_pm_1bit NULL
+#define s3c_gpio_pm_2bit NULL
+#define s3c_gpio_pm_4bit NULL
+#define __gpio_pm(x) NULL
+
+#endif /* CONFIG_PM */
diff --git a/arch/arm/plat-s3c/pm-gpio.c b/arch/arm/plat-s3c/pm-gpio.c
new file mode 100644
index 0000000..cfd326a
--- /dev/null
+++ b/arch/arm/plat-s3c/pm-gpio.c
@@ -0,0 +1,380 @@
+
+/* linux/arch/arm/plat-s3c/pm-gpio.c
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C series GPIO PM code
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/kernel.h>
+#include <linux/sysdev.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+
+#include <mach/gpio-core.h>
+#include <plat/pm.h>
+
+/* PM GPIO helpers */
+
+#define OFFS_CON	(0x00)
+#define OFFS_DAT	(0x04)
+#define OFFS_UP		(0x08)
+
+static void s3c_gpio_pm_1bit_save(struct s3c_gpio_chip *chip)
+{
+	chip->pm_save[0] = __raw_readl(chip->base + OFFS_CON);
+	chip->pm_save[1] = __raw_readl(chip->base + OFFS_DAT);
+}
+
+static void s3c_gpio_pm_1bit_resume(struct s3c_gpio_chip *chip)
+{
+	void __iomem *base = chip->base;
+	u32 old_gpcon = __raw_readl(base + OFFS_CON);
+	u32 old_gpdat = __raw_readl(base + OFFS_DAT);
+	u32 gps_gpcon = chip->pm_save[0];
+	u32 gps_gpdat = chip->pm_save[1];
+	u32 gpcon;
+
+	/* GPACON only has one bit per control / data and no PULLUPs.
+	 * GPACON[x] = 0 => Output, 1 => SFN */
+
+	/* first set all SFN bits to SFN */
+
+	gpcon = old_gpcon | gps_gpcon;
+	__raw_writel(gpcon, base + OFFS_CON);
+
+	/* now set all the other bits */
+
+	__raw_writel(gps_gpdat, base + OFFS_DAT);
+	__raw_writel(gps_gpcon, base + OFFS_CON);
+
+	S3C_PMDBG("%s: CON %08x => %08x, DAT %08x => %08x\n",
+		  chip->chip.label, old_gpcon, gps_gpcon, old_gpdat, gps_gpdat);
+}
+
+struct s3c_gpio_pm s3c_gpio_pm_1bit = {
+	.save	= s3c_gpio_pm_1bit_save,
+	.resume = s3c_gpio_pm_1bit_resume,
+};
+
+static void s3c_gpio_pm_2bit_save(struct s3c_gpio_chip *chip)
+{
+	chip->pm_save[0] = __raw_readl(chip->base + OFFS_CON);
+	chip->pm_save[1] = __raw_readl(chip->base + OFFS_DAT);
+	chip->pm_save[2] = __raw_readl(chip->base + OFFS_UP);
+}
+
+/* Test whether the given masked+shifted bits of an GPIO configuration
+ * are one of the SFN (special function) modes. */
+
+static inline int is_sfn(unsigned long con)
+{
+	return con >= 2;
+}
+
+/* Test if the given masked+shifted GPIO configuration is an input */
+
+static inline int is_in(unsigned long con)
+{
+	return con == 0;
+}
+
+/* Test if the given masked+shifted GPIO configuration is an output */
+
+static inline int is_out(unsigned long con)
+{
+	return con == 1;
+}
+
+/**
+ * s3c_gpio_pm_2bit_resume() - restore the given GPIO bank
+ * @chip: The chip information to resume.
+ *
+ * Restore one of the GPIO banks that was saved during suspend. This is
+ * not as simple as once thought, due to the possibility of glitches
+ * from the order that the CON and DAT registers are set in.
+ *
+ * The three states the pin can be are {IN,OUT,SFN} which gives us 9
+ * combinations of changes to check. Three of these, if the pin stays
+ * in the same configuration can be discounted. This leaves us with
+ * the following:
+ *
+ * { IN => OUT }  Change DAT first
+ * { IN => SFN }  Change CON first
+ * { OUT => SFN } Change CON first, so new data will not glitch
+ * { OUT => IN }  Change CON first, so new data will not glitch
+ * { SFN => IN }  Change CON first
+ * { SFN => OUT } Change DAT first, so new data will not glitch [1]
+ *
+ * We do not currently deal with the UP registers as these control
+ * weak resistors, so a small delay in change should not need to bring
+ * these into the calculations.
+ *
+ * [1] this assumes that writing to a pin DAT whilst in SFN will set the
+ *     state for when it is next output.
+ */
+static void s3c_gpio_pm_2bit_resume(struct s3c_gpio_chip *chip)
+{
+	void __iomem *base = chip->base;
+	u32 old_gpcon = __raw_readl(base + OFFS_CON);
+	u32 old_gpdat = __raw_readl(base + OFFS_DAT);
+	u32 gps_gpcon = chip->pm_save[0];
+	u32 gps_gpdat = chip->pm_save[1];
+	u32 gpcon, old, new, mask;
+	u32 change_mask = 0x0;
+	int nr;
+
+	/* restore GPIO pull-up settings */
+	__raw_writel(chip->pm_save[2], base + OFFS_UP);
+
+	/* Create a change_mask of all the items that need to have
+	 * their CON value changed before their DAT value, so that
+	 * we minimise the work between the two settings.
+	 */
+
+	for (nr = 0, mask = 0x03; nr < 32; nr += 2, mask <<= 2) {
+		old = (old_gpcon & mask) >> nr;
+		new = (gps_gpcon & mask) >> nr;
+
+		/* If there is no change, then skip */
+
+		if (old == new)
+			continue;
+
+		/* If both are special function, then skip */
+
+		if (is_sfn(old) && is_sfn(new))
+			continue;
+
+		/* Change is IN => OUT, do not change now */
+
+		if (is_in(old) && is_out(new))
+			continue;
+
+		/* Change is SFN => OUT, do not change now */
+
+		if (is_sfn(old) && is_out(new))
+			continue;
+
+		/* We should now be at the case of IN=>SFN,
+		 * OUT=>SFN, OUT=>IN, SFN=>IN. */
+
+		change_mask |= mask;
+	}
+
+
+	/* Write the new CON settings */
+
+	gpcon = old_gpcon & ~change_mask;
+	gpcon |= gps_gpcon & change_mask;
+
+	__raw_writel(gpcon, base + OFFS_CON);
+
+	/* Now change any items that require DAT,CON */
+
+	__raw_writel(gps_gpdat, base + OFFS_DAT);
+	__raw_writel(gps_gpcon, base + OFFS_CON);
+
+	S3C_PMDBG("%s: CON %08x => %08x, DAT %08x => %08x\n",
+		  chip->chip.label, old_gpcon, gps_gpcon, old_gpdat, gps_gpdat);
+}
+
+struct s3c_gpio_pm s3c_gpio_pm_2bit = {
+	.save	= s3c_gpio_pm_2bit_save,
+	.resume = s3c_gpio_pm_2bit_resume,
+};
+
+#ifdef CONFIG_ARCH_S3C64XX
+static void s3c_gpio_pm_4bit_save(struct s3c_gpio_chip *chip)
+{
+	chip->pm_save[1] = __raw_readl(chip->base + OFFS_CON);
+	chip->pm_save[2] = __raw_readl(chip->base + OFFS_DAT);
+	chip->pm_save[3] = __raw_readl(chip->base + OFFS_UP);
+
+	if (chip->chip.ngpio > 8)
+		chip->pm_save[0] = __raw_readl(chip->base - 4);
+}
+
+static u32 s3c_gpio_pm_4bit_mask(u32 old_gpcon, u32 gps_gpcon)
+{
+	u32 old, new, mask;
+	u32 change_mask = 0x0;
+	int nr;
+
+	for (nr = 0, mask = 0x0f; nr < 16; nr += 4, mask <<= 4) {
+		old = (old_gpcon & mask) >> nr;
+		new = (gps_gpcon & mask) >> nr;
+
+		/* If there is no change, then skip */
+
+		if (old == new)
+			continue;
+
+		/* If both are special function, then skip */
+
+		if (is_sfn(old) && is_sfn(new))
+			continue;
+
+		/* Change is IN => OUT, do not change now */
+
+		if (is_in(old) && is_out(new))
+			continue;
+
+		/* Change is SFN => OUT, do not change now */
+
+		if (is_sfn(old) && is_out(new))
+			continue;
+
+		/* We should now be at the case of IN=>SFN,
+		 * OUT=>SFN, OUT=>IN, SFN=>IN. */
+
+		change_mask |= mask;
+	}
+
+	return change_mask;
+}
+
+static void s3c_gpio_pm_4bit_con(struct s3c_gpio_chip *chip, int index)
+{
+	void __iomem *con = chip->base + (index * 4);
+	u32 old_gpcon = __raw_readl(con);
+	u32 gps_gpcon = chip->pm_save[index + 1];
+	u32 gpcon, mask;
+
+	mask = s3c_gpio_pm_4bit_mask(old_gpcon, gps_gpcon);
+
+	gpcon = old_gpcon & ~mask;
+	gpcon |= gps_gpcon & mask;
+
+	__raw_writel(gpcon, con);
+}
+
+static void s3c_gpio_pm_4bit_resume(struct s3c_gpio_chip *chip)
+{
+	void __iomem *base = chip->base;
+	u32 old_gpcon[2];
+	u32 old_gpdat = __raw_readl(base + OFFS_DAT);
+	u32 gps_gpdat = chip->pm_save[2];
+
+	/* First, modify the CON settings */
+
+	old_gpcon[0] = 0;
+	old_gpcon[1] = __raw_readl(base + OFFS_CON);
+
+	s3c_gpio_pm_4bit_con(chip, 0);
+	if (chip->chip.ngpio > 8) {
+		old_gpcon[0] = __raw_readl(base - 4);
+		s3c_gpio_pm_4bit_con(chip, -1);
+	}
+
+	/* Now change the configurations that require DAT,CON */
+
+	__raw_writel(chip->pm_save[2], base + OFFS_DAT);
+	__raw_writel(chip->pm_save[1], base + OFFS_CON);
+	if (chip->chip.ngpio > 8)
+		__raw_writel(chip->pm_save[0], base - 4);
+
+	__raw_writel(chip->pm_save[2], base + OFFS_DAT);
+	__raw_writel(chip->pm_save[3], base + OFFS_UP);
+
+	if (chip->chip.ngpio > 8) {
+		S3C_PMDBG("%s: CON4 %08x,%08x => %08x,%08x, DAT %08x => %08x\n",
+			  chip->chip.label, old_gpcon[0], old_gpcon[1],
+			  __raw_readl(base - 4),
+			  __raw_readl(base + OFFS_CON),
+			  old_gpdat, gps_gpdat);
+	} else
+		S3C_PMDBG("%s: CON4 %08x => %08x, DAT %08x => %08x\n",
+			  chip->chip.label, old_gpcon[1],
+			  __raw_readl(base + OFFS_CON),
+			  old_gpdat, gps_gpdat);
+}
+
+struct s3c_gpio_pm s3c_gpio_pm_4bit = {
+	.save	= s3c_gpio_pm_4bit_save,
+	.resume = s3c_gpio_pm_4bit_resume,
+};
+#endif /* CONFIG_ARCH_S3C64XX */
+
+/**
+ * s3c_pm_save_gpio() - save gpio chip data for suspend
+ * @ourchip: The chip for suspend.
+ */
+static void s3c_pm_save_gpio(struct s3c_gpio_chip *ourchip)
+{
+	struct s3c_gpio_pm *pm = ourchip->pm;
+
+	if (pm == NULL || pm->save == NULL)
+		S3C_PMDBG("%s: no pm for %s\n", __func__, ourchip->chip.label);
+	else
+		pm->save(ourchip);
+}
+
+/**
+ * s3c_pm_save_gpios() - Save the state of the GPIO banks.
+ *
+ * For all the GPIO banks, save the state of each one ready for going
+ * into a suspend mode.
+ */
+void s3c_pm_save_gpios(void)
+{
+	struct s3c_gpio_chip *ourchip;
+	unsigned int gpio_nr;
+
+	for (gpio_nr = 0; gpio_nr < S3C_GPIO_END; gpio_nr++) {
+		ourchip = s3c_gpiolib_getchip(gpio_nr);
+		if (!ourchip)
+			continue;
+
+		s3c_pm_save_gpio(ourchip);
+
+		S3C_PMDBG("%s: save %08x,%08x,%08x,%08x\n",
+			  ourchip->chip.label,
+			  ourchip->pm_save[0],
+			  ourchip->pm_save[1],
+			  ourchip->pm_save[2],
+			  ourchip->pm_save[3]);
+
+		gpio_nr += ourchip->chip.ngpio;
+		gpio_nr += CONFIG_S3C_GPIO_SPACE;
+	}
+}
+
+/**
+ * s3c_pm_resume_gpio() - restore gpio chip data after suspend
+ * @ourchip: The suspended chip.
+ */
+static void s3c_pm_resume_gpio(struct s3c_gpio_chip *ourchip)
+{
+	struct s3c_gpio_pm *pm = ourchip->pm;
+
+	if (pm == NULL || pm->resume == NULL)
+		S3C_PMDBG("%s: no pm for %s\n", __func__, ourchip->chip.label);
+	else
+		pm->resume(ourchip);
+}
+
+void s3c_pm_restore_gpios(void)
+{
+	struct s3c_gpio_chip *ourchip;
+	unsigned int gpio_nr;
+
+	for (gpio_nr = 0; gpio_nr < S3C_GPIO_END; gpio_nr++) {
+		ourchip = s3c_gpiolib_getchip(gpio_nr);
+		if (!ourchip)
+			continue;
+
+		s3c_pm_resume_gpio(ourchip);
+
+		gpio_nr += ourchip->chip.ngpio;
+		gpio_nr += CONFIG_S3C_GPIO_SPACE;
+	}
+}
diff --git a/arch/arm/plat-s3c24xx/gpiolib.c b/arch/arm/plat-s3c24xx/gpiolib.c
index 5c0491b..4bac12d 100644
--- a/arch/arm/plat-s3c24xx/gpiolib.c
+++ b/arch/arm/plat-s3c24xx/gpiolib.c
@@ -22,6 +22,7 @@
 #include <mach/gpio-core.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
+#include <plat/pm.h>
 
 #include <mach/regs-gpio.h>
 
@@ -78,6 +79,7 @@ static int s3c24xx_gpiolib_bankg_toirq(struct gpio_chip *chip, unsigned offset)
 struct s3c_gpio_chip s3c24xx_gpios[] = {
 	[0] = {
 		.base	= S3C24XX_GPIO_BASE(S3C2410_GPA0),
+		.pm	= __gpio_pm(&s3c_gpio_pm_1bit),
 		.chip	= {
 			.base			= S3C2410_GPA0,
 			.owner			= THIS_MODULE,
@@ -89,6 +91,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 	},
 	[1] = {
 		.base	= S3C24XX_GPIO_BASE(S3C2410_GPB0),
+		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPB0,
 			.owner			= THIS_MODULE,
@@ -98,6 +101,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 	},
 	[2] = {
 		.base	= S3C24XX_GPIO_BASE(S3C2410_GPC0),
+		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPC0,
 			.owner			= THIS_MODULE,
@@ -107,6 +111,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 	},
 	[3] = {
 		.base	= S3C24XX_GPIO_BASE(S3C2410_GPD0),
+		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPD0,
 			.owner			= THIS_MODULE,
@@ -116,6 +121,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 	},
 	[4] = {
 		.base	= S3C24XX_GPIO_BASE(S3C2410_GPE0),
+		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPE0,
 			.label			= "GPIOE",
@@ -125,6 +131,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 	},
 	[5] = {
 		.base	= S3C24XX_GPIO_BASE(S3C2410_GPF0),
+		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPF0,
 			.owner			= THIS_MODULE,
@@ -135,6 +142,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 	},
 	[6] = {
 		.base	= S3C24XX_GPIO_BASE(S3C2410_GPG0),
+		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPG0,
 			.owner			= THIS_MODULE,
diff --git a/arch/arm/plat-s3c24xx/pm.c b/arch/arm/plat-s3c24xx/pm.c
index 062a293..5135c40 100644
--- a/arch/arm/plat-s3c24xx/pm.c
+++ b/arch/arm/plat-s3c24xx/pm.c
@@ -75,43 +75,10 @@ static struct sleep_save core_save[] = {
 	SAVE_ITEM(S3C2410_CLKSLOW),
 };
 
-static struct gpio_sleep {
-	void __iomem	*base;
-	unsigned int	 gpcon;
-	unsigned int	 gpdat;
-	unsigned int	 gpup;
-} gpio_save[] = {
-	[0] = {
-		.base	= S3C2410_GPACON,
-	},
-	[1] = {
-		.base	= S3C2410_GPBCON,
-	},
-	[2] = {
-		.base	= S3C2410_GPCCON,
-	},
-	[3] = {
-		.base	= S3C2410_GPDCON,
-	},
-	[4] = {
-		.base	= S3C2410_GPECON,
-	},
-	[5] = {
-		.base	= S3C2410_GPFCON,
-	},
-	[6] = {
-		.base	= S3C2410_GPGCON,
-	},
-	[7] = {
-		.base	= S3C2410_GPHCON,
-	},
-};
-
 static struct sleep_save misc_save[] = {
 	SAVE_ITEM(S3C2410_DCLKCON),
 };
 
-
 /* s3c_pm_check_resume_pin
  *
  * check to see if the pin is configured correctly for sleep mode, and
@@ -165,186 +132,6 @@ void s3c_pm_configure_extint(void)
 	}
 }
 
-/* offsets for CON/DAT/UP registers */
-
-#define OFFS_CON	(S3C2410_GPACON - S3C2410_GPACON)
-#define OFFS_DAT	(S3C2410_GPADAT - S3C2410_GPACON)
-#define OFFS_UP		(S3C2410_GPBUP  - S3C2410_GPBCON)
-
-/* s3c_pm_save_gpios()
- *
- * Save the state of the GPIOs
- */
-
-void s3c_pm_save_gpios(void)
-{
-	struct gpio_sleep *gps = gpio_save;
-	unsigned int gpio;
-
-	for (gpio = 0; gpio < ARRAY_SIZE(gpio_save); gpio++, gps++) {
-		void __iomem *base = gps->base;
-
-		gps->gpcon = __raw_readl(base + OFFS_CON);
-		gps->gpdat = __raw_readl(base + OFFS_DAT);
-
-		if (gpio > 0)
-			gps->gpup = __raw_readl(base + OFFS_UP);
-
-	}
-}
-
-/* Test whether the given masked+shifted bits of an GPIO configuration
- * are one of the SFN (special function) modes. */
-
-static inline int is_sfn(unsigned long con)
-{
-	return (con == 2 || con == 3);
-}
-
-/* Test if the given masked+shifted GPIO configuration is an input */
-
-static inline int is_in(unsigned long con)
-{
-	return con == 0;
-}
-
-/* Test if the given masked+shifted GPIO configuration is an output */
-
-static inline int is_out(unsigned long con)
-{
-	return con == 1;
-}
-
-/**
- * s3c2410_pm_restore_gpio() - restore the given GPIO bank
- * @index: The number of the GPIO bank being resumed.
- * @gps: The sleep confgiuration for the bank.
- *
- * Restore one of the GPIO banks that was saved during suspend. This is
- * not as simple as once thought, due to the possibility of glitches
- * from the order that the CON and DAT registers are set in.
- *
- * The three states the pin can be are {IN,OUT,SFN} which gives us 9
- * combinations of changes to check. Three of these, if the pin stays
- * in the same configuration can be discounted. This leaves us with
- * the following:
- *
- * { IN => OUT }  Change DAT first
- * { IN => SFN }  Change CON first
- * { OUT => SFN } Change CON first, so new data will not glitch
- * { OUT => IN }  Change CON first, so new data will not glitch
- * { SFN => IN }  Change CON first
- * { SFN => OUT } Change DAT first, so new data will not glitch [1]
- *
- * We do not currently deal with the UP registers as these control
- * weak resistors, so a small delay in change should not need to bring
- * these into the calculations.
- *
- * [1] this assumes that writing to a pin DAT whilst in SFN will set the
- *     state for when it is next output.
- */
-
-static void s3c2410_pm_restore_gpio(int index, struct gpio_sleep *gps)
-{
-	void __iomem *base = gps->base;
-	unsigned long gps_gpcon = gps->gpcon;
-	unsigned long gps_gpdat = gps->gpdat;
-	unsigned long old_gpcon;
-	unsigned long old_gpdat;
-	unsigned long old_gpup = 0x0;
-	unsigned long gpcon;
-	int nr;
-
-	old_gpcon = __raw_readl(base + OFFS_CON);
-	old_gpdat = __raw_readl(base + OFFS_DAT);
-
-	if (base == S3C2410_GPACON) {
-		/* GPACON only has one bit per control / data and no PULLUPs.
-		 * GPACON[x] = 0 => Output, 1 => SFN */
-
-		/* first set all SFN bits to SFN */
-
-		gpcon = old_gpcon | gps->gpcon;
-		__raw_writel(gpcon, base + OFFS_CON);
-
-		/* now set all the other bits */
-
-		__raw_writel(gps_gpdat, base + OFFS_DAT);
-		__raw_writel(gps_gpcon, base + OFFS_CON);
-	} else {
-		unsigned long old, new, mask;
-		unsigned long change_mask = 0x0;
-
-		old_gpup = __raw_readl(base + OFFS_UP);
-
-		/* Create a change_mask of all the items that need to have
-		 * their CON value changed before their DAT value, so that
-		 * we minimise the work between the two settings.
-		 */
-
-		for (nr = 0, mask = 0x03; nr < 32; nr += 2, mask <<= 2) {
-			old = (old_gpcon & mask) >> nr;
-			new = (gps_gpcon & mask) >> nr;
-
-			/* If there is no change, then skip */
-
-			if (old == new)
-				continue;
-
-			/* If both are special function, then skip */
-
-			if (is_sfn(old) && is_sfn(new))
-				continue;
-
-			/* Change is IN => OUT, do not change now */
-
-			if (is_in(old) && is_out(new))
-				continue;
-
-			/* Change is SFN => OUT, do not change now */
-
-			if (is_sfn(old) && is_out(new))
-				continue;
-
-			/* We should now be at the case of IN=>SFN,
-			 * OUT=>SFN, OUT=>IN, SFN=>IN. */
-
-			change_mask |= mask;
-		}
-
-		/* Write the new CON settings */
-
-		gpcon = old_gpcon & ~change_mask;
-		gpcon |= gps_gpcon & change_mask;
-
-		__raw_writel(gpcon, base + OFFS_CON);
-
-		/* Now change any items that require DAT,CON */
-
-		__raw_writel(gps_gpdat, base + OFFS_DAT);
-		__raw_writel(gps_gpcon, base + OFFS_CON);
-		__raw_writel(gps->gpup, base + OFFS_UP);
-	}
-
-	S3C_PMDBG("GPIO[%d] CON %08lx => %08lx, DAT %08lx => %08lx\n",
-		  index, old_gpcon, gps_gpcon, old_gpdat, gps_gpdat);
-}
-
-
-/** s3c2410_pm_restore_gpios()
- *
- * Restore the state of the GPIOs
- */
-
-void s3c_pm_restore_gpios(void)
-{
-	struct gpio_sleep *gps = gpio_save;
-	int gpio;
-
-	for (gpio = 0; gpio < ARRAY_SIZE(gpio_save); gpio++, gps++) {
-		s3c2410_pm_restore_gpio(gpio, gps);
-	}
-}
 
 void s3c_pm_restore_core(void)
 {
diff --git a/arch/arm/plat-s3c64xx/gpiolib.c b/arch/arm/plat-s3c64xx/gpiolib.c
index ee9188a..ccb82e8 100644
--- a/arch/arm/plat-s3c64xx/gpiolib.c
+++ b/arch/arm/plat-s3c64xx/gpiolib.c
@@ -385,12 +385,19 @@ static __init void s3c64xx_gpiolib_add_4bit(struct s3c_gpio_chip *chip)
 {
 	chip->chip.direction_input = s3c64xx_gpiolib_4bit_input;
 	chip->chip.direction_output = s3c64xx_gpiolib_4bit_output;
+	chip->pm = __gpio_pm(&s3c_gpio_pm_4bit);
 }
 
 static __init void s3c64xx_gpiolib_add_4bit2(struct s3c_gpio_chip *chip)
 {
 	chip->chip.direction_input = s3c64xx_gpiolib_4bit2_input;
 	chip->chip.direction_output = s3c64xx_gpiolib_4bit2_output;
+	chip->pm = __gpio_pm(&s3c_gpio_pm_4bit);
+}
+
+static __init void s3c64xx_gpiolib_add_2bit(struct s3c_gpio_chip *chip)
+{
+	chip->pm = __gpio_pm(&s3c_gpio_pm_2bit);
 }
 
 static __init void s3c64xx_gpiolib_add(struct s3c_gpio_chip *chips,
@@ -412,7 +419,8 @@ static __init int s3c64xx_gpiolib_init(void)
 	s3c64xx_gpiolib_add(gpio_4bit2, ARRAY_SIZE(gpio_4bit2),
 			    s3c64xx_gpiolib_add_4bit2);
 
-	s3c64xx_gpiolib_add(gpio_2bit, ARRAY_SIZE(gpio_2bit), NULL);
+	s3c64xx_gpiolib_add(gpio_2bit, ARRAY_SIZE(gpio_2bit),
+			    s3c64xx_gpiolib_add_2bit);
 
 	return 0;
 }
diff --git a/arch/arm/plat-s3c64xx/pm.c b/arch/arm/plat-s3c64xx/pm.c
index 98190aa..07a6516 100644
--- a/arch/arm/plat-s3c64xx/pm.c
+++ b/arch/arm/plat-s3c64xx/pm.c
@@ -96,17 +96,6 @@ void s3c_pm_configure_extint(void)
 	__raw_writel(s3c_irqwake_eintmask, S3C64XX_EINT_MASK);
 }
 
-void s3c_pm_save_gpios(void)
-{
-	/* currently, unless the bootloader does something really stupid
-	 * the gpio blocks should be maintained over their sleep.
-	 */
-}
-
-void s3c_pm_restore_gpios(void)
-{
-}
-
 void s3c_pm_restore_core(void)
 {
 	__raw_writel(0, S3C64XX_EINT_MASK);
-- 
cgit v0.10.2


From c07f87f22ecc94201893b7a25430b7f5d5fd6de8 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 24 Mar 2009 15:30:07 +0000
Subject: [ARM] VIC: Add power management device

Add power management support to the VIC by registering
each VIC as a system device to get suspend/resume
events going.

Since the VIC registeration is done early, we need to
record the VICs in a static array which is used to add
the system devices later once the initcalls are run. This
means there is now a configuration value for the number
of VICs in the system.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index a2cd9be..38518d4 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -4,6 +4,13 @@ config ARM_GIC
 config ARM_VIC
 	bool
 
+config ARM_VIC_NR
+	int
+	default 2
+	help
+	  The maximum number of VICs available in the system, for
+	  power management.
+
 config ICST525
 	bool
 
diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c
index b2a781d..887c6eb 100644
--- a/arch/arm/common/vic.c
+++ b/arch/arm/common/vic.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
+#include <linux/sysdev.h>
 
 #include <asm/mach/irq.h>
 #include <asm/hardware/vic.h>
@@ -39,11 +40,219 @@ static void vic_unmask_irq(unsigned int irq)
 	writel(1 << irq, base + VIC_INT_ENABLE);
 }
 
+/**
+ * vic_init2 - common initialisation code
+ * @base: Base of the VIC.
+ *
+ * Common initialisation code for registeration
+ * and resume.
+*/
+static void vic_init2(void __iomem *base)
+{
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		void __iomem *reg = base + VIC_VECT_CNTL0 + (i * 4);
+		writel(VIC_VECT_CNTL_ENABLE | i, reg);
+	}
+
+	writel(32, base + VIC_PL190_DEF_VECT_ADDR);
+}
+
+#if defined(CONFIG_PM)
+/**
+ * struct vic_device - VIC PM device
+ * @sysdev: The system device which is registered.
+ * @irq: The IRQ number for the base of the VIC.
+ * @base: The register base for the VIC.
+ * @resume_sources: A bitmask of interrupts for resume.
+ * @resume_irqs: The IRQs enabled for resume.
+ * @int_select: Save for VIC_INT_SELECT.
+ * @int_enable: Save for VIC_INT_ENABLE.
+ * @soft_int: Save for VIC_INT_SOFT.
+ * @protect: Save for VIC_PROTECT.
+ */
+struct vic_device {
+	struct sys_device sysdev;
+
+	void __iomem	*base;
+	int		irq;
+	u32		resume_sources;
+	u32		resume_irqs;
+	u32		int_select;
+	u32		int_enable;
+	u32		soft_int;
+	u32		protect;
+};
+
+/* we cannot allocate memory when VICs are initially registered */
+static struct vic_device vic_devices[CONFIG_ARM_VIC_NR];
+
+static inline struct vic_device *to_vic(struct sys_device *sys)
+{
+	return container_of(sys, struct vic_device, sysdev);
+}
+
+static int vic_id;
+
+static int vic_class_resume(struct sys_device *dev)
+{
+	struct vic_device *vic = to_vic(dev);
+	void __iomem *base = vic->base;
+
+	printk(KERN_DEBUG "%s: resuming vic at %p\n", __func__, base);
+
+	/* re-initialise static settings */
+	vic_init2(base);
+
+	writel(vic->int_select, base + VIC_INT_SELECT);
+	writel(vic->protect, base + VIC_PROTECT);
+
+	/* set the enabled ints and then clear the non-enabled */
+	writel(vic->int_enable, base + VIC_INT_ENABLE);
+	writel(~vic->int_enable, base + VIC_INT_ENABLE_CLEAR);
+
+	/* and the same for the soft-int register */
+
+	writel(vic->soft_int, base + VIC_INT_SOFT);
+	writel(~vic->soft_int, base + VIC_INT_SOFT_CLEAR);
+
+	return 0;
+}
+
+static int vic_class_suspend(struct sys_device *dev, pm_message_t state)
+{
+	struct vic_device *vic = to_vic(dev);
+	void __iomem *base = vic->base;
+
+	printk(KERN_DEBUG "%s: suspending vic at %p\n", __func__, base);
+
+	vic->int_select = readl(base + VIC_INT_SELECT);
+	vic->int_enable = readl(base + VIC_INT_ENABLE);
+	vic->soft_int = readl(base + VIC_INT_SOFT);
+	vic->protect = readl(base + VIC_PROTECT);
+
+	/* set the interrupts (if any) that are used for
+	 * resuming the system */
+
+	writel(vic->resume_irqs, base + VIC_INT_ENABLE);
+	writel(~vic->resume_irqs, base + VIC_INT_ENABLE_CLEAR);
+
+	return 0;
+}
+
+struct sysdev_class vic_class = {
+	.name		= "vic",
+	.suspend	= vic_class_suspend,
+	.resume		= vic_class_resume,
+};
+
+/**
+ * vic_pm_register - Register a VIC for later power management control
+ * @base: The base address of the VIC.
+ * @irq: The base IRQ for the VIC.
+ * @resume_sources: bitmask of interrupts allowed for resume sources.
+ *
+ * Register the VIC with the system device tree so that it can be notified
+ * of suspend and resume requests and ensure that the correct actions are
+ * taken to re-instate the settings on resume.
+ */
+static void __init vic_pm_register(void __iomem *base, unsigned int irq, u32 resume_sources)
+{
+	struct vic_device *v;
+
+	if (vic_id >= ARRAY_SIZE(vic_devices))
+		printk(KERN_ERR "%s: too few VICs, increase CONFIG_ARM_VIC_NR\n", __func__);
+	else {
+		v = &vic_devices[vic_id];
+		v->base = base;
+		v->resume_sources = resume_sources;
+		v->irq = irq;
+		vic_id++;
+	}
+}
+
+/**
+ * vic_pm_init - initicall to register VIC pm
+ *
+ * This is called via late_initcall() to register
+ * the resources for the VICs due to the early
+ * nature of the VIC's registration.
+*/
+static int __init vic_pm_init(void)
+{
+	struct vic_device *dev = vic_devices;
+	int err;
+	int id;
+
+	if (vic_id == 0)
+		return 0;
+
+	err = sysdev_class_register(&vic_class);
+	if (err) {
+		printk(KERN_ERR "%s: cannot register class\n", __func__);
+		return err;
+	}
+
+	for (id = 0; id < vic_id; id++, dev++) {
+		dev->sysdev.id = id;
+		dev->sysdev.cls = &vic_class;
+
+		err = sysdev_register(&dev->sysdev);
+		if (err) {
+			printk(KERN_ERR "%s: failed to register device\n",
+			       __func__);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+late_initcall(vic_pm_init);
+
+static struct vic_device *vic_from_irq(unsigned int irq)
+{
+        struct vic_device *v = vic_devices;
+	unsigned int base_irq = irq & ~31;
+	int id;
+
+	for (id = 0; id < vic_id; id++, v++) {
+		if (v->irq == base_irq)
+			return v;
+	}
+
+	return NULL;
+}
+
+static int vic_set_wake(unsigned int irq, unsigned int on)
+{
+	struct vic_device *v = vic_from_irq(irq);
+	unsigned int off = irq & 31;
+
+	if (!v)
+		return -EINVAL;
+
+	if (on)
+		v->resume_irqs |= 1 << off;
+	else
+		v->resume_irqs &= ~(1 << off);
+
+	return 0;
+}
+
+#else
+static inline void vic_pm_register(void __iomem *base, unsigned int irq, u32 arg1) { }
+
+#define vic_set_wake NULL
+#endif /* CONFIG_PM */
+
 static struct irq_chip vic_chip = {
 	.name	= "VIC",
 	.ack	= vic_mask_irq,
 	.mask	= vic_mask_irq,
 	.unmask	= vic_unmask_irq,
+	.set_wake = vic_set_wake,
 };
 
 /**
@@ -51,9 +260,10 @@ static struct irq_chip vic_chip = {
  * @base: iomem base address
  * @irq_start: starting interrupt number, must be muliple of 32
  * @vic_sources: bitmask of interrupt sources to allow
+ * @resume_sources: bitmask of interrupt sources to allow for resume
  */
 void __init vic_init(void __iomem *base, unsigned int irq_start,
-		     u32 vic_sources)
+		     u32 vic_sources, u32 resume_sources)
 {
 	unsigned int i;
 
@@ -77,12 +287,7 @@ void __init vic_init(void __iomem *base, unsigned int irq_start,
 		writel(value, base + VIC_PL190_VECT_ADDR);
 	}
 
-	for (i = 0; i < 16; i++) {
-		void __iomem *reg = base + VIC_VECT_CNTL0 + (i * 4);
-		writel(VIC_VECT_CNTL_ENABLE | i, reg);
-	}
-
-	writel(32, base + VIC_PL190_DEF_VECT_ADDR);
+	vic_init2(base);
 
 	for (i = 0; i < 32; i++) {
 		if (vic_sources & (1 << i)) {
@@ -94,4 +299,6 @@ void __init vic_init(void __iomem *base, unsigned int irq_start,
 			set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 		}
 	}
+
+	vic_pm_register(base, irq_start, resume_sources);
 }
diff --git a/arch/arm/include/asm/hardware/vic.h b/arch/arm/include/asm/hardware/vic.h
index f87328d..5d72550 100644
--- a/arch/arm/include/asm/hardware/vic.h
+++ b/arch/arm/include/asm/hardware/vic.h
@@ -41,7 +41,7 @@
 #define VIC_PL192_VECT_ADDR		0xF00
 
 #ifndef __ASSEMBLY__
-void vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources);
+void vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources, u32 resume_sources);
 #endif
 
 #endif
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index ae24486..6faa3dc 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -362,8 +362,8 @@ void __init ep93xx_init_irq(void)
 {
 	int gpio_irq;
 
-	vic_init((void *)EP93XX_VIC1_BASE, 0, EP93XX_VIC1_VALID_IRQ_MASK);
-	vic_init((void *)EP93XX_VIC2_BASE, 32, EP93XX_VIC2_VALID_IRQ_MASK);
+	vic_init((void *)EP93XX_VIC1_BASE, 0, EP93XX_VIC1_VALID_IRQ_MASK, 0);
+	vic_init((void *)EP93XX_VIC2_BASE, 32, EP93XX_VIC2_VALID_IRQ_MASK, 0);
 
 	for (gpio_irq = gpio_to_irq(0);
 	     gpio_irq <= gpio_to_irq(EP93XX_GPIO_LINE_MAX_IRQ); ++gpio_irq) {
diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c
index 79df60c..43da8bb 100644
--- a/arch/arm/mach-netx/generic.c
+++ b/arch/arm/mach-netx/generic.c
@@ -168,7 +168,7 @@ void __init netx_init_irq(void)
 {
 	int irq;
 
-	vic_init(__io(io_p2v(NETX_PA_VIC)), 0, ~0);
+	vic_init(__io(io_p2v(NETX_PA_VIC)), 0, ~0, 0);
 
 	for (irq = NETX_IRQ_HIF_CHAINED(0); irq <= NETX_IRQ_HIF_LAST; irq++) {
 		set_irq_chip(irq, &netx_hif_chip);
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 1f929c3..c39c788 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -116,7 +116,7 @@ void __init versatile_init_irq(void)
 {
 	unsigned int i;
 
-	vic_init(VA_VIC_BASE, IRQ_VIC_START, ~0);
+	vic_init(VA_VIC_BASE, IRQ_VIC_START, ~0, 0);
 
 	set_irq_chained_handler(IRQ_VICSOURCE31, sic_handle_irq);
 
diff --git a/arch/arm/plat-s3c64xx/irq.c b/arch/arm/plat-s3c64xx/irq.c
index f4c4844..8dc5b6d 100644
--- a/arch/arm/plat-s3c64xx/irq.c
+++ b/arch/arm/plat-s3c64xx/irq.c
@@ -232,8 +232,8 @@ void __init s3c64xx_init_irq(u32 vic0_valid, u32 vic1_valid)
 	printk(KERN_DEBUG "%s: initialising interrupts\n", __func__);
 
 	/* initialise the pair of VICs */
-	vic_init(S3C_VA_VIC0, S3C_VIC0_BASE, vic0_valid);
-	vic_init(S3C_VA_VIC1, S3C_VIC1_BASE, vic1_valid);
+	vic_init(S3C_VA_VIC0, S3C_VIC0_BASE, vic0_valid, 0);
+	vic_init(S3C_VA_VIC1, S3C_VIC1_BASE, vic1_valid, 0);
 
 	/* add the timer sub-irqs */
 
-- 
cgit v0.10.2


From 2ab408e8759ad4b2a9bad3647838acd648d2c10c Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 24 Mar 2009 16:09:06 +0000
Subject: [ARM] SMDK6400: Initial machine support for SMDK6400

Add minimial support for the SMDK6400 board to test
the S3C6400 support.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/Kconfig b/arch/arm/mach-s3c6400/Kconfig
index 6da82b5..da87f2d 100644
--- a/arch/arm/mach-s3c6400/Kconfig
+++ b/arch/arm/mach-s3c6400/Kconfig
@@ -6,3 +6,12 @@
 # Licensed under GPLv2
 
 # Currently nothing here, this will be added later
+
+# machine support
+
+config MACH_SMDK6400
+       bool "SMDK6400"
+	select CPU_S3C6410
+	select S3C_DEV_HSMMC
+	help
+	  Machine support for the Samsung SMDK6400
diff --git a/arch/arm/mach-s3c6400/Makefile b/arch/arm/mach-s3c6400/Makefile
index 8f397db..82cb48a 100644
--- a/arch/arm/mach-s3c6400/Makefile
+++ b/arch/arm/mach-s3c6400/Makefile
@@ -13,3 +13,7 @@ obj-				:=
 # Core support for S3C6400 system
 
 obj-n				+= blank.o
+
+# Machine support
+
+obj-$(CONFIG_MACH_SMDK6400)	+= mach-smdk6400.o
diff --git a/arch/arm/mach-s3c6400/mach-smdk6400.c b/arch/arm/mach-s3c6400/mach-smdk6400.c
new file mode 100644
index 0000000..d020a2a
--- /dev/null
+++ b/arch/arm/mach-s3c6400/mach-smdk6400.c
@@ -0,0 +1,96 @@
+/* linux/arch/arm/mach-s3c6400/mach-smdk6400.c
+ *
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/serial_core.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/io.h>
+
+#include <asm/mach-types.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+
+#include <mach/hardware.h>
+#include <mach/map.h>
+
+#include <plat/regs-serial.h>
+
+#include <plat/s3c6400.h>
+#include <plat/clock.h>
+#include <plat/devs.h>
+#include <plat/cpu.h>
+#include <plat/iic.h>
+
+#define UCON S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK
+#define ULCON S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB
+#define UFCON S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE
+
+static struct s3c2410_uartcfg smdk6400_uartcfgs[] __initdata = {
+	[0] = {
+		.hwport	     = 0,
+		.flags	     = 0,
+		.ucon	     = 0x3c5,
+		.ulcon	     = 0x03,
+		.ufcon	     = 0x51,
+	},
+	[1] = {
+		.hwport	     = 1,
+		.flags	     = 0,
+		.ucon	     = 0x3c5,
+		.ulcon	     = 0x03,
+		.ufcon	     = 0x51,
+	},
+};
+
+static struct map_desc smdk6400_iodesc[] = {};
+
+static void __init smdk6400_map_io(void)
+{
+	s3c64xx_init_io(smdk6400_iodesc, ARRAY_SIZE(smdk6400_iodesc));
+	s3c24xx_init_clocks(12000000);
+	s3c24xx_init_uarts(smdk6400_uartcfgs, ARRAY_SIZE(smdk6400_uartcfgs));
+}
+
+static struct platform_device *smdk6400_devices[] __initdata = {
+	&s3c_device_hsmmc1,
+	&s3c_device_i2c0,
+};
+
+static struct i2c_board_info i2c_devs[] __initdata = {
+	{ I2C_BOARD_INFO("WM8753", 0x1A), },
+	{ I2C_BOARD_INFO("24c08", 0x50), },
+};
+
+static void __init smdk6400_machine_init(void)
+{
+	i2c_register_board_info(0, i2c_devs, ARRAY_SIZE(i2c_devs));
+	platform_add_devices(smdk6400_devices, ARRAY_SIZE(smdk6400_devices));
+}
+
+MACHINE_START(SMDK6400, "SMDK6400")
+	/* Maintainer: Ben Dooks <ben@fluff.org> */
+	.phys_io	= S3C_PA_UART & 0xfff00000,
+	.io_pg_offst	= (((u32)S3C_VA_UART) >> 18) & 0xfffc,
+	.boot_params	= S3C64XX_PA_SDRAM + 0x100,
+
+	.init_irq	= s3c6400_init_irq,
+	.map_io		= smdk6400_map_io,
+	.init_machine	= smdk6400_machine_init,
+	.timer		= &s3c24xx_timer,
+MACHINE_END
-- 
cgit v0.10.2


From a6925c1c515513e22b0419e1a41155c88d22b1f4 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 24 Mar 2009 17:25:40 +0000
Subject: [ARM] S3C6400: Core support for S3C6400 SoC

Add the core support files for the Samsung S3C6400 SoC.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/Kconfig b/arch/arm/mach-s3c6400/Kconfig
index da87f2d..f0292a8 100644
--- a/arch/arm/mach-s3c6400/Kconfig
+++ b/arch/arm/mach-s3c6400/Kconfig
@@ -5,13 +5,20 @@
 #
 # Licensed under GPLv2
 
-# Currently nothing here, this will be added later
+# Configuration options for the S3C6410 CPU
 
-# machine support
+config CPU_S3C6400
+	bool
+	select CPU_S3C6400_INIT
+	select CPU_S3C6400_CLOCK
+	help
+	  Enable S3C6400 CPU support
+
+# S36400 Macchine support
 
 config MACH_SMDK6400
        bool "SMDK6400"
-	select CPU_S3C6410
+	select CPU_S3C6400
 	select S3C_DEV_HSMMC
 	help
 	  Machine support for the Samsung SMDK6400
diff --git a/arch/arm/mach-s3c6400/Makefile b/arch/arm/mach-s3c6400/Makefile
index 82cb48a..07eac2b 100644
--- a/arch/arm/mach-s3c6400/Makefile
+++ b/arch/arm/mach-s3c6400/Makefile
@@ -12,7 +12,7 @@ obj-				:=
 
 # Core support for S3C6400 system
 
-obj-n				+= blank.o
+obj-$(CONFIG_CPU_S3C6400)	+= s3c6400.o
 
 # Machine support
 
diff --git a/arch/arm/mach-s3c6400/s3c6400.c b/arch/arm/mach-s3c6400/s3c6400.c
new file mode 100644
index 0000000..d11a427
--- /dev/null
+++ b/arch/arm/mach-s3c6400/s3c6400.c
@@ -0,0 +1,83 @@
+/* linux/arch/arm/mach-s3c6410/cpu.c
+ *
+ * Copyright 2009 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/sysdev.h>
+#include <linux/serial_core.h>
+#include <linux/platform_device.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+
+#include <mach/hardware.h>
+#include <asm/irq.h>
+
+#include <plat/cpu-freq.h>
+#include <plat/regs-serial.h>
+
+#include <plat/cpu.h>
+#include <plat/devs.h>
+#include <plat/clock.h>
+#include <plat/sdhci.h>
+#include <plat/iic-core.h>
+#include <plat/s3c6400.h>
+
+void __init s3c6400_map_io(void)
+{
+	/* the i2c device is directly compatible with s3c2440 */
+	s3c_i2c0_setname("s3c2440-i2c");
+}
+
+void __init s3c6400_init_clocks(int xtal)
+{
+	printk(KERN_DEBUG "%s: initialising clocks\n", __func__);
+	s3c24xx_register_baseclocks(xtal);
+	s3c64xx_register_clocks();
+	s3c6400_register_clocks();
+	s3c6400_setup_clocks();
+}
+
+void __init s3c6400_init_irq(void)
+{
+	/* VIC0 does not have IRQS 5..7,
+	 * VIC1 is fully populated. */
+	s3c64xx_init_irq(~0 & ~(0xf << 5), ~0);
+}
+
+struct sysdev_class s3c6400_sysclass = {
+	.name	= "s3c6400-core",
+};
+
+static struct sys_device s3c6400_sysdev = {
+	.cls	= &s3c6400_sysclass,
+};
+
+static int __init s3c6400_core_init(void)
+{
+	return sysdev_class_register(&s3c6400_sysclass);
+}
+
+core_initcall(s3c6400_core_init);
+
+int __init s3c6400_init(void)
+{
+	printk("S3C6400: Initialising architecture\n");
+
+	return sysdev_register(&s3c6400_sysdev);
+}
diff --git a/arch/arm/plat-s3c64xx/include/plat/s3c6400.h b/arch/arm/plat-s3c64xx/include/plat/s3c6400.h
index 571eaa2..e122966 100644
--- a/arch/arm/plat-s3c64xx/include/plat/s3c6400.h
+++ b/arch/arm/plat-s3c64xx/include/plat/s3c6400.h
@@ -21,6 +21,7 @@ extern void s3c6400_setup_clocks(void);
 #ifdef CONFIG_CPU_S3C6400
 
 extern  int s3c6400_init(void);
+extern void s3c6400_init_irq(void);
 extern void s3c6400_map_io(void);
 extern void s3c6400_init_clocks(int xtal);
 
-- 
cgit v0.10.2


From e074f9803227236252c8e7be16d836d709abff57 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 24 Mar 2009 17:31:42 +0000
Subject: [ARM] S3C64XX: Add S3C6400 CPU detection.

Add detection support for the S3C6400 SoC which has it's
id register in a different place to the S3C6410.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c64xx/cpu.c b/arch/arm/plat-s3c64xx/cpu.c
index f9fdaab..19f68b5 100644
--- a/arch/arm/plat-s3c64xx/cpu.c
+++ b/arch/arm/plat-s3c64xx/cpu.c
@@ -126,6 +126,14 @@ void __init s3c64xx_init_io(struct map_desc *mach_desc, int size)
 	iotable_init(mach_desc, size);
 
 	idcode = __raw_readl(S3C_VA_SYS + 0x118);
+	if (!idcode) {
+		/* S3C6400 has the ID register in a different place,
+		 * and needs a write before it can be read. */
+
+		__raw_writel(0x0, S3C_VA_SYS + 0xA1C);
+		idcode = __raw_readl(S3C_VA_SYS + 0xA1C);
+	}
+
 	s3c_init_cpu(idcode, cpu_ids, ARRAY_SIZE(cpu_ids));
 }
 
-- 
cgit v0.10.2


From 4faf6867638cc21aa43b4ca4ed0bdf14a2d29762 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 25 Mar 2009 11:01:24 +0000
Subject: [ARM] S3C64XX: Add S3C6400 SDHCI setup support

Add support for S3C6400 SDHCI channels 0 and 1, making
the GPIO code common to both S3C6400 and S3C6410.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/Kconfig b/arch/arm/mach-s3c6400/Kconfig
index f0292a8..f5af212 100644
--- a/arch/arm/mach-s3c6400/Kconfig
+++ b/arch/arm/mach-s3c6400/Kconfig
@@ -14,11 +14,18 @@ config CPU_S3C6400
 	help
 	  Enable S3C6400 CPU support
 
+config S3C6400_SETUP_SDHCI
+	bool
+	help
+	  Internal configuration for default SDHCI
+	  setup for S3C6400.
+
 # S36400 Macchine support
 
 config MACH_SMDK6400
        bool "SMDK6400"
 	select CPU_S3C6400
 	select S3C_DEV_HSMMC
+	select S3C6400_SETUP_SDHCI
 	help
 	  Machine support for the Samsung SMDK6400
diff --git a/arch/arm/mach-s3c6400/Makefile b/arch/arm/mach-s3c6400/Makefile
index 07eac2b..df1ce4a 100644
--- a/arch/arm/mach-s3c6400/Makefile
+++ b/arch/arm/mach-s3c6400/Makefile
@@ -14,6 +14,10 @@ obj-				:=
 
 obj-$(CONFIG_CPU_S3C6400)	+= s3c6400.o
 
+# setup support
+
+obj-$(CONFIG_S3C6400_SETUP_SDHCI) += setup-sdhci.o
+
 # Machine support
 
 obj-$(CONFIG_MACH_SMDK6400)	+= mach-smdk6400.o
diff --git a/arch/arm/mach-s3c6400/s3c6400.c b/arch/arm/mach-s3c6400/s3c6400.c
index d11a427..bd17f3d 100644
--- a/arch/arm/mach-s3c6400/s3c6400.c
+++ b/arch/arm/mach-s3c6400/s3c6400.c
@@ -40,7 +40,12 @@
 
 void __init s3c6400_map_io(void)
 {
-	/* the i2c device is directly compatible with s3c2440 */
+	/* setup SDHCI */
+
+	s3c6400_default_sdhci0();
+	s3c6400_default_sdhci1();
+
+	/* the i2c devices are directly compatible with s3c2440 */
 	s3c_i2c0_setname("s3c2440-i2c");
 }
 
diff --git a/arch/arm/mach-s3c6400/setup-sdhci.c b/arch/arm/mach-s3c6400/setup-sdhci.c
new file mode 100644
index 0000000..b93dafb
--- /dev/null
+++ b/arch/arm/mach-s3c6400/setup-sdhci.c
@@ -0,0 +1,63 @@
+/* linux/arch/arm/mach-s3c6410/setup-sdhci.c
+ *
+ * Copyright 2008 Simtec Electronics
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C6410 - Helper functions for settign up SDHCI device(s) (HSMMC)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+
+#include <linux/mmc/card.h>
+#include <linux/mmc/host.h>
+
+#include <plat/regs-sdhci.h>
+#include <plat/sdhci.h>
+
+/* clock sources for the mmc bus clock, order as for the ctrl2[5..4] */
+
+char *s3c6400_hsmmc_clksrcs[4] = {
+	[0] = "hsmmc",
+	[1] = "hsmmc",
+	[2] = "mmc_bus",
+	/* [3] = "48m", - note not succesfully used yet */
+};
+
+void s3c6400_setup_sdhci_cfg_card(struct platform_device *dev,
+				  void __iomem *r,
+				  struct mmc_ios *ios,
+				  struct mmc_card *card)
+{
+	u32 ctrl2, ctrl3;
+
+	ctrl2 = readl(r + S3C_SDHCI_CONTROL2);
+	ctrl2 &= S3C_SDHCI_CTRL2_SELBASECLK_MASK;
+	ctrl2 |= (S3C64XX_SDHCI_CTRL2_ENSTAASYNCCLR |
+		  S3C64XX_SDHCI_CTRL2_ENCMDCNFMSK |
+		  S3C_SDHCI_CTRL2_ENFBCLKRX |
+		  S3C_SDHCI_CTRL2_DFCNT_NONE |
+		  S3C_SDHCI_CTRL2_ENCLKOUTHOLD);
+
+	if (ios->clock < 25 * 1000000)
+		ctrl3 = (S3C_SDHCI_CTRL3_FCSEL3 |
+			 S3C_SDHCI_CTRL3_FCSEL2 |
+			 S3C_SDHCI_CTRL3_FCSEL1 |
+			 S3C_SDHCI_CTRL3_FCSEL0);
+	else
+		ctrl3 = (S3C_SDHCI_CTRL3_FCSEL1 | S3C_SDHCI_CTRL3_FCSEL0);
+
+	printk(KERN_INFO "%s: CTRL 2=%08x, 3=%08x\n", __func__, ctrl2, ctrl3);
+	writel(ctrl2, r + S3C_SDHCI_CONTROL2);
+	writel(ctrl3, r + S3C_SDHCI_CONTROL3);
+}
+
diff --git a/arch/arm/mach-s3c6410/Kconfig b/arch/arm/mach-s3c6410/Kconfig
index 0c8df97..4f7ffc7 100644
--- a/arch/arm/mach-s3c6410/Kconfig
+++ b/arch/arm/mach-s3c6410/Kconfig
@@ -16,6 +16,7 @@ config CPU_S3C6410
 
 config S3C6410_SETUP_SDHCI
 	bool
+	select S3C64XX_SETUP_SDHCI_GPIO
 	help
 	  Internal helper functions for S3C6410 based SDHCI systems
 
diff --git a/arch/arm/mach-s3c6410/setup-sdhci.c b/arch/arm/mach-s3c6410/setup-sdhci.c
index 0b5788b..20666f3 100644
--- a/arch/arm/mach-s3c6410/setup-sdhci.c
+++ b/arch/arm/mach-s3c6410/setup-sdhci.c
@@ -21,8 +21,6 @@
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
 
-#include <mach/gpio.h>
-#include <plat/gpio-cfg.h>
 #include <plat/regs-sdhci.h>
 #include <plat/sdhci.h>
 
@@ -35,22 +33,6 @@ char *s3c6410_hsmmc_clksrcs[4] = {
 	/* [3] = "48m", - note not succesfully used yet */
 };
 
-void s3c6410_setup_sdhci0_cfg_gpio(struct platform_device *dev, int width)
-{
-	unsigned int gpio;
-	unsigned int end;
-
-	end = S3C64XX_GPG(2 + width);
-
-	/* Set all the necessary GPG pins to special-function 0 */
-	for (gpio = S3C64XX_GPG(0); gpio < end; gpio++) {
-		s3c_gpio_cfgpin(gpio, S3C_GPIO_SFN(2));
-		s3c_gpio_setpull(gpio, S3C_GPIO_PULL_NONE);
-	}
-
-	s3c_gpio_setpull(S3C64XX_GPG(6), S3C_GPIO_PULL_UP);
-	s3c_gpio_cfgpin(S3C64XX_GPG(6), S3C_GPIO_SFN(2));
-}
 
 void s3c6410_setup_sdhci0_cfg_card(struct platform_device *dev,
 				    void __iomem *r,
@@ -84,19 +66,3 @@ void s3c6410_setup_sdhci0_cfg_card(struct platform_device *dev,
 	writel(ctrl3, r + S3C_SDHCI_CONTROL3);
 }
 
-void s3c6410_setup_sdhci1_cfg_gpio(struct platform_device *dev, int width)
-{
-	unsigned int gpio;
-	unsigned int end;
-
-	end = S3C64XX_GPH(2 + width);
-
-	/* Set all the necessary GPG pins to special-function 0 */
-	for (gpio = S3C64XX_GPH(0); gpio < end; gpio++) {
-		s3c_gpio_cfgpin(gpio, S3C_GPIO_SFN(2));
-		s3c_gpio_setpull(gpio, S3C_GPIO_PULL_NONE);
-	}
-
-	s3c_gpio_setpull(S3C64XX_GPG(6), S3C_GPIO_PULL_UP);
-	s3c_gpio_cfgpin(S3C64XX_GPG(6), S3C_GPIO_SFN(3));
-}
diff --git a/arch/arm/plat-s3c/include/plat/sdhci.h b/arch/arm/plat-s3c/include/plat/sdhci.h
index c4ca392..f615308 100644
--- a/arch/arm/plat-s3c/include/plat/sdhci.h
+++ b/arch/arm/plat-s3c/include/plat/sdhci.h
@@ -67,12 +67,52 @@ extern struct s3c_sdhci_platdata s3c_hsmmc1_def_platdata;
 
 /* Helper function availablity */
 
+extern void s3c64xx_setup_sdhci0_cfg_gpio(struct platform_device *, int w);
+extern void s3c64xx_setup_sdhci1_cfg_gpio(struct platform_device *, int w);
+
+/* S3C6400 SDHCI setup */
+
+#ifdef CONFIG_S3C6400_SETUP_SDHCI
+extern char *s3c6400_hsmmc_clksrcs[4];
+
+#ifdef CONFIG_S3C_DEV_HSMMC
+extern void s3c6400_setup_sdhci_cfg_card(struct platform_device *dev,
+					 void __iomem *r,
+					 struct mmc_ios *ios,
+					 struct mmc_card *card);
+
+static inline void s3c6400_default_sdhci0(void)
+{
+	s3c_hsmmc0_def_platdata.clocks = s3c6400_hsmmc_clksrcs;
+	s3c_hsmmc0_def_platdata.cfg_gpio = s3c64xx_setup_sdhci0_cfg_gpio;
+	s3c_hsmmc0_def_platdata.cfg_card = s3c6400_setup_sdhci_cfg_card;
+}
+
+#else
+static inline void s3c6400_default_sdhci0(void) { }
+#endif  /* CONFIG_S3C_DEV_HSMMC */
+
+#ifdef CONFIG_S3C_DEV_HSMMC1
+static inline void s3c6400_default_sdhci1(void)
+{
+	s3c_hsmmc1_def_platdata.clocks = s3c6400_hsmmc_clksrcs;
+	s3c_hsmmc1_def_platdata.cfg_gpio = s3c64xx_setup_sdhci1_cfg_gpio;
+	s3c_hsmmc1_def_platdata.cfg_card = s3c6400_setup_sdhci_cfg_card;
+}
+#else
+static inline void s3c6400_default_sdhci1(void) { }
+#endif /* CONFIG_S3C_DEV_HSMMC1 */
+
+#else
+static inline void s3c6400_default_sdhci0(void) { }
+static inline void s3c6400_default_sdhci1(void) { }
+#endif /* CONFIG_S3C6400_SETUP_SDHCI */
+
+/* S3C6410 SDHCI setup */
+
 #ifdef CONFIG_S3C6410_SETUP_SDHCI
 extern char *s3c6410_hsmmc_clksrcs[4];
 
-extern void s3c6410_setup_sdhci0_cfg_gpio(struct platform_device *, int w);
-extern void s3c6410_setup_sdhci1_cfg_gpio(struct platform_device *, int w);
-
 extern void s3c6410_setup_sdhci0_cfg_card(struct platform_device *dev,
 					   void __iomem *r,
 					   struct mmc_ios *ios,
@@ -82,7 +122,7 @@ extern void s3c6410_setup_sdhci0_cfg_card(struct platform_device *dev,
 static inline void s3c6410_default_sdhci0(void)
 {
 	s3c_hsmmc0_def_platdata.clocks = s3c6410_hsmmc_clksrcs;
-	s3c_hsmmc0_def_platdata.cfg_gpio = s3c6410_setup_sdhci0_cfg_gpio;
+	s3c_hsmmc0_def_platdata.cfg_gpio = s3c64xx_setup_sdhci0_cfg_gpio;
 	s3c_hsmmc0_def_platdata.cfg_card = s3c6410_setup_sdhci0_cfg_card;
 }
 #else
@@ -93,7 +133,7 @@ static inline void s3c6410_default_sdhci0(void) { }
 static inline void s3c6410_default_sdhci1(void)
 {
 	s3c_hsmmc1_def_platdata.clocks = s3c6410_hsmmc_clksrcs;
-	s3c_hsmmc1_def_platdata.cfg_gpio = s3c6410_setup_sdhci1_cfg_gpio;
+	s3c_hsmmc1_def_platdata.cfg_gpio = s3c64xx_setup_sdhci1_cfg_gpio;
 	s3c_hsmmc1_def_platdata.cfg_card = s3c6410_setup_sdhci0_cfg_card;
 }
 #else
diff --git a/arch/arm/plat-s3c64xx/Kconfig b/arch/arm/plat-s3c64xx/Kconfig
index ad48fd73..eb088db 100644
--- a/arch/arm/plat-s3c64xx/Kconfig
+++ b/arch/arm/plat-s3c64xx/Kconfig
@@ -60,4 +60,9 @@ config S3C64XX_SETUP_FB_24BPP
 	help
 	  Common setup code for S3C64XX with an 24bpp RGB display helper.
 
+config S3C64XX_SETUP_SDHCI_GPIO
+	bool
+	help
+	  Common setup code for S3C64XX SDHCI GPIO configurations
+
 endif
diff --git a/arch/arm/plat-s3c64xx/Makefile b/arch/arm/plat-s3c64xx/Makefile
index 12ac7a5..030050f 100644
--- a/arch/arm/plat-s3c64xx/Makefile
+++ b/arch/arm/plat-s3c64xx/Makefile
@@ -35,3 +35,4 @@ obj-$(CONFIG_PM)		+= irq-pm.o
 obj-$(CONFIG_S3C64XX_SETUP_I2C0) += setup-i2c0.o
 obj-$(CONFIG_S3C64XX_SETUP_I2C1) += setup-i2c1.o
 obj-$(CONFIG_S3C64XX_SETUP_FB_24BPP) += setup-fb-24bpp.o
+obj-$(CONFIG_S3C64XX_SETUP_SDHCI_GPIO) += setup-sdhci-gpio.o
\ No newline at end of file
diff --git a/arch/arm/plat-s3c64xx/setup-sdhci-gpio.c b/arch/arm/plat-s3c64xx/setup-sdhci-gpio.c
new file mode 100644
index 0000000..5417123
--- /dev/null
+++ b/arch/arm/plat-s3c64xx/setup-sdhci-gpio.c
@@ -0,0 +1,55 @@
+/* linux/arch/arm/plat-s3c64xx/setup-sdhci-gpio.c
+ *
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C64XX - Helper functions for setting up SDHCI device(s) GPIO (HSMMC)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+
+#include <mach/gpio.h>
+#include <plat/gpio-cfg.h>
+
+void s3c64xx_setup_sdhci0_cfg_gpio(struct platform_device *dev, int width)
+{
+	unsigned int gpio;
+	unsigned int end;
+
+	end = S3C64XX_GPG(2 + width);
+
+	/* Set all the necessary GPG pins to special-function 0 */
+	for (gpio = S3C64XX_GPG(0); gpio < end; gpio++) {
+		s3c_gpio_cfgpin(gpio, S3C_GPIO_SFN(2));
+		s3c_gpio_setpull(gpio, S3C_GPIO_PULL_NONE);
+	}
+
+	s3c_gpio_setpull(S3C64XX_GPG(6), S3C_GPIO_PULL_UP);
+	s3c_gpio_cfgpin(S3C64XX_GPG(6), S3C_GPIO_SFN(2));
+}
+
+void s3c64xx_setup_sdhci1_cfg_gpio(struct platform_device *dev, int width)
+{
+	unsigned int gpio;
+	unsigned int end;
+
+	end = S3C64XX_GPH(2 + width);
+
+	/* Set all the necessary GPG pins to special-function 0 */
+	for (gpio = S3C64XX_GPH(0); gpio < end; gpio++) {
+		s3c_gpio_cfgpin(gpio, S3C_GPIO_SFN(2));
+		s3c_gpio_setpull(gpio, S3C_GPIO_PULL_NONE);
+	}
+
+	s3c_gpio_setpull(S3C64XX_GPG(6), S3C_GPIO_PULL_UP);
+	s3c_gpio_cfgpin(S3C64XX_GPG(6), S3C_GPIO_SFN(3));
+}
-- 
cgit v0.10.2


From 19e3f4858d4a2863ad66b2c92d0f9270879eac04 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 23 Apr 2009 17:26:15 +0100
Subject: [ARM] SMDK6400: Fix WM8753 i2c board definition

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/mach-smdk6400.c b/arch/arm/mach-s3c6400/mach-smdk6400.c
index d020a2a..ab19285 100644
--- a/arch/arm/mach-s3c6400/mach-smdk6400.c
+++ b/arch/arm/mach-s3c6400/mach-smdk6400.c
@@ -73,7 +73,7 @@ static struct platform_device *smdk6400_devices[] __initdata = {
 };
 
 static struct i2c_board_info i2c_devs[] __initdata = {
-	{ I2C_BOARD_INFO("WM8753", 0x1A), },
+	{ I2C_BOARD_INFO("wm8753", 0x1A), },
 	{ I2C_BOARD_INFO("24c08", 0x50), },
 };
 
-- 
cgit v0.10.2


From e2c977dca2901012ddabdc2a7b3c47a80c94d431 Mon Sep 17 00:00:00 2001
From: Werner Almesberger <werner@openmoko.org>
Date: Thu, 5 Mar 2009 11:43:14 +0800
Subject: [ARM] S3C6410: Add CAMIF clock

Add camera interface clock to S3C6410.

Signed-off-by: Werner Almesberger <werner@openmoko.org>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c64xx/s3c6400-clock.c b/arch/arm/plat-s3c64xx/s3c6400-clock.c
index 05b1752..6dd187d 100644
--- a/arch/arm/plat-s3c64xx/s3c6400-clock.c
+++ b/arch/arm/plat-s3c64xx/s3c6400-clock.c
@@ -520,6 +520,33 @@ static struct clksrc_clk clk_irda = {
 	.reg_divider	= S3C_CLK_DIV2,
 };
 
+static struct clk *clkset_camif_list[] = {
+	&clk_h2,
+};
+
+static struct clk_sources clkset_camif = {
+	.sources	= clkset_camif_list,
+	.nr_sources	= ARRAY_SIZE(clkset_camif_list),
+};
+
+static struct clksrc_clk clk_camif = {
+	.clk	= {
+		.name		= "camera",
+		.id		= -1,
+		.ctrlbit        = S3C_CLKCON_SCLK_CAM,
+		.enable		= s3c64xx_sclk_ctrl,
+		.set_parent	= s3c64xx_setparent_clksrc,
+		.get_rate	= s3c64xx_getrate_clksrc,
+		.set_rate	= s3c64xx_setrate_clksrc,
+		.round_rate	= s3c64xx_roundrate_clksrc,
+	},
+	.shift		= 0,
+	.mask		= 0,
+	.sources	= &clkset_camif,
+	.divider_shift	= S3C6400_CLKDIV0_CAM_SHIFT,
+	.reg_divider	= S3C_CLK_DIV0,
+};
+
 /* Clock initialisation code */
 
 static struct clksrc_clk *init_parents[] = {
@@ -536,6 +563,7 @@ static struct clksrc_clk *init_parents[] = {
 	&clk_audio0,
 	&clk_audio1,
 	&clk_irda,
+	&clk_camif,
 };
 
 static void __init_or_cpufreq s3c6400_set_clksrc(struct clksrc_clk *clk)
@@ -635,6 +663,7 @@ static struct clk *clks[] __initdata = {
 	&clk_audio0.clk,
 	&clk_audio1.clk,
 	&clk_irda.clk,
+	&clk_camif.clk,
 };
 
 void __init s3c6400_register_clocks(void)
-- 
cgit v0.10.2


From a03f7daf6df31b4d849031c51a3c10b8ba78ce67 Mon Sep 17 00:00:00 2001
From: Werner Almesberger <werner@openmoko.org>
Date: Thu, 5 Mar 2009 11:43:13 +0800
Subject: [ARM] S3C64XX: Add HCLKx2

Add doubled HCLK to S3C64xx.

Signed-off-by: Werner Almesberger <werner@openmoko.org>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/include/plat/clock.h b/arch/arm/plat-s3c/include/plat/clock.h
index a10622e..d86af84 100644
--- a/arch/arm/plat-s3c/include/plat/clock.h
+++ b/arch/arm/plat-s3c/include/plat/clock.h
@@ -50,6 +50,7 @@ extern struct clk clk_xtal;
 extern struct clk clk_ext;
 
 /* S3C64XX specific clocks */
+extern struct clk clk_h2;
 extern struct clk clk_27m;
 extern struct clk clk_48m;
 
diff --git a/arch/arm/plat-s3c64xx/clock.c b/arch/arm/plat-s3c64xx/clock.c
index ad1b968..679076f 100644
--- a/arch/arm/plat-s3c64xx/clock.c
+++ b/arch/arm/plat-s3c64xx/clock.c
@@ -27,6 +27,12 @@
 #include <plat/devs.h>
 #include <plat/clock.h>
 
+struct clk clk_h2 = {
+	.name		= "hclk2",
+	.id		= -1,
+	.rate		= 0,
+};
+
 struct clk clk_27m = {
 	.name		= "clk_27m",
 	.id		= -1,
@@ -246,6 +252,7 @@ static struct clk *clks[] __initdata = {
 	&clk_epll,
 	&clk_27m,
 	&clk_48m,
+	&clk_h2,
 };
 
 void __init s3c64xx_register_clocks(void)
diff --git a/arch/arm/plat-s3c64xx/s3c6400-clock.c b/arch/arm/plat-s3c64xx/s3c6400-clock.c
index 6dd187d..96fa9ea 100644
--- a/arch/arm/plat-s3c64xx/s3c6400-clock.c
+++ b/arch/arm/plat-s3c64xx/s3c6400-clock.c
@@ -636,6 +636,7 @@ void __init_or_cpufreq s3c6400_setup_clocks(void)
 	clk_fout_epll.rate = epll;
 	clk_fout_apll.rate = apll;
 
+	clk_h2.rate = hclk2;
 	clk_h.rate = hclk;
 	clk_p.rate = pclk;
 	clk_f.rate = fclk;
-- 
cgit v0.10.2


From 8f1ecf1d965f4e1842b32af5dd40f3d7b6d407ed Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 28 Apr 2009 16:06:24 +0100
Subject: [ARM] S3C64XX: Configure clocks for DMA controller

Add missing DMA controller block clocks.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c64xx/clock.c b/arch/arm/plat-s3c64xx/clock.c
index 679076f..0bc2fa1 100644
--- a/arch/arm/plat-s3c64xx/clock.c
+++ b/arch/arm/plat-s3c64xx/clock.c
@@ -158,6 +158,18 @@ static struct clk init_clocks_disable[] = {
 		.parent		= &clk_48m,
 		.enable		= s3c64xx_sclk_ctrl,
 		.ctrlbit	= S3C_CLKCON_SCLK_MMC2_48,
+	}, {
+		.name		= "dma0",
+		.id		= -1,
+		.parent		= &clk_h,
+		.enable		= s3c64xx_hclk_ctrl,
+		.ctrlbit	= S3C_CLKCON_HCLK_DMA0,
+	}, {
+		.name		= "dma1",
+		.id		= -1,
+		.parent		= &clk_h,
+		.enable		= s3c64xx_hclk_ctrl,
+		.ctrlbit	= S3C_CLKCON_HCLK_DMA1,
 	},
 };
 
-- 
cgit v0.10.2


From 496a3f0927197ca155a604478bf6e7db3eca3bbd Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sat, 2 May 2009 13:48:53 +0100
Subject: [ARM] S3C64XX: Add ARM clock

Add ARM clock to provide 'arm' from the APLL to the ARM core.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/s3c6400.c b/arch/arm/mach-s3c6400/s3c6400.c
index bd17f3d..1ece887 100644
--- a/arch/arm/mach-s3c6400/s3c6400.c
+++ b/arch/arm/mach-s3c6400/s3c6400.c
@@ -30,6 +30,7 @@
 
 #include <plat/cpu-freq.h>
 #include <plat/regs-serial.h>
+#include <plat/regs-clock.h>
 
 #include <plat/cpu.h>
 #include <plat/devs.h>
@@ -54,7 +55,7 @@ void __init s3c6400_init_clocks(int xtal)
 	printk(KERN_DEBUG "%s: initialising clocks\n", __func__);
 	s3c24xx_register_baseclocks(xtal);
 	s3c64xx_register_clocks();
-	s3c6400_register_clocks();
+	s3c6400_register_clocks(S3C6400_CLKDIV0_ARM_MASK);
 	s3c6400_setup_clocks();
 }
 
diff --git a/arch/arm/mach-s3c6410/cpu.c b/arch/arm/mach-s3c6410/cpu.c
index 6a73ca6..ade904d 100644
--- a/arch/arm/mach-s3c6410/cpu.c
+++ b/arch/arm/mach-s3c6410/cpu.c
@@ -31,6 +31,7 @@
 
 #include <plat/cpu-freq.h>
 #include <plat/regs-serial.h>
+#include <plat/regs-clock.h>
 
 #include <plat/cpu.h>
 #include <plat/devs.h>
@@ -68,7 +69,7 @@ void __init s3c6410_init_clocks(int xtal)
 	printk(KERN_DEBUG "%s: initialising clocks\n", __func__);
 	s3c24xx_register_baseclocks(xtal);
 	s3c64xx_register_clocks();
-	s3c6400_register_clocks();
+	s3c6400_register_clocks(S3C6410_CLKDIV0_ARM_MASK);
 	s3c6400_setup_clocks();
 }
 
diff --git a/arch/arm/plat-s3c64xx/include/plat/s3c6400.h b/arch/arm/plat-s3c64xx/include/plat/s3c6400.h
index e122966..11f2e1e 100644
--- a/arch/arm/plat-s3c64xx/include/plat/s3c6400.h
+++ b/arch/arm/plat-s3c64xx/include/plat/s3c6400.h
@@ -15,7 +15,7 @@
 /* Common init code for S3C6400 related SoCs */
 
 extern void s3c6400_common_init_uarts(struct s3c2410_uartcfg *cfg, int no);
-extern void s3c6400_register_clocks(void);
+extern void s3c6400_register_clocks(unsigned armclk_divlimit);
 extern void s3c6400_setup_clocks(void);
 
 #ifdef CONFIG_CPU_S3C6400
diff --git a/arch/arm/plat-s3c64xx/s3c6400-clock.c b/arch/arm/plat-s3c64xx/s3c6400-clock.c
index 96fa9ea..1debc1f 100644
--- a/arch/arm/plat-s3c64xx/s3c6400-clock.c
+++ b/arch/arm/plat-s3c64xx/s3c6400-clock.c
@@ -133,6 +133,65 @@ static struct clksrc_clk clk_mout_mpll = {
 	.sources	= &clk_src_mpll,
 };
 
+static unsigned int armclk_mask;
+
+static unsigned long s3c64xx_clk_arm_get_rate(struct clk *clk)
+{
+	unsigned long rate = clk_get_rate(clk->parent);
+	u32 clkdiv;
+
+	/* divisor mask starts at bit0, so no need to shift */
+	clkdiv = __raw_readl(S3C_CLK_DIV0) & armclk_mask;
+
+	return rate / (clkdiv + 1);
+}
+
+static unsigned long s3c64xx_clk_arm_round_rate(struct clk *clk,
+						unsigned long rate)
+{
+	unsigned long parent = clk_get_rate(clk->parent);
+	u32 div;
+
+	if (parent < rate)
+		return rate;
+
+	div = (parent / rate) - 1;
+	if (div > armclk_mask)
+		div = armclk_mask;
+
+	return parent / (div + 1);
+}
+
+static int s3c64xx_clk_arm_set_rate(struct clk *clk, unsigned long rate)
+{
+	unsigned long parent = clk_get_rate(clk->parent);
+	u32 div;
+	u32 val;
+
+	if (rate < parent / (armclk_mask + 1))
+		return -EINVAL;
+
+	rate = clk_round_rate(clk, rate);
+	div = clk_get_rate(clk->parent) / rate;
+
+	val = __raw_readl(S3C_CLK_DIV0);
+	val &= armclk_mask;
+	val |= (div - 1);
+	__raw_writel(val, S3C_CLK_DIV0);
+
+	return 0;
+
+}
+
+static struct clk clk_arm = {
+	.name		= "armclk",
+	.id		= -1,
+	.parent		= &clk_mout_apll.clk,
+	.get_rate	= s3c64xx_clk_arm_get_rate,
+	.set_rate	= s3c64xx_clk_arm_set_rate,
+	.round_rate	= s3c64xx_clk_arm_round_rate,
+};
+
 static unsigned long s3c64xx_clk_doutmpll_get_rate(struct clk *clk)
 {
 	unsigned long rate = clk_get_rate(clk->parent);
@@ -665,14 +724,29 @@ static struct clk *clks[] __initdata = {
 	&clk_audio1.clk,
 	&clk_irda.clk,
 	&clk_camif.clk,
+	&clk_arm,
 };
 
-void __init s3c6400_register_clocks(void)
+/**
+ * s3c6400_register_clocks - register clocks for s3c6400 and above
+ * @armclk_divlimit: Divisor mask for ARMCLK
+ *
+ * Register the clocks for the S3C6400 and above SoC range, such
+ * as ARMCLK and the clocks which have divider chains attached.
+ *
+ * This call does not setup the clocks, which is left to the
+ * s3c6400_setup_clocks() call which may be needed by the cpufreq
+ * or resume code to re-set the clocks if the bootloader has changed
+ * them.
+ */
+void __init s3c6400_register_clocks(unsigned armclk_divlimit)
 {
 	struct clk *clkp;
 	int ret;
 	int ptr;
 
+	armclk_mask = armclk_divlimit;
+
 	for (ptr = 0; ptr < ARRAY_SIZE(clks); ptr++) {
 		clkp = clks[ptr];
 		ret = s3c24xx_register_clock(clkp);
-- 
cgit v0.10.2


From 41d5545d23d2ccf4d34725094dccebd37f15c1c4 Mon Sep 17 00:00:00 2001
From: Kacper Szczesniak <kacper@qwe.pl>
Date: Thu, 7 May 2009 12:47:43 +0200
Subject: ALSA: hda - Add support for MacBook 5.1 (Aluminium)

Signed-off-by: Kacper Szczesniak <kacper@qwe.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3e7207b..b949534 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -205,6 +205,7 @@ enum {
 	ALC882_ASUS_A7M,
 	ALC885_MACPRO,
 	ALC885_MBP3,
+	ALC885_MB5,
 	ALC885_IMAC24,
 	ALC882_AUTO,
 	ALC882_MODEL_LAST,
@@ -6164,6 +6165,16 @@ static struct hda_input_mux alc882_capture_source = {
 		{ "CD", 0x4 },
 	},
 };
+
+static struct hda_input_mux mb5_capture_source = {
+	.num_items = 3,
+	.items = {
+		{ "Mic", 0x1 },
+		{ "Line", 0x2 },
+		{ "CD", 0x4 },
+	},
+};
+
 /*
  * 2ch mode
  */
@@ -6293,6 +6304,20 @@ static struct snd_kcontrol_new alc885_mbp3_mixer[] = {
 	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0x00, HDA_INPUT),
 	{ } /* end */
 };
+
+static struct snd_kcontrol_new alc885_mb5_mixer[] = {
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x0d, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("Front Playback Switch", 0x0d, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line-Out Playback Volume", 0x0c, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("Line-Out Playback Switch", 0x0c, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE  ("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE  ("Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Boost", 0x15, 0x00, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost", 0x19, 0x00, HDA_INPUT),
+	{ } /* end */
+};
 static struct snd_kcontrol_new alc882_w2jc_mixer[] = {
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
@@ -6520,6 +6545,38 @@ static struct hda_verb alc882_macpro_init_verbs[] = {
 	{ }
 };
 
+/* Macbook 5,1 */
+static struct hda_verb alc885_mb5_init_verbs[] = {
+	/* Front mixer */
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* LineOut mixer */
+	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* Front Pin: output 0 (0x0d) */
+	{0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x01},
+	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x18, AC_VERB_SET_CONNECT_SEL, 0x01},
+	/* HP Pin: output 0 (0x0c) */
+	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x14, AC_VERB_SET_CONNECT_SEL, 0x00},
+	/* Front Mic pin: input vref at 80% */
+	{0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+	/* Line In pin */
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+	{ }
+};
+
 /* Macbook Pro rev3 */
 static struct hda_verb alc885_mbp3_init_verbs[] = {
 	/* Front mixer: unmute input/output amp left and right (volume = 0) */
@@ -6864,6 +6921,7 @@ static const char *alc882_models[ALC882_MODEL_LAST] = {
 	[ALC882_ASUS_A7J]	= "asus-a7j",
 	[ALC882_ASUS_A7M]	= "asus-a7m",
 	[ALC885_MACPRO]		= "macpro",
+	[ALC885_MB5]		= "mb5",
 	[ALC885_MBP3]		= "mbp3",
 	[ALC885_IMAC24]		= "imac24",
 	[ALC882_AUTO]		= "auto",
@@ -6944,6 +7002,18 @@ static struct alc_config_preset alc882_presets[] = {
 		.unsol_event = alc885_mbp3_unsol_event,
 		.init_hook = alc885_mbp3_automute,
 	},
+	[ALC885_MB5] = {
+		.mixers = { alc885_mb5_mixer },
+		.init_verbs = { alc885_mb5_init_verbs,
+				alc880_gpio1_init_verbs },
+		.num_dacs = ARRAY_SIZE(alc882_dac_nids),
+		.dac_nids = alc882_dac_nids,
+		.channel_mode = alc885_mbp_6ch_modes,
+		.num_channel_mode = ARRAY_SIZE(alc885_mbp_6ch_modes),
+		.input_mux = &mb5_capture_source,
+		.dig_out_nid = ALC882_DIGOUT_NID,
+		.dig_in_nid = ALC882_DIGIN_NID,
+	},
 	[ALC885_MACPRO] = {
 		.mixers = { alc882_macpro_mixer },
 		.init_verbs = { alc882_macpro_init_verbs },
@@ -7249,6 +7319,9 @@ static int patch_alc882(struct hda_codec *codec)
 		case 0x106b3800: /* MacbookPro4,1 - latter revision */
 			board_config = ALC885_MBP3;
 			break;
+		case 0x106b3f00: /* Macbook 5,1 */
+			board_config = ALC885_MB5;
+			break;
 		default:
 			/* ALC889A is handled better as ALC888-compatible */
 			if (codec->revision_id == 0x100101 ||
-- 
cgit v0.10.2


From 01eb1753fcafd25304ec9392e0d9d2acb649931b Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 31 Mar 2009 20:51:52 +0200
Subject: MXC irq: remove unused defines

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/irq.c b/arch/arm/plat-mxc/irq.c
index 0fb68a5..931a0ee 100644
--- a/arch/arm/plat-mxc/irq.c
+++ b/arch/arm/plat-mxc/irq.c
@@ -45,11 +45,6 @@
 #define AVIC_FIPNDH		(AVIC_BASE + 0x60)	/* fast int pending high */
 #define AVIC_FIPNDL		(AVIC_BASE + 0x64)	/* fast int pending low */
 
-#define SYSTEM_PREV_REG		IO_ADDRESS(IIM_BASE_ADDR + 0x20)
-#define SYSTEM_SREV_REG		IO_ADDRESS(IIM_BASE_ADDR + 0x24)
-#define IIM_PROD_REV_SH		3
-#define IIM_PROD_REV_LEN	5
-
 int imx_irq_set_priority(unsigned char irq, unsigned char prio)
 {
 #ifdef CONFIG_MXC_IRQ_PRIOR
-- 
cgit v0.10.2


From bca6ef1e53b601ece59250f41b9817eba3afa490 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 1 Apr 2009 11:11:48 +0200
Subject: MXC: Add iomux support for MX35 SoCs

This iomux is called iomux-v3 in the tree because it is the third known
incarnation of MXC iomuxers. It is not only found on the MX35 but also
on the MX51 and probably others.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/Kconfig b/arch/arm/mach-mx3/Kconfig
index 194b842..6d61ef0 100644
--- a/arch/arm/mach-mx3/Kconfig
+++ b/arch/arm/mach-mx3/Kconfig
@@ -5,6 +5,7 @@ config ARCH_MX31
 
 config ARCH_MX35
 	bool
+	select ARCH_MXC_IOMUX_V3
 
 comment "MX3 platforms:"
 
diff --git a/arch/arm/plat-mxc/Kconfig b/arch/arm/plat-mxc/Kconfig
index 17d0e99..61acd4c 100644
--- a/arch/arm/plat-mxc/Kconfig
+++ b/arch/arm/plat-mxc/Kconfig
@@ -51,4 +51,6 @@ config MXC_PWM
 	help
 	  Enable support for the i.MX PWM controller(s).
 
+config ARCH_MXC_IOMUX_V3
+	bool
 endif
diff --git a/arch/arm/plat-mxc/Makefile b/arch/arm/plat-mxc/Makefile
index 0554063..e3212c8 100644
--- a/arch/arm/plat-mxc/Makefile
+++ b/arch/arm/plat-mxc/Makefile
@@ -7,4 +7,5 @@ obj-y := irq.o clock.o gpio.o time.o devices.o cpu.o system.o
 
 obj-$(CONFIG_ARCH_MX1) += iomux-mx1-mx2.o dma-mx1-mx2.o
 obj-$(CONFIG_ARCH_MX2) += iomux-mx1-mx2.o dma-mx1-mx2.o
+obj-$(CONFIG_ARCH_MXC_IOMUX_V3) += iomux-v3.o
 obj-$(CONFIG_MXC_PWM)  += pwm.o
diff --git a/arch/arm/plat-mxc/include/mach/iomux-v3.h b/arch/arm/plat-mxc/include/mach/iomux-v3.h
new file mode 100644
index 0000000..7cd8454
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/iomux-v3.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2009 by Jan Weitzel Phytec Messtechnik GmbH,
+ *			<armlinux@phytec.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __MACH_IOMUX_V3_H__
+#define __MACH_IOMUX_V3_H__
+
+/*
+ *	build IOMUX_PAD structure
+ *
+ * This iomux scheme is based around pads, which are the physical balls
+ * on the processor.
+ *
+ * - Each pad has a pad control register (IOMUXC_SW_PAD_CTRL_x) which controls
+ *   things like driving strength and pullup/pulldown.
+ * - Each pad can have but not necessarily does have an output routing register
+ *   (IOMUXC_SW_MUX_CTL_PAD_x).
+ * - Each pad can have but not necessarily does have an input routing register
+ *   (IOMUXC_x_SELECT_INPUT)
+ *
+ * The three register sets do not have a fixed offset to each other,
+ * hence we order this table by pad control registers (which all pads
+ * have) and put the optional i/o routing registers into additional
+ * fields.
+ *
+ * The naming convention for the pad modes is MX35_PAD_<padname>__<padmode>
+ * If <padname> or <padmode> refers to a GPIO, it is named
+ * GPIO_<unit>_<num>
+ *
+ */
+
+struct pad_desc {
+	unsigned mux_ctrl_ofs:12; /* IOMUXC_SW_MUX_CTL_PAD offset */
+	unsigned mux_mode:8;
+	unsigned pad_ctrl_ofs:12; /* IOMUXC_SW_PAD_CTRL offset */
+#define	NO_PAD_CTRL	(1 << 16)
+	unsigned pad_ctrl:17;
+	unsigned select_input_ofs:12; /* IOMUXC_SELECT_INPUT offset */
+	unsigned select_input:3;
+};
+
+#define IOMUX_PAD(_pad_ctrl_ofs, _mux_ctrl_ofs, _mux_mode, _select_input_ofs, \
+		_select_input, _pad_ctrl)				\
+		{							\
+			.mux_ctrl_ofs     = _mux_ctrl_ofs,		\
+			.mux_mode         = _mux_mode,			\
+			.pad_ctrl_ofs     = _pad_ctrl_ofs,		\
+			.pad_ctrl         = _pad_ctrl,			\
+			.select_input_ofs = _select_input_ofs,		\
+			.select_input     = _select_input,		\
+		}
+
+/*
+ * Use to set PAD control
+ */
+#define PAD_CTL_DRIVE_VOLTAGE_3_3_V	0
+#define PAD_CTL_DRIVE_VOLTAGE_1_8_V	1
+
+#define PAD_CTL_NO_HYSTERESIS		0
+#define PAD_CTL_HYSTERESIS		1
+
+#define PAD_CTL_PULL_DISABLED		0x0
+#define PAD_CTL_PULL_KEEPER		0xa
+#define PAD_CTL_PULL_DOWN_100K		0xc
+#define PAD_CTL_PULL_UP_47K		0xd
+#define PAD_CTL_PULL_UP_100K		0xe
+#define PAD_CTL_PULL_UP_22K		0xf
+
+#define PAD_CTL_OUTPUT_CMOS		0
+#define PAD_CTL_OUTPUT_OPEN_DRAIN	1
+
+#define PAD_CTL_DRIVE_STRENGTH_NORM	0
+#define PAD_CTL_DRIVE_STRENGTH_HIGH	1
+#define PAD_CTL_DRIVE_STRENGTH_MAX	2
+
+#define PAD_CTL_SLEW_RATE_SLOW		0
+#define PAD_CTL_SLEW_RATE_FAST		1
+
+/*
+ * setups a single pad:
+ * 	- reserves the pad so that it is not claimed by another driver
+ * 	- setups the iomux according to the configuration
+ */
+int mxc_iomux_v3_setup_pad(struct pad_desc *pad);
+
+/*
+ * setups mutliple pads
+ * convenient way to call the above function with tables
+ */
+int mxc_iomux_v3_setup_multiple_pads(struct pad_desc *pad_list, unsigned count);
+
+/*
+ * releases a single pad:
+ * 	- make it available for a future use by another driver
+ * 	- DOES NOT reconfigure the IOMUX in its reset state
+ */
+void mxc_iomux_v3_release_pad(struct pad_desc *pad);
+
+/*
+ * releases multiple pads
+ * convenvient way to call the above function with tables
+ */
+void mxc_iomux_v3_release_multiple_pads(struct pad_desc *pad_list, int count);
+
+#endif /* __MACH_IOMUX_V3_H__*/
+
diff --git a/arch/arm/plat-mxc/iomux-v3.c b/arch/arm/plat-mxc/iomux-v3.c
new file mode 100644
index 0000000..77a078f
--- /dev/null
+++ b/arch/arm/plat-mxc/iomux-v3.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2004-2006 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright (C) 2008 by Sascha Hauer <kernel@pengutronix.de>
+ * Copyright (C) 2009 by Jan Weitzel Phytec Messtechnik GmbH,
+ *                       <armlinux@phytec.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/gpio.h>
+
+#include <mach/hardware.h>
+#include <asm/mach/map.h>
+#include <mach/iomux-v3.h>
+
+#define IOMUX_BASE	IO_ADDRESS(IOMUXC_BASE_ADDR)
+
+static unsigned long iomux_v3_pad_alloc_map[0x200 / BITS_PER_LONG];
+
+/*
+ * setups a single pin:
+ * 	- reserves the pin so that it is not claimed by another driver
+ * 	- setups the iomux according to the configuration
+ */
+int mxc_iomux_v3_setup_pad(struct pad_desc *pad)
+{
+	unsigned int pad_ofs = pad->pad_ctrl_ofs;
+
+	if (test_and_set_bit(pad_ofs >> 2, iomux_v3_pad_alloc_map))
+		return -EBUSY;
+	if (pad->mux_ctrl_ofs)
+		__raw_writel(pad->mux_mode, IOMUX_BASE + pad->mux_ctrl_ofs);
+
+	if (pad->select_input_ofs)
+		__raw_writel(pad->select_input,
+				IOMUX_BASE + pad->select_input_ofs);
+
+	if (!(pad->pad_ctrl & NO_PAD_CTRL))
+		__raw_writel(pad->pad_ctrl, IOMUX_BASE + pad->pad_ctrl_ofs);
+	return 0;
+}
+EXPORT_SYMBOL(mxc_iomux_v3_setup_pad);
+
+int mxc_iomux_v3_setup_multiple_pads(struct pad_desc *pad_list, unsigned count)
+{
+	struct pad_desc *p = pad_list;
+	int i;
+	int ret;
+
+	for (i = 0; i < count; i++) {
+		ret = mxc_iomux_v3_setup_pad(p);
+		if (ret)
+			goto setup_error;
+		p++;
+	}
+	return 0;
+
+setup_error:
+	mxc_iomux_v3_release_multiple_pads(pad_list, i);
+	return ret;
+}
+EXPORT_SYMBOL(mxc_iomux_v3_setup_multiple_pads);
+
+void mxc_iomux_v3_release_pad(struct pad_desc *pad)
+{
+	unsigned int pad_ofs = pad->pad_ctrl_ofs;
+
+	clear_bit(pad_ofs >> 2, iomux_v3_pad_alloc_map);
+}
+EXPORT_SYMBOL(mxc_iomux_v3_release_pad);
+
+void mxc_iomux_v3_release_multiple_pads(struct pad_desc *pad_list, int count)
+{
+	struct pad_desc *p = pad_list;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		mxc_iomux_v3_release_pad(p);
+		p++;
+	}
+}
+EXPORT_SYMBOL(mxc_iomux_v3_release_multiple_pads);
-- 
cgit v0.10.2


From 218deaa743fde30033caeb91fa211063620399de Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 1 Apr 2009 11:13:22 +0200
Subject: MX35: Add iomux pin defintions

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx35.h b/arch/arm/plat-mxc/include/mach/iomux-mx35.h
new file mode 100644
index 0000000..00b0ac1
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/iomux-mx35.h
@@ -0,0 +1,1267 @@
+/*
+ * Copyright (C, NO_PAD_CTRL) 2009 by Jan Weitzel Phytec Messtechnik GmbH <armlinux@phytec.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option, NO_PAD_CTRL) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __MACH_IOMUX_MX35_H__
+#define __MACH_IOMUX_MX35_H__
+
+#include <mach/iomux-v3.h>
+
+/*
+ * The naming convention for the pad modes is MX35_PAD_<padname>__<padmode>
+ * If <padname> or <padmode> refers to a GPIO, it is named
+ * GPIO_<unit>_<num> see also iomux-v3.h
+ */
+
+/*									  PAD    MUX   ALT INPSE PATH */
+#define MX35_PAD_CAPTURE__GPT_CAPIN1				IOMUX_PAD(0x328, 0x004, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CAPTURE__GPT_CMPOUT2				IOMUX_PAD(0x328, 0x004, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CAPTURE__CSPI2_SS1				IOMUX_PAD(0x328, 0x004, 2, 0x7f4, 0, NO_PAD_CTRL)
+#define MX35_PAD_CAPTURE__EPIT1_EPITO				IOMUX_PAD(0x328, 0x004, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CAPTURE__CCM_CLK32K				IOMUX_PAD(0x328, 0x004, 4, 0x7d0, 0, NO_PAD_CTRL)
+#define MX35_PAD_CAPTURE__GPIO1_4				IOMUX_PAD(0x328, 0x004, 5, 0x850, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_COMPARE__GPT_CMPOUT1				IOMUX_PAD(0x32c, 0x008, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_COMPARE__GPT_CAPIN2				IOMUX_PAD(0x32c, 0x008, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_COMPARE__GPT_CMPOUT3				IOMUX_PAD(0x32c, 0x008, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_COMPARE__EPIT2_EPITO				IOMUX_PAD(0x32c, 0x008, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_COMPARE__GPIO1_5				IOMUX_PAD(0x32c, 0x008, 5, 0x854, 0, NO_PAD_CTRL)
+#define MX35_PAD_COMPARE__SDMA_EXTDMA_2				IOMUX_PAD(0x32c, 0x008, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_WDOG_RST__WDOG_WDOG_B				IOMUX_PAD(0x330, 0x00c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_WDOG_RST__IPU_FLASH_STROBE			IOMUX_PAD(0x330, 0x00c, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_WDOG_RST__GPIO1_6				IOMUX_PAD(0x330, 0x00c, 5, 0x858, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_GPIO1_0__GPIO1_0				IOMUX_PAD(0x334, 0x010, 0, 0x82c, 0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO1_0__CCM_PMIC_RDY				IOMUX_PAD(0x334, 0x010, 1, 0x7d4, 0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO1_0__OWIRE_LINE				IOMUX_PAD(0x334, 0x010, 2, 0x990, 0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO1_0__SDMA_EXTDMA_0				IOMUX_PAD(0x334, 0x010, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_GPIO1_1__GPIO1_1				IOMUX_PAD(0x338, 0x014, 0, 0x838, 0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO1_1__PWM_PWMO				IOMUX_PAD(0x338, 0x014, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO1_1__CSPI1_SS2				IOMUX_PAD(0x338, 0x014, 3, 0x7d8, 0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO1_1__SCC_TAMPER_DETECT			IOMUX_PAD(0x338, 0x014, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO1_1__SDMA_EXTDMA_1				IOMUX_PAD(0x338, 0x014, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_GPIO2_0__GPIO2_0				IOMUX_PAD(0x33c, 0x018, 0, 0x868, 0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO2_0__USB_TOP_USBOTG_CLK			IOMUX_PAD(0x33c, 0x018, 1, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_GPIO3_0__GPIO3_0				IOMUX_PAD(0x340, 0x01c, 0, 0x8e8, 0, NO_PAD_CTRL)
+#define MX35_PAD_GPIO3_0__USB_TOP_USBH2_CLK			IOMUX_PAD(0x340, 0x01c, 1, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_RESET_IN_B__CCM_RESET_IN_B			IOMUX_PAD(0x344, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_POR_B__CCM_POR_B				IOMUX_PAD(0x348, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CLKO__CCM_CLKO					IOMUX_PAD(0x34c, 0x020, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CLKO__GPIO1_8					IOMUX_PAD(0x34c, 0x020, 5, 0x860, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_BOOT_MODE0__CCM_BOOT_MODE_0			IOMUX_PAD(0x350, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_BOOT_MODE1__CCM_BOOT_MODE_1			IOMUX_PAD(0x354, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CLK_MODE0__CCM_CLK_MODE_0			IOMUX_PAD(0x358, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CLK_MODE1__CCM_CLK_MODE_1			IOMUX_PAD(0x35c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_POWER_FAIL__CCM_DSM_WAKEUP_INT_26		IOMUX_PAD(0x360, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_VSTBY__CCM_VSTBY				IOMUX_PAD(0x364, 0x024, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_VSTBY__GPIO1_7					IOMUX_PAD(0x364, 0x024, 5, 0x85c, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_A0__EMI_EIM_DA_L_0				IOMUX_PAD(0x368, 0x028, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A1__EMI_EIM_DA_L_1				IOMUX_PAD(0x36c, 0x02c, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A2__EMI_EIM_DA_L_2				IOMUX_PAD(0x370, 0x030, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A3__EMI_EIM_DA_L_3				IOMUX_PAD(0x374, 0x034, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A4__EMI_EIM_DA_L_4				IOMUX_PAD(0x378, 0x038, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A5__EMI_EIM_DA_L_5				IOMUX_PAD(0x37c, 0x03c, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A6__EMI_EIM_DA_L_6				IOMUX_PAD(0x380, 0x040, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A7__EMI_EIM_DA_L_7				IOMUX_PAD(0x384, 0x044, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A8__EMI_EIM_DA_H_8				IOMUX_PAD(0x388, 0x048, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A9__EMI_EIM_DA_H_9				IOMUX_PAD(0x38c, 0x04c, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A10__EMI_EIM_DA_H_10				IOMUX_PAD(0x390, 0x050, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_MA10__EMI_MA10					IOMUX_PAD(0x394, 0x054, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A11__EMI_EIM_DA_H_11				IOMUX_PAD(0x398, 0x058, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A12__EMI_EIM_DA_H_12				IOMUX_PAD(0x39c, 0x05c, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A13__EMI_EIM_DA_H_13				IOMUX_PAD(0x3a0, 0x060, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A14__EMI_EIM_DA_H2_14				IOMUX_PAD(0x3a4, 0x064, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A15__EMI_EIM_DA_H2_15				IOMUX_PAD(0x3a8, 0x068, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A16__EMI_EIM_A_16				IOMUX_PAD(0x3ac, 0x06c, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A17__EMI_EIM_A_17				IOMUX_PAD(0x3b0, 0x070, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A18__EMI_EIM_A_18				IOMUX_PAD(0x3b4, 0x074, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A19__EMI_EIM_A_19				IOMUX_PAD(0x3b8, 0x078, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A20__EMI_EIM_A_20				IOMUX_PAD(0x3bc, 0x07c, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A21__EMI_EIM_A_21				IOMUX_PAD(0x3c0, 0x080, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A22__EMI_EIM_A_22				IOMUX_PAD(0x3c4, 0x084, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A23__EMI_EIM_A_23				IOMUX_PAD(0x3c8, 0x088, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A24__EMI_EIM_A_24				IOMUX_PAD(0x3cc, 0x08c, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_A25__EMI_EIM_A_25				IOMUX_PAD(0x3d0, 0x090, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDBA1__EMI_EIM_SDBA1				IOMUX_PAD(0x3d4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDBA0__EMI_EIM_SDBA0				IOMUX_PAD(0x3d8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD0__EMI_DRAM_D_0				IOMUX_PAD(0x3dc, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD1__EMI_DRAM_D_1				IOMUX_PAD(0x3e0, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD2__EMI_DRAM_D_2				IOMUX_PAD(0x3e4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD3__EMI_DRAM_D_3				IOMUX_PAD(0x3e8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD4__EMI_DRAM_D_4				IOMUX_PAD(0x3ec, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD5__EMI_DRAM_D_5				IOMUX_PAD(0x3f0, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD6__EMI_DRAM_D_6				IOMUX_PAD(0x3f4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD7__EMI_DRAM_D_7				IOMUX_PAD(0x3f8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD8__EMI_DRAM_D_8				IOMUX_PAD(0x3fc, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD9__EMI_DRAM_D_9				IOMUX_PAD(0x400, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD10__EMI_DRAM_D_10				IOMUX_PAD(0x404, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD11__EMI_DRAM_D_11				IOMUX_PAD(0x408, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD12__EMI_DRAM_D_12				IOMUX_PAD(0x40c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD13__EMI_DRAM_D_13				IOMUX_PAD(0x410, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD14__EMI_DRAM_D_14				IOMUX_PAD(0x414, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD15__EMI_DRAM_D_15				IOMUX_PAD(0x418, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD16__EMI_DRAM_D_16				IOMUX_PAD(0x41c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD17__EMI_DRAM_D_17				IOMUX_PAD(0x420, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD18__EMI_DRAM_D_18				IOMUX_PAD(0x424, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD19__EMI_DRAM_D_19				IOMUX_PAD(0x428, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD20__EMI_DRAM_D_20				IOMUX_PAD(0x42c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD21__EMI_DRAM_D_21				IOMUX_PAD(0x430, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD22__EMI_DRAM_D_22				IOMUX_PAD(0x434, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD23__EMI_DRAM_D_23				IOMUX_PAD(0x438, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD24__EMI_DRAM_D_24				IOMUX_PAD(0x43c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD25__EMI_DRAM_D_25				IOMUX_PAD(0x440, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD26__EMI_DRAM_D_26				IOMUX_PAD(0x444, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD27__EMI_DRAM_D_27				IOMUX_PAD(0x448, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD28__EMI_DRAM_D_28				IOMUX_PAD(0x44c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD29__EMI_DRAM_D_29				IOMUX_PAD(0x450, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD30__EMI_DRAM_D_30				IOMUX_PAD(0x454, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD31__EMI_DRAM_D_31				IOMUX_PAD(0x458, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_DQM0__EMI_DRAM_DQM_0				IOMUX_PAD(0x45c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_DQM1__EMI_DRAM_DQM_1				IOMUX_PAD(0x460, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_DQM2__EMI_DRAM_DQM_2				IOMUX_PAD(0x464, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_DQM3__EMI_DRAM_DQM_3				IOMUX_PAD(0x468, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_EB0__EMI_EIM_EB0_B				IOMUX_PAD(0x46c, 0x094, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_EB1__EMI_EIM_EB1_B				IOMUX_PAD(0x470, 0x098, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_OE__EMI_EIM_OE					IOMUX_PAD(0x474, 0x09c, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CS0__EMI_EIM_CS0				IOMUX_PAD(0x478, 0x0a0, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CS1__EMI_EIM_CS1				IOMUX_PAD(0x47c, 0x0a4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CS1__EMI_NANDF_CE3				IOMUX_PAD(0x47c, 0x0a4, 3, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CS2__EMI_EIM_CS2				IOMUX_PAD(0x480, 0x0a8, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CS3__EMI_EIM_CS3				IOMUX_PAD(0x484, 0x0ac, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CS4__EMI_EIM_CS4				IOMUX_PAD(0x488, 0x0b0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CS4__EMI_DTACK_B				IOMUX_PAD(0x488, 0x0b0, 1, 0x800, 0, NO_PAD_CTRL)
+#define MX35_PAD_CS4__EMI_NANDF_CE1				IOMUX_PAD(0x488, 0x0b0, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CS4__GPIO1_20					IOMUX_PAD(0x488, 0x0b0, 5, 0x83c, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_CS5__EMI_EIM_CS5				IOMUX_PAD(0x48c, 0x0b4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CS5__CSPI2_SS2					IOMUX_PAD(0x48c, 0x0b4, 1, 0x7f8, 0, NO_PAD_CTRL)
+#define MX35_PAD_CS5__CSPI1_SS2					IOMUX_PAD(0x48c, 0x0b4, 2, 0x7d8, 1, NO_PAD_CTRL)
+#define MX35_PAD_CS5__EMI_NANDF_CE2				IOMUX_PAD(0x48c, 0x0b4, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CS5__GPIO1_21					IOMUX_PAD(0x48c, 0x0b4, 5, 0x840, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_NF_CE0__EMI_NANDF_CE0				IOMUX_PAD(0x490, 0x0b8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NF_CE0__GPIO1_22				IOMUX_PAD(0x490, 0x0b8, 5, 0x844, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_ECB__EMI_EIM_ECB				IOMUX_PAD(0x494, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LBA__EMI_EIM_LBA				IOMUX_PAD(0x498, 0x0bc, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_BCLK__EMI_EIM_BCLK				IOMUX_PAD(0x49c, 0x0c0, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_RW__EMI_EIM_RW					IOMUX_PAD(0x4a0, 0x0c4, 0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_RAS__EMI_DRAM_RAS				IOMUX_PAD(0x4a4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CAS__EMI_DRAM_CAS				IOMUX_PAD(0x4a8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDWE__EMI_DRAM_SDWE				IOMUX_PAD(0x4ac, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDCKE0__EMI_DRAM_SDCKE_0			IOMUX_PAD(0x4b0, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDCKE1__EMI_DRAM_SDCKE_1			IOMUX_PAD(0x4b4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDCLK__EMI_DRAM_SDCLK				IOMUX_PAD(0x4b8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDQS0__EMI_DRAM_SDQS_0				IOMUX_PAD(0x4bc, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDQS1__EMI_DRAM_SDQS_1				IOMUX_PAD(0x4c0, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDQS2__EMI_DRAM_SDQS_2				IOMUX_PAD(0x4c4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SDQS3__EMI_DRAM_SDQS_3				IOMUX_PAD(0x4c8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_NFWE_B__EMI_NANDF_WE_B				IOMUX_PAD(0x4cc, 0x0c8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFWE_B__USB_TOP_USBH2_DATA_3			IOMUX_PAD(0x4cc, 0x0c8, 1, 0x9d8, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFWE_B__IPU_DISPB_D0_VSYNC			IOMUX_PAD(0x4cc, 0x0c8, 2, 0x924, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFWE_B__GPIO2_18				IOMUX_PAD(0x4cc, 0x0c8, 5, 0x88c, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFWE_B__ARM11P_TOP_TRACE_0			IOMUX_PAD(0x4cc, 0x0c8, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_NFRE_B__EMI_NANDF_RE_B				IOMUX_PAD(0x4d0, 0x0cc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFRE_B__USB_TOP_USBH2_DIR			IOMUX_PAD(0x4d0, 0x0cc, 1, 0x9ec, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFRE_B__IPU_DISPB_BCLK				IOMUX_PAD(0x4d0, 0x0cc, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFRE_B__GPIO2_19				IOMUX_PAD(0x4d0, 0x0cc, 5, 0x890, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFRE_B__ARM11P_TOP_TRACE_1			IOMUX_PAD(0x4d0, 0x0cc, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_NFALE__EMI_NANDF_ALE				IOMUX_PAD(0x4d4, 0x0d0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFALE__USB_TOP_USBH2_STP			IOMUX_PAD(0x4d4, 0x0d0, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFALE__IPU_DISPB_CS0				IOMUX_PAD(0x4d4, 0x0d0, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFALE__GPIO2_20				IOMUX_PAD(0x4d4, 0x0d0, 5, 0x898, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFALE__ARM11P_TOP_TRACE_2			IOMUX_PAD(0x4d4, 0x0d0, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_NFCLE__EMI_NANDF_CLE				IOMUX_PAD(0x4d8, 0x0d4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFCLE__USB_TOP_USBH2_NXT			IOMUX_PAD(0x4d8, 0x0d4, 1, 0x9f0, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFCLE__IPU_DISPB_PAR_RS			IOMUX_PAD(0x4d8, 0x0d4, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFCLE__GPIO2_21				IOMUX_PAD(0x4d8, 0x0d4, 5, 0x89c, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFCLE__ARM11P_TOP_TRACE_3			IOMUX_PAD(0x4d8, 0x0d4, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_NFWP_B__EMI_NANDF_WP_B				IOMUX_PAD(0x4dc, 0x0d8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFWP_B__USB_TOP_USBH2_DATA_7			IOMUX_PAD(0x4dc, 0x0d8, 1, 0x9e8, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFWP_B__IPU_DISPB_WR				IOMUX_PAD(0x4dc, 0x0d8, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFWP_B__GPIO2_22				IOMUX_PAD(0x4dc, 0x0d8, 5, 0x8a0, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFWP_B__ARM11P_TOP_TRCTL			IOMUX_PAD(0x4dc, 0x0d8, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_NFRB__EMI_NANDF_RB				IOMUX_PAD(0x4e0, 0x0dc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFRB__IPU_DISPB_RD				IOMUX_PAD(0x4e0, 0x0dc, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_NFRB__GPIO2_23					IOMUX_PAD(0x4e0, 0x0dc, 5, 0x8a4, 0, NO_PAD_CTRL)
+#define MX35_PAD_NFRB__ARM11P_TOP_TRCLK				IOMUX_PAD(0x4e0, 0x0dc, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D15__EMI_EIM_D_15				IOMUX_PAD(0x4e4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D14__EMI_EIM_D_14				IOMUX_PAD(0x4e8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D13__EMI_EIM_D_13				IOMUX_PAD(0x4ec, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D12__EMI_EIM_D_12				IOMUX_PAD(0x4f0, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D11__EMI_EIM_D_11				IOMUX_PAD(0x4f4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D10__EMI_EIM_D_10				IOMUX_PAD(0x4f8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D9__EMI_EIM_D_9				IOMUX_PAD(0x4fc, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D8__EMI_EIM_D_8				IOMUX_PAD(0x500, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D7__EMI_EIM_D_7				IOMUX_PAD(0x504, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D6__EMI_EIM_D_6				IOMUX_PAD(0x508, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D5__EMI_EIM_D_5				IOMUX_PAD(0x50c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D4__EMI_EIM_D_4				IOMUX_PAD(0x510, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D3__EMI_EIM_D_3				IOMUX_PAD(0x514, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D2__EMI_EIM_D_2				IOMUX_PAD(0x518, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D1__EMI_EIM_D_1				IOMUX_PAD(0x51c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D0__EMI_EIM_D_0				IOMUX_PAD(0x520, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_D8__IPU_CSI_D_8				IOMUX_PAD(0x524, 0x0e0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D8__KPP_COL_0				IOMUX_PAD(0x524, 0x0e0, 1, 0x950, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D8__GPIO1_20				IOMUX_PAD(0x524, 0x0e0, 5, 0x83c, 1, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D8__ARM11P_TOP_EVNTBUS_13			IOMUX_PAD(0x524, 0x0e0, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_D9__IPU_CSI_D_9				IOMUX_PAD(0x528, 0x0e4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D9__KPP_COL_1				IOMUX_PAD(0x528, 0x0e4, 1, 0x954, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D9__GPIO1_21				IOMUX_PAD(0x528, 0x0e4, 5, 0x840, 1, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D9__ARM11P_TOP_EVNTBUS_14			IOMUX_PAD(0x528, 0x0e4, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_D10__IPU_CSI_D_10				IOMUX_PAD(0x52c, 0x0e8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D10__KPP_COL_2				IOMUX_PAD(0x52c, 0x0e8, 1, 0x958, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D10__GPIO1_22				IOMUX_PAD(0x52c, 0x0e8, 5, 0x844, 1, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D10__ARM11P_TOP_EVNTBUS_15			IOMUX_PAD(0x52c, 0x0e8, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_D11__IPU_CSI_D_11				IOMUX_PAD(0x530, 0x0ec, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D11__KPP_COL_3				IOMUX_PAD(0x530, 0x0ec, 1, 0x95c, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D11__GPIO1_23				IOMUX_PAD(0x530, 0x0ec, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_D12__IPU_CSI_D_12				IOMUX_PAD(0x534, 0x0f0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D12__KPP_ROW_0				IOMUX_PAD(0x534, 0x0f0, 1, 0x970, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D12__GPIO1_24				IOMUX_PAD(0x534, 0x0f0, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_D13__IPU_CSI_D_13				IOMUX_PAD(0x538, 0x0f4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D13__KPP_ROW_1				IOMUX_PAD(0x538, 0x0f4, 1, 0x974, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D13__GPIO1_25				IOMUX_PAD(0x538, 0x0f4, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_D14__IPU_CSI_D_14				IOMUX_PAD(0x53c, 0x0f8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D14__KPP_ROW_2				IOMUX_PAD(0x53c, 0x0f8, 1, 0x978, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D14__GPIO1_26				IOMUX_PAD(0x53c, 0x0f8, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_D15__IPU_CSI_D_15				IOMUX_PAD(0x540, 0x0fc, 0, 0x97c, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D15__KPP_ROW_3				IOMUX_PAD(0x540, 0x0fc, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_D15__GPIO1_27				IOMUX_PAD(0x540, 0x0fc, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_MCLK__IPU_CSI_MCLK				IOMUX_PAD(0x544, 0x100, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_MCLK__GPIO1_28				IOMUX_PAD(0x544, 0x100, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_VSYNC__IPU_CSI_VSYNC			IOMUX_PAD(0x548, 0x104, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_VSYNC__GPIO1_29				IOMUX_PAD(0x548, 0x104, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_HSYNC__IPU_CSI_HSYNC			IOMUX_PAD(0x54c, 0x108, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_HSYNC__GPIO1_30				IOMUX_PAD(0x54c, 0x108, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSI_PIXCLK__IPU_CSI_PIXCLK			IOMUX_PAD(0x550, 0x10c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSI_PIXCLK__GPIO1_31				IOMUX_PAD(0x550, 0x10c, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_I2C1_CLK__I2C1_SCL				IOMUX_PAD(0x554, 0x110, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_I2C1_CLK__GPIO2_24				IOMUX_PAD(0x554, 0x110, 5, 0x8a8, 0, NO_PAD_CTRL)
+#define MX35_PAD_I2C1_CLK__CCM_USB_BYP_CLK			IOMUX_PAD(0x554, 0x110, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_I2C1_DAT__I2C1_SDA				IOMUX_PAD(0x558, 0x114, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_I2C1_DAT__GPIO2_25				IOMUX_PAD(0x558, 0x114, 5, 0x8ac, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_I2C2_CLK__I2C2_SCL				IOMUX_PAD(0x55c, 0x118, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_I2C2_CLK__CAN1_TXCAN				IOMUX_PAD(0x55c, 0x118, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_I2C2_CLK__USB_TOP_USBH2_PWR			IOMUX_PAD(0x55c, 0x118, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_I2C2_CLK__GPIO2_26				IOMUX_PAD(0x55c, 0x118, 5, 0x8b0, 0, NO_PAD_CTRL)
+#define MX35_PAD_I2C2_CLK__SDMA_DEBUG_BUS_DEVICE_2		IOMUX_PAD(0x55c, 0x118, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_I2C2_DAT__I2C2_SDA				IOMUX_PAD(0x560, 0x11c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_I2C2_DAT__CAN1_RXCAN				IOMUX_PAD(0x560, 0x11c, 1, 0x7c8, 0, NO_PAD_CTRL)
+#define MX35_PAD_I2C2_DAT__USB_TOP_USBH2_OC			IOMUX_PAD(0x560, 0x11c, 2, 0x9f4, 0, NO_PAD_CTRL)
+#define MX35_PAD_I2C2_DAT__GPIO2_27				IOMUX_PAD(0x560, 0x11c, 5, 0x8b4, 0, NO_PAD_CTRL)
+#define MX35_PAD_I2C2_DAT__SDMA_DEBUG_BUS_DEVICE_3		IOMUX_PAD(0x560, 0x11c, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_STXD4__AUDMUX_AUD4_TXD				IOMUX_PAD(0x564, 0x120, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_STXD4__GPIO2_28				IOMUX_PAD(0x564, 0x120, 5, 0x8b8, 0, NO_PAD_CTRL)
+#define MX35_PAD_STXD4__ARM11P_TOP_ARM_COREASID0		IOMUX_PAD(0x564, 0x120, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SRXD4__AUDMUX_AUD4_RXD				IOMUX_PAD(0x568, 0x124, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SRXD4__GPIO2_29				IOMUX_PAD(0x568, 0x124, 5, 0x8bc, 0, NO_PAD_CTRL)
+#define MX35_PAD_SRXD4__ARM11P_TOP_ARM_COREASID1		IOMUX_PAD(0x568, 0x124, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SCK4__AUDMUX_AUD4_TXC				IOMUX_PAD(0x56c, 0x128, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SCK4__GPIO2_30					IOMUX_PAD(0x56c, 0x128, 5, 0x8c4, 0, NO_PAD_CTRL)
+#define MX35_PAD_SCK4__ARM11P_TOP_ARM_COREASID2			IOMUX_PAD(0x56c, 0x128, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_STXFS4__AUDMUX_AUD4_TXFS			IOMUX_PAD(0x570, 0x12c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_STXFS4__GPIO2_31				IOMUX_PAD(0x570, 0x12c, 5, 0x8c8, 0, NO_PAD_CTRL)
+#define MX35_PAD_STXFS4__ARM11P_TOP_ARM_COREASID3		IOMUX_PAD(0x570, 0x12c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_STXD5__AUDMUX_AUD5_TXD				IOMUX_PAD(0x574, 0x130, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_STXD5__SPDIF_SPDIF_OUT1			IOMUX_PAD(0x574, 0x130, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_STXD5__CSPI2_MOSI				IOMUX_PAD(0x574, 0x130, 2, 0x7ec, 0, NO_PAD_CTRL)
+#define MX35_PAD_STXD5__GPIO1_0					IOMUX_PAD(0x574, 0x130, 5, 0x82c, 1, NO_PAD_CTRL)
+#define MX35_PAD_STXD5__ARM11P_TOP_ARM_COREASID4		IOMUX_PAD(0x574, 0x130, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SRXD5__AUDMUX_AUD5_RXD				IOMUX_PAD(0x578, 0x134, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SRXD5__SPDIF_SPDIF_IN1				IOMUX_PAD(0x578, 0x134, 1, 0x998, 0, NO_PAD_CTRL)
+#define MX35_PAD_SRXD5__CSPI2_MISO				IOMUX_PAD(0x578, 0x134, 2, 0x7e8, 0, NO_PAD_CTRL)
+#define MX35_PAD_SRXD5__GPIO1_1					IOMUX_PAD(0x578, 0x134, 5, 0x838, 1, NO_PAD_CTRL)
+#define MX35_PAD_SRXD5__ARM11P_TOP_ARM_COREASID5		IOMUX_PAD(0x578, 0x134, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SCK5__AUDMUX_AUD5_TXC				IOMUX_PAD(0x57c, 0x138, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SCK5__SPDIF_SPDIF_EXTCLK			IOMUX_PAD(0x57c, 0x138, 1, 0x994, 0, NO_PAD_CTRL)
+#define MX35_PAD_SCK5__CSPI2_SCLK				IOMUX_PAD(0x57c, 0x138, 2, 0x7e0, 0, NO_PAD_CTRL)
+#define MX35_PAD_SCK5__GPIO1_2					IOMUX_PAD(0x57c, 0x138, 5, 0x848, 0, NO_PAD_CTRL)
+#define MX35_PAD_SCK5__ARM11P_TOP_ARM_COREASID6			IOMUX_PAD(0x57c, 0x138, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_STXFS5__AUDMUX_AUD5_TXFS			IOMUX_PAD(0x580, 0x13c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_STXFS5__CSPI2_RDY				IOMUX_PAD(0x580, 0x13c, 2, 0x7e4, 0, NO_PAD_CTRL)
+#define MX35_PAD_STXFS5__GPIO1_3				IOMUX_PAD(0x580, 0x13c, 5, 0x84c, 0, NO_PAD_CTRL)
+#define MX35_PAD_STXFS5__ARM11P_TOP_ARM_COREASID7		IOMUX_PAD(0x580, 0x13c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SCKR__ESAI_SCKR				IOMUX_PAD(0x584, 0x140, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SCKR__GPIO1_4					IOMUX_PAD(0x584, 0x140, 5, 0x850, 1, NO_PAD_CTRL)
+#define MX35_PAD_SCKR__ARM11P_TOP_EVNTBUS_10			IOMUX_PAD(0x584, 0x140, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FSR__ESAI_FSR					IOMUX_PAD(0x588, 0x144, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FSR__GPIO1_5					IOMUX_PAD(0x588, 0x144, 5, 0x854, 1, NO_PAD_CTRL)
+#define MX35_PAD_FSR__ARM11P_TOP_EVNTBUS_11			IOMUX_PAD(0x588, 0x144, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_HCKR__ESAI_HCKR				IOMUX_PAD(0x58c, 0x148, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_HCKR__AUDMUX_AUD5_RXFS				IOMUX_PAD(0x58c, 0x148, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_HCKR__CSPI2_SS0				IOMUX_PAD(0x58c, 0x148, 2, 0x7f0, 0, NO_PAD_CTRL)
+#define MX35_PAD_HCKR__IPU_FLASH_STROBE				IOMUX_PAD(0x58c, 0x148, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_HCKR__GPIO1_6					IOMUX_PAD(0x58c, 0x148, 5, 0x858, 1, NO_PAD_CTRL)
+#define MX35_PAD_HCKR__ARM11P_TOP_EVNTBUS_12			IOMUX_PAD(0x58c, 0x148, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SCKT__ESAI_SCKT				IOMUX_PAD(0x590, 0x14c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SCKT__GPIO1_7					IOMUX_PAD(0x590, 0x14c, 5, 0x85c, 1, NO_PAD_CTRL)
+#define MX35_PAD_SCKT__IPU_CSI_D_0				IOMUX_PAD(0x590, 0x14c, 6, 0x930, 0, NO_PAD_CTRL)
+#define MX35_PAD_SCKT__KPP_ROW_2				IOMUX_PAD(0x590, 0x14c, 7, 0x978, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_FST__ESAI_FST					IOMUX_PAD(0x594, 0x150, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FST__GPIO1_8					IOMUX_PAD(0x594, 0x150, 5, 0x860, 1, NO_PAD_CTRL)
+#define MX35_PAD_FST__IPU_CSI_D_1				IOMUX_PAD(0x594, 0x150, 6, 0x934, 0, NO_PAD_CTRL)
+#define MX35_PAD_FST__KPP_ROW_3					IOMUX_PAD(0x594, 0x150, 7, 0x97c, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_HCKT__ESAI_HCKT				IOMUX_PAD(0x598, 0x154, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_HCKT__AUDMUX_AUD5_RXC				IOMUX_PAD(0x598, 0x154, 1, 0x7a8, 0, NO_PAD_CTRL)
+#define MX35_PAD_HCKT__GPIO1_9					IOMUX_PAD(0x598, 0x154, 5, 0x864, 0, NO_PAD_CTRL)
+#define MX35_PAD_HCKT__IPU_CSI_D_2				IOMUX_PAD(0x598, 0x154, 6, 0x938, 0, NO_PAD_CTRL)
+#define MX35_PAD_HCKT__KPP_COL_3				IOMUX_PAD(0x598, 0x154, 7, 0x95c, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_TX5_RX0__ESAI_TX5_RX0				IOMUX_PAD(0x59c, 0x158, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX5_RX0__AUDMUX_AUD4_RXC			IOMUX_PAD(0x59c, 0x158, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX5_RX0__CSPI2_SS2				IOMUX_PAD(0x59c, 0x158, 2, 0x7f8, 1, NO_PAD_CTRL)
+#define MX35_PAD_TX5_RX0__CAN2_TXCAN				IOMUX_PAD(0x59c, 0x158, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX5_RX0__UART2_DTR				IOMUX_PAD(0x59c, 0x158, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX5_RX0__GPIO1_10				IOMUX_PAD(0x59c, 0x158, 5, 0x830, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX5_RX0__EMI_M3IF_CHOSEN_MASTER_0		IOMUX_PAD(0x59c, 0x158, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_TX4_RX1__ESAI_TX4_RX1				IOMUX_PAD(0x5a0, 0x15c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX4_RX1__AUDMUX_AUD4_RXFS			IOMUX_PAD(0x5a0, 0x15c, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX4_RX1__CSPI2_SS3				IOMUX_PAD(0x5a0, 0x15c, 2, 0x7fc, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX4_RX1__CAN2_RXCAN				IOMUX_PAD(0x5a0, 0x15c, 3, 0x7cc, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX4_RX1__UART2_DSR				IOMUX_PAD(0x5a0, 0x15c, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX4_RX1__GPIO1_11				IOMUX_PAD(0x5a0, 0x15c, 5, 0x834, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX4_RX1__IPU_CSI_D_3				IOMUX_PAD(0x5a0, 0x15c, 6, 0x93c, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX4_RX1__KPP_ROW_0				IOMUX_PAD(0x5a0, 0x15c, 7, 0x970, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_TX3_RX2__ESAI_TX3_RX2				IOMUX_PAD(0x5a4, 0x160, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX3_RX2__I2C3_SCL				IOMUX_PAD(0x5a4, 0x160, 1, 0x91c, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX3_RX2__EMI_NANDF_CE1				IOMUX_PAD(0x5a4, 0x160, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX3_RX2__GPIO1_12				IOMUX_PAD(0x5a4, 0x160, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX3_RX2__IPU_CSI_D_4				IOMUX_PAD(0x5a4, 0x160, 6, 0x940, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX3_RX2__KPP_ROW_1				IOMUX_PAD(0x5a4, 0x160, 7, 0x974, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_TX2_RX3__ESAI_TX2_RX3				IOMUX_PAD(0x5a8, 0x164, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX2_RX3__I2C3_SDA				IOMUX_PAD(0x5a8, 0x164, 1, 0x920, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX2_RX3__EMI_NANDF_CE2				IOMUX_PAD(0x5a8, 0x164, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX2_RX3__GPIO1_13				IOMUX_PAD(0x5a8, 0x164, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX2_RX3__IPU_CSI_D_5				IOMUX_PAD(0x5a8, 0x164, 6, 0x944, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX2_RX3__KPP_COL_0				IOMUX_PAD(0x5a8, 0x164, 7, 0x950, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_TX1__ESAI_TX1					IOMUX_PAD(0x5ac, 0x168, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX1__CCM_PMIC_RDY				IOMUX_PAD(0x5ac, 0x168, 1, 0x7d4, 1, NO_PAD_CTRL)
+#define MX35_PAD_TX1__CSPI1_SS2					IOMUX_PAD(0x5ac, 0x168, 2, 0x7d8, 2, NO_PAD_CTRL)
+#define MX35_PAD_TX1__EMI_NANDF_CE3				IOMUX_PAD(0x5ac, 0x168, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX1__UART2_RI					IOMUX_PAD(0x5ac, 0x168, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX1__GPIO1_14					IOMUX_PAD(0x5ac, 0x168, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX1__IPU_CSI_D_6				IOMUX_PAD(0x5ac, 0x168, 6, 0x948, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX1__KPP_COL_1					IOMUX_PAD(0x5ac, 0x168, 7, 0x954, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_TX0__ESAI_TX0					IOMUX_PAD(0x5b0, 0x16c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX0__SPDIF_SPDIF_EXTCLK			IOMUX_PAD(0x5b0, 0x16c, 1, 0x994, 1, NO_PAD_CTRL)
+#define MX35_PAD_TX0__CSPI1_SS3					IOMUX_PAD(0x5b0, 0x16c, 2, 0x7dc, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX0__EMI_DTACK_B				IOMUX_PAD(0x5b0, 0x16c, 3, 0x800, 1, NO_PAD_CTRL)
+#define MX35_PAD_TX0__UART2_DCD					IOMUX_PAD(0x5b0, 0x16c, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX0__GPIO1_15					IOMUX_PAD(0x5b0, 0x16c, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TX0__IPU_CSI_D_7				IOMUX_PAD(0x5b0, 0x16c, 6, 0x94c, 0, NO_PAD_CTRL)
+#define MX35_PAD_TX0__KPP_COL_2					IOMUX_PAD(0x5b0, 0x16c, 7, 0x958, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_CSPI1_MOSI__CSPI1_MOSI				IOMUX_PAD(0x5b4, 0x170, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_MOSI__GPIO1_16				IOMUX_PAD(0x5b4, 0x170, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_MOSI__ECT_CTI_TRIG_OUT1_2		IOMUX_PAD(0x5b4, 0x170, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSPI1_MISO__CSPI1_MISO				IOMUX_PAD(0x5b8, 0x174, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_MISO__GPIO1_17				IOMUX_PAD(0x5b8, 0x174, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_MISO__ECT_CTI_TRIG_OUT1_3		IOMUX_PAD(0x5b8, 0x174, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSPI1_SS0__CSPI1_SS0				IOMUX_PAD(0x5bc, 0x178, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS0__OWIRE_LINE				IOMUX_PAD(0x5bc, 0x178, 1, 0x990, 1, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS0__CSPI2_SS3				IOMUX_PAD(0x5bc, 0x178, 2, 0x7fc, 1, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS0__GPIO1_18				IOMUX_PAD(0x5bc, 0x178, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS0__ECT_CTI_TRIG_OUT1_4			IOMUX_PAD(0x5bc, 0x178, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSPI1_SS1__CSPI1_SS1				IOMUX_PAD(0x5c0, 0x17c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS1__PWM_PWMO				IOMUX_PAD(0x5c0, 0x17c, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS1__CCM_CLK32K				IOMUX_PAD(0x5c0, 0x17c, 2, 0x7d0, 1, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS1__GPIO1_19				IOMUX_PAD(0x5c0, 0x17c, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS1__IPU_DIAGB_29			IOMUX_PAD(0x5c0, 0x17c, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SS1__ECT_CTI_TRIG_OUT1_5			IOMUX_PAD(0x5c0, 0x17c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSPI1_SCLK__CSPI1_SCLK				IOMUX_PAD(0x5c4, 0x180, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SCLK__GPIO3_4				IOMUX_PAD(0x5c4, 0x180, 5, 0x904, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SCLK__IPU_DIAGB_30			IOMUX_PAD(0x5c4, 0x180, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SCLK__EMI_M3IF_CHOSEN_MASTER_1		IOMUX_PAD(0x5c4, 0x180, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CSPI1_SPI_RDY__CSPI1_RDY			IOMUX_PAD(0x5c8, 0x184, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SPI_RDY__GPIO3_5				IOMUX_PAD(0x5c8, 0x184, 5, 0x908, 0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SPI_RDY__IPU_DIAGB_31			IOMUX_PAD(0x5c8, 0x184, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CSPI1_SPI_RDY__EMI_M3IF_CHOSEN_MASTER_2	IOMUX_PAD(0x5c8, 0x184, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_RXD1__UART1_RXD_MUX				IOMUX_PAD(0x5cc, 0x188, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_RXD1__CSPI2_MOSI				IOMUX_PAD(0x5cc, 0x188, 1, 0x7ec, 1, NO_PAD_CTRL)
+#define MX35_PAD_RXD1__KPP_COL_4				IOMUX_PAD(0x5cc, 0x188, 4, 0x960, 0, NO_PAD_CTRL)
+#define MX35_PAD_RXD1__GPIO3_6					IOMUX_PAD(0x5cc, 0x188, 5, 0x90c, 0, NO_PAD_CTRL)
+#define MX35_PAD_RXD1__ARM11P_TOP_EVNTBUS_16			IOMUX_PAD(0x5cc, 0x188, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_TXD1__UART1_TXD_MUX				IOMUX_PAD(0x5d0, 0x18c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TXD1__CSPI2_MISO				IOMUX_PAD(0x5d0, 0x18c, 1, 0x7e8, 1, NO_PAD_CTRL)
+#define MX35_PAD_TXD1__KPP_COL_5				IOMUX_PAD(0x5d0, 0x18c, 4, 0x964, 0, NO_PAD_CTRL)
+#define MX35_PAD_TXD1__GPIO3_7					IOMUX_PAD(0x5d0, 0x18c, 5, 0x910, 0, NO_PAD_CTRL)
+#define MX35_PAD_TXD1__ARM11P_TOP_EVNTBUS_17			IOMUX_PAD(0x5d0, 0x18c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_RTS1__UART1_RTS				IOMUX_PAD(0x5d4, 0x190, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_RTS1__CSPI2_SCLK				IOMUX_PAD(0x5d4, 0x190, 1, 0x7e0, 1, NO_PAD_CTRL)
+#define MX35_PAD_RTS1__I2C3_SCL					IOMUX_PAD(0x5d4, 0x190, 2, 0x91c, 1, NO_PAD_CTRL)
+#define MX35_PAD_RTS1__IPU_CSI_D_0				IOMUX_PAD(0x5d4, 0x190, 3, 0x930, 1, NO_PAD_CTRL)
+#define MX35_PAD_RTS1__KPP_COL_6				IOMUX_PAD(0x5d4, 0x190, 4, 0x968, 0, NO_PAD_CTRL)
+#define MX35_PAD_RTS1__GPIO3_8					IOMUX_PAD(0x5d4, 0x190, 5, 0x914, 0, NO_PAD_CTRL)
+#define MX35_PAD_RTS1__EMI_NANDF_CE1				IOMUX_PAD(0x5d4, 0x190, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_RTS1__ARM11P_TOP_EVNTBUS_18			IOMUX_PAD(0x5d4, 0x190, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CTS1__UART1_CTS				IOMUX_PAD(0x5d8, 0x194, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CTS1__CSPI2_RDY				IOMUX_PAD(0x5d8, 0x194, 1, 0x7e4, 1, NO_PAD_CTRL)
+#define MX35_PAD_CTS1__I2C3_SDA					IOMUX_PAD(0x5d8, 0x194, 2, 0x920, 1, NO_PAD_CTRL)
+#define MX35_PAD_CTS1__IPU_CSI_D_1				IOMUX_PAD(0x5d8, 0x194, 3, 0x934, 1, NO_PAD_CTRL)
+#define MX35_PAD_CTS1__KPP_COL_7				IOMUX_PAD(0x5d8, 0x194, 4, 0x96c, 0, NO_PAD_CTRL)
+#define MX35_PAD_CTS1__GPIO3_9					IOMUX_PAD(0x5d8, 0x194, 5, 0x918, 0, NO_PAD_CTRL)
+#define MX35_PAD_CTS1__EMI_NANDF_CE2				IOMUX_PAD(0x5d8, 0x194, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CTS1__ARM11P_TOP_EVNTBUS_19			IOMUX_PAD(0x5d8, 0x194, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_RXD2__UART2_RXD_MUX				IOMUX_PAD(0x5dc, 0x198, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_RXD2__KPP_ROW_4				IOMUX_PAD(0x5dc, 0x198, 4, 0x980, 0, NO_PAD_CTRL)
+#define MX35_PAD_RXD2__GPIO3_10					IOMUX_PAD(0x5dc, 0x198, 5, 0x8ec, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_TXD2__UART2_TXD_MUX				IOMUX_PAD(0x5e0, 0x19c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_TXD2__SPDIF_SPDIF_EXTCLK			IOMUX_PAD(0x5e0, 0x19c, 1, 0x994, 2, NO_PAD_CTRL)
+#define MX35_PAD_TXD2__KPP_ROW_5				IOMUX_PAD(0x5e0, 0x19c, 4, 0x984, 0, NO_PAD_CTRL)
+#define MX35_PAD_TXD2__GPIO3_11					IOMUX_PAD(0x5e0, 0x19c, 5, 0x8f0, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_RTS2__UART2_RTS				IOMUX_PAD(0x5e4, 0x1a0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_RTS2__SPDIF_SPDIF_IN1				IOMUX_PAD(0x5e4, 0x1a0, 1, 0x998, 1, NO_PAD_CTRL)
+#define MX35_PAD_RTS2__CAN2_RXCAN				IOMUX_PAD(0x5e4, 0x1a0, 2, 0x7cc, 1, NO_PAD_CTRL)
+#define MX35_PAD_RTS2__IPU_CSI_D_2				IOMUX_PAD(0x5e4, 0x1a0, 3, 0x938, 1, NO_PAD_CTRL)
+#define MX35_PAD_RTS2__KPP_ROW_6				IOMUX_PAD(0x5e4, 0x1a0, 4, 0x988, 0, NO_PAD_CTRL)
+#define MX35_PAD_RTS2__GPIO3_12					IOMUX_PAD(0x5e4, 0x1a0, 5, 0x8f4, 0, NO_PAD_CTRL)
+#define MX35_PAD_RTS2__AUDMUX_AUD5_RXC				IOMUX_PAD(0x5e4, 0x1a0, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_RTS2__UART3_RXD_MUX				IOMUX_PAD(0x5e4, 0x1a0, 7, 0x9a0, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_CTS2__UART2_CTS				IOMUX_PAD(0x5e8, 0x1a4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CTS2__SPDIF_SPDIF_OUT1				IOMUX_PAD(0x5e8, 0x1a4, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CTS2__CAN2_TXCAN				IOMUX_PAD(0x5e8, 0x1a4, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CTS2__IPU_CSI_D_3				IOMUX_PAD(0x5e8, 0x1a4, 3, 0x93c, 1, NO_PAD_CTRL)
+#define MX35_PAD_CTS2__KPP_ROW_7				IOMUX_PAD(0x5e8, 0x1a4, 4, 0x98c, 0, NO_PAD_CTRL)
+#define MX35_PAD_CTS2__GPIO3_13					IOMUX_PAD(0x5e8, 0x1a4, 5, 0x8f8, 0, NO_PAD_CTRL)
+#define MX35_PAD_CTS2__AUDMUX_AUD5_RXFS				IOMUX_PAD(0x5e8, 0x1a4, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CTS2__UART3_TXD_MUX				IOMUX_PAD(0x5e8, 0x1a4, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_RTCK__ARM11P_TOP_RTCK				IOMUX_PAD(0x5ec, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_TCK__SJC_TCK					IOMUX_PAD(0x5f0, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_TMS__SJC_TMS					IOMUX_PAD(0x5f4, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_TDI__SJC_TDI					IOMUX_PAD(0x5f8, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_TDO__SJC_TDO					IOMUX_PAD(0x5fc, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_TRSTB__SJC_TRSTB				IOMUX_PAD(0x600, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_DE_B__SJC_DE_B					IOMUX_PAD(0x604, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SJC_MOD__SJC_MOD				IOMUX_PAD(0x608, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_USBOTG_PWR__USB_TOP_USBOTG_PWR			IOMUX_PAD(0x60c, 0x1a8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_USBOTG_PWR__USB_TOP_USBH2_PWR			IOMUX_PAD(0x60c, 0x1a8, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_USBOTG_PWR__GPIO3_14				IOMUX_PAD(0x60c, 0x1a8, 5, 0x8fc, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_USBOTG_OC__USB_TOP_USBOTG_OC			IOMUX_PAD(0x610, 0x1ac, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_USBOTG_OC__USB_TOP_USBH2_OC			IOMUX_PAD(0x610, 0x1ac, 1, 0x9f4, 1, NO_PAD_CTRL)
+#define MX35_PAD_USBOTG_OC__GPIO3_15				IOMUX_PAD(0x610, 0x1ac, 5, 0x900, 0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD0__IPU_DISPB_DAT_0				IOMUX_PAD(0x614, 0x1b0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD0__GPIO2_0					IOMUX_PAD(0x614, 0x1b0, 5, 0x868, 1, NO_PAD_CTRL)
+#define MX35_PAD_LD0__SDMA_SDMA_DEBUG_PC_0			IOMUX_PAD(0x614, 0x1b0, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD1__IPU_DISPB_DAT_1				IOMUX_PAD(0x618, 0x1b4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD1__GPIO2_1					IOMUX_PAD(0x618, 0x1b4, 5, 0x894, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD1__SDMA_SDMA_DEBUG_PC_1			IOMUX_PAD(0x618, 0x1b4, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD2__IPU_DISPB_DAT_2				IOMUX_PAD(0x61c, 0x1b8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD2__GPIO2_2					IOMUX_PAD(0x61c, 0x1b8, 5, 0x8c0, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD2__SDMA_SDMA_DEBUG_PC_2			IOMUX_PAD(0x61c, 0x1b8, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD3__IPU_DISPB_DAT_3				IOMUX_PAD(0x620, 0x1bc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD3__GPIO2_3					IOMUX_PAD(0x620, 0x1bc, 5, 0x8cc, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD3__SDMA_SDMA_DEBUG_PC_3			IOMUX_PAD(0x620, 0x1bc, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD4__IPU_DISPB_DAT_4				IOMUX_PAD(0x624, 0x1c0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD4__GPIO2_4					IOMUX_PAD(0x624, 0x1c0, 5, 0x8d0, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD4__SDMA_SDMA_DEBUG_PC_4			IOMUX_PAD(0x624, 0x1c0, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD5__IPU_DISPB_DAT_5				IOMUX_PAD(0x628, 0x1c4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD5__GPIO2_5					IOMUX_PAD(0x628, 0x1c4, 5, 0x8d4, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD5__SDMA_SDMA_DEBUG_PC_5			IOMUX_PAD(0x628, 0x1c4, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD6__IPU_DISPB_DAT_6				IOMUX_PAD(0x62c, 0x1c8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD6__GPIO2_6					IOMUX_PAD(0x62c, 0x1c8, 5, 0x8d8, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD6__SDMA_SDMA_DEBUG_PC_6			IOMUX_PAD(0x62c, 0x1c8, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD7__IPU_DISPB_DAT_7				IOMUX_PAD(0x630, 0x1cc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD7__GPIO2_7					IOMUX_PAD(0x630, 0x1cc, 5, 0x8dc, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD7__SDMA_SDMA_DEBUG_PC_7			IOMUX_PAD(0x630, 0x1cc, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD8__IPU_DISPB_DAT_8				IOMUX_PAD(0x634, 0x1d0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD8__GPIO2_8					IOMUX_PAD(0x634, 0x1d0, 5, 0x8e0, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD8__SDMA_SDMA_DEBUG_PC_8			IOMUX_PAD(0x634, 0x1d0, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD9__IPU_DISPB_DAT_9				IOMUX_PAD(0x638, 0x1d4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD9__GPIO2_9					IOMUX_PAD(0x638, 0x1d4, 5, 0x8e4  0, NO_PAD_CTRL)
+#define MX35_PAD_LD9__SDMA_SDMA_DEBUG_PC_9			IOMUX_PAD(0x638, 0x1d4, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD10__IPU_DISPB_DAT_10				IOMUX_PAD(0x63c, 0x1d8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD10__GPIO2_10					IOMUX_PAD(0x63c, 0x1d8, 5, 0x86c, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD10__SDMA_SDMA_DEBUG_PC_10			IOMUX_PAD(0x63c, 0x1d8, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD11__IPU_DISPB_DAT_11				IOMUX_PAD(0x640, 0x1dc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD11__GPIO2_11					IOMUX_PAD(0x640, 0x1dc, 5, 0x870, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD11__SDMA_SDMA_DEBUG_PC_11			IOMUX_PAD(0x640, 0x1dc, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD11__ARM11P_TOP_TRACE_4			IOMUX_PAD(0x640, 0x1dc, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD12__IPU_DISPB_DAT_12				IOMUX_PAD(0x644, 0x1e0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD12__GPIO2_12					IOMUX_PAD(0x644, 0x1e0, 5, 0x874, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD12__SDMA_SDMA_DEBUG_PC_12			IOMUX_PAD(0x644, 0x1e0, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD12__ARM11P_TOP_TRACE_5			IOMUX_PAD(0x644, 0x1e0, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD13__IPU_DISPB_DAT_13				IOMUX_PAD(0x648, 0x1e4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD13__GPIO2_13					IOMUX_PAD(0x648, 0x1e4, 5, 0x878, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD13__SDMA_SDMA_DEBUG_PC_13			IOMUX_PAD(0x648, 0x1e4, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD13__ARM11P_TOP_TRACE_6			IOMUX_PAD(0x648, 0x1e4, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD14__IPU_DISPB_DAT_14				IOMUX_PAD(0x64c, 0x1e8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD14__GPIO2_14					IOMUX_PAD(0x64c, 0x1e8, 5, 0x87c, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD14__SDMA_SDMA_DEBUG_EVENT_CHANNEL_0		IOMUX_PAD(0x64c, 0x1e8, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD14__ARM11P_TOP_TRACE_7			IOMUX_PAD(0x64c, 0x1e8, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD15__IPU_DISPB_DAT_15				IOMUX_PAD(0x650, 0x1ec, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD15__GPIO2_15					IOMUX_PAD(0x650, 0x1ec, 5, 0x880, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD15__SDMA_SDMA_DEBUG_EVENT_CHANNEL_1		IOMUX_PAD(0x650, 0x1ec, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD15__ARM11P_TOP_TRACE_8			IOMUX_PAD(0x650, 0x1ec, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD16__IPU_DISPB_DAT_16				IOMUX_PAD(0x654, 0x1f0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD16__IPU_DISPB_D12_VSYNC			IOMUX_PAD(0x654, 0x1f0, 2, 0x928, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD16__GPIO2_16					IOMUX_PAD(0x654, 0x1f0, 5, 0x884, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD16__SDMA_SDMA_DEBUG_EVENT_CHANNEL_2		IOMUX_PAD(0x654, 0x1f0, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD16__ARM11P_TOP_TRACE_9			IOMUX_PAD(0x654, 0x1f0, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD17__IPU_DISPB_DAT_17				IOMUX_PAD(0x658, 0x1f4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD17__IPU_DISPB_CS2				IOMUX_PAD(0x658, 0x1f4, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD17__GPIO2_17					IOMUX_PAD(0x658, 0x1f4, 5, 0x888, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD17__SDMA_SDMA_DEBUG_EVENT_CHANNEL_3		IOMUX_PAD(0x658, 0x1f4, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD17__ARM11P_TOP_TRACE_10			IOMUX_PAD(0x658, 0x1f4, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD18__IPU_DISPB_DAT_18				IOMUX_PAD(0x65c, 0x1f8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD18__IPU_DISPB_D0_VSYNC			IOMUX_PAD(0x65c, 0x1f8, 1, 0x924, 1, NO_PAD_CTRL)
+#define MX35_PAD_LD18__IPU_DISPB_D12_VSYNC			IOMUX_PAD(0x65c, 0x1f8, 2, 0x928, 1, NO_PAD_CTRL)
+#define MX35_PAD_LD18__ESDHC3_CMD				IOMUX_PAD(0x65c, 0x1f8, 3, 0x818, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD18__USB_TOP_USBOTG_DATA_3			IOMUX_PAD(0x65c, 0x1f8, 4, 0x9b0, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD18__GPIO3_24					IOMUX_PAD(0x65c, 0x1f8, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD18__SDMA_SDMA_DEBUG_EVENT_CHANNEL_4		IOMUX_PAD(0x65c, 0x1f8, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD18__ARM11P_TOP_TRACE_11			IOMUX_PAD(0x65c, 0x1f8, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD19__IPU_DISPB_DAT_19				IOMUX_PAD(0x660, 0x1fc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD19__IPU_DISPB_BCLK				IOMUX_PAD(0x660, 0x1fc, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD19__IPU_DISPB_CS1				IOMUX_PAD(0x660, 0x1fc, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD19__ESDHC3_CLK				IOMUX_PAD(0x660, 0x1fc, 3, 0x814, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD19__USB_TOP_USBOTG_DIR			IOMUX_PAD(0x660, 0x1fc, 4, 0x9c4, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD19__GPIO3_25					IOMUX_PAD(0x660, 0x1fc, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD19__SDMA_SDMA_DEBUG_EVENT_CHANNEL_5		IOMUX_PAD(0x660, 0x1fc, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD19__ARM11P_TOP_TRACE_12			IOMUX_PAD(0x660, 0x1fc, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD20__IPU_DISPB_DAT_20				IOMUX_PAD(0x664, 0x200, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD20__IPU_DISPB_CS0				IOMUX_PAD(0x664, 0x200, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD20__IPU_DISPB_SD_CLK				IOMUX_PAD(0x664, 0x200, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD20__ESDHC3_DAT0				IOMUX_PAD(0x664, 0x200, 3, 0x81c, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD20__GPIO3_26					IOMUX_PAD(0x664, 0x200, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD20__SDMA_SDMA_DEBUG_CORE_STATUS_3		IOMUX_PAD(0x664, 0x200, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD20__ARM11P_TOP_TRACE_13			IOMUX_PAD(0x664, 0x200, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD21__IPU_DISPB_DAT_21				IOMUX_PAD(0x668, 0x204, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD21__IPU_DISPB_PAR_RS				IOMUX_PAD(0x668, 0x204, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD21__IPU_DISPB_SER_RS				IOMUX_PAD(0x668, 0x204, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD21__ESDHC3_DAT1				IOMUX_PAD(0x668, 0x204, 3, 0x820, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD21__USB_TOP_USBOTG_STP			IOMUX_PAD(0x668, 0x204, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD21__GPIO3_27					IOMUX_PAD(0x668, 0x204, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD21__SDMA_DEBUG_EVENT_CHANNEL_SEL		IOMUX_PAD(0x668, 0x204, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD21__ARM11P_TOP_TRACE_14			IOMUX_PAD(0x668, 0x204, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD22__IPU_DISPB_DAT_22				IOMUX_PAD(0x66c, 0x208, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD22__IPU_DISPB_WR				IOMUX_PAD(0x66c, 0x208, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD22__IPU_DISPB_SD_D_I				IOMUX_PAD(0x66c, 0x208, 2, 0x92c, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD22__ESDHC3_DAT2				IOMUX_PAD(0x66c, 0x208, 3, 0x824, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD22__USB_TOP_USBOTG_NXT			IOMUX_PAD(0x66c, 0x208, 4, 0x9c8, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD22__GPIO3_28					IOMUX_PAD(0x66c, 0x208, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD22__SDMA_DEBUG_BUS_ERROR			IOMUX_PAD(0x66c, 0x208, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD22__ARM11P_TOP_TRCTL				IOMUX_PAD(0x66c, 0x208, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_LD23__IPU_DISPB_DAT_23				IOMUX_PAD(0x670, 0x20c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD23__IPU_DISPB_RD				IOMUX_PAD(0x670, 0x20c, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD23__IPU_DISPB_SD_D_IO			IOMUX_PAD(0x670, 0x20c, 2, 0x92c, 1, NO_PAD_CTRL)
+#define MX35_PAD_LD23__ESDHC3_DAT3				IOMUX_PAD(0x670, 0x20c, 3, 0x828, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD23__USB_TOP_USBOTG_DATA_7			IOMUX_PAD(0x670, 0x20c, 4, 0x9c0, 0, NO_PAD_CTRL)
+#define MX35_PAD_LD23__GPIO3_29					IOMUX_PAD(0x670, 0x20c, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD23__SDMA_DEBUG_MATCHED_DMBUS			IOMUX_PAD(0x670, 0x20c, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_LD23__ARM11P_TOP_TRCLK				IOMUX_PAD(0x670, 0x20c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D3_HSYNC__IPU_DISPB_D3_HSYNC			IOMUX_PAD(0x674, 0x210, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_HSYNC__IPU_DISPB_SD_D_IO			IOMUX_PAD(0x674, 0x210, 2, 0x92c, 2, NO_PAD_CTRL)
+#define MX35_PAD_D3_HSYNC__GPIO3_30				IOMUX_PAD(0x674, 0x210, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_HSYNC__SDMA_DEBUG_RTBUFFER_WRITE		IOMUX_PAD(0x674, 0x210, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_HSYNC__ARM11P_TOP_TRACE_15			IOMUX_PAD(0x674, 0x210, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D3_FPSHIFT__IPU_DISPB_D3_CLK			IOMUX_PAD(0x678, 0x214, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_FPSHIFT__IPU_DISPB_SD_CLK			IOMUX_PAD(0x678, 0x214, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_FPSHIFT__GPIO3_31				IOMUX_PAD(0x678, 0x214, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_FPSHIFT__SDMA_SDMA_DEBUG_CORE_STATUS_0	IOMUX_PAD(0x678, 0x214, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_FPSHIFT__ARM11P_TOP_TRACE_16		IOMUX_PAD(0x678, 0x214, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D3_DRDY__IPU_DISPB_D3_DRDY			IOMUX_PAD(0x67c, 0x218, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_DRDY__IPU_DISPB_SD_D_O			IOMUX_PAD(0x67c, 0x218, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_DRDY__GPIO1_0				IOMUX_PAD(0x67c, 0x218, 5, 0x82c, 2, NO_PAD_CTRL)
+#define MX35_PAD_D3_DRDY__SDMA_SDMA_DEBUG_CORE_STATUS_1		IOMUX_PAD(0x67c, 0x218, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_DRDY__ARM11P_TOP_TRACE_17			IOMUX_PAD(0x67c, 0x218, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_CONTRAST__IPU_DISPB_CONTR			IOMUX_PAD(0x680, 0x21c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CONTRAST__GPIO1_1				IOMUX_PAD(0x680, 0x21c, 5, 0x838, 2, NO_PAD_CTRL)
+#define MX35_PAD_CONTRAST__SDMA_SDMA_DEBUG_CORE_STATUS_2	IOMUX_PAD(0x680, 0x21c, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_CONTRAST__ARM11P_TOP_TRACE_18			IOMUX_PAD(0x680, 0x21c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D3_VSYNC__IPU_DISPB_D3_VSYNC			IOMUX_PAD(0x684, 0x220, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_VSYNC__IPU_DISPB_CS1			IOMUX_PAD(0x684, 0x220, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_VSYNC__GPIO1_2				IOMUX_PAD(0x684, 0x220, 5, 0x848, 1, NO_PAD_CTRL)
+#define MX35_PAD_D3_VSYNC__SDMA_DEBUG_YIELD			IOMUX_PAD(0x684, 0x220, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_VSYNC__ARM11P_TOP_TRACE_19			IOMUX_PAD(0x684, 0x220, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D3_REV__IPU_DISPB_D3_REV			IOMUX_PAD(0x688, 0x224, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_REV__IPU_DISPB_SER_RS			IOMUX_PAD(0x688, 0x224, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_REV__GPIO1_3				IOMUX_PAD(0x688, 0x224, 5, 0x84c, 1, NO_PAD_CTRL)
+#define MX35_PAD_D3_REV__SDMA_DEBUG_BUS_RWB			IOMUX_PAD(0x688, 0x224, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_REV__ARM11P_TOP_TRACE_20			IOMUX_PAD(0x688, 0x224, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D3_CLS__IPU_DISPB_D3_CLS			IOMUX_PAD(0x68c, 0x228, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_CLS__IPU_DISPB_CS2				IOMUX_PAD(0x68c, 0x228, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_CLS__GPIO1_4				IOMUX_PAD(0x68c, 0x228, 5, 0x850, 2, NO_PAD_CTRL)
+#define MX35_PAD_D3_CLS__SDMA_DEBUG_BUS_DEVICE_0		IOMUX_PAD(0x68c, 0x228, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_CLS__ARM11P_TOP_TRACE_21			IOMUX_PAD(0x68c, 0x228, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_D3_SPL__IPU_DISPB_D3_SPL			IOMUX_PAD(0x690, 0x22c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_SPL__IPU_DISPB_D12_VSYNC			IOMUX_PAD(0x690, 0x22c, 2, 0x928, 2, NO_PAD_CTRL)
+#define MX35_PAD_D3_SPL__GPIO1_5				IOMUX_PAD(0x690, 0x22c, 5, 0x854, 2, NO_PAD_CTRL)
+#define MX35_PAD_D3_SPL__SDMA_DEBUG_BUS_DEVICE_1		IOMUX_PAD(0x690, 0x22c, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_D3_SPL__ARM11P_TOP_TRACE_22			IOMUX_PAD(0x690, 0x22c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD1_CMD__ESDHC1_CMD				IOMUX_PAD(0x694, 0x230, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CMD__MSHC_SCLK				IOMUX_PAD(0x694, 0x230, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CMD__IPU_DISPB_D0_VSYNC			IOMUX_PAD(0x694, 0x230, 3, 0x924, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CMD__USB_TOP_USBOTG_DATA_4			IOMUX_PAD(0x694, 0x230, 4, 0x9b4, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CMD__GPIO1_6				IOMUX_PAD(0x694, 0x230, 5, 0x858, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CMD__ARM11P_TOP_TRCTL			IOMUX_PAD(0x694, 0x230, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD1_CLK__ESDHC1_CLK				IOMUX_PAD(0x698, 0x234, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CLK__MSHC_BS				IOMUX_PAD(0x698, 0x234, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CLK__IPU_DISPB_BCLK			IOMUX_PAD(0x698, 0x234, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CLK__USB_TOP_USBOTG_DATA_5			IOMUX_PAD(0x698, 0x234, 4, 0x9b8, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CLK__GPIO1_7				IOMUX_PAD(0x698, 0x234, 5, 0x85c, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD1_CLK__ARM11P_TOP_TRCLK			IOMUX_PAD(0x698, 0x234, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD1_DATA0__ESDHC1_DAT0				IOMUX_PAD(0x69c, 0x238, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA0__MSHC_DATA_0				IOMUX_PAD(0x69c, 0x238, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA0__IPU_DISPB_CS0			IOMUX_PAD(0x69c, 0x238, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA0__USB_TOP_USBOTG_DATA_6		IOMUX_PAD(0x69c, 0x238, 4, 0x9bc, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA0__GPIO1_8				IOMUX_PAD(0x69c, 0x238, 5, 0x860, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA0__ARM11P_TOP_TRACE_23			IOMUX_PAD(0x69c, 0x238, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD1_DATA1__ESDHC1_DAT1				IOMUX_PAD(0x6a0, 0x23c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA1__MSHC_DATA_1				IOMUX_PAD(0x6a0, 0x23c, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA1__IPU_DISPB_PAR_RS			IOMUX_PAD(0x6a0, 0x23c, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA1__USB_TOP_USBOTG_DATA_0		IOMUX_PAD(0x6a0, 0x23c, 4, 0x9a4, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA1__GPIO1_9				IOMUX_PAD(0x6a0, 0x23c, 5, 0x864, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA1__ARM11P_TOP_TRACE_24			IOMUX_PAD(0x6a0, 0x23c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD1_DATA2__ESDHC1_DAT2				IOMUX_PAD(0x6a4, 0x240, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA2__MSHC_DATA_2				IOMUX_PAD(0x6a4, 0x240, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA2__IPU_DISPB_WR			IOMUX_PAD(0x6a4, 0x240, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA2__USB_TOP_USBOTG_DATA_1		IOMUX_PAD(0x6a4, 0x240, 4, 0x9a8, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA2__GPIO1_10				IOMUX_PAD(0x6a4, 0x240, 5, 0x830, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA2__ARM11P_TOP_TRACE_25			IOMUX_PAD(0x6a4, 0x240, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD1_DATA3__ESDHC1_DAT3				IOMUX_PAD(0x6a8, 0x244, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA3__MSHC_DATA_3				IOMUX_PAD(0x6a8, 0x244, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA3__IPU_DISPB_RD			IOMUX_PAD(0x6a8, 0x244, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA3__USB_TOP_USBOTG_DATA_2		IOMUX_PAD(0x6a8, 0x244, 4, 0x9ac, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA3__GPIO1_11				IOMUX_PAD(0x6a8, 0x244, 5, 0x834, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD1_DATA3__ARM11P_TOP_TRACE_26			IOMUX_PAD(0x6a8, 0x244, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD2_CMD__ESDHC2_CMD				IOMUX_PAD(0x6ac, 0x248, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CMD__I2C3_SCL				IOMUX_PAD(0x6ac, 0x248, 1, 0x91c, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CMD__ESDHC1_DAT4				IOMUX_PAD(0x6ac, 0x248, 2, 0x804, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CMD__IPU_CSI_D_2				IOMUX_PAD(0x6ac, 0x248, 3, 0x938, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CMD__USB_TOP_USBH2_DATA_4			IOMUX_PAD(0x6ac, 0x248, 4, 0x9dc, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CMD__GPIO2_0				IOMUX_PAD(0x6ac, 0x248, 5, 0x868, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CMD__SPDIF_SPDIF_OUT1			IOMUX_PAD(0x6ac, 0x248, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CMD__IPU_DISPB_D12_VSYNC			IOMUX_PAD(0x6ac, 0x248, 7, 0x928, 3, NO_PAD_CTRL)
+
+#define MX35_PAD_SD2_CLK__ESDHC2_CLK				IOMUX_PAD(0x6b0, 0x24c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CLK__I2C3_SDA				IOMUX_PAD(0x6b0, 0x24c, 1, 0x920, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CLK__ESDHC1_DAT5				IOMUX_PAD(0x6b0, 0x24c, 2, 0x808, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CLK__IPU_CSI_D_3				IOMUX_PAD(0x6b0, 0x24c, 3, 0x93c, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CLK__USB_TOP_USBH2_DATA_5			IOMUX_PAD(0x6b0, 0x24c, 4, 0x9e0, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CLK__GPIO2_1				IOMUX_PAD(0x6b0, 0x24c, 5, 0x894, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CLK__SPDIF_SPDIF_IN1			IOMUX_PAD(0x6b0, 0x24c, 6, 0x998, 2, NO_PAD_CTRL)
+#define MX35_PAD_SD2_CLK__IPU_DISPB_CS2				IOMUX_PAD(0x6b0, 0x24c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_SD2_DATA0__ESDHC2_DAT0				IOMUX_PAD(0x6b4, 0x250, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA0__UART3_RXD_MUX			IOMUX_PAD(0x6b4, 0x250, 1, 0x9a0, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA0__ESDHC1_DAT6				IOMUX_PAD(0x6b4, 0x250, 2, 0x80c, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA0__IPU_CSI_D_4				IOMUX_PAD(0x6b4, 0x250, 3, 0x940, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA0__USB_TOP_USBH2_DATA_6		IOMUX_PAD(0x6b4, 0x250, 4, 0x9e4, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA0__GPIO2_2				IOMUX_PAD(0x6b4, 0x250, 5, 0x8c0, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA0__SPDIF_SPDIF_EXTCLK			IOMUX_PAD(0x6b4, 0x250, 6, 0x994, 3, NO_PAD_CTRL)
+
+#define MX35_PAD_SD2_DATA1__ESDHC2_DAT1				IOMUX_PAD(0x6b8, 0x254, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA1__UART3_TXD_MUX			IOMUX_PAD(0x6b8, 0x254, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA1__ESDHC1_DAT7				IOMUX_PAD(0x6b8, 0x254, 2, 0x810, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA1__IPU_CSI_D_5				IOMUX_PAD(0x6b8, 0x254, 3, 0x944, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA1__USB_TOP_USBH2_DATA_0		IOMUX_PAD(0x6b8, 0x254, 4, 0x9cc, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA1__GPIO2_3				IOMUX_PAD(0x6b8, 0x254, 5, 0x8cc, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_SD2_DATA2__ESDHC2_DAT2				IOMUX_PAD(0x6bc, 0x258, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA2__UART3_RTS				IOMUX_PAD(0x6bc, 0x258, 1, 0x99c, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA2__CAN1_RXCAN				IOMUX_PAD(0x6bc, 0x258, 2, 0x7c8, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA2__IPU_CSI_D_6				IOMUX_PAD(0x6bc, 0x258, 3, 0x948, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA2__USB_TOP_USBH2_DATA_1		IOMUX_PAD(0x6bc, 0x258, 4, 0x9d0, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA2__GPIO2_4				IOMUX_PAD(0x6bc, 0x258, 5, 0x8d0, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_SD2_DATA3__ESDHC2_DAT3				IOMUX_PAD(0x6c0, 0x25c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA3__UART3_CTS				IOMUX_PAD(0x6c0, 0x25c, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA3__CAN1_TXCAN				IOMUX_PAD(0x6c0, 0x25c, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA3__IPU_CSI_D_7				IOMUX_PAD(0x6c0, 0x25c, 3, 0x94c, 1, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA3__USB_TOP_USBH2_DATA_2		IOMUX_PAD(0x6c0, 0x25c, 4, 0x9d4, 0, NO_PAD_CTRL)
+#define MX35_PAD_SD2_DATA3__GPIO2_5				IOMUX_PAD(0x6c0, 0x25c, 5, 0x8d4, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_CS0__ATA_CS0				IOMUX_PAD(0x6c4, 0x260, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS0__CSPI1_SS3				IOMUX_PAD(0x6c4, 0x260, 1, 0x7dc, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS0__IPU_DISPB_CS1				IOMUX_PAD(0x6c4, 0x260, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS0__GPIO2_6				IOMUX_PAD(0x6c4, 0x260, 5, 0x8d8, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS0__IPU_DIAGB_0				IOMUX_PAD(0x6c4, 0x260, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS0__ARM11P_TOP_MAX1_HMASTER_0		IOMUX_PAD(0x6c4, 0x260, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_CS1__ATA_CS1				IOMUX_PAD(0x6c8, 0x264, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS1__IPU_DISPB_CS2				IOMUX_PAD(0x6c8, 0x264, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS1__CSPI2_SS0				IOMUX_PAD(0x6c8, 0x264, 4, 0x7f0, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS1__GPIO2_7				IOMUX_PAD(0x6c8, 0x264, 5, 0x8dc, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS1__IPU_DIAGB_1				IOMUX_PAD(0x6c8, 0x264, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_CS1__ARM11P_TOP_MAX1_HMASTER_1		IOMUX_PAD(0x6c8, 0x264, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DIOR__ATA_DIOR				IOMUX_PAD(0x6cc, 0x268, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOR__ESDHC3_DAT0				IOMUX_PAD(0x6cc, 0x268, 1, 0x81c, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOR__USB_TOP_USBOTG_DIR			IOMUX_PAD(0x6cc, 0x268, 2, 0x9c4, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOR__IPU_DISPB_BE0			IOMUX_PAD(0x6cc, 0x268, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOR__CSPI2_SS1				IOMUX_PAD(0x6cc, 0x268, 4, 0x7f4, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOR__GPIO2_8				IOMUX_PAD(0x6cc, 0x268, 5, 0x8e0, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOR__IPU_DIAGB_2				IOMUX_PAD(0x6cc, 0x268, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOR__ARM11P_TOP_MAX1_HMASTER_2		IOMUX_PAD(0x6cc, 0x268, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DIOW__ATA_DIOW				IOMUX_PAD(0x6d0, 0x26c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOW__ESDHC3_DAT1				IOMUX_PAD(0x6d0, 0x26c, 1, 0x820, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOW__USB_TOP_USBOTG_STP			IOMUX_PAD(0x6d0, 0x26c, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOW__IPU_DISPB_BE1			IOMUX_PAD(0x6d0, 0x26c, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOW__CSPI2_MOSI				IOMUX_PAD(0x6d0, 0x26c, 4, 0x7ec, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOW__GPIO2_9				IOMUX_PAD(0x6d0, 0x26c, 5, 0x8e4, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOW__IPU_DIAGB_3				IOMUX_PAD(0x6d0, 0x26c, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DIOW__ARM11P_TOP_MAX1_HMASTER_3		IOMUX_PAD(0x6d0, 0x26c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DMACK__ATA_DMACK				IOMUX_PAD(0x6d4, 0x270, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMACK__ESDHC3_DAT2				IOMUX_PAD(0x6d4, 0x270, 1, 0x824, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMACK__USB_TOP_USBOTG_NXT			IOMUX_PAD(0x6d4, 0x270, 2, 0x9c8, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMACK__CSPI2_MISO				IOMUX_PAD(0x6d4, 0x270, 4, 0x7e8, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMACK__GPIO2_10				IOMUX_PAD(0x6d4, 0x270, 5, 0x86c, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMACK__IPU_DIAGB_4				IOMUX_PAD(0x6d4, 0x270, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMACK__ARM11P_TOP_MAX0_HMASTER_0		IOMUX_PAD(0x6d4, 0x270, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_RESET_B__ATA_RESET_B			IOMUX_PAD(0x6d8, 0x274, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_RESET_B__ESDHC3_DAT3			IOMUX_PAD(0x6d8, 0x274, 1, 0x828, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_RESET_B__USB_TOP_USBOTG_DATA_0		IOMUX_PAD(0x6d8, 0x274, 2, 0x9a4, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_RESET_B__IPU_DISPB_SD_D_O			IOMUX_PAD(0x6d8, 0x274, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_RESET_B__CSPI2_RDY				IOMUX_PAD(0x6d8, 0x274, 4, 0x7e4, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_RESET_B__GPIO2_11				IOMUX_PAD(0x6d8, 0x274, 5, 0x870, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_RESET_B__IPU_DIAGB_5			IOMUX_PAD(0x6d8, 0x274, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_RESET_B__ARM11P_TOP_MAX0_HMASTER_1		IOMUX_PAD(0x6d8, 0x274, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_IORDY__ATA_IORDY				IOMUX_PAD(0x6dc, 0x278, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_IORDY__ESDHC3_DAT4				IOMUX_PAD(0x6dc, 0x278, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_IORDY__USB_TOP_USBOTG_DATA_1		IOMUX_PAD(0x6dc, 0x278, 2, 0x9a8, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_IORDY__IPU_DISPB_SD_D_IO			IOMUX_PAD(0x6dc, 0x278, 3, 0x92c, 3, NO_PAD_CTRL)
+#define MX35_PAD_ATA_IORDY__ESDHC2_DAT4				IOMUX_PAD(0x6dc, 0x278, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_IORDY__GPIO2_12				IOMUX_PAD(0x6dc, 0x278, 5, 0x874, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_IORDY__IPU_DIAGB_6				IOMUX_PAD(0x6dc, 0x278, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_IORDY__ARM11P_TOP_MAX0_HMASTER_2		IOMUX_PAD(0x6dc, 0x278, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA0__ATA_DATA_0				IOMUX_PAD(0x6e0, 0x27c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA0__ESDHC3_DAT5				IOMUX_PAD(0x6e0, 0x27c, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA0__USB_TOP_USBOTG_DATA_2		IOMUX_PAD(0x6e0, 0x27c, 2, 0x9ac, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA0__IPU_DISPB_D12_VSYNC			IOMUX_PAD(0x6e0, 0x27c, 3, 0x928, 4, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA0__ESDHC2_DAT5				IOMUX_PAD(0x6e0, 0x27c, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA0__GPIO2_13				IOMUX_PAD(0x6e0, 0x27c, 5, 0x878, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA0__IPU_DIAGB_7				IOMUX_PAD(0x6e0, 0x27c, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA0__ARM11P_TOP_MAX0_HMASTER_3		IOMUX_PAD(0x6e0, 0x27c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA1__ATA_DATA_1				IOMUX_PAD(0x6e4, 0x280, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA1__ESDHC3_DAT6				IOMUX_PAD(0x6e4, 0x280, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA1__USB_TOP_USBOTG_DATA_3		IOMUX_PAD(0x6e4, 0x280, 2, 0x9b0, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA1__IPU_DISPB_SD_CLK			IOMUX_PAD(0x6e4, 0x280, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA1__ESDHC2_DAT6				IOMUX_PAD(0x6e4, 0x280, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA1__GPIO2_14				IOMUX_PAD(0x6e4, 0x280, 5, 0x87c, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA1__IPU_DIAGB_8				IOMUX_PAD(0x6e4, 0x280, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA1__ARM11P_TOP_TRACE_27			IOMUX_PAD(0x6e4, 0x280, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA2__ATA_DATA_2				IOMUX_PAD(0x6e8, 0x284, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA2__ESDHC3_DAT7				IOMUX_PAD(0x6e8, 0x284, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA2__USB_TOP_USBOTG_DATA_4		IOMUX_PAD(0x6e8, 0x284, 2, 0x9b4, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA2__IPU_DISPB_SER_RS			IOMUX_PAD(0x6e8, 0x284, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA2__ESDHC2_DAT7				IOMUX_PAD(0x6e8, 0x284, 4, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA2__GPIO2_15				IOMUX_PAD(0x6e8, 0x284, 5, 0x880, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA2__IPU_DIAGB_9				IOMUX_PAD(0x6e8, 0x284, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA2__ARM11P_TOP_TRACE_28			IOMUX_PAD(0x6e8, 0x284, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA3__ATA_DATA_3				IOMUX_PAD(0x6e8, 0x288, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA3__ESDHC3_CLK				IOMUX_PAD(0x6e8, 0x288, 1, 0x814, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA3__USB_TOP_USBOTG_DATA_5		IOMUX_PAD(0x6e8, 0x288, 2, 0x9b8, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA3__CSPI2_SCLK				IOMUX_PAD(0x6e8, 0x288, 4, 0x7e0, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA3__GPIO2_16				IOMUX_PAD(0x6e8, 0x288, 5, 0x884, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA3__IPU_DIAGB_10			IOMUX_PAD(0x6e8, 0x288, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA3__ARM11P_TOP_TRACE_29			IOMUX_PAD(0x6e8, 0x288, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA4__ATA_DATA_4				IOMUX_PAD(0x6f0, 0x28c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA4__ESDHC3_CMD				IOMUX_PAD(0x6f0, 0x28c, 1, 0x818, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA4__USB_TOP_USBOTG_DATA_6		IOMUX_PAD(0x6f0, 0x28c, 2, 0x9bc, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA4__GPIO2_17				IOMUX_PAD(0x6f0, 0x28c, 5, 0x888, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA4__IPU_DIAGB_11			IOMUX_PAD(0x6f0, 0x28c, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA4__ARM11P_TOP_TRACE_30			IOMUX_PAD(0x6f0, 0x28c, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA5__ATA_DATA_5				IOMUX_PAD(0x6f4, 0x290, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA5__USB_TOP_USBOTG_DATA_7		IOMUX_PAD(0x6f4, 0x290, 2, 0x9c0, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA5__GPIO2_18				IOMUX_PAD(0x6f4, 0x290, 5, 0x88c, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA5__IPU_DIAGB_12			IOMUX_PAD(0x6f4, 0x290, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA5__ARM11P_TOP_TRACE_31			IOMUX_PAD(0x6f4, 0x290, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA6__ATA_DATA_6				IOMUX_PAD(0x6f8, 0x294, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA6__CAN1_TXCAN				IOMUX_PAD(0x6f8, 0x294, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA6__UART1_DTR				IOMUX_PAD(0x6f8, 0x294, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA6__AUDMUX_AUD6_TXD			IOMUX_PAD(0x6f8, 0x294, 3, 0x7b4, 0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA6__GPIO2_19				IOMUX_PAD(0x6f8, 0x294, 5, 0x890, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA6__IPU_DIAGB_13			IOMUX_PAD(0x6f8, 0x294, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA7__ATA_DATA_7				IOMUX_PAD(0x6fc, 0x298, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA7__CAN1_RXCAN				IOMUX_PAD(0x6fc, 0x298, 1, 0x7c8, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA7__UART1_DSR				IOMUX_PAD(0x6fc, 0x298, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA7__AUDMUX_AUD6_RXD			IOMUX_PAD(0x6fc, 0x298, 3, 0x7b0, 0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA7__GPIO2_20				IOMUX_PAD(0x6fc, 0x298, 5, 0x898, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA7__IPU_DIAGB_14			IOMUX_PAD(0x6fc, 0x298, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA8__ATA_DATA_8				IOMUX_PAD(0x700, 0x29c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA8__UART3_RTS				IOMUX_PAD(0x700, 0x29c, 1, 0x99c, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA8__UART1_RI				IOMUX_PAD(0x700, 0x29c, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA8__AUDMUX_AUD6_TXC			IOMUX_PAD(0x700, 0x29c, 3, 0x7c0, 0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA8__GPIO2_21				IOMUX_PAD(0x700, 0x29c, 5, 0x89c, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA8__IPU_DIAGB_15			IOMUX_PAD(0x700, 0x29c, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA9__ATA_DATA_9				IOMUX_PAD(0x704, 0x2a0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA9__UART3_CTS				IOMUX_PAD(0x704, 0x2a0, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA9__UART1_DCD				IOMUX_PAD(0x704, 0x2a0, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA9__AUDMUX_AUD6_TXFS			IOMUX_PAD(0x704, 0x2a0, 3, 0x7c4, 0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA9__GPIO2_22				IOMUX_PAD(0x704, 0x2a0, 5, 0x8a0, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA9__IPU_DIAGB_16			IOMUX_PAD(0x704, 0x2a0, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA10__ATA_DATA_10			IOMUX_PAD(0x708, 0x2a4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA10__UART3_RXD_MUX			IOMUX_PAD(0x708, 0x2a4, 1, 0x9a0, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA10__AUDMUX_AUD6_RXC			IOMUX_PAD(0x708, 0x2a4, 3, 0x7b8, 0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA10__GPIO2_23				IOMUX_PAD(0x708, 0x2a4, 5, 0x8a4, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA10__IPU_DIAGB_17			IOMUX_PAD(0x708, 0x2a4, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA11__ATA_DATA_11			IOMUX_PAD(0x70c, 0x2a8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA11__UART3_TXD_MUX			IOMUX_PAD(0x70c, 0x2a8, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA11__AUDMUX_AUD6_RXFS			IOMUX_PAD(0x70c, 0x2a8, 3, 0x7bc, 0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA11__GPIO2_24				IOMUX_PAD(0x70c, 0x2a8, 5, 0x8a8, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA11__IPU_DIAGB_18			IOMUX_PAD(0x70c, 0x2a8, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA12__ATA_DATA_12			IOMUX_PAD(0x710, 0x2ac, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA12__I2C3_SCL				IOMUX_PAD(0x710, 0x2ac, 1, 0x91c, 3, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA12__GPIO2_25				IOMUX_PAD(0x710, 0x2ac, 5, 0x8ac, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA12__IPU_DIAGB_19			IOMUX_PAD(0x710, 0x2ac, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA13__ATA_DATA_13			IOMUX_PAD(0x714, 0x2b0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA13__I2C3_SDA				IOMUX_PAD(0x714, 0x2b0, 1, 0x920, 3, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA13__GPIO2_26				IOMUX_PAD(0x714, 0x2b0, 5, 0x8b0, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA13__IPU_DIAGB_20			IOMUX_PAD(0x714, 0x2b0, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA14__ATA_DATA_14			IOMUX_PAD(0x718, 0x2b4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA14__IPU_CSI_D_0			IOMUX_PAD(0x718, 0x2b4, 1, 0x930, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA14__KPP_ROW_0				IOMUX_PAD(0x718, 0x2b4, 3, 0x970, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA14__GPIO2_27				IOMUX_PAD(0x718, 0x2b4, 5, 0x8b4, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA14__IPU_DIAGB_21			IOMUX_PAD(0x718, 0x2b4, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DATA15__ATA_DATA_15			IOMUX_PAD(0x71c, 0x2b8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA15__IPU_CSI_D_1			IOMUX_PAD(0x71c, 0x2b8, 1, 0x934, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA15__KPP_ROW_1				IOMUX_PAD(0x71c, 0x2b8, 3, 0x974, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA15__GPIO2_28				IOMUX_PAD(0x71c, 0x2b8, 5, 0x8b8, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DATA15__IPU_DIAGB_22			IOMUX_PAD(0x71c, 0x2b8, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_INTRQ__ATA_INTRQ				IOMUX_PAD(0x720, 0x2bc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_INTRQ__IPU_CSI_D_2				IOMUX_PAD(0x720, 0x2bc, 1, 0x938, 3, NO_PAD_CTRL)
+#define MX35_PAD_ATA_INTRQ__KPP_ROW_2				IOMUX_PAD(0x720, 0x2bc, 3, 0x978, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_INTRQ__GPIO2_29				IOMUX_PAD(0x720, 0x2bc, 5, 0x8bc, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_INTRQ__IPU_DIAGB_23			IOMUX_PAD(0x720, 0x2bc, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_BUFF_EN__ATA_BUFFER_EN			IOMUX_PAD(0x724, 0x2c0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_BUFF_EN__IPU_CSI_D_3			IOMUX_PAD(0x724, 0x2c0, 1, 0x93c, 3, NO_PAD_CTRL)
+#define MX35_PAD_ATA_BUFF_EN__KPP_ROW_3				IOMUX_PAD(0x724, 0x2c0, 3, 0x97c, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_BUFF_EN__GPIO2_30				IOMUX_PAD(0x724, 0x2c0, 5, 0x8c4, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_BUFF_EN__IPU_DIAGB_24			IOMUX_PAD(0x724, 0x2c0, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DMARQ__ATA_DMARQ				IOMUX_PAD(0x728, 0x2c4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMARQ__IPU_CSI_D_4				IOMUX_PAD(0x728, 0x2c4, 1, 0x940, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMARQ__KPP_COL_0				IOMUX_PAD(0x728, 0x2c4, 3, 0x950, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMARQ__GPIO2_31				IOMUX_PAD(0x728, 0x2c4, 5, 0x8c8, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMARQ__IPU_DIAGB_25			IOMUX_PAD(0x728, 0x2c4, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DMARQ__ECT_CTI_TRIG_IN1_4			IOMUX_PAD(0x728, 0x2c4, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DA0__ATA_DA_0				IOMUX_PAD(0x72c, 0x2c8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA0__IPU_CSI_D_5				IOMUX_PAD(0x72c, 0x2c8, 1, 0x944, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA0__KPP_COL_1				IOMUX_PAD(0x72c, 0x2c8, 3, 0x954, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA0__GPIO3_0				IOMUX_PAD(0x72c, 0x2c8, 5, 0x8e8, 1, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA0__IPU_DIAGB_26				IOMUX_PAD(0x72c, 0x2c8, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA0__ECT_CTI_TRIG_IN1_5			IOMUX_PAD(0x72c, 0x2c8, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DA1__ATA_DA_1				IOMUX_PAD(0x730, 0x2cc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA1__IPU_CSI_D_6				IOMUX_PAD(0x730, 0x2cc, 1, 0x948, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA1__KPP_COL_2				IOMUX_PAD(0x730, 0x2cc, 3, 0x958, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA1__GPIO3_1				IOMUX_PAD(0x730, 0x2cc, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA1__IPU_DIAGB_27				IOMUX_PAD(0x730, 0x2cc, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA1__ECT_CTI_TRIG_IN1_6			IOMUX_PAD(0x730, 0x2cc, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_ATA_DA2__ATA_DA_2				IOMUX_PAD(0x734, 0x2d0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA2__IPU_CSI_D_7				IOMUX_PAD(0x734, 0x2d0, 1, 0x94c, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA2__KPP_COL_3				IOMUX_PAD(0x734, 0x2d0, 3, 0x95c, 2, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA2__GPIO3_2				IOMUX_PAD(0x734, 0x2d0, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA2__IPU_DIAGB_28				IOMUX_PAD(0x734, 0x2d0, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_ATA_DA2__ECT_CTI_TRIG_IN1_7			IOMUX_PAD(0x734, 0x2d0, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_MLB_CLK__MLB_MLBCLK				IOMUX_PAD(0x738, 0x2d4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_MLB_CLK__GPIO3_3				IOMUX_PAD(0x738, 0x2d4, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_MLB_DAT__MLB_MLBDAT				IOMUX_PAD(0x73c, 0x2d8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_MLB_DAT__GPIO3_4				IOMUX_PAD(0x73c, 0x2d8, 5, 0x904, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_MLB_SIG__MLB_MLBSIG				IOMUX_PAD(0x740, 0x2dc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_MLB_SIG__GPIO3_5				IOMUX_PAD(0x740, 0x2dc, 5, 0x908, 1, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_TX_CLK__FEC_TX_CLK				IOMUX_PAD(0x744, 0x2e0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_CLK__ESDHC1_DAT4			IOMUX_PAD(0x744, 0x2e0, 1, 0x804, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_CLK__UART3_RXD_MUX			IOMUX_PAD(0x744, 0x2e0, 2, 0x9a0, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_CLK__USB_TOP_USBH2_DIR			IOMUX_PAD(0x744, 0x2e0, 3, 0x9ec, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_CLK__CSPI2_MOSI				IOMUX_PAD(0x744, 0x2e0, 4, 0x7ec, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_CLK__GPIO3_6				IOMUX_PAD(0x744, 0x2e0, 5, 0x90c, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_CLK__IPU_DISPB_D12_VSYNC		IOMUX_PAD(0x744, 0x2e0, 6, 0x928, 5, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_CLK__ARM11P_TOP_EVNTBUS_0		IOMUX_PAD(0x744, 0x2e0, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_RX_CLK__FEC_RX_CLK				IOMUX_PAD(0x748, 0x2e4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_CLK__ESDHC1_DAT5			IOMUX_PAD(0x748, 0x2e4, 1, 0x808, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_CLK__UART3_TXD_MUX			IOMUX_PAD(0x748, 0x2e4, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_CLK__USB_TOP_USBH2_STP			IOMUX_PAD(0x748, 0x2e4, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_CLK__CSPI2_MISO				IOMUX_PAD(0x748, 0x2e4, 4, 0x7e8, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_CLK__GPIO3_7				IOMUX_PAD(0x748, 0x2e4, 5, 0x910, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_CLK__IPU_DISPB_SD_D_I			IOMUX_PAD(0x748, 0x2e4, 6, 0x92c, 4, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_CLK__ARM11P_TOP_EVNTBUS_1		IOMUX_PAD(0x748, 0x2e4, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_RX_DV__FEC_RX_DV				IOMUX_PAD(0x74c, 0x2e8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_DV__ESDHC1_DAT6				IOMUX_PAD(0x74c, 0x2e8, 1, 0x80c, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_DV__UART3_RTS				IOMUX_PAD(0x74c, 0x2e8, 2, 0x99c, 2, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_DV__USB_TOP_USBH2_NXT			IOMUX_PAD(0x74c, 0x2e8, 3, 0x9f0, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_DV__CSPI2_SCLK				IOMUX_PAD(0x74c, 0x2e8, 4, 0x7e0, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_DV__GPIO3_8				IOMUX_PAD(0x74c, 0x2e8, 5, 0x914, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_DV__IPU_DISPB_SD_CLK			IOMUX_PAD(0x74c, 0x2e8, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_DV__ARM11P_TOP_EVNTBUS_2		IOMUX_PAD(0x74c, 0x2e8, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_COL__FEC_COL				IOMUX_PAD(0x750, 0x2ec, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_COL__ESDHC1_DAT7				IOMUX_PAD(0x750, 0x2ec, 1, 0x810, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_COL__UART3_CTS				IOMUX_PAD(0x750, 0x2ec, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_COL__USB_TOP_USBH2_DATA_0			IOMUX_PAD(0x750, 0x2ec, 3, 0x9cc, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_COL__CSPI2_RDY				IOMUX_PAD(0x750, 0x2ec, 4, 0x7e4, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_COL__GPIO3_9				IOMUX_PAD(0x750, 0x2ec, 5, 0x918, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_COL__IPU_DISPB_SER_RS			IOMUX_PAD(0x750, 0x2ec, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_COL__ARM11P_TOP_EVNTBUS_3			IOMUX_PAD(0x750, 0x2ec, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_RDATA0__FEC_RDATA_0			IOMUX_PAD(0x754, 0x2f0, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA0__PWM_PWMO				IOMUX_PAD(0x754, 0x2f0, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA0__UART3_DTR				IOMUX_PAD(0x754, 0x2f0, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA0__USB_TOP_USBH2_DATA_1		IOMUX_PAD(0x754, 0x2f0, 3, 0x9d0, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA0__CSPI2_SS0				IOMUX_PAD(0x754, 0x2f0, 4, 0x7f0, 2, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA0__GPIO3_10				IOMUX_PAD(0x754, 0x2f0, 5, 0x8ec, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA0__IPU_DISPB_CS1			IOMUX_PAD(0x754, 0x2f0, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA0__ARM11P_TOP_EVNTBUS_4		IOMUX_PAD(0x754, 0x2f0, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_TDATA0__FEC_TDATA_0			IOMUX_PAD(0x758, 0x2f4, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA0__SPDIF_SPDIF_OUT1			IOMUX_PAD(0x758, 0x2f4, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA0__UART3_DSR				IOMUX_PAD(0x758, 0x2f4, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA0__USB_TOP_USBH2_DATA_2		IOMUX_PAD(0x758, 0x2f4, 3, 0x9d4, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA0__CSPI2_SS1				IOMUX_PAD(0x758, 0x2f4, 4, 0x7f4, 2, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA0__GPIO3_11				IOMUX_PAD(0x758, 0x2f4, 5, 0x8f0, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA0__IPU_DISPB_CS0			IOMUX_PAD(0x758, 0x2f4, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA0__ARM11P_TOP_EVNTBUS_5		IOMUX_PAD(0x758, 0x2f4, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_TX_EN__FEC_TX_EN				IOMUX_PAD(0x75c, 0x2f8, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_EN__SPDIF_SPDIF_IN1			IOMUX_PAD(0x75c, 0x2f8, 1, 0x998, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_EN__UART3_RI				IOMUX_PAD(0x75c, 0x2f8, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_EN__USB_TOP_USBH2_DATA_3		IOMUX_PAD(0x75c, 0x2f8, 3, 0x9d8, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_EN__GPIO3_12				IOMUX_PAD(0x75c, 0x2f8, 5, 0x8f4, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_EN__IPU_DISPB_PAR_RS			IOMUX_PAD(0x75c, 0x2f8, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_EN__ARM11P_TOP_EVNTBUS_6		IOMUX_PAD(0x75c, 0x2f8, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_MDC__FEC_MDC				IOMUX_PAD(0x760, 0x2fc, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDC__CAN2_TXCAN				IOMUX_PAD(0x760, 0x2fc, 1, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDC__UART3_DCD				IOMUX_PAD(0x760, 0x2fc, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDC__USB_TOP_USBH2_DATA_4			IOMUX_PAD(0x760, 0x2fc, 3, 0x9dc, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDC__GPIO3_13				IOMUX_PAD(0x760, 0x2fc, 5, 0x8f8, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDC__IPU_DISPB_WR				IOMUX_PAD(0x760, 0x2fc, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDC__ARM11P_TOP_EVNTBUS_7			IOMUX_PAD(0x760, 0x2fc, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_MDIO__FEC_MDIO				IOMUX_PAD(0x764, 0x300, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDIO__CAN2_RXCAN				IOMUX_PAD(0x764, 0x300, 1, 0x7cc, 2, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDIO__USB_TOP_USBH2_DATA_5			IOMUX_PAD(0x764, 0x300, 3, 0x9e0, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDIO__GPIO3_14				IOMUX_PAD(0x764, 0x300, 5, 0x8fc, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDIO__IPU_DISPB_RD				IOMUX_PAD(0x764, 0x300, 6, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_MDIO__ARM11P_TOP_EVNTBUS_8			IOMUX_PAD(0x764, 0x300, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_TX_ERR__FEC_TX_ERR				IOMUX_PAD(0x768, 0x304, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_ERR__OWIRE_LINE				IOMUX_PAD(0x768, 0x304, 1, 0x990, 2, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_ERR__SPDIF_SPDIF_EXTCLK			IOMUX_PAD(0x768, 0x304, 2, 0x994, 4, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_ERR__USB_TOP_USBH2_DATA_6		IOMUX_PAD(0x768, 0x304, 3, 0x9e4, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_ERR__GPIO3_15				IOMUX_PAD(0x768, 0x304, 5, 0x900, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_ERR__IPU_DISPB_D0_VSYNC			IOMUX_PAD(0x768, 0x304, 6, 0x924, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TX_ERR__ARM11P_TOP_EVNTBUS_9		IOMUX_PAD(0x768, 0x304, 7, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_RX_ERR__FEC_RX_ERR				IOMUX_PAD(0x76c, 0x308, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_ERR__IPU_CSI_D_0			IOMUX_PAD(0x76c, 0x308, 1, 0x930, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_ERR__USB_TOP_USBH2_DATA_7		IOMUX_PAD(0x76c, 0x308, 3, 0x9e8, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_ERR__KPP_COL_4				IOMUX_PAD(0x76c, 0x308, 4, 0x960, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_ERR__GPIO3_16				IOMUX_PAD(0x76c, 0x308, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RX_ERR__IPU_DISPB_SD_D_IO			IOMUX_PAD(0x76c, 0x308, 6, 0x92c, 5, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_CRS__FEC_CRS				IOMUX_PAD(0x770, 0x30c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_CRS__IPU_CSI_D_1				IOMUX_PAD(0x770, 0x30c, 1, 0x934, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_CRS__USB_TOP_USBH2_PWR			IOMUX_PAD(0x770, 0x30c, 3, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_CRS__KPP_COL_5				IOMUX_PAD(0x770, 0x30c, 4, 0x964, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_CRS__GPIO3_17				IOMUX_PAD(0x770, 0x30c, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_CRS__IPU_FLASH_STROBE			IOMUX_PAD(0x770, 0x30c, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_RDATA1__FEC_RDATA_1			IOMUX_PAD(0x774, 0x310, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA1__IPU_CSI_D_2			IOMUX_PAD(0x774, 0x310, 1, 0x938, 4, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA1__AUDMUX_AUD6_RXC			IOMUX_PAD(0x774, 0x310, 2, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA1__USB_TOP_USBH2_OC			IOMUX_PAD(0x774, 0x310, 3, 0x9f4, 2, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA1__KPP_COL_6				IOMUX_PAD(0x774, 0x310, 4, 0x968, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA1__GPIO3_18				IOMUX_PAD(0x774, 0x310, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA1__IPU_DISPB_BE0			IOMUX_PAD(0x774, 0x310, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_TDATA1__FEC_TDATA_1			IOMUX_PAD(0x778, 0x314, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA1__IPU_CSI_D_3			IOMUX_PAD(0x778, 0x314, 1, 0x93c, 4, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA1__AUDMUX_AUD6_RXFS			IOMUX_PAD(0x778, 0x314, 2, 0x7bc, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA1__KPP_COL_7				IOMUX_PAD(0x778, 0x314, 4, 0x96c, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA1__GPIO3_19				IOMUX_PAD(0x778, 0x314, 5, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA1__IPU_DISPB_BE1			IOMUX_PAD(0x778, 0x314, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_RDATA2__FEC_RDATA_2			IOMUX_PAD(0x77c, 0x318, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA2__IPU_CSI_D_4			IOMUX_PAD(0x77c, 0x318, 1, 0x940, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA2__AUDMUX_AUD6_TXD			IOMUX_PAD(0x77c, 0x318, 2, 0x7b4, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA2__KPP_ROW_4				IOMUX_PAD(0x77c, 0x318, 4, 0x980, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA2__GPIO3_20				IOMUX_PAD(0x77c, 0x318, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_TDATA2__FEC_TDATA_2			IOMUX_PAD(0x780, 0x31c, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA2__IPU_CSI_D_5			IOMUX_PAD(0x780, 0x31c, 1, 0x944, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA2__AUDMUX_AUD6_RXD			IOMUX_PAD(0x780, 0x31c, 2, 0x7b0, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA2__KPP_ROW_5				IOMUX_PAD(0x780, 0x31c, 4, 0x984, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA2__GPIO3_21				IOMUX_PAD(0x780, 0x31c, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_RDATA3__FEC_RDATA_3			IOMUX_PAD(0x784, 0x320, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA3__IPU_CSI_D_6			IOMUX_PAD(0x784, 0x320, 1, 0x948, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA3__AUDMUX_AUD6_TXC			IOMUX_PAD(0x784, 0x320, 2, 0x7c0, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA3__KPP_ROW_6				IOMUX_PAD(0x784, 0x320, 4, 0x988, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_RDATA3__GPIO3_22				IOMUX_PAD(0x784, 0x320, 6, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_FEC_TDATA3__FEC_TDATA_3			IOMUX_PAD(0x788, 0x324, 0, 0x0,   0, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA3__IPU_CSI_D_7			IOMUX_PAD(0x788, 0x324, 1, 0x94c, 3, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA3__AUDMUX_AUD6_TXFS			IOMUX_PAD(0x788, 0x324, 2, 0x7c4, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA3__KPP_ROW_7				IOMUX_PAD(0x788, 0x324, 4, 0x98c, 1, NO_PAD_CTRL)
+#define MX35_PAD_FEC_TDATA3__GPIO3_23				IOMUX_PAD(0x788, 0x324, 5, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_EXT_ARMCLK__CCM_EXT_ARMCLK			IOMUX_PAD(0x78c, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+#define MX35_PAD_TEST_MODE__TCU_TEST_MODE			IOMUX_PAD(0x790, 0x0,   0, 0x0,   0, NO_PAD_CTRL)
+
+
+#endif /* __MACH_IOMUX_MX35_H__ */
-- 
cgit v0.10.2


From 54df526819d3bfbc1af3f13aed7a3f9b9fdbdd04 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 6 Feb 2009 15:42:26 +0100
Subject: [ARM] MX35: Add PCM043 board support

The PCM043 is a i.MX35 based board from Phytec also known as the
phyCORE-i.MX35.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/Kconfig b/arch/arm/mach-mx3/Kconfig
index 6d61ef0..0e7de94 100644
--- a/arch/arm/mach-mx3/Kconfig
+++ b/arch/arm/mach-mx3/Kconfig
@@ -67,4 +67,11 @@ config MACH_QONG
 	  Include support for Dave/DENX QongEVB-LITE platform. This includes
 	  specific configurations for the board and its peripherals.
 
+config MACH_PCM043
+	bool "Support Phytec pcm043 (i.MX35) platforms"
+	select ARCH_MX35
+	help
+	  Include support for Phytec pcm043 platform. This includes
+	  specific configurations for the board and its peripherals.
+
 endif
diff --git a/arch/arm/mach-mx3/Makefile b/arch/arm/mach-mx3/Makefile
index 272c8a9..cd6547b 100644
--- a/arch/arm/mach-mx3/Makefile
+++ b/arch/arm/mach-mx3/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_MACH_MX31_3DS)	+= mx31pdk.o
 obj-$(CONFIG_MACH_MX31MOBOARD)	+= mx31moboard.o mx31moboard-devboard.o \
 				   mx31moboard-marxbot.o
 obj-$(CONFIG_MACH_QONG)		+= qong.o
+obj-$(CONFIG_MACH_PCM043)	+= pcm043.o
diff --git a/arch/arm/mach-mx3/pcm043.c b/arch/arm/mach-mx3/pcm043.c
new file mode 100644
index 0000000..638b358
--- /dev/null
+++ b/arch/arm/mach-mx3/pcm043.c
@@ -0,0 +1,252 @@
+/*
+ *  Copyright (C) 2009 Sascha Hauer, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+
+#include <linux/platform_device.h>
+#include <linux/mtd/physmap.h>
+#include <linux/mtd/plat-ram.h>
+#include <linux/memory.h>
+#include <linux/gpio.h>
+#include <linux/smc911x.h>
+#include <linux/interrupt.h>
+#include <linux/i2c.h>
+#include <linux/i2c/at24.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+#include <asm/mach/map.h>
+
+#include <mach/hardware.h>
+#include <mach/common.h>
+#include <mach/imx-uart.h>
+#if defined CONFIG_I2C_IMX || defined CONFIG_I2C_IMX_MODULE
+#include <mach/i2c.h>
+#endif
+#include <mach/iomux-mx35.h>
+#include <mach/ipu.h>
+#include <mach/mx3fb.h>
+
+#include "devices.h"
+
+static const struct fb_videomode fb_modedb[] = {
+	{
+		/* 240x320 @ 60 Hz */
+		.name		= "Sharp-LQ035Q7",
+		.refresh	= 60,
+		.xres		= 240,
+		.yres		= 320,
+		.pixclock	= 185925,
+		.left_margin	= 9,
+		.right_margin	= 16,
+		.upper_margin	= 7,
+		.lower_margin	= 9,
+		.hsync_len	= 1,
+		.vsync_len	= 1,
+		.sync		= FB_SYNC_HOR_HIGH_ACT | FB_SYNC_SHARP_MODE | FB_SYNC_CLK_INVERT | FB_SYNC_CLK_IDLE_EN,
+		.vmode		= FB_VMODE_NONINTERLACED,
+		.flag		= 0,
+	}, {
+		/* 240x320 @ 60 Hz */
+		.name		= "TX090",
+		.refresh	= 60,
+		.xres		= 240,
+		.yres		= 320,
+		.pixclock	= 38255,
+		.left_margin	= 144,
+		.right_margin	= 0,
+		.upper_margin	= 7,
+		.lower_margin	= 40,
+		.hsync_len	= 96,
+		.vsync_len	= 1,
+		.sync		= FB_SYNC_VERT_HIGH_ACT | FB_SYNC_OE_ACT_HIGH,
+		.vmode		= FB_VMODE_NONINTERLACED,
+		.flag		= 0,
+	},
+};
+
+static struct ipu_platform_data mx3_ipu_data = {
+	.irq_base = MXC_IPU_IRQ_START,
+};
+
+static struct mx3fb_platform_data mx3fb_pdata = {
+	.dma_dev	= &mx3_ipu.dev,
+	.name		= "Sharp-LQ035Q7",
+	.mode		= fb_modedb,
+	.num_modes	= ARRAY_SIZE(fb_modedb),
+};
+
+static struct physmap_flash_data pcm043_flash_data = {
+	.width  = 2,
+};
+
+static struct resource pcm043_flash_resource = {
+	.start	= 0xa0000000,
+	.end	= 0xa1ffffff,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct platform_device pcm043_flash = {
+	.name	= "physmap-flash",
+	.id	= 0,
+	.dev	= {
+		.platform_data  = &pcm043_flash_data,
+	},
+	.resource = &pcm043_flash_resource,
+	.num_resources = 1,
+};
+
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
+#if defined CONFIG_I2C_IMX || defined CONFIG_I2C_IMX_MODULE
+static struct imxi2c_platform_data pcm043_i2c_1_data = {
+	.bitrate = 50000,
+};
+
+static struct at24_platform_data board_eeprom = {
+	.byte_len = 4096,
+	.page_size = 32,
+	.flags = AT24_FLAG_ADDR16,
+};
+
+static struct i2c_board_info pcm043_i2c_devices[] = {
+       {
+		I2C_BOARD_INFO("at24", 0x52), /* E0=0, E1=1, E2=0 */
+		.platform_data = &board_eeprom,
+	}, {
+		I2C_BOARD_INFO("rtc-pcf8563", 0x51),
+		.type = "pcf8563",
+	}
+};
+#endif
+
+static struct platform_device *devices[] __initdata = {
+	&pcm043_flash,
+	&mxc_fec_device,
+};
+
+static struct pad_desc pcm043_pads[] = {
+	/* UART1 */
+	MX35_PAD_CTS1__UART1_CTS,
+	MX35_PAD_RTS1__UART1_RTS,
+	MX35_PAD_TXD1__UART1_TXD_MUX,
+	MX35_PAD_RXD1__UART1_RXD_MUX,
+	/* UART2 */
+	MX35_PAD_CTS2__UART2_CTS,
+	MX35_PAD_RTS2__UART2_RTS,
+	MX35_PAD_TXD2__UART2_TXD_MUX,
+	MX35_PAD_RXD2__UART2_RXD_MUX,
+	/* FEC */
+	MX35_PAD_FEC_TX_CLK__FEC_TX_CLK,
+	MX35_PAD_FEC_RX_CLK__FEC_RX_CLK,
+	MX35_PAD_FEC_RX_DV__FEC_RX_DV,
+	MX35_PAD_FEC_COL__FEC_COL,
+	MX35_PAD_FEC_RDATA0__FEC_RDATA_0,
+	MX35_PAD_FEC_TDATA0__FEC_TDATA_0,
+	MX35_PAD_FEC_TX_EN__FEC_TX_EN,
+	MX35_PAD_FEC_MDC__FEC_MDC,
+	MX35_PAD_FEC_MDIO__FEC_MDIO,
+	MX35_PAD_FEC_TX_ERR__FEC_TX_ERR,
+	MX35_PAD_FEC_RX_ERR__FEC_RX_ERR,
+	MX35_PAD_FEC_CRS__FEC_CRS,
+	MX35_PAD_FEC_RDATA1__FEC_RDATA_1,
+	MX35_PAD_FEC_TDATA1__FEC_TDATA_1,
+	MX35_PAD_FEC_RDATA2__FEC_RDATA_2,
+	MX35_PAD_FEC_TDATA2__FEC_TDATA_2,
+	MX35_PAD_FEC_RDATA3__FEC_RDATA_3,
+	MX35_PAD_FEC_TDATA3__FEC_TDATA_3,
+	/* I2C1 */
+	MX35_PAD_I2C1_CLK__I2C1_SCL,
+	MX35_PAD_I2C1_DAT__I2C1_SDA,
+	/* Display */
+	MX35_PAD_LD0__IPU_DISPB_DAT_0,
+	MX35_PAD_LD1__IPU_DISPB_DAT_1,
+	MX35_PAD_LD2__IPU_DISPB_DAT_2,
+	MX35_PAD_LD3__IPU_DISPB_DAT_3,
+	MX35_PAD_LD4__IPU_DISPB_DAT_4,
+	MX35_PAD_LD5__IPU_DISPB_DAT_5,
+	MX35_PAD_LD6__IPU_DISPB_DAT_6,
+	MX35_PAD_LD7__IPU_DISPB_DAT_7,
+	MX35_PAD_LD8__IPU_DISPB_DAT_8,
+	MX35_PAD_LD9__IPU_DISPB_DAT_9,
+	MX35_PAD_LD10__IPU_DISPB_DAT_10,
+	MX35_PAD_LD11__IPU_DISPB_DAT_11,
+	MX35_PAD_LD12__IPU_DISPB_DAT_12,
+	MX35_PAD_LD13__IPU_DISPB_DAT_13,
+	MX35_PAD_LD14__IPU_DISPB_DAT_14,
+	MX35_PAD_LD15__IPU_DISPB_DAT_15,
+	MX35_PAD_LD16__IPU_DISPB_DAT_16,
+	MX35_PAD_LD17__IPU_DISPB_DAT_17,
+	MX35_PAD_D3_HSYNC__IPU_DISPB_D3_HSYNC,
+	MX35_PAD_D3_FPSHIFT__IPU_DISPB_D3_CLK,
+	MX35_PAD_D3_DRDY__IPU_DISPB_D3_DRDY,
+	MX35_PAD_CONTRAST__IPU_DISPB_CONTR,
+	MX35_PAD_D3_VSYNC__IPU_DISPB_D3_VSYNC,
+	MX35_PAD_D3_REV__IPU_DISPB_D3_REV,
+	MX35_PAD_D3_CLS__IPU_DISPB_D3_CLS,
+	MX35_PAD_D3_SPL__IPU_DISPB_D3_SPL
+};
+
+/*
+ * Board specific initialization.
+ */
+static void __init mxc_board_init(void)
+{
+	mxc_iomux_v3_setup_multiple_pads(pcm043_pads, ARRAY_SIZE(pcm043_pads));
+
+	platform_add_devices(devices, ARRAY_SIZE(devices));
+
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+
+	mxc_register_device(&mxc_uart_device1, &uart_pdata);
+
+#if defined CONFIG_I2C_IMX || defined CONFIG_I2C_IMX_MODULE
+	i2c_register_board_info(0, pcm043_i2c_devices,
+			ARRAY_SIZE(pcm043_i2c_devices));
+
+	mxc_register_device(&mxc_i2c_device0, &pcm043_i2c_1_data);
+#endif
+
+	mxc_register_device(&mx3_ipu, &mx3_ipu_data);
+	mxc_register_device(&mx3_fb, &mx3fb_pdata);
+}
+
+static void __init pcm043_timer_init(void)
+{
+	mx35_clocks_init();
+}
+
+struct sys_timer pcm043_timer = {
+	.init	= pcm043_timer_init,
+};
+
+MACHINE_START(PCM043, "Phytec Phycore pcm043")
+	/* Maintainer: Pengutronix */
+	.phys_io	= AIPS1_BASE_ADDR,
+	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
+	.boot_params    = PHYS_OFFSET + 0x100,
+	.map_io         = mxc_map_io,
+	.init_irq       = mxc_init_irq,
+	.init_machine   = mxc_board_init,
+	.timer          = &pcm043_timer,
+MACHINE_END
+
diff --git a/arch/arm/plat-mxc/include/mach/board-pcm043.h b/arch/arm/plat-mxc/include/mach/board-pcm043.h
new file mode 100644
index 0000000..15fbdf1
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/board-pcm043.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (C) 2008 Sascha Hauer, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __ASM_ARCH_MXC_BOARD_PCM043_H__
+#define __ASM_ARCH_MXC_BOARD_PCM043_H__
+
+/* mandatory for CONFIG_LL_DEBUG */
+
+#define MXC_LL_UART_PADDR	UART1_BASE_ADDR
+#define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
+
+#endif /* __ASM_ARCH_MXC_BOARD_PCM043_H__ */
diff --git a/arch/arm/plat-mxc/include/mach/debug-macro.S b/arch/arm/plat-mxc/include/mach/debug-macro.S
index 4f77314..4e7c8d2 100644
--- a/arch/arm/plat-mxc/include/mach/debug-macro.S
+++ b/arch/arm/plat-mxc/include/mach/debug-macro.S
@@ -34,6 +34,9 @@
 #ifdef CONFIG_MACH_QONG
 #include <mach/board-qong.h>
 #endif
+#ifdef CONFIG_MACH_PCM043
+#include <mach/board-pcm043.h>
+#endif
 		.macro	addruart,rx
 		mrc	p15, 0, \rx, c1, c0
 		tst	\rx, #1			@ MMU enabled?
-- 
cgit v0.10.2


From d16caf69ce26dbd2ec3ae4e0072c2a60f1896c89 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 23 Mar 2009 10:07:48 +0100
Subject: i.MX35 clock support: remove automotive path

It is no longer present in newer cores. Unfortunately Freescale decided
to put the bit which decides between automotive clock path and consumer
clock path in the automotive clock path direction. With current code we
cannot detect the core revision, so just remove automotive path completely.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c
index 53a112d..db4aba7 100644
--- a/arch/arm/mach-mx3/clock-imx35.c
+++ b/arch/arm/mach-mx3/clock-imx35.c
@@ -147,34 +147,16 @@ static struct arm_ahb_div clk_consumer[] = {
 	{ .arm = 0, .ahb = 0, .sel = 0},
 };
 
-static struct arm_ahb_div clk_automotive[] = {
-	{ .arm = 1, .ahb = 3, .sel = 0},
-	{ .arm = 1, .ahb = 2, .sel = 1},
-	{ .arm = 2, .ahb = 1, .sel = 1},
-	{ .arm = 0, .ahb = 0, .sel = 0},
-	{ .arm = 1, .ahb = 6, .sel = 0},
-	{ .arm = 1, .ahb = 4, .sel = 1},
-	{ .arm = 2, .ahb = 2, .sel = 1},
-	{ .arm = 0, .ahb = 0, .sel = 0},
-};
-
 static unsigned long get_rate_arm(void)
 {
 	unsigned long pdr0 = __raw_readl(CCM_BASE + CCM_PDR0);
 	struct arm_ahb_div *aad;
 	unsigned long fref = get_rate_mpll();
 
-	if (pdr0 & 1) {
-		/* consumer path */
-		aad = &clk_consumer[(pdr0 >> 16) & 0xf];
-		if (aad->sel)
-			fref = fref * 2 / 3;
-	} else {
-		/* auto path */
-		aad = &clk_automotive[(pdr0 >> 9) & 0x7];
-		if (aad->sel)
-			fref = fref * 3 / 4;
-	}
+	aad = &clk_consumer[(pdr0 >> 16) & 0xf];
+	if (aad->sel)
+		fref = fref * 2 / 3;
+
 	return fref / aad->arm;
 }
 
@@ -184,12 +166,7 @@ static unsigned long get_rate_ahb(struct clk *clk)
 	struct arm_ahb_div *aad;
 	unsigned long fref = get_rate_mpll();
 
-	if (pdr0 & 1)
-		/* consumer path */
-		aad = &clk_consumer[(pdr0 >> 16) & 0xf];
-	else
-		/* auto path */
-		aad = &clk_automotive[(pdr0 >> 9) & 0x7];
+	aad = &clk_consumer[(pdr0 >> 16) & 0xf];
 
 	return fref / aad->ahb;
 }
-- 
cgit v0.10.2


From 01ac7d584d7299c787bb64067231a06d3b5e52e3 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 13 Mar 2009 19:52:41 +0100
Subject: pcm037: setup all pins at once and not in init functions

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index b5227d8..c26919d 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -46,6 +46,76 @@
 
 #include "devices.h"
 
+static unsigned int pcm037_pins[] = {
+	/* I2C */
+	MX31_PIN_CSPI2_MOSI__SCL,
+	MX31_PIN_CSPI2_MISO__SDA,
+	/* SDHC1 */
+	MX31_PIN_SD1_DATA3__SD1_DATA3,
+	MX31_PIN_SD1_DATA2__SD1_DATA2,
+	MX31_PIN_SD1_DATA1__SD1_DATA1,
+	MX31_PIN_SD1_DATA0__SD1_DATA0,
+	MX31_PIN_SD1_CLK__SD1_CLK,
+	MX31_PIN_SD1_CMD__SD1_CMD,
+	IOMUX_MODE(MX31_PIN_SCK6, IOMUX_CONFIG_GPIO), /* card detect */
+	IOMUX_MODE(MX31_PIN_SFS6, IOMUX_CONFIG_GPIO), /* write protect */
+	/* SPI1 */
+	MX31_PIN_CSPI1_MOSI__MOSI,
+	MX31_PIN_CSPI1_MISO__MISO,
+	MX31_PIN_CSPI1_SCLK__SCLK,
+	MX31_PIN_CSPI1_SPI_RDY__SPI_RDY,
+	MX31_PIN_CSPI1_SS0__SS0,
+	MX31_PIN_CSPI1_SS1__SS1,
+	MX31_PIN_CSPI1_SS2__SS2,
+	/* UART1 */
+	MX31_PIN_CTS1__CTS1,
+	MX31_PIN_RTS1__RTS1,
+	MX31_PIN_TXD1__TXD1,
+	MX31_PIN_RXD1__RXD1,
+	/* UART2 */
+	MX31_PIN_TXD2__TXD2,
+	MX31_PIN_RXD2__RXD2,
+	MX31_PIN_CTS2__CTS2,
+	MX31_PIN_RTS2__RTS2,
+	/* UART3 */
+	MX31_PIN_CSPI3_MOSI__RXD3,
+	MX31_PIN_CSPI3_MISO__TXD3,
+	MX31_PIN_CSPI3_SCLK__RTS3,
+	MX31_PIN_CSPI3_SPI_RDY__CTS3,
+	/* LAN9217 irq pin */
+	IOMUX_MODE(MX31_PIN_GPIO3_1, IOMUX_CONFIG_GPIO),
+	/* Onewire */
+	MX31_PIN_BATT_LINE__OWIRE,
+	/* Framebuffer */
+	MX31_PIN_LD0__LD0,
+	MX31_PIN_LD1__LD1,
+	MX31_PIN_LD2__LD2,
+	MX31_PIN_LD3__LD3,
+	MX31_PIN_LD4__LD4,
+	MX31_PIN_LD5__LD5,
+	MX31_PIN_LD6__LD6,
+	MX31_PIN_LD7__LD7,
+	MX31_PIN_LD8__LD8,
+	MX31_PIN_LD9__LD9,
+	MX31_PIN_LD10__LD10,
+	MX31_PIN_LD11__LD11,
+	MX31_PIN_LD12__LD12,
+	MX31_PIN_LD13__LD13,
+	MX31_PIN_LD14__LD14,
+	MX31_PIN_LD15__LD15,
+	MX31_PIN_LD16__LD16,
+	MX31_PIN_LD17__LD17,
+	MX31_PIN_VSYNC3__VSYNC3,
+	MX31_PIN_HSYNC__HSYNC,
+	MX31_PIN_FPSHIFT__FPSHIFT,
+	MX31_PIN_DRDY0__DRDY0,
+	MX31_PIN_D3_REV__D3_REV,
+	MX31_PIN_CONTRAST__CONTRAST,
+	MX31_PIN_D3_SPL__D3_SPL,
+	MX31_PIN_D3_CLS__D3_CLS,
+	MX31_PIN_LCS0__GPI03_23,
+};
+
 static struct physmap_flash_data pcm037_flash_data = {
 	.width  = 2,
 };
@@ -127,26 +197,8 @@ static struct mxc_nand_platform_data pcm037_nand_board_info = {
 };
 
 #ifdef CONFIG_I2C_IMX
-static int i2c_1_pins[] = {
-	MX31_PIN_CSPI2_MOSI__SCL,
-	MX31_PIN_CSPI2_MISO__SDA,
-};
-
-static int pcm037_i2c_1_init(struct device *dev)
-{
-	return mxc_iomux_setup_multiple_pins(i2c_1_pins, ARRAY_SIZE(i2c_1_pins),
-			"i2c-1");
-}
-
-static void pcm037_i2c_1_exit(struct device *dev)
-{
-	mxc_iomux_release_multiple_pins(i2c_1_pins, ARRAY_SIZE(i2c_1_pins));
-}
-
 static struct imxi2c_platform_data pcm037_i2c_1_data = {
 	.bitrate = 100000,
-	.init = pcm037_i2c_1_init,
-	.exit = pcm037_i2c_1_exit,
 };
 
 static struct at24_platform_data board_eeprom = {
@@ -166,24 +218,13 @@ static struct i2c_board_info pcm037_i2c_devices[] = {
 };
 #endif
 
-static int sdhc1_pins[] = {
-	MX31_PIN_SD1_DATA3__SD1_DATA3,
-	MX31_PIN_SD1_DATA2__SD1_DATA2,
-	MX31_PIN_SD1_DATA1__SD1_DATA1,
-	MX31_PIN_SD1_DATA0__SD1_DATA0,
-	MX31_PIN_SD1_CLK__SD1_CLK,
-	MX31_PIN_SD1_CMD__SD1_CMD,
-};
-
 static int pcm970_sdhc1_init(struct device *dev, irq_handler_t h, void *data)
 {
-	return mxc_iomux_setup_multiple_pins(sdhc1_pins, ARRAY_SIZE(sdhc1_pins),
-				"sdhc-1");
+	return 0;
 }
 
 static void pcm970_sdhc1_exit(struct device *dev, void *data)
 {
-	mxc_iomux_release_multiple_pins(sdhc1_pins, ARRAY_SIZE(sdhc1_pins));
 }
 
 /* No card and rw detection at the moment */
@@ -198,38 +239,23 @@ static struct platform_device *devices[] __initdata = {
 	&pcm037_sram_device,
 };
 
-static int uart0_pins[] = {
-	MX31_PIN_CTS1__CTS1,
-	MX31_PIN_RTS1__RTS1,
-	MX31_PIN_TXD1__TXD1,
-	MX31_PIN_RXD1__RXD1
-};
-
-static int uart2_pins[] = {
-	MX31_PIN_CSPI3_MOSI__RXD3,
-	MX31_PIN_CSPI3_MISO__TXD3
-};
-
 /*
  * Board specific initialization.
  */
 static void __init mxc_board_init(void)
 {
+	mxc_iomux_setup_multiple_pins(pcm037_pins, ARRAY_SIZE(pcm037_pins),
+			"pcm037");
+
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 
-	mxc_iomux_setup_multiple_pins(uart0_pins, ARRAY_SIZE(uart0_pins), "uart-0");
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
-
-	mxc_iomux_setup_multiple_pins(uart2_pins, ARRAY_SIZE(uart2_pins), "uart-2");
 	mxc_register_device(&mxc_uart_device2, &uart_pdata);
 
-	mxc_iomux_setup_pin(MX31_PIN_BATT_LINE__OWIRE, "batt-0wire");
 	mxc_register_device(&mxc_w1_master_device, NULL);
 
 	/* LAN9217 IRQ pin */
-	if (!mxc_iomux_setup_pin(IOMUX_MODE(MX31_PIN_GPIO3_1, IOMUX_CONFIG_GPIO),
-				"pcm037-eth"))
-		gpio_direction_input(IOMUX_TO_GPIO(MX31_PIN_GPIO3_1));
+	gpio_direction_input(IOMUX_TO_GPIO(MX31_PIN_GPIO3_1));
 
 #ifdef CONFIG_I2C_IMX
 	i2c_register_board_info(1, pcm037_i2c_devices,
-- 
cgit v0.10.2


From dddd4a493dfa9ff2640ff683f04d0f3c8c287111 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 10 Mar 2009 11:56:10 +0100
Subject: pcm037: add SDHC card detection

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index c26919d..91495cf 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -28,6 +28,9 @@
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
 #include <linux/i2c/at24.h>
+#include <linux/delay.h>
+#include <linux/spi/spi.h>
+#include <linux/irq.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -218,17 +221,41 @@ static struct i2c_board_info pcm037_i2c_devices[] = {
 };
 #endif
 
-static int pcm970_sdhc1_init(struct device *dev, irq_handler_t h, void *data)
+/* Not connected by default */
+#ifdef PCM970_SDHC_RW_SWITCH
+static int pcm970_sdhc1_get_ro(struct device *dev)
 {
-	return 0;
+	return gpio_get_value(IOMUX_TO_GPIO(MX31_PIN_SFS6));
+}
+#endif
+
+static int pcm970_sdhc1_init(struct device *dev, irq_handler_t detect_irq,
+		void *data)
+{
+	int ret;
+	int gpio_det, gpio_wp;
+
+	gpio_det = IOMUX_TO_GPIO(MX31_PIN_SCK6);
+	gpio_wp = IOMUX_TO_GPIO(MX31_PIN_SFS6);
+
+	gpio_direction_input(gpio_det);
+	gpio_direction_input(gpio_wp);
+
+	ret = request_irq(IOMUX_TO_IRQ(MX31_PIN_SCK6), detect_irq,
+			IRQF_DISABLED | IRQF_TRIGGER_FALLING,
+				"sdhc-detect", data);
+	return ret;
 }
 
 static void pcm970_sdhc1_exit(struct device *dev, void *data)
 {
+	free_irq(IOMUX_TO_IRQ(MX31_PIN_SCK6), data);
 }
 
-/* No card and rw detection at the moment */
 static struct imxmmc_platform_data sdhc_pdata = {
+#ifdef PCM970_SDHC_RW_SWITCH
+	.get_ro = pcm970_sdhc1_get_ro,
+#endif
 	.init = pcm970_sdhc1_init,
 	.exit = pcm970_sdhc1_exit,
 };
-- 
cgit v0.10.2


From 76b6ea0aaa19fe94338bf2cd49a90d14976595ca Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 1 Apr 2009 10:09:18 +0200
Subject: MX35: Fix IPU/Framebuffer clock names

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c
index db4aba7..1814ba44d 100644
--- a/arch/arm/mach-mx3/clock-imx35.c
+++ b/arch/arm/mach-mx3/clock-imx35.c
@@ -407,7 +407,8 @@ static struct clk_lookup lookups[] __initdata = {
 	_REGISTER_CLOCK("imx-i2c.1", NULL, i2c2_clk)
 	_REGISTER_CLOCK("imx-i2c.2", NULL, i2c3_clk)
 	_REGISTER_CLOCK(NULL, "iomuxc", iomuxc_clk)
-	_REGISTER_CLOCK(NULL, "ipu", ipu_clk)
+	_REGISTER_CLOCK("ipu-core", NULL, ipu_clk)
+	_REGISTER_CLOCK("mx3_sdc_fb", NULL, ipu_clk)
 	_REGISTER_CLOCK(NULL, "kpp", kpp_clk)
 	_REGISTER_CLOCK(NULL, "mlb", mlb_clk)
 	_REGISTER_CLOCK(NULL, "mshc", mshc_clk)
-- 
cgit v0.10.2


From 13e9f61235616966f9f6caefd533046c19d35c92 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 30 Mar 2009 15:04:38 +0200
Subject: pcm037: Add support for UART2

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index 91495cf..bd475cc 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -277,6 +277,7 @@ static void __init mxc_board_init(void)
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+	mxc_register_device(&mxc_uart_device1, &uart_pdata);
 	mxc_register_device(&mxc_uart_device2, &uart_pdata);
 
 	mxc_register_device(&mxc_w1_master_device, NULL);
-- 
cgit v0.10.2


From cd4a05f9df859e7cd2efa96e035444a3decb427a Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 2 Apr 2009 22:32:10 +0200
Subject: MXC: rename mxc_map_io to architecture specific versions

This allows us to have more mapping functions for more than one
i.MX architecture in the kernel. As this is the earliest board
specific hook we have, also use it to set the cpu type.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx1/generic.c b/arch/arm/mach-mx1/generic.c
index 0dec6f3..9c3528d 100644
--- a/arch/arm/mach-mx1/generic.c
+++ b/arch/arm/mach-mx1/generic.c
@@ -37,7 +37,9 @@ static struct map_desc imx_io_desc[] __initdata = {
 	}
 };
 
-void __init mxc_map_io(void)
+void __init mx1_map_io(void)
 {
+	mxc_set_cpu_type(MXC_CPU_MX1);
+
 	iotable_init(imx_io_desc, ARRAY_SIZE(imx_io_desc));
 }
diff --git a/arch/arm/mach-mx1/mx1ads.c b/arch/arm/mach-mx1/mx1ads.c
index e54057f..7a648c1 100644
--- a/arch/arm/mach-mx1/mx1ads.c
+++ b/arch/arm/mach-mx1/mx1ads.c
@@ -198,7 +198,7 @@ MACHINE_START(MXLADS, "Freescale MXLADS")
 	.phys_io	= IMX_IO_PHYS,
 	.io_pg_offst	= (IMX_IO_BASE >> 18) & 0xfffc,
 	.boot_params	= PHYS_OFFSET + 0x100,
-	.map_io		= mxc_map_io,
+	.map_io		= mx1_map_io,
 	.init_irq	= mxc_init_irq,
 	.timer		= &mx1ads_timer,
 	.init_machine	= mx1ads_init,
diff --git a/arch/arm/mach-mx1/scb9328.c b/arch/arm/mach-mx1/scb9328.c
index 0e71f3f..20e0b5b 100644
--- a/arch/arm/mach-mx1/scb9328.c
+++ b/arch/arm/mach-mx1/scb9328.c
@@ -153,7 +153,7 @@ MACHINE_START(SCB9328, "Synertronixx scb9328")
 	.phys_io	= 0x00200000,
 	.io_pg_offst	= ((0xe0200000) >> 18) & 0xfffc,
 	.boot_params	= 0x08000100,
-	.map_io		= mxc_map_io,
+	.map_io		= mx1_map_io,
 	.init_irq	= mxc_init_irq,
 	.timer		= &scb9328_timer,
 	.init_machine	= scb9328_init,
diff --git a/arch/arm/mach-mx2/generic.c b/arch/arm/mach-mx2/generic.c
index bd51dd0..169372f 100644
--- a/arch/arm/mach-mx2/generic.c
+++ b/arch/arm/mach-mx2/generic.c
@@ -69,7 +69,17 @@ static struct map_desc mxc_io_desc[] __initdata = {
  * system startup to create static physical to virtual
  * memory map for the IO modules.
  */
-void __init mxc_map_io(void)
+void __init mx21_map_io(void)
 {
+	mxc_set_cpu_type(MXC_CPU_MX21);
+
 	iotable_init(mxc_io_desc, ARRAY_SIZE(mxc_io_desc));
 }
+
+void __init mx27_map_io(void)
+{
+	mxc_set_cpu_type(MXC_CPU_MX27);
+
+	iotable_init(mxc_io_desc, ARRAY_SIZE(mxc_io_desc));
+}
+
diff --git a/arch/arm/mach-mx2/mx27ads.c b/arch/arm/mach-mx2/mx27ads.c
index 4a3b097..f4ad932 100644
--- a/arch/arm/mach-mx2/mx27ads.c
+++ b/arch/arm/mach-mx2/mx27ads.c
@@ -277,7 +277,7 @@ static struct map_desc mx27ads_io_desc[] __initdata = {
 
 static void __init mx27ads_map_io(void)
 {
-	mxc_map_io();
+	mx27_map_io();
 	iotable_init(mx27ads_io_desc, ARRAY_SIZE(mx27ads_io_desc));
 }
 
diff --git a/arch/arm/mach-mx2/pcm038.c b/arch/arm/mach-mx2/pcm038.c
index aa4eaa6..732937c 100644
--- a/arch/arm/mach-mx2/pcm038.c
+++ b/arch/arm/mach-mx2/pcm038.c
@@ -295,7 +295,7 @@ MACHINE_START(PCM038, "phyCORE-i.MX27")
 	.phys_io        = AIPI_BASE_ADDR,
 	.io_pg_offst    = ((AIPI_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params    = PHYS_OFFSET + 0x100,
-	.map_io         = mxc_map_io,
+	.map_io         = mx27_map_io,
 	.init_irq       = mxc_init_irq,
 	.init_machine   = pcm038_init,
 	.timer          = &pcm038_timer,
diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c
index 1814ba44d..0d76521 100644
--- a/arch/arm/mach-mx3/clock-imx35.c
+++ b/arch/arm/mach-mx3/clock-imx35.c
@@ -440,8 +440,6 @@ int __init mx35_clocks_init()
 	int i;
 	unsigned int ll = 0;
 
-	mxc_set_cpu_type(MXC_CPU_MX35);
-
 #ifdef CONFIG_DEBUG_LL_CONSOLE
 	ll = (3 << 16);
 #endif
diff --git a/arch/arm/mach-mx3/clock.c b/arch/arm/mach-mx3/clock.c
index 9957a11..09cbd4a 100644
--- a/arch/arm/mach-mx3/clock.c
+++ b/arch/arm/mach-mx3/clock.c
@@ -566,8 +566,6 @@ int __init mx31_clocks_init(unsigned long fref)
 	u32 reg;
 	int i;
 
-	mxc_set_cpu_type(MXC_CPU_MX31);
-
 	ckih_rate = fref;
 
 	for (i = 0; i < ARRAY_SIZE(lookups); i++)
diff --git a/arch/arm/mach-mx3/mm.c b/arch/arm/mach-mx3/mm.c
index 9e1459c..1f5fdd4 100644
--- a/arch/arm/mach-mx3/mm.c
+++ b/arch/arm/mach-mx3/mm.c
@@ -72,8 +72,17 @@ static struct map_desc mxc_io_desc[] __initdata = {
  * system startup to create static physical to virtual memory mappings
  * for the IO modules.
  */
-void __init mxc_map_io(void)
+void __init mx31_map_io(void)
 {
+	mxc_set_cpu_type(MXC_CPU_MX31);
+
+	iotable_init(mxc_io_desc, ARRAY_SIZE(mxc_io_desc));
+}
+
+void __init mx35_map_io(void)
+{
+	mxc_set_cpu_type(MXC_CPU_MX35);
+
 	iotable_init(mxc_io_desc, ARRAY_SIZE(mxc_io_desc));
 }
 
diff --git a/arch/arm/mach-mx3/mx31ads.c b/arch/arm/mach-mx3/mx31ads.c
index a6d6efe..db2e06a 100644
--- a/arch/arm/mach-mx3/mx31ads.c
+++ b/arch/arm/mach-mx3/mx31ads.c
@@ -511,7 +511,7 @@ static struct map_desc mx31ads_io_desc[] __initdata = {
  */
 static void __init mx31ads_map_io(void)
 {
-	mxc_map_io();
+	mx31_map_io();
 	iotable_init(mx31ads_io_desc, ARRAY_SIZE(mx31ads_io_desc));
 }
 
diff --git a/arch/arm/mach-mx3/mx31lite.c b/arch/arm/mach-mx3/mx31lite.c
index 894d98c..76b36da 100644
--- a/arch/arm/mach-mx3/mx31lite.c
+++ b/arch/arm/mach-mx3/mx31lite.c
@@ -59,7 +59,7 @@ static struct map_desc mx31lite_io_desc[] __initdata = {
  */
 void __init mx31lite_map_io(void)
 {
-	mxc_map_io();
+	mx31_map_io();
 	iotable_init(mx31lite_io_desc, ARRAY_SIZE(mx31lite_io_desc));
 }
 
diff --git a/arch/arm/mach-mx3/mx31moboard.c b/arch/arm/mach-mx3/mx31moboard.c
index 34c2a1b..77168ed 100644
--- a/arch/arm/mach-mx3/mx31moboard.c
+++ b/arch/arm/mach-mx3/mx31moboard.c
@@ -117,7 +117,7 @@ MACHINE_START(MX31MOBOARD, "EPFL Mobots mx31moboard")
 	.phys_io	= AIPS1_BASE_ADDR,
 	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params    = PHYS_OFFSET + 0x100,
-	.map_io         = mxc_map_io,
+	.map_io         = mx31_map_io,
 	.init_irq       = mxc_init_irq,
 	.init_machine   = mxc_board_init,
 	.timer          = &mx31moboard_timer,
diff --git a/arch/arm/mach-mx3/mx31pdk.c b/arch/arm/mach-mx3/mx31pdk.c
index bc63f17..048084b 100644
--- a/arch/arm/mach-mx3/mx31pdk.c
+++ b/arch/arm/mach-mx3/mx31pdk.c
@@ -84,7 +84,7 @@ MACHINE_START(MX31_3DS, "Freescale MX31PDK (3DS)")
 	.phys_io	= AIPS1_BASE_ADDR,
 	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params    = PHYS_OFFSET + 0x100,
-	.map_io         = mxc_map_io,
+	.map_io         = mx31_map_io,
 	.init_irq       = mxc_init_irq,
 	.init_machine   = mxc_board_init,
 	.timer          = &mx31pdk_timer,
diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index bd475cc..6809891 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -309,7 +309,7 @@ MACHINE_START(PCM037, "Phytec Phycore pcm037")
 	.phys_io	= AIPS1_BASE_ADDR,
 	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params    = PHYS_OFFSET + 0x100,
-	.map_io         = mxc_map_io,
+	.map_io         = mx31_map_io,
 	.init_irq       = mxc_init_irq,
 	.init_machine   = mxc_board_init,
 	.timer          = &pcm037_timer,
diff --git a/arch/arm/mach-mx3/pcm043.c b/arch/arm/mach-mx3/pcm043.c
index 638b358..8d27c32 100644
--- a/arch/arm/mach-mx3/pcm043.c
+++ b/arch/arm/mach-mx3/pcm043.c
@@ -244,7 +244,7 @@ MACHINE_START(PCM043, "Phytec Phycore pcm043")
 	.phys_io	= AIPS1_BASE_ADDR,
 	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params    = PHYS_OFFSET + 0x100,
-	.map_io         = mxc_map_io,
+	.map_io         = mx35_map_io,
 	.init_irq       = mxc_init_irq,
 	.init_machine   = mxc_board_init,
 	.timer          = &pcm043_timer,
diff --git a/arch/arm/plat-mxc/include/mach/common.h b/arch/arm/plat-mxc/include/mach/common.h
index b2f9b72..02c3cd0 100644
--- a/arch/arm/plat-mxc/include/mach/common.h
+++ b/arch/arm/plat-mxc/include/mach/common.h
@@ -14,7 +14,11 @@
 struct platform_device;
 struct clk;
 
-extern void mxc_map_io(void);
+extern void mx1_map_io(void);
+extern void mx21_map_io(void);
+extern void mx27_map_io(void);
+extern void mx31_map_io(void);
+extern void mx35_map_io(void);
 extern void mxc_init_irq(void);
 extern void mxc_timer_init(struct clk *timer_clk);
 extern int mx1_clocks_init(unsigned long fref);
-- 
cgit v0.10.2


From ec996ba9b5cc2a085ca6938678317c4ff9c954d1 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 18 Feb 2009 20:58:40 +0100
Subject: mxc timer: make compile time independent

Currently we depend on hardcoded base addresses for the timer.
This prevents us from compiling in more than one i.MX architecture
at a time. This patch changes the base address to a runtime
calculated one.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/include/mach/mxc_timer.h b/arch/arm/plat-mxc/include/mach/mxc_timer.h
deleted file mode 100644
index 6c19a13..0000000
--- a/arch/arm/plat-mxc/include/mach/mxc_timer.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * mxc_timer.h
- *
- * Copyright (C) 2008 Juergen Beisert (kernel@pengutronix.de)
- *
- * Platform independent (i.MX1, i.MX2, i.MX3) definition for timer handling.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- */
-
-#ifndef __PLAT_MXC_TIMER_H
-#define __PLAT_MXC_TIMER_H
-
-#include <linux/clk.h>
-#include <mach/hardware.h>
-
-#ifdef CONFIG_ARCH_MX1
-#define TIMER_BASE		IO_ADDRESS(TIM1_BASE_ADDR)
-#define TIMER_INTERRUPT		TIM1_INT
-
-#define TCTL_VAL		TCTL_CLK_PCLK1
-#define TCTL_IRQEN		(1<<4)
-#define TCTL_FRR		(1<<8)
-#define TCTL_CLK_PCLK1		(1<<1)
-#define TCTL_CLK_PCLK1_4	(2<<1)
-#define TCTL_CLK_TIN		(3<<1)
-#define TCTL_CLK_32		(4<<1)
-
-#define MXC_TCTL   0x00
-#define MXC_TPRER  0x04
-#define MXC_TCMP   0x08
-#define MXC_TCR    0x0c
-#define MXC_TCN    0x10
-#define MXC_TSTAT  0x14
-#define TSTAT_CAPT		(1<<1)
-#define TSTAT_COMP		(1<<0)
-
-static inline void gpt_irq_disable(void)
-{
-	unsigned int tmp;
-
-	tmp = __raw_readl(TIMER_BASE + MXC_TCTL);
-	__raw_writel(tmp & ~TCTL_IRQEN, TIMER_BASE + MXC_TCTL);
-}
-
-static inline void gpt_irq_enable(void)
-{
-	__raw_writel(__raw_readl(TIMER_BASE + MXC_TCTL) | TCTL_IRQEN,
-				TIMER_BASE + MXC_TCTL);
-}
-
-static void gpt_irq_acknowledge(void)
-{
-	__raw_writel(0, TIMER_BASE + MXC_TSTAT);
-}
-#endif /* CONFIG_ARCH_MX1 */
-
-#ifdef CONFIG_ARCH_MX2
-#define TIMER_BASE		IO_ADDRESS(GPT1_BASE_ADDR)
-#define TIMER_INTERRUPT		MXC_INT_GPT1
-
-#define MXC_TCTL   0x00
-#define TCTL_VAL		TCTL_CLK_PCLK1
-#define TCTL_CLK_PCLK1		(1<<1)
-#define TCTL_CLK_PCLK1_4	(2<<1)
-#define TCTL_IRQEN		(1<<4)
-#define TCTL_FRR		(1<<8)
-#define MXC_TPRER  0x04
-#define MXC_TCMP   0x08
-#define MXC_TCR    0x0c
-#define MXC_TCN    0x10
-#define MXC_TSTAT  0x14
-#define TSTAT_CAPT		(1<<1)
-#define TSTAT_COMP		(1<<0)
-
-static inline void gpt_irq_disable(void)
-{
-	unsigned int tmp;
-
-	tmp = __raw_readl(TIMER_BASE + MXC_TCTL);
-	__raw_writel(tmp & ~TCTL_IRQEN, TIMER_BASE + MXC_TCTL);
-}
-
-static inline void gpt_irq_enable(void)
-{
-	__raw_writel(__raw_readl(TIMER_BASE + MXC_TCTL) | TCTL_IRQEN,
-				TIMER_BASE + MXC_TCTL);
-}
-
-static void gpt_irq_acknowledge(void)
-{
-	__raw_writel(TSTAT_CAPT | TSTAT_COMP, TIMER_BASE + MXC_TSTAT);
-}
-#endif /* CONFIG_ARCH_MX2 */
-
-#ifdef CONFIG_ARCH_MX3
-#define TIMER_BASE		IO_ADDRESS(GPT1_BASE_ADDR)
-#define TIMER_INTERRUPT		MXC_INT_GPT
-
-#define MXC_TCTL   0x00
-#define TCTL_VAL		(TCTL_CLK_IPG | TCTL_WAITEN)
-#define TCTL_CLK_IPG		(1<<6)
-#define TCTL_FRR		(1<<9)
-#define TCTL_WAITEN		(1<<3)
-
-#define MXC_TPRER  0x04
-#define MXC_TSTAT  0x08
-#define TSTAT_OF1		(1<<0)
-#define TSTAT_OF2		(1<<1)
-#define TSTAT_OF3		(1<<2)
-#define TSTAT_IF1		(1<<3)
-#define TSTAT_IF2		(1<<4)
-#define TSTAT_ROV		(1<<5)
-#define MXC_IR     0x0c
-#define MXC_TCMP   0x10
-#define MXC_TCMP2  0x14
-#define MXC_TCMP3  0x18
-#define MXC_TCR    0x1c
-#define MXC_TCN    0x24
-
-static inline void gpt_irq_disable(void)
-{
-	__raw_writel(0, TIMER_BASE + MXC_IR);
-}
-
-static inline void gpt_irq_enable(void)
-{
-	__raw_writel(1<<0, TIMER_BASE + MXC_IR);
-}
-
-static inline void gpt_irq_acknowledge(void)
-{
-	__raw_writel(TSTAT_OF1, TIMER_BASE + MXC_TSTAT);
-}
-#endif /* CONFIG_ARCH_MX3 */
-
-#define TCTL_SWR		(1<<15)
-#define TCTL_CC			(1<<10)
-#define TCTL_OM			(1<<9)
-#define TCTL_CAP_RIS		(1<<6)
-#define TCTL_CAP_FAL		(2<<6)
-#define TCTL_CAP_RIS_FAL	(3<<6)
-#define TCTL_CAP_ENA		(1<<5)
-#define TCTL_TEN		(1<<0)
-
-#endif
diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index dab3357..85f002e 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -29,22 +29,85 @@
 #include <mach/hardware.h>
 #include <asm/mach/time.h>
 #include <mach/common.h>
-#include <mach/mxc_timer.h>
+
+/* defines common for all i.MX */
+#define MXC_TCTL		0x00
+#define MXC_TCTL_TEN		(1 << 0)
+#define MXC_TPRER		0x04
+
+/* MX1, MX21, MX27 */
+#define MX1_2_TCTL_CLK_PCLK1	(1 << 1)
+#define MX1_2_TCTL_IRQEN	(1 << 4)
+#define MX1_2_TCTL_FRR		(1 << 8)
+#define MX1_2_TCMP		0x08
+#define MX1_2_TCN		0x10
+#define MX1_2_TSTAT		0x14
+
+/* MX21, MX27 */
+#define MX2_TSTAT_CAPT		(1 << 1)
+#define MX2_TSTAT_COMP		(1 << 0)
+
+/* MX31, MX35 */
+#define MX3_TCTL_WAITEN		(1 << 3)
+#define MX3_TCTL_CLK_IPG	(1 << 6)
+#define MX3_TCTL_FRR		(1 << 9)
+#define MX3_IR			0x0c
+#define MX3_TSTAT		0x08
+#define MX3_TSTAT_OF1		(1 << 0)
+#define MX3_TCN			0x24
+#define MX3_TCMP		0x10
 
 static struct clock_event_device clockevent_mxc;
 static enum clock_event_mode clockevent_mode = CLOCK_EVT_MODE_UNUSED;
 
-/* clock source */
+static void __iomem *timer_base;
 
-static cycle_t mxc_get_cycles(struct clocksource *cs)
+static inline void gpt_irq_disable(void)
 {
-	return __raw_readl(TIMER_BASE + MXC_TCN);
+	unsigned int tmp;
+
+	if (cpu_is_mx3())
+		__raw_writel(0, timer_base + MX3_IR);
+	else {
+		tmp = __raw_readl(timer_base + MXC_TCTL);
+		__raw_writel(tmp & ~MX1_2_TCTL_IRQEN, timer_base + MXC_TCTL);
+	}
+}
+
+static inline void gpt_irq_enable(void)
+{
+	if (cpu_is_mx3())
+		__raw_writel(1<<0, timer_base + MX3_IR);
+	else {
+		__raw_writel(__raw_readl(timer_base + MXC_TCTL) | MX1_2_TCTL_IRQEN,
+			timer_base + MXC_TCTL);
+	}
+}
+
+static void gpt_irq_acknowledge(void)
+{
+	if (cpu_is_mx1())
+		__raw_writel(0, timer_base + MX1_2_TSTAT);
+	if (cpu_is_mx2())
+		__raw_writel(MX2_TSTAT_CAPT | MX2_TSTAT_COMP, timer_base + MX1_2_TSTAT);
+	if (cpu_is_mx3())
+		__raw_writel(MX3_TSTAT_OF1, timer_base + MX3_TSTAT);
+}
+
+static cycle_t mx1_2_get_cycles(struct clocksource *cs)
+{
+	return __raw_readl(timer_base + MX1_2_TCN);
+}
+
+static cycle_t mx3_get_cycles(struct clocksource *cs)
+{
+	return __raw_readl(timer_base + MX3_TCN);
 }
 
 static struct clocksource clocksource_mxc = {
 	.name 		= "mxc_timer1",
 	.rating		= 200,
-	.read		= mxc_get_cycles,
+	.read		= mx1_2_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
 	.shift 		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
@@ -54,6 +117,9 @@ static int __init mxc_clocksource_init(struct clk *timer_clk)
 {
 	unsigned int c = clk_get_rate(timer_clk);
 
+	if (cpu_is_mx3())
+		clocksource_mxc.read = mx3_get_cycles;
+
 	clocksource_mxc.mult = clocksource_hz2mult(c,
 					clocksource_mxc.shift);
 	clocksource_register(&clocksource_mxc);
@@ -63,15 +129,29 @@ static int __init mxc_clocksource_init(struct clk *timer_clk)
 
 /* clock event */
 
-static int mxc_set_next_event(unsigned long evt,
+static int mx1_2_set_next_event(unsigned long evt,
 			      struct clock_event_device *unused)
 {
 	unsigned long tcmp;
 
-	tcmp = __raw_readl(TIMER_BASE + MXC_TCN) + evt;
-	__raw_writel(tcmp, TIMER_BASE + MXC_TCMP);
+	tcmp = __raw_readl(timer_base + MX1_2_TCN) + evt;
 
-	return (int)(tcmp - __raw_readl(TIMER_BASE + MXC_TCN)) < 0 ?
+	__raw_writel(tcmp, timer_base + MX1_2_TCMP);
+
+	return (int)(tcmp - __raw_readl(timer_base + MX1_2_TCN)) < 0 ?
+				-ETIME : 0;
+}
+
+static int mx3_set_next_event(unsigned long evt,
+			      struct clock_event_device *unused)
+{
+	unsigned long tcmp;
+
+	tcmp = __raw_readl(timer_base + MX3_TCN) + evt;
+
+	__raw_writel(tcmp, timer_base + MX3_TCMP);
+
+	return (int)(tcmp - __raw_readl(timer_base + MX3_TCN)) < 0 ?
 				-ETIME : 0;
 }
 
@@ -100,8 +180,13 @@ static void mxc_set_mode(enum clock_event_mode mode,
 
 	if (mode != clockevent_mode) {
 		/* Set event time into far-far future */
-		__raw_writel(__raw_readl(TIMER_BASE + MXC_TCN) - 3,
-				TIMER_BASE + MXC_TCMP);
+		if (cpu_is_mx3())
+			__raw_writel(__raw_readl(timer_base + MX3_TCN) - 3,
+					timer_base + MX3_TCMP);
+		else
+			__raw_writel(__raw_readl(timer_base + MX1_2_TCN) - 3,
+					timer_base + MX1_2_TCMP);
+
 		/* Clear pending interrupt */
 		gpt_irq_acknowledge();
 	}
@@ -148,7 +233,10 @@ static irqreturn_t mxc_timer_interrupt(int irq, void *dev_id)
 	struct clock_event_device *evt = &clockevent_mxc;
 	uint32_t tstat;
 
-	tstat = __raw_readl(TIMER_BASE + MXC_TSTAT);
+	if (cpu_is_mx3())
+		tstat = __raw_readl(timer_base + MX1_2_TSTAT);
+	else
+		tstat = __raw_readl(timer_base + MX3_TSTAT);
 
 	gpt_irq_acknowledge();
 
@@ -168,7 +256,7 @@ static struct clock_event_device clockevent_mxc = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.set_mode	= mxc_set_mode,
-	.set_next_event	= mxc_set_next_event,
+	.set_next_event	= mx1_2_set_next_event,
 	.rating		= 200,
 };
 
@@ -176,6 +264,9 @@ static int __init mxc_clockevent_init(struct clk *timer_clk)
 {
 	unsigned int c = clk_get_rate(timer_clk);
 
+	if (cpu_is_mx3())
+		clockevent_mxc.set_next_event = mx3_set_next_event;
+
 	clockevent_mxc.mult = div_sc(c, NSEC_PER_SEC,
 					clockevent_mxc.shift);
 	clockevent_mxc.max_delta_ns =
@@ -192,23 +283,47 @@ static int __init mxc_clockevent_init(struct clk *timer_clk)
 
 void __init mxc_timer_init(struct clk *timer_clk)
 {
+	uint32_t tctl_val;
+	int irq;
+
 	clk_enable(timer_clk);
 
+	if (cpu_is_mx1()) {
+#ifdef CONFIG_ARCH_MX1
+		timer_base = IO_ADDRESS(TIM1_BASE_ADDR);
+		irq = TIM1_INT;
+#endif
+	} else if (cpu_is_mx2()) {
+#ifdef CONFIG_ARCH_MX2
+		timer_base = IO_ADDRESS(GPT1_BASE_ADDR);
+		irq = MXC_INT_GPT1;
+#endif
+	} else if (cpu_is_mx3()) {
+#ifdef CONFIG_ARCH_MX3
+		timer_base = IO_ADDRESS(GPT1_BASE_ADDR);
+		irq = MXC_INT_GPT;
+#endif
+	} else
+		BUG();
+
 	/*
 	 * Initialise to a known state (all timers off, and timing reset)
 	 */
-	__raw_writel(0, TIMER_BASE + MXC_TCTL);
-	__raw_writel(0, TIMER_BASE + MXC_TPRER); /* see datasheet note */
 
-	__raw_writel(TCTL_FRR |	/* free running */
-		     TCTL_VAL |	/* set clocksource and arch specific bits */
-		     TCTL_TEN,	/* start the timer */
-		     TIMER_BASE + MXC_TCTL);
+	__raw_writel(0, timer_base + MXC_TCTL);
+	__raw_writel(0, timer_base + MXC_TPRER); /* see datasheet note */
+
+	if (cpu_is_mx3())
+		tctl_val = MX3_TCTL_CLK_IPG | MX3_TCTL_FRR | MX3_TCTL_WAITEN | MXC_TCTL_TEN;
+	else
+		tctl_val = MX1_2_TCTL_FRR | MX1_2_TCTL_CLK_PCLK1 | MXC_TCTL_TEN;
+
+	__raw_writel(tctl_val, timer_base + MXC_TCTL);
 
 	/* init and register the timer to the framework */
 	mxc_clocksource_init(timer_clk);
 	mxc_clockevent_init(timer_clk);
 
 	/* Make irqs happen */
-	setup_irq(TIMER_INTERRUPT, &mxc_timer_irq);
+	setup_irq(irq, &mxc_timer_irq);
 }
-- 
cgit v0.10.2


From 84c9fa43040af08126326ac8b2a2b18f804eb1e8 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 18 Feb 2009 20:59:04 +0100
Subject: mxc irq: make architecture runtime dependent

Currently we depend on hardcoded base addresses for the interrupt
controller. This prevents us from compiling in more than one i.MX
architecture at a time. This patch changes the base address to a
runtime calculated one.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/irq.c b/arch/arm/plat-mxc/irq.c
index 931a0ee..8aee763 100644
--- a/arch/arm/plat-mxc/irq.c
+++ b/arch/arm/plat-mxc/irq.c
@@ -24,26 +24,27 @@
 #include <asm/mach/irq.h>
 #include <mach/hardware.h>
 
-#define AVIC_BASE		IO_ADDRESS(AVIC_BASE_ADDR)
-#define AVIC_INTCNTL		(AVIC_BASE + 0x00)	/* int control reg */
-#define AVIC_NIMASK		(AVIC_BASE + 0x04)	/* int mask reg */
-#define AVIC_INTENNUM		(AVIC_BASE + 0x08)	/* int enable number reg */
-#define AVIC_INTDISNUM		(AVIC_BASE + 0x0C)	/* int disable number reg */
-#define AVIC_INTENABLEH		(AVIC_BASE + 0x10)	/* int enable reg high */
-#define AVIC_INTENABLEL		(AVIC_BASE + 0x14)	/* int enable reg low */
-#define AVIC_INTTYPEH		(AVIC_BASE + 0x18)	/* int type reg high */
-#define AVIC_INTTYPEL		(AVIC_BASE + 0x1C)	/* int type reg low */
-#define AVIC_NIPRIORITY(x)	(AVIC_BASE + (0x20 + 4 * (7 - (x)))) /* int priority */
-#define AVIC_NIVECSR		(AVIC_BASE + 0x40)	/* norm int vector/status */
-#define AVIC_FIVECSR		(AVIC_BASE + 0x44)	/* fast int vector/status */
-#define AVIC_INTSRCH		(AVIC_BASE + 0x48)	/* int source reg high */
-#define AVIC_INTSRCL		(AVIC_BASE + 0x4C)	/* int source reg low */
-#define AVIC_INTFRCH		(AVIC_BASE + 0x50)	/* int force reg high */
-#define AVIC_INTFRCL		(AVIC_BASE + 0x54)	/* int force reg low */
-#define AVIC_NIPNDH		(AVIC_BASE + 0x58)	/* norm int pending high */
-#define AVIC_NIPNDL		(AVIC_BASE + 0x5C)	/* norm int pending low */
-#define AVIC_FIPNDH		(AVIC_BASE + 0x60)	/* fast int pending high */
-#define AVIC_FIPNDL		(AVIC_BASE + 0x64)	/* fast int pending low */
+#define AVIC_INTCNTL		0x00	/* int control reg */
+#define AVIC_NIMASK		0x04	/* int mask reg */
+#define AVIC_INTENNUM		0x08	/* int enable number reg */
+#define AVIC_INTDISNUM		0x0C	/* int disable number reg */
+#define AVIC_INTENABLEH		0x10	/* int enable reg high */
+#define AVIC_INTENABLEL		0x14	/* int enable reg low */
+#define AVIC_INTTYPEH		0x18	/* int type reg high */
+#define AVIC_INTTYPEL		0x1C	/* int type reg low */
+#define AVIC_NIPRIORITY(x)	(0x20 + 4 * (7 - (x))) /* int priority */
+#define AVIC_NIVECSR		0x40	/* norm int vector/status */
+#define AVIC_FIVECSR		0x44	/* fast int vector/status */
+#define AVIC_INTSRCH		0x48	/* int source reg high */
+#define AVIC_INTSRCL		0x4C	/* int source reg low */
+#define AVIC_INTFRCH		0x50	/* int force reg high */
+#define AVIC_INTFRCL		0x54	/* int force reg low */
+#define AVIC_NIPNDH		0x58	/* norm int pending high */
+#define AVIC_NIPNDL		0x5C	/* norm int pending low */
+#define AVIC_FIPNDH		0x60	/* fast int pending high */
+#define AVIC_FIPNDL		0x64	/* fast int pending low */
+
+static void __iomem *avic_base;
 
 int imx_irq_set_priority(unsigned char irq, unsigned char prio)
 {
@@ -54,11 +55,11 @@ int imx_irq_set_priority(unsigned char irq, unsigned char prio)
 	if (irq >= MXC_INTERNAL_IRQS)
 		return -EINVAL;;
 
-	temp = __raw_readl(AVIC_NIPRIORITY(irq / 8));
+	temp = __raw_readl(avic_base + AVIC_NIPRIORITY(irq / 8));
 	temp &= ~mask;
 	temp |= prio & mask;
 
-	__raw_writel(temp, AVIC_NIPRIORITY(irq / 8));
+	__raw_writel(temp, avic_base + AVIC_NIPRIORITY(irq / 8));
 
 	return 0;
 #else
@@ -76,12 +77,12 @@ int mxc_set_irq_fiq(unsigned int irq, unsigned int type)
 		return -EINVAL;
 
 	if (irq < MXC_INTERNAL_IRQS / 2) {
-		irqt = __raw_readl(AVIC_INTTYPEL) & ~(1 << irq);
-		__raw_writel(irqt | (!!type << irq), AVIC_INTTYPEL);
+		irqt = __raw_readl(avic_base + AVIC_INTTYPEL) & ~(1 << irq);
+		__raw_writel(irqt | (!!type << irq), avic_base + AVIC_INTTYPEL);
 	} else {
 		irq -= MXC_INTERNAL_IRQS / 2;
-		irqt = __raw_readl(AVIC_INTTYPEH) & ~(1 << irq);
-		__raw_writel(irqt | (!!type << irq), AVIC_INTTYPEH);
+		irqt = __raw_readl(avic_base + AVIC_INTTYPEH) & ~(1 << irq);
+		__raw_writel(irqt | (!!type << irq), avic_base + AVIC_INTTYPEH);
 	}
 
 	return 0;
@@ -92,13 +93,13 @@ EXPORT_SYMBOL(mxc_set_irq_fiq);
 /* Disable interrupt number "irq" in the AVIC */
 static void mxc_mask_irq(unsigned int irq)
 {
-	__raw_writel(irq, AVIC_INTDISNUM);
+	__raw_writel(irq, avic_base + AVIC_INTDISNUM);
 }
 
 /* Enable interrupt number "irq" in the AVIC */
 static void mxc_unmask_irq(unsigned int irq)
 {
-	__raw_writel(irq, AVIC_INTENNUM);
+	__raw_writel(irq, avic_base + AVIC_INTENNUM);
 }
 
 static struct irq_chip mxc_avic_chip = {
@@ -116,19 +117,21 @@ void __init mxc_init_irq(void)
 {
 	int i;
 
+	avic_base = IO_ADDRESS(AVIC_BASE_ADDR);
+
 	/* put the AVIC into the reset value with
 	 * all interrupts disabled
 	 */
-	__raw_writel(0, AVIC_INTCNTL);
-	__raw_writel(0x1f, AVIC_NIMASK);
+	__raw_writel(0, avic_base + AVIC_INTCNTL);
+	__raw_writel(0x1f, avic_base + AVIC_NIMASK);
 
 	/* disable all interrupts */
-	__raw_writel(0, AVIC_INTENABLEH);
-	__raw_writel(0, AVIC_INTENABLEL);
+	__raw_writel(0, avic_base + AVIC_INTENABLEH);
+	__raw_writel(0, avic_base + AVIC_INTENABLEL);
 
 	/* all IRQ no FIQ */
-	__raw_writel(0, AVIC_INTTYPEH);
-	__raw_writel(0, AVIC_INTTYPEL);
+	__raw_writel(0, avic_base + AVIC_INTTYPEH);
+	__raw_writel(0, avic_base + AVIC_INTTYPEL);
 	for (i = 0; i < MXC_INTERNAL_IRQS; i++) {
 		set_irq_chip(i, &mxc_avic_chip);
 		set_irq_handler(i, handle_level_irq);
@@ -137,7 +140,7 @@ void __init mxc_init_irq(void)
 
 	/* Set default priority value (0) for all IRQ's */
 	for (i = 0; i < 8; i++)
-		__raw_writel(0, AVIC_NIPRIORITY(i));
+		__raw_writel(0, avic_base + AVIC_NIPRIORITY(i));
 
 	/* init architectures chained interrupt handler */
 	mxc_register_gpios();
@@ -149,3 +152,4 @@ void __init mxc_init_irq(void)
 
 	printk(KERN_INFO "MXC IRQ initialized\n");
 }
+
-- 
cgit v0.10.2


From 8c8fdbc9bd9718b21146065de61c0cafdff11ecb Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 1 Apr 2009 12:40:15 +0200
Subject: [ARM] Remove arch-imx from build system

arch-imx is superseeded by the MXC architecture support.
This patch removes arch-imx from the build system.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e60ec54..be680f3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -308,15 +308,6 @@ config ARCH_H720X
 	help
 	  This enables support for systems based on the Hynix HMS720x
 
-config ARCH_IMX
-	bool "IMX"
-	select CPU_ARM920T
-	select GENERIC_GPIO
-	select GENERIC_TIME
-	select GENERIC_CLOCKEVENTS
-	help
-	  Support for Motorola's i.MX family of processors (MX1, MXL).
-
 config ARCH_IOP13XX
 	bool "IOP13xx-based"
 	depends on MMU
@@ -682,8 +673,6 @@ endif
 
 source "arch/arm/mach-lh7a40x/Kconfig"
 
-source "arch/arm/mach-imx/Kconfig"
-
 source "arch/arm/mach-h720x/Kconfig"
 
 source "arch/arm/mach-versatile/Kconfig"
@@ -1022,7 +1011,7 @@ source "mm/Kconfig"
 config LEDS
 	bool "Timer and CPU usage LEDs"
 	depends on ARCH_CDB89712 || ARCH_EBSA110 || \
-		   ARCH_EBSA285 || ARCH_IMX || ARCH_INTEGRATOR || \
+		   ARCH_EBSA285 || ARCH_INTEGRATOR || \
 		   ARCH_LUBBOCK || MACH_MAINSTONE || ARCH_NETWINDER || \
 		   ARCH_OMAP || ARCH_P720T || ARCH_PXA_IDP || \
 		   ARCH_SA1100 || ARCH_SHARK || ARCH_VERSATILE || \
@@ -1188,7 +1177,7 @@ endmenu
 
 menu "CPU Power Management"
 
-if (ARCH_SA1100 || ARCH_INTEGRATOR || ARCH_OMAP || ARCH_IMX || ARCH_PXA)
+if (ARCH_SA1100 || ARCH_INTEGRATOR || ARCH_OMAP || ARCH_PXA)
 
 source "drivers/cpufreq/Kconfig"
 
@@ -1213,14 +1202,11 @@ config CPU_FREQ_INTEGRATOR
 
 	  If in doubt, say Y.
 
-config CPU_FREQ_IMX
-	tristate "CPUfreq driver for i.MX CPUs"
-	depends on ARCH_IMX && CPU_FREQ
-	default n
-	help
-	  This enables the CPUfreq driver for i.MX CPUs.
-
-	  If in doubt, say N.
+config CPU_FREQ_PXA
+	bool
+	depends on CPU_FREQ && ARCH_PXA && PXA25x
+	default y
+	select CPU_FREQ_DEFAULT_GOV_USERSPACE
 
 endif
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e84729b..921a627 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -135,7 +135,6 @@ endif
     plat-$(CONFIG_PLAT_S3C64XX)	   := s3c64xx s3c
  machine-$(CONFIG_ARCH_LH7A40X)	   := lh7a40x
  machine-$(CONFIG_ARCH_VERSATILE)  := versatile
- machine-$(CONFIG_ARCH_IMX)	   := imx
  machine-$(CONFIG_ARCH_H720X)	   := h720x
  machine-$(CONFIG_ARCH_AAEC2000)   := aaec2000
  machine-$(CONFIG_ARCH_REALVIEW)   := realview
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index b4cf691..3eb87bd 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -155,7 +155,7 @@ config MMC_ATMELMCI_DMA
 
 config MMC_IMX
 	tristate "Motorola i.MX Multimedia Card Interface support"
-	depends on ARCH_IMX
+	depends on ARCH_MX1
 	help
 	  This selects the Motorola i.MX Multimedia card Interface.
 	  If you have a i.MX platform with a Multimedia Card slot,
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 9f460b1..3f5d5a2 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -66,7 +66,7 @@
 #define ONEMS 0xb0 /* One Millisecond register */
 #define UTS   0xb4 /* UART Test Register */
 #endif
-#if defined(CONFIG_ARCH_IMX) || defined(CONFIG_ARCH_MX1)
+#ifdef CONFIG_ARCH_MX1
 #define BIPR1 0xb0 /* Incremental Preset Register 1 */
 #define BIPR2 0xb4 /* Incremental Preset Register 2 */
 #define BIPR3 0xb8 /* Incremental Preset Register 3 */
@@ -96,7 +96,7 @@
 #define  UCR1_RTSDEN     (1<<5)	 /* RTS delta interrupt enable */
 #define  UCR1_SNDBRK     (1<<4)	 /* Send break */
 #define  UCR1_TDMAEN     (1<<3)	 /* Transmitter ready DMA enable */
-#if defined(CONFIG_ARCH_IMX) || defined(CONFIG_ARCH_MX1)
+#ifdef CONFIG_ARCH_MX1
 #define  UCR1_UARTCLKEN  (1<<2)	 /* UART clock enabled */
 #endif
 #if defined CONFIG_ARCH_MX3 || defined CONFIG_ARCH_MX2
@@ -127,7 +127,7 @@
 #define  UCR3_RXDSEN	 (1<<6)  /* Receive status interrupt enable */
 #define  UCR3_AIRINTEN   (1<<5)  /* Async IR wake interrupt enable */
 #define  UCR3_AWAKEN	 (1<<4)  /* Async wake interrupt enable */
-#ifdef CONFIG_ARCH_IMX
+#ifdef CONFIG_ARCH_MX1
 #define  UCR3_REF25 	 (1<<3)  /* Ref freq 25 MHz, only on mx1 */
 #define  UCR3_REF30 	 (1<<2)  /* Ref Freq 30 MHz, only on mx1 */
 #endif
@@ -180,13 +180,6 @@
 #define  UTS_SOFTRST	 (1<<0)	 /* Software reset */
 
 /* We've been assigned a range on the "Low-density serial ports" major */
-#ifdef CONFIG_ARCH_IMX
-#define SERIAL_IMX_MAJOR	204
-#define MINOR_START		41
-#define DEV_NAME		"ttySMX"
-#define MAX_INTERNAL_IRQ	IMX_IRQS
-#endif
-
 #ifdef CONFIG_ARCH_MXC
 #define SERIAL_IMX_MAJOR        207
 #define MINOR_START	        16
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 83a185d..7c61251 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -118,7 +118,7 @@ config SPI_GPIO
 
 config SPI_IMX
 	tristate "Freescale iMX SPI controller"
-	depends on ARCH_IMX && EXPERIMENTAL
+	depends on ARCH_MX1 && EXPERIMENTAL
 	help
 	  This enables using the Freescale iMX SPI controller in master
 	  mode.
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 7826bdc..93258e1 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -397,7 +397,7 @@ config FB_SA1100
 
 config FB_IMX
 	tristate "Motorola i.MX LCD support"
-	depends on FB && (ARCH_IMX || ARCH_MX2)
+	depends on FB && (ARCH_MX1 || ARCH_MX2)
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
-- 
cgit v0.10.2


From 1341d34ffc296f98dc84876f35f3151525dc02a2 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 1 Apr 2009 12:41:21 +0200
Subject: [ARM] remove arch-imx

arch-imx is superseeded by the MXC architecture support.
This patch removes arch/arm/mach-imx from the kernel.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig
deleted file mode 100644
index cddd194..0000000
--- a/arch/arm/mach-imx/Kconfig
+++ /dev/null
@@ -1,11 +0,0 @@
-menu "IMX Implementations"
-	depends on ARCH_IMX
-
-config ARCH_MX1ADS
-	bool "mx1ads"
-	depends on ARCH_IMX
-	select ISA
-	help
-	  Say Y here if you are using the Motorola MX1ADS board
-
-endmenu
diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile
deleted file mode 100644
index b047c7e..0000000
--- a/arch/arm/mach-imx/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-# Object file lists.
-
-obj-y			+= irq.o time.o dma.o generic.o clock.o
-
-obj-$(CONFIG_CPU_FREQ_IMX)	+= cpufreq.o
-
-# Specific board support
-obj-$(CONFIG_ARCH_MX1ADS) += mx1ads.o
-
-# Support for blinky lights
-led-y := leds.o
-
-obj-$(CONFIG_LEDS)	+=  $(led-y)
-led-$(CONFIG_ARCH_MX1ADS) += leds-mx1ads.o
diff --git a/arch/arm/mach-imx/Makefile.boot b/arch/arm/mach-imx/Makefile.boot
deleted file mode 100644
index fd72ce5..0000000
--- a/arch/arm/mach-imx/Makefile.boot
+++ /dev/null
@@ -1,2 +0,0 @@
-    zreladdr-$(CONFIG_ARCH_MX1ADS)	:= 0x08008000
-
diff --git a/arch/arm/mach-imx/clock.c b/arch/arm/mach-imx/clock.c
deleted file mode 100644
index cf332ae..0000000
--- a/arch/arm/mach-imx/clock.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- *  Copyright (C) 2008 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/math64.h>
-#include <linux/err.h>
-#include <linux/io.h>
-
-#include <mach/hardware.h>
-
-/*
- * Very simple approach: We can't disable clocks, so we do
- * not need refcounting
- */
-
-struct clk {
-	struct list_head node;
-	const char *name;
-	unsigned long (*get_rate)(void);
-};
-
-/*
- *  get the system pll clock in Hz
- *
- *                  mfi + mfn / (mfd +1)
- *  f = 2 * f_ref * --------------------
- *                        pd + 1
- */
-static unsigned long imx_decode_pll(unsigned int pll, u32 f_ref)
-{
-	unsigned long long ll;
-	unsigned long quot;
-
-	u32 mfi = (pll >> 10) & 0xf;
-	u32 mfn = pll & 0x3ff;
-	u32 mfd = (pll >> 16) & 0x3ff;
-	u32 pd =  (pll >> 26) & 0xf;
-
-	mfi = mfi <= 5 ? 5 : mfi;
-
-	ll = 2 * (unsigned long long)f_ref *
-		((mfi << 16) + (mfn << 16) / (mfd + 1));
-	quot = (pd + 1) * (1 << 16);
-	ll += quot / 2;
-	do_div(ll, quot);
-	return (unsigned long)ll;
-}
-
-static unsigned long imx_get_system_clk(void)
-{
-	u32 f_ref = (CSCR & CSCR_SYSTEM_SEL) ? 16000000 : (CLK32 * 512);
-
-	return imx_decode_pll(SPCTL0, f_ref);
-}
-
-static unsigned long imx_get_mcu_clk(void)
-{
-	return imx_decode_pll(MPCTL0, CLK32 * 512);
-}
-
-/*
- *  get peripheral clock 1 ( UART[12], Timer[12], PWM )
- */
-static unsigned long imx_get_perclk1(void)
-{
-	return imx_get_system_clk() / (((PCDR) & 0xf)+1);
-}
-
-/*
- *  get peripheral clock 2 ( LCD, SD, SPI[12] )
- */
-static unsigned long imx_get_perclk2(void)
-{
-	return imx_get_system_clk() / (((PCDR>>4) & 0xf)+1);
-}
-
-/*
- *  get peripheral clock 3 ( SSI )
- */
-static unsigned long imx_get_perclk3(void)
-{
-	return imx_get_system_clk() / (((PCDR>>16) & 0x7f)+1);
-}
-
-/*
- *  get hclk ( SDRAM, CSI, Memory Stick, I2C, DMA )
- */
-static unsigned long imx_get_hclk(void)
-{
-	return imx_get_system_clk() / (((CSCR>>10) & 0xf)+1);
-}
-
-static struct clk clk_system_clk = {
-	.name = "system_clk",
-	.get_rate = imx_get_system_clk,
-};
-
-static struct clk clk_hclk = {
-	.name = "hclk",
-	.get_rate = imx_get_hclk,
-};
-
-static struct clk clk_mcu_clk = {
-	.name = "mcu_clk",
-	.get_rate = imx_get_mcu_clk,
-};
-
-static struct clk clk_perclk1 = {
-	.name = "perclk1",
-	.get_rate = imx_get_perclk1,
-};
-
-static struct clk clk_uart_clk = {
-	.name = "uart_clk",
-	.get_rate = imx_get_perclk1,
-};
-
-static struct clk clk_perclk2 = {
-	.name = "perclk2",
-	.get_rate = imx_get_perclk2,
-};
-
-static struct clk clk_perclk3 = {
-	.name = "perclk3",
-	.get_rate = imx_get_perclk3,
-};
-
-static struct clk *clks[] = {
-	&clk_perclk1,
-	&clk_perclk2,
-	&clk_perclk3,
-	&clk_system_clk,
-	&clk_hclk,
-	&clk_mcu_clk,
-	&clk_uart_clk,
-};
-
-static LIST_HEAD(clocks);
-static DEFINE_MUTEX(clocks_mutex);
-
-struct clk *clk_get(struct device *dev, const char *id)
-{
-	struct clk *p, *clk = ERR_PTR(-ENOENT);
-
-	mutex_lock(&clocks_mutex);
-	list_for_each_entry(p, &clocks, node) {
-		if (!strcmp(p->name, id)) {
-			clk = p;
-			goto found;
-		}
-	}
-
-found:
-	mutex_unlock(&clocks_mutex);
-
-	return clk;
-}
-EXPORT_SYMBOL(clk_get);
-
-void clk_put(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_put);
-
-int clk_enable(struct clk *clk)
-{
-	return 0;
-}
-EXPORT_SYMBOL(clk_enable);
-
-void clk_disable(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_disable);
-
-unsigned long clk_get_rate(struct clk *clk)
-{
-	return clk->get_rate();
-}
-EXPORT_SYMBOL(clk_get_rate);
-
-int imx_clocks_init(void)
-{
-	int i;
-
-	mutex_lock(&clocks_mutex);
-	for (i = 0; i < ARRAY_SIZE(clks); i++)
-		list_add(&clks[i]->node, &clocks);
-	mutex_unlock(&clocks_mutex);
-
-	return 0;
-}
-
diff --git a/arch/arm/mach-imx/cpufreq.c b/arch/arm/mach-imx/cpufreq.c
deleted file mode 100644
index 434b4ca..0000000
--- a/arch/arm/mach-imx/cpufreq.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * cpu.c: clock scaling for the iMX
- *
- * Copyright (C) 2000 2001, The Delft University of Technology
- * Copyright (c) 2004 Sascha Hauer <sascha@saschahauer.de>
- * Copyright (C) 2006 Inky Lung <ilung@cwlinux.com>
- * Copyright (C) 2006 Pavel Pisa, PiKRON <ppisa@pikron.com>
- *
- * Based on SA1100 version written by:
- * - Johan Pouwelse (J.A.Pouwelse@its.tudelft.nl): initial version
- * - Erik Mouw (J.A.K.Mouw@its.tudelft.nl):
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-/*#define DEBUG*/
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <asm/system.h>
-
-#include <mach/hardware.h>
-
-#include "generic.h"
-
-#ifndef __val2mfld
-#define __val2mfld(mask,val) (((mask)&~((mask)<<1))*(val)&(mask))
-#endif
-#ifndef __mfld2val
-#define __mfld2val(mask,val) (((val)&(mask))/((mask)&~((mask)<<1)))
-#endif
-
-#define CR_920T_CLOCK_MODE	0xC0000000
-#define CR_920T_FASTBUS_MODE	0x00000000
-#define CR_920T_ASYNC_MODE	0xC0000000
-
-static u32 mpctl0_at_boot;
-static u32 bclk_div_at_boot;
-
-static struct clk *system_clk, *mcu_clk;
-
-static void imx_set_async_mode(void)
-{
-	adjust_cr(CR_920T_CLOCK_MODE, CR_920T_ASYNC_MODE);
-}
-
-static void imx_set_fastbus_mode(void)
-{
-	adjust_cr(CR_920T_CLOCK_MODE, CR_920T_FASTBUS_MODE);
-}
-
-static void imx_set_mpctl0(u32 mpctl0)
-{
-	unsigned long flags;
-
-	if (mpctl0 == 0) {
-		local_irq_save(flags);
-		CSCR &= ~CSCR_MPEN;
-		local_irq_restore(flags);
-		return;
-	}
-
-	local_irq_save(flags);
-	MPCTL0 = mpctl0;
-	CSCR |= CSCR_MPEN;
-	local_irq_restore(flags);
-}
-
-/**
- * imx_compute_mpctl - compute new PLL parameters
- * @new_mpctl:	pointer to location assigned by new PLL control register value
- * @cur_mpctl:	current PLL control register parameters
- * @f_ref:	reference source frequency Hz
- * @freq:	required frequency in Hz
- * @relation:	is one of %CPUFREQ_RELATION_L (supremum)
- *		and %CPUFREQ_RELATION_H (infimum)
- */
-long imx_compute_mpctl(u32 *new_mpctl, u32 cur_mpctl, u32 f_ref, unsigned long freq, int relation)
-{
-        u32 mfi;
-        u32 mfn;
-        u32 mfd;
-        u32 pd;
-	unsigned long long ll;
-	long l;
-	long quot;
-
-	/* Fdppl=2*Fref*(MFI+MFN/(MFD+1))/(PD+1) */
-	/*  PD=<0,15>, MFD=<1,1023>, MFI=<5,15> MFN=<0,1022> */
-
-	if (cur_mpctl) {
-		mfd = ((cur_mpctl >> 16) & 0x3ff) + 1;
-		pd =  ((cur_mpctl >> 26) & 0xf) + 1;
-	} else {
-		pd=2; mfd=313;
-	}
-
-	/* pd=2; mfd=313; mfi=8; mfn=183; */
-	/* (MFI+MFN/(MFD)) = Fdppl / (2*Fref) * (PD); */
-
-	quot = (f_ref + (1 << 9)) >> 10;
-	l = (freq * pd + quot) / (2 * quot);
-	mfi = l >> 10;
-	mfn = ((l & ((1 << 10) - 1)) * mfd + (1 << 9)) >> 10;
-
-	mfd -= 1;
-	pd -= 1;
-
-	*new_mpctl = ((mfi & 0xf) << 10) | (mfn & 0x3ff) | ((mfd & 0x3ff) << 16)
-		| ((pd & 0xf) << 26);
-
-	ll = 2 * (unsigned long long)f_ref * ( (mfi<<16) + (mfn<<16) / (mfd+1) );
-	quot = (pd+1) * (1<<16);
-	ll += quot / 2;
-	do_div(ll, quot);
-	freq = ll;
-
-	pr_debug(KERN_DEBUG "imx: new PLL parameters pd=%d mfd=%d mfi=%d mfn=%d, freq=%ld\n",
-		pd, mfd, mfi, mfn, freq);
-
-	return freq;
-}
-
-
-static int imx_verify_speed(struct cpufreq_policy *policy)
-{
-	if (policy->cpu != 0)
-		return -EINVAL;
-
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, policy->cpuinfo.max_freq);
-
-	return 0;
-}
-
-static unsigned int imx_get_speed(unsigned int cpu)
-{
-	unsigned int freq;
-	unsigned int cr;
-	unsigned int cscr;
-	unsigned int bclk_div;
-
-	if (cpu)
-		return 0;
-
-	cscr = CSCR;
-	bclk_div = __mfld2val(CSCR_BCLK_DIV, cscr) + 1;
-	cr = get_cr();
-
-	if((cr & CR_920T_CLOCK_MODE) == CR_920T_FASTBUS_MODE) {
-		freq = clk_get_rate(system_clk);
-		freq = (freq + bclk_div/2) / bclk_div;
-	} else {
-		freq = clk_get_rate(mcu_clk);
-		if (cscr & CSCR_MPU_PRESC)
-			freq /= 2;
-	}
-
-	freq = (freq + 500) / 1000;
-
-	return freq;
-}
-
-static int imx_set_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq,
-			  unsigned int relation)
-{
-	struct cpufreq_freqs freqs;
-	u32 mpctl0 = 0;
-	u32 cscr;
-	unsigned long flags;
-	long freq;
-	long sysclk;
-	unsigned int bclk_div = bclk_div_at_boot;
-
-	/*
-	 * Some governors do not respects CPU and policy lower limits
-	 * which leads to bad things (division by zero etc), ensure
-	 * that such things do not happen.
-	 */
-	if(target_freq < policy->cpuinfo.min_freq)
-		target_freq = policy->cpuinfo.min_freq;
-
-	if(target_freq < policy->min)
-		target_freq = policy->min;
-
-	freq = target_freq * 1000;
-
-	pr_debug(KERN_DEBUG "imx: requested frequency %ld Hz, mpctl0 at boot 0x%08x\n",
-			freq, mpctl0_at_boot);
-
-	sysclk = clk_get_rate(system_clk);
-
-	if (freq > sysclk / bclk_div_at_boot + 1000000) {
-		freq = imx_compute_mpctl(&mpctl0, mpctl0_at_boot, CLK32 * 512, freq, relation);
-		if (freq < 0) {
-			printk(KERN_WARNING "imx: target frequency %ld Hz cannot be set\n", freq);
-			return -EINVAL;
-		}
-	} else {
-		if(freq + 1000 < sysclk) {
-			if (relation == CPUFREQ_RELATION_L)
-				bclk_div = (sysclk - 1000) / freq;
-			else
-				bclk_div = (sysclk + freq + 1000) / freq;
-
-			if(bclk_div > 16)
-				bclk_div = 16;
-			if(bclk_div < bclk_div_at_boot)
-				bclk_div = bclk_div_at_boot;
-		}
-		freq = (sysclk + bclk_div / 2) / bclk_div;
-	}
-
-	freqs.old = imx_get_speed(0);
-	freqs.new = (freq + 500) / 1000;
-	freqs.cpu = 0;
-	freqs.flags = 0;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	local_irq_save(flags);
-
-	imx_set_fastbus_mode();
-
-	imx_set_mpctl0(mpctl0);
-
-	cscr = CSCR;
-	cscr &= ~CSCR_BCLK_DIV;
-	cscr |= __val2mfld(CSCR_BCLK_DIV, bclk_div - 1);
-	CSCR = cscr;
-
-	if(mpctl0) {
-		CSCR |= CSCR_MPLL_RESTART;
-
-		/* Wait until MPLL is stabilized */
-		while( CSCR & CSCR_MPLL_RESTART );
-
-		imx_set_async_mode();
-	}
-
-	local_irq_restore(flags);
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	pr_debug(KERN_INFO "imx: set frequency %ld Hz, running from %s\n",
-			freq, mpctl0? "MPLL": "SPLL");
-
-	return 0;
-}
-
-static int __init imx_cpufreq_driver_init(struct cpufreq_policy *policy)
-{
-	printk(KERN_INFO "i.MX cpu freq change driver v1.0\n");
-
-	if (policy->cpu != 0)
-		return -EINVAL;
-
-	policy->cur = policy->min = policy->max = imx_get_speed(0);
-	policy->cpuinfo.min_freq = 8000;
-	policy->cpuinfo.max_freq = 200000;
-	 /* Manual states, that PLL stabilizes in two CLK32 periods */
-	policy->cpuinfo.transition_latency = 4 * 1000000000LL / CLK32;
-	return 0;
-}
-
-static struct cpufreq_driver imx_driver = {
-	.flags		= CPUFREQ_STICKY,
-	.verify		= imx_verify_speed,
-	.target		= imx_set_target,
-	.get		= imx_get_speed,
-	.init		= imx_cpufreq_driver_init,
-	.name		= "imx",
-};
-
-static int __init imx_cpufreq_init(void)
-{
-	bclk_div_at_boot = __mfld2val(CSCR_BCLK_DIV, CSCR) + 1;
-	mpctl0_at_boot = 0;
-
-	system_clk = clk_get(NULL, "system_clk");
-	if (IS_ERR(system_clk))
-		return PTR_ERR(system_clk);
-
-	mcu_clk = clk_get(NULL, "mcu_clk");
-	if (IS_ERR(mcu_clk)) {
-		clk_put(system_clk);
-		return PTR_ERR(mcu_clk);
-	}
-
-	if((CSCR & CSCR_MPEN) &&
-	   ((get_cr() & CR_920T_CLOCK_MODE) != CR_920T_FASTBUS_MODE))
-		mpctl0_at_boot = MPCTL0;
-
-	return cpufreq_register_driver(&imx_driver);
-}
-
-arch_initcall(imx_cpufreq_init);
-
diff --git a/arch/arm/mach-imx/dma.c b/arch/arm/mach-imx/dma.c
deleted file mode 100644
index 1536583..0000000
--- a/arch/arm/mach-imx/dma.c
+++ /dev/null
@@ -1,597 +0,0 @@
-/*
- *  linux/arch/arm/mach-imx/dma.c
- *
- *  imx DMA registration and IRQ dispatching
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- *  2004-03-03 Sascha Hauer <sascha@saschahauer.de>
- *             initial version heavily inspired by
- *             linux/arch/arm/mach-pxa/dma.c
- *
- *  2005-04-17 Pavel Pisa <pisa@cmp.felk.cvut.cz>
- *             Changed to support scatter gather DMA
- *             by taking Russell's code from RiscPC
- *
- *  2006-05-31 Pavel Pisa <pisa@cmp.felk.cvut.cz>
- *             Corrected error handling code.
- *
- */
-
-#undef DEBUG
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-
-#include <asm/scatterlist.h>
-#include <asm/system.h>
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <mach/dma.h>
-#include <mach/imx-dma.h>
-
-struct imx_dma_channel imx_dma_channels[IMX_DMA_CHANNELS];
-
-/*
- * imx_dma_sg_next - prepare next chunk for scatter-gather DMA emulation
- * @dma_ch: i.MX DMA channel number
- * @lastcount: number of bytes transferred during last transfer
- *
- * Functions prepares DMA controller for next sg data chunk transfer.
- * The @lastcount argument informs function about number of bytes transferred
- * during last block. Zero value can be used for @lastcount to setup DMA
- * for the first chunk.
- */
-static inline int imx_dma_sg_next(imx_dmach_t dma_ch, unsigned int lastcount)
-{
-	struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch];
-	unsigned int nextcount;
-	unsigned int nextaddr;
-
-	if (!imxdma->name) {
-		printk(KERN_CRIT "%s: called for  not allocated channel %d\n",
-		       __func__, dma_ch);
-		return 0;
-	}
-
-	imxdma->resbytes -= lastcount;
-
-	if (!imxdma->sg) {
-		pr_debug("imxdma%d: no sg data\n", dma_ch);
-		return 0;
-	}
-
-	imxdma->sgbc += lastcount;
-	if ((imxdma->sgbc >= imxdma->sg->length) || !imxdma->resbytes) {
-		if ((imxdma->sgcount <= 1) || !imxdma->resbytes) {
-			pr_debug("imxdma%d: sg transfer limit reached\n",
-				 dma_ch);
-			imxdma->sgcount=0;
-			imxdma->sg = NULL;
-			return 0;
-		} else {
-			imxdma->sgcount--;
-			imxdma->sg++;
-			imxdma->sgbc = 0;
-		}
-	}
-	nextcount = imxdma->sg->length - imxdma->sgbc;
-	nextaddr = imxdma->sg->dma_address + imxdma->sgbc;
-
-	if(imxdma->resbytes < nextcount)
-		nextcount = imxdma->resbytes;
-
-	if ((imxdma->dma_mode & DMA_MODE_MASK) == DMA_MODE_READ)
-		DAR(dma_ch) = nextaddr;
-	else
-		SAR(dma_ch) = nextaddr;
-
-	CNTR(dma_ch) = nextcount;
-	pr_debug("imxdma%d: next sg chunk dst 0x%08x, src 0x%08x, size 0x%08x\n",
-		 dma_ch, DAR(dma_ch), SAR(dma_ch), CNTR(dma_ch));
-
-	return nextcount;
-}
-
-/*
- * imx_dma_setup_sg_base - scatter-gather DMA emulation
- * @dma_ch: i.MX DMA channel number
- * @sg: pointer to the scatter-gather list/vector
- * @sgcount: scatter-gather list hungs count
- *
- * Functions sets up i.MX DMA state for emulated scatter-gather transfer
- * and sets up channel registers to be ready for the first chunk
- */
-static int
-imx_dma_setup_sg_base(imx_dmach_t dma_ch,
-		      struct scatterlist *sg, unsigned int sgcount)
-{
-	struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch];
-
-	imxdma->sg = sg;
-	imxdma->sgcount = sgcount;
-	imxdma->sgbc = 0;
-	return imx_dma_sg_next(dma_ch, 0);
-}
-
-/**
- * imx_dma_setup_single - setup i.MX DMA channel for linear memory to/from device transfer
- * @dma_ch: i.MX DMA channel number
- * @dma_address: the DMA/physical memory address of the linear data block
- *		to transfer
- * @dma_length: length of the data block in bytes
- * @dev_addr: physical device port address
- * @dmamode: DMA transfer mode, %DMA_MODE_READ from the device to the memory
- *           or %DMA_MODE_WRITE from memory to the device
- *
- * The function setups DMA channel source and destination addresses for transfer
- * specified by provided parameters. The scatter-gather emulation is disabled,
- * because linear data block
- * form the physical address range is transferred.
- * Return value: if incorrect parameters are provided -%EINVAL.
- *		Zero indicates success.
- */
-int
-imx_dma_setup_single(imx_dmach_t dma_ch, dma_addr_t dma_address,
-		     unsigned int dma_length, unsigned int dev_addr,
-		     unsigned int dmamode)
-{
-	struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch];
-
-	imxdma->sg = NULL;
-	imxdma->sgcount = 0;
-	imxdma->dma_mode = dmamode;
-	imxdma->resbytes = dma_length;
-
-	if (!dma_address) {
-		printk(KERN_ERR "imxdma%d: imx_dma_setup_single null address\n",
-		       dma_ch);
-		return -EINVAL;
-	}
-
-	if (!dma_length) {
-		printk(KERN_ERR "imxdma%d: imx_dma_setup_single zero length\n",
-		       dma_ch);
-		return -EINVAL;
-	}
-
-	if ((dmamode & DMA_MODE_MASK) == DMA_MODE_READ) {
-		pr_debug("imxdma%d: mx_dma_setup_single2dev dma_addressg=0x%08x dma_length=%d dev_addr=0x%08x for read\n",
-			dma_ch, (unsigned int)dma_address, dma_length,
-			dev_addr);
-		SAR(dma_ch) = dev_addr;
-		DAR(dma_ch) = (unsigned int)dma_address;
-	} else if ((dmamode & DMA_MODE_MASK) == DMA_MODE_WRITE) {
-		pr_debug("imxdma%d: mx_dma_setup_single2dev dma_addressg=0x%08x dma_length=%d dev_addr=0x%08x for write\n",
-			dma_ch, (unsigned int)dma_address, dma_length,
-			dev_addr);
-		SAR(dma_ch) = (unsigned int)dma_address;
-		DAR(dma_ch) = dev_addr;
-	} else {
-		printk(KERN_ERR "imxdma%d: imx_dma_setup_single bad dmamode\n",
-		       dma_ch);
-		return -EINVAL;
-	}
-
-	CNTR(dma_ch) = dma_length;
-
-	return 0;
-}
-
-/**
- * imx_dma_setup_sg - setup i.MX DMA channel SG list to/from device transfer
- * @dma_ch: i.MX DMA channel number
- * @sg: pointer to the scatter-gather list/vector
- * @sgcount: scatter-gather list hungs count
- * @dma_length: total length of the transfer request in bytes
- * @dev_addr: physical device port address
- * @dmamode: DMA transfer mode, %DMA_MODE_READ from the device to the memory
- *           or %DMA_MODE_WRITE from memory to the device
- *
- * The function sets up DMA channel state and registers to be ready for transfer
- * specified by provided parameters. The scatter-gather emulation is set up
- * according to the parameters.
- *
- * The full preparation of the transfer requires setup of more register
- * by the caller before imx_dma_enable() can be called.
- *
- * %BLR(dma_ch) holds transfer burst length in bytes, 0 means 64 bytes
- *
- * %RSSR(dma_ch) has to be set to the DMA request line source %DMA_REQ_xxx
- *
- * %CCR(dma_ch) has to specify transfer parameters, the next settings is typical
- * for linear or simple scatter-gather transfers if %DMA_MODE_READ is specified
- *
- * %CCR_DMOD_LINEAR | %CCR_DSIZ_32 | %CCR_SMOD_FIFO | %CCR_SSIZ_x
- *
- * The typical setup for %DMA_MODE_WRITE is specified by next options combination
- *
- * %CCR_SMOD_LINEAR | %CCR_SSIZ_32 | %CCR_DMOD_FIFO | %CCR_DSIZ_x
- *
- * Be careful here and do not mistakenly mix source and target device
- * port sizes constants, they are really different:
- * %CCR_SSIZ_8, %CCR_SSIZ_16, %CCR_SSIZ_32,
- * %CCR_DSIZ_8, %CCR_DSIZ_16, %CCR_DSIZ_32
- *
- * Return value: if incorrect parameters are provided -%EINVAL.
- * Zero indicates success.
- */
-int
-imx_dma_setup_sg(imx_dmach_t dma_ch,
-		 struct scatterlist *sg, unsigned int sgcount, unsigned int dma_length,
-		 unsigned int dev_addr, unsigned int dmamode)
-{
-	int res;
-	struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch];
-
-	imxdma->sg = NULL;
-	imxdma->sgcount = 0;
-	imxdma->dma_mode = dmamode;
-	imxdma->resbytes = dma_length;
-
-	if (!sg || !sgcount) {
-		printk(KERN_ERR "imxdma%d: imx_dma_setup_sg epty sg list\n",
-		       dma_ch);
-		return -EINVAL;
-	}
-
-	if (!sg->length) {
-		printk(KERN_ERR "imxdma%d: imx_dma_setup_sg zero length\n",
-		       dma_ch);
-		return -EINVAL;
-	}
-
-	if ((dmamode & DMA_MODE_MASK) == DMA_MODE_READ) {
-		pr_debug("imxdma%d: mx_dma_setup_sg2dev sg=%p sgcount=%d total length=%d dev_addr=0x%08x for read\n",
-			dma_ch, sg, sgcount, dma_length, dev_addr);
-		SAR(dma_ch) = dev_addr;
-	} else if ((dmamode & DMA_MODE_MASK) == DMA_MODE_WRITE) {
-		pr_debug("imxdma%d: mx_dma_setup_sg2dev sg=%p sgcount=%d total length=%d dev_addr=0x%08x for write\n",
-			dma_ch, sg, sgcount, dma_length, dev_addr);
-		DAR(dma_ch) = dev_addr;
-	} else {
-		printk(KERN_ERR "imxdma%d: imx_dma_setup_sg bad dmamode\n",
-		       dma_ch);
-		return -EINVAL;
-	}
-
-	res = imx_dma_setup_sg_base(dma_ch, sg, sgcount);
-	if (res <= 0) {
-		printk(KERN_ERR "imxdma%d: no sg chunk ready\n", dma_ch);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * imx_dma_setup_handlers - setup i.MX DMA channel end and error notification handlers
- * @dma_ch: i.MX DMA channel number
- * @irq_handler: the pointer to the function called if the transfer
- *		ends successfully
- * @err_handler: the pointer to the function called if the premature
- *		end caused by error occurs
- * @data: user specified value to be passed to the handlers
- */
-int
-imx_dma_setup_handlers(imx_dmach_t dma_ch,
-		       void (*irq_handler) (int, void *),
-		       void (*err_handler) (int, void *, int),
-		       void *data)
-{
-	struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch];
-	unsigned long flags;
-
-	if (!imxdma->name) {
-		printk(KERN_CRIT "%s: called for  not allocated channel %d\n",
-		       __func__, dma_ch);
-		return -ENODEV;
-	}
-
-	local_irq_save(flags);
-	DISR = (1 << dma_ch);
-	imxdma->irq_handler = irq_handler;
-	imxdma->err_handler = err_handler;
-	imxdma->data = data;
-	local_irq_restore(flags);
-	return 0;
-}
-
-/**
- * imx_dma_enable - function to start i.MX DMA channel operation
- * @dma_ch: i.MX DMA channel number
- *
- * The channel has to be allocated by driver through imx_dma_request()
- * or imx_dma_request_by_prio() function.
- * The transfer parameters has to be set to the channel registers through
- * call of the imx_dma_setup_single() or imx_dma_setup_sg() function
- * and registers %BLR(dma_ch), %RSSR(dma_ch) and %CCR(dma_ch) has to
- * be set prior this function call by the channel user.
- */
-void imx_dma_enable(imx_dmach_t dma_ch)
-{
-	struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch];
-	unsigned long flags;
-
-	pr_debug("imxdma%d: imx_dma_enable\n", dma_ch);
-
-	if (!imxdma->name) {
-		printk(KERN_CRIT "%s: called for  not allocated channel %d\n",
-		       __func__, dma_ch);
-		return;
-	}
-
-	local_irq_save(flags);
-	DISR = (1 << dma_ch);
-	DIMR &= ~(1 << dma_ch);
-	CCR(dma_ch) |= CCR_CEN;
-	local_irq_restore(flags);
-}
-
-/**
- * imx_dma_disable - stop, finish i.MX DMA channel operatin
- * @dma_ch: i.MX DMA channel number
- */
-void imx_dma_disable(imx_dmach_t dma_ch)
-{
-	unsigned long flags;
-
-	pr_debug("imxdma%d: imx_dma_disable\n", dma_ch);
-
-	local_irq_save(flags);
-	DIMR |= (1 << dma_ch);
-	CCR(dma_ch) &= ~CCR_CEN;
-	DISR = (1 << dma_ch);
-	local_irq_restore(flags);
-}
-
-/**
- * imx_dma_request - request/allocate specified channel number
- * @dma_ch: i.MX DMA channel number
- * @name: the driver/caller own non-%NULL identification
- */
-int imx_dma_request(imx_dmach_t dma_ch, const char *name)
-{
-	struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch];
-	unsigned long flags;
-
-	/* basic sanity checks */
-	if (!name)
-		return -EINVAL;
-
-	if (dma_ch >= IMX_DMA_CHANNELS) {
-		printk(KERN_CRIT "%s: called for  non-existed channel %d\n",
-		       __func__, dma_ch);
-		return -EINVAL;
-	}
-
-	local_irq_save(flags);
-	if (imxdma->name) {
-		local_irq_restore(flags);
-		return -ENODEV;
-	}
-
-	imxdma->name = name;
-	imxdma->irq_handler = NULL;
-	imxdma->err_handler = NULL;
-	imxdma->data = NULL;
-	imxdma->sg = NULL;
-	local_irq_restore(flags);
-	return 0;
-}
-
-/**
- * imx_dma_free - release previously acquired channel
- * @dma_ch: i.MX DMA channel number
- */
-void imx_dma_free(imx_dmach_t dma_ch)
-{
-	unsigned long flags;
-	struct imx_dma_channel *imxdma = &imx_dma_channels[dma_ch];
-
-	if (!imxdma->name) {
-		printk(KERN_CRIT
-		       "%s: trying to free channel %d which is already freed\n",
-		       __func__, dma_ch);
-		return;
-	}
-
-	local_irq_save(flags);
-	/* Disable interrupts */
-	DIMR |= (1 << dma_ch);
-	CCR(dma_ch) &= ~CCR_CEN;
-	imxdma->name = NULL;
-	local_irq_restore(flags);
-}
-
-/**
- * imx_dma_request_by_prio - find and request some of free channels best suiting requested priority
- * @name: the driver/caller own non-%NULL identification
- * @prio: one of the hardware distinguished priority level:
- *        %DMA_PRIO_HIGH, %DMA_PRIO_MEDIUM, %DMA_PRIO_LOW
- *
- * This function tries to find free channel in the specified priority group
- * if the priority cannot be achieved it tries to look for free channel
- * in the higher and then even lower priority groups.
- *
- * Return value: If there is no free channel to allocate, -%ENODEV is returned.
- *               On successful allocation channel is returned.
- */
-imx_dmach_t imx_dma_request_by_prio(const char *name, imx_dma_prio prio)
-{
-	int i;
-	int best;
-
-	switch (prio) {
-	case (DMA_PRIO_HIGH):
-		best = 8;
-		break;
-	case (DMA_PRIO_MEDIUM):
-		best = 4;
-		break;
-	case (DMA_PRIO_LOW):
-	default:
-		best = 0;
-		break;
-	}
-
-	for (i = best; i < IMX_DMA_CHANNELS; i++) {
-		if (!imx_dma_request(i, name)) {
-			return i;
-		}
-	}
-
-	for (i = best - 1; i >= 0; i--) {
-		if (!imx_dma_request(i, name)) {
-			return i;
-		}
-	}
-
-	printk(KERN_ERR "%s: no free DMA channel found\n", __func__);
-
-	return -ENODEV;
-}
-
-static irqreturn_t dma_err_handler(int irq, void *dev_id)
-{
-	int i, disr = DISR;
-	struct imx_dma_channel *channel;
-	unsigned int err_mask = DBTOSR | DRTOSR | DSESR | DBOSR;
-	int errcode;
-
-	DISR = disr & err_mask;
-	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
-		if(!(err_mask & (1 << i)))
-			continue;
-		channel = &imx_dma_channels[i];
-		errcode = 0;
-
-		if (DBTOSR & (1 << i)) {
-			DBTOSR = (1 << i);
-			errcode |= IMX_DMA_ERR_BURST;
-		}
-		if (DRTOSR & (1 << i)) {
-			DRTOSR = (1 << i);
-			errcode |= IMX_DMA_ERR_REQUEST;
-		}
-		if (DSESR & (1 << i)) {
-			DSESR = (1 << i);
-			errcode |= IMX_DMA_ERR_TRANSFER;
-		}
-		if (DBOSR & (1 << i)) {
-			DBOSR = (1 << i);
-			errcode |= IMX_DMA_ERR_BUFFER;
-		}
-
-		/*
-		 * The cleaning of @sg field would be questionable
-		 * there, because its value can help to compute
-		 * remaining/transferred bytes count in the handler
-		 */
-		/*imx_dma_channels[i].sg = NULL;*/
-
-		if (channel->name && channel->err_handler) {
-			channel->err_handler(i, channel->data, errcode);
-			continue;
-		}
-
-		imx_dma_channels[i].sg = NULL;
-
-		printk(KERN_WARNING
-		       "DMA timeout on channel %d (%s) -%s%s%s%s\n",
-		       i, channel->name,
-		       errcode&IMX_DMA_ERR_BURST?    " burst":"",
-		       errcode&IMX_DMA_ERR_REQUEST?  " request":"",
-		       errcode&IMX_DMA_ERR_TRANSFER? " transfer":"",
-		       errcode&IMX_DMA_ERR_BUFFER?   " buffer":"");
-	}
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t dma_irq_handler(int irq, void *dev_id)
-{
-	int i, disr = DISR;
-
-	pr_debug("imxdma: dma_irq_handler called, disr=0x%08x\n",
-		     disr);
-
-	DISR = disr;
-	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
-		if (disr & (1 << i)) {
-			struct imx_dma_channel *channel = &imx_dma_channels[i];
-			if (channel->name) {
-				if (imx_dma_sg_next(i, CNTR(i))) {
-					CCR(i) &= ~CCR_CEN;
-					mb();
-					CCR(i) |= CCR_CEN;
-				} else {
-					if (channel->irq_handler)
-						channel->irq_handler(i,
-							channel->data);
-				}
-			} else {
-				/*
-				 * IRQ for an unregistered DMA channel:
-				 * let's clear the interrupts and disable it.
-				 */
-				printk(KERN_WARNING
-				       "spurious IRQ for DMA channel %d\n", i);
-			}
-		}
-	}
-	return IRQ_HANDLED;
-}
-
-static int __init imx_dma_init(void)
-{
-	int ret;
-	int i;
-
-	/* reset DMA module */
-	DCR = DCR_DRST;
-
-	ret = request_irq(DMA_INT, dma_irq_handler, 0, "DMA", NULL);
-	if (ret) {
-		printk(KERN_CRIT "Wow!  Can't register IRQ for DMA\n");
-		return ret;
-	}
-
-	ret = request_irq(DMA_ERR, dma_err_handler, 0, "DMA", NULL);
-	if (ret) {
-		printk(KERN_CRIT "Wow!  Can't register ERRIRQ for DMA\n");
-		free_irq(DMA_INT, NULL);
-	}
-
-	/* enable DMA module */
-	DCR = DCR_DEN;
-
-	/* clear all interrupts */
-	DISR = (1 << IMX_DMA_CHANNELS) - 1;
-
-	/* enable interrupts */
-	DIMR = (1 << IMX_DMA_CHANNELS) - 1;
-
-	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
-		imx_dma_channels[i].sg = NULL;
-		imx_dma_channels[i].dma_num = i;
-	}
-
-	return ret;
-}
-
-arch_initcall(imx_dma_init);
-
-EXPORT_SYMBOL(imx_dma_setup_single);
-EXPORT_SYMBOL(imx_dma_setup_sg);
-EXPORT_SYMBOL(imx_dma_setup_handlers);
-EXPORT_SYMBOL(imx_dma_enable);
-EXPORT_SYMBOL(imx_dma_disable);
-EXPORT_SYMBOL(imx_dma_request);
-EXPORT_SYMBOL(imx_dma_free);
-EXPORT_SYMBOL(imx_dma_request_by_prio);
-EXPORT_SYMBOL(imx_dma_channels);
diff --git a/arch/arm/mach-imx/generic.c b/arch/arm/mach-imx/generic.c
deleted file mode 100644
index 05f1739..0000000
--- a/arch/arm/mach-imx/generic.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- *  arch/arm/mach-imx/generic.c
- *
- *  author: Sascha Hauer
- *  Created: april 20th, 2004
- *  Copyright: Synertronixx GmbH
- *
- *  Common code for i.MX machines
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-#include <linux/platform_device.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <asm/errno.h>
-#include <mach/hardware.h>
-#include <mach/imx-regs.h>
-
-#include <asm/mach/map.h>
-#include <mach/mmc.h>
-#include <mach/gpio.h>
-
-unsigned long imx_gpio_alloc_map[(GPIO_PORT_MAX + 1) * 32 / BITS_PER_LONG];
-
-void imx_gpio_mode(int gpio_mode)
-{
-	unsigned int pin = gpio_mode & GPIO_PIN_MASK;
-	unsigned int port = (gpio_mode & GPIO_PORT_MASK) >> GPIO_PORT_SHIFT;
-	unsigned int ocr = (gpio_mode & GPIO_OCR_MASK) >> GPIO_OCR_SHIFT;
-	unsigned int tmp;
-
-	/* Pullup enable */
-	if(gpio_mode & GPIO_PUEN)
-		PUEN(port) |= (1<<pin);
-	else
-		PUEN(port) &= ~(1<<pin);
-
-	/* Data direction */
-	if(gpio_mode & GPIO_OUT)
-		DDIR(port) |= 1<<pin;
-	else
-		DDIR(port) &= ~(1<<pin);
-
-	/* Primary / alternate function */
-	if(gpio_mode & GPIO_AF)
-		GPR(port) |= (1<<pin);
-	else
-		GPR(port) &= ~(1<<pin);
-
-	/* use as gpio? */
-	if(gpio_mode &  GPIO_GIUS)
-		GIUS(port) |= (1<<pin);
-	else
-		GIUS(port) &= ~(1<<pin);
-
-	/* Output / input configuration */
-	/* FIXME: I'm not very sure about OCR and ICONF, someone
-	 * should have a look over it
-	 */
-	if(pin<16) {
-		tmp = OCR1(port);
-		tmp &= ~( 3<<(pin*2));
-		tmp |= (ocr << (pin*2));
-		OCR1(port) = tmp;
-
-		ICONFA1(port) &= ~( 3<<(pin*2));
-		ICONFA1(port) |= ((gpio_mode >> GPIO_AOUT_SHIFT) & 3) << (pin * 2);
-		ICONFB1(port) &= ~( 3<<(pin*2));
-		ICONFB1(port) |= ((gpio_mode >> GPIO_BOUT_SHIFT) & 3) << (pin * 2);
-	} else {
-		tmp = OCR2(port);
-		tmp &= ~( 3<<((pin-16)*2));
-		tmp |= (ocr << ((pin-16)*2));
-		OCR2(port) = tmp;
-
-		ICONFA2(port) &= ~( 3<<((pin-16)*2));
-		ICONFA2(port) |= ((gpio_mode >> GPIO_AOUT_SHIFT) & 3) << ((pin-16) * 2);
-		ICONFB2(port) &= ~( 3<<((pin-16)*2));
-		ICONFB2(port) |= ((gpio_mode >> GPIO_BOUT_SHIFT) & 3) << ((pin-16) * 2);
-	}
-}
-
-EXPORT_SYMBOL(imx_gpio_mode);
-
-int imx_gpio_request(unsigned gpio, const char *label)
-{
-	if(gpio >= (GPIO_PORT_MAX + 1) * 32) {
-		printk(KERN_ERR "imx_gpio: Attempt to request nonexistent GPIO %d for \"%s\"\n",
-			gpio, label ? label : "?");
-		return -EINVAL;
-	}
-
-	if(test_and_set_bit(gpio, imx_gpio_alloc_map)) {
-		printk(KERN_ERR "imx_gpio: GPIO %d already used. Allocation for \"%s\" failed\n",
-			gpio, label ? label : "?");
-		return -EBUSY;
-	}
-
-	return 0;
-}
-
-EXPORT_SYMBOL(imx_gpio_request);
-
-void imx_gpio_free(unsigned gpio)
-{
-	if(gpio >= (GPIO_PORT_MAX + 1) * 32)
-		return;
-
-	clear_bit(gpio, imx_gpio_alloc_map);
-}
-
-EXPORT_SYMBOL(imx_gpio_free);
-
-int imx_gpio_direction_input(unsigned gpio)
-{
-	imx_gpio_mode(gpio | GPIO_IN | GPIO_GIUS | GPIO_DR);
-	return 0;
-}
-
-EXPORT_SYMBOL(imx_gpio_direction_input);
-
-int imx_gpio_direction_output(unsigned gpio, int value)
-{
-	imx_gpio_set_value(gpio, value);
-	imx_gpio_mode(gpio | GPIO_OUT | GPIO_GIUS | GPIO_DR);
-	return 0;
-}
-
-EXPORT_SYMBOL(imx_gpio_direction_output);
-
-int imx_gpio_setup_multiple_pins(const int *pin_list, unsigned count,
-				int alloc_mode, const char *label)
-{
-	const int *p = pin_list;
-	int i;
-	unsigned gpio;
-	unsigned mode;
-
-	for (i = 0; i < count; i++) {
-		gpio = *p & (GPIO_PIN_MASK | GPIO_PORT_MASK);
-		mode = *p & ~(GPIO_PIN_MASK | GPIO_PORT_MASK);
-
-		if (gpio >= (GPIO_PORT_MAX + 1) * 32)
-			goto setup_error;
-
-		if (alloc_mode & IMX_GPIO_ALLOC_MODE_RELEASE)
-			imx_gpio_free(gpio);
-		else if (!(alloc_mode & IMX_GPIO_ALLOC_MODE_NO_ALLOC))
-			if (imx_gpio_request(gpio, label))
-				if (!(alloc_mode & IMX_GPIO_ALLOC_MODE_TRY_ALLOC))
-					goto setup_error;
-
-		if (!(alloc_mode & (IMX_GPIO_ALLOC_MODE_ALLOC_ONLY |
-				    IMX_GPIO_ALLOC_MODE_RELEASE)))
-			imx_gpio_mode(gpio | mode);
-
-		p++;
-	}
-	return 0;
-
-setup_error:
-	if(alloc_mode & (IMX_GPIO_ALLOC_MODE_NO_ALLOC |
-		         IMX_GPIO_ALLOC_MODE_TRY_ALLOC))
-		return -EINVAL;
-
-	while (p != pin_list) {
-		p--;
-		gpio = *p & (GPIO_PIN_MASK | GPIO_PORT_MASK);
-		imx_gpio_free(gpio);
-	}
-
-	return -EINVAL;
-}
-
-EXPORT_SYMBOL(imx_gpio_setup_multiple_pins);
-
-void __imx_gpio_set_value(unsigned gpio, int value)
-{
-	imx_gpio_set_value_inline(gpio, value);
-}
-
-EXPORT_SYMBOL(__imx_gpio_set_value);
-
-int imx_gpio_to_irq(unsigned gpio)
-{
-	return IRQ_GPIOA(0) + gpio;
-}
-
-EXPORT_SYMBOL(imx_gpio_to_irq);
-
-int imx_irq_to_gpio(unsigned irq)
-{
-	if (irq < IRQ_GPIOA(0))
-		return -EINVAL;
-	return irq - IRQ_GPIOA(0);
-}
-
-EXPORT_SYMBOL(imx_irq_to_gpio);
-
-static struct resource imx_mmc_resources[] = {
-	[0] = {
-		.start	= 0x00214000,
-		.end	= 0x002140FF,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= (SDHC_INT),
-		.end	= (SDHC_INT),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static u64 imxmmmc_dmamask = 0xffffffffUL;
-
-static struct platform_device imx_mmc_device = {
-	.name		= "imx-mmc",
-	.id		= 0,
-	.dev		= {
-		.dma_mask = &imxmmmc_dmamask,
-		.coherent_dma_mask = 0xffffffff,
-	},
-	.num_resources	= ARRAY_SIZE(imx_mmc_resources),
-	.resource	= imx_mmc_resources,
-};
-
-void __init imx_set_mmc_info(struct imxmmc_platform_data *info)
-{
-	imx_mmc_device.dev.platform_data = info;
-}
-
-static struct platform_device *devices[] __initdata = {
-	&imx_mmc_device,
-};
-
-static struct map_desc imx_io_desc[] __initdata = {
-	{
-		.virtual	= IMX_IO_BASE,
-		.pfn		= __phys_to_pfn(IMX_IO_PHYS),
-		.length		= IMX_IO_SIZE,
-		.type		= MT_DEVICE
-	}
-};
-
-void __init
-imx_map_io(void)
-{
-	iotable_init(imx_io_desc, ARRAY_SIZE(imx_io_desc));
-}
-
-static int __init imx_init(void)
-{
-	return platform_add_devices(devices, ARRAY_SIZE(devices));
-}
-
-subsys_initcall(imx_init);
diff --git a/arch/arm/mach-imx/generic.h b/arch/arm/mach-imx/generic.h
deleted file mode 100644
index e91003e..0000000
--- a/arch/arm/mach-imx/generic.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *  linux/arch/arm/mach-imx/generic.h
- *
- * Author:	Sascha Hauer <sascha@saschahauer.de>
- * Copyright:	Synertronixx GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-extern void __init imx_map_io(void);
-extern void __init imx_init_irq(void);
-
-struct sys_timer;
-extern struct sys_timer imx_timer;
diff --git a/arch/arm/mach-imx/include/mach/debug-macro.S b/arch/arm/mach-imx/include/mach/debug-macro.S
deleted file mode 100644
index 87802bbf..0000000
--- a/arch/arm/mach-imx/include/mach/debug-macro.S
+++ /dev/null
@@ -1,34 +0,0 @@
-/* arch/arm/mach-imx/include/mach/debug-macro.S
- *
- * Debugging macro include header
- *
- *  Copyright (C) 1994-1999 Russell King
- *  Moved from linux/arch/arm/kernel/debug.S by Ben Dooks
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
-*/
-
-		.macro	addruart,rx
-		mrc	p15, 0, \rx, c1, c0
-		tst	\rx, #1			@ MMU enabled?
-		moveq	\rx, #0x00000000	@ physical
-		movne	\rx, #0xe0000000	@ virtual
-		orreq	\rx, \rx, #0x00200000	@ physical
-		orr	\rx, \rx, #0x00006000	@ UART1 offset
-		.endm
-
-		.macro	senduart,rd,rx
-		str	\rd, [\rx, #0x40]	@ TXDATA
-		.endm
-
-		.macro	waituart,rd,rx
-		.endm
-
-		.macro	busyuart,rd,rx
-1002:		ldr	\rd, [\rx, #0x98]	@ SR2
-		tst	\rd, #1 << 3		@ TXDC
-		beq	1002b			@ wait until transmit done
-		.endm
diff --git a/arch/arm/mach-imx/include/mach/dma.h b/arch/arm/mach-imx/include/mach/dma.h
deleted file mode 100644
index 621ff2c..0000000
--- a/arch/arm/mach-imx/include/mach/dma.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  linux/include/asm-arm/imxads/dma.h
- *
- *  Copyright (C) 1997,1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef __ASM_ARCH_DMA_H
-#define __ASM_ARCH_DMA_H
-
-typedef enum {
-	DMA_PRIO_HIGH = 0,
-	DMA_PRIO_MEDIUM = 1,
-	DMA_PRIO_LOW = 2
-} imx_dma_prio;
-
-#define DMA_REQ_UART3_T        2
-#define DMA_REQ_UART3_R        3
-#define DMA_REQ_SSI2_T         4
-#define DMA_REQ_SSI2_R         5
-#define DMA_REQ_CSI_STAT       6
-#define DMA_REQ_CSI_R          7
-#define DMA_REQ_MSHC           8
-#define DMA_REQ_DSPA_DCT_DOUT  9
-#define DMA_REQ_DSPA_DCT_DIN  10
-#define DMA_REQ_DSPA_MAC      11
-#define DMA_REQ_EXT           12
-#define DMA_REQ_SDHC          13
-#define DMA_REQ_SPI1_R        14
-#define DMA_REQ_SPI1_T        15
-#define DMA_REQ_SSI_T         16
-#define DMA_REQ_SSI_R         17
-#define DMA_REQ_ASP_DAC       18
-#define DMA_REQ_ASP_ADC       19
-#define DMA_REQ_USP_EP(x)    (20+(x))
-#define DMA_REQ_SPI2_R        26
-#define DMA_REQ_SPI2_T        27
-#define DMA_REQ_UART2_T       28
-#define DMA_REQ_UART2_R       29
-#define DMA_REQ_UART1_T       30
-#define DMA_REQ_UART1_R       31
-
-#endif				/* _ASM_ARCH_DMA_H */
diff --git a/arch/arm/mach-imx/include/mach/entry-macro.S b/arch/arm/mach-imx/include/mach/entry-macro.S
deleted file mode 100644
index e4db679..0000000
--- a/arch/arm/mach-imx/include/mach/entry-macro.S
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * arch/arm/mach-imx/include/mach/entry-macro.S
- *
- * Low-level IRQ helper macros for iMX-based platforms
- *
- * This file is licensed under  the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-#include <mach/hardware.h>
-
-		.macro	disable_fiq
-		.endm
-
-		.macro	get_irqnr_preamble, base, tmp
-		.endm
-
-		.macro	arch_ret_to_user, tmp1, tmp2
-		.endm
-
-#define AITC_NIVECSR   0x40
-		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
-		ldr	\base, =IO_ADDRESS(IMX_AITC_BASE)
-		@ Load offset & priority of the highest priority
-		@ interrupt pending.
-		ldr	\irqstat, [\base, #AITC_NIVECSR]
-		@ Shift off the priority leaving the offset or
-		@ "interrupt number", use arithmetic shift to
-		@ transform illegal source (0xffff) as -1
-		mov	\irqnr, \irqstat, asr #16
-		adds	\tmp, \irqnr, #1
-		.endm
diff --git a/arch/arm/mach-imx/include/mach/gpio.h b/arch/arm/mach-imx/include/mach/gpio.h
deleted file mode 100644
index 6c2942f..0000000
--- a/arch/arm/mach-imx/include/mach/gpio.h
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef _IMX_GPIO_H
-
-#include <linux/kernel.h>
-#include <mach/hardware.h>
-#include <mach/imx-regs.h>
-
-#define IMX_GPIO_ALLOC_MODE_NORMAL	0
-#define IMX_GPIO_ALLOC_MODE_NO_ALLOC	1
-#define IMX_GPIO_ALLOC_MODE_TRY_ALLOC	2
-#define IMX_GPIO_ALLOC_MODE_ALLOC_ONLY	4
-#define IMX_GPIO_ALLOC_MODE_RELEASE	8
-
-extern int imx_gpio_request(unsigned gpio, const char *label);
-
-extern void imx_gpio_free(unsigned gpio);
-
-extern int imx_gpio_setup_multiple_pins(const int *pin_list, unsigned count,
-					int alloc_mode, const char *label);
-
-extern int imx_gpio_direction_input(unsigned gpio);
-
-extern int imx_gpio_direction_output(unsigned gpio, int value);
-
-extern void __imx_gpio_set_value(unsigned gpio, int value);
-
-static inline int imx_gpio_get_value(unsigned gpio)
-{
-	return SSR(gpio >> GPIO_PORT_SHIFT) & (1 << (gpio & GPIO_PIN_MASK));
-}
-
-static inline void imx_gpio_set_value_inline(unsigned gpio, int value)
-{
-	unsigned long flags;
-
-	raw_local_irq_save(flags);
-	if(value)
-		DR(gpio >> GPIO_PORT_SHIFT) |= (1 << (gpio & GPIO_PIN_MASK));
-	else
-		DR(gpio >> GPIO_PORT_SHIFT) &= ~(1 << (gpio & GPIO_PIN_MASK));
-	raw_local_irq_restore(flags);
-}
-
-static inline void imx_gpio_set_value(unsigned gpio, int value)
-{
-	if(__builtin_constant_p(gpio))
-		imx_gpio_set_value_inline(gpio, value);
-	else
-		__imx_gpio_set_value(gpio, value);
-}
-
-extern int imx_gpio_to_irq(unsigned gpio);
-
-extern int imx_irq_to_gpio(unsigned irq);
-
-/*-------------------------------------------------------------------------*/
-
-/* Wrappers for "new style" GPIO calls. These calls i.MX specific versions
- * to allow future extension of GPIO logic.
- */
-
-static inline int gpio_request(unsigned gpio, const char *label)
-{
-	return imx_gpio_request(gpio, label);
-}
-
-static inline void gpio_free(unsigned gpio)
-{
-	might_sleep();
-
-	imx_gpio_free(gpio);
-}
-
-static inline  int gpio_direction_input(unsigned gpio)
-{
-	return imx_gpio_direction_input(gpio);
-}
-
-static inline int gpio_direction_output(unsigned gpio, int value)
-{
-	return imx_gpio_direction_output(gpio, value);
-}
-
-static inline int gpio_get_value(unsigned gpio)
-{
-	return imx_gpio_get_value(gpio);
-}
-
-static inline void gpio_set_value(unsigned gpio, int value)
-{
-	imx_gpio_set_value(gpio, value);
-}
-
-#include <asm-generic/gpio.h>		/* cansleep wrappers */
-
-static inline int gpio_to_irq(unsigned gpio)
-{
-	return imx_gpio_to_irq(gpio);
-}
-
-static inline int irq_to_gpio(unsigned irq)
-{
-	return imx_irq_to_gpio(irq);
-}
-
-
-#endif
diff --git a/arch/arm/mach-imx/include/mach/hardware.h b/arch/arm/mach-imx/include/mach/hardware.h
deleted file mode 100644
index c73e9e7..0000000
--- a/arch/arm/mach-imx/include/mach/hardware.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- *  arch/arm/mach-imx/include/mach/hardware.h
- *
- *  Copyright (C) 1999 ARM Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef __ASM_ARCH_HARDWARE_H
-#define __ASM_ARCH_HARDWARE_H
-
-#include <asm/sizes.h>
-#include "imx-regs.h"
-
-#ifndef __ASSEMBLY__
-# define __REG(x)	(*((volatile u32 *)IO_ADDRESS(x)))
-
-# define __REG2(x,y)        (*(volatile u32 *)((u32)&__REG(x) + (y)))
-#endif
-
-/*
- * Memory map
- */
-
-#define IMX_IO_PHYS		0x00200000
-#define IMX_IO_SIZE		0x00100000
-#define IMX_IO_BASE		0xe0000000
-
-#define IMX_CS0_PHYS		0x10000000
-#define IMX_CS0_SIZE		0x02000000
-#define IMX_CS0_VIRT		0xe8000000
-
-#define IMX_CS1_PHYS		0x12000000
-#define IMX_CS1_SIZE		0x01000000
-#define IMX_CS1_VIRT		0xea000000
-
-#define IMX_CS2_PHYS		0x13000000
-#define IMX_CS2_SIZE		0x01000000
-#define IMX_CS2_VIRT		0xeb000000
-
-#define IMX_CS3_PHYS		0x14000000
-#define IMX_CS3_SIZE		0x01000000
-#define IMX_CS3_VIRT		0xec000000
-
-#define IMX_CS4_PHYS		0x15000000
-#define IMX_CS4_SIZE		0x01000000
-#define IMX_CS4_VIRT		0xed000000
-
-#define IMX_CS5_PHYS		0x16000000
-#define IMX_CS5_SIZE		0x01000000
-#define IMX_CS5_VIRT		0xee000000
-
-#define IMX_FB_VIRT		0xF1000000
-#define IMX_FB_SIZE		(256*1024)
-
-/* macro to get at IO space when running virtually */
-#define IO_ADDRESS(x) ((x) | IMX_IO_BASE)
-
-#ifndef __ASSEMBLY__
-/*
- * Handy routine to set GPIO functions
- */
-extern void imx_gpio_mode( int gpio_mode );
-
-#endif
-
-#define MAXIRQNUM                       62
-#define MAXFIQNUM                       62
-#define MAXSWINUM                       62
-
-/*
- * Use SDRAM for memory
- */
-#define MEM_SIZE		0x01000000
-
-#ifdef CONFIG_ARCH_MX1ADS
-#include "mx1ads.h"
-#endif
-
-#endif
diff --git a/arch/arm/mach-imx/include/mach/imx-dma.h b/arch/arm/mach-imx/include/mach/imx-dma.h
deleted file mode 100644
index bbe54df..0000000
--- a/arch/arm/mach-imx/include/mach/imx-dma.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- *  linux/include/asm-arm/imxads/dma.h
- *
- *  Copyright (C) 1997,1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include <mach/dma.h>
-
-#ifndef __ASM_ARCH_IMX_DMA_H
-#define __ASM_ARCH_IMX_DMA_H
-
-#define IMX_DMA_CHANNELS  11
-
-/*
- * struct imx_dma_channel - i.MX specific DMA extension
- * @name: name specified by DMA client
- * @irq_handler: client callback for end of transfer
- * @err_handler: client callback for error condition
- * @data: clients context data for callbacks
- * @dma_mode: direction of the transfer %DMA_MODE_READ or %DMA_MODE_WRITE
- * @sg: pointer to the actual read/written chunk for scatter-gather emulation
- * @sgbc: counter of processed bytes in the actual read/written chunk
- * @resbytes: total residual number of bytes to transfer
- *            (it can be lower or same as sum of SG mapped chunk sizes)
- * @sgcount: number of chunks to be read/written
- *
- * Structure is used for IMX DMA processing. It would be probably good
- * @struct dma_struct in the future for external interfacing and use
- * @struct imx_dma_channel only as extension to it.
- */
-
-struct imx_dma_channel {
-	const char *name;
-	void (*irq_handler) (int, void *);
-	void (*err_handler) (int, void *, int errcode);
-	void *data;
-	unsigned int  dma_mode;
-	struct scatterlist *sg;
-	unsigned int sgbc;
-	unsigned int sgcount;
-	unsigned int resbytes;
-	int dma_num;
-};
-
-extern struct imx_dma_channel imx_dma_channels[IMX_DMA_CHANNELS];
-
-#define IMX_DMA_ERR_BURST     1
-#define IMX_DMA_ERR_REQUEST   2
-#define IMX_DMA_ERR_TRANSFER  4
-#define IMX_DMA_ERR_BUFFER    8
-
-/* The type to distinguish channel numbers parameter from ordinal int type */
-typedef int imx_dmach_t;
-
-#define DMA_MODE_READ		0
-#define DMA_MODE_WRITE		1
-#define DMA_MODE_MASK		1
-
-int
-imx_dma_setup_single(imx_dmach_t dma_ch, dma_addr_t dma_address,
-		unsigned int dma_length, unsigned int dev_addr, unsigned int dmamode);
-
-int
-imx_dma_setup_sg(imx_dmach_t dma_ch,
-		 struct scatterlist *sg, unsigned int sgcount, unsigned int dma_length,
-		 unsigned int dev_addr, unsigned int dmamode);
-
-int
-imx_dma_setup_handlers(imx_dmach_t dma_ch,
-		void (*irq_handler) (int, void *),
-		void (*err_handler) (int, void *, int), void *data);
-
-void imx_dma_enable(imx_dmach_t dma_ch);
-
-void imx_dma_disable(imx_dmach_t dma_ch);
-
-int imx_dma_request(imx_dmach_t dma_ch, const char *name);
-
-void imx_dma_free(imx_dmach_t dma_ch);
-
-imx_dmach_t imx_dma_request_by_prio(const char *name, imx_dma_prio prio);
-
-
-#endif	/* _ASM_ARCH_IMX_DMA_H */
diff --git a/arch/arm/mach-imx/include/mach/imx-regs.h b/arch/arm/mach-imx/include/mach/imx-regs.h
deleted file mode 100644
index 490297f..0000000
--- a/arch/arm/mach-imx/include/mach/imx-regs.h
+++ /dev/null
@@ -1,376 +0,0 @@
-#ifndef _IMX_REGS_H
-#define _IMX_REGS_H
-/* ------------------------------------------------------------------------
- *  Motorola IMX system registers
- * ------------------------------------------------------------------------
- *
- */
-
-/*
- *  Register BASEs, based on OFFSETs
- *
- */
-#define IMX_AIPI1_BASE             (0x00000 + IMX_IO_BASE)
-#define IMX_WDT_BASE               (0x01000 + IMX_IO_BASE)
-#define IMX_TIM1_BASE              (0x02000 + IMX_IO_BASE)
-#define IMX_TIM2_BASE              (0x03000 + IMX_IO_BASE)
-#define IMX_RTC_BASE               (0x04000 + IMX_IO_BASE)
-#define IMX_LCDC_BASE              (0x05000 + IMX_IO_BASE)
-#define IMX_UART1_BASE             (0x06000 + IMX_IO_BASE)
-#define IMX_UART2_BASE             (0x07000 + IMX_IO_BASE)
-#define IMX_PWM_BASE               (0x08000 + IMX_IO_BASE)
-#define IMX_DMAC_BASE              (0x09000 + IMX_IO_BASE)
-#define IMX_AIPI2_BASE             (0x10000 + IMX_IO_BASE)
-#define IMX_SIM_BASE               (0x11000 + IMX_IO_BASE)
-#define IMX_USBD_BASE              (0x12000 + IMX_IO_BASE)
-#define IMX_SPI1_BASE              (0x13000 + IMX_IO_BASE)
-#define IMX_MMC_BASE               (0x14000 + IMX_IO_BASE)
-#define IMX_ASP_BASE               (0x15000 + IMX_IO_BASE)
-#define IMX_BTA_BASE               (0x16000 + IMX_IO_BASE)
-#define IMX_I2C_BASE               (0x17000 + IMX_IO_BASE)
-#define IMX_SSI_BASE               (0x18000 + IMX_IO_BASE)
-#define IMX_SPI2_BASE              (0x19000 + IMX_IO_BASE)
-#define IMX_MSHC_BASE              (0x1A000 + IMX_IO_BASE)
-#define IMX_PLL_BASE               (0x1B000 + IMX_IO_BASE)
-#define IMX_GPIO_BASE              (0x1C000 + IMX_IO_BASE)
-#define IMX_EIM_BASE               (0x20000 + IMX_IO_BASE)
-#define IMX_SDRAMC_BASE            (0x21000 + IMX_IO_BASE)
-#define IMX_MMA_BASE               (0x22000 + IMX_IO_BASE)
-#define IMX_AITC_BASE              (0x23000 + IMX_IO_BASE)
-#define IMX_CSI_BASE               (0x24000 + IMX_IO_BASE)
-
-/* PLL registers */
-#define CSCR   __REG(IMX_PLL_BASE)        /* Clock Source Control Register */
-#define CSCR_SPLL_RESTART	(1<<22)
-#define CSCR_MPLL_RESTART	(1<<21)
-#define CSCR_SYSTEM_SEL		(1<<16)
-#define CSCR_BCLK_DIV		(0xf<<10)
-#define CSCR_MPU_PRESC		(1<<15)
-#define CSCR_SPEN		(1<<1)
-#define CSCR_MPEN		(1<<0)
-
-#define MPCTL0 __REG(IMX_PLL_BASE + 0x4)  /* MCU PLL Control Register 0 */
-#define MPCTL1 __REG(IMX_PLL_BASE + 0x8)  /* MCU PLL and System Clock Register 1 */
-#define SPCTL0 __REG(IMX_PLL_BASE + 0xc)  /* System PLL Control Register 0 */
-#define SPCTL1 __REG(IMX_PLL_BASE + 0x10) /* System PLL Control Register 1 */
-#define PCDR   __REG(IMX_PLL_BASE + 0x20) /* Peripheral Clock Divider Register */
-
-/*
- *  GPIO Module and I/O Multiplexer
- *  x = 0..3 for reg_A, reg_B, reg_C, reg_D
- */
-#define DDIR(x)    __REG2(IMX_GPIO_BASE + 0x00, ((x) & 3) << 8)
-#define OCR1(x)    __REG2(IMX_GPIO_BASE + 0x04, ((x) & 3) << 8)
-#define OCR2(x)    __REG2(IMX_GPIO_BASE + 0x08, ((x) & 3) << 8)
-#define ICONFA1(x) __REG2(IMX_GPIO_BASE + 0x0c, ((x) & 3) << 8)
-#define ICONFA2(x) __REG2(IMX_GPIO_BASE + 0x10, ((x) & 3) << 8)
-#define ICONFB1(x) __REG2(IMX_GPIO_BASE + 0x14, ((x) & 3) << 8)
-#define ICONFB2(x) __REG2(IMX_GPIO_BASE + 0x18, ((x) & 3) << 8)
-#define DR(x)      __REG2(IMX_GPIO_BASE + 0x1c, ((x) & 3) << 8)
-#define GIUS(x)    __REG2(IMX_GPIO_BASE + 0x20, ((x) & 3) << 8)
-#define SSR(x)     __REG2(IMX_GPIO_BASE + 0x24, ((x) & 3) << 8)
-#define ICR1(x)    __REG2(IMX_GPIO_BASE + 0x28, ((x) & 3) << 8)
-#define ICR2(x)    __REG2(IMX_GPIO_BASE + 0x2c, ((x) & 3) << 8)
-#define IMR(x)     __REG2(IMX_GPIO_BASE + 0x30, ((x) & 3) << 8)
-#define ISR(x)     __REG2(IMX_GPIO_BASE + 0x34, ((x) & 3) << 8)
-#define GPR(x)     __REG2(IMX_GPIO_BASE + 0x38, ((x) & 3) << 8)
-#define SWR(x)     __REG2(IMX_GPIO_BASE + 0x3c, ((x) & 3) << 8)
-#define PUEN(x)    __REG2(IMX_GPIO_BASE + 0x40, ((x) & 3) << 8)
-
-#define GPIO_PORT_MAX  3
-
-#define GPIO_PIN_MASK 0x1f
-#define GPIO_PORT_MASK (0x3 << 5)
-
-#define GPIO_PORT_SHIFT 5
-#define GPIO_PORTA (0<<5)
-#define GPIO_PORTB (1<<5)
-#define GPIO_PORTC (2<<5)
-#define GPIO_PORTD (3<<5)
-
-#define GPIO_OUT   (1<<7)
-#define GPIO_IN    (0<<7)
-#define GPIO_PUEN  (1<<8)
-
-#define GPIO_PF    (0<<9)
-#define GPIO_AF    (1<<9)
-
-#define GPIO_OCR_SHIFT 10
-#define GPIO_OCR_MASK (3<<10)
-#define GPIO_AIN   (0<<10)
-#define GPIO_BIN   (1<<10)
-#define GPIO_CIN   (2<<10)
-#define GPIO_DR    (3<<10)
-
-#define GPIO_AOUT_SHIFT 12
-#define GPIO_AOUT_MASK (3<<12)
-#define GPIO_AOUT     (0<<12)
-#define GPIO_AOUT_ISR (1<<12)
-#define GPIO_AOUT_0   (2<<12)
-#define GPIO_AOUT_1   (3<<12)
-
-#define GPIO_BOUT_SHIFT 14
-#define GPIO_BOUT_MASK (3<<14)
-#define GPIO_BOUT      (0<<14)
-#define GPIO_BOUT_ISR  (1<<14)
-#define GPIO_BOUT_0    (2<<14)
-#define GPIO_BOUT_1    (3<<14)
-
-#define GPIO_GIUS      (1<<16)
-
-/* assignements for GPIO alternate/primary functions */
-
-/* FIXME: This list is not completed. The correct directions are
- * missing on some (many) pins
- */
-#define PA0_AIN_SPI2_CLK     ( GPIO_GIUS | GPIO_PORTA | GPIO_OUT | 0 )
-#define PA0_AF_ETMTRACESYNC  ( GPIO_PORTA | GPIO_AF | 0 )
-#define PA1_AOUT_SPI2_RXD    ( GPIO_GIUS | GPIO_PORTA | GPIO_IN | 1 )
-#define PA1_PF_TIN           ( GPIO_PORTA | GPIO_PF | 1 )
-#define PA2_PF_PWM0          ( GPIO_PORTA | GPIO_OUT | GPIO_PF | 2 )
-#define PA3_PF_CSI_MCLK      ( GPIO_PORTA | GPIO_PF | 3 )
-#define PA4_PF_CSI_D0        ( GPIO_PORTA | GPIO_PF | 4 )
-#define PA5_PF_CSI_D1        ( GPIO_PORTA | GPIO_PF | 5 )
-#define PA6_PF_CSI_D2        ( GPIO_PORTA | GPIO_PF | 6 )
-#define PA7_PF_CSI_D3        ( GPIO_PORTA | GPIO_PF | 7 )
-#define PA8_PF_CSI_D4        ( GPIO_PORTA | GPIO_PF | 8 )
-#define PA9_PF_CSI_D5        ( GPIO_PORTA | GPIO_PF | 9 )
-#define PA10_PF_CSI_D6       ( GPIO_PORTA | GPIO_PF | 10 )
-#define PA11_PF_CSI_D7       ( GPIO_PORTA | GPIO_PF | 11 )
-#define PA12_PF_CSI_VSYNC    ( GPIO_PORTA | GPIO_PF | 12 )
-#define PA13_PF_CSI_HSYNC    ( GPIO_PORTA | GPIO_PF | 13 )
-#define PA14_PF_CSI_PIXCLK   ( GPIO_PORTA | GPIO_PF | 14 )
-#define PA15_PF_I2C_SDA      ( GPIO_PORTA | GPIO_OUT | GPIO_PF | 15 )
-#define PA16_PF_I2C_SCL      ( GPIO_PORTA | GPIO_OUT | GPIO_PF | 16 )
-#define PA17_AF_ETMTRACEPKT4 ( GPIO_PORTA | GPIO_AF | 17 )
-#define PA17_AIN_SPI2_SS     ( GPIO_GIUS | GPIO_PORTA | GPIO_OUT | 17 )
-#define PA18_AF_ETMTRACEPKT5 ( GPIO_PORTA | GPIO_AF | 18 )
-#define PA19_AF_ETMTRACEPKT6 ( GPIO_PORTA | GPIO_AF | 19 )
-#define PA20_AF_ETMTRACEPKT7 ( GPIO_PORTA | GPIO_AF | 20 )
-#define PA21_PF_A0           ( GPIO_PORTA | GPIO_PF | 21 )
-#define PA22_PF_CS4          ( GPIO_PORTA | GPIO_PF | 22 )
-#define PA23_PF_CS5          ( GPIO_PORTA | GPIO_PF | 23 )
-#define PA24_PF_A16          ( GPIO_PORTA | GPIO_PF | 24 )
-#define PA24_AF_ETMTRACEPKT0 ( GPIO_PORTA | GPIO_AF | 24 )
-#define PA25_PF_A17          ( GPIO_PORTA | GPIO_PF | 25 )
-#define PA25_AF_ETMTRACEPKT1 ( GPIO_PORTA | GPIO_AF | 25 )
-#define PA26_PF_A18          ( GPIO_PORTA | GPIO_PF | 26 )
-#define PA26_AF_ETMTRACEPKT2 ( GPIO_PORTA | GPIO_AF | 26 )
-#define PA27_PF_A19          ( GPIO_PORTA | GPIO_PF | 27 )
-#define PA27_AF_ETMTRACEPKT3 ( GPIO_PORTA | GPIO_AF | 27 )
-#define PA28_PF_A20          ( GPIO_PORTA | GPIO_PF | 28 )
-#define PA28_AF_ETMPIPESTAT0 ( GPIO_PORTA | GPIO_AF | 28 )
-#define PA29_PF_A21          ( GPIO_PORTA | GPIO_PF | 29 )
-#define PA29_AF_ETMPIPESTAT1 ( GPIO_PORTA | GPIO_AF | 29 )
-#define PA30_PF_A22          ( GPIO_PORTA | GPIO_PF | 30 )
-#define PA30_AF_ETMPIPESTAT2 ( GPIO_PORTA | GPIO_AF | 30 )
-#define PA31_PF_A23          ( GPIO_PORTA | GPIO_PF | 31 )
-#define PA31_AF_ETMTRACECLK  ( GPIO_PORTA | GPIO_AF | 31 )
-#define PB8_PF_SD_DAT0       ( GPIO_PORTB | GPIO_PF | GPIO_PUEN | 8 )
-#define PB8_AF_MS_PIO        ( GPIO_PORTB | GPIO_AF | 8 )
-#define PB9_PF_SD_DAT1       ( GPIO_PORTB | GPIO_PF | GPIO_PUEN  | 9 )
-#define PB9_AF_MS_PI1        ( GPIO_PORTB | GPIO_AF | 9 )
-#define PB10_PF_SD_DAT2      ( GPIO_PORTB | GPIO_PF | GPIO_PUEN  | 10 )
-#define PB10_AF_MS_SCLKI     ( GPIO_PORTB | GPIO_AF | 10 )
-#define PB11_PF_SD_DAT3      ( GPIO_PORTB | GPIO_PF | 11 )
-#define PB11_AF_MS_SDIO      ( GPIO_PORTB | GPIO_AF | 11 )
-#define PB12_PF_SD_CLK       ( GPIO_PORTB | GPIO_PF | 12 )
-#define PB12_AF_MS_SCLK0     ( GPIO_PORTB | GPIO_AF | 12 )
-#define PB13_PF_SD_CMD       ( GPIO_PORTB | GPIO_PF | GPIO_PUEN | 13 )
-#define PB13_AF_MS_BS        ( GPIO_PORTB | GPIO_AF | 13 )
-#define PB14_AF_SSI_RXFS     ( GPIO_PORTB | GPIO_AF | 14 )
-#define PB15_AF_SSI_RXCLK    ( GPIO_PORTB | GPIO_AF | 15 )
-#define PB16_AF_SSI_RXDAT    ( GPIO_PORTB | GPIO_IN | GPIO_AF | 16 )
-#define PB17_AF_SSI_TXDAT    ( GPIO_PORTB | GPIO_OUT | GPIO_AF | 17 )
-#define PB18_AF_SSI_TXFS     ( GPIO_PORTB | GPIO_AF | 18 )
-#define PB19_AF_SSI_TXCLK    ( GPIO_PORTB | GPIO_AF | 19 )
-#define PB20_PF_USBD_AFE     ( GPIO_PORTB | GPIO_PF | 20 )
-#define PB21_PF_USBD_OE      ( GPIO_PORTB | GPIO_PF | 21 )
-#define PB22_PFUSBD_RCV      ( GPIO_PORTB | GPIO_PF | 22 )
-#define PB23_PF_USBD_SUSPND  ( GPIO_PORTB | GPIO_PF | 23 )
-#define PB24_PF_USBD_VP      ( GPIO_PORTB | GPIO_PF | 24 )
-#define PB25_PF_USBD_VM      ( GPIO_PORTB | GPIO_PF | 25 )
-#define PB26_PF_USBD_VPO     ( GPIO_PORTB | GPIO_PF | 26 )
-#define PB27_PF_USBD_VMO     ( GPIO_PORTB | GPIO_PF | 27 )
-#define PB28_PF_UART2_CTS    ( GPIO_PORTB | GPIO_OUT | GPIO_PF | 28 )
-#define PB29_PF_UART2_RTS    ( GPIO_PORTB | GPIO_IN | GPIO_PF | 29 )
-#define PB30_PF_UART2_TXD    ( GPIO_PORTB | GPIO_OUT | GPIO_PF | 30 )
-#define PB31_PF_UART2_RXD    ( GPIO_PORTB | GPIO_IN | GPIO_PF | 31 )
-#define PC3_PF_SSI_RXFS      ( GPIO_PORTC | GPIO_PF | 3 )
-#define PC4_PF_SSI_RXCLK     ( GPIO_PORTC | GPIO_PF | 4 )
-#define PC5_PF_SSI_RXDAT     ( GPIO_PORTC | GPIO_IN | GPIO_PF | 5 )
-#define PC6_PF_SSI_TXDAT     ( GPIO_PORTC | GPIO_OUT | GPIO_PF | 6 )
-#define PC7_PF_SSI_TXFS      ( GPIO_PORTC | GPIO_PF | 7 )
-#define PC8_PF_SSI_TXCLK     ( GPIO_PORTC | GPIO_PF | 8 )
-#define PC9_PF_UART1_CTS     ( GPIO_PORTC | GPIO_OUT | GPIO_PF | 9 )
-#define PC10_PF_UART1_RTS    ( GPIO_PORTC | GPIO_IN | GPIO_PF | 10 )
-#define PC11_PF_UART1_TXD    ( GPIO_PORTC | GPIO_OUT | GPIO_PF | 11 )
-#define PC12_PF_UART1_RXD    ( GPIO_PORTC | GPIO_IN | GPIO_PF | 12 )
-#define PC13_PF_SPI1_SPI_RDY ( GPIO_PORTC | GPIO_PF | 13 )
-#define PC14_PF_SPI1_SCLK    ( GPIO_PORTC | GPIO_PF | 14 )
-#define PC15_PF_SPI1_SS      ( GPIO_PORTC | GPIO_PF | 15 )
-#define PC16_PF_SPI1_MISO    ( GPIO_PORTC | GPIO_PF | 16 )
-#define PC17_PF_SPI1_MOSI    ( GPIO_PORTC | GPIO_PF | 17 )
-#define PC24_BIN_UART3_RI    ( GPIO_GIUS | GPIO_PORTC | GPIO_OUT | GPIO_BIN | 24 )
-#define PC25_BIN_UART3_DSR   ( GPIO_GIUS | GPIO_PORTC | GPIO_OUT | GPIO_BIN | 25 )
-#define PC26_AOUT_UART3_DTR  ( GPIO_GIUS | GPIO_PORTC | GPIO_IN | 26 )
-#define PC27_BIN_UART3_DCD   ( GPIO_GIUS | GPIO_PORTC | GPIO_OUT | GPIO_BIN | 27 )
-#define PC28_BIN_UART3_CTS   ( GPIO_GIUS | GPIO_PORTC | GPIO_OUT | GPIO_BIN | 28 )
-#define PC29_AOUT_UART3_RTS  ( GPIO_GIUS | GPIO_PORTC | GPIO_IN | 29 )
-#define PC30_BIN_UART3_TX    ( GPIO_GIUS | GPIO_PORTC | GPIO_BIN | 30 )
-#define PC31_AOUT_UART3_RX   ( GPIO_GIUS | GPIO_PORTC | GPIO_IN | 31)
-#define PD6_PF_LSCLK         ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 6 )
-#define PD7_PF_REV           ( GPIO_PORTD | GPIO_PF | 7 )
-#define PD7_AF_UART2_DTR     ( GPIO_GIUS | GPIO_PORTD | GPIO_IN | GPIO_AF | 7 )
-#define PD7_AIN_SPI2_SCLK    ( GPIO_GIUS | GPIO_PORTD | GPIO_AIN | 7 )
-#define PD8_PF_CLS           ( GPIO_PORTD | GPIO_PF | 8 )
-#define PD8_AF_UART2_DCD     ( GPIO_PORTD | GPIO_OUT | GPIO_AF | 8 )
-#define PD8_AIN_SPI2_SS      ( GPIO_GIUS | GPIO_PORTD | GPIO_AIN | 8 )
-#define PD9_PF_PS            ( GPIO_PORTD | GPIO_PF | 9 )
-#define PD9_AF_UART2_RI      ( GPIO_PORTD | GPIO_OUT | GPIO_AF | 9 )
-#define PD9_AOUT_SPI2_RXD    ( GPIO_GIUS | GPIO_PORTD | GPIO_IN | 9 )
-#define PD10_PF_SPL_SPR      ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 10 )
-#define PD10_AF_UART2_DSR    ( GPIO_PORTD | GPIO_OUT | GPIO_AF | 10 )
-#define PD10_AIN_SPI2_TXD    ( GPIO_GIUS | GPIO_PORTD | GPIO_OUT | 10 )
-#define PD11_PF_CONTRAST     ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 11 )
-#define PD12_PF_ACD_OE       ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 12 )
-#define PD13_PF_LP_HSYNC     ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 13 )
-#define PD14_PF_FLM_VSYNC    ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 14 )
-#define PD15_PF_LD0          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 15 )
-#define PD16_PF_LD1          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 16 )
-#define PD17_PF_LD2          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 17 )
-#define PD18_PF_LD3          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 18 )
-#define PD19_PF_LD4          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 19 )
-#define PD20_PF_LD5          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 20 )
-#define PD21_PF_LD6          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 21 )
-#define PD22_PF_LD7          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 22 )
-#define PD23_PF_LD8          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 23 )
-#define PD24_PF_LD9          ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 24 )
-#define PD25_PF_LD10         ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 25 )
-#define PD26_PF_LD11         ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 26 )
-#define PD27_PF_LD12         ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 27 )
-#define PD28_PF_LD13         ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 28 )
-#define PD29_PF_LD14         ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 29 )
-#define PD30_PF_LD15         ( GPIO_PORTD | GPIO_OUT | GPIO_PF | 30 )
-#define PD31_PF_TMR2OUT      ( GPIO_PORTD | GPIO_PF | 31 )
-#define PD31_BIN_SPI2_TXD    ( GPIO_GIUS | GPIO_PORTD | GPIO_BIN | 31 )
-
-/*
- * PWM controller
- */
-#define PWMC	__REG(IMX_PWM_BASE + 0x00)	/* PWM Control Register		*/
-#define PWMS	__REG(IMX_PWM_BASE + 0x04)	/* PWM Sample Register		*/
-#define PWMP	__REG(IMX_PWM_BASE + 0x08)	/* PWM Period Register		*/
-#define PWMCNT	__REG(IMX_PWM_BASE + 0x0C)	/* PWM Counter Register		*/
-
-#define PWMC_HCTR		(0x01<<18)		/* Halfword FIFO Data Swapping	*/
-#define PWMC_BCTR		(0x01<<17)		/* Byte FIFO Data Swapping	*/
-#define PWMC_SWR		(0x01<<16)		/* Software Reset		*/
-#define PWMC_CLKSRC		(0x01<<15)		/* Clock Source			*/
-#define PWMC_PRESCALER(x)	(((x-1) & 0x7F) << 8)	/* PRESCALER			*/
-#define PWMC_IRQ		(0x01<< 7)		/* Interrupt Request		*/
-#define PWMC_IRQEN		(0x01<< 6)		/* Interrupt Request Enable	*/
-#define PWMC_FIFOAV		(0x01<< 5)		/* FIFO Available		*/
-#define PWMC_EN			(0x01<< 4)		/* Enables/Disables the PWM	*/
-#define PWMC_REPEAT(x)		(((x) & 0x03) << 2)	/* Sample Repeats		*/
-#define PWMC_CLKSEL(x)		(((x) & 0x03) << 0)	/* Clock Selection		*/
-
-#define PWMS_SAMPLE(x)		((x) & 0xFFFF)		/* Contains a two-sample word	*/
-#define PWMP_PERIOD(x)		((x) & 0xFFFF)		/* Represents the PWM's period	*/
-#define PWMC_COUNTER(x)		((x) & 0xFFFF)		/* Represents the current count value	*/
-
-/*
- *  DMA Controller
- */
-#define DCR     __REG(IMX_DMAC_BASE +0x00)	/* DMA Control Register */
-#define DISR    __REG(IMX_DMAC_BASE +0x04)	/* DMA Interrupt status Register */
-#define DIMR    __REG(IMX_DMAC_BASE +0x08)	/* DMA Interrupt mask Register */
-#define DBTOSR  __REG(IMX_DMAC_BASE +0x0c)	/* DMA Burst timeout status Register */
-#define DRTOSR  __REG(IMX_DMAC_BASE +0x10)	/* DMA Request timeout Register */
-#define DSESR   __REG(IMX_DMAC_BASE +0x14)	/* DMA Transfer Error Status Register */
-#define DBOSR   __REG(IMX_DMAC_BASE +0x18)	/* DMA Buffer overflow status Register */
-#define DBTOCR  __REG(IMX_DMAC_BASE +0x1c)	/* DMA Burst timeout control Register */
-#define WSRA    __REG(IMX_DMAC_BASE +0x40)	/* W-Size Register A */
-#define XSRA    __REG(IMX_DMAC_BASE +0x44)	/* X-Size Register A */
-#define YSRA    __REG(IMX_DMAC_BASE +0x48)	/* Y-Size Register A */
-#define WSRB    __REG(IMX_DMAC_BASE +0x4c)	/* W-Size Register B */
-#define XSRB    __REG(IMX_DMAC_BASE +0x50)	/* X-Size Register B */
-#define YSRB    __REG(IMX_DMAC_BASE +0x54)	/* Y-Size Register B */
-#define SAR(x)  __REG2( IMX_DMAC_BASE + 0x80, (x) << 6)	/* Source Address Registers */
-#define DAR(x)  __REG2( IMX_DMAC_BASE + 0x84, (x) << 6)	/* Destination Address Registers */
-#define CNTR(x) __REG2( IMX_DMAC_BASE + 0x88, (x) << 6)	/* Count Registers */
-#define CCR(x)  __REG2( IMX_DMAC_BASE + 0x8c, (x) << 6)	/* Control Registers */
-#define RSSR(x) __REG2( IMX_DMAC_BASE + 0x90, (x) << 6)	/* Request source select Registers */
-#define BLR(x)  __REG2( IMX_DMAC_BASE + 0x94, (x) << 6)	/* Burst length Registers */
-#define RTOR(x) __REG2( IMX_DMAC_BASE + 0x98, (x) << 6)	/* Request timeout Registers */
-#define BUCR(x) __REG2( IMX_DMAC_BASE + 0x98, (x) << 6)	/* Bus Utilization Registers */
-
-#define DCR_DRST           (1<<1)
-#define DCR_DEN            (1<<0)
-#define DBTOCR_EN          (1<<15)
-#define DBTOCR_CNT(x)      ((x) & 0x7fff )
-#define CNTR_CNT(x)        ((x) & 0xffffff )
-#define CCR_DMOD_LINEAR    ( 0x0 << 12 )
-#define CCR_DMOD_2D        ( 0x1 << 12 )
-#define CCR_DMOD_FIFO      ( 0x2 << 12 )
-#define CCR_DMOD_EOBFIFO   ( 0x3 << 12 )
-#define CCR_SMOD_LINEAR    ( 0x0 << 10 )
-#define CCR_SMOD_2D        ( 0x1 << 10 )
-#define CCR_SMOD_FIFO      ( 0x2 << 10 )
-#define CCR_SMOD_EOBFIFO   ( 0x3 << 10 )
-#define CCR_MDIR_DEC       (1<<9)
-#define CCR_MSEL_B         (1<<8)
-#define CCR_DSIZ_32        ( 0x0 << 6 )
-#define CCR_DSIZ_8         ( 0x1 << 6 )
-#define CCR_DSIZ_16        ( 0x2 << 6 )
-#define CCR_SSIZ_32        ( 0x0 << 4 )
-#define CCR_SSIZ_8         ( 0x1 << 4 )
-#define CCR_SSIZ_16        ( 0x2 << 4 )
-#define CCR_REN            (1<<3)
-#define CCR_RPT            (1<<2)
-#define CCR_FRC            (1<<1)
-#define CCR_CEN            (1<<0)
-#define RTOR_EN            (1<<15)
-#define RTOR_CLK           (1<<14)
-#define RTOR_PSC           (1<<13)
-
-/*
- *  Interrupt controller
- */
-
-#define IMX_INTCNTL        __REG(IMX_AITC_BASE+0x00)
-#define INTCNTL_FIAD       (1<<19)
-#define INTCNTL_NIAD       (1<<20)
-
-#define IMX_NIMASK         __REG(IMX_AITC_BASE+0x04)
-#define IMX_INTENNUM       __REG(IMX_AITC_BASE+0x08)
-#define IMX_INTDISNUM      __REG(IMX_AITC_BASE+0x0c)
-#define IMX_INTENABLEH     __REG(IMX_AITC_BASE+0x10)
-#define IMX_INTENABLEL     __REG(IMX_AITC_BASE+0x14)
-
-/*
- *  General purpose timers
- */
-#define IMX_TCTL(x)        __REG( 0x00 + (x))
-#define TCTL_SWR           (1<<15)
-#define TCTL_FRR           (1<<8)
-#define TCTL_CAP_RIS       (1<<6)
-#define TCTL_CAP_FAL       (2<<6)
-#define TCTL_CAP_RIS_FAL   (3<<6)
-#define TCTL_OM            (1<<5)
-#define TCTL_IRQEN         (1<<4)
-#define TCTL_CLK_PCLK1     (1<<1)
-#define TCTL_CLK_PCLK1_16  (2<<1)
-#define TCTL_CLK_TIN       (3<<1)
-#define TCTL_CLK_32        (4<<1)
-#define TCTL_TEN           (1<<0)
-
-#define IMX_TPRER(x)       __REG( 0x04 + (x))
-#define IMX_TCMP(x)        __REG( 0x08 + (x))
-#define IMX_TCR(x)         __REG( 0x0C + (x))
-#define IMX_TCN(x)         __REG( 0x10 + (x))
-#define IMX_TSTAT(x)       __REG( 0x14 + (x))
-#define TSTAT_CAPT         (1<<1)
-#define TSTAT_COMP         (1<<0)
-
-#endif				// _IMX_REGS_H
diff --git a/arch/arm/mach-imx/include/mach/imx-uart.h b/arch/arm/mach-imx/include/mach/imx-uart.h
deleted file mode 100644
index d54eb1d..0000000
--- a/arch/arm/mach-imx/include/mach/imx-uart.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef ASMARM_ARCH_UART_H
-#define ASMARM_ARCH_UART_H
-
-#define IMXUART_HAVE_RTSCTS (1<<0)
-
-struct imxuart_platform_data {
-	int (*init)(struct platform_device *pdev);
-	void (*exit)(struct platform_device *pdev);
-	unsigned int flags;
-};
-
-#endif
diff --git a/arch/arm/mach-imx/include/mach/io.h b/arch/arm/mach-imx/include/mach/io.h
deleted file mode 100644
index 9e197ae..0000000
--- a/arch/arm/mach-imx/include/mach/io.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- *  arch/arm/mach-imxads/include/mach/io.h
- *
- *  Copyright (C) 1999 ARM Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef __ASM_ARM_ARCH_IO_H
-#define __ASM_ARM_ARCH_IO_H
-
-#define IO_SPACE_LIMIT 0xffffffff
-
-#define __io(a)		__typesafe_io(a)
-#define __mem_pci(a)	(a)
-
-#endif
diff --git a/arch/arm/mach-imx/include/mach/irqs.h b/arch/arm/mach-imx/include/mach/irqs.h
deleted file mode 100644
index 67812c5..0000000
--- a/arch/arm/mach-imx/include/mach/irqs.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- *  arch/arm/mach-imxads/include/mach/irqs.h
- *
- *  Copyright (C) 1999 ARM Limited
- *  Copyright (C) 2000 Deep Blue Solutions Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef __ARM_IRQS_H__
-#define __ARM_IRQS_H__
-
-/* Use the imx definitions */
-#include <mach/hardware.h>
-
-/*
- *  IMX Interrupt numbers
- *
- */
-#define INT_SOFTINT                 0
-#define CSI_INT                     6
-#define DSPA_MAC_INT                7
-#define DSPA_INT                    8
-#define COMP_INT                    9
-#define MSHC_XINT                   10
-#define GPIO_INT_PORTA              11
-#define GPIO_INT_PORTB              12
-#define GPIO_INT_PORTC              13
-#define LCDC_INT                    14
-#define SIM_INT                     15
-#define SIM_DATA_INT                16
-#define RTC_INT                     17
-#define RTC_SAMINT                  18
-#define UART2_MINT_PFERR            19
-#define UART2_MINT_RTS              20
-#define UART2_MINT_DTR              21
-#define UART2_MINT_UARTC            22
-#define UART2_MINT_TX               23
-#define UART2_MINT_RX               24
-#define UART1_MINT_PFERR            25
-#define UART1_MINT_RTS              26
-#define UART1_MINT_DTR              27
-#define UART1_MINT_UARTC            28
-#define UART1_MINT_TX               29
-#define UART1_MINT_RX               30
-#define VOICE_DAC_INT               31
-#define VOICE_ADC_INT               32
-#define PEN_DATA_INT                33
-#define PWM_INT                     34
-#define SDHC_INT                    35
-#define I2C_INT                     39
-#define CSPI_INT                    41
-#define SSI_TX_INT                  42
-#define SSI_TX_ERR_INT              43
-#define SSI_RX_INT                  44
-#define SSI_RX_ERR_INT              45
-#define TOUCH_INT                   46
-#define USBD_INT0                   47
-#define USBD_INT1                   48
-#define USBD_INT2                   49
-#define USBD_INT3                   50
-#define USBD_INT4                   51
-#define USBD_INT5                   52
-#define USBD_INT6                   53
-#define BTSYS_INT                   55
-#define BTTIM_INT                   56
-#define BTWUI_INT                   57
-#define TIM2_INT                    58
-#define TIM1_INT                    59
-#define DMA_ERR                     60
-#define DMA_INT                     61
-#define GPIO_INT_PORTD              62
-
-#define IMX_IRQS                         (64)
-
-/* note: the IMX has four gpio ports (A-D), but only
- *       the following pins are connected to the outside
- *       world:
- *
- * PORT A: bits 0-31
- * PORT B: bits 8-31
- * PORT C: bits 3-17
- * PORT D: bits 6-31
- *
- * We map these interrupts straight on. As a result we have
- * several holes in the interrupt mapping. We do this for two
- * reasons:
- *   - mapping the interrupts without holes would get
- *     far more complicated
- *   - Motorola could well decide to bring some processor
- *     with more pins connected
- */
-
-#define IRQ_GPIOA(x)  (IMX_IRQS + x)
-#define IRQ_GPIOB(x)  (IRQ_GPIOA(32) + x)
-#define IRQ_GPIOC(x)  (IRQ_GPIOB(32) + x)
-#define IRQ_GPIOD(x)  (IRQ_GPIOC(32) + x)
-
-/* decode irq number to use with IMR(x), ISR(x) and friends */
-#define IRQ_TO_REG(irq) ((irq - IMX_IRQS) >> 5)
-
-/* all normal IRQs can be FIQs */
-#define FIQ_START	0
-/* switch betwean IRQ and FIQ */
-extern int imx_set_irq_fiq(unsigned int irq, unsigned int type);
-
-#define NR_IRQS (IRQ_GPIOD(32) + 1)
-#define IRQ_GPIO(x)
-#endif
diff --git a/arch/arm/mach-imx/include/mach/memory.h b/arch/arm/mach-imx/include/mach/memory.h
deleted file mode 100644
index a93df7c..0000000
--- a/arch/arm/mach-imx/include/mach/memory.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *  arch/arm/mach-imx/include/mach/memory.h
- *
- *  Copyright (C) 1999 ARM Limited
- *  Copyright (C) 2002 Shane Nay (shane@minirl.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef __ASM_ARCH_MMU_H
-#define __ASM_ARCH_MMU_H
-
-#define PHYS_OFFSET	UL(0x08000000)
-
-#endif
diff --git a/arch/arm/mach-imx/include/mach/mmc.h b/arch/arm/mach-imx/include/mach/mmc.h
deleted file mode 100644
index 4712f35..0000000
--- a/arch/arm/mach-imx/include/mach/mmc.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef ASMARM_ARCH_MMC_H
-#define ASMARM_ARCH_MMC_H
-
-#include <linux/mmc/host.h>
-
-struct device;
-
-struct imxmmc_platform_data {
-	int (*card_present)(struct device *);
-	int (*get_ro)(struct device *);
-};
-
-extern void imx_set_mmc_info(struct imxmmc_platform_data *info);
-
-#endif
diff --git a/arch/arm/mach-imx/include/mach/mx1ads.h b/arch/arm/mach-imx/include/mach/mx1ads.h
deleted file mode 100644
index def05d5..0000000
--- a/arch/arm/mach-imx/include/mach/mx1ads.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * arch/arm/mach-imx/include/mach/mx1ads.h
- *
- * Copyright (C) 2004 Robert Schwebel, Pengutronix
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#ifndef __ASM_ARCH_MX1ADS_H
-#define __ASM_ARCH_MX1ADS_H
-
-/* ------------------------------------------------------------------------ */
-/* Memory Map for the M9328MX1ADS (MX1ADS) Board                            */
-/* ------------------------------------------------------------------------ */
-
-#define MX1ADS_FLASH_PHYS		0x10000000
-#define MX1ADS_FLASH_SIZE		(16*1024*1024)
-
-#define IMX_FB_PHYS			(0x0C000000 - 0x40000)
-
-#define CLK32 32000
-
-#endif /* __ASM_ARCH_MX1ADS_H */
diff --git a/arch/arm/mach-imx/include/mach/spi_imx.h b/arch/arm/mach-imx/include/mach/spi_imx.h
deleted file mode 100644
index 4186430..0000000
--- a/arch/arm/mach-imx/include/mach/spi_imx.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * arch/arm/mach-imx/include/mach/spi_imx.h
- *
- * Copyright (C) 2006 SWAPP
- *	Andrea Paterniani <a.paterniani@swapp-eng.it>
- *
- * Initial version inspired by:
- *	linux-2.6.17-rc3-mm1/arch/arm/mach-pxa/include/mach/pxa2xx_spi.h
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef SPI_IMX_H_
-#define SPI_IMX_H_
-
-
-/*-------------------------------------------------------------------------*/
-/**
- * struct spi_imx_master - device.platform_data for SPI controller devices.
- * @num_chipselect: chipselects are used to distinguish individual
- *	SPI slaves, and are numbered from zero to num_chipselects - 1.
- *	each slave has a chipselect signal, but it's common that not
- *	every chipselect is connected to a slave.
- * @enable_dma: if true enables DMA driven transfers.
-*/
-struct spi_imx_master {
-	u8 num_chipselect;
-	u8 enable_dma:1;
-};
-/*-------------------------------------------------------------------------*/
-
-
-/*-------------------------------------------------------------------------*/
-/**
- * struct spi_imx_chip - spi_board_info.controller_data for SPI
- * slave devices, copied to spi_device.controller_data.
- * @enable_loopback : used for test purpouse to internally connect RX and TX
- *	sections.
- * @enable_dma : enables dma transfer (provided that controller driver has
- *	dma enabled too).
- * @ins_ss_pulse : enable /SS pulse insertion between SPI burst.
- * @bclk_wait : number of bclk waits between each bits_per_word SPI burst.
- * @cs_control : function pointer to board-specific function to assert/deassert
- *	I/O port to control HW generation of devices chip-select.
-*/
-struct spi_imx_chip {
-	u8	enable_loopback:1;
-	u8	enable_dma:1;
-	u8	ins_ss_pulse:1;
-	u16	bclk_wait:15;
-	void (*cs_control)(u32 control);
-};
-
-/* Chip-select state */
-#define SPI_CS_ASSERT			(1 << 0)
-#define SPI_CS_DEASSERT			(1 << 1)
-/*-------------------------------------------------------------------------*/
-
-
-#endif /* SPI_IMX_H_*/
diff --git a/arch/arm/mach-imx/include/mach/system.h b/arch/arm/mach-imx/include/mach/system.h
deleted file mode 100644
index 46d4ca9..0000000
--- a/arch/arm/mach-imx/include/mach/system.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  arch/arm/mach-imxads/include/mach/system.h
- *
- *  Copyright (C) 1999 ARM Limited
- *  Copyright (C) 2000 Deep Blue Solutions Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef __ASM_ARCH_SYSTEM_H
-#define __ASM_ARCH_SYSTEM_H
-
-static void
-arch_idle(void)
-{
-	/*
-	 * This should do all the clock switching
-	 * and wait for interrupt tricks
-	 */
-	cpu_do_idle();
-}
-
-static inline void
-arch_reset(char mode, const char *cmd)
-{
-	cpu_reset(0);
-}
-
-#endif
diff --git a/arch/arm/mach-imx/include/mach/timex.h b/arch/arm/mach-imx/include/mach/timex.h
deleted file mode 100644
index e22ba78..0000000
--- a/arch/arm/mach-imx/include/mach/timex.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *  linux/include/asm-arm/imx/timex.h
- *
- *  Copyright (C) 1999 ARM Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef __ASM_ARCH_TIMEX_H
-#define __ASM_ARCH_TIMEX_H
-
-#define CLOCK_TICK_RATE		(16000000)
-
-#endif
diff --git a/arch/arm/mach-imx/include/mach/uncompress.h b/arch/arm/mach-imx/include/mach/uncompress.h
deleted file mode 100644
index 70523e6..0000000
--- a/arch/arm/mach-imx/include/mach/uncompress.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *  arch/arm/mach-imxads/include/mach/uncompress.h
- *
- *
- *
- *  Copyright (C) 1999 ARM Limited
- *  Copyright (C) Shane Nay (shane@minirl.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#define UART(x) (*(volatile unsigned long *)(serial_port + (x)))
-
-#define UART1_BASE 0x206000
-#define UART2_BASE 0x207000
-#define USR2 0x98
-#define USR2_TXFE (1<<14)
-#define TXR  0x40
-#define UCR1 0x80
-#define UCR1_UARTEN 1
-
-/*
- * The following code assumes the serial port has already been
- * initialized by the bootloader.  We search for the first enabled
- * port in the most probable order.  If you didn't setup a port in
- * your bootloader then nothing will appear (which might be desired).
- *
- * This does not append a newline
- */
-static void putc(int c)
-{
-	unsigned long serial_port;
-
-	do {
-		serial_port = UART1_BASE;
-		if ( UART(UCR1) & UCR1_UARTEN )
-			break;
-		serial_port = UART2_BASE;
-		if ( UART(UCR1) & UCR1_UARTEN )
-			break;
-		return;
-	} while(0);
-
-	while (!(UART(USR2) & USR2_TXFE))
-		barrier();
-
-	UART(TXR) = c;
-}
-
-static inline void flush(void)
-{
-}
-
-/*
- * nothing to do
- */
-#define arch_decomp_setup()
-
-#define arch_decomp_wdog()
diff --git a/arch/arm/mach-imx/include/mach/vmalloc.h b/arch/arm/mach-imx/include/mach/vmalloc.h
deleted file mode 100644
index 7d7cb0b..0000000
--- a/arch/arm/mach-imx/include/mach/vmalloc.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- *  arch/arm/mach-imx/include/mach/vmalloc.h
- *
- *  Copyright (C) 2000 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#define VMALLOC_END       (PAGE_OFFSET + 0x10000000)
diff --git a/arch/arm/mach-imx/irq.c b/arch/arm/mach-imx/irq.c
deleted file mode 100644
index 531b95d..0000000
--- a/arch/arm/mach-imx/irq.c
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- *  linux/arch/arm/mach-imx/irq.c
- *
- *  Copyright (C) 1999 ARM Limited
- *  Copyright (C) 2002 Shane Nay (shane@minirl.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- *  03/03/2004   Sascha Hauer <sascha@saschahauer.de>
- *               Copied from the motorola bsp package and added gpio demux
- *               interrupt handler
- */
-
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/timer.h>
-#include <linux/io.h>
-
-#include <mach/hardware.h>
-#include <asm/irq.h>
-
-#include <asm/mach/irq.h>
-
-/*
- *
- * We simply use the ENABLE DISABLE registers inside of the IMX
- * to turn on/off specific interrupts.
- *
- */
-
-#define INTCNTL_OFF               0x00
-#define NIMASK_OFF                0x04
-#define INTENNUM_OFF              0x08
-#define INTDISNUM_OFF             0x0C
-#define INTENABLEH_OFF            0x10
-#define INTENABLEL_OFF            0x14
-#define INTTYPEH_OFF              0x18
-#define INTTYPEL_OFF              0x1C
-#define NIPRIORITY_OFF(x)         (0x20+4*(7-(x)))
-#define NIVECSR_OFF               0x40
-#define FIVECSR_OFF               0x44
-#define INTSRCH_OFF               0x48
-#define INTSRCL_OFF               0x4C
-#define INTFRCH_OFF               0x50
-#define INTFRCL_OFF               0x54
-#define NIPNDH_OFF                0x58
-#define NIPNDL_OFF                0x5C
-#define FIPNDH_OFF                0x60
-#define FIPNDL_OFF                0x64
-
-#define VA_AITC_BASE              IO_ADDRESS(IMX_AITC_BASE)
-#define IMX_AITC_INTCNTL         (VA_AITC_BASE + INTCNTL_OFF)
-#define IMX_AITC_NIMASK          (VA_AITC_BASE + NIMASK_OFF)
-#define IMX_AITC_INTENNUM        (VA_AITC_BASE + INTENNUM_OFF)
-#define IMX_AITC_INTDISNUM       (VA_AITC_BASE + INTDISNUM_OFF)
-#define IMX_AITC_INTENABLEH      (VA_AITC_BASE + INTENABLEH_OFF)
-#define IMX_AITC_INTENABLEL      (VA_AITC_BASE + INTENABLEL_OFF)
-#define IMX_AITC_INTTYPEH        (VA_AITC_BASE + INTTYPEH_OFF)
-#define IMX_AITC_INTTYPEL        (VA_AITC_BASE + INTTYPEL_OFF)
-#define IMX_AITC_NIPRIORITY(x)   (VA_AITC_BASE + NIPRIORITY_OFF(x))
-#define IMX_AITC_NIVECSR         (VA_AITC_BASE + NIVECSR_OFF)
-#define IMX_AITC_FIVECSR         (VA_AITC_BASE + FIVECSR_OFF)
-#define IMX_AITC_INTSRCH         (VA_AITC_BASE + INTSRCH_OFF)
-#define IMX_AITC_INTSRCL         (VA_AITC_BASE + INTSRCL_OFF)
-#define IMX_AITC_INTFRCH         (VA_AITC_BASE + INTFRCH_OFF)
-#define IMX_AITC_INTFRCL         (VA_AITC_BASE + INTFRCL_OFF)
-#define IMX_AITC_NIPNDH          (VA_AITC_BASE + NIPNDH_OFF)
-#define IMX_AITC_NIPNDL          (VA_AITC_BASE + NIPNDL_OFF)
-#define IMX_AITC_FIPNDH          (VA_AITC_BASE + FIPNDH_OFF)
-#define IMX_AITC_FIPNDL          (VA_AITC_BASE + FIPNDL_OFF)
-
-#if 0
-#define DEBUG_IRQ(fmt...)	printk(fmt)
-#else
-#define DEBUG_IRQ(fmt...)	do { } while (0)
-#endif
-
-static void
-imx_mask_irq(unsigned int irq)
-{
-	__raw_writel(irq, IMX_AITC_INTDISNUM);
-}
-
-static void
-imx_unmask_irq(unsigned int irq)
-{
-	__raw_writel(irq, IMX_AITC_INTENNUM);
-}
-
-#ifdef CONFIG_FIQ
-int imx_set_irq_fiq(unsigned int irq, unsigned int type)
-{
-	unsigned int irqt;
-
-	if (irq >= IMX_IRQS)
-		return -EINVAL;
-
-	if (irq < IMX_IRQS / 2) {
-		irqt = __raw_readl(IMX_AITC_INTTYPEL) & ~(1 << irq);
-		__raw_writel(irqt | (!!type << irq), IMX_AITC_INTTYPEL);
-	} else {
-		irq -= IMX_IRQS / 2;
-		irqt = __raw_readl(IMX_AITC_INTTYPEH) & ~(1 << irq);
-		__raw_writel(irqt | (!!type << irq), IMX_AITC_INTTYPEH);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(imx_set_irq_fiq);
-#endif /* CONFIG_FIQ */
-
-static int
-imx_gpio_irq_type(unsigned int _irq, unsigned int type)
-{
-	unsigned int irq_type = 0, irq, reg, bit;
-
-	irq = _irq - IRQ_GPIOA(0);
-	reg = irq >> 5;
-	bit = 1 << (irq % 32);
-
-	if (type == IRQ_TYPE_PROBE) {
-		/* Don't mess with enabled GPIOs using preconfigured edges or
-		   GPIOs set to alternate function during probe */
-		/* TODO: support probe */
-//              if ((GPIO_IRQ_rising_edge[idx] | GPIO_IRQ_falling_edge[idx]) &
-//                  GPIO_bit(gpio))
-//                      return 0;
-//              if (GAFR(gpio) & (0x3 << (((gpio) & 0xf)*2)))
-//                      return 0;
-//              type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING;
-	}
-
-	GIUS(reg) |= bit;
-	DDIR(reg) &= ~(bit);
-
-	DEBUG_IRQ("setting type of irq %d to ", _irq);
-
-	if (type & IRQ_TYPE_EDGE_RISING) {
-		DEBUG_IRQ("rising edges\n");
-		irq_type = 0x0;
-	}
-	if (type & IRQ_TYPE_EDGE_FALLING) {
-		DEBUG_IRQ("falling edges\n");
-		irq_type = 0x1;
-	}
-	if (type & IRQ_TYPE_LEVEL_LOW) {
-		DEBUG_IRQ("low level\n");
-		irq_type = 0x3;
-	}
-	if (type & IRQ_TYPE_LEVEL_HIGH) {
-		DEBUG_IRQ("high level\n");
-		irq_type = 0x2;
-	}
-
-	if (irq % 32 < 16) {
-		ICR1(reg) = (ICR1(reg) & ~(0x3 << ((irq % 16) * 2))) |
-		    (irq_type << ((irq % 16) * 2));
-	} else {
-		ICR2(reg) = (ICR2(reg) & ~(0x3 << ((irq % 16) * 2))) |
-		    (irq_type << ((irq % 16) * 2));
-	}
-
-	return 0;
-
-}
-
-static void
-imx_gpio_ack_irq(unsigned int irq)
-{
-	DEBUG_IRQ("%s: irq %d\n", __func__, irq);
-	ISR(IRQ_TO_REG(irq)) = 1 << ((irq - IRQ_GPIOA(0)) % 32);
-}
-
-static void
-imx_gpio_mask_irq(unsigned int irq)
-{
-	DEBUG_IRQ("%s: irq %d\n", __func__, irq);
-	IMR(IRQ_TO_REG(irq)) &= ~( 1 << ((irq - IRQ_GPIOA(0)) % 32));
-}
-
-static void
-imx_gpio_unmask_irq(unsigned int irq)
-{
-	DEBUG_IRQ("%s: irq %d\n", __func__, irq);
-	IMR(IRQ_TO_REG(irq)) |= 1 << ((irq - IRQ_GPIOA(0)) % 32);
-}
-
-static void
-imx_gpio_handler(unsigned int mask, unsigned int irq,
-                 struct irq_desc *desc)
-{
-	while (mask) {
-		if (mask & 1) {
-			DEBUG_IRQ("handling irq %d\n", irq);
-			generic_handle_irq(irq);
-		}
-		irq++;
-		mask >>= 1;
-	}
-}
-
-static void
-imx_gpioa_demux_handler(unsigned int irq_unused, struct irq_desc *desc)
-{
-	unsigned int mask, irq;
-
-	mask = ISR(0);
-	irq = IRQ_GPIOA(0);
-	imx_gpio_handler(mask, irq, desc);
-}
-
-static void
-imx_gpiob_demux_handler(unsigned int irq_unused, struct irq_desc *desc)
-{
-	unsigned int mask, irq;
-
-	mask = ISR(1);
-	irq = IRQ_GPIOB(0);
-	imx_gpio_handler(mask, irq, desc);
-}
-
-static void
-imx_gpioc_demux_handler(unsigned int irq_unused, struct irq_desc *desc)
-{
-	unsigned int mask, irq;
-
-	mask = ISR(2);
-	irq = IRQ_GPIOC(0);
-	imx_gpio_handler(mask, irq, desc);
-}
-
-static void
-imx_gpiod_demux_handler(unsigned int irq_unused, struct irq_desc *desc)
-{
-	unsigned int mask, irq;
-
-	mask = ISR(3);
-	irq = IRQ_GPIOD(0);
-	imx_gpio_handler(mask, irq, desc);
-}
-
-static struct irq_chip imx_internal_chip = {
-	.name = "MPU",
-	.ack = imx_mask_irq,
-	.mask = imx_mask_irq,
-	.unmask = imx_unmask_irq,
-};
-
-static struct irq_chip imx_gpio_chip = {
-	.name = "GPIO",
-	.ack = imx_gpio_ack_irq,
-	.mask = imx_gpio_mask_irq,
-	.unmask = imx_gpio_unmask_irq,
-	.set_type = imx_gpio_irq_type,
-};
-
-void __init
-imx_init_irq(void)
-{
-	unsigned int irq;
-
-	DEBUG_IRQ("Initializing imx interrupts\n");
-
-	/* Disable all interrupts initially. */
-	/* Do not rely on the bootloader. */
-	__raw_writel(0, IMX_AITC_INTENABLEH);
-	__raw_writel(0, IMX_AITC_INTENABLEL);
-
-	/* Mask all GPIO interrupts as well */
-	IMR(0) = 0;
-	IMR(1) = 0;
-	IMR(2) = 0;
-	IMR(3) = 0;
-
-	for (irq = 0; irq < IMX_IRQS; irq++) {
-		set_irq_chip(irq, &imx_internal_chip);
-		set_irq_handler(irq, handle_level_irq);
-		set_irq_flags(irq, IRQF_VALID);
-	}
-
-	for (irq = IRQ_GPIOA(0); irq < IRQ_GPIOD(32); irq++) {
-		set_irq_chip(irq, &imx_gpio_chip);
-		set_irq_handler(irq, handle_edge_irq);
-		set_irq_flags(irq, IRQF_VALID);
-	}
-
-	set_irq_chained_handler(GPIO_INT_PORTA, imx_gpioa_demux_handler);
-	set_irq_chained_handler(GPIO_INT_PORTB, imx_gpiob_demux_handler);
-	set_irq_chained_handler(GPIO_INT_PORTC, imx_gpioc_demux_handler);
-	set_irq_chained_handler(GPIO_INT_PORTD, imx_gpiod_demux_handler);
-
-	/* Release masking of interrupts according to priority */
-	__raw_writel(-1, IMX_AITC_NIMASK);
-
-#ifdef CONFIG_FIQ
-	/* Initialize FIQ */
-	init_FIQ();
-#endif
-}
diff --git a/arch/arm/mach-imx/leds-mx1ads.c b/arch/arm/mach-imx/leds-mx1ads.c
deleted file mode 100644
index 1d48f27..0000000
--- a/arch/arm/mach-imx/leds-mx1ads.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * linux/arch/arm/mach-imx/leds-mx1ads.c
- *
- * Copyright (c) 2004 Sascha Hauer <sascha@saschahauer.de>
- *
- * Original (leds-footbridge.c) by Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <mach/hardware.h>
-#include <asm/system.h>
-#include <asm/leds.h>
-#include "leds.h"
-
-/*
- * The MX1ADS Board has only one usable LED,
- * so select only the timer led or the
- * cpu usage led
- */
-void
-mx1ads_leds_event(led_event_t ledevt)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	switch (ledevt) {
-#ifdef CONFIG_LEDS_CPU
-	case led_idle_start:
-		DR(0) &= ~(1<<2);
-		break;
-
-	case led_idle_end:
-		DR(0) |= 1<<2;
-		break;
-#endif
-
-#ifdef CONFIG_LEDS_TIMER
-	case led_timer:
-		DR(0) ^= 1<<2;
-#endif
-	default:
-		break;
-	}
-	local_irq_restore(flags);
-}
diff --git a/arch/arm/mach-imx/leds.c b/arch/arm/mach-imx/leds.c
deleted file mode 100644
index cf30803..0000000
--- a/arch/arm/mach-imx/leds.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * linux/arch/arm/mach-imx/leds.c
- *
- * Copyright (C) 2004 Sascha Hauer <sascha@saschahauer.de>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-#include <asm/leds.h>
-#include <asm/mach-types.h>
-
-#include "leds.h"
-
-static int __init
-leds_init(void)
-{
-	if (machine_is_mx1ads()) {
-		leds_event = mx1ads_leds_event;
-	}
-
-	return 0;
-}
-
-__initcall(leds_init);
diff --git a/arch/arm/mach-imx/leds.h b/arch/arm/mach-imx/leds.h
deleted file mode 100644
index 49dc1c1..0000000
--- a/arch/arm/mach-imx/leds.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * arch/arm/mach-imx/leds.h
- *
- * Copyright (c) 2004 Sascha Hauer <sascha@saschahauer.de>
- *
- * blinky lights for IMX-based systems
- *
- */
-extern void mx1ads_leds_event(led_event_t evt);
diff --git a/arch/arm/mach-imx/mx1ads.c b/arch/arm/mach-imx/mx1ads.c
deleted file mode 100644
index 87fa1ff..0000000
--- a/arch/arm/mach-imx/mx1ads.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * arch/arm/mach-imx/mx1ads.c
- *
- * Initially based on:
- *	linux-2.6.7-imx/arch/arm/mach-imx/scb9328.c
- *	Copyright (c) 2004 Sascha Hauer <sascha@saschahauer.de>
- *
- * 2004 (c) MontaVista Software, Inc.
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <asm/system.h>
-#include <mach/hardware.h>
-#include <asm/irq.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-
-#include <asm/mach/map.h>
-#include <asm/mach-types.h>
-
-#include <asm/mach/arch.h>
-#include <mach/mmc.h>
-#include <mach/imx-uart.h>
-#include <linux/interrupt.h>
-#include "generic.h"
-
-static struct resource cs89x0_resources[] = {
-	[0] = {
-		.start	= IMX_CS4_PHYS + 0x300,
-		.end	= IMX_CS4_PHYS + 0x300 + 16,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= IRQ_GPIOC(17),
-		.end	= IRQ_GPIOC(17),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device cs89x0_device = {
-	.name		= "cirrus-cs89x0",
-	.num_resources	= ARRAY_SIZE(cs89x0_resources),
-	.resource	= cs89x0_resources,
-};
-
-static struct imxuart_platform_data uart_pdata = {
-	.flags = IMXUART_HAVE_RTSCTS,
-};
-
-static struct resource imx_uart1_resources[] = {
-	[0] = {
-		.start	= 0x00206000,
-		.end	= 0x002060FF,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= (UART1_MINT_RX),
-		.end	= (UART1_MINT_RX),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= (UART1_MINT_TX),
-		.end	= (UART1_MINT_TX),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= UART1_MINT_RTS,
-		.end	= UART1_MINT_RTS,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device imx_uart1_device = {
-	.name		= "imx-uart",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(imx_uart1_resources),
-	.resource	= imx_uart1_resources,
-	.dev = {
-		.platform_data = &uart_pdata,
-	}
-};
-
-static struct resource imx_uart2_resources[] = {
-	[0] = {
-		.start	= 0x00207000,
-		.end	= 0x002070FF,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= (UART2_MINT_RX),
-		.end	= (UART2_MINT_RX),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= (UART2_MINT_TX),
-		.end	= (UART2_MINT_TX),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= UART2_MINT_RTS,
-		.end	= UART2_MINT_RTS,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device imx_uart2_device = {
-	.name		= "imx-uart",
-	.id		= 1,
-	.num_resources	= ARRAY_SIZE(imx_uart2_resources),
-	.resource	= imx_uart2_resources,
-	.dev = {
-		.platform_data = &uart_pdata,
-	}
-};
-
-static struct platform_device *devices[] __initdata = {
-	&cs89x0_device,
-	&imx_uart1_device,
-	&imx_uart2_device,
-};
-
-#if defined(CONFIG_MMC_IMX) || defined(CONFIG_MMC_IMX_MODULE)
-static int mx1ads_mmc_card_present(struct device *dev)
-{
-	/* MMC/SD Card Detect is PB 20 on MX1ADS V1.0.7 */
-	return (SSR(1) & (1 << 20) ? 0 : 1);
-}
-
-static struct imxmmc_platform_data mx1ads_mmc_info = {
-       .card_present = mx1ads_mmc_card_present,
-};
-#endif
-
-static void __init
-mx1ads_init(void)
-{
-#ifdef CONFIG_LEDS
-	imx_gpio_mode(GPIO_PORTA | GPIO_OUT | 2);
-#endif
-#if defined(CONFIG_MMC_IMX) || defined(CONFIG_MMC_IMX_MODULE)
-	/* SD/MMC card detect */
-	imx_gpio_mode(GPIO_PORTB | GPIO_GIUS | GPIO_IN | 20);
-	imx_set_mmc_info(&mx1ads_mmc_info);
-#endif
-
-	imx_gpio_mode(PC9_PF_UART1_CTS);
-	imx_gpio_mode(PC10_PF_UART1_RTS);
-	imx_gpio_mode(PC11_PF_UART1_TXD);
-	imx_gpio_mode(PC12_PF_UART1_RXD);
-
-	imx_gpio_mode(PB28_PF_UART2_CTS);
-	imx_gpio_mode(PB29_PF_UART2_RTS);
-	imx_gpio_mode(PB30_PF_UART2_TXD);
-	imx_gpio_mode(PB31_PF_UART2_RXD);
-
-	platform_add_devices(devices, ARRAY_SIZE(devices));
-}
-
-static void __init
-mx1ads_map_io(void)
-{
-	imx_map_io();
-}
-
-MACHINE_START(MX1ADS, "Motorola MX1ADS")
-	/* Maintainer: Sascha Hauer, Pengutronix */
-	.phys_io	= 0x00200000,
-	.io_pg_offst	= ((0xe0000000) >> 18) & 0xfffc,
-	.boot_params	= 0x08000100,
-	.map_io		= mx1ads_map_io,
-	.init_irq	= imx_init_irq,
-	.timer		= &imx_timer,
-	.init_machine	= mx1ads_init,
-MACHINE_END
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
deleted file mode 100644
index 5aef18b..0000000
--- a/arch/arm/mach-imx/time.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- *  linux/arch/arm/mach-imx/time.c
- *
- *  Copyright (C) 2000-2001 Deep Blue Solutions
- *  Copyright (C) 2002 Shane Nay (shane@minirl.com)
- *  Copyright (C) 2006-2007 Pavel Pisa (ppisa@pikron.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/time.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-
-#include <mach/hardware.h>
-#include <asm/leds.h>
-#include <asm/irq.h>
-#include <asm/mach/time.h>
-
-/* Use timer 1 as system timer */
-#define TIMER_BASE IMX_TIM1_BASE
-
-static struct clock_event_device clockevent_imx;
-static enum clock_event_mode clockevent_mode = CLOCK_EVT_MODE_UNUSED;
-
-/*
- * IRQ handler for the timer
- */
-static irqreturn_t
-imx_timer_interrupt(int irq, void *dev_id)
-{
-	struct clock_event_device *evt = &clockevent_imx;
-	uint32_t tstat;
-	irqreturn_t ret = IRQ_NONE;
-
-	/* clear the interrupt */
-	tstat = IMX_TSTAT(TIMER_BASE);
-	IMX_TSTAT(TIMER_BASE) = 0;
-
-	if (tstat & TSTAT_COMP) {
-		evt->event_handler(evt);
-		ret = IRQ_HANDLED;
-	}
-
-	return ret;
-}
-
-static struct irqaction imx_timer_irq = {
-	.name		= "i.MX Timer Tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-	.handler	= imx_timer_interrupt,
-};
-
-/*
- * Set up timer hardware into expected mode and state.
- */
-static void __init imx_timer_hardware_init(void)
-{
-	/*
-	 * Initialise to a known state (all timers off, and timing reset)
-	 */
-	IMX_TCTL(TIMER_BASE) = 0;
-	IMX_TPRER(TIMER_BASE) = 0;
-
-	IMX_TCTL(TIMER_BASE) = TCTL_FRR | TCTL_CLK_PCLK1 | TCTL_TEN;
-}
-
-cycle_t imx_get_cycles(struct clocksource *cs)
-{
-	return IMX_TCN(TIMER_BASE);
-}
-
-static struct clocksource clocksource_imx = {
-	.name 		= "imx_timer1",
-	.rating		= 200,
-	.read		= imx_get_cycles,
-	.mask		= 0xFFFFFFFF,
-	.shift 		= 20,
-	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static int __init imx_clocksource_init(unsigned long rate)
-{
-	clocksource_imx.mult =
-		clocksource_hz2mult(rate, clocksource_imx.shift);
-	clocksource_register(&clocksource_imx);
-
-	return 0;
-}
-
-static int imx_set_next_event(unsigned long evt,
-				  struct clock_event_device *unused)
-{
-	unsigned long tcmp;
-
-	tcmp = IMX_TCN(TIMER_BASE) + evt;
-	IMX_TCMP(TIMER_BASE) = tcmp;
-
-	return (int32_t)(tcmp - IMX_TCN(TIMER_BASE)) < 0 ? -ETIME : 0;
-}
-
-#ifdef DEBUG
-static const char *clock_event_mode_label[]={
-	[CLOCK_EVT_MODE_PERIODIC] = "CLOCK_EVT_MODE_PERIODIC",
-	[CLOCK_EVT_MODE_ONESHOT]  = "CLOCK_EVT_MODE_ONESHOT",
-	[CLOCK_EVT_MODE_SHUTDOWN] = "CLOCK_EVT_MODE_SHUTDOWN",
-	[CLOCK_EVT_MODE_UNUSED]   = "CLOCK_EVT_MODE_UNUSED"
-};
-#endif /*DEBUG*/
-
-static void imx_set_mode(enum clock_event_mode mode, struct clock_event_device *evt)
-{
-	unsigned long flags;
-
-	/*
-	 * The timer interrupt generation is disabled at least
-	 * for enough time to call imx_set_next_event()
-	 */
-	local_irq_save(flags);
-	/* Disable interrupt in GPT module */
-	IMX_TCTL(TIMER_BASE) &= ~TCTL_IRQEN;
-	if (mode != clockevent_mode) {
-		/* Set event time into far-far future */
-		IMX_TCMP(TIMER_BASE) = IMX_TCN(TIMER_BASE) - 3;
-		/* Clear pending interrupt */
-		IMX_TSTAT(TIMER_BASE) &= ~TSTAT_COMP;
-	}
-
-#ifdef DEBUG
-	printk(KERN_INFO "imx_set_mode: changing mode from %s to %s\n",
-		clock_event_mode_label[clockevent_mode], clock_event_mode_label[mode]);
-#endif /*DEBUG*/
-
-	/* Remember timer mode */
-	clockevent_mode = mode;
-	local_irq_restore(flags);
-
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		printk(KERN_ERR "imx_set_mode: Periodic mode is not supported for i.MX\n");
-		break;
-	case CLOCK_EVT_MODE_ONESHOT:
-		/*
-		 * Do not put overhead of interrupt enable/disable into
-		 * imx_set_next_event(), the core has about 4 minutes
-		 * to call imx_set_next_event() or shutdown clock after
-		 * mode switching
-		 */
-		local_irq_save(flags);
-		IMX_TCTL(TIMER_BASE) |= TCTL_IRQEN;
-		local_irq_restore(flags);
-		break;
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_RESUME:
-		/* Left event sources disabled, no more interrupts appears */
-		break;
-	}
-}
-
-static struct clock_event_device clockevent_imx = {
-	.name		= "imx_timer1",
-	.features	= CLOCK_EVT_FEAT_ONESHOT,
-	.shift		= 32,
-	.set_mode	= imx_set_mode,
-	.set_next_event	= imx_set_next_event,
-	.rating		= 200,
-};
-
-static int __init imx_clockevent_init(unsigned long rate)
-{
-	clockevent_imx.mult = div_sc(rate, NSEC_PER_SEC,
-					clockevent_imx.shift);
-	clockevent_imx.max_delta_ns =
-		clockevent_delta2ns(0xfffffffe, &clockevent_imx);
-	clockevent_imx.min_delta_ns =
-		clockevent_delta2ns(0xf, &clockevent_imx);
-
-	clockevent_imx.cpumask = cpumask_of(0);
-
-	clockevents_register_device(&clockevent_imx);
-
-	return 0;
-}
-
-extern int imx_clocks_init(void);
-
-static void __init imx_timer_init(void)
-{
-	struct clk *clk;
-	unsigned long rate;
-
-	imx_clocks_init();
-
-	clk = clk_get(NULL, "perclk1");
-	clk_enable(clk);
-	rate = clk_get_rate(clk);
-
-	imx_timer_hardware_init();
-	imx_clocksource_init(rate);
-
-	imx_clockevent_init(rate);
-
-	/*
-	 * Make irqs happen for the system timer
-	 */
-	setup_irq(TIM1_INT, &imx_timer_irq);
-}
-
-struct sys_timer imx_timer = {
-	.init		= imx_timer_init,
-};
-- 
cgit v0.10.2


From fde364742a1afec9e71e705e863d029295282fe5 Mon Sep 17 00:00:00 2001
From: Holger Schurig <hs4233@mail.mn-solutions.de>
Date: Mon, 30 Mar 2009 14:35:39 +0100
Subject: imx: exit functions can/should be void

Signed-off-by: Holger Schurig <hs4233@mail.mn-solutions.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/mx27ads.c b/arch/arm/mach-mx2/mx27ads.c
index f4ad932..ab389c2 100644
--- a/arch/arm/mach-mx2/mx27ads.c
+++ b/arch/arm/mach-mx2/mx27ads.c
@@ -71,11 +71,10 @@ static int uart_mxc_port0_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart0_pins), "UART0");
 }
 
-static int uart_mxc_port0_exit(struct platform_device *pdev)
+static void uart_mxc_port0_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart0_pins,
 			ARRAY_SIZE(mxc_uart0_pins));
-	return 0;
 }
 
 static int mxc_uart1_pins[] = {
@@ -91,11 +90,10 @@ static int uart_mxc_port1_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart1_pins), "UART1");
 }
 
-static int uart_mxc_port1_exit(struct platform_device *pdev)
+static void uart_mxc_port1_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart1_pins,
 			ARRAY_SIZE(mxc_uart1_pins));
-	return 0;
 }
 
 static int mxc_uart2_pins[] = {
@@ -111,11 +109,10 @@ static int uart_mxc_port2_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart2_pins), "UART2");
 }
 
-static int uart_mxc_port2_exit(struct platform_device *pdev)
+static void uart_mxc_port2_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart2_pins,
 			ARRAY_SIZE(mxc_uart2_pins));
-	return 0;
 }
 
 static int mxc_uart3_pins[] = {
@@ -131,11 +128,10 @@ static int uart_mxc_port3_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart3_pins), "UART3");
 }
 
-static int uart_mxc_port3_exit(struct platform_device *pdev)
+static void uart_mxc_port3_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart3_pins,
 			ARRAY_SIZE(mxc_uart3_pins));
-	return 0;
 }
 
 static int mxc_uart4_pins[] = {
@@ -151,11 +147,10 @@ static int uart_mxc_port4_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart4_pins), "UART4");
 }
 
-static int uart_mxc_port4_exit(struct platform_device *pdev)
+static void uart_mxc_port4_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart4_pins,
 			ARRAY_SIZE(mxc_uart4_pins));
-	return 0;
 }
 
 static int mxc_uart5_pins[] = {
@@ -171,11 +166,10 @@ static int uart_mxc_port5_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart5_pins), "UART5");
 }
 
-static int uart_mxc_port5_exit(struct platform_device *pdev)
+static void uart_mxc_port5_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart5_pins,
 			ARRAY_SIZE(mxc_uart5_pins));
-	return 0;
 }
 
 static struct platform_device *platform_devices[] __initdata = {
diff --git a/arch/arm/mach-mx2/pcm038.c b/arch/arm/mach-mx2/pcm038.c
index 732937c..7f3a62e 100644
--- a/arch/arm/mach-mx2/pcm038.c
+++ b/arch/arm/mach-mx2/pcm038.c
@@ -101,11 +101,10 @@ static int uart_mxc_port0_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart0_pins), "UART0");
 }
 
-static int uart_mxc_port0_exit(struct platform_device *pdev)
+static void uart_mxc_port0_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart0_pins,
 			ARRAY_SIZE(mxc_uart0_pins));
-	return 0;
 }
 
 static int mxc_uart1_pins[] = {
@@ -121,11 +120,10 @@ static int uart_mxc_port1_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart1_pins), "UART1");
 }
 
-static int uart_mxc_port1_exit(struct platform_device *pdev)
+static void uart_mxc_port1_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart1_pins,
 			ARRAY_SIZE(mxc_uart1_pins));
-	return 0;
 }
 
 static int mxc_uart2_pins[] = { PE8_PF_UART3_TXD,
@@ -139,11 +137,10 @@ static int uart_mxc_port2_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_uart2_pins), "UART2");
 }
 
-static int uart_mxc_port2_exit(struct platform_device *pdev)
+static void uart_mxc_port2_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_uart2_pins,
 			ARRAY_SIZE(mxc_uart2_pins));
-	return 0;
 }
 
 static struct imxuart_platform_data uart_pdata[] = {
diff --git a/arch/arm/mach-mx2/pcm970-baseboard.c b/arch/arm/mach-mx2/pcm970-baseboard.c
index bf4e520..16f3344 100644
--- a/arch/arm/mach-mx2/pcm970-baseboard.c
+++ b/arch/arm/mach-mx2/pcm970-baseboard.c
@@ -105,11 +105,9 @@ static int pcm038_fb_init(struct platform_device *pdev)
 			ARRAY_SIZE(mxc_fb_pins), "FB");
 }
 
-static int pcm038_fb_exit(struct platform_device *pdev)
+static void pcm038_fb_exit(struct platform_device *pdev)
 {
 	mxc_gpio_release_multiple_pins(mxc_fb_pins, ARRAY_SIZE(mxc_fb_pins));
-
-	return 0;
 }
 
 /*
diff --git a/arch/arm/plat-mxc/include/mach/imx-uart.h b/arch/arm/plat-mxc/include/mach/imx-uart.h
index 599217b..90af4d9 100644
--- a/arch/arm/plat-mxc/include/mach/imx-uart.h
+++ b/arch/arm/plat-mxc/include/mach/imx-uart.h
@@ -23,7 +23,7 @@
 
 struct imxuart_platform_data {
 	int (*init)(struct platform_device *pdev);
-	int (*exit)(struct platform_device *pdev);
+	void (*exit)(struct platform_device *pdev);
 	unsigned int flags;
 };
 
diff --git a/arch/arm/plat-mxc/include/mach/imxfb.h b/arch/arm/plat-mxc/include/mach/imxfb.h
index 762a7b0..9f01011 100644
--- a/arch/arm/plat-mxc/include/mach/imxfb.h
+++ b/arch/arm/plat-mxc/include/mach/imxfb.h
@@ -76,8 +76,8 @@ struct imx_fb_platform_data {
 	u_char * fixed_screen_cpu;
 	dma_addr_t fixed_screen_dma;
 
-	int (*init)(struct platform_device*);
-	int (*exit)(struct platform_device*);
+	int (*init)(struct platform_device *);
+	void (*exit)(struct platform_device *);
 
 	void (*lcd_power)(int);
 	void (*backlight_power)(int);
diff --git a/arch/arm/plat-mxc/include/mach/usb.h b/arch/arm/plat-mxc/include/mach/usb.h
index 2dacb308..be27337 100644
--- a/arch/arm/plat-mxc/include/mach/usb.h
+++ b/arch/arm/plat-mxc/include/mach/usb.h
@@ -17,7 +17,7 @@
 
 struct imxusb_platform_data {
 	int (*init)(struct device *);
-	int (*exit)(struct device *);
+	void (*exit)(struct device *);
 };
 
 #endif /* __ASM_ARCH_MXC_USB */
-- 
cgit v0.10.2


From 910862ec092c156023b8a6e726e8c793da0a03f7 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lg@denx.de>
Date: Thu, 12 Mar 2009 12:46:41 +0100
Subject: mxc: emulate GPIO interrupt on both-edges

MXC GPIO controller does not support generation of interrupts on both
edges. Emulate this mode in software by reconfiguring the irq trigger
polarity on each interrupt. This follows an example of
drivers/mfd/asic3.c.

Signed-off-by: Guennadi Liakhovetski <lg@denx.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/gpio.c b/arch/arm/plat-mxc/gpio.c
index 89e9579..7506d96 100644
--- a/arch/arm/plat-mxc/gpio.c
+++ b/arch/arm/plat-mxc/gpio.c
@@ -64,6 +64,8 @@ static void gpio_unmask_irq(u32 irq)
 	_set_gpio_irqenable(&mxc_gpio_ports[gpio / 32], gpio & 0x1f, 1);
 }
 
+static int mxc_gpio_get(struct gpio_chip *chip, unsigned offset);
+
 static int gpio_set_irq_type(u32 irq, u32 type)
 {
 	u32 gpio = irq_to_gpio(irq);
@@ -72,6 +74,7 @@ static int gpio_set_irq_type(u32 irq, u32 type)
 	int edge;
 	void __iomem *reg = port->base;
 
+	port->both_edges &= ~(1 << (gpio & 31));
 	switch (type) {
 	case IRQ_TYPE_EDGE_RISING:
 		edge = GPIO_INT_RISE_EDGE;
@@ -79,13 +82,24 @@ static int gpio_set_irq_type(u32 irq, u32 type)
 	case IRQ_TYPE_EDGE_FALLING:
 		edge = GPIO_INT_FALL_EDGE;
 		break;
+	case IRQ_TYPE_EDGE_BOTH:
+		val = mxc_gpio_get(&port->chip, gpio & 31);
+		if (val) {
+			edge = GPIO_INT_LOW_LEV;
+			pr_debug("mxc: set GPIO %d to low trigger\n", gpio);
+		} else {
+			edge = GPIO_INT_HIGH_LEV;
+			pr_debug("mxc: set GPIO %d to high trigger\n", gpio);
+		}
+		port->both_edges |= 1 << (gpio & 31);
+		break;
 	case IRQ_TYPE_LEVEL_LOW:
 		edge = GPIO_INT_LOW_LEV;
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:
 		edge = GPIO_INT_HIGH_LEV;
 		break;
-	default:	/* this includes IRQ_TYPE_EDGE_BOTH */
+	default:
 		return -EINVAL;
 	}
 
@@ -98,6 +112,34 @@ static int gpio_set_irq_type(u32 irq, u32 type)
 	return 0;
 }
 
+static void mxc_flip_edge(struct mxc_gpio_port *port, u32 gpio)
+{
+	void __iomem *reg = port->base;
+	u32 bit, val;
+	int edge;
+
+	reg += GPIO_ICR1 + ((gpio & 0x10) >> 2); /* lower or upper register */
+	bit = gpio & 0xf;
+	val = __raw_readl(reg);
+	edge = (val >> (bit << 1)) & 3;
+	val &= ~(0x3 << (bit << 1));
+	switch (edge) {
+	case GPIO_INT_HIGH_LEV:
+		edge = GPIO_INT_LOW_LEV;
+		pr_debug("mxc: switch GPIO %d to low trigger\n", gpio);
+		break;
+	case GPIO_INT_LOW_LEV:
+		edge = GPIO_INT_HIGH_LEV;
+		pr_debug("mxc: switch GPIO %d to high trigger\n", gpio);
+		break;
+	default:
+		pr_err("mxc: invalid configuration for GPIO %d: %x\n",
+		       gpio, edge);
+		return;
+	}
+	__raw_writel(val | (edge << (bit << 1)), reg);
+}
+
 /* handle n interrupts in one status register */
 static void mxc_gpio_irq_handler(struct mxc_gpio_port *port, u32 irq_stat)
 {
@@ -105,11 +147,16 @@ static void mxc_gpio_irq_handler(struct mxc_gpio_port *port, u32 irq_stat)
 
 	gpio_irq_no = port->virtual_irq_start;
 	for (; irq_stat != 0; irq_stat >>= 1, gpio_irq_no++) {
+		u32 gpio = irq_to_gpio(gpio_irq_no);
 
 		if ((irq_stat & 1) == 0)
 			continue;
 
 		BUG_ON(!(irq_desc[gpio_irq_no].handle_irq));
+
+		if (port->both_edges & (1 << (gpio & 31)))
+			mxc_flip_edge(port, gpio);
+
 		irq_desc[gpio_irq_no].handle_irq(gpio_irq_no,
 				&irq_desc[gpio_irq_no]);
 	}
diff --git a/arch/arm/plat-mxc/include/mach/gpio.h b/arch/arm/plat-mxc/include/mach/gpio.h
index ea509f1..894d2f8 100644
--- a/arch/arm/plat-mxc/include/mach/gpio.h
+++ b/arch/arm/plat-mxc/include/mach/gpio.h
@@ -35,6 +35,7 @@ struct mxc_gpio_port {
 	int irq;
 	int virtual_irq_start;
 	struct gpio_chip chip;
+	u32 both_edges;
 };
 
 int mxc_gpio_init(struct mxc_gpio_port*, int);
-- 
cgit v0.10.2


From a8df0ee8d89dc1bdbec7415be620b159b5b0177b Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lg@denx.de>
Date: Wed, 15 Apr 2009 14:32:52 +0200
Subject: ARM: framebuffer support for pcm037

Add support for a QVGA Sharp LCD on pcm037.

Signed-off-by: Guennadi Liakhovetski <lg@denx.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index 6809891..5d7e0ca 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -40,7 +40,9 @@
 #include <mach/common.h>
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
+#include <mach/ipu.h>
 #include <mach/board-pcm037.h>
+#include <mach/mx3fb.h>
 #include <mach/mxc_nand.h>
 #include <mach/mmc.h>
 #ifdef CONFIG_I2C_IMX
@@ -266,6 +268,54 @@ static struct platform_device *devices[] __initdata = {
 	&pcm037_sram_device,
 };
 
+static struct ipu_platform_data mx3_ipu_data = {
+	.irq_base = MXC_IPU_IRQ_START,
+};
+
+static const struct fb_videomode fb_modedb[] = {
+	{
+		/* 240x320 @ 60 Hz Sharp */
+		.name		= "Sharp-LQ035Q7DH06-QVGA",
+		.refresh	= 60,
+		.xres		= 240,
+		.yres		= 320,
+		.pixclock	= 185925,
+		.left_margin	= 9,
+		.right_margin	= 16,
+		.upper_margin	= 7,
+		.lower_margin	= 9,
+		.hsync_len	= 1,
+		.vsync_len	= 1,
+		.sync		= FB_SYNC_HOR_HIGH_ACT | FB_SYNC_SHARP_MODE |
+				  FB_SYNC_CLK_INVERT | FB_SYNC_CLK_IDLE_EN,
+		.vmode		= FB_VMODE_NONINTERLACED,
+		.flag		= 0,
+	}, {
+		/* 240x320 @ 60 Hz */
+		.name		= "TX090",
+		.refresh	= 60,
+		.xres		= 240,
+		.yres		= 320,
+		.pixclock	= 38255,
+		.left_margin	= 144,
+		.right_margin	= 0,
+		.upper_margin	= 7,
+		.lower_margin	= 40,
+		.hsync_len	= 96,
+		.vsync_len	= 1,
+		.sync		= FB_SYNC_VERT_HIGH_ACT | FB_SYNC_OE_ACT_HIGH,
+		.vmode		= FB_VMODE_NONINTERLACED,
+		.flag		= 0,
+	},
+};
+
+static struct mx3fb_platform_data mx3fb_pdata = {
+	.dma_dev	= &mx3_ipu.dev,
+	.name		= "Sharp-LQ035Q7DH06-QVGA",
+	.mode		= fb_modedb,
+	.num_modes	= ARRAY_SIZE(fb_modedb),
+};
+
 /*
  * Board specific initialization.
  */
@@ -293,6 +343,8 @@ static void __init mxc_board_init(void)
 #endif
 	mxc_register_device(&mxc_nand_device, &pcm037_nand_board_info);
 	mxc_register_device(&mxcsdhc_device0, &sdhc_pdata);
+	mxc_register_device(&mx3_ipu, &mx3_ipu_data);
+	mxc_register_device(&mx3_fb, &mx3fb_pdata);
 }
 
 static void __init pcm037_timer_init(void)
-- 
cgit v0.10.2


From 6b91edde09fadde2657b11b454b88ae89c5b4cae Mon Sep 17 00:00:00 2001
From: Ivo Clarysse <ivo.clarysse@gmail.com>
Date: Wed, 15 Apr 2009 15:39:27 +0200
Subject: MXC: mx21ads base support

[PATCH v6] MXC: mx21ads base support

Base machine support for the  Freescale i.MX21ADS
(M9328MX21ADSE) Application Development System.

Signed-off-by: Ivo Clarysse <ivo.clarysse@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/Kconfig b/arch/arm/mach-mx2/Kconfig
index 42a7888..7ce3712 100644
--- a/arch/arm/mach-mx2/Kconfig
+++ b/arch/arm/mach-mx2/Kconfig
@@ -18,6 +18,13 @@ endchoice
 
 comment "MX2 platforms:"
 
+config MACH_MX21ADS
+	bool "MX21ADS platform"
+	depends on MACH_MX21
+	help
+	  Include support for MX21ADS platform. This includes specific
+	  configurations for the board and its peripherals.
+
 config MACH_MX27ADS
 	bool "MX27ADS platform"
 	depends on MACH_MX27
diff --git a/arch/arm/mach-mx2/Makefile b/arch/arm/mach-mx2/Makefile
index 950649a..e583754 100644
--- a/arch/arm/mach-mx2/Makefile
+++ b/arch/arm/mach-mx2/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_MACH_MX21) += clock_imx21.o
 obj-$(CONFIG_MACH_MX27) += cpu_imx27.o
 obj-$(CONFIG_MACH_MX27) += clock_imx27.o
 
+obj-$(CONFIG_MACH_MX21ADS) += mx21ads.o
 obj-$(CONFIG_MACH_MX27ADS) += mx27ads.o
 obj-$(CONFIG_MACH_PCM038) += pcm038.o
 obj-$(CONFIG_MACH_PCM970_BASEBOARD) += pcm970-baseboard.o
diff --git a/arch/arm/mach-mx2/mx21ads.c b/arch/arm/mach-mx2/mx21ads.c
new file mode 100644
index 0000000..a5ee461
--- /dev/null
+++ b/arch/arm/mach-mx2/mx21ads.c
@@ -0,0 +1,286 @@
+/*
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd
+ *  Copyright (C) 2002 Shane Nay (shane@minirl.com)
+ *  Copyright 2006-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/physmap.h>
+#include <linux/gpio.h>
+#include <mach/common.h>
+#include <mach/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+#include <asm/mach/map.h>
+#include <mach/imx-uart.h>
+#include <mach/imxfb.h>
+#include <mach/iomux.h>
+#include <mach/mxc_nand.h>
+#include <mach/mmc.h>
+#include <mach/board-mx21ads.h>
+
+#include "devices.h"
+
+static unsigned int mx21ads_pins[] = {
+
+	/* CS8900A */
+	(GPIO_PORTE | GPIO_GPIO | GPIO_IN | 11),
+
+	/* UART1 */
+	PE12_PF_UART1_TXD,
+	PE13_PF_UART1_RXD,
+	PE14_PF_UART1_CTS,
+	PE15_PF_UART1_RTS,
+
+	/* UART3 (IrDA) - only TXD and RXD */
+	PE8_PF_UART3_TXD,
+	PE9_PF_UART3_RXD,
+
+	/* UART4 */
+	PB26_AF_UART4_RTS,
+	PB28_AF_UART4_TXD,
+	PB29_AF_UART4_CTS,
+	PB31_AF_UART4_RXD,
+
+	/* LCDC */
+	PA5_PF_LSCLK,
+	PA6_PF_LD0,
+	PA7_PF_LD1,
+	PA8_PF_LD2,
+	PA9_PF_LD3,
+	PA10_PF_LD4,
+	PA11_PF_LD5,
+	PA12_PF_LD6,
+	PA13_PF_LD7,
+	PA14_PF_LD8,
+	PA15_PF_LD9,
+	PA16_PF_LD10,
+	PA17_PF_LD11,
+	PA18_PF_LD12,
+	PA19_PF_LD13,
+	PA20_PF_LD14,
+	PA21_PF_LD15,
+	PA22_PF_LD16,
+	PA24_PF_REV,     /* Sharp panel dedicated signal */
+	PA25_PF_CLS,     /* Sharp panel dedicated signal */
+	PA26_PF_PS,      /* Sharp panel dedicated signal */
+	PA27_PF_SPL_SPR, /* Sharp panel dedicated signal */
+	PA28_PF_HSYNC,
+	PA29_PF_VSYNC,
+	PA30_PF_CONTRAST,
+	PA31_PF_OE_ACD,
+
+	/* MMC/SDHC */
+	PE18_PF_SD1_D0,
+	PE19_PF_SD1_D1,
+	PE20_PF_SD1_D2,
+	PE21_PF_SD1_D3,
+	PE22_PF_SD1_CMD,
+	PE23_PF_SD1_CLK,
+
+	/* NFC */
+	PF0_PF_NRFB,
+	PF1_PF_NFCE,
+	PF2_PF_NFWP,
+	PF3_PF_NFCLE,
+	PF4_PF_NFALE,
+	PF5_PF_NFRE,
+	PF6_PF_NFWE,
+	PF7_PF_NFIO0,
+	PF8_PF_NFIO1,
+	PF9_PF_NFIO2,
+	PF10_PF_NFIO3,
+	PF11_PF_NFIO4,
+	PF12_PF_NFIO5,
+	PF13_PF_NFIO6,
+	PF14_PF_NFIO7,
+};
+
+/* ADS's NOR flash: 2x AM29BDS128HE9VKI on 32-bit bus */
+static struct physmap_flash_data mx21ads_flash_data = {
+	.width = 4,
+};
+
+static struct resource mx21ads_flash_resource = {
+	.start = CS0_BASE_ADDR,
+	.end = CS0_BASE_ADDR + 0x02000000 - 1,
+	.flags = IORESOURCE_MEM,
+};
+
+static struct platform_device mx21ads_nor_mtd_device = {
+	.name = "physmap-flash",
+	.id = 0,
+	.dev = {
+		.platform_data = &mx21ads_flash_data,
+	},
+	.num_resources = 1,
+	.resource = &mx21ads_flash_resource,
+};
+
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
+static struct imxuart_platform_data uart_norts_pdata = {
+};
+
+
+static int mx21ads_fb_init(struct platform_device *pdev)
+{
+	u16 tmp;
+
+	tmp = __raw_readw(MX21ADS_IO_REG);
+	tmp |= MX21ADS_IO_LCDON;
+	__raw_writew(tmp, MX21ADS_IO_REG);
+	return 0;
+}
+
+static void mx21ads_fb_exit(struct platform_device *pdev)
+{
+	u16 tmp;
+
+	tmp = __raw_readw(MX21ADS_IO_REG);
+	tmp &= ~MX21ADS_IO_LCDON;
+	__raw_writew(tmp, MX21ADS_IO_REG);
+}
+
+/*
+ * Connected is a portrait Sharp-QVGA display
+ * of type: LQ035Q7DB02
+ */
+static struct imx_fb_platform_data mx21ads_fb_data = {
+	.pixclock       = 188679, /* in ps */
+	.xres           = 240,
+	.yres           = 320,
+
+	.bpp            = 16,
+	.hsync_len      = 2,
+	.left_margin    = 6,
+	.right_margin   = 16,
+
+	.vsync_len      = 1,
+	.upper_margin   = 8,
+	.lower_margin   = 10,
+	.fixed_screen_cpu = 0,
+
+	.pcr            = 0xFB108BC7,
+	.pwmr           = 0x00A901ff,
+	.lscr1          = 0x00120300,
+	.dmacr          = 0x00020008,
+
+	.init = mx21ads_fb_init,
+	.exit = mx21ads_fb_exit,
+};
+
+static int mx21ads_sdhc_get_ro(struct device *dev)
+{
+	return (__raw_readw(MX21ADS_IO_REG) & MX21ADS_IO_SD_WP) ? 1 : 0;
+}
+
+static int mx21ads_sdhc_init(struct device *dev, irq_handler_t detect_irq,
+	void *data)
+{
+	int ret;
+
+	ret = request_irq(IRQ_GPIOD(25), detect_irq,
+		IRQF_TRIGGER_FALLING, "mmc-detect", data);
+	if (ret)
+		goto out;
+	return 0;
+out:
+	return ret;
+}
+
+static void mx21ads_sdhc_exit(struct device *dev, void *data)
+{
+	free_irq(IRQ_GPIOD(25), data);
+}
+
+static struct imxmmc_platform_data mx21ads_sdhc_pdata = {
+	.ocr_avail = MMC_VDD_29_30 | MMC_VDD_30_31, /* 3.0V */
+	.get_ro = mx21ads_sdhc_get_ro,
+	.init = mx21ads_sdhc_init,
+	.exit = mx21ads_sdhc_exit,
+};
+
+static struct mxc_nand_platform_data mx21ads_nand_board_info = {
+	.width = 1,
+	.hw_ecc = 1,
+};
+
+static struct map_desc mx21ads_io_desc[] __initdata = {
+	/*
+	 * Memory-mapped I/O on MX21ADS Base board:
+	 *   - CS8900A Ethernet controller
+	 *   - ST16C2552CJ UART
+	 *   - CPU and Base board version
+	 *   - Base board I/O register
+	 */
+	{
+		.virtual = MX21ADS_MMIO_BASE_ADDR,
+		.pfn = __phys_to_pfn(CS1_BASE_ADDR),
+		.length = MX21ADS_MMIO_SIZE,
+		.type = MT_DEVICE,
+	},
+};
+
+static void __init mx21ads_map_io(void)
+{
+	mx21_map_io();
+	iotable_init(mx21ads_io_desc, ARRAY_SIZE(mx21ads_io_desc));
+}
+
+static struct platform_device *platform_devices[] __initdata = {
+	&mx21ads_nor_mtd_device,
+};
+
+static void __init mx21ads_board_init(void)
+{
+	mxc_gpio_setup_multiple_pins(mx21ads_pins, ARRAY_SIZE(mx21ads_pins),
+			"mx21ads");
+
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+	mxc_register_device(&mxc_uart_device2, &uart_norts_pdata);
+	mxc_register_device(&mxc_uart_device3, &uart_pdata);
+	mxc_register_device(&mxc_fb_device, &mx21ads_fb_data);
+	mxc_register_device(&mxc_sdhc_device0, &mx21ads_sdhc_pdata);
+	mxc_register_device(&mxc_nand_device, &mx21ads_nand_board_info);
+
+	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
+}
+
+static void __init mx21ads_timer_init(void)
+{
+	mx21_clocks_init(32768, 26000000);
+}
+
+static struct sys_timer mx21ads_timer = {
+	.init	= mx21ads_timer_init,
+};
+
+MACHINE_START(MX21ADS, "Freescale i.MX21ADS")
+	/* maintainer: Freescale Semiconductor, Inc. */
+	.phys_io        = AIPI_BASE_ADDR,
+	.io_pg_offst    = ((AIPI_BASE_ADDR_VIRT) >> 18) & 0xfffc,
+	.boot_params    = PHYS_OFFSET + 0x100,
+	.map_io         = mx21ads_map_io,
+	.init_irq       = mxc_init_irq,
+	.init_machine   = mx21ads_board_init,
+	.timer          = &mx21ads_timer,
+MACHINE_END
diff --git a/arch/arm/plat-mxc/include/mach/board-mx21ads.h b/arch/arm/plat-mxc/include/mach/board-mx21ads.h
new file mode 100644
index 0000000..06701df
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/board-mx21ads.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#ifndef __ASM_ARCH_MXC_BOARD_MX21ADS_H__
+#define __ASM_ARCH_MXC_BOARD_MX21ADS_H__
+
+/*
+ * MXC UART EVB board level configurations
+ */
+#define MXC_LL_UART_PADDR       UART1_BASE_ADDR
+#define MXC_LL_UART_VADDR       AIPI_IO_ADDRESS(UART1_BASE_ADDR)
+
+/*
+ * Memory-mapped I/O on MX21ADS base board
+ */
+#define MX21ADS_MMIO_BASE_ADDR   0xF5000000
+#define MX21ADS_MMIO_SIZE        SZ_16M
+
+#define MX21ADS_REG_ADDR(offset)    (void __force __iomem *) \
+		(MX21ADS_MMIO_BASE_ADDR + (offset))
+
+#define MX21ADS_CS8900A_IRQ         IRQ_GPIOE(11)
+#define MX21ADS_CS8900A_IOBASE_REG  MX21ADS_REG_ADDR(0x000000)
+#define MX21ADS_ST16C255_IOBASE_REG MX21ADS_REG_ADDR(0x200000)
+#define MX21ADS_VERSION_REG         MX21ADS_REG_ADDR(0x400000)
+#define MX21ADS_IO_REG              MX21ADS_REG_ADDR(0x800000)
+
+/* MX21ADS_IO_REG bit definitions */
+#define MX21ADS_IO_SD_WP        0x0001 /* read */
+#define MX21ADS_IO_TP6          0x0001 /* write */
+#define MX21ADS_IO_SW_SEL       0x0002 /* read */
+#define MX21ADS_IO_TP7          0x0002 /* write */
+#define MX21ADS_IO_RESET_E_UART 0x0004
+#define MX21ADS_IO_RESET_BASE   0x0008
+#define MX21ADS_IO_CSI_CTL2     0x0010
+#define MX21ADS_IO_CSI_CTL1     0x0020
+#define MX21ADS_IO_CSI_CTL0     0x0040
+#define MX21ADS_IO_UART1_EN     0x0080
+#define MX21ADS_IO_UART4_EN     0x0100
+#define MX21ADS_IO_LCDON        0x0200
+#define MX21ADS_IO_IRDA_EN      0x0400
+#define MX21ADS_IO_IRDA_FIR_SEL 0x0800
+#define MX21ADS_IO_IRDA_MD0_B   0x1000
+#define MX21ADS_IO_IRDA_MD1     0x2000
+#define MX21ADS_IO_LED4_ON      0x4000
+#define MX21ADS_IO_LED3_ON      0x8000
+
+#endif				/* __ASM_ARCH_MXC_BOARD_MX21ADS_H__ */
diff --git a/arch/arm/plat-mxc/include/mach/debug-macro.S b/arch/arm/plat-mxc/include/mach/debug-macro.S
index 4e7c8d2..cc7e89d 100644
--- a/arch/arm/plat-mxc/include/mach/debug-macro.S
+++ b/arch/arm/plat-mxc/include/mach/debug-macro.S
@@ -25,6 +25,9 @@
 #ifdef CONFIG_MACH_MX27ADS
 #include <mach/board-mx27ads.h>
 #endif
+#ifdef CONFIG_MACH_MX21ADS
+#include <mach/board-mx21ads.h>
+#endif
 #ifdef CONFIG_MACH_PCM038
 #include <mach/board-pcm038.h>
 #endif
-- 
cgit v0.10.2


From c010dba89bc0f5585550877b1693d11d24063b6b Mon Sep 17 00:00:00 2001
From: Holger Schurig <h.schurig@mn-solutions.de>
Date: Tue, 14 Apr 2009 12:50:20 +0200
Subject: imx: re-work of PWM, add i.MX21 support

* Kconfig enables now HAVE_PWM (this enables in turn the selection of
  CONFIG_BACKLIGHT_PWM)
* changes CONFIG_ARCH_MXyy to CONFIG_MACH_MXyy
* fix some register names to match those of the reference manual
* write a stub code so that the PWM can be used to program the
  LCD backlight
* convert from #ifdef CONFIG_ARCH_MXxx to cpu_is_mxXX()
* remove unneeded defines and fix 80-column "violations" of checkpatch.pl

Signed-off-by: Holger Schurig <hs4233@mail.mn-solutions.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/Kconfig b/arch/arm/plat-mxc/Kconfig
index 61acd4c..beeb01a 100644
--- a/arch/arm/plat-mxc/Kconfig
+++ b/arch/arm/plat-mxc/Kconfig
@@ -48,6 +48,7 @@ config MXC_IRQ_PRIOR
 config MXC_PWM
 	tristate "Enable PWM driver"
 	depends on ARCH_MXC
+	select HAVE_PWM
 	help
 	  Enable support for the i.MX PWM controller(s).
 
diff --git a/arch/arm/plat-mxc/pwm.c b/arch/arm/plat-mxc/pwm.c
index 9bffbc5..ae34198 100644
--- a/arch/arm/plat-mxc/pwm.c
+++ b/arch/arm/plat-mxc/pwm.c
@@ -15,65 +15,26 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/pwm.h>
+#include <mach/hardware.h>
+
+
+/* i.MX1 and i.MX21 share the same PWM function block: */
+
+#define MX1_PWMC    0x00   /* PWM Control Register */
+#define MX1_PWMS    0x04   /* PWM Sample Register */
+#define MX1_PWMP    0x08   /* PWM Period Register */
+
+
+/* i.MX27, i.MX31, i.MX35 share the same PWM function block: */
+
+#define MX3_PWMCR                 0x00    /* PWM Control Register */
+#define MX3_PWMSAR                0x0C    /* PWM Sample Register */
+#define MX3_PWMPR                 0x10    /* PWM Period Register */
+#define MX3_PWMCR_PRESCALER(x)    (((x - 1) & 0xFFF) << 4)
+#define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16)
+#define MX3_PWMCR_EN              (1 << 0)
+
 
-#if defined CONFIG_ARCH_MX1 || defined CONFIG_ARCH_MX21
-#define PWM_VER_1
-
-#define PWMCR	0x00	/* PWM Control Register		*/
-#define PWMSR	0x04	/* PWM Sample Register		*/
-#define PWMPR	0x08	/* PWM Period Register		*/
-#define PWMCNR	0x0C	/* PWM Counter Register		*/
-
-#define PWMCR_HCTR		(1 << 18)		/* Halfword FIFO Data Swapping	*/
-#define PWMCR_BCTR		(1 << 17)		/* Byte FIFO Data Swapping	*/
-#define PWMCR_SWR		(1 << 16)		/* Software Reset		*/
-#define PWMCR_CLKSRC_PERCLK	(0 << 15)		/* PERCLK Clock Source		*/
-#define PWMCR_CLKSRC_CLK32	(1 << 15)		/* 32KHz Clock Source		*/
-#define PWMCR_PRESCALER(x)	(((x - 1) & 0x7F) << 8)	/* PRESCALER			*/
-#define PWMCR_IRQ		(1 << 7)		/* Interrupt Request		*/
-#define PWMCR_IRQEN		(1 << 6)		/* Interrupt Request Enable	*/
-#define PWMCR_FIFOAV		(1 << 5)		/* FIFO Available		*/
-#define PWMCR_EN		(1 << 4)		/* Enables/Disables the PWM	*/
-#define PWMCR_REPEAT(x)		(((x) & 0x03) << 2)	/* Sample Repeats		*/
-#define PWMCR_DIV(x)		(((x) & 0x03) << 0)	/* Clock divider 2/4/8/16	*/
-
-#define MAX_DIV			(128 * 16)
-#endif
-
-#if defined CONFIG_MACH_MX27 || defined CONFIG_ARCH_MX31
-#define PWM_VER_2
-
-#define PWMCR	0x00	/* PWM Control Register		*/
-#define PWMSR	0x04	/* PWM Status Register		*/
-#define PWMIR	0x08	/* PWM Interrupt Register	*/
-#define PWMSAR	0x0C	/* PWM Sample Register		*/
-#define PWMPR	0x10	/* PWM Period Register		*/
-#define PWMCNR	0x14	/* PWM Counter Register		*/
-
-#define PWMCR_EN		(1 << 0)		/* Enables/Disables the PWM	*/
-#define PWMCR_REPEAT(x)		(((x) & 0x03) << 1)	/* Sample Repeats		*/
-#define PWMCR_SWR		(1 << 3)		/* Software Reset		*/
-#define PWMCR_PRESCALER(x)	(((x - 1) & 0xFFF) << 4)/* PRESCALER			*/
-#define PWMCR_CLKSRC(x)		(((x) & 0x3) << 16)
-#define PWMCR_CLKSRC_OFF	(0 << 16)
-#define PWMCR_CLKSRC_IPG	(1 << 16)
-#define PWMCR_CLKSRC_IPG_HIGH	(2 << 16)
-#define PWMCR_CLKSRC_CLK32	(3 << 16)
-#define PWMCR_POUTC
-#define PWMCR_HCTR		(1 << 20)		/* Halfword FIFO Data Swapping	*/
-#define PWMCR_BCTR		(1 << 21)		/* Byte FIFO Data Swapping	*/
-#define PWMCR_DBGEN		(1 << 22)		/* Debug Mode			*/
-#define PWMCR_WAITEN		(1 << 23)		/* Wait Mode			*/
-#define PWMCR_DOZEN		(1 << 24)		/* Doze Mode			*/
-#define PWMCR_STOPEN		(1 << 25)		/* Stop Mode			*/
-#define PWMCR_FWM(x)		(((x) & 0x3) << 26)	/* FIFO Water Mark		*/
-
-#define MAX_DIV 4096
-#endif
-
-#define PWMS_SAMPLE(x)		((x) & 0xFFFF)		/* Contains a two-sample word	*/
-#define PWMP_PERIOD(x)		((x) & 0xFFFF)		/* Represents the PWM's period	*/
-#define PWMC_COUNTER(x)		((x) & 0xFFFF)		/* Represents the current count value	*/
 
 struct pwm_device {
 	struct list_head	node;
@@ -91,32 +52,52 @@ struct pwm_device {
 
 int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
 {
-	unsigned long long c;
-	unsigned long period_cycles, duty_cycles, prescale;
-
 	if (pwm == NULL || period_ns == 0 || duty_ns > period_ns)
 		return -EINVAL;
 
-	c = clk_get_rate(pwm->clk);
-	c = c * period_ns;
-	do_div(c, 1000000000);
-	period_cycles = c;
-
-	prescale = period_cycles / 0x10000 + 1;
-
-	period_cycles /= prescale;
-	c = (unsigned long long)period_cycles * duty_ns;
-	do_div(c, period_ns);
-	duty_cycles = c;
-
-#ifdef PWM_VER_2
-	writel(duty_cycles, pwm->mmio_base + PWMSAR);
-	writel(period_cycles, pwm->mmio_base + PWMPR);
-	writel(PWMCR_PRESCALER(prescale - 1) | PWMCR_CLKSRC_IPG_HIGH | PWMCR_EN,
-			pwm->mmio_base + PWMCR);
-#elif defined PWM_VER_1
-#error PWM not yet working on MX1 / MX21
-#endif
+	if (cpu_is_mx27() || cpu_is_mx3()) {
+		unsigned long long c;
+		unsigned long period_cycles, duty_cycles, prescale;
+		c = clk_get_rate(pwm->clk);
+		c = c * period_ns;
+		do_div(c, 1000000000);
+		period_cycles = c;
+
+		prescale = period_cycles / 0x10000 + 1;
+
+		period_cycles /= prescale;
+		c = (unsigned long long)period_cycles * duty_ns;
+		do_div(c, period_ns);
+		duty_cycles = c;
+
+		writel(duty_cycles, pwm->mmio_base + MX3_PWMSAR);
+		writel(period_cycles, pwm->mmio_base + MX3_PWMPR);
+		writel(MX3_PWMCR_PRESCALER(prescale - 1) |
+			MX3_PWMCR_CLKSRC_IPG_HIGH | MX3_PWMCR_EN,
+			pwm->mmio_base + MX3_PWMCR);
+	} else if (cpu_is_mx1() || cpu_is_mx21()) {
+		/* The PWM subsystem allows for exact frequencies. However,
+		 * I cannot connect a scope on my device to the PWM line and
+		 * thus cannot provide the program the PWM controller
+		 * exactly. Instead, I'm relying on the fact that the
+		 * Bootloader (u-boot or WinCE+haret) has programmed the PWM
+		 * function group already. So I'll just modify the PWM sample
+		 * register to follow the ratio of duty_ns vs. period_ns
+		 * accordingly.
+		 *
+		 * This is good enought for programming the brightness of
+		 * the LCD backlight.
+		 *
+		 * The real implementation would divide PERCLK[0] first by
+		 * both the prescaler (/1 .. /128) and then by CLKSEL
+		 * (/2 .. /16).
+		 */
+		u32 max = readl(pwm->mmio_base + MX1_PWMP);
+		u32 p = max * duty_ns / period_ns;
+		writel(max - p, pwm->mmio_base + MX1_PWMS);
+	} else {
+		BUG();
+	}
 
 	return 0;
 }
@@ -297,4 +278,3 @@ module_exit(mxc_pwm_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
-
-- 
cgit v0.10.2


From 5ed7449a26bf5336ba9828025f54407227e4f379 Mon Sep 17 00:00:00 2001
From: Ivo Clarysse <ivo.clarysse@gmail.com>
Date: Wed, 15 Apr 2009 10:45:19 +0200
Subject: MXC: mx2_defconfig file

Adds a defconfig file for the i.MX21 SoC, targeting
the Freescale i.MX21ADS (M9328MX21ADSE) Application
Development System.

Signed-off-by: Ivo Clarysse <ivo.clarysse@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/configs/mx21_defconfig b/arch/arm/configs/mx21_defconfig
new file mode 100644
index 0000000..4b04290
--- /dev/null
+++ b/arch/arm/configs/mx21_defconfig
@@ -0,0 +1,1170 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc1
+# Tue Apr 14 16:58:09 2009
+#
+CONFIG_ARM=y
+CONFIG_HAVE_PWM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_ARCH_MTD_XIP=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_NOOP=y
+CONFIG_DEFAULT_IOSCHED="noop"
+# CONFIG_FREEZER is not set
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_GEMINI is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+CONFIG_ARCH_MXC=y
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_PNX4008 is not set
+# CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_MMP is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_DAVINCI is not set
+# CONFIG_ARCH_OMAP is not set
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_W90X900 is not set
+
+#
+# Freescale MXC Implementations
+#
+# CONFIG_ARCH_MX1 is not set
+CONFIG_ARCH_MX2=y
+# CONFIG_ARCH_MX3 is not set
+CONFIG_MACH_MX21=y
+# CONFIG_MACH_MX27 is not set
+
+#
+# MX2 platforms:
+#
+CONFIG_MACH_MX21ADS=y
+# CONFIG_MXC_IRQ_PRIOR is not set
+CONFIG_MXC_PWM=y
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_ARM926T=y
+CONFIG_CPU_32v5=y
+CONFIG_CPU_ABRT_EV5TJ=y
+CONFIG_CPU_PABRT_NOIFAR=y
+CONFIG_CPU_CACHE_VIVT=y
+CONFIG_CPU_COPY_V4WB=y
+CONFIG_CPU_TLB_V4WBI=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+
+#
+# Processor Features
+#
+CONFIG_ARM_THUMB=y
+# CONFIG_CPU_ICACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_WRITETHROUGH is not set
+# CONFIG_CPU_CACHE_ROUND_ROBIN is not set
+# CONFIG_OUTER_CACHE is not set
+CONFIG_COMMON_CLKDEV=y
+
+#
+# Bus support
+#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_PREEMPT=y
+CONFIG_HZ=100
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+# CONFIG_HIGHMEM is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+CONFIG_ALIGNMENT_TRAP=y
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE=""
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+# CONFIG_CPU_IDLE is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+# CONFIG_FPE_NWFPE is not set
+# CONFIG_FPE_FASTFPE is not set
+# CONFIG_VFP is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_PACKET is not set
+# CONFIG_UNIX is not set
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+CONFIG_WIRELESS=y
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_OLD_REGULATORY is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_LIB80211 is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+CONFIG_MTD_DEBUG=y
+CONFIG_MTD_DEBUG_VERBOSE=3
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+CONFIG_MTD_REDBOOT_PARTS=y
+CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1
+# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set
+# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AFS_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_NOSWAP=y
+# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set
+# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set
+CONFIG_MTD_CFI_GEOMETRY=y
+# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_OTP is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+# CONFIG_MTD_XIP is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
+# CONFIG_MTD_ARM_INTEGRATOR is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+CONFIG_MTD_NAND=y
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+# CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+# CONFIG_MTD_NAND_GPIO is not set
+CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_NANDSIM is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+CONFIG_MTD_NAND_MXC=y
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ICS932S401 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_AT25 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+# CONFIG_SMC91X is not set
+# CONFIG_DM9000 is not set
+# CONFIG_ENC28J60 is not set
+# CONFIG_ETHOC is not set
+# CONFIG_SMC911X is not set
+# CONFIG_SMSC911X is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+CONFIG_CS89x0=y
+CONFIG_CS89x0_NONISA_IRQ=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_ADS7846 is not set
+# CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_GUNZE is not set
+# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
+# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
+# CONFIG_TOUCHSCREEN_MK712 is not set
+# CONFIG_TOUCHSCREEN_PENMOUNT is not set
+# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
+# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+# CONFIG_TOUCHSCREEN_TSC2007 is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+# CONFIG_CONSOLE_TRANSLATIONS is not set
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_MAX3100 is not set
+CONFIG_SERIAL_IMX=y
+CONFIG_SERIAL_IMX_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_GPIO is not set
+CONFIG_I2C_IMX=y
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+CONFIG_SPI=y
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+# CONFIG_SPI_BITBANG is not set
+# CONFIG_SPI_GPIO is not set
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_TC6393XB is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+CONFIG_FB_IMX=y
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+CONFIG_FONTS=y
+CONFIG_FONT_8x8=y
+# CONFIG_FONT_8x16 is not set
+# CONFIG_FONT_6x11 is not set
+# CONFIG_FONT_7x14 is not set
+# CONFIG_FONT_PEARL_8x8 is not set
+# CONFIG_FONT_ACORN_8x8 is not set
+# CONFIG_FONT_MINI_4x6 is not set
+# CONFIG_FONT_SUN8x16 is not set
+# CONFIG_FONT_SUN12x22 is not set
+# CONFIG_FONT_10x18 is not set
+CONFIG_LOGO=y
+CONFIG_LOGO_LINUX_MONO=y
+CONFIG_LOGO_LINUX_VGA16=y
+CONFIG_LOGO_LINUX_CLUT224=y
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_MMC_DEBUG is not set
+# CONFIG_MMC_UNSAFE_RESUME is not set
+
+#
+# MMC/SD/SDIO Card Drivers
+#
+CONFIG_MMC_BLOCK=y
+CONFIG_MMC_BLOCK_BOUNCE=y
+# CONFIG_SDIO_UART is not set
+# CONFIG_MMC_TEST is not set
+
+#
+# MMC/SD/SDIO Host Controller Drivers
+#
+# CONFIG_MMC_SDHCI is not set
+CONFIG_MMC_MXC=y
+# CONFIG_MMC_SPI is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_NEW_LEDS is not set
+CONFIG_RTC_LIB=y
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_REGULATOR is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+# CONFIG_VFAT_FS is not set
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
+# CONFIG_JFFS2_SUMMARY is not set
+# CONFIG_JFFS2_FS_XATTR is not set
+# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
+CONFIG_JFFS2_ZLIB=y
+# CONFIG_JFFS2_LZO is not set
+CONFIG_JFFS2_RTIME=y
+# CONFIG_JFFS2_RUBIN is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+CONFIG_ROOT_NFS=y
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+# CONFIG_NLS_ISO8859_1 is not set
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+CONFIG_ARM_UNWIND=y
+# CONFIG_DEBUG_USER is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_MANAGER2 is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From ec9be0debd822d5bd1d5bfdf297144396d56ac6b Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabioestevam@yahoo.com>
Date: Thu, 16 Apr 2009 12:45:01 -0700
Subject: MX27: Add basic support to MX27PDK

This patch adds basic support to MX27PDK board http://www.freescale.com/imx27pdk ).

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/Kconfig b/arch/arm/mach-mx2/Kconfig
index 7ce3712..6155044 100644
--- a/arch/arm/mach-mx2/Kconfig
+++ b/arch/arm/mach-mx2/Kconfig
@@ -53,4 +53,10 @@ config MACH_PCM970_BASEBOARD
 
 endchoice
 
+config MACH_MX27_3DS
+	bool "MX27PDK platform"
+	depends on MACH_MX27
+	help
+	  Include support for MX27PDK platform. This includes specific
+	  configurations for the board and its peripherals.
 endif
diff --git a/arch/arm/mach-mx2/Makefile b/arch/arm/mach-mx2/Makefile
index e583754..d140e2d 100644
--- a/arch/arm/mach-mx2/Makefile
+++ b/arch/arm/mach-mx2/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_MACH_MX21ADS) += mx21ads.o
 obj-$(CONFIG_MACH_MX27ADS) += mx27ads.o
 obj-$(CONFIG_MACH_PCM038) += pcm038.o
 obj-$(CONFIG_MACH_PCM970_BASEBOARD) += pcm970-baseboard.o
+obj-$(CONFIG_MACH_MX27_3DS) += mx27pdk.o
diff --git a/arch/arm/mach-mx2/mx27pdk.c b/arch/arm/mach-mx2/mx27pdk.c
new file mode 100644
index 0000000..90b1fa5
--- /dev/null
+++ b/arch/arm/mach-mx2/mx27pdk.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * Author: Fabio Estevam <fabio.estevam@freescale.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+#include <mach/hardware.h>
+#include <mach/common.h>
+#include <mach/imx-uart.h>
+#include <mach/iomux.h>
+#include <mach/board-mx27pdk.h>
+
+#include "devices.h"
+
+static unsigned int mx27pdk_pins[] = {
+	/* UART1 */
+	PE12_PF_UART1_TXD,
+	PE13_PF_UART1_RXD,
+	PE14_PF_UART1_CTS,
+	PE15_PF_UART1_RTS,
+	/* FEC */
+	PD0_AIN_FEC_TXD0,
+	PD1_AIN_FEC_TXD1,
+	PD2_AIN_FEC_TXD2,
+	PD3_AIN_FEC_TXD3,
+	PD4_AOUT_FEC_RX_ER,
+	PD5_AOUT_FEC_RXD1,
+	PD6_AOUT_FEC_RXD2,
+	PD7_AOUT_FEC_RXD3,
+	PD8_AF_FEC_MDIO,
+	PD9_AIN_FEC_MDC,
+	PD10_AOUT_FEC_CRS,
+	PD11_AOUT_FEC_TX_CLK,
+	PD12_AOUT_FEC_RXD0,
+	PD13_AOUT_FEC_RX_DV,
+	PD14_AOUT_FEC_RX_CLK,
+	PD15_AOUT_FEC_COL,
+	PD16_AIN_FEC_TX_ER,
+	PF23_AIN_FEC_TX_EN,
+};
+
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
+static struct platform_device *platform_devices[] __initdata = {
+	&mxc_fec_device,
+};
+
+static void __init mx27pdk_init(void)
+{
+	mxc_gpio_setup_multiple_pins(mx27pdk_pins, ARRAY_SIZE(mx27pdk_pins),
+		"mx27pdk");
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
+}
+
+static void __init mx27pdk_timer_init(void)
+{
+	mx27_clocks_init(26000000);
+}
+
+static struct sys_timer mx27pdk_timer = {
+	.init	= mx27pdk_timer_init,
+};
+
+MACHINE_START(MX27_3DS, "Freescale MX27PDK")
+	/* maintainer: Freescale Semiconductor, Inc. */
+	.phys_io        = AIPI_BASE_ADDR,
+	.io_pg_offst    = ((AIPI_BASE_ADDR_VIRT) >> 18) & 0xfffc,
+	.boot_params    = PHYS_OFFSET + 0x100,
+	.map_io         = mxc_map_io,
+	.init_irq       = mxc_init_irq,
+	.init_machine   = mx27pdk_init,
+	.timer          = &mx27pdk_timer,
+MACHINE_END
diff --git a/arch/arm/plat-mxc/include/mach/board-mx27pdk.h b/arch/arm/plat-mxc/include/mach/board-mx27pdk.h
new file mode 100644
index 0000000..552b55d
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/board-mx27pdk.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_MXC_BOARD_MX27PDK_H__
+#define __ASM_ARCH_MXC_BOARD_MX27PDK_H__
+
+/* mandatory for CONFIG_DEBUG_LL */
+
+#define MXC_LL_UART_PADDR	UART1_BASE_ADDR
+#define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
+
+#endif /* __ASM_ARCH_MXC_BOARD_MX27PDK_H__ */
diff --git a/arch/arm/plat-mxc/include/mach/debug-macro.S b/arch/arm/plat-mxc/include/mach/debug-macro.S
index cc7e89d..e6b841b 100644
--- a/arch/arm/plat-mxc/include/mach/debug-macro.S
+++ b/arch/arm/plat-mxc/include/mach/debug-macro.S
@@ -40,6 +40,9 @@
 #ifdef CONFIG_MACH_PCM043
 #include <mach/board-pcm043.h>
 #endif
+#ifdef CONFIG_MACH_MX27_3DS
+#include <mach/board-mx27pdk.h>
+#endif
 		.macro	addruart,rx
 		mrc	p15, 0, \rx, c1, c0
 		tst	\rx, #1			@ MMU enabled?
-- 
cgit v0.10.2


From 220bbcea278b4fef7ef0a45ce754222f8ed9d134 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Fri, 17 Apr 2009 15:20:25 +0200
Subject: mx31moboard: clean-up of board support

Various improvements (includes in alphabetical order, platform devices
declaration order change, ...)

The pins now are claimed in a single function call from a main table for
every board for a better clarity and to adopt the current agreement for
mx31 devices.

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31moboard-devboard.c b/arch/arm/mach-mx3/mx31moboard-devboard.c
index d080b4a..718702f 100644
--- a/arch/arm/mach-mx3/mx31moboard-devboard.c
+++ b/arch/arm/mach-mx3/mx31moboard-devboard.c
@@ -16,33 +16,36 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/types.h>
 #include <linux/init.h>
-
 #include <linux/platform_device.h>
+#include <linux/types.h>
 
-#include <mach/hardware.h>
 #include <mach/common.h>
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
+#include <mach/hardware.h>
 
 #include "devices.h"
 
-static struct imxuart_platform_data uart_pdata = {
-	.flags = IMXUART_HAVE_RTSCTS,
-};
-
-static int mxc_uart1_pins[] = {
+static unsigned int devboard_pins[] = {
+	/* UART1 */
 	MX31_PIN_CTS2__CTS2, MX31_PIN_RTS2__RTS2,
 	MX31_PIN_TXD2__TXD2, MX31_PIN_RXD2__RXD2,
 };
 
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
 /*
  * system init for baseboard usage. Will be called by mx31moboard init.
  */
 void __init mx31moboard_devboard_init(void)
 {
 	printk(KERN_INFO "Initializing mx31devboard peripherals\n");
-	mxc_iomux_setup_multiple_pins(mxc_uart1_pins, ARRAY_SIZE(mxc_uart1_pins), "uart1");
+
+	mxc_iomux_setup_multiple_pins(devboard_pins, ARRAY_SIZE(devboard_pins),
+		"devboard");
+
 	mxc_register_device(&mxc_uart_device1, &uart_pdata);
 }
diff --git a/arch/arm/mach-mx3/mx31moboard-marxbot.c b/arch/arm/mach-mx3/mx31moboard-marxbot.c
index 9ef9566..f5ea114 100644
--- a/arch/arm/mach-mx3/mx31moboard-marxbot.c
+++ b/arch/arm/mach-mx3/mx31moboard-marxbot.c
@@ -16,13 +16,12 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/types.h>
 #include <linux/init.h>
-
 #include <linux/platform_device.h>
+#include <linux/types.h>
 
-#include <mach/hardware.h>
 #include <mach/common.h>
+#include <mach/hardware.h>
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
 
diff --git a/arch/arm/mach-mx3/mx31moboard.c b/arch/arm/mach-mx3/mx31moboard.c
index 77168ed..623df17 100644
--- a/arch/arm/mach-mx3/mx31moboard.c
+++ b/arch/arm/mach-mx3/mx31moboard.c
@@ -16,26 +16,34 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/types.h>
 #include <linux/init.h>
-
-#include <linux/platform_device.h>
+#include <linux/memory.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/partitions.h>
-#include <linux/memory.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
 
-#include <mach/hardware.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 #include <asm/mach/map.h>
+#include <mach/board-mx31moboard.h>
 #include <mach/common.h>
+#include <mach/hardware.h>
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
-#include <mach/board-mx31moboard.h>
 
 #include "devices.h"
 
+static unsigned int moboard_pins[] = {
+	/* UART0 */
+	MX31_PIN_CTS1__CTS1, MX31_PIN_RTS1__RTS1,
+	MX31_PIN_TXD1__TXD1, MX31_PIN_RXD1__RXD1,
+	/* UART4 */
+	MX31_PIN_PC_RST__CTS5, MX31_PIN_PC_VS2__RTS5,
+	MX31_PIN_PC_BVD2__TXD5, MX31_PIN_PC_BVD1__RXD5,
+};
+
 static struct physmap_flash_data mx31moboard_flash_data = {
 	.width  	= 2,
 };
@@ -64,15 +72,6 @@ static struct platform_device *devices[] __initdata = {
 	&mx31moboard_flash,
 };
 
-static int mxc_uart0_pins[] = {
-	MX31_PIN_CTS1__CTS1, MX31_PIN_RTS1__RTS1,
-	MX31_PIN_TXD1__TXD1, MX31_PIN_RXD1__RXD1,
-};
-static int mxc_uart4_pins[] = {
-	MX31_PIN_PC_RST__CTS5, MX31_PIN_PC_VS2__RTS5,
-	MX31_PIN_PC_BVD2__TXD5, MX31_PIN_PC_BVD1__RXD5,
-};
-
 static int mx31moboard_baseboard;
 core_param(mx31moboard_baseboard, mx31moboard_baseboard, int, 0444);
 
@@ -81,12 +80,12 @@ core_param(mx31moboard_baseboard, mx31moboard_baseboard, int, 0444);
  */
 static void __init mxc_board_init(void)
 {
+	mxc_iomux_setup_multiple_pins(moboard_pins, ARRAY_SIZE(moboard_pins),
+		"moboard");
+
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 
-	mxc_iomux_setup_multiple_pins(mxc_uart0_pins, ARRAY_SIZE(mxc_uart0_pins), "uart0");
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
-
-	mxc_iomux_setup_multiple_pins(mxc_uart4_pins, ARRAY_SIZE(mxc_uart4_pins), "uart4");
 	mxc_register_device(&mxc_uart_device4, &uart_pdata);
 
 	switch (mx31moboard_baseboard) {
@@ -99,7 +98,8 @@ static void __init mxc_board_init(void)
 		mx31moboard_marxbot_init();
 		break;
 	default:
-		printk(KERN_ERR "Illegal mx31moboard_baseboard type %d\n", mx31moboard_baseboard);
+		printk(KERN_ERR "Illegal mx31moboard_baseboard type %d\n",
+			mx31moboard_baseboard);
 	}
 }
 
-- 
cgit v0.10.2


From 45001e92d0249a8c4b9f6c3695215652e8e8493d Mon Sep 17 00:00:00 2001
From: Alan Carvalho de Assis <acassis@gmail.com>
Date: Thu, 2 Apr 2009 12:38:41 -0300
Subject: i.MX31: Add hw-random for RNGA

This hw-random driver add support to RNGA hardware found
on some i.MX processors.

Signed-off-by: Alan Carvalho de Assis <acassis@gmail.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/Kconfig b/arch/arm/mach-mx3/Kconfig
index 0e7de94..32e4515 100644
--- a/arch/arm/mach-mx3/Kconfig
+++ b/arch/arm/mach-mx3/Kconfig
@@ -1,6 +1,7 @@
 if ARCH_MX3
 
 config ARCH_MX31
+	select ARCH_HAS_RNGA
 	bool
 
 config ARCH_MX35
diff --git a/arch/arm/mach-mx3/devices.c b/arch/arm/mach-mx3/devices.c
index 380be0c..227a538 100644
--- a/arch/arm/mach-mx3/devices.c
+++ b/arch/arm/mach-mx3/devices.c
@@ -23,6 +23,7 @@
 #include <linux/gpio.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
+#include <mach/common.h>
 #include <mach/imx-uart.h>
 
 #include "devices.h"
@@ -283,6 +284,21 @@ struct platform_device mxcsdhc_device1 = {
 	.num_resources = ARRAY_SIZE(mxcsdhc1_resources),
 	.resource = mxcsdhc1_resources,
 };
+
+static struct resource rnga_resources[] = {
+	{
+		.start = RNGA_BASE_ADDR,
+		.end = RNGA_BASE_ADDR + 0x28,
+		.flags = IORESOURCE_MEM,
+	},
+};
+
+struct platform_device mxc_rnga_device = {
+	.name = "mxc_rnga",
+	.id = -1,
+	.num_resources = 1,
+	.resource = rnga_resources,
+};
 #endif /* CONFIG_ARCH_MX31 */
 
 /* i.MX31 Image Processing Unit */
@@ -359,6 +375,7 @@ static int mx3_devices_init(void)
 	if (cpu_is_mx31()) {
 		mxc_nand_resources[0].start = MX31_NFC_BASE_ADDR;
 		mxc_nand_resources[0].end = MX31_NFC_BASE_ADDR + 0xfff;
+		mxc_register_device(&mxc_rnga_device, NULL);
 	}
 	if (cpu_is_mx35()) {
 		mxc_nand_resources[0].start = MX35_NFC_BASE_ADDR;
diff --git a/arch/arm/plat-mxc/Kconfig b/arch/arm/plat-mxc/Kconfig
index beeb01a..8986b74 100644
--- a/arch/arm/plat-mxc/Kconfig
+++ b/arch/arm/plat-mxc/Kconfig
@@ -52,6 +52,10 @@ config MXC_PWM
 	help
 	  Enable support for the i.MX PWM controller(s).
 
+config ARCH_HAS_RNGA
+	bool
+	depends on ARCH_MXC
+
 config ARCH_MXC_IOMUX_V3
 	bool
 endif
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 5fab647..26c93c7 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -148,3 +148,15 @@ config HW_RANDOM_VIRTIO
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called virtio-rng.  If unsure, say N.
+
+config HW_RANDOM_MXC_RNGA
+	tristate "Freescale i.MX RNGA Random Number Generator"
+	depends on HW_RANDOM && ARCH_HAS_RNGA
+	---help---
+	  This driver provides kernel-side support for the Random Number
+	  Generator hardware found on Freescale i.MX processors.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called mxc-rnga.
+
+	  If unsure, say Y.
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index e81d21a..fd1ecd2 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_HW_RANDOM_IXP4XX) += ixp4xx-rng.o
 obj-$(CONFIG_HW_RANDOM_OMAP) += omap-rng.o
 obj-$(CONFIG_HW_RANDOM_PASEMI) += pasemi-rng.o
 obj-$(CONFIG_HW_RANDOM_VIRTIO) += virtio-rng.o
+obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c
new file mode 100644
index 0000000..187c6be
--- /dev/null
+++ b/drivers/char/hw_random/mxc-rnga.c
@@ -0,0 +1,247 @@
+/*
+ * RNG driver for Freescale RNGA
+ *
+ * Copyright 2008-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Author: Alan Carvalho de Assis <acassis@gmail.com>
+ */
+
+/*
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * This driver is based on other RNG drivers.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+
+/* RNGA Registers */
+#define RNGA_CONTROL			0x00
+#define RNGA_STATUS			0x04
+#define RNGA_ENTROPY			0x08
+#define RNGA_OUTPUT_FIFO		0x0c
+#define RNGA_MODE			0x10
+#define RNGA_VERIFICATION_CONTROL	0x14
+#define RNGA_OSC_CONTROL_COUNTER	0x18
+#define RNGA_OSC1_COUNTER		0x1c
+#define RNGA_OSC2_COUNTER		0x20
+#define RNGA_OSC_COUNTER_STATUS		0x24
+
+/* RNGA Registers Range */
+#define RNG_ADDR_RANGE			0x28
+
+/* RNGA Control Register */
+#define RNGA_CONTROL_SLEEP		0x00000010
+#define RNGA_CONTROL_CLEAR_INT		0x00000008
+#define RNGA_CONTROL_MASK_INTS		0x00000004
+#define RNGA_CONTROL_HIGH_ASSURANCE	0x00000002
+#define RNGA_CONTROL_GO			0x00000001
+
+#define RNGA_STATUS_LEVEL_MASK		0x0000ff00
+
+/* RNGA Status Register */
+#define RNGA_STATUS_OSC_DEAD		0x80000000
+#define RNGA_STATUS_SLEEP		0x00000010
+#define RNGA_STATUS_ERROR_INT		0x00000008
+#define RNGA_STATUS_FIFO_UNDERFLOW	0x00000004
+#define RNGA_STATUS_LAST_READ_STATUS	0x00000002
+#define RNGA_STATUS_SECURITY_VIOLATION	0x00000001
+
+static struct platform_device *rng_dev;
+
+static int mxc_rnga_data_present(struct hwrng *rng)
+{
+	int level;
+	void __iomem *rng_base = (void __iomem *)rng->priv;
+
+	/* how many random numbers is in FIFO? [0-16] */
+	level = ((__raw_readl(rng_base + RNGA_STATUS) &
+			RNGA_STATUS_LEVEL_MASK) >> 8);
+
+	return level > 0 ? 1 : 0;
+}
+
+static int mxc_rnga_data_read(struct hwrng *rng, u32 * data)
+{
+	int err;
+	u32 ctrl;
+	void __iomem *rng_base = (void __iomem *)rng->priv;
+
+	/* retrieve a random number from FIFO */
+	*data = __raw_readl(rng_base + RNGA_OUTPUT_FIFO);
+
+	/* some error while reading this random number? */
+	err = __raw_readl(rng_base + RNGA_STATUS) & RNGA_STATUS_ERROR_INT;
+
+	/* if error: clear error interrupt, but doesn't return random number */
+	if (err) {
+		dev_dbg(&rng_dev->dev, "Error while reading random number!\n");
+		ctrl = __raw_readl(rng_base + RNGA_CONTROL);
+		__raw_writel(ctrl | RNGA_CONTROL_CLEAR_INT,
+					rng_base + RNGA_CONTROL);
+		return 0;
+	} else
+		return 4;
+}
+
+static int mxc_rnga_init(struct hwrng *rng)
+{
+	u32 ctrl, osc;
+	void __iomem *rng_base = (void __iomem *)rng->priv;
+
+	/* wake up */
+	ctrl = __raw_readl(rng_base + RNGA_CONTROL);
+	__raw_writel(ctrl & ~RNGA_CONTROL_SLEEP, rng_base + RNGA_CONTROL);
+
+	/* verify if oscillator is working */
+	osc = __raw_readl(rng_base + RNGA_STATUS);
+	if (osc & RNGA_STATUS_OSC_DEAD) {
+		dev_err(&rng_dev->dev, "RNGA Oscillator is dead!\n");
+		return -ENODEV;
+	}
+
+	/* go running */
+	ctrl = __raw_readl(rng_base + RNGA_CONTROL);
+	__raw_writel(ctrl | RNGA_CONTROL_GO, rng_base + RNGA_CONTROL);
+
+	return 0;
+}
+
+static void mxc_rnga_cleanup(struct hwrng *rng)
+{
+	u32 ctrl;
+	void __iomem *rng_base = (void __iomem *)rng->priv;
+
+	ctrl = __raw_readl(rng_base + RNGA_CONTROL);
+
+	/* stop rnga */
+	__raw_writel(ctrl & ~RNGA_CONTROL_GO, rng_base + RNGA_CONTROL);
+}
+
+static struct hwrng mxc_rnga = {
+	.name = "mxc-rnga",
+	.init = mxc_rnga_init,
+	.cleanup = mxc_rnga_cleanup,
+	.data_present = mxc_rnga_data_present,
+	.data_read = mxc_rnga_data_read
+};
+
+static int __init mxc_rnga_probe(struct platform_device *pdev)
+{
+	int err = -ENODEV;
+	struct clk *clk;
+	struct resource *res, *mem;
+	void __iomem *rng_base = NULL;
+
+	if (rng_dev)
+		return -EBUSY;
+
+	clk = clk_get(&pdev->dev, "rng");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "Could not get rng_clk!\n");
+		err = PTR_ERR(clk);
+		goto out;
+	}
+
+	clk_enable(clk);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		err = -ENOENT;
+		goto err_region;
+	}
+
+	mem = request_mem_region(res->start, resource_size(res), pdev->name);
+	if (mem == NULL) {
+		err = -EBUSY;
+		goto err_region;
+	}
+
+	rng_base = ioremap(res->start, resource_size(res));
+	if (!rng_base) {
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	mxc_rnga.priv = (unsigned long)rng_base;
+
+	err = hwrng_register(&mxc_rnga);
+	if (err) {
+		dev_err(&pdev->dev, "MXC RNGA registering failed (%d)\n", err);
+		goto err_register;
+	}
+
+	rng_dev = pdev;
+
+	dev_info(&pdev->dev, "MXC RNGA Registered.\n");
+
+	return 0;
+
+err_register:
+	iounmap(rng_base);
+	rng_base = NULL;
+
+err_ioremap:
+	release_mem_region(res->start, resource_size(res));
+
+err_region:
+	clk_disable(clk);
+	clk_put(clk);
+
+out:
+	return err;
+}
+
+static int __exit mxc_rnga_remove(struct platform_device *pdev)
+{
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	void __iomem *rng_base = (void __iomem *)mxc_rnga.priv;
+	struct clk *clk = clk_get(&pdev->dev, "rng");
+
+	hwrng_unregister(&mxc_rnga);
+
+	iounmap(rng_base);
+
+	release_mem_region(res->start, resource_size(res));
+
+	clk_disable(clk);
+	clk_put(clk);
+
+	return 0;
+}
+
+static struct platform_driver mxc_rnga_driver = {
+	.driver = {
+		   .name = "mxc_rnga",
+		   .owner = THIS_MODULE,
+		   },
+	.remove = __exit_p(mxc_rnga_remove),
+};
+
+static int __init mod_init(void)
+{
+	return platform_driver_probe(&mxc_rnga_driver, mxc_rnga_probe);
+}
+
+static void __exit mod_exit(void)
+{
+	platform_driver_unregister(&mxc_rnga_driver);
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("H/W RNGA driver for i.MX");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From f801079b2b8790a9f096151e2ca1d1239a24d9e4 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Wed, 22 Apr 2009 10:54:14 +0200
Subject: mx31: define various pins used on mx31moboard (v2)

As suggested by Sascha, I regroup them in a single patch so that the
other patches become more orthogonal.

changes since v1: changed I2C1 pin names

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx3.h b/arch/arm/plat-mxc/include/mach/iomux-mx3.h
index 57e927a..be43f76 100644
--- a/arch/arm/plat-mxc/include/mach/iomux-mx3.h
+++ b/arch/arm/plat-mxc/include/mach/iomux-mx3.h
@@ -633,6 +633,40 @@ enum iomux_pins {
 #define MX31_PIN_USBOTG_DIR__USBOTG_DIR        IOMUX_MODE(MX31_PIN_USBOTG_DIR, IOMUX_CONFIG_FUNC)
 #define MX31_PIN_USBOTG_NXT__USBOTG_NXT        IOMUX_MODE(MX31_PIN_USBOTG_NXT, IOMUX_CONFIG_FUNC)
 #define MX31_PIN_USBOTG_STP__USBOTG_STP        IOMUX_MODE(MX31_PIN_USBOTG_STP, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_USB_OC__GPIO1_30	IOMUX_MODE(MX31_PIN_USB_OC, IOMUX_CONFIG_GPIO)
+#define MX31_PIN_I2C_DAT__I2C1_SDA	IOMUX_MODE(MX31_PIN_I2C_DAT, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_I2C_CLK__I2C1_SCL	IOMUX_MODE(MX31_PIN_I2C_CLK, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_DCD_DTE1__I2C2_SDA	IOMUX_MODE(MX31_PIN_DCD_DTE1, IOMUX_CONFIG_ALT2)
+#define MX31_PIN_RI_DTE1__I2C2_SCL	IOMUX_MODE(MX31_PIN_RI_DTE1, IOMUX_CONFIG_ALT2)
+#define MX31_PIN_ATA_CS0__GPIO3_26	IOMUX_MODE(MX31_PIN_ATA_CS0, IOMUX_CONFIG_GPIO)
+#define MX31_PIN_ATA_CS1__GPIO3_27	IOMUX_MODE(MX31_PIN_ATA_CS1, IOMUX_CONFIG_GPIO)
+#define MX31_PIN_PC_PWRON__SD2_DATA3	IOMUX_MODE(MX31_PIN_PC_PWRON, IOMUX_CONFIG_ALT1)
+#define MX31_PIN_PC_VS1__SD2_DATA2	IOMUX_MODE(MX31_PIN_PC_VS1, IOMUX_CONFIG_ALT1)
+#define MX31_PIN_PC_READY__SD2_DATA1	IOMUX_MODE(MX31_PIN_PC_READY, IOMUX_CONFIG_ALT1)
+#define MX31_PIN_PC_WAIT_B__SD2_DATA0	IOMUX_MODE(MX31_PIN_PC_WAIT_B, IOMUX_CONFIG_ALT1)
+#define MX31_PIN_PC_CD2_B__SD2_CLK	IOMUX_MODE(MX31_PIN_PC_CD2_B, IOMUX_CONFIG_ALT1)
+#define MX31_PIN_PC_CD1_B__SD2_CMD	IOMUX_MODE(MX31_PIN_PC_CD1_B, IOMUX_CONFIG_ALT1)
+#define MX31_PIN_ATA_DIOR__GPIO3_28	IOMUX_MODE(MX31_PIN_ATA_DIOR, IOMUX_CONFIG_GPIO)
+#define MX31_PIN_ATA_DIOW__GPIO3_29	IOMUX_MODE(MX31_PIN_ATA_DIOW, IOMUX_CONFIG_GPIO)
+#define MX31_PIN_CSI_D4__CSI_D4		IOMUX_MODE(MX31_PIN_CSI_D4, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D5__CSI_D5		IOMUX_MODE(MX31_PIN_CSI_D5, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D6__CSI_D6		IOMUX_MODE(MX31_PIN_CSI_D6, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D7__CSI_D7		IOMUX_MODE(MX31_PIN_CSI_D7, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D8__CSI_D8		IOMUX_MODE(MX31_PIN_CSI_D8, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D9__CSI_D9		IOMUX_MODE(MX31_PIN_CSI_D9, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D10__CSI_D10	IOMUX_MODE(MX31_PIN_CSI_D10, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D11__CSI_D11	IOMUX_MODE(MX31_PIN_CSI_D11, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D12__CSI_D12	IOMUX_MODE(MX31_PIN_CSI_D12, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D13__CSI_D13	IOMUX_MODE(MX31_PIN_CSI_D13, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D14__CSI_D14	IOMUX_MODE(MX31_PIN_CSI_D14, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_D15__CSI_D15	IOMUX_MODE(MX31_PIN_CSI_D15, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_HSYNC__CSI_HSYNC	IOMUX_MODE(MX31_PIN_CSI_HSYNC, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_MCLK__CSI_MCLK	IOMUX_MODE(MX31_PIN_CSI_MCLK, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_PIXCLK__CSI_PIXCLK	IOMUX_MODE(MX31_PIN_CSI_PIXCLK, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_CSI_VSYNC__CSI_VSYNC	IOMUX_MODE(MX31_PIN_CSI_VSYNC, IOMUX_CONFIG_FUNC)
+#define MX31_PIN_GPIO3_0__GPIO3_0	IOMUX_MODE(MX31_PIN_GPIO3_0, IOMUX_CONFIG_GPIO)
+#define MX31_PIN_GPIO3_1__GPIO3_1	IOMUX_MODE(MX31_PIN_GPIO3_1, IOMUX_CONFIG_GPIO)
+#define MX31_PIN_TXD2__GPIO1_28		IOMUX_MODE(MX31_PIN_TXD2, IOMUX_CONFIG_GPIO)
 
 /*XXX: The SS0, SS1, SS2, SS3 lines of spi3 are multiplexed by cspi2_ss0, cspi2_ss1, cspi1_ss0
  * cspi1_ss1*/
-- 
cgit v0.10.2


From 56c7a45bee7313d49dd58465492cdbd49fea8e16 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Wed, 22 Apr 2009 10:55:50 +0200
Subject: mx31moboard: setup all pins (v2)

As suggested by Sascha, here we setup all the pins that we are using in
the current (and currently reviewed) drivers for mx31moboard system.

changes since v1: changed I2C1 pin names

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31moboard-devboard.c b/arch/arm/mach-mx3/mx31moboard-devboard.c
index 718702f..59b3a74 100644
--- a/arch/arm/mach-mx3/mx31moboard-devboard.c
+++ b/arch/arm/mach-mx3/mx31moboard-devboard.c
@@ -31,6 +31,11 @@ static unsigned int devboard_pins[] = {
 	/* UART1 */
 	MX31_PIN_CTS2__CTS2, MX31_PIN_RTS2__RTS2,
 	MX31_PIN_TXD2__TXD2, MX31_PIN_RXD2__RXD2,
+	/* SDHC2 */
+	MX31_PIN_PC_PWRON__SD2_DATA3, MX31_PIN_PC_VS1__SD2_DATA2,
+	MX31_PIN_PC_READY__SD2_DATA1, MX31_PIN_PC_WAIT_B__SD2_DATA0,
+	MX31_PIN_PC_CD2_B__SD2_CLK, MX31_PIN_PC_CD1_B__SD2_CMD,
+	MX31_PIN_ATA_DIOR__GPIO3_28, MX31_PIN_ATA_DIOW__GPIO3_29,
 };
 
 static struct imxuart_platform_data uart_pdata = {
diff --git a/arch/arm/mach-mx3/mx31moboard-marxbot.c b/arch/arm/mach-mx3/mx31moboard-marxbot.c
index f5ea114..daeb1e9 100644
--- a/arch/arm/mach-mx3/mx31moboard-marxbot.c
+++ b/arch/arm/mach-mx3/mx31moboard-marxbot.c
@@ -27,10 +27,32 @@
 
 #include "devices.h"
 
+static unsigned int marxbot_pins[] = {
+	/* SDHC2 */
+	MX31_PIN_PC_PWRON__SD2_DATA3, MX31_PIN_PC_VS1__SD2_DATA2,
+	MX31_PIN_PC_READY__SD2_DATA1, MX31_PIN_PC_WAIT_B__SD2_DATA0,
+	MX31_PIN_PC_CD2_B__SD2_CLK, MX31_PIN_PC_CD1_B__SD2_CMD,
+	MX31_PIN_ATA_DIOR__GPIO3_28, MX31_PIN_ATA_DIOW__GPIO3_29,
+	/* CSI */
+	MX31_PIN_CSI_D4__CSI_D4, MX31_PIN_CSI_D5__CSI_D5,
+	MX31_PIN_CSI_D6__CSI_D6, MX31_PIN_CSI_D7__CSI_D7,
+	MX31_PIN_CSI_D8__CSI_D8, MX31_PIN_CSI_D9__CSI_D9,
+	MX31_PIN_CSI_D10__CSI_D10, MX31_PIN_CSI_D11__CSI_D11,
+	MX31_PIN_CSI_D12__CSI_D12, MX31_PIN_CSI_D13__CSI_D13,
+	MX31_PIN_CSI_D14__CSI_D14, MX31_PIN_CSI_D15__CSI_D15,
+	MX31_PIN_CSI_HSYNC__CSI_HSYNC, MX31_PIN_CSI_MCLK__CSI_MCLK,
+	MX31_PIN_CSI_PIXCLK__CSI_PIXCLK, MX31_PIN_CSI_VSYNC__CSI_VSYNC,
+	MX31_PIN_GPIO3_0__GPIO3_0, MX31_PIN_GPIO3_1__GPIO3_1,
+	MX31_PIN_TXD2__GPIO1_28,
+};
+
 /*
  * system init for baseboard usage. Will be called by mx31moboard init.
  */
 void __init mx31moboard_marxbot_init(void)
 {
 	printk(KERN_INFO "Initializing mx31marxbot peripherals\n");
+
+	mxc_iomux_setup_multiple_pins(marxbot_pins, ARRAY_SIZE(marxbot_pins),
+		"marxbot");
 }
diff --git a/arch/arm/mach-mx3/mx31moboard.c b/arch/arm/mach-mx3/mx31moboard.c
index 623df17..60a5145 100644
--- a/arch/arm/mach-mx3/mx31moboard.c
+++ b/arch/arm/mach-mx3/mx31moboard.c
@@ -42,6 +42,27 @@ static unsigned int moboard_pins[] = {
 	/* UART4 */
 	MX31_PIN_PC_RST__CTS5, MX31_PIN_PC_VS2__RTS5,
 	MX31_PIN_PC_BVD2__TXD5, MX31_PIN_PC_BVD1__RXD5,
+	/* I2C0 */
+	MX31_PIN_I2C_DAT__I2C1_SDA, MX31_PIN_I2C_CLK__I2C1_SCL,
+	/* I2C1 */
+	MX31_PIN_DCD_DTE1__I2C2_SDA, MX31_PIN_RI_DTE1__I2C2_SCL,
+	/* SDHC1 */
+	MX31_PIN_SD1_DATA3__SD1_DATA3, MX31_PIN_SD1_DATA2__SD1_DATA2,
+	MX31_PIN_SD1_DATA1__SD1_DATA1, MX31_PIN_SD1_DATA0__SD1_DATA0,
+	MX31_PIN_SD1_CLK__SD1_CLK, MX31_PIN_SD1_CMD__SD1_CMD,
+	MX31_PIN_ATA_CS0__GPIO3_26, MX31_PIN_ATA_CS1__GPIO3_27,
+	/* USB OTG */
+	MX31_PIN_USBOTG_DATA0__USBOTG_DATA0,
+	MX31_PIN_USBOTG_DATA1__USBOTG_DATA1,
+	MX31_PIN_USBOTG_DATA2__USBOTG_DATA2,
+	MX31_PIN_USBOTG_DATA3__USBOTG_DATA3,
+	MX31_PIN_USBOTG_DATA4__USBOTG_DATA4,
+	MX31_PIN_USBOTG_DATA5__USBOTG_DATA5,
+	MX31_PIN_USBOTG_DATA6__USBOTG_DATA6,
+	MX31_PIN_USBOTG_DATA7__USBOTG_DATA7,
+	MX31_PIN_USBOTG_CLK__USBOTG_CLK, MX31_PIN_USBOTG_DIR__USBOTG_DIR,
+	MX31_PIN_USBOTG_NXT__USBOTG_NXT, MX31_PIN_USBOTG_STP__USBOTG_STP,
+	MX31_PIN_USB_OC__GPIO1_30,
 };
 
 static struct physmap_flash_data mx31moboard_flash_data = {
-- 
cgit v0.10.2


From 4ec6ecc77872a57b471b60dce298ed25556944f0 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Tue, 21 Apr 2009 10:24:22 +0200
Subject: mx31moboard: add i2c support (v2)

Changes since v1: all the pins needed for the drivers are claimed in
another patch

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31moboard.c b/arch/arm/mach-mx3/mx31moboard.c
index 60a5145..d846527 100644
--- a/arch/arm/mach-mx3/mx31moboard.c
+++ b/arch/arm/mach-mx3/mx31moboard.c
@@ -32,6 +32,7 @@
 #include <mach/hardware.h>
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
+#include <mach/i2c.h>
 
 #include "devices.h"
 
@@ -89,6 +90,14 @@ static struct imxuart_platform_data uart_pdata = {
 	.flags = IMXUART_HAVE_RTSCTS,
 };
 
+static struct imxi2c_platform_data moboard_i2c0_pdata = {
+	.bitrate = 400000,
+};
+
+static struct imxi2c_platform_data moboard_i2c1_pdata = {
+	.bitrate = 100000,
+};
+
 static struct platform_device *devices[] __initdata = {
 	&mx31moboard_flash,
 };
@@ -109,6 +118,9 @@ static void __init mxc_board_init(void)
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
 	mxc_register_device(&mxc_uart_device4, &uart_pdata);
 
+	mxc_register_device(&mxc_i2c_device0, &moboard_i2c0_pdata);
+	mxc_register_device(&mxc_i2c_device1, &moboard_i2c1_pdata);
+
 	switch (mx31moboard_baseboard) {
 	case MX31NOBOARD:
 		break;
-- 
cgit v0.10.2


From 45b131a78ceeb3bc776db08c8eaa67d4676b6cd7 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Tue, 21 Apr 2009 10:24:56 +0200
Subject: mx31moboard: add sdhc support (v3)

This support both sdhc1 and sdhc2 with WP and CD for the marxbot and
devboard mx31moboard baseboards.

sdhc2 is present on both, but is not directly included in the
mx31moboard file because a third baseboard (not supported yet) without
sdhc2 is planned.

Changes since v1: removed pin initialization from init/exit function and
taken different comments into account

Changes since v2: pin initialiation now is done in another patch for all
current mx31moboard pins

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31moboard-devboard.c b/arch/arm/mach-mx3/mx31moboard-devboard.c
index 59b3a74..0a69a4f 100644
--- a/arch/arm/mach-mx3/mx31moboard-devboard.c
+++ b/arch/arm/mach-mx3/mx31moboard-devboard.c
@@ -16,7 +16,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/gpio.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/types.h>
 
@@ -24,6 +26,7 @@
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
 #include <mach/hardware.h>
+#include <mach/mmc.h>
 
 #include "devices.h"
 
@@ -42,6 +45,33 @@ static struct imxuart_platform_data uart_pdata = {
 	.flags = IMXUART_HAVE_RTSCTS,
 };
 
+#define SDHC2_CD IOMUX_TO_GPIO(MX31_PIN_ATA_DIOR)
+#define SDHC2_WP IOMUX_TO_GPIO(MX31_PIN_ATA_DIOW)
+
+static int devboard_sdhc2_get_ro(struct device *dev)
+{
+	return gpio_get_value(SDHC2_WP);
+}
+
+static int devboard_sdhc2_init(struct device *dev, irq_handler_t detect_irq,
+		void *data)
+{
+	return request_irq(gpio_to_irq(SDHC2_CD), detect_irq,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		"sdhc2-card-detect", data);
+}
+
+static void devboard_sdhc2_exit(struct device *dev, void *data)
+{
+	free_irq(gpio_to_irq(SDHC2_CD), data);
+}
+
+static struct imxmmc_platform_data sdhc2_pdata = {
+	.get_ro	= devboard_sdhc2_get_ro,
+	.init	= devboard_sdhc2_init,
+	.exit	= devboard_sdhc2_exit,
+};
+
 /*
  * system init for baseboard usage. Will be called by mx31moboard init.
  */
@@ -53,4 +83,6 @@ void __init mx31moboard_devboard_init(void)
 		"devboard");
 
 	mxc_register_device(&mxc_uart_device1, &uart_pdata);
+
+	mxc_register_device(&mxcsdhc_device1, &sdhc2_pdata);
 }
diff --git a/arch/arm/mach-mx3/mx31moboard-marxbot.c b/arch/arm/mach-mx3/mx31moboard-marxbot.c
index daeb1e9..4fc2711 100644
--- a/arch/arm/mach-mx3/mx31moboard-marxbot.c
+++ b/arch/arm/mach-mx3/mx31moboard-marxbot.c
@@ -16,7 +16,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/gpio.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/types.h>
 
@@ -24,6 +26,7 @@
 #include <mach/hardware.h>
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
+#include <mach/mmc.h>
 
 #include "devices.h"
 
@@ -46,6 +49,33 @@ static unsigned int marxbot_pins[] = {
 	MX31_PIN_TXD2__GPIO1_28,
 };
 
+#define SDHC2_CD IOMUX_TO_GPIO(MX31_PIN_ATA_DIOR)
+#define SDHC2_WP IOMUX_TO_GPIO(MX31_PIN_ATA_DIOW)
+
+static int marxbot_sdhc2_get_ro(struct device *dev)
+{
+	return gpio_get_value(SDHC2_WP);
+}
+
+static int marxbot_sdhc2_init(struct device *dev, irq_handler_t detect_irq,
+		void *data)
+{
+	return request_irq(gpio_to_irq(SDHC2_CD), detect_irq,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		"sdhc2-card-detect", data);
+}
+
+static void marxbot_sdhc2_exit(struct device *dev, void *data)
+{
+	free_irq(gpio_to_irq(SDHC2_CD), data);
+}
+
+static struct imxmmc_platform_data sdhc2_pdata = {
+	.get_ro	= marxbot_sdhc2_get_ro,
+	.init	= marxbot_sdhc2_init,
+	.exit	= marxbot_sdhc2_exit,
+};
+
 /*
  * system init for baseboard usage. Will be called by mx31moboard init.
  */
@@ -55,4 +85,6 @@ void __init mx31moboard_marxbot_init(void)
 
 	mxc_iomux_setup_multiple_pins(marxbot_pins, ARRAY_SIZE(marxbot_pins),
 		"marxbot");
+
+	mxc_register_device(&mxcsdhc_device1, &sdhc2_pdata);
 }
diff --git a/arch/arm/mach-mx3/mx31moboard.c b/arch/arm/mach-mx3/mx31moboard.c
index d846527..2cca3f2 100644
--- a/arch/arm/mach-mx3/mx31moboard.c
+++ b/arch/arm/mach-mx3/mx31moboard.c
@@ -16,7 +16,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/gpio.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/memory.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/partitions.h>
@@ -33,6 +35,7 @@
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
 #include <mach/i2c.h>
+#include <mach/mmc.h>
 
 #include "devices.h"
 
@@ -98,6 +101,33 @@ static struct imxi2c_platform_data moboard_i2c1_pdata = {
 	.bitrate = 100000,
 };
 
+#define SDHC1_CD IOMUX_TO_GPIO(MX31_PIN_ATA_CS0)
+#define SDHC1_WP IOMUX_TO_GPIO(MX31_PIN_ATA_CS1)
+
+static int moboard_sdhc1_get_ro(struct device *dev)
+{
+	return gpio_get_value(SDHC1_WP);
+}
+
+static int moboard_sdhc1_init(struct device *dev, irq_handler_t detect_irq,
+		void *data)
+{
+	return request_irq(gpio_to_irq(SDHC1_CD), detect_irq,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		"sdhc1-card-detect", data);
+}
+
+static void moboard_sdhc1_exit(struct device *dev, void *data)
+{
+	free_irq(gpio_to_irq(SDHC1_CD), data);
+}
+
+static struct imxmmc_platform_data sdhc1_pdata = {
+	.get_ro	= moboard_sdhc1_get_ro,
+	.init	= moboard_sdhc1_init,
+	.exit	= moboard_sdhc1_exit,
+};
+
 static struct platform_device *devices[] __initdata = {
 	&mx31moboard_flash,
 };
@@ -121,6 +151,8 @@ static void __init mxc_board_init(void)
 	mxc_register_device(&mxc_i2c_device0, &moboard_i2c0_pdata);
 	mxc_register_device(&mxc_i2c_device1, &moboard_i2c1_pdata);
 
+	mxc_register_device(&mxcsdhc_device0, &sdhc1_pdata);
+
 	switch (mx31moboard_baseboard) {
 	case MX31NOBOARD:
 		break;
-- 
cgit v0.10.2


From a854b8ab987b29fd3b1ae286f41f04973f9edbac Mon Sep 17 00:00:00 2001
From: Magnus Lilja <lilja.magnus@gmail.com>
Date: Tue, 14 Apr 2009 22:00:03 +0200
Subject: i.MX31: Add UART1 device to Litekit board.

Signed-off-by: Magnus Lilja <lilja.magnus@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lite.c b/arch/arm/mach-mx3/mx31lite.c
index 76b36da..abe6ca3 100644
--- a/arch/arm/mach-mx3/mx31lite.c
+++ b/arch/arm/mach-mx3/mx31lite.c
@@ -32,11 +32,26 @@
 #include <asm/page.h>
 #include <asm/setup.h>
 #include <mach/board-mx31lite.h>
+#include <mach/imx-uart.h>
+#include <mach/iomux-mx3.h>
+#include "devices.h"
 
 /*
  * This file contains the board-specific initialization routines.
  */
 
+static unsigned int mx31lite_pins[] = {
+	/* UART1 */
+	MX31_PIN_CTS1__CTS1,
+	MX31_PIN_RTS1__RTS1,
+	MX31_PIN_TXD1__TXD1,
+	MX31_PIN_RXD1__RXD1,
+};
+
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
 /*
  * This structure defines the MX31 memory map.
  */
@@ -68,6 +83,10 @@ void __init mx31lite_map_io(void)
  */
 static void __init mxc_board_init(void)
 {
+	mxc_iomux_setup_multiple_pins(mx31lite_pins, ARRAY_SIZE(mx31lite_pins),
+				      "mx31lite");
+
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
 }
 
 static void __init mx31lite_timer_init(void)
-- 
cgit v0.10.2


From 3b7b8db1226fd2abf70f252b78f32198ef93c216 Mon Sep 17 00:00:00 2001
From: Magnus Lilja <lilja.magnus@gmail.com>
Date: Tue, 14 Apr 2009 22:00:05 +0200
Subject: i.MX31: Removed unused items from board-mx31lite.h.

Signed-off-by: Magnus Lilja <lilja.magnus@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/include/mach/board-mx31lite.h b/arch/arm/plat-mxc/include/mach/board-mx31lite.h
index e4e5cf5..52fbdf2 100644
--- a/arch/arm/plat-mxc/include/mach/board-mx31lite.h
+++ b/arch/arm/plat-mxc/include/mach/board-mx31lite.h
@@ -11,28 +11,8 @@
 #ifndef __ASM_ARCH_MXC_BOARD_MX31LITE_H__
 #define __ASM_ARCH_MXC_BOARD_MX31LITE_H__
 
-#define MXC_MAX_EXP_IO_LINES	16
-
-
-/*
- * Memory Size parameters
- */
-
-/*
- * Size of SDRAM memory
- */
-#define SDRAM_MEM_SIZE		SZ_128M
-/*
- * Size of MBX buffer memory
- */
-#define MXC_MBX_MEM_SIZE	SZ_16M
-/*
- * Size of memory available to kernel
- */
-#define MEM_SIZE		(SDRAM_MEM_SIZE - MXC_MBX_MEM_SIZE)
-
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
 
-#endif /* __ASM_ARCH_MXC_BOARD_MX31ADS_H__ */
+#endif /* __ASM_ARCH_MXC_BOARD_MX31LITE_H__ */
 
-- 
cgit v0.10.2


From c1332616315584d45776a5e664382888513c0fa0 Mon Sep 17 00:00:00 2001
From: Magnus Lilja <lilja.magnus@gmail.com>
Date: Tue, 14 Apr 2009 22:00:06 +0200
Subject: i.MX31: Correct comments on CONFIG_DEBUG_LL.

Several comments in board config files stated "mandatory for
CONFIG_LL_DEBUG" but the correct name is CONFIG_DEBUG_LL.

Signed-off-by: Magnus Lilja <lilja.magnus@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/include/mach/board-mx31ads.h b/arch/arm/plat-mxc/include/mach/board-mx31ads.h
index 318c72a..06e6895 100644
--- a/arch/arm/plat-mxc/include/mach/board-mx31ads.h
+++ b/arch/arm/plat-mxc/include/mach/board-mx31ads.h
@@ -114,7 +114,7 @@
 
 #define MXC_MAX_EXP_IO_LINES	16
 
-/* mandatory for CONFIG_LL_DEBUG */
+/* mandatory for CONFIG_DEBUG_LL */
 
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
diff --git a/arch/arm/plat-mxc/include/mach/board-mx31moboard.h b/arch/arm/plat-mxc/include/mach/board-mx31moboard.h
index f8aef1b..303fd24 100644
--- a/arch/arm/plat-mxc/include/mach/board-mx31moboard.h
+++ b/arch/arm/plat-mxc/include/mach/board-mx31moboard.h
@@ -19,7 +19,7 @@
 #ifndef __ASM_ARCH_MXC_BOARD_MX31MOBOARD_H__
 #define __ASM_ARCH_MXC_BOARD_MX31MOBOARD_H__
 
-/* mandatory for CONFIG_LL_DEBUG */
+/* mandatory for CONFIG_DEBUG_LL */
 
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	(AIPI_BASE_ADDR_VIRT + 0x0A000)
diff --git a/arch/arm/plat-mxc/include/mach/board-mx31pdk.h b/arch/arm/plat-mxc/include/mach/board-mx31pdk.h
index 2b6b316..b1e6463 100644
--- a/arch/arm/plat-mxc/include/mach/board-mx31pdk.h
+++ b/arch/arm/plat-mxc/include/mach/board-mx31pdk.h
@@ -11,7 +11,7 @@
 #ifndef __ASM_ARCH_MXC_BOARD_MX31PDK_H__
 #define __ASM_ARCH_MXC_BOARD_MX31PDK_H__
 
-/* mandatory for CONFIG_LL_DEBUG */
+/* mandatory for CONFIG_DEBUG_LL */
 
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
diff --git a/arch/arm/plat-mxc/include/mach/board-pcm037.h b/arch/arm/plat-mxc/include/mach/board-pcm037.h
index 82232ba..f0a1fa1 100644
--- a/arch/arm/plat-mxc/include/mach/board-pcm037.h
+++ b/arch/arm/plat-mxc/include/mach/board-pcm037.h
@@ -19,7 +19,7 @@
 #ifndef __ASM_ARCH_MXC_BOARD_PCM037_H__
 #define __ASM_ARCH_MXC_BOARD_PCM037_H__
 
-/* mandatory for CONFIG_LL_DEBUG */
+/* mandatory for CONFIG_DEBUG_LL */
 
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
diff --git a/arch/arm/plat-mxc/include/mach/board-pcm038.h b/arch/arm/plat-mxc/include/mach/board-pcm038.h
index 750c62a..4fcd749 100644
--- a/arch/arm/plat-mxc/include/mach/board-pcm038.h
+++ b/arch/arm/plat-mxc/include/mach/board-pcm038.h
@@ -19,7 +19,7 @@
 #ifndef __ASM_ARCH_MXC_BOARD_PCM038_H__
 #define __ASM_ARCH_MXC_BOARD_PCM038_H__
 
-/* mandatory for CONFIG_LL_DEBUG */
+/* mandatory for CONFIG_DEBUG_LL */
 
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	(AIPI_BASE_ADDR_VIRT + 0x0A000)
diff --git a/arch/arm/plat-mxc/include/mach/board-qong.h b/arch/arm/plat-mxc/include/mach/board-qong.h
index 4ff762d..04033ec 100644
--- a/arch/arm/plat-mxc/include/mach/board-qong.h
+++ b/arch/arm/plat-mxc/include/mach/board-qong.h
@@ -11,7 +11,7 @@
 #ifndef __ASM_ARCH_MXC_BOARD_QONG_H__
 #define __ASM_ARCH_MXC_BOARD_QONG_H__
 
-/* mandatory for CONFIG_LL_DEBUG */
+/* mandatory for CONFIG_DEBUG_LL */
 
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
diff --git a/arch/arm/plat-mxc/include/mach/mx1.h b/arch/arm/plat-mxc/include/mach/mx1.h
index b92e023..1000bf3 100644
--- a/arch/arm/plat-mxc/include/mach/mx1.h
+++ b/arch/arm/plat-mxc/include/mach/mx1.h
@@ -179,7 +179,7 @@
 #define DMA_REQ_UART1_T		30
 #define DMA_REQ_UART1_R		31
 
-/* mandatory for CONFIG_LL_DEBUG */
+/* mandatory for CONFIG_DEBUG_LL */
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	IO_ADDRESS(UART1_BASE_ADDR)
 
-- 
cgit v0.10.2


From 3211705f76cccd52cd3f3c2744aef798bf705702 Mon Sep 17 00:00:00 2001
From: Magnus Lilja <lilja.magnus@gmail.com>
Date: Tue, 14 Apr 2009 22:00:07 +0200
Subject: i.MX31: Add ethernet support to i.MX31 Litekit board.

This patch adds the SMSC platform device to the Litekit board.

Signed-off-by: Magnus Lilja <lilja.magnus@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lite.c b/arch/arm/mach-mx3/mx31lite.c
index abe6ca3..74734e5 100644
--- a/arch/arm/mach-mx3/mx31lite.c
+++ b/arch/arm/mach-mx3/mx31lite.c
@@ -22,6 +22,9 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/memory.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/smsc911x.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -34,6 +37,7 @@
 #include <mach/board-mx31lite.h>
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
+#include <mach/irqs.h>
 #include "devices.h"
 
 /*
@@ -46,12 +50,43 @@ static unsigned int mx31lite_pins[] = {
 	MX31_PIN_RTS1__RTS1,
 	MX31_PIN_TXD1__TXD1,
 	MX31_PIN_RXD1__RXD1,
+	/* LAN9117 IRQ pin */
+	IOMUX_MODE(MX31_PIN_SFS6, IOMUX_CONFIG_GPIO),
 };
 
 static struct imxuart_platform_data uart_pdata = {
 	.flags = IMXUART_HAVE_RTSCTS,
 };
 
+static struct smsc911x_platform_config smsc911x_config = {
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
+	.flags		= SMSC911X_USE_16BIT,
+};
+
+static struct resource smsc911x_resources[] = {
+	[0] = {
+		.start		= CS4_BASE_ADDR,
+		.end		= CS4_BASE_ADDR + 0x100,
+		.flags		= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start		= IOMUX_TO_IRQ(MX31_PIN_SFS6),
+		.end		= IOMUX_TO_IRQ(MX31_PIN_SFS6),
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device smsc911x_device = {
+	.name		= "smsc911x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(smsc911x_resources),
+	.resource	= smsc911x_resources,
+	.dev		= {
+		.platform_data = &smsc911x_config,
+	},
+};
+
 /*
  * This structure defines the MX31 memory map.
  */
@@ -87,6 +122,10 @@ static void __init mxc_board_init(void)
 				      "mx31lite");
 
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+
+	/* SMSC9117 IRQ pin */
+	gpio_direction_input(IOMUX_TO_GPIO(MX31_PIN_SFS6));
+	platform_device_register(&smsc911x_device);
 }
 
 static void __init mx31lite_timer_init(void)
-- 
cgit v0.10.2


From c1a6f12383c79b28dd7c40a4e7293c80db265f59 Mon Sep 17 00:00:00 2001
From: Vladimir Barinov <vova.barinov@gmail.com>
Date: Wed, 29 Apr 2009 04:00:48 +0400
Subject: MX27ADS: setup all pins at once in one table

Setup all pins at once in one table

Signed-off-by: Vladimir Barinov <vbarinov@embeddedalley.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/mx27ads.c b/arch/arm/mach-mx2/mx27ads.c
index ab389c2..13accbe 100644
--- a/arch/arm/mach-mx2/mx27ads.c
+++ b/arch/arm/mach-mx2/mx27ads.c
@@ -36,148 +36,38 @@
 
 #include "devices.h"
 
-/* ADS's NOR flash */
-static struct physmap_flash_data mx27ads_flash_data = {
-	.width = 2,
-};
-
-static struct resource mx27ads_flash_resource = {
-	.start = 0xc0000000,
-	.end = 0xc0000000 + 0x02000000 - 1,
-	.flags = IORESOURCE_MEM,
-
-};
-
-static struct platform_device mx27ads_nor_mtd_device = {
-	.name = "physmap-flash",
-	.id = 0,
-	.dev = {
-		.platform_data = &mx27ads_flash_data,
-	},
-	.num_resources = 1,
-	.resource = &mx27ads_flash_resource,
-};
-
-static int mxc_uart0_pins[] = {
+static unsigned int mx27ads_pins[] = {
+	/* UART0 */
 	PE12_PF_UART1_TXD,
 	PE13_PF_UART1_RXD,
 	PE14_PF_UART1_CTS,
-	PE15_PF_UART1_RTS
-};
-
-static int uart_mxc_port0_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart0_pins,
-			ARRAY_SIZE(mxc_uart0_pins), "UART0");
-}
-
-static void uart_mxc_port0_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart0_pins,
-			ARRAY_SIZE(mxc_uart0_pins));
-}
-
-static int mxc_uart1_pins[] = {
+	PE15_PF_UART1_RTS,
+	/* UART1 */
 	PE3_PF_UART2_CTS,
 	PE4_PF_UART2_RTS,
 	PE6_PF_UART2_TXD,
-	PE7_PF_UART2_RXD
-};
-
-static int uart_mxc_port1_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart1_pins,
-			ARRAY_SIZE(mxc_uart1_pins), "UART1");
-}
-
-static void uart_mxc_port1_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart1_pins,
-			ARRAY_SIZE(mxc_uart1_pins));
-}
-
-static int mxc_uart2_pins[] = {
+	PE7_PF_UART2_RXD,
+	/* UART2 */
 	PE8_PF_UART3_TXD,
 	PE9_PF_UART3_RXD,
 	PE10_PF_UART3_CTS,
-	PE11_PF_UART3_RTS
-};
-
-static int uart_mxc_port2_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart2_pins,
-			ARRAY_SIZE(mxc_uart2_pins), "UART2");
-}
-
-static void uart_mxc_port2_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart2_pins,
-			ARRAY_SIZE(mxc_uart2_pins));
-}
-
-static int mxc_uart3_pins[] = {
+	PE11_PF_UART3_RTS,
+	/* UART3 */
 	PB26_AF_UART4_RTS,
 	PB28_AF_UART4_TXD,
 	PB29_AF_UART4_CTS,
-	PB31_AF_UART4_RXD
-};
-
-static int uart_mxc_port3_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart3_pins,
-			ARRAY_SIZE(mxc_uart3_pins), "UART3");
-}
-
-static void uart_mxc_port3_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart3_pins,
-			ARRAY_SIZE(mxc_uart3_pins));
-}
-
-static int mxc_uart4_pins[] = {
+	PB31_AF_UART4_RXD,
+	/* UART4 */
 	PB18_AF_UART5_TXD,
 	PB19_AF_UART5_RXD,
 	PB20_AF_UART5_CTS,
-	PB21_AF_UART5_RTS
-};
-
-static int uart_mxc_port4_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart4_pins,
-			ARRAY_SIZE(mxc_uart4_pins), "UART4");
-}
-
-static void uart_mxc_port4_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart4_pins,
-			ARRAY_SIZE(mxc_uart4_pins));
-}
-
-static int mxc_uart5_pins[] = {
+	PB21_AF_UART5_RTS,
+	/* UART5 */
 	PB10_AF_UART6_TXD,
 	PB12_AF_UART6_CTS,
 	PB11_AF_UART6_RXD,
-	PB13_AF_UART6_RTS
-};
-
-static int uart_mxc_port5_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart5_pins,
-			ARRAY_SIZE(mxc_uart5_pins), "UART5");
-}
-
-static void uart_mxc_port5_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart5_pins,
-			ARRAY_SIZE(mxc_uart5_pins));
-}
-
-static struct platform_device *platform_devices[] __initdata = {
-	&mx27ads_nor_mtd_device,
-	&mxc_fec_device,
-};
-
-static int mxc_fec_pins[] = {
+	PB13_AF_UART6_RTS,
+	/* FEC */
 	PD0_AIN_FEC_TXD0,
 	PD1_AIN_FEC_TXD1,
 	PD2_AIN_FEC_TXD2,
@@ -195,46 +85,56 @@ static int mxc_fec_pins[] = {
 	PD14_AOUT_FEC_RX_CLK,
 	PD15_AOUT_FEC_COL,
 	PD16_AIN_FEC_TX_ER,
-	PF23_AIN_FEC_TX_EN
+	PF23_AIN_FEC_TX_EN,
 };
 
-static void gpio_fec_active(void)
-{
-	mxc_gpio_setup_multiple_pins(mxc_fec_pins,
-			ARRAY_SIZE(mxc_fec_pins), "FEC");
-}
+/* ADS's NOR flash */
+static struct physmap_flash_data mx27ads_flash_data = {
+	.width = 2,
+};
+
+static struct resource mx27ads_flash_resource = {
+	.start = 0xc0000000,
+	.end = 0xc0000000 + 0x02000000 - 1,
+	.flags = IORESOURCE_MEM,
+
+};
+
+static struct platform_device mx27ads_nor_mtd_device = {
+	.name = "physmap-flash",
+	.id = 0,
+	.dev = {
+		.platform_data = &mx27ads_flash_data,
+	},
+	.num_resources = 1,
+	.resource = &mx27ads_flash_resource,
+};
+
+static struct platform_device *platform_devices[] __initdata = {
+	&mx27ads_nor_mtd_device,
+	&mxc_fec_device,
+};
 
 static struct imxuart_platform_data uart_pdata[] = {
 	{
-		.init = uart_mxc_port0_init,
-		.exit = uart_mxc_port0_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	}, {
-		.init = uart_mxc_port1_init,
-		.exit = uart_mxc_port1_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	}, {
-		.init = uart_mxc_port2_init,
-		.exit = uart_mxc_port2_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	}, {
-		.init = uart_mxc_port3_init,
-		.exit = uart_mxc_port3_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	}, {
-		.init = uart_mxc_port4_init,
-		.exit = uart_mxc_port4_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	}, {
-		.init = uart_mxc_port5_init,
-		.exit = uart_mxc_port5_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	},
 };
 
 static void __init mx27ads_board_init(void)
 {
-	gpio_fec_active();
+	mxc_gpio_setup_multiple_pins(mx27ads_pins, ARRAY_SIZE(mx27ads_pins),
+			"mx27ads");
 
 	mxc_register_device(&mxc_uart_device0, &uart_pdata[0]);
 	mxc_register_device(&mxc_uart_device1, &uart_pdata[1]);
-- 
cgit v0.10.2


From 8d4fd258f9b0bee1ef5f1508bfed4d138a252891 Mon Sep 17 00:00:00 2001
From: Vladimir Barinov <vova.barinov@gmail.com>
Date: Wed, 29 Apr 2009 04:00:49 +0400
Subject: MX27ADS: Add NAND support

Add NAND support for MX27ADS board

Signed-off-by: Vladimir Barinov <vbarinov@embeddedalley.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/mx27ads.c b/arch/arm/mach-mx2/mx27ads.c
index 13accbe..ee8f3d9 100644
--- a/arch/arm/mach-mx2/mx27ads.c
+++ b/arch/arm/mach-mx2/mx27ads.c
@@ -33,6 +33,7 @@
 #include <mach/imx-uart.h>
 #include <mach/iomux.h>
 #include <mach/board-mx27ads.h>
+#include <mach/mxc_nand.h>
 
 #include "devices.h"
 
@@ -88,6 +89,11 @@ static unsigned int mx27ads_pins[] = {
 	PF23_AIN_FEC_TX_EN,
 };
 
+static struct mxc_nand_platform_data mx27ads_nand_board_info = {
+	.width = 1,
+	.hw_ecc = 1,
+};
+
 /* ADS's NOR flash */
 static struct physmap_flash_data mx27ads_flash_data = {
 	.width = 2,
@@ -142,6 +148,7 @@ static void __init mx27ads_board_init(void)
 	mxc_register_device(&mxc_uart_device3, &uart_pdata[3]);
 	mxc_register_device(&mxc_uart_device4, &uart_pdata[4]);
 	mxc_register_device(&mxc_uart_device5, &uart_pdata[5]);
+	mxc_register_device(&mxc_nand_device, &mx27ads_nand_board_info);
 
 	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
 }
-- 
cgit v0.10.2


From c981214ac4a304d784a290b3714cc73a39589af0 Mon Sep 17 00:00:00 2001
From: Vladimir Barinov <vova.barinov@gmail.com>
Date: Wed, 29 Apr 2009 04:00:50 +0400
Subject: MX27ADS: Add I2C support

Add I2C support for MX27ADS board

Signed-off-by: Vladimir Barinov <vbarinov@embeddedalley.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/mx27ads.c b/arch/arm/mach-mx2/mx27ads.c
index ee8f3d9..bf13145 100644
--- a/arch/arm/mach-mx2/mx27ads.c
+++ b/arch/arm/mach-mx2/mx27ads.c
@@ -23,6 +23,7 @@
 #include <linux/mtd/map.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
+#include <linux/i2c.h>
 #include <mach/common.h>
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -34,6 +35,7 @@
 #include <mach/iomux.h>
 #include <mach/board-mx27ads.h>
 #include <mach/mxc_nand.h>
+#include <mach/i2c.h>
 
 #include "devices.h"
 
@@ -87,6 +89,9 @@ static unsigned int mx27ads_pins[] = {
 	PD15_AOUT_FEC_COL,
 	PD16_AIN_FEC_TX_ER,
 	PF23_AIN_FEC_TX_EN,
+	/* I2C2 */
+	PC5_PF_I2C2_SDA,
+	PC6_PF_I2C2_SCL,
 };
 
 static struct mxc_nand_platform_data mx27ads_nand_board_info = {
@@ -116,6 +121,13 @@ static struct platform_device mx27ads_nor_mtd_device = {
 	.resource = &mx27ads_flash_resource,
 };
 
+static struct imxi2c_platform_data mx27ads_i2c_data = {
+	.bitrate = 100000,
+};
+
+static struct i2c_board_info mx27ads_i2c_devices[] = {
+};
+
 static struct platform_device *platform_devices[] __initdata = {
 	&mx27ads_nor_mtd_device,
 	&mxc_fec_device,
@@ -150,6 +162,11 @@ static void __init mx27ads_board_init(void)
 	mxc_register_device(&mxc_uart_device5, &uart_pdata[5]);
 	mxc_register_device(&mxc_nand_device, &mx27ads_nand_board_info);
 
+	/* only the i2c master 1 is used on this CPU card */
+	i2c_register_board_info(1, mx27ads_i2c_devices,
+				ARRAY_SIZE(mx27ads_i2c_devices));
+	mxc_register_device(&mxc_i2c_device1, &mx27ads_i2c_data);
+
 	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
 }
 
-- 
cgit v0.10.2


From 11cda13d48a29ff81fe7bc2e0b767e164991c094 Mon Sep 17 00:00:00 2001
From: Vladimir Barinov <vova.barinov@gmail.com>
Date: Wed, 29 Apr 2009 04:00:51 +0400
Subject: MX27ADS: Add framebuffer support

Add Sharp-QVGA panel support for MX27ADS board

Signed-off-by: Vladimir Barinov <vbarinov@embeddedalley.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/mx27ads.c b/arch/arm/mach-mx2/mx27ads.c
index bf13145..41d67f2 100644
--- a/arch/arm/mach-mx2/mx27ads.c
+++ b/arch/arm/mach-mx2/mx27ads.c
@@ -36,6 +36,7 @@
 #include <mach/board-mx27ads.h>
 #include <mach/mxc_nand.h>
 #include <mach/i2c.h>
+#include <mach/imxfb.h>
 
 #include "devices.h"
 
@@ -92,6 +93,34 @@ static unsigned int mx27ads_pins[] = {
 	/* I2C2 */
 	PC5_PF_I2C2_SDA,
 	PC6_PF_I2C2_SCL,
+	/* FB */
+	PA5_PF_LSCLK,
+	PA6_PF_LD0,
+	PA7_PF_LD1,
+	PA8_PF_LD2,
+	PA9_PF_LD3,
+	PA10_PF_LD4,
+	PA11_PF_LD5,
+	PA12_PF_LD6,
+	PA13_PF_LD7,
+	PA14_PF_LD8,
+	PA15_PF_LD9,
+	PA16_PF_LD10,
+	PA17_PF_LD11,
+	PA18_PF_LD12,
+	PA19_PF_LD13,
+	PA20_PF_LD14,
+	PA21_PF_LD15,
+	PA22_PF_LD16,
+	PA23_PF_LD17,
+	PA24_PF_REV,
+	PA25_PF_CLS,
+	PA26_PF_PS,
+	PA27_PF_SPL_SPR,
+	PA28_PF_HSYNC,
+	PA29_PF_VSYNC,
+	PA30_PF_CONTRAST,
+	PA31_PF_OE_ACD,
 };
 
 static struct mxc_nand_platform_data mx27ads_nand_board_info = {
@@ -128,6 +157,46 @@ static struct imxi2c_platform_data mx27ads_i2c_data = {
 static struct i2c_board_info mx27ads_i2c_devices[] = {
 };
 
+void lcd_power(int on)
+{
+	if (on)
+		__raw_writew(PBC_BCTRL1_LCDON, PBC_BCTRL1_SET_REG);
+	else
+		__raw_writew(PBC_BCTRL1_LCDON, PBC_BCTRL1_CLEAR_REG);
+}
+
+static struct imx_fb_platform_data mx27ads_fb_data = {
+	.pixclock	= 188679,
+	.xres		= 240,
+	.yres		= 320,
+
+	.bpp		= 16,
+	.hsync_len	= 1,
+	.left_margin	= 9,
+	.right_margin	= 16,
+
+	.vsync_len	= 1,
+	.upper_margin	= 7,
+	.lower_margin	= 9,
+	.fixed_screen_cpu = 0,
+
+	/*
+	 * - HSYNC active high
+	 * - VSYNC active high
+	 * - clk notenabled while idle
+	 * - clock inverted
+	 * - data not inverted
+	 * - data enable low active
+	 * - enable sharp mode
+	 */
+	.pcr		= 0xFB008BC0,
+	.pwmr		= 0x00A903FF,
+	.lscr1		= 0x00120300,
+	.dmacr		= 0x00020010,
+
+	.lcd_power	= lcd_power,
+};
+
 static struct platform_device *platform_devices[] __initdata = {
 	&mx27ads_nor_mtd_device,
 	&mxc_fec_device,
@@ -166,6 +235,7 @@ static void __init mx27ads_board_init(void)
 	i2c_register_board_info(1, mx27ads_i2c_devices,
 				ARRAY_SIZE(mx27ads_i2c_devices));
 	mxc_register_device(&mxc_i2c_device1, &mx27ads_i2c_data);
+	mxc_register_device(&mxc_fb_device, &mx27ads_fb_data);
 
 	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
 }
-- 
cgit v0.10.2


From 9366d8f67c234815e99bc8518a92da7f0fa3e026 Mon Sep 17 00:00:00 2001
From: Vladimir Barinov <vova.barinov@gmail.com>
Date: Wed, 29 Apr 2009 04:00:52 +0400
Subject: MX27ADS: Add 1-wire support

Add 1-wire support for MX27ADS board

Signed-off-by: Vladimir Barinov <vbarinov@embeddedalley.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/mx27ads.c b/arch/arm/mach-mx2/mx27ads.c
index 41d67f2..90268a5 100644
--- a/arch/arm/mach-mx2/mx27ads.c
+++ b/arch/arm/mach-mx2/mx27ads.c
@@ -121,6 +121,8 @@ static unsigned int mx27ads_pins[] = {
 	PA29_PF_VSYNC,
 	PA30_PF_CONTRAST,
 	PA31_PF_OE_ACD,
+	/* OWIRE */
+	PE16_AF_OWIRE,
 };
 
 static struct mxc_nand_platform_data mx27ads_nand_board_info = {
@@ -200,6 +202,7 @@ static struct imx_fb_platform_data mx27ads_fb_data = {
 static struct platform_device *platform_devices[] __initdata = {
 	&mx27ads_nor_mtd_device,
 	&mxc_fec_device,
+	&mxc_w1_master_device,
 };
 
 static struct imxuart_platform_data uart_pdata[] = {
-- 
cgit v0.10.2


From 60c24dc79f01edfaa14290ada39a9074050ffbcc Mon Sep 17 00:00:00 2001
From: Vladimir Barinov <vova.barinov@gmail.com>
Date: Thu, 30 Apr 2009 15:31:20 +0400
Subject: MX27ADS: Add SDHC support

Add SDHC support for MX27ADS board

Signed-off-by: Vladimir Barinov <vbarinov@embeddedalley.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/mx27ads.c b/arch/arm/mach-mx2/mx27ads.c
index 90268a5..02dadda 100644
--- a/arch/arm/mach-mx2/mx27ads.c
+++ b/arch/arm/mach-mx2/mx27ads.c
@@ -24,6 +24,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/i2c.h>
+#include <linux/irq.h>
 #include <mach/common.h>
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -37,6 +38,7 @@
 #include <mach/mxc_nand.h>
 #include <mach/i2c.h>
 #include <mach/imxfb.h>
+#include <mach/mmc.h>
 
 #include "devices.h"
 
@@ -123,6 +125,20 @@ static unsigned int mx27ads_pins[] = {
 	PA31_PF_OE_ACD,
 	/* OWIRE */
 	PE16_AF_OWIRE,
+	/* SDHC1*/
+	PE18_PF_SD1_D0,
+	PE19_PF_SD1_D1,
+	PE20_PF_SD1_D2,
+	PE21_PF_SD1_D3,
+	PE22_PF_SD1_CMD,
+	PE23_PF_SD1_CLK,
+	/* SDHC2*/
+	PB4_PF_SD2_D0,
+	PB5_PF_SD2_D1,
+	PB6_PF_SD2_D2,
+	PB7_PF_SD2_D3,
+	PB8_PF_SD2_CMD,
+	PB9_PF_SD2_CLK,
 };
 
 static struct mxc_nand_platform_data mx27ads_nand_board_info = {
@@ -199,6 +215,40 @@ static struct imx_fb_platform_data mx27ads_fb_data = {
 	.lcd_power	= lcd_power,
 };
 
+static int mx27ads_sdhc1_init(struct device *dev, irq_handler_t detect_irq,
+			      void *data)
+{
+	return request_irq(IRQ_GPIOE(21), detect_irq, IRQF_TRIGGER_RISING,
+			   "sdhc1-card-detect", data);
+}
+
+static int mx27ads_sdhc2_init(struct device *dev, irq_handler_t detect_irq,
+			      void *data)
+{
+	return request_irq(IRQ_GPIOB(7), detect_irq, IRQF_TRIGGER_RISING,
+			   "sdhc2-card-detect", data);
+}
+
+static void mx27ads_sdhc1_exit(struct device *dev, void *data)
+{
+	free_irq(IRQ_GPIOE(21), data);
+}
+
+static void mx27ads_sdhc2_exit(struct device *dev, void *data)
+{
+	free_irq(IRQ_GPIOB(7), data);
+}
+
+static struct imxmmc_platform_data sdhc1_pdata = {
+	.init = mx27ads_sdhc1_init,
+	.exit = mx27ads_sdhc1_exit,
+};
+
+static struct imxmmc_platform_data sdhc2_pdata = {
+	.init = mx27ads_sdhc2_init,
+	.exit = mx27ads_sdhc2_exit,
+};
+
 static struct platform_device *platform_devices[] __initdata = {
 	&mx27ads_nor_mtd_device,
 	&mxc_fec_device,
@@ -239,6 +289,8 @@ static void __init mx27ads_board_init(void)
 				ARRAY_SIZE(mx27ads_i2c_devices));
 	mxc_register_device(&mxc_i2c_device1, &mx27ads_i2c_data);
 	mxc_register_device(&mxc_fb_device, &mx27ads_fb_data);
+	mxc_register_device(&mxc_sdhc_device0, &sdhc1_pdata);
+	mxc_register_device(&mxc_sdhc_device1, &sdhc2_pdata);
 
 	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
 }
-- 
cgit v0.10.2


From f231ea441a6e4b025d971b023ff010094626fdb0 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 16 Apr 2009 15:45:45 +0200
Subject: pcm038: Setup all iomux pins at once

Also, remove usage of set_irq_type after request_irq.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/pcm038.c b/arch/arm/mach-mx2/pcm038.c
index 7f3a62e..35fb96fe9 100644
--- a/arch/arm/mach-mx2/pcm038.c
+++ b/arch/arm/mach-mx2/pcm038.c
@@ -39,6 +39,63 @@
 
 #include "devices.h"
 
+static int pcm038_pins[] = {
+	/* UART1 */
+	PE12_PF_UART1_TXD,
+	PE13_PF_UART1_RXD,
+	PE14_PF_UART1_CTS,
+	PE15_PF_UART1_RTS,
+	/* UART2 */
+	PE3_PF_UART2_CTS,
+	PE4_PF_UART2_RTS,
+	PE6_PF_UART2_TXD,
+	PE7_PF_UART2_RXD,
+	/* UART3 */
+	PE8_PF_UART3_TXD,
+	PE9_PF_UART3_RXD,
+	PE10_PF_UART3_CTS,
+	PE11_PF_UART3_RTS,
+	/* FEC */
+	PD0_AIN_FEC_TXD0,
+	PD1_AIN_FEC_TXD1,
+	PD2_AIN_FEC_TXD2,
+	PD3_AIN_FEC_TXD3,
+	PD4_AOUT_FEC_RX_ER,
+	PD5_AOUT_FEC_RXD1,
+	PD6_AOUT_FEC_RXD2,
+	PD7_AOUT_FEC_RXD3,
+	PD8_AF_FEC_MDIO,
+	PD9_AIN_FEC_MDC,
+	PD10_AOUT_FEC_CRS,
+	PD11_AOUT_FEC_TX_CLK,
+	PD12_AOUT_FEC_RXD0,
+	PD13_AOUT_FEC_RX_DV,
+	PD14_AOUT_FEC_RX_CLK,
+	PD15_AOUT_FEC_COL,
+	PD16_AIN_FEC_TX_ER,
+	PF23_AIN_FEC_TX_EN,
+	/* I2C2 */
+	PC5_PF_I2C2_SDA,
+	PC6_PF_I2C2_SCL,
+	/* SPI1 */
+	PD25_PF_CSPI1_RDY,
+	PD27_PF_CSPI1_SS1,
+	PD28_PF_CSPI1_SS0,
+	PD29_PF_CSPI1_SCLK,
+	PD30_PF_CSPI1_MISO,
+	PD31_PF_CSPI1_MOSI,
+	/* SSI1 */
+	PC20_PF_SSI1_FS,
+	PC21_PF_SSI1_RXD,
+	PC22_PF_SSI1_TXD,
+	PC23_PF_SSI1_CLK,
+	/* SSI4 */
+	PC16_PF_SSI4_FS,
+	PC17_PF_SSI4_RXD,
+	PC18_PF_SSI4_TXD,
+	PC19_PF_SSI4_CLK,
+};
+
 /*
  * Phytec's PCM038 comes with 2MiB battery buffered SRAM,
  * 16 bit width
@@ -88,104 +145,16 @@ static struct platform_device pcm038_nor_mtd_device = {
 	.resource = &pcm038_flash_resource,
 };
 
-static int mxc_uart0_pins[] = {
-	PE12_PF_UART1_TXD,
-	PE13_PF_UART1_RXD,
-	PE14_PF_UART1_CTS,
-	PE15_PF_UART1_RTS
-};
-
-static int uart_mxc_port0_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart0_pins,
-			ARRAY_SIZE(mxc_uart0_pins), "UART0");
-}
-
-static void uart_mxc_port0_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart0_pins,
-			ARRAY_SIZE(mxc_uart0_pins));
-}
-
-static int mxc_uart1_pins[] = {
-	PE3_PF_UART2_CTS,
-	PE4_PF_UART2_RTS,
-	PE6_PF_UART2_TXD,
-	PE7_PF_UART2_RXD
-};
-
-static int uart_mxc_port1_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart1_pins,
-			ARRAY_SIZE(mxc_uart1_pins), "UART1");
-}
-
-static void uart_mxc_port1_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart1_pins,
-			ARRAY_SIZE(mxc_uart1_pins));
-}
-
-static int mxc_uart2_pins[] = { PE8_PF_UART3_TXD,
-				PE9_PF_UART3_RXD,
-				PE10_PF_UART3_CTS,
-				PE11_PF_UART3_RTS };
-
-static int uart_mxc_port2_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart2_pins,
-			ARRAY_SIZE(mxc_uart2_pins), "UART2");
-}
-
-static void uart_mxc_port2_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart2_pins,
-			ARRAY_SIZE(mxc_uart2_pins));
-}
-
 static struct imxuart_platform_data uart_pdata[] = {
 	{
-		.init = uart_mxc_port0_init,
-		.exit = uart_mxc_port0_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	}, {
-		.init = uart_mxc_port1_init,
-		.exit = uart_mxc_port1_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	}, {
-		.init = uart_mxc_port2_init,
-		.exit = uart_mxc_port2_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	},
 };
 
-static int mxc_fec_pins[] = {
-	PD0_AIN_FEC_TXD0,
-	PD1_AIN_FEC_TXD1,
-	PD2_AIN_FEC_TXD2,
-	PD3_AIN_FEC_TXD3,
-	PD4_AOUT_FEC_RX_ER,
-	PD5_AOUT_FEC_RXD1,
-	PD6_AOUT_FEC_RXD2,
-	PD7_AOUT_FEC_RXD3,
-	PD8_AF_FEC_MDIO,
-	PD9_AIN_FEC_MDC,
-	PD10_AOUT_FEC_CRS,
-	PD11_AOUT_FEC_TX_CLK,
-	PD12_AOUT_FEC_RXD0,
-	PD13_AOUT_FEC_RX_DV,
-	PD14_AOUT_FEC_RX_CLK,
-	PD15_AOUT_FEC_COL,
-	PD16_AIN_FEC_TX_ER,
-	PF23_AIN_FEC_TX_EN
-};
-
-static void gpio_fec_active(void)
-{
-	mxc_gpio_setup_multiple_pins(mxc_fec_pins,
-			ARRAY_SIZE(mxc_fec_pins), "FEC");
-}
-
 static struct mxc_nand_platform_data pcm038_nand_board_info = {
 	.width = 1,
 	.hw_ecc = 1,
@@ -208,26 +177,8 @@ static void __init pcm038_init_sram(void)
 }
 
 #ifdef CONFIG_I2C_IMX
-static int mxc_i2c1_pins[] = {
-	PC5_PF_I2C2_SDA,
-	PC6_PF_I2C2_SCL
-};
-
-static int pcm038_i2c_1_init(struct device *dev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_i2c1_pins, ARRAY_SIZE(mxc_i2c1_pins),
-			"I2C1");
-}
-
-static void pcm038_i2c_1_exit(struct device *dev)
-{
-	mxc_gpio_release_multiple_pins(mxc_i2c1_pins, ARRAY_SIZE(mxc_i2c1_pins));
-}
-
 static struct imxi2c_platform_data pcm038_i2c_1_data = {
 	.bitrate = 100000,
-	.init = pcm038_i2c_1_init,
-	.exit = pcm038_i2c_1_exit,
 };
 
 static struct at24_platform_data board_eeprom = {
@@ -254,7 +205,9 @@ static struct i2c_board_info pcm038_i2c_devices[] = {
 
 static void __init pcm038_init(void)
 {
-	gpio_fec_active();
+	mxc_gpio_setup_multiple_pins(pcm038_pins, ARRAY_SIZE(pcm038_pins),
+			"PCM038");
+
 	pcm038_init_sram();
 
 	mxc_register_device(&mxc_uart_device0, &uart_pdata[0]);
diff --git a/arch/arm/mach-mx2/pcm970-baseboard.c b/arch/arm/mach-mx2/pcm970-baseboard.c
index 16f3344..6dcd625 100644
--- a/arch/arm/mach-mx2/pcm970-baseboard.c
+++ b/arch/arm/mach-mx2/pcm970-baseboard.c
@@ -30,57 +30,93 @@
 
 #include "devices.h"
 
-static int pcm970_sdhc2_get_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO_PORTC + 28);
-}
-
-static int pcm970_sdhc2_pins[] = {
+static int pcm970_pins[] = {
+	/* SDHC */
 	PB4_PF_SD2_D0,
 	PB5_PF_SD2_D1,
 	PB6_PF_SD2_D2,
 	PB7_PF_SD2_D3,
 	PB8_PF_SD2_CMD,
 	PB9_PF_SD2_CLK,
+	GPIO_PORTC | 28 | GPIO_GPIO | GPIO_IN, /* card detect */
+	/* display */
+	PA5_PF_LSCLK,
+	PA6_PF_LD0,
+	PA7_PF_LD1,
+	PA8_PF_LD2,
+	PA9_PF_LD3,
+	PA10_PF_LD4,
+	PA11_PF_LD5,
+	PA12_PF_LD6,
+	PA13_PF_LD7,
+	PA14_PF_LD8,
+	PA15_PF_LD9,
+	PA16_PF_LD10,
+	PA17_PF_LD11,
+	PA18_PF_LD12,
+	PA19_PF_LD13,
+	PA20_PF_LD14,
+	PA21_PF_LD15,
+	PA22_PF_LD16,
+	PA23_PF_LD17,
+	PA24_PF_REV,
+	PA25_PF_CLS,
+	PA26_PF_PS,
+	PA27_PF_SPL_SPR,
+	PA28_PF_HSYNC,
+	PA29_PF_VSYNC,
+	PA30_PF_CONTRAST,
+	PA31_PF_OE_ACD,
+	/*
+	 * it seems the data line misses a pullup, so we must enable
+	 * the internal pullup as a local workaround
+	 */
+	PD17_PF_I2C_DATA | GPIO_PUEN,
+	PD18_PF_I2C_CLK,
+	/* Camera */
+	PB10_PF_CSI_D0,
+	PB11_PF_CSI_D1,
+	PB12_PF_CSI_D2,
+	PB13_PF_CSI_D3,
+	PB14_PF_CSI_D4,
+	PB15_PF_CSI_MCLK,
+	PB16_PF_CSI_PIXCLK,
+	PB17_PF_CSI_D5,
+	PB18_PF_CSI_D6,
+	PB19_PF_CSI_D7,
+	PB20_PF_CSI_VSYNC,
+	PB21_PF_CSI_HSYNC,
 };
 
+static int pcm970_sdhc2_get_ro(struct device *dev)
+{
+	return gpio_get_value(GPIO_PORTC + 28);
+}
+
 static int pcm970_sdhc2_init(struct device *dev, irq_handler_t detect_irq, void *data)
 {
 	int ret;
 
-	ret = mxc_gpio_setup_multiple_pins(pcm970_sdhc2_pins,
-		ARRAY_SIZE(pcm970_sdhc2_pins), "sdhc2");
-	if(ret)
-		return ret;
-
-	ret = request_irq(IRQ_GPIOC(29), detect_irq, 0,
+	ret = request_irq(IRQ_GPIOC(29), detect_irq, IRQF_TRIGGER_FALLING,
 				"imx-mmc-detect", data);
 	if (ret)
-		goto out_release_gpio;
-
-	set_irq_type(IRQ_GPIOC(29), IRQF_TRIGGER_FALLING);
+		return ret;
 
 	ret = gpio_request(GPIO_PORTC + 28, "imx-mmc-ro");
-	if (ret)
-		goto out_release_gpio;
+	if (ret) {
+		free_irq(IRQ_GPIOC(29), data);
+		return ret;
+	}
 
-	mxc_gpio_mode((GPIO_PORTC | 28) | GPIO_GPIO | GPIO_IN);
 	gpio_direction_input(GPIO_PORTC + 28);
 
 	return 0;
-
-out_release_gpio:
-	mxc_gpio_release_multiple_pins(pcm970_sdhc2_pins,
-			ARRAY_SIZE(pcm970_sdhc2_pins));
-	return ret;
 }
 
 static void pcm970_sdhc2_exit(struct device *dev, void *data)
 {
 	free_irq(IRQ_GPIOC(29), data);
 	gpio_free(GPIO_PORTC + 28);
-	mxc_gpio_release_multiple_pins(pcm970_sdhc2_pins,
-			ARRAY_SIZE(pcm970_sdhc2_pins));
 }
 
 static struct imxmmc_platform_data sdhc_pdata = {
@@ -89,27 +125,6 @@ static struct imxmmc_platform_data sdhc_pdata = {
 	.exit = pcm970_sdhc2_exit,
 };
 
-static int mxc_fb_pins[] = {
-	PA5_PF_LSCLK,	PA6_PF_LD0,	PA7_PF_LD1,	PA8_PF_LD2,
-	PA9_PF_LD3,	PA10_PF_LD4,	PA11_PF_LD5,	PA12_PF_LD6,
-	PA13_PF_LD7,	PA14_PF_LD8,	PA15_PF_LD9,	PA16_PF_LD10,
-	PA17_PF_LD11,	PA18_PF_LD12,	PA19_PF_LD13,	PA20_PF_LD14,
-	PA21_PF_LD15,	PA22_PF_LD16,	PA23_PF_LD17,	PA24_PF_REV,
-	PA25_PF_CLS,	PA26_PF_PS,	PA27_PF_SPL_SPR, PA28_PF_HSYNC,
-	PA29_PF_VSYNC,	PA30_PF_CONTRAST, PA31_PF_OE_ACD
-};
-
-static int pcm038_fb_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_fb_pins,
-			ARRAY_SIZE(mxc_fb_pins), "FB");
-}
-
-static void pcm038_fb_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_fb_pins, ARRAY_SIZE(mxc_fb_pins));
-}
-
 /*
  * Connected is a portrait Sharp-QVGA display
  * of type: LQ035Q7DH06
@@ -142,9 +157,6 @@ static struct imx_fb_platform_data pcm038_fb_data = {
 	.pwmr		= 0x00A903FF,
 	.lscr1		= 0x00120300,
 	.dmacr		= 0x00020010,
-
-	.init = pcm038_fb_init,
-	.exit = pcm038_fb_exit,
 };
 
 /*
@@ -155,6 +167,9 @@ static struct imx_fb_platform_data pcm038_fb_data = {
  */
 void __init pcm970_baseboard_init(void)
 {
+	mxc_gpio_setup_multiple_pins(pcm970_pins, ARRAY_SIZE(pcm970_pins),
+			"PCM970");
+
 	mxc_register_device(&mxc_fb_device, &pcm038_fb_data);
 	mxc_register_device(&mxc_sdhc_device1, &sdhc_pdata);
 }
-- 
cgit v0.10.2


From bff0b53bd77df7e44a1d6b1a13162e26def5d17a Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 29 Apr 2009 09:17:03 +0200
Subject: PCM038: Remove unnecessary ifdefs, reorder includes alphabetically

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/pcm038.c b/arch/arm/mach-mx2/pcm038.c
index 35fb96fe9..a4628d0 100644
--- a/arch/arm/mach-mx2/pcm038.c
+++ b/arch/arm/mach-mx2/pcm038.c
@@ -17,24 +17,23 @@
  * MA 02110-1301, USA.
  */
 
-#include <linux/platform_device.h>
-#include <linux/mtd/physmap.h>
-#include <linux/mtd/plat-ram.h>
-#include <linux/io.h>
 #include <linux/i2c.h>
 #include <linux/i2c/at24.h>
+#include <linux/io.h>
+#include <linux/mtd/plat-ram.h>
+#include <linux/mtd/physmap.h>
+#include <linux/platform_device.h>
 
-#include <asm/mach/arch.h>
 #include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+
+#include <mach/board-pcm038.h>
 #include <mach/common.h>
 #include <mach/hardware.h>
-#include <mach/iomux.h>
-#ifdef CONFIG_I2C_IMX
 #include <mach/i2c.h>
-#endif
-#include <asm/mach/time.h>
+#include <mach/iomux.h>
 #include <mach/imx-uart.h>
-#include <mach/board-pcm038.h>
 #include <mach/mxc_nand.h>
 
 #include "devices.h"
@@ -176,7 +175,6 @@ static void __init pcm038_init_sram(void)
 	__raw_writel(0x22220a00, CSCR_A(1));
 }
 
-#ifdef CONFIG_I2C_IMX
 static struct imxi2c_platform_data pcm038_i2c_1_data = {
 	.bitrate = 100000,
 };
@@ -201,7 +199,6 @@ static struct i2c_board_info pcm038_i2c_devices[] = {
 		.type = "lm75"
 	}
 };
-#endif
 
 static void __init pcm038_init(void)
 {
@@ -217,13 +214,11 @@ static void __init pcm038_init(void)
 	mxc_gpio_mode(PE16_AF_OWIRE);
 	mxc_register_device(&mxc_nand_device, &pcm038_nand_board_info);
 
-#ifdef CONFIG_I2C_IMX
 	/* only the i2c master 1 is used on this CPU card */
 	i2c_register_board_info(1, pcm038_i2c_devices,
 				ARRAY_SIZE(pcm038_i2c_devices));
 
 	mxc_register_device(&mxc_i2c_device1, &pcm038_i2c_1_data);
-#endif
 
 	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
 
diff --git a/arch/arm/mach-mx2/pcm970-baseboard.c b/arch/arm/mach-mx2/pcm970-baseboard.c
index 6dcd625..6a3acaf 100644
--- a/arch/arm/mach-mx2/pcm970-baseboard.c
+++ b/arch/arm/mach-mx2/pcm970-baseboard.c
@@ -16,17 +16,17 @@
  * MA 02110-1301, USA.
  */
 
-#include <linux/platform_device.h>
 #include <linux/gpio.h>
 #include <linux/irq.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach/arch.h>
 
-#include <mach/hardware.h>
 #include <mach/common.h>
-#include <mach/mmc.h>
-#include <mach/imxfb.h>
 #include <mach/iomux.h>
+#include <mach/imxfb.h>
+#include <mach/hardware.h>
+#include <mach/mmc.h>
 
 #include "devices.h"
 
-- 
cgit v0.10.2


From eb05bbeb65cc1015bdd5c4cb72fd1694a3288e97 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lg@denx.de>
Date: Mon, 4 May 2009 13:13:52 +0200
Subject: ARM: add USB device support to pcm037

Add OTG device definition and resources to i.MX31 and a pure USB device mode
support to the pcm037 board.

Signed-off-by: Guennadi Liakhovetski <lg@denx.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/devices.c b/arch/arm/mach-mx3/devices.c
index 227a538..f55c986 100644
--- a/arch/arm/mach-mx3/devices.c
+++ b/arch/arm/mach-mx3/devices.c
@@ -17,6 +17,7 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/serial.h>
@@ -349,6 +350,32 @@ struct platform_device mx3_fb = {
        },
 };
 
+static struct resource otg_resources[] = {
+	{
+		.start	= OTG_BASE_ADDR,
+		.end	= OTG_BASE_ADDR + 0x1ff,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= MXC_INT_USB3,
+		.end	= MXC_INT_USB3,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static u64 otg_dmamask = DMA_BIT_MASK(32);
+
+/* OTG gadget device */
+struct platform_device mxc_otg_udc_device = {
+	.name		= "fsl-usb2-udc",
+	.id		= -1,
+	.dev		= {
+		.dma_mask		= &otg_dmamask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+	},
+	.resource	= otg_resources,
+	.num_resources	= ARRAY_SIZE(otg_resources),
+};
+
 #ifdef CONFIG_ARCH_MX35
 static struct resource mxc_fec_resources[] = {
 	{
diff --git a/arch/arm/mach-mx3/devices.h b/arch/arm/mach-mx3/devices.h
index 88c04b2..27779c3 100644
--- a/arch/arm/mach-mx3/devices.h
+++ b/arch/arm/mach-mx3/devices.h
@@ -14,3 +14,4 @@ extern struct platform_device mx3_fb;
 extern struct platform_device mxc_fec_device;
 extern struct platform_device mxcsdhc_device0;
 extern struct platform_device mxcsdhc_device1;
+extern struct platform_device mxc_otg_udc_device;
diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index 5d7e0ca..350cff5 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -31,6 +31,7 @@
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
 #include <linux/irq.h>
+#include <linux/fsl_devices.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -131,6 +132,54 @@ static struct resource pcm037_flash_resource = {
 	.flags	= IORESOURCE_MEM,
 };
 
+static int usbotg_pins[] = {
+	MX31_PIN_USBOTG_DATA0__USBOTG_DATA0,
+	MX31_PIN_USBOTG_DATA1__USBOTG_DATA1,
+	MX31_PIN_USBOTG_DATA2__USBOTG_DATA2,
+	MX31_PIN_USBOTG_DATA3__USBOTG_DATA3,
+	MX31_PIN_USBOTG_DATA4__USBOTG_DATA4,
+	MX31_PIN_USBOTG_DATA5__USBOTG_DATA5,
+	MX31_PIN_USBOTG_DATA6__USBOTG_DATA6,
+	MX31_PIN_USBOTG_DATA7__USBOTG_DATA7,
+	MX31_PIN_USBOTG_CLK__USBOTG_CLK,
+	MX31_PIN_USBOTG_DIR__USBOTG_DIR,
+	MX31_PIN_USBOTG_NXT__USBOTG_NXT,
+	MX31_PIN_USBOTG_STP__USBOTG_STP,
+};
+
+/* USB OTG HS port */
+static int __init gpio_usbotg_hs_activate(void)
+{
+	int ret = mxc_iomux_setup_multiple_pins(usbotg_pins,
+					ARRAY_SIZE(usbotg_pins), "usbotg");
+
+	if (ret < 0) {
+		printk(KERN_ERR "Cannot set up OTG pins\n");
+		return ret;
+	}
+
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA0, PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA1, PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA2, PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA3, PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA4, PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA5, PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA6, PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA7, PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_CLK,   PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DIR,   PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_NXT,   PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_STP,   PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST);
+
+	return 0;
+}
+
+/* OTG config */
+static struct fsl_usb2_platform_data usb_pdata = {
+	.operating_mode	= FSL_USB2_DR_DEVICE,
+	.phy_mode	= FSL_USB2_PHY_ULPI,
+};
+
 static struct platform_device pcm037_flash = {
 	.name	= "physmap-flash",
 	.id	= 0,
@@ -345,6 +394,8 @@ static void __init mxc_board_init(void)
 	mxc_register_device(&mxcsdhc_device0, &sdhc_pdata);
 	mxc_register_device(&mx3_ipu, &mx3_ipu_data);
 	mxc_register_device(&mx3_fb, &mx3fb_pdata);
+	if (!gpio_usbotg_hs_activate())
+		mxc_register_device(&mxc_otg_udc_device, &usb_pdata);
 }
 
 static void __init pcm037_timer_init(void)
-- 
cgit v0.10.2


From 5b68421f41b47dd9311b81d9e0d7e9781c2983f3 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 29 Apr 2009 13:14:19 +0200
Subject: mx1ads: remove ifdefs, reorder include alphabetically

header is part of the kernel now.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx1/mx1ads.c b/arch/arm/mach-mx1/mx1ads.c
index 7a648c1..1d28598 100644
--- a/arch/arm/mach-mx1/mx1ads.c
+++ b/arch/arm/mach-mx1/mx1ads.c
@@ -12,24 +12,24 @@
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pcf857x.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/physmap.h>
-#include <linux/i2c.h>
-#include <linux/i2c/pcf857x.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 
-#include <mach/irqs.h>
-#include <mach/hardware.h>
 #include <mach/common.h>
-#include <mach/imx-uart.h>
-#include <mach/irqs.h>
+#include <mach/hardware.h>
 #include <mach/i2c.h>
+#include <mach/imx-uart.h>
 #include <mach/iomux.h>
+#include <mach/irqs.h>
+
 #include "devices.h"
 
 /*
@@ -111,7 +111,6 @@ static struct platform_device flash_device = {
 /*
  * I2C
  */
-
 static int i2c_pins[] = {
 	PA15_PF_I2C_SDA,
 	PA16_PF_I2C_SCL,
-- 
cgit v0.10.2


From 5ae07daa488e78994db731d0fd133967a5cf0ceb Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 29 Apr 2009 13:17:21 +0200
Subject: mx1ads: setup iomux pins at once

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx1/mx1ads.c b/arch/arm/mach-mx1/mx1ads.c
index 1d28598..55d3289 100644
--- a/arch/arm/mach-mx1/mx1ads.c
+++ b/arch/arm/mach-mx1/mx1ads.c
@@ -32,57 +32,36 @@
 
 #include "devices.h"
 
-/*
- * UARTs platform data
- */
-static int mxc_uart1_pins[] = {
+static int mx1ads_pins[] = {
+	/* UART1 */
 	PC9_PF_UART1_CTS,
 	PC10_PF_UART1_RTS,
 	PC11_PF_UART1_TXD,
 	PC12_PF_UART1_RXD,
-};
-
-static int uart1_mxc_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart1_pins,
-			ARRAY_SIZE(mxc_uart1_pins), "UART1");
-}
-
-static int uart1_mxc_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart1_pins,
-			ARRAY_SIZE(mxc_uart1_pins));
-	return 0;
-}
-
-static int mxc_uart2_pins[] = {
+	/* UART2 */
 	PB28_PF_UART2_CTS,
 	PB29_PF_UART2_RTS,
 	PB30_PF_UART2_TXD,
 	PB31_PF_UART2_RXD,
+	/* I2C */
+	PA15_PF_I2C_SDA,
+	PA16_PF_I2C_SCL,
+	/* SPI */
+	PC13_PF_SPI1_SPI_RDY,
+	PC14_PF_SPI1_SCLK,
+	PC15_PF_SPI1_SS,
+	PC16_PF_SPI1_MISO,
+	PC17_PF_SPI1_MOSI,
 };
 
-static int uart2_mxc_init(struct platform_device *pdev)
-{
-	return mxc_gpio_setup_multiple_pins(mxc_uart2_pins,
-			ARRAY_SIZE(mxc_uart2_pins), "UART2");
-}
-
-static int uart2_mxc_exit(struct platform_device *pdev)
-{
-	mxc_gpio_release_multiple_pins(mxc_uart2_pins,
-			ARRAY_SIZE(mxc_uart2_pins));
-	return 0;
-}
+/*
+ * UARTs platform data
+ */
 
 static struct imxuart_platform_data uart_pdata[] = {
 	{
-		.init = uart1_mxc_init,
-		.exit = uart1_mxc_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	}, {
-		.init = uart2_mxc_init,
-		.exit = uart2_mxc_exit,
 		.flags = IMXUART_HAVE_RTSCTS,
 	},
 };
@@ -111,23 +90,6 @@ static struct platform_device flash_device = {
 /*
  * I2C
  */
-static int i2c_pins[] = {
-	PA15_PF_I2C_SDA,
-	PA16_PF_I2C_SCL,
-};
-
-static int i2c_init(struct device *dev)
-{
-	return mxc_gpio_setup_multiple_pins(i2c_pins,
-			ARRAY_SIZE(i2c_pins), "I2C");
-}
-
-static void i2c_exit(struct device *dev)
-{
-	mxc_gpio_release_multiple_pins(i2c_pins,
-			ARRAY_SIZE(i2c_pins));
-}
-
 static struct pcf857x_platform_data pcf857x_data[] = {
 	{
 		.gpio_base = 4 * 32,
@@ -138,8 +100,6 @@ static struct pcf857x_platform_data pcf857x_data[] = {
 
 static struct imxi2c_platform_data mx1ads_i2c_data = {
 	.bitrate = 100000,
-	.init = i2c_init,
-	.exit = i2c_exit,
 };
 
 static struct i2c_board_info mx1ads_i2c_devices[] = {
@@ -159,6 +119,9 @@ static struct i2c_board_info mx1ads_i2c_devices[] = {
  */
 static void __init mx1ads_init(void)
 {
+	mxc_gpio_setup_multiple_pins(mx1ads_pins,
+		ARRAY_SIZE(mx1ads_pins), "mx1ads");
+
 	/* UART */
 	mxc_register_device(&imx_uart1_device, &uart_pdata[0]);
 	mxc_register_device(&imx_uart2_device, &uart_pdata[1]);
-- 
cgit v0.10.2


From 345569a2ed84478ce860a32555edd71bb321e48b Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 29 Apr 2009 13:41:06 +0200
Subject: mx1: add missing include

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx1/generic.c b/arch/arm/mach-mx1/generic.c
index 9c3528d..7622c9b 100644
--- a/arch/arm/mach-mx1/generic.c
+++ b/arch/arm/mach-mx1/generic.c
@@ -26,6 +26,7 @@
 
 #include <asm/mach/map.h>
 
+#include <mach/common.h>
 #include <mach/hardware.h>
 
 static struct map_desc imx_io_desc[] __initdata = {
-- 
cgit v0.10.2


From 74fe030e63bd9e4adc5a67128d4c8359b37df55d Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 29 Apr 2009 13:41:40 +0200
Subject: mx1ads: rename mxc_map_io to mx1_map_io

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx1/mx1ads.c b/arch/arm/mach-mx1/mx1ads.c
index 55d3289..e5b0c0a 100644
--- a/arch/arm/mach-mx1/mx1ads.c
+++ b/arch/arm/mach-mx1/mx1ads.c
@@ -150,7 +150,7 @@ MACHINE_START(MX1ADS, "Freescale MX1ADS")
 	.phys_io	= IMX_IO_PHYS,
 	.io_pg_offst	= (IMX_IO_BASE >> 18) & 0xfffc,
 	.boot_params	= PHYS_OFFSET + 0x100,
-	.map_io		= mxc_map_io,
+	.map_io		= mx1_map_io,
 	.init_irq	= mxc_init_irq,
 	.timer		= &mx1ads_timer,
 	.init_machine	= mx1ads_init,
-- 
cgit v0.10.2


From 81ec1f929fe5be3f6464edf7939a9b65fc8223ca Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 29 Apr 2009 13:55:13 +0200
Subject: mxc: fix wrong register access in timer code

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index 85f002e..88fb3a5 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -234,9 +234,9 @@ static irqreturn_t mxc_timer_interrupt(int irq, void *dev_id)
 	uint32_t tstat;
 
 	if (cpu_is_mx3())
-		tstat = __raw_readl(timer_base + MX1_2_TSTAT);
-	else
 		tstat = __raw_readl(timer_base + MX3_TSTAT);
+	else
+		tstat = __raw_readl(timer_base + MX1_2_TSTAT);
 
 	gpt_irq_acknowledge();
 
-- 
cgit v0.10.2


From 35c82da0bfa04b26cc642814d3e9557f93b7b365 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 5 May 2009 11:13:23 +0200
Subject: MXC qonq: mxc_map_io is now mx31_map_io

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/qong.c b/arch/arm/mach-mx3/qong.c
index 5a01e48..82b31c4 100644
--- a/arch/arm/mach-mx3/qong.c
+++ b/arch/arm/mach-mx3/qong.c
@@ -279,7 +279,7 @@ MACHINE_START(QONG, "Dave/DENX QongEVB-LITE")
 	.phys_io        = AIPS1_BASE_ADDR,
 	.io_pg_offst    = ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params    = PHYS_OFFSET + 0x100,
-	.map_io         = mxc_map_io,
+	.map_io         = mx31_map_io,
 	.init_irq       = mxc_init_irq,
 	.init_machine   = mxc_board_init,
 	.timer          = &qong_timer,
-- 
cgit v0.10.2


From ef754d635820102ec7719486d40ede3c94ba44c8 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Wed, 6 May 2009 11:44:20 +0200
Subject: mx31: remove gpio_request calls from iomux code

Since iomux code is not directly related to gpio on mx31, the calls
to gpio_request are removed from iomux.c file.

These calls have to be done in platform initialization files. The
name of the singe pin call for iomux is also changed to
mxc_iomux_alloc_pin.

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/iomux.c b/arch/arm/mach-mx3/iomux.c
index 40ffc5a..c66ccbc 100644
--- a/arch/arm/mach-mx3/iomux.c
+++ b/arch/arm/mach-mx3/iomux.c
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
-#include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <mach/hardware.h>
 #include <mach/gpio.h>
@@ -94,15 +93,13 @@ void mxc_iomux_set_pad(enum iomux_pins pin, u32 config)
 EXPORT_SYMBOL(mxc_iomux_set_pad);
 
 /*
- * setups a single pin:
+ * allocs a single pin:
  * 	- reserves the pin so that it is not claimed by another driver
  * 	- setups the iomux according to the configuration
- * 	- if the pin is configured as a GPIO, we claim it through kernel gpiolib
  */
-int mxc_iomux_setup_pin(const unsigned int pin, const char *label)
+int mxc_iomux_alloc_pin(const unsigned int pin, const char *label)
 {
 	unsigned pad = pin & IOMUX_PADNUM_MASK;
-	unsigned gpio;
 
 	if (pad >= (PIN_MAX + 1)) {
 		printk(KERN_ERR "mxc_iomux: Attempt to request nonexistant pin %u for \"%s\"\n",
@@ -113,19 +110,13 @@ int mxc_iomux_setup_pin(const unsigned int pin, const char *label)
 	if (test_and_set_bit(pad, mxc_pin_alloc_map)) {
 		printk(KERN_ERR "mxc_iomux: pin %u already used. Allocation for \"%s\" failed\n",
 			pad, label ? label : "?");
-		return -EINVAL;
+		return -EBUSY;
 	}
 	mxc_iomux_mode(pin);
 
-	/* if we have a gpio, we can allocate it */
-	gpio = (pin & IOMUX_GPIONUM_MASK) >> IOMUX_GPIONUM_SHIFT;
-	if (gpio < (GPIO_PORT_MAX + 1) * 32)
-		if (gpio_request(gpio, label))
-			return -EINVAL;
-
 	return 0;
 }
-EXPORT_SYMBOL(mxc_iomux_setup_pin);
+EXPORT_SYMBOL(mxc_iomux_alloc_pin);
 
 int mxc_iomux_setup_multiple_pins(unsigned int *pin_list, unsigned count,
 		const char *label)
@@ -135,7 +126,8 @@ int mxc_iomux_setup_multiple_pins(unsigned int *pin_list, unsigned count,
 	int ret = -EINVAL;
 
 	for (i = 0; i < count; i++) {
-		if (mxc_iomux_setup_pin(*p, label))
+		ret = mxc_iomux_alloc_pin(*p, label);
+		if (ret)
 			goto setup_error;
 		p++;
 	}
@@ -150,14 +142,9 @@ EXPORT_SYMBOL(mxc_iomux_setup_multiple_pins);
 void mxc_iomux_release_pin(const unsigned int pin)
 {
 	unsigned pad = pin & IOMUX_PADNUM_MASK;
-	unsigned gpio;
 
 	if (pad < (PIN_MAX + 1))
 		clear_bit(pad, mxc_pin_alloc_map);
-
-	gpio = (pin & IOMUX_GPIONUM_MASK) >> IOMUX_GPIONUM_SHIFT;
-	if (gpio < (GPIO_PORT_MAX + 1) * 32)
-		gpio_free(gpio);
 }
 EXPORT_SYMBOL(mxc_iomux_release_pin);
 
diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx3.h b/arch/arm/plat-mxc/include/mach/iomux-mx3.h
index be43f76..27f8d1b 100644
--- a/arch/arm/plat-mxc/include/mach/iomux-mx3.h
+++ b/arch/arm/plat-mxc/include/mach/iomux-mx3.h
@@ -114,7 +114,7 @@ enum iomux_gp_func {
  * 	- setups the iomux according to the configuration
  * 	- if the pin is configured as a GPIO, we claim it throug kernel gpiolib
  */
-int mxc_iomux_setup_pin(const unsigned int pin, const char *label);
+int mxc_iomux_alloc_pin(const unsigned int pin, const char *label);
 /*
  * setups mutliple pins
  * convenient way to call the above function with tables
-- 
cgit v0.10.2


From 4f163eb8811e8ea760d9fe654ecc6f17feecb477 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 6 May 2009 12:55:50 +0200
Subject: mx31: calls to gpio_request moved into platform code

In order to use the gpiolib, we now have to call gpio_request in
the plaform code since it is not done in iomux code anymore.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31ads.c b/arch/arm/mach-mx3/mx31ads.c
index db2e06a..30e2767 100644
--- a/arch/arm/mach-mx3/mx31ads.c
+++ b/arch/arm/mach-mx3/mx31ads.c
@@ -187,7 +187,7 @@ static void __init mx31ads_init_expio(void)
 	/*
 	 * Configure INT line as GPIO input
 	 */
-	mxc_iomux_setup_pin(IOMUX_MODE(MX31_PIN_GPIO1_4, IOMUX_CONFIG_GPIO), "expio");
+	mxc_iomux_alloc_pin(IOMUX_MODE(MX31_PIN_GPIO1_4, IOMUX_CONFIG_GPIO), "expio");
 
 	/* disable the interrupt and clear the status */
 	__raw_writew(0xFFFF, PBC_INTMASK_CLEAR_REG);
diff --git a/arch/arm/mach-mx3/mx31lite.c b/arch/arm/mach-mx3/mx31lite.c
index 74734e5..e709229 100644
--- a/arch/arm/mach-mx3/mx31lite.c
+++ b/arch/arm/mach-mx3/mx31lite.c
@@ -118,14 +118,21 @@ void __init mx31lite_map_io(void)
  */
 static void __init mxc_board_init(void)
 {
+	int ret;
+
 	mxc_iomux_setup_multiple_pins(mx31lite_pins, ARRAY_SIZE(mx31lite_pins),
 				      "mx31lite");
 
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
 
 	/* SMSC9117 IRQ pin */
-	gpio_direction_input(IOMUX_TO_GPIO(MX31_PIN_SFS6));
-	platform_device_register(&smsc911x_device);
+	ret = gpio_request(IOMUX_TO_GPIO(MX31_PIN_SFS6), "sms9117-irq");
+	if (ret)
+		pr_warning("could not get LAN irq gpio\n");
+	else {
+		gpio_direction_input(IOMUX_TO_GPIO(MX31_PIN_SFS6));
+		platform_device_register(&smsc911x_device);
+	}
 }
 
 static void __init mx31lite_timer_init(void)
diff --git a/arch/arm/mach-mx3/mx31moboard-devboard.c b/arch/arm/mach-mx3/mx31moboard-devboard.c
index 0a69a4f..b3e8f25 100644
--- a/arch/arm/mach-mx3/mx31moboard-devboard.c
+++ b/arch/arm/mach-mx3/mx31moboard-devboard.c
@@ -56,14 +56,40 @@ static int devboard_sdhc2_get_ro(struct device *dev)
 static int devboard_sdhc2_init(struct device *dev, irq_handler_t detect_irq,
 		void *data)
 {
-	return request_irq(gpio_to_irq(SDHC2_CD), detect_irq,
+	int ret;
+
+	ret = gpio_request(SDHC2_CD, "sdhc-detect");
+	if (ret)
+		return ret;
+
+	gpio_direction_input(SDHC2_CD);
+
+	ret = gpio_request(SDHC2_WP, "sdhc-wp");
+	if (ret)
+		goto err_gpio_free;
+	gpio_direction_input(SDHC2_WP);
+
+	ret = request_irq(gpio_to_irq(SDHC2_CD), detect_irq,
 		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
 		"sdhc2-card-detect", data);
+	if (ret)
+		goto err_gpio_free_2;
+
+	return 0;
+
+err_gpio_free_2:
+	gpio_free(SDHC2_WP);
+err_gpio_free:
+	gpio_free(SDHC2_CD);
+
+	return ret;
 }
 
 static void devboard_sdhc2_exit(struct device *dev, void *data)
 {
 	free_irq(gpio_to_irq(SDHC2_CD), data);
+	gpio_free(SDHC2_WP);
+	gpio_free(SDHC2_CD);
 }
 
 static struct imxmmc_platform_data sdhc2_pdata = {
diff --git a/arch/arm/mach-mx3/mx31moboard-marxbot.c b/arch/arm/mach-mx3/mx31moboard-marxbot.c
index 4fc2711..53ce7ff 100644
--- a/arch/arm/mach-mx3/mx31moboard-marxbot.c
+++ b/arch/arm/mach-mx3/mx31moboard-marxbot.c
@@ -60,14 +60,40 @@ static int marxbot_sdhc2_get_ro(struct device *dev)
 static int marxbot_sdhc2_init(struct device *dev, irq_handler_t detect_irq,
 		void *data)
 {
-	return request_irq(gpio_to_irq(SDHC2_CD), detect_irq,
+	int ret;
+
+	ret = gpio_request(SDHC2_CD, "sdhc-detect");
+	if (ret)
+		return ret;
+
+	gpio_direction_input(SDHC2_CD);
+
+	ret = gpio_request(SDHC2_WP, "sdhc-wp");
+	if (ret)
+		goto err_gpio_free;
+	gpio_direction_input(SDHC2_WP);
+
+	ret = request_irq(gpio_to_irq(SDHC2_CD), detect_irq,
 		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
 		"sdhc2-card-detect", data);
+	if (ret)
+		goto err_gpio_free_2;
+
+	return 0;
+
+err_gpio_free_2:
+	gpio_free(SDHC2_WP);
+err_gpio_free:
+	gpio_free(SDHC2_CD);
+
+	return ret;
 }
 
 static void marxbot_sdhc2_exit(struct device *dev, void *data)
 {
 	free_irq(gpio_to_irq(SDHC2_CD), data);
+	gpio_free(SDHC2_WP);
+	gpio_free(SDHC2_CD);
 }
 
 static struct imxmmc_platform_data sdhc2_pdata = {
diff --git a/arch/arm/mach-mx3/mx31moboard.c b/arch/arm/mach-mx3/mx31moboard.c
index 2cca3f2..0df6ac6 100644
--- a/arch/arm/mach-mx3/mx31moboard.c
+++ b/arch/arm/mach-mx3/mx31moboard.c
@@ -112,14 +112,40 @@ static int moboard_sdhc1_get_ro(struct device *dev)
 static int moboard_sdhc1_init(struct device *dev, irq_handler_t detect_irq,
 		void *data)
 {
-	return request_irq(gpio_to_irq(SDHC1_CD), detect_irq,
+	int ret;
+
+	ret = gpio_request(SDHC1_CD, "sdhc-detect");
+	if (ret)
+		return ret;
+
+	gpio_direction_input(SDHC1_CD);
+
+	ret = gpio_request(SDHC1_WP, "sdhc-wp");
+	if (ret)
+		goto err_gpio_free;
+	gpio_direction_input(SDHC1_WP);
+
+	ret = request_irq(gpio_to_irq(SDHC1_CD), detect_irq,
 		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
 		"sdhc1-card-detect", data);
+	if (ret)
+		goto err_gpio_free_2;
+
+	return 0;
+
+err_gpio_free_2:
+	gpio_free(SDHC1_WP);
+err_gpio_free:
+	gpio_free(SDHC1_CD);
+
+	return ret;
 }
 
 static void moboard_sdhc1_exit(struct device *dev, void *data)
 {
 	free_irq(gpio_to_irq(SDHC1_CD), data);
+	gpio_free(SDHC1_WP);
+	gpio_free(SDHC1_CD);
 }
 
 static struct imxmmc_platform_data sdhc1_pdata = {
diff --git a/arch/arm/mach-mx3/pcm037.c b/arch/arm/mach-mx3/pcm037.c
index 350cff5..c6f61a1 100644
--- a/arch/arm/mach-mx3/pcm037.c
+++ b/arch/arm/mach-mx3/pcm037.c
@@ -280,27 +280,50 @@ static int pcm970_sdhc1_get_ro(struct device *dev)
 }
 #endif
 
+#define SDHC1_GPIO_WP	IOMUX_TO_GPIO(MX31_PIN_SFS6)
+#define SDHC1_GPIO_DET	IOMUX_TO_GPIO(MX31_PIN_SCK6)
+
 static int pcm970_sdhc1_init(struct device *dev, irq_handler_t detect_irq,
 		void *data)
 {
 	int ret;
-	int gpio_det, gpio_wp;
 
-	gpio_det = IOMUX_TO_GPIO(MX31_PIN_SCK6);
-	gpio_wp = IOMUX_TO_GPIO(MX31_PIN_SFS6);
+	ret = gpio_request(SDHC1_GPIO_DET, "sdhc-detect");
+	if (ret)
+		return ret;
+
+	gpio_direction_input(SDHC1_GPIO_DET);
 
-	gpio_direction_input(gpio_det);
-	gpio_direction_input(gpio_wp);
+#ifdef PCM970_SDHC_RW_SWITCH
+	ret = gpio_request(SDHC1_GPIO_WP, "sdhc-wp");
+	if (ret)
+		goto err_gpio_free;
+	gpio_direction_input(SDHC1_GPIO_WP);
+#endif
 
 	ret = request_irq(IOMUX_TO_IRQ(MX31_PIN_SCK6), detect_irq,
 			IRQF_DISABLED | IRQF_TRIGGER_FALLING,
 				"sdhc-detect", data);
+	if (ret)
+		goto err_gpio_free_2;
+
+	return 0;
+
+err_gpio_free_2:
+#ifdef PCM970_SDHC_RW_SWITCH
+	gpio_free(SDHC1_GPIO_WP);
+err_gpio_free:
+#endif
+	gpio_free(SDHC1_GPIO_DET);
+
 	return ret;
 }
 
 static void pcm970_sdhc1_exit(struct device *dev, void *data)
 {
 	free_irq(IOMUX_TO_IRQ(MX31_PIN_SCK6), data);
+	gpio_free(SDHC1_GPIO_DET);
+	gpio_free(SDHC1_GPIO_WP);
 }
 
 static struct imxmmc_platform_data sdhc_pdata = {
@@ -313,7 +336,6 @@ static struct imxmmc_platform_data sdhc_pdata = {
 
 static struct platform_device *devices[] __initdata = {
 	&pcm037_flash,
-	&pcm037_eth,
 	&pcm037_sram_device,
 };
 
@@ -370,6 +392,8 @@ static struct mx3fb_platform_data mx3fb_pdata = {
  */
 static void __init mxc_board_init(void)
 {
+	int ret;
+
 	mxc_iomux_setup_multiple_pins(pcm037_pins, ARRAY_SIZE(pcm037_pins),
 			"pcm037");
 
@@ -382,7 +406,14 @@ static void __init mxc_board_init(void)
 	mxc_register_device(&mxc_w1_master_device, NULL);
 
 	/* LAN9217 IRQ pin */
-	gpio_direction_input(IOMUX_TO_GPIO(MX31_PIN_GPIO3_1));
+	ret = gpio_request(IOMUX_TO_GPIO(MX31_PIN_GPIO3_1), "lan9217-irq");
+	if (ret)
+		pr_warning("could not get LAN irq gpio\n");
+	else {
+		gpio_direction_input(IOMUX_TO_GPIO(MX31_PIN_GPIO3_1));
+		platform_device_register(&pcm037_eth);
+	}
+
 
 #ifdef CONFIG_I2C_IMX
 	i2c_register_board_info(1, pcm037_i2c_devices,
-- 
cgit v0.10.2


From 03c13ecc0d27d6c566afdb9624848fa0e0efe316 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Wed, 6 May 2009 11:45:36 +0200
Subject: mx31moboard: add support for usb OTG device (v3)

This is used on all board of our system, so again this is done in the
mx31moboard.c file.

changes since v2: call to gpio_request added in platform code, and
moved device initialization in baseboard files.

changes since v1: pins are claimed in another patch

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31moboard-devboard.c b/arch/arm/mach-mx3/mx31moboard-devboard.c
index b3e8f25..4704405 100644
--- a/arch/arm/mach-mx3/mx31moboard-devboard.c
+++ b/arch/arm/mach-mx3/mx31moboard-devboard.c
@@ -16,6 +16,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/fsl_devices.h>
 #include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
@@ -39,6 +40,18 @@ static unsigned int devboard_pins[] = {
 	MX31_PIN_PC_READY__SD2_DATA1, MX31_PIN_PC_WAIT_B__SD2_DATA0,
 	MX31_PIN_PC_CD2_B__SD2_CLK, MX31_PIN_PC_CD1_B__SD2_CMD,
 	MX31_PIN_ATA_DIOR__GPIO3_28, MX31_PIN_ATA_DIOW__GPIO3_29,
+	/* USB OTG */
+	MX31_PIN_USBOTG_DATA0__USBOTG_DATA0,
+	MX31_PIN_USBOTG_DATA1__USBOTG_DATA1,
+	MX31_PIN_USBOTG_DATA2__USBOTG_DATA2,
+	MX31_PIN_USBOTG_DATA3__USBOTG_DATA3,
+	MX31_PIN_USBOTG_DATA4__USBOTG_DATA4,
+	MX31_PIN_USBOTG_DATA5__USBOTG_DATA5,
+	MX31_PIN_USBOTG_DATA6__USBOTG_DATA6,
+	MX31_PIN_USBOTG_DATA7__USBOTG_DATA7,
+	MX31_PIN_USBOTG_CLK__USBOTG_CLK, MX31_PIN_USBOTG_DIR__USBOTG_DIR,
+	MX31_PIN_USBOTG_NXT__USBOTG_NXT, MX31_PIN_USBOTG_STP__USBOTG_STP,
+	MX31_PIN_USB_OC__GPIO1_30,
 };
 
 static struct imxuart_platform_data uart_pdata = {
@@ -98,6 +111,33 @@ static struct imxmmc_platform_data sdhc2_pdata = {
 	.exit	= devboard_sdhc2_exit,
 };
 
+static struct fsl_usb2_platform_data usb_pdata = {
+	.operating_mode	= FSL_USB2_DR_DEVICE,
+	.phy_mode	= FSL_USB2_PHY_ULPI,
+};
+
+#define OTG_PAD_CFG (PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST)
+#define OTG_EN_B IOMUX_TO_GPIO(MX31_PIN_USB_OC)
+
+static void devboard_usbotg_init(void)
+{
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA0, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA1, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA2, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA3, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA4, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA5, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA6, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA7, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_CLK, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DIR, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_NXT, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_STP, OTG_PAD_CFG);
+
+	gpio_request(OTG_EN_B, "usb-udc-en");
+	gpio_direction_output(OTG_EN_B, 0);
+}
+
 /*
  * system init for baseboard usage. Will be called by mx31moboard init.
  */
@@ -111,4 +151,7 @@ void __init mx31moboard_devboard_init(void)
 	mxc_register_device(&mxc_uart_device1, &uart_pdata);
 
 	mxc_register_device(&mxcsdhc_device1, &sdhc2_pdata);
+
+	devboard_usbotg_init();
+	mxc_register_device(&mxc_otg_udc_device, &usb_pdata);
 }
diff --git a/arch/arm/mach-mx3/mx31moboard-marxbot.c b/arch/arm/mach-mx3/mx31moboard-marxbot.c
index 53ce7ff..641c3d6 100644
--- a/arch/arm/mach-mx3/mx31moboard-marxbot.c
+++ b/arch/arm/mach-mx3/mx31moboard-marxbot.c
@@ -16,6 +16,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/fsl_devices.h>
 #include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
@@ -47,6 +48,18 @@ static unsigned int marxbot_pins[] = {
 	MX31_PIN_CSI_PIXCLK__CSI_PIXCLK, MX31_PIN_CSI_VSYNC__CSI_VSYNC,
 	MX31_PIN_GPIO3_0__GPIO3_0, MX31_PIN_GPIO3_1__GPIO3_1,
 	MX31_PIN_TXD2__GPIO1_28,
+	/* USB OTG */
+	MX31_PIN_USBOTG_DATA0__USBOTG_DATA0,
+	MX31_PIN_USBOTG_DATA1__USBOTG_DATA1,
+	MX31_PIN_USBOTG_DATA2__USBOTG_DATA2,
+	MX31_PIN_USBOTG_DATA3__USBOTG_DATA3,
+	MX31_PIN_USBOTG_DATA4__USBOTG_DATA4,
+	MX31_PIN_USBOTG_DATA5__USBOTG_DATA5,
+	MX31_PIN_USBOTG_DATA6__USBOTG_DATA6,
+	MX31_PIN_USBOTG_DATA7__USBOTG_DATA7,
+	MX31_PIN_USBOTG_CLK__USBOTG_CLK, MX31_PIN_USBOTG_DIR__USBOTG_DIR,
+	MX31_PIN_USBOTG_NXT__USBOTG_NXT, MX31_PIN_USBOTG_STP__USBOTG_STP,
+	MX31_PIN_USB_OC__GPIO1_30,
 };
 
 #define SDHC2_CD IOMUX_TO_GPIO(MX31_PIN_ATA_DIOR)
@@ -102,6 +115,33 @@ static struct imxmmc_platform_data sdhc2_pdata = {
 	.exit	= marxbot_sdhc2_exit,
 };
 
+static struct fsl_usb2_platform_data usb_pdata = {
+	.operating_mode	= FSL_USB2_DR_DEVICE,
+	.phy_mode	= FSL_USB2_PHY_ULPI,
+};
+
+#define OTG_PAD_CFG (PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST)
+#define OTG_EN_B IOMUX_TO_GPIO(MX31_PIN_USB_OC)
+
+static void marxbot_usbotg_init(void)
+{
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA0, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA1, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA2, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA3, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA4, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA5, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA6, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA7, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_CLK, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_DIR, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_NXT, OTG_PAD_CFG);
+	mxc_iomux_set_pad(MX31_PIN_USBOTG_STP, OTG_PAD_CFG);
+
+	gpio_request(OTG_EN_B, "usb-udc-en");
+	gpio_direction_output(OTG_EN_B, 0);
+}
+
 /*
  * system init for baseboard usage. Will be called by mx31moboard init.
  */
@@ -113,4 +153,7 @@ void __init mx31moboard_marxbot_init(void)
 		"marxbot");
 
 	mxc_register_device(&mxcsdhc_device1, &sdhc2_pdata);
+
+	marxbot_usbotg_init();
+	mxc_register_device(&mxc_otg_udc_device, &usb_pdata);
 }
diff --git a/arch/arm/mach-mx3/mx31moboard.c b/arch/arm/mach-mx3/mx31moboard.c
index 0df6ac6..a17f2e4 100644
--- a/arch/arm/mach-mx3/mx31moboard.c
+++ b/arch/arm/mach-mx3/mx31moboard.c
@@ -55,18 +55,6 @@ static unsigned int moboard_pins[] = {
 	MX31_PIN_SD1_DATA1__SD1_DATA1, MX31_PIN_SD1_DATA0__SD1_DATA0,
 	MX31_PIN_SD1_CLK__SD1_CLK, MX31_PIN_SD1_CMD__SD1_CMD,
 	MX31_PIN_ATA_CS0__GPIO3_26, MX31_PIN_ATA_CS1__GPIO3_27,
-	/* USB OTG */
-	MX31_PIN_USBOTG_DATA0__USBOTG_DATA0,
-	MX31_PIN_USBOTG_DATA1__USBOTG_DATA1,
-	MX31_PIN_USBOTG_DATA2__USBOTG_DATA2,
-	MX31_PIN_USBOTG_DATA3__USBOTG_DATA3,
-	MX31_PIN_USBOTG_DATA4__USBOTG_DATA4,
-	MX31_PIN_USBOTG_DATA5__USBOTG_DATA5,
-	MX31_PIN_USBOTG_DATA6__USBOTG_DATA6,
-	MX31_PIN_USBOTG_DATA7__USBOTG_DATA7,
-	MX31_PIN_USBOTG_CLK__USBOTG_CLK, MX31_PIN_USBOTG_DIR__USBOTG_DIR,
-	MX31_PIN_USBOTG_NXT__USBOTG_NXT, MX31_PIN_USBOTG_STP__USBOTG_STP,
-	MX31_PIN_USB_OC__GPIO1_30,
 };
 
 static struct physmap_flash_data mx31moboard_flash_data = {
-- 
cgit v0.10.2


From 9c70e227e00fcc3b210b11c96652ddcc0b6194f3 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Thu, 7 May 2009 11:51:42 +0200
Subject: mx31: add camera device

This adds the camera device based on Guennadi's soc_camera architecture
for the mx31.

The proposed init by Guennadi was removed and must be implemented in the
board init file as it contains a lot of board specific info (memory
size, clock speed).

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Reviewed-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/devices.c b/arch/arm/mach-mx3/devices.c
index f55c986..d927edd 100644
--- a/arch/arm/mach-mx3/devices.c
+++ b/arch/arm/mach-mx3/devices.c
@@ -22,10 +22,12 @@
 #include <linux/platform_device.h>
 #include <linux/serial.h>
 #include <linux/gpio.h>
+#include <linux/dma-mapping.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/common.h>
 #include <mach/imx-uart.h>
+#include <mach/mx3_camera.h>
 
 #include "devices.h"
 
@@ -346,10 +348,28 @@ struct platform_device mx3_fb = {
 	.num_resources	= ARRAY_SIZE(fb_resources),
 	.resource	= fb_resources,
 	.dev		= {
-		.coherent_dma_mask = 0xffffffff,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
        },
 };
 
+static struct resource camera_resources[] = {
+	{
+		.start	= IPU_CTRL_BASE_ADDR + 0x60,
+		.end	= IPU_CTRL_BASE_ADDR + 0x87,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+struct platform_device mx3_camera = {
+	.name		= "mx3-camera",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(camera_resources),
+	.resource	= camera_resources,
+	.dev		= {
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+};
+
 static struct resource otg_resources[] = {
 	{
 		.start	= OTG_BASE_ADDR,
diff --git a/arch/arm/mach-mx3/devices.h b/arch/arm/mach-mx3/devices.h
index 27779c3..475410a 100644
--- a/arch/arm/mach-mx3/devices.h
+++ b/arch/arm/mach-mx3/devices.h
@@ -11,6 +11,7 @@ extern struct platform_device mxc_i2c_device1;
 extern struct platform_device mxc_i2c_device2;
 extern struct platform_device mx3_ipu;
 extern struct platform_device mx3_fb;
+extern struct platform_device mx3_camera;
 extern struct platform_device mxc_fec_device;
 extern struct platform_device mxcsdhc_device0;
 extern struct platform_device mxcsdhc_device1;
-- 
cgit v0.10.2


From c2e5307b902426247afa48d3f1ed4fa5409dcb49 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Wed, 6 May 2009 11:54:48 +0200
Subject: mx31: changed CONSISTENT_DMA_SIZE to 8M for mx31 video

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Acked-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/plat-mxc/include/mach/memory.h b/arch/arm/plat-mxc/include/mach/memory.h
index eca37d09f..6065e00 100644
--- a/arch/arm/plat-mxc/include/mach/memory.h
+++ b/arch/arm/plat-mxc/include/mach/memory.h
@@ -32,4 +32,12 @@
 #define CONSISTENT_DMA_SIZE SZ_4M
 #endif /* CONFIG_MX1_VIDEO */
 
+#if defined(CONFIG_MX3_VIDEO)
+/*
+ * Increase size of DMA-consistent memory region.
+ * This is required for mx3 camera driver to capture at least two QXGA frames.
+ */
+#define CONSISTENT_DMA_SIZE SZ_8M
+#endif /* CONFIG_MX3_VIDEO */
+
 #endif /* __ASM_ARCH_MXC_MEMORY_H__ */
diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 9d48da2..57835f5 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -758,10 +758,14 @@ config VIDEO_MX1
 	---help---
 	  This is a v4l2 driver for the i.MX1/i.MXL CMOS Sensor Interface
 
+config MX3_VIDEO
+	bool
+
 config VIDEO_MX3
 	tristate "i.MX3x Camera Sensor Interface driver"
 	depends on VIDEO_DEV && MX3_IPU && SOC_CAMERA
 	select VIDEOBUF_DMA_CONTIG
+	select MX3_VIDEO
 	---help---
 	  This is a v4l2 driver for the i.MX3x Camera Sensor Interface
 
-- 
cgit v0.10.2


From 3679e38a832360d6d96efb4721742d81195594a0 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Fri, 1 May 2009 15:15:44 +0100
Subject: [ARM] 5493/1: Add w90p910 IC interface controller regester map

Add w90p910 IC other interface controller regester map
for driver will be submitted.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/include/mach/map.h b/arch/arm/mach-w90x900/include/mach/map.h
index 79320eb..1a20953 100644
--- a/arch/arm/mach-w90x900/include/mach/map.h
+++ b/arch/arm/mach-w90x900/include/mach/map.h
@@ -1,8 +1,7 @@
 /*
  * arch/arm/mach-w90x900/include/mach/map.h
  *
- * Copyright (c) 2008 Nuvoton technology corporation
- * All rights reserved.
+ * Copyright (c) 2008 Nuvoton technology corporation.
  *
  * Wan ZongShun <mcuos.com@gmail.com>
  *
@@ -10,8 +9,7 @@
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * the Free Software Foundation;version 2 of the License.
  *
  */
 
@@ -34,7 +32,6 @@
  * interrupt controller is the first thing we put in, to make
  * the assembly code for the irq detection easier
  */
-
 #define W90X900_VA_IRQ		W90X900_ADDR(0x00000000)
 #define W90X900_PA_IRQ		(0xB8002000)
 #define W90X900_SZ_IRQ		SZ_4K
@@ -44,33 +41,117 @@
 #define W90X900_SZ_GCR		SZ_4K
 
 /* Clock and Power management */
-
 #define W90X900_VA_CLKPWR	(W90X900_VA_GCR+0x200)
 #define W90X900_PA_CLKPWR	(0xB0000200)
 #define W90X900_SZ_CLKPWR	SZ_4K
 
 /* EBI management */
-
 #define W90X900_VA_EBI		W90X900_ADDR(0x00001000)
 #define W90X900_PA_EBI		(0xB0001000)
 #define W90X900_SZ_EBI		SZ_4K
 
 /* UARTs */
-
 #define W90X900_VA_UART		W90X900_ADDR(0x08000000)
 #define W90X900_PA_UART		(0xB8000000)
 #define W90X900_SZ_UART		SZ_4K
 
 /* Timers */
-
 #define W90X900_VA_TIMER	W90X900_ADDR(0x08001000)
 #define W90X900_PA_TIMER	(0xB8001000)
 #define W90X900_SZ_TIMER	SZ_4K
 
 /* GPIO ports */
-
 #define W90X900_VA_GPIO		W90X900_ADDR(0x08003000)
 #define W90X900_PA_GPIO		(0xB8003000)
 #define W90X900_SZ_GPIO		SZ_4K
 
+/* GDMA control */
+#define W90X900_VA_GDMA		W90X900_ADDR(0x00004000)
+#define W90X900_PA_GDMA		(0xB0004000)
+#define W90X900_SZ_GDMA		SZ_4K
+
+/* USB host controller*/
+#define W90X900_VA_USBEHCIHOST	W90X900_ADDR(0x00005000)
+#define W90X900_PA_USBEHCIHOST	(0xB0005000)
+#define W90X900_SZ_USBEHCIHOST	SZ_4K
+
+#define W90X900_VA_USBOHCIHOST	W90X900_ADDR(0x00007000)
+#define W90X900_PA_USBOHCIHOST	(0xB0007000)
+#define W90X900_SZ_USBOHCIHOST	SZ_4K
+
+/* I2C hardware controller */
+#define W90X900_VA_I2C		W90X900_ADDR(0x08006000)
+#define W90X900_PA_I2C		(0xB8006000)
+#define W90X900_SZ_I2C		SZ_4K
+
+/* Keypad Interface*/
+#define W90X900_VA_KPI		W90X900_ADDR(0x08008000)
+#define W90X900_PA_KPI		(0xB8008000)
+#define W90X900_SZ_KPI		SZ_4K
+
+/* Smart card host*/
+#define W90X900_VA_SC		W90X900_ADDR(0x08005000)
+#define W90X900_PA_SC		(0xB8005000)
+#define W90X900_SZ_SC		SZ_4K
+
+/* LCD controller*/
+#define W90X900_VA_LCD		W90X900_ADDR(0x00008000)
+#define W90X900_PA_LCD		(0xB0008000)
+#define W90X900_SZ_LCD		SZ_4K
+
+/* 2D controller*/
+#define W90X900_VA_GE		W90X900_ADDR(0x0000B000)
+#define W90X900_PA_GE		(0xB000B000)
+#define W90X900_SZ_GE		SZ_4K
+
+/* ATAPI */
+#define W90X900_VA_ATAPI	W90X900_ADDR(0x0000A000)
+#define W90X900_PA_ATAPI	(0xB000A000)
+#define W90X900_SZ_ATAPI	SZ_4K
+
+/* ADC */
+#define W90X900_VA_ADC		W90X900_ADDR(0x0800A000)
+#define W90X900_PA_ADC		(0xB800A000)
+#define W90X900_SZ_ADC		SZ_4K
+
+/* PS2 Interface*/
+#define W90X900_VA_PS2		W90X900_ADDR(0x08009000)
+#define W90X900_PA_PS2		(0xB8009000)
+#define W90X900_SZ_PS2		SZ_4K
+
+/* RTC */
+#define W90X900_VA_RTC		W90X900_ADDR(0x08004000)
+#define W90X900_PA_RTC		(0xB8004000)
+#define W90X900_SZ_RTC		SZ_4K
+
+/* Pulse Width Modulation(PWM) Registers */
+#define W90X900_VA_PWM		W90X900_ADDR(0x08007000)
+#define W90X900_PA_PWM		(0xB8007000)
+#define W90X900_SZ_PWM		SZ_4K
+
+/* Audio Controller controller */
+#define W90X900_VA_ACTL		W90X900_ADDR(0x00009000)
+#define W90X900_PA_ACTL		(0xB0009000)
+#define W90X900_SZ_ACTL		SZ_4K
+
+/* DMA controller */
+#define W90X900_VA_DMA		W90X900_ADDR(0x0000c000)
+#define W90X900_PA_DMA		(0xB000c000)
+#define W90X900_SZ_DMA		SZ_4K
+
+/* FMI controller */
+#define W90X900_VA_FMI		W90X900_ADDR(0x0000d000)
+#define W90X900_PA_FMI		(0xB000d000)
+#define W90X900_SZ_FMI		SZ_4K
+
+/* USB Device port */
+#define W90X900_VA_USBDEV	W90X900_ADDR(0x00006000)
+#define W90X900_PA_USBDEV	(0xB0006000)
+#define W90X900_SZ_USBDEV	SZ_4K
+
+/* External MAC control*/
+#define W90X900_VA_EMC		W90X900_ADDR(0x00003000)
+#define W90X900_PA_EMC		(0xB0003000)
+#define W90X900_SZ_EMC		SZ_4K
+
 #endif /* __ASM_ARCH_MAP_H */
-- 
cgit v0.10.2


From 7f7810e2db70589c6695f9994c10258b09b1f93c Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Fri, 1 May 2009 15:18:11 +0100
Subject: [ARM] 5494/1: Add w90p910 irq number define

Add irq number define for driver will be submitted.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/include/mach/irqs.h b/arch/arm/mach-w90x900/include/mach/irqs.h
index 1c583f9..9d5cba3 100644
--- a/arch/arm/mach-w90x900/include/mach/irqs.h
+++ b/arch/arm/mach-w90x900/include/mach/irqs.h
@@ -1,8 +1,7 @@
 /*
  * arch/arm/mach-w90x900/include/mach/irqs.h
  *
- * Copyright (c) 2008 Nuvoton technology corporation
- * All rights reserved.
+ * Copyright (c) 2008 Nuvoton technology corporation.
  *
  * Wan ZongShun <mcuos.com@gmail.com>
  *
@@ -10,8 +9,7 @@
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * the Free Software Foundation;version 2 of the License.
  *
  */
 
@@ -31,6 +29,11 @@
 /* Main cpu interrupts */
 
 #define IRQ_WDT		W90X900_IRQ(1)
+#define IRQ_GROUP0	W90X900_IRQ(2)
+#define IRQ_GROUP1	W90X900_IRQ(3)
+#define IRQ_ACTL	W90X900_IRQ(4)
+#define IRQ_LCD		W90X900_IRQ(5)
+#define IRQ_RTC		W90X900_IRQ(6)
 #define IRQ_UART0	W90X900_IRQ(7)
 #define IRQ_UART1	W90X900_IRQ(8)
 #define IRQ_UART2	W90X900_IRQ(9)
@@ -39,7 +42,45 @@
 #define IRQ_TIMER0	W90X900_IRQ(12)
 #define IRQ_TIMER1	W90X900_IRQ(13)
 #define IRQ_T_INT_GROUP	W90X900_IRQ(14)
+#define IRQ_USBH	W90X900_IRQ(15)
+#define IRQ_EMCTX	W90X900_IRQ(16)
+#define IRQ_EMCRX	W90X900_IRQ(17)
+#define IRQ_GDMAGROUP	W90X900_IRQ(18)
+#define IRQ_DMAC	W90X900_IRQ(19)
+#define IRQ_FMI		W90X900_IRQ(20)
+#define IRQ_USBD	W90X900_IRQ(21)
+#define IRQ_ATAPI	W90X900_IRQ(22)
+#define IRQ_G2D		W90X900_IRQ(23)
+#define IRQ_PCI		W90X900_IRQ(24)
+#define IRQ_SCGROUP	W90X900_IRQ(25)
+#define IRQ_I2CGROUP	W90X900_IRQ(26)
+#define IRQ_SSP		W90X900_IRQ(27)
+#define IRQ_PWM		W90X900_IRQ(28)
+#define IRQ_KPI		W90X900_IRQ(29)
+#define IRQ_P2SGROUP	W90X900_IRQ(30)
 #define IRQ_ADC		W90X900_IRQ(31)
 #define NR_IRQS		(IRQ_ADC+1)
 
+/*for irq group*/
+
+#define	IRQ_PS2_PORT0	0x10000000
+#define	IRQ_PS2_PORT1	0x20000000
+#define	IRQ_I2C_LINE0	0x04000000
+#define	IRQ_I2C_LINE1	0x08000000
+#define	IRQ_SC_CARD0	0x01000000
+#define	IRQ_SC_CARD1	0x02000000
+#define	IRQ_GDMA_CH0	0x00100000
+#define	IRQ_GDMA_CH1	0x00200000
+#define	IRQ_TIMER2	0x00010000
+#define	IRQ_TIMER3	0x00020000
+#define	IRQ_TIMER4	0x00040000
+#define	IRQ_GROUP0_IRQ0	0x00000001
+#define	IRQ_GROUP0_IRQ1	0x00000002
+#define	IRQ_GROUP0_IRQ2	0x00000004
+#define	IRQ_GROUP0_IRQ3	0x00000008
+#define	IRQ_GROUP1_IRQ4	0x00000010
+#define	IRQ_GROUP1_IRQ5	0x00000020
+#define	IRQ_GROUP1_IRQ6	0x00000040
+#define	IRQ_GROUP1_IRQ7	0x00000080
+
 #endif /* __ASM_ARCH_IRQ_H */
-- 
cgit v0.10.2


From 177dd6bb8c70885bfe3c269bf21dab1c4aeaf5c3 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Fri, 1 May 2009 16:11:46 +0100
Subject: [ARM] 5495/1: Add w90p910 usb host driver relevant kernel parts[1/2]

Add this usb host driver relevant kernel parts.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/mach-w90p910evb.c b/arch/arm/mach-w90x900/mach-w90p910evb.c
index 726ff67..578a509 100644
--- a/arch/arm/mach-w90x900/mach-w90p910evb.c
+++ b/arch/arm/mach-w90x900/mach-w90p910evb.c
@@ -3,15 +3,13 @@
  *
  * Based on mach-s3c2410/mach-smdk2410.c by Jonas Dietsche
  *
- * Copyright (C) 2008 Nuvoton technology corporation
- * All rights reserved.
+ * Copyright (C) 2008 Nuvoton technology corporation.
  *
  * Wan ZongShun <mcuos.com@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
+ * published by the Free Software Foundation;version 2 of the License.
  *
  */
 
@@ -80,6 +78,63 @@ static struct platform_device w90p910_flash_device = {
 	.num_resources	=	ARRAY_SIZE(w90p910_flash_resources),
 };
 
+/* USB EHCI Host Controller */
+
+static struct resource w90x900_usb_ehci_resource[] = {
+	[0] = {
+		.start = W90X900_PA_USBEHCIHOST,
+		.end   = W90X900_PA_USBEHCIHOST + W90X900_SZ_USBEHCIHOST - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_USBH,
+		.end   = IRQ_USBH,
+		.flags = IORESOURCE_IRQ,
+	}
+};
+
+static u64 w90x900_device_usb_ehci_dmamask = 0xffffffffUL;
+
+struct platform_device w90x900_device_usb_ehci = {
+	.name		  = "w90x900-ehci",
+	.id		  = -1,
+	.num_resources	  = ARRAY_SIZE(w90x900_usb_ehci_resource),
+	.resource	  = w90x900_usb_ehci_resource,
+	.dev              = {
+		.dma_mask = &w90x900_device_usb_ehci_dmamask,
+		.coherent_dma_mask = 0xffffffffUL
+	}
+};
+EXPORT_SYMBOL(w90x900_device_usb_ehci);
+
+/* USB OHCI Host Controller */
+
+static struct resource w90x900_usb_ohci_resource[] = {
+	[0] = {
+		.start = W90X900_PA_USBOHCIHOST,
+		.end   = W90X900_PA_USBOHCIHOST + W90X900_SZ_USBOHCIHOST - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_USBH,
+		.end   = IRQ_USBH,
+		.flags = IORESOURCE_IRQ,
+	}
+};
+
+static u64 w90x900_device_usb_ohci_dmamask = 0xffffffffUL;
+struct platform_device w90x900_device_usb_ohci = {
+	.name		  = "w90x900-ohci",
+	.id		  = -1,
+	.num_resources	  = ARRAY_SIZE(w90x900_usb_ohci_resource),
+	.resource	  = w90x900_usb_ohci_resource,
+	.dev              = {
+		.dma_mask = &w90x900_device_usb_ohci_dmamask,
+		.coherent_dma_mask = 0xffffffffUL
+	}
+};
+EXPORT_SYMBOL(w90x900_device_usb_ohci);
+
 static struct map_desc w90p910_iodesc[] __initdata = {
 };
 
@@ -88,6 +143,8 @@ static struct map_desc w90p910_iodesc[] __initdata = {
 static struct platform_device *w90p910evb_dev[] __initdata = {
 	&w90p910_serial_device,
 	&w90p910_flash_device,
+	&w90x900_device_usb_ehci,
+	&w90x900_device_usb_ohci,
 };
 
 static void __init w90p910evb_map_io(void)
-- 
cgit v0.10.2


From 432818f08fa5cf983e13b0ac32ec59b0951882b9 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Fri, 1 May 2009 16:13:05 +0100
Subject: [ARM] 5496/1: Add w90p910 touch screen driver relevant kernel
 parts[2/2].

Add this touch screen driver relevant kernel parts.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/mach-w90p910evb.c b/arch/arm/mach-w90x900/mach-w90p910evb.c
index 578a509..26de71f 100644
--- a/arch/arm/mach-w90x900/mach-w90p910evb.c
+++ b/arch/arm/mach-w90x900/mach-w90p910evb.c
@@ -135,6 +135,29 @@ struct platform_device w90x900_device_usb_ohci = {
 };
 EXPORT_SYMBOL(w90x900_device_usb_ohci);
 
+/*TouchScreen controller*/
+
+static struct resource w90x900_ts_resource[] = {
+	[0] = {
+		.start = W90X900_PA_ADC,
+		.end   = W90X900_PA_ADC + W90X900_SZ_ADC-1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_ADC,
+		.end   = IRQ_ADC,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device w90x900_device_ts = {
+	.name		= "w90x900-ts",
+	.id		= -1,
+	.resource	= w90x900_ts_resource,
+	.num_resources	= ARRAY_SIZE(w90x900_ts_resource),
+};
+EXPORT_SYMBOL(w90x900_device_ts);
+
 static struct map_desc w90p910_iodesc[] __initdata = {
 };
 
@@ -145,6 +168,7 @@ static struct platform_device *w90p910evb_dev[] __initdata = {
 	&w90p910_flash_device,
 	&w90x900_device_usb_ehci,
 	&w90x900_device_usb_ohci,
+	&w90x900_device_ts,
 };
 
 static void __init w90p910evb_map_io(void)
-- 
cgit v0.10.2


From 604f766f5f2fd4791ce898cd04d1f0dcb32b94a7 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Fri, 1 May 2009 16:14:54 +0100
Subject: [ARM] 5497/1: Add usb and ts relevant kernel maping option

Add drivers relevant kernel maping option.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/w90p910.c b/arch/arm/mach-w90x900/w90p910.c
index 2bcbaa6..d2e4a16 100644
--- a/arch/arm/mach-w90x900/w90p910.c
+++ b/arch/arm/mach-w90x900/w90p910.c
@@ -3,8 +3,7 @@
  *
  * Based on linux/arch/arm/plat-s3c24xx/s3c244x.c by Ben Dooks
  *
- * Copyright (c) 2008 Nuvoton technology corporation
- * All rights reserved.
+ * Copyright (c) 2008 Nuvoton technology corporation.
  *
  * Wan ZongShun <mcuos.com@gmail.com>
  *
@@ -12,8 +11,7 @@
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * the Free Software Foundation;version 2 of the License.
  *
  */
 
@@ -45,6 +43,9 @@ static struct map_desc w90p910_iodesc[] __initdata = {
 	IODESC_ENT(UART),
 	IODESC_ENT(TIMER),
 	IODESC_ENT(EBI),
+	IODESC_ENT(USBEHCIHOST),
+	IODESC_ENT(USBOHCIHOST),
+	IODESC_ENT(ADC),
 	/*IODESC_ENT(LCD),*/
 };
 
-- 
cgit v0.10.2


From 4e3f48e590330a1fd880ab96e52bc095e94c25e3 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Sat, 2 May 2009 15:39:09 +0100
Subject: [ARM] 5498/1: w90p910 Clock register controller header file dfine

Add Clock register controller header file dfine.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/include/mach/regs-clock.h b/arch/arm/mach-w90x900/include/mach/regs-clock.h
new file mode 100644
index 0000000..f10b6a8
--- /dev/null
+++ b/arch/arm/mach-w90x900/include/mach/regs-clock.h
@@ -0,0 +1,31 @@
+/*
+ * arch/arm/mach-w90x900/include/mach/regs-clock.h
+ *
+ * Copyright (c) 2008 Nuvoton technology corporation.
+ *
+ * Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;version 2 of the License.
+ *
+ */
+
+#ifndef __ASM_ARCH_REGS_CLOCK_H
+#define __ASM_ARCH_REGS_CLOCK_H
+
+/* Clock Control Registers  */
+#define CLK_BA		W90X900_VA_CLKPWR
+#define REG_CLKEN	(CLK_BA + 0x00)
+#define REG_CLKSEL	(CLK_BA + 0x04)
+#define REG_CLKDIV	(CLK_BA + 0x08)
+#define REG_PLLCON0	(CLK_BA + 0x0C)
+#define REG_PLLCON1	(CLK_BA + 0x10)
+#define REG_PMCON	(CLK_BA + 0x14)
+#define REG_IRQWAKECON	(CLK_BA + 0x18)
+#define REG_IRQWAKEFLAG	(CLK_BA + 0x1C)
+#define REG_IPSRST	(CLK_BA + 0x20)
+#define REG_CLKEN1	(CLK_BA + 0x24)
+#define REG_CLKDIV1	(CLK_BA + 0x28)
+
+#endif /*  __ASM_ARCH_REGS_CLOCK_H */
-- 
cgit v0.10.2


From 21dbe15f7dd0f0511bc0178d142213c3fadfcb99 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Sat, 2 May 2009 15:41:14 +0100
Subject: [ARM] 5499/1: Add Usb register controller header file dfine

Add Usb register controller header file dfine.
w90p910 usb ip is standard,but some mutifunction
controll pin must be special define in w90p910

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/include/mach/regs-usb.h b/arch/arm/mach-w90x900/include/mach/regs-usb.h
new file mode 100644
index 0000000..ab74b0c
--- /dev/null
+++ b/arch/arm/mach-w90x900/include/mach/regs-usb.h
@@ -0,0 +1,35 @@
+/*
+ * arch/arm/mach-w90x900/include/mach/regs-usb.h
+ *
+ * Copyright (c) 2008 Nuvoton technology corporation.
+ *
+ * Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;version 2 of the License.
+ *
+ */
+
+#ifndef __ASM_ARCH_REGS_USB_H
+#define __ASM_ARCH_REGS_USB_H
+
+/* usb Control Registers  */
+#define USBH_BA		W90X900_VA_USBEHCIHOST
+#define USBD_BA		W90X900_VA_USBDEV
+#define USBO_BA		W90X900_VA_USBOHCIHOST
+
+/* USB Host Control Registers */
+#define REG_UPSCR0	(USBH_BA+0x064)
+#define REG_UPSCR1	(USBH_BA+0x068)
+#define REG_USBPCR0	(USBH_BA+0x0C4)
+#define REG_USBPCR1	(USBH_BA+0x0C8)
+
+/* USBH OHCI Control Registers */
+#define REG_OpModEn	(USBO_BA+0x204)
+/*This bit controls the polarity of over
+*current flag from external power IC.
+*/
+#define OCALow		0x08
+
+#endif /*  __ASM_ARCH_REGS_USB_H */
-- 
cgit v0.10.2


From b4380b8e5888e5ef5872e43b610c9dac4bf253ac Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Sun, 3 May 2009 16:49:26 +0100
Subject: [ARM] 5501/1: Freescale STMP: fix compilation warning

To avoid compile-time warning, added parameter to the stmp3xxx_clock_read

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/plat-stmp3xxx/timer.c b/arch/arm/plat-stmp3xxx/timer.c
index c916068..7d872f0 100644
--- a/arch/arm/plat-stmp3xxx/timer.c
+++ b/arch/arm/plat-stmp3xxx/timer.c
@@ -45,7 +45,7 @@ stmp3xxx_timer_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static cycle_t stmp3xxx_clock_read(void)
+static cycle_t stmp3xxx_clock_read(struct clocksource *cs)
 {
 	return ~((HW_TIMROT_TIMCOUNTn_RD(1) & 0xFFFF0000) >> 16);
 }
-- 
cgit v0.10.2


From 08ce4c91e44d51bb6c946f2756825a462d53c545 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 7 Apr 2009 23:40:39 +0200
Subject: dlm: Make name input parameter of {,dlm_}new_lockspace() const

| fs/gfs2/lock_dlm.c:207: warning: passing argument 1 of 'dlm_new_lockspace' discards qualifiers from pointer target type

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index cd8e2df..82528d9 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -384,7 +384,7 @@ static void threads_stop(void)
 	dlm_astd_stop();
 }
 
-static int new_lockspace(char *name, int namelen, void **lockspace,
+static int new_lockspace(const char *name, int namelen, void **lockspace,
 			 uint32_t flags, int lvblen)
 {
 	struct dlm_ls *ls;
@@ -614,7 +614,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	return error;
 }
 
-int dlm_new_lockspace(char *name, int namelen, void **lockspace,
+int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
 		      uint32_t flags, int lvblen)
 {
 	int error = 0;
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index b9cd386..0b3518c 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -81,8 +81,8 @@ struct dlm_lksb {
  * the cluster, the calling node joins it.
  */
 
-int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace,
-		      uint32_t flags, int lvblen);
+int dlm_new_lockspace(const char *name, int namelen,
+		      dlm_lockspace_t **lockspace, uint32_t flags, int lvblen);
 
 /*
  * dlm_release_lockspace
-- 
cgit v0.10.2


From 8511a2728ab82cab398e39d019f5cf1246021c1c Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 8 Apr 2009 15:38:43 -0500
Subject: dlm: fix use count with multiple joins

When a lockspace was joined multiple times, the global dlm
use count was incremented when it should not have been.  This
caused the global dlm threads to not be stopped when all
lockspaces were eventually be removed.

Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 82528d9..d489fcc 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -419,16 +419,14 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 			break;
 		}
 		ls->ls_create_count++;
-		module_put(THIS_MODULE);
-		error = 1; /* not an error, return 0 */
+		*lockspace = ls;
+		error = 1;
 		break;
 	}
 	spin_unlock(&lslist_lock);
 
-	if (error < 0)
-		goto out;
 	if (error)
-		goto ret_zero;
+		goto out;
 
 	error = -ENOMEM;
 
@@ -583,7 +581,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	dlm_create_debug_file(ls);
 
 	log_debug(ls, "join complete");
- ret_zero:
 	*lockspace = ls;
 	return 0;
 
@@ -628,7 +625,9 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
 	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
 	if (!error)
 		ls_count++;
-	else if (!ls_count)
+	if (error > 0)
+		error = 0;
+	if (!ls_count)
 		threads_stop();
  out:
 	mutex_unlock(&ls_lock);
-- 
cgit v0.10.2


From 29c8000ee7da3a6756d26143991e573eaaf2a9f6 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 May 2009 11:13:42 -0400
Subject: ring-buffer: remove complex calculations in ring-buffer-test

Ingo Molnar thought that the code to calculate the time in cond_resched
is a bit too ugly and is not needed. This patch removes it and replaces
it with a simple call to cond_resched. I kept the comment that explains
the reason for the cond_resched.

[ Impact: remove ugly code ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index a26fc67..f4ceb45 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -185,35 +185,6 @@ static void ring_buffer_consumer(void)
 	complete(&read_done);
 }
 
-/*
- * If we are a non preempt kernel, the 10 second run will
- * stop everything while it runs. Instead, we will call cond_resched
- * and also add any time that was lost by a rescedule.
- */
-#ifdef CONFIG_PREEMPT
-static void sched_if_needed(struct timeval *start_tv, struct timeval *end_tv)
-{
-}
-#else
-static void sched_if_needed(struct timeval *start_tv, struct timeval *end_tv)
-{
-	struct timeval tv;
-
-	cond_resched();
-	do_gettimeofday(&tv);
-	if (tv.tv_usec < end_tv->tv_usec) {
-		tv.tv_usec += 1000000;
-		tv.tv_sec--;
-	}
-	start_tv->tv_sec += tv.tv_sec - end_tv->tv_sec;
-	start_tv->tv_usec += tv.tv_usec - end_tv->tv_usec;
-	if (start_tv->tv_usec > 1000000) {
-		start_tv->tv_usec -= 1000000;
-		start_tv->tv_sec++;
-	}
-}
-#endif
-
 static void ring_buffer_producer(void)
 {
 	struct timeval start_tv;
@@ -250,7 +221,13 @@ static void ring_buffer_producer(void)
 		if (consumer && !(++cnt % wakeup_interval))
 			wake_up_process(consumer);
 
-		sched_if_needed(&start_tv, &end_tv);
+		/*
+		 * If we are a non preempt kernel, the 10 second run will
+		 * stop everything while it runs. Instead, we will call
+		 * cond_resched and also add any time that was lost by a
+		 * rescedule.
+		 */
+		cond_resched();
 
 	} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
 	pr_info("End ring buffer hammer\n");
-- 
cgit v0.10.2


From 54f2c841fa0007e5fee3b7d01a911c774f0a6cda Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 7 May 2009 17:28:59 +0200
Subject: oprofile: fix cpu buffer size

The unit of oprofile_cpu_buffer_size is in samples, but was allocated
in bytes. This led to the allocation of too small cpu buffers. This
patch recalculates the buffer size in bytes taking also the
ring_buffer_event header size into account.

Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index f0e99d4..242257b 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -78,16 +78,20 @@ void free_cpu_buffers(void)
 	op_ring_buffer_write = NULL;
 }
 
+#define RB_EVENT_HDR_SIZE 4
+
 int alloc_cpu_buffers(void)
 {
 	int i;
 
 	unsigned long buffer_size = oprofile_cpu_buffer_size;
+	unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
+						 RB_EVENT_HDR_SIZE);
 
-	op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
+	op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
 	if (!op_ring_buffer_read)
 		goto fail;
-	op_ring_buffer_write = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
+	op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
 	if (!op_ring_buffer_write)
 		goto fail;
 
-- 
cgit v0.10.2


From d6bf81ef0f7474434c2a049e8bf3c9146a14dd96 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 May 2009 11:49:35 -0400
Subject: tracing: append ":*" to internal setting of system events

The system enabling of events uses the same code as the set_event file.
It passes in the name of the system to the parser and that will enable
all the events that has that system as a name.

The problem is that it will also enable events with the same name as the
system.

If you have system name foo, and system name bar, but within the system
bar, there exists an event called foo. By setting the system name foo,
you will also be enabling the event foo in the system bar. This is not
an expected result.

The solution is to pass in "foo:*", which will only enable the system
foo and not events called foo.

[ Impact: prevent accidental enabling of events with same name as a system ]

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 87feb01..8d0fae3 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -509,9 +509,11 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		return -EINVAL;
 	}
 
-	command = kstrdup(system, GFP_KERNEL);
+	/* +3 for the ":*\0" */
+	command = kmalloc(strlen(system)+3, GFP_KERNEL);
 	if (!command)
 		return -ENOMEM;
+	sprintf(command, "%s:*", system);
 
 	ret = ftrace_set_clr_event(command, val);
 	if (ret)
@@ -1179,7 +1181,7 @@ static __init int event_trace_init(void)
 			  &ftrace_show_header_fops);
 
 	trace_create_file("enable", 0644, d_events,
-			  "*:*", &ftrace_system_enable_fops);
+			  "*", &ftrace_system_enable_fops);
 
 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
-- 
cgit v0.10.2


From 65b77242043f74bca6a0d733c0e48ef03a8c9893 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 May 2009 12:49:27 -0400
Subject: tracing: have menu default enabled when kernel debug is configured

Tracing can be very helpful to debug the kernel. When DEBUG_KERNEL is
enabled it is nice to enable the trace menu as well.

This patch only make the tracing menu enabled by default, it does not
make any of the tracers enabled. And the menu is only enabled by
default if DEBUG_KERNEL is enabled.

[ Impact: show tracing options to those debugging the kernel ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 50f62a2..f61be30 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -79,6 +79,7 @@ if TRACING_SUPPORT
 
 menuconfig FTRACE
 	bool "Tracers"
+	default y if DEBUG_KERNEL
 	help
 	 Enable the kernel tracing infrastructure.
 
-- 
cgit v0.10.2


From 0574ea421b90e0e45a72c447dd3c2c79ffd8c153 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 May 2009 14:20:28 -0400
Subject: ring-buffer: only periodically call cond_resched to
 ring-buffer-benchmark

Calling cond_resched at every iteration of the loop adds a bit of
overhead to the benchmark.

This patch does two things.

1) only calls cond-resched when CONFIG_PREEMPT is not enabled
2) only calls cond-resched after so many traces has been performed.

[ Impact: less overhead to the ring-buffer-benchmark ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index f4ceb45..a7c048b 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -218,16 +218,23 @@ static void ring_buffer_producer(void)
 		}
 		do_gettimeofday(&end_tv);
 
-		if (consumer && !(++cnt % wakeup_interval))
+		cnt++;
+		if (consumer && !(cnt % wakeup_interval))
 			wake_up_process(consumer);
 
+#ifndef CONFIG_PREEMPT
 		/*
 		 * If we are a non preempt kernel, the 10 second run will
 		 * stop everything while it runs. Instead, we will call
 		 * cond_resched and also add any time that was lost by a
 		 * rescedule.
+		 *
+		 * Do a cond resched at the same frequency we would wake up
+		 * the reader.
 		 */
-		cond_resched();
+		if (cnt % wakeup_interval)
+			cond_resched();
+#endif
 
 	} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
 	pr_info("End ring buffer hammer\n");
-- 
cgit v0.10.2


From d3584183d2f40f40371e288ceef187d04da213b5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 7 May 2009 15:36:13 -0700
Subject: sparc64: Fix SET_PERSONALITY to not clip bits outside of PER_MASK.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 425c2f9..d42e393 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -208,8 +208,9 @@ do {	unsigned long new_flags = current_thread_info()->flags; \
 	else						\
 		clear_thread_flag(TIF_ABI_PENDING);	\
 	/* flush_thread will update pgd cache */	\
-	if (current->personality != PER_LINUX32)	\
-		set_personality(PER_LINUX);		\
+	if (personality(current->personality) != PER_LINUX32)	\
+		set_personality(PER_LINUX |		\
+			(current->personality & (~PER_MASK)));	\
 } while (0)
 
 #endif /* !(__ASM_SPARC64_ELF_H) */
-- 
cgit v0.10.2


From 7da3046d6ce6ea97494020081c509b642b7016af Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 May 2009 19:52:20 -0400
Subject: ring-buffer: add total count in ring-buffer-benchmark

It is nice to see the overhead of the benchmark test when tracing is
disabled. That is, we turn off the ring buffer just to see what the
cost of running the loop that calls into the ring buffer is.

Currently, if no entries wer made, we get 0. This is not informative.
This patch changes it to check if we had any "missed" (non recorded)
events. If so, a total count is also reported.

[ Impact: evaluate the over head of the ring buffer benchmark test ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index a7c048b..a21aa7b 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -285,6 +285,17 @@ static void ring_buffer_producer(void)
 		avg = 1000000 / hit;
 		pr_info("%ld ns per entry\n", avg);
 	}
+
+
+	if (missed) {
+		if (time)
+			missed /= (long)time;
+
+		pr_info("Total iterations per millisec: %ld\n", hit + missed);
+
+		avg = 1000000 / (hit + missed);
+		pr_info("%ld ns per entry\n", avg);
+	}
 }
 
 static void wait_to_die(void)
-- 
cgit v0.10.2


From 74f4fd21664148b8c454cc07bfe74e4dd51cf07b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 7 May 2009 19:58:55 -0400
Subject: ring-buffer: change WARN_ON from checking preempt_count to
 preemptible

There's a WARN_ON in the ring buffer code that makes sure preemption
is disabled. It checks "!preempt_count()". But when CONFIG_PREEMPT is not
enabled, preempt_count() is always zero, and this will trigger the warning.

[ Impact: prevent false warning on non preemptible kernels ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3ae5ccf..3611706 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1688,7 +1688,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	 * committed yet. Thus we can assume that preemption
 	 * is still disabled.
 	 */
-	RB_WARN_ON(buffer, !preempt_count());
+	RB_WARN_ON(buffer, preemptible());
 
 	cpu = smp_processor_id();
 	cpu_buffer = buffer->buffers[cpu];
-- 
cgit v0.10.2


From 5dafc91fca9135a7b0acb76d9709f4abfad92d6e Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 7 May 2009 10:17:44 +0000
Subject: sh: sh7785 early scif fix

This patch moves the SH4 case of EARLY_SCIF_CONSOLE_PORT
so the SH7785 default value gets used. Without this patch
the value for SH7785 is set to 0xffe80000.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index fdb0493..fafa907 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -38,10 +38,10 @@ config EARLY_SCIF_CONSOLE_PORT
 	default "0xffe00000" if CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7763 || \
 				CPU_SUBTYPE_SH7722 || CPU_SUBTYPE_SH7366 || \
 				CPU_SUBTYPE_SH7343
-	default "0xffe80000" if CPU_SH4
 	default "0xffea0000" if CPU_SUBTYPE_SH7785
 	default "0xfffe8000" if CPU_SUBTYPE_SH7203
 	default "0xfffe9800" if CPU_SUBTYPE_SH7206 || CPU_SUBTYPE_SH7263
+	default "0xffe80000" if CPU_SH4
 	default "0x00000000"
 
 config EARLY_PRINTK
-- 
cgit v0.10.2


From b3cacf318172757783d8272fc333284dd4f50140 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 7 May 2009 10:31:39 +0000
Subject: sh: call clock framework init() callback once

Make sure that clk->ops->init() only gets called once in
the case of CLK_ALWAYS_ENABLED. Without this patch the
init() callback may be called multiple times.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 1dc8964..099373a 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -99,15 +99,18 @@ static int __clk_enable(struct clk *clk)
 	 * changes and the clock needs to hunt for the proper set of
 	 * divisors to use before it can effectively recalc.
 	 */
+
+	if (clk->flags & CLK_ALWAYS_ENABLED) {
+		kref_get(&clk->kref);
+		return 0;
+	}
+
 	if (unlikely(atomic_read(&clk->kref.refcount) == 1))
 		if (clk->ops && clk->ops->init)
 			clk->ops->init(clk);
 
 	kref_get(&clk->kref);
 
-	if (clk->flags & CLK_ALWAYS_ENABLED)
-		return 0;
-
 	if (likely(clk->ops && clk->ops->enable))
 		clk->ops->enable(clk);
 
-- 
cgit v0.10.2


From 06ee846a25642c434c55209151c633f538c12022 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 7 May 2009 10:44:55 +0000
Subject: sh: r7785 highlander clock fixes

Update the r7785 highlander defconfig to fix
PCLK value and that mode4 is set high.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index 890fe3c..5e677a8 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -269,7 +269,7 @@ CONFIG_SH_R7785RP=y
 #
 CONFIG_SH_TMU=y
 CONFIG_SH_TIMER_IRQ=28
-CONFIG_SH_PCLK_FREQ=50000000
+CONFIG_SH_PCLK_FREQ=33333333
 CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -333,7 +333,7 @@ CONFIG_GUSA=y
 CONFIG_ZERO_PAGE_OFFSET=0x00001000
 CONFIG_BOOT_LINK_OFFSET=0x00800000
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1"
+CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1 mode4_pin=high"
 
 #
 # Bus options
-- 
cgit v0.10.2


From e367592cc93ac653e7bc0bebbc9bb713a77e2696 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 7 May 2009 10:55:37 +0000
Subject: sh: TMU platform data for sh7785

This patch adds TMU platform data for sh7785. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 02688d7..b773e7d 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -393,6 +393,7 @@ config CPU_SUBTYPE_SH7785
 	select CPU_SHX2
 	select ARCH_SPARSEMEM_ENABLE
 	select SYS_SUPPORTS_NUMA
+	select SYS_SUPPORTS_TMU
 
 config CPU_SUBTYPE_SH7786
 	bool "Support SH7786 processor"
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index d80802a..dc5d3e5 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -13,8 +13,191 @@
 #include <linux/serial_sci.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/sh_timer.h>
 #include <asm/mmzone.h>
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 28,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 29,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 30,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffdc0008,
+		.end	= 0xffdc0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 96,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffdc0014,
+		.end	= 0xffdc001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 97,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffdc0020,
+		.end	= 0xffdc002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 98,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffea0000,
@@ -60,6 +243,12 @@ static struct platform_device sci_device = {
 };
 
 static struct platform_device *sh7785_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&sci_device,
 };
 
@@ -70,6 +259,21 @@ static int __init sh7785_devices_setup(void)
 }
 __initcall(sh7785_devices_setup);
 
+static struct platform_device *sh7785_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7785_early_devices,
+				   ARRAY_SIZE(sh7785_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From ec91d1335f478c5cd089d82ffbf936075c5f24c8 Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Thu, 7 May 2009 19:49:45 -0500
Subject: xfs: fix double unlock in xfs_swap_extents()

Regreesion from commit ef8f7fc, which rearranged the code in
xfs_swap_extents() leading to double unlock of xfs inode ilock.
That resulted in xfs_fsr deadlocking itself on platforms, which
don't handle double unlock of rw_semaphore nicely. It caused the
count go negative, which represents the write holder, without
really having one. ia64 is one of the platforms where deadlock
was easily reproduced and the fix was tested.

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e6d839b..7465f9e 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -347,13 +347,15 @@ xfs_swap_extents(
 
 	error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
 
-out_unlock:
-	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 out:
 	kmem_free(tempifp);
 	return error;
 
+out_unlock:
+	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	goto out;
+
 out_trans_cancel:
 	xfs_trans_cancel(tp, 0);
 	goto out_unlock;
-- 
cgit v0.10.2


From 9da29271bea5d831d745f3ceb7f6f6b2def13a5b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 7 May 2009 16:31:14 +0200
Subject: ALSA: hda - Fix secondary SPDIF on VT1708S and VT1702 codecs

VIA VT1708S and VT1702 codecs can have two SPDIF outputs.  One of them
should have been handled as the extra digital out, but it's not
properly accessed.

This patch fixes the handling of the secondary SPDIF on these codecs
with the slave dig-out as found in patch_sigmatel.c.  This makes the
use of such a device easier (for normal users).

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index b25a5cc..8e004fb 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -205,7 +205,7 @@ struct via_spec {
 
 	/* playback */
 	struct hda_multi_out multiout;
-	hda_nid_t extra_dig_out_nid;
+	hda_nid_t slave_dig_outs[2];
 
 	/* capture */
 	unsigned int num_adc_nids;
@@ -731,21 +731,6 @@ static int via_dig_playback_pcm_close(struct hda_pcm_stream *hinfo,
 	return snd_hda_multi_out_dig_close(codec, &spec->multiout);
 }
 
-/* setup SPDIF output stream */
-static void setup_dig_playback_stream(struct hda_codec *codec, hda_nid_t nid,
-				 unsigned int stream_tag, unsigned int format)
-{
-	/* turn off SPDIF once; otherwise the IEC958 bits won't be updated */
-	if (codec->spdif_ctls & AC_DIG1_ENABLE)
-		snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_DIGI_CONVERT_1,
-				    codec->spdif_ctls & ~AC_DIG1_ENABLE & 0xff);
-	snd_hda_codec_setup_stream(codec, nid, stream_tag, 0, format);
-	/* turn on again (if needed) */
-	if (codec->spdif_ctls & AC_DIG1_ENABLE)
-		snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_DIGI_CONVERT_1,
-				    codec->spdif_ctls & 0xff);
-}
-
 static int via_dig_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
 					struct hda_codec *codec,
 					unsigned int stream_tag,
@@ -753,19 +738,16 @@ static int via_dig_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
 					struct snd_pcm_substream *substream)
 {
 	struct via_spec *spec = codec->spec;
-	hda_nid_t nid;
-
-	/* 1st or 2nd S/PDIF */
-	if (substream->number == 0)
-		nid = spec->multiout.dig_out_nid;
-	else if (substream->number == 1)
-		nid = spec->extra_dig_out_nid;
-	else
-		return -1;
+	return snd_hda_multi_out_dig_prepare(codec, &spec->multiout,
+					     stream_tag, format, substream);
+}
 
-	mutex_lock(&codec->spdif_mutex);
-	setup_dig_playback_stream(codec, nid, stream_tag, format);
-	mutex_unlock(&codec->spdif_mutex);
+static int via_dig_playback_pcm_cleanup(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
+{
+	struct via_spec *spec = codec->spec;
+	snd_hda_multi_out_dig_cleanup(codec, &spec->multiout);
 	return 0;
 }
 
@@ -842,7 +824,8 @@ static struct hda_pcm_stream vt1708_pcm_digital_playback = {
 	.ops = {
 		.open = via_dig_playback_pcm_open,
 		.close = via_dig_playback_pcm_close,
-		.prepare = via_dig_playback_pcm_prepare
+		.prepare = via_dig_playback_pcm_prepare,
+		.cleanup = via_dig_playback_pcm_cleanup
 	},
 };
 
@@ -874,13 +857,6 @@ static int via_build_controls(struct hda_codec *codec)
 		if (err < 0)
 			return err;
 		spec->multiout.share_spdif = 1;
-
-		if (spec->extra_dig_out_nid) {
-			err = snd_hda_create_spdif_out_ctls(codec,
-						    spec->extra_dig_out_nid);
-			if (err < 0)
-				return err;
-		}
 	}
 	if (spec->dig_in_nid) {
 		err = snd_hda_create_spdif_in_ctls(codec, spec->dig_in_nid);
@@ -1013,10 +989,6 @@ static void via_unsol_event(struct hda_codec *codec,
 		via_gpio_control(codec);
 }
 
-static hda_nid_t slave_dig_outs[] = {
-	0,
-};
-
 static int via_init(struct hda_codec *codec)
 {
 	struct via_spec *spec = codec->spec;
@@ -1051,8 +1023,9 @@ static int via_init(struct hda_codec *codec)
 		snd_hda_codec_write(codec, spec->autocfg.dig_in_pin, 0,
 				    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN);
 
-	/* no slave outs */
-	codec->slave_dig_outs = slave_dig_outs;
+	/* assign slave outs */
+	if (spec->slave_dig_outs[0])
+		codec->slave_dig_outs = spec->slave_dig_outs;
 
  	return 0;
 }
@@ -2134,7 +2107,8 @@ static struct hda_pcm_stream vt1708B_pcm_digital_playback = {
 	.ops = {
 		.open = via_dig_playback_pcm_open,
 		.close = via_dig_playback_pcm_close,
-		.prepare = via_dig_playback_pcm_prepare
+		.prepare = via_dig_playback_pcm_prepare,
+		.cleanup = via_dig_playback_pcm_cleanup
 	},
 };
 
@@ -2589,14 +2563,15 @@ static struct hda_pcm_stream vt1708S_pcm_analog_capture = {
 };
 
 static struct hda_pcm_stream vt1708S_pcm_digital_playback = {
-	.substreams = 2,
+	.substreams = 1,
 	.channels_min = 2,
 	.channels_max = 2,
 	/* NID is set in via_build_pcms */
 	.ops = {
 		.open = via_dig_playback_pcm_open,
 		.close = via_dig_playback_pcm_close,
-		.prepare = via_dig_playback_pcm_prepare
+		.prepare = via_dig_playback_pcm_prepare,
+		.cleanup = via_dig_playback_pcm_cleanup
 	},
 };
 
@@ -2805,14 +2780,37 @@ static int vt1708S_auto_create_analog_input_ctls(struct via_spec *spec,
 	return 0;
 }
 
+/* fill out digital output widgets; one for master and one for slave outputs */
+static void fill_dig_outs(struct hda_codec *codec)
+{
+	struct via_spec *spec = codec->spec;
+	int i;
+
+	for (i = 0; i < spec->autocfg.dig_outs; i++) {
+		hda_nid_t nid;
+		int conn;
+
+		nid = spec->autocfg.dig_out_pins[i];
+		if (!nid)
+			continue;
+		conn = snd_hda_get_connections(codec, nid, &nid, 1);
+		if (conn < 1)
+			continue;
+		if (!spec->multiout.dig_out_nid)
+			spec->multiout.dig_out_nid = nid;
+		else {
+			spec->slave_dig_outs[0] = nid;
+			break; /* at most two dig outs */
+		}
+	}
+}
+
 static int vt1708S_parse_auto_config(struct hda_codec *codec)
 {
 	struct via_spec *spec = codec->spec;
 	int err;
-	static hda_nid_t vt1708s_ignore[] = {0x21, 0};
 
-	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg,
-					   vt1708s_ignore);
+	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
 	if (err < 0)
 		return err;
 	err = vt1708S_auto_fill_dac_nids(spec, &spec->autocfg);
@@ -2833,10 +2831,7 @@ static int vt1708S_parse_auto_config(struct hda_codec *codec)
 
 	spec->multiout.max_channels = spec->multiout.num_dacs * 2;
 
-	if (spec->autocfg.dig_outs)
-		spec->multiout.dig_out_nid = VT1708S_DIGOUT_NID;
-
-	spec->extra_dig_out_nid = 0x15;
+	fill_dig_outs(codec);
 
 	if (spec->kctls.list)
 		spec->mixers[spec->num_mixers++] = spec->kctls.list;
@@ -3000,7 +2995,8 @@ static struct hda_pcm_stream vt1702_pcm_digital_playback = {
 	.ops = {
 		.open = via_dig_playback_pcm_open,
 		.close = via_dig_playback_pcm_close,
-		.prepare = via_dig_playback_pcm_prepare
+		.prepare = via_dig_playback_pcm_prepare,
+		.cleanup = via_dig_playback_pcm_cleanup
 	},
 };
 
@@ -3128,10 +3124,8 @@ static int vt1702_parse_auto_config(struct hda_codec *codec)
 {
 	struct via_spec *spec = codec->spec;
 	int err;
-	static hda_nid_t vt1702_ignore[] = {0x1C, 0};
 
-	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg,
-					   vt1702_ignore);
+	err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
 	if (err < 0)
 		return err;
 	err = vt1702_auto_fill_dac_nids(spec, &spec->autocfg);
@@ -3152,10 +3146,7 @@ static int vt1702_parse_auto_config(struct hda_codec *codec)
 
 	spec->multiout.max_channels = spec->multiout.num_dacs * 2;
 
-	if (spec->autocfg.dig_outs)
-		spec->multiout.dig_out_nid = VT1702_DIGOUT_NID;
-
-	spec->extra_dig_out_nid = 0x1B;
+	fill_dig_outs(codec);
 
 	if (spec->kctls.list)
 		spec->mixers[spec->num_mixers++] = spec->kctls.list;
-- 
cgit v0.10.2


From 4fa48e1774992fd03a4cd83a0f2adecb288c46f8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 15:28:15 +0900
Subject: sh: Enable new TMU driver support for all SH-3 and SH-4 CPUs.

The TMU block is supported on all SH-3 and SH-4 subtypes, so just select
it there, rather than conditionalizing it per subtype.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b773e7d..88eb118 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -168,6 +168,7 @@ config CPU_SH3
 	bool
 	select CPU_HAS_INTEVT
 	select CPU_HAS_SR_RB
+	select SYS_SUPPORTS_TMU
 
 config CPU_SH4
 	bool
@@ -175,6 +176,7 @@ config CPU_SH4
 	select CPU_HAS_SR_RB
 	select CPU_HAS_PTEA if !CPU_SH4A || CPU_SHX2
 	select CPU_HAS_FPU if !CPU_SH4AL_DSP
+	select SYS_SUPPORTS_TMU
 
 config CPU_SH4A
 	bool
@@ -393,7 +395,6 @@ config CPU_SUBTYPE_SH7785
 	select CPU_SHX2
 	select ARCH_SPARSEMEM_ENABLE
 	select SYS_SUPPORTS_NUMA
-	select SYS_SUPPORTS_TMU
 
 config CPU_SUBTYPE_SH7786
 	bool "Support SH7786 processor"
@@ -428,7 +429,6 @@ config CPU_SUBTYPE_SH7722
 	select ARCH_SPARSEMEM_ENABLE
 	select SYS_SUPPORTS_NUMA
 	select SYS_SUPPORTS_CMT
-	select SYS_SUPPORTS_TMU
 
 config CPU_SUBTYPE_SH7366
 	bool "Support SH7366 processor"
-- 
cgit v0.10.2


From c2ecb4c4a7da16288062a057b693b7b1e16aaf88 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 15:39:25 +0900
Subject: sh: Move out rtc-sh registration from time_64.c to setup-sh5.c

Now that the onchip_remap() mess is sorted out, the rtc-sh support code
for SH-5 can follow the same approach as the other CPUs.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
index d8d59fe..9a362c8 100644
--- a/arch/sh/kernel/cpu/sh5/setup-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -34,8 +34,39 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct resource rtc_resources[] = {
+	[0] = {
+		.start	= PHYS_PERIPHERAL_BLOCK + 0x01040000,
+		.end	= PHYS_PERIPHERAL_BLOCK + 0x01040000 + 0x58 - 1,
+		.flags	= IORESOURCE_IO,
+	},
+	[1] = {
+		/* Period IRQ */
+		.start	= IRQ_PRI,
+		.flags	= IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* Carry IRQ */
+		.start	= IRQ_CUI,
+		.flags	= IORESOURCE_IRQ,
+	},
+	[3] = {
+		/* Alarm IRQ */
+		.start	= IRQ_ATI,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device rtc_device = {
+	.name		= "sh-rtc",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(rtc_resources),
+	.resource	= rtc_resources,
+};
+
 static struct platform_device *sh5_devices[] __initdata = {
 	&sci_device,
+	&rtc_device,
 };
 
 static int __init sh5_devices_setup(void)
diff --git a/arch/sh/kernel/time_64.c b/arch/sh/kernel/time_64.c
index 7bfeaa0..b4fe770 100644
--- a/arch/sh/kernel/time_64.c
+++ b/arch/sh/kernel/time_64.c
@@ -46,12 +46,6 @@
 #define TMU_TSTR_INIT	1
 #define TMU_TSTR_OFF	0
 
-/* Real Time Clock */
-#define	RTC_BLOCK_OFF	0x01040000
-#define RTC_BASE	PHYS_PERIPHERAL_BLOCK + RTC_BLOCK_OFF
-#define RTC_RCR1_CIE	0x10	/* Carry Interrupt Enable */
-#define RTC_RCR1	(rtc_base + 0x38)
-
 /* Time Management Unit */
 #define	TMU_BLOCK_OFF	0x01020000
 #define TMU_BASE	PHYS_PERIPHERAL_BLOCK + TMU_BLOCK_OFF
@@ -68,8 +62,7 @@
 
 #define TICK_SIZE (tick_nsec / 1000)
 
-static unsigned long tmu_base, rtc_base;
-unsigned long cprc_base;
+static unsigned long tmu_base;
 
 /* Variables to allow interpolation of time of day to resolution better than a
  * jiffy. */
@@ -248,11 +241,6 @@ void __init time_init(void)
 		panic("Unable to remap TMU\n");
 	}
 
-	rtc_base = (unsigned long)ioremap_nocache(RTC_BASE, 1024);
-	if (!rtc_base) {
-		panic("Unable to remap RTC\n");
-	}
-
 	clk = clk_get(NULL, "cpu_clk");
 	scaled_recip_ctc_ticks_per_jiffy = ((1ULL << CTC_JIFFY_SCALE_SHIFT) /
 			(unsigned long long)(clk_get_rate(clk) / HZ));
@@ -274,41 +262,3 @@ void __init time_init(void)
 	ctrl_outl(interval, TMU0_TCNT);
 	ctrl_outb(TMU_TSTR_INIT, TMU_TSTR);
 }
-
-static struct resource rtc_resources[] = {
-	[0] = {
-		/* RTC base, filled in by rtc_init */
-		.flags	= IORESOURCE_IO,
-	},
-	[1] = {
-		/* Period IRQ */
-		.start	= IRQ_PRI,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		/* Carry IRQ */
-		.start	= IRQ_CUI,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		/* Alarm IRQ */
-		.start	= IRQ_ATI,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device rtc_device = {
-	.name		= "sh-rtc",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(rtc_resources),
-	.resource	= rtc_resources,
-};
-
-static int __init rtc_init(void)
-{
-	rtc_resources[0].start	= rtc_base;
-	rtc_resources[0].end	= rtc_resources[0].start + 0x58 - 1;
-
-	return platform_device_register(&rtc_device);
-}
-device_initcall(rtc_init);
-- 
cgit v0.10.2


From add47067a8ca324e9d26c4373350dc3cce7f1e7f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 16:12:17 +0900
Subject: sh: Finish the sh64 migration off of ARCH_USES_GETTIMEOFFSET.

This adds sh_tmu support to the SH-5 subtypes, which subsequently allows
us to kill off time_64.c and use the now generic time_32.c. As a bonus,
SH-5 now supports highres timers and tickless for the first time.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 88eb118..ca5c09b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -141,10 +141,6 @@ config ARCH_NO_VIRT_TO_BUS
 config ARCH_HAS_DEFAULT_IDLE
 	def_bool y
 
-config ARCH_USES_GETTIMEOFFSET
-	def_bool y 
-	depends on SUPERH64
-
 config IO_TRAPPED
 	bool
 
@@ -190,6 +186,7 @@ config CPU_SH4AL_DSP
 config CPU_SH5
 	bool
 	select CPU_HAS_FPU
+	select SYS_SUPPORTS_TMU
 
 config CPU_SHX2
 	bool
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index d256c77..4667d4c 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -2,7 +2,7 @@ extra-y	:= head_64.o init_task.o vmlinux.lds
 
 obj-y	:= debugtraps.o idle.o io.o io_generic.o irq.o machvec.o process_64.o \
 	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
-	   syscalls_64.o time_64.o topology.o traps.o traps_64.o
+	   syscalls_64.o time_32.o topology.o traps.o traps_64.o
 
 obj-y				+= cpu/ timers/
 obj-$(CONFIG_VSYSCALL)		+= vsyscall/
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
index 9a362c8..678d69b 100644
--- a/arch/sh/kernel/cpu/sh5/setup-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -13,6 +13,7 @@
 #include <linux/serial_sci.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/sh_timer.h>
 #include <asm/addrspace.h>
 
 static struct plat_sci_port sci_platform_data[] = {
@@ -64,6 +65,110 @@ static struct platform_device rtc_device = {
 	.resource	= rtc_resources,
 };
 
+#define	TMU_BLOCK_OFF	0x01020000
+#define TMU_BASE	PHYS_PERIPHERAL_BLOCK + TMU_BLOCK_OFF
+#define TMU0_BASE	(TMU_BASE + 0x8 + (0xc * 0x0))
+#define TMU1_BASE	(TMU_BASE + 0x8 + (0xc * 0x1))
+#define TMU2_BASE	(TMU_BASE + 0x8 + (0xc * 0x2))
+
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= TMU0_BASE,
+		.end	= TMU0_BASE + 0xc - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= IRQ_TUNI0,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= TMU1_BASE,
+		.end	= TMU1_BASE + 0xc - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= IRQ_TUNI1,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= TMU2_BASE,
+		.end	= TMU2_BASE + 0xc - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= IRQ_TUNI2,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct platform_device *sh5_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
 static struct platform_device *sh5_devices[] __initdata = {
 	&sci_device,
 	&rtc_device,
@@ -71,7 +176,20 @@ static struct platform_device *sh5_devices[] __initdata = {
 
 static int __init sh5_devices_setup(void)
 {
+	int ret;
+
+	ret = platform_add_devices(sh5_early_devices,
+				   ARRAY_SIZE(sh5_early_devices));
+	if (unlikely(ret != 0))
+		return ret;
+
 	return platform_add_devices(sh5_devices,
 				    ARRAY_SIZE(sh5_devices));
 }
 __initcall(sh5_devices_setup);
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh5_early_devices,
+				   ARRAY_SIZE(sh5_early_devices));
+}
diff --git a/arch/sh/kernel/time_64.c b/arch/sh/kernel/time_64.c
deleted file mode 100644
index b4fe770..0000000
--- a/arch/sh/kernel/time_64.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * arch/sh/kernel/time_64.c
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- * Copyright (C) 2003 - 2007  Paul Mundt
- * Copyright (C) 2003  Richard Curnow
- *
- *    Original TMU/RTC code taken from sh version.
- *    Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
- *      Some code taken from i386 version.
- *      Copyright (C) 1991, 1992, 1995  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/errno.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/profile.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/bcd.h>
-#include <linux/timex.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <cpu/registers.h>	 /* required by inline __asm__ stmt. */
-#include <cpu/irq.h>
-#include <asm/addrspace.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/delay.h>
-#include <asm/clock.h>
-
-#define TMU_TOCR_INIT	0x00
-#define TMU0_TCR_INIT	0x0020
-#define TMU_TSTR_INIT	1
-#define TMU_TSTR_OFF	0
-
-/* Time Management Unit */
-#define	TMU_BLOCK_OFF	0x01020000
-#define TMU_BASE	PHYS_PERIPHERAL_BLOCK + TMU_BLOCK_OFF
-#define TMU0_BASE	tmu_base + 0x8 + (0xc * 0x0)
-#define TMU1_BASE	tmu_base + 0x8 + (0xc * 0x1)
-#define TMU2_BASE	tmu_base + 0x8 + (0xc * 0x2)
-
-#define TMU_TOCR	tmu_base+0x0	/* Byte access */
-#define TMU_TSTR	tmu_base+0x4	/* Byte access */
-
-#define TMU0_TCOR	TMU0_BASE+0x0	/* Long access */
-#define TMU0_TCNT	TMU0_BASE+0x4	/* Long access */
-#define TMU0_TCR	TMU0_BASE+0x8	/* Word access */
-
-#define TICK_SIZE (tick_nsec / 1000)
-
-static unsigned long tmu_base;
-
-/* Variables to allow interpolation of time of day to resolution better than a
- * jiffy. */
-
-/* This is effectively protected by xtime_lock */
-static unsigned long ctc_last_interrupt;
-static unsigned long long usecs_per_jiffy = 1000000/HZ; /* Approximation */
-
-#define CTC_JIFFY_SCALE_SHIFT 40
-
-/* 2**CTC_JIFFY_SCALE_SHIFT / ctc_ticks_per_jiffy */
-static unsigned long long scaled_recip_ctc_ticks_per_jiffy;
-
-/* Estimate number of microseconds that have elapsed since the last timer tick,
-   by scaling the delta that has occurred in the CTC register.
-
-   WARNING WARNING WARNING : This algorithm relies on the CTC decrementing at
-   the CPU clock rate.  If the CPU sleeps, the CTC stops counting.  Bear this
-   in mind if enabling SLEEP_WORKS in process.c.  In that case, this algorithm
-   probably needs to use TMU.TCNT0 instead.  This will work even if the CPU is
-   sleeping, though will be coarser.
-
-   FIXME : What if usecs_per_tick is moving around too much, e.g. if an adjtime
-   is running or if the freq or tick arguments of adjtimex are modified after
-   we have calibrated the scaling factor?  This will result in either a jump at
-   the end of a tick period, or a wrap backwards at the start of the next one,
-   if the application is reading the time of day often enough.  I think we
-   ought to do better than this.  For this reason, usecs_per_jiffy is left
-   separated out in the calculation below.  This allows some future hook into
-   the adjtime-related stuff in kernel/timer.c to remove this hazard.
-
-*/
-
-static unsigned long usecs_since_tick(void)
-{
-	unsigned long long current_ctc;
-	long ctc_ticks_since_interrupt;
-	unsigned long long ull_ctc_ticks_since_interrupt;
-	unsigned long result;
-
-	unsigned long long mul1_out;
-	unsigned long long mul1_out_high;
-	unsigned long long mul2_out_low, mul2_out_high;
-
-	/* Read CTC register */
-	asm ("getcon cr62, %0" : "=r" (current_ctc));
-	/* Note, the CTC counts down on each CPU clock, not up.
-	   Note(2), use long type to get correct wraparound arithmetic when
-	   the counter crosses zero. */
-	ctc_ticks_since_interrupt = (long) ctc_last_interrupt - (long) current_ctc;
-	ull_ctc_ticks_since_interrupt = (unsigned long long) ctc_ticks_since_interrupt;
-
-	/* Inline assembly to do 32x32x32->64 multiplier */
-	asm volatile ("mulu.l %1, %2, %0" :
-	     "=r" (mul1_out) :
-	     "r" (ull_ctc_ticks_since_interrupt), "r" (usecs_per_jiffy));
-
-	mul1_out_high = mul1_out >> 32;
-
-	asm volatile ("mulu.l %1, %2, %0" :
-	     "=r" (mul2_out_low) :
-	     "r" (mul1_out), "r" (scaled_recip_ctc_ticks_per_jiffy));
-
-#if 1
-	asm volatile ("mulu.l %1, %2, %0" :
-	     "=r" (mul2_out_high) :
-	     "r" (mul1_out_high), "r" (scaled_recip_ctc_ticks_per_jiffy));
-#endif
-
-	result = (unsigned long) (((mul2_out_high << 32) + mul2_out_low) >> CTC_JIFFY_SCALE_SHIFT);
-
-	return result;
-}
-
-u32 arch_gettimeoffset(void)
-{
-	return usecs_since_tick() * 1000;
-}
-
-/* Dummy RTC ops */
-static void null_rtc_get_time(struct timespec *tv)
-{
-	tv->tv_sec = mktime(2000, 1, 1, 0, 0, 0);
-	tv->tv_nsec = 0;
-}
-
-static int null_rtc_set_time(const time_t secs)
-{
-	return 0;
-}
-
-void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
-int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
-
-/* last time the RTC clock got updated */
-static long last_rtc_update;
-
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static inline void do_timer_interrupt(void)
-{
-	unsigned long long current_ctc;
-
-	if (current->pid)
-		profile_tick(CPU_PROFILING);
-
-	/*
-	 * Here we are in the timer irq handler. We just have irqs locally
-	 * disabled but we don't know if the timer_bh is running on the other
-	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
-	 * the irq version of write_lock because as just said we have irq
-	 * locally disabled. -arca
-	 */
-	write_seqlock(&xtime_lock);
-	asm ("getcon cr62, %0" : "=r" (current_ctc));
-	ctc_last_interrupt = (unsigned long) current_ctc;
-
-	do_timer(1);
-
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 */
-	if (ntp_synced() &&
-	    xtime.tv_sec > last_rtc_update + 660 &&
-	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
-	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-		if (rtc_sh_set_time(xtime.tv_sec) == 0)
-			last_rtc_update = xtime.tv_sec;
-		else
-			/* do it again in 60 s */
-			last_rtc_update = xtime.tv_sec - 600;
-	}
-	write_sequnlock(&xtime_lock);
-
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
-	unsigned long timer_status;
-
-	/* Clear UNF bit */
-	timer_status = ctrl_inw(TMU0_TCR);
-	timer_status &= ~0x100;
-	ctrl_outw(timer_status, TMU0_TCR);
-
-	do_timer_interrupt();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq0  = {
-	.handler = timer_interrupt,
-	.flags = IRQF_DISABLED,
-	.name = "timer",
-};
-
-void __init time_init(void)
-{
-	unsigned long interval;
-	struct clk *clk;
-
-	tmu_base = (unsigned long)ioremap_nocache(TMU_BASE, 1024);
-	if (!tmu_base) {
-		panic("Unable to remap TMU\n");
-	}
-
-	clk = clk_get(NULL, "cpu_clk");
-	scaled_recip_ctc_ticks_per_jiffy = ((1ULL << CTC_JIFFY_SCALE_SHIFT) /
-			(unsigned long long)(clk_get_rate(clk) / HZ));
-
-	rtc_sh_get_time(&xtime);
-
-	setup_irq(TIMER_IRQ, &irq0);
-
-	clk = clk_get(NULL, "module_clk");
-	interval = (clk_get_rate(clk)/(HZ*4));
-
-	printk("Interval = %ld\n", interval);
-
-	/* Start TMU0 */
-	ctrl_outb(TMU_TSTR_OFF, TMU_TSTR);
-	ctrl_outb(TMU_TOCR_INIT, TMU_TOCR);
-	ctrl_outw(TMU0_TCR_INIT, TMU0_TCR);
-	ctrl_outl(interval, TMU0_TCOR);
-	ctrl_outl(interval, TMU0_TCNT);
-	ctrl_outb(TMU_TSTR_INIT, TMU_TSTR);
-}
-- 
cgit v0.10.2


From b179b72fad5c88c3b616fb88a9ae7cbbc1a750d3 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 16:17:36 +0900
Subject: sh: Rename arch/sh/kernel/time_32.c to arch/sh/kernel/time.c.

This is now fully generic, and used both by _32 and _64 variants.
Rename it accordingly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 5b1d4d0..aee08af 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -11,7 +11,7 @@ endif
 
 obj-y	:= debugtraps.o idle.o io.o io_generic.o irq.o			\
 	   machvec.o process_32.o ptrace_32.o setup.o signal_32.o	\
-	   sys_sh.o sys_sh32.o syscalls_32.o time_32.o topology.o	\
+	   sys_sh.o sys_sh32.o syscalls_32.o time.o topology.o	\
 	   traps.o traps_32.o
 
 obj-y				+= cpu/ timers/
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index 4667d4c..31f6ebf 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -2,7 +2,7 @@ extra-y	:= head_64.o init_task.o vmlinux.lds
 
 obj-y	:= debugtraps.o idle.o io.o io_generic.o irq.o machvec.o process_64.o \
 	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
-	   syscalls_64.o time_32.o topology.o traps.o traps_64.o
+	   syscalls_64.o time.o topology.o traps.o traps_64.o
 
 obj-y				+= cpu/ timers/
 obj-$(CONFIG_VSYSCALL)		+= vsyscall/
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
new file mode 100644
index 0000000..d41ca4c
--- /dev/null
+++ b/arch/sh/kernel/time.c
@@ -0,0 +1,231 @@
+/*
+ *  arch/sh/kernel/time_32.c
+ *
+ *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
+ *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
+ *  Copyright (C) 2002 - 2009  Paul Mundt
+ *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
+ *
+ *  Some code taken from i386 version.
+ *    Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/clockchips.h>
+#include <linux/mc146818rtc.h>	/* for rtc_lock */
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/rtc.h>
+#include <asm/clock.h>
+#include <asm/rtc.h>
+#include <asm/timer.h>
+#include <asm/kgdb.h>
+
+struct sys_timer *sys_timer;
+
+/* Move this somewhere more sensible.. */
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
+
+/* Dummy RTC ops */
+static void null_rtc_get_time(struct timespec *tv)
+{
+	tv->tv_sec = mktime(2000, 1, 1, 0, 0, 0);
+	tv->tv_nsec = 0;
+}
+
+static int null_rtc_set_time(const time_t secs)
+{
+	return 0;
+}
+
+void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
+int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
+
+unsigned int get_rtc_time(struct rtc_time *tm)
+{
+	if (rtc_sh_get_time != null_rtc_get_time) {
+		struct timespec tv;
+
+		rtc_sh_get_time(&tv);
+		rtc_time_to_tm(tv.tv_sec, tm);
+	}
+
+	return RTC_24H;
+}
+EXPORT_SYMBOL(get_rtc_time);
+
+int set_rtc_time(struct rtc_time *tm)
+{
+	unsigned long secs;
+
+	rtc_tm_to_time(tm, &secs);
+	return rtc_sh_set_time(secs);
+}
+EXPORT_SYMBOL(set_rtc_time);
+
+static int __init rtc_generic_init(void)
+{
+	struct platform_device *pdev;
+
+	if (rtc_sh_get_time == null_rtc_get_time)
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
+}
+module_init(rtc_generic_init);
+
+/* last time the RTC clock got updated */
+static long last_rtc_update;
+
+/*
+ * handle_timer_tick() needs to keep up the real-time clock,
+ * as well as call the "do_timer()" routine every clocktick
+ */
+void handle_timer_tick(void)
+{
+	if (current->pid)
+		profile_tick(CPU_PROFILING);
+
+	/*
+	 * Here we are in the timer irq handler. We just have irqs locally
+	 * disabled but we don't know if the timer_bh is running on the other
+	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
+	 * the irq version of write_lock because as just said we have irq
+	 * locally disabled. -arca
+	 */
+	write_seqlock(&xtime_lock);
+	do_timer(1);
+
+	/*
+	 * If we have an externally synchronized Linux clock, then update
+	 * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+	 * called as close as possible to 500 ms before the new second starts.
+	 */
+	if (ntp_synced() &&
+	    xtime.tv_sec > last_rtc_update + 660 &&
+	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
+	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
+		if (rtc_sh_set_time(xtime.tv_sec) == 0)
+			last_rtc_update = xtime.tv_sec;
+		else
+			/* do it again in 60s */
+			last_rtc_update = xtime.tv_sec - 600;
+	}
+	write_sequnlock(&xtime_lock);
+
+#ifndef CONFIG_SMP
+	update_process_times(user_mode(get_irq_regs()));
+#endif
+}
+
+#ifdef CONFIG_PM
+int timer_suspend(struct sys_device *dev, pm_message_t state)
+{
+	struct sys_timer *sys_timer = container_of(dev, struct sys_timer, dev);
+
+	sys_timer->ops->stop();
+
+	return 0;
+}
+
+int timer_resume(struct sys_device *dev)
+{
+	struct sys_timer *sys_timer = container_of(dev, struct sys_timer, dev);
+
+	sys_timer->ops->start();
+
+	return 0;
+}
+#else
+#define timer_suspend NULL
+#define timer_resume NULL
+#endif
+
+static struct sysdev_class timer_sysclass = {
+	.name	 = "timer",
+	.suspend = timer_suspend,
+	.resume	 = timer_resume,
+};
+
+static int __init timer_init_sysfs(void)
+{
+	int ret;
+
+	if (!sys_timer)
+		return 0;
+
+	ret = sysdev_class_register(&timer_sysclass);
+	if (ret != 0)
+		return ret;
+
+	sys_timer->dev.cls = &timer_sysclass;
+	return sysdev_register(&sys_timer->dev);
+}
+device_initcall(timer_init_sysfs);
+
+void (*board_time_init)(void);
+
+struct clocksource clocksource_sh = {
+	.name		= "SuperH",
+};
+
+unsigned long long sched_clock(void)
+{
+	unsigned long long cycles;
+
+	/* jiffies based sched_clock if no clocksource is installed */
+	if (!clocksource_sh.rating)
+		return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+
+	cycles = clocksource_sh.read(&clocksource_sh);
+	return cyc2ns(&clocksource_sh, cycles);
+}
+
+static void __init sh_late_time_init(void)
+{
+	/*
+	 * Make sure all compiled-in early timers register themselves.
+	 * Run probe() for one "earlytimer" device.
+	 */
+	early_platform_driver_register_all("earlytimer");
+	if (early_platform_driver_probe("earlytimer", 1, 0))
+		return;
+
+	/*
+	 * Find the timer to use as the system timer, it will be
+	 * initialized for us.
+	 */
+	sys_timer = get_sys_timer();
+	if (unlikely(!sys_timer))
+		panic("System timer missing.\n");
+
+	printk(KERN_INFO "Using %s for system timer\n", sys_timer->name);
+}
+
+void __init time_init(void)
+{
+	if (board_time_init)
+		board_time_init();
+
+	clk_init();
+
+	rtc_sh_get_time(&xtime);
+	set_normalized_timespec(&wall_to_monotonic,
+				-xtime.tv_sec, -xtime.tv_nsec);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	local_timer_setup(smp_processor_id());
+#endif
+
+	late_time_init = sh_late_time_init;
+}
+
diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c
deleted file mode 100644
index d41ca4c..0000000
--- a/arch/sh/kernel/time_32.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- *  arch/sh/kernel/time_32.c
- *
- *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
- *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
- *  Copyright (C) 2002 - 2009  Paul Mundt
- *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
- *
- *  Some code taken from i386 version.
- *    Copyright (C) 1991, 1992, 1995  Linus Torvalds
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/profile.h>
-#include <linux/timex.h>
-#include <linux/sched.h>
-#include <linux/clockchips.h>
-#include <linux/mc146818rtc.h>	/* for rtc_lock */
-#include <linux/platform_device.h>
-#include <linux/smp.h>
-#include <linux/rtc.h>
-#include <asm/clock.h>
-#include <asm/rtc.h>
-#include <asm/timer.h>
-#include <asm/kgdb.h>
-
-struct sys_timer *sys_timer;
-
-/* Move this somewhere more sensible.. */
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-
-/* Dummy RTC ops */
-static void null_rtc_get_time(struct timespec *tv)
-{
-	tv->tv_sec = mktime(2000, 1, 1, 0, 0, 0);
-	tv->tv_nsec = 0;
-}
-
-static int null_rtc_set_time(const time_t secs)
-{
-	return 0;
-}
-
-void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
-int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
-
-unsigned int get_rtc_time(struct rtc_time *tm)
-{
-	if (rtc_sh_get_time != null_rtc_get_time) {
-		struct timespec tv;
-
-		rtc_sh_get_time(&tv);
-		rtc_time_to_tm(tv.tv_sec, tm);
-	}
-
-	return RTC_24H;
-}
-EXPORT_SYMBOL(get_rtc_time);
-
-int set_rtc_time(struct rtc_time *tm)
-{
-	unsigned long secs;
-
-	rtc_tm_to_time(tm, &secs);
-	return rtc_sh_set_time(secs);
-}
-EXPORT_SYMBOL(set_rtc_time);
-
-static int __init rtc_generic_init(void)
-{
-	struct platform_device *pdev;
-
-	if (rtc_sh_get_time == null_rtc_get_time)
-		return -ENODEV;
-
-	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
-
-	return 0;
-}
-module_init(rtc_generic_init);
-
-/* last time the RTC clock got updated */
-static long last_rtc_update;
-
-/*
- * handle_timer_tick() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-void handle_timer_tick(void)
-{
-	if (current->pid)
-		profile_tick(CPU_PROFILING);
-
-	/*
-	 * Here we are in the timer irq handler. We just have irqs locally
-	 * disabled but we don't know if the timer_bh is running on the other
-	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
-	 * the irq version of write_lock because as just said we have irq
-	 * locally disabled. -arca
-	 */
-	write_seqlock(&xtime_lock);
-	do_timer(1);
-
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 */
-	if (ntp_synced() &&
-	    xtime.tv_sec > last_rtc_update + 660 &&
-	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
-	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-		if (rtc_sh_set_time(xtime.tv_sec) == 0)
-			last_rtc_update = xtime.tv_sec;
-		else
-			/* do it again in 60s */
-			last_rtc_update = xtime.tv_sec - 600;
-	}
-	write_sequnlock(&xtime_lock);
-
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-}
-
-#ifdef CONFIG_PM
-int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct sys_timer *sys_timer = container_of(dev, struct sys_timer, dev);
-
-	sys_timer->ops->stop();
-
-	return 0;
-}
-
-int timer_resume(struct sys_device *dev)
-{
-	struct sys_timer *sys_timer = container_of(dev, struct sys_timer, dev);
-
-	sys_timer->ops->start();
-
-	return 0;
-}
-#else
-#define timer_suspend NULL
-#define timer_resume NULL
-#endif
-
-static struct sysdev_class timer_sysclass = {
-	.name	 = "timer",
-	.suspend = timer_suspend,
-	.resume	 = timer_resume,
-};
-
-static int __init timer_init_sysfs(void)
-{
-	int ret;
-
-	if (!sys_timer)
-		return 0;
-
-	ret = sysdev_class_register(&timer_sysclass);
-	if (ret != 0)
-		return ret;
-
-	sys_timer->dev.cls = &timer_sysclass;
-	return sysdev_register(&sys_timer->dev);
-}
-device_initcall(timer_init_sysfs);
-
-void (*board_time_init)(void);
-
-struct clocksource clocksource_sh = {
-	.name		= "SuperH",
-};
-
-unsigned long long sched_clock(void)
-{
-	unsigned long long cycles;
-
-	/* jiffies based sched_clock if no clocksource is installed */
-	if (!clocksource_sh.rating)
-		return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-
-	cycles = clocksource_sh.read(&clocksource_sh);
-	return cyc2ns(&clocksource_sh, cycles);
-}
-
-static void __init sh_late_time_init(void)
-{
-	/*
-	 * Make sure all compiled-in early timers register themselves.
-	 * Run probe() for one "earlytimer" device.
-	 */
-	early_platform_driver_register_all("earlytimer");
-	if (early_platform_driver_probe("earlytimer", 1, 0))
-		return;
-
-	/*
-	 * Find the timer to use as the system timer, it will be
-	 * initialized for us.
-	 */
-	sys_timer = get_sys_timer();
-	if (unlikely(!sys_timer))
-		panic("System timer missing.\n");
-
-	printk(KERN_INFO "Using %s for system timer\n", sys_timer->name);
-}
-
-void __init time_init(void)
-{
-	if (board_time_init)
-		board_time_init();
-
-	clk_init();
-
-	rtc_sh_get_time(&xtime);
-	set_normalized_timespec(&wall_to_monotonic,
-				-xtime.tv_sec, -xtime.tv_nsec);
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	local_timer_setup(smp_processor_id());
-#endif
-
-	late_time_init = sh_late_time_init;
-}
-
-- 
cgit v0.10.2


From 6d134b9e8d3f32331ad2faca2db8186f54198931 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 16:36:13 +0900
Subject: sh: Wire up GENERIC_CMOS_UPDATE for the platforms that need it.

Now that everything has converted over to generic timekeeping, we need an
alternate method for keeping the RTC updated for those platforms that are
still using the rtc_sh_get/set_time pairs, presently limited to SH-03 and
the Dreamcast. This wires up the GENERIC_CMOS_UPDATE hooks for those to
maintain the same behaviour.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index ca5c09b..d88a61b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -83,6 +83,10 @@ config GENERIC_CLOCKEVENTS
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
 
+config GENERIC_CMOS_UPDATE
+	def_bool y
+	depends on SH_SH03 || SH_DREAMCAST
+
 config GENERIC_LOCKBREAK
 	def_bool y
 	depends on SMP && PREEMPT
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index d41ca4c..77b841a 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -46,6 +46,20 @@ static int null_rtc_set_time(const time_t secs)
 void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
 int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
 
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
+unsigned long read_persistent_clock(void)
+{
+	struct timespec tv;
+	rtc_sh_get_time(&tv);
+	return tv.tv_sec;
+}
+
+int update_persistent_clock(struct timespec now)
+{
+	return rtc_sh_set_time(now.tv_sec);
+}
+#endif
+
 unsigned int get_rtc_time(struct rtc_time *tm)
 {
 	if (rtc_sh_get_time != null_rtc_get_time) {
-- 
cgit v0.10.2


From 5ac5496411b30d41945a996fe7a7fb5abccf2aaa Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 16:44:00 +0900
Subject: sh: Kill off dead handle_timer_tick() code.

Nothing is using this anymore now that we have fully converted to generic
time, so kill it off completely.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h
index cb16645..6c08511 100644
--- a/arch/sh/include/asm/timer.h
+++ b/arch/sh/include/asm/timer.h
@@ -18,17 +18,12 @@ struct sys_timer {
 	struct sys_timer_ops	*ops;
 };
 
-#define TICK_SIZE (tick_nsec / 1000)
-
 extern struct sys_timer tmu_timer;
 extern struct sys_timer *sys_timer;
 
 /* arch/sh/kernel/timers/timer.c */
 struct sys_timer *get_sys_timer(void);
 
-/* arch/sh/kernel/time.c */
-void handle_timer_tick(void);
-
 extern struct clocksource clocksource_sh;
 
 #endif /* __ASM_SH_TIMER_H */
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 77b841a..f4c304a 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -1,13 +1,14 @@
 /*
- *  arch/sh/kernel/time_32.c
+ *  arch/sh/kernel/time.c
  *
  *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
  *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
  *  Copyright (C) 2002 - 2009  Paul Mundt
  *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
  *
- *  Some code taken from i386 version.
- *    Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -23,7 +24,6 @@
 #include <asm/clock.h>
 #include <asm/rtc.h>
 #include <asm/timer.h>
-#include <asm/kgdb.h>
 
 struct sys_timer *sys_timer;
 
@@ -97,50 +97,6 @@ static int __init rtc_generic_init(void)
 }
 module_init(rtc_generic_init);
 
-/* last time the RTC clock got updated */
-static long last_rtc_update;
-
-/*
- * handle_timer_tick() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-void handle_timer_tick(void)
-{
-	if (current->pid)
-		profile_tick(CPU_PROFILING);
-
-	/*
-	 * Here we are in the timer irq handler. We just have irqs locally
-	 * disabled but we don't know if the timer_bh is running on the other
-	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
-	 * the irq version of write_lock because as just said we have irq
-	 * locally disabled. -arca
-	 */
-	write_seqlock(&xtime_lock);
-	do_timer(1);
-
-	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 */
-	if (ntp_synced() &&
-	    xtime.tv_sec > last_rtc_update + 660 &&
-	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
-	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
-		if (rtc_sh_set_time(xtime.tv_sec) == 0)
-			last_rtc_update = xtime.tv_sec;
-		else
-			/* do it again in 60s */
-			last_rtc_update = xtime.tv_sec - 600;
-	}
-	write_sequnlock(&xtime_lock);
-
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(get_irq_regs()));
-#endif
-}
-
 #ifdef CONFIG_PM
 int timer_suspend(struct sys_device *dev, pm_message_t state)
 {
@@ -242,4 +198,3 @@ void __init time_init(void)
 
 	late_time_init = sh_late_time_init;
 }
-
-- 
cgit v0.10.2


From 6459d7bb72e9767bc7d22f2ee44aab35188e4b8a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 16:47:48 +0900
Subject: sh: Kill off dead timer sysclass pm hooks.

With the conversion to generic clockevents these are completely unused,
so just kill it off.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index f4c304a..c26576a 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -97,51 +97,6 @@ static int __init rtc_generic_init(void)
 }
 module_init(rtc_generic_init);
 
-#ifdef CONFIG_PM
-int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct sys_timer *sys_timer = container_of(dev, struct sys_timer, dev);
-
-	sys_timer->ops->stop();
-
-	return 0;
-}
-
-int timer_resume(struct sys_device *dev)
-{
-	struct sys_timer *sys_timer = container_of(dev, struct sys_timer, dev);
-
-	sys_timer->ops->start();
-
-	return 0;
-}
-#else
-#define timer_suspend NULL
-#define timer_resume NULL
-#endif
-
-static struct sysdev_class timer_sysclass = {
-	.name	 = "timer",
-	.suspend = timer_suspend,
-	.resume	 = timer_resume,
-};
-
-static int __init timer_init_sysfs(void)
-{
-	int ret;
-
-	if (!sys_timer)
-		return 0;
-
-	ret = sysdev_class_register(&timer_sysclass);
-	if (ret != 0)
-		return ret;
-
-	sys_timer->dev.cls = &timer_sysclass;
-	return sysdev_register(&sys_timer->dev);
-}
-device_initcall(timer_init_sysfs);
-
 void (*board_time_init)(void);
 
 struct clocksource clocksource_sh = {
-- 
cgit v0.10.2


From cd1408f22d2fd8f0d09c082b07cf0b9bcfb6eac9 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 16:57:35 +0900
Subject: sh: mach-sh03: Give the sh03 rtc its own spinlock.

This converts the sh03 rtc code off of using the global rtc_lock and on
to its own spinlock. There are no other possible users of the rtc_lock,
so serializing with it is not necessary.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-sh03/rtc.c b/arch/sh/boards/mach-sh03/rtc.c
index 0a9266b..a8b9f84 100644
--- a/arch/sh/boards/mach-sh03/rtc.c
+++ b/arch/sh/boards/mach-sh03/rtc.c
@@ -35,13 +35,13 @@
 #define RTC_BUSY	1
 #define RTC_STOP	2
 
-extern spinlock_t rtc_lock;
+static DEFINE_SPINLOCK(sh03_rtc_lock);
 
 unsigned long get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
 
-	spin_lock(&rtc_lock);
+	spin_lock(&sh03_rtc_lock);
  again:
 	do {
 		sec  = (ctrl_inb(RTC_SEC1) & 0xf) + (ctrl_inb(RTC_SEC10) & 0x7) * 10;
@@ -73,7 +73,7 @@ unsigned long get_cmos_time(void)
 		goto again;
 	}
 
-	spin_unlock(&rtc_lock);
+	spin_unlock(&sh03_rtc_lock);
 	return mktime(year, mon, day, hour, min, sec);
 }
 
@@ -91,7 +91,7 @@ static int set_rtc_mmss(unsigned long nowtime)
 	int i;
 
 	/* gets recalled with irq locally disabled */
-	spin_lock(&rtc_lock);
+	spin_lock(&sh03_rtc_lock);
 	for (i = 0 ; i < 1000000 ; i++)	/* may take up to 1 second... */
 		if (!(ctrl_inb(RTC_CTL) & RTC_BUSY))
 			break;
@@ -113,7 +113,7 @@ static int set_rtc_mmss(unsigned long nowtime)
 		       cmos_minutes, real_minutes);
 		retval = -1;
 	}
-	spin_unlock(&rtc_lock);
+	spin_unlock(&sh03_rtc_lock);
 
 	return retval;
 }
-- 
cgit v0.10.2


From 1af2fe45fec15bdba446c22b9b602699cdabfc9f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 16:59:05 +0900
Subject: sh: Kill off the global rtc_lock with extreme prejudice.

Now that all of the possible users for rtc_lock have gone away, it is no
longer necessary to keep this lock definition around.

This follows several other architectures that have either recently
dropped it or never supported it in the first place.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index c26576a..e0aa769 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -17,7 +17,6 @@
 #include <linux/timex.h>
 #include <linux/sched.h>
 #include <linux/clockchips.h>
-#include <linux/mc146818rtc.h>	/* for rtc_lock */
 #include <linux/platform_device.h>
 #include <linux/smp.h>
 #include <linux/rtc.h>
@@ -27,10 +26,6 @@
 
 struct sys_timer *sys_timer;
 
-/* Move this somewhere more sensible.. */
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-
 /* Dummy RTC ops */
 static void null_rtc_get_time(struct timespec *tv)
 {
-- 
cgit v0.10.2


From cb3a86c89ebdf917b88665f70e06863986fbab5c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 17:25:35 +0900
Subject: sh: Kill off sh64's hand-rolled syscall tracer.

This is no longer necessary, as there are now sufficient generic
alternatives available.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index fafa907..44627ef 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -137,12 +137,4 @@ config SH64_SR_WATCH
 	bool "Debug: set SR.WATCH to enable hardware watchpoints and trace"
 	depends on SUPERH64
 
-config POOR_MANS_STRACE
-	bool "Debug: enable rudimentary strace facility"
-	depends on SUPERH64
-	help
-	  This option allows system calls to be traced to the console.  It also
-	  aids in detecting kernel stack underflow.  It is useful for debugging
-	  early-userland problems (e.g. init incurring fatal exceptions.)
-
 endmenu
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index bece1f7..493da97 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -193,7 +193,7 @@ zImage uImage uImage.srec vmlinux.srec: vmlinux
 
 compressed: zImage
 
-archprepare: maketools arch/sh/lib64/syscalltab.h
+archprepare: maketools
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
@@ -205,34 +205,4 @@ define archhelp
 	@echo '  uImage.srec  	           - Create an S-record for U-Boot'
 endef
 
-define filechk_gen-syscalltab
-       (set -e; \
-	echo "/*"; \
-	echo " * DO NOT MODIFY."; \
-	echo " *"; \
-	echo " * This file was generated by arch/sh/Makefile"; \
-	echo " * Any changes will be reverted at build time."; \
-	echo " */"; \
-	echo ""; \
-	echo "#ifndef __SYSCALLTAB_H"; \
-	echo "#define __SYSCALLTAB_H"; \
-	echo ""; \
-	echo "#include <linux/kernel.h>"; \
-	echo ""; \
-	echo "struct syscall_info {"; \
-	echo "	const char *name;"; \
-	echo "} syscall_info_table[] = {"; \
-	sed -e '/^.*\.long /!d;s//	{ "/;s/\(\([^/]*\)\/\)\{1\}.*/\2/; \
-		s/[ \t]*$$//g;s/$$/" },/;s/\("\)sys_/\1/g'; \
-	echo "};"; \
-	echo ""; \
-	echo "#define NUM_SYSCALL_INFO_ENTRIES ARRAY_SIZE(syscall_info_table)";\
-	echo ""; \
-	echo "#endif /* __SYSCALLTAB_H */" )
-endef
-
-arch/sh/lib64/syscalltab.h: arch/sh/kernel/syscalls_64.S
-	$(call filechk,gen-syscalltab)
-
-CLEAN_FILES += arch/sh/lib64/syscalltab.h \
-	       include/asm-sh/machtypes.h
+CLEAN_FILES += include/asm-sh/machtypes.h
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index 5162975..b0aacf6 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -812,27 +812,6 @@ no_underflow:
 	! exceptions
 	add	SP, ZERO, r14
 
-#ifdef CONFIG_POOR_MANS_STRACE
-	/* We've pushed all the registers now, so only r2-r4 hold anything
-	 * useful. Move them into callee save registers */
-	or	r2, ZERO, r28
-	or	r3, ZERO, r29
-	or	r4, ZERO, r30
-
-	/* Preserve r2 as the event code */
-	movi	evt_debug, r3
-	ori	r3, 1, r3
-	ptabs	r3, tr0
-
-	or	SP, ZERO, r6
-	getcon	TRA, r5
-	blink	tr0, LINK
-
-	or	r28, ZERO, r2
-	or	r29, ZERO, r3
-	or	r30, ZERO, r4
-#endif
-
 	/* For syscall and debug race condition, get TRA now */
 	getcon	TRA, r5
 
@@ -887,11 +866,6 @@ no_underflow:
  */
 	.global ret_from_irq
 ret_from_irq:
-#ifdef CONFIG_POOR_MANS_STRACE
-	pta	evt_debug_ret_from_irq, tr0
-	ori	SP, 0, r2
-	blink	tr0, LINK
-#endif
 	ld.q	SP, FRAME_S(FSSR), r6
 	shlri	r6, 30, r6
 	andi	r6, 1, r6
@@ -905,12 +879,6 @@ ret_from_irq:
 ret_from_exception:
 	preempt_stop()
 
-#ifdef CONFIG_POOR_MANS_STRACE
-	pta	evt_debug_ret_from_exc, tr0
-	ori	SP, 0, r2
-	blink	tr0, LINK
-#endif
-
 	ld.q	SP, FRAME_S(FSSR), r6
 	shlri	r6, 30, r6
 	andi	r6, 1, r6
@@ -1236,18 +1204,6 @@ syscall_bad:
 	.global syscall_ret
 syscall_ret:
 	st.q	SP, FRAME_R(9), r2	/* Expecting SP back to BASIC frame */
-
-#ifdef CONFIG_POOR_MANS_STRACE
-	/* nothing useful in registers at this point */
-
-	movi	evt_debug2, r5
-	ori	r5, 1, r5
-	ptabs	r5, tr0
-	ld.q	SP, FRAME_R(9), r2
-	or	SP, ZERO, r3
-	blink	tr0, LINK
-#endif
-
 	ld.q	SP, FRAME_S(FSPC), r2
 	addi	r2, 4, r2		/* Move PC, being pre-execution event */
 	st.q	SP, FRAME_S(FSPC), r2
@@ -1268,25 +1224,12 @@ ret_from_fork:
 	ptabs	r5, tr0
 	blink	tr0, LINK
 
-#ifdef CONFIG_POOR_MANS_STRACE
-	/* nothing useful in registers at this point */
-
-	movi	evt_debug2, r5
-	ori	r5, 1, r5
-	ptabs	r5, tr0
-	ld.q	SP, FRAME_R(9), r2
-	or	SP, ZERO, r3
-	blink	tr0, LINK
-#endif
-
 	ld.q	SP, FRAME_S(FSPC), r2
 	addi	r2, 4, r2		/* Move PC, being pre-execution event */
 	st.q	SP, FRAME_S(FSPC), r2
 	pta	ret_from_syscall, tr0
 	blink	tr0, ZERO
 
-
-
 syscall_allowed:
 	/* Use LINK to deflect the exit point, default is syscall_ret */
 	pta	syscall_ret, tr0
diff --git a/arch/sh/lib64/.gitignore b/arch/sh/lib64/.gitignore
deleted file mode 100644
index 3508c2c..0000000
--- a/arch/sh/lib64/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-syscalltab.h
diff --git a/arch/sh/lib64/dbg.c b/arch/sh/lib64/dbg.c
index 2fb8eaf..25f2481 100644
--- a/arch/sh/lib64/dbg.c
+++ b/arch/sh/lib64/dbg.c
@@ -135,140 +135,6 @@ void print_itlb(void)
 	    (" =============================================================\n");
 }
 
-/* ======================================================================= */
-
-#ifdef CONFIG_POOR_MANS_STRACE
-
-#include "syscalltab.h"
-
-struct ring_node {
-	int evt;
-	int ret_addr;
-	int event;
-	int tra;
-	int pid;
-	unsigned long sp;
-	unsigned long pc;
-};
-
-static struct ring_node event_ring[16];
-static int event_ptr = 0;
-
-struct stored_syscall_data {
-	int pid;
-	int syscall_number;
-};
-
-#define N_STORED_SYSCALLS 16
-
-static struct stored_syscall_data stored_syscalls[N_STORED_SYSCALLS];
-static int syscall_next=0;
-static int syscall_next_print=0;
-
-void evt_debug(int evt, int ret_addr, int event, int tra, struct pt_regs *regs)
-{
-	int syscallno = tra & 0xff;
-	unsigned long sp;
-	unsigned long stack_bottom;
-	int pid;
-	struct ring_node *rr;
-
-	pid = current->pid;
-	stack_bottom = (unsigned long) task_stack_page(current);
-	asm volatile("ori r15, 0, %0" : "=r" (sp));
-	rr = event_ring + event_ptr;
-	rr->evt = evt;
-	rr->ret_addr = ret_addr;
-	rr->event = event;
-	rr->tra = tra;
-	rr->pid = pid;
-	rr->sp = sp;
-	rr->pc = regs->pc;
-
-	if (sp < stack_bottom + 3092) {
-		int i, j;
-		printk("evt_debug : stack underflow report\n");
-		for (j=0, i = event_ptr; j<16; j++) {
-			rr = event_ring + i;
-			printk("evt=%08x event=%08x tra=%08x pid=%5d sp=%08lx pc=%08lx\n",
-				rr->evt, rr->event, rr->tra, rr->pid, rr->sp, rr->pc);
-			i--;
-			i &= 15;
-		}
-		panic("STACK UNDERFLOW\n");
-	}
-
-	event_ptr = (event_ptr + 1) & 15;
-
-	if ((event == 2) && (evt == 0x160)) {
-		if (syscallno < NUM_SYSCALL_INFO_ENTRIES) {
-			/* Store the syscall information to print later.  We
-			 * can't print this now - currently we're running with
-			 * SR.BL=1, so we can't take a tlbmiss (which could occur
-			 * in the console drivers under printk).
-			 *
-			 * Just overwrite old entries on ring overflow - this
-			 * is only for last-hope debugging. */
-			stored_syscalls[syscall_next].pid = current->pid;
-			stored_syscalls[syscall_next].syscall_number = syscallno;
-			syscall_next++;
-			syscall_next &= (N_STORED_SYSCALLS - 1);
-		}
-	}
-}
-
-static void drain_syscalls(void) {
-	while (syscall_next_print != syscall_next) {
-		printk("Task %d: %s()\n",
-			stored_syscalls[syscall_next_print].pid,
-			syscall_info_table[stored_syscalls[syscall_next_print].syscall_number].name);
-			syscall_next_print++;
-			syscall_next_print &= (N_STORED_SYSCALLS - 1);
-	}
-}
-
-void evt_debug2(unsigned int ret)
-{
-	drain_syscalls();
-	printk("Task %d: syscall returns %08x\n", current->pid, ret);
-}
-
-void evt_debug_ret_from_irq(struct pt_regs *regs)
-{
-	int pid;
-	struct ring_node *rr;
-
-	pid = current->pid;
-	rr = event_ring + event_ptr;
-	rr->evt = 0xffff;
-	rr->ret_addr = 0;
-	rr->event = 0;
-	rr->tra = 0;
-	rr->pid = pid;
-	rr->pc = regs->pc;
-	event_ptr = (event_ptr + 1) & 15;
-}
-
-void evt_debug_ret_from_exc(struct pt_regs *regs)
-{
-	int pid;
-	struct ring_node *rr;
-
-	pid = current->pid;
-	rr = event_ring + event_ptr;
-	rr->evt = 0xfffe;
-	rr->ret_addr = 0;
-	rr->event = 0;
-	rr->tra = 0;
-	rr->pid = pid;
-	rr->pc = regs->pc;
-	event_ptr = (event_ptr + 1) & 15;
-}
-
-#endif /* CONFIG_POOR_MANS_STRACE */
-
-/* ======================================================================= */
-
 void show_excp_regs(char *from, int trapnr, int signr, struct pt_regs *regs)
 {
 
-- 
cgit v0.10.2


From ef9f89996e450e9686816b6fc1e34d7b9a65766c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 17:36:57 +0900
Subject: sh: Kill off unused sh64 debug code.

None of the print_page() code and associated helpers are presently used
by anything in-tree, so just kill it off.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/lib64/dbg.c b/arch/sh/lib64/dbg.c
index 25f2481..6152a6a 100644
--- a/arch/sh/lib64/dbg.c
+++ b/arch/sh/lib64/dbg.c
@@ -246,51 +246,3 @@ void show_excp_regs(char *from, int trapnr, int signr, struct pt_regs *regs)
 	print_dtlb();
 	print_itlb();
 }
-
-/* ======================================================================= */
-
-/*
-** Depending on <base> scan the MMU, Data or Instruction side
-** looking for a valid mapping matching Eaddr & asid.
-** Return -1 if not found or the TLB id entry otherwise.
-** Note: it works only for 4k pages!
-*/
-static unsigned long
-lookup_mmu_side(unsigned long base, unsigned long Eaddr, unsigned long asid)
-{
-	regType_t pteH;
-	unsigned long epn;
-	int count;
-
-	epn = Eaddr & 0xfffff000;
-
-	for (count = 0; count < MAX_TLBs; count++, base += TLB_STEP) {
-		pteH = getConfigReg(base);
-		if (GET_VALID(pteH))
-			if ((unsigned long) GET_EPN(pteH) == epn)
-				if ((unsigned long) GET_ASID(pteH) == asid)
-					break;
-	}
-	return ((unsigned long) ((count < MAX_TLBs) ? base : -1));
-}
-
-unsigned long lookup_dtlb(unsigned long Eaddr)
-{
-	unsigned long asid = get_asid();
-	return (lookup_mmu_side((u64) DTLB_BASE, Eaddr, asid));
-}
-
-unsigned long lookup_itlb(unsigned long Eaddr)
-{
-	unsigned long asid = get_asid();
-	return (lookup_mmu_side((u64) ITLB_BASE, Eaddr, asid));
-}
-
-void print_page(struct page *page)
-{
-	printk("  page[%p] -> index 0x%lx,  count 0x%x,  flags 0x%lx\n",
-	       page, page->index, page_count(page), page->flags);
-	printk("       address_space = %p, pages =%ld\n", page->mapping,
-	       page->mapping->nrpages);
-
-}
diff --git a/arch/sh/lib64/udelay.c b/arch/sh/lib64/udelay.c
index d76bd80..f215b06 100644
--- a/arch/sh/lib64/udelay.c
+++ b/arch/sh/lib64/udelay.c
@@ -33,7 +33,7 @@ void __delay(unsigned long loops)
 			     :"0"(loops));
 }
 
-inline void __const_udelay(unsigned long xloops)
+void __const_udelay(unsigned long xloops)
 {
 	__delay(xloops * (HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy));
 }
-- 
cgit v0.10.2


From 7d170b1bc540a1d83098a9f27cf4939e026fda81 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 17:41:59 +0900
Subject: sh: Move out cayman-specific panic handler code to its own file.

This moves out the cayman-specific panic handler code to a better
location, and leaves the generic implementation a simple stub that is
still used under emulation.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-cayman/Makefile b/arch/sh/boards/mach-cayman/Makefile
index cafe1ac..00fa3ea 100644
--- a/arch/sh/boards/mach-cayman/Makefile
+++ b/arch/sh/boards/mach-cayman/Makefile
@@ -1,4 +1,4 @@
 #
 # Makefile for the Hitachi Cayman specific parts of the kernel
 #
-obj-y := setup.o irq.o
+obj-y := setup.o irq.o panic.o
diff --git a/arch/sh/boards/mach-cayman/panic.c b/arch/sh/boards/mach-cayman/panic.c
new file mode 100644
index 0000000..d1e6730
--- /dev/null
+++ b/arch/sh/boards/mach-cayman/panic.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2003  Richard Curnow, SuperH UK Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <cpu/registers.h>
+
+/* THIS IS A PHYSICAL ADDRESS */
+#define HDSP2534_ADDR (0x04002100)
+
+static void poor_mans_delay(void)
+{
+	int i;
+
+	for (i = 0; i < 2500000; i++)
+		cpu_relax();
+}
+
+static void show_value(unsigned long x)
+{
+	int i;
+	unsigned nibble;
+	for (i = 0; i < 8; i++) {
+		nibble = ((x >> (i * 4)) & 0xf);
+
+		__raw_writeb(nibble + ((nibble > 9) ? 55 : 48),
+			  HDSP2534_ADDR + 0xe0 + ((7 - i) << 2));
+	}
+}
+
+void
+panic_handler(unsigned long panicPC, unsigned long panicSSR,
+	      unsigned long panicEXPEVT)
+{
+	while (1) {
+		/* This piece of code displays the PC on the LED display */
+		show_value(panicPC);
+		poor_mans_delay();
+		show_value(panicSSR);
+		poor_mans_delay();
+		show_value(panicEXPEVT);
+		poor_mans_delay();
+	}
+}
diff --git a/arch/sh/lib64/panic.c b/arch/sh/lib64/panic.c
index da32ba7..38c954e 100644
--- a/arch/sh/lib64/panic.c
+++ b/arch/sh/lib64/panic.c
@@ -6,53 +6,10 @@
  * for more details.
  */
 
-#include <linux/kernel.h>
-#include <asm/io.h>
-#include <cpu/registers.h>
-
-/* THIS IS A PHYSICAL ADDRESS */
-#define HDSP2534_ADDR (0x04002100)
-
-#ifdef CONFIG_SH_CAYMAN
-
-static void poor_mans_delay(void)
-{
-	int i;
-	for (i = 0; i < 2500000; i++) {
-	}		/* poor man's delay */
-}
-
-static void show_value(unsigned long x)
-{
-	int i;
-	unsigned nibble;
-	for (i = 0; i < 8; i++) {
-		nibble = ((x >> (i * 4)) & 0xf);
-
-		ctrl_outb(nibble + ((nibble > 9) ? 55 : 48),
-			  HDSP2534_ADDR + 0xe0 + ((7 - i) << 2));
-	}
-}
-
-#endif
-
 void
 panic_handler(unsigned long panicPC, unsigned long panicSSR,
 	      unsigned long panicEXPEVT)
 {
-#ifdef CONFIG_SH_CAYMAN
-	while (1) {
-		/* This piece of code displays the PC on the LED display */
-		show_value(panicPC);
-		poor_mans_delay();
-		show_value(panicSSR);
-		poor_mans_delay();
-		show_value(panicEXPEVT);
-		poor_mans_delay();
-	}
-#endif
-
 	/* Never return from the panic handler */
 	for (;;) ;
-
 }
-- 
cgit v0.10.2


From 4f5ecaa05493dfddf155b40224b951592bfce325 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 8 May 2009 08:23:29 +0000
Subject: sh: clock framework update, fix count and kill off kref

This patch updates the clock framework use count code.
With this patch the enable() and disable() callbacks
only get called when counting from and to zero.
While at it the kref stuff gets replaced with an int.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 2f6c962..b1f2919 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -1,7 +1,6 @@
 #ifndef __ASM_SH_CLOCK_H
 #define __ASM_SH_CLOCK_H
 
-#include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/seq_file.h>
 #include <linux/clk.h>
@@ -28,7 +27,7 @@ struct clk {
 	struct clk		*parent;
 	struct clk_ops		*ops;
 
-	struct kref		kref;
+	int			usecount;
 
 	unsigned long		rate;
 	unsigned long		flags;
@@ -37,6 +36,7 @@ struct clk {
 
 #define CLK_ALWAYS_ENABLED	(1 << 0)
 #define CLK_RATE_PROPAGATES	(1 << 1)
+#define CLK_NEEDS_INIT		(1 << 2)
 
 /* Should be defined by processor-specific code */
 void arch_init_clk_ops(struct clk_ops **, int type);
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 099373a..133dbe4 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -19,7 +19,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
-#include <linux/kref.h>
 #include <linux/kobject.h>
 #include <linux/sysdev.h>
 #include <linux/seq_file.h>
@@ -90,7 +89,7 @@ static void propagate_rate(struct clk *clk)
 	}
 }
 
-static int __clk_enable(struct clk *clk)
+static void __clk_init(struct clk *clk)
 {
 	/*
 	 * See if this is the first time we're enabling the clock, some
@@ -100,19 +99,33 @@ static int __clk_enable(struct clk *clk)
 	 * divisors to use before it can effectively recalc.
 	 */
 
-	if (clk->flags & CLK_ALWAYS_ENABLED) {
-		kref_get(&clk->kref);
-		return 0;
-	}
-
-	if (unlikely(atomic_read(&clk->kref.refcount) == 1))
+	if (clk->flags & CLK_NEEDS_INIT) {
 		if (clk->ops && clk->ops->init)
 			clk->ops->init(clk);
 
-	kref_get(&clk->kref);
+		clk->flags &= ~CLK_NEEDS_INIT;
+	}
+}
+
+static int __clk_enable(struct clk *clk)
+{
+	if (!clk)
+		return -EINVAL;
+
+	clk->usecount++;
+
+	/* nothing to do if always enabled */
+	if (clk->flags & CLK_ALWAYS_ENABLED)
+		return 0;
+
+	if (clk->usecount == 1) {
+		__clk_init(clk);
 
-	if (likely(clk->ops && clk->ops->enable))
-		clk->ops->enable(clk);
+		__clk_enable(clk->parent);
+
+		if (clk->ops && clk->ops->enable)
+			clk->ops->enable(clk);
+	}
 
 	return 0;
 }
@@ -122,11 +135,6 @@ int clk_enable(struct clk *clk)
 	unsigned long flags;
 	int ret;
 
-	if (!clk)
-		return -EINVAL;
-
-	clk_enable(clk->parent);
-
 	spin_lock_irqsave(&clock_lock, flags);
 	ret = __clk_enable(clk);
 	spin_unlock_irqrestore(&clock_lock, flags);
@@ -135,21 +143,23 @@ int clk_enable(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_enable);
 
-static void clk_kref_release(struct kref *kref)
-{
-	/* Nothing to do */
-}
-
 static void __clk_disable(struct clk *clk)
 {
-	int count = kref_put(&clk->kref, clk_kref_release);
+	if (!clk)
+		return;
+
+	clk->usecount--;
+
+	WARN_ON(clk->usecount < 0);
 
 	if (clk->flags & CLK_ALWAYS_ENABLED)
 		return;
 
-	if (!count) {	/* count reaches zero, disable the clock */
+	if (clk->usecount == 0) {
 		if (likely(clk->ops && clk->ops->disable))
 			clk->ops->disable(clk);
+
+		__clk_disable(clk->parent);
 	}
 }
 
@@ -157,14 +167,9 @@ void clk_disable(struct clk *clk)
 {
 	unsigned long flags;
 
-	if (!clk)
-		return;
-
 	spin_lock_irqsave(&clock_lock, flags);
 	__clk_disable(clk);
 	spin_unlock_irqrestore(&clock_lock, flags);
-
-	clk_disable(clk->parent);
 }
 EXPORT_SYMBOL_GPL(clk_disable);
 
@@ -173,14 +178,14 @@ int clk_register(struct clk *clk)
 	mutex_lock(&clock_list_sem);
 
 	list_add(&clk->node, &clock_list);
-	kref_init(&clk->kref);
+	clk->usecount = 0;
+	clk->flags |= CLK_NEEDS_INIT;
 
 	mutex_unlock(&clock_list_sem);
 
 	if (clk->flags & CLK_ALWAYS_ENABLED) {
+		__clk_init(clk);
 		pr_debug( "Clock '%s' is ALWAYS_ENABLED\n", clk->name);
-		if (clk->ops && clk->ops->init)
-			clk->ops->init(clk);
 		if (clk->ops && clk->ops->enable)
 			clk->ops->enable(clk);
 		pr_debug( "Enabled.");
@@ -356,7 +361,7 @@ static int show_clocks(char *buf, char **start, off_t off,
 		p += sprintf(p, "%-12s\t: %ld.%02ldMHz\t%s\n", clk->name,
 			     rate / 1000000, (rate % 1000000) / 10000,
 			     ((clk->flags & CLK_ALWAYS_ENABLED) ||
-			      (atomic_read(&clk->kref.refcount) != 1)) ?
+			      clk->usecount > 0) ?
 			     "enabled" : "disabled");
 	}
 
-- 
cgit v0.10.2


From 583d1d549ff107500481461cec0325189c5349ec Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 8 May 2009 08:27:19 +0000
Subject: sh: enable TMU clocksource on sh7722

This patch enables the TMU clocksource on sh7722.
To prioritize TMU over CMT we also adjust the CMT
clock source rating.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 624e8e5..b1c3e7f 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -183,7 +183,7 @@ static struct sh_timer_config cmt_platform_data = {
 	.timer_bit = 5,
 	.clk = "cmt0",
 	.clockevent_rating = 125,
-	.clocksource_rating = 200,
+	.clocksource_rating = 125,
 };
 
 static struct resource cmt_resources[] = {
@@ -245,7 +245,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.channel_offset = 0x10,
 	.timer_bit = 1,
 	.clk = "tmu0",
-	.clocksource_rating = 0, /* disabled for now */
+	.clocksource_rating = 200,
 };
 
 static struct resource tmu1_resources[] = {
-- 
cgit v0.10.2


From 47dd6f4439811c6c83d9abf2f364bcb6d2684640 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 8 May 2009 08:32:18 +0000
Subject: sh: TMU platform data for sh7723

This patch adds TMU platform data for sh7723. Both clockevent
and clocksource support is enabled. While at it, adjust the
CMT clocksource rating to prioritize the TMU.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index fb9b542..484ca2d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -107,7 +107,7 @@ static struct sh_timer_config cmt_platform_data = {
 	.timer_bit = 5,
 	.clk = "cmt0",
 	.clockevent_rating = 125,
-	.clocksource_rating = 200,
+	.clocksource_rating = 125,
 };
 
 static struct resource cmt_resources[] = {
@@ -133,6 +133,188 @@ static struct platform_device cmt_device = {
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu1",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffd90008,
+		.end	= 0xffd90013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 57,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu1",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffd90014,
+		.end	= 0xffd9001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 58,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu1",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffd90020,
+		.end	= 0xffd9002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 57,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase        = 0xffe00000,
@@ -255,6 +437,12 @@ static struct platform_device iic_device = {
 
 static struct platform_device *sh7723_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&sci_device,
 	&rtc_device,
 	&iic_device,
@@ -282,6 +470,12 @@ __initcall(sh7723_devices_setup);
 
 static struct platform_device *sh7723_early_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 };
 
 void __init plat_early_device_setup(void)
-- 
cgit v0.10.2


From 6b2e8523df148c15ea5abf13075026fb8bdb3f86 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 7 May 2009 11:56:49 -0700
Subject: xen: reserve Xen start_info rather than e820 reserving

Use reserve_early rather than e820 reservations for Xen start info and mfn->pfn
table, so that the memory use is a bit more self-documenting.

[ Impact: cleanup ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Xen-devel <xen-devel@lists.xensource.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <4A032EF1.6070708@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 15c6c68..ad0047f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -61,9 +61,9 @@ char * __init xen_memory_setup(void)
 	 *  - xen_start_info
 	 * See comment above "struct start_info" in <xen/interface/xen.h>
 	 */
-	e820_add_region(__pa(xen_start_info->mfn_list),
-			xen_start_info->pt_base - xen_start_info->mfn_list,
-			E820_RESERVED);
+	reserve_early(__pa(xen_start_info->mfn_list),
+		      __pa(xen_start_info->pt_base),
+			"XEN START INFO");
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-- 
cgit v0.10.2


From b366328335b98373325977e116d2423f500708ac Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 18:01:03 +0900
Subject: sh: Drop dead rules from arch/sh/kernel/Makefile_64.

Several of these options are specific to the SHcompact ISA and will need
to be rewritten for SHmedia if they are to be supported at all. Drop
the impossible rules for now.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index 31f6ebf..c845a0d 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -5,13 +5,10 @@ obj-y	:= debugtraps.o idle.o io.o io_generic.o irq.o machvec.o process_64.o \
 	   syscalls_64.o time.o topology.o traps.o traps_64.o
 
 obj-y				+= cpu/ timers/
-obj-$(CONFIG_VSYSCALL)		+= vsyscall/
 obj-$(CONFIG_SMP)		+= smp.o
-obj-$(CONFIG_SH_STANDARD_BIOS)	+= sh_bios.o
 obj-$(CONFIG_SH_CPU_FREQ)	+= cpufreq.o
 obj-$(CONFIG_MODULES)		+= sh_ksyms_64.o module.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
-- 
cgit v0.10.2


From 1dcdb5a9e7c235e6e80f1f4d5b8247b3e5347e48 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Mon, 27 Apr 2009 17:44:11 +0200
Subject: oprofile: re-add force_arch_perfmon option

This re-adds the force_arch_perfmon option that was in the original
arch perfmon patchkit. Originally this was rejected in favour
of a generalized perfmon=name option, but it turned out implementing
the later in a reliable way is hard (and it would have been easy
to crash the kernel if a user gets it wrong)

But now Atom and Core i7 support being readded a user would
need to update their oprofile userland to beyond 0.9.4 to use oprofile again
on Atom or Core i7.

To avoid this problem readd the force_arch_perfmon option.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 90b3924..9b9566b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1650,6 +1650,12 @@ and is between 256 and 4096 characters. It is defined in the file
 	oprofile.timer=	[HW]
 			Use timer interrupt instead of performance counters
 
+	oprofile.force_arch_perfmon=1	[X86]
+			Force use of architectural perfmon instead of
+			the CPU specific event set.
+			This might be useful if you have older oprofile
+			userland or if you want common events over Intel CPUs.
+
 	osst=		[HW,SCSI] SCSI Tape Driver
 			Format: <buffer_size>,<write_threshold>
 			See also Documentation/scsi/st.txt.
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 202864a..e5171c9 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -389,10 +389,16 @@ static int __init p4_init(char **cpu_type)
 	return 0;
 }
 
+int force_arch_perfmon;
+module_param(force_arch_perfmon, int, 0);
+
 static int __init ppro_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
+	if (force_arch_perfmon && cpu_has_arch_perfmon)
+		return 0;
+
 	switch (cpu_model) {
 	case 0 ... 2:
 		*cpu_type = "i386/ppro";
-- 
cgit v0.10.2


From 1f3d7b60691993d8d368d8dd7d5d85871d41e8f5 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Mon, 27 Apr 2009 17:44:12 +0200
Subject: oprofile: remove undocumented oprofile.p4force option

There are no new P4s and the oprofile code knows about all existing
ones, so we don't really need the p4force option anymore.

Remove it.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index e5171c9..f472c0c 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -356,14 +356,11 @@ static void exit_sysfs(void)
 #define exit_sysfs() do { } while (0)
 #endif /* CONFIG_PM */
 
-static int p4force;
-module_param(p4force, int, 0);
-
 static int __init p4_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 
-	if (!p4force && (cpu_model > 6 || cpu_model == 5))
+	if (cpu_model > 6 || cpu_model == 5)
 		return 0;
 
 #ifndef CONFIG_SMP
-- 
cgit v0.10.2


From 6adf406f0a0eaf37251018d15f51e93f5b538ee6 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Mon, 27 Apr 2009 17:44:13 +0200
Subject: oprofile: add support for Core i7 and Atom

The registers are about the same as other Family 6 CPUs
so we only need to add detection.

I'm not completely happy with calling Nehalem Core i7 because
there will be undoubtedly other Nehalem based CPUs
in the future with different marketing names, but it's
the best we got for now.

Requires updated oprofile userland for the new event files.

If you don't want to update right now you can also use
oprofile.force_arch_perfmon=1 (added in the next patch) with 0.9.4

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f472c0c..3308147 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -417,6 +417,13 @@ static int __init ppro_init(char **cpu_type)
 	case 15: case 23:
 		*cpu_type = "i386/core_2";
 		break;
+	case 26:
+		arch_perfmon_setup_counters();
+		*cpu_type = "i386/core_i7";
+		break;
+	case 28:
+		*cpu_type = "i386/atom";
+		break;
 	default:
 		/* Unknown */
 		return 0;
-- 
cgit v0.10.2


From 7e4e0bd50e80df2fe5501f48f872448376cdd997 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 6 May 2009 12:10:23 +0200
Subject: oprofile: introduce module_param oprofile.cpu_type

This patch removes module_param oprofile.force_arch_perfmon and
introduces oprofile.cpu_type=archperfmon instead. This new parameter
can be reused for other models and architectures.

Currently only archperfmon is supported.

Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9b9566b..6ce5f48 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1650,11 +1650,13 @@ and is between 256 and 4096 characters. It is defined in the file
 	oprofile.timer=	[HW]
 			Use timer interrupt instead of performance counters
 
-	oprofile.force_arch_perfmon=1	[X86]
-			Force use of architectural perfmon instead of
-			the CPU specific event set.
-			This might be useful if you have older oprofile
-			userland or if you want common events over Intel CPUs.
+	oprofile.cpu_type=	Force an oprofile cpu type
+			This might be useful if you have an older oprofile
+			userland or if you want common events.
+			Format: { archperfmon }
+			archperfmon: [X86] Force use of architectural
+				perfmon on Intel CPUs instead of the
+				CPU specific event set.
 
 	osst=		[HW,SCSI] SCSI Tape Driver
 			Format: <buffer_size>,<write_threshold>
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3308147..3b285e6 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -386,8 +386,17 @@ static int __init p4_init(char **cpu_type)
 	return 0;
 }
 
-int force_arch_perfmon;
-module_param(force_arch_perfmon, int, 0);
+static int force_arch_perfmon;
+static int force_cpu_type(const char *str, struct kernel_param *kp)
+{
+	if (!strcmp(str, "archperfmon")) {
+		force_arch_perfmon = 1;
+		printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+	}
+
+	return 0;
+}
+module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
 
 static int __init ppro_init(char **cpu_type)
 {
-- 
cgit v0.10.2


From 6eac1af01112d1919b1c432f6591e6368e8d538f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 19:48:47 +0900
Subject: sh: Always select RTC_LIB, not just for SUPERH32.

The RTC_LIB helpers are used in arch/sh/kernel/time.c, which was
previously only the case for the 32-bit variant. Now that this has
become the common implementation, move the RTC_LIB select to reflect
that. Fixes up the sh64 build.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index d88a61b..6ed16c4 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -15,6 +15,7 @@ config SUPERH
 	select HAVE_IOREMAP_PROT if MMU
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
+	select RTC_LIB
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
@@ -30,7 +31,6 @@ config SUPERH32
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_ARCH_KGDB
 	select ARCH_HIBERNATION_POSSIBLE if MMU
-	select RTC_LIB
 
 config SUPERH64
 	def_bool ARCH = "sh64"
-- 
cgit v0.10.2


From c198d811812417961582d4e25360372ca1eccdae Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Thu, 7 May 2009 14:32:00 +0300
Subject: ASoC: TWL4030: Fix typo in twl4030_codec_mute function

Copy-paste error: TWL4030_PRECKL_GAIN >> TWL4030_PRECKR_GAIN
It has not caused problems, since
TWL4030_PRECKL_GAIN == TWL4030_PRECKR_GAIN == 0x30

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 1a00e4b..fd392c6 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -237,7 +237,7 @@ static void twl4030_codec_mute(struct snd_soc_codec *codec, int mute)
 					TWL4030_REG_PRECKL_CTL);
 		reg_val = twl4030_read_reg_cache(codec, TWL4030_REG_PRECKR_CTL);
 		twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
-					reg_val & (~TWL4030_PRECKL_GAIN),
+					reg_val & (~TWL4030_PRECKR_GAIN),
 					TWL4030_REG_PRECKR_CTL);
 
 		/* Disable PLL */
-- 
cgit v0.10.2


From bec4c99e8637b5b8bd4b0513eacb51da25885e3b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 6 May 2009 10:36:34 +0100
Subject: ASoC: Fix file patterns for PXA sound drivers

The file matches for PXA sound drivers missed the generic AC97 support
and were overly specific within sound/soc/pxa, omitting all machine
drivers and the SSP driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 3cf4f0d..17c8ec1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4544,7 +4544,8 @@ F:	drivers/pcmcia/pxa2xx*
 F:	drivers/spi/pxa2xx*
 F:	drivers/usb/gadget/pxa2*
 F:	include/sound/pxa2xx-lib.h
-F:	sound/soc/pxa/pxa2xx*
+F:	sound/arm/pxa*
+F:	sound/soc/pxa
 
 PXA168 SUPPORT
 P:	Eric Miao
-- 
cgit v0.10.2


From 30d88cf52f229c3e0c90b8d71036960ccc2db4e2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 20:20:56 +0900
Subject: sh: Kill off extra cflags Kconfig entry.

There is no real reason to use this anymore, as the build system
generally knows what it is doing with regards to cflags mangling.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 44627ef..8179cc9 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -122,17 +122,6 @@ config SH_NO_BSS_INIT
 	  For all other cases, say N. If this option seems perplexing, or
 	  you aren't sure, say N.
 
-config MORE_COMPILE_OPTIONS
-	bool "Add any additional compile options"
-	help
-	  If you want to add additional CFLAGS to the kernel build, enable this
-	  option and then enter what you would like to add in the next question.
-	  Note however that -g is already appended with the selection of KGDB.
-
-config COMPILE_OPTIONS
-	string "Additional compile arguments"
-	depends on MORE_COMPILE_OPTIONS
-
 config SH64_SR_WATCH
 	bool "Debug: set SR.WATCH to enable hardware watchpoints and trace"
 	depends on SUPERH64
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 493da97..68348e7 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -70,9 +70,6 @@ cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -ml
 cflags-y	+= $(call cc-option,-mno-fdpic)
 cflags-y	+= $(isaflags-y) -ffreestanding
 
-cflags-$(CONFIG_MORE_COMPILE_OPTIONS)	+= \
-	$(shell echo $(CONFIG_COMPILE_OPTIONS) | sed -e 's/"//g')
-
 OBJCOPYFLAGS	:= -O binary -R .note -R .note.gnu.build-id -R .comment \
 		   -R .stab -R .stabstr -S
 
-- 
cgit v0.10.2


From c29418c2ae15ee9171bc136ad261c497b95a46fc Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 20:32:56 +0900
Subject: sh: Always fixup unaligned userspace accesses on sh64.

sh64 has traditionally had this configurable via a Kconfig option
(CONFIG_SH64_USER_MISALIGNED_FIXUP). In practice it has never really been
terribly useful to turn this off, so just get rid of the option entirely.

We leave the sysctl around so we don't end up breaking existing root
file systems, and to allow folks that really want this off to do so at
their own risk.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig.cpu b/arch/sh/Kconfig.cpu
index c7d7043..9eb1712 100644
--- a/arch/sh/Kconfig.cpu
+++ b/arch/sh/Kconfig.cpu
@@ -76,11 +76,6 @@ config SPECULATIVE_EXECUTION
 
 	  If unsure, say N.
 
-config SH64_USER_MISALIGNED_FIXUP
-	def_bool y
-	prompt "Fixup misaligned loads/stores occurring in user mode"
-	depends on SUPERH64
-
 config SH64_ID2815_WORKAROUND
 	bool "Include workaround for SH5-101 cut2 silicon defect ID2815"
 	depends on CPU_SUBTYPE_SH5_101
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index a85831c..267e5eb 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -370,7 +370,6 @@ static int generate_and_check_address(struct pt_regs *regs,
 		return -1;
 	}
 
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	/* Check accessible.  For misaligned access in the kernel, assume the
 	   address is always accessible (and if not, just fault when the
 	   load/store gets done.) */
@@ -380,18 +379,13 @@ static int generate_and_check_address(struct pt_regs *regs,
 		}
 		/* Do access_ok check later - it depends on whether it's a load or a store. */
 	}
-#endif
 
 	*address = addr;
 	return 0;
 }
 
-/* Default value as for sh */
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 static int user_mode_unaligned_fixup_count = 10;
 static int user_mode_unaligned_fixup_enable = 1;
-#endif
-
 static int kernel_mode_unaligned_fixup_count = 32;
 
 static void misaligned_kernel_word_load(__u64 address, int do_sign_extend, __u64 *result)
@@ -440,7 +434,6 @@ static int misaligned_load(struct pt_regs *regs,
 	}
 
 	destreg = (opcode >> 4) & 0x3f;
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	if (user_mode(regs)) {
 		__u64 buffer;
 
@@ -470,9 +463,7 @@ static int misaligned_load(struct pt_regs *regs,
 				width_shift, (unsigned long) regs->pc);
 			break;
 		}
-	} else
-#endif
-	{
+	} else {
 		/* kernel mode - we can take short cuts since if we fault, it's a genuine bug */
 		__u64 lo, hi;
 
@@ -519,7 +510,6 @@ static int misaligned_store(struct pt_regs *regs,
 	}
 
 	srcreg = (opcode >> 4) & 0x3f;
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	if (user_mode(regs)) {
 		__u64 buffer;
 
@@ -546,9 +536,7 @@ static int misaligned_store(struct pt_regs *regs,
 		if (__copy_user((void *)(int)address, &buffer, (1 << width_shift)) > 0) {
 			return -1; /* fault */
 		}
-	} else
-#endif
-	{
+	} else {
 		/* kernel mode - we can take short cuts since if we fault, it's a genuine bug */
 		__u64 val = regs->regs[srcreg];
 
@@ -576,7 +564,6 @@ static int misaligned_store(struct pt_regs *regs,
 
 }
 
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 /* Never need to fix up misaligned FPU accesses within the kernel since that's a real
    error. */
 static int misaligned_fpu_load(struct pt_regs *regs,
@@ -727,7 +714,6 @@ static int misaligned_fpu_store(struct pt_regs *regs,
 		return -1;
 	}
 }
-#endif
 
 static int misaligned_fixup(struct pt_regs *regs)
 {
@@ -735,12 +721,8 @@ static int misaligned_fixup(struct pt_regs *regs)
 	int error;
 	int major, minor;
 
-#if !defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
-	/* Never fixup user mode misaligned accesses without this option enabled. */
-	return -1;
-#else
-	if (!user_mode_unaligned_fixup_enable) return -1;
-#endif
+	if (!user_mode_unaligned_fixup_enable)
+		return -1;
 
 	error = read_opcode(regs->pc, &opcode, user_mode(regs));
 	if (error < 0) {
@@ -749,15 +731,12 @@ static int misaligned_fixup(struct pt_regs *regs)
 	major = (opcode >> 26) & 0x3f;
 	minor = (opcode >> 16) & 0xf;
 
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	if (user_mode(regs) && (user_mode_unaligned_fixup_count > 0)) {
 		--user_mode_unaligned_fixup_count;
 		/* Only do 'count' worth of these reports, to remove a potential DoS against syslog */
 		printk("Fixing up unaligned userspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
 		       current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
-	} else
-#endif
-	if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
+	} else if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
 		--kernel_mode_unaligned_fixup_count;
 		if (in_interrupt()) {
 			printk("Fixing up unaligned kernelspace access in interrupt pc=0x%08x ins=0x%08lx\n",
@@ -830,7 +809,6 @@ static int misaligned_fixup(struct pt_regs *regs)
 			}
 			break;
 
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 		case (0x94>>2): /* FLD.S */
 			error = misaligned_fpu_load(regs, opcode, 1, 2, 0);
 			break;
@@ -881,7 +859,6 @@ static int misaligned_fixup(struct pt_regs *regs)
 				break;
 			}
 			break;
-#endif
 
 		default:
 			/* Fault */
@@ -907,7 +884,6 @@ static ctl_table unaligned_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "user_reports",
@@ -923,7 +899,6 @@ static ctl_table unaligned_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec},
-#endif
 	{}
 };
 
-- 
cgit v0.10.2


From 8f31bfe538ebafac187d2d4465a92e1d9ee6d8c2 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 8 May 2009 10:31:42 +0800
Subject: tracing/events: clean up for ftrace_set_clr_event()

Add a helper function __ftrace_set_clr_event(), and replace some
ftrace_set_clr_event() calls with this helper, thus we don't need any
kstrdup() or kmalloc().

As a side effect, this patch fixes an issue in self tests code, which is
similar to the one fixed in commit d6bf81ef0f7474434c2a049e8bf3c9146a14dd96
("tracing: append ":*" to internal setting of system events")

It's a small issue and won't cause any bug in fact, but we should do things
right anyway.

[ Impact: prevent spurious event-enabling in tracing self-tests ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4A03998E.3020503@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8d0fae3..45f1099 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -111,11 +111,44 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 	}
 }
 
-static int ftrace_set_clr_event(char *buf, int set)
+/*
+ * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
+ */
+static int __ftrace_set_clr_event(const char *match, const char *sub,
+				  const char *event, int set)
 {
 	struct ftrace_event_call *call;
+	int ret;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+
+		if (!call->name || !call->regfunc)
+			continue;
+
+		if (match &&
+		    strcmp(match, call->name) != 0 &&
+		    strcmp(match, call->system) != 0)
+			continue;
+
+		if (sub && strcmp(sub, call->system) != 0)
+			continue;
+
+		if (event && strcmp(event, call->name) != 0)
+			continue;
+
+		ftrace_event_enable_disable(call, set);
+
+		ret = 0;
+	}
+	mutex_unlock(&event_mutex);
+
+	return ret;
+}
+
+static int ftrace_set_clr_event(char *buf, int set)
+{
 	char *event = NULL, *sub = NULL, *match;
-	int ret = -EINVAL;
 
 	/*
 	 * The buf format can be <subsystem>:<event-name>
@@ -141,30 +174,7 @@ static int ftrace_set_clr_event(char *buf, int set)
 			event = NULL;
 	}
 
-	mutex_lock(&event_mutex);
-	list_for_each_entry(call, &ftrace_events, list) {
-
-		if (!call->name || !call->regfunc)
-			continue;
-
-		if (match &&
-		    strcmp(match, call->name) != 0 &&
-		    strcmp(match, call->system) != 0)
-			continue;
-
-		if (sub && strcmp(sub, call->system) != 0)
-			continue;
-
-		if (event && strcmp(event, call->name) != 0)
-			continue;
-
-		ftrace_event_enable_disable(call, set);
-
-		ret = 0;
-	}
-	mutex_unlock(&event_mutex);
-
-	return ret;
+	return __ftrace_set_clr_event(match, sub, event, set);
 }
 
 /* 128 should be much more than enough */
@@ -408,18 +418,14 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 	struct ftrace_event_call *call;
 	char buf[2];
 	int set = -1;
-	int all = 0;
 	int ret;
 
-	if (system[0] == '*')
-		all = 1;
-
 	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
 		if (!call->name || !call->regfunc)
 			continue;
 
-		if (!all && strcmp(call->system, system) != 0)
+		if (system && strcmp(call->system, system) != 0)
 			continue;
 
 		/*
@@ -480,7 +486,6 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 {
 	const char *system = filp->private_data;
 	unsigned long val;
-	char *command;
 	char buf[64];
 	ssize_t ret;
 
@@ -500,30 +505,16 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	if (ret < 0)
 		return ret;
 
-	switch (val) {
-	case 0:
-	case 1:
-		break;
-
-	default:
+	if (val != 0 && val != 1)
 		return -EINVAL;
-	}
 
-	/* +3 for the ":*\0" */
-	command = kmalloc(strlen(system)+3, GFP_KERNEL);
-	if (!command)
-		return -ENOMEM;
-	sprintf(command, "%s:*", system);
-
-	ret = ftrace_set_clr_event(command, val);
+	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
 	if (ret)
-		goto out_free;
+		goto out;
 
 	ret = cnt;
 
- out_free:
-	kfree(command);
-
+out:
 	*ppos += cnt;
 
 	return ret;
@@ -1181,7 +1172,7 @@ static __init int event_trace_init(void)
 			  &ftrace_show_header_fops);
 
 	trace_create_file("enable", 0644, d_events,
-			  "*", &ftrace_system_enable_fops);
+			  NULL, &ftrace_system_enable_fops);
 
 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
@@ -1259,7 +1250,6 @@ static __init void event_trace_self_tests(void)
 {
 	struct ftrace_event_call *call;
 	struct event_subsystem *system;
-	char *sysname;
 	int ret;
 
 	pr_info("Running tests on trace events:\n");
@@ -1305,14 +1295,7 @@ static __init void event_trace_self_tests(void)
 
 		pr_info("Testing event system %s: ", system->name);
 
-		/* ftrace_set_clr_event can modify the name passed in. */
-		sysname = kstrdup(system->name, GFP_KERNEL);
-		if (WARN_ON(!sysname)) {
-			pr_warning("Can't allocate memory, giving up!\n");
-			return;
-		}
-		ret = ftrace_set_clr_event(sysname, 1);
-		kfree(sysname);
+		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
 		if (WARN_ON_ONCE(ret)) {
 			pr_warning("error enabling system %s\n",
 				   system->name);
@@ -1321,14 +1304,7 @@ static __init void event_trace_self_tests(void)
 
 		event_test_stuff();
 
-		sysname = kstrdup(system->name, GFP_KERNEL);
-		if (WARN_ON(!sysname)) {
-			pr_warning("Can't allocate memory, giving up!\n");
-			return;
-		}
-		ret = ftrace_set_clr_event(sysname, 0);
-		kfree(sysname);
-
+		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
 		if (WARN_ON_ONCE(ret))
 			pr_warning("error disabling system %s\n",
 				   system->name);
@@ -1341,15 +1317,8 @@ static __init void event_trace_self_tests(void)
 	pr_info("Running tests on all trace events:\n");
 	pr_info("Testing all events: ");
 
-	sysname = kmalloc(4, GFP_KERNEL);
-	if (WARN_ON(!sysname)) {
-		pr_warning("Can't allocate memory, giving up!\n");
-		return;
-	}
-	memcpy(sysname, "*:*", 4);
-	ret = ftrace_set_clr_event(sysname, 1);
+	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
 	if (WARN_ON_ONCE(ret)) {
-		kfree(sysname);
 		pr_warning("error enabling all events\n");
 		return;
 	}
@@ -1357,10 +1326,7 @@ static __init void event_trace_self_tests(void)
 	event_test_stuff();
 
 	/* reset sysname */
-	memcpy(sysname, "*:*", 4);
-	ret = ftrace_set_clr_event(sysname, 0);
-	kfree(sysname);
-
+	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
 	if (WARN_ON_ONCE(ret)) {
 		pr_warning("error disabling all events\n");
 		return;
-- 
cgit v0.10.2


From c142b15dc56ee6d55cb97a062e3c8e9c61e384c0 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 8 May 2009 10:32:05 +0800
Subject: tracing/events: simplify system_enable_read()

A smarter way to figure out the output of an enable file.

[ Impact: clean up ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4A0399A5.2080603@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 45f1099..df394bc 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -414,10 +414,11 @@ static ssize_t
 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 		   loff_t *ppos)
 {
+	const char set_to_char[4] = { '?', '0', '1', 'X' };
 	const char *system = filp->private_data;
 	struct ftrace_event_call *call;
 	char buf[2];
-	int set = -1;
+	int set = 0;
 	int ret;
 
 	mutex_lock(&event_mutex);
@@ -433,47 +434,18 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 		 * or if all events or cleared, or if we have
 		 * a mixture.
 		 */
-		if (call->enabled) {
-			switch (set) {
-			case -1:
-				set = 1;
-				break;
-			case 0:
-				set = 2;
-				break;
-			}
-		} else {
-			switch (set) {
-			case -1:
-				set = 0;
-				break;
-			case 1:
-				set = 2;
-				break;
-			}
-		}
+		set |= (1 << !!call->enabled);
+
 		/*
 		 * If we have a mixture, no need to look further.
 		 */
-		if (set == 2)
+		if (set == 3)
 			break;
 	}
 	mutex_unlock(&event_mutex);
 
+	buf[0] = set_to_char[set];
 	buf[1] = '\n';
-	switch (set) {
-	case 0:
-		buf[0] = '0';
-		break;
-	case 1:
-		buf[0] = '1';
-		break;
-	case 2:
-		buf[0] = 'X';
-		break;
-	default:
-		buf[0] = '?';
-	}
 
 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
 
-- 
cgit v0.10.2


From 42171c17f267d7fdc5167ad7b6b5fb9edfd04186 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 8 May 2009 14:11:43 +0200
Subject: ALSA: hda - Fix and clean up hippo-compat HP auto-muting

The speaker auto-muting per HP plugging for ALC262 HIPPO and compatible
devices is slightly buggy as the "Master" or "Front" mixer control can
still toggle the speaker output even if the headphone is plugged.

This patch fixes the issue, and clean up the hippo-related codes
together with fixes of some inconsistent mixer names.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b949534..8b242c9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9534,24 +9534,6 @@ static struct snd_kcontrol_new alc262_base_mixer[] = {
 	{ } /* end */
 };
 
-static struct snd_kcontrol_new alc262_hippo1_mixer[] = {
-	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
-	HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
-	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
-	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
-	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
-	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
-	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
-	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
-	HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
-	HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
-	HDA_CODEC_VOLUME("Front Mic Boost", 0x19, 0, HDA_INPUT),
-	/*HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0D, 0x0, HDA_OUTPUT),*/
-	HDA_CODEC_MUTE("Headphone Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
-	{ } /* end */
-};
-
 /* update HP, line and mono-out pins according to the master switch */
 static void alc262_hp_master_update(struct hda_codec *codec)
 {
@@ -9772,46 +9754,132 @@ static struct hda_input_mux alc262_hp_rp5700_capture_source = {
 	},
 };
 
-/* bind hp and internal speaker mute (with plug check) */
-static int alc262_sony_master_sw_put(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
+/* bind hp and internal speaker mute (with plug check) as master switch */
+static void alc262_hippo_master_update(struct hda_codec *codec)
 {
-	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	long *valp = ucontrol->value.integer.value;
-	int change;
+	struct alc_spec *spec = codec->spec;
+	hda_nid_t hp_nid = spec->autocfg.hp_pins[0];
+	hda_nid_t line_nid = spec->autocfg.line_out_pins[0];
+	hda_nid_t speaker_nid = spec->autocfg.speaker_pins[0];
+	unsigned int mute;
 
-	/* change hp mute */
-	change = snd_hda_codec_amp_update(codec, 0x15, 0, HDA_OUTPUT, 0,
-					  HDA_AMP_MUTE,
-					  valp[0] ? 0 : HDA_AMP_MUTE);
-	change |= snd_hda_codec_amp_update(codec, 0x15, 1, HDA_OUTPUT, 0,
-					   HDA_AMP_MUTE,
-					   valp[1] ? 0 : HDA_AMP_MUTE);
-	if (change) {
-		/* change speaker according to HP jack state */
-		struct alc_spec *spec = codec->spec;
-		unsigned int mute;
-		if (spec->jack_present)
-			mute = HDA_AMP_MUTE;
-		else
-			mute = snd_hda_codec_amp_read(codec, 0x15, 0,
-						      HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
+	/* HP */
+	mute = spec->master_sw ? 0 : HDA_AMP_MUTE;
+	snd_hda_codec_amp_stereo(codec, hp_nid, HDA_OUTPUT, 0,
+				 HDA_AMP_MUTE, mute);
+	/* mute internal speaker per jack sense */
+	if (spec->jack_present)
+		mute = HDA_AMP_MUTE;
+	if (line_nid)
+		snd_hda_codec_amp_stereo(codec, line_nid, HDA_OUTPUT, 0,
+					 HDA_AMP_MUTE, mute);
+	if (speaker_nid && speaker_nid != line_nid)
+		snd_hda_codec_amp_stereo(codec, speaker_nid, HDA_OUTPUT, 0,
 					 HDA_AMP_MUTE, mute);
+}
+
+#define alc262_hippo_master_sw_get	alc262_hp_master_sw_get
+
+static int alc262_hippo_master_sw_put(struct snd_kcontrol *kcontrol,
+				      struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct alc_spec *spec = codec->spec;
+	int val = !!*ucontrol->value.integer.value;
+
+	if (val == spec->master_sw)
+		return 0;
+	spec->master_sw = val;
+	alc262_hippo_master_update(codec);
+	return 1;
+}
+
+#define ALC262_HIPPO_MASTER_SWITCH				\
+	{							\
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,		\
+		.name = "Master Playback Switch",		\
+		.info = snd_ctl_boolean_mono_info,		\
+		.get = alc262_hippo_master_sw_get,		\
+		.put = alc262_hippo_master_sw_put,		\
 	}
-	return change;
+
+static struct snd_kcontrol_new alc262_hippo_mixer[] = {
+	ALC262_HIPPO_MASTER_SWITCH,
+	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Boost", 0x19, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
+	{ } /* end */
+};
+
+static struct snd_kcontrol_new alc262_hippo1_mixer[] = {
+	HDA_CODEC_VOLUME("Master Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
+	HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Boost", 0x19, 0, HDA_INPUT),
+	{ } /* end */
+};
+
+/* mute/unmute internal speaker according to the hp jack and mute state */
+static void alc262_hippo_automute(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+	hda_nid_t hp_nid = spec->autocfg.hp_pins[0];
+	unsigned int present;
+
+	/* need to execute and sync at first */
+	snd_hda_codec_read(codec, hp_nid, 0, AC_VERB_SET_PIN_SENSE, 0);
+	present = snd_hda_codec_read(codec, hp_nid, 0,
+				     AC_VERB_GET_PIN_SENSE, 0);
+	spec->jack_present = (present & 0x80000000) != 0;
+	alc262_hippo_master_update(codec);
 }
 
+static void alc262_hippo_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+	if ((res >> 26) != ALC880_HP_EVENT)
+		return;
+	alc262_hippo_automute(codec);
+}
+
+static void alc262_hippo_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc262_hippo_automute(codec);
+}
+
+static void alc262_hippo1_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc262_hippo_automute(codec);
+}
+
+
 static struct snd_kcontrol_new alc262_sony_mixer[] = {
 	HDA_CODEC_VOLUME("Master Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_hda_mixer_amp_switch_info,
-		.get = snd_hda_mixer_amp_switch_get,
-		.put = alc262_sony_master_sw_put,
-		.private_value = HDA_COMPOSE_AMP_VAL(0x15, 3, 0, HDA_OUTPUT),
-	},
+	ALC262_HIPPO_MASTER_SWITCH,
 	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_VOLUME("ATAPI Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
@@ -9820,8 +9888,8 @@ static struct snd_kcontrol_new alc262_sony_mixer[] = {
 };
 
 static struct snd_kcontrol_new alc262_benq_t31_mixer[] = {
-	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Master Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
 	HDA_CODEC_MUTE("Headphone Playback Switch", 0x15, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
@@ -10076,69 +10144,6 @@ static void alc262_toshiba_s06_init_hook(struct hda_codec *codec)
 	alc262_dmic_automute(codec);
 }
 
-/* mute/unmute internal speaker according to the hp jack and mute state */
-static void alc262_hippo_automute(struct hda_codec *codec)
-{
-	struct alc_spec *spec = codec->spec;
-	unsigned int mute;
-	unsigned int present;
-
-	/* need to execute and sync at first */
-	snd_hda_codec_read(codec, 0x15, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	spec->jack_present = (present & 0x80000000) != 0;
-	if (spec->jack_present) {
-		/* mute internal speaker */
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute internal speaker if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x15, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
-}
-
-/* unsolicited event for HP jack sensing */
-static void alc262_hippo_unsol_event(struct hda_codec *codec,
-				       unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc262_hippo_automute(codec);
-}
-
-static void alc262_hippo1_automute(struct hda_codec *codec)
-{
-	unsigned int mute;
-	unsigned int present;
-
-	snd_hda_codec_read(codec, 0x1b, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	present = (present & 0x80000000) != 0;
-	if (present) {
-		/* mute internal speaker */
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute internal speaker if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x1b, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
-}
-
-/* unsolicited event for HP jack sensing */
-static void alc262_hippo1_unsol_event(struct hda_codec *codec,
-				       unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc262_hippo1_automute(codec);
-}
-
 /*
  * nec model
  *  0x15 = headphone
@@ -10406,14 +10411,7 @@ static struct snd_kcontrol_new alc262_lenovo_3000_mixer[] = {
 
 static struct snd_kcontrol_new alc262_toshiba_rx1_mixer[] = {
 	HDA_BIND_VOL("Master Playback Volume", &alc262_fujitsu_bind_master_vol),
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_hda_mixer_amp_switch_info,
-		.get = snd_hda_mixer_amp_switch_get,
-		.put = alc262_sony_master_sw_put,
-		.private_value = HDA_COMPOSE_AMP_VAL(0x15, 3, 0, HDA_OUTPUT),
-	},
+	ALC262_HIPPO_MASTER_SWITCH,
 	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
 	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
@@ -11068,7 +11066,7 @@ static struct alc_config_preset alc262_presets[] = {
 		.input_mux = &alc262_capture_source,
 	},
 	[ALC262_HIPPO] = {
-		.mixers = { alc262_base_mixer },
+		.mixers = { alc262_hippo_mixer },
 		.init_verbs = { alc262_init_verbs, alc262_hippo_unsol_verbs},
 		.num_dacs = ARRAY_SIZE(alc262_dac_nids),
 		.dac_nids = alc262_dac_nids,
@@ -11078,7 +11076,7 @@ static struct alc_config_preset alc262_presets[] = {
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
 		.unsol_event = alc262_hippo_unsol_event,
-		.init_hook = alc262_hippo_automute,
+		.init_hook = alc262_hippo_init_hook,
 	},
 	[ALC262_HIPPO_1] = {
 		.mixers = { alc262_hippo1_mixer },
@@ -11090,8 +11088,8 @@ static struct alc_config_preset alc262_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc262_modes),
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
-		.unsol_event = alc262_hippo1_unsol_event,
-		.init_hook = alc262_hippo1_automute,
+		.unsol_event = alc262_hippo_unsol_event,
+		.init_hook = alc262_hippo1_init_hook,
 	},
 	[ALC262_FUJITSU] = {
 		.mixers = { alc262_fujitsu_mixer },
@@ -11185,7 +11183,7 @@ static struct alc_config_preset alc262_presets[] = {
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
 		.unsol_event = alc262_hippo_unsol_event,
-		.init_hook = alc262_hippo_automute,
+		.init_hook = alc262_hippo_init_hook,
 	},
 	[ALC262_BENQ_T31] = {
 		.mixers = { alc262_benq_t31_mixer },
@@ -11197,7 +11195,7 @@ static struct alc_config_preset alc262_presets[] = {
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
 		.unsol_event = alc262_hippo_unsol_event,
-		.init_hook = alc262_hippo_automute,
+		.init_hook = alc262_hippo_init_hook,
 	},
 	[ALC262_ULTRA] = {
 		.mixers = { alc262_ultra_mixer },
@@ -11262,7 +11260,7 @@ static struct alc_config_preset alc262_presets[] = {
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
 		.unsol_event = alc262_hippo_unsol_event,
-		.init_hook = alc262_hippo_automute,
+		.init_hook = alc262_hippo_init_hook,
 	},
 	[ALC262_TYAN] = {
 		.mixers = { alc262_tyan_mixer },
@@ -11419,6 +11417,17 @@ static struct snd_kcontrol_new alc268_base_mixer[] = {
 	{ }
 };
 
+static struct snd_kcontrol_new alc268_toshiba_mixer[] = {
+	/* output mixer control */
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x2, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x3, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
+	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Mic Boost", 0x19, 0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line In Boost", 0x1a, 0, HDA_INPUT),
+	{ }
+};
+
 /* bind Beep switches of both NID 0x0f and 0x10 */
 static struct hda_bind_ctls alc268_bind_beep_sw = {
 	.ops = &snd_hda_bind_sw,
@@ -11442,8 +11451,6 @@ static struct hda_verb alc268_eapd_verbs[] = {
 };
 
 /* Toshiba specific */
-#define alc268_toshiba_automute	alc262_hippo_automute
-
 static struct hda_verb alc268_toshiba_verbs[] = {
 	{0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
 	{ } /* end */
@@ -11579,13 +11586,8 @@ static struct hda_verb alc268_acer_verbs[] = {
 };
 
 /* unsolicited event for HP jack sensing */
-static void alc268_toshiba_unsol_event(struct hda_codec *codec,
-				       unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc268_toshiba_automute(codec);
-}
+#define alc268_toshiba_unsol_event	alc262_hippo_unsol_event
+#define alc268_toshiba_init_hook	alc262_hippo_init_hook
 
 static void alc268_acer_unsol_event(struct hda_codec *codec,
 				       unsigned int res)
@@ -12230,7 +12232,7 @@ static struct alc_config_preset alc268_presets[] = {
 		.input_mux = &alc268_capture_source,
 	},
 	[ALC268_TOSHIBA] = {
-		.mixers = { alc268_base_mixer, alc268_capture_alt_mixer,
+		.mixers = { alc268_toshiba_mixer, alc268_capture_alt_mixer,
 			    alc268_beep_mixer },
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_toshiba_verbs },
@@ -12244,7 +12246,7 @@ static struct alc_config_preset alc268_presets[] = {
 		.channel_mode = alc268_modes,
 		.input_mux = &alc268_capture_source,
 		.unsol_event = alc268_toshiba_unsol_event,
-		.init_hook = alc268_toshiba_automute,
+		.init_hook = alc268_toshiba_init_hook,
 	},
 	[ALC268_ACER] = {
 		.mixers = { alc268_acer_mixer, alc268_capture_alt_mixer,
@@ -12327,7 +12329,7 @@ static struct alc_config_preset alc268_presets[] = {
 		.channel_mode = alc268_modes,
 		.input_mux = &alc268_capture_source,
 		.unsol_event = alc268_toshiba_unsol_event,
-		.init_hook = alc268_toshiba_automute
+		.init_hook = alc268_toshiba_init_hook
 	},
 #ifdef CONFIG_SND_DEBUG
 	[ALC268_TEST] = {
@@ -15552,10 +15554,8 @@ static struct snd_kcontrol_new alc662_lenovo_101e_mixer[] = {
 };
 
 static struct snd_kcontrol_new alc662_eeepc_p701_mixer[] = {
-	HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT),
-
-	HDA_CODEC_VOLUME("Line-Out Playback Volume", 0x02, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("Line-Out Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Master Playback Volume", 0x02, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
 
 	HDA_CODEC_VOLUME("e-Mic Boost", 0x18, 0, HDA_INPUT),
 	HDA_CODEC_VOLUME("e-Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
@@ -15568,15 +15568,11 @@ static struct snd_kcontrol_new alc662_eeepc_p701_mixer[] = {
 };
 
 static struct snd_kcontrol_new alc662_eeepc_ep20_mixer[] = {
-	HDA_CODEC_VOLUME("Line-Out Playback Volume", 0x02, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("Line-Out Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+	ALC262_HIPPO_MASTER_SWITCH,
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x02, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME("Surround Playback Volume", 0x03, 0x0, HDA_OUTPUT),
-	HDA_BIND_MUTE("Surround Playback Switch", 0x03, 2, HDA_INPUT),
 	HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x04, 1, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x04, 2, 0x0, HDA_OUTPUT),
-	HDA_BIND_MUTE_MONO("Center Playback Switch", 0x04, 1, 2, HDA_INPUT),
-	HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x04, 2, 2, HDA_INPUT),
-	HDA_CODEC_MUTE("Speaker Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("MuteCtrl Playback Switch", 0x0c, 2, HDA_INPUT),
 	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
 	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
@@ -16084,51 +16080,25 @@ static void alc662_eeepc_mic_automute(struct hda_codec *codec)
 static void alc662_eeepc_unsol_event(struct hda_codec *codec,
 				     unsigned int res)
 {
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc262_hippo1_automute( codec );
-
 	if ((res >> 26) == ALC880_MIC_EVENT)
 		alc662_eeepc_mic_automute(codec);
+	else
+		alc262_hippo_unsol_event(codec, res);
 }
 
 static void alc662_eeepc_inithook(struct hda_codec *codec)
 {
-	alc262_hippo1_automute( codec );
+	alc262_hippo1_init_hook(codec);
 	alc662_eeepc_mic_automute(codec);
 }
 
-static void alc662_eeepc_ep20_automute(struct hda_codec *codec)
-{
-	unsigned int mute;
-	unsigned int present;
-
-	snd_hda_codec_read(codec, 0x14, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	present = (present & 0x80000000) != 0;
-	if (present) {
-		/* mute internal speaker */
-		snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-					HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute internal speaker if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x14, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-					HDA_AMP_MUTE, mute);
-	}
-}
-
-/* unsolicited event for HP jack sensing */
-static void alc662_eeepc_ep20_unsol_event(struct hda_codec *codec,
-					  unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc662_eeepc_ep20_automute(codec);
-}
-
 static void alc662_eeepc_ep20_inithook(struct hda_codec *codec)
 {
-	alc662_eeepc_ep20_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x1b;
+	alc262_hippo_master_update(codec);
 }
 
 static void alc663_m51va_speaker_automute(struct hda_codec *codec)
@@ -16462,35 +16432,9 @@ static void alc663_g50v_inithook(struct hda_codec *codec)
 	alc662_eeepc_mic_automute(codec);
 }
 
-/* bind hp and internal speaker mute (with plug check) */
-static int alc662_ecs_master_sw_put(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
-{
-	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	long *valp = ucontrol->value.integer.value;
-	int change;
-
-	change = snd_hda_codec_amp_update(codec, 0x1b, 0, HDA_OUTPUT, 0,
-					  HDA_AMP_MUTE,
-					  valp[0] ? 0 : HDA_AMP_MUTE);
-	change |= snd_hda_codec_amp_update(codec, 0x1b, 1, HDA_OUTPUT, 0,
-					   HDA_AMP_MUTE,
-					   valp[1] ? 0 : HDA_AMP_MUTE);
-	if (change)
-		alc262_hippo1_automute(codec);
-	return change;
-}
-
 static struct snd_kcontrol_new alc662_ecs_mixer[] = {
 	HDA_CODEC_VOLUME("Master Playback Volume", 0x02, 0x0, HDA_OUTPUT),
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_hda_mixer_amp_switch_info,
-		.get = snd_hda_mixer_amp_switch_get,
-		.put = alc662_ecs_master_sw_put,
-		.private_value = HDA_COMPOSE_AMP_VAL(0x1b, 3, 0, HDA_OUTPUT),
-	},
+	ALC262_HIPPO_MASTER_SWITCH,
 
 	HDA_CODEC_VOLUME("e-Mic/LineIn Boost", 0x18, 0, HDA_INPUT),
 	HDA_CODEC_VOLUME("e-Mic/LineIn Playback Volume", 0x0b, 0x0, HDA_INPUT),
@@ -16682,7 +16626,7 @@ static struct alc_config_preset alc662_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc662_3ST_6ch_modes),
 		.channel_mode = alc662_3ST_6ch_modes,
 		.input_mux = &alc662_lenovo_101e_capture_source,
-		.unsol_event = alc662_eeepc_ep20_unsol_event,
+		.unsol_event = alc662_eeepc_unsol_event,
 		.init_hook = alc662_eeepc_ep20_inithook,
 	},
 	[ALC662_ECS] = {
-- 
cgit v0.10.2


From c3d480ded1584dc17f6e82f49af4155380a51dda Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 21:57:04 +0900
Subject: sh: TMU platform data for SH7786.

Wires up all 12 TMU channels, with TMU0 and 1 used as clockevent and
clocksource respectively.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index 90e8cff..2c464bf 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2009  Renesas Solutions Corp.
  * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ * Paul Mundt <paul.mundt@renesas.com>
  *
  * Based on SH7785 Setup
  *
@@ -19,6 +20,7 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
+#include <linux/sh_timer.h>
 #include <asm/mmzone.h>
 
 static struct plat_sci_port sci_platform_data[] = {
@@ -69,6 +71,368 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffda0008,
+		.end	= 0xffda0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 20,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffda0014,
+		.end	= 0xffda001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 21,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffda0020,
+		.end	= 0xffda002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 22,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
+static struct sh_timer_config tmu6_platform_data = {
+	.name = "TMU6",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu6_resources[] = {
+	[0] = {
+		.name	= "TMU6",
+		.start	= 0xffdc0008,
+		.end	= 0xffdc0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 45,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu6_device = {
+	.name		= "sh_tmu",
+	.id		= 6,
+	.dev = {
+		.platform_data	= &tmu6_platform_data,
+	},
+	.resource	= tmu6_resources,
+	.num_resources	= ARRAY_SIZE(tmu6_resources),
+};
+
+static struct sh_timer_config tmu7_platform_data = {
+	.name = "TMU7",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu7_resources[] = {
+	[0] = {
+		.name	= "TMU7",
+		.start	= 0xffdc0014,
+		.end	= 0xffdc001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 45,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu7_device = {
+	.name		= "sh_tmu",
+	.id		= 7,
+	.dev = {
+		.platform_data	= &tmu7_platform_data,
+	},
+	.resource	= tmu7_resources,
+	.num_resources	= ARRAY_SIZE(tmu7_resources),
+};
+
+static struct sh_timer_config tmu8_platform_data = {
+	.name = "TMU8",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu8_resources[] = {
+	[0] = {
+		.name	= "TMU8",
+		.start	= 0xffdc0020,
+		.end	= 0xffdc002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 45,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu8_device = {
+	.name		= "sh_tmu",
+	.id		= 8,
+	.dev = {
+		.platform_data	= &tmu8_platform_data,
+	},
+	.resource	= tmu8_resources,
+	.num_resources	= ARRAY_SIZE(tmu8_resources),
+};
+
+static struct sh_timer_config tmu9_platform_data = {
+	.name = "TMU9",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu9_resources[] = {
+	[0] = {
+		.name	= "TMU9",
+		.start	= 0xffde0008,
+		.end	= 0xffde0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 46,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu9_device = {
+	.name		= "sh_tmu",
+	.id		= 9,
+	.dev = {
+		.platform_data	= &tmu9_platform_data,
+	},
+	.resource	= tmu9_resources,
+	.num_resources	= ARRAY_SIZE(tmu9_resources),
+};
+
+static struct sh_timer_config tmu10_platform_data = {
+	.name = "TMU10",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu10_resources[] = {
+	[0] = {
+		.name	= "TMU10",
+		.start	= 0xffde0014,
+		.end	= 0xffde001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 46,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu10_device = {
+	.name		= "sh_tmu",
+	.id		= 10,
+	.dev = {
+		.platform_data	= &tmu10_platform_data,
+	},
+	.resource	= tmu10_resources,
+	.num_resources	= ARRAY_SIZE(tmu10_resources),
+};
+
+static struct sh_timer_config tmu11_platform_data = {
+	.name = "TMU11",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu11_resources[] = {
+	[0] = {
+		.name	= "TMU11",
+		.start	= 0xffde0020,
+		.end	= 0xffde002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 46,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu11_device = {
+	.name		= "sh_tmu",
+	.id		= 11,
+	.dev = {
+		.platform_data	= &tmu11_platform_data,
+	},
+	.resource	= tmu11_resources,
+	.num_resources	= ARRAY_SIZE(tmu11_resources),
+};
+
 static struct resource usb_ohci_resources[] = {
 	[0] = {
 		.start	= 0xffe70400,
@@ -94,6 +458,21 @@ static struct platform_device usb_ohci_device = {
 	.resource	= usb_ohci_resources,
 };
 
+static struct platform_device *sh7786_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+	&tmu6_device,
+	&tmu7_device,
+	&tmu8_device,
+	&tmu9_device,
+	&tmu10_device,
+	&tmu11_device,
+};
+
 static struct platform_device *sh7786_devices[] __initdata = {
 	&sci_device,
 	&usb_ohci_device,
@@ -156,12 +535,26 @@ static void __init sh7786_usb_setup(void)
 
 static int __init sh7786_devices_setup(void)
 {
+	int ret;
+
 	sh7786_usb_setup();
+
+	ret = platform_add_devices(sh7786_early_devices,
+				   ARRAY_SIZE(sh7786_early_devices));
+	if (unlikely(ret != 0))
+		return ret;
+
 	return platform_add_devices(sh7786_devices,
 				    ARRAY_SIZE(sh7786_devices));
 }
 device_initcall(sh7786_devices_setup);
 
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7786_early_devices,
+				   ARRAY_SIZE(sh7786_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From ccdaeb4c8f982900a2abf722e34b60131394d8eb Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 22:09:30 +0900
Subject: sh: TMU platform data for SH-X3 proto CPU.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index bd35f32..9d5185b 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -1,7 +1,7 @@
 /*
- * SH-X3 Setup
+ * SH-X3 Prototype Setup
  *
- *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2007 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -12,6 +12,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/io.h>
+#include <linux/sh_timer.h>
 #include <asm/mmzone.h>
 
 static struct plat_sci_port sci_platform_data[] = {
@@ -48,17 +49,221 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffc10008,
+		.end	= 0xffc10013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffc10014,
+		.end	= 0xffc1001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffc10020,
+		.end	= 0xffc1002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffc20008,
+		.end	= 0xffc20013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 19,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffc20014,
+		.end	= 0xffc2001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 20,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffc20020,
+		.end	= 0xffc2002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 21,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
+static struct platform_device *shx3_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
 static struct platform_device *shx3_devices[] __initdata = {
 	&sci_device,
 };
 
 static int __init shx3_devices_setup(void)
 {
+	int ret;
+
+	ret = platform_add_devices(shx3_early_devices,
+				   ARRAY_SIZE(shx3_early_devices));
+	if (unlikely(ret != 0))
+		return ret;
+
 	return platform_add_devices(shx3_devices,
 				    ARRAY_SIZE(shx3_devices));
 }
 __initcall(shx3_devices_setup);
 
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(shx3_early_devices,
+				   ARRAY_SIZE(shx3_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From 7b551f9daa9bd9533ba4ce31622ed4be1dd97d3e Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 22:14:01 +0900
Subject: sh: Kill off the GENERIC_CALIBRATE_DELAY ifndef.

Now that everyone is using the clock framework directly and we
unconditionally provide our own calibrate_delay() function, having it
wrapped in an ifndef is no longer useful. So, kill it off.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 38515a7..00086b2 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -156,7 +156,7 @@ static void __init reserve_crashkernel(void)
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size) {
 		if (crash_base <= 0) {
-			vp = alloc_bootmem_nopanic(crash_size); 
+			vp = alloc_bootmem_nopanic(crash_size);
 			if (!vp) {
 				printk(KERN_INFO "crashkernel allocation "
 				       "failed\n");
@@ -185,7 +185,6 @@ static inline void __init reserve_crashkernel(void)
 {}
 #endif
 
-#ifndef CONFIG_GENERIC_CALIBRATE_DELAY
 void __cpuinit calibrate_delay(void)
 {
 	struct clk *clk = clk_get(NULL, "cpu_clk");
@@ -201,7 +200,6 @@ void __cpuinit calibrate_delay(void)
 			 (loops_per_jiffy/(5000/HZ)) % 100,
 			 loops_per_jiffy);
 }
-#endif
 
 void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
 						unsigned long end_pfn)
-- 
cgit v0.10.2


From b72519b518211ecb7c4c7b5e660ac6235ca7d1b7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 8 May 2009 14:31:55 +0200
Subject: ALSA: hda - Clean up for ALC262 HP model auto-mute functions

Just clean up, no functional changes.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8b242c9..7c043b3 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9589,14 +9589,7 @@ static void alc262_hp_wildwest_unsol_event(struct hda_codec *codec,
 	alc262_hp_wildwest_automute(codec);
 }
 
-static int alc262_hp_master_sw_get(struct snd_kcontrol *kcontrol,
-				   struct snd_ctl_elem_value *ucontrol)
-{
-	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	struct alc_spec *spec = codec->spec;
-	*ucontrol->value.integer.value = spec->master_sw;
-	return 0;
-}
+#define alc262_hp_master_sw_get		alc260_hp_master_sw_get
 
 static int alc262_hp_master_sw_put(struct snd_kcontrol *kcontrol,
 				   struct snd_ctl_elem_value *ucontrol)
@@ -9612,14 +9605,17 @@ static int alc262_hp_master_sw_put(struct snd_kcontrol *kcontrol,
 	return 1;
 }
 
+#define ALC262_HP_MASTER_SWITCH					\
+	{							\
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,		\
+		.name = "Master Playback Switch",		\
+		.info = snd_ctl_boolean_mono_info,		\
+		.get = alc262_hp_master_sw_get,			\
+		.put = alc262_hp_master_sw_put,			\
+	}
+
 static struct snd_kcontrol_new alc262_HP_BPC_mixer[] = {
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_ctl_boolean_mono_info,
-		.get = alc262_hp_master_sw_get,
-		.put = alc262_hp_master_sw_put,
-	},
+	ALC262_HP_MASTER_SWITCH,
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Front Playback Switch", 0x15, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Headphone Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
@@ -9643,13 +9639,7 @@ static struct snd_kcontrol_new alc262_HP_BPC_mixer[] = {
 };
 
 static struct snd_kcontrol_new alc262_HP_BPC_WildWest_mixer[] = {
-	{
-		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-		.name = "Master Playback Switch",
-		.info = snd_ctl_boolean_mono_info,
-		.get = alc262_hp_master_sw_get,
-		.put = alc262_hp_master_sw_put,
-	},
+	ALC262_HP_MASTER_SWITCH,
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Front Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
@@ -9676,17 +9666,14 @@ static struct snd_kcontrol_new alc262_HP_BPC_WildWest_option_mixer[] = {
 };
 
 /* mute/unmute internal speaker according to the hp jack and mute state */
-static void alc262_hp_t5735_automute(struct hda_codec *codec, int force)
+static void alc262_hp_t5735_automute(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
+	unsigned int present;
 
-	if (force || !spec->sense_updated) {
-		unsigned int present;
-		present = snd_hda_codec_read(codec, 0x15, 0,
-					     AC_VERB_GET_PIN_SENSE, 0);
-		spec->jack_present = (present & AC_PINSENSE_PRESENCE) != 0;
-		spec->sense_updated = 1;
-	}
+	present = snd_hda_codec_read(codec, 0x15, 0,
+				     AC_VERB_GET_PIN_SENSE, 0);
+	spec->jack_present = (present & AC_PINSENSE_PRESENCE) != 0;
 	snd_hda_codec_amp_stereo(codec, 0x0c, HDA_OUTPUT, 0, HDA_AMP_MUTE,
 				 spec->jack_present ? HDA_AMP_MUTE : 0);
 }
@@ -9696,13 +9683,10 @@ static void alc262_hp_t5735_unsol_event(struct hda_codec *codec,
 {
 	if ((res >> 26) != ALC880_HP_EVENT)
 		return;
-	alc262_hp_t5735_automute(codec, 1);
+	alc262_hp_t5735_automute(codec);
 }
 
-static void alc262_hp_t5735_init_hook(struct hda_codec *codec)
-{
-	alc262_hp_t5735_automute(codec, 1);
-}
+#define alc262_hp_t5735_init_hook	alc262_hp_t5735_automute
 
 static struct snd_kcontrol_new alc262_hp_t5735_mixer[] = {
 	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-- 
cgit v0.10.2


From e552de2413edad1a7b0c7f82a2f2753e4f905d93 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:13:42 +0000
Subject: sh-sci: add platform device private data

This patch adds per-platform private data to the sh-sci driver.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 728d6a0..af5da11 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -47,6 +47,7 @@
 #include <linux/clk.h>
 #include <linux/ctype.h>
 #include <linux/err.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_SUPERH
 #include <asm/clock.h>
@@ -78,6 +79,16 @@ struct sci_port {
 	/* Port clock */
 	struct clk		*clk;
 #endif
+	struct list_head	node;
+};
+
+struct sh_sci_priv {
+	spinlock_t lock;
+	struct list_head ports;
+
+#ifdef CONFIG_HAVE_CLK
+	struct notifier_block clk_nb;
+#endif
 };
 
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
@@ -726,19 +737,22 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 static int sci_notifier(struct notifier_block *self,
 			unsigned long phase, void *p)
 {
-	int i;
+	struct sh_sci_priv *priv = container_of(self,
+						struct sh_sci_priv, clk_nb);
+	struct sci_port *sci_port;
+	unsigned long flags;
 
 	if ((phase == CPUFREQ_POSTCHANGE) ||
-	    (phase == CPUFREQ_RESUMECHANGE))
-		for (i = 0; i < SCI_NPORTS; i++) {
-			struct sci_port *s = &sci_ports[i];
-			s->port.uartclk = clk_get_rate(s->clk);
-		}
+	    (phase == CPUFREQ_RESUMECHANGE)) {
+		spin_lock_irqsave(&priv->lock, flags);
+		list_for_each_entry(sci_port, &priv->ports, node)
+			sci_port->port.uartclk = clk_get_rate(sci_port->clk);
+
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
 
 	return NOTIFY_OK;
 }
-
-static struct notifier_block sci_nb = { &sci_notifier, NULL, 0 };
 #endif
 
 static int sci_request_irq(struct sci_port *port)
@@ -1201,6 +1215,27 @@ static struct uart_driver sci_uart_driver = {
 	.cons		= SCI_CONSOLE,
 };
 
+
+static int __devexit sci_remove(struct platform_device *dev)
+{
+	struct sh_sci_priv *priv = platform_get_drvdata(dev);
+	struct sci_port *p;
+	unsigned long flags;
+
+#ifdef CONFIG_HAVE_CLK
+	cpufreq_unregister_notifier(&priv->clk_nb, CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_for_each_entry(p, &priv->ports, node)
+		uart_remove_one_port(&sci_uart_driver, &p->port);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	kfree(priv);
+	return 0;
+}
+
 /*
  * Register a set of serial devices attached to a platform device.  The
  * list is terminated with a zero flags entry, which means we expect
@@ -1210,7 +1245,22 @@ static struct uart_driver sci_uart_driver = {
 static int __devinit sci_probe(struct platform_device *dev)
 {
 	struct plat_sci_port *p = dev->dev.platform_data;
+	struct sh_sci_priv *priv;
 	int i, ret = -EINVAL;
+	unsigned long flags;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&priv->ports);
+	spin_lock_init(&priv->lock);
+	platform_set_drvdata(dev, priv);
+
+#ifdef CONFIG_HAVE_CLK
+	priv->clk_nb.notifier_call = sci_notifier;
+	cpufreq_register_notifier(&priv->clk_nb, CPUFREQ_TRANSITION_NOTIFIER);
+#endif
 
 	for (i = 0; p && p->flags != 0; p++, i++) {
 		struct sci_port *sciport = &sci_ports[i];
@@ -1254,12 +1304,19 @@ static int __devinit sci_probe(struct platform_device *dev)
 
 		memcpy(&sciport->irqs, &p->irqs, sizeof(p->irqs));
 
-		uart_add_one_port(&sci_uart_driver, &sciport->port);
-	}
+		ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
 
-#ifdef CONFIG_HAVE_CLK
-	cpufreq_register_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER);
-#endif
+		if (ret && (p->flags & UPF_IOREMAP)) {
+			iounmap(p->membase);
+			goto err_unreg;
+		}
+
+		INIT_LIST_HEAD(&sciport->node);
+
+		spin_lock_irqsave(&priv->lock, flags);
+		list_add(&sciport->node, &priv->ports);
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
 
 #ifdef CONFIG_SH_STANDARD_BIOS
 	sh_bios_gdb_detach();
@@ -1268,50 +1325,36 @@ static int __devinit sci_probe(struct platform_device *dev)
 	return 0;
 
 err_unreg:
-	for (i = i - 1; i >= 0; i--)
-		uart_remove_one_port(&sci_uart_driver, &sci_ports[i].port);
-
+	sci_remove(dev);
 	return ret;
 }
 
-static int __devexit sci_remove(struct platform_device *dev)
-{
-	int i;
-
-#ifdef CONFIG_HAVE_CLK
-	cpufreq_unregister_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER);
-#endif
-
-	for (i = 0; i < SCI_NPORTS; i++)
-		uart_remove_one_port(&sci_uart_driver, &sci_ports[i].port);
-
-	return 0;
-}
-
 static int sci_suspend(struct platform_device *dev, pm_message_t state)
 {
-	int i;
+	struct sh_sci_priv *priv = platform_get_drvdata(dev);
+	struct sci_port *p;
+	unsigned long flags;
 
-	for (i = 0; i < SCI_NPORTS; i++) {
-		struct sci_port *p = &sci_ports[i];
+	spin_lock_irqsave(&priv->lock, flags);
+	list_for_each_entry(p, &priv->ports, node)
+		uart_suspend_port(&sci_uart_driver, &p->port);
 
-		if (p->type != PORT_UNKNOWN && p->port.dev == &dev->dev)
-			uart_suspend_port(&sci_uart_driver, &p->port);
-	}
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return 0;
 }
 
 static int sci_resume(struct platform_device *dev)
 {
-	int i;
+	struct sh_sci_priv *priv = platform_get_drvdata(dev);
+	struct sci_port *p;
+	unsigned long flags;
 
-	for (i = 0; i < SCI_NPORTS; i++) {
-		struct sci_port *p = &sci_ports[i];
+	spin_lock_irqsave(&priv->lock, flags);
+	list_for_each_entry(p, &priv->ports, node)
+		uart_resume_port(&sci_uart_driver, &p->port);
 
-		if (p->type != PORT_UNKNOWN && p->port.dev == &dev->dev)
-			uart_resume_port(&sci_uart_driver, &p->port);
-	}
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 9080b72819650c3a757d173a19bc930d603b79d6 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:13:58 +0000
Subject: sh-sci: remove early_sci_setup()

Remove unused early_sci_setup() function from sh-sci.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index af5da11..5f37f7d 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1082,20 +1082,6 @@ static void __init sci_init_ports(void)
 	}
 }
 
-int __init early_sci_setup(struct uart_port *port)
-{
-	if (unlikely(port->line > SCI_NPORTS))
-		return -ENODEV;
-
-	sci_init_ports();
-
-	sci_ports[port->line].port.membase	= port->membase;
-	sci_ports[port->line].port.mapbase	= port->mapbase;
-	sci_ports[port->line].port.type		= port->type;
-
-	return 0;
-}
-
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
 /*
  *	Print a string to the serial port trying not to disturb
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 893cc53..717059d 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -27,6 +27,4 @@ struct plat_sci_port {
 	upf_t		flags;			/* UPF_* flags */
 };
 
-int early_sci_setup(struct uart_port *port);
-
 #endif /* __LINUX_SERIAL_SCI_H */
-- 
cgit v0.10.2


From a9fd4f3fcdc7e5757424f7e5888f660cf34bb1a9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 8 May 2009 15:57:59 +0200
Subject: ALSA: hda - Clean up Realtek auto-mute unsol routines

Most of unsol handlers defined in patch_realtek.c can be classified to
two types, mute via amp of pins and mute via ctl bits of pins.
Thus there are a big room to generalize each implementation.

This patch creates two generic functions, alc_automute_amp() and
alc_automute_pin().  The latter is actually changed from the previous
alc_sku_automute().  Each caller needs to initialize hp_pins and
speaker_pins properly at own init_hook.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 7c043b3..34f6fb7 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -926,20 +926,26 @@ static void alc_fix_pll_init(struct hda_codec *codec, hda_nid_t nid,
 	alc_fix_pll(codec);
 }
 
-static void alc_sku_automute(struct hda_codec *codec)
+static void alc_automute_pin(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
 	unsigned int present;
-	unsigned int hp_nid = spec->autocfg.hp_pins[0];
-	unsigned int sp_nid = spec->autocfg.speaker_pins[0];
+	unsigned int nid = spec->autocfg.hp_pins[0];
+	int i;
 
 	/* need to execute and sync at first */
-	snd_hda_codec_read(codec, hp_nid, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, hp_nid, 0,
+	snd_hda_codec_read(codec, nid, 0, AC_VERB_SET_PIN_SENSE, 0);
+	present = snd_hda_codec_read(codec, nid, 0,
 				     AC_VERB_GET_PIN_SENSE, 0);
-	spec->jack_present = (present & 0x80000000) != 0;
-	snd_hda_codec_write(codec, sp_nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    spec->jack_present ? 0 : PIN_OUT);
+	spec->jack_present = (present & AC_PINSENSE_PRESENCE) != 0;
+	for (i = 0; i < ARRAY_SIZE(spec->autocfg.speaker_pins); i++) {
+		nid = spec->autocfg.speaker_pins[i];
+		if (!nid)
+			break;
+		snd_hda_codec_write(codec, nid, 0,
+				    AC_VERB_SET_PIN_WIDGET_CONTROL,
+				    spec->jack_present ? 0 : PIN_OUT);
+	}
 }
 
 #if 0 /* it's broken in some acses -- temporarily disabled */
@@ -974,16 +980,19 @@ static void alc_sku_unsol_event(struct hda_codec *codec, unsigned int res)
 		res >>= 28;
 	else
 		res >>= 26;
-	if (res == ALC880_HP_EVENT)
-		alc_sku_automute(codec);
-
-	if (res == ALC880_MIC_EVENT)
+	switch (res) {
+	case ALC880_HP_EVENT:
+		alc_automute_pin(codec);
+		break;
+	case ALC880_MIC_EVENT:
 		alc_mic_automute(codec);
+		break;
+	}
 }
 
 static void alc_inithook(struct hda_codec *codec)
 {
-	alc_sku_automute(codec);
+	alc_automute_pin(codec);
 	alc_mic_automute(codec);
 }
 
@@ -1364,32 +1373,58 @@ static struct hda_verb alc888_fujitsu_xa3530_verbs[] = {
 	{}
 };
 
-static void alc888_fujitsu_xa3530_automute(struct hda_codec *codec)
+static void alc_automute_amp(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned int bits;
-	/* Line out presence */
-	present = snd_hda_codec_read(codec, 0x17, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	/* HP out presence */
-	present = present || snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
+	struct alc_spec *spec = codec->spec;
+	unsigned int val, mute;
+	hda_nid_t nid;
+	int i;
+
+	spec->jack_present = 0;
+	for (i = 0; i < ARRAY_SIZE(spec->autocfg.hp_pins); i++) {
+		nid = spec->autocfg.hp_pins[i];
+		if (!nid)
+			break;
+		val = snd_hda_codec_read(codec, nid, 0,
+					 AC_VERB_GET_PIN_SENSE, 0);
+		if (val & AC_PINSENSE_PRESENCE) {
+			spec->jack_present = 1;
+			break;
+		}
+	}
+
+	mute = spec->jack_present ? HDA_AMP_MUTE : 0;
 	/* Toggle internal speakers muting */
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	/* Toggle internal bass muting */
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
+	for (i = 0; i < ARRAY_SIZE(spec->autocfg.speaker_pins); i++) {
+		nid = spec->autocfg.speaker_pins[i];
+		if (!nid)
+			break;
+		snd_hda_codec_amp_stereo(codec, nid, HDA_OUTPUT, 0,
+					 HDA_AMP_MUTE, mute);
+	}
 }
 
-static void alc888_fujitsu_xa3530_unsol_event(struct hda_codec *codec,
-		unsigned int res)
+static void alc_automute_amp_unsol_event(struct hda_codec *codec,
+					 unsigned int res)
 {
-	if (res >> 26 == ALC880_HP_EVENT)
-		alc888_fujitsu_xa3530_automute(codec);
+	if (codec->vendor_id == 0x10ec0880)
+		res >>= 28;
+	else
+		res >>= 26;
+	if (res == ALC880_HP_EVENT)
+		alc_automute_amp(codec);
 }
 
+static void alc888_fujitsu_xa3530_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x17; /* line-out */
+	spec->autocfg.hp_pins[1] = 0x1b; /* hp */
+	spec->autocfg.speaker_pins[0] = 0x14; /* speaker */
+	spec->autocfg.speaker_pins[1] = 0x15; /* bass */
+	alc_automute_amp(codec);
+}
 
 /*
  * ALC888 Acer Aspire 4930G model
@@ -1456,22 +1491,13 @@ static struct snd_kcontrol_new alc888_base_mixer[] = {
 	{ } /* end */
 };
 
-static void alc888_acer_aspire_4930g_automute(struct hda_codec *codec)
+static void alc888_acer_aspire_4930g_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned int bits;
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc888_acer_aspire_4930g_unsol_event(struct hda_codec *codec,
-		unsigned int res)
-{
-	if (res >> 26 == ALC880_HP_EVENT)
-		alc888_acer_aspire_4930g_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 /*
@@ -2439,21 +2465,6 @@ static struct hda_verb alc880_beep_init_verbs[] = {
 	{ }
 };
 
-/* toggle speaker-output according to the hp-jack state */
-static void alc880_uniwill_hp_automute(struct hda_codec *codec)
-{
- 	unsigned int present;
-	unsigned char bits;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
-
 /* auto-toggle front mic */
 static void alc880_uniwill_mic_automute(struct hda_codec *codec)
 {
@@ -2466,9 +2477,14 @@ static void alc880_uniwill_mic_automute(struct hda_codec *codec)
 	snd_hda_codec_amp_stereo(codec, 0x0b, HDA_INPUT, 1, HDA_AMP_MUTE, bits);
 }
 
-static void alc880_uniwill_automute(struct hda_codec *codec)
+static void alc880_uniwill_init_hook(struct hda_codec *codec)
 {
-	alc880_uniwill_hp_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x16;
+	alc_automute_amp(codec);
 	alc880_uniwill_mic_automute(codec);
 }
 
@@ -2479,24 +2495,22 @@ static void alc880_uniwill_unsol_event(struct hda_codec *codec,
 	 * definition.  4bit tag is placed at 28 bit!
 	 */
 	switch (res >> 28) {
-	case ALC880_HP_EVENT:
-		alc880_uniwill_hp_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc880_uniwill_mic_automute(codec);
 		break;
+	default:
+		alc_automute_amp_unsol_event(codec, res);
+		break;
 	}
 }
 
-static void alc880_uniwill_p53_hp_automute(struct hda_codec *codec)
+static void alc880_uniwill_p53_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-	unsigned char bits;
+	struct alc_spec *spec = codec->spec;
 
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0, HDA_AMP_MUTE, bits);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	alc_automute_amp(codec);
 }
 
 static void alc880_uniwill_p53_dcvol_automute(struct hda_codec *codec)
@@ -2518,10 +2532,10 @@ static void alc880_uniwill_p53_unsol_event(struct hda_codec *codec,
 	/* Looks like the unsol event is incompatible with the standard
 	 * definition.  4bit tag is placed at 28 bit!
 	 */
-	if ((res >> 28) == ALC880_HP_EVENT)
-		alc880_uniwill_p53_hp_automute(codec);
 	if ((res >> 28) == ALC880_DCVOL_EVENT)
 		alc880_uniwill_p53_dcvol_automute(codec);
+	else
+		alc_automute_amp_unsol_event(codec, res);
 }
 
 /*
@@ -2753,30 +2767,18 @@ static struct hda_verb alc880_lg_init_verbs[] = {
 	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
 	{0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 	/* jack sense */
-	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | 0x1},
+	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_HP_EVENT},
 	{ }
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc880_lg_automute(struct hda_codec *codec)
+static void alc880_lg_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc880_lg_unsol_event(struct hda_codec *codec, unsigned int res)
-{
-	/* Looks like the unsol event is incompatible with the standard
-	 * definition.  4bit tag is placed at 28 bit!
-	 */
-	if ((res >> 28) == 0x01)
-		alc880_lg_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x17;
+	alc_automute_amp(codec);
 }
 
 /*
@@ -2850,30 +2852,18 @@ static struct hda_verb alc880_lg_lw_init_verbs[] = {
 	{0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
 	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 	/* jack sense */
-	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | 0x1},
+	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_HP_EVENT},
 	{ }
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc880_lg_lw_automute(struct hda_codec *codec)
+static void alc880_lg_lw_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc880_lg_lw_unsol_event(struct hda_codec *codec, unsigned int res)
-{
-	/* Looks like the unsol event is incompatible with the standard
-	 * definition.  4bit tag is placed at 28 bit!
-	 */
-	if ((res >> 28) == 0x01)
-		alc880_lg_lw_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 static struct snd_kcontrol_new alc880_medion_rim_mixer[] = {
@@ -2920,16 +2910,10 @@ static struct hda_verb alc880_medion_rim_init_verbs[] = {
 /* toggle speaker-output according to the hp-jack state */
 static void alc880_medion_rim_automute(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	if (present)
+	struct alc_spec *spec = codec->spec;
+	alc_automute_amp(codec);
+	/* toggle EAPD */
+	if (spec->jack_present)
 		snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, 0);
 	else
 		snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, 2);
@@ -2945,6 +2929,15 @@ static void alc880_medion_rim_unsol_event(struct hda_codec *codec,
 		alc880_medion_rim_automute(codec);
 }
 
+static void alc880_medion_rim_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x1b;
+	alc880_medion_rim_automute(codec);
+}
+
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 static struct hda_amp_list alc880_loopbacks[] = {
 	{ 0x0b, HDA_INPUT, 0 },
@@ -3803,7 +3796,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.channel_mode = alc880_2_jack_modes,
 		.input_mux = &alc880_f1734_capture_source,
 		.unsol_event = alc880_uniwill_p53_unsol_event,
-		.init_hook = alc880_uniwill_p53_hp_automute,
+		.init_hook = alc880_uniwill_p53_init_hook,
 	},
 	[ALC880_ASUS] = {
 		.mixers = { alc880_asus_mixer },
@@ -3880,7 +3873,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 		.unsol_event = alc880_uniwill_unsol_event,
-		.init_hook = alc880_uniwill_automute,
+		.init_hook = alc880_uniwill_init_hook,
 	},
 	[ALC880_UNIWILL_P53] = {
 		.mixers = { alc880_uniwill_p53_mixer },
@@ -3892,7 +3885,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.channel_mode = alc880_threestack_modes,
 		.input_mux = &alc880_capture_source,
 		.unsol_event = alc880_uniwill_p53_unsol_event,
-		.init_hook = alc880_uniwill_p53_hp_automute,
+		.init_hook = alc880_uniwill_p53_init_hook,
 	},
 	[ALC880_FUJITSU] = {
 		.mixers = { alc880_fujitsu_mixer },
@@ -3906,7 +3899,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.channel_mode = alc880_2_jack_modes,
 		.input_mux = &alc880_capture_source,
 		.unsol_event = alc880_uniwill_p53_unsol_event,
-		.init_hook = alc880_uniwill_p53_hp_automute,
+		.init_hook = alc880_uniwill_p53_init_hook,
 	},
 	[ALC880_CLEVO] = {
 		.mixers = { alc880_three_stack_mixer },
@@ -3931,8 +3924,8 @@ static struct alc_config_preset alc880_presets[] = {
 		.channel_mode = alc880_lg_ch_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc880_lg_capture_source,
-		.unsol_event = alc880_lg_unsol_event,
-		.init_hook = alc880_lg_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc880_lg_init_hook,
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 		.loopbacks = alc880_lg_loopbacks,
 #endif
@@ -3947,8 +3940,8 @@ static struct alc_config_preset alc880_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc880_lg_lw_modes),
 		.channel_mode = alc880_lg_lw_modes,
 		.input_mux = &alc880_lg_lw_capture_source,
-		.unsol_event = alc880_lg_lw_unsol_event,
-		.init_hook = alc880_lg_lw_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc880_lg_lw_init_hook,
 	},
 	[ALC880_MEDION_RIM] = {
 		.mixers = { alc880_medion_rim_mixer },
@@ -3962,7 +3955,7 @@ static struct alc_config_preset alc880_presets[] = {
 		.channel_mode = alc880_2_jack_modes,
 		.input_mux = &alc880_medion_rim_capture_source,
 		.unsol_event = alc880_medion_rim_unsol_event,
-		.init_hook = alc880_medion_rim_automute,
+		.init_hook = alc880_medion_rim_init_hook,
 	},
 #ifdef CONFIG_SND_DEBUG
 	[ALC880_TEST] = {
@@ -6666,45 +6659,23 @@ static struct hda_verb alc885_imac24_init_verbs[] = {
 };
 
 /* Toggle speaker-output according to the hp-jack state */
-static void alc885_imac24_automute(struct hda_codec *codec)
+static void alc885_imac24_automute_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x18, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x1a, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-}
+	struct alc_spec *spec = codec->spec;
 
-/* Processes unsolicited events. */
-static void alc885_imac24_unsol_event(struct hda_codec *codec,
-				      unsigned int res)
-{
-	/* Headphone insertion or removal. */
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc885_imac24_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x18;
+	spec->autocfg.speaker_pins[1] = 0x1a;
+	alc_automute_amp(codec);
 }
 
-static void alc885_mbp3_automute(struct hda_codec *codec)
+static void alc885_mbp3_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-
- 	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x14,  HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? 0 : HDA_AMP_MUTE);
+	struct alc_spec *spec = codec->spec;
 
-}
-static void alc885_mbp3_unsol_event(struct hda_codec *codec,
-				    unsigned int res)
-{
-	/* Headphone insertion or removal. */
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc885_mbp3_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 
@@ -6729,24 +6700,25 @@ static struct hda_verb alc882_targa_verbs[] = {
 /* toggle speaker-output according to the hp-jack state */
 static void alc882_targa_automute(struct hda_codec *codec)
 {
- 	unsigned int present;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+	struct alc_spec *spec = codec->spec;
+	alc_automute_amp(codec);
 	snd_hda_codec_write_cache(codec, 1, 0, AC_VERB_SET_GPIO_DATA,
-				  present ? 1 : 3);
+				  spec->jack_present ? 1 : 3);
+}
+
+static void alc882_targa_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x1b;
+	alc882_targa_automute(codec);
 }
 
 static void alc882_targa_unsol_event(struct hda_codec *codec, unsigned int res)
 {
-	/* Looks like the unsol event is incompatible with the standard
-	 * definition.  4bit tag is placed at 26 bit!
-	 */
-	if (((res >> 26) == ALC880_HP_EVENT)) {
+	if ((res >> 26) == ALC880_HP_EVENT)
 		alc882_targa_automute(codec);
-	}
 }
 
 static struct hda_verb alc882_asus_a7j_verbs[] = {
@@ -6828,7 +6800,7 @@ static void alc885_macpro_init_hook(struct hda_codec *codec)
 static void alc885_imac24_init_hook(struct hda_codec *codec)
 {
 	alc885_macpro_init_hook(codec);
-	alc885_imac24_automute(codec);
+	alc885_imac24_automute_init_hook(codec);
 }
 
 /*
@@ -6999,8 +6971,8 @@ static struct alc_config_preset alc882_presets[] = {
 		.input_mux = &alc882_capture_source,
 		.dig_out_nid = ALC882_DIGOUT_NID,
 		.dig_in_nid = ALC882_DIGIN_NID,
-		.unsol_event = alc885_mbp3_unsol_event,
-		.init_hook = alc885_mbp3_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc885_mbp3_init_hook,
 	},
 	[ALC885_MB5] = {
 		.mixers = { alc885_mb5_mixer },
@@ -7036,7 +7008,7 @@ static struct alc_config_preset alc882_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc882_ch_modes),
 		.channel_mode = alc882_ch_modes,
 		.input_mux = &alc882_capture_source,
-		.unsol_event = alc885_imac24_unsol_event,
+		.unsol_event = alc_automute_amp_unsol_event,
 		.init_hook = alc885_imac24_init_hook,
 	},
 	[ALC882_TARGA] = {
@@ -7053,7 +7025,7 @@ static struct alc_config_preset alc882_presets[] = {
 		.need_dac_fix = 1,
 		.input_mux = &alc882_capture_source,
 		.unsol_event = alc882_targa_unsol_event,
-		.init_hook = alc882_targa_automute,
+		.init_hook = alc882_targa_init_hook,
 	},
 	[ALC882_ASUS_A7J] = {
 		.mixers = { alc882_asus_a7j_mixer, alc882_chmode_mixer },
@@ -7903,8 +7875,6 @@ static struct snd_kcontrol_new alc888_lenovo_sky_mixer[] = {
 	HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0d, 2, 2, HDA_INPUT),
 	HDA_CODEC_VOLUME("Side Playback Volume", 0x0f, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("Side Playback Switch", 0x0f, 2, HDA_INPUT),
-	HDA_CODEC_MUTE("Headphone Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
-	HDA_CODEC_MUTE("iSpeaker Playback Switch", 0x1a, 0x0, HDA_OUTPUT),
 	HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
 	HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
 	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
@@ -8053,16 +8023,14 @@ static struct hda_verb alc883_init_verbs[] = {
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_mitac_hp_automute(struct hda_codec *codec)
+static void alc883_mitac_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x17;
+	alc_automute_amp(codec);
 }
 
 /* auto-toggle front mic */
@@ -8079,25 +8047,6 @@ static void alc883_mitac_mic_automute(struct hda_codec *codec)
 }
 */
 
-static void alc883_mitac_automute(struct hda_codec *codec)
-{
-	alc883_mitac_hp_automute(codec);
-	/* alc883_mitac_mic_automute(codec); */
-}
-
-static void alc883_mitac_unsol_event(struct hda_codec *codec,
-					   unsigned int res)
-{
-	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc883_mitac_hp_automute(codec);
-		break;
-	case ALC880_MIC_EVENT:
-		/* alc883_mitac_mic_automute(codec); */
-		break;
-	}
-}
-
 static struct hda_verb alc883_mitac_verbs[] = {
 	/* HP */
 	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
@@ -8215,29 +8164,15 @@ static struct hda_verb alc888_6st_dell_verbs[] = {
 	{ }
 };
 
-static void alc888_3st_hp_front_automute(struct hda_codec *codec)
+static void alc888_3st_hp_init_hook(struct hda_codec *codec)
 {
-	unsigned int present, bits;
-
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-			AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	snd_hda_codec_amp_stereo(codec, 0x18, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc888_3st_hp_unsol_event(struct hda_codec *codec,
-				      unsigned int res)
-{
-	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc888_3st_hp_front_automute(codec);
-		break;
-	}
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x16;
+	spec->autocfg.speaker_pins[2] = 0x18;
+	alc_automute_amp(codec);
 }
 
 static struct hda_verb alc888_3st_hp_verbs[] = {
@@ -8334,56 +8269,18 @@ static struct hda_verb alc883_medion_md2_verbs[] = {
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_medion_md2_automute(struct hda_codec *codec)
+static void alc883_medion_md2_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-}
-
-static void alc883_medion_md2_unsol_event(struct hda_codec *codec,
-					  unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_medion_md2_automute(codec);
-}
-
-/* toggle speaker-output according to the hp-jack state */
-static void alc883_tagra_automute(struct hda_codec *codec)
-{
- 	unsigned int present;
-	unsigned char bits;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x1b, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-	snd_hda_codec_write_cache(codec, 1, 0, AC_VERB_SET_GPIO_DATA,
-				  present ? 1 : 3);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc883_tagra_unsol_event(struct hda_codec *codec, unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_tagra_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	alc_automute_amp(codec);
 }
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_clevo_m720_hp_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x15, 0, AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
+#define alc883_tagra_init_hook		alc882_targa_init_hook
+#define alc883_tagra_unsol_event	alc882_targa_unsol_event
 
 static void alc883_clevo_m720_mic_automute(struct hda_codec *codec)
 {
@@ -8395,9 +8292,13 @@ static void alc883_clevo_m720_mic_automute(struct hda_codec *codec)
 				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
 }
 
-static void alc883_clevo_m720_automute(struct hda_codec *codec)
+static void alc883_clevo_m720_init_hook(struct hda_codec *codec)
 {
-	alc883_clevo_m720_hp_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 	alc883_clevo_m720_mic_automute(codec);
 }
 
@@ -8405,52 +8306,32 @@ static void alc883_clevo_m720_unsol_event(struct hda_codec *codec,
 					   unsigned int res)
 {
 	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc883_clevo_m720_hp_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc883_clevo_m720_mic_automute(codec);
 		break;
+	default:
+		alc_automute_amp_unsol_event(codec, res);
+		break;
 	}
 }
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_2ch_fujitsu_pi2515_automute(struct hda_codec *codec)
+static void alc883_2ch_fujitsu_pi2515_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-	unsigned char bits;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0, AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc883_2ch_fujitsu_pi2515_unsol_event(struct hda_codec *codec,
-						  unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_2ch_fujitsu_pi2515_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	alc_automute_amp(codec);
 }
 
-static void alc883_haier_w66_automute(struct hda_codec *codec)
+static void alc883_haier_w66_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? 0x80 : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 0x80, bits);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc883_haier_w66_unsol_event(struct hda_codec *codec,
-					 unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_haier_w66_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 static void alc883_lenovo_101e_ispeaker_automute(struct hda_codec *codec)
@@ -8489,23 +8370,14 @@ static void alc883_lenovo_101e_unsol_event(struct hda_codec *codec,
 }
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc883_acer_aspire_automute(struct hda_codec *codec)
+static void alc883_acer_aspire_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc883_acer_aspire_unsol_event(struct hda_codec *codec,
-					   unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc883_acer_aspire_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[1] = 0x16;
+	alc_automute_amp(codec);
 }
 
 static struct hda_verb alc883_acer_eapd_verbs[] = {
@@ -8526,75 +8398,29 @@ static struct hda_verb alc883_acer_eapd_verbs[] = {
 	{ }
 };
 
-static void alc888_6st_dell_front_automute(struct hda_codec *codec)
+static void alc888_6st_dell_init_hook(struct hda_codec *codec)
 {
- 	unsigned int present;
-
- 	present = snd_hda_codec_read(codec, 0x1b, 0,
-				AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-				HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-				HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-	snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-				HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc888_6st_dell_unsol_event(struct hda_codec *codec,
-					     unsigned int res)
-{
-	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		/* printk(KERN_DEBUG "hp_event\n"); */
-		alc888_6st_dell_front_automute(codec);
-		break;
-	}
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x15;
+	spec->autocfg.speaker_pins[2] = 0x16;
+	spec->autocfg.speaker_pins[3] = 0x17;
+	alc_automute_amp(codec);
 }
 
-static void alc888_lenovo_sky_front_automute(struct hda_codec *codec)
+static void alc888_lenovo_sky_init_hook(struct hda_codec *codec)
 {
-	unsigned int mute;
-	unsigned int present;
-
-	snd_hda_codec_read(codec, 0x1b, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	present = (present & 0x80000000) != 0;
-	if (present) {
-		/* mute internal speaker */
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-		snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-		snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-		snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-		snd_hda_codec_amp_stereo(codec, 0x1a, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute internal speaker if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x1b, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-		snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-		snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-		snd_hda_codec_amp_stereo(codec, 0x17, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-		snd_hda_codec_amp_stereo(codec, 0x1a, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc883_lenovo_sky_unsol_event(struct hda_codec *codec,
-					     unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc888_lenovo_sky_front_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x15;
+	spec->autocfg.speaker_pins[2] = 0x16;
+	spec->autocfg.speaker_pins[3] = 0x17;
+	spec->autocfg.speaker_pins[4] = 0x1a;
+	alc_automute_amp(codec);
 }
 
 /*
@@ -8682,39 +8508,33 @@ static void alc883_nb_mic_automute(struct hda_codec *codec)
 			    0x7000 | (0x01 << 8) | (present ? 0x80 : 0));
 }
 
-static void alc883_M90V_speaker_automute(struct hda_codec *codec)
+static void alc883_M90V_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned char bits;
+	struct alc_spec *spec = codec->spec;
 
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? 0 : PIN_OUT;
-	snd_hda_codec_write(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    bits);
-	snd_hda_codec_write(codec, 0x15, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    bits);
-	snd_hda_codec_write(codec, 0x16, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    bits);
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x15;
+	spec->autocfg.speaker_pins[2] = 0x16;
+	alc_automute_pin(codec);
 }
 
 static void alc883_mode2_unsol_event(struct hda_codec *codec,
 					   unsigned int res)
 {
 	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc883_M90V_speaker_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc883_nb_mic_automute(codec);
 		break;
+	default:
+		alc_sku_unsol_event(codec, res);
+		break;
 	}
 }
 
 static void alc883_mode2_inithook(struct hda_codec *codec)
 {
-	alc883_M90V_speaker_automute(codec);
+	alc883_M90V_init_hook(codec);
 	alc883_nb_mic_automute(codec);
 }
 
@@ -8731,32 +8551,13 @@ static struct hda_verb alc888_asus_eee1601_verbs[] = {
 	{ } /* end */
 };
 
-static void alc883_eee1601_speaker_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	bits = present ? 0 : PIN_OUT;
-	snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    bits);
-}
-
-static void alc883_eee1601_unsol_event(struct hda_codec *codec,
-					   unsigned int res)
-{
-	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc883_eee1601_speaker_automute(codec);
-		break;
-	}
-}
-
 static void alc883_eee1601_inithook(struct hda_codec *codec)
 {
-	alc883_eee1601_speaker_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[0] = 0x1b;
+	alc_automute_pin(codec);
 }
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -8969,7 +8770,7 @@ static struct alc_config_preset alc883_presets[] = {
 		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
 		.unsol_event = alc883_tagra_unsol_event,
-		.init_hook = alc883_tagra_automute,
+		.init_hook = alc883_tagra_init_hook,
 	},
 	[ALC883_TARGA_2ch_DIG] = {
 		.mixers = { alc883_tagra_2ch_mixer},
@@ -8983,7 +8784,7 @@ static struct alc_config_preset alc883_presets[] = {
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
 		.unsol_event = alc883_tagra_unsol_event,
-		.init_hook = alc883_tagra_automute,
+		.init_hook = alc883_tagra_init_hook,
 	},
 	[ALC883_ACER] = {
 		.mixers = { alc883_base_mixer },
@@ -9008,8 +8809,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc883_acer_aspire_unsol_event,
-		.init_hook = alc883_acer_aspire_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_acer_aspire_init_hook,
 	},
 	[ALC888_ACER_ASPIRE_4930G] = {
 		.mixers = { alc888_base_mixer,
@@ -9028,8 +8829,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.num_mux_defs =
 			ARRAY_SIZE(alc888_2_capture_sources),
 		.input_mux = alc888_2_capture_sources,
-		.unsol_event = alc888_acer_aspire_4930g_unsol_event,
-		.init_hook = alc888_acer_aspire_4930g_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_acer_aspire_4930g_init_hook,
 	},
 	[ALC883_MEDION] = {
 		.mixers = { alc883_fivestack_mixer,
@@ -9053,8 +8854,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc883_medion_md2_unsol_event,
-		.init_hook = alc883_medion_md2_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_medion_md2_init_hook,
 	},
 	[ALC883_LAPTOP_EAPD] = {
 		.mixers = { alc883_base_mixer },
@@ -9075,7 +8876,7 @@ static struct alc_config_preset alc883_presets[] = {
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
 		.unsol_event = alc883_clevo_m720_unsol_event,
-		.init_hook = alc883_clevo_m720_automute,
+		.init_hook = alc883_clevo_m720_init_hook,
 	},
 	[ALC883_LENOVO_101E_2ch] = {
 		.mixers = { alc883_lenovo_101e_2ch_mixer},
@@ -9099,8 +8900,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.channel_mode = alc883_3ST_2ch_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc883_lenovo_nb0763_capture_source,
-		.unsol_event = alc883_medion_md2_unsol_event,
-		.init_hook = alc883_medion_md2_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_medion_md2_init_hook,
 	},
 	[ALC888_LENOVO_MS7195_DIG] = {
 		.mixers = { alc883_3ST_6ch_mixer, alc883_chmode_mixer },
@@ -9124,8 +8925,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc883_haier_w66_unsol_event,
-		.init_hook = alc883_haier_w66_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_haier_w66_init_hook,
 	},
 	[ALC888_3ST_HP] = {
 		.mixers = { alc883_3ST_6ch_mixer, alc883_chmode_mixer },
@@ -9136,8 +8937,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.channel_mode = alc888_3st_hp_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc888_3st_hp_unsol_event,
-		.init_hook = alc888_3st_hp_front_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_3st_hp_init_hook,
 	},
 	[ALC888_6ST_DELL] = {
 		.mixers = { alc883_base_mixer, alc883_chmode_mixer },
@@ -9149,8 +8950,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc883_sixstack_modes),
 		.channel_mode = alc883_sixstack_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc888_6st_dell_unsol_event,
-		.init_hook = alc888_6st_dell_front_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_6st_dell_init_hook,
 	},
 	[ALC883_MITAC] = {
 		.mixers = { alc883_mitac_mixer },
@@ -9160,8 +8961,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_capture_source,
-		.unsol_event = alc883_mitac_unsol_event,
-		.init_hook = alc883_mitac_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_mitac_init_hook,
 	},
 	[ALC883_FUJITSU_PI2515] = {
 		.mixers = { alc883_2ch_fujitsu_pi2515_mixer },
@@ -9173,8 +8974,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
 		.channel_mode = alc883_3ST_2ch_modes,
 		.input_mux = &alc883_fujitsu_pi2515_capture_source,
-		.unsol_event = alc883_2ch_fujitsu_pi2515_unsol_event,
-		.init_hook = alc883_2ch_fujitsu_pi2515_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_2ch_fujitsu_pi2515_init_hook,
 	},
 	[ALC888_FUJITSU_XA3530] = {
 		.mixers = { alc888_base_mixer, alc883_chmode_mixer },
@@ -9191,8 +8992,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.num_mux_defs =
 			ARRAY_SIZE(alc888_2_capture_sources),
 		.input_mux = alc888_2_capture_sources,
-		.unsol_event = alc888_fujitsu_xa3530_unsol_event,
-		.init_hook = alc888_fujitsu_xa3530_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_fujitsu_xa3530_init_hook,
 	},
 	[ALC888_LENOVO_SKY] = {
 		.mixers = { alc888_lenovo_sky_mixer, alc883_chmode_mixer },
@@ -9204,8 +9005,8 @@ static struct alc_config_preset alc883_presets[] = {
 		.channel_mode = alc883_sixstack_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc883_lenovo_sky_capture_source,
-		.unsol_event = alc883_lenovo_sky_unsol_event,
-		.init_hook = alc888_lenovo_sky_front_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_lenovo_sky_init_hook,
 	},
 	[ALC888_ASUS_M90V] = {
 		.mixers = { alc883_3ST_6ch_mixer, alc883_chmode_mixer },
@@ -9233,7 +9034,7 @@ static struct alc_config_preset alc883_presets[] = {
 		.channel_mode = alc883_3ST_2ch_modes,
 		.need_dac_fix = 1,
 		.input_mux = &alc883_asus_eee1601_capture_source,
-		.unsol_event = alc883_eee1601_unsol_event,
+		.unsol_event = alc_sku_unsol_event,
 		.init_hook = alc883_eee1601_inithook,
 	},
 	[ALC1200_ASUS_P5Q] = {
@@ -9666,28 +9467,15 @@ static struct snd_kcontrol_new alc262_HP_BPC_WildWest_option_mixer[] = {
 };
 
 /* mute/unmute internal speaker according to the hp jack and mute state */
-static void alc262_hp_t5735_automute(struct hda_codec *codec)
+static void alc262_hp_t5735_init_hook(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
-	unsigned int present;
 
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	spec->jack_present = (present & AC_PINSENSE_PRESENCE) != 0;
-	snd_hda_codec_amp_stereo(codec, 0x0c, HDA_OUTPUT, 0, HDA_AMP_MUTE,
-				 spec->jack_present ? HDA_AMP_MUTE : 0);
-}
-
-static void alc262_hp_t5735_unsol_event(struct hda_codec *codec,
-					unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc262_hp_t5735_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x0c; /* HACK: not actually a pin */
+	alc_automute_amp(codec);
 }
 
-#define alc262_hp_t5735_init_hook	alc262_hp_t5735_automute
-
 static struct snd_kcontrol_new alc262_hp_t5735_mixer[] = {
 	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT),
@@ -9914,34 +9702,15 @@ static struct hda_verb alc262_tyan_verbs[] = {
 };
 
 /* unsolicited event for HP jack sensing */
-static void alc262_tyan_automute(struct hda_codec *codec)
+static void alc262_tyan_init_hook(struct hda_codec *codec)
 {
-	unsigned int mute;
-	unsigned int present;
+	struct alc_spec *spec = codec->spec;
 
-	snd_hda_codec_read(codec, 0x1b, 0, AC_VERB_SET_PIN_SENSE, 0);
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0);
-	present = (present & 0x80000000) != 0;
-	if (present) {
-		/* mute line output on ATX panel */
-		snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, HDA_AMP_MUTE);
-	} else {
-		/* unmute line output if necessary */
-		mute = snd_hda_codec_amp_read(codec, 0x1b, 0, HDA_OUTPUT, 0);
-		snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
-					 HDA_AMP_MUTE, mute);
-	}
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x15;
+	alc_automute_amp(codec);
 }
 
-static void alc262_tyan_unsol_event(struct hda_codec *codec,
-				       unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc262_tyan_automute(codec);
-}
 
 #define alc262_capture_mixer		alc882_capture_mixer
 #define alc262_capture_alt_mixer	alc882_capture_alt_mixer
@@ -10096,35 +9865,24 @@ static void alc262_dmic_automute(struct hda_codec *codec)
 				AC_VERB_SET_CONNECT_SEL, present ? 0x0 : 0x09);
 }
 
-/* toggle speaker-output according to the hp-jack state */
-static void alc262_toshiba_s06_speaker_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x15, 0,
-					AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? 0 : PIN_OUT;
-	snd_hda_codec_write(codec, 0x14, 0,
-					AC_VERB_SET_PIN_WIDGET_CONTROL, bits);
-}
-
-
 
 /* unsolicited event for HP jack sensing */
 static void alc262_toshiba_s06_unsol_event(struct hda_codec *codec,
 				       unsigned int res)
 {
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc262_toshiba_s06_speaker_automute(codec);
 	if ((res >> 26) == ALC880_MIC_EVENT)
 		alc262_dmic_automute(codec);
-
+	else
+		alc_sku_unsol_event(codec, res);
 }
 
 static void alc262_toshiba_s06_init_hook(struct hda_codec *codec)
 {
-	alc262_toshiba_s06_speaker_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_pin(codec);
 	alc262_dmic_automute(codec);
 }
 
@@ -11135,7 +10893,7 @@ static struct alc_config_preset alc262_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc262_modes),
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
-		.unsol_event = alc262_hp_t5735_unsol_event,
+		.unsol_event = alc_automute_amp_unsol_event,
 		.init_hook = alc262_hp_t5735_init_hook,
 	},
 	[ALC262_HP_RP5700] = {
@@ -11256,8 +11014,8 @@ static struct alc_config_preset alc262_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc262_modes),
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_capture_source,
-		.unsol_event = alc262_tyan_unsol_event,
-		.init_hook = alc262_tyan_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc262_tyan_init_hook,
 	},
 };
 
@@ -11646,30 +11404,15 @@ static struct hda_verb alc268_dell_verbs[] = {
 };
 
 /* mute/unmute internal speaker according to the hp jack and mute state */
-static void alc268_dell_automute(struct hda_codec *codec)
+static void alc268_dell_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-	unsigned int mute;
-
-	present = snd_hda_codec_read(codec, 0x15, 0, AC_VERB_GET_PIN_SENSE, 0);
-	if (present & 0x80000000)
-		mute = HDA_AMP_MUTE;
-	else
-		mute = snd_hda_codec_amp_read(codec, 0x15, 0, HDA_OUTPUT, 0);
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, mute);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc268_dell_unsol_event(struct hda_codec *codec,
-				    unsigned int res)
-{
-	if ((res >> 26) != ALC880_HP_EVENT)
-		return;
-	alc268_dell_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_pin(codec);
 }
 
-#define alc268_dell_init_hook	alc268_dell_automute
-
 static struct snd_kcontrol_new alc267_quanta_il1_mixer[] = {
 	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x2, 0x0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT),
@@ -11688,16 +11431,6 @@ static struct hda_verb alc267_quanta_il1_verbs[] = {
 	{ }
 };
 
-static void alc267_quanta_il1_hp_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-
-	present = snd_hda_codec_read(codec, 0x15, 0, AC_VERB_GET_PIN_SENSE, 0)
-		& AC_PINSENSE_PRESENCE;
-	snd_hda_codec_write(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
-			    present ? 0 : PIN_OUT);
-}
-
 static void alc267_quanta_il1_mic_automute(struct hda_codec *codec)
 {
 	unsigned int present;
@@ -11709,9 +11442,13 @@ static void alc267_quanta_il1_mic_automute(struct hda_codec *codec)
 			    present ? 0x00 : 0x01);
 }
 
-static void alc267_quanta_il1_automute(struct hda_codec *codec)
+static void alc267_quanta_il1_init_hook(struct hda_codec *codec)
 {
-	alc267_quanta_il1_hp_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_pin(codec);
 	alc267_quanta_il1_mic_automute(codec);
 }
 
@@ -11719,12 +11456,12 @@ static void alc267_quanta_il1_unsol_event(struct hda_codec *codec,
 					   unsigned int res)
 {
 	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc267_quanta_il1_hp_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc267_quanta_il1_mic_automute(codec);
 		break;
+	default:
+		alc_sku_unsol_event(codec, res);
+		break;
 	}
 }
 
@@ -12198,7 +11935,7 @@ static struct alc_config_preset alc268_presets[] = {
 		.channel_mode = alc268_modes,
 		.input_mux = &alc268_capture_source,
 		.unsol_event = alc267_quanta_il1_unsol_event,
-		.init_hook = alc267_quanta_il1_automute,
+		.init_hook = alc267_quanta_il1_init_hook,
 	},
 	[ALC268_3ST] = {
 		.mixers = { alc268_base_mixer, alc268_capture_alt_mixer,
@@ -12293,7 +12030,7 @@ static struct alc_config_preset alc268_presets[] = {
 		.hp_nid = 0x02,
 		.num_channel_mode = ARRAY_SIZE(alc268_modes),
 		.channel_mode = alc268_modes,
-		.unsol_event = alc268_dell_unsol_event,
+		.unsol_event = alc_sku_unsol_event,
 		.init_hook = alc268_dell_init_hook,
 		.input_mux = &alc268_capture_source,
 	},
@@ -14727,19 +14464,6 @@ static struct hda_verb alc861vd_lenovo_unsol_verbs[] = {
 	{}
 };
 
-/* toggle speaker-output according to the hp-jack state */
-static void alc861vd_lenovo_hp_automute(struct hda_codec *codec)
-{
-	unsigned int present;
-	unsigned char bits;
-
-	present = snd_hda_codec_read(codec, 0x1b, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	bits = present ? HDA_AMP_MUTE : 0;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, bits);
-}
-
 static void alc861vd_lenovo_mic_automute(struct hda_codec *codec)
 {
 	unsigned int present;
@@ -14752,9 +14476,13 @@ static void alc861vd_lenovo_mic_automute(struct hda_codec *codec)
 				 HDA_AMP_MUTE, bits);
 }
 
-static void alc861vd_lenovo_automute(struct hda_codec *codec)
+static void alc861vd_lenovo_init_hook(struct hda_codec *codec)
 {
-	alc861vd_lenovo_hp_automute(codec);
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x1b;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 	alc861vd_lenovo_mic_automute(codec);
 }
 
@@ -14762,12 +14490,12 @@ static void alc861vd_lenovo_unsol_event(struct hda_codec *codec,
 					unsigned int res)
 {
 	switch (res >> 26) {
-	case ALC880_HP_EVENT:
-		alc861vd_lenovo_hp_automute(codec);
-		break;
 	case ALC880_MIC_EVENT:
 		alc861vd_lenovo_mic_automute(codec);
 		break;
+	default:
+		alc_automute_amp_unsol_event(codec, res);
+		break;
 	}
 }
 
@@ -14817,20 +14545,13 @@ static struct hda_verb alc861vd_dallas_verbs[] = {
 };
 
 /* toggle speaker-output according to the hp-jack state */
-static void alc861vd_dallas_automute(struct hda_codec *codec)
+static void alc861vd_dallas_init_hook(struct hda_codec *codec)
 {
-	unsigned int present;
-
-	present = snd_hda_codec_read(codec, 0x15, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
-	snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0,
-				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
-}
+	struct alc_spec *spec = codec->spec;
 
-static void alc861vd_dallas_unsol_event(struct hda_codec *codec, unsigned int res)
-{
-	if ((res >> 26) == ALC880_HP_EVENT)
-		alc861vd_dallas_automute(codec);
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	alc_automute_amp(codec);
 }
 
 #ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -14944,7 +14665,7 @@ static struct alc_config_preset alc861vd_presets[] = {
 		.channel_mode = alc861vd_3stack_2ch_modes,
 		.input_mux = &alc861vd_capture_source,
 		.unsol_event = alc861vd_lenovo_unsol_event,
-		.init_hook = alc861vd_lenovo_automute,
+		.init_hook = alc861vd_lenovo_init_hook,
 	},
 	[ALC861VD_DALLAS] = {
 		.mixers = { alc861vd_dallas_mixer },
@@ -14954,8 +14675,8 @@ static struct alc_config_preset alc861vd_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc861vd_3stack_2ch_modes),
 		.channel_mode = alc861vd_3stack_2ch_modes,
 		.input_mux = &alc861vd_dallas_capture_source,
-		.unsol_event = alc861vd_dallas_unsol_event,
-		.init_hook = alc861vd_dallas_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc861vd_dallas_init_hook,
 	},
 	[ALC861VD_HP] = {
 		.mixers = { alc861vd_hp_mixer },
@@ -14966,8 +14687,8 @@ static struct alc_config_preset alc861vd_presets[] = {
 		.num_channel_mode = ARRAY_SIZE(alc861vd_3stack_2ch_modes),
 		.channel_mode = alc861vd_3stack_2ch_modes,
 		.input_mux = &alc861vd_hp_capture_source,
-		.unsol_event = alc861vd_dallas_unsol_event,
-		.init_hook = alc861vd_dallas_automute,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc861vd_dallas_init_hook,
 	},
 	[ALC660VD_ASUS_V1S] = {
 		.mixers = { alc861vd_lenovo_mixer },
@@ -14982,7 +14703,7 @@ static struct alc_config_preset alc861vd_presets[] = {
 		.channel_mode = alc861vd_3stack_2ch_modes,
 		.input_mux = &alc861vd_capture_source,
 		.unsol_event = alc861vd_lenovo_unsol_event,
-		.init_hook = alc861vd_lenovo_automute,
+		.init_hook = alc861vd_lenovo_init_hook,
 	},
 };
 
-- 
cgit v0.10.2


From dc8e6f5bfcd6a307a8196d3e41fd9798be5a1c76 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:14:06 +0000
Subject: sh-sci: rework serial console support

Rework sh-sci serial console code.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 5f37f7d..d2cd1a4 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -91,10 +91,6 @@ struct sh_sci_priv {
 #endif
 };
 
-#ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
-static struct sci_port *serial_console_port;
-#endif
-
 /* Function prototypes */
 static void sci_stop_tx(struct uart_port *port);
 
@@ -1083,6 +1079,18 @@ static void __init sci_init_ports(void)
 }
 
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
+static struct tty_driver *serial_console_device(struct console *co, int *index)
+{
+	struct uart_driver *p = &sci_uart_driver;
+	*index = co->index;
+	return p->tty_driver;
+}
+
+static void serial_console_putchar(struct uart_port *port, int ch)
+{
+	sci_poll_put_char(port, ch);
+}
+
 /*
  *	Print a string to the serial port trying not to disturb
  *	any possible real use of the port...
@@ -1090,16 +1098,10 @@ static void __init sci_init_ports(void)
 static void serial_console_write(struct console *co, const char *s,
 				 unsigned count)
 {
-	struct uart_port *port = &serial_console_port->port;
+	struct uart_port *port = co->data;
 	unsigned short bits;
-	int i;
 
-	for (i = 0; i < count; i++) {
-		if (*s == 10)
-			sci_poll_put_char(port, '\r');
-
-		sci_poll_put_char(port, *s++);
-	}
+	uart_console_write(co->data, s, count, serial_console_putchar);
 
 	/* wait until fifo is empty and last bit has been transmitted */
 	bits = SCxSR_TDxE(port) | SCxSR_TEND(port);
@@ -1109,6 +1111,7 @@ static void serial_console_write(struct console *co, const char *s,
 
 static int __init serial_console_setup(struct console *co, char *options)
 {
+	struct sci_port *sci_port;
 	struct uart_port *port;
 	int baud = 115200;
 	int bits = 8;
@@ -1124,8 +1127,9 @@ static int __init serial_console_setup(struct console *co, char *options)
 	if (co->index >= SCI_NPORTS)
 		co->index = 0;
 
-	serial_console_port = &sci_ports[co->index];
-	port = &serial_console_port->port;
+	sci_port = &sci_ports[co->index];
+	port = &sci_port->port;
+	co->data = port;
 
 	/*
 	 * Also need to check port->type, we don't actually have any
@@ -1135,21 +1139,17 @@ static int __init serial_console_setup(struct console *co, char *options)
 	 */
 	if (!port->type)
 		return -ENODEV;
-	if (!port->membase || !port->mapbase)
-		return -ENODEV;
-
-	port->type = serial_console_port->type;
 
 #ifdef CONFIG_HAVE_CLK
-	if (!serial_console_port->clk)
-		serial_console_port->clk = clk_get(NULL, "module_clk");
+	if (!sci_port->clk)
+		sci_port->clk = clk_get(NULL, "module_clk");
 #endif
 
 	if (port->flags & UPF_IOREMAP)
 		sci_config_port(port, 0);
 
-	if (serial_console_port->enable)
-		serial_console_port->enable(port);
+	if (sci_port->enable)
+		sci_port->enable(port);
 
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
@@ -1165,12 +1165,11 @@ static int __init serial_console_setup(struct console *co, char *options)
 
 static struct console serial_console = {
 	.name		= "ttySC",
-	.device		= uart_console_device,
+	.device		= serial_console_device,
 	.write		= serial_console_write,
 	.setup		= serial_console_setup,
 	.flags		= CON_PRINTBUFFER,
 	.index		= -1,
-	.data		= &sci_uart_driver,
 };
 
 static int __init sci_console_init(void)
-- 
cgit v0.10.2


From a5660adae85918f2ab6b10ab58e2f574c1bd5ce1 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:14:38 +0000
Subject: sh-sci: use to_sci_port() if possible

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index d2cd1a4..17fa7f1 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -618,7 +618,7 @@ static inline int sci_handle_breaks(struct uart_port *port)
 	int copied = 0;
 	unsigned short status = sci_in(port, SCxSR);
 	struct tty_struct *tty = port->info->port.tty;
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	if (uart_handle_break(port))
 		return 0;
@@ -875,7 +875,7 @@ static void sci_break_ctl(struct uart_port *port, int break_state)
 
 static int sci_startup(struct uart_port *port)
 {
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	if (s->enable)
 		s->enable(port);
@@ -893,7 +893,7 @@ static int sci_startup(struct uart_port *port)
 
 static void sci_shutdown(struct uart_port *port)
 {
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	sci_stop_rx(port);
 	sci_stop_tx(port);
@@ -990,7 +990,7 @@ static int sci_request_port(struct uart_port *port)
 
 static void sci_config_port(struct uart_port *port, int flags)
 {
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	port->type = s->type;
 
@@ -1002,7 +1002,7 @@ static void sci_config_port(struct uart_port *port, int flags)
 
 static int sci_verify_port(struct uart_port *port, struct serial_struct *ser)
 {
-	struct sci_port *s = &sci_ports[port->line];
+	struct sci_port *s = to_sci_port(port);
 
 	if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > nr_irqs)
 		return -EINVAL;
-- 
cgit v0.10.2


From 0ee70712922c15252183db8b50a7e369c96017c0 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:13:50 +0000
Subject: sh-sci: allow single port platform devices

Allow registration of single port sh-sci platform devices.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 17fa7f1..e9b350c 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1221,6 +1221,70 @@ static int __devexit sci_remove(struct platform_device *dev)
 	return 0;
 }
 
+static int __devinit sci_probe_single(struct platform_device *dev,
+				      unsigned int index,
+				      struct plat_sci_port *p,
+				      struct sci_port *sciport)
+{
+	struct sh_sci_priv *priv = platform_get_drvdata(dev);
+	unsigned long flags;
+	int ret;
+
+	/* Sanity check */
+	if (unlikely(index >= SCI_NPORTS)) {
+		dev_notice(&dev->dev, "Attempting to register port "
+			   "%d when only %d are available.\n",
+			   index+1, SCI_NPORTS);
+		dev_notice(&dev->dev, "Consider bumping "
+			   "CONFIG_SERIAL_SH_SCI_NR_UARTS!\n");
+		return 0;
+	}
+
+	sciport->port.mapbase	= p->mapbase;
+
+	if (p->mapbase && !p->membase) {
+		if (p->flags & UPF_IOREMAP) {
+			p->membase = ioremap_nocache(p->mapbase, 0x40);
+			if (IS_ERR(p->membase))
+				return PTR_ERR(p->membase);
+		} else {
+			/*
+			 * For the simple (and majority of) cases
+			 * where we don't need to do any remapping,
+			 * just cast the cookie directly.
+			 */
+			p->membase = (void __iomem *)p->mapbase;
+		}
+	}
+
+	sciport->port.membase	= p->membase;
+
+	sciport->port.irq	= p->irqs[SCIx_TXI_IRQ];
+	sciport->port.flags	= p->flags;
+	sciport->port.dev	= &dev->dev;
+
+	sciport->type		= sciport->port.type = p->type;
+
+	memcpy(&sciport->irqs, &p->irqs, sizeof(p->irqs));
+
+	ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
+
+	if (ret) {
+		if (p->flags & UPF_IOREMAP)
+			iounmap(p->membase);
+
+		return ret;
+	}
+
+	INIT_LIST_HEAD(&sciport->node);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_add(&sciport->node, &priv->ports);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
 /*
  * Register a set of serial devices attached to a platform device.  The
  * list is terminated with a zero flags entry, which means we expect
@@ -1232,7 +1296,6 @@ static int __devinit sci_probe(struct platform_device *dev)
 	struct plat_sci_port *p = dev->dev.platform_data;
 	struct sh_sci_priv *priv;
 	int i, ret = -EINVAL;
-	unsigned long flags;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -1247,60 +1310,16 @@ static int __devinit sci_probe(struct platform_device *dev)
 	cpufreq_register_notifier(&priv->clk_nb, CPUFREQ_TRANSITION_NOTIFIER);
 #endif
 
-	for (i = 0; p && p->flags != 0; p++, i++) {
-		struct sci_port *sciport = &sci_ports[i];
-
-		/* Sanity check */
-		if (unlikely(i == SCI_NPORTS)) {
-			dev_notice(&dev->dev, "Attempting to register port "
-				   "%d when only %d are available.\n",
-				   i+1, SCI_NPORTS);
-			dev_notice(&dev->dev, "Consider bumping "
-				   "CONFIG_SERIAL_SH_SCI_NR_UARTS!\n");
-			break;
-		}
-
-		sciport->port.mapbase	= p->mapbase;
-
-		if (p->mapbase && !p->membase) {
-			if (p->flags & UPF_IOREMAP) {
-				p->membase = ioremap_nocache(p->mapbase, 0x40);
-				if (IS_ERR(p->membase)) {
-					ret = PTR_ERR(p->membase);
-					goto err_unreg;
-				}
-			} else {
-				/*
-				 * For the simple (and majority of) cases
-				 * where we don't need to do any remapping,
-				 * just cast the cookie directly.
-				 */
-				p->membase = (void __iomem *)p->mapbase;
-			}
-		}
-
-		sciport->port.membase	= p->membase;
-
-		sciport->port.irq	= p->irqs[SCIx_TXI_IRQ];
-		sciport->port.flags	= p->flags;
-		sciport->port.dev	= &dev->dev;
-
-		sciport->type		= sciport->port.type = p->type;
-
-		memcpy(&sciport->irqs, &p->irqs, sizeof(p->irqs));
-
-		ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
-
-		if (ret && (p->flags & UPF_IOREMAP)) {
-			iounmap(p->membase);
+	if (dev->id != -1) {
+		ret = sci_probe_single(dev, dev->id, p, &sci_ports[dev->id]);
+		if (ret)
 			goto err_unreg;
+	} else {
+		for (i = 0; p && p->flags != 0; p++, i++) {
+			ret = sci_probe_single(dev, i, p, &sci_ports[i]);
+			if (ret)
+				goto err_unreg;
 		}
-
-		INIT_LIST_HEAD(&sciport->node);
-
-		spin_lock_irqsave(&priv->lock, flags);
-		list_add(&sciport->node, &priv->ports);
-		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
 #ifdef CONFIG_SH_STANDARD_BIOS
-- 
cgit v0.10.2


From 7ed7e0711b3ff85b3e15591081b42f2af96d584b Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:14:14 +0000
Subject: sh-sci: replace sci_init_ports()

Replace sci_init_ports() with sci_init_single().

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index e9b350c..039c700 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1036,46 +1036,64 @@ static struct uart_ops sci_uart_ops = {
 #endif
 };
 
-static void __init sci_init_ports(void)
+static int __devinit sci_init_single(struct sci_port *sci_port,
+				     unsigned int index,
+				     struct plat_sci_port *p)
 {
-	static int first = 1;
-	int i;
-
-	if (!first)
-		return;
-
-	first = 0;
-
-	for (i = 0; i < SCI_NPORTS; i++) {
-		sci_ports[i].port.ops		= &sci_uart_ops;
-		sci_ports[i].port.iotype	= UPIO_MEM;
-		sci_ports[i].port.line		= i;
-		sci_ports[i].port.fifosize	= 1;
+	sci_port->port.ops	= &sci_uart_ops;
+	sci_port->port.iotype	= UPIO_MEM;
+	sci_port->port.line	= index;
+	sci_port->port.fifosize	= 1;
 
 #if defined(__H8300H__) || defined(__H8300S__)
 #ifdef __H8300S__
-		sci_ports[i].enable	= h8300_sci_enable;
-		sci_ports[i].disable	= h8300_sci_disable;
+	sci_port->enable	= h8300_sci_enable;
+	sci_port->disable	= h8300_sci_disable;
 #endif
-		sci_ports[i].port.uartclk = CONFIG_CPU_CLOCK;
+	sci_port->port.uartclk	= CONFIG_CPU_CLOCK;
 #elif defined(CONFIG_HAVE_CLK)
-		/*
-		 * XXX: We should use a proper SCI/SCIF clock
-		 */
-		{
-			struct clk *clk = clk_get(NULL, "module_clk");
-			sci_ports[i].port.uartclk = clk_get_rate(clk);
-			clk_put(clk);
-		}
+	/*
+	 * XXX: We should use a proper SCI/SCIF clock
+	 */
+	{
+		struct clk *clk = clk_get(NULL, "module_clk");
+		sci_port->port.uartclk = clk_get_rate(clk);
+		clk_put(clk);
+	}
 #else
 #error "Need a valid uartclk"
 #endif
 
-		sci_ports[i].break_timer.data = (unsigned long)&sci_ports[i];
-		sci_ports[i].break_timer.function = sci_break_timer;
+	sci_port->break_timer.data = (unsigned long)sci_port;
+	sci_port->break_timer.function = sci_break_timer;
+	init_timer(&sci_port->break_timer);
+
+	sci_port->port.mapbase	= p->mapbase;
 
-		init_timer(&sci_ports[i].break_timer);
+	if (p->mapbase && !p->membase) {
+		if (p->flags & UPF_IOREMAP) {
+			p->membase = ioremap_nocache(p->mapbase, 0x40);
+			if (IS_ERR(p->membase))
+				return PTR_ERR(p->membase);
+		} else {
+			/*
+			 * For the simple (and majority of) cases
+			 * where we don't need to do any remapping,
+			 * just cast the cookie directly.
+			 */
+			p->membase = (void __iomem *)p->mapbase;
+		}
 	}
+
+	sci_port->port.membase	= p->membase;
+
+	sci_port->port.irq	= p->irqs[SCIx_TXI_IRQ];
+	sci_port->port.flags	= p->flags;
+	sci_port->type		= sci_port->port.type = p->type;
+
+	memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs));
+
+	return 0;
 }
 
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
@@ -1174,7 +1192,6 @@ static struct console serial_console = {
 
 static int __init sci_console_init(void)
 {
-	sci_init_ports();
 	register_console(&serial_console);
 	return 0;
 }
@@ -1240,32 +1257,10 @@ static int __devinit sci_probe_single(struct platform_device *dev,
 		return 0;
 	}
 
-	sciport->port.mapbase	= p->mapbase;
-
-	if (p->mapbase && !p->membase) {
-		if (p->flags & UPF_IOREMAP) {
-			p->membase = ioremap_nocache(p->mapbase, 0x40);
-			if (IS_ERR(p->membase))
-				return PTR_ERR(p->membase);
-		} else {
-			/*
-			 * For the simple (and majority of) cases
-			 * where we don't need to do any remapping,
-			 * just cast the cookie directly.
-			 */
-			p->membase = (void __iomem *)p->mapbase;
-		}
-	}
-
-	sciport->port.membase	= p->membase;
-
-	sciport->port.irq	= p->irqs[SCIx_TXI_IRQ];
-	sciport->port.flags	= p->flags;
-	sciport->port.dev	= &dev->dev;
-
-	sciport->type		= sciport->port.type = p->type;
-
-	memcpy(&sciport->irqs, &p->irqs, sizeof(p->irqs));
+	sciport->port.dev = &dev->dev;
+	ret = sci_init_single(sciport, index, p);
+	if (ret)
+		return ret;
 
 	ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
 
@@ -1380,8 +1375,6 @@ static int __init sci_init(void)
 
 	printk(banner);
 
-	sci_init_ports();
-
 	ret = uart_register_driver(&sci_uart_driver);
 	if (likely(ret == 0)) {
 		ret = platform_driver_register(&sci_driver);
-- 
cgit v0.10.2


From 08f8cb315fdf9195b472aeb440ae65b189b151da Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:14:22 +0000
Subject: sh-sci: ioremap() in a single place

Handle ioremap() in sci_config_port only.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 039c700..408624a 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -994,9 +994,21 @@ static void sci_config_port(struct uart_port *port, int flags)
 
 	port->type = s->type;
 
-	if (port->flags & UPF_IOREMAP && !port->membase) {
+	if (port->membase)
+		return;
+
+	if (port->flags & UPF_IOREMAP) {
 		port->membase = ioremap_nocache(port->mapbase, 0x40);
-		dev_err(port->dev, "can't remap port#%d\n", port->line);
+
+		if (IS_ERR(port->membase))
+			dev_err(port->dev, "can't remap port#%d\n", port->line);
+	} else {
+		/*
+		 * For the simple (and majority of) cases where we don't
+		 * need to do any remapping, just cast the cookie
+		 * directly.
+		 */
+		port->membase = (void __iomem *)port->mapbase;
 	}
 }
 
@@ -1036,9 +1048,9 @@ static struct uart_ops sci_uart_ops = {
 #endif
 };
 
-static int __devinit sci_init_single(struct sci_port *sci_port,
-				     unsigned int index,
-				     struct plat_sci_port *p)
+static void __devinit sci_init_single(struct sci_port *sci_port,
+				      unsigned int index,
+				      struct plat_sci_port *p)
 {
 	sci_port->port.ops	= &sci_uart_ops;
 	sci_port->port.iotype	= UPIO_MEM;
@@ -1069,22 +1081,6 @@ static int __devinit sci_init_single(struct sci_port *sci_port,
 	init_timer(&sci_port->break_timer);
 
 	sci_port->port.mapbase	= p->mapbase;
-
-	if (p->mapbase && !p->membase) {
-		if (p->flags & UPF_IOREMAP) {
-			p->membase = ioremap_nocache(p->mapbase, 0x40);
-			if (IS_ERR(p->membase))
-				return PTR_ERR(p->membase);
-		} else {
-			/*
-			 * For the simple (and majority of) cases
-			 * where we don't need to do any remapping,
-			 * just cast the cookie directly.
-			 */
-			p->membase = (void __iomem *)p->mapbase;
-		}
-	}
-
 	sci_port->port.membase	= p->membase;
 
 	sci_port->port.irq	= p->irqs[SCIx_TXI_IRQ];
@@ -1092,8 +1088,6 @@ static int __devinit sci_init_single(struct sci_port *sci_port,
 	sci_port->type		= sci_port->port.type = p->type;
 
 	memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs));
-
-	return 0;
 }
 
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
@@ -1163,8 +1157,7 @@ static int __init serial_console_setup(struct console *co, char *options)
 		sci_port->clk = clk_get(NULL, "module_clk");
 #endif
 
-	if (port->flags & UPF_IOREMAP)
-		sci_config_port(port, 0);
+	sci_config_port(port, 0);
 
 	if (sci_port->enable)
 		sci_port->enable(port);
@@ -1258,18 +1251,11 @@ static int __devinit sci_probe_single(struct platform_device *dev,
 	}
 
 	sciport->port.dev = &dev->dev;
-	ret = sci_init_single(sciport, index, p);
-	if (ret)
-		return ret;
+	sci_init_single(sciport, index, p);
 
 	ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
-
-	if (ret) {
-		if (p->flags & UPF_IOREMAP)
-			iounmap(p->membase);
-
+	if (ret)
 		return ret;
-	}
 
 	INIT_LIST_HEAD(&sciport->node);
 
-- 
cgit v0.10.2


From 501b825d01efb93766c87d29f299851152cf4eb0 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:14:30 +0000
Subject: sh-sci: improve clock framework support

Use enable/disable hooks for clock framework integration.
Make sure we control the clock for the serial console as well.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 408624a..3daf767 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -76,8 +76,10 @@ struct sci_port {
 	int			break_flag;
 
 #ifdef CONFIG_HAVE_CLK
-	/* Port clock */
-	struct clk		*clk;
+	/* Interface clock */
+	struct clk		*iclk;
+	/* Data clock */
+	struct clk		*dclk;
 #endif
 	struct list_head	node;
 };
@@ -166,12 +168,12 @@ static void h8300_sci_config(struct uart_port *port, unsigned int ctrl)
 		*mstpcrl &= ~mask;
 }
 
-static inline void h8300_sci_enable(struct uart_port *port)
+static void h8300_sci_enable(struct uart_port *port)
 {
 	h8300_sci_config(port, sci_enable);
 }
 
-static inline void h8300_sci_disable(struct uart_port *port)
+static void h8300_sci_disable(struct uart_port *port)
 {
 	h8300_sci_config(port, sci_disable);
 }
@@ -742,13 +744,34 @@ static int sci_notifier(struct notifier_block *self,
 	    (phase == CPUFREQ_RESUMECHANGE)) {
 		spin_lock_irqsave(&priv->lock, flags);
 		list_for_each_entry(sci_port, &priv->ports, node)
-			sci_port->port.uartclk = clk_get_rate(sci_port->clk);
+			sci_port->port.uartclk = clk_get_rate(sci_port->dclk);
 
 		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
 	return NOTIFY_OK;
 }
+
+static void sci_clk_enable(struct uart_port *port)
+{
+	struct sci_port *sci_port = to_sci_port(port);
+
+	clk_enable(sci_port->dclk);
+	sci_port->port.uartclk = clk_get_rate(sci_port->dclk);
+
+	if (sci_port->iclk)
+		clk_enable(sci_port->iclk);
+}
+
+static void sci_clk_disable(struct uart_port *port)
+{
+	struct sci_port *sci_port = to_sci_port(port);
+
+	if (sci_port->iclk)
+		clk_disable(sci_port->iclk);
+
+	clk_disable(sci_port->dclk);
+}
 #endif
 
 static int sci_request_irq(struct sci_port *port)
@@ -880,10 +903,6 @@ static int sci_startup(struct uart_port *port)
 	if (s->enable)
 		s->enable(port);
 
-#ifdef CONFIG_HAVE_CLK
-	s->clk = clk_get(NULL, "module_clk");
-#endif
-
 	sci_request_irq(s);
 	sci_start_tx(port);
 	sci_start_rx(port, 1);
@@ -901,11 +920,6 @@ static void sci_shutdown(struct uart_port *port)
 
 	if (s->disable)
 		s->disable(port);
-
-#ifdef CONFIG_HAVE_CLK
-	clk_put(s->clk);
-	s->clk = NULL;
-#endif
 }
 
 static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
@@ -1048,7 +1062,8 @@ static struct uart_ops sci_uart_ops = {
 #endif
 };
 
-static void __devinit sci_init_single(struct sci_port *sci_port,
+static void __devinit sci_init_single(struct platform_device *dev,
+				      struct sci_port *sci_port,
 				      unsigned int index,
 				      struct plat_sci_port *p)
 {
@@ -1064,14 +1079,10 @@ static void __devinit sci_init_single(struct sci_port *sci_port,
 #endif
 	sci_port->port.uartclk	= CONFIG_CPU_CLOCK;
 #elif defined(CONFIG_HAVE_CLK)
-	/*
-	 * XXX: We should use a proper SCI/SCIF clock
-	 */
-	{
-		struct clk *clk = clk_get(NULL, "module_clk");
-		sci_port->port.uartclk = clk_get_rate(clk);
-		clk_put(clk);
-	}
+	sci_port->iclk		= p->clk ? clk_get(&dev->dev, p->clk) : NULL;
+	sci_port->dclk		= clk_get(&dev->dev, "module_clk");
+	sci_port->enable	= sci_clk_enable;
+	sci_port->disable	= sci_clk_disable;
 #else
 #error "Need a valid uartclk"
 #endif
@@ -1085,9 +1096,11 @@ static void __devinit sci_init_single(struct sci_port *sci_port,
 
 	sci_port->port.irq	= p->irqs[SCIx_TXI_IRQ];
 	sci_port->port.flags	= p->flags;
+	sci_port->port.dev	= &dev->dev;
 	sci_port->type		= sci_port->port.type = p->type;
 
 	memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs));
+
 }
 
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
@@ -1111,14 +1124,21 @@ static void serial_console_write(struct console *co, const char *s,
 				 unsigned count)
 {
 	struct uart_port *port = co->data;
+	struct sci_port *sci_port = to_sci_port(port);
 	unsigned short bits;
 
-	uart_console_write(co->data, s, count, serial_console_putchar);
+	if (sci_port->enable)
+		sci_port->enable(port);
+
+	uart_console_write(port, s, count, serial_console_putchar);
 
 	/* wait until fifo is empty and last bit has been transmitted */
 	bits = SCxSR_TDxE(port) | SCxSR_TEND(port);
 	while ((sci_in(port, SCxSR) & bits) != bits)
 		cpu_relax();
+
+	if (sci_port->disable);
+		sci_port->disable(port);
 }
 
 static int __init serial_console_setup(struct console *co, char *options)
@@ -1152,11 +1172,6 @@ static int __init serial_console_setup(struct console *co, char *options)
 	if (!port->type)
 		return -ENODEV;
 
-#ifdef CONFIG_HAVE_CLK
-	if (!sci_port->clk)
-		sci_port->clk = clk_get(NULL, "module_clk");
-#endif
-
 	sci_config_port(port, 0);
 
 	if (sci_port->enable)
@@ -1171,6 +1186,7 @@ static int __init serial_console_setup(struct console *co, char *options)
 	if (ret == 0)
 		sci_stop_rx(port);
 #endif
+	/* TODO: disable clock */
 	return ret;
 }
 
@@ -1250,8 +1266,7 @@ static int __devinit sci_probe_single(struct platform_device *dev,
 		return 0;
 	}
 
-	sciport->port.dev = &dev->dev;
-	sci_init_single(sciport, index, p);
+	sci_init_single(dev, sciport, index, p);
 
 	ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
 	if (ret)
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 717059d..1c297dd 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -25,6 +25,7 @@ struct plat_sci_port {
 	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
 	unsigned int	type;			/* SCI / SCIF / IRDA */
 	upf_t		flags;			/* UPF_* flags */
+	char		*clk;			/* clock string */
 };
 
 #endif /* __LINUX_SERIAL_SCI_H */
-- 
cgit v0.10.2


From 3b226e15beb5ecf068738e796811afd1e5b3f81f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 23:28:54 +0900
Subject: sh: Add clock id to sh-sci platform data on SH-Mobile CPUs.

This adds the clock specifier to all of the SH-Mobile sh-sci ports.
Impacted CPUs are SH7343/SH7366/SH7722/SH7723/SH7724.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index f327b7e..b9c361e 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -179,21 +179,25 @@ static struct plat_sci_port sci_platform_data[] = {
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	}, {
 		.mapbase	= 0xffe10000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 81, 81, 81, 81 },
+		.clk		= "scif1",
 	}, {
 		.mapbase	= 0xffe20000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 82, 82, 82, 82 },
+		.clk		= "scif2",
 	}, {
 		.mapbase	= 0xffe30000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 83, 83, 83, 83 },
+		.clk		= "scif3",
 	}, {
 		.flags = 0,
 	}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index 7361ea9..d4a994b 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -186,6 +186,7 @@ static struct plat_sci_port sci_platform_data[] = {
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	}, {
 		.flags = 0,
 	}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index b1c3e7f..5d6247f 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -307,18 +307,21 @@ static struct plat_sci_port sci_platform_data[] = {
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	},
 	{
 		.mapbase	= 0xffe10000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 81, 81, 81, 81 },
+		.clk		= "scif1",
 	},
 	{
 		.mapbase	= 0xffe20000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 82, 82, 82, 82 },
+		.clk		= "scif2",
 	},
 	{
 		.flags = 0,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index 484ca2d..1429fc5 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -321,31 +321,37 @@ static struct plat_sci_port sci_platform_data[] = {
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	},{
 		.mapbase        = 0xffe10000,
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 81, 81, 81, 81 },
+		.clk		= "scif1",
 	},{
 		.mapbase        = 0xffe20000,
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 82, 82, 82, 82 },
+		.clk		= "scif2",
 	},{
 		.mapbase	= 0xa4e30000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 56, 56, 56, 56 },
+		.clk		= "scif3",
 	},{
 		.mapbase	= 0xa4e40000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 88, 88, 88, 88 },
+		.clk		= "scif4",
 	},{
 		.mapbase	= 0xa4e50000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 109, 109, 109, 109 },
+		.clk		= "scif5",
 	}, {
 		.flags = 0,
 	}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index e074951..a3039ce 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -30,31 +30,37 @@ static struct plat_sci_port sci_platform_data[] = {
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 80, 80, 80, 80 },
+		.clk		= "scif0",
 	}, {
 		.mapbase        = 0xffe10000,
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 81, 81, 81, 81 },
+		.clk		= "scif1",
 	}, {
 		.mapbase        = 0xffe20000,
 		.flags          = UPF_BOOT_AUTOCONF,
 		.type           = PORT_SCIF,
 		.irqs           = { 82, 82, 82, 82 },
+		.clk		= "scif2",
 	}, {
 		.mapbase	= 0xa4e30000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 56, 56, 56, 56 },
+		.clk		= "scif3",
 	}, {
 		.mapbase	= 0xa4e40000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 88, 88, 88, 88 },
+		.clk		= "scif4",
 	}, {
 		.mapbase	= 0xa4e50000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIFA,
 		.irqs		= { 109, 109, 109, 109 },
+		.clk		= "scif5",
 	}, {
 		.flags = 0,
 	}
-- 
cgit v0.10.2


From 54507f6ee99778a727ff1b38a1f4050fe6479835 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 8 May 2009 23:48:33 +0900
Subject: serial: sh-sci: Fix up section mismatch in error path.

The sci_probe_single() path attempts to use sci_remove() for the error
path, while sci_remove() is still flagged as __devexit. So, we simply
discard the section annotation.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 3daf767..686e4a4 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1227,7 +1227,7 @@ static struct uart_driver sci_uart_driver = {
 };
 
 
-static int __devexit sci_remove(struct platform_device *dev)
+static int sci_remove(struct platform_device *dev)
 {
 	struct sh_sci_priv *priv = platform_get_drvdata(dev);
 	struct sci_port *p;
-- 
cgit v0.10.2


From 168f36237b16e2b3159e24c7d3b658e3c912d149 Mon Sep 17 00:00:00 2001
From: Yoshinori Sato <ysato@users.sourceforge.jp>
Date: Tue, 28 Apr 2009 04:40:15 +0000
Subject: serial: sh-sci: Fix up h8300 support.

- Dummy SCIF functions define.
- h8300 specific header include.

Signed-off-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 686e4a4..4e3248d 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -54,6 +54,10 @@
 #include <asm/sh_bios.h>
 #endif
 
+#ifdef CONFIG_H8300
+#include <asm/gpio.h>
+#endif
+
 #include "sh-sci.h"
 
 struct sci_port {
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index 84cc651..e3eae4e 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -317,7 +317,18 @@
     }									\
   }
 
-#define CPU_SCIF_FNS(name, scif_offset, scif_size)				\
+#ifdef CONFIG_H8300
+/* h8300 don't have SCIF */
+#define CPU_SCIF_FNS(name)						\
+  static inline unsigned int sci_##name##_in(struct uart_port *port)	\
+  {									\
+    return 0;								\
+  }									\
+  static inline void sci_##name##_out(struct uart_port *port, unsigned int value) \
+  {									\
+  }
+#else
+#define CPU_SCIF_FNS(name, scif_offset, scif_size)			\
   static inline unsigned int sci_##name##_in(struct uart_port *port)	\
   {									\
     SCI_IN(scif_size, scif_offset);					\
@@ -326,6 +337,7 @@
   {									\
     SCI_OUT(scif_size, scif_offset, value);				\
   }
+#endif
 
 #define CPU_SCI_FNS(name, sci_offset, sci_size)				\
   static inline unsigned int sci_##name##_in(struct uart_port* port)	\
@@ -363,7 +375,8 @@
 		 sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size, \
                  h8_sci_offset, h8_sci_size) \
   CPU_SCI_FNS(name, h8_sci_offset, h8_sci_size)
-#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size)
+#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) \
+  CPU_SCIF_FNS(name)
 #elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\
       defined(CONFIG_CPU_SUBTYPE_SH7724)
         #define SCIx_FNS(name, sh4_scifa_offset, sh4_scifa_size, sh4_scif_offset, sh4_scif_size) \
-- 
cgit v0.10.2


From be6514c6295cc79498eeb9a8f933451082ca9e69 Mon Sep 17 00:00:00 2001
From: Kieran Bingham <kieranbingham@gmail.com>
Date: Fri, 8 May 2009 15:48:15 +0100
Subject: sh: Add in some ptrace definitions from GDB.

Plugs in PT_TEXT_END_ADDR/PT_TEXT_ADDR/PT_DATA_ADDR/PT_TEXT_LEN
definitions.

Signed-off-by: Kieran Bingham <kieranbingham@gmail.com>
Signed-off-by: Peter Griffin <pgriffin@mpc-data.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index 68e20ff..1dc12cb 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -102,6 +102,11 @@ struct pt_dspregs {
 #define	PTRACE_GETDSPREGS	55	/* DSP registers */
 #define	PTRACE_SETDSPREGS	56
 
+#define PT_TEXT_END_ADDR 	240
+#define PT_TEXT_ADDR 		244	/* &(struct user)->start_code */
+#define PT_DATA_ADDR 		248	/* &(struct user)->start_data */
+#define PT_TEXT_LEN		252
+
 #ifdef __KERNEL__
 #include <asm/addrspace.h>
 
-- 
cgit v0.10.2


From e73173dbe55e5b4c2306728aad50c8e42194f6d5 Mon Sep 17 00:00:00 2001
From: Kieran Bingham <kieranbingham@gmail.com>
Date: Fri, 8 May 2009 15:49:50 +0100
Subject: sh: Fix UBC setup and registers for SH2A

Signed-off-by: Kieran Bingham <kieranbingham@gmail.com>
Signed-off-by: Peter Griffin <pgriffin@mpc-data.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/ubc.h b/arch/sh/include/asm/ubc.h
index a7b9028..4ca4b77 100644
--- a/arch/sh/include/asm/ubc.h
+++ b/arch/sh/include/asm/ubc.h
@@ -42,12 +42,23 @@
 
 #define BRCR_CMFA		(1 << 15)
 #define BRCR_CMFB		(1 << 14)
+
+#if defined CONFIG_CPU_SH2A
+#define BRCR_CMFCA		(1 << 15)
+#define BRCR_CMFCB		(1 << 14)
+#define BRCR_CMFDA		(1 << 13)
+#define BRCR_CMFDB		(1 << 12)
+#define BRCR_PCBB		(1 << 6)	/* 1: after execution */
+#define BRCR_PCBA		(1 << 5)	/* 1: after execution */
+#define BRCR_PCTE		0
+#else
 #define BRCR_PCTE		(1 << 11)
 #define BRCR_PCBA		(1 << 10)	/* 1: after execution */
 #define BRCR_DBEB		(1 << 7)
 #define BRCR_PCBB		(1 << 6)
 #define BRCR_SEQ		(1 << 3)
 #define BRCR_UBDE		(1 << 0)
+#endif
 
 #ifndef __ASSEMBLY__
 /* arch/sh/kernel/cpu/ubc.S */
diff --git a/arch/sh/include/cpu-sh2a/cpu/ubc.h b/arch/sh/include/cpu-sh2a/cpu/ubc.h
index 8ce2fc1c..1192e1c 100644
--- a/arch/sh/include/cpu-sh2a/cpu/ubc.h
+++ b/arch/sh/include/cpu-sh2a/cpu/ubc.h
@@ -1 +1,28 @@
-#include <cpu-sh2/cpu/ubc.h>
+/*
+ * SH-2A UBC definitions
+ *
+ * Copyright (C) 2008 Kieran Bingham
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __ASM_CPU_SH2A_UBC_H
+#define __ASM_CPU_SH2A_UBC_H
+
+#define UBC_BARA                0xfffc0400
+#define UBC_BAMRA               0xfffc0404
+#define UBC_BBRA                0xfffc04a0	/* 16 bit access */
+#define UBC_BDRA                0xfffc0408
+#define UBC_BDMRA               0xfffc040c
+
+#define UBC_BARB                0xfffc0410
+#define UBC_BAMRB               0xfffc0414
+#define UBC_BBRB                0xfffc04b0	/* 16 bit access */
+#define UBC_BDRB                0xfffc0418
+#define UBC_BDMRB               0xfffc041c
+
+#define UBC_BRCR                0xfffc04c0
+
+#endif /* __ASM_CPU_SH2A_UBC_H */
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 6d94725..9289ede 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -251,7 +251,8 @@ static void ubc_set_tracing(int asid, unsigned long pc)
 
 	if (current_cpu_data.type == CPU_SH7729 ||
 	    current_cpu_data.type == CPU_SH7710 ||
-	    current_cpu_data.type == CPU_SH7712) {
+	    current_cpu_data.type == CPU_SH7712 ||
+	    current_cpu_data.type == CPU_SH7203){
 		ctrl_outw(BBR_INST | BBR_READ | BBR_CPU, UBC_BBRA);
 		ctrl_outl(BRCR_PCBA | BRCR_PCTE, UBC_BRCR);
 	} else {
@@ -407,6 +408,7 @@ asmlinkage void break_point_trap(void)
 #else
 	ctrl_outw(0, UBC_BBRA);
 	ctrl_outw(0, UBC_BBRB);
+	ctrl_outl(0, UBC_BRCR);
 #endif
 	current->thread.ubc_pc = 0;
 	ubc_usercnt -= 1;
-- 
cgit v0.10.2


From ba0d474082dfa37fb0efd439b4d0c0234ceb1e80 Mon Sep 17 00:00:00 2001
From: Peter Griffin <pgriffin@mpc-data.co.uk>
Date: Fri, 8 May 2009 15:50:54 +0100
Subject: sh: Add ptrace support for NOMMU debugging

Signed-off-by: Peter Griffin <pgriffin@mpc-data.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index f7b22dd..3392e83 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -334,6 +334,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 					[(addr - (long)&dummy->fpu) >> 2];
 		} else if (addr == (long) &dummy->u_fpvalid)
 			tmp = !!tsk_used_math(child);
+		else if (addr == PT_TEXT_ADDR)
+			tmp = child->mm->start_code;
+		else if (addr == PT_DATA_ADDR)
+			tmp = child->mm->start_data;
+		else if (addr == PT_TEXT_END_ADDR)
+			tmp = child->mm->end_code;
+		else if (addr == PT_TEXT_LEN)
+			tmp = child->mm->end_code - child->mm->start_code;
 		else
 			tmp = 0;
 		ret = put_user(tmp, datap);
-- 
cgit v0.10.2


From cd89436e54b29a07a383ee82f2f718d8c9d24cc4 Mon Sep 17 00:00:00 2001
From: Peter Griffin <pgriffin@mpc-data.co.uk>
Date: Fri, 8 May 2009 15:51:51 +0100
Subject: sh: Add UBC trap vector for SH2A

Signed-off-by: Peter Griffin <pgriffin@mpc-data.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 30ca9c5..67550d8 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -34,6 +34,7 @@
 # define TRAP_ILLEGAL_SLOT_INST	6
 # define TRAP_ADDRESS_ERROR	9
 # ifdef CONFIG_CPU_SH2A
+#  define TRAP_UBC		12
 #  define TRAP_FPU_ERROR	13
 #  define TRAP_DIVZERO_ERROR	17
 #  define TRAP_DIVOVF_ERROR	18
@@ -849,6 +850,10 @@ void __init trap_init(void)
 #endif
 #endif
 
+#ifdef TRAP_UBC
+	set_exception_table_vec(TRAP_UBC, break_point_trap);
+#endif
+
 	/* Setup VBR for boot cpu */
 	per_cpu_trap_init();
 }
-- 
cgit v0.10.2


From bf8b9a63c18a1a7777571650de0c9f4fd4368ca0 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Fri, 8 May 2009 20:53:58 +0530
Subject: x86: msr-index.h remove duplicate MSR C001_0015 declaration

MSRC001_0015 Hardware Configuration Register (HWCR) is already defined
as MSR_K7_HWCR.

And HWCR is available for >= K7.

So MSR_K8_HWCR is not required and no-one is using it.

[ Impact: cleanup, no object code change ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec41fc1..4d58d04 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,7 +121,6 @@
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
 #define MSR_K8_SYSCFG			0xc0010010
-#define MSR_K8_HWCR			0xc0010015
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
-- 
cgit v0.10.2


From b4df0a6c9d88cfff77c73d33873cd60f9ab909b6 Mon Sep 17 00:00:00 2001
From: Sergey Lapin <slapin@ossfans.org>
Date: Fri, 8 May 2009 19:19:41 +0400
Subject: ASoC: AFEB9260 driver

ASoC driver for AT91SAM9260-based AFEB9260 board

Signed-off-by: Sergey Lapin <slapin@ossfans.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
index a608d70..e720d5e 100644
--- a/sound/soc/atmel/Kconfig
+++ b/sound/soc/atmel/Kconfig
@@ -41,3 +41,11 @@ config SND_AT32_SOC_PLAYPAQ_SLAVE
           and FRAME signals on the PlayPaq.  Unless you want to play
           with the AT32 as the SSC master, you probably want to say N here,
           as this will give you better sound quality.
+
+config SND_AT91_SOC_AFEB9260
+	tristate "SoC Audio support for AFEB9260 board"
+	depends on ARCH_AT91 && MACH_AFEB9260 && SND_ATMEL_SOC
+	select SND_ATMEL_SOC_SSC
+	select SND_SOC_TLV320AIC23
+	help
+	  Say Y here to support sound on AFEB9260 board.
diff --git a/sound/soc/atmel/Makefile b/sound/soc/atmel/Makefile
index f54a7cc..e7ea56b 100644
--- a/sound/soc/atmel/Makefile
+++ b/sound/soc/atmel/Makefile
@@ -13,3 +13,4 @@ snd-soc-playpaq-objs := playpaq_wm8510.o
 
 obj-$(CONFIG_SND_AT91_SOC_SAM9G20_WM8731) += snd-soc-sam9g20-wm8731.o
 obj-$(CONFIG_SND_AT32_SOC_PLAYPAQ) += snd-soc-playpaq.o
+obj-$(CONFIG_SND_AT91_SOC_AFEB9260) += snd-soc-afeb9260.o
diff --git a/sound/soc/atmel/snd-soc-afeb9260.c b/sound/soc/atmel/snd-soc-afeb9260.c
new file mode 100644
index 0000000..23349de
--- /dev/null
+++ b/sound/soc/atmel/snd-soc-afeb9260.c
@@ -0,0 +1,203 @@
+/*
+ * afeb9260.c  --  SoC audio for AFEB9260
+ *
+ * Copyright (C) 2009 Sergey Lapin <slapin@ossfans.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+#include <linux/atmel-ssc.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+
+#include <asm/mach-types.h>
+#include <mach/hardware.h>
+#include <linux/gpio.h>
+
+#include "../codecs/tlv320aic23.h"
+#include "atmel-pcm.h"
+#include "atmel_ssc_dai.h"
+
+#define CODEC_CLOCK 	12000000
+
+static int afeb9260_hw_params(struct snd_pcm_substream *substream,
+			 struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	int err;
+
+	/* Set codec DAI configuration */
+	err = snd_soc_dai_set_fmt(codec_dai,
+				  SND_SOC_DAIFMT_I2S|
+				  SND_SOC_DAIFMT_NB_IF |
+				  SND_SOC_DAIFMT_CBM_CFM);
+	if (err < 0) {
+		printk(KERN_ERR "can't set codec DAI configuration\n");
+		return err;
+	}
+
+	/* Set cpu DAI configuration */
+	err = snd_soc_dai_set_fmt(cpu_dai,
+				  SND_SOC_DAIFMT_I2S |
+				  SND_SOC_DAIFMT_NB_IF |
+				  SND_SOC_DAIFMT_CBM_CFM);
+	if (err < 0) {
+		printk(KERN_ERR "can't set cpu DAI configuration\n");
+		return err;
+	}
+
+	/* Set the codec system clock for DAC and ADC */
+	err =
+	    snd_soc_dai_set_sysclk(codec_dai, 0, CODEC_CLOCK, SND_SOC_CLOCK_IN);
+
+	if (err < 0) {
+		printk(KERN_ERR "can't set codec system clock\n");
+		return err;
+	}
+
+	return err;
+}
+
+static struct snd_soc_ops afeb9260_ops = {
+	.hw_params = afeb9260_hw_params,
+};
+
+static const struct snd_soc_dapm_widget tlv320aic23_dapm_widgets[] = {
+	SND_SOC_DAPM_HP("Headphone Jack", NULL),
+	SND_SOC_DAPM_LINE("Line In", NULL),
+	SND_SOC_DAPM_MIC("Mic Jack", NULL),
+};
+
+static const struct snd_soc_dapm_route audio_map[] = {
+	{"Headphone Jack", NULL, "LHPOUT"},
+	{"Headphone Jack", NULL, "RHPOUT"},
+
+	{"LLINEIN", NULL, "Line In"},
+	{"RLINEIN", NULL, "Line In"},
+
+	{"MICIN", NULL, "Mic Jack"},
+};
+
+static int afeb9260_tlv320aic23_init(struct snd_soc_codec *codec)
+{
+
+	/* Add afeb9260 specific widgets */
+	snd_soc_dapm_new_controls(codec, tlv320aic23_dapm_widgets,
+				  ARRAY_SIZE(tlv320aic23_dapm_widgets));
+
+	/* Set up afeb9260 specific audio path audio_map */
+	snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+
+	snd_soc_dapm_enable_pin(codec, "Headphone Jack");
+	snd_soc_dapm_enable_pin(codec, "Line In");
+	snd_soc_dapm_enable_pin(codec, "Mic Jack");
+
+	snd_soc_dapm_sync(codec);
+
+	return 0;
+}
+
+/* Digital audio interface glue - connects codec <--> CPU */
+static struct snd_soc_dai_link afeb9260_dai = {
+	.name = "TLV320AIC23",
+	.stream_name = "AIC23",
+	.cpu_dai = &atmel_ssc_dai[0],
+	.codec_dai = &tlv320aic23_dai,
+	.init = afeb9260_tlv320aic23_init,
+	.ops = &afeb9260_ops,
+};
+
+/* Audio machine driver */
+static struct snd_soc_card snd_soc_machine_afeb9260 = {
+	.name = "AFEB9260",
+	.platform = &atmel_soc_platform,
+	.dai_link = &afeb9260_dai,
+	.num_links = 1,
+};
+
+/* Audio subsystem */
+static struct snd_soc_device afeb9260_snd_devdata = {
+	.card = &snd_soc_machine_afeb9260,
+	.codec_dev = &soc_codec_dev_tlv320aic23,
+};
+
+static struct platform_device *afeb9260_snd_device;
+
+static int __init afeb9260_soc_init(void)
+{
+	int err;
+	struct device *dev;
+	struct atmel_ssc_info *ssc_p = afeb9260_dai.cpu_dai->private_data;
+	struct ssc_device *ssc = NULL;
+
+	if (!(machine_is_afeb9260()))
+		return -ENODEV;
+
+	ssc = ssc_request(0);
+	if (IS_ERR(ssc)) {
+		printk(KERN_ERR "ASoC: Failed to request SSC 0\n");
+		err = PTR_ERR(ssc);
+		ssc = NULL;
+		goto err_ssc;
+	}
+	ssc_p->ssc = ssc;
+
+	afeb9260_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!afeb9260_snd_device) {
+		printk(KERN_ERR "ASoC: Platform device allocation failed\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(afeb9260_snd_device, &afeb9260_snd_devdata);
+	afeb9260_snd_devdata.dev = &afeb9260_snd_device->dev;
+	err = platform_device_add(afeb9260_snd_device);
+	if (err)
+		goto err1;
+
+	dev = &afeb9260_snd_device->dev;
+
+	return 0;
+err1:
+	platform_device_del(afeb9260_snd_device);
+	platform_device_put(afeb9260_snd_device);
+err_ssc:
+	return err;
+
+}
+
+static void __exit afeb9260_soc_exit(void)
+{
+	platform_device_unregister(afeb9260_snd_device);
+}
+
+module_init(afeb9260_soc_init);
+module_exit(afeb9260_soc_exit);
+
+MODULE_AUTHOR("Sergey Lapin <slapin@ossfans.org>");
+MODULE_DESCRIPTION("ALSA SoC for AFEB9260");
+MODULE_LICENSE("GPL");
+
-- 
cgit v0.10.2


From 7fc23a5380797012e92a9633169440f2f4a21253 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 May 2009 18:52:21 +0200
Subject: perf_counter: optimize perf_counter_task_tick()

perf_counter_task_tick() does way too much work to find out
there's nothing to do. Provide an easy short-circuit for the
normal case where there are no counters on the system.

[ Impact: micro-optimization ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090508170028.750619201@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 60e55f0..fdb0d24 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -39,6 +39,7 @@ int perf_max_counters __read_mostly = 1;
 static int perf_reserved_percpu __read_mostly;
 static int perf_overcommit __read_mostly = 1;
 
+static atomic_t nr_counters __read_mostly;
 static atomic_t nr_mmap_tracking __read_mostly;
 static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
@@ -1076,8 +1077,14 @@ static void rotate_ctx(struct perf_counter_context *ctx)
 
 void perf_counter_task_tick(struct task_struct *curr, int cpu)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
+
+	if (!atomic_read(&nr_counters))
+		return;
+
+	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	ctx = &curr->perf_counter_ctx;
 
 	perf_counter_cpu_sched_out(cpuctx);
 	perf_counter_task_sched_out(curr, cpu);
@@ -1197,6 +1204,7 @@ static void free_counter(struct perf_counter *counter)
 {
 	perf_pending_sync(counter);
 
+	atomic_dec(&nr_counters);
 	if (counter->hw_event.mmap)
 		atomic_dec(&nr_mmap_tracking);
 	if (counter->hw_event.munmap)
@@ -2861,6 +2869,7 @@ done:
 
 	counter->pmu = pmu;
 
+	atomic_inc(&nr_counters);
 	if (counter->hw_event.mmap)
 		atomic_inc(&nr_mmap_tracking);
 	if (counter->hw_event.munmap)
-- 
cgit v0.10.2


From 3df5edad87a998273aa5a9a8c728c05d855ad00e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 May 2009 18:52:22 +0200
Subject: perf_counter: rework ioctl()s

Corey noticed that ioctl()s on grouped counters didn't work on
the whole group. This extends the ioctl() interface to take a
second argument that is interpreted as a flags field. We then
provide PERF_IOC_FLAG_GROUP to toggle the behaviour.

Having this flag gives the greatest flexibility, allowing you
to individually enable/disable/reset counters in a group, or
all together.

[ Impact: fix group counter enable/disable semantics ]

Reported-by: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090508170028.837558214@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 00081d8..88f863e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -157,10 +157,14 @@ struct perf_counter_hw_event {
 /*
  * Ioctls that can be done on a perf counter fd:
  */
-#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_ENABLE		_IOW('$', 0, u32)
+#define PERF_COUNTER_IOC_DISABLE	_IOW('$', 1, u32)
 #define PERF_COUNTER_IOC_REFRESH	_IOW('$', 2, u32)
-#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
+#define PERF_COUNTER_IOC_RESET		_IOW('$', 3, u32)
+
+enum perf_counter_ioc_flags {
+	PERF_IOC_FLAG_GROUP		= 1U << 0,
+};
 
 /*
  * Structure of the page that can be mapped via mmap
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fdb0d24..f4883f1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -82,7 +82,7 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	 * add it straight to the context's counter list, or to the group
 	 * leader's sibling list:
 	 */
-	if (counter->group_leader == counter)
+	if (group_leader == counter)
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
 	else {
 		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
@@ -385,24 +385,6 @@ static void perf_counter_disable(struct perf_counter *counter)
 	spin_unlock_irq(&ctx->lock);
 }
 
-/*
- * Disable a counter and all its children.
- */
-static void perf_counter_disable_family(struct perf_counter *counter)
-{
-	struct perf_counter *child;
-
-	perf_counter_disable(counter);
-
-	/*
-	 * Lock the mutex to protect the list of children
-	 */
-	mutex_lock(&counter->mutex);
-	list_for_each_entry(child, &counter->child_list, child_list)
-		perf_counter_disable(child);
-	mutex_unlock(&counter->mutex);
-}
-
 static int
 counter_sched_in(struct perf_counter *counter,
 		 struct perf_cpu_context *cpuctx,
@@ -753,24 +735,6 @@ static int perf_counter_refresh(struct perf_counter *counter, int refresh)
 	return 0;
 }
 
-/*
- * Enable a counter and all its children.
- */
-static void perf_counter_enable_family(struct perf_counter *counter)
-{
-	struct perf_counter *child;
-
-	perf_counter_enable(counter);
-
-	/*
-	 * Lock the mutex to protect the list of children
-	 */
-	mutex_lock(&counter->mutex);
-	list_for_each_entry(child, &counter->child_list, child_list)
-		perf_counter_enable(child);
-	mutex_unlock(&counter->mutex);
-}
-
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
 			      struct perf_cpu_context *cpuctx)
 {
@@ -1307,31 +1271,79 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 
 static void perf_counter_reset(struct perf_counter *counter)
 {
+	(void)perf_counter_read(counter);
 	atomic_set(&counter->count, 0);
+	perf_counter_update_userpage(counter);
+}
+
+static void perf_counter_for_each_sibling(struct perf_counter *counter,
+					  void (*func)(struct perf_counter *))
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *sibling;
+
+	spin_lock_irq(&ctx->lock);
+	counter = counter->group_leader;
+
+	func(counter);
+	list_for_each_entry(sibling, &counter->sibling_list, list_entry)
+		func(sibling);
+	spin_unlock_irq(&ctx->lock);
+}
+
+static void perf_counter_for_each_child(struct perf_counter *counter,
+					void (*func)(struct perf_counter *))
+{
+	struct perf_counter *child;
+
+	mutex_lock(&counter->mutex);
+	func(counter);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		func(child);
+	mutex_unlock(&counter->mutex);
+}
+
+static void perf_counter_for_each(struct perf_counter *counter,
+				  void (*func)(struct perf_counter *))
+{
+	struct perf_counter *child;
+
+	mutex_lock(&counter->mutex);
+	perf_counter_for_each_sibling(counter, func);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		perf_counter_for_each_sibling(child, func);
+	mutex_unlock(&counter->mutex);
 }
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct perf_counter *counter = file->private_data;
-	int err = 0;
+	void (*func)(struct perf_counter *);
+	u32 flags = arg;
 
 	switch (cmd) {
 	case PERF_COUNTER_IOC_ENABLE:
-		perf_counter_enable_family(counter);
+		func = perf_counter_enable;
 		break;
 	case PERF_COUNTER_IOC_DISABLE:
-		perf_counter_disable_family(counter);
-		break;
-	case PERF_COUNTER_IOC_REFRESH:
-		err = perf_counter_refresh(counter, arg);
+		func = perf_counter_disable;
 		break;
 	case PERF_COUNTER_IOC_RESET:
-		perf_counter_reset(counter);
+		func = perf_counter_reset;
 		break;
+
+	case PERF_COUNTER_IOC_REFRESH:
+		return perf_counter_refresh(counter, arg);
 	default:
-		err = -ENOTTY;
+		return -ENOTTY;
 	}
-	return err;
+
+	if (flags & PERF_IOC_FLAG_GROUP)
+		perf_counter_for_each(counter, func);
+	else
+		perf_counter_for_each_child(counter, func);
+
+	return 0;
 }
 
 /*
-- 
cgit v0.10.2


From a85f61abe11a46553c4562e74edb27ebc782aeb7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 May 2009 18:52:23 +0200
Subject: perf_counter: add PERF_RECORD_CONFIG

Much like CONFIG_RECORD_GROUP records the hw_event.config to
identify the values, allow to record this for all counters.

[ Impact: extend perfcounter output record format ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090508170028.923228280@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 88f863e..0e6303d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -104,6 +104,7 @@ enum perf_counter_record_format {
 	PERF_RECORD_ADDR	= 1U << 3,
 	PERF_RECORD_GROUP	= 1U << 4,
 	PERF_RECORD_CALLCHAIN	= 1U << 5,
+	PERF_RECORD_CONFIG	= 1U << 6,
 };
 
 /*
@@ -258,6 +259,7 @@ enum perf_event_type {
 	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
 	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 * 	{ u64			addr;     } && PERF_RECORD_ADDR
+	 * 	{ u64			config;   } && PERF_RECORD_CONFIG
 	 *
 	 * 	{ u64			nr;
 	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f4883f1..c615f52 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1994,6 +1994,11 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
+	if (record_type & PERF_RECORD_CONFIG) {
+		header.type |= PERF_RECORD_CONFIG;
+		header.size += sizeof(u64);
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -2029,6 +2034,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_ADDR)
 		perf_output_put(&handle, addr);
 
+	if (record_type & PERF_RECORD_CONFIG)
+		perf_output_put(&handle, counter->hw_event.config);
+
 	/*
 	 * XXX PERF_RECORD_GROUP vs inherited counters seems difficult.
 	 */
-- 
cgit v0.10.2


From f370e1e2f195ec1e6420e26fc83e0319595db578 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 May 2009 18:52:24 +0200
Subject: perf_counter: add PERF_RECORD_CPU

Allow recording the CPU number the event was generated on.

RFC: this leaves a u32 as reserved, should we fill in the
     node_id() there, or leave this open for future extention,
     as userspace can already easily do the cpu->node mapping
     if needed.

[ Impact: extend perfcounter output record format ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090508170029.008627711@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0e6303d..614f921 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -105,6 +105,7 @@ enum perf_counter_record_format {
 	PERF_RECORD_GROUP	= 1U << 4,
 	PERF_RECORD_CALLCHAIN	= 1U << 5,
 	PERF_RECORD_CONFIG	= 1U << 6,
+	PERF_RECORD_CPU		= 1U << 7,
 };
 
 /*
@@ -260,6 +261,7 @@ enum perf_event_type {
 	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 * 	{ u64			addr;     } && PERF_RECORD_ADDR
 	 * 	{ u64			config;   } && PERF_RECORD_CONFIG
+	 * 	{ u32			cpu, res; } && PERF_RECORD_CPU
 	 *
 	 * 	{ u64			nr;
 	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c615f52..d850a1f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1956,6 +1956,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	struct perf_callchain_entry *callchain = NULL;
 	int callchain_size = 0;
 	u64 time;
+	struct {
+		u32 cpu, reserved;
+	} cpu_entry;
 
 	header.type = 0;
 	header.size = sizeof(header);
@@ -1999,6 +2002,13 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
+	if (record_type & PERF_RECORD_CPU) {
+		header.type |= PERF_RECORD_CPU;
+		header.size += sizeof(cpu_entry);
+
+		cpu_entry.cpu = raw_smp_processor_id();
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -2037,6 +2047,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_CONFIG)
 		perf_output_put(&handle, counter->hw_event.config);
 
+	if (record_type & PERF_RECORD_CPU)
+		perf_output_put(&handle, cpu_entry);
+
 	/*
 	 * XXX PERF_RECORD_GROUP vs inherited counters seems difficult.
 	 */
-- 
cgit v0.10.2


From 29f93943d1916d1a3faa3f10f4a06994347ac990 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 8 May 2009 16:06:47 -0400
Subject: tracing: initialize return value for __ftrace_set_clr_event

Commit 8f31bfe538ebafac187d2d4465a92e1d9ee6d8c2
tracing/events: clean up for ftrace_set_clr_event()

Moved out the code for ftrace_set_clr_event into a helper funciton but
did not initialize the return value. As a result, we do not warn about
a typo in the echoing of events in set_event.

This patch restores the old warning:

 # echo foobar > set_event
-bash: echo: write error: Invalid argument

[ Impact: restore warning of invalid entries to set_event ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index df394bc..2eecb87 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -118,7 +118,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
 				  const char *event, int set)
 {
 	struct ftrace_event_call *call;
-	int ret;
+	int ret = -EINVAL;
 
 	mutex_lock(&event_mutex);
 	list_for_each_entry(call, &ftrace_events, list) {
-- 
cgit v0.10.2


From 4671c79408a3f8a5a6a45e39c4c164dada3a5678 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 8 May 2009 16:27:41 -0400
Subject: tracing: add trace_set_clr_event to export event enabling function

Other parts of the kernel may need to be able to enable or disable
specific events. Especially parts that create trace events.

[ Impact: allow enabling of trace events by those that create the event ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 662c1be..bae51dd 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -127,6 +127,8 @@ extern int trace_define_field(struct ftrace_event_call *call, char *type,
 
 #define is_signed_type(type)	(((type)(-1)) < 0)
 
+int trace_set_clr_event(const char *system, const char *event, int set);
+
 /*
  * The double __builtin_constant_p is because gcc will give us an error
  * if we try to allocate the static variable to fmt if it is not a
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 2eecb87..0eec0c5 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -177,6 +177,23 @@ static int ftrace_set_clr_event(char *buf, int set)
 	return __ftrace_set_clr_event(match, sub, event, set);
 }
 
+/**
+ * trace_set_clr_event - enable or disable an event
+ * @system: system name to match (NULL for any system)
+ * @event: event name to match (NULL for all events, within system)
+ * @set: 1 to enable, 0 to disable
+ *
+ * This is a way for other parts of the kernel to enable or disable
+ * event recording.
+ *
+ * Returns 0 on success, -EINVAL if the parameters do not match any
+ * registered events.
+ */
+int trace_set_clr_event(const char *system, const char *event, int set)
+{
+	return __ftrace_set_clr_event(NULL, system, event, set);
+}
+
 /* 128 should be much more than enough */
 #define EVENT_BUF_SIZE		127
 
-- 
cgit v0.10.2


From 6cac5a924668a56c7ccefc345805f1fe0536a90e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Sun, 29 Mar 2009 19:56:29 -0700
Subject: xen/x86-64: fix breakpoints and hardware watchpoints

Native x86-64 uses the IST mechanism to run int3 and debug traps on
an alternative stack.  Xen does not do this, and so the frames were
being misinterpreted by the ptrace code.  This change special-cases
these two exceptions by using Xen variants which run on the normal
kernel stack properly.

Impact: avoid crash or bad data when IST trap is invoked under Xen
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0d53425..c44e500 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,6 +13,9 @@ asmlinkage void divide_error(void);
 asmlinkage void debug(void);
 asmlinkage void nmi(void);
 asmlinkage void int3(void);
+asmlinkage void xen_debug(void);
+asmlinkage void xen_int3(void);
+asmlinkage void xen_stack_segment(void);
 asmlinkage void overflow(void);
 asmlinkage void bounds(void);
 asmlinkage void invalid_op(void);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38946c6..bb01ce0 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1379,6 +1379,11 @@ END(xen_failsafe_callback)
 paranoidzeroentry_ist debug do_debug DEBUG_STACK
 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 paranoiderrorentry stack_segment do_stack_segment
+#ifdef CONFIG_XEN
+zeroentry xen_debug do_debug
+zeroentry xen_int3 do_int3
+errorentry xen_stack_segment do_stack_segment
+#endif
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 12a3159..7566e13 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/start_kernel.h>
 #include <linux/sched.h>
+#include <linux/kprobes.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -44,6 +45,7 @@
 #include <asm/processor.h>
 #include <asm/proto.h>
 #include <asm/msr-index.h>
+#include <asm/traps.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
@@ -428,11 +430,26 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
+	unsigned long addr;
+
 	if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
 		return 0;
 
 	info->vector = vector;
-	info->address = gate_offset(*val);
+
+	addr = gate_offset(*val);
+#ifdef CONFIG_X86_64
+	if (addr == (unsigned long)debug)
+		addr = (unsigned long)xen_debug;
+	else if (addr == (unsigned long)int3)
+		addr = (unsigned long)xen_int3;
+	else if (addr == (unsigned long)stack_segment)
+		addr = (unsigned long)xen_stack_segment;
+	else
+		WARN_ON(val->ist != 0);
+#endif	/* CONFIG_X86_64 */
+	info->address = addr;
+
 	info->cs = gate_segment(*val);
 	info->flags = val->dpl;
 	/* interrupt gates clear IF */
-- 
cgit v0.10.2


From b80119bb35a49a4e8dbfb9708872adfd5cf38dee Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 24 Apr 2009 00:22:08 -0700
Subject: xen/x86-64: clean up warnings about IST-using traps

Ignore known IST-using traps.  Aside from the debugger traps, they're
low-level faults which Xen will handle for us, so the kernel needn't
worry about them.  Keep warning in case unknown trap starts using IST.

Impact: suppress spurious warnings
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7566e13..e9df942 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -439,14 +439,32 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 
 	addr = gate_offset(*val);
 #ifdef CONFIG_X86_64
+	/*
+	 * Look for known traps using IST, and substitute them
+	 * appropriately.  The debugger ones are the only ones we care
+	 * about.  Xen will handle faults like double_fault and
+	 * machine_check, so we should never see them.  Warn if
+	 * there's an unexpected IST-using fault handler.
+	 */
 	if (addr == (unsigned long)debug)
 		addr = (unsigned long)xen_debug;
 	else if (addr == (unsigned long)int3)
 		addr = (unsigned long)xen_int3;
 	else if (addr == (unsigned long)stack_segment)
 		addr = (unsigned long)xen_stack_segment;
-	else
-		WARN_ON(val->ist != 0);
+	else if (addr == (unsigned long)double_fault ||
+		 addr == (unsigned long)nmi) {
+		/* Don't need to handle these */
+		return 0;
+#ifdef CONFIG_X86_MCE
+	} else if (addr == (unsigned long)machine_check) {
+		return 0;
+#endif
+	} else {
+		/* Some other trap using IST? */
+		if (WARN_ON(val->ist != 0))
+			return 0;
+	}
 #endif	/* CONFIG_X86_64 */
 	info->address = addr;
 
-- 
cgit v0.10.2


From a789ed5fb6d0256c4177c2cc27e06520ddbe4d4c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 24 Apr 2009 00:26:50 -0700
Subject: xen: cache cr0 value to avoid trap'n'emulate for read_cr0

stts() is implemented in terms of read_cr0/write_cr0 to update the
state of the TS bit.  This happens during context switch, and so
is fairly performance critical.  Rather than falling back to
a trap-and-emulate native read_cr0, implement our own by caching
the last-written value from write_cr0 (the TS bit is the only one
we really care about).

Impact: optimise Xen context switches
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e9df942..0a1700a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -658,10 +658,26 @@ static void xen_clts(void)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
+static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
+
+static unsigned long xen_read_cr0(void)
+{
+	unsigned long cr0 = percpu_read(xen_cr0_value);
+
+	if (unlikely(cr0 == 0)) {
+		cr0 = native_read_cr0();
+		percpu_write(xen_cr0_value, cr0);
+	}
+
+	return cr0;
+}
+
 static void xen_write_cr0(unsigned long cr0)
 {
 	struct multicall_space mcs;
 
+	percpu_write(xen_cr0_value, cr0);
+
 	/* Only pay attention to cr0.TS; everything else is
 	   ignored. */
 	mcs = xen_mc_entry(0);
@@ -847,7 +863,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
 	.clts = xen_clts,
 
-	.read_cr0 = native_read_cr0,
+	.read_cr0 = xen_read_cr0,
 	.write_cr0 = xen_write_cr0,
 
 	.read_cr4 = native_read_cr4,
-- 
cgit v0.10.2


From 0b4eb462da10f832b28d518abffa4d77805928a0 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 30 Apr 2009 17:59:36 -0700
Subject: x86, boot: align the .bss section in the decompressor

Aligning the .bss section makes it trivial to use large operation
sizes for moving the initialized sections and clearing the .bss.
The alignment chosen (L1 cache) is somewhat arbitrary, but should be
large enough to avoid all known performance traps and small enough to
not cause troubles.

[ Impact: trivial performance enhancement, future patch prep	]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 0d26c92..dbe515e 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -42,6 +42,7 @@ SECTIONS
 		*(.data.*)
 		_edata = . ;
 	}
+	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
 	.bss : {
 		_bss = . ;
 		*(.bss)
-- 
cgit v0.10.2


From d3dd3b5a29bb9582957451531fed461628dfc834 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 5 May 2009 21:17:15 -0700
Subject: kbuild: allow compressors (gzip, bzip2, lzma) to take multiple inputs

Allow the compression commands in Kbuild (i.e. gzip, bzip2, lzma) to
take multiple input files and emit the concatenated compressed
output.  This avoids an intermediate step when a kernel image is built
from multiple components, such as the relocatable x86-32 kernel.

Sam Ravnborg integrated the bin_size script into the Makefile.

[ Impact: new build feature, not yet used ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 9796195..f8cf938 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -183,20 +183,34 @@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@
 # ---------------------------------------------------------------------------
 
 quiet_cmd_gzip = GZIP    $@
-cmd_gzip = gzip -f -9 < $< > $@
+cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -f -9 > $@) || \
+	(rm -f $@ ; false)
 
 
 # Bzip2
 # ---------------------------------------------------------------------------
 
-# Bzip2 does not include size in file... so we have to fake that
-size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size
-
-quiet_cmd_bzip2 = BZIP2    $@
-cmd_bzip2 = (bzip2 -9 < $< && $(size_append) $<) > $@ || (rm -f $@ ; false)
+# Bzip2 and LZMA do not include size in file... so we have to fake that;
+# append the size as a 32-bit littleendian number as gzip does.
+size_append = echo -ne $(shell						\
+dec_size=0;								\
+for F in $1; do								\
+	fsize=$$(stat -c "%s" $$F);					\
+	dec_size=$$(expr $$dec_size + $$fsize);				\
+done;									\
+printf "%08x" $$dec_size |						\
+	sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'	\
+)
+
+quiet_cmd_bzip2 = BZIP2   $@
+cmd_bzip2 = (cat $(filter-out FORCE,$^) | \
+	bzip2 -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+	(rm -f $@ ; false)
 
 # Lzma
 # ---------------------------------------------------------------------------
 
 quiet_cmd_lzma = LZMA    $@
-cmd_lzma = (lzma -9 -c $< && $(size_append) $<) >$@ || (rm -f $@ ; false)
+cmd_lzma = (cat $(filter-out FORCE,$^) | \
+	lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+	(rm -f $@ ; false)
diff --git a/scripts/bin_size b/scripts/bin_size
deleted file mode 100644
index 43e1b36..0000000
--- a/scripts/bin_size
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-if [ $# = 0 ] ; then
-   echo Usage: $0 file
-fi
-
-size_dec=`stat -c "%s" $1`
-size_hex_echo_string=`printf "%08x" $size_dec |
-     sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'`
-/bin/echo -ne $size_hex_echo_string
-- 
cgit v0.10.2


From 845adf7266a7ba6970bf982ffd96abc60d2018ab Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 5 May 2009 21:20:51 -0700
Subject: x86: add a Kconfig symbol for when relocations are needed

We only need to build relocations when we are building a 32-bit
relocatable kernel.  Rather than unnecessarily complicating the
Makefiles, make an explicit Kbuild symbol for this.

[ Impact: permits future cleanup ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Sam Ravnborg <sam@ravnborg.org>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 039c3f0..5aee453 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1513,6 +1513,11 @@ config RELOCATABLE
 	  it has been loaded at and the compile time physical address
 	  (CONFIG_PHYSICAL_START) is ignored.
 
+# Relocation on x86-32 needs some additional build support
+config X86_NEED_RELOCS
+	def_bool y
+	depends on X86_32 && RELOCATABLE
+
 config PHYSICAL_ALIGN
 	hex
 	prompt "Alignment value to which kernel should be aligned" if X86_32
-- 
cgit v0.10.2


From 5f11e02019ef44f041e6e38a1363fa2fd4b8785d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 5 May 2009 22:53:11 -0700
Subject: x86, boot: simplify arch/x86/boot/compressed/Makefile

Simplify the arch/x86/boot/compressed/Makefile, by using the new
capability of specifying multiple inputs to a compressor, and the
CONFIG_X86_NEED_RELOCS Kconfig symbol.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 0f4b5e2..b35c3bb 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -29,7 +29,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
 
 
 targets += vmlinux.bin.all vmlinux.relocs relocs
-hostprogs-$(CONFIG_X86_32) += relocs
+hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs
 
 quiet_cmd_relocs = RELOCS  $@
       cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
@@ -37,46 +37,19 @@ $(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
 	$(call if_changed,relocs)
 
 vmlinux.bin.all-y := $(obj)/vmlinux.bin
-vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
-quiet_cmd_relocbin = BUILD   $@
-      cmd_relocbin = cat $(filter-out FORCE,$^) > $@
-$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
-	$(call if_changed,relocbin)
+vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
 
-ifeq ($(CONFIG_X86_32),y)
-
-ifdef CONFIG_RELOCATABLE
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
-	$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE
-	$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE
-	$(call if_changed,lzma)
-else
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,lzma)
-endif
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
-
-else
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzma)
 
-LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
-endif
-
 suffix_$(CONFIG_KERNEL_GZIP)  = gz
 suffix_$(CONFIG_KERNEL_BZIP2) = bz2
 suffix_$(CONFIG_KERNEL_LZMA)  = lzma
 
+LDFLAGS_piggy.o := -r --format binary --oformat $(CONFIG_OUTPUT_FORMAT) -T
 $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
 	$(call if_changed,ld)
-- 
cgit v0.10.2


From 283ab1c0bd462dd0b179393fb081a626f6687413 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 8 May 2009 15:32:47 -0700
Subject: x86, boot: follow standard Kbuild style for compression suffix

When generating the compression suffix in
arch/x86/boot/compressed/Makefile, follow standard Kbuild
conventions, that is:

- Use a dash not underscore before y/m/n endings
- Use := whenever possible.

Requested-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index b35c3bb..7f24fdb 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -46,10 +46,10 @@ $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
 $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzma)
 
-suffix_$(CONFIG_KERNEL_GZIP)  = gz
-suffix_$(CONFIG_KERNEL_BZIP2) = bz2
-suffix_$(CONFIG_KERNEL_LZMA)  = lzma
+suffix-$(CONFIG_KERNEL_GZIP)	:= gz
+suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
+suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
 
 LDFLAGS_piggy.o := -r --format binary --oformat $(CONFIG_OUTPUT_FORMAT) -T
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
 	$(call if_changed,ld)
-- 
cgit v0.10.2


From bd2a36984c50bb546a7d04cb395fddcf98a1092c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 5 May 2009 23:24:50 -0700
Subject: x86, boot: use BP_scratch in arch/x86/boot/compressed/head_*.S

Use the BP_scratch symbol from asm-offsets.h instead of hard-coding
the location.

[ Impact: cleanup ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 85bd328..e3398f3 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -53,7 +53,7 @@ ENTRY(startup_32)
  * data at 0x1e4 (defined as a scratch field) are used as the stack
  * for this calculation. Only 4 bytes are needed.
  */
-	leal (0x1e4+4)(%esi), %esp
+	leal (BP_scratch+4)(%esi), %esp
 	call 1f
 1:	popl %ebp
 	subl $1b, %ebp
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index ed4a829..06cc7e5 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -56,7 +56,7 @@ ENTRY(startup_32)
  * data at 0x1e4 (defined as a scratch field) are used as the stack
  * for this calculation. Only 4 bytes are needed.
  */
-	leal	(0x1e4+4)(%esi), %esp
+	leal	(BP_scratch+4)(%esi), %esp
 	call	1f
 1:	popl	%ebp
 	subl	$1b, %ebp
-- 
cgit v0.10.2


From 5f64ec64e7f9b246c0a94f34cdf7782f98a6e55d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 8 May 2009 15:45:17 -0700
Subject: x86, boot: stylistic cleanups for boot/compressed/head_32.S

Reformat arch/x86/boot/compressed/head_32.S to be closer to currently
preferred kernel assembly style, that is:

- opcode and operand separated by tab
- operands separated by ", "
- C-style comments

This also makes it more similar to head_64.S.

[ Impact: cleanup, no object code change ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index e3398f3..7bd7766 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -12,16 +12,16 @@
  * the page directory. [According to comments etc elsewhere on a compressed
  * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
  *
- * Page 0 is deliberately kept safe, since System Management Mode code in 
+ * Page 0 is deliberately kept safe, since System Management Mode code in
  * laptops may need to access the BIOS data stored there.  This is also
- * useful for future device drivers that either access the BIOS via VM86 
+ * useful for future device drivers that either access the BIOS via VM86
  * mode.
  */
 
 /*
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
-.text
+	.text
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
@@ -29,75 +29,80 @@
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
 
-.section ".text.head","ax",@progbits
+	.section ".text.head","ax",@progbits
 ENTRY(startup_32)
 	cld
-	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
-	 * us to not reload segments */
-	testb $(1<<6), BP_loadflags(%esi)
-	jnz 1f
+	/*
+	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+	 * us to not reload segments
+	 */
+	testb	$(1<<6), BP_loadflags(%esi)
+	jnz	1f
 
 	cli
-	movl $(__BOOT_DS),%eax
-	movl %eax,%ds
-	movl %eax,%es
-	movl %eax,%fs
-	movl %eax,%gs
-	movl %eax,%ss
+	movl	$__BOOT_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %fs
+	movl	%eax, %gs
+	movl	%eax, %ss
 1:
 
-/* Calculate the delta between where we were compiled to run
+/*
+ * Calculate the delta between where we were compiled to run
  * at and where we were actually loaded at.  This can only be done
  * with a short local call on x86.  Nothing  else will tell us what
  * address we are running at.  The reserved chunk of the real-mode
  * data at 0x1e4 (defined as a scratch field) are used as the stack
  * for this calculation. Only 4 bytes are needed.
  */
-	leal (BP_scratch+4)(%esi), %esp
-	call 1f
-1:	popl %ebp
-	subl $1b, %ebp
+	leal	(BP_scratch+4)(%esi), %esp
+	call	1f
+1:	popl	%ebp
+	subl	$1b, %ebp
 
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
+/*
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
  * contains the address where we should move the kernel image temporarily
  * for safe in-place decompression.
  */
 
 #ifdef CONFIG_RELOCATABLE
-	movl 	%ebp, %ebx
+	movl	%ebp, %ebx
 	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebx
 	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx
 #else
-	movl $LOAD_PHYSICAL_ADDR, %ebx
+	movl	$LOAD_PHYSICAL_ADDR, %ebx
 #endif
 
 	/* Replace the compressed data size with the uncompressed size */
-	subl input_len(%ebp), %ebx
-	movl output_len(%ebp), %eax
-	addl %eax, %ebx
+	subl	input_len(%ebp), %ebx
+	movl	output_len(%ebp), %eax
+	addl	%eax, %ebx
 	/* Add 8 bytes for every 32K input block */
-	shrl $12, %eax
-	addl %eax, %ebx
+	shrl	$12, %eax
+	addl	%eax, %ebx
 	/* Add 32K + 18 bytes of extra slack */
-	addl $(32768 + 18), %ebx
+	addl	$(32768 + 18), %ebx
 	/* Align on a 4K boundary */
-	addl $4095, %ebx
-	andl $~4095, %ebx
+	addl	$4095, %ebx
+	andl	$~4095, %ebx
 
-/* Copy the compressed kernel to the end of our buffer
+/*
+ * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-	pushl %esi
-	leal _ebss(%ebp), %esi
-	leal _ebss(%ebx), %edi
-	movl $(_ebss - startup_32), %ecx
+	pushl	%esi
+	leal	_ebss(%ebp), %esi
+	leal	_ebss(%ebx), %edi
+	movl	$(_ebss - startup_32), %ecx
 	std
-	rep
-	movsb
+	rep	movsb
 	cld
-	popl %esi
+	popl	%esi
 
-/* Compute the kernel start address.
+/*
+ * Compute the kernel start address.
  */
 #ifdef CONFIG_RELOCATABLE
 	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebp
@@ -109,81 +114,84 @@ ENTRY(startup_32)
 /*
  * Jump to the relocated address.
  */
-	leal relocated(%ebx), %eax
-	jmp *%eax
+	leal	relocated(%ebx), %eax
+	jmp	*%eax
 ENDPROC(startup_32)
 
-.section ".text"
+	.text
 relocated:
 
 /*
  * Clear BSS
  */
-	xorl %eax,%eax
-	leal _edata(%ebx),%edi
-	leal _ebss(%ebx), %ecx
-	subl %edi,%ecx
+	xorl	%eax, %eax
+	leal	_edata(%ebx), %edi
+	leal	_ebss(%ebx), %ecx
+	subl	%edi, %ecx
 	cld
-	rep
-	stosb
+	rep	stosb
 
 /*
  * Setup the stack for the decompressor
  */
-	leal boot_stack_end(%ebx), %esp
+	leal	boot_stack_end(%ebx), %esp
 
 /*
  * Do the decompression, and jump to the new kernel..
  */
-	movl output_len(%ebx), %eax
-	pushl %eax
-			# push arguments for decompress_kernel:
-	pushl %ebp	# output address
-	movl input_len(%ebx), %eax
-	pushl %eax	# input_len
-	leal input_data(%ebx), %eax
-	pushl %eax	# input_data
-	leal boot_heap(%ebx), %eax
-	pushl %eax	# heap area
-	pushl %esi	# real mode pointer
-	call decompress_kernel
-	addl $20, %esp
-	popl %ecx
+	movl	output_len(%ebx), %eax
+	pushl	%eax
+				/* push arguments for decompress_kernel: */
+	pushl	%ebp		/* output address */
+	movl	input_len(%ebx), %eax
+	pushl	%eax		/* input_len */
+	leal	input_data(%ebx), %eax
+	pushl	%eax		/* input_data */
+	leal	boot_heap(%ebx), %eax
+	pushl	%eax		/* heap area */
+	pushl	%esi		/* real mode pointer */
+	call	decompress_kernel
+	addl	$20, %esp
+	popl	%ecx
 
 #if CONFIG_RELOCATABLE
-/* Find the address of the relocations.
+/*
+ * Find the address of the relocations.
  */
-	movl %ebp, %edi
-	addl %ecx, %edi
+	movl	%ebp, %edi
+	addl	%ecx, %edi
 
-/* Calculate the delta between where vmlinux was compiled to run
+/*
+ * Calculate the delta between where vmlinux was compiled to run
  * and where it was actually loaded.
  */
-	movl %ebp, %ebx
-	subl $LOAD_PHYSICAL_ADDR, %ebx
-	jz   2f		/* Nothing to be done if loaded at compiled addr. */
+	movl	%ebp, %ebx
+	subl	$LOAD_PHYSICAL_ADDR, %ebx
+	jz	2f	/* Nothing to be done if loaded at compiled addr. */
 /*
  * Process relocations.
  */
 
-1:	subl $4, %edi
-	movl 0(%edi), %ecx
-	testl %ecx, %ecx
-	jz 2f
-	addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
-	jmp 1b
+1:	subl	$4, %edi
+	movl	(%edi), %ecx
+	testl	%ecx, %ecx
+	jz	2f
+	addl	%ebx, -__PAGE_OFFSET(%ebx, %ecx)
+	jmp	1b
 2:
 #endif
 
 /*
  * Jump to the decompressed kernel.
  */
-	xorl %ebx,%ebx
-	jmp *%ebp
+	xorl	%ebx, %ebx
+	jmp	*%ebp
 
-.bss
-/* Stack and heap for uncompression */
-.balign 4
+/*
+ * Stack and heap for uncompression
+ */
+	.bss
+	.balign 4
 boot_heap:
 	.fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
-- 
cgit v0.10.2


From b40d68d5b5b799caaf99d2e073e62962e6d917ce Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 8 May 2009 15:59:13 -0700
Subject: x86, boot: stylistic cleanups for boot/compressed/head_64.S

Clean up style issues in arch/x86/boot/compressed/head_64.S.  This
file had a lot fewer style issues than its 32-bit cousin, but the ones
it has are worth fixing, especially since it makes the two files more
similar.

[ Impact: cleanup, no object code change ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 06cc7e5..26c3def 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -21,8 +21,8 @@
 /*
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
-.code32
-.text
+	.code32
+	.text
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
@@ -33,12 +33,14 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 
-.section ".text.head"
+	.section ".text.head"
 	.code32
 ENTRY(startup_32)
 	cld
-	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
-	 * us to not reload segments */
+	/*
+	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+	 * us to not reload segments
+	 */
 	testb $(1<<6), BP_loadflags(%esi)
 	jnz 1f
 
@@ -49,7 +51,8 @@ ENTRY(startup_32)
 	movl	%eax, %ss
 1:
 
-/* Calculate the delta between where we were compiled to run
+/*
+ * Calculate the delta between where we were compiled to run
  * at and where we were actually loaded at.  This can only be done
  * with a short local call on x86.  Nothing  else will tell us what
  * address we are running at.  The reserved chunk of the real-mode
@@ -70,10 +73,11 @@ ENTRY(startup_32)
 	testl	%eax, %eax
 	jnz	no_longmode
 
-/* Compute the delta between where we were compiled to run at
+/*
+ * Compute the delta between where we were compiled to run at
  * and where the code will actually run at.
- */
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
+ *
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
  * contains the address where we should move the kernel image temporarily
  * for safe in-place decompression.
  */
@@ -114,7 +118,7 @@ ENTRY(startup_32)
  /*
   * Build early 4G boot pagetable
   */
-	/* Initialize Page tables to 0*/
+	/* Initialize Page tables to 0 */
 	leal	pgtable(%ebx), %edi
 	xorl	%eax, %eax
 	movl	$((4096*6)/4), %ecx
@@ -155,7 +159,8 @@ ENTRY(startup_32)
 	btsl	$_EFER_LME, %eax
 	wrmsr
 
-	/* Setup for the jump to 64bit mode
+	/*
+	 * Setup for the jump to 64bit mode
 	 *
 	 * When the jump is performend we will be in long mode but
 	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
@@ -184,7 +189,8 @@ no_longmode:
 
 #include "../../kernel/verify_cpu_64.S"
 
-	/* Be careful here startup_64 needs to be at a predictable
+	/*
+	 * Be careful here startup_64 needs to be at a predictable
 	 * address so I can export it in an ELF header.  Bootloaders
 	 * should look at the ELF header to find this address, as
 	 * it may change in the future.
@@ -192,7 +198,8 @@ no_longmode:
 	.code64
 	.org 0x200
 ENTRY(startup_64)
-	/* We come here either from startup_32 or directly from a
+	/*
+	 * We come here either from startup_32 or directly from a
 	 * 64bit bootloader.  If we come here from a bootloader we depend on
 	 * an identity mapped page table being provied that maps our
 	 * entire text+data+bss and hopefully all of memory.
@@ -209,7 +216,8 @@ ENTRY(startup_64)
 	movl    $0x20, %eax
 	ltr	%ax
 
-	/* Compute the decompressed kernel start address.  It is where
+	/*
+	 * Compute the decompressed kernel start address.  It is where
 	 * we were loaded at aligned to a 2M boundary. %rbp contains the
 	 * decompressed kernel start address.
 	 *
@@ -241,7 +249,8 @@ ENTRY(startup_64)
 	addq	$(32768 + 18 + 4095), %rbx
 	andq	$~4095, %rbx
 
-/* Copy the compressed kernel to the end of our buffer
+/*
+ * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
 	leaq	_end_before_pgt(%rip), %r8
@@ -260,7 +269,7 @@ ENTRY(startup_64)
 	leaq	relocated(%rbx), %rax
 	jmp	*%rax
 
-.section ".text"
+	.text
 relocated:
 
 /*
@@ -271,8 +280,7 @@ relocated:
 	leaq    _end_before_pgt(%rbx), %rcx
 	subq	%rdi, %rcx
 	cld
-	rep
-	stosb
+	rep	stosb
 
 	/* Setup the stack */
 	leaq	boot_stack_end(%rip), %rsp
@@ -311,9 +319,11 @@ gdt:
 	.quad   0x0000000000000000	/* TS continued */
 gdt_end:
 
-.bss
-/* Stack and heap for uncompression */
-.balign 4
+/*
+ * Stack and heap for uncompression
+ */
+	.bss
+	.balign 4
 boot_heap:
 	.fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
-- 
cgit v0.10.2


From 5b11f1cee5797b38d16b94d8745b12b6727a8373 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 8 May 2009 16:20:34 -0700
Subject: x86, boot: straighten out ranges to copy/zero in compressed/head*.S

Both on 32 and 64 bits, we copy all the way up to the end of bss,
except that on 64 bits there is a hack to avoid copying on top of the
page tables.  There is no point in copying bss at all, especially
since we are just about to zero it all anyway.

To clean up and unify the handling, we now do:

  - copy from startup_32 to _bss.
  - zero from _bss to _ebss.
  - the _ebss symbol is aligned to an 8-byte boundary.
  - the page tables are moved to a separate section.

Use _bss as the copy endpoint since _edata may be misaligned.

[ Impact: cleanup, trivial performance improvement ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 7bd7766..59425e1 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -93,9 +93,9 @@ ENTRY(startup_32)
  * where decompression in place becomes safe.
  */
 	pushl	%esi
-	leal	_ebss(%ebp), %esi
-	leal	_ebss(%ebx), %edi
-	movl	$(_ebss - startup_32), %ecx
+	leal	_bss(%ebp), %esi
+	leal	_bss(%ebx), %edi
+	movl	$(_bss - startup_32), %ecx
 	std
 	rep	movsb
 	cld
@@ -125,7 +125,7 @@ relocated:
  * Clear BSS
  */
 	xorl	%eax, %eax
-	leal	_edata(%ebx), %edi
+	leal	_bss(%ebx), %edi
 	leal	_ebss(%ebx), %ecx
 	subl	%edi, %ecx
 	cld
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 26c3def..5bc9052 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -253,9 +253,9 @@ ENTRY(startup_64)
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-	leaq	_end_before_pgt(%rip), %r8
-	leaq	_end_before_pgt(%rbx), %r9
-	movq	$_end_before_pgt /* - $startup_32 */, %rcx
+	leaq	_bss(%rip), %r8
+	leaq	_bss(%rbx), %r9
+	movq	$_bss /* - $startup_32 */, %rcx
 1:	subq	$8, %r8
 	subq	$8, %r9
 	movq	0(%r8), %rax
@@ -276,8 +276,8 @@ relocated:
  * Clear BSS
  */
 	xorq	%rax, %rax
-	leaq    _edata(%rbx), %rdi
-	leaq    _end_before_pgt(%rbx), %rcx
+	leaq    _bss(%rbx), %rdi
+	leaq    _ebss(%rbx), %rcx
 	subq	%rdi, %rcx
 	cld
 	rep	stosb
@@ -329,3 +329,11 @@ boot_heap:
 boot_stack:
 	.fill BOOT_STACK_SIZE, 1, 0
 boot_stack_end:
+
+/*
+ * Space for page tables (not in .bss so not zeroed)
+ */
+	.section ".pgtable","a",@nobits
+	.balign 4096
+pgtable:
+	.fill 6*4096, 1, 0
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index dbe515e..cc353e1 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -2,6 +2,8 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
 
 #undef i386
 
+#include <asm/page_types.h>
+
 #ifdef CONFIG_X86_64
 OUTPUT_ARCH(i386:x86-64)
 ENTRY(startup_64)
@@ -48,13 +50,16 @@ SECTIONS
 		*(.bss)
 		*(.bss.*)
 		*(COMMON)
-#ifdef CONFIG_X86_64
-		. = ALIGN(8);
-		_end_before_pgt = . ;
-		. = ALIGN(4096);
-		pgtable = . ;
-		. = . + 4096 * 6;
-#endif
+		. = ALIGN(8);	/* For convenience during zeroing */
 		_ebss = .;
 	}
+#ifdef CONFIG_X86_64
+       . = ALIGN(PAGE_SIZE);
+       .pgtable : {
+		_pgtable = . ;
+		*(.pgtable)
+		_epgtable = . ;
+	}
+#endif
+	_end = .;
 }
-- 
cgit v0.10.2


From 0a137736704ef9af719409933b3c33e138461786 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 8 May 2009 16:27:41 -0700
Subject: x86, boot: set up the decompression stack as early as possible

Set up the decompression stack as soon as we know where it needs to
go.  That way we have a full-service stack as soon as possible, rather
than relying on the BP_scratch field.

Note that the stack does need to be empty during bss zeroing (or
else the stack needs to be moved out of the bss segment, which is also
an option.)

[ Impact: cleanup, minor paranoia ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 59425e1..d7245cf 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -88,6 +88,9 @@ ENTRY(startup_32)
 	addl	$4095, %ebx
 	andl	$~4095, %ebx
 
+	/* Set up the stack */
+	leal	boot_stack_end(%ebx), %esp
+
 /*
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
@@ -122,7 +125,7 @@ ENDPROC(startup_32)
 relocated:
 
 /*
- * Clear BSS
+ * Clear BSS (stack is currently empty)
  */
 	xorl	%eax, %eax
 	leal	_bss(%ebx), %edi
@@ -132,11 +135,6 @@ relocated:
 	rep	stosb
 
 /*
- * Setup the stack for the decompressor
- */
-	leal	boot_stack_end(%ebx), %esp
-
-/*
  * Do the decompression, and jump to the new kernel..
  */
 	movl	output_len(%ebx), %eax
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 5bc9052..a0b1842 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -249,6 +249,13 @@ ENTRY(startup_64)
 	addq	$(32768 + 18 + 4095), %rbx
 	andq	$~4095, %rbx
 
+	/* Set up the stack */
+	leaq	boot_stack_end(%rbx), %rsp
+
+	/* Zero EFLAGS */
+	pushq	$0
+	popfq
+
 /*
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
@@ -273,7 +280,7 @@ ENTRY(startup_64)
 relocated:
 
 /*
- * Clear BSS
+ * Clear BSS (stack is currently empty)
  */
 	xorq	%rax, %rax
 	leaq    _bss(%rbx), %rdi
@@ -282,13 +289,6 @@ relocated:
 	cld
 	rep	stosb
 
-	/* Setup the stack */
-	leaq	boot_stack_end(%rip), %rsp
-
-	/* zero EFLAGS after setting rsp */
-	pushq	$0
-	popfq
-
 /*
  * Do the decompression, and jump to the new kernel..
  */
-- 
cgit v0.10.2


From 97541912785369925723b6255438ad9fce2ddf04 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 6 May 2009 17:56:51 -0700
Subject: x86, boot: zero EFLAGS on 32 bits

The 64-bit code already clears EFLAGS as soon as it has a stack.  This
seems like a reasonable precaution, so do it on 32 bits as well.

[ Impact: extra paranoia ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index d7245cf..d02a4f0 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -91,6 +91,10 @@ ENTRY(startup_32)
 	/* Set up the stack */
 	leal	boot_stack_end(%ebx), %esp
 
+	/* Zero EFLAGS */
+	pushl	$0
+	popfl
+
 /*
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
-- 
cgit v0.10.2


From 36d3793c947f1ef7ba3d24eeeddc1be41adc5ab4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 8 May 2009 16:45:15 -0700
Subject: x86, boot: use appropriate rep string for move and clear

In the pre-decompression code, use the appropriate largest possible
rep movs and rep stos to move code and clear bss, respectively.  For
reverse copy, do note that the initial values are supposed to be the
address of the first (highest) copy datum, not one byte beyond the end
of the buffer.

rep strings are not necessarily the fastest way to perform these
operations on all current processors, but are likely to be in the
future, and perhaps more importantly, we want to encourage the
architecturally right thing to do here.

This also fixes a couple of trivial inefficiencies on 64 bits.

[ Impact: trivial performance enhancement, increase code similarity ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index d02a4f0..6710dc7 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -100,11 +100,12 @@ ENTRY(startup_32)
  * where decompression in place becomes safe.
  */
 	pushl	%esi
-	leal	_bss(%ebp), %esi
-	leal	_bss(%ebx), %edi
+	leal	(_bss-4)(%ebp), %esi
+	leal	(_bss-4)(%ebx), %edi
 	movl	$(_bss - startup_32), %ecx
+	shrl	$2, %ecx
 	std
-	rep	movsb
+	rep	movsl
 	cld
 	popl	%esi
 
@@ -135,8 +136,8 @@ relocated:
 	leal	_bss(%ebx), %edi
 	leal	_ebss(%ebx), %ecx
 	subl	%edi, %ecx
-	cld
-	rep	stosb
+	shrl	$2, %ecx
+	rep	stosl
 
 /*
  * Do the decompression, and jump to the new kernel..
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index a0b1842..723c72d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -260,15 +260,15 @@ ENTRY(startup_64)
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-	leaq	_bss(%rip), %r8
-	leaq	_bss(%rbx), %r9
+	pushq	%rsi
+	leaq	(_bss-8)(%rip), %rsi
+	leaq	(_bss-8)(%rbx), %rdi
 	movq	$_bss /* - $startup_32 */, %rcx
-1:	subq	$8, %r8
-	subq	$8, %r9
-	movq	0(%r8), %rax
-	movq	%rax, 0(%r9)
-	subq	$8, %rcx
-	jnz	1b
+	shrq	$3, %rcx
+	std
+	rep	movsq
+	cld
+	popq	%rsi
 
 /*
  * Jump to the relocated address.
@@ -282,12 +282,12 @@ relocated:
 /*
  * Clear BSS (stack is currently empty)
  */
-	xorq	%rax, %rax
-	leaq    _bss(%rbx), %rdi
-	leaq    _ebss(%rbx), %rcx
+	xorl	%eax, %eax
+	leaq    _bss(%rip), %rdi
+	leaq    _ebss(%rip), %rcx
 	subq	%rdi, %rcx
-	cld
-	rep	stosb
+	shrq	$3, %rcx
+	rep	stosq
 
 /*
  * Do the decompression, and jump to the new kernel..
-- 
cgit v0.10.2


From 02a884c0fe7ec8459d00d34b7d4101af21fc4a86 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 8 May 2009 17:42:16 -0700
Subject: x86, boot: determine compressed code offset at compile time

Determine the compressed code offset (from the kernel runtime address)
at compile time.  This allows some minor optimizations in
arch/x86/boot/compressed/head_*.S, but more importantly it makes this
value available to the build process, which will enable a future patch
to export the necessary linear memory footprint into the bzImage
header.

[ Impact: cleanup, future patch enabling ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 7f24fdb..49c8a4c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -19,6 +19,8 @@ KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 LDFLAGS := -m elf_$(UTS_MACHINE)
 LDFLAGS_vmlinux := -T
 
+hostprogs-y	:= mkpiggy
+
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
@@ -50,6 +52,9 @@ suffix-$(CONFIG_KERNEL_GZIP)	:= gz
 suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
 suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
 
-LDFLAGS_piggy.o := -r --format binary --oformat $(CONFIG_OUTPUT_FORMAT) -T
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
-	$(call if_changed,ld)
+quiet_cmd_mkpiggy = MKPIGGY $@
+      cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
+
+targets += piggy.S
+$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
+	$(call if_changed,mkpiggy)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 6710dc7..470474b 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -75,18 +75,8 @@ ENTRY(startup_32)
 	movl	$LOAD_PHYSICAL_ADDR, %ebx
 #endif
 
-	/* Replace the compressed data size with the uncompressed size */
-	subl	input_len(%ebp), %ebx
-	movl	output_len(%ebp), %eax
-	addl	%eax, %ebx
-	/* Add 8 bytes for every 32K input block */
-	shrl	$12, %eax
-	addl	%eax, %ebx
-	/* Add 32K + 18 bytes of extra slack */
-	addl	$(32768 + 18), %ebx
-	/* Align on a 4K boundary */
-	addl	$4095, %ebx
-	andl	$~4095, %ebx
+	/* Target address to relocate to for decompression */
+	addl	$z_extract_offset, %ebx
 
 	/* Set up the stack */
 	leal	boot_stack_end(%ebx), %esp
@@ -142,12 +132,10 @@ relocated:
 /*
  * Do the decompression, and jump to the new kernel..
  */
-	movl	output_len(%ebx), %eax
-	pushl	%eax
+	leal	z_extract_offset_negative(%ebx), %ebp
 				/* push arguments for decompress_kernel: */
 	pushl	%ebp		/* output address */
-	movl	input_len(%ebx), %eax
-	pushl	%eax		/* input_len */
+	pushl	$z_input_len	/* input_len */
 	leal	input_data(%ebx), %eax
 	pushl	%eax		/* input_data */
 	leal	boot_heap(%ebx), %eax
@@ -155,14 +143,12 @@ relocated:
 	pushl	%esi		/* real mode pointer */
 	call	decompress_kernel
 	addl	$20, %esp
-	popl	%ecx
 
 #if CONFIG_RELOCATABLE
 /*
  * Find the address of the relocations.
  */
-	movl	%ebp, %edi
-	addl	%ecx, %edi
+	leal	z_output_len(%ebp), %edi
 
 /*
  * Calculate the delta between where vmlinux was compiled to run
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 723c72d..2b9f251 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -90,16 +90,8 @@ ENTRY(startup_32)
 	movl	$CONFIG_PHYSICAL_START, %ebx
 #endif
 
-	/* Replace the compressed data size with the uncompressed size */
-	subl	input_len(%ebp), %ebx
-	movl	output_len(%ebp), %eax
-	addl	%eax, %ebx
-	/* Add 8 bytes for every 32K input block */
-	shrl	$12, %eax
-	addl	%eax, %ebx
-	/* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
-	addl	$(32768 + 18 + 4095), %ebx
-	andl	$~4095, %ebx
+	/* Target address to relocate to for decompression */
+	addl	$z_extract_offset, %ebx
 
 /*
  * Prepare for entering 64 bit mode
@@ -224,6 +216,9 @@ ENTRY(startup_64)
 	 * If it is a relocatable kernel then decompress and run the kernel
 	 * from load address aligned to 2MB addr, otherwise decompress and
 	 * run the kernel from CONFIG_PHYSICAL_START
+	 *
+	 * We cannot rely on the calculation done in 32-bit mode, since we
+	 * may have been invoked via the 64-bit entry point.
 	 */
 
 	/* Start with the delta to where the kernel will run at. */
@@ -237,17 +232,8 @@ ENTRY(startup_64)
 	movq	%rbp, %rbx
 #endif
 
-	/* Replace the compressed data size with the uncompressed size */
-	movl	input_len(%rip), %eax
-	subq	%rax, %rbx
-	movl	output_len(%rip), %eax
-	addq	%rax, %rbx
-	/* Add 8 bytes for every 32K input block */
-	shrq	$12, %rax
-	addq	%rax, %rbx
-	/* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
-	addq	$(32768 + 18 + 4095), %rbx
-	andq	$~4095, %rbx
+	/* Target address to relocate to for decompression */
+	leaq	z_extract_offset(%rbp), %rbx
 
 	/* Set up the stack */
 	leaq	boot_stack_end(%rbx), %rsp
@@ -292,13 +278,12 @@ relocated:
 /*
  * Do the decompression, and jump to the new kernel..
  */
-	pushq	%rsi			# Save the real mode argument
-	movq	%rsi, %rdi		# real mode address
-	leaq	boot_heap(%rip), %rsi	# malloc area for uncompression
-	leaq	input_data(%rip), %rdx  # input_data
-	movl	input_len(%rip), %eax
-	movq	%rax, %rcx		# input_len
-	movq	%rbp, %r8		# output
+	pushq	%rsi			/* Save the real mode argument */
+	movq	%rsi, %rdi		/* real mode address */
+	leaq	boot_heap(%rip), %rsi	/* malloc area for uncompression */
+	leaq	input_data(%rip), %rdx  /* input_data */
+	movl	$z_input_len, %ecx	/* input_len */
+	movq	%rbp, %r8		/* output target address */
 	call	decompress_kernel
 	popq	%rsi
 
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
new file mode 100644
index 0000000..bcbd36c
--- /dev/null
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *  Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License version
+ *  2 as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ *  02110-1301, USA.
+ *
+ *  H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Compute the desired load offset from a compressed program; outputs
+ * a small assembly wrapper with the appropriate symbols defined.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+static uint32_t getle32(const void *p)
+{
+	const uint8_t *cp = p;
+
+	return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) +
+		((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24);
+}
+
+int main(int argc, char *argv[])
+{
+	uint32_t olen;
+	long ilen;
+	unsigned long offs;
+	FILE *f;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+		return 1;
+	}
+
+	/* Get the information for the compressed kernel image first */
+
+	f = fopen(argv[1], "r");
+	if (!f) {
+		perror(argv[1]);
+		return 1;
+	}
+
+
+	if (fseek(f, -4L, SEEK_END)) {
+		perror(argv[1]);
+	}
+	fread(&olen, sizeof olen, 1, f);
+	ilen = ftell(f);
+	olen = getle32(&olen);
+	fclose(f);
+
+	/*
+	 * Now we have the input (compressed) and output (uncompressed)
+	 * sizes, compute the necessary decompression offset...
+	 */
+
+	offs = (olen > ilen) ? olen - ilen : 0;
+	offs += olen >> 12;	/* Add 8 bytes for each 32K block */
+	offs += 32*1024 + 18;	/* Add 32K + 18 bytes slack */
+	offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
+
+	printf(".section \".rodata.compressed\",\"a\",@progbits\n");
+	printf(".globl z_input_len\n");
+	printf("z_input_len = %lu\n", ilen);
+	printf(".globl z_output_len\n");
+	printf("z_output_len = %lu\n", (unsigned long)olen);
+	printf(".globl z_extract_offset\n");
+	printf("z_extract_offset = 0x%lx\n", offs);
+	/* z_extract_offset_negative allows simplification of head_32.S */
+	printf(".globl z_extract_offset_negative\n");
+	printf("z_extract_offset_negative = -0x%lx\n", offs);
+
+	printf(".globl input_data, input_data_end\n");
+	printf("input_data:\n");
+	printf(".incbin \"%s\"\n", argv[1]);
+	printf("input_data_end:\n");
+
+	return 0;
+}
diff --git a/arch/x86/boot/compressed/vmlinux.scr b/arch/x86/boot/compressed/vmlinux.scr
deleted file mode 100644
index f02382a..0000000
--- a/arch/x86/boot/compressed/vmlinux.scr
+++ /dev/null
@@ -1,10 +0,0 @@
-SECTIONS
-{
-  .rodata.compressed : {
-	input_len = .;
-	LONG(input_data_end - input_data) input_data = .;
-	*(.data)
-	output_len = . - 4;
-	input_data_end = .;
-	}
-}
-- 
cgit v0.10.2


From 778dedae0cb76a441145f3a0c5d59fcb3ba296d5 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Sat, 9 May 2009 12:54:34 +0800
Subject: x86: mce: remove duplicated #include

Remove duplicated #include in arch/x86/kernel/cpu/mcheck/mce_intel_64.c.

[ Impact: cleanup ]

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index d6b72df..aedf976 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -15,7 +15,6 @@
 #include <asm/hw_irq.h>
 #include <asm/idle.h>
 #include <asm/therm_throt.h>
-#include <asm/apic.h>
 
 asmlinkage void smp_thermal_interrupt(void)
 {
-- 
cgit v0.10.2


From b30505c81a9d4adea8b70ecff512b0216929b797 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Thu, 7 May 2009 15:40:14 -0700
Subject: futex: add requeue-pi documentation

Add Documentation/futex-requeue-pi.txt describing the motivation for the
newly added FUTEX_*REQUEUE_PI op codes and their implementation.

[ Impact: add documentation ]

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Cc: Sripathi Kodi <sripathik@in.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Dinakar Guniguntala <dino@in.ibm.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Cc: Jakub Jelinek <jakub@redhat.com>
LKML-Reference: <4A03634E.3080609@us.ibm.com>
[ reformatted the file ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt
new file mode 100644
index 0000000..9dc1ff4
--- /dev/null
+++ b/Documentation/futex-requeue-pi.txt
@@ -0,0 +1,131 @@
+Futex Requeue PI
+----------------
+
+Requeueing of tasks from a non-PI futex to a PI futex requires
+special handling in order to ensure the underlying rt_mutex is never
+left without an owner if it has waiters; doing so would break the PI
+boosting logic [see rt-mutex-desgin.txt] For the purposes of
+brevity, this action will be referred to as "requeue_pi" throughout
+this document.  Priority inheritance is abbreviated throughout as
+"PI".
+
+Motivation
+----------
+
+Without requeue_pi, the glibc implementation of
+pthread_cond_broadcast() must resort to waking all the tasks waiting
+on a pthread_condvar and letting them try to sort out which task
+gets to run first in classic thundering-herd formation.  An ideal
+implementation would wake the highest-priority waiter, and leave the
+rest to the natural wakeup inherent in unlocking the mutex
+associated with the condvar.
+
+Consider the simplified glibc calls:
+
+/* caller must lock mutex */
+pthread_cond_wait(cond, mutex)
+{
+	lock(cond->__data.__lock);
+	unlock(mutex);
+	do {
+	   unlock(cond->__data.__lock);
+	   futex_wait(cond->__data.__futex);
+	   lock(cond->__data.__lock);
+	} while(...)
+	unlock(cond->__data.__lock);
+	lock(mutex);
+}
+
+pthread_cond_broadcast(cond)
+{
+	lock(cond->__data.__lock);
+	unlock(cond->__data.__lock);
+	futex_requeue(cond->data.__futex, cond->mutex);
+}
+
+Once pthread_cond_broadcast() requeues the tasks, the cond->mutex
+has waiters. Note that pthread_cond_wait() attempts to lock the
+mutex only after it has returned to user space.  This will leave the
+underlying rt_mutex with waiters, and no owner, breaking the
+previously mentioned PI-boosting algorithms.
+
+In order to support PI-aware pthread_condvar's, the kernel needs to
+be able to requeue tasks to PI futexes.  This support implies that
+upon a successful futex_wait system call, the caller would return to
+user space already holding the PI futex.  The glibc implementation
+would be modified as follows:
+
+
+/* caller must lock mutex */
+pthread_cond_wait_pi(cond, mutex)
+{
+	lock(cond->__data.__lock);
+	unlock(mutex);
+	do {
+	   unlock(cond->__data.__lock);
+	   futex_wait_requeue_pi(cond->__data.__futex);
+	   lock(cond->__data.__lock);
+	} while(...)
+	unlock(cond->__data.__lock);
+        /* the kernel acquired the the mutex for us */
+}
+
+pthread_cond_broadcast_pi(cond)
+{
+	lock(cond->__data.__lock);
+	unlock(cond->__data.__lock);
+	futex_requeue_pi(cond->data.__futex, cond->mutex);
+}
+
+The actual glibc implementation will likely test for PI and make the
+necessary changes inside the existing calls rather than creating new
+calls for the PI cases.  Similar changes are needed for
+pthread_cond_timedwait() and pthread_cond_signal().
+
+Implementation
+--------------
+
+In order to ensure the rt_mutex has an owner if it has waiters, it
+is necessary for both the requeue code, as well as the waiting code,
+to be able to acquire the rt_mutex before returning to user space.
+The requeue code cannot simply wake the waiter and leave it to
+acquire the rt_mutex as it would open a race window between the
+requeue call returning to user space and the waiter waking and
+starting to run.  This is especially true in the uncontended case.
+
+The solution involves two new rt_mutex helper routines,
+rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
+allow the requeue code to acquire an uncontended rt_mutex on behalf
+of the waiter and to enqueue the waiter on a contended rt_mutex.
+Two new system calls provide the kernel<->user interface to
+requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI.
+
+FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
+and pthread_cond_timedwait()) to block on the initial futex and wait
+to be requeued to a PI-aware futex.  The implementation is the
+result of a high-speed collision between futex_wait() and
+futex_lock_pi(), with some extra logic to check for the additional
+wake-up scenarios.
+
+FUTEX_REQUEUE_CMP_PI is called by the waker
+(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
+possibly wake the waiting tasks. Internally, this system call is
+still handled by futex_requeue (by passing requeue_pi=1).  Before
+requeueing, futex_requeue() attempts to acquire the requeue target
+PI futex on behalf of the top waiter.  If it can, this waiter is
+woken.  futex_requeue() then proceeds to requeue the remaining
+nr_wake+nr_requeue tasks to the PI futex, calling
+rt_mutex_start_proxy_lock() prior to each requeue to prepare the
+task as a waiter on the underlying rt_mutex.  It is possible that
+the lock can be acquired at this stage as well, if so, the next
+waiter is woken to finish the acquisition of the lock.
+
+FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
+their sum is all that really matters.  futex_requeue() will wake or
+requeue up to nr_wake + nr_requeue tasks.  It will wake only as many
+tasks as it can acquire the lock for, which in the majority of cases
+should be 0 as good programming practice dictates that the caller of
+either pthread_cond_broadcast() or pthread_cond_signal() acquire the
+mutex prior to making the call. FUTEX_REQUEUE_PI requires that
+nr_wake=1.  nr_requeue should be INT_MAX for broadcast and 0 for
+signal.
-- 
cgit v0.10.2


From e9e8b1fb995543c4cc3930f465923a552b2e48b7 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 9 May 2009 14:31:37 +0900
Subject: sh: Fix up the sh64 earlyprintk build.

sci_rxd_in() on SH-5 wants to be using SCSPTR, not SCSPTR2.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index e3eae4e..38072c1 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -636,7 +636,7 @@ static inline int sci_rxd_in(struct uart_port *port)
 #elif defined(CONFIG_CPU_SUBTYPE_SH5_101) || defined(CONFIG_CPU_SUBTYPE_SH5_103)
 static inline int sci_rxd_in(struct uart_port *port)
 {
-         return sci_in(port, SCSPTR2)&0x0001 ? 1 : 0; /* SCIF */
+         return sci_in(port, SCSPTR)&0x0001 ? 1 : 0; /* SCIF */
 }
 #elif defined(__H8300H__) || defined(__H8300S__)
 static inline int sci_rxd_in(struct uart_port *port)
-- 
cgit v0.10.2


From 2fedaacdc07e053d93e0607047a96d282f62aca2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 9 May 2009 14:38:49 +0900
Subject: sh: Cleanup irqflags size mismatch on SH-5 build.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index 9e277ec..8676209 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -60,7 +60,7 @@ static inline void sh64_teardown_dtlb_cache_slot(void)
 static inline void sh64_icache_inv_all(void)
 {
 	unsigned long long addr, flag, data;
-	unsigned int flags;
+	unsigned long flags;
 
 	addr = ICCR0;
 	flag = ICCR0_ICI;
@@ -172,7 +172,7 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
 		unsigned long eaddr;
 		unsigned long after_last_page_start;
 		unsigned long mm_asid, current_asid;
-		unsigned long long flags = 0ULL;
+		unsigned long flags = 0;
 
 		mm_asid = cpu_asid(smp_processor_id(), mm);
 		current_asid = get_asid();
@@ -236,7 +236,7 @@ static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
 	unsigned long long eaddr = start;
 	unsigned long long eaddr_end = start + len;
 	unsigned long current_asid, mm_asid;
-	unsigned long long flags;
+	unsigned long flags;
 	unsigned long long epage_start;
 
 	/*
@@ -342,7 +342,7 @@ static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
 			 * alloco is a NOP if the cache is write-through.
 			 */
 			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
-				ctrl_inb(eaddr);
+				__raw_readb((unsigned long)eaddr);
 		}
 	}
 
-- 
cgit v0.10.2


From 6dbe47a170f80159f23c856ad4e02f2685c6f6cb Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 9 May 2009 14:44:30 +0900
Subject: sh: Provide __read_{read,write}sl() definitions for sh64.

These are presently only defined for sh32, use the plain unoptimized
versions for sh64. Fixes up smsc911x build.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index c7c360b..2534814 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -123,10 +123,15 @@ static inline void __raw_reads##bwlq(volatile void __iomem *mem,	\
 
 __BUILD_MEMORY_STRING(b, u8)
 __BUILD_MEMORY_STRING(w, u16)
-__BUILD_MEMORY_STRING(q, u64)
 
+#ifdef CONFIG_SUPERH32
 void __raw_writesl(void __iomem *addr, const void *data, int longlen);
 void __raw_readsl(const void __iomem *addr, void *data, int longlen);
+#else
+__BUILD_MEMORY_STRING(l, u32)
+#endif
+
+__BUILD_MEMORY_STRING(q, u64)
 
 #define writesb			__raw_writesb
 #define writesw			__raw_writesw
-- 
cgit v0.10.2


From 2bcfffa42309b6f73042c62459bf5207762a271d Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 9 May 2009 16:02:08 +0900
Subject: sh: Rename opcode_t to insn_size_t.

This is now clashing with a driver, so just rename it.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h
index 613644a..036c331 100644
--- a/arch/sh/include/asm/kprobes.h
+++ b/arch/sh/include/asm/kprobes.h
@@ -6,7 +6,7 @@
 #include <linux/types.h>
 #include <linux/ptrace.h>
 
-typedef u16 kprobe_opcode_t;
+typedef insn_size_t kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION	0xc33a
 
 #define MAX_INSN_SIZE 16
diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index 240b31e..6c68a51 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -198,7 +198,7 @@ do {							\
 })
 #endif
 
-int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
+int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma);
 
 asmlinkage void do_address_error(struct pt_regs *regs,
diff --git a/arch/sh/include/asm/types.h b/arch/sh/include/asm/types.h
index beea4e6..b13caca 100644
--- a/arch/sh/include/asm/types.h
+++ b/arch/sh/include/asm/types.h
@@ -23,9 +23,9 @@ typedef unsigned short umode_t;
 typedef u32 dma_addr_t;
 
 #ifdef CONFIG_SUPERH32
-typedef u16 opcode_t;
+typedef u16 insn_size_t;
 #else
-typedef u32 opcode_t;
+typedef u32 insn_size_t;
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index c22853b..77dfecb 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -267,7 +267,7 @@ static struct mem_access trapped_io_access = {
 int handle_trapped_io(struct pt_regs *regs, unsigned long address)
 {
 	mm_segment_t oldfs;
-	opcode_t instruction;
+	insn_size_t instruction;
 	int tmp;
 
 	if (!lookup_tiop(address))
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 7c747e7..305aad7 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -47,7 +47,7 @@ char in_nmi = 0;	/* Set during NMI to prevent re-entry */
 /* Calculate the new address for after a step */
 static short *get_step_address(struct pt_regs *linux_regs)
 {
-	opcode_t op = __raw_readw(linux_regs->pc);
+	insn_size_t op = __raw_readw(linux_regs->pc);
 	long addr;
 
 	/* BT */
@@ -134,7 +134,7 @@ static short *get_step_address(struct pt_regs *linux_regs)
  */
 
 static unsigned long stepped_address;
-static opcode_t stepped_opcode;
+static insn_size_t stepped_opcode;
 
 static void do_single_step(struct pt_regs *linux_regs)
 {
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index 438f1eb..46348ed0 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -22,11 +22,11 @@ static void handle_BUG(struct pt_regs *regs)
 
 int is_valid_bugaddr(unsigned long addr)
 {
-	unsigned short opcode;
+	insn_size_t opcode;
 
 	if (addr < PAGE_OFFSET)
 		return 0;
-	if (probe_kernel_address((u16 *)addr, opcode))
+	if (probe_kernel_address((insn_size_t *)addr, opcode))
 		return 0;
 
 	return opcode == TRAPA_BUG_OPCODE;
@@ -66,7 +66,7 @@ BUILD_TRAP_HANDLER(bug)
 
 #ifdef CONFIG_BUG
 	if (__kernel_text_address(instruction_pointer(regs))) {
-		opcode_t insn = *(opcode_t *)instruction_pointer(regs);
+		insn_size_t insn = *(insn_size_t *)instruction_pointer(regs);
 		if (insn == TRAPA_BUG_OPCODE)
 			handle_BUG(regs);
 	}
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 67550d8..2b77277 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -177,7 +177,7 @@ static struct mem_access user_mem_access = {
  *   (if that instruction is in a branch delay slot)
  * - return 0 if emulation okay, -EFAULT on existential error
  */
-static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
+static int handle_unaligned_ins(insn_size_t instruction, struct pt_regs *regs,
 				struct mem_access *ma)
 {
 	int ret, index, count;
@@ -322,10 +322,10 @@ static int handle_unaligned_ins(opcode_t instruction, struct pt_regs *regs,
  * - fetches the instruction from PC+2
  */
 static inline int handle_delayslot(struct pt_regs *regs,
-				   opcode_t old_instruction,
+				   insn_size_t old_instruction,
 				   struct mem_access *ma)
 {
-	opcode_t instruction;
+	insn_size_t instruction;
 	void __user *addr = (void __user *)(regs->pc +
 		instruction_size(old_instruction));
 
@@ -365,7 +365,7 @@ static inline int handle_delayslot(struct pt_regs *regs,
 
 static int handle_unaligned_notify_count = 10;
 
-int handle_unaligned_access(opcode_t instruction, struct pt_regs *regs,
+int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 			    struct mem_access *ma)
 {
 	u_int rm;
@@ -523,7 +523,7 @@ asmlinkage void do_address_error(struct pt_regs *regs,
 	unsigned long error_code = 0;
 	mm_segment_t oldfs;
 	siginfo_t info;
-	opcode_t instruction;
+	insn_size_t instruction;
 	int tmp;
 
 	/* Intentional ifdef */
-- 
cgit v0.10.2


From f15b18d0755b3ee4b29991fc2fde535ee41df53c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 9 May 2009 10:04:22 +0200
Subject: perf_counter tools: remove debug code from builtin-stat.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index e2fa117..cf575c3 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -331,8 +331,6 @@ static void create_perfstat_counter(int counter)
 	hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
 	hw_event.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
 
-printf("exclude: %d\n", event_mask[counter]);
-
 	if (scale)
 		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
 					  PERF_FORMAT_TOTAL_TIME_RUNNING;
-- 
cgit v0.10.2


From 92d23f703c608fcb2c8edd74a3fd0f4031e18606 Mon Sep 17 00:00:00 2001
From: Ron <ron@voicetronix.com>
Date: Fri, 8 May 2009 22:54:49 +0930
Subject: sched: Fix fallback sched_clock()'s offset when using jiffies

Account for the initial offset to the jiffy count.

[ Impact: fix printk timestamps on architectures using fallback sched_clock() ]

Signed-off-by: Ron Lee <ron@debian.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 819f17a..e1d16c9 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -38,7 +38,8 @@
  */
 unsigned long long __attribute__((weak)) sched_clock(void)
 {
-	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+	return (unsigned long long)(jiffies - INITIAL_JIFFIES)
+					* (NSEC_PER_SEC / HZ);
 }
 
 static __read_mostly int sched_clock_running;
-- 
cgit v0.10.2


From 1031a3a57ed76f5cb8754a89ddb22f3d9ebe8861 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 9 May 2009 17:57:21 +0900
Subject: sh: Provide an __sdivsi3_2 export for sh64.

Newer code paths that are heavier in 64-bit math manage to get this
generated by newer compilers, provide a definition and export
accordingly. This is trivially wrapped around the existing __sdivsi3
code.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c
index 0d74d6b..8f54ef0 100644
--- a/arch/sh/kernel/sh_ksyms_64.c
+++ b/arch/sh/kernel/sh_ksyms_64.c
@@ -76,5 +76,7 @@ EXPORT_SYMBOL(strcpy);
 #define DECLARE_EXPORT(name) extern void name(void);EXPORT_SYMBOL(name)
 
 DECLARE_EXPORT(__sdivsi3);
+DECLARE_EXPORT(__sdivsi3_1);
+DECLARE_EXPORT(__sdivsi3_2);
 DECLARE_EXPORT(__udivsi3);
 DECLARE_EXPORT(__div_table);
diff --git a/arch/sh/lib64/sdivsi3.S b/arch/sh/lib64/sdivsi3.S
index 6a800c6..1963bbd 100644
--- a/arch/sh/lib64/sdivsi3.S
+++ b/arch/sh/lib64/sdivsi3.S
@@ -1,4 +1,6 @@
 	.global	__sdivsi3
+	.global	__sdivsi3_1
+	.global	__sdivsi3_2
 	.section	.text..SHmedia32,"ax"
 	.align	2
 
@@ -6,13 +8,15 @@
 	/* clobbered: r1,r18,r19,r20,r21,r25,tr0 */
 	/* result in r0 */
 __sdivsi3:
+__sdivsi3_1:
 	ptb __div_table,tr0
+	gettr tr0,r20
 
+__sdivsi3_2:
 	nsb r5, r1
 	shlld r5, r1, r25    /* normalize; [-2 ..1, 1..2) in s2.62 */
 	shari r25, 58, r21   /* extract 5(6) bit index (s2.4 with hole -1..1) */
 	/* bubble */
-	gettr tr0,r20
 	ldx.ub r20, r21, r19 /* u0.8 */
 	shari r25, 32, r25   /* normalize to s2.30 */
 	shlli r21, 1, r21
-- 
cgit v0.10.2


From 7cd0378ef4c0d7b05e7dcb1daa115b841ffdc31a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 9 May 2009 18:03:37 +0900
Subject: sh: Fix up SHmedia module ELF relocations.

This fixes up the LSB setting for SHmedia branching in updated symbols
when processing module relocations.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c
index c430810..c19b0f7 100644
--- a/arch/sh/kernel/module.c
+++ b/arch/sh/kernel/module.c
@@ -90,7 +90,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 		 * SHmedia, the LSB of the symbol needs to be asserted
 		 * for the CPU to be in SHmedia mode when it starts executing
 		 * the branch target. */
-		relocation |= (sym->st_other & 4);
+		relocation |= !!(sym->st_other & 4);
 #endif
 
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
-- 
cgit v0.10.2


From c3e2586b794b12ffcdf69b4e547030b51e18e6d9 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 9 May 2009 23:33:02 +0900
Subject: sh: Integrate sh64 bits in vmlinux_32.lds.S.

This adds all of the requisite bits from vmlinux_64.lds.S in to the _32
variant, resulting in a unified and generic linker script that can be
shared across both.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/vmlinux_32.lds.S b/arch/sh/kernel/vmlinux_32.lds.S
index dd9b2ee..8d97f52 100644
--- a/arch/sh/kernel/vmlinux_32.lds.S
+++ b/arch/sh/kernel/vmlinux_32.lds.S
@@ -1,17 +1,23 @@
 /*
  * ld script to make SuperH Linux kernel
- * Written by Niibe Yutaka
+ * Written by Niibe Yutaka and Paul Mundt
  */
-#include <asm/thread_info.h>
-#include <asm/cache.h>
-#include <asm-generic/vmlinux.lds.h>
-
+#ifdef CONFIG_SUPERH64
+#define LOAD_OFFSET	CONFIG_PAGE_OFFSET
+OUTPUT_ARCH(sh:sh5)
+#else
+OUTPUT_ARCH(sh)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 OUTPUT_FORMAT("elf32-sh-linux", "elf32-sh-linux", "elf32-sh-linux")
 #else
 OUTPUT_FORMAT("elf32-shbig-linux", "elf32-shbig-linux", "elf32-shbig-linux")
 #endif
-OUTPUT_ARCH(sh)
+#endif
+
+#include <asm/thread_info.h>
+#include <asm/cache.h>
+#include <asm-generic/vmlinux.lds.h>
+
 ENTRY(_start)
 SECTIONS
 {
@@ -24,28 +30,35 @@ SECTIONS
 	. = CONFIG_PAGE_OFFSET + CONFIG_MEMORY_START + CONFIG_ZERO_PAGE_OFFSET;
 #endif
 
-	_text = .;			/* Text and read-only data */
-
-	.empty_zero_page : {
+	.empty_zero_page : AT(ADDR(.empty_zero_page) - LOAD_OFFSET) {
 		*(.empty_zero_page)
 	} = 0
 
-	.text : {
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		_text = .;		/* Text and read-only data */
 		HEAD_TEXT
 		TEXT_TEXT
+
+#ifdef CONFIG_SUPERH64
+		*(.text64)
+		*(.text..SHmedia32)
+#endif
+
 		SCHED_TEXT
 		LOCK_TEXT
 		KPROBES_TEXT
+		IRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
+		_etext = .;		/* End of text section */
 	} = 0x0009
 
 	. = ALIGN(16);		/* Exception table */
-	__start___ex_table = .;
-	__ex_table : { *(__ex_table) }
-	__stop___ex_table = .;
-
-	_etext = .;			/* End of text section */
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+	}
 
 	NOTES
 	RO_DATA(PAGE_SIZE)
@@ -54,13 +67,15 @@ SECTIONS
 	 * Code which must be executed uncached and the associated data
 	 */
 	. = ALIGN(PAGE_SIZE);
-	__uncached_start = .;
-	.uncached.text : { *(.uncached.text) }
-	.uncached.data : { *(.uncached.data) }
-	__uncached_end = .;
+	.uncached : AT(ADDR(.uncached) - LOAD_OFFSET) {
+		__uncached_start = .;
+		*(.uncached.text)
+		*(.uncached.data)
+		__uncached_end = .;
+	}
 
 	. = ALIGN(THREAD_SIZE);
-	.data : {			/* Data */
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {		/* Data */
 		*(.data.init_task)
 
 		. = ALIGN(L1_CACHE_BYTES);
@@ -84,39 +99,51 @@ SECTIONS
 	_edata = .;			/* End of data section */
 
 	. = ALIGN(PAGE_SIZE);		/* Init code and data */
-	__init_begin = .;
-	_sinittext = .;
-	.init.text : { INIT_TEXT }
-	_einittext = .;
-	.init.data : { INIT_DATA }
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		__init_begin = .;
+		_sinittext = .;
+		INIT_TEXT
+		_einittext = .;
+	}
+
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { INIT_DATA }
 
 	. = ALIGN(16);
-	__setup_start = .;
-	.init.setup : { *(.init.setup) }
-	__setup_end = .;
+	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
 
-	__initcall_start = .;
-	.initcall.init : {
+	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+		__initcall_start = .;
 		INITCALLS
+		__initcall_end = .;
+	}
+
+	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
 	}
-	__initcall_end = .;
-	__con_initcall_start = .;
-	.con_initcall.init : { *(.con_initcall.init) }
-	__con_initcall_end = .;
 
 	SECURITY_INIT
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	. = ALIGN(PAGE_SIZE);
-	__initramfs_start = .;
-	.init.ramfs : { *(.init.ramfs) }
-	__initramfs_end = .;
+	.init.ramfs : AT(ADDR(.init_ramfs) - LOAD_OFFSET) {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
 #endif
 
 	. = ALIGN(4);
-	__machvec_start = .;
-	.machvec.init : { *(.machvec.init) }
-	__machvec_end = .;
+	.machvec.init : AT(ADDR(.machvec.init) - LOAD_OFFSET) {
+		__machvec_start = .;
+		*(.machvec.init)
+		__machvec_end = .;
+	}
 
 	PERCPU(PAGE_SIZE)
 
@@ -124,11 +151,11 @@ SECTIONS
 	 * .exit.text is discarded at runtime, not link time, to deal with
 	 * references from __bug_table
 	 */
-	.exit.text : { EXIT_TEXT }
-	.exit.data : { EXIT_DATA }
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { EXIT_TEXT }
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { EXIT_DATA }
 
 	. = ALIGN(PAGE_SIZE);
-	.bss : {
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
 		__init_end = .;
 		__bss_start = .;		/* BSS */
 		*(.bss.page_aligned)
-- 
cgit v0.10.2


From dce97c8cb2ad365fa87dc6924d99528c21c63912 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sat, 9 May 2009 23:36:10 +0900
Subject: sh: Move the unified linker script in place, kill off old _64 one.

Just forcefully rename the _32 variant overtop, and kill off the now
unused _64 version.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index d7d4991..8d97f52 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -1,5 +1,181 @@
-#ifdef CONFIG_SUPERH32
-# include "vmlinux_32.lds.S"
+/*
+ * ld script to make SuperH Linux kernel
+ * Written by Niibe Yutaka and Paul Mundt
+ */
+#ifdef CONFIG_SUPERH64
+#define LOAD_OFFSET	CONFIG_PAGE_OFFSET
+OUTPUT_ARCH(sh:sh5)
 #else
-# include "vmlinux_64.lds.S"
+OUTPUT_ARCH(sh)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+OUTPUT_FORMAT("elf32-sh-linux", "elf32-sh-linux", "elf32-sh-linux")
+#else
+OUTPUT_FORMAT("elf32-shbig-linux", "elf32-shbig-linux", "elf32-shbig-linux")
+#endif
+#endif
+
+#include <asm/thread_info.h>
+#include <asm/cache.h>
+#include <asm-generic/vmlinux.lds.h>
+
+ENTRY(_start)
+SECTIONS
+{
+#ifdef CONFIG_PMB_FIXED
+	. = CONFIG_PAGE_OFFSET + (CONFIG_MEMORY_START & 0x1fffffff) +
+	    CONFIG_ZERO_PAGE_OFFSET;
+#elif defined(CONFIG_32BIT)
+	. = CONFIG_PAGE_OFFSET + CONFIG_ZERO_PAGE_OFFSET;
+#else
+	. = CONFIG_PAGE_OFFSET + CONFIG_MEMORY_START + CONFIG_ZERO_PAGE_OFFSET;
+#endif
+
+	.empty_zero_page : AT(ADDR(.empty_zero_page) - LOAD_OFFSET) {
+		*(.empty_zero_page)
+	} = 0
+
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		_text = .;		/* Text and read-only data */
+		HEAD_TEXT
+		TEXT_TEXT
+
+#ifdef CONFIG_SUPERH64
+		*(.text64)
+		*(.text..SHmedia32)
+#endif
+
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+		_etext = .;		/* End of text section */
+	} = 0x0009
+
+	. = ALIGN(16);		/* Exception table */
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+	}
+
+	NOTES
+	RO_DATA(PAGE_SIZE)
+
+	/*
+	 * Code which must be executed uncached and the associated data
+	 */
+	. = ALIGN(PAGE_SIZE);
+	.uncached : AT(ADDR(.uncached) - LOAD_OFFSET) {
+		__uncached_start = .;
+		*(.uncached.text)
+		*(.uncached.data)
+		__uncached_end = .;
+	}
+
+	. = ALIGN(THREAD_SIZE);
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {		/* Data */
+		*(.data.init_task)
+
+		. = ALIGN(L1_CACHE_BYTES);
+		*(.data.cacheline_aligned)
+
+		. = ALIGN(L1_CACHE_BYTES);
+		*(.data.read_mostly)
+
+		. = ALIGN(PAGE_SIZE);
+		*(.data.page_aligned)
+
+		__nosave_begin = .;
+		*(.data.nosave)
+		. = ALIGN(PAGE_SIZE);
+		__nosave_end = .;
+
+		DATA_DATA
+		CONSTRUCTORS
+	}
+
+	_edata = .;			/* End of data section */
+
+	. = ALIGN(PAGE_SIZE);		/* Init code and data */
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		__init_begin = .;
+		_sinittext = .;
+		INIT_TEXT
+		_einittext = .;
+	}
+
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { INIT_DATA }
+
+	. = ALIGN(16);
+	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
+
+	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+		__initcall_start = .;
+		INITCALLS
+		__initcall_end = .;
+	}
+
+	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
+	}
+
+	SECURITY_INIT
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	. = ALIGN(PAGE_SIZE);
+	.init.ramfs : AT(ADDR(.init_ramfs) - LOAD_OFFSET) {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
 #endif
+
+	. = ALIGN(4);
+	.machvec.init : AT(ADDR(.machvec.init) - LOAD_OFFSET) {
+		__machvec_start = .;
+		*(.machvec.init)
+		__machvec_end = .;
+	}
+
+	PERCPU(PAGE_SIZE)
+
+	/*
+	 * .exit.text is discarded at runtime, not link time, to deal with
+	 * references from __bug_table
+	 */
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { EXIT_TEXT }
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { EXIT_DATA }
+
+	. = ALIGN(PAGE_SIZE);
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+		__init_end = .;
+		__bss_start = .;		/* BSS */
+		*(.bss.page_aligned)
+		*(.bss)
+		*(COMMON)
+		. = ALIGN(4);
+		_ebss = .;			/* uClinux MTD sucks */
+		_end = . ;
+	}
+
+	/*
+	 * When something in the kernel is NOT compiled as a module, the
+	 * module cleanup code and data are put into these segments. Both
+	 * can then be thrown away, as cleanup code is never called unless
+	 * it's a module.
+	 */
+	/DISCARD/ : {
+		*(.exitcall.exit)
+	}
+
+	STABS_DEBUG
+	DWARF_DEBUG
+}
diff --git a/arch/sh/kernel/vmlinux_32.lds.S b/arch/sh/kernel/vmlinux_32.lds.S
deleted file mode 100644
index 8d97f52..0000000
--- a/arch/sh/kernel/vmlinux_32.lds.S
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * ld script to make SuperH Linux kernel
- * Written by Niibe Yutaka and Paul Mundt
- */
-#ifdef CONFIG_SUPERH64
-#define LOAD_OFFSET	CONFIG_PAGE_OFFSET
-OUTPUT_ARCH(sh:sh5)
-#else
-OUTPUT_ARCH(sh)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-OUTPUT_FORMAT("elf32-sh-linux", "elf32-sh-linux", "elf32-sh-linux")
-#else
-OUTPUT_FORMAT("elf32-shbig-linux", "elf32-shbig-linux", "elf32-shbig-linux")
-#endif
-#endif
-
-#include <asm/thread_info.h>
-#include <asm/cache.h>
-#include <asm-generic/vmlinux.lds.h>
-
-ENTRY(_start)
-SECTIONS
-{
-#ifdef CONFIG_PMB_FIXED
-	. = CONFIG_PAGE_OFFSET + (CONFIG_MEMORY_START & 0x1fffffff) +
-	    CONFIG_ZERO_PAGE_OFFSET;
-#elif defined(CONFIG_32BIT)
-	. = CONFIG_PAGE_OFFSET + CONFIG_ZERO_PAGE_OFFSET;
-#else
-	. = CONFIG_PAGE_OFFSET + CONFIG_MEMORY_START + CONFIG_ZERO_PAGE_OFFSET;
-#endif
-
-	.empty_zero_page : AT(ADDR(.empty_zero_page) - LOAD_OFFSET) {
-		*(.empty_zero_page)
-	} = 0
-
-	.text : AT(ADDR(.text) - LOAD_OFFSET) {
-		_text = .;		/* Text and read-only data */
-		HEAD_TEXT
-		TEXT_TEXT
-
-#ifdef CONFIG_SUPERH64
-		*(.text64)
-		*(.text..SHmedia32)
-#endif
-
-		SCHED_TEXT
-		LOCK_TEXT
-		KPROBES_TEXT
-		IRQENTRY_TEXT
-		*(.fixup)
-		*(.gnu.warning)
-		_etext = .;		/* End of text section */
-	} = 0x0009
-
-	. = ALIGN(16);		/* Exception table */
-	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-		__start___ex_table = .;
-		*(__ex_table)
-		__stop___ex_table = .;
-	}
-
-	NOTES
-	RO_DATA(PAGE_SIZE)
-
-	/*
-	 * Code which must be executed uncached and the associated data
-	 */
-	. = ALIGN(PAGE_SIZE);
-	.uncached : AT(ADDR(.uncached) - LOAD_OFFSET) {
-		__uncached_start = .;
-		*(.uncached.text)
-		*(.uncached.data)
-		__uncached_end = .;
-	}
-
-	. = ALIGN(THREAD_SIZE);
-	.data : AT(ADDR(.data) - LOAD_OFFSET) {		/* Data */
-		*(.data.init_task)
-
-		. = ALIGN(L1_CACHE_BYTES);
-		*(.data.cacheline_aligned)
-
-		. = ALIGN(L1_CACHE_BYTES);
-		*(.data.read_mostly)
-
-		. = ALIGN(PAGE_SIZE);
-		*(.data.page_aligned)
-
-		__nosave_begin = .;
-		*(.data.nosave)
-		. = ALIGN(PAGE_SIZE);
-		__nosave_end = .;
-
-		DATA_DATA
-		CONSTRUCTORS
-	}
-
-	_edata = .;			/* End of data section */
-
-	. = ALIGN(PAGE_SIZE);		/* Init code and data */
-	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-		__init_begin = .;
-		_sinittext = .;
-		INIT_TEXT
-		_einittext = .;
-	}
-
-	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { INIT_DATA }
-
-	. = ALIGN(16);
-	.init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-		__setup_start = .;
-		*(.init.setup)
-		__setup_end = .;
-	}
-
-	.initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-		__initcall_start = .;
-		INITCALLS
-		__initcall_end = .;
-	}
-
-	.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-		__con_initcall_start = .;
-		*(.con_initcall.init)
-		__con_initcall_end = .;
-	}
-
-	SECURITY_INIT
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	. = ALIGN(PAGE_SIZE);
-	.init.ramfs : AT(ADDR(.init_ramfs) - LOAD_OFFSET) {
-		__initramfs_start = .;
-		*(.init.ramfs)
-		__initramfs_end = .;
-	}
-#endif
-
-	. = ALIGN(4);
-	.machvec.init : AT(ADDR(.machvec.init) - LOAD_OFFSET) {
-		__machvec_start = .;
-		*(.machvec.init)
-		__machvec_end = .;
-	}
-
-	PERCPU(PAGE_SIZE)
-
-	/*
-	 * .exit.text is discarded at runtime, not link time, to deal with
-	 * references from __bug_table
-	 */
-	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { EXIT_TEXT }
-	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { EXIT_DATA }
-
-	. = ALIGN(PAGE_SIZE);
-	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-		__init_end = .;
-		__bss_start = .;		/* BSS */
-		*(.bss.page_aligned)
-		*(.bss)
-		*(COMMON)
-		. = ALIGN(4);
-		_ebss = .;			/* uClinux MTD sucks */
-		_end = . ;
-	}
-
-	/*
-	 * When something in the kernel is NOT compiled as a module, the
-	 * module cleanup code and data are put into these segments. Both
-	 * can then be thrown away, as cleanup code is never called unless
-	 * it's a module.
-	 */
-	/DISCARD/ : {
-		*(.exitcall.exit)
-	}
-
-	STABS_DEBUG
-	DWARF_DEBUG
-}
diff --git a/arch/sh/kernel/vmlinux_64.lds.S b/arch/sh/kernel/vmlinux_64.lds.S
deleted file mode 100644
index 6966446..0000000
--- a/arch/sh/kernel/vmlinux_64.lds.S
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * ld script to make SH64 Linux kernel
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- *
- * benedict.gaster@superh.com:	 2nd May 2002
- *    Add definition of empty_zero_page to be the first page of kernel image.
- *
- * benedict.gaster@superh.com:	 3rd May 2002
- *    Added support for ramdisk, removing statically linked romfs at the
- *    same time.
- *
- * lethal@linux-sh.org:          9th May 2003
- *    Kill off GLOBAL_NAME() usage and other CDC-isms.
- *
- * lethal@linux-sh.org:         19th May 2003
- *    Remove support for ancient toolchains.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <asm/page.h>
-#include <asm/cache.h>
-#include <asm/thread_info.h>
-
-#define LOAD_OFFSET	CONFIG_PAGE_OFFSET
-#include <asm-generic/vmlinux.lds.h>
-
-OUTPUT_ARCH(sh:sh5)
-
-#define C_PHYS(x) AT (ADDR(x) - LOAD_OFFSET)
-
-ENTRY(__start)
-SECTIONS
-{
-	. = CONFIG_PAGE_OFFSET + CONFIG_MEMORY_START + PAGE_SIZE;
-	_text = .;			/* Text and read-only data */
-
-	.empty_zero_page : C_PHYS(.empty_zero_page) {
-		*(.empty_zero_page)
-	} = 0
-
-	.text : C_PHYS(.text) {
-		HEAD_TEXT
-		TEXT_TEXT
-		*(.text64)
-		*(.text..SHmedia32)
-		SCHED_TEXT
-		LOCK_TEXT
-		KPROBES_TEXT
-		*(.fixup)
-		*(.gnu.warning)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-	} = 0x6ff0fff0
-#else
-	} = 0xf0fff06f
-#endif
-
-	/* We likely want __ex_table to be Cache Line aligned */
-	. = ALIGN(L1_CACHE_BYTES);		/* Exception table */
-	__start___ex_table = .;
-	__ex_table : C_PHYS(__ex_table) { *(__ex_table) }
-	__stop___ex_table = .;
-
-	_etext = .;			/* End of text section */
-
-	NOTES 
-	RO_DATA(PAGE_SIZE)
-
-	. = ALIGN(THREAD_SIZE);
-	.data : C_PHYS(.data) {			/* Data */
-		*(.data.init_task)
-
-		. = ALIGN(L1_CACHE_BYTES);
-		*(.data.cacheline_aligned)
-
-		. = ALIGN(L1_CACHE_BYTES);
-		*(.data.read_mostly)
-
-		. = ALIGN(PAGE_SIZE);
-		*(.data.page_aligned)
-
-		__nosave_begin = .;
-		*(.data.nosave)
-		. = ALIGN(PAGE_SIZE);
-		__nosave_end = .;
-
-		DATA_DATA
-		CONSTRUCTORS
-	}
-
-	_edata = .;			/* End of data section */
-
-	. = ALIGN(PAGE_SIZE);		/* Init code and data */
-	__init_begin = .;
-	_sinittext = .;
-	.init.text : C_PHYS(.init.text) { INIT_TEXT }
-	_einittext = .;
-	.init.data : C_PHYS(.init.data) { INIT_DATA }
-	. = ALIGN(L1_CACHE_BYTES);	/* Better if Cache Line aligned */
-	__setup_start = .;
-	.init.setup : C_PHYS(.init.setup) { *(.init.setup) }
-	__setup_end = .;
-	__initcall_start = .;
-	.initcall.init : C_PHYS(.initcall.init) {
-		INITCALLS
-	}
-	__initcall_end = .;
-	__con_initcall_start = .;
-	.con_initcall.init : C_PHYS(.con_initcall.init) {
-		*(.con_initcall.init)
-	}
-	__con_initcall_end = .;
-
-	SECURITY_INIT
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	. = ALIGN(PAGE_SIZE);
-	__initramfs_start = .;
-	.init.ramfs : C_PHYS(.init.ramfs) { *(.init.ramfs) }
-	__initramfs_end = .;
-#endif
-
-	. = ALIGN(8);
-	__machvec_start = .;
-	.machvec.init : C_PHYS(.machvec.init) { *(.machvec.init) }
-	__machvec_end = .;
-
-	PERCPU(PAGE_SIZE)
-
-	/*
-	 * .exit.text is discarded at runtime, not link time, to deal with
-	 * references from __bug_table
-	 */
-	.exit.text : C_PHYS(.exit.text) { EXIT_TEXT }
-	.exit.data : C_PHYS(.exit.data) { EXIT_DATA }
-
-	. = ALIGN(PAGE_SIZE);
-	.bss : C_PHYS(.bss) {
-		__init_end = .;
-		__bss_start = .;		/* BSS */
-		*(.bss.page_aligned)
-		*(.bss)
-		*(COMMON)
-		. = ALIGN(4);
-		_ebss = .;			/* uClinux MTD sucks */
-		_end = . ;
-	}
-
-	/*
-	 * When something in the kernel is NOT compiled as a module, the
-	 * module cleanup code and data are put into these segments. Both
-	 * can then be thrown away, as cleanup code is never called unless
-	 * it's a module.
-	 */
-	/DISCARD/ : {
-		*(.exitcall.exit)
-	}
-
-	STABS_DEBUG
-	DWARF_DEBUG
-}
-- 
cgit v0.10.2


From 7b022d07a0fd2ce02d4456b732c674ff1d16f5ce Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 10 May 2009 00:25:08 +0900
Subject: sh: Tidy up the ldscript output format specifier.

Tie this in to the Makefile directly, where we already know what we are
running on. This tidies up the linker script a bit, and is prep work for
unifying the arch/sh/boot/compressed linker scripts.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 68348e7..b941dc9 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -100,13 +100,17 @@ LDFLAGS_vmlinux	+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
 endif
 
 ifdef CONFIG_CPU_LITTLE_ENDIAN
-LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64'
+ld-bfd			:= elf32-$(UTS_MACHINE)-linux
+LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64' --oformat $(ld-bfd)
 LDFLAGS			+= -EL
 else
-LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64+4'
+ld-bfd			:= elf32-$(UTS_MACHINE)big-linux
+LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64+4' --oformat $(ld-bfd)
 LDFLAGS			+= -EB
 endif
 
+export ld-bfd
+
 head-y			:= arch/sh/kernel/init_task.o
 head-$(CONFIG_SUPERH32)	+= arch/sh/kernel/head_32.o
 head-$(CONFIG_SUPERH64)	+= arch/sh/kernel/head_64.o
diff --git a/arch/sh/boot/compressed/Makefile_32 b/arch/sh/boot/compressed/Makefile_32
index b96a055..2492557 100644
--- a/arch/sh/boot/compressed/Makefile_32
+++ b/arch/sh/boot/compressed/Makefile_32
@@ -28,7 +28,8 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup -T $(obj)/../../kernel/vmlinux.lds
+LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
+		   -T $(obj)/../../kernel/vmlinux.lds
 
 $(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE
 	$(call if_changed,ld)
diff --git a/arch/sh/boot/compressed/Makefile_64 b/arch/sh/boot/compressed/Makefile_64
index 658d4f9..541a529 100644
--- a/arch/sh/boot/compressed/Makefile_64
+++ b/arch/sh/boot/compressed/Makefile_64
@@ -24,8 +24,8 @@ OBJECTS		:= $(obj)/vmlinux_64.lds $(obj)/head_64.o $(obj)/misc_64.o \
 ZIMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_PAGE_OFFSET)+0x400000+0x10000]')
 
-LDFLAGS_vmlinux := -Ttext $(ZIMAGE_OFFSET) -e startup \
-		    -T $(obj)/../../kernel/vmlinux.lds
+LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(ZIMAGE_OFFSET) -e startup \
+		   -T $(obj)/../../kernel/vmlinux.lds
 
 $(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 8d97f52..1a2c8db 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -6,12 +6,8 @@
 #define LOAD_OFFSET	CONFIG_PAGE_OFFSET
 OUTPUT_ARCH(sh:sh5)
 #else
+#define LOAD_OFFSET	0
 OUTPUT_ARCH(sh)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-OUTPUT_FORMAT("elf32-sh-linux", "elf32-sh-linux", "elf32-sh-linux")
-#else
-OUTPUT_FORMAT("elf32-shbig-linux", "elf32-shbig-linux", "elf32-shbig-linux")
-#endif
 #endif
 
 #include <asm/thread_info.h>
-- 
cgit v0.10.2


From 151ab22cf71b7a1b9dd696d65a1a41e13d90cd00 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 9 May 2009 16:22:58 +0100
Subject: ASoC: Fix up CODEC DAI formats for big endian CPUs

ASoC uses the standard ALSA data format definitions to specify the wire
format used between the CPU and CODEC. Since the ALSA data formats all
include the endianess of the data but this information is not relevant
by the time the data has been encoded onto the serial link to the CODEC
this means that either all the CODEC drivers need to declare both big and
little endian variants or the core needs to fix up the format constraints
specified by CODEC drivers.

For now take the latter approach - this will need to be revisited if any
CODECs are endianness dependant.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index af11791..6ac68e4 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2387,6 +2387,39 @@ void snd_soc_unregister_platform(struct snd_soc_platform *platform)
 }
 EXPORT_SYMBOL_GPL(snd_soc_unregister_platform);
 
+static u64 codec_format_map[] = {
+	SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S16_BE,
+	SNDRV_PCM_FMTBIT_U16_LE | SNDRV_PCM_FMTBIT_U16_BE,
+	SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S24_BE,
+	SNDRV_PCM_FMTBIT_U24_LE | SNDRV_PCM_FMTBIT_U24_BE,
+	SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_S32_BE,
+	SNDRV_PCM_FMTBIT_U32_LE | SNDRV_PCM_FMTBIT_U32_BE,
+	SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_U24_3BE,
+	SNDRV_PCM_FMTBIT_U24_3LE | SNDRV_PCM_FMTBIT_U24_3BE,
+	SNDRV_PCM_FMTBIT_S20_3LE | SNDRV_PCM_FMTBIT_S20_3BE,
+	SNDRV_PCM_FMTBIT_U20_3LE | SNDRV_PCM_FMTBIT_U20_3BE,
+	SNDRV_PCM_FMTBIT_S18_3LE | SNDRV_PCM_FMTBIT_S18_3BE,
+	SNDRV_PCM_FMTBIT_U18_3LE | SNDRV_PCM_FMTBIT_U18_3BE,
+	SNDRV_PCM_FMTBIT_FLOAT_LE | SNDRV_PCM_FMTBIT_FLOAT_BE,
+	SNDRV_PCM_FMTBIT_FLOAT64_LE | SNDRV_PCM_FMTBIT_FLOAT64_BE,
+	SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE
+	| SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE,
+};
+
+/* Fix up the DAI formats for endianness: codecs don't actually see
+ * the endianness of the data but we're using the CPU format
+ * definitions which do need to include endianness so we ensure that
+ * codec DAIs always have both big and little endian variants set.
+ */
+static void fixup_codec_formats(struct snd_soc_pcm_stream *stream)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(codec_format_map); i++)
+		if (stream->formats & codec_format_map[i])
+			stream->formats |= codec_format_map[i];
+}
+
 /**
  * snd_soc_register_codec - Register a codec with the ASoC core
  *
@@ -2394,6 +2427,8 @@ EXPORT_SYMBOL_GPL(snd_soc_unregister_platform);
  */
 int snd_soc_register_codec(struct snd_soc_codec *codec)
 {
+	int i;
+
 	if (!codec->name)
 		return -EINVAL;
 
@@ -2403,6 +2438,11 @@ int snd_soc_register_codec(struct snd_soc_codec *codec)
 
 	INIT_LIST_HEAD(&codec->list);
 
+	for (i = 0; i < codec->num_dai; i++) {
+		fixup_codec_formats(&codec->dai[i].playback);
+		fixup_codec_formats(&codec->dai[i].capture);
+	}
+
 	mutex_lock(&client_mutex);
 	list_add(&codec->list, &codec_list);
 	snd_soc_instantiate_cards();
-- 
cgit v0.10.2


From 20b27fa33743c6ef77a1248421fab51e8bf21a25 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 10 May 2009 00:36:22 +0900
Subject: sh: Fix up the sh64 zImage build.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boot/compressed/Makefile_64 b/arch/sh/boot/compressed/Makefile_64
index 541a529..7093255 100644
--- a/arch/sh/boot/compressed/Makefile_64
+++ b/arch/sh/boot/compressed/Makefile_64
@@ -14,8 +14,7 @@
 targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
 		   head_64.o misc_64.o cache.o piggy.o
 
-OBJECTS		:= $(obj)/vmlinux_64.lds $(obj)/head_64.o $(obj)/misc_64.o \
-		   $(obj)/cache.o
+OBJECTS		:= $(obj)/head_64.o $(obj)/misc_64.o $(obj)/cache.o
 
 #
 # ZIMAGE_OFFSET is the load offset of the compression loader
diff --git a/arch/sh/boot/compressed/head_64.S b/arch/sh/boot/compressed/head_64.S
index 622eac3..9993113 100644
--- a/arch/sh/boot/compressed/head_64.S
+++ b/arch/sh/boot/compressed/head_64.S
@@ -14,6 +14,7 @@
  *   Copyright (C) 2002 Stuart Menefy (stuart.menefy@st.com)
  */
 #include <asm/cache.h>
+#include <asm/tlb.h>
 #include <cpu/mmu_context.h>
 #include <cpu/registers.h>
 
@@ -33,11 +34,7 @@
 #define	ICCR0_INIT_VAL	ICCR0_ON | ICCR0_ICI		/* ICE + ICI */
 #define	ICCR1_INIT_VAL	ICCR1_NOLOCK			/* No locking */
 
-#if 1
 #define	OCCR0_INIT_VAL	OCCR0_ON | OCCR0_OCI | OCCR0_WB	/* OCE + OCI + WB */
-#else
-#define	OCCR0_INIT_VAL	OCCR0_OFF
-#endif
 #define	OCCR1_INIT_VAL	OCCR1_NOLOCK			/* No locking */
 
 	.text
diff --git a/arch/sh/boot/compressed/vmlinux_64.lds b/arch/sh/boot/compressed/vmlinux_64.lds
deleted file mode 100644
index 59c2ef4..0000000
--- a/arch/sh/boot/compressed/vmlinux_64.lds
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * ld script to make compressed SuperH/shmedia Linux kernel+decompression
- *		bootstrap
- * Modified by Stuart Menefy from arch/sh/vmlinux.lds.S written by Niibe Yutaka
- */
-
-
-#ifdef CONFIG_LITTLE_ENDIAN
-/* OUTPUT_FORMAT("elf32-sh64l-linux", "elf32-sh64l-linux", "elf32-sh64l-linux") */
-#define NOP 0x6ff0fff0
-#else
-/* OUTPUT_FORMAT("elf32-sh64", "elf32-sh64", "elf32-sh64") */
-#define NOP 0xf0fff06f
-#endif
-
-OUTPUT_FORMAT("elf32-sh64-linux")
-OUTPUT_ARCH(sh)
-ENTRY(_start)
-
-#define ALIGNED_GAP(section, align) (((ADDR(section)+SIZEOF(section)+(align)-1) & ~((align)-1))-ADDR(section))
-#define FOLLOWING(section, align) AT (LOADADDR(section) + ALIGNED_GAP(section,align))
-
-SECTIONS
-{
-  _text = .;			/* Text and read-only data */
-
-  .text : {
-	*(.text)
-	*(.text64)
-	*(.text..SHmedia32)
-	*(.fixup)
-	*(.gnu.warning)
-	} = NOP
-  . = ALIGN(4);
-  .rodata : { *(.rodata) }
-
-  /* There is no 'real' reason for eight byte alignment, four would work
-   * as well, but gdb downloads much (*4) faster with this.
-   */
-  . = ALIGN(8);
-  .image : { *(.image) }
-  . = ALIGN(4);
-  _etext = .;			/* End of text section */
-
-  .data :			/* Data */
-	FOLLOWING(.image, 4)
-	{
-	_data = .;
-	*(.data)
-	}
-  _data_image = LOADADDR(.data);/* Address of data section in ROM */
-
-  _edata = .;			/* End of data section */
-
-  .stack : { stack = .;  _stack = .; }
-
-  . = ALIGN(4);
-  __bss_start = .;		/* BSS */
-  .bss : {
-	*(.bss)
-	}
-  . = ALIGN(4);
-  _end = . ;
-}
-- 
cgit v0.10.2


From a2e76c80d93ec3c59a030b6ca37b9087033565c1 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 10 May 2009 00:54:39 +0900
Subject: sh: Provide a tighter BOOT_LINK_OFFSET definition for the Cayman
 board.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6ed16c4..bca6a2a 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -704,6 +704,7 @@ config ZERO_PAGE_OFFSET
 
 config BOOT_LINK_OFFSET
 	hex "Link address offset for booting"
+	default "0x00400000" if SH_CAYMAN
 	default "0x00800000"
 	help
 	  This option allows you to set the link address offset of the zImage.
-- 
cgit v0.10.2


From b20883562455060272126c36563a7d8edafc30d3 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 10 May 2009 00:55:45 +0900
Subject: sh: Provide a BITS definition, use it in the arch/sh/boot/ Makefiles.

This introduces a BITS export that can handily be picked up by Makefiles
for cleaner sharing. Reflect its use in arch/sh/boot/compressed/ in
preparation for unifying the Makefiles.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index b941dc9..c1bbae1 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -91,9 +91,11 @@ KBUILD_IMAGE		:= $(defaultimage-y)
 #
 ifdef CONFIG_SUPERH32
 UTS_MACHINE	:= sh
+BITS		:= 32
 LDFLAGS_vmlinux	+= -e _stext
 else
 UTS_MACHINE	:= sh64
+BITS		:= 64
 LDFLAGS_vmlinux	+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
 		   --defsym phys_stext_shmedia=phys_stext+1 \
 		   -e phys_stext_shmedia
@@ -109,11 +111,9 @@ LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64+4' --oformat $(ld-bfd)
 LDFLAGS			+= -EB
 endif
 
-export ld-bfd
+export ld-bfd BITS
 
-head-y			:= arch/sh/kernel/init_task.o
-head-$(CONFIG_SUPERH32)	+= arch/sh/kernel/head_32.o
-head-$(CONFIG_SUPERH64)	+= arch/sh/kernel/head_64.o
+head-y	:= arch/sh/kernel/init_task.o arch/sh/kernel/head_$(BITS).o
 
 core-y				+= arch/sh/kernel/ arch/sh/mm/ arch/sh/boards/
 core-$(CONFIG_SH_FPU_EMU)	+= arch/sh/math-emu/
diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile
index 95483d1..78efb04 100644
--- a/arch/sh/boot/Makefile
+++ b/arch/sh/boot/Makefile
@@ -20,9 +20,6 @@ CONFIG_BOOT_LINK_OFFSET	?= 0x00800000
 CONFIG_ZERO_PAGE_OFFSET	?= 0x00001000
 CONFIG_ENTRY_OFFSET	?= 0x00001000
 
-export CONFIG_PAGE_OFFSET CONFIG_MEMORY_START CONFIG_BOOT_LINK_OFFSET \
-       CONFIG_ZERO_PAGE_OFFSET CONFIG_ENTRY_OFFSET
-
 targets := zImage vmlinux.srec uImage uImage.srec
 subdir- := compressed
 
@@ -43,6 +40,9 @@ KERNEL_MEMORY := $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_MEMORY_START)]')
 endif
 
+export CONFIG_PAGE_OFFSET CONFIG_MEMORY_START CONFIG_BOOT_LINK_OFFSET \
+       CONFIG_ZERO_PAGE_OFFSET CONFIG_ENTRY_OFFSET KERNEL_MEMORY
+
 KERNEL_LOAD	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_PAGE_OFFSET)  + \
 			$(KERNEL_MEMORY) + \
diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index efb01dc..f0a71df 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -1,5 +1 @@
-ifeq ($(CONFIG_SUPERH32),y)
 include ${srctree}/arch/sh/boot/compressed/Makefile_32
-else
-include ${srctree}/arch/sh/boot/compressed/Makefile_64
-endif
diff --git a/arch/sh/boot/compressed/Makefile_32 b/arch/sh/boot/compressed/Makefile_32
index 2492557..9531bf1 100644
--- a/arch/sh/boot/compressed/Makefile_32
+++ b/arch/sh/boot/compressed/Makefile_32
@@ -5,9 +5,9 @@
 #
 
 targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
-		   head_32.o misc_32.o piggy.o
+		   head_$(BITS).o misc_$(BITS).o piggy.o
 
-OBJECTS = $(obj)/head_32.o $(obj)/misc_32.o
+OBJECTS = $(obj)/head_$(BITS).o $(obj)/misc_$(BITS).o $(obj)/cache.o
 
 ifdef CONFIG_SH_STANDARD_BIOS
 OBJECTS += $(obj)/../../kernel/sh_bios.o
@@ -18,7 +18,7 @@ endif
 #
 IMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_PAGE_OFFSET)  + \
-			$(CONFIG_MEMORY_START) + \
+			$(KERNEL_MEMORY) + \
 			$(CONFIG_BOOT_LINK_OFFSET)]')
 
 LIBGCC	:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
-- 
cgit v0.10.2


From 1eca133cc9f978a8c44788fc5b2fe54219f9425c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 10 May 2009 00:58:21 +0900
Subject: sh: Merge the split arch/sh/boot/compressed/ Makefiles.

This kills off the _64 variant and moves the _32 one over as the generic
one to use.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index f0a71df..9531bf1 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -1 +1,47 @@
-include ${srctree}/arch/sh/boot/compressed/Makefile_32
+#
+# linux/arch/sh/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
+		   head_$(BITS).o misc_$(BITS).o piggy.o
+
+OBJECTS = $(obj)/head_$(BITS).o $(obj)/misc_$(BITS).o $(obj)/cache.o
+
+ifdef CONFIG_SH_STANDARD_BIOS
+OBJECTS += $(obj)/../../kernel/sh_bios.o
+endif
+
+#
+# IMAGE_OFFSET is the load offset of the compression loader
+#
+IMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
+		     $$[$(CONFIG_PAGE_OFFSET)  + \
+			$(KERNEL_MEMORY) + \
+			$(CONFIG_BOOT_LINK_OFFSET)]')
+
+LIBGCC	:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+
+ifeq ($(CONFIG_FUNCTION_TRACER),y)
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
+endif
+
+LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
+		   -T $(obj)/../../kernel/vmlinux.lds
+
+$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE
+	$(call if_changed,ld)
+	@:
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+OBJCOPYFLAGS += -R .empty_zero_page
+
+$(obj)/piggy.o: $(obj)/piggy.S $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,as_o_S)
diff --git a/arch/sh/boot/compressed/Makefile_32 b/arch/sh/boot/compressed/Makefile_32
deleted file mode 100644
index 9531bf1..0000000
--- a/arch/sh/boot/compressed/Makefile_32
+++ /dev/null
@@ -1,47 +0,0 @@
-#
-# linux/arch/sh/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
-		   head_$(BITS).o misc_$(BITS).o piggy.o
-
-OBJECTS = $(obj)/head_$(BITS).o $(obj)/misc_$(BITS).o $(obj)/cache.o
-
-ifdef CONFIG_SH_STANDARD_BIOS
-OBJECTS += $(obj)/../../kernel/sh_bios.o
-endif
-
-#
-# IMAGE_OFFSET is the load offset of the compression loader
-#
-IMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
-		     $$[$(CONFIG_PAGE_OFFSET)  + \
-			$(KERNEL_MEMORY) + \
-			$(CONFIG_BOOT_LINK_OFFSET)]')
-
-LIBGCC	:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
-
-ifeq ($(CONFIG_FUNCTION_TRACER),y)
-ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
-endif
-
-LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
-		   -T $(obj)/../../kernel/vmlinux.lds
-
-$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE
-	$(call if_changed,ld)
-	@:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
-	$(call if_changed,objcopy)
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-
-OBJCOPYFLAGS += -R .empty_zero_page
-
-$(obj)/piggy.o: $(obj)/piggy.S $(obj)/vmlinux.bin.gz FORCE
-	$(call if_changed,as_o_S)
diff --git a/arch/sh/boot/compressed/Makefile_64 b/arch/sh/boot/compressed/Makefile_64
deleted file mode 100644
index 7093255..0000000
--- a/arch/sh/boot/compressed/Makefile_64
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# arch/sh/boot/compressed/Makefile_64
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-# Copyright (C) 2002 Stuart Menefy
-# Copyright (C) 2004 Paul Mundt
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
-		   head_64.o misc_64.o cache.o piggy.o
-
-OBJECTS		:= $(obj)/head_64.o $(obj)/misc_64.o $(obj)/cache.o
-
-#
-# ZIMAGE_OFFSET is the load offset of the compression loader
-# (4M for the kernel plus 64K for this loader)
-#
-ZIMAGE_OFFSET	:= $(shell /bin/bash -c 'printf "0x%08x" \
-		     $$[$(CONFIG_PAGE_OFFSET)+0x400000+0x10000]')
-
-LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(ZIMAGE_OFFSET) -e startup \
-		   -T $(obj)/../../kernel/vmlinux.lds
-
-$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o FORCE
-	$(call if_changed,ld)
-	@:
-
-$(obj)/vmlinux.bin: vmlinux FORCE
-	$(call if_changed,objcopy)
-
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
-
-OBJCOPYFLAGS += -R .empty_zero_page
-
-$(obj)/piggy.o: $(obj)/piggy.S $(obj)/vmlinux.bin.gz FORCE
-	$(call if_changed,as_o_S)
-- 
cgit v0.10.2


From b412a49af970e703124fa3259bf24165c0c74024 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 10 May 2009 01:23:25 +0900
Subject: sh: Consolidate the boot link and entry offset definitions.

Consolidate these in a single place in the Kconfig menus. At the same
time, disable their interactivity and set them according to the board
config defaults.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index bca6a2a..a9dee13 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -694,23 +694,37 @@ endmenu
 menu "Boot options"
 
 config ZERO_PAGE_OFFSET
-	hex "Zero page offset"
-	default "0x00004000" if SH_SH03
-	default "0x00010000" if PAGE_SIZE_64KB
+	hex
+	default "0x00010000" if PAGE_SIZE_64KB || SH_RTS7751R2D || \
+				SH_7751_SOLUTION_ENGINE
+	default "0x00004000" if PAGE_SIZE_16KB || SH_SH03
 	default "0x00002000" if PAGE_SIZE_8KB
 	default "0x00001000"
 	help
 	  This sets the default offset of zero page.
 
 config BOOT_LINK_OFFSET
-	hex "Link address offset for booting"
+	hex
+	default "0x00210000" if SH_SHMIN
 	default "0x00400000" if SH_CAYMAN
+	default "0x00810000" if SH_7780_SOLUTION_ENGINE
+	default "0x009e0000" if SH_TITAN
+	default "0x01800000" if SH_SDK7780
+	default "0x02000000" if SH_EDOSK7760
 	default "0x00800000"
 	help
 	  This option allows you to set the link address offset of the zImage.
 	  This can be useful if you are on a board which has a small amount of
 	  memory.
 
+config ENTRY_OFFSET
+	hex
+	default "0x00001000" if PAGE_SIZE_4KB
+	default "0x00002000" if PAGE_SIZE_8KB
+	default "0x00004000" if PAGE_SIZE_16KB
+	default "0x00010000" if PAGE_SIZE_64KB
+	default "0x00000000"
+
 config UBC_WAKEUP
 	bool "Wakeup UBC on startup"
 	depends on CPU_SH4 && !CPU_SH4A
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index d4079ca..b900d2cd 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -201,14 +201,6 @@ config PAGE_SIZE_64KB
 
 endchoice
 
-config ENTRY_OFFSET
-	hex
-	default "0x00001000" if PAGE_SIZE_4KB
-	default "0x00002000" if PAGE_SIZE_8KB
-	default "0x00004000" if PAGE_SIZE_16KB
-	default "0x00010000" if PAGE_SIZE_64KB
-	default "0x00000000"
-
 choice
 	prompt "HugeTLB page size"
 	depends on HUGETLB_PAGE && (CPU_SH4 || CPU_SH5) && MMU
-- 
cgit v0.10.2


From 457daa2b66e07bbd2280b9f8d2b03e800f357243 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 10 May 2009 01:28:01 +0900
Subject: sh: Hook up cc-cross-prefix support.

This implements a simple case that just iterates through the common
cases, looking at UTS_MACHINE for hints.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index c1bbae1..a47fbd2 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -101,6 +101,12 @@ LDFLAGS_vmlinux	+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
 		   -e phys_stext_shmedia
 endif
 
+ifneq ($(SUBARCH),$(ARCH))
+  ifeq ($(CROSS_COMPILE),)
+    CROSS_COMPILE := $(call cc-cross-prefix, $(UTS_MACHINE)-linux-  $(UTS_MACHINE)-linux-gnu-  $(UTS_MACHINE)-unknown-linux-gnu-)
+  endif
+endif
+
 ifdef CONFIG_CPU_LITTLE_ENDIAN
 ld-bfd			:= elf32-$(UTS_MACHINE)-linux
 LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64' --oformat $(ld-bfd)
-- 
cgit v0.10.2


From 567bb8fd47624cb9f894c64ce9530d43d5862a71 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 10 May 2009 14:25:39 +0900
Subject: sh: Fix up R0 dependence in __arch_swab16/32.

There is nothing in these routines that inherently depends on R0 use.
Given that these routines are inlined, it is rather easy to blow up the
compiler by exhausting the spill class when performing a 64-bit swab.

This presently manifests itself as the following:

CC      fs/ocfs2/suballoc.o
fs/ocfs2/suballoc.c: In function 'ocfs2_reserve_suballoc_bits':
fs/ocfs2/suballoc.c:638: error: unrecognizable insn:
(insn 2793 1230 1231 103 arch/sh/include/asm/swab.h:33 (set (reg:HI 853)
        (subreg:HI (reg:SI 149 macl) 2)) -1 (expr_list:REG_DEAD (reg:SI 149 macl)
        (nil)))
fs/ocfs2/suballoc.c:638: internal compiler error: in extract_insn, at recog.c:1991

This patch switches over to using an arbitrarily assigned register instead.

While the same issue does not exist in the SH-5 case, there is likewise no harm
in having an alternate register used for the byterev/shari pair.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/swab.h b/arch/sh/include/asm/swab.h
index e693159..0e08fe5 100644
--- a/arch/sh/include/asm/swab.h
+++ b/arch/sh/include/asm/swab.h
@@ -14,15 +14,15 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
 {
 	__asm__(
 #ifdef __SH5__
-		"byterev	%0, %0\n\t"
+		"byterev	%1, %0\n\t"
 		"shari		%0, 32, %0"
 #else
-		"swap.b		%0, %0\n\t"
+		"swap.b		%1, %0\n\t"
 		"swap.w		%0, %0\n\t"
 		"swap.b		%0, %0"
 #endif
 		: "=r" (x)
-		: "0" (x));
+		: "r" (x));
 
 	return x;
 }
@@ -32,13 +32,13 @@ static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
 {
 	__asm__(
 #ifdef __SH5__
-		"byterev	%0, %0\n\t"
+		"byterev	%1, %0\n\t"
 		"shari		%0, 32, %0"
 #else
-		"swap.b		%0, %0"
+		"swap.b		%1, %0"
 #endif
 		: "=r" (x)
-		:  "0" (x));
+		:  "r" (x));
 
 	return x;
 }
-- 
cgit v0.10.2


From 82afae6016b672acb90ceb8e773bba0bd977d2a3 Mon Sep 17 00:00:00 2001
From: Erdem Aktas <eaktas1@gmail.com>
Date: Sun, 10 May 2009 02:13:19 -0400
Subject: perf_counter tools: fix buffer overwrite problem for perf top command

There is a buffer overwrite problem in builtin-top.c line 526, When I
tried to use ./perf top command, it was giving memory corruption
problem.

[ Impact: fix 'perf top' crash ]

LKML-Reference: <3fee128b0905092313x608e65e0l7b1116d86914114f@mail.gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index cd6f61d..b1549dd 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -523,7 +523,7 @@ static int read_symbol(FILE *in, struct sym_entry *s)
 	if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
 		return 1;
 
-	s->sym = malloc(strlen(str));
+	s->sym = malloc(strlen(str)+1);
 	assert(s->sym);
 
 	strcpy((char *)s->sym, str);
-- 
cgit v0.10.2


From b74d446f1f337e3fe906169a3266cb65ffa4179e Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sat, 9 May 2009 15:35:10 +0600
Subject: x86: Fix false positive section mismatch warnings in the apic code

[ Impact: reduce kernel image size a bit, annotate away warnings ]

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
[ modified and tested it ]
Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
Cc: Marcin Slusarz <marcin.slusarz@gmail.com>
LKML-Reference: <b9df5fa10905090235s4bfd26a8o979f93809c9727ad@mail.gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 1c11b81..3029477 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -254,7 +254,7 @@ static int parse_unisys_oem(char *oemptr)
 }
 
 #ifdef CONFIG_ACPI
-static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
+static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
 {
 	struct acpi_table_header *header = NULL;
 	struct es7000_oem_table *table;
@@ -285,7 +285,7 @@ static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
 	return 0;
 }
 
-static void unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
 {
 	if (!oem_addr)
 		return;
@@ -306,7 +306,7 @@ static int es7000_check_dsdt(void)
 static int es7000_acpi_ret;
 
 /* Hook from generic ACPI tables.c */
-static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	unsigned long oem_addr = 0;
 	int check_dsdt;
@@ -717,7 +717,7 @@ struct apic apic_es7000_cluster = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 };
 
-struct apic apic_es7000 = {
+struct apic __refdata apic_es7000 = {
 
 	.name				= "es7000",
 	.probe				= probe_es7000,
-- 
cgit v0.10.2


From 5e751e992f3fb08ba35e1ca8095ec8fbf9eda523 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 May 2009 13:55:22 +0100
Subject: CRED: Rename cred_exec_mutex to reflect that it's a guard against
 ptrace

Rename cred_exec_mutex to reflect that it's a guard against foreign
intervention on a process's credential state, such as is made by ptrace().  The
attachment of a debugger to a process affects execve()'s calculation of the new
credential state - _and_ also setprocattr()'s calculation of that state.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/fs/compat.c b/fs/compat.c
index 681ed81..bb2a9b2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1488,7 +1488,7 @@ int compat_do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
 	if (retval < 0)
 		goto out_free;
 	current->in_execve = 1;
@@ -1550,7 +1550,7 @@ int compat_do_execve(char * filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1573,7 +1573,7 @@ out_unmark:
 
 out_unlock:
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 
 out_free:
 	free_bprm(bprm);
diff --git a/fs/exec.c b/fs/exec.c
index 639177b..998e856 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1045,7 +1045,7 @@ void install_exec_creds(struct linux_binprm *bprm)
 	commit_creds(bprm->cred);
 	bprm->cred = NULL;
 
-	/* cred_exec_mutex must be held at least to this point to prevent
+	/* cred_guard_mutex must be held at least to this point to prevent
 	 * ptrace_attach() from altering our determination of the task's
 	 * credentials; any time after this it may be unlocked */
 
@@ -1055,7 +1055,7 @@ EXPORT_SYMBOL(install_exec_creds);
 
 /*
  * determine how safe it is to execute the proposed program
- * - the caller must hold current->cred_exec_mutex to protect against
+ * - the caller must hold current->cred_guard_mutex to protect against
  *   PTRACE_ATTACH
  */
 int check_unsafe_exec(struct linux_binprm *bprm)
@@ -1297,7 +1297,7 @@ int do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
 	if (retval < 0)
 		goto out_free;
 	current->in_execve = 1;
@@ -1360,7 +1360,7 @@ int do_execve(char * filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1383,7 +1383,7 @@ out_unmark:
 
 out_unlock:
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 
 out_free:
 	free_bprm(bprm);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d87247d..7f54ba9 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -145,8 +145,8 @@ extern struct cred init_cred;
 	.group_leader	= &tsk,						\
 	.real_cred	= &init_cred,					\
 	.cred		= &init_cred,					\
-	.cred_exec_mutex =						\
-		 __MUTEX_INITIALIZER(tsk.cred_exec_mutex),		\
+	.cred_guard_mutex =						\
+		 __MUTEX_INITIALIZER(tsk.cred_guard_mutex),		\
 	.comm		= "swapper",					\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3fa82b3..5932ace 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1247,7 +1247,9 @@ struct task_struct {
 					 * credentials (COW) */
 	const struct cred *cred;	/* effective (overridable) subjective task
 					 * credentials (COW) */
-	struct mutex cred_exec_mutex;	/* execve vs ptrace cred calculation mutex */
+	struct mutex cred_guard_mutex;	/* guard against foreign influences on
+					 * credential calculations
+					 * (notably. ptrace) */
 
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a03918..1bb4d7e 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -167,7 +167,7 @@ EXPORT_SYMBOL(prepare_creds);
 
 /*
  * Prepare credentials for current to perform an execve()
- * - The caller must hold current->cred_exec_mutex
+ * - The caller must hold current->cred_guard_mutex
  */
 struct cred *prepare_exec_creds(void)
 {
@@ -276,7 +276,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	struct cred *new;
 	int ret;
 
-	mutex_init(&p->cred_exec_mutex);
+	mutex_init(&p->cred_guard_mutex);
 
 	if (
 #ifdef CONFIG_KEYS
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0692ab5..27ac802 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -185,10 +185,11 @@ int ptrace_attach(struct task_struct *task)
 	if (same_thread_group(task, current))
 		goto out;
 
-	/* Protect exec's credential calculations against our interference;
-	 * SUID, SGID and LSM creds get determined differently under ptrace.
+	/* Protect the target's credential calculations against our
+	 * interference; SUID, SGID and LSM creds get determined differently
+	 * under ptrace.
 	 */
-	retval = mutex_lock_interruptible(&task->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&task->cred_guard_mutex);
 	if (retval  < 0)
 		goto out;
 
@@ -232,7 +233,7 @@ repeat:
 bad:
 	write_unlock_irqrestore(&tasklist_lock, flags);
 	task_unlock(task);
-	mutex_unlock(&task->cred_exec_mutex);
+	mutex_unlock(&task->cred_guard_mutex);
 out:
 	return retval;
 }
-- 
cgit v0.10.2


From 107db7c7dd137aeb7361b8c2606ac936c0be58ff Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 May 2009 13:55:27 +0100
Subject: CRED: Guard the setprocattr security hook against ptrace

Guard the setprocattr security hook against ptrace by taking the target task's
cred_guard_mutex around it.  The problem is that setprocattr() may otherwise
note the lack of a debugger, and then perform an action on that basis whilst
letting a debugger attach between the two points.  Holding cred_guard_mutex
across the test and the action prevents ptrace_attach() from doing that.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/fs/proc/base.c b/fs/proc/base.c
index fb45615..23342e1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2128,9 +2128,15 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 	if (copy_from_user(page, buf, count))
 		goto out_free;
 
+	/* Guard against adverse ptrace interaction */
+	length = mutex_lock_interruptible(&task->cred_guard_mutex);
+	if (length < 0)
+		goto out_free;
+
 	length = security_setprocattr(task,
 				      (char*)file->f_path.dentry->d_name.name,
 				      (void*)page, count);
+	mutex_unlock(&task->cred_guard_mutex);
 out_free:
 	free_page((unsigned long) page);
 out:
-- 
cgit v0.10.2


From d9d674e50007362567edb5df65c05dd56a5964bf Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 11 May 2009 12:12:38 +0900
Subject: sh: Fix up typo in arch/sh/kernel/vmlinux.lds.S

.init_ramfs ought to be .init.ramfs, fix it up.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 1a2c8db..d737230 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -127,7 +127,7 @@ SECTIONS
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	. = ALIGN(PAGE_SIZE);
-	.init.ramfs : AT(ADDR(.init_ramfs) - LOAD_OFFSET) {
+	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
 		__initramfs_start = .;
 		*(.init.ramfs)
 		__initramfs_end = .;
-- 
cgit v0.10.2


From 780f98ff1fa9cfcab177f6b5ab09b11321f1e5c8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 11 May 2009 12:15:14 +0900
Subject: sh: Account for INITIAL_JIFFIES when using jiffies clocksource.

In the case where we fall back on the generic jiffies clocksource,
INITIAL_JIFFIES needs to be accounted for so that printk times aren't
completely skewed.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index e0aa769..a77838f 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -104,7 +104,7 @@ unsigned long long sched_clock(void)
 
 	/* jiffies based sched_clock if no clocksource is installed */
 	if (!clocksource_sh.rating)
-		return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+		return (jiffies_64 - INITIAL_JIFFIES) * (NSEC_PER_SEC / HZ);
 
 	cycles = clocksource_sh.read(&clocksource_sh);
 	return cyc2ns(&clocksource_sh, cycles);
-- 
cgit v0.10.2


From 01f2bd48d08c6bbde12d86b66c760612e33e49a9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 11 May 2009 08:12:43 +0200
Subject: ALSA: hda - Add missing models for Realtek codecs

Added the missing descriptions and the model names for Realtek codecs
to the documentation and the config table.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 36c9712..92c9d3a 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -36,6 +36,7 @@ ALC260
   acer		Acer TravelMate
   will		Will laptops (PB V7900)
   replacer	Replacer 672V
+  favorit100	Maxdata Favorit 100XS
   basic		fixed pin assignment (old default model)
   test		for testing/debugging purpose, almost all controls can
 		adjusted.  Appearing only when compiled with
@@ -85,10 +86,11 @@ ALC269
   eeepc-p703	ASUS Eeepc P703 P900A
   eeepc-p901	ASUS Eeepc P901 S101
   fujitsu	FSC Amilo
+  lifebook	Fujitsu Lifebook S6420
   auto		auto-config reading BIOS (default)
 
-ALC662/663
-==========
+ALC662/663/272
+==============
   3stack-dig	3-stack (2-channel) with SPDIF
   3stack-6ch	 3-stack (6-channel)
   3stack-6ch-dig 3-stack (6-channel) with SPDIF
@@ -107,6 +109,8 @@ ALC662/663
   asus-mode4	ASUS
   asus-mode5	ASUS
   asus-mode6	ASUS
+  dell		Dell with ALC272
+  dell-zm1	Dell ZM1 with ALC272
   auto		auto-config reading BIOS (default)
 
 ALC882/885
@@ -118,6 +122,7 @@ ALC882/885
   asus-a7j	ASUS A7J
   asus-a7m	ASUS A7M
   macpro	MacPro support
+  mb5		Macbook 5,1
   mbp3		Macbook Pro rev3
   imac24	iMac 24'' with jack detection
   w2jc		ASUS W2JC
@@ -150,6 +155,7 @@ ALC883/888
   fujitsu-pi2515 Fujitsu AMILO Pi2515
   fujitsu-xa3530 Fujitsu AMILO XA3530
   3stack-6ch-intel Intel DG33* boards
+  asus-p5q	ASUS P5Q-EM boards
   auto		auto-config reading BIOS (default)
 
 ALC861/660
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 34f6fb7..db89612 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -16184,6 +16184,8 @@ static const char *alc662_models[ALC662_MODEL_LAST] = {
 	[ALC663_ASUS_MODE4] = "asus-mode4",
 	[ALC663_ASUS_MODE5] = "asus-mode5",
 	[ALC663_ASUS_MODE6] = "asus-mode6",
+	[ALC272_DELL]		= "dell",
+	[ALC272_DELL_ZM1]	= "dell-zm1",
 	[ALC662_AUTO]		= "auto",
 };
 
-- 
cgit v0.10.2


From 45fbe3ee01b8e463b28c2751b5dcc0cbdc142d90 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 6 May 2009 08:06:44 -0700
Subject: x86, e820, pci: reserve extra free space near end of RAM

The point is to take all RAM resources we have, and
_after_ we've added all the resources we've seen in
the E820 tree, we then _also_ try to add fake reserved
entries for any "round up to X" at the end of the RAM
resources.

[ Impact: improve PCI mem-resource allocation robustness, protect "stolen RAM" ]

Reported-by: Yannick Roehlly <yannick.roehlly@free.fr>
Acked-by: Jesse Barnes <jesse.barnes@intel.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: yannick.roehlly@free.fr
LKML-Reference: <4A01A784.2050407@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0062813..a2335d9 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1371,6 +1371,23 @@ void __init e820_reserve_resources(void)
 	}
 }
 
+/* How much should we pad RAM ending depending on where it is? */
+static unsigned long ram_alignment(resource_size_t pos)
+{
+	unsigned long mb = pos >> 20;
+
+	/* To 64kB in the first megabyte */
+	if (!mb)
+		return 64*1024;
+
+	/* To 1MB in the first 16MB */
+	if (mb < 16)
+		return 1024*1024;
+
+	/* To 32MB for anything above that */
+	return 32*1024*1024;
+}
+
 void __init e820_reserve_resources_late(void)
 {
 	int i;
@@ -1382,6 +1399,24 @@ void __init e820_reserve_resources_late(void)
 			insert_resource_expand_to_fit(&iomem_resource, res);
 		res++;
 	}
+
+	/*
+	 * Try to bump up RAM regions to reasonable boundaries to
+	 * avoid stolen RAM:
+	 */
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *entry = &e820_saved.map[i];
+		resource_size_t start, end;
+
+		if (entry->type != E820_RAM)
+			continue;
+		start = entry->addr + entry->size;
+		end = round_up(start, ram_alignment(start));
+		if (start == end)
+			continue;
+		reserve_region_with_split(&iomem_resource, start,
+						  end - 1, "RAM buffer");
+	}
 }
 
 char *__init default_machine_specific_memory_setup(void)
-- 
cgit v0.10.2


From 5d423ccd7ba4285f1084e91b26805e1d0ae978ed Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 08:07:52 -0700
Subject: x86/pci: remove rounding quirk from e820_setup_gap()

Now that the e820 code explicitly reserves 'potentially dangerous'
free physical memory address space to protect ACPI stolen RAM,
there's no need for the rounding quirk in the PCI allocator anymore.

Also, this quirk was open-ended iteration that could end up reserving
a lot of free space and potentially breaking drivers - such as the one
reported by Yannick Roehlly <yannick.roehlly@free.fr> where there's
a PCI device with a large memory resource.

So remove it.

[ Impact: make more of the PCI hole available for assigning pci devices ]

Reported-by: Yannick Roehlly <yannick.roehlly@free.fr>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Jesse Barnes <jesse.barnes@intel.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A01A7C8.5090701@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a2335d9..7271fa3 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -617,7 +617,7 @@ __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
  */
 __init void e820_setup_gap(void)
 {
-	unsigned long gapstart, gapsize, round;
+	unsigned long gapstart, gapsize;
 	int found;
 
 	gapstart = 0x10000000;
@@ -635,14 +635,9 @@ __init void e820_setup_gap(void)
 #endif
 
 	/*
-	 * See how much we want to round up: start off with
-	 * rounding to the next 1MB area.
+	 * e820_reserve_resources_late protect stolen RAM already
 	 */
-	round = 0x100000;
-	while ((gapsize >> 4) > round)
-		round += round;
-	/* Fun with two's complement */
-	pci_mem_start = (gapstart + round) & -round;
+	pci_mem_start = gapstart;
 
 	printk(KERN_INFO
 	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
-- 
cgit v0.10.2


From 53d6979ab6747e758207e8ac861b96d0da0d3332 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:35 +0900
Subject: nbd: don't clear rq->sector and nr_sectors unnecessarily

There's no reason to clear rq->sector and nr_sectors after calling
blk_rq_init().  They're guaranteed to be clear.  Drop unnecessary
clearing.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4d6de4f..a9ab8be 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -580,13 +580,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
 		blk_rq_init(NULL, &sreq);
 		sreq.cmd_type = REQ_TYPE_SPECIAL;
 		nbd_cmd(&sreq) = NBD_CMD_DISC;
-		/*
-		 * Set these to sane values in case server implementation
-		 * fails to check the request type first and also to keep
-		 * debugging output cleaner.
-		 */
-		sreq.sector = 0;
-		sreq.nr_sectors = 0;
 		if (!lo->sock)
 			return -EINVAL;
 		nbd_send_req(lo, &sreq);
-- 
cgit v0.10.2


From 9720aef2539c10e3a872e9a92beec225030d99db Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:36 +0900
Subject: ide-tape: don't initialize rq->sector for rw requests

rq->sector is set to the tape->first_frame but it's never actually
used and not even in the correct unit (512 byte sectors).  Don't set
it.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Borislav Petkov <petkovbb@gmail.com>
Acked-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 8dfc688..7149224 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -892,7 +892,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
-	rq->sector = tape->first_frame;
 
 	if (size) {
 		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
-- 
cgit v0.10.2


From c3a4d78c580de4edc9ef0f7c59812fb02ceb037f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:37 +0900
Subject: block: add rq->resid_len

rq->data_len served two purposes - the length of data buffer on issue
and the residual count on completion.  This duality creates some
headaches.

First of all, block layer and low level drivers can't really determine
what rq->data_len contains while a request is executing.  It could be
the total request length or it coulde be anything else one of the
lower layers is using to keep track of residual count.  This
complicates things because blk_rq_bytes() and thus
[__]blk_end_request_all() relies on rq->data_len for PC commands.
Drivers which want to report residual count should first cache the
total request length, update rq->data_len and then complete the
request with the cached data length.

Secondly, it makes requests default to reporting full residual count,
ie. reporting that no data transfer occurred.  The residual count is
an exception not the norm; however, the driver should clear
rq->data_len to zero to signify the normal cases while leaving it
alone means no data transfer occurred at all.  This reverse default
behavior complicates code unnecessarily and renders block PC on some
drivers (ide-tape/floppy) unuseable.

This patch adds rq->resid_len which is used only for residual count.

While at it, remove now unnecessasry blk_rq_bytes() caching in
ide_pc_intr() as rq->data_len is not changed anymore.

Boaz	: spotted missing conversion in osd
Sergei	: spotted too early conversion to blk_rq_bytes() in ide-tape

[ Impact: cleanup residual count handling, report 0 resid by default ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Mike Miller <mike.miller@hp.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Doug Gilbert <dgilbert@interlog.com>
Cc: Mike Miller <mike.miller@hp.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Darrick J. Wong <djwong@us.ibm.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/bsg.c b/block/bsg.c
index 206060e..2d746e3 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -445,14 +445,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	}
 
 	if (rq->next_rq) {
-		hdr->dout_resid = rq->data_len;
-		hdr->din_resid = rq->next_rq->data_len;
+		hdr->dout_resid = rq->resid_len;
+		hdr->din_resid = rq->next_rq->resid_len;
 		blk_rq_unmap_user(bidi_bio);
 		blk_put_request(rq->next_rq);
 	} else if (rq_data_dir(rq) == READ)
-		hdr->din_resid = rq->data_len;
+		hdr->din_resid = rq->resid_len;
 	else
-		hdr->dout_resid = rq->data_len;
+		hdr->dout_resid = rq->resid_len;
 
 	/*
 	 * If the request generated a negative error number, return it
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 58cf456..a9670dd 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -230,7 +230,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 	hdr->info = 0;
 	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
-	hdr->resid = rq->data_len;
+	hdr->resid = rq->resid_len;
 	hdr->sb_len_wr = 0;
 
 	if (rq->sense_len && hdr->sbp) {
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 4d4d5e0..f22d493 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1299,7 +1299,6 @@ static void cciss_softirq_done(struct request *rq)
 {
 	CommandList_struct *cmd = rq->completion_data;
 	ctlr_info_t *h = hba[cmd->ctlr];
-	unsigned int nr_bytes;
 	unsigned long flags;
 	u64bit temp64;
 	int i, ddir;
@@ -1321,15 +1320,11 @@ static void cciss_softirq_done(struct request *rq)
 	printk("Done with %p\n", rq);
 #endif				/* CCISS_DEBUG */
 
-	/*
-	 * Store the full size and set the residual count for pc requests
-	 */
-	nr_bytes = blk_rq_bytes(rq);
+	/* set the residual count for pc requests */
 	if (blk_pc_request(rq))
-		rq->data_len = cmd->err_info->ResidualCnt;
+		rq->resid_len = cmd->err_info->ResidualCnt;
 
-	if (blk_end_request(rq, (rq->errors == 0) ? 0 : -EIO, nr_bytes))
-		BUG();
+	blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
 
 	spin_lock_irqsave(&h->lock, flags);
 	cmd_free(h, cmd, 1);
@@ -2691,7 +2686,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 			printk(KERN_WARNING "cciss: cmd %p has"
 			       " completed with data underrun "
 			       "reported\n", cmd);
-			cmd->rq->data_len = cmd->err_info->ResidualCnt;
+			cmd->rq->resid_len = cmd->err_info->ResidualCnt;
 		}
 		break;
 	case CMD_DATA_OVERRUN:
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 689cd27..8c2cc71 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -783,10 +783,8 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 
 	if (cmd->error == 0) {
 		if (blk_pc_request(rq)) {
-			if (cmd->act_len >= rq->data_len)
-				rq->data_len = 0;
-			else
-				rq->data_len -= cmd->act_len;
+			if (cmd->act_len < rq->data_len)
+				rq->resid_len = rq->data_len - cmd->act_len;
 			scsi_status = 0;
 		} else {
 			if (cmd->act_len != cmd->len) {
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index afe5a43..e4a02a0 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -367,7 +367,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	/* No more interrupts */
 	if ((stat & ATA_DRQ) == 0) {
 		int uptodate, error;
-		unsigned int done;
 
 		debug_log("Packet command completed, %d bytes transferred\n",
 			  pc->xferred);
@@ -406,12 +405,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
 			dsc = 1;
 
-		/*
-		 * ->pc_callback() might change rq->data_len for
-		 * residual count, cache total length.
-		 */
-		done = blk_rq_bytes(rq);
-
 		/* Command finished - Call the callback function */
 		uptodate = drive->pc_callback(drive, dsc);
 
@@ -431,7 +424,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			error = uptodate ? 0 : -EIO;
 		}
 
-		ide_complete_rq(drive, error, done);
+		ide_complete_rq(drive, error, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 6736287..8bbe222 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -519,7 +519,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
 
 		if (buffer)
-			*bufflen = rq->data_len;
+			*bufflen = rq->resid_len;
 
 		flags = rq->cmd_flags;
 		blk_put_request(rq);
@@ -707,11 +707,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
 out_end:
 	if (blk_pc_request(rq) && rc == 0) {
-		unsigned int dlen = rq->data_len;
-
-		rq->data_len = 0;
-
-		if (blk_end_request(rq, 0, dlen))
+		if (blk_end_request(rq, 0, rq->data_len))
 			BUG();
 
 		hwif->rq = NULL;
@@ -740,9 +736,10 @@ out_end:
 			nsectors = 1;
 
 		if (blk_fs_request(rq) == 0) {
-			rq->data_len -= (cmd->nbytes - cmd->nleft);
+			rq->resid_len = rq->data_len -
+				(cmd->nbytes - cmd->nleft);
 			if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
-				rq->data_len += cmd->last_xfer_len;
+				rq->resid_len += cmd->last_xfer_len;
 		}
 
 		ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 7149224..65c5b70 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -380,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		}
 
 		tape->first_frame += blocks;
-		rq->data_len -= blocks * tape->blk_size;
+		rq->resid_len = rq->data_len - blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
@@ -903,7 +903,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
 	/* calculate the number of transferred bytes and update buffer state */
-	size -= rq->data_len;
+	size -= rq->resid_len;
 	tape->cur = tape->buf;
 	if (cmd == REQ_IDETAPE_READ)
 		tape->valid = size;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index a9019f0..5d5f347 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1357,8 +1357,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply;
 		memcpy(req->sense, smprep, sizeof(*smprep));
 		req->sense_len = sizeof(*smprep);
-		req->data_len = 0;
-		rsp->data_len -= smprep->ResponseDataLength;
+		rsp->resid_len = rsp->data_len - smprep->ResponseDataLength;
 	} else {
 		printk(MYIOC_s_ERR_FMT "%s: smp passthru reply failed to be returned\n",
 		    ioc->name, __func__);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 3da02e4..6605ec9 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1936,12 +1936,8 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 			       bio_data(rsp->bio), rsp->data_len);
 	if (ret > 0) {
 		/* positive number is the untransferred residual */
-		rsp->data_len = ret;
-		req->data_len = 0;
+		rsp->resid_len = ret;
 		ret = 0;
-	} else if (ret == 0) {
-		rsp->data_len = 0;
-		req->data_len = 0;
 	}
 
 	return ret;
diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c
index d110a36..89952ed 100644
--- a/drivers/scsi/libsas/sas_host_smp.c
+++ b/drivers/scsi/libsas/sas_host_smp.c
@@ -134,7 +134,7 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 {
 	u8 *req_data = NULL, *resp_data = NULL, *buf;
 	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
-	int error = -EINVAL, resp_data_len = rsp->data_len;
+	int error = -EINVAL;
 
 	/* eight is the minimum size for request and response frames */
 	if (req->data_len < 8 || rsp->data_len < 8)
@@ -176,17 +176,20 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 	resp_data[1] = req_data[1];
 	resp_data[2] = SMP_RESP_FUNC_UNK;
 
+	req->resid_len = req->data_len;
+	rsp->resid_len = rsp->data_len;
+
 	switch (req_data[1]) {
 	case SMP_REPORT_GENERAL:
-		req->data_len -= 8;
-		resp_data_len -= 32;
+		req->resid_len -= 8;
+		rsp->resid_len -= 32;
 		resp_data[2] = SMP_RESP_FUNC_ACC;
 		resp_data[9] = sas_ha->num_phys;
 		break;
 
 	case SMP_REPORT_MANUF_INFO:
-		req->data_len -= 8;
-		resp_data_len -= 64;
+		req->resid_len -= 8;
+		rsp->resid_len -= 64;
 		resp_data[2] = SMP_RESP_FUNC_ACC;
 		memcpy(resp_data + 12, shost->hostt->name,
 		       SAS_EXPANDER_VENDOR_ID_LEN);
@@ -199,13 +202,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_DISCOVER:
-		req->data_len -= 16;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 16;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 56;
+		rsp->resid_len -= 56;
 		sas_host_smp_discover(sas_ha, resp_data, req_data[9]);
 		break;
 
@@ -215,13 +218,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_REPORT_PHY_SATA:
-		req->data_len -= 16;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 16;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 60;
+		rsp->resid_len -= 60;
 		sas_report_phy_sata(sas_ha, resp_data, req_data[9]);
 		break;
 
@@ -238,13 +241,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_PHY_CONTROL:
-		req->data_len -= 44;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 44;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 8;
+		rsp->resid_len -= 8;
 		sas_phy_control(sas_ha, req_data[9], req_data[10],
 				req_data[32] >> 4, req_data[33] >> 4,
 				resp_data);
@@ -265,7 +268,6 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 	flush_kernel_dcache_page(bio_page(rsp->bio));
 	kunmap_atomic(buf - bio_offset(rsp->bio), KM_USER0);
 	local_irq_enable();
-	rsp->data_len = resp_data_len;
 
  out:
 	kfree(req_data);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index e03dc0b..53759c5 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1170,9 +1170,7 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 
 		memcpy(req->sense, mpi_reply, sizeof(*mpi_reply));
 		req->sense_len = sizeof(*mpi_reply);
-		req->data_len = 0;
-		rsp->data_len -= mpi_reply->ResponseDataLength;
-
+		rsp->resid_len = rsp->data_len - mpi_reply->ResponseDataLength;
 	} else {
 		dtransportprintk(ioc, printk(MPT2SAS_DEBUG_FMT
 		    "%s - no reply\n", ioc->name, __func__));
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aa9fc57..7d49ef5 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -240,11 +240,11 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	 * is invalid.  Prevent the garbage from being misinterpreted
 	 * and prevent security leaks by zeroing out the excess data.
 	 */
-	if (unlikely(req->data_len > 0 && req->data_len <= bufflen))
-		memset(buffer + (bufflen - req->data_len), 0, req->data_len);
+	if (unlikely(req->resid_len > 0 && req->resid_len <= bufflen))
+		memset(buffer + (bufflen - req->resid_len), 0, req->resid_len);
 
 	if (resid)
-		*resid = req->data_len;
+		*resid = req->resid_len;
 	ret = req->errors;
  out:
 	blk_put_request(req);
@@ -549,7 +549,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
 		int leftover = (req->hard_nr_sectors << 9);
 
 		if (blk_pc_request(req))
-			leftover = req->data_len;
+			leftover = req->resid_len;
 
 		/* kill remainder if no retrys */
 		if (error && scsi_noretry_cmd(cmd))
@@ -673,11 +673,11 @@ void scsi_release_buffers(struct scsi_cmnd *cmd)
 EXPORT_SYMBOL(scsi_release_buffers);
 
 /*
- * Bidi commands Must be complete as a whole, both sides at once.
- * If part of the bytes were written and lld returned
- * scsi_in()->resid and/or scsi_out()->resid this information will be left
- * in req->data_len and req->next_rq->data_len. The upper-layer driver can
- * decide what to do with this information.
+ * Bidi commands Must be complete as a whole, both sides at once.  If
+ * part of the bytes were written and lld returned scsi_in()->resid
+ * and/or scsi_out()->resid this information will be left in
+ * req->resid_len and req->next_rq->resid_len. The upper-layer driver
+ * can decide what to do with this information.
  */
 static void scsi_end_bidi_request(struct scsi_cmnd *cmd)
 {
@@ -685,8 +685,8 @@ static void scsi_end_bidi_request(struct scsi_cmnd *cmd)
 	unsigned int dlen = req->data_len;
 	unsigned int next_dlen = req->next_rq->data_len;
 
-	req->data_len = scsi_out(cmd)->resid;
-	req->next_rq->data_len = scsi_in(cmd)->resid;
+	req->resid_len = scsi_out(cmd)->resid;
+	req->next_rq->resid_len = scsi_in(cmd)->resid;
 
 	/* The req and req->next_rq have not been completed */
 	BUG_ON(blk_end_bidi_request(req, 0, dlen, next_dlen));
@@ -778,7 +778,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_end_bidi_request(cmd);
 			return;
 		}
-		req->data_len = scsi_get_resid(cmd);
+		req->resid_len = scsi_get_resid(cmd);
 	}
 
 	BUG_ON(blk_bidi_rq(req)); /* bidi not support for !blk_pc_request yet */
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82312df..dec4c70 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1260,7 +1260,7 @@ static void sg_rq_end_io(struct request *rq, int uptodate)
 
 	sense = rq->sense;
 	result = rq->errors;
-	resid = rq->data_len;
+	resid = rq->resid_len;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n",
 		sdp->disk->disk_name, srp->header.pack_id, result));
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index eb24efe..8681b7083 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -463,7 +463,7 @@ static void st_scsi_execute_end(struct request *req, int uptodate)
 	struct scsi_tape *STp = SRpnt->stp;
 
 	STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
-	STp->buffer->cmdstat.residual = req->data_len;
+	STp->buffer->cmdstat.residual = req->resid_len;
 
 	if (SRpnt->waiting)
 		complete(SRpnt->waiting);
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index b249ae9..06ca926 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -50,10 +50,10 @@ int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid)
 
 	/* FIXME: should be include in osd_sense_info */
 	if (in_resid)
-		*in_resid = or->in.req ? or->in.req->data_len : 0;
+		*in_resid = or->in.req ? or->in.req->resid_len : 0;
 
 	if (out_resid)
-		*out_resid = or->out.req ? or->out.req->data_len : 0;
+		*out_resid = or->out.req ? or->out.req->resid_len : 0;
 
 	return ret;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3a5b1bd..6a967ca 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -229,6 +229,7 @@ struct request {
 	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
 	unsigned int sense_len;
+	unsigned int resid_len;	/* residual count */
 	void *sense;
 
 	unsigned long deadline;
-- 
cgit v0.10.2


From 5b93629b4509c03ffa87a9316412fedf6f58cb37 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:38 +0900
Subject: block: implement blk_rq_pos/[cur_]sectors() and convert obvious ones

Implement accessors - blk_rq_pos(), blk_rq_sectors() and
blk_rq_cur_sectors() which return rq->hard_sector, rq->hard_nr_sectors
and rq->hard_cur_sectors respectively and convert direct references of
the said fields to the accessors.

This is in preparation of request data length handling cleanup.

Geert	: suggested adding const to struct request * parameter to accessors
Sergei	: spotted error in patch description

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
Tested-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Ackec-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index c8d0876..c167de5 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -163,7 +163,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	 * For an empty barrier, there's no actual BAR request, which
 	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
 	 */
-	if (!rq->hard_nr_sectors) {
+	if (!blk_rq_sectors(rq)) {
 		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
 				QUEUE_ORDERED_DO_POSTFLUSH);
 		/*
diff --git a/block/blk-core.c b/block/blk-core.c
index 394c5bd..895e55b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1683,7 +1683,7 @@ static void blk_account_io_done(struct request *req)
 unsigned int blk_rq_bytes(struct request *rq)
 {
 	if (blk_fs_request(rq))
-		return rq->hard_nr_sectors << 9;
+		return blk_rq_sectors(rq) << 9;
 
 	return rq->data_len;
 }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index def0c69..575083a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -760,7 +760,7 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
 						cfqd->rq_in_driver);
 
-	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
+	cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
 }
 
 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index f6586e4..c238867 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -136,7 +136,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	dev_dbg(&dev->sbd.core,
 		"%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n",
 		__func__, __LINE__, op, n, req->nr_sectors,
-		req->hard_nr_sectors);
+		blk_rq_sectors(req));
 #endif
 
 	start_sector = req->sector * priv->blocking_factor;
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index ecccf65..e821eed 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -368,12 +368,12 @@ static void do_viodasd_request(struct request_queue *q)
 		blkdev_dequeue_request(req);
 		/* check that request contains a valid command */
 		if (!blk_fs_request(req)) {
-			viodasd_end_request(req, -EIO, req->hard_nr_sectors);
+			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
 			continue;
 		}
 		/* Try sending the request */
 		if (send_request(req) != 0)
-			viodasd_end_request(req, -EIO, req->hard_nr_sectors);
+			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
 	}
 }
 
@@ -590,7 +590,7 @@ static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
 		err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
 		printk(VIOD_KERN_WARNING "read/write error %d:0x%04x (%s)\n",
 				event->xRc, bevent->sub_result, err->msg);
-		num_sect = req->hard_nr_sectors;
+		num_sect = blk_rq_sectors(req);
 	}
 	qlock = req->q->queue_lock;
 	spin_lock_irqsave(qlock, irq_flags);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index b1e1d7e..5722931 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -645,8 +645,8 @@ static void ace_fsm_dostate(struct ace_device *ace)
 
 		/* Okay, it's a data request, set it up for transfer */
 		dev_dbg(ace->dev,
-			"request: sec=%llx hcnt=%lx, ccnt=%x, dir=%i\n",
-			(unsigned long long) req->sector, req->hard_nr_sectors,
+			"request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n",
+			(unsigned long long) req->sector, blk_rq_sectors(req),
 			req->current_nr_sectors, rq_data_dir(req));
 
 		ace->req = req;
@@ -654,7 +654,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		ace->data_count = req->current_nr_sectors * ACE_BUF_PER_SECTOR;
 		ace_out32(ace, ACE_MPULBA, req->sector & 0x0FFFFFFF);
 
-		count = req->hard_nr_sectors;
+		count = blk_rq_sectors(req);
 		if (rq_data_dir(req)) {
 			/* Kick off write request */
 			dev_dbg(ace->dev, "write data\n");
@@ -719,8 +719,8 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		/* bio finished; is there another one? */
 		if (__blk_end_request(ace->req, 0,
 					blk_rq_cur_bytes(ace->req))) {
-			/* dev_dbg(ace->dev, "next block; h=%li c=%i\n",
-			 *      ace->req->hard_nr_sectors,
+			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
+			 *      blk_rq_sectors(ace->req),
 			 *      ace->req->current_nr_sectors);
 			 */
 			ace->data_ptr = ace->req->buffer;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 8bbe222..182320d 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -730,7 +730,7 @@ out_end:
 		if (blk_pc_request(rq))
 			nsectors = (rq->data_len + 511) >> 9;
 		else
-			nsectors = rq->hard_nr_sectors;
+			nsectors = blk_rq_sectors(rq);
 
 		if (nsectors == 0)
 			nsectors = 1;
@@ -875,7 +875,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 
 	return ide_issue_pc(drive, &cmd);
 out_end:
-	nsectors = rq->hard_nr_sectors;
+	nsectors = blk_rq_sectors(rq);
 
 	if (nsectors == 0)
 		nsectors = 1;
@@ -1359,8 +1359,8 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
 {
 	int hard_sect = queue_hardsect_size(q);
-	long block = (long)rq->hard_sector / (hard_sect >> 9);
-	unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9);
+	long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
+	unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
 
 	memset(rq->cmd, 0, BLK_MAX_CDB);
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index a0309ea..df23bcb 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -118,7 +118,7 @@ unsigned int ide_rq_bytes(struct request *rq)
 	if (blk_pc_request(rq))
 		return rq->data_len;
 	else
-		return rq->hard_cur_sectors << 9;
+		return blk_rq_cur_sectors(rq) << 9;
 }
 EXPORT_SYMBOL_GPL(ide_rq_bytes);
 
@@ -133,7 +133,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
 	 * and complete the whole request right now
 	 */
 	if (blk_noretry_request(rq) && error <= 0)
-		nr_bytes = rq->hard_nr_sectors << 9;
+		nr_bytes = blk_rq_sectors(rq) << 9;
 
 	rc = ide_end_rq(drive, rq, error, nr_bytes);
 	if (rc == 0)
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 221317e..56e60f0 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -427,7 +427,7 @@ static void i2o_block_end_request(struct request *req, int error,
 	unsigned long flags;
 
 	if (blk_end_request(req, error, nr_bytes)) {
-		int leftover = (req->hard_nr_sectors << KERNEL_SECTOR_SHIFT);
+		int leftover = (blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
 
 		if (blk_pc_request(req))
 			leftover = req->data_len;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7d49ef5..9ff0ca9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -546,7 +546,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
 	 * to queue the remainder of them.
 	 */
 	if (blk_end_request(req, error, bytes)) {
-		int leftover = (req->hard_nr_sectors << 9);
+		int leftover = blk_rq_sectors(req) << 9;
 
 		if (blk_pc_request(req))
 			leftover = req->resid_len;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6a967ca..4e5f855 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -832,13 +832,30 @@ static inline void blk_run_address_space(struct address_space *mapping)
 extern void blkdev_dequeue_request(struct request *req);
 
 /*
- * blk_end_request() takes bytes instead of sectors as a complete size.
- * blk_rq_bytes() returns bytes left to complete in the entire request.
- * blk_rq_cur_bytes() returns bytes left to complete in the current segment.
+ * blk_rq_pos()		: the current sector
+ * blk_rq_bytes()	: bytes left in the entire request
+ * blk_rq_cur_bytes()	: bytes left in the current segment
+ * blk_rq_sectors()	: sectors left in the entire request
+ * blk_rq_cur_sectors()	: sectors left in the current segment
  */
+static inline sector_t blk_rq_pos(const struct request *rq)
+{
+	return rq->hard_sector;
+}
+
 extern unsigned int blk_rq_bytes(struct request *rq);
 extern unsigned int blk_rq_cur_bytes(struct request *rq);
 
+static inline unsigned int blk_rq_sectors(const struct request *rq)
+{
+	return rq->hard_nr_sectors;
+}
+
+static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
+{
+	return rq->hard_cur_sectors;
+}
+
 /*
  * Request completion related functions.
  *
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 921ef5d..42f1c11 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -646,7 +646,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 				rq->cmd_len, rq->cmd);
 	} else  {
 		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9,
 				rw, what, rq->errors, 0, NULL);
 	}
 }
@@ -857,7 +857,7 @@ void blk_add_driver_data(struct request_queue *q,
 		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
 				rq->errors, len, data);
 	else
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9,
 				0, BLK_TA_DRV_DATA, rq->errors, len, data);
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
-- 
cgit v0.10.2


From 83096ebf1263b2c1ee5e653ba37d993d02e3eb7b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:39 +0900
Subject: block: convert to pos and nr_sectors accessors

With recent cleanups, there is no place where low level driver
directly manipulates request fields.  This means that the 'hard'
request fields always equal the !hard fields.  Convert all
rq->sectors, nr_sectors and current_nr_sectors references to
accessors.

While at it, drop superflous blk_rq_pos() < 0 test in swim.c.

[ Impact: use pos and nr_sectors accessors ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Tested-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Tested-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Acked-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Acked-by: Mike Miller <mike.miller@hp.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Paul Clements <paul.clements@steeleye.com>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Alex Dubov <oakad@yahoo.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Dario Ballabio <ballabio_dario@emc.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: unsik Kim <donari75@gmail.com>
Cc: Laurent Vivier <Laurent@lvivier.info>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 4330127..402ba8f 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1239,7 +1239,7 @@ static void do_ubd_request(struct request_queue *q)
 		}
 
 		req = dev->request;
-		sector = req->sector;
+		sector = blk_rq_pos(req);
 		while(dev->start_sg < dev->end_sg){
 			struct scatterlist *sg = &dev->sg[dev->start_sg];
 
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 45bd070..7a12cf6 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -306,8 +306,8 @@ as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
 	data_dir = rq_is_sync(rq1);
 
 	last = ad->last_sector[data_dir];
-	s1 = rq1->sector;
-	s2 = rq2->sector;
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
 
 	BUG_ON(data_dir != rq_is_sync(rq2));
 
@@ -566,13 +566,15 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
 			as_update_thinktime(ad, aic, thinktime);
 
 			/* Calculate read -> read seek distance */
-			if (aic->last_request_pos < rq->sector)
-				seek_dist = rq->sector - aic->last_request_pos;
+			if (aic->last_request_pos < blk_rq_pos(rq))
+				seek_dist = blk_rq_pos(rq) -
+					    aic->last_request_pos;
 			else
-				seek_dist = aic->last_request_pos - rq->sector;
+				seek_dist = aic->last_request_pos -
+					    blk_rq_pos(rq);
 			as_update_seekdist(ad, aic, seek_dist);
 		}
-		aic->last_request_pos = rq->sector + rq->nr_sectors;
+		aic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 		set_bit(AS_TASK_IOSTARTED, &aic->state);
 		spin_unlock(&aic->lock);
 	}
@@ -587,7 +589,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic,
 {
 	unsigned long delay;	/* jiffies */
 	sector_t last = ad->last_sector[ad->batch_data_dir];
-	sector_t next = rq->sector;
+	sector_t next = blk_rq_pos(rq);
 	sector_t delta; /* acceptable close offset (in sectors) */
 	sector_t s;
 
@@ -981,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
 	 * This has to be set in order to be correctly updated by
 	 * as_find_next_rq
 	 */
-	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
+	ad->last_sector[data_dir] = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
 	if (data_dir == BLK_RW_SYNC) {
 		struct io_context *ioc = RQ_IOC(rq);
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index c167de5..8713c2f 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -324,7 +324,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 	/*
 	 * The driver must store the error location in ->bi_sector, if
 	 * it supports it. For non-stacked drivers, this should be copied
-	 * from rq->sector.
+	 * from blk_rq_pos(rq).
 	 */
 	if (error_sector)
 		*error_sector = bio->bi_sector;
diff --git a/block/blk-core.c b/block/blk-core.c
index 895e55b..82dc206 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -72,7 +72,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
 		return;
 
 	cpu = part_stat_lock();
-	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
 
 	if (!new_io)
 		part_stat_inc(cpu, part, merges[rw]);
@@ -185,10 +185,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
 		rq->cmd_flags);
 
-	printk(KERN_INFO "  sector %llu, nr/cnr %lu/%u\n",
-						(unsigned long long)rq->sector,
-						rq->nr_sectors,
-						rq->current_nr_sectors);
+	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
+	       (unsigned long long)blk_rq_pos(rq),
+	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
 	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
 						rq->bio, rq->biotail,
 						rq->buffer, rq->data_len);
@@ -1557,7 +1556,7 @@ EXPORT_SYMBOL(submit_bio);
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
-	if (rq->nr_sectors > q->max_sectors ||
+	if (blk_rq_sectors(rq) > q->max_sectors ||
 	    rq->data_len > q->max_hw_sectors << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
@@ -1645,7 +1644,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
 		part_stat_unlock();
 	}
@@ -1665,7 +1664,7 @@ static void blk_account_io_done(struct request *req)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
@@ -1846,7 +1845,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
 		printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
-				(unsigned long long)req->sector);
+				(unsigned long long)blk_rq_pos(req));
 	}
 
 	blk_account_io_completion(req, nr_bytes);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 23d2a6f..bf62a87 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -259,7 +259,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
 	else
 		max_sectors = q->max_sectors;
 
-	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
+	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -284,7 +284,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 		max_sectors = q->max_sectors;
 
 
-	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
+	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -315,7 +315,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	/*
 	 * Will it become too large?
 	 */
-	if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
+	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > q->max_sectors)
 		return 0;
 
 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -345,7 +345,7 @@ static void blk_account_io_merge(struct request *req)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_round_stats(cpu, part);
 		part_dec_in_flight(part);
@@ -366,7 +366,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	/*
 	 * not contiguous
 	 */
-	if (req->sector + req->nr_sectors != next->sector)
+	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
 		return 0;
 
 	if (rq_data_dir(req) != rq_data_dir(next)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 575083a..db4d990 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -349,8 +349,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
 	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
 		return rq2;
 
-	s1 = rq1->sector;
-	s2 = rq2->sector;
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
 
 	last = cfqd->last_position;
 
@@ -949,10 +949,10 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 					  struct request *rq)
 {
-	if (rq->sector >= cfqd->last_position)
-		return rq->sector - cfqd->last_position;
+	if (blk_rq_pos(rq) >= cfqd->last_position)
+		return blk_rq_pos(rq) - cfqd->last_position;
 	else
-		return cfqd->last_position - rq->sector;
+		return cfqd->last_position - blk_rq_pos(rq);
 }
 
 #define CIC_SEEK_THR	8 * 1024
@@ -1918,10 +1918,10 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
 
 	if (!cic->last_request_pos)
 		sdist = 0;
-	else if (cic->last_request_pos < rq->sector)
-		sdist = rq->sector - cic->last_request_pos;
+	else if (cic->last_request_pos < blk_rq_pos(rq))
+		sdist = blk_rq_pos(rq) - cic->last_request_pos;
 	else
-		sdist = cic->last_request_pos - rq->sector;
+		sdist = cic->last_request_pos - blk_rq_pos(rq);
 
 	/*
 	 * Don't allow the seek distance to get too large from the
@@ -2071,7 +2071,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	cfq_update_io_seektime(cfqd, cic, rq);
 	cfq_update_idle_window(cfqd, cfqq, cic);
 
-	cic->last_request_pos = rq->sector + rq->nr_sectors;
+	cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
 	if (cfqq == cfqd->active_queue) {
 		/*
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c4d991d..b547cbc 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -138,7 +138,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
 
 		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
 		if (__rq) {
-			BUG_ON(sector != __rq->sector);
+			BUG_ON(sector != blk_rq_pos(__rq));
 
 			if (elv_rq_merge_ok(__rq, bio)) {
 				ret = ELEVATOR_FRONT_MERGE;
diff --git a/block/elevator.c b/block/elevator.c
index 1af5d9f..9189200 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -52,7 +52,7 @@ static const int elv_hash_shift = 6;
 #define ELV_HASH_FN(sec)	\
 		(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
 #define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
+#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
 
 DEFINE_TRACE(block_rq_insert);
 DEFINE_TRACE(block_rq_issue);
@@ -119,9 +119,9 @@ static inline int elv_try_merge(struct request *__rq, struct bio *bio)
 	 * we can merge and sequence is ok, check if it's possible
 	 */
 	if (elv_rq_merge_ok(__rq, bio)) {
-		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
+		if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector)
 			ret = ELEVATOR_BACK_MERGE;
-		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
+		else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector)
 			ret = ELEVATOR_FRONT_MERGE;
 	}
 
@@ -370,9 +370,9 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
 		parent = *p;
 		__rq = rb_entry(parent, struct request, rb_node);
 
-		if (rq->sector < __rq->sector)
+		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
 			p = &(*p)->rb_left;
-		else if (rq->sector > __rq->sector)
+		else if (blk_rq_pos(rq) > blk_rq_pos(__rq))
 			p = &(*p)->rb_right;
 		else
 			return __rq;
@@ -400,9 +400,9 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector)
 	while (n) {
 		rq = rb_entry(n, struct request, rb_node);
 
-		if (sector < rq->sector)
+		if (sector < blk_rq_pos(rq))
 			n = n->rb_left;
-		else if (sector > rq->sector)
+		else if (sector > blk_rq_pos(rq))
 			n = n->rb_right;
 		else
 			return rq;
@@ -441,14 +441,14 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 			break;
 		if (pos->cmd_flags & stop_flags)
 			break;
-		if (rq->sector >= boundary) {
-			if (pos->sector < boundary)
+		if (blk_rq_pos(rq) >= boundary) {
+			if (blk_rq_pos(pos) < boundary)
 				continue;
 		} else {
-			if (pos->sector >= boundary)
+			if (blk_rq_pos(pos) >= boundary)
 				break;
 		}
-		if (rq->sector >= pos->sector)
+		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
 			break;
 	}
 
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index f22ed6c..774ab05 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3338,8 +3338,8 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 	}
 	Command->Completion = Request->end_io_data;
 	Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
-	Command->BlockNumber = Request->sector;
-	Command->BlockCount = Request->nr_sectors;
+	Command->BlockNumber = blk_rq_pos(Request);
+	Command->BlockCount = blk_rq_sectors(Request);
 	Command->Request = Request;
 	blkdev_dequeue_request(Request);
 	Command->SegmentCount = blk_rq_map_sg(req_q,
@@ -3431,7 +3431,7 @@ static void DAC960_queue_partial_rw(DAC960_Command_T *Command)
    * successfully as possible.
    */
   Command->SegmentCount = 1;
-  Command->BlockNumber = Request->sector;
+  Command->BlockNumber = blk_rq_pos(Request);
   Command->BlockCount = 1;
   DAC960_QueueReadWriteCommand(Command);
   return;
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 8ff95f2..e4a14b9 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1351,13 +1351,13 @@ static void redo_fd_request(void)
 	drive = floppy - unit;
 
 	/* Here someone could investigate to be more efficient */
-	for (cnt = 0; cnt < CURRENT->current_nr_sectors; cnt++) { 
+	for (cnt = 0; cnt < blk_rq_cur_sectors(CURRENT); cnt++) {
 #ifdef DEBUG
 		printk("fd: sector %ld + %d requested for %s\n",
-		       CURRENT->sector,cnt,
+		       blk_rq_pos(CURRENT), cnt,
 		       (rq_data_dir(CURRENT) == READ) ? "read" : "write");
 #endif
-		block = CURRENT->sector + cnt;
+		block = blk_rq_pos(CURRENT) + cnt;
 		if ((int)block > floppy->blocks) {
 			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 2506728..234024c 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -725,7 +725,7 @@ static void do_fd_action( int drive )
 	    if (IS_BUFFERED( drive, ReqSide, ReqTrack )) {
 		if (ReqCmd == READ) {
 		    copy_buffer( SECTOR_BUFFER(ReqSector), ReqData );
-		    if (++ReqCnt < CURRENT->current_nr_sectors) {
+		    if (++ReqCnt < blk_rq_cur_sectors(CURRENT)) {
 			/* read next sector */
 			setup_req_params( drive );
 			goto repeat;
@@ -1130,7 +1130,7 @@ static void fd_rwsec_done1(int status)
 		}
 	}
   
-	if (++ReqCnt < CURRENT->current_nr_sectors) {
+	if (++ReqCnt < blk_rq_cur_sectors(CURRENT)) {
 		/* read next sector */
 		setup_req_params( SelectedDrive );
 		do_fd_action( SelectedDrive );
@@ -1394,7 +1394,7 @@ static void redo_fd_request(void)
 
 	DPRINT(("redo_fd_request: CURRENT=%p dev=%s CURRENT->sector=%ld\n",
 		CURRENT, CURRENT ? CURRENT->rq_disk->disk_name : "",
-		CURRENT ? CURRENT->sector : 0 ));
+		CURRENT ? blk_rq_pos(CURRENT) : 0 ));
 
 	IsFormatting = 0;
 
@@ -1440,7 +1440,7 @@ repeat:
 		UD.autoprobe = 0;
 	}
 	
-	if (CURRENT->sector + 1 > UDT->blocks) {
+	if (blk_rq_pos(CURRENT) + 1 > UDT->blocks) {
 		__blk_end_request_cur(CURRENT, -EIO);
 		goto repeat;
 	}
@@ -1450,7 +1450,7 @@ repeat:
 		
 	ReqCnt = 0;
 	ReqCmd = rq_data_dir(CURRENT);
-	ReqBlock = CURRENT->sector;
+	ReqBlock = blk_rq_pos(CURRENT);
 	ReqBuffer = CURRENT->buffer;
 	setup_req_params( drive );
 	do_fd_action( drive );
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index f22d493..ab7b04c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2835,10 +2835,10 @@ static void do_cciss_request(struct request_queue *q)
 	c->Request.Timeout = 0;	// Don't time out
 	c->Request.CDB[0] =
 	    (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
-	start_blk = creq->sector;
+	start_blk = blk_rq_pos(creq);
 #ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n", (int)creq->sector,
-	       (int)creq->nr_sectors);
+	printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",
+	       (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
 #endif				/* CCISS_DEBUG */
 
 	sg_init_table(tmp_sg, MAXSGENTRIES);
@@ -2864,8 +2864,8 @@ static void do_cciss_request(struct request_queue *q)
 		h->maxSG = seg;
 
 #ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss: Submitting %lu sectors in %d segments\n",
-	       creq->nr_sectors, seg);
+	printk(KERN_DEBUG "cciss: Submitting %u sectors in %d segments\n",
+	       blk_rq_sectors(creq), seg);
 #endif				/* CCISS_DEBUG */
 
 	c->Header.SGList = c->Header.SGTotal = seg;
@@ -2877,8 +2877,8 @@ static void do_cciss_request(struct request_queue *q)
 			c->Request.CDB[4] = (start_blk >> 8) & 0xff;
 			c->Request.CDB[5] = start_blk & 0xff;
 			c->Request.CDB[6] = 0;	// (sect >> 24) & 0xff; MSB
-			c->Request.CDB[7] = (creq->nr_sectors >> 8) & 0xff;
-			c->Request.CDB[8] = creq->nr_sectors & 0xff;
+			c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff;
+			c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff;
 			c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
 		} else {
 			u32 upper32 = upper_32_bits(start_blk);
@@ -2893,10 +2893,10 @@ static void do_cciss_request(struct request_queue *q)
 			c->Request.CDB[7]= (start_blk >> 16) & 0xff;
 			c->Request.CDB[8]= (start_blk >>  8) & 0xff;
 			c->Request.CDB[9]= start_blk & 0xff;
-			c->Request.CDB[10]= (creq->nr_sectors >>  24) & 0xff;
-			c->Request.CDB[11]= (creq->nr_sectors >>  16) & 0xff;
-			c->Request.CDB[12]= (creq->nr_sectors >>  8) & 0xff;
-			c->Request.CDB[13]= creq->nr_sectors & 0xff;
+			c->Request.CDB[10]= (blk_rq_sectors(creq) >> 24) & 0xff;
+			c->Request.CDB[11]= (blk_rq_sectors(creq) >> 16) & 0xff;
+			c->Request.CDB[12]= (blk_rq_sectors(creq) >>  8) & 0xff;
+			c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
 			c->Request.CDB[14] = c->Request.CDB[15] = 0;
 		}
 	} else if (blk_pc_request(creq)) {
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 488a8f4..a5caeff 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -919,10 +919,11 @@ queue_next:
 	c->hdr.size = sizeof(rblk_t) >> 2;
 	c->size += sizeof(rblk_t);
 
-	c->req.hdr.blk = creq->sector;
+	c->req.hdr.blk = blk_rq_pos(creq);
 	c->rq = creq;
 DBGPX(
-	printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors);
+	printk("sector=%d, nr_sectors=%u\n",
+	       blk_rq_pos(creq), blk_rq_sectors(creq));
 );
 	sg_init_table(tmp_sg, SG_MAX);
 	seg = blk_rq_map_sg(q, creq, tmp_sg);
@@ -940,9 +941,9 @@ DBGPX(
 						 tmp_sg[i].offset,
 						 tmp_sg[i].length, dir);
 	}
-DBGPX(	printk("Submitting %d sectors in %d segments\n", creq->nr_sectors, seg); );
+DBGPX(	printk("Submitting %u sectors in %d segments\n", blk_rq_sectors(creq), seg); );
 	c->req.hdr.sg_cnt = seg;
-	c->req.hdr.blk_cnt = creq->nr_sectors;
+	c->req.hdr.blk_cnt = blk_rq_sectors(creq);
 	c->req.hdr.cmd = (rq_data_dir(creq) == READ) ? IDA_READ : IDA_WRITE;
 	c->type = CMD_RWREQ;
 
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 1300df6..4524862 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2303,7 +2303,7 @@ static void floppy_end_request(struct request *req, int error)
 
 	/* current_count_sectors can be zero if transfer failed */
 	if (error)
-		nr_sectors = req->current_nr_sectors;
+		nr_sectors = blk_rq_cur_sectors(req);
 	if (__blk_end_request(req, error, nr_sectors << 9))
 		return;
 
@@ -2332,7 +2332,7 @@ static void request_done(int uptodate)
 	if (uptodate) {
 		/* maintain values for invalidation on geometry
 		 * change */
-		block = current_count_sectors + req->sector;
+		block = current_count_sectors + blk_rq_pos(req);
 		INFBOUND(DRS->maxblock, block);
 		if (block > _floppy->sect)
 			DRS->maxtrack = 1;
@@ -2346,10 +2346,10 @@ static void request_done(int uptodate)
 			/* record write error information */
 			DRWE->write_errors++;
 			if (DRWE->write_errors == 1) {
-				DRWE->first_error_sector = req->sector;
+				DRWE->first_error_sector = blk_rq_pos(req);
 				DRWE->first_error_generation = DRS->generation;
 			}
-			DRWE->last_error_sector = req->sector;
+			DRWE->last_error_sector = blk_rq_pos(req);
 			DRWE->last_error_generation = DRS->generation;
 		}
 		spin_lock_irqsave(q->queue_lock, flags);
@@ -2503,24 +2503,24 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 
 	max_sector = transfer_size(ssize,
 				   min(max_sector, max_sector_2),
-				   current_req->nr_sectors);
+				   blk_rq_sectors(current_req));
 
 	if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE &&
-	    buffer_max > fsector_t + current_req->nr_sectors)
+	    buffer_max > fsector_t + blk_rq_sectors(current_req))
 		current_count_sectors = min_t(int, buffer_max - fsector_t,
-					      current_req->nr_sectors);
+					      blk_rq_sectors(current_req));
 
 	remaining = current_count_sectors << 9;
 #ifdef FLOPPY_SANITY_CHECK
-	if ((remaining >> 9) > current_req->nr_sectors &&
+	if ((remaining >> 9) > blk_rq_sectors(current_req) &&
 	    CT(COMMAND) == FD_WRITE) {
 		DPRINT("in copy buffer\n");
 		printk("current_count_sectors=%ld\n", current_count_sectors);
 		printk("remaining=%d\n", remaining >> 9);
-		printk("current_req->nr_sectors=%ld\n",
-		       current_req->nr_sectors);
+		printk("current_req->nr_sectors=%u\n",
+		       blk_rq_sectors(current_req));
 		printk("current_req->current_nr_sectors=%u\n",
-		       current_req->current_nr_sectors);
+		       blk_rq_cur_sectors(current_req));
 		printk("max_sector=%d\n", max_sector);
 		printk("ssize=%d\n", ssize);
 	}
@@ -2530,7 +2530,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 
 	dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9);
 
-	size = current_req->current_nr_sectors << 9;
+	size = blk_rq_cur_sectors(current_req) << 9;
 
 	rq_for_each_segment(bv, current_req, iter) {
 		if (!remaining)
@@ -2648,10 +2648,10 @@ static int make_raw_rw_request(void)
 
 	max_sector = _floppy->sect * _floppy->head;
 
-	TRACK = (int)current_req->sector / max_sector;
-	fsector_t = (int)current_req->sector % max_sector;
+	TRACK = (int)blk_rq_pos(current_req) / max_sector;
+	fsector_t = (int)blk_rq_pos(current_req) % max_sector;
 	if (_floppy->track && TRACK >= _floppy->track) {
-		if (current_req->current_nr_sectors & 1) {
+		if (blk_rq_cur_sectors(current_req) & 1) {
 			current_count_sectors = 1;
 			return 1;
 		} else
@@ -2669,7 +2669,7 @@ static int make_raw_rw_request(void)
 		if (fsector_t >= max_sector) {
 			current_count_sectors =
 			    min_t(int, _floppy->sect - fsector_t,
-				  current_req->nr_sectors);
+				  blk_rq_sectors(current_req));
 			return 1;
 		}
 		SIZECODE = 2;
@@ -2720,7 +2720,7 @@ static int make_raw_rw_request(void)
 
 	in_sector_offset = (fsector_t % _floppy->sect) % ssize;
 	aligned_sector_t = fsector_t - in_sector_offset;
-	max_size = current_req->nr_sectors;
+	max_size = blk_rq_sectors(current_req);
 	if ((raw_cmd->track == buffer_track) &&
 	    (current_drive == buffer_drive) &&
 	    (fsector_t >= buffer_min) && (fsector_t < buffer_max)) {
@@ -2729,10 +2729,10 @@ static int make_raw_rw_request(void)
 			copy_buffer(1, max_sector, buffer_max);
 			return 1;
 		}
-	} else if (in_sector_offset || current_req->nr_sectors < ssize) {
+	} else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) {
 		if (CT(COMMAND) == FD_WRITE) {
-			if (fsector_t + current_req->nr_sectors > ssize &&
-			    fsector_t + current_req->nr_sectors < ssize + ssize)
+			if (fsector_t + blk_rq_sectors(current_req) > ssize &&
+			    fsector_t + blk_rq_sectors(current_req) < ssize + ssize)
 				max_size = ssize + ssize;
 			else
 				max_size = ssize;
@@ -2776,7 +2776,7 @@ static int make_raw_rw_request(void)
 		    (indirect * 2 > direct * 3 &&
 		     *errors < DP->max_errors.read_track && ((!probing
 		       || (DP->read_track & (1 << DRS->probed_format)))))) {
-			max_size = current_req->nr_sectors;
+			max_size = blk_rq_sectors(current_req);
 		} else {
 			raw_cmd->kernel_data = current_req->buffer;
 			raw_cmd->length = current_count_sectors << 9;
@@ -2801,7 +2801,7 @@ static int make_raw_rw_request(void)
 	    fsector_t > buffer_max ||
 	    fsector_t < buffer_min ||
 	    ((CT(COMMAND) == FD_READ ||
-	      (!in_sector_offset && current_req->nr_sectors >= ssize)) &&
+	      (!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) &&
 	     max_sector > 2 * max_buffer_sectors + buffer_min &&
 	     max_size + fsector_t > 2 * max_buffer_sectors + buffer_min)
 	    /* not enough space */
@@ -2879,8 +2879,8 @@ static int make_raw_rw_request(void)
 				printk("write\n");
 			return 0;
 		}
-	} else if (raw_cmd->length > current_req->nr_sectors << 9 ||
-		   current_count_sectors > current_req->nr_sectors) {
+	} else if (raw_cmd->length > blk_rq_sectors(current_req) << 9 ||
+		   current_count_sectors > blk_rq_sectors(current_req)) {
 		DPRINT("buffer overrun in direct transfer\n");
 		return 0;
 	} else if (raw_cmd->length < current_count_sectors << 9) {
@@ -2990,8 +2990,9 @@ static void do_fd_request(struct request_queue * q)
 	if (usage_count == 0) {
 		printk("warning: usage count=0, current_req=%p exiting\n",
 		       current_req);
-		printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
-		       current_req->cmd_type, current_req->cmd_flags);
+		printk("sect=%ld type=%x flags=%x\n",
+		       (long)blk_rq_pos(current_req), current_req->cmd_type,
+		       current_req->cmd_flags);
 		return;
 	}
 	if (test_bit(0, &fdc_busy)) {
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 75b9ca9..a3b3994 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -228,7 +228,7 @@ static void dump_status(const char *msg, unsigned int stat)
 			printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
 				inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
 			if (CURRENT)
-				printk(", sector=%ld", CURRENT->sector);
+				printk(", sector=%ld", blk_rq_pos(CURRENT));
 		}
 		printk("\n");
 	}
@@ -457,9 +457,9 @@ ok_to_read:
 	req = CURRENT;
 	insw(HD_DATA, req->buffer, 256);
 #ifdef DEBUG
-	printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n",
-		req->rq_disk->disk_name, req->sector + 1, req->nr_sectors - 1,
-		req->buffer+512);
+	printk("%s: read: sector %ld, remaining = %u, buffer=%p\n",
+	       req->rq_disk->disk_name, blk_rq_pos(req) + 1,
+	       blk_rq_sectors(req) - 1, req->buffer+512);
 #endif
 	if (__blk_end_request(req, 0, 512)) {
 		SET_HANDLER(&read_intr);
@@ -485,7 +485,7 @@ static void write_intr(void)
 			continue;
 		if (!OK_STATUS(i))
 			break;
-		if ((req->nr_sectors <= 1) || (i & DRQ_STAT))
+		if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT))
 			goto ok_to_write;
 	} while (--retries > 0);
 	dump_status("write_intr", i);
@@ -589,8 +589,8 @@ repeat:
 		return;
 	}
 	disk = req->rq_disk->private_data;
-	block = req->sector;
-	nsect = req->nr_sectors;
+	block = blk_rq_pos(req);
+	nsect = blk_rq_sectors(req);
 	if (block >= get_capacity(req->rq_disk) ||
 	    ((block+nsect) > get_capacity(req->rq_disk))) {
 		printk("%s: bad access: block=%d, count=%d\n",
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 71e56cc..826c349 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -220,7 +220,8 @@ static void mg_dump_status(const char *msg, unsigned int stat,
 			if (host->breq) {
 				req = elv_next_request(host->breq);
 				if (req)
-					printk(", sector=%u", (u32)req->sector);
+					printk(", sector=%u",
+					       (unsigned int)blk_rq_pos(req));
 			}
 
 		}
@@ -493,12 +494,12 @@ static void mg_read(struct request *req)
 	u32 j;
 	struct mg_host *host = req->rq_disk->private_data;
 
-	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, NULL) !=
-			MG_ERR_NONE)
+	if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
+		   MG_CMD_RD, NULL) != MG_ERR_NONE)
 		mg_bad_rw_intr(host);
 
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-	       req->nr_sectors, req->sector, req->buffer);
+	       blk_rq_sectors(req), blk_rq_pos(req), req->buffer);
 
 	do {
 		u16 *buff = (u16 *)req->buffer;
@@ -522,14 +523,14 @@ static void mg_write(struct request *req)
 	u32 j;
 	struct mg_host *host = req->rq_disk->private_data;
 
-	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, NULL) !=
-			MG_ERR_NONE) {
+	if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
+		   MG_CMD_WR, NULL) != MG_ERR_NONE) {
 		mg_bad_rw_intr(host);
 		return;
 	}
 
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-	       req->nr_sectors, req->sector, req->buffer);
+	       blk_rq_sectors(req), blk_rq_pos(req), req->buffer);
 
 	do {
 		u16 *buff = (u16 *)req->buffer;
@@ -579,7 +580,7 @@ ok_to_read:
 			      (i << 1));
 
 	MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-			req->sector, req->nr_sectors - 1, req->buffer);
+	       blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer);
 
 	/* send read confirm */
 	outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
@@ -609,7 +610,7 @@ static void mg_write_intr(struct mg_host *host)
 			break;
 		if (!MG_READY_OK(i))
 			break;
-		if ((req->nr_sectors <= 1) || (i & ATA_DRQ))
+		if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ))
 			goto ok_to_write;
 	} while (0);
 	mg_dump_status("mg_write_intr", i, host);
@@ -627,7 +628,7 @@ ok_to_write:
 			buff++;
 		}
 		MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-				req->sector, req->nr_sectors, req->buffer);
+		       blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
 		host->mg_do_intr = mg_write_intr;
 		mod_timer(&host->timer, jiffies + 3 * HZ);
 	}
@@ -749,9 +750,9 @@ static void mg_request(struct request_queue *q)
 
 		del_timer(&host->timer);
 
-		sect_num = req->sector;
+		sect_num = blk_rq_pos(req);
 		/* deal whole segments */
-		sect_cnt = req->nr_sectors;
+		sect_cnt = blk_rq_sectors(req);
 
 		/* sanity check */
 		if (sect_num >= get_capacity(req->rq_disk) ||
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a9ab8be..977a573 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -110,7 +110,7 @@ static void nbd_end_request(struct request *req)
 			req, error ? "failed" : "done");
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_end_request(req, error, req->nr_sectors << 9);
+	__blk_end_request(req, error, blk_rq_sectors(req) << 9);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -231,19 +231,19 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 {
 	int result, flags;
 	struct nbd_request request;
-	unsigned long size = req->nr_sectors << 9;
+	unsigned long size = blk_rq_sectors(req) << 9;
 
 	request.magic = htonl(NBD_REQUEST_MAGIC);
 	request.type = htonl(nbd_cmd(req));
-	request.from = cpu_to_be64((u64) req->sector << 9);
+	request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 	request.len = htonl(size);
 	memcpy(request.handle, &req, sizeof(req));
 
-	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
+	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
 			lo->disk->disk_name, req,
 			nbdcmd_to_ascii(nbd_cmd(req)),
-			(unsigned long long)req->sector << 9,
-			req->nr_sectors << 9);
+			(unsigned long long)blk_rq_pos(req) << 9,
+			blk_rq_sectors(req) << 9);
 	result = sock_xmit(lo, 1, &request, sizeof(request),
 			(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
 	if (result <= 0) {
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 9fd57c2..2d5dc0a 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -728,8 +728,8 @@ static void do_pcd_request(struct request_queue * q)
 			if (cd != pcd_current)
 				pcd_bufblk = -1;
 			pcd_current = cd;
-			pcd_sector = pcd_req->sector;
-			pcd_count = pcd_req->current_nr_sectors;
+			pcd_sector = blk_rq_pos(pcd_req);
+			pcd_count = blk_rq_cur_sectors(pcd_req);
 			pcd_buf = pcd_req->buffer;
 			pcd_busy = 1;
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 0732df4..9ec5d4a 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -444,11 +444,11 @@ static enum action do_pd_io_start(void)
 
 	pd_cmd = rq_data_dir(pd_req);
 	if (pd_cmd == READ || pd_cmd == WRITE) {
-		pd_block = pd_req->sector;
-		pd_count = pd_req->current_nr_sectors;
+		pd_block = blk_rq_pos(pd_req);
+		pd_count = blk_rq_cur_sectors(pd_req);
 		if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
 			return Fail;
-		pd_run = pd_req->nr_sectors;
+		pd_run = blk_rq_sectors(pd_req);
 		pd_buf = pd_req->buffer;
 		pd_retries = 0;
 		if (pd_cmd == READ)
@@ -479,7 +479,7 @@ static int pd_next_buf(void)
 		return 0;
 	spin_lock_irqsave(&pd_lock, saved_flags);
 	__blk_end_request_cur(pd_req, 0);
-	pd_count = pd_req->current_nr_sectors;
+	pd_count = blk_rq_cur_sectors(pd_req);
 	pd_buf = pd_req->buffer;
 	spin_unlock_irqrestore(&pd_lock, saved_flags);
 	return 0;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 3871e35..e88c889 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -768,9 +768,9 @@ repeat:
 		return;
 
 	pf_current = pf_req->rq_disk->private_data;
-	pf_block = pf_req->sector;
-	pf_run = pf_req->nr_sectors;
-	pf_count = pf_req->current_nr_sectors;
+	pf_block = blk_rq_pos(pf_req);
+	pf_run = blk_rq_sectors(pf_req);
+	pf_count = blk_rq_cur_sectors(pf_req);
 
 	if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
 		pf_end_request(-EIO);
@@ -810,7 +810,7 @@ static int pf_next_buf(void)
 		spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 		if (!pf_req)
 			return 1;
-		pf_count = pf_req->current_nr_sectors;
+		pf_count = blk_rq_cur_sectors(pf_req);
 		pf_buf = pf_req->buffer;
 	}
 	return 0;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index c238867..8d58308 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -134,13 +134,12 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	rq_for_each_segment(bv, req, iter)
 		n++;
 	dev_dbg(&dev->sbd.core,
-		"%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n",
-		__func__, __LINE__, op, n, req->nr_sectors,
-		blk_rq_sectors(req));
+		"%s:%u: %s req has %u bvecs for %u sectors\n",
+		__func__, __LINE__, op, n, blk_rq_sectors(req));
 #endif
 
-	start_sector = req->sector * priv->blocking_factor;
-	sectors = req->nr_sectors * priv->blocking_factor;
+	start_sector = blk_rq_pos(req) * priv->blocking_factor;
+	sectors = blk_rq_sectors(req) * priv->blocking_factor;
 	dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n",
 		__func__, __LINE__, op, sectors, start_sector);
 
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index f59887c..9f351bf 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -416,7 +416,7 @@ static int __send_request(struct request *req)
 		desc->slice = 0;
 	}
 	desc->status = ~0;
-	desc->offset = (req->sector << 9) / port->vdisk_block_size;
+	desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
 	desc->size = len;
 	desc->ncookies = err;
 
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 97ef426..fc6a1c3 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -531,7 +531,7 @@ static void redo_fd_request(struct request_queue *q)
 	while ((req = elv_next_request(q))) {
 
 		fs = req->rq_disk->private_data;
-		if (req->sector < 0 || req->sector >= fs->total_secs) {
+		if (blk_rq_pos(req) >= fs->total_secs) {
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
@@ -551,8 +551,8 @@ static void redo_fd_request(struct request_queue *q)
 			__blk_end_request_cur(req, -EIO);
 			break;
 		case READ:
-			if (floppy_read_sectors(fs, req->sector,
-						req->current_nr_sectors,
+			if (floppy_read_sectors(fs, blk_rq_pos(req),
+						blk_rq_cur_sectors(req),
 						req->buffer)) {
 				__blk_end_request_cur(req, -EIO);
 				continue;
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 4248559..c1b9a4d 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -312,14 +312,14 @@ static void start_request(struct floppy_state *fs)
 	}
 	while (fs->state == idle && (req = elv_next_request(swim3_queue))) {
 #if 0
-		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
+		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n",
 		       req->rq_disk->disk_name, req->cmd,
-		       (long)req->sector, req->nr_sectors, req->buffer);
-		printk("           errors=%d current_nr_sectors=%ld\n",
-		       req->errors, req->current_nr_sectors);
+		       (long)blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
+		printk("           errors=%d current_nr_sectors=%u\n",
+		       req->errors, blk_rq_cur_sectors(req));
 #endif
 
-		if (req->sector >= fs->total_secs) {
+		if (blk_rq_pos(req) >= fs->total_secs) {
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
@@ -337,13 +337,14 @@ static void start_request(struct floppy_state *fs)
 			}
 		}
 
-		/* Do not remove the cast. req->sector is now a sector_t and
-		 * can be 64 bits, but it will never go past 32 bits for this
-		 * driver anyway, so we can safely cast it down and not have
-		 * to do a 64/32 division
+		/* Do not remove the cast. blk_rq_pos(req) is now a
+		 * sector_t and can be 64 bits, but it will never go
+		 * past 32 bits for this driver anyway, so we can
+		 * safely cast it down and not have to do a 64/32
+		 * division
 		 */
-		fs->req_cyl = ((long)req->sector) / fs->secpercyl;
-		x = ((long)req->sector) % fs->secpercyl;
+		fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl;
+		x = ((long)blk_rq_pos(req)) % fs->secpercyl;
 		fs->head = x / fs->secpertrack;
 		fs->req_sector = x % fs->secpertrack + 1;
 		fd_req = req;
@@ -420,7 +421,7 @@ static inline void setup_transfer(struct floppy_state *fs)
 	struct dbdma_cmd *cp = fs->dma_cmd;
 	struct dbdma_regs __iomem *dr = fs->dma;
 
-	if (fd_req->current_nr_sectors <= 0) {
+	if (blk_rq_cur_sectors(fd_req) <= 0) {
 		printk(KERN_ERR "swim3: transfer 0 sectors?\n");
 		return;
 	}
@@ -428,8 +429,8 @@ static inline void setup_transfer(struct floppy_state *fs)
 		n = 1;
 	else {
 		n = fs->secpertrack - fs->req_sector + 1;
-		if (n > fd_req->current_nr_sectors)
-			n = fd_req->current_nr_sectors;
+		if (n > blk_rq_cur_sectors(fd_req))
+			n = blk_rq_cur_sectors(fd_req);
 	}
 	fs->scount = n;
 	swim3_select(fs, fs->head? READ_DATA_1: READ_DATA_0);
@@ -600,7 +601,8 @@ static void xfer_timeout(unsigned long data)
 	out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION);
 	out_8(&sw->select, RELAX);
 	printk(KERN_ERR "swim3: timeout %sing sector %ld\n",
-	       (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector);
+	       (rq_data_dir(fd_req)==WRITE? "writ": "read"),
+	       (long)blk_rq_pos(fd_req));
 	__blk_end_request_cur(fd_req, -EIO);
 	fs->state = idle;
 	start_request(fs);
@@ -714,7 +716,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 			} else {
 				printk("swim3: error %sing block %ld (err=%x)\n",
 				       rq_data_dir(fd_req) == WRITE? "writ": "read",
-				       (long)fd_req->sector, err);
+				       (long)blk_rq_pos(fd_req), err);
 				__blk_end_request_cur(fd_req, -EIO);
 				fs->state = idle;
 			}
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 60e85bb..087c94c 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -903,10 +903,10 @@ queue_one_request:
 	msg->sg_count	= n_elem;
 	msg->sg_type	= SGT_32BIT;
 	msg->handle	= cpu_to_le32(TAG_ENCODE(crq->tag));
-	msg->lba	= cpu_to_le32(rq->sector & 0xffffffff);
-	tmp		= (rq->sector >> 16) >> 16;
+	msg->lba	= cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
+	tmp		= (blk_rq_pos(rq) >> 16) >> 16;
 	msg->lba_high	= cpu_to_le16( (u16) tmp );
-	msg->lba_count	= cpu_to_le16(rq->nr_sectors);
+	msg->lba_count	= cpu_to_le16(blk_rq_sectors(rq));
 
 	msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
 	for (i = 0; i < n_elem; i++) {
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 8c2cc71..dc3b899 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -726,8 +726,8 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 	 * The call to blk_queue_hardsect_size() guarantees that request
 	 * is aligned, but it is given in terms of 512 byte units, always.
 	 */
-	block = rq->sector >> lun->capacity.bshift;
-	nblks = rq->nr_sectors >> lun->capacity.bshift;
+	block = blk_rq_pos(rq) >> lun->capacity.bshift;
+	nblks = blk_rq_sectors(rq) >> lun->capacity.bshift;
 
 	cmd->cdb[0] = (cmd->dir == UB_DIR_READ)? READ_10: WRITE_10;
 	/* 10-byte uses 4 bytes of LBA: 2147483648KB, 2097152MB, 2048GB */
@@ -739,7 +739,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 	cmd->cdb[8] = nblks;
 	cmd->cdb_len = 10;
 
-	cmd->len = rq->nr_sectors * 512;
+	cmd->len = blk_rq_sectors(rq) * 512;
 }
 
 static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index e821eed..2086cb1 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -252,7 +252,7 @@ static int send_request(struct request *req)
 	struct viodasd_device *d;
 	unsigned long flags;
 
-	start = (u64)req->sector << 9;
+	start = (u64)blk_rq_pos(req) << 9;
 
 	if (rq_data_dir(req) == READ) {
 		direction = DMA_FROM_DEVICE;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 50745e6..1980ab4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -85,7 +85,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	vbr->req = req;
 	if (blk_fs_request(vbr->req)) {
 		vbr->out_hdr.type = 0;
-		vbr->out_hdr.sector = vbr->req->sector;
+		vbr->out_hdr.sector = blk_rq_pos(vbr->req);
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 	} else if (blk_pc_request(vbr->req)) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 14be4c1..4ef8801 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -306,8 +306,8 @@ static void do_xd_request (struct request_queue * q)
 		return;
 
 	while ((req = elv_next_request(q)) != NULL) {
-		unsigned block = req->sector;
-		unsigned count = req->nr_sectors;
+		unsigned block = blk_rq_pos(req);
+		unsigned count = blk_rq_sectors(req);
 		XD_INFO *disk = req->rq_disk->private_data;
 		int res = 0;
 		int retry;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index b456447..91fc565 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -231,7 +231,7 @@ static int blkif_queue_request(struct request *req)
 	info->shadow[id].request = (unsigned long)req;
 
 	ring_req->id = id;
-	ring_req->sector_number = (blkif_sector_t)req->sector;
+	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
 	ring_req->handle = info->handle;
 
 	ring_req->operation = rq_data_dir(req) ?
@@ -310,11 +310,10 @@ static void do_blkif_request(struct request_queue *rq)
 			goto wait;
 
 		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
-			 "(%u/%li) buffer:%p [%s]\n",
-			 req, req->cmd, (unsigned long)req->sector,
-			 req->current_nr_sectors,
-			 req->nr_sectors, req->buffer,
-			 rq_data_dir(req) ? "write" : "read");
+			 "(%u/%u) buffer:%p [%s]\n",
+			 req, req->cmd, (unsigned long)blk_rq_pos(req),
+			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
+			 req->buffer, rq_data_dir(req) ? "write" : "read");
 
 
 		blkdev_dequeue_request(req);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 5722931..97c99b4 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -646,13 +646,14 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		/* Okay, it's a data request, set it up for transfer */
 		dev_dbg(ace->dev,
 			"request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n",
-			(unsigned long long) req->sector, blk_rq_sectors(req),
-			req->current_nr_sectors, rq_data_dir(req));
+			(unsigned long long)blk_rq_pos(req),
+			blk_rq_sectors(req), blk_rq_cur_sectors(req),
+			rq_data_dir(req));
 
 		ace->req = req;
 		ace->data_ptr = req->buffer;
-		ace->data_count = req->current_nr_sectors * ACE_BUF_PER_SECTOR;
-		ace_out32(ace, ACE_MPULBA, req->sector & 0x0FFFFFFF);
+		ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR;
+		ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF);
 
 		count = blk_rq_sectors(req);
 		if (rq_data_dir(req)) {
@@ -688,7 +689,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			dev_dbg(ace->dev,
 				"CFBSY set; t=%i iter=%i c=%i dc=%i irq=%i\n",
 				ace->fsm_task, ace->fsm_iter_num,
-				ace->req->current_nr_sectors * 16,
+				blk_rq_cur_sectors(ace->req) * 16,
 				ace->data_count, ace->in_irq);
 			ace_fsm_yield(ace);	/* need to poll CFBSY bit */
 			break;
@@ -697,7 +698,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			dev_dbg(ace->dev,
 				"DATABUF not set; t=%i iter=%i c=%i dc=%i irq=%i\n",
 				ace->fsm_task, ace->fsm_iter_num,
-				ace->req->current_nr_sectors * 16,
+				blk_rq_cur_sectors(ace->req) * 16,
 				ace->data_count, ace->in_irq);
 			ace_fsm_yieldirq(ace);
 			break;
@@ -721,10 +722,10 @@ static void ace_fsm_dostate(struct ace_device *ace)
 					blk_rq_cur_bytes(ace->req))) {
 			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
 			 *      blk_rq_sectors(ace->req),
-			 *      ace->req->current_nr_sectors);
+			 *      blk_rq_cur_sectors(ace->req));
 			 */
 			ace->data_ptr = ace->req->buffer;
-			ace->data_count = ace->req->current_nr_sectors * 16;
+			ace->data_count = blk_rq_cur_sectors(ace->req) * 16;
 			ace_fsm_yieldirq(ace);
 			break;
 		}
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index b66ad58..d4e6b71 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -71,12 +71,12 @@ static void do_z2_request(struct request_queue *q)
 {
 	struct request *req;
 	while ((req = elv_next_request(q)) != NULL) {
-		unsigned long start = req->sector << 9;
-		unsigned long len  = req->current_nr_sectors << 9;
+		unsigned long start = blk_rq_pos(req) << 9;
+		unsigned long len  = blk_rq_cur_sectors(req) << 9;
 
 		if (start + len > z2ram_size) {
 			printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n",
-				req->sector, req->current_nr_sectors);
+				blk_rq_pos(req), blk_rq_cur_sectors(req));
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index cab2b1f..488423c 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -584,8 +584,8 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 	list_for_each_safe(elem, next, &gdrom_deferred) {
 		req = list_entry(elem, struct request, queuelist);
 		spin_unlock(&gdrom_lock);
-		block = req->sector/GD_TO_BLK + GD_SESSION_OFFSET;
-		block_cnt = req->nr_sectors/GD_TO_BLK;
+		block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET;
+		block_cnt = blk_rq_sectors(req)/GD_TO_BLK;
 		ctrl_outl(PHYSADDR(req->buffer), GDROM_DMA_STARTADDR_REG);
 		ctrl_outl(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG);
 		ctrl_outl(1, GDROM_DMA_DIRECTION_REG);
@@ -661,7 +661,7 @@ static void gdrom_request(struct request_queue *rq)
 			printk(" write request ignored\n");
 			__blk_end_request_cur(req, -EIO);
 		}
-		if (req->nr_sectors)
+		if (blk_rq_sectors(req))
 			gdrom_request_handler_dma(req);
 		else
 			__blk_end_request_cur(req, -EIO);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index cc3efa0..6e190a9 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -282,7 +282,7 @@ static int send_request(struct request *req)
 			viopath_targetinst(viopath_hostLp),
 			(u64)req, VIOVERSION << 16,
 			((u64)DEVICE_NR(diskinfo) << 48) | dmaaddr,
-			(u64)req->sector * 512, len, 0);
+			(u64)blk_rq_pos(req) * 512, len, 0);
 	if (hvrc != HvLpEvent_Rc_Good) {
 		printk(VIOCD_KERN_WARNING "hv error on op %d\n", (int)hvrc);
 		return -1;
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index a416346..9e600d2 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -677,10 +677,10 @@ try_again:
 			continue;
 		}
 
-		t_sec = msb->block_req->sector << 9;
+		t_sec = blk_rq_pos(msb->block_req) << 9;
 		sector_div(t_sec, msb->page_size);
 
-		count = msb->block_req->nr_sectors << 9;
+		count = blk_rq_sectors(msb->block_req) << 9;
 		count /= msb->page_size;
 
 		param.system = msb->system;
@@ -745,7 +745,7 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
 					t_len *= msb->page_size;
 			}
 		} else
-			t_len = msb->block_req->nr_sectors << 9;
+			t_len = blk_rq_sectors(msb->block_req) << 9;
 
 		dev_dbg(&card->dev, "transferred %x (%d)\n", t_len, error);
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 56e60f0..510eb47 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -761,7 +761,7 @@ static int i2o_block_transfer(struct request *req)
 			break;
 
 		case CACHE_SMARTFETCH:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x201F0008;
 			else
 				ctl_flags = 0x001F0000;
@@ -781,13 +781,13 @@ static int i2o_block_transfer(struct request *req)
 			ctl_flags = 0x001F0010;
 			break;
 		case CACHE_SMARTBACK:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x001F0004;
 			else
 				ctl_flags = 0x001F0010;
 			break;
 		case CACHE_SMARTTHROUGH:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x001F0004;
 			else
 				ctl_flags = 0x001F0010;
@@ -827,22 +827,24 @@ static int i2o_block_transfer(struct request *req)
 
 		*mptr++ = cpu_to_le32(scsi_flags);
 
-		*((u32 *) & cmd[2]) = cpu_to_be32(req->sector * hwsec);
-		*((u16 *) & cmd[7]) = cpu_to_be16(req->nr_sectors * hwsec);
+		*((u32 *) & cmd[2]) = cpu_to_be32(blk_rq_pos(req) * hwsec);
+		*((u16 *) & cmd[7]) = cpu_to_be16(blk_rq_sectors(req) * hwsec);
 
 		memcpy(mptr, cmd, 10);
 		mptr += 4;
-		*mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
+		*mptr++ =
+		    cpu_to_le32(blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
 	} else
 #endif
 	{
 		msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid);
 		*mptr++ = cpu_to_le32(ctl_flags);
-		*mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
 		*mptr++ =
-		    cpu_to_le32((u32) (req->sector << KERNEL_SECTOR_SHIFT));
+		    cpu_to_le32(blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
+		*mptr++ =
+		    cpu_to_le32((u32) (blk_rq_pos(req) << KERNEL_SECTOR_SHIFT));
 		*mptr++ =
-		    cpu_to_le32(req->sector >> (32 - KERNEL_SECTOR_SHIFT));
+		    cpu_to_le32(blk_rq_pos(req) >> (32 - KERNEL_SECTOR_SHIFT));
 	}
 
 	if (!i2o_block_sglist_alloc(c, ireq, &mptr)) {
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index fe8041e..949e997 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -243,7 +243,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		brq.mrq.cmd = &brq.cmd;
 		brq.mrq.data = &brq.data;
 
-		brq.cmd.arg = req->sector;
+		brq.cmd.arg = blk_rq_pos(req);
 		if (!mmc_card_blockaddr(card))
 			brq.cmd.arg <<= 9;
 		brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
@@ -251,7 +251,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		brq.stop.opcode = MMC_STOP_TRANSMISSION;
 		brq.stop.arg = 0;
 		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-		brq.data.blocks = req->nr_sectors;
+		brq.data.blocks = blk_rq_sectors(req);
 
 		/*
 		 * After a read error, we redo the request one sector at a time
@@ -293,7 +293,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		 * Adjust the sg list so it is the same size as the
 		 * request.
 		 */
-		if (brq.data.blocks != req->nr_sectors) {
+		if (brq.data.blocks != blk_rq_sectors(req)) {
 			int i, data_size = brq.data.blocks << 9;
 			struct scatterlist *sg;
 
@@ -344,8 +344,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 			printk(KERN_ERR "%s: error %d transferring data,"
 			       " sector %u, nr %u, card status %#x\n",
 			       req->rq_disk->disk_name, brq.data.error,
-			       (unsigned)req->sector,
-			       (unsigned)req->nr_sectors, status);
+			       (unsigned)blk_rq_pos(req),
+			       (unsigned)blk_rq_sectors(req), status);
 		}
 
 		if (brq.stop.error) {
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 76c4c8d..4ea2e67 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -47,8 +47,8 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	unsigned long block, nsect;
 	char *buf;
 
-	block = req->sector << 9 >> tr->blkshift;
-	nsect = req->current_nr_sectors << 9 >> tr->blkshift;
+	block = blk_rq_pos(req) << 9 >> tr->blkshift;
+	nsect = blk_rq_cur_sectors(req) << 9 >> tr->blkshift;
 
 	buf = req->buffer;
 
@@ -59,7 +59,8 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	if (!blk_fs_request(req))
 		return -EIO;
 
-	if (req->sector + req->current_nr_sectors > get_capacity(req->rq_disk))
+	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
+	    get_capacity(req->rq_disk))
 		return -EIO;
 
 	switch(rq_data_dir(req)) {
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index fabec95..7df03c7a 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -603,7 +603,7 @@ static void dasd_profile_end(struct dasd_block *block,
 	if (dasd_profile_level != DASD_PROFILE_ON)
 		return;
 
-	sectors = req->nr_sectors;
+	sectors = blk_rq_sectors(req);
 	if (!cqr->buildclk || !cqr->startclk ||
 	    !cqr->stopclk || !cqr->endclk ||
 	    !sectors)
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index b9a7f77..2efaddf 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -505,8 +505,9 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
 		return ERR_PTR(-EINVAL);
 	blksize = block->bp_block;
 	/* Calculate record id of first and last block. */
-	first_rec = req->sector >> block->s2b_shift;
-	last_rec = (req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+	first_rec = blk_rq_pos(req) >> block->s2b_shift;
+	last_rec =
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	/* Check struct bio and count the number of blocks for the request. */
 	count = 0;
 	rq_for_each_segment(bv, req, iter) {
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index cb52da0..a41c940 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2354,10 +2354,10 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev,
 	blksize = block->bp_block;
 	blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
 	/* Calculate record id of first and last block. */
-	first_rec = first_trk = req->sector >> block->s2b_shift;
+	first_rec = first_trk = blk_rq_pos(req) >> block->s2b_shift;
 	first_offs = sector_div(first_trk, blk_per_trk);
 	last_rec = last_trk =
-		(req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	last_offs = sector_div(last_trk, blk_per_trk);
 	cdlspecial = (private->uses_cdl && first_rec < 2*blk_per_trk);
 
@@ -2420,7 +2420,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
 	private = (struct dasd_eckd_private *) cqr->block->base->private;
 	blksize = cqr->block->bp_block;
 	blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
-	recid = req->sector >> cqr->block->s2b_shift;
+	recid = blk_rq_pos(req) >> cqr->block->s2b_shift;
 	ccw = cqr->cpaddr;
 	/* Skip over define extent & locate record. */
 	ccw++;
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index a3eb6fd..8912358 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -270,8 +270,9 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
 		return ERR_PTR(-EINVAL);
 	blksize = block->bp_block;
 	/* Calculate record id of first and last block. */
-	first_rec = req->sector >> block->s2b_shift;
-	last_rec = (req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+	first_rec = blk_rq_pos(req) >> block->s2b_shift;
+	last_rec =
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	/* Check struct bio and count the number of blocks for the request. */
 	count = 0;
 	cidaw = 0;
@@ -309,7 +310,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
 	ccw = cqr->cpaddr;
 	/* First ccw is define extent. */
 	define_extent(ccw++, cqr->data, rq_data_dir(req),
-		      block->bp_block, req->sector, req->nr_sectors);
+		      block->bp_block, blk_rq_pos(req), blk_rq_sectors(req));
 	/* Build locate_record + read/write ccws. */
 	idaws = (unsigned long *) (cqr->data + sizeof(struct DE_fba_data));
 	LO_data = (struct LO_fba_data *) (idaws + cidaw);
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index 5f8e8ef..2d00a38 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -1134,7 +1134,7 @@ tape_34xx_bread(struct tape_device *device, struct request *req)
 	/* Setup ccws. */
 	request->op = TO_BLOCK;
 	start_block = (struct tape_34xx_block_id *) request->cpdata;
-	start_block->block = req->sector >> TAPEBLOCK_HSEC_S2B;
+	start_block->block = blk_rq_pos(req) >> TAPEBLOCK_HSEC_S2B;
 	DBF_EVENT(6, "start_block = %i\n", start_block->block);
 
 	ccw = request->cpaddr;
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index 823b05b..c453b2f 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -633,7 +633,7 @@ tape_3590_bread(struct tape_device *device, struct request *req)
 	struct req_iterator iter;
 
 	DBF_EVENT(6, "xBREDid:");
-	start_block = req->sector >> TAPEBLOCK_HSEC_S2B;
+	start_block = blk_rq_pos(req) >> TAPEBLOCK_HSEC_S2B;
 	DBF_EVENT(6, "start_block = %i\n", start_block);
 
 	rq_for_each_segment(bv, req, iter)
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 86596d3..5d035e4 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -87,7 +87,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 	if (ccw_req->rc == 0)
 		/* Update position. */
 		device->blk_data.block_position =
-			(req->sector + req->nr_sectors) >> TAPEBLOCK_HSEC_S2B;
+		  (blk_rq_pos(req) + blk_rq_sectors(req)) >> TAPEBLOCK_HSEC_S2B;
 	else
 		/* We lost the position information due to an error. */
 		device->blk_data.block_position = -1;
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 0961788..2132c90 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -188,8 +188,8 @@ static void jsfd_do_request(struct request_queue *q)
 
 	while ((req = elv_next_request(q)) != NULL) {
 		struct jsfd_part *jdp = req->rq_disk->private_data;
-		unsigned long offset = req->sector << 9;
-		size_t len = req->current_nr_sectors << 9;
+		unsigned long offset = blk_rq_pos(req) << 9;
+		size_t len = blk_rq_cur_sectors(req) << 9;
 
 		if ((offset + len) > jdp->dsize) {
 			__blk_end_request_cur(req, -EIO);
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index be5099d..c7076ce 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -1825,7 +1825,7 @@ static int eata2x_queuecommand(struct scsi_cmnd *SCpnt,
 	if (linked_comm && SCpnt->device->queue_depth > 2
 	    && TLDEV(SCpnt->device->type)) {
 		ha->cp_stat[i] = READY;
-		flush_dev(SCpnt->device, SCpnt->request->sector, ha, 0);
+		flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), ha, 0);
 		return 0;
 	}
 
@@ -2144,13 +2144,13 @@ static int reorder(struct hostdata *ha, unsigned long cursec,
 		if (!cpp->din)
 			input_only = 0;
 
-		if (SCpnt->request->sector < minsec)
-			minsec = SCpnt->request->sector;
-		if (SCpnt->request->sector > maxsec)
-			maxsec = SCpnt->request->sector;
+		if (blk_rq_pos(SCpnt->request) < minsec)
+			minsec = blk_rq_pos(SCpnt->request);
+		if (blk_rq_pos(SCpnt->request) > maxsec)
+			maxsec = blk_rq_pos(SCpnt->request);
 
-		sl[n] = SCpnt->request->sector;
-		ioseek += SCpnt->request->nr_sectors;
+		sl[n] = blk_rq_pos(SCpnt->request);
+		ioseek += blk_rq_sectors(SCpnt->request);
 
 		if (!n)
 			continue;
@@ -2190,7 +2190,7 @@ static int reorder(struct hostdata *ha, unsigned long cursec,
 			k = il[n];
 			cpp = &ha->cp[k];
 			SCpnt = cpp->SCpnt;
-			ll[n] = SCpnt->request->nr_sectors;
+			ll[n] = blk_rq_sectors(SCpnt->request);
 			pl[n] = SCpnt->serial_number;
 
 			if (!n)
@@ -2236,12 +2236,12 @@ static int reorder(struct hostdata *ha, unsigned long cursec,
 			cpp = &ha->cp[k];
 			SCpnt = cpp->SCpnt;
 			scmd_printk(KERN_INFO, SCpnt,
-			    "%s pid %ld mb %d fc %d nr %d sec %ld ns %ld"
+			    "%s pid %ld mb %d fc %d nr %d sec %ld ns %u"
 			     " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n",
 			     (ihdlr ? "ihdlr" : "qcomm"),
 			     SCpnt->serial_number, k, flushcount,
-			     n_ready, SCpnt->request->sector,
-			     SCpnt->request->nr_sectors, cursec, YESNO(s),
+			     n_ready, blk_rq_pos(SCpnt->request),
+			     blk_rq_sectors(SCpnt->request), cursec, YESNO(s),
 			     YESNO(r), YESNO(rev), YESNO(input_only),
 			     YESNO(overlap), cpp->din);
 		}
@@ -2408,7 +2408,7 @@ static irqreturn_t ihdlr(struct Scsi_Host *shost)
 
 	if (linked_comm && SCpnt->device->queue_depth > 2
 	    && TLDEV(SCpnt->device->type))
-		flush_dev(SCpnt->device, SCpnt->request->sector, ha, 1);
+		flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), ha, 1);
 
 	tstatus = status_byte(spp->target_status);
 
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 36fd2e7..a8fab39 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1313,10 +1313,10 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd,
 	uint32_t bgstat = bgf->bgstat;
 	uint64_t failing_sector = 0;
 
-	printk(KERN_ERR "BG ERROR in cmd 0x%x lba 0x%llx blk cnt 0x%lx "
+	printk(KERN_ERR "BG ERROR in cmd 0x%x lba 0x%llx blk cnt 0x%x "
 			"bgstat=0x%x bghm=0x%x\n",
 			cmd->cmnd[0], (unsigned long long)scsi_get_lba(cmd),
-			cmd->request->nr_sectors, bgstat, bghm);
+			blk_rq_sectors(cmd->request), bgstat, bghm);
 
 	spin_lock(&_dump_buf_lock);
 	if (!_dump_buf_done) {
@@ -2375,15 +2375,15 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 		if (cmnd->cmnd[0] == READ_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					"9035 BLKGRD: READ @ sector %llu, "
-					 "count %lu\n",
-					 (unsigned long long)scsi_get_lba(cmnd),
-					cmnd->request->nr_sectors);
+					"count %u\n",
+					(unsigned long long)scsi_get_lba(cmnd),
+					blk_rq_sectors(cmnd->request));
 		else if (cmnd->cmnd[0] == WRITE_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					"9036 BLKGRD: WRITE @ sector %llu, "
-					"count %lu cmd=%p\n",
+					"count %u cmd=%p\n",
 					(unsigned long long)scsi_get_lba(cmnd),
-					cmnd->request->nr_sectors,
+					blk_rq_sectors(cmnd->request),
 					cmnd);
 
 		err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd);
@@ -2403,15 +2403,15 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 		if (cmnd->cmnd[0] == READ_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9040 dbg: READ @ sector %llu, "
-					 "count %lu\n",
+					 "count %u\n",
 					 (unsigned long long)scsi_get_lba(cmnd),
-					 cmnd->request->nr_sectors);
+					 blk_rq_sectors(cmnd->request));
 		else if (cmnd->cmnd[0] == WRITE_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9041 dbg: WRITE @ sector %llu, "
-					 "count %lu cmd=%p\n",
+					 "count %u cmd=%p\n",
 					 (unsigned long long)scsi_get_lba(cmnd),
-					 cmnd->request->nr_sectors, cmnd);
+					 blk_rq_sectors(cmnd->request), cmnd);
 		else
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9042 dbg: parser not implemented\n");
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9ff0ca9..39b3acf 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -787,9 +787,9 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	 * Next deal with any sectors which we were able to correctly
 	 * handle.
 	 */
-	SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
+	SCSI_LOG_HLCOMPLETE(1, printk("%u sectors total, "
 				      "%d bytes done.\n",
-				      req->nr_sectors, good_bytes));
+				      blk_rq_sectors(req), good_bytes));
 
 	/*
 	 * Recovered errors need reporting, but they're always treated
@@ -968,7 +968,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
 	if (blk_pc_request(req))
 		sdb->length = req->data_len;
 	else
-		sdb->length = req->nr_sectors << 9;
+		sdb->length = blk_rq_sectors(req) << 9;
 	return BLKPREP_OK;
 }
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3fcb64b..70c4dd9 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -383,9 +383,9 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	struct scsi_device *sdp = q->queuedata;
 	struct gendisk *disk = rq->rq_disk;
 	struct scsi_disk *sdkp;
-	sector_t block = rq->sector;
+	sector_t block = blk_rq_pos(rq);
 	sector_t threshold;
-	unsigned int this_count = rq->nr_sectors;
+	unsigned int this_count = blk_rq_sectors(rq);
 	int ret, host_dif;
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -412,10 +412,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 					this_count));
 
 	if (!sdp || !scsi_device_online(sdp) ||
- 	    block + rq->nr_sectors > get_capacity(disk)) {
+	    block + blk_rq_sectors(rq) > get_capacity(disk)) {
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
-						"Finishing %ld sectors\n",
-						rq->nr_sectors));
+						"Finishing %u sectors\n",
+						blk_rq_sectors(rq)));
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
 						"Retry with 0x%p\n", SCpnt));
 		goto out;
@@ -462,7 +462,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	 * for this.
 	 */
 	if (sdp->sector_size == 1024) {
-		if ((block & 1) || (rq->nr_sectors & 1)) {
+		if ((block & 1) || (blk_rq_sectors(rq) & 1)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -472,7 +472,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 		}
 	}
 	if (sdp->sector_size == 2048) {
-		if ((block & 3) || (rq->nr_sectors & 3)) {
+		if ((block & 3) || (blk_rq_sectors(rq) & 3)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -482,7 +482,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 		}
 	}
 	if (sdp->sector_size == 4096) {
-		if ((block & 7) || (rq->nr_sectors & 7)) {
+		if ((block & 7) || (blk_rq_sectors(rq) & 7)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -511,10 +511,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	}
 
 	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
-					"%s %d/%ld 512 byte blocks.\n",
+					"%s %d/%u 512 byte blocks.\n",
 					(rq_data_dir(rq) == WRITE) ?
 					"writing" : "reading", this_count,
-					rq->nr_sectors));
+					blk_rq_sectors(rq)));
 
 	/* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */
 	host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
@@ -970,8 +970,8 @@ static struct block_device_operations sd_fops = {
 
 static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
 {
-	u64 start_lba = scmd->request->sector;
-	u64 end_lba = scmd->request->sector + (scsi_bufflen(scmd) / 512);
+	u64 start_lba = blk_rq_pos(scmd->request);
+	u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512);
 	u64 bad_lba;
 	int info_valid;
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 184dff4..82f14a9 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -507,7 +507,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 	sector_sz = scmd->device->sector_size;
 	sectors = good_bytes / sector_sz;
 
-	phys = scmd->request->sector & 0xffffffff;
+	phys = blk_rq_pos(scmd->request) & 0xffffffff;
 	if (sector_sz == 4096)
 		phys >>= 3;
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0e1a0f2..fddba53 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -292,7 +292,8 @@ static int sr_done(struct scsi_cmnd *SCpnt)
 			if (cd->device->sector_size == 2048)
 				error_sector <<= 2;
 			error_sector &= ~(block_sectors - 1);
-			good_bytes = (error_sector - SCpnt->request->sector) << 9;
+			good_bytes = (error_sector -
+				      blk_rq_pos(SCpnt->request)) << 9;
 			if (good_bytes < 0 || good_bytes >= this_count)
 				good_bytes = 0;
 			/*
@@ -349,8 +350,8 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 				cd->disk->disk_name, block));
 
 	if (!cd->device || !scsi_device_online(cd->device)) {
-		SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n",
-					rq->nr_sectors));
+		SCSI_LOG_HLQUEUE(2, printk("Finishing %u sectors\n",
+					   blk_rq_sectors(rq)));
 		SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt));
 		goto out;
 	}
@@ -413,7 +414,7 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 	/*
 	 * request doesn't start on hw block boundary, add scatter pads
 	 */
-	if (((unsigned int)rq->sector % (s_size >> 9)) ||
+	if (((unsigned int)blk_rq_pos(rq) % (s_size >> 9)) ||
 	    (scsi_bufflen(SCpnt) % s_size)) {
 		scmd_printk(KERN_NOTICE, SCpnt, "unaligned transfer\n");
 		goto out;
@@ -422,14 +423,14 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 	this_count = (scsi_bufflen(SCpnt) >> 9) / (s_size >> 9);
 
 
-	SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n",
+	SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%u 512 byte blocks.\n",
 				cd->cdi.name,
 				(rq_data_dir(rq) == WRITE) ?
 					"writing" : "reading",
-				this_count, rq->nr_sectors));
+				this_count, blk_rq_sectors(rq)));
 
 	SCpnt->cmnd[1] = 0;
-	block = (unsigned int)rq->sector / (s_size >> 9);
+	block = (unsigned int)blk_rq_pos(rq) / (s_size >> 9);
 
 	if (this_count > 0xffff) {
 		this_count = 0xffff;
diff --git a/drivers/scsi/u14-34f.c b/drivers/scsi/u14-34f.c
index 601e951..54023d41 100644
--- a/drivers/scsi/u14-34f.c
+++ b/drivers/scsi/u14-34f.c
@@ -1306,7 +1306,7 @@ static int u14_34f_queuecommand(struct scsi_cmnd *SCpnt, void (*done)(struct scs
    if (linked_comm && SCpnt->device->queue_depth > 2
                                      && TLDEV(SCpnt->device->type)) {
       HD(j)->cp_stat[i] = READY;
-      flush_dev(SCpnt->device, SCpnt->request->sector, j, FALSE);
+      flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), j, FALSE);
       return 0;
       }
 
@@ -1610,11 +1610,13 @@ static int reorder(unsigned int j, unsigned long cursec,
 
       if (!(cpp->xdir == DTD_IN)) input_only = FALSE;
 
-      if (SCpnt->request->sector < minsec) minsec = SCpnt->request->sector;
-      if (SCpnt->request->sector > maxsec) maxsec = SCpnt->request->sector;
+      if (blk_rq_pos(SCpnt->request) < minsec)
+	 minsec = blk_rq_pos(SCpnt->request);
+      if (blk_rq_pos(SCpnt->request) > maxsec)
+	 maxsec = blk_rq_pos(SCpnt->request);
 
-      sl[n] = SCpnt->request->sector;
-      ioseek += SCpnt->request->nr_sectors;
+      sl[n] = blk_rq_pos(SCpnt->request);
+      ioseek += blk_rq_sectors(SCpnt->request);
 
       if (!n) continue;
 
@@ -1642,7 +1644,7 @@ static int reorder(unsigned int j, unsigned long cursec,
 
    if (!input_only) for (n = 0; n < n_ready; n++) {
       k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt;
-      ll[n] = SCpnt->request->nr_sectors; pl[n] = SCpnt->serial_number;
+      ll[n] = blk_rq_sectors(SCpnt->request); pl[n] = SCpnt->serial_number;
 
       if (!n) continue;
 
@@ -1666,12 +1668,12 @@ static int reorder(unsigned int j, unsigned long cursec,
    if (link_statistics && (overlap || !(flushcount % link_statistics)))
       for (n = 0; n < n_ready; n++) {
          k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt;
-         printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %ld"\
+         printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %u"\
                 " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n",
                 (ihdlr ? "ihdlr" : "qcomm"), SCpnt->channel, SCpnt->target,
                 SCpnt->lun, SCpnt->serial_number, k, flushcount, n_ready,
-                SCpnt->request->sector, SCpnt->request->nr_sectors, cursec,
-                YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only),
+                blk_rq_pos(SCpnt->request), blk_rq_sectors(SCpnt->request),
+		cursec, YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only),
                 YESNO(overlap), cpp->xdir);
          }
 #endif
@@ -1799,7 +1801,7 @@ static irqreturn_t ihdlr(unsigned int j)
 
    if (linked_comm && SCpnt->device->queue_depth > 2
                                      && TLDEV(SCpnt->device->type))
-      flush_dev(SCpnt->device, SCpnt->request->sector, j, TRUE);
+      flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), j, TRUE);
 
    tstatus = status_byte(spp->target_status);
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 43b50d3..3878d1d 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -270,7 +270,7 @@ static inline unsigned char scsi_get_prot_type(struct scsi_cmnd *scmd)
 
 static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd)
 {
-	return scmd->request->sector;
+	return blk_rq_pos(scmd->request);
 }
 
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
-- 
cgit v0.10.2


From 9780e2dd8254351f6cbe11304849126b51dbd561 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:40 +0900
Subject: ide: convert to rq pos and nr_sectors accessors

ide doesn't manipulate request fields anymore and thus all hard and
their soft equivalents are always equal.  Convert all references to
accessors.

[ Impact: use pos and nr_sectors accessors ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 182320d..eb4f3dc 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -775,8 +775,8 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
 	}
 
 	/* fs requests *must* be hardware frame aligned */
-	if ((rq->nr_sectors & (sectors_per_frame - 1)) ||
-	    (rq->sector & (sectors_per_frame - 1)))
+	if ((blk_rq_sectors(rq) & (sectors_per_frame - 1)) ||
+	    (blk_rq_pos(rq) & (sectors_per_frame - 1)))
 		return ide_stopped;
 
 	/* use DMA, if possible */
@@ -868,8 +868,8 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 	cmd.rq = rq;
 
 	if (blk_fs_request(rq) || rq->data_len) {
-		ide_init_sg_cmd(&cmd, blk_fs_request(rq) ? (rq->nr_sectors << 9)
-							 : rq->data_len);
+		ide_init_sg_cmd(&cmd, blk_fs_request(rq) ?
+				(blk_rq_sectors(rq) << 9) : rq->data_len);
 		ide_map_sg(drive, &cmd);
 	}
 
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index c243880..ad18e14 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -82,7 +82,7 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
 	ide_hwif_t *hwif	= drive->hwif;
-	u16 nsectors		= (u16)rq->nr_sectors;
+	u16 nsectors		= (u16)blk_rq_sectors(rq);
 	u8 lba48		= !!(drive->dev_flags & IDE_DFLAG_LBA48);
 	u8 dma			= !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 	struct ide_cmd		cmd;
@@ -90,7 +90,7 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 	ide_startstop_t		rc;
 
 	if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && lba48 && dma) {
-		if (block + rq->nr_sectors > 1ULL << 28)
+		if (block + blk_rq_sectors(rq) > 1ULL << 28)
 			dma = 0;
 		else
 			lba48 = 0;
@@ -195,9 +195,9 @@ static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 
 	ledtrig_ide_activity();
 
-	pr_debug("%s: %sing: block=%llu, sectors=%lu, buffer=0x%08lx\n",
+	pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n",
 		 drive->name, rq_data_dir(rq) == READ ? "read" : "writ",
-		 (unsigned long long)block, rq->nr_sectors,
+		 (unsigned long long)block, blk_rq_sectors(rq),
 		 (unsigned long)rq->buffer);
 
 	if (hwif->rw_disk)
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index d9123ec..001f68f 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -103,7 +103,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
 				ide_finish_cmd(drive, cmd, stat);
 			else
 				ide_complete_rq(drive, 0,
-						cmd->rq->nr_sectors << 9);
+						blk_rq_sectors(cmd->rq) << 9);
 			return ide_stopped;
 		}
 		printk(KERN_ERR "%s: %s: bad DMA status (0x%02x)\n",
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 537b7c5..1c460bd 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -194,7 +194,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
 	int block = sector / floppy->bs_factor;
-	int blocks = rq->nr_sectors / floppy->bs_factor;
+	int blocks = blk_rq_sectors(rq) / floppy->bs_factor;
 	int cmd = rq_data_dir(rq);
 
 	ide_debug_log(IDE_DBG_FUNC, "block: %d, blocks: %d", block, blocks);
@@ -259,8 +259,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 			goto out_end;
 	}
 	if (blk_fs_request(rq)) {
-		if (((long)rq->sector % floppy->bs_factor) ||
-		    (rq->nr_sectors % floppy->bs_factor)) {
+		if (((long)blk_rq_pos(rq) % floppy->bs_factor) ||
+		    (blk_rq_sectors(rq) % floppy->bs_factor)) {
 			printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
 				drive->name);
 			goto out_end;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index df23bcb..59799ca 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -279,7 +279,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 
 	if (cmd) {
 		if (cmd->protocol == ATA_PROT_PIO) {
-			ide_init_sg_cmd(cmd, rq->nr_sectors << 9);
+			ide_init_sg_cmd(cmd, blk_rq_sectors(rq) << 9);
 			ide_map_sg(drive, cmd);
 		}
 
@@ -387,7 +387,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 
 		drv = *(struct ide_driver **)rq->rq_disk->private_data;
 
-		return drv->do_request(drive, rq, rq->sector);
+		return drv->do_request(drive, rq, blk_rq_pos(rq));
 	}
 	return do_special(drive);
 kill_rq:
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 56ff8c4..05b7fbc 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -114,7 +114,7 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
 
 		if (rq)
 			printk(KERN_CONT ", sector=%llu",
-			       (unsigned long long)rq->sector);
+			       (unsigned long long)blk_rq_pos(rq));
 	}
 	printk(KERN_CONT "\n");
 }
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 65c5b70..c6cf3a9 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -586,7 +586,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 				   struct ide_atapi_pc *pc, struct request *rq,
 				   u8 opcode)
 {
-	unsigned int length = rq->nr_sectors;
+	unsigned int length = blk_rq_sectors(rq);
 
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
@@ -617,8 +617,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	struct ide_cmd cmd;
 	u8 stat;
 
-	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n"
-		  (unsigned long long)rq->sector, rq->nr_sectors);
+	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %u\n"
+		  (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq));
 
 	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index f400eb4..a0c3e1b 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -385,7 +385,7 @@ out_end:
 	if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
 		ide_finish_cmd(drive, cmd, stat);
 	else
-		ide_complete_rq(drive, 0, cmd->rq->nr_sectors << 9);
+		ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
 	return ide_stopped;
 out_err:
 	ide_error_cmd(drive, cmd);
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 248a54b..c2a16a8 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -177,7 +177,7 @@ static void pdc202xx_dma_start(ide_drive_t *drive)
 		u8 clock = inb(high_16 + 0x11);
 
 		outb(clock | (hwif->channel ? 0x08 : 0x02), high_16 + 0x11);
-		word_count = (rq->nr_sectors << 8);
+		word_count = (blk_rq_sectors(rq) << 8);
 		word_count = (rq_data_dir(rq) == READ) ?
 					word_count | 0x05000000 :
 					word_count | 0x06000000;
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index b4cf42d..05a93d6 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -112,7 +112,7 @@ static void tc86c001_dma_start(ide_drive_t *drive)
 	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long sc_base	= hwif->config_data;
 	unsigned long twcr_port	= sc_base + (drive->dn ? 0x06 : 0x04);
-	unsigned long nsectors	= hwif->rq->nr_sectors;
+	unsigned long nsectors	= blk_rq_sectors(hwif->rq);
 
 	/*
 	 * We have to manually load the sector count and size into
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 564422d..5ca7622 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -307,7 +307,7 @@ static int tx4939ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
 	tx4939ide_writew(SECTOR_SIZE / 2, base, drive->dn ?
 			 TX4939IDE_Xfer_Cnt_2 : TX4939IDE_Xfer_Cnt_1);
 
-	tx4939ide_writew(cmd->rq->nr_sectors, base, TX4939IDE_Sec_Cnt);
+	tx4939ide_writew(blk_rq_sectors(cmd->rq), base, TX4939IDE_Sec_Cnt);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 2e46e8b27aa57c6bd34b3102b40ee4d0144b4fab Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:41 +0900
Subject: block: drop request->hard_* and *nr_sectors

struct request has had a few different ways to represent some
properties of a request.  ->hard_* represent block layer's view of the
request progress (completion cursor) and the ones without the prefix
are supposed to represent the issue cursor and allowed to be updated
as necessary by the low level drivers.  The thing is that as block
layer supports partial completion, the two cursors really aren't
necessary and only cause confusion.  In addition, manual management of
request detail from low level drivers is cumbersome and error-prone at
the very least.

Another interesting duplicate fields are rq->[hard_]nr_sectors and
rq->{hard_cur|current}_nr_sectors against rq->data_len and
rq->bio->bi_size.  This is more convoluted than the hard_ case.

rq->[hard_]nr_sectors are initialized for requests with bio but
blk_rq_bytes() uses it only for !pc requests.  rq->data_len is
initialized for all request but blk_rq_bytes() uses it only for pc
requests.  This causes good amount of confusion throughout block layer
and its drivers and determining the request length has been a bit of
black magic which may or may not work depending on circumstances and
what the specific LLD is actually doing.

rq->{hard_cur|current}_nr_sectors represent the number of sectors in
the contiguous data area at the front.  This is mainly used by drivers
which transfers data by walking request segment-by-segment.  This
value always equals rq->bio->bi_size >> 9.  However, data length for
pc requests may not be multiple of 512 bytes and using this field
becomes a bit confusing.

In general, having multiple fields to represent the same property
leads only to confusion and subtle bugs.  With recent block low level
driver cleanups, no driver is accessing or manipulating these
duplicate fields directly.  Drop all the duplicates.  Now rq->sector
means the current sector, rq->data_len the current total length and
rq->bio->bi_size the current segment length.  Everything else is
defined in terms of these three and available only through accessors.

* blk_recalc_rq_sectors() is collapsed into blk_update_request() and
  now handles pc and fs requests equally other than rq->sector update.
  This means that now pc requests can use partial completion too (no
  in-kernel user yet tho).

* bio_cur_sectors() is replaced with bio_cur_bytes() as block layer
  now uses byte count as the primary data length.

* blk_rq_pos() is now guranteed to be always correct.  In-block users
  converted.

* blk_rq_bytes() is now guaranteed to be always valid as is
  blk_rq_sectors().  In-block users converted.

* blk_rq_sectors() is now guaranteed to equal blk_rq_bytes() >> 9.
  More convenient one is used.

* blk_rq_bytes() and blk_rq_cur_bytes() are now inlined and take const
  pointer to request.

[ Impact: API cleanup, single way to represent one property of a request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 82dc206..3596ca7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -127,7 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	INIT_LIST_HEAD(&rq->timeout_list);
 	rq->cpu = -1;
 	rq->q = q;
-	rq->sector = rq->hard_sector = (sector_t) -1;
+	rq->sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->cmd = rq->__cmd;
@@ -189,8 +189,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 	       (unsigned long long)blk_rq_pos(rq),
 	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
 	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
-						rq->bio, rq->biotail,
-						rq->buffer, rq->data_len);
+	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
 
 	if (blk_pc_request(rq)) {
 		printk(KERN_INFO "  cdb: ");
@@ -1096,7 +1095,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_NOIDLE;
 
 	req->errors = 0;
-	req->hard_sector = req->sector = bio->bi_sector;
+	req->sector = bio->bi_sector;
 	req->ioprio = bio_prio(bio);
 	blk_rq_bio_prep(req->q, req, bio);
 }
@@ -1113,14 +1112,13 @@ static inline bool queue_should_plug(struct request_queue *q)
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors;
+	int el_ret;
+	unsigned int bytes = bio->bi_size;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	const int unplug = bio_unplug(bio);
 	int rw_flags;
 
-	nr_sectors = bio_sectors(bio);
-
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
@@ -1145,7 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 		req->biotail->bi_next = bio;
 		req->biotail = bio;
-		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+		req->data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1171,10 +1169,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		 * not touch req->buffer either...
 		 */
 		req->buffer = bio_data(bio);
-		req->current_nr_sectors = bio_cur_sectors(bio);
-		req->hard_cur_sectors = req->current_nr_sectors;
-		req->sector = req->hard_sector = bio->bi_sector;
-		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+		req->sector = bio->bi_sector;
+		req->data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1557,7 +1553,7 @@ EXPORT_SYMBOL(submit_bio);
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
 	if (blk_rq_sectors(rq) > q->max_sectors ||
-	    rq->data_len > q->max_hw_sectors << 9) {
+	    blk_rq_bytes(rq) > q->max_hw_sectors << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
 	}
@@ -1675,35 +1671,6 @@ static void blk_account_io_done(struct request *req)
 	}
 }
 
-/**
- * blk_rq_bytes - Returns bytes left to complete in the entire request
- * @rq: the request being processed
- **/
-unsigned int blk_rq_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return blk_rq_sectors(rq) << 9;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_bytes);
-
-/**
- * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
- * @rq: the request being processed
- **/
-unsigned int blk_rq_cur_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return rq->current_nr_sectors << 9;
-
-	if (rq->bio)
-		return rq->bio->bi_size;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
-
 struct request *elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
@@ -1736,7 +1703,7 @@ struct request *elv_next_request(struct request_queue *q)
 		if (rq->cmd_flags & REQ_DONTPREP)
 			break;
 
-		if (q->dma_drain_size && rq->data_len) {
+		if (q->dma_drain_size && blk_rq_bytes(rq)) {
 			/*
 			 * make sure space for the drain appears we
 			 * know we can do this because max_hw_segments
@@ -1759,7 +1726,7 @@ struct request *elv_next_request(struct request_queue *q)
 			 * avoid resource deadlock.  REQ_STARTED will
 			 * prevent other fs requests from passing this one.
 			 */
-			if (q->dma_drain_size && rq->data_len &&
+			if (q->dma_drain_size && blk_rq_bytes(rq) &&
 			    !(rq->cmd_flags & REQ_DONTPREP)) {
 				/*
 				 * remove the space for the drain we added
@@ -1911,8 +1878,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		 * can find how many bytes remain in the request
 		 * later.
 		 */
-		req->nr_sectors = req->hard_nr_sectors = 0;
-		req->current_nr_sectors = req->hard_cur_sectors = 0;
+		req->data_len = 0;
 		return false;
 	}
 
@@ -1926,8 +1892,25 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		bio_iovec(bio)->bv_len -= nr_bytes;
 	}
 
-	blk_recalc_rq_sectors(req, total_bytes >> 9);
+	req->data_len -= total_bytes;
+	req->buffer = bio_data(req->bio);
+
+	/* update sector only for requests with clear definition of sector */
+	if (blk_fs_request(req) || blk_discard_rq(req))
+		req->sector += total_bytes >> 9;
+
+	/*
+	 * If total number of sectors is less than the first segment
+	 * size, something has gone terribly wrong.
+	 */
+	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+		printk(KERN_ERR "blk: request botched\n");
+		req->data_len = blk_rq_cur_bytes(req);
+	}
+
+	/* recalculate the number of segments */
 	blk_recalc_rq_segments(req);
+
 	return true;
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
@@ -2049,11 +2032,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 		rq->buffer = bio_data(bio);
 	}
-	rq->current_nr_sectors = bio_cur_sectors(bio);
-	rq->hard_cur_sectors = rq->current_nr_sectors;
-	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
 	rq->data_len = bio->bi_size;
-
 	rq->bio = rq->biotail = bio;
 
 	if (bio->bi_bdev)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index bf62a87..b8df66a 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,35 +9,6 @@
 
 #include "blk.h"
 
-void blk_recalc_rq_sectors(struct request *rq, int nsect)
-{
-	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
-		rq->hard_sector += nsect;
-		rq->hard_nr_sectors -= nsect;
-
-		/*
-		 * Move the I/O submission pointers ahead if required.
-		 */
-		if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
-		    (rq->sector <= rq->hard_sector)) {
-			rq->sector = rq->hard_sector;
-			rq->nr_sectors = rq->hard_nr_sectors;
-			rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
-			rq->current_nr_sectors = rq->hard_cur_sectors;
-			rq->buffer = bio_data(rq->bio);
-		}
-
-		/*
-		 * if total number of sectors is less than the first segment
-		 * size, something has gone terribly wrong
-		 */
-		if (rq->nr_sectors < rq->current_nr_sectors) {
-			printk(KERN_ERR "blk: request botched\n");
-			rq->nr_sectors = rq->current_nr_sectors;
-		}
-	}
-}
-
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 					     struct bio *bio)
 {
@@ -199,8 +170,9 @@ new_segment:
 
 
 	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
-	    (rq->data_len & q->dma_pad_mask)) {
-		unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1;
+	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
+		unsigned int pad_len =
+			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
 
 		sg->length += pad_len;
 		rq->extra_len += pad_len;
@@ -398,7 +370,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
 
-	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+	req->data_len += blk_rq_bytes(next);
 
 	elv_merge_requests(q, req, next);
 
diff --git a/block/blk.h b/block/blk.h
index 5111559..ab54529 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -101,7 +101,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 int attempt_back_merge(struct request_queue *q, struct request *rq);
 int attempt_front_merge(struct request_queue *q, struct request *rq);
 void blk_recalc_rq_segments(struct request *rq);
-void blk_recalc_rq_sectors(struct request *rq, int nsect);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index db4d990..99ac430 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -579,9 +579,9 @@ cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
 		 * Sort strictly based on sector.  Smallest to the left,
 		 * largest to the right.
 		 */
-		if (sector > cfqq->next_rq->sector)
+		if (sector > blk_rq_pos(cfqq->next_rq))
 			n = &(*p)->rb_right;
-		else if (sector < cfqq->next_rq->sector)
+		else if (sector < blk_rq_pos(cfqq->next_rq))
 			n = &(*p)->rb_left;
 		else
 			break;
@@ -611,8 +611,8 @@ static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		return;
 
 	cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
-	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector,
-					 &parent, &p);
+	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
+				      blk_rq_pos(cfqq->next_rq), &parent, &p);
 	if (!__cfqq) {
 		rb_link_node(&cfqq->p_node, parent, p);
 		rb_insert_color(&cfqq->p_node, cfqq->p_root);
@@ -996,7 +996,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
 	if (cfq_rq_close(cfqd, __cfqq->next_rq))
 		return __cfqq;
 
-	if (__cfqq->next_rq->sector < sector)
+	if (blk_rq_pos(__cfqq->next_rq) < sector)
 		node = rb_next(&__cfqq->p_node);
 	else
 		node = rb_prev(&__cfqq->p_node);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f37ca8c..d30ec6f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -218,12 +218,12 @@ struct bio {
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
 #define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
 
-static inline unsigned int bio_cur_sectors(struct bio *bio)
+static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
 	if (bio->bi_vcnt)
-		return bio_iovec(bio)->bv_len >> 9;
+		return bio_iovec(bio)->bv_len;
 	else /* dataless requests such as discard */
-		return bio->bi_size >> 9;
+		return bio->bi_size;
 }
 
 static inline void *bio_data(struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4e5f855..ce2bf5e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -166,19 +166,8 @@ struct request {
 	enum rq_cmd_type_bits cmd_type;
 	unsigned long atomic_flags;
 
-	/* Maintain bio traversal state for part by part I/O submission.
-	 * hard_* are block layer internals, no driver should touch them!
-	 */
-
-	sector_t sector;		/* next sector to submit */
-	sector_t hard_sector;		/* next sector to complete */
-	unsigned long nr_sectors;	/* no. of sectors left to submit */
-	unsigned long hard_nr_sectors;	/* no. of sectors left to complete */
-	/* no. of sectors left to submit in the current segment */
-	unsigned int current_nr_sectors;
-
-	/* no. of sectors left to complete in the current segment */
-	unsigned int hard_cur_sectors;
+	sector_t sector;	/* sector cursor */
+	unsigned int data_len;	/* total data len, don't access directly */
 
 	struct bio *bio;
 	struct bio *biotail;
@@ -226,7 +215,6 @@ struct request {
 	unsigned char __cmd[BLK_MAX_CDB];
 	unsigned char *cmd;
 
-	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
 	unsigned int sense_len;
 	unsigned int resid_len;	/* residual count */
@@ -840,20 +828,27 @@ extern void blkdev_dequeue_request(struct request *req);
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
-	return rq->hard_sector;
+	return rq->sector;
+}
+
+static inline unsigned int blk_rq_bytes(const struct request *rq)
+{
+	return rq->data_len;
 }
 
-extern unsigned int blk_rq_bytes(struct request *rq);
-extern unsigned int blk_rq_cur_bytes(struct request *rq);
+static inline int blk_rq_cur_bytes(const struct request *rq)
+{
+	return rq->bio ? bio_cur_bytes(rq->bio) : 0;
+}
 
 static inline unsigned int blk_rq_sectors(const struct request *rq)
 {
-	return rq->hard_nr_sectors;
+	return blk_rq_bytes(rq) >> 9;
 }
 
 static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 {
-	return rq->hard_cur_sectors;
+	return blk_rq_cur_bytes(rq) >> 9;
 }
 
 /*
@@ -928,7 +923,7 @@ static inline void blk_end_request_all(struct request *rq, int error)
  */
 static inline bool blk_end_request_cur(struct request *rq, int error)
 {
-	return blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
 
 /**
@@ -981,7 +976,7 @@ static inline void __blk_end_request_all(struct request *rq, int error)
  */
 static inline bool __blk_end_request_cur(struct request *rq, int error)
 {
-	return __blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
 
 extern void blk_complete_request(struct request *);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c59b769..4e46287 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -171,7 +171,7 @@ enum {
 	ELV_MQUEUE_MUST,
 };
 
-#define rq_end_sector(rq)	((rq)->sector + (rq)->nr_sectors)
+#define rq_end_sector(rq)	(blk_rq_pos(rq) + blk_rq_sectors(rq))
 #define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 /*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 42f1c11..5708a14 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -642,12 +642,12 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 
 	if (blk_pc_request(rq)) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
-				rq->cmd_len, rq->cmd);
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
+				what, rq->errors, rq->cmd_len, rq->cmd);
 	} else  {
 		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9,
-				rw, what, rq->errors, 0, NULL);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
+				what, rq->errors, 0, NULL);
 	}
 }
 
@@ -854,11 +854,11 @@ void blk_add_driver_data(struct request_queue *q,
 		return;
 
 	if (blk_pc_request(rq))
-		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
-				rq->errors, len, data);
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
+				BLK_TA_DRV_DATA, rq->errors, len, data);
 	else
-		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9,
-				0, BLK_TA_DRV_DATA, rq->errors, len, data);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
+				BLK_TA_DRV_DATA, rq->errors, len, data);
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
 
-- 
cgit v0.10.2


From b0790410300abaaf4f25f702803beff701baebf1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:42 +0900
Subject: block: cleanup rq->data_len usages

With recent unification of fields, it's now guaranteed that
rq->data_len always equals blk_rq_bytes().  Convert all non-IDE direct
users to accessors.  IDE will be converted in a separate patch.

Boaz: spotted incorrect data_len/resid_len conversion in osd.

[ Impact: convert direct rq->data_len usages to blk_rq_bytes() ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: Darrick J. Wong <djwong@us.ibm.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 2733b0c..6e4c600 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1084,7 +1084,7 @@ static int atapi_drain_needed(struct request *rq)
 	if (likely(!blk_pc_request(rq)))
 		return 0;
 
-	if (!rq->data_len || (rq->cmd_flags & REQ_RW))
+	if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_RW))
 		return 0;
 
 	return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC;
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index dc3b899..1591f61 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -747,7 +747,7 @@ static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
 {
 	struct request *rq = urq->rq;
 
-	if (rq->data_len == 0) {
+	if (blk_rq_bytes(rq) == 0) {
 		cmd->dir = UB_DIR_NONE;
 	} else {
 		if (rq_data_dir(rq) == WRITE)
@@ -762,7 +762,7 @@ static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
 	memcpy(&cmd->cdb, rq->cmd, rq->cmd_len);
 	cmd->cdb_len = rq->cmd_len;
 
-	cmd->len = rq->data_len;
+	cmd->len = blk_rq_bytes(rq);
 
 	/*
 	 * To reapply this to every URB is not as incorrect as it looks.
@@ -783,8 +783,8 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 
 	if (cmd->error == 0) {
 		if (blk_pc_request(rq)) {
-			if (cmd->act_len < rq->data_len)
-				rq->resid_len = rq->data_len - cmd->act_len;
+			if (cmd->act_len < blk_rq_bytes(rq))
+				rq->resid_len = blk_rq_bytes(rq) - cmd->act_len;
 			scsi_status = 0;
 		} else {
 			if (cmd->act_len != cmd->len) {
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 5d5f347..4e6fcf0 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1277,8 +1277,8 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	/* do we need to support multiple segments? */
 	if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
 		printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n",
-		    ioc->name, __func__, req->bio->bi_vcnt, req->data_len,
-		    rsp->bio->bi_vcnt, rsp->data_len);
+		    ioc->name, __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
+		    rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
 		return -EINVAL;
 	}
 
@@ -1295,7 +1295,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	smpreq = (SmpPassthroughRequest_t *)mf;
 	memset(smpreq, 0, sizeof(*smpreq));
 
-	smpreq->RequestDataLength = cpu_to_le16(req->data_len - 4);
+	smpreq->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4);
 	smpreq->Function = MPI_FUNCTION_SMP_PASSTHROUGH;
 
 	if (rphy)
@@ -1321,10 +1321,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		       MPI_SGE_FLAGS_END_OF_BUFFER |
 		       MPI_SGE_FLAGS_DIRECTION |
 		       mpt_addr_size()) << MPI_SGE_FLAGS_SHIFT;
-	flagsLength |= (req->data_len - 4);
+	flagsLength |= (blk_rq_bytes(req) - 4);
 
 	dma_addr_out = pci_map_single(ioc->pcidev, bio_data(req->bio),
-				      req->data_len, PCI_DMA_BIDIRECTIONAL);
+				      blk_rq_bytes(req), PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_out)
 		goto put_mf;
 	mpt_add_sge(psge, flagsLength, dma_addr_out);
@@ -1332,9 +1332,9 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 
 	/* response */
 	flagsLength = MPT_SGE_FLAGS_SSIMPLE_READ;
-	flagsLength |= rsp->data_len + 4;
+	flagsLength |= blk_rq_bytes(rsp) + 4;
 	dma_addr_in =  pci_map_single(ioc->pcidev, bio_data(rsp->bio),
-				      rsp->data_len, PCI_DMA_BIDIRECTIONAL);
+				      blk_rq_bytes(rsp), PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_in)
 		goto unmap;
 	mpt_add_sge(psge, flagsLength, dma_addr_in);
@@ -1357,7 +1357,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply;
 		memcpy(req->sense, smprep, sizeof(*smprep));
 		req->sense_len = sizeof(*smprep);
-		rsp->resid_len = rsp->data_len - smprep->ResponseDataLength;
+		rsp->resid_len = blk_rq_bytes(rsp) - smprep->ResponseDataLength;
 	} else {
 		printk(MYIOC_s_ERR_FMT "%s: smp passthru reply failed to be returned\n",
 		    ioc->name, __func__);
@@ -1365,10 +1365,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	}
 unmap:
 	if (dma_addr_out)
-		pci_unmap_single(ioc->pcidev, dma_addr_out, req->data_len,
+		pci_unmap_single(ioc->pcidev, dma_addr_out, blk_rq_bytes(req),
 				 PCI_DMA_BIDIRECTIONAL);
 	if (dma_addr_in)
-		pci_unmap_single(ioc->pcidev, dma_addr_in, rsp->data_len,
+		pci_unmap_single(ioc->pcidev, dma_addr_in, blk_rq_bytes(rsp),
 				 PCI_DMA_BIDIRECTIONAL);
 put_mf:
 	if (mf)
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 510eb47..6b61d28 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -430,7 +430,7 @@ static void i2o_block_end_request(struct request *req, int error,
 		int leftover = (blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
 
 		if (blk_pc_request(req))
-			leftover = req->data_len;
+			leftover = blk_rq_bytes(req);
 
 		if (error)
 			blk_end_request(req, -EIO, leftover);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 6605ec9..531af9e 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1927,13 +1927,13 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	/* do we need to support multiple segments? */
 	if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
 		printk("%s: multiple segments req %u %u, rsp %u %u\n",
-		       __func__, req->bio->bi_vcnt, req->data_len,
-		       rsp->bio->bi_vcnt, rsp->data_len);
+		       __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
+		       rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
 		return -EINVAL;
 	}
 
-	ret = smp_execute_task(dev, bio_data(req->bio), req->data_len,
-			       bio_data(rsp->bio), rsp->data_len);
+	ret = smp_execute_task(dev, bio_data(req->bio), blk_rq_bytes(req),
+			       bio_data(rsp->bio), blk_rq_bytes(rsp));
 	if (ret > 0) {
 		/* positive number is the untransferred residual */
 		rsp->resid_len = ret;
diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c
index 89952ed..be9a951 100644
--- a/drivers/scsi/libsas/sas_host_smp.c
+++ b/drivers/scsi/libsas/sas_host_smp.c
@@ -137,21 +137,21 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 	int error = -EINVAL;
 
 	/* eight is the minimum size for request and response frames */
-	if (req->data_len < 8 || rsp->data_len < 8)
+	if (blk_rq_bytes(req) < 8 || blk_rq_bytes(rsp) < 8)
 		goto out;
 
-	if (bio_offset(req->bio) + req->data_len > PAGE_SIZE ||
-	    bio_offset(rsp->bio) + rsp->data_len > PAGE_SIZE) {
+	if (bio_offset(req->bio) + blk_rq_bytes(req) > PAGE_SIZE ||
+	    bio_offset(rsp->bio) + blk_rq_bytes(rsp) > PAGE_SIZE) {
 		shost_printk(KERN_ERR, shost,
 			"SMP request/response frame crosses page boundary");
 		goto out;
 	}
 
-	req_data = kzalloc(req->data_len, GFP_KERNEL);
+	req_data = kzalloc(blk_rq_bytes(req), GFP_KERNEL);
 
 	/* make sure frame can always be built ... we copy
 	 * back only the requested length */
-	resp_data = kzalloc(max(rsp->data_len, 128U), GFP_KERNEL);
+	resp_data = kzalloc(max(blk_rq_bytes(rsp), 128U), GFP_KERNEL);
 
 	if (!req_data || !resp_data) {
 		error = -ENOMEM;
@@ -160,7 +160,7 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 
 	local_irq_disable();
 	buf = kmap_atomic(bio_page(req->bio), KM_USER0) + bio_offset(req->bio);
-	memcpy(req_data, buf, req->data_len);
+	memcpy(req_data, buf, blk_rq_bytes(req));
 	kunmap_atomic(buf - bio_offset(req->bio), KM_USER0);
 	local_irq_enable();
 
@@ -176,8 +176,8 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 	resp_data[1] = req_data[1];
 	resp_data[2] = SMP_RESP_FUNC_UNK;
 
-	req->resid_len = req->data_len;
-	rsp->resid_len = rsp->data_len;
+	req->resid_len = blk_rq_bytes(req);
+	rsp->resid_len = blk_rq_bytes(rsp);
 
 	switch (req_data[1]) {
 	case SMP_REPORT_GENERAL:
@@ -264,7 +264,7 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 
 	local_irq_disable();
 	buf = kmap_atomic(bio_page(rsp->bio), KM_USER0) + bio_offset(rsp->bio);
-	memcpy(buf, resp_data, rsp->data_len);
+	memcpy(buf, resp_data, blk_rq_bytes(rsp));
 	flush_kernel_dcache_page(bio_page(rsp->bio));
 	kunmap_atomic(buf - bio_offset(rsp->bio), KM_USER0);
 	local_irq_enable();
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index 53759c5..af95a44 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1041,7 +1041,7 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
 		printk(MPT2SAS_ERR_FMT "%s: multiple segments req %u %u, "
 		    "rsp %u %u\n", ioc->name, __func__, req->bio->bi_vcnt,
-		    req->data_len, rsp->bio->bi_vcnt, rsp->data_len);
+		    blk_rq_bytes(req), rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
 		return -EINVAL;
 	}
 
@@ -1104,7 +1104,7 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	*((u64 *)&mpi_request->SASAddress) = (rphy) ?
 	    cpu_to_le64(rphy->identify.sas_address) :
 	    cpu_to_le64(ioc->sas_hba.sas_address);
-	mpi_request->RequestDataLength = cpu_to_le16(req->data_len - 4);
+	mpi_request->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4);
 	psge = &mpi_request->SGL;
 
 	/* WRITE sgel first */
@@ -1112,13 +1112,13 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	    MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC);
 	sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
 	dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio),
-	      req->data_len, PCI_DMA_BIDIRECTIONAL);
+		blk_rq_bytes(req), PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_out) {
 		mpt2sas_base_free_smid(ioc, le16_to_cpu(smid));
 		goto unmap;
 	}
 
-	ioc->base_add_sg_single(psge, sgl_flags | (req->data_len - 4),
+	ioc->base_add_sg_single(psge, sgl_flags | (blk_rq_bytes(req) - 4),
 	    dma_addr_out);
 
 	/* incr sgel */
@@ -1129,14 +1129,14 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	    MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER |
 	    MPI2_SGE_FLAGS_END_OF_LIST);
 	sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
-	dma_addr_in =  pci_map_single(ioc->pdev, bio_data(rsp->bio),
-	      rsp->data_len, PCI_DMA_BIDIRECTIONAL);
+	dma_addr_in = pci_map_single(ioc->pdev, bio_data(rsp->bio),
+				     blk_rq_bytes(rsp), PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_in) {
 		mpt2sas_base_free_smid(ioc, le16_to_cpu(smid));
 		goto unmap;
 	}
 
-	ioc->base_add_sg_single(psge, sgl_flags | (rsp->data_len + 4),
+	ioc->base_add_sg_single(psge, sgl_flags | (blk_rq_bytes(rsp) + 4),
 	    dma_addr_in);
 
 	dtransportprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s - "
@@ -1170,7 +1170,8 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 
 		memcpy(req->sense, mpi_reply, sizeof(*mpi_reply));
 		req->sense_len = sizeof(*mpi_reply);
-		rsp->resid_len = rsp->data_len - mpi_reply->ResponseDataLength;
+		rsp->resid_len = blk_rq_bytes(rsp) -
+				 mpi_reply->ResponseDataLength;
 	} else {
 		dtransportprintk(ioc, printk(MPT2SAS_DEBUG_FMT
 		    "%s - no reply\n", ioc->name, __func__));
@@ -1186,10 +1187,10 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 
  unmap:
 	if (dma_addr_out)
-		pci_unmap_single(ioc->pdev, dma_addr_out, req->data_len,
+		pci_unmap_single(ioc->pdev, dma_addr_out, blk_rq_bytes(req),
 		    PCI_DMA_BIDIRECTIONAL);
 	if (dma_addr_in)
-		pci_unmap_single(ioc->pdev, dma_addr_in, rsp->data_len,
+		pci_unmap_single(ioc->pdev, dma_addr_in, blk_rq_bytes(rsp),
 		    PCI_DMA_BIDIRECTIONAL);
 
  out:
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 2a5f077..d178a8b 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1299,7 +1299,7 @@ int osd_finalize_request(struct osd_request *or,
 			return ret;
 		}
 		OSD_DEBUG("out bytes=%llu (bytes_req=%u)\n",
-			_LLU(or->out.total_bytes), or->out.req->data_len);
+			_LLU(or->out.total_bytes), blk_rq_bytes(or->out.req));
 	}
 	if (or->in.bio) {
 		ret = blk_rq_append_bio(or->request->q, or->in.req, or->in.bio);
@@ -1308,7 +1308,7 @@ int osd_finalize_request(struct osd_request *or,
 			return ret;
 		}
 		OSD_DEBUG("in bytes=%llu (bytes_req=%u)\n",
-			_LLU(or->in.total_bytes), or->in.req->data_len);
+			_LLU(or->in.total_bytes), blk_rq_bytes(or->in.req));
 	}
 
 	or->out.pad_buff = sg_out_pad_buffer;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 39b3acf..3d16c70 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -682,14 +682,13 @@ EXPORT_SYMBOL(scsi_release_buffers);
 static void scsi_end_bidi_request(struct scsi_cmnd *cmd)
 {
 	struct request *req = cmd->request;
-	unsigned int dlen = req->data_len;
-	unsigned int next_dlen = req->next_rq->data_len;
 
 	req->resid_len = scsi_out(cmd)->resid;
 	req->next_rq->resid_len = scsi_in(cmd)->resid;
 
 	/* The req and req->next_rq have not been completed */
-	BUG_ON(blk_end_bidi_request(req, 0, dlen, next_dlen));
+	BUG_ON(blk_end_bidi_request(req, 0, blk_rq_bytes(req),
+				    blk_rq_bytes(req->next_rq)));
 
 	scsi_release_buffers(cmd);
 
@@ -966,7 +965,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
 	BUG_ON(count > sdb->table.nents);
 	sdb->table.nents = count;
 	if (blk_pc_request(req))
-		sdb->length = req->data_len;
+		sdb->length = blk_rq_bytes(req);
 	else
 		sdb->length = blk_rq_sectors(req) << 9;
 	return BLKPREP_OK;
@@ -1087,21 +1086,21 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 		if (unlikely(ret))
 			return ret;
 	} else {
-		BUG_ON(req->data_len);
+		BUG_ON(blk_rq_bytes(req));
 
 		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 		req->buffer = NULL;
 	}
 
 	cmd->cmd_len = req->cmd_len;
-	if (!req->data_len)
+	if (!blk_rq_bytes(req))
 		cmd->sc_data_direction = DMA_NONE;
 	else if (rq_data_dir(req) == WRITE)
 		cmd->sc_data_direction = DMA_TO_DEVICE;
 	else
 		cmd->sc_data_direction = DMA_FROM_DEVICE;
 	
-	cmd->transfersize = req->data_len;
+	cmd->transfersize = blk_rq_bytes(req);
 	cmd->allowed = req->retries;
 	return BLKPREP_OK;
 }
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index 48ba413..1030327 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -387,7 +387,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd,
 	 * we use REQ_TYPE_BLOCK_PC so scsi_init_io doesn't set the
 	 * length for us.
 	 */
-	cmd->sdb.length = rq->data_len;
+	cmd->sdb.length = blk_rq_bytes(rq);
 
 	return 0;
 
-- 
cgit v0.10.2


From 34b7d2c957199834c474c9d46739265643f4d9c7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:43 +0900
Subject: ide: cleanup rq->data_len usages

With recent unification of fields, it's now guaranteed that
rq->data_len always equals blk_rq_bytes().  Convert all direct users
to accessors.

[ Impact: convert direct rq->data_len usages to blk_rq_bytes() ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index e4a02a0..792534d 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -255,7 +255,7 @@ void ide_retry_pc(ide_drive_t *drive)
 	ide_init_pc(pc);
 	memcpy(pc->c, sense_rq->cmd, 12);
 	pc->buf = bio_data(sense_rq->bio);	/* pointer to mapped address */
-	pc->req_xfer = sense_rq->data_len;
+	pc->req_xfer = blk_rq_bytes(sense_rq);
 
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
@@ -303,7 +303,7 @@ int ide_cd_get_xferlen(struct request *rq)
 		return 32768;
 	else if (blk_sense_request(rq) || blk_pc_request(rq) ||
 			 rq->cmd_type == REQ_TYPE_ATA_PC)
-		return rq->data_len;
+		return blk_rq_bytes(rq);
 	else
 		return 0;
 }
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index eb4f3dc..2eadc9d 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -577,7 +577,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 	struct request *rq = hwif->rq;
 	ide_expiry_t *expiry = NULL;
 	int dma_error = 0, dma, thislen, uptodate = 0;
-	int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0, nsectors;
+	int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0;
 	int sense = blk_sense_request(rq);
 	unsigned int timeout;
 	u16 len;
@@ -707,9 +707,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
 out_end:
 	if (blk_pc_request(rq) && rc == 0) {
-		if (blk_end_request(rq, 0, rq->data_len))
-			BUG();
-
+		blk_end_request_all(rq, 0);
 		hwif->rq = NULL;
 	} else {
 		if (sense && uptodate)
@@ -727,22 +725,14 @@ out_end:
 			ide_cd_error_cmd(drive, cmd);
 
 		/* make sure it's fully ended */
-		if (blk_pc_request(rq))
-			nsectors = (rq->data_len + 511) >> 9;
-		else
-			nsectors = blk_rq_sectors(rq);
-
-		if (nsectors == 0)
-			nsectors = 1;
-
 		if (blk_fs_request(rq) == 0) {
-			rq->resid_len = rq->data_len -
+			rq->resid_len = blk_rq_bytes(rq) -
 				(cmd->nbytes - cmd->nleft);
 			if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
 				rq->resid_len += cmd->last_xfer_len;
 		}
 
-		ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+		ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
 
 		if (sense && rc == 2)
 			ide_error(drive, "request sense failure", stat);
@@ -819,7 +809,7 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 		 */
 		alignment = queue_dma_alignment(q) | q->dma_pad_mask;
 		if ((unsigned long)buf & alignment
-		    || rq->data_len & q->dma_pad_mask
+		    || blk_rq_bytes(rq) & q->dma_pad_mask
 		    || object_is_on_stack(buf))
 			drive->dma = 0;
 	}
@@ -867,9 +857,8 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 
 	cmd.rq = rq;
 
-	if (blk_fs_request(rq) || rq->data_len) {
-		ide_init_sg_cmd(&cmd, blk_fs_request(rq) ?
-				(blk_rq_sectors(rq) << 9) : rq->data_len);
+	if (blk_fs_request(rq) || blk_rq_bytes(rq)) {
+		ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
 		ide_map_sg(drive, &cmd);
 	}
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 1c460bd..6509817 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -220,14 +220,14 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
 	ide_init_pc(pc);
 	memcpy(pc->c, rq->cmd, sizeof(pc->c));
 	pc->rq = rq;
-	if (rq->data_len) {
+	if (blk_rq_bytes(rq)) {
 		pc->flags |= PC_FLAG_DMA_OK;
 		if (rq_data_dir(rq) == WRITE)
 			pc->flags |= PC_FLAG_WRITING;
 	}
 	/* pio will be performed by ide_pio_bytes() which handles sg fine */
 	pc->buf = NULL;
-	pc->req_xfer = pc->buf_size = rq->data_len;
+	pc->req_xfer = pc->buf_size = blk_rq_bytes(rq);
 }
 
 static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 59799ca..ca2519d 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -116,7 +116,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
 unsigned int ide_rq_bytes(struct request *rq)
 {
 	if (blk_pc_request(rq))
-		return rq->data_len;
+		return blk_rq_bytes(rq);
 	else
 		return blk_rq_cur_sectors(rq) << 9;
 }
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index c6cf3a9..e166045 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -380,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		}
 
 		tape->first_frame += blocks;
-		rq->resid_len = rq->data_len - blocks * tape->blk_size;
+		rq->resid_len = blk_rq_bytes(rq) - blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
-- 
cgit v0.10.2


From a2dec7b36364a5cc564c4d76cf16d2e7d33f5c05 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:44 +0900
Subject: block: hide request sector and data_len

Block low level drivers for some reason have been pretty good at
abusing block layer API.  Especially struct request's fields tend to
get violated in all possible ways.  Make it clear that low level
drivers MUST NOT access or manipulate rq->sector and rq->data_len
directly by prefixing them with double underscores.

This change is also necessary to break build of out-of-tree codes
which assume the previous block API where internal fields can be
manipulated and rq->data_len carries residual count on completion.

[ Impact: hide internal fields, block API change ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 3596ca7..6226a38 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -127,7 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	INIT_LIST_HEAD(&rq->timeout_list);
 	rq->cpu = -1;
 	rq->q = q;
-	rq->sector = (sector_t) -1;
+	rq->__sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->cmd = rq->__cmd;
@@ -1095,7 +1095,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_NOIDLE;
 
 	req->errors = 0;
-	req->sector = bio->bi_sector;
+	req->__sector = bio->bi_sector;
 	req->ioprio = bio_prio(bio);
 	blk_rq_bio_prep(req->q, req, bio);
 }
@@ -1143,7 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 		req->biotail->bi_next = bio;
 		req->biotail = bio;
-		req->data_len += bytes;
+		req->__data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1169,8 +1169,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		 * not touch req->buffer either...
 		 */
 		req->buffer = bio_data(bio);
-		req->sector = bio->bi_sector;
-		req->data_len += bytes;
+		req->__sector = bio->bi_sector;
+		req->__data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1878,7 +1878,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		 * can find how many bytes remain in the request
 		 * later.
 		 */
-		req->data_len = 0;
+		req->__data_len = 0;
 		return false;
 	}
 
@@ -1892,12 +1892,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		bio_iovec(bio)->bv_len -= nr_bytes;
 	}
 
-	req->data_len -= total_bytes;
+	req->__data_len -= total_bytes;
 	req->buffer = bio_data(req->bio);
 
 	/* update sector only for requests with clear definition of sector */
 	if (blk_fs_request(req) || blk_discard_rq(req))
-		req->sector += total_bytes >> 9;
+		req->__sector += total_bytes >> 9;
 
 	/*
 	 * If total number of sectors is less than the first segment
@@ -1905,7 +1905,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	 */
 	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
 		printk(KERN_ERR "blk: request botched\n");
-		req->data_len = blk_rq_cur_bytes(req);
+		req->__data_len = blk_rq_cur_bytes(req);
 	}
 
 	/* recalculate the number of segments */
@@ -2032,7 +2032,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 		rq->buffer = bio_data(bio);
 	}
-	rq->data_len = bio->bi_size;
+	rq->__data_len = bio->bi_size;
 	rq->bio = rq->biotail = bio;
 
 	if (bio->bi_bdev)
diff --git a/block/blk-map.c b/block/blk-map.c
index 694fefa..56082be 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -20,7 +20,7 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 		rq->biotail->bi_next = bio;
 		rq->biotail = bio;
 
-		rq->data_len += bio->bi_size;
+		rq->__data_len += bio->bi_size;
 	}
 	return 0;
 }
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b8df66a..4974dd5 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -370,7 +370,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
 
-	req->data_len += blk_rq_bytes(next);
+	req->__data_len += blk_rq_bytes(next);
 
 	elv_merge_requests(q, req, next);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ce2bf5e..c755803 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -166,8 +166,9 @@ struct request {
 	enum rq_cmd_type_bits cmd_type;
 	unsigned long atomic_flags;
 
-	sector_t sector;	/* sector cursor */
-	unsigned int data_len;	/* total data len, don't access directly */
+	/* the following two fields are internal, NEVER access directly */
+	sector_t __sector;		/* sector cursor */
+	unsigned int __data_len;	/* total data len */
 
 	struct bio *bio;
 	struct bio *biotail;
@@ -828,12 +829,12 @@ extern void blkdev_dequeue_request(struct request *req);
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
-	return rq->sector;
+	return rq->__sector;
 }
 
 static inline unsigned int blk_rq_bytes(const struct request *rq)
 {
-	return rq->data_len;
+	return rq->__data_len;
 }
 
 static inline int blk_rq_cur_bytes(const struct request *rq)
-- 
cgit v0.10.2


From 1011c1b9f2e45ce7c6e38888d2b83936aec38771 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:45 +0900
Subject: block: blk_rq_[cur_]_{sectors|bytes}() usage cleanup

With the previous changes, the followings are now guaranteed for all
requests in any valid state.

* blk_rq_sectors() == blk_rq_bytes() >> 9
* blk_rq_cur_sectors() == blk_rq_cur_bytes() >> 9

Clean up accessor usages.  Notable changes are

* nbd,i2o_block: end_all used instead of explicit byte count
* scsi_lib: unnecessary conditional on request type removed

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Paul Clements <paul.clements@steeleye.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: Alex Dubov <oakad@yahoo.com>
Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 4524862..1e27ed9 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2512,8 +2512,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 
 	remaining = current_count_sectors << 9;
 #ifdef FLOPPY_SANITY_CHECK
-	if ((remaining >> 9) > blk_rq_sectors(current_req) &&
-	    CT(COMMAND) == FD_WRITE) {
+	if (remaining > blk_rq_bytes(current_req) && CT(COMMAND) == FD_WRITE) {
 		DPRINT("in copy buffer\n");
 		printk("current_count_sectors=%ld\n", current_count_sectors);
 		printk("remaining=%d\n", remaining >> 9);
@@ -2530,7 +2529,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 
 	dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9);
 
-	size = blk_rq_cur_sectors(current_req) << 9;
+	size = blk_rq_cur_bytes(current_req);
 
 	rq_for_each_segment(bv, current_req, iter) {
 		if (!remaining)
@@ -2879,7 +2878,7 @@ static int make_raw_rw_request(void)
 				printk("write\n");
 			return 0;
 		}
-	} else if (raw_cmd->length > blk_rq_sectors(current_req) << 9 ||
+	} else if (raw_cmd->length > blk_rq_bytes(current_req) ||
 		   current_count_sectors > blk_rq_sectors(current_req)) {
 		DPRINT("buffer overrun in direct transfer\n");
 		return 0;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 977a573..fad167de 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -110,7 +110,7 @@ static void nbd_end_request(struct request *req)
 			req, error ? "failed" : "done");
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_end_request(req, error, blk_rq_sectors(req) << 9);
+	__blk_end_request_all(req, error);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -231,7 +231,7 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 {
 	int result, flags;
 	struct nbd_request request;
-	unsigned long size = blk_rq_sectors(req) << 9;
+	unsigned long size = blk_rq_bytes(req);
 
 	request.magic = htonl(NBD_REQUEST_MAGIC);
 	request.type = htonl(nbd_cmd(req));
@@ -243,7 +243,7 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 			lo->disk->disk_name, req,
 			nbdcmd_to_ascii(nbd_cmd(req)),
 			(unsigned long long)blk_rq_pos(req) << 9,
-			blk_rq_sectors(req) << 9);
+			blk_rq_bytes(req));
 	result = sock_xmit(lo, 1, &request, sizeof(request),
 			(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
 	if (result <= 0) {
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 1591f61..40d03cf 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -739,7 +739,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 	cmd->cdb[8] = nblks;
 	cmd->cdb_len = 10;
 
-	cmd->len = blk_rq_sectors(rq) * 512;
+	cmd->len = blk_rq_bytes(rq);
 }
 
 static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index d4e6b71..6a13838 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -72,7 +72,7 @@ static void do_z2_request(struct request_queue *q)
 	struct request *req;
 	while ((req = elv_next_request(q)) != NULL) {
 		unsigned long start = blk_rq_pos(req) << 9;
-		unsigned long len  = blk_rq_cur_sectors(req) << 9;
+		unsigned long len  = blk_rq_cur_bytes(req);
 
 		if (start + len > z2ram_size) {
 			printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n",
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 9e600d2..93b2c61 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -680,7 +680,7 @@ try_again:
 		t_sec = blk_rq_pos(msb->block_req) << 9;
 		sector_div(t_sec, msb->page_size);
 
-		count = blk_rq_sectors(msb->block_req) << 9;
+		count = blk_rq_bytes(msb->block_req);
 		count /= msb->page_size;
 
 		param.system = msb->system;
@@ -745,7 +745,7 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
 					t_len *= msb->page_size;
 			}
 		} else
-			t_len = blk_rq_sectors(msb->block_req) << 9;
+			t_len = blk_rq_bytes(msb->block_req);
 
 		dev_dbg(&card->dev, "transferred %x (%d)\n", t_len, error);
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 6b61d28..e153f5d 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -426,15 +426,9 @@ static void i2o_block_end_request(struct request *req, int error,
 	struct request_queue *q = req->q;
 	unsigned long flags;
 
-	if (blk_end_request(req, error, nr_bytes)) {
-		int leftover = (blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
-
-		if (blk_pc_request(req))
-			leftover = blk_rq_bytes(req);
-
+	if (blk_end_request(req, error, nr_bytes))
 		if (error)
-			blk_end_request(req, -EIO, leftover);
-	}
+			blk_end_request_all(req, -EIO);
 
 	spin_lock_irqsave(q->queue_lock, flags);
 
@@ -832,15 +826,13 @@ static int i2o_block_transfer(struct request *req)
 
 		memcpy(mptr, cmd, 10);
 		mptr += 4;
-		*mptr++ =
-		    cpu_to_le32(blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
+		*mptr++ = cpu_to_le32(blk_rq_bytes(req));
 	} else
 #endif
 	{
 		msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid);
 		*mptr++ = cpu_to_le32(ctl_flags);
-		*mptr++ =
-		    cpu_to_le32(blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
+		*mptr++ = cpu_to_le32(blk_rq_bytes(req));
 		*mptr++ =
 		    cpu_to_le32((u32) (blk_rq_pos(req) << KERNEL_SECTOR_SHIFT));
 		*mptr++ =
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 4ea2e67..50c76a2 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -48,7 +48,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	char *buf;
 
 	block = blk_rq_pos(req) << 9 >> tr->blkshift;
-	nsect = blk_rq_cur_sectors(req) << 9 >> tr->blkshift;
+	nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
 
 	buf = req->buffer;
 
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 2132c90..d56ddaa 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -189,7 +189,7 @@ static void jsfd_do_request(struct request_queue *q)
 	while ((req = elv_next_request(q)) != NULL) {
 		struct jsfd_part *jdp = req->rq_disk->private_data;
 		unsigned long offset = blk_rq_pos(req) << 9;
-		size_t len = blk_rq_cur_sectors(req) << 9;
+		size_t len = blk_rq_cur_bytes(req);
 
 		if ((offset + len) > jdp->dsize) {
 			__blk_end_request_cur(req, -EIO);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 3d16c70..ee308f6 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -546,7 +546,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
 	 * to queue the remainder of them.
 	 */
 	if (blk_end_request(req, error, bytes)) {
-		int leftover = blk_rq_sectors(req) << 9;
+		int leftover = blk_rq_bytes(req);
 
 		if (blk_pc_request(req))
 			leftover = req->resid_len;
@@ -964,10 +964,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
 	count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
 	BUG_ON(count > sdb->table.nents);
 	sdb->table.nents = count;
-	if (blk_pc_request(req))
-		sdb->length = blk_rq_bytes(req);
-	else
-		sdb->length = blk_rq_sectors(req) << 9;
+	sdb->length = blk_rq_bytes(req);
 	return BLKPREP_OK;
 }
 
-- 
cgit v0.10.2


From 8f6205cd572fece673da0255d74843680f67f879 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:53:59 +0900
Subject: ide: dequeue in-flight request

ide generally has single request in flight and tracks it using
hwif->rq and all state handlers follow the following convention.

* ide_started is returned if the request is in flight.

* ide_stopped is returned if the queue needs to be restarted.  The
  request might or might not have been processed fully or partially.

* hwif->rq is set to NULL, when an issued request completes.

So, dequeueing model can be implemented by dequeueing after fetch,
requeueing if hwif->rq isn't NULL on ide_stopped return and doing
about the same thing on completion / port unlock paths.  These changes
can be made in ide-io proper.

In addition to the above main changes, the following updates are
necessary.

* ide-cd shouldn't dequeue a request when issuing REQUEST SENSE for it
  as the request is already dequeued.

* ide-atapi uses request queue as stack when issuing REQUEST SENSE to
  put the REQUEST SENSE in front of the failed request.  This now
  needs to be done using requeueing.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 792534d..2874c3d 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -246,6 +246,7 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
  */
 void ide_retry_pc(ide_drive_t *drive)
 {
+	struct request *failed_rq = drive->hwif->rq;
 	struct request *sense_rq = &drive->sense_rq;
 	struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
@@ -260,8 +261,17 @@ void ide_retry_pc(ide_drive_t *drive)
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 
-	if (ide_queue_sense_rq(drive, pc))
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
+	/*
+	 * Push back the failed request and put request sense on top
+	 * of it.  The failed command will be retried after sense data
+	 * is acquired.
+	 */
+	blk_requeue_request(failed_rq->q, failed_rq);
+	drive->hwif->rq = NULL;
+	if (ide_queue_sense_rq(drive, pc)) {
+		blkdev_dequeue_request(failed_rq);
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+	}
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 2eadc9d..4c7792f 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -405,15 +405,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
 end_request:
 	if (stat & ATA_ERR) {
-		struct request_queue *q = drive->queue;
-		unsigned long flags;
-
-		spin_lock_irqsave(q->queue_lock, flags);
-		blkdev_dequeue_request(rq);
-		spin_unlock_irqrestore(q->queue_lock, flags);
-
 		hwif->rq = NULL;
-
 		return ide_queue_sense_rq(drive, rq) ? 2 : 1;
 	} else
 		return 2;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ca2519d..abda733 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -487,10 +487,10 @@ void do_ide_request(struct request_queue *q)
 
 	if (!ide_lock_port(hwif)) {
 		ide_hwif_t *prev_port;
+
+		WARN_ON_ONCE(hwif->rq);
 repeat:
 		prev_port = hwif->host->cur_port;
-		hwif->rq = NULL;
-
 		if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
 		    time_after(drive->sleep, jiffies)) {
 			ide_unlock_port(hwif);
@@ -519,7 +519,12 @@ repeat:
 		 * we know that the queue isn't empty, but this can happen
 		 * if the q->prep_rq_fn() decides to kill a request
 		 */
-		rq = elv_next_request(drive->queue);
+		if (!rq) {
+			rq = elv_next_request(drive->queue);
+			if (rq)
+				blkdev_dequeue_request(rq);
+		}
+
 		spin_unlock_irq(q->queue_lock);
 		spin_lock_irq(&hwif->lock);
 
@@ -555,8 +560,11 @@ repeat:
 		startstop = start_request(drive, rq);
 		spin_lock_irq(&hwif->lock);
 
-		if (startstop == ide_stopped)
+		if (startstop == ide_stopped) {
+			rq = hwif->rq;
+			hwif->rq = NULL;
 			goto repeat;
+		}
 	} else
 		goto plug_device;
 out:
@@ -572,18 +580,24 @@ plug_device:
 plug_device_2:
 	spin_lock_irq(q->queue_lock);
 
+	if (rq)
+		blk_requeue_request(q, rq);
 	if (!elv_queue_empty(q))
 		blk_plug_device(q);
 }
 
-static void ide_plug_device(ide_drive_t *drive)
+static void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
 {
 	struct request_queue *q = drive->queue;
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
+
+	if (rq)
+		blk_requeue_request(q, rq);
 	if (!elv_queue_empty(q))
 		blk_plug_device(q);
+
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -632,6 +646,7 @@ void ide_timer_expiry (unsigned long data)
 	unsigned long	flags;
 	int		wait = -1;
 	int		plug_device = 0;
+	struct request	*uninitialized_var(rq_in_flight);
 
 	spin_lock_irqsave(&hwif->lock, flags);
 
@@ -693,6 +708,8 @@ void ide_timer_expiry (unsigned long data)
 		spin_lock_irq(&hwif->lock);
 		enable_irq(hwif->irq);
 		if (startstop == ide_stopped) {
+			rq_in_flight = hwif->rq;
+			hwif->rq = NULL;
 			ide_unlock_port(hwif);
 			plug_device = 1;
 		}
@@ -701,7 +718,7 @@ void ide_timer_expiry (unsigned long data)
 
 	if (plug_device) {
 		ide_unlock_host(hwif->host);
-		ide_plug_device(drive);
+		ide_requeue_and_plug(drive, rq_in_flight);
 	}
 }
 
@@ -787,6 +804,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
 	int plug_device = 0;
+	struct request *uninitialized_var(rq_in_flight);
 
 	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
 		if (hwif != host->cur_port)
@@ -866,6 +884,8 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	 */
 	if (startstop == ide_stopped) {
 		BUG_ON(hwif->handler);
+		rq_in_flight = hwif->rq;
+		hwif->rq = NULL;
 		ide_unlock_port(hwif);
 		plug_device = 1;
 	}
@@ -875,7 +895,7 @@ out:
 out_early:
 	if (plug_device) {
 		ide_unlock_host(hwif->host);
-		ide_plug_device(drive);
+		ide_requeue_and_plug(drive, rq_in_flight);
 	}
 
 	return irq_ret;
-- 
cgit v0.10.2


From 9a8d23d8855e554fc5887f14cb008b55c4300ccc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:00 +0900
Subject: mg_disk: fix queue hang / infinite retry on !fs requests

Both request functions in mg_disk simply return when they encounter a
!fs request, which means the request will never be cleared from the
queue causing queue hang and indefinite retry of the request.  Fix it.

While at it, flatten condition checks and add unlikely to !fs tests.

[ Impact: fix possible queue hang / infinite retry of !fs requests ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 826c349..be32388 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -672,16 +672,16 @@ static void mg_request_poll(struct request_queue *q)
 
 	while ((req = elv_next_request(q)) != NULL) {
 		host = req->rq_disk->private_data;
-		if (blk_fs_request(req)) {
-			switch (rq_data_dir(req)) {
-			case READ:
-				mg_read(req);
-				break;
-			case WRITE:
-				mg_write(req);
-				break;
-			}
+
+		if (unlikely(!blk_fs_request(req))) {
+			__blk_end_request_cur(req, -EIO);
+			continue;
 		}
+
+		if (rq_data_dir(req) == READ)
+			mg_read(req);
+		else
+			mg_write(req);
 	}
 }
 
@@ -766,8 +766,10 @@ static void mg_request(struct request_queue *q)
 			continue;
 		}
 
-		if (!blk_fs_request(req))
-			return;
+		if (unlikely(!blk_fs_request(req))) {
+			__blk_end_request_cur(req, -EIO);
+			continue;
+		}
 
 		if (!mg_issue_req(req, host, sect_num, sect_cnt))
 			return;
-- 
cgit v0.10.2


From 5b36ad6000ddea390aca3c3b67ead7616ace2ffc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:01 +0900
Subject: mg_disk: dequeue and track in-flight request

mg_disk has at most single request in flight per device.  Till now,
whenever it needs to access the in-flight request it called
elv_next_request().  This patch makes mg_disk track the in-flight
request directly using mg_host->req and dequeue it when processing
starts.

q->queuedata is set to mg_host so that mg_host can be determined
without fetching request from the queue.

[ Impact: dequeue in-flight request, one elv_next_request() per request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index be32388..1ca5d14 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -135,6 +135,7 @@ struct mg_host {
 	struct device *dev;
 
 	struct request_queue *breq;
+	struct request *req;
 	spinlock_t lock;
 	struct gendisk *gd;
 
@@ -171,17 +172,27 @@ struct mg_host {
 
 static void mg_request(struct request_queue *);
 
+static bool mg_end_request(struct mg_host *host, int err, unsigned int nr_bytes)
+{
+	if (__blk_end_request(host->req, err, nr_bytes))
+		return true;
+
+	host->req = NULL;
+	return false;
+}
+
+static bool mg_end_request_cur(struct mg_host *host, int err)
+{
+	return mg_end_request(host, err, blk_rq_cur_bytes(host->req));
+}
+
 static void mg_dump_status(const char *msg, unsigned int stat,
 		struct mg_host *host)
 {
 	char *name = MG_DISK_NAME;
-	struct request *req;
 
-	if (host->breq) {
-		req = elv_next_request(host->breq);
-		if (req)
-			name = req->rq_disk->disk_name;
-	}
+	if (host->req)
+		name = host->req->rq_disk->disk_name;
 
 	printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
 	if (stat & ATA_BUSY)
@@ -217,13 +228,9 @@ static void mg_dump_status(const char *msg, unsigned int stat,
 			printk("AddrMarkNotFound ");
 		printk("}");
 		if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) {
-			if (host->breq) {
-				req = elv_next_request(host->breq);
-				if (req)
-					printk(", sector=%u",
-					       (unsigned int)blk_rq_pos(req));
-			}
-
+			if (host->req)
+				printk(", sector=%u",
+				       (unsigned int)blk_rq_pos(host->req));
 		}
 		printk("\n");
 	}
@@ -453,11 +460,10 @@ static int mg_disk_init(struct mg_host *host)
 
 static void mg_bad_rw_intr(struct mg_host *host)
 {
-	struct request *req = elv_next_request(host->breq);
-	if (req != NULL)
-		if (++req->errors >= MG_MAX_ERRORS ||
-				host->error == MG_ERR_TIMEOUT)
-			__blk_end_request_cur(req, -EIO);
+	if (host->req)
+		if (++host->req->errors >= MG_MAX_ERRORS ||
+		    host->error == MG_ERR_TIMEOUT)
+			mg_end_request_cur(host, -EIO);
 }
 
 static unsigned int mg_out(struct mg_host *host,
@@ -515,7 +521,7 @@ static void mg_read(struct request *req)
 
 		outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base +
 				MG_REG_COMMAND);
-	} while (__blk_end_request(req, 0, MG_SECTOR_SIZE));
+	} while (mg_end_request(host, 0, MG_SECTOR_SIZE));
 }
 
 static void mg_write(struct request *req)
@@ -545,14 +551,14 @@ static void mg_write(struct request *req)
 
 		outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
 				MG_REG_COMMAND);
-	} while (__blk_end_request(req, 0, MG_SECTOR_SIZE));
+	} while (mg_end_request(host, 0, MG_SECTOR_SIZE));
 }
 
 static void mg_read_intr(struct mg_host *host)
 {
+	struct request *req = host->req;
 	u32 i;
 	u16 *buff;
-	struct request *req;
 
 	/* check status */
 	do {
@@ -571,7 +577,6 @@ static void mg_read_intr(struct mg_host *host)
 
 ok_to_read:
 	/* get current segment of request */
-	req = elv_next_request(host->breq);
 	buff = (u16 *)req->buffer;
 
 	/* read 1 sector */
@@ -585,7 +590,7 @@ ok_to_read:
 	/* send read confirm */
 	outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
 
-	if (__blk_end_request(req, 0, MG_SECTOR_SIZE)) {
+	if (mg_end_request(host, 0, MG_SECTOR_SIZE)) {
 		/* set handler if read remains */
 		host->mg_do_intr = mg_read_intr;
 		mod_timer(&host->timer, jiffies + 3 * HZ);
@@ -595,14 +600,11 @@ ok_to_read:
 
 static void mg_write_intr(struct mg_host *host)
 {
+	struct request *req = host->req;
 	u32 i, j;
 	u16 *buff;
-	struct request *req;
 	bool rem;
 
-	/* get current segment of request */
-	req = elv_next_request(host->breq);
-
 	/* check status */
 	do {
 		i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
@@ -619,7 +621,7 @@ static void mg_write_intr(struct mg_host *host)
 	return;
 
 ok_to_write:
-	if ((rem = __blk_end_request(req, 0, MG_SECTOR_SIZE))) {
+	if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) {
 		/* write 1 sector and set handler if remains */
 		buff = (u16 *)req->buffer;
 		for (j = 0; j < MG_STORAGE_BUFFER_SIZE >> 1; j++) {
@@ -644,44 +646,47 @@ void mg_times_out(unsigned long data)
 {
 	struct mg_host *host = (struct mg_host *)data;
 	char *name;
-	struct request *req;
 
 	spin_lock_irq(&host->lock);
 
-	req = elv_next_request(host->breq);
-	if (!req)
+	if (!host->req)
 		goto out_unlock;
 
 	host->mg_do_intr = NULL;
 
-	name = req->rq_disk->disk_name;
+	name = host->req->rq_disk->disk_name;
 	printk(KERN_DEBUG "%s: timeout\n", name);
 
 	host->error = MG_ERR_TIMEOUT;
 	mg_bad_rw_intr(host);
 
-	mg_request(host->breq);
 out_unlock:
+	mg_request(host->breq);
 	spin_unlock_irq(&host->lock);
 }
 
 static void mg_request_poll(struct request_queue *q)
 {
-	struct request *req;
-	struct mg_host *host;
+	struct mg_host *host = q->queuedata;
 
-	while ((req = elv_next_request(q)) != NULL) {
-		host = req->rq_disk->private_data;
+	while (1) {
+		if (!host->req) {
+			host->req = elv_next_request(q);
+			if (host->req)
+				blkdev_dequeue_request(host->req);
+			else
+				break;
+		}
 
-		if (unlikely(!blk_fs_request(req))) {
-			__blk_end_request_cur(req, -EIO);
+		if (unlikely(!blk_fs_request(host->req))) {
+			mg_end_request_cur(host, -EIO);
 			continue;
 		}
 
-		if (rq_data_dir(req) == READ)
-			mg_read(req);
+		if (rq_data_dir(host->req) == READ)
+			mg_read(host->req);
 		else
-			mg_write(req);
+			mg_write(host->req);
 	}
 }
 
@@ -733,16 +738,19 @@ static unsigned int mg_issue_req(struct request *req,
 /* This function also called from IRQ context */
 static void mg_request(struct request_queue *q)
 {
+	struct mg_host *host = q->queuedata;
 	struct request *req;
-	struct mg_host *host;
 	u32 sect_num, sect_cnt;
 
 	while (1) {
-		req = elv_next_request(q);
-		if (!req)
-			return;
-
-		host = req->rq_disk->private_data;
+		if (!host->req) {
+			host->req = elv_next_request(q);
+			if (host->req)
+				blkdev_dequeue_request(host->req);
+			else
+				break;
+		}
+		req = host->req;
 
 		/* check unwanted request call */
 		if (host->mg_do_intr)
@@ -762,12 +770,12 @@ static void mg_request(struct request_queue *q)
 					"%s: bad access: sector=%d, count=%d\n",
 					req->rq_disk->disk_name,
 					sect_num, sect_cnt);
-			__blk_end_request_cur(req, -EIO);
+			mg_end_request_cur(host, -EIO);
 			continue;
 		}
 
 		if (unlikely(!blk_fs_request(req))) {
-			__blk_end_request_cur(req, -EIO);
+			mg_end_request_cur(host, -EIO);
 			continue;
 		}
 
@@ -981,6 +989,7 @@ static int mg_probe(struct platform_device *plat_dev)
 				__func__, __LINE__);
 		goto probe_err_5;
 	}
+	host->breq->queuedata = host;
 
 	/* mflash is random device, thanx for the noop */
 	elevator_exit(host->breq->elevator);
-- 
cgit v0.10.2


From 8a12c4a456c7ee261a85c2cbec835601e6aae05a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:02 +0900
Subject: hd: dequeue and track in-flight request

hd has at most single request in flight.  Till now, whenever it needs
to access the in-flight request it called elv_next_request().  This
patch makes hd track the in-flight request directly and dequeue it
when processing starts.  The added complexity is minimal and this will
help future block layer changes.

[ Impact: dequeue in-flight request, one elv_next_request() per request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index a3b3994..288ab63 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -98,10 +98,9 @@
 
 static DEFINE_SPINLOCK(hd_lock);
 static struct request_queue *hd_queue;
+static struct request *hd_req;
 
 #define MAJOR_NR HD_MAJOR
-#define QUEUE (hd_queue)
-#define CURRENT elv_next_request(hd_queue)
 
 #define TIMEOUT_VALUE	(6*HZ)
 #define	HD_DELAY	0
@@ -195,11 +194,24 @@ static void __init hd_setup(char *str, int *ints)
 	NR_HD = hdind+1;
 }
 
+static bool hd_end_request(int err, unsigned int bytes)
+{
+	if (__blk_end_request(hd_req, err, bytes))
+		return true;
+	hd_req = NULL;
+	return false;
+}
+
+static bool hd_end_request_cur(int err)
+{
+	return hd_end_request(err, blk_rq_cur_bytes(hd_req));
+}
+
 static void dump_status(const char *msg, unsigned int stat)
 {
 	char *name = "hd?";
-	if (CURRENT)
-		name = CURRENT->rq_disk->disk_name;
+	if (hd_req)
+		name = hd_req->rq_disk->disk_name;
 
 #ifdef VERBOSE_ERRORS
 	printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
@@ -227,8 +239,8 @@ static void dump_status(const char *msg, unsigned int stat)
 		if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) {
 			printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
 				inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
-			if (CURRENT)
-				printk(", sector=%ld", blk_rq_pos(CURRENT));
+			if (hd_req)
+				printk(", sector=%ld", blk_rq_pos(hd_req));
 		}
 		printk("\n");
 	}
@@ -406,11 +418,12 @@ static void unexpected_hd_interrupt(void)
  */
 static void bad_rw_intr(void)
 {
-	struct request *req = CURRENT;
+	struct request *req = hd_req;
+
 	if (req != NULL) {
 		struct hd_i_struct *disk = req->rq_disk->private_data;
 		if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
-			__blk_end_request_cur(req, -EIO);
+			hd_end_request_cur(-EIO);
 			disk->special_op = disk->recalibrate = 1;
 		} else if (req->errors % RESET_FREQ == 0)
 			reset = 1;
@@ -454,14 +467,14 @@ static void read_intr(void)
 	return;
 
 ok_to_read:
-	req = CURRENT;
+	req = hd_req;
 	insw(HD_DATA, req->buffer, 256);
 #ifdef DEBUG
 	printk("%s: read: sector %ld, remaining = %u, buffer=%p\n",
 	       req->rq_disk->disk_name, blk_rq_pos(req) + 1,
 	       blk_rq_sectors(req) - 1, req->buffer+512);
 #endif
-	if (__blk_end_request(req, 0, 512)) {
+	if (hd_end_request(0, 512)) {
 		SET_HANDLER(&read_intr);
 		return;
 	}
@@ -475,7 +488,7 @@ ok_to_read:
 
 static void write_intr(void)
 {
-	struct request *req = CURRENT;
+	struct request *req = hd_req;
 	int i;
 	int retries = 100000;
 
@@ -494,7 +507,7 @@ static void write_intr(void)
 	return;
 
 ok_to_write:
-	if (__blk_end_request(req, 0, 512)) {
+	if (hd_end_request(0, 512)) {
 		SET_HANDLER(&write_intr);
 		outsw(HD_DATA, req->buffer, 256);
 		return;
@@ -525,18 +538,18 @@ static void hd_times_out(unsigned long dummy)
 
 	do_hd = NULL;
 
-	if (!CURRENT)
+	if (!hd_req)
 		return;
 
 	spin_lock_irq(hd_queue->queue_lock);
 	reset = 1;
-	name = CURRENT->rq_disk->disk_name;
+	name = hd_req->rq_disk->disk_name;
 	printk("%s: timeout\n", name);
-	if (++CURRENT->errors >= MAX_ERRORS) {
+	if (++hd_req->errors >= MAX_ERRORS) {
 #ifdef DEBUG
 		printk("%s: too many errors\n", name);
 #endif
-		__blk_end_request_cur(CURRENT, -EIO);
+		hd_end_request_cur(-EIO);
 	}
 	hd_request();
 	spin_unlock_irq(hd_queue->queue_lock);
@@ -551,7 +564,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req)
 	}
 	if (disk->head > 16) {
 		printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
-		__blk_end_request_cur(req, -EIO);
+		hd_end_request_cur(-EIO);
 	}
 	disk->special_op = 0;
 	return 1;
@@ -578,11 +591,15 @@ static void hd_request(void)
 repeat:
 	del_timer(&device_timer);
 
-	req = CURRENT;
-	if (!req) {
-		do_hd = NULL;
-		return;
+	if (!hd_req) {
+		hd_req = elv_next_request(hd_queue);
+		if (!hd_req) {
+			do_hd = NULL;
+			return;
+		}
+		blkdev_dequeue_request(hd_req);
 	}
+	req = hd_req;
 
 	if (reset) {
 		reset_hd();
@@ -595,7 +612,7 @@ repeat:
 	    ((block+nsect) > get_capacity(req->rq_disk))) {
 		printk("%s: bad access: block=%d, count=%d\n",
 			req->rq_disk->disk_name, block, nsect);
-		__blk_end_request_cur(req, -EIO);
+		hd_end_request_cur(-EIO);
 		goto repeat;
 	}
 
@@ -635,7 +652,7 @@ repeat:
 			break;
 		default:
 			printk("unknown hd-command\n");
-			__blk_end_request_cur(req, -EIO);
+			hd_end_request_cur(-EIO);
 			break;
 		}
 	}
-- 
cgit v0.10.2


From a336ca6fe6e0c46b2eef0e520951acb4e6cb2976 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:03 +0900
Subject: ataflop: dequeue and track in-flight request

ataflop has single request in flight.  Till now, whenever it needs to
access the in-flight request it called elv_next_request().  This patch
makes ataflop track the in-flight request directly and dequeue it when
processing starts.  The added complexity is minimal and this will help
future block layer changes.

[ Impact: dequeue in-flight request, one elv_next_request() per request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 234024c..89a591d 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -79,9 +79,7 @@
 #undef DEBUG
 
 static struct request_queue *floppy_queue;
-
-#define QUEUE (floppy_queue)
-#define CURRENT elv_next_request(floppy_queue)
+static struct request *fd_request;
 
 /* Disk types: DD, HD, ED */
 static struct atari_disk_type {
@@ -376,6 +374,12 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
 static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
 static DEFINE_TIMER(fd_timer, check_change, 0, 0);
 	
+static void fd_end_request_cur(int err)
+{
+	if (!__blk_end_request_cur(fd_request, err))
+		fd_request = NULL;
+}
+
 static inline void start_motor_off_timer(void)
 {
 	mod_timer(&motor_off_timer, jiffies + FD_MOTOR_OFF_DELAY);
@@ -606,15 +610,15 @@ static void fd_error( void )
 		return;
 	}
 
-	if (!CURRENT)
+	if (!fd_request)
 		return;
 
-	CURRENT->errors++;
-	if (CURRENT->errors >= MAX_ERRORS) {
+	fd_request->errors++;
+	if (fd_request->errors >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
-		__blk_end_request_cur(CURRENT, -EIO);
+		fd_end_request_cur(-EIO);
 	}
-	else if (CURRENT->errors == RECALIBRATE_ERRORS) {
+	else if (fd_request->errors == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
 		if (SelectedDrive != -1)
 			SUD.track = -1;
@@ -725,14 +729,14 @@ static void do_fd_action( int drive )
 	    if (IS_BUFFERED( drive, ReqSide, ReqTrack )) {
 		if (ReqCmd == READ) {
 		    copy_buffer( SECTOR_BUFFER(ReqSector), ReqData );
-		    if (++ReqCnt < blk_rq_cur_sectors(CURRENT)) {
+		    if (++ReqCnt < blk_rq_cur_sectors(fd_request)) {
 			/* read next sector */
 			setup_req_params( drive );
 			goto repeat;
 		    }
 		    else {
 			/* all sectors finished */
-			__blk_end_request_cur(CURRENT, 0);
+			fd_end_request_cur(0);
 			redo_fd_request();
 			return;
 		    }
@@ -1130,14 +1134,14 @@ static void fd_rwsec_done1(int status)
 		}
 	}
   
-	if (++ReqCnt < blk_rq_cur_sectors(CURRENT)) {
+	if (++ReqCnt < blk_rq_cur_sectors(fd_request)) {
 		/* read next sector */
 		setup_req_params( SelectedDrive );
 		do_fd_action( SelectedDrive );
 	}
 	else {
 		/* all sectors finished */
-		__blk_end_request_cur(CURRENT, 0);
+		fd_end_request_cur(0);
 		redo_fd_request();
 	}
 	return;
@@ -1378,7 +1382,7 @@ static void setup_req_params( int drive )
 	ReqData = ReqBuffer + 512 * ReqCnt;
 
 	if (UseTrackbuffer)
-		read_track = (ReqCmd == READ && CURRENT->errors == 0);
+		read_track = (ReqCmd == READ && fd_request->errors == 0);
 	else
 		read_track = 0;
 
@@ -1392,25 +1396,28 @@ static void redo_fd_request(void)
 	int drive, type;
 	struct atari_floppy_struct *floppy;
 
-	DPRINT(("redo_fd_request: CURRENT=%p dev=%s CURRENT->sector=%ld\n",
-		CURRENT, CURRENT ? CURRENT->rq_disk->disk_name : "",
-		CURRENT ? blk_rq_pos(CURRENT) : 0 ));
+	DPRINT(("redo_fd_request: fd_request=%p dev=%s fd_request->sector=%ld\n",
+		fd_request, fd_request ? fd_request->rq_disk->disk_name : "",
+		fd_request ? blk_rq_pos(fd_request) : 0 ));
 
 	IsFormatting = 0;
 
 repeat:
+	if (!fd_request) {
+		fd_request = elv_next_request(floppy_queue);
+		if (!fd_request)
+			goto the_end;
+		blkdev_dequeue_request(fd_request);
+	}
 
-	if (!CURRENT)
-		goto the_end;
-
-	floppy = CURRENT->rq_disk->private_data;
+	floppy = fd_request->rq_disk->private_data;
 	drive = floppy - unit;
 	type = floppy->type;
 	
 	if (!UD.connected) {
 		/* drive not connected */
 		printk(KERN_ERR "Unknown Device: fd%d\n", drive );
-		__blk_end_request_cur(CURRENT, -EIO);
+		fd_end_request_cur(-EIO);
 		goto repeat;
 	}
 		
@@ -1426,12 +1433,12 @@ repeat:
 		/* user supplied disk type */
 		if (--type >= NUM_DISK_MINORS) {
 			printk(KERN_WARNING "fd%d: invalid disk format", drive );
-			__blk_end_request_cur(CURRENT, -EIO);
+			fd_end_request_cur(-EIO);
 			goto repeat;
 		}
 		if (minor2disktype[type].drive_types > DriveType)  {
 			printk(KERN_WARNING "fd%d: unsupported disk format", drive );
-			__blk_end_request_cur(CURRENT, -EIO);
+			fd_end_request_cur(-EIO);
 			goto repeat;
 		}
 		type = minor2disktype[type].index;
@@ -1440,8 +1447,8 @@ repeat:
 		UD.autoprobe = 0;
 	}
 	
-	if (blk_rq_pos(CURRENT) + 1 > UDT->blocks) {
-		__blk_end_request_cur(CURRENT, -EIO);
+	if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
+		fd_end_request_cur(-EIO);
 		goto repeat;
 	}
 
@@ -1449,9 +1456,9 @@ repeat:
 	del_timer( &motor_off_timer );
 		
 	ReqCnt = 0;
-	ReqCmd = rq_data_dir(CURRENT);
-	ReqBlock = blk_rq_pos(CURRENT);
-	ReqBuffer = CURRENT->buffer;
+	ReqCmd = rq_data_dir(fd_request);
+	ReqBlock = blk_rq_pos(fd_request);
+	ReqBuffer = fd_request->buffer;
 	setup_req_params( drive );
 	do_fd_action( drive );
 
-- 
cgit v0.10.2


From f4bd4b90bfa86e867111aa182895cb5ac203aa7a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:04 +0900
Subject: swim3: dequeue in-flight request

swim3 has at most single request in flight and already tracks it using
fd_req.  Convert it to dequeuing model by updating request fetching
and wrapping completion function.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index c1b9a4d..f48c6dd 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -251,6 +251,20 @@ static int floppy_release(struct gendisk *disk, fmode_t mode);
 static int floppy_check_change(struct gendisk *disk);
 static int floppy_revalidate(struct gendisk *disk);
 
+static bool swim3_end_request(int err, unsigned int nr_bytes)
+{
+	if (__blk_end_request(fd_req, err, nr_bytes))
+		return true;
+
+	fd_req = NULL;
+	return false;
+}
+
+static bool swim3_end_request_cur(int err)
+{
+	return swim3_end_request(err, blk_rq_cur_bytes(fd_req));
+}
+
 static void swim3_select(struct floppy_state *fs, int sel)
 {
 	struct swim3 __iomem *sw = fs->swim3;
@@ -310,7 +324,14 @@ static void start_request(struct floppy_state *fs)
 		wake_up(&fs->wait);
 		return;
 	}
-	while (fs->state == idle && (req = elv_next_request(swim3_queue))) {
+	while (fs->state == idle) {
+		if (!fd_req) {
+			fd_req = elv_next_request(swim3_queue);
+			if (!fd_req)
+				break;
+			blkdev_dequeue_request(fd_req);
+		}
+		req = fd_req;
 #if 0
 		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n",
 		       req->rq_disk->disk_name, req->cmd,
@@ -320,11 +341,11 @@ static void start_request(struct floppy_state *fs)
 #endif
 
 		if (blk_rq_pos(req) >= fs->total_secs) {
-			__blk_end_request_cur(req, -EIO);
+			swim3_end_request_cur(-EIO);
 			continue;
 		}
 		if (fs->ejected) {
-			__blk_end_request_cur(req, -EIO);
+			swim3_end_request_cur(-EIO);
 			continue;
 		}
 
@@ -332,7 +353,7 @@ static void start_request(struct floppy_state *fs)
 			if (fs->write_prot < 0)
 				fs->write_prot = swim3_readbit(fs, WRITE_PROT);
 			if (fs->write_prot) {
-				__blk_end_request_cur(req, -EIO);
+				swim3_end_request_cur(-EIO);
 				continue;
 			}
 		}
@@ -505,7 +526,7 @@ static void act(struct floppy_state *fs)
 		case do_transfer:
 			if (fs->cur_cyl != fs->req_cyl) {
 				if (fs->retries > 5) {
-					__blk_end_request_cur(fd_req, -EIO);
+					swim3_end_request_cur(-EIO);
 					fs->state = idle;
 					return;
 				}
@@ -537,7 +558,7 @@ static void scan_timeout(unsigned long data)
 	out_8(&sw->intr_enable, 0);
 	fs->cur_cyl = -1;
 	if (fs->retries > 5) {
-		__blk_end_request_cur(fd_req, -EIO);
+		swim3_end_request_cur(-EIO);
 		fs->state = idle;
 		start_request(fs);
 	} else {
@@ -556,7 +577,7 @@ static void seek_timeout(unsigned long data)
 	out_8(&sw->select, RELAX);
 	out_8(&sw->intr_enable, 0);
 	printk(KERN_ERR "swim3: seek timeout\n");
-	__blk_end_request_cur(fd_req, -EIO);
+	swim3_end_request_cur(-EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -580,7 +601,7 @@ static void settle_timeout(unsigned long data)
 		return;
 	}
 	printk(KERN_ERR "swim3: seek settle timeout\n");
-	__blk_end_request_cur(fd_req, -EIO);
+	swim3_end_request_cur(-EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -603,7 +624,7 @@ static void xfer_timeout(unsigned long data)
 	printk(KERN_ERR "swim3: timeout %sing sector %ld\n",
 	       (rq_data_dir(fd_req)==WRITE? "writ": "read"),
 	       (long)blk_rq_pos(fd_req));
-	__blk_end_request_cur(fd_req, -EIO);
+	swim3_end_request_cur(-EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -634,7 +655,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk(KERN_ERR "swim3: seen sector but cyl=ff?\n");
 				fs->cur_cyl = -1;
 				if (fs->retries > 5) {
-					__blk_end_request_cur(fd_req, -EIO);
+					swim3_end_request_cur(-EIO);
 					fs->state = idle;
 					start_request(fs);
 				} else {
@@ -717,7 +738,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk("swim3: error %sing block %ld (err=%x)\n",
 				       rq_data_dir(fd_req) == WRITE? "writ": "read",
 				       (long)blk_rq_pos(fd_req), err);
-				__blk_end_request_cur(fd_req, -EIO);
+				swim3_end_request_cur(-EIO);
 				fs->state = idle;
 			}
 		} else {
@@ -726,12 +747,12 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk(KERN_ERR "swim3: fd dma: stat=%x resid=%d\n", stat, resid);
 				printk(KERN_ERR "  state=%d, dir=%x, intr=%x, err=%x\n",
 				       fs->state, rq_data_dir(fd_req), intr, err);
-				__blk_end_request_cur(fd_req, -EIO);
+				swim3_end_request_cur(-EIO);
 				fs->state = idle;
 				start_request(fs);
 				break;
 			}
-			if (__blk_end_request(fd_req, 0, fs->scount << 9)) {
+			if (swim3_end_request(0, fs->scount << 9)) {
 				fs->req_sector += fs->scount;
 				if (fs->req_sector > fs->secpertrack) {
 					fs->req_sector -= fs->secpertrack;
-- 
cgit v0.10.2


From 2d75ce084eec2a8154225c7e978191b364029003 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:05 +0900
Subject: xsysace: dequeue in-flight request

xsysace already tracks in-flight request using ace->req.  Converting
to dequeueing model is mostly a matter of adding dequeueing call after
request fetching.  The only tricky part is handling CF removal which
should complete both in flight and on queue requests.  Convert to
dequeueing model.

While at it, remove explicit blk_rq_cur_bytes() and use
__blk_end_request_cur() instead.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 97c99b4..edf137b 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -466,7 +466,8 @@ struct request *ace_get_next_request(struct request_queue * q)
 	while ((req = elv_next_request(q)) != NULL) {
 		if (blk_fs_request(req))
 			break;
-		__blk_end_request_cur(req, -EIO);
+		blkdev_dequeue_request(req);
+		__blk_end_request_all(req, -EIO);
 	}
 	return req;
 }
@@ -492,9 +493,15 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		set_capacity(ace->gd, 0);
 		dev_info(ace->dev, "No CF in slot\n");
 
-		/* Drop all pending requests */
-		while ((req = elv_next_request(ace->queue)) != NULL)
-			__blk_end_request_cur(req, -EIO);
+		/* Drop all in-flight and pending requests */
+		if (ace->req) {
+			__blk_end_request_all(ace->req, -EIO);
+			ace->req = NULL;
+		}
+		while ((req = elv_next_request(ace->queue)) != NULL) {
+			blkdev_dequeue_request(req);
+			__blk_end_request_all(req, -EIO);
+		}
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -642,6 +649,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			ace->fsm_state = ACE_FSM_STATE_IDLE;
 			break;
 		}
+		blkdev_dequeue_request(req);
 
 		/* Okay, it's a data request, set it up for transfer */
 		dev_dbg(ace->dev,
@@ -718,8 +726,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		}
 
 		/* bio finished; is there another one? */
-		if (__blk_end_request(ace->req, 0,
-					blk_rq_cur_bytes(ace->req))) {
+		if (__blk_end_request_cur(ace->req, 0)) {
 			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
 			 *      blk_rq_sectors(ace->req),
 			 *      blk_rq_cur_sectors(ace->req));
-- 
cgit v0.10.2


From b12d4f82c1a3cdcb2441c803a3368a9426f2f47f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:06 +0900
Subject: paride: dequeue in-flight request

pd/pf/pcd have track in-flight request by pd/pf/pcd_req.  They can be
converted to dequeueing model by updating fetching and completion
paths.  Convert them.

Note that removal of elv_next_request() call from pf_next_buf()
doesn't make any functional difference.  The path is traveled only
during partial completion of a request and elv_next_request() call
must return the same request anyway.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Tim Waugh <tim@cyberelk.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 2d5dc0a..425f815 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -719,9 +719,12 @@ static void do_pcd_request(struct request_queue * q)
 	if (pcd_busy)
 		return;
 	while (1) {
-		pcd_req = elv_next_request(q);
-		if (!pcd_req)
-			return;
+		if (!pcd_req) {
+			pcd_req = elv_next_request(q);
+			if (!pcd_req)
+				return;
+			blkdev_dequeue_request(pcd_req);
+		}
 
 		if (rq_data_dir(pcd_req) == READ) {
 			struct pcd_unit *cd = pcd_req->rq_disk->private_data;
@@ -734,8 +737,10 @@ static void do_pcd_request(struct request_queue * q)
 			pcd_busy = 1;
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
 			return;
-		} else
-			__blk_end_request_cur(pcd_req, -EIO);
+		} else {
+			__blk_end_request_all(pcd_req, -EIO);
+			pcd_req = NULL;
+		}
 	}
 }
 
@@ -744,7 +749,8 @@ static inline void next_request(int err)
 	unsigned long saved_flags;
 
 	spin_lock_irqsave(&pcd_lock, saved_flags);
-	__blk_end_request_cur(pcd_req, err);
+	if (!__blk_end_request_cur(pcd_req, err))
+		pcd_req = NULL;
 	pcd_busy = 0;
 	do_pcd_request(pcd_queue);
 	spin_unlock_irqrestore(&pcd_lock, saved_flags);
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 9ec5d4a..d2ca3f5 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -410,11 +410,14 @@ static void run_fsm(void)
 				pd_claimed = 0;
 				phase = NULL;
 				spin_lock_irqsave(&pd_lock, saved_flags);
-				__blk_end_request_cur(pd_req,
-						      res == Ok ? 0 : -EIO);
-				pd_req = elv_next_request(pd_queue);
-				if (!pd_req)
-					stop = 1;
+				if (!__blk_end_request_cur(pd_req,
+						res == Ok ? 0 : -EIO)) {
+					pd_req = elv_next_request(pd_queue);
+					if (!pd_req)
+						stop = 1;
+					else
+						blkdev_dequeue_request(pd_req);
+				}
 				spin_unlock_irqrestore(&pd_lock, saved_flags);
 				if (stop)
 					return;
@@ -706,6 +709,7 @@ static void do_pd_request(struct request_queue * q)
 	pd_req = elv_next_request(q);
 	if (!pd_req)
 		return;
+	blkdev_dequeue_request(pd_req);
 
 	schedule_fsm();
 }
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index e88c889..d6f7bd8 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -752,10 +752,8 @@ static struct request_queue *pf_queue;
 
 static void pf_end_request(int err)
 {
-	if (pf_req) {
-		__blk_end_request_cur(pf_req, err);
+	if (pf_req && !__blk_end_request_cur(pf_req, err))
 		pf_req = NULL;
-	}
 }
 
 static void do_pf_request(struct request_queue * q)
@@ -763,9 +761,12 @@ static void do_pf_request(struct request_queue * q)
 	if (pf_busy)
 		return;
 repeat:
-	pf_req = elv_next_request(q);
-	if (!pf_req)
-		return;
+	if (!pf_req) {
+		pf_req = elv_next_request(q);
+		if (!pf_req)
+			return;
+		blkdev_dequeue_request(pf_req);
+	}
 
 	pf_current = pf_req->rq_disk->private_data;
 	pf_block = blk_rq_pos(pf_req);
@@ -806,7 +807,6 @@ static int pf_next_buf(void)
 	if (!pf_count) {
 		spin_lock_irqsave(&pf_spin_lock, saved_flags);
 		pf_end_request(0);
-		pf_req = elv_next_request(pf_queue);
 		spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 		if (!pf_req)
 			return 1;
-- 
cgit v0.10.2


From 10e1e629b386aef97bf66de6ef28d450bec06ee3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:07 +0900
Subject: ps3disk: dequeue in-flight request

Other than in issue error paths, ps3disk always completely finishes
fetched requests.  With full completion on error paths, it can be
easily converted to dequeueing model.

* After L1 r/w call failure, ps3disk_submit_request_sg() now fails the
  whole request.  Issue failure isn't likely to benefit from partial
  retry anyway and ps3disk uses full failure in completion error path
  too, so I don't think this amounts to any meaningful functionality
  loss.

* flush completion is converted to _all for consistency.  It doesn't
  make any functional difference.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 8d58308..f4d8db9 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -157,7 +157,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
 			__LINE__, op, res);
-		__blk_end_request_cur(req, -EIO);
+		__blk_end_request_all(req, -EIO);
 		return 0;
 	}
 
@@ -179,7 +179,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
 			__func__, __LINE__, res);
-		__blk_end_request_cur(req, -EIO);
+		__blk_end_request_all(req, -EIO);
 		return 0;
 	}
 
@@ -195,6 +195,8 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
 	while ((req = elv_next_request(q))) {
+		blkdev_dequeue_request(req);
+
 		if (blk_fs_request(req)) {
 			if (ps3disk_submit_request_sg(dev, req))
 				break;
@@ -204,7 +206,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 				break;
 		} else {
 			blk_dump_rq_flags(req, DEVICE_NAME " bad request");
-			__blk_end_request_cur(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 	}
-- 
cgit v0.10.2


From 9e31bebee2d8b5f8abe9677f2a29b90cba848657 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:08 +0900
Subject: amiflop: dequeue in-flight request

Request processing in amiflop is done sequentially in
redo_fd_request() proper and redo_fd_request() can easily be converted
to track in-flight request.  Remove CURRENT, track in-flight request
directly and dequeue it when processing starts.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index e4a14b9..80a68b2 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -112,8 +112,6 @@ module_param(fd_def_df0, ulong, 0);
 MODULE_LICENSE("GPL");
 
 static struct request_queue *floppy_queue;
-#define QUEUE (floppy_queue)
-#define CURRENT elv_next_request(floppy_queue)
 
 /*
  *  Macros
@@ -1335,59 +1333,61 @@ static int get_track(int drive, int track)
 
 static void redo_fd_request(void)
 {
+	struct request *rq;
 	unsigned int cnt, block, track, sector;
 	int drive;
 	struct amiga_floppy_struct *floppy;
 	char *data;
 	unsigned long flags;
+	int err;
 
- repeat:
-	if (!CURRENT) {
+next_req:
+	rq = elv_next_request(floppy_queue);
+	if (!rq) {
 		/* Nothing left to do */
 		return;
 	}
+	blkdev_dequeue_request(rq);
 
-	floppy = CURRENT->rq_disk->private_data;
+	floppy = rq->rq_disk->private_data;
 	drive = floppy - unit;
 
+next_segment:
 	/* Here someone could investigate to be more efficient */
-	for (cnt = 0; cnt < blk_rq_cur_sectors(CURRENT); cnt++) {
+	for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
 #ifdef DEBUG
 		printk("fd: sector %ld + %d requested for %s\n",
-		       blk_rq_pos(CURRENT), cnt,
-		       (rq_data_dir(CURRENT) == READ) ? "read" : "write");
+		       blk_rq_pos(rq), cnt,
+		       (rq_data_dir(rq) == READ) ? "read" : "write");
 #endif
-		block = blk_rq_pos(CURRENT) + cnt;
+		block = blk_rq_pos(rq) + cnt;
 		if ((int)block > floppy->blocks) {
-			__blk_end_request_cur(CURRENT, -EIO);
-			goto repeat;
+			err = -EIO;
+			break;
 		}
 
 		track = block / (floppy->dtype->sects * floppy->type->sect_mult);
 		sector = block % (floppy->dtype->sects * floppy->type->sect_mult);
-		data = CURRENT->buffer + 512 * cnt;
+		data = rq->buffer + 512 * cnt;
 #ifdef DEBUG
 		printk("access to track %d, sector %d, with buffer at "
 		       "0x%08lx\n", track, sector, data);
 #endif
 
 		if (get_track(drive, track) == -1) {
-			__blk_end_request_cur(CURRENT, -EIO);
-			goto repeat;
+			err = -EIO;
+			break;
 		}
 
-		switch (rq_data_dir(CURRENT)) {
-		case READ:
+		if (rq_data_dir(rq) == READ) {
 			memcpy(data, floppy->trackbuf + sector * 512, 512);
-			break;
-
-		case WRITE:
+		} else {
 			memcpy(floppy->trackbuf + sector * 512, data, 512);
 
 			/* keep the drive spinning while writes are scheduled */
 			if (!fd_motor_on(drive)) {
-				__blk_end_request_cur(CURRENT, -EIO);
-				goto repeat;
+				err = -EIO;
+				break;
 			}
 			/*
 			 * setup a callback to write the track buffer
@@ -1399,12 +1399,12 @@ static void redo_fd_request(void)
 		        /* reset the timer */
 			mod_timer (flush_track_timer + drive, jiffies + 1);
 			local_irq_restore(flags);
-			break;
 		}
 	}
 
-	__blk_end_request_cur(CURRENT, 0);
-	goto repeat;
+	if (__blk_end_request_cur(rq, err))
+		goto next_segment;
+	goto next_req;
 }
 
 static void do_fd_request(struct request_queue * q)
-- 
cgit v0.10.2


From 06b0608e2b46465e8e663214e7db982ddb000346 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:09 +0900
Subject: swim: dequeue in-flight request

swim processes requests one-by-one synchronously and can easily be
converted to dequeuing model.  Convert it.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Laurent Vivier <Laurent@lvivier.info>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index fc6a1c3..dedd489 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -514,7 +514,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
 			ret = swim_read_sector(fs, side, track, sector,
 						buffer);
 			if (try-- == 0)
-				return -1;
+				return -EIO;
 		} while (ret != 512);
 
 		buffer += ret;
@@ -528,38 +528,37 @@ static void redo_fd_request(struct request_queue *q)
 	struct request *req;
 	struct floppy_state *fs;
 
-	while ((req = elv_next_request(q))) {
+	req = elv_next_request(q);
+	if (req)
+		blkdev_dequeue_request(req);
+
+	while (req) {
+		int err = -EIO;
 
 		fs = req->rq_disk->private_data;
-		if (blk_rq_pos(req) >= fs->total_secs) {
-			__blk_end_request_cur(req, -EIO);
-			continue;
-		}
-		if (!fs->disk_in) {
-			__blk_end_request_cur(req, -EIO);
-			continue;
-		}
-		if (rq_data_dir(req) == WRITE) {
-			if (fs->write_protected) {
-				__blk_end_request_cur(req, -EIO);
-				continue;
-			}
-		}
+		if (blk_rq_pos(req) >= fs->total_secs)
+			goto done;
+		if (!fs->disk_in)
+			goto done;
+		if (rq_data_dir(req) == WRITE && fs->write_protected)
+			goto done;
+
 		switch (rq_data_dir(req)) {
 		case WRITE:
 			/* NOT IMPLEMENTED */
-			__blk_end_request_cur(req, -EIO);
 			break;
 		case READ:
-			if (floppy_read_sectors(fs, blk_rq_pos(req),
-						blk_rq_cur_sectors(req),
-						req->buffer)) {
-				__blk_end_request_cur(req, -EIO);
-				continue;
-			}
-			__blk_end_request_cur(req, 0);
+			err = floppy_read_sectors(fs, blk_rq_pos(req),
+						  blk_rq_cur_sectors(req),
+						  req->buffer);
 			break;
 		}
+	done:
+		if (!__blk_end_request_cur(req, err)) {
+			req = elv_next_request(q);
+			if (req)
+				blkdev_dequeue_request(req);
+		}
 	}
 }
 
-- 
cgit v0.10.2


From bab2a807a489822ded0c9d4a5344c80bcac10b0a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:10 +0900
Subject: xd: dequeue in-flight request

xd processes requests one-by-one synchronously and can be easily
converted to dequeueing model.  Convert it.

While at it, use rq_cur_bytes instead of rq_bytes when checking for
sector overflow.  This is for for consistency and better behavior for
merged requests.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 4ef8801..d4c4352 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -305,26 +305,31 @@ static void do_xd_request (struct request_queue * q)
 	if (xdc_busy)
 		return;
 
-	while ((req = elv_next_request(q)) != NULL) {
+	req = elv_next_request(q);
+	if (req)
+		blkdev_dequeue_request(req);
+
+	while (req) {
 		unsigned block = blk_rq_pos(req);
-		unsigned count = blk_rq_sectors(req);
+		unsigned count = blk_rq_cur_sectors(req);
 		XD_INFO *disk = req->rq_disk->private_data;
-		int res = 0;
+		int res = -EIO;
 		int retry;
 
-		if (!blk_fs_request(req)) {
-			__blk_end_request_cur(req, -EIO);
-			continue;
-		}
-		if (block + count > get_capacity(req->rq_disk)) {
-			__blk_end_request_cur(req, -EIO);
-			continue;
-		}
+		if (!blk_fs_request(req))
+			goto done;
+		if (block + count > get_capacity(req->rq_disk))
+			goto done;
 		for (retry = 0; (retry < XD_RETRIES) && !res; retry++)
 			res = xd_readwrite(rq_data_dir(req), disk, req->buffer,
 					   block, count);
+	done:
 		/* wrap up, 0 = success, -errno = fail */
-		__blk_end_request_cur(req, res);
+		if (!__blk_end_request_cur(req, res)) {
+			req = elv_next_request(q);
+			if (req)
+				blkdev_dequeue_request(req);
+		}
 	}
 }
 
-- 
cgit v0.10.2


From 1498ada7a8e80afe324f2b8d86158925d0096ec9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:11 +0900
Subject: mtd_blkdevs: dequeue in-flight request

mtd_blkdevs processes requests one-by-one synchronously from a kthread
and can be easily converted to dequeueing model.  Convert it.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 50c76a2..3e10442 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -89,18 +89,22 @@ static int mtd_blktrans_thread(void *arg)
 {
 	struct mtd_blktrans_ops *tr = arg;
 	struct request_queue *rq = tr->blkcore_priv->rq;
+	struct request *req = NULL;
 
 	/* we might get involved when memory gets low, so use PF_MEMALLOC */
 	current->flags |= PF_MEMALLOC;
 
 	spin_lock_irq(rq->queue_lock);
+
 	while (!kthread_should_stop()) {
-		struct request *req;
 		struct mtd_blktrans_dev *dev;
 		int res;
 
-		req = elv_next_request(rq);
-
+		if (!req) {
+			req = elv_next_request(rq);
+			if (req)
+				blkdev_dequeue_request(req);
+		}
 		if (!req) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			spin_unlock_irq(rq->queue_lock);
@@ -120,8 +124,13 @@ static int mtd_blktrans_thread(void *arg)
 
 		spin_lock_irq(rq->queue_lock);
 
-		__blk_end_request_cur(req, res);
+		if (!__blk_end_request_cur(req, res))
+			req = NULL;
 	}
+
+	if (req)
+		__blk_end_request_all(req, -EIO);
+
 	spin_unlock_irq(rq->queue_lock);
 
 	return 0;
-- 
cgit v0.10.2


From 6b0bf407b586b6ba8e060ad9979cb2bc3370b7eb Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:12 +0900
Subject: jsflash: dequeue in-flight request

jsflash processes requests one-by-one synchronously from a kthread and
can be easily converted to dequeueing model.  Convert it.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index d56ddaa..f572a4a 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -186,31 +186,37 @@ static void jsfd_do_request(struct request_queue *q)
 {
 	struct request *req;
 
-	while ((req = elv_next_request(q)) != NULL) {
+	req = elv_next_request(q);
+	if (req)
+		blkdev_dequeue_request(req);
+
+	while (req) {
 		struct jsfd_part *jdp = req->rq_disk->private_data;
 		unsigned long offset = blk_rq_pos(req) << 9;
 		size_t len = blk_rq_cur_bytes(req);
+		int err = -EIO;
 
-		if ((offset + len) > jdp->dsize) {
-			__blk_end_request_cur(req, -EIO);
-			continue;
-		}
+		if ((offset + len) > jdp->dsize)
+			goto end;
 
 		if (rq_data_dir(req) != READ) {
 			printk(KERN_ERR "jsfd: write\n");
-			__blk_end_request_cur(req, -EIO);
-			continue;
+			goto end;
 		}
 
 		if ((jdp->dbase & 0xff000000) != 0x20000000) {
 			printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase);
-			__blk_end_request_cur(req, -EIO);
-			continue;
+			goto end;
 		}
 
 		jsfd_read(req->buffer, jdp->dbase + offset, len);
-
-		__blk_end_request_cur(req, 0);
+		err = 0;
+	end:
+		if (!__blk_end_request_cur(req, err)) {
+			req = elv_next_request(q);
+			if (req)
+				blkdev_dequeue_request(req);
+		}
 	}
 }
 
-- 
cgit v0.10.2


From fb3ac7f6b811eac8e0aafa3df1c16ed872e898a8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:13 +0900
Subject: z2ram: dequeue in-flight request

z2ram processes requests one-by-one synchronously and can be easily
converted to dequeueing model.  Convert it.

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 6a13838..c909c1a 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -70,15 +70,21 @@ static struct gendisk *z2ram_gendisk;
 static void do_z2_request(struct request_queue *q)
 {
 	struct request *req;
-	while ((req = elv_next_request(q)) != NULL) {
+
+	req = elv_next_request(q);
+	if (req)
+		blkdev_dequeue_request(req);
+
+	while (req) {
 		unsigned long start = blk_rq_pos(req) << 9;
 		unsigned long len  = blk_rq_cur_bytes(req);
+		int err = 0;
 
 		if (start + len > z2ram_size) {
 			printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n",
 				blk_rq_pos(req), blk_rq_cur_sectors(req));
-			__blk_end_request_cur(req, -EIO);
-			continue;
+			err = -EIO;
+			goto done;
 		}
 		while (len) {
 			unsigned long addr = start & Z2RAM_CHUNKMASK;
@@ -93,7 +99,12 @@ static void do_z2_request(struct request_queue *q)
 			start += size;
 			len -= size;
 		}
-		__blk_end_request_cur(req, 0);
+	done:
+		if (!__blk_end_request_cur(req, err)) {
+			req = elv_next_request(q);
+			if (req)
+				blkdev_dequeue_request(req);
+		}
 	}
 }
 
-- 
cgit v0.10.2


From 296b2f6ae654581adc27f0d6f0af454c7f3d06ee Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:15 +0900
Subject: block: convert to dequeueing model (easy ones)

plat-omap/mailbox, floppy, viocd, mspro_block, i2o_block and
mmc/card/queue are already pretty close to dequeueing model and can be
converted with simple changes.  Convert them.

While at it,

* xen-blkfront: !fs check moved downwards to share dequeue call with
  normal path.

* mspro_block: __blk_end_request(..., blk_rq_cur_byte()) converted to
  __blk_end_request_cur()

* mmc/card/queue: loop of __blk_end_request() converted to
  __blk_end_request_all()

[ Impact: dequeue in-flight request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Alex Dubov <oakad@yahoo.com>
Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 538ba75..7a1f5c2 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -198,6 +198,8 @@ static void mbox_tx_work(struct work_struct *work)
 
 		spin_lock(q->queue_lock);
 		rq = elv_next_request(q);
+		if (rq)
+			blkdev_dequeue_request(rq);
 		spin_unlock(q->queue_lock);
 
 		if (!rq)
@@ -208,6 +210,9 @@ static void mbox_tx_work(struct work_struct *work)
 		ret = __mbox_msg_send(mbox, tx_data->msg, tx_data->arg);
 		if (ret) {
 			enable_mbox_irq(mbox, IRQ_TX);
+			spin_lock(q->queue_lock);
+			blk_requeue_request(q, rq);
+			spin_unlock(q->queue_lock);
 			return;
 		}
 
@@ -238,6 +243,8 @@ static void mbox_rx_work(struct work_struct *work)
 	while (1) {
 		spin_lock_irqsave(q->queue_lock, flags);
 		rq = elv_next_request(q);
+		if (rq)
+			blkdev_dequeue_request(rq);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		if (!rq)
 			break;
@@ -345,6 +352,8 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf)
 	while (1) {
 		spin_lock_irqsave(q->queue_lock, flags);
 		rq = elv_next_request(q);
+		if (rq)
+			blkdev_dequeue_request(rq);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 
 		if (!rq)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 1e27ed9..e2c70d2 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -931,7 +931,7 @@ static inline void unlock_fdc(void)
 	del_timer(&fd_timeout);
 	cont = NULL;
 	clear_bit(0, &fdc_busy);
-	if (elv_next_request(floppy_queue))
+	if (current_req || elv_next_request(floppy_queue))
 		do_fd_request(floppy_queue);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 	wake_up(&fdc_wait);
@@ -2913,6 +2913,8 @@ static void redo_fd_request(void)
 
 			spin_lock_irq(floppy_queue->queue_lock);
 			req = elv_next_request(floppy_queue);
+			if (req)
+				blkdev_dequeue_request(req);
 			spin_unlock_irq(floppy_queue->queue_lock);
 			if (!req) {
 				do_floppy = NULL;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 91fc565..66f8345 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -301,22 +301,23 @@ static void do_blkif_request(struct request_queue *rq)
 
 	while ((req = elv_next_request(rq)) != NULL) {
 		info = req->rq_disk->private_data;
-		if (!blk_fs_request(req)) {
-			__blk_end_request_cur(req, -EIO);
-			continue;
-		}
 
 		if (RING_FULL(&info->ring))
 			goto wait;
 
+		blkdev_dequeue_request(req);
+
+		if (!blk_fs_request(req)) {
+			__blk_end_request_all(req, -EIO);
+			continue;
+		}
+
 		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
 			 "(%u/%u) buffer:%p [%s]\n",
 			 req, req->cmd, (unsigned long)blk_rq_pos(req),
 			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
 			 req->buffer, rq_data_dir(req) ? "write" : "read");
 
-
-		blkdev_dequeue_request(req);
 		if (blkif_queue_request(req)) {
 			blk_requeue_request(rq, req);
 wait:
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 6e190a9..bbe9f08 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -298,6 +298,8 @@ static void do_viocd_request(struct request_queue *q)
 	struct request *req;
 
 	while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) {
+		blkdev_dequeue_request(req);
+
 		if (!blk_fs_request(req))
 			__blk_end_request_all(req, -EIO);
 		else if (send_request(req) < 0) {
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 93b2c61..58f5be8 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -672,8 +672,7 @@ try_again:
 					       msb->req_sg);
 
 		if (!msb->seg_count) {
-			chunk = __blk_end_request(msb->block_req, -ENOMEM,
-					blk_rq_cur_bytes(msb->block_req));
+			chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
 			continue;
 		}
 
@@ -711,6 +710,7 @@ try_again:
 		dev_dbg(&card->dev, "issue end\n");
 		return -EAGAIN;
 	}
+	blkdev_dequeue_request(msb->block_req);
 
 	dev_dbg(&card->dev, "trying again\n");
 	chunk = 1;
@@ -825,8 +825,10 @@ static void mspro_block_submit_req(struct request_queue *q)
 		return;
 
 	if (msb->eject) {
-		while ((req = elv_next_request(q)) != NULL)
+		while ((req = elv_next_request(q)) != NULL) {
+			blkdev_dequeue_request(req);
 			__blk_end_request_all(req, -ENODEV);
+		}
 
 		return;
 	}
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index e153f5d..8b5cbfc 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -916,8 +916,10 @@ static void i2o_block_request_fn(struct request_queue *q)
 				blk_stop_queue(q);
 				break;
 			}
-		} else
-			__blk_end_request_cur(req, -EIO);
+		} else {
+			blkdev_dequeue_request(req);
+			__blk_end_request_all(req, -EIO);
+		}
 	}
 };
 
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 7a72e75..4b70f1e 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -54,8 +54,11 @@ static int mmc_queue_thread(void *d)
 
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!blk_queue_plugged(q))
+		if (!blk_queue_plugged(q)) {
 			req = elv_next_request(q);
+			if (req)
+				blkdev_dequeue_request(req);
+		}
 		mq->req = req;
 		spin_unlock_irq(q->queue_lock);
 
@@ -88,15 +91,12 @@ static void mmc_request(struct request_queue *q)
 {
 	struct mmc_queue *mq = q->queuedata;
 	struct request *req;
-	int ret;
 
 	if (!mq) {
 		printk(KERN_ERR "MMC: killing requests for dead queue\n");
 		while ((req = elv_next_request(q)) != NULL) {
-			do {
-				ret = __blk_end_request(req, -EIO,
-							blk_rq_cur_bytes(req));
-			} while (ret);
+			blkdev_dequeue_request(req);
+			__blk_end_request_all(req, -EIO);
 		}
 		return;
 	}
-- 
cgit v0.10.2


From 2343046826a8ca426b07601d9593ee046c298b68 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:14 +0900
Subject: gdrom: dequeue in-flight request

gdrom already dequeues and fully completes requests on normal path and
the error paths can be easily converted to do so too.  Clean it up and
dequeue requests on error paths too.

While at it remove superflous blk_fs_request() && !blk_rq_sectors()
condition check.

[ Impact: dequeue in-flight request, cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 488423c..3cc02bf 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -638,33 +638,31 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 	kfree(read_command);
 }
 
-static void gdrom_request_handler_dma(struct request *req)
-{
-	/* dequeue, add to list of deferred work
-	* and then schedule workqueue */
-	blkdev_dequeue_request(req);
-	list_add_tail(&req->queuelist, &gdrom_deferred);
-	schedule_work(&work);
-}
-
 static void gdrom_request(struct request_queue *rq)
 {
 	struct request *req;
 
 	while ((req = elv_next_request(rq)) != NULL) {
+		blkdev_dequeue_request(req);
+
 		if (!blk_fs_request(req)) {
 			printk(KERN_DEBUG "GDROM: Non-fs request ignored\n");
-			__blk_end_request_cur(req, -EIO);
+			__blk_end_request_all(req, -EIO);
+			continue;
 		}
 		if (rq_data_dir(req) != READ) {
 			printk(KERN_NOTICE "GDROM: Read only device -");
 			printk(" write request ignored\n");
-			__blk_end_request_cur(req, -EIO);
+			__blk_end_request_all(req, -EIO);
+			continue;
 		}
-		if (blk_rq_sectors(req))
-			gdrom_request_handler_dma(req);
-		else
-			__blk_end_request_cur(req, -EIO);
+
+		/*
+		 * Add to list of deferred work and then schedule
+		 * workqueue.
+		 */
+		list_add_tail(&req->queuelist, &gdrom_deferred);
+		schedule_work(&work);
 	}
 }
 
-- 
cgit v0.10.2


From 9934c8c04561413609d2bc38c6b9f268cba774a4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:16 +0900
Subject: block: implement and enforce request peek/start/fetch

Till now block layer allowed two separate modes of request execution.
A request is always acquired from the request queue via
elv_next_request().  After that, drivers are free to either dequeue it
or process it without dequeueing.  Dequeue allows elv_next_request()
to return the next request so that multiple requests can be in flight.

Executing requests without dequeueing has its merits mostly in
allowing drivers for simpler devices which can't do sg to deal with
segments only without considering request boundary.  However, the
benefit this brings is dubious and declining while the cost of the API
ambiguity is increasing.  Segment based drivers are usually for very
old or limited devices and as converting to dequeueing model isn't
difficult, it doesn't justify the API overhead it puts on block layer
and its more modern users.

Previous patches converted all block low level drivers to dequeueing
model.  This patch completes the API transition by...

* renaming elv_next_request() to blk_peek_request()

* renaming blkdev_dequeue_request() to blk_start_request()

* adding blk_fetch_request() which is combination of peek and start

* disallowing completion of queued (not started) requests

* applying new API to all LLDs

Renamings are for consistency and to break out of tree code so that
it's apparent that out of tree drivers need updating.

[ Impact: block request issue API cleanup, no functional change ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Mike Miller <mike.miller@hp.com>
Cc: unsik Kim <donari75@gmail.com>
Cc: Paul Clements <paul.clements@steeleye.com>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Laurent Vivier <Laurent@lvivier.info>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Alex Dubov <oakad@yahoo.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: Stefan Weinhuber <wein@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 7a1f5c2..40424ed 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -197,9 +197,7 @@ static void mbox_tx_work(struct work_struct *work)
 		struct omap_msg_tx_data *tx_data;
 
 		spin_lock(q->queue_lock);
-		rq = elv_next_request(q);
-		if (rq)
-			blkdev_dequeue_request(rq);
+		rq = blk_fetch_request(q);
 		spin_unlock(q->queue_lock);
 
 		if (!rq)
@@ -242,9 +240,7 @@ static void mbox_rx_work(struct work_struct *work)
 
 	while (1) {
 		spin_lock_irqsave(q->queue_lock, flags);
-		rq = elv_next_request(q);
-		if (rq)
-			blkdev_dequeue_request(rq);
+		rq = blk_fetch_request(q);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		if (!rq)
 			break;
@@ -351,9 +347,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf)
 
 	while (1) {
 		spin_lock_irqsave(q->queue_lock, flags);
-		rq = elv_next_request(q);
-		if (rq)
-			blkdev_dequeue_request(rq);
+		rq = blk_fetch_request(q);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 
 		if (!rq)
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 402ba8f..aa9e926 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1228,12 +1228,11 @@ static void do_ubd_request(struct request_queue *q)
 	while(1){
 		struct ubd *dev = q->queuedata;
 		if(dev->end_sg == 0){
-			struct request *req = elv_next_request(q);
+			struct request *req = blk_fetch_request(q);
 			if(req == NULL)
 				return;
 
 			dev->request = req;
-			blkdev_dequeue_request(req);
 			dev->start_sg = 0;
 			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
 		}
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8713c2f..0ab81a0 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -180,7 +180,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	}
 
 	/* stash away the original request */
-	elv_dequeue_request(q, rq);
+	blk_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = NULL;
 
@@ -248,7 +248,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 			 * Queue ordering not supported.  Terminate
 			 * with prejudice.
 			 */
-			elv_dequeue_request(q, rq);
+			blk_dequeue_request(rq);
 			__blk_end_request_all(rq, -EOPNOTSUPP);
 			*rqp = NULL;
 			return false;
diff --git a/block/blk-core.c b/block/blk-core.c
index 6226a38..93691d2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -902,6 +902,8 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+	BUG_ON(blk_queued_rq(rq));
+
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
@@ -1610,28 +1612,6 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
-/**
- * blkdev_dequeue_request - dequeue request and start timeout timer
- * @req: request to dequeue
- *
- * Dequeue @req and start timeout timer on it.  This hands off the
- * request to the driver.
- *
- * Block internal functions which don't want to start timer should
- * call elv_dequeue_request().
- */
-void blkdev_dequeue_request(struct request *req)
-{
-	elv_dequeue_request(req->q, req);
-
-	/*
-	 * We are now handing the request to the hardware, add the
-	 * timeout handler.
-	 */
-	blk_add_timer(req);
-}
-EXPORT_SYMBOL(blkdev_dequeue_request);
-
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (blk_do_io_stat(req)) {
@@ -1671,7 +1651,23 @@ static void blk_account_io_done(struct request *req)
 	}
 }
 
-struct request *elv_next_request(struct request_queue *q)
+/**
+ * blk_peek_request - peek at the top of a request queue
+ * @q: request queue to peek at
+ *
+ * Description:
+ *     Return the request at the top of @q.  The returned request
+ *     should be started using blk_start_request() before LLD starts
+ *     processing it.
+ *
+ * Return:
+ *     Pointer to the request at the top of @q if available.  Null
+ *     otherwise.
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+struct request *blk_peek_request(struct request_queue *q)
 {
 	struct request *rq;
 	int ret;
@@ -1748,10 +1744,12 @@ struct request *elv_next_request(struct request_queue *q)
 
 	return rq;
 }
-EXPORT_SYMBOL(elv_next_request);
+EXPORT_SYMBOL(blk_peek_request);
 
-void elv_dequeue_request(struct request_queue *q, struct request *rq)
+void blk_dequeue_request(struct request *rq)
 {
+	struct request_queue *q = rq->q;
+
 	BUG_ON(list_empty(&rq->queuelist));
 	BUG_ON(ELV_ON_HASH(rq));
 
@@ -1767,6 +1765,58 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
 }
 
 /**
+ * blk_start_request - start request processing on the driver
+ * @req: request to dequeue
+ *
+ * Description:
+ *     Dequeue @req and start timeout timer on it.  This hands off the
+ *     request to the driver.
+ *
+ *     Block internal functions which don't want to start timer should
+ *     call blk_dequeue_request().
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+void blk_start_request(struct request *req)
+{
+	blk_dequeue_request(req);
+
+	/*
+	 * We are now handing the request to the hardware, add the
+	 * timeout handler.
+	 */
+	blk_add_timer(req);
+}
+EXPORT_SYMBOL(blk_start_request);
+
+/**
+ * blk_fetch_request - fetch a request from a request queue
+ * @q: request queue to fetch a request from
+ *
+ * Description:
+ *     Return the request at the top of @q.  The request is started on
+ *     return and LLD can start processing it immediately.
+ *
+ * Return:
+ *     Pointer to the request at the top of @q if available.  Null
+ *     otherwise.
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+struct request *blk_fetch_request(struct request_queue *q)
+{
+	struct request *rq;
+
+	rq = blk_peek_request(q);
+	if (rq)
+		blk_start_request(rq);
+	return rq;
+}
+EXPORT_SYMBOL(blk_fetch_request);
+
+/**
  * blk_update_request - Special helper function for request stacking drivers
  * @rq:	      the request being processed
  * @error:    %0 for success, < %0 for error
@@ -1937,12 +1987,11 @@ static bool blk_update_bidi_request(struct request *rq, int error,
  */
 static void blk_finish_request(struct request *req, int error)
 {
+	BUG_ON(blk_queued_rq(req));
+
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
-	if (blk_queued_rq(req))
-		elv_dequeue_request(req->q, req);
-
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion();
 
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 3c518e3..c260f7c 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -374,7 +374,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 	list_add(&rq->queuelist, &q->tag_busy_list);
 	return 0;
 }
diff --git a/block/blk.h b/block/blk.h
index ab54529..9e0042c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,7 @@ extern struct kobj_type blk_queue_ktype;
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
+void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 
 void blk_unplug_work(struct work_struct *work);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 774ab05..668dc23 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3321,7 +3321,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 	DAC960_Command_T *Command;
 
    while(1) {
-	Request = elv_next_request(req_q);
+	Request = blk_peek_request(req_q);
 	if (!Request)
 		return 1;
 
@@ -3341,7 +3341,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 	Command->BlockNumber = blk_rq_pos(Request);
 	Command->BlockCount = blk_rq_sectors(Request);
 	Command->Request = Request;
-	blkdev_dequeue_request(Request);
+	blk_start_request(Request);
 	Command->SegmentCount = blk_rq_map_sg(req_q,
 		  Command->Request, Command->cmd_sglist);
 	/* pci_map_sg MAY change the value of SegCount */
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 80a68b2..9c6e5b0 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1342,12 +1342,11 @@ static void redo_fd_request(void)
 	int err;
 
 next_req:
-	rq = elv_next_request(floppy_queue);
+	rq = blk_fetch_request(floppy_queue);
 	if (!rq) {
 		/* Nothing left to do */
 		return;
 	}
-	blkdev_dequeue_request(rq);
 
 	floppy = rq->rq_disk->private_data;
 	drive = floppy - unit;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 89a591d..f5e7180 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1404,10 +1404,9 @@ static void redo_fd_request(void)
 
 repeat:
 	if (!fd_request) {
-		fd_request = elv_next_request(floppy_queue);
+		fd_request = blk_fetch_request(floppy_queue);
 		if (!fd_request)
 			goto the_end;
-		blkdev_dequeue_request(fd_request);
 	}
 
 	floppy = fd_request->rq_disk->private_data;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ab7b04c..e714e7c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2801,7 +2801,7 @@ static void do_cciss_request(struct request_queue *q)
 		goto startio;
 
       queue:
-	creq = elv_next_request(q);
+	creq = blk_peek_request(q);
 	if (!creq)
 		goto startio;
 
@@ -2810,7 +2810,7 @@ static void do_cciss_request(struct request_queue *q)
 	if ((c = cmd_alloc(h, 1)) == NULL)
 		goto full;
 
-	blkdev_dequeue_request(creq);
+	blk_start_request(creq);
 
 	spin_unlock_irq(q->queue_lock);
 
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index a5caeff..a02dcfc 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -903,7 +903,7 @@ static void do_ida_request(struct request_queue *q)
 		goto startio;
 
 queue_next:
-	creq = elv_next_request(q);
+	creq = blk_peek_request(q);
 	if (!creq)
 		goto startio;
 
@@ -912,7 +912,7 @@ queue_next:
 	if ((c = cmd_alloc(h,1)) == NULL)
 		goto startio;
 
-	blkdev_dequeue_request(creq);
+	blk_start_request(creq);
 
 	c->ctlr = h->ctlr;
 	c->hdr.unit = (drv_info_t *)(creq->rq_disk->private_data) - h->drv;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index e2c70d2..90877fe 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -931,7 +931,7 @@ static inline void unlock_fdc(void)
 	del_timer(&fd_timeout);
 	cont = NULL;
 	clear_bit(0, &fdc_busy);
-	if (current_req || elv_next_request(floppy_queue))
+	if (current_req || blk_peek_request(floppy_queue))
 		do_fd_request(floppy_queue);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 	wake_up(&fdc_wait);
@@ -2912,9 +2912,7 @@ static void redo_fd_request(void)
 			struct request *req;
 
 			spin_lock_irq(floppy_queue->queue_lock);
-			req = elv_next_request(floppy_queue);
-			if (req)
-				blkdev_dequeue_request(req);
+			req = blk_fetch_request(floppy_queue);
 			spin_unlock_irq(floppy_queue->queue_lock);
 			if (!req) {
 				do_floppy = NULL;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 288ab63..961de56 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -592,12 +592,11 @@ repeat:
 	del_timer(&device_timer);
 
 	if (!hd_req) {
-		hd_req = elv_next_request(hd_queue);
+		hd_req = blk_fetch_request(hd_queue);
 		if (!hd_req) {
 			do_hd = NULL;
 			return;
 		}
-		blkdev_dequeue_request(hd_req);
 	}
 	req = hd_req;
 
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 1ca5d14..c0cd0a0 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -671,10 +671,8 @@ static void mg_request_poll(struct request_queue *q)
 
 	while (1) {
 		if (!host->req) {
-			host->req = elv_next_request(q);
-			if (host->req)
-				blkdev_dequeue_request(host->req);
-			else
+			host->req = blk_fetch_request(q);
+			if (!host->req)
 				break;
 		}
 
@@ -744,10 +742,8 @@ static void mg_request(struct request_queue *q)
 
 	while (1) {
 		if (!host->req) {
-			host->req = elv_next_request(q);
-			if (host->req)
-				blkdev_dequeue_request(host->req);
-			else
+			host->req = blk_fetch_request(q);
+			if (!host->req)
 				break;
 		}
 		req = host->req;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index fad167de..5d23ffa 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -533,11 +533,9 @@ static void do_nbd_request(struct request_queue *q)
 {
 	struct request *req;
 	
-	while ((req = elv_next_request(q)) != NULL) {
+	while ((req = blk_fetch_request(q)) != NULL) {
 		struct nbd_device *lo;
 
-		blkdev_dequeue_request(req);
-
 		spin_unlock_irq(q->queue_lock);
 
 		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 425f815..911dfd9 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -720,10 +720,9 @@ static void do_pcd_request(struct request_queue * q)
 		return;
 	while (1) {
 		if (!pcd_req) {
-			pcd_req = elv_next_request(q);
+			pcd_req = blk_fetch_request(q);
 			if (!pcd_req)
 				return;
-			blkdev_dequeue_request(pcd_req);
 		}
 
 		if (rq_data_dir(pcd_req) == READ) {
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index d2ca3f5..bf5955b 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -412,11 +412,9 @@ static void run_fsm(void)
 				spin_lock_irqsave(&pd_lock, saved_flags);
 				if (!__blk_end_request_cur(pd_req,
 						res == Ok ? 0 : -EIO)) {
-					pd_req = elv_next_request(pd_queue);
+					pd_req = blk_fetch_request(pd_queue);
 					if (!pd_req)
 						stop = 1;
-					else
-						blkdev_dequeue_request(pd_req);
 				}
 				spin_unlock_irqrestore(&pd_lock, saved_flags);
 				if (stop)
@@ -706,10 +704,9 @@ static void do_pd_request(struct request_queue * q)
 {
 	if (pd_req)
 		return;
-	pd_req = elv_next_request(q);
+	pd_req = blk_fetch_request(q);
 	if (!pd_req)
 		return;
-	blkdev_dequeue_request(pd_req);
 
 	schedule_fsm();
 }
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index d6f7bd8..68a9083 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -762,10 +762,9 @@ static void do_pf_request(struct request_queue * q)
 		return;
 repeat:
 	if (!pf_req) {
-		pf_req = elv_next_request(q);
+		pf_req = blk_fetch_request(q);
 		if (!pf_req)
 			return;
-		blkdev_dequeue_request(pf_req);
 	}
 
 	pf_current = pf_req->rq_disk->private_data;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index f4d8db9..338cee4 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -194,9 +194,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 
 	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
-	while ((req = elv_next_request(q))) {
-		blkdev_dequeue_request(req);
-
+	while ((req = blk_fetch_request(q))) {
 		if (blk_fs_request(req)) {
 			if (ps3disk_submit_request_sg(dev, req))
 				break;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 9f351bf..cbfd9c0 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -441,12 +441,11 @@ out:
 static void do_vdc_request(struct request_queue *q)
 {
 	while (1) {
-		struct request *req = elv_next_request(q);
+		struct request *req = blk_fetch_request(q);
 
 		if (!req)
 			break;
 
-		blkdev_dequeue_request(req);
 		if (__send_request(req) < 0)
 			__blk_end_request_all(req, -EIO);
 	}
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index dedd489..cf7877f 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -528,10 +528,7 @@ static void redo_fd_request(struct request_queue *q)
 	struct request *req;
 	struct floppy_state *fs;
 
-	req = elv_next_request(q);
-	if (req)
-		blkdev_dequeue_request(req);
-
+	req = blk_fetch_request(q);
 	while (req) {
 		int err = -EIO;
 
@@ -554,11 +551,8 @@ static void redo_fd_request(struct request_queue *q)
 			break;
 		}
 	done:
-		if (!__blk_end_request_cur(req, err)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index f48c6dd..80df93e 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -326,10 +326,9 @@ static void start_request(struct floppy_state *fs)
 	}
 	while (fs->state == idle) {
 		if (!fd_req) {
-			fd_req = elv_next_request(swim3_queue);
+			fd_req = blk_fetch_request(swim3_queue);
 			if (!fd_req)
 				break;
-			blkdev_dequeue_request(fd_req);
 		}
 		req = fd_req;
 #if 0
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 087c94c..da403b6 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -810,12 +810,10 @@ static void carm_oob_rq_fn(struct request_queue *q)
 
 	while (1) {
 		DPRINTK("get req\n");
-		rq = elv_next_request(q);
+		rq = blk_fetch_request(q);
 		if (!rq)
 			break;
 
-		blkdev_dequeue_request(rq);
-
 		crq = rq->special;
 		assert(crq != NULL);
 		assert(crq->rq == rq);
@@ -846,7 +844,7 @@ static void carm_rq_fn(struct request_queue *q)
 
 queue_one_request:
 	VPRINTK("get req\n");
-	rq = elv_next_request(q);
+	rq = blk_peek_request(q);
 	if (!rq)
 		return;
 
@@ -857,7 +855,7 @@ queue_one_request:
 	}
 	crq->rq = rq;
 
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 
 	if (rq_data_dir(rq) == WRITE) {
 		writing = 1;
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 40d03cf..178f459 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -627,7 +627,7 @@ static void ub_request_fn(struct request_queue *q)
 	struct ub_lun *lun = q->queuedata;
 	struct request *rq;
 
-	while ((rq = elv_next_request(q)) != NULL) {
+	while ((rq = blk_peek_request(q)) != NULL) {
 		if (ub_request_fn_1(lun, rq) != 0) {
 			blk_stop_queue(q);
 			break;
@@ -643,13 +643,13 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
 	int n_elem;
 
 	if (atomic_read(&sc->poison)) {
-		blkdev_dequeue_request(rq);
+		blk_start_request(rq);
 		ub_end_rq(rq, DID_NO_CONNECT << 16, blk_rq_bytes(rq));
 		return 0;
 	}
 
 	if (lun->changed && !blk_pc_request(rq)) {
-		blkdev_dequeue_request(rq);
+		blk_start_request(rq);
 		ub_end_rq(rq, SAM_STAT_CHECK_CONDITION, blk_rq_bytes(rq));
 		return 0;
 	}
@@ -660,7 +660,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
 		return -1;
 	memset(cmd, 0, sizeof(struct ub_scsi_cmd));
 
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 
 	urq = &lun->urq;
 	memset(urq, 0, sizeof(struct ub_request));
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 2086cb1..390d69b 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -361,11 +361,9 @@ static void do_viodasd_request(struct request_queue *q)
 	 * back later.
 	 */
 	while (num_req_outstanding < VIOMAXREQ) {
-		req = elv_next_request(q);
+		req = blk_fetch_request(q);
 		if (req == NULL)
 			return;
-		/* dequeue the current request from the queue */
-		blkdev_dequeue_request(req);
 		/* check that request contains a valid command */
 		if (!blk_fs_request(req)) {
 			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1980ab4..29a9daf 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -128,7 +128,7 @@ static void do_virtblk_request(struct request_queue *q)
 	struct request *req;
 	unsigned int issued = 0;
 
-	while ((req = elv_next_request(q)) != NULL) {
+	while ((req = blk_peek_request(q)) != NULL) {
 		vblk = req->rq_disk->private_data;
 		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
@@ -138,7 +138,7 @@ static void do_virtblk_request(struct request_queue *q)
 			blk_stop_queue(q);
 			break;
 		}
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 		issued++;
 	}
 
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index d4c4352..ce24292 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -305,10 +305,7 @@ static void do_xd_request (struct request_queue * q)
 	if (xdc_busy)
 		return;
 
-	req = elv_next_request(q);
-	if (req)
-		blkdev_dequeue_request(req);
-
+	req = blk_fetch_request(q);
 	while (req) {
 		unsigned block = blk_rq_pos(req);
 		unsigned count = blk_rq_cur_sectors(req);
@@ -325,11 +322,8 @@ static void do_xd_request (struct request_queue * q)
 					   block, count);
 	done:
 		/* wrap up, 0 = success, -errno = fail */
-		if (!__blk_end_request_cur(req, res)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!__blk_end_request_cur(req, res))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 66f8345..6d4ac76 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -299,13 +299,13 @@ static void do_blkif_request(struct request_queue *rq)
 
 	queued = 0;
 
-	while ((req = elv_next_request(rq)) != NULL) {
+	while ((req = blk_peek_request(rq)) != NULL) {
 		info = req->rq_disk->private_data;
 
 		if (RING_FULL(&info->ring))
 			goto wait;
 
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 
 		if (!blk_fs_request(req)) {
 			__blk_end_request_all(req, -EIO);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index edf137b..3a4397e 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -463,10 +463,10 @@ struct request *ace_get_next_request(struct request_queue * q)
 {
 	struct request *req;
 
-	while ((req = elv_next_request(q)) != NULL) {
+	while ((req = blk_peek_request(q)) != NULL) {
 		if (blk_fs_request(req))
 			break;
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 		__blk_end_request_all(req, -EIO);
 	}
 	return req;
@@ -498,10 +498,8 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			__blk_end_request_all(ace->req, -EIO);
 			ace->req = NULL;
 		}
-		while ((req = elv_next_request(ace->queue)) != NULL) {
-			blkdev_dequeue_request(req);
+		while ((req = blk_fetch_request(ace->queue)) != NULL)
 			__blk_end_request_all(req, -EIO);
-		}
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -649,7 +647,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			ace->fsm_state = ACE_FSM_STATE_IDLE;
 			break;
 		}
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 
 		/* Okay, it's a data request, set it up for transfer */
 		dev_dbg(ace->dev,
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index c909c1a..4575171 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -71,10 +71,7 @@ static void do_z2_request(struct request_queue *q)
 {
 	struct request *req;
 
-	req = elv_next_request(q);
-	if (req)
-		blkdev_dequeue_request(req);
-
+	req = blk_fetch_request(q);
 	while (req) {
 		unsigned long start = blk_rq_pos(req) << 9;
 		unsigned long len  = blk_rq_cur_bytes(req);
@@ -100,11 +97,8 @@ static void do_z2_request(struct request_queue *q)
 			len -= size;
 		}
 	done:
-		if (!__blk_end_request_cur(req, err)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 3cc02bf..1e366ad 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -642,9 +642,7 @@ static void gdrom_request(struct request_queue *rq)
 {
 	struct request *req;
 
-	while ((req = elv_next_request(rq)) != NULL) {
-		blkdev_dequeue_request(req);
-
+	while ((req = blk_fetch_request(rq)) != NULL) {
 		if (!blk_fs_request(req)) {
 			printk(KERN_DEBUG "GDROM: Non-fs request ignored\n");
 			__blk_end_request_all(req, -EIO);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index bbe9f08..ca741c2 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -297,9 +297,7 @@ static void do_viocd_request(struct request_queue *q)
 {
 	struct request *req;
 
-	while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) {
-		blkdev_dequeue_request(req);
-
+	while ((rwreq == 0) && ((req = blk_fetch_request(q)) != NULL)) {
 		if (!blk_fs_request(req))
 			__blk_end_request_all(req, -EIO);
 		else if (send_request(req) < 0) {
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2874c3d..8a894fa 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -269,7 +269,7 @@ void ide_retry_pc(ide_drive_t *drive)
 	blk_requeue_request(failed_rq->q, failed_rq);
 	drive->hwif->rq = NULL;
 	if (ide_queue_sense_rq(drive, pc)) {
-		blkdev_dequeue_request(failed_rq);
+		blk_start_request(failed_rq);
 		ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
 	}
 }
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index abda733..e4e3a0e3 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -519,11 +519,8 @@ repeat:
 		 * we know that the queue isn't empty, but this can happen
 		 * if the q->prep_rq_fn() decides to kill a request
 		 */
-		if (!rq) {
-			rq = elv_next_request(drive->queue);
-			if (rq)
-				blkdev_dequeue_request(rq);
-		}
+		if (!rq)
+			rq = blk_fetch_request(drive->queue);
 
 		spin_unlock_irq(q->queue_lock);
 		spin_lock_irq(&hwif->lock);
@@ -536,7 +533,7 @@ repeat:
 		/*
 		 * Sanity: don't accept a request that isn't a PM request
 		 * if we are currently power managed. This is very important as
-		 * blk_stop_queue() doesn't prevent the elv_next_request()
+		 * blk_stop_queue() doesn't prevent the blk_fetch_request()
 		 * above to return us whatever is in the queue. Since we call
 		 * ide_do_request() ourselves, we end up taking requests while
 		 * the queue is blocked...
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 58f5be8..c0bebc6 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -704,13 +704,12 @@ try_again:
 		return 0;
 	}
 
-	dev_dbg(&card->dev, "elv_next\n");
-	msb->block_req = elv_next_request(msb->queue);
+	dev_dbg(&card->dev, "blk_fetch\n");
+	msb->block_req = blk_fetch_request(msb->queue);
 	if (!msb->block_req) {
 		dev_dbg(&card->dev, "issue end\n");
 		return -EAGAIN;
 	}
-	blkdev_dequeue_request(msb->block_req);
 
 	dev_dbg(&card->dev, "trying again\n");
 	chunk = 1;
@@ -825,10 +824,8 @@ static void mspro_block_submit_req(struct request_queue *q)
 		return;
 
 	if (msb->eject) {
-		while ((req = elv_next_request(q)) != NULL) {
-			blkdev_dequeue_request(req);
+		while ((req = blk_fetch_request(q)) != NULL)
 			__blk_end_request_all(req, -ENODEV);
-		}
 
 		return;
 	}
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 8b5cbfc..6573ef4 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -877,7 +877,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 	struct request *req;
 
 	while (!blk_queue_plugged(q)) {
-		req = elv_next_request(q);
+		req = blk_peek_request(q);
 		if (!req)
 			break;
 
@@ -890,7 +890,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 
 			if (queue_depth < I2O_BLOCK_MAX_OPEN_REQUESTS) {
 				if (!i2o_block_transfer(req)) {
-					blkdev_dequeue_request(req);
+					blk_start_request(req);
 					continue;
 				} else
 					osm_info("transfer error\n");
@@ -917,7 +917,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 				break;
 			}
 		} else {
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 			__blk_end_request_all(req, -EIO);
 		}
 	}
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 4b70f1e..49e5823 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -54,11 +54,8 @@ static int mmc_queue_thread(void *d)
 
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!blk_queue_plugged(q)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!blk_queue_plugged(q))
+			req = blk_fetch_request(q);
 		mq->req = req;
 		spin_unlock_irq(q->queue_lock);
 
@@ -94,10 +91,8 @@ static void mmc_request(struct request_queue *q)
 
 	if (!mq) {
 		printk(KERN_ERR "MMC: killing requests for dead queue\n");
-		while ((req = elv_next_request(q)) != NULL) {
-			blkdev_dequeue_request(req);
+		while ((req = blk_fetch_request(q)) != NULL)
 			__blk_end_request_all(req, -EIO);
-		}
 		return;
 	}
 
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 3e10442..502622f 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -100,12 +100,7 @@ static int mtd_blktrans_thread(void *arg)
 		struct mtd_blktrans_dev *dev;
 		int res;
 
-		if (!req) {
-			req = elv_next_request(rq);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
-		if (!req) {
+		if (!req && !(req = blk_fetch_request(rq))) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			spin_unlock_irq(rq->queue_lock);
 			schedule();
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 7df03c7a..e64f62d 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1656,17 +1656,13 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 	if (basedev->state < DASD_STATE_READY)
 		return;
 	/* Now we try to fetch requests from the request queue */
-	while (!blk_queue_plugged(queue) &&
-	       elv_next_request(queue)) {
-
-		req = elv_next_request(queue);
-
+	while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) {
 		if (basedev->features & DASD_FEATURE_READONLY &&
 		    rq_data_dir(req) == WRITE) {
 			DBF_DEV_EVENT(DBF_ERR, basedev,
 				      "Rejecting write request %p",
 				      req);
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -1695,7 +1691,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "CCW creation failed (rc=%ld) "
 				      "on request %p",
 				      PTR_ERR(cqr), req);
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -1705,7 +1701,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 		 */
 		cqr->callback_data = (void *) req;
 		cqr->status = DASD_CQR_FILLED;
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 		list_add_tail(&cqr->blocklist, &block->ccw_queue);
 		dasd_profile_start(block, cqr, req);
 	}
@@ -2029,10 +2025,8 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 		return;
 
 	spin_lock_irq(&block->request_queue_lock);
-	while ((req = elv_next_request(block->request_queue))) {
-		blkdev_dequeue_request(req);
+	while ((req = blk_fetch_request(block->request_queue)))
 		__blk_end_request_all(req, -EIO);
-	}
 	spin_unlock_irq(&block->request_queue_lock);
 }
 
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 5d035e4..1e79676 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -93,7 +93,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 		device->blk_data.block_position = -1;
 	device->discipline->free_bread(ccw_req);
 	if (!list_empty(&device->req_queue) ||
-	    elv_next_request(device->blk_data.request_queue))
+	    blk_peek_request(device->blk_data.request_queue))
 		tapeblock_trigger_requeue(device);
 }
 
@@ -162,19 +162,16 @@ tapeblock_requeue(struct work_struct *work) {
 	spin_lock_irq(&device->blk_data.request_queue_lock);
 	while (
 		!blk_queue_plugged(queue) &&
-		elv_next_request(queue)   &&
+		(req = blk_fetch_request(queue)) &&
 		nr_queued < TAPEBLOCK_MIN_REQUEUE
 	) {
-		req = elv_next_request(queue);
 		if (rq_data_dir(req) == WRITE) {
 			DBF_EVENT(1, "TBLOCK: Rejecting write request\n");
-			blkdev_dequeue_request(req);
 			spin_unlock_irq(&device->blk_data.request_queue_lock);
 			blk_end_request_all(req, -EIO);
 			spin_lock_irq(&device->blk_data.request_queue_lock);
 			continue;
 		}
-		blkdev_dequeue_request(req);
 		nr_queued++;
 		spin_unlock_irq(&device->blk_data.request_queue_lock);
 		rc = tapeblock_start_request(device, req);
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index f572a4a..6d46516 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -186,10 +186,7 @@ static void jsfd_do_request(struct request_queue *q)
 {
 	struct request *req;
 
-	req = elv_next_request(q);
-	if (req)
-		blkdev_dequeue_request(req);
-
+	req = blk_fetch_request(q);
 	while (req) {
 		struct jsfd_part *jdp = req->rq_disk->private_data;
 		unsigned long offset = blk_rq_pos(req) << 9;
@@ -212,11 +209,8 @@ static void jsfd_do_request(struct request_queue *q)
 		jsfd_read(req->buffer, jdp->dbase + offset, len);
 		err = 0;
 	end:
-		if (!__blk_end_request_cur(req, err)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ee308f6..b12750f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1207,7 +1207,7 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 		break;
 	case BLKPREP_DEFER:
 		/*
-		 * If we defer, the elv_next_request() returns NULL, but the
+		 * If we defer, the blk_peek_request() returns NULL, but the
 		 * queue must be restarted, so we plug here if no returning
 		 * command will automatically do that.
 		 */
@@ -1385,7 +1385,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q)
 	struct scsi_target *starget = scsi_target(sdev);
 	struct Scsi_Host *shost = sdev->host;
 
-	blkdev_dequeue_request(req);
+	blk_start_request(req);
 
 	if (unlikely(cmd == NULL)) {
 		printk(KERN_CRIT "impossible request in %s.\n",
@@ -1477,7 +1477,7 @@ static void scsi_request_fn(struct request_queue *q)
 
 	if (!sdev) {
 		printk("scsi: killing requests for dead queue\n");
-		while ((req = elv_next_request(q)) != NULL)
+		while ((req = blk_peek_request(q)) != NULL)
 			scsi_kill_request(req, q);
 		return;
 	}
@@ -1498,7 +1498,7 @@ static void scsi_request_fn(struct request_queue *q)
 		 * that the request is fully prepared even if we cannot 
 		 * accept it.
 		 */
-		req = elv_next_request(q);
+		req = blk_peek_request(q);
 		if (!req || !scsi_dev_queue_ready(q, sdev))
 			break;
 
@@ -1514,7 +1514,7 @@ static void scsi_request_fn(struct request_queue *q)
 		 * Remove the request from the request list.
 		 */
 		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 		sdev->device_busy++;
 
 		spin_unlock(q->queue_lock);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 50988cb..d606452 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -163,12 +163,10 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
 	int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
 	while (!blk_queue_plugged(q)) {
-		req = elv_next_request(q);
+		req = blk_fetch_request(q);
 		if (!req)
 			break;
 
-		blkdev_dequeue_request(req);
-
 		spin_unlock_irq(q->queue_lock);
 
 		handler = to_sas_internal(shost->transportt)->f->smp_handler;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c755803..6e59d3b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -818,8 +818,6 @@ static inline void blk_run_address_space(struct address_space *mapping)
 		blk_run_backing_dev(mapping->backing_dev_info, NULL);
 }
 
-extern void blkdev_dequeue_request(struct request *req);
-
 /*
  * blk_rq_pos()		: the current sector
  * blk_rq_bytes()	: bytes left in the entire request
@@ -853,6 +851,13 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 }
 
 /*
+ * Request issue related functions.
+ */
+extern struct request *blk_peek_request(struct request_queue *q);
+extern void blk_start_request(struct request *rq);
+extern struct request *blk_fetch_request(struct request_queue *q);
+
+/*
  * Request completion related functions.
  *
  * blk_update_request() completes given number of bytes and updates
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4e46287..1cb3372 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -103,10 +103,8 @@ extern int elv_merge(struct request_queue *, struct request **, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);
-extern void elv_dequeue_request(struct request_queue *, struct request *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
 extern int elv_queue_empty(struct request_queue *);
-extern struct request *elv_next_request(struct request_queue *q);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 extern int elv_register_queue(struct request_queue *q);
-- 
cgit v0.10.2


From 7bce6c2740fab36708233e998a9e53115649b193 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Date: Mon, 11 May 2009 06:51:28 +0000
Subject: sh: sh7785lcr: fix I2C device address map for 32-bit mode

This fixes up the broken I2C offset in 32-bit mode.
The cause is because the board datasheet had a mistake.

Signed-off-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/mach-common/mach/sh7785lcr.h b/arch/sh/include/mach-common/mach/sh7785lcr.h
index 1ce27d5..90011d4 100644
--- a/arch/sh/include/mach-common/mach/sh7785lcr.h
+++ b/arch/sh/include/mach-common/mach/sh7785lcr.h
@@ -9,11 +9,11 @@
  * -----------------------------+---------------+---------------
  * 0x00000000 - 0x03ffffff(CS0)	| NOR Flash	| NOR Flash
  * 0x04000000 - 0x05ffffff(CS1)	| PLD		| PLD
- * 0x06000000 - 0x07ffffff(CS1)	| reserved	| I2C
+ * 0x06000000 - 0x07ffffff(CS1)	| I2C		| I2C
  * 0x08000000 - 0x0bffffff(CS2)	| USB		| DDR SDRAM
  * 0x0c000000 - 0x0fffffff(CS3)	| SD		| DDR SDRAM
  * 0x10000000 - 0x13ffffff(CS4)	| SM107		| SM107
- * 0x14000000 - 0x17ffffff(CS5)	| I2C		| USB
+ * 0x14000000 - 0x17ffffff(CS5)	| reserved	| USB
  * 0x18000000 - 0x1bffffff(CS6)	| reserved	| SD
  * 0x40000000 - 0x5fffffff	| DDR SDRAM	| (cannot use)
  *
@@ -32,6 +32,9 @@
 #define PLD_VERSR		(PLD_BASE_ADDR + 0x0c)
 #define PLD_MMSR		(PLD_BASE_ADDR + 0x0e)
 
+#define PCA9564_ADDR		0x06000000	/* I2C */
+#define PCA9564_SIZE		0x00000100
+
 #define SM107_MEM_ADDR		0x10000000
 #define SM107_MEM_SIZE		0x00e00000
 #define SM107_REG_ADDR		0x13e00000
@@ -40,16 +43,13 @@
 #if defined(CONFIG_SH_SH7785LCR_29BIT_PHYSMAPS)
 #define R8A66597_ADDR		0x14000000	/* USB */
 #define CG200_ADDR		0x18000000	/* SD */
-#define PCA9564_ADDR		0x06000000	/* I2C */
 #else
 #define R8A66597_ADDR		0x08000000
 #define CG200_ADDR		0x0c000000
-#define PCA9564_ADDR		0x14000000
 #endif
 
 #define R8A66597_SIZE		0x00000100
 #define CG200_SIZE		0x00010000
-#define PCA9564_SIZE		0x00000100
 
 #endif  /* __ASM_SH_RENESAS_SH7785LCR_H */
 
-- 
cgit v0.10.2


From b9e0353fc85dab4ef5ebcef2bd09ebc4ce6d5a7b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 10:05:32 -0700
Subject: x86/acpi: remove irq-compression trick on 32-bit

We already have a per cpu vector on 32-bit via recent changes, and
don't need this trick any more (which trick obfuscates the real GSI
mappings and which only triggers on larger systems to begin with):

On 3 ioapic system (24 per ioapic) before patch I got:

ACPI: PCI Interrupt Link [ILSB] enabled at IRQ 71
IOAPIC[2]: Set routing entry (10-23 -> 0xa9 -> IRQ 64 Mode:1 Active:1)
pci 0000:80:01.1: PCI INT A -> Link[ILSB] -> GSI 71 (level, low) -> IRQ 64
ACPI: PCI Interrupt Link [LE5B] enabled at IRQ 67
IOAPIC[2]: Set routing entry (10-19 -> 0xb1 -> IRQ 65 Mode:1 Active:1)
pci 0000:83:00.0: PCI INT B -> Link[LE5B] -> GSI 67 (level, low) -> IRQ 65
ACPI: PCI Interrupt Link [LE5A] enabled at IRQ 66
IOAPIC[2]: Set routing entry (10-18 -> 0xb9 -> IRQ 66 Mode:1 Active:1)
pci 0000:83:00.1: PCI INT A -> Link[LE5A] -> GSI 66 (level, low) -> IRQ 66
ACPI: PCI Interrupt Link [LE5D] enabled at IRQ 65
IOAPIC[2]: Set routing entry (10-17 -> 0xc1 -> IRQ 67 Mode:1 Active:1)
pci 0000:84:00.0: PCI INT B -> Link[LE5D] -> GSI 65 (level, low) -> IRQ 67
ACPI: PCI Interrupt Link [LE5C] enabled at IRQ 64
IOAPIC[2]: Set routing entry (10-16 -> 0xc9 -> IRQ 68 Mode:1 Active:1)
pci 0000:84:00.1: PCI INT A -> Link[LE5C] -> GSI 64 (level, low) -> IRQ 68
pci 0000:87:00.0: PCI INT B -> Link[LE5A] -> GSI 66 (level, low) -> IRQ 66
pci 0000:87:00.1: PCI INT A -> Link[LE5D] -> GSI 65 (level, low) -> IRQ 67
pci 0000:88:00.0: PCI INT B -> Link[LE5C] -> GSI 64 (level, low) -> IRQ 68
pci 0000:88:00.1: PCI INT A -> Link[LE5B] -> GSI 67 (level, low) -> IRQ 65
pci 0000:8b:00.0: PCI INT B -> Link[LE5A] -> GSI 66 (level, low) -> IRQ 66
pci 0000:8b:00.1: PCI INT A -> Link[LE5D] -> GSI 65 (level, low) -> IRQ 67
pci 0000:8c:00.0: PCI INT B -> Link[LE5C] -> GSI 64 (level, low) -> IRQ 68
pci 0000:8c:00.1: PCI INT A -> Link[LE5B] -> GSI 67 (level, low) -> IRQ 65

after the patch we get:

ACPI: PCI Interrupt Link [ILSB] enabled at IRQ 71
IOAPIC[2]: Set routing entry (10-23 -> 0xa9 -> IRQ 71 Mode:1 Active:1)
pci 0000:80:01.1: PCI INT A -> Link[ILSB] -> GSI 71 (level, low) -> IRQ 71
ACPI: PCI Interrupt Link [LE5B] enabled at IRQ 67
IOAPIC[2]: Set routing entry (10-19 -> 0xb1 -> IRQ 67 Mode:1 Active:1)
pci 0000:83:00.0: PCI INT B -> Link[LE5B] -> GSI 67 (level, low) -> IRQ 67
ACPI: PCI Interrupt Link [LE5A] enabled at IRQ 66
IOAPIC[2]: Set routing entry (10-18 -> 0xb9 -> IRQ 66 Mode:1 Active:1)
pci 0000:83:00.1: PCI INT A -> Link[LE5A] -> GSI 66 (level, low) -> IRQ 66
ACPI: PCI Interrupt Link [LE5D] enabled at IRQ 65
IOAPIC[2]: Set routing entry (10-17 -> 0xc1 -> IRQ 65 Mode:1 Active:1)
pci 0000:84:00.0: PCI INT B -> Link[LE5D] -> GSI 65 (level, low) -> IRQ 65
ACPI: PCI Interrupt Link [LE5C] enabled at IRQ 64
IOAPIC[2]: Set routing entry (10-16 -> 0xc9 -> IRQ 64 Mode:1 Active:1)
pci 0000:84:00.1: PCI INT A -> Link[LE5C] -> GSI 64 (level, low) -> IRQ 64
pci 0000:87:00.0: PCI INT B -> Link[LE5A] -> GSI 66 (level, low) -> IRQ 66
pci 0000:87:00.1: PCI INT A -> Link[LE5D] -> GSI 65 (level, low) -> IRQ 65
pci 0000:88:00.0: PCI INT B -> Link[LE5C] -> GSI 64 (level, low) -> IRQ 64
pci 0000:88:00.1: PCI INT A -> Link[LE5B] -> GSI 67 (level, low) -> IRQ 67
pci 0000:8b:00.0: PCI INT B -> Link[LE5A] -> GSI 66 (level, low) -> IRQ 66
pci 0000:8b:00.1: PCI INT A -> Link[LE5D] -> GSI 65 (level, low) -> IRQ 65
pci 0000:8c:00.0: PCI INT B -> Link[LE5C] -> GSI 64 (level, low) -> IRQ 64
pci 0000:8c:00.1: PCI INT A -> Link[LE5B] -> GSI 67 (level, low) -> IRQ 67

As it can be seen that GSIs now get mapped lineary.

[ Impact: simplify irq number mapping on bigger 32-bit systems ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Len Brown <lenb@kernel.org>
LKML-Reference: <4A01C35C.7060207@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6ee96b5..fb5e882 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1162,22 +1162,9 @@ int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	int ioapic;
 	int ioapic_pin;
-#ifdef CONFIG_X86_32
-#define MAX_GSI_NUM	4096
-#define IRQ_COMPRESSION_START	64
-
-	static int pci_irq = IRQ_COMPRESSION_START;
-	/*
-	 * Mapping between Global System Interrupts, which
-	 * represent all possible interrupts, and IRQs
-	 * assigned to actual devices.
-	 */
-	static int gsi_to_irq[MAX_GSI_NUM];
-#else
 
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return gsi;
-#endif
 
 	/* Don't set up the ACPI SCI because it's already set up */
 	if (acpi_gbl_FADT.sci_interrupt == gsi)
@@ -1196,66 +1183,28 @@ int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 		gsi = ioapic_renumber_irq(ioapic, gsi);
 #endif
 
-	/*
-	 * Avoid pin reprogramming.  PRTs typically include entries
-	 * with redundant pin->gsi mappings (but unique PCI devices);
-	 * we only program the IOAPIC on the first.
-	 */
 	if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
 		printk(KERN_ERR "Invalid reference to IOAPIC pin "
 		       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
 		       ioapic_pin);
 		return gsi;
 	}
+
+	/*
+	 * Avoid pin reprogramming.  PRTs typically include entries
+	 * with redundant pin->gsi mappings (but unique PCI devices);
+	 * we only program the IOAPIC on the first.
+	 */
 	if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
 		pr_debug("Pin %d-%d already programmed\n",
 			 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-#ifdef CONFIG_X86_32
-		return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
-#else
 		return gsi;
-#endif
 	}
-
 	set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
-#ifdef CONFIG_X86_32
-	/*
-	 * For GSI >= 64, use IRQ compression
-	 */
-	if ((gsi >= IRQ_COMPRESSION_START)
-	    && (triggering == ACPI_LEVEL_SENSITIVE)) {
-		/*
-		 * For PCI devices assign IRQs in order, avoiding gaps
-		 * due to unused I/O APIC pins.
-		 */
-		int irq = gsi;
-		if (gsi < MAX_GSI_NUM) {
-			/*
-			 * Retain the VIA chipset work-around (gsi > 15), but
-			 * avoid a problem where the 8254 timer (IRQ0) is setup
-			 * via an override (so it's not on pin 0 of the ioapic),
-			 * and at the same time, the pin 0 interrupt is a PCI
-			 * type.  The gsi > 15 test could cause these two pins
-			 * to be shared as IRQ0, and they are not shareable.
-			 * So test for this condition, and if necessary, avoid
-			 * the pin collision.
-			 */
-			gsi = pci_irq++;
-			/*
-			 * Don't assign IRQ used by ACPI SCI
-			 */
-			if (gsi == acpi_gbl_FADT.sci_interrupt)
-				gsi = pci_irq++;
-			gsi_to_irq[irq] = gsi;
-		} else {
-			printk(KERN_ERR "GSI %u is too high\n", gsi);
-			return gsi;
-		}
-	}
-#endif
 	io_apic_set_pci_routing(dev, ioapic, ioapic_pin, gsi,
 				triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
 				polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+
 	return gsi;
 }
 
-- 
cgit v0.10.2


From ee214558c2e959781a406e76c5b34364da638e1d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 10:07:07 -0700
Subject: x86: fix alloc_mptable()

Fix the conditions when we stop updating the mptable due to
running out of slots.

[ Impact: fix memory corruption / non-working update_mptable boot parameter ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Len Brown <lenb@kernel.org>
LKML-Reference: <4A01C3BB.1000609@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70fd7e4..cd2a41a 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -870,24 +870,17 @@ static
 inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
 #endif /* CONFIG_X86_IO_APIC */
 
-static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
-		      int count)
+static int
+check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
-	if (!mpc_new_phys) {
-		pr_info("No spare slots, try to append...take your risk, "
-			"new mpc_length %x\n", count);
-	} else {
-		if (count <= mpc_new_length)
-			pr_info("No spare slots, try to append..., "
-				"new mpc_length %x\n", count);
-		else {
-			pr_err("mpc_new_length %lx is too small\n",
-				mpc_new_length);
-			return -1;
-		}
+	int ret = 0;
+
+	if (!mpc_new_phys || count <= mpc_new_length) {
+		WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
+		return -1;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
@@ -946,7 +939,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 		} else {
 			struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
 			count += sizeof(struct mpc_intsrc);
-			if (!check_slot(mpc_new_phys, mpc_new_length, count))
+			if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
 				goto out;
 			assign_to_mpc_intsrc(&mp_irqs[i], m);
 			mpc->length = count;
-- 
cgit v0.10.2


From a31f82057ce6f7ced578d64c07a72ccbdc7336e4 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 10:06:15 -0700
Subject: x86/acpi: call mp_config_acpi_gsi() in mp_register_gsi()

The patch to call mp_config_acpi_gsi() from the ACPI IRQ registration
code never got mainline because there were open discussions about it.

This call is needed to properly update the kernel's copy of the mptable,
when the update_mptable boot parameter is needed.

Now that the dust has settled with the APIC unification, and since there
were no objections when the patch was re-submitted, try this again.

[ Impact: fix the update_mptable boot parameter ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Len Brown <lenb@kernel.org>
LKML-Reference: <4A01C387.7090103@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 3ea1f53..c34961a 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -77,17 +77,8 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
 				 int active_high_low);
 extern int acpi_probe_gsi(void);
 #ifdef CONFIG_X86_IO_APIC
-extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-				u32 gsi, int triggering, int polarity);
 extern int mp_find_ioapic(int gsi);
 extern int mp_find_ioapic_pin(int ioapic, int gsi);
-#else
-static inline int
-mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-		   u32 gsi, int triggering, int polarity)
-{
-	return 0;
-}
 #endif
 #else /* !CONFIG_ACPI: */
 static inline int acpi_probe_gsi(void)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fb5e882..8019ecf 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -33,6 +33,7 @@
 #include <linux/irq.h>
 #include <linux/bootmem.h>
 #include <linux/ioport.h>
+#include <linux/pci.h>
 
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
@@ -1158,6 +1159,44 @@ void __init mp_config_acpi_legacy_irqs(void)
 	}
 }
 
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int triggering,
+			int polarity)
+{
+#ifdef CONFIG_X86_MPPARSE
+	struct mpc_intsrc mp_irq;
+	struct pci_dev *pdev;
+	unsigned char number;
+	unsigned int devfn;
+	int ioapic;
+	u8 pin;
+
+	if (!acpi_ioapic)
+		return 0;
+	if (!dev)
+		return 0;
+	if (dev->bus != &pci_bus_type)
+		return 0;
+
+	pdev = to_pci_dev(dev);
+	number = pdev->bus->number;
+	devfn = pdev->devfn;
+	pin = pdev->pin;
+	/* print the entry should happen on mptable identically */
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+	mp_irq.srcbus = number;
+	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	ioapic = mp_find_ioapic(gsi);
+	mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
+	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+	save_mp_irq(&mp_irq);
+#endif
+	return 0;
+}
+
 int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	int ioapic;
@@ -1189,6 +1228,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 		       ioapic_pin);
 		return gsi;
 	}
+	mp_config_acpi_gsi(dev, gsi, triggering, polarity);
 
 	/*
 	 * Avoid pin reprogramming.  PRTs typically include entries
@@ -1208,32 +1248,6 @@ int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 	return gsi;
 }
 
-int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-			u32 gsi, int triggering, int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
-	struct mpc_intsrc mp_irq;
-	int ioapic;
-
-	if (!acpi_ioapic)
-		return 0;
-
-	/* print the entry should happen on mptable identically */
-	mp_irq.type = MP_INTSRC;
-	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
-				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
-	mp_irq.srcbus = number;
-	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
-	ioapic = mp_find_ioapic(gsi);
-	mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
-	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
-	save_mp_irq(&mp_irq);
-#endif
-	return 0;
-}
-
 /*
  * Parse IOAPIC related entries in MADT
  * returns 0 on success, < 0 on error
-- 
cgit v0.10.2


From bdfe8ac153546537ed24de69610ea781a734f785 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 10:07:41 -0700
Subject: x86/acpi: move pin_programmed bit map to io_apic.c

Prepare to call setup_io_apic_routing() in pcibios_irq_enable()
also remove not needed member apic_id.

[ Impact: clean up, prepare for future change ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Len Brown <lenb@kernel.org>
LKML-Reference: <4A01C3DD.3050104@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8019ecf..dcfbc3a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -904,10 +904,8 @@ extern int es7000_plat;
 #endif
 
 static struct {
-	int apic_id;
 	int gsi_base;
 	int gsi_end;
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } mp_ioapic_routing[MAX_IO_APICS];
 
 int mp_find_ioapic(int gsi)
@@ -996,7 +994,6 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
-	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
 	mp_ioapic_routing[idx].gsi_base = gsi_base;
 	mp_ioapic_routing[idx].gsi_end = gsi_base +
 	    io_apic_get_redir_entries(idx);
@@ -1189,7 +1186,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int triggering,
 	mp_irq.srcbus = number;
 	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
 	ioapic = mp_find_ioapic(gsi);
-	mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
+	mp_irq.dstapic = mp_ioapics[ioapic].apicid;
 	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
 
 	save_mp_irq(&mp_irq);
@@ -1224,23 +1221,12 @@ int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 
 	if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
 		printk(KERN_ERR "Invalid reference to IOAPIC pin "
-		       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+		       "%d-%d\n", mp_ioapics[ioapic].apicid,
 		       ioapic_pin);
 		return gsi;
 	}
 	mp_config_acpi_gsi(dev, gsi, triggering, polarity);
 
-	/*
-	 * Avoid pin reprogramming.  PRTs typically include entries
-	 * with redundant pin->gsi mappings (but unique PCI devices);
-	 * we only program the IOAPIC on the first.
-	 */
-	if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-		pr_debug("Pin %d-%d already programmed\n",
-			 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-		return gsi;
-	}
-	set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
 	io_apic_set_pci_routing(dev, ioapic, ioapic_pin, gsi,
 				triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
 				polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 21c30e1..e279ae3 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3922,7 +3922,7 @@ int __init io_apic_get_version(int ioapic)
 }
 #endif
 
-int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
+static int __io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
 				 int triggering, int polarity)
 {
 	struct irq_desc *desc;
@@ -3959,6 +3959,29 @@ int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
 	return 0;
 }
 
+static struct {
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
+int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
+				 int triggering, int polarity)
+{
+
+	/*
+	 * Avoid pin reprogramming.  PRTs typically include entries
+	 * with redundant pin->gsi mappings (but unique PCI devices);
+	 * we only program the IOAPIC on the first.
+	 */
+	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+		pr_debug("Pin %d-%d already programmed\n",
+			 mp_ioapics[ioapic].apicid, pin);
+		return 0;
+	}
+	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+	return __io_apic_set_pci_routing(dev, ioapic, pin, irq,
+					 triggering, polarity);
+}
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
-- 
cgit v0.10.2


From e20c06fd6950265a899edd96a02dc2e6ae2d1ce5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 10:08:22 -0700
Subject: x86/pci: add 4 more return parameters to IO_APIC_get_PCI_irq_vector()

To prepare those params for pcibios_irq_enable() to call setup_io_apic_routing().

[ Impact: extend function call API to prepare for new functionality ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A01C406.2040303@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b762ea4..26a40ab 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -63,7 +63,9 @@ extern unsigned long io_apic_irqs;
 extern void init_VISWS_APIC_irqs(void);
 extern void setup_IO_APIC(void);
 extern void disable_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
+					int *ioapic, int *ioapic_pin,
+					int *trigger, int *polarity);
 extern void setup_ioapic_dest(void);
 
 extern void enable_IO_APIC(void);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e279ae3..caf9dbd 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -873,54 +873,6 @@ static int __init find_isa_irq_apic(int irq, int type)
 	return -1;
 }
 
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-	int apic, i, best_guess = -1;
-
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-		bus, slot, pin);
-	if (test_bit(bus, mp_bus_not_pci)) {
-		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-		return -1;
-	}
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].srcbus;
-
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-			    mp_irqs[i].dstapic == MP_APIC_ALL)
-				break;
-
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-			if (!(apic || IO_APIC_IRQ(irq)))
-				continue;
-
-			if (pin == (mp_irqs[i].srcbusirq & 3))
-				return irq;
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0)
-				best_guess = irq;
-		}
-	}
-	return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1139,6 +1091,65 @@ static int pin_2_irq(int idx, int apic, int pin)
 	return irq;
 }
 
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+				int *ioapic, int *ioapic_pin,
+				int *trigger, int *polarity)
+{
+	int apic, i, best_guess = -1;
+
+	apic_printk(APIC_DEBUG,
+		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		    bus, slot, pin);
+	if (test_bit(bus, mp_bus_not_pci)) {
+		apic_printk(APIC_VERBOSE,
+			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		return -1;
+	}
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].srcbus;
+
+		for (apic = 0; apic < nr_ioapics; apic++)
+			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+			    mp_irqs[i].dstapic == MP_APIC_ALL)
+				break;
+
+		if (!test_bit(lbus, mp_bus_not_pci) &&
+		    !mp_irqs[i].irqtype &&
+		    (bus == lbus) &&
+		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+			if (!(apic || IO_APIC_IRQ(irq)))
+				continue;
+
+			if (pin == (mp_irqs[i].srcbusirq & 3)) {
+				*ioapic = apic;
+				*ioapic_pin = mp_irqs[i].dstirq;
+				*trigger = irq_trigger(i);
+				*polarity = irq_polarity(i);
+				return irq;
+			}
+			/*
+			 * Use the first all-but-pin matching entry as a
+			 * best-guess fuzzy result for broken mptables.
+			 */
+			if (best_guess < 0) {
+				*ioapic = apic;
+				*ioapic_pin = mp_irqs[i].dstirq;
+				*trigger = irq_trigger(i);
+				*polarity = irq_polarity(i);
+				best_guess = irq;
+			}
+		}
+	}
+	return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
 void lock_vector_lock(void)
 {
 	/* Used to the online set of cpus does not change
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index fecbce6..a2f6bde 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1051,12 +1051,16 @@ static void __init pcibios_fixup_irqs(void)
 		 */
 		if (io_apic_assign_pci_irqs) {
 			int irq;
+			int ioapic = -1, ioapic_pin = -1;
+			int triggering, polarity;
 
 			/*
 			 * interrupt pins are numbered starting from 1
 			 */
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-				PCI_SLOT(dev->devfn), pin - 1);
+						PCI_SLOT(dev->devfn), pin - 1,
+						&ioapic, &ioapic_pin,
+						&triggering, &polarity);
 			/*
 			 * Busses behind bridges are typically not listed in the
 			 * MP-table.  In this case we have to look up the IRQ
@@ -1072,7 +1076,10 @@ static void __init pcibios_fixup_irqs(void)
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				bus = bridge->bus->number;
 				irq = IO_APIC_get_PCI_irq_vector(bus,
-						PCI_SLOT(bridge->devfn), pin - 1);
+						PCI_SLOT(bridge->devfn),
+						pin - 1,
+						&ioapic, &ioapic_pin,
+						&triggering, &polarity);
 				if (irq >= 0)
 					dev_warn(&dev->dev,
 						"using bridge %s INT %c to "
@@ -1221,8 +1228,14 @@ static int pirq_enable_irq(struct pci_dev *dev)
 
 		if (io_apic_assign_pci_irqs) {
 			int irq;
+			int ioapic = -1, ioapic_pin = -1;
+			int triggering, polarity;
 
-			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1);
+			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+						PCI_SLOT(dev->devfn),
+						pin - 1,
+						&ioapic, &ioapic_pin,
+						&triggering, &polarity);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1235,7 +1248,10 @@ static int pirq_enable_irq(struct pci_dev *dev)
 
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
-						PCI_SLOT(bridge->devfn), pin - 1);
+						PCI_SLOT(bridge->devfn),
+						pin - 1,
+						&ioapic, &ioapic_pin,
+						&triggering, &polarity);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index dd18f85..ef53b05 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -153,45 +153,49 @@ int ibmphp_init_devno(struct slot **cur_slot)
 		return -1;
 	}
 	for (loop = 0; loop < len; loop++) {
-		if ((*cur_slot)->number == rtable->slots[loop].slot) {
-		if ((*cur_slot)->bus == rtable->slots[loop].bus) {
+		if ((*cur_slot)->number == rtable->slots[loop].slot &&
+		    (*cur_slot)->bus == rtable->slots[loop].bus) {
+			int ioapic = -1, ioapic_pin = -1;
+			int triggering, polarity;
+
 			(*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
 			for (i = 0; i < 4; i++)
 				(*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
-						(int) (*cur_slot)->device, i);
-
-				debug("(*cur_slot)->irq[0] = %x\n",
-						(*cur_slot)->irq[0]);
-				debug("(*cur_slot)->irq[1] = %x\n",
-						(*cur_slot)->irq[1]);
-				debug("(*cur_slot)->irq[2] = %x\n",
-						(*cur_slot)->irq[2]);
-				debug("(*cur_slot)->irq[3] = %x\n",
-						(*cur_slot)->irq[3]);
-
-				debug("rtable->exlusive_irqs = %x\n",
+						(int) (*cur_slot)->device, i.
+						&ioapic, &ioapic_pin,
+						&triggering, &polarity);
+
+			debug("(*cur_slot)->irq[0] = %x\n",
+					(*cur_slot)->irq[0]);
+			debug("(*cur_slot)->irq[1] = %x\n",
+					(*cur_slot)->irq[1]);
+			debug("(*cur_slot)->irq[2] = %x\n",
+					(*cur_slot)->irq[2]);
+			debug("(*cur_slot)->irq[3] = %x\n",
+					(*cur_slot)->irq[3]);
+
+			debug("rtable->exlusive_irqs = %x\n",
 					rtable->exclusive_irqs);
-				debug("rtable->slots[loop].irq[0].bitmap = %x\n",
+			debug("rtable->slots[loop].irq[0].bitmap = %x\n",
 					rtable->slots[loop].irq[0].bitmap);
-				debug("rtable->slots[loop].irq[1].bitmap = %x\n",
+			debug("rtable->slots[loop].irq[1].bitmap = %x\n",
 					rtable->slots[loop].irq[1].bitmap);
-				debug("rtable->slots[loop].irq[2].bitmap = %x\n",
+			debug("rtable->slots[loop].irq[2].bitmap = %x\n",
 					rtable->slots[loop].irq[2].bitmap);
-				debug("rtable->slots[loop].irq[3].bitmap = %x\n",
+			debug("rtable->slots[loop].irq[3].bitmap = %x\n",
 					rtable->slots[loop].irq[3].bitmap);
 
-				debug("rtable->slots[loop].irq[0].link = %x\n",
+			debug("rtable->slots[loop].irq[0].link = %x\n",
 					rtable->slots[loop].irq[0].link);
-				debug("rtable->slots[loop].irq[1].link = %x\n",
+			debug("rtable->slots[loop].irq[1].link = %x\n",
 					rtable->slots[loop].irq[1].link);
-				debug("rtable->slots[loop].irq[2].link = %x\n",
+			debug("rtable->slots[loop].irq[2].link = %x\n",
 					rtable->slots[loop].irq[2].link);
-				debug("rtable->slots[loop].irq[3].link = %x\n",
+			debug("rtable->slots[loop].irq[3].link = %x\n",
 					rtable->slots[loop].irq[3].link);
-				debug("end of init_devno\n");
-				kfree(rtable);
-				return 0;
-			}
+			debug("end of init_devno\n");
+			kfree(rtable);
+			return 0;
 		}
 	}
 
-- 
cgit v0.10.2


From 5ef2183768bb7d64b85eccbfa1537a61cbefa97c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 10:08:50 -0700
Subject: x86/acpi: move setup io apic routing out of CONFIG_ACPI scope

So we could set io apic routing when ACPI is not enabled.

[ Impact: prepare for new functionality ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Len Brown <lenb@kernel.org>
LKML-Reference: <4A01C422.5070400@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 27bd2fd..6fd99f9 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -154,10 +154,10 @@ extern int timer_through_8259;
 extern int io_apic_get_unique_id(int ioapic, int apic_id);
 extern int io_apic_get_version(int ioapic);
 extern int io_apic_get_redir_entries(int ioapic);
-extern int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin,
-				  int irq, int edge_level, int active_high_low);
 #endif /* CONFIG_ACPI */
 
+extern int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin,
+				  int irq, int edge_level, int active_high_low);
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index caf9dbd..3a68dae 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3839,6 +3839,67 @@ int __init arch_probe_nr_irqs(void)
 }
 #endif
 
+static int __io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
+				 int triggering, int polarity)
+{
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+	int node;
+
+	if (!IO_APIC_IRQ(irq)) {
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+			ioapic);
+		return -EINVAL;
+	}
+
+	if (dev)
+		node = dev_to_node(dev);
+	else
+		node = cpu_to_node(boot_cpu_id);
+
+	desc = irq_to_desc_alloc_node(irq, node);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc %d\n", irq);
+		return 0;
+	}
+
+	/*
+	 * IRQs < 16 are already in the irq_2_pin[] map
+	 */
+	if (irq >= NR_IRQS_LEGACY) {
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, ioapic, pin);
+	}
+
+	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
+
+	return 0;
+}
+
+static struct {
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
+int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
+				 int triggering, int polarity)
+{
+
+	/*
+	 * Avoid pin reprogramming.  PRTs typically include entries
+	 * with redundant pin->gsi mappings (but unique PCI devices);
+	 * we only program the IOAPIC on the first.
+	 */
+	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+		pr_debug("Pin %d-%d already programmed\n",
+			 mp_ioapics[ioapic].apicid, pin);
+		return 0;
+	}
+	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+	return __io_apic_set_pci_routing(dev, ioapic, pin, irq,
+					 triggering, polarity);
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -3933,67 +3994,6 @@ int __init io_apic_get_version(int ioapic)
 }
 #endif
 
-static int __io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
-				 int triggering, int polarity)
-{
-	struct irq_desc *desc;
-	struct irq_cfg *cfg;
-	int node;
-
-	if (!IO_APIC_IRQ(irq)) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
-		return -EINVAL;
-	}
-
-	if (dev)
-		node = dev_to_node(dev);
-	else
-		node = cpu_to_node(boot_cpu_id);
-
-	desc = irq_to_desc_alloc_node(irq, node);
-	if (!desc) {
-		printk(KERN_INFO "can not get irq_desc %d\n", irq);
-		return 0;
-	}
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= NR_IRQS_LEGACY) {
-		cfg = desc->chip_data;
-		add_pin_to_irq_node(cfg, node, ioapic, pin);
-	}
-
-	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-	return 0;
-}
-
-static struct {
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
-} mp_ioapic_routing[MAX_IO_APICS];
-
-int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
-				 int triggering, int polarity)
-{
-
-	/*
-	 * Avoid pin reprogramming.  PRTs typically include entries
-	 * with redundant pin->gsi mappings (but unique PCI devices);
-	 * we only program the IOAPIC on the first.
-	 */
-	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-		pr_debug("Pin %d-%d already programmed\n",
-			 mp_ioapics[ioapic].apicid, pin);
-		return 0;
-	}
-	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
-
-	return __io_apic_set_pci_routing(dev, ioapic, pin, irq,
-					 triggering, polarity);
-}
-
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
 	int i;
-- 
cgit v0.10.2


From b9c61b70075c87a8612624736faf4a2de5b1ed30 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 10:10:06 -0700
Subject: x86/pci: update pirq_enable_irq() to setup io apic routing

So we can set io apic routing only when enabling the device irq.

This is advantageous for IRQ descriptor allocation affinity: if we set up
the IO-APIC entry later, we have a chance to allocate the IRQ descriptor
later and know which device it is on and can set affinity accordingly.

[ Impact: standardize/enhance irq-enabling sequence for mptable irqs ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A01C46E.8000501@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3a68dae..5d5f412 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1480,9 +1480,13 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 	ioapic_write_entry(apic_id, pin, entry);
 }
 
+static struct {
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic_id, pin, idx, irq;
+	int apic_id = 0, pin, idx, irq;
 	int notcon = 0;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
@@ -1490,48 +1494,53 @@ static void __init setup_IO_APIC_irqs(void)
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
-			idx = find_irq_entry(apic_id, pin, mp_INT);
-			if (idx == -1) {
-				if (!notcon) {
-					notcon = 1;
-					apic_printk(APIC_VERBOSE,
-						KERN_DEBUG " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				} else
-					apic_printk(APIC_VERBOSE, " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				continue;
-			}
-			if (notcon) {
-				apic_printk(APIC_VERBOSE,
-					" (apicid-pin) not connected\n");
-				notcon = 0;
-			}
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		apic_id = mp_find_ioapic(0);
+		if (apic_id < 0)
+			apic_id = 0;
+	}
+#endif
 
-			irq = pin_2_irq(idx, apic_id, pin);
+	for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+		idx = find_irq_entry(apic_id, pin, mp_INT);
+		if (idx == -1) {
+			if (!notcon) {
+				notcon = 1;
+				apic_printk(APIC_VERBOSE,
+					KERN_DEBUG " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			} else
+				apic_printk(APIC_VERBOSE, " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			continue;
+		}
+		if (notcon) {
+			apic_printk(APIC_VERBOSE,
+				" (apicid-pin) not connected\n");
+			notcon = 0;
+		}
 
-			/*
-			 * Skip the timer IRQ if there's a quirk handler
-			 * installed and if it returns 1:
-			 */
-			if (apic->multi_timer_check &&
-					apic->multi_timer_check(apic_id, irq))
-				continue;
+		irq = pin_2_irq(idx, apic_id, pin);
 
-			desc = irq_to_desc_alloc_node(irq, node);
-			if (!desc) {
-				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-				continue;
-			}
-			cfg = desc->chip_data;
-			add_pin_to_irq_node(cfg, node, apic_id, pin);
+		/*
+		 * Skip the timer IRQ if there's a quirk handler
+		 * installed and if it returns 1:
+		 */
+		if (apic->multi_timer_check &&
+				apic->multi_timer_check(apic_id, irq))
+			continue;
 
-			setup_IO_APIC_irq(apic_id, pin, irq, desc,
-					irq_trigger(idx), irq_polarity(idx));
+		desc = irq_to_desc_alloc_node(irq, node);
+		if (!desc) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+			continue;
 		}
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, apic_id, pin);
+		set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+		setup_IO_APIC_irq(apic_id, pin, irq, desc,
+				irq_trigger(idx), irq_polarity(idx));
 	}
 
 	if (notcon)
@@ -3876,10 +3885,6 @@ static int __io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, in
 	return 0;
 }
 
-static struct {
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
-} mp_ioapic_routing[MAX_IO_APICS];
-
 int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
 				 int triggering, int polarity)
 {
@@ -4023,51 +4028,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-	int pin, ioapic, irq, irq_entry;
+	int pin, ioapic = 0, irq, irq_entry;
 	struct irq_desc *desc;
-	struct irq_cfg *cfg;
 	const struct cpumask *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
 
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-
-			/* setup_IO_APIC_irqs could fail to get vector for some device
-			 * when you have too many devices, because at that time only boot
-			 * cpu is online.
-			 */
-			desc = irq_to_desc(irq);
-			cfg = desc->chip_data;
-			if (!cfg->vector) {
-				setup_IO_APIC_irq(ioapic, pin, irq, desc,
-						  irq_trigger(irq_entry),
-						  irq_polarity(irq_entry));
-				continue;
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		ioapic = mp_find_ioapic(0);
+		if (ioapic < 0)
+			ioapic = 0;
+	}
+#endif
 
-			}
+	for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+		irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+		if (irq_entry == -1)
+			continue;
+		irq = pin_2_irq(irq_entry, ioapic, pin);
 
-			/*
-			 * Honour affinities which have been set in early boot
-			 */
-			if (desc->status &
-			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-				mask = desc->affinity;
-			else
-				mask = apic->target_cpus();
+		desc = irq_to_desc(irq);
 
-			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq_desc(desc, mask);
-			else
-				set_ioapic_affinity_irq_desc(desc, mask);
-		}
+		/*
+		 * Honour affinities which have been set in early boot
+		 */
+		if (desc->status &
+		    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+			mask = desc->affinity;
+		else
+			mask = apic->target_cpus();
 
+		if (intr_remapping_enabled)
+			set_ir_ioapic_affinity_irq_desc(desc, mask);
+		else
+			set_ioapic_affinity_irq_desc(desc, mask);
 	}
+
 }
 #endif
 
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index a2f6bde..2f3e192 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -889,6 +889,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		return 0;
 	}
 
+	if (io_apic_assign_pci_irqs)
+		return 0;
+
 	/* Find IRQ routing entry */
 
 	if (!pirq_table)
@@ -1039,63 +1042,15 @@ static void __init pcibios_fixup_irqs(void)
 		pirq_penalty[dev->irq]++;
 	}
 
+	if (io_apic_assign_pci_irqs)
+		return;
+
 	dev = NULL;
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
 		if (!pin)
 			continue;
 
-#ifdef CONFIG_X86_IO_APIC
-		/*
-		 * Recalculate IRQ numbers if we use the I/O APIC.
-		 */
-		if (io_apic_assign_pci_irqs) {
-			int irq;
-			int ioapic = -1, ioapic_pin = -1;
-			int triggering, polarity;
-
-			/*
-			 * interrupt pins are numbered starting from 1
-			 */
-			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-						PCI_SLOT(dev->devfn), pin - 1,
-						&ioapic, &ioapic_pin,
-						&triggering, &polarity);
-			/*
-			 * Busses behind bridges are typically not listed in the
-			 * MP-table.  In this case we have to look up the IRQ
-			 * based on the parent bus, parent slot, and pin number.
-			 * The SMP code detects such bridged busses itself so we
-			 * should get into this branch reliably.
-			 */
-			if (irq < 0 && dev->bus->parent) {
-				/* go back to the bridge */
-				struct pci_dev *bridge = dev->bus->self;
-				int bus;
-
-				pin = pci_swizzle_interrupt_pin(dev, pin);
-				bus = bridge->bus->number;
-				irq = IO_APIC_get_PCI_irq_vector(bus,
-						PCI_SLOT(bridge->devfn),
-						pin - 1,
-						&ioapic, &ioapic_pin,
-						&triggering, &polarity);
-				if (irq >= 0)
-					dev_warn(&dev->dev,
-						"using bridge %s INT %c to "
-							"get IRQ %d\n",
-						 pci_name(bridge),
-						 'A' + pin - 1, irq);
-			}
-			if (irq >= 0) {
-				dev_info(&dev->dev,
-					"PCI->APIC IRQ transform: INT %c "
-						"-> IRQ %d\n",
-					'A' + pin - 1, irq);
-				dev->irq = irq;
-			}
-		}
-#endif
 		/*
 		 * Still no IRQ? Try to lookup one...
 		 */
@@ -1190,6 +1145,19 @@ int __init pcibios_irq_init(void)
 	pcibios_enable_irq = pirq_enable_irq;
 
 	pcibios_fixup_irqs();
+
+	if (io_apic_assign_pci_irqs && pci_routeirq) {
+		struct pci_dev *dev = NULL;
+		/*
+		 * PCI IRQ routing is set up by pci_enable_device(), but we
+		 * also do it here in case there are still broken drivers that
+		 * don't use pci_enable_device().
+		 */
+		printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+		for_each_pci_dev(dev)
+			pirq_enable_irq(dev);
+	}
+
 	return 0;
 }
 
@@ -1220,13 +1188,17 @@ void pcibios_penalize_isa_irq(int irq, int active)
 static int pirq_enable_irq(struct pci_dev *dev)
 {
 	u8 pin;
-	struct pci_dev *temp_dev;
 
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+	if (pin && !pcibios_lookup_irq(dev, 1)) {
 		char *msg = "";
 
+		if (!io_apic_assign_pci_irqs && dev->irq)
+			return 0;
+
 		if (io_apic_assign_pci_irqs) {
+#ifdef CONFIG_X86_IO_APIC
+			struct pci_dev *temp_dev;
 			int irq;
 			int ioapic = -1, ioapic_pin = -1;
 			int triggering, polarity;
@@ -1261,12 +1233,16 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
+				io_apic_set_pci_routing(&dev->dev, ioapic,
+							ioapic_pin, irq,
+							triggering, polarity);
+				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
-				dev->irq = irq;
 				return 0;
 			} else
 				msg = "; probably buggy MP table";
+#endif
 		} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
 			msg = "";
 		else
-- 
cgit v0.10.2


From 61fe91e1319556f32bebfd7ed2c68ef02e2c17f7 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 9 May 2009 23:47:42 -0700
Subject: x86: apic: Check rev 3 fadt correctly for physical_apic bit

Impact: fix fadt version checking

FADT2_REVISION_ID has value 3 aka rev 3 FADT. So need to use >= instead
of >, as other places in the code do.

[ Impact: extend scope of APIC boot quirk ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 306e5e8..744e6d8 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -235,7 +235,7 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	 * regardless of how many processors are present (x86_64 ES7000
 	 * is an example).
 	 */
-	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
 		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
 		printk(KERN_DEBUG "system APIC only can use physical flat");
 		return 1;
-- 
cgit v0.10.2


From 3e0c373749d7eb5b354ac0b043f2b2cdf84eefef Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 9 May 2009 23:47:42 -0700
Subject: x86: clean up and fix setup_clear/force_cpu_cap handling

setup_force_cpu_cap() only have one user (Xen guest code),
but it should not reuse cleared_cpu_cpus, otherwise it
will have problems on SMP.

Need to have a separate cpu_cpus_set array too, for forced-on
flags, beyond the forced-off flags.

Also need to setup handling before all cpus caps are combined.

[ Impact: fix the forced-set CPU feature flag logic ]

Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Yinghai Lu <yinghai.lu@kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ccc1061..13cc6a5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -192,11 +192,11 @@ extern const char * const x86_power_flags[32];
 #define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
 #define setup_clear_cpu_cap(bit) do { \
 	clear_cpu_cap(&boot_cpu_data, bit);	\
-	set_bit(bit, (unsigned long *)cleared_cpu_caps); \
+	set_bit(bit, (unsigned long *)cpu_caps_cleared); \
 } while (0)
 #define setup_force_cpu_cap(bit) do { \
 	set_cpu_cap(&boot_cpu_data, bit);	\
-	clear_bit(bit, (unsigned long *)cleared_cpu_caps);	\
+	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
 #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c2cceae..fed93fe 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -135,7 +135,8 @@ extern struct cpuinfo_x86	boot_cpu_data;
 extern struct cpuinfo_x86	new_cpu_data;
 
 extern struct tss_struct	doublefault_tss;
-extern __u32			cleared_cpu_caps[NCAPINTS];
+extern __u32			cpu_caps_cleared[NCAPINTS];
+extern __u32			cpu_caps_set[NCAPINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c4f6678..e7fd5c4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -292,7 +292,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
 	return NULL;		/* Not found */
 }
 
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
 
 void load_percpu_segment(int cpu)
 {
@@ -806,6 +807,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #endif
 
 	init_hypervisor(c);
+
+	/*
+	 * Clear/Set all flags overriden by options, need do it
+	 * before following smp all cpus cap AND.
+	 */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
 	 * all CPUs; so make sure that we indicate which features are
@@ -818,10 +829,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
-	/* Clear all flags overriden by options */
-	for (i = 0; i < NCAPINTS; i++)
-		c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
 #ifdef CONFIG_X86_MCE
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
-- 
cgit v0.10.2


From 1822952ba2b9f22f79019d07ebbeca31dc14b718 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 11 May 2009 17:56:07 +0900
Subject: block: let blk_end_request_all handle bidi requests

blk_end_request_all() and __blk_end_request_all() should finish all
bytes including bidi, by definition. That's what all bidi users need ,
bidi requests must be complete as a whole (partial completion is
impossible).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6e59d3b..1069f44 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -910,8 +910,12 @@ static inline bool blk_end_request(struct request *rq, int error,
 static inline void blk_end_request_all(struct request *rq, int error)
 {
 	bool pending;
+	unsigned int bidi_bytes = 0;
 
-	pending = blk_end_request(rq, error, blk_rq_bytes(rq));
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
+
+	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
 
@@ -962,8 +966,12 @@ static inline bool __blk_end_request(struct request *rq, int error,
 static inline void __blk_end_request_all(struct request *rq, int error)
 {
 	bool pending;
+	unsigned int bidi_bytes = 0;
+
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
 
-	pending = __blk_end_request(rq, error, blk_rq_bytes(rq));
+	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
 
-- 
cgit v0.10.2


From e6bb7a96c2c36f20c05ef648f15bd3c2b1834c78 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 11 May 2009 17:56:08 +0900
Subject: scsi: simplify the bidi completion

Let's use blk_end_request_all() instead of blk_end_bidi_request().

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b12750f..a54bec9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -673,33 +673,6 @@ void scsi_release_buffers(struct scsi_cmnd *cmd)
 EXPORT_SYMBOL(scsi_release_buffers);
 
 /*
- * Bidi commands Must be complete as a whole, both sides at once.  If
- * part of the bytes were written and lld returned scsi_in()->resid
- * and/or scsi_out()->resid this information will be left in
- * req->resid_len and req->next_rq->resid_len. The upper-layer driver
- * can decide what to do with this information.
- */
-static void scsi_end_bidi_request(struct scsi_cmnd *cmd)
-{
-	struct request *req = cmd->request;
-
-	req->resid_len = scsi_out(cmd)->resid;
-	req->next_rq->resid_len = scsi_in(cmd)->resid;
-
-	/* The req and req->next_rq have not been completed */
-	BUG_ON(blk_end_bidi_request(req, 0, blk_rq_bytes(req),
-				    blk_rq_bytes(req->next_rq)));
-
-	scsi_release_buffers(cmd);
-
-	/*
-	 * This will goose the queue request function at the end, so we don't
-	 * need to worry about launching another command.
-	 */
-	scsi_next_command(cmd);
-}
-
-/*
  * Function:    scsi_io_completion()
  *
  * Purpose:     Completion processing for block device I/O requests.
@@ -772,12 +745,22 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			if (!sense_deferred)
 				error = -EIO;
 		}
+
+		req->resid_len = scsi_get_resid(cmd);
+
 		if (scsi_bidi_cmnd(cmd)) {
-			/* will also release_buffers */
-			scsi_end_bidi_request(cmd);
+			/*
+			 * Bidi commands Must be complete as a whole,
+			 * both sides at once.
+			 */
+			req->next_rq->resid_len = scsi_in(cmd)->resid;
+
+			blk_end_request_all(req, 0);
+
+			scsi_release_buffers(cmd);
+			scsi_next_command(cmd);
 			return;
 		}
-		req->resid_len = scsi_get_resid(cmd);
 	}
 
 	BUG_ON(blk_bidi_rq(req)); /* bidi not support for !blk_pc_request yet */
-- 
cgit v0.10.2


From b1f744937f1be3e6d3009382a755679133cf782d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 11 May 2009 17:56:09 +0900
Subject: block: move completion related functions back to blk-core.c

Let's put the completion related functions back to block/blk-core.c
where they have lived. We can also unexport blk_end_bidi_request() and
__blk_end_bidi_request(), which nobody uses.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 93691d2..a2d97de 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2026,8 +2026,8 @@ static void blk_finish_request(struct request *req, int error)
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool blk_end_bidi_request(struct request *rq, int error,
-			  unsigned int nr_bytes, unsigned int bidi_bytes)
+static bool blk_end_bidi_request(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
@@ -2041,7 +2041,6 @@ bool blk_end_bidi_request(struct request *rq, int error,
 
 	return false;
 }
-EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
 /**
  * __blk_end_bidi_request - Complete a bidi request with queue lock held
@@ -2058,8 +2057,8 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool __blk_end_bidi_request(struct request *rq, int error,
-			    unsigned int nr_bytes, unsigned int bidi_bytes)
+static bool __blk_end_bidi_request(struct request *rq, int error,
+				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
 		return true;
@@ -2068,7 +2067,124 @@ bool __blk_end_bidi_request(struct request *rq, int error,
 
 	return false;
 }
-EXPORT_SYMBOL_GPL(__blk_end_bidi_request);
+
+/**
+ * blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+	return blk_end_bidi_request(rq, error, nr_bytes, 0);
+}
+EXPORT_SYMBOL_GPL(blk_end_request);
+
+/**
+ * blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.
+ */
+void blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+	unsigned int bidi_bytes = 0;
+
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
+
+	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
+	BUG_ON(pending);
+}
+EXPORT_SYMBOL_GPL(blk_end_request_all);
+
+/**
+ * blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Complete the current consecutively mapped chunk from @rq.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool blk_end_request_cur(struct request *rq, int error)
+{
+	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(blk_end_request_cur);
+
+/**
+ * __blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Must be called with queue lock held unlike blk_end_request().
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
+}
+EXPORT_SYMBOL_GPL(__blk_end_request);
+
+/**
+ * __blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.  Must be called with queue lock held.
+ */
+void __blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+	unsigned int bidi_bytes = 0;
+
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
+
+	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
+	BUG_ON(pending);
+}
+EXPORT_SYMBOL_GPL(__blk_end_request_all);
+
+/**
+ * __blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Complete the current consecutively mapped chunk from @rq.  Must
+ *     be called with queue lock held.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool __blk_end_request_cur(struct request *rq, int error)
+{
+	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(__blk_end_request_cur);
 
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1069f44..f9d60a7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -872,126 +872,14 @@ extern struct request *blk_fetch_request(struct request_queue *q);
  */
 extern bool blk_update_request(struct request *rq, int error,
 			       unsigned int nr_bytes);
-extern bool blk_end_bidi_request(struct request *rq, int error,
-				 unsigned int nr_bytes,
-				 unsigned int bidi_bytes);
-extern bool __blk_end_bidi_request(struct request *rq, int error,
-				   unsigned int nr_bytes,
-				   unsigned int bidi_bytes);
-
-/**
- * blk_end_request - Helper function for drivers to complete the request.
- * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- **/
-static inline bool blk_end_request(struct request *rq, int error,
-				   unsigned int nr_bytes)
-{
-	return blk_end_bidi_request(rq, error, nr_bytes, 0);
-}
-
-/**
- * blk_end_request_all - Helper function for drives to finish the request.
- * @rq: the request to finish
- * @err: %0 for success, < %0 for error
- *
- * Description:
- *     Completely finish @rq.
- */
-static inline void blk_end_request_all(struct request *rq, int error)
-{
-	bool pending;
-	unsigned int bidi_bytes = 0;
-
-	if (unlikely(blk_bidi_rq(rq)))
-		bidi_bytes = blk_rq_bytes(rq->next_rq);
-
-	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
-	BUG_ON(pending);
-}
-
-/**
- * blk_end_request_cur - Helper function to finish the current request chunk.
- * @rq: the request to finish the current chunk for
- * @err: %0 for success, < %0 for error
- *
- * Description:
- *     Complete the current consecutively mapped chunk from @rq.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-static inline bool blk_end_request_cur(struct request *rq, int error)
-{
-	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
-}
-
-/**
- * __blk_end_request - Helper function for drivers to complete the request.
- * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- *     Must be called with queue lock held unlike blk_end_request().
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- **/
-static inline bool __blk_end_request(struct request *rq, int error,
-				     unsigned int nr_bytes)
-{
-	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
-}
-
-/**
- * __blk_end_request_all - Helper function for drives to finish the request.
- * @rq: the request to finish
- * @err: %0 for success, < %0 for error
- *
- * Description:
- *     Completely finish @rq.  Must be called with queue lock held.
- */
-static inline void __blk_end_request_all(struct request *rq, int error)
-{
-	bool pending;
-	unsigned int bidi_bytes = 0;
-
-	if (unlikely(blk_bidi_rq(rq)))
-		bidi_bytes = blk_rq_bytes(rq->next_rq);
-
-	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
-	BUG_ON(pending);
-}
-
-/**
- * __blk_end_request_cur - Helper function to finish the current request chunk.
- * @rq: the request to finish the current chunk for
- * @err: %0 for success, < %0 for error
- *
- * Description:
- *     Complete the current consecutively mapped chunk from @rq.  Must
- *     be called with queue lock held.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-static inline bool __blk_end_request_cur(struct request *rq, int error)
-{
-	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
-}
+extern bool blk_end_request(struct request *rq, int error,
+			    unsigned int nr_bytes);
+extern void blk_end_request_all(struct request *rq, int error);
+extern bool blk_end_request_cur(struct request *rq, int error);
+extern bool __blk_end_request(struct request *rq, int error,
+			      unsigned int nr_bytes);
+extern void __blk_end_request_all(struct request *rq, int error);
+extern bool __blk_end_request_cur(struct request *rq, int error);
 
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
-- 
cgit v0.10.2


From 80989ce0643c1034822f3e339ed8d790b649abe1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 9 May 2009 23:47:42 -0700
Subject: x86: clean up and and print out initial max_pfn_mapped

Do this so we can check the range that is mapped before
init_memory_mapping().

To be able to print out meaningful info, we first have to fix
64-bit to have max_pfn_mapped assigned before that call. This
also unifies the code-path a bit.

[ Impact: print more debug info, cleanup ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <49BF0978.40605@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0d77e56..4031d6c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -862,12 +862,16 @@ void __init setup_arch(char **cmdline_p)
 		max_low_pfn = max_pfn;
 
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
 #endif
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
 	setup_bios_corruption_check();
 #endif
 
+	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+			max_pfn_mapped<<PAGE_SHIFT);
+
 	reserve_brk();
 
 	/* max_pfn_mapped is updated here */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 95f5ecf..92d2108 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -132,12 +132,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 	 */
 #ifdef CONFIG_X86_32
 	start = 0x7000;
-	e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
-					tables, PAGE_SIZE);
-#else /* CONFIG_X86_64 */
+#else
 	start = 0x8000;
-	e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
 #endif
+	e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
+					tables, PAGE_SIZE);
 	if (e820_table_start == -1UL)
 		panic("Cannot find space for the kernel page tables");
 
-- 
cgit v0.10.2


From 4401da6111ac58f94234417427d06a72c4048c74 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 2 May 2009 10:40:57 -0700
Subject: x86: read apic ID in the !acpi_lapic case

Ed found that on 32-bit, boot_cpu_physical_apicid is not read right,
when the mptable is broken.

Interestingly, actually three paths use/set it:

 1. acpi: at that time that is already read from reg
 2. mptable: only read from mptable
 3. no madt, and no mptable, that use default apic id 0 for 64-bit, -1 for 32-bit

so we could read the apic id for the 2/3 path. We trust the hardware
register more than we trust a BIOS data structure (the mptable).

We can also avoid the double set_fixmap() when acpi_lapic
is used, and also need to move cpu_has_apic earlier and
call apic_disable().

Also when need to update the apic id, we'd better read and
set the apic version as well - so that quirks are applied precisely.

v2: make path 3 with 64bit, use -1 as apic id, so could read it later.
v3: fix whitespace problem pointed out by Ed Swierk

[ Impact: get correct apic id for bsp other than acpi path ]

Reported-by: Ed Swierk <eswierk@aristanetworks.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <49FC85A9.2070702@kernel.org>
[ v4: sanity-check in the ACPI case too ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e258bed..1ee966f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1456,7 +1456,6 @@ static int __init detect_init_APIC(void)
 	}
 
 	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-	boot_cpu_physical_apicid = 0;
 	return 0;
 }
 #else
@@ -1570,6 +1569,8 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
+	unsigned int new_apicid;
+
 	if (x2apic_mode) {
 		boot_cpu_physical_apicid = read_apic_id();
 		return;
@@ -1586,21 +1587,31 @@ void __init init_apic_mappings(void)
 	} else
 		apic_phys = mp_lapic_addr;
 
-	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+	/* lets check if we may NOP'ify apic operations */
+	if (!cpu_has_apic) {
+		pr_info("APIC: disable apic facility\n");
+		apic_disable();
+		return;
+	}
+
+	/*
+	 * acpi lapic path already maps that address in
+	 * acpi_register_lapic_address()
+	 */
+	if (!acpi_lapic)
+		set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+
 	apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
 				APIC_BASE, apic_phys);
-
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = read_apic_id();
-
-	/* lets check if we may to NOP'ify apic operations */
-	if (!cpu_has_apic) {
-		pr_info("APIC: disable apic facility\n");
-		apic_disable();
+	new_apicid = read_apic_id();
+	if (boot_cpu_physical_apicid != new_apicid) {
+		boot_cpu_physical_apicid = new_apicid;
+		apic_version[new_apicid] =
+			 GET_APIC_VERSION(apic_read(APIC_LVR));
 	}
 }
 
-- 
cgit v0.10.2


From b37ab91907e9002925f4217e3bbd496aa12c2fa3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 8 May 2009 00:36:44 -0700
Subject: x86: Sanity check the e820 against the SRAT table using e820 map only

node_cover_memory() sanity checks the SRAT table by ensuring that all
PXMs cover the memory reported in the e820.

However, when calculating the size of the holes in the e820, it uses
the early_node_map[] which contains information taken from both SRAT
and e820. If the SRAT is missing an entry, then it is not detected
that the SRAT table is incorrect and missing entries.

This patch uses the e820 map to calculate the holes instead of
early_node_map[].

comment is from Mel.

[ Impact: reject incorrect SRAT tables ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A03E10C.60906@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 0176595..c7a18aa 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -345,7 +345,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 			pxmram = 0;
 	}
 
-	e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
+	e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
 	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
 	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
 		printk(KERN_ERR
-- 
cgit v0.10.2


From 0964b0562bb9c93194e852b47bab2397b9e11c18 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 8 May 2009 00:37:34 -0700
Subject: x86: Allow 1MB of slack between the e820 map and SRAT, not 4GB

It is expected that there might be slight differences between the e820
map and the SRAT table and the intention was that 1MB of slack be allowed.

The calculation comparing e820ram and pxmram assumes the units are bytes,
when they are in fact pages. This means 4GB of slack is being allowed,
not 1MB. This patch makes the correct comparison.

comment is from Mel.

[ Impact: don't accept buggy SRATs that could dump up to 4G of RAM ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A03E13E.6050107@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index c7a18aa..87b45bf 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -346,8 +346,8 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 	}
 
 	e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
-	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
-	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
+	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+	if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
 		printk(KERN_ERR
 	"SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
 			(pxmram << PAGE_SHIFT) >> 20,
-- 
cgit v0.10.2


From 917a0153621572e88aeb2d5df025ad2e81027287 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 6 May 2009 21:36:16 -0700
Subject: x86: mtrr: Fix high_width computation when phys-addr is >= 44bit

found one system where cpu address line is 44bits, mtrr printout
is not right:

 [    0.000000] MTRR variable ranges enabled:
 [    0.000000]   0 base 0   00000000 mask FF0 00000000 write-back
 [    0.000000]   1 base 10  00000000 mask FFF 80000000 write-back
 [    0.000000]   2 base 0   80000000 mask FFF 80000000 uncachable
 [    0.000000]   3 base 0   7F800000 mask FFF FF800000 uncachable

Li Zefan and Frederic pointed out the high_width could be -4 some how.

It turns out when phys_addr is 44bit, size_or_mask will be
ffffffff,00000000 so ffs(size_or_mask) will be 0.

Try to check low 32 bit, to get correct high_width.

Signed-off-by: Yinghai Lu <yinghai@kerne.org>
Also-analyzed-by: Frederic Weisbecker <fweisbec@gmail.com>
Also-analyzed-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A026540.8060504@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0b776c0..d21d4fb 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -275,7 +275,11 @@ static void __init print_mtrr_state(void)
 	}
 	printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
 	       mtrr_state.enabled & 2 ? "en" : "dis");
-	high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
+	if (size_or_mask & 0xffffffffUL)
+		high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
+	else
+		high_width = ffs(size_or_mask>>32) + 32 - 1;
+	high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
 	for (i = 0; i < num_var_ranges; ++i) {
 		if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
 			printk(KERN_DEBUG "  %u base %0*X%05X000 mask %0*X%05X000 %s\n",
-- 
cgit v0.10.2


From 53c0054c3f11b49fc09f24e46f58661def952728 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 11 May 2009 08:45:27 +0000
Subject: sh: include empty_zero_page in text

Include empty_zero_page in _text. This fixes a problem
introduced by c3e2586b794b12ffcdf69b4e547030b51e18e6d9
which results in broken boot on R2D-Plus.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index d737230..f53c76a 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -26,12 +26,13 @@ SECTIONS
 	. = CONFIG_PAGE_OFFSET + CONFIG_MEMORY_START + CONFIG_ZERO_PAGE_OFFSET;
 #endif
 
+	_text = .;		/* Text and read-only data */
+
 	.empty_zero_page : AT(ADDR(.empty_zero_page) - LOAD_OFFSET) {
 		*(.empty_zero_page)
 	} = 0
 
 	.text : AT(ADDR(.text) - LOAD_OFFSET) {
-		_text = .;		/* Text and read-only data */
 		HEAD_TEXT
 		TEXT_TEXT
 
-- 
cgit v0.10.2


From 03f408f1aad8d0f5eb6380732bffc0f72b250fa7 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 11 May 2009 08:54:54 +0000
Subject: sh: TMU platform data for sh775x

This patch adds TMU platform data for sh775x. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index a1c80d9..09da0c1 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/io.h>
+#include <linux/sh_timer.h>
 #include <linux/serial_sci.h>
 
 static struct resource rtc_resources[] = {
@@ -60,9 +61,177 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+/* SH7750R, SH7751 and SH7751R all have two extra timer channels */
+#if defined(CONFIG_CPU_SUBTYPE_SH7750R) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751R)
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xfe100008,
+		.end	= 0xfe100013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 72,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xfe100014,
+		.end	= 0xfe10001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 76,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+#endif
+
 static struct platform_device *sh7750_devices[] __initdata = {
 	&rtc_device,
 	&sci_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+#if defined(CONFIG_CPU_SUBTYPE_SH7750R) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751R)
+	&tmu3_device,
+	&tmu4_device,
+#endif
 };
 
 static int __init sh7750_devices_setup(void)
@@ -72,6 +241,24 @@ static int __init sh7750_devices_setup(void)
 }
 __initcall(sh7750_devices_setup);
 
+static struct platform_device *sh7750_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+#if defined(CONFIG_CPU_SUBTYPE_SH7750R) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751) || \
+	defined(CONFIG_CPU_SUBTYPE_SH7751R)
+	&tmu3_device,
+	&tmu4_device,
+#endif
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7750_early_devices,
+				   ARRAY_SIZE(sh7750_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From c42f32dca3855d8f867387ec6993d9b62516a00e Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 11 May 2009 09:06:24 +0000
Subject: sh: TMU platform data for sh7760

This patch adds TMU platform data for sh7760. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index d9bdc93..4a6fd0d 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -10,6 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
+#include <linux/sh_timer.h>
 #include <linux/serial_sci.h>
 #include <linux/io.h>
 
@@ -168,8 +169,104 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+
 static struct platform_device *sh7760_devices[] __initdata = {
 	&sci_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 };
 
 static int __init sh7760_devices_setup(void)
@@ -179,6 +276,18 @@ static int __init sh7760_devices_setup(void)
 }
 __initcall(sh7760_devices_setup);
 
+static struct platform_device *sh7760_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7760_early_devices,
+				   ARRAY_SIZE(sh7760_early_devices));
+}
+
 #define INTC_ICR	0xffd00000UL
 #define INTC_ICR_IRLM	(1 << 7)
 
-- 
cgit v0.10.2


From 3551f88f6439cf4da3f5a3747b320280e30500de Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 7 May 2009 15:35:41 +0300
Subject: x86: unify 64-bit UMA and NUMA paging_init()

64-bit UMA and NUMA versions of paging_init() are almost identical.
Therefore, merge the copy in mm/numa_64.c to mm/init_64.c to remove
duplicate code.

[ Impact: cleanup ]

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
LKML-Reference: <1241699741.17846.30.camel@penberg-laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6a1a573..be7e127 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -585,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
 	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
+#endif
 
 void __init paging_init(void)
 {
@@ -595,11 +596,14 @@ void __init paging_init(void)
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
+#ifdef CONFIG_NUMA
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
+#else
 	memory_present(0, 0, max_pfn);
+#endif
 	sparse_init();
 	free_area_init_nodes(max_zone_pfns);
 }
-#endif
 
 /*
  * Memory hotplug specific functions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 2d05a12..fb61d81 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -578,21 +578,6 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-void __init paging_init(void)
-{
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
-
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
-	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
-	max_zone_pfns[ZONE_NORMAL] = max_pfn;
-
-	sparse_memory_present_with_active_regions(MAX_NUMNODES);
-	sparse_init();
-
-	free_area_init_nodes(max_zone_pfns);
-}
-
 static __init int numa_setup(char *opt)
 {
 	if (!opt)
-- 
cgit v0.10.2


From 087fa4e964e04371a67f340e1f6ac92c5db5e0a6 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 7 May 2009 15:35:42 +0300
Subject: x86: use sparse_memory_present_with_active_regions() on UMA

There's no need to use call memory_present() manually on UMA because
initmem_init() sets up early_node_map by calling
e820_register_active_regions().

[ Impact: cleanup ]

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
LKML-Reference: <1241699742.17846.31.camel@penberg-laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index be7e127..52bb951 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -596,11 +596,7 @@ void __init paging_init(void)
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
-#ifdef CONFIG_NUMA
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
-#else
-	memory_present(0, 0, max_pfn);
-#endif
 	sparse_init();
 	free_area_init_nodes(max_zone_pfns);
 }
-- 
cgit v0.10.2


From 8823392360dc4992f87bf4c623834d315f297493 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Sun, 10 May 2009 10:53:05 +0200
Subject: perf_counter, x86: clean up throttling printk

s/PERFMON/perfcounters for perfcounter interrupt throttling warning.

'perfmon' is the CPU feature name that is Intel-only, while we do
throttling in a generic way.

[ Impact: cleanup ]

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a6878b0..da27419 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -814,7 +814,7 @@ void perf_counter_unthrottle(void)
 	cpuc = &__get_cpu_var(cpu_hw_counters);
 	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
 		if (printk_ratelimit())
-			printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
+			printk(KERN_WARNING "perfcounters: max interrupts exceeded!\n");
 		hw_perf_restore(cpuc->throttle_ctrl);
 	}
 	cpuc->interrupts = 0;
-- 
cgit v0.10.2


From 6751b71ea2c7ab8c0d65f01973a3fc8ea16992f4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 11 May 2009 12:08:02 +1000
Subject: perf_counter: Put whole group on when enabling group leader

Currently, if you have a group where the leader is disabled and there
are siblings that are enabled, and then you enable the leader, we only
put the leader on the PMU, and not its enabled siblings.  This is
incorrect, since the enabled group members should be all on or all off
at any given point.

This fixes it by adding a call to group_sched_in in
__perf_counter_enable in the case where we're enabling a group leader.

To avoid the need for a forward declaration this also moves
group_sched_in up before __perf_counter_enable.  The actual content of
group_sched_in is unchanged by this patch.

[ Impact: fix bug in counter enable code ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <18951.34946.451546.691693@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d850a1f..a5bdc93 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -419,6 +419,54 @@ counter_sched_in(struct perf_counter *counter,
 	return 0;
 }
 
+static int
+group_sched_in(struct perf_counter *group_counter,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx,
+	       int cpu)
+{
+	struct perf_counter *counter, *partial_group;
+	int ret;
+
+	if (group_counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
+	if (ret)
+		return ret < 0 ? ret : 0;
+
+	group_counter->prev_state = group_counter->state;
+	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
+		return -EAGAIN;
+
+	/*
+	 * Schedule in siblings as one group (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		counter->prev_state = counter->state;
+		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
+			partial_group = counter;
+			goto group_error;
+		}
+	}
+
+	return 0;
+
+group_error:
+	/*
+	 * Groups can be scheduled in as one unit only, so undo any
+	 * partial group before returning:
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		if (counter == partial_group)
+			break;
+		counter_sched_out(counter, cpuctx, ctx);
+	}
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	return -EAGAIN;
+}
+
 /*
  * Return 1 for a group consisting entirely of software counters,
  * 0 if the group contains any hardware counters.
@@ -643,6 +691,9 @@ static void __perf_counter_enable(void *info)
 
 	if (!group_can_go_on(counter, cpuctx, 1))
 		err = -EEXIST;
+	else if (counter == leader)
+		err = group_sched_in(counter, cpuctx, ctx,
+				     smp_processor_id());
 	else
 		err = counter_sched_in(counter, cpuctx, ctx,
 				       smp_processor_id());
@@ -791,54 +842,6 @@ static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
 }
 
-static int
-group_sched_in(struct perf_counter *group_counter,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_counter_context *ctx,
-	       int cpu)
-{
-	struct perf_counter *counter, *partial_group;
-	int ret;
-
-	if (group_counter->state == PERF_COUNTER_STATE_OFF)
-		return 0;
-
-	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
-	if (ret)
-		return ret < 0 ? ret : 0;
-
-	group_counter->prev_state = group_counter->state;
-	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
-		return -EAGAIN;
-
-	/*
-	 * Schedule in siblings as one group (if any):
-	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
-		counter->prev_state = counter->state;
-		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
-			partial_group = counter;
-			goto group_error;
-		}
-	}
-
-	return 0;
-
-group_error:
-	/*
-	 * Groups can be scheduled in as one unit only, so undo any
-	 * partial group before returning:
-	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
-		if (counter == partial_group)
-			break;
-		counter_sched_out(counter, cpuctx, ctx);
-	}
-	counter_sched_out(group_counter, cpuctx, ctx);
-
-	return -EAGAIN;
-}
-
 static void
 __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
-- 
cgit v0.10.2


From a08b159fc243dbfe415250466d24cfc5010deee5 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 11 May 2009 15:46:10 +1000
Subject: perf_counter: don't count scheduler ticks as context switches

The context-switch software counter gives inflated values at present
because each scheduler tick and each process-wide counter
enable/disable prctl gets counted as a context switch.

This happens because perf_counter_task_tick, perf_counter_task_disable
and perf_counter_task_enable all call perf_counter_task_sched_out,
which calls perf_swcounter_event to record a context switch event.

This fixes it by introducing a variant of perf_counter_task_sched_out
with two underscores in front for internal use within the perf_counter
code, and makes perf_counter_task_{tick,disable,enable} call it.  This
variant doesn't record a context switch event, and takes a struct
perf_counter_context *.  This adds the new variant rather than
changing the behaviour or interface of perf_counter_task_sched_out
because that is called from other code.

[ Impact: fix inflated context-switch event counts ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <18951.48034.485580.498953@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a5bdc93..7373b96 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -837,6 +837,14 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 	cpuctx->task_ctx = NULL;
 }
 
+static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+
+	__perf_counter_sched_out(ctx, cpuctx);
+	cpuctx->task_ctx = NULL;
+}
+
 static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 {
 	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
@@ -943,15 +951,13 @@ int perf_counter_task_disable(void)
 	struct perf_counter *counter;
 	unsigned long flags;
 	u64 perf_flags;
-	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
 	local_irq_save(flags);
-	cpu = smp_processor_id();
 
-	perf_counter_task_sched_out(curr, cpu);
+	__perf_counter_task_sched_out(ctx);
 
 	spin_lock(&ctx->lock);
 
@@ -989,7 +995,7 @@ int perf_counter_task_enable(void)
 	local_irq_save(flags);
 	cpu = smp_processor_id();
 
-	perf_counter_task_sched_out(curr, cpu);
+	__perf_counter_task_sched_out(ctx);
 
 	spin_lock(&ctx->lock);
 
@@ -1054,7 +1060,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	ctx = &curr->perf_counter_ctx;
 
 	perf_counter_cpu_sched_out(cpuctx);
-	perf_counter_task_sched_out(curr, cpu);
+	__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
 	rotate_ctx(ctx);
-- 
cgit v0.10.2


From 615a3f1e055ac9b0ae74e1f935a12ea2cfe2a2ad Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 11 May 2009 15:50:21 +1000
Subject: perf_counter: call atomic64_set for counter->count

A compile warning triggered because we are calling
atomic_set(&counter->count). But since counter->count
is an atomic64_t, we have to use atomic64_set.

So the count can be set short, resulting in the reset ioctl
only resetting the low word.

[ Impact: clear counter properly during the reset ioctl ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <18951.48285.270311.981806@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7373b96..5ea0240 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1281,7 +1281,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 static void perf_counter_reset(struct perf_counter *counter)
 {
 	(void)perf_counter_read(counter);
-	atomic_set(&counter->count, 0);
+	atomic64_set(&counter->count, 0);
 	perf_counter_update_userpage(counter);
 }
 
-- 
cgit v0.10.2


From 049862579333cc6cd9e6edfd6987cd0addfd8c59 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 11 May 2009 14:33:23 +0800
Subject: blktrace: pdu_buf of pc events should be unsigned

I got this:
  8,0    1   305.417782332  2037  I   R 32 (ffffff9e 10 00 ...) [bash]

It should be:
  8,0    1   305.417782332  2037  I   R 32 (9e 10 00 ...) [bash]

[ Impact: fix output of pc events ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4A07C6B3.9080802@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e099f8c..05b4747 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1065,7 +1065,7 @@ static int blk_log_action(struct trace_iterator *iter, const char *act)
 
 static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
 {
-	const char *pdu_buf;
+	const unsigned char *pdu_buf;
 	int pdu_len;
 	int i, end, ret;
 
-- 
cgit v0.10.2


From 79c5d3ce614d8fe706545c7bca2158b63db6bb5e Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 11 May 2009 15:06:46 +0800
Subject: blktrace: from-sector redundant in trace_block_remap, cleanup

The last argument of block_remap prober is the original sector
before remap, so it should be 'from', not 'to'.

[ Impact: clean up ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
LKML-Reference: <4A07CE86.5090301@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/trace/block.h b/include/trace/block.h
index 8ac945b..5b12efa 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -70,7 +70,7 @@ DECLARE_TRACE(block_split,
 
 DECLARE_TRACE(block_remap,
 	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t to),
-	      TP_ARGS(q, bio, dev, to));
+		 sector_t from),
+	      TP_ARGS(q, bio, dev, from));
 
 #endif
-- 
cgit v0.10.2


From c969f58ca43fc403c75f5d3da4cf1e21de7afaa0 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 7 Apr 2009 14:13:01 +0100
Subject: GFS2: Update the rw flags

After Jens recent updates:
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=a1f242524c3c1f5d40f1c9c343427e34d1aadd6e
et al. this is a patch to bring gfs2 uptodate with the core
code. Also I've managed to squash another call to ll_rw_block()
along the way.

There is still one part of the GFS2 I/O paths which are not correctly
annotated and that is due to the sharing of the writeback code between
the data and metadata address spaces. I would like to change that too,
but this patch is still worth doing on its own, I think.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 98918a7..aa62cf5 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -120,7 +120,7 @@ __acquires(&sdp->sd_log_lock)
 			lock_buffer(bh);
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
-				submit_bh(WRITE, bh);
+				submit_bh(WRITE_SYNC_PLUG, bh);
 			} else {
 				unlock_buffer(bh);
 				brelse(bh);
@@ -604,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
 		goto skip_barrier;
 	get_bh(bh);
-	submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh);
+	submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh);
 	wait_on_buffer(bh);
 	if (buffer_eopnotsupp(bh)) {
 		clear_buffer_eopnotsupp(bh);
@@ -664,7 +664,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
 		lock_buffer(bh);
 		if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
 			bh->b_end_io = end_buffer_write_sync;
-			submit_bh(WRITE, bh);
+			submit_bh(WRITE_SYNC_PLUG, bh);
 		} else {
 			unlock_buffer(bh);
 			brelse(bh);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 80e4f5f..00315f5 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -13,6 +13,8 @@
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/bio.h>
+#include <linux/fs.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -189,7 +191,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 		}
 
 		gfs2_log_unlock(sdp);
-		submit_bh(WRITE, bh);
+		submit_bh(WRITE_SYNC_PLUG, bh);
 		gfs2_log_lock(sdp);
 
 		n = 0;
@@ -199,7 +201,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
 			gfs2_log_unlock(sdp);
 			lock_buffer(bd2->bd_bh);
 			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
-			submit_bh(WRITE, bh);
+			submit_bh(WRITE_SYNC_PLUG, bh);
 			gfs2_log_lock(sdp);
 			if (++n >= num)
 				break;
@@ -341,7 +343,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 		sdp->sd_log_num_revoke--;
 
 		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
-			submit_bh(WRITE, bh);
+			submit_bh(WRITE_SYNC_PLUG, bh);
 
 			bh = gfs2_log_get_buf(sdp);
 			mh = (struct gfs2_meta_header *)bh->b_data;
@@ -358,7 +360,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 	}
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 
-	submit_bh(WRITE, bh);
+	submit_bh(WRITE_SYNC_PLUG, bh);
 }
 
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -560,7 +562,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	ptr = bh_log_ptr(bh);
 	
 	get_bh(bh);
-	submit_bh(WRITE, bh);
+	submit_bh(WRITE_SYNC_PLUG, bh);
 	gfs2_log_lock(sdp);
 	while(!list_empty(list)) {
 		bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -586,7 +588,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
 		} else {
 			bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
 		}
-		submit_bh(WRITE, bh1);
+		submit_bh(WRITE_SYNC_PLUG, bh1);
 		gfs2_log_lock(sdp);
 		ptr += 2;
 	}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 8d6f132..75b2aec 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,16 +201,32 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
 int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 		   struct buffer_head **bhp)
 {
-	*bhp = gfs2_getbuf(gl, blkno, CREATE);
-	if (!buffer_uptodate(*bhp)) {
-		ll_rw_block(READ_META, 1, bhp);
-		if (flags & DIO_WAIT) {
-			int error = gfs2_meta_wait(gl->gl_sbd, *bhp);
-			if (error) {
-				brelse(*bhp);
-				return error;
-			}
-		}
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct buffer_head *bh;
+
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		return -EIO;
+
+	*bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
+
+	lock_buffer(bh);
+	if (buffer_uptodate(bh)) {
+		unlock_buffer(bh);
+		return 0;
+	}
+	bh->b_end_io = end_buffer_read_sync;
+	get_bh(bh);
+	submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
+	if (!(flags & DIO_WAIT))
+		return 0;
+
+	wait_on_buffer(bh);
+	if (unlikely(!buffer_uptodate(bh))) {
+		struct gfs2_trans *tr = current->journal_info;
+		if (tr && tr->tr_touched)
+			gfs2_io_error_bh(sdp, bh);
+		brelse(bh);
+		return -EIO;
 	}
 
 	return 0;
@@ -404,7 +420,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 	if (buffer_uptodate(first_bh))
 		goto out;
 	if (!buffer_locked(first_bh))
-		ll_rw_block(READ_META, 1, &first_bh);
+		ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh);
 
 	dblock++;
 	extlen--;
-- 
cgit v0.10.2


From 4a0f9a321a113392b448e477018311d14fba2b34 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 20 Apr 2009 08:16:26 +0100
Subject: GFS2: Optimise writepage for metadata

This adds a GFS2 specific writepage for metadata, rather than
continuing to use the VFS function. As a result we now tag all
our metadata I/O with the correct flag so that blktraces will
now be less confusing.

Also, the generic function was checking for a number of corner
cases which cannot happen on the metadata address spaces so that
this should be faster too.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 75b2aec..78a5f43 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -33,17 +33,65 @@
 #include "util.h"
 #include "ops_address.h"
 
-static int aspace_get_block(struct inode *inode, sector_t lblock,
-			    struct buffer_head *bh_result, int create)
+static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
 {
-	gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
-	return -EOPNOTSUPP;
-}
+	int err;
+	struct buffer_head *bh, *head;
+	int nr_underway = 0;
+	int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
+			WRITE_SYNC_PLUG : WRITE));
+
+	BUG_ON(!PageLocked(page));
+	BUG_ON(!page_has_buffers(page));
+
+	head = page_buffers(page);
+	bh = head;
+
+	do {
+		if (!buffer_mapped(bh))
+			continue;
+		/*
+		 * If it's a fully non-blocking write attempt and we cannot
+		 * lock the buffer then redirty the page.  Note that this can
+		 * potentially cause a busy-wait loop from pdflush and kswapd
+		 * activity, but those code paths have their own higher-level
+		 * throttling.
+		 */
+		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+			lock_buffer(bh);
+		} else if (!trylock_buffer(bh)) {
+			redirty_page_for_writepage(wbc, page);
+			continue;
+		}
+		if (test_clear_buffer_dirty(bh)) {
+			mark_buffer_async_write(bh);
+		} else {
+			unlock_buffer(bh);
+		}
+	} while ((bh = bh->b_this_page) != head);
+
+	/*
+	 * The page and its buffers are protected by PageWriteback(), so we can
+	 * drop the bh refcounts early.
+	 */
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+
+	do {
+		struct buffer_head *next = bh->b_this_page;
+		if (buffer_async_write(bh)) {
+			submit_bh(write_op, bh);
+			nr_underway++;
+		}
+		bh = next;
+	} while (bh != head);
+	unlock_page(page);
 
-static int gfs2_aspace_writepage(struct page *page,
-				 struct writeback_control *wbc)
-{
-	return block_write_full_page(page, aspace_get_block, wbc);
+	err = 0;
+	if (nr_underway == 0)
+		end_page_writeback(page);
+
+	return err;
 }
 
 static const struct address_space_operations aspace_aops = {
-- 
cgit v0.10.2


From 48bf2b1711dc498494e77705c415ee46bb508fd9 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 29 Apr 2009 13:59:35 +0100
Subject: GFS2: Something nonlinear this way comes!

For some reason GFS2 has been missing support for non-linear
mappings. This patch fixes that, and also avoids taking any
locks for mmap in the O_NOATIME case. In fact we don't actually need
to take the lock here at all - just doing file_accessed() would be
enough, but we have to take the lock eventually and this helps
it hit disk (and thus be seen by other nodes) faster.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 5d82e91..0ee7bd2 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -425,33 +425,36 @@ static struct vm_operations_struct gfs2_vm_ops = {
 	.page_mkwrite = gfs2_page_mkwrite,
 };
 
-
 /**
  * gfs2_mmap -
  * @file: The file to map
  * @vma: The VMA which described the mapping
  *
- * Returns: 0 or error code
+ * There is no need to get a lock here unless we should be updating
+ * atime. We ignore any locking errors since the only consequence is
+ * a missed atime update (which will just be deferred until later).
+ *
+ * Returns: 0
  */
 
 static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-	struct gfs2_holder i_gh;
-	int error;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
-	error = gfs2_glock_nq(&i_gh);
-	if (error) {
-		gfs2_holder_uninit(&i_gh);
-		return error;
-	}
+	if (!(file->f_flags & O_NOATIME)) {
+		struct gfs2_holder i_gh;
+		int error;
 
+		gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+		error = gfs2_glock_nq(&i_gh);
+		file_accessed(file);
+		if (error == 0)
+			gfs2_glock_dq_uninit(&i_gh);
+	}
 	vma->vm_ops = &gfs2_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
 
-	gfs2_glock_dq_uninit(&i_gh);
-
-	return error;
+	return 0;
 }
 
 /**
-- 
cgit v0.10.2


From 7c77f0b3f9208c339a4b40737bb2cb0f0319bb8d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 May 2009 15:37:35 +0200
Subject: splice: implement pipe to pipe splicing

Allow splice(2) to work when both the input and the output is a pipe.

Based on the impementation of the tee(2) syscall, but instead of
duplicating the buffer references move the buffers from the input pipe
to the output pipe.

Moving the whole buffer only succeeds if the full length of the buffer
is spliced.  Otherwise duplicate the buffer, just like tee(2), set the
length of the output buffer and advance the offset on the input
buffer.

Since splice is operating on two pipes, special care needs to be taken
with locking to prevent AN ABBA deadlock.  Again this is done
similarly to the tee(2) syscall, first preparing the input and output
pipes so there's data to consume and space for that data, and then
doing the move operation while holding both locks.

If other processes are doing I/O on the same pipes parallel to the
splice, then by the time both inodes are locked there might be no
buffers left to move, or no space to move them to.  In this case retry
the whole operation, including the preparation phase.  This could lead
to starvation, but I'm not sure if that's serious enough to worry
about.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/splice.c b/fs/splice.c
index 666953d5..e405cf5 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1112,6 +1112,9 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 	return ret;
 }
 
+static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
+			       struct pipe_inode_info *opipe,
+			       size_t len, unsigned int flags);
 /*
  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
  * location, so checking ->i_pipe is not enough to verify that this is a
@@ -1132,12 +1135,32 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		      struct file *out, loff_t __user *off_out,
 		      size_t len, unsigned int flags)
 {
-	struct pipe_inode_info *pipe;
+	struct pipe_inode_info *ipipe;
+	struct pipe_inode_info *opipe;
 	loff_t offset, *off;
 	long ret;
 
-	pipe = pipe_info(in->f_path.dentry->d_inode);
-	if (pipe) {
+	ipipe = pipe_info(in->f_path.dentry->d_inode);
+	opipe = pipe_info(out->f_path.dentry->d_inode);
+
+	if (ipipe && opipe) {
+		if (off_in || off_out)
+			return -ESPIPE;
+
+		if (!(in->f_mode & FMODE_READ))
+			return -EBADF;
+
+		if (!(out->f_mode & FMODE_WRITE))
+			return -EBADF;
+
+		/* Splicing to self would be fun, but... */
+		if (ipipe == opipe)
+			return -EINVAL;
+
+		return splice_pipe_to_pipe(ipipe, opipe, len, flags);
+	}
+
+	if (ipipe) {
 		if (off_in)
 			return -ESPIPE;
 		if (off_out) {
@@ -1149,7 +1172,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		} else
 			off = &out->f_pos;
 
-		ret = do_splice_from(pipe, out, off, len, flags);
+		ret = do_splice_from(ipipe, out, off, len, flags);
 
 		if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
 			ret = -EFAULT;
@@ -1157,8 +1180,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		return ret;
 	}
 
-	pipe = pipe_info(out->f_path.dentry->d_inode);
-	if (pipe) {
+	if (opipe) {
 		if (off_out)
 			return -ESPIPE;
 		if (off_in) {
@@ -1170,7 +1192,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		} else
 			off = &in->f_pos;
 
-		ret = do_splice_to(in, off, pipe, len, flags);
+		ret = do_splice_to(in, off, opipe, len, flags);
 
 		if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
 			ret = -EFAULT;
@@ -1511,7 +1533,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
  * Make sure there's data to read. Wait for input if we can, otherwise
  * return an appropriate error.
  */
-static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 {
 	int ret;
 
@@ -1549,7 +1571,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
  * Make sure there's writeable room. Wait for room if we can, otherwise
  * return an appropriate error.
  */
-static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 {
 	int ret;
 
@@ -1587,6 +1609,124 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 }
 
 /*
+ * Splice contents of ipipe to opipe.
+ */
+static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
+			       struct pipe_inode_info *opipe,
+			       size_t len, unsigned int flags)
+{
+	struct pipe_buffer *ibuf, *obuf;
+	int ret = 0, nbuf;
+	bool input_wakeup = false;
+
+
+retry:
+	ret = ipipe_prep(ipipe, flags);
+	if (ret)
+		return ret;
+
+	ret = opipe_prep(opipe, flags);
+	if (ret)
+		return ret;
+
+	/*
+	 * Potential ABBA deadlock, work around it by ordering lock
+	 * grabbing by pipe info address. Otherwise two different processes
+	 * could deadlock (one doing tee from A -> B, the other from B -> A).
+	 */
+	pipe_double_lock(ipipe, opipe);
+
+	do {
+		if (!opipe->readers) {
+			send_sig(SIGPIPE, current, 0);
+			if (!ret)
+				ret = -EPIPE;
+			break;
+		}
+
+		if (!ipipe->nrbufs && !ipipe->writers)
+			break;
+
+		/*
+		 * Cannot make any progress, because either the input
+		 * pipe is empty or the output pipe is full.
+		 */
+		if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) {
+			/* Already processed some buffers, break */
+			if (ret)
+				break;
+
+			if (flags & SPLICE_F_NONBLOCK) {
+				ret = -EAGAIN;
+				break;
+			}
+
+			/*
+			 * We raced with another reader/writer and haven't
+			 * managed to process any buffers.  A zero return
+			 * value means EOF, so retry instead.
+			 */
+			pipe_unlock(ipipe);
+			pipe_unlock(opipe);
+			goto retry;
+		}
+
+		ibuf = ipipe->bufs + ipipe->curbuf;
+		nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS;
+		obuf = opipe->bufs + nbuf;
+
+		if (len >= ibuf->len) {
+			/*
+			 * Simply move the whole buffer from ipipe to opipe
+			 */
+			*obuf = *ibuf;
+			ibuf->ops = NULL;
+			opipe->nrbufs++;
+			ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS;
+			ipipe->nrbufs--;
+			input_wakeup = true;
+		} else {
+			/*
+			 * Get a reference to this pipe buffer,
+			 * so we can copy the contents over.
+			 */
+			ibuf->ops->get(ipipe, ibuf);
+			*obuf = *ibuf;
+
+			/*
+			 * Don't inherit the gift flag, we need to
+			 * prevent multiple steals of this page.
+			 */
+			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+
+			obuf->len = len;
+			opipe->nrbufs++;
+			ibuf->offset += obuf->len;
+			ibuf->len -= obuf->len;
+		}
+		ret += obuf->len;
+		len -= obuf->len;
+	} while (len);
+
+	pipe_unlock(ipipe);
+	pipe_unlock(opipe);
+
+	/*
+	 * If we put data in the output pipe, wakeup any potential readers.
+	 */
+	if (ret > 0) {
+		smp_mb();
+		if (waitqueue_active(&opipe->wait))
+			wake_up_interruptible(&opipe->wait);
+		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
+	}
+	if (input_wakeup)
+		wakeup_pipe_writers(ipipe);
+
+	return ret;
+}
+
+/*
  * Link contents of ipipe to opipe.
  */
 static int link_pipe(struct pipe_inode_info *ipipe,
@@ -1690,9 +1830,9 @@ static long do_tee(struct file *in, struct file *out, size_t len,
 		 * Keep going, unless we encounter an error. The ipipe/opipe
 		 * ordering doesn't really matter.
 		 */
-		ret = link_ipipe_prep(ipipe, flags);
+		ret = ipipe_prep(ipipe, flags);
 		if (!ret) {
-			ret = link_opipe_prep(opipe, flags);
+			ret = opipe_prep(opipe, flags);
 			if (!ret)
 				ret = link_pipe(ipipe, opipe, len, flags);
 		}
-- 
cgit v0.10.2


From 6818173bd658439b83896a2a7586f64ab51bf29c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 May 2009 15:37:36 +0200
Subject: splice: implement default splice_read method

If f_op->splice_read() is not implemented, fall back to a plain read.
Use vfs_readv() to read into previously allocated pages.

This will allow splice and functions using splice, such as the loop
device, to work on all filesystems.  This includes "direct_io" files
in fuse which bypass the page cache.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 9ca4bb0..801f4ab 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -709,10 +709,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 		goto out_putf;
 
-	/* new backing store needs to support loop (eg splice_read) */
-	if (!inode->i_fop->splice_read)
-		goto out_putf;
-
 	/* size of the new backing store needs to be the same */
 	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
 		goto out_putf;
@@ -788,12 +784,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	error = -EINVAL;
 	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 		const struct address_space_operations *aops = mapping->a_ops;
-		/*
-		 * If we can't read - sorry. If we only can't write - well,
-		 * it's going to be read-only.
-		 */
-		if (!file->f_op->splice_read)
-			goto out_putf;
+
 		if (aops->write_begin)
 			lo_flags |= LO_FLAGS_USE_AOPS;
 		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 6a347fb..ffd4281 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,6 +47,8 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
 		      struct pipe_inode_info *pipe, size_t count,
 		      unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	struct coda_file_info *cfi;
 	struct file *host_file;
 
@@ -54,10 +56,11 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op || !host_file->f_op->splice_read)
-		return -EINVAL;
+	splice_read = host_file->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
 
-	return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
+	return splice_read(host_file, ppos, pipe, count, flags);
 }
 
 static ssize_t
diff --git a/fs/pipe.c b/fs/pipe.c
index 13414ec..f7dd21a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -302,6 +302,20 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info,
 	return 0;
 }
 
+/**
+ * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to put a reference to
+ *
+ * Description:
+ *	This function releases a reference to @buf.
+ */
+void generic_pipe_buf_release(struct pipe_inode_info *pipe,
+			      struct pipe_buffer *buf)
+{
+	page_cache_release(buf->page);
+}
+
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = generic_pipe_buf_map,
diff --git a/fs/read_write.c b/fs/read_write.c
index 9d1e76b..6c8c55d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -805,12 +805,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 		goto out;
 	if (!(in_file->f_mode & FMODE_READ))
 		goto fput_in;
-	retval = -EINVAL;
-	in_inode = in_file->f_path.dentry->d_inode;
-	if (!in_inode)
-		goto fput_in;
-	if (!in_file->f_op || !in_file->f_op->splice_read)
-		goto fput_in;
 	retval = -ESPIPE;
 	if (!ppos)
 		ppos = &in_file->f_pos;
@@ -834,6 +828,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	retval = -EINVAL;
 	if (!out_file->f_op || !out_file->f_op->sendpage)
 		goto fput_out;
+	in_inode = in_file->f_path.dentry->d_inode;
 	out_inode = out_file->f_path.dentry->d_inode;
 	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 	if (retval < 0)
diff --git a/fs/splice.c b/fs/splice.c
index e405cf5..3bd9cb2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -507,9 +507,116 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 
 	return ret;
 }
-
 EXPORT_SYMBOL(generic_file_splice_read);
 
+static const struct pipe_buf_operations default_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.confirm = generic_pipe_buf_confirm,
+	.release = generic_pipe_buf_release,
+	.steal = generic_pipe_buf_steal,
+	.get = generic_pipe_buf_get,
+};
+
+static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+			    unsigned long vlen, loff_t offset)
+{
+	mm_segment_t old_fs;
+	loff_t pos = offset;
+	ssize_t res;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+	set_fs(old_fs);
+
+	return res;
+}
+
+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+				 struct pipe_inode_info *pipe, size_t len,
+				 unsigned int flags)
+{
+	unsigned int nr_pages;
+	unsigned int nr_freed;
+	size_t offset;
+	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_BUFFERS];
+	struct iovec vec[PIPE_BUFFERS];
+	pgoff_t index;
+	ssize_t res;
+	size_t this_len;
+	int error;
+	int i;
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &default_pipe_buf_ops,
+		.spd_release = spd_release_page,
+	};
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_CACHE_MASK;
+	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+		struct page *page;
+
+		page = alloc_page(GFP_HIGHUSER);
+		error = -ENOMEM;
+		if (!page)
+			goto err;
+
+		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+		vec[i].iov_base = (void __user *) kmap(page);
+		vec[i].iov_len = this_len;
+		pages[i] = page;
+		spd.nr_pages++;
+		len -= this_len;
+		offset = 0;
+	}
+
+	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
+	if (res < 0)
+		goto err;
+
+	error = 0;
+	if (!res)
+		goto err;
+
+	nr_freed = 0;
+	for (i = 0; i < spd.nr_pages; i++) {
+		kunmap(pages[i]);
+		this_len = min_t(size_t, vec[i].iov_len, res);
+		partial[i].offset = 0;
+		partial[i].len = this_len;
+		if (!this_len) {
+			__free_page(pages[i]);
+			pages[i] = NULL;
+			nr_freed++;
+		}
+		res -= this_len;
+	}
+	spd.nr_pages -= nr_freed;
+
+	res = splice_to_pipe(pipe, &spd);
+	if (res > 0)
+		*ppos += res;
+
+	return res;
+
+err:
+	for (i = 0; i < spd.nr_pages; i++) {
+		kunmap(pages[i]);
+		__free_page(pages[i]);
+	}
+	return error;
+}
+EXPORT_SYMBOL(default_file_splice_read);
+
 /*
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
  * using sendpage(). Return the number of bytes sent.
@@ -933,11 +1040,10 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 			 struct pipe_inode_info *pipe, size_t len,
 			 unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	int ret;
 
-	if (unlikely(!in->f_op || !in->f_op->splice_read))
-		return -EINVAL;
-
 	if (unlikely(!(in->f_mode & FMODE_READ)))
 		return -EBADF;
 
@@ -945,7 +1051,11 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 	if (unlikely(ret < 0))
 		return ret;
 
-	return in->f_op->splice_read(in, ppos, pipe, len, flags);
+	splice_read = in->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
+
+	return splice_read(in, ppos, pipe, len, flags);
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5bed436..d926c2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2204,6 +2204,8 @@ extern int generic_segment_checks(const struct iovec *iov,
 /* fs/splice.c */
 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
+extern ssize_t default_file_splice_read(struct file *, loff_t *,
+		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index c8f0385..b43a9e0 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -152,5 +152,6 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 
 #endif
-- 
cgit v0.10.2


From 0b0a47f5c4a30b58432e20ae1706a27baea91a88 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 May 2009 15:37:37 +0200
Subject: splice: implement default splice_write method

If f_op->splice_write() is not implemented, fall back to a plain write.
Use vfs_writev() to write from the pipe buffers.

This will allow splice on all filesystems and file types.  This
includes "direct_io" files in fuse which bypass the page cache.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/splice.c b/fs/splice.c
index 3bd9cb2..eefd96b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -535,6 +535,21 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
 	return res;
 }
 
+static ssize_t kernel_writev(struct file *file, const struct iovec *vec,
+			    unsigned long vlen, loff_t *ppos)
+{
+	mm_segment_t old_fs;
+	ssize_t res;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = vfs_writev(file, (const struct iovec __user *)vec, vlen, ppos);
+	set_fs(old_fs);
+
+	return res;
+}
+
 ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 				 struct pipe_inode_info *pipe, size_t len,
 				 unsigned int flags)
@@ -988,6 +1003,122 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 EXPORT_SYMBOL(generic_file_splice_write);
 
+static struct pipe_buffer *nth_pipe_buf(struct pipe_inode_info *pipe, int n)
+{
+	return &pipe->bufs[(pipe->curbuf + n) % PIPE_BUFFERS];
+}
+
+static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
+					 struct file *out, loff_t *ppos,
+					 size_t len, unsigned int flags)
+{
+	ssize_t ret = 0;
+	ssize_t total_len = 0;
+	int do_wakeup = 0;
+
+	pipe_lock(pipe);
+	while (len) {
+		struct pipe_buffer *buf;
+		void *data[PIPE_BUFFERS];
+		struct iovec vec[PIPE_BUFFERS];
+		unsigned int nr_pages = 0;
+		unsigned int write_len = 0;
+		unsigned int now_len = len;
+		unsigned int this_len;
+		int i;
+
+		BUG_ON(pipe->nrbufs > PIPE_BUFFERS);
+		for (i = 0; i < pipe->nrbufs && now_len; i++) {
+			buf = nth_pipe_buf(pipe, i);
+
+			ret = buf->ops->confirm(pipe, buf);
+			if (ret)
+				break;
+
+			data[i] = buf->ops->map(pipe, buf, 0);
+			this_len = min(buf->len, now_len);
+			vec[i].iov_base = (void __user *) data[i] + buf->offset;
+			vec[i].iov_len = this_len;
+			now_len -= this_len;
+			write_len += this_len;
+			nr_pages++;
+		}
+
+		if (nr_pages) {
+			ret = kernel_writev(out, vec, nr_pages, ppos);
+			if (ret == 0)
+				ret = -EIO;
+			if (ret > 0) {
+				len -= ret;
+				total_len += ret;
+			}
+		}
+
+		for (i = 0; i < nr_pages; i++) {
+			buf = nth_pipe_buf(pipe, i);
+			buf->ops->unmap(pipe, buf, data[i]);
+
+			if (ret > 0) {
+				this_len = min_t(unsigned, vec[i].iov_len, ret);
+				buf->offset += this_len;
+				buf->len -= this_len;
+				ret -= this_len;
+			}
+		}
+
+		if (ret < 0)
+			break;
+
+		while (pipe->nrbufs) {
+			const struct pipe_buf_operations *ops;
+
+			buf = nth_pipe_buf(pipe, 0);
+			if (buf->len)
+				break;
+
+			ops = buf->ops;
+			buf->ops = NULL;
+			ops->release(pipe, buf);
+			pipe->curbuf = (pipe->curbuf + 1) % PIPE_BUFFERS;
+			pipe->nrbufs--;
+			if (pipe->inode)
+				do_wakeup = 1;
+		}
+
+		if (pipe->nrbufs)
+			continue;
+		if (!pipe->writers)
+			break;
+		if (!pipe->waiting_writers) {
+			if (total_len)
+				break;
+		}
+
+		if (flags & SPLICE_F_NONBLOCK) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (do_wakeup) {
+			wakeup_pipe_writers(pipe);
+			do_wakeup = 0;
+		}
+
+		pipe_wait(pipe);
+	}
+	pipe_unlock(pipe);
+
+	if (do_wakeup)
+		wakeup_pipe_writers(pipe);
+
+	return total_len ? total_len : ret;
+}
+
 /**
  * generic_splice_sendpage - splice data from a pipe to a socket
  * @pipe:	pipe to splice from
@@ -1015,11 +1146,10 @@ EXPORT_SYMBOL(generic_splice_sendpage);
 static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 			   loff_t *ppos, size_t len, unsigned int flags)
 {
+	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
+				loff_t *, size_t, unsigned int);
 	int ret;
 
-	if (unlikely(!out->f_op || !out->f_op->splice_write))
-		return -EINVAL;
-
 	if (unlikely(!(out->f_mode & FMODE_WRITE)))
 		return -EBADF;
 
@@ -1030,7 +1160,11 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 	if (unlikely(ret < 0))
 		return ret;
 
-	return out->f_op->splice_write(pipe, out, ppos, len, flags);
+	splice_write = out->f_op->splice_write;
+	if (!splice_write)
+		splice_write = default_file_splice_write;
+
+	return splice_write(pipe, out, ppos, len, flags);
 }
 
 /*
-- 
cgit v0.10.2


From 97a52714658cd959a3cfa35c5b6f489859f0204b Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 8 May 2009 18:23:50 +0200
Subject: x86: display extended apic registers with print_local_APIC and
 cpu_debug code

Both print_local_APIC (used when apic=debug kernel param is set) and
cpu_debug code missed support for some extended APIC registers that
I'd like to see.

This adds support to show:

 - extended APIC feature register
 - extended APIC control register
 - extended LVT registers

[ Impact: print more debug info ]

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Jaswinder Singh Rajput <jaswinder@kernel.org>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090508162350.GO29045@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index bc9514f..7ddb36a 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -22,6 +22,7 @@
 #  define	APIC_INTEGRATED(x)	(1)
 #endif
 #define		APIC_XAPIC(x)		((x) >= 0x14)
+#define		APIC_EXT_SPACE(x)	((x) & 0x80000000)
 #define	APIC_TASKPRI	0x80
 #define		APIC_TPRI_MASK		0xFFu
 #define	APIC_ARBPRI	0x90
@@ -116,7 +117,9 @@
 #define		APIC_TDR_DIV_32		0x8
 #define		APIC_TDR_DIV_64		0x9
 #define		APIC_TDR_DIV_128	0xA
-#define	APIC_EILVT0     0x500
+#define	APIC_EFEAT	0x400
+#define	APIC_ECTRL	0x410
+#define APIC_EILVTn(n)	(0x500 + 0x10 * n)
 #define		APIC_EILVT_NR_AMD_K8	1	/* # of extended interrupts */
 #define		APIC_EILVT_NR_AMD_10H	4
 #define		APIC_EILVT_LVTOFF(x)	(((x) >> 4) & 0xF)
@@ -125,9 +128,6 @@
 #define		APIC_EILVT_MSG_NMI	0x4
 #define		APIC_EILVT_MSG_EXT	0x7
 #define		APIC_EILVT_MASKED	(1 << 16)
-#define	APIC_EILVT1     0x510
-#define	APIC_EILVT2     0x520
-#define	APIC_EILVT3     0x530
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 #define APIC_BASE_MSR	0x800
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 1ee966f..0e6543f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -395,7 +395,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 
 static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
 {
-	unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+	unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
 	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
 
 	apic_write(reg, v);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2afe145..65b824c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1739,7 +1739,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
-	unsigned int v, ver, maxlvt;
+	unsigned int i, v, ver, maxlvt;
 	u64 icr;
 
 	if (apic_verbosity == APIC_QUIET)
@@ -1827,6 +1827,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 	v = apic_read(APIC_TDCR);
 	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		maxlvt = (v >> 16) & 0xff;
+		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
 	printk("\n");
 }
 
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 46e29ab..2fc4f6b 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -588,8 +588,20 @@ static void print_apic(void *arg)
 	seq_printf(seq, " TMICT\t\t: %08x\n",  apic_read(APIC_TMICT));
 	seq_printf(seq, " TMCCT\t\t: %08x\n",  apic_read(APIC_TMCCT));
 	seq_printf(seq, " TDCR\t\t: %08x\n",  apic_read(APIC_TDCR));
-#endif /* CONFIG_X86_LOCAL_APIC */
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		unsigned int i, v, maxeilvt;
+
+		v = apic_read(APIC_EFEAT);
+		maxeilvt = (v >> 16) & 0xff;
+		seq_printf(seq, " EFEAT\t\t: %08x\n", v);
+		seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
 
+		for (i = 0; i < maxeilvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
+		}
+	}
+#endif /* CONFIG_X86_LOCAL_APIC */
 	seq_printf(seq, "\n MSR\t:\n");
 }
 
-- 
cgit v0.10.2


From 3d6ad460214cc72b93333f51f498441a56d622e9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 11 May 2009 09:01:08 +0000
Subject: sh: multiple vectors per irq - sh7760

Update intc tables and platform data to use one linux irq
per maskable interrupt source instead of keeping the one-to-one
mapping between vectors and linux irqs.

This fixes potential irq masking issues for sh7760 hardware
blocks such as DMAC/TMU2/REF.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index 666713a..63e9dd3 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -16,7 +16,8 @@ config SH_DMA_IRQ_MULTI
 		     CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
 		     CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091  || \
 		     CPU_SUBTYPE_SH7763  || CPU_SUBTYPE_SH7764  || \
-		     CPU_SUBTYPE_SH7780  || CPU_SUBTYPE_SH7785
+		     CPU_SUBTYPE_SH7780  || CPU_SUBTYPE_SH7785  || \
+		     CPU_SUBTYPE_SH7760
 
 config NR_ONCHIP_DMA_CHANNELS
 	int
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 4a6fd0d..cd09733 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -19,10 +19,7 @@ enum {
 
 	/* interrupt sources */
 	IRL0, IRL1, IRL2, IRL3,
-	HUDI, GPIOI,
-	DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, DMAC_DMTE3,
-	DMAC_DMTE4, DMAC_DMTE5, DMAC_DMTE6, DMAC_DMTE7,
-	DMAC_DMAE,
+	HUDI, GPIOI, DMAC,
 	IRQ4, IRQ5, IRQ6, IRQ7,
 	HCAN20, HCAN21,
 	SSI0, SSI1,
@@ -37,21 +34,20 @@ enum {
 	HSPI,
 	MMCIF0, MMCIF1, MMCIF2, MMCIF3,
 	MFI, ADC, CMT,
-	TMU0, TMU1, TMU2_TUNI, TMU2_TICPI,
-	WDT,
-	REF_RCMI, REF_ROVI,
+	TMU0, TMU1, TMU2,
+	WDT, REF,
 
 	/* interrupt groups */
-	DMAC, DMABRG, SCIF0, SCIF1, SCIF2, SIM, MMCIF, TMU2, REF,
+	DMABRG, SCIF0, SCIF1, SCIF2, SIM, MMCIF,
 };
 
 static struct intc_vect vectors[] __initdata = {
 	INTC_VECT(HUDI, 0x600), INTC_VECT(GPIOI, 0x620),
-	INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660),
-	INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0),
-	INTC_VECT(DMAC_DMTE4, 0x780), INTC_VECT(DMAC_DMTE5, 0x7a0),
-	INTC_VECT(DMAC_DMTE6, 0x7c0), INTC_VECT(DMAC_DMTE7, 0x7e0),
-	INTC_VECT(DMAC_DMAE, 0x6c0),
+	INTC_VECT(DMAC, 0x640), INTC_VECT(DMAC, 0x660),
+	INTC_VECT(DMAC, 0x680), INTC_VECT(DMAC, 0x6a0),
+	INTC_VECT(DMAC, 0x780), INTC_VECT(DMAC, 0x7a0),
+	INTC_VECT(DMAC, 0x7c0), INTC_VECT(DMAC, 0x7e0),
+	INTC_VECT(DMAC, 0x6c0),
 	INTC_VECT(IRQ4, 0x800), INTC_VECT(IRQ5, 0x820),
 	INTC_VECT(IRQ6, 0x840), INTC_VECT(IRQ6, 0x860),
 	INTC_VECT(HCAN20, 0x900), INTC_VECT(HCAN21, 0x920),
@@ -75,23 +71,18 @@ static struct intc_vect vectors[] __initdata = {
 	INTC_VECT(MFI, 0xe80), /* 0xf80 according to data sheet */
 	INTC_VECT(ADC, 0xf80), INTC_VECT(CMT, 0xfa0),
 	INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420),
-	INTC_VECT(TMU2_TUNI, 0x440), INTC_VECT(TMU2_TICPI, 0x460),
+	INTC_VECT(TMU2, 0x440), INTC_VECT(TMU2, 0x460),
 	INTC_VECT(WDT, 0x560),
-	INTC_VECT(REF_RCMI, 0x580), INTC_VECT(REF_ROVI, 0x5a0),
+	INTC_VECT(REF, 0x580), INTC_VECT(REF, 0x5a0),
 };
 
 static struct intc_group groups[] __initdata = {
-	INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2,
-		   DMAC_DMTE3, DMAC_DMTE4, DMAC_DMTE5,
-		   DMAC_DMTE6, DMAC_DMTE7, DMAC_DMAE),
 	INTC_GROUP(DMABRG, DMABRG0, DMABRG1, DMABRG2),
 	INTC_GROUP(SCIF0, SCIF0_ERI, SCIF0_RXI, SCIF0_BRI, SCIF0_TXI),
 	INTC_GROUP(SCIF1, SCIF1_ERI, SCIF1_RXI, SCIF1_BRI, SCIF1_TXI),
 	INTC_GROUP(SCIF2, SCIF2_ERI, SCIF2_RXI, SCIF2_BRI, SCIF2_TXI),
 	INTC_GROUP(SIM, SIM_ERI, SIM_RXI, SIM_TXI, SIM_TEI),
 	INTC_GROUP(MMCIF, MMCIF0, MMCIF1, MMCIF2, MMCIF3),
-	INTC_GROUP(TMU2, TMU2_TUNI, TMU2_TICPI),
-	INTC_GROUP(REF, REF_RCMI, REF_ROVI),
 };
 
 static struct intc_mask_reg mask_registers[] __initdata = {
-- 
cgit v0.10.2


From 50e2d0d3b4ffc945a6c27f16d4e5e557e725ec67 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 11 May 2009 11:34:16 +0000
Subject: sh: r7780 highlander clock fixes

Update the r7780 highlander defconfig to fix
PCLK value, while at it fix cmdline on r7785.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index 68e5eff..943da63 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -260,7 +260,7 @@ CONFIG_SH_R7780MP=y
 #
 CONFIG_SH_TMU=y
 CONFIG_SH_TIMER_IRQ=28
-CONFIG_SH_PCLK_FREQ=32000000
+CONFIG_SH_PCLK_FREQ=33333333
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index 5e677a8..82658f6 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -333,7 +333,7 @@ CONFIG_GUSA=y
 CONFIG_ZERO_PAGE_OFFSET=0x00001000
 CONFIG_BOOT_LINK_OFFSET=0x00800000
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1 mode4_pin=high"
+CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1"
 
 #
 # Bus options
-- 
cgit v0.10.2


From ccc195655fb25d7d967b278c4a4725dc5e7a6bf4 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 11 May 2009 11:37:16 +0000
Subject: sh: TMU platform data for sh7780

This patch adds TMU platform data for sh7780. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index 6f7227c..f1df020 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -12,6 +12,189 @@
 #include <linux/serial.h>
 #include <linux/io.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
+
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 28,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 29,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 30,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffdc0008,
+		.end	= 0xffdc0013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 96,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffdc0014,
+		.end	= 0xffdc001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 97,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffdc0020,
+		.end	= 0xffdc002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 98,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
 
 static struct resource rtc_resources[] = {
 	[0] = {
@@ -58,6 +241,12 @@ static struct platform_device sci_device = {
 };
 
 static struct platform_device *sh7780_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&rtc_device,
 	&sci_device,
 };
@@ -69,6 +258,21 @@ static int __init sh7780_devices_setup(void)
 }
 __initcall(sh7780_devices_setup);
 
+static struct platform_device *sh7780_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7780_early_devices,
+				   ARRAY_SIZE(sh7780_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From cec6be6d1069d697beb490bbb40a290d5ff554a2 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 11 May 2009 17:41:40 +0400
Subject: x86: apic: Fixmap apic address even if apic disabled

In case if apic were disabled by boot option
we still need read_apic operation. So fixmap
a fake apic area if needed.

[ Impact: fix boot crash ]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: yinghai@kernel.org
Cc: eswierk@aristanetworks.com
LKML-Reference: <20090511134140.GH4624@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0e6543f..07cffc1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1587,13 +1587,6 @@ void __init init_apic_mappings(void)
 	} else
 		apic_phys = mp_lapic_addr;
 
-	/* lets check if we may NOP'ify apic operations */
-	if (!cpu_has_apic) {
-		pr_info("APIC: disable apic facility\n");
-		apic_disable();
-		return;
-	}
-
 	/*
 	 * acpi lapic path already maps that address in
 	 * acpi_register_lapic_address()
@@ -1602,7 +1595,15 @@ void __init init_apic_mappings(void)
 		set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
 
 	apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
-				APIC_BASE, apic_phys);
+			APIC_BASE, apic_phys);
+
+	/* lets check if we may NOP'ify apic operations */
+	if (!cpu_has_apic) {
+		pr_info("APIC: disable apic facility\n");
+		apic_disable();
+		return;
+	}
+
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
-- 
cgit v0.10.2


From d756f4adb9d8a86e347a2d5435bb5cc95744733e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 4 May 2009 03:29:52 +0400
Subject: x86, 32-bit: ifdef out struct thread_struct::fs

After commit 464d1a78fbf8cf6c7fd970e7b3e2db50a320ce28 aka
"[PATCH] i386: Convert i386 PDA code to use %fs"
%fs saved during context switch moved from thread_struct to pt_regs
and value on thread_struct became unused.

[ Impact: reduce thread_struct size on 32-bit ]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: containers@lists.linux-foundation.org
LKML-Reference: <20090503232952.GI16631@x200.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c2cceae..a6732ff 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -428,7 +428,9 @@ struct thread_struct {
 	unsigned short		gsindex;
 #endif
 	unsigned long		ip;
+#ifdef CONFIG_X86_64
 	unsigned long		fs;
+#endif
 	unsigned long		gs;
 	/* Hardware debugging registers: */
 	unsigned long		debugreg0;
@@ -874,7 +876,6 @@ static inline void spin_lock_prefetch(const void *x)
 	.vm86_info		= NULL,					  \
 	.sysenter_cs		= __KERNEL_CS,				  \
 	.io_bitmap_ptr		= NULL,					  \
-	.fs			= __KERNEL_PERCPU,			  \
 }
 
 /*
-- 
cgit v0.10.2


From 0c23590f00f85467b318ad0c20c36796a5bd4c60 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 4 May 2009 03:30:15 +0400
Subject: x86, 64-bit: ifdef out struct thread_struct::ip

struct thread_struct::ip isn't used on x86_64, struct pt_regs::ip is used
instead.

kgdb should be reading 0 always, but I can't check it.

[ Impact: (potentially) reduce thread_struct size on 64-bit ]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: containers@lists.linux-foundation.org
LKML-Reference: <20090503233015.GJ16631@x200.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a6732ff..a9ba743 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -427,7 +427,9 @@ struct thread_struct {
 	unsigned short		fsindex;
 	unsigned short		gsindex;
 #endif
+#ifdef CONFIG_X86_32
 	unsigned long		ip;
+#endif
 #ifdef CONFIG_X86_64
 	unsigned long		fs;
 #endif
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index eedfaeb..d07706f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -141,7 +141,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 	gdb_regs32[GDB_PS]	= *(unsigned long *)(p->thread.sp + 8);
 	gdb_regs32[GDB_CS]	= __KERNEL_CS;
 	gdb_regs32[GDB_SS]	= __KERNEL_DS;
-	gdb_regs[GDB_PC]	= p->thread.ip;
+	gdb_regs[GDB_PC]	= 0;
 	gdb_regs[GDB_R8]	= 0;
 	gdb_regs[GDB_R9]	= 0;
 	gdb_regs[GDB_R10]	= 0;
-- 
cgit v0.10.2


From 3c598766a2bae1b208470e7cc934ac462561e3cb Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@novell.com>
Date: Mon, 11 May 2009 16:49:28 +0100
Subject: x86: fix percpu_{to,from}_op()

- the byte operand constraints were wrong for 32-bit
- the to-op's input operands weren't properly parenthesized

[ Impact: fix possible miscompilation or build failure ]

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index aee103b..02ecb30 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -82,22 +82,22 @@ do {							\
 	case 1:						\
 		asm(op "b %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "ri" ((T__)val));			\
+		    : "qi" ((T__)(val)));		\
 		break;					\
 	case 2:						\
 		asm(op "w %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "ri" ((T__)val));			\
+		    : "ri" ((T__)(val)));		\
 		break;					\
 	case 4:						\
 		asm(op "l %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "ri" ((T__)val));			\
+		    : "ri" ((T__)(val)));		\
 		break;					\
 	case 8:						\
 		asm(op "q %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "re" ((T__)val));			\
+		    : "re" ((T__)(val)));		\
 		break;					\
 	default: __bad_percpu_size();			\
 	}						\
@@ -109,7 +109,7 @@ do {							\
 	switch (sizeof(var)) {				\
 	case 1:						\
 		asm(op "b "__percpu_arg(1)",%0"		\
-		    : "=r" (ret__)			\
+		    : "=q" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 2:						\
-- 
cgit v0.10.2


From 72af2b363107df9cd67985ecb8495bbdff3c9857 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@nokia.com>
Date: Mon, 11 May 2009 09:58:19 -0700
Subject: ARM: OMAP: Fix printing of reserved memory for frambuffer

Print reserved memory only if it was actually reserved.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index ce6b4ba..3746222 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -206,9 +206,10 @@ void __init omapfb_reserve_sdram(void)
 			config_invalid = 1;
 			return;
 		}
-		if (rg.paddr)
+		if (rg.paddr) {
 			reserve_bootmem(rg.paddr, rg.size, BOOTMEM_DEFAULT);
-		reserved += rg.size;
+			reserved += rg.size;
+		}
 		omapfb_config.mem_desc.region[i] = rg;
 		configured_regions++;
 	}
-- 
cgit v0.10.2


From 5a772b2b3c68e7e0b503c5a48469113bb0634314 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 8 May 2009 10:56:33 -0400
Subject: ring-buffer: replace constants with time macros in
 ring-buffer-benchmark

The use of numeric constants is discouraged. It is cleaner and more
descriptive to use macros for constant time conversions.

This patch also removes an extra new line.

[ Impact: more descriptive time conversions ]

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index a21aa7b..7d3aef9 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -253,7 +253,7 @@ static void ring_buffer_producer(void)
 	}
 
 	time = end_tv.tv_sec - start_tv.tv_sec;
-	time *= 1000000;
+	time *= USEC_PER_SEC;
 	time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
 
 	entries = ring_buffer_entries(buffer);
@@ -273,7 +273,8 @@ static void ring_buffer_producer(void)
 	pr_info("Missed:   %ld\n", missed);
 	pr_info("Hit:      %ld\n", hit);
 
-	do_div(time, 1000);
+	/* Convert time from usecs to millisecs */
+	do_div(time, USEC_PER_MSEC);
 	if (time)
 		hit /= (long)time;
 	else
@@ -282,18 +283,19 @@ static void ring_buffer_producer(void)
 	pr_info("Entries per millisec: %ld\n", hit);
 
 	if (hit) {
-		avg = 1000000 / hit;
+		/* Calculate the average time in nanosecs */
+		avg = NSEC_PER_MSEC / hit;
 		pr_info("%ld ns per entry\n", avg);
 	}
 
-
 	if (missed) {
 		if (time)
 			missed /= (long)time;
 
 		pr_info("Total iterations per millisec: %ld\n", hit + missed);
 
-		avg = 1000000 / (hit + missed);
+		/* Caculate the average time in nanosecs */
+		avg = NSEC_PER_MSEC / (hit + missed);
 		pr_info("%ld ns per entry\n", avg);
 	}
 }
-- 
cgit v0.10.2


From d988ff94c1074c4c914235c8591bcceafb585ecf Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 8 May 2009 11:03:57 -0400
Subject: ring-buffer: check for divide by zero in ring-buffer-benchmark

Although we check if "missed" is not zero, we divide by hit + missed,
and the addition can possible overflow and become a divide by zero.

This patch checks for this case, and will report it when it happens
then modify "hit" to make the calculation be non zero.

[ Impact: prevent possible divide by zero in ring-buffer-benchmark ]

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 7d3aef9..8d68e14 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -294,6 +294,12 @@ static void ring_buffer_producer(void)
 
 		pr_info("Total iterations per millisec: %ld\n", hit + missed);
 
+		/* it is possible that hit + missed will overflow and be zero */
+		if (!(hit + missed)) {
+			pr_info("hit + missed overflowed and totalled zero!\n");
+			hit--; /* make it non zero */
+		}
+
 		/* Caculate the average time in nanosecs */
 		avg = NSEC_PER_MSEC / (hit + missed);
 		pr_info("%ld ns per entry\n", avg);
-- 
cgit v0.10.2


From 1cd8d7358948909ab80b254eb14bcebc555ad417 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 11 May 2009 14:08:09 -0400
Subject: ring-buffer: remove type parameter from rb_reserve_next_event

The rb_reserve_next_event is only called for the data type (type = 0).
There is no reason to pass in the type to the function.

Before:
   text    data     bss     dec     hex filename
  16554      24      12   16590    40ce kernel/trace/ring_buffer.o

After:
   text    data     bss     dec     hex filename
  16538      24      12   16574    40be kernel/trace/ring_buffer.o

[ Impact: cleaner, smaller and slightly more efficient code ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3611706..fe40f6c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1389,7 +1389,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
-		      unsigned type, unsigned long length)
+		      unsigned long length)
 {
 	struct ring_buffer_event *event;
 	u64 ts, delta;
@@ -1448,7 +1448,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		/* Non commits have zero deltas */
 		delta = 0;
 
-	event = __rb_reserve_next(cpu_buffer, type, length, &ts);
+	event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
 	if (PTR_ERR(event) == -EAGAIN)
 		goto again;
 
@@ -1556,7 +1556,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (length > BUF_PAGE_SIZE)
 		goto out;
 
-	event = rb_reserve_next_event(cpu_buffer, 0, length);
+	event = rb_reserve_next_event(cpu_buffer, length);
 	if (!event)
 		goto out;
 
@@ -1782,7 +1782,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 		goto out;
 
 	event_length = rb_calculate_event_length(length);
-	event = rb_reserve_next_event(cpu_buffer, 0, event_length);
+	event = rb_reserve_next_event(cpu_buffer, event_length);
 	if (!event)
 		goto out;
 
-- 
cgit v0.10.2


From be957c447f7233a67904a1b11eb3ab61e702bf4d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 11 May 2009 14:42:53 -0400
Subject: ring-buffer: move calculation of event length

The event length is calculated and passed in to rb_reserve_next_event
in two different locations. Having rb_reserve_next_event do the
calculations directly makes only one location to do the change and
causes the calculation to be inlined by gcc.

Before:
   text    data     bss     dec     hex filename
  16538      24      12   16574    40be kernel/trace/ring_buffer.o

After:
   text    data     bss     dec     hex filename
  16490      24      12   16526    408e kernel/trace/ring_buffer.o

[ Impact: smaller more efficient code ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index fe40f6c..493cba4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -367,6 +367,9 @@ static inline int test_time_stamp(u64 delta)
 
 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
+/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
+#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
+
 int ring_buffer_print_page_header(struct trace_seq *s)
 {
 	struct buffer_data_page field;
@@ -1396,6 +1399,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	int commit = 0;
 	int nr_loops = 0;
 
+	length = rb_calculate_event_length(length);
  again:
 	/*
 	 * We allow for interrupts to reenter here and do a trace.
@@ -1552,8 +1556,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	length = rb_calculate_event_length(length);
-	if (length > BUF_PAGE_SIZE)
+	if (length > BUF_MAX_DATA_SIZE)
 		goto out;
 
 	event = rb_reserve_next_event(cpu_buffer, length);
@@ -1758,7 +1761,6 @@ int ring_buffer_write(struct ring_buffer *buffer,
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
-	unsigned long event_length;
 	void *body;
 	int ret = -EBUSY;
 	int cpu, resched;
@@ -1781,8 +1783,10 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	event_length = rb_calculate_event_length(length);
-	event = rb_reserve_next_event(cpu_buffer, event_length);
+	if (length > BUF_MAX_DATA_SIZE)
+		goto out;
+
+	event = rb_reserve_next_event(cpu_buffer, length);
 	if (!event)
 		goto out;
 
-- 
cgit v0.10.2


From b68d8201433a91cabbcbeae48b53d8c1c426433a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 03:45:08 +0900
Subject: sh: clkfwk: Make recalc return an unsigned long.

This is prep work for cleaning up some of the rate propagation bits.
Trivial conversion.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index b1f2919..d63352b 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -12,7 +12,7 @@ struct clk_ops {
 	void (*init)(struct clk *clk);
 	void (*enable)(struct clk *clk);
 	void (*disable)(struct clk *clk);
-	void (*recalc)(struct clk *clk);
+	unsigned long (*recalc)(struct clk *clk);
 	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
 	int (*set_parent)(struct clk *clk, struct clk *parent);
 	long (*round_rate)(struct clk *clk, unsigned long rate);
@@ -96,4 +96,5 @@ enum clk_sh_algo_id {
 
 	IP_N1,
 };
+
 #endif /* __ASM_SH_CLOCK_H */
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 133dbe4..b022aff 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -75,6 +75,7 @@ static struct clk *onchip_clocks[] = {
 	&cpu_clk,
 };
 
+/* Propagate rate to children */
 static void propagate_rate(struct clk *clk)
 {
 	struct clk *clkp;
@@ -83,7 +84,7 @@ static void propagate_rate(struct clk *clk)
 		if (likely(clkp->parent != clk))
 			continue;
 		if (likely(clkp->ops && clkp->ops->recalc))
-			clkp->ops->recalc(clkp);
+			clkp->rate = clkp->ops->recalc(clkp);
 		if (unlikely(clkp->flags & CLK_RATE_PROPAGATES))
 			propagate_rate(clkp);
 	}
@@ -240,7 +241,7 @@ void clk_recalc_rate(struct clk *clk)
 		unsigned long flags;
 
 		spin_lock_irqsave(&clock_lock, flags);
-		clk->ops->recalc(clk);
+		clk->rate = clk->ops->recalc(clk);
 		spin_unlock_irqrestore(&clock_lock, flags);
 	}
 
@@ -377,20 +378,22 @@ static int clks_sysdev_suspend(struct sys_device *dev, pm_message_t state)
 	switch (state.event) {
 	case PM_EVENT_ON:
 		/* Resumeing from hibernation */
-		if (prev_state.event == PM_EVENT_FREEZE) {
-			list_for_each_entry(clkp, &clock_list, node)
-				if (likely(clkp->ops)) {
-					unsigned long rate = clkp->rate;
-
-					if (likely(clkp->ops->set_parent))
-						clkp->ops->set_parent(clkp,
-							clkp->parent);
-					if (likely(clkp->ops->set_rate))
-						clkp->ops->set_rate(clkp,
-							rate, NO_CHANGE);
-					else if (likely(clkp->ops->recalc))
-						clkp->ops->recalc(clkp);
-					}
+		if (prev_state.event != PM_EVENT_FREEZE)
+			break;
+
+		list_for_each_entry(clkp, &clock_list, node) {
+			if (likely(clkp->ops)) {
+				unsigned long rate = clkp->rate;
+
+				if (likely(clkp->ops->set_parent))
+					clkp->ops->set_parent(clkp,
+						clkp->parent);
+				if (likely(clkp->ops->set_rate))
+					clkp->ops->set_rate(clkp,
+						rate, NO_CHANGE);
+				else if (likely(clkp->ops->recalc))
+					clkp->rate = clkp->ops->recalc(clkp);
+			}
 		}
 		break;
 	case PM_EVENT_FREEZE:
diff --git a/arch/sh/kernel/cpu/sh2/clock-sh7619.c b/arch/sh/kernel/cpu/sh2/clock-sh7619.c
index d2c1579..2679913 100644
--- a/arch/sh/kernel/cpu/sh2/clock-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/clock-sh7619.c
@@ -38,28 +38,28 @@ static struct clk_ops sh7619_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7619_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
-	clk->rate = clk->parent->rate / pll1rate[(ctrl_inw(FREQCR) >> 8) & 7];
+	return clk->parent->rate / pll1rate[(ctrl_inw(FREQCR) >> 8) & 7];
 }
 
 static struct clk_ops sh7619_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
-	clk->rate = clk->parent->rate;
+	return clk->parent->rate;
 }
 
 static struct clk_ops sh7619_cpu_clk_ops = {
@@ -78,4 +78,3 @@ void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
 	if (idx < ARRAY_SIZE(sh7619_clk_ops))
 		*ops = sh7619_clk_ops[idx];
 }
-
diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7201.c b/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
index 4a5e597..7814c76 100644
--- a/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
@@ -34,37 +34,37 @@ static const int pfc_divisors[]={1,2,3,4,6,8,12};
 
 static void master_clk_init(struct clk *clk)
 {
-	clk->rate = 10000000 * PLL2 * pll1rate[(ctrl_inw(FREQCR) >> 8) & 0x0007];
+	return 10000000 * PLL2 * pll1rate[(ctrl_inw(FREQCR) >> 8) & 0x0007];
 }
 
 static struct clk_ops sh7201_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7201_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7201_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inw(FREQCR) >> 4) & 0x0007);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7201_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7203.c b/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
index fb78132..f8c6933 100644
--- a/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
@@ -46,29 +46,29 @@ static struct clk_ops sh7203_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7203_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx-2];
+	return clk->parent->rate / pfc_divisors[idx-2];
 }
 
 static struct clk_ops sh7203_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
-	clk->rate = clk->parent->rate;
+	return clk->parent->rate;
 }
 
 static struct clk_ops sh7203_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7206.c b/arch/sh/kernel/cpu/sh2a/clock-sh7206.c
index 82d7f99..c2268bd 100644
--- a/arch/sh/kernel/cpu/sh2a/clock-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/clock-sh7206.c
@@ -41,29 +41,29 @@ static struct clk_ops sh7206_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7206_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
-	clk->rate = clk->parent->rate / pll1rate[(ctrl_inw(FREQCR) >> 8) & 0x0007];
+	return clk->parent->rate / pll1rate[(ctrl_inw(FREQCR) >> 8) & 0x0007];
 }
 
 static struct clk_ops sh7206_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FREQCR) & 0x0007);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7206_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh3.c b/arch/sh/kernel/cpu/sh3/clock-sh3.c
index c3c9459..27b8738 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh3.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh3.c
@@ -38,36 +38,36 @@ static struct clk_ops sh3_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x2000) >> 11) | (frqcr & 0x0003);
 
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh3_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x8000) >> 13) | ((frqcr & 0x0030) >> 4);
 
-	clk->rate = clk->parent->rate / stc_multipliers[idx];
+	return clk->parent->rate / stc_multipliers[idx];
 }
 
 static struct clk_ops sh3_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x4000) >> 12) | ((frqcr & 0x000c) >> 2);
 
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh3_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7705.c b/arch/sh/kernel/cpu/sh3/clock-sh7705.c
index dfdbf32..0ca8f2c 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7705.c
@@ -39,30 +39,30 @@ static struct clk_ops sh7705_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = ctrl_inw(FRQCR) & 0x0003;
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7705_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0300) >> 8;
-	clk->rate = clk->parent->rate / stc_multipliers[idx];
+	return clk->parent->rate / stc_multipliers[idx];
 }
 
 static struct clk_ops sh7705_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0030) >> 4;
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7705_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7706.c b/arch/sh/kernel/cpu/sh3/clock-sh7706.c
index 0cf96f9..4bf7887 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7706.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7706.c
@@ -34,36 +34,36 @@ static struct clk_ops sh7706_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x2000) >> 11) | (frqcr & 0x0003);
 
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7706_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x8000) >> 13) | ((frqcr & 0x0030) >> 4);
 
-	clk->rate = clk->parent->rate / stc_multipliers[idx];
+	return clk->parent->rate / stc_multipliers[idx];
 }
 
 static struct clk_ops sh7706_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x4000) >> 12) | ((frqcr & 0x000c) >> 2);
 
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7706_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7709.c b/arch/sh/kernel/cpu/sh3/clock-sh7709.c
index b791a29..fa30b60 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7709.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7709.c
@@ -41,12 +41,12 @@ static struct clk_ops sh7709_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x2000) >> 11) | (frqcr & 0x0003);
 
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7709_module_clk_ops = {
@@ -56,25 +56,25 @@ static struct clk_ops sh7709_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = (frqcr & 0x0080) ?
 		((frqcr & 0x8000) >> 13) | ((frqcr & 0x0030) >> 4) : 1;
 
-	clk->rate = clk->parent->rate * stc_multipliers[idx];
+	return clk->parent->rate * stc_multipliers[idx];
 }
 
 static struct clk_ops sh7709_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = ((frqcr & 0x4000) >> 12) | ((frqcr & 0x000c) >> 2);
 
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7709_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7710.c b/arch/sh/kernel/cpu/sh3/clock-sh7710.c
index 4744c50..030a58b 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7710.c
@@ -33,30 +33,30 @@ static struct clk_ops sh7710_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0007);
-	clk->rate = clk->parent->rate / md_table[idx];
+	return clk->parent->rate / md_table[idx];
 }
 
 static struct clk_ops sh7710_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0700) >> 8;
-	clk->rate = clk->parent->rate / md_table[idx];
+	return clk->parent->rate / md_table[idx];
 }
 
 static struct clk_ops sh7710_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0070) >> 4;
-	clk->rate = clk->parent->rate / md_table[idx];
+	return clk->parent->rate / md_table[idx];
 }
 
 static struct clk_ops sh7710_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh3/clock-sh7712.c b/arch/sh/kernel/cpu/sh3/clock-sh7712.c
index 54f54df..6428ee6 100644
--- a/arch/sh/kernel/cpu/sh3/clock-sh7712.c
+++ b/arch/sh/kernel/cpu/sh3/clock-sh7712.c
@@ -33,24 +33,24 @@ static struct clk_ops sh7712_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = frqcr & 0x0007;
 
-	clk->rate = clk->parent->rate / divisors[idx];
+	return clk->parent->rate / divisors[idx];
 }
 
 static struct clk_ops sh7712_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int frqcr = ctrl_inw(FRQCR);
 	int idx = (frqcr & 0x0030) >> 4;
 
-	clk->rate = clk->parent->rate / divisors[idx];
+	return clk->parent->rate / divisors[idx];
 }
 
 static struct clk_ops sh7712_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index a334294..628d50e 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -21,10 +21,10 @@
 static int frqcr3_divisors[] = { 1, 2, 3, 4, 6, 8, 16 };
 static int frqcr3_values[]   = { 0, 1, 2, 3, 4, 5, 6  };
 
-static void emi_clk_recalc(struct clk *clk)
+static unsigned long emi_clk_recalc(struct clk *clk)
 {
 	int idx = ctrl_inl(CPG2_FRQCR3) & 0x0007;
-	clk->rate = clk->parent->rate / frqcr3_divisors[idx];
+	return clk->parent->rate / frqcr3_divisors[idx];
 }
 
 static inline int frqcr3_lookup(struct clk *clk, unsigned long rate)
@@ -50,10 +50,10 @@ static struct clk sh4202_emi_clk = {
 	.ops		= &sh4202_emi_clk_ops,
 };
 
-static void femi_clk_recalc(struct clk *clk)
+static unsigned long femi_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(CPG2_FRQCR3) >> 3) & 0x0007;
-	clk->rate = clk->parent->rate / frqcr3_divisors[idx];
+	return clk->parent->rate / frqcr3_divisors[idx];
 }
 
 static struct clk_ops sh4202_femi_clk_ops = {
@@ -90,10 +90,10 @@ static void shoc_clk_init(struct clk *clk)
 	WARN_ON(i == ARRAY_SIZE(frqcr3_divisors));	/* Undefined clock */
 }
 
-static void shoc_clk_recalc(struct clk *clk)
+static unsigned long shoc_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(CPG2_FRQCR3) >> 6) & 0x0007;
-	clk->rate = clk->parent->rate / frqcr3_divisors[idx];
+	return clk->parent->rate / frqcr3_divisors[idx];
 }
 
 static int shoc_clk_verify_rate(struct clk *clk, unsigned long rate)
@@ -127,7 +127,7 @@ static int shoc_clk_set_rate(struct clk *clk, unsigned long rate, int algo_id)
 	frqcr3 |= tmp << 6;
 	ctrl_outl(frqcr3, CPG2_FRQCR3);
 
-	clk->rate = clk->parent->rate / frqcr3_divisors[tmp];
+	return clk->parent->rate / frqcr3_divisors[tmp];
 
 	return 0;
 }
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4.c b/arch/sh/kernel/cpu/sh4/clock-sh4.c
index dca9f87..73294d9 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4.c
@@ -35,30 +35,30 @@ static struct clk_ops sh4_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) & 0x0007);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh4_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) >> 3) & 0x0007;
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh4_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(FRQCR) >> 6) & 0x0007;
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh4_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 1ccdfc5..5b1427f 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -151,11 +151,11 @@ static int divisors2[] = { 4, 1, 8, 12, 16, 24, 32, 1, 48, 64, 72, 96, 1, 144 };
 static int divisors2[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32, 40 };
 #endif
 
-static void master_clk_recalc(struct clk *clk)
+static unsigned long master_clk_recalc(struct clk *clk)
 {
 	unsigned frqcr = ctrl_inl(FRQCR);
 
-	clk->rate = CONFIG_SH_PCLK_FREQ * STCPLL(frqcr);
+	return CONFIG_SH_PCLK_FREQ * STCPLL(frqcr);
 }
 
 static void master_clk_init(struct clk *clk)
@@ -166,12 +166,11 @@ static void master_clk_init(struct clk *clk)
 	master_clk_recalc(clk);
 }
 
-
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	unsigned long frqcr = ctrl_inl(FRQCR);
 
-	clk->rate = clk->parent->rate / STCPLL(frqcr);
+	return clk->parent->rate / STCPLL(frqcr);
 }
 
 #if defined(CONFIG_CPU_SUBTYPE_SH7724)
@@ -283,14 +282,14 @@ static int sh7722_find_div_index(unsigned long parent_rate, unsigned rate)
 	return index;
 }
 
-static void sh7722_frqcr_recalc(struct clk *clk)
+static unsigned long sh7722_frqcr_recalc(struct clk *clk)
 {
 	struct frqcr_context ctx = sh7722_get_clk_context(clk->name);
 	unsigned long frqcr = ctrl_inl(FRQCR);
 	int index;
 
 	index = (frqcr >> ctx.shift) & ctx.mask;
-	clk->rate = clk->parent->rate * 2 / divisors2[index];
+	return clk->parent->rate * 2 / divisors2[index];
 }
 
 static int sh7722_frqcr_set_rate(struct clk *clk, unsigned long rate,
@@ -439,11 +438,8 @@ static struct clk_ops sh7722_frqcr_clk_ops = {
 
 /*
  * clock ops methods for SIU A/B and IrDA clock
- *
  */
-
 #ifndef CONFIG_CPU_SUBTYPE_SH7343
-
 static int sh7722_siu_set_rate(struct clk *clk, unsigned long rate, int algo_id)
 {
 	unsigned long r;
@@ -458,12 +454,12 @@ static int sh7722_siu_set_rate(struct clk *clk, unsigned long rate, int algo_id)
 	return 0;
 }
 
-static void sh7722_siu_recalc(struct clk *clk)
+static unsigned long sh7722_siu_recalc(struct clk *clk)
 {
 	unsigned long r;
 
 	r = ctrl_inl(clk->arch_flags);
-	clk->rate = clk->parent->rate * 2 / divisors2[r & 0xF];
+	return clk->parent->rate * 2 / divisors2[r & 0xF];
 }
 
 static int sh7722_siu_start_stop(struct clk *clk, int enable)
@@ -525,12 +521,12 @@ static int sh7722_video_set_rate(struct clk *clk, unsigned long rate,
 	return 0;
 }
 
-static void sh7722_video_recalc(struct clk *clk)
+static unsigned long sh7722_video_recalc(struct clk *clk)
 {
 	unsigned long r;
 
 	r = ctrl_inl(VCLKCR);
-	clk->rate = clk->parent->rate / ((r & 0x3F) + 1);
+	return clk->parent->rate / ((r & 0x3F) + 1);
 }
 
 static struct clk_ops sh7722_video_clk_ops = {
@@ -627,7 +623,7 @@ static int sh7722_mstpcr_start_stop(struct clk *clk, int enable)
 		break;
 	default:
 		return -EINVAL;
-	}  
+	}
 
 	r = ctrl_inl(reg);
 
@@ -650,10 +646,9 @@ static void sh7722_mstpcr_disable(struct clk *clk)
 	sh7722_mstpcr_start_stop(clk, 0);
 }
 
-static void sh7722_mstpcr_recalc(struct clk *clk)
+static unsigned long sh7722_mstpcr_recalc(struct clk *clk)
 {
-	if (clk->parent)
-		clk->rate = clk->parent->rate;
+	return clk->parent->rate;
 }
 
 static struct clk_ops sh7722_mstpcr_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
index 3177d0d..26630fb 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
@@ -29,29 +29,29 @@ static struct clk_ops sh7763_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 4) & 0x07);
-	clk->rate = clk->parent->rate / p0fc_divisors[idx];
+	return clk->parent->rate / p0fc_divisors[idx];
 }
 
 static struct clk_ops sh7763_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 16) & 0x07);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7763_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
-	clk->rate = clk->parent->rate;
+	return clk->parent->rate;
 }
 
 static struct clk_ops sh7763_cpu_clk_ops = {
@@ -71,10 +71,10 @@ void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
 		*ops = sh7763_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 20) & 0x07);
-	clk->rate = clk->parent->rate / cfc_divisors[idx];
+	return clk->parent->rate / cfc_divisors[idx];
 }
 
 static struct clk_ops sh7763_shyway_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7770.c b/arch/sh/kernel/cpu/sh4a/clock-sh7770.c
index 8e23606..e0b8967 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7770.c
@@ -28,30 +28,30 @@ static struct clk_ops sh7770_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 28) & 0x000f);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7770_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(FRQCR) & 0x000f);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7770_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 24) & 0x000f);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7770_cpu_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
index 01f3da6..ba8dacc 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
@@ -29,30 +29,30 @@ static struct clk_ops sh7780_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(FRQCR) & 0x0003);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7780_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 16) & 0x0007);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7780_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 24) & 0x0001);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7780_cpu_clk_ops = {
@@ -72,10 +72,10 @@ void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
 		*ops = sh7780_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> 20) & 0x0007);
-	clk->rate = clk->parent->rate / cfc_divisors[idx];
+	return clk->parent->rate / cfc_divisors[idx];
 }
 
 static struct clk_ops sh7780_shyway_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index 27fa81b..52691ea 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -33,30 +33,30 @@ static struct clk_ops sh7785_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(FRQMR1) & 0x000f);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7785_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 16) & 0x000f);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7785_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 28) & 0x0003);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7785_cpu_clk_ops = {
@@ -76,10 +76,10 @@ void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
 		*ops = sh7785_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 20) & 0x0003);
-	clk->rate = clk->parent->rate / sfc_divisors[idx];
+	return clk->parent->rate / sfc_divisors[idx];
 }
 
 static struct clk_ops sh7785_shyway_clk_ops = {
@@ -92,10 +92,10 @@ static struct clk sh7785_shyway_clk = {
 	.ops		= &sh7785_shyway_clk_ops,
 };
 
-static void ddr_clk_recalc(struct clk *clk)
+static unsigned long ddr_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 12) & 0x0003);
-	clk->rate = clk->parent->rate / mfc_divisors[idx];
+	return clk->parent->rate / mfc_divisors[idx];
 }
 
 static struct clk_ops sh7785_ddr_clk_ops = {
@@ -108,10 +108,10 @@ static struct clk sh7785_ddr_clk = {
 	.ops		= &sh7785_ddr_clk_ops,
 };
 
-static void ram_clk_recalc(struct clk *clk)
+static unsigned long ram_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 24) & 0x0003);
-	clk->rate = clk->parent->rate / ufc_divisors[idx];
+	return clk->parent->rate / ufc_divisors[idx];
 }
 
 static struct clk_ops sh7785_ram_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
index f84a9c1..2e00ff4 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
@@ -36,30 +36,30 @@ static struct clk_ops sh7786_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inl(FRQMR1) & 0x000f);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops sh7786_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 16) & 0x000f);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops sh7786_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 28) & 0x0003);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops sh7786_cpu_clk_ops = {
@@ -79,10 +79,10 @@ void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
 		*ops = sh7786_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 20) & 0x0003);
-	clk->rate = clk->parent->rate / sfc_divisors[idx];
+	return clk->parent->rate / sfc_divisors[idx];
 }
 
 static struct clk_ops sh7786_shyway_clk_ops = {
@@ -95,10 +95,10 @@ static struct clk sh7786_shyway_clk = {
 	.ops		= &sh7786_shyway_clk_ops,
 };
 
-static void ddr_clk_recalc(struct clk *clk)
+static unsigned long ddr_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQMR1) >> 12) & 0x0003);
-	clk->rate = clk->parent->rate / mfc_divisors[idx];
+	return clk->parent->rate / mfc_divisors[idx];
 }
 
 static struct clk_ops sh7786_ddr_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
index c630b29..770934e 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
@@ -40,30 +40,30 @@ static struct clk_ops shx3_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> PFC_POS) & PFC_MSK);
-	clk->rate = clk->parent->rate / pfc_divisors[idx];
+	return clk->parent->rate / pfc_divisors[idx];
 }
 
 static struct clk_ops shx3_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> BFC_POS) & BFC_MSK);
-	clk->rate = clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate / bfc_divisors[idx];
 }
 
 static struct clk_ops shx3_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> IFC_POS) & IFC_MSK);
-	clk->rate = clk->parent->rate / ifc_divisors[idx];
+	return clk->parent->rate / ifc_divisors[idx];
 }
 
 static struct clk_ops shx3_cpu_clk_ops = {
@@ -83,10 +83,10 @@ void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
 		*ops = shx3_clk_ops[idx];
 }
 
-static void shyway_clk_recalc(struct clk *clk)
+static unsigned long shyway_clk_recalc(struct clk *clk)
 {
 	int idx = ((ctrl_inl(FRQCR) >> CFC_POS) & CFC_MSK);
-	clk->rate = clk->parent->rate / cfc_divisors[idx];
+	return clk->parent->rate / cfc_divisors[idx];
 }
 
 static struct clk_ops shx3_shyway_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh5/clock-sh5.c b/arch/sh/kernel/cpu/sh5/clock-sh5.c
index 5486324..7f864eb 100644
--- a/arch/sh/kernel/cpu/sh5/clock-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/clock-sh5.c
@@ -32,30 +32,30 @@ static struct clk_ops sh5_master_clk_ops = {
 	.init		= master_clk_init,
 };
 
-static void module_clk_recalc(struct clk *clk)
+static unsigned long module_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(cprc_base) >> 12) & 0x0007;
-	clk->rate = clk->parent->rate / ifc_table[idx];
+	return clk->parent->rate / ifc_table[idx];
 }
 
 static struct clk_ops sh5_module_clk_ops = {
 	.recalc		= module_clk_recalc,
 };
 
-static void bus_clk_recalc(struct clk *clk)
+static unsigned long bus_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(cprc_base) >> 3) & 0x0007;
-	clk->rate = clk->parent->rate / ifc_table[idx];
+	return clk->parent->rate / ifc_table[idx];
 }
 
 static struct clk_ops sh5_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static void cpu_clk_recalc(struct clk *clk)
+static unsigned long cpu_clk_recalc(struct clk *clk)
 {
 	int idx = (ctrl_inw(cprc_base) & 0x0007);
-	clk->rate = clk->parent->rate / ifc_table[idx];
+	return clk->parent->rate / ifc_table[idx];
 }
 
 static struct clk_ops sh5_cpu_clk_ops = {
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index fe8d893..a693dcd 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -172,22 +172,19 @@ static void __init tmu_clk_init(struct clk *clk)
 	clk->rate = clk_get_rate(clk->parent) / (4 << (divisor << 1));
 }
 
-static void tmu_clk_recalc(struct clk *clk)
+static unsigned long tmu_clk_recalc(struct clk *clk)
 {
 	int tmu_num = clk->name[3]-'0';
-	unsigned long prev_rate = clk_get_rate(clk);
+	unsigned long new_rate;
 	unsigned long flags;
 	u8 divisor = ctrl_inw(TMU0_TCR+tmu_num*0xC) & 0x7;
-	clk->rate  = clk_get_rate(clk->parent) / (4 << (divisor << 1));
 
-	if(prev_rate==clk_get_rate(clk))
-		return;
-
-	if(tmu_num)
-		return; /* No more work on TMU1 */
+	new_rate = clk_get_rate(clk->parent) / (4 << (divisor << 1));
+	if (clk->rate == new_rate || tmu_num)
+		return clk->rate; /* No more work on TMU1 */
 
 	local_irq_save(flags);
-	tmus_are_scaled = (prev_rate > clk->rate);
+	tmus_are_scaled = (clk->rate > new_rate);
 
 	_tmu_stop(TMU0);
 
@@ -210,6 +207,7 @@ static void tmu_clk_recalc(struct clk *clk)
 	_tmu_start(TMU0);
 
 	local_irq_restore(flags);
+	return new_rate;
 }
 
 static struct clk_ops tmu_clk_ops = {
-- 
cgit v0.10.2


From a02cb230bb4fca04f091746c593de720a0e3a94a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 03:50:44 +0900
Subject: sh: clkfwk: Add a followparent_recalc() helper.

This adds a followparent_recalc() helper for clocks that just follow the
parent's rate. Switch over the few CPUs that use this scheme for some of
their clocks.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index d63352b..241f1c1 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -44,7 +44,7 @@ int __init arch_clk_init(void);
 
 /* arch/sh/kernel/cpu/clock.c */
 int clk_init(void);
-
+unsigned long followparent_recalc(struct clk *clk);
 void clk_recalc_rate(struct clk *);
 
 int clk_register(struct clk *);
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index b022aff..17f6c07 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -90,6 +90,12 @@ static void propagate_rate(struct clk *clk)
 	}
 }
 
+/* Used for clocks that always have same value as the parent clock */
+unsigned long followparent_recalc(struct clk *clk)
+{
+	return clk->parent->rate;
+}
+
 static void __clk_init(struct clk *clk)
 {
 	/*
diff --git a/arch/sh/kernel/cpu/sh2/clock-sh7619.c b/arch/sh/kernel/cpu/sh2/clock-sh7619.c
index 2679913..4fe8631 100644
--- a/arch/sh/kernel/cpu/sh2/clock-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/clock-sh7619.c
@@ -57,13 +57,8 @@ static struct clk_ops sh7619_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static unsigned long cpu_clk_recalc(struct clk *clk)
-{
-	return clk->parent->rate;
-}
-
 static struct clk_ops sh7619_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
+	.recalc		= followparent_recalc,
 };
 
 static struct clk_ops *sh7619_clk_ops[] = {
diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7203.c b/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
index f8c6933..9409869 100644
--- a/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/clock-sh7203.c
@@ -66,13 +66,8 @@ static struct clk_ops sh7203_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static unsigned long cpu_clk_recalc(struct clk *clk)
-{
-	return clk->parent->rate;
-}
-
 static struct clk_ops sh7203_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
+	.recalc		= followparent_recalc,
 };
 
 static struct clk_ops *sh7203_clk_ops[] = {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 5b1427f..4bdae84 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -646,15 +646,10 @@ static void sh7722_mstpcr_disable(struct clk *clk)
 	sh7722_mstpcr_start_stop(clk, 0);
 }
 
-static unsigned long sh7722_mstpcr_recalc(struct clk *clk)
-{
-	return clk->parent->rate;
-}
-
 static struct clk_ops sh7722_mstpcr_clk_ops = {
 	.enable = sh7722_mstpcr_enable,
 	.disable = sh7722_mstpcr_disable,
-	.recalc = sh7722_mstpcr_recalc,
+	.recalc = followparent_recalc,
 };
 
 #define MSTPCR(_name, _parent, regnr, bitnr) \
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
index 26630fb..db51cff 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
@@ -49,13 +49,8 @@ static struct clk_ops sh7763_bus_clk_ops = {
 	.recalc		= bus_clk_recalc,
 };
 
-static unsigned long cpu_clk_recalc(struct clk *clk)
-{
-	return clk->parent->rate;
-}
-
 static struct clk_ops sh7763_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
+	.recalc		= followparent_recalc,
 };
 
 static struct clk_ops *sh7763_clk_ops[] = {
-- 
cgit v0.10.2


From b1f6cfe48c3cb1dfa77db3d2f42f765febaef9bc Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 04:27:43 +0900
Subject: sh: clkfwk: refactor rate propagation.

This resyncs the rate propagation strategy with the scheme used by the
OMAP clock framework. Child clocks are tracked on a list under each
parent and propagation happens there specifically rather than constantly
iterating over the global clock list.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 241f1c1..5dc8b73 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -27,6 +27,9 @@ struct clk {
 	struct clk		*parent;
 	struct clk_ops		*ops;
 
+	struct list_head	children;
+	struct list_head	sibling;	/* node for children */
+
 	int			usecount;
 
 	unsigned long		rate;
@@ -35,7 +38,6 @@ struct clk {
 };
 
 #define CLK_ALWAYS_ENABLED	(1 << 0)
-#define CLK_RATE_PROPAGATES	(1 << 1)
 #define CLK_NEEDS_INIT		(1 << 2)
 
 /* Should be defined by processor-specific code */
@@ -44,9 +46,10 @@ int __init arch_clk_init(void);
 
 /* arch/sh/kernel/cpu/clock.c */
 int clk_init(void);
-unsigned long followparent_recalc(struct clk *clk);
+unsigned long followparent_recalc(struct clk *);
+void recalculate_root_clocks(void);
+void propagate_rate(struct clk *);
 void clk_recalc_rate(struct clk *);
-
 int clk_register(struct clk *);
 void clk_unregister(struct clk *);
 
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 17f6c07..0a06df8 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -1,11 +1,11 @@
 /*
  * arch/sh/kernel/cpu/clock.c - SuperH clock framework
  *
- *  Copyright (C) 2005, 2006, 2007  Paul Mundt
+ *  Copyright (C) 2005 - 2009  Paul Mundt
  *
  * This clock framework is derived from the OMAP version by:
  *
- *	Copyright (C) 2004 - 2005 Nokia Corporation
+ *	Copyright (C) 2004 - 2008 Nokia Corporation
  *	Written by Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>
  *
  *  Modified for omap shared clock framework by Tony Lindgren <tony@atomide.com>
@@ -43,20 +43,20 @@ static DEFINE_MUTEX(clock_list_sem);
  */
 static struct clk master_clk = {
 	.name		= "master_clk",
-	.flags		= CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES,
+	.flags		= CLK_ALWAYS_ENABLED,
 	.rate		= CONFIG_SH_PCLK_FREQ,
 };
 
 static struct clk module_clk = {
 	.name		= "module_clk",
 	.parent		= &master_clk,
-	.flags		= CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES,
+	.flags		= CLK_ALWAYS_ENABLED,
 };
 
 static struct clk bus_clk = {
 	.name		= "bus_clk",
 	.parent		= &master_clk,
-	.flags		= CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES,
+	.flags		= CLK_ALWAYS_ENABLED,
 };
 
 static struct clk cpu_clk = {
@@ -75,27 +75,24 @@ static struct clk *onchip_clocks[] = {
 	&cpu_clk,
 };
 
+/* Used for clocks that always have same value as the parent clock */
+unsigned long followparent_recalc(struct clk *clk)
+{
+	return clk->parent->rate;
+}
+
 /* Propagate rate to children */
-static void propagate_rate(struct clk *clk)
+void propagate_rate(struct clk *tclk)
 {
 	struct clk *clkp;
 
-	list_for_each_entry(clkp, &clock_list, node) {
-		if (likely(clkp->parent != clk))
-			continue;
-		if (likely(clkp->ops && clkp->ops->recalc))
+	list_for_each_entry(clkp, &tclk->children, sibling) {
+		if (clkp->ops->recalc)
 			clkp->rate = clkp->ops->recalc(clkp);
-		if (unlikely(clkp->flags & CLK_RATE_PROPAGATES))
-			propagate_rate(clkp);
+		propagate_rate(clkp);
 	}
 }
 
-/* Used for clocks that always have same value as the parent clock */
-unsigned long followparent_recalc(struct clk *clk)
-{
-	return clk->parent->rate;
-}
-
 static void __clk_init(struct clk *clk)
 {
 	/*
@@ -180,10 +177,46 @@ void clk_disable(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_disable);
 
+static LIST_HEAD(root_clks);
+
+/**
+ * recalculate_root_clocks - recalculate and propagate all root clocks
+ *
+ * Recalculates all root clocks (clocks with no parent), which if the
+ * clock's .recalc is set correctly, should also propagate their rates.
+ * Called at init.
+ */
+void recalculate_root_clocks(void)
+{
+	struct clk *clkp;
+
+	list_for_each_entry(clkp, &root_clks, sibling) {
+		if (clkp->ops->recalc)
+			clkp->rate = clkp->ops->recalc(clkp);
+		propagate_rate(clkp);
+	}
+}
+
 int clk_register(struct clk *clk)
 {
+	if (clk == NULL || IS_ERR(clk))
+		return -EINVAL;
+
+	/*
+	 * trap out already registered clocks
+	 */
+	if (clk->node.next || clk->node.prev)
+		return 0;
+
 	mutex_lock(&clock_list_sem);
 
+	INIT_LIST_HEAD(&clk->children);
+
+	if (clk->parent)
+		list_add(&clk->sibling, &clk->parent->children);
+	else
+		list_add(&clk->sibling, &root_clks);
+
 	list_add(&clk->node, &clock_list);
 	clk->usecount = 0;
 	clk->flags |= CLK_NEEDS_INIT;
@@ -205,6 +238,7 @@ EXPORT_SYMBOL_GPL(clk_register);
 void clk_unregister(struct clk *clk)
 {
 	mutex_lock(&clock_list_sem);
+	list_del(&clk->sibling);
 	list_del(&clk->node);
 	mutex_unlock(&clock_list_sem);
 }
@@ -231,50 +265,53 @@ int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id)
 
 		spin_lock_irqsave(&clock_lock, flags);
 		ret = clk->ops->set_rate(clk, rate, algo_id);
+		if (ret == 0) {
+			if (clk->ops->recalc)
+				clk->rate = clk->ops->recalc(clk);
+			propagate_rate(clk);
+		}
 		spin_unlock_irqrestore(&clock_lock, flags);
 	}
 
-	if (unlikely(clk->flags & CLK_RATE_PROPAGATES))
-		propagate_rate(clk);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(clk_set_rate_ex);
 
 void clk_recalc_rate(struct clk *clk)
 {
-	if (likely(clk->ops && clk->ops->recalc)) {
-		unsigned long flags;
+	unsigned long flags;
 
-		spin_lock_irqsave(&clock_lock, flags);
-		clk->rate = clk->ops->recalc(clk);
-		spin_unlock_irqrestore(&clock_lock, flags);
-	}
+	if (!clk->ops->recalc)
+		return;
 
-	if (unlikely(clk->flags & CLK_RATE_PROPAGATES))
-		propagate_rate(clk);
+	spin_lock_irqsave(&clock_lock, flags);
+	clk->rate = clk->ops->recalc(clk);
+	propagate_rate(clk);
+	spin_unlock_irqrestore(&clock_lock, flags);
 }
 EXPORT_SYMBOL_GPL(clk_recalc_rate);
 
 int clk_set_parent(struct clk *clk, struct clk *parent)
 {
+	unsigned long flags;
 	int ret = -EINVAL;
-	struct clk *old;
 
 	if (!parent || !clk)
 		return ret;
 
-	old = clk->parent;
-	if (likely(clk->ops && clk->ops->set_parent)) {
-		unsigned long flags;
-		spin_lock_irqsave(&clock_lock, flags);
-		ret = clk->ops->set_parent(clk, parent);
-		spin_unlock_irqrestore(&clock_lock, flags);
-		clk->parent = (ret ? old : parent);
-	}
+	spin_lock_irqsave(&clock_lock, flags);
+	if (clk->usecount == 0) {
+		if (clk->ops->set_parent)
+			ret = clk->ops->set_parent(clk, parent);
+		if (ret == 0) {
+			if (clk->ops->recalc)
+				clk->rate = clk->ops->recalc(clk);
+			propagate_rate(clk);
+		}
+	} else
+		ret = -EBUSY;
+	spin_unlock_irqrestore(&clock_lock, flags);
 
-	if (unlikely(clk->flags & CLK_RATE_PROPAGATES))
-		propagate_rate(clk);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(clk_set_parent);
@@ -457,8 +494,7 @@ int __init clk_init(void)
 	ret |= arch_clk_init();
 
 	/* Kick the child clocks.. */
-	propagate_rate(&master_clk);
-	propagate_rate(&bus_clk);
+	recalculate_root_clocks();
 
 	return ret;
 }
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 4bdae84..8e53829 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -161,9 +161,7 @@ static unsigned long master_clk_recalc(struct clk *clk)
 static void master_clk_init(struct clk *clk)
 {
 	clk->parent = NULL;
-	clk->flags |= CLK_RATE_PROPAGATES;
-	clk->rate = CONFIG_SH_PCLK_FREQ;
-	master_clk_recalc(clk);
+	clk->rate = master_clk_recalc(clk);
 }
 
 static unsigned long module_clk_recalc(struct clk *clk)
@@ -541,19 +539,16 @@ static struct clk_ops sh7722_video_clk_ops = {
 static struct clk sh7722_umem_clock = {
 	.name = "umem_clk",
 	.ops = &sh7722_frqcr_clk_ops,
-	.flags = CLK_RATE_PROPAGATES,
 };
 
 static struct clk sh7722_sh_clock = {
 	.name = "sh_clk",
 	.ops = &sh7722_frqcr_clk_ops,
-	.flags = CLK_RATE_PROPAGATES,
 };
 
 static struct clk sh7722_peripheral_clock = {
 	.name = "peripheral_clk",
 	.ops = &sh7722_frqcr_clk_ops,
-	.flags = CLK_RATE_PROPAGATES,
 };
 
 static struct clk sh7722_sdram_clock = {
@@ -564,7 +559,6 @@ static struct clk sh7722_sdram_clock = {
 static struct clk sh7722_r_clock = {
 	.name = "r_clk",
 	.rate = 32768,
-	.flags = CLK_RATE_PROPAGATES,
 };
 
 #if !defined(CONFIG_CPU_SUBTYPE_SH7343) &&\
-- 
cgit v0.10.2


From 4ff29ff8e8723a41e7defd8bc78a7b16cbf940a2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 05:14:53 +0900
Subject: sh: clkfwk: Consolidate the ALWAYS_ENABLED / NEEDS_INIT mess.

There is no real distinction here in behaviour, either a clock needs to
be enabled on initialiation or not. The ALWAYS_ENABLED flag was always
intended to only apply to clocks that were physically always on and could
simply not be disabled at all from software. Unfortunately over time this
was abused and the meaning became a bit blurry.

So, we kill off both of all of those paths now, as well as the newer
NEEDS_INIT flag, and consolidate on a CLK_ENABLE_ON_INIT. Clocks that
need to be enabled on initialization can set this, and it will purposely
enable them and bump the refcount up.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 5dc8b73..246f9eb 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -37,8 +37,7 @@ struct clk {
 	unsigned long		arch_flags;
 };
 
-#define CLK_ALWAYS_ENABLED	(1 << 0)
-#define CLK_NEEDS_INIT		(1 << 2)
+#define CLK_ENABLE_ON_INIT	(1 << 0)
 
 /* Should be defined by processor-specific code */
 void arch_init_clk_ops(struct clk_ops **, int type);
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 0a06df8..c683be5 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -43,26 +43,26 @@ static DEFINE_MUTEX(clock_list_sem);
  */
 static struct clk master_clk = {
 	.name		= "master_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.rate		= CONFIG_SH_PCLK_FREQ,
 };
 
 static struct clk module_clk = {
 	.name		= "module_clk",
 	.parent		= &master_clk,
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 };
 
 static struct clk bus_clk = {
 	.name		= "bus_clk",
 	.parent		= &master_clk,
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 };
 
 static struct clk cpu_clk = {
 	.name		= "cpu_clk",
 	.parent		= &master_clk,
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 };
 
 /*
@@ -93,39 +93,11 @@ void propagate_rate(struct clk *tclk)
 	}
 }
 
-static void __clk_init(struct clk *clk)
-{
-	/*
-	 * See if this is the first time we're enabling the clock, some
-	 * clocks that are always enabled still require "special"
-	 * initialization. This is especially true if the clock mode
-	 * changes and the clock needs to hunt for the proper set of
-	 * divisors to use before it can effectively recalc.
-	 */
-
-	if (clk->flags & CLK_NEEDS_INIT) {
-		if (clk->ops && clk->ops->init)
-			clk->ops->init(clk);
-
-		clk->flags &= ~CLK_NEEDS_INIT;
-	}
-}
-
 static int __clk_enable(struct clk *clk)
 {
-	if (!clk)
-		return -EINVAL;
-
-	clk->usecount++;
-
-	/* nothing to do if always enabled */
-	if (clk->flags & CLK_ALWAYS_ENABLED)
-		return 0;
-
-	if (clk->usecount == 1) {
-		__clk_init(clk);
-
-		__clk_enable(clk->parent);
+	if (clk->usecount++ == 0) {
+		if (clk->parent)
+			__clk_enable(clk->parent);
 
 		if (clk->ops && clk->ops->enable)
 			clk->ops->enable(clk);
@@ -139,6 +111,9 @@ int clk_enable(struct clk *clk)
 	unsigned long flags;
 	int ret;
 
+	if (!clk)
+		return -EINVAL;
+
 	spin_lock_irqsave(&clock_lock, flags);
 	ret = __clk_enable(clk);
 	spin_unlock_irqrestore(&clock_lock, flags);
@@ -149,21 +124,11 @@ EXPORT_SYMBOL_GPL(clk_enable);
 
 static void __clk_disable(struct clk *clk)
 {
-	if (!clk)
-		return;
-
-	clk->usecount--;
-
-	WARN_ON(clk->usecount < 0);
-
-	if (clk->flags & CLK_ALWAYS_ENABLED)
-		return;
-
-	if (clk->usecount == 0) {
+	if (clk->usecount > 0 && !(--clk->usecount)) {
 		if (likely(clk->ops && clk->ops->disable))
 			clk->ops->disable(clk);
-
-		__clk_disable(clk->parent);
+		if (likely(clk->parent))
+			__clk_disable(clk->parent);
 	}
 }
 
@@ -171,6 +136,9 @@ void clk_disable(struct clk *clk)
 {
 	unsigned long flags;
 
+	if (!clk)
+		return;
+
 	spin_lock_irqsave(&clock_lock, flags);
 	__clk_disable(clk);
 	spin_unlock_irqrestore(&clock_lock, flags);
@@ -211,6 +179,7 @@ int clk_register(struct clk *clk)
 	mutex_lock(&clock_list_sem);
 
 	INIT_LIST_HEAD(&clk->children);
+	clk->usecount = 0;
 
 	if (clk->parent)
 		list_add(&clk->sibling, &clk->parent->children);
@@ -218,19 +187,10 @@ int clk_register(struct clk *clk)
 		list_add(&clk->sibling, &root_clks);
 
 	list_add(&clk->node, &clock_list);
-	clk->usecount = 0;
-	clk->flags |= CLK_NEEDS_INIT;
-
+	if (clk->ops->init)
+		clk->ops->init(clk);
 	mutex_unlock(&clock_list_sem);
 
-	if (clk->flags & CLK_ALWAYS_ENABLED) {
-		__clk_init(clk);
-		pr_debug( "Clock '%s' is ALWAYS_ENABLED\n", clk->name);
-		if (clk->ops && clk->ops->enable)
-			clk->ops->enable(clk);
-		pr_debug( "Enabled.");
-	}
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(clk_register);
@@ -244,6 +204,15 @@ void clk_unregister(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_unregister);
 
+static void clk_enable_init_clocks(void)
+{
+	struct clk *clkp;
+
+	list_for_each_entry(clkp, &clock_list, node)
+		if (clkp->flags & CLK_ENABLE_ON_INIT)
+			clk_enable(clkp);
+}
+
 unsigned long clk_get_rate(struct clk *clk)
 {
 	return clk->rate;
@@ -404,9 +373,7 @@ static int show_clocks(char *buf, char **start, off_t off,
 
 		p += sprintf(p, "%-12s\t: %ld.%02ldMHz\t%s\n", clk->name,
 			     rate / 1000000, (rate % 1000000) / 10000,
-			     ((clk->flags & CLK_ALWAYS_ENABLED) ||
-			      clk->usecount > 0) ?
-			     "enabled" : "disabled");
+			      (clk->usecount > 0) ?  "enabled" : "disabled");
 	}
 
 	return p - buf;
@@ -496,6 +463,9 @@ int __init clk_init(void)
 	/* Kick the child clocks.. */
 	recalculate_root_clocks();
 
+	/* Enable the necessary init clocks */
+	clk_enable_init_clocks();
+
 	return ret;
 }
 
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index 628d50e..0caca9f 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -46,7 +46,7 @@ static struct clk_ops sh4202_emi_clk_ops = {
 
 static struct clk sh4202_emi_clk = {
 	.name		= "emi_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh4202_emi_clk_ops,
 };
 
@@ -62,7 +62,7 @@ static struct clk_ops sh4202_femi_clk_ops = {
 
 static struct clk sh4202_femi_clk = {
 	.name		= "femi_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh4202_femi_clk_ops,
 };
 
@@ -140,7 +140,7 @@ static struct clk_ops sh4202_shoc_clk_ops = {
 
 static struct clk sh4202_shoc_clk = {
 	.name		= "shoc_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh4202_shoc_clk_ops,
 };
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
index db51cff..21bd70f 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
@@ -78,7 +78,7 @@ static struct clk_ops sh7763_shyway_clk_ops = {
 
 static struct clk sh7763_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7763_shyway_clk_ops,
 };
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
index ba8dacc..4c11f89 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
@@ -84,7 +84,7 @@ static struct clk_ops sh7780_shyway_clk_ops = {
 
 static struct clk sh7780_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7780_shyway_clk_ops,
 };
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index 52691ea..edd4328 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -88,7 +88,7 @@ static struct clk_ops sh7785_shyway_clk_ops = {
 
 static struct clk sh7785_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7785_shyway_clk_ops,
 };
 
@@ -104,7 +104,7 @@ static struct clk_ops sh7785_ddr_clk_ops = {
 
 static struct clk sh7785_ddr_clk = {
 	.name		= "ddr_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7785_ddr_clk_ops,
 };
 
@@ -120,7 +120,7 @@ static struct clk_ops sh7785_ram_clk_ops = {
 
 static struct clk sh7785_ram_clk = {
 	.name		= "ram_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7785_ram_clk_ops,
 };
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
index 2e00ff4..2825494 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
@@ -91,7 +91,7 @@ static struct clk_ops sh7786_shyway_clk_ops = {
 
 static struct clk sh7786_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7786_shyway_clk_ops,
 };
 
@@ -107,7 +107,7 @@ static struct clk_ops sh7786_ddr_clk_ops = {
 
 static struct clk sh7786_ddr_clk = {
 	.name		= "ddr_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &sh7786_ddr_clk_ops,
 };
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
index 770934e..6e5c864 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
@@ -95,7 +95,7 @@ static struct clk_ops shx3_shyway_clk_ops = {
 
 static struct clk shx3_shyway_clk = {
 	.name		= "shyway_clk",
-	.flags		= CLK_ALWAYS_ENABLED,
+	.flags		= CLK_ENABLE_ON_INIT,
 	.ops		= &shx3_shyway_clk_ops,
 };
 
-- 
cgit v0.10.2


From 154502e160e02dee7b00ec2149762ae5d48e0bb4 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 05:18:13 +0900
Subject: sh: clkfwk: Convert SH-Mobile CPUs to use CLK_ENABLE_ON_INIT.

Kill off all of the clk_always_enabled leftovers and use the new flag
directly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 246f9eb..e9fced9 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -52,22 +52,6 @@ void clk_recalc_rate(struct clk *);
 int clk_register(struct clk *);
 void clk_unregister(struct clk *);
 
-static inline int clk_always_enable(const char *id)
-{
-	struct clk *clk;
-	int ret;
-
-	clk = clk_get(NULL, id);
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
-
-	ret = clk_enable(clk);
-	if (ret)
-		clk_put(clk);
-
-	return ret;
-}
-
 /* the exported API, in addition to clk_set_rate */
 /**
  * clk_set_rate_ex - set the clock rate for a clock source, with additional parameter
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 8e53829..f777d00 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -646,207 +646,208 @@ static struct clk_ops sh7722_mstpcr_clk_ops = {
 	.recalc = followparent_recalc,
 };
 
-#define MSTPCR(_name, _parent, regnr, bitnr) \
+#define MSTPCR(_name, _parent, regnr, bitnr, _flags) \
 {						\
 	.name = _name,				\
+	.flags = _flags,			\
 	.arch_flags = MSTPCR_ARCH_FLAGS(regnr, bitnr),	\
 	.ops = (void *)_parent,		\
 }
 
 static struct clk sh7722_mstpcr_clocks[] = {
 #if defined(CONFIG_CPU_SUBTYPE_SH7722)
-	MSTPCR("uram0", "umem_clk", 0, 28),
-	MSTPCR("xymem0", "bus_clk", 0, 26),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10),
-	MSTPCR("scif0", "peripheral_clk", 0, 7),
-	MSTPCR("scif1", "peripheral_clk", 0, 6),
-	MSTPCR("scif2", "peripheral_clk", 0, 5),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("rtc0", "r_clk", 1, 8),
-	MSTPCR("sdhi0", "peripheral_clk", 2, 18),
-	MSTPCR("keysc0", "r_clk", 2, 14),
-	MSTPCR("usbf0", "peripheral_clk", 2, 11),
-	MSTPCR("2dg0", "bus_clk", 2, 9),
-	MSTPCR("siu0", "bus_clk", 2, 8),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("jpu0", "bus_clk", 2, 6),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
+	MSTPCR("uram0", "umem_clk", 0, 28, CLK_ENABLE_ON_INIT),
+	MSTPCR("xymem0", "bus_clk", 0, 26, CLK_ENABLE_ON_INIT),
+	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
+	MSTPCR("cmt0", "r_clk", 0, 14, 0),
+	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
+	MSTPCR("flctl0", "peripheral_clk", 0, 10, 0),
+	MSTPCR("scif0", "peripheral_clk", 0, 7, 0),
+	MSTPCR("scif1", "peripheral_clk", 0, 6, 0),
+	MSTPCR("scif2", "peripheral_clk", 0, 5, 0),
+	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
+	MSTPCR("rtc0", "r_clk", 1, 8, 0),
+	MSTPCR("sdhi0", "peripheral_clk", 2, 18, 0),
+	MSTPCR("keysc0", "r_clk", 2, 14, 0),
+	MSTPCR("usbf0", "peripheral_clk", 2, 11, 0),
+	MSTPCR("2dg0", "bus_clk", 2, 9, 0),
+	MSTPCR("siu0", "bus_clk", 2, 8, 0),
+	MSTPCR("vou0", "bus_clk", 2, 5, 0),
+	MSTPCR("jpu0", "bus_clk", 2, 6, CLK_ENABLE_ON_INIT),
+	MSTPCR("beu0", "bus_clk", 2, 4, 0),
+	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
+	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
+	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
+	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7723)
 	/* See page 60 of Datasheet V1.0: Overview -> Block Diagram */
-	MSTPCR("tlb0", "cpu_clk", 0, 31),
-	MSTPCR("ic0", "cpu_clk", 0, 30),
-	MSTPCR("oc0", "cpu_clk", 0, 29),
-	MSTPCR("l2c0", "sh_clk", 0, 28),
-	MSTPCR("ilmem0", "cpu_clk", 0, 27),
-	MSTPCR("fpu0", "cpu_clk", 0, 24),
-	MSTPCR("intc0", "cpu_clk", 0, 22),
-	MSTPCR("dmac0", "bus_clk", 0, 21),
-	MSTPCR("sh0", "sh_clk", 0, 20),
-	MSTPCR("hudi0", "peripheral_clk", 0, 19),
-	MSTPCR("ubc0", "cpu_clk", 0, 17),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("dmac1", "bus_clk", 0, 12),
-	MSTPCR("tmu1", "peripheral_clk", 0, 11),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10),
-	MSTPCR("scif0", "peripheral_clk", 0, 9),
-	MSTPCR("scif1", "peripheral_clk", 0, 8),
-	MSTPCR("scif2", "peripheral_clk", 0, 7),
-	MSTPCR("scif3", "bus_clk", 0, 6),
-	MSTPCR("scif4", "bus_clk", 0, 5),
-	MSTPCR("scif5", "bus_clk", 0, 4),
-	MSTPCR("msiof0", "bus_clk", 0, 2),
-	MSTPCR("msiof1", "bus_clk", 0, 1),
-	MSTPCR("meram0", "sh_clk", 0, 0),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("rtc0", "r_clk", 1, 8),
-	MSTPCR("atapi0", "sh_clk", 2, 28),
-	MSTPCR("adc0", "peripheral_clk", 2, 28),
-	MSTPCR("tpu0", "bus_clk", 2, 25),
-	MSTPCR("irda0", "peripheral_clk", 2, 24),
-	MSTPCR("tsif0", "bus_clk", 2, 22),
-	MSTPCR("icb0", "bus_clk", 2, 21),
-	MSTPCR("sdhi0", "bus_clk", 2, 18),
-	MSTPCR("sdhi1", "bus_clk", 2, 17),
-	MSTPCR("keysc0", "r_clk", 2, 14),
-	MSTPCR("usb0", "bus_clk", 2, 11),
-	MSTPCR("2dg0", "bus_clk", 2, 10),
-	MSTPCR("siu0", "bus_clk", 2, 8),
-	MSTPCR("veu1", "bus_clk", 2, 6),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
+	MSTPCR("tlb0", "cpu_clk", 0, 31, 0),
+	MSTPCR("ic0", "cpu_clk", 0, 30, 0),
+	MSTPCR("oc0", "cpu_clk", 0, 29, 0),
+	MSTPCR("l2c0", "sh_clk", 0, 28, 0),
+	MSTPCR("ilmem0", "cpu_clk", 0, 27, 0),
+	MSTPCR("fpu0", "cpu_clk", 0, 24, 0),
+	MSTPCR("intc0", "cpu_clk", 0, 22, 0),
+	MSTPCR("dmac0", "bus_clk", 0, 21, 0),
+	MSTPCR("sh0", "sh_clk", 0, 20, 0),
+	MSTPCR("hudi0", "peripheral_clk", 0, 19, 0),
+	MSTPCR("ubc0", "cpu_clk", 0, 17, 0),
+	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
+	MSTPCR("cmt0", "r_clk", 0, 14, 0),
+	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
+	MSTPCR("dmac1", "bus_clk", 0, 12, 0),
+	MSTPCR("tmu1", "peripheral_clk", 0, 11, 0),
+	MSTPCR("flctl0", "peripheral_clk", 0, 10, 0),
+	MSTPCR("scif0", "peripheral_clk", 0, 9, 0),
+	MSTPCR("scif1", "peripheral_clk", 0, 8, 0),
+	MSTPCR("scif2", "peripheral_clk", 0, 7, 0),
+	MSTPCR("scif3", "bus_clk", 0, 6, 0),
+	MSTPCR("scif4", "bus_clk", 0, 5, 0),
+	MSTPCR("scif5", "bus_clk", 0, 4, 0),
+	MSTPCR("msiof0", "bus_clk", 0, 2, 0),
+	MSTPCR("msiof1", "bus_clk", 0, 1, 0),
+	MSTPCR("meram0", "sh_clk", 0, 0, CLK_ENABLE_ON_INIT),
+	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
+	MSTPCR("rtc0", "r_clk", 1, 8, 0),
+	MSTPCR("atapi0", "sh_clk", 2, 28, 0),
+	MSTPCR("adc0", "peripheral_clk", 2, 28, 0),
+	MSTPCR("tpu0", "bus_clk", 2, 25, 0),
+	MSTPCR("irda0", "peripheral_clk", 2, 24, 0),
+	MSTPCR("tsif0", "bus_clk", 2, 22, 0),
+	MSTPCR("icb0", "bus_clk", 2, 21, 0),
+	MSTPCR("sdhi0", "bus_clk", 2, 18, 0),
+	MSTPCR("sdhi1", "bus_clk", 2, 17, 0),
+	MSTPCR("keysc0", "r_clk", 2, 14, 0),
+	MSTPCR("usb0", "bus_clk", 2, 11, 0),
+	MSTPCR("2dg0", "bus_clk", 2, 10, 0),
+	MSTPCR("siu0", "bus_clk", 2, 8, 0),
+	MSTPCR("veu1", "bus_clk", 2, 6, CLK_ENABLE_ON_INIT),
+	MSTPCR("vou0", "bus_clk", 2, 5, 0),
+	MSTPCR("beu0", "bus_clk", 2, 4, 0),
+	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
+	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
+	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
+	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7724)
 	/* See Datasheet : Overview -> Block Diagram */
-	MSTPCR("tlb0", "cpu_clk", 0, 31),
-	MSTPCR("ic0", "cpu_clk", 0, 30),
-	MSTPCR("oc0", "cpu_clk", 0, 29),
-	MSTPCR("rs0", "bus_clk", 0, 28),
-	MSTPCR("ilmem0", "cpu_clk", 0, 27),
-	MSTPCR("l2c0", "sh_clk", 0, 26),
-	MSTPCR("fpu0", "cpu_clk", 0, 24),
-	MSTPCR("intc0", "peripheral_clk", 0, 22),
-	MSTPCR("dmac0", "bus_clk", 0, 21),
-	MSTPCR("sh0", "sh_clk", 0, 20),
-	MSTPCR("hudi0", "peripheral_clk", 0, 19),
-	MSTPCR("ubc0", "cpu_clk", 0, 17),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("dmac1", "bus_clk", 0, 12),
-	MSTPCR("tmu1", "peripheral_clk", 0, 10),
-	MSTPCR("scif0", "peripheral_clk", 0, 9),
-	MSTPCR("scif1", "peripheral_clk", 0, 8),
-	MSTPCR("scif2", "peripheral_clk", 0, 7),
-	MSTPCR("scif3", "bus_clk", 0, 6),
-	MSTPCR("scif4", "bus_clk", 0, 5),
-	MSTPCR("scif5", "bus_clk", 0, 4),
-	MSTPCR("msiof0", "bus_clk", 0, 2),
-	MSTPCR("msiof1", "bus_clk", 0, 1),
-	MSTPCR("keysc0", "r_clk", 1, 12),
-	MSTPCR("rtc0", "r_clk", 1, 11),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("i2c1", "peripheral_clk", 1, 8),
-	MSTPCR("mmc0", "bus_clk", 2, 29),
-	MSTPCR("eth0", "bus_clk", 2, 28),
-	MSTPCR("atapi0", "bus_clk", 2, 26),
-	MSTPCR("tpu0", "bus_clk", 2, 25),
-	MSTPCR("irda0", "peripheral_clk", 2, 24),
-	MSTPCR("tsif0", "bus_clk", 2, 22),
-	MSTPCR("usb1", "bus_clk", 2, 21),
-	MSTPCR("usb0", "bus_clk", 2, 20),
-	MSTPCR("2dg0", "bus_clk", 2, 19),
-	MSTPCR("sdhi0", "bus_clk", 2, 18),
-	MSTPCR("sdhi1", "bus_clk", 2, 17),
-	MSTPCR("veu1", "bus_clk", 2, 15),
-	MSTPCR("ceu1", "bus_clk", 2, 13),
-	MSTPCR("beu1", "bus_clk", 2, 12),
-	MSTPCR("2ddmac0", "sh_clk", 2, 10),
-	MSTPCR("spu0", "bus_clk", 2, 9),
-	MSTPCR("jpu0", "bus_clk", 2, 6),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
+	MSTPCR("tlb0", "cpu_clk", 0, 31, 0),
+	MSTPCR("ic0", "cpu_clk", 0, 30, 0),
+	MSTPCR("oc0", "cpu_clk", 0, 29, 0),
+	MSTPCR("rs0", "bus_clk", 0, 28, 0),
+	MSTPCR("ilmem0", "cpu_clk", 0, 27, 0),
+	MSTPCR("l2c0", "sh_clk", 0, 26, 0),
+	MSTPCR("fpu0", "cpu_clk", 0, 24, 0),
+	MSTPCR("intc0", "peripheral_clk", 0, 22, 0),
+	MSTPCR("dmac0", "bus_clk", 0, 21, 0),
+	MSTPCR("sh0", "sh_clk", 0, 20, 0),
+	MSTPCR("hudi0", "peripheral_clk", 0, 19, 0),
+	MSTPCR("ubc0", "cpu_clk", 0, 17, 0),
+	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
+	MSTPCR("cmt0", "r_clk", 0, 14, 0),
+	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
+	MSTPCR("dmac1", "bus_clk", 0, 12, 0),
+	MSTPCR("tmu1", "peripheral_clk", 0, 10, 0),
+	MSTPCR("scif0", "peripheral_clk", 0, 9, 0),
+	MSTPCR("scif1", "peripheral_clk", 0, 8, 0),
+	MSTPCR("scif2", "peripheral_clk", 0, 7, 0),
+	MSTPCR("scif3", "bus_clk", 0, 6, 0),
+	MSTPCR("scif4", "bus_clk", 0, 5, 0),
+	MSTPCR("scif5", "bus_clk", 0, 4, 0),
+	MSTPCR("msiof0", "bus_clk", 0, 2, 0),
+	MSTPCR("msiof1", "bus_clk", 0, 1, 0),
+	MSTPCR("keysc0", "r_clk", 1, 12, 0),
+	MSTPCR("rtc0", "r_clk", 1, 11, 0),
+	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
+	MSTPCR("i2c1", "peripheral_clk", 1, 8, 0),
+	MSTPCR("mmc0", "bus_clk", 2, 29, 0),
+	MSTPCR("eth0", "bus_clk", 2, 28, 0),
+	MSTPCR("atapi0", "bus_clk", 2, 26, 0),
+	MSTPCR("tpu0", "bus_clk", 2, 25, 0),
+	MSTPCR("irda0", "peripheral_clk", 2, 24, 0),
+	MSTPCR("tsif0", "bus_clk", 2, 22, 0),
+	MSTPCR("usb1", "bus_clk", 2, 21, 0),
+	MSTPCR("usb0", "bus_clk", 2, 20, 0),
+	MSTPCR("2dg0", "bus_clk", 2, 19, 0),
+	MSTPCR("sdhi0", "bus_clk", 2, 18, 0),
+	MSTPCR("sdhi1", "bus_clk", 2, 17, 0),
+	MSTPCR("veu1", "bus_clk", 2, 15, CLK_ENABLE_ON_INIT),
+	MSTPCR("ceu1", "bus_clk", 2, 13, 0),
+	MSTPCR("beu1", "bus_clk", 2, 12, 0),
+	MSTPCR("2ddmac0", "sh_clk", 2, 10, 0),
+	MSTPCR("spu0", "bus_clk", 2, 9, 0),
+	MSTPCR("jpu0", "bus_clk", 2, 6, 0),
+	MSTPCR("vou0", "bus_clk", 2, 5, 0),
+	MSTPCR("beu0", "bus_clk", 2, 4, 0),
+	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
+	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
+	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
+	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7343)
-	MSTPCR("uram0", "umem_clk", 0, 28),
-	MSTPCR("xymem0", "bus_clk", 0, 26),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("scif0", "peripheral_clk", 0, 7),
-	MSTPCR("scif1", "peripheral_clk", 0, 6),
-	MSTPCR("scif2", "peripheral_clk", 0, 5),
-	MSTPCR("scif3", "peripheral_clk", 0, 4),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("i2c1", "peripheral_clk", 1, 8),
-	MSTPCR("sdhi0", "peripheral_clk", 2, 18),
-	MSTPCR("keysc0", "r_clk", 2, 14),
-	MSTPCR("usbf0", "peripheral_clk", 2, 11),
-	MSTPCR("siu0", "bus_clk", 2, 8),
-	MSTPCR("jpu0", "bus_clk", 2, 6),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
+	MSTPCR("uram0", "umem_clk", 0, 28, CLK_ENABLE_ON_INIT),
+	MSTPCR("xymem0", "bus_clk", 0, 26, CLK_ENABLE_ON_INIT),
+	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
+	MSTPCR("cmt0", "r_clk", 0, 14, 0),
+	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
+	MSTPCR("scif0", "peripheral_clk", 0, 7, 0),
+	MSTPCR("scif1", "peripheral_clk", 0, 6, 0),
+	MSTPCR("scif2", "peripheral_clk", 0, 5, 0),
+	MSTPCR("scif3", "peripheral_clk", 0, 4, 0),
+	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
+	MSTPCR("i2c1", "peripheral_clk", 1, 8, 0),
+	MSTPCR("sdhi0", "peripheral_clk", 2, 18, 0),
+	MSTPCR("keysc0", "r_clk", 2, 14, 0),
+	MSTPCR("usbf0", "peripheral_clk", 2, 11, 0),
+	MSTPCR("siu0", "bus_clk", 2, 8, 0),
+	MSTPCR("jpu0", "bus_clk", 2, 6, CLK_ENABLE_ON_INIT),
+	MSTPCR("vou0", "bus_clk", 2, 5, 0),
+	MSTPCR("beu0", "bus_clk", 2, 4, 0),
+	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
+	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
+	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
+	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7366)
 	/* See page 52 of Datasheet V0.40: Overview -> Block Diagram */
-	MSTPCR("tlb0", "cpu_clk", 0, 31),
-	MSTPCR("ic0", "cpu_clk", 0, 30),
-	MSTPCR("oc0", "cpu_clk", 0, 29),
-	MSTPCR("rsmem0", "sh_clk", 0, 28),
-	MSTPCR("xymem0", "cpu_clk", 0, 26),
-	MSTPCR("intc30", "peripheral_clk", 0, 23),
-	MSTPCR("intc0", "peripheral_clk", 0, 22),
-	MSTPCR("dmac0", "bus_clk", 0, 21),
-	MSTPCR("sh0", "sh_clk", 0, 20),
-	MSTPCR("hudi0", "peripheral_clk", 0, 19),
-	MSTPCR("ubc0", "cpu_clk", 0, 17),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15),
-	MSTPCR("cmt0", "r_clk", 0, 14),
-	MSTPCR("rwdt0", "r_clk", 0, 13),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10),
-	MSTPCR("scif0", "peripheral_clk", 0, 7),
-	MSTPCR("scif1", "bus_clk", 0, 6),
-	MSTPCR("scif2", "bus_clk", 0, 5),
-	MSTPCR("msiof0", "peripheral_clk", 0, 2),
-	MSTPCR("sbr0", "peripheral_clk", 0, 1),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9),
-	MSTPCR("icb0", "bus_clk", 2, 27),
-	MSTPCR("meram0", "sh_clk", 2, 26),
-	MSTPCR("dacc0", "peripheral_clk", 2, 24),
-	MSTPCR("dacy0", "peripheral_clk", 2, 23),
-	MSTPCR("tsif0", "bus_clk", 2, 22),
-	MSTPCR("sdhi0", "bus_clk", 2, 18),
-	MSTPCR("mmcif0", "bus_clk", 2, 17),
-	MSTPCR("usb0", "bus_clk", 2, 11),
-	MSTPCR("siu0", "bus_clk", 2, 8),
-	MSTPCR("veu1", "bus_clk", 2, 7),
-	MSTPCR("vou0", "bus_clk", 2, 5),
-	MSTPCR("beu0", "bus_clk", 2, 4),
-	MSTPCR("ceu0", "bus_clk", 2, 3),
-	MSTPCR("veu0", "bus_clk", 2, 2),
-	MSTPCR("vpu0", "bus_clk", 2, 1),
-	MSTPCR("lcdc0", "bus_clk", 2, 0),
+	MSTPCR("tlb0", "cpu_clk", 0, 31, 0),
+	MSTPCR("ic0", "cpu_clk", 0, 30, 0),
+	MSTPCR("oc0", "cpu_clk", 0, 29, 0),
+	MSTPCR("rsmem0", "sh_clk", 0, 28, CLK_ENABLE_ON_INIT),
+	MSTPCR("xymem0", "cpu_clk", 0, 26, CLK_ENABLE_ON_INIT),
+	MSTPCR("intc30", "peripheral_clk", 0, 23, 0),
+	MSTPCR("intc0", "peripheral_clk", 0, 22, 0),
+	MSTPCR("dmac0", "bus_clk", 0, 21, 0),
+	MSTPCR("sh0", "sh_clk", 0, 20, 0),
+	MSTPCR("hudi0", "peripheral_clk", 0, 19, 0),
+	MSTPCR("ubc0", "cpu_clk", 0, 17, 0),
+	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
+	MSTPCR("cmt0", "r_clk", 0, 14, 0),
+	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
+	MSTPCR("flctl0", "peripheral_clk", 0, 10, 0),
+	MSTPCR("scif0", "peripheral_clk", 0, 7, 0),
+	MSTPCR("scif1", "bus_clk", 0, 6, 0),
+	MSTPCR("scif2", "bus_clk", 0, 5, 0),
+	MSTPCR("msiof0", "peripheral_clk", 0, 2, 0),
+	MSTPCR("sbr0", "peripheral_clk", 0, 1, 0),
+	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
+	MSTPCR("icb0", "bus_clk", 2, 27, 0),
+	MSTPCR("meram0", "sh_clk", 2, 26, 0),
+	MSTPCR("dacc0", "peripheral_clk", 2, 24, 0),
+	MSTPCR("dacy0", "peripheral_clk", 2, 23, 0),
+	MSTPCR("tsif0", "bus_clk", 2, 22, 0),
+	MSTPCR("sdhi0", "bus_clk", 2, 18, 0),
+	MSTPCR("mmcif0", "bus_clk", 2, 17, 0),
+	MSTPCR("usb0", "bus_clk", 2, 11, 0),
+	MSTPCR("siu0", "bus_clk", 2, 8, 0),
+	MSTPCR("veu1", "bus_clk", 2, 7, CLK_ENABLE_ON_INIT),
+	MSTPCR("vou0", "bus_clk", 2, 5, 0),
+	MSTPCR("beu0", "bus_clk", 2, 4, 0),
+	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
+	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
+	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
+	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
 };
 
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index b9c361e..42ce5fc 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -223,12 +223,6 @@ static struct platform_device *sh7343_devices[] __initdata = {
 
 static int __init sh7343_devices_setup(void)
 {
-	clk_always_enable("uram0"); /* URAM */
-	clk_always_enable("xymem0"); /* XYMEM */
-	clk_always_enable("veu0"); /* VEU */
-	clk_always_enable("vpu0"); /* VPU */
-	clk_always_enable("jpu0"); /* JPU */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 1 << 20);
 	platform_resource_setup_memory(&veu_device, "veu", 2 << 20);
 	platform_resource_setup_memory(&jpu_device, "jpu", 2 << 20);
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index d4a994b..d0172af 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -212,12 +212,6 @@ static struct platform_device *sh7366_devices[] __initdata = {
 
 static int __init sh7366_devices_setup(void)
 {
-	clk_always_enable("rsmem0"); /* RSMEM */
-	clk_always_enable("xymem0"); /* XYMEM */
-	clk_always_enable("veu1"); /* VEU-2 */
-	clk_always_enable("veu0"); /* VEU-1 */
-	clk_always_enable("vpu0"); /* VPU */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
 	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
 	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 5d6247f..ea524a2 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -352,12 +352,6 @@ static struct platform_device *sh7722_devices[] __initdata = {
 
 static int __init sh7722_devices_setup(void)
 {
-	clk_always_enable("uram0"); /* URAM */
-	clk_always_enable("xymem0"); /* XYMEM */
-	clk_always_enable("veu0"); /* VEU */
-	clk_always_enable("vpu0"); /* VPU */
-	clk_always_enable("jpu0"); /* JPU */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 1 << 20);
 	platform_resource_setup_memory(&veu_device, "veu", 2 << 20);
 	platform_resource_setup_memory(&jpu_device, "jpu", 2 << 20);
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index 1429fc5..04cb4aa 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -460,11 +460,6 @@ static struct platform_device *sh7723_devices[] __initdata = {
 
 static int __init sh7723_devices_setup(void)
 {
-	clk_always_enable("meram0"); /* MERAM */
-	clk_always_enable("veu1"); /* VEU2H1 */
-	clk_always_enable("veu0"); /* VEU2H0 */
-	clk_always_enable("vpu0"); /* VPU */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
 	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
 	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index a3039ce..080f541 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -281,10 +281,6 @@ static struct platform_device *sh7724_devices[] __initdata = {
 
 static int __init sh7724_devices_setup(void)
 {
-	clk_always_enable("vpu0");   /* VPU */
-	clk_always_enable("veu1");   /* VEU3F1 */
-	clk_always_enable("veu0");   /* VEU3F0 */
-
 	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
 	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
 	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
-- 
cgit v0.10.2


From ae891a4264c91246c0b4c22be68b9838747ae48d Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 05:30:10 +0900
Subject: sh: clkfwk: Fix up the clk_enable() error path.

There are a couple of instances where a clk_enable() can fail, which the
SH-Mobile code presently handles, but doesn't get reported all the way
back up. This fixes up the return type so the errors make it all the way
down to the drivers.

Additionally, we now also error out properly if the parent enable fails.
Prep work for aggressively turning off unused clocks on boot.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index e9fced9..5de72ee 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -10,7 +10,7 @@ struct clk;
 
 struct clk_ops {
 	void (*init)(struct clk *clk);
-	void (*enable)(struct clk *clk);
+	int (*enable)(struct clk *clk);
 	void (*disable)(struct clk *clk);
 	unsigned long (*recalc)(struct clk *clk);
 	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index c683be5..e027fe5 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -93,57 +93,78 @@ void propagate_rate(struct clk *tclk)
 	}
 }
 
-static int __clk_enable(struct clk *clk)
+static void __clk_disable(struct clk *clk)
 {
-	if (clk->usecount++ == 0) {
-		if (clk->parent)
-			__clk_enable(clk->parent);
-
-		if (clk->ops && clk->ops->enable)
-			clk->ops->enable(clk);
+	if (clk->usecount == 0) {
+		printk(KERN_ERR "Trying disable clock %s with 0 usecount\n",
+		       clk->name);
+		WARN_ON(1);
+		return;
 	}
 
-	return 0;
+	if (!(--clk->usecount)) {
+		if (likely(clk->ops && clk->ops->disable))
+			clk->ops->disable(clk);
+		if (likely(clk->parent))
+			__clk_disable(clk->parent);
+	}
 }
 
-int clk_enable(struct clk *clk)
+void clk_disable(struct clk *clk)
 {
 	unsigned long flags;
-	int ret;
 
 	if (!clk)
-		return -EINVAL;
+		return;
 
 	spin_lock_irqsave(&clock_lock, flags);
-	ret = __clk_enable(clk);
+	__clk_disable(clk);
 	spin_unlock_irqrestore(&clock_lock, flags);
-
-	return ret;
 }
-EXPORT_SYMBOL_GPL(clk_enable);
+EXPORT_SYMBOL_GPL(clk_disable);
 
-static void __clk_disable(struct clk *clk)
+static int __clk_enable(struct clk *clk)
 {
-	if (clk->usecount > 0 && !(--clk->usecount)) {
-		if (likely(clk->ops && clk->ops->disable))
-			clk->ops->disable(clk);
-		if (likely(clk->parent))
-			__clk_disable(clk->parent);
+	int ret = 0;
+
+	if (clk->usecount++ == 0) {
+		if (clk->parent) {
+			ret = __clk_enable(clk->parent);
+			if (unlikely(ret))
+				goto err;
+		}
+
+		if (clk->ops && clk->ops->enable) {
+			ret = clk->ops->enable(clk);
+			if (ret) {
+				if (clk->parent)
+					__clk_disable(clk->parent);
+				goto err;
+			}
+		}
 	}
+
+	return ret;
+err:
+	clk->usecount--;
+	return ret;
 }
 
-void clk_disable(struct clk *clk)
+int clk_enable(struct clk *clk)
 {
 	unsigned long flags;
+	int ret;
 
 	if (!clk)
-		return;
+		return -EINVAL;
 
 	spin_lock_irqsave(&clock_lock, flags);
-	__clk_disable(clk);
+	ret = __clk_enable(clk);
 	spin_unlock_irqrestore(&clock_lock, flags);
+
+	return ret;
 }
-EXPORT_SYMBOL_GPL(clk_disable);
+EXPORT_SYMBOL_GPL(clk_enable);
 
 static LIST_HEAD(root_clks);
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index f777d00..1c41db4 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -472,9 +472,9 @@ static int sh7722_siu_start_stop(struct clk *clk, int enable)
 	return 0;
 }
 
-static void sh7722_siu_enable(struct clk *clk)
+static int sh7722_siu_enable(struct clk *clk)
 {
-	sh7722_siu_start_stop(clk, 1);
+	return sh7722_siu_start_stop(clk, 1);
 }
 
 static void sh7722_siu_disable(struct clk *clk)
@@ -491,12 +491,13 @@ static struct clk_ops sh7722_siu_clk_ops = {
 
 #endif /* CONFIG_CPU_SUBTYPE_SH7343 */
 
-static void sh7722_video_enable(struct clk *clk)
+static int sh7722_video_enable(struct clk *clk)
 {
 	unsigned long r;
 
 	r = ctrl_inl(VCLKCR);
 	ctrl_outl( r & ~(1<<8), VCLKCR);
+	return 0;
 }
 
 static void sh7722_video_disable(struct clk *clk)
@@ -630,9 +631,9 @@ static int sh7722_mstpcr_start_stop(struct clk *clk, int enable)
 	return 0;
 }
 
-static void sh7722_mstpcr_enable(struct clk *clk)
+static int sh7722_mstpcr_enable(struct clk *clk)
 {
-	sh7722_mstpcr_start_stop(clk, 1);
+	return sh7722_mstpcr_start_stop(clk, 1);
 }
 
 static void sh7722_mstpcr_disable(struct clk *clk)
-- 
cgit v0.10.2


From aa87aa343f2cd236b5eccd643abd4df918ed5c4f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 05:51:05 +0900
Subject: sh: clkfwk: Improve the generic clk_set_parent() implementation.

This causes the generic clk_set_parent() implementation to be a bit more
intelligent. A clk_reparent() is added to move the clock over to the new
parent's sibling list, which then allows the generic rate propagation
code to succeed. This also becomes a nop if the new and old parents are
unchanged.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 5de72ee..fdb9156 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -48,6 +48,7 @@ int clk_init(void);
 unsigned long followparent_recalc(struct clk *);
 void recalculate_root_clocks(void);
 void propagate_rate(struct clk *);
+int clk_reparent(struct clk *child, struct clk *parent);
 void clk_recalc_rate(struct clk *);
 int clk_register(struct clk *);
 void clk_unregister(struct clk *);
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index e027fe5..e3d1de8 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -81,6 +81,19 @@ unsigned long followparent_recalc(struct clk *clk)
 	return clk->parent->rate;
 }
 
+int clk_reparent(struct clk *child, struct clk *parent)
+{
+	list_del_init(&child->sibling);
+	if (parent)
+		list_add(&child->sibling, &parent->children);
+	child->parent = parent;
+
+	/* now do the debugfs renaming to reattach the child
+	   to the proper parent */
+
+	return 0;
+}
+
 /* Propagate rate to children */
 void propagate_rate(struct clk *tclk)
 {
@@ -288,12 +301,19 @@ int clk_set_parent(struct clk *clk, struct clk *parent)
 
 	if (!parent || !clk)
 		return ret;
+	if (clk->parent == parent)
+		return 0;
 
 	spin_lock_irqsave(&clock_lock, flags);
 	if (clk->usecount == 0) {
 		if (clk->ops->set_parent)
 			ret = clk->ops->set_parent(clk, parent);
+		else
+			ret = clk_reparent(clk, parent);
+
 		if (ret == 0) {
+			pr_debug("clock: set parent of %s to %s (new rate %ld)\n",
+				 clk->name, clk->parent->name, clk->rate);
 			if (clk->ops->recalc)
 				clk->rate = clk->ops->recalc(clk);
 			propagate_rate(clk);
-- 
cgit v0.10.2


From f5c84cf50812c80133e64683d0500b2416d55cb3 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 05:59:27 +0900
Subject: sh: clkfwk: Tidy up on-chip clock registration and rate propagation.

This tidies up the set_rate hack that the on-chip clocks were abusing to
trigger rate propagation, which is now handled generically.

Additionally, now that CLK_ENABLE_ON_INIT is wired up where it needs to
be for these clocks, the clk_enable() can go away. In some cases this was
bumping up the refcount higher than it should have been.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index 0caca9f..435f4f1 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -127,7 +127,7 @@ static int shoc_clk_set_rate(struct clk *clk, unsigned long rate, int algo_id)
 	frqcr3 |= tmp << 6;
 	ctrl_outl(frqcr3, CPG2_FRQCR3);
 
-	return clk->parent->rate / frqcr3_divisors[tmp];
+	clk->rate = clk->parent->rate / frqcr3_divisors[tmp];
 
 	return 0;
 }
@@ -153,28 +153,17 @@ static struct clk *sh4202_onchip_clocks[] = {
 static int __init sh4202_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(sh4202_onchip_clocks); i++) {
 		struct clk *clkp = sh4202_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
-
 arch_initcall(sh4202_clk_init);
-
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
index 21bd70f..0110da6 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
@@ -93,28 +93,17 @@ static struct clk *sh7763_onchip_clocks[] = {
 static int __init sh7763_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(sh7763_onchip_clocks); i++) {
 		struct clk *clkp = sh7763_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
-
 arch_initcall(sh7763_clk_init);
-
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
index 4c11f89..0a22d50 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
@@ -99,28 +99,17 @@ static struct clk *sh7780_onchip_clocks[] = {
 static int __init sh7780_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(sh7780_onchip_clocks); i++) {
 		struct clk *clkp = sh7780_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
-
 arch_initcall(sh7780_clk_init);
-
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index edd4328..4dcd1f6 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -137,26 +137,17 @@ static struct clk *sh7785_onchip_clocks[] = {
 static int __init sh7785_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(sh7785_onchip_clocks); i++) {
 		struct clk *clkp = sh7785_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
 arch_initcall(sh7785_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
index 2825494..825556f 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
@@ -123,26 +123,17 @@ static struct clk *sh7786_onchip_clocks[] = {
 static int __init sh7786_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(sh7786_onchip_clocks); i++) {
 		struct clk *clkp = sh7786_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
 arch_initcall(sh7786_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
index 6e5c864..1eb149b 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
@@ -110,26 +110,17 @@ static struct clk *shx3_onchip_clocks[] = {
 static int __init shx3_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
-	int i;
+	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(shx3_onchip_clocks); i++) {
 		struct clk *clkp = shx3_onchip_clocks[i];
 
 		clkp->parent = clk;
-		clk_register(clkp);
-		clk_enable(clkp);
+		ret |= clk_register(clkp);
 	}
 
-	/*
-	 * Now that we have the rest of the clocks registered, we need to
-	 * force the parent clock to propagate so that these clocks will
-	 * automatically figure out their rate. We cheat by handing the
-	 * parent clock its current rate and forcing child propagation.
-	 */
-	clk_set_rate(clk, clk_get_rate(clk));
-
 	clk_put(clk);
 
-	return 0;
+	return ret;
 }
 arch_initcall(shx3_clk_init);
-- 
cgit v0.10.2


From 007e8363b656768fe3f59c180824ff704680bb25 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 06:05:32 +0900
Subject: sh: clkfwk: Kill off clk_recalc_rate().

The only user for this is the SH-Mobile r_clk, which is now added as a
root clock and can be kicked via propagate_rate() as usual. Given that,
there is no longer any need for the special clk_recalc_rate(), so we kill
it off.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index fdb9156..6a2f463 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -49,7 +49,6 @@ unsigned long followparent_recalc(struct clk *);
 void recalculate_root_clocks(void);
 void propagate_rate(struct clk *);
 int clk_reparent(struct clk *child, struct clk *parent);
-void clk_recalc_rate(struct clk *);
 int clk_register(struct clk *);
 void clk_unregister(struct clk *);
 
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index e3d1de8..7a356de9 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -280,20 +280,6 @@ int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id)
 }
 EXPORT_SYMBOL_GPL(clk_set_rate_ex);
 
-void clk_recalc_rate(struct clk *clk)
-{
-	unsigned long flags;
-
-	if (!clk->ops->recalc)
-		return;
-
-	spin_lock_irqsave(&clock_lock, flags);
-	clk->rate = clk->ops->recalc(clk);
-	propagate_rate(clk);
-	spin_unlock_irqrestore(&clock_lock, flags);
-}
-EXPORT_SYMBOL_GPL(clk_recalc_rate);
-
 int clk_set_parent(struct clk *clk, struct clk *parent)
 {
 	unsigned long flags;
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 1c41db4..426065b 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -912,7 +912,7 @@ int __init arch_clk_init(void)
 		clk_put(clk);
 	}
 
-	clk_recalc_rate(&sh7722_r_clock); /* make sure rate gets propagated */
+	propagate_rate(&sh7722_r_clock); /* make sure rate gets propagated */
 
 	return 0;
 }
-- 
cgit v0.10.2


From 31cb31f76e030ae05ed45f409ce5eb68ef2987f6 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 11 May 2009 21:57:08 +0200
Subject: ASoC: remove driver_data direct access of struct device

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c
index 510efa6..e4547de 100644
--- a/sound/soc/codecs/wm8400.c
+++ b/sound/soc/codecs/wm8400.c
@@ -1473,8 +1473,8 @@ static int wm8400_codec_probe(struct platform_device *dev)
 
 	codec = &priv->codec;
 	codec->private_data = priv;
-	codec->control_data = dev->dev.driver_data;
-	priv->wm8400 = dev->dev.driver_data;
+	codec->control_data = dev_get_drvdata(&dev->dev);
+	priv->wm8400 = dev_get_drvdata(&dev->dev);
 
 	ret = regulator_bulk_get(priv->wm8400->dev,
 				 ARRAY_SIZE(power), &power[0]);
diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c
index e043e3f..7a20587 100644
--- a/sound/soc/codecs/wm8731.c
+++ b/sound/soc/codecs/wm8731.c
@@ -666,14 +666,14 @@ static int __devinit wm8731_spi_probe(struct spi_device *spi)
 	codec->hw_write = (hw_write_t)wm8731_spi_write;
 	codec->dev = &spi->dev;
 
-	spi->dev.driver_data = wm8731;
+	dev_set_drvdata(&spi->dev, wm8731);
 
 	return wm8731_register(wm8731);
 }
 
 static int __devexit wm8731_spi_remove(struct spi_device *spi)
 {
-	struct wm8731_priv *wm8731 = spi->dev.driver_data;
+	struct wm8731_priv *wm8731 = dev_get_drvdata(&spi->dev);
 
 	wm8731_unregister(wm8731);
 
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index a6e8f3f..d121e58 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -1822,14 +1822,14 @@ static int __devinit wm8753_spi_probe(struct spi_device *spi)
 	codec->hw_write = (hw_write_t)wm8753_spi_write;
 	codec->dev = &spi->dev;
 
-	spi->dev.driver_data = wm8753;
+	dev_set_drvdata(&spi->dev, wm8753);
 
 	return wm8753_register(wm8753);
 }
 
 static int __devexit wm8753_spi_remove(struct spi_device *spi)
 {
-	struct wm8753_priv *wm8753 = spi->dev.driver_data;
+	struct wm8753_priv *wm8753 = dev_get_drvdata(&spi->dev);
 	wm8753_unregister(wm8753);
 	return 0;
 }
-- 
cgit v0.10.2


From 0dae89572cbcd5f676ea52a9448d9639d97a53d6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 06:18:09 +0900
Subject: sh: clkfwk: Wire up clk_get_sys() support.

This stubs in clk_get_sys() from the ARM clkdev implementation.
Tentatively conver the clk_get() lookup code to use this, and once the
rest of the in-tree users are happy with this, it can replace the
fallback lookups.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 6a2f463..e2f5bf1 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -37,6 +37,13 @@ struct clk {
 	unsigned long		arch_flags;
 };
 
+struct clk_lookup {
+	struct list_head	node;
+	const char		*dev_id;
+	const char		*con_id;
+	struct clk		*clk;
+};
+
 #define CLK_ENABLE_ON_INIT	(1 << 0)
 
 /* Should be defined by processor-specific code */
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 7a356de9..34707b8 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -10,6 +10,10 @@
  *
  *  Modified for omap shared clock framework by Tony Lindgren <tony@atomide.com>
  *
+ *  With clkdev bits:
+ *
+ *	Copyright (C) 2008 Russell King.
+ *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -335,14 +339,69 @@ long clk_round_rate(struct clk *clk, unsigned long rate)
 EXPORT_SYMBOL_GPL(clk_round_rate);
 
 /*
+ * Find the correct struct clk for the device and connection ID.
+ * We do slightly fuzzy matching here:
+ *  An entry with a NULL ID is assumed to be a wildcard.
+ *  If an entry has a device ID, it must match
+ *  If an entry has a connection ID, it must match
+ * Then we take the most specific entry - with the following
+ * order of precidence: dev+con > dev only > con only.
+ */
+static struct clk *clk_find(const char *dev_id, const char *con_id)
+{
+	struct clk_lookup *p;
+	struct clk *clk = NULL;
+	int match, best = 0;
+
+	list_for_each_entry(p, &clock_list, node) {
+		match = 0;
+		if (p->dev_id) {
+			if (!dev_id || strcmp(p->dev_id, dev_id))
+				continue;
+			match += 2;
+		}
+		if (p->con_id) {
+			if (!con_id || strcmp(p->con_id, con_id))
+				continue;
+			match += 1;
+		}
+		if (match == 0)
+			continue;
+
+		if (match > best) {
+			clk = p->clk;
+			best = match;
+		}
+	}
+	return clk;
+}
+
+struct clk *clk_get_sys(const char *dev_id, const char *con_id)
+{
+	struct clk *clk;
+
+	mutex_lock(&clock_list_sem);
+	clk = clk_find(dev_id, con_id);
+	mutex_unlock(&clock_list_sem);
+
+	return clk ? clk : ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(clk_get_sys);
+
+/*
  * Returns a clock. Note that we first try to use device id on the bus
  * and clock name. If this fails, we try to use clock name only.
  */
 struct clk *clk_get(struct device *dev, const char *id)
 {
+	const char *dev_id = dev ? dev_name(dev) : NULL;
 	struct clk *p, *clk = ERR_PTR(-ENOENT);
 	int idno;
 
+	clk = clk_get_sys(dev_id, id);
+	if (clk)
+		return clk;
+
 	if (dev == NULL || dev->bus != &platform_bus_type)
 		idno = -1;
 	else
-- 
cgit v0.10.2


From 77d1a4999502c260df0eb2de437d320bf8c64b36 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 May 2009 14:21:12 -0700
Subject: x86, boot: make symbols from the main vmlinux available

Make symbols from the main vmlinux, as opposed to just
compressed/vmlinux, available to header.S.  Also, export a few
additional symbols.

This will be used in a subsequent patch to export the total memory
footprint of the kernel.

[ Impact: enable future enhancement ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 6633b6e..75e0301 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -86,19 +86,27 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
 
 SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
 
-sed-offsets := -e 's/^00*/0/' \
-        -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
 
-quiet_cmd_offsets = OFFSETS $@
-      cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@
+quiet_cmd_voffset = VOFFSET $@
+      cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
 
-$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE
-	$(call if_changed,offsets)
+targets += voffset.h
+$(obj)/voffset.h: vmlinux FORCE
+	$(call if_changed,voffset)
+
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+
+quiet_cmd_zoffset = ZOFFSET $@
+      cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
+
+targets += zoffset.h
+$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,zoffset)
 
-targets += offsets.h
 
 AFLAGS_header.o += -I$(obj)
-$(obj)/header.o: $(obj)/offsets.h
+$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
 
 LDFLAGS_setup.elf	:= -T
 $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 5d84d1c..2728514 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -22,7 +22,8 @@
 #include <asm/page_types.h>
 #include <asm/setup.h>
 #include "boot.h"
-#include "offsets.h"
+#include "voffset.h"
+#include "zoffset.h"
 
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
 SYSSEG		= 0x1000		/* historical load address >> 4 */
@@ -212,8 +213,8 @@ hardware_subarch:	.long 0			# subarchitecture, added with 2.07
 
 hardware_subarch_data:	.quad 0
 
-payload_offset:		.long input_data
-payload_length:		.long input_data_end-input_data
+payload_offset:		.long ZO_input_data
+payload_length:		.long ZO_z_input_len
 
 setup_data:		.quad 0			# 64-bit physical pointer to
 						# single linked list of
-- 
cgit v0.10.2


From 40b387a8a9a821878ecdf9fb117958c426fc1385 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 May 2009 14:41:55 -0700
Subject: x86, boot: use LOAD_PHYSICAL_ADDR on 64 bits

Use LOAD_PHYSICAL_ADDR instead of CONFIG_PHYSICAL_START in the 64-bit
decompression code, for equivalence with the 32-bit code.

[ Impact: cleanup, increases code similarity ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2b9f251..4135d43 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -87,7 +87,7 @@ ENTRY(startup_32)
 	addl	$(PMD_PAGE_SIZE -1), %ebx
 	andl	$PMD_PAGE_MASK, %ebx
 #else
-	movl	$CONFIG_PHYSICAL_START, %ebx
+	movl	$LOAD_PHYSICAL_ADDR, %ebx
 #endif
 
 	/* Target address to relocate to for decompression */
@@ -215,7 +215,7 @@ ENTRY(startup_64)
 	 *
 	 * If it is a relocatable kernel then decompress and run the kernel
 	 * from load address aligned to 2MB addr, otherwise decompress and
-	 * run the kernel from CONFIG_PHYSICAL_START
+	 * run the kernel from LOAD_PHYSICAL_ADDR
 	 *
 	 * We cannot rely on the calculation done in 32-bit mode, since we
 	 * may have been invoked via the 64-bit entry point.
@@ -228,7 +228,7 @@ ENTRY(startup_64)
 	andq	$PMD_PAGE_MASK, %rbp
 	movq	%rbp, %rbx
 #else
-	movq	$CONFIG_PHYSICAL_START, %rbp
+	movq	$LOAD_PHYSICAL_ADDR, %rbp
 	movq	%rbp, %rbx
 #endif
 
-- 
cgit v0.10.2


From 2ff799d3cff1ecb274049378b28120ee5c1c5e5f Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Mon, 11 May 2009 20:09:14 +0530
Subject: sched: Don't export sched_mc_power_savings on multi-socket single
 core system

Fix to prevent sched_mc_power_saving from being exported through sysfs
for multi-scoket single core system. Max cores should be always greater than
one (1). My earlier patch that introduced fix for not exporting
'sched_mc_power_saving' on laptops  broke it on multi-socket single core
system. This fix addresses issue on both laptop and multi-socket single
core system.
Below are the Test results:

1. Single socket - multi-core
       Before Patch: Does not export 'sched_mc_power_saving'
       After Patch: Does not export 'sched_mc_power_saving'
       Result: Pass

2. Multi Socket - single core
      Before Patch: exports 'sched_mc_power_saving'
      After Patch: Does not export 'sched_mc_power_saving'
      Result: Pass

3. Multi Socket - Multi core
      Before Patch: exports 'sched_mc_power_saving'
      After Patch: exports 'sched_mc_power_saving'

[ Impact: make the sched_mc_power_saving control available more consistently ]

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Suresh B Siddha <suresh.b.siddha@intel.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090511143914.GB4853@dirshya.in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index f44b49a..066ef59 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -203,7 +203,8 @@ struct pci_bus;
 void x86_pci_root_bus_res_quirks(struct pci_bus *b);
 
 #ifdef CONFIG_SMP
-#define mc_capable()	(cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)
+#define mc_capable()	((boot_cpu_data.x86_max_cores > 1) && \
+			(cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids))
 #define smt_capable()			(smp_num_siblings > 1)
 #endif
 
-- 
cgit v0.10.2


From 7b6c6c77732ca1d2498eda7eabb64f9648896e96 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Mon, 11 May 2009 17:03:00 -0400
Subject: x86, 32-bit: fix kernel_trap_sp()

Use &regs->sp instead of regs for getting the top of stack in kernel mode.
(on x86-64, regs->sp always points the top of stack)

[ Impact: Oprofile decodes only stack for backtracing on i386 ]

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
[ v2: rename the API to kernel_stack_pointer(), move variable inside ]
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: systemtap@sources.redhat.com
Cc: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Jan Blunck <jblunck@suse.de>
Cc: Christoph Hellwig <hch@infradead.org>
LKML-Reference: <20090511210300.17332.67549.stgit@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index e304b66..624f133 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -187,14 +187,15 @@ static inline int v8086_mode(struct pt_regs *regs)
 
 /*
  * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
- * when it traps.  So regs will be the current sp.
+ * when it traps.  The previous stack will be directly underneath the saved
+ * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
  *
  * This is valid only for kernel mode traps.
  */
-static inline unsigned long kernel_trap_sp(struct pt_regs *regs)
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
-	return (unsigned long)regs;
+	return (unsigned long)(&regs->sp);
 #else
 	return regs->sp;
 #endif
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 04df67f..044897b 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -76,9 +76,9 @@ void
 x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	struct frame_head *head = (struct frame_head *)frame_pointer(regs);
-	unsigned long stack = kernel_trap_sp(regs);
 
 	if (!user_mode_vm(regs)) {
+		unsigned long stack = kernel_stack_pointer(regs);
 		if (depth)
 			dump_trace(NULL, regs, (unsigned long *)stack, 0,
 				   &backtrace_ops, &depth);
-- 
cgit v0.10.2


From 99aa45595f45603526513d5e29fc00f8afbf3913 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 May 2009 16:02:10 -0700
Subject: x86, boot: remove dead code from boot/compressed/head_*.S

Remove a couple of lines of dead code from
arch/x86/boot/compressed/head_*.S; all of these update registers that
are dead in the current code.

[ Impact: cleanup ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 470474b..2b8e0df 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -100,16 +100,6 @@ ENTRY(startup_32)
 	popl	%esi
 
 /*
- * Compute the kernel start address.
- */
-#ifdef CONFIG_RELOCATABLE
-	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebp
-	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp
-#else
-	movl	$LOAD_PHYSICAL_ADDR, %ebp
-#endif
-
-/*
  * Jump to the relocated address.
  */
 	leal	relocated(%ebx), %eax
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 4135d43..2bb500a 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -226,10 +226,8 @@ ENTRY(startup_64)
 	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
 	addq	$(PMD_PAGE_SIZE - 1), %rbp
 	andq	$PMD_PAGE_MASK, %rbp
-	movq	%rbp, %rbx
 #else
 	movq	$LOAD_PHYSICAL_ADDR, %rbp
-	movq	%rbp, %rbx
 #endif
 
 	/* Target address to relocate to for decompression */
-- 
cgit v0.10.2


From 37ba7ab5e33cebc25c68fffe33e9f21e7c2014e8 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 May 2009 15:56:08 -0700
Subject: x86, boot: make kernel_alignment adjustable; new bzImage fields

Make the kernel_alignment field adjustable; this allows us to set it
to a large value (intended to be 16 MB to avoid ZONE_DMA contention,
memory holes and other weirdness) while a smart bootloader can still
force a loading at a lesser alignment if absolutely necessary.

Also export pref_address (preferred loading address, corresponding to
the link-time address) and init_size, the total amount of linear
memory the kernel will require during initialization.

[ Impact: allows better kernel placement, gives bootloader more info ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 2b8e0df..75e4f00 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -69,8 +69,11 @@ ENTRY(startup_32)
 
 #ifdef CONFIG_RELOCATABLE
 	movl	%ebp, %ebx
-	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebx
-	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx
+	movl	BP_kernel_alignment(%esi), %eax
+	decl	%eax
+	addl    %eax, %ebx
+	notl	%eax
+	andl    %eax, %ebx
 #else
 	movl	$LOAD_PHYSICAL_ADDR, %ebx
 #endif
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2bb500a..f62c284 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -84,8 +84,11 @@ ENTRY(startup_32)
 
 #ifdef CONFIG_RELOCATABLE
 	movl	%ebp, %ebx
-	addl	$(PMD_PAGE_SIZE -1), %ebx
-	andl	$PMD_PAGE_MASK, %ebx
+	movl	BP_kernel_alignment(%esi), %eax
+	decl	%eax
+	addl	%eax, %ebx
+	notl	%eax
+	andl	%eax, %ebx
 #else
 	movl	$LOAD_PHYSICAL_ADDR, %ebx
 #endif
@@ -224,8 +227,11 @@ ENTRY(startup_64)
 	/* Start with the delta to where the kernel will run at. */
 #ifdef CONFIG_RELOCATABLE
 	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
-	addq	$(PMD_PAGE_SIZE - 1), %rbp
-	andq	$PMD_PAGE_MASK, %rbp
+	movl	BP_kernel_alignment(%rsi), %eax
+	decl	%eax
+	addq	%rax, %rbp
+	notq	%rax
+	andq	%rax, %rbp
 #else
 	movq	$LOAD_PHYSICAL_ADDR, %rbp
 #endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 2728514..a0b4269 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -116,7 +116,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x0209		# header version number (>= 0x0105)
+		.word	0x020a		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -201,7 +201,7 @@ relocatable_kernel:    .byte 1
 #else
 relocatable_kernel:    .byte 0
 #endif
-pad2:			.byte 0
+min_alignment:		.byte MIN_KERNEL_ALIGN_LG2	# minimum alignment
 pad3:			.word 0
 
 cmdline_size:   .long   COMMAND_LINE_SIZE-1     #length of the command line,
@@ -220,6 +220,17 @@ setup_data:		.quad 0			# 64-bit physical pointer to
 						# single linked list of
 						# struct setup_data
 
+pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
+
+#define ZO_INIT_SIZE	(ZO__end - ZO_startup_32 + ZO_extract_offset)
+#define VO_INIT_SIZE	(VO__end - VO__text)
+#if ZO_INIT_SIZE > VO_INIT_SIZE
+#define INIT_SIZE ZO_INIT_SIZE
+#else
+#define INIT_SIZE VO_INIT_SIZE
+#endif
+init_size:		.long INIT_SIZE		# kernel initialization size
+
 # End of setup header #####################################################
 
 	.section ".inittext", "ax"
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6ba23dd..418e632 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -8,11 +8,26 @@
 
 #ifdef __KERNEL__
 
+#include <asm/page_types.h>
+
 /* Physical address where kernel should be loaded. */
 #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
 				+ (CONFIG_PHYSICAL_ALIGN - 1)) \
 				& ~(CONFIG_PHYSICAL_ALIGN - 1))
 
+/* Minimum kernel alignment, as a power of two */
+#ifdef CONFIG_x86_64
+#define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
+#else
+#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT+1)
+#endif
+#define MIN_KERNEL_ALIGN	(_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
+
+#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \
+	(CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2))
+#error "Invalid value for CONFIG_PHYSICAL_ALIGN"
+#endif
+
 #ifdef CONFIG_KERNEL_BZIP2
 #define BOOT_HEAP_SIZE             0x400000
 #else /* !CONFIG_KERNEL_BZIP2 */
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 5a6aa1c..1a830cb 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -146,4 +146,5 @@ void foo(void)
 	OFFSET(BP_loadflags, boot_params, hdr.loadflags);
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
+	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 }
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index e72f062..898ecc4 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -125,6 +125,7 @@ int main(void)
 	OFFSET(BP_loadflags, boot_params, hdr.loadflags);
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
+	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 
 	BLANK();
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-- 
cgit v0.10.2


From d297366ba692faf1f0384811a6ff0b20c3470b1b Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 May 2009 16:06:23 -0700
Subject: x86: document new bzImage fields

Document the new bzImage fields for kernel memory placement.

[ Impact: adds documentation ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index e020366..cf8dfc7 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -50,6 +50,11 @@ Protocol 2.08:	(Kernel 2.6.26) Added crc32 checksum and ELF format
 Protocol 2.09:	(Kernel 2.6.26) Added a field of 64-bit physical
 		pointer to single linked list of struct	setup_data.
 
+Protocol 2.10:	(Kernel 2.6.31) A protocol for relaxed alignment
+		beyond the kernel_alignment added, new init_size and
+		pref_address fields.
+
+
 **** MEMORY LAYOUT
 
 The traditional memory map for the kernel loader, used for Image or
@@ -173,7 +178,7 @@ Offset	Proto	Name		Meaning
 022C/4	2.03+	ramdisk_max	Highest legal initrd address
 0230/4	2.05+	kernel_alignment Physical addr alignment required for kernel
 0234/1	2.05+	relocatable_kernel Whether kernel is relocatable or not
-0235/1	N/A	pad2		Unused
+0235/1	2.10+	min_alignment	Minimum alignment, as a power of two
 0236/2	N/A	pad3		Unused
 0238/4	2.06+	cmdline_size	Maximum size of the kernel command line
 023C/4	2.07+	hardware_subarch Hardware subarchitecture
@@ -182,6 +187,8 @@ Offset	Proto	Name		Meaning
 024C/4	2.08+	payload_length	Length of kernel payload
 0250/8	2.09+	setup_data	64-bit physical pointer to linked list
 				of struct setup_data
+0258/8	2.10+	pref_address	Preferred loading address
+0260/4	2.10+	init_size	Linear memory required during initialization
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -482,11 +489,19 @@ Protocol:	2.03+
   0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
 
 Field name:	kernel_alignment
-Type:		read (reloc)
+Type:		read/modify (reloc)
 Offset/size:	0x230/4
-Protocol:	2.05+
+Protocol:	2.05+ (read), 2.10+ (modify)
+
+  Alignment unit required by the kernel (if relocatable_kernel is
+  true.)  A relocatable kernel that is loaded at an alignment
+  incompatible with the value in this field will be realigned during
+  kernel initialization.
 
-  Alignment unit required by the kernel (if relocatable_kernel is true.)
+  Starting with protocol version 2.10, this reflects the kernel
+  alignment preferred for optimal performance; it is possible for the
+  loader to modify this field to permit a lesser alignment.  See the
+  min_alignment and pref_address field below.
 
 Field name:	relocatable_kernel
 Type:		read (reloc)
@@ -498,6 +513,22 @@ Protocol:	2.05+
   After loading, the boot loader must set the code32_start field to
   point to the loaded code, or to a boot loader hook.
 
+Field name:	min_alignment
+Type:		read (reloc)
+Offset/size:	0x235/1
+Protocol:	2.10+
+
+  This field, if nonzero, indicates as a power of two the minimum
+  alignment required, as opposed to preferred, by the kernel to boot.
+  If a boot loader makes use of this field, it should update the
+  kernel_alignment field with the alignment unit desired; typically:
+
+	kernel_alignment = 1 << min_alignment
+
+  There may be a considerable performance cost with an excessively
+  misaligned kernel.  Therefore, a loader should typically try each
+  power-of-two alignment from kernel_alignment down to this alignment.
+
 Field name:	cmdline_size
 Type:		read
 Offset/size:	0x238/4
@@ -582,6 +613,36 @@ Protocol:	2.09+
   sure to consider the case where the linked list already contains
   entries.
 
+Field name:	pref_address
+Type:		read (reloc)
+Offset/size:	0x258/8
+Protocol:	2.10+
+
+  This field, if nonzero, represents a preferred load address for the
+  kernel.  A relocating bootloader should attempt to load at this
+  address if possible.
+
+  A non-relocatable kernel will unconditionally move itself and to run
+  at this address.
+
+Field name:	init_size
+Type:		read
+Offset/size:	0x25c/4
+
+  This field indicates the amount of linear contiguous memory starting
+  at the kernel runtime start address that the kernel needs before it
+  is capable of examining its memory map.  This is not the same thing
+  as the total amount of memory the kernel needs to boot, but it can
+  be used by a relocating boot loader to help select a safe load
+  address for the kernel.
+
+  The kernel runtime start address is determined by the following algorithm:
+
+  if (relocatable_kernel)
+	runtime_start = align_up(load_address, kernel_alignment)
+  else
+	runtime_start = pref_address
+
 
 **** THE IMAGE CHECKSUM
 
-- 
cgit v0.10.2


From ceefccc93932b920a8ec6f35f596db05202a12fe Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 May 2009 16:12:16 -0700
Subject: x86: default CONFIG_PHYSICAL_START and CONFIG_PHYSICAL_ALIGN to 16 MB

Default CONFIG_PHYSICAL_START and CONFIG_PHYSICAL_ALIGN each to 16 MB,
so that both non-relocatable and relocatable kernels are loaded at
16 MB by a non-relocating bootloader.  This is somewhat hacky, but it
appears to be the only way to do this that does not break some some
set of existing bootloaders.

We want to avoid the bottom 16 MB because of large page breakup,
memory holes, and ZONE_DMA.  Embedded systems may need to reduce this,
or update their bootloaders to be aware of the new min_alignment field.

[ Impact: performance improvement, avoids problems on some systems ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5aee453..50fbb47 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1455,9 +1455,7 @@ config KEXEC_JUMP
 
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
-	default "0x1000000" if X86_NUMAQ
-	default "0x200000" if X86_64
-	default "0x100000"
+	default "0x1000000"
 	---help---
 	  This gives the physical address where the kernel is loaded.
 
@@ -1476,15 +1474,15 @@ config PHYSICAL_START
 	  to be specifically compiled to run from a specific memory area
 	  (normally a reserved region) and this option comes handy.
 
-	  So if you are using bzImage for capturing the crash dump, leave
-	  the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y.
-	  Otherwise if you plan to use vmlinux for capturing the crash dump
-	  change this value to start of the reserved region (Typically 16MB
-	  0x1000000). In other words, it can be set based on the "X" value as
-	  specified in the "crashkernel=YM@XM" command line boot parameter
-	  passed to the panic-ed kernel. Typically this parameter is set as
-	  crashkernel=64M@16M. Please take a look at
-	  Documentation/kdump/kdump.txt for more details about crash dumps.
+	  So if you are using bzImage for capturing the crash dump,
+	  leave the value here unchanged to 0x1000000 and set
+	  CONFIG_RELOCATABLE=y.  Otherwise if you plan to use vmlinux
+	  for capturing the crash dump change this value to start of
+	  the reserved region.  In other words, it can be set based on
+	  the "X" value as specified in the "crashkernel=YM@XM"
+	  command line boot parameter passed to the panic-ed
+	  kernel. Please take a look at Documentation/kdump/kdump.txt
+	  for more details about crash dumps.
 
 	  Usage of bzImage for capturing the crash dump is recommended as
 	  one does not have to build two kernels. Same kernel can be used
@@ -1521,9 +1519,8 @@ config X86_NEED_RELOCS
 config PHYSICAL_ALIGN
 	hex
 	prompt "Alignment value to which kernel should be aligned" if X86_32
-	default "0x100000" if X86_32
-	default "0x200000" if X86_64
-	range 0x2000 0x400000
+	default "0x1000000"
+	range 0x2000 0x1000000
 	---help---
 	  This value puts the alignment restrictions on physical address
 	  where kernel is loaded and run from. Kernel is compiled for an
-- 
cgit v0.10.2


From 26717808f93a27c22d4853c4fb17fa225f4ccc68 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 7 May 2009 14:19:34 -0700
Subject: x86: make CONFIG_RELOCATABLE the default

Remove the EXPERIMENTAL tag from CONFIG_RELOCATABLE and make it the
default.  Relocatable kernels have been used for a while now, and
should now have identical semantics to non-relocatable kernels when
loaded by a non-relocating bootloader.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50fbb47..3e0f80a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1495,8 +1495,8 @@ config PHYSICAL_START
 	  Don't change this unless you know what you are doing.
 
 config RELOCATABLE
-	bool "Build a relocatable kernel (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "Build a relocatable kernel"
+	default y
 	---help---
 	  This builds a kernel image that retains relocation information
 	  so it can be loaded someplace besides the default 1MB.
-- 
cgit v0.10.2


From c4a994645d04d5fa6bfa52a3204af87dd92168d5 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 May 2009 16:20:12 -0700
Subject: x86, defconfig: update to current, no material changes

Update defconfigs to reflect current configuration files.  No other
changes.

[ Impact: updates defconfigs to match what "make defconfig" generates ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 235b81d..70036a7 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,12 +1,13 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:50:58 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:19:47 2009
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
 # CONFIG_X86_64 is not set
 CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf32-i386"
 CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CMOS_UPDATE=y
@@ -33,6 +34,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y
 CONFIG_ARCH_HAS_DEFAULT_IDLE=y
 CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
 # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
 CONFIG_ARCH_HIBERNATION_POSSIBLE=y
 CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -40,15 +42,16 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 # CONFIG_AUDIT_ARCH is not set
 CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_X86_32_SMP=y
 CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_TRAMPOLINE=y
+CONFIG_X86_32_LAZY_GS=y
 CONFIG_KTIME_SCALAR=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
@@ -60,10 +63,17 @@ CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_TASKSTATS=y
@@ -113,23 +123,26 @@ CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
@@ -139,6 +152,7 @@ CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
@@ -154,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -167,7 +183,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -194,12 +209,12 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
+# CONFIG_X86_BIGSMP is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
 # CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
-# CONFIG_X86_VSMP is not set
 # CONFIG_X86_RDC321X is not set
+# CONFIG_X86_32_NON_STANDARD is not set
 CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_PARAVIRT_GUEST is not set
 # CONFIG_MEMTEST is not set
@@ -230,8 +245,10 @@ CONFIG_M686=y
 # CONFIG_GENERIC_CPU is not set
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CPU=y
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_X86_XADD=y
 # CONFIG_X86_PPRO_FENCE is not set
 CONFIG_X86_WP_WORKS_OK=y
@@ -247,7 +264,7 @@ CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_CPU_SUP_INTEL=y
 CONFIG_CPU_SUP_CYRIX_32=y
 CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_32=y
+CONFIG_CPU_SUP_CENTAUR=y
 CONFIG_CPU_SUP_TRANSMETA_32=y
 CONFIG_CPU_SUP_UMC_32=y
 CONFIG_X86_DS=y
@@ -279,6 +296,7 @@ CONFIG_MICROCODE_AMD=y
 CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
 # CONFIG_NOHIGHMEM is not set
 CONFIG_HIGHMEM4G=y
 # CONFIG_HIGHMEM64G is not set
@@ -302,6 +320,8 @@ CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_HIGHPTE=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
@@ -312,6 +332,7 @@ CONFIG_MTRR=y
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
@@ -363,7 +384,6 @@ CONFIG_ACPI_THERMAL=y
 CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
 # CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_ACPI_CONTAINER=y
 # CONFIG_ACPI_SBS is not set
@@ -425,6 +445,7 @@ CONFIG_PCI_BIOS=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
 CONFIG_PCI_DOMAINS=y
+# CONFIG_DMAR is not set
 CONFIG_PCIEPORTBUS=y
 # CONFIG_HOTPLUG_PCI_PCIE is not set
 CONFIG_PCIEAER=y
@@ -435,6 +456,7 @@ CONFIG_PCI_MSI=y
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
 CONFIG_ISA_DMA_API=y
 # CONFIG_ISA is not set
 # CONFIG_MCA is not set
@@ -481,7 +503,6 @@ CONFIG_NET=y
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -639,6 +660,7 @@ CONFIG_LLC=y
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 CONFIG_NET_SCHED=y
 
 #
@@ -696,6 +718,7 @@ CONFIG_NET_SCH_FIFO=y
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_HAMRADIO=y
 
 #
@@ -706,12 +729,10 @@ CONFIG_HAMRADIO=y
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_FIB_RULES=y
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=y
 # CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
 CONFIG_WIRELESS_OLD_REGULATORY=y
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
@@ -789,6 +810,7 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_ISL29003 is not set
 # CONFIG_C2PORT is not set
 
 #
@@ -842,6 +864,7 @@ CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
 CONFIG_ATA_ACPI=y
@@ -940,6 +963,7 @@ CONFIG_DM_ZERO=y
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_IFB is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
@@ -977,6 +1001,8 @@ CONFIG_MII=y
 CONFIG_NET_VENDOR_3COM=y
 # CONFIG_VORTEX is not set
 # CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 CONFIG_NET_TULIP=y
 # CONFIG_DE2104X is not set
 # CONFIG_TULIP is not set
@@ -1026,6 +1052,7 @@ CONFIG_E1000=y
 CONFIG_E1000E=y
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1040,6 +1067,7 @@ CONFIG_BNX2=y
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1077,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -1058,6 +1087,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
 CONFIG_TR=y
 # CONFIG_IBMOL is not set
 # CONFIG_IBMLS is not set
@@ -1073,8 +1103,8 @@ CONFIG_WLAN_80211=y
 # CONFIG_LIBERTAS is not set
 # CONFIG_LIBERTAS_THINFIRM is not set
 # CONFIG_AIRO is not set
-# CONFIG_HERMES is not set
 # CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
 # CONFIG_AIRO_CS is not set
 # CONFIG_PCMCIA_WL3501 is not set
 # CONFIG_PRISM54 is not set
@@ -1084,21 +1114,21 @@ CONFIG_WLAN_80211=y
 # CONFIG_RTL8187 is not set
 # CONFIG_ADM8211 is not set
 # CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
 # CONFIG_P54_COMMON is not set
 CONFIG_ATH5K=y
 # CONFIG_ATH5K_DEBUG is not set
 # CONFIG_ATH9K is not set
+# CONFIG_AR9170_USB is not set
 # CONFIG_IPW2100 is not set
 # CONFIG_IPW2200 is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IWLWIFI is not set
 # CONFIG_HOSTAP is not set
 # CONFIG_B43 is not set
 # CONFIG_B43LEGACY is not set
 # CONFIG_ZD1211RW is not set
 # CONFIG_RT2X00 is not set
+# CONFIG_HERMES is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1209,6 +1239,8 @@ CONFIG_INPUT_TABLET=y
 # CONFIG_TABLET_USB_KBTAB is not set
 # CONFIG_TABLET_USB_WACOM is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -1303,6 +1335,7 @@ CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 CONFIG_HW_RANDOM_INTEL=y
 CONFIG_HW_RANDOM_AMD=y
 CONFIG_HW_RANDOM_GEODE=y
@@ -1390,7 +1423,6 @@ CONFIG_I2C_I801=y
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -1424,6 +1456,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_ADT7475 is not set
 # CONFIG_SENSORS_K8TEMP is not set
 # CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATK0110 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_I5K_AMB is not set
@@ -1433,6 +1466,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_FSCHER is not set
 # CONFIG_SENSORS_FSCPOS is not set
 # CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_CORETEMP is not set
@@ -1448,11 +1482,14 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_LM90 is not set
 # CONFIG_SENSORS_LM92 is not set
 # CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
 # CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
 # CONFIG_SENSORS_MAX1619 is not set
 # CONFIG_SENSORS_MAX6650 is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_SIS5595 is not set
 # CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
@@ -1643,7 +1680,6 @@ CONFIG_FB_EFI=y
 # CONFIG_FB_3DFX is not set
 # CONFIG_FB_VOODOO1 is not set
 # CONFIG_FB_VT8623 is not set
-# CONFIG_FB_CYBLA is not set
 # CONFIG_FB_TRIDENT is not set
 # CONFIG_FB_ARK is not set
 # CONFIG_FB_PM3 is not set
@@ -1652,6 +1688,7 @@ CONFIG_FB_EFI=y
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
 # CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 # CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1738,6 +1775,8 @@ CONFIG_SND_PCI=y
 # CONFIG_SND_INDIGO is not set
 # CONFIG_SND_INDIGOIO is not set
 # CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1811,15 +1850,17 @@ CONFIG_USB_HIDDEV=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
 CONFIG_HID_LOGITECH=y
 CONFIG_LOGITECH_FF=y
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1885,11 +1926,11 @@ CONFIG_USB_PRINTER=y
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1931,7 +1972,6 @@ CONFIG_USB_LIBUSUAL=y
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
@@ -1947,6 +1987,7 @@ CONFIG_USB_LIBUSUAL=y
 #
 # OTG and related infrastructure
 #
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_UWB is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1958,8 +1999,10 @@ CONFIG_LEDS_CLASS=y
 #
 # CONFIG_LEDS_ALIX2 is not set
 # CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_CLEVO_MAIL is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1969,6 +2012,10 @@ CONFIG_LEDS_TRIGGERS=y
 # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
 # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
 CONFIG_EDAC=y
@@ -2037,6 +2084,7 @@ CONFIG_DMADEVICES=y
 # DMA Devices
 #
 # CONFIG_INTEL_IOATDMA is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 CONFIG_X86_PLATFORM_DEVICES=y
@@ -2071,6 +2119,7 @@ CONFIG_DMIID=y
 #
 # CONFIG_EXT2_FS is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
@@ -2101,6 +2150,11 @@ CONFIG_AUTOFS4_FS=y
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=y
@@ -2151,6 +2205,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -2164,7 +2219,6 @@ CONFIG_NFS_ACL_SUPPORT=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -2251,6 +2305,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
@@ -2266,6 +2321,7 @@ CONFIG_TIMER_STATS=y
 # CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_HIGHMEM is not set
 CONFIG_DEBUG_BUGVERBOSE=y
@@ -2289,13 +2345,19 @@ CONFIG_FRAME_POINTER=y
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_HAVE_HW_BRANCH_TRACER=y
+CONFIG_HAVE_FTRACE_SYSCALLS=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -2305,13 +2367,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y
 # CONFIG_SYSPROF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_FTRACE_SYSCALLS is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_POWER_TRACER is not set
 # CONFIG_STACK_TRACER is not set
 # CONFIG_HW_BRANCH_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -2321,7 +2391,6 @@ CONFIG_EARLY_PRINTK=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_DEBUG_PER_CPU_MAPS is not set
 # CONFIG_X86_PTDUMP is not set
 CONFIG_DEBUG_RODATA=y
@@ -2329,7 +2398,7 @@ CONFIG_DEBUG_RODATA=y
 CONFIG_DEBUG_NX_TEST=m
 # CONFIG_4KSTACKS is not set
 CONFIG_DOUBLEFAULT=y
-# CONFIG_MMIOTRACE is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
 CONFIG_IO_DELAY_TYPE_0X80=0
 CONFIG_IO_DELAY_TYPE_0XED=1
 CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2365,6 +2434,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y
 CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
 # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
 # CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_IMA is not set
 CONFIG_CRYPTO=y
 
 #
@@ -2380,10 +2451,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
 CONFIG_CRYPTO_HASH2=y
 CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_TEST is not set
@@ -2456,6 +2529,7 @@ CONFIG_CRYPTO_DES=y
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
@@ -2467,11 +2541,13 @@ CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_GEODE is not set
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
 CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
 CONFIG_VIRTUALIZATION=y
 # CONFIG_KVM is not set
 # CONFIG_LGUEST is not set
 # CONFIG_VIRTIO_PCI is not set
 # CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
@@ -2489,7 +2565,10 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_AUDIT_GENERIC=y
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9fe5d21..f3e53e2 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,12 +1,13 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:44:16 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:19:24 2009
 #
 CONFIG_64BIT=y
 # CONFIG_X86_32 is not set
 CONFIG_X86_64=y
 CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf64-x86-64"
 CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CMOS_UPDATE=y
@@ -34,6 +35,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y
 CONFIG_ARCH_HAS_DEFAULT_IDLE=y
 CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
 CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
 CONFIG_ARCH_HIBERNATION_POSSIBLE=y
 CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -41,14 +43,14 @@ CONFIG_ZONE_DMA32=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_X86_64_SMP=y
 CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_TRAMPOLINE=y
 # CONFIG_KTIME_SCALAR is not set
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
@@ -61,10 +63,17 @@ CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_TASKSTATS=y
@@ -114,23 +123,26 @@ CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
@@ -140,6 +152,7 @@ CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
@@ -155,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -167,7 +182,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
-CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 CONFIG_BLOCK_COMPAT=y
@@ -196,11 +210,10 @@ CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
 # CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
-# CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
 # CONFIG_X86_VSMP is not set
+# CONFIG_X86_UV is not set
 CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_PARAVIRT_GUEST is not set
 # CONFIG_MEMTEST is not set
@@ -230,10 +243,10 @@ CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_MCORE2 is not set
 CONFIG_GENERIC_CPU=y
 CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=6
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_TSC=y
 CONFIG_X86_CMPXCHG64=y
@@ -242,7 +255,7 @@ CONFIG_X86_MINIMUM_CPU_FAMILY=64
 CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_CPU_SUP_INTEL=y
 CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_64=y
+CONFIG_CPU_SUP_CENTAUR=y
 CONFIG_X86_DS=y
 CONFIG_X86_PTRACE_BTS=y
 CONFIG_HPET_TIMER=y
@@ -269,6 +282,7 @@ CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_INTEL=y
 CONFIG_X86_MCE_AMD=y
+CONFIG_X86_MCE_THRESHOLD=y
 # CONFIG_I8K is not set
 CONFIG_MICROCODE=y
 CONFIG_MICROCODE_INTEL=y
@@ -276,6 +290,7 @@ CONFIG_MICROCODE_AMD=y
 CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
 CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
 CONFIG_DIRECT_GBPAGES=y
 CONFIG_NUMA=y
@@ -309,6 +324,8 @@ CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
 CONFIG_X86_RESERVE_LOW_64K=y
@@ -317,6 +334,7 @@ CONFIG_MTRR=y
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
@@ -325,9 +343,10 @@ CONFIG_HZ=1000
 CONFIG_SCHED_HRTICK=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+# CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
 # CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
 # CONFIG_CMDLINE_BOOL is not set
@@ -370,7 +389,6 @@ CONFIG_ACPI_NUMA=y
 CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
 # CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_ACPI_CONTAINER=y
 # CONFIG_ACPI_SBS is not set
@@ -436,6 +454,7 @@ CONFIG_PCI_MSI=y
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
 CONFIG_ISA_DMA_API=y
 CONFIG_K8_NB=y
 CONFIG_PCCARD=y
@@ -481,7 +500,6 @@ CONFIG_NET=y
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -639,6 +657,7 @@ CONFIG_LLC=y
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 CONFIG_NET_SCHED=y
 
 #
@@ -696,6 +715,7 @@ CONFIG_NET_SCH_FIFO=y
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_HAMRADIO=y
 
 #
@@ -706,12 +726,10 @@ CONFIG_HAMRADIO=y
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_FIB_RULES=y
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=y
 # CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
 CONFIG_WIRELESS_OLD_REGULATORY=y
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
@@ -788,9 +806,8 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_TIFM_CORE is not set
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_SGI_XP is not set
 # CONFIG_HP_ILO is not set
-# CONFIG_SGI_GRU is not set
+# CONFIG_ISL29003 is not set
 # CONFIG_C2PORT is not set
 
 #
@@ -844,6 +861,7 @@ CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
 CONFIG_ATA_ACPI=y
@@ -940,6 +958,7 @@ CONFIG_DM_ZERO=y
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_IFB is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
@@ -977,6 +996,8 @@ CONFIG_MII=y
 CONFIG_NET_VENDOR_3COM=y
 # CONFIG_VORTEX is not set
 # CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 CONFIG_NET_TULIP=y
 # CONFIG_DE2104X is not set
 # CONFIG_TULIP is not set
@@ -1026,6 +1047,7 @@ CONFIG_E1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1040,6 +1062,7 @@ CONFIG_TIGON3=y
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1072,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -1058,6 +1082,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
 CONFIG_TR=y
 # CONFIG_IBMOL is not set
 # CONFIG_3C359 is not set
@@ -1072,8 +1097,8 @@ CONFIG_WLAN_80211=y
 # CONFIG_LIBERTAS is not set
 # CONFIG_LIBERTAS_THINFIRM is not set
 # CONFIG_AIRO is not set
-# CONFIG_HERMES is not set
 # CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
 # CONFIG_AIRO_CS is not set
 # CONFIG_PCMCIA_WL3501 is not set
 # CONFIG_PRISM54 is not set
@@ -1083,21 +1108,21 @@ CONFIG_WLAN_80211=y
 # CONFIG_RTL8187 is not set
 # CONFIG_ADM8211 is not set
 # CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
 # CONFIG_P54_COMMON is not set
 CONFIG_ATH5K=y
 # CONFIG_ATH5K_DEBUG is not set
 # CONFIG_ATH9K is not set
+# CONFIG_AR9170_USB is not set
 # CONFIG_IPW2100 is not set
 # CONFIG_IPW2200 is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IWLWIFI is not set
 # CONFIG_HOSTAP is not set
 # CONFIG_B43 is not set
 # CONFIG_B43LEGACY is not set
 # CONFIG_ZD1211RW is not set
 # CONFIG_RT2X00 is not set
+# CONFIG_HERMES is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1208,6 +1233,8 @@ CONFIG_INPUT_TABLET=y
 # CONFIG_TABLET_USB_KBTAB is not set
 # CONFIG_TABLET_USB_WACOM is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -1301,6 +1328,7 @@ CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 # CONFIG_HW_RANDOM_INTEL is not set
 # CONFIG_HW_RANDOM_AMD is not set
 CONFIG_NVRAM=y
@@ -1382,7 +1410,6 @@ CONFIG_I2C_I801=y
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -1416,6 +1443,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_ADT7475 is not set
 # CONFIG_SENSORS_K8TEMP is not set
 # CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATK0110 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_I5K_AMB is not set
@@ -1425,6 +1453,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_FSCHER is not set
 # CONFIG_SENSORS_FSCPOS is not set
 # CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_CORETEMP is not set
@@ -1440,11 +1469,14 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_LM90 is not set
 # CONFIG_SENSORS_LM92 is not set
 # CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
 # CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
 # CONFIG_SENSORS_MAX1619 is not set
 # CONFIG_SENSORS_MAX6650 is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_SIS5595 is not set
 # CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
@@ -1635,6 +1667,7 @@ CONFIG_FB_EFI=y
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
 # CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 # CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1720,6 +1753,8 @@ CONFIG_SND_PCI=y
 # CONFIG_SND_INDIGO is not set
 # CONFIG_SND_INDIGOIO is not set
 # CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1792,15 +1827,17 @@ CONFIG_USB_HIDDEV=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
 CONFIG_HID_LOGITECH=y
 CONFIG_LOGITECH_FF=y
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1866,11 +1903,11 @@ CONFIG_USB_PRINTER=y
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1912,7 +1949,6 @@ CONFIG_USB_LIBUSUAL=y
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
@@ -1928,6 +1964,7 @@ CONFIG_USB_LIBUSUAL=y
 #
 # OTG and related infrastructure
 #
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_UWB is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1939,8 +1976,10 @@ CONFIG_LEDS_CLASS=y
 #
 # CONFIG_LEDS_ALIX2 is not set
 # CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_CLEVO_MAIL is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1950,6 +1989,10 @@ CONFIG_LEDS_TRIGGERS=y
 # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
 # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
 CONFIG_EDAC=y
@@ -2018,6 +2061,7 @@ CONFIG_DMADEVICES=y
 # DMA Devices
 #
 # CONFIG_INTEL_IOATDMA is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 CONFIG_X86_PLATFORM_DEVICES=y
@@ -2051,6 +2095,7 @@ CONFIG_DMIID=y
 #
 # CONFIG_EXT2_FS is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
@@ -2082,6 +2127,11 @@ CONFIG_AUTOFS4_FS=y
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=y
@@ -2132,6 +2182,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -2145,7 +2196,6 @@ CONFIG_NFS_ACL_SUPPORT=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -2232,6 +2282,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
@@ -2247,6 +2298,7 @@ CONFIG_TIMER_STATS=y
 # CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
@@ -2269,13 +2321,19 @@ CONFIG_FRAME_POINTER=y
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_HAVE_HW_BRANCH_TRACER=y
+CONFIG_HAVE_FTRACE_SYSCALLS=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -2285,13 +2343,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y
 # CONFIG_SYSPROF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_FTRACE_SYSCALLS is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_POWER_TRACER is not set
 # CONFIG_STACK_TRACER is not set
 # CONFIG_HW_BRANCH_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -2301,14 +2367,13 @@ CONFIG_EARLY_PRINTK=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_DEBUG_PER_CPU_MAPS is not set
 # CONFIG_X86_PTDUMP is not set
 CONFIG_DEBUG_RODATA=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_NX_TEST=m
 # CONFIG_IOMMU_DEBUG is not set
-# CONFIG_MMIOTRACE is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
 CONFIG_IO_DELAY_TYPE_0X80=0
 CONFIG_IO_DELAY_TYPE_0XED=1
 CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2344,6 +2409,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y
 CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
 # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
 # CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_IMA is not set
 CONFIG_CRYPTO=y
 
 #
@@ -2359,10 +2426,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
 CONFIG_CRYPTO_HASH2=y
 CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_TEST is not set
@@ -2414,6 +2483,7 @@ CONFIG_CRYPTO_SHA1=y
 #
 CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_AES_X86_64 is not set
+# CONFIG_CRYPTO_AES_NI_INTEL is not set
 # CONFIG_CRYPTO_ANUBIS is not set
 CONFIG_CRYPTO_ARC4=y
 # CONFIG_CRYPTO_BLOWFISH is not set
@@ -2435,6 +2505,7 @@ CONFIG_CRYPTO_DES=y
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
@@ -2444,10 +2515,12 @@ CONFIG_CRYPTO_DES=y
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
 CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
 CONFIG_VIRTUALIZATION=y
 # CONFIG_KVM is not set
 # CONFIG_VIRTIO_PCI is not set
 # CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
@@ -2464,7 +2537,10 @@ CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From fe83fcc0a14dcf71996de5eb84771b2369ba7abc Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 11 May 2009 16:23:16 -0700
Subject: x86, defconfig: update kernel position parameters

Update CONFIG_RELOCATABLE, CONFIG_PHYSICAL_START and
CONFIG_PHYSICAL_ALIGN to reflect the current defaults.

[ Impact: make defconfig match Kconfig defaults ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 70036a7..edb992e 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.30-rc2
-# Mon May 11 16:19:47 2009
+# Mon May 11 16:21:55 2009
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
@@ -343,8 +343,9 @@ CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
 # CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_RELOCATABLE=y
+CONFIG_X86_NEED_RELOCS=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
 # CONFIG_CMDLINE_BOOL is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index f3e53e2..4ba7d4e 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.30-rc2
-# Mon May 11 16:19:24 2009
+# Mon May 11 16:22:00 2009
 #
 CONFIG_64BIT=y
 # CONFIG_X86_32 is not set
@@ -345,7 +345,7 @@ CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
 # CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
+CONFIG_RELOCATABLE=y
 CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
-- 
cgit v0.10.2


From 5031296c57024a78ddad4edfc993367dbf4abb98 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 7 May 2009 16:54:11 -0700
Subject: x86: add extension fields for bootloader type and version

A long ago, in days of yore, it all began with a god named Thor.
There were vikings and boats and some plans for a Linux kernel
header.  Unfortunately, a single 8-bit field was used for bootloader
type and version.  This has generally worked without *too* much pain,
but we're getting close to flat running out of ID fields.

Add extension fields for both type and version.  The type will be
extended if it the old field is 0xE; the version is a simple MSB
extension.

Keep /proc/sys/kernel/bootloader_type containing
(type << 4) + (ver & 0xf) for backwards compatiblity, but also add
/proc/sys/kernel/bootloader_version which contains the full version
number.

[ Impact: new feature to support more bootloaders ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index cf8dfc7..8da3a79 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -50,10 +50,9 @@ Protocol 2.08:	(Kernel 2.6.26) Added crc32 checksum and ELF format
 Protocol 2.09:	(Kernel 2.6.26) Added a field of 64-bit physical
 		pointer to single linked list of struct	setup_data.
 
-Protocol 2.10:	(Kernel 2.6.31) A protocol for relaxed alignment
+Protocol 2.10:	(Kernel 2.6.31) Added a protocol for relaxed alignment
 		beyond the kernel_alignment added, new init_size and
-		pref_address fields.
-
+		pref_address fields.  Added extended boot loader IDs.
 
 **** MEMORY LAYOUT
 
@@ -173,7 +172,8 @@ Offset	Proto	Name		Meaning
 021C/4	2.00+	ramdisk_size	initrd size (set by boot loader)
 0220/4	2.00+	bootsect_kludge	DO NOT USE - for bootsect.S use only
 0224/2	2.01+	heap_end_ptr	Free memory after setup end
-0226/2	N/A	pad1		Unused
+0226/1	2.02+(3 ext_loader_ver	Extended boot loader version
+0227/1	2.02+(3	ext_loader_type	Extended boot loader ID
 0228/4	2.02+	cmd_line_ptr	32-bit pointer to the kernel command line
 022C/4	2.03+	ramdisk_max	Highest legal initrd address
 0230/4	2.05+	kernel_alignment Physical addr alignment required for kernel
@@ -197,6 +197,8 @@ Offset	Proto	Name		Meaning
     field are unusable, which means the size of a bzImage kernel
     cannot be determined.
 
+(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
+
 If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
 the boot protocol version is "old".  Loading an old kernel, the
 following parameters should be assumed:
@@ -350,18 +352,32 @@ Protocol:	2.00+
   0xTV here, where T is an identifier for the boot loader and V is
   a version number.  Otherwise, enter 0xFF here.
 
+  For boot loader IDs above T = 0xD, write T = 0xE to this field and
+  write the extended ID minus 0x10 to the ext_loader_type field.
+  Similarly, the ext_loader_ver field can be used to provide more than
+  four bits for the bootloader version.
+
+  For example, for T = 0x15, V = 0x234, write:
+
+  type_of_loader  <- 0xE4
+  ext_loader_type <- 0x05
+  ext_loader_ver  <- 0x23
+
   Assigned boot loader ids:
 	0  LILO			(0x00 reserved for pre-2.00 bootloader)
 	1  Loadlin
 	2  bootsect-loader	(0x20, all other values reserved)
-	3  SYSLINUX
-	4  EtherBoot
+	3  Syslinux
+	4  Etherboot/gPXE
 	5  ELILO
 	7  GRUB
-	8  U-BOOT
+	8  U-Boot
 	9  Xen
 	A  Gujin
 	B  Qemu
+	C  Arcturus Networks uCbootloader
+	E  Extended		(see ext_loader_type)
+	F  Special		(0xFF = undefined)
 
   Please contact <hpa@zytor.com> if you need a bootloader ID
   value assigned.
@@ -460,6 +476,35 @@ Protocol:	2.01+
   Set this field to the offset (from the beginning of the real-mode
   code) of the end of the setup stack/heap, minus 0x0200.
 
+Field name:	ext_loader_ver
+Type:		write (optional)
+Offset/size:	0x226/1
+Protocol:	2.02+
+
+  This field is used as an extension of the version number in the
+  type_of_loader field.  The total version number is considered to be
+  (type_of_loader & 0x0f) + (ext_loader_ver << 4).
+
+  The use of this field is boot loader specific.  If not written, it
+  is zero.
+
+  Kernels prior to 2.6.31 did not recognize this field, but it is safe
+  to write for protocol version 2.02 or higher.
+
+Field name:	ext_loader_type
+Type:		write (obligatory if (type_of_loader & 0xf0) == 0xe0)
+Offset/size:	0x227/1
+Protocol:	2.02+
+
+  This field is used as an extension of the type number in
+  type_of_loader field.  If the type in type_of_loader is 0xE, then
+  the actual type is (ext_loader_type + 0x10).
+
+  This field is ignored if the type in type_of_loader is not 0xE.
+
+  Kernels prior to 2.6.31 did not recognize this field, but it is safe
+  to write for protocol version 2.02 or higher.
+
 Field name:	cmd_line_ptr
 Type:		write (obligatory)
 Offset/size:	0x228/4
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index a0b4269..68c3bfb 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -169,7 +169,11 @@ heap_end_ptr:	.word	_end+STACK_SIZE-512
 					# end of setup code can be used by setup
 					# for local heap purposes.
 
-pad1:		.word	0
+ext_loader_ver:
+		.byte	0		# Extended boot loader version
+ext_loader_type:
+		.byte	0		# Extended boot loader type
+
 cmd_line_ptr:	.long	0		# (Header version 0x0202 or later)
 					# If nonzero, a 32-bit pointer
 					# to the kernel command line.
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 433adae..1724e8d 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -50,7 +50,8 @@ struct setup_header {
 	__u32	ramdisk_size;
 	__u32	bootsect_kludge;
 	__u16	heap_end_ptr;
-	__u16	_pad1;
+	__u8	ext_loader_ver;
+	__u8	ext_loader_type;
 	__u32	cmd_line_ptr;
 	__u32	initrd_addr_max;
 	__u32	kernel_alignment;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index fcf4d92..6384d25 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -814,6 +814,7 @@ extern unsigned int		BIOS_revision;
 
 /* Boot loader type from the setup header: */
 extern int			bootloader_type;
+extern int			bootloader_version;
 
 extern char			ignore_fpu_irq;
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b415843..2b09345 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -214,8 +214,8 @@ unsigned long mmu_cr4_features;
 unsigned long mmu_cr4_features = X86_CR4_PAE;
 #endif
 
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
+/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
+int bootloader_type, bootloader_version;
 
 /*
  * Setup options
@@ -706,6 +706,12 @@ void __init setup_arch(char **cmdline_p)
 #endif
 	saved_video_mode = boot_params.hdr.vid_mode;
 	bootloader_type = boot_params.hdr.type_of_loader;
+	if ((bootloader_type >> 4) == 0xe) {
+		bootloader_type &= 0xf;
+		bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
+	}
+	bootloader_version  = bootloader_type & 0xf;
+	bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
 
 #ifdef CONFIG_BLK_DEV_RAM
 	rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e3d2c7d..cf91c93 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -729,6 +729,14 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "bootloader_version",
+		.data		= &bootloader_version,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "kstack_depth_to_print",
 		.data		= &kstack_depth_to_print,
 		.maxlen		= sizeof(int),
-- 
cgit v0.10.2


From 37bcbf13d32e4e453e9def79ee72bd953b88302f Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 11 May 2009 13:59:10 -0400
Subject: IMA: use current_cred() instead of current->cred

Proper invocation of the current credentials is to use current_cred() not
current->cred.  This patches makes IMA use the new method.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_audit.c b/security/integrity/ima/ima_audit.c
index b628eea..ff513ff 100644
--- a/security/integrity/ima/ima_audit.c
+++ b/security/integrity/ima/ima_audit.c
@@ -41,7 +41,7 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode,
 
 	ab = audit_log_start(current->audit_context, GFP_KERNEL, audit_msgno);
 	audit_log_format(ab, "integrity: pid=%d uid=%u auid=%u ses=%u",
-			 current->pid, current->cred->uid,
+			 current->pid, current_cred()->uid,
 			 audit_get_loginuid(current),
 			 audit_get_sessionid(current));
 	audit_log_task_context(ab);
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 122f17f..cdae13c 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -184,7 +184,7 @@ int ima_path_check(struct path *path, int mask)
 		struct dentry *dentry = dget(path->dentry);
 		struct vfsmount *mnt = mntget(path->mnt);
 
-		file = dentry_open(dentry, mnt, O_RDONLY, current->cred);
+		file = dentry_open(dentry, mnt, O_RDONLY, current_cred());
 		rc = get_path_measurement(iint, file, dentry->d_name.name);
 	}
 out:
-- 
cgit v0.10.2


From f06dd16a03f6f7f72fab4db03be36e28c28c6fd6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 11 May 2009 13:59:16 -0400
Subject: IMA: Handle dentry_open failures

Currently IMA does not handle failures from dentry_open().  This means that we
leave a pointer set to ERR_PTR(errno) and then try to use it just a few lines
later in fput().  Oops.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index cdae13c..1987424 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -116,10 +116,6 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
 {
 	int rc = 0;
 
-	if (IS_ERR(file)) {
-		pr_info("%s dentry_open failed\n", filename);
-		return rc;
-	}
 	iint->opencount++;
 	iint->readcount++;
 
@@ -185,6 +181,12 @@ int ima_path_check(struct path *path, int mask)
 		struct vfsmount *mnt = mntget(path->mnt);
 
 		file = dentry_open(dentry, mnt, O_RDONLY, current_cred());
+		if (IS_ERR(file)) {
+			pr_info("%s dentry_open failed\n", dentry->d_name.name);
+			rc = PTR_ERR(file);
+			file = NULL;
+			goto out;
+		}
 		rc = get_path_measurement(iint, file, dentry->d_name.name);
 	}
 out:
-- 
cgit v0.10.2


From 1a62e958fa4aaeeb752311b4f5e16b2a86737b23 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 11 May 2009 13:59:22 -0400
Subject: IMA: open all files O_LARGEFILE

If IMA tried to measure a file which was larger than 4G dentry_open would fail
with -EOVERFLOW since IMA wasn't passing O_LARGEFILE.  This patch passes
O_LARGEFILE to all IMA opens to avoid this problem.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 1987424..c4228c0 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -180,7 +180,8 @@ int ima_path_check(struct path *path, int mask)
 		struct dentry *dentry = dget(path->dentry);
 		struct vfsmount *mnt = mntget(path->mnt);
 
-		file = dentry_open(dentry, mnt, O_RDONLY, current_cred());
+		file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE,
+				   current_cred());
 		if (IS_ERR(file)) {
 			pr_info("%s dentry_open failed\n", dentry->d_name.name);
 			rc = PTR_ERR(file);
-- 
cgit v0.10.2


From d93e4c940f51ae06b59c14523c4d55947f9597d6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 11 May 2009 20:47:15 -0400
Subject: securityfs: securityfs_remove should handle IS_ERR pointers

Both of the securityfs users (TPM and IMA) can call securityfs_remove and pass
an IS_ERR(dentry) in their failure paths.  This patch handles those rather
than panicing when it tries to start deferencing some negative memory.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/inode.c b/security/inode.c
index f3b91bf..f7496c6 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -287,7 +287,7 @@ void securityfs_remove(struct dentry *dentry)
 {
 	struct dentry *parent;
 
-	if (!dentry)
+	if (!dentry || IS_ERR(dentry))
 		return;
 
 	parent = dentry->d_parent;
-- 
cgit v0.10.2


From 0f0c85fc80adbbd2265d89867d743f929d516805 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 11 May 2009 16:08:00 -0400
Subject: ring-buffer: small optimizations

Doing some small changes in the fast path of the ring buffer recording
saves over 3% in the ring-buffer-benchmark test.

[ Impact: a little faster ring buffer recording ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 493cba4..f452de2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1000,7 +1000,7 @@ rb_event_index(struct ring_buffer_event *event)
 	return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static int
+static inline int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
 	     struct ring_buffer_event *event)
 {
@@ -1423,9 +1423,9 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * also be made. But only the entry that did the actual
 	 * commit will be something other than zero.
 	 */
-	if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
-	    rb_page_write(cpu_buffer->tail_page) ==
-	    rb_commit_index(cpu_buffer)) {
+	if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
+		   rb_page_write(cpu_buffer->tail_page) ==
+		   rb_commit_index(cpu_buffer))) {
 
 		delta = ts - cpu_buffer->write_stamp;
 
@@ -1436,7 +1436,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		if (unlikely(ts < cpu_buffer->write_stamp))
 			delta = 0;
 
-		if (test_time_stamp(delta)) {
+		else if (unlikely(test_time_stamp(delta))) {
 
 			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
 
@@ -1470,7 +1470,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * If the timestamp was commited, make the commit our entry
 	 * now so that we will update it when needed.
 	 */
-	if (commit)
+	if (unlikely(commit))
 		rb_set_commit_event(cpu_buffer, event);
 	else if (!rb_is_commit(cpu_buffer, event))
 		delta = 0;
-- 
cgit v0.10.2


From 88eb0125362f2ab272cbaf84252cf101ddc2dec9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 11 May 2009 16:28:23 -0400
Subject: ring-buffer: use internal time stamp function

The ring_buffer_time_stamp that is exported adds a little more overhead
than is needed for using it internally. This patch adds an internal
timestamp function that can be inlined (a single line function)
and used internally for the ring buffer.

[ Impact: a little less overhead to the ring buffer ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f452de2..a9e645a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -454,13 +454,18 @@ struct ring_buffer_iter {
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
+static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+{
+	/* shift to debug/test normalization and TIME_EXTENTS */
+	return buffer->clock() << DEBUG_SHIFT;
+}
+
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
-	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = buffer->clock() << DEBUG_SHIFT;
+	time = rb_time_stamp(buffer, cpu);
 	preempt_enable_no_resched_notrace();
 
 	return time;
@@ -1247,7 +1252,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 		cpu_buffer->tail_page = next_page;
 
 		/* reread the time stamp */
-		*ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
+		*ts = rb_time_stamp(buffer, cpu_buffer->cpu);
 		cpu_buffer->tail_page->page->time_stamp = *ts;
 	}
 
@@ -1413,7 +1418,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
 		return NULL;
 
-	ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
+	ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
 	/*
 	 * Only the first commit can update the timestamp.
-- 
cgit v0.10.2


From 168b6b1d0594c7866caa73b12f3b8d91075695f2 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 11 May 2009 22:11:05 -0400
Subject: ring-buffer: move code around to remove some branches

This is a bit of micro-optimizations. But since the ring buffer is used
in tracing every function call, it is an extreme hot path. Every nanosecond
counts.

This change shows over 5% improvement in the ring-buffer-benchmark.

[ Impact: more efficient code ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a9e645a..16b24d4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1400,7 +1400,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 		      unsigned long length)
 {
 	struct ring_buffer_event *event;
-	u64 ts, delta;
+	u64 ts, delta = 0;
 	int commit = 0;
 	int nr_loops = 0;
 
@@ -1431,20 +1431,21 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
 		   rb_page_write(cpu_buffer->tail_page) ==
 		   rb_commit_index(cpu_buffer))) {
+		u64 diff;
 
-		delta = ts - cpu_buffer->write_stamp;
+		diff = ts - cpu_buffer->write_stamp;
 
-		/* make sure this delta is calculated here */
+		/* make sure this diff is calculated here */
 		barrier();
 
 		/* Did the write stamp get updated already? */
 		if (unlikely(ts < cpu_buffer->write_stamp))
-			delta = 0;
+			goto get_event;
 
-		else if (unlikely(test_time_stamp(delta))) {
+		delta = diff;
+		if (unlikely(test_time_stamp(delta))) {
 
 			commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-
 			if (commit == -EBUSY)
 				return NULL;
 
@@ -1453,12 +1454,11 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 			RB_WARN_ON(cpu_buffer, commit < 0);
 		}
-	} else
-		/* Non commits have zero deltas */
-		delta = 0;
+	}
 
+ get_event:
 	event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
-	if (PTR_ERR(event) == -EAGAIN)
+	if (unlikely(PTR_ERR(event) == -EAGAIN))
 		goto again;
 
 	if (!event) {
-- 
cgit v0.10.2


From bc8e67409ccdcff72c3f1656b1fb1aad7ff396db Mon Sep 17 00:00:00 2001
From: Vincent Minet <vincent@vincent-minet.net>
Date: Fri, 15 May 2009 08:33:18 -0400
Subject: ext4: Fix spinlock assertions on UP systems

On UP systems without DEBUG_SPINLOCK, ext4_is_group_locked always fails
which triggers a BUG_ON() call.
This patch fixes it by using assert_spin_locked instead.

Signed-off-by: Vincent Minet <vincent@vincent-minet.net>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 149e02d..89190ae 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1583,12 +1583,6 @@ static inline void ext4_unlock_group(struct super_block *sb,
 	spin_unlock(ext4_group_lock_ptr(sb, group));
 }
 
-static inline int ext4_is_group_locked(struct super_block *sb,
-					ext4_group_t group)
-{
-	return spin_is_locked(ext4_group_lock_ptr(sb, group));
-}
-
 /*
  * Inodes and files operations
  */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e76459c..541bd9a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -436,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	for (i = 0; i < count; i++) {
 		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
 			ext4_fsblk_t blocknr;
@@ -460,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	for (i = 0; i < count; i++) {
 		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
 		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -1115,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 	struct super_block *sb = e4b->bd_sb;
 
 	BUG_ON(first + count > (sb->s_blocksize << 3));
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_free_blocks_double(inode, e4b, first, count);
 
@@ -1196,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
 	int ord;
 	void *buddy;
 
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	BUG_ON(ex == NULL);
 
 	buddy = mb_find_buddy(e4b, order, &max);
@@ -1260,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
 
 	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
 	BUG_ON(e4b->bd_group != ex->fe_group);
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_mark_used_double(e4b, start, len);
 
-- 
cgit v0.10.2


From f888e652d758bfe0c04c209b72a05972daeba386 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 12 May 2009 00:21:29 -0400
Subject: ext4: Simplify function signature for ext4_da_get_block_write()

The function ext4_da_get_block_write() is called in exactly one write,
and the last argument, create, is always 1.  Remove it to simplify the
code slightly.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4e7f363..476d843 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2000,7 +2000,7 @@ static void ext4_print_free_blocks(struct inode *inode)
 
 #define		EXT4_DELALLOC_RSVED	1
 static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
-				   struct buffer_head *bh_result, int create)
+				   struct buffer_head *bh_result)
 {
 	int ret;
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
@@ -2010,7 +2010,7 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 	handle = ext4_journal_current_handle();
 	BUG_ON(!handle);
 	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-				   bh_result, create, 0, EXT4_DELALLOC_RSVED);
+				   bh_result, 1, 0, EXT4_DELALLOC_RSVED);
 	if (ret <= 0)
 		return ret;
 
@@ -2088,7 +2088,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	if (!new.b_size)
 		return 0;
 
-	err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
+	err = ext4_da_get_block_write(mpd->inode, next, &new);
 	if (err) {
 		/*
 		 * If get block returns with error we simply
-- 
cgit v0.10.2


From e4d996ca806e93dddb5d76c0d3d859b494c559f6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 12 May 2009 00:25:28 -0400
Subject: ext4: Rename ext4_get_blocks_handle() to be ext4_ind_get_blocks()

The static function ext4_get_blocks_handle() is badly named.  Of
*course* it takes a handle.  Since its counterpart for extent-based
file is ext4_ext_get_blocks(), rename it to be ext4_ind_get_blocks().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 476d843..f758e80 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -914,7 +914,7 @@ err_out:
  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
  * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
  */
-static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
+static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 				  ext4_lblk_t iblock, unsigned int maxblocks,
 				  struct buffer_head *bh_result,
 				  int create, int extend_disksize)
@@ -1129,7 +1129,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
  * mapped.
  *
  * If file type is extents based, it will call ext4_ext_get_blocks(),
- * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
+ * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
  * based files
  *
  * On success, it returns the number of blocks being mapped or allocate.
@@ -1160,8 +1160,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
 				bh, 0, 0);
 	} else {
-		retval = ext4_get_blocks_handle(handle,
-				inode, block, max_blocks, bh, 0, 0);
+		retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
+					     bh, 0, 0);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
@@ -1215,7 +1215,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
 				bh, create, extend_disksize);
 	} else {
-		retval = ext4_get_blocks_handle(handle, inode, block,
+		retval = ext4_ind_get_blocks(handle, inode, block,
 				max_blocks, bh, create, extend_disksize);
 
 		if (retval > 0 && buffer_new(bh)) {
@@ -1297,7 +1297,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	err = ext4_get_blocks_wrap(handle, inode, block, 1,
 					&dummy, create, 1, 0);
 	/*
-	 * ext4_get_blocks_handle() returns number of blocks
+	 * ext4_get_blocks_wrap() returns number of blocks
 	 * mapped. 0 in case of a HOLE.
 	 */
 	if (err > 0) {
-- 
cgit v0.10.2


From 9541ba1d665542c96f7c0b5b836bbc1fd9d961b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Chris=20Pockel=C3=A9?= <chris.pockele.f1@gmail.com>
Date: Tue, 12 May 2009 08:08:53 +0200
Subject: ALSA: hda - Add support of Samsung NC10 mini notebook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add specific configuration for Samsung NC10 mini notebook.  Internal
mic/speakers will be correctly muted when plugging in external ones.
Mixer controls are added for speakers, headphones and PC beep.
"Boost" is added for the microphones.

Signed-off-by: Chris Pockelé <chris.pockele.f1@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 92c9d3a..14ed1d1 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -111,6 +111,7 @@ ALC662/663/272
   asus-mode6	ASUS
   dell		Dell with ALC272
   dell-zm1	Dell ZM1 with ALC272
+  samsung-nc10	Samsung NC10 mini notebook
   auto		auto-config reading BIOS (default)
 
 ALC882/885
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index db89612..c14020a 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -190,6 +190,7 @@ enum {
 	ALC663_ASUS_MODE6,
 	ALC272_DELL,
 	ALC272_DELL_ZM1,
+	ALC272_SAMSUNG_NC10,
 	ALC662_AUTO,
 	ALC662_MODEL_LAST,
 };
@@ -15120,6 +15121,38 @@ static struct hda_input_mux alc663_m51va_capture_source = {
 	},
 };
 
+#if 1 /* set to 0 for testing other input sources below */
+static struct hda_input_mux alc272_nc10_capture_source = {
+	.num_items = 2,
+	.items = {
+		{ "Autoselect Mic", 0x0 },
+		{ "Internal Mic", 0x1 },
+	},
+};
+#else
+static struct hda_input_mux alc272_nc10_capture_source = {
+	.num_items = 16,
+	.items = {
+		{ "Autoselect Mic", 0x0 },
+		{ "Internal Mic", 0x1 },
+		{ "In-0x02", 0x2 },
+		{ "In-0x03", 0x3 },
+		{ "In-0x04", 0x4 },
+		{ "In-0x05", 0x5 },
+		{ "In-0x06", 0x6 },
+		{ "In-0x07", 0x7 },
+		{ "In-0x08", 0x8 },
+		{ "In-0x09", 0x9 },
+		{ "In-0x0a", 0x0a },
+		{ "In-0x0b", 0x0b },
+		{ "In-0x0c", 0x0c },
+		{ "In-0x0d", 0x0d },
+		{ "In-0x0e", 0x0e },
+		{ "In-0x0f", 0x0f },
+	},
+};
+#endif
+
 /*
  * 2ch mode
  */
@@ -16151,6 +16184,23 @@ static struct snd_kcontrol_new alc662_ecs_mixer[] = {
 	{ } /* end */
 };
 
+static struct snd_kcontrol_new alc272_nc10_mixer[] = {
+	/* Master Playback automatically created from Speaker and Headphone */
+	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x02, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Headphone Playback Volume", 0x03, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Headphone Playback Switch", 0x21, 0x0, HDA_OUTPUT),
+
+	HDA_CODEC_VOLUME("Ext Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Ext Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Ext Mic Boost", 0x18, 0, HDA_INPUT),
+
+	HDA_CODEC_VOLUME("Int Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+	HDA_CODEC_MUTE("Int Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+	HDA_CODEC_VOLUME("Int Mic Boost", 0x19, 0, HDA_INPUT),
+	{ } /* end */
+};
+
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 #define alc662_loopbacks	alc880_loopbacks
 #endif
@@ -16186,6 +16236,7 @@ static const char *alc662_models[ALC662_MODEL_LAST] = {
 	[ALC663_ASUS_MODE6] = "asus-mode6",
 	[ALC272_DELL]		= "dell",
 	[ALC272_DELL_ZM1]	= "dell-zm1",
+	[ALC272_SAMSUNG_NC10]	= "samsung-nc10",
 	[ALC662_AUTO]		= "auto",
 };
 
@@ -16243,6 +16294,7 @@ static struct snd_pci_quirk alc662_cfg_tbl[] = {
 	SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_ECS),
 	SND_PCI_QUIRK(0x105b, 0x0d47, "Foxconn 45CMX/45GMX/45CMX-K",
 		      ALC662_3ST_6ch_DIG),
+	SND_PCI_QUIRK(0x144d, 0xca00, "Samsung NC10", ALC272_SAMSUNG_NC10),
 	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte 945GCM-S2L",
 		      ALC662_3ST_6ch_DIG),
 	SND_PCI_QUIRK(0x1565, 0x820f, "Biostar TA780G M2+", ALC662_3ST_6ch_DIG),
@@ -16514,6 +16566,18 @@ static struct alc_config_preset alc662_presets[] = {
 		.unsol_event = alc663_m51va_unsol_event,
 		.init_hook = alc663_m51va_inithook,
 	},
+	[ALC272_SAMSUNG_NC10] = {
+		.mixers = { alc272_nc10_mixer },
+		.init_verbs = { alc662_init_verbs,
+				alc663_21jd_amic_init_verbs },
+		.num_dacs = ARRAY_SIZE(alc272_dac_nids),
+		.dac_nids = alc272_dac_nids,
+		.num_channel_mode = ARRAY_SIZE(alc662_3ST_2ch_modes),
+		.channel_mode = alc662_3ST_2ch_modes,
+		.input_mux = &alc272_nc10_capture_source,
+		.unsol_event = alc663_mode4_unsol_event,
+		.init_hook = alc663_mode4_inithook,
+	},
 };
 
 
-- 
cgit v0.10.2


From e458824f9d32e9bf7700d1eb0d201749af48eee0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 May 2009 08:49:32 +0200
Subject: scsi: fix resid_len mis-conversion in scsi_end_request()

Commit c3a4d78c580de4edc9ef0f7c59812fb02ceb037f introduced
rq->data_len and converted residual count users to it.  While
converting, it mistakenly converted scsi_end_request() to finish
requests with residual count when it wants to do is fully complete the
request.  Fix it by using blk_end_request_all() instead.

This bug was spotted by Boaz Harrosh.

Signed-off-by: Tejun Heo <tj@kernel.org>
Spotted-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a54bec9..e410d66 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -546,14 +546,9 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
 	 * to queue the remainder of them.
 	 */
 	if (blk_end_request(req, error, bytes)) {
-		int leftover = blk_rq_bytes(req);
-
-		if (blk_pc_request(req))
-			leftover = req->resid_len;
-
 		/* kill remainder if no retrys */
 		if (error && scsi_noretry_cmd(cmd))
-			blk_end_request(req, error, leftover);
+			blk_end_request_all(req, error);
 		else {
 			if (requeue) {
 				/*
-- 
cgit v0.10.2


From f3f8290cb3fa4aa627321530bb85d5f35e487433 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 16:07:40 +0900
Subject: sh: clkfwk: Handle clk_get_sys() returning an ERR_PTR.

clk_get() needs to also perform an IS_ERR() check to see whether
clk_get_sys() failed or not.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 34707b8..9e1fc13 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -399,7 +399,7 @@ struct clk *clk_get(struct device *dev, const char *id)
 	int idno;
 
 	clk = clk_get_sys(dev_id, id);
-	if (clk)
+	if (clk && !IS_ERR(clk))
 		return clk;
 
 	if (dev == NULL || dev->bus != &platform_bus_type)
-- 
cgit v0.10.2


From ae31c1fbdbb18d917b0a1139497c2dbd35886989 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 4 May 2009 12:40:54 -0700
Subject: sound: remove driver_data direct access of struct device

In the near future, the driver core is going to not allow direct access
to the driver_data pointer in struct device.  Instead, the functions
dev_get_drvdata() and dev_set_drvdata() should be used.  These functions
have been around since the beginning, so are backwards compatible with
all older kernel versions.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c
index fbf5c93..586965f 100644
--- a/sound/aoa/fabrics/layout.c
+++ b/sound/aoa/fabrics/layout.c
@@ -1037,7 +1037,7 @@ static int aoa_fabric_layout_probe(struct soundbus_dev *sdev)
 	}
 	ldev->selfptr_headphone.ptr = ldev;
 	ldev->selfptr_lineout.ptr = ldev;
-	sdev->ofdev.dev.driver_data = ldev;
+	dev_set_drvdata(&sdev->ofdev.dev, ldev);
 	list_add(&ldev->list, &layouts_list);
 	layouts_list_items++;
 
@@ -1081,7 +1081,7 @@ static int aoa_fabric_layout_probe(struct soundbus_dev *sdev)
 
 static int aoa_fabric_layout_remove(struct soundbus_dev *sdev)
 {
-	struct layout_dev *ldev = sdev->ofdev.dev.driver_data;
+	struct layout_dev *ldev = dev_get_drvdata(&sdev->ofdev.dev);
 	int i;
 
 	for (i=0; i<MAX_CODECS_PER_BUS; i++) {
@@ -1114,7 +1114,7 @@ static int aoa_fabric_layout_remove(struct soundbus_dev *sdev)
 #ifdef CONFIG_PM
 static int aoa_fabric_layout_suspend(struct soundbus_dev *sdev, pm_message_t state)
 {
-	struct layout_dev *ldev = sdev->ofdev.dev.driver_data;
+	struct layout_dev *ldev = dev_get_drvdata(&sdev->ofdev.dev);
 
 	if (ldev->gpio.methods && ldev->gpio.methods->all_amps_off)
 		ldev->gpio.methods->all_amps_off(&ldev->gpio);
@@ -1124,7 +1124,7 @@ static int aoa_fabric_layout_suspend(struct soundbus_dev *sdev, pm_message_t sta
 
 static int aoa_fabric_layout_resume(struct soundbus_dev *sdev)
 {
-	struct layout_dev *ldev = sdev->ofdev.dev.driver_data;
+	struct layout_dev *ldev = dev_get_drvdata(&sdev->ofdev.dev);
 
 	if (ldev->gpio.methods && ldev->gpio.methods->all_amps_off)
 		ldev->gpio.methods->all_amps_restore(&ldev->gpio);
diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c
index 418c84c..4e3b819 100644
--- a/sound/aoa/soundbus/i2sbus/core.c
+++ b/sound/aoa/soundbus/i2sbus/core.c
@@ -358,14 +358,14 @@ static int i2sbus_probe(struct macio_dev* dev, const struct of_device_id *match)
 		return -ENODEV;
 	}
 
-	dev->ofdev.dev.driver_data = control;
+	dev_set_drvdata(&dev->ofdev.dev, control);
 
 	return 0;
 }
 
 static int i2sbus_remove(struct macio_dev* dev)
 {
-	struct i2sbus_control *control = dev->ofdev.dev.driver_data;
+	struct i2sbus_control *control = dev_get_drvdata(&dev->ofdev.dev);
 	struct i2sbus_dev *i2sdev, *tmp;
 
 	list_for_each_entry_safe(i2sdev, tmp, &control->list, item)
@@ -377,7 +377,7 @@ static int i2sbus_remove(struct macio_dev* dev)
 #ifdef CONFIG_PM
 static int i2sbus_suspend(struct macio_dev* dev, pm_message_t state)
 {
-	struct i2sbus_control *control = dev->ofdev.dev.driver_data;
+	struct i2sbus_control *control = dev_get_drvdata(&dev->ofdev.dev);
 	struct codec_info_item *cii;
 	struct i2sbus_dev* i2sdev;
 	int err, ret = 0;
@@ -407,7 +407,7 @@ static int i2sbus_suspend(struct macio_dev* dev, pm_message_t state)
 
 static int i2sbus_resume(struct macio_dev* dev)
 {
-	struct i2sbus_control *control = dev->ofdev.dev.driver_data;
+	struct i2sbus_control *control = dev_get_drvdata(&dev->ofdev.dev);
 	struct codec_info_item *cii;
 	struct i2sbus_dev* i2sdev;
 	int err, ret = 0;
diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c
index 510efa6..e4547de 100644
--- a/sound/soc/codecs/wm8400.c
+++ b/sound/soc/codecs/wm8400.c
@@ -1473,8 +1473,8 @@ static int wm8400_codec_probe(struct platform_device *dev)
 
 	codec = &priv->codec;
 	codec->private_data = priv;
-	codec->control_data = dev->dev.driver_data;
-	priv->wm8400 = dev->dev.driver_data;
+	codec->control_data = dev_get_drvdata(&dev->dev);
+	priv->wm8400 = dev_get_drvdata(&dev->dev);
 
 	ret = regulator_bulk_get(priv->wm8400->dev,
 				 ARRAY_SIZE(power), &power[0]);
diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c
index e043e3f..7a20587 100644
--- a/sound/soc/codecs/wm8731.c
+++ b/sound/soc/codecs/wm8731.c
@@ -666,14 +666,14 @@ static int __devinit wm8731_spi_probe(struct spi_device *spi)
 	codec->hw_write = (hw_write_t)wm8731_spi_write;
 	codec->dev = &spi->dev;
 
-	spi->dev.driver_data = wm8731;
+	dev_set_drvdata(&spi->dev, wm8731);
 
 	return wm8731_register(wm8731);
 }
 
 static int __devexit wm8731_spi_remove(struct spi_device *spi)
 {
-	struct wm8731_priv *wm8731 = spi->dev.driver_data;
+	struct wm8731_priv *wm8731 = dev_get_drvdata(&spi->dev);
 
 	wm8731_unregister(wm8731);
 
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index a6e8f3f..d121e58 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -1822,14 +1822,14 @@ static int __devinit wm8753_spi_probe(struct spi_device *spi)
 	codec->hw_write = (hw_write_t)wm8753_spi_write;
 	codec->dev = &spi->dev;
 
-	spi->dev.driver_data = wm8753;
+	dev_set_drvdata(&spi->dev, wm8753);
 
 	return wm8753_register(wm8753);
 }
 
 static int __devexit wm8753_spi_remove(struct spi_device *spi)
 {
-	struct wm8753_priv *wm8753 = spi->dev.driver_data;
+	struct wm8753_priv *wm8753 = dev_get_drvdata(&spi->dev);
 	wm8753_unregister(wm8753);
 	return 0;
 }
-- 
cgit v0.10.2


From 871b72dd1e12afc3f024479531d25a9339d2e3f9 Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Date: Mon, 11 May 2009 23:48:27 +0200
Subject: x86: microcode: use smp_call_function_single instead of
 set_cpus_allowed, cleanup of synchronization logic

* Solve issues described in 6f66cbc63081fd70e3191b4dbb796746780e5ae1
  in a way that doesn't resort to set_cpus_allowed();

* in fact, only collect_cpu_info and apply_microcode callbacks
  must run on a target cpu, others will do just fine on any other.
  smp_call_function_single() (as suggested by Ingo) is used to run
  these callbacks on a target cpu.

* cleanup of synchronization logic of the 'microcode_core' part

  The generic 'microcode_core' part guarantees that only a single cpu
  (be it a full-fledged cpu, one of the cores or HT)
  is being updated at any particular moment of time.

  In general, there is no need for any additional sync. mechanism in
  arch-specific parts (the patch removes existing spinlocks).

  See also the "Synchronization" section in microcode_core.c.

* return -EINVAL instead of -1 (which is translated into -EPERM) in
  microcode_write(), reload_cpu() and mc_sysdev_add(). Other suggestions
  for an error code?

* use 'enum ucode_state' as return value of request_microcode_{fw, user}
  to gain more flexibility by distinguishing between real error cases
  and situations when an appropriate ucode was not found (which is not an
  error per-se).

* some minor cleanups

Thanks a lot to Hugh Dickins for review/suggestions/testing!

   Reference: http://marc.info/?l=linux-kernel&m=124025889012541&w=2

[ Impact: refactor and clean up microcode driver locking code ]

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Acked-by: Hugh Dickins <hugh@veritas.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Peter Oruba <peter.oruba@amd.com>
Cc: Arjan van de Ven <arjan@infradead.org>
LKML-Reference: <1242078507.5560.9.camel@earth>
[ did some more cleanups ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
 arch/x86/include/asm/microcode.h  |   25 ++
 arch/x86/kernel/microcode_amd.c   |   58 ++----
 arch/x86/kernel/microcode_core.c  |  326 +++++++++++++++++++++-----------------
 arch/x86/kernel/microcode_intel.c |   92 +++-------
 4 files changed, 261 insertions(+), 240 deletions(-)

(~20 new comment lines)

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index c882664..ef51b50 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -9,20 +9,31 @@ struct cpu_signature {
 
 struct device;
 
+enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+
 struct microcode_ops {
-	int  (*request_microcode_user) (int cpu, const void __user *buf, size_t size);
-	int  (*request_microcode_fw) (int cpu, struct device *device);
+	enum ucode_state (*request_microcode_user) (int cpu,
+				const void __user *buf, size_t size);
 
-	void (*apply_microcode) (int cpu);
+	enum ucode_state (*request_microcode_fw) (int cpu,
+				struct device *device);
 
-	int  (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 	void (*microcode_fini_cpu) (int cpu);
+
+	/*
+	 * The generic 'microcode_core' part guarantees that
+	 * the callbacks below run on a target cpu when they
+	 * are being called.
+	 * See also the "Synchronization" section in microcode_core.c.
+	 */
+	int (*apply_microcode) (int cpu);
+	int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 };
 
 struct ucode_cpu_info {
-	struct cpu_signature cpu_sig;
-	int valid;
-	void *mc;
+	struct cpu_signature	cpu_sig;
+	int			valid;
+	void			*mc;
 };
 extern struct ucode_cpu_info ucode_cpu_info[];
 
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 453b579..c8be20f 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -13,25 +13,13 @@
  *  Licensed under the terms of the GNU General Public
  *  License version 2. See file COPYING for details.
  */
-#include <linux/platform_device.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
 #include <linux/firmware.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
 #include <linux/pci_ids.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
 #include <linux/pci.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -79,9 +67,6 @@ struct microcode_amd {
 #define UCODE_CONTAINER_SECTION_HDR	8
 #define UCODE_CONTAINER_HEADER_SIZE	12
 
-/* serialize access to the physical write */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
 static struct equiv_cpu_entry *equiv_cpu_table;
 
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
@@ -144,9 +129,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
 	return 1;
 }
 
-static void apply_microcode_amd(int cpu)
+static int apply_microcode_amd(int cpu)
 {
-	unsigned long flags;
 	u32 rev, dummy;
 	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
@@ -156,25 +140,25 @@ static void apply_microcode_amd(int cpu)
 	BUG_ON(cpu_num != cpu);
 
 	if (mc_amd == NULL)
-		return;
+		return 0;
 
-	spin_lock_irqsave(&microcode_update_lock, flags);
 	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
 	/* get patch id after patching */
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-	spin_unlock_irqrestore(&microcode_update_lock, flags);
 
 	/* check current patch id and patch's id for match */
 	if (rev != mc_amd->hdr.patch_id) {
 		printk(KERN_ERR "microcode: CPU%d: update failed "
 		       "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
-		return;
+		return -1;
 	}
 
 	printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
 	       cpu, rev);
 
 	uci->cpu_sig.rev = rev;
+
+	return 0;
 }
 
 static int get_ucode_data(void *to, const u8 *from, size_t n)
@@ -263,7 +247,8 @@ static void free_equiv_cpu_table(void)
 	}
 }
 
-static int generic_load_microcode(int cpu, const u8 *data, size_t size)
+static enum ucode_state
+generic_load_microcode(int cpu, const u8 *data, size_t size)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	const u8 *ucode_ptr = data;
@@ -272,12 +257,13 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover;
 	unsigned long offset;
+	enum ucode_state state = UCODE_OK;
 
 	offset = install_equiv_cpu_table(ucode_ptr);
 	if (!offset) {
 		printk(KERN_ERR "microcode: failed to create "
 		       "equivalent cpu table\n");
-		return -EINVAL;
+		return UCODE_ERROR;
 	}
 
 	ucode_ptr += offset;
@@ -312,28 +298,27 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 			pr_debug("microcode: CPU%d found a matching microcode "
 				 "update with version 0x%x (current=0x%x)\n",
 				 cpu, new_rev, uci->cpu_sig.rev);
-		} else
+		} else {
 			vfree(new_mc);
-	}
+			state = UCODE_ERROR;
+		}
+	} else
+		state = UCODE_NFOUND;
 
 	free_equiv_cpu_table();
 
-	return (int)leftover;
+	return state;
 }
 
-static int request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device)
 {
 	const char *fw_name = "amd-ucode/microcode_amd.bin";
 	const struct firmware *firmware;
-	int ret;
-
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
+	enum ucode_state ret;
 
-	ret = request_firmware(&firmware, fw_name, device);
-	if (ret) {
+	if (request_firmware(&firmware, fw_name, device)) {
 		printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
-		return ret;
+		return UCODE_NFOUND;
 	}
 
 	ret = generic_load_microcode(cpu, firmware->data, firmware->size);
@@ -343,11 +328,12 @@ static int request_microcode_fw(int cpu, struct device *device)
 	return ret;
 }
 
-static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
 	printk(KERN_INFO "microcode: AMD microcode update via "
 	       "/dev/cpu/microcode not supported\n");
-	return -1;
+	return UCODE_ERROR;
 }
 
 static void microcode_fini_cpu_amd(int cpu)
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 98c470c..9c44615 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -71,27 +71,18 @@
  *		Thanks to Stuart Swales for pointing out this bug.
  */
 #include <linux/platform_device.h>
-#include <linux/capability.h>
 #include <linux/miscdevice.h>
-#include <linux/firmware.h>
+#include <linux/capability.h>
 #include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 
 MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -101,36 +92,110 @@ MODULE_LICENSE("GPL");
 
 static struct microcode_ops	*microcode_ops;
 
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+/*
+ * Synchronization.
+ *
+ * All non cpu-hotplug-callback call sites use:
+ *
+ * - microcode_mutex to synchronize with each other;
+ * - get/put_online_cpus() to synchronize with
+ *   the cpu-hotplug-callback call sites.
+ *
+ * We guarantee that only a single cpu is being
+ * updated at any particular moment of time.
+ */
 static DEFINE_MUTEX(microcode_mutex);
 
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
+/*
+ * Operations that are run on a target cpu:
+ */
+
+struct cpu_info_ctx {
+	struct cpu_signature	*cpu_sig;
+	int			err;
+};
+
+static void collect_cpu_info_local(void *arg)
+{
+	struct cpu_info_ctx *ctx = arg;
+
+	ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(),
+						   ctx->cpu_sig);
+}
+
+static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig)
+{
+	struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 };
+	int ret;
+
+	ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1);
+	if (!ret)
+		ret = ctx.err;
+
+	return ret;
+}
+
+static int collect_cpu_info(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int ret;
+
+	memset(uci, 0, sizeof(*uci));
+
+	ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig);
+	if (!ret)
+		uci->valid = 1;
+
+	return ret;
+}
+
+struct apply_microcode_ctx {
+	int err;
+};
+
+static void apply_microcode_local(void *arg)
+{
+	struct apply_microcode_ctx *ctx = arg;
+
+	ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+}
+
+static int apply_microcode_on_target(int cpu)
+{
+	struct apply_microcode_ctx ctx = { .err = 0 };
+	int ret;
+
+	ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
+	if (!ret)
+		ret = ctx.err;
+
+	return ret;
+}
+
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
 static int do_microcode_update(const void __user *buf, size_t size)
 {
-	cpumask_t old;
 	int error = 0;
 	int cpu;
 
-	old = current->cpus_allowed;
-
 	for_each_online_cpu(cpu) {
 		struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+		enum ucode_state ustate;
 
 		if (!uci->valid)
 			continue;
 
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-		error = microcode_ops->request_microcode_user(cpu, buf, size);
-		if (error < 0)
-			goto out;
-		if (!error)
-			microcode_ops->apply_microcode(cpu);
+		ustate = microcode_ops->request_microcode_user(cpu, buf, size);
+		if (ustate == UCODE_ERROR) {
+			error = -1;
+			break;
+		} else if (ustate == UCODE_OK)
+			apply_microcode_on_target(cpu);
 	}
-out:
-	set_cpus_allowed_ptr(current, &old);
+
 	return error;
 }
 
@@ -143,19 +208,17 @@ static int microcode_open(struct inode *unused1, struct file *unused2)
 static ssize_t microcode_write(struct file *file, const char __user *buf,
 			       size_t len, loff_t *ppos)
 {
-	ssize_t ret;
+	ssize_t ret = -EINVAL;
 
 	if ((len >> PAGE_SHIFT) > num_physpages) {
-		printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
-		       num_physpages);
-		return -EINVAL;
+		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+		return ret;
 	}
 
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
 
-	ret = do_microcode_update(buf, len);
-	if (!ret)
+	if (do_microcode_update(buf, len) == 0)
 		ret = (ssize_t)len;
 
 	mutex_unlock(&microcode_mutex);
@@ -165,15 +228,15 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
 }
 
 static const struct file_operations microcode_fops = {
-	.owner		= THIS_MODULE,
-	.write		= microcode_write,
-	.open		= microcode_open,
+	.owner			= THIS_MODULE,
+	.write			= microcode_write,
+	.open			= microcode_open,
 };
 
 static struct miscdevice microcode_dev = {
-	.minor		= MICROCODE_MINOR,
-	.name		= "microcode",
-	.fops		= &microcode_fops,
+	.minor			= MICROCODE_MINOR,
+	.name			= "microcode",
+	.fops			= &microcode_fops,
 };
 
 static int __init microcode_dev_init(void)
@@ -182,9 +245,7 @@ static int __init microcode_dev_init(void)
 
 	error = misc_register(&microcode_dev);
 	if (error) {
-		printk(KERN_ERR
-			"microcode: can't misc_register on minor=%d\n",
-			MICROCODE_MINOR);
+		pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
 		return error;
 	}
 
@@ -205,42 +266,51 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 /* fake device for request_firmware */
 static struct platform_device	*microcode_pdev;
 
-static long reload_for_cpu(void *unused)
+static int reload_for_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	int err = 0;
 
 	mutex_lock(&microcode_mutex);
 	if (uci->valid) {
-		err = microcode_ops->request_microcode_fw(smp_processor_id(),
-							  &microcode_pdev->dev);
-		if (!err)
-			microcode_ops->apply_microcode(smp_processor_id());
+		enum ucode_state ustate;
+
+		ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+		if (ustate == UCODE_OK)
+			apply_microcode_on_target(cpu);
+		else
+			if (ustate == UCODE_ERROR)
+				err = -EINVAL;
 	}
 	mutex_unlock(&microcode_mutex);
+
 	return err;
 }
 
 static ssize_t reload_store(struct sys_device *dev,
 			    struct sysdev_attribute *attr,
-			    const char *buf, size_t sz)
+			    const char *buf, size_t size)
 {
-	char *end;
-	unsigned long val = simple_strtoul(buf, &end, 0);
-	int err = 0;
+	unsigned long val;
 	int cpu = dev->id;
+	int ret = 0;
+	char *end;
 
+	val = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
+
 	if (val == 1) {
 		get_online_cpus();
 		if (cpu_online(cpu))
-			err = work_on_cpu(cpu, reload_for_cpu, NULL);
+			ret = reload_for_cpu(cpu);
 		put_online_cpus();
 	}
-	if (err)
-		return err;
-	return sz;
+
+	if (!ret)
+		ret = size;
+
+	return ret;
 }
 
 static ssize_t version_show(struct sys_device *dev,
@@ -271,11 +341,11 @@ static struct attribute *mc_default_attrs[] = {
 };
 
 static struct attribute_group mc_attr_group = {
-	.attrs		= mc_default_attrs,
-	.name		= "microcode",
+	.attrs			= mc_default_attrs,
+	.name			= "microcode",
 };
 
-static void __microcode_fini_cpu(int cpu)
+static void microcode_fini_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
@@ -283,103 +353,68 @@ static void __microcode_fini_cpu(int cpu)
 	uci->valid = 0;
 }
 
-static void microcode_fini_cpu(int cpu)
-{
-	mutex_lock(&microcode_mutex);
-	__microcode_fini_cpu(cpu);
-	mutex_unlock(&microcode_mutex);
-}
-
-static void collect_cpu_info(int cpu)
+static enum ucode_state microcode_resume_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	memset(uci, 0, sizeof(*uci));
-	if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig))
-		uci->valid = 1;
+	if (!uci->mc)
+		return UCODE_NFOUND;
+
+	pr_debug("microcode: CPU%d updated upon resume\n", cpu);
+	apply_microcode_on_target(cpu);
+
+	return UCODE_OK;
 }
 
-static int microcode_resume_cpu(int cpu)
+static enum ucode_state microcode_init_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct cpu_signature nsig;
+	enum ucode_state ustate;
 
-	pr_debug("microcode: CPU%d resumed\n", cpu);
+	if (collect_cpu_info(cpu))
+		return UCODE_ERROR;
 
-	if (!uci->mc)
-		return 1;
+	/* --dimm. Trigger a delayed update? */
+	if (system_state != SYSTEM_RUNNING)
+		return UCODE_NFOUND;
 
-	/*
-	 * Let's verify that the 'cached' ucode does belong
-	 * to this cpu (a bit of paranoia):
-	 */
-	if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
-		__microcode_fini_cpu(cpu);
-		printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n",
-				cpu);
-		return -1;
-	}
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
 
-	if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) {
-		__microcode_fini_cpu(cpu);
-		printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n",
-				cpu);
-		/* Should we look for a new ucode here? */
-		return 1;
+	if (ustate == UCODE_OK) {
+		pr_debug("microcode: CPU%d updated upon init\n", cpu);
+		apply_microcode_on_target(cpu);
 	}
 
-	return 0;
+	return ustate;
 }
 
-static long microcode_update_cpu(void *unused)
+static enum ucode_state microcode_update_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
-	int err = 0;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	enum ucode_state ustate;
 
-	/*
-	 * Check if the system resume is in progress (uci->valid != NULL),
-	 * otherwise just request a firmware:
-	 */
-	if (uci->valid) {
-		err = microcode_resume_cpu(smp_processor_id());
-	} else {
-		collect_cpu_info(smp_processor_id());
-		if (uci->valid && system_state == SYSTEM_RUNNING)
-			err = microcode_ops->request_microcode_fw(
-					smp_processor_id(),
-					&microcode_pdev->dev);
-	}
-	if (!err)
-		microcode_ops->apply_microcode(smp_processor_id());
-	return err;
-}
+	if (uci->valid)
+		ustate = microcode_resume_cpu(cpu);
+	else
+		ustate = microcode_init_cpu(cpu);
 
-static int microcode_init_cpu(int cpu)
-{
-	int err;
-	mutex_lock(&microcode_mutex);
-	err = work_on_cpu(cpu, microcode_update_cpu, NULL);
-	mutex_unlock(&microcode_mutex);
-
-	return err;
+	return ustate;
 }
 
 static int mc_sysdev_add(struct sys_device *sys_dev)
 {
 	int err, cpu = sys_dev->id;
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	if (!cpu_online(cpu))
 		return 0;
 
 	pr_debug("microcode: CPU%d added\n", cpu);
-	memset(uci, 0, sizeof(*uci));
 
 	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
 	if (err)
 		return err;
 
-	err = microcode_init_cpu(cpu);
+	if (microcode_init_cpu(cpu) == UCODE_ERROR)
+		err = -EINVAL;
 
 	return err;
 }
@@ -400,19 +435,30 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 static int mc_sysdev_resume(struct sys_device *dev)
 {
 	int cpu = dev->id;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 	if (!cpu_online(cpu))
 		return 0;
 
-	/* only CPU 0 will apply ucode here */
-	microcode_update_cpu(NULL);
+	/*
+	 * All non-bootup cpus are still disabled,
+	 * so only CPU 0 will apply ucode here.
+	 *
+	 * Moreover, there can be no concurrent
+	 * updates from any other places at this point.
+	 */
+	WARN_ON(cpu != 0);
+
+	if (uci->valid && uci->mc)
+		microcode_ops->apply_microcode(cpu);
+
 	return 0;
 }
 
 static struct sysdev_driver mc_sysdev_driver = {
-	.add		= mc_sysdev_add,
-	.remove		= mc_sysdev_remove,
-	.resume		= mc_sysdev_resume,
+	.add			= mc_sysdev_add,
+	.remove			= mc_sysdev_remove,
+	.resume			= mc_sysdev_resume,
 };
 
 static __cpuinit int
@@ -425,15 +471,12 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		if (microcode_init_cpu(cpu))
-			printk(KERN_ERR "microcode: failed to init CPU%d\n",
-			       cpu);
+		microcode_update_cpu(cpu);
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 		pr_debug("microcode: CPU%d added\n", cpu);
 		if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
-			printk(KERN_ERR "microcode: Failed to create the sysfs "
-				"group for CPU%d\n", cpu);
+			pr_err("microcode: Failed to create group for CPU%d\n", cpu);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
@@ -465,13 +508,10 @@ static int __init microcode_init(void)
 		microcode_ops = init_amd_microcode();
 
 	if (!microcode_ops) {
-		printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+		pr_err("microcode: no support for this CPU vendor\n");
 		return -ENODEV;
 	}
 
-	error = microcode_dev_init();
-	if (error)
-		return error;
 	microcode_pdev = platform_device_register_simple("microcode", -1,
 							 NULL, 0);
 	if (IS_ERR(microcode_pdev)) {
@@ -480,23 +520,31 @@ static int __init microcode_init(void)
 	}
 
 	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
 	error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
+
+	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
+
 	if (error) {
-		microcode_dev_exit();
 		platform_device_unregister(microcode_pdev);
 		return error;
 	}
 
+	error = microcode_dev_init();
+	if (error)
+		return error;
+
 	register_hotcpu_notifier(&mc_cpu_notifier);
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
 	       " <tigran@aivazian.fsnet.co.uk>,"
 	       " Peter Oruba\n");
 
 	return 0;
 }
+module_init(microcode_init);
 
 static void __exit microcode_exit(void)
 {
@@ -505,16 +553,17 @@ static void __exit microcode_exit(void)
 	unregister_hotcpu_notifier(&mc_cpu_notifier);
 
 	get_online_cpus();
+	mutex_lock(&microcode_mutex);
+
 	sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+
+	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
 	platform_device_unregister(microcode_pdev);
 
 	microcode_ops = NULL;
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }
-
-module_init(microcode_init);
 module_exit(microcode_exit);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 149b9ec..0d334dd 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,24 +70,11 @@
  *		Fix sigmatch() macro to handle old CPUs with pf == 0.
  *		Thanks to Stuart Swales for pointing out this bug.
  */
-#include <linux/platform_device.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
 #include <linux/firmware.h>
-#include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
 #include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -150,13 +137,9 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
-/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
 static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-	unsigned long flags;
 	unsigned int val[2];
 
 	memset(csig, 0, sizeof(*csig));
@@ -176,18 +159,14 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 		csig->pf = 1 << ((val[1] >> 18) & 7);
 	}
 
-	/* serialize access to the physical write to MSR 0x79 */
-	spin_lock_irqsave(&microcode_update_lock, flags);
-
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 	/* see notes above for revision 1.07.  Apparent chip bug */
 	sync_core();
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
-	spin_unlock_irqrestore(&microcode_update_lock, flags);
 
-	pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
-			csig->sig, csig->pf, csig->rev);
+	printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
+			cpu_num, csig->sig, csig->pf, csig->rev);
 
 	return 0;
 }
@@ -318,11 +297,10 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
 	return 0;
 }
 
-static void apply_microcode(int cpu)
+static int apply_microcode(int cpu)
 {
 	struct microcode_intel *mc_intel;
 	struct ucode_cpu_info *uci;
-	unsigned long flags;
 	unsigned int val[2];
 	int cpu_num;
 
@@ -334,10 +312,7 @@ static void apply_microcode(int cpu)
 	BUG_ON(cpu_num != cpu);
 
 	if (mc_intel == NULL)
-		return;
-
-	/* serialize access to the physical write to MSR 0x79 */
-	spin_lock_irqsave(&microcode_update_lock, flags);
+		return 0;
 
 	/* write microcode via MSR 0x79 */
 	wrmsr(MSR_IA32_UCODE_WRITE,
@@ -351,30 +326,32 @@ static void apply_microcode(int cpu)
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
-	spin_unlock_irqrestore(&microcode_update_lock, flags);
 	if (val[1] != mc_intel->hdr.rev) {
-		printk(KERN_ERR "microcode: CPU%d update from revision "
-				"0x%x to 0x%x failed\n",
-			cpu_num, uci->cpu_sig.rev, val[1]);
-		return;
+		printk(KERN_ERR "microcode: CPU%d update "
+				"to revision 0x%x failed\n",
+			cpu_num, mc_intel->hdr.rev);
+		return -1;
 	}
-	printk(KERN_INFO "microcode: CPU%d updated from revision "
-			 "0x%x to 0x%x, date = %04x-%02x-%02x \n",
-		cpu_num, uci->cpu_sig.rev, val[1],
+	printk(KERN_INFO "microcode: CPU%d updated to revision "
+			 "0x%x, date = %04x-%02x-%02x \n",
+		cpu_num, val[1],
 		mc_intel->hdr.date & 0xffff,
 		mc_intel->hdr.date >> 24,
 		(mc_intel->hdr.date >> 16) & 0xff);
 
 	uci->cpu_sig.rev = val[1];
+
+	return 0;
 }
 
-static int generic_load_microcode(int cpu, void *data, size_t size,
-		int (*get_ucode_data)(void *, const void *, size_t))
+static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
+				int (*get_ucode_data)(void *, const void *, size_t))
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	u8 *ucode_ptr = data, *new_mc = NULL, *mc;
 	int new_rev = uci->cpu_sig.rev;
 	unsigned int leftover = size;
+	enum ucode_state state = UCODE_OK;
 
 	while (leftover) {
 		struct microcode_header_intel mc_header;
@@ -412,11 +389,15 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 		leftover  -= mc_size;
 	}
 
-	if (!new_mc)
+	if (leftover) {
+		if (new_mc)
+			vfree(new_mc);
+		state = UCODE_ERROR;
 		goto out;
+	}
 
-	if (leftover) {
-		vfree(new_mc);
+	if (!new_mc) {
+		state = UCODE_NFOUND;
 		goto out;
 	}
 
@@ -427,9 +408,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
 	pr_debug("microcode: CPU%d found a matching microcode update with"
 		 " version 0x%x (current=0x%x)\n",
 			cpu, new_rev, uci->cpu_sig.rev);
-
- out:
-	return (int)leftover;
+out:
+	return state;
 }
 
 static int get_ucode_fw(void *to, const void *from, size_t n)
@@ -438,21 +418,19 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
 	return 0;
 }
 
-static int request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device)
 {
 	char name[30];
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	const struct firmware *firmware;
-	int ret;
+	enum ucode_state ret;
 
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
 	sprintf(name, "intel-ucode/%02x-%02x-%02x",
 		c->x86, c->x86_model, c->x86_mask);
-	ret = request_firmware(&firmware, name, device);
-	if (ret) {
+
+	if (request_firmware(&firmware, name, device)) {
 		pr_debug("microcode: data file %s load failed\n", name);
-		return ret;
+		return UCODE_NFOUND;
 	}
 
 	ret = generic_load_microcode(cpu, (void *)firmware->data,
@@ -468,11 +446,9 @@ static int get_ucode_user(void *to, const void *from, size_t n)
 	return copy_from_user(to, from, n);
 }
 
-static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu != raw_smp_processor_id());
-
 	return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
 }
 
-- 
cgit v0.10.2


From 2b1ccc0ee918a653d0483fdad9dd6112ce8e9043 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 12 May 2009 11:11:48 +0200
Subject: splice: fix misleading comment

Splice is tied to pipes by design, it'll not change. And now that
the splice stuff is in splice.h (and note pipe.h), the rest of the comment
is out-of-date as well.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/include/linux/splice.h b/include/linux/splice.h
index 5f3faa9..18e7c7c 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -11,8 +11,7 @@
 #include <linux/pipe_fs_i.h>
 
 /*
- * splice is tied to pipes as a transport (at least for now), so we'll just
- * add the splice flags here.
+ * Flags passed in from splice/tee/vmsplice
  */
 #define SPLICE_F_MOVE	(0x01)	/* move pages instead of copying */
 #define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
-- 
cgit v0.10.2


From 9d62dcdfa6f6fc843f7d9b494bcd48f00b94f883 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Mon, 11 May 2009 22:05:28 -0400
Subject: x86: merge process.c a bit

Merge arch_align_stack() and arch_randomize_brk(), since
they are the same.

Tested on x86_64.

[ Impact: cleanup ]

Signed-off-by: Amerigo Wang <amwang@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ca98915..2b9a8d0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <linux/random.h>
 #include <trace/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
@@ -613,3 +614,16 @@ static int __init idle_setup(char *str)
 }
 early_param("idle", idle_setup);
 
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		sp -= get_random_int() % 8192;
+	return sp & ~0xf;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long range_end = mm->brk + 0x02000000;
+	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 76f8f84..a3bb049 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -33,7 +33,6 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/personality.h>
 #include <linux/tick.h>
 #include <linux/percpu.h>
@@ -497,15 +496,3 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b751a41..34386f7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -32,7 +32,6 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
@@ -660,15 +659,3 @@ long sys_arch_prctl(int code, unsigned long addr)
 	return do_arch_prctl(current, code, addr);
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
-- 
cgit v0.10.2


From bf78ad69cd351798b9447a269c6bd41ce1f111f4 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Mon, 11 May 2009 23:29:09 -0400
Subject: x86: process.c, remove useless headers

<stdarg.h> is not needed by these files, remove them.

[ Impact: cleanup ]

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: akpm@linux-foundation.org
LKML-Reference: <20090512032956.5040.77055.sendpatchset@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3bb049..56d50b7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -9,8 +9,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 34386f7..9d6b20e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -14,8 +14,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
-- 
cgit v0.10.2


From ed077b58f6146684069975122b1728a9d248a501 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Tue, 12 May 2009 16:40:00 +0800
Subject: x86: make sparse mem work in non-NUMA mode

With sparse memory, holes should not be marked present for memmap.
This patch makes sure sparsemem really works on SMP mode (!NUMA).

[ Impact: use less memory to map fragmented RAM, avoid boot-OOM/crash ]

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
LKML-Reference: <1242117600.22431.0.camel@sli10-desk.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index fef1d90..949708d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -706,15 +706,15 @@ void __init initmem_init(unsigned long start_pfn,
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
-	memory_present(0, 0, highend_pfn);
 	e820_register_active_regions(0, 0, highend_pfn);
+	sparse_memory_present_with_active_regions(0);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	memory_present(0, 0, max_low_pfn);
 	e820_register_active_regions(0, 0, max_low_pfn);
+	sparse_memory_present_with_active_regions(0);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
-- 
cgit v0.10.2


From 8e365f9b6e583e82dff96ff415ff9cc2106f5b3c Mon Sep 17 00:00:00 2001
From: Raphael Doursenaud <rdoursenaud@free.fr>
Date: Tue, 12 May 2009 11:49:39 +0200
Subject: ALSA: hdsp: allow firmware loading from inside the kernel

Allow the use of the FIRMWARE_IN_KERNEL option with hdsp cards and
in-kernel driver.
Also corrected a typo in the comment.

Signed-off-by: Raphael Doursenaud <rdoursenaud@free.fr>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 314e735..d0d4629 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -402,9 +402,9 @@ MODULE_FIRMWARE("digiface_firmware_rev11.bin");
 #define HDSP_DMA_AREA_BYTES ((HDSP_MAX_CHANNELS+1) * HDSP_CHANNEL_BUFFER_BYTES)
 #define HDSP_DMA_AREA_KILOBYTES (HDSP_DMA_AREA_BYTES/1024)
 
-/* use hotplug firmeare loader? */
+/* use hotplug firmware loader? */
 #if defined(CONFIG_FW_LOADER) || defined(CONFIG_FW_LOADER_MODULE)
-#if !defined(HDSP_USE_HWDEP_LOADER) && !defined(CONFIG_SND_HDSP)
+#if !defined(HDSP_USE_HWDEP_LOADER)
 #define HDSP_FW_LOADER
 #endif
 #endif
-- 
cgit v0.10.2


From eaaa5328835d8085d24221a0e5ceaacbe14a7523 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Mon, 11 May 2009 15:05:29 +0300
Subject: ASoC: em-x270: make the driver support also eXeda and CM-X300
 machines

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig
index eb75a1c..dcd163a 100644
--- a/sound/soc/pxa/Kconfig
+++ b/sound/soc/pxa/Kconfig
@@ -89,13 +89,13 @@ config SND_PXA2XX_SOC_E800
 	  Toshiba e800 PDA
 
 config SND_PXA2XX_SOC_EM_X270
-	tristate "SoC Audio support for CompuLab EM-x270"
+	tristate "SoC Audio support for CompuLab EM-x270, eXeda and CM-X300"
 	depends on SND_PXA2XX_SOC && MACH_EM_X270
 	select SND_PXA2XX_SOC_AC97
 	select SND_SOC_WM9712
 	help
 	  Say Y if you want to add support for SoC audio on
-	  CompuLab EM-x270.
+	  CompuLab EM-x270, eXeda and CM-X300 machines.
 
 config SND_PXA2XX_SOC_PALM27X
 	bool "SoC Audio support for Palm T|X, T5 and LifeDrive"
diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c
index 949be9c..f4756e4 100644
--- a/sound/soc/pxa/em-x270.c
+++ b/sound/soc/pxa/em-x270.c
@@ -1,7 +1,7 @@
 /*
- * em-x270.c  --  SoC audio for EM-X270
+ * SoC audio driver for EM-X270, eXeda and CM-X300
  *
- * Copyright 2007 CompuLab, Ltd.
+ * Copyright 2007, 2009 CompuLab, Ltd.
  *
  * Author: Mike Rapoport <mike@compulab.co.il>
  *
@@ -68,7 +68,8 @@ static int __init em_x270_init(void)
 {
 	int ret;
 
-	if (!machine_is_em_x270())
+	if (!(machine_is_em_x270() || machine_is_exeda()
+	      || machine_is_cm_x300()))
 		return -ENODEV;
 
 	em_x270_snd_device = platform_device_alloc("soc-audio", -1);
@@ -95,5 +96,5 @@ module_exit(em_x270_exit);
 
 /* Module information */
 MODULE_AUTHOR("Mike Rapoport");
-MODULE_DESCRIPTION("ALSA SoC EM-X270");
+MODULE_DESCRIPTION("ALSA SoC EM-X270, eXeda and CM-X300");
 MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 7de0a0aee5cf24639c14b17ab4077f5dba0d7cb9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 11 May 2009 20:05:57 +0100
Subject: ASoC: Enforce symmetric rates for PXA2xx I2S

There is a single I2S_SYNC pin on the chip.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index 2f4b6e4..6014577 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -329,6 +329,7 @@ struct snd_soc_dai pxa_i2s_dai = {
 		.rates = PXA2XX_I2S_RATES,
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
 	.ops = &pxa_i2s_dai_ops,
+	.symmetric_rates = 1,
 };
 
 EXPORT_SYMBOL_GPL(pxa_i2s_dai);
-- 
cgit v0.10.2


From 97b8096dc92ae62b1d40e6bec7e7b257d2b30161 Mon Sep 17 00:00:00 2001
From: Joonyoung Shim <jy0922.shim@samsung.com>
Date: Mon, 11 May 2009 20:36:08 +0900
Subject: ASoC: TWL4030: change DAPM for analog microphone selection

The inputs of the twl4030 codec can be mixed, so we will use the mixer
DAPM for the analog microphone registers(0x05, 0x06), but if we enable
more than one input at the same time, the input impedance of the input
amplifier will be reduced.

Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index fd392c6..eaf91ab 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -422,36 +422,18 @@ static const struct snd_kcontrol_new twl4030_dapm_vibrapath_control =
 SOC_DAPM_ENUM("Route", twl4030_vibrapath_enum);
 
 /* Left analog microphone selection */
-static const char *twl4030_analoglmic_texts[] =
-		{"Off", "Main mic", "Headset mic", "AUXL", "Carkit mic"};
-
-static const unsigned int twl4030_analoglmic_values[] =
-		{0x0, 0x1, 0x2, 0x4, 0x8};
-
-static const struct soc_enum twl4030_analoglmic_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_ANAMICL, 0, 0xf,
-			ARRAY_SIZE(twl4030_analoglmic_texts),
-			twl4030_analoglmic_texts,
-			twl4030_analoglmic_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_analoglmic_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_analoglmic_enum);
+static const struct snd_kcontrol_new twl4030_dapm_analoglmic_controls[] = {
+	SOC_DAPM_SINGLE("Main mic", TWL4030_REG_ANAMICL, 0, 1, 0),
+	SOC_DAPM_SINGLE("Headset mic", TWL4030_REG_ANAMICL, 1, 1, 0),
+	SOC_DAPM_SINGLE("AUXL", TWL4030_REG_ANAMICL, 2, 1, 0),
+	SOC_DAPM_SINGLE("Carkit mic", TWL4030_REG_ANAMICL, 3, 1, 0),
+};
 
 /* Right analog microphone selection */
-static const char *twl4030_analogrmic_texts[] =
-		{"Off", "Sub mic", "AUXR"};
-
-static const unsigned int twl4030_analogrmic_values[] =
-		{0x0, 0x1, 0x4};
-
-static const struct soc_enum twl4030_analogrmic_enum =
-	SOC_VALUE_ENUM_SINGLE(TWL4030_REG_ANAMICR, 0, 0x5,
-			ARRAY_SIZE(twl4030_analogrmic_texts),
-			twl4030_analogrmic_texts,
-			twl4030_analogrmic_values);
-
-static const struct snd_kcontrol_new twl4030_dapm_analogrmic_control =
-SOC_DAPM_VALUE_ENUM("Route", twl4030_analogrmic_enum);
+static const struct snd_kcontrol_new twl4030_dapm_analogrmic_controls[] = {
+	SOC_DAPM_SINGLE("Sub mic", TWL4030_REG_ANAMICR, 0, 1, 0),
+	SOC_DAPM_SINGLE("AUXR", TWL4030_REG_ANAMICR, 1, 1, 0),
+};
 
 /* TX1 L/R Analog/Digital microphone selection */
 static const char *twl4030_micpathtx1_texts[] =
@@ -1138,11 +1120,15 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD|
 		SND_SOC_DAPM_POST_REG),
 
-	/* Analog input muxes with switch for the capture amplifiers */
-	SND_SOC_DAPM_VALUE_MUX("Analog Left Capture Route",
-		TWL4030_REG_ANAMICL, 4, 0, &twl4030_dapm_analoglmic_control),
-	SND_SOC_DAPM_VALUE_MUX("Analog Right Capture Route",
-		TWL4030_REG_ANAMICR, 4, 0, &twl4030_dapm_analogrmic_control),
+	/* Analog input mixers for the capture amplifiers */
+	SND_SOC_DAPM_MIXER("Analog Left Capture Route",
+		TWL4030_REG_ANAMICL, 4, 0,
+		&twl4030_dapm_analoglmic_controls[0],
+		ARRAY_SIZE(twl4030_dapm_analoglmic_controls)),
+	SND_SOC_DAPM_MIXER("Analog Right Capture Route",
+		TWL4030_REG_ANAMICR, 4, 0,
+		&twl4030_dapm_analogrmic_controls[0],
+		ARRAY_SIZE(twl4030_dapm_analogrmic_controls)),
 
 	SND_SOC_DAPM_PGA("ADC Physical Left",
 		TWL4030_REG_AVADC_CTL, 3, 0, NULL, 0),
-- 
cgit v0.10.2


From 511b4c171a8054506e8898c40833e31270689b8b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 12 May 2009 11:51:46 +0200
Subject: ALSA: hdsp - Add a comment about external firmwares for hdsp

When the hdsp driver is built in kernel, the corresponding firmware
files have to be built into the kernel as well (otherwise the boot
will hang up for fairly long time), but there is no way to control
it in Kconfig yet, unfortunately.  Instead, show a comment for user
just to make sure.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 93422e3..7478971 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -532,6 +532,9 @@ config SND_HDSP
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-hdsp.
 
+comment "Don't forget to add built-in firmwares for HDSP driver"
+	depends on SND_HDSP=y
+
 config SND_HDSPM
 	tristate "RME Hammerfall DSP MADI"
 	select SND_HWDEP
-- 
cgit v0.10.2


From 4797f6b021a3fa399942245d07a1feb30df81bb8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 2 May 2009 10:40:57 -0700
Subject: x86: read apic ID in the !acpi_lapic case

Ed found that on 32-bit, boot_cpu_physical_apicid is not read right,
when the mptable is broken.

Interestingly, actually three paths use/set it:

 1. acpi: at that time that is already read from reg
 2. mptable: only read from mptable
 3. no madt, and no mptable, that use default apic id 0 for 64-bit, -1 for 32-bit

so we could read the apic id for the 2/3 path. We trust the hardware
register more than we trust a BIOS data structure (the mptable).

We can also avoid the double set_fixmap() when acpi_lapic
is used, and also need to move cpu_has_apic earlier and
call apic_disable().

Also when need to update the apic id, we'd better read and
set the apic version as well - so that quirks are applied precisely.

v2: make path 3 with 64bit, use -1 as apic id, so could read it later.
v3: fix whitespace problem pointed out by Ed Swierk
v5: fix boot crash

[ Impact: get correct apic id for bsp other than acpi path ]

Reported-by: Ed Swierk <eswierk@aristanetworks.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <49FC85A9.2070702@kernel.org>
[ v4: sanity-check in the ACPI case too ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19e0d88..6a84ed1 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -180,7 +180,7 @@ extern int safe_smp_processor_id(void);
 static inline int logical_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+	return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
 #endif
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 07cffc1..b0fd264 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -242,17 +242,24 @@ static int modern_apic(void)
  * bare function to substitute write operation
  * and it's _that_ fast :)
  */
-void native_apic_write_dummy(u32 reg, u32 v)
+static void native_apic_write_dummy(u32 reg, u32 v)
 {
 	WARN_ON_ONCE((cpu_has_apic || !disable_apic));
 }
 
+static u32 native_apic_read_dummy(u32 reg)
+{
+	WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+	return 0;
+}
+
 /*
- * right after this call apic->write doesn't do anything
+ * right after this call apic->write/read doesn't do anything
  * note that there is no restore operation it works one way
  */
 void apic_disable(void)
 {
+	apic->read = native_apic_read_dummy;
 	apic->write = native_apic_write_dummy;
 }
 
@@ -1576,32 +1583,23 @@ void __init init_apic_mappings(void)
 		return;
 	}
 
-	/*
-	 * If no local APIC can be found then set up a fake all
-	 * zeroes page to simulate the local APIC and another
-	 * one for the IO-APIC.
-	 */
+	/* If no local APIC can be found return early */
 	if (!smp_found_config && detect_init_APIC()) {
-		apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-		apic_phys = __pa(apic_phys);
-	} else
+		/* lets NOP'ify apic operations */
+		pr_info("APIC: disable apic facility\n");
+		apic_disable();
+	} else {
 		apic_phys = mp_lapic_addr;
 
-	/*
-	 * acpi lapic path already maps that address in
-	 * acpi_register_lapic_address()
-	 */
-	if (!acpi_lapic)
-		set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-
-	apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
-			APIC_BASE, apic_phys);
+		/*
+		 * acpi lapic path already maps that address in
+		 * acpi_register_lapic_address()
+		 */
+		if (!acpi_lapic)
+			set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
 
-	/* lets check if we may NOP'ify apic operations */
-	if (!cpu_has_apic) {
-		pr_info("APIC: disable apic facility\n");
-		apic_disable();
-		return;
+		apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+					APIC_BASE, apic_phys);
 	}
 
 	/*
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1f3d366..74d2b48 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1878,6 +1878,11 @@ __apicdebuginit(void) print_PIC(void)
 __apicdebuginit(int) print_all_ICs(void)
 {
 	print_PIC();
+
+	/* don't print out if apic is not there */
+	if (!cpu_has_apic || disable_apic)
+		return 0;
+
 	print_all_local_APICs();
 	print_IO_APIC();
 
-- 
cgit v0.10.2


From 9fe5ee0efb1b1d4a0939bc4252a8427e3337d96a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 19:29:04 +0900
Subject: sh: clkfwk: Use arch_clk_init() for on-chip clock registration.

CPUs registering on-chip clocks should be using arch_clk_init() with the
new scheme so that the CPUs have the opportunity to establish the
topology prior to the initial root clock rate propagation. This ensures
that CPUs with on-chip clocks that use CLK_ENABLE_ON_INIT are properly
enabled at the initial propagation time, without having to further poke
the root clocks.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 9e1fc13..f833843 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -437,13 +437,7 @@ void clk_put(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_put);
 
-void __init __attribute__ ((weak))
-arch_init_clk_ops(struct clk_ops **ops, int type)
-{
-}
-
-int __init __attribute__ ((weak))
-arch_clk_init(void)
+int __init __weak arch_clk_init(void)
 {
 	return 0;
 }
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index 435f4f1..a72dc32 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -150,7 +150,7 @@ static struct clk *sh4202_onchip_clocks[] = {
 	&sh4202_shoc_clk,
 };
 
-static int __init sh4202_clk_init(void)
+int __init arch_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
 	int i, ret = 0;
@@ -166,4 +166,3 @@ static int __init sh4202_clk_init(void)
 
 	return ret;
 }
-arch_initcall(sh4202_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
index 0110da6..ce3d4e6 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
@@ -90,7 +90,7 @@ static struct clk *sh7763_onchip_clocks[] = {
 	&sh7763_shyway_clk,
 };
 
-static int __init sh7763_clk_init(void)
+int __init arch_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
 	int i, ret = 0;
@@ -106,4 +106,3 @@ static int __init sh7763_clk_init(void)
 
 	return ret;
 }
-arch_initcall(sh7763_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
index 0a22d50..38b8b1d 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
@@ -96,7 +96,7 @@ static struct clk *sh7780_onchip_clocks[] = {
 	&sh7780_shyway_clk,
 };
 
-static int __init sh7780_clk_init(void)
+int __init arch_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
 	int i, ret = 0;
@@ -112,4 +112,3 @@ static int __init sh7780_clk_init(void)
 
 	return ret;
 }
-arch_initcall(sh7780_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index 4dcd1f6..fa14f35 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -134,7 +134,7 @@ static struct clk *sh7785_onchip_clocks[] = {
 	&sh7785_ram_clk,
 };
 
-static int __init sh7785_clk_init(void)
+int __init arch_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
 	int i, ret = 0;
@@ -150,4 +150,3 @@ static int __init sh7785_clk_init(void)
 
 	return ret;
 }
-arch_initcall(sh7785_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
index 825556f..7907002 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
@@ -120,7 +120,7 @@ static struct clk *sh7786_onchip_clocks[] = {
 	&sh7786_ddr_clk,
 };
 
-static int __init sh7786_clk_init(void)
+int __init arch_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
 	int i, ret = 0;
@@ -136,4 +136,3 @@ static int __init sh7786_clk_init(void)
 
 	return ret;
 }
-arch_initcall(sh7786_clk_init);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
index 1eb149b..c14553e 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
@@ -107,7 +107,7 @@ static struct clk *shx3_onchip_clocks[] = {
 	&shx3_shyway_clk,
 };
 
-static int __init shx3_clk_init(void)
+int __init arch_clk_init(void)
 {
 	struct clk *clk = clk_get(NULL, "master_clk");
 	int i, ret = 0;
@@ -123,4 +123,3 @@ static int __init shx3_clk_init(void)
 
 	return ret;
 }
-arch_initcall(shx3_clk_init);
-- 
cgit v0.10.2


From 0ee8b4d7c7d39d9a9913e27686ef786642c3ccbb Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 09:13:59 +0000
Subject: sh: TMU platform data for sh7763

This patch adds TMU platform data for sh7763. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index bdf0f61..c91f34c 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 #include <linux/serial_sci.h>
 
@@ -113,7 +114,195 @@ static struct platform_device usbf_device = {
 	.resource	= usbf_resources,
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 28,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 29,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 30,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffd88008,
+		.end	= 0xffd88013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 96,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffd88014,
+		.end	= 0xffd8801f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 97,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffd88020,
+		.end	= 0xffd8802b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 98,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
 static struct platform_device *sh7763_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&rtc_device,
 	&sci_device,
 	&usb_ohci_device,
@@ -127,6 +316,21 @@ static int __init sh7763_devices_setup(void)
 }
 __initcall(sh7763_devices_setup);
 
+static struct platform_device *sh7763_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7763_early_devices,
+				   ARRAY_SIZE(sh7763_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From 5d8728a71f416fd38a0945271c71cb2933fc5734 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 09:18:13 +0000
Subject: sh: add sh7770_generic_defconfig

This patch adds a generic sh7770 defconfig file.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig
new file mode 100644
index 0000000..d6489b4
--- /dev/null
+++ b/arch/sh/configs/sh7770_generic_defconfig
@@ -0,0 +1,700 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc4
+# Tue May 12 14:48:21 2009
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
+CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+# CONFIG_GENERIC_GPIO is not set
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_ARCH_SUSPEND_POSSIBLE is not set
+CONFIG_ARCH_HIBERNATION_POSSIBLE=y
+CONFIG_SYS_SUPPORTS_TMU=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_ARCH_HAS_DEFAULT_IDLE=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+
+#
+# RCU Subsystem
+#
+# CONFIG_CLASSIC_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+CONFIG_CGROUPS=y
+# CONFIG_CGROUP_DEBUG is not set
+# CONFIG_CGROUP_NS is not set
+# CONFIG_CGROUP_FREEZER is not set
+# CONFIG_CGROUP_DEVICE is not set
+# CONFIG_CPUSETS is not set
+# CONFIG_CGROUP_CPUACCT is not set
+# CONFIG_RESOURCE_COUNTERS is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
+# CONFIG_MARKERS is not set
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+# CONFIG_MODULES is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_FREEZER=y
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+CONFIG_CPU_SH4A=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7201 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+# CONFIG_CPU_SUBTYPE_SH7751R is not set
+# CONFIG_CPU_SUBTYPE_SH7760 is not set
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+# CONFIG_CPU_SUBTYPE_SH7724 is not set
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+CONFIG_CPU_SUBTYPE_SH7770=y
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SH7786 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_MEMORY_START=0x08000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_29BIT=y
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+# CONFIG_FLATMEM_MANUAL is not set
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
+CONFIG_HAVE_MEMORY_PRESENT=y
+CONFIG_SPARSEMEM_STATIC=y
+
+#
+# Memory hotplug is currently incompatible with Software Suspend
+#
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+
+#
+# Cache configuration
+#
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH_STORE_QUEUES is not set
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TMU=y
+CONFIG_SH_TIMER_IRQ=16
+CONFIG_SH_PCLK_FREQ=41666666
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_SH_CPU_FREQ=y
+
+#
+# DMA support
+#
+# CONFIG_SH_DMA is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+# CONFIG_HEARTBEAT is not set
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_SCHED_HRTICK=y
+CONFIG_KEXEC=y
+# CONFIG_CRASH_DUMP is not set
+CONFIG_KEXEC_JUMP=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_GUSA=y
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00001000
+CONFIG_BOOT_LINK_OFFSET=0x00800000
+CONFIG_ENTRY_OFFSET=0x00001000
+# CONFIG_CMDLINE_BOOL is not set
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options (EXPERIMENTAL)
+#
+CONFIG_PM=y
+# CONFIG_PM_DEBUG is not set
+CONFIG_PM_SLEEP=y
+CONFIG_HIBERNATION=y
+CONFIG_PM_STD_PARTITION=""
+CONFIG_CPU_IDLE=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPU_IDLE_GOV_MENU=y
+# CONFIG_NET is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_BLK_DEV_HD is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=6
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_UNIX98_PTYS is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_SH_MOBILE=y
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_SPI is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+CONFIG_RTC_DRV_SH=y
+# CONFIG_RTC_DRV_GENERIC is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_UIO=y
+# CONFIG_UIO_PDRV is not set
+CONFIG_UIO_PDRV_GENIRQ=y
+# CONFIG_UIO_SMX is not set
+# CONFIG_UIO_SERCOS3 is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+# CONFIG_PROC_FS is not set
+# CONFIG_SYSFS is not set
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_SH_STANDARD_BIOS is not set
+# CONFIG_EARLY_SCIF_CONSOLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
+
+#
+# Library routines
+#
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
-- 
cgit v0.10.2


From f251935e02e89aa203e0729bfd727175018cec6f Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 09:20:08 +0000
Subject: sh: TMU platform data for sh7770

This patch adds TMU platform data for sh7770. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index b73578e..b61d614 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
@@ -76,7 +77,288 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffd81008,
+		.end	= 0xffd81013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 19,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffd81014,
+		.end	= 0xffd8101f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 20,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffd81020,
+		.end	= 0xffd8102f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 21,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
+static struct sh_timer_config tmu6_platform_data = {
+	.name = "TMU6",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+};
+
+static struct resource tmu6_resources[] = {
+	[0] = {
+		.name	= "TMU6",
+		.start	= 0xffd82008,
+		.end	= 0xffd82013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 22,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu6_device = {
+	.name		= "sh_tmu",
+	.id		= 6,
+	.dev = {
+		.platform_data	= &tmu6_platform_data,
+	},
+	.resource	= tmu6_resources,
+	.num_resources	= ARRAY_SIZE(tmu6_resources),
+};
+
+static struct sh_timer_config tmu7_platform_data = {
+	.name = "TMU7",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource tmu7_resources[] = {
+	[0] = {
+		.name	= "TMU7",
+		.start	= 0xffd82014,
+		.end	= 0xffd8201f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 23,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu7_device = {
+	.name		= "sh_tmu",
+	.id		= 7,
+	.dev = {
+		.platform_data	= &tmu7_platform_data,
+	},
+	.resource	= tmu7_resources,
+	.num_resources	= ARRAY_SIZE(tmu7_resources),
+};
+
+static struct sh_timer_config tmu8_platform_data = {
+	.name = "TMU8",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu8_resources[] = {
+	[0] = {
+		.name	= "TMU8",
+		.start	= 0xffd82020,
+		.end	= 0xffd8202b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 24,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu8_device = {
+	.name		= "sh_tmu",
+	.id		= 8,
+	.dev = {
+		.platform_data	= &tmu8_platform_data,
+	},
+	.resource	= tmu8_resources,
+	.num_resources	= ARRAY_SIZE(tmu8_resources),
+};
+
 static struct platform_device *sh7770_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+	&tmu6_device,
+	&tmu7_device,
+	&tmu8_device,
 	&sci_device,
 };
 
@@ -87,6 +369,24 @@ static int __init sh7770_devices_setup(void)
 }
 __initcall(sh7770_devices_setup);
 
+static struct platform_device *sh7770_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
+	&tmu6_device,
+	&tmu7_device,
+	&tmu8_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7770_early_devices,
+				   ARRAY_SIZE(sh7770_early_devices));
+}
+
 void __init plat_irq_setup(void)
 {
 }
-- 
cgit v0.10.2


From 5f8a29ba39d52b2eaaed907b3cb3016b949a8f9b Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 09:28:25 +0000
Subject: sh: TMU platform data for sh4-202

This patch adds TMU platform data for sh4-202. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index 7371abf..034c069 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
@@ -31,8 +32,103 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh4202_devices[] __initdata = {
 	&sci_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 };
 
 static int __init sh4202_devices_setup(void)
@@ -42,6 +138,18 @@ static int __init sh4202_devices_setup(void)
 }
 __initcall(sh4202_devices_setup);
 
+static struct platform_device *sh4202_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh4202_early_devices,
+				   ARRAY_SIZE(sh4202_early_devices));
+}
+
 void __init plat_irq_setup(void)
 {
 	/* do nothing - all IRL interrupts are handled by the board code */
-- 
cgit v0.10.2


From 67d889bd828a3cbf36ef27bd4a5c5f7ec76e20b9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 09:34:26 +0000
Subject: sh: add sh4-202 INTC tables

This patch adds INTC tables for sh4-202 with support
for HUDI, TMU0, TMU1, TMU2, RTC, SCIF and WDT.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index 034c069..be79fa1 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -2,6 +2,7 @@
  * SH4-202 Setup
  *
  *  Copyright (C) 2006  Paul Mundt
+ *  Copyright (C) 2009  Magnus Damm
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -12,6 +13,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/sh_timer.h>
+#include <linux/io.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
@@ -150,7 +152,59 @@ void __init plat_early_device_setup(void)
 				   ARRAY_SIZE(sh4202_early_devices));
 }
 
+enum {
+	UNUSED = 0,
+
+	/* interrupt sources */
+	IRL0, IRL1, IRL2, IRL3, /* only IRLM mode supported */
+	HUDI, TMU0, TMU1, TMU2, RTC, SCIF, WDT,
+};
+
+static struct intc_vect vectors[] __initdata = {
+	INTC_VECT(HUDI, 0x600),
+	INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420),
+	INTC_VECT(TMU2, 0x440), INTC_VECT(TMU2, 0x460),
+	INTC_VECT(RTC, 0x480), INTC_VECT(RTC, 0x4a0),
+	INTC_VECT(RTC, 0x4c0),
+	INTC_VECT(SCIF, 0x700), INTC_VECT(SCIF, 0x720),
+	INTC_VECT(SCIF, 0x740), INTC_VECT(SCIF, 0x760),
+	INTC_VECT(WDT, 0x560),
+};
+
+static struct intc_prio_reg prio_registers[] __initdata = {
+	{ 0xffd00004, 0, 16, 4, /* IPRA */ { TMU0, TMU1, TMU2, RTC } },
+	{ 0xffd00008, 0, 16, 4, /* IPRB */ { WDT, 0, 0, 0 } },
+	{ 0xffd0000c, 0, 16, 4, /* IPRC */ { 0, 0, SCIF, HUDI } },
+	{ 0xffd00010, 0, 16, 4, /* IPRD */ { IRL0, IRL1, IRL2, IRL3 } },
+};
+
+static DECLARE_INTC_DESC(intc_desc, "sh4-202", vectors, NULL,
+			 NULL, prio_registers, NULL);
+
+static struct intc_vect vectors_irlm[] __initdata = {
+	INTC_VECT(IRL0, 0x240), INTC_VECT(IRL1, 0x2a0),
+	INTC_VECT(IRL2, 0x300), INTC_VECT(IRL3, 0x360),
+};
+
+static DECLARE_INTC_DESC(intc_desc_irlm, "sh4-202_irlm", vectors_irlm, NULL,
+			 NULL, prio_registers, NULL);
+
 void __init plat_irq_setup(void)
 {
-	/* do nothing - all IRL interrupts are handled by the board code */
+	register_intc_controller(&intc_desc);
+}
+
+#define INTC_ICR	0xffd00000UL
+#define INTC_ICR_IRLM   (1<<7)
+
+void __init plat_irq_setup_pins(int mode)
+{
+	switch (mode) {
+	case IRQ_MODE_IRQ: /* individual interrupt mode for IRL3-0 */
+		ctrl_outw(ctrl_inw(INTC_ICR) | INTC_ICR_IRLM, INTC_ICR);
+		register_intc_controller(&intc_desc_irlm);
+		break;
+	default:
+		BUG();
+	}
 }
-- 
cgit v0.10.2


From e37677a429a67cb45004b060ea3376e500bc73c4 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 09:39:06 +0000
Subject: sh: TMU platform data for sh7343

This patch adds TMU platform data for sh7343. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index b9c361e..51204dc 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -173,6 +173,98 @@ static struct platform_device cmt_device = {
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffe00000,
@@ -213,6 +305,9 @@ static struct platform_device sci_device = {
 
 static struct platform_device *sh7343_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&iic0_device,
 	&iic1_device,
 	&sci_device,
@@ -240,6 +335,9 @@ __initcall(sh7343_devices_setup);
 
 static struct platform_device *sh7343_early_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 };
 
 void __init plat_early_device_setup(void)
-- 
cgit v0.10.2


From f2710ebcd0a0404ccca3bbad68fd3639fe87ba6f Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 09:48:21 +0000
Subject: sh: TMU platform data for sh7366

This patch adds TMU platform data for sh7366. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index d4a994b..04de0fa 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -180,6 +180,98 @@ static struct platform_device cmt_device = {
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffe00000,
@@ -202,6 +294,9 @@ static struct platform_device sci_device = {
 
 static struct platform_device *sh7366_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&iic_device,
 	&sci_device,
 	&usb_host_device,
@@ -229,6 +324,9 @@ __initcall(sh7366_devices_setup);
 
 static struct platform_device *sh7366_early_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 };
 
 void __init plat_early_device_setup(void)
-- 
cgit v0.10.2


From 6a3501b63d30643120566cc1efba5acd0e64b12d Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 09:50:46 +0000
Subject: sh: TMU platform data for sh7724

This patch adds TMU platform data for sh7724. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index a3039ce..852f810 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -268,8 +268,197 @@ static struct platform_device cmt_device = {
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu0",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xffd80008,
+		.end	= 0xffd80013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu0",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xffd80014,
+		.end	= 0xffd8001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu0",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xffd80020,
+		.end	= 0xffd8002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
+
+static struct sh_timer_config tmu3_platform_data = {
+	.name = "TMU3",
+	.channel_offset = 0x04,
+	.timer_bit = 0,
+	.clk = "tmu1",
+};
+
+static struct resource tmu3_resources[] = {
+	[0] = {
+		.name	= "TMU3",
+		.start	= 0xffd90008,
+		.end	= 0xffd90013,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 57,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu3_device = {
+	.name		= "sh_tmu",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &tmu3_platform_data,
+	},
+	.resource	= tmu3_resources,
+	.num_resources	= ARRAY_SIZE(tmu3_resources),
+};
+
+static struct sh_timer_config tmu4_platform_data = {
+	.name = "TMU4",
+	.channel_offset = 0x10,
+	.timer_bit = 1,
+	.clk = "tmu1",
+};
+
+static struct resource tmu4_resources[] = {
+	[0] = {
+		.name	= "TMU4",
+		.start	= 0xffd90014,
+		.end	= 0xffd9001f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 58,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu4_device = {
+	.name		= "sh_tmu",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &tmu4_platform_data,
+	},
+	.resource	= tmu4_resources,
+	.num_resources	= ARRAY_SIZE(tmu4_resources),
+};
+
+static struct sh_timer_config tmu5_platform_data = {
+	.name = "TMU5",
+	.channel_offset = 0x1c,
+	.timer_bit = 2,
+	.clk = "tmu1",
+};
+
+static struct resource tmu5_resources[] = {
+	[0] = {
+		.name	= "TMU5",
+		.start	= 0xffd90020,
+		.end	= 0xffd9002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 57,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu5_device = {
+	.name		= "sh_tmu",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &tmu5_platform_data,
+	},
+	.resource	= tmu5_resources,
+	.num_resources	= ARRAY_SIZE(tmu5_resources),
+};
+
 static struct platform_device *sh7724_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 	&sci_device,
 	&rtc_device,
 	&iic0_device,
@@ -296,6 +485,12 @@ device_initcall(sh7724_devices_setup);
 
 static struct platform_device *sh7724_early_devices[] __initdata = {
 	&cmt_device,
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+	&tmu3_device,
+	&tmu4_device,
+	&tmu5_device,
 };
 
 void __init plat_early_device_setup(void)
-- 
cgit v0.10.2


From acd664ab54a449a7257dcb3c7ef9cd1e310b4c4f Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 10:01:01 +0000
Subject: sh: TMU platform data for sh7705

This patch adds TMU platform data for sh7705. Both clockevent
and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index 63b67ba..3951366 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -13,6 +13,7 @@
 #include <linux/irq.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 #include <asm/rtc.h>
 
 enum {
@@ -116,7 +117,102 @@ static struct platform_device rtc_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xfffffe94,
+		.end	= 0xfffffe9f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0xe,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xfffffea0,
+		.end	= 0xfffffeab,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1a,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xfffffeac,
+		.end	= 0xfffffebb,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh7705_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&sci_device,
 	&rtc_device,
 };
@@ -128,6 +224,18 @@ static int __init sh7705_devices_setup(void)
 }
 __initcall(sh7705_devices_setup);
 
+static struct platform_device *sh7705_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7705_early_devices,
+				   ARRAY_SIZE(sh7705_early_devices));
+}
+
 void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
-- 
cgit v0.10.2


From c8a9011bce6c43f4988cbcf1db8fc31433e69964 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 10:04:49 +0000
Subject: sh: TMU platform data for sh7706/sh7707/sh7708/sh7709

Add TMU platform data for sh7706/sh7707/sh7708/sh7709.
Both clockevent and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index a74f960..9412d91 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 
 enum {
 	UNUSED = 0,
@@ -144,7 +145,102 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xfffffe94,
+		.end	= 0xfffffe9f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0xe,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xfffffea0,
+		.end	= 0xfffffeab,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1a,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xfffffeac,
+		.end	= 0xfffffebb,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh770x_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&sci_device,
 	&rtc_device,
 };
@@ -156,6 +252,18 @@ static int __init sh770x_devices_setup(void)
 }
 __initcall(sh770x_devices_setup);
 
+static struct platform_device *sh770x_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh770x_early_devices,
+				   ARRAY_SIZE(sh770x_early_devices));
+}
+
 void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
-- 
cgit v0.10.2


From e5ad00896a381937326ac55fc173630fe731d041 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 10:15:30 +0000
Subject: sh: TMU platform data for sh7710/sh7712

This patch adds TMU platform data for sh7710 and sh7712.
Both clockevent and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index 335098b..07ff38d 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -13,6 +13,7 @@
 #include <linux/irq.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 #include <asm/rtc.h>
 
 enum {
@@ -120,7 +121,102 @@ static struct platform_device sci_device = {
 	},
 };
 
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xa412fe94,
+		.end	= 0xa412fe9f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0xe,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xa412fea0,
+		.end	= 0xa412feab,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1a,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xa412feac,
+		.end	= 0xa412feb5,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh7710_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&sci_device,
 	&rtc_device,
 };
@@ -132,6 +228,18 @@ static int __init sh7710_devices_setup(void)
 }
 __initcall(sh7710_devices_setup);
 
+static struct platform_device *sh7710_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7710_early_devices,
+				   ARRAY_SIZE(sh7710_early_devices));
+}
+
 void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
-- 
cgit v0.10.2


From 4a1a5a2f60ceabc026ba28cdbf81d7d47603b480 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 10:17:52 +0000
Subject: sh: TMU platform data for sh7720/sh7721

This patch adds TMU platform data for sh7720 and sh7721.
Both clockevent and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index 003874a..c09619c 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -18,6 +18,7 @@
 #include <linux/serial.h>
 #include <linux/io.h>
 #include <linux/serial_sci.h>
+#include <linux/sh_timer.h>
 #include <asm/rtc.h>
 
 static struct resource rtc_resources[] = {
@@ -123,7 +124,103 @@ static struct platform_device usbf_device = {
 	.resource	= usbf_resources,
 };
 
+
+static struct sh_timer_config tmu0_platform_data = {
+	.name = "TMU0",
+	.channel_offset = 0x02,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 200,
+};
+
+static struct resource tmu0_resources[] = {
+	[0] = {
+		.name	= "TMU0",
+		.start	= 0xa412fe94,
+		.end	= 0xa412fe9f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 16,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu0_device = {
+	.name		= "sh_tmu",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &tmu0_platform_data,
+	},
+	.resource	= tmu0_resources,
+	.num_resources	= ARRAY_SIZE(tmu0_resources),
+};
+
+static struct sh_timer_config tmu1_platform_data = {
+	.name = "TMU1",
+	.channel_offset = 0xe,
+	.timer_bit = 1,
+	.clk = "module_clk",
+	.clocksource_rating = 200,
+};
+
+static struct resource tmu1_resources[] = {
+	[0] = {
+		.name	= "TMU1",
+		.start	= 0xa412fea0,
+		.end	= 0xa412feab,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 17,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu1_device = {
+	.name		= "sh_tmu",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &tmu1_platform_data,
+	},
+	.resource	= tmu1_resources,
+	.num_resources	= ARRAY_SIZE(tmu1_resources),
+};
+
+static struct sh_timer_config tmu2_platform_data = {
+	.name = "TMU2",
+	.channel_offset = 0x1a,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource tmu2_resources[] = {
+	[0] = {
+		.name	= "TMU2",
+		.start	= 0xa412feac,
+		.end	= 0xa412feb5,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 18,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device tmu2_device = {
+	.name		= "sh_tmu",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &tmu2_platform_data,
+	},
+	.resource	= tmu2_resources,
+	.num_resources	= ARRAY_SIZE(tmu2_resources),
+};
+
 static struct platform_device *sh7720_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
 	&rtc_device,
 	&sci_device,
 	&usb_ohci_device,
@@ -137,6 +234,18 @@ static int __init sh7720_devices_setup(void)
 }
 __initcall(sh7720_devices_setup);
 
+static struct platform_device *sh7720_early_devices[] __initdata = {
+	&tmu0_device,
+	&tmu1_device,
+	&tmu2_device,
+};
+
+void __init plat_early_device_setup(void)
+{
+	early_platform_add_devices(sh7720_early_devices,
+				   ARRAY_SIZE(sh7720_early_devices));
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From 2b23a8826a60268ec52302729911dd7ac6b10776 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 10:21:11 +0000
Subject: sh: CMT platform data for sh7720/sh7721

This patch adds CMT platform data for sh7720 and sh7721.
All 5 32-bit CMT channels unfortunately share a single IRQ.
Both clockevent and clocksource support is enabled.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index a9dee13..5391746 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -302,6 +302,7 @@ config CPU_SUBTYPE_SH7720
 	bool "Support SH7720 processor"
 	select CPU_SH3
 	select CPU_HAS_DSP
+	select SYS_SUPPORTS_CMT
 	help
 	  Select SH7720 if you have a SH3-DSP SH7720 CPU.
 
@@ -309,6 +310,7 @@ config CPU_SUBTYPE_SH7721
 	bool "Support SH7721 processor"
 	select CPU_SH3
 	select CPU_HAS_DSP
+	select SYS_SUPPORTS_CMT
 	help
 	  Select SH7721 if you have a SH3-DSP SH7721 CPU.
 
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index c09619c..d8b46f5 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -124,6 +124,157 @@ static struct platform_device usbf_device = {
 	.resource	= usbf_resources,
 };
 
+static struct sh_timer_config cmt0_platform_data = {
+	.name = "CMT0",
+	.channel_offset = 0x10,
+	.timer_bit = 0,
+	.clk = "module_clk",
+	.clockevent_rating = 125,
+	.clocksource_rating = 125,
+};
+
+static struct resource cmt0_resources[] = {
+	[0] = {
+		.name	= "CMT0",
+		.start	= 0x044a0010,
+		.end	= 0x044a001b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt0_device = {
+	.name		= "sh_cmt",
+	.id		= 0,
+	.dev = {
+		.platform_data	= &cmt0_platform_data,
+	},
+	.resource	= cmt0_resources,
+	.num_resources	= ARRAY_SIZE(cmt0_resources),
+};
+
+static struct sh_timer_config cmt1_platform_data = {
+	.name = "CMT1",
+	.channel_offset = 0x20,
+	.timer_bit = 1,
+	.clk = "module_clk",
+};
+
+static struct resource cmt1_resources[] = {
+	[0] = {
+		.name	= "CMT1",
+		.start	= 0x044a0020,
+		.end	= 0x044a002b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt1_device = {
+	.name		= "sh_cmt",
+	.id		= 1,
+	.dev = {
+		.platform_data	= &cmt1_platform_data,
+	},
+	.resource	= cmt1_resources,
+	.num_resources	= ARRAY_SIZE(cmt1_resources),
+};
+
+static struct sh_timer_config cmt2_platform_data = {
+	.name = "CMT2",
+	.channel_offset = 0x30,
+	.timer_bit = 2,
+	.clk = "module_clk",
+};
+
+static struct resource cmt2_resources[] = {
+	[0] = {
+		.name	= "CMT2",
+		.start	= 0x044a0030,
+		.end	= 0x044a003b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt2_device = {
+	.name		= "sh_cmt",
+	.id		= 2,
+	.dev = {
+		.platform_data	= &cmt2_platform_data,
+	},
+	.resource	= cmt2_resources,
+	.num_resources	= ARRAY_SIZE(cmt2_resources),
+};
+
+static struct sh_timer_config cmt3_platform_data = {
+	.name = "CMT3",
+	.channel_offset = 0x40,
+	.timer_bit = 3,
+	.clk = "module_clk",
+};
+
+static struct resource cmt3_resources[] = {
+	[0] = {
+		.name	= "CMT3",
+		.start	= 0x044a0040,
+		.end	= 0x044a004b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt3_device = {
+	.name		= "sh_cmt",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &cmt3_platform_data,
+	},
+	.resource	= cmt3_resources,
+	.num_resources	= ARRAY_SIZE(cmt3_resources),
+};
+
+static struct sh_timer_config cmt4_platform_data = {
+	.name = "CMT4",
+	.channel_offset = 0x50,
+	.timer_bit = 4,
+	.clk = "module_clk",
+};
+
+static struct resource cmt4_resources[] = {
+	[0] = {
+		.name	= "CMT4",
+		.start	= 0x044a0050,
+		.end	= 0x044a005b,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 104,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device cmt4_device = {
+	.name		= "sh_cmt",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &cmt4_platform_data,
+	},
+	.resource	= cmt4_resources,
+	.num_resources	= ARRAY_SIZE(cmt4_resources),
+};
 
 static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
@@ -218,6 +369,11 @@ static struct platform_device tmu2_device = {
 };
 
 static struct platform_device *sh7720_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+	&cmt2_device,
+	&cmt3_device,
+	&cmt4_device,
 	&tmu0_device,
 	&tmu1_device,
 	&tmu2_device,
@@ -235,6 +391,11 @@ static int __init sh7720_devices_setup(void)
 __initcall(sh7720_devices_setup);
 
 static struct platform_device *sh7720_early_devices[] __initdata = {
+	&cmt0_device,
+	&cmt1_device,
+	&cmt2_device,
+	&cmt3_device,
+	&cmt4_device,
 	&tmu0_device,
 	&tmu1_device,
 	&tmu2_device,
-- 
cgit v0.10.2


From f19900b2e608b604777a74d6d711bbf744657756 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 12 May 2009 10:25:54 +0000
Subject: sh: remove old TMU driver

This patch removes the old TMU driver (CONFIG_SH_TMU/timer-tmu.c)

As replacement, select the sh_tmu driver with CONFIG_SH_TIMER_TMU
and configure timer channel using platform data.

If multiple TMU channels are enabled using platform data, use the
earlytimer parameter on the kernel command line to select channel.
For instance, use "earlytimer=sh_tmu.0" to select the first channel.

To verify which timer is being used, look at printouts or the timer
irq count in /proc/interrupts.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5391746..e7b6406 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -473,16 +473,9 @@ source "arch/sh/boards/Kconfig"
 
 menu "Timer and clock configuration"
 
-config SH_TMU
-	bool "TMU timer support"
-	depends on CPU_SH3 || CPU_SH4
-	default y
-	help
-	  This enables the use of the TMU as the system timer.
-
 config SH_TIMER_TMU
 	bool "TMU timer driver"
-	depends on !SH_TMU && SYS_SUPPORTS_TMU
+	depends on SYS_SUPPORTS_TMU
 	default y
 	help
 	  This enables the build of the TMU timer driver.
diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h
index 6c08511..f27a88b 100644
--- a/arch/sh/include/asm/timer.h
+++ b/arch/sh/include/asm/timer.h
@@ -18,7 +18,6 @@ struct sys_timer {
 	struct sys_timer_ops	*ops;
 };
 
-extern struct sys_timer tmu_timer;
 extern struct sys_timer *sys_timer;
 
 /* arch/sh/kernel/timers/timer.c */
diff --git a/arch/sh/include/cpu-sh3/cpu/timer.h b/arch/sh/include/cpu-sh3/cpu/timer.h
deleted file mode 100644
index 793acf1..0000000
--- a/arch/sh/include/cpu-sh3/cpu/timer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * include/asm-sh/cpu-sh3/timer.h
- *
- * Copyright (C) 2004 Lineo Solutions, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __ASM_CPU_SH3_TIMER_H
-#define __ASM_CPU_SH3_TIMER_H
-
-/*
- * ---------------------------------------------------------------------------
- * TMU Common definitions for SH3 processors
- *	SH7706
- *	SH7709S
- *	SH7727
- *	SH7729R
- *	SH7710
- *	SH7720
- *	SH7710
- * ---------------------------------------------------------------------------
- */
-
-#if  !defined(CONFIG_CPU_SUBTYPE_SH7720) && !defined(CONFIG_CPU_SUBTYPE_SH7721)
-#define TMU_TOCR	0xfffffe90	/* Byte access */
-#endif
-
-#if defined(CONFIG_CPU_SUBTYPE_SH7710) || \
-    defined(CONFIG_CPU_SUBTYPE_SH7720) || \
-    defined(CONFIG_CPU_SUBTYPE_SH7721)
-#define TMU_012_TSTR	0xa412fe92	/* Byte access */
-
-#define TMU0_TCOR	0xa412fe94	/* Long access */
-#define TMU0_TCNT	0xa412fe98	/* Long access */
-#define TMU0_TCR	0xa412fe9c	/* Word access */
-
-#define TMU1_TCOR	0xa412fea0	/* Long access */
-#define TMU1_TCNT	0xa412fea4	/* Long access */
-#define TMU1_TCR	0xa412fea8	/* Word access */
-
-#define TMU2_TCOR	0xa412feac	/* Long access */
-#define TMU2_TCNT	0xa412feb0	/* Long access */
-#define TMU2_TCR	0xa412feb4	/* Word access */
-
-#else
-#define TMU_012_TSTR	0xfffffe92	/* Byte access */
-
-#define TMU0_TCOR	0xfffffe94	/* Long access */
-#define TMU0_TCNT	0xfffffe98	/* Long access */
-#define TMU0_TCR	0xfffffe9c	/* Word access */
-
-#define TMU1_TCOR	0xfffffea0	/* Long access */
-#define TMU1_TCNT	0xfffffea4	/* Long access */
-#define TMU1_TCR	0xfffffea8	/* Word access */
-
-#define TMU2_TCOR	0xfffffeac	/* Long access */
-#define TMU2_TCNT	0xfffffeb0	/* Long access */
-#define TMU2_TCR	0xfffffeb4	/* Word access */
-#if !defined(CONFIG_CPU_SUBTYPE_SH7720) && !defined(CONFIG_CPU_SUBTYPE_SH7721)
-#define TMU2_TCPR2	0xfffffeb8	/* Long access */
-#endif
-#endif
-
-#endif /* __ASM_CPU_SH3_TIMER_H */
-
diff --git a/arch/sh/include/cpu-sh4/cpu/timer.h b/arch/sh/include/cpu-sh4/cpu/timer.h
deleted file mode 100644
index d1e796b..0000000
--- a/arch/sh/include/cpu-sh4/cpu/timer.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * include/asm-sh/cpu-sh4/timer.h
- *
- * Copyright (C) 2004 Lineo Solutions, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __ASM_CPU_SH4_TIMER_H
-#define __ASM_CPU_SH4_TIMER_H
-
-/*
- * ---------------------------------------------------------------------------
- * TMU Common definitions for SH4 processors
- *	SH7750S/SH7750R
- *	SH7751/SH7751R
- *	SH7760
- *	SH-X3
- * ---------------------------------------------------------------------------
- */
-#ifdef CONFIG_CPU_SUBTYPE_SHX3
-#define TMU_012_BASE	0xffc10000
-#define TMU_345_BASE	0xffc20000
-#else
-#define TMU_012_BASE	0xffd80000
-#define TMU_345_BASE	0xfe100000
-#endif
-
-#define TMU_TOCR	TMU_012_BASE	/* Not supported on all CPUs */
-
-#define TMU_012_TSTR	(TMU_012_BASE + 0x04)
-#define TMU_345_TSTR	(TMU_345_BASE + 0x04)
-
-#define TMU0_TCOR	(TMU_012_BASE + 0x08)
-#define TMU0_TCNT	(TMU_012_BASE + 0x0c)
-#define TMU0_TCR	(TMU_012_BASE + 0x10)
-
-#define TMU1_TCOR       (TMU_012_BASE + 0x14)
-#define TMU1_TCNT       (TMU_012_BASE + 0x18)
-#define TMU1_TCR        (TMU_012_BASE + 0x1c)
-
-#define TMU2_TCOR       (TMU_012_BASE + 0x20)
-#define TMU2_TCNT       (TMU_012_BASE + 0x24)
-#define TMU2_TCR	(TMU_012_BASE + 0x28)
-#define TMU2_TCPR	(TMU_012_BASE + 0x2c)
-
-#define TMU3_TCOR	(TMU_345_BASE + 0x08)
-#define TMU3_TCNT	(TMU_345_BASE + 0x0c)
-#define TMU3_TCR	(TMU_345_BASE + 0x10)
-
-#define TMU4_TCOR	(TMU_345_BASE + 0x14)
-#define TMU4_TCNT	(TMU_345_BASE + 0x18)
-#define TMU4_TCR	(TMU_345_BASE + 0x1c)
-
-#define TMU5_TCOR	(TMU_345_BASE + 0x20)
-#define TMU5_TCNT	(TMU_345_BASE + 0x24)
-#define TMU5_TCR	(TMU_345_BASE + 0x28)
-
-#endif /* __ASM_CPU_SH4_TIMER_H */
diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile
index 8bceba5..fefd7ed 100644
--- a/arch/sh/kernel/timers/Makefile
+++ b/arch/sh/kernel/timers/Makefile
@@ -3,5 +3,3 @@
 #
 
 obj-y	:= timer.o
-
-obj-$(CONFIG_SH_TMU)		+= timer-tmu.o
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
deleted file mode 100644
index fe8d893..0000000
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * arch/sh/kernel/timers/timer-tmu.c - TMU Timer Support
- *
- *  Copyright (C) 2005 - 2007  Paul Mundt
- *
- * TMU handling code hacked out of arch/sh/kernel/time.c
- *
- *  Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
- *  Copyright (C) 2000  Philipp Rumpf <prumpf@tux.org>
- *  Copyright (C) 2002, 2003, 2004  Paul Mundt
- *  Copyright (C) 2002  M. R. Brown  <mrbrown@linux-sh.org>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/seqlock.h>
-#include <linux/clockchips.h>
-#include <asm/timer.h>
-#include <asm/rtc.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/clock.h>
-
-#define TMU_TOCR_INIT	0x00
-#define TMU_TCR_INIT	0x0020
-
-#define TMU0		(0)
-#define TMU1		(1)
-
-static inline void _tmu_start(int tmu_num)
-{
-	ctrl_outb(ctrl_inb(TMU_012_TSTR) | (0x1<<tmu_num), TMU_012_TSTR);
-}
-
-static inline void _tmu_set_irq(int tmu_num, int enabled)
-{
-	register unsigned long tmu_tcr = TMU0_TCR + (0xc*tmu_num);
-	ctrl_outw( (enabled ? ctrl_inw(tmu_tcr) | (1<<5) : ctrl_inw(tmu_tcr) & ~(1<<5)), tmu_tcr);
-}
-
-static inline void _tmu_stop(int tmu_num)
-{
-	ctrl_outb(ctrl_inb(TMU_012_TSTR) & ~(0x1<<tmu_num), TMU_012_TSTR);
-}
-
-static inline void _tmu_clear_status(int tmu_num)
-{
-	register unsigned long tmu_tcr = TMU0_TCR + (0xc*tmu_num);
-	/* Clear UNF bit */
-	ctrl_outw(ctrl_inw(tmu_tcr) & ~0x100, tmu_tcr);
-}
-
-static inline unsigned long _tmu_read(int tmu_num)
-{
-        return ctrl_inl(TMU0_TCNT+0xC*tmu_num);
-}
-
-static int tmu_timer_start(void)
-{
-	_tmu_start(TMU0);
-	_tmu_start(TMU1);
-	_tmu_set_irq(TMU0,1);
-	return 0;
-}
-
-static int tmu_timer_stop(void)
-{
-	_tmu_stop(TMU0);
-	_tmu_stop(TMU1);
-	_tmu_clear_status(TMU0);
-	return 0;
-}
-
-/*
- * also when the module_clk is scaled the TMU1
- * will show the same frequency
- */
-static int tmus_are_scaled;
-
-static cycle_t tmu_timer_read(struct clocksource *cs)
-{
-	return ((cycle_t)(~_tmu_read(TMU1)))<<tmus_are_scaled;
-}
-
-
-static unsigned long tmu_latest_interval[3];
-static void tmu_timer_set_interval(int tmu_num, unsigned long interval, unsigned int reload)
-{
-	unsigned long tmu_tcnt = TMU0_TCNT + tmu_num*0xC;
-	unsigned long tmu_tcor = TMU0_TCOR + tmu_num*0xC;
-
-	_tmu_stop(tmu_num);
-
-	ctrl_outl(interval, tmu_tcnt);
-	tmu_latest_interval[tmu_num] = interval;
-
-	/*
-	 * TCNT reloads from TCOR on underflow, clear it if we don't
-	 * intend to auto-reload
-	 */
-	ctrl_outl( reload ? interval : 0 , tmu_tcor);
-
-	_tmu_start(tmu_num);
-}
-
-static int tmu_set_next_event(unsigned long cycles,
-			      struct clock_event_device *evt)
-{
-	tmu_timer_set_interval(TMU0,cycles, evt->mode == CLOCK_EVT_MODE_PERIODIC);
-	_tmu_set_irq(TMU0,1);
-	return 0;
-}
-
-static void tmu_set_mode(enum clock_event_mode mode,
-			 struct clock_event_device *evt)
-{
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		ctrl_outl(tmu_latest_interval[TMU0], TMU0_TCOR);
-		break;
-	case CLOCK_EVT_MODE_ONESHOT:
-		ctrl_outl(0, TMU0_TCOR);
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_RESUME:
-		break;
-	}
-}
-
-static struct clock_event_device tmu0_clockevent = {
-	.name		= "tmu0",
-	.shift		= 32,
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.set_mode	= tmu_set_mode,
-	.set_next_event	= tmu_set_next_event,
-};
-
-static irqreturn_t tmu_timer_interrupt(int irq, void *dummy)
-{
-	struct clock_event_device *evt = &tmu0_clockevent;
-	_tmu_clear_status(TMU0);
-	_tmu_set_irq(TMU0,tmu0_clockevent.mode != CLOCK_EVT_MODE_ONESHOT);
-
-	switch (tmu0_clockevent.mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-	case CLOCK_EVT_MODE_PERIODIC:
-		evt->event_handler(evt);
-		break;
-	default:
-		break;
-	}
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction tmu0_irq = {
-	.name		= "periodic/oneshot timer",
-	.handler	= tmu_timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-};
-
-static void __init tmu_clk_init(struct clk *clk)
-{
-	u8 divisor  = TMU_TCR_INIT & 0x7;
-	int tmu_num = clk->name[3]-'0';
-	ctrl_outw(TMU_TCR_INIT, TMU0_TCR+(tmu_num*0xC));
-	clk->rate = clk_get_rate(clk->parent) / (4 << (divisor << 1));
-}
-
-static void tmu_clk_recalc(struct clk *clk)
-{
-	int tmu_num = clk->name[3]-'0';
-	unsigned long prev_rate = clk_get_rate(clk);
-	unsigned long flags;
-	u8 divisor = ctrl_inw(TMU0_TCR+tmu_num*0xC) & 0x7;
-	clk->rate  = clk_get_rate(clk->parent) / (4 << (divisor << 1));
-
-	if(prev_rate==clk_get_rate(clk))
-		return;
-
-	if(tmu_num)
-		return; /* No more work on TMU1 */
-
-	local_irq_save(flags);
-	tmus_are_scaled = (prev_rate > clk->rate);
-
-	_tmu_stop(TMU0);
-
-	tmu0_clockevent.mult = div_sc(clk->rate, NSEC_PER_SEC,
-				tmu0_clockevent.shift);
-	tmu0_clockevent.max_delta_ns =
-			clockevent_delta2ns(-1, &tmu0_clockevent);
-	tmu0_clockevent.min_delta_ns =
-			clockevent_delta2ns(1, &tmu0_clockevent);
-
-	if (tmus_are_scaled)
-		tmu_latest_interval[TMU0] >>= 1;
-	else
-		tmu_latest_interval[TMU0] <<= 1;
-
-	tmu_timer_set_interval(TMU0,
-		tmu_latest_interval[TMU0],
-		tmu0_clockevent.mode == CLOCK_EVT_MODE_PERIODIC);
-
-	_tmu_start(TMU0);
-
-	local_irq_restore(flags);
-}
-
-static struct clk_ops tmu_clk_ops = {
-	.init		= tmu_clk_init,
-	.recalc		= tmu_clk_recalc,
-};
-
-static struct clk tmu0_clk = {
-	.name		= "tmu0_clk",
-	.ops		= &tmu_clk_ops,
-};
-
-static struct clk tmu1_clk = {
-	.name		= "tmu1_clk",
-	.ops		= &tmu_clk_ops,
-};
-
-static int tmu_timer_init(void)
-{
-	unsigned long interval;
-	unsigned long frequency;
-
-	setup_irq(CONFIG_SH_TIMER_IRQ, &tmu0_irq);
-
-	tmu0_clk.parent = clk_get(NULL, "module_clk");
-	tmu1_clk.parent = clk_get(NULL, "module_clk");
-
-	tmu_timer_stop();
-
-#if !defined(CONFIG_CPU_SUBTYPE_SH7720) && \
-    !defined(CONFIG_CPU_SUBTYPE_SH7721) && \
-    !defined(CONFIG_CPU_SUBTYPE_SH7760) && \
-    !defined(CONFIG_CPU_SUBTYPE_SH7785) && \
-    !defined(CONFIG_CPU_SUBTYPE_SH7786) && \
-    !defined(CONFIG_CPU_SUBTYPE_SHX3)
-	ctrl_outb(TMU_TOCR_INIT, TMU_TOCR);
-#endif
-
-	clk_register(&tmu0_clk);
-	clk_register(&tmu1_clk);
-	clk_enable(&tmu0_clk);
-	clk_enable(&tmu1_clk);
-
-	frequency = clk_get_rate(&tmu0_clk);
-	interval = (frequency + HZ / 2) / HZ;
-
-	tmu_timer_set_interval(TMU0,interval, 1);
-	tmu_timer_set_interval(TMU1,~0,1);
-
-	_tmu_start(TMU1);
-
-	clocksource_sh.rating = 200;
-	clocksource_sh.mask = CLOCKSOURCE_MASK(32);
-	clocksource_sh.read = tmu_timer_read;
-	clocksource_sh.shift = 10;
-	clocksource_sh.mult = clocksource_hz2mult(clk_get_rate(&tmu1_clk),
-						  clocksource_sh.shift);
-	clocksource_sh.flags = CLOCK_SOURCE_IS_CONTINUOUS;
-	clocksource_register(&clocksource_sh);
-
-	tmu0_clockevent.mult = div_sc(frequency, NSEC_PER_SEC,
-				      tmu0_clockevent.shift);
-	tmu0_clockevent.max_delta_ns =
-			clockevent_delta2ns(-1, &tmu0_clockevent);
-	tmu0_clockevent.min_delta_ns =
-			clockevent_delta2ns(1, &tmu0_clockevent);
-
-	tmu0_clockevent.cpumask = cpumask_of(0);
-	tmu0_clockevent.rating = 100;
-
-	clockevents_register_device(&tmu0_clockevent);
-
-	return 0;
-}
-
-static struct sys_timer_ops tmu_timer_ops = {
-	.init		= tmu_timer_init,
-	.start		= tmu_timer_start,
-	.stop		= tmu_timer_stop,
-};
-
-struct sys_timer tmu_timer = {
-	.name	= "tmu",
-	.ops	= &tmu_timer_ops,
-};
diff --git a/arch/sh/kernel/timers/timer.c b/arch/sh/kernel/timers/timer.c
index 920891a..f8812bd 100644
--- a/arch/sh/kernel/timers/timer.c
+++ b/arch/sh/kernel/timers/timer.c
@@ -14,9 +14,6 @@
 #include <asm/timer.h>
 
 static struct sys_timer *sys_timers[] = {
-#ifdef CONFIG_SH_TMU
-	&tmu_timer,
-#endif
 	NULL,
 };
 
-- 
cgit v0.10.2


From 8be5f1a68f2c14082939dd54e7037dcee2eb54f8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 12 May 2009 19:53:55 +0900
Subject: sh: Kill off the remnants of the old timer code.

Now with all of the TMU users moved over to the new TMU driver, and the
old TMU driver killed off, the left-over infrastructure can go along
with it.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index e7b6406..fb75c2d 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -494,17 +494,6 @@ config SH_TIMER_MTU2
 	help
 	  This enables build of the MTU2 timer driver.
 
-config SH_TIMER_IRQ
-	int
-	default "28" if CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
-			CPU_SUBTYPE_SH7763
-	default "86" if CPU_SUBTYPE_SH7619
-	default "140" if CPU_SUBTYPE_SH7206
-	default "142" if CPU_SUBTYPE_SH7203 && SH_CMT
-	default "153" if CPU_SUBTYPE_SH7203 && SH_MTU2
-	default "238" if CPU_SUBTYPE_MXG
-	default "16"
-
 config SH_PCLK_FREQ
 	int "Peripheral clock frequency (in Hz)"
 	default "27000000" if CPU_SUBTYPE_SH7343
diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h
deleted file mode 100644
index f27a88b..0000000
--- a/arch/sh/include/asm/timer.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef __ASM_SH_TIMER_H
-#define __ASM_SH_TIMER_H
-
-#include <linux/sysdev.h>
-#include <linux/clocksource.h>
-#include <cpu/timer.h>
-
-struct sys_timer_ops {
-	int (*init)(void);
-	int (*start)(void);
-	int (*stop)(void);
-};
-
-struct sys_timer {
-	const char		*name;
-
-	struct sys_device	dev;
-	struct sys_timer_ops	*ops;
-};
-
-extern struct sys_timer *sys_timer;
-
-/* arch/sh/kernel/timers/timer.c */
-struct sys_timer *get_sys_timer(void);
-
-extern struct clocksource clocksource_sh;
-
-#endif /* __ASM_SH_TIMER_H */
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index aee08af..9411e3e 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -14,7 +14,7 @@ obj-y	:= debugtraps.o idle.o io.o io_generic.o irq.o			\
 	   sys_sh.o sys_sh32.o syscalls_32.o time.o topology.o	\
 	   traps.o traps_32.o
 
-obj-y				+= cpu/ timers/
+obj-y				+= cpu/
 obj-$(CONFIG_VSYSCALL)		+= vsyscall/
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SH_STANDARD_BIOS)	+= sh_bios.o
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index c845a0d..67b9f6c 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -4,7 +4,7 @@ obj-y	:= debugtraps.o idle.o io.o io_generic.o irq.o machvec.o process_64.o \
 	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
 	   syscalls_64.o time.o topology.o traps.o traps_64.o
 
-obj-y				+= cpu/ timers/
+obj-y				+= cpu/
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SH_CPU_FREQ)	+= cpufreq.o
 obj-$(CONFIG_MODULES)		+= sh_ksyms_64.o module.o
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 133dbe4..f54769f 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -26,7 +26,6 @@
 #include <linux/platform_device.h>
 #include <linux/proc_fs.h>
 #include <asm/clock.h>
-#include <asm/timer.h>
 
 static LIST_HEAD(clock_list);
 static DEFINE_SPINLOCK(clock_lock);
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index a77838f..2edde32 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -22,9 +22,6 @@
 #include <linux/rtc.h>
 #include <asm/clock.h>
 #include <asm/rtc.h>
-#include <asm/timer.h>
-
-struct sys_timer *sys_timer;
 
 /* Dummy RTC ops */
 static void null_rtc_get_time(struct timespec *tv)
@@ -94,20 +91,9 @@ module_init(rtc_generic_init);
 
 void (*board_time_init)(void);
 
-struct clocksource clocksource_sh = {
-	.name		= "SuperH",
-};
-
 unsigned long long sched_clock(void)
 {
-	unsigned long long cycles;
-
-	/* jiffies based sched_clock if no clocksource is installed */
-	if (!clocksource_sh.rating)
-		return (jiffies_64 - INITIAL_JIFFIES) * (NSEC_PER_SEC / HZ);
-
-	cycles = clocksource_sh.read(&clocksource_sh);
-	return cyc2ns(&clocksource_sh, cycles);
+	return (jiffies_64 - INITIAL_JIFFIES) * (NSEC_PER_SEC / HZ);
 }
 
 static void __init sh_late_time_init(void)
@@ -117,18 +103,7 @@ static void __init sh_late_time_init(void)
 	 * Run probe() for one "earlytimer" device.
 	 */
 	early_platform_driver_register_all("earlytimer");
-	if (early_platform_driver_probe("earlytimer", 1, 0))
-		return;
-
-	/*
-	 * Find the timer to use as the system timer, it will be
-	 * initialized for us.
-	 */
-	sys_timer = get_sys_timer();
-	if (unlikely(!sys_timer))
-		panic("System timer missing.\n");
-
-	printk(KERN_INFO "Using %s for system timer\n", sys_timer->name);
+	early_platform_driver_probe("earlytimer", 1, 0);
 }
 
 void __init time_init(void)
diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile
deleted file mode 100644
index fefd7ed..0000000
--- a/arch/sh/kernel/timers/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the various Linux/SuperH timers
-#
-
-obj-y	:= timer.o
diff --git a/arch/sh/kernel/timers/timer.c b/arch/sh/kernel/timers/timer.c
deleted file mode 100644
index f8812bd..0000000
--- a/arch/sh/kernel/timers/timer.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * arch/sh/kernel/timers/timer.c - Common timer code
- *
- *  Copyright (C) 2005  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <asm/timer.h>
-
-static struct sys_timer *sys_timers[] = {
-	NULL,
-};
-
-static char timer_override[10];
-static int __init timer_setup(char *str)
-{
-	if (str)
-		strlcpy(timer_override, str, sizeof(timer_override));
-	return 1;
-}
-__setup("timer=", timer_setup);
-
-struct sys_timer *get_sys_timer(void)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(sys_timers); i++) {
-		struct sys_timer *t = sys_timers[i];
-
-		if (unlikely(!t))
-			break;
-		if (unlikely(timer_override[0]))
-			if ((strcmp(timer_override, t->name) != 0))
-				continue;
-		if (likely(t->ops->init() == 0))
-			return t;
-	}
-
-	return NULL;
-}
-- 
cgit v0.10.2


From e758a33d6fc5b9d6a3ae489863d04fcecad8120b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 12 May 2009 21:59:01 +1000
Subject: perf_counter: call hw_perf_save_disable/restore around group_sched_in

I noticed that when enabling a group via the PERF_COUNTER_IOC_ENABLE
ioctl on the group leader, the counters weren't enabled and counting
immediately on return from the ioctl, but did start counting a little
while later (presumably after a context switch).

The reason was that __perf_counter_enable calls group_sched_in which
calls hw_perf_group_sched_in, which on powerpc assumes that the caller
has called hw_perf_save_disable already.  Until commit 46d686c6
("perf_counter: put whole group on when enabling group leader") it was
true that all callers of group_sched_in had called
hw_perf_save_disable first, and the powerpc hw_perf_group_sched_in
relies on that (there isn't an x86 version).

This fixes the problem by putting calls to hw_perf_save_disable /
hw_perf_restore around the calls to group_sched_in and
counter_sched_in in __perf_counter_enable.  Having the calls to
hw_perf_save_disable/restore around the counter_sched_in call is
harmless and makes this call consistent with the other call sites
of counter_sched_in, which have all called hw_perf_save_disable first.

[ Impact: more precise counter group disable/enable functionality ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <18953.25733.53359.147452@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5ea0240..ff166c1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -663,6 +663,7 @@ static void __perf_counter_enable(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter_context *ctx = counter->ctx;
 	struct perf_counter *leader = counter->group_leader;
+	unsigned long pmuflags;
 	unsigned long flags;
 	int err;
 
@@ -689,14 +690,18 @@ static void __perf_counter_enable(void *info)
 	if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
 		goto unlock;
 
-	if (!group_can_go_on(counter, cpuctx, 1))
+	if (!group_can_go_on(counter, cpuctx, 1)) {
 		err = -EEXIST;
-	else if (counter == leader)
-		err = group_sched_in(counter, cpuctx, ctx,
-				     smp_processor_id());
-	else
-		err = counter_sched_in(counter, cpuctx, ctx,
-				       smp_processor_id());
+	} else {
+		pmuflags = hw_perf_save_disable();
+		if (counter == leader)
+			err = group_sched_in(counter, cpuctx, ctx,
+					     smp_processor_id());
+		else
+			err = counter_sched_in(counter, cpuctx, ctx,
+					       smp_processor_id());
+		hw_perf_restore(pmuflags);
+	}
 
 	if (err) {
 		/*
-- 
cgit v0.10.2


From b5710ce92a8cf8e3fc0ffc230cfdbfa23463f1c8 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 12 May 2009 18:51:28 +0400
Subject: x86/pci: add 4 more return parameters to
 IO_APIC_get_PCI_irq_vector(), fix

Fix trivial typo in the drivers/pci/hotplug/ibmphp_core.c changes.

[ Impact: build fix ]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Yinghai Lu <yinghai@kernel.org>
Cc: eswierk@aristanetworks.com
LKML-Reference: <20090512145128.GA10220@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index ef53b05..79901a0 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -161,7 +161,7 @@ int ibmphp_init_devno(struct slot **cur_slot)
 			(*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
 			for (i = 0; i < 4; i++)
 				(*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
-						(int) (*cur_slot)->device, i.
+						(int) (*cur_slot)->device, i,
 						&ioapic, &ioapic_pin,
 						&triggering, &polarity);
 
-- 
cgit v0.10.2


From 7537d81aa7b7cd31b0caeac8091456e93d96fa8d Mon Sep 17 00:00:00 2001
From: Abhijith Das <adas@redhat.com>
Date: Tue, 12 May 2009 11:16:20 -0500
Subject: GFS2: Fix timestamps on write

This patch copies the timestamps from the vfs inode into gfs2 and syncs
it to the disk inode during writes.

Signed-off-by: Abhijith Das <adas@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index a6dde17..e566421 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -781,10 +781,12 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 	unlock_page(page);
 	page_cache_release(page);
 
-	if (inode->i_size < to) {
-		i_size_write(inode, to);
-		ip->i_disksize = inode->i_size;
-		di->di_size = cpu_to_be64(inode->i_size);
+	if (copied) {
+		if (inode->i_size < to) {
+			i_size_write(inode, to);
+			ip->i_disksize = inode->i_size;
+		}
+		gfs2_dinode_out(ip, di);
 		mark_inode_dirty(inode);
 	}
 
@@ -824,7 +826,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct buffer_head *dibh;
 	struct gfs2_alloc *al = ip->i_alloc;
-	struct gfs2_dinode *di;
 	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
 	unsigned int to = from + len;
 	int ret;
@@ -847,11 +848,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 		gfs2_page_add_databufs(ip, page, from, to);
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-
-	if (likely(ret >= 0) && (inode->i_size > ip->i_disksize)) {
-		di = (struct gfs2_dinode *)dibh->b_data;
-		ip->i_disksize = inode->i_size;
-		di->di_size = cpu_to_be64(inode->i_size);
+	if (ret > 0) {
+		if (inode->i_size > ip->i_disksize)
+			ip->i_disksize = inode->i_size;
+		gfs2_dinode_out(ip, dibh->b_data);
 		mark_inode_dirty(inode);
 	}
 
-- 
cgit v0.10.2


From 12b7ac176831df1aa58a787e67c3e5d698b30163 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 May 2009 00:57:44 -0400
Subject: ext4: Rename ext4_get_blocks_wrap() to be ext4_get_blocks()

Another function rename for clarity's sake.  The _wrap prefix simply
confuses people, and didn't add much people trying to follow the code
paths.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b647899..052d637 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp,
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
-						0, 0, 0);
+		err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0, 0, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 89190ae..5dc8368 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1617,10 +1617,10 @@ extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 			  loff_t len);
-extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
-			sector_t block, unsigned int max_blocks,
-			struct buffer_head *bh, int create,
-			int extend_disksize, int flag);
+extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
+			   sector_t block, unsigned int max_blocks,
+			   struct buffer_head *bh, int create,
+			   int extend_disksize, int flag);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4fec6b7..7e7d02d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3151,9 +3151,9 @@ retry:
 			break;
 		}
 		map_bh.b_state = 0;
-		ret = ext4_get_blocks_wrap(handle, inode, block,
-					  max_blocks, &map_bh,
-					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
+		ret = ext4_get_blocks(handle, inode, block,
+				      max_blocks, &map_bh,
+				      EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f758e80..a9a9b9b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1121,7 +1121,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 }
 
 /*
- * The ext4_get_blocks_wrap() function try to look up the requested blocks,
+ * The ext4_get_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
  * Otherwise it takes the write lock of the i_data_sem and allocate blocks
@@ -1142,9 +1142,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
  *
  * It returns the error in case of allocation failure.
  */
-int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
-			unsigned int max_blocks, struct buffer_head *bh,
-			int create, int extend_disksize, int flag)
+int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
+		    unsigned int max_blocks, struct buffer_head *bh,
+		    int create, int extend_disksize, int flag)
 {
 	int retval;
 
@@ -1268,8 +1268,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
 		started = 1;
 	}
 
-	ret = ext4_get_blocks_wrap(handle, inode, iblock,
-					max_blocks, bh_result, create, 0, 0);
+	ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
+			      create, 0, 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1294,10 +1294,9 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	dummy.b_state = 0;
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
-	err = ext4_get_blocks_wrap(handle, inode, block, 1,
-					&dummy, create, 1, 0);
+	err = ext4_get_blocks(handle, inode, block, 1, &dummy, create, 1, 0);
 	/*
-	 * ext4_get_blocks_wrap() returns number of blocks
+	 * ext4_get_blocks() returns number of blocks
 	 * mapped. 0 in case of a HOLE.
 	 */
 	if (err > 0) {
@@ -2009,8 +2008,8 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 
 	handle = ext4_journal_current_handle();
 	BUG_ON(!handle);
-	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-				   bh_result, 1, 0, EXT4_DELALLOC_RSVED);
+	ret = ext4_get_blocks(handle, inode, iblock, max_blocks,
+			      bh_result, 1, 0, EXT4_DELALLOC_RSVED);
 	if (ret <= 0)
 		return ret;
 
@@ -2067,11 +2066,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	/*
 	 * We need to make sure the BH_Delay flag is passed down to
 	 * ext4_da_get_block_write(), since it calls
-	 * ext4_get_blocks_wrap() with the EXT4_DELALLOC_RSVED flag.
-	 * This flag causes ext4_get_blocks_wrap() to call
+	 * ext4_get_blocks() with the EXT4_DELALLOC_RSVED flag.
+	 * This flag causes ext4_get_blocks() to call
 	 * ext4_da_update_reserve_space() if the passed buffer head
 	 * has the BH_Delay flag set.  In the future, once we clean up
-	 * the interfaces to ext4_get_blocks_wrap(), we should pass in
+	 * the interfaces to ext4_get_blocks(), we should pass in
 	 * a separate flag which requests that the delayed allocation
 	 * statistics should be updated, instead of depending on the
 	 * state information getting passed down via the map_bh's
@@ -2363,7 +2362,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
+	ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
 	if ((ret == 0) && !buffer_delay(bh_result)) {
 		/* the block isn't (pre)allocated yet, let's reserve space */
 		/*
@@ -2407,8 +2406,8 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
 	 * we don't want to do block allocation in writepage
 	 * so call get_block_wrap with create = 0
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
-				   bh_result, 0, 0, 0);
+	ret = ext4_get_blocks(NULL, inode, iblock, max_blocks,
+			      bh_result, 0, 0, 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -5034,7 +5033,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
  * Calculate the journal credits for a chunk of data modification.
  *
  * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks.
+ * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
  *
  * journal buffers for data blocks are not included here, as DIO
  * and fallocate do no need to journal data buffers.
-- 
cgit v0.10.2


From c21770573319922e3f3fcb331cfaa290c49f1c81 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 May 2009 00:58:52 -0400
Subject: ext4: Define a new set of flags for ext4_get_blocks()

The functions ext4_get_blocks(), ext4_ext_get_blocks(), and
ext4_ind_get_blocks() used an ad-hoc set of integer variables used as
boolean flags passed in as arguments.  Use a single flags parameter
and a setandard set of bitfield flags instead.  This saves space on
the call stack, and it also makes the code a bit more understandable.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 052d637..9dc93168 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -131,7 +131,7 @@ static int ext4_readdir(struct file *filp,
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0, 0, 0);
+		err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5dc8368..17feb4a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -314,10 +314,20 @@ struct ext4_new_group_data {
 };
 
 /*
- * Following is used by preallocation code to tell get_blocks() that we
- * want uninitialzed extents.
+ * Flags used by ext4_get_blocks()
  */
-#define EXT4_CREATE_UNINITIALIZED_EXT		2
+	/* Allocate any needed blocks and/or convert an unitialized
+	   extent to be an initialized ext4 */
+#define EXT4_GET_BLOCKS_CREATE			1
+	/* Request the creation of an unitialized extent */
+#define EXT4_GET_BLOCKS_UNINIT_EXT		2
+#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
+						 EXT4_GET_BLOCKS_CREATE)
+	/* Update the ext4_inode_info i_disksize field */
+#define EXT4_GET_BLOCKS_EXTEND_DISKSIZE		4
+	/* Caller is from the delayed allocation writeout path,
+	   so the filesystem blocks have already been accounted for */
+#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	8
 
 /*
  * ioctl commands
@@ -1610,8 +1620,7 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 				       int chunk);
 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t iblock, unsigned int max_blocks,
-			       struct buffer_head *bh_result,
-			       int create, int extend_disksize);
+			       struct buffer_head *bh_result, int flags);
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
@@ -1619,8 +1628,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 			  loff_t len);
 extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
 			   sector_t block, unsigned int max_blocks,
-			   struct buffer_head *bh, int create,
-			   int extend_disksize, int flag);
+			   struct buffer_head *bh, int flags);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7e7d02d..27c383c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2784,7 +2784,7 @@ fix_extent_len:
 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			ext4_lblk_t iblock,
 			unsigned int max_blocks, struct buffer_head *bh_result,
-			int create, int extend_disksize)
+			int flags)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
@@ -2803,7 +2803,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	cache_type = ext4_ext_in_cache(inode, iblock, &newex);
 	if (cache_type) {
 		if (cache_type == EXT4_EXT_CACHE_GAP) {
-			if (!create) {
+			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
 				 * block isn't allocated yet and
 				 * user doesn't want to allocate it
@@ -2869,9 +2869,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 							EXT4_EXT_CACHE_EXTENT);
 				goto out;
 			}
-			if (create == EXT4_CREATE_UNINITIALIZED_EXT)
+			if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
 				goto out;
-			if (!create) {
+			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				if (allocated > max_blocks)
 					allocated = max_blocks;
 				/*
@@ -2903,7 +2903,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
 	 */
-	if (!create) {
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 		/*
 		 * put just found gap into cache to speed up
 		 * subsequent requests
@@ -2932,10 +2932,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 * EXT_UNINIT_MAX_LEN.
 	 */
 	if (max_blocks > EXT_INIT_MAX_LEN &&
-	    create != EXT4_CREATE_UNINITIALIZED_EXT)
+	    !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
 		max_blocks = EXT_INIT_MAX_LEN;
 	else if (max_blocks > EXT_UNINIT_MAX_LEN &&
-		 create == EXT4_CREATE_UNINITIALIZED_EXT)
+		 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
 		max_blocks = EXT_UNINIT_MAX_LEN;
 
 	/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
@@ -2966,7 +2966,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	/* try to insert new extent into found leaf and return */
 	ext4_ext_store_pblock(&newex, newblock);
 	newex.ee_len = cpu_to_le16(ar.len);
-	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
+	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)  /* Mark uninitialized */
 		ext4_ext_mark_uninitialized(&newex);
 	err = ext4_ext_insert_extent(handle, inode, path, &newex);
 	if (err) {
@@ -2983,7 +2983,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-	if (extend_disksize) {
+	if (flags & EXT4_GET_BLOCKS_EXTEND_DISKSIZE) {
 		disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
 		if (disksize > i_size_read(inode))
 			disksize = i_size_read(inode);
@@ -2994,7 +2994,7 @@ outnew:
 	set_buffer_new(bh_result);
 
 	/* Cache only when it is _not_ an uninitialized extent */
-	if (create != EXT4_CREATE_UNINITIALIZED_EXT)
+	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
 		ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
 						EXT4_EXT_CACHE_EXTENT);
 out:
@@ -3153,7 +3153,7 @@ retry:
 		map_bh.b_state = 0;
 		ret = ext4_get_blocks(handle, inode, block,
 				      max_blocks, &map_bh,
-				      EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
+				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a9a9b9b..8b7564d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -917,7 +917,7 @@ err_out:
 static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 				  ext4_lblk_t iblock, unsigned int maxblocks,
 				  struct buffer_head *bh_result,
-				  int create, int extend_disksize)
+				  int flags)
 {
 	int err = -EIO;
 	ext4_lblk_t offsets[4];
@@ -934,7 +934,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 
 
 	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
-	J_ASSERT(handle != NULL || create == 0);
+	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, iblock, offsets,
 					&blocks_to_boundary);
 
@@ -963,7 +963,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	/* Next simple case - plain lookup or failed read of indirect block */
-	if (!create || err == -EIO)
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
 		goto cleanup;
 
 	/*
@@ -1002,7 +1002,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	 * protect it if you're about to implement concurrent
 	 * ext4_get_block() -bzzz
 	*/
-	if (!err && extend_disksize) {
+	if (!err && (flags & EXT4_GET_BLOCKS_EXTEND_DISKSIZE)) {
 		disksize = ((loff_t) iblock + count) << inode->i_blkbits;
 		if (disksize > i_size_read(inode))
 			disksize = i_size_read(inode);
@@ -1144,7 +1144,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
  */
 int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 		    unsigned int max_blocks, struct buffer_head *bh,
-		    int create, int extend_disksize, int flag)
+		    int flags)
 {
 	int retval;
 
@@ -1158,15 +1158,15 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 	down_read((&EXT4_I(inode)->i_data_sem));
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-				bh, 0, 0);
+				bh, 0);
 	} else {
 		retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
-					     bh, 0, 0);
+					     bh, 0);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
 	/* If it is only a block(s) look up */
-	if (!create)
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
 		return retval;
 
 	/*
@@ -1205,7 +1205,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 	 * let the underlying get_block() function know to
 	 * avoid double accounting
 	 */
-	if (flag)
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 1;
 	/*
 	 * We need to check for EXT4 here because migrate
@@ -1213,10 +1213,10 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 	 */
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-				bh, create, extend_disksize);
+					      bh, flags);
 	} else {
 		retval = ext4_ind_get_blocks(handle, inode, block,
-				max_blocks, bh, create, extend_disksize);
+					     max_blocks, bh, flags);
 
 		if (retval > 0 && buffer_new(bh)) {
 			/*
@@ -1229,7 +1229,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 		}
 	}
 
-	if (flag) {
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
 		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 		/*
 		 * Update reserved blocks/metadata blocks
@@ -1269,7 +1269,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
 	}
 
 	ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-			      create, 0, 0);
+			      create ? EXT4_GET_BLOCKS_CREATE : 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1288,16 +1288,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 {
 	struct buffer_head dummy;
 	int fatal = 0, err;
+	int flags = EXT4_GET_BLOCKS_EXTEND_DISKSIZE;
 
 	J_ASSERT(handle != NULL || create == 0);
 
 	dummy.b_state = 0;
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
-	err = ext4_get_blocks(handle, inode, block, 1, &dummy, create, 1, 0);
+	if (create)
+		flags |= EXT4_GET_BLOCKS_CREATE;
+	err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
 	/*
-	 * ext4_get_blocks() returns number of blocks
-	 * mapped. 0 in case of a HOLE.
+	 * ext4_get_blocks() returns number of blocks mapped. 0 in
+	 * case of a HOLE.
 	 */
 	if (err > 0) {
 		if (err > 1)
@@ -1997,7 +2000,6 @@ static void ext4_print_free_blocks(struct inode *inode)
 	return;
 }
 
-#define		EXT4_DELALLOC_RSVED	1
 static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result)
 {
@@ -2009,7 +2011,8 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 	handle = ext4_journal_current_handle();
 	BUG_ON(!handle);
 	ret = ext4_get_blocks(handle, inode, iblock, max_blocks,
-			      bh_result, 1, 0, EXT4_DELALLOC_RSVED);
+			      bh_result, EXT4_GET_BLOCKS_CREATE|
+			      EXT4_GET_BLOCKS_DELALLOC_RESERVE);
 	if (ret <= 0)
 		return ret;
 
@@ -2065,16 +2068,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 		return 0;
 	/*
 	 * We need to make sure the BH_Delay flag is passed down to
-	 * ext4_da_get_block_write(), since it calls
-	 * ext4_get_blocks() with the EXT4_DELALLOC_RSVED flag.
-	 * This flag causes ext4_get_blocks() to call
+	 * ext4_da_get_block_write(), since it calls ext4_get_blocks()
+	 * with the EXT4_GET_BLOCKS_DELALLOC_RESERVE flag.  This flag
+	 * causes ext4_get_blocks() to call
 	 * ext4_da_update_reserve_space() if the passed buffer head
 	 * has the BH_Delay flag set.  In the future, once we clean up
-	 * the interfaces to ext4_get_blocks(), we should pass in
-	 * a separate flag which requests that the delayed allocation
+	 * the interfaces to ext4_get_blocks(), we should pass in a
+	 * separate flag which requests that the delayed allocation
 	 * statistics should be updated, instead of depending on the
 	 * state information getting passed down via the map_bh's
-	 * state bitmasks plus the magic EXT4_DELALLOC_RSVED flag.
+	 * state bitmasks plus the magic
+	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE flag.
 	 */
 	new.b_state = mpd->b_state & (1 << BH_Delay);
 	new.b_blocknr = 0;
@@ -2362,7 +2366,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
+	ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0);
 	if ((ret == 0) && !buffer_delay(bh_result)) {
 		/* the block isn't (pre)allocated yet, let's reserve space */
 		/*
@@ -2406,8 +2410,7 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
 	 * we don't want to do block allocation in writepage
 	 * so call get_block_wrap with create = 0
 	 */
-	ret = ext4_get_blocks(NULL, inode, iblock, max_blocks,
-			      bh_result, 0, 0, 0);
+	ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
-- 
cgit v0.10.2


From b920c75502cb2c48654ef196d647c8eb81ab608a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 May 2009 00:54:29 -0400
Subject: ext4: Add documentation to the ext4_*get_block* functions

This adds more documentation to various internal functions in
fs/ext4/inode.c, most notably ext4_ind_get_blocks(),
ext4_da_get_block_write(), ext4_da_get_block_prep(),
ext4_normal_get_block_write().

In addition, the static function ext4_normal_get_block_write() has
been renamed noalloc_get_block_write(), since it is used in many
places far beyond ext4_normal_writepage().

Plenty of warnings have been added to the noalloc_get_block_write()
function, since the way it is used is amazingly fragile.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8b7564d..fd5f27a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -892,6 +892,10 @@ err_out:
 }
 
 /*
+ * The ext4_ind_get_blocks() function handles non-extents inodes
+ * (i.e., using the traditional indirect/double-indirect i_blocks
+ * scheme) for ext4_get_blocks().
+ *
  * Allocation strategy is simple: if we have to allocate something, we will
  * have to go the whole way to leaf. So let's do it before attaching anything
  * to tree, set linkage between the newborn blocks, write them if sync is
@@ -909,10 +913,11 @@ err_out:
  * return = 0, if plain lookup failed.
  * return < 0, error case.
  *
- *
- * Need to be called with
- * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
- * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
+ * The ext4_ind_get_blocks() function should be called with
+ * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
+ * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
+ * blocks.
  */
 static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 				  ext4_lblk_t iblock, unsigned int maxblocks,
@@ -1152,8 +1157,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 	clear_buffer_unwritten(bh);
 
 	/*
-	 * Try to see if we can get  the block without requesting
-	 * for new file system block.
+	 * Try to see if we can get the block without requesting a new
+	 * file system block.
 	 */
 	down_read((&EXT4_I(inode)->i_data_sem));
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -2000,6 +2005,12 @@ static void ext4_print_free_blocks(struct inode *inode)
 	return;
 }
 
+/*
+ * This function is used by mpage_da_map_blocks().  We separate it out
+ * as a separate function just to make life easier, and because
+ * mpage_da_map_blocks() used to be a generic function that took a
+ * get_block_t.
+ */
 static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result)
 {
@@ -2031,8 +2042,8 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 
 	/*
 	 * Update on-disk size along with block allocation we don't
-	 * use 'extend_disksize' as size may change within already
-	 * allocated block -bzzz
+	 * use EXT4_GET_BLOCKS_EXTEND_DISKSIZE as size may change
+	 * within already allocated block -bzzz
 	 */
 	disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
 	if (disksize > i_size_read(inode))
@@ -2338,8 +2349,9 @@ static int __mpage_da_writepage(struct page *page,
 }
 
 /*
- * this is a special callback for ->write_begin() only
- * it's intention is to return mapped block or reserve space
+ * This is a special get_blocks_t callback which is used by
+ * ext4_da_write_begin().  It will either return mapped block or
+ * reserve space for a single block.
  *
  * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
  * We also have b_blocknr = -1 and b_bdev initialized properly
@@ -2347,7 +2359,6 @@ static int __mpage_da_writepage(struct page *page,
  * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
  * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
  * initialized properly.
- *
  */
 static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 				  struct buffer_head *bh_result, int create)
@@ -2400,7 +2411,23 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 	return ret;
 }
 
-static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+/*
+ * This function is used as a standard get_block_t calback function
+ * when there is no desire to allocate any blocks.  It is used as a
+ * callback function for block_prepare_write(), nobh_writepage(), and
+ * block_write_full_page().  These functions should only try to map a
+ * single block at a time.
+ *
+ * Since this function doesn't do block allocations even if the caller
+ * requests it by passing in create=1, it is critically important that
+ * any caller checks to make sure that any buffer heads are returned
+ * by this function are either all already mapped or marked for
+ * delayed allocation before calling nobh_writepage() or
+ * block_write_full_page().  Otherwise, b_blocknr could be left
+ * unitialized, and the page write functions will be taken by
+ * surprise.
+ */
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
 	int ret = 0;
@@ -2419,10 +2446,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
 }
 
 /*
- * get called vi ext4_da_writepages after taking page lock (have journal handle)
- * get called via journal_submit_inode_data_buffers (no journal handle)
- * get called via shrink_page_list via pdflush (no journal handle)
- * or grab_page_cache when doing write_begin (have journal handle)
+ * This function can get called via...
+ *   - ext4_da_writepages after taking page lock (have journal handle)
+ *   - journal_submit_inode_data_buffers (no journal handle)
+ *   - shrink_page_list via pdflush (no journal handle)
+ *   - grab_page_cache when doing write_begin (have journal handle)
  */
 static int ext4_da_writepage(struct page *page,
 				struct writeback_control *wbc)
@@ -2473,7 +2501,7 @@ static int ext4_da_writepage(struct page *page,
 		 * do block allocation here.
 		 */
 		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-						ext4_normal_get_block_write);
+					  noalloc_get_block_write);
 		if (!ret) {
 			page_bufs = page_buffers(page);
 			/* check whether all are mapped and non delay */
@@ -2498,11 +2526,10 @@ static int ext4_da_writepage(struct page *page,
 	}
 
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-		ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
+		ret = nobh_writepage(page, noalloc_get_block_write, wbc);
 	else
-		ret = block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		ret = block_write_full_page(page, noalloc_get_block_write,
+					    wbc);
 
 	return ret;
 }
@@ -2814,7 +2841,7 @@ retry:
 	*pagep = page;
 
 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-							ext4_da_get_block_prep);
+				ext4_da_get_block_prep);
 	if (ret < 0) {
 		unlock_page(page);
 		ext4_journal_stop(handle);
@@ -3122,12 +3149,10 @@ static int __ext4_normal_writepage(struct page *page,
 	struct inode *inode = page->mapping->host;
 
 	if (test_opt(inode->i_sb, NOBH))
-		return nobh_writepage(page,
-					ext4_normal_get_block_write, wbc);
+		return nobh_writepage(page, noalloc_get_block_write, wbc);
 	else
-		return block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		return block_write_full_page(page, noalloc_get_block_write,
+					     wbc);
 }
 
 static int ext4_normal_writepage(struct page *page,
@@ -3179,7 +3204,7 @@ static int __ext4_journalled_writepage(struct page *page,
 	int err;
 
 	ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-					ext4_normal_get_block_write);
+				  noalloc_get_block_write);
 	if (ret != 0)
 		goto out_unlock;
 
@@ -3264,9 +3289,8 @@ static int ext4_journalled_writepage(struct page *page,
 		 * really know unless we go poke around in the buffer_heads.
 		 * But block_write_full_page will do the right thing.
 		 */
-		return block_write_full_page(page,
-						ext4_normal_get_block_write,
-						wbc);
+		return block_write_full_page(page, noalloc_get_block_write,
+					     wbc);
 	}
 no_write:
 	redirty_page_for_writepage(wbc, page);
-- 
cgit v0.10.2


From a2dc52b5d1d8cc280b3e795abf1c80ac8c49f30c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 12 May 2009 13:51:29 -0400
Subject: ext4: Add BUG_ON debugging checks to noalloc_get_block_write()

Enforce that noalloc_get_block_write() is only called to map one block
at a time, and that it always is successful in finding a mapping for
given an inode's logical block block number if it is called with
create == 1.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fd5f27a..e6113c3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2433,11 +2433,14 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 	int ret = 0;
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 
+	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+
 	/*
 	 * we don't want to do block allocation in writepage
 	 * so call get_block_wrap with create = 0
 	 */
 	ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
+	BUG_ON(create && ret == 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
-- 
cgit v0.10.2


From d80c19df5fcceb8c741e96f09f275c2da719efef Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 12 May 2009 16:29:13 +0200
Subject: lockdep: increase MAX_LOCKDEP_ENTRIES and MAX_LOCKDEP_CHAINS

Now that lockdep coverage has increased it has become easier to
run out of entries:

[   21.401387] BUG: MAX_LOCKDEP_ENTRIES too low!
[   21.402007] turning off the locking correctness validator.
[   21.402007] Pid: 1555, comm: S99local Not tainted 2.6.30-rc5-tip #2
[   21.402007] Call Trace:
[   21.402007]  [<ffffffff81069789>] add_lock_to_list+0x53/0xba
[   21.402007]  [<ffffffff810eb615>] ? lookup_mnt+0x19/0x53
[   21.402007]  [<ffffffff8106be14>] check_prev_add+0x14b/0x1c7
[   21.402007]  [<ffffffff8106c304>] validate_chain+0x474/0x52a
[   21.402007]  [<ffffffff8106c6fc>] __lock_acquire+0x342/0x3c7
[   21.402007]  [<ffffffff8106c842>] lock_acquire+0xc1/0xe5
[   21.402007]  [<ffffffff810eb615>] ? lookup_mnt+0x19/0x53
[   21.402007]  [<ffffffff8153aedc>] _spin_lock+0x31/0x66

Double the size - as we've done in the past.

[ Impact: allow lockdep to cover more locks ]

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index a2cc7e9..699a2ac 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -54,9 +54,9 @@ enum {
  * table (if it's not there yet), and we check it for lock order
  * conflicts and deadlocks.
  */
-#define MAX_LOCKDEP_ENTRIES	8192UL
+#define MAX_LOCKDEP_ENTRIES	16384UL
 
-#define MAX_LOCKDEP_CHAINS_BITS	14
+#define MAX_LOCKDEP_CHAINS_BITS	15
 #define MAX_LOCKDEP_CHAINS	(1UL << MAX_LOCKDEP_CHAINS_BITS)
 
 #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
-- 
cgit v0.10.2


From 6f733a349c2b70a83b5451fbe61ef2cf67d41cdf Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Mon, 11 May 2009 09:58:19 -0700
Subject: ARM: OMAP: GPIO de-bounce clocks don't affect module idle state

GPIO de-bounce clocks don't have any impact on the module idle state, so
the clock code should not wait for the module to enable after the de-bounce
clocks are enabled.

Problem found by Kevin Hilman <khilman@deeprootsystems.com>.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h
index 6763b8f..017a30e 100644
--- a/arch/arm/mach-omap2/clock34xx.h
+++ b/arch/arm/mach-omap2/clock34xx.h
@@ -2182,7 +2182,7 @@ static struct clk wkup_32k_fck = {
 
 static struct clk gpio1_dbck = {
 	.name		= "gpio1_dbck",
-	.ops		= &clkops_omap2_dflt_wait,
+	.ops		= &clkops_omap2_dflt,
 	.parent		= &wkup_32k_fck,
 	.enable_reg	= OMAP_CM_REGADDR(WKUP_MOD, CM_FCLKEN),
 	.enable_bit	= OMAP3430_EN_GPIO1_SHIFT,
@@ -2427,7 +2427,7 @@ static struct clk per_32k_alwon_fck = {
 
 static struct clk gpio6_dbck = {
 	.name		= "gpio6_dbck",
-	.ops		= &clkops_omap2_dflt_wait,
+	.ops		= &clkops_omap2_dflt,
 	.parent		= &per_32k_alwon_fck,
 	.enable_reg	= OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN),
 	.enable_bit	= OMAP3430_EN_GPIO6_SHIFT,
@@ -2437,7 +2437,7 @@ static struct clk gpio6_dbck = {
 
 static struct clk gpio5_dbck = {
 	.name		= "gpio5_dbck",
-	.ops		= &clkops_omap2_dflt_wait,
+	.ops		= &clkops_omap2_dflt,
 	.parent		= &per_32k_alwon_fck,
 	.enable_reg	= OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN),
 	.enable_bit	= OMAP3430_EN_GPIO5_SHIFT,
@@ -2447,7 +2447,7 @@ static struct clk gpio5_dbck = {
 
 static struct clk gpio4_dbck = {
 	.name		= "gpio4_dbck",
-	.ops		= &clkops_omap2_dflt_wait,
+	.ops		= &clkops_omap2_dflt,
 	.parent		= &per_32k_alwon_fck,
 	.enable_reg	= OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN),
 	.enable_bit	= OMAP3430_EN_GPIO4_SHIFT,
@@ -2457,7 +2457,7 @@ static struct clk gpio4_dbck = {
 
 static struct clk gpio3_dbck = {
 	.name		= "gpio3_dbck",
-	.ops		= &clkops_omap2_dflt_wait,
+	.ops		= &clkops_omap2_dflt,
 	.parent		= &per_32k_alwon_fck,
 	.enable_reg	= OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN),
 	.enable_bit	= OMAP3430_EN_GPIO3_SHIFT,
@@ -2467,7 +2467,7 @@ static struct clk gpio3_dbck = {
 
 static struct clk gpio2_dbck = {
 	.name		= "gpio2_dbck",
-	.ops		= &clkops_omap2_dflt_wait,
+	.ops		= &clkops_omap2_dflt,
 	.parent		= &per_32k_alwon_fck,
 	.enable_reg	= OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN),
 	.enable_bit	= OMAP3430_EN_GPIO2_SHIFT,
-- 
cgit v0.10.2


From eaf9393bb79a9c7c8e708669f9581ca466122430 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@nokia.com>
Date: Tue, 12 May 2009 11:20:02 -0700
Subject: ARM: OMAP2: Fix tusb6010 init error and compilation warning

Fix "tusb6010 init error 5, -19" and compilation warning from function
tusb6010_platform_retime "warning: 'sysclk_ps' is used uninitialized in this
function".

I suppose commit c094ba34b8f780885d029ce3c2715a194b780e5d was meant to test
for zero fclk_ps instead of sysclk_ps.

Signed-off-by: Jarkko Nikula <jarkko.nikula@nokia.com>
Cc: Roel Kluin <roel.kluin@gmail.com>
Tested-by: Kalle Valo <kalle.valo@iki.fi>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/usb-tusb6010.c b/arch/arm/mach-omap2/usb-tusb6010.c
index 8df55f4..8622c24 100644
--- a/arch/arm/mach-omap2/usb-tusb6010.c
+++ b/arch/arm/mach-omap2/usb-tusb6010.c
@@ -187,7 +187,7 @@ int tusb6010_platform_retime(unsigned is_refclk)
 	unsigned	sysclk_ps;
 	int		status;
 
-	if (!refclk_psec || sysclk_ps == 0)
+	if (!refclk_psec || fclk_ps == 0)
 		return -ENODEV;
 
 	sysclk_ps = is_refclk ? refclk_psec : TUSB6010_OSCCLK_60;
-- 
cgit v0.10.2


From c8a799b03a981b4b27d8fd7320c96cfbe781b2fb Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jhnikula@gmail.com>
Date: Tue, 12 May 2009 11:20:02 -0700
Subject: ARM: OMAP2: Fix SPI driver failure on 2420 when running multi-omap
 config

SPI driver will do unhandled fault on OMAP2420 if trying to probe
non-existing SPI busses. Register those additional busses runtime only
for cpus having them.

Signed-off-by: Jarkko Nikula <jhnikula@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 496983a..894cc35 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -354,10 +354,12 @@ static void omap_init_mcspi(void)
 	platform_device_register(&omap2_mcspi1);
 	platform_device_register(&omap2_mcspi2);
 #if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3)
-	platform_device_register(&omap2_mcspi3);
+	if (cpu_is_omap2430() || cpu_is_omap343x())
+		platform_device_register(&omap2_mcspi3);
 #endif
 #ifdef CONFIG_ARCH_OMAP3
-	platform_device_register(&omap2_mcspi4);
+	if (cpu_is_omap343x())
+		platform_device_register(&omap2_mcspi4);
 #endif
 }
 
-- 
cgit v0.10.2


From 4ea60b0c7a8487af5de736d394b147baf7691f3c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 12 May 2009 11:20:03 -0700
Subject: ARM: OMAP2/3: Add name for musb clocks

With the clkdev, musb_core.c needs to register clock with name "ick".

Once all the platforms using the musb driver have been converted
to use clockdev, the clock name does not need to be passed
from the low-level init code.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/clock24xx.c b/arch/arm/mach-omap2/clock24xx.c
index efc59c4..ff9b4c0 100644
--- a/arch/arm/mach-omap2/clock24xx.c
+++ b/arch/arm/mach-omap2/clock24xx.c
@@ -206,7 +206,7 @@ static struct omap_clk omap24xx_clks[] = {
 	CLK(NULL,	"aes_ick",	&aes_ick,	CK_243X | CK_242X),
 	CLK(NULL,	"pka_ick",	&pka_ick,	CK_243X | CK_242X),
 	CLK(NULL,	"usb_fck",	&usb_fck,	CK_243X | CK_242X),
-	CLK(NULL,	"usbhs_ick",	&usbhs_ick,	CK_243X),
+	CLK("musb_hdrc",	"ick",	&usbhs_ick,	CK_243X),
 	CLK("mmci-omap-hs.0", "ick",	&mmchs1_ick,	CK_243X),
 	CLK("mmci-omap-hs.0", "fck",	&mmchs1_fck,	CK_243X),
 	CLK("mmci-omap-hs.1", "ick",	&mmchs2_ick,	CK_243X),
diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c
index 0a14dca..a057539 100644
--- a/arch/arm/mach-omap2/clock34xx.c
+++ b/arch/arm/mach-omap2/clock34xx.c
@@ -157,7 +157,7 @@ static struct omap_clk omap34xx_clks[] = {
 	CLK(NULL,	"ssi_ssr_fck",	&ssi_ssr_fck,	CK_343X),
 	CLK(NULL,	"ssi_sst_fck",	&ssi_sst_fck,	CK_343X),
 	CLK(NULL,	"core_l3_ick",	&core_l3_ick,	CK_343X),
-	CLK(NULL,	"hsotgusb_ick",	&hsotgusb_ick,	CK_343X),
+	CLK("musb_hdrc",	"ick",	&hsotgusb_ick,	CK_343X),
 	CLK(NULL,	"sdrc_ick",	&sdrc_ick,	CK_343X),
 	CLK(NULL,	"gpmc_fck",	&gpmc_fck,	CK_343X),
 	CLK(NULL,	"security_l3_ick", &security_l3_ick, CK_343X),
-- 
cgit v0.10.2


From 7ed42a28b269f8682eefae27f5c11187eb56e63b Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 12 May 2009 11:33:08 -0700
Subject: x86, boot: correct sanity checks in boot/compressed/misc.c

arch/x86/boot/compressed/misc.c contains several sanity checks on the
output address.  Correct constraints that are no longer correct:

- the alignment test should be MIN_KERNEL_ALIGN on both 32 and 64
  bits.
- the 64 bit maximum address was set to 2^40, which was the limit of
  one specific x86-64 implementation.  Change the test to 2^46, the
  current Linux limit, and at least try to test the end rather than
  the beginning.
- for non-relocatable kernels, test against LOAD_PHYSICAL_ADDR on both
  32 and 64 bits.

[ Impact: fix potential boot failure due to invalid tests ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index e45be73..842b2a3 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,21 +325,19 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
+	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
+		error("Destination address inappropriately aligned");
 #ifdef CONFIG_X86_64
-	if ((unsigned long)output & (__KERNEL_ALIGN - 1))
-		error("Destination address not 2M aligned");
-	if ((unsigned long)output >= 0xffffffffffUL)
+	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
 #else
-	if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1))
-		error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
 	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
+#endif
 #ifndef CONFIG_RELOCATABLE
-	if ((u32)output != LOAD_PHYSICAL_ADDR)
+	if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
 		error("Wrong destination address");
 #endif
-#endif
 
 	if (!quiet)
 		putstr("\nDecompressing Linux... ");
-- 
cgit v0.10.2


From c4f68236e41641494f9c8a418ccc0678c335bbb5 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 12 May 2009 11:37:34 -0700
Subject: x86-64: align __PHYSICAL_START, remove __KERNEL_ALIGN

Handle the misconfiguration where CONFIG_PHYSICAL_START is
incompatible with CONFIG_PHYSICAL_ALIGN.  This is a configuration
error, but one which arises easily since Kconfig doesn't have the
smarts to express the true relationship between these two variables.
Hence, align __PHYSICAL_START the same way we align LOAD_PHYSICAL_ADDR
in <asm/boot.h>.

For non-relocatable kernels, this would cause the boot to fail.

[ Impact: fix boot failures for non-relocatable kernels ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index d38c91b..e11900f 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -32,17 +32,9 @@
  */
 #define __PAGE_OFFSET           _AC(0xffff880000000000, UL)
 
-#define __PHYSICAL_START	CONFIG_PHYSICAL_START
-#define __KERNEL_ALIGN		0x200000
-
-/*
- * Make sure kernel is aligned to 2MB address. Catching it at compile
- * time is better. Change your config file and compile the kernel
- * for a 2MB aligned address (CONFIG_PHYSICAL_START)
- */
-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-#endif
+#define __PHYSICAL_START	((CONFIG_PHYSICAL_START +	 	\
+				  (CONFIG_PHYSICAL_ALIGN - 1)) &	\
+				 ~(CONFIG_PHYSICAL_ALIGN - 1))
 
 #define __START_KERNEL		(__START_KERNEL_map + __PHYSICAL_START)
 #define __START_KERNEL_map	_AC(0xffffffff80000000, UL)
-- 
cgit v0.10.2


From d9295746c0ed3991bdec18c6a5890d71d88904b4 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:27:09 -0600
Subject: OMAP3 SRAM: mark OCM RAM as Non-cacheable Normal memory

Mark the SRAM (aka OCM RAM) as Non-cacheable Normal memory[1].  This
is to prevent the ARM from evicting existing cache lines to SDRAM
while code is executing from the SRAM.  Necessary since one of the
primary uses for the SRAM is to hold the code and data for the CORE
DPLL M2 divider reprogramming code, which must execute while the SDRC
is idled.  If the ARM attempts to write cache lines back to the while
the SRAM code is running, the ARM will stall[2].

TI deals with this problem in the CDP kernel by marking the SRAM as
Strongly-ordered memory.

Tero Kristo <tero.kristo@nokia.com> caught a bug in an earlier version of
this patch - thanks Tero.

...

1. ARMv7 ARM (DDI 0406A) pp. A3-30, A3-31, B3-32.

2. Private communication with Richard Woodruff <r-woodruff2@ti.com>

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Cc: Tero Kristo <tero.kristo@nokia.com>
Cc: Richard Woodruff <r-woodruff2@ti.com>

diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index fa5297d..3835338 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -201,6 +201,15 @@ void __init omap_map_sram(void)
 		base = OMAP3_SRAM_PA;
 		base = ROUND_DOWN(base, PAGE_SIZE);
 		omap_sram_io_desc[0].pfn = __phys_to_pfn(base);
+
+		/*
+		 * SRAM must be marked as non-cached on OMAP3 since the
+		 * CORE DPLL M2 divider change code (in SRAM) runs with the
+		 * SDRAM controller disabled, and if it is marked cached,
+		 * the ARM may attempt to write cache lines back to SDRAM
+		 * which will cause the system to hang.
+		 */
+		omap_sram_io_desc[0].type = MT_MEMORY_NONCACHED;
 	}
 
 	omap_sram_io_desc[0].length = 1024 * 1024;	/* Use section desc */
-- 
cgit v0.10.2


From 69d4255b139274f71faca28bc93bb49da8eb1a91 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:27:09 -0600
Subject: OMAP3 SRAM: add ARM barriers to omap3_sram_configure_core_dpll

Add more barriers in the SRAM CORE DPLL M2 divider change code.

- Add a DSB SY after the function's entry point to flush all cached
  and buffered writes and wait for the interconnect to claim that they
  have completed[1].  The idea here is to force all delayed write
  traffic going to the SDRAM to at least post to the L3 interconnect
  before continuing.  If these writes are allowed to occur after the
  SDRC is idled, the writes will not be acknowledged and the ARM will
  stall.

  Note that in this case, it does not matter if the writes actually
  complete to the SDRAM - it is only necessary for the writes to leave
  the ARM itself.  If the writes are posted by the interconnect when
  the SDRC goes into idle, the writes will be delayed until the SDRC
  returns from idle[2].  If the SDRC is in the middle of a write when
  it is requested to enter idle, the SDRC will not acknowledge the
  idle request until the writes complete to the SDRAM.[3]

  The old-style DMB in sdram_in_selfrefresh is now superfluous, so,
  remove it.

- Add an ISB before the function's exit point to prevent the ARM from
  speculatively executing into SDRAM before the SDRAM is enabled[4].

...

1. ARMv7 ARM (DDI 0406A) A3-47, A3-48.

2. Private communication with Richard Woodruff <r-woodruff2@ti.com>.

3. Private communication with Richard Woodruff <r-woodruff2@ti.com>.

4. ARMv7 ARM (DDI 0406A) A3-48.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Cc: Richard Woodruff <r-woodruff2@ti.com>

diff --git a/arch/arm/mach-omap2/sram34xx.S b/arch/arm/mach-omap2/sram34xx.S
index 2c71461..f4a356d 100644
--- a/arch/arm/mach-omap2/sram34xx.S
+++ b/arch/arm/mach-omap2/sram34xx.S
@@ -43,6 +43,7 @@
  */
 ENTRY(omap3_sram_configure_core_dpll)
 	stmfd	sp!, {r1-r12, lr}	@ store regs to stack
+	dsb				@ flush buffered writes to interconnect
 	cmp	r3, #0x2
 	blne	configure_sdrc
 	cmp	r3, #0x2
@@ -58,6 +59,7 @@ ENTRY(omap3_sram_configure_core_dpll)
 	blne	wait_dll_lock
 	cmp	r3, #0x1
 	blne	configure_sdrc
+	isb				@ prevent speculative exec past here
 	mov 	r0, #0 			@ return value
 	ldmfd	sp!, {r1-r12, pc}	@ restore regs and return
 unlock_dll:
@@ -73,8 +75,6 @@ lock_dll:
 	str	r5, [r4]
 	bx	lr
 sdram_in_selfrefresh:
-	mov	r5, #0x0		@ Move 0 to R5
-	mcr	p15, 0, r5, c7, c10, 5	@ memory barrier
 	ldr	r4, omap3_sdrc_power	@ read the SDRC_POWER register
 	ldr	r5, [r4]		@ read the contents of SDRC_POWER
 	orr 	r5, r5, #0x40		@ enable self refresh on idle req
-- 
cgit v0.10.2


From d75d9e73cd59127a4d926a2bf5e9cdcc90f033d6 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:27:09 -0600
Subject: OMAP3 clock: add interconnect barriers to CORE DPLL M2 change

Where necessary, add interconnect barriers to force posted writes to
complete before continuing.

Signed-off-by: Paul Walmsley <paul@pwsan.com>

diff --git a/arch/arm/mach-omap2/sram34xx.S b/arch/arm/mach-omap2/sram34xx.S
index f4a356d..8d524f3 100644
--- a/arch/arm/mach-omap2/sram34xx.S
+++ b/arch/arm/mach-omap2/sram34xx.S
@@ -66,22 +66,23 @@ unlock_dll:
 	ldr	r4, omap3_sdrc_dlla_ctrl
 	ldr	r5, [r4]
 	orr	r5, r5, #0x4
-	str	r5, [r4]
+	str	r5, [r4]		@ (no OCP barrier needed)
 	bx	lr
 lock_dll:
 	ldr	r4, omap3_sdrc_dlla_ctrl
 	ldr	r5, [r4]
 	bic	r5, r5, #0x4
-	str	r5, [r4]
+	str	r5, [r4]		@ (no OCP barrier needed)
 	bx	lr
 sdram_in_selfrefresh:
 	ldr	r4, omap3_sdrc_power	@ read the SDRC_POWER register
 	ldr	r5, [r4]		@ read the contents of SDRC_POWER
 	orr 	r5, r5, #0x40		@ enable self refresh on idle req
 	str 	r5, [r4]		@ write back to SDRC_POWER register
+	ldr	r5, [r4]		@ posted-write barrier for SDRC
 	ldr	r4, omap3_cm_iclken1_core	@ read the CM_ICLKEN1_CORE reg
 	ldr	r5, [r4]
-	bic	r5, r5, #0x2		@ disable iclk bit for SRDC
+	bic	r5, r5, #0x2		@ disable iclk bit for SDRC
 	str 	r5, [r4]
 wait_sdrc_idle:
 	ldr 	r4, omap3_cm_idlest1_core
@@ -97,6 +98,7 @@ configure_core_dpll:
 	and	r5, r5, r6
 	orr	r5, r5, r3, lsl #0x1B	@ r3 contains the M2 val
 	str	r5, [r4]
+	ldr	r5, [r4]		@ posted-write barrier for CM
 	mov 	r5, #0x800		@ wait for the clock to stabilise
 	cmp	r3, #2
 	bne	wait_clk_stable
@@ -152,6 +154,7 @@ configure_sdrc:
 	str	r1, [r4]
 	ldr	r4, omap3_sdrc_actim_ctrlb
 	str	r2, [r4]
+	ldr	r2, [r4]		@ posted-write barrier for SDRC
 	bx	lr
 
 omap3_sdrc_power:
-- 
cgit v0.10.2


From fa0406a8d8c3a4a302085ccd031d999161405f70 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:27:09 -0600
Subject: OMAP3 SRAM: clear the SDRC PWRENA bit during SDRC frequency change

Clear the SDRC_POWER.PWRENA bit before putting the SDRAM into self-refresh
mode.  This prevents the SDRC from attempting to power off the SDRAM,
which can cause the system to hang.

Signed-off-by: Paul Walmsley <paul@pwsan.com>

diff --git a/arch/arm/mach-omap2/sram34xx.S b/arch/arm/mach-omap2/sram34xx.S
index 8d524f3..9a45415 100644
--- a/arch/arm/mach-omap2/sram34xx.S
+++ b/arch/arm/mach-omap2/sram34xx.S
@@ -77,7 +77,9 @@ lock_dll:
 sdram_in_selfrefresh:
 	ldr	r4, omap3_sdrc_power	@ read the SDRC_POWER register
 	ldr	r5, [r4]		@ read the contents of SDRC_POWER
+	mov	r9, r5			@ keep a copy of SDRC_POWER bits
 	orr 	r5, r5, #0x40		@ enable self refresh on idle req
+	bic 	r5, r5, #0x4		@ clear PWDENA
 	str 	r5, [r4]		@ write back to SDRC_POWER register
 	ldr	r5, [r4]		@ posted-write barrier for SDRC
 	ldr	r4, omap3_cm_iclken1_core	@ read the CM_ICLKEN1_CORE reg
@@ -128,10 +130,9 @@ wait_sdrc_idle1:
 	and 	r5, r5, #0x2
 	cmp	r5, #0
 	bne	wait_sdrc_idle1
+restore_sdrc_power_val:
 	ldr	r4, omap3_sdrc_power
-	ldr	r5, [r4]
-	bic 	r5, r5, #0x40
-	str 	r5, [r4]
+	str	r9, [r4]		@ restore SDRC_POWER, no barrier needed
 	bx	lr
 wait_dll_lock:
 	ldr	r4, omap3_sdrc_dlla_status
-- 
cgit v0.10.2


From 98cfe5abf24c6d2f06b70fa8a8a111681bed3ed9 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:27:09 -0600
Subject: OMAP3 SDRC: initialize SDRC_POWER at boot

Initialize SDRC_POWER to a known-good setting when the kernel boots.
Necessary since some bootloaders don't initialize SDRC_POWER properly.

Signed-off-by: Paul Walmsley <paul@pwsan.com>

diff --git a/arch/arm/mach-omap2/sdrc.c b/arch/arm/mach-omap2/sdrc.c
index 2a30060..d62e4e1 100644
--- a/arch/arm/mach-omap2/sdrc.c
+++ b/arch/arm/mach-omap2/sdrc.c
@@ -37,6 +37,10 @@ static struct omap_sdrc_params *sdrc_init_params;
 void __iomem *omap2_sdrc_base;
 void __iomem *omap2_sms_base;
 
+/* SDRC_POWER register bits */
+#define SDRC_POWER_EXTCLKDIS_SHIFT		3
+#define SDRC_POWER_PWDENA_SHIFT			2
+#define SDRC_POWER_PAGEPOLICY_SHIFT		0
 
 /**
  * omap2_sdrc_get_params - return SDRC register values for a given clock rate
@@ -74,7 +78,14 @@ void __init omap2_set_globals_sdrc(struct omap_globals *omap2_globals)
 	omap2_sms_base = omap2_globals->sms;
 }
 
-/* turn on smart idle modes for SDRAM scheduler and controller */
+/**
+ * omap2_sdrc_init - initialize SMS, SDRC devices on boot
+ * @sp: pointer to a null-terminated list of struct omap_sdrc_params
+ *
+ * Turn on smart idle modes for SDRAM scheduler and controller.
+ * Program a known-good configuration for the SDRC to deal with buggy
+ * bootloaders.
+ */
 void __init omap2_sdrc_init(struct omap_sdrc_params *sp)
 {
 	u32 l;
@@ -90,4 +101,10 @@ void __init omap2_sdrc_init(struct omap_sdrc_params *sp)
 	sdrc_write_reg(l, SDRC_SYSCONFIG);
 
 	sdrc_init_params = sp;
+
+	/* XXX Enable SRFRONIDLEREQ here also? */
+	l = (1 << SDRC_POWER_EXTCLKDIS_SHIFT) |
+		(1 << SDRC_POWER_PWDENA_SHIFT) |
+		(1 << SDRC_POWER_PAGEPOLICY_SHIFT);
+	sdrc_write_reg(l, SDRC_POWER);
 }
-- 
cgit v0.10.2


From b2abb271a5705bc80478e79d95fc9f3babc2605c Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:27:10 -0600
Subject: OMAP3 SRAM: renumber registers to make space for argument passing

Renumber registers in omap3_sram_configure_core_dpll() assembly code to
make space for additional parameters.

Signed-off-by: Paul Walmsley <paul@pwsan.com>

diff --git a/arch/arm/mach-omap2/sram34xx.S b/arch/arm/mach-omap2/sram34xx.S
index 9a45415..35131e5 100644
--- a/arch/arm/mach-omap2/sram34xx.S
+++ b/arch/arm/mach-omap2/sram34xx.S
@@ -63,50 +63,50 @@ ENTRY(omap3_sram_configure_core_dpll)
 	mov 	r0, #0 			@ return value
 	ldmfd	sp!, {r1-r12, pc}	@ restore regs and return
 unlock_dll:
-	ldr	r4, omap3_sdrc_dlla_ctrl
-	ldr	r5, [r4]
-	orr	r5, r5, #0x4
-	str	r5, [r4]		@ (no OCP barrier needed)
+	ldr	r11, omap3_sdrc_dlla_ctrl
+	ldr	r12, [r11]
+	orr	r12, r12, #0x4
+	str	r12, [r11]		@ (no OCP barrier needed)
 	bx	lr
 lock_dll:
-	ldr	r4, omap3_sdrc_dlla_ctrl
-	ldr	r5, [r4]
-	bic	r5, r5, #0x4
-	str	r5, [r4]		@ (no OCP barrier needed)
+	ldr	r11, omap3_sdrc_dlla_ctrl
+	ldr	r12, [r11]
+	bic	r12, r12, #0x4
+	str	r12, [r11]		@ (no OCP barrier needed)
 	bx	lr
 sdram_in_selfrefresh:
-	ldr	r4, omap3_sdrc_power	@ read the SDRC_POWER register
-	ldr	r5, [r4]		@ read the contents of SDRC_POWER
-	mov	r9, r5			@ keep a copy of SDRC_POWER bits
-	orr 	r5, r5, #0x40		@ enable self refresh on idle req
-	bic 	r5, r5, #0x4		@ clear PWDENA
-	str 	r5, [r4]		@ write back to SDRC_POWER register
-	ldr	r5, [r4]		@ posted-write barrier for SDRC
-	ldr	r4, omap3_cm_iclken1_core	@ read the CM_ICLKEN1_CORE reg
-	ldr	r5, [r4]
-	bic	r5, r5, #0x2		@ disable iclk bit for SDRC
-	str 	r5, [r4]
+	ldr	r11, omap3_sdrc_power	@ read the SDRC_POWER register
+	ldr	r12, [r11]		@ read the contents of SDRC_POWER
+	mov	r9, r12			@ keep a copy of SDRC_POWER bits
+	orr 	r12, r12, #0x40		@ enable self refresh on idle req
+	bic 	r12, r12, #0x4		@ clear PWDENA
+	str 	r12, [r11]		@ write back to SDRC_POWER register
+	ldr	r12, [r11]		@ posted-write barrier for SDRC
+	ldr	r11, omap3_cm_iclken1_core	@ read the CM_ICLKEN1_CORE reg
+	ldr	r12, [r11]
+	bic	r12, r12, #0x2		@ disable iclk bit for SDRC
+	str 	r12, [r11]
 wait_sdrc_idle:
-	ldr 	r4, omap3_cm_idlest1_core
-	ldr 	r5, [r4]
-	and 	r5, r5, #0x2		@ check for SDRC idle
-	cmp 	r5, #2
+	ldr 	r11, omap3_cm_idlest1_core
+	ldr 	r12, [r11]
+	and 	r12, r12, #0x2		@ check for SDRC idle
+	cmp 	r12, #2
 	bne 	wait_sdrc_idle
 	bx 	lr
 configure_core_dpll:
-	ldr 	r4, omap3_cm_clksel1_pll
-	ldr	r5, [r4]
-	ldr	r6, core_m2_mask_val	@ modify m2 for core dpll
-	and	r5, r5, r6
-	orr	r5, r5, r3, lsl #0x1B	@ r3 contains the M2 val
-	str	r5, [r4]
-	ldr	r5, [r4]		@ posted-write barrier for CM
-	mov 	r5, #0x800		@ wait for the clock to stabilise
+	ldr 	r11, omap3_cm_clksel1_pll
+	ldr	r12, [r11]
+	ldr	r10, core_m2_mask_val	@ modify m2 for core dpll
+	and	r12, r12, r10
+	orr	r12, r12, r3, lsl #0x1B	@ r3 contains the M2 val
+	str	r12, [r11]
+	ldr	r12, [r11]		@ posted-write barrier for CM
+	mov 	r12, #0x800		@ wait for the clock to stabilise
 	cmp	r3, #2
 	bne	wait_clk_stable
 	bx	lr
 wait_clk_stable:
-	subs 	r5, r5, #1
+	subs 	r12, r12, #1
 	bne	wait_clk_stable
 	nop
 	nop
@@ -120,42 +120,42 @@ wait_clk_stable:
 	nop
 	bx	lr
 enable_sdrc:
-	ldr 	r4, omap3_cm_iclken1_core
-	ldr	r5, [r4]
-	orr 	r5, r5, #0x2		@ enable iclk bit for SDRC
-	str 	r5, [r4]
+	ldr 	r11, omap3_cm_iclken1_core
+	ldr	r12, [r11]
+	orr 	r12, r12, #0x2		@ enable iclk bit for SDRC
+	str 	r12, [r11]
 wait_sdrc_idle1:
-	ldr 	r4, omap3_cm_idlest1_core
-	ldr	r5, [r4]
-	and 	r5, r5, #0x2
-	cmp	r5, #0
+	ldr 	r11, omap3_cm_idlest1_core
+	ldr	r12, [r11]
+	and 	r12, r12, #0x2
+	cmp	r12, #0
 	bne	wait_sdrc_idle1
 restore_sdrc_power_val:
-	ldr	r4, omap3_sdrc_power
-	str	r9, [r4]		@ restore SDRC_POWER, no barrier needed
+	ldr	r11, omap3_sdrc_power
+	str	r9, [r11]		@ restore SDRC_POWER, no barrier needed
 	bx	lr
 wait_dll_lock:
-	ldr	r4, omap3_sdrc_dlla_status
-	ldr	r5, [r4]
-	and 	r5, r5, #0x4
-	cmp	r5, #0x4
+	ldr	r11, omap3_sdrc_dlla_status
+	ldr	r12, [r11]
+	and 	r12, r12, #0x4
+	cmp	r12, #0x4
 	bne	wait_dll_lock
 	bx	lr
 wait_dll_unlock:
-	ldr	r4, omap3_sdrc_dlla_status
-	ldr	r5, [r4]
-	and	r5, r5, #0x4
-	cmp	r5, #0x0
+	ldr	r11, omap3_sdrc_dlla_status
+	ldr	r12, [r11]
+	and	r12, r12, #0x4
+	cmp	r12, #0x0
 	bne	wait_dll_unlock
 	bx	lr
 configure_sdrc:
-	ldr	r4, omap3_sdrc_rfr_ctrl
-	str	r0, [r4]
-	ldr	r4, omap3_sdrc_actim_ctrla
-	str	r1, [r4]
-	ldr	r4, omap3_sdrc_actim_ctrlb
-	str	r2, [r4]
-	ldr	r2, [r4]		@ posted-write barrier for SDRC
+	ldr	r11, omap3_sdrc_rfr_ctrl
+	str	r0, [r11]
+	ldr	r11, omap3_sdrc_actim_ctrla
+	str	r1, [r11]
+	ldr	r11, omap3_sdrc_actim_ctrlb
+	str	r2, [r11]
+	ldr	r2, [r11]		@ posted-write barrier for SDRC
 	bx	lr
 
 omap3_sdrc_power:
-- 
cgit v0.10.2


From 4519c2bf433b97d091635eb51e4ba8ffa1c84d62 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:26:32 -0600
Subject: OMAP3 clock: only unlock SDRC DLL if SDRC clk < 83MHz

According to the 34xx TRM Rev. K section 11.2.4.4.11.1 "Purpose of the
DLL/CDL Module," the SDRC delay-locked-loop can be locked at any SDRC
clock frequency from 83MHz to 166MHz.  CDP code unconditionally
unlocked the DLL whenever shifting to a lower SDRC speed, but this
seems unnecessary and error-prone, as the DLL is no longer able to
compensate for process, voltage, and temperature variations.  Instead,
only unlock the DLL when the SDRC clock rate would be less than 83MHz.

Signed-off-by: Paul Walmsley <paul@pwsan.com>

diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c
index 0a14dca..811360a 100644
--- a/arch/arm/mach-omap2/clock34xx.c
+++ b/arch/arm/mach-omap2/clock34xx.c
@@ -281,6 +281,8 @@ static struct omap_clk omap34xx_clks[] = {
 
 #define MAX_DPLL_WAIT_TRIES		1000000
 
+#define MIN_SDRC_DLL_LOCK_FREQ		83000000
+
 /**
  * omap3_dpll_recalc - recalculate DPLL rate
  * @clk: DPLL struct clk
@@ -703,6 +705,7 @@ static int omap3_dpll4_set_rate(struct clk *clk, unsigned long rate)
 static int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
 {
 	u32 new_div = 0;
+	u32 unlock_dll = 0;
 	unsigned long validrate, sdrcrate;
 	struct omap_sdrc_params *sp;
 
@@ -729,6 +732,11 @@ static int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
 	if (!sp)
 		return -EINVAL;
 
+	if (sdrcrate < MIN_SDRC_DLL_LOCK_FREQ) {
+		pr_debug("clock: will unlock SDRC DLL\n");
+		unlock_dll = 1;
+	}
+
 	pr_info("clock: changing CORE DPLL rate from %lu to %lu\n", clk->rate,
 		validrate);
 	pr_info("clock: SDRC timing params used: %08x %08x %08x\n",
@@ -739,7 +747,7 @@ static int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
 
 	/* REVISIT: Add SDRC_MR changing to this code also */
 	omap3_configure_core_dpll(sp->rfr_ctrl, sp->actim_ctrla,
-				  sp->actim_ctrlb, new_div);
+				  sp->actim_ctrlb, new_div, unlock_dll);
 
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/sram34xx.S b/arch/arm/mach-omap2/sram34xx.S
index 35131e5..c080c825 100644
--- a/arch/arm/mach-omap2/sram34xx.S
+++ b/arch/arm/mach-omap2/sram34xx.S
@@ -40,22 +40,23 @@
 /*
  * Change frequency of core dpll
  * r0 = sdrc_rfr_ctrl r1 = sdrc_actim_ctrla r2 = sdrc_actim_ctrlb r3 = M2
+ * r4 = Unlock SDRC DLL? (1 = yes, 0 = no) -- only unlock DLL for
+ *      SDRC rates < 83MHz
  */
 ENTRY(omap3_sram_configure_core_dpll)
 	stmfd	sp!, {r1-r12, lr}	@ store regs to stack
+	ldr	r4, [sp, #52]		@ pull extra args off the stack
 	dsb				@ flush buffered writes to interconnect
 	cmp	r3, #0x2
 	blne	configure_sdrc
-	cmp	r3, #0x2
+	cmp	r4, #0x1
+	bleq	unlock_dll
 	blne	lock_dll
-	cmp	r3, #0x1
-	blne	unlock_dll
 	bl	sdram_in_selfrefresh	@ put the SDRAM in self refresh
 	bl 	configure_core_dpll
 	bl	enable_sdrc
-	cmp	r3, #0x1
-	blne	wait_dll_unlock
-	cmp	r3, #0x2
+	cmp	r4, #0x1
+	bleq	wait_dll_unlock
 	blne	wait_dll_lock
 	cmp	r3, #0x1
 	blne	configure_sdrc
diff --git a/arch/arm/plat-omap/include/mach/sram.h b/arch/arm/plat-omap/include/mach/sram.h
index ab35d62..dca7c16 100644
--- a/arch/arm/plat-omap/include/mach/sram.h
+++ b/arch/arm/plat-omap/include/mach/sram.h
@@ -23,7 +23,8 @@ extern u32 omap2_set_prcm(u32 dpll_ctrl_val, u32 sdrc_rfr_val, int bypass);
 
 extern u32 omap3_configure_core_dpll(u32 sdrc_rfr_ctrl,
 				     u32 sdrc_actim_ctrla,
-				     u32 sdrc_actim_ctrlb, u32 m2);
+				     u32 sdrc_actim_ctrlb, u32 m2,
+				     u32 unlock_dll);
 
 /* Do not use these */
 extern void omap1_sram_reprogram_clock(u32 ckctl, u32 dpllctl);
@@ -60,7 +61,8 @@ extern unsigned long omap243x_sram_reprogram_sdrc_sz;
 
 extern u32 omap3_sram_configure_core_dpll(u32 sdrc_rfr_ctrl,
 					  u32 sdrc_actim_ctrla,
-					  u32 sdrc_actim_ctrlb, u32 m2);
+					  u32 sdrc_actim_ctrlb, u32 m2,
+					  u32 unlock_dll);
 extern unsigned long omap3_sram_configure_core_dpll_sz;
 
 #endif
diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index 3835338..876f5a7 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -365,16 +365,17 @@ static inline int omap243x_sram_init(void)
 static u32 (*_omap3_sram_configure_core_dpll)(u32 sdrc_rfr_ctrl,
 					      u32 sdrc_actim_ctrla,
 					      u32 sdrc_actim_ctrlb,
-					      u32 m2);
+					      u32 m2, u32 unlock_dll);
 u32 omap3_configure_core_dpll(u32 sdrc_rfr_ctrl, u32 sdrc_actim_ctrla,
-			      u32 sdrc_actim_ctrlb, u32 m2)
+			      u32 sdrc_actim_ctrlb, u32 m2, u32 unlock_dll)
 {
 	if (!_omap3_sram_configure_core_dpll)
 		omap_sram_error();
 
 	return _omap3_sram_configure_core_dpll(sdrc_rfr_ctrl,
 					       sdrc_actim_ctrla,
-					       sdrc_actim_ctrlb, m2);
+					       sdrc_actim_ctrlb, m2,
+					       unlock_dll);
 }
 
 /* REVISIT: Should this be same as omap34xx_sram_init() after off-idle? */
-- 
cgit v0.10.2


From b7aee4bfa7cad909220491214037731c1edb510a Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:27:10 -0600
Subject: OMAP3 clock: use pr_debug() rather than pr_info() in some clock
 change code

The CORE DPLL M2 frequency change code should use pr_debug(), not
pr_info(), for its debug messages.  Same with
omap2_clksel_round_rate_div().  While here, convert a few printk(KERN_ERR ..
into pr_err().

Signed-off-by: Paul Walmsley <paul@pwsan.com>

diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 4247a15..8935a8b 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -547,8 +547,8 @@ u32 omap2_clksel_round_rate_div(struct clk *clk, unsigned long target_rate,
 	const struct clksel_rate *clkr;
 	u32 last_div = 0;
 
-	printk(KERN_INFO "clock: clksel_round_rate_div: %s target_rate %ld\n",
-	       clk->name, target_rate);
+	pr_debug("clock: clksel_round_rate_div: %s target_rate %ld\n",
+		 clk->name, target_rate);
 
 	*new_div = 1;
 
@@ -562,7 +562,7 @@ u32 omap2_clksel_round_rate_div(struct clk *clk, unsigned long target_rate,
 
 		/* Sanity check */
 		if (clkr->div <= last_div)
-			printk(KERN_ERR "clock: clksel_rate table not sorted "
+			pr_err("clock: clksel_rate table not sorted "
 			       "for clock %s", clk->name);
 
 		last_div = clkr->div;
@@ -574,7 +574,7 @@ u32 omap2_clksel_round_rate_div(struct clk *clk, unsigned long target_rate,
 	}
 
 	if (!clkr->div) {
-		printk(KERN_ERR "clock: Could not find divisor for target "
+		pr_err("clock: Could not find divisor for target "
 		       "rate %ld for clock %s parent %s\n", target_rate,
 		       clk->name, clk->parent->name);
 		return ~0;
@@ -582,8 +582,8 @@ u32 omap2_clksel_round_rate_div(struct clk *clk, unsigned long target_rate,
 
 	*new_div = clkr->div;
 
-	printk(KERN_INFO "clock: new_div = %d, new_rate = %ld\n", *new_div,
-	       (clk->parent->rate / clkr->div));
+	pr_debug("clock: new_div = %d, new_rate = %ld\n", *new_div,
+		 (clk->parent->rate / clkr->div));
 
 	return (clk->parent->rate / clkr->div);
 }
diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c
index 811360a..2ee58fa 100644
--- a/arch/arm/mach-omap2/clock34xx.c
+++ b/arch/arm/mach-omap2/clock34xx.c
@@ -737,10 +737,10 @@ static int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
 		unlock_dll = 1;
 	}
 
-	pr_info("clock: changing CORE DPLL rate from %lu to %lu\n", clk->rate,
-		validrate);
-	pr_info("clock: SDRC timing params used: %08x %08x %08x\n",
-		sp->rfr_ctrl, sp->actim_ctrla, sp->actim_ctrlb);
+	pr_debug("clock: changing CORE DPLL rate from %lu to %lu\n", clk->rate,
+		 validrate);
+	pr_debug("clock: SDRC timing params used: %08x %08x %08x\n",
+		 sp->rfr_ctrl, sp->actim_ctrla, sp->actim_ctrlb);
 
 	/* REVISIT: SRAM code doesn't support other M2 divisors yet */
 	WARN_ON(new_div != 1 && new_div != 2);
-- 
cgit v0.10.2


From 0db4e8259792202060b962ad1a8715f030268c8b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 12 May 2009 17:34:40 -0600
Subject: OMAP3 clock: lessen amount of noisy messages

On our system we see the following messages:

Disabling unused clock "gpt2_ick"
Disabling unused clock "gpt3_ick"
Disabling unused clock "gpt4_ick"
Disabling unused clock "gpt5_ick"
...

The messages have KERN_INFO level and if you have serial
console, they normally go there. I do not think it is good
idea to print that much stuff there. Moreover, messages
are not properly prefixed and for mortals it is not
immeadietly clear where they come from.

Let's give them debugging level instead.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
[paul@pwsan.com: trimmed debugging output in patch description]

diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 8935a8b..076f0a7 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -1035,7 +1035,7 @@ void omap2_clk_disable_unused(struct clk *clk)
 	if ((regval32 & (1 << clk->enable_bit)) == v)
 		return;
 
-	printk(KERN_INFO "Disabling unused clock \"%s\"\n", clk->name);
+	printk(KERN_DEBUG "Disabling unused clock \"%s\"\n", clk->name);
 	if (cpu_is_omap34xx()) {
 		omap2_clk_enable(clk);
 		omap2_clk_disable(clk);
-- 
cgit v0.10.2


From 7971687094ef48695aa56a0c03416b609bd4d1fd Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 12 May 2009 17:50:30 -0600
Subject: OMAP2xxx clock: rename clk_init_one() to clk_preinit()

Rename clk_init_one() to clk_preinit() to distinguish its function
from clk_init() and the individual struct clk init functions.

Signed-off-by: Paul Walmsley <paul@pwsan.com>

diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c
index 336e51d..436eed2 100644
--- a/arch/arm/mach-omap1/clock.c
+++ b/arch/arm/mach-omap1/clock.c
@@ -776,7 +776,7 @@ int __init omap1_clk_init(void)
 	arm_idlect1_mask = ~0;
 
 	for (c = omap_clks; c < omap_clks + ARRAY_SIZE(omap_clks); c++)
-		clk_init_one(c->lk.clk);
+		clk_preinit(c->lk.clk);
 
 	cpu_mask = 0;
 	if (cpu_is_omap16xx())
diff --git a/arch/arm/mach-omap2/clock24xx.c b/arch/arm/mach-omap2/clock24xx.c
index efc59c4..cc94672 100644
--- a/arch/arm/mach-omap2/clock24xx.c
+++ b/arch/arm/mach-omap2/clock24xx.c
@@ -722,7 +722,7 @@ int __init omap2_clk_init(void)
 	clk_init(&omap2_clk_functions);
 
 	for (c = omap24xx_clks; c < omap24xx_clks + ARRAY_SIZE(omap24xx_clks); c++)
-		clk_init_one(c->lk.clk);
+		clk_preinit(c->lk.clk);
 
 	osc_ck.rate = omap2_osc_clk_recalc(&osc_ck);
 	propagate_rate(&osc_ck);
diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c
index 2ee58fa..62092f2 100644
--- a/arch/arm/mach-omap2/clock34xx.c
+++ b/arch/arm/mach-omap2/clock34xx.c
@@ -964,7 +964,7 @@ int __init omap2_clk_init(void)
 	clk_init(&omap2_clk_functions);
 
 	for (c = omap34xx_clks; c < omap34xx_clks + ARRAY_SIZE(omap34xx_clks); c++)
-		clk_init_one(c->lk.clk);
+		clk_preinit(c->lk.clk);
 
 	for (c = omap34xx_clks; c < omap34xx_clks + ARRAY_SIZE(omap34xx_clks); c++)
 		if (c->cpu & cpu_clkflg) {
diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c
index 29efc27..508c96a 100644
--- a/arch/arm/plat-omap/clock.c
+++ b/arch/arm/plat-omap/clock.c
@@ -240,13 +240,13 @@ void recalculate_root_clocks(void)
 }
 
 /**
- * clk_init_one - initialize any fields in the struct clk before clk init
+ * clk_preinit - initialize any fields in the struct clk before clk init
  * @clk: struct clk * to initialize
  *
  * Initialize any struct clk fields needed before normal clk initialization
  * can run.  No return value.
  */
-void clk_init_one(struct clk *clk)
+void clk_preinit(struct clk *clk)
 {
 	INIT_LIST_HEAD(&clk->children);
 }
diff --git a/arch/arm/plat-omap/include/mach/clock.h b/arch/arm/plat-omap/include/mach/clock.h
index 073a2c5..d7bd19c 100644
--- a/arch/arm/plat-omap/include/mach/clock.h
+++ b/arch/arm/plat-omap/include/mach/clock.h
@@ -119,7 +119,7 @@ struct clk_functions {
 extern unsigned int mpurate;
 
 extern int clk_init(struct clk_functions *custom_clocks);
-extern void clk_init_one(struct clk *clk);
+extern void clk_preinit(struct clk *clk);
 extern int clk_register(struct clk *clk);
 extern void clk_reparent(struct clk *child, struct clk *parent);
 extern void clk_unregister(struct clk *clk);
-- 
cgit v0.10.2


From 5bb9efe33ea4001a17ab98186a40a134a3061d67 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 May 2009 08:12:51 +0200
Subject: perf_counter: fix print debug irq disable

inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
bash/15802 [HC0[0]:SC0[0]:HE1:SE1] takes:
 (sysrq_key_table_lock){?.....},

Don't unconditionally enable interrupts in the perf_counter_print_debug()
path.

[ Impact: fix potential deadlock pointed out by lockdep ]

LKML-Reference: <new-submission>
Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index da27419..f7772ff 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -621,12 +621,13 @@ void perf_counter_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
 	struct cpu_hw_counters *cpuc;
+	unsigned long flags;
 	int cpu, idx;
 
 	if (!x86_pmu.num_counters)
 		return;
 
-	local_irq_disable();
+	local_irq_save(flags);
 
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
@@ -664,7 +665,7 @@ void perf_counter_print_debug(void)
 		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
 			cpu, idx, pmc_count);
 	}
-	local_irq_enable();
+	local_irq_restore(flags);
 }
 
 static void x86_pmu_disable(struct perf_counter *counter)
-- 
cgit v0.10.2


From 4f23122858a27ba97444b9b37a066d83edebd4c8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 13 May 2009 08:35:35 +0200
Subject: splice: fix repeated kmap()'s in default_file_splice_read()

We cannot reliably map more than one page at the time, or we risk
deadlocking. Just allocate the pages from low mem instead.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/splice.c b/fs/splice.c
index eefd96b..c5e3c79 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -580,13 +580,13 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
 		struct page *page;
 
-		page = alloc_page(GFP_HIGHUSER);
+		page = alloc_page(GFP_USER);
 		error = -ENOMEM;
 		if (!page)
 			goto err;
 
 		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
-		vec[i].iov_base = (void __user *) kmap(page);
+		vec[i].iov_base = (void __user *) page_address(page);
 		vec[i].iov_len = this_len;
 		pages[i] = page;
 		spd.nr_pages++;
@@ -604,7 +604,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 
 	nr_freed = 0;
 	for (i = 0; i < spd.nr_pages; i++) {
-		kunmap(pages[i]);
 		this_len = min_t(size_t, vec[i].iov_len, res);
 		partial[i].offset = 0;
 		partial[i].len = this_len;
@@ -624,10 +623,9 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	return res;
 
 err:
-	for (i = 0; i < spd.nr_pages; i++) {
-		kunmap(pages[i]);
+	for (i = 0; i < spd.nr_pages; i++)
 		__free_page(pages[i]);
-	}
+
 	return error;
 }
 EXPORT_SYMBOL(default_file_splice_read);
-- 
cgit v0.10.2


From c8a4fb472c5101ec52f94b1e1277b8fde4b823cf Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Tue, 12 May 2009 21:41:03 +0200
Subject: FB: fix unsafe use of disable_irq() in mx3fb.c

mx3fb.c calls disable_irq() from a DMA callback, i.e., in an IRQ-handler
context, which has always been unsafe, and became deadly after the merge of
threaded interrupt handler support. Use disable_irq_nosync() instead.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c
index 9894de1..7a168ba 100644
--- a/drivers/video/mx3fb.c
+++ b/drivers/video/mx3fb.c
@@ -706,7 +706,7 @@ static void mx3fb_dma_done(void *arg)
 	dev_dbg(mx3fb->dev, "irq %d callback\n", ichannel->eof_irq);
 
 	/* We only need one interrupt, it will be re-enabled as needed */
-	disable_irq(ichannel->eof_irq);
+	disable_irq_nosync(ichannel->eof_irq);
 
 	complete(&mx3_fbi->flip_cmpl);
 }
-- 
cgit v0.10.2


From af777ce42d3d51cdef353ce296d6f99dc503feef Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 13 May 2009 16:59:40 +0900
Subject: sh: clkfwk: module_clk -> peripheral_clk rename.

For consistenct naming, and to allow us to fix up some confusion in the
SH-Mobile clock framework, amongst other places.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 61ff227..0eedf93 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -50,8 +50,8 @@ static struct clk master_clk = {
 	.rate		= CONFIG_SH_PCLK_FREQ,
 };
 
-static struct clk module_clk = {
-	.name		= "module_clk",
+static struct clk peripheral_clk = {
+	.name		= "peripheral_clk",
 	.parent		= &master_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
 };
@@ -73,7 +73,7 @@ static struct clk cpu_clk = {
  */
 static struct clk *onchip_clocks[] = {
 	&master_clk,
-	&module_clk,
+	&peripheral_clk,
 	&bus_clk,
 	&cpu_clk,
 };
diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index 94ac27f..1379873 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -115,7 +115,7 @@ static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 125,
 	.clocksource_rating = 0, /* disabled due to code generation issues */
 };
@@ -147,7 +147,7 @@ static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 125,
 	.clocksource_rating = 0, /* disabled due to code generation issues */
 };
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index a452d96..869c2da 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -118,7 +118,7 @@ static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -149,7 +149,7 @@ static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -180,7 +180,7 @@ static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index 772358b..d8febe1 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -255,7 +255,7 @@ static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -286,7 +286,7 @@ static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -317,7 +317,7 @@ static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index d749341..62e3039 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -211,7 +211,7 @@ static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 125,
 	.clocksource_rating = 0, /* disabled due to code generation issues */
 };
@@ -243,7 +243,7 @@ static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 125,
 	.clocksource_rating = 0, /* disabled due to code generation issues */
 };
@@ -275,7 +275,7 @@ static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -306,7 +306,7 @@ static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index 2fc6bff..3e6f3d7 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -171,7 +171,7 @@ static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 125,
 	.clocksource_rating = 0, /* disabled due to code generation issues */
 };
@@ -203,7 +203,7 @@ static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 125,
 	.clocksource_rating = 0, /* disabled due to code generation issues */
 };
@@ -235,7 +235,7 @@ static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -266,7 +266,7 @@ static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -297,7 +297,7 @@ static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index 3951366..88f742f 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -121,7 +121,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -152,7 +152,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0xe,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -183,7 +183,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1a,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index 9412d91..c563067 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -149,7 +149,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -180,7 +180,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0xe,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -211,7 +211,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1a,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index 07ff38d..efa76c8 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -125,7 +125,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -156,7 +156,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0xe,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -187,7 +187,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1a,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index d8b46f5..5b21077 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -128,7 +128,7 @@ static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x10,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 125,
 	.clocksource_rating = 125,
 };
@@ -160,7 +160,7 @@ static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x20,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource cmt1_resources[] = {
@@ -190,7 +190,7 @@ static struct sh_timer_config cmt2_platform_data = {
 	.name = "CMT2",
 	.channel_offset = 0x30,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource cmt2_resources[] = {
@@ -220,7 +220,7 @@ static struct sh_timer_config cmt3_platform_data = {
 	.name = "CMT3",
 	.channel_offset = 0x40,
 	.timer_bit = 3,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource cmt3_resources[] = {
@@ -250,7 +250,7 @@ static struct sh_timer_config cmt4_platform_data = {
 	.name = "CMT4",
 	.channel_offset = 0x50,
 	.timer_bit = 4,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource cmt4_resources[] = {
@@ -280,7 +280,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -311,7 +311,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0xe,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -342,7 +342,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1a,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index be79fa1..6d088d1 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -38,7 +38,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -69,7 +69,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -100,7 +100,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index 09da0c1..851672d 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -65,7 +65,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -96,7 +96,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -127,7 +127,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
@@ -162,7 +162,7 @@ static struct sh_timer_config tmu3_platform_data = {
 	.name = "TMU3",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu3_resources[] = {
@@ -192,7 +192,7 @@ static struct sh_timer_config tmu4_platform_data = {
 	.name = "TMU4",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu4_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index cd09733..5b82251 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -164,7 +164,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -195,7 +195,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -226,7 +226,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index c91f34c..f1e0c0d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -118,7 +118,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -149,7 +149,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -180,7 +180,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
@@ -210,7 +210,7 @@ static struct sh_timer_config tmu3_platform_data = {
 	.name = "TMU3",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu3_resources[] = {
@@ -240,7 +240,7 @@ static struct sh_timer_config tmu4_platform_data = {
 	.name = "TMU4",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu4_resources[] = {
@@ -270,7 +270,7 @@ static struct sh_timer_config tmu5_platform_data = {
 	.name = "TMU5",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu5_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index b61d614..12fb875 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -81,7 +81,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -112,7 +112,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -143,7 +143,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
@@ -173,7 +173,7 @@ static struct sh_timer_config tmu3_platform_data = {
 	.name = "TMU3",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu3_resources[] = {
@@ -203,7 +203,7 @@ static struct sh_timer_config tmu4_platform_data = {
 	.name = "TMU4",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu4_resources[] = {
@@ -233,7 +233,7 @@ static struct sh_timer_config tmu5_platform_data = {
 	.name = "TMU5",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu5_resources[] = {
@@ -263,7 +263,7 @@ static struct sh_timer_config tmu6_platform_data = {
 	.name = "TMU6",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu6_resources[] = {
@@ -293,7 +293,7 @@ static struct sh_timer_config tmu7_platform_data = {
 	.name = "TMU7",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu7_resources[] = {
@@ -323,7 +323,7 @@ static struct sh_timer_config tmu8_platform_data = {
 	.name = "TMU8",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu8_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index f1df020..715e05b 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -18,7 +18,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -49,7 +49,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -80,7 +80,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
@@ -110,7 +110,7 @@ static struct sh_timer_config tmu3_platform_data = {
 	.name = "TMU3",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu3_resources[] = {
@@ -140,7 +140,7 @@ static struct sh_timer_config tmu4_platform_data = {
 	.name = "TMU4",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu4_resources[] = {
@@ -170,7 +170,7 @@ static struct sh_timer_config tmu5_platform_data = {
 	.name = "TMU5",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu5_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index dc5d3e5..d7e77bc 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -20,7 +20,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -51,7 +51,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -82,7 +82,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
@@ -112,7 +112,7 @@ static struct sh_timer_config tmu3_platform_data = {
 	.name = "TMU3",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu3_resources[] = {
@@ -142,7 +142,7 @@ static struct sh_timer_config tmu4_platform_data = {
 	.name = "TMU4",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu4_resources[] = {
@@ -172,7 +172,7 @@ static struct sh_timer_config tmu5_platform_data = {
 	.name = "TMU5",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu5_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index 2c464bf..93e0d2c 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -75,7 +75,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -106,7 +106,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -137,7 +137,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
@@ -167,7 +167,7 @@ static struct sh_timer_config tmu3_platform_data = {
 	.name = "TMU3",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu3_resources[] = {
@@ -197,7 +197,7 @@ static struct sh_timer_config tmu4_platform_data = {
 	.name = "TMU4",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu4_resources[] = {
@@ -227,7 +227,7 @@ static struct sh_timer_config tmu5_platform_data = {
 	.name = "TMU5",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu5_resources[] = {
@@ -257,7 +257,7 @@ static struct sh_timer_config tmu6_platform_data = {
 	.name = "TMU6",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu6_resources[] = {
@@ -287,7 +287,7 @@ static struct sh_timer_config tmu7_platform_data = {
 	.name = "TMU7",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu7_resources[] = {
@@ -317,7 +317,7 @@ static struct sh_timer_config tmu8_platform_data = {
 	.name = "TMU8",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu8_resources[] = {
@@ -347,7 +347,7 @@ static struct sh_timer_config tmu9_platform_data = {
 	.name = "TMU9",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu9_resources[] = {
@@ -377,7 +377,7 @@ static struct sh_timer_config tmu10_platform_data = {
 	.name = "TMU10",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu10_resources[] = {
@@ -407,7 +407,7 @@ static struct sh_timer_config tmu11_platform_data = {
 	.name = "TMU11",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu11_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index 9d5185b..53c65fd 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -53,7 +53,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -84,7 +84,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -115,7 +115,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
@@ -145,7 +145,7 @@ static struct sh_timer_config tmu3_platform_data = {
 	.name = "TMU3",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu3_resources[] = {
@@ -175,7 +175,7 @@ static struct sh_timer_config tmu4_platform_data = {
 	.name = "TMU4",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu4_resources[] = {
@@ -205,7 +205,7 @@ static struct sh_timer_config tmu5_platform_data = {
 	.name = "TMU5",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu5_resources[] = {
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
index 678d69b..f5ff1ac 100644
--- a/arch/sh/kernel/cpu/sh5/setup-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -75,7 +75,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clockevent_rating = 200,
 };
 
@@ -106,7 +106,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 	.clocksource_rating = 200,
 };
 
@@ -137,7 +137,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "module_clk",
+	.clk = "peripheral_clk",
 };
 
 static struct resource tmu2_resources[] = {
diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c
index baa28b7..b9680f5 100644
--- a/drivers/i2c/busses/i2c-sh7760.c
+++ b/drivers/i2c/busses/i2c-sh7760.c
@@ -396,7 +396,7 @@ static int __devinit calc_CCR(unsigned long scl_hz)
 	signed char cdf, cdfm;
 	int scgd, scgdm, scgds;
 
-	mclk = clk_get(NULL, "module_clk");
+	mclk = clk_get(NULL, "peripheral_clk");
 	if (IS_ERR(mclk)) {
 		return PTR_ERR(mclk);
 	} else {
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 4e3248d..fa4d52a 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1084,7 +1084,7 @@ static void __devinit sci_init_single(struct platform_device *dev,
 	sci_port->port.uartclk	= CONFIG_CPU_CLOCK;
 #elif defined(CONFIG_HAVE_CLK)
 	sci_port->iclk		= p->clk ? clk_get(&dev->dev, p->clk) : NULL;
-	sci_port->dclk		= clk_get(&dev->dev, "module_clk");
+	sci_port->dclk		= clk_get(&dev->dev, "peripheral_clk");
 	sci_port->enable	= sci_clk_enable;
 	sci_port->disable	= sci_clk_disable;
 #else
diff --git a/sound/oss/sh_dac_audio.c b/sound/oss/sh_dac_audio.c
index 78cfb66..6fa3140 100644
--- a/sound/oss/sh_dac_audio.c
+++ b/sound/oss/sh_dac_audio.c
@@ -95,18 +95,18 @@ static void dac_audio_stop(void)
 		outw(v, HD64461_GPADR);
 	}
 
- 	sh_dac_output(0, CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
+	sh_dac_output(0, CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
 	sh_dac_disable(CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
 }
 
 static void dac_audio_set_rate(void)
 {
 	unsigned long interval;
- 	struct clk *clk;
+	struct clk *clk;
 
- 	clk = clk_get(NULL, "module_clk");
- 	interval = (clk_get_rate(clk) / 4) / rate;
- 	clk_put(clk);
+	clk = clk_get(NULL, "peripheral_clk");
+	interval = (clk_get_rate(clk) / 4) / rate;
+	clk_put(clk);
 	ctrl_outl(interval, TMU1_TCOR);
 	ctrl_outl(interval, TMU1_TCNT);
 }
-- 
cgit v0.10.2


From d672fef02738582bdeae6e77176e141eeb9169bc Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 13 May 2009 17:03:09 +0900
Subject: sh: clkfwk: Handle NULL clkops for root clocks.

root clocks may simply be placeholders for rate and ancestry information,
and have no real associated operations of their own. Account for this,
so we are still able to use these sorts of clocks for rate propagation.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 0eedf93..2ced20f 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -103,7 +103,7 @@ void propagate_rate(struct clk *tclk)
 	struct clk *clkp;
 
 	list_for_each_entry(clkp, &tclk->children, sibling) {
-		if (clkp->ops->recalc)
+		if (clkp->ops && clkp->ops->recalc)
 			clkp->rate = clkp->ops->recalc(clkp);
 		propagate_rate(clkp);
 	}
@@ -196,7 +196,7 @@ void recalculate_root_clocks(void)
 	struct clk *clkp;
 
 	list_for_each_entry(clkp, &root_clks, sibling) {
-		if (clkp->ops->recalc)
+		if (clkp->ops && clkp->ops->recalc)
 			clkp->rate = clkp->ops->recalc(clkp);
 		propagate_rate(clkp);
 	}
@@ -224,7 +224,7 @@ int clk_register(struct clk *clk)
 		list_add(&clk->sibling, &root_clks);
 
 	list_add(&clk->node, &clock_list);
-	if (clk->ops->init)
+	if (clk->ops && clk->ops->init)
 		clk->ops->init(clk);
 	mutex_unlock(&clock_list_sem);
 
-- 
cgit v0.10.2


From 100890c55e326a9acb4429593c5ad2012c194564 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 13 May 2009 17:05:51 +0900
Subject: sh: clkfwk: Provide a generic clk_set_rate_ex() path for root clocks.

In the case of root clocks (such as clkin oscillators, extal, etc.), the
rate information is entirely platform dependent and needs to be lazily
set and propagated from the platform code. This provides a method for
establishing the rate update on these types of clocks that define no
set_rate() op of their own.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 2ced20f..0a7755c 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -265,20 +265,27 @@ EXPORT_SYMBOL_GPL(clk_set_rate);
 int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id)
 {
 	int ret = -EOPNOTSUPP;
+	unsigned long flags;
 
-	if (likely(clk->ops && clk->ops->set_rate)) {
-		unsigned long flags;
+	spin_lock_irqsave(&clock_lock, flags);
 
-		spin_lock_irqsave(&clock_lock, flags);
+	if (likely(clk->ops && clk->ops->set_rate)) {
 		ret = clk->ops->set_rate(clk, rate, algo_id);
-		if (ret == 0) {
-			if (clk->ops->recalc)
-				clk->rate = clk->ops->recalc(clk);
-			propagate_rate(clk);
-		}
-		spin_unlock_irqrestore(&clock_lock, flags);
+		if (ret != 0)
+			goto out_unlock;
+	} else {
+		clk->rate = rate;
+		ret = 0;
 	}
 
+	if (clk->ops && clk->ops->recalc)
+		clk->rate = clk->ops->recalc(clk);
+
+	propagate_rate(clk);
+
+out_unlock:
+	spin_unlock_irqrestore(&clock_lock, flags);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(clk_set_rate_ex);
-- 
cgit v0.10.2


From 253b0887b3736160feac9ccdcf146a2073e41463 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 13 May 2009 17:38:11 +0900
Subject: sh: clkfwk: Rework legacy CPG clock handling.

This moves out the old legacy CPG clocks to their own file, and converts
over the existing users. With these clocks going away and each CPU
dealing with them on their own, CPUs can gradually move over to the new
interface.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index fb75c2d..d7990cd 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -513,6 +513,9 @@ config SH_PCLK_FREQ
 	  This is necessary for determining the reference clock value on
 	  platforms lacking an RTC.
 
+config SH_CLK_CPG_LEGACY
+	def_bool y
+
 config SH_CLK_MD
 	int "CPU Mode Pin Setting"
 	depends on CPU_SH2
diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index e2f5bf1..c27e844 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -47,7 +47,7 @@ struct clk_lookup {
 #define CLK_ENABLE_ON_INIT	(1 << 0)
 
 /* Should be defined by processor-specific code */
-void arch_init_clk_ops(struct clk_ops **, int type);
+void __deprecated arch_init_clk_ops(struct clk_ops **, int type);
 int __init arch_clk_init(void);
 
 /* arch/sh/kernel/cpu/clock.c */
@@ -59,6 +59,9 @@ int clk_reparent(struct clk *child, struct clk *parent);
 int clk_register(struct clk *);
 void clk_unregister(struct clk *);
 
+/* arch/sh/kernel/cpu/clock-cpg.c */
+int __init __deprecated cpg_clk_init(void);
+
 /* the exported API, in addition to clk_set_rate */
 /**
  * clk_set_rate_ex - set the clock rate for a clock source, with additional parameter
diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h
index 64b1c16..73d6d16 100644
--- a/arch/sh/include/asm/machvec.h
+++ b/arch/sh/include/asm/machvec.h
@@ -46,6 +46,8 @@ struct sh_machine_vector {
 
 	void __iomem *(*mv_ioport_map)(unsigned long port, unsigned int size);
 	void (*mv_ioport_unmap)(void __iomem *);
+
+	int (*mv_clk_init)(void);
 };
 
 extern struct sh_machine_vector sh_mv;
diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile
index 2600641..05105b9 100644
--- a/arch/sh/kernel/cpu/Makefile
+++ b/arch/sh/kernel/cpu/Makefile
@@ -17,5 +17,6 @@ obj-$(CONFIG_ARCH_SHMOBILE)	+= shmobile/
 
 obj-$(CONFIG_UBC_WAKEUP)	+= ubc.o
 obj-$(CONFIG_SH_ADC)		+= adc.o
+obj-$(CONFIG_SH_CLK_CPG_LEGACY)	+= clock-cpg.o
 
 obj-y	+= irq/ init.o clock.o
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
new file mode 100644
index 0000000..b4ca200
--- /dev/null
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -0,0 +1,60 @@
+#include <linux/clk.h>
+#include <linux/compiler.h>
+#include <asm/clock.h>
+
+static struct clk master_clk = {
+	.name		= "master_clk",
+	.flags		= CLK_ENABLE_ON_INIT,
+	.rate		= CONFIG_SH_PCLK_FREQ,
+};
+
+static struct clk peripheral_clk = {
+	.name		= "peripheral_clk",
+	.parent		= &master_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static struct clk bus_clk = {
+	.name		= "bus_clk",
+	.parent		= &master_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static struct clk cpu_clk = {
+	.name		= "cpu_clk",
+	.parent		= &master_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+/*
+ * The ordering of these clocks matters, do not change it.
+ */
+static struct clk *onchip_clocks[] = {
+	&master_clk,
+	&peripheral_clk,
+	&bus_clk,
+	&cpu_clk,
+};
+
+int __init __deprecated cpg_clk_init(void)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(onchip_clocks); i++) {
+		struct clk *clk = onchip_clocks[i];
+		arch_init_clk_ops(&clk->ops, i);
+		if (clk->ops)
+			ret |= clk_register(clk);
+	}
+
+	return ret;
+}
+
+/*
+ * Placeholder for compatability, until the lazy CPUs do this
+ * on their own.
+ */
+int __init __weak arch_clk_init(void)
+{
+	return cpg_clk_init();
+}
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 0a7755c..033f466 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -30,54 +30,12 @@
 #include <linux/platform_device.h>
 #include <linux/proc_fs.h>
 #include <asm/clock.h>
+#include <asm/machvec.h>
 
 static LIST_HEAD(clock_list);
 static DEFINE_SPINLOCK(clock_lock);
 static DEFINE_MUTEX(clock_list_sem);
 
-/*
- * Each subtype is expected to define the init routines for these clocks,
- * as each subtype (or processor family) will have these clocks at the
- * very least. These are all provided through the CPG, which even some of
- * the more quirky parts (such as ST40, SH4-202, etc.) still have.
- *
- * The processor-specific code is expected to register any additional
- * clock sources that are of interest.
- */
-static struct clk master_clk = {
-	.name		= "master_clk",
-	.flags		= CLK_ENABLE_ON_INIT,
-	.rate		= CONFIG_SH_PCLK_FREQ,
-};
-
-static struct clk peripheral_clk = {
-	.name		= "peripheral_clk",
-	.parent		= &master_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-};
-
-static struct clk bus_clk = {
-	.name		= "bus_clk",
-	.parent		= &master_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-};
-
-static struct clk cpu_clk = {
-	.name		= "cpu_clk",
-	.parent		= &master_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-};
-
-/*
- * The ordering of these clocks matters, do not change it.
- */
-static struct clk *onchip_clocks[] = {
-	&master_clk,
-	&peripheral_clk,
-	&bus_clk,
-	&cpu_clk,
-};
-
 /* Used for clocks that always have same value as the parent clock */
 unsigned long followparent_recalc(struct clk *clk)
 {
@@ -443,10 +401,6 @@ void clk_put(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_put);
 
-int __init __weak arch_clk_init(void)
-{
-	return 0;
-}
 
 static int show_clocks(char *buf, char **start, off_t off,
 		       int len, int *eof, void *data)
@@ -533,18 +487,22 @@ subsys_initcall(clk_sysdev_init);
 
 int __init clk_init(void)
 {
-	int i, ret = 0;
-
-	BUG_ON(!master_clk.rate);
-
-	for (i = 0; i < ARRAY_SIZE(onchip_clocks); i++) {
-		struct clk *clk = onchip_clocks[i];
+	int ret;
 
-		arch_init_clk_ops(&clk->ops, i);
-		ret |= clk_register(clk);
+	ret = arch_clk_init();
+	if (unlikely(ret)) {
+		pr_err("%s: CPU clock registration failed.\n", __func__);
+		return ret;
 	}
 
-	ret |= arch_clk_init();
+	if (sh_mv.mv_clk_init) {
+		ret = sh_mv.mv_clk_init();
+		if (unlikely(ret)) {
+			pr_err("%s: machvec clock initialization failed.\n",
+			       __func__);
+			return ret;
+		}
+	}
 
 	/* Kick the child clocks.. */
 	recalculate_root_clocks();
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index a72dc32..21421e3 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -152,9 +152,12 @@ static struct clk *sh4202_onchip_clocks[] = {
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
+	struct clk *clk;
 	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh4202_onchip_clocks); i++) {
 		struct clk *clkp = sh4202_onchip_clocks[i];
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 426065b..a98d7da 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -892,6 +892,8 @@ int __init arch_clk_init(void)
 	struct clk *clk;
 	int i;
 
+	clk_cpg_init();
+
 	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7722_clocks); i++) {
 		pr_debug( "Registering clock '%s'\n", sh7722_clocks[i]->name);
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
index ce3d4e6..370cd47 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c
@@ -92,9 +92,12 @@ static struct clk *sh7763_onchip_clocks[] = {
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
+	struct clk *clk;
 	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7763_onchip_clocks); i++) {
 		struct clk *clkp = sh7763_onchip_clocks[i];
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
index 38b8b1d..a249d82 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c
@@ -98,9 +98,12 @@ static struct clk *sh7780_onchip_clocks[] = {
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
+	struct clk *clk;
 	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7780_onchip_clocks); i++) {
 		struct clk *clkp = sh7780_onchip_clocks[i];
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index fa14f35..bf5a0da 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -136,9 +136,12 @@ static struct clk *sh7785_onchip_clocks[] = {
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
+	struct clk *clk;
 	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7785_onchip_clocks); i++) {
 		struct clk *clkp = sh7785_onchip_clocks[i];
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
index 7907002..a0e8869 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
@@ -122,9 +122,12 @@ static struct clk *sh7786_onchip_clocks[] = {
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
+	struct clk *clk;
 	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7786_onchip_clocks); i++) {
 		struct clk *clkp = sh7786_onchip_clocks[i];
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
index c14553e..23c27d3 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c
@@ -109,9 +109,12 @@ static struct clk *shx3_onchip_clocks[] = {
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk = clk_get(NULL, "master_clk");
+	struct clk *clk;
 	int i, ret = 0;
 
+	cpg_clk_init();
+
+	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(shx3_onchip_clocks); i++) {
 		struct clk *clkp = shx3_onchip_clocks[i];
 
-- 
cgit v0.10.2


From a77b5ac0ea8e47c77008d3a9a9976dcfbc01c42a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 13 May 2009 17:55:00 +0900
Subject: sh: clkfwk: Update SH7785 for refactored clock framework.

This updates the SH7785 CPU code as well as the SH7785LCR board support
code for making use of the newly refactored clock framework. Support for
the legacy CPG clocks is dropped at this point, with the extal frequency
fed in from the board code.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index d7990cd..df764c5 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -514,7 +514,7 @@ config SH_PCLK_FREQ
 	  platforms lacking an RTC.
 
 config SH_CLK_CPG_LEGACY
-	def_bool y
+	def_bool y if !CPU_SUBTYPE_SH7785
 
 config SH_CLK_MD
 	int "CPU Mode Pin Setting"
diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index 6f94f17..33b194b 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -2,12 +2,12 @@
  * Renesas Technology Corp. R0P7785LC0011RL Support.
  *
  * Copyright (C) 2008  Yoshihiro Shimoda
+ * Copyright (C) 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
-
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/sm501.h>
@@ -19,8 +19,11 @@
 #include <linux/i2c-pca-platform.h>
 #include <linux/i2c-algo-pca.h>
 #include <linux/irq.h>
-#include <asm/heartbeat.h>
+#include <linux/clk.h>
+#include <linux/errno.h>
 #include <mach/sh7785lcr.h>
+#include <asm/heartbeat.h>
+#include <asm/clock.h>
 
 /*
  * NOTE: This board has 2 physical memory maps.
@@ -273,6 +276,20 @@ void __init init_sh7785lcr_IRQ(void)
 	plat_irq_setup_pins(IRQ_MODE_IRQ3210);
 }
 
+static int sh7785lcr_clk_init(void)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = clk_get(NULL, "extal");
+	if (!clk || IS_ERR(clk))
+		return PTR_ERR(clk);
+	ret = clk_set_rate(clk, 33333333);
+	clk_put(clk);
+
+	return ret;
+}
+
 static void sh7785lcr_power_off(void)
 {
 	unsigned char *p;
@@ -309,6 +326,7 @@ static void __init sh7785lcr_setup(char **cmdline_p)
 static struct sh_machine_vector mv_sh7785lcr __initmv = {
 	.mv_name		= "SH7785LCR",
 	.mv_setup		= sh7785lcr_setup,
+	.mv_clk_init		= sh7785lcr_clk_init,
 	.mv_init_irq		= init_sh7785lcr_IRQ,
 };
 
diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index c27e844..40cf3c0 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -34,7 +34,9 @@ struct clk {
 
 	unsigned long		rate;
 	unsigned long		flags;
+
 	unsigned long		arch_flags;
+	void			*priv;
 };
 
 struct clk_lookup {
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index bf5a0da..87584dc 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -3,7 +3,7 @@
  *
  * SH7785 support for the clock framework
  *
- *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2007 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -11,145 +11,146 @@
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/io.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
-#include <asm/io.h>
-
-static int ifc_divisors[] = { 1, 2, 4, 6 };
-static int ufc_divisors[] = { 1, 1, 4, 6 };
-static int sfc_divisors[] = { 1, 1, 4, 6 };
-static int bfc_divisors[] = { 1, 1, 1, 1, 1, 12, 16, 18,
-			     24, 32, 36, 48, 1, 1, 1, 1 };
-static int mfc_divisors[] = { 1, 1, 4, 6 };
-static int pfc_divisors[] = { 1, 1, 1, 1, 1, 1, 1, 18,
-			      24, 32, 36, 48, 1, 1, 1, 1 };
-
-static void master_clk_init(struct clk *clk)
-{
-	clk->rate *= pfc_divisors[ctrl_inl(FRQMR1) & 0x000f];
-}
 
-static struct clk_ops sh7785_master_clk_ops = {
-	.init		= master_clk_init,
+static unsigned int div2[] = { 1, 2, 4, 6, 8, 12, 16, 18,
+			       24, 32, 36, 48 };
+struct clk_priv {
+	unsigned int shift;
 };
 
-static unsigned long module_clk_recalc(struct clk *clk)
-{
-	int idx = (ctrl_inl(FRQMR1) & 0x000f);
-	return clk->parent->rate / pfc_divisors[idx];
-}
+#define FRQMR_CLK_DATA(_name, _shift)	\
+static struct clk_priv _name##_data = {	.shift = _shift, }
 
-static struct clk_ops sh7785_module_clk_ops = {
-	.recalc		= module_clk_recalc,
-};
+FRQMR_CLK_DATA(pfc,  0);
+FRQMR_CLK_DATA(s3fc, 4);
+FRQMR_CLK_DATA(s2fc, 8);
+FRQMR_CLK_DATA(mfc, 12);
+FRQMR_CLK_DATA(bfc, 16);
+FRQMR_CLK_DATA(sfc, 20);
+FRQMR_CLK_DATA(ufc, 24);
+FRQMR_CLK_DATA(ifc, 28);
 
-static unsigned long bus_clk_recalc(struct clk *clk)
+static unsigned long frqmr_clk_recalc(struct clk *clk)
 {
-	int idx = ((ctrl_inl(FRQMR1) >> 16) & 0x000f);
-	return clk->parent->rate / bfc_divisors[idx];
-}
+	struct clk_priv *data = clk->priv;
+	unsigned int idx;
 
-static struct clk_ops sh7785_bus_clk_ops = {
-	.recalc		= bus_clk_recalc,
-};
+	idx = (__raw_readl(FRQMR1) >> data->shift) & 0x000f;
 
-static unsigned long cpu_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 28) & 0x0003);
-	return clk->parent->rate / ifc_divisors[idx];
+	/*
+	 * XXX: PLL1 multiplier is locked for the default clock mode,
+	 * when mode pin detection and configuration support is added,
+	 * select the multiplier dynamically.
+	 */
+	return clk->parent->rate * 36 / div2[idx];
 }
 
-static struct clk_ops sh7785_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
+static struct clk_ops frqmr_clk_ops = {
+	.recalc		= frqmr_clk_recalc,
 };
 
-static struct clk_ops *sh7785_clk_ops[] = {
-	&sh7785_master_clk_ops,
-	&sh7785_module_clk_ops,
-	&sh7785_bus_clk_ops,
-	&sh7785_cpu_clk_ops,
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+static struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
 };
 
-void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
-{
-	if (idx < ARRAY_SIZE(sh7785_clk_ops))
-		*ops = sh7785_clk_ops[idx];
-}
-
-static unsigned long shyway_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 20) & 0x0003);
-	return clk->parent->rate / sfc_divisors[idx];
-}
-
-static struct clk_ops sh7785_shyway_clk_ops = {
-	.recalc		= shyway_clk_recalc,
+static struct clk cpu_clk = {
+	.name		= "cpu_clk",		/* Ick */
+	.id		= -1,
+	.ops		= &frqmr_clk_ops,
+	.parent		= &extal_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+	.priv		= &ifc_data,
 };
 
-static struct clk sh7785_shyway_clk = {
-	.name		= "shyway_clk",
+static struct clk shyway_clk = {
+	.name		= "shyway_clk",		/* SHck */
+	.id		= -1,
+	.ops		= &frqmr_clk_ops,
+	.parent		= &extal_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
-	.ops		= &sh7785_shyway_clk_ops,
+	.priv		= &sfc_data,
 };
 
-static unsigned long ddr_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 12) & 0x0003);
-	return clk->parent->rate / mfc_divisors[idx];
-}
+static struct clk peripheral_clk = {
+	.name		= "peripheral_clk",	/* Pck */
+	.id		= -1,
+	.ops		= &frqmr_clk_ops,
+	.parent		= &extal_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+	.priv		= &pfc_data,
+};
 
-static struct clk_ops sh7785_ddr_clk_ops = {
-	.recalc		= ddr_clk_recalc,
+static struct clk ddr_clk = {
+	.name		= "ddr_clk",		/* DDRck */
+	.id		= -1,
+	.ops		= &frqmr_clk_ops,
+	.parent		= &extal_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+	.priv		= &mfc_data,
 };
 
-static struct clk sh7785_ddr_clk = {
-	.name		= "ddr_clk",
+static struct clk bus_clk = {
+	.name		= "bus_clk",		/* Bck */
+	.id		= -1,
+	.ops		= &frqmr_clk_ops,
+	.parent		= &extal_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
-	.ops		= &sh7785_ddr_clk_ops,
+	.priv		= &bfc_data,
 };
 
-static unsigned long ram_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 24) & 0x0003);
-	return clk->parent->rate / ufc_divisors[idx];
-}
+static struct clk ga_clk = {
+	.name		= "ga_clk",		/* GAck */
+	.id		= -1,
+	.ops		= &frqmr_clk_ops,
+	.parent		= &extal_clk,
+	.priv		= &s2fc_data,
+};
 
-static struct clk_ops sh7785_ram_clk_ops = {
-	.recalc		= ram_clk_recalc,
+static struct clk du_clk = {
+	.name		= "du_clk",		/* DUck */
+	.id		= -1,
+	.ops		= &frqmr_clk_ops,
+	.parent		= &extal_clk,
+	.priv		= &s3fc_data,
 };
 
-static struct clk sh7785_ram_clk = {
-	.name		= "ram_clk",
+static struct clk umem_clk = {
+	.name		= "umem_clk",		/* uck */
+	.id		= -1,
+	.ops		= &frqmr_clk_ops,
+	.parent		= &extal_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
-	.ops		= &sh7785_ram_clk_ops,
+	.priv		= &ufc_data,
 };
 
-/*
- * Additional SH7785-specific on-chip clocks that aren't already part of the
- * clock framework
- */
-static struct clk *sh7785_onchip_clocks[] = {
-	&sh7785_shyway_clk,
-	&sh7785_ddr_clk,
-	&sh7785_ram_clk,
+static struct clk *clks[] = {
+	&extal_clk,
+	&cpu_clk,
+	&shyway_clk,
+	&peripheral_clk,
+	&ddr_clk,
+	&bus_clk,
+	&ga_clk,
+	&du_clk,
+	&umem_clk,
 };
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk;
 	int i, ret = 0;
 
-	cpg_clk_init();
-
-	clk = clk_get(NULL, "master_clk");
-	for (i = 0; i < ARRAY_SIZE(sh7785_onchip_clocks); i++) {
-		struct clk *clkp = sh7785_onchip_clocks[i];
-
-		clkp->parent = clk;
-		ret |= clk_register(clkp);
-	}
-
-	clk_put(clk);
+	for (i = 0; i < ARRAY_SIZE(clks); i++)
+		ret |= clk_register(clks[i]);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 22b530e0287724f00dde9a15a94d408c3334e86c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 13 May 2009 11:32:52 +0200
Subject: ALSA: hda - Disable fallback to model=acer for Acer laptops

The model=acer for ALC883/889 doesn't work well for the recent Acer
Aspire laptops.  Since model=auto works better nowadays, it's safer
to use the default fallback instead of the Acer specific one.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index c14020a..51954ba 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8622,8 +8622,10 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = {
 		ALC888_ACER_ASPIRE_4930G),
 	SND_PCI_QUIRK(0x1025, 0x0166, "Acer Aspire 6530G",
 		ALC888_ACER_ASPIRE_4930G),
-	/* default Acer */
-	SND_PCI_QUIRK_VENDOR(0x1025, "Acer laptop", ALC883_ACER),
+	/* default Acer -- disabled as it causes more problems.
+	 *    model=auto should work fine now
+	 */
+	/* SND_PCI_QUIRK_VENDOR(0x1025, "Acer laptop", ALC883_ACER), */
 	SND_PCI_QUIRK(0x1028, 0x020d, "Dell Inspiron 530", ALC888_6ST_DELL),
 	SND_PCI_QUIRK(0x103c, 0x2a3d, "HP Pavillion", ALC883_6ST_DIG),
 	SND_PCI_QUIRK(0x103c, 0x2a4f, "HP Samba", ALC888_3ST_HP),
-- 
cgit v0.10.2


From 92e0d896ce3087112602449efd87c6d7f4eae8d0 Mon Sep 17 00:00:00 2001
From: Sergey Belyashov <Sergey.Belyashov@gmail.com>
Date: Mon, 4 May 2009 13:01:02 +0400
Subject: HID: autocentering support for Logitech G25 Racing Wheel

Some months ago I send patch which adds autocentering for Logitech MOMO Wheel.
Now I have access to Logitech G25 Racing Wheel and test autocentering for it. I
write patch for current kernel to support autocentering for G25 in legacy mode
(this device supports other modes, but after switching device reconnects with
ID 0xc299 and FF support comes out) and others Logitech (Driving Force,
Formula Force Ex etc) wheels with ID 046d:c294.

Signed-off-by: Sergey Belyashov <Sergey.Belyashov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c
index 51aff08..9735be6 100644
--- a/drivers/hid/hid-lgff.c
+++ b/drivers/hid/hid-lgff.c
@@ -61,7 +61,7 @@ static const struct dev_type devices[] = {
 	{ 0x046d, 0xc219, ff_rumble },
 	{ 0x046d, 0xc283, ff_joystick },
 	{ 0x046d, 0xc286, ff_joystick },
-	{ 0x046d, 0xc294, ff_joystick },
+	{ 0x046d, 0xc294, ff_wheel },
 	{ 0x046d, 0xc295, ff_joystick },
 	{ 0x046d, 0xca03, ff_wheel },
 };
-- 
cgit v0.10.2


From ca2dcd40f54c8928b3994712a6cadd2078a087fa Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess@hadess.net>
Date: Mon, 11 May 2009 17:18:12 +0200
Subject: HID: Wacom Graphire Bluetooth driver

Based on the work by Andrew Zabolotny, an HID driver for the Bluetooth
Wacom tablet. This is required as it uses a slightly different
protocols from what's currently support by the drivers/input/wacom*
driver, and those only support USB.

A user-space patch is required to activate mode 2 of the Wacom tablet,
as hidp does not support hid_output_raw_report.

Signed-off-by: Bastien Nocera <hadess@hadess.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 7e67dcb..21edf40 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -271,6 +271,13 @@ config THRUSTMASTER_FF
 	  Say Y here if you have a THRUSTMASTER FireStore Dual Power 2 or
 	  a THRUSTMASTER Ferrari GT Rumble Force or Force Feedback Wheel.
 
+config HID_WACOM
+	tristate "Wacom Bluetooth devices support" if EMBEDDED
+	depends on BT_HIDP
+	default !EMBEDDED
+	---help---
+	Support for Wacom Graphire Bluetooth tablet.
+
 config ZEROPLUS_FF
 	tristate "Zeroplus based game controller support"
 	depends on USB_HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 1f7cb0f..39cebcf 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_GREENASIA_FF)	+= hid-gaff.o
 obj-$(CONFIG_THRUSTMASTER_FF)	+= hid-tmff.o
 obj-$(CONFIG_HID_TOPSEED)	+= hid-topseed.o
 obj-$(CONFIG_ZEROPLUS_FF)	+= hid-zpff.o
+obj-$(CONFIG_HID_WACOM)		+= hid-wacom.o
 
 obj-$(CONFIG_USB_HID)		+= usbhid/
 obj-$(CONFIG_USB_MOUSE)		+= usbhid/
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 8551693..9f38ab8 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1312,6 +1312,7 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_WACOM, USB_DEVICE_ID_WACOM_GRAPHIRE_BLUETOOTH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
 
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 4d5ee2b..b519692 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -414,6 +414,7 @@
 #define USB_DEVICE_ID_VERNIER_LCSPEC	0x0006
 
 #define USB_VENDOR_ID_WACOM		0x056a
+#define USB_DEVICE_ID_WACOM_GRAPHIRE_BLUETOOTH	0x81
 
 #define USB_VENDOR_ID_WISEGROUP		0x0925
 #define USB_DEVICE_ID_1_PHIDGETSERVO_20	0x8101
diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c
new file mode 100644
index 0000000..1f9237f
--- /dev/null
+++ b/drivers/hid/hid-wacom.c
@@ -0,0 +1,259 @@
+/*
+ *  Bluetooth Wacom Tablet support
+ *
+ *  Copyright (c) 1999 Andreas Gal
+ *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
+ *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
+ *  Copyright (c) 2006-2007 Jiri Kosina
+ *  Copyright (c) 2007 Paul Walmsley
+ *  Copyright (c) 2008 Jiri Slaby <jirislaby@gmail.com>
+ *  Copyright (c) 2006 Andrew Zabolotny <zap@homelink.ru>
+ *  Copyright (c) 2009 Bastien Nocera <hadess@hadess.net>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+
+#include "hid-ids.h"
+
+struct wacom_data {
+	__u16 tool;
+	unsigned char butstate;
+};
+
+static int wacom_raw_event(struct hid_device *hdev, struct hid_report *report,
+		u8 *raw_data, int size)
+{
+	struct wacom_data *wdata = hid_get_drvdata(hdev);
+	struct hid_input *hidinput;
+	struct input_dev *input;
+	unsigned char *data = (unsigned char *) raw_data;
+	int tool, x, y, rw;
+
+	if (!(hdev->claimed & HID_CLAIMED_INPUT))
+		return 0;
+
+	tool = 0;
+	hidinput = list_entry(hdev->inputs.next, struct hid_input, list);
+	input = hidinput->input;
+
+	/* Check if this is a tablet report */
+	if (data[0] != 0x03)
+		return 0;
+
+	/* Get X & Y positions */
+	x = le16_to_cpu(*(__le16 *) &data[2]);
+	y = le16_to_cpu(*(__le16 *) &data[4]);
+
+	/* Get current tool identifier */
+	if (data[1] & 0x90) { /* If pen is in the in/active area */
+		switch ((data[1] >> 5) & 3) {
+		case 0:	/* Pen */
+			tool = BTN_TOOL_PEN;
+			break;
+
+		case 1: /* Rubber */
+			tool = BTN_TOOL_RUBBER;
+			break;
+
+		case 2: /* Mouse with wheel */
+		case 3: /* Mouse without wheel */
+			tool = BTN_TOOL_MOUSE;
+			break;
+		}
+
+		/* Reset tool if out of active tablet area */
+		if (!(data[1] & 0x10))
+			tool = 0;
+	}
+
+	/* If tool changed, notify input subsystem */
+	if (wdata->tool != tool) {
+		if (wdata->tool) {
+			/* Completely reset old tool state */
+			if (wdata->tool == BTN_TOOL_MOUSE) {
+				input_report_key(input, BTN_LEFT, 0);
+				input_report_key(input, BTN_RIGHT, 0);
+				input_report_key(input, BTN_MIDDLE, 0);
+				input_report_abs(input, ABS_DISTANCE,
+						input->absmax[ABS_DISTANCE]);
+			} else {
+				input_report_key(input, BTN_TOUCH, 0);
+				input_report_key(input, BTN_STYLUS, 0);
+				input_report_key(input, BTN_STYLUS2, 0);
+				input_report_abs(input, ABS_PRESSURE, 0);
+			}
+			input_report_key(input, wdata->tool, 0);
+			input_sync(input);
+		}
+		wdata->tool = tool;
+		if (tool)
+			input_report_key(input, tool, 1);
+	}
+
+	if (tool) {
+		input_report_abs(input, ABS_X, x);
+		input_report_abs(input, ABS_Y, y);
+
+		switch ((data[1] >> 5) & 3) {
+		case 2: /* Mouse with wheel */
+			input_report_key(input, BTN_MIDDLE, data[1] & 0x04);
+			rw = (data[6] & 0x01) ? -1 :
+				(data[6] & 0x02) ? 1 : 0;
+			input_report_rel(input, REL_WHEEL, rw);
+			/* fall through */
+
+		case 3: /* Mouse without wheel */
+			input_report_key(input, BTN_LEFT, data[1] & 0x01);
+			input_report_key(input, BTN_RIGHT, data[1] & 0x02);
+			/* Compute distance between mouse and tablet */
+			rw = 44 - (data[6] >> 2);
+			if (rw < 0)
+				rw = 0;
+			else if (rw > 31)
+				rw = 31;
+			input_report_abs(input, ABS_DISTANCE, rw);
+			break;
+
+		default:
+			input_report_abs(input, ABS_PRESSURE,
+					data[6] | (((__u16) (data[1] & 0x08)) << 5));
+			input_report_key(input, BTN_TOUCH, data[1] & 0x01);
+			input_report_key(input, BTN_STYLUS, data[1] & 0x02);
+			input_report_key(input, BTN_STYLUS2, (tool == BTN_TOOL_PEN) && data[1] & 0x04);
+			break;
+		}
+
+		input_sync(input);
+	}
+
+	/* Report the state of the two buttons at the top of the tablet
+	 * as two extra fingerpad keys (buttons 4 & 5). */
+	rw = data[7] & 0x03;
+	if (rw != wdata->butstate) {
+		wdata->butstate = rw;
+		input_report_key(input, BTN_0, rw & 0x02);
+		input_report_key(input, BTN_1, rw & 0x01);
+		input_event(input, EV_MSC, MSC_SERIAL, 0xf0);
+		input_sync(input);
+	}
+
+	return 1;
+}
+
+static int wacom_probe(struct hid_device *hdev,
+		const struct hid_device_id *id)
+{
+	struct hid_input *hidinput;
+	struct input_dev *input;
+	struct wacom_data *wdata;
+	int ret;
+
+	wdata = kzalloc(sizeof(*wdata), GFP_KERNEL);
+	if (wdata == NULL) {
+		dev_err(&hdev->dev, "can't alloc wacom descriptor\n");
+		return -ENOMEM;
+	}
+
+	hid_set_drvdata(hdev, wdata);
+
+	ret = hid_parse(hdev);
+	if (ret) {
+		dev_err(&hdev->dev, "parse failed\n");
+		goto err_free;
+	}
+
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+	if (ret) {
+		dev_err(&hdev->dev, "hw start failed\n");
+		goto err_free;
+	}
+
+	hidinput = list_entry(hdev->inputs.next, struct hid_input, list);
+	input = hidinput->input;
+
+	/* Basics */
+	input->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_REL);
+	input->absbit[0] |= BIT(ABS_X) | BIT(ABS_Y) |
+		BIT(ABS_PRESSURE) | BIT(ABS_DISTANCE);
+	input->relbit[0] |= BIT(REL_WHEEL);
+	set_bit(BTN_TOOL_PEN, input->keybit);
+	set_bit(BTN_TOUCH, input->keybit);
+	set_bit(BTN_STYLUS, input->keybit);
+	set_bit(BTN_STYLUS2, input->keybit);
+	set_bit(BTN_LEFT, input->keybit);
+	set_bit(BTN_RIGHT, input->keybit);
+	set_bit(BTN_MIDDLE, input->keybit);
+
+	/* Pad */
+	input->evbit[0] |= BIT(EV_MSC);
+	input->mscbit[0] |= BIT(MSC_SERIAL);
+
+	/* Distance, rubber and mouse */
+	input->absbit[0] |= BIT(ABS_DISTANCE);
+	set_bit(BTN_TOOL_RUBBER, input->keybit);
+	set_bit(BTN_TOOL_MOUSE, input->keybit);
+
+	input->absmax[ABS_PRESSURE] = 511;
+	input->absmax[ABS_DISTANCE] = 32;
+
+	input->absmax[ABS_X] = 16704;
+	input->absmax[ABS_Y] = 12064;
+	input->absfuzz[ABS_X] = 4;
+	input->absfuzz[ABS_Y] = 4;
+
+	return 0;
+err_free:
+	kfree(wdata);
+	return ret;
+}
+
+static void wacom_remove(struct hid_device *hdev)
+{
+	hid_hw_stop(hdev);
+	kfree(hid_get_drvdata(hdev));
+}
+
+static const struct hid_device_id wacom_devices[] = {
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_WACOM, USB_DEVICE_ID_WACOM_GRAPHIRE_BLUETOOTH) },
+
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, wacom_devices);
+
+static struct hid_driver wacom_driver = {
+	.name = "wacom",
+	.id_table = wacom_devices,
+	.probe = wacom_probe,
+	.remove = wacom_remove,
+	.raw_event = wacom_raw_event,
+};
+
+static int wacom_init(void)
+{
+	int ret;
+
+	ret = hid_register_driver(&wacom_driver);
+	if (ret)
+		printk(KERN_ERR "can't register wacom driver\n");
+	printk(KERN_ERR "wacom driver registered\n");
+	return ret;
+}
+
+static void wacom_exit(void)
+{
+	hid_unregister_driver(&wacom_driver);
+}
+
+module_init(wacom_init);
+module_exit(wacom_exit);
+MODULE_LICENSE("GPL");
+
-- 
cgit v0.10.2


From a1c0643ff9f360a30644f6e3cd643ca2a5083aea Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 13 May 2009 10:56:52 +0100
Subject: GFS2: Move journal live test at transaction start

There seems little point grabbing the transaction glock
only to have to release it again if the journal isn't
live. This moves the test earlier to avoid grabbing the lock
when we don't need it in the first place.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 053752d..4ef0e9f 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -33,6 +33,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
 	BUG_ON(current->journal_info);
 	BUG_ON(blocks == 0 && revokes == 0);
 
+	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+		return -EROFS;
+
 	tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
 	if (!tr)
 		return -ENOMEM;
@@ -54,12 +57,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
 	if (error)
 		goto fail_holder_uninit;
 
-	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
-		tr->tr_t_gh.gh_flags |= GL_NOCACHE;
-		error = -EROFS;
-		goto fail_gunlock;
-	}
-
 	error = gfs2_log_reserve(sdp, tr->tr_reserved);
 	if (error)
 		goto fail_gunlock;
-- 
cgit v0.10.2


From cc96eace48fdf0f8925a74c6c1f7ffa512e458d2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 13 May 2009 20:28:15 +0900
Subject: sh: clkfwk: rate table construction and rounding for SH7785.

This adds support for constructing a rate table by looking at potential
divisors for a specified clock. Each FQRMR clock is given its own table.
Presently each table is rebuilt when the parent propagates down a new
rate, so some more logic needs to be added to do this more intelligently.

Additionally, a fairly generic round_rate() implementation is then
layered on top of it, which subsequently provides us with cpufreq support.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 40cf3c0..da681de 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -16,6 +16,7 @@ struct clk_ops {
 	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
 	int (*set_parent)(struct clk *clk, struct clk *parent);
 	long (*round_rate)(struct clk *clk, unsigned long rate);
+	void (*build_rate_table)(struct clk *clk);
 };
 
 struct clk {
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 033f466..56c6e11 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -63,6 +63,9 @@ void propagate_rate(struct clk *tclk)
 	list_for_each_entry(clkp, &tclk->children, sibling) {
 		if (clkp->ops && clkp->ops->recalc)
 			clkp->rate = clkp->ops->recalc(clkp);
+		if (clkp->ops && clkp->ops->build_rate_table)
+			clkp->ops->build_rate_table(clkp);
+
 		propagate_rate(clkp);
 	}
 }
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index 87584dc..b7a32dd 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -13,28 +13,43 @@
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/cpufreq.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
 
 static unsigned int div2[] = { 1, 2, 4, 6, 8, 12, 16, 18,
 			       24, 32, 36, 48 };
 struct clk_priv {
-	unsigned int shift;
+	unsigned int			shift;
+
+	/* allowable divisor bitmap */
+	unsigned long			div_bitmap;
+
+	/* Supportable frequencies + termination entry */
+	struct cpufreq_frequency_table	freq_table[ARRAY_SIZE(div2)+1];
 };
 
-#define FRQMR_CLK_DATA(_name, _shift)	\
-static struct clk_priv _name##_data = {	.shift = _shift, }
+#define FRQMR_CLK_DATA(_name, _shift, _div_bitmap)	\
+static struct clk_priv _name##_data = {			\
+	.shift		= _shift,			\
+	.div_bitmap	= _div_bitmap,			\
+							\
+	.freq_table[0]	= {				\
+		.index = 0,				\
+		.frequency = CPUFREQ_TABLE_END,		\
+	},						\
+}
 
-FRQMR_CLK_DATA(pfc,  0);
-FRQMR_CLK_DATA(s3fc, 4);
-FRQMR_CLK_DATA(s2fc, 8);
-FRQMR_CLK_DATA(mfc, 12);
-FRQMR_CLK_DATA(bfc, 16);
-FRQMR_CLK_DATA(sfc, 20);
-FRQMR_CLK_DATA(ufc, 24);
-FRQMR_CLK_DATA(ifc, 28);
+FRQMR_CLK_DATA(pfc,  0, 0x0f80);
+FRQMR_CLK_DATA(s3fc, 4, 0x0ff0);
+FRQMR_CLK_DATA(s2fc, 8, 0x0030);
+FRQMR_CLK_DATA(mfc, 12, 0x000c);
+FRQMR_CLK_DATA(bfc, 16, 0x0fe0);
+FRQMR_CLK_DATA(sfc, 20, 0x000c);
+FRQMR_CLK_DATA(ufc, 24, 0x000c);
+FRQMR_CLK_DATA(ifc, 28, 0x000e);
 
-static unsigned long frqmr_clk_recalc(struct clk *clk)
+static unsigned long frqmr_recalc(struct clk *clk)
 {
 	struct clk_priv *data = clk->priv;
 	unsigned int idx;
@@ -49,8 +64,76 @@ static unsigned long frqmr_clk_recalc(struct clk *clk)
 	return clk->parent->rate * 36 / div2[idx];
 }
 
+static void frqmr_build_rate_table(struct clk *clk)
+{
+	struct clk_priv *data = clk->priv;
+	int i, entry;
+
+	for (i = entry = 0; i < ARRAY_SIZE(div2); i++) {
+		if ((data->div_bitmap & (1 << i)) == 0)
+			continue;
+
+		data->freq_table[entry].index = entry;
+		data->freq_table[entry].frequency =
+			clk->parent->rate * 36 / div2[i];
+
+		entry++;
+	}
+
+	if (entry == 0) {
+		pr_warning("clkfwk: failed to build frequency table "
+			   "for \"%s\" clk!\n", clk->name);
+		return;
+	}
+
+	/* Termination entry */
+	data->freq_table[entry].index = entry;
+	data->freq_table[entry].frequency = CPUFREQ_TABLE_END;
+}
+
+static long frqmr_round_rate(struct clk *clk, unsigned long rate)
+{
+	struct clk_priv *data = clk->priv;
+	unsigned long rate_error, rate_error_prev = ~0UL;
+	unsigned long rate_best_fit = rate;
+	unsigned long highest, lowest;
+	int i;
+
+	highest = lowest = 0;
+
+	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		unsigned long freq = data->freq_table[i].frequency;
+
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (freq > highest)
+			highest = freq;
+		if (freq < lowest)
+			lowest = freq;
+
+		rate_error = abs(freq - rate);
+		if (rate_error < rate_error_prev) {
+			rate_best_fit = freq;
+			rate_error_prev = rate_error;
+		}
+
+		if (rate_error == 0)
+			break;
+	}
+
+	if (rate >= highest)
+		rate_best_fit = highest;
+	if (rate <= lowest)
+		rate_best_fit = lowest;
+
+	return rate_best_fit;
+}
+
 static struct clk_ops frqmr_clk_ops = {
-	.recalc		= frqmr_clk_recalc,
+	.recalc			= frqmr_recalc,
+	.build_rate_table	= frqmr_build_rate_table,
+	.round_rate		= frqmr_round_rate,
 };
 
 /*
-- 
cgit v0.10.2


From cedcf3366f2191885aff92d33d6078ef08203e52 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 13 May 2009 21:51:28 +0900
Subject: sh: clkfwk: Map tree hierarchy in debugfs.

This adopts the OMAP clock framework debugfs bits and replaces the aging
procfs bits. The procfs clocks entry was primarily a debugging aid, and
used to be tied in to cpuinfo before the clock list grew too unweildly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index da681de..c499d47 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -38,6 +38,7 @@ struct clk {
 
 	unsigned long		arch_flags;
 	void			*priv;
+	struct dentry		*dentry;
 };
 
 struct clk_lookup {
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 56c6e11..686477f 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -28,7 +28,7 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
-#include <linux/proc_fs.h>
+#include <linux/debugfs.h>
 #include <asm/clock.h>
 #include <asm/machvec.h>
 
@@ -404,24 +404,6 @@ void clk_put(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_put);
 
-
-static int show_clocks(char *buf, char **start, off_t off,
-		       int len, int *eof, void *data)
-{
-	struct clk *clk;
-	char *p = buf;
-
-	list_for_each_entry_reverse(clk, &clock_list, node) {
-		unsigned long rate = clk_get_rate(clk);
-
-		p += sprintf(p, "%-12s\t: %ld.%02ldMHz\t%s\n", clk->name,
-			     rate / 1000000, (rate % 1000000) / 10000,
-			      (clk->usecount > 0) ?  "enabled" : "disabled");
-	}
-
-	return p - buf;
-}
-
 #ifdef CONFIG_PM
 static int clks_sysdev_suspend(struct sys_device *dev, pm_message_t state)
 {
@@ -516,14 +498,90 @@ int __init clk_init(void)
 	return ret;
 }
 
-static int __init clk_proc_init(void)
+/*
+ *	debugfs support to trace clock tree hierarchy and attributes
+ */
+static struct dentry *clk_debugfs_root;
+
+static int clk_debugfs_register_one(struct clk *c)
 {
-	struct proc_dir_entry *p;
-	p = create_proc_read_entry("clocks", S_IRUSR, NULL,
-				   show_clocks, NULL);
-	if (unlikely(!p))
-		return -EINVAL;
+	int err;
+	struct dentry *d, *child;
+	struct clk *pa = c->parent;
+	char s[255];
+	char *p = s;
+
+	p += sprintf(p, "%s", c->name);
+	if (c->id > 0)
+		sprintf(p, ":%d", c->id);
+	d = debugfs_create_dir(s, pa ? pa->dentry : clk_debugfs_root);
+	if (!d)
+		return -ENOMEM;
+	c->dentry = d;
+
+	d = debugfs_create_u8("usecount", S_IRUGO, c->dentry, (u8 *)&c->usecount);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	d = debugfs_create_u32("rate", S_IRUGO, c->dentry, (u32 *)&c->rate);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	d = debugfs_create_x32("flags", S_IRUGO, c->dentry, (u32 *)&c->flags);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	return 0;
+
+err_out:
+	d = c->dentry;
+	list_for_each_entry(child, &d->d_subdirs, d_u.d_child)
+		debugfs_remove(child);
+	debugfs_remove(c->dentry);
+	return err;
+}
+
+static int clk_debugfs_register(struct clk *c)
+{
+	int err;
+	struct clk *pa = c->parent;
+
+	if (pa && !pa->dentry) {
+		err = clk_debugfs_register(pa);
+		if (err)
+			return err;
+	}
 
+	if (!c->dentry) {
+		err = clk_debugfs_register_one(c);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int __init clk_debugfs_init(void)
+{
+	struct clk *c;
+	struct dentry *d;
+	int err;
+
+	d = debugfs_create_dir("clock", NULL);
+	if (!d)
+		return -ENOMEM;
+	clk_debugfs_root = d;
+
+	list_for_each_entry(c, &clock_list, node) {
+		err = clk_debugfs_register(c);
+		if (err)
+			goto err_out;
+	}
 	return 0;
+err_out:
+	debugfs_remove(clk_debugfs_root); /* REVISIT: Cleanup correctly */
+	return err;
 }
-subsys_initcall(clk_proc_init);
+late_initcall(clk_debugfs_init);
-- 
cgit v0.10.2


From 44408ad7368906c84000e87a99c14a16dbb867fd Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 12 May 2009 13:31:40 -0700
Subject: xen: use header for EXPORT_SYMBOL_GPL

mmu.c needs to #include module.h to prevent these warnings:

 arch/x86/xen/mmu.c:239: warning: data definition has no type or storage class
 arch/x86/xen/mmu.c:239: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL_GPL'
 arch/x86/xen/mmu.c:239: warning: parameter names (without types) in function declaration

[ Impact: cleanup ]

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e25a78e..fba55b1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,6 +42,7 @@
 #include <linux/highmem.h>
 #include <linux/debugfs.h>
 #include <linux/bug.h>
+#include <linux/module.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-- 
cgit v0.10.2


From 48c2b613616235d7c97fda5982f50100a6c79166 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 13 May 2009 14:49:48 +0100
Subject: GFS2: Add commit= mount option

It has always been possible to adjust the gfs2 log commit
interval, but only from the sysfs interface. This adds a
mount option, commit=<nn>, which will be familar to ext3
users.

The sysfs interface continues to be available as well, although
this might be removed in the future.

Also this patch cleans up some duplicated structures in the GFS2
sysfs code.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 399d1b9..65f438e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -418,6 +418,7 @@ struct gfs2_args {
 	unsigned int ar_data:2;			/* ordered/writeback */
 	unsigned int ar_meta:1;			/* mount metafs */
 	unsigned int ar_discard:1;		/* discard requests */
+	int ar_commit;				/* Commit interval */
 };
 
 struct gfs2_tune {
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index f7e8527..947af15 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -45,6 +45,7 @@ enum {
 	Opt_meta,
 	Opt_discard,
 	Opt_nodiscard,
+	Opt_commit,
 	Opt_err,
 };
 
@@ -73,6 +74,7 @@ static const match_table_t tokens = {
 	{Opt_meta, "meta"},
 	{Opt_discard, "discard"},
 	{Opt_nodiscard, "nodiscard"},
+	{Opt_commit, "commit=%d"},
 	{Opt_err, NULL}
 };
 
@@ -89,6 +91,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
 	char *o;
 	int token;
 	substring_t tmp[MAX_OPT_ARGS];
+	int rv;
 
 	/* Split the options into tokens with the "," character and
 	   process them */
@@ -173,6 +176,13 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
 		case Opt_nodiscard:
 			args->ar_discard = 0;
 			break;
+		case Opt_commit:
+			rv = match_int(&tmp[0], &args->ar_commit);
+			if (rv || args->ar_commit <= 0) {
+				fs_info(sdp, "commit mount option requires a positive numeric argument\n");
+				return rv ? rv : -EINVAL;
+			}
+			break;
 		case Opt_err:
 		default:
 			fs_info(sdp, "invalid mount option: %s\n", o);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1ff9473..7981fbc 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -55,7 +55,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
 	spin_lock_init(&gt->gt_spin);
 
 	gt->gt_incore_log_blocks = 1024;
-	gt->gt_log_flush_secs = 60;
 	gt->gt_recoverd_secs = 60;
 	gt->gt_logd_secs = 1;
 	gt->gt_quota_simul_sync = 64;
@@ -1165,6 +1164,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
 
 	sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
 	sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
+	sdp->sd_args.ar_commit = 60;
 
 	error = gfs2_mount_args(sdp, &sdp->sd_args, data);
 	if (error) {
@@ -1191,6 +1191,8 @@ static int fill_super(struct super_block *sb, void *data, int silent)
                                GFS2_BASIC_BLOCK_SHIFT;
 	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
 
+	sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit;
+
 	error = init_names(sdp, silent);
 	if (error)
 		goto fail;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 4580195..0677a83 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -436,8 +436,12 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_args args = sdp->sd_args; /* Default to current settings */
+	struct gfs2_tune *gt = &sdp->sd_tune;
 	int error;
 
+	spin_lock(&gt->gt_spin);
+	args.ar_commit = gt->gt_log_flush_secs;
+	spin_unlock(&gt->gt_spin);
 	error = gfs2_mount_args(sdp, &args, data);
 	if (error)
 		return error;
@@ -473,6 +477,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
 		sb->s_flags |= MS_POSIXACL;
 	else
 		sb->s_flags &= ~MS_POSIXACL;
+	spin_lock(&gt->gt_spin);
+	gt->gt_log_flush_secs = args.ar_commit;
+	spin_unlock(&gt->gt_spin);
+
 	return 0;
 }
 
@@ -550,6 +558,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 {
 	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
 	struct gfs2_args *args = &sdp->sd_args;
+	int lfsecs;
 
 	if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
 		seq_printf(s, ",meta");
@@ -610,7 +619,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 	}
 	if (args->ar_discard)
 		seq_printf(s, ",discard");
-
+	lfsecs = sdp->sd_tune.gt_log_flush_secs;
+	if (lfsecs != 60)
+		seq_printf(s, ",commit=%d", lfsecs);
 	return 0;
 }
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7655f502..d53b22e 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -26,6 +26,36 @@
 #include "util.h"
 #include "glops.h"
 
+struct gfs2_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct gfs2_sbd *, char *);
+	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
+};
+
+static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
+			      char *buf)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
+	return a->show ? a->show(sdp, buf) : 0;
+}
+
+static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
+	return a->store ? a->store(sdp, buf, len) : len;
+}
+
+static struct sysfs_ops gfs2_attr_ops = {
+	.show  = gfs2_attr_show,
+	.store = gfs2_attr_store,
+};
+
+
+static struct kset *gfs2_kset;
+
 static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%u:%u\n",
@@ -212,11 +242,6 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len
 	return len;
 }
 
-struct gfs2_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
-};
 
 #define GFS2_ATTR(name, mode, show, store) \
 static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
@@ -246,49 +271,21 @@ static struct attribute *gfs2_attrs[] = {
 	NULL,
 };
 
-static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
-			      char *buf)
-{
-	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
-	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
-	return a->show ? a->show(sdp, buf) : 0;
-}
-
-static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
-			       const char *buf, size_t len)
-{
-	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
-	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
-	return a->store ? a->store(sdp, buf, len) : len;
-}
-
-static struct sysfs_ops gfs2_attr_ops = {
-	.show  = gfs2_attr_show,
-	.store = gfs2_attr_store,
-};
-
 static struct kobj_type gfs2_ktype = {
 	.default_attrs = gfs2_attrs,
 	.sysfs_ops     = &gfs2_attr_ops,
 };
 
-static struct kset *gfs2_kset;
-
 /*
  * display struct lm_lockstruct fields
  */
 
-struct lockstruct_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-};
-
 #define LOCKSTRUCT_ATTR(name, fmt)                                          \
 static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
 {                                                                           \
 	return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \
 }                                                                           \
-static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
+static struct gfs2_attr lockstruct_attr_##name = __ATTR_RO(name)
 
 LOCKSTRUCT_ATTR(jid,      "%u\n");
 LOCKSTRUCT_ATTR(first,    "%u\n");
@@ -401,14 +398,8 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
 	return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
 }
 
-struct gdlm_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *sdp, char *);
-	ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t);
-};
-
 #define GDLM_ATTR(_name,_mode,_show,_store) \
-static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
+static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
 GDLM_ATTR(proto_name,     0444, proto_name_show,     NULL);
 GDLM_ATTR(block,          0644, block_show,          block_store);
@@ -434,21 +425,12 @@ static struct attribute *lock_module_attrs[] = {
 	NULL,
 };
 
-/*
- * display struct gfs2_args fields
- */
-
-struct args_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-};
-
 #define ARGS_ATTR(name, fmt)                                                \
 static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
 {                                                                           \
 	return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name);       \
 }                                                                           \
-static struct args_attr args_attr_##name = __ATTR_RO(name)
+static struct gfs2_attr args_attr_##name = __ATTR_RO(name)
 
 ARGS_ATTR(lockproto,       "%s\n");
 ARGS_ATTR(locktable,       "%s\n");
@@ -531,14 +513,8 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
 	return len;
 }
 
-struct tune_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct gfs2_sbd *, char *);
-	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
-};
-
 #define TUNE_ATTR_3(name, show, store)                                        \
-static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)
+static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store)
 
 #define TUNE_ATTR_2(name, store)                                              \
 static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                   \
-- 
cgit v0.10.2


From 9582d41135c0d362f04ed6bf3dc8d693a7eafee2 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 13 May 2009 15:06:25 +0100
Subject: GFS2: Remove a couple of unused sysfs entries

These two tunables are pointless and would never need to be
changed anyway. There is also a race between them and umount
as the deamons which they refer to might have gone away. The
easiest way to fix the race is to remove the interface.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index d53b22e..894bf77 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -530,15 +530,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
 }                                                                             \
 TUNE_ATTR_2(name, name##_store)
 
-#define TUNE_ATTR_DAEMON(name, process)                                       \
-static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
-{                                                                             \
-	ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len);      \
-	wake_up_process(sdp->sd_##process);                                   \
-	return r;                                                             \
-}                                                                             \
-TUNE_ATTR_2(name, name##_store)
-
 TUNE_ATTR(incore_log_blocks, 0);
 TUNE_ATTR(log_flush_secs, 0);
 TUNE_ATTR(quota_warn_period, 0);
@@ -550,8 +541,6 @@ TUNE_ATTR(new_files_jdata, 0);
 TUNE_ATTR(quota_simul_sync, 1);
 TUNE_ATTR(stall_secs, 1);
 TUNE_ATTR(statfs_quantum, 1);
-TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
-TUNE_ATTR_DAEMON(logd_secs, logd_process);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
 
 static struct attribute *tune_attrs[] = {
@@ -565,8 +554,6 @@ static struct attribute *tune_attrs[] = {
 	&tune_attr_quota_simul_sync.attr,
 	&tune_attr_stall_secs.attr,
 	&tune_attr_statfs_quantum.attr,
-	&tune_attr_recoverd_secs.attr,
-	&tune_attr_logd_secs.attr,
 	&tune_attr_quota_scale.attr,
 	&tune_attr_new_files_jdata.attr,
 	NULL,
-- 
cgit v0.10.2


From 334034c132017fa662716b70591b2297ed7f238c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 May 2009 13:37:11 -0700
Subject: avr32: remove obsolete hw_interrupt_type

The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) have
been kept around for migration reasons.  After more than two years it's
time to remove them finally.

This patch cleans up one of the remaining users.  When all such patches
hit mainline we can remove the defines and typedefs finally.

Impact: cleanup

Convert the last remaining users to struct irq_chip and remove the
define.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>

diff --git a/arch/avr32/include/asm/hw_irq.h b/arch/avr32/include/asm/hw_irq.h
index 218b0a6..a36f9fc 100644
--- a/arch/avr32/include/asm/hw_irq.h
+++ b/arch/avr32/include/asm/hw_irq.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_AVR32_HW_IRQ_H
 #define __ASM_AVR32_HW_IRQ_H
 
-static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
+static inline void hw_resend_irq(struct irq_chip *h, unsigned int i)
 {
 	/* Nothing to do */
 }
-- 
cgit v0.10.2


From aa512a27e9e8ed32f31b15eec67ab1ceca33839b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 13 May 2009 13:52:19 -0400
Subject: x86/function-graph: fix constraint for recording old return value

After upgrading from gcc 4.2.2 to 4.4.0, the function graph tracer broke.
Investigating, I found that in the asm that replaces the return value,
gcc was using the same register for the old value as it was for the
new value.

	mov	(addr), old
	mov	new, (addr)

But if old and new are the same register, we clobber new with old!
I first thought this was a bug in gcc 4.4.0 and reported it:

  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40132

Andrew Pinski responded (quickly), saying that it was correct gcc behavior
and the code needed to denote old as an "early clobber".

Instead of "=r"(old), we need "=&r"(old).

[Impact: keep function graph tracer from breaking with gcc 4.4.0 ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 18dfa30..b79c553 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -442,7 +442,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		_ASM_EXTABLE(1b, 4b)
 		_ASM_EXTABLE(2b, 4b)
 
-		: [old] "=r" (old), [faulted] "=r" (faulted)
+		: [old] "=&r" (old), [faulted] "=r" (faulted)
 		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
 		: "memory"
 	);
-- 
cgit v0.10.2


From 8c10dc4f54d315ce801dc9ef4018aab8d0d75a7b Mon Sep 17 00:00:00 2001
From: Karl Beldan <karl.beldan@gmail.com>
Date: Mon, 11 May 2009 23:49:41 +0200
Subject: ASoC: pxa2xx-i2s: Proper initialization

Reset FIFO logic and registers, and make sure REC and RPL functions along
with FIFO service are disabled at probe.

Signed-off-by: Karl Beldan <karl.beldan@mobile-devices.fr>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index 6014577..fce8a28 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -347,6 +347,19 @@ static int pxa2xx_i2s_probe(struct platform_device *dev)
 	if (ret != 0)
 		clk_put(clk_i2s);
 
+	/*
+	 * PXA Developer's Manual:
+	 * If SACR0[ENB] is toggled in the middle of a normal operation,
+	 * the SACR0[RST] bit must also be set and cleared to reset all
+	 * I2S controller registers.
+	 */
+	SACR0 = SACR0_RST;
+	SACR0 = 0;
+	/* Make sure RPL and REC are disabled */
+	SACR1 = SACR1_DRPL | SACR1_DREC;
+	/* Along with FIFO servicing */
+	SAIMR &= ~(SAIMR_RFS | SAIMR_TFS);
+
 	return ret;
 }
 
-- 
cgit v0.10.2


From bb74a6e5c5535ddd977aa161c72bef738e566daa Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 13 May 2009 17:23:54 +0100
Subject: ASoC: Point at kernel.org git

The Wolfson git is not currently tracking bleeding edge ASoC so change
to my kernel.org git which is doing so.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 17c8ec1..00401d8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5273,7 +5273,7 @@ P:	Liam Girdwood
 M:	lrg@slimlogic.co.uk
 P:	Mark Brown
 M:	broonie@opensource.wolfsonmicro.com
-T:	git git://opensource.wolfsonmicro.com/linux-2.6-asoc
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound-2.6.git
 L:	alsa-devel@alsa-project.org (subscribers-only)
 W:	http://alsa-project.org/main/index.php/ASoC
 S:	Supported
-- 
cgit v0.10.2


From b103387037cea2ba0f04b44d408d54c53f678061 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 13 May 2009 12:50:40 -0400
Subject: TPM: get_event_name stack corruption

get_event_name uses sprintf to fill a buffer declared on the stack.  It fills
the buffer 2 bytes at a time.  What the code doesn't take into account is that
sprintf(buf, "%02x", data) actually writes 3 bytes.  2 bytes for the data and
then it nul terminates the string.  Since we declare buf to be 40 characters
long and then we write 40 bytes of data into buf sprintf is going to write 41
characters.  The fix is to leave room in buf for the nul terminator.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c
index ed306eb..0c2f55a 100644
--- a/drivers/char/tpm/tpm_bios.c
+++ b/drivers/char/tpm/tpm_bios.c
@@ -212,7 +212,8 @@ static int get_event_name(char *dest, struct tcpa_event *event,
 			unsigned char * event_entry)
 {
 	const char *name = "";
-	char data[40] = "";
+	/* 41 so there is room for 40 data and 1 nul */
+	char data[41] = "";
 	int i, n_len = 0, d_len = 0;
 	struct tcpa_pc_event *pc_event;
 
-- 
cgit v0.10.2


From bf31a1a02eb28d9bda0bb74345df7889faeb7335 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <antonb@au1.ibm.com>
Date: Wed, 13 May 2009 16:52:40 -0700
Subject: IB/ehca: Replace vmalloc() with kmalloc() for queue allocation

To improve performance of driver resource allocation, replace
vmalloc() calls with kmalloc().

Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index c3a3284..a260559 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -220,7 +220,7 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
 	queue->small_page = NULL;
 
 	/* allocate queue page pointers */
-	queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+	queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
 	if (!queue->queue_pages) {
 		ehca_gen_err("Couldn't allocate queue page list");
 		return 0;
@@ -240,7 +240,7 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
 ipz_queue_ctor_exit0:
 	ehca_gen_err("Couldn't alloc pages queue=%p "
 		 "nr_of_pages=%x",  queue, nr_of_pages);
-	vfree(queue->queue_pages);
+	kfree(queue->queue_pages);
 
 	return 0;
 }
@@ -262,7 +262,7 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
 			free_page((unsigned long)queue->queue_pages[i]);
 	}
 
-	vfree(queue->queue_pages);
+	kfree(queue->queue_pages);
 
 	return 1;
 }
-- 
cgit v0.10.2


From c94f156f63c835ffc02b686f9d4238b106f31a5d Mon Sep 17 00:00:00 2001
From: Stefan Roscher <ossrosch@linux.vnet.ibm.com>
Date: Wed, 13 May 2009 16:52:42 -0700
Subject: IB/ehca: Fall back to vmalloc() for big allocations

In case of large queue pairs there is the possibillity of allocation
failures due to memory fragmentation when using kmalloc().  To ensure
the memory is allocated even if kmalloc() can not find chunks which
are big enough, we fall back to allocating the memory with vmalloc().

Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index a260559..1227c59 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -222,8 +222,11 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
 	/* allocate queue page pointers */
 	queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
 	if (!queue->queue_pages) {
-		ehca_gen_err("Couldn't allocate queue page list");
-		return 0;
+		queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+		if (!queue->queue_pages) {
+			ehca_gen_err("Couldn't allocate queue page list");
+			return 0;
+		}
 	}
 	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
 
@@ -240,7 +243,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
 ipz_queue_ctor_exit0:
 	ehca_gen_err("Couldn't alloc pages queue=%p "
 		 "nr_of_pages=%x",  queue, nr_of_pages);
-	kfree(queue->queue_pages);
+	if (is_vmalloc_addr(queue->queue_pages))
+		vfree(queue->queue_pages);
+	else
+		kfree(queue->queue_pages);
 
 	return 0;
 }
@@ -262,7 +268,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
 			free_page((unsigned long)queue->queue_pages[i]);
 	}
 
-	kfree(queue->queue_pages);
+	if (is_vmalloc_addr(queue->queue_pages))
+		vfree(queue->queue_pages);
+	else
+		kfree(queue->queue_pages);
 
 	return 1;
 }
-- 
cgit v0.10.2


From 1988d1fa1a9d642c5714a6afc9775fba0627f3ed Mon Sep 17 00:00:00 2001
From: Stefan Roscher <ossrosch@linux.vnet.ibm.com>
Date: Wed, 13 May 2009 16:52:43 -0700
Subject: IB/ehca: Remove unnecessary memory operations for userspace queue
 pairs

The queue map for flush completion circumvention is only used for
kernel space queue pairs.  This patch skips the allocation of the
queue maps in case the QP is created for userspace.  In addition, this
patch does not iomap the galpas for kernel usage if the queue pair is
only used in userspace.  These changes will improve the performance of
creation of userspace queue pairs.

Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 00c1081..ead4e71 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -461,7 +461,7 @@ static struct ehca_qp *internal_create_qp(
 					      ib_device);
 	struct ib_ucontext *context = NULL;
 	u64 h_ret;
-	int is_llqp = 0, has_srq = 0;
+	int is_llqp = 0, has_srq = 0, is_user = 0;
 	int qp_type, max_send_sge, max_recv_sge, ret;
 
 	/* h_call's out parameters */
@@ -609,9 +609,6 @@ static struct ehca_qp *internal_create_qp(
 		}
 	}
 
-	if (pd->uobject && udata)
-		context = pd->uobject->context;
-
 	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
 	if (!my_qp) {
 		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
@@ -619,6 +616,11 @@ static struct ehca_qp *internal_create_qp(
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (pd->uobject && udata) {
+		is_user = 1;
+		context = pd->uobject->context;
+	}
+
 	atomic_set(&my_qp->nr_events, 0);
 	init_waitqueue_head(&my_qp->wait_completion);
 	spin_lock_init(&my_qp->spinlock_s);
@@ -707,7 +709,7 @@ static struct ehca_qp *internal_create_qp(
 			(parms.squeue.is_small || parms.rqueue.is_small);
 	}
 
-	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
 	if (h_ret != H_SUCCESS) {
 		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
 			 h_ret);
@@ -769,18 +771,20 @@ static struct ehca_qp *internal_create_qp(
 			goto create_qp_exit2;
 		}
 
-		my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-			 my_qp->ipz_squeue.qe_size;
-		my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
-					sizeof(struct ehca_qmap_entry));
-		if (!my_qp->sq_map.map) {
-			ehca_err(pd->device, "Couldn't allocate squeue "
-				 "map ret=%i", ret);
-			goto create_qp_exit3;
+		if (!is_user) {
+			my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+				my_qp->ipz_squeue.qe_size;
+			my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+						    sizeof(struct ehca_qmap_entry));
+			if (!my_qp->sq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit3;
+			}
+			INIT_LIST_HEAD(&my_qp->sq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->sq_map);
 		}
-		INIT_LIST_HEAD(&my_qp->sq_err_node);
-		/* to avoid the generation of bogus flush CQEs */
-		reset_queue_map(&my_qp->sq_map);
 	}
 
 	if (HAS_RQ(my_qp)) {
@@ -792,20 +796,21 @@ static struct ehca_qp *internal_create_qp(
 				 "and pages ret=%i", ret);
 			goto create_qp_exit4;
 		}
-
-		my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-			my_qp->ipz_rqueue.qe_size;
-		my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
-				sizeof(struct ehca_qmap_entry));
-		if (!my_qp->rq_map.map) {
-			ehca_err(pd->device, "Couldn't allocate squeue "
-					"map ret=%i", ret);
-			goto create_qp_exit5;
+		if (!is_user) {
+			my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+				my_qp->ipz_rqueue.qe_size;
+			my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+						    sizeof(struct ehca_qmap_entry));
+			if (!my_qp->rq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit5;
+			}
+			INIT_LIST_HEAD(&my_qp->rq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->rq_map);
 		}
-		INIT_LIST_HEAD(&my_qp->rq_err_node);
-		/* to avoid the generation of bogus flush CQEs */
-		reset_queue_map(&my_qp->rq_map);
-	} else if (init_attr->srq) {
+	} else if (init_attr->srq && !is_user) {
 		/* this is a base QP, use the queue map of the SRQ */
 		my_qp->rq_map = my_srq->rq_map;
 		INIT_LIST_HEAD(&my_qp->rq_err_node);
@@ -918,7 +923,7 @@ create_qp_exit7:
 	kfree(my_qp->mod_qp_parm);
 
 create_qp_exit6:
-	if (HAS_RQ(my_qp))
+	if (HAS_RQ(my_qp) && !is_user)
 		vfree(my_qp->rq_map.map);
 
 create_qp_exit5:
@@ -926,7 +931,7 @@ create_qp_exit5:
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
 
 create_qp_exit4:
-	if (HAS_SQ(my_qp))
+	if (HAS_SQ(my_qp) && !is_user)
 		vfree(my_qp->sq_map.map);
 
 create_qp_exit3:
@@ -1244,6 +1249,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 	u64 update_mask;
 	u64 h_ret;
 	int bad_wqe_cnt = 0;
+	int is_user = 0;
 	int squeue_locked = 0;
 	unsigned long flags = 0;
 
@@ -1266,6 +1272,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 		ret = ehca2ib_return_code(h_ret);
 		goto modify_qp_exit1;
 	}
+	if (ibqp->uobject)
+		is_user = 1;
 
 	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
 
@@ -1728,7 +1736,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 			goto modify_qp_exit2;
 		}
 	}
-	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
+	    && !is_user) {
 		ret = check_for_left_cqes(my_qp, shca);
 		if (ret)
 			goto modify_qp_exit2;
@@ -1738,16 +1747,17 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 		ipz_qeit_reset(&my_qp->ipz_rqueue);
 		ipz_qeit_reset(&my_qp->ipz_squeue);
 
-		if (qp_cur_state == IB_QPS_ERR) {
+		if (qp_cur_state == IB_QPS_ERR && !is_user) {
 			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
 
 			if (HAS_RQ(my_qp))
 				del_from_err_list(my_qp->recv_cq,
 						  &my_qp->rq_err_node);
 		}
-		reset_queue_map(&my_qp->sq_map);
+		if (!is_user)
+			reset_queue_map(&my_qp->sq_map);
 
-		if (HAS_RQ(my_qp))
+		if (HAS_RQ(my_qp) && !is_user)
 			reset_queue_map(&my_qp->rq_map);
 	}
 
@@ -2138,10 +2148,12 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 	int ret;
 	u64 h_ret;
 	u8 port_num;
+	int is_user = 0;
 	enum ib_qp_type	qp_type;
 	unsigned long flags;
 
 	if (uobject) {
+		is_user = 1;
 		if (my_qp->mm_count_galpa ||
 		    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
 			ehca_err(dev, "Resources still referenced in "
@@ -2168,10 +2180,10 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 	 * SRQs will never get into an error list and do not have a recv_cq,
 	 * so we need to skip them here.
 	 */
-	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
 		del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
 
-	if (HAS_SQ(my_qp))
+	if (HAS_SQ(my_qp) && !is_user)
 		del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
 
 	/* now wait until all pending events have completed */
@@ -2209,13 +2221,13 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 
 	if (HAS_RQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-
-		vfree(my_qp->rq_map.map);
+		if (!is_user)
+			vfree(my_qp->rq_map.map);
 	}
 	if (HAS_SQ(my_qp)) {
 		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-
-		vfree(my_qp->sq_map.map);
+		if (!is_user)
+			vfree(my_qp->sq_map.map);
 	}
 	kmem_cache_free(qp_cache, my_qp);
 	atomic_dec(&shca->num_qps);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index d0ab0c0..4d5dc33 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -284,7 +284,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
 	param->act_pages = (u32)outs[4];
 
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+		hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%lli", ret);
@@ -293,7 +293,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
 }
 
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms)
+			     struct ehca_alloc_qp_parms *parms, int is_user)
 {
 	u64 ret;
 	u64 allocate_controls, max_r10_reg, r11, r12;
@@ -359,7 +359,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
 		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
 
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
+		hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%lli", ret);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 2c3c6e0..39c1c36 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
  * initialize resources, create empty QPPTs (2 rings).
  */
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms);
+			     struct ehca_alloc_qp_parms *parms, int is_user);
 
 u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
 		      const u8 port_id,
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
index 2148210..b3e0e72 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -54,12 +54,15 @@ int hcall_unmap_page(u64 mapaddr)
 	return 0;
 }
 
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
 		    u64 paddr_kernel, u64 paddr_user)
 {
-	int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
-	if (ret)
-		return ret;
+	if (!is_user) {
+		int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+		if (ret)
+			return ret;
+	} else
+		galpas->kernel.fw_handle = 0;
 
 	galpas->user.fw_handle = paddr_user;
 
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
index 5305c2a..204227d 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.h
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -78,7 +78,7 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
 	*(volatile u64 __force *)addr = value;
 }
 
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
 		    u64 paddr_kernel, u64 paddr_user);
 
 int hcp_galpas_dtor(struct h_galpas *galpas);
-- 
cgit v0.10.2


From bde2cfaf8ff5511b4f434078554f89ff6cb677f2 Mon Sep 17 00:00:00 2001
From: Stefan Roscher <ossrosch@linux.vnet.ibm.com>
Date: Wed, 13 May 2009 16:52:43 -0700
Subject: IB/ehca: Increment version number

Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 368311c..85905ab 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
 #include "ehca_tools.h"
 #include "hcp_if.h"
 
-#define HCAD_VERSION "0026"
+#define HCAD_VERSION "0027"
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
-- 
cgit v0.10.2


From 5b891a9332dc4212bf166a4506092fbcd60f2319 Mon Sep 17 00:00:00 2001
From: Jack Stone <jwjstone@fastmail.fm>
Date: Wed, 13 May 2009 16:53:39 -0700
Subject: infiniband: Remove void casts

Remove uneeded casts of void *.

Signed-off-by: Jack Stone <jwjstone@fastmail.fm>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index bb17cce..f5c45b1 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -133,7 +133,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
 	struct c2_qp *qp;
 	int is_recv = 0;
 
-	ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+	ce = c2_mq_consume(&cq->mq);
 	if (!ce) {
 		return -EAGAIN;
 	}
@@ -146,7 +146,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
 	while ((qp =
 		(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
 		c2_mq_free(&cq->mq);
-		ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+		ce = c2_mq_consume(&cq->mq);
 		if (!ce)
 			return -EAGAIN;
 	}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 99bcbd7..4b89b79 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -479,13 +479,13 @@ void ehca_tasklet_neq(unsigned long data)
 	struct ehca_eqe *eqe;
 	u64 ret;
 
-	eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+	eqe = ehca_poll_eq(shca, &shca->neq);
 
 	while (eqe) {
 		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
 			parse_ec(shca, eqe->entry);
 
-		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+		eqe = ehca_poll_eq(shca, &shca->neq);
 	}
 
 	ret = hipz_h_reset_event(shca->ipz_hca_handle,
@@ -572,8 +572,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
 	eqe_cnt = 0;
 	do {
 		u32 token;
-		eqe_cache[eqe_cnt].eqe =
-			(struct ehca_eqe *)ehca_poll_eq(shca, eq);
+		eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
 		if (!eqe_cache[eqe_cnt].eqe)
 			break;
 		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
@@ -637,7 +636,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
 		goto unlock_irq_spinlock;
 	do {
 		struct ehca_eqe *eqe;
-		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+		eqe = ehca_poll_eq(shca, &shca->eq);
 		if (!eqe)
 			break;
 		process_eqe(shca, eqe);
-- 
cgit v0.10.2


From 5df6d737dd4b0fe9eccf943abb3677cfea05a6c4 Mon Sep 17 00:00:00 2001
From: Abhijeet Joglekar <abjoglek@cisco.com>
Date: Fri, 17 Apr 2009 18:33:26 -0700
Subject: [SCSI] fnic: Add new Cisco PCI-Express FCoE HBA

fnic is a driver for the Cisco PCI-Express FCoE HBA

Signed-off-by: Abhijeet Joglekar <abjoglek@cisco.com>
Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 2b349ba..c7bed16 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1431,6 +1431,14 @@ P:	Russell King
 M:	linux@arm.linux.org.uk
 F:	include/linux/clk.h
 
+CISCO FCOE HBA DRIVER
+P:	Abhijeet Joglekar
+M:	abjoglek@cisco.com
+P:	Joe Eykholt
+M:	jeykholt@cisco.com
+L:	linux-scsi@vger.kernel.org
+S:	Supported
+
 CODA FILE SYSTEM
 P:	Jan Harkes
 M:	jaharkes@cs.cmu.edu
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 8ed2990..fb27407 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -628,6 +628,17 @@ config FCOE
 	---help---
 	  Fibre Channel over Ethernet module
 
+config FCOE_FNIC
+	tristate "Cisco FNIC Driver"
+	depends on PCI && X86
+	select LIBFC
+	help
+	  This is support for the Cisco PCI-Express FCoE HBA.
+
+	  To compile this driver as a module, choose M here and read
+	  <file:Documentation/scsi/scsi.txt>.
+	  The module will be called fnic.
+
 config SCSI_DMX3191D
 	tristate "DMX3191D SCSI support"
 	depends on PCI && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index e7c861a..a5049cf 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_SCSI_DH)		+= device_handler/
 obj-$(CONFIG_LIBFC)		+= libfc/
 obj-$(CONFIG_LIBFCOE)		+= fcoe/
 obj-$(CONFIG_FCOE)		+= fcoe/
+obj-$(CONFIG_FCOE_FNIC)		+= fnic/
 obj-$(CONFIG_ISCSI_TCP) 	+= libiscsi.o	libiscsi_tcp.o iscsi_tcp.o
 obj-$(CONFIG_INFINIBAND_ISER) 	+= libiscsi.o
 obj-$(CONFIG_SCSI_A4000T)	+= 53c700.o	a4000t.o
diff --git a/drivers/scsi/fnic/Makefile b/drivers/scsi/fnic/Makefile
new file mode 100644
index 0000000..37c3440
--- /dev/null
+++ b/drivers/scsi/fnic/Makefile
@@ -0,0 +1,15 @@
+obj-$(CONFIG_FCOE_FNIC) += fnic.o
+
+fnic-y	:= \
+	fnic_attrs.o \
+	fnic_isr.o \
+	fnic_main.o \
+	fnic_res.o \
+	fnic_fcs.o \
+	fnic_scsi.o \
+	vnic_cq.o \
+	vnic_dev.o \
+	vnic_intr.o \
+	vnic_rq.o \
+	vnic_wq_copy.o \
+	vnic_wq.o
diff --git a/drivers/scsi/fnic/cq_desc.h b/drivers/scsi/fnic/cq_desc.h
new file mode 100644
index 0000000..d1225cf
--- /dev/null
+++ b/drivers/scsi/fnic/cq_desc.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _CQ_DESC_H_
+#define _CQ_DESC_H_
+
+/*
+ * Completion queue descriptor types
+ */
+enum cq_desc_types {
+	CQ_DESC_TYPE_WQ_ENET = 0,
+	CQ_DESC_TYPE_DESC_COPY = 1,
+	CQ_DESC_TYPE_WQ_EXCH = 2,
+	CQ_DESC_TYPE_RQ_ENET = 3,
+	CQ_DESC_TYPE_RQ_FCP = 4,
+};
+
+/* Completion queue descriptor: 16B
+ *
+ * All completion queues have this basic layout.  The
+ * type_specfic area is unique for each completion
+ * queue type.
+ */
+struct cq_desc {
+	__le16 completed_index;
+	__le16 q_number;
+	u8 type_specfic[11];
+	u8 type_color;
+};
+
+#define CQ_DESC_TYPE_BITS        4
+#define CQ_DESC_TYPE_MASK        ((1 << CQ_DESC_TYPE_BITS) - 1)
+#define CQ_DESC_COLOR_MASK       1
+#define CQ_DESC_COLOR_SHIFT      7
+#define CQ_DESC_Q_NUM_BITS       10
+#define CQ_DESC_Q_NUM_MASK       ((1 << CQ_DESC_Q_NUM_BITS) - 1)
+#define CQ_DESC_COMP_NDX_BITS    12
+#define CQ_DESC_COMP_NDX_MASK    ((1 << CQ_DESC_COMP_NDX_BITS) - 1)
+
+static inline void cq_desc_dec(const struct cq_desc *desc_arg,
+	u8 *type, u8 *color, u16 *q_number, u16 *completed_index)
+{
+	const struct cq_desc *desc = desc_arg;
+	const u8 type_color = desc->type_color;
+
+	*color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK;
+
+	/*
+	 * Make sure color bit is read from desc *before* other fields
+	 * are read from desc.  Hardware guarantees color bit is last
+	 * bit (byte) written.  Adding the rmb() prevents the compiler
+	 * and/or CPU from reordering the reads which would potentially
+	 * result in reading stale values.
+	 */
+
+	rmb();
+
+	*type = type_color & CQ_DESC_TYPE_MASK;
+	*q_number = le16_to_cpu(desc->q_number) & CQ_DESC_Q_NUM_MASK;
+	*completed_index = le16_to_cpu(desc->completed_index) &
+		CQ_DESC_COMP_NDX_MASK;
+}
+
+#endif /* _CQ_DESC_H_ */
diff --git a/drivers/scsi/fnic/cq_enet_desc.h b/drivers/scsi/fnic/cq_enet_desc.h
new file mode 100644
index 0000000..a9fa26f
--- /dev/null
+++ b/drivers/scsi/fnic/cq_enet_desc.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _CQ_ENET_DESC_H_
+#define _CQ_ENET_DESC_H_
+
+#include "cq_desc.h"
+
+/* Ethernet completion queue descriptor: 16B */
+struct cq_enet_wq_desc {
+	__le16 completed_index;
+	__le16 q_number;
+	u8 reserved[11];
+	u8 type_color;
+};
+
+static inline void cq_enet_wq_desc_dec(struct cq_enet_wq_desc *desc,
+	u8 *type, u8 *color, u16 *q_number, u16 *completed_index)
+{
+	cq_desc_dec((struct cq_desc *)desc, type,
+		color, q_number, completed_index);
+}
+
+/* Completion queue descriptor: Ethernet receive queue, 16B */
+struct cq_enet_rq_desc {
+	__le16 completed_index_flags;
+	__le16 q_number_rss_type_flags;
+	__le32 rss_hash;
+	__le16 bytes_written_flags;
+	__le16 vlan;
+	__le16 checksum_fcoe;
+	u8 flags;
+	u8 type_color;
+};
+
+#define CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT          (0x1 << 12)
+#define CQ_ENET_RQ_DESC_FLAGS_FCOE                  (0x1 << 13)
+#define CQ_ENET_RQ_DESC_FLAGS_EOP                   (0x1 << 14)
+#define CQ_ENET_RQ_DESC_FLAGS_SOP                   (0x1 << 15)
+
+#define CQ_ENET_RQ_DESC_RSS_TYPE_BITS               4
+#define CQ_ENET_RQ_DESC_RSS_TYPE_MASK \
+	((1 << CQ_ENET_RQ_DESC_RSS_TYPE_BITS) - 1)
+#define CQ_ENET_RQ_DESC_RSS_TYPE_NONE               0
+#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv4               1
+#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4           2
+#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6               3
+#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6           4
+#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX            5
+#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX        6
+
+#define CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC         (0x1 << 14)
+
+#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS          14
+#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK \
+	((1 << CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS) - 1)
+#define CQ_ENET_RQ_DESC_FLAGS_TRUNCATED             (0x1 << 14)
+#define CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED         (0x1 << 15)
+
+#define CQ_ENET_RQ_DESC_FCOE_SOF_BITS               4
+#define CQ_ENET_RQ_DESC_FCOE_SOF_MASK \
+	((1 << CQ_ENET_RQ_DESC_FCOE_SOF_BITS) - 1)
+#define CQ_ENET_RQ_DESC_FCOE_EOF_BITS               8
+#define CQ_ENET_RQ_DESC_FCOE_EOF_MASK \
+	((1 << CQ_ENET_RQ_DESC_FCOE_EOF_BITS) - 1)
+#define CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT              8
+
+#define CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK       (0x1 << 0)
+#define CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK              (0x1 << 0)
+#define CQ_ENET_RQ_DESC_FLAGS_UDP                   (0x1 << 1)
+#define CQ_ENET_RQ_DESC_FCOE_ENC_ERROR              (0x1 << 1)
+#define CQ_ENET_RQ_DESC_FLAGS_TCP                   (0x1 << 2)
+#define CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK          (0x1 << 3)
+#define CQ_ENET_RQ_DESC_FLAGS_IPV6                  (0x1 << 4)
+#define CQ_ENET_RQ_DESC_FLAGS_IPV4                  (0x1 << 5)
+#define CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT         (0x1 << 6)
+#define CQ_ENET_RQ_DESC_FLAGS_FCS_OK                (0x1 << 7)
+
+static inline void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc,
+	u8 *type, u8 *color, u16 *q_number, u16 *completed_index,
+	u8 *ingress_port, u8 *fcoe, u8 *eop, u8 *sop, u8 *rss_type,
+	u8 *csum_not_calc, u32 *rss_hash, u16 *bytes_written, u8 *packet_error,
+	u8 *vlan_stripped, u16 *vlan, u16 *checksum, u8 *fcoe_sof,
+	u8 *fcoe_fc_crc_ok, u8 *fcoe_enc_error, u8 *fcoe_eof,
+	u8 *tcp_udp_csum_ok, u8 *udp, u8 *tcp, u8 *ipv4_csum_ok,
+	u8 *ipv6, u8 *ipv4, u8 *ipv4_fragment, u8 *fcs_ok)
+{
+	u16 completed_index_flags = le16_to_cpu(desc->completed_index_flags);
+	u16 q_number_rss_type_flags =
+		le16_to_cpu(desc->q_number_rss_type_flags);
+	u16 bytes_written_flags = le16_to_cpu(desc->bytes_written_flags);
+
+	cq_desc_dec((struct cq_desc *)desc, type,
+		color, q_number, completed_index);
+
+	*ingress_port = (completed_index_flags &
+		CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT) ? 1 : 0;
+	*fcoe = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_FCOE) ?
+		1 : 0;
+	*eop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) ?
+		1 : 0;
+	*sop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_SOP) ?
+		1 : 0;
+
+	*rss_type = (u8)((q_number_rss_type_flags >> CQ_DESC_Q_NUM_BITS) &
+		CQ_ENET_RQ_DESC_RSS_TYPE_MASK);
+	*csum_not_calc = (q_number_rss_type_flags &
+		CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ? 1 : 0;
+
+	*rss_hash = le32_to_cpu(desc->rss_hash);
+
+	*bytes_written = bytes_written_flags &
+		CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+	*packet_error = (bytes_written_flags &
+		CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ? 1 : 0;
+	*vlan_stripped = (bytes_written_flags &
+		CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) ? 1 : 0;
+
+	*vlan = le16_to_cpu(desc->vlan);
+
+	if (*fcoe) {
+		*fcoe_sof = (u8)(le16_to_cpu(desc->checksum_fcoe) &
+			CQ_ENET_RQ_DESC_FCOE_SOF_MASK);
+		*fcoe_fc_crc_ok = (desc->flags &
+			CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0;
+		*fcoe_enc_error = (desc->flags &
+			CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0;
+		*fcoe_eof = (u8)((desc->checksum_fcoe >>
+			CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) &
+			CQ_ENET_RQ_DESC_FCOE_EOF_MASK);
+		*checksum = 0;
+	} else {
+		*fcoe_sof = 0;
+		*fcoe_fc_crc_ok = 0;
+		*fcoe_enc_error = 0;
+		*fcoe_eof = 0;
+		*checksum = le16_to_cpu(desc->checksum_fcoe);
+	}
+
+	*tcp_udp_csum_ok =
+		(desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ? 1 : 0;
+	*udp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_UDP) ? 1 : 0;
+	*tcp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP) ? 1 : 0;
+	*ipv4_csum_ok =
+		(desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ? 1 : 0;
+	*ipv6 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV6) ? 1 : 0;
+	*ipv4 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4) ? 1 : 0;
+	*ipv4_fragment =
+		(desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT) ? 1 : 0;
+	*fcs_ok = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) ? 1 : 0;
+}
+
+#endif /* _CQ_ENET_DESC_H_ */
diff --git a/drivers/scsi/fnic/cq_exch_desc.h b/drivers/scsi/fnic/cq_exch_desc.h
new file mode 100644
index 0000000..501660c
--- /dev/null
+++ b/drivers/scsi/fnic/cq_exch_desc.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _CQ_EXCH_DESC_H_
+#define _CQ_EXCH_DESC_H_
+
+#include "cq_desc.h"
+
+/* Exchange completion queue descriptor: 16B */
+struct cq_exch_wq_desc {
+	u16 completed_index;
+	u16 q_number;
+	u16 exchange_id;
+	u8  tmpl;
+	u8  reserved0;
+	u32 reserved1;
+	u8  exch_status;
+	u8  reserved2[2];
+	u8  type_color;
+};
+
+#define CQ_EXCH_WQ_STATUS_BITS      2
+#define CQ_EXCH_WQ_STATUS_MASK      ((1 << CQ_EXCH_WQ_STATUS_BITS) - 1)
+
+enum cq_exch_status_types {
+	CQ_EXCH_WQ_STATUS_TYPE_COMPLETE = 0,
+	CQ_EXCH_WQ_STATUS_TYPE_ABORT = 1,
+	CQ_EXCH_WQ_STATUS_TYPE_SGL_EOF = 2,
+	CQ_EXCH_WQ_STATUS_TYPE_TMPL_ERR = 3,
+};
+
+static inline void cq_exch_wq_desc_dec(struct cq_exch_wq_desc *desc_ptr,
+				       u8  *type,
+				       u8  *color,
+				       u16 *q_number,
+				       u16 *completed_index,
+				       u8  *exch_status)
+{
+	cq_desc_dec((struct cq_desc *)desc_ptr, type,
+		    color, q_number, completed_index);
+	*exch_status = desc_ptr->exch_status & CQ_EXCH_WQ_STATUS_MASK;
+}
+
+struct cq_fcp_rq_desc {
+	u16 completed_index_eop_sop_prt;
+	u16 q_number;
+	u16 exchange_id;
+	u16 tmpl;
+	u16 bytes_written;
+	u16 vlan;
+	u8  sof;
+	u8  eof;
+	u8  fcs_fer_fck;
+	u8  type_color;
+};
+
+#define CQ_FCP_RQ_DESC_FLAGS_SOP		(1 << 15)
+#define CQ_FCP_RQ_DESC_FLAGS_EOP		(1 << 14)
+#define CQ_FCP_RQ_DESC_FLAGS_PRT		(1 << 12)
+#define CQ_FCP_RQ_DESC_TMPL_MASK		0x1f
+#define CQ_FCP_RQ_DESC_BYTES_WRITTEN_MASK	0x3fff
+#define CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT		14
+#define CQ_FCP_RQ_DESC_PACKET_ERR_MASK (1 << CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT)
+#define CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT	15
+#define CQ_FCP_RQ_DESC_VS_STRIPPED_MASK (1 << CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT)
+#define CQ_FCP_RQ_DESC_FC_CRC_OK_MASK		0x1
+#define CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT		1
+#define CQ_FCP_RQ_DESC_FCOE_ERR_MASK (1 << CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT)
+#define CQ_FCP_RQ_DESC_FCS_OK_SHIFT		7
+#define CQ_FCP_RQ_DESC_FCS_OK_MASK (1 << CQ_FCP_RQ_DESC_FCS_OK_SHIFT)
+
+static inline void cq_fcp_rq_desc_dec(struct cq_fcp_rq_desc *desc_ptr,
+				      u8  *type,
+				      u8  *color,
+				      u16 *q_number,
+				      u16 *completed_index,
+				      u8  *eop,
+				      u8  *sop,
+				      u8  *fck,
+				      u16 *exchange_id,
+				      u16 *tmpl,
+				      u32 *bytes_written,
+				      u8  *sof,
+				      u8  *eof,
+				      u8  *ingress_port,
+				      u8  *packet_err,
+				      u8  *fcoe_err,
+				      u8  *fcs_ok,
+				      u8  *vlan_stripped,
+				      u16 *vlan)
+{
+	cq_desc_dec((struct cq_desc *)desc_ptr, type,
+		    color, q_number, completed_index);
+	*eop = (desc_ptr->completed_index_eop_sop_prt &
+		CQ_FCP_RQ_DESC_FLAGS_EOP) ? 1 : 0;
+	*sop = (desc_ptr->completed_index_eop_sop_prt &
+		CQ_FCP_RQ_DESC_FLAGS_SOP) ? 1 : 0;
+	*ingress_port =
+		(desc_ptr->completed_index_eop_sop_prt &
+		 CQ_FCP_RQ_DESC_FLAGS_PRT) ? 1 : 0;
+	*exchange_id = desc_ptr->exchange_id;
+	*tmpl = desc_ptr->tmpl & CQ_FCP_RQ_DESC_TMPL_MASK;
+	*bytes_written =
+		desc_ptr->bytes_written & CQ_FCP_RQ_DESC_BYTES_WRITTEN_MASK;
+	*packet_err =
+		(desc_ptr->bytes_written & CQ_FCP_RQ_DESC_PACKET_ERR_MASK) >>
+		CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT;
+	*vlan_stripped =
+		(desc_ptr->bytes_written & CQ_FCP_RQ_DESC_VS_STRIPPED_MASK) >>
+		CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT;
+	*vlan = desc_ptr->vlan;
+	*sof = desc_ptr->sof;
+	*fck = desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FC_CRC_OK_MASK;
+	*fcoe_err = (desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FCOE_ERR_MASK) >>
+		CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT;
+	*eof = desc_ptr->eof;
+	*fcs_ok =
+		(desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FCS_OK_MASK) >>
+		CQ_FCP_RQ_DESC_FCS_OK_SHIFT;
+}
+
+struct cq_sgl_desc {
+	u16 exchange_id;
+	u16 q_number;
+	u32 active_burst_offset;
+	u32 tot_data_bytes;
+	u16 tmpl;
+	u8  sgl_err;
+	u8  type_color;
+};
+
+enum cq_sgl_err_types {
+	CQ_SGL_ERR_NO_ERROR = 0,
+	CQ_SGL_ERR_OVERFLOW,         /* data ran beyond end of SGL */
+	CQ_SGL_ERR_SGL_LCL_ADDR_ERR, /* sgl access to local vnic addr illegal*/
+	CQ_SGL_ERR_ADDR_RSP_ERR,     /* sgl address error */
+	CQ_SGL_ERR_DATA_RSP_ERR,     /* sgl data rsp error */
+	CQ_SGL_ERR_CNT_ZERO_ERR,     /* SGL count is 0 */
+	CQ_SGL_ERR_CNT_MAX_ERR,      /* SGL count is larger than supported */
+	CQ_SGL_ERR_ORDER_ERR,        /* frames recv on both ports, order err */
+	CQ_SGL_ERR_DATA_LCL_ADDR_ERR,/* sgl data buf to local vnic addr ill */
+	CQ_SGL_ERR_HOST_CQ_ERR,      /* host cq entry to local vnic addr ill */
+};
+
+#define CQ_SGL_SGL_ERR_MASK             0x1f
+#define CQ_SGL_TMPL_MASK                0x1f
+
+static inline void cq_sgl_desc_dec(struct cq_sgl_desc *desc_ptr,
+				   u8  *type,
+				   u8  *color,
+				   u16 *q_number,
+				   u16 *exchange_id,
+				   u32 *active_burst_offset,
+				   u32 *tot_data_bytes,
+				   u16 *tmpl,
+				   u8  *sgl_err)
+{
+	/* Cheat a little by assuming exchange_id is the same as completed
+	   index */
+	cq_desc_dec((struct cq_desc *)desc_ptr, type, color, q_number,
+		    exchange_id);
+	*active_burst_offset = desc_ptr->active_burst_offset;
+	*tot_data_bytes = desc_ptr->tot_data_bytes;
+	*tmpl = desc_ptr->tmpl & CQ_SGL_TMPL_MASK;
+	*sgl_err = desc_ptr->sgl_err & CQ_SGL_SGL_ERR_MASK;
+}
+
+#endif /* _CQ_EXCH_DESC_H_ */
diff --git a/drivers/scsi/fnic/fcpio.h b/drivers/scsi/fnic/fcpio.h
new file mode 100644
index 0000000..12d770d
--- /dev/null
+++ b/drivers/scsi/fnic/fcpio.h
@@ -0,0 +1,780 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _FCPIO_H_
+#define _FCPIO_H_
+
+#include <linux/if_ether.h>
+
+/*
+ * This header file includes all of the data structures used for
+ * communication by the host driver to the fcp firmware.
+ */
+
+/*
+ * Exchange and sequence id space allocated to the host driver
+ */
+#define FCPIO_HOST_EXCH_RANGE_START         0x1000
+#define FCPIO_HOST_EXCH_RANGE_END           0x1fff
+#define FCPIO_HOST_SEQ_ID_RANGE_START       0x80
+#define FCPIO_HOST_SEQ_ID_RANGE_END         0xff
+
+/*
+ * Command entry type
+ */
+enum fcpio_type {
+	/*
+	 * Initiator request types
+	 */
+	FCPIO_ICMND_16 = 0x1,
+	FCPIO_ICMND_32,
+	FCPIO_ICMND_CMPL,
+	FCPIO_ITMF,
+	FCPIO_ITMF_CMPL,
+
+	/*
+	 * Target request types
+	 */
+	FCPIO_TCMND_16 = 0x11,
+	FCPIO_TCMND_32,
+	FCPIO_TDATA,
+	FCPIO_TXRDY,
+	FCPIO_TRSP,
+	FCPIO_TDRSP_CMPL,
+	FCPIO_TTMF,
+	FCPIO_TTMF_ACK,
+	FCPIO_TABORT,
+	FCPIO_TABORT_CMPL,
+
+	/*
+	 * Misc request types
+	 */
+	FCPIO_ACK = 0x20,
+	FCPIO_RESET,
+	FCPIO_RESET_CMPL,
+	FCPIO_FLOGI_REG,
+	FCPIO_FLOGI_REG_CMPL,
+	FCPIO_ECHO,
+	FCPIO_ECHO_CMPL,
+	FCPIO_LUNMAP_CHNG,
+	FCPIO_LUNMAP_REQ,
+	FCPIO_LUNMAP_REQ_CMPL,
+	FCPIO_FLOGI_FIP_REG,
+	FCPIO_FLOGI_FIP_REG_CMPL,
+};
+
+/*
+ * Header status codes from the firmware
+ */
+enum fcpio_status {
+	FCPIO_SUCCESS = 0,              /* request was successful */
+
+	/*
+	 * If a request to the firmware is rejected, the original request
+	 * header will be returned with the status set to one of the following:
+	 */
+	FCPIO_INVALID_HEADER,    /* header contains invalid data */
+	FCPIO_OUT_OF_RESOURCE,   /* out of resources to complete request */
+	FCPIO_INVALID_PARAM,     /* some parameter in request is invalid */
+	FCPIO_REQ_NOT_SUPPORTED, /* request type is not supported */
+	FCPIO_IO_NOT_FOUND,      /* requested I/O was not found */
+
+	/*
+	 * Once a request is processed, the firmware will usually return
+	 * a cmpl message type.  In cases where errors occurred,
+	 * the header status field would be filled in with one of the following:
+	 */
+	FCPIO_ABORTED = 0x41,     /* request was aborted */
+	FCPIO_TIMEOUT,            /* request was timed out */
+	FCPIO_SGL_INVALID,        /* request was aborted due to sgl error */
+	FCPIO_MSS_INVALID,        /* request was aborted due to mss error */
+	FCPIO_DATA_CNT_MISMATCH,  /* recv/sent more/less data than exp. */
+	FCPIO_FW_ERR,             /* request was terminated due to fw error */
+	FCPIO_ITMF_REJECTED,      /* itmf req was rejected by remote node */
+	FCPIO_ITMF_FAILED,        /* itmf req was failed by remote node */
+	FCPIO_ITMF_INCORRECT_LUN, /* itmf req targeted incorrect LUN */
+	FCPIO_CMND_REJECTED,      /* request was invalid and rejected */
+	FCPIO_NO_PATH_AVAIL,      /* no paths to the lun was available */
+	FCPIO_PATH_FAILED,        /* i/o sent to current path failed */
+	FCPIO_LUNMAP_CHNG_PEND,   /* i/o rejected due to lunmap change */
+};
+
+/*
+ * The header command tag.  All host requests will use the "tag" field
+ * to mark commands with a unique tag.  When the firmware responds to
+ * a host request, it will copy the tag field into the response.
+ *
+ * The only firmware requests that will use the rx_id/ox_id fields instead
+ * of the tag field will be the target command and target task management
+ * requests.  These two requests do not have corresponding host requests
+ * since they come directly from the FC initiator on the network.
+ */
+struct fcpio_tag {
+	union {
+		u32 req_id;
+		struct {
+			u16 rx_id;
+			u16 ox_id;
+		} ex_id;
+	} u;
+};
+
+static inline void
+fcpio_tag_id_enc(struct fcpio_tag *tag, u32 id)
+{
+	tag->u.req_id = id;
+}
+
+static inline void
+fcpio_tag_id_dec(struct fcpio_tag *tag, u32 *id)
+{
+	*id = tag->u.req_id;
+}
+
+static inline void
+fcpio_tag_exid_enc(struct fcpio_tag *tag, u16 ox_id, u16 rx_id)
+{
+	tag->u.ex_id.rx_id = rx_id;
+	tag->u.ex_id.ox_id = ox_id;
+}
+
+static inline void
+fcpio_tag_exid_dec(struct fcpio_tag *tag, u16 *ox_id, u16 *rx_id)
+{
+	*rx_id = tag->u.ex_id.rx_id;
+	*ox_id = tag->u.ex_id.ox_id;
+}
+
+/*
+ * The header for an fcpio request, whether from the firmware or from the
+ * host driver
+ */
+struct fcpio_header {
+	u8            type;           /* enum fcpio_type */
+	u8            status;         /* header status entry */
+	u16           _resvd;         /* reserved */
+	struct fcpio_tag    tag;      /* header tag */
+};
+
+static inline void
+fcpio_header_enc(struct fcpio_header *hdr,
+		 u8 type, u8 status,
+		 struct fcpio_tag tag)
+{
+	hdr->type = type;
+	hdr->status = status;
+	hdr->_resvd = 0;
+	hdr->tag = tag;
+}
+
+static inline void
+fcpio_header_dec(struct fcpio_header *hdr,
+		 u8 *type, u8 *status,
+		 struct fcpio_tag *tag)
+{
+	*type = hdr->type;
+	*status = hdr->status;
+	*tag = hdr->tag;
+}
+
+#define CDB_16      16
+#define CDB_32      32
+#define LUN_ADDRESS 8
+
+/*
+ * fcpio_icmnd_16: host -> firmware request
+ *
+ * used for sending out an initiator SCSI 16-byte command
+ */
+struct fcpio_icmnd_16 {
+	u32	  lunmap_id;		/* index into lunmap table */
+	u8	  special_req_flags;	/* special exchange request flags */
+	u8	  _resvd0[3];	        /* reserved */
+	u32	  sgl_cnt;		/* scatter-gather list count */
+	u32	  sense_len;		/* sense buffer length */
+	u64	  sgl_addr;		/* scatter-gather list addr */
+	u64	  sense_addr;		/* sense buffer address */
+	u8	  crn;			/* SCSI Command Reference No. */
+	u8	  pri_ta;		/* SCSI Priority and Task attribute */
+	u8	  _resvd1;		/* reserved: should be 0 */
+	u8	  flags;		/* command flags */
+	u8	  scsi_cdb[CDB_16];	/* SCSI Cmnd Descriptor Block */
+	u32	  data_len;		/* length of data expected */
+	u8	  lun[LUN_ADDRESS];	/* FC vNIC only: LUN address */
+	u8	  _resvd2;		/* reserved */
+	u8	  d_id[3];		/* FC vNIC only: Target D_ID */
+	u16	  mss;			/* FC vNIC only: max burst */
+	u16	  _resvd3;		/* reserved */
+	u32	  r_a_tov;		/* FC vNIC only: Res. Alloc Timeout */
+	u32	  e_d_tov;	        /* FC vNIC only: Err Detect Timeout */
+};
+
+/*
+ * Special request flags
+ */
+#define FCPIO_ICMND_SRFLAG_RETRY 0x01   /* Enable Retry handling on exchange */
+
+/*
+ * Priority/Task Attribute settings
+ */
+#define FCPIO_ICMND_PTA_SIMPLE      0   /* simple task attribute */
+#define FCPIO_ICMND_PTA_HEADQ       1   /* head of queue task attribute */
+#define FCPIO_ICMND_PTA_ORDERED     2   /* ordered task attribute */
+#define FCPIO_ICMND_PTA_ACA         4   /* auto contingent allegiance */
+#define FCPIO_ICMND_PRI_SHIFT       3   /* priority field starts in bit 3 */
+
+/*
+ * Command flags
+ */
+#define FCPIO_ICMND_RDDATA      0x02    /* read data */
+#define FCPIO_ICMND_WRDATA      0x01    /* write data */
+
+/*
+ * fcpio_icmnd_32: host -> firmware request
+ *
+ * used for sending out an initiator SCSI 32-byte command
+ */
+struct fcpio_icmnd_32 {
+	u32   lunmap_id;              /* index into lunmap table */
+	u8    special_req_flags;      /* special exchange request flags */
+	u8    _resvd0[3];             /* reserved */
+	u32   sgl_cnt;                /* scatter-gather list count */
+	u32   sense_len;              /* sense buffer length */
+	u64   sgl_addr;               /* scatter-gather list addr */
+	u64   sense_addr;             /* sense buffer address */
+	u8    crn;                    /* SCSI Command Reference No. */
+	u8    pri_ta;                 /* SCSI Priority and Task attribute */
+	u8    _resvd1;                /* reserved: should be 0 */
+	u8    flags;                  /* command flags */
+	u8    scsi_cdb[CDB_32];       /* SCSI Cmnd Descriptor Block */
+	u32   data_len;               /* length of data expected */
+	u8    lun[LUN_ADDRESS];       /* FC vNIC only: LUN address */
+	u8    _resvd2;                /* reserved */
+	u8    d_id[3];		      /* FC vNIC only: Target D_ID */
+	u16   mss;                    /* FC vNIC only: max burst */
+	u16   _resvd3;                /* reserved */
+	u32   r_a_tov;                /* FC vNIC only: Res. Alloc Timeout */
+	u32   e_d_tov;                /* FC vNIC only: Error Detect Timeout */
+};
+
+/*
+ * fcpio_itmf: host -> firmware request
+ *
+ * used for requesting the firmware to abort a request and/or send out
+ * a task management function
+ *
+ * The t_tag field is only needed when the request type is ABT_TASK.
+ */
+struct fcpio_itmf {
+	u32   lunmap_id;              /* index into lunmap table */
+	u32   tm_req;                 /* SCSI Task Management request */
+	u32   t_tag;                  /* header tag of fcpio to be aborted */
+	u32   _resvd;                 /* _reserved */
+	u8    lun[LUN_ADDRESS];       /* FC vNIC only: LUN address */
+	u8    _resvd1;                /* reserved */
+	u8    d_id[3];		      /* FC vNIC only: Target D_ID */
+	u32   r_a_tov;                /* FC vNIC only: R_A_TOV in msec */
+	u32   e_d_tov;                /* FC vNIC only: E_D_TOV in msec */
+};
+
+/*
+ * Task Management request
+ */
+enum fcpio_itmf_tm_req_type {
+	FCPIO_ITMF_ABT_TASK_TERM = 0x01,    /* abort task and terminate */
+	FCPIO_ITMF_ABT_TASK,                /* abort task and issue abts */
+	FCPIO_ITMF_ABT_TASK_SET,            /* abort task set */
+	FCPIO_ITMF_CLR_TASK_SET,            /* clear task set */
+	FCPIO_ITMF_LUN_RESET,               /* logical unit reset task mgmt */
+	FCPIO_ITMF_CLR_ACA,                 /* Clear ACA condition */
+};
+
+/*
+ * fcpio_tdata: host -> firmware request
+ *
+ * used for requesting the firmware to send out a read data transfer for a
+ * target command
+ */
+struct fcpio_tdata {
+	u16   rx_id;                  /* FC rx_id of target command */
+	u16   flags;                  /* command flags */
+	u32   rel_offset;             /* data sequence relative offset */
+	u32   sgl_cnt;                /* scatter-gather list count */
+	u32   data_len;               /* length of data expected to send */
+	u64   sgl_addr;               /* scatter-gather list address */
+};
+
+/*
+ * Command flags
+ */
+#define FCPIO_TDATA_SCSI_RSP    0x01    /* send a scsi resp. after last frame */
+
+/*
+ * fcpio_txrdy: host -> firmware request
+ *
+ * used for requesting the firmware to send out a write data transfer for a
+ * target command
+ */
+struct fcpio_txrdy {
+	u16   rx_id;                  /* FC rx_id of target command */
+	u16   _resvd0;                /* reserved */
+	u32   rel_offset;             /* data sequence relative offset */
+	u32   sgl_cnt;                /* scatter-gather list count */
+	u32   data_len;               /* length of data expected to send */
+	u64   sgl_addr;               /* scatter-gather list address */
+};
+
+/*
+ * fcpio_trsp: host -> firmware request
+ *
+ * used for requesting the firmware to send out a response for a target
+ * command
+ */
+struct fcpio_trsp {
+	u16   rx_id;                  /* FC rx_id of target command */
+	u16   _resvd0;                /* reserved */
+	u32   sense_len;              /* sense data buffer length */
+	u64   sense_addr;             /* sense data buffer address */
+	u16   _resvd1;                /* reserved */
+	u8    flags;                  /* response request flags */
+	u8    scsi_status;            /* SCSI status */
+	u32   residual;               /* SCSI data residual value of I/O */
+};
+
+/*
+ * resposnse request flags
+ */
+#define FCPIO_TRSP_RESID_UNDER  0x08   /* residual is valid and is underflow */
+#define FCPIO_TRSP_RESID_OVER   0x04   /* residual is valid and is overflow */
+
+/*
+ * fcpio_ttmf_ack: host -> firmware response
+ *
+ * used by the host to indicate to the firmware it has received and processed
+ * the target tmf request
+ */
+struct fcpio_ttmf_ack {
+	u16   rx_id;                  /* FC rx_id of target command */
+	u16   _resvd0;                /* reserved */
+	u32   tmf_status;             /* SCSI task management status */
+};
+
+/*
+ * fcpio_tabort: host -> firmware request
+ *
+ * used by the host to request the firmware to abort a target request that was
+ * received by the firmware
+ */
+struct fcpio_tabort {
+	u16   rx_id;                  /* rx_id of the target request */
+};
+
+/*
+ * fcpio_reset: host -> firmware request
+ *
+ * used by the host to signal a reset of the driver to the firmware
+ * and to request firmware to clean up all outstanding I/O
+ */
+struct fcpio_reset {
+	u32   _resvd;
+};
+
+enum fcpio_flogi_reg_format_type {
+	FCPIO_FLOGI_REG_DEF_DEST = 0,    /* Use the oui | s_id mac format */
+	FCPIO_FLOGI_REG_GW_DEST,         /* Use the fixed gateway mac */
+};
+
+/*
+ * fcpio_flogi_reg: host -> firmware request
+ *
+ * fc vnic only
+ * used by the host to notify the firmware of the lif's s_id
+ * and destination mac address format
+ */
+struct fcpio_flogi_reg {
+	u8 format;
+	u8 s_id[3];			/* FC vNIC only: Source S_ID */
+	u8 gateway_mac[ETH_ALEN];	/* Destination gateway mac */
+	u16 _resvd;
+	u32 r_a_tov;			/* R_A_TOV in msec */
+	u32 e_d_tov;			/* E_D_TOV in msec */
+};
+
+/*
+ * fcpio_echo: host -> firmware request
+ *
+ * sends a heartbeat echo request to the firmware
+ */
+struct fcpio_echo {
+	u32 _resvd;
+};
+
+/*
+ * fcpio_lunmap_req: host -> firmware request
+ *
+ * scsi vnic only
+ * sends a request to retrieve the lunmap table for scsi vnics
+ */
+struct fcpio_lunmap_req {
+	u64 addr;                     /* address of the buffer */
+	u32 len;                      /* len of the buffer */
+};
+
+/*
+ * fcpio_flogi_fip_reg: host -> firmware request
+ *
+ * fc vnic only
+ * used by the host to notify the firmware of the lif's s_id
+ * and destination mac address format
+ */
+struct fcpio_flogi_fip_reg {
+	u8    _resvd0;
+	u8     s_id[3];               /* FC vNIC only: Source S_ID */
+	u8     fcf_mac[ETH_ALEN];     /* FCF Target destination mac */
+	u16   _resvd1;
+	u32   r_a_tov;                /* R_A_TOV in msec */
+	u32   e_d_tov;                /* E_D_TOV in msec */
+	u8    ha_mac[ETH_ALEN];       /* Host adapter source mac */
+	u16   _resvd2;
+};
+
+/*
+ * Basic structure for all fcpio structures that are sent from the host to the
+ * firmware.  They are 128 bytes per structure.
+ */
+#define FCPIO_HOST_REQ_LEN      128     /* expected length of host requests */
+
+struct fcpio_host_req {
+	struct fcpio_header hdr;
+
+	union {
+		/*
+		 * Defines space needed for request
+		 */
+		u8 buf[FCPIO_HOST_REQ_LEN - sizeof(struct fcpio_header)];
+
+		/*
+		 * Initiator host requests
+		 */
+		struct fcpio_icmnd_16               icmnd_16;
+		struct fcpio_icmnd_32               icmnd_32;
+		struct fcpio_itmf                   itmf;
+
+		/*
+		 * Target host requests
+		 */
+		struct fcpio_tdata                  tdata;
+		struct fcpio_txrdy                  txrdy;
+		struct fcpio_trsp                   trsp;
+		struct fcpio_ttmf_ack               ttmf_ack;
+		struct fcpio_tabort                 tabort;
+
+		/*
+		 * Misc requests
+		 */
+		struct fcpio_reset                  reset;
+		struct fcpio_flogi_reg              flogi_reg;
+		struct fcpio_echo                   echo;
+		struct fcpio_lunmap_req             lunmap_req;
+		struct fcpio_flogi_fip_reg          flogi_fip_reg;
+	} u;
+};
+
+/*
+ * fcpio_icmnd_cmpl: firmware -> host response
+ *
+ * used for sending the host a response to an initiator command
+ */
+struct fcpio_icmnd_cmpl {
+	u8    _resvd0[6];             /* reserved */
+	u8    flags;                  /* response flags */
+	u8    scsi_status;            /* SCSI status */
+	u32   residual;               /* SCSI data residual length */
+	u32   sense_len;              /* SCSI sense length */
+};
+
+/*
+ * response flags
+ */
+#define FCPIO_ICMND_CMPL_RESID_UNDER    0x08    /* resid under and valid */
+#define FCPIO_ICMND_CMPL_RESID_OVER     0x04    /* resid over and valid */
+
+/*
+ * fcpio_itmf_cmpl: firmware -> host response
+ *
+ * used for sending the host a response for a itmf request
+ */
+struct fcpio_itmf_cmpl {
+	u32    _resvd;                /* reserved */
+};
+
+/*
+ * fcpio_tcmnd_16: firmware -> host request
+ *
+ * used by the firmware to notify the host of an incoming target SCSI 16-Byte
+ * request
+ */
+struct fcpio_tcmnd_16 {
+	u8    lun[LUN_ADDRESS];       /* FC vNIC only: LUN address */
+	u8    crn;                    /* SCSI Command Reference No. */
+	u8    pri_ta;                 /* SCSI Priority and Task attribute */
+	u8    _resvd2;                /* reserved: should be 0 */
+	u8    flags;                  /* command flags */
+	u8    scsi_cdb[CDB_16];       /* SCSI Cmnd Descriptor Block */
+	u32   data_len;               /* length of data expected */
+	u8    _resvd1;                /* reserved */
+	u8    s_id[3];		      /* FC vNIC only: Source S_ID */
+};
+
+/*
+ * Priority/Task Attribute settings
+ */
+#define FCPIO_TCMND_PTA_SIMPLE      0   /* simple task attribute */
+#define FCPIO_TCMND_PTA_HEADQ       1   /* head of queue task attribute */
+#define FCPIO_TCMND_PTA_ORDERED     2   /* ordered task attribute */
+#define FCPIO_TCMND_PTA_ACA         4   /* auto contingent allegiance */
+#define FCPIO_TCMND_PRI_SHIFT       3   /* priority field starts in bit 3 */
+
+/*
+ * Command flags
+ */
+#define FCPIO_TCMND_RDDATA      0x02    /* read data */
+#define FCPIO_TCMND_WRDATA      0x01    /* write data */
+
+/*
+ * fcpio_tcmnd_32: firmware -> host request
+ *
+ * used by the firmware to notify the host of an incoming target SCSI 32-Byte
+ * request
+ */
+struct fcpio_tcmnd_32 {
+	u8    lun[LUN_ADDRESS];       /* FC vNIC only: LUN address */
+	u8    crn;                    /* SCSI Command Reference No. */
+	u8    pri_ta;                 /* SCSI Priority and Task attribute */
+	u8    _resvd2;                /* reserved: should be 0 */
+	u8    flags;                  /* command flags */
+	u8    scsi_cdb[CDB_32];       /* SCSI Cmnd Descriptor Block */
+	u32   data_len;               /* length of data expected */
+	u8    _resvd0;                /* reserved */
+	u8    s_id[3];		      /* FC vNIC only: Source S_ID */
+};
+
+/*
+ * fcpio_tdrsp_cmpl: firmware -> host response
+ *
+ * used by the firmware to notify the host of a response to a host target
+ * command
+ */
+struct fcpio_tdrsp_cmpl {
+	u16   rx_id;                  /* rx_id of the target request */
+	u16   _resvd0;                /* reserved */
+};
+
+/*
+ * fcpio_ttmf: firmware -> host request
+ *
+ * used by the firmware to notify the host of an incoming task management
+ * function request
+ */
+struct fcpio_ttmf {
+	u8    _resvd0;                /* reserved */
+	u8    s_id[3];		      /* FC vNIC only: Source S_ID */
+	u8    lun[LUN_ADDRESS];       /* FC vNIC only: LUN address */
+	u8    crn;                    /* SCSI Command Reference No. */
+	u8    _resvd2[3];             /* reserved */
+	u32   tmf_type;               /* task management request type */
+};
+
+/*
+ * Task Management request
+ */
+#define FCPIO_TTMF_CLR_ACA      0x40    /* Clear ACA condition */
+#define FCPIO_TTMF_LUN_RESET    0x10    /* logical unit reset task mgmt */
+#define FCPIO_TTMF_CLR_TASK_SET 0x04    /* clear task set */
+#define FCPIO_TTMF_ABT_TASK_SET 0x02    /* abort task set */
+#define FCPIO_TTMF_ABT_TASK     0x01    /* abort task */
+
+/*
+ * fcpio_tabort_cmpl: firmware -> host response
+ *
+ * used by the firmware to respond to a host's tabort request
+ */
+struct fcpio_tabort_cmpl {
+	u16   rx_id;                  /* rx_id of the target request */
+	u16   _resvd0;                /* reserved */
+};
+
+/*
+ * fcpio_ack: firmware -> host response
+ *
+ * used by firmware to notify the host of the last work request received
+ */
+struct fcpio_ack {
+	u16  request_out;             /* last host entry received */
+	u16  _resvd;
+};
+
+/*
+ * fcpio_reset_cmpl: firmware -> host response
+ *
+ * use by firmware to respond to the host's reset request
+ */
+struct fcpio_reset_cmpl {
+	u16   vnic_id;
+};
+
+/*
+ * fcpio_flogi_reg_cmpl: firmware -> host response
+ *
+ * fc vnic only
+ * response to the fcpio_flogi_reg request
+ */
+struct fcpio_flogi_reg_cmpl {
+	u32 _resvd;
+};
+
+/*
+ * fcpio_echo_cmpl: firmware -> host response
+ *
+ * response to the fcpio_echo request
+ */
+struct fcpio_echo_cmpl {
+	u32 _resvd;
+};
+
+/*
+ * fcpio_lunmap_chng: firmware -> host notification
+ *
+ * scsi vnic only
+ * notifies the host that the lunmap tables have changed
+ */
+struct fcpio_lunmap_chng {
+	u32 _resvd;
+};
+
+/*
+ * fcpio_lunmap_req_cmpl: firmware -> host response
+ *
+ * scsi vnic only
+ * response for lunmap table request from the host
+ */
+struct fcpio_lunmap_req_cmpl {
+	u32 _resvd;
+};
+
+/*
+ * Basic structure for all fcpio structures that are sent from the firmware to
+ * the host.  They are 64 bytes per structure.
+ */
+#define FCPIO_FW_REQ_LEN        64      /* expected length of fw requests */
+struct fcpio_fw_req {
+	struct fcpio_header hdr;
+
+	union {
+		/*
+		 * Defines space needed for request
+		 */
+		u8 buf[FCPIO_FW_REQ_LEN - sizeof(struct fcpio_header)];
+
+		/*
+		 * Initiator firmware responses
+		 */
+		struct fcpio_icmnd_cmpl         icmnd_cmpl;
+		struct fcpio_itmf_cmpl          itmf_cmpl;
+
+		/*
+		 * Target firmware new requests
+		 */
+		struct fcpio_tcmnd_16           tcmnd_16;
+		struct fcpio_tcmnd_32           tcmnd_32;
+
+		/*
+		 * Target firmware responses
+		 */
+		struct fcpio_tdrsp_cmpl         tdrsp_cmpl;
+		struct fcpio_ttmf               ttmf;
+		struct fcpio_tabort_cmpl        tabort_cmpl;
+
+		/*
+		 * Firmware response to work received
+		 */
+		struct fcpio_ack                ack;
+
+		/*
+		 * Misc requests
+		 */
+		struct fcpio_reset_cmpl         reset_cmpl;
+		struct fcpio_flogi_reg_cmpl     flogi_reg_cmpl;
+		struct fcpio_echo_cmpl          echo_cmpl;
+		struct fcpio_lunmap_chng        lunmap_chng;
+		struct fcpio_lunmap_req_cmpl    lunmap_req_cmpl;
+	} u;
+};
+
+/*
+ * Access routines to encode and decode the color bit, which is the most
+ * significant bit of the MSB of the structure
+ */
+static inline void fcpio_color_enc(struct fcpio_fw_req *fw_req, u8 color)
+{
+	u8 *c = ((u8 *) fw_req) + sizeof(struct fcpio_fw_req) - 1;
+
+	if (color)
+		*c |= 0x80;
+	else
+		*c &= ~0x80;
+}
+
+static inline void fcpio_color_dec(struct fcpio_fw_req *fw_req, u8 *color)
+{
+	u8 *c = ((u8 *) fw_req) + sizeof(struct fcpio_fw_req) - 1;
+
+	*color = *c >> 7;
+
+	/*
+	 * Make sure color bit is read from desc *before* other fields
+	 * are read from desc.  Hardware guarantees color bit is last
+	 * bit (byte) written.  Adding the rmb() prevents the compiler
+	 * and/or CPU from reordering the reads which would potentially
+	 * result in reading stale values.
+	 */
+
+	rmb();
+
+}
+
+/*
+ * Lunmap table entry for scsi vnics
+ */
+#define FCPIO_LUNMAP_TABLE_SIZE     256
+#define FCPIO_FLAGS_LUNMAP_VALID    0x80
+#define FCPIO_FLAGS_BOOT            0x01
+struct fcpio_lunmap_entry {
+	u8    bus;
+	u8    target;
+	u8    lun;
+	u8    path_cnt;
+	u16   flags;
+	u16   update_cnt;
+};
+
+struct fcpio_lunmap_tbl {
+	u32                   update_cnt;
+	struct fcpio_lunmap_entry   lunmaps[FCPIO_LUNMAP_TABLE_SIZE];
+};
+
+#endif /* _FCPIO_H_ */
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
new file mode 100644
index 0000000..e4c0a3d
--- /dev/null
+++ b/drivers/scsi/fnic/fnic.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _FNIC_H_
+#define _FNIC_H_
+
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <scsi/libfc.h>
+#include "fnic_io.h"
+#include "fnic_res.h"
+#include "vnic_dev.h"
+#include "vnic_wq.h"
+#include "vnic_rq.h"
+#include "vnic_cq.h"
+#include "vnic_wq_copy.h"
+#include "vnic_intr.h"
+#include "vnic_stats.h"
+#include "vnic_scsi.h"
+
+#define DRV_NAME		"fnic"
+#define DRV_DESCRIPTION		"Cisco FCoE HBA Driver"
+#define DRV_VERSION		"1.0.0.1121"
+#define PFX			DRV_NAME ": "
+#define DFX                     DRV_NAME "%d: "
+
+#define DESC_CLEAN_LOW_WATERMARK 8
+#define FNIC_MAX_IO_REQ		2048 /* scsi_cmnd tag map entries */
+#define	FNIC_IO_LOCKS		64 /* IO locks: power of 2 */
+#define FNIC_DFLT_QUEUE_DEPTH	32
+#define	FNIC_STATS_RATE_LIMIT	4 /* limit rate at which stats are pulled up */
+
+/*
+ * Tag bits used for special requests.
+ */
+#define BIT(nr)			(1UL << (nr))
+#define FNIC_TAG_ABORT		BIT(30)		/* tag bit indicating abort */
+#define FNIC_TAG_DEV_RST	BIT(29)		/* indicates device reset */
+#define FNIC_TAG_MASK		(BIT(24) - 1)	/* mask for lookup */
+#define FNIC_NO_TAG             -1
+
+/*
+ * Usage of the scsi_cmnd scratchpad.
+ * These fields are locked by the hashed io_req_lock.
+ */
+#define CMD_SP(Cmnd)		((Cmnd)->SCp.ptr)
+#define CMD_STATE(Cmnd)		((Cmnd)->SCp.phase)
+#define CMD_ABTS_STATUS(Cmnd)	((Cmnd)->SCp.Message)
+#define CMD_LR_STATUS(Cmnd)	((Cmnd)->SCp.have_data_in)
+#define CMD_TAG(Cmnd)           ((Cmnd)->SCp.sent_command)
+
+#define FCPIO_INVALID_CODE 0x100 /* hdr_status value unused by firmware */
+
+#define FNIC_LUN_RESET_TIMEOUT	     10000	/* mSec */
+#define FNIC_HOST_RESET_TIMEOUT	     10000	/* mSec */
+#define FNIC_RMDEVICE_TIMEOUT        1000       /* mSec */
+#define FNIC_HOST_RESET_SETTLE_TIME  30         /* Sec */
+
+#define FNIC_MAX_FCP_TARGET     256
+
+extern unsigned int fnic_log_level;
+
+#define FNIC_MAIN_LOGGING 0x01
+#define FNIC_FCS_LOGGING 0x02
+#define FNIC_SCSI_LOGGING 0x04
+#define FNIC_ISR_LOGGING 0x08
+
+#define FNIC_CHECK_LOGGING(LEVEL, CMD)				\
+do {								\
+	if (unlikely(fnic_log_level & LEVEL))			\
+		do {						\
+			CMD;					\
+		} while (0);					\
+} while (0)
+
+#define FNIC_MAIN_DBG(kern_level, host, fmt, args...)		\
+	FNIC_CHECK_LOGGING(FNIC_MAIN_LOGGING,			\
+			 shost_printk(kern_level, host, fmt, ##args);)
+
+#define FNIC_FCS_DBG(kern_level, host, fmt, args...)		\
+	FNIC_CHECK_LOGGING(FNIC_FCS_LOGGING,			\
+			 shost_printk(kern_level, host, fmt, ##args);)
+
+#define FNIC_SCSI_DBG(kern_level, host, fmt, args...)		\
+	FNIC_CHECK_LOGGING(FNIC_SCSI_LOGGING,			\
+			 shost_printk(kern_level, host, fmt, ##args);)
+
+#define FNIC_ISR_DBG(kern_level, host, fmt, args...)		\
+	FNIC_CHECK_LOGGING(FNIC_ISR_LOGGING,			\
+			 shost_printk(kern_level, host, fmt, ##args);)
+
+extern const char *fnic_state_str[];
+
+enum fnic_intx_intr_index {
+	FNIC_INTX_WQ_RQ_COPYWQ,
+	FNIC_INTX_ERR,
+	FNIC_INTX_NOTIFY,
+	FNIC_INTX_INTR_MAX,
+};
+
+enum fnic_msix_intr_index {
+	FNIC_MSIX_RQ,
+	FNIC_MSIX_WQ,
+	FNIC_MSIX_WQ_COPY,
+	FNIC_MSIX_ERR_NOTIFY,
+	FNIC_MSIX_INTR_MAX,
+};
+
+struct fnic_msix_entry {
+	int requested;
+	char devname[IFNAMSIZ];
+	irqreturn_t (*isr)(int, void *);
+	void *devid;
+};
+
+enum fnic_state {
+	FNIC_IN_FC_MODE = 0,
+	FNIC_IN_FC_TRANS_ETH_MODE,
+	FNIC_IN_ETH_MODE,
+	FNIC_IN_ETH_TRANS_FC_MODE,
+};
+
+#define FNIC_WQ_COPY_MAX 1
+#define FNIC_WQ_MAX 1
+#define FNIC_RQ_MAX 1
+#define FNIC_CQ_MAX (FNIC_WQ_COPY_MAX + FNIC_WQ_MAX + FNIC_RQ_MAX)
+
+struct mempool;
+
+/* Per-instance private data structure */
+struct fnic {
+	struct fc_lport *lport;
+	struct vnic_dev_bar bar0;
+
+	struct msix_entry msix_entry[FNIC_MSIX_INTR_MAX];
+	struct fnic_msix_entry msix[FNIC_MSIX_INTR_MAX];
+
+	struct vnic_stats *stats;
+	unsigned long stats_time;	/* time of stats update */
+	struct vnic_nic_cfg *nic_cfg;
+	char name[IFNAMSIZ];
+	struct timer_list notify_timer; /* used for MSI interrupts */
+
+	unsigned int err_intr_offset;
+	unsigned int link_intr_offset;
+
+	unsigned int wq_count;
+	unsigned int cq_count;
+
+	u32 fcoui_mode:1;		/* use fcoui address*/
+	u32 vlan_hw_insert:1;	        /* let hw insert the tag */
+	u32 in_remove:1;                /* fnic device in removal */
+	u32 stop_rx_link_events:1;      /* stop proc. rx frames, link events */
+
+	struct completion *remove_wait; /* device remove thread blocks */
+
+	struct fc_frame *flogi;
+	struct fc_frame *flogi_resp;
+	u16 flogi_oxid;
+	unsigned long s_id;
+	enum fnic_state state;
+	spinlock_t fnic_lock;
+
+	u16 vlan_id;	                /* VLAN tag including priority */
+	u8 mac_addr[ETH_ALEN];
+	u8 dest_addr[ETH_ALEN];
+	u8 data_src_addr[ETH_ALEN];
+	u64 fcp_input_bytes;		/* internal statistic */
+	u64 fcp_output_bytes;		/* internal statistic */
+	u32 link_down_cnt;
+	int link_status;
+
+	struct list_head list;
+	struct pci_dev *pdev;
+	struct vnic_fc_config config;
+	struct vnic_dev *vdev;
+	unsigned int raw_wq_count;
+	unsigned int wq_copy_count;
+	unsigned int rq_count;
+	int fw_ack_index[FNIC_WQ_COPY_MAX];
+	unsigned short fw_ack_recd[FNIC_WQ_COPY_MAX];
+	unsigned short wq_copy_desc_low[FNIC_WQ_COPY_MAX];
+	unsigned int intr_count;
+	u32 __iomem *legacy_pba;
+	struct fnic_host_tag *tags;
+	mempool_t *io_req_pool;
+	mempool_t *io_sgl_pool[FNIC_SGL_NUM_CACHES];
+	spinlock_t io_req_lock[FNIC_IO_LOCKS];	/* locks for scsi cmnds */
+
+	struct work_struct link_work;
+	struct work_struct frame_work;
+	struct sk_buff_head frame_queue;
+
+	/* copy work queue cache line section */
+	____cacheline_aligned struct vnic_wq_copy wq_copy[FNIC_WQ_COPY_MAX];
+	/* completion queue cache line section */
+	____cacheline_aligned struct vnic_cq cq[FNIC_CQ_MAX];
+
+	spinlock_t wq_copy_lock[FNIC_WQ_COPY_MAX];
+
+	/* work queue cache line section */
+	____cacheline_aligned struct vnic_wq wq[FNIC_WQ_MAX];
+	spinlock_t wq_lock[FNIC_WQ_MAX];
+
+	/* receive queue cache line section */
+	____cacheline_aligned struct vnic_rq rq[FNIC_RQ_MAX];
+
+	/* interrupt resource cache line section */
+	____cacheline_aligned struct vnic_intr intr[FNIC_MSIX_INTR_MAX];
+};
+
+extern struct workqueue_struct *fnic_event_queue;
+extern struct device_attribute *fnic_attrs[];
+
+void fnic_clear_intr_mode(struct fnic *fnic);
+int fnic_set_intr_mode(struct fnic *fnic);
+void fnic_free_intr(struct fnic *fnic);
+int fnic_request_intr(struct fnic *fnic);
+
+int fnic_send(struct fc_lport *, struct fc_frame *);
+void fnic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf);
+void fnic_handle_frame(struct work_struct *work);
+void fnic_handle_link(struct work_struct *work);
+int fnic_rq_cmpl_handler(struct fnic *fnic, int);
+int fnic_alloc_rq_frame(struct vnic_rq *rq);
+void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf);
+int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp);
+
+int fnic_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *));
+int fnic_abort_cmd(struct scsi_cmnd *);
+int fnic_device_reset(struct scsi_cmnd *);
+int fnic_host_reset(struct scsi_cmnd *);
+int fnic_reset(struct Scsi_Host *);
+void fnic_scsi_cleanup(struct fc_lport *);
+void fnic_scsi_abort_io(struct fc_lport *);
+void fnic_empty_scsi_cleanup(struct fc_lport *);
+void fnic_exch_mgr_reset(struct fc_lport *, u32, u32);
+int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int);
+int fnic_wq_cmpl_handler(struct fnic *fnic, int);
+int fnic_flogi_reg_handler(struct fnic *fnic);
+void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
+				  struct fcpio_host_req *desc);
+int fnic_fw_reset_handler(struct fnic *fnic);
+void fnic_terminate_rport_io(struct fc_rport *);
+const char *fnic_state_to_str(unsigned int state);
+
+void fnic_log_q_error(struct fnic *fnic);
+void fnic_handle_link_event(struct fnic *fnic);
+
+#endif /* _FNIC_H_ */
diff --git a/drivers/scsi/fnic/fnic_attrs.c b/drivers/scsi/fnic/fnic_attrs.c
new file mode 100644
index 0000000..aea0c3b
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_attrs.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/string.h>
+#include <linux/device.h>
+#include <scsi/scsi_host.h>
+#include "fnic.h"
+
+static ssize_t fnic_show_state(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct fc_lport *lp = shost_priv(class_to_shost(dev));
+	struct fnic *fnic = lport_priv(lp);
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", fnic_state_str[fnic->state]);
+}
+
+static ssize_t fnic_show_drv_version(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION);
+}
+
+static ssize_t fnic_show_link_state(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct fc_lport *lp = shost_priv(class_to_shost(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", (lp->link_up)
+			? "Link Up" : "Link Down");
+}
+
+static DEVICE_ATTR(fnic_state, S_IRUGO, fnic_show_state, NULL);
+static DEVICE_ATTR(drv_version, S_IRUGO, fnic_show_drv_version, NULL);
+static DEVICE_ATTR(link_state, S_IRUGO, fnic_show_link_state, NULL);
+
+struct device_attribute *fnic_attrs[] = {
+	&dev_attr_fnic_state,
+	&dev_attr_drv_version,
+	&dev_attr_link_state,
+	NULL,
+};
diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
new file mode 100644
index 0000000..07e6eed
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_fcs.c
@@ -0,0 +1,742 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/workqueue.h>
+#include <scsi/fc/fc_els.h>
+#include <scsi/fc/fc_fcoe.h>
+#include <scsi/fc_frame.h>
+#include <scsi/libfc.h>
+#include "fnic_io.h"
+#include "fnic.h"
+#include "cq_enet_desc.h"
+#include "cq_exch_desc.h"
+
+struct workqueue_struct *fnic_event_queue;
+
+void fnic_handle_link(struct work_struct *work)
+{
+	struct fnic *fnic = container_of(work, struct fnic, link_work);
+	unsigned long flags;
+	int old_link_status;
+	u32 old_link_down_cnt;
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+
+	if (fnic->stop_rx_link_events) {
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		return;
+	}
+
+	old_link_down_cnt = fnic->link_down_cnt;
+	old_link_status = fnic->link_status;
+	fnic->link_status = vnic_dev_link_status(fnic->vdev);
+	fnic->link_down_cnt = vnic_dev_link_down_cnt(fnic->vdev);
+
+	if (old_link_status == fnic->link_status) {
+		if (!fnic->link_status)
+			/* DOWN -> DOWN */
+			spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		else {
+			if (old_link_down_cnt != fnic->link_down_cnt) {
+				/* UP -> DOWN -> UP */
+				fnic->lport->host_stats.link_failure_count++;
+				spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+				FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
+					     "link down\n");
+				fc_linkdown(fnic->lport);
+				FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
+					     "link up\n");
+				fc_linkup(fnic->lport);
+			} else
+				/* UP -> UP */
+				spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		}
+	} else if (fnic->link_status) {
+		/* DOWN -> UP */
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link up\n");
+		fc_linkup(fnic->lport);
+	} else {
+		/* UP -> DOWN */
+		fnic->lport->host_stats.link_failure_count++;
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link down\n");
+		fc_linkdown(fnic->lport);
+	}
+
+}
+
+/*
+ * This function passes incoming fabric frames to libFC
+ */
+void fnic_handle_frame(struct work_struct *work)
+{
+	struct fnic *fnic = container_of(work, struct fnic, frame_work);
+	struct fc_lport *lp = fnic->lport;
+	unsigned long flags;
+	struct sk_buff *skb;
+	struct fc_frame *fp;
+
+	while ((skb = skb_dequeue(&fnic->frame_queue))) {
+
+		spin_lock_irqsave(&fnic->fnic_lock, flags);
+		if (fnic->stop_rx_link_events) {
+			spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+			dev_kfree_skb(skb);
+			return;
+		}
+		fp = (struct fc_frame *)skb;
+		/* if Flogi resp frame, register the address */
+		if (fr_flags(fp)) {
+			vnic_dev_add_addr(fnic->vdev,
+					  fnic->data_src_addr);
+			fr_flags(fp) = 0;
+		}
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+		fc_exch_recv(lp, lp->emp, fp);
+	}
+
+}
+
+static inline void fnic_import_rq_fc_frame(struct sk_buff *skb,
+					   u32 len, u8 sof, u8 eof)
+{
+	struct fc_frame *fp = (struct fc_frame *)skb;
+
+	skb_trim(skb, len);
+	fr_eof(fp) = eof;
+	fr_sof(fp) = sof;
+}
+
+
+static inline int fnic_import_rq_eth_pkt(struct sk_buff *skb, u32 len)
+{
+	struct fc_frame *fp;
+	struct ethhdr *eh;
+	struct vlan_ethhdr *vh;
+	struct fcoe_hdr *fcoe_hdr;
+	struct fcoe_crc_eof *ft;
+	u32    transport_len = 0;
+
+	eh = (struct ethhdr *)skb->data;
+	vh = (struct vlan_ethhdr *)skb->data;
+	if (vh->h_vlan_proto == htons(ETH_P_8021Q) &&
+	    vh->h_vlan_encapsulated_proto == htons(ETH_P_FCOE)) {
+		skb_pull(skb, sizeof(struct vlan_ethhdr));
+		transport_len += sizeof(struct vlan_ethhdr);
+	} else if (eh->h_proto == htons(ETH_P_FCOE)) {
+		transport_len += sizeof(struct ethhdr);
+		skb_pull(skb, sizeof(struct ethhdr));
+	} else
+		return -1;
+
+	fcoe_hdr = (struct fcoe_hdr *)skb->data;
+	if (FC_FCOE_DECAPS_VER(fcoe_hdr) != FC_FCOE_VER)
+		return -1;
+
+	fp = (struct fc_frame *)skb;
+	fc_frame_init(fp);
+	fr_sof(fp) = fcoe_hdr->fcoe_sof;
+	skb_pull(skb, sizeof(struct fcoe_hdr));
+	transport_len += sizeof(struct fcoe_hdr);
+
+	ft = (struct fcoe_crc_eof *)(skb->data + len -
+				     transport_len - sizeof(*ft));
+	fr_eof(fp) = ft->fcoe_eof;
+	skb_trim(skb, len - transport_len - sizeof(*ft));
+	return 0;
+}
+
+static inline int fnic_handle_flogi_resp(struct fnic *fnic,
+					 struct fc_frame *fp)
+{
+	u8 mac[ETH_ALEN] = FC_FCOE_FLOGI_MAC;
+	struct ethhdr *eth_hdr;
+	struct fc_frame_header *fh;
+	int ret = 0;
+	unsigned long flags;
+	struct fc_frame *old_flogi_resp = NULL;
+
+	fh = (struct fc_frame_header *)fr_hdr(fp);
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+
+	if (fnic->state == FNIC_IN_ETH_MODE) {
+
+		/*
+		 * Check if oxid matches on taking the lock. A new Flogi
+		 * issued by libFC might have changed the fnic cached oxid
+		 */
+		if (fnic->flogi_oxid != ntohs(fh->fh_ox_id)) {
+			FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
+				     "Flogi response oxid not"
+				     " matching cached oxid, dropping frame"
+				     "\n");
+			ret = -1;
+			spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+			dev_kfree_skb_irq(fp_skb(fp));
+			goto handle_flogi_resp_end;
+		}
+
+		/* Drop older cached flogi response frame, cache this frame */
+		old_flogi_resp = fnic->flogi_resp;
+		fnic->flogi_resp = fp;
+		fnic->flogi_oxid = FC_XID_UNKNOWN;
+
+		/*
+		 * this frame is part of flogi get the src mac addr from this
+		 * frame if the src mac is fcoui based then we mark the
+		 * address mode flag to use fcoui base for dst mac addr
+		 * otherwise we have to store the fcoe gateway addr
+		 */
+		eth_hdr = (struct ethhdr *)skb_mac_header(fp_skb(fp));
+		memcpy(mac, eth_hdr->h_source, ETH_ALEN);
+
+		if (ntoh24(mac) == FC_FCOE_OUI)
+			fnic->fcoui_mode = 1;
+		else {
+			fnic->fcoui_mode = 0;
+			memcpy(fnic->dest_addr, mac, ETH_ALEN);
+		}
+
+		/*
+		 * Except for Flogi frame, all outbound frames from us have the
+		 * Eth Src address as FC_FCOE_OUI"our_sid". Flogi frame uses
+		 * the vnic MAC address as the Eth Src address
+		 */
+		fc_fcoe_set_mac(fnic->data_src_addr, fh->fh_d_id);
+
+		/* We get our s_id from the d_id of the flogi resp frame */
+		fnic->s_id = ntoh24(fh->fh_d_id);
+
+		/* Change state to reflect transition from Eth to FC mode */
+		fnic->state = FNIC_IN_ETH_TRANS_FC_MODE;
+
+	} else {
+		FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
+			     "Unexpected fnic state %s while"
+			     " processing flogi resp\n",
+			     fnic_state_to_str(fnic->state));
+		ret = -1;
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		dev_kfree_skb_irq(fp_skb(fp));
+		goto handle_flogi_resp_end;
+	}
+
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	/* Drop older cached frame */
+	if (old_flogi_resp)
+		dev_kfree_skb_irq(fp_skb(old_flogi_resp));
+
+	/*
+	 * send flogi reg request to firmware, this will put the fnic in
+	 * in FC mode
+	 */
+	ret = fnic_flogi_reg_handler(fnic);
+
+	if (ret < 0) {
+		int free_fp = 1;
+		spin_lock_irqsave(&fnic->fnic_lock, flags);
+		/*
+		 * free the frame is some other thread is not
+		 * pointing to it
+		 */
+		if (fnic->flogi_resp != fp)
+			free_fp = 0;
+		else
+			fnic->flogi_resp = NULL;
+
+		if (fnic->state == FNIC_IN_ETH_TRANS_FC_MODE)
+			fnic->state = FNIC_IN_ETH_MODE;
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		if (free_fp)
+			dev_kfree_skb_irq(fp_skb(fp));
+	}
+
+ handle_flogi_resp_end:
+	return ret;
+}
+
+/* Returns 1 for a response that matches cached flogi oxid */
+static inline int is_matching_flogi_resp_frame(struct fnic *fnic,
+					       struct fc_frame *fp)
+{
+	struct fc_frame_header *fh;
+	int ret = 0;
+	u32 f_ctl;
+
+	fh = fc_frame_header_get(fp);
+	f_ctl = ntoh24(fh->fh_f_ctl);
+
+	if (fnic->flogi_oxid == ntohs(fh->fh_ox_id) &&
+	    fh->fh_r_ctl == FC_RCTL_ELS_REP &&
+	    (f_ctl & (FC_FC_EX_CTX | FC_FC_SEQ_CTX)) == FC_FC_EX_CTX &&
+	    fh->fh_type == FC_TYPE_ELS)
+		ret = 1;
+
+	return ret;
+}
+
+static void fnic_rq_cmpl_frame_recv(struct vnic_rq *rq, struct cq_desc
+				    *cq_desc, struct vnic_rq_buf *buf,
+				    int skipped __attribute__((unused)),
+				    void *opaque)
+{
+	struct fnic *fnic = vnic_dev_priv(rq->vdev);
+	struct sk_buff *skb;
+	struct fc_frame *fp;
+	unsigned int eth_hdrs_stripped;
+	u8 type, color, eop, sop, ingress_port, vlan_stripped;
+	u8 fcoe = 0, fcoe_sof, fcoe_eof;
+	u8 fcoe_fc_crc_ok = 1, fcoe_enc_error = 0;
+	u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
+	u8 ipv6, ipv4, ipv4_fragment, rss_type, csum_not_calc;
+	u8 fcs_ok = 1, packet_error = 0;
+	u16 q_number, completed_index, bytes_written = 0, vlan, checksum;
+	u32 rss_hash;
+	u16 exchange_id, tmpl;
+	u8 sof = 0;
+	u8 eof = 0;
+	u32 fcp_bytes_written = 0;
+	unsigned long flags;
+
+	pci_unmap_single(fnic->pdev, buf->dma_addr, buf->len,
+			 PCI_DMA_FROMDEVICE);
+	skb = buf->os_buf;
+	buf->os_buf = NULL;
+
+	cq_desc_dec(cq_desc, &type, &color, &q_number, &completed_index);
+	if (type == CQ_DESC_TYPE_RQ_FCP) {
+		cq_fcp_rq_desc_dec((struct cq_fcp_rq_desc *)cq_desc,
+				   &type, &color, &q_number, &completed_index,
+				   &eop, &sop, &fcoe_fc_crc_ok, &exchange_id,
+				   &tmpl, &fcp_bytes_written, &sof, &eof,
+				   &ingress_port, &packet_error,
+				   &fcoe_enc_error, &fcs_ok, &vlan_stripped,
+				   &vlan);
+		eth_hdrs_stripped = 1;
+
+	} else if (type == CQ_DESC_TYPE_RQ_ENET) {
+		cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
+				    &type, &color, &q_number, &completed_index,
+				    &ingress_port, &fcoe, &eop, &sop,
+				    &rss_type, &csum_not_calc, &rss_hash,
+				    &bytes_written, &packet_error,
+				    &vlan_stripped, &vlan, &checksum,
+				    &fcoe_sof, &fcoe_fc_crc_ok,
+				    &fcoe_enc_error, &fcoe_eof,
+				    &tcp_udp_csum_ok, &udp, &tcp,
+				    &ipv4_csum_ok, &ipv6, &ipv4,
+				    &ipv4_fragment, &fcs_ok);
+		eth_hdrs_stripped = 0;
+
+	} else {
+		/* wrong CQ type*/
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "fnic rq_cmpl wrong cq type x%x\n", type);
+		goto drop;
+	}
+
+	if (!fcs_ok || packet_error || !fcoe_fc_crc_ok || fcoe_enc_error) {
+		FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
+			     "fnic rq_cmpl fcoe x%x fcsok x%x"
+			     " pkterr x%x fcoe_fc_crc_ok x%x, fcoe_enc_err"
+			     " x%x\n",
+			     fcoe, fcs_ok, packet_error,
+			     fcoe_fc_crc_ok, fcoe_enc_error);
+		goto drop;
+	}
+
+	if (eth_hdrs_stripped)
+		fnic_import_rq_fc_frame(skb, fcp_bytes_written, sof, eof);
+	else if (fnic_import_rq_eth_pkt(skb, bytes_written))
+		goto drop;
+
+	fp = (struct fc_frame *)skb;
+
+	/*
+	 * If frame is an ELS response that matches the cached FLOGI OX_ID,
+	 * and is accept, issue flogi_reg_request copy wq request to firmware
+	 * to register the S_ID and determine whether FC_OUI mode or GW mode.
+	 */
+	if (is_matching_flogi_resp_frame(fnic, fp)) {
+		if (!eth_hdrs_stripped) {
+			if (fc_frame_payload_op(fp) == ELS_LS_ACC) {
+				fnic_handle_flogi_resp(fnic, fp);
+				return;
+			}
+			/*
+			 * Recd. Flogi reject. No point registering
+			 * with fw, but forward to libFC
+			 */
+			goto forward;
+		}
+		goto drop;
+	}
+	if (!eth_hdrs_stripped)
+		goto drop;
+
+forward:
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	if (fnic->stop_rx_link_events) {
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		goto drop;
+	}
+	/* Use fr_flags to indicate whether succ. flogi resp or not */
+	fr_flags(fp) = 0;
+	fr_dev(fp) = fnic->lport;
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	skb_queue_tail(&fnic->frame_queue, skb);
+	queue_work(fnic_event_queue, &fnic->frame_work);
+
+	return;
+drop:
+	dev_kfree_skb_irq(skb);
+}
+
+static int fnic_rq_cmpl_handler_cont(struct vnic_dev *vdev,
+				     struct cq_desc *cq_desc, u8 type,
+				     u16 q_number, u16 completed_index,
+				     void *opaque)
+{
+	struct fnic *fnic = vnic_dev_priv(vdev);
+
+	vnic_rq_service(&fnic->rq[q_number], cq_desc, completed_index,
+			VNIC_RQ_RETURN_DESC, fnic_rq_cmpl_frame_recv,
+			NULL);
+	return 0;
+}
+
+int fnic_rq_cmpl_handler(struct fnic *fnic, int rq_work_to_do)
+{
+	unsigned int tot_rq_work_done = 0, cur_work_done;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < fnic->rq_count; i++) {
+		cur_work_done = vnic_cq_service(&fnic->cq[i], rq_work_to_do,
+						fnic_rq_cmpl_handler_cont,
+						NULL);
+		if (cur_work_done) {
+			err = vnic_rq_fill(&fnic->rq[i], fnic_alloc_rq_frame);
+			if (err)
+				shost_printk(KERN_ERR, fnic->lport->host,
+					     "fnic_alloc_rq_frame cant alloc"
+					     " frame\n");
+		}
+		tot_rq_work_done += cur_work_done;
+	}
+
+	return tot_rq_work_done;
+}
+
+/*
+ * This function is called once at init time to allocate and fill RQ
+ * buffers. Subsequently, it is called in the interrupt context after RQ
+ * buffer processing to replenish the buffers in the RQ
+ */
+int fnic_alloc_rq_frame(struct vnic_rq *rq)
+{
+	struct fnic *fnic = vnic_dev_priv(rq->vdev);
+	struct sk_buff *skb;
+	u16 len;
+	dma_addr_t pa;
+
+	len = FC_FRAME_HEADROOM + FC_MAX_FRAME + FC_FRAME_TAILROOM;
+	skb = dev_alloc_skb(len);
+	if (!skb) {
+		FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host,
+			     "Unable to allocate RQ sk_buff\n");
+		return -ENOMEM;
+	}
+	skb_reset_mac_header(skb);
+	skb_reset_transport_header(skb);
+	skb_reset_network_header(skb);
+	skb_put(skb, len);
+	pa = pci_map_single(fnic->pdev, skb->data, len, PCI_DMA_FROMDEVICE);
+	fnic_queue_rq_desc(rq, skb, pa, len);
+	return 0;
+}
+
+void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
+{
+	struct fc_frame *fp = buf->os_buf;
+	struct fnic *fnic = vnic_dev_priv(rq->vdev);
+
+	pci_unmap_single(fnic->pdev, buf->dma_addr, buf->len,
+			 PCI_DMA_FROMDEVICE);
+
+	dev_kfree_skb(fp_skb(fp));
+	buf->os_buf = NULL;
+}
+
+static inline int is_flogi_frame(struct fc_frame_header *fh)
+{
+	return fh->fh_r_ctl == FC_RCTL_ELS_REQ && *(u8 *)(fh + 1) == ELS_FLOGI;
+}
+
+int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp)
+{
+	struct vnic_wq *wq = &fnic->wq[0];
+	struct sk_buff *skb;
+	dma_addr_t pa;
+	struct ethhdr *eth_hdr;
+	struct vlan_ethhdr *vlan_hdr;
+	struct fcoe_hdr *fcoe_hdr;
+	struct fc_frame_header *fh;
+	u32 tot_len, eth_hdr_len;
+	int ret = 0;
+	unsigned long flags;
+
+	fh = fc_frame_header_get(fp);
+	skb = fp_skb(fp);
+
+	if (!fnic->vlan_hw_insert) {
+		eth_hdr_len = sizeof(*vlan_hdr) + sizeof(*fcoe_hdr);
+		vlan_hdr = (struct vlan_ethhdr *)skb_push(skb, eth_hdr_len);
+		eth_hdr = (struct ethhdr *)vlan_hdr;
+		vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
+		vlan_hdr->h_vlan_encapsulated_proto = htons(ETH_P_FCOE);
+		vlan_hdr->h_vlan_TCI = htons(fnic->vlan_id);
+		fcoe_hdr = (struct fcoe_hdr *)(vlan_hdr + 1);
+	} else {
+		eth_hdr_len = sizeof(*eth_hdr) + sizeof(*fcoe_hdr);
+		eth_hdr = (struct ethhdr *)skb_push(skb, eth_hdr_len);
+		eth_hdr->h_proto = htons(ETH_P_FCOE);
+		fcoe_hdr = (struct fcoe_hdr *)(eth_hdr + 1);
+	}
+
+	if (is_flogi_frame(fh)) {
+		fc_fcoe_set_mac(eth_hdr->h_dest, fh->fh_d_id);
+		memcpy(eth_hdr->h_source, fnic->mac_addr, ETH_ALEN);
+	} else {
+		if (fnic->fcoui_mode)
+			fc_fcoe_set_mac(eth_hdr->h_dest, fh->fh_d_id);
+		else
+			memcpy(eth_hdr->h_dest, fnic->dest_addr, ETH_ALEN);
+		memcpy(eth_hdr->h_source, fnic->data_src_addr, ETH_ALEN);
+	}
+
+	tot_len = skb->len;
+	BUG_ON(tot_len % 4);
+
+	memset(fcoe_hdr, 0, sizeof(*fcoe_hdr));
+	fcoe_hdr->fcoe_sof = fr_sof(fp);
+	if (FC_FCOE_VER)
+		FC_FCOE_ENCAPS_VER(fcoe_hdr, FC_FCOE_VER);
+
+	pa = pci_map_single(fnic->pdev, eth_hdr, tot_len, PCI_DMA_TODEVICE);
+
+	spin_lock_irqsave(&fnic->wq_lock[0], flags);
+
+	if (!vnic_wq_desc_avail(wq)) {
+		pci_unmap_single(fnic->pdev, pa,
+				 tot_len, PCI_DMA_TODEVICE);
+		ret = -1;
+		goto fnic_send_frame_end;
+	}
+
+	fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp),
+			   fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1);
+fnic_send_frame_end:
+	spin_unlock_irqrestore(&fnic->wq_lock[0], flags);
+
+	if (ret)
+		dev_kfree_skb_any(fp_skb(fp));
+
+	return ret;
+}
+
+/*
+ * fnic_send
+ * Routine to send a raw frame
+ */
+int fnic_send(struct fc_lport *lp, struct fc_frame *fp)
+{
+	struct fnic *fnic = lport_priv(lp);
+	struct fc_frame_header *fh;
+	int ret = 0;
+	enum fnic_state old_state;
+	unsigned long flags;
+	struct fc_frame *old_flogi = NULL;
+	struct fc_frame *old_flogi_resp = NULL;
+
+	if (fnic->in_remove) {
+		dev_kfree_skb(fp_skb(fp));
+		ret = -1;
+		goto fnic_send_end;
+	}
+
+	fh = fc_frame_header_get(fp);
+	/* if not an Flogi frame, send it out, this is the common case */
+	if (!is_flogi_frame(fh))
+		return fnic_send_frame(fnic, fp);
+
+	/* Flogi frame, now enter the state machine */
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+again:
+	/* Get any old cached frames, free them after dropping lock */
+	old_flogi = fnic->flogi;
+	fnic->flogi = NULL;
+	old_flogi_resp = fnic->flogi_resp;
+	fnic->flogi_resp = NULL;
+
+	fnic->flogi_oxid = FC_XID_UNKNOWN;
+
+	old_state = fnic->state;
+	switch (old_state) {
+	case FNIC_IN_FC_MODE:
+	case FNIC_IN_ETH_TRANS_FC_MODE:
+	default:
+		fnic->state = FNIC_IN_FC_TRANS_ETH_MODE;
+		vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr);
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+		if (old_flogi) {
+			dev_kfree_skb(fp_skb(old_flogi));
+			old_flogi = NULL;
+		}
+		if (old_flogi_resp) {
+			dev_kfree_skb(fp_skb(old_flogi_resp));
+			old_flogi_resp = NULL;
+		}
+
+		ret = fnic_fw_reset_handler(fnic);
+
+		spin_lock_irqsave(&fnic->fnic_lock, flags);
+		if (fnic->state != FNIC_IN_FC_TRANS_ETH_MODE)
+			goto again;
+		if (ret) {
+			fnic->state = old_state;
+			spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+			dev_kfree_skb(fp_skb(fp));
+			goto fnic_send_end;
+		}
+		old_flogi = fnic->flogi;
+		fnic->flogi = fp;
+		fnic->flogi_oxid = ntohs(fh->fh_ox_id);
+		old_flogi_resp = fnic->flogi_resp;
+		fnic->flogi_resp = NULL;
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		break;
+
+	case FNIC_IN_FC_TRANS_ETH_MODE:
+		/*
+		 * A reset is pending with the firmware. Store the flogi
+		 * and its oxid. The transition out of this state happens
+		 * only when Firmware completes the reset, either with
+		 * success or failed. If success, transition to
+		 * FNIC_IN_ETH_MODE, if fail, then transition to
+		 * FNIC_IN_FC_MODE
+		 */
+		fnic->flogi = fp;
+		fnic->flogi_oxid = ntohs(fh->fh_ox_id);
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		break;
+
+	case FNIC_IN_ETH_MODE:
+		/*
+		 * The fw/hw is already in eth mode. Store the oxid,
+		 * and send the flogi frame out. The transition out of this
+		 * state happens only we receive flogi response from the
+		 * network, and the oxid matches the cached oxid when the
+		 * flogi frame was sent out. If they match, then we issue
+		 * a flogi_reg request and transition to state
+		 * FNIC_IN_ETH_TRANS_FC_MODE
+		 */
+		fnic->flogi_oxid = ntohs(fh->fh_ox_id);
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		ret = fnic_send_frame(fnic, fp);
+		break;
+	}
+
+fnic_send_end:
+	if (old_flogi)
+		dev_kfree_skb(fp_skb(old_flogi));
+	if (old_flogi_resp)
+		dev_kfree_skb(fp_skb(old_flogi_resp));
+	return ret;
+}
+
+static void fnic_wq_complete_frame_send(struct vnic_wq *wq,
+					struct cq_desc *cq_desc,
+					struct vnic_wq_buf *buf, void *opaque)
+{
+	struct sk_buff *skb = buf->os_buf;
+	struct fc_frame *fp = (struct fc_frame *)skb;
+	struct fnic *fnic = vnic_dev_priv(wq->vdev);
+
+	pci_unmap_single(fnic->pdev, buf->dma_addr,
+			 buf->len, PCI_DMA_TODEVICE);
+	dev_kfree_skb_irq(fp_skb(fp));
+	buf->os_buf = NULL;
+}
+
+static int fnic_wq_cmpl_handler_cont(struct vnic_dev *vdev,
+				     struct cq_desc *cq_desc, u8 type,
+				     u16 q_number, u16 completed_index,
+				     void *opaque)
+{
+	struct fnic *fnic = vnic_dev_priv(vdev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fnic->wq_lock[q_number], flags);
+	vnic_wq_service(&fnic->wq[q_number], cq_desc, completed_index,
+			fnic_wq_complete_frame_send, NULL);
+	spin_unlock_irqrestore(&fnic->wq_lock[q_number], flags);
+
+	return 0;
+}
+
+int fnic_wq_cmpl_handler(struct fnic *fnic, int work_to_do)
+{
+	unsigned int wq_work_done = 0;
+	unsigned int i;
+
+	for (i = 0; i < fnic->raw_wq_count; i++) {
+		wq_work_done  += vnic_cq_service(&fnic->cq[fnic->rq_count+i],
+						 work_to_do,
+						 fnic_wq_cmpl_handler_cont,
+						 NULL);
+	}
+
+	return wq_work_done;
+}
+
+
+void fnic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
+{
+	struct fc_frame *fp = buf->os_buf;
+	struct fnic *fnic = vnic_dev_priv(wq->vdev);
+
+	pci_unmap_single(fnic->pdev, buf->dma_addr,
+			 buf->len, PCI_DMA_TODEVICE);
+
+	dev_kfree_skb(fp_skb(fp));
+	buf->os_buf = NULL;
+}
diff --git a/drivers/scsi/fnic/fnic_io.h b/drivers/scsi/fnic/fnic_io.h
new file mode 100644
index 0000000..f0b8969
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_io.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _FNIC_IO_H_
+#define _FNIC_IO_H_
+
+#include <scsi/fc/fc_fcp.h>
+
+#define FNIC_DFLT_SG_DESC_CNT  32
+#define FNIC_MAX_SG_DESC_CNT        1024    /* Maximum descriptors per sgl */
+#define FNIC_SG_DESC_ALIGN          16      /* Descriptor address alignment */
+
+struct host_sg_desc {
+	__le64 addr;
+	__le32 len;
+	u32 _resvd;
+};
+
+struct fnic_dflt_sgl_list {
+	struct host_sg_desc sg_desc[FNIC_DFLT_SG_DESC_CNT];
+};
+
+struct fnic_sgl_list {
+	struct host_sg_desc sg_desc[FNIC_MAX_SG_DESC_CNT];
+};
+
+enum fnic_sgl_list_type {
+	FNIC_SGL_CACHE_DFLT = 0,  /* cache with default size sgl */
+	FNIC_SGL_CACHE_MAX,       /* cache with max size sgl */
+	FNIC_SGL_NUM_CACHES       /* number of sgl caches */
+};
+
+enum fnic_ioreq_state {
+	FNIC_IOREQ_CMD_PENDING = 0,
+	FNIC_IOREQ_ABTS_PENDING,
+	FNIC_IOREQ_ABTS_COMPLETE,
+	FNIC_IOREQ_CMD_COMPLETE,
+};
+
+struct fnic_io_req {
+	struct host_sg_desc *sgl_list; /* sgl list */
+	void *sgl_list_alloc; /* sgl list address used for free */
+	dma_addr_t sense_buf_pa; /* dma address for sense buffer*/
+	dma_addr_t sgl_list_pa;	/* dma address for sgl list */
+	u16 sgl_cnt;
+	u8 sgl_type; /* device DMA descriptor list type */
+	u8 io_completed:1; /* set to 1 when fw completes IO */
+	u32 port_id; /* remote port DID */
+	struct completion *abts_done; /* completion for abts */
+	struct completion *dr_done; /* completion for device reset */
+};
+
+#endif /* _FNIC_IO_H_ */
diff --git a/drivers/scsi/fnic/fnic_isr.c b/drivers/scsi/fnic/fnic_isr.c
new file mode 100644
index 0000000..2b30648
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_isr.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <scsi/libfc.h>
+#include <scsi/fc_frame.h>
+#include "vnic_dev.h"
+#include "vnic_intr.h"
+#include "vnic_stats.h"
+#include "fnic_io.h"
+#include "fnic.h"
+
+static irqreturn_t fnic_isr_legacy(int irq, void *data)
+{
+	struct fnic *fnic = data;
+	u32 pba;
+	unsigned long work_done = 0;
+
+	pba = vnic_intr_legacy_pba(fnic->legacy_pba);
+	if (!pba)
+		return IRQ_NONE;
+
+	if (pba & (1 << FNIC_INTX_NOTIFY)) {
+		vnic_intr_return_all_credits(&fnic->intr[FNIC_INTX_NOTIFY]);
+		fnic_handle_link_event(fnic);
+	}
+
+	if (pba & (1 << FNIC_INTX_ERR)) {
+		vnic_intr_return_all_credits(&fnic->intr[FNIC_INTX_ERR]);
+		fnic_log_q_error(fnic);
+	}
+
+	if (pba & (1 << FNIC_INTX_WQ_RQ_COPYWQ)) {
+		work_done += fnic_wq_copy_cmpl_handler(fnic, 8);
+		work_done += fnic_wq_cmpl_handler(fnic, 4);
+		work_done += fnic_rq_cmpl_handler(fnic, 4);
+
+		vnic_intr_return_credits(&fnic->intr[FNIC_INTX_WQ_RQ_COPYWQ],
+					 work_done,
+					 1 /* unmask intr */,
+					 1 /* reset intr timer */);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fnic_isr_msi(int irq, void *data)
+{
+	struct fnic *fnic = data;
+	unsigned long work_done = 0;
+
+	work_done += fnic_wq_copy_cmpl_handler(fnic, 8);
+	work_done += fnic_wq_cmpl_handler(fnic, 4);
+	work_done += fnic_rq_cmpl_handler(fnic, 4);
+
+	vnic_intr_return_credits(&fnic->intr[0],
+				 work_done,
+				 1 /* unmask intr */,
+				 1 /* reset intr timer */);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fnic_isr_msix_rq(int irq, void *data)
+{
+	struct fnic *fnic = data;
+	unsigned long rq_work_done = 0;
+
+	rq_work_done = fnic_rq_cmpl_handler(fnic, 4);
+	vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_RQ],
+				 rq_work_done,
+				 1 /* unmask intr */,
+				 1 /* reset intr timer */);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fnic_isr_msix_wq(int irq, void *data)
+{
+	struct fnic *fnic = data;
+	unsigned long wq_work_done = 0;
+
+	wq_work_done = fnic_wq_cmpl_handler(fnic, 4);
+	vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_WQ],
+				 wq_work_done,
+				 1 /* unmask intr */,
+				 1 /* reset intr timer */);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fnic_isr_msix_wq_copy(int irq, void *data)
+{
+	struct fnic *fnic = data;
+	unsigned long wq_copy_work_done = 0;
+
+	wq_copy_work_done = fnic_wq_copy_cmpl_handler(fnic, 8);
+	vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_WQ_COPY],
+				 wq_copy_work_done,
+				 1 /* unmask intr */,
+				 1 /* reset intr timer */);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fnic_isr_msix_err_notify(int irq, void *data)
+{
+	struct fnic *fnic = data;
+
+	vnic_intr_return_all_credits(&fnic->intr[FNIC_MSIX_ERR_NOTIFY]);
+	fnic_log_q_error(fnic);
+	fnic_handle_link_event(fnic);
+
+	return IRQ_HANDLED;
+}
+
+void fnic_free_intr(struct fnic *fnic)
+{
+	int i;
+
+	switch (vnic_dev_get_intr_mode(fnic->vdev)) {
+	case VNIC_DEV_INTR_MODE_INTX:
+	case VNIC_DEV_INTR_MODE_MSI:
+		free_irq(fnic->pdev->irq, fnic);
+		break;
+
+	case VNIC_DEV_INTR_MODE_MSIX:
+		for (i = 0; i < ARRAY_SIZE(fnic->msix); i++)
+			if (fnic->msix[i].requested)
+				free_irq(fnic->msix_entry[i].vector,
+					 fnic->msix[i].devid);
+		break;
+
+	default:
+		break;
+	}
+}
+
+int fnic_request_intr(struct fnic *fnic)
+{
+	int err = 0;
+	int i;
+
+	switch (vnic_dev_get_intr_mode(fnic->vdev)) {
+
+	case VNIC_DEV_INTR_MODE_INTX:
+		err = request_irq(fnic->pdev->irq, &fnic_isr_legacy,
+				  IRQF_SHARED, DRV_NAME, fnic);
+		break;
+
+	case VNIC_DEV_INTR_MODE_MSI:
+		err = request_irq(fnic->pdev->irq, &fnic_isr_msi,
+				  0, fnic->name, fnic);
+		break;
+
+	case VNIC_DEV_INTR_MODE_MSIX:
+
+		sprintf(fnic->msix[FNIC_MSIX_RQ].devname,
+			"%.11s-fcs-rq", fnic->name);
+		fnic->msix[FNIC_MSIX_RQ].isr = fnic_isr_msix_rq;
+		fnic->msix[FNIC_MSIX_RQ].devid = fnic;
+
+		sprintf(fnic->msix[FNIC_MSIX_WQ].devname,
+			"%.11s-fcs-wq", fnic->name);
+		fnic->msix[FNIC_MSIX_WQ].isr = fnic_isr_msix_wq;
+		fnic->msix[FNIC_MSIX_WQ].devid = fnic;
+
+		sprintf(fnic->msix[FNIC_MSIX_WQ_COPY].devname,
+			"%.11s-scsi-wq", fnic->name);
+		fnic->msix[FNIC_MSIX_WQ_COPY].isr = fnic_isr_msix_wq_copy;
+		fnic->msix[FNIC_MSIX_WQ_COPY].devid = fnic;
+
+		sprintf(fnic->msix[FNIC_MSIX_ERR_NOTIFY].devname,
+			"%.11s-err-notify", fnic->name);
+		fnic->msix[FNIC_MSIX_ERR_NOTIFY].isr =
+			fnic_isr_msix_err_notify;
+		fnic->msix[FNIC_MSIX_ERR_NOTIFY].devid = fnic;
+
+		for (i = 0; i < ARRAY_SIZE(fnic->msix); i++) {
+			err = request_irq(fnic->msix_entry[i].vector,
+					  fnic->msix[i].isr, 0,
+					  fnic->msix[i].devname,
+					  fnic->msix[i].devid);
+			if (err) {
+				shost_printk(KERN_ERR, fnic->lport->host,
+					     "MSIX: request_irq"
+					     " failed %d\n", err);
+				fnic_free_intr(fnic);
+				break;
+			}
+			fnic->msix[i].requested = 1;
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	return err;
+}
+
+int fnic_set_intr_mode(struct fnic *fnic)
+{
+	unsigned int n = ARRAY_SIZE(fnic->rq);
+	unsigned int m = ARRAY_SIZE(fnic->wq);
+	unsigned int o = ARRAY_SIZE(fnic->wq_copy);
+	unsigned int i;
+
+	/*
+	 * Set interrupt mode (INTx, MSI, MSI-X) depending
+	 * system capabilities.
+	 *
+	 * Try MSI-X first
+	 *
+	 * We need n RQs, m WQs, o Copy WQs, n+m+o CQs, and n+m+o+1 INTRs
+	 * (last INTR is used for WQ/RQ errors and notification area)
+	 */
+
+	BUG_ON(ARRAY_SIZE(fnic->msix_entry) < n + m + o + 1);
+	for (i = 0; i < n + m + o + 1; i++)
+		fnic->msix_entry[i].entry = i;
+
+	if (fnic->rq_count >= n &&
+	    fnic->raw_wq_count >= m &&
+	    fnic->wq_copy_count >= o &&
+	    fnic->cq_count >= n + m + o) {
+		if (!pci_enable_msix(fnic->pdev, fnic->msix_entry,
+				    n + m + o + 1)) {
+			fnic->rq_count = n;
+			fnic->raw_wq_count = m;
+			fnic->wq_copy_count = o;
+			fnic->wq_count = m + o;
+			fnic->cq_count = n + m + o;
+			fnic->intr_count = n + m + o + 1;
+			fnic->err_intr_offset = FNIC_MSIX_ERR_NOTIFY;
+
+			FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host,
+				     "Using MSI-X Interrupts\n");
+			vnic_dev_set_intr_mode(fnic->vdev,
+					       VNIC_DEV_INTR_MODE_MSIX);
+			return 0;
+		}
+	}
+
+	/*
+	 * Next try MSI
+	 * We need 1 RQ, 1 WQ, 1 WQ_COPY, 3 CQs, and 1 INTR
+	 */
+	if (fnic->rq_count >= 1 &&
+	    fnic->raw_wq_count >= 1 &&
+	    fnic->wq_copy_count >= 1 &&
+	    fnic->cq_count >= 3 &&
+	    fnic->intr_count >= 1 &&
+	    !pci_enable_msi(fnic->pdev)) {
+
+		fnic->rq_count = 1;
+		fnic->raw_wq_count = 1;
+		fnic->wq_copy_count = 1;
+		fnic->wq_count = 2;
+		fnic->cq_count = 3;
+		fnic->intr_count = 1;
+		fnic->err_intr_offset = 0;
+
+		FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host,
+			     "Using MSI Interrupts\n");
+		vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_MSI);
+
+		return 0;
+	}
+
+	/*
+	 * Next try INTx
+	 * We need 1 RQ, 1 WQ, 1 WQ_COPY, 3 CQs, and 3 INTRs
+	 * 1 INTR is used for all 3 queues, 1 INTR for queue errors
+	 * 1 INTR for notification area
+	 */
+
+	if (fnic->rq_count >= 1 &&
+	    fnic->raw_wq_count >= 1 &&
+	    fnic->wq_copy_count >= 1 &&
+	    fnic->cq_count >= 3 &&
+	    fnic->intr_count >= 3) {
+
+		fnic->rq_count = 1;
+		fnic->raw_wq_count = 1;
+		fnic->wq_copy_count = 1;
+		fnic->cq_count = 3;
+		fnic->intr_count = 3;
+
+		FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host,
+			     "Using Legacy Interrupts\n");
+		vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_INTX);
+
+		return 0;
+	}
+
+	vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);
+
+	return -EINVAL;
+}
+
+void fnic_clear_intr_mode(struct fnic *fnic)
+{
+	switch (vnic_dev_get_intr_mode(fnic->vdev)) {
+	case VNIC_DEV_INTR_MODE_MSIX:
+		pci_disable_msix(fnic->pdev);
+		break;
+	case VNIC_DEV_INTR_MODE_MSI:
+		pci_disable_msi(fnic->pdev);
+		break;
+	default:
+		break;
+	}
+
+	vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_INTX);
+}
+
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
new file mode 100644
index 0000000..32ef6b8
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -0,0 +1,942 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/mempool.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_fc.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/libfc.h>
+#include <scsi/fc_frame.h>
+
+#include "vnic_dev.h"
+#include "vnic_intr.h"
+#include "vnic_stats.h"
+#include "fnic_io.h"
+#include "fnic.h"
+
+#define PCI_DEVICE_ID_CISCO_FNIC	0x0045
+
+/* Timer to poll notification area for events. Used for MSI interrupts */
+#define FNIC_NOTIFY_TIMER_PERIOD	(2 * HZ)
+
+static struct kmem_cache *fnic_sgl_cache[FNIC_SGL_NUM_CACHES];
+static struct kmem_cache *fnic_io_req_cache;
+LIST_HEAD(fnic_list);
+DEFINE_SPINLOCK(fnic_list_lock);
+
+/* Supported devices by fnic module */
+static struct pci_device_id fnic_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_FNIC) },
+	{ 0, }
+};
+
+MODULE_DESCRIPTION(DRV_DESCRIPTION);
+MODULE_AUTHOR("Abhijeet Joglekar <abjoglek@cisco.com>, "
+	      "Joseph R. Eykholt <jeykholt@cisco.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, fnic_id_table);
+
+unsigned int fnic_log_level;
+module_param(fnic_log_level, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(fnic_log_level, "bit mask of fnic logging levels");
+
+
+static struct libfc_function_template fnic_transport_template = {
+	.frame_send = fnic_send,
+	.fcp_abort_io = fnic_empty_scsi_cleanup,
+	.fcp_cleanup = fnic_empty_scsi_cleanup,
+	.exch_mgr_reset = fnic_exch_mgr_reset
+};
+
+static int fnic_slave_alloc(struct scsi_device *sdev)
+{
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+	struct fc_lport *lp = shost_priv(sdev->host);
+	struct fnic *fnic = lport_priv(lp);
+
+	sdev->tagged_supported = 1;
+
+	if (!rport || fc_remote_port_chkready(rport))
+		return -ENXIO;
+
+	scsi_activate_tcq(sdev, FNIC_DFLT_QUEUE_DEPTH);
+	rport->dev_loss_tmo = fnic->config.port_down_timeout / 1000;
+
+	return 0;
+}
+
+static struct scsi_host_template fnic_host_template = {
+	.module = THIS_MODULE,
+	.name = DRV_NAME,
+	.queuecommand = fnic_queuecommand,
+	.eh_abort_handler = fnic_abort_cmd,
+	.eh_device_reset_handler = fnic_device_reset,
+	.eh_host_reset_handler = fnic_host_reset,
+	.slave_alloc = fnic_slave_alloc,
+	.change_queue_depth = fc_change_queue_depth,
+	.change_queue_type = fc_change_queue_type,
+	.this_id = -1,
+	.cmd_per_lun = 3,
+	.can_queue = FNIC_MAX_IO_REQ,
+	.use_clustering = ENABLE_CLUSTERING,
+	.sg_tablesize = FNIC_MAX_SG_DESC_CNT,
+	.max_sectors = 0xffff,
+	.shost_attrs = fnic_attrs,
+};
+
+static void fnic_get_host_speed(struct Scsi_Host *shost);
+static struct scsi_transport_template *fnic_fc_transport;
+static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *);
+
+static struct fc_function_template fnic_fc_functions = {
+
+	.show_host_node_name = 1,
+	.show_host_port_name = 1,
+	.show_host_supported_classes = 1,
+	.show_host_supported_fc4s = 1,
+	.show_host_active_fc4s = 1,
+	.show_host_maxframe_size = 1,
+	.show_host_port_id = 1,
+	.show_host_supported_speeds = 1,
+	.get_host_speed = fnic_get_host_speed,
+	.show_host_speed = 1,
+	.show_host_port_type = 1,
+	.get_host_port_state = fc_get_host_port_state,
+	.show_host_port_state = 1,
+	.show_host_symbolic_name = 1,
+	.show_rport_maxframe_size = 1,
+	.show_rport_supported_classes = 1,
+	.show_host_fabric_name = 1,
+	.show_starget_node_name = 1,
+	.show_starget_port_name = 1,
+	.show_starget_port_id = 1,
+	.show_rport_dev_loss_tmo = 1,
+	.issue_fc_host_lip = fnic_reset,
+	.get_fc_host_stats = fnic_get_stats,
+	.dd_fcrport_size = sizeof(struct fc_rport_libfc_priv),
+	.terminate_rport_io = fnic_terminate_rport_io,
+};
+
+static void fnic_get_host_speed(struct Scsi_Host *shost)
+{
+	struct fc_lport *lp = shost_priv(shost);
+	struct fnic *fnic = lport_priv(lp);
+	u32 port_speed = vnic_dev_port_speed(fnic->vdev);
+
+	/* Add in other values as they get defined in fw */
+	switch (port_speed) {
+	case 10000:
+		fc_host_speed(shost) = FC_PORTSPEED_10GBIT;
+		break;
+	default:
+		fc_host_speed(shost) = FC_PORTSPEED_10GBIT;
+		break;
+	}
+}
+
+static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *host)
+{
+	int ret;
+	struct fc_lport *lp = shost_priv(host);
+	struct fnic *fnic = lport_priv(lp);
+	struct fc_host_statistics *stats = &lp->host_stats;
+	struct vnic_stats *vs;
+	unsigned long flags;
+
+	if (time_before(jiffies, fnic->stats_time + HZ / FNIC_STATS_RATE_LIMIT))
+		return stats;
+	fnic->stats_time = jiffies;
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	ret = vnic_dev_stats_dump(fnic->vdev, &fnic->stats);
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	if (ret) {
+		FNIC_MAIN_DBG(KERN_DEBUG, fnic->lport->host,
+			      "fnic: Get vnic stats failed"
+			      " 0x%x", ret);
+		return stats;
+	}
+	vs = fnic->stats;
+	stats->tx_frames = vs->tx.tx_unicast_frames_ok;
+	stats->tx_words  = vs->tx.tx_unicast_bytes_ok / 4;
+	stats->rx_frames = vs->rx.rx_unicast_frames_ok;
+	stats->rx_words  = vs->rx.rx_unicast_bytes_ok / 4;
+	stats->error_frames = vs->tx.tx_errors + vs->rx.rx_errors;
+	stats->dumped_frames = vs->tx.tx_drops + vs->rx.rx_drop;
+	stats->invalid_crc_count = vs->rx.rx_crc_errors;
+	stats->seconds_since_last_reset = (jiffies - lp->boot_time) / HZ;
+	stats->fcp_input_megabytes = div_u64(fnic->fcp_input_bytes, 1000000);
+	stats->fcp_output_megabytes = div_u64(fnic->fcp_output_bytes, 1000000);
+
+	return stats;
+}
+
+void fnic_log_q_error(struct fnic *fnic)
+{
+	unsigned int i;
+	u32 error_status;
+
+	for (i = 0; i < fnic->raw_wq_count; i++) {
+		error_status = ioread32(&fnic->wq[i].ctrl->error_status);
+		if (error_status)
+			shost_printk(KERN_ERR, fnic->lport->host,
+				     "WQ[%d] error_status"
+				     " %d\n", i, error_status);
+	}
+
+	for (i = 0; i < fnic->rq_count; i++) {
+		error_status = ioread32(&fnic->rq[i].ctrl->error_status);
+		if (error_status)
+			shost_printk(KERN_ERR, fnic->lport->host,
+				     "RQ[%d] error_status"
+				     " %d\n", i, error_status);
+	}
+
+	for (i = 0; i < fnic->wq_copy_count; i++) {
+		error_status = ioread32(&fnic->wq_copy[i].ctrl->error_status);
+		if (error_status)
+			shost_printk(KERN_ERR, fnic->lport->host,
+				     "CWQ[%d] error_status"
+				     " %d\n", i, error_status);
+	}
+}
+
+void fnic_handle_link_event(struct fnic *fnic)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	if (fnic->stop_rx_link_events) {
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	queue_work(fnic_event_queue, &fnic->link_work);
+
+}
+
+static int fnic_notify_set(struct fnic *fnic)
+{
+	int err;
+
+	switch (vnic_dev_get_intr_mode(fnic->vdev)) {
+	case VNIC_DEV_INTR_MODE_INTX:
+		err = vnic_dev_notify_set(fnic->vdev, FNIC_INTX_NOTIFY);
+		break;
+	case VNIC_DEV_INTR_MODE_MSI:
+		err = vnic_dev_notify_set(fnic->vdev, -1);
+		break;
+	case VNIC_DEV_INTR_MODE_MSIX:
+		err = vnic_dev_notify_set(fnic->vdev, FNIC_MSIX_ERR_NOTIFY);
+		break;
+	default:
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Interrupt mode should be set up"
+			     " before devcmd notify set %d\n",
+			     vnic_dev_get_intr_mode(fnic->vdev));
+		err = -1;
+		break;
+	}
+
+	return err;
+}
+
+static void fnic_notify_timer(unsigned long data)
+{
+	struct fnic *fnic = (struct fnic *)data;
+
+	fnic_handle_link_event(fnic);
+	mod_timer(&fnic->notify_timer,
+		  round_jiffies(jiffies + FNIC_NOTIFY_TIMER_PERIOD));
+}
+
+static void fnic_notify_timer_start(struct fnic *fnic)
+{
+	switch (vnic_dev_get_intr_mode(fnic->vdev)) {
+	case VNIC_DEV_INTR_MODE_MSI:
+		/*
+		 * Schedule first timeout immediately. The driver is
+		 * initiatialized and ready to look for link up notification
+		 */
+		mod_timer(&fnic->notify_timer, jiffies);
+		break;
+	default:
+		/* Using intr for notification for INTx/MSI-X */
+		break;
+	};
+}
+
+static int fnic_dev_wait(struct vnic_dev *vdev,
+			 int (*start)(struct vnic_dev *, int),
+			 int (*finished)(struct vnic_dev *, int *),
+			 int arg)
+{
+	unsigned long time;
+	int done;
+	int err;
+
+	err = start(vdev, arg);
+	if (err)
+		return err;
+
+	/* Wait for func to complete...2 seconds max */
+	time = jiffies + (HZ * 2);
+	do {
+		err = finished(vdev, &done);
+		if (err)
+			return err;
+		if (done)
+			return 0;
+		schedule_timeout_uninterruptible(HZ / 10);
+	} while (time_after(time, jiffies));
+
+	return -ETIMEDOUT;
+}
+
+static int fnic_cleanup(struct fnic *fnic)
+{
+	unsigned int i;
+	int err;
+	unsigned long flags;
+	struct fc_frame *flogi = NULL;
+	struct fc_frame *flogi_resp = NULL;
+
+	vnic_dev_disable(fnic->vdev);
+	for (i = 0; i < fnic->intr_count; i++)
+		vnic_intr_mask(&fnic->intr[i]);
+
+	for (i = 0; i < fnic->rq_count; i++) {
+		err = vnic_rq_disable(&fnic->rq[i]);
+		if (err)
+			return err;
+	}
+	for (i = 0; i < fnic->raw_wq_count; i++) {
+		err = vnic_wq_disable(&fnic->wq[i]);
+		if (err)
+			return err;
+	}
+	for (i = 0; i < fnic->wq_copy_count; i++) {
+		err = vnic_wq_copy_disable(&fnic->wq_copy[i]);
+		if (err)
+			return err;
+	}
+
+	/* Clean up completed IOs and FCS frames */
+	fnic_wq_copy_cmpl_handler(fnic, -1);
+	fnic_wq_cmpl_handler(fnic, -1);
+	fnic_rq_cmpl_handler(fnic, -1);
+
+	/* Clean up the IOs and FCS frames that have not completed */
+	for (i = 0; i < fnic->raw_wq_count; i++)
+		vnic_wq_clean(&fnic->wq[i], fnic_free_wq_buf);
+	for (i = 0; i < fnic->rq_count; i++)
+		vnic_rq_clean(&fnic->rq[i], fnic_free_rq_buf);
+	for (i = 0; i < fnic->wq_copy_count; i++)
+		vnic_wq_copy_clean(&fnic->wq_copy[i],
+				   fnic_wq_copy_cleanup_handler);
+
+	for (i = 0; i < fnic->cq_count; i++)
+		vnic_cq_clean(&fnic->cq[i]);
+	for (i = 0; i < fnic->intr_count; i++)
+		vnic_intr_clean(&fnic->intr[i]);
+
+	/*
+	 * Remove cached flogi and flogi resp frames if any
+	 * These frames are not in any queue, and therefore queue
+	 * cleanup does not clean them. So clean them explicitly
+	 */
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	flogi = fnic->flogi;
+	fnic->flogi = NULL;
+	flogi_resp = fnic->flogi_resp;
+	fnic->flogi_resp = NULL;
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	if (flogi)
+		dev_kfree_skb(fp_skb(flogi));
+
+	if (flogi_resp)
+		dev_kfree_skb(fp_skb(flogi_resp));
+
+	mempool_destroy(fnic->io_req_pool);
+	for (i = 0; i < FNIC_SGL_NUM_CACHES; i++)
+		mempool_destroy(fnic->io_sgl_pool[i]);
+
+	return 0;
+}
+
+static void fnic_iounmap(struct fnic *fnic)
+{
+	if (fnic->bar0.vaddr)
+		iounmap(fnic->bar0.vaddr);
+}
+
+/*
+ * Allocate element for mempools requiring GFP_DMA flag.
+ * Otherwise, checks in kmem_flagcheck() hit BUG_ON().
+ */
+static void *fnic_alloc_slab_dma(gfp_t gfp_mask, void *pool_data)
+{
+	struct kmem_cache *mem = pool_data;
+
+	return kmem_cache_alloc(mem, gfp_mask | GFP_ATOMIC | GFP_DMA);
+}
+
+static int __devinit fnic_probe(struct pci_dev *pdev,
+				const struct pci_device_id *ent)
+{
+	struct Scsi_Host *host;
+	struct fc_lport *lp;
+	struct fnic *fnic;
+	mempool_t *pool;
+	int err;
+	int i;
+	unsigned long flags;
+
+	/*
+	 * Allocate SCSI Host and set up association between host,
+	 * local port, and fnic
+	 */
+	host = scsi_host_alloc(&fnic_host_template,
+			       sizeof(struct fc_lport) + sizeof(struct fnic));
+	if (!host) {
+		printk(KERN_ERR PFX "Unable to alloc SCSI host\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+	lp = shost_priv(host);
+	lp->host = host;
+	fnic = lport_priv(lp);
+	fnic->lport = lp;
+
+	snprintf(fnic->name, sizeof(fnic->name) - 1, "%s%d", DRV_NAME,
+		 host->host_no);
+
+	host->transportt = fnic_fc_transport;
+
+	err = scsi_init_shared_tag_map(host, FNIC_MAX_IO_REQ);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Unable to alloc shared tag map\n");
+		goto err_out_free_hba;
+	}
+
+	/* Setup PCI resources */
+	pci_set_drvdata(pdev, fnic);
+
+	fnic->pdev = pdev;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Cannot enable PCI device, aborting.\n");
+		goto err_out_free_hba;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Cannot enable PCI resources, aborting\n");
+		goto err_out_disable_device;
+	}
+
+	pci_set_master(pdev);
+
+	/* Query PCI controller on system for DMA addressing
+	 * limitation for the device.  Try 40-bit first, and
+	 * fail to 32-bit.
+	 */
+	err = pci_set_dma_mask(pdev, DMA_40BIT_MASK);
+	if (err) {
+		err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+		if (err) {
+			shost_printk(KERN_ERR, fnic->lport->host,
+				     "No usable DMA configuration "
+				     "aborting\n");
+			goto err_out_release_regions;
+		}
+		err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+		if (err) {
+			shost_printk(KERN_ERR, fnic->lport->host,
+				     "Unable to obtain 32-bit DMA "
+				     "for consistent allocations, aborting.\n");
+			goto err_out_release_regions;
+		}
+	} else {
+		err = pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK);
+		if (err) {
+			shost_printk(KERN_ERR, fnic->lport->host,
+				     "Unable to obtain 40-bit DMA "
+				     "for consistent allocations, aborting.\n");
+			goto err_out_release_regions;
+		}
+	}
+
+	/* Map vNIC resources from BAR0 */
+	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "BAR0 not memory-map'able, aborting.\n");
+		err = -ENODEV;
+		goto err_out_release_regions;
+	}
+
+	fnic->bar0.vaddr = pci_iomap(pdev, 0, 0);
+	fnic->bar0.bus_addr = pci_resource_start(pdev, 0);
+	fnic->bar0.len = pci_resource_len(pdev, 0);
+
+	if (!fnic->bar0.vaddr) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Cannot memory-map BAR0 res hdr, "
+			     "aborting.\n");
+		err = -ENODEV;
+		goto err_out_release_regions;
+	}
+
+	fnic->vdev = vnic_dev_register(NULL, fnic, pdev, &fnic->bar0);
+	if (!fnic->vdev) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "vNIC registration failed, "
+			     "aborting.\n");
+		err = -ENODEV;
+		goto err_out_iounmap;
+	}
+
+	err = fnic_dev_wait(fnic->vdev, vnic_dev_open,
+			    vnic_dev_open_done, 0);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "vNIC dev open failed, aborting.\n");
+		goto err_out_vnic_unregister;
+	}
+
+	err = vnic_dev_init(fnic->vdev, 0);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "vNIC dev init failed, aborting.\n");
+		goto err_out_dev_close;
+	}
+
+	err = vnic_dev_mac_addr(fnic->vdev, fnic->mac_addr);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "vNIC get MAC addr failed \n");
+		goto err_out_dev_close;
+	}
+
+	/* Get vNIC configuration */
+	err = fnic_get_vnic_config(fnic);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Get vNIC configuration failed, "
+			     "aborting.\n");
+		goto err_out_dev_close;
+	}
+	host->max_lun = fnic->config.luns_per_tgt;
+	host->max_id = FNIC_MAX_FCP_TARGET;
+
+	fnic_get_res_counts(fnic);
+
+	err = fnic_set_intr_mode(fnic);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Failed to set intr mode, "
+			     "aborting.\n");
+		goto err_out_dev_close;
+	}
+
+	err = fnic_request_intr(fnic);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Unable to request irq.\n");
+		goto err_out_clear_intr;
+	}
+
+	err = fnic_alloc_vnic_resources(fnic);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Failed to alloc vNIC resources, "
+			     "aborting.\n");
+		goto err_out_free_intr;
+	}
+
+
+	/* initialize all fnic locks */
+	spin_lock_init(&fnic->fnic_lock);
+
+	for (i = 0; i < FNIC_WQ_MAX; i++)
+		spin_lock_init(&fnic->wq_lock[i]);
+
+	for (i = 0; i < FNIC_WQ_COPY_MAX; i++) {
+		spin_lock_init(&fnic->wq_copy_lock[i]);
+		fnic->wq_copy_desc_low[i] = DESC_CLEAN_LOW_WATERMARK;
+		fnic->fw_ack_recd[i] = 0;
+		fnic->fw_ack_index[i] = -1;
+	}
+
+	for (i = 0; i < FNIC_IO_LOCKS; i++)
+		spin_lock_init(&fnic->io_req_lock[i]);
+
+	fnic->io_req_pool = mempool_create_slab_pool(2, fnic_io_req_cache);
+	if (!fnic->io_req_pool)
+		goto err_out_free_resources;
+
+	pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab,
+			      fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
+	if (!pool)
+		goto err_out_free_ioreq_pool;
+	fnic->io_sgl_pool[FNIC_SGL_CACHE_DFLT] = pool;
+
+	pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab,
+			      fnic_sgl_cache[FNIC_SGL_CACHE_MAX]);
+	if (!pool)
+		goto err_out_free_dflt_pool;
+	fnic->io_sgl_pool[FNIC_SGL_CACHE_MAX] = pool;
+
+	/* setup vlan config, hw inserts vlan header */
+	fnic->vlan_hw_insert = 1;
+	fnic->vlan_id = 0;
+
+	fnic->flogi_oxid = FC_XID_UNKNOWN;
+	fnic->flogi = NULL;
+	fnic->flogi_resp = NULL;
+	fnic->state = FNIC_IN_FC_MODE;
+
+	/* Enable hardware stripping of vlan header on ingress */
+	fnic_set_nic_config(fnic, 0, 0, 0, 0, 0, 0, 1);
+
+	/* Setup notification buffer area */
+	err = fnic_notify_set(fnic);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Failed to alloc notify buffer, aborting.\n");
+		goto err_out_free_max_pool;
+	}
+
+	/* Setup notify timer when using MSI interrupts */
+	if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI)
+		setup_timer(&fnic->notify_timer,
+			    fnic_notify_timer, (unsigned long)fnic);
+
+	/* allocate RQ buffers and post them to RQ*/
+	for (i = 0; i < fnic->rq_count; i++) {
+		err = vnic_rq_fill(&fnic->rq[i], fnic_alloc_rq_frame);
+		if (err) {
+			shost_printk(KERN_ERR, fnic->lport->host,
+				     "fnic_alloc_rq_frame can't alloc "
+				     "frame\n");
+			goto err_out_free_rq_buf;
+		}
+	}
+
+	/*
+	 * Initialization done with PCI system, hardware, firmware.
+	 * Add host to SCSI
+	 */
+	err = scsi_add_host(lp->host, &pdev->dev);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "fnic: scsi_add_host failed...exiting\n");
+		goto err_out_free_rq_buf;
+	}
+
+	/* Start local port initiatialization */
+
+	lp->link_up = 0;
+	lp->tt = fnic_transport_template;
+
+	lp->emp = fc_exch_mgr_alloc(lp, FC_CLASS_3,
+				    FCPIO_HOST_EXCH_RANGE_START,
+				    FCPIO_HOST_EXCH_RANGE_END);
+	if (!lp->emp) {
+		err = -ENOMEM;
+		goto err_out_remove_scsi_host;
+	}
+
+	lp->max_retry_count = fnic->config.flogi_retries;
+	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
+			      FCP_SPPF_CONF_COMPL);
+	if (fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR)
+		lp->service_params |= FCP_SPPF_RETRY;
+
+	lp->boot_time = jiffies;
+	lp->e_d_tov = fnic->config.ed_tov;
+	lp->r_a_tov = fnic->config.ra_tov;
+	lp->link_supported_speeds = FC_PORTSPEED_10GBIT;
+	fc_set_wwnn(lp, fnic->config.node_wwn);
+	fc_set_wwpn(lp, fnic->config.port_wwn);
+
+	fc_exch_init(lp);
+	fc_lport_init(lp);
+	fc_elsct_init(lp);
+	fc_rport_init(lp);
+	fc_disc_init(lp);
+
+	fc_lport_config(lp);
+
+	if (fc_set_mfs(lp, fnic->config.maxdatafieldsize +
+		       sizeof(struct fc_frame_header))) {
+		err = -EINVAL;
+		goto err_out_free_exch_mgr;
+	}
+	fc_host_maxframe_size(lp->host) = lp->mfs;
+
+	sprintf(fc_host_symbolic_name(lp->host),
+		DRV_NAME " v" DRV_VERSION " over %s", fnic->name);
+
+	spin_lock_irqsave(&fnic_list_lock, flags);
+	list_add_tail(&fnic->list, &fnic_list);
+	spin_unlock_irqrestore(&fnic_list_lock, flags);
+
+	INIT_WORK(&fnic->link_work, fnic_handle_link);
+	INIT_WORK(&fnic->frame_work, fnic_handle_frame);
+	skb_queue_head_init(&fnic->frame_queue);
+
+	/* Enable all queues */
+	for (i = 0; i < fnic->raw_wq_count; i++)
+		vnic_wq_enable(&fnic->wq[i]);
+	for (i = 0; i < fnic->rq_count; i++)
+		vnic_rq_enable(&fnic->rq[i]);
+	for (i = 0; i < fnic->wq_copy_count; i++)
+		vnic_wq_copy_enable(&fnic->wq_copy[i]);
+
+	fc_fabric_login(lp);
+
+	vnic_dev_enable(fnic->vdev);
+	for (i = 0; i < fnic->intr_count; i++)
+		vnic_intr_unmask(&fnic->intr[i]);
+
+	fnic_notify_timer_start(fnic);
+
+	return 0;
+
+err_out_free_exch_mgr:
+	fc_exch_mgr_free(lp->emp);
+err_out_remove_scsi_host:
+	fc_remove_host(fnic->lport->host);
+	scsi_remove_host(fnic->lport->host);
+err_out_free_rq_buf:
+	for (i = 0; i < fnic->rq_count; i++)
+		vnic_rq_clean(&fnic->rq[i], fnic_free_rq_buf);
+	vnic_dev_notify_unset(fnic->vdev);
+err_out_free_max_pool:
+	mempool_destroy(fnic->io_sgl_pool[FNIC_SGL_CACHE_MAX]);
+err_out_free_dflt_pool:
+	mempool_destroy(fnic->io_sgl_pool[FNIC_SGL_CACHE_DFLT]);
+err_out_free_ioreq_pool:
+	mempool_destroy(fnic->io_req_pool);
+err_out_free_resources:
+	fnic_free_vnic_resources(fnic);
+err_out_free_intr:
+	fnic_free_intr(fnic);
+err_out_clear_intr:
+	fnic_clear_intr_mode(fnic);
+err_out_dev_close:
+	vnic_dev_close(fnic->vdev);
+err_out_vnic_unregister:
+	vnic_dev_unregister(fnic->vdev);
+err_out_iounmap:
+	fnic_iounmap(fnic);
+err_out_release_regions:
+	pci_release_regions(pdev);
+err_out_disable_device:
+	pci_disable_device(pdev);
+err_out_free_hba:
+	scsi_host_put(lp->host);
+err_out:
+	return err;
+}
+
+static void __devexit fnic_remove(struct pci_dev *pdev)
+{
+	struct fnic *fnic = pci_get_drvdata(pdev);
+	unsigned long flags;
+
+	/*
+	 * Mark state so that the workqueue thread stops forwarding
+	 * received frames and link events to the local port. ISR and
+	 * other threads that can queue work items will also stop
+	 * creating work items on the fnic workqueue
+	 */
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	fnic->stop_rx_link_events = 1;
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI)
+		del_timer_sync(&fnic->notify_timer);
+
+	/*
+	 * Flush the fnic event queue. After this call, there should
+	 * be no event queued for this fnic device in the workqueue
+	 */
+	flush_workqueue(fnic_event_queue);
+	skb_queue_purge(&fnic->frame_queue);
+
+	/*
+	 * Log off the fabric. This stops all remote ports, dns port,
+	 * logs off the fabric. This flushes all rport, disc, lport work
+	 * before returning
+	 */
+	fc_fabric_logoff(fnic->lport);
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	fnic->in_remove = 1;
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	fc_lport_destroy(fnic->lport);
+
+	/*
+	 * This stops the fnic device, masks all interrupts. Completed
+	 * CQ entries are drained. Posted WQ/RQ/Copy-WQ entries are
+	 * cleaned up
+	 */
+	fnic_cleanup(fnic);
+
+	BUG_ON(!skb_queue_empty(&fnic->frame_queue));
+
+	spin_lock_irqsave(&fnic_list_lock, flags);
+	list_del(&fnic->list);
+	spin_unlock_irqrestore(&fnic_list_lock, flags);
+
+	fc_remove_host(fnic->lport->host);
+	scsi_remove_host(fnic->lport->host);
+	fc_exch_mgr_free(fnic->lport->emp);
+	vnic_dev_notify_unset(fnic->vdev);
+	fnic_free_vnic_resources(fnic);
+	fnic_free_intr(fnic);
+	fnic_clear_intr_mode(fnic);
+	vnic_dev_close(fnic->vdev);
+	vnic_dev_unregister(fnic->vdev);
+	fnic_iounmap(fnic);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+	scsi_host_put(fnic->lport->host);
+}
+
+static struct pci_driver fnic_driver = {
+	.name = DRV_NAME,
+	.id_table = fnic_id_table,
+	.probe = fnic_probe,
+	.remove = __devexit_p(fnic_remove),
+};
+
+static int __init fnic_init_module(void)
+{
+	size_t len;
+	int err = 0;
+
+	printk(KERN_INFO PFX "%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION);
+
+	/* Create a cache for allocation of default size sgls */
+	len = sizeof(struct fnic_dflt_sgl_list);
+	fnic_sgl_cache[FNIC_SGL_CACHE_DFLT] = kmem_cache_create
+		("fnic_sgl_dflt", len + FNIC_SG_DESC_ALIGN, FNIC_SG_DESC_ALIGN,
+		 SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA,
+		 NULL);
+	if (!fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]) {
+		printk(KERN_ERR PFX "failed to create fnic dflt sgl slab\n");
+		err = -ENOMEM;
+		goto err_create_fnic_sgl_slab_dflt;
+	}
+
+	/* Create a cache for allocation of max size sgls*/
+	len = sizeof(struct fnic_sgl_list);
+	fnic_sgl_cache[FNIC_SGL_CACHE_MAX] = kmem_cache_create
+		("fnic_sgl_max", len + FNIC_SG_DESC_ALIGN, FNIC_SG_DESC_ALIGN,
+		 SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA,
+		 NULL);
+	if (!fnic_sgl_cache[FNIC_SGL_CACHE_MAX]) {
+		printk(KERN_ERR PFX "failed to create fnic max sgl slab\n");
+		err = -ENOMEM;
+		goto err_create_fnic_sgl_slab_max;
+	}
+
+	/* Create a cache of io_req structs for use via mempool */
+	fnic_io_req_cache = kmem_cache_create("fnic_io_req",
+					      sizeof(struct fnic_io_req),
+					      0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!fnic_io_req_cache) {
+		printk(KERN_ERR PFX "failed to create fnic io_req slab\n");
+		err = -ENOMEM;
+		goto err_create_fnic_ioreq_slab;
+	}
+
+	fnic_event_queue = create_singlethread_workqueue("fnic_event_wq");
+	if (!fnic_event_queue) {
+		printk(KERN_ERR PFX "fnic work queue create failed\n");
+		err = -ENOMEM;
+		goto err_create_fnic_workq;
+	}
+
+	spin_lock_init(&fnic_list_lock);
+	INIT_LIST_HEAD(&fnic_list);
+
+	fnic_fc_transport = fc_attach_transport(&fnic_fc_functions);
+	if (!fnic_fc_transport) {
+		printk(KERN_ERR PFX "fc_attach_transport error\n");
+		err = -ENOMEM;
+		goto err_fc_transport;
+	}
+
+	/* register the driver with PCI system */
+	err = pci_register_driver(&fnic_driver);
+	if (err < 0) {
+		printk(KERN_ERR PFX "pci register error\n");
+		goto err_pci_register;
+	}
+	return err;
+
+err_pci_register:
+	fc_release_transport(fnic_fc_transport);
+err_fc_transport:
+	destroy_workqueue(fnic_event_queue);
+err_create_fnic_workq:
+	kmem_cache_destroy(fnic_io_req_cache);
+err_create_fnic_ioreq_slab:
+	kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_MAX]);
+err_create_fnic_sgl_slab_max:
+	kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
+err_create_fnic_sgl_slab_dflt:
+	return err;
+}
+
+static void __exit fnic_cleanup_module(void)
+{
+	pci_unregister_driver(&fnic_driver);
+	destroy_workqueue(fnic_event_queue);
+	kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_MAX]);
+	kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
+	kmem_cache_destroy(fnic_io_req_cache);
+	fc_release_transport(fnic_fc_transport);
+}
+
+module_init(fnic_init_module);
+module_exit(fnic_cleanup_module);
+
diff --git a/drivers/scsi/fnic/fnic_res.c b/drivers/scsi/fnic/fnic_res.c
new file mode 100644
index 0000000..7ba61ec
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_res.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include "wq_enet_desc.h"
+#include "rq_enet_desc.h"
+#include "cq_enet_desc.h"
+#include "vnic_resource.h"
+#include "vnic_dev.h"
+#include "vnic_wq.h"
+#include "vnic_rq.h"
+#include "vnic_cq.h"
+#include "vnic_intr.h"
+#include "vnic_stats.h"
+#include "vnic_nic.h"
+#include "fnic.h"
+
+int fnic_get_vnic_config(struct fnic *fnic)
+{
+	struct vnic_fc_config *c = &fnic->config;
+	int err;
+
+#define GET_CONFIG(m) \
+	do { \
+		err = vnic_dev_spec(fnic->vdev, \
+				    offsetof(struct vnic_fc_config, m), \
+				    sizeof(c->m), &c->m); \
+		if (err) { \
+			shost_printk(KERN_ERR, fnic->lport->host, \
+				     "Error getting %s, %d\n", #m, \
+				     err); \
+			return err; \
+		} \
+	} while (0);
+
+	GET_CONFIG(node_wwn);
+	GET_CONFIG(port_wwn);
+	GET_CONFIG(wq_enet_desc_count);
+	GET_CONFIG(wq_copy_desc_count);
+	GET_CONFIG(rq_desc_count);
+	GET_CONFIG(maxdatafieldsize);
+	GET_CONFIG(ed_tov);
+	GET_CONFIG(ra_tov);
+	GET_CONFIG(intr_timer);
+	GET_CONFIG(intr_timer_type);
+	GET_CONFIG(flags);
+	GET_CONFIG(flogi_retries);
+	GET_CONFIG(flogi_timeout);
+	GET_CONFIG(plogi_retries);
+	GET_CONFIG(plogi_timeout);
+	GET_CONFIG(io_throttle_count);
+	GET_CONFIG(link_down_timeout);
+	GET_CONFIG(port_down_timeout);
+	GET_CONFIG(port_down_io_retries);
+	GET_CONFIG(luns_per_tgt);
+
+	c->wq_enet_desc_count =
+		min_t(u32, VNIC_FNIC_WQ_DESCS_MAX,
+		      max_t(u32, VNIC_FNIC_WQ_DESCS_MIN,
+			    c->wq_enet_desc_count));
+	c->wq_enet_desc_count = ALIGN(c->wq_enet_desc_count, 16);
+
+	c->wq_copy_desc_count =
+		min_t(u32, VNIC_FNIC_WQ_COPY_DESCS_MAX,
+		      max_t(u32, VNIC_FNIC_WQ_COPY_DESCS_MIN,
+			    c->wq_copy_desc_count));
+	c->wq_copy_desc_count = ALIGN(c->wq_copy_desc_count, 16);
+
+	c->rq_desc_count =
+		min_t(u32, VNIC_FNIC_RQ_DESCS_MAX,
+		      max_t(u32, VNIC_FNIC_RQ_DESCS_MIN,
+			    c->rq_desc_count));
+	c->rq_desc_count = ALIGN(c->rq_desc_count, 16);
+
+	c->maxdatafieldsize =
+		min_t(u16, VNIC_FNIC_MAXDATAFIELDSIZE_MAX,
+		      max_t(u16, VNIC_FNIC_MAXDATAFIELDSIZE_MIN,
+			    c->maxdatafieldsize));
+	c->ed_tov =
+		min_t(u32, VNIC_FNIC_EDTOV_MAX,
+		      max_t(u32, VNIC_FNIC_EDTOV_MIN,
+			    c->ed_tov));
+
+	c->ra_tov =
+		min_t(u32, VNIC_FNIC_RATOV_MAX,
+		      max_t(u32, VNIC_FNIC_RATOV_MIN,
+			    c->ra_tov));
+
+	c->flogi_retries =
+		min_t(u32, VNIC_FNIC_FLOGI_RETRIES_MAX, c->flogi_retries);
+
+	c->flogi_timeout =
+		min_t(u32, VNIC_FNIC_FLOGI_TIMEOUT_MAX,
+		      max_t(u32, VNIC_FNIC_FLOGI_TIMEOUT_MIN,
+			    c->flogi_timeout));
+
+	c->plogi_retries =
+		min_t(u32, VNIC_FNIC_PLOGI_RETRIES_MAX, c->plogi_retries);
+
+	c->plogi_timeout =
+		min_t(u32, VNIC_FNIC_PLOGI_TIMEOUT_MAX,
+		      max_t(u32, VNIC_FNIC_PLOGI_TIMEOUT_MIN,
+			    c->plogi_timeout));
+
+	c->io_throttle_count =
+		min_t(u32, VNIC_FNIC_IO_THROTTLE_COUNT_MAX,
+		      max_t(u32, VNIC_FNIC_IO_THROTTLE_COUNT_MIN,
+			    c->io_throttle_count));
+
+	c->link_down_timeout =
+		min_t(u32, VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX,
+		      c->link_down_timeout);
+
+	c->port_down_timeout =
+		min_t(u32, VNIC_FNIC_PORT_DOWN_TIMEOUT_MAX,
+		      c->port_down_timeout);
+
+	c->port_down_io_retries =
+		min_t(u32, VNIC_FNIC_PORT_DOWN_IO_RETRIES_MAX,
+		      c->port_down_io_retries);
+
+	c->luns_per_tgt =
+		min_t(u32, VNIC_FNIC_LUNS_PER_TARGET_MAX,
+		      max_t(u32, VNIC_FNIC_LUNS_PER_TARGET_MIN,
+			    c->luns_per_tgt));
+
+	c->intr_timer = min_t(u16, VNIC_INTR_TIMER_MAX, c->intr_timer);
+	c->intr_timer_type = c->intr_timer_type;
+
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC MAC addr %02x:%02x:%02x:%02x:%02x:%02x "
+		     "wq/wq_copy/rq %d/%d/%d\n",
+		     fnic->mac_addr[0], fnic->mac_addr[1], fnic->mac_addr[2],
+		     fnic->mac_addr[3], fnic->mac_addr[4], fnic->mac_addr[5],
+		     c->wq_enet_desc_count, c->wq_copy_desc_count,
+		     c->rq_desc_count);
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC node wwn %llx port wwn %llx\n",
+		     c->node_wwn, c->port_wwn);
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC ed_tov %d ra_tov %d\n",
+		     c->ed_tov, c->ra_tov);
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC mtu %d intr timer %d\n",
+		     c->maxdatafieldsize, c->intr_timer);
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC flags 0x%x luns per tgt %d\n",
+		     c->flags, c->luns_per_tgt);
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC flogi_retries %d flogi timeout %d\n",
+		     c->flogi_retries, c->flogi_timeout);
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC plogi retries %d plogi timeout %d\n",
+		     c->plogi_retries, c->plogi_timeout);
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC io throttle count %d link dn timeout %d\n",
+		     c->io_throttle_count, c->link_down_timeout);
+	shost_printk(KERN_INFO, fnic->lport->host,
+		     "vNIC port dn io retries %d port dn timeout %d\n",
+		     c->port_down_io_retries, c->port_down_timeout);
+
+	return 0;
+}
+
+int fnic_set_nic_config(struct fnic *fnic, u8 rss_default_cpu,
+			u8 rss_hash_type,
+			u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable,
+			u8 tso_ipid_split_en, u8 ig_vlan_strip_en)
+{
+	u64 a0, a1;
+	u32 nic_cfg;
+	int wait = 1000;
+
+	vnic_set_nic_cfg(&nic_cfg, rss_default_cpu,
+		rss_hash_type, rss_hash_bits, rss_base_cpu,
+		rss_enable, tso_ipid_split_en, ig_vlan_strip_en);
+
+	a0 = nic_cfg;
+	a1 = 0;
+
+	return vnic_dev_cmd(fnic->vdev, CMD_NIC_CFG, &a0, &a1, wait);
+}
+
+void fnic_get_res_counts(struct fnic *fnic)
+{
+	fnic->wq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_WQ);
+	fnic->raw_wq_count = fnic->wq_count - 1;
+	fnic->wq_copy_count = fnic->wq_count - fnic->raw_wq_count;
+	fnic->rq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_RQ);
+	fnic->cq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_CQ);
+	fnic->intr_count = vnic_dev_get_res_count(fnic->vdev,
+		RES_TYPE_INTR_CTRL);
+}
+
+void fnic_free_vnic_resources(struct fnic *fnic)
+{
+	unsigned int i;
+
+	for (i = 0; i < fnic->raw_wq_count; i++)
+		vnic_wq_free(&fnic->wq[i]);
+
+	for (i = 0; i < fnic->wq_copy_count; i++)
+		vnic_wq_copy_free(&fnic->wq_copy[i]);
+
+	for (i = 0; i < fnic->rq_count; i++)
+		vnic_rq_free(&fnic->rq[i]);
+
+	for (i = 0; i < fnic->cq_count; i++)
+		vnic_cq_free(&fnic->cq[i]);
+
+	for (i = 0; i < fnic->intr_count; i++)
+		vnic_intr_free(&fnic->intr[i]);
+}
+
+int fnic_alloc_vnic_resources(struct fnic *fnic)
+{
+	enum vnic_dev_intr_mode intr_mode;
+	unsigned int mask_on_assertion;
+	unsigned int interrupt_offset;
+	unsigned int error_interrupt_enable;
+	unsigned int error_interrupt_offset;
+	unsigned int i, cq_index;
+	unsigned int wq_copy_cq_desc_count;
+	int err;
+
+	intr_mode = vnic_dev_get_intr_mode(fnic->vdev);
+
+	shost_printk(KERN_INFO, fnic->lport->host, "vNIC interrupt mode: %s\n",
+		     intr_mode == VNIC_DEV_INTR_MODE_INTX ? "legacy PCI INTx" :
+		     intr_mode == VNIC_DEV_INTR_MODE_MSI ? "MSI" :
+		     intr_mode == VNIC_DEV_INTR_MODE_MSIX ?
+		     "MSI-X" : "unknown");
+
+	shost_printk(KERN_INFO, fnic->lport->host, "vNIC resources avail: "
+		     "wq %d cp_wq %d raw_wq %d rq %d cq %d intr %d\n",
+		     fnic->wq_count, fnic->wq_copy_count, fnic->raw_wq_count,
+		     fnic->rq_count, fnic->cq_count, fnic->intr_count);
+
+	/* Allocate Raw WQ used for FCS frames */
+	for (i = 0; i < fnic->raw_wq_count; i++) {
+		err = vnic_wq_alloc(fnic->vdev, &fnic->wq[i], i,
+			fnic->config.wq_enet_desc_count,
+			sizeof(struct wq_enet_desc));
+		if (err)
+			goto err_out_cleanup;
+	}
+
+	/* Allocate Copy WQs used for SCSI IOs */
+	for (i = 0; i < fnic->wq_copy_count; i++) {
+		err = vnic_wq_copy_alloc(fnic->vdev, &fnic->wq_copy[i],
+			(fnic->raw_wq_count + i),
+			fnic->config.wq_copy_desc_count,
+			sizeof(struct fcpio_host_req));
+		if (err)
+			goto err_out_cleanup;
+	}
+
+	/* RQ for receiving FCS frames */
+	for (i = 0; i < fnic->rq_count; i++) {
+		err = vnic_rq_alloc(fnic->vdev, &fnic->rq[i], i,
+			fnic->config.rq_desc_count,
+			sizeof(struct rq_enet_desc));
+		if (err)
+			goto err_out_cleanup;
+	}
+
+	/* CQ for each RQ */
+	for (i = 0; i < fnic->rq_count; i++) {
+		cq_index = i;
+		err = vnic_cq_alloc(fnic->vdev,
+			&fnic->cq[cq_index], cq_index,
+			fnic->config.rq_desc_count,
+			sizeof(struct cq_enet_rq_desc));
+		if (err)
+			goto err_out_cleanup;
+	}
+
+	/* CQ for each WQ */
+	for (i = 0; i < fnic->raw_wq_count; i++) {
+		cq_index = fnic->rq_count + i;
+		err = vnic_cq_alloc(fnic->vdev, &fnic->cq[cq_index], cq_index,
+			fnic->config.wq_enet_desc_count,
+			sizeof(struct cq_enet_wq_desc));
+		if (err)
+			goto err_out_cleanup;
+	}
+
+	/* CQ for each COPY WQ */
+	wq_copy_cq_desc_count = (fnic->config.wq_copy_desc_count * 3);
+	for (i = 0; i < fnic->wq_copy_count; i++) {
+		cq_index = fnic->raw_wq_count + fnic->rq_count + i;
+		err = vnic_cq_alloc(fnic->vdev, &fnic->cq[cq_index],
+			cq_index,
+			wq_copy_cq_desc_count,
+			sizeof(struct fcpio_fw_req));
+		if (err)
+			goto err_out_cleanup;
+	}
+
+	for (i = 0; i < fnic->intr_count; i++) {
+		err = vnic_intr_alloc(fnic->vdev, &fnic->intr[i], i);
+		if (err)
+			goto err_out_cleanup;
+	}
+
+	fnic->legacy_pba = vnic_dev_get_res(fnic->vdev,
+				RES_TYPE_INTR_PBA_LEGACY, 0);
+
+	if (!fnic->legacy_pba && intr_mode == VNIC_DEV_INTR_MODE_INTX) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Failed to hook legacy pba resource\n");
+		err = -ENODEV;
+		goto err_out_cleanup;
+	}
+
+	/*
+	 * Init RQ/WQ resources.
+	 *
+	 * RQ[0 to n-1] point to CQ[0 to n-1]
+	 * WQ[0 to m-1] point to CQ[n to n+m-1]
+	 * WQ_COPY[0 to k-1] points to CQ[n+m to n+m+k-1]
+	 *
+	 * Note for copy wq we always initialize with cq_index = 0
+	 *
+	 * Error interrupt is not enabled for MSI.
+	 */
+
+	switch (intr_mode) {
+	case VNIC_DEV_INTR_MODE_INTX:
+	case VNIC_DEV_INTR_MODE_MSIX:
+		error_interrupt_enable = 1;
+		error_interrupt_offset = fnic->err_intr_offset;
+		break;
+	default:
+		error_interrupt_enable = 0;
+		error_interrupt_offset = 0;
+		break;
+	}
+
+	for (i = 0; i < fnic->rq_count; i++) {
+		cq_index = i;
+		vnic_rq_init(&fnic->rq[i],
+			     cq_index,
+			     error_interrupt_enable,
+			     error_interrupt_offset);
+	}
+
+	for (i = 0; i < fnic->raw_wq_count; i++) {
+		cq_index = i + fnic->rq_count;
+		vnic_wq_init(&fnic->wq[i],
+			     cq_index,
+			     error_interrupt_enable,
+			     error_interrupt_offset);
+	}
+
+	for (i = 0; i < fnic->wq_copy_count; i++) {
+		vnic_wq_copy_init(&fnic->wq_copy[i],
+				  0 /* cq_index 0 - always */,
+				  error_interrupt_enable,
+				  error_interrupt_offset);
+	}
+
+	for (i = 0; i < fnic->cq_count; i++) {
+
+		switch (intr_mode) {
+		case VNIC_DEV_INTR_MODE_MSIX:
+			interrupt_offset = i;
+			break;
+		default:
+			interrupt_offset = 0;
+			break;
+		}
+
+		vnic_cq_init(&fnic->cq[i],
+			0 /* flow_control_enable */,
+			1 /* color_enable */,
+			0 /* cq_head */,
+			0 /* cq_tail */,
+			1 /* cq_tail_color */,
+			1 /* interrupt_enable */,
+			1 /* cq_entry_enable */,
+			0 /* cq_message_enable */,
+			interrupt_offset,
+			0 /* cq_message_addr */);
+	}
+
+	/*
+	 * Init INTR resources
+	 *
+	 * mask_on_assertion is not used for INTx due to the level-
+	 * triggered nature of INTx
+	 */
+
+	switch (intr_mode) {
+	case VNIC_DEV_INTR_MODE_MSI:
+	case VNIC_DEV_INTR_MODE_MSIX:
+		mask_on_assertion = 1;
+		break;
+	default:
+		mask_on_assertion = 0;
+		break;
+	}
+
+	for (i = 0; i < fnic->intr_count; i++) {
+		vnic_intr_init(&fnic->intr[i],
+			fnic->config.intr_timer,
+			fnic->config.intr_timer_type,
+			mask_on_assertion);
+	}
+
+	/* init the stats memory by making the first call here */
+	err = vnic_dev_stats_dump(fnic->vdev, &fnic->stats);
+	if (err) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "vnic_dev_stats_dump failed - x%x\n", err);
+		goto err_out_cleanup;
+	}
+
+	/* Clear LIF stats */
+	vnic_dev_stats_clear(fnic->vdev);
+
+	return 0;
+
+err_out_cleanup:
+	fnic_free_vnic_resources(fnic);
+
+	return err;
+}
diff --git a/drivers/scsi/fnic/fnic_res.h b/drivers/scsi/fnic/fnic_res.h
new file mode 100644
index 0000000..b6f3102
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_res.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _FNIC_RES_H_
+#define _FNIC_RES_H_
+
+#include "wq_enet_desc.h"
+#include "rq_enet_desc.h"
+#include "vnic_wq.h"
+#include "vnic_rq.h"
+#include "fnic_io.h"
+#include "fcpio.h"
+#include "vnic_wq_copy.h"
+#include "vnic_cq_copy.h"
+
+static inline void fnic_queue_wq_desc(struct vnic_wq *wq,
+				      void *os_buf, dma_addr_t dma_addr,
+				      unsigned int len, unsigned int fc_eof,
+				      int vlan_tag_insert,
+				      unsigned int vlan_tag,
+				      int cq_entry, int sop, int eop)
+{
+	struct wq_enet_desc *desc = vnic_wq_next_desc(wq);
+
+	wq_enet_desc_enc(desc,
+			 (u64)dma_addr | VNIC_PADDR_TARGET,
+			 (u16)len,
+			 0, /* mss_or_csum_offset */
+			 (u16)fc_eof,
+			 0, /* offload_mode */
+			 (u8)eop, (u8)cq_entry,
+			 1, /* fcoe_encap */
+			 (u8)vlan_tag_insert,
+			 (u16)vlan_tag,
+			 0 /* loopback */);
+
+	vnic_wq_post(wq, os_buf, dma_addr, len, sop, eop);
+}
+
+static inline void fnic_queue_wq_copy_desc_icmnd_16(struct vnic_wq_copy *wq,
+						    u32 req_id,
+						    u32 lunmap_id, u8 spl_flags,
+						    u32 sgl_cnt, u32 sense_len,
+						    u64 sgl_addr, u64 sns_addr,
+						    u8 crn, u8 pri_ta,
+						    u8 flags, u8 *scsi_cdb,
+						    u32 data_len, u8 *lun,
+						    u32 d_id, u16 mss,
+						    u32 ratov, u32 edtov)
+{
+	struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
+
+	desc->hdr.type = FCPIO_ICMND_16; /* enum fcpio_type */
+	desc->hdr.status = 0;            /* header status entry */
+	desc->hdr._resvd = 0;            /* reserved */
+	desc->hdr.tag.u.req_id = req_id; /* id for this request */
+
+	desc->u.icmnd_16.lunmap_id = lunmap_id; /* index into lunmap table */
+	desc->u.icmnd_16.special_req_flags = spl_flags; /* exch req flags */
+	desc->u.icmnd_16._resvd0[0] = 0;        /* reserved */
+	desc->u.icmnd_16._resvd0[1] = 0;        /* reserved */
+	desc->u.icmnd_16._resvd0[2] = 0;        /* reserved */
+	desc->u.icmnd_16.sgl_cnt = sgl_cnt;     /* scatter-gather list count */
+	desc->u.icmnd_16.sense_len = sense_len; /* sense buffer length */
+	desc->u.icmnd_16.sgl_addr = sgl_addr;   /* scatter-gather list addr */
+	desc->u.icmnd_16.sense_addr = sns_addr; /* sense buffer address */
+	desc->u.icmnd_16.crn = crn;             /* SCSI Command Reference No.*/
+	desc->u.icmnd_16.pri_ta = pri_ta; 	/* SCSI Pri & Task attribute */
+	desc->u.icmnd_16._resvd1 = 0;           /* reserved: should be 0 */
+	desc->u.icmnd_16.flags = flags;         /* command flags */
+	memcpy(desc->u.icmnd_16.scsi_cdb, scsi_cdb, CDB_16);    /* SCSI CDB */
+	desc->u.icmnd_16.data_len = data_len;   /* length of data expected */
+	memcpy(desc->u.icmnd_16.lun, lun, LUN_ADDRESS);  /* LUN address */
+	desc->u.icmnd_16._resvd2 = 0;          	/* reserved */
+	hton24(desc->u.icmnd_16.d_id, d_id);	/* FC vNIC only: Target D_ID */
+	desc->u.icmnd_16.mss = mss;            	/* FC vNIC only: max burst */
+	desc->u.icmnd_16.r_a_tov = ratov; /*FC vNIC only: Res. Alloc Timeout */
+	desc->u.icmnd_16.e_d_tov = edtov; /*FC vNIC only: Err Detect Timeout */
+
+	vnic_wq_copy_post(wq);
+}
+
+static inline void fnic_queue_wq_copy_desc_itmf(struct vnic_wq_copy *wq,
+						u32 req_id, u32 lunmap_id,
+						u32 tm_req, u32 tm_id, u8 *lun,
+						u32 d_id, u32 r_a_tov,
+						u32 e_d_tov)
+{
+	struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
+
+	desc->hdr.type = FCPIO_ITMF;     /* enum fcpio_type */
+	desc->hdr.status = 0;            /* header status entry */
+	desc->hdr._resvd = 0;            /* reserved */
+	desc->hdr.tag.u.req_id = req_id; /* id for this request */
+
+	desc->u.itmf.lunmap_id = lunmap_id; /* index into lunmap table */
+	desc->u.itmf.tm_req = tm_req;       /* SCSI Task Management request */
+	desc->u.itmf.t_tag = tm_id;         /* tag of fcpio to be aborted */
+	desc->u.itmf._resvd = 0;
+	memcpy(desc->u.itmf.lun, lun, LUN_ADDRESS);  /* LUN address */
+	desc->u.itmf._resvd1 = 0;
+	hton24(desc->u.itmf.d_id, d_id);    /* FC vNIC only: Target D_ID */
+	desc->u.itmf.r_a_tov = r_a_tov;     /* FC vNIC only: R_A_TOV in msec */
+	desc->u.itmf.e_d_tov = e_d_tov;     /* FC vNIC only: E_D_TOV in msec */
+
+	vnic_wq_copy_post(wq);
+}
+
+static inline void fnic_queue_wq_copy_desc_flogi_reg(struct vnic_wq_copy *wq,
+						     u32 req_id, u8 format,
+						     u32 s_id, u8 *gw_mac)
+{
+	struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
+
+	desc->hdr.type = FCPIO_FLOGI_REG;     /* enum fcpio_type */
+	desc->hdr.status = 0;                 /* header status entry */
+	desc->hdr._resvd = 0;                 /* reserved */
+	desc->hdr.tag.u.req_id = req_id;      /* id for this request */
+
+	desc->u.flogi_reg.format = format;
+	hton24(desc->u.flogi_reg.s_id, s_id);
+	memcpy(desc->u.flogi_reg.gateway_mac, gw_mac, ETH_ALEN);
+
+	vnic_wq_copy_post(wq);
+}
+
+static inline void fnic_queue_wq_copy_desc_fw_reset(struct vnic_wq_copy *wq,
+						    u32 req_id)
+{
+	struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
+
+	desc->hdr.type = FCPIO_RESET;     /* enum fcpio_type */
+	desc->hdr.status = 0;             /* header status entry */
+	desc->hdr._resvd = 0;             /* reserved */
+	desc->hdr.tag.u.req_id = req_id;  /* id for this request */
+
+	vnic_wq_copy_post(wq);
+}
+
+static inline void fnic_queue_wq_copy_desc_lunmap(struct vnic_wq_copy *wq,
+						  u32 req_id, u64 lunmap_addr,
+						  u32 lunmap_len)
+{
+	struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq);
+
+	desc->hdr.type = FCPIO_LUNMAP_REQ;	/* enum fcpio_type */
+	desc->hdr.status = 0;			/* header status entry */
+	desc->hdr._resvd = 0;			/* reserved */
+	desc->hdr.tag.u.req_id = req_id;	/* id for this request */
+
+	desc->u.lunmap_req.addr = lunmap_addr;	/* address of the buffer */
+	desc->u.lunmap_req.len = lunmap_len;	/* len of the buffer */
+
+	vnic_wq_copy_post(wq);
+}
+
+static inline void fnic_queue_rq_desc(struct vnic_rq *rq,
+				      void *os_buf, dma_addr_t dma_addr,
+				      u16 len)
+{
+	struct rq_enet_desc *desc = vnic_rq_next_desc(rq);
+
+	rq_enet_desc_enc(desc,
+		(u64)dma_addr | VNIC_PADDR_TARGET,
+		RQ_ENET_TYPE_ONLY_SOP,
+		(u16)len);
+
+	vnic_rq_post(rq, os_buf, 0, dma_addr, len);
+}
+
+
+struct fnic;
+
+int fnic_get_vnic_config(struct fnic *);
+int fnic_alloc_vnic_resources(struct fnic *);
+void fnic_free_vnic_resources(struct fnic *);
+void fnic_get_res_counts(struct fnic *);
+int fnic_set_nic_config(struct fnic *fnic, u8 rss_default_cpu,
+			u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu,
+			u8 rss_enable, u8 tso_ipid_split_en,
+			u8 ig_vlan_strip_en);
+
+#endif /* _FNIC_RES_H_ */
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
new file mode 100644
index 0000000..eabf365
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -0,0 +1,1850 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/mempool.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/workqueue.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/delay.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/fc/fc_els.h>
+#include <scsi/fc/fc_fcoe.h>
+#include <scsi/libfc.h>
+#include <scsi/fc_frame.h>
+#include "fnic_io.h"
+#include "fnic.h"
+
+const char *fnic_state_str[] = {
+	[FNIC_IN_FC_MODE] =           "FNIC_IN_FC_MODE",
+	[FNIC_IN_FC_TRANS_ETH_MODE] = "FNIC_IN_FC_TRANS_ETH_MODE",
+	[FNIC_IN_ETH_MODE] =          "FNIC_IN_ETH_MODE",
+	[FNIC_IN_ETH_TRANS_FC_MODE] = "FNIC_IN_ETH_TRANS_FC_MODE",
+};
+
+static const char *fnic_ioreq_state_str[] = {
+	[FNIC_IOREQ_CMD_PENDING] = "FNIC_IOREQ_CMD_PENDING",
+	[FNIC_IOREQ_ABTS_PENDING] = "FNIC_IOREQ_ABTS_PENDING",
+	[FNIC_IOREQ_ABTS_COMPLETE] = "FNIC_IOREQ_ABTS_COMPLETE",
+	[FNIC_IOREQ_CMD_COMPLETE] = "FNIC_IOREQ_CMD_COMPLETE",
+};
+
+static const char *fcpio_status_str[] =  {
+	[FCPIO_SUCCESS] = "FCPIO_SUCCESS", /*0x0*/
+	[FCPIO_INVALID_HEADER] = "FCPIO_INVALID_HEADER",
+	[FCPIO_OUT_OF_RESOURCE] = "FCPIO_OUT_OF_RESOURCE",
+	[FCPIO_INVALID_PARAM] = "FCPIO_INVALID_PARAM]",
+	[FCPIO_REQ_NOT_SUPPORTED] = "FCPIO_REQ_NOT_SUPPORTED",
+	[FCPIO_IO_NOT_FOUND] = "FCPIO_IO_NOT_FOUND",
+	[FCPIO_ABORTED] = "FCPIO_ABORTED", /*0x41*/
+	[FCPIO_TIMEOUT] = "FCPIO_TIMEOUT",
+	[FCPIO_SGL_INVALID] = "FCPIO_SGL_INVALID",
+	[FCPIO_MSS_INVALID] = "FCPIO_MSS_INVALID",
+	[FCPIO_DATA_CNT_MISMATCH] = "FCPIO_DATA_CNT_MISMATCH",
+	[FCPIO_FW_ERR] = "FCPIO_FW_ERR",
+	[FCPIO_ITMF_REJECTED] = "FCPIO_ITMF_REJECTED",
+	[FCPIO_ITMF_FAILED] = "FCPIO_ITMF_FAILED",
+	[FCPIO_ITMF_INCORRECT_LUN] = "FCPIO_ITMF_INCORRECT_LUN",
+	[FCPIO_CMND_REJECTED] = "FCPIO_CMND_REJECTED",
+	[FCPIO_NO_PATH_AVAIL] = "FCPIO_NO_PATH_AVAIL",
+	[FCPIO_PATH_FAILED] = "FCPIO_PATH_FAILED",
+	[FCPIO_LUNMAP_CHNG_PEND] = "FCPIO_LUNHMAP_CHNG_PEND",
+};
+
+const char *fnic_state_to_str(unsigned int state)
+{
+	if (state >= ARRAY_SIZE(fnic_state_str) || !fnic_state_str[state])
+		return "unknown";
+
+	return fnic_state_str[state];
+}
+
+static const char *fnic_ioreq_state_to_str(unsigned int state)
+{
+	if (state >= ARRAY_SIZE(fnic_ioreq_state_str) ||
+	    !fnic_ioreq_state_str[state])
+		return "unknown";
+
+	return fnic_ioreq_state_str[state];
+}
+
+static const char *fnic_fcpio_status_to_str(unsigned int status)
+{
+	if (status >= ARRAY_SIZE(fcpio_status_str) || !fcpio_status_str[status])
+		return "unknown";
+
+	return fcpio_status_str[status];
+}
+
+static void fnic_cleanup_io(struct fnic *fnic, int exclude_id);
+
+static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic,
+					    struct scsi_cmnd *sc)
+{
+	u32 hash = sc->request->tag & (FNIC_IO_LOCKS - 1);
+
+	return &fnic->io_req_lock[hash];
+}
+
+/*
+ * Unmap the data buffer and sense buffer for an io_req,
+ * also unmap and free the device-private scatter/gather list.
+ */
+static void fnic_release_ioreq_buf(struct fnic *fnic,
+				   struct fnic_io_req *io_req,
+				   struct scsi_cmnd *sc)
+{
+	if (io_req->sgl_list_pa)
+		pci_unmap_single(fnic->pdev, io_req->sgl_list_pa,
+				 sizeof(io_req->sgl_list[0]) * io_req->sgl_cnt,
+				 PCI_DMA_TODEVICE);
+	scsi_dma_unmap(sc);
+
+	if (io_req->sgl_cnt)
+		mempool_free(io_req->sgl_list_alloc,
+			     fnic->io_sgl_pool[io_req->sgl_type]);
+	if (io_req->sense_buf_pa)
+		pci_unmap_single(fnic->pdev, io_req->sense_buf_pa,
+				 SCSI_SENSE_BUFFERSIZE, PCI_DMA_FROMDEVICE);
+}
+
+/* Free up Copy Wq descriptors. Called with copy_wq lock held */
+static int free_wq_copy_descs(struct fnic *fnic, struct vnic_wq_copy *wq)
+{
+	/* if no Ack received from firmware, then nothing to clean */
+	if (!fnic->fw_ack_recd[0])
+		return 1;
+
+	/*
+	 * Update desc_available count based on number of freed descriptors
+	 * Account for wraparound
+	 */
+	if (wq->to_clean_index <= fnic->fw_ack_index[0])
+		wq->ring.desc_avail += (fnic->fw_ack_index[0]
+					- wq->to_clean_index + 1);
+	else
+		wq->ring.desc_avail += (wq->ring.desc_count
+					- wq->to_clean_index
+					+ fnic->fw_ack_index[0] + 1);
+
+	/*
+	 * just bump clean index to ack_index+1 accounting for wraparound
+	 * this will essentially free up all descriptors between
+	 * to_clean_index and fw_ack_index, both inclusive
+	 */
+	wq->to_clean_index =
+		(fnic->fw_ack_index[0] + 1) % wq->ring.desc_count;
+
+	/* we have processed the acks received so far */
+	fnic->fw_ack_recd[0] = 0;
+	return 0;
+}
+
+
+/*
+ * fnic_fw_reset_handler
+ * Routine to send reset msg to fw
+ */
+int fnic_fw_reset_handler(struct fnic *fnic)
+{
+	struct vnic_wq_copy *wq = &fnic->wq_copy[0];
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
+
+	if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
+		free_wq_copy_descs(fnic, wq);
+
+	if (!vnic_wq_copy_desc_avail(wq))
+		ret = -EAGAIN;
+	else
+		fnic_queue_wq_copy_desc_fw_reset(wq, SCSI_NO_TAG);
+
+	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
+
+	if (!ret)
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "Issued fw reset\n");
+	else
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "Failed to issue fw reset\n");
+	return ret;
+}
+
+
+/*
+ * fnic_flogi_reg_handler
+ * Routine to send flogi register msg to fw
+ */
+int fnic_flogi_reg_handler(struct fnic *fnic)
+{
+	struct vnic_wq_copy *wq = &fnic->wq_copy[0];
+	u8 gw_mac[ETH_ALEN];
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
+
+	if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
+		free_wq_copy_descs(fnic, wq);
+
+	if (!vnic_wq_copy_desc_avail(wq)) {
+		ret = -EAGAIN;
+		goto flogi_reg_ioreq_end;
+	}
+
+	if (fnic->fcoui_mode)
+		memset(gw_mac, 0xff, ETH_ALEN);
+	else
+		memcpy(gw_mac, fnic->dest_addr, ETH_ALEN);
+
+	fnic_queue_wq_copy_desc_flogi_reg(wq, SCSI_NO_TAG,
+					  FCPIO_FLOGI_REG_GW_DEST,
+					  fnic->s_id,
+					  gw_mac);
+
+flogi_reg_ioreq_end:
+	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
+
+	if (!ret)
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "flog reg issued\n");
+
+	return ret;
+}
+
+/*
+ * fnic_queue_wq_copy_desc
+ * Routine to enqueue a wq copy desc
+ */
+static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
+					  struct vnic_wq_copy *wq,
+					  struct fnic_io_req *io_req,
+					  struct scsi_cmnd *sc,
+					  u32 sg_count)
+{
+	struct scatterlist *sg;
+	struct fc_rport *rport = starget_to_rport(scsi_target(sc->device));
+	struct fc_rport_libfc_priv *rp = rport->dd_data;
+	struct host_sg_desc *desc;
+	u8 pri_tag = 0;
+	unsigned int i;
+	unsigned long intr_flags;
+	int flags;
+	u8 exch_flags;
+	struct scsi_lun fc_lun;
+	char msg[2];
+
+	if (sg_count) {
+		BUG_ON(sg_count < 0);
+		BUG_ON(sg_count > FNIC_MAX_SG_DESC_CNT);
+
+		/* For each SGE, create a device desc entry */
+		desc = io_req->sgl_list;
+		for_each_sg(scsi_sglist(sc), sg, sg_count, i) {
+			desc->addr = cpu_to_le64(sg_dma_address(sg));
+			desc->len = cpu_to_le32(sg_dma_len(sg));
+			desc->_resvd = 0;
+			desc++;
+		}
+
+		io_req->sgl_list_pa = pci_map_single
+			(fnic->pdev,
+			 io_req->sgl_list,
+			 sizeof(io_req->sgl_list[0]) * sg_count,
+			 PCI_DMA_TODEVICE);
+	}
+
+	io_req->sense_buf_pa = pci_map_single(fnic->pdev,
+					      sc->sense_buffer,
+					      SCSI_SENSE_BUFFERSIZE,
+					      PCI_DMA_FROMDEVICE);
+
+	int_to_scsilun(sc->device->lun, &fc_lun);
+
+	pri_tag = FCPIO_ICMND_PTA_SIMPLE;
+	msg[0] = MSG_SIMPLE_TAG;
+	scsi_populate_tag_msg(sc, msg);
+	if (msg[0] == MSG_ORDERED_TAG)
+		pri_tag = FCPIO_ICMND_PTA_ORDERED;
+
+	/* Enqueue the descriptor in the Copy WQ */
+	spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags);
+
+	if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
+		free_wq_copy_descs(fnic, wq);
+
+	if (unlikely(!vnic_wq_copy_desc_avail(wq))) {
+		spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags);
+		return SCSI_MLQUEUE_HOST_BUSY;
+	}
+
+	flags = 0;
+	if (sc->sc_data_direction == DMA_FROM_DEVICE)
+		flags = FCPIO_ICMND_RDDATA;
+	else if (sc->sc_data_direction == DMA_TO_DEVICE)
+		flags = FCPIO_ICMND_WRDATA;
+
+	exch_flags = 0;
+	if ((fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR) &&
+	    (rp->flags & FC_RP_FLAGS_RETRY))
+		exch_flags |= FCPIO_ICMND_SRFLAG_RETRY;
+
+	fnic_queue_wq_copy_desc_icmnd_16(wq, sc->request->tag,
+					 0, exch_flags, io_req->sgl_cnt,
+					 SCSI_SENSE_BUFFERSIZE,
+					 io_req->sgl_list_pa,
+					 io_req->sense_buf_pa,
+					 0, /* scsi cmd ref, always 0 */
+					 pri_tag, /* scsi pri and tag */
+					 flags,	/* command flags */
+					 sc->cmnd, scsi_bufflen(sc),
+					 fc_lun.scsi_lun, io_req->port_id,
+					 rport->maxframe_size, rp->r_a_tov,
+					 rp->e_d_tov);
+
+	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags);
+	return 0;
+}
+
+/*
+ * fnic_queuecommand
+ * Routine to send a scsi cdb
+ * Called with host_lock held and interrupts disabled.
+ */
+int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
+{
+	struct fc_lport *lp;
+	struct fc_rport *rport;
+	struct fnic_io_req *io_req;
+	struct fnic *fnic;
+	struct vnic_wq_copy *wq;
+	int ret;
+	u32 sg_count;
+	unsigned long flags;
+	unsigned long ptr;
+
+	rport = starget_to_rport(scsi_target(sc->device));
+	ret = fc_remote_port_chkready(rport);
+	if (ret) {
+		sc->result = ret;
+		done(sc);
+		return 0;
+	}
+
+	lp = shost_priv(sc->device->host);
+	if (lp->state != LPORT_ST_READY || !(lp->link_up))
+		return SCSI_MLQUEUE_HOST_BUSY;
+
+	/*
+	 * Release host lock, use driver resource specific locks from here.
+	 * Don't re-enable interrupts in case they were disabled prior to the
+	 * caller disabling them.
+	 */
+	spin_unlock(lp->host->host_lock);
+
+	/* Get a new io_req for this SCSI IO */
+	fnic = lport_priv(lp);
+
+	io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
+	if (!io_req) {
+		ret = SCSI_MLQUEUE_HOST_BUSY;
+		goto out;
+	}
+	memset(io_req, 0, sizeof(*io_req));
+
+	/* Map the data buffer */
+	sg_count = scsi_dma_map(sc);
+	if (sg_count < 0) {
+		mempool_free(io_req, fnic->io_req_pool);
+		goto out;
+	}
+
+	/* Determine the type of scatter/gather list we need */
+	io_req->sgl_cnt = sg_count;
+	io_req->sgl_type = FNIC_SGL_CACHE_DFLT;
+	if (sg_count > FNIC_DFLT_SG_DESC_CNT)
+		io_req->sgl_type = FNIC_SGL_CACHE_MAX;
+
+	if (sg_count) {
+		io_req->sgl_list =
+			mempool_alloc(fnic->io_sgl_pool[io_req->sgl_type],
+				      GFP_ATOMIC | GFP_DMA);
+		if (!io_req->sgl_list) {
+			ret = SCSI_MLQUEUE_HOST_BUSY;
+			scsi_dma_unmap(sc);
+			mempool_free(io_req, fnic->io_req_pool);
+			goto out;
+		}
+
+		/* Cache sgl list allocated address before alignment */
+		io_req->sgl_list_alloc = io_req->sgl_list;
+		ptr = (unsigned long) io_req->sgl_list;
+		if (ptr % FNIC_SG_DESC_ALIGN) {
+			io_req->sgl_list = (struct host_sg_desc *)
+				(((unsigned long) ptr
+				  + FNIC_SG_DESC_ALIGN - 1)
+				 & ~(FNIC_SG_DESC_ALIGN - 1));
+		}
+	}
+
+	/* initialize rest of io_req */
+	io_req->port_id = rport->port_id;
+	CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING;
+	CMD_SP(sc) = (char *)io_req;
+	sc->scsi_done = done;
+
+	/* create copy wq desc and enqueue it */
+	wq = &fnic->wq_copy[0];
+	ret = fnic_queue_wq_copy_desc(fnic, wq, io_req, sc, sg_count);
+	if (ret) {
+		/*
+		 * In case another thread cancelled the request,
+		 * refetch the pointer under the lock.
+		 */
+		spinlock_t *io_lock = fnic_io_lock_hash(fnic, sc);
+
+		spin_lock_irqsave(io_lock, flags);
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+		CMD_SP(sc) = NULL;
+		CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE;
+		spin_unlock_irqrestore(io_lock, flags);
+		if (io_req) {
+			fnic_release_ioreq_buf(fnic, io_req, sc);
+			mempool_free(io_req, fnic->io_req_pool);
+		}
+	}
+out:
+	/* acquire host lock before returning to SCSI */
+	spin_lock(lp->host->host_lock);
+	return ret;
+}
+
+/*
+ * fnic_fcpio_fw_reset_cmpl_handler
+ * Routine to handle fw reset completion
+ */
+static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic,
+					    struct fcpio_fw_req *desc)
+{
+	u8 type;
+	u8 hdr_status;
+	struct fcpio_tag tag;
+	int ret = 0;
+	struct fc_frame *flogi;
+	unsigned long flags;
+
+	fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
+
+	/* Clean up all outstanding io requests */
+	fnic_cleanup_io(fnic, SCSI_NO_TAG);
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+
+	flogi = fnic->flogi;
+	fnic->flogi = NULL;
+
+	/* fnic should be in FC_TRANS_ETH_MODE */
+	if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE) {
+		/* Check status of reset completion */
+		if (!hdr_status) {
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+				      "reset cmpl success\n");
+			/* Ready to send flogi out */
+			fnic->state = FNIC_IN_ETH_MODE;
+		} else {
+			FNIC_SCSI_DBG(KERN_DEBUG,
+				      fnic->lport->host,
+				      "fnic fw_reset : failed %s\n",
+				      fnic_fcpio_status_to_str(hdr_status));
+
+			/*
+			 * Unable to change to eth mode, cannot send out flogi
+			 * Change state to fc mode, so that subsequent Flogi
+			 * requests from libFC will cause more attempts to
+			 * reset the firmware. Free the cached flogi
+			 */
+			fnic->state = FNIC_IN_FC_MODE;
+			ret = -1;
+		}
+	} else {
+		FNIC_SCSI_DBG(KERN_DEBUG,
+			      fnic->lport->host,
+			      "Unexpected state %s while processing"
+			      " reset cmpl\n", fnic_state_to_str(fnic->state));
+		ret = -1;
+	}
+
+	/* Thread removing device blocks till firmware reset is complete */
+	if (fnic->remove_wait)
+		complete(fnic->remove_wait);
+
+	/*
+	 * If fnic is being removed, or fw reset failed
+	 * free the flogi frame. Else, send it out
+	 */
+	if (fnic->remove_wait || ret) {
+		fnic->flogi_oxid = FC_XID_UNKNOWN;
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		if (flogi)
+			dev_kfree_skb_irq(fp_skb(flogi));
+		goto reset_cmpl_handler_end;
+	}
+
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	if (flogi)
+		ret = fnic_send_frame(fnic, flogi);
+
+ reset_cmpl_handler_end:
+	return ret;
+}
+
+/*
+ * fnic_fcpio_flogi_reg_cmpl_handler
+ * Routine to handle flogi register completion
+ */
+static int fnic_fcpio_flogi_reg_cmpl_handler(struct fnic *fnic,
+					     struct fcpio_fw_req *desc)
+{
+	u8 type;
+	u8 hdr_status;
+	struct fcpio_tag tag;
+	int ret = 0;
+	struct fc_frame *flogi_resp = NULL;
+	unsigned long flags;
+	struct sk_buff *skb;
+
+	fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
+
+	/* Update fnic state based on status of flogi reg completion */
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+
+	flogi_resp = fnic->flogi_resp;
+	fnic->flogi_resp = NULL;
+
+	if (fnic->state == FNIC_IN_ETH_TRANS_FC_MODE) {
+
+		/* Check flogi registration completion status */
+		if (!hdr_status) {
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+				      "flog reg succeeded\n");
+			fnic->state = FNIC_IN_FC_MODE;
+		} else {
+			FNIC_SCSI_DBG(KERN_DEBUG,
+				      fnic->lport->host,
+				      "fnic flogi reg :failed %s\n",
+				      fnic_fcpio_status_to_str(hdr_status));
+			fnic->state = FNIC_IN_ETH_MODE;
+			ret = -1;
+		}
+	} else {
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "Unexpected fnic state %s while"
+			      " processing flogi reg completion\n",
+			      fnic_state_to_str(fnic->state));
+		ret = -1;
+	}
+
+	/* Successful flogi reg cmpl, pass frame to LibFC */
+	if (!ret && flogi_resp) {
+		if (fnic->stop_rx_link_events) {
+			spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+			goto reg_cmpl_handler_end;
+		}
+		skb = (struct sk_buff *)flogi_resp;
+		/* Use fr_flags to indicate whether flogi resp or not */
+		fr_flags(flogi_resp) = 1;
+		fr_dev(flogi_resp) = fnic->lport;
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+		skb_queue_tail(&fnic->frame_queue, skb);
+		queue_work(fnic_event_queue, &fnic->frame_work);
+
+	} else {
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		if (flogi_resp)
+			dev_kfree_skb_irq(fp_skb(flogi_resp));
+	}
+
+reg_cmpl_handler_end:
+	return ret;
+}
+
+static inline int is_ack_index_in_range(struct vnic_wq_copy *wq,
+					u16 request_out)
+{
+	if (wq->to_clean_index <= wq->to_use_index) {
+		/* out of range, stale request_out index */
+		if (request_out < wq->to_clean_index ||
+		    request_out >= wq->to_use_index)
+			return 0;
+	} else {
+		/* out of range, stale request_out index */
+		if (request_out < wq->to_clean_index &&
+		    request_out >= wq->to_use_index)
+			return 0;
+	}
+	/* request_out index is in range */
+	return 1;
+}
+
+
+/*
+ * Mark that ack received and store the Ack index. If there are multiple
+ * acks received before Tx thread cleans it up, the latest value will be
+ * used which is correct behavior. This state should be in the copy Wq
+ * instead of in the fnic
+ */
+static inline void fnic_fcpio_ack_handler(struct fnic *fnic,
+					  unsigned int cq_index,
+					  struct fcpio_fw_req *desc)
+{
+	struct vnic_wq_copy *wq;
+	u16 request_out = desc->u.ack.request_out;
+	unsigned long flags;
+
+	/* mark the ack state */
+	wq = &fnic->wq_copy[cq_index - fnic->raw_wq_count - fnic->rq_count];
+	spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
+
+	if (is_ack_index_in_range(wq, request_out)) {
+		fnic->fw_ack_index[0] = request_out;
+		fnic->fw_ack_recd[0] = 1;
+	}
+	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
+}
+
+/*
+ * fnic_fcpio_icmnd_cmpl_handler
+ * Routine to handle icmnd completions
+ */
+static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
+					 struct fcpio_fw_req *desc)
+{
+	u8 type;
+	u8 hdr_status;
+	struct fcpio_tag tag;
+	u32 id;
+	u64 xfer_len = 0;
+	struct fcpio_icmnd_cmpl *icmnd_cmpl;
+	struct fnic_io_req *io_req;
+	struct scsi_cmnd *sc;
+	unsigned long flags;
+	spinlock_t *io_lock;
+
+	/* Decode the cmpl description to get the io_req id */
+	fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
+	fcpio_tag_id_dec(&tag, &id);
+
+	if (id >= FNIC_MAX_IO_REQ)
+		return;
+
+	sc = scsi_host_find_tag(fnic->lport->host, id);
+	WARN_ON_ONCE(!sc);
+	if (!sc)
+		return;
+
+	io_lock = fnic_io_lock_hash(fnic, sc);
+	spin_lock_irqsave(io_lock, flags);
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	WARN_ON_ONCE(!io_req);
+	if (!io_req) {
+		spin_unlock_irqrestore(io_lock, flags);
+		return;
+	}
+
+	/* firmware completed the io */
+	io_req->io_completed = 1;
+
+	/*
+	 *  if SCSI-ML has already issued abort on this command,
+	 * ignore completion of the IO. The abts path will clean it up
+	 */
+	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+		spin_unlock_irqrestore(io_lock, flags);
+		return;
+	}
+
+	/* Mark the IO as complete */
+	CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE;
+
+	icmnd_cmpl = &desc->u.icmnd_cmpl;
+
+	switch (hdr_status) {
+	case FCPIO_SUCCESS:
+		sc->result = (DID_OK << 16) | icmnd_cmpl->scsi_status;
+		xfer_len = scsi_bufflen(sc);
+		scsi_set_resid(sc, icmnd_cmpl->residual);
+
+		if (icmnd_cmpl->flags & FCPIO_ICMND_CMPL_RESID_UNDER)
+			xfer_len -= icmnd_cmpl->residual;
+
+		/*
+		 * If queue_full, then try to reduce queue depth for all
+		 * LUNS on the target. Todo: this should be accompanied
+		 * by a periodic queue_depth rampup based on successful
+		 * IO completion.
+		 */
+		if (icmnd_cmpl->scsi_status == QUEUE_FULL) {
+			struct scsi_device *t_sdev;
+			int qd = 0;
+
+			shost_for_each_device(t_sdev, sc->device->host) {
+				if (t_sdev->id != sc->device->id)
+					continue;
+
+				if (t_sdev->queue_depth > 1) {
+					qd = scsi_track_queue_full
+						(t_sdev,
+						 t_sdev->queue_depth - 1);
+					if (qd == -1)
+						qd = t_sdev->host->cmd_per_lun;
+					shost_printk(KERN_INFO,
+						     fnic->lport->host,
+						     "scsi[%d:%d:%d:%d"
+						     "] queue full detected,"
+						     "new depth = %d\n",
+						     t_sdev->host->host_no,
+						     t_sdev->channel,
+						     t_sdev->id, t_sdev->lun,
+						     t_sdev->queue_depth);
+				}
+			}
+		}
+		break;
+
+	case FCPIO_TIMEOUT:          /* request was timed out */
+		sc->result = (DID_TIME_OUT << 16) | icmnd_cmpl->scsi_status;
+		break;
+
+	case FCPIO_ABORTED:          /* request was aborted */
+		sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status;
+		break;
+
+	case FCPIO_DATA_CNT_MISMATCH: /* recv/sent more/less data than exp. */
+		scsi_set_resid(sc, icmnd_cmpl->residual);
+		sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status;
+		break;
+
+	case FCPIO_OUT_OF_RESOURCE:  /* out of resources to complete request */
+		sc->result = (DID_REQUEUE << 16) | icmnd_cmpl->scsi_status;
+		break;
+	case FCPIO_INVALID_HEADER:   /* header contains invalid data */
+	case FCPIO_INVALID_PARAM:    /* some parameter in request invalid */
+	case FCPIO_REQ_NOT_SUPPORTED:/* request type is not supported */
+	case FCPIO_IO_NOT_FOUND:     /* requested I/O was not found */
+	case FCPIO_SGL_INVALID:      /* request was aborted due to sgl error */
+	case FCPIO_MSS_INVALID:      /* request was aborted due to mss error */
+	case FCPIO_FW_ERR:           /* request was terminated due fw error */
+	default:
+		shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
+			     fnic_fcpio_status_to_str(hdr_status));
+		sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status;
+		break;
+	}
+
+	/* Break link with the SCSI command */
+	CMD_SP(sc) = NULL;
+
+	spin_unlock_irqrestore(io_lock, flags);
+
+	fnic_release_ioreq_buf(fnic, io_req, sc);
+
+	mempool_free(io_req, fnic->io_req_pool);
+
+	if (sc->sc_data_direction == DMA_FROM_DEVICE) {
+		fnic->lport->host_stats.fcp_input_requests++;
+		fnic->fcp_input_bytes += xfer_len;
+	} else if (sc->sc_data_direction == DMA_TO_DEVICE) {
+		fnic->lport->host_stats.fcp_output_requests++;
+		fnic->fcp_output_bytes += xfer_len;
+	} else
+		fnic->lport->host_stats.fcp_control_requests++;
+
+	/* Call SCSI completion function to complete the IO */
+	if (sc->scsi_done)
+		sc->scsi_done(sc);
+
+}
+
+/* fnic_fcpio_itmf_cmpl_handler
+ * Routine to handle itmf completions
+ */
+static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
+					struct fcpio_fw_req *desc)
+{
+	u8 type;
+	u8 hdr_status;
+	struct fcpio_tag tag;
+	u32 id;
+	struct scsi_cmnd *sc;
+	struct fnic_io_req *io_req;
+	unsigned long flags;
+	spinlock_t *io_lock;
+
+	fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
+	fcpio_tag_id_dec(&tag, &id);
+
+	if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ)
+		return;
+
+	sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK);
+	WARN_ON_ONCE(!sc);
+	if (!sc)
+		return;
+
+	io_lock = fnic_io_lock_hash(fnic, sc);
+	spin_lock_irqsave(io_lock, flags);
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	WARN_ON_ONCE(!io_req);
+	if (!io_req) {
+		spin_unlock_irqrestore(io_lock, flags);
+		return;
+	}
+
+	if (id & FNIC_TAG_ABORT) {
+		/* Completion of abort cmd */
+		if (CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING) {
+			/* This is a late completion. Ignore it */
+			spin_unlock_irqrestore(io_lock, flags);
+			return;
+		}
+		CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
+		CMD_ABTS_STATUS(sc) = hdr_status;
+
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "abts cmpl recd. id %d status %s\n",
+			      (int)(id & FNIC_TAG_MASK),
+			      fnic_fcpio_status_to_str(hdr_status));
+
+		/*
+		 * If scsi_eh thread is blocked waiting for abts to complete,
+		 * signal completion to it. IO will be cleaned in the thread
+		 * else clean it in this context
+		 */
+		if (io_req->abts_done) {
+			complete(io_req->abts_done);
+			spin_unlock_irqrestore(io_lock, flags);
+		} else {
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+				      "abts cmpl, completing IO\n");
+			CMD_SP(sc) = NULL;
+			sc->result = (DID_ERROR << 16);
+
+			spin_unlock_irqrestore(io_lock, flags);
+
+			fnic_release_ioreq_buf(fnic, io_req, sc);
+			mempool_free(io_req, fnic->io_req_pool);
+			if (sc->scsi_done)
+				sc->scsi_done(sc);
+		}
+
+	} else if (id & FNIC_TAG_DEV_RST) {
+		/* Completion of device reset */
+		CMD_LR_STATUS(sc) = hdr_status;
+		CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE;
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "dev reset cmpl recd. id %d status %s\n",
+			      (int)(id & FNIC_TAG_MASK),
+			      fnic_fcpio_status_to_str(hdr_status));
+		if (io_req->dr_done)
+			complete(io_req->dr_done);
+		spin_unlock_irqrestore(io_lock, flags);
+
+	} else {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			     "Unexpected itmf io state %s tag %x\n",
+			     fnic_ioreq_state_to_str(CMD_STATE(sc)), id);
+		spin_unlock_irqrestore(io_lock, flags);
+	}
+
+}
+
+/*
+ * fnic_fcpio_cmpl_handler
+ * Routine to service the cq for wq_copy
+ */
+static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev,
+				   unsigned int cq_index,
+				   struct fcpio_fw_req *desc)
+{
+	struct fnic *fnic = vnic_dev_priv(vdev);
+	int ret = 0;
+
+	switch (desc->hdr.type) {
+	case FCPIO_ACK: /* fw copied copy wq desc to its queue */
+		fnic_fcpio_ack_handler(fnic, cq_index, desc);
+		break;
+
+	case FCPIO_ICMND_CMPL: /* fw completed a command */
+		fnic_fcpio_icmnd_cmpl_handler(fnic, desc);
+		break;
+
+	case FCPIO_ITMF_CMPL: /* fw completed itmf (abort cmd, lun reset)*/
+		fnic_fcpio_itmf_cmpl_handler(fnic, desc);
+		break;
+
+	case FCPIO_FLOGI_REG_CMPL: /* fw completed flogi_reg */
+		ret = fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc);
+		break;
+
+	case FCPIO_RESET_CMPL: /* fw completed reset */
+		ret = fnic_fcpio_fw_reset_cmpl_handler(fnic, desc);
+		break;
+
+	default:
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "firmware completion type %d\n",
+			      desc->hdr.type);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * fnic_wq_copy_cmpl_handler
+ * Routine to process wq copy
+ */
+int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
+{
+	unsigned int wq_work_done = 0;
+	unsigned int i, cq_index;
+	unsigned int cur_work_done;
+
+	for (i = 0; i < fnic->wq_copy_count; i++) {
+		cq_index = i + fnic->raw_wq_count + fnic->rq_count;
+		cur_work_done = vnic_cq_copy_service(&fnic->cq[cq_index],
+						     fnic_fcpio_cmpl_handler,
+						     copy_work_to_do);
+		wq_work_done += cur_work_done;
+	}
+	return wq_work_done;
+}
+
+static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
+{
+	unsigned int i;
+	struct fnic_io_req *io_req;
+	unsigned long flags = 0;
+	struct scsi_cmnd *sc;
+	spinlock_t *io_lock;
+
+	for (i = 0; i < FNIC_MAX_IO_REQ; i++) {
+		if (i == exclude_id)
+			continue;
+
+		sc = scsi_host_find_tag(fnic->lport->host, i);
+		if (!sc)
+			continue;
+
+		io_lock = fnic_io_lock_hash(fnic, sc);
+		spin_lock_irqsave(io_lock, flags);
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+		if (!io_req) {
+			spin_unlock_irqrestore(io_lock, flags);
+			goto cleanup_scsi_cmd;
+		}
+
+		CMD_SP(sc) = NULL;
+
+		spin_unlock_irqrestore(io_lock, flags);
+
+		/*
+		 * If there is a scsi_cmnd associated with this io_req, then
+		 * free the corresponding state
+		 */
+		fnic_release_ioreq_buf(fnic, io_req, sc);
+		mempool_free(io_req, fnic->io_req_pool);
+
+cleanup_scsi_cmd:
+		sc->result = DID_TRANSPORT_DISRUPTED << 16;
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_cleanup_io:"
+			      " DID_TRANSPORT_DISRUPTED\n");
+
+		/* Complete the command to SCSI */
+		if (sc->scsi_done)
+			sc->scsi_done(sc);
+	}
+}
+
+void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
+				  struct fcpio_host_req *desc)
+{
+	u32 id;
+	struct fnic *fnic = vnic_dev_priv(wq->vdev);
+	struct fnic_io_req *io_req;
+	struct scsi_cmnd *sc;
+	unsigned long flags;
+	spinlock_t *io_lock;
+
+	/* get the tag reference */
+	fcpio_tag_id_dec(&desc->hdr.tag, &id);
+	id &= FNIC_TAG_MASK;
+
+	if (id >= FNIC_MAX_IO_REQ)
+		return;
+
+	sc = scsi_host_find_tag(fnic->lport->host, id);
+	if (!sc)
+		return;
+
+	io_lock = fnic_io_lock_hash(fnic, sc);
+	spin_lock_irqsave(io_lock, flags);
+
+	/* Get the IO context which this desc refers to */
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+
+	/* fnic interrupts are turned off by now */
+
+	if (!io_req) {
+		spin_unlock_irqrestore(io_lock, flags);
+		goto wq_copy_cleanup_scsi_cmd;
+	}
+
+	CMD_SP(sc) = NULL;
+
+	spin_unlock_irqrestore(io_lock, flags);
+
+	fnic_release_ioreq_buf(fnic, io_req, sc);
+	mempool_free(io_req, fnic->io_req_pool);
+
+wq_copy_cleanup_scsi_cmd:
+	sc->result = DID_NO_CONNECT << 16;
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "wq_copy_cleanup_handler:"
+		      " DID_NO_CONNECT\n");
+
+	if (sc->scsi_done)
+		sc->scsi_done(sc);
+}
+
+static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag,
+					  u32 task_req, u8 *fc_lun,
+					  struct fnic_io_req *io_req)
+{
+	struct vnic_wq_copy *wq = &fnic->wq_copy[0];
+	unsigned long flags;
+
+	spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
+
+	if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
+		free_wq_copy_descs(fnic, wq);
+
+	if (!vnic_wq_copy_desc_avail(wq)) {
+		spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
+		return 1;
+	}
+	fnic_queue_wq_copy_desc_itmf(wq, tag | FNIC_TAG_ABORT,
+				     0, task_req, tag, fc_lun, io_req->port_id,
+				     fnic->config.ra_tov, fnic->config.ed_tov);
+
+	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
+	return 0;
+}
+
+void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
+{
+	int tag;
+	struct fnic_io_req *io_req;
+	spinlock_t *io_lock;
+	unsigned long flags;
+	struct scsi_cmnd *sc;
+	struct scsi_lun fc_lun;
+	enum fnic_ioreq_state old_ioreq_state;
+
+	FNIC_SCSI_DBG(KERN_DEBUG,
+		      fnic->lport->host,
+		      "fnic_rport_reset_exch called portid 0x%06x\n",
+		      port_id);
+
+	if (fnic->in_remove)
+		return;
+
+	for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+		sc = scsi_host_find_tag(fnic->lport->host, tag);
+		if (!sc)
+			continue;
+
+		io_lock = fnic_io_lock_hash(fnic, sc);
+		spin_lock_irqsave(io_lock, flags);
+
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+
+		if (!io_req || io_req->port_id != port_id) {
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+
+		/*
+		 * Found IO that is still pending with firmware and
+		 * belongs to rport that went away
+		 */
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+		old_ioreq_state = CMD_STATE(sc);
+		CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+
+		BUG_ON(io_req->abts_done);
+
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "fnic_rport_reset_exch: Issuing abts\n");
+
+		spin_unlock_irqrestore(io_lock, flags);
+
+		/* Now queue the abort command to firmware */
+		int_to_scsilun(sc->device->lun, &fc_lun);
+
+		if (fnic_queue_abort_io_req(fnic, tag,
+					    FCPIO_ITMF_ABT_TASK_TERM,
+					    fc_lun.scsi_lun, io_req)) {
+			/*
+			 * Revert the cmd state back to old state, if
+			 * it hasnt changed in between. This cmd will get
+			 * aborted later by scsi_eh, or cleaned up during
+			 * lun reset
+			 */
+			io_lock = fnic_io_lock_hash(fnic, sc);
+
+			spin_lock_irqsave(io_lock, flags);
+			if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+				CMD_STATE(sc) = old_ioreq_state;
+			spin_unlock_irqrestore(io_lock, flags);
+		}
+	}
+
+}
+
+void fnic_terminate_rport_io(struct fc_rport *rport)
+{
+	int tag;
+	struct fnic_io_req *io_req;
+	spinlock_t *io_lock;
+	unsigned long flags;
+	struct scsi_cmnd *sc;
+	struct scsi_lun fc_lun;
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+	struct fnic *fnic = lport_priv(lport);
+	struct fc_rport *cmd_rport;
+	enum fnic_ioreq_state old_ioreq_state;
+
+	FNIC_SCSI_DBG(KERN_DEBUG,
+		      fnic->lport->host, "fnic_terminate_rport_io called"
+		      " wwpn 0x%llx, wwnn0x%llx, portid 0x%06x\n",
+		      rport->port_name, rport->node_name,
+		      rport->port_id);
+
+	if (fnic->in_remove)
+		return;
+
+	for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+		sc = scsi_host_find_tag(fnic->lport->host, tag);
+		if (!sc)
+			continue;
+
+		cmd_rport = starget_to_rport(scsi_target(sc->device));
+		if (rport != cmd_rport)
+			continue;
+
+		io_lock = fnic_io_lock_hash(fnic, sc);
+		spin_lock_irqsave(io_lock, flags);
+
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+
+		if (!io_req || rport != cmd_rport) {
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+
+		/*
+		 * Found IO that is still pending with firmware and
+		 * belongs to rport that went away
+		 */
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+		old_ioreq_state = CMD_STATE(sc);
+		CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+
+		BUG_ON(io_req->abts_done);
+
+		FNIC_SCSI_DBG(KERN_DEBUG,
+			      fnic->lport->host,
+			      "fnic_terminate_rport_io: Issuing abts\n");
+
+		spin_unlock_irqrestore(io_lock, flags);
+
+		/* Now queue the abort command to firmware */
+		int_to_scsilun(sc->device->lun, &fc_lun);
+
+		if (fnic_queue_abort_io_req(fnic, tag,
+					    FCPIO_ITMF_ABT_TASK_TERM,
+					    fc_lun.scsi_lun, io_req)) {
+			/*
+			 * Revert the cmd state back to old state, if
+			 * it hasnt changed in between. This cmd will get
+			 * aborted later by scsi_eh, or cleaned up during
+			 * lun reset
+			 */
+			io_lock = fnic_io_lock_hash(fnic, sc);
+
+			spin_lock_irqsave(io_lock, flags);
+			if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+				CMD_STATE(sc) = old_ioreq_state;
+			spin_unlock_irqrestore(io_lock, flags);
+		}
+	}
+
+}
+
+static void fnic_block_error_handler(struct scsi_cmnd *sc)
+{
+	struct Scsi_Host *shost = sc->device->host;
+	struct fc_rport *rport = starget_to_rport(scsi_target(sc->device));
+	unsigned long flags;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	while (rport->port_state == FC_PORTSTATE_BLOCKED) {
+		spin_unlock_irqrestore(shost->host_lock, flags);
+		msleep(1000);
+		spin_lock_irqsave(shost->host_lock, flags);
+	}
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
+}
+
+/*
+ * This function is exported to SCSI for sending abort cmnds.
+ * A SCSI IO is represented by a io_req in the driver.
+ * The ioreq is linked to the SCSI Cmd, thus a link with the ULP's IO.
+ */
+int fnic_abort_cmd(struct scsi_cmnd *sc)
+{
+	struct fc_lport *lp;
+	struct fnic *fnic;
+	struct fnic_io_req *io_req;
+	struct fc_rport *rport;
+	spinlock_t *io_lock;
+	unsigned long flags;
+	int ret = SUCCESS;
+	u32 task_req;
+	struct scsi_lun fc_lun;
+	DECLARE_COMPLETION_ONSTACK(tm_done);
+
+	/* Wait for rport to unblock */
+	fnic_block_error_handler(sc);
+
+	/* Get local-port, check ready and link up */
+	lp = shost_priv(sc->device->host);
+
+	fnic = lport_priv(lp);
+	FNIC_SCSI_DBG(KERN_DEBUG,
+		      fnic->lport->host,
+		      "Abort Cmd called FCID 0x%x, LUN 0x%x TAG %d\n",
+		      (starget_to_rport(scsi_target(sc->device)))->port_id,
+		      sc->device->lun, sc->request->tag);
+
+	if (lp->state != LPORT_ST_READY || !(lp->link_up)) {
+		ret = FAILED;
+		goto fnic_abort_cmd_end;
+	}
+
+	/*
+	 * Avoid a race between SCSI issuing the abort and the device
+	 * completing the command.
+	 *
+	 * If the command is already completed by the fw cmpl code,
+	 * we just return SUCCESS from here. This means that the abort
+	 * succeeded. In the SCSI ML, since the timeout for command has
+	 * happened, the completion wont actually complete the command
+	 * and it will be considered as an aborted command
+	 *
+	 * The CMD_SP will not be cleared except while holding io_req_lock.
+	 */
+	io_lock = fnic_io_lock_hash(fnic, sc);
+	spin_lock_irqsave(io_lock, flags);
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	if (!io_req) {
+		spin_unlock_irqrestore(io_lock, flags);
+		goto fnic_abort_cmd_end;
+	}
+
+	io_req->abts_done = &tm_done;
+
+	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+		spin_unlock_irqrestore(io_lock, flags);
+		goto wait_pending;
+	}
+	/*
+	 * Command is still pending, need to abort it
+	 * If the firmware completes the command after this point,
+	 * the completion wont be done till mid-layer, since abort
+	 * has already started.
+	 */
+	CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+	CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+
+	spin_unlock_irqrestore(io_lock, flags);
+
+	/*
+	 * Check readiness of the remote port. If the path to remote
+	 * port is up, then send abts to the remote port to terminate
+	 * the IO. Else, just locally terminate the IO in the firmware
+	 */
+	rport = starget_to_rport(scsi_target(sc->device));
+	if (fc_remote_port_chkready(rport) == 0)
+		task_req = FCPIO_ITMF_ABT_TASK;
+	else
+		task_req = FCPIO_ITMF_ABT_TASK_TERM;
+
+	/* Now queue the abort command to firmware */
+	int_to_scsilun(sc->device->lun, &fc_lun);
+
+	if (fnic_queue_abort_io_req(fnic, sc->request->tag, task_req,
+				    fc_lun.scsi_lun, io_req)) {
+		spin_lock_irqsave(io_lock, flags);
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+		if (io_req)
+			io_req->abts_done = NULL;
+		spin_unlock_irqrestore(io_lock, flags);
+		ret = FAILED;
+		goto fnic_abort_cmd_end;
+	}
+
+	/*
+	 * We queued an abort IO, wait for its completion.
+	 * Once the firmware completes the abort command, it will
+	 * wake up this thread.
+	 */
+ wait_pending:
+	wait_for_completion_timeout(&tm_done,
+				    msecs_to_jiffies
+				    (2 * fnic->config.ra_tov +
+				     fnic->config.ed_tov));
+
+	/* Check the abort status */
+	spin_lock_irqsave(io_lock, flags);
+
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	if (!io_req) {
+		spin_unlock_irqrestore(io_lock, flags);
+		ret = FAILED;
+		goto fnic_abort_cmd_end;
+	}
+	io_req->abts_done = NULL;
+
+	/* fw did not complete abort, timed out */
+	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+		spin_unlock_irqrestore(io_lock, flags);
+		ret = FAILED;
+		goto fnic_abort_cmd_end;
+	}
+
+	/*
+	 * firmware completed the abort, check the status,
+	 * free the io_req irrespective of failure or success
+	 */
+	if (CMD_ABTS_STATUS(sc) != FCPIO_SUCCESS)
+		ret = FAILED;
+
+	CMD_SP(sc) = NULL;
+
+	spin_unlock_irqrestore(io_lock, flags);
+
+	fnic_release_ioreq_buf(fnic, io_req, sc);
+	mempool_free(io_req, fnic->io_req_pool);
+
+fnic_abort_cmd_end:
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		      "Returning from abort cmd %s\n",
+		      (ret == SUCCESS) ?
+		      "SUCCESS" : "FAILED");
+	return ret;
+}
+
+static inline int fnic_queue_dr_io_req(struct fnic *fnic,
+				       struct scsi_cmnd *sc,
+				       struct fnic_io_req *io_req)
+{
+	struct vnic_wq_copy *wq = &fnic->wq_copy[0];
+	struct scsi_lun fc_lun;
+	int ret = 0;
+	unsigned long intr_flags;
+
+	spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags);
+
+	if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
+		free_wq_copy_descs(fnic, wq);
+
+	if (!vnic_wq_copy_desc_avail(wq)) {
+		ret = -EAGAIN;
+		goto lr_io_req_end;
+	}
+
+	/* fill in the lun info */
+	int_to_scsilun(sc->device->lun, &fc_lun);
+
+	fnic_queue_wq_copy_desc_itmf(wq, sc->request->tag | FNIC_TAG_DEV_RST,
+				     0, FCPIO_ITMF_LUN_RESET, SCSI_NO_TAG,
+				     fc_lun.scsi_lun, io_req->port_id,
+				     fnic->config.ra_tov, fnic->config.ed_tov);
+
+lr_io_req_end:
+	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags);
+
+	return ret;
+}
+
+/*
+ * Clean up any pending aborts on the lun
+ * For each outstanding IO on this lun, whose abort is not completed by fw,
+ * issue a local abort. Wait for abort to complete. Return 0 if all commands
+ * successfully aborted, 1 otherwise
+ */
+static int fnic_clean_pending_aborts(struct fnic *fnic,
+				     struct scsi_cmnd *lr_sc)
+{
+	int tag;
+	struct fnic_io_req *io_req;
+	spinlock_t *io_lock;
+	unsigned long flags;
+	int ret = 0;
+	struct scsi_cmnd *sc;
+	struct fc_rport *rport;
+	struct scsi_lun fc_lun;
+	struct scsi_device *lun_dev = lr_sc->device;
+	DECLARE_COMPLETION_ONSTACK(tm_done);
+
+	for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+		sc = scsi_host_find_tag(fnic->lport->host, tag);
+		/*
+		 * ignore this lun reset cmd or cmds that do not belong to
+		 * this lun
+		 */
+		if (!sc || sc == lr_sc || sc->device != lun_dev)
+			continue;
+
+		io_lock = fnic_io_lock_hash(fnic, sc);
+		spin_lock_irqsave(io_lock, flags);
+
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+
+		if (!io_req || sc->device != lun_dev) {
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+
+		/*
+		 * Found IO that is still pending with firmware and
+		 * belongs to the LUN that we are resetting
+		 */
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "Found IO in %s on lun\n",
+			      fnic_ioreq_state_to_str(CMD_STATE(sc)));
+
+		BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING);
+
+		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+		io_req->abts_done = &tm_done;
+		spin_unlock_irqrestore(io_lock, flags);
+
+		/* Now queue the abort command to firmware */
+		int_to_scsilun(sc->device->lun, &fc_lun);
+		rport = starget_to_rport(scsi_target(sc->device));
+
+		if (fnic_queue_abort_io_req(fnic, tag,
+					    FCPIO_ITMF_ABT_TASK_TERM,
+					    fc_lun.scsi_lun, io_req)) {
+			spin_lock_irqsave(io_lock, flags);
+			io_req = (struct fnic_io_req *)CMD_SP(sc);
+			if (io_req)
+				io_req->abts_done = NULL;
+			spin_unlock_irqrestore(io_lock, flags);
+			ret = 1;
+			goto clean_pending_aborts_end;
+		}
+
+		wait_for_completion_timeout(&tm_done,
+					    msecs_to_jiffies
+					    (fnic->config.ed_tov));
+
+		/* Recheck cmd state to check if it is now aborted */
+		spin_lock_irqsave(io_lock, flags);
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+		if (!io_req) {
+			spin_unlock_irqrestore(io_lock, flags);
+			ret = 1;
+			goto clean_pending_aborts_end;
+		}
+
+		io_req->abts_done = NULL;
+
+		/* if abort is still pending with fw, fail */
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+			spin_unlock_irqrestore(io_lock, flags);
+			ret = 1;
+			goto clean_pending_aborts_end;
+		}
+		CMD_SP(sc) = NULL;
+		spin_unlock_irqrestore(io_lock, flags);
+
+		fnic_release_ioreq_buf(fnic, io_req, sc);
+		mempool_free(io_req, fnic->io_req_pool);
+	}
+
+clean_pending_aborts_end:
+	return ret;
+}
+
+/*
+ * SCSI Eh thread issues a Lun Reset when one or more commands on a LUN
+ * fail to get aborted. It calls driver's eh_device_reset with a SCSI command
+ * on the LUN.
+ */
+int fnic_device_reset(struct scsi_cmnd *sc)
+{
+	struct fc_lport *lp;
+	struct fnic *fnic;
+	struct fnic_io_req *io_req;
+	struct fc_rport *rport;
+	int status;
+	int ret = FAILED;
+	spinlock_t *io_lock;
+	unsigned long flags;
+	DECLARE_COMPLETION_ONSTACK(tm_done);
+
+	/* Wait for rport to unblock */
+	fnic_block_error_handler(sc);
+
+	/* Get local-port, check ready and link up */
+	lp = shost_priv(sc->device->host);
+
+	fnic = lport_priv(lp);
+	FNIC_SCSI_DBG(KERN_DEBUG,
+		      fnic->lport->host,
+		      "Device reset called FCID 0x%x, LUN 0x%x\n",
+		      (starget_to_rport(scsi_target(sc->device)))->port_id,
+		      sc->device->lun);
+
+
+	if (lp->state != LPORT_ST_READY || !(lp->link_up))
+		goto fnic_device_reset_end;
+
+	/* Check if remote port up */
+	rport = starget_to_rport(scsi_target(sc->device));
+	if (fc_remote_port_chkready(rport))
+		goto fnic_device_reset_end;
+
+	io_lock = fnic_io_lock_hash(fnic, sc);
+	spin_lock_irqsave(io_lock, flags);
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+
+	/*
+	 * If there is a io_req attached to this command, then use it,
+	 * else allocate a new one.
+	 */
+	if (!io_req) {
+		io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
+		if (!io_req) {
+			spin_unlock_irqrestore(io_lock, flags);
+			goto fnic_device_reset_end;
+		}
+		memset(io_req, 0, sizeof(*io_req));
+		io_req->port_id = rport->port_id;
+		CMD_SP(sc) = (char *)io_req;
+	}
+	io_req->dr_done = &tm_done;
+	CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING;
+	CMD_LR_STATUS(sc) = FCPIO_INVALID_CODE;
+	spin_unlock_irqrestore(io_lock, flags);
+
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %d\n",
+		      sc->request->tag);
+
+	/*
+	 * issue the device reset, if enqueue failed, clean up the ioreq
+	 * and break assoc with scsi cmd
+	 */
+	if (fnic_queue_dr_io_req(fnic, sc, io_req)) {
+		spin_lock_irqsave(io_lock, flags);
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+		if (io_req)
+			io_req->dr_done = NULL;
+		goto fnic_device_reset_clean;
+	}
+
+	/*
+	 * Wait on the local completion for LUN reset.  The io_req may be
+	 * freed while we wait since we hold no lock.
+	 */
+	wait_for_completion_timeout(&tm_done,
+				    msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT));
+
+	spin_lock_irqsave(io_lock, flags);
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	if (!io_req) {
+		spin_unlock_irqrestore(io_lock, flags);
+		goto fnic_device_reset_end;
+	}
+	io_req->dr_done = NULL;
+
+	status = CMD_LR_STATUS(sc);
+	spin_unlock_irqrestore(io_lock, flags);
+
+	/*
+	 * If lun reset not completed, bail out with failed. io_req
+	 * gets cleaned up during higher levels of EH
+	 */
+	if (status == FCPIO_INVALID_CODE) {
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "Device reset timed out\n");
+		goto fnic_device_reset_end;
+	}
+
+	/* Completed, but not successful, clean up the io_req, return fail */
+	if (status != FCPIO_SUCCESS) {
+		spin_lock_irqsave(io_lock, flags);
+		FNIC_SCSI_DBG(KERN_DEBUG,
+			      fnic->lport->host,
+			      "Device reset completed - failed\n");
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+		goto fnic_device_reset_clean;
+	}
+
+	/*
+	 * Clean up any aborts on this lun that have still not
+	 * completed. If any of these fail, then LUN reset fails.
+	 * clean_pending_aborts cleans all cmds on this lun except
+	 * the lun reset cmd. If all cmds get cleaned, the lun reset
+	 * succeeds
+	 */
+	if (fnic_clean_pending_aborts(fnic, sc)) {
+		spin_lock_irqsave(io_lock, flags);
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "Device reset failed"
+			      " since could not abort all IOs\n");
+		goto fnic_device_reset_clean;
+	}
+
+	/* Clean lun reset command */
+	spin_lock_irqsave(io_lock, flags);
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	if (io_req)
+		/* Completed, and successful */
+		ret = SUCCESS;
+
+fnic_device_reset_clean:
+	if (io_req)
+		CMD_SP(sc) = NULL;
+
+	spin_unlock_irqrestore(io_lock, flags);
+
+	if (io_req) {
+		fnic_release_ioreq_buf(fnic, io_req, sc);
+		mempool_free(io_req, fnic->io_req_pool);
+	}
+
+fnic_device_reset_end:
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		      "Returning from device reset %s\n",
+		      (ret == SUCCESS) ?
+		      "SUCCESS" : "FAILED");
+	return ret;
+}
+
+/* Clean up all IOs, clean up libFC local port */
+int fnic_reset(struct Scsi_Host *shost)
+{
+	struct fc_lport *lp;
+	struct fnic *fnic;
+	int ret = SUCCESS;
+
+	lp = shost_priv(shost);
+	fnic = lport_priv(lp);
+
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		      "fnic_reset called\n");
+
+	/*
+	 * Reset local port, this will clean up libFC exchanges,
+	 * reset remote port sessions, and if link is up, begin flogi
+	 */
+	if (lp->tt.lport_reset(lp))
+		ret = FAILED;
+
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		      "Returning from fnic reset %s\n",
+		      (ret == SUCCESS) ?
+		      "SUCCESS" : "FAILED");
+
+	return ret;
+}
+
+/*
+ * SCSI Error handling calls driver's eh_host_reset if all prior
+ * error handling levels return FAILED. If host reset completes
+ * successfully, and if link is up, then Fabric login begins.
+ *
+ * Host Reset is the highest level of error recovery. If this fails, then
+ * host is offlined by SCSI.
+ *
+ */
+int fnic_host_reset(struct scsi_cmnd *sc)
+{
+	int ret;
+	unsigned long wait_host_tmo;
+	struct Scsi_Host *shost = sc->device->host;
+	struct fc_lport *lp = shost_priv(shost);
+
+	/*
+	 * If fnic_reset is successful, wait for fabric login to complete
+	 * scsi-ml tries to send a TUR to every device if host reset is
+	 * successful, so before returning to scsi, fabric should be up
+	 */
+	ret = fnic_reset(shost);
+	if (ret == SUCCESS) {
+		wait_host_tmo = jiffies + FNIC_HOST_RESET_SETTLE_TIME * HZ;
+		ret = FAILED;
+		while (time_before(jiffies, wait_host_tmo)) {
+			if ((lp->state == LPORT_ST_READY) &&
+			    (lp->link_up)) {
+				ret = SUCCESS;
+				break;
+			}
+			ssleep(1);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * This fxn is called from libFC when host is removed
+ */
+void fnic_scsi_abort_io(struct fc_lport *lp)
+{
+	int err = 0;
+	unsigned long flags;
+	enum fnic_state old_state;
+	struct fnic *fnic = lport_priv(lp);
+	DECLARE_COMPLETION_ONSTACK(remove_wait);
+
+	/* Issue firmware reset for fnic, wait for reset to complete */
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	fnic->remove_wait = &remove_wait;
+	old_state = fnic->state;
+	fnic->state = FNIC_IN_FC_TRANS_ETH_MODE;
+	vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr);
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	err = fnic_fw_reset_handler(fnic);
+	if (err) {
+		spin_lock_irqsave(&fnic->fnic_lock, flags);
+		if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)
+			fnic->state = old_state;
+		fnic->remove_wait = NULL;
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		return;
+	}
+
+	/* Wait for firmware reset to complete */
+	wait_for_completion_timeout(&remove_wait,
+				    msecs_to_jiffies(FNIC_RMDEVICE_TIMEOUT));
+
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	fnic->remove_wait = NULL;
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		      "fnic_scsi_abort_io %s\n",
+		      (fnic->state == FNIC_IN_ETH_MODE) ?
+		      "SUCCESS" : "FAILED");
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+}
+
+/*
+ * This fxn called from libFC to clean up driver IO state on link down
+ */
+void fnic_scsi_cleanup(struct fc_lport *lp)
+{
+	unsigned long flags;
+	enum fnic_state old_state;
+	struct fnic *fnic = lport_priv(lp);
+
+	/* issue fw reset */
+	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	old_state = fnic->state;
+	fnic->state = FNIC_IN_FC_TRANS_ETH_MODE;
+	vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr);
+	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+	if (fnic_fw_reset_handler(fnic)) {
+		spin_lock_irqsave(&fnic->fnic_lock, flags);
+		if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)
+			fnic->state = old_state;
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+	}
+
+}
+
+void fnic_empty_scsi_cleanup(struct fc_lport *lp)
+{
+}
+
+void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did)
+{
+	struct fnic *fnic = lport_priv(lp);
+
+	/* Non-zero sid, nothing to do */
+	if (sid)
+		goto call_fc_exch_mgr_reset;
+
+	if (did) {
+		fnic_rport_exch_reset(fnic, did);
+		goto call_fc_exch_mgr_reset;
+	}
+
+	/*
+	 * sid = 0, did = 0
+	 * link down or device being removed
+	 */
+	if (!fnic->in_remove)
+		fnic_scsi_cleanup(lp);
+	else
+		fnic_scsi_abort_io(lp);
+
+	/* call libFC exch mgr reset to reset its exchanges */
+call_fc_exch_mgr_reset:
+	fc_exch_mgr_reset(lp, sid, did);
+
+}
diff --git a/drivers/scsi/fnic/rq_enet_desc.h b/drivers/scsi/fnic/rq_enet_desc.h
new file mode 100644
index 0000000..92e80ae
--- /dev/null
+++ b/drivers/scsi/fnic/rq_enet_desc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _RQ_ENET_DESC_H_
+#define _RQ_ENET_DESC_H_
+
+/* Ethernet receive queue descriptor: 16B */
+struct rq_enet_desc {
+	__le64 address;
+	__le16 length_type;
+	u8 reserved[6];
+};
+
+enum rq_enet_type_types {
+	RQ_ENET_TYPE_ONLY_SOP = 0,
+	RQ_ENET_TYPE_NOT_SOP = 1,
+	RQ_ENET_TYPE_RESV2 = 2,
+	RQ_ENET_TYPE_RESV3 = 3,
+};
+
+#define RQ_ENET_ADDR_BITS		64
+#define RQ_ENET_LEN_BITS		14
+#define RQ_ENET_LEN_MASK		((1 << RQ_ENET_LEN_BITS) - 1)
+#define RQ_ENET_TYPE_BITS		2
+#define RQ_ENET_TYPE_MASK		((1 << RQ_ENET_TYPE_BITS) - 1)
+
+static inline void rq_enet_desc_enc(struct rq_enet_desc *desc,
+	u64 address, u8 type, u16 length)
+{
+	desc->address = cpu_to_le64(address);
+	desc->length_type = cpu_to_le16((length & RQ_ENET_LEN_MASK) |
+		((type & RQ_ENET_TYPE_MASK) << RQ_ENET_LEN_BITS));
+}
+
+static inline void rq_enet_desc_dec(struct rq_enet_desc *desc,
+	u64 *address, u8 *type, u16 *length)
+{
+	*address = le64_to_cpu(desc->address);
+	*length = le16_to_cpu(desc->length_type) & RQ_ENET_LEN_MASK;
+	*type = (u8)((le16_to_cpu(desc->length_type) >> RQ_ENET_LEN_BITS) &
+		RQ_ENET_TYPE_MASK);
+}
+
+#endif /* _RQ_ENET_DESC_H_ */
diff --git a/drivers/scsi/fnic/vnic_cq.c b/drivers/scsi/fnic/vnic_cq.c
new file mode 100644
index 0000000..c5db32e
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_cq.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include "vnic_dev.h"
+#include "vnic_cq.h"
+
+void vnic_cq_free(struct vnic_cq *cq)
+{
+	vnic_dev_free_desc_ring(cq->vdev, &cq->ring);
+
+	cq->ctrl = NULL;
+}
+
+int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index,
+	unsigned int desc_count, unsigned int desc_size)
+{
+	int err;
+
+	cq->index = index;
+	cq->vdev = vdev;
+
+	cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index);
+	if (!cq->ctrl) {
+		printk(KERN_ERR "Failed to hook CQ[%d] resource\n", index);
+		return -EINVAL;
+	}
+
+	err = vnic_dev_alloc_desc_ring(vdev, &cq->ring, desc_count, desc_size);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable,
+	unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail,
+	unsigned int cq_tail_color, unsigned int interrupt_enable,
+	unsigned int cq_entry_enable, unsigned int cq_message_enable,
+	unsigned int interrupt_offset, u64 cq_message_addr)
+{
+	u64 paddr;
+
+	paddr = (u64)cq->ring.base_addr | VNIC_PADDR_TARGET;
+	writeq(paddr, &cq->ctrl->ring_base);
+	iowrite32(cq->ring.desc_count, &cq->ctrl->ring_size);
+	iowrite32(flow_control_enable, &cq->ctrl->flow_control_enable);
+	iowrite32(color_enable, &cq->ctrl->color_enable);
+	iowrite32(cq_head, &cq->ctrl->cq_head);
+	iowrite32(cq_tail, &cq->ctrl->cq_tail);
+	iowrite32(cq_tail_color, &cq->ctrl->cq_tail_color);
+	iowrite32(interrupt_enable, &cq->ctrl->interrupt_enable);
+	iowrite32(cq_entry_enable, &cq->ctrl->cq_entry_enable);
+	iowrite32(cq_message_enable, &cq->ctrl->cq_message_enable);
+	iowrite32(interrupt_offset, &cq->ctrl->interrupt_offset);
+	writeq(cq_message_addr, &cq->ctrl->cq_message_addr);
+}
+
+void vnic_cq_clean(struct vnic_cq *cq)
+{
+	cq->to_clean = 0;
+	cq->last_color = 0;
+
+	iowrite32(0, &cq->ctrl->cq_head);
+	iowrite32(0, &cq->ctrl->cq_tail);
+	iowrite32(1, &cq->ctrl->cq_tail_color);
+
+	vnic_dev_clear_desc_ring(&cq->ring);
+}
diff --git a/drivers/scsi/fnic/vnic_cq.h b/drivers/scsi/fnic/vnic_cq.h
new file mode 100644
index 0000000..4ede680
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_cq.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_CQ_H_
+#define _VNIC_CQ_H_
+
+#include "cq_desc.h"
+#include "vnic_dev.h"
+
+/*
+ * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth
+ * Driver) when both are built with CONFIG options =y
+ */
+#define vnic_cq_service fnic_cq_service
+#define vnic_cq_free fnic_cq_free
+#define vnic_cq_alloc fnic_cq_alloc
+#define vnic_cq_init fnic_cq_init
+#define vnic_cq_clean fnic_cq_clean
+
+/* Completion queue control */
+struct vnic_cq_ctrl {
+	u64 ring_base;			/* 0x00 */
+	u32 ring_size;			/* 0x08 */
+	u32 pad0;
+	u32 flow_control_enable;	/* 0x10 */
+	u32 pad1;
+	u32 color_enable;		/* 0x18 */
+	u32 pad2;
+	u32 cq_head;			/* 0x20 */
+	u32 pad3;
+	u32 cq_tail;			/* 0x28 */
+	u32 pad4;
+	u32 cq_tail_color;		/* 0x30 */
+	u32 pad5;
+	u32 interrupt_enable;		/* 0x38 */
+	u32 pad6;
+	u32 cq_entry_enable;		/* 0x40 */
+	u32 pad7;
+	u32 cq_message_enable;		/* 0x48 */
+	u32 pad8;
+	u32 interrupt_offset;		/* 0x50 */
+	u32 pad9;
+	u64 cq_message_addr;		/* 0x58 */
+	u32 pad10;
+};
+
+struct vnic_cq {
+	unsigned int index;
+	struct vnic_dev *vdev;
+	struct vnic_cq_ctrl __iomem *ctrl;	/* memory-mapped */
+	struct vnic_dev_ring ring;
+	unsigned int to_clean;
+	unsigned int last_color;
+};
+
+static inline unsigned int vnic_cq_service(struct vnic_cq *cq,
+	unsigned int work_to_do,
+	int (*q_service)(struct vnic_dev *vdev, struct cq_desc *cq_desc,
+	u8 type, u16 q_number, u16 completed_index, void *opaque),
+	void *opaque)
+{
+	struct cq_desc *cq_desc;
+	unsigned int work_done = 0;
+	u16 q_number, completed_index;
+	u8 type, color;
+
+	cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs +
+		cq->ring.desc_size * cq->to_clean);
+	cq_desc_dec(cq_desc, &type, &color,
+		&q_number, &completed_index);
+
+	while (color != cq->last_color) {
+
+		if ((*q_service)(cq->vdev, cq_desc, type,
+			q_number, completed_index, opaque))
+			break;
+
+		cq->to_clean++;
+		if (cq->to_clean == cq->ring.desc_count) {
+			cq->to_clean = 0;
+			cq->last_color = cq->last_color ? 0 : 1;
+		}
+
+		cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs +
+			cq->ring.desc_size * cq->to_clean);
+		cq_desc_dec(cq_desc, &type, &color,
+			&q_number, &completed_index);
+
+		work_done++;
+		if (work_done >= work_to_do)
+			break;
+	}
+
+	return work_done;
+}
+
+void vnic_cq_free(struct vnic_cq *cq);
+int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index,
+	unsigned int desc_count, unsigned int desc_size);
+void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable,
+	unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail,
+	unsigned int cq_tail_color, unsigned int interrupt_enable,
+	unsigned int cq_entry_enable, unsigned int message_enable,
+	unsigned int interrupt_offset, u64 message_addr);
+void vnic_cq_clean(struct vnic_cq *cq);
+
+#endif /* _VNIC_CQ_H_ */
diff --git a/drivers/scsi/fnic/vnic_cq_copy.h b/drivers/scsi/fnic/vnic_cq_copy.h
new file mode 100644
index 0000000..7901ce2
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_cq_copy.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_CQ_COPY_H_
+#define _VNIC_CQ_COPY_H_
+
+#include "fcpio.h"
+
+static inline unsigned int vnic_cq_copy_service(
+	struct vnic_cq *cq,
+	int (*q_service)(struct vnic_dev *vdev,
+			 unsigned int index,
+			 struct fcpio_fw_req *desc),
+	unsigned int work_to_do)
+
+{
+	struct fcpio_fw_req *desc;
+	unsigned int work_done = 0;
+	u8 color;
+
+	desc = (struct fcpio_fw_req *)((u8 *)cq->ring.descs +
+		cq->ring.desc_size * cq->to_clean);
+	fcpio_color_dec(desc, &color);
+
+	while (color != cq->last_color) {
+
+		if ((*q_service)(cq->vdev, cq->index, desc))
+			break;
+
+		cq->to_clean++;
+		if (cq->to_clean == cq->ring.desc_count) {
+			cq->to_clean = 0;
+			cq->last_color = cq->last_color ? 0 : 1;
+		}
+
+		desc = (struct fcpio_fw_req *)((u8 *)cq->ring.descs +
+			cq->ring.desc_size * cq->to_clean);
+		fcpio_color_dec(desc, &color);
+
+		work_done++;
+		if (work_done >= work_to_do)
+			break;
+	}
+
+	return work_done;
+}
+
+#endif /* _VNIC_CQ_COPY_H_ */
diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c
new file mode 100644
index 0000000..5667706
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_dev.c
@@ -0,0 +1,690 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/if_ether.h>
+#include "vnic_resource.h"
+#include "vnic_devcmd.h"
+#include "vnic_dev.h"
+#include "vnic_stats.h"
+
+struct vnic_res {
+	void __iomem *vaddr;
+	unsigned int count;
+};
+
+struct vnic_dev {
+	void *priv;
+	struct pci_dev *pdev;
+	struct vnic_res res[RES_TYPE_MAX];
+	enum vnic_dev_intr_mode intr_mode;
+	struct vnic_devcmd __iomem *devcmd;
+	struct vnic_devcmd_notify *notify;
+	struct vnic_devcmd_notify notify_copy;
+	dma_addr_t notify_pa;
+	u32 *linkstatus;
+	dma_addr_t linkstatus_pa;
+	struct vnic_stats *stats;
+	dma_addr_t stats_pa;
+	struct vnic_devcmd_fw_info *fw_info;
+	dma_addr_t fw_info_pa;
+};
+
+#define VNIC_MAX_RES_HDR_SIZE \
+	(sizeof(struct vnic_resource_header) + \
+	sizeof(struct vnic_resource) * RES_TYPE_MAX)
+#define VNIC_RES_STRIDE	128
+
+void *vnic_dev_priv(struct vnic_dev *vdev)
+{
+	return vdev->priv;
+}
+
+static int vnic_dev_discover_res(struct vnic_dev *vdev,
+	struct vnic_dev_bar *bar)
+{
+	struct vnic_resource_header __iomem *rh;
+	struct vnic_resource __iomem *r;
+	u8 type;
+
+	if (bar->len < VNIC_MAX_RES_HDR_SIZE) {
+		printk(KERN_ERR "vNIC BAR0 res hdr length error\n");
+		return -EINVAL;
+	}
+
+	rh = bar->vaddr;
+	if (!rh) {
+		printk(KERN_ERR "vNIC BAR0 res hdr not mem-mapped\n");
+		return -EINVAL;
+	}
+
+	if (ioread32(&rh->magic) != VNIC_RES_MAGIC ||
+	    ioread32(&rh->version) != VNIC_RES_VERSION) {
+		printk(KERN_ERR "vNIC BAR0 res magic/version error "
+			"exp (%lx/%lx) curr (%x/%x)\n",
+			VNIC_RES_MAGIC, VNIC_RES_VERSION,
+			ioread32(&rh->magic), ioread32(&rh->version));
+		return -EINVAL;
+	}
+
+	r = (struct vnic_resource __iomem *)(rh + 1);
+
+	while ((type = ioread8(&r->type)) != RES_TYPE_EOL) {
+
+		u8 bar_num = ioread8(&r->bar);
+		u32 bar_offset = ioread32(&r->bar_offset);
+		u32 count = ioread32(&r->count);
+		u32 len;
+
+		r++;
+
+		if (bar_num != 0)  /* only mapping in BAR0 resources */
+			continue;
+
+		switch (type) {
+		case RES_TYPE_WQ:
+		case RES_TYPE_RQ:
+		case RES_TYPE_CQ:
+		case RES_TYPE_INTR_CTRL:
+			/* each count is stride bytes long */
+			len = count * VNIC_RES_STRIDE;
+			if (len + bar_offset > bar->len) {
+				printk(KERN_ERR "vNIC BAR0 resource %d "
+					"out-of-bounds, offset 0x%x + "
+					"size 0x%x > bar len 0x%lx\n",
+					type, bar_offset,
+					len,
+					bar->len);
+				return -EINVAL;
+			}
+			break;
+		case RES_TYPE_INTR_PBA_LEGACY:
+		case RES_TYPE_DEVCMD:
+			len = count;
+			break;
+		default:
+			continue;
+		}
+
+		vdev->res[type].count = count;
+		vdev->res[type].vaddr = (char __iomem *)bar->vaddr + bar_offset;
+	}
+
+	return 0;
+}
+
+unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev,
+	enum vnic_res_type type)
+{
+	return vdev->res[type].count;
+}
+
+void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type,
+	unsigned int index)
+{
+	if (!vdev->res[type].vaddr)
+		return NULL;
+
+	switch (type) {
+	case RES_TYPE_WQ:
+	case RES_TYPE_RQ:
+	case RES_TYPE_CQ:
+	case RES_TYPE_INTR_CTRL:
+		return (char __iomem *)vdev->res[type].vaddr +
+					index * VNIC_RES_STRIDE;
+	default:
+		return (char __iomem *)vdev->res[type].vaddr;
+	}
+}
+
+unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring,
+				     unsigned int desc_count,
+				     unsigned int desc_size)
+{
+	/* The base address of the desc rings must be 512 byte aligned.
+	 * Descriptor count is aligned to groups of 32 descriptors.  A
+	 * count of 0 means the maximum 4096 descriptors.  Descriptor
+	 * size is aligned to 16 bytes.
+	 */
+
+	unsigned int count_align = 32;
+	unsigned int desc_align = 16;
+
+	ring->base_align = 512;
+
+	if (desc_count == 0)
+		desc_count = 4096;
+
+	ring->desc_count = ALIGN(desc_count, count_align);
+
+	ring->desc_size = ALIGN(desc_size, desc_align);
+
+	ring->size = ring->desc_count * ring->desc_size;
+	ring->size_unaligned = ring->size + ring->base_align;
+
+	return ring->size_unaligned;
+}
+
+void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring)
+{
+	memset(ring->descs, 0, ring->size);
+}
+
+int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring,
+	unsigned int desc_count, unsigned int desc_size)
+{
+	vnic_dev_desc_ring_size(ring, desc_count, desc_size);
+
+	ring->descs_unaligned = pci_alloc_consistent(vdev->pdev,
+		ring->size_unaligned,
+		&ring->base_addr_unaligned);
+
+	if (!ring->descs_unaligned) {
+		printk(KERN_ERR
+		  "Failed to allocate ring (size=%d), aborting\n",
+			(int)ring->size);
+		return -ENOMEM;
+	}
+
+	ring->base_addr = ALIGN(ring->base_addr_unaligned,
+		ring->base_align);
+	ring->descs = (u8 *)ring->descs_unaligned +
+		(ring->base_addr - ring->base_addr_unaligned);
+
+	vnic_dev_clear_desc_ring(ring);
+
+	ring->desc_avail = ring->desc_count - 1;
+
+	return 0;
+}
+
+void vnic_dev_free_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring)
+{
+	if (ring->descs) {
+		pci_free_consistent(vdev->pdev,
+			ring->size_unaligned,
+			ring->descs_unaligned,
+			ring->base_addr_unaligned);
+		ring->descs = NULL;
+	}
+}
+
+int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
+	u64 *a0, u64 *a1, int wait)
+{
+	struct vnic_devcmd __iomem *devcmd = vdev->devcmd;
+	int delay;
+	u32 status;
+	int dev_cmd_err[] = {
+		/* convert from fw's version of error.h to host's version */
+		0,	/* ERR_SUCCESS */
+		EINVAL,	/* ERR_EINVAL */
+		EFAULT,	/* ERR_EFAULT */
+		EPERM,	/* ERR_EPERM */
+		EBUSY,  /* ERR_EBUSY */
+	};
+	int err;
+
+	status = ioread32(&devcmd->status);
+	if (status & STAT_BUSY) {
+		printk(KERN_ERR "Busy devcmd %d\n", _CMD_N(cmd));
+		return -EBUSY;
+	}
+
+	if (_CMD_DIR(cmd) & _CMD_DIR_WRITE) {
+		writeq(*a0, &devcmd->args[0]);
+		writeq(*a1, &devcmd->args[1]);
+		wmb();
+	}
+
+	iowrite32(cmd, &devcmd->cmd);
+
+	if ((_CMD_FLAGS(cmd) & _CMD_FLAGS_NOWAIT))
+			return 0;
+
+	for (delay = 0; delay < wait; delay++) {
+
+		udelay(100);
+
+		status = ioread32(&devcmd->status);
+		if (!(status & STAT_BUSY)) {
+
+			if (status & STAT_ERROR) {
+				err = dev_cmd_err[(int)readq(&devcmd->args[0])];
+				printk(KERN_ERR "Error %d devcmd %d\n",
+					err, _CMD_N(cmd));
+				return -err;
+			}
+
+			if (_CMD_DIR(cmd) & _CMD_DIR_READ) {
+				rmb();
+				*a0 = readq(&devcmd->args[0]);
+				*a1 = readq(&devcmd->args[1]);
+			}
+
+			return 0;
+		}
+	}
+
+	printk(KERN_ERR "Timedout devcmd %d\n", _CMD_N(cmd));
+	return -ETIMEDOUT;
+}
+
+int vnic_dev_fw_info(struct vnic_dev *vdev,
+	struct vnic_devcmd_fw_info **fw_info)
+{
+	u64 a0, a1 = 0;
+	int wait = 1000;
+	int err = 0;
+
+	if (!vdev->fw_info) {
+		vdev->fw_info = pci_alloc_consistent(vdev->pdev,
+			sizeof(struct vnic_devcmd_fw_info),
+			&vdev->fw_info_pa);
+		if (!vdev->fw_info)
+			return -ENOMEM;
+
+		a0 = vdev->fw_info_pa;
+
+		/* only get fw_info once and cache it */
+		err = vnic_dev_cmd(vdev, CMD_MCPU_FW_INFO, &a0, &a1, wait);
+	}
+
+	*fw_info = vdev->fw_info;
+
+	return err;
+}
+
+int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, unsigned int size,
+	void *value)
+{
+	u64 a0, a1;
+	int wait = 1000;
+	int err;
+
+	a0 = offset;
+	a1 = size;
+
+	err = vnic_dev_cmd(vdev, CMD_DEV_SPEC, &a0, &a1, wait);
+
+	switch (size) {
+	case 1:
+		*(u8 *)value = (u8)a0;
+		break;
+	case 2:
+		*(u16 *)value = (u16)a0;
+		break;
+	case 4:
+		*(u32 *)value = (u32)a0;
+		break;
+	case 8:
+		*(u64 *)value = a0;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	return err;
+}
+
+int vnic_dev_stats_clear(struct vnic_dev *vdev)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	return vnic_dev_cmd(vdev, CMD_STATS_CLEAR, &a0, &a1, wait);
+}
+
+int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats)
+{
+	u64 a0, a1;
+	int wait = 1000;
+
+	if (!vdev->stats) {
+		vdev->stats = pci_alloc_consistent(vdev->pdev,
+			sizeof(struct vnic_stats), &vdev->stats_pa);
+		if (!vdev->stats)
+			return -ENOMEM;
+	}
+
+	*stats = vdev->stats;
+	a0 = vdev->stats_pa;
+	a1 = sizeof(struct vnic_stats);
+
+	return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait);
+}
+
+int vnic_dev_close(struct vnic_dev *vdev)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	return vnic_dev_cmd(vdev, CMD_CLOSE, &a0, &a1, wait);
+}
+
+int vnic_dev_enable(struct vnic_dev *vdev)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait);
+}
+
+int vnic_dev_disable(struct vnic_dev *vdev)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	return vnic_dev_cmd(vdev, CMD_DISABLE, &a0, &a1, wait);
+}
+
+int vnic_dev_open(struct vnic_dev *vdev, int arg)
+{
+	u64 a0 = (u32)arg, a1 = 0;
+	int wait = 1000;
+	return vnic_dev_cmd(vdev, CMD_OPEN, &a0, &a1, wait);
+}
+
+int vnic_dev_open_done(struct vnic_dev *vdev, int *done)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	int err;
+
+	*done = 0;
+
+	err = vnic_dev_cmd(vdev, CMD_OPEN_STATUS, &a0, &a1, wait);
+	if (err)
+		return err;
+
+	*done = (a0 == 0);
+
+	return 0;
+}
+
+int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg)
+{
+	u64 a0 = (u32)arg, a1 = 0;
+	int wait = 1000;
+	return vnic_dev_cmd(vdev, CMD_SOFT_RESET, &a0, &a1, wait);
+}
+
+int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	int err;
+
+	*done = 0;
+
+	err = vnic_dev_cmd(vdev, CMD_SOFT_RESET_STATUS, &a0, &a1, wait);
+	if (err)
+		return err;
+
+	*done = (a0 == 0);
+
+	return 0;
+}
+
+int vnic_dev_hang_notify(struct vnic_dev *vdev)
+{
+	u64 a0, a1;
+	int wait = 1000;
+	return vnic_dev_cmd(vdev, CMD_HANG_NOTIFY, &a0, &a1, wait);
+}
+
+int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr)
+{
+	u64 a0, a1;
+	int wait = 1000;
+	int err, i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		mac_addr[i] = 0;
+
+	err = vnic_dev_cmd(vdev, CMD_MAC_ADDR, &a0, &a1, wait);
+	if (err)
+		return err;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		mac_addr[i] = ((u8 *)&a0)[i];
+
+	return 0;
+}
+
+void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
+	int broadcast, int promisc, int allmulti)
+{
+	u64 a0, a1 = 0;
+	int wait = 1000;
+	int err;
+
+	a0 = (directed ? CMD_PFILTER_DIRECTED : 0) |
+	     (multicast ? CMD_PFILTER_MULTICAST : 0) |
+	     (broadcast ? CMD_PFILTER_BROADCAST : 0) |
+	     (promisc ? CMD_PFILTER_PROMISCUOUS : 0) |
+	     (allmulti ? CMD_PFILTER_ALL_MULTICAST : 0);
+
+	err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait);
+	if (err)
+		printk(KERN_ERR "Can't set packet filter\n");
+}
+
+void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	int err;
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		((u8 *)&a0)[i] = addr[i];
+
+	err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait);
+	if (err)
+		printk(KERN_ERR
+			"Can't add addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n",
+			addr[0], addr[1], addr[2], addr[3], addr[4], addr[5],
+			err);
+}
+
+void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	int err;
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		((u8 *)&a0)[i] = addr[i];
+
+	err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait);
+	if (err)
+		printk(KERN_ERR
+			"Can't del addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n",
+			addr[0], addr[1], addr[2], addr[3], addr[4], addr[5],
+			err);
+}
+
+int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr)
+{
+	u64 a0, a1;
+	int wait = 1000;
+
+	if (!vdev->notify) {
+		vdev->notify = pci_alloc_consistent(vdev->pdev,
+			sizeof(struct vnic_devcmd_notify),
+			&vdev->notify_pa);
+		if (!vdev->notify)
+			return -ENOMEM;
+	}
+
+	a0 = vdev->notify_pa;
+	a1 = ((u64)intr << 32) & 0x0000ffff00000000ULL;
+	a1 += sizeof(struct vnic_devcmd_notify);
+
+	return vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait);
+}
+
+void vnic_dev_notify_unset(struct vnic_dev *vdev)
+{
+	u64 a0, a1;
+	int wait = 1000;
+
+	a0 = 0;  /* paddr = 0 to unset notify buffer */
+	a1 = 0x0000ffff00000000ULL; /* intr num = -1 to unreg for intr */
+	a1 += sizeof(struct vnic_devcmd_notify);
+
+	vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait);
+}
+
+static int vnic_dev_notify_ready(struct vnic_dev *vdev)
+{
+	u32 *words;
+	unsigned int nwords = sizeof(struct vnic_devcmd_notify) / 4;
+	unsigned int i;
+	u32 csum;
+
+	if (!vdev->notify)
+		return 0;
+
+	do {
+		csum = 0;
+		memcpy(&vdev->notify_copy, vdev->notify,
+			sizeof(struct vnic_devcmd_notify));
+		words = (u32 *)&vdev->notify_copy;
+		for (i = 1; i < nwords; i++)
+			csum += words[i];
+	} while (csum != words[0]);
+
+	return 1;
+}
+
+int vnic_dev_init(struct vnic_dev *vdev, int arg)
+{
+	u64 a0 = (u32)arg, a1 = 0;
+	int wait = 1000;
+	return vnic_dev_cmd(vdev, CMD_INIT, &a0, &a1, wait);
+}
+
+int vnic_dev_link_status(struct vnic_dev *vdev)
+{
+	if (vdev->linkstatus)
+		return *vdev->linkstatus;
+
+	if (!vnic_dev_notify_ready(vdev))
+		return 0;
+
+	return vdev->notify_copy.link_state;
+}
+
+u32 vnic_dev_port_speed(struct vnic_dev *vdev)
+{
+	if (!vnic_dev_notify_ready(vdev))
+		return 0;
+
+	return vdev->notify_copy.port_speed;
+}
+
+u32 vnic_dev_msg_lvl(struct vnic_dev *vdev)
+{
+	if (!vnic_dev_notify_ready(vdev))
+		return 0;
+
+	return vdev->notify_copy.msglvl;
+}
+
+u32 vnic_dev_mtu(struct vnic_dev *vdev)
+{
+	if (!vnic_dev_notify_ready(vdev))
+		return 0;
+
+	return vdev->notify_copy.mtu;
+}
+
+u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev)
+{
+	if (!vnic_dev_notify_ready(vdev))
+		return 0;
+
+	return vdev->notify_copy.link_down_cnt;
+}
+
+void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
+	enum vnic_dev_intr_mode intr_mode)
+{
+	vdev->intr_mode = intr_mode;
+}
+
+enum vnic_dev_intr_mode vnic_dev_get_intr_mode(
+	struct vnic_dev *vdev)
+{
+	return vdev->intr_mode;
+}
+
+void vnic_dev_unregister(struct vnic_dev *vdev)
+{
+	if (vdev) {
+		if (vdev->notify)
+			pci_free_consistent(vdev->pdev,
+				sizeof(struct vnic_devcmd_notify),
+				vdev->notify,
+				vdev->notify_pa);
+		if (vdev->linkstatus)
+			pci_free_consistent(vdev->pdev,
+				sizeof(u32),
+				vdev->linkstatus,
+				vdev->linkstatus_pa);
+		if (vdev->stats)
+			pci_free_consistent(vdev->pdev,
+				sizeof(struct vnic_dev),
+				vdev->stats, vdev->stats_pa);
+		if (vdev->fw_info)
+			pci_free_consistent(vdev->pdev,
+				sizeof(struct vnic_devcmd_fw_info),
+				vdev->fw_info, vdev->fw_info_pa);
+		kfree(vdev);
+	}
+}
+
+struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
+	void *priv, struct pci_dev *pdev, struct vnic_dev_bar *bar)
+{
+	if (!vdev) {
+		vdev = kzalloc(sizeof(struct vnic_dev), GFP_KERNEL);
+		if (!vdev)
+			return NULL;
+	}
+
+	vdev->priv = priv;
+	vdev->pdev = pdev;
+
+	if (vnic_dev_discover_res(vdev, bar))
+		goto err_out;
+
+	vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0);
+	if (!vdev->devcmd)
+		goto err_out;
+
+	return vdev;
+
+err_out:
+	vnic_dev_unregister(vdev);
+	return NULL;
+}
diff --git a/drivers/scsi/fnic/vnic_dev.h b/drivers/scsi/fnic/vnic_dev.h
new file mode 100644
index 0000000..f9935a8
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_dev.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_DEV_H_
+#define _VNIC_DEV_H_
+
+#include "vnic_resource.h"
+#include "vnic_devcmd.h"
+
+/*
+ * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth
+ * Driver) when both are built with CONFIG options =y
+ */
+#define vnic_dev_priv fnic_dev_priv
+#define vnic_dev_get_res_count fnic_dev_get_res_count
+#define vnic_dev_get_res fnic_dev_get_res
+#define vnic_dev_desc_ring_size fnic_dev_desc_ring_siz
+#define vnic_dev_clear_desc_ring fnic_dev_clear_desc_ring
+#define vnic_dev_alloc_desc_ring fnic_dev_alloc_desc_ring
+#define vnic_dev_free_desc_ring fnic_dev_free_desc_ring
+#define vnic_dev_cmd fnic_dev_cmd
+#define vnic_dev_fw_info fnic_dev_fw_info
+#define vnic_dev_spec fnic_dev_spec
+#define vnic_dev_stats_clear fnic_dev_stats_clear
+#define vnic_dev_stats_dump fnic_dev_stats_dump
+#define vnic_dev_hang_notify fnic_dev_hang_notify
+#define vnic_dev_packet_filter fnic_dev_packet_filter
+#define vnic_dev_add_addr fnic_dev_add_addr
+#define vnic_dev_del_addr fnic_dev_del_addr
+#define vnic_dev_mac_addr fnic_dev_mac_addr
+#define vnic_dev_notify_set fnic_dev_notify_set
+#define vnic_dev_notify_unset fnic_dev_notify_unset
+#define vnic_dev_link_status fnic_dev_link_status
+#define vnic_dev_port_speed fnic_dev_port_speed
+#define vnic_dev_msg_lvl fnic_dev_msg_lvl
+#define vnic_dev_mtu fnic_dev_mtu
+#define vnic_dev_link_down_cnt fnic_dev_link_down_cnt
+#define vnic_dev_close fnic_dev_close
+#define vnic_dev_enable fnic_dev_enable
+#define vnic_dev_disable fnic_dev_disable
+#define vnic_dev_open fnic_dev_open
+#define vnic_dev_open_done fnic_dev_open_done
+#define vnic_dev_init fnic_dev_init
+#define vnic_dev_soft_reset fnic_dev_soft_reset
+#define vnic_dev_soft_reset_done fnic_dev_soft_reset_done
+#define vnic_dev_set_intr_mode fnic_dev_set_intr_mode
+#define vnic_dev_get_intr_mode fnic_dev_get_intr_mode
+#define vnic_dev_unregister fnic_dev_unregister
+#define vnic_dev_register fnic_dev_register
+
+#ifndef VNIC_PADDR_TARGET
+#define VNIC_PADDR_TARGET	0x0000000000000000ULL
+#endif
+
+#ifndef readq
+static inline u64 readq(void __iomem *reg)
+{
+	return ((u64)readl(reg + 0x4UL) << 32) | (u64)readl(reg);
+}
+
+static inline void writeq(u64 val, void __iomem *reg)
+{
+	writel(val & 0xffffffff, reg);
+	writel(val >> 32, reg + 0x4UL);
+}
+#endif
+
+enum vnic_dev_intr_mode {
+	VNIC_DEV_INTR_MODE_UNKNOWN,
+	VNIC_DEV_INTR_MODE_INTX,
+	VNIC_DEV_INTR_MODE_MSI,
+	VNIC_DEV_INTR_MODE_MSIX,
+};
+
+struct vnic_dev_bar {
+	void __iomem *vaddr;
+	dma_addr_t bus_addr;
+	unsigned long len;
+};
+
+struct vnic_dev_ring {
+	void *descs;
+	size_t size;
+	dma_addr_t base_addr;
+	size_t base_align;
+	void *descs_unaligned;
+	size_t size_unaligned;
+	dma_addr_t base_addr_unaligned;
+	unsigned int desc_size;
+	unsigned int desc_count;
+	unsigned int desc_avail;
+};
+
+struct vnic_dev;
+struct vnic_stats;
+
+void *vnic_dev_priv(struct vnic_dev *vdev);
+unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev,
+				    enum vnic_res_type type);
+void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type,
+			       unsigned int index);
+unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring,
+				     unsigned int desc_count,
+				     unsigned int desc_size);
+void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring);
+int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring,
+			     unsigned int desc_count, unsigned int desc_size);
+void vnic_dev_free_desc_ring(struct vnic_dev *vdev,
+			     struct vnic_dev_ring *ring);
+int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
+		 u64 *a0, u64 *a1, int wait);
+int vnic_dev_fw_info(struct vnic_dev *vdev,
+		     struct vnic_devcmd_fw_info **fw_info);
+int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset,
+		  unsigned int size, void *value);
+int vnic_dev_stats_clear(struct vnic_dev *vdev);
+int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats);
+int vnic_dev_hang_notify(struct vnic_dev *vdev);
+void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
+			    int broadcast, int promisc, int allmulti);
+void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr);
+void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr);
+int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr);
+int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr);
+void vnic_dev_notify_unset(struct vnic_dev *vdev);
+int vnic_dev_link_status(struct vnic_dev *vdev);
+u32 vnic_dev_port_speed(struct vnic_dev *vdev);
+u32 vnic_dev_msg_lvl(struct vnic_dev *vdev);
+u32 vnic_dev_mtu(struct vnic_dev *vdev);
+u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev);
+int vnic_dev_close(struct vnic_dev *vdev);
+int vnic_dev_enable(struct vnic_dev *vdev);
+int vnic_dev_disable(struct vnic_dev *vdev);
+int vnic_dev_open(struct vnic_dev *vdev, int arg);
+int vnic_dev_open_done(struct vnic_dev *vdev, int *done);
+int vnic_dev_init(struct vnic_dev *vdev, int arg);
+int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg);
+int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done);
+void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
+			    enum vnic_dev_intr_mode intr_mode);
+enum vnic_dev_intr_mode vnic_dev_get_intr_mode(struct vnic_dev *vdev);
+void vnic_dev_unregister(struct vnic_dev *vdev);
+struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
+				   void *priv, struct pci_dev *pdev,
+				   struct vnic_dev_bar *bar);
+
+#endif /* _VNIC_DEV_H_ */
diff --git a/drivers/scsi/fnic/vnic_devcmd.h b/drivers/scsi/fnic/vnic_devcmd.h
new file mode 100644
index 0000000..d62b906
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_devcmd.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_DEVCMD_H_
+#define _VNIC_DEVCMD_H_
+
+#define _CMD_NBITS      14
+#define _CMD_VTYPEBITS	10
+#define _CMD_FLAGSBITS  6
+#define _CMD_DIRBITS	2
+
+#define _CMD_NMASK      ((1 << _CMD_NBITS)-1)
+#define _CMD_VTYPEMASK  ((1 << _CMD_VTYPEBITS)-1)
+#define _CMD_FLAGSMASK  ((1 << _CMD_FLAGSBITS)-1)
+#define _CMD_DIRMASK    ((1 << _CMD_DIRBITS)-1)
+
+#define _CMD_NSHIFT     0
+#define _CMD_VTYPESHIFT (_CMD_NSHIFT+_CMD_NBITS)
+#define _CMD_FLAGSSHIFT (_CMD_VTYPESHIFT+_CMD_VTYPEBITS)
+#define _CMD_DIRSHIFT   (_CMD_FLAGSSHIFT+_CMD_FLAGSBITS)
+
+/*
+ * Direction bits (from host perspective).
+ */
+#define _CMD_DIR_NONE   0U
+#define _CMD_DIR_WRITE  1U
+#define _CMD_DIR_READ   2U
+#define _CMD_DIR_RW     (_CMD_DIR_WRITE | _CMD_DIR_READ)
+
+/*
+ * Flag bits.
+ */
+#define _CMD_FLAGS_NONE 0U
+#define _CMD_FLAGS_NOWAIT 1U
+
+/*
+ * vNIC type bits.
+ */
+#define _CMD_VTYPE_NONE  0U
+#define _CMD_VTYPE_ENET  1U
+#define _CMD_VTYPE_FC    2U
+#define _CMD_VTYPE_SCSI  4U
+#define _CMD_VTYPE_ALL   (_CMD_VTYPE_ENET | _CMD_VTYPE_FC | _CMD_VTYPE_SCSI)
+
+/*
+ * Used to create cmds..
+*/
+#define _CMDCF(dir, flags, vtype, nr)  \
+	(((dir)   << _CMD_DIRSHIFT) | \
+	((flags) << _CMD_FLAGSSHIFT) | \
+	((vtype) << _CMD_VTYPESHIFT) | \
+	((nr)    << _CMD_NSHIFT))
+#define _CMDC(dir, vtype, nr)    _CMDCF(dir, 0, vtype, nr)
+#define _CMDCNW(dir, vtype, nr)  _CMDCF(dir, _CMD_FLAGS_NOWAIT, vtype, nr)
+
+/*
+ * Used to decode cmds..
+*/
+#define _CMD_DIR(cmd)            (((cmd) >> _CMD_DIRSHIFT) & _CMD_DIRMASK)
+#define _CMD_FLAGS(cmd)          (((cmd) >> _CMD_FLAGSSHIFT) & _CMD_FLAGSMASK)
+#define _CMD_VTYPE(cmd)          (((cmd) >> _CMD_VTYPESHIFT) & _CMD_VTYPEMASK)
+#define _CMD_N(cmd)              (((cmd) >> _CMD_NSHIFT) & _CMD_NMASK)
+
+enum vnic_devcmd_cmd {
+	CMD_NONE                = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_NONE, 0),
+
+	/* mcpu fw info in mem: (u64)a0=paddr to struct vnic_devcmd_fw_info */
+	CMD_MCPU_FW_INFO        = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 1),
+
+	/* dev-specific block member:
+	 *    in: (u16)a0=offset,(u8)a1=size
+	 *    out: a0=value */
+	CMD_DEV_SPEC            = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 2),
+
+	/* stats clear */
+	CMD_STATS_CLEAR         = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 3),
+
+	/* stats dump in mem: (u64)a0=paddr to stats area,
+	 *                    (u16)a1=sizeof stats area */
+	CMD_STATS_DUMP          = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 4),
+
+	/* set Rx packet filter: (u32)a0=filters (see CMD_PFILTER_*) */
+	CMD_PACKET_FILTER	= _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 7),
+
+	/* hang detection notification */
+	CMD_HANG_NOTIFY         = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 8),
+
+	/* MAC address in (u48)a0 */
+	CMD_MAC_ADDR            = _CMDC(_CMD_DIR_READ,
+					_CMD_VTYPE_ENET | _CMD_VTYPE_FC, 9),
+
+	/* disable/enable promisc mode: (u8)a0=0/1 */
+/***** XXX DEPRECATED *****/
+	CMD_PROMISC_MODE        = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 10),
+
+	/* disable/enable all-multi mode: (u8)a0=0/1 */
+/***** XXX DEPRECATED *****/
+	CMD_ALLMULTI_MODE       = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 11),
+
+	/* add addr from (u48)a0 */
+	CMD_ADDR_ADD            = _CMDCNW(_CMD_DIR_WRITE,
+					_CMD_VTYPE_ENET | _CMD_VTYPE_FC, 12),
+
+	/* del addr from (u48)a0 */
+	CMD_ADDR_DEL            = _CMDCNW(_CMD_DIR_WRITE,
+					_CMD_VTYPE_ENET | _CMD_VTYPE_FC, 13),
+
+	/* add VLAN id in (u16)a0 */
+	CMD_VLAN_ADD            = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 14),
+
+	/* del VLAN id in (u16)a0 */
+	CMD_VLAN_DEL            = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 15),
+
+	/* nic_cfg in (u32)a0 */
+	CMD_NIC_CFG             = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 16),
+
+	/* union vnic_rss_key in mem: (u64)a0=paddr, (u16)a1=len */
+	CMD_RSS_KEY             = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 17),
+
+	/* union vnic_rss_cpu in mem: (u64)a0=paddr, (u16)a1=len */
+	CMD_RSS_CPU             = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 18),
+
+	/* initiate softreset */
+	CMD_SOFT_RESET          = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 19),
+
+	/* softreset status:
+	 *    out: a0=0 reset complete, a0=1 reset in progress */
+	CMD_SOFT_RESET_STATUS   = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 20),
+
+	/* set struct vnic_devcmd_notify buffer in mem:
+	 * in:
+	 *   (u64)a0=paddr to notify (set paddr=0 to unset)
+	 *   (u32)a1 & 0x00000000ffffffff=sizeof(struct vnic_devcmd_notify)
+	 *   (u16)a1 & 0x0000ffff00000000=intr num (-1 for no intr)
+	 * out:
+	 *   (u32)a1 = effective size
+	 */
+	CMD_NOTIFY              = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 21),
+
+	/* UNDI API: (u64)a0=paddr to s_PXENV_UNDI_ struct,
+	 *           (u8)a1=PXENV_UNDI_xxx */
+	CMD_UNDI                = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 22),
+
+	/* initiate open sequence (u32)a0=flags (see CMD_OPENF_*) */
+	CMD_OPEN		= _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 23),
+
+	/* open status:
+	 *    out: a0=0 open complete, a0=1 open in progress */
+	CMD_OPEN_STATUS		= _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 24),
+
+	/* close vnic */
+	CMD_CLOSE		= _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 25),
+
+	/* initialize virtual link: (u32)a0=flags (see CMD_INITF_*) */
+	CMD_INIT		= _CMDCNW(_CMD_DIR_READ, _CMD_VTYPE_ALL, 26),
+
+	/* variant of CMD_INIT, with provisioning info
+	 *     (u64)a0=paddr of vnic_devcmd_provinfo
+	 *     (u32)a1=sizeof provision info */
+	CMD_INIT_PROV_INFO	= _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 27),
+
+	/* enable virtual link */
+	CMD_ENABLE		= _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 28),
+
+	/* disable virtual link */
+	CMD_DISABLE		= _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 29),
+
+	/* stats dump all vnics on uplink in mem: (u64)a0=paddr (u32)a1=uif */
+	CMD_STATS_DUMP_ALL	= _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 30),
+
+	/* init status:
+	 *    out: a0=0 init complete, a0=1 init in progress
+	 *         if a0=0, a1=errno */
+	CMD_INIT_STATUS		= _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 31),
+
+	/* INT13 API: (u64)a0=paddr to vnic_int13_params struct
+	 *            (u8)a1=INT13_CMD_xxx */
+	CMD_INT13               = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_FC, 32),
+
+	/* logical uplink enable/disable: (u64)a0: 0/1=disable/enable */
+	CMD_LOGICAL_UPLINK      = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 33),
+
+	/* undo initialize of virtual link */
+	CMD_DEINIT		= _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 34),
+};
+
+/* flags for CMD_OPEN */
+#define CMD_OPENF_OPROM		0x1	/* open coming from option rom */
+
+/* flags for CMD_INIT */
+#define CMD_INITF_DEFAULT_MAC	0x1	/* init with default mac addr */
+
+/* flags for CMD_PACKET_FILTER */
+#define CMD_PFILTER_DIRECTED		0x01
+#define CMD_PFILTER_MULTICAST		0x02
+#define CMD_PFILTER_BROADCAST		0x04
+#define CMD_PFILTER_PROMISCUOUS		0x08
+#define CMD_PFILTER_ALL_MULTICAST	0x10
+
+enum vnic_devcmd_status {
+	STAT_NONE = 0,
+	STAT_BUSY = 1 << 0,	/* cmd in progress */
+	STAT_ERROR = 1 << 1,	/* last cmd caused error (code in a0) */
+};
+
+enum vnic_devcmd_error {
+	ERR_SUCCESS = 0,
+	ERR_EINVAL = 1,
+	ERR_EFAULT = 2,
+	ERR_EPERM = 3,
+	ERR_EBUSY = 4,
+	ERR_ECMDUNKNOWN = 5,
+	ERR_EBADSTATE = 6,
+	ERR_ENOMEM = 7,
+	ERR_ETIMEDOUT = 8,
+	ERR_ELINKDOWN = 9,
+};
+
+struct vnic_devcmd_fw_info {
+	char fw_version[32];
+	char fw_build[32];
+	char hw_version[32];
+	char hw_serial_number[32];
+};
+
+struct vnic_devcmd_notify {
+	u32 csum;		/* checksum over following words */
+
+	u32 link_state;		/* link up == 1 */
+	u32 port_speed;		/* effective port speed (rate limit) */
+	u32 mtu;		/* MTU */
+	u32 msglvl;		/* requested driver msg lvl */
+	u32 uif;		/* uplink interface */
+	u32 status;		/* status bits (see VNIC_STF_*) */
+	u32 error;		/* error code (see ERR_*) for first ERR */
+	u32 link_down_cnt;	/* running count of link down transitions */
+};
+#define VNIC_STF_FATAL_ERR	0x0001	/* fatal fw error */
+
+struct vnic_devcmd_provinfo {
+	u8 oui[3];
+	u8 type;
+	u8 data[0];
+};
+
+/*
+ * Writing cmd register causes STAT_BUSY to get set in status register.
+ * When cmd completes, STAT_BUSY will be cleared.
+ *
+ * If cmd completed successfully STAT_ERROR will be clear
+ * and args registers contain cmd-specific results.
+ *
+ * If cmd error, STAT_ERROR will be set and args[0] contains error code.
+ *
+ * status register is read-only.  While STAT_BUSY is set,
+ * all other register contents are read-only.
+ */
+
+/* Make sizeof(vnic_devcmd) a power-of-2 for I/O BAR. */
+#define VNIC_DEVCMD_NARGS 15
+struct vnic_devcmd {
+	u32 status;			/* RO */
+	u32 cmd;			/* RW */
+	u64 args[VNIC_DEVCMD_NARGS];	/* RW cmd args (little-endian) */
+};
+
+#endif /* _VNIC_DEVCMD_H_ */
diff --git a/drivers/scsi/fnic/vnic_intr.c b/drivers/scsi/fnic/vnic_intr.c
new file mode 100644
index 0000000..4f4dc87
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_intr.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include "vnic_dev.h"
+#include "vnic_intr.h"
+
+void vnic_intr_free(struct vnic_intr *intr)
+{
+	intr->ctrl = NULL;
+}
+
+int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr,
+	unsigned int index)
+{
+	intr->index = index;
+	intr->vdev = vdev;
+
+	intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index);
+	if (!intr->ctrl) {
+		printk(KERN_ERR "Failed to hook INTR[%d].ctrl resource\n",
+			index);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void vnic_intr_init(struct vnic_intr *intr, unsigned int coalescing_timer,
+	unsigned int coalescing_type, unsigned int mask_on_assertion)
+{
+	iowrite32(coalescing_timer, &intr->ctrl->coalescing_timer);
+	iowrite32(coalescing_type, &intr->ctrl->coalescing_type);
+	iowrite32(mask_on_assertion, &intr->ctrl->mask_on_assertion);
+	iowrite32(0, &intr->ctrl->int_credits);
+}
+
+void vnic_intr_clean(struct vnic_intr *intr)
+{
+	iowrite32(0, &intr->ctrl->int_credits);
+}
diff --git a/drivers/scsi/fnic/vnic_intr.h b/drivers/scsi/fnic/vnic_intr.h
new file mode 100644
index 0000000..d5fb40e
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_intr.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_INTR_H_
+#define _VNIC_INTR_H_
+
+#include <linux/pci.h>
+#include "vnic_dev.h"
+
+/*
+ * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth
+ * Driver) when both are built with CONFIG options =y
+ */
+#define vnic_intr_unmask fnic_intr_unmask
+#define vnic_intr_mask fnic_intr_mask
+#define vnic_intr_return_credits fnic_intr_return_credits
+#define vnic_intr_credits fnic_intr_credits
+#define vnic_intr_return_all_credits fnic_intr_return_all_credits
+#define vnic_intr_legacy_pba fnic_intr_legacy_pba
+#define vnic_intr_free fnic_intr_free
+#define vnic_intr_alloc fnic_intr_alloc
+#define vnic_intr_init fnic_intr_init
+#define vnic_intr_clean fnic_intr_clean
+
+#define VNIC_INTR_TIMER_MAX		0xffff
+
+#define VNIC_INTR_TIMER_TYPE_ABS	0
+#define VNIC_INTR_TIMER_TYPE_QUIET	1
+
+/* Interrupt control */
+struct vnic_intr_ctrl {
+	u32 coalescing_timer;		/* 0x00 */
+	u32 pad0;
+	u32 coalescing_value;		/* 0x08 */
+	u32 pad1;
+	u32 coalescing_type;		/* 0x10 */
+	u32 pad2;
+	u32 mask_on_assertion;		/* 0x18 */
+	u32 pad3;
+	u32 mask;			/* 0x20 */
+	u32 pad4;
+	u32 int_credits;		/* 0x28 */
+	u32 pad5;
+	u32 int_credit_return;		/* 0x30 */
+	u32 pad6;
+};
+
+struct vnic_intr {
+	unsigned int index;
+	struct vnic_dev *vdev;
+	struct vnic_intr_ctrl __iomem *ctrl;	/* memory-mapped */
+};
+
+static inline void vnic_intr_unmask(struct vnic_intr *intr)
+{
+	iowrite32(0, &intr->ctrl->mask);
+}
+
+static inline void vnic_intr_mask(struct vnic_intr *intr)
+{
+	iowrite32(1, &intr->ctrl->mask);
+}
+
+static inline void vnic_intr_return_credits(struct vnic_intr *intr,
+	unsigned int credits, int unmask, int reset_timer)
+{
+#define VNIC_INTR_UNMASK_SHIFT		16
+#define VNIC_INTR_RESET_TIMER_SHIFT	17
+
+	u32 int_credit_return = (credits & 0xffff) |
+		(unmask ? (1 << VNIC_INTR_UNMASK_SHIFT) : 0) |
+		(reset_timer ? (1 << VNIC_INTR_RESET_TIMER_SHIFT) : 0);
+
+	iowrite32(int_credit_return, &intr->ctrl->int_credit_return);
+}
+
+static inline unsigned int vnic_intr_credits(struct vnic_intr *intr)
+{
+	return ioread32(&intr->ctrl->int_credits);
+}
+
+static inline void vnic_intr_return_all_credits(struct vnic_intr *intr)
+{
+	unsigned int credits = vnic_intr_credits(intr);
+	int unmask = 1;
+	int reset_timer = 1;
+
+	vnic_intr_return_credits(intr, credits, unmask, reset_timer);
+}
+
+static inline u32 vnic_intr_legacy_pba(u32 __iomem *legacy_pba)
+{
+	/* read PBA without clearing */
+	return ioread32(legacy_pba);
+}
+
+void vnic_intr_free(struct vnic_intr *intr);
+int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr,
+	unsigned int index);
+void vnic_intr_init(struct vnic_intr *intr, unsigned int coalescing_timer,
+	unsigned int coalescing_type, unsigned int mask_on_assertion);
+void vnic_intr_clean(struct vnic_intr *intr);
+
+#endif /* _VNIC_INTR_H_ */
diff --git a/drivers/scsi/fnic/vnic_nic.h b/drivers/scsi/fnic/vnic_nic.h
new file mode 100644
index 0000000..f15b83e
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_nic.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_NIC_H_
+#define _VNIC_NIC_H_
+
+/*
+ * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth
+ * Driver) when both are built with CONFIG options =y
+ */
+#define vnic_set_nic_cfg fnic_set_nic_cfg
+
+#define NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD	0xffUL
+#define NIC_CFG_RSS_DEFAULT_CPU_SHIFT		0
+#define NIC_CFG_RSS_HASH_TYPE			(0xffUL << 8)
+#define NIC_CFG_RSS_HASH_TYPE_MASK_FIELD	0xffUL
+#define NIC_CFG_RSS_HASH_TYPE_SHIFT		8
+#define NIC_CFG_RSS_HASH_BITS			(7UL << 16)
+#define NIC_CFG_RSS_HASH_BITS_MASK_FIELD	7UL
+#define NIC_CFG_RSS_HASH_BITS_SHIFT		16
+#define NIC_CFG_RSS_BASE_CPU			(7UL << 19)
+#define NIC_CFG_RSS_BASE_CPU_MASK_FIELD		7UL
+#define NIC_CFG_RSS_BASE_CPU_SHIFT		19
+#define NIC_CFG_RSS_ENABLE			(1UL << 22)
+#define NIC_CFG_RSS_ENABLE_MASK_FIELD		1UL
+#define NIC_CFG_RSS_ENABLE_SHIFT		22
+#define NIC_CFG_TSO_IPID_SPLIT_EN		(1UL << 23)
+#define NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD	1UL
+#define NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT		23
+#define NIC_CFG_IG_VLAN_STRIP_EN		(1UL << 24)
+#define NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD	1UL
+#define NIC_CFG_IG_VLAN_STRIP_EN_SHIFT		24
+
+static inline void vnic_set_nic_cfg(u32 *nic_cfg,
+	u8 rss_default_cpu, u8 rss_hash_type,
+	u8 rss_hash_bits, u8 rss_base_cpu,
+	u8 rss_enable, u8 tso_ipid_split_en,
+	u8 ig_vlan_strip_en)
+{
+	*nic_cfg = (rss_default_cpu & NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD) |
+		((rss_hash_type & NIC_CFG_RSS_HASH_TYPE_MASK_FIELD)
+			<< NIC_CFG_RSS_HASH_TYPE_SHIFT) |
+		((rss_hash_bits & NIC_CFG_RSS_HASH_BITS_MASK_FIELD)
+			<< NIC_CFG_RSS_HASH_BITS_SHIFT) |
+		((rss_base_cpu & NIC_CFG_RSS_BASE_CPU_MASK_FIELD)
+			<< NIC_CFG_RSS_BASE_CPU_SHIFT) |
+		((rss_enable & NIC_CFG_RSS_ENABLE_MASK_FIELD)
+			<< NIC_CFG_RSS_ENABLE_SHIFT) |
+		((tso_ipid_split_en & NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD)
+			<< NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT) |
+		((ig_vlan_strip_en & NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD)
+			<< NIC_CFG_IG_VLAN_STRIP_EN_SHIFT);
+}
+
+#endif /* _VNIC_NIC_H_ */
diff --git a/drivers/scsi/fnic/vnic_resource.h b/drivers/scsi/fnic/vnic_resource.h
new file mode 100644
index 0000000..2d842f7
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_resource.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_RESOURCE_H_
+#define _VNIC_RESOURCE_H_
+
+#define VNIC_RES_MAGIC		0x766E6963L	/* 'vnic' */
+#define VNIC_RES_VERSION	0x00000000L
+
+/* vNIC resource types */
+enum vnic_res_type {
+	RES_TYPE_EOL,			/* End-of-list */
+	RES_TYPE_WQ,			/* Work queues */
+	RES_TYPE_RQ,			/* Receive queues */
+	RES_TYPE_CQ,			/* Completion queues */
+	RES_TYPE_RSVD1,
+	RES_TYPE_NIC_CFG,		/* Enet NIC config registers */
+	RES_TYPE_RSVD2,
+	RES_TYPE_RSVD3,
+	RES_TYPE_RSVD4,
+	RES_TYPE_RSVD5,
+	RES_TYPE_INTR_CTRL,		/* Interrupt ctrl table */
+	RES_TYPE_INTR_TABLE,		/* MSI/MSI-X Interrupt table */
+	RES_TYPE_INTR_PBA,		/* MSI/MSI-X PBA table */
+	RES_TYPE_INTR_PBA_LEGACY,	/* Legacy intr status */
+	RES_TYPE_RSVD6,
+	RES_TYPE_RSVD7,
+	RES_TYPE_DEVCMD,		/* Device command region */
+	RES_TYPE_PASS_THRU_PAGE,	/* Pass-thru page */
+
+	RES_TYPE_MAX,			/* Count of resource types */
+};
+
+struct vnic_resource_header {
+	u32 magic;
+	u32 version;
+};
+
+struct vnic_resource {
+	u8 type;
+	u8 bar;
+	u8 pad[2];
+	u32 bar_offset;
+	u32 count;
+};
+
+#endif /* _VNIC_RESOURCE_H_ */
diff --git a/drivers/scsi/fnic/vnic_rq.c b/drivers/scsi/fnic/vnic_rq.c
new file mode 100644
index 0000000..bedd0d2
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_rq.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include "vnic_dev.h"
+#include "vnic_rq.h"
+
+static int vnic_rq_alloc_bufs(struct vnic_rq *rq)
+{
+	struct vnic_rq_buf *buf;
+	struct vnic_dev *vdev;
+	unsigned int i, j, count = rq->ring.desc_count;
+	unsigned int blks = VNIC_RQ_BUF_BLKS_NEEDED(count);
+
+	vdev = rq->vdev;
+
+	for (i = 0; i < blks; i++) {
+		rq->bufs[i] = kzalloc(VNIC_RQ_BUF_BLK_SZ, GFP_ATOMIC);
+		if (!rq->bufs[i]) {
+			printk(KERN_ERR "Failed to alloc rq_bufs\n");
+			return -ENOMEM;
+		}
+	}
+
+	for (i = 0; i < blks; i++) {
+		buf = rq->bufs[i];
+		for (j = 0; j < VNIC_RQ_BUF_BLK_ENTRIES; j++) {
+			buf->index = i * VNIC_RQ_BUF_BLK_ENTRIES + j;
+			buf->desc = (u8 *)rq->ring.descs +
+				rq->ring.desc_size * buf->index;
+			if (buf->index + 1 == count) {
+				buf->next = rq->bufs[0];
+				break;
+			} else if (j + 1 == VNIC_RQ_BUF_BLK_ENTRIES) {
+				buf->next = rq->bufs[i + 1];
+			} else {
+				buf->next = buf + 1;
+				buf++;
+			}
+		}
+	}
+
+	rq->to_use = rq->to_clean = rq->bufs[0];
+	rq->buf_index = 0;
+
+	return 0;
+}
+
+void vnic_rq_free(struct vnic_rq *rq)
+{
+	struct vnic_dev *vdev;
+	unsigned int i;
+
+	vdev = rq->vdev;
+
+	vnic_dev_free_desc_ring(vdev, &rq->ring);
+
+	for (i = 0; i < VNIC_RQ_BUF_BLKS_MAX; i++) {
+		kfree(rq->bufs[i]);
+		rq->bufs[i] = NULL;
+	}
+
+	rq->ctrl = NULL;
+}
+
+int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index,
+	unsigned int desc_count, unsigned int desc_size)
+{
+	int err;
+
+	rq->index = index;
+	rq->vdev = vdev;
+
+	rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index);
+	if (!rq->ctrl) {
+		printk(KERN_ERR "Failed to hook RQ[%d] resource\n", index);
+		return -EINVAL;
+	}
+
+	vnic_rq_disable(rq);
+
+	err = vnic_dev_alloc_desc_ring(vdev, &rq->ring, desc_count, desc_size);
+	if (err)
+		return err;
+
+	err = vnic_rq_alloc_bufs(rq);
+	if (err) {
+		vnic_rq_free(rq);
+		return err;
+	}
+
+	return 0;
+}
+
+void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
+	unsigned int error_interrupt_enable,
+	unsigned int error_interrupt_offset)
+{
+	u64 paddr;
+	u32 fetch_index;
+
+	paddr = (u64)rq->ring.base_addr | VNIC_PADDR_TARGET;
+	writeq(paddr, &rq->ctrl->ring_base);
+	iowrite32(rq->ring.desc_count, &rq->ctrl->ring_size);
+	iowrite32(cq_index, &rq->ctrl->cq_index);
+	iowrite32(error_interrupt_enable, &rq->ctrl->error_interrupt_enable);
+	iowrite32(error_interrupt_offset, &rq->ctrl->error_interrupt_offset);
+	iowrite32(0, &rq->ctrl->dropped_packet_count);
+	iowrite32(0, &rq->ctrl->error_status);
+
+	/* Use current fetch_index as the ring starting point */
+	fetch_index = ioread32(&rq->ctrl->fetch_index);
+	rq->to_use = rq->to_clean =
+		&rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES]
+			[fetch_index % VNIC_RQ_BUF_BLK_ENTRIES];
+	iowrite32(fetch_index, &rq->ctrl->posted_index);
+
+	rq->buf_index = 0;
+}
+
+unsigned int vnic_rq_error_status(struct vnic_rq *rq)
+{
+	return ioread32(&rq->ctrl->error_status);
+}
+
+void vnic_rq_enable(struct vnic_rq *rq)
+{
+	iowrite32(1, &rq->ctrl->enable);
+}
+
+int vnic_rq_disable(struct vnic_rq *rq)
+{
+	unsigned int wait;
+
+	iowrite32(0, &rq->ctrl->enable);
+
+	/* Wait for HW to ACK disable request */
+	for (wait = 0; wait < 100; wait++) {
+		if (!(ioread32(&rq->ctrl->running)))
+			return 0;
+		udelay(1);
+	}
+
+	printk(KERN_ERR "Failed to disable RQ[%d]\n", rq->index);
+
+	return -ETIMEDOUT;
+}
+
+void vnic_rq_clean(struct vnic_rq *rq,
+	void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf))
+{
+	struct vnic_rq_buf *buf;
+	u32 fetch_index;
+
+	BUG_ON(ioread32(&rq->ctrl->enable));
+
+	buf = rq->to_clean;
+
+	while (vnic_rq_desc_used(rq) > 0) {
+
+		(*buf_clean)(rq, buf);
+
+		buf = rq->to_clean = buf->next;
+		rq->ring.desc_avail++;
+	}
+
+	/* Use current fetch_index as the ring starting point */
+	fetch_index = ioread32(&rq->ctrl->fetch_index);
+	rq->to_use = rq->to_clean =
+		&rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES]
+			[fetch_index % VNIC_RQ_BUF_BLK_ENTRIES];
+	iowrite32(fetch_index, &rq->ctrl->posted_index);
+
+	rq->buf_index = 0;
+
+	vnic_dev_clear_desc_ring(&rq->ring);
+}
+
diff --git a/drivers/scsi/fnic/vnic_rq.h b/drivers/scsi/fnic/vnic_rq.h
new file mode 100644
index 0000000..aebdfbd
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_rq.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_RQ_H_
+#define _VNIC_RQ_H_
+
+#include <linux/pci.h>
+#include "vnic_dev.h"
+#include "vnic_cq.h"
+
+/*
+ * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth
+ * Driver) when both are built with CONFIG options =y
+ */
+#define vnic_rq_desc_avail fnic_rq_desc_avail
+#define vnic_rq_desc_used fnic_rq_desc_used
+#define vnic_rq_next_desc fnic_rq_next_desc
+#define vnic_rq_next_index fnic_rq_next_index
+#define vnic_rq_next_buf_index fnic_rq_next_buf_index
+#define vnic_rq_post fnic_rq_post
+#define vnic_rq_posting_soon fnic_rq_posting_soon
+#define vnic_rq_return_descs fnic_rq_return_descs
+#define vnic_rq_service fnic_rq_service
+#define vnic_rq_fill fnic_rq_fill
+#define vnic_rq_free fnic_rq_free
+#define vnic_rq_alloc fnic_rq_alloc
+#define vnic_rq_init fnic_rq_init
+#define vnic_rq_error_status fnic_rq_error_status
+#define vnic_rq_enable fnic_rq_enable
+#define vnic_rq_disable fnic_rq_disable
+#define vnic_rq_clean fnic_rq_clean
+
+/* Receive queue control */
+struct vnic_rq_ctrl {
+	u64 ring_base;			/* 0x00 */
+	u32 ring_size;			/* 0x08 */
+	u32 pad0;
+	u32 posted_index;		/* 0x10 */
+	u32 pad1;
+	u32 cq_index;			/* 0x18 */
+	u32 pad2;
+	u32 enable;			/* 0x20 */
+	u32 pad3;
+	u32 running;			/* 0x28 */
+	u32 pad4;
+	u32 fetch_index;		/* 0x30 */
+	u32 pad5;
+	u32 error_interrupt_enable;	/* 0x38 */
+	u32 pad6;
+	u32 error_interrupt_offset;	/* 0x40 */
+	u32 pad7;
+	u32 error_status;		/* 0x48 */
+	u32 pad8;
+	u32 dropped_packet_count;	/* 0x50 */
+	u32 pad9;
+	u32 dropped_packet_count_rc;	/* 0x58 */
+	u32 pad10;
+};
+
+/* Break the vnic_rq_buf allocations into blocks of 64 entries */
+#define VNIC_RQ_BUF_BLK_ENTRIES 64
+#define VNIC_RQ_BUF_BLK_SZ \
+	(VNIC_RQ_BUF_BLK_ENTRIES * sizeof(struct vnic_rq_buf))
+#define VNIC_RQ_BUF_BLKS_NEEDED(entries) \
+	DIV_ROUND_UP(entries, VNIC_RQ_BUF_BLK_ENTRIES)
+#define VNIC_RQ_BUF_BLKS_MAX VNIC_RQ_BUF_BLKS_NEEDED(4096)
+
+struct vnic_rq_buf {
+	struct vnic_rq_buf *next;
+	dma_addr_t dma_addr;
+	void *os_buf;
+	unsigned int os_buf_index;
+	unsigned int len;
+	unsigned int index;
+	void *desc;
+};
+
+struct vnic_rq {
+	unsigned int index;
+	struct vnic_dev *vdev;
+	struct vnic_rq_ctrl __iomem *ctrl;	/* memory-mapped */
+	struct vnic_dev_ring ring;
+	struct vnic_rq_buf *bufs[VNIC_RQ_BUF_BLKS_MAX];
+	struct vnic_rq_buf *to_use;
+	struct vnic_rq_buf *to_clean;
+	void *os_buf_head;
+	unsigned int buf_index;
+	unsigned int pkts_outstanding;
+};
+
+static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq)
+{
+	/* how many does SW own? */
+	return rq->ring.desc_avail;
+}
+
+static inline unsigned int vnic_rq_desc_used(struct vnic_rq *rq)
+{
+	/* how many does HW own? */
+	return rq->ring.desc_count - rq->ring.desc_avail - 1;
+}
+
+static inline void *vnic_rq_next_desc(struct vnic_rq *rq)
+{
+	return rq->to_use->desc;
+}
+
+static inline unsigned int vnic_rq_next_index(struct vnic_rq *rq)
+{
+	return rq->to_use->index;
+}
+
+static inline unsigned int vnic_rq_next_buf_index(struct vnic_rq *rq)
+{
+	return rq->buf_index++;
+}
+
+static inline void vnic_rq_post(struct vnic_rq *rq,
+	void *os_buf, unsigned int os_buf_index,
+	dma_addr_t dma_addr, unsigned int len)
+{
+	struct vnic_rq_buf *buf = rq->to_use;
+
+	buf->os_buf = os_buf;
+	buf->os_buf_index = os_buf_index;
+	buf->dma_addr = dma_addr;
+	buf->len = len;
+
+	buf = buf->next;
+	rq->to_use = buf;
+	rq->ring.desc_avail--;
+
+	/* Move the posted_index every nth descriptor
+	 */
+
+#ifndef VNIC_RQ_RETURN_RATE
+#define VNIC_RQ_RETURN_RATE		0xf	/* keep 2^n - 1 */
+#endif
+
+	if ((buf->index & VNIC_RQ_RETURN_RATE) == 0) {
+		/* Adding write memory barrier prevents compiler and/or CPU
+		 * reordering, thus avoiding descriptor posting before
+		 * descriptor is initialized. Otherwise, hardware can read
+		 * stale descriptor fields.
+		 */
+		wmb();
+		iowrite32(buf->index, &rq->ctrl->posted_index);
+	}
+}
+
+static inline int vnic_rq_posting_soon(struct vnic_rq *rq)
+{
+	return (rq->to_use->index & VNIC_RQ_RETURN_RATE) == 0;
+}
+
+static inline void vnic_rq_return_descs(struct vnic_rq *rq, unsigned int count)
+{
+	rq->ring.desc_avail += count;
+}
+
+enum desc_return_options {
+	VNIC_RQ_RETURN_DESC,
+	VNIC_RQ_DEFER_RETURN_DESC,
+};
+
+static inline void vnic_rq_service(struct vnic_rq *rq,
+	struct cq_desc *cq_desc, u16 completed_index,
+	int desc_return, void (*buf_service)(struct vnic_rq *rq,
+	struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
+	int skipped, void *opaque), void *opaque)
+{
+	struct vnic_rq_buf *buf;
+	int skipped;
+
+	buf = rq->to_clean;
+	while (1) {
+
+		skipped = (buf->index != completed_index);
+
+		(*buf_service)(rq, cq_desc, buf, skipped, opaque);
+
+		if (desc_return == VNIC_RQ_RETURN_DESC)
+			rq->ring.desc_avail++;
+
+		rq->to_clean = buf->next;
+
+		if (!skipped)
+			break;
+
+		buf = rq->to_clean;
+	}
+}
+
+static inline int vnic_rq_fill(struct vnic_rq *rq,
+	int (*buf_fill)(struct vnic_rq *rq))
+{
+	int err;
+
+	while (vnic_rq_desc_avail(rq) > 1) {
+
+		err = (*buf_fill)(rq);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+void vnic_rq_free(struct vnic_rq *rq);
+int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index,
+	unsigned int desc_count, unsigned int desc_size);
+void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
+	unsigned int error_interrupt_enable,
+	unsigned int error_interrupt_offset);
+unsigned int vnic_rq_error_status(struct vnic_rq *rq);
+void vnic_rq_enable(struct vnic_rq *rq);
+int vnic_rq_disable(struct vnic_rq *rq);
+void vnic_rq_clean(struct vnic_rq *rq,
+	void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf));
+
+#endif /* _VNIC_RQ_H_ */
diff --git a/drivers/scsi/fnic/vnic_scsi.h b/drivers/scsi/fnic/vnic_scsi.h
new file mode 100644
index 0000000..46baa52
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_scsi.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_SCSI_H_
+#define _VNIC_SCSI_H_
+
+#define VNIC_FNIC_WQ_COPY_COUNT_MIN         1
+#define VNIC_FNIC_WQ_COPY_COUNT_MAX         1
+
+#define VNIC_FNIC_WQ_DESCS_MIN              64
+#define VNIC_FNIC_WQ_DESCS_MAX              128
+
+#define VNIC_FNIC_WQ_COPY_DESCS_MIN         64
+#define VNIC_FNIC_WQ_COPY_DESCS_MAX         512
+
+#define VNIC_FNIC_RQ_DESCS_MIN              64
+#define VNIC_FNIC_RQ_DESCS_MAX              128
+
+#define VNIC_FNIC_EDTOV_MIN                 1000
+#define VNIC_FNIC_EDTOV_MAX                 255000
+#define VNIC_FNIC_EDTOV_DEF                 2000
+
+#define VNIC_FNIC_RATOV_MIN                 1000
+#define VNIC_FNIC_RATOV_MAX                 255000
+
+#define VNIC_FNIC_MAXDATAFIELDSIZE_MIN      256
+#define VNIC_FNIC_MAXDATAFIELDSIZE_MAX      2112
+
+#define VNIC_FNIC_FLOGI_RETRIES_MIN         0
+#define VNIC_FNIC_FLOGI_RETRIES_MAX         0xffffffff
+#define VNIC_FNIC_FLOGI_RETRIES_DEF         0xffffffff
+
+#define VNIC_FNIC_FLOGI_TIMEOUT_MIN         1000
+#define VNIC_FNIC_FLOGI_TIMEOUT_MAX         255000
+
+#define VNIC_FNIC_PLOGI_RETRIES_MIN         0
+#define VNIC_FNIC_PLOGI_RETRIES_MAX         255
+#define VNIC_FNIC_PLOGI_RETRIES_DEF         8
+
+#define VNIC_FNIC_PLOGI_TIMEOUT_MIN         1000
+#define VNIC_FNIC_PLOGI_TIMEOUT_MAX         255000
+
+#define VNIC_FNIC_IO_THROTTLE_COUNT_MIN     256
+#define VNIC_FNIC_IO_THROTTLE_COUNT_MAX     4096
+
+#define VNIC_FNIC_LINK_DOWN_TIMEOUT_MIN     0
+#define VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX     240000
+
+#define VNIC_FNIC_PORT_DOWN_TIMEOUT_MIN     0
+#define VNIC_FNIC_PORT_DOWN_TIMEOUT_MAX     240000
+
+#define VNIC_FNIC_PORT_DOWN_IO_RETRIES_MIN  0
+#define VNIC_FNIC_PORT_DOWN_IO_RETRIES_MAX  255
+
+#define VNIC_FNIC_LUNS_PER_TARGET_MIN       1
+#define VNIC_FNIC_LUNS_PER_TARGET_MAX       1024
+
+/* Device-specific region: scsi configuration */
+struct vnic_fc_config {
+	u64 node_wwn;
+	u64 port_wwn;
+	u32 flags;
+	u32 wq_enet_desc_count;
+	u32 wq_copy_desc_count;
+	u32 rq_desc_count;
+	u32 flogi_retries;
+	u32 flogi_timeout;
+	u32 plogi_retries;
+	u32 plogi_timeout;
+	u32 io_throttle_count;
+	u32 link_down_timeout;
+	u32 port_down_timeout;
+	u32 port_down_io_retries;
+	u32 luns_per_tgt;
+	u16 maxdatafieldsize;
+	u16 ed_tov;
+	u16 ra_tov;
+	u16 intr_timer;
+	u8 intr_timer_type;
+};
+
+#define VFCF_FCP_SEQ_LVL_ERR	0x1	/* Enable FCP-2 Error Recovery */
+#define VFCF_PERBI		0x2	/* persistent binding info available */
+
+#endif /* _VNIC_SCSI_H_ */
diff --git a/drivers/scsi/fnic/vnic_stats.h b/drivers/scsi/fnic/vnic_stats.h
new file mode 100644
index 0000000..5372e23
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_stats.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_STATS_H_
+#define _VNIC_STATS_H_
+
+/* Tx statistics */
+struct vnic_tx_stats {
+	u64 tx_frames_ok;
+	u64 tx_unicast_frames_ok;
+	u64 tx_multicast_frames_ok;
+	u64 tx_broadcast_frames_ok;
+	u64 tx_bytes_ok;
+	u64 tx_unicast_bytes_ok;
+	u64 tx_multicast_bytes_ok;
+	u64 tx_broadcast_bytes_ok;
+	u64 tx_drops;
+	u64 tx_errors;
+	u64 tx_tso;
+	u64 rsvd[16];
+};
+
+/* Rx statistics */
+struct vnic_rx_stats {
+	u64 rx_frames_ok;
+	u64 rx_frames_total;
+	u64 rx_unicast_frames_ok;
+	u64 rx_multicast_frames_ok;
+	u64 rx_broadcast_frames_ok;
+	u64 rx_bytes_ok;
+	u64 rx_unicast_bytes_ok;
+	u64 rx_multicast_bytes_ok;
+	u64 rx_broadcast_bytes_ok;
+	u64 rx_drop;
+	u64 rx_no_bufs;
+	u64 rx_errors;
+	u64 rx_rss;
+	u64 rx_crc_errors;
+	u64 rx_frames_64;
+	u64 rx_frames_127;
+	u64 rx_frames_255;
+	u64 rx_frames_511;
+	u64 rx_frames_1023;
+	u64 rx_frames_1518;
+	u64 rx_frames_to_max;
+	u64 rsvd[16];
+};
+
+struct vnic_stats {
+	struct vnic_tx_stats tx;
+	struct vnic_rx_stats rx;
+};
+
+#endif /* _VNIC_STATS_H_ */
diff --git a/drivers/scsi/fnic/vnic_wq.c b/drivers/scsi/fnic/vnic_wq.c
new file mode 100644
index 0000000..1f9ea79
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_wq.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include "vnic_dev.h"
+#include "vnic_wq.h"
+
+static int vnic_wq_alloc_bufs(struct vnic_wq *wq)
+{
+	struct vnic_wq_buf *buf;
+	struct vnic_dev *vdev;
+	unsigned int i, j, count = wq->ring.desc_count;
+	unsigned int blks = VNIC_WQ_BUF_BLKS_NEEDED(count);
+
+	vdev = wq->vdev;
+
+	for (i = 0; i < blks; i++) {
+		wq->bufs[i] = kzalloc(VNIC_WQ_BUF_BLK_SZ, GFP_ATOMIC);
+		if (!wq->bufs[i]) {
+			printk(KERN_ERR "Failed to alloc wq_bufs\n");
+			return -ENOMEM;
+		}
+	}
+
+	for (i = 0; i < blks; i++) {
+		buf = wq->bufs[i];
+		for (j = 0; j < VNIC_WQ_BUF_BLK_ENTRIES; j++) {
+			buf->index = i * VNIC_WQ_BUF_BLK_ENTRIES + j;
+			buf->desc = (u8 *)wq->ring.descs +
+				wq->ring.desc_size * buf->index;
+			if (buf->index + 1 == count) {
+				buf->next = wq->bufs[0];
+				break;
+			} else if (j + 1 == VNIC_WQ_BUF_BLK_ENTRIES) {
+				buf->next = wq->bufs[i + 1];
+			} else {
+				buf->next = buf + 1;
+				buf++;
+			}
+		}
+	}
+
+	wq->to_use = wq->to_clean = wq->bufs[0];
+
+	return 0;
+}
+
+void vnic_wq_free(struct vnic_wq *wq)
+{
+	struct vnic_dev *vdev;
+	unsigned int i;
+
+	vdev = wq->vdev;
+
+	vnic_dev_free_desc_ring(vdev, &wq->ring);
+
+	for (i = 0; i < VNIC_WQ_BUF_BLKS_MAX; i++) {
+		kfree(wq->bufs[i]);
+		wq->bufs[i] = NULL;
+	}
+
+	wq->ctrl = NULL;
+
+}
+
+int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index,
+	unsigned int desc_count, unsigned int desc_size)
+{
+	int err;
+
+	wq->index = index;
+	wq->vdev = vdev;
+
+	wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index);
+	if (!wq->ctrl) {
+		printk(KERN_ERR "Failed to hook WQ[%d] resource\n", index);
+		return -EINVAL;
+	}
+
+	vnic_wq_disable(wq);
+
+	err = vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size);
+	if (err)
+		return err;
+
+	err = vnic_wq_alloc_bufs(wq);
+	if (err) {
+		vnic_wq_free(wq);
+		return err;
+	}
+
+	return 0;
+}
+
+void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index,
+	unsigned int error_interrupt_enable,
+	unsigned int error_interrupt_offset)
+{
+	u64 paddr;
+
+	paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET;
+	writeq(paddr, &wq->ctrl->ring_base);
+	iowrite32(wq->ring.desc_count, &wq->ctrl->ring_size);
+	iowrite32(0, &wq->ctrl->fetch_index);
+	iowrite32(0, &wq->ctrl->posted_index);
+	iowrite32(cq_index, &wq->ctrl->cq_index);
+	iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable);
+	iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset);
+	iowrite32(0, &wq->ctrl->error_status);
+}
+
+unsigned int vnic_wq_error_status(struct vnic_wq *wq)
+{
+	return ioread32(&wq->ctrl->error_status);
+}
+
+void vnic_wq_enable(struct vnic_wq *wq)
+{
+	iowrite32(1, &wq->ctrl->enable);
+}
+
+int vnic_wq_disable(struct vnic_wq *wq)
+{
+	unsigned int wait;
+
+	iowrite32(0, &wq->ctrl->enable);
+
+	/* Wait for HW to ACK disable request */
+	for (wait = 0; wait < 100; wait++) {
+		if (!(ioread32(&wq->ctrl->running)))
+			return 0;
+		udelay(1);
+	}
+
+	printk(KERN_ERR "Failed to disable WQ[%d]\n", wq->index);
+
+	return -ETIMEDOUT;
+}
+
+void vnic_wq_clean(struct vnic_wq *wq,
+	void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf))
+{
+	struct vnic_wq_buf *buf;
+
+	BUG_ON(ioread32(&wq->ctrl->enable));
+
+	buf = wq->to_clean;
+
+	while (vnic_wq_desc_used(wq) > 0) {
+
+		(*buf_clean)(wq, buf);
+
+		buf = wq->to_clean = buf->next;
+		wq->ring.desc_avail++;
+	}
+
+	wq->to_use = wq->to_clean = wq->bufs[0];
+
+	iowrite32(0, &wq->ctrl->fetch_index);
+	iowrite32(0, &wq->ctrl->posted_index);
+	iowrite32(0, &wq->ctrl->error_status);
+
+	vnic_dev_clear_desc_ring(&wq->ring);
+}
diff --git a/drivers/scsi/fnic/vnic_wq.h b/drivers/scsi/fnic/vnic_wq.h
new file mode 100644
index 0000000..5cd094f
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_wq.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_WQ_H_
+#define _VNIC_WQ_H_
+
+#include <linux/pci.h>
+#include "vnic_dev.h"
+#include "vnic_cq.h"
+
+/*
+ * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth
+ * Driver) when both are built with CONFIG options =y
+ */
+#define vnic_wq_desc_avail fnic_wq_desc_avail
+#define vnic_wq_desc_used fnic_wq_desc_used
+#define vnic_wq_next_desc fni_cwq_next_desc
+#define vnic_wq_post fnic_wq_post
+#define vnic_wq_service fnic_wq_service
+#define vnic_wq_free fnic_wq_free
+#define vnic_wq_alloc fnic_wq_alloc
+#define vnic_wq_init fnic_wq_init
+#define vnic_wq_error_status fnic_wq_error_status
+#define vnic_wq_enable fnic_wq_enable
+#define vnic_wq_disable fnic_wq_disable
+#define vnic_wq_clean fnic_wq_clean
+
+/* Work queue control */
+struct vnic_wq_ctrl {
+	u64 ring_base;			/* 0x00 */
+	u32 ring_size;			/* 0x08 */
+	u32 pad0;
+	u32 posted_index;		/* 0x10 */
+	u32 pad1;
+	u32 cq_index;			/* 0x18 */
+	u32 pad2;
+	u32 enable;			/* 0x20 */
+	u32 pad3;
+	u32 running;			/* 0x28 */
+	u32 pad4;
+	u32 fetch_index;		/* 0x30 */
+	u32 pad5;
+	u32 dca_value;			/* 0x38 */
+	u32 pad6;
+	u32 error_interrupt_enable;	/* 0x40 */
+	u32 pad7;
+	u32 error_interrupt_offset;	/* 0x48 */
+	u32 pad8;
+	u32 error_status;		/* 0x50 */
+	u32 pad9;
+};
+
+struct vnic_wq_buf {
+	struct vnic_wq_buf *next;
+	dma_addr_t dma_addr;
+	void *os_buf;
+	unsigned int len;
+	unsigned int index;
+	int sop;
+	void *desc;
+};
+
+/* Break the vnic_wq_buf allocations into blocks of 64 entries */
+#define VNIC_WQ_BUF_BLK_ENTRIES 64
+#define VNIC_WQ_BUF_BLK_SZ \
+	(VNIC_WQ_BUF_BLK_ENTRIES * sizeof(struct vnic_wq_buf))
+#define VNIC_WQ_BUF_BLKS_NEEDED(entries) \
+	DIV_ROUND_UP(entries, VNIC_WQ_BUF_BLK_ENTRIES)
+#define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(4096)
+
+struct vnic_wq {
+	unsigned int index;
+	struct vnic_dev *vdev;
+	struct vnic_wq_ctrl __iomem *ctrl;	/* memory-mapped */
+	struct vnic_dev_ring ring;
+	struct vnic_wq_buf *bufs[VNIC_WQ_BUF_BLKS_MAX];
+	struct vnic_wq_buf *to_use;
+	struct vnic_wq_buf *to_clean;
+	unsigned int pkts_outstanding;
+};
+
+static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq)
+{
+	/* how many does SW own? */
+	return wq->ring.desc_avail;
+}
+
+static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq)
+{
+	/* how many does HW own? */
+	return wq->ring.desc_count - wq->ring.desc_avail - 1;
+}
+
+static inline void *vnic_wq_next_desc(struct vnic_wq *wq)
+{
+	return wq->to_use->desc;
+}
+
+static inline void vnic_wq_post(struct vnic_wq *wq,
+	void *os_buf, dma_addr_t dma_addr,
+	unsigned int len, int sop, int eop)
+{
+	struct vnic_wq_buf *buf = wq->to_use;
+
+	buf->sop = sop;
+	buf->os_buf = eop ? os_buf : NULL;
+	buf->dma_addr = dma_addr;
+	buf->len = len;
+
+	buf = buf->next;
+	if (eop) {
+		/* Adding write memory barrier prevents compiler and/or CPU
+		 * reordering, thus avoiding descriptor posting before
+		 * descriptor is initialized. Otherwise, hardware can read
+		 * stale descriptor fields.
+		 */
+		wmb();
+		iowrite32(buf->index, &wq->ctrl->posted_index);
+	}
+	wq->to_use = buf;
+
+	wq->ring.desc_avail--;
+}
+
+static inline void vnic_wq_service(struct vnic_wq *wq,
+	struct cq_desc *cq_desc, u16 completed_index,
+	void (*buf_service)(struct vnic_wq *wq,
+	struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque),
+	void *opaque)
+{
+	struct vnic_wq_buf *buf;
+
+	buf = wq->to_clean;
+	while (1) {
+
+		(*buf_service)(wq, cq_desc, buf, opaque);
+
+		wq->ring.desc_avail++;
+
+		wq->to_clean = buf->next;
+
+		if (buf->index == completed_index)
+			break;
+
+		buf = wq->to_clean;
+	}
+}
+
+void vnic_wq_free(struct vnic_wq *wq);
+int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index,
+	unsigned int desc_count, unsigned int desc_size);
+void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index,
+	unsigned int error_interrupt_enable,
+	unsigned int error_interrupt_offset);
+unsigned int vnic_wq_error_status(struct vnic_wq *wq);
+void vnic_wq_enable(struct vnic_wq *wq);
+int vnic_wq_disable(struct vnic_wq *wq);
+void vnic_wq_clean(struct vnic_wq *wq,
+	void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf));
+
+#endif /* _VNIC_WQ_H_ */
diff --git a/drivers/scsi/fnic/vnic_wq_copy.c b/drivers/scsi/fnic/vnic_wq_copy.c
new file mode 100644
index 0000000..9eab7e7
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_wq_copy.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include "vnic_wq_copy.h"
+
+void vnic_wq_copy_enable(struct vnic_wq_copy *wq)
+{
+	iowrite32(1, &wq->ctrl->enable);
+}
+
+int vnic_wq_copy_disable(struct vnic_wq_copy *wq)
+{
+	unsigned int wait;
+
+	iowrite32(0, &wq->ctrl->enable);
+
+	/* Wait for HW to ACK disable request */
+	for (wait = 0; wait < 100; wait++) {
+		if (!(ioread32(&wq->ctrl->running)))
+			return 0;
+		udelay(1);
+	}
+
+	printk(KERN_ERR "Failed to disable Copy WQ[%d],"
+	       " fetch index=%d, posted_index=%d\n",
+	       wq->index, ioread32(&wq->ctrl->fetch_index),
+	       ioread32(&wq->ctrl->posted_index));
+
+	return -ENODEV;
+}
+
+void vnic_wq_copy_clean(struct vnic_wq_copy *wq,
+	void (*q_clean)(struct vnic_wq_copy *wq,
+	struct fcpio_host_req *wq_desc))
+{
+	BUG_ON(ioread32(&wq->ctrl->enable));
+
+	if (vnic_wq_copy_desc_in_use(wq))
+		vnic_wq_copy_service(wq, -1, q_clean);
+
+	wq->to_use_index = wq->to_clean_index = 0;
+
+	iowrite32(0, &wq->ctrl->fetch_index);
+	iowrite32(0, &wq->ctrl->posted_index);
+	iowrite32(0, &wq->ctrl->error_status);
+
+	vnic_dev_clear_desc_ring(&wq->ring);
+}
+
+void vnic_wq_copy_free(struct vnic_wq_copy *wq)
+{
+	struct vnic_dev *vdev;
+
+	vdev = wq->vdev;
+	vnic_dev_free_desc_ring(vdev, &wq->ring);
+	wq->ctrl = NULL;
+}
+
+int vnic_wq_copy_alloc(struct vnic_dev *vdev, struct vnic_wq_copy *wq,
+		       unsigned int index, unsigned int desc_count,
+		       unsigned int desc_size)
+{
+	int err;
+
+	wq->index = index;
+	wq->vdev = vdev;
+	wq->to_use_index = wq->to_clean_index = 0;
+	wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index);
+	if (!wq->ctrl) {
+		printk(KERN_ERR "Failed to hook COPY WQ[%d] resource\n", index);
+		return -EINVAL;
+	}
+
+	vnic_wq_copy_disable(wq);
+
+	err = vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void vnic_wq_copy_init(struct vnic_wq_copy *wq, unsigned int cq_index,
+	unsigned int error_interrupt_enable,
+	unsigned int error_interrupt_offset)
+{
+	u64 paddr;
+
+	paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET;
+	writeq(paddr, &wq->ctrl->ring_base);
+	iowrite32(wq->ring.desc_count, &wq->ctrl->ring_size);
+	iowrite32(0, &wq->ctrl->fetch_index);
+	iowrite32(0, &wq->ctrl->posted_index);
+	iowrite32(cq_index, &wq->ctrl->cq_index);
+	iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable);
+	iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset);
+}
+
diff --git a/drivers/scsi/fnic/vnic_wq_copy.h b/drivers/scsi/fnic/vnic_wq_copy.h
new file mode 100644
index 0000000..6aff974
--- /dev/null
+++ b/drivers/scsi/fnic/vnic_wq_copy.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _VNIC_WQ_COPY_H_
+#define _VNIC_WQ_COPY_H_
+
+#include <linux/pci.h>
+#include "vnic_wq.h"
+#include "fcpio.h"
+
+#define	VNIC_WQ_COPY_MAX 1
+
+struct vnic_wq_copy {
+	unsigned int index;
+	struct vnic_dev *vdev;
+	struct vnic_wq_ctrl __iomem *ctrl;	/* memory-mapped */
+	struct vnic_dev_ring ring;
+	unsigned to_use_index;
+	unsigned to_clean_index;
+};
+
+static inline unsigned int vnic_wq_copy_desc_avail(struct vnic_wq_copy *wq)
+{
+	return wq->ring.desc_avail;
+}
+
+static inline unsigned int vnic_wq_copy_desc_in_use(struct vnic_wq_copy *wq)
+{
+	return wq->ring.desc_count - 1 - wq->ring.desc_avail;
+}
+
+static inline void *vnic_wq_copy_next_desc(struct vnic_wq_copy *wq)
+{
+	struct fcpio_host_req *desc = wq->ring.descs;
+	return &desc[wq->to_use_index];
+}
+
+static inline void vnic_wq_copy_post(struct vnic_wq_copy *wq)
+{
+
+	((wq->to_use_index + 1) == wq->ring.desc_count) ?
+		(wq->to_use_index = 0) : (wq->to_use_index++);
+	wq->ring.desc_avail--;
+
+	/* Adding write memory barrier prevents compiler and/or CPU
+	 * reordering, thus avoiding descriptor posting before
+	 * descriptor is initialized. Otherwise, hardware can read
+	 * stale descriptor fields.
+	 */
+	wmb();
+
+	iowrite32(wq->to_use_index, &wq->ctrl->posted_index);
+}
+
+static inline void vnic_wq_copy_desc_process(struct vnic_wq_copy *wq, u16 index)
+{
+	unsigned int cnt;
+
+	if (wq->to_clean_index <= index)
+		cnt = (index - wq->to_clean_index) + 1;
+	else
+		cnt = wq->ring.desc_count - wq->to_clean_index + index + 1;
+
+	wq->to_clean_index = ((index + 1) % wq->ring.desc_count);
+	wq->ring.desc_avail += cnt;
+
+}
+
+static inline void vnic_wq_copy_service(struct vnic_wq_copy *wq,
+	u16 completed_index,
+	void (*q_service)(struct vnic_wq_copy *wq,
+	struct fcpio_host_req *wq_desc))
+{
+	struct fcpio_host_req *wq_desc = wq->ring.descs;
+	unsigned int curr_index;
+
+	while (1) {
+
+		if (q_service)
+			(*q_service)(wq, &wq_desc[wq->to_clean_index]);
+
+		wq->ring.desc_avail++;
+
+		curr_index = wq->to_clean_index;
+
+		/* increment the to-clean index so that we start
+		 * with an unprocessed index next time we enter the loop
+		 */
+		((wq->to_clean_index + 1) == wq->ring.desc_count) ?
+			(wq->to_clean_index = 0) : (wq->to_clean_index++);
+
+		if (curr_index == completed_index)
+			break;
+
+		/* we have cleaned all the entries */
+		if ((completed_index == (u16)-1) &&
+		    (wq->to_clean_index == wq->to_use_index))
+			break;
+	}
+}
+
+void vnic_wq_copy_enable(struct vnic_wq_copy *wq);
+int vnic_wq_copy_disable(struct vnic_wq_copy *wq);
+void vnic_wq_copy_free(struct vnic_wq_copy *wq);
+int vnic_wq_copy_alloc(struct vnic_dev *vdev, struct vnic_wq_copy *wq,
+	unsigned int index, unsigned int desc_count, unsigned int desc_size);
+void vnic_wq_copy_init(struct vnic_wq_copy *wq, unsigned int cq_index,
+	unsigned int error_interrupt_enable,
+	unsigned int error_interrupt_offset);
+void vnic_wq_copy_clean(struct vnic_wq_copy *wq,
+	void (*q_clean)(struct vnic_wq_copy *wq,
+	struct fcpio_host_req *wq_desc));
+
+#endif /* _VNIC_WQ_COPY_H_ */
diff --git a/drivers/scsi/fnic/wq_enet_desc.h b/drivers/scsi/fnic/wq_enet_desc.h
new file mode 100644
index 0000000..b121cba
--- /dev/null
+++ b/drivers/scsi/fnic/wq_enet_desc.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2008 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _WQ_ENET_DESC_H_
+#define _WQ_ENET_DESC_H_
+
+/* Ethernet work queue descriptor: 16B */
+struct wq_enet_desc {
+	__le64 address;
+	__le16 length;
+	__le16 mss_loopback;
+	__le16 header_length_flags;
+	__le16 vlan_tag;
+};
+
+#define WQ_ENET_ADDR_BITS		64
+#define WQ_ENET_LEN_BITS		14
+#define WQ_ENET_LEN_MASK		((1 << WQ_ENET_LEN_BITS) - 1)
+#define WQ_ENET_MSS_BITS		14
+#define WQ_ENET_MSS_MASK		((1 << WQ_ENET_MSS_BITS) - 1)
+#define WQ_ENET_MSS_SHIFT		2
+#define WQ_ENET_LOOPBACK_SHIFT		1
+#define WQ_ENET_HDRLEN_BITS		10
+#define WQ_ENET_HDRLEN_MASK		((1 << WQ_ENET_HDRLEN_BITS) - 1)
+#define WQ_ENET_FLAGS_OM_BITS		2
+#define WQ_ENET_FLAGS_OM_MASK		((1 << WQ_ENET_FLAGS_OM_BITS) - 1)
+#define WQ_ENET_FLAGS_EOP_SHIFT		12
+#define WQ_ENET_FLAGS_CQ_ENTRY_SHIFT	13
+#define WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT	14
+#define WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT	15
+
+#define WQ_ENET_OFFLOAD_MODE_CSUM	0
+#define WQ_ENET_OFFLOAD_MODE_RESERVED	1
+#define WQ_ENET_OFFLOAD_MODE_CSUM_L4	2
+#define WQ_ENET_OFFLOAD_MODE_TSO	3
+
+static inline void wq_enet_desc_enc(struct wq_enet_desc *desc,
+	u64 address, u16 length, u16 mss, u16 header_length,
+	u8 offload_mode, u8 eop, u8 cq_entry, u8 fcoe_encap,
+	u8 vlan_tag_insert, u16 vlan_tag, u8 loopback)
+{
+	desc->address = cpu_to_le64(address);
+	desc->length = cpu_to_le16(length & WQ_ENET_LEN_MASK);
+	desc->mss_loopback = cpu_to_le16((mss & WQ_ENET_MSS_MASK) <<
+		WQ_ENET_MSS_SHIFT | (loopback & 1) << WQ_ENET_LOOPBACK_SHIFT);
+	desc->header_length_flags = cpu_to_le16(
+		(header_length & WQ_ENET_HDRLEN_MASK) |
+		(offload_mode & WQ_ENET_FLAGS_OM_MASK) << WQ_ENET_HDRLEN_BITS |
+		(eop & 1) << WQ_ENET_FLAGS_EOP_SHIFT |
+		(cq_entry & 1) << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT |
+		(fcoe_encap & 1) << WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT |
+		(vlan_tag_insert & 1) << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT);
+	desc->vlan_tag = cpu_to_le16(vlan_tag);
+}
+
+static inline void wq_enet_desc_dec(struct wq_enet_desc *desc,
+	u64 *address, u16 *length, u16 *mss, u16 *header_length,
+	u8 *offload_mode, u8 *eop, u8 *cq_entry, u8 *fcoe_encap,
+	u8 *vlan_tag_insert, u16 *vlan_tag, u8 *loopback)
+{
+	*address = le64_to_cpu(desc->address);
+	*length = le16_to_cpu(desc->length) & WQ_ENET_LEN_MASK;
+	*mss = (le16_to_cpu(desc->mss_loopback) >> WQ_ENET_MSS_SHIFT) &
+		WQ_ENET_MSS_MASK;
+	*loopback = (u8)((le16_to_cpu(desc->mss_loopback) >>
+		WQ_ENET_LOOPBACK_SHIFT) & 1);
+	*header_length = le16_to_cpu(desc->header_length_flags) &
+		WQ_ENET_HDRLEN_MASK;
+	*offload_mode = (u8)((le16_to_cpu(desc->header_length_flags) >>
+		WQ_ENET_HDRLEN_BITS) & WQ_ENET_FLAGS_OM_MASK);
+	*eop = (u8)((le16_to_cpu(desc->header_length_flags) >>
+		WQ_ENET_FLAGS_EOP_SHIFT) & 1);
+	*cq_entry = (u8)((le16_to_cpu(desc->header_length_flags) >>
+		WQ_ENET_FLAGS_CQ_ENTRY_SHIFT) & 1);
+	*fcoe_encap = (u8)((le16_to_cpu(desc->header_length_flags) >>
+		WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT) & 1);
+	*vlan_tag_insert = (u8)((le16_to_cpu(desc->header_length_flags) >>
+		WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT) & 1);
+	*vlan_tag = le16_to_cpu(desc->vlan_tag);
+}
+
+#endif /* _WQ_ENET_DESC_H_ */
-- 
cgit v0.10.2


From 8cc72361481f00253f1e468ade5795427386d593 Mon Sep 17 00:00:00 2001
From: Wai Yew CHAY <wychay@ctl.creative.com>
Date: Thu, 14 May 2009 08:05:58 +0200
Subject: ALSA: SB X-Fi driver merge
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Sound Blaster X-Fi driver supports Creative solutions based on
20K1 and 20K2 chipsets.

Supported hardware :

Creative Sound Blaster X-Fi Titanium Fatal1ty® Champion Series
Creative Sound Blaster X-Fi Titanium Fatal1ty Professional Series
Creative Sound Blaster X-Fi Titanium Professional Audio
Creative Sound Blaster X-Fi Titanium
Creative Sound Blaster X-Fi Elite Pro
Creative Sound Blaster X-Fi Platinum
Creative Sound Blaster X-Fi Fatal1ty
Creative Sound Blaster X-Fi XtremeGamer
Creative Sound Blaster X-Fi XtremeMusic

Current release features:

* ALSA PCM Playback
* ALSA Record
* ALSA Mixer

Note:

* External I/O modules detection not included.

Signed-off-by: Wai Yew CHAY <wychay@ctl.creative.com>
Singed-off-by: Ryan RICHARDS <ryan_richards@creativelabs.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 012858d..c903db8 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -460,6 +460,25 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 
     The power-management is supported.
 
+  Module snd-ctxfi
+  ----------------
+
+    Module for Creative Sound Blaster X-Fi boards (20k1 / 20k2 chips)
+	* Creative Sound Blaster X-Fi Titanium Fatal1ty Champion Series
+	* Creative Sound Blaster X-Fi Titanium Fatal1ty Professional Series
+	* Creative Sound Blaster X-Fi Titanium Professional Audio
+	* Creative Sound Blaster X-Fi Titanium
+	* Creative Sound Blaster X-Fi Elite Pro
+	* Creative Sound Blaster X-Fi Platinum
+	* Creative Sound Blaster X-Fi Fatal1ty
+	* Creative Sound Blaster X-Fi XtremeGamer
+	* Creative Sound Blaster X-Fi XtremeMusic
+
+    reference_rate	- reference sample rate, 44100 or 48000 (default)
+    multiple		- multiple to ref. sample rate, 1 or 2 (default)
+
+    This module supports multiple cards.
+
   Module snd-darla20
   ------------------
 
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 93422e3..3a7640f 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -275,6 +275,16 @@ config SND_CS5535AUDIO
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-cs5535audio.
 
+config SND_CTXFI
+	tristate "Creative Sound Blaster X-Fi"
+	select SND_PCM
+	help
+	  If you want to use soundcards based on Creative Sound Blastr X-Fi
+	  boards with 20k1 or 20k2 chips, say Y here.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called snd-ctxfi.
+
 config SND_DARLA20
 	tristate "(Echoaudio) Darla20"
 	select FW_LOADER
diff --git a/sound/pci/Makefile b/sound/pci/Makefile
index 65b25d2..6a1281e 100644
--- a/sound/pci/Makefile
+++ b/sound/pci/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_SND) += \
 	ali5451/ \
 	au88x0/ \
 	aw2/ \
+	ctxfi/ \
 	ca0106/ \
 	cs46xx/ \
 	cs5535audio/ \
diff --git a/sound/pci/ctxfi/Makefile b/sound/pci/ctxfi/Makefile
new file mode 100644
index 0000000..2904323
--- /dev/null
+++ b/sound/pci/ctxfi/Makefile
@@ -0,0 +1,5 @@
+snd-ctxfi-objs := xfi.o ctatc.o ctvmem.o ctpcm.o ctmixer.o ctresource.o \
+	ctsrc.o ctamixer.o ctdaio.o ctimap.o cthardware.o \
+	cthw20k2.o cthw20k1.o
+
+obj-$(CONFIG_SND_CTXFI) += snd-ctxfi.o
diff --git a/sound/pci/ctxfi/ct20k1reg.h b/sound/pci/ctxfi/ct20k1reg.h
new file mode 100644
index 0000000..c62e677
--- /dev/null
+++ b/sound/pci/ctxfi/ct20k1reg.h
@@ -0,0 +1,634 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ */
+
+#ifndef CT20K1REG_H
+#define CT20k1REG_H
+
+/* 20k1 registers */
+#define 	DSPXRAM_START 			0x000000
+#define 	DSPXRAM_END 			0x013FFC
+#define 	DSPAXRAM_START 			0x020000
+#define 	DSPAXRAM_END 			0x023FFC
+#define 	DSPYRAM_START 			0x040000
+#define 	DSPYRAM_END 			0x04FFFC
+#define 	DSPAYRAM_START 			0x020000
+#define 	DSPAYRAM_END 			0x063FFC
+#define 	DSPMICRO_START 			0x080000
+#define 	DSPMICRO_END 			0x0B3FFC
+#define 	DSP0IO_START 			0x100000
+#define 	DSP0IO_END	 		0x101FFC
+#define 	AUDIORINGIPDSP0_START 		0x100000
+#define 	AUDIORINGIPDSP0_END 		0x1003FC
+#define 	AUDIORINGOPDSP0_START 		0x100400
+#define 	AUDIORINGOPDSP0_END 		0x1007FC
+#define 	AUDPARARINGIODSP0_START 	0x100800
+#define 	AUDPARARINGIODSP0_END	 	0x100BFC
+#define 	DSP0LOCALHWREG_START 		0x100C00
+#define 	DSP0LOCALHWREG_END	 	0x100C3C
+#define 	DSP0XYRAMAGINDEX_START 		0x100C40
+#define 	DSP0XYRAMAGINDEX_END	 	0x100C5C
+#define 	DSP0XYRAMAGMDFR_START 		0x100C60
+#define 	DSP0XYRAMAGMDFR_END	 	0x100C7C
+#define 	DSP0INTCONTLVEC_START 		0x100C80
+#define 	DSP0INTCONTLVEC_END	 	0x100CD8
+#define 	INTCONTLGLOBALREG_START 	0x100D1C
+#define 	INTCONTLGLOBALREG_END	 	0x100D3C
+#define		HOSTINTFPORTADDRCONTDSP0	0x100D40
+#define		HOSTINTFPORTDATADSP0		0x100D44
+#define		TIME0PERENBDSP0			0x100D60
+#define		TIME0COUNTERDSP0		0x100D64
+#define		TIME1PERENBDSP0			0x100D68
+#define		TIME1COUNTERDSP0		0x100D6C
+#define		TIME2PERENBDSP0			0x100D70
+#define		TIME2COUNTERDSP0		0x100D74
+#define		TIME3PERENBDSP0			0x100D78
+#define		TIME3COUNTERDSP0		0x100D7C
+#define 	XRAMINDOPERREFNOUP_STARTDSP0 	0x100D80
+#define 	XRAMINDOPERREFNOUP_ENDDSP0	0x100D9C
+#define 	XRAMINDOPERREFUP_STARTDSP0	0x100DA0
+#define 	XRAMINDOPERREFUP_ENDDSP0	0x100DBC
+#define 	YRAMINDOPERREFNOUP_STARTDSP0 	0x100DC0
+#define 	YRAMINDOPERREFNOUP_ENDDSP0 	0x100DDC
+#define 	YRAMINDOPERREFUP_STARTDSP0	0x100DE0
+#define 	YRAMINDOPERREFUP_ENDDSP0 	0x100DFC
+#define 	DSP0CONDCODE 			0x100E00
+#define 	DSP0STACKFLAG 			0x100E04
+#define 	DSP0PROGCOUNTSTACKPTREG 	0x100E08
+#define 	DSP0PROGCOUNTSTACKDATAREG 	0x100E0C
+#define 	DSP0CURLOOPADDRREG 		0x100E10
+#define 	DSP0CURLOOPCOUNT 		0x100E14
+#define 	DSP0TOPLOOPCOUNTSTACK 		0x100E18
+#define 	DSP0TOPLOOPADDRSTACK 		0x100E1C
+#define 	DSP0LOOPSTACKPTR 		0x100E20
+#define 	DSP0STASSTACKDATAREG 		0x100E24
+#define 	DSP0STASSTACKPTR 		0x100E28
+#define 	DSP0PROGCOUNT 			0x100E2C
+#define  	GLOBDSPDEBGREG			0x100E30
+#define  	GLOBDSPBREPTRREG		0x100E30
+#define 	DSP0XYRAMBASE_START 		0x100EA0
+#define 	DSP0XYRAMBASE_END 		0x100EBC
+#define 	DSP0XYRAMLENG_START 		0x100EC0
+#define 	DSP0XYRAMLENG_END 		0x100EDC
+#define		SEMAPHOREREGDSP0		0x100EE0
+#define		DSP0INTCONTMASKREG		0x100EE4
+#define		DSP0INTCONTPENDREG		0x100EE8
+#define		DSP0INTCONTSERVINT		0x100EEC
+#define		DSPINTCONTEXTINTMODREG		0x100EEC
+#define		GPIODSP0			0x100EFC
+#define 	DMADSPBASEADDRREG_STARTDSP0	0x100F00
+#define 	DMADSPBASEADDRREG_ENDDSP0	0x100F1C
+#define 	DMAHOSTBASEADDRREG_STARTDSP0	0x100F20
+#define 	DMAHOSTBASEADDRREG_ENDDSP0	0x100F3C
+#define 	DMADSPCURADDRREG_STARTDSP0	0x100F40
+#define 	DMADSPCURADDRREG_ENDDSP0	0x100F5C
+#define 	DMAHOSTCURADDRREG_STARTDSP0	0x100F60
+#define 	DMAHOSTCURADDRREG_ENDDSP0	0x100F7C
+#define 	DMATANXCOUNTREG_STARTDSP0	0x100F80
+#define 	DMATANXCOUNTREG_ENDDSP0		0x100F9C
+#define 	DMATIMEBUGREG_STARTDSP0		0x100FA0
+#define 	DMATIMEBUGREG_ENDDSP0		0x100FAC
+#define 	DMACNTLMODFREG_STARTDSP0	0x100FA0
+#define 	DMACNTLMODFREG_ENDDSP0		0x100FAC
+
+#define 	DMAGLOBSTATSREGDSP0		0x100FEC
+#define 	DSP0XGPRAM_START 		0x101000
+#define 	DSP0XGPRAM_END 			0x1017FC
+#define 	DSP0YGPRAM_START 		0x101800
+#define 	DSP0YGPRAM_END 			0x101FFC
+
+
+
+
+#define 	AUDIORINGIPDSP1_START 		0x102000
+#define 	AUDIORINGIPDSP1_END	 	0x1023FC
+#define 	AUDIORINGOPDSP1_START 		0x102400
+#define 	AUDIORINGOPDSP1_END	 	0x1027FC
+#define 	AUDPARARINGIODSP1_START 	0x102800
+#define 	AUDPARARINGIODSP1_END	 	0x102BFC
+#define 	DSP1LOCALHWREG_START 		0x102C00
+#define 	DSP1LOCALHWREG_END	 	0x102C3C
+#define 	DSP1XYRAMAGINDEX_START 		0x102C40
+#define 	DSP1XYRAMAGINDEX_END	 	0x102C5C
+#define 	DSP1XYRAMAGMDFR_START 		0x102C60
+#define 	DSP1XYRAMAGMDFR_END	 	0x102C7C
+#define 	DSP1INTCONTLVEC_START 		0x102C80
+#define 	DSP1INTCONTLVEC_END	 	0x102CD8
+#define		HOSTINTFPORTADDRCONTDSP1	0x102D40
+#define		HOSTINTFPORTDATADSP1		0x102D44
+#define		TIME0PERENBDSP1			0x102D60
+#define		TIME0COUNTERDSP1		0x102D64
+#define		TIME1PERENBDSP1			0x102D68
+#define		TIME1COUNTERDSP1		0x102D6C
+#define		TIME2PERENBDSP1			0x102D70
+#define		TIME2COUNTERDSP1		0x102D74
+#define		TIME3PERENBDSP1			0x102D78
+#define		TIME3COUNTERDSP1		0x102D7C
+#define 	XRAMINDOPERREFNOUP_STARTDSP1 	0x102D80
+#define 	XRAMINDOPERREFNOUP_ENDDSP1	0x102D9C
+#define 	XRAMINDOPERREFUP_STARTDSP1	0x102DA0
+#define 	XRAMINDOPERREFUP_ENDDSP1	0x102DBC
+#define 	YRAMINDOPERREFNOUP_STARTDSP1 	0x102DC0
+#define 	YRAMINDOPERREFNOUP_ENDDSP1	0x102DDC
+#define 	YRAMINDOPERREFUP_STARTDSP1	0x102DE0
+#define 	YRAMINDOPERREFUP_ENDDSP1	0x102DFC
+
+#define 	DSP1CONDCODE 			0x102E00
+#define 	DSP1STACKFLAG 			0x102E04
+#define 	DSP1PROGCOUNTSTACKPTREG 	0x102E08
+#define 	DSP1PROGCOUNTSTACKDATAREG 	0x102E0C
+#define 	DSP1CURLOOPADDRREG 		0x102E10
+#define 	DSP1CURLOOPCOUNT 		0x102E14
+#define 	DSP1TOPLOOPCOUNTSTACK 		0x102E18
+#define 	DSP1TOPLOOPADDRSTACK 		0x102E1C
+#define 	DSP1LOOPSTACKPTR 		0x102E20
+#define 	DSP1STASSTACKDATAREG 		0x102E24
+#define 	DSP1STASSTACKPTR 		0x102E28
+#define 	DSP1PROGCOUNT 			0x102E2C
+#define 	DSP1XYRAMBASE_START 		0x102EA0
+#define 	DSP1XYRAMBASE_END 		0x102EBC
+#define 	DSP1XYRAMLENG_START 		0x102EC0
+#define 	DSP1XYRAMLENG_END 		0x102EDC
+#define		SEMAPHOREREGDSP1		0x102EE0
+#define		DSP1INTCONTMASKREG		0x102EE4
+#define		DSP1INTCONTPENDREG		0x102EE8
+#define		DSP1INTCONTSERVINT		0x102EEC
+#define		GPIODSP1			0x102EFC
+#define 	DMADSPBASEADDRREG_STARTDSP1	0x102F00
+#define 	DMADSPBASEADDRREG_ENDDSP1	0x102F1C
+#define 	DMAHOSTBASEADDRREG_STARTDSP1	0x102F20
+#define 	DMAHOSTBASEADDRREG_ENDDSP1	0x102F3C
+#define 	DMADSPCURADDRREG_STARTDSP1	0x102F40
+#define 	DMADSPCURADDRREG_ENDDSP1	0x102F5C
+#define 	DMAHOSTCURADDRREG_STARTDSP1	0x102F60
+#define 	DMAHOSTCURADDRREG_ENDDSP1	0x102F7C
+#define 	DMATANXCOUNTREG_STARTDSP1	0x102F80
+#define 	DMATANXCOUNTREG_ENDDSP1		0x102F9C
+#define 	DMATIMEBUGREG_STARTDSP1		0x102FA0
+#define 	DMATIMEBUGREG_ENDDSP1		0x102FAC
+#define 	DMACNTLMODFREG_STARTDSP1	0x102FA0
+#define 	DMACNTLMODFREG_ENDDSP1		0x102FAC
+
+#define 	DMAGLOBSTATSREGDSP1		0x102FEC
+#define 	DSP1XGPRAM_START 		0x103000
+#define 	DSP1XGPRAM_END 			0x1033FC
+#define 	DSP1YGPRAM_START 		0x103400
+#define 	DSP1YGPRAM_END 			0x1037FC
+
+
+
+#define 	AUDIORINGIPDSP2_START 		0x104000
+#define 	AUDIORINGIPDSP2_END	 	0x1043FC
+#define 	AUDIORINGOPDSP2_START 		0x104400
+#define 	AUDIORINGOPDSP2_END	 	0x1047FC
+#define 	AUDPARARINGIODSP2_START 	0x104800
+#define 	AUDPARARINGIODSP2_END	 	0x104BFC
+#define 	DSP2LOCALHWREG_START 		0x104C00
+#define 	DSP2LOCALHWREG_END	 	0x104C3C
+#define 	DSP2XYRAMAGINDEX_START 		0x104C40
+#define 	DSP2XYRAMAGINDEX_END	 	0x104C5C
+#define 	DSP2XYRAMAGMDFR_START 		0x104C60
+#define 	DSP2XYRAMAGMDFR_END		0x104C7C
+#define 	DSP2INTCONTLVEC_START 		0x104C80
+#define 	DSP2INTCONTLVEC_END	 	0x104CD8
+#define		HOSTINTFPORTADDRCONTDSP2	0x104D40
+#define		HOSTINTFPORTDATADSP2		0x104D44
+#define		TIME0PERENBDSP2			0x104D60
+#define		TIME0COUNTERDSP2		0x104D64
+#define		TIME1PERENBDSP2			0x104D68
+#define		TIME1COUNTERDSP2		0x104D6C
+#define		TIME2PERENBDSP2			0x104D70
+#define		TIME2COUNTERDSP2		0x104D74
+#define		TIME3PERENBDSP2			0x104D78
+#define		TIME3COUNTERDSP2		0x104D7C
+#define 	XRAMINDOPERREFNOUP_STARTDSP2 	0x104D80
+#define 	XRAMINDOPERREFNOUP_ENDDSP2	0x104D9C
+#define 	XRAMINDOPERREFUP_STARTDSP2	0x104DA0
+#define 	XRAMINDOPERREFUP_ENDDSP2	0x104DBC
+#define 	YRAMINDOPERREFNOUP_STARTDSP2 	0x104DC0
+#define 	YRAMINDOPERREFNOUP_ENDDSP2	0x104DDC
+#define 	YRAMINDOPERREFUP_STARTDSP2	0x104DE0
+#define 	YRAMINDOPERREFUP_ENDDSP2 	0x104DFC
+#define 	DSP2CONDCODE 			0x104E00
+#define 	DSP2STACKFLAG 			0x104E04
+#define 	DSP2PROGCOUNTSTACKPTREG 	0x104E08
+#define 	DSP2PROGCOUNTSTACKDATAREG 	0x104E0C
+#define 	DSP2CURLOOPADDRREG 		0x104E10
+#define 	DSP2CURLOOPCOUNT 		0x104E14
+#define 	DSP2TOPLOOPCOUNTSTACK 		0x104E18
+#define 	DSP2TOPLOOPADDRSTACK 		0x104E1C
+#define 	DSP2LOOPSTACKPTR 		0x104E20
+#define 	DSP2STASSTACKDATAREG 		0x104E24
+#define 	DSP2STASSTACKPTR 		0x104E28
+#define 	DSP2PROGCOUNT 			0x104E2C
+#define 	DSP2XYRAMBASE_START 		0x104EA0
+#define 	DSP2XYRAMBASE_END 		0x104EBC
+#define 	DSP2XYRAMLENG_START 		0x104EC0
+#define 	DSP2XYRAMLENG_END 		0x104EDC
+#define		SEMAPHOREREGDSP2		0x104EE0
+#define		DSP2INTCONTMASKREG		0x104EE4
+#define		DSP2INTCONTPENDREG		0x104EE8
+#define		DSP2INTCONTSERVINT		0x104EEC
+#define		GPIODSP2			0x104EFC
+#define 	DMADSPBASEADDRREG_STARTDSP2	0x104F00
+#define 	DMADSPBASEADDRREG_ENDDSP2	0x104F1C
+#define 	DMAHOSTBASEADDRREG_STARTDSP2	0x104F20
+#define 	DMAHOSTBASEADDRREG_ENDDSP2	0x104F3C
+#define 	DMADSPCURADDRREG_STARTDSP2	0x104F40
+#define 	DMADSPCURADDRREG_ENDDSP2	0x104F5C
+#define 	DMAHOSTCURADDRREG_STARTDSP2	0x104F60
+#define 	DMAHOSTCURADDRREG_ENDDSP2	0x104F7C
+#define 	DMATANXCOUNTREG_STARTDSP2	0x104F80
+#define 	DMATANXCOUNTREG_ENDDSP2		0x104F9C
+#define 	DMATIMEBUGREG_STARTDSP2		0x104FA0
+#define 	DMATIMEBUGREG_ENDDSP2		0x104FAC
+#define 	DMACNTLMODFREG_STARTDSP2	0x104FA0
+#define 	DMACNTLMODFREG_ENDDSP2		0x104FAC
+
+#define 	DMAGLOBSTATSREGDSP2		0x104FEC
+#define 	DSP2XGPRAM_START 		0x105000
+#define 	DSP2XGPRAM_END 			0x1051FC
+#define 	DSP2YGPRAM_START 		0x105800
+#define 	DSP2YGPRAM_END 			0x1059FC
+
+
+
+#define 	AUDIORINGIPDSP3_START 		0x106000
+#define 	AUDIORINGIPDSP3_END	 	0x1063FC
+#define 	AUDIORINGOPDSP3_START 		0x106400
+#define 	AUDIORINGOPDSP3_END	 	0x1067FC
+#define 	AUDPARARINGIODSP3_START 	0x106800
+#define 	AUDPARARINGIODSP3_END	 	0x106BFC
+#define 	DSP3LOCALHWREG_START 		0x106C00
+#define 	DSP3LOCALHWREG_END	 	0x106C3C
+#define 	DSP3XYRAMAGINDEX_START 		0x106C40
+#define 	DSP3XYRAMAGINDEX_END	 	0x106C5C
+#define 	DSP3XYRAMAGMDFR_START 		0x106C60
+#define 	DSP3XYRAMAGMDFR_END		0x106C7C
+#define 	DSP3INTCONTLVEC_START 		0x106C80
+#define 	DSP3INTCONTLVEC_END	 	0x106CD8
+#define		HOSTINTFPORTADDRCONTDSP3	0x106D40
+#define		HOSTINTFPORTDATADSP3		0x106D44
+#define		TIME0PERENBDSP3			0x106D60
+#define		TIME0COUNTERDSP3		0x106D64
+#define		TIME1PERENBDSP3			0x106D68
+#define		TIME1COUNTERDSP3		0x106D6C
+#define		TIME2PERENBDSP3			0x106D70
+#define		TIME2COUNTERDSP3		0x106D74
+#define		TIME3PERENBDSP3			0x106D78
+#define		TIME3COUNTERDSP3		0x106D7C
+#define 	XRAMINDOPERREFNOUP_STARTDSP3 	0x106D80
+#define 	XRAMINDOPERREFNOUP_ENDDSP3	0x106D9C
+#define 	XRAMINDOPERREFUP_STARTDSP3	0x106DA0
+#define 	XRAMINDOPERREFUP_ENDDSP3	0x106DBC
+#define 	YRAMINDOPERREFNOUP_STARTDSP3 	0x106DC0
+#define 	YRAMINDOPERREFNOUP_ENDDSP3	0x106DDC
+#define 	YRAMINDOPERREFUP_STARTDSP3	0x106DE0
+#define 	YRAMINDOPERREFUP_ENDDSP3	0x100DFC
+
+#define 	DSP3CONDCODE 			0x106E00
+#define 	DSP3STACKFLAG 			0x106E04
+#define 	DSP3PROGCOUNTSTACKPTREG 	0x106E08
+#define 	DSP3PROGCOUNTSTACKDATAREG 	0x106E0C
+#define 	DSP3CURLOOPADDRREG 		0x106E10
+#define 	DSP3CURLOOPCOUNT 		0x106E14
+#define 	DSP3TOPLOOPCOUNTSTACK 		0x106E18
+#define 	DSP3TOPLOOPADDRSTACK 		0x106E1C
+#define 	DSP3LOOPSTACKPTR 		0x106E20
+#define 	DSP3STASSTACKDATAREG 		0x106E24
+#define 	DSP3STASSTACKPTR 		0x106E28
+#define 	DSP3PROGCOUNT 			0x106E2C
+#define 	DSP3XYRAMBASE_START 		0x106EA0
+#define 	DSP3XYRAMBASE_END 		0x106EBC
+#define 	DSP3XYRAMLENG_START 		0x106EC0
+#define 	DSP3XYRAMLENG_END 		0x106EDC
+#define		SEMAPHOREREGDSP3		0x106EE0
+#define		DSP3INTCONTMASKREG		0x106EE4
+#define		DSP3INTCONTPENDREG		0x106EE8
+#define		DSP3INTCONTSERVINT		0x106EEC
+#define		GPIODSP3			0x106EFC
+#define 	DMADSPBASEADDRREG_STARTDSP3	0x106F00
+#define 	DMADSPBASEADDRREG_ENDDSP3	0x106F1C
+#define 	DMAHOSTBASEADDRREG_STARTDSP3	0x106F20
+#define 	DMAHOSTBASEADDRREG_ENDDSP3	0x106F3C
+#define 	DMADSPCURADDRREG_STARTDSP3	0x106F40
+#define 	DMADSPCURADDRREG_ENDDSP3	0x106F5C
+#define 	DMAHOSTCURADDRREG_STARTDSP3	0x106F60
+#define 	DMAHOSTCURADDRREG_ENDDSP3	0x106F7C
+#define 	DMATANXCOUNTREG_STARTDSP3	0x106F80
+#define 	DMATANXCOUNTREG_ENDDSP3		0x106F9C
+#define 	DMATIMEBUGREG_STARTDSP3		0x106FA0
+#define 	DMATIMEBUGREG_ENDDSP3		0x106FAC
+#define 	DMACNTLMODFREG_STARTDSP3	0x106FA0
+#define 	DMACNTLMODFREG_ENDDSP3		0x106FAC
+
+#define 	DMAGLOBSTATSREGDSP3		0x106FEC
+#define 	DSP3XGPRAM_START 		0x107000
+#define 	DSP3XGPRAM_END 			0x1071FC
+#define 	DSP3YGPRAM_START 		0x107800
+#define 	DSP3YGPRAM_END 			0x1079FC
+
+/* end of DSP reg definitions */
+
+#define  	DSPAIMAP_START			0x108000
+#define  	DSPAIMAP_END			0x1083FC
+#define  	DSPPIMAP_START			0x108400
+#define  	DSPPIMAP_END			0x1087FC
+#define  	DSPPOMAP_START			0x108800
+#define  	DSPPOMAP_END			0x108BFC
+#define  	DSPPOCTL			0x108C00
+#define 	TKCTL_START			0x110000
+#define 	TKCTL_END			0x110FFC
+#define 	TKCC_START			0x111000
+#define 	TKCC_END			0x111FFC
+#define 	TKIMAP_START			0x112000
+#define 	TKIMAP_END			0x112FFC
+#define		TKDCTR16			0x113000
+#define		TKPB16				0x113004
+#define		TKBS16				0x113008
+#define		TKDCTR32			0x11300C
+#define		TKPB32				0x113010
+#define		TKBS32				0x113014
+#define		ICDCTR16			0x113018
+#define		ITBS16				0x11301C
+#define		ICDCTR32			0x113020
+#define		ITBS32				0x113024
+#define		ITSTART				0x113028
+#define		TKSQ				0x11302C
+
+#define		TKSCCTL_START			0x114000
+#define		TKSCCTL_END			0x11403C
+#define		TKSCADR_START			0x114100
+#define		TKSCADR_END			0x11413C
+#define		TKSCDATAX_START			0x114800
+#define		TKSCDATAX_END			0x1149FC
+#define		TKPCDATAX_START			0x120000
+#define		TKPCDATAX_END			0x12FFFC
+
+#define		MALSA				0x130000
+#define		MAPPHA				0x130004
+#define		MAPPLA				0x130008
+#define		MALSB				0x130010
+#define		MAPPHB				0x130014
+#define		MAPPLB				0x130018
+
+#define 	TANSPORTMAPABREGS_START		0x130020
+#define 	TANSPORTMAPABREGS_END		0x13A2FC
+
+#define		PTPAHX				0x13B000
+#define		PTPALX				0x13B004
+
+#define		TANSPPAGETABLEPHYADDR015_START	0x13B008
+#define		TANSPPAGETABLEPHYADDR015_END	0x13B07C
+#define		TRNQADRX_START			0x13B100
+#define		TRNQADRX_END			0x13B13C
+#define		TRNQTIMX_START			0x13B200
+#define		TRNQTIMX_END			0x13B23C
+#define		TRNQAPARMX_START		0x13B300
+#define		TRNQAPARMX_END			0x13B33C
+
+#define		TRNQCNT				0x13B400
+#define		TRNCTL				0x13B404
+#define		TRNIS				0x13B408
+#define		TRNCURTS			0x13B40C
+
+#define		AMOP_START			0x140000
+#define		AMOPLO				0x140000
+#define		AMOPHI				0x140004
+#define		AMOP_END			0x147FFC
+#define		PMOP_START			0x148000
+#define		PMOPLO				0x148000
+#define		PMOPHI				0x148004
+#define		PMOP_END			0x14FFFC
+#define		PCURR_START			0x150000
+#define		PCURR_END			0x153FFC
+#define		PTRAG_START			0x154000
+#define		PTRAG_END			0x157FFC
+#define		PSR_START			0x158000
+#define		PSR_END				0x15BFFC
+
+#define		PFSTAT4SEG_START		0x160000
+#define		PFSTAT4SEG_END			0x160BFC
+#define		PFSTAT2SEG_START		0x160C00
+#define		PFSTAT2SEG_END			0x1617FC
+#define		PFTARG4SEG_START		0x164000
+#define		PFTARG4SEG_END			0x164BFC
+#define		PFTARG2SEG_START		0x164C00
+#define		PFTARG2SEG_END			0x1657FC
+#define		PFSR4SEG_START			0x168000
+#define		PFSR4SEG_END			0x168BFC
+#define		PFSR2SEG_START			0x168C00
+#define		PFSR2SEG_END			0x1697FC
+#define		PCURRMS4SEG_START		0x16C000
+#define		PCURRMS4SEG_END			0x16CCFC
+#define		PCURRMS2SEG_START		0x16CC00
+#define		PCURRMS2SEG_END			0x16D7FC
+#define		PTARGMS4SEG_START		0x170000
+#define		PTARGMS4SEG_END			0x172FFC
+#define		PTARGMS2SEG_START		0x173000
+#define		PTARGMS2SEG_END			0x1747FC
+#define		PSRMS4SEG_START			0x170000
+#define		PSRMS4SEG_END			0x172FFC
+#define		PSRMS2SEG_START			0x173000
+#define		PSRMS2SEG_END			0x1747FC
+
+#define		PRING_LO_START			0x190000
+#define		PRING_LO_END			0x193FFC
+#define		PRING_HI_START			0x194000
+#define		PRING_HI_END			0x197FFC
+#define		PRING_LO_HI_START		0x198000
+#define		PRING_LO_HI			0x198000
+#define		PRING_LO_HI_END			0x19BFFC
+
+#define		PINTFIFO			0x1A0000
+#define		SRCCTL				0x1B0000
+#define		SRCCCR				0x1B0004
+#define		SRCIMAP				0x1B0008
+#define		SRCODDC				0x1B000C
+#define		SRCCA				0x1B0010
+#define		SRCCF				0x1B0014
+#define		SRCSA				0x1B0018
+#define		SRCLA				0x1B001C
+#define		SRCCTLSWR			0x1B0020
+
+/* SRC HERE */
+#define		SRCALBA				0x1B002C
+#define		SRCMCTL				0x1B012C
+#define		SRCCERR				0x1B022C
+#define		SRCITB				0x1B032C
+#define		SRCIPM				0x1B082C
+#define		SRCIP				0x1B102C
+#define		SRCENBSTAT			0x1B202C
+#define		SRCENBLO			0x1B212C
+#define		SRCENBHI			0x1B222C
+#define		SRCENBS				0x1B232C
+#define		SRCENB				0x1B282C
+#define		SRCENB07			0x1B282C
+#define		SRCENBS07			0x1B302C
+
+#define		SRCDN0Z				0x1B0030
+#define		SRCDN0Z0			0x1B0030
+#define		SRCDN0Z1			0x1B0034
+#define		SRCDN0Z2			0x1B0038
+#define		SRCDN0Z3			0x1B003C
+#define		SRCDN1Z				0x1B0040
+#define		SRCDN1Z0			0x1B0040
+#define		SRCDN1Z1			0x1B0044
+#define		SRCDN1Z2			0x1B0048
+#define		SRCDN1Z3			0x1B004C
+#define		SRCDN1Z4			0x1B0050
+#define		SRCDN1Z5			0x1B0054
+#define		SRCDN1Z6			0x1B0058
+#define		SRCDN1Z7			0x1B005C
+#define		SRCUPZ				0x1B0060
+#define		SRCUPZ0				0x1B0060
+#define		SRCUPZ1				0x1B0064
+#define		SRCUPZ2				0x1B0068
+#define		SRCUPZ3				0x1B006C
+#define		SRCUPZ4				0x1B0070
+#define		SRCUPZ5				0x1B0074
+#define		SRCUPZ6				0x1B0078
+#define		SRCUPZ7				0x1B007C
+#define		SRCCD0				0x1B0080
+#define		SRCCD1				0x1B0084
+#define		SRCCD2				0x1B0088
+#define		SRCCD3				0x1B008C
+#define		SRCCD4				0x1B0090
+#define		SRCCD5				0x1B0094
+#define		SRCCD6				0x1B0098
+#define		SRCCD7				0x1B009C
+#define		SRCCD8				0x1B00A0
+#define		SRCCD9				0x1B00A4
+#define		SRCCDA				0x1B00A8
+#define		SRCCDB				0x1B00AC
+#define		SRCCDC				0x1B00B0
+#define		SRCCDD				0x1B00B4
+#define		SRCCDE				0x1B00B8
+#define		SRCCDF				0x1B00BC
+#define		SRCCD10				0x1B00C0
+#define		SRCCD11				0x1B00C4
+#define		SRCCD12				0x1B00C8
+#define		SRCCD13				0x1B00CC
+#define		SRCCD14				0x1B00D0
+#define		SRCCD15				0x1B00D4
+#define		SRCCD16				0x1B00D8
+#define		SRCCD17				0x1B00DC
+#define		SRCCD18				0x1B00E0
+#define		SRCCD19				0x1B00E4
+#define		SRCCD1A				0x1B00E8
+#define		SRCCD1B				0x1B00EC
+#define		SRCCD1C				0x1B00F0
+#define		SRCCD1D				0x1B00F4
+#define		SRCCD1E				0x1B00F8
+#define		SRCCD1F				0x1B00FC
+
+#define		SRCCONTRBLOCK_START		0x1B0100
+#define		SRCCONTRBLOCK_END		0x1BFFFC
+#define		FILTOP_START	0x1C0000
+#define		FILTOP_END	0x1C05FC
+#define		FILTIMAP_START	0x1C0800
+#define		FILTIMAP_END	0x1C0DFC
+#define		FILTZ1_START	0x1C1000
+#define		FILTZ1_END	0x1C15FC
+#define		FILTZ2_START	0x1C1800
+#define		FILTZ2_END	0x1C1DFC
+#define		DAOIMAP_START	0x1C5000
+#define		DAOIMAP		0x1C5000
+#define		DAOIMAP_END	0x1C5124
+
+#define		AC97D		0x1C5400
+#define		AC97A		0x1C5404
+#define		AC97CTL		0x1C5408
+#define		I2SCTL		0x1C5420
+
+#define		SPOS		0x1C5440
+#define		SPOSA		0x1C5440
+#define		SPOSB		0x1C5444
+#define		SPOSC		0x1C5448
+#define		SPOSD		0x1C544C
+
+#define		SPISA		0x1C5450
+#define		SPISB		0x1C5454
+#define		SPISC		0x1C5458
+#define		SPISD		0x1C545C
+
+#define		SPFSCTL		0x1C5460
+
+#define		SPFS0		0x1C5468
+#define		SPFS1		0x1C546C
+#define		SPFS2		0x1C5470
+#define		SPFS3		0x1C5474
+#define		SPFS4		0x1C5478
+#define		SPFS5		0x1C547C
+
+#define		SPOCTL		0x1C5480
+#define		SPICTL		0x1C5484
+#define		SPISTS		0x1C5488
+#define		SPINTP		0x1C548C
+#define		SPINTE		0x1C5490
+#define		SPUTCTLAB	0x1C5494
+#define		SPUTCTLCD	0x1C5498
+
+#define		SRTSPA		0x1C54C0
+#define		SRTSPB		0x1C54C4
+#define		SRTSPC		0x1C54C8
+#define		SRTSPD		0x1C54CC
+
+#define		SRTSCTL		0x1C54D0
+#define		SRTSCTLA	0x1C54D0
+#define		SRTSCTLB	0x1C54D4
+#define		SRTSCTLC	0x1C54D8
+#define		SRTSCTLD	0x1C54DC
+
+#define		SRTI2S		0x1C54E0
+#define		SRTICTL		0x1C54F0
+
+#define		WC		0x1C6000
+#define		TIMR		0x1C6004
+
+#define		GIP		0x1C6010
+#define		GIE		0x1C6014
+#define		DIE		0x1C6018
+#define		DIC		0x1C601C
+#define		GPIO		0x1C6020
+#define		GPIOCTL		0x1C6024
+#define		GPIP		0x1C6028
+#define		GPIE		0x1C602C
+#define		DSPINT0		0x1C6030
+#define		DSPEIOC		0x1C6034
+#define		MUADAT		0x1C6040
+#define		MUACMD		0x1C6044
+#define 	MUASTAT		0x1C6044
+#define		MUBDAT		0x1C6048
+#define		MUBCMD		0x1C604C
+#define		MUBSTAT		0x1C604C
+#define		UARTCMA		0x1C6050
+#define		UARTCMB		0x1C6054
+#define		UARTIP		0x1C6058
+#define		UARTIE		0x1C605C
+#define		PLLCTL		0x1C6060
+#define		PLLDCD		0x1C6064
+#define		GCTL		0x1C6070
+#define		ID0		0x1C6080
+#define		ID1		0x1C6084
+#define		ID2		0x1C6088
+#define		ID3		0x1C608C
+#define		SDRCTL		0x1C7000
+
+
+#define I2SA_L    0x0L
+#define I2SA_R    0x1L
+#define I2SB_L    0x8L
+#define I2SB_R    0x9L
+#define I2SC_L    0x10L
+#define I2SC_R    0x11L
+#define I2SD_L    0x18L
+#define I2SD_R    0x19L
+
+#endif /* CT20K1REG_H */
+
+
diff --git a/sound/pci/ctxfi/ct20k2reg.h b/sound/pci/ctxfi/ct20k2reg.h
new file mode 100644
index 0000000..2d07986
--- /dev/null
+++ b/sound/pci/ctxfi/ct20k2reg.h
@@ -0,0 +1,85 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ */
+
+#ifndef _20K2REGISTERS_H_
+#define _20K2REGISTERS_H_
+
+
+/* Timer Registers */
+#define TIMER_TIMR          0x1B7004
+#define INTERRUPT_GIP       0x1B7010
+#define INTERRUPT_GIE       0x1B7014
+
+/* I2C Registers */
+#define I2C_IF_ADDRESS   0x1B9000
+#define I2C_IF_WDATA     0x1B9004
+#define I2C_IF_RDATA     0x1B9008
+#define I2C_IF_STATUS    0x1B900C
+#define I2C_IF_WLOCK     0x1B9010
+
+/* Global Control Registers */
+#define GLOBAL_CNTL_GCTL    0x1B7090
+
+/* PLL Registers */
+#define PLL_CTL 		0x1B7080
+#define PLL_STAT		0x1B7084
+#define PLL_ENB			0x1B7088
+
+/* SRC Registers */
+#define SRC_CTL             0x1A0000 /* 0x1A0000 + (256 * Chn) */
+#define SRC_CCR             0x1A0004 /* 0x1A0004 + (256 * Chn) */
+#define SRC_IMAP            0x1A0008 /* 0x1A0008 + (256 * Chn) */
+#define SRC_CA              0x1A0010 /* 0x1A0010 + (256 * Chn) */
+#define SRC_CF              0x1A0014 /* 0x1A0014 + (256 * Chn) */
+#define SRC_SA              0x1A0018 /* 0x1A0018 + (256 * Chn) */
+#define SRC_LA              0x1A001C /* 0x1A001C + (256 * Chn) */
+#define SRC_CTLSWR	    0x1A0020 /* 0x1A0020 + (256 * Chn) */
+#define SRC_CD		    0x1A0080 /* 0x1A0080 + (256 * Chn) + (4 * Regn) */
+#define SRC_MCTL		0x1A012C
+#define SRC_IP			0x1A102C /* 0x1A102C + (256 * Regn) */
+#define SRC_ENB			0x1A282C /* 0x1A282C + (256 * Regn) */
+#define SRC_ENBSTAT		0x1A202C
+#define SRC_ENBSA		0x1A232C
+#define SRC_DN0Z		0x1A0030
+#define SRC_DN1Z		0x1A0040
+#define SRC_UPZ			0x1A0060
+
+/* GPIO Registers */
+#define GPIO_DATA           0x1B7020
+#define GPIO_CTRL           0x1B7024
+
+/* Virtual memory registers */
+#define VMEM_PTPAL          0x1C6300 /* 0x1C6300 + (16 * Chn) */
+#define VMEM_PTPAH          0x1C6304 /* 0x1C6304 + (16 * Chn) */
+#define VMEM_CTL            0x1C7000
+
+/* Transport Registers */
+#define TRANSPORT_ENB       0x1B6000
+#define TRANSPORT_CTL       0x1B6004
+#define TRANSPORT_INT       0x1B6008
+
+/* Audio IO */
+#define AUDIO_IO_AIM        0x1B5000 /* 0x1B5000 + (0x04 * Chn) */
+#define AUDIO_IO_TX_CTL     0x1B5400 /* 0x1B5400 + (0x40 * Chn) */
+#define AUDIO_IO_TX_CSTAT_L 0x1B5408 /* 0x1B5408 + (0x40 * Chn) */
+#define AUDIO_IO_TX_CSTAT_H 0x1B540C /* 0x1B540C + (0x40 * Chn) */
+#define AUDIO_IO_RX_CTL     0x1B5410 /* 0x1B5410 + (0x40 * Chn) */
+#define AUDIO_IO_RX_SRT_CTL 0x1B5420 /* 0x1B5420 + (0x40 * Chn) */
+#define AUDIO_IO_MCLK       0x1B5600
+#define AUDIO_IO_TX_BLRCLK  0x1B5604
+#define AUDIO_IO_RX_BLRCLK  0x1B5608
+
+/* Mixer */
+#define MIXER_AMOPLO		0x130000 /* 0x130000 + (8 * Chn) [4095 : 0] */
+#define MIXER_AMOPHI		0x130004 /* 0x130004 + (8 * Chn) [4095 : 0] */
+#define MIXER_PRING_LO_HI	0x188000 /* 0x188000 + (4 * Chn) [4095 : 0] */
+#define MIXER_PMOPLO		0x138000 /* 0x138000 + (8 * Chn) [4095 : 0] */
+#define MIXER_PMOPHI		0x138004 /* 0x138004 + (8 * Chn) [4095 : 0] */
+#define MIXER_AR_ENABLE		0x19000C
+
+#endif
diff --git a/sound/pci/ctxfi/ctamixer.c b/sound/pci/ctxfi/ctamixer.c
new file mode 100644
index 0000000..119791a
--- /dev/null
+++ b/sound/pci/ctxfi/ctamixer.c
@@ -0,0 +1,488 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctamixer.c
+ *
+ * @Brief
+ * This file contains the implementation of the Audio Mixer
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 21 2008
+ *
+ */
+
+#include "ctamixer.h"
+#include "cthardware.h"
+#include <linux/slab.h>
+
+#define AMIXER_RESOURCE_NUM	256
+#define SUM_RESOURCE_NUM	256
+
+#define AMIXER_Y_IMMEDIATE	1
+
+#define BLANK_SLOT		4094
+
+static int amixer_master(struct rsc *rsc)
+{
+	rsc->conj = 0;
+	return rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0];
+}
+
+static int amixer_next_conj(struct rsc *rsc)
+{
+	rsc->conj++;
+	return container_of(rsc, struct amixer, rsc)->idx[rsc->conj];
+}
+
+static int amixer_index(const struct rsc *rsc)
+{
+	return container_of(rsc, struct amixer, rsc)->idx[rsc->conj];
+}
+
+static int amixer_output_slot(const struct rsc *rsc)
+{
+	return (amixer_index(rsc) << 4) + 0x4;
+}
+
+static struct rsc_ops amixer_basic_rsc_ops = {
+	.master		= amixer_master,
+	.next_conj	= amixer_next_conj,
+	.index		= amixer_index,
+	.output_slot	= amixer_output_slot,
+};
+
+static int amixer_set_input(struct amixer *amixer, struct rsc *rsc)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)amixer->rsc.hw;
+	hw->amixer_set_mode(amixer->rsc.ctrl_blk, AMIXER_Y_IMMEDIATE);
+	amixer->input = rsc;
+	if (NULL == rsc)
+		hw->amixer_set_x(amixer->rsc.ctrl_blk, BLANK_SLOT);
+	else
+		hw->amixer_set_x(amixer->rsc.ctrl_blk,
+					rsc->ops->output_slot(rsc));
+
+	return 0;
+}
+
+/* y is a 14-bit immediate constant */
+static int amixer_set_y(struct amixer *amixer, unsigned int y)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)amixer->rsc.hw;
+	hw->amixer_set_y(amixer->rsc.ctrl_blk, y);
+
+	return 0;
+}
+
+static int amixer_set_invalid_squash(struct amixer *amixer, unsigned int iv)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)amixer->rsc.hw;
+	hw->amixer_set_iv(amixer->rsc.ctrl_blk, iv);
+
+	return 0;
+}
+
+static int amixer_set_sum(struct amixer *amixer, struct sum *sum)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)amixer->rsc.hw;
+	amixer->sum = sum;
+	if (NULL == sum) {
+		hw->amixer_set_se(amixer->rsc.ctrl_blk, 0);
+	} else {
+		hw->amixer_set_se(amixer->rsc.ctrl_blk, 1);
+		hw->amixer_set_sadr(amixer->rsc.ctrl_blk,
+					sum->rsc.ops->index(&sum->rsc));
+	}
+
+	return 0;
+}
+
+static int amixer_commit_write(struct amixer *amixer)
+{
+	struct hw *hw = NULL;
+	unsigned int index = 0;
+	int i = 0;
+	struct rsc *input = NULL;
+	struct sum *sum = NULL;
+
+	hw = (struct hw *)amixer->rsc.hw;
+	input = amixer->input;
+	sum = amixer->sum;
+
+	/* Program master and conjugate resources */
+	amixer->rsc.ops->master(&amixer->rsc);
+	if (NULL != input)
+		input->ops->master(input);
+
+	if (NULL != sum)
+		sum->rsc.ops->master(&sum->rsc);
+
+	for (i = 0; i < amixer->rsc.msr; i++) {
+		hw->amixer_set_dirty_all(amixer->rsc.ctrl_blk);
+		if (NULL != input) {
+			hw->amixer_set_x(amixer->rsc.ctrl_blk,
+						input->ops->output_slot(input));
+			input->ops->next_conj(input);
+		}
+		if (NULL != sum) {
+			hw->amixer_set_sadr(amixer->rsc.ctrl_blk,
+						sum->rsc.ops->index(&sum->rsc));
+			sum->rsc.ops->next_conj(&sum->rsc);
+		}
+		index = amixer->rsc.ops->output_slot(&amixer->rsc);
+		hw->amixer_commit_write(hw, index, amixer->rsc.ctrl_blk);
+		amixer->rsc.ops->next_conj(&amixer->rsc);
+	}
+	amixer->rsc.ops->master(&amixer->rsc);
+	if (NULL != input)
+		input->ops->master(input);
+
+	if (NULL != sum)
+		sum->rsc.ops->master(&sum->rsc);
+
+	return 0;
+}
+
+static int amixer_commit_raw_write(struct amixer *amixer)
+{
+	struct hw *hw = NULL;
+	unsigned int index = 0;
+
+	hw = (struct hw *)amixer->rsc.hw;
+	index = amixer->rsc.ops->output_slot(&amixer->rsc);
+	hw->amixer_commit_write(hw, index, amixer->rsc.ctrl_blk);
+
+	return 0;
+}
+
+static int amixer_get_y(struct amixer *amixer)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)amixer->rsc.hw;
+	return hw->amixer_get_y(amixer->rsc.ctrl_blk);
+}
+
+static int amixer_setup(struct amixer *amixer, struct rsc *input,
+			unsigned int scale, struct sum *sum)
+{
+	amixer_set_input(amixer, input);
+	amixer_set_y(amixer, scale);
+	amixer_set_sum(amixer, sum);
+	amixer_commit_write(amixer);
+	return 0;
+}
+
+static struct amixer_rsc_ops amixer_ops = {
+	.set_input		= amixer_set_input,
+	.set_invalid_squash	= amixer_set_invalid_squash,
+	.set_scale		= amixer_set_y,
+	.set_sum		= amixer_set_sum,
+	.commit_write		= amixer_commit_write,
+	.commit_raw_write	= amixer_commit_raw_write,
+	.setup			= amixer_setup,
+	.get_scale		= amixer_get_y,
+};
+
+static int amixer_rsc_init(struct amixer *amixer,
+			   const struct amixer_desc *desc,
+			   struct amixer_mgr *mgr)
+{
+	int err = 0;
+
+	err = rsc_init(&amixer->rsc, amixer->idx[0],
+			AMIXER, desc->msr, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	/* Set amixer specific operations */
+	amixer->rsc.ops = &amixer_basic_rsc_ops;
+	amixer->ops = &amixer_ops;
+	amixer->input = NULL;
+	amixer->sum = NULL;
+
+	amixer_setup(amixer, NULL, 0, NULL);
+
+	return 0;
+}
+
+static int amixer_rsc_uninit(struct amixer *amixer)
+{
+	amixer_setup(amixer, NULL, 0, NULL);
+	rsc_uninit(&amixer->rsc);
+	amixer->ops = NULL;
+	amixer->input = NULL;
+	amixer->sum = NULL;
+	return 0;
+}
+
+static int get_amixer_rsc(struct amixer_mgr *mgr,
+			  const struct amixer_desc *desc,
+			  struct amixer **ramixer)
+{
+	int err = 0, i = 0;
+	unsigned int idx = 0;
+	struct amixer *amixer = NULL;
+	unsigned long flags;
+
+	*ramixer = NULL;
+
+	/* Allocate mem for amixer resource */
+	amixer = kzalloc(sizeof(*amixer), GFP_KERNEL);
+	if (NULL == amixer) {
+		err = -ENOMEM;
+		return err;
+	}
+
+	/* Check whether there are sufficient
+	 * amixer resources to meet request. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < desc->msr; i++) {
+		err = mgr_get_resource(&mgr->mgr, 1, &idx);
+		if (err)
+			break;
+
+		amixer->idx[i] = idx;
+	}
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "Can't meet AMIXER resource request!\n");
+		goto error;
+	}
+
+	err = amixer_rsc_init(amixer, desc, mgr);
+	if (err)
+		goto error;
+
+	*ramixer = amixer;
+
+	return 0;
+
+error:
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i--; i >= 0; i--)
+		mgr_put_resource(&mgr->mgr, 1, amixer->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	kfree(amixer);
+	return err;
+}
+
+static int put_amixer_rsc(struct amixer_mgr *mgr, struct amixer *amixer)
+{
+	unsigned long flags;
+	int i = 0;
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < amixer->rsc.msr; i++)
+		mgr_put_resource(&mgr->mgr, 1, amixer->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	amixer_rsc_uninit(amixer);
+	kfree(amixer);
+
+	return 0;
+}
+
+int amixer_mgr_create(void *hw, struct amixer_mgr **ramixer_mgr)
+{
+	int err = 0;
+	struct amixer_mgr *amixer_mgr;
+
+	*ramixer_mgr = NULL;
+	amixer_mgr = kzalloc(sizeof(*amixer_mgr), GFP_KERNEL);
+	if (NULL == amixer_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&amixer_mgr->mgr, AMIXER, AMIXER_RESOURCE_NUM, hw);
+	if (err)
+		goto error;
+
+	spin_lock_init(&amixer_mgr->mgr_lock);
+
+	amixer_mgr->get_amixer = get_amixer_rsc;
+	amixer_mgr->put_amixer = put_amixer_rsc;
+
+	*ramixer_mgr = amixer_mgr;
+
+	return 0;
+
+error:
+	kfree(amixer_mgr);
+	return err;
+}
+
+int amixer_mgr_destroy(struct amixer_mgr *amixer_mgr)
+{
+	rsc_mgr_uninit(&amixer_mgr->mgr);
+	kfree(amixer_mgr);
+	return 0;
+}
+
+/* SUM resource management */
+
+static int sum_master(struct rsc *rsc)
+{
+	rsc->conj = 0;
+	return rsc->idx = container_of(rsc, struct sum, rsc)->idx[0];
+}
+
+static int sum_next_conj(struct rsc *rsc)
+{
+	rsc->conj++;
+	return container_of(rsc, struct sum, rsc)->idx[rsc->conj];
+}
+
+static int sum_index(const struct rsc *rsc)
+{
+	return container_of(rsc, struct sum, rsc)->idx[rsc->conj];
+}
+
+static int sum_output_slot(const struct rsc *rsc)
+{
+	return (sum_index(rsc) << 4) + 0xc;
+}
+
+static struct rsc_ops sum_basic_rsc_ops = {
+	.master		= sum_master,
+	.next_conj	= sum_next_conj,
+	.index		= sum_index,
+	.output_slot	= sum_output_slot,
+};
+
+static int sum_rsc_init(struct sum *sum,
+			const struct sum_desc *desc,
+			struct sum_mgr *mgr)
+{
+	int err = 0;
+
+	err = rsc_init(&sum->rsc, sum->idx[0], SUM, desc->msr, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	sum->rsc.ops = &sum_basic_rsc_ops;
+
+	return 0;
+}
+
+static int sum_rsc_uninit(struct sum *sum)
+{
+	rsc_uninit(&sum->rsc);
+	return 0;
+}
+
+static int get_sum_rsc(struct sum_mgr *mgr,
+		       const struct sum_desc *desc,
+		       struct sum **rsum)
+{
+	int err = 0, i = 0;
+	unsigned int idx = 0;
+	struct sum *sum = NULL;
+	unsigned long flags;
+
+	*rsum = NULL;
+
+	/* Allocate mem for sum resource */
+	sum = kzalloc(sizeof(*sum), GFP_KERNEL);
+	if (NULL == sum) {
+		err = -ENOMEM;
+		return err;
+	}
+
+	/* Check whether there are sufficient sum resources to meet request. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < desc->msr; i++) {
+		err = mgr_get_resource(&mgr->mgr, 1, &idx);
+		if (err)
+			break;
+
+		sum->idx[i] = idx;
+	}
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "Can't meet SUM resource request!\n");
+		goto error;
+	}
+
+	err = sum_rsc_init(sum, desc, mgr);
+	if (err)
+		goto error;
+
+	*rsum = sum;
+
+	return 0;
+
+error:
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i--; i >= 0; i--)
+		mgr_put_resource(&mgr->mgr, 1, sum->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	kfree(sum);
+	return err;
+}
+
+static int put_sum_rsc(struct sum_mgr *mgr, struct sum *sum)
+{
+	unsigned long flags;
+	int i = 0;
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < sum->rsc.msr; i++)
+		mgr_put_resource(&mgr->mgr, 1, sum->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	sum_rsc_uninit(sum);
+	kfree(sum);
+
+	return 0;
+}
+
+int sum_mgr_create(void *hw, struct sum_mgr **rsum_mgr)
+{
+	int err = 0;
+	struct sum_mgr *sum_mgr;
+
+	*rsum_mgr = NULL;
+	sum_mgr = kzalloc(sizeof(*sum_mgr), GFP_KERNEL);
+	if (NULL == sum_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&sum_mgr->mgr, SUM, SUM_RESOURCE_NUM, hw);
+	if (err)
+		goto error;
+
+	spin_lock_init(&sum_mgr->mgr_lock);
+
+	sum_mgr->get_sum = get_sum_rsc;
+	sum_mgr->put_sum = put_sum_rsc;
+
+	*rsum_mgr = sum_mgr;
+
+	return 0;
+
+error:
+	kfree(sum_mgr);
+	return err;
+}
+
+int sum_mgr_destroy(struct sum_mgr *sum_mgr)
+{
+	rsc_mgr_uninit(&sum_mgr->mgr);
+	kfree(sum_mgr);
+	return 0;
+}
+
diff --git a/sound/pci/ctxfi/ctamixer.h b/sound/pci/ctxfi/ctamixer.h
new file mode 100644
index 0000000..cc49e5a
--- /dev/null
+++ b/sound/pci/ctxfi/ctamixer.h
@@ -0,0 +1,96 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctamixer.h
+ *
+ * @Brief
+ * This file contains the definition of the Audio Mixer
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 21 2008
+ *
+ */
+
+#ifndef CTAMIXER_H
+#define CTAMIXER_H
+
+#include "ctresource.h"
+#include <linux/spinlock.h>
+
+/* Define the descriptor of a summation node resource */
+struct sum {
+	struct rsc rsc;		/* Basic resource info */
+	unsigned char idx[8];
+};
+
+/* Define sum resource request description info */
+struct sum_desc {
+	unsigned int msr;
+};
+
+struct sum_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+
+	 /* request one sum resource */
+	int (*get_sum)(struct sum_mgr *mgr,
+			const struct sum_desc *desc, struct sum **rsum);
+	/* return one sum resource */
+	int (*put_sum)(struct sum_mgr *mgr, struct sum *sum);
+};
+
+/* Constructor and destructor of daio resource manager */
+int sum_mgr_create(void *hw, struct sum_mgr **rsum_mgr);
+int sum_mgr_destroy(struct sum_mgr *sum_mgr);
+
+/* Define the descriptor of a amixer resource */
+struct amixer_rsc_ops;
+
+struct amixer {
+	struct rsc rsc;		/* Basic resource info */
+	unsigned char idx[8];
+	struct rsc *input;	/* pointer to a resource acting as source */
+	struct sum *sum;	/* Put amixer output to this summation node */
+	struct amixer_rsc_ops *ops;	/* AMixer specific operations */
+};
+
+struct amixer_rsc_ops {
+	int (*set_input)(struct amixer *amixer, struct rsc *rsc);
+	int (*set_scale)(struct amixer *amixer, unsigned int scale);
+	int (*set_invalid_squash)(struct amixer *amixer, unsigned int iv);
+	int (*set_sum)(struct amixer *amixer, struct sum *sum);
+	int (*commit_write)(struct amixer *amixer);
+	/* Only for interleaved recording */
+	int (*commit_raw_write)(struct amixer *amixer);
+	int (*setup)(struct amixer *amixer, struct rsc *input,
+			unsigned int scale, struct sum *sum);
+	int (*get_scale)(struct amixer *amixer);
+};
+
+/* Define amixer resource request description info */
+struct amixer_desc {
+	unsigned int msr;
+};
+
+struct amixer_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+
+	 /* request one amixer resource */
+	int (*get_amixer)(struct amixer_mgr *mgr,
+			  const struct amixer_desc *desc,
+			  struct amixer **ramixer);
+	/* return one amixer resource */
+	int (*put_amixer)(struct amixer_mgr *mgr, struct amixer *amixer);
+};
+
+/* Constructor and destructor of amixer resource manager */
+int amixer_mgr_create(void *hw, struct amixer_mgr **ramixer_mgr);
+int amixer_mgr_destroy(struct amixer_mgr *amixer_mgr);
+
+#endif /* CTAMIXER_H */
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
new file mode 100644
index 0000000..5f35374
--- /dev/null
+++ b/sound/pci/ctxfi/ctatc.c
@@ -0,0 +1,1605 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File    ctatc.c
+ *
+ * @Brief
+ * This file contains the implementation of the device resource management
+ * object.
+ *
+ * @Author Liu Chun
+ * @Date Mar 28 2008
+ */
+
+#include "ctatc.h"
+#include "ctpcm.h"
+#include "ctmixer.h"
+#include "ctdrv.h"
+#include "cthardware.h"
+#include "ctsrc.h"
+#include "ctamixer.h"
+#include "ctdaio.h"
+#include <linux/delay.h>
+#include <sound/pcm.h>
+#include <sound/control.h>
+#include <sound/asoundef.h>
+
+#define MONO_SUM_SCALE	0x19a8	/* 2^(-0.5) in 14-bit floating format */
+#define DAIONUM		7
+#define MAX_MULTI_CHN	8
+
+#define IEC958_DEFAULT_CON ((IEC958_AES0_NONAUDIO \
+			    | IEC958_AES0_CON_NOT_COPYRIGHT) \
+			    | ((IEC958_AES1_CON_MIXER \
+			    | IEC958_AES1_CON_ORIGINAL) << 8) \
+			    | (0x10 << 16) \
+			    | ((IEC958_AES3_CON_FS_48000) << 24))
+
+static const struct ct_atc_chip_sub_details atc_sub_details[NUM_CTCARDS] = {
+	[CTSB0760] = {.subsys = PCI_SUBSYS_CREATIVE_SB0760,
+		      .nm_model = "SB076x"},
+	[CTHENDRIX] = {.subsys = PCI_SUBSYS_CREATIVE_HENDRIX,
+		      .nm_model = "Hendrix"},
+	[CTSB08801] = {.subsys = PCI_SUBSYS_CREATIVE_SB08801,
+		      .nm_model = "SB0880"},
+	[CTSB08802] = {.subsys = PCI_SUBSYS_CREATIVE_SB08802,
+		      .nm_model = "SB0880"},
+	[CTSB08803] = {.subsys = PCI_SUBSYS_CREATIVE_SB08803,
+		      .nm_model = "SB0880"}
+};
+
+static struct ct_atc_chip_details atc_chip_details[] = {
+	{.vendor = PCI_VENDOR_CREATIVE, .device = PCI_DEVICE_CREATIVE_20K1,
+	 .sub_details = NULL,
+	 .nm_card = "X-Fi 20k1"},
+	{.vendor = PCI_VENDOR_CREATIVE, .device = PCI_DEVICE_CREATIVE_20K2,
+	 .sub_details = atc_sub_details,
+	 .nm_card = "X-Fi 20k2"},
+	{} /* terminator */
+};
+
+static struct {
+	int (*create)(struct ct_atc *atc,
+			enum CTALSADEVS device, const char *device_name);
+	int (*destroy)(void *alsa_dev);
+	const char *public_name;
+} alsa_dev_funcs[NUM_CTALSADEVS] = {
+	[FRONT]		= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "Front/WaveIn"},
+	[REAR]		= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "Rear"},
+	[CLFE]		= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "Center/LFE"},
+	[SURROUND]	= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "Surround"},
+	[IEC958]	= { .create = ct_alsa_pcm_create,
+			    .destroy = NULL,
+			    .public_name = "IEC958 Non-audio"},
+
+	[MIXER]		= { .create = ct_alsa_mix_create,
+			    .destroy = NULL,
+			    .public_name = "Mixer"}
+};
+
+typedef int (*create_t)(void *, void **);
+typedef int (*destroy_t)(void *);
+
+static struct {
+	int (*create)(void *hw, void **rmgr);
+	int (*destroy)(void *mgr);
+} rsc_mgr_funcs[NUM_RSCTYP] = {
+	[SRC] 		= { .create 	= (create_t)src_mgr_create,
+			    .destroy 	= (destroy_t)src_mgr_destroy	},
+	[SRCIMP] 	= { .create 	= (create_t)srcimp_mgr_create,
+			    .destroy 	= (destroy_t)srcimp_mgr_destroy	},
+	[AMIXER]	= { .create	= (create_t)amixer_mgr_create,
+			    .destroy	= (destroy_t)amixer_mgr_destroy	},
+	[SUM]		= { .create	= (create_t)sum_mgr_create,
+			    .destroy	= (destroy_t)sum_mgr_destroy	},
+	[DAIO]		= { .create	= (create_t)daio_mgr_create,
+			    .destroy	= (destroy_t)daio_mgr_destroy	}
+};
+
+static int
+atc_pcm_release_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+
+/* *
+ * Only mono and interleaved modes are supported now.
+ * Always allocates a contiguous channel block.
+ * */
+
+static int ct_map_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	unsigned long flags;
+	struct snd_pcm_runtime *runtime;
+	struct ct_vm *vm;
+
+	if (NULL == apcm->substream)
+		return 0;
+
+	runtime = apcm->substream->runtime;
+	vm = atc->vm;
+
+	spin_lock_irqsave(&atc->vm_lock, flags);
+	apcm->vm_block = vm->map(vm, runtime->dma_area, runtime->dma_bytes);
+	spin_unlock_irqrestore(&atc->vm_lock, flags);
+
+	if (NULL == apcm->vm_block)
+		return -ENOENT;
+
+	return 0;
+}
+
+static void ct_unmap_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	unsigned long flags;
+	struct ct_vm *vm;
+
+	if (NULL == apcm->vm_block)
+		return;
+
+	vm = atc->vm;
+
+	spin_lock_irqsave(&atc->vm_lock, flags);
+	vm->unmap(vm, apcm->vm_block);
+	spin_unlock_irqrestore(&atc->vm_lock, flags);
+
+	apcm->vm_block = NULL;
+}
+
+static unsigned long atc_get_ptp_phys(struct ct_atc *atc, int index)
+{
+	struct ct_vm *vm;
+	void *kvirt_addr;
+	unsigned long phys_addr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atc->vm_lock, flags);
+	vm = atc->vm;
+	kvirt_addr = vm->get_ptp_virt(vm, index);
+	if (kvirt_addr == NULL)
+		phys_addr = (~0UL);
+	else
+		phys_addr = virt_to_phys(kvirt_addr);
+
+	spin_unlock_irqrestore(&atc->vm_lock, flags);
+
+	return phys_addr;
+}
+
+static unsigned int convert_format(snd_pcm_format_t snd_format)
+{
+	switch (snd_format) {
+	case SNDRV_PCM_FORMAT_U8:
+	case SNDRV_PCM_FORMAT_S8:
+		return SRC_SF_U8;
+	case SNDRV_PCM_FORMAT_S16_LE:
+	case SNDRV_PCM_FORMAT_U16_LE:
+		return SRC_SF_S16;
+	case SNDRV_PCM_FORMAT_S24_3LE:
+		return SRC_SF_S24;
+	case SNDRV_PCM_FORMAT_S24_LE:
+	case SNDRV_PCM_FORMAT_S32_LE:
+		return SRC_SF_S32;
+	default:
+		printk(KERN_ERR "not recognized snd format is %d \n",
+			snd_format);
+		return SRC_SF_S16;
+	}
+}
+
+static unsigned int
+atc_get_pitch(unsigned int input_rate, unsigned int output_rate)
+{
+	unsigned int pitch = 0;
+	int b = 0;
+
+	/* get pitch and convert to fixed-point 8.24 format. */
+	pitch = (input_rate / output_rate) << 24;
+	input_rate %= output_rate;
+	input_rate /= 100;
+	output_rate /= 100;
+	for (b = 31; ((b >= 0) && !(input_rate >> b)); )
+		b--;
+
+	if (b >= 0) {
+		input_rate <<= (31 - b);
+		input_rate /= output_rate;
+		b = 24 - (31 - b);
+		if (b >= 0)
+			input_rate <<= b;
+		else
+			input_rate >>= -b;
+
+		pitch |= input_rate;
+	}
+
+	return pitch;
+}
+
+static int select_rom(unsigned int pitch)
+{
+	if ((pitch > 0x00428f5c) && (pitch < 0x01b851ec)) {
+		/* 0.26 <= pitch <= 1.72 */
+		return 1;
+	} else if ((0x01d66666 == pitch) || (0x01d66667 == pitch)) {
+		/* pitch == 1.8375 */
+		return 2;
+	} else if (0x02000000 == pitch) {
+		/* pitch == 2 */
+		return 3;
+	} else if ((pitch >= 0x0) && (pitch <= 0x08000000)) {
+		/* 0 <= pitch <= 8 */
+		return 0;
+	} else {
+		return -ENOENT;
+	}
+}
+
+static int atc_pcm_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
+	struct src_desc desc = {0};
+	struct amixer_desc mix_dsc = {0};
+	struct src *src = NULL;
+	struct amixer *amixer = NULL;
+	int err = 0;
+	int n_amixer = apcm->substream->runtime->channels, i = 0;
+	int device = apcm->substream->pcm->device;
+	unsigned int pitch = 0;
+	unsigned long flags;
+
+	if (NULL != apcm->src) {
+		/* Prepared pcm playback */
+		return 0;
+	}
+
+	/* Get SRC resource */
+	desc.multi = apcm->substream->runtime->channels;
+	desc.msr = atc->msr;
+	desc.mode = MEMRD;
+	err = src_mgr->get_src(src_mgr, &desc, (struct src **)&apcm->src);
+	if (err)
+		goto error1;
+
+	pitch = atc_get_pitch(apcm->substream->runtime->rate,
+						(atc->rsr * atc->msr));
+	src = apcm->src;
+	src->ops->set_pitch(src, pitch);
+	src->ops->set_rom(src, select_rom(pitch));
+	src->ops->set_sf(src, convert_format(apcm->substream->runtime->format));
+	src->ops->set_pm(src, (src->ops->next_interleave(src) != NULL));
+
+	/* Get AMIXER resource */
+	n_amixer = (n_amixer < 2) ? 2 : n_amixer;
+	apcm->amixers = kzalloc(sizeof(void *)*n_amixer, GFP_KERNEL);
+	if (NULL == apcm->amixers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+	mix_dsc.msr = atc->msr;
+	for (i = 0, apcm->n_amixer = 0; i < n_amixer; i++) {
+		err = amixer_mgr->get_amixer(amixer_mgr, &mix_dsc,
+					(struct amixer **)&apcm->amixers[i]);
+		if (err)
+			goto error1;
+
+		apcm->n_amixer++;
+	}
+
+	/* Set up device virtual mem map */
+	err = ct_map_audio_buffer(atc, apcm);
+	if (err < 0)
+		goto error1;
+
+	/* Connect resources */
+	src = apcm->src;
+	for (i = 0; i < n_amixer; i++) {
+		amixer = apcm->amixers[i];
+		spin_lock_irqsave(&atc->atc_lock, flags);
+		amixer->ops->setup(amixer, &src->rsc,
+					INIT_VOL, atc->pcm[i+device*2]);
+		spin_unlock_irqrestore(&atc->atc_lock, flags);
+		src = src->ops->next_interleave(src);
+		if (NULL == src)
+			src = apcm->src;
+	}
+
+	return 0;
+
+error1:
+	atc_pcm_release_resources(atc, apcm);
+	return err;
+}
+
+static int
+atc_pcm_release_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	struct srcimp_mgr *srcimp_mgr = atc->rsc_mgrs[SRCIMP];
+	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
+	struct sum_mgr *sum_mgr = atc->rsc_mgrs[SUM];
+	struct srcimp *srcimp = NULL;
+	int i = 0;
+
+	if (NULL != apcm->srcimps) {
+		for (i = 0; i < apcm->n_srcimp; i++) {
+			srcimp = apcm->srcimps[i];
+			srcimp->ops->unmap(srcimp);
+			srcimp_mgr->put_srcimp(srcimp_mgr, srcimp);
+			apcm->srcimps[i] = NULL;
+		}
+		kfree(apcm->srcimps);
+		apcm->srcimps = NULL;
+	}
+
+	if (NULL != apcm->srccs) {
+		for (i = 0; i < apcm->n_srcc; i++) {
+			src_mgr->put_src(src_mgr, apcm->srccs[i]);
+			apcm->srccs[i] = NULL;
+		}
+		kfree(apcm->srccs);
+		apcm->srccs = NULL;
+	}
+
+	if (NULL != apcm->amixers) {
+		for (i = 0; i < apcm->n_amixer; i++) {
+			amixer_mgr->put_amixer(amixer_mgr, apcm->amixers[i]);
+			apcm->amixers[i] = NULL;
+		}
+		kfree(apcm->amixers);
+		apcm->amixers = NULL;
+	}
+
+	if (NULL != apcm->mono) {
+		sum_mgr->put_sum(sum_mgr, apcm->mono);
+		apcm->mono = NULL;
+	}
+
+	if (NULL != apcm->src) {
+		src_mgr->put_src(src_mgr, apcm->src);
+		apcm->src = NULL;
+	}
+
+	if (NULL != apcm->vm_block) {
+		/* Undo device virtual mem map */
+		ct_unmap_audio_buffer(atc, apcm);
+		apcm->vm_block = NULL;
+	}
+
+	return 0;
+}
+
+static int atc_pcm_playback_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	unsigned int max_cisz = 0;
+	struct src *src = apcm->src;
+
+	max_cisz = src->multi * src->rsc.msr;
+	max_cisz = 0x80 * (max_cisz < 8 ? max_cisz : 8);
+
+	src->ops->set_sa(src, apcm->vm_block->addr);
+	src->ops->set_la(src, apcm->vm_block->addr + apcm->vm_block->size);
+	src->ops->set_ca(src, apcm->vm_block->addr + max_cisz);
+	src->ops->set_cisz(src, max_cisz);
+
+	src->ops->set_bm(src, 1);
+	src->ops->set_state(src, SRC_STATE_INIT);
+	src->ops->commit_write(src);
+
+	return 0;
+}
+
+static int atc_pcm_stop(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src = NULL;
+	int i = 0;
+
+	src = apcm->src;
+	src->ops->set_bm(src, 0);
+	src->ops->set_state(src, SRC_STATE_OFF);
+	src->ops->commit_write(src);
+
+	if (NULL != apcm->srccs) {
+		for (i = 0; i < apcm->n_srcc; i++) {
+			src = apcm->srccs[i];
+			src->ops->set_bm(src, 0);
+			src->ops->set_state(src, SRC_STATE_OFF);
+			src->ops->commit_write(src);
+		}
+	}
+
+	apcm->started = 0;
+
+	return 0;
+}
+
+static int
+atc_pcm_playback_position(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src = apcm->src;
+	u32 size = 0, max_cisz = 0;
+	int position = 0;
+
+	position = src->ops->get_ca(src);
+
+	size = apcm->vm_block->size;
+	max_cisz = src->multi * src->rsc.msr;
+	max_cisz = 128 * (max_cisz < 8 ? max_cisz : 8);
+
+	return (position + size - max_cisz - apcm->vm_block->addr) % size;
+}
+
+struct src_node_conf_t {
+	unsigned int pitch;
+	unsigned int msr:8;
+	unsigned int mix_msr:8;
+	unsigned int imp_msr:8;
+	unsigned int vo:1;
+};
+
+static void setup_src_node_conf(struct ct_atc *atc, struct ct_atc_pcm *apcm,
+				struct src_node_conf_t *conf, int *n_srcc)
+{
+	unsigned int pitch = 0;
+
+	/* get pitch and convert to fixed-point 8.24 format. */
+	pitch = atc_get_pitch((atc->rsr * atc->msr),
+				apcm->substream->runtime->rate);
+	*n_srcc = 0;
+
+	if (1 == atc->msr) {
+		*n_srcc = apcm->substream->runtime->channels;
+		conf[0].pitch = pitch;
+		conf[0].mix_msr = conf[0].imp_msr = conf[0].msr = 1;
+		conf[0].vo = 1;
+	} else if (2 == atc->msr) {
+		if (0x8000000 < pitch) {
+			/* Need two-stage SRCs, SRCIMPs and
+			 * AMIXERs for converting format */
+			conf[0].pitch = (atc->msr << 24);
+			conf[0].msr = conf[0].mix_msr = 1;
+			conf[0].imp_msr = atc->msr;
+			conf[0].vo = 0;
+			conf[1].pitch = atc_get_pitch(atc->rsr,
+					apcm->substream->runtime->rate);
+			conf[1].msr = conf[1].mix_msr = conf[1].imp_msr = 1;
+			conf[1].vo = 1;
+			*n_srcc = apcm->substream->runtime->channels * 2;
+		} else if (0x1000000 < pitch) {
+			/* Need one-stage SRCs, SRCIMPs and
+			 * AMIXERs for converting format */
+			conf[0].pitch = pitch;
+			conf[0].msr = conf[0].mix_msr
+				    = conf[0].imp_msr = atc->msr;
+			conf[0].vo = 1;
+			*n_srcc = apcm->substream->runtime->channels;
+		}
+	}
+}
+
+static int
+atc_pcm_capture_get_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	struct srcimp_mgr *srcimp_mgr = atc->rsc_mgrs[SRCIMP];
+	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
+	struct sum_mgr *sum_mgr = atc->rsc_mgrs[SUM];
+	struct src_desc src_dsc = {0};
+	struct src *src = NULL;
+	struct srcimp_desc srcimp_dsc = {0};
+	struct srcimp *srcimp = NULL;
+	struct amixer_desc mix_dsc = {0};
+	struct sum_desc sum_dsc = {0};
+	unsigned int pitch = 0;
+	int multi = 0, err = 0, i = 0;
+	int n_srcimp = 0, n_amixer = 0, n_srcc = 0, n_sum = 0;
+	struct src_node_conf_t src_node_conf[2] = {{0} };
+
+	/* The numbers of converting SRCs and SRCIMPs should be determined
+	 * by pitch value. */
+
+	multi = apcm->substream->runtime->channels;
+
+	/* get pitch and convert to fixed-point 8.24 format. */
+	pitch = atc_get_pitch((atc->rsr * atc->msr),
+				apcm->substream->runtime->rate);
+
+	setup_src_node_conf(atc, apcm, src_node_conf, &n_srcc);
+	n_sum = (1 == multi) ? 1 : 0;
+	n_amixer += n_sum * 2 + n_srcc;
+	n_srcimp += n_srcc;
+	if ((multi > 1) && (0x8000000 >= pitch)) {
+		/* Need extra AMIXERs and SRCIMPs for special treatment
+		 * of interleaved recording of conjugate channels */
+		n_amixer += multi * atc->msr;
+		n_srcimp += multi * atc->msr;
+	} else {
+		n_srcimp += multi;
+	}
+
+	if (n_srcc) {
+		apcm->srccs = kzalloc(sizeof(void *)*n_srcc, GFP_KERNEL);
+		if (NULL == apcm->srccs)
+			return -ENOMEM;
+	}
+	if (n_amixer) {
+		apcm->amixers = kzalloc(sizeof(void *)*n_amixer, GFP_KERNEL);
+		if (NULL == apcm->amixers) {
+			err = -ENOMEM;
+			goto error1;
+		}
+	}
+	apcm->srcimps = kzalloc(sizeof(void *)*n_srcimp, GFP_KERNEL);
+	if (NULL == apcm->srcimps) {
+		err = -ENOMEM;
+		goto error1;
+	}
+
+	/* Allocate SRCs for sample rate conversion if needed */
+	src_dsc.multi = 1;
+	src_dsc.mode = ARCRW;
+	for (i = 0, apcm->n_srcc = 0; i < n_srcc; i++) {
+		src_dsc.msr = src_node_conf[i/multi].msr;
+		err = src_mgr->get_src(src_mgr, &src_dsc,
+					(struct src **)&apcm->srccs[i]);
+		if (err)
+			goto error1;
+
+		src = apcm->srccs[i];
+		pitch = src_node_conf[i/multi].pitch;
+		src->ops->set_pitch(src, pitch);
+		src->ops->set_rom(src, select_rom(pitch));
+		src->ops->set_vo(src, src_node_conf[i/multi].vo);
+
+		apcm->n_srcc++;
+	}
+
+	/* Allocate AMIXERs for routing SRCs of conversion if needed */
+	for (i = 0, apcm->n_amixer = 0; i < n_amixer; i++) {
+		if (i < (n_sum*2))
+			mix_dsc.msr = atc->msr;
+		else if (i < (n_sum*2+n_srcc))
+			mix_dsc.msr = src_node_conf[(i-n_sum*2)/multi].mix_msr;
+		else
+			mix_dsc.msr = 1;
+
+		err = amixer_mgr->get_amixer(amixer_mgr, &mix_dsc,
+					(struct amixer **)&apcm->amixers[i]);
+		if (err)
+			goto error1;
+
+		apcm->n_amixer++;
+	}
+
+	/* Allocate a SUM resource to mix all input channels together */
+	sum_dsc.msr = atc->msr;
+	err = sum_mgr->get_sum(sum_mgr, &sum_dsc, (struct sum **)&apcm->mono);
+	if (err)
+		goto error1;
+
+	pitch = atc_get_pitch((atc->rsr * atc->msr),
+				apcm->substream->runtime->rate);
+	/* Allocate SRCIMP resources */
+	for (i = 0, apcm->n_srcimp = 0; i < n_srcimp; i++) {
+		if (i < (n_srcc))
+			srcimp_dsc.msr = src_node_conf[i/multi].imp_msr;
+		else if (1 == multi)
+			srcimp_dsc.msr = (pitch <= 0x8000000) ? atc->msr : 1;
+		else
+			srcimp_dsc.msr = 1;
+
+		err = srcimp_mgr->get_srcimp(srcimp_mgr, &srcimp_dsc, &srcimp);
+		if (err)
+			goto error1;
+
+		apcm->srcimps[i] = srcimp;
+		apcm->n_srcimp++;
+	}
+
+	/* Allocate a SRC for writing data to host memory */
+	src_dsc.multi = apcm->substream->runtime->channels;
+	src_dsc.msr = 1;
+	src_dsc.mode = MEMWR;
+	err = src_mgr->get_src(src_mgr, &src_dsc, (struct src **)&apcm->src);
+	if (err)
+		goto error1;
+
+	src = apcm->src;
+	src->ops->set_pitch(src, pitch);
+
+	/* Set up device virtual mem map */
+	err = ct_map_audio_buffer(atc, apcm);
+	if (err < 0)
+		goto error1;
+
+	return 0;
+
+error1:
+	atc_pcm_release_resources(atc, apcm);
+	return err;
+}
+
+static int atc_pcm_capture_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src = NULL;
+	struct amixer *amixer = NULL;
+	struct srcimp *srcimp = NULL;
+	struct ct_mixer *mixer = atc->mixer;
+	struct sum *mono = NULL;
+	struct rsc *out_ports[8] = {NULL};
+	int err = 0, i = 0, j = 0, n_sum = 0, multi = 0;
+	unsigned int pitch = 0;
+	int mix_base = 0, imp_base = 0;
+
+	if (NULL != apcm->src) {
+		/* Prepared pcm capture */
+		return 0;
+	}
+
+	/* Get needed resources. */
+	err = atc_pcm_capture_get_resources(atc, apcm);
+	if (err)
+		return err;
+
+	/* Connect resources */
+	mixer->get_output_ports(mixer, MIX_PCMO_FRONT,
+				&out_ports[0], &out_ports[1]);
+
+	multi = apcm->substream->runtime->channels;
+	if (1 == multi) {
+		mono = apcm->mono;
+		for (i = 0; i < 2; i++) {
+			amixer = apcm->amixers[i];
+			amixer->ops->setup(amixer, out_ports[i],
+						MONO_SUM_SCALE, mono);
+		}
+		out_ports[0] = &mono->rsc;
+		n_sum = 1;
+		mix_base = n_sum * 2;
+	}
+
+	for (i = 0; i < apcm->n_srcc; i++) {
+		src = apcm->srccs[i];
+		srcimp = apcm->srcimps[imp_base+i];
+		amixer = apcm->amixers[mix_base+i];
+		srcimp->ops->map(srcimp, src, out_ports[i%multi]);
+		amixer->ops->setup(amixer, &src->rsc, INIT_VOL, NULL);
+		out_ports[i%multi] = &amixer->rsc;
+	}
+
+	pitch = atc_get_pitch((atc->rsr * atc->msr),
+				apcm->substream->runtime->rate);
+
+	if ((multi > 1) && (pitch <= 0x8000000)) {
+		/* Special connection for interleaved
+		 * recording with conjugate channels */
+		for (i = 0; i < multi; i++) {
+			out_ports[i]->ops->master(out_ports[i]);
+			for (j = 0; j < atc->msr; j++) {
+				amixer = apcm->amixers[apcm->n_srcc+j*multi+i];
+				amixer->ops->set_input(amixer, out_ports[i]);
+				amixer->ops->set_scale(amixer, INIT_VOL);
+				amixer->ops->set_sum(amixer, NULL);
+				amixer->ops->commit_raw_write(amixer);
+				out_ports[i]->ops->next_conj(out_ports[i]);
+
+				srcimp = apcm->srcimps[apcm->n_srcc+j*multi+i];
+				srcimp->ops->map(srcimp, apcm->src,
+							&amixer->rsc);
+			}
+		}
+	} else {
+		for (i = 0; i < multi; i++) {
+			srcimp = apcm->srcimps[apcm->n_srcc+i];
+			srcimp->ops->map(srcimp, apcm->src, out_ports[i]);
+		}
+	}
+
+	return 0;
+}
+
+static int atc_pcm_capture_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src = NULL;
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	int i = 0, multi = 0;
+
+	if (apcm->started)
+		return 0;
+
+	apcm->started = 1;
+	multi = apcm->substream->runtime->channels;
+	/* Set up converting SRCs */
+	for (i = 0; i < apcm->n_srcc; i++) {
+		src = apcm->srccs[i];
+		src->ops->set_pm(src, ((i%multi) != (multi-1)));
+		src_mgr->src_disable(src_mgr, src);
+	}
+
+	/*  Set up recording SRC */
+	src = apcm->src;
+	src->ops->set_sf(src, convert_format(apcm->substream->runtime->format));
+	src->ops->set_sa(src, apcm->vm_block->addr);
+	src->ops->set_la(src, apcm->vm_block->addr + apcm->vm_block->size);
+	src->ops->set_ca(src, apcm->vm_block->addr);
+	src_mgr->src_disable(src_mgr, src);
+
+	/* Disable relevant SRCs firstly */
+	src_mgr->commit_write(src_mgr);
+
+	/* Enable SRCs respectively */
+	for (i = 0; i < apcm->n_srcc; i++) {
+		src = apcm->srccs[i];
+		src->ops->set_state(src, SRC_STATE_RUN);
+		src->ops->commit_write(src);
+		src_mgr->src_enable_s(src_mgr, src);
+	}
+	src = apcm->src;
+	src->ops->set_bm(src, 1);
+	src->ops->set_state(src, SRC_STATE_RUN);
+	src->ops->commit_write(src);
+	src_mgr->src_enable_s(src_mgr, src);
+
+	/* Enable relevant SRCs synchronously */
+	src_mgr->commit_write(src_mgr);
+
+	return 0;
+}
+
+static int
+atc_pcm_capture_position(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src = apcm->src;
+
+	return src->ops->get_ca(src) - apcm->vm_block->addr;
+}
+
+static int spdif_passthru_playback_get_resources(struct ct_atc *atc,
+						 struct ct_atc_pcm *apcm)
+{
+	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
+	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
+	struct src_desc desc = {0};
+	struct amixer_desc mix_dsc = {0};
+	struct src *src = NULL;
+	int err = 0;
+	int n_amixer = apcm->substream->runtime->channels, i = 0;
+	unsigned int pitch = 0, rsr = atc->pll_rate;
+
+	/* Get SRC resource */
+	desc.multi = apcm->substream->runtime->channels;
+	desc.msr = 1;
+	while (apcm->substream->runtime->rate > (rsr * desc.msr))
+		desc.msr <<= 1;
+
+	desc.mode = MEMRD;
+	err = src_mgr->get_src(src_mgr, &desc, (struct src **)&apcm->src);
+	if (err)
+		goto error1;
+
+	pitch = atc_get_pitch(apcm->substream->runtime->rate, (rsr * desc.msr));
+	src = apcm->src;
+	src->ops->set_pitch(src, pitch);
+	src->ops->set_rom(src, select_rom(pitch));
+	src->ops->set_sf(src, convert_format(apcm->substream->runtime->format));
+	src->ops->set_pm(src, (src->ops->next_interleave(src) != NULL));
+	src->ops->set_bp(src, 1);
+
+	/* Get AMIXER resource */
+	n_amixer = (n_amixer < 2) ? 2 : n_amixer;
+	apcm->amixers = kzalloc(sizeof(void *)*n_amixer, GFP_KERNEL);
+	if (NULL == apcm->amixers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+	mix_dsc.msr = desc.msr;
+	for (i = 0, apcm->n_amixer = 0; i < n_amixer; i++) {
+		err = amixer_mgr->get_amixer(amixer_mgr, &mix_dsc,
+					(struct amixer **)&apcm->amixers[i]);
+		if (err)
+			goto error1;
+
+		apcm->n_amixer++;
+	}
+
+	/* Set up device virtual mem map */
+	err = ct_map_audio_buffer(atc, apcm);
+	if (err < 0)
+		goto error1;
+
+	return 0;
+
+error1:
+	atc_pcm_release_resources(atc, apcm);
+	return err;
+}
+
+static int
+spdif_passthru_playback_setup(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct dao *dao = container_of(atc->daios[SPDIFOO], struct dao, daio);
+	unsigned long flags;
+	unsigned int rate = apcm->substream->runtime->rate;
+	unsigned int status = 0;
+	int err = 0;
+	unsigned char iec958_con_fs = 0;
+
+	switch (rate) {
+	case 48000:
+		iec958_con_fs = IEC958_AES3_CON_FS_48000;
+		break;
+	case 44100:
+		iec958_con_fs = IEC958_AES3_CON_FS_44100;
+		break;
+	case 32000:
+		iec958_con_fs = IEC958_AES3_CON_FS_32000;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	spin_lock_irqsave(&atc->atc_lock, flags);
+	dao->ops->get_spos(dao, &status);
+	if (((status >> 24) & IEC958_AES3_CON_FS) != iec958_con_fs) {
+		status &= ((~IEC958_AES3_CON_FS) << 24);
+		status |= (iec958_con_fs << 24);
+		dao->ops->set_spos(dao, status);
+		dao->ops->commit_write(dao);
+	}
+	if ((rate != atc->pll_rate) && (32000 != rate)) {
+		err = ((struct hw *)atc->hw)->pll_init(atc->hw, rate);
+		atc->pll_rate = err ? 0 : rate;
+	}
+	spin_unlock_irqrestore(&atc->atc_lock, flags);
+
+	return err;
+}
+
+static int
+spdif_passthru_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
+{
+	struct src *src = NULL;
+	struct amixer *amixer = NULL;
+	struct dao *dao = NULL;
+	int err = 0;
+	int i = 0;
+	unsigned long flags;
+
+	if (NULL != apcm->src)
+		return 0;
+
+	/* Configure SPDIFOO and PLL to passthrough mode;
+	 * determine pll_rate. */
+	err = spdif_passthru_playback_setup(atc, apcm);
+	if (err)
+		return err;
+
+	/* Get needed resources. */
+	err = spdif_passthru_playback_get_resources(atc, apcm);
+	if (err)
+		return err;
+
+	/* Connect resources */
+	src = apcm->src;
+	for (i = 0; i < apcm->n_amixer; i++) {
+		amixer = apcm->amixers[i];
+		amixer->ops->setup(amixer, &src->rsc, INIT_VOL, NULL);
+		src = src->ops->next_interleave(src);
+		if (NULL == src)
+			src = apcm->src;
+	}
+	/* Connect to SPDIFOO */
+	spin_lock_irqsave(&atc->atc_lock, flags);
+	dao = container_of(atc->daios[SPDIFOO], struct dao, daio);
+	amixer = apcm->amixers[0];
+	dao->ops->set_left_input(dao, &amixer->rsc);
+	amixer = apcm->amixers[1];
+	dao->ops->set_right_input(dao, &amixer->rsc);
+	spin_unlock_irqrestore(&atc->atc_lock, flags);
+
+	return 0;
+}
+
+static int atc_select_line_in(struct ct_atc *atc)
+{
+	struct hw *hw = atc->hw;
+	struct ct_mixer *mixer = atc->mixer;
+	struct src *src = NULL;
+
+	if (hw->is_adc_source_selected(hw, ADC_LINEIN))
+		return 0;
+
+	mixer->set_input_left(mixer, MIX_MIC_IN, NULL);
+	mixer->set_input_right(mixer, MIX_MIC_IN, NULL);
+
+	hw->select_adc_source(hw, ADC_LINEIN);
+
+	src = atc->srcs[2];
+	mixer->set_input_left(mixer, MIX_LINE_IN, &src->rsc);
+	src = atc->srcs[3];
+	mixer->set_input_right(mixer, MIX_LINE_IN, &src->rsc);
+
+	return 0;
+}
+
+static int atc_select_mic_in(struct ct_atc *atc)
+{
+	struct hw *hw = atc->hw;
+	struct ct_mixer *mixer = atc->mixer;
+	struct src *src = NULL;
+
+	if (hw->is_adc_source_selected(hw, ADC_MICIN))
+		return 0;
+
+	mixer->set_input_left(mixer, MIX_LINE_IN, NULL);
+	mixer->set_input_right(mixer, MIX_LINE_IN, NULL);
+
+	hw->select_adc_source(hw, ADC_MICIN);
+
+	src = atc->srcs[2];
+	mixer->set_input_left(mixer, MIX_MIC_IN, &src->rsc);
+	src = atc->srcs[3];
+	mixer->set_input_right(mixer, MIX_MIC_IN, &src->rsc);
+
+	return 0;
+}
+
+static int atc_have_digit_io_switch(struct ct_atc *atc)
+{
+	struct hw *hw = atc->hw;
+
+	return hw->have_digit_io_switch(hw);
+}
+
+static int atc_select_digit_io(struct ct_atc *atc)
+{
+	struct hw *hw = atc->hw;
+
+	if (hw->is_adc_source_selected(hw, ADC_NONE))
+		return 0;
+
+	hw->select_adc_source(hw, ADC_NONE);
+
+	return 0;
+}
+
+static int atc_daio_unmute(struct ct_atc *atc, unsigned char state, int type)
+{
+	struct daio_mgr *daio_mgr = atc->rsc_mgrs[DAIO];
+
+	if (state)
+		daio_mgr->daio_enable(daio_mgr, atc->daios[type]);
+	else
+		daio_mgr->daio_disable(daio_mgr, atc->daios[type]);
+
+	daio_mgr->commit_write(daio_mgr);
+
+	return 0;
+}
+
+static int
+atc_dao_get_status(struct ct_atc *atc, unsigned int *status, int type)
+{
+	struct dao *dao = container_of(atc->daios[type], struct dao, daio);
+	return dao->ops->get_spos(dao, status);
+}
+
+static int
+atc_dao_set_status(struct ct_atc *atc, unsigned int status, int type)
+{
+	struct dao *dao = container_of(atc->daios[type], struct dao, daio);
+
+	dao->ops->set_spos(dao, status);
+	dao->ops->commit_write(dao);
+	return 0;
+}
+
+static int atc_line_front_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEO1);
+}
+
+static int atc_line_surround_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEO4);
+}
+
+static int atc_line_clfe_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEO3);
+}
+
+static int atc_line_rear_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEO2);
+}
+
+static int atc_line_in_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, LINEIM);
+}
+
+static int atc_spdif_out_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, SPDIFOO);
+}
+
+static int atc_spdif_in_unmute(struct ct_atc *atc, unsigned char state)
+{
+	return atc_daio_unmute(atc, state, SPDIFIO);
+}
+
+static int atc_spdif_out_get_status(struct ct_atc *atc, unsigned int *status)
+{
+	return atc_dao_get_status(atc, status, SPDIFOO);
+}
+
+static int atc_spdif_out_set_status(struct ct_atc *atc, unsigned int status)
+{
+	return atc_dao_set_status(atc, status, SPDIFOO);
+}
+
+static int atc_spdif_out_passthru(struct ct_atc *atc, unsigned char state)
+{
+	unsigned long flags;
+	struct dao_desc da_dsc = {0};
+	struct dao *dao = NULL;
+	int err = 0;
+	struct ct_mixer *mixer = atc->mixer;
+	struct rsc *rscs[2] = {NULL};
+	unsigned int spos = 0;
+
+	spin_lock_irqsave(&atc->atc_lock, flags);
+	dao = container_of(atc->daios[SPDIFOO], struct dao, daio);
+	da_dsc.msr = state ? 1 : atc->msr;
+	da_dsc.passthru = state ? 1 : 0;
+	err = dao->ops->reinit(dao, &da_dsc);
+	if (state) {
+		spos = IEC958_DEFAULT_CON;
+	} else {
+		mixer->get_output_ports(mixer, MIX_SPDIF_OUT,
+					&rscs[0], &rscs[1]);
+		dao->ops->set_left_input(dao, rscs[0]);
+		dao->ops->set_right_input(dao, rscs[1]);
+		/* Restore PLL to atc->rsr if needed. */
+		if (atc->pll_rate != atc->rsr) {
+			err = ((struct hw *)atc->hw)->pll_init(atc->hw,
+							       atc->rsr);
+			atc->pll_rate = err ? 0 : atc->rsr;
+		}
+	}
+	dao->ops->set_spos(dao, spos);
+	dao->ops->commit_write(dao);
+	spin_unlock_irqrestore(&atc->atc_lock, flags);
+
+	return err;
+}
+
+static int ct_atc_destroy(struct ct_atc *atc)
+{
+	struct daio_mgr *daio_mgr = NULL;
+	struct dao *dao = NULL;
+	struct dai *dai = NULL;
+	struct daio *daio = NULL;
+	struct sum_mgr *sum_mgr = NULL;
+	struct src_mgr *src_mgr = NULL;
+	struct srcimp_mgr *srcimp_mgr = NULL;
+	struct srcimp *srcimp = NULL;
+	struct ct_mixer *mixer = NULL;
+	int i = 0;
+
+	if (NULL == atc)
+		return 0;
+
+	/* Stop hardware and disable all interrupts */
+	if (NULL != atc->hw)
+		((struct hw *)atc->hw)->card_stop(atc->hw);
+
+	/* Destroy internal mixer objects */
+	if (NULL != atc->mixer) {
+		mixer = atc->mixer;
+		mixer->set_input_left(mixer, MIX_LINE_IN, NULL);
+		mixer->set_input_right(mixer, MIX_LINE_IN, NULL);
+		mixer->set_input_left(mixer, MIX_MIC_IN, NULL);
+		mixer->set_input_right(mixer, MIX_MIC_IN, NULL);
+		mixer->set_input_left(mixer, MIX_SPDIF_IN, NULL);
+		mixer->set_input_right(mixer, MIX_SPDIF_IN, NULL);
+		ct_mixer_destroy(atc->mixer);
+	}
+
+	if (NULL != atc->daios) {
+		daio_mgr = (struct daio_mgr *)atc->rsc_mgrs[DAIO];
+		for (i = 0; i < atc->n_daio; i++) {
+			daio = atc->daios[i];
+			if (daio->type < LINEIM) {
+				dao = container_of(daio, struct dao, daio);
+				dao->ops->clear_left_input(dao);
+				dao->ops->clear_right_input(dao);
+			} else {
+				dai = container_of(daio, struct dai, daio);
+				/* some thing to do for dai ... */
+			}
+			daio_mgr->put_daio(daio_mgr, daio);
+		}
+		kfree(atc->daios);
+	}
+
+	if (NULL != atc->pcm) {
+		sum_mgr = atc->rsc_mgrs[SUM];
+		for (i = 0; i < atc->n_pcm; i++)
+			sum_mgr->put_sum(sum_mgr, atc->pcm[i]);
+
+		kfree(atc->pcm);
+	}
+
+	if (NULL != atc->srcs) {
+		src_mgr = atc->rsc_mgrs[SRC];
+		for (i = 0; i < atc->n_src; i++)
+			src_mgr->put_src(src_mgr, atc->srcs[i]);
+
+		kfree(atc->srcs);
+	}
+
+	if (NULL != atc->srcimps) {
+		srcimp_mgr = atc->rsc_mgrs[SRCIMP];
+		for (i = 0; i < atc->n_srcimp; i++) {
+			srcimp = atc->srcimps[i];
+			srcimp->ops->unmap(srcimp);
+			srcimp_mgr->put_srcimp(srcimp_mgr, atc->srcimps[i]);
+		}
+		kfree(atc->srcimps);
+	}
+
+	for (i = 0; i < NUM_RSCTYP; i++) {
+		if ((NULL != rsc_mgr_funcs[i].destroy) &&
+		    (NULL != atc->rsc_mgrs[i]))
+			rsc_mgr_funcs[i].destroy(atc->rsc_mgrs[i]);
+
+	}
+
+	if (NULL != atc->hw)
+		destroy_hw_obj((struct hw *)atc->hw);
+
+	/* Destroy device virtual memory manager object */
+	if (NULL != atc->vm) {
+		ct_vm_destroy(atc->vm);
+		atc->vm = NULL;
+	}
+
+	kfree(atc);
+
+	return 0;
+}
+
+static int atc_dev_free(struct snd_device *dev)
+{
+	struct ct_atc *atc = dev->device_data;
+	return ct_atc_destroy(atc);
+}
+
+static int atc_identify_card(struct ct_atc *atc)
+{
+	u16 subsys = 0;
+	u8 revision = 0;
+	struct pci_dev *pci = atc->pci;
+	const struct ct_atc_chip_details *d;
+	enum CTCARDS i;
+
+	pci_read_config_word(pci, PCI_SUBSYSTEM_ID, &subsys);
+	pci_read_config_byte(pci, PCI_REVISION_ID, &revision);
+	atc->chip_details = NULL;
+	atc->model = NUM_CTCARDS;
+	for (d = atc_chip_details; d->vendor; d++) {
+		if (d->vendor != pci->vendor || d->device != pci->device)
+			continue;
+
+		if (NULL == d->sub_details) {
+			atc->chip_details = d;
+			break;
+		}
+		for (i = 0; i < NUM_CTCARDS; i++) {
+			if ((d->sub_details[i].subsys == subsys) ||
+			    (((subsys & 0x6000) == 0x6000) &&
+			    ((d->sub_details[i].subsys & 0x6000) == 0x6000))) {
+				atc->model = i;
+				break;
+			}
+		}
+		if (i >= NUM_CTCARDS)
+			continue;
+
+		atc->chip_details = d;
+		break;
+		/* not take revision into consideration now */
+	}
+	if (!d->vendor)
+		return -ENOENT;
+
+	return 0;
+}
+
+static int ct_create_alsa_devs(struct ct_atc *atc)
+{
+	enum CTALSADEVS i;
+	struct hw *hw = atc->hw;
+	int err;
+
+	switch (hw->get_chip_type(hw)) {
+	case ATC20K1:
+		alsa_dev_funcs[MIXER].public_name = "20K1";
+		break;
+	case ATC20K2:
+		alsa_dev_funcs[MIXER].public_name = "20K2";
+		break;
+	default:
+		alsa_dev_funcs[MIXER].public_name = "Unknown";
+		break;
+	}
+
+	for (i = 0; i < NUM_CTALSADEVS; i++) {
+		if (NULL == alsa_dev_funcs[i].create)
+			continue;
+
+		err = alsa_dev_funcs[i].create(atc, i,
+				alsa_dev_funcs[i].public_name);
+		if (err) {
+			printk(KERN_ERR "Creating alsa device %d failed!\n", i);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int atc_create_hw_devs(struct ct_atc *atc)
+{
+	struct hw *hw = NULL;
+	struct card_conf info = {0};
+	int i = 0, err = 0;
+
+	err = create_hw_obj(atc->pci, &hw);
+	if (err) {
+		printk(KERN_ERR "Failed to create hw obj!!!\n");
+		return err;
+	}
+	atc->hw = hw;
+
+	/* Initialize card hardware. */
+	info.rsr = atc->rsr;
+	info.msr = atc->msr;
+	info.vm_pgt_phys = atc_get_ptp_phys(atc, 0);
+	err = hw->card_init(hw, &info);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < NUM_RSCTYP; i++) {
+		if (NULL == rsc_mgr_funcs[i].create)
+			continue;
+
+		err = rsc_mgr_funcs[i].create(atc->hw, &atc->rsc_mgrs[i]);
+		if (err) {
+			printk(KERN_ERR "Failed to create rsc_mgr %d!!!\n", i);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int atc_get_resources(struct ct_atc *atc)
+{
+	struct daio_desc da_desc = {0};
+	struct daio_mgr *daio_mgr = NULL;
+	struct src_desc src_dsc = {0};
+	struct src_mgr *src_mgr = NULL;
+	struct srcimp_desc srcimp_dsc = {0};
+	struct srcimp_mgr *srcimp_mgr = NULL;
+	struct sum_desc sum_dsc = {0};
+	struct sum_mgr *sum_mgr = NULL;
+	int err = 0, i = 0;
+	unsigned short subsys_id = 0;
+
+	atc->daios = kzalloc(sizeof(void *)*(DAIONUM), GFP_KERNEL);
+	if (NULL == atc->daios)
+		return -ENOMEM;
+
+	atc->srcs = kzalloc(sizeof(void *)*(2*2), GFP_KERNEL);
+	if (NULL == atc->srcs)
+		return -ENOMEM;
+
+	atc->srcimps = kzalloc(sizeof(void *)*(2*2), GFP_KERNEL);
+	if (NULL == atc->srcimps)
+		return -ENOMEM;
+
+	atc->pcm = kzalloc(sizeof(void *)*(2*4), GFP_KERNEL);
+	if (NULL == atc->pcm)
+		return -ENOMEM;
+
+	daio_mgr = (struct daio_mgr *)atc->rsc_mgrs[DAIO];
+	da_desc.msr = atc->msr;
+	for (i = 0, atc->n_daio = 0; i < DAIONUM-1; i++) {
+		da_desc.type = i;
+		err = daio_mgr->get_daio(daio_mgr, &da_desc,
+					(struct daio **)&atc->daios[i]);
+		if (err) {
+			printk(KERN_ERR "Failed to get DAIO "
+					"resource %d!!!\n", i);
+			return err;
+		}
+		atc->n_daio++;
+	}
+	pci_read_config_word(atc->pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
+		/* SB073x cards */
+		da_desc.type = SPDIFI1;
+	} else {
+		da_desc.type = SPDIFIO;
+	}
+	err = daio_mgr->get_daio(daio_mgr, &da_desc,
+				(struct daio **)&atc->daios[i]);
+	if (err) {
+		printk(KERN_ERR "Failed to get S/PDIF-in resource!!!\n");
+		return err;
+	}
+	atc->n_daio++;
+
+	src_mgr = atc->rsc_mgrs[SRC];
+	src_dsc.multi = 1;
+	src_dsc.msr = atc->msr;
+	src_dsc.mode = ARCRW;
+	for (i = 0, atc->n_src = 0; i < (2*2); i++) {
+		err = src_mgr->get_src(src_mgr, &src_dsc,
+					(struct src **)&atc->srcs[i]);
+		if (err)
+			return err;
+
+		atc->n_src++;
+	}
+
+	srcimp_mgr = atc->rsc_mgrs[SRCIMP];
+	srcimp_dsc.msr = 8; /* SRCIMPs for S/PDIFIn SRT */
+	for (i = 0, atc->n_srcimp = 0; i < (2*1); i++) {
+		err = srcimp_mgr->get_srcimp(srcimp_mgr, &srcimp_dsc,
+					(struct srcimp **)&atc->srcimps[i]);
+		if (err)
+			return err;
+
+		atc->n_srcimp++;
+	}
+	srcimp_dsc.msr = 8; /* SRCIMPs for LINE/MICIn SRT */
+	for (i = 0; i < (2*1); i++) {
+		err = srcimp_mgr->get_srcimp(srcimp_mgr, &srcimp_dsc,
+				(struct srcimp **)&atc->srcimps[2*1+i]);
+		if (err)
+			return err;
+
+		atc->n_srcimp++;
+	}
+
+	sum_mgr = atc->rsc_mgrs[SUM];
+	sum_dsc.msr = atc->msr;
+	for (i = 0, atc->n_pcm = 0; i < (2*4); i++) {
+		err = sum_mgr->get_sum(sum_mgr, &sum_dsc,
+					(struct sum **)&atc->pcm[i]);
+		if (err)
+			return err;
+
+		atc->n_pcm++;
+	}
+
+	err = ct_mixer_create(atc, (struct ct_mixer **)&atc->mixer);
+	if (err) {
+		printk(KERN_ERR "Failed to create mixer obj!!!\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static void
+atc_connect_dai(struct src_mgr *src_mgr, struct dai *dai,
+		struct src **srcs, struct srcimp **srcimps)
+{
+	struct rsc *rscs[2] = {NULL};
+	struct src *src = NULL;
+	struct srcimp *srcimp = NULL;
+	int i = 0;
+
+	rscs[0] = &dai->daio.rscl;
+	rscs[1] = &dai->daio.rscr;
+	for (i = 0; i < 2; i++) {
+		src = srcs[i];
+		srcimp = srcimps[i];
+		srcimp->ops->map(srcimp, src, rscs[i]);
+		src_mgr->src_disable(src_mgr, src);
+	}
+
+	src_mgr->commit_write(src_mgr); /* Actually disable SRCs */
+
+	src = srcs[0];
+	src->ops->set_pm(src, 1);
+	for (i = 0; i < 2; i++) {
+		src = srcs[i];
+		src->ops->set_state(src, SRC_STATE_RUN);
+		src->ops->commit_write(src);
+		src_mgr->src_enable_s(src_mgr, src);
+	}
+
+	dai->ops->set_srt_srcl(dai, &(srcs[0]->rsc));
+	dai->ops->set_srt_srcr(dai, &(srcs[1]->rsc));
+
+	dai->ops->set_enb_src(dai, 1);
+	dai->ops->set_enb_srt(dai, 1);
+	dai->ops->commit_write(dai);
+
+	src_mgr->commit_write(src_mgr); /* Synchronously enable SRCs */
+}
+
+static void atc_connect_resources(struct ct_atc *atc)
+{
+	struct dai *dai = NULL;
+	struct dao *dao = NULL;
+	struct src *src = NULL;
+	struct sum *sum = NULL;
+	struct ct_mixer *mixer = NULL;
+	struct rsc *rscs[2] = {NULL};
+	int i = 0, j = 0;
+
+	mixer = atc->mixer;
+
+	for (i = MIX_WAVE_FRONT, j = LINEO1; i <= MIX_SPDIF_OUT; i++, j++) {
+		mixer->get_output_ports(mixer, i, &rscs[0], &rscs[1]);
+		dao = container_of(atc->daios[j], struct dao, daio);
+		dao->ops->set_left_input(dao, rscs[0]);
+		dao->ops->set_right_input(dao, rscs[1]);
+	}
+
+	dai = container_of(atc->daios[LINEIM], struct dai, daio);
+	atc_connect_dai(atc->rsc_mgrs[SRC], dai,
+			(struct src **)&atc->srcs[2],
+			(struct srcimp **)&atc->srcimps[2]);
+	src = atc->srcs[2];
+	mixer->set_input_left(mixer, MIX_LINE_IN, &src->rsc);
+	src = atc->srcs[3];
+	mixer->set_input_right(mixer, MIX_LINE_IN, &src->rsc);
+
+	dai = container_of(atc->daios[SPDIFIO], struct dai, daio);
+	atc_connect_dai(atc->rsc_mgrs[SRC], dai,
+			(struct src **)&atc->srcs[0],
+			(struct srcimp **)&atc->srcimps[0]);
+
+	src = atc->srcs[0];
+	mixer->set_input_left(mixer, MIX_SPDIF_IN, &src->rsc);
+	src = atc->srcs[1];
+	mixer->set_input_right(mixer, MIX_SPDIF_IN, &src->rsc);
+
+	for (i = MIX_PCMI_FRONT, j = 0; i <= MIX_PCMI_SURROUND; i++, j += 2) {
+		sum = atc->pcm[j];
+		mixer->set_input_left(mixer, i, &sum->rsc);
+		sum = atc->pcm[j+1];
+		mixer->set_input_right(mixer, i, &sum->rsc);
+	}
+}
+
+static void atc_set_ops(struct ct_atc *atc)
+{
+	/* Set operations */
+	atc->map_audio_buffer = ct_map_audio_buffer;
+	atc->unmap_audio_buffer = ct_unmap_audio_buffer;
+	atc->pcm_playback_prepare = atc_pcm_playback_prepare;
+	atc->pcm_release_resources = atc_pcm_release_resources;
+	atc->pcm_playback_start = atc_pcm_playback_start;
+	atc->pcm_playback_stop = atc_pcm_stop;
+	atc->pcm_playback_position = atc_pcm_playback_position;
+	atc->pcm_capture_prepare = atc_pcm_capture_prepare;
+	atc->pcm_capture_start = atc_pcm_capture_start;
+	atc->pcm_capture_stop = atc_pcm_stop;
+	atc->pcm_capture_position = atc_pcm_capture_position;
+	atc->spdif_passthru_playback_prepare = spdif_passthru_playback_prepare;
+	atc->get_ptp_phys = atc_get_ptp_phys;
+	atc->select_line_in = atc_select_line_in;
+	atc->select_mic_in = atc_select_mic_in;
+	atc->select_digit_io = atc_select_digit_io;
+	atc->line_front_unmute = atc_line_front_unmute;
+	atc->line_surround_unmute = atc_line_surround_unmute;
+	atc->line_clfe_unmute = atc_line_clfe_unmute;
+	atc->line_rear_unmute = atc_line_rear_unmute;
+	atc->line_in_unmute = atc_line_in_unmute;
+	atc->spdif_out_unmute = atc_spdif_out_unmute;
+	atc->spdif_in_unmute = atc_spdif_in_unmute;
+	atc->spdif_out_get_status = atc_spdif_out_get_status;
+	atc->spdif_out_set_status = atc_spdif_out_set_status;
+	atc->spdif_out_passthru = atc_spdif_out_passthru;
+	atc->have_digit_io_switch = atc_have_digit_io_switch;
+}
+
+/**
+ *  ct_atc_create - create and initialize a hardware manager
+ *  @card: corresponding alsa card object
+ *  @pci: corresponding kernel pci device object
+ *  @ratc: return created object address in it
+ *
+ *  Creates and initializes a hardware manager.
+ *
+ *  Creates kmallocated ct_atc structure. Initializes hardware.
+ *  Returns 0 if suceeds, or negative error code if fails.
+ */
+
+int ct_atc_create(struct snd_card *card, struct pci_dev *pci,
+		  unsigned int rsr, unsigned int msr, struct ct_atc **ratc)
+{
+	struct ct_atc *atc = NULL;
+	static struct snd_device_ops ops = {
+		.dev_free = atc_dev_free,
+	};
+	int err = 0;
+
+	*ratc = NULL;
+
+	atc = kzalloc(sizeof(*atc), GFP_KERNEL);
+	if (NULL == atc)
+		return -ENOMEM;
+
+	atc->card = card;
+	atc->pci = pci;
+	atc->rsr = rsr;
+	atc->msr = msr;
+
+	/* Set operations */
+	atc_set_ops(atc);
+
+	spin_lock_init(&atc->atc_lock);
+	spin_lock_init(&atc->vm_lock);
+
+	/* Find card model */
+	err = atc_identify_card(atc);
+	if (err < 0) {
+		printk(KERN_ERR "ctatc: Card not recognised\n");
+		goto error1;
+	}
+
+	/* Set up device virtual memory management object */
+	err = ct_vm_create(&atc->vm);
+	if (err < 0)
+		goto error1;
+
+	/* Create all atc hw devices */
+	err = atc_create_hw_devs(atc);
+	if (err < 0)
+		goto error1;
+
+	/* Get resources */
+	err = atc_get_resources(atc);
+	if (err < 0)
+		goto error1;
+
+	/* Build topology */
+	atc_connect_resources(atc);
+
+	atc->create_alsa_devs = ct_create_alsa_devs;
+
+	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, atc, &ops);
+	if (err < 0)
+		goto error1;
+
+	snd_card_set_dev(card, &pci->dev);
+
+	*ratc = atc;
+	return 0;
+
+error1:
+	ct_atc_destroy(atc);
+	printk(KERN_ERR "Something wrong!!!\n");
+	return err;
+}
+
diff --git a/sound/pci/ctxfi/ctatc.h b/sound/pci/ctxfi/ctatc.h
new file mode 100644
index 0000000..286c993
--- /dev/null
+++ b/sound/pci/ctxfi/ctatc.h
@@ -0,0 +1,155 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctatc.h
+ *
+ * @Brief
+ * This file contains the definition of the device resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	Mar 28 2008
+ *
+ */
+
+#ifndef CTATC_H
+#define CTATC_H
+
+#include <linux/types.h>
+#include <linux/spinlock_types.h>
+#include <linux/pci.h>
+#include <linux/timer.h>
+#include <sound/core.h>
+
+#include "ctvmem.h"
+#include "ctresource.h"
+
+enum CTALSADEVS {		/* Types of alsa devices */
+	FRONT,
+	REAR,
+	CLFE,
+	SURROUND,
+	IEC958,
+	MIXER,
+	NUM_CTALSADEVS		/* This should always be the last */
+};
+
+enum CTCARDS {
+	CTSB0760,
+	CTHENDRIX,
+	CTSB08801,
+	CTSB08802,
+	CTSB08803,
+	NUM_CTCARDS		/* This should always be the last */
+};
+
+struct ct_atc_chip_sub_details {
+	u16 subsys;
+	const char *nm_model;
+};
+
+struct ct_atc_chip_details {
+	u16 vendor;
+	u16 device;
+	const struct ct_atc_chip_sub_details *sub_details;
+	const char *nm_card;
+};
+
+struct ct_atc;
+
+/* alsa pcm stream descriptor */
+struct ct_atc_pcm {
+	struct snd_pcm_substream *substream;
+	void (*interrupt)(struct ct_atc_pcm *apcm);
+	unsigned int started:1;
+	unsigned int stop_timer:1;
+	struct timer_list timer;
+	spinlock_t timer_lock;
+	unsigned int position;
+
+	/* Only mono and interleaved modes are supported now. */
+	struct ct_vm_block *vm_block;
+	void *src;		/* SRC for interacting with host memory */
+	void **srccs;		/* SRCs for sample rate conversion */
+	void **srcimps;		/* SRC Input Mappers */
+	void **amixers;		/* AMIXERs for routing converted data */
+	void *mono;		/* A SUM resource for mixing chs to one */
+	unsigned char n_srcc;	/* Number of converting SRCs */
+	unsigned char n_srcimp;	/* Number of SRC Input Mappers */
+	unsigned char n_amixer;	/* Number of AMIXERs */
+};
+
+/* Chip resource management object */
+struct ct_atc {
+	struct pci_dev *pci;
+	struct snd_card *card;
+	unsigned int rsr; /* reference sample rate in Hz */
+	unsigned int msr; /* master sample rate in rsr */
+	unsigned int pll_rate; /* current rate of Phase Lock Loop */
+
+	const struct ct_atc_chip_details *chip_details;
+	enum CTCARDS model;
+	/* Create all alsa devices */
+	int (*create_alsa_devs)(struct ct_atc *atc);
+
+	struct ct_vm *vm; /* device virtual memory manager for this card */
+	int (*map_audio_buffer)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	void (*unmap_audio_buffer)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	unsigned long (*get_ptp_phys)(struct ct_atc *atc, int index);
+
+	spinlock_t atc_lock;
+	spinlock_t vm_lock;
+
+	int (*pcm_playback_prepare)(struct ct_atc *atc,
+				    struct ct_atc_pcm *apcm);
+	int (*pcm_playback_start)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_playback_stop)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_playback_position)(struct ct_atc *atc,
+				     struct ct_atc_pcm *apcm);
+	int (*spdif_passthru_playback_prepare)(struct ct_atc *atc,
+					       struct ct_atc_pcm *apcm);
+	int (*pcm_capture_prepare)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_capture_start)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_capture_stop)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
+	int (*pcm_capture_position)(struct ct_atc *atc,
+				    struct ct_atc_pcm *apcm);
+	int (*pcm_release_resources)(struct ct_atc *atc,
+				     struct ct_atc_pcm *apcm);
+	int (*select_line_in)(struct ct_atc *atc);
+	int (*select_mic_in)(struct ct_atc *atc);
+	int (*select_digit_io)(struct ct_atc *atc);
+	int (*line_front_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*line_surround_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*line_clfe_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*line_rear_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*line_in_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*spdif_out_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*spdif_in_unmute)(struct ct_atc *atc, unsigned char state);
+	int (*spdif_out_get_status)(struct ct_atc *atc, unsigned int *status);
+	int (*spdif_out_set_status)(struct ct_atc *atc, unsigned int status);
+	int (*spdif_out_passthru)(struct ct_atc *atc, unsigned char state);
+	int (*have_digit_io_switch)(struct ct_atc *atc);
+
+	/* Don't touch! Used for internal object. */
+	void *rsc_mgrs[NUM_RSCTYP]; /* chip resource managers */
+	void *mixer;		/* internal mixer object */
+	void *hw;		/* chip specific hardware access object */
+	void **daios;		/* digital audio io resources */
+	void **pcm;		/* SUMs for collecting all pcm stream */
+	void **srcs;		/* Sample Rate Converters for input signal */
+	void **srcimps;		/* input mappers for SRCs */
+	unsigned char n_daio;
+	unsigned char n_src;
+	unsigned char n_srcimp;
+	unsigned char n_pcm;
+};
+
+
+int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
+			    unsigned int rsr, unsigned int msr,
+			    struct ct_atc **ratc);
+
+#endif /* CTATC_H */
diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c
new file mode 100644
index 0000000..a2aea39
--- /dev/null
+++ b/sound/pci/ctxfi/ctdaio.c
@@ -0,0 +1,769 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctdaio.c
+ *
+ * @Brief
+ * This file contains the implementation of Digital Audio Input Output
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 23 2008
+ *
+ */
+
+#include "ctdaio.h"
+#include "cthardware.h"
+#include "ctimap.h"
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+#define DAIO_RESOURCE_NUM	NUM_DAIOTYP
+#define DAIO_OUT_MAX		SPDIFOO
+
+union daio_usage {
+	struct {
+		unsigned short lineo1:1;
+		unsigned short lineo2:1;
+		unsigned short lineo3:1;
+		unsigned short lineo4:1;
+		unsigned short spdifoo:1;
+		unsigned short lineim:1;
+		unsigned short spdifio:1;
+		unsigned short spdifi1:1;
+	} bf;
+	unsigned short data;
+};
+
+struct daio_rsc_idx {
+	unsigned short left;
+	unsigned short right;
+};
+
+struct daio_rsc_idx idx_20k1[NUM_DAIOTYP] = {
+	[LINEO1] = {.left = 0x00, .right = 0x01},
+	[LINEO2] = {.left = 0x18, .right = 0x19},
+	[LINEO3] = {.left = 0x08, .right = 0x09},
+	[LINEO4] = {.left = 0x10, .right = 0x11},
+	[LINEIM] = {.left = 0x1b5, .right = 0x1bd},
+	[SPDIFOO] = {.left = 0x20, .right = 0x21},
+	[SPDIFIO] = {.left = 0x15, .right = 0x1d},
+	[SPDIFI1] = {.left = 0x95, .right = 0x9d},
+};
+
+struct daio_rsc_idx idx_20k2[NUM_DAIOTYP] = {
+	[LINEO1] = {.left = 0x40, .right = 0x41},
+	[LINEO2] = {.left = 0x70, .right = 0x71},
+	[LINEO3] = {.left = 0x50, .right = 0x51},
+	[LINEO4] = {.left = 0x60, .right = 0x61},
+	[LINEIM] = {.left = 0x45, .right = 0xc5},
+	[SPDIFOO] = {.left = 0x00, .right = 0x01},
+	[SPDIFIO] = {.left = 0x05, .right = 0x85},
+};
+
+static int daio_master(struct rsc *rsc)
+{
+	/* Actually, this is not the resource index of DAIO.
+	 * For DAO, it is the input mapper index. And, for DAI,
+	 * it is the output time-slot index. */
+	return rsc->conj = rsc->idx;
+}
+
+static int daio_index(const struct rsc *rsc)
+{
+	return rsc->conj;
+}
+
+static int daio_out_next_conj(struct rsc *rsc)
+{
+	return rsc->conj += 2;
+}
+
+static int daio_in_next_conj_20k1(struct rsc *rsc)
+{
+	return rsc->conj += 0x200;
+}
+
+static int daio_in_next_conj_20k2(struct rsc *rsc)
+{
+	return rsc->conj += 0x100;
+}
+
+static struct rsc_ops daio_out_rsc_ops = {
+	.master		= daio_master,
+	.next_conj	= daio_out_next_conj,
+	.index		= daio_index,
+	.output_slot	= NULL,
+};
+
+static struct rsc_ops daio_in_rsc_ops_20k1 = {
+	.master		= daio_master,
+	.next_conj	= daio_in_next_conj_20k1,
+	.index		= NULL,
+	.output_slot	= daio_index,
+};
+
+static struct rsc_ops daio_in_rsc_ops_20k2 = {
+	.master		= daio_master,
+	.next_conj	= daio_in_next_conj_20k2,
+	.index		= NULL,
+	.output_slot	= daio_index,
+};
+
+static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw)
+{
+	switch (hw->get_chip_type(hw)) {
+	case ATC20K1:
+		switch (type) {
+		case SPDIFOO:	return 0;
+		case SPDIFIO:	return 0;
+		case SPDIFI1:	return 1;
+		case LINEO1:	return 4;
+		case LINEO2:	return 7;
+		case LINEO3:	return 5;
+		case LINEO4:	return 6;
+		case LINEIM:	return 7;
+		default:	return -EINVAL;
+		}
+	case ATC20K2:
+		switch (type) {
+		case SPDIFOO:	return 0;
+		case SPDIFIO:	return 0;
+		case LINEO1:	return 4;
+		case LINEO2:	return 7;
+		case LINEO3:	return 5;
+		case LINEO4:	return 6;
+		case LINEIM:	return 4;
+		default:	return -EINVAL;
+		}
+	default:
+		return -EINVAL;
+	}
+}
+
+static int dao_rsc_reinit(struct dao *dao, const struct dao_desc *desc);
+
+static int dao_spdif_get_spos(struct dao *dao, unsigned int *spos)
+{
+	((struct hw *)dao->hw)->dao_get_spos(dao->ctrl_blk, spos);
+	return 0;
+}
+
+static int dao_spdif_set_spos(struct dao *dao, unsigned int spos)
+{
+	((struct hw *)dao->hw)->dao_set_spos(dao->ctrl_blk, spos);
+	return 0;
+}
+
+static int dao_commit_write(struct dao *dao)
+{
+	((struct hw *)dao->hw)->dao_commit_write(dao->hw,
+		daio_device_index(dao->daio.type, dao->hw), dao->ctrl_blk);
+	return 0;
+}
+
+static int dao_set_left_input(struct dao *dao, struct rsc *input)
+{
+	struct imapper *entry = NULL;
+	struct daio *daio = &dao->daio;
+	int i = 0;
+
+	entry = kzalloc((sizeof(*entry) * daio->rscl.msr), GFP_KERNEL);
+	if (NULL == entry)
+		return -ENOMEM;
+
+	/* Program master and conjugate resources */
+	input->ops->master(input);
+	daio->rscl.ops->master(&daio->rscl);
+	for (i = 0; i < daio->rscl.msr; i++, entry++) {
+		entry->slot = input->ops->output_slot(input);
+		entry->user = entry->addr = daio->rscl.ops->index(&daio->rscl);
+		dao->mgr->imap_add(dao->mgr, entry);
+		dao->imappers[i] = entry;
+
+		input->ops->next_conj(input);
+		daio->rscl.ops->next_conj(&daio->rscl);
+	}
+	input->ops->master(input);
+	daio->rscl.ops->master(&daio->rscl);
+
+	return 0;
+}
+
+static int dao_set_right_input(struct dao *dao, struct rsc *input)
+{
+	struct imapper *entry = NULL;
+	struct daio *daio = &dao->daio;
+	int i = 0;
+
+	entry = kzalloc((sizeof(*entry) * daio->rscr.msr), GFP_KERNEL);
+	if (NULL == entry)
+		return -ENOMEM;
+
+	/* Program master and conjugate resources */
+	input->ops->master(input);
+	daio->rscr.ops->master(&daio->rscr);
+	for (i = 0; i < daio->rscr.msr; i++, entry++) {
+		entry->slot = input->ops->output_slot(input);
+		entry->user = entry->addr = daio->rscr.ops->index(&daio->rscr);
+		dao->mgr->imap_add(dao->mgr, entry);
+		dao->imappers[daio->rscl.msr + i] = entry;
+
+		input->ops->next_conj(input);
+		daio->rscr.ops->next_conj(&daio->rscr);
+	}
+	input->ops->master(input);
+	daio->rscr.ops->master(&daio->rscr);
+
+	return 0;
+}
+
+static int dao_clear_left_input(struct dao *dao)
+{
+	struct imapper *entry = NULL;
+	struct daio *daio = &dao->daio;
+	int i = 0;
+
+	if (NULL == dao->imappers[0])
+		return 0;
+
+	entry = dao->imappers[0];
+	dao->mgr->imap_delete(dao->mgr, entry);
+	/* Program conjugate resources */
+	for (i = 1; i < daio->rscl.msr; i++) {
+		entry = dao->imappers[i];
+		dao->mgr->imap_delete(dao->mgr, entry);
+		dao->imappers[i] = NULL;
+	}
+
+	kfree(dao->imappers[0]);
+	dao->imappers[0] = NULL;
+
+	return 0;
+}
+
+static int dao_clear_right_input(struct dao *dao)
+{
+	struct imapper *entry = NULL;
+	struct daio *daio = &dao->daio;
+	int i = 0;
+
+	if (NULL == dao->imappers[daio->rscl.msr])
+		return 0;
+
+	entry = dao->imappers[daio->rscl.msr];
+	dao->mgr->imap_delete(dao->mgr, entry);
+	/* Program conjugate resources */
+	for (i = 1; i < daio->rscr.msr; i++) {
+		entry = dao->imappers[daio->rscl.msr + i];
+		dao->mgr->imap_delete(dao->mgr, entry);
+		dao->imappers[daio->rscl.msr + i] = NULL;
+	}
+
+	kfree(dao->imappers[daio->rscl.msr]);
+	dao->imappers[daio->rscl.msr] = NULL;
+
+	return 0;
+}
+
+static struct dao_rsc_ops dao_ops = {
+	.set_spos		= dao_spdif_set_spos,
+	.commit_write		= dao_commit_write,
+	.get_spos		= dao_spdif_get_spos,
+	.reinit			= dao_rsc_reinit,
+	.set_left_input		= dao_set_left_input,
+	.set_right_input	= dao_set_right_input,
+	.clear_left_input	= dao_clear_left_input,
+	.clear_right_input	= dao_clear_right_input,
+};
+
+static int dai_set_srt_srcl(struct dai *dai, struct rsc *src)
+{
+	src->ops->master(src);
+	((struct hw *)dai->hw)->dai_srt_set_srcm(dai->ctrl_blk,
+						src->ops->index(src));
+	return 0;
+}
+
+static int dai_set_srt_srcr(struct dai *dai, struct rsc *src)
+{
+	src->ops->master(src);
+	((struct hw *)dai->hw)->dai_srt_set_srco(dai->ctrl_blk,
+						src->ops->index(src));
+	return 0;
+}
+
+static int dai_set_srt_msr(struct dai *dai, unsigned int msr)
+{
+	unsigned int rsr = 0;
+
+	for (rsr = 0; msr > 1; msr >>= 1)
+		rsr++;
+
+	((struct hw *)dai->hw)->dai_srt_set_rsr(dai->ctrl_blk, rsr);
+	return 0;
+}
+
+static int dai_set_enb_src(struct dai *dai, unsigned int enb)
+{
+	((struct hw *)dai->hw)->dai_srt_set_ec(dai->ctrl_blk, enb);
+	return 0;
+}
+
+static int dai_set_enb_srt(struct dai *dai, unsigned int enb)
+{
+	((struct hw *)dai->hw)->dai_srt_set_et(dai->ctrl_blk, enb);
+	return 0;
+}
+
+static int dai_commit_write(struct dai *dai)
+{
+	((struct hw *)dai->hw)->dai_commit_write(dai->hw,
+		daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk);
+	return 0;
+}
+
+static struct dai_rsc_ops dai_ops = {
+	.set_srt_srcl		= dai_set_srt_srcl,
+	.set_srt_srcr		= dai_set_srt_srcr,
+	.set_srt_msr		= dai_set_srt_msr,
+	.set_enb_src		= dai_set_enb_src,
+	.set_enb_srt		= dai_set_enb_srt,
+	.commit_write		= dai_commit_write,
+};
+
+static int daio_rsc_init(struct daio *daio,
+			 const struct daio_desc *desc,
+			 void *hw)
+{
+	int err = 0;
+	unsigned int idx_l = 0, idx_r = 0;
+
+	switch (((struct hw *)hw)->get_chip_type(hw)) {
+	case ATC20K1:
+		idx_l = idx_20k1[desc->type].left;
+		idx_r = idx_20k1[desc->type].right;
+		break;
+	case ATC20K2:
+		idx_l = idx_20k2[desc->type].left;
+		idx_r = idx_20k2[desc->type].right;
+		break;
+	default:
+		return -EINVAL;
+	}
+	err = rsc_init(&daio->rscl, idx_l, DAIO, desc->msr, hw);
+	if (err)
+		return err;
+
+	err = rsc_init(&daio->rscr, idx_r, DAIO, desc->msr, hw);
+	if (err)
+		goto error1;
+
+	/* Set daio->rscl/r->ops to daio specific ones */
+	if (desc->type <= DAIO_OUT_MAX) {
+		daio->rscl.ops = daio->rscr.ops = &daio_out_rsc_ops;
+	} else {
+		switch (((struct hw *)hw)->get_chip_type(hw)) {
+		case ATC20K1:
+			daio->rscl.ops = daio->rscr.ops = &daio_in_rsc_ops_20k1;
+			break;
+		case ATC20K2:
+			daio->rscl.ops = daio->rscr.ops = &daio_in_rsc_ops_20k2;
+			break;
+		default:
+			break;
+		}
+	}
+	daio->type = desc->type;
+
+	return 0;
+
+error1:
+	rsc_uninit(&daio->rscl);
+	return err;
+}
+
+static int daio_rsc_uninit(struct daio *daio)
+{
+	rsc_uninit(&daio->rscl);
+	rsc_uninit(&daio->rscr);
+
+	return 0;
+}
+
+static int dao_rsc_init(struct dao *dao,
+			const struct daio_desc *desc,
+			struct daio_mgr *mgr)
+{
+	struct hw *hw = mgr->mgr.hw;
+	unsigned int conf = 0;
+	int err = 0;
+
+	err = daio_rsc_init(&dao->daio, desc, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	dao->imappers = kzalloc(sizeof(void *)*desc->msr*2, GFP_KERNEL);
+	if (NULL == dao->imappers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+	dao->ops = &dao_ops;
+	dao->mgr = mgr;
+	dao->hw = hw;
+	err = hw->dao_get_ctrl_blk(&dao->ctrl_blk);
+	if (err)
+		goto error2;
+
+	hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk,
+			daio_device_index(dao->daio.type, hw));
+	hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
+
+	conf |= (desc->msr & 0x7) | (desc->passthru << 3);
+	hw->daio_mgr_dao_init(mgr->mgr.ctrl_blk,
+			daio_device_index(dao->daio.type, hw), conf);
+	hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk,
+			daio_device_index(dao->daio.type, hw));
+	hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
+
+	return 0;
+
+error2:
+	kfree(dao->imappers);
+	dao->imappers = NULL;
+error1:
+	daio_rsc_uninit(&dao->daio);
+	return err;
+}
+
+static int dao_rsc_uninit(struct dao *dao)
+{
+	if (NULL != dao->imappers) {
+		if (NULL != dao->imappers[0])
+			dao_clear_left_input(dao);
+
+		if (NULL != dao->imappers[dao->daio.rscl.msr])
+			dao_clear_right_input(dao);
+
+		kfree(dao->imappers);
+		dao->imappers = NULL;
+	}
+	((struct hw *)dao->hw)->dao_put_ctrl_blk(dao->ctrl_blk);
+	dao->hw = dao->ctrl_blk = NULL;
+	daio_rsc_uninit(&dao->daio);
+
+	return 0;
+}
+
+static int dao_rsc_reinit(struct dao *dao, const struct dao_desc *desc)
+{
+	struct daio_mgr *mgr = dao->mgr;
+	struct daio_desc dsc = {0};
+
+	dsc.type = dao->daio.type;
+	dsc.msr = desc->msr;
+	dsc.passthru = desc->passthru;
+	dao_rsc_uninit(dao);
+	return dao_rsc_init(dao, &dsc, mgr);
+}
+
+static int dai_rsc_init(struct dai *dai,
+			const struct daio_desc *desc,
+			struct daio_mgr *mgr)
+{
+	int err = 0;
+	struct hw *hw = mgr->mgr.hw;
+	unsigned int rsr = 0, msr = 0;
+
+	err = daio_rsc_init(&dai->daio, desc, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	dai->ops = &dai_ops;
+	dai->hw = mgr->mgr.hw;
+	err = hw->dai_get_ctrl_blk(&dai->ctrl_blk);
+	if (err)
+		goto error1;
+
+	for (rsr = 0, msr = desc->msr; msr > 1; msr >>= 1)
+		rsr++;
+
+	hw->dai_srt_set_rsr(dai->ctrl_blk, rsr);
+	hw->dai_srt_set_drat(dai->ctrl_blk, 0);
+	/* default to disabling control of a SRC */
+	hw->dai_srt_set_ec(dai->ctrl_blk, 0);
+	hw->dai_srt_set_et(dai->ctrl_blk, 0); /* default to disabling SRT */
+	hw->dai_commit_write(hw,
+		daio_device_index(dai->daio.type, dai->hw), dai->ctrl_blk);
+
+	return 0;
+
+error1:
+	daio_rsc_uninit(&dai->daio);
+	return err;
+}
+
+static int dai_rsc_uninit(struct dai *dai)
+{
+	((struct hw *)dai->hw)->dai_put_ctrl_blk(dai->ctrl_blk);
+	dai->hw = dai->ctrl_blk = NULL;
+	daio_rsc_uninit(&dai->daio);
+	return 0;
+}
+
+static int daio_mgr_get_rsc(struct rsc_mgr *mgr, enum DAIOTYP type)
+{
+	if (((union daio_usage *)mgr->rscs)->data & (0x1 << type))
+		return -ENOENT;
+
+	((union daio_usage *)mgr->rscs)->data |= (0x1 << type);
+
+	return 0;
+}
+
+static int daio_mgr_put_rsc(struct rsc_mgr *mgr, enum DAIOTYP type)
+{
+	((union daio_usage *)mgr->rscs)->data &= ~(0x1 << type);
+
+	return 0;
+}
+
+static int get_daio_rsc(struct daio_mgr *mgr,
+			const struct daio_desc *desc,
+			struct daio **rdaio)
+{
+	int err = 0;
+	struct dai *dai = NULL;
+	struct dao *dao = NULL;
+	unsigned long flags;
+
+	*rdaio = NULL;
+
+	/* Check whether there are sufficient daio resources to meet request. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	err = daio_mgr_get_rsc(&mgr->mgr, desc->type);
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "Can't meet DAIO resource request!\n");
+		return err;
+	}
+
+	/* Allocate mem for daio resource */
+	if (desc->type <= DAIO_OUT_MAX) {
+		dao = kzalloc(sizeof(*dao), GFP_KERNEL);
+		if (NULL == dao) {
+			err = -ENOMEM;
+			goto error;
+		}
+		err = dao_rsc_init(dao, desc, mgr);
+		if (err)
+			goto error;
+
+		*rdaio = &dao->daio;
+	} else {
+		dai = kzalloc(sizeof(*dai), GFP_KERNEL);
+		if (NULL == dai) {
+			err = -ENOMEM;
+			goto error;
+		}
+		err = dai_rsc_init(dai, desc, mgr);
+		if (err)
+			goto error;
+
+		*rdaio = &dai->daio;
+	}
+
+	mgr->daio_enable(mgr, *rdaio);
+	mgr->commit_write(mgr);
+
+	return 0;
+
+error:
+	if (NULL != dao)
+		kfree(dao);
+	else if (NULL != dai)
+		kfree(dai);
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	daio_mgr_put_rsc(&mgr->mgr, desc->type);
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	return err;
+}
+
+static int put_daio_rsc(struct daio_mgr *mgr, struct daio *daio)
+{
+	unsigned long flags;
+
+	mgr->daio_disable(mgr, daio);
+	mgr->commit_write(mgr);
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	daio_mgr_put_rsc(&mgr->mgr, daio->type);
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+
+	if (daio->type <= DAIO_OUT_MAX) {
+		dao_rsc_uninit(container_of(daio, struct dao, daio));
+		kfree(container_of(daio, struct dao, daio));
+	} else {
+		dai_rsc_uninit(container_of(daio, struct dai, daio));
+		kfree(container_of(daio, struct dai, daio));
+	}
+
+	return 0;
+}
+
+static int daio_mgr_enb_daio(struct daio_mgr *mgr, struct daio *daio)
+{
+	struct hw *hw = mgr->mgr.hw;
+
+	if (DAIO_OUT_MAX >= daio->type) {
+		hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk,
+				daio_device_index(daio->type, hw));
+	} else {
+		hw->daio_mgr_enb_dai(mgr->mgr.ctrl_blk,
+				daio_device_index(daio->type, hw));
+	}
+	return 0;
+}
+
+static int daio_mgr_dsb_daio(struct daio_mgr *mgr, struct daio *daio)
+{
+	struct hw *hw = mgr->mgr.hw;
+
+	if (DAIO_OUT_MAX >= daio->type) {
+		hw->daio_mgr_dsb_dao(mgr->mgr.ctrl_blk,
+				daio_device_index(daio->type, hw));
+	} else {
+		hw->daio_mgr_dsb_dai(mgr->mgr.ctrl_blk,
+				daio_device_index(daio->type, hw));
+	}
+	return 0;
+}
+
+static int daio_map_op(void *data, struct imapper *entry)
+{
+	struct rsc_mgr *mgr = &((struct daio_mgr *)data)->mgr;
+	struct hw *hw = mgr->hw;
+
+	hw->daio_mgr_set_imaparc(mgr->ctrl_blk, entry->slot);
+	hw->daio_mgr_set_imapnxt(mgr->ctrl_blk, entry->next);
+	hw->daio_mgr_set_imapaddr(mgr->ctrl_blk, entry->addr);
+	hw->daio_mgr_commit_write(mgr->hw, mgr->ctrl_blk);
+
+	return 0;
+}
+
+static int daio_imap_add(struct daio_mgr *mgr, struct imapper *entry)
+{
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&mgr->imap_lock, flags);
+	if ((0 == entry->addr) && (mgr->init_imap_added)) {
+		input_mapper_delete(&mgr->imappers, mgr->init_imap,
+							daio_map_op, mgr);
+		mgr->init_imap_added = 0;
+	}
+	err = input_mapper_add(&mgr->imappers, entry, daio_map_op, mgr);
+	spin_unlock_irqrestore(&mgr->imap_lock, flags);
+
+	return err;
+}
+
+static int daio_imap_delete(struct daio_mgr *mgr, struct imapper *entry)
+{
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&mgr->imap_lock, flags);
+	err = input_mapper_delete(&mgr->imappers, entry, daio_map_op, mgr);
+	if (list_empty(&mgr->imappers)) {
+		input_mapper_add(&mgr->imappers, mgr->init_imap,
+							daio_map_op, mgr);
+		mgr->init_imap_added = 1;
+	}
+	spin_unlock_irqrestore(&mgr->imap_lock, flags);
+
+	return err;
+}
+
+static int daio_mgr_commit_write(struct daio_mgr *mgr)
+{
+	struct hw *hw = mgr->mgr.hw;
+
+	hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
+	return 0;
+}
+
+int daio_mgr_create(void *hw, struct daio_mgr **rdaio_mgr)
+{
+	int err = 0, i = 0;
+	struct daio_mgr *daio_mgr;
+	struct imapper *entry;
+
+	*rdaio_mgr = NULL;
+	daio_mgr = kzalloc(sizeof(*daio_mgr), GFP_KERNEL);
+	if (NULL == daio_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&daio_mgr->mgr, DAIO, DAIO_RESOURCE_NUM, hw);
+	if (err)
+		goto error1;
+
+	spin_lock_init(&daio_mgr->mgr_lock);
+	spin_lock_init(&daio_mgr->imap_lock);
+	INIT_LIST_HEAD(&daio_mgr->imappers);
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (NULL == entry) {
+		err = -ENOMEM;
+		goto error2;
+	}
+	entry->slot = entry->addr = entry->next = entry->user = 0;
+	list_add(&entry->list, &daio_mgr->imappers);
+	daio_mgr->init_imap = entry;
+	daio_mgr->init_imap_added = 1;
+
+	daio_mgr->get_daio = get_daio_rsc;
+	daio_mgr->put_daio = put_daio_rsc;
+	daio_mgr->daio_enable = daio_mgr_enb_daio;
+	daio_mgr->daio_disable = daio_mgr_dsb_daio;
+	daio_mgr->imap_add = daio_imap_add;
+	daio_mgr->imap_delete = daio_imap_delete;
+	daio_mgr->commit_write = daio_mgr_commit_write;
+
+	for (i = 0; i < 8; i++) {
+		((struct hw *)hw)->daio_mgr_dsb_dao(daio_mgr->mgr.ctrl_blk, i);
+		((struct hw *)hw)->daio_mgr_dsb_dai(daio_mgr->mgr.ctrl_blk, i);
+	}
+	((struct hw *)hw)->daio_mgr_commit_write(hw, daio_mgr->mgr.ctrl_blk);
+
+	*rdaio_mgr = daio_mgr;
+
+	return 0;
+
+error2:
+	rsc_mgr_uninit(&daio_mgr->mgr);
+error1:
+	kfree(daio_mgr);
+	return err;
+}
+
+int daio_mgr_destroy(struct daio_mgr *daio_mgr)
+{
+	unsigned long flags;
+
+	/* free daio input mapper list */
+	spin_lock_irqsave(&daio_mgr->imap_lock, flags);
+	free_input_mapper_list(&daio_mgr->imappers);
+	spin_unlock_irqrestore(&daio_mgr->imap_lock, flags);
+
+	rsc_mgr_uninit(&daio_mgr->mgr);
+	kfree(daio_mgr);
+
+	return 0;
+}
+
diff --git a/sound/pci/ctxfi/ctdaio.h b/sound/pci/ctxfi/ctdaio.h
new file mode 100644
index 0000000..0f52ce5
--- /dev/null
+++ b/sound/pci/ctxfi/ctdaio.h
@@ -0,0 +1,122 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctdaio.h
+ *
+ * @Brief
+ * This file contains the definition of Digital Audio Input Output
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 23 2008
+ *
+ */
+
+#ifndef CTDAIO_H
+#define CTDAIO_H
+
+#include "ctresource.h"
+#include "ctimap.h"
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+/* Define the descriptor of a daio resource */
+enum DAIOTYP {
+	LINEO1,
+	LINEO2,
+	LINEO3,
+	LINEO4,
+	SPDIFOO,	/* S/PDIF Out (Flexijack/Optical) */
+	LINEIM,
+	SPDIFIO,	/* S/PDIF In (Flexijack/Optical) on the card */
+	SPDIFI1,	/* S/PDIF In on internal Drive Bay */
+	NUM_DAIOTYP
+};
+
+struct dao_rsc_ops;
+struct dai_rsc_ops;
+struct daio_mgr;
+
+struct daio {
+	struct rsc rscl;	/* Basic resource info for left TX/RX */
+	struct rsc rscr;	/* Basic resource info for right TX/RX */
+	enum DAIOTYP type;
+};
+
+struct dao {
+	struct daio daio;
+	struct dao_rsc_ops *ops;	/* DAO specific operations */
+	struct imapper **imappers;
+	struct daio_mgr *mgr;
+	void *hw;
+	void *ctrl_blk;
+};
+
+struct dai {
+	struct daio daio;
+	struct dai_rsc_ops *ops;	/* DAI specific operations */
+	void *hw;
+	void *ctrl_blk;
+};
+
+struct dao_desc {
+	unsigned int msr:4;
+	unsigned int passthru:1;
+};
+
+struct dao_rsc_ops {
+	int (*set_spos)(struct dao *dao, unsigned int spos);
+	int (*commit_write)(struct dao *dao);
+	int (*get_spos)(struct dao *dao, unsigned int *spos);
+	int (*reinit)(struct dao *dao, const struct dao_desc *desc);
+	int (*set_left_input)(struct dao *dao, struct rsc *input);
+	int (*set_right_input)(struct dao *dao, struct rsc *input);
+	int (*clear_left_input)(struct dao *dao);
+	int (*clear_right_input)(struct dao *dao);
+};
+
+struct dai_rsc_ops {
+	int (*set_srt_srcl)(struct dai *dai, struct rsc *src);
+	int (*set_srt_srcr)(struct dai *dai, struct rsc *src);
+	int (*set_srt_msr)(struct dai *dai, unsigned int msr);
+	int (*set_enb_src)(struct dai *dai, unsigned int enb);
+	int (*set_enb_srt)(struct dai *dai, unsigned int enb);
+	int (*commit_write)(struct dai *dai);
+};
+
+/* Define daio resource request description info */
+struct daio_desc {
+	unsigned int type:4;
+	unsigned int msr:4;
+	unsigned int passthru:1;
+};
+
+struct daio_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+	spinlock_t imap_lock;
+	struct list_head imappers;
+	struct imapper *init_imap;
+	unsigned int init_imap_added;
+
+	 /* request one daio resource */
+	int (*get_daio)(struct daio_mgr *mgr,
+			const struct daio_desc *desc, struct daio **rdaio);
+	/* return one daio resource */
+	int (*put_daio)(struct daio_mgr *mgr, struct daio *daio);
+	int (*daio_enable)(struct daio_mgr *mgr, struct daio *daio);
+	int (*daio_disable)(struct daio_mgr *mgr, struct daio *daio);
+	int (*imap_add)(struct daio_mgr *mgr, struct imapper *entry);
+	int (*imap_delete)(struct daio_mgr *mgr, struct imapper *entry);
+	int (*commit_write)(struct daio_mgr *mgr);
+};
+
+/* Constructor and destructor of daio resource manager */
+int daio_mgr_create(void *hw, struct daio_mgr **rdaio_mgr);
+int daio_mgr_destroy(struct daio_mgr *daio_mgr);
+
+#endif /* CTDAIO_H */
diff --git a/sound/pci/ctxfi/ctdrv.h b/sound/pci/ctxfi/ctdrv.h
new file mode 100644
index 0000000..f776a44
--- /dev/null
+++ b/sound/pci/ctxfi/ctdrv.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @file    ctdrv.h
+ *
+ * @breaf
+ * This file contains the definition of card IDs supported by this driver.
+ *
+ * @author Liu Chun
+ *
+ */
+
+#ifndef CTDRV_H
+#define CTDRV_H
+
+#define PCI_VENDOR_CREATIVE		0x1102
+#define PCI_DEVICE_CREATIVE_20K1	0x0005
+#define PCI_DEVICE_CREATIVE_20K2	0x000B
+#define PCI_SUBVENDOR_CREATIVE		0x1102
+#define PCI_SUBSYS_CREATIVE_SB0760	0x0024
+#define PCI_SUBSYS_CREATIVE_SB08801	0x0041
+#define PCI_SUBSYS_CREATIVE_SB08802	0x0042
+#define PCI_SUBSYS_CREATIVE_SB08803	0x0043
+#define PCI_SUBSYS_CREATIVE_HENDRIX	0x6000
+
+#endif /* CTDRV_H */
diff --git a/sound/pci/ctxfi/cthardware.c b/sound/pci/ctxfi/cthardware.c
new file mode 100644
index 0000000..8e58860
--- /dev/null
+++ b/sound/pci/ctxfi/cthardware.c
@@ -0,0 +1,108 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthardware.c
+ *
+ * @Brief
+ * This file contains the implementation of hardware access methord.
+ *
+ * @Author	Liu Chun
+ * @Date 	Jun 26 2008
+ *
+ */
+
+#include "cthardware.h"
+#include "cthw20k1.h"
+#include "cthw20k2.h"
+#include <linux/bug.h>
+
+static enum CHIPTYP get_chip_type(struct hw *hw)
+{
+	enum CHIPTYP type = ATCNONE;
+
+	switch (hw->pci->device) {
+	case 0x0005:	/* 20k1 device */
+		type = ATC20K1;
+		break;
+	case 0x000B:	/* 20k2 device */
+		type = ATC20K2;
+		break;
+	default:
+		type = ATCNONE;
+		break;
+	}
+
+	return type;
+}
+
+int create_hw_obj(struct pci_dev *pci, struct hw **rhw)
+{
+	int err = 0;
+
+	switch (pci->device) {
+	case 0x0005:	/* 20k1 device */
+		err = create_20k1_hw_obj(rhw);
+		break;
+	case 0x000B:	/* 20k2 device */
+		err = create_20k2_hw_obj(rhw);
+		break;
+	default:
+		err = -ENODEV;
+		break;
+	}
+	if (err)
+		return err;
+
+	(*rhw)->pci = pci;
+	(*rhw)->get_chip_type = get_chip_type;
+
+	return 0;
+}
+
+int destroy_hw_obj(struct hw *hw)
+{
+	int err = 0;
+
+	switch (hw->pci->device) {
+	case 0x0005:	/* 20k1 device */
+		err = destroy_20k1_hw_obj(hw);
+		break;
+	case 0x000B:	/* 20k2 device */
+		err = destroy_20k2_hw_obj(hw);
+		break;
+	default:
+		err = -ENODEV;
+		break;
+	}
+
+	return err;
+}
+
+unsigned int get_field(unsigned int data, unsigned int field)
+{
+	int i;
+
+	BUG_ON(!field);
+	/* @field should always be greater than 0 */
+	for (i = 0; !(field & (1 << i)); )
+		i++;
+
+	return (data & field) >> i;
+}
+
+void set_field(unsigned int *data, unsigned int field, unsigned int value)
+{
+	int i;
+
+	BUG_ON(!field);
+	/* @field should always be greater than 0 */
+	for (i = 0; !(field & (1 << i)); )
+		i++;
+
+	*data = (*data & (~field)) | ((value << i) & field);
+}
+
diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h
new file mode 100644
index 0000000..b0512df
--- /dev/null
+++ b/sound/pci/ctxfi/cthardware.h
@@ -0,0 +1,160 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthardware.h
+ *
+ * @Brief
+ * This file contains the definition of hardware access methord.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTHARDWARE_H
+#define CTHARDWARE_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+
+enum CHIPTYP {
+	ATC20K1,
+	ATC20K2,
+	ATCNONE
+};
+
+/* Type of input source for ADC */
+enum ADCSRC{
+	ADC_MICIN,
+	ADC_LINEIN,
+	ADC_VIDEO,
+	ADC_AUX,
+	ADC_NONE	/* Switch to digital input */
+};
+
+struct card_conf {
+	/* device virtual mem page table page physical addr
+	 * (supporting one page table page now) */
+	unsigned long vm_pgt_phys;
+	unsigned int rsr;	/* reference sample rate in Hzs*/
+	unsigned int msr;	/* master sample rate in rsrs */
+};
+
+struct hw {
+	int (*card_init)(struct hw *hw, struct card_conf *info);
+	int (*card_stop)(struct hw *hw);
+	int (*pll_init)(struct hw *hw, unsigned int rsr);
+	enum CHIPTYP (*get_chip_type)(struct hw *hw);
+	int (*is_adc_source_selected)(struct hw *hw, enum ADCSRC source);
+	int (*select_adc_source)(struct hw *hw, enum ADCSRC source);
+	int (*have_digit_io_switch)(struct hw *hw);
+
+	/* SRC operations */
+	int (*src_rsc_get_ctrl_blk)(void **rblk);
+	int (*src_rsc_put_ctrl_blk)(void *blk);
+	int (*src_set_state)(void *blk, unsigned int state);
+	int (*src_set_bm)(void *blk, unsigned int bm);
+	int (*src_set_rsr)(void *blk, unsigned int rsr);
+	int (*src_set_sf)(void *blk, unsigned int sf);
+	int (*src_set_wr)(void *blk, unsigned int wr);
+	int (*src_set_pm)(void *blk, unsigned int pm);
+	int (*src_set_rom)(void *blk, unsigned int rom);
+	int (*src_set_vo)(void *blk, unsigned int vo);
+	int (*src_set_st)(void *blk, unsigned int st);
+	int (*src_set_ie)(void *blk, unsigned int ie);
+	int (*src_set_ilsz)(void *blk, unsigned int ilsz);
+	int (*src_set_bp)(void *blk, unsigned int bp);
+	int (*src_set_cisz)(void *blk, unsigned int cisz);
+	int (*src_set_ca)(void *blk, unsigned int ca);
+	int (*src_set_sa)(void *blk, unsigned int sa);
+	int (*src_set_la)(void *blk, unsigned int la);
+	int (*src_set_pitch)(void *blk, unsigned int pitch);
+	int (*src_set_clear_zbufs)(void *blk, unsigned int clear);
+	int (*src_set_dirty)(void *blk, unsigned int flags);
+	int (*src_set_dirty_all)(void *blk);
+	int (*src_commit_write)(struct hw *hw, unsigned int idx, void *blk);
+	int (*src_get_ca)(struct hw *hw, unsigned int idx, void *blk);
+	unsigned int (*src_get_dirty)(void *blk);
+	unsigned int (*src_dirty_conj_mask)(void);
+	int (*src_mgr_get_ctrl_blk)(void **rblk);
+	int (*src_mgr_put_ctrl_blk)(void *blk);
+	/* syncly enable src @idx */
+	int (*src_mgr_enbs_src)(void *blk, unsigned int idx);
+	/* enable src @idx */
+	int (*src_mgr_enb_src)(void *blk, unsigned int idx);
+	/* disable src @idx */
+	int (*src_mgr_dsb_src)(void *blk, unsigned int idx);
+	int (*src_mgr_commit_write)(struct hw *hw, void *blk);
+
+	/* SRC Input Mapper operations */
+	int (*srcimp_mgr_get_ctrl_blk)(void **rblk);
+	int (*srcimp_mgr_put_ctrl_blk)(void *blk);
+	int (*srcimp_mgr_set_imaparc)(void *blk, unsigned int slot);
+	int (*srcimp_mgr_set_imapuser)(void *blk, unsigned int user);
+	int (*srcimp_mgr_set_imapnxt)(void *blk, unsigned int next);
+	int (*srcimp_mgr_set_imapaddr)(void *blk, unsigned int addr);
+	int (*srcimp_mgr_commit_write)(struct hw *hw, void *blk);
+
+	/* AMIXER operations */
+	int (*amixer_rsc_get_ctrl_blk)(void **rblk);
+	int (*amixer_rsc_put_ctrl_blk)(void *blk);
+	int (*amixer_mgr_get_ctrl_blk)(void **rblk);
+	int (*amixer_mgr_put_ctrl_blk)(void *blk);
+	int (*amixer_set_mode)(void *blk, unsigned int mode);
+	int (*amixer_set_iv)(void *blk, unsigned int iv);
+	int (*amixer_set_x)(void *blk, unsigned int x);
+	int (*amixer_set_y)(void *blk, unsigned int y);
+	int (*amixer_set_sadr)(void *blk, unsigned int sadr);
+	int (*amixer_set_se)(void *blk, unsigned int se);
+	int (*amixer_set_dirty)(void *blk, unsigned int flags);
+	int (*amixer_set_dirty_all)(void *blk);
+	int (*amixer_commit_write)(struct hw *hw, unsigned int idx, void *blk);
+	int (*amixer_get_y)(void *blk);
+	unsigned int (*amixer_get_dirty)(void *blk);
+
+	/* DAIO operations */
+	int (*dai_get_ctrl_blk)(void **rblk);
+	int (*dai_put_ctrl_blk)(void *blk);
+	int (*dai_srt_set_srco)(void *blk, unsigned int src);
+	int (*dai_srt_set_srcm)(void *blk, unsigned int src);
+	int (*dai_srt_set_rsr)(void *blk, unsigned int rsr);
+	int (*dai_srt_set_drat)(void *blk, unsigned int drat);
+	int (*dai_srt_set_ec)(void *blk, unsigned int ec);
+	int (*dai_srt_set_et)(void *blk, unsigned int et);
+	int (*dai_commit_write)(struct hw *hw, unsigned int idx, void *blk);
+	int (*dao_get_ctrl_blk)(void **rblk);
+	int (*dao_put_ctrl_blk)(void *blk);
+	int (*dao_set_spos)(void *blk, unsigned int spos);
+	int (*dao_commit_write)(struct hw *hw, unsigned int idx, void *blk);
+	int (*dao_get_spos)(void *blk, unsigned int *spos);
+
+	int (*daio_mgr_get_ctrl_blk)(struct hw *hw, void **rblk);
+	int (*daio_mgr_put_ctrl_blk)(void *blk);
+	int (*daio_mgr_enb_dai)(void *blk, unsigned int idx);
+	int (*daio_mgr_dsb_dai)(void *blk, unsigned int idx);
+	int (*daio_mgr_enb_dao)(void *blk, unsigned int idx);
+	int (*daio_mgr_dsb_dao)(void *blk, unsigned int idx);
+	int (*daio_mgr_dao_init)(void *blk, unsigned int idx,
+						unsigned int conf);
+	int (*daio_mgr_set_imaparc)(void *blk, unsigned int slot);
+	int (*daio_mgr_set_imapnxt)(void *blk, unsigned int next);
+	int (*daio_mgr_set_imapaddr)(void *blk, unsigned int addr);
+	int (*daio_mgr_commit_write)(struct hw *hw, void *blk);
+
+	struct pci_dev *pci;	/* the pci kernel structure of this card */
+	int irq;
+	unsigned long io_base;
+	unsigned long mem_base;
+};
+
+int create_hw_obj(struct pci_dev *pci, struct hw **rhw);
+int destroy_hw_obj(struct hw *hw);
+
+unsigned int get_field(unsigned int data, unsigned int field);
+void set_field(unsigned int *data, unsigned int field, unsigned int value);
+
+#endif /* CTHARDWARE_H */
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
new file mode 100644
index 0000000..53572d92
--- /dev/null
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -0,0 +1,2230 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthw20k1.c
+ *
+ * @Brief
+ * This file contains the implementation of hardware access methord for 20k1.
+ *
+ * @Author	Liu Chun
+ * @Date 	Jun 24 2008
+ *
+ */
+
+#include "cthw20k1.h"
+#include "ct20k1reg.h"
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bits */
+
+struct hw20k1 {
+	struct hw hw;
+	spinlock_t reg_20k1_lock;
+	spinlock_t reg_pci_lock;
+};
+
+static u32 hw_read_20kx(struct hw *hw, u32 reg);
+static void hw_write_20kx(struct hw *hw, u32 reg, u32 data);
+static u32 hw_read_pci(struct hw *hw, u32 reg);
+static void hw_write_pci(struct hw *hw, u32 reg, u32 data);
+
+/*
+ * Type definition block.
+ * The layout of control structures can be directly applied on 20k2 chip.
+ */
+
+/*
+ * SRC control block definitions.
+ */
+
+/* SRC resource control block */
+#define SRCCTL_STATE	0x00000007
+#define SRCCTL_BM	0x00000008
+#define SRCCTL_RSR	0x00000030
+#define SRCCTL_SF	0x000001C0
+#define SRCCTL_WR	0x00000200
+#define SRCCTL_PM	0x00000400
+#define SRCCTL_ROM	0x00001800
+#define SRCCTL_VO	0x00002000
+#define SRCCTL_ST	0x00004000
+#define SRCCTL_IE	0x00008000
+#define SRCCTL_ILSZ	0x000F0000
+#define SRCCTL_BP	0x00100000
+
+#define SRCCCR_CISZ	0x000007FF
+#define SRCCCR_CWA	0x001FF800
+#define SRCCCR_D	0x00200000
+#define SRCCCR_RS	0x01C00000
+#define SRCCCR_NAL	0x3E000000
+#define SRCCCR_RA	0xC0000000
+
+#define SRCCA_CA	0x03FFFFFF
+#define SRCCA_RS	0x1C000000
+#define SRCCA_NAL	0xE0000000
+
+#define SRCSA_SA	0x03FFFFFF
+
+#define SRCLA_LA	0x03FFFFFF
+
+/* Mixer Parameter Ring ram Low and Hight register.
+ * Fixed-point value in 8.24 format for parameter channel */
+#define MPRLH_PITCH	0xFFFFFFFF
+
+/* SRC resource register dirty flags */
+union src_dirty {
+	struct {
+		u16 ctl:1;
+		u16 ccr:1;
+		u16 sa:1;
+		u16 la:1;
+		u16 ca:1;
+		u16 mpr:1;
+		u16 czbfs:1;	/* Clear Z-Buffers */
+		u16 rsv:9;
+	} bf;
+	u16 data;
+};
+
+struct src_rsc_ctrl_blk {
+	unsigned int	ctl;
+	unsigned int 	ccr;
+	unsigned int	ca;
+	unsigned int	sa;
+	unsigned int	la;
+	unsigned int	mpr;
+	union src_dirty	dirty;
+};
+
+/* SRC manager control block */
+union src_mgr_dirty {
+	struct {
+		u16 enb0:1;
+		u16 enb1:1;
+		u16 enb2:1;
+		u16 enb3:1;
+		u16 enb4:1;
+		u16 enb5:1;
+		u16 enb6:1;
+		u16 enb7:1;
+		u16 enbsa:1;
+		u16 rsv:7;
+	} bf;
+	u16 data;
+};
+
+struct src_mgr_ctrl_blk {
+	unsigned int		enbsa;
+	unsigned int		enb[8];
+	union src_mgr_dirty	dirty;
+};
+
+/* SRCIMP manager control block */
+#define SRCAIM_ARC	0x00000FFF
+#define SRCAIM_NXT	0x00FF0000
+#define SRCAIM_SRC	0xFF000000
+
+struct srcimap {
+	unsigned int srcaim;
+	unsigned int idx;
+};
+
+/* SRCIMP manager register dirty flags */
+union srcimp_mgr_dirty {
+	struct {
+		u16 srcimap:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+struct srcimp_mgr_ctrl_blk {
+	struct srcimap		srcimap;
+	union srcimp_mgr_dirty	dirty;
+};
+
+/*
+ * Function implementation block.
+ */
+
+static int src_get_rsc_ctrl_blk(void **rblk)
+{
+	struct src_rsc_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int src_put_rsc_ctrl_blk(void *blk)
+{
+	kfree((struct src_rsc_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int src_set_state(void *blk, unsigned int state)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_STATE, state);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_bm(void *blk, unsigned int bm)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_BM, bm);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_rsr(void *blk, unsigned int rsr)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_RSR, rsr);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_sf(void *blk, unsigned int sf)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_SF, sf);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_wr(void *blk, unsigned int wr)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_WR, wr);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_pm(void *blk, unsigned int pm)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_PM, pm);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_rom(void *blk, unsigned int rom)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ROM, rom);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_vo(void *blk, unsigned int vo)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_VO, vo);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_st(void *blk, unsigned int st)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ST, st);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_ie(void *blk, unsigned int ie)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_IE, ie);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_ilsz(void *blk, unsigned int ilsz)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ILSZ, ilsz);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_bp(void *blk, unsigned int bp)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_BP, bp);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_cisz(void *blk, unsigned int cisz)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ccr, SRCCCR_CISZ, cisz);
+	ctl->dirty.bf.ccr = 1;
+	return 0;
+}
+
+static int src_set_ca(void *blk, unsigned int ca)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ca, SRCCA_CA, ca);
+	ctl->dirty.bf.ca = 1;
+	return 0;
+}
+
+static int src_set_sa(void *blk, unsigned int sa)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->sa, SRCSA_SA, sa);
+	ctl->dirty.bf.sa = 1;
+	return 0;
+}
+
+static int src_set_la(void *blk, unsigned int la)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->la, SRCLA_LA, la);
+	ctl->dirty.bf.la = 1;
+	return 0;
+}
+
+static int src_set_pitch(void *blk, unsigned int pitch)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->mpr, MPRLH_PITCH, pitch);
+	ctl->dirty.bf.mpr = 1;
+	return 0;
+}
+
+static int src_set_clear_zbufs(void *blk, unsigned int clear)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.bf.czbfs = (clear ? 1 : 0);
+	return 0;
+}
+
+static int src_set_dirty(void *blk, unsigned int flags)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.data = (flags & 0xffff);
+	return 0;
+}
+
+static int src_set_dirty_all(void *blk)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.data = ~(0x0);
+	return 0;
+}
+
+#define AR_SLOT_SIZE		4096
+#define AR_SLOT_BLOCK_SIZE	16
+#define AR_PTS_PITCH		6
+#define AR_PARAM_SRC_OFFSET	0x60
+
+static unsigned int src_param_pitch_mixer(unsigned int src_idx)
+{
+	return ((src_idx << 4) + AR_PTS_PITCH + AR_SLOT_SIZE
+			- AR_PARAM_SRC_OFFSET) % AR_SLOT_SIZE;
+
+}
+
+static int src_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+	int i = 0;
+
+	if (ctl->dirty.bf.czbfs) {
+		/* Clear Z-Buffer registers */
+		for (i = 0; i < 8; i++)
+			hw_write_20kx(hw, SRCUPZ+idx*0x100+i*0x4, 0);
+
+		for (i = 0; i < 4; i++)
+			hw_write_20kx(hw, SRCDN0Z+idx*0x100+i*0x4, 0);
+
+		for (i = 0; i < 8; i++)
+			hw_write_20kx(hw, SRCDN1Z+idx*0x100+i*0x4, 0);
+
+		ctl->dirty.bf.czbfs = 0;
+	}
+	if (ctl->dirty.bf.mpr) {
+		/* Take the parameter mixer resource in the same group as that
+		 * the idx src is in for simplicity. Unlike src, all conjugate
+		 * parameter mixer resources must be programmed for
+		 * corresponding conjugate src resources. */
+		unsigned int pm_idx = src_param_pitch_mixer(idx);
+		hw_write_20kx(hw, PRING_LO_HI+4*pm_idx, ctl->mpr);
+		hw_write_20kx(hw, PMOPLO+8*pm_idx, 0x3);
+		hw_write_20kx(hw, PMOPHI+8*pm_idx, 0x0);
+		ctl->dirty.bf.mpr = 0;
+	}
+	if (ctl->dirty.bf.sa) {
+		hw_write_20kx(hw, SRCSA+idx*0x100, ctl->sa);
+		ctl->dirty.bf.sa = 0;
+	}
+	if (ctl->dirty.bf.la) {
+		hw_write_20kx(hw, SRCLA+idx*0x100, ctl->la);
+		ctl->dirty.bf.la = 0;
+	}
+	if (ctl->dirty.bf.ca) {
+		hw_write_20kx(hw, SRCCA+idx*0x100, ctl->ca);
+		ctl->dirty.bf.ca = 0;
+	}
+
+	/* Write srccf register */
+	hw_write_20kx(hw, SRCCF+idx*0x100, 0x0);
+
+	if (ctl->dirty.bf.ccr) {
+		hw_write_20kx(hw, SRCCCR+idx*0x100, ctl->ccr);
+		ctl->dirty.bf.ccr = 0;
+	}
+	if (ctl->dirty.bf.ctl) {
+		hw_write_20kx(hw, SRCCTL+idx*0x100, ctl->ctl);
+		ctl->dirty.bf.ctl = 0;
+	}
+
+	return 0;
+}
+
+static int src_get_ca(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	ctl->ca = hw_read_20kx(hw, SRCCA+idx*0x100);
+	ctl->dirty.bf.ca = 0;
+
+	return get_field(ctl->ca, SRCCA_CA);
+}
+
+static unsigned int src_get_dirty(void *blk)
+{
+	return ((struct src_rsc_ctrl_blk *)blk)->dirty.data;
+}
+
+static unsigned int src_dirty_conj_mask(void)
+{
+	return 0x20;
+}
+
+static int src_mgr_enbs_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enbsa = ~(0x0);
+	((struct src_mgr_ctrl_blk *)blk)->dirty.bf.enbsa = 1;
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] |= (0x1 << (idx%32));
+	return 0;
+}
+
+static int src_mgr_enb_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] |= (0x1 << (idx%32));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.data |= (0x1 << (idx/32));
+	return 0;
+}
+
+static int src_mgr_dsb_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] &= ~(0x1 << (idx%32));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.data |= (0x1 << (idx/32));
+	return 0;
+}
+
+static int src_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct src_mgr_ctrl_blk *ctl = blk;
+	int i = 0;
+	unsigned int ret = 0;
+
+	if (ctl->dirty.bf.enbsa) {
+		do {
+			ret = hw_read_20kx(hw, SRCENBSTAT);
+		} while (ret & 0x1);
+		hw_write_20kx(hw, SRCENBS, ctl->enbsa);
+		ctl->dirty.bf.enbsa = 0;
+	}
+	for (i = 0; i < 8; i++) {
+		if ((ctl->dirty.data & (0x1 << i))) {
+			hw_write_20kx(hw, SRCENB+(i*0x100), ctl->enb[i]);
+			ctl->dirty.data &= ~(0x1 << i);
+		}
+	}
+
+	return 0;
+}
+
+static int src_mgr_get_ctrl_blk(void **rblk)
+{
+	struct src_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int src_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct src_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int srcimp_mgr_get_ctrl_blk(void **rblk)
+{
+	struct srcimp_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int srcimp_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct srcimp_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int srcimp_mgr_set_imaparc(void *blk, unsigned int slot)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_ARC, slot);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapuser(void *blk, unsigned int user)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_SRC, user);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapnxt(void *blk, unsigned int next)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_NXT, next);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapaddr(void *blk, unsigned int addr)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	ctl->srcimap.idx = addr;
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.srcimap) {
+		hw_write_20kx(hw, SRCIMAP+ctl->srcimap.idx*0x100,
+						ctl->srcimap.srcaim);
+		ctl->dirty.bf.srcimap = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * AMIXER control block definitions.
+ */
+
+#define AMOPLO_M	0x00000003
+#define AMOPLO_X	0x0003FFF0
+#define AMOPLO_Y	0xFFFC0000
+
+#define AMOPHI_SADR	0x000000FF
+#define AMOPHI_SE	0x80000000
+
+/* AMIXER resource register dirty flags */
+union amixer_dirty {
+	struct {
+		u16 amoplo:1;
+		u16 amophi:1;
+		u16 rsv:14;
+	} bf;
+	u16 data;
+};
+
+/* AMIXER resource control block */
+struct amixer_rsc_ctrl_blk {
+	unsigned int		amoplo;
+	unsigned int		amophi;
+	union amixer_dirty	dirty;
+};
+
+static int amixer_set_mode(void *blk, unsigned int mode)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_M, mode);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_iv(void *blk, unsigned int iv)
+{
+	/* 20k1 amixer does not have this field */
+	return 0;
+}
+
+static int amixer_set_x(void *blk, unsigned int x)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_X, x);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_y(void *blk, unsigned int y)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_Y, y);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_sadr(void *blk, unsigned int sadr)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amophi, AMOPHI_SADR, sadr);
+	ctl->dirty.bf.amophi = 1;
+	return 0;
+}
+
+static int amixer_set_se(void *blk, unsigned int se)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amophi, AMOPHI_SE, se);
+	ctl->dirty.bf.amophi = 1;
+	return 0;
+}
+
+static int amixer_set_dirty(void *blk, unsigned int flags)
+{
+	((struct amixer_rsc_ctrl_blk *)blk)->dirty.data = (flags & 0xffff);
+	return 0;
+}
+
+static int amixer_set_dirty_all(void *blk)
+{
+	((struct amixer_rsc_ctrl_blk *)blk)->dirty.data = ~(0x0);
+	return 0;
+}
+
+static int amixer_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.amoplo || ctl->dirty.bf.amophi) {
+		hw_write_20kx(hw, AMOPLO+idx*8, ctl->amoplo);
+		ctl->dirty.bf.amoplo = 0;
+		hw_write_20kx(hw, AMOPHI+idx*8, ctl->amophi);
+		ctl->dirty.bf.amophi = 0;
+	}
+
+	return 0;
+}
+
+static int amixer_get_y(void *blk)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	return get_field(ctl->amoplo, AMOPLO_Y);
+}
+
+static unsigned int amixer_get_dirty(void *blk)
+{
+	return ((struct amixer_rsc_ctrl_blk *)blk)->dirty.data;
+}
+
+static int amixer_rsc_get_ctrl_blk(void **rblk)
+{
+	struct amixer_rsc_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int amixer_rsc_put_ctrl_blk(void *blk)
+{
+	kfree((struct amixer_rsc_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int amixer_mgr_get_ctrl_blk(void **rblk)
+{
+	/*amixer_mgr_ctrl_blk_t *blk;*/
+
+	*rblk = NULL;
+	/*blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;*/
+
+	return 0;
+}
+
+static int amixer_mgr_put_ctrl_blk(void *blk)
+{
+	/*kfree((amixer_mgr_ctrl_blk_t *)blk);*/
+
+	return 0;
+}
+
+/*
+ * DAIO control block definitions.
+ */
+
+/* Receiver Sample Rate Tracker Control register */
+#define SRTCTL_SRCR	0x000000FF
+#define SRTCTL_SRCL	0x0000FF00
+#define SRTCTL_RSR	0x00030000
+#define SRTCTL_DRAT	0x000C0000
+#define SRTCTL_RLE	0x10000000
+#define SRTCTL_RLP	0x20000000
+#define SRTCTL_EC	0x40000000
+#define SRTCTL_ET	0x80000000
+
+/* DAIO Receiver register dirty flags */
+union dai_dirty {
+	struct {
+		u16 srtctl:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+/* DAIO Receiver control block */
+struct dai_ctrl_blk {
+	unsigned int	srtctl;
+	union dai_dirty	dirty;
+};
+
+/* S/PDIF Transmitter register dirty flags */
+union dao_dirty {
+	struct {
+		u16 spos:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+/* S/PDIF Transmitter control block */
+struct dao_ctrl_blk {
+	unsigned int 	spos; /* S/PDIF Output Channel Status Register */
+	union dao_dirty	dirty;
+};
+
+/* Audio Input Mapper RAM */
+#define AIM_ARC		0x00000FFF
+#define AIM_NXT		0x007F0000
+
+struct daoimap {
+	unsigned int aim;
+	unsigned int idx;
+};
+
+/* I2S Transmitter/Receiver Control register */
+#define I2SCTL_EA	0x00000004
+#define I2SCTL_EI	0x00000010
+
+/* S/PDIF Transmitter Control register */
+#define SPOCTL_OE	0x00000001
+#define SPOCTL_OS	0x0000000E
+#define SPOCTL_RIV	0x00000010
+#define SPOCTL_LIV	0x00000020
+#define SPOCTL_SR	0x000000C0
+
+/* S/PDIF Receiver Control register */
+#define SPICTL_EN	0x00000001
+#define SPICTL_I24	0x00000002
+#define SPICTL_IB	0x00000004
+#define SPICTL_SM	0x00000008
+#define SPICTL_VM	0x00000010
+
+/* DAIO manager register dirty flags */
+union daio_mgr_dirty {
+	struct {
+		u32 i2soctl:4;
+		u32 i2sictl:4;
+		u32 spoctl:4;
+		u32 spictl:4;
+		u32 daoimap:1;
+		u32 rsv:15;
+	} bf;
+	u32 data;
+};
+
+/* DAIO manager control block */
+struct daio_mgr_ctrl_blk {
+	unsigned int		i2sctl;
+	unsigned int		spoctl;
+	unsigned int		spictl;
+	struct daoimap		daoimap;
+	union daio_mgr_dirty	dirty;
+};
+
+static int dai_srt_set_srcr(void *blk, unsigned int src)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_SRCR, src);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_srcl(void *blk, unsigned int src)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_SRCL, src);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_rsr(void *blk, unsigned int rsr)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_RSR, rsr);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_drat(void *blk, unsigned int drat)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_DRAT, drat);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_ec(void *blk, unsigned int ec)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_EC, ec ? 1 : 0);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_srt_set_et(void *blk, unsigned int et)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srtctl, SRTCTL_ET, et ? 1 : 0);
+	ctl->dirty.bf.srtctl = 1;
+	return 0;
+}
+
+static int dai_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.srtctl) {
+		if (idx < 4) {
+			/* S/PDIF SRTs */
+			hw_write_20kx(hw, SRTSCTL+0x4*idx, ctl->srtctl);
+		} else {
+			/* I2S SRT */
+			hw_write_20kx(hw, SRTICTL, ctl->srtctl);
+		}
+		ctl->dirty.bf.srtctl = 0;
+	}
+
+	return 0;
+}
+
+static int dai_get_ctrl_blk(void **rblk)
+{
+	struct dai_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int dai_put_ctrl_blk(void *blk)
+{
+	kfree((struct dai_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int dao_set_spos(void *blk, unsigned int spos)
+{
+	((struct dao_ctrl_blk *)blk)->spos = spos;
+	((struct dao_ctrl_blk *)blk)->dirty.bf.spos = 1;
+	return 0;
+}
+
+static int dao_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct dao_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.spos) {
+		if (idx < 4) {
+			/* S/PDIF SPOSx */
+			hw_write_20kx(hw, SPOS+0x4*idx, ctl->spos);
+		}
+		ctl->dirty.bf.spos = 0;
+	}
+
+	return 0;
+}
+
+static int dao_get_spos(void *blk, unsigned int *spos)
+{
+	*spos = ((struct dao_ctrl_blk *)blk)->spos;
+	return 0;
+}
+
+static int dao_get_ctrl_blk(void **rblk)
+{
+	struct dao_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int dao_put_ctrl_blk(void *blk)
+{
+	kfree((struct dao_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int daio_mgr_enb_dai(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF input */
+		set_field(&ctl->spictl, SPICTL_EN << (idx*8), 1);
+		ctl->dirty.bf.spictl |= (0x1 << idx);
+	} else {
+		/* I2S input */
+		idx %= 4;
+		set_field(&ctl->i2sctl, I2SCTL_EI << (idx*8), 1);
+		ctl->dirty.bf.i2sictl |= (0x1 << idx);
+	}
+	return 0;
+}
+
+static int daio_mgr_dsb_dai(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF input */
+		set_field(&ctl->spictl, SPICTL_EN << (idx*8), 0);
+		ctl->dirty.bf.spictl |= (0x1 << idx);
+	} else {
+		/* I2S input */
+		idx %= 4;
+		set_field(&ctl->i2sctl, I2SCTL_EI << (idx*8), 0);
+		ctl->dirty.bf.i2sictl |= (0x1 << idx);
+	}
+	return 0;
+}
+
+static int daio_mgr_enb_dao(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF output */
+		set_field(&ctl->spoctl, SPOCTL_OE << (idx*8), 1);
+		ctl->dirty.bf.spoctl |= (0x1 << idx);
+	} else {
+		/* I2S output */
+		idx %= 4;
+		set_field(&ctl->i2sctl, I2SCTL_EA << (idx*8), 1);
+		ctl->dirty.bf.i2soctl |= (0x1 << idx);
+	}
+	return 0;
+}
+
+static int daio_mgr_dsb_dao(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF output */
+		set_field(&ctl->spoctl, SPOCTL_OE << (idx*8), 0);
+		ctl->dirty.bf.spoctl |= (0x1 << idx);
+	} else {
+		/* I2S output */
+		idx %= 4;
+		set_field(&ctl->i2sctl, I2SCTL_EA << (idx*8), 0);
+		ctl->dirty.bf.i2soctl |= (0x1 << idx);
+	}
+	return 0;
+}
+
+static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF output */
+		switch ((conf & 0x7)) {
+		case 0:
+			set_field(&ctl->spoctl, SPOCTL_SR << (idx*8), 3);
+			break; /* CDIF */
+		case 1:
+			set_field(&ctl->spoctl, SPOCTL_SR << (idx*8), 0);
+			break;
+		case 2:
+			set_field(&ctl->spoctl, SPOCTL_SR << (idx*8), 1);
+			break;
+		case 4:
+			set_field(&ctl->spoctl, SPOCTL_SR << (idx*8), 2);
+			break;
+		default:
+			break;
+		}
+		set_field(&ctl->spoctl, SPOCTL_LIV << (idx*8),
+			  (conf >> 4) & 0x1); /* Non-audio */
+		set_field(&ctl->spoctl, SPOCTL_RIV << (idx*8),
+			  (conf >> 4) & 0x1); /* Non-audio */
+		set_field(&ctl->spoctl, SPOCTL_OS << (idx*8),
+			  ((conf >> 3) & 0x1) ? 2 : 2); /* Raw */
+
+		ctl->dirty.bf.spoctl |= (0x1 << idx);
+	} else {
+		/* I2S output */
+		/*idx %= 4; */
+	}
+	return 0;
+}
+
+static int daio_mgr_set_imaparc(void *blk, unsigned int slot)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->daoimap.aim, AIM_ARC, slot);
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_set_imapnxt(void *blk, unsigned int next)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->daoimap.aim, AIM_NXT, next);
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_set_imapaddr(void *blk, unsigned int addr)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	ctl->daoimap.idx = addr;
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+	int i = 0;
+
+	if (ctl->dirty.bf.i2sictl || ctl->dirty.bf.i2soctl) {
+		for (i = 0; i < 4; i++) {
+			if ((ctl->dirty.bf.i2sictl & (0x1 << i)))
+				ctl->dirty.bf.i2sictl &= ~(0x1 << i);
+
+			if ((ctl->dirty.bf.i2soctl & (0x1 << i)))
+				ctl->dirty.bf.i2soctl &= ~(0x1 << i);
+		}
+		hw_write_20kx(hw, I2SCTL, ctl->i2sctl);
+		mdelay(1);
+	}
+	if (ctl->dirty.bf.spoctl) {
+		for (i = 0; i < 4; i++) {
+			if ((ctl->dirty.bf.spoctl & (0x1 << i)))
+				ctl->dirty.bf.spoctl &= ~(0x1 << i);
+		}
+		hw_write_20kx(hw, SPOCTL, ctl->spoctl);
+		mdelay(1);
+	}
+	if (ctl->dirty.bf.spictl) {
+		for (i = 0; i < 4; i++) {
+			if ((ctl->dirty.bf.spictl & (0x1 << i)))
+				ctl->dirty.bf.spictl &= ~(0x1 << i);
+		}
+		hw_write_20kx(hw, SPICTL, ctl->spictl);
+		mdelay(1);
+	}
+	if (ctl->dirty.bf.daoimap) {
+		hw_write_20kx(hw, DAOIMAP+ctl->daoimap.idx*4,
+					ctl->daoimap.aim);
+		ctl->dirty.bf.daoimap = 0;
+	}
+
+	return 0;
+}
+
+static int daio_mgr_get_ctrl_blk(struct hw *hw, void **rblk)
+{
+	struct daio_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	blk->i2sctl = hw_read_20kx(hw, I2SCTL);
+	blk->spoctl = hw_read_20kx(hw, SPOCTL);
+	blk->spictl = hw_read_20kx(hw, SPICTL);
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int daio_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct daio_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+/* Card hardware initialization block */
+struct dac_conf {
+	unsigned int msr; /* master sample rate in rsrs */
+};
+
+struct adc_conf {
+	unsigned int msr; 	/* master sample rate in rsrs */
+	unsigned char input; 	/* the input source of ADC */
+	unsigned char mic20db; 	/* boost mic by 20db if input is microphone */
+};
+
+struct daio_conf {
+	unsigned int msr; /* master sample rate in rsrs */
+};
+
+struct trn_conf {
+	unsigned long vm_pgt_phys;
+};
+
+static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
+{
+	u32 i2sorg = 0;
+	u32 spdorg = 0;
+
+	/* Read I2S CTL.  Keep original value. */
+	/*i2sorg = hw_read_20kx(hw, I2SCTL);*/
+	i2sorg = 0x94040404; /* enable all audio out and I2S-D input */
+	/* Program I2S with proper master sample rate and enable
+	 * the correct I2S channel. */
+	i2sorg &= 0xfffffffc;
+
+	/* Enable S/PDIF-out-A in fixed 24-bit data
+	 * format and default to 48kHz. */
+	/* Disable all before doing any changes. */
+	hw_write_20kx(hw, SPOCTL, 0x0);
+	spdorg = 0x05;
+
+	switch (info->msr) {
+	case 1:
+		i2sorg |= 1;
+		spdorg |= (0x0 << 6);
+		break;
+	case 2:
+		i2sorg |= 2;
+		spdorg |= (0x1 << 6);
+		break;
+	case 4:
+		i2sorg |= 3;
+		spdorg |= (0x2 << 6);
+		break;
+	default:
+		i2sorg |= 1;
+		break;
+	}
+
+	hw_write_20kx(hw, I2SCTL, i2sorg);
+	hw_write_20kx(hw, SPOCTL, spdorg);
+
+	/* Enable S/PDIF-in-A in fixed 24-bit data format. */
+	/* Disable all before doing any changes. */
+	hw_write_20kx(hw, SPICTL, 0x0);
+	mdelay(1);
+	spdorg = 0x0a0a0a0a;
+	hw_write_20kx(hw, SPICTL, spdorg);
+	mdelay(1);
+
+	return 0;
+}
+
+/* TRANSPORT operations */
+static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
+{
+	u32 trnctl = 0;
+	unsigned long ptp_phys_low = 0, ptp_phys_high = 0;
+
+	/* Set up device page table */
+	if ((~0UL) == info->vm_pgt_phys) {
+		printk(KERN_ERR "Wrong device page table page address!\n");
+		return -1;
+	}
+
+	trnctl = 0x13;  /* 32-bit, 4k-size page */
+#if BITS_PER_LONG == 64
+	ptp_phys_low = info->vm_pgt_phys & ((1UL<<32)-1);
+	ptp_phys_high = (info->vm_pgt_phys>>32) & ((1UL<<32)-1);
+	trnctl |= (1<<2);
+#elif BITS_PER_LONG == 32
+	ptp_phys_low = info->vm_pgt_phys & (~0UL);
+	ptp_phys_high = 0;
+#else
+#	error "Unknown BITS_PER_LONG!"
+#endif
+#if PAGE_SIZE == 8192
+	trnctl |= (1<<5);
+#endif
+	hw_write_20kx(hw, PTPALX, ptp_phys_low);
+	hw_write_20kx(hw, PTPAHX, ptp_phys_high);
+	hw_write_20kx(hw, TRNCTL, trnctl);
+	hw_write_20kx(hw, TRNIS, 0x200c01); /* realy needed? */
+
+	return 0;
+}
+
+/* Card initialization */
+#define GCTL_EAC	0x00000001
+#define GCTL_EAI	0x00000002
+#define GCTL_BEP	0x00000004
+#define GCTL_BES	0x00000008
+#define GCTL_DSP	0x00000010
+#define GCTL_DBP	0x00000020
+#define GCTL_ABP	0x00000040
+#define GCTL_TBP	0x00000080
+#define GCTL_SBP	0x00000100
+#define GCTL_FBP	0x00000200
+#define GCTL_XA		0x00000400
+#define GCTL_ET		0x00000800
+#define GCTL_PR		0x00001000
+#define GCTL_MRL	0x00002000
+#define GCTL_SDE	0x00004000
+#define GCTL_SDI	0x00008000
+#define GCTL_SM		0x00010000
+#define GCTL_SR		0x00020000
+#define GCTL_SD		0x00040000
+#define GCTL_SE		0x00080000
+#define GCTL_AID	0x00100000
+
+static int hw_pll_init(struct hw *hw, unsigned int rsr)
+{
+	unsigned int pllctl;
+	int i = 0;
+
+	pllctl = (48000 == rsr) ? 0x1480a001 : 0x1480a731;
+	for (i = 0; i < 3; i++) {
+		if (hw_read_20kx(hw, PLLCTL) == pllctl)
+			break;
+
+		hw_write_20kx(hw, PLLCTL, pllctl);
+		mdelay(40);
+	}
+	if (i >= 3) {
+		printk(KERN_ALERT "PLL initialization failed!!!\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int hw_auto_init(struct hw *hw)
+{
+	unsigned int gctl;
+	int i;
+
+	gctl = hw_read_20kx(hw, GCTL);
+	set_field(&gctl, GCTL_EAI, 0);
+	hw_write_20kx(hw, GCTL, gctl);
+	set_field(&gctl, GCTL_EAI, 1);
+	hw_write_20kx(hw, GCTL, gctl);
+	mdelay(10);
+	for (i = 0; i < 400000; i++) {
+		gctl = hw_read_20kx(hw, GCTL);
+		if (get_field(gctl, GCTL_AID))
+			break;
+	}
+	if (!get_field(gctl, GCTL_AID)) {
+		printk(KERN_ALERT "Card Auto-init failed!!!\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int i2c_unlock(struct hw *hw)
+{
+	if ((hw_read_pci(hw, 0xcc) & 0xff) == 0xaa)
+		return 0;
+
+	hw_write_pci(hw, 0xcc, 0x8c);
+	hw_write_pci(hw, 0xcc, 0x0e);
+	if ((hw_read_pci(hw, 0xcc) & 0xff) == 0xaa)
+		return 0;
+
+	hw_write_pci(hw, 0xcc, 0xee);
+	hw_write_pci(hw, 0xcc, 0xaa);
+	if ((hw_read_pci(hw, 0xcc) & 0xff) == 0xaa)
+		return 0;
+
+	return -1;
+}
+
+static void i2c_lock(struct hw *hw)
+{
+	if ((hw_read_pci(hw, 0xcc) & 0xff) == 0xaa)
+		hw_write_pci(hw, 0xcc, 0x00);
+}
+
+static void i2c_write(struct hw *hw, u32 device, u32 addr, u32 data)
+{
+	unsigned int ret = 0;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000));
+	hw_write_pci(hw, 0xE0, device);
+	hw_write_pci(hw, 0xE4, (data << 8) | (addr & 0xff));
+}
+
+/* DAC operations */
+
+static int hw_reset_dac(struct hw *hw)
+{
+	u32 i = 0;
+	u16 gpioorg = 0;
+	unsigned int ret = 0;
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000));
+	hw_write_pci(hw, 0xEC, 0x05);  /* write to i2c status control */
+
+	/* To be effective, need to reset the DAC twice. */
+	for (i = 0; i < 2;  i++) {
+		/* set gpio */
+		mdelay(100);
+		gpioorg = (u16)hw_read_20kx(hw, GPIO);
+		gpioorg &= 0xfffd;
+		hw_write_20kx(hw, GPIO, gpioorg);
+		mdelay(1);
+		hw_write_20kx(hw, GPIO, gpioorg | 0x2);
+	}
+
+	i2c_write(hw, 0x00180080, 0x01, 0x80);
+	i2c_write(hw, 0x00180080, 0x02, 0x10);
+
+	i2c_lock(hw);
+
+	return 0;
+}
+
+static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
+{
+	u32 data = 0;
+	u16 gpioorg = 0;
+	u16 subsys_id = 0;
+	unsigned int ret = 0;
+
+	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
+		/* SB055x, unmute outputs */
+		gpioorg = (u16)hw_read_20kx(hw, GPIO);
+		gpioorg &= 0xffbf;	/* set GPIO6 to low */
+		gpioorg |= 2;		/* set GPIO1 to high */
+		hw_write_20kx(hw, GPIO, gpioorg);
+		return 0;
+	}
+
+	/* mute outputs */
+	gpioorg = (u16)hw_read_20kx(hw, GPIO);
+	gpioorg &= 0xffbf;
+	hw_write_20kx(hw, GPIO, gpioorg);
+
+	hw_reset_dac(hw);
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	hw_write_pci(hw, 0xEC, 0x05);  /* write to i2c status control */
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000));
+
+	switch (info->msr) {
+	case 1:
+		data = 0x24;
+		break;
+	case 2:
+		data = 0x25;
+		break;
+	case 4:
+		data = 0x26;
+		break;
+	default:
+		data = 0x24;
+		break;
+	}
+
+	i2c_write(hw, 0x00180080, 0x06, data);
+	i2c_write(hw, 0x00180080, 0x09, data);
+	i2c_write(hw, 0x00180080, 0x0c, data);
+	i2c_write(hw, 0x00180080, 0x0f, data);
+
+	i2c_lock(hw);
+
+	/* unmute outputs */
+	gpioorg = (u16)hw_read_20kx(hw, GPIO);
+	gpioorg = gpioorg | 0x40;
+	hw_write_20kx(hw, GPIO, gpioorg);
+
+	return 0;
+}
+
+/* ADC operations */
+
+static int is_adc_input_selected_SB055x(struct hw *hw, enum ADCSRC type)
+{
+	u32 data = 0;
+	return data;
+}
+
+static int is_adc_input_selected_SBx(struct hw *hw, enum ADCSRC type)
+{
+	u32 data = 0;
+
+	data = hw_read_20kx(hw, GPIO);
+	switch (type) {
+	case ADC_MICIN:
+		data = ((data & (0x1<<7)) && (data & (0x1<<8)));
+		break;
+	case ADC_LINEIN:
+		data = (!(data & (0x1<<7)) && (data & (0x1<<8)));
+		break;
+	case ADC_NONE: /* Digital I/O */
+		data = (!(data & (0x1<<8)));
+		break;
+	default:
+		data = 0;
+	}
+	return data;
+}
+
+static int is_adc_input_selected_hendrix(struct hw *hw, enum ADCSRC type)
+{
+	u32 data = 0;
+
+	data = hw_read_20kx(hw, GPIO);
+	switch (type) {
+	case ADC_MICIN:
+		data = (data & (0x1 << 7)) ? 1 : 0;
+		break;
+	case ADC_LINEIN:
+		data = (data & (0x1 << 7)) ? 0 : 1;
+		break;
+	default:
+		data = 0;
+	}
+	return data;
+}
+
+static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
+{
+	u16 subsys_id = 0;
+
+	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
+		/* SB055x cards */
+		return is_adc_input_selected_SB055x(hw, type);
+	} else if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
+		/* SB073x cards */
+		return is_adc_input_selected_hendrix(hw, type);
+	} else if ((subsys_id & 0xf000) == 0x6000) {
+		/* Vista compatible cards */
+		return is_adc_input_selected_hendrix(hw, type);
+	} else {
+		return is_adc_input_selected_SBx(hw, type);
+	}
+}
+
+static int
+adc_input_select_SB055x(struct hw *hw, enum ADCSRC type, unsigned char boost)
+{
+	u32 data = 0;
+
+	/*
+	 * check and set the following GPIO bits accordingly
+	 * ADC_Gain		= GPIO2
+	 * DRM_off		= GPIO3
+	 * Mic_Pwr_on		= GPIO7
+	 * Digital_IO_Sel	= GPIO8
+	 * Mic_Sw		= GPIO9
+	 * Aux/MicLine_Sw	= GPIO12
+	 */
+	data = hw_read_20kx(hw, GPIO);
+	data &= 0xec73;
+	switch (type) {
+	case ADC_MICIN:
+		data |= (0x1<<7) | (0x1<<8) | (0x1<<9) ;
+		data |= boost ? (0x1<<2) : 0;
+		break;
+	case ADC_LINEIN:
+		data |= (0x1<<8);
+		break;
+	case ADC_AUX:
+		data |= (0x1<<8) | (0x1<<12);
+		break;
+	case ADC_NONE:
+		data |= (0x1<<12);  /* set to digital */
+		break;
+	default:
+		return -1;
+	}
+
+	hw_write_20kx(hw, GPIO, data);
+
+	return 0;
+}
+
+
+static int
+adc_input_select_SBx(struct hw *hw, enum ADCSRC type, unsigned char boost)
+{
+	u32 data = 0;
+	u32 i2c_data = 0;
+	unsigned int ret = 0;
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000)); /* i2c ready poll */
+	/* set i2c access mode as Direct Control */
+	hw_write_pci(hw, 0xEC, 0x05);
+
+	data = hw_read_20kx(hw, GPIO);
+	switch (type) {
+	case ADC_MICIN:
+		data |= ((0x1 << 7) | (0x1 << 8));
+		i2c_data = 0x1;  /* Mic-in */
+		break;
+	case ADC_LINEIN:
+		data &= ~(0x1 << 7);
+		data |= (0x1 << 8);
+		i2c_data = 0x2; /* Line-in */
+		break;
+	case ADC_NONE:
+		data &= ~(0x1 << 8);
+		i2c_data = 0x0; /* set to Digital */
+		break;
+	default:
+		i2c_lock(hw);
+		return -1;
+	}
+	hw_write_20kx(hw, GPIO, data);
+	i2c_write(hw, 0x001a0080, 0x2a, i2c_data);
+	if (boost) {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xe7); /* +12dB boost */
+		i2c_write(hw, 0x001a0080, 0x1e, 0xe7); /* +12dB boost */
+	} else {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xcf); /* No boost */
+		i2c_write(hw, 0x001a0080, 0x1e, 0xcf); /* No boost */
+	}
+
+	i2c_lock(hw);
+
+	return 0;
+}
+
+static int
+adc_input_select_hendrix(struct hw *hw, enum ADCSRC type, unsigned char boost)
+{
+	u32 data = 0;
+	u32 i2c_data = 0;
+	unsigned int ret = 0;
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000)); /* i2c ready poll */
+	/* set i2c access mode as Direct Control */
+	hw_write_pci(hw, 0xEC, 0x05);
+
+	data = hw_read_20kx(hw, GPIO);
+	switch (type) {
+	case ADC_MICIN:
+		data |= (0x1 << 7);
+		i2c_data = 0x1;  /* Mic-in */
+		break;
+	case ADC_LINEIN:
+		data &= ~(0x1 << 7);
+		i2c_data = 0x2; /* Line-in */
+		break;
+	default:
+		i2c_lock(hw);
+		return -1;
+	}
+	hw_write_20kx(hw, GPIO, data);
+	i2c_write(hw, 0x001a0080, 0x2a, i2c_data);
+	if (boost) {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xe7); /* +12dB boost */
+		i2c_write(hw, 0x001a0080, 0x1e, 0xe7); /* +12dB boost */
+	} else {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xcf); /* No boost */
+		i2c_write(hw, 0x001a0080, 0x1e, 0xcf); /* No boost */
+	}
+
+	i2c_lock(hw);
+
+	return 0;
+}
+
+static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
+{
+	u16 subsys_id = 0;
+
+	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
+		/* SB055x cards */
+		return adc_input_select_SB055x(hw, type, (ADC_MICIN == type));
+	} else if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
+		/* SB073x cards */
+		return adc_input_select_hendrix(hw, type, (ADC_MICIN == type));
+	} else if ((subsys_id & 0xf000) == 0x6000) {
+		/* Vista compatible cards */
+		return adc_input_select_hendrix(hw, type, (ADC_MICIN == type));
+	} else {
+		return adc_input_select_SBx(hw, type, (ADC_MICIN == type));
+	}
+}
+
+static int adc_init_SB055x(struct hw *hw, int input, int mic20db)
+{
+	return adc_input_select_SB055x(hw, input, mic20db);
+}
+
+static int adc_init_SBx(struct hw *hw, int input, int mic20db)
+{
+	u16 gpioorg;
+	u16 input_source;
+	u32 adcdata = 0;
+	unsigned int ret = 0;
+
+	input_source = 0x100;  /* default to analog */
+	switch (input) {
+	case ADC_MICIN:
+		adcdata = 0x1;
+		input_source = 0x180;  /* set GPIO7 to select Mic */
+		break;
+	case ADC_LINEIN:
+		adcdata = 0x2;
+		break;
+	case ADC_VIDEO:
+		adcdata = 0x4;
+		break;
+	case ADC_AUX:
+		adcdata = 0x8;
+		break;
+	case ADC_NONE:
+		adcdata = 0x0;
+		input_source = 0x0;  /* set to Digital */
+		break;
+	default:
+		break;
+	}
+
+	if (i2c_unlock(hw))
+		return -1;
+
+	do {
+		ret = hw_read_pci(hw, 0xEC);
+	} while (!(ret & 0x800000)); /* i2c ready poll */
+	hw_write_pci(hw, 0xEC, 0x05);  /* write to i2c status control */
+
+	i2c_write(hw, 0x001a0080, 0x0e, 0x08);
+	i2c_write(hw, 0x001a0080, 0x18, 0x0a);
+	i2c_write(hw, 0x001a0080, 0x28, 0x86);
+	i2c_write(hw, 0x001a0080, 0x2a, adcdata);
+
+	if (mic20db) {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xf7);
+		i2c_write(hw, 0x001a0080, 0x1e, 0xf7);
+	} else {
+		i2c_write(hw, 0x001a0080, 0x1c, 0xcf);
+		i2c_write(hw, 0x001a0080, 0x1e, 0xcf);
+	}
+
+	if (!(hw_read_20kx(hw, ID0) & 0x100))
+		i2c_write(hw, 0x001a0080, 0x16, 0x26);
+
+	i2c_lock(hw);
+
+	gpioorg = (u16)hw_read_20kx(hw,  GPIO);
+	gpioorg &= 0xfe7f;
+	gpioorg |= input_source;
+	hw_write_20kx(hw, GPIO, gpioorg);
+
+	return 0;
+}
+
+static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
+{
+	int err = 0;
+	u16 subsys_id = 0;
+
+	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
+		/* Sb055x card */
+		err = adc_init_SB055x(hw, info->input, info->mic20db);
+	} else {
+		err = adc_init_SBx(hw, info->input, info->mic20db);
+	}
+
+	return err;
+}
+
+static int hw_have_digit_io_switch(struct hw *hw)
+{
+	u16 subsys_id = 0;
+
+	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	/* SB073x and Vista compatible cards have no digit IO switch */
+	return !((subsys_id == 0x0029) || (subsys_id == 0x0031)
+				|| ((subsys_id & 0xf000) == 0x6000));
+}
+
+#define UAA_CFG_PWRSTATUS	0x44
+#define UAA_CFG_SPACE_FLAG	0xA0
+#define UAA_CORE_CHANGE		0x3FFC
+static int uaa_to_xfi(struct pci_dev *pci)
+{
+	unsigned int bar0, bar1, bar2, bar3, bar4, bar5;
+	unsigned int cmd, irq, cl_size, l_timer, pwr;
+	unsigned int CTLA, CTLZ, CTLL, CTLX, CTL_, CTLF, CTLi;
+	unsigned int is_uaa = 0;
+	unsigned int data[4] = {0};
+	unsigned int io_base;
+	void *mem_base;
+	int i = 0;
+
+	/* By default, Hendrix card UAA Bar0 should be using memory... */
+	io_base = pci_resource_start(pci, 0);
+	mem_base = ioremap(io_base, pci_resource_len(pci, 0));
+	if (NULL == mem_base)
+		return -ENOENT;
+
+	CTLX = ___constant_swab32(*((unsigned int *)"CTLX"));
+	CTL_ = ___constant_swab32(*((unsigned int *)"CTL-"));
+	CTLF = ___constant_swab32(*((unsigned int *)"CTLF"));
+	CTLi = ___constant_swab32(*((unsigned int *)"CTLi"));
+	CTLA = ___constant_swab32(*((unsigned int *)"CTLA"));
+	CTLZ = ___constant_swab32(*((unsigned int *)"CTLZ"));
+	CTLL = ___constant_swab32(*((unsigned int *)"CTLL"));
+
+	/* Read current mode from Mode Change Register */
+	for (i = 0; i < 4; i++)
+		data[i] = readl(mem_base + UAA_CORE_CHANGE);
+
+	/* Determine current mode... */
+	if (data[0] == CTLA) {
+		is_uaa = ((data[1] == CTLZ && data[2] == CTLL
+			  && data[3] == CTLA) || (data[1] == CTLA
+			  && data[2] == CTLZ && data[3] == CTLL));
+	} else if (data[0] == CTLZ) {
+		is_uaa = (data[1] == CTLL
+				&& data[2] == CTLA && data[3] == CTLA);
+	} else if (data[0] == CTLL) {
+		is_uaa = (data[1] == CTLA
+				&& data[2] == CTLA && data[3] == CTLZ);
+	} else {
+		is_uaa = 0;
+	}
+
+	if (!is_uaa) {
+		/* Not in UAA mode currently. Return directly. */
+		iounmap(mem_base);
+		return 0;
+	}
+
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_0, &bar0);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_1, &bar1);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_2, &bar2);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_3, &bar3);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_4, &bar4);
+	pci_read_config_dword(pci, PCI_BASE_ADDRESS_5, &bar5);
+	pci_read_config_dword(pci, PCI_INTERRUPT_LINE, &irq);
+	pci_read_config_dword(pci, PCI_CACHE_LINE_SIZE, &cl_size);
+	pci_read_config_dword(pci, PCI_LATENCY_TIMER, &l_timer);
+	pci_read_config_dword(pci, UAA_CFG_PWRSTATUS, &pwr);
+	pci_read_config_dword(pci, PCI_COMMAND, &cmd);
+
+	/* Set up X-Fi core PCI configuration space. */
+	/* Switch to X-Fi config space with BAR0 exposed. */
+	pci_write_config_dword(pci, UAA_CFG_SPACE_FLAG, 0x87654321);
+	/* Copy UAA's BAR5 into X-Fi BAR0 */
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_0, bar5);
+	/* Switch to X-Fi config space without BAR0 exposed. */
+	pci_write_config_dword(pci, UAA_CFG_SPACE_FLAG, 0x12345678);
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_1, bar1);
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_2, bar2);
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_3, bar3);
+	pci_write_config_dword(pci, PCI_BASE_ADDRESS_4, bar4);
+	pci_write_config_dword(pci, PCI_INTERRUPT_LINE, irq);
+	pci_write_config_dword(pci, PCI_CACHE_LINE_SIZE, cl_size);
+	pci_write_config_dword(pci, PCI_LATENCY_TIMER, l_timer);
+	pci_write_config_dword(pci, UAA_CFG_PWRSTATUS, pwr);
+	pci_write_config_dword(pci, PCI_COMMAND, cmd);
+
+	/* Switch to X-Fi mode */
+	writel(CTLX, (mem_base + UAA_CORE_CHANGE));
+	writel(CTL_, (mem_base + UAA_CORE_CHANGE));
+	writel(CTLF, (mem_base + UAA_CORE_CHANGE));
+	writel(CTLi, (mem_base + UAA_CORE_CHANGE));
+
+	iounmap(mem_base);
+
+	return 0;
+}
+
+static int hw_card_start(struct hw *hw)
+{
+	int err = 0;
+	struct pci_dev *pci = hw->pci;
+	u16 subsys_id = 0;
+	unsigned int dma_mask = 0;
+
+	err = pci_enable_device(pci);
+	if (err < 0)
+		return err;
+
+	/* Set DMA transfer mask */
+	dma_mask = CT_XFI_DMA_MASK;
+	if (pci_set_dma_mask(pci, dma_mask) < 0 ||
+	    pci_set_consistent_dma_mask(pci, dma_mask) < 0) {
+		printk(KERN_ERR "architecture does not support PCI "
+				"busmaster DMA with mask 0x%x\n", dma_mask);
+		err = -ENXIO;
+		goto error1;
+	}
+
+	err = pci_request_regions(pci, "XFi");
+	if (err < 0)
+		goto error1;
+
+	/* Switch to X-Fi mode from UAA mode if neeeded */
+	pci_read_config_word(pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	if ((0x5 == pci->device) && (0x6000 == (subsys_id & 0x6000))) {
+		err = uaa_to_xfi(pci);
+		if (err)
+			goto error2;
+
+		hw->io_base = pci_resource_start(pci, 5);
+	} else {
+		hw->io_base = pci_resource_start(pci, 0);
+	}
+
+	/*if ((err = request_irq(pci->irq, ct_atc_interrupt, IRQF_SHARED,
+				atc->chip_details->nm_card, hw))) {
+		goto error2;
+	}
+	hw->irq = pci->irq;
+	*/
+
+	pci_set_master(pci);
+
+	return 0;
+
+error2:
+	pci_release_regions(pci);
+	hw->io_base = 0;
+error1:
+	pci_disable_device(pci);
+	return err;
+}
+
+static int hw_card_stop(struct hw *hw)
+{
+	/* TODO: Disable interrupt and so on... */
+	return 0;
+}
+
+static int hw_card_shutdown(struct hw *hw)
+{
+	if (hw->irq >= 0)
+		free_irq(hw->irq, hw);
+
+	hw->irq	= -1;
+
+	if (NULL != ((void *)hw->mem_base))
+		iounmap((void *)hw->mem_base);
+
+	hw->mem_base = (unsigned long)NULL;
+
+	if (hw->io_base)
+		pci_release_regions(hw->pci);
+
+	hw->io_base = 0;
+
+	pci_disable_device(hw->pci);
+
+	return 0;
+}
+
+static int hw_card_init(struct hw *hw, struct card_conf *info)
+{
+	int err;
+	unsigned int gctl;
+	u16 subsys_id = 0;
+	u32 data = 0;
+	struct dac_conf dac_info = {0};
+	struct adc_conf adc_info = {0};
+	struct daio_conf daio_info = {0};
+	struct trn_conf trn_info = {0};
+
+	/* Get PCI io port base address and do Hendrix switch if needed. */
+	if (!hw->io_base) {
+		err = hw_card_start(hw);
+		if (err)
+			return err;
+	}
+
+	/* PLL init */
+	err = hw_pll_init(hw, info->rsr);
+	if (err < 0)
+		return err;
+
+	/* kick off auto-init */
+	err = hw_auto_init(hw);
+	if (err < 0)
+		return err;
+
+	/* Enable audio ring */
+	gctl = hw_read_20kx(hw, GCTL);
+	set_field(&gctl, GCTL_EAC, 1);
+	set_field(&gctl, GCTL_DBP, 1);
+	set_field(&gctl, GCTL_TBP, 1);
+	set_field(&gctl, GCTL_FBP, 1);
+	set_field(&gctl, GCTL_ET, 1);
+	hw_write_20kx(hw, GCTL, gctl);
+	mdelay(10);
+
+	/* Reset all global pending interrupts */
+	hw_write_20kx(hw, GIE, 0);
+	/* Reset all SRC pending interrupts */
+	hw_write_20kx(hw, SRCIP, 0);
+	mdelay(30);
+
+	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	/* Detect the card ID and configure GPIO accordingly. */
+	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
+		/* SB055x cards */
+		hw_write_20kx(hw, GPIOCTL, 0x13fe);
+	} else if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
+		/* SB073x cards */
+		hw_write_20kx(hw, GPIOCTL, 0x00e6);
+	} else if ((subsys_id & 0xf000) == 0x6000) {
+		/* Vista compatible cards */
+		hw_write_20kx(hw, GPIOCTL, 0x00c2);
+	} else {
+		hw_write_20kx(hw, GPIOCTL, 0x01e6);
+	}
+
+	trn_info.vm_pgt_phys = info->vm_pgt_phys;
+	err = hw_trn_init(hw, &trn_info);
+	if (err < 0)
+		return err;
+
+	daio_info.msr = info->msr;
+	err = hw_daio_init(hw, &daio_info);
+	if (err < 0)
+		return err;
+
+	dac_info.msr = info->msr;
+	err = hw_dac_init(hw, &dac_info);
+	if (err < 0)
+		return err;
+
+	adc_info.msr = info->msr;
+	adc_info.input = ADC_LINEIN;
+	adc_info.mic20db = 0;
+	err = hw_adc_init(hw, &adc_info);
+	if (err < 0)
+		return err;
+
+	data = hw_read_20kx(hw, SRCMCTL);
+	data |= 0x1; /* Enables input from the audio ring */
+	hw_write_20kx(hw, SRCMCTL, data);
+
+	return 0;
+}
+
+static u32 hw_read_20kx(struct hw *hw, u32 reg)
+{
+	u32 value;
+	unsigned long flags;
+
+	spin_lock_irqsave(
+		&container_of(hw, struct hw20k1, hw)->reg_20k1_lock, flags);
+	outl(reg, hw->io_base + 0x0);
+	value = inl(hw->io_base + 0x4);
+	spin_unlock_irqrestore(
+		&container_of(hw, struct hw20k1, hw)->reg_20k1_lock, flags);
+
+	return value;
+}
+
+static void hw_write_20kx(struct hw *hw, u32 reg, u32 data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(
+		&container_of(hw, struct hw20k1, hw)->reg_20k1_lock, flags);
+	outl(reg, hw->io_base + 0x0);
+	outl(data, hw->io_base + 0x4);
+	spin_unlock_irqrestore(
+		&container_of(hw, struct hw20k1, hw)->reg_20k1_lock, flags);
+
+}
+
+static u32 hw_read_pci(struct hw *hw, u32 reg)
+{
+	u32 value;
+	unsigned long flags;
+
+	spin_lock_irqsave(
+		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
+	outl(reg, hw->io_base + 0x10);
+	value = inl(hw->io_base + 0x14);
+	spin_unlock_irqrestore(
+		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
+
+	return value;
+}
+
+static void hw_write_pci(struct hw *hw, u32 reg, u32 data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(
+		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
+	outl(reg, hw->io_base + 0x10);
+	outl(data, hw->io_base + 0x14);
+	spin_unlock_irqrestore(
+		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
+}
+
+int create_20k1_hw_obj(struct hw **rhw)
+{
+	struct hw *hw;
+	struct hw20k1 *hw20k1;
+
+	*rhw = NULL;
+	hw20k1 = kzalloc(sizeof(*hw20k1), GFP_KERNEL);
+	if (NULL == hw20k1)
+		return -ENOMEM;
+
+	spin_lock_init(&hw20k1->reg_20k1_lock);
+	spin_lock_init(&hw20k1->reg_pci_lock);
+
+	hw = &hw20k1->hw;
+
+	hw->io_base = 0;
+	hw->mem_base = (unsigned long)NULL;
+	hw->irq = -1;
+
+	hw->card_init = hw_card_init;
+	hw->card_stop = hw_card_stop;
+	hw->pll_init = hw_pll_init;
+	hw->is_adc_source_selected = hw_is_adc_input_selected;
+	hw->select_adc_source = hw_adc_input_select;
+	hw->have_digit_io_switch = hw_have_digit_io_switch;
+
+	hw->src_rsc_get_ctrl_blk = src_get_rsc_ctrl_blk;
+	hw->src_rsc_put_ctrl_blk = src_put_rsc_ctrl_blk;
+	hw->src_mgr_get_ctrl_blk = src_mgr_get_ctrl_blk;
+	hw->src_mgr_put_ctrl_blk = src_mgr_put_ctrl_blk;
+	hw->src_set_state = src_set_state;
+	hw->src_set_bm = src_set_bm;
+	hw->src_set_rsr = src_set_rsr;
+	hw->src_set_sf = src_set_sf;
+	hw->src_set_wr = src_set_wr;
+	hw->src_set_pm = src_set_pm;
+	hw->src_set_rom = src_set_rom;
+	hw->src_set_vo = src_set_vo;
+	hw->src_set_st = src_set_st;
+	hw->src_set_ie = src_set_ie;
+	hw->src_set_ilsz = src_set_ilsz;
+	hw->src_set_bp = src_set_bp;
+	hw->src_set_cisz = src_set_cisz;
+	hw->src_set_ca = src_set_ca;
+	hw->src_set_sa = src_set_sa;
+	hw->src_set_la = src_set_la;
+	hw->src_set_pitch = src_set_pitch;
+	hw->src_set_dirty = src_set_dirty;
+	hw->src_set_clear_zbufs = src_set_clear_zbufs;
+	hw->src_set_dirty_all = src_set_dirty_all;
+	hw->src_commit_write = src_commit_write;
+	hw->src_get_ca = src_get_ca;
+	hw->src_get_dirty = src_get_dirty;
+	hw->src_dirty_conj_mask = src_dirty_conj_mask;
+	hw->src_mgr_enbs_src = src_mgr_enbs_src;
+	hw->src_mgr_enb_src = src_mgr_enb_src;
+	hw->src_mgr_dsb_src = src_mgr_dsb_src;
+	hw->src_mgr_commit_write = src_mgr_commit_write;
+
+	hw->srcimp_mgr_get_ctrl_blk = srcimp_mgr_get_ctrl_blk;
+	hw->srcimp_mgr_put_ctrl_blk = srcimp_mgr_put_ctrl_blk;
+	hw->srcimp_mgr_set_imaparc = srcimp_mgr_set_imaparc;
+	hw->srcimp_mgr_set_imapuser = srcimp_mgr_set_imapuser;
+	hw->srcimp_mgr_set_imapnxt = srcimp_mgr_set_imapnxt;
+	hw->srcimp_mgr_set_imapaddr = srcimp_mgr_set_imapaddr;
+	hw->srcimp_mgr_commit_write = srcimp_mgr_commit_write;
+
+	hw->amixer_rsc_get_ctrl_blk = amixer_rsc_get_ctrl_blk;
+	hw->amixer_rsc_put_ctrl_blk = amixer_rsc_put_ctrl_blk;
+	hw->amixer_mgr_get_ctrl_blk = amixer_mgr_get_ctrl_blk;
+	hw->amixer_mgr_put_ctrl_blk = amixer_mgr_put_ctrl_blk;
+	hw->amixer_set_mode = amixer_set_mode;
+	hw->amixer_set_iv = amixer_set_iv;
+	hw->amixer_set_x = amixer_set_x;
+	hw->amixer_set_y = amixer_set_y;
+	hw->amixer_set_sadr = amixer_set_sadr;
+	hw->amixer_set_se = amixer_set_se;
+	hw->amixer_set_dirty = amixer_set_dirty;
+	hw->amixer_set_dirty_all = amixer_set_dirty_all;
+	hw->amixer_commit_write = amixer_commit_write;
+	hw->amixer_get_y = amixer_get_y;
+	hw->amixer_get_dirty = amixer_get_dirty;
+
+	hw->dai_get_ctrl_blk = dai_get_ctrl_blk;
+	hw->dai_put_ctrl_blk = dai_put_ctrl_blk;
+	hw->dai_srt_set_srco = dai_srt_set_srcr;
+	hw->dai_srt_set_srcm = dai_srt_set_srcl;
+	hw->dai_srt_set_rsr = dai_srt_set_rsr;
+	hw->dai_srt_set_drat = dai_srt_set_drat;
+	hw->dai_srt_set_ec = dai_srt_set_ec;
+	hw->dai_srt_set_et = dai_srt_set_et;
+	hw->dai_commit_write = dai_commit_write;
+
+	hw->dao_get_ctrl_blk = dao_get_ctrl_blk;
+	hw->dao_put_ctrl_blk = dao_put_ctrl_blk;
+	hw->dao_set_spos = dao_set_spos;
+	hw->dao_commit_write = dao_commit_write;
+	hw->dao_get_spos = dao_get_spos;
+
+	hw->daio_mgr_get_ctrl_blk = daio_mgr_get_ctrl_blk;
+	hw->daio_mgr_put_ctrl_blk = daio_mgr_put_ctrl_blk;
+	hw->daio_mgr_enb_dai = daio_mgr_enb_dai;
+	hw->daio_mgr_dsb_dai = daio_mgr_dsb_dai;
+	hw->daio_mgr_enb_dao = daio_mgr_enb_dao;
+	hw->daio_mgr_dsb_dao = daio_mgr_dsb_dao;
+	hw->daio_mgr_dao_init = daio_mgr_dao_init;
+	hw->daio_mgr_set_imaparc = daio_mgr_set_imaparc;
+	hw->daio_mgr_set_imapnxt = daio_mgr_set_imapnxt;
+	hw->daio_mgr_set_imapaddr = daio_mgr_set_imapaddr;
+	hw->daio_mgr_commit_write = daio_mgr_commit_write;
+
+	*rhw = hw;
+
+	return 0;
+}
+
+int destroy_20k1_hw_obj(struct hw *hw)
+{
+	if (hw->io_base)
+		hw_card_shutdown(hw);
+
+	kfree(container_of(hw, struct hw20k1, hw));
+	return 0;
+}
diff --git a/sound/pci/ctxfi/cthw20k1.h b/sound/pci/ctxfi/cthw20k1.h
new file mode 100644
index 0000000..02f72fb
--- /dev/null
+++ b/sound/pci/ctxfi/cthw20k1.h
@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthw20k1.h
+ *
+ * @Brief
+ * This file contains the definition of hardware access methord.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTHW20K1_H
+#define CTHW20K1_H
+
+#include "cthardware.h"
+
+int create_20k1_hw_obj(struct hw **rhw);
+int destroy_20k1_hw_obj(struct hw *hw);
+
+#endif /* CTHW20K1_H */
diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
new file mode 100644
index 0000000..cdcb75c
--- /dev/null
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -0,0 +1,2133 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthw20k2.c
+ *
+ * @Brief
+ * This file contains the implementation of hardware access methord for 20k2.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 14 2008
+ *
+ */
+
+#include "cthw20k2.h"
+#include "ct20k2reg.h"
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bits */
+
+static u32 hw_read_20kx(struct hw *hw, u32 reg);
+static void hw_write_20kx(struct hw *hw, u32 reg, u32 data);
+
+/*
+ * Type definition block.
+ * The layout of control structures can be directly applied on 20k2 chip.
+ */
+
+/*
+ * SRC control block definitions.
+ */
+
+/* SRC resource control block */
+#define SRCCTL_STATE	0x00000007
+#define SRCCTL_BM	0x00000008
+#define SRCCTL_RSR	0x00000030
+#define SRCCTL_SF	0x000001C0
+#define SRCCTL_WR	0x00000200
+#define SRCCTL_PM	0x00000400
+#define SRCCTL_ROM	0x00001800
+#define SRCCTL_VO	0x00002000
+#define SRCCTL_ST	0x00004000
+#define SRCCTL_IE	0x00008000
+#define SRCCTL_ILSZ	0x000F0000
+#define SRCCTL_BP	0x00100000
+
+#define SRCCCR_CISZ	0x000007FF
+#define SRCCCR_CWA	0x001FF800
+#define SRCCCR_D	0x00200000
+#define SRCCCR_RS	0x01C00000
+#define SRCCCR_NAL	0x3E000000
+#define SRCCCR_RA	0xC0000000
+
+#define SRCCA_CA	0x0FFFFFFF
+#define SRCCA_RS	0xE0000000
+
+#define SRCSA_SA	0x0FFFFFFF
+
+#define SRCLA_LA	0x0FFFFFFF
+
+/* Mixer Parameter Ring ram Low and Hight register.
+ * Fixed-point value in 8.24 format for parameter channel */
+#define MPRLH_PITCH	0xFFFFFFFF
+
+/* SRC resource register dirty flags */
+union src_dirty {
+	struct {
+		u16 ctl:1;
+		u16 ccr:1;
+		u16 sa:1;
+		u16 la:1;
+		u16 ca:1;
+		u16 mpr:1;
+		u16 czbfs:1;	/* Clear Z-Buffers */
+		u16 rsv:9;
+	} bf;
+	u16 data;
+};
+
+struct src_rsc_ctrl_blk {
+	unsigned int	ctl;
+	unsigned int 	ccr;
+	unsigned int	ca;
+	unsigned int	sa;
+	unsigned int	la;
+	unsigned int	mpr;
+	union src_dirty	dirty;
+};
+
+/* SRC manager control block */
+union src_mgr_dirty {
+	struct {
+		u16 enb0:1;
+		u16 enb1:1;
+		u16 enb2:1;
+		u16 enb3:1;
+		u16 enb4:1;
+		u16 enb5:1;
+		u16 enb6:1;
+		u16 enb7:1;
+		u16 enbsa:1;
+		u16 rsv:7;
+	} bf;
+	u16 data;
+};
+
+struct src_mgr_ctrl_blk {
+	unsigned int		enbsa;
+	unsigned int		enb[8];
+	union src_mgr_dirty	dirty;
+};
+
+/* SRCIMP manager control block */
+#define SRCAIM_ARC	0x00000FFF
+#define SRCAIM_NXT	0x00FF0000
+#define SRCAIM_SRC	0xFF000000
+
+struct srcimap {
+	unsigned int srcaim;
+	unsigned int idx;
+};
+
+/* SRCIMP manager register dirty flags */
+union srcimp_mgr_dirty {
+	struct {
+		u16 srcimap:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+struct srcimp_mgr_ctrl_blk {
+	struct srcimap		srcimap;
+	union srcimp_mgr_dirty	dirty;
+};
+
+/*
+ * Function implementation block.
+ */
+
+static int src_get_rsc_ctrl_blk(void **rblk)
+{
+	struct src_rsc_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int src_put_rsc_ctrl_blk(void *blk)
+{
+	kfree((struct src_rsc_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int src_set_state(void *blk, unsigned int state)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_STATE, state);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_bm(void *blk, unsigned int bm)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_BM, bm);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_rsr(void *blk, unsigned int rsr)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_RSR, rsr);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_sf(void *blk, unsigned int sf)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_SF, sf);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_wr(void *blk, unsigned int wr)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_WR, wr);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_pm(void *blk, unsigned int pm)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_PM, pm);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_rom(void *blk, unsigned int rom)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ROM, rom);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_vo(void *blk, unsigned int vo)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_VO, vo);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_st(void *blk, unsigned int st)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ST, st);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_ie(void *blk, unsigned int ie)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_IE, ie);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_ilsz(void *blk, unsigned int ilsz)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_ILSZ, ilsz);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_bp(void *blk, unsigned int bp)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ctl, SRCCTL_BP, bp);
+	ctl->dirty.bf.ctl = 1;
+	return 0;
+}
+
+static int src_set_cisz(void *blk, unsigned int cisz)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ccr, SRCCCR_CISZ, cisz);
+	ctl->dirty.bf.ccr = 1;
+	return 0;
+}
+
+static int src_set_ca(void *blk, unsigned int ca)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->ca, SRCCA_CA, ca);
+	ctl->dirty.bf.ca = 1;
+	return 0;
+}
+
+static int src_set_sa(void *blk, unsigned int sa)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->sa, SRCSA_SA, sa);
+	ctl->dirty.bf.sa = 1;
+	return 0;
+}
+
+static int src_set_la(void *blk, unsigned int la)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->la, SRCLA_LA, la);
+	ctl->dirty.bf.la = 1;
+	return 0;
+}
+
+static int src_set_pitch(void *blk, unsigned int pitch)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->mpr, MPRLH_PITCH, pitch);
+	ctl->dirty.bf.mpr = 1;
+	return 0;
+}
+
+static int src_set_clear_zbufs(void *blk, unsigned int clear)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.bf.czbfs = (clear ? 1 : 0);
+	return 0;
+}
+
+static int src_set_dirty(void *blk, unsigned int flags)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.data = (flags & 0xffff);
+	return 0;
+}
+
+static int src_set_dirty_all(void *blk)
+{
+	((struct src_rsc_ctrl_blk *)blk)->dirty.data = ~(0x0);
+	return 0;
+}
+
+#define AR_SLOT_SIZE		4096
+#define AR_SLOT_BLOCK_SIZE	16
+#define AR_PTS_PITCH		6
+#define AR_PARAM_SRC_OFFSET	0x60
+
+static unsigned int src_param_pitch_mixer(unsigned int src_idx)
+{
+	return ((src_idx << 4) + AR_PTS_PITCH + AR_SLOT_SIZE
+			- AR_PARAM_SRC_OFFSET) % AR_SLOT_SIZE;
+
+}
+
+static int src_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+	int i = 0;
+
+	if (ctl->dirty.bf.czbfs) {
+		/* Clear Z-Buffer registers */
+		for (i = 0; i < 8; i++)
+			hw_write_20kx(hw, SRC_UPZ+idx*0x100+i*0x4, 0);
+
+		for (i = 0; i < 4; i++)
+			hw_write_20kx(hw, SRC_DN0Z+idx*0x100+i*0x4, 0);
+
+		for (i = 0; i < 8; i++)
+			hw_write_20kx(hw, SRC_DN1Z+idx*0x100+i*0x4, 0);
+
+		ctl->dirty.bf.czbfs = 0;
+	}
+	if (ctl->dirty.bf.mpr) {
+		/* Take the parameter mixer resource in the same group as that
+		 * the idx src is in for simplicity. Unlike src, all conjugate
+		 * parameter mixer resources must be programmed for
+		 * corresponding conjugate src resources. */
+		unsigned int pm_idx = src_param_pitch_mixer(idx);
+		hw_write_20kx(hw, MIXER_PRING_LO_HI+4*pm_idx, ctl->mpr);
+		hw_write_20kx(hw, MIXER_PMOPLO+8*pm_idx, 0x3);
+		hw_write_20kx(hw, MIXER_PMOPHI+8*pm_idx, 0x0);
+		ctl->dirty.bf.mpr = 0;
+	}
+	if (ctl->dirty.bf.sa) {
+		hw_write_20kx(hw, SRC_SA+idx*0x100, ctl->sa);
+		ctl->dirty.bf.sa = 0;
+	}
+	if (ctl->dirty.bf.la) {
+		hw_write_20kx(hw, SRC_LA+idx*0x100, ctl->la);
+		ctl->dirty.bf.la = 0;
+	}
+	if (ctl->dirty.bf.ca) {
+		hw_write_20kx(hw, SRC_CA+idx*0x100, ctl->ca);
+		ctl->dirty.bf.ca = 0;
+	}
+
+	/* Write srccf register */
+	hw_write_20kx(hw, SRC_CF+idx*0x100, 0x0);
+
+	if (ctl->dirty.bf.ccr) {
+		hw_write_20kx(hw, SRC_CCR+idx*0x100, ctl->ccr);
+		ctl->dirty.bf.ccr = 0;
+	}
+	if (ctl->dirty.bf.ctl) {
+		hw_write_20kx(hw, SRC_CTL+idx*0x100, ctl->ctl);
+		ctl->dirty.bf.ctl = 0;
+	}
+
+	return 0;
+}
+
+static int src_get_ca(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct src_rsc_ctrl_blk *ctl = blk;
+
+	ctl->ca = hw_read_20kx(hw, SRC_CA+idx*0x100);
+	ctl->dirty.bf.ca = 0;
+
+	return get_field(ctl->ca, SRCCA_CA);
+}
+
+static unsigned int src_get_dirty(void *blk)
+{
+	return ((struct src_rsc_ctrl_blk *)blk)->dirty.data;
+}
+
+static unsigned int src_dirty_conj_mask(void)
+{
+	return 0x20;
+}
+
+static int src_mgr_enbs_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enbsa |= (0x1 << ((idx%128)/4));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.bf.enbsa = 1;
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] |= (0x1 << (idx%32));
+	return 0;
+}
+
+static int src_mgr_enb_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] |= (0x1 << (idx%32));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.data |= (0x1 << (idx/32));
+	return 0;
+}
+
+static int src_mgr_dsb_src(void *blk, unsigned int idx)
+{
+	((struct src_mgr_ctrl_blk *)blk)->enb[idx/32] &= ~(0x1 << (idx%32));
+	((struct src_mgr_ctrl_blk *)blk)->dirty.data |= (0x1 << (idx/32));
+	return 0;
+}
+
+static int src_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct src_mgr_ctrl_blk *ctl = blk;
+	int i = 0;
+	unsigned int ret = 0;
+
+	if (ctl->dirty.bf.enbsa) {
+		do {
+			ret = hw_read_20kx(hw, SRC_ENBSTAT);
+		} while (ret & 0x1);
+		hw_write_20kx(hw, SRC_ENBSA, ctl->enbsa);
+		ctl->dirty.bf.enbsa = 0;
+	}
+	for (i = 0; i < 8; i++) {
+		if ((ctl->dirty.data & (0x1 << i))) {
+			hw_write_20kx(hw, SRC_ENB+(i*0x100), ctl->enb[i]);
+			ctl->dirty.data &= ~(0x1 << i);
+		}
+	}
+
+	return 0;
+}
+
+static int src_mgr_get_ctrl_blk(void **rblk)
+{
+	struct src_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int src_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct src_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int srcimp_mgr_get_ctrl_blk(void **rblk)
+{
+	struct srcimp_mgr_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int srcimp_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct srcimp_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int srcimp_mgr_set_imaparc(void *blk, unsigned int slot)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_ARC, slot);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapuser(void *blk, unsigned int user)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_SRC, user);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapnxt(void *blk, unsigned int next)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srcimap.srcaim, SRCAIM_NXT, next);
+	ctl->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_set_imapaddr(void *blk, unsigned int addr)
+{
+	((struct srcimp_mgr_ctrl_blk *)blk)->srcimap.idx = addr;
+	((struct srcimp_mgr_ctrl_blk *)blk)->dirty.bf.srcimap = 1;
+	return 0;
+}
+
+static int srcimp_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct srcimp_mgr_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.srcimap) {
+		hw_write_20kx(hw, SRC_IMAP+ctl->srcimap.idx*0x100,
+						ctl->srcimap.srcaim);
+		ctl->dirty.bf.srcimap = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * AMIXER control block definitions.
+ */
+
+#define AMOPLO_M	0x00000003
+#define AMOPLO_IV	0x00000004
+#define AMOPLO_X	0x0003FFF0
+#define AMOPLO_Y	0xFFFC0000
+
+#define AMOPHI_SADR	0x000000FF
+#define AMOPHI_SE	0x80000000
+
+/* AMIXER resource register dirty flags */
+union amixer_dirty {
+	struct {
+		u16 amoplo:1;
+		u16 amophi:1;
+		u16 rsv:14;
+	} bf;
+	u16 data;
+};
+
+/* AMIXER resource control block */
+struct amixer_rsc_ctrl_blk {
+	unsigned int		amoplo;
+	unsigned int		amophi;
+	union amixer_dirty	dirty;
+};
+
+static int amixer_set_mode(void *blk, unsigned int mode)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_M, mode);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_iv(void *blk, unsigned int iv)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_IV, iv);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_x(void *blk, unsigned int x)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_X, x);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_y(void *blk, unsigned int y)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amoplo, AMOPLO_Y, y);
+	ctl->dirty.bf.amoplo = 1;
+	return 0;
+}
+
+static int amixer_set_sadr(void *blk, unsigned int sadr)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amophi, AMOPHI_SADR, sadr);
+	ctl->dirty.bf.amophi = 1;
+	return 0;
+}
+
+static int amixer_set_se(void *blk, unsigned int se)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->amophi, AMOPHI_SE, se);
+	ctl->dirty.bf.amophi = 1;
+	return 0;
+}
+
+static int amixer_set_dirty(void *blk, unsigned int flags)
+{
+	((struct amixer_rsc_ctrl_blk *)blk)->dirty.data = (flags & 0xffff);
+	return 0;
+}
+
+static int amixer_set_dirty_all(void *blk)
+{
+	((struct amixer_rsc_ctrl_blk *)blk)->dirty.data = ~(0x0);
+	return 0;
+}
+
+static int amixer_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.amoplo || ctl->dirty.bf.amophi) {
+		hw_write_20kx(hw, MIXER_AMOPLO+idx*8, ctl->amoplo);
+		ctl->dirty.bf.amoplo = 0;
+		hw_write_20kx(hw, MIXER_AMOPHI+idx*8, ctl->amophi);
+		ctl->dirty.bf.amophi = 0;
+	}
+
+	return 0;
+}
+
+static int amixer_get_y(void *blk)
+{
+	struct amixer_rsc_ctrl_blk *ctl = blk;
+
+	return get_field(ctl->amoplo, AMOPLO_Y);
+}
+
+static unsigned int amixer_get_dirty(void *blk)
+{
+	return ((struct amixer_rsc_ctrl_blk *)blk)->dirty.data;
+}
+
+static int amixer_rsc_get_ctrl_blk(void **rblk)
+{
+	struct amixer_rsc_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int amixer_rsc_put_ctrl_blk(void *blk)
+{
+	kfree((struct amixer_rsc_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int amixer_mgr_get_ctrl_blk(void **rblk)
+{
+	*rblk = NULL;
+
+	return 0;
+}
+
+static int amixer_mgr_put_ctrl_blk(void *blk)
+{
+	return 0;
+}
+
+/*
+ * DAIO control block definitions.
+ */
+
+/* Receiver Sample Rate Tracker Control register */
+#define SRTCTL_SRCO	0x000000FF
+#define SRTCTL_SRCM	0x0000FF00
+#define SRTCTL_RSR	0x00030000
+#define SRTCTL_DRAT	0x00300000
+#define SRTCTL_EC	0x01000000
+#define SRTCTL_ET	0x10000000
+
+/* DAIO Receiver register dirty flags */
+union dai_dirty {
+	struct {
+		u16 srt:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+/* DAIO Receiver control block */
+struct dai_ctrl_blk {
+	unsigned int	srt;
+	union dai_dirty	dirty;
+};
+
+/* Audio Input Mapper RAM */
+#define AIM_ARC		0x00000FFF
+#define AIM_NXT		0x007F0000
+
+struct daoimap {
+	unsigned int aim;
+	unsigned int idx;
+};
+
+/* Audio Transmitter Control and Status register */
+#define ATXCTL_EN	0x00000001
+#define ATXCTL_MODE	0x00000010
+#define ATXCTL_CD	0x00000020
+#define ATXCTL_RAW	0x00000100
+#define ATXCTL_MT	0x00000200
+#define ATXCTL_NUC	0x00003000
+#define ATXCTL_BEN	0x00010000
+#define ATXCTL_BMUX	0x00700000
+#define ATXCTL_B24	0x01000000
+#define ATXCTL_CPF	0x02000000
+#define ATXCTL_RIV	0x10000000
+#define ATXCTL_LIV	0x20000000
+#define ATXCTL_RSAT	0x40000000
+#define ATXCTL_LSAT	0x80000000
+
+/* XDIF Transmitter register dirty flags */
+union dao_dirty {
+	struct {
+		u16 atxcsl:1;
+		u16 rsv:15;
+	} bf;
+	u16 data;
+};
+
+/* XDIF Transmitter control block */
+struct dao_ctrl_blk {
+	/* XDIF Transmitter Channel Status Low Register */
+	unsigned int	atxcsl;
+	union dao_dirty	dirty;
+};
+
+/* Audio Receiver Control register */
+#define ARXCTL_EN	0x00000001
+
+/* DAIO manager register dirty flags */
+union daio_mgr_dirty {
+	struct {
+		u32 atxctl:8;
+		u32 arxctl:8;
+		u32 daoimap:1;
+		u32 rsv:15;
+	} bf;
+	u32 data;
+};
+
+/* DAIO manager control block */
+struct daio_mgr_ctrl_blk {
+	struct daoimap		daoimap;
+	unsigned int		txctl[8];
+	unsigned int		rxctl[8];
+	union daio_mgr_dirty	dirty;
+};
+
+static int dai_srt_set_srco(void *blk, unsigned int src)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_SRCO, src);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_srcm(void *blk, unsigned int src)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_SRCM, src);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_rsr(void *blk, unsigned int rsr)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_RSR, rsr);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_drat(void *blk, unsigned int drat)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_DRAT, drat);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_ec(void *blk, unsigned int ec)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_EC, ec ? 1 : 0);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_srt_set_et(void *blk, unsigned int et)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->srt, SRTCTL_ET, et ? 1 : 0);
+	ctl->dirty.bf.srt = 1;
+	return 0;
+}
+
+static int dai_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct dai_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.srt) {
+		hw_write_20kx(hw, AUDIO_IO_RX_SRT_CTL+0x40*idx, ctl->srt);
+		ctl->dirty.bf.srt = 0;
+	}
+
+	return 0;
+}
+
+static int dai_get_ctrl_blk(void **rblk)
+{
+	struct dai_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int dai_put_ctrl_blk(void *blk)
+{
+	kfree((struct dai_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int dao_set_spos(void *blk, unsigned int spos)
+{
+	((struct dao_ctrl_blk *)blk)->atxcsl = spos;
+	((struct dao_ctrl_blk *)blk)->dirty.bf.atxcsl = 1;
+	return 0;
+}
+
+static int dao_commit_write(struct hw *hw, unsigned int idx, void *blk)
+{
+	struct dao_ctrl_blk *ctl = blk;
+
+	if (ctl->dirty.bf.atxcsl) {
+		if (idx < 4) {
+			/* S/PDIF SPOSx */
+			hw_write_20kx(hw, AUDIO_IO_TX_CSTAT_L+0x40*idx,
+							ctl->atxcsl);
+		}
+		ctl->dirty.bf.atxcsl = 0;
+	}
+
+	return 0;
+}
+
+static int dao_get_spos(void *blk, unsigned int *spos)
+{
+	*spos = ((struct dao_ctrl_blk *)blk)->atxcsl;
+	return 0;
+}
+
+static int dao_get_ctrl_blk(void **rblk)
+{
+	struct dao_ctrl_blk *blk;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int dao_put_ctrl_blk(void *blk)
+{
+	kfree((struct dao_ctrl_blk *)blk);
+
+	return 0;
+}
+
+static int daio_mgr_enb_dai(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->rxctl[idx], ARXCTL_EN, 1);
+	ctl->dirty.bf.arxctl |= (0x1 << idx);
+	return 0;
+}
+
+static int daio_mgr_dsb_dai(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->rxctl[idx], ARXCTL_EN, 0);
+
+	ctl->dirty.bf.arxctl |= (0x1 << idx);
+	return 0;
+}
+
+static int daio_mgr_enb_dao(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->txctl[idx], ATXCTL_EN, 1);
+	ctl->dirty.bf.atxctl |= (0x1 << idx);
+	return 0;
+}
+
+static int daio_mgr_dsb_dao(void *blk, unsigned int idx)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->txctl[idx], ATXCTL_EN, 0);
+	ctl->dirty.bf.atxctl |= (0x1 << idx);
+	return 0;
+}
+
+static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	if (idx < 4) {
+		/* S/PDIF output */
+		switch ((conf & 0x7)) {
+		case 1:
+			set_field(&ctl->txctl[idx], ATXCTL_NUC, 0);
+			break;
+		case 2:
+			set_field(&ctl->txctl[idx], ATXCTL_NUC, 1);
+			break;
+		case 4:
+			set_field(&ctl->txctl[idx], ATXCTL_NUC, 2);
+			break;
+		case 8:
+			set_field(&ctl->txctl[idx], ATXCTL_NUC, 3);
+			break;
+		default:
+			break;
+		}
+		/* CDIF */
+		set_field(&ctl->txctl[idx], ATXCTL_CD, (!(conf & 0x7)));
+		/* Non-audio */
+		set_field(&ctl->txctl[idx], ATXCTL_LIV, (conf >> 4) & 0x1);
+		/* Non-audio */
+		set_field(&ctl->txctl[idx], ATXCTL_RIV, (conf >> 4) & 0x1);
+		set_field(&ctl->txctl[idx], ATXCTL_RAW,
+			  ((conf >> 3) & 0x1) ? 0 : 0);
+		ctl->dirty.bf.atxctl |= (0x1 << idx);
+	} else {
+		/* I2S output */
+		/*idx %= 4; */
+	}
+	return 0;
+}
+
+static int daio_mgr_set_imaparc(void *blk, unsigned int slot)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->daoimap.aim, AIM_ARC, slot);
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_set_imapnxt(void *blk, unsigned int next)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+
+	set_field(&ctl->daoimap.aim, AIM_NXT, next);
+	ctl->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_set_imapaddr(void *blk, unsigned int addr)
+{
+	((struct daio_mgr_ctrl_blk *)blk)->daoimap.idx = addr;
+	((struct daio_mgr_ctrl_blk *)blk)->dirty.bf.daoimap = 1;
+	return 0;
+}
+
+static int daio_mgr_commit_write(struct hw *hw, void *blk)
+{
+	struct daio_mgr_ctrl_blk *ctl = blk;
+	unsigned int data = 0;
+	int i = 0;
+
+	for (i = 0; i < 8; i++) {
+		if ((ctl->dirty.bf.atxctl & (0x1 << i))) {
+			data = ctl->txctl[i];
+			hw_write_20kx(hw, (AUDIO_IO_TX_CTL+(0x40*i)), data);
+			ctl->dirty.bf.atxctl &= ~(0x1 << i);
+			mdelay(1);
+		}
+		if ((ctl->dirty.bf.arxctl & (0x1 << i))) {
+			data = ctl->rxctl[i];
+			hw_write_20kx(hw, (AUDIO_IO_RX_CTL+(0x40*i)), data);
+			ctl->dirty.bf.arxctl &= ~(0x1 << i);
+			mdelay(1);
+		}
+	}
+	if (ctl->dirty.bf.daoimap) {
+		hw_write_20kx(hw, AUDIO_IO_AIM+ctl->daoimap.idx*4,
+						ctl->daoimap.aim);
+		ctl->dirty.bf.daoimap = 0;
+	}
+
+	return 0;
+}
+
+static int daio_mgr_get_ctrl_blk(struct hw *hw, void **rblk)
+{
+	struct daio_mgr_ctrl_blk *blk;
+	int i = 0;
+
+	*rblk = NULL;
+	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+	if (NULL == blk)
+		return -ENOMEM;
+
+	for (i = 0; i < 8; i++) {
+		blk->txctl[i] = hw_read_20kx(hw, AUDIO_IO_TX_CTL+(0x40*i));
+		blk->rxctl[i] = hw_read_20kx(hw, AUDIO_IO_RX_CTL+(0x40*i));
+	}
+
+	*rblk = blk;
+
+	return 0;
+}
+
+static int daio_mgr_put_ctrl_blk(void *blk)
+{
+	kfree((struct daio_mgr_ctrl_blk *)blk);
+
+	return 0;
+}
+
+/* Card hardware initialization block */
+struct dac_conf {
+	unsigned int msr; /* master sample rate in rsrs */
+};
+
+struct adc_conf {
+	unsigned int msr; 	/* master sample rate in rsrs */
+	unsigned char input; 	/* the input source of ADC */
+	unsigned char mic20db; 	/* boost mic by 20db if input is microphone */
+};
+
+struct daio_conf {
+	unsigned int msr; /* master sample rate in rsrs */
+};
+
+struct trn_conf {
+	unsigned long vm_pgt_phys;
+};
+
+static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
+{
+	u32 dwData = 0;
+	int i;
+
+	/* Program I2S with proper sample rate and enable the correct I2S
+	 * channel. ED(0/8/16/24): Enable all I2S/I2X master clock output */
+	if (1 == info->msr) {
+		hw_write_20kx(hw, AUDIO_IO_MCLK, 0x01010101);
+		hw_write_20kx(hw, AUDIO_IO_TX_BLRCLK, 0x01010101);
+		hw_write_20kx(hw, AUDIO_IO_RX_BLRCLK, 0);
+	} else if (2 == info->msr) {
+		hw_write_20kx(hw, AUDIO_IO_MCLK, 0x11111111);
+		/* Specify all playing 96khz
+		 * EA [0]	- Enabled
+		 * RTA [4:5]	- 96kHz
+		 * EB [8]	- Enabled
+		 * RTB [12:13]	- 96kHz
+		 * EC [16]	- Enabled
+		 * RTC [20:21]	- 96kHz
+		 * ED [24]	- Enabled
+		 * RTD [28:29]	- 96kHz */
+		hw_write_20kx(hw, AUDIO_IO_TX_BLRCLK, 0x11111111);
+		hw_write_20kx(hw, AUDIO_IO_RX_BLRCLK, 0);
+	} else {
+		printk(KERN_ALERT "ERROR!!! Invalid sampling rate!!!\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < 8; i++) {
+		if (i <= 3) {
+			/* 1st 3 channels are SPDIFs (SB0960) */
+			if (i == 3)
+				dwData = 0x1001001;
+			else
+				dwData = 0x1000001;
+
+			hw_write_20kx(hw, (AUDIO_IO_TX_CTL+(0x40*i)), dwData);
+			hw_write_20kx(hw, (AUDIO_IO_RX_CTL+(0x40*i)), dwData);
+
+			/* Initialize the SPDIF Out Channel status registers.
+			 * The value specified here is based on the typical
+			 * values provided in the specification, namely: Clock
+			 * Accuracy of 1000ppm, Sample Rate of 48KHz,
+			 * unspecified source number, Generation status = 1,
+			 * Category code = 0x12 (Digital Signal Mixer),
+			 * Mode = 0, Emph = 0, Copy Permitted, AN = 0
+			 * (indicating that we're transmitting digital audio,
+			 * and the Professional Use bit is 0. */
+
+			hw_write_20kx(hw, AUDIO_IO_TX_CSTAT_L+(0x40*i),
+					0x02109204); /* Default to 48kHz */
+
+			hw_write_20kx(hw, AUDIO_IO_TX_CSTAT_H+(0x40*i), 0x0B);
+		} else {
+			/* Next 5 channels are I2S (SB0960) */
+			dwData = 0x11;
+			hw_write_20kx(hw, AUDIO_IO_RX_CTL+(0x40*i), dwData);
+			if (2 == info->msr) {
+				/* Four channels per sample period */
+				dwData |= 0x1000;
+			}
+			hw_write_20kx(hw, AUDIO_IO_TX_CTL+(0x40*i), dwData);
+		}
+	}
+
+	return 0;
+}
+
+/* TRANSPORT operations */
+static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
+{
+	u32 vmctl = 0, data = 0;
+	unsigned long ptp_phys_low = 0, ptp_phys_high = 0;
+	int i = 0;
+
+	/* Set up device page table */
+	if ((~0UL) == info->vm_pgt_phys) {
+		printk(KERN_ALERT "Wrong device page table page address!!!\n");
+		return -1;
+	}
+
+	vmctl = 0x80000C0F;  /* 32-bit, 4k-size page */
+#if BITS_PER_LONG == 64
+	ptp_phys_low = info->vm_pgt_phys & ((1UL<<32)-1);
+	ptp_phys_high = (info->vm_pgt_phys>>32) & ((1UL<<32)-1);
+	vmctl |= (3<<8);
+#elif BITS_PER_LONG == 32
+	ptp_phys_low = info->vm_pgt_phys & (~0UL);
+	ptp_phys_high = 0;
+#else
+#	error "Unknown BITS_PER_LONG!"
+#endif
+#if PAGE_SIZE == 8192
+#	error "Don't support 8k-page!"
+#endif
+	/* Write page table physical address to all PTPAL registers */
+	for (i = 0; i < 64; i++) {
+		hw_write_20kx(hw, VMEM_PTPAL+(16*i), ptp_phys_low);
+		hw_write_20kx(hw, VMEM_PTPAH+(16*i), ptp_phys_high);
+	}
+	/* Enable virtual memory transfer */
+	hw_write_20kx(hw, VMEM_CTL, vmctl);
+	/* Enable transport bus master and queueing of request */
+	hw_write_20kx(hw, TRANSPORT_CTL, 0x03);
+	hw_write_20kx(hw, TRANSPORT_INT, 0x200c01);
+	/* Enable transport ring */
+	data = hw_read_20kx(hw, TRANSPORT_ENB);
+	hw_write_20kx(hw, TRANSPORT_ENB, (data | 0x03));
+
+	return 0;
+}
+
+/* Card initialization */
+#define GCTL_AIE	0x00000001
+#define GCTL_UAA	0x00000002
+#define GCTL_DPC	0x00000004
+#define GCTL_DBP	0x00000008
+#define GCTL_ABP	0x00000010
+#define GCTL_TBP	0x00000020
+#define GCTL_SBP	0x00000040
+#define GCTL_FBP	0x00000080
+#define GCTL_ME		0x00000100
+#define GCTL_AID	0x00001000
+
+#define PLLCTL_SRC	0x00000007
+#define PLLCTL_SPE	0x00000008
+#define PLLCTL_RD	0x000000F0
+#define PLLCTL_FD	0x0001FF00
+#define PLLCTL_OD	0x00060000
+#define PLLCTL_B	0x00080000
+#define PLLCTL_AS	0x00100000
+#define PLLCTL_LF	0x03E00000
+#define PLLCTL_SPS	0x1C000000
+#define PLLCTL_AD	0x60000000
+
+#define PLLSTAT_CCS	0x00000007
+#define PLLSTAT_SPL	0x00000008
+#define PLLSTAT_CRD	0x000000F0
+#define PLLSTAT_CFD	0x0001FF00
+#define PLLSTAT_SL	0x00020000
+#define PLLSTAT_FAS	0x00040000
+#define PLLSTAT_B	0x00080000
+#define PLLSTAT_PD	0x00100000
+#define PLLSTAT_OCA	0x00200000
+#define PLLSTAT_NCA	0x00400000
+
+static int hw_pll_init(struct hw *hw, unsigned int rsr)
+{
+	unsigned int pllenb;
+	unsigned int pllctl;
+	unsigned int pllstat;
+	int i;
+
+	pllenb = 0xB;
+	hw_write_20kx(hw, PLL_ENB, pllenb);
+	pllctl = 0x20D00000;
+	set_field(&pllctl, PLLCTL_FD, 16 - 4);
+	hw_write_20kx(hw, PLL_CTL, pllctl);
+	mdelay(40);
+	pllctl = hw_read_20kx(hw, PLL_CTL);
+	set_field(&pllctl, PLLCTL_B, 0);
+	if (48000 == rsr) {
+		set_field(&pllctl, PLLCTL_FD, 16 - 2);
+		set_field(&pllctl, PLLCTL_RD, 1 - 1);
+	} else { /* 44100 */
+		set_field(&pllctl, PLLCTL_FD, 147 - 2);
+		set_field(&pllctl, PLLCTL_RD, 10 - 1);
+	}
+	hw_write_20kx(hw, PLL_CTL, pllctl);
+	mdelay(40);
+	for (i = 0; i < 1000; i++) {
+		pllstat = hw_read_20kx(hw, PLL_STAT);
+		if (get_field(pllstat, PLLSTAT_PD))
+			continue;
+
+		if (get_field(pllstat, PLLSTAT_B) !=
+					get_field(pllctl, PLLCTL_B))
+			continue;
+
+		if (get_field(pllstat, PLLSTAT_CCS) !=
+					get_field(pllctl, PLLCTL_SRC))
+			continue;
+
+		if (get_field(pllstat, PLLSTAT_CRD) !=
+					get_field(pllctl, PLLCTL_RD))
+			continue;
+
+		if (get_field(pllstat, PLLSTAT_CFD) !=
+					get_field(pllctl, PLLCTL_FD))
+			continue;
+
+		break;
+	}
+	if (i >= 1000) {
+		printk(KERN_ALERT "PLL initialization failed!!!\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int hw_auto_init(struct hw *hw)
+{
+	unsigned int gctl;
+	int i;
+
+	gctl = hw_read_20kx(hw, GLOBAL_CNTL_GCTL);
+	set_field(&gctl, GCTL_AIE, 0);
+	hw_write_20kx(hw, GLOBAL_CNTL_GCTL, gctl);
+	set_field(&gctl, GCTL_AIE, 1);
+	hw_write_20kx(hw, GLOBAL_CNTL_GCTL, gctl);
+	mdelay(10);
+	for (i = 0; i < 400000; i++) {
+		gctl = hw_read_20kx(hw, GLOBAL_CNTL_GCTL);
+		if (get_field(gctl, GCTL_AID))
+			break;
+	}
+	if (!get_field(gctl, GCTL_AID)) {
+		printk(KERN_ALERT "Card Auto-init failed!!!\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/* DAC operations */
+
+#define CS4382_MC1 		0x1
+#define CS4382_MC2 		0x2
+#define CS4382_MC3		0x3
+#define CS4382_FC		0x4
+#define CS4382_IC		0x5
+#define CS4382_XC1		0x6
+#define CS4382_VCA1 		0x7
+#define CS4382_VCB1 		0x8
+#define CS4382_XC2		0x9
+#define CS4382_VCA2 		0xA
+#define CS4382_VCB2 		0xB
+#define CS4382_XC3		0xC
+#define CS4382_VCA3		0xD
+#define CS4382_VCB3		0xE
+#define CS4382_XC4 		0xF
+#define CS4382_VCA4 		0x10
+#define CS4382_VCB4 		0x11
+#define CS4382_CREV 		0x12
+
+/* I2C status */
+#define STATE_LOCKED		0x00
+#define STATE_UNLOCKED		0xAA
+#define DATA_READY		0x800000    /* Used with I2C_IF_STATUS */
+#define DATA_ABORT		0x10000     /* Used with I2C_IF_STATUS */
+
+#define I2C_STATUS_DCM	0x00000001
+#define I2C_STATUS_BC	0x00000006
+#define I2C_STATUS_APD	0x00000008
+#define I2C_STATUS_AB	0x00010000
+#define I2C_STATUS_DR	0x00800000
+
+#define I2C_ADDRESS_PTAD	0x0000FFFF
+#define I2C_ADDRESS_SLAD	0x007F0000
+
+struct REGS_CS4382 {
+	u32 dwModeControl_1;
+	u32 dwModeControl_2;
+	u32 dwModeControl_3;
+
+	u32 dwFilterControl;
+	u32 dwInvertControl;
+
+	u32 dwMixControl_P1;
+	u32 dwVolControl_A1;
+	u32 dwVolControl_B1;
+
+	u32 dwMixControl_P2;
+	u32 dwVolControl_A2;
+	u32 dwVolControl_B2;
+
+	u32 dwMixControl_P3;
+	u32 dwVolControl_A3;
+	u32 dwVolControl_B3;
+
+	u32 dwMixControl_P4;
+	u32 dwVolControl_A4;
+	u32 dwVolControl_B4;
+};
+
+static u8 m_bAddressSize, m_bDataSize, m_bDeviceID;
+
+static int I2CUnlockFullAccess(struct hw *hw)
+{
+	u8 UnlockKeySequence_FLASH_FULLACCESS_MODE[2] =  {0xB3, 0xD4};
+
+	/* Send keys for forced BIOS mode */
+	hw_write_20kx(hw, I2C_IF_WLOCK,
+			UnlockKeySequence_FLASH_FULLACCESS_MODE[0]);
+	hw_write_20kx(hw, I2C_IF_WLOCK,
+			UnlockKeySequence_FLASH_FULLACCESS_MODE[1]);
+	/* Check whether the chip is unlocked */
+	if (hw_read_20kx(hw, I2C_IF_WLOCK) == STATE_UNLOCKED)
+		return 0;
+
+	return -1;
+}
+
+static int I2CLockChip(struct hw *hw)
+{
+	/* Write twice */
+	hw_write_20kx(hw, I2C_IF_WLOCK, STATE_LOCKED);
+	hw_write_20kx(hw, I2C_IF_WLOCK, STATE_LOCKED);
+	if (hw_read_20kx(hw, I2C_IF_WLOCK) == STATE_LOCKED)
+		return 0;
+
+	return -1;
+}
+
+static int I2CInit(struct hw *hw, u8 bDeviceID, u8 bAddressSize, u8 bDataSize)
+{
+	int err = 0;
+	unsigned int RegI2CStatus;
+	unsigned int RegI2CAddress;
+
+	err = I2CUnlockFullAccess(hw);
+	if (err < 0)
+		return err;
+
+	m_bAddressSize = bAddressSize;
+	m_bDataSize = bDataSize;
+	m_bDeviceID = bDeviceID;
+
+	RegI2CAddress = 0;
+	set_field(&RegI2CAddress, I2C_ADDRESS_SLAD, bDeviceID);
+
+	hw_write_20kx(hw, I2C_IF_ADDRESS, RegI2CAddress);
+
+	RegI2CStatus = hw_read_20kx(hw, I2C_IF_STATUS);
+
+	set_field(&RegI2CStatus, I2C_STATUS_DCM, 1); /* Direct control mode */
+
+	hw_write_20kx(hw, I2C_IF_STATUS, RegI2CStatus);
+
+	return 0;
+}
+
+static int I2CUninit(struct hw *hw)
+{
+	unsigned int RegI2CStatus;
+	unsigned int RegI2CAddress;
+
+	RegI2CAddress = 0;
+	set_field(&RegI2CAddress, I2C_ADDRESS_SLAD, 0x57); /* I2C id */
+
+	hw_write_20kx(hw, I2C_IF_ADDRESS, RegI2CAddress);
+
+	RegI2CStatus = hw_read_20kx(hw, I2C_IF_STATUS);
+
+	set_field(&RegI2CStatus, I2C_STATUS_DCM, 0); /* I2C mode */
+
+	hw_write_20kx(hw, I2C_IF_STATUS, RegI2CStatus);
+
+	return I2CLockChip(hw);
+}
+
+static int I2CWaitDataReady(struct hw *hw)
+{
+	int i = 0x400000;
+	unsigned int ret = 0;
+
+	do {
+		ret = hw_read_20kx(hw, I2C_IF_STATUS);
+	} while ((!(ret & DATA_READY)) && --i);
+
+	return i;
+}
+
+static int I2CRead(struct hw *hw, u16 wAddress, u32 *pdwData)
+{
+	unsigned int RegI2CStatus;
+
+	RegI2CStatus = hw_read_20kx(hw, I2C_IF_STATUS);
+	set_field(&RegI2CStatus, I2C_STATUS_BC,
+			(4 == m_bAddressSize) ? 0 : m_bAddressSize);
+	hw_write_20kx(hw, I2C_IF_STATUS, RegI2CStatus);
+	if (!I2CWaitDataReady(hw))
+		return -1;
+
+	hw_write_20kx(hw, I2C_IF_WDATA, (u32)wAddress);
+	if (!I2CWaitDataReady(hw))
+		return -1;
+
+	/* Force a read operation */
+	hw_write_20kx(hw, I2C_IF_RDATA, 0);
+	if (!I2CWaitDataReady(hw))
+		return -1;
+
+	*pdwData = hw_read_20kx(hw, I2C_IF_RDATA);
+
+	return 0;
+}
+
+static int I2CWrite(struct hw *hw, u16 wAddress, u32 dwData)
+{
+	unsigned int dwI2CData = (dwData << (m_bAddressSize * 8)) | wAddress;
+	unsigned int RegI2CStatus;
+
+	RegI2CStatus = hw_read_20kx(hw, I2C_IF_STATUS);
+
+	set_field(&RegI2CStatus, I2C_STATUS_BC,
+		  (4 == (m_bAddressSize + m_bDataSize)) ?
+		  0 : (m_bAddressSize + m_bDataSize));
+
+	hw_write_20kx(hw, I2C_IF_STATUS, RegI2CStatus);
+	I2CWaitDataReady(hw);
+	/* Dummy write to trigger the write oprtation */
+	hw_write_20kx(hw, I2C_IF_WDATA, 0);
+	I2CWaitDataReady(hw);
+
+	/* This is the real data */
+	hw_write_20kx(hw, I2C_IF_WDATA, dwI2CData);
+	I2CWaitDataReady(hw);
+
+	return 0;
+}
+
+static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
+{
+	int err = 0;
+	u32 dwData = 0;
+	int i = 0;
+	struct REGS_CS4382 cs4382_Read = {0};
+	struct REGS_CS4382 cs4382_Def = {
+				   0x00000001,  /* Mode Control 1 */
+				   0x00000000,  /* Mode Control 2 */
+				   0x00000084,  /* Mode Control 3 */
+				   0x00000000,  /* Filter Control */
+				   0x00000000,  /* Invert Control */
+				   0x00000024,  /* Mixing Control Pair 1 */
+				   0x00000000,  /* Vol Control A1 */
+				   0x00000000,  /* Vol Control B1 */
+				   0x00000024,  /* Mixing Control Pair 2 */
+				   0x00000000,  /* Vol Control A2 */
+				   0x00000000,  /* Vol Control B2 */
+				   0x00000024,  /* Mixing Control Pair 3 */
+				   0x00000000,  /* Vol Control A3 */
+				   0x00000000,  /* Vol Control B3 */
+				   0x00000024,  /* Mixing Control Pair 4 */
+				   0x00000000,  /* Vol Control A4 */
+				   0x00000000   /* Vol Control B4 */
+				 };
+
+	/* Set DAC reset bit as output */
+	dwData = hw_read_20kx(hw, GPIO_CTRL);
+	dwData |= 0x02;
+	hw_write_20kx(hw, GPIO_CTRL, dwData);
+
+	err = I2CInit(hw, 0x18, 1, 1);
+	if (err < 0)
+		goto End;
+
+	for (i = 0; i < 2; i++) {
+		/* Reset DAC twice just in-case the chip
+		 * didn't initialized properly */
+		dwData = hw_read_20kx(hw, GPIO_DATA);
+		/* GPIO data bit 1 */
+		dwData &= 0xFFFFFFFD;
+		hw_write_20kx(hw, GPIO_DATA, dwData);
+		mdelay(10);
+		dwData |= 0x2;
+		hw_write_20kx(hw, GPIO_DATA, dwData);
+		mdelay(50);
+
+		/* Reset the 2nd time */
+		dwData &= 0xFFFFFFFD;
+		hw_write_20kx(hw, GPIO_DATA, dwData);
+		mdelay(10);
+		dwData |= 0x2;
+		hw_write_20kx(hw, GPIO_DATA, dwData);
+		mdelay(50);
+
+		if (I2CRead(hw, CS4382_MC1,  &cs4382_Read.dwModeControl_1))
+			continue;
+
+		if (I2CRead(hw, CS4382_MC2,  &cs4382_Read.dwModeControl_2))
+			continue;
+
+		if (I2CRead(hw, CS4382_MC3,  &cs4382_Read.dwModeControl_3))
+			continue;
+
+		if (I2CRead(hw, CS4382_FC,   &cs4382_Read.dwFilterControl))
+			continue;
+
+		if (I2CRead(hw, CS4382_IC,   &cs4382_Read.dwInvertControl))
+			continue;
+
+		if (I2CRead(hw, CS4382_XC1,  &cs4382_Read.dwMixControl_P1))
+			continue;
+
+		if (I2CRead(hw, CS4382_VCA1, &cs4382_Read.dwVolControl_A1))
+			continue;
+
+		if (I2CRead(hw, CS4382_VCB1, &cs4382_Read.dwVolControl_B1))
+			continue;
+
+		if (I2CRead(hw, CS4382_XC2,  &cs4382_Read.dwMixControl_P2))
+			continue;
+
+		if (I2CRead(hw, CS4382_VCA2, &cs4382_Read.dwVolControl_A2))
+			continue;
+
+		if (I2CRead(hw, CS4382_VCB2, &cs4382_Read.dwVolControl_B2))
+			continue;
+
+		if (I2CRead(hw, CS4382_XC3,  &cs4382_Read.dwMixControl_P3))
+			continue;
+
+		if (I2CRead(hw, CS4382_VCA3, &cs4382_Read.dwVolControl_A3))
+			continue;
+
+		if (I2CRead(hw, CS4382_VCB3, &cs4382_Read.dwVolControl_B3))
+			continue;
+
+		if (I2CRead(hw, CS4382_XC4,  &cs4382_Read.dwMixControl_P4))
+			continue;
+
+		if (I2CRead(hw, CS4382_VCA4, &cs4382_Read.dwVolControl_A4))
+			continue;
+
+		if (I2CRead(hw, CS4382_VCB4, &cs4382_Read.dwVolControl_B4))
+			continue;
+
+		if (memcmp(&cs4382_Read, &cs4382_Def,
+						sizeof(struct REGS_CS4382)))
+			continue;
+		else
+			break;
+	}
+
+	if (i >= 2)
+		goto End;
+
+	/* Note: Every I2C write must have some delay.
+	 * This is not a requirement but the delay works here... */
+	I2CWrite(hw, CS4382_MC1, 0x80);
+	I2CWrite(hw, CS4382_MC2, 0x10);
+	if (1 == info->msr) {
+		I2CWrite(hw, CS4382_XC1, 0x24);
+		I2CWrite(hw, CS4382_XC2, 0x24);
+		I2CWrite(hw, CS4382_XC3, 0x24);
+		I2CWrite(hw, CS4382_XC4, 0x24);
+	} else if (2 == info->msr) {
+		I2CWrite(hw, CS4382_XC1, 0x25);
+		I2CWrite(hw, CS4382_XC2, 0x25);
+		I2CWrite(hw, CS4382_XC3, 0x25);
+		I2CWrite(hw, CS4382_XC4, 0x25);
+	} else {
+		I2CWrite(hw, CS4382_XC1, 0x26);
+		I2CWrite(hw, CS4382_XC2, 0x26);
+		I2CWrite(hw, CS4382_XC3, 0x26);
+		I2CWrite(hw, CS4382_XC4, 0x26);
+	}
+
+	return 0;
+End:
+
+	I2CUninit(hw);
+	return -1;
+}
+
+/* ADC operations */
+#define MAKE_WM8775_ADDR(addr, data)	(u32)(((addr<<1)&0xFE)|((data>>8)&0x1))
+#define MAKE_WM8775_DATA(data)	(u32)(data&0xFF)
+
+#define WM8775_IC       0x0B
+#define WM8775_MMC      0x0C
+#define WM8775_AADCL    0x0E
+#define WM8775_AADCR    0x0F
+#define WM8775_ADCMC    0x15
+#define WM8775_RESET    0x17
+
+static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
+{
+	u32 data = 0;
+
+	data = hw_read_20kx(hw, GPIO_DATA);
+	switch (type) {
+	case ADC_MICIN:
+		data = (data & (0x1 << 14)) ? 1 : 0;
+		break;
+	case ADC_LINEIN:
+		data = (data & (0x1 << 14)) ? 0 : 1;
+		break;
+	default:
+		data = 0;
+	}
+	return data;
+}
+
+static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
+{
+	u32 data = 0;
+
+	data = hw_read_20kx(hw, GPIO_DATA);
+	switch (type) {
+	case ADC_MICIN:
+		data |= (0x1 << 14);
+		hw_write_20kx(hw, GPIO_DATA, data);
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101),
+				MAKE_WM8775_DATA(0x101)); /* Mic-in */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xE7),
+				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xE7),
+				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
+		break;
+	case ADC_LINEIN:
+		data &= ~(0x1 << 14);
+		hw_write_20kx(hw, GPIO_DATA, data);
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102),
+				MAKE_WM8775_DATA(0x102)); /* Line-in */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF),
+				MAKE_WM8775_DATA(0xCF)); /* No boost */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
+				MAKE_WM8775_DATA(0xCF)); /* No boost */
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
+{
+	int err = 0;
+	u32 dwMux = 2, dwData = 0, dwCtl = 0;
+
+	/*  Set ADC reset bit as output */
+	dwData = hw_read_20kx(hw, GPIO_CTRL);
+	dwData |= (0x1 << 15);
+	hw_write_20kx(hw, GPIO_CTRL, dwData);
+
+	/* Initialize I2C */
+	err = I2CInit(hw, 0x1A, 1, 1);
+	if (err < 0) {
+		printk(KERN_ALERT "Failure to acquire I2C!!!\n");
+		goto error;
+	}
+
+	/* Make ADC in normal operation */
+	dwData = hw_read_20kx(hw, GPIO_DATA);
+	dwData &= ~(0x1 << 15);
+	mdelay(10);
+	dwData |= (0x1 << 15);
+	hw_write_20kx(hw, GPIO_DATA, dwData);
+	mdelay(50);
+
+	/* Set the master mode (256fs) */
+	if (1 == info->msr) {
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x02),
+						MAKE_WM8775_DATA(0x02));
+	} else if (2 == info->msr) {
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x0A),
+						MAKE_WM8775_DATA(0x0A));
+	} else {
+		printk(KERN_ALERT "Invalid master sampling "
+				  "rate (msr %d)!!!\n", info->msr);
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* Configure GPIO bit 14 change to line-in/mic-in */
+	dwCtl = hw_read_20kx(hw, GPIO_CTRL);
+	dwCtl |= 0x1<<14;
+	hw_write_20kx(hw, GPIO_CTRL, dwCtl);
+
+	/* Check using Mic-in or Line-in */
+	dwData = hw_read_20kx(hw, GPIO_DATA);
+
+	if (dwMux == 1) {
+		/* Configures GPIO data to select Mic-in */
+		dwData |= 0x1<<14;
+		hw_write_20kx(hw, GPIO_DATA, dwData);
+
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101),
+				MAKE_WM8775_DATA(0x101)); /* Mic-in */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xE7),
+				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xE7),
+				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
+	} else if (dwMux == 2) {
+		/* Configures GPIO data to select Line-in */
+		dwData &= ~(0x1<<14);
+		hw_write_20kx(hw, GPIO_DATA, dwData);
+
+		/* Setup ADC */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102),
+				MAKE_WM8775_DATA(0x102)); /* Line-in */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF),
+				MAKE_WM8775_DATA(0xCF)); /* No boost */
+		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
+				MAKE_WM8775_DATA(0xCF)); /* No boost */
+	} else {
+		printk(KERN_ALERT "ERROR!!! Invalid input mux!!!\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	I2CUninit(hw);
+	return err;
+}
+
+static int hw_have_digit_io_switch(struct hw *hw)
+{
+	return 0;
+}
+
+static int hw_card_start(struct hw *hw)
+{
+	int err = 0;
+	struct pci_dev *pci = hw->pci;
+	unsigned int gctl;
+	unsigned int dma_mask = 0;
+
+	err = pci_enable_device(pci);
+	if (err < 0)
+		return err;
+
+	/* Set DMA transfer mask */
+	dma_mask = CT_XFI_DMA_MASK;
+	if (pci_set_dma_mask(pci, dma_mask) < 0 ||
+	    pci_set_consistent_dma_mask(pci, dma_mask) < 0) {
+		printk(KERN_ERR "architecture does not support PCI "
+		"busmaster DMA with mask 0x%x\n", dma_mask);
+		err = -ENXIO;
+		goto error1;
+	}
+
+	err = pci_request_regions(pci, "XFi");
+	if (err < 0)
+		goto error1;
+
+	hw->io_base = pci_resource_start(hw->pci, 2);
+	hw->mem_base = (unsigned long)ioremap(hw->io_base,
+					pci_resource_len(hw->pci, 2));
+	if (NULL == (void *)hw->mem_base) {
+		err = -ENOENT;
+		goto error2;
+	}
+
+	/* Switch to 20k2 mode from UAA mode. */
+	gctl = hw_read_20kx(hw, GLOBAL_CNTL_GCTL);
+	set_field(&gctl, GCTL_UAA, 0);
+	hw_write_20kx(hw, GLOBAL_CNTL_GCTL, gctl);
+
+	/*if ((err = request_irq(pci->irq, ct_atc_interrupt, IRQF_SHARED,
+				atc->chip_details->nm_card, hw))) {
+		goto error3;
+	}
+	hw->irq = pci->irq;
+	*/
+
+	pci_set_master(pci);
+
+	return 0;
+
+/*error3:
+	iounmap((void *)hw->mem_base);
+	hw->mem_base = (unsigned long)NULL;*/
+error2:
+	pci_release_regions(pci);
+	hw->io_base = 0;
+error1:
+	pci_disable_device(pci);
+	return err;
+}
+
+static int hw_card_stop(struct hw *hw)
+{
+	/* TODO: Disable interrupt and so on... */
+	return 0;
+}
+
+static int hw_card_shutdown(struct hw *hw)
+{
+	if (hw->irq >= 0)
+		free_irq(hw->irq, hw);
+
+	hw->irq	= -1;
+
+	if (NULL != ((void *)hw->mem_base))
+		iounmap((void *)hw->mem_base);
+
+	hw->mem_base = (unsigned long)NULL;
+
+	if (hw->io_base)
+		pci_release_regions(hw->pci);
+
+	hw->io_base = 0;
+
+	pci_disable_device(hw->pci);
+
+	return 0;
+}
+
+static int hw_card_init(struct hw *hw, struct card_conf *info)
+{
+	int err;
+	unsigned int gctl;
+	u32 data = 0;
+	struct dac_conf dac_info = {0};
+	struct adc_conf adc_info = {0};
+	struct daio_conf daio_info = {0};
+	struct trn_conf trn_info = {0};
+
+	/* Get PCI io port/memory base address and
+	 * do 20kx core switch if needed. */
+	if (!hw->io_base) {
+		err = hw_card_start(hw);
+		if (err)
+			return err;
+	}
+
+	/* PLL init */
+	err = hw_pll_init(hw, info->rsr);
+	if (err < 0)
+		return err;
+
+	/* kick off auto-init */
+	err = hw_auto_init(hw);
+	if (err < 0)
+		return err;
+
+	gctl = hw_read_20kx(hw, GLOBAL_CNTL_GCTL);
+	set_field(&gctl, GCTL_DBP, 1);
+	set_field(&gctl, GCTL_TBP, 1);
+	set_field(&gctl, GCTL_FBP, 1);
+	set_field(&gctl, GCTL_DPC, 0);
+	hw_write_20kx(hw, GLOBAL_CNTL_GCTL, gctl);
+
+	/* Reset all global pending interrupts */
+	hw_write_20kx(hw, INTERRUPT_GIE, 0);
+	/* Reset all SRC pending interrupts */
+	hw_write_20kx(hw, SRC_IP, 0);
+
+	/* TODO: detect the card ID and configure GPIO accordingly. */
+	/* Configures GPIO (0xD802 0x98028) */
+	/*hw_write_20kx(hw, GPIO_CTRL, 0x7F07);*/
+	/* Configures GPIO (SB0880) */
+	/*hw_write_20kx(hw, GPIO_CTRL, 0xFF07);*/
+	hw_write_20kx(hw, GPIO_CTRL, 0xD802);
+
+	/* Enable audio ring */
+	hw_write_20kx(hw, MIXER_AR_ENABLE, 0x01);
+
+	trn_info.vm_pgt_phys = info->vm_pgt_phys;
+	err = hw_trn_init(hw, &trn_info);
+	if (err < 0)
+		return err;
+
+	daio_info.msr = info->msr;
+	err = hw_daio_init(hw, &daio_info);
+	if (err < 0)
+		return err;
+
+	dac_info.msr = info->msr;
+	err = hw_dac_init(hw, &dac_info);
+	if (err < 0)
+		return err;
+
+	adc_info.msr = info->msr;
+	adc_info.input = ADC_LINEIN;
+	adc_info.mic20db = 0;
+	err = hw_adc_init(hw, &adc_info);
+	if (err < 0)
+		return err;
+
+	data = hw_read_20kx(hw, SRC_MCTL);
+	data |= 0x1; /* Enables input from the audio ring */
+	hw_write_20kx(hw, SRC_MCTL, data);
+
+	return 0;
+}
+
+static u32 hw_read_20kx(struct hw *hw, u32 reg)
+{
+	return readl((void *)(hw->mem_base + reg));
+}
+
+static void hw_write_20kx(struct hw *hw, u32 reg, u32 data)
+{
+	writel(data, (void *)(hw->mem_base + reg));
+}
+
+int create_20k2_hw_obj(struct hw **rhw)
+{
+	struct hw *hw;
+
+	*rhw = NULL;
+	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
+	if (NULL == hw)
+		return -ENOMEM;
+
+	hw->io_base = 0;
+	hw->mem_base = (unsigned long)NULL;
+	hw->irq = -1;
+
+	hw->card_init = hw_card_init;
+	hw->card_stop = hw_card_stop;
+	hw->pll_init = hw_pll_init;
+	hw->is_adc_source_selected = hw_is_adc_input_selected;
+	hw->select_adc_source = hw_adc_input_select;
+	hw->have_digit_io_switch = hw_have_digit_io_switch;
+
+	hw->src_rsc_get_ctrl_blk = src_get_rsc_ctrl_blk;
+	hw->src_rsc_put_ctrl_blk = src_put_rsc_ctrl_blk;
+	hw->src_mgr_get_ctrl_blk = src_mgr_get_ctrl_blk;
+	hw->src_mgr_put_ctrl_blk = src_mgr_put_ctrl_blk;
+	hw->src_set_state = src_set_state;
+	hw->src_set_bm = src_set_bm;
+	hw->src_set_rsr = src_set_rsr;
+	hw->src_set_sf = src_set_sf;
+	hw->src_set_wr = src_set_wr;
+	hw->src_set_pm = src_set_pm;
+	hw->src_set_rom = src_set_rom;
+	hw->src_set_vo = src_set_vo;
+	hw->src_set_st = src_set_st;
+	hw->src_set_ie = src_set_ie;
+	hw->src_set_ilsz = src_set_ilsz;
+	hw->src_set_bp = src_set_bp;
+	hw->src_set_cisz = src_set_cisz;
+	hw->src_set_ca = src_set_ca;
+	hw->src_set_sa = src_set_sa;
+	hw->src_set_la = src_set_la;
+	hw->src_set_pitch = src_set_pitch;
+	hw->src_set_dirty = src_set_dirty;
+	hw->src_set_clear_zbufs = src_set_clear_zbufs;
+	hw->src_set_dirty_all = src_set_dirty_all;
+	hw->src_commit_write = src_commit_write;
+	hw->src_get_ca = src_get_ca;
+	hw->src_get_dirty = src_get_dirty;
+	hw->src_dirty_conj_mask = src_dirty_conj_mask;
+	hw->src_mgr_enbs_src = src_mgr_enbs_src;
+	hw->src_mgr_enb_src = src_mgr_enb_src;
+	hw->src_mgr_dsb_src = src_mgr_dsb_src;
+	hw->src_mgr_commit_write = src_mgr_commit_write;
+
+	hw->srcimp_mgr_get_ctrl_blk = srcimp_mgr_get_ctrl_blk;
+	hw->srcimp_mgr_put_ctrl_blk = srcimp_mgr_put_ctrl_blk;
+	hw->srcimp_mgr_set_imaparc = srcimp_mgr_set_imaparc;
+	hw->srcimp_mgr_set_imapuser = srcimp_mgr_set_imapuser;
+	hw->srcimp_mgr_set_imapnxt = srcimp_mgr_set_imapnxt;
+	hw->srcimp_mgr_set_imapaddr = srcimp_mgr_set_imapaddr;
+	hw->srcimp_mgr_commit_write = srcimp_mgr_commit_write;
+
+	hw->amixer_rsc_get_ctrl_blk = amixer_rsc_get_ctrl_blk;
+	hw->amixer_rsc_put_ctrl_blk = amixer_rsc_put_ctrl_blk;
+	hw->amixer_mgr_get_ctrl_blk = amixer_mgr_get_ctrl_blk;
+	hw->amixer_mgr_put_ctrl_blk = amixer_mgr_put_ctrl_blk;
+	hw->amixer_set_mode = amixer_set_mode;
+	hw->amixer_set_iv = amixer_set_iv;
+	hw->amixer_set_x = amixer_set_x;
+	hw->amixer_set_y = amixer_set_y;
+	hw->amixer_set_sadr = amixer_set_sadr;
+	hw->amixer_set_se = amixer_set_se;
+	hw->amixer_set_dirty = amixer_set_dirty;
+	hw->amixer_set_dirty_all = amixer_set_dirty_all;
+	hw->amixer_commit_write = amixer_commit_write;
+	hw->amixer_get_y = amixer_get_y;
+	hw->amixer_get_dirty = amixer_get_dirty;
+
+	hw->dai_get_ctrl_blk = dai_get_ctrl_blk;
+	hw->dai_put_ctrl_blk = dai_put_ctrl_blk;
+	hw->dai_srt_set_srco = dai_srt_set_srco;
+	hw->dai_srt_set_srcm = dai_srt_set_srcm;
+	hw->dai_srt_set_rsr = dai_srt_set_rsr;
+	hw->dai_srt_set_drat = dai_srt_set_drat;
+	hw->dai_srt_set_ec = dai_srt_set_ec;
+	hw->dai_srt_set_et = dai_srt_set_et;
+	hw->dai_commit_write = dai_commit_write;
+
+	hw->dao_get_ctrl_blk = dao_get_ctrl_blk;
+	hw->dao_put_ctrl_blk = dao_put_ctrl_blk;
+	hw->dao_set_spos = dao_set_spos;
+	hw->dao_commit_write = dao_commit_write;
+	hw->dao_get_spos = dao_get_spos;
+
+	hw->daio_mgr_get_ctrl_blk = daio_mgr_get_ctrl_blk;
+	hw->daio_mgr_put_ctrl_blk = daio_mgr_put_ctrl_blk;
+	hw->daio_mgr_enb_dai = daio_mgr_enb_dai;
+	hw->daio_mgr_dsb_dai = daio_mgr_dsb_dai;
+	hw->daio_mgr_enb_dao = daio_mgr_enb_dao;
+	hw->daio_mgr_dsb_dao = daio_mgr_dsb_dao;
+	hw->daio_mgr_dao_init = daio_mgr_dao_init;
+	hw->daio_mgr_set_imaparc = daio_mgr_set_imaparc;
+	hw->daio_mgr_set_imapnxt = daio_mgr_set_imapnxt;
+	hw->daio_mgr_set_imapaddr = daio_mgr_set_imapaddr;
+	hw->daio_mgr_commit_write = daio_mgr_commit_write;
+
+	*rhw = hw;
+
+	return 0;
+}
+
+int destroy_20k2_hw_obj(struct hw *hw)
+{
+	if (hw->io_base)
+		hw_card_shutdown(hw);
+
+	kfree(hw);
+	return 0;
+}
diff --git a/sound/pci/ctxfi/cthw20k2.h b/sound/pci/ctxfi/cthw20k2.h
new file mode 100644
index 0000000..d2b7daa
--- /dev/null
+++ b/sound/pci/ctxfi/cthw20k2.h
@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	cthw20k2.h
+ *
+ * @Brief
+ * This file contains the definition of hardware access methord.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTHW20K2_H
+#define CTHW20K2_H
+
+#include "cthardware.h"
+
+int create_20k2_hw_obj(struct hw **rhw);
+int destroy_20k2_hw_obj(struct hw *hw);
+
+#endif /* CTHW20K2_H */
diff --git a/sound/pci/ctxfi/ctimap.c b/sound/pci/ctxfi/ctimap.c
new file mode 100644
index 0000000..d34eacd
--- /dev/null
+++ b/sound/pci/ctxfi/ctimap.c
@@ -0,0 +1,112 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctimap.c
+ *
+ * @Brief
+ * This file contains the implementation of generic input mapper operations
+ * for input mapper management.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 23 2008
+ *
+ */
+
+#include "ctimap.h"
+#include <linux/slab.h>
+
+int input_mapper_add(struct list_head *mappers, struct imapper *entry,
+		     int (*map_op)(void *, struct imapper *), void *data)
+{
+	struct list_head *pos, *pre, *head;
+	struct imapper *pre_ent, *pos_ent;
+
+	head = mappers;
+
+	if (list_empty(head)) {
+		entry->next = entry->addr;
+		map_op(data, entry);
+		list_add(&entry->list, head);
+		return 0;
+	}
+
+	list_for_each(pos, head) {
+		pos_ent = list_entry(pos, struct imapper, list);
+		if (pos_ent->slot > entry->slot) {
+			/* found a position in list */
+			break;
+		}
+	}
+
+	if (pos != head) {
+		pre = pos->prev;
+		if (pre == head)
+			pre = head->prev;
+
+		__list_add(&entry->list, pos->prev, pos);
+	} else {
+		pre = head->prev;
+		pos = head->next;
+		list_add_tail(&entry->list, head);
+	}
+
+	pre_ent = list_entry(pre, struct imapper, list);
+	pos_ent = list_entry(pos, struct imapper, list);
+
+	entry->next = pos_ent->addr;
+	map_op(data, entry);
+	pre_ent->next = entry->addr;
+	map_op(data, pre_ent);
+
+	return 0;
+}
+
+int input_mapper_delete(struct list_head *mappers, struct imapper *entry,
+		     int (*map_op)(void *, struct imapper *), void *data)
+{
+	struct list_head *next, *pre, *head;
+	struct imapper *pre_ent, *next_ent;
+
+	head = mappers;
+
+	if (list_empty(head))
+		return 0;
+
+	pre = (entry->list.prev == head) ? head->prev : entry->list.prev;
+	next = (entry->list.next == head) ? head->next : entry->list.next;
+
+	if (pre == &entry->list) {
+		/* entry is the only one node in mappers list */
+		entry->next = entry->addr = entry->user = entry->slot = 0;
+		map_op(data, entry);
+		list_del(&entry->list);
+		return 0;
+	}
+
+	pre_ent = list_entry(pre, struct imapper, list);
+	next_ent = list_entry(next, struct imapper, list);
+
+	pre_ent->next = next_ent->addr;
+	map_op(data, pre_ent);
+	list_del(&entry->list);
+
+	return 0;
+}
+
+void free_input_mapper_list(struct list_head *head)
+{
+	struct imapper *entry = NULL;
+	struct list_head *pos = NULL;
+
+	while (!list_empty(head)) {
+		pos = head->next;
+		list_del(pos);
+		entry = list_entry(pos, struct imapper, list);
+		kfree(entry);
+	}
+}
+
diff --git a/sound/pci/ctxfi/ctimap.h b/sound/pci/ctxfi/ctimap.h
new file mode 100644
index 0000000..53ccf9b
--- /dev/null
+++ b/sound/pci/ctxfi/ctimap.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctimap.h
+ *
+ * @Brief
+ * This file contains the definition of generic input mapper operations
+ * for input mapper management.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 23 2008
+ *
+ */
+
+#ifndef CTIMAP_H
+#define CTIMAP_H
+
+#include <linux/list.h>
+
+struct imapper {
+	unsigned short slot; /* the id of the slot containing input data */
+	unsigned short user; /* the id of the user resource consuming data */
+	unsigned short addr; /* the input mapper ram id */
+	unsigned short next; /* the next input mapper ram id */
+	struct list_head	list;
+};
+
+int input_mapper_add(struct list_head *mappers, struct imapper *entry,
+		     int (*map_op)(void *, struct imapper *), void *data);
+
+int input_mapper_delete(struct list_head *mappers, struct imapper *entry,
+		     int (*map_op)(void *, struct imapper *), void *data);
+
+void free_input_mapper_list(struct list_head *mappers);
+
+#endif /* CTIMAP_H */
diff --git a/sound/pci/ctxfi/ctmixer.c b/sound/pci/ctxfi/ctmixer.c
new file mode 100644
index 0000000..c80d692
--- /dev/null
+++ b/sound/pci/ctxfi/ctmixer.c
@@ -0,0 +1,1108 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctmixer.c
+ *
+ * @Brief
+ * This file contains the implementation of alsa mixer device functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 28 2008
+ *
+ */
+
+
+#include "ctmixer.h"
+#include "ctamixer.h"
+#include <sound/core.h>
+#include <sound/control.h>
+#include <sound/asoundef.h>
+#include <sound/pcm.h>
+#include <linux/slab.h>
+
+enum CT_SUM_CTL {
+	SUM_IN_F,
+	SUM_IN_R,
+	SUM_IN_C,
+	SUM_IN_S,
+	SUM_IN_F_C,
+
+	NUM_CT_SUMS
+};
+
+enum CT_AMIXER_CTL {
+	/* volume control mixers */
+	AMIXER_MASTER_F,
+	AMIXER_MASTER_R,
+	AMIXER_MASTER_C,
+	AMIXER_MASTER_S,
+	AMIXER_PCM_F,
+	AMIXER_PCM_R,
+	AMIXER_PCM_C,
+	AMIXER_PCM_S,
+	AMIXER_SPDIFI,
+	AMIXER_LINEIN,
+	AMIXER_MIC,
+	AMIXER_SPDIFO,
+	AMIXER_WAVE_F,
+	AMIXER_WAVE_R,
+	AMIXER_WAVE_C,
+	AMIXER_WAVE_S,
+	AMIXER_MASTER_F_C,
+	AMIXER_PCM_F_C,
+	AMIXER_SPDIFI_C,
+	AMIXER_LINEIN_C,
+	AMIXER_MIC_C,
+
+	/* this should always be the last one */
+	NUM_CT_AMIXERS
+};
+
+enum CTALSA_MIXER_CTL {
+	/* volume control mixers */
+	MIXER_MASTER_P,
+	MIXER_PCM_P,
+	MIXER_LINEIN_P,
+	MIXER_MIC_P,
+	MIXER_SPDIFI_P,
+	MIXER_SPDIFO_P,
+	MIXER_WAVEF_P,
+	MIXER_WAVER_P,
+	MIXER_WAVEC_P,
+	MIXER_WAVES_P,
+	MIXER_MASTER_C,
+	MIXER_PCM_C,
+	MIXER_LINEIN_C,
+	MIXER_MIC_C,
+	MIXER_SPDIFI_C,
+
+	/* switch control mixers */
+	MIXER_PCM_C_S,
+	MIXER_LINEIN_C_S,
+	MIXER_MIC_C_S,
+	MIXER_SPDIFI_C_S,
+	MIXER_LINEIN_P_S,
+	MIXER_SPDIFO_P_S,
+	MIXER_SPDIFI_P_S,
+	MIXER_WAVEF_P_S,
+	MIXER_WAVER_P_S,
+	MIXER_WAVEC_P_S,
+	MIXER_WAVES_P_S,
+	MIXER_DIGITAL_IO_S,
+	MIXER_IEC958_MASK,
+	MIXER_IEC958_DEFAULT,
+	MIXER_IEC958_STREAM,
+
+	/* this should always be the last one */
+	NUM_CTALSA_MIXERS
+};
+
+#define VOL_MIXER_START		MIXER_MASTER_P
+#define VOL_MIXER_END		MIXER_SPDIFI_C
+#define VOL_MIXER_NUM		(VOL_MIXER_END - VOL_MIXER_START + 1)
+#define SWH_MIXER_START		MIXER_PCM_C_S
+#define SWH_MIXER_END		MIXER_DIGITAL_IO_S
+#define SWH_CAPTURE_START	MIXER_PCM_C_S
+#define SWH_CAPTURE_END		MIXER_SPDIFI_C_S
+
+#define CHN_NUM		2
+
+struct ct_kcontrol_init {
+	unsigned char ctl;
+	char *name;
+};
+
+static struct ct_kcontrol_init
+ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = {
+	[MIXER_MASTER_P] = {
+		.ctl = 1,
+		.name = "Master Playback Volume",
+	},
+	[MIXER_MASTER_C] = {
+		.ctl = 1,
+		.name = "Master Capture Volume",
+	},
+	[MIXER_PCM_P] = {
+		.ctl = 1,
+		.name = "PCM Playback Volume",
+	},
+	[MIXER_PCM_C] = {
+		.ctl = 1,
+		.name = "PCM Capture Volume",
+	},
+	[MIXER_LINEIN_P] = {
+		.ctl = 1,
+		.name = "Line-in Playback Volume",
+	},
+	[MIXER_LINEIN_C] = {
+		.ctl = 1,
+		.name = "Line-in Capture Volume",
+	},
+	[MIXER_MIC_P] = {
+		.ctl = 1,
+		.name = "Mic Playback Volume",
+	},
+	[MIXER_MIC_C] = {
+		.ctl = 1,
+		.name = "Mic Capture Volume",
+	},
+	[MIXER_SPDIFI_P] = {
+		.ctl = 1,
+		.name = "S/PDIF-in Playback Volume",
+	},
+	[MIXER_SPDIFI_C] = {
+		.ctl = 1,
+		.name = "S/PDIF-in Capture Volume",
+	},
+	[MIXER_SPDIFO_P] = {
+		.ctl = 1,
+		.name = "S/PDIF-out Playback Volume",
+	},
+	[MIXER_WAVEF_P] = {
+		.ctl = 1,
+		.name = "Front Playback Volume",
+	},
+	[MIXER_WAVES_P] = {
+		.ctl = 1,
+		.name = "Surround Playback Volume",
+	},
+	[MIXER_WAVEC_P] = {
+		.ctl = 1,
+		.name = "Center/LFE Playback Volume",
+	},
+	[MIXER_WAVER_P] = {
+		.ctl = 1,
+		.name = "Rear Playback Volume",
+	},
+
+	[MIXER_PCM_C_S] = {
+		.ctl = 1,
+		.name = "PCM Capture Switch",
+	},
+	[MIXER_LINEIN_C_S] = {
+		.ctl = 1,
+		.name = "Line-in Capture Switch",
+	},
+	[MIXER_MIC_C_S] = {
+		.ctl = 1,
+		.name = "Mic Capture Switch",
+	},
+	[MIXER_SPDIFI_C_S] = {
+		.ctl = 1,
+		.name = "S/PDIF-in Capture Switch",
+	},
+	[MIXER_LINEIN_P_S] = {
+		.ctl = 1,
+		.name = "Line-in Playback Switch",
+	},
+	[MIXER_SPDIFO_P_S] = {
+		.ctl = 1,
+		.name = "S/PDIF-out Playback Switch",
+	},
+	[MIXER_SPDIFI_P_S] = {
+		.ctl = 1,
+		.name = "S/PDIF-in Playback Switch",
+	},
+	[MIXER_WAVEF_P_S] = {
+		.ctl = 1,
+		.name = "Front Playback Switch",
+	},
+	[MIXER_WAVES_P_S] = {
+		.ctl = 1,
+		.name = "Surround Playback Switch",
+	},
+	[MIXER_WAVEC_P_S] = {
+		.ctl = 1,
+		.name = "Center/LFE Playback Switch",
+	},
+	[MIXER_WAVER_P_S] = {
+		.ctl = 1,
+		.name = "Rear Playback Switch",
+	},
+	[MIXER_DIGITAL_IO_S] = {
+		.ctl = 0,
+		.name = "Digit-IO Playback Switch",
+	},
+};
+
+static void
+ct_mixer_recording_select(struct ct_mixer *mixer, enum CT_AMIXER_CTL type);
+
+static void
+ct_mixer_recording_unselect(struct ct_mixer *mixer, enum CT_AMIXER_CTL type);
+
+static struct snd_kcontrol *kctls[2] = {NULL};
+
+static enum CT_AMIXER_CTL get_amixer_index(enum CTALSA_MIXER_CTL alsa_index)
+{
+	switch (alsa_index) {
+	case MIXER_MASTER_P:	return AMIXER_MASTER_F;
+	case MIXER_MASTER_C:	return AMIXER_MASTER_F_C;
+	case MIXER_PCM_P:	return AMIXER_PCM_F;
+	case MIXER_PCM_C:
+	case MIXER_PCM_C_S:	return AMIXER_PCM_F_C;
+	case MIXER_LINEIN_P:	return AMIXER_LINEIN;
+	case MIXER_LINEIN_C:
+	case MIXER_LINEIN_C_S:	return AMIXER_LINEIN_C;
+	case MIXER_MIC_P:	return AMIXER_MIC;
+	case MIXER_MIC_C:
+	case MIXER_MIC_C_S:	return AMIXER_MIC_C;
+	case MIXER_SPDIFI_P:	return AMIXER_SPDIFI;
+	case MIXER_SPDIFI_C:
+	case MIXER_SPDIFI_C_S:	return AMIXER_SPDIFI_C;
+	case MIXER_SPDIFO_P:	return AMIXER_SPDIFO;
+	case MIXER_WAVEF_P:	return AMIXER_WAVE_F;
+	case MIXER_WAVES_P:	return AMIXER_WAVE_S;
+	case MIXER_WAVEC_P:	return AMIXER_WAVE_C;
+	case MIXER_WAVER_P:	return AMIXER_WAVE_R;
+	default:		return NUM_CT_AMIXERS;
+	}
+}
+
+static enum CT_AMIXER_CTL get_recording_amixer(enum CT_AMIXER_CTL index)
+{
+	switch (index) {
+	case AMIXER_MASTER_F:	return AMIXER_MASTER_F_C;
+	case AMIXER_PCM_F:	return AMIXER_PCM_F_C;
+	case AMIXER_SPDIFI:	return AMIXER_SPDIFI_C;
+	case AMIXER_LINEIN:	return AMIXER_LINEIN_C;
+	case AMIXER_MIC:	return AMIXER_MIC_C;
+	default:		return NUM_CT_AMIXERS;
+	}
+}
+
+static unsigned char
+get_switch_state(struct ct_mixer *mixer, enum CTALSA_MIXER_CTL type)
+{
+	return (mixer->switch_state & (0x1 << (type - SWH_MIXER_START)))
+		? 1 : 0;
+}
+
+static void
+set_switch_state(struct ct_mixer *mixer,
+		 enum CTALSA_MIXER_CTL type, unsigned char state)
+{
+	if (state)
+		mixer->switch_state |= (0x1 << (type - SWH_MIXER_START));
+	else
+		mixer->switch_state &= ~(0x1 << (type - SWH_MIXER_START));
+}
+
+/* Map integer value ranging from 0 to 65535 to 14-bit float value ranging
+ * from 2^-6 to (1+1023/1024) */
+static unsigned int uint16_to_float14(unsigned int x)
+{
+	unsigned int i = 0;
+
+	if (x < 17)
+		return 0;
+
+	x *= 2031;
+	x /= 65535;
+	x += 16;
+
+	/* i <= 6 */
+	for (i = 0; !(x & 0x400); i++)
+		x <<= 1;
+
+	x = (((7 - i) & 0x7) << 10) | (x & 0x3ff);
+
+	return x;
+}
+
+static unsigned int float14_to_uint16(unsigned int x)
+{
+	unsigned int e = 0;
+
+	if (!x)
+		return x;
+
+	e = (x >> 10) & 0x7;
+	x &= 0x3ff;
+	x += 1024;
+	x >>= (7 - e);
+	x -= 16;
+	x *= 65535;
+	x /= 2031;
+
+	return x;
+}
+
+static int ct_alsa_mix_volume_info(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 2;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 43690;
+	uinfo->value.integer.step = 128;
+
+	return 0;
+}
+
+static int ct_alsa_mix_volume_get(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	enum CT_AMIXER_CTL type = get_amixer_index(kcontrol->private_value);
+	struct amixer *amixer = NULL;
+	int i = 0;
+
+	for (i = 0; i < 2; i++) {
+		amixer = ((struct ct_mixer *)atc->mixer)->
+						amixers[type*CHN_NUM+i];
+		/* Convert 14-bit float-point scale to 16-bit integer volume */
+		ucontrol->value.integer.value[i] =
+		(float14_to_uint16(amixer->ops->get_scale(amixer)) & 0xffff);
+	}
+
+	return 0;
+}
+
+static int ct_alsa_mix_volume_put(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	struct ct_mixer *mixer = atc->mixer;
+	enum CT_AMIXER_CTL type = get_amixer_index(kcontrol->private_value);
+	struct amixer *amixer = NULL;
+	int i = 0, j = 0, change = 0, val = 0;
+
+	for (i = 0; i < 2; i++) {
+		/* Convert 16-bit integer volume to 14-bit float-point scale */
+		val = (ucontrol->value.integer.value[i] & 0xffff);
+		amixer = mixer->amixers[type*CHN_NUM+i];
+		if ((float14_to_uint16(amixer->ops->get_scale(amixer)) & 0xff80)
+				!= (val & 0xff80)) {
+			val = uint16_to_float14(val);
+			amixer->ops->set_scale(amixer, val);
+			amixer->ops->commit_write(amixer);
+			change = 1;
+			/* Synchronize Master/PCM playback AMIXERs. */
+			if (AMIXER_MASTER_F == type || AMIXER_PCM_F == type) {
+				for (j = 1; j < 4; j++) {
+					amixer = mixer->
+						amixers[(type+j)*CHN_NUM+i];
+					amixer->ops->set_scale(amixer, val);
+					amixer->ops->commit_write(amixer);
+				}
+			}
+		}
+	}
+
+	return change;
+}
+
+static struct snd_kcontrol_new vol_ctl = {
+	.access		= SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.iface		= SNDRV_CTL_ELEM_IFACE_MIXER,
+	.info		= ct_alsa_mix_volume_info,
+	.get		= ct_alsa_mix_volume_get,
+	.put		= ct_alsa_mix_volume_put
+};
+
+static void
+do_line_mic_switch(struct ct_atc *atc, enum CTALSA_MIXER_CTL type)
+{
+
+	if (MIXER_LINEIN_C_S == type) {
+		atc->select_line_in(atc);
+		set_switch_state(atc->mixer, MIXER_MIC_C_S, 0);
+		snd_ctl_notify(atc->card, SNDRV_CTL_EVENT_MASK_VALUE,
+							&kctls[1]->id);
+	} else if (MIXER_MIC_C_S == type) {
+		atc->select_mic_in(atc);
+		set_switch_state(atc->mixer, MIXER_LINEIN_C_S, 0);
+		snd_ctl_notify(atc->card, SNDRV_CTL_EVENT_MASK_VALUE,
+							&kctls[0]->id);
+	}
+}
+
+static void
+do_digit_io_switch(struct ct_atc *atc, int state)
+{
+	struct ct_mixer *mixer = atc->mixer;
+
+	if (state) {
+		atc->select_digit_io(atc);
+		atc->spdif_out_unmute(atc,
+				get_switch_state(mixer, MIXER_SPDIFO_P_S));
+		atc->spdif_in_unmute(atc, 1);
+		atc->line_in_unmute(atc, 0);
+		return;
+	}
+
+	if (get_switch_state(mixer, MIXER_LINEIN_C_S))
+		atc->select_line_in(atc);
+	else if (get_switch_state(mixer, MIXER_MIC_C_S))
+		atc->select_mic_in(atc);
+
+	atc->spdif_out_unmute(atc, 0);
+	atc->spdif_in_unmute(atc, 0);
+	atc->line_in_unmute(atc, 1);
+	return;
+}
+
+static int ct_alsa_mix_switch_info(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	uinfo->value.integer.step = 1;
+
+	return 0;
+}
+
+static int ct_alsa_mix_switch_get(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_mixer *mixer =
+		((struct ct_atc *)snd_kcontrol_chip(kcontrol))->mixer;
+	enum CTALSA_MIXER_CTL type = kcontrol->private_value;
+
+	ucontrol->value.integer.value[0] = get_switch_state(mixer, type);
+	return 0;
+}
+
+static int ct_alsa_mix_switch_put(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	struct ct_mixer *mixer = atc->mixer;
+	enum CTALSA_MIXER_CTL type = kcontrol->private_value;
+	int state = 0;
+
+	state = ucontrol->value.integer.value[0];
+	if (get_switch_state(mixer, type) == state)
+		return 0;
+
+	set_switch_state(mixer, type, state);
+	/* Do changes in mixer. */
+	if ((SWH_CAPTURE_START <= type) && (SWH_CAPTURE_END >= type)) {
+		if (state) {
+			ct_mixer_recording_select(mixer,
+						  get_amixer_index(type));
+		} else {
+			ct_mixer_recording_unselect(mixer,
+						    get_amixer_index(type));
+		}
+	}
+	/* Do changes out of mixer. */
+	if (state && (MIXER_LINEIN_C_S == type || MIXER_MIC_C_S == type))
+		do_line_mic_switch(atc, type);
+	else if (MIXER_WAVEF_P_S == type)
+		atc->line_front_unmute(atc, state);
+	else if (MIXER_WAVES_P_S == type)
+		atc->line_surround_unmute(atc, state);
+	else if (MIXER_WAVEC_P_S == type)
+		atc->line_clfe_unmute(atc, state);
+	else if (MIXER_WAVER_P_S == type)
+		atc->line_rear_unmute(atc, state);
+	else if (MIXER_LINEIN_P_S == type)
+		atc->line_in_unmute(atc, state);
+	else if (MIXER_SPDIFO_P_S == type)
+		atc->spdif_out_unmute(atc, state);
+	else if (MIXER_SPDIFI_P_S == type)
+		atc->spdif_in_unmute(atc, state);
+	else if (MIXER_DIGITAL_IO_S == type)
+		do_digit_io_switch(atc, state);
+
+	return 1;
+}
+
+static struct snd_kcontrol_new swh_ctl = {
+	.access		= SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.iface		= SNDRV_CTL_ELEM_IFACE_MIXER,
+	.info		= ct_alsa_mix_switch_info,
+	.get		= ct_alsa_mix_switch_get,
+	.put		= ct_alsa_mix_switch_put
+};
+
+static int ct_spdif_info(struct snd_kcontrol *kcontrol,
+			 struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958;
+	uinfo->count = 1;
+	return 0;
+}
+
+static int ct_spdif_get_mask(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	ucontrol->value.iec958.status[0] = 0xff;
+	ucontrol->value.iec958.status[1] = 0xff;
+	ucontrol->value.iec958.status[2] = 0xff;
+	ucontrol->value.iec958.status[3] = 0xff;
+	return 0;
+}
+
+static int ct_spdif_default_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	unsigned int status = SNDRV_PCM_DEFAULT_CON_SPDIF;
+
+	ucontrol->value.iec958.status[0] = (status >> 0) & 0xff;
+	ucontrol->value.iec958.status[1] = (status >> 8) & 0xff;
+	ucontrol->value.iec958.status[2] = (status >> 16) & 0xff;
+	ucontrol->value.iec958.status[3] = (status >> 24) & 0xff;
+
+	return 0;
+}
+
+static int ct_spdif_get(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	unsigned int status = 0;
+
+	atc->spdif_out_get_status(atc, &status);
+	ucontrol->value.iec958.status[0] = (status >> 0) & 0xff;
+	ucontrol->value.iec958.status[1] = (status >> 8) & 0xff;
+	ucontrol->value.iec958.status[2] = (status >> 16) & 0xff;
+	ucontrol->value.iec958.status[3] = (status >> 24) & 0xff;
+
+	return 0;
+}
+
+static int ct_spdif_put(struct snd_kcontrol *kcontrol,
+			struct snd_ctl_elem_value *ucontrol)
+{
+	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
+	int change = 1;
+	unsigned int status = 0, old_status = 0;
+
+	status = (ucontrol->value.iec958.status[0] << 0) |
+		 (ucontrol->value.iec958.status[1] << 8) |
+		 (ucontrol->value.iec958.status[2] << 16) |
+		 (ucontrol->value.iec958.status[3] << 24);
+
+	atc->spdif_out_get_status(atc, &old_status);
+	change = (old_status != status);
+	if (change)
+		atc->spdif_out_set_status(atc, status);
+
+	return change;
+}
+
+static struct snd_kcontrol_new iec958_mask_ctl = {
+	.access		= SNDRV_CTL_ELEM_ACCESS_READ,
+	.iface		= SNDRV_CTL_ELEM_IFACE_PCM,
+	.name		= SNDRV_CTL_NAME_IEC958("", PLAYBACK, MASK),
+	.count		= 1,
+	.info		= ct_spdif_info,
+	.get		= ct_spdif_get_mask,
+	.private_value	= MIXER_IEC958_MASK
+};
+
+static struct snd_kcontrol_new iec958_default_ctl = {
+	.iface		= SNDRV_CTL_ELEM_IFACE_PCM,
+	.name		= SNDRV_CTL_NAME_IEC958("", PLAYBACK, DEFAULT),
+	.count		= 1,
+	.info		= ct_spdif_info,
+	.get		= ct_spdif_default_get,
+	.put		= ct_spdif_put,
+	.private_value	= MIXER_IEC958_DEFAULT
+};
+
+static struct snd_kcontrol_new iec958_ctl = {
+	.access		= SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.iface		= SNDRV_CTL_ELEM_IFACE_PCM,
+	.name		= SNDRV_CTL_NAME_IEC958("", PLAYBACK, PCM_STREAM),
+	.count		= 1,
+	.info		= ct_spdif_info,
+	.get		= ct_spdif_get,
+	.put		= ct_spdif_put,
+	.private_value	= MIXER_IEC958_STREAM
+};
+
+#define NUM_IEC958_CTL 3
+
+static int
+ct_mixer_kcontrol_new(struct ct_mixer *mixer, struct snd_kcontrol_new *new)
+{
+	struct snd_kcontrol *kctl = NULL;
+	int err = 0;
+
+	kctl = snd_ctl_new1(new, mixer->atc);
+	if (NULL == kctl)
+		return -ENOMEM;
+
+	if (SNDRV_CTL_ELEM_IFACE_PCM == kctl->id.iface)
+		kctl->id.device = IEC958;
+
+	err = snd_ctl_add(mixer->atc->card, kctl);
+	if (err)
+		return err;
+
+	switch (new->private_value) {
+	case MIXER_LINEIN_C_S:
+		kctls[0] = kctl; break;
+	case MIXER_MIC_C_S:
+		kctls[1] = kctl; break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int ct_mixer_kcontrols_create(struct ct_mixer *mixer)
+{
+	enum CTALSA_MIXER_CTL type = 0;
+	struct ct_atc *atc = mixer->atc;
+	int err = 0;
+
+	/* Create snd kcontrol instances on demand */
+	for (type = VOL_MIXER_START; type <= VOL_MIXER_END; type++) {
+		if (ct_kcontrol_init_table[type].ctl) {
+			vol_ctl.name = ct_kcontrol_init_table[type].name;
+			vol_ctl.private_value = (unsigned long)type;
+			err = ct_mixer_kcontrol_new(mixer, &vol_ctl);
+			if (err)
+				return err;
+		}
+	}
+
+	ct_kcontrol_init_table[MIXER_DIGITAL_IO_S].ctl =
+					atc->have_digit_io_switch(atc);
+	for (type = SWH_MIXER_START; type <= SWH_MIXER_END; type++) {
+		if (ct_kcontrol_init_table[type].ctl) {
+			swh_ctl.name = ct_kcontrol_init_table[type].name;
+			swh_ctl.private_value = (unsigned long)type;
+			err = ct_mixer_kcontrol_new(mixer, &swh_ctl);
+			if (err)
+				return err;
+		}
+	}
+
+	err = ct_mixer_kcontrol_new(mixer, &iec958_mask_ctl);
+	if (err)
+		return err;
+
+	err = ct_mixer_kcontrol_new(mixer, &iec958_default_ctl);
+	if (err)
+		return err;
+
+	err = ct_mixer_kcontrol_new(mixer, &iec958_ctl);
+	if (err)
+		return err;
+
+	atc->line_front_unmute(atc, 1);
+	set_switch_state(mixer, MIXER_WAVEF_P_S, 1);
+	atc->line_surround_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_WAVES_P_S, 0);
+	atc->line_clfe_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_WAVEC_P_S, 0);
+	atc->line_rear_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_WAVER_P_S, 0);
+	atc->spdif_out_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_SPDIFO_P_S, 0);
+	atc->line_in_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_LINEIN_P_S, 0);
+	atc->spdif_in_unmute(atc, 0);
+	set_switch_state(mixer, MIXER_SPDIFI_P_S, 0);
+
+	set_switch_state(mixer, MIXER_PCM_C_S, 1);
+	set_switch_state(mixer, MIXER_LINEIN_C_S, 1);
+	set_switch_state(mixer, MIXER_SPDIFI_C_S, 1);
+
+	return 0;
+}
+
+static void
+ct_mixer_recording_select(struct ct_mixer *mixer, enum CT_AMIXER_CTL type)
+{
+	struct amixer *amix_d = NULL;
+	struct sum *sum_c = NULL;
+	int i = 0;
+
+	for (i = 0; i < 2; i++) {
+		amix_d = mixer->amixers[type*CHN_NUM+i];
+		sum_c = mixer->sums[SUM_IN_F_C*CHN_NUM+i];
+		amix_d->ops->set_sum(amix_d, sum_c);
+		amix_d->ops->commit_write(amix_d);
+	}
+}
+
+static void
+ct_mixer_recording_unselect(struct ct_mixer *mixer, enum CT_AMIXER_CTL type)
+{
+	struct amixer *amix_d = NULL;
+	int i = 0;
+
+	for (i = 0; i < 2; i++) {
+		amix_d = mixer->amixers[type*CHN_NUM+i];
+		amix_d->ops->set_sum(amix_d, NULL);
+		amix_d->ops->commit_write(amix_d);
+	}
+}
+
+static int ct_mixer_get_resources(struct ct_mixer *mixer)
+{
+	struct sum_mgr *sum_mgr = NULL;
+	struct sum *sum = NULL;
+	struct sum_desc sum_desc = {0};
+	struct amixer_mgr *amixer_mgr = NULL;
+	struct amixer *amixer = NULL;
+	struct amixer_desc am_desc = {0};
+	int err = 0;
+	int i = 0;
+
+	/* Allocate sum resources for mixer obj */
+	sum_mgr = (struct sum_mgr *)mixer->atc->rsc_mgrs[SUM];
+	sum_desc.msr = mixer->atc->msr;
+	for (i = 0; i < (NUM_CT_SUMS * CHN_NUM); i++) {
+		err = sum_mgr->get_sum(sum_mgr, &sum_desc, &sum);
+		if (err) {
+			printk(KERN_ERR "Failed to get sum resources for "
+					  "front output!\n");
+			break;
+		}
+		mixer->sums[i] = sum;
+	}
+	if (err)
+		goto error1;
+
+	/* Allocate amixer resources for mixer obj */
+	amixer_mgr = (struct amixer_mgr *)mixer->atc->rsc_mgrs[AMIXER];
+	am_desc.msr = mixer->atc->msr;
+	for (i = 0; i < (NUM_CT_AMIXERS * CHN_NUM); i++) {
+		err = amixer_mgr->get_amixer(amixer_mgr, &am_desc, &amixer);
+		if (err) {
+			printk(KERN_ERR "Failed to get amixer resources for "
+					  "mixer obj!\n");
+			break;
+		}
+		mixer->amixers[i] = amixer;
+	}
+	if (err)
+		goto error2;
+
+	return 0;
+
+error2:
+	for (i = 0; i < (NUM_CT_AMIXERS * CHN_NUM); i++) {
+		if (NULL != mixer->amixers[i]) {
+			amixer = mixer->amixers[i];
+			amixer_mgr->put_amixer(amixer_mgr, amixer);
+			mixer->amixers[i] = NULL;
+		}
+	}
+error1:
+	for (i = 0; i < (NUM_CT_SUMS * CHN_NUM); i++) {
+		if (NULL != mixer->sums[i]) {
+			sum_mgr->put_sum(sum_mgr, (struct sum *)mixer->sums[i]);
+			mixer->sums[i] = NULL;
+		}
+	}
+
+	return err;
+}
+
+static int ct_mixer_get_mem(struct ct_mixer **rmixer)
+{
+	struct ct_mixer *mixer = NULL;
+	int err = 0;
+
+	*rmixer = NULL;
+	/* Allocate mem for mixer obj */
+	mixer = kzalloc(sizeof(*mixer), GFP_KERNEL);
+	if (NULL == mixer)
+		return -ENOMEM;
+
+	mixer->amixers = kzalloc(sizeof(void *)*(NUM_CT_AMIXERS*CHN_NUM),
+				 GFP_KERNEL);
+	if (NULL == mixer->amixers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+	mixer->sums = kzalloc(sizeof(void *)*(NUM_CT_SUMS*CHN_NUM), GFP_KERNEL);
+	if (NULL == mixer->sums) {
+		err = -ENOMEM;
+		goto error2;
+	}
+
+	*rmixer = mixer;
+	return 0;
+
+error2:
+	kfree(mixer->amixers);
+error1:
+	kfree(mixer);
+	return err;
+}
+
+static int ct_mixer_topology_build(struct ct_mixer *mixer)
+{
+	struct sum *sum = NULL;
+	struct amixer *amix_d = NULL, *amix_s = NULL;
+	enum CT_AMIXER_CTL i = 0, j = 0;
+
+	/* Build topology from destination to source */
+
+	/* Set up Master mixer */
+	for (i = AMIXER_MASTER_F, j = SUM_IN_F;
+					i <= AMIXER_MASTER_S; i++, j++) {
+		amix_d = mixer->amixers[i*CHN_NUM];
+		sum = mixer->sums[j*CHN_NUM];
+		amix_d->ops->setup(amix_d, &sum->rsc, INIT_VOL, NULL);
+		amix_d = mixer->amixers[i*CHN_NUM+1];
+		sum = mixer->sums[j*CHN_NUM+1];
+		amix_d->ops->setup(amix_d, &sum->rsc, INIT_VOL, NULL);
+	}
+
+	/* Set up Wave-out mixer */
+	for (i = AMIXER_WAVE_F, j = AMIXER_MASTER_F;
+					i <= AMIXER_WAVE_S; i++, j++) {
+		amix_d = mixer->amixers[i*CHN_NUM];
+		amix_s = mixer->amixers[j*CHN_NUM];
+		amix_d->ops->setup(amix_d, &amix_s->rsc, INIT_VOL, NULL);
+		amix_d = mixer->amixers[i*CHN_NUM+1];
+		amix_s = mixer->amixers[j*CHN_NUM+1];
+		amix_d->ops->setup(amix_d, &amix_s->rsc, INIT_VOL, NULL);
+	}
+
+	/* Set up S/PDIF-out mixer */
+	amix_d = mixer->amixers[AMIXER_SPDIFO*CHN_NUM];
+	amix_s = mixer->amixers[AMIXER_MASTER_F*CHN_NUM];
+	amix_d->ops->setup(amix_d, &amix_s->rsc, INIT_VOL, NULL);
+	amix_d = mixer->amixers[AMIXER_SPDIFO*CHN_NUM+1];
+	amix_s = mixer->amixers[AMIXER_MASTER_F*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, &amix_s->rsc, INIT_VOL, NULL);
+
+	/* Set up PCM-in mixer */
+	for (i = AMIXER_PCM_F, j = SUM_IN_F; i <= AMIXER_PCM_S; i++, j++) {
+		amix_d = mixer->amixers[i*CHN_NUM];
+		sum = mixer->sums[j*CHN_NUM];
+		amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+		amix_d = mixer->amixers[i*CHN_NUM+1];
+		sum = mixer->sums[j*CHN_NUM+1];
+		amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	}
+
+	/* Set up Line-in mixer */
+	amix_d = mixer->amixers[AMIXER_LINEIN*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_LINEIN*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up Mic-in mixer */
+	amix_d = mixer->amixers[AMIXER_MIC*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_MIC*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up S/PDIF-in mixer */
+	amix_d = mixer->amixers[AMIXER_SPDIFI*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_SPDIFI*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up Master recording mixer */
+	amix_d = mixer->amixers[AMIXER_MASTER_F_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, &sum->rsc, INIT_VOL, NULL);
+	amix_d = mixer->amixers[AMIXER_MASTER_F_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, &sum->rsc, INIT_VOL, NULL);
+
+	/* Set up PCM-in recording mixer */
+	amix_d = mixer->amixers[AMIXER_PCM_F_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_PCM_F_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up Line-in recording mixer */
+	amix_d = mixer->amixers[AMIXER_LINEIN_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_LINEIN_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up Mic-in recording mixer */
+	amix_d = mixer->amixers[AMIXER_MIC_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_MIC_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	/* Set up S/PDIF-in recording mixer */
+	amix_d = mixer->amixers[AMIXER_SPDIFI_C*CHN_NUM];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+	amix_d = mixer->amixers[AMIXER_SPDIFI_C*CHN_NUM+1];
+	sum = mixer->sums[SUM_IN_F_C*CHN_NUM+1];
+	amix_d->ops->setup(amix_d, NULL, INIT_VOL, sum);
+
+	return 0;
+}
+
+static int mixer_set_input_port(struct amixer *amixer, struct rsc *rsc)
+{
+	amixer->ops->set_input(amixer, rsc);
+	amixer->ops->commit_write(amixer);
+
+	return 0;
+}
+
+static enum CT_AMIXER_CTL port_to_amixer(enum MIXER_PORT_T type)
+{
+	switch (type) {
+	case MIX_WAVE_FRONT:	return AMIXER_WAVE_F;
+	case MIX_WAVE_SURROUND:	return AMIXER_WAVE_S;
+	case MIX_WAVE_CENTLFE:	return AMIXER_WAVE_C;
+	case MIX_WAVE_REAR:	return AMIXER_WAVE_R;
+	case MIX_PCMO_FRONT:	return AMIXER_MASTER_F_C;
+	case MIX_SPDIF_OUT:	return AMIXER_SPDIFO;
+	case MIX_LINE_IN:	return AMIXER_LINEIN;
+	case MIX_MIC_IN:	return AMIXER_MIC;
+	case MIX_SPDIF_IN:	return AMIXER_SPDIFI;
+	case MIX_PCMI_FRONT:	return AMIXER_PCM_F;
+	case MIX_PCMI_SURROUND:	return AMIXER_PCM_S;
+	case MIX_PCMI_CENTLFE:	return AMIXER_PCM_C;
+	case MIX_PCMI_REAR:	return AMIXER_PCM_R;
+	default: 		return 0;
+	}
+}
+
+static int mixer_get_output_ports(struct ct_mixer *mixer,
+				  enum MIXER_PORT_T type,
+				  struct rsc **rleft, struct rsc **rright)
+{
+	enum CT_AMIXER_CTL amix = port_to_amixer(type);
+
+	if (NULL != rleft)
+		*rleft = &((struct amixer *)mixer->amixers[amix*CHN_NUM])->rsc;
+
+	if (NULL != rright)
+		*rright =
+			&((struct amixer *)mixer->amixers[amix*CHN_NUM+1])->rsc;
+
+	return 0;
+}
+
+static int mixer_set_input_left(struct ct_mixer *mixer,
+				enum MIXER_PORT_T type, struct rsc *rsc)
+{
+	enum CT_AMIXER_CTL amix = port_to_amixer(type);
+
+	mixer_set_input_port(mixer->amixers[amix*CHN_NUM], rsc);
+	amix = get_recording_amixer(amix);
+	if (amix < NUM_CT_AMIXERS)
+		mixer_set_input_port(mixer->amixers[amix*CHN_NUM], rsc);
+
+	return 0;
+}
+
+static int
+mixer_set_input_right(struct ct_mixer *mixer,
+		      enum MIXER_PORT_T type, struct rsc *rsc)
+{
+	enum CT_AMIXER_CTL amix = port_to_amixer(type);
+
+	mixer_set_input_port(mixer->amixers[amix*CHN_NUM+1], rsc);
+	amix = get_recording_amixer(amix);
+	if (amix < NUM_CT_AMIXERS)
+		mixer_set_input_port(mixer->amixers[amix*CHN_NUM+1], rsc);
+
+	return 0;
+}
+
+int ct_mixer_destroy(struct ct_mixer *mixer)
+{
+	struct sum_mgr *sum_mgr = (struct sum_mgr *)mixer->atc->rsc_mgrs[SUM];
+	struct amixer_mgr *amixer_mgr =
+			(struct amixer_mgr *)mixer->atc->rsc_mgrs[AMIXER];
+	struct amixer *amixer = NULL;
+	int i = 0;
+
+	/* Release amixer resources */
+	for (i = 0; i < (NUM_CT_AMIXERS * CHN_NUM); i++) {
+		if (NULL != mixer->amixers[i]) {
+			amixer = mixer->amixers[i];
+			amixer_mgr->put_amixer(amixer_mgr, amixer);
+		}
+	}
+
+	/* Release sum resources */
+	for (i = 0; i < (NUM_CT_SUMS * CHN_NUM); i++) {
+		if (NULL != mixer->sums[i])
+			sum_mgr->put_sum(sum_mgr, (struct sum *)mixer->sums[i]);
+	}
+
+	/* Release mem assigned to mixer object */
+	kfree(mixer->sums);
+	kfree(mixer->amixers);
+	kfree(mixer);
+
+	return 0;
+}
+
+int ct_mixer_create(struct ct_atc *atc, struct ct_mixer **rmixer)
+{
+	struct ct_mixer *mixer = NULL;
+	int err = 0;
+
+	*rmixer = NULL;
+
+	/* Allocate mem for mixer obj */
+	err = ct_mixer_get_mem(&mixer);
+	if (err)
+		return err;
+
+	mixer->switch_state = 0;
+	mixer->atc = atc;
+	/* Set operations */
+	mixer->get_output_ports = mixer_get_output_ports;
+	mixer->set_input_left = mixer_set_input_left;
+	mixer->set_input_right = mixer_set_input_right;
+
+	/* Allocate chip resources for mixer obj */
+	err = ct_mixer_get_resources(mixer);
+	if (err)
+		goto error;
+
+	/* Build internal mixer topology */
+	ct_mixer_topology_build(mixer);
+
+	*rmixer = mixer;
+
+	return 0;
+
+error:
+	ct_mixer_destroy(mixer);
+	return err;
+}
+
+int ct_alsa_mix_create(struct ct_atc *atc,
+		       enum CTALSADEVS device,
+		       const char *device_name)
+{
+	int err = 0;
+
+	/* Create snd kcontrol instances on demand */
+	vol_ctl.device = swh_ctl.device = device;
+	err = ct_mixer_kcontrols_create((struct ct_mixer *)atc->mixer);
+	if (err)
+		return err;
+
+	strcpy(atc->card->mixername, device_name);
+
+	return 0;
+}
diff --git a/sound/pci/ctxfi/ctmixer.h b/sound/pci/ctxfi/ctmixer.h
new file mode 100644
index 0000000..e2d96eb
--- /dev/null
+++ b/sound/pci/ctxfi/ctmixer.h
@@ -0,0 +1,67 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctmixer.h
+ *
+ * @Brief
+ * This file contains the definition of the mixer device functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	Mar 28 2008
+ *
+ */
+
+#ifndef CTMIXER_H
+#define CTMIXER_H
+
+#include "ctatc.h"
+#include "ctresource.h"
+
+#define INIT_VOL	0x1c00
+
+enum MIXER_PORT_T {
+	MIX_WAVE_FRONT,
+	MIX_WAVE_REAR,
+	MIX_WAVE_CENTLFE,
+	MIX_WAVE_SURROUND,
+	MIX_SPDIF_OUT,
+	MIX_PCMO_FRONT,
+	MIX_MIC_IN,
+	MIX_LINE_IN,
+	MIX_SPDIF_IN,
+	MIX_PCMI_FRONT,
+	MIX_PCMI_REAR,
+	MIX_PCMI_CENTLFE,
+	MIX_PCMI_SURROUND,
+
+	NUM_MIX_PORTS
+};
+
+/* alsa mixer descriptor */
+struct ct_mixer {
+	struct ct_atc *atc;
+
+	void **amixers;		/* amixer resources for volume control */
+	void **sums;		/* sum resources for signal collection */
+	unsigned int switch_state; /* A bit-map to indicate state of switches */
+
+	int (*get_output_ports)(struct ct_mixer *mixer, enum MIXER_PORT_T type,
+				  struct rsc **rleft, struct rsc **rright);
+
+	int (*set_input_left)(struct ct_mixer *mixer,
+			      enum MIXER_PORT_T type, struct rsc *rsc);
+	int (*set_input_right)(struct ct_mixer *mixer,
+			       enum MIXER_PORT_T type, struct rsc *rsc);
+};
+
+int ct_alsa_mix_create(struct ct_atc *atc,
+		       enum CTALSADEVS device,
+		       const char *device_name);
+int ct_mixer_create(struct ct_atc *atc, struct ct_mixer **rmixer);
+int ct_mixer_destroy(struct ct_mixer *mixer);
+
+#endif /* CTMIXER_H */
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
new file mode 100644
index 0000000..73d4fdb
--- /dev/null
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -0,0 +1,499 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctpcm.c
+ *
+ * @Brief
+ * This file contains the definition of the pcm device functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	Apr 2 2008
+ *
+ */
+
+#include "ctpcm.h"
+#include <sound/pcm.h>
+
+/* Hardware descriptions for playback */
+static struct snd_pcm_hardware ct_pcm_playback_hw = {
+	.info			= (SNDRV_PCM_INFO_MMAP |
+				   SNDRV_PCM_INFO_INTERLEAVED |
+				   SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				   SNDRV_PCM_INFO_MMAP_VALID |
+				   SNDRV_PCM_INFO_PAUSE),
+	.formats		= (SNDRV_PCM_FMTBIT_U8 |
+				   SNDRV_PCM_FMTBIT_S8 |
+				   SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_U16_LE |
+				   SNDRV_PCM_FMTBIT_S24_3LE |
+				   SNDRV_PCM_FMTBIT_S24_LE |
+				   SNDRV_PCM_FMTBIT_S32_LE),
+	.rates			= (SNDRV_PCM_RATE_CONTINUOUS |
+				   SNDRV_PCM_RATE_8000_192000),
+	.rate_min		= 8000,
+	.rate_max		= 192000,
+	.channels_min		= 1,
+	.channels_max		= 2,
+	.buffer_bytes_max	= (128*1024),
+	.period_bytes_min	= (64),
+	.period_bytes_max	= (128*1024),
+	.periods_min		= 1,
+	.periods_max		= 1024,
+	.fifo_size		= 0,
+};
+
+static struct snd_pcm_hardware ct_spdif_passthru_playback_hw = {
+	.info			= (SNDRV_PCM_INFO_MMAP |
+				   SNDRV_PCM_INFO_INTERLEAVED |
+				   SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				   SNDRV_PCM_INFO_MMAP_VALID |
+				   SNDRV_PCM_INFO_PAUSE),
+	.formats		= (SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_U16_LE),
+	.rates			= (SNDRV_PCM_RATE_48000 |
+				   SNDRV_PCM_RATE_44100 |
+				   SNDRV_PCM_RATE_32000),
+	.rate_min		= 32000,
+	.rate_max		= 48000,
+	.channels_min		= 2,
+	.channels_max		= 2,
+	.buffer_bytes_max	= (128*1024),
+	.period_bytes_min	= (64),
+	.period_bytes_max	= (128*1024),
+	.periods_min		= 1,
+	.periods_max		= 1024,
+	.fifo_size		= 0,
+};
+
+/* Hardware descriptions for capture */
+static struct snd_pcm_hardware ct_pcm_capture_hw = {
+	.info			= (SNDRV_PCM_INFO_MMAP |
+				   SNDRV_PCM_INFO_INTERLEAVED |
+				   SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				   SNDRV_PCM_INFO_PAUSE |
+				   SNDRV_PCM_INFO_MMAP_VALID),
+	.formats		= (SNDRV_PCM_FMTBIT_U8 |
+				   SNDRV_PCM_FMTBIT_S8 |
+				   SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_U16_LE |
+				   SNDRV_PCM_FMTBIT_S24_3LE |
+				   SNDRV_PCM_FMTBIT_S24_LE |
+				   SNDRV_PCM_FMTBIT_S32_LE),
+	.rates			= (SNDRV_PCM_RATE_CONTINUOUS |
+				   SNDRV_PCM_RATE_8000_96000),
+	.rate_min		= 8000,
+	.rate_max		= 96000,
+	.channels_min		= 1,
+	.channels_max		= 2,
+	.buffer_bytes_max	= (128*1024),
+	.period_bytes_min	= (384),
+	.period_bytes_max	= (64*1024),
+	.periods_min		= 2,
+	.periods_max		= 1024,
+	.fifo_size		= 0,
+};
+
+static void ct_atc_pcm_interrupt(struct ct_atc_pcm *atc_pcm)
+{
+	struct ct_atc_pcm *apcm = atc_pcm;
+
+	if (NULL == apcm->substream)
+		return;
+
+	snd_pcm_period_elapsed(apcm->substream);
+}
+
+static void ct_atc_pcm_free_substream(struct snd_pcm_runtime *runtime)
+{
+	struct ct_atc_pcm *apcm = runtime->private_data;
+	struct ct_atc *atc = snd_pcm_substream_chip(apcm->substream);
+
+	atc->pcm_release_resources(atc, apcm);
+	kfree(apcm);
+	runtime->private_data = NULL;
+}
+
+/* pcm playback operations */
+static int ct_pcm_playback_open(struct snd_pcm_substream *substream)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm;
+	int err;
+
+	apcm = kzalloc(sizeof(*apcm), GFP_KERNEL);
+	if (NULL == apcm)
+		return -ENOMEM;
+
+	spin_lock_init(&apcm->timer_lock);
+	apcm->stop_timer = 0;
+	apcm->substream = substream;
+	apcm->interrupt = ct_atc_pcm_interrupt;
+	runtime->private_data = apcm;
+	runtime->private_free = ct_atc_pcm_free_substream;
+	if (IEC958 == substream->pcm->device) {
+		runtime->hw = ct_spdif_passthru_playback_hw;
+		atc->spdif_out_passthru(atc, 1);
+	} else {
+		runtime->hw = ct_pcm_playback_hw;
+		if (FRONT == substream->pcm->device)
+			runtime->hw.channels_max = 8;
+	}
+
+	err = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (err < 0) {
+		kfree(apcm);
+		return err;
+	}
+	err = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_BUFFER_BYTES,
+					   1024, UINT_MAX);
+	if (err < 0) {
+		kfree(apcm);
+		return err;
+	}
+
+	return 0;
+}
+
+static int ct_pcm_playback_close(struct snd_pcm_substream *substream)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+
+	/* TODO: Notify mixer inactive. */
+	if (IEC958 == substream->pcm->device)
+		atc->spdif_out_passthru(atc, 0);
+
+	/* The ct_atc_pcm object will be freed by runtime->private_free */
+
+	return 0;
+}
+
+static int ct_pcm_hw_params(struct snd_pcm_substream *substream,
+				     struct snd_pcm_hw_params *hw_params)
+{
+	return snd_pcm_lib_malloc_pages(substream,
+					params_buffer_bytes(hw_params));
+}
+
+static int ct_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	/* Free snd-allocated pages */
+	return snd_pcm_lib_free_pages(substream);
+}
+
+static void ct_pcm_timer_callback(unsigned long data)
+{
+	struct ct_atc_pcm *apcm = (struct ct_atc_pcm *)data;
+	struct snd_pcm_substream *substream = apcm->substream;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	unsigned int period_size = runtime->period_size;
+	unsigned int buffer_size = runtime->buffer_size;
+	unsigned long flags;
+	unsigned int position = 0, dist = 0, interval = 0;
+
+	position = substream->ops->pointer(substream);
+	dist = (position + buffer_size - apcm->position) % buffer_size;
+	if ((dist >= period_size) ||
+		(position/period_size != apcm->position/period_size)) {
+		apcm->interrupt(apcm);
+		apcm->position = position;
+	}
+	/* Add extra HZ*5/1000 to avoid overrun issue when recording
+	 * at 8kHz in 8-bit format or at 88kHz in 24-bit format. */
+	interval = ((period_size - (position % period_size))
+		   * HZ + (runtime->rate - 1)) / runtime->rate + HZ * 5 / 1000;
+	spin_lock_irqsave(&apcm->timer_lock, flags);
+	apcm->timer.expires = jiffies + interval;
+	if (!apcm->stop_timer)
+		add_timer(&apcm->timer);
+
+	spin_unlock_irqrestore(&apcm->timer_lock, flags);
+}
+
+static int ct_pcm_timer_prepare(struct ct_atc_pcm *apcm)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&apcm->timer_lock, flags);
+	if (timer_pending(&apcm->timer)) {
+		/* The timer has already been started. */
+		spin_unlock_irqrestore(&apcm->timer_lock, flags);
+		return 0;
+	}
+
+	init_timer(&apcm->timer);
+	apcm->timer.data = (unsigned long)apcm;
+	apcm->timer.function = ct_pcm_timer_callback;
+	spin_unlock_irqrestore(&apcm->timer_lock, flags);
+	apcm->position = 0;
+
+	return 0;
+}
+
+static int ct_pcm_timer_start(struct ct_atc_pcm *apcm)
+{
+	struct snd_pcm_runtime *runtime = apcm->substream->runtime;
+	unsigned long flags;
+
+	spin_lock_irqsave(&apcm->timer_lock, flags);
+	if (timer_pending(&apcm->timer)) {
+		/* The timer has already been started. */
+		spin_unlock_irqrestore(&apcm->timer_lock, flags);
+		return 0;
+	}
+
+	apcm->timer.expires = jiffies + (runtime->period_size * HZ +
+				(runtime->rate - 1)) / runtime->rate;
+	apcm->stop_timer = 0;
+	add_timer(&apcm->timer);
+	spin_unlock_irqrestore(&apcm->timer_lock, flags);
+
+	return 0;
+}
+
+static int ct_pcm_timer_stop(struct ct_atc_pcm *apcm)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&apcm->timer_lock, flags);
+	apcm->stop_timer = 1;
+	del_timer(&apcm->timer);
+	spin_unlock_irqrestore(&apcm->timer_lock, flags);
+
+	try_to_del_timer_sync(&apcm->timer);
+
+	return 0;
+}
+
+static int ct_pcm_playback_prepare(struct snd_pcm_substream *substream)
+{
+	int err;
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	if (IEC958 == substream->pcm->device)
+		err = atc->spdif_passthru_playback_prepare(atc, apcm);
+	else
+		err = atc->pcm_playback_prepare(atc, apcm);
+
+	if (err < 0) {
+		printk(KERN_ERR "Preparing pcm playback failed!!!\n");
+		return err;
+	}
+
+	ct_pcm_timer_prepare(apcm);
+
+	return 0;
+}
+
+static int
+ct_pcm_playback_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		atc->pcm_playback_start(atc, apcm);
+		ct_pcm_timer_start(apcm);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		ct_pcm_timer_stop(apcm);
+		atc->pcm_playback_stop(atc, apcm);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static snd_pcm_uframes_t
+ct_pcm_playback_pointer(struct snd_pcm_substream *substream)
+{
+	unsigned long position;
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	/* Read out playback position */
+	position = atc->pcm_playback_position(atc, apcm);
+	position = bytes_to_frames(runtime, position);
+	return position;
+}
+
+/* pcm capture operations */
+static int ct_pcm_capture_open(struct snd_pcm_substream *substream)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm;
+	int err;
+
+	apcm = kzalloc(sizeof(*apcm), GFP_KERNEL);
+	if (NULL == apcm)
+		return -ENOMEM;
+
+	spin_lock_init(&apcm->timer_lock);
+	apcm->started = 0;
+	apcm->stop_timer = 0;
+	apcm->substream = substream;
+	apcm->interrupt = ct_atc_pcm_interrupt;
+	runtime->private_data = apcm;
+	runtime->private_free = ct_atc_pcm_free_substream;
+	runtime->hw = ct_pcm_capture_hw;
+	runtime->hw.rate_max = atc->rsr * atc->msr;
+
+	err = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (err < 0) {
+		kfree(apcm);
+		return err;
+	}
+	err = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_BUFFER_BYTES,
+					   1024, UINT_MAX);
+	if (err < 0) {
+		kfree(apcm);
+		return err;
+	}
+
+	return 0;
+}
+
+static int ct_pcm_capture_close(struct snd_pcm_substream *substream)
+{
+	/* The ct_atc_pcm object will be freed by runtime->private_free */
+	/* TODO: Notify mixer inactive. */
+	return 0;
+}
+
+static int ct_pcm_capture_prepare(struct snd_pcm_substream *substream)
+{
+	int err;
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	err = atc->pcm_capture_prepare(atc, apcm);
+	if (err < 0) {
+		printk(KERN_ERR "Preparing pcm capture failed!!!\n");
+		return err;
+	}
+
+	ct_pcm_timer_prepare(apcm);
+
+	return 0;
+}
+
+static int
+ct_pcm_capture_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		atc->pcm_capture_start(atc, apcm);
+		ct_pcm_timer_start(apcm);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+		ct_pcm_timer_stop(apcm);
+		atc->pcm_capture_stop(atc, apcm);
+		break;
+	default:
+		ct_pcm_timer_stop(apcm);
+		atc->pcm_capture_stop(atc, apcm);
+		break;
+	}
+
+	return 0;
+}
+
+static snd_pcm_uframes_t
+ct_pcm_capture_pointer(struct snd_pcm_substream *substream)
+{
+	unsigned long position;
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = runtime->private_data;
+
+	/* Read out playback position */
+	position = atc->pcm_capture_position(atc, apcm);
+	position = bytes_to_frames(runtime, position);
+	return position;
+}
+
+/* PCM operators for playback */
+static struct snd_pcm_ops ct_pcm_playback_ops = {
+	.open	 	= ct_pcm_playback_open,
+	.close		= ct_pcm_playback_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= ct_pcm_hw_params,
+	.hw_free	= ct_pcm_hw_free,
+	.prepare	= ct_pcm_playback_prepare,
+	.trigger	= ct_pcm_playback_trigger,
+	.pointer	= ct_pcm_playback_pointer,
+};
+
+/* PCM operators for capture */
+static struct snd_pcm_ops ct_pcm_capture_ops = {
+	.open	 	= ct_pcm_capture_open,
+	.close		= ct_pcm_capture_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= ct_pcm_hw_params,
+	.hw_free	= ct_pcm_hw_free,
+	.prepare	= ct_pcm_capture_prepare,
+	.trigger	= ct_pcm_capture_trigger,
+	.pointer	= ct_pcm_capture_pointer,
+};
+
+/* Create ALSA pcm device */
+int ct_alsa_pcm_create(struct ct_atc *atc,
+		       enum CTALSADEVS device,
+		       const char *device_name)
+{
+	struct snd_pcm *pcm;
+	int err;
+	int playback_count, capture_count;
+	char name[128];
+
+	strncpy(name, device_name, sizeof(name));
+	playback_count = (IEC958 == device) ? 1 : 8;
+	capture_count = (FRONT == device) ? 1 : 0;
+	err = snd_pcm_new(atc->card, name, device,
+			  playback_count, capture_count, &pcm);
+	if (err < 0) {
+		printk(KERN_ERR "snd_pcm_new failed!! Err=%d\n", err);
+		return err;
+	}
+
+	pcm->private_data = atc;
+	pcm->info_flags = 0;
+	pcm->dev_subclass = SNDRV_PCM_SUBCLASS_GENERIC_MIX;
+	strcpy(pcm->name, device_name);
+
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &ct_pcm_playback_ops);
+
+	if (FRONT == device)
+		snd_pcm_set_ops(pcm,
+				SNDRV_PCM_STREAM_CAPTURE, &ct_pcm_capture_ops);
+
+	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+			snd_dma_pci_data(atc->pci), 128*1024, 128*1024);
+
+	return 0;
+}
diff --git a/sound/pci/ctxfi/ctpcm.h b/sound/pci/ctxfi/ctpcm.h
new file mode 100644
index 0000000..178da0d
--- /dev/null
+++ b/sound/pci/ctxfi/ctpcm.h
@@ -0,0 +1,27 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctpcm.h
+ *
+ * @Brief
+ * This file contains the definition of the pcm device functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	Mar 28 2008
+ *
+ */
+
+#ifndef CTPCM_H
+#define CTPCM_H
+
+#include "ctatc.h"
+
+int ct_alsa_pcm_create(struct ct_atc *atc,
+		       enum CTALSADEVS device,
+		       const char *device_name);
+
+#endif /* CTPCM_H */
diff --git a/sound/pci/ctxfi/ctresource.c b/sound/pci/ctxfi/ctresource.c
new file mode 100644
index 0000000..414fc23
--- /dev/null
+++ b/sound/pci/ctxfi/ctresource.c
@@ -0,0 +1,297 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctresource.c
+ *
+ * @Brief
+ * This file contains the implementation of some generic helper functions.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 15 2008
+ *
+ */
+
+#include "ctresource.h"
+#include "cthardware.h"
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#define AUDIO_SLOT_BLOCK_NUM 	256
+
+/* Resource allocation based on bit-map management mechanism */
+static int
+get_resource(u8 *rscs, unsigned int amount,
+	     unsigned int multi, unsigned int *ridx)
+{
+	int i = 0, j = 0, k = 0, n = 0;
+
+	/* Check whether there are sufficient resources to meet request. */
+	for (i = 0, n = multi; i < amount; i++) {
+		j = i / 8;
+		k = i % 8;
+		if (rscs[j] & ((u8)1 << k)) {
+			n = multi;
+			continue;
+		}
+		if (!(--n))
+			break; /* found sufficient contiguous resources */
+	}
+
+	if (i >= amount) {
+		/* Can not find sufficient contiguous resources */
+		return -ENOENT;
+	}
+
+	/* Mark the contiguous bits in resource bit-map as used */
+	for (n = multi; n > 0; n--) {
+		j = i / 8;
+		k = i % 8;
+		rscs[j] |= ((u8)1 << k);
+		i--;
+	}
+
+	*ridx = i + 1;
+
+	return 0;
+}
+
+static int put_resource(u8 *rscs, unsigned int multi, unsigned int idx)
+{
+	unsigned int i = 0, j = 0, k = 0, n = 0;
+
+	/* Mark the contiguous bits in resource bit-map as used */
+	for (n = multi, i = idx; n > 0; n--) {
+		j = i / 8;
+		k = i % 8;
+		rscs[j] &= ~((u8)1 << k);
+		i++;
+	}
+
+	return 0;
+}
+
+int mgr_get_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int *ridx)
+{
+	int err = 0;
+
+	if (n > mgr->avail)
+		return -ENOENT;
+
+	err = get_resource(mgr->rscs, mgr->amount, n, ridx);
+	if (!err)
+		mgr->avail -= n;
+
+	return err;
+}
+
+int mgr_put_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int idx)
+{
+	put_resource(mgr->rscs, n, idx);
+	mgr->avail += n;
+
+	return 0;
+}
+
+static unsigned char offset_in_audio_slot_block[NUM_RSCTYP] = {
+	/* SRC channel is at Audio Ring slot 1 every 16 slots. */
+	[SRC]		= 0x1,
+	[AMIXER]	= 0x4,
+	[SUM]		= 0xc,
+};
+
+static int rsc_index(const struct rsc *rsc)
+{
+    return rsc->conj;
+}
+
+static int audio_ring_slot(const struct rsc *rsc)
+{
+    return (rsc->conj << 4) + offset_in_audio_slot_block[rsc->type];
+}
+
+static int rsc_next_conj(struct rsc *rsc)
+{
+	unsigned int i;
+	for (i = 0; (i < 8) && (!(rsc->msr & (0x1 << i))); )
+		i++;
+	rsc->conj += (AUDIO_SLOT_BLOCK_NUM >> i);
+	return rsc->conj;
+}
+
+static int rsc_master(struct rsc *rsc)
+{
+	return rsc->conj = rsc->idx;
+}
+
+static struct rsc_ops rsc_generic_ops = {
+	.index		= rsc_index,
+	.output_slot	= audio_ring_slot,
+	.master		= rsc_master,
+	.next_conj	= rsc_next_conj,
+};
+
+int rsc_init(struct rsc *rsc, u32 idx, enum RSCTYP type, u32 msr, void *hw)
+{
+	int err = 0;
+
+	rsc->idx = idx;
+	rsc->conj = idx;
+	rsc->type = type;
+	rsc->msr = msr;
+	rsc->hw = hw;
+	rsc->ops = &rsc_generic_ops;
+	if (NULL == hw) {
+		rsc->ctrl_blk = NULL;
+		return 0;
+	}
+
+	switch (type) {
+	case SRC:
+		err = ((struct hw *)hw)->src_rsc_get_ctrl_blk(&rsc->ctrl_blk);
+		break;
+	case AMIXER:
+		err = ((struct hw *)hw)->
+				amixer_rsc_get_ctrl_blk(&rsc->ctrl_blk);
+		break;
+	case SRCIMP:
+	case SUM:
+	case DAIO:
+		break;
+	default:
+		printk(KERN_ERR "Invalid resource type value %d!\n", type);
+		return -EINVAL;
+	}
+
+	if (err) {
+		printk(KERN_ERR "Failed to get resource control block!\n");
+		return err;
+	}
+
+	return 0;
+}
+
+int rsc_uninit(struct rsc *rsc)
+{
+	if ((NULL != rsc->hw) && (NULL != rsc->ctrl_blk)) {
+		switch (rsc->type) {
+		case SRC:
+			((struct hw *)rsc->hw)->
+				src_rsc_put_ctrl_blk(rsc->ctrl_blk);
+			break;
+		case AMIXER:
+			((struct hw *)rsc->hw)->
+				amixer_rsc_put_ctrl_blk(rsc->ctrl_blk);
+			break;
+		case SUM:
+		case DAIO:
+			break;
+		default:
+			printk(KERN_ERR "Invalid resource type value %d!\n",
+					  rsc->type);
+			break;
+		}
+
+		rsc->hw = rsc->ctrl_blk = NULL;
+	}
+
+	rsc->idx = rsc->conj = 0;
+	rsc->type = NUM_RSCTYP;
+	rsc->msr = 0;
+
+	return 0;
+}
+
+int rsc_mgr_init(struct rsc_mgr *mgr, enum RSCTYP type,
+		 unsigned int amount, void *hw_obj)
+{
+	int err = 0;
+	struct hw *hw = hw_obj;
+
+	mgr->type = NUM_RSCTYP;
+
+	mgr->rscs = kzalloc(((amount + 8 - 1) / 8), GFP_KERNEL);
+	if (NULL == mgr->rscs)
+		return -ENOMEM;
+
+	switch (type) {
+	case SRC:
+		err = hw->src_mgr_get_ctrl_blk(&mgr->ctrl_blk);
+		break;
+	case SRCIMP:
+		err = hw->srcimp_mgr_get_ctrl_blk(&mgr->ctrl_blk);
+		break;
+	case AMIXER:
+		err = hw->amixer_mgr_get_ctrl_blk(&mgr->ctrl_blk);
+		break;
+	case DAIO:
+		err = hw->daio_mgr_get_ctrl_blk(hw, &mgr->ctrl_blk);
+		break;
+	case SUM:
+		break;
+	default:
+		printk(KERN_ERR "Invalid resource type value %d!\n", type);
+		err = -EINVAL;
+		goto error;
+	}
+
+	if (err) {
+		printk(KERN_ERR "Failed to get manager control block!\n");
+		goto error;
+	}
+
+	mgr->type = type;
+	mgr->avail = mgr->amount = amount;
+	mgr->hw = hw;
+
+	return 0;
+
+error:
+	kfree(mgr->rscs);
+	return err;
+}
+
+int rsc_mgr_uninit(struct rsc_mgr *mgr)
+{
+	if (NULL != mgr->rscs) {
+		kfree(mgr->rscs);
+		mgr->rscs = NULL;
+	}
+
+	if ((NULL != mgr->hw) && (NULL != mgr->ctrl_blk)) {
+		switch (mgr->type) {
+		case SRC:
+			((struct hw *)mgr->hw)->
+				src_mgr_put_ctrl_blk(mgr->ctrl_blk);
+			break;
+		case SRCIMP:
+			((struct hw *)mgr->hw)->
+				srcimp_mgr_put_ctrl_blk(mgr->ctrl_blk);
+			break;
+		case AMIXER:
+			((struct hw *)mgr->hw)->
+				amixer_mgr_put_ctrl_blk(mgr->ctrl_blk);
+			break;
+		case DAIO:
+			((struct hw *)mgr->hw)->
+				daio_mgr_put_ctrl_blk(mgr->ctrl_blk);
+			break;
+		case SUM:
+			break;
+		default:
+			printk(KERN_ERR "Invalid resource type value %d!\n",
+					  mgr->type);
+			break;
+		}
+
+		mgr->hw = mgr->ctrl_blk = NULL;
+	}
+
+	mgr->type = NUM_RSCTYP;
+	mgr->avail = mgr->amount = 0;
+
+	return 0;
+}
diff --git a/sound/pci/ctxfi/ctresource.h b/sound/pci/ctxfi/ctresource.h
new file mode 100644
index 0000000..0838c2e
--- /dev/null
+++ b/sound/pci/ctxfi/ctresource.h
@@ -0,0 +1,72 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctresource.h
+ *
+ * @Brief
+ * This file contains the definition of generic hardware resources for
+ * resource management.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTRESOURCE_H
+#define CTRESOURCE_H
+
+#include <linux/types.h>
+
+enum RSCTYP {
+	SRC,
+	SRCIMP,
+	AMIXER,
+	SUM,
+	DAIO,
+	NUM_RSCTYP	/* This must be the last one and less than 16 */
+};
+
+struct rsc_ops;
+
+struct rsc {
+	u32 idx:12;	/* The index of a resource */
+	u32 type:4;	/* The type (RSCTYP) of a resource */
+	u32 conj:12;	/* Current conjugate index */
+	u32 msr:4;	/* The Master Sample Rate a resource working on */
+	void *ctrl_blk;	/* Chip specific control info block for a resource */
+	void *hw;	/* Chip specific object for hardware access means */
+	struct rsc_ops *ops;	/* Generic resource operations */
+};
+
+struct rsc_ops {
+	int (*master)(struct rsc *rsc);	/* Move to master resource */
+	int (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */
+	int (*index)(const struct rsc *rsc); /* Return the index of resource */
+	/* Return the output slot number */
+	int (*output_slot)(const struct rsc *rsc);
+};
+
+int rsc_init(struct rsc *rsc, u32 idx, enum RSCTYP type, u32 msr, void *hw);
+int rsc_uninit(struct rsc *rsc);
+
+struct rsc_mgr {
+	enum RSCTYP type; /* The type (RSCTYP) of resource to manage */
+	unsigned int amount; /* The total amount of a kind of resource */
+	unsigned int avail; /* The amount of currently available resources */
+	unsigned char *rscs; /* The bit-map for resource allocation */
+	void *ctrl_blk; /* Chip specific control info block */
+	void *hw; /* Chip specific object for hardware access */
+};
+
+/* Resource management is based on bit-map mechanism */
+int rsc_mgr_init(struct rsc_mgr *mgr, enum RSCTYP type,
+		 unsigned int amount, void *hw);
+int rsc_mgr_uninit(struct rsc_mgr *mgr);
+int mgr_get_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int *ridx);
+int mgr_put_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int idx);
+
+#endif /* CTRESOURCE_H */
diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c
new file mode 100644
index 0000000..d3e0ad5
--- /dev/null
+++ b/sound/pci/ctxfi/ctsrc.c
@@ -0,0 +1,886 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctsrc.c
+ *
+ * @Brief
+ * This file contains the implementation of the Sample Rate Convertor
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#include "ctsrc.h"
+#include "cthardware.h"
+#include <linux/slab.h>
+
+#define SRC_RESOURCE_NUM	64
+#define SRCIMP_RESOURCE_NUM	256
+
+static unsigned int conj_mask;
+
+static int src_default_config_memrd(struct src *src);
+static int src_default_config_memwr(struct src *src);
+static int src_default_config_arcrw(struct src *src);
+
+static int (*src_default_config[3])(struct src *) = {
+	[MEMRD] = src_default_config_memrd,
+	[MEMWR] = src_default_config_memwr,
+	[ARCRW] = src_default_config_arcrw
+};
+
+static int src_set_state(struct src *src, unsigned int state)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_state(src->rsc.ctrl_blk, state);
+
+	return 0;
+}
+
+static int src_set_bm(struct src *src, unsigned int bm)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_bm(src->rsc.ctrl_blk, bm);
+
+	return 0;
+}
+
+static int src_set_sf(struct src *src, unsigned int sf)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_sf(src->rsc.ctrl_blk, sf);
+
+	return 0;
+}
+
+static int src_set_pm(struct src *src, unsigned int pm)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_pm(src->rsc.ctrl_blk, pm);
+
+	return 0;
+}
+
+static int src_set_rom(struct src *src, unsigned int rom)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_rom(src->rsc.ctrl_blk, rom);
+
+	return 0;
+}
+
+static int src_set_vo(struct src *src, unsigned int vo)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_vo(src->rsc.ctrl_blk, vo);
+
+	return 0;
+}
+
+static int src_set_st(struct src *src, unsigned int st)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_st(src->rsc.ctrl_blk, st);
+
+	return 0;
+}
+
+static int src_set_bp(struct src *src, unsigned int bp)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_bp(src->rsc.ctrl_blk, bp);
+
+	return 0;
+}
+
+static int src_set_cisz(struct src *src, unsigned int cisz)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_cisz(src->rsc.ctrl_blk, cisz);
+
+	return 0;
+}
+
+static int src_set_ca(struct src *src, unsigned int ca)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_ca(src->rsc.ctrl_blk, ca);
+
+	return 0;
+}
+
+static int src_set_sa(struct src *src, unsigned int sa)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_sa(src->rsc.ctrl_blk, sa);
+
+	return 0;
+}
+
+static int src_set_la(struct src *src, unsigned int la)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_la(src->rsc.ctrl_blk, la);
+
+	return 0;
+}
+
+static int src_set_pitch(struct src *src, unsigned int pitch)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_pitch(src->rsc.ctrl_blk, pitch);
+
+	return 0;
+}
+
+static int src_set_clear_zbufs(struct src *src)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
+
+	return 0;
+}
+
+static int src_commit_write(struct src *src)
+{
+	struct hw *hw = NULL;
+	int i = 0;
+	unsigned int dirty = 0;
+
+	hw = (struct hw *)src->rsc.hw;
+	src->rsc.ops->master(&src->rsc);
+	if (src->rsc.msr > 1) {
+		/* Save dirty flags for conjugate resource programming */
+		dirty = hw->src_get_dirty(src->rsc.ctrl_blk) & conj_mask;
+	}
+	hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+						src->rsc.ctrl_blk);
+
+	/* Program conjugate parameter mixer resources */
+	if (MEMWR == src->mode)
+		return 0;
+
+	for (i = 1; i < src->rsc.msr; i++) {
+		src->rsc.ops->next_conj(&src->rsc);
+		hw->src_set_dirty(src->rsc.ctrl_blk, dirty);
+		hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+							src->rsc.ctrl_blk);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_get_ca(struct src *src)
+{
+	struct hw *hw = NULL;
+
+	hw = (struct hw *)src->rsc.hw;
+	return hw->src_get_ca(hw, src->rsc.ops->index(&src->rsc),
+						src->rsc.ctrl_blk);
+}
+
+static int src_init(struct src *src)
+{
+	src_default_config[src->mode](src);
+
+	return 0;
+}
+
+static struct src *src_next_interleave(struct src *src)
+{
+	return src->intlv;
+}
+
+static int src_default_config_memrd(struct src *src)
+{
+	struct hw *hw = src->rsc.hw;
+	unsigned int rsr = 0, msr = 0;
+
+	hw->src_set_state(src->rsc.ctrl_blk, SRC_STATE_OFF);
+	hw->src_set_bm(src->rsc.ctrl_blk, 1);
+	for (rsr = 0, msr = src->rsc.msr; msr > 1; msr >>= 1)
+		rsr++;
+
+	hw->src_set_rsr(src->rsc.ctrl_blk, rsr);
+	hw->src_set_sf(src->rsc.ctrl_blk, SRC_SF_S16);
+	hw->src_set_wr(src->rsc.ctrl_blk, 0);
+	hw->src_set_pm(src->rsc.ctrl_blk, 0);
+	hw->src_set_rom(src->rsc.ctrl_blk, 0);
+	hw->src_set_vo(src->rsc.ctrl_blk, 0);
+	hw->src_set_st(src->rsc.ctrl_blk, 0);
+	hw->src_set_ilsz(src->rsc.ctrl_blk, src->multi - 1);
+	hw->src_set_cisz(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_sa(src->rsc.ctrl_blk, 0x0);
+	hw->src_set_la(src->rsc.ctrl_blk, 0x1000);
+	hw->src_set_ca(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_pitch(src->rsc.ctrl_blk, 0x1000000);
+	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
+
+	src->rsc.ops->master(&src->rsc);
+	hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+						src->rsc.ctrl_blk);
+
+	for (msr = 1; msr < src->rsc.msr; msr++) {
+		src->rsc.ops->next_conj(&src->rsc);
+		hw->src_set_pitch(src->rsc.ctrl_blk, 0x1000000);
+		hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+							src->rsc.ctrl_blk);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_default_config_memwr(struct src *src)
+{
+	struct hw *hw = src->rsc.hw;
+
+	hw->src_set_state(src->rsc.ctrl_blk, SRC_STATE_OFF);
+	hw->src_set_bm(src->rsc.ctrl_blk, 1);
+	hw->src_set_rsr(src->rsc.ctrl_blk, 0);
+	hw->src_set_sf(src->rsc.ctrl_blk, SRC_SF_S16);
+	hw->src_set_wr(src->rsc.ctrl_blk, 1);
+	hw->src_set_pm(src->rsc.ctrl_blk, 0);
+	hw->src_set_rom(src->rsc.ctrl_blk, 0);
+	hw->src_set_vo(src->rsc.ctrl_blk, 0);
+	hw->src_set_st(src->rsc.ctrl_blk, 0);
+	hw->src_set_ilsz(src->rsc.ctrl_blk, 0);
+	hw->src_set_cisz(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_sa(src->rsc.ctrl_blk, 0x0);
+	hw->src_set_la(src->rsc.ctrl_blk, 0x1000);
+	hw->src_set_ca(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_pitch(src->rsc.ctrl_blk, 0x1000000);
+	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
+
+	src->rsc.ops->master(&src->rsc);
+	hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+						src->rsc.ctrl_blk);
+
+	return 0;
+}
+
+static int src_default_config_arcrw(struct src *src)
+{
+	struct hw *hw = src->rsc.hw;
+	unsigned int rsr = 0, msr = 0;
+	unsigned int dirty;
+
+	hw->src_set_state(src->rsc.ctrl_blk, SRC_STATE_OFF);
+	hw->src_set_bm(src->rsc.ctrl_blk, 0);
+	for (rsr = 0, msr = src->rsc.msr; msr > 1; msr >>= 1)
+		rsr++;
+
+	hw->src_set_rsr(src->rsc.ctrl_blk, rsr);
+	hw->src_set_sf(src->rsc.ctrl_blk, SRC_SF_F32);
+	hw->src_set_wr(src->rsc.ctrl_blk, 0);
+	hw->src_set_pm(src->rsc.ctrl_blk, 0);
+	hw->src_set_rom(src->rsc.ctrl_blk, 0);
+	hw->src_set_vo(src->rsc.ctrl_blk, 0);
+	hw->src_set_st(src->rsc.ctrl_blk, 0);
+	hw->src_set_ilsz(src->rsc.ctrl_blk, 0);
+	hw->src_set_cisz(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_sa(src->rsc.ctrl_blk, 0x0);
+	/*hw->src_set_sa(src->rsc.ctrl_blk, 0x100);*/
+	hw->src_set_la(src->rsc.ctrl_blk, 0x1000);
+	/*hw->src_set_la(src->rsc.ctrl_blk, 0x03ffffe0);*/
+	hw->src_set_ca(src->rsc.ctrl_blk, 0x80);
+	hw->src_set_pitch(src->rsc.ctrl_blk, 0x1000000);
+	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
+
+	dirty = hw->src_get_dirty(src->rsc.ctrl_blk);
+	src->rsc.ops->master(&src->rsc);
+	for (msr = 0; msr < src->rsc.msr; msr++) {
+		hw->src_set_dirty(src->rsc.ctrl_blk, dirty);
+		hw->src_commit_write(hw, src->rsc.ops->index(&src->rsc),
+							src->rsc.ctrl_blk);
+		src->rsc.ops->next_conj(&src->rsc);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static struct src_rsc_ops src_rsc_ops = {
+	.set_state		= src_set_state,
+	.set_bm			= src_set_bm,
+	.set_sf			= src_set_sf,
+	.set_pm			= src_set_pm,
+	.set_rom		= src_set_rom,
+	.set_vo			= src_set_vo,
+	.set_st			= src_set_st,
+	.set_bp			= src_set_bp,
+	.set_cisz		= src_set_cisz,
+	.set_ca			= src_set_ca,
+	.set_sa			= src_set_sa,
+	.set_la			= src_set_la,
+	.set_pitch		= src_set_pitch,
+	.set_clr_zbufs		= src_set_clear_zbufs,
+	.commit_write		= src_commit_write,
+	.get_ca			= src_get_ca,
+	.init			= src_init,
+	.next_interleave	= src_next_interleave,
+};
+
+static int
+src_rsc_init(struct src *src, u32 idx,
+	     const struct src_desc *desc, struct src_mgr *mgr)
+{
+	int err = 0;
+	int i = 0, n = 0;
+	struct src *p;
+
+	n = (MEMRD == desc->mode) ? desc->multi : 1;
+	for (i = 0, p = src; i < n; i++, p++) {
+		err = rsc_init(&p->rsc, idx + i, SRC, desc->msr, mgr->mgr.hw);
+		if (err)
+			goto error1;
+
+		/* Initialize src specific rsc operations */
+		p->ops = &src_rsc_ops;
+		p->multi = (0 == i) ? desc->multi : 1;
+		p->mode = desc->mode;
+		src_default_config[desc->mode](p);
+		mgr->src_enable(mgr, p);
+		p->intlv = p + 1;
+	}
+	(--p)->intlv = NULL;	/* Set @intlv of the last SRC to NULL */
+
+	mgr->commit_write(mgr);
+
+	return 0;
+
+error1:
+	for (i--, p--; i >= 0; i--, p--) {
+		mgr->src_disable(mgr, p);
+		rsc_uninit(&p->rsc);
+	}
+	mgr->commit_write(mgr);
+	return err;
+}
+
+static int src_rsc_uninit(struct src *src, struct src_mgr *mgr)
+{
+	int i = 0, n = 0;
+	struct src *p;
+
+	n = (MEMRD == src->mode) ? src->multi : 1;
+	for (i = 0, p = src; i < n; i++, p++) {
+		mgr->src_disable(mgr, p);
+		rsc_uninit(&p->rsc);
+		p->multi = 0;
+		p->ops = NULL;
+		p->mode = NUM_SRCMODES;
+		p->intlv = NULL;
+	}
+	mgr->commit_write(mgr);
+
+	return 0;
+}
+
+static int
+get_src_rsc(struct src_mgr *mgr, const struct src_desc *desc, struct src **rsrc)
+{
+	unsigned int idx = SRC_RESOURCE_NUM;
+	int err = 0;
+	struct src *src = NULL;
+	unsigned long flags;
+
+	*rsrc = NULL;
+
+	/* Check whether there are sufficient src resources to meet request. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	if (MEMRD == desc->mode)
+		err = mgr_get_resource(&mgr->mgr, desc->multi, &idx);
+	else
+		err = mgr_get_resource(&mgr->mgr, 1, &idx);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "Can't meet SRC resource request!\n");
+		return err;
+	}
+
+	/* Allocate mem for master src resource */
+	if (MEMRD == desc->mode)
+		src = kzalloc(sizeof(*src)*desc->multi, GFP_KERNEL);
+	else
+		src = kzalloc(sizeof(*src), GFP_KERNEL);
+
+	if (NULL == src) {
+		err = -ENOMEM;
+		goto error1;
+	}
+
+	err = src_rsc_init(src, idx, desc, mgr);
+	if (err)
+		goto error2;
+
+	*rsrc = src;
+
+	return 0;
+
+error2:
+	kfree(src);
+error1:
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	if (MEMRD == desc->mode)
+		mgr_put_resource(&mgr->mgr, desc->multi, idx);
+	else
+		mgr_put_resource(&mgr->mgr, 1, idx);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	return err;
+}
+
+static int put_src_rsc(struct src_mgr *mgr, struct src *src)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	src->rsc.ops->master(&src->rsc);
+	if (MEMRD == src->mode)
+		mgr_put_resource(&mgr->mgr, src->multi,
+				 src->rsc.ops->index(&src->rsc));
+	else
+		mgr_put_resource(&mgr->mgr, 1, src->rsc.ops->index(&src->rsc));
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	src_rsc_uninit(src, mgr);
+	kfree(src);
+
+	return 0;
+}
+
+static int src_enable_s(struct src_mgr *mgr, struct src *src)
+{
+	struct hw *hw = mgr->mgr.hw;
+	int i = 0;
+
+	src->rsc.ops->master(&src->rsc);
+	for (i = 0; i < src->rsc.msr; i++) {
+		hw->src_mgr_enbs_src(mgr->mgr.ctrl_blk,
+				     src->rsc.ops->index(&src->rsc));
+		src->rsc.ops->next_conj(&src->rsc);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_enable(struct src_mgr *mgr, struct src *src)
+{
+	struct hw *hw = mgr->mgr.hw;
+	int i = 0;
+
+	src->rsc.ops->master(&src->rsc);
+	for (i = 0; i < src->rsc.msr; i++) {
+		hw->src_mgr_enb_src(mgr->mgr.ctrl_blk,
+				    src->rsc.ops->index(&src->rsc));
+		src->rsc.ops->next_conj(&src->rsc);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_disable(struct src_mgr *mgr, struct src *src)
+{
+	struct hw *hw = mgr->mgr.hw;
+	int i = 0;
+
+	src->rsc.ops->master(&src->rsc);
+	for (i = 0; i < src->rsc.msr; i++) {
+		hw->src_mgr_dsb_src(mgr->mgr.ctrl_blk,
+				    src->rsc.ops->index(&src->rsc));
+		src->rsc.ops->next_conj(&src->rsc);
+	}
+	src->rsc.ops->master(&src->rsc);
+
+	return 0;
+}
+
+static int src_mgr_commit_write(struct src_mgr *mgr)
+{
+	struct hw *hw = mgr->mgr.hw;
+
+	hw->src_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
+
+	return 0;
+}
+
+int src_mgr_create(void *hw, struct src_mgr **rsrc_mgr)
+{
+	int err = 0, i = 0;
+	struct src_mgr *src_mgr;
+
+	*rsrc_mgr = NULL;
+	src_mgr = kzalloc(sizeof(*src_mgr), GFP_KERNEL);
+	if (NULL == src_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&src_mgr->mgr, SRC, SRC_RESOURCE_NUM, hw);
+	if (err)
+		goto error1;
+
+	spin_lock_init(&src_mgr->mgr_lock);
+	conj_mask = ((struct hw *)hw)->src_dirty_conj_mask();
+
+	src_mgr->get_src = get_src_rsc;
+	src_mgr->put_src = put_src_rsc;
+	src_mgr->src_enable_s = src_enable_s;
+	src_mgr->src_enable = src_enable;
+	src_mgr->src_disable = src_disable;
+	src_mgr->commit_write = src_mgr_commit_write;
+
+	/* Disable all SRC resources. */
+	for (i = 0; i < 256; i++)
+		((struct hw *)hw)->src_mgr_dsb_src(src_mgr->mgr.ctrl_blk, i);
+
+	((struct hw *)hw)->src_mgr_commit_write(hw, src_mgr->mgr.ctrl_blk);
+
+	*rsrc_mgr = src_mgr;
+
+	return 0;
+
+error1:
+	kfree(src_mgr);
+	return err;
+}
+
+int src_mgr_destroy(struct src_mgr *src_mgr)
+{
+	rsc_mgr_uninit(&src_mgr->mgr);
+	kfree(src_mgr);
+
+	return 0;
+}
+
+/* SRCIMP resource manager operations */
+
+static int srcimp_master(struct rsc *rsc)
+{
+	rsc->conj = 0;
+	return rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0];
+}
+
+static int srcimp_next_conj(struct rsc *rsc)
+{
+	rsc->conj++;
+	return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj];
+}
+
+static int srcimp_index(const struct rsc *rsc)
+{
+	return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj];
+}
+
+static struct rsc_ops srcimp_basic_rsc_ops = {
+	.master		= srcimp_master,
+	.next_conj	= srcimp_next_conj,
+	.index		= srcimp_index,
+	.output_slot	= NULL,
+};
+
+static int srcimp_map(struct srcimp *srcimp, struct src *src, struct rsc *input)
+{
+	struct imapper *entry = NULL;
+	int i = 0;
+
+	srcimp->rsc.ops->master(&srcimp->rsc);
+	src->rsc.ops->master(&src->rsc);
+	input->ops->master(input);
+
+	/* Program master and conjugate resources */
+	for (i = 0; i < srcimp->rsc.msr; i++) {
+		entry = &srcimp->imappers[i];
+		entry->slot = input->ops->output_slot(input);
+		entry->user = src->rsc.ops->index(&src->rsc);
+		entry->addr = srcimp->rsc.ops->index(&srcimp->rsc);
+		srcimp->mgr->imap_add(srcimp->mgr, entry);
+		srcimp->mapped |= (0x1 << i);
+
+		srcimp->rsc.ops->next_conj(&srcimp->rsc);
+		input->ops->next_conj(input);
+	}
+
+	srcimp->rsc.ops->master(&srcimp->rsc);
+	input->ops->master(input);
+
+	return 0;
+}
+
+static int srcimp_unmap(struct srcimp *srcimp)
+{
+	int i = 0;
+
+	/* Program master and conjugate resources */
+	for (i = 0; i < srcimp->rsc.msr; i++) {
+		if (srcimp->mapped & (0x1 << i)) {
+			srcimp->mgr->imap_delete(srcimp->mgr,
+						 &srcimp->imappers[i]);
+			srcimp->mapped &= ~(0x1 << i);
+		}
+	}
+
+	return 0;
+}
+
+static struct srcimp_rsc_ops srcimp_ops = {
+	.map = srcimp_map,
+	.unmap = srcimp_unmap
+};
+
+static int srcimp_rsc_init(struct srcimp *srcimp,
+			   const struct srcimp_desc *desc,
+			   struct srcimp_mgr *mgr)
+{
+	int err = 0;
+
+	err = rsc_init(&srcimp->rsc, srcimp->idx[0],
+		       SRCIMP, desc->msr, mgr->mgr.hw);
+	if (err)
+		return err;
+
+	/* Reserve memory for imapper nodes */
+	srcimp->imappers = kzalloc(sizeof(struct imapper)*desc->msr,
+				   GFP_KERNEL);
+	if (NULL == srcimp->imappers) {
+		err = -ENOMEM;
+		goto error1;
+	}
+
+	/* Set srcimp specific operations */
+	srcimp->rsc.ops = &srcimp_basic_rsc_ops;
+	srcimp->ops = &srcimp_ops;
+	srcimp->mgr = mgr;
+
+	srcimp->rsc.ops->master(&srcimp->rsc);
+
+	return 0;
+
+error1:
+	rsc_uninit(&srcimp->rsc);
+	return err;
+}
+
+static int srcimp_rsc_uninit(struct srcimp *srcimp)
+{
+	if (NULL != srcimp->imappers) {
+		kfree(srcimp->imappers);
+		srcimp->imappers = NULL;
+	}
+	srcimp->ops = NULL;
+	srcimp->mgr = NULL;
+	rsc_uninit(&srcimp->rsc);
+
+	return 0;
+}
+
+static int get_srcimp_rsc(struct srcimp_mgr *mgr,
+			  const struct srcimp_desc *desc,
+			  struct srcimp **rsrcimp)
+{
+	int err = 0, i = 0;
+	unsigned int idx = 0;
+	struct srcimp *srcimp = NULL;
+	unsigned long flags;
+
+	*rsrcimp = NULL;
+
+	/* Allocate mem for SRCIMP resource */
+	srcimp = kzalloc(sizeof(*srcimp), GFP_KERNEL);
+	if (NULL == srcimp) {
+		err = -ENOMEM;
+		return err;
+	}
+
+	/* Check whether there are sufficient SRCIMP resources. */
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < desc->msr; i++) {
+		err = mgr_get_resource(&mgr->mgr, 1, &idx);
+		if (err)
+			break;
+
+		srcimp->idx[i] = idx;
+	}
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	if (err) {
+		printk(KERN_ERR "Can't meet SRCIMP resource request!\n");
+		goto error1;
+	}
+
+	err = srcimp_rsc_init(srcimp, desc, mgr);
+	if (err)
+		goto error1;
+
+	*rsrcimp = srcimp;
+
+	return 0;
+
+error1:
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i--; i >= 0; i--)
+		mgr_put_resource(&mgr->mgr, 1, srcimp->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	kfree(srcimp);
+	return err;
+}
+
+static int put_srcimp_rsc(struct srcimp_mgr *mgr, struct srcimp *srcimp)
+{
+	unsigned long flags;
+	int i = 0;
+
+	spin_lock_irqsave(&mgr->mgr_lock, flags);
+	for (i = 0; i < srcimp->rsc.msr; i++)
+		mgr_put_resource(&mgr->mgr, 1, srcimp->idx[i]);
+
+	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
+	srcimp_rsc_uninit(srcimp);
+	kfree(srcimp);
+
+	return 0;
+}
+
+static int srcimp_map_op(void *data, struct imapper *entry)
+{
+	struct rsc_mgr *mgr = &((struct srcimp_mgr *)data)->mgr;
+	struct hw *hw = mgr->hw;
+
+	hw->srcimp_mgr_set_imaparc(mgr->ctrl_blk, entry->slot);
+	hw->srcimp_mgr_set_imapuser(mgr->ctrl_blk, entry->user);
+	hw->srcimp_mgr_set_imapnxt(mgr->ctrl_blk, entry->next);
+	hw->srcimp_mgr_set_imapaddr(mgr->ctrl_blk, entry->addr);
+	hw->srcimp_mgr_commit_write(mgr->hw, mgr->ctrl_blk);
+
+	return 0;
+}
+
+static int srcimp_imap_add(struct srcimp_mgr *mgr, struct imapper *entry)
+{
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&mgr->imap_lock, flags);
+	if ((0 == entry->addr) && (mgr->init_imap_added)) {
+		input_mapper_delete(&mgr->imappers,
+				    mgr->init_imap, srcimp_map_op, mgr);
+		mgr->init_imap_added = 0;
+	}
+	err = input_mapper_add(&mgr->imappers, entry, srcimp_map_op, mgr);
+	spin_unlock_irqrestore(&mgr->imap_lock, flags);
+
+	return err;
+}
+
+static int srcimp_imap_delete(struct srcimp_mgr *mgr, struct imapper *entry)
+{
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&mgr->imap_lock, flags);
+	err = input_mapper_delete(&mgr->imappers, entry, srcimp_map_op, mgr);
+	if (list_empty(&mgr->imappers)) {
+		input_mapper_add(&mgr->imappers, mgr->init_imap,
+				 srcimp_map_op, mgr);
+		mgr->init_imap_added = 1;
+	}
+	spin_unlock_irqrestore(&mgr->imap_lock, flags);
+
+	return err;
+}
+
+int srcimp_mgr_create(void *hw, struct srcimp_mgr **rsrcimp_mgr)
+{
+	int err = 0;
+	struct srcimp_mgr *srcimp_mgr;
+	struct imapper *entry;
+
+	*rsrcimp_mgr = NULL;
+	srcimp_mgr = kzalloc(sizeof(*srcimp_mgr), GFP_KERNEL);
+	if (NULL == srcimp_mgr)
+		return -ENOMEM;
+
+	err = rsc_mgr_init(&srcimp_mgr->mgr, SRCIMP, SRCIMP_RESOURCE_NUM, hw);
+	if (err)
+		goto error1;
+
+	spin_lock_init(&srcimp_mgr->mgr_lock);
+	spin_lock_init(&srcimp_mgr->imap_lock);
+	INIT_LIST_HEAD(&srcimp_mgr->imappers);
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (NULL == entry) {
+		err = -ENOMEM;
+		goto error2;
+	}
+	entry->slot = entry->addr = entry->next = entry->user = 0;
+	list_add(&entry->list, &srcimp_mgr->imappers);
+	srcimp_mgr->init_imap = entry;
+	srcimp_mgr->init_imap_added = 1;
+
+	srcimp_mgr->get_srcimp = get_srcimp_rsc;
+	srcimp_mgr->put_srcimp = put_srcimp_rsc;
+	srcimp_mgr->imap_add = srcimp_imap_add;
+	srcimp_mgr->imap_delete = srcimp_imap_delete;
+
+	*rsrcimp_mgr = srcimp_mgr;
+
+	return 0;
+
+error2:
+	rsc_mgr_uninit(&srcimp_mgr->mgr);
+error1:
+	kfree(srcimp_mgr);
+	return err;
+}
+
+int srcimp_mgr_destroy(struct srcimp_mgr *srcimp_mgr)
+{
+	unsigned long flags;
+
+	/* free src input mapper list */
+	spin_lock_irqsave(&srcimp_mgr->imap_lock, flags);
+	free_input_mapper_list(&srcimp_mgr->imappers);
+	spin_unlock_irqrestore(&srcimp_mgr->imap_lock, flags);
+
+	rsc_mgr_uninit(&srcimp_mgr->mgr);
+	kfree(srcimp_mgr);
+
+	return 0;
+}
diff --git a/sound/pci/ctxfi/ctsrc.h b/sound/pci/ctxfi/ctsrc.h
new file mode 100644
index 0000000..259366a
--- /dev/null
+++ b/sound/pci/ctxfi/ctsrc.h
@@ -0,0 +1,149 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File	ctsrc.h
+ *
+ * @Brief
+ * This file contains the definition of the Sample Rate Convertor
+ * resource management object.
+ *
+ * @Author	Liu Chun
+ * @Date 	May 13 2008
+ *
+ */
+
+#ifndef CTSRC_H
+#define CTSRC_H
+
+#include "ctresource.h"
+#include "ctimap.h"
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#define SRC_STATE_OFF	0x0
+#define SRC_STATE_INIT	0x4
+#define SRC_STATE_RUN	0x5
+
+#define SRC_SF_U8	0x0
+#define SRC_SF_S16	0x1
+#define SRC_SF_S24	0x2
+#define SRC_SF_S32	0x3
+#define SRC_SF_F32	0x4
+
+/* Define the descriptor of a src resource */
+enum SRCMODE {
+	MEMRD,		/* Read data from host memory */
+	MEMWR,		/* Write data to host memory */
+	ARCRW,		/* Read from and write to audio ring channel */
+	NUM_SRCMODES
+};
+
+struct src_rsc_ops;
+
+struct src {
+	struct rsc rsc; /* Basic resource info */
+	struct src *intlv; /* Pointer to next interleaved SRC in a series */
+	struct src_rsc_ops *ops; /* SRC specific operations */
+	/* Number of contiguous srcs for interleaved usage */
+	unsigned char multi;
+	unsigned char mode; /* Working mode of this SRC resource */
+};
+
+struct src_rsc_ops {
+	int (*set_state)(struct src *src, unsigned int state);
+	int (*set_bm)(struct src *src, unsigned int bm);
+	int (*set_sf)(struct src *src, unsigned int sf);
+	int (*set_pm)(struct src *src, unsigned int pm);
+	int (*set_rom)(struct src *src, unsigned int rom);
+	int (*set_vo)(struct src *src, unsigned int vo);
+	int (*set_st)(struct src *src, unsigned int st);
+	int (*set_bp)(struct src *src, unsigned int bp);
+	int (*set_cisz)(struct src *src, unsigned int cisz);
+	int (*set_ca)(struct src *src, unsigned int ca);
+	int (*set_sa)(struct src *src, unsigned int sa);
+	int (*set_la)(struct src *src, unsigned int la);
+	int (*set_pitch)(struct src *src, unsigned int pitch);
+	int (*set_clr_zbufs)(struct src *src);
+	int (*commit_write)(struct src *src);
+	int (*get_ca)(struct src *src);
+	int (*init)(struct src *src);
+	struct src* (*next_interleave)(struct src *src);
+};
+
+/* Define src resource request description info */
+struct src_desc {
+	/* Number of contiguous master srcs for interleaved usage */
+	unsigned char multi;
+	unsigned char msr;
+	unsigned char mode; /* Working mode of the requested srcs */
+};
+
+/* Define src manager object */
+struct src_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+
+	 /* request src resource */
+	int (*get_src)(struct src_mgr *mgr,
+		       const struct src_desc *desc, struct src **rsrc);
+	/* return src resource */
+	int (*put_src)(struct src_mgr *mgr, struct src *src);
+	int (*src_enable_s)(struct src_mgr *mgr, struct src *src);
+	int (*src_enable)(struct src_mgr *mgr, struct src *src);
+	int (*src_disable)(struct src_mgr *mgr, struct src *src);
+	int (*commit_write)(struct src_mgr *mgr);
+};
+
+/* Define the descriptor of a SRC Input Mapper resource */
+struct srcimp_mgr;
+struct srcimp_rsc_ops;
+
+struct srcimp {
+	struct rsc rsc;
+	unsigned char idx[8];
+	struct imapper *imappers;
+	unsigned int mapped; /* A bit-map indicating which conj rsc is mapped */
+	struct srcimp_mgr *mgr;
+	struct srcimp_rsc_ops *ops;
+};
+
+struct srcimp_rsc_ops {
+	int (*map)(struct srcimp *srcimp, struct src *user, struct rsc *input);
+	int (*unmap)(struct srcimp *srcimp);
+};
+
+/* Define SRCIMP resource request description info */
+struct srcimp_desc {
+	unsigned int msr;
+};
+
+struct srcimp_mgr {
+	struct rsc_mgr mgr;	/* Basic resource manager info */
+	spinlock_t mgr_lock;
+	spinlock_t imap_lock;
+	struct list_head imappers;
+	struct imapper *init_imap;
+	unsigned int init_imap_added;
+
+	 /* request srcimp resource */
+	int (*get_srcimp)(struct srcimp_mgr *mgr,
+			  const struct srcimp_desc *desc,
+			  struct srcimp **rsrcimp);
+	/* return srcimp resource */
+	int (*put_srcimp)(struct srcimp_mgr *mgr, struct srcimp *srcimp);
+	int (*imap_add)(struct srcimp_mgr *mgr, struct imapper *entry);
+	int (*imap_delete)(struct srcimp_mgr *mgr, struct imapper *entry);
+};
+
+/* Constructor and destructor of SRC resource manager */
+int src_mgr_create(void *hw, struct src_mgr **rsrc_mgr);
+int src_mgr_destroy(struct src_mgr *src_mgr);
+/* Constructor and destructor of SRCIMP resource manager */
+int srcimp_mgr_create(void *hw, struct srcimp_mgr **rsrc_mgr);
+int srcimp_mgr_destroy(struct srcimp_mgr *srcimp_mgr);
+
+#endif /* CTSRC_H */
diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c
new file mode 100644
index 0000000..46ca04c
--- /dev/null
+++ b/sound/pci/ctxfi/ctvmem.c
@@ -0,0 +1,254 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File    ctvmem.c
+ *
+ * @Brief
+ * This file contains the implementation of virtual memory management object
+ * for card device.
+ *
+ * @Author Liu Chun
+ * @Date Apr 1 2008
+ */
+
+#include "ctvmem.h"
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <asm/page.h>	/* for PAGE_SIZE macro definition */
+#include <linux/io.h>
+#include <asm/pgtable.h>
+
+#define CT_PTES_PER_PAGE (PAGE_SIZE / sizeof(void *))
+#define CT_ADDRS_PER_PAGE (CT_PTES_PER_PAGE * PAGE_SIZE)
+
+/* *
+ * Find or create vm block based on requested @size.
+ * @size must be page aligned.
+ * */
+static struct ct_vm_block *
+get_vm_block(struct ct_vm *vm, unsigned int size)
+{
+	struct ct_vm_block *block = NULL, *entry = NULL;
+	struct list_head *pos = NULL;
+
+	list_for_each(pos, &vm->unused) {
+		entry = list_entry(pos, struct ct_vm_block, list);
+		if (entry->size >= size)
+			break; /* found a block that is big enough */
+	}
+	if (pos == &vm->unused)
+		return NULL;
+
+	if (entry->size == size) {
+		/* Move the vm node from unused list to used list directly */
+		list_del(&entry->list);
+		list_add(&entry->list, &vm->used);
+		vm->size -= size;
+		return entry;
+	}
+
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (NULL == block)
+		return NULL;
+
+	block->addr = entry->addr;
+	block->size = size;
+	list_add(&block->list, &vm->used);
+	entry->addr += size;
+	entry->size -= size;
+	vm->size -= size;
+
+	return block;
+}
+
+static void put_vm_block(struct ct_vm *vm, struct ct_vm_block *block)
+{
+	struct ct_vm_block *entry = NULL, *pre_ent = NULL;
+	struct list_head *pos = NULL, *pre = NULL;
+
+	list_del(&block->list);
+	vm->size += block->size;
+
+	list_for_each(pos, &vm->unused) {
+		entry = list_entry(pos, struct ct_vm_block, list);
+		if (entry->addr >= (block->addr + block->size))
+			break; /* found a position */
+	}
+	if (pos == &vm->unused) {
+		list_add_tail(&block->list, &vm->unused);
+		entry = block;
+	} else {
+		if ((block->addr + block->size) == entry->addr) {
+			entry->addr = block->addr;
+			entry->size += block->size;
+			kfree(block);
+		} else {
+			__list_add(&block->list, pos->prev, pos);
+			entry = block;
+		}
+	}
+
+	pos = &entry->list;
+	pre = pos->prev;
+	while (pre != &vm->unused) {
+		entry = list_entry(pos, struct ct_vm_block, list);
+		pre_ent = list_entry(pre, struct ct_vm_block, list);
+		if ((pre_ent->addr + pre_ent->size) > entry->addr)
+			break;
+
+		pre_ent->size += entry->size;
+		list_del(pos);
+		kfree(entry);
+		pos = pre;
+		pre = pos->prev;
+	}
+}
+
+/* Map host addr (kmalloced/vmalloced) to device logical addr. */
+static struct ct_vm_block *
+ct_vm_map(struct ct_vm *vm, void *host_addr, int size)
+{
+	struct ct_vm_block *block = NULL;
+	unsigned long pte_start;
+	unsigned long i;
+	unsigned long pages;
+	unsigned long start_phys;
+	unsigned long *ptp;
+
+	/* do mapping */
+	if ((unsigned long)host_addr >= VMALLOC_START) {
+		printk(KERN_ERR "Fail! Not support vmalloced addr now!\n");
+		return NULL;
+	}
+
+	if (size > vm->size) {
+		printk(KERN_ERR "Fail! No sufficient device virtural "
+				  "memory space available!\n");
+		return NULL;
+	}
+
+	start_phys = (virt_to_phys(host_addr) & PAGE_MASK);
+	pages = (PAGE_ALIGN(virt_to_phys(host_addr) + size)
+			- start_phys) >> PAGE_SHIFT;
+
+	ptp = vm->ptp[0];
+
+	block = get_vm_block(vm, (pages << PAGE_SHIFT));
+	if (block == NULL) {
+		printk(KERN_ERR "No virtual memory block that is big "
+				  "enough to allocate!\n");
+		return NULL;
+	}
+
+	pte_start = (block->addr >> PAGE_SHIFT);
+	for (i = 0; i < pages; i++)
+		ptp[pte_start+i] = start_phys + (i << PAGE_SHIFT);
+
+	block->addr += (virt_to_phys(host_addr) & (~PAGE_MASK));
+	block->size = size;
+
+	return block;
+}
+
+static void ct_vm_unmap(struct ct_vm *vm, struct ct_vm_block *block)
+{
+	/* do unmapping */
+	block->size = ((block->addr + block->size + PAGE_SIZE - 1)
+			& PAGE_MASK) - (block->addr & PAGE_MASK);
+	block->addr &= PAGE_MASK;
+	put_vm_block(vm, block);
+}
+
+/* *
+ * return the host (kmalloced) addr of the @index-th device
+ * page talbe page on success, or NULL on failure.
+ * The first returned NULL indicates the termination.
+ * */
+static void *
+ct_get_ptp_virt(struct ct_vm *vm, int index)
+{
+	void *addr;
+
+	addr = (index >= CT_PTP_NUM) ? NULL : vm->ptp[index];
+
+	return addr;
+}
+
+int ct_vm_create(struct ct_vm **rvm)
+{
+	struct ct_vm *vm;
+	struct ct_vm_block *block;
+	int i;
+
+	*rvm = NULL;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (NULL == vm)
+		return -ENOMEM;
+
+	/* Allocate page table pages */
+	for (i = 0; i < CT_PTP_NUM; i++) {
+		vm->ptp[i] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (NULL == vm->ptp[i])
+			break;
+	}
+	if (!i) {
+		/* no page table pages are allocated */
+		kfree(vm);
+		return -ENOMEM;
+	}
+	vm->size = CT_ADDRS_PER_PAGE * i;
+	/* Initialise remaining ptps */
+	for (; i < CT_PTP_NUM; i++)
+		vm->ptp[i] = NULL;
+
+	vm->map = ct_vm_map;
+	vm->unmap = ct_vm_unmap;
+	vm->get_ptp_virt = ct_get_ptp_virt;
+	INIT_LIST_HEAD(&vm->unused);
+	INIT_LIST_HEAD(&vm->used);
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (NULL != block) {
+		block->addr = 0;
+		block->size = vm->size;
+		list_add(&block->list, &vm->unused);
+	}
+
+	*rvm = vm;
+	return 0;
+}
+
+/* The caller must ensure no mapping pages are being used
+ * by hardware before calling this function */
+void ct_vm_destroy(struct ct_vm *vm)
+{
+	int i;
+	struct list_head *pos = NULL;
+	struct ct_vm_block *entry = NULL;
+
+	/* free used and unused list nodes */
+	while (!list_empty(&vm->used)) {
+		pos = vm->used.next;
+		list_del(pos);
+		entry = list_entry(pos, struct ct_vm_block, list);
+		kfree(entry);
+	}
+	while (!list_empty(&vm->unused)) {
+		pos = vm->unused.next;
+		list_del(pos);
+		entry = list_entry(pos, struct ct_vm_block, list);
+		kfree(entry);
+	}
+
+	/* free allocated page table pages */
+	for (i = 0; i < CT_PTP_NUM; i++)
+		kfree(vm->ptp[i]);
+
+	vm->size = 0;
+
+	kfree(vm);
+}
diff --git a/sound/pci/ctxfi/ctvmem.h b/sound/pci/ctxfi/ctvmem.h
new file mode 100644
index 0000000..4eb5bdd
--- /dev/null
+++ b/sound/pci/ctxfi/ctvmem.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ *
+ * @File    ctvmem.h
+ *
+ * @Brief
+ * This file contains the definition of virtual memory management object
+ * for card device.
+ *
+ * @Author Liu Chun
+ * @Date Mar 28 2008
+ */
+
+#ifndef CTVMEM_H
+#define CTVMEM_H
+
+#define CT_PTP_NUM	1	/* num of device page table pages */
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+struct ct_vm_block {
+	unsigned int addr;	/* starting logical addr of this block */
+	unsigned int size;	/* size of this device virtual mem block */
+	struct list_head list;
+};
+
+/* Virtual memory management object for card device */
+struct ct_vm {
+	void *ptp[CT_PTP_NUM];		/* Device page table pages */
+	unsigned int size;		/* Available addr space in bytes */
+	struct list_head unused;	/* List of unused blocks */
+	struct list_head used;		/* List of used blocks */
+
+	/* Map host addr (kmalloced/vmalloced) to device logical addr. */
+	struct ct_vm_block *(*map)(struct ct_vm *, void *host_addr, int size);
+	/* Unmap device logical addr area. */
+	void (*unmap)(struct ct_vm *, struct ct_vm_block *block);
+	void *(*get_ptp_virt)(struct ct_vm *vm, int index);
+};
+
+int ct_vm_create(struct ct_vm **rvm);
+void ct_vm_destroy(struct ct_vm *vm);
+
+#endif /* CTVMEM_H */
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
new file mode 100644
index 0000000..f911440
--- /dev/null
+++ b/sound/pci/ctxfi/xfi.c
@@ -0,0 +1,125 @@
+/*
+ * xfi linux driver.
+ *
+ * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ */
+
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/moduleparam.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include "ctatc.h"
+#include "ctdrv.h"
+
+MODULE_AUTHOR("Creative Technology Ltd");
+MODULE_DESCRIPTION("X-Fi driver version 1.03");
+MODULE_LICENSE("GPLv2");
+MODULE_SUPPORTED_DEVICE("{{Creative Labs, Sound Blaster X-Fi}");
+
+static unsigned int reference_rate = 48000;
+static unsigned int multiple = 2;
+module_param(reference_rate, uint, S_IRUGO);
+module_param(multiple, uint, S_IRUGO);
+
+static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
+static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
+static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+static struct pci_device_id ct_pci_dev_ids[] = {
+	/* only X-Fi is supported, so... */
+	{ PCI_DEVICE(PCI_VENDOR_CREATIVE, PCI_DEVICE_CREATIVE_20K1) },
+	{ PCI_DEVICE(PCI_VENDOR_CREATIVE, PCI_DEVICE_CREATIVE_20K2) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ct_pci_dev_ids);
+
+static int __devinit
+ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+	static int dev;
+	struct snd_card *card;
+	struct ct_atc *atc;
+	int err;
+
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+
+	if (!enable[dev]) {
+		dev++;
+		return -ENOENT;
+	}
+	err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
+	if (err)
+		return err;
+	if ((reference_rate != 48000) && (reference_rate != 44100)) {
+		printk(KERN_ERR "Invalid reference_rate value %u!!!\n"
+				"The valid values for reference_rate "
+				"are 48000 and 44100.\nValue 48000 is "
+				"assumed.\n", reference_rate);
+		reference_rate = 48000;
+	}
+	if ((multiple != 1) && (multiple != 2)) {
+		printk(KERN_ERR "Invalid multiple value %u!!!\nThe valid "
+				"values for multiple are 1 and 2.\nValue 2 "
+				"is assumed.\n", multiple);
+		multiple = 2;
+	}
+	err = ct_atc_create(card, pci, reference_rate, multiple, &atc);
+	if (err < 0)
+		goto error;
+
+	card->private_data = atc;
+
+	/* Create alsa devices supported by this card */
+	err = atc->create_alsa_devs(atc);
+	if (err < 0)
+		goto error;
+
+	strcpy(card->driver, "SB-XFi");
+	strcpy(card->shortname, "Creative X-Fi");
+	strcpy(card->longname, "Creative ALSA Driver X-Fi");
+
+	err = snd_card_register(card);
+	if (err < 0)
+		goto error;
+
+	pci_set_drvdata(pci, card);
+	dev++;
+
+	return 0;
+
+error:
+	snd_card_free(card);
+	return err;
+}
+
+static void __devexit ct_card_remove(struct pci_dev *pci)
+{
+	snd_card_free(pci_get_drvdata(pci));
+	pci_set_drvdata(pci, NULL);
+}
+
+static struct pci_driver ct_driver = {
+	.name = "SB-XFi",
+	.id_table = ct_pci_dev_ids,
+	.probe = ct_card_probe,
+	.remove = __devexit_p(ct_card_remove),
+};
+
+static int __init ct_card_init(void)
+{
+	return pci_register_driver(&ct_driver);
+}
+
+static void __exit ct_card_exit(void)
+{
+	pci_unregister_driver(&ct_driver);
+}
+
+module_init(ct_card_init)
+module_exit(ct_card_exit)
-- 
cgit v0.10.2


From 77f6bf57ba9d2c50173536dbfdacdab27cb867ca Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 14 May 2009 09:49:44 +0200
Subject: splice: fix error return code

fs/splice.c: In function 'default_file_splice_read':
fs/splice.c:566: warning: 'error' may be used uninitialized in this function

which is sort-of true.  The code will in fact return -ENOMEM instead of the
kernel_readv() return value.

Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/splice.c b/fs/splice.c
index c5e3c79..41179c0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -595,8 +595,10 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	}
 
 	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
-	if (res < 0)
+	if (res < 0) {
+		error = res;
 		goto err;
+	}
 
 	error = 0;
 	if (!res)
-- 
cgit v0.10.2


From 549b5e358d17a8c04953ed80896ce07d37722451 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 14 May 2009 17:38:46 +0900
Subject: sh: clkfwk: Add MSTP bits to SH7785 clock framework.

This plugs in all of the MSTP functions in to the clock framework,
and hands them off to the platform devices that want them.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index c499d47..64c93cb 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -36,6 +36,9 @@ struct clk {
 	unsigned long		rate;
 	unsigned long		flags;
 
+	void __iomem		*enable_reg;
+	unsigned int		enable_bit;
+
 	unsigned long		arch_flags;
 	void			*priv;
 	struct dentry		*dentry;
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 686477f..012d234 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -39,7 +39,7 @@ static DEFINE_MUTEX(clock_list_sem);
 /* Used for clocks that always have same value as the parent clock */
 unsigned long followparent_recalc(struct clk *clk)
 {
-	return clk->parent->rate;
+	return clk->parent ? clk->parent->rate : 0;
 }
 
 int clk_reparent(struct clk *child, struct clk *parent)
@@ -512,7 +512,7 @@ static int clk_debugfs_register_one(struct clk *c)
 	char *p = s;
 
 	p += sprintf(p, "%s", c->name);
-	if (c->id > 0)
+	if (c->id >= 0)
 		sprintf(p, ":%d", c->id);
 	d = debugfs_create_dir(s, pa ? pa->dentry : clk_debugfs_root);
 	if (!d)
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index b7a32dd..cf042b5 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -228,12 +228,75 @@ static struct clk *clks[] = {
 	&umem_clk,
 };
 
+static int mstpcr_clk_enable(struct clk *clk)
+{
+	__raw_writel(__raw_readl(clk->enable_reg) & ~(1 << clk->enable_bit),
+		     clk->enable_reg);
+	return 0;
+}
+
+static void mstpcr_clk_disable(struct clk *clk)
+{
+	__raw_writel(__raw_readl(clk->enable_reg) | (1 << clk->enable_bit),
+		     clk->enable_reg);
+}
+
+static struct clk_ops mstpcr_clk_ops = {
+	.enable		= mstpcr_clk_enable,
+	.disable	= mstpcr_clk_disable,
+	.recalc		= followparent_recalc,
+};
+
+#define MSTPCR0		0xffc80030
+#define MSTPCR1		0xffc80034
+
+#define CLK(_name, _id, _parent, _enable_reg,		\
+	    _enable_bit, _flags)			\
+{							\
+	.name		= _name,			\
+	.id		= _id,				\
+	.parent		= _parent,			\
+	.enable_reg	= (void __iomem *)_enable_reg,	\
+	.enable_bit	= _enable_bit,			\
+	.flags		= _flags,			\
+	.ops		= &mstpcr_clk_ops,		\
+}
+
+static struct clk mstpcr_clks[] = {
+	/* MSTPCR0 */
+	CLK("scif_fck", 5, &peripheral_clk, MSTPCR0, 29, 0),
+	CLK("scif_fck", 4, &peripheral_clk, MSTPCR0, 28, 0),
+	CLK("scif_fck", 3, &peripheral_clk, MSTPCR0, 27, 0),
+	CLK("scif_fck", 2, &peripheral_clk, MSTPCR0, 26, 0),
+	CLK("scif_fck", 1, &peripheral_clk, MSTPCR0, 25, 0),
+	CLK("scif_fck", 0, &peripheral_clk, MSTPCR0, 24, 0),
+	CLK("ssi_fck", 1, &peripheral_clk, MSTPCR0, 21, 0),
+	CLK("ssi_fck", 0, &peripheral_clk, MSTPCR0, 20, 0),
+	CLK("hac_fck", 1, &peripheral_clk, MSTPCR0, 17, 0),
+	CLK("hac_fck", 0, &peripheral_clk, MSTPCR0, 16, 0),
+	CLK("mmcif_fck", -1, &peripheral_clk, MSTPCR0, 13, 0),
+	CLK("flctl_fck", -1, &peripheral_clk, MSTPCR0, 12, 0),
+	CLK("tmu345_fck", -1, &peripheral_clk, MSTPCR0, 9, 0),
+	CLK("tmu012_fck", -1, &peripheral_clk, MSTPCR0, 8, 0),
+	CLK("siof_fck", -1, &peripheral_clk, MSTPCR0, 3, 0),
+	CLK("hspi_fck", -1, &peripheral_clk, MSTPCR0, 2, 0),
+
+	/* MSTPCR1 */
+	CLK("hudi_fck", -1, NULL, MSTPCR1, 19, 0),
+	CLK("ubc_fck", -1, NULL, MSTPCR1, 17, 0),
+	CLK("dmac_11_6_fck", -1, NULL, MSTPCR1, 5, 0),
+	CLK("dmac_5_0_fck", -1, NULL, MSTPCR1, 4, 0),
+	CLK("gdta_fck", -1, NULL, MSTPCR1, 0, 0),
+};
+
 int __init arch_clk_init(void)
 {
 	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(clks); i++)
 		ret |= clk_register(clks[i]);
+	for (i = 0; i < ARRAY_SIZE(mstpcr_clks); i++)
+		ret |= clk_register(&mstpcr_clks[i]);
 
 	return ret;
 }
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index d7e77bc..af56140 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -20,7 +20,7 @@ static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "peripheral_clk",
+	.clk = "tmu012_fck",
 	.clockevent_rating = 200,
 };
 
@@ -51,7 +51,7 @@ static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "peripheral_clk",
+	.clk = "tmu012_fck",
 	.clocksource_rating = 200,
 };
 
@@ -82,7 +82,7 @@ static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "peripheral_clk",
+	.clk = "tmu012_fck",
 };
 
 static struct resource tmu2_resources[] = {
@@ -112,7 +112,7 @@ static struct sh_timer_config tmu3_platform_data = {
 	.name = "TMU3",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
-	.clk = "peripheral_clk",
+	.clk = "tmu345_fck",
 };
 
 static struct resource tmu3_resources[] = {
@@ -142,7 +142,7 @@ static struct sh_timer_config tmu4_platform_data = {
 	.name = "TMU4",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
-	.clk = "peripheral_clk",
+	.clk = "tmu345_fck",
 };
 
 static struct resource tmu4_resources[] = {
@@ -172,7 +172,7 @@ static struct sh_timer_config tmu5_platform_data = {
 	.name = "TMU5",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
-	.clk = "peripheral_clk",
+	.clk = "tmu345_fck",
 };
 
 static struct resource tmu5_resources[] = {
@@ -204,31 +204,37 @@ static struct plat_sci_port sci_platform_data[] = {
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 40, 40, 40, 40 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffeb0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 44, 44, 44, 44 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffec0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 60, 60, 60, 60 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffed0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 61, 61, 61, 61 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffee0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 62, 62, 62, 62 },
+		.clk		= "scif_fck",
 	}, {
 		.mapbase	= 0xffef0000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
 		.irqs		= { 63, 63, 63, 63 },
+		.clk		= "scif_fck",
 	}, {
 		.flags = 0,
 	}
-- 
cgit v0.10.2


From ad3256e361663923342bff7f292dd289f794aa33 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 14 May 2009 17:40:08 +0900
Subject: sh: Provide FORCE_MAX_ZONEORDER.

Several platforms want to be able to do large physically contiguous
allocations (primarily nommu and video codecs on SH-Mobile), provide a
MAX_ORDER override for those cases.

Tested-by: Conrad Parker <conrad@metadecks.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index b900d2cd..2795618 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -21,6 +21,29 @@ config PAGE_OFFSET
 	default "0x20000000" if MMU && SUPERH64
 	default "0x00000000"
 
+config FORCE_MAX_ZONEORDER
+	int "Maximum zone order"
+	range 9 64 if PAGE_SIZE_16KB
+	default "9" if PAGE_SIZE_16KB
+	range 7 64 if PAGE_SIZE_64KB
+	default "7" if PAGE_SIZE_64KB
+	range 11 64
+	default "14" if !MMU
+	default "11"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
+	  The page size is not necessarily 4KB. Keep this in mind when
+	  choosing a value for this option.
+
 config MEMORY_START
 	hex "Physical memory start address"
 	default "0x08000000"
-- 
cgit v0.10.2


From 9304d0ccf1922b9b954b6e9223d1b2bc83198ad2 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 14 May 2009 07:53:39 +0000
Subject: sh: intc tables for sh7770

This patch adds INTC tables for sh7770, thanks
goes to Paul for the first prototype version.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index b61d614..0feba41 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -12,6 +12,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/sh_timer.h>
+#include <linux/io.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
@@ -387,6 +388,251 @@ void __init plat_early_device_setup(void)
 				   ARRAY_SIZE(sh7770_early_devices));
 }
 
+enum {
+	UNUSED = 0,
+
+	/* interrupt sources */
+	IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+	IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+	IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+	IRL_HHLL, IRL_HHLH, IRL_HHHL,
+
+	IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5,
+
+	GPIO,
+	TMU0, TMU1, TMU2, TMU2_TICPI,
+	TMU3, TMU4, TMU5, TMU5_TICPI,
+	TMU6, TMU7, TMU8,
+	HAC, IPI, SPDIF, HUDI, I2C,
+	DMAC0_DMINT0, DMAC0_DMINT1, DMAC0_DMINT2,
+	I2S0, I2S1, I2S2, I2S3,
+	SRC_RX, SRC_TX, SRC_SPDIF,
+	DU, VIDEO_IN, REMOTE, YUV, USB, ATAPI, CAN, GPS, GFX2D,
+	GFX3D_MBX, GFX3D_DMAC,
+	EXBUS_ATA,
+	SPI0, SPI1,
+	SCIF089, SCIF1234, SCIF567,
+	ADC,
+	BBDMAC_0_3, BBDMAC_4_7, BBDMAC_8_10, BBDMAC_11_14,
+	BBDMAC_15_18, BBDMAC_19_22, BBDMAC_23_26, BBDMAC_27,
+	BBDMAC_28, BBDMAC_29, BBDMAC_30, BBDMAC_31,
+
+	/* interrupt groups */
+	TMU, DMAC, I2S, SRC, GFX3D, SPI, SCIF, BBDMAC,
+};
+
+static struct intc_vect vectors[] __initdata = {
+	INTC_VECT(GPIO, 0x3e0),
+	INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420),
+	INTC_VECT(TMU2, 0x440), INTC_VECT(TMU2_TICPI, 0x460),
+	INTC_VECT(TMU3, 0x480), INTC_VECT(TMU4, 0x4a0),
+	INTC_VECT(TMU5, 0x4c0), INTC_VECT(TMU5_TICPI, 0x4e0),
+	INTC_VECT(TMU6, 0x500), INTC_VECT(TMU7, 0x520),
+	INTC_VECT(TMU8, 0x540),
+	INTC_VECT(HAC, 0x580), INTC_VECT(IPI, 0x5c0),
+	INTC_VECT(SPDIF, 0x5e0),
+	INTC_VECT(HUDI, 0x600), INTC_VECT(I2C, 0x620),
+	INTC_VECT(DMAC0_DMINT0, 0x640), INTC_VECT(DMAC0_DMINT1, 0x660),
+	INTC_VECT(DMAC0_DMINT2, 0x680),
+	INTC_VECT(I2S0, 0x6a0), INTC_VECT(I2S1, 0x6c0),
+	INTC_VECT(I2S2, 0x6e0), INTC_VECT(I2S3, 0x700),
+	INTC_VECT(SRC_RX, 0x720), INTC_VECT(SRC_TX, 0x740),
+	INTC_VECT(SRC_SPDIF, 0x760),
+	INTC_VECT(DU, 0x780), INTC_VECT(VIDEO_IN, 0x7a0),
+	INTC_VECT(REMOTE, 0x7c0), INTC_VECT(YUV, 0x7e0),
+	INTC_VECT(USB, 0x840), INTC_VECT(ATAPI, 0x860),
+	INTC_VECT(CAN, 0x880), INTC_VECT(GPS, 0x8a0),
+	INTC_VECT(GFX2D, 0x8c0),
+	INTC_VECT(GFX3D_MBX, 0x900), INTC_VECT(GFX3D_DMAC, 0x920),
+	INTC_VECT(EXBUS_ATA, 0x940),
+	INTC_VECT(SPI0, 0x960), INTC_VECT(SPI1, 0x980),
+	INTC_VECT(SCIF089, 0x9a0), INTC_VECT(SCIF1234, 0x9c0),
+	INTC_VECT(SCIF1234, 0x9e0), INTC_VECT(SCIF1234, 0xa00),
+	INTC_VECT(SCIF1234, 0xa20), INTC_VECT(SCIF567, 0xa40),
+	INTC_VECT(SCIF567, 0xa60), INTC_VECT(SCIF567, 0xa80),
+	INTC_VECT(SCIF089, 0xaa0), INTC_VECT(SCIF089, 0xac0),
+	INTC_VECT(ADC, 0xb20),
+	INTC_VECT(BBDMAC_0_3, 0xba0), INTC_VECT(BBDMAC_0_3, 0xbc0),
+	INTC_VECT(BBDMAC_0_3, 0xbe0), INTC_VECT(BBDMAC_0_3, 0xc00),
+	INTC_VECT(BBDMAC_4_7, 0xc20), INTC_VECT(BBDMAC_4_7, 0xc40),
+	INTC_VECT(BBDMAC_4_7, 0xc60), INTC_VECT(BBDMAC_4_7, 0xc80),
+	INTC_VECT(BBDMAC_8_10, 0xca0), INTC_VECT(BBDMAC_8_10, 0xcc0),
+	INTC_VECT(BBDMAC_8_10, 0xce0), INTC_VECT(BBDMAC_11_14, 0xd00),
+	INTC_VECT(BBDMAC_11_14, 0xd20), INTC_VECT(BBDMAC_11_14, 0xd40),
+	INTC_VECT(BBDMAC_11_14, 0xd60), INTC_VECT(BBDMAC_15_18, 0xd80),
+	INTC_VECT(BBDMAC_15_18, 0xda0), INTC_VECT(BBDMAC_15_18, 0xdc0),
+	INTC_VECT(BBDMAC_15_18, 0xde0), INTC_VECT(BBDMAC_19_22, 0xe00),
+	INTC_VECT(BBDMAC_19_22, 0xe20), INTC_VECT(BBDMAC_19_22, 0xe40),
+	INTC_VECT(BBDMAC_19_22, 0xe60), INTC_VECT(BBDMAC_23_26, 0xe80),
+	INTC_VECT(BBDMAC_23_26, 0xea0), INTC_VECT(BBDMAC_23_26, 0xec0),
+	INTC_VECT(BBDMAC_23_26, 0xee0), INTC_VECT(BBDMAC_27, 0xf00),
+	INTC_VECT(BBDMAC_28, 0xf20), INTC_VECT(BBDMAC_29, 0xf40),
+	INTC_VECT(BBDMAC_30, 0xf60), INTC_VECT(BBDMAC_31, 0xf80),
+};
+
+static struct intc_group groups[] __initdata = {
+	INTC_GROUP(TMU, TMU0, TMU1, TMU2, TMU2_TICPI, TMU3, TMU4, TMU5,
+		   TMU5_TICPI, TMU6, TMU7, TMU8),
+	INTC_GROUP(DMAC, DMAC0_DMINT0, DMAC0_DMINT1, DMAC0_DMINT2),
+	INTC_GROUP(I2S, I2S0, I2S1, I2S2, I2S3),
+	INTC_GROUP(SRC, SRC_RX, SRC_TX, SRC_SPDIF),
+	INTC_GROUP(GFX3D, GFX3D_MBX, GFX3D_DMAC),
+	INTC_GROUP(SPI, SPI0, SPI1),
+	INTC_GROUP(SCIF, SCIF089, SCIF1234, SCIF567),
+	INTC_GROUP(BBDMAC,
+		   BBDMAC_0_3, BBDMAC_4_7, BBDMAC_8_10, BBDMAC_11_14,
+		   BBDMAC_15_18, BBDMAC_19_22, BBDMAC_23_26, BBDMAC_27,
+		   BBDMAC_28, BBDMAC_29, BBDMAC_30, BBDMAC_31),
+};
+
+static struct intc_mask_reg mask_registers[] __initdata = {
+	{ 0xffe00040, 0xffe00044, 32, /* INT2MSKR / INT2MSKCR */
+	  { 0, BBDMAC, ADC, SCIF, SPI, EXBUS_ATA, GFX3D, GFX2D,
+	    GPS, CAN, ATAPI, USB, YUV, REMOTE, VIDEO_IN, DU, SRC, I2S,
+	    DMAC, I2C, HUDI, SPDIF, IPI, HAC, TMU, GPIO } },
+};
+
+static struct intc_prio_reg prio_registers[] __initdata = {
+	{ 0xffe00000, 0, 32, 8, /* INT2PRI0 */ { GPIO, TMU0, 0, HAC } },
+	{ 0xffe00004, 0, 32, 8, /* INT2PRI1 */ { IPI, SPDIF, HUDI, I2C } },
+	{ 0xffe00008, 0, 32, 8, /* INT2PRI2 */ { DMAC, I2S, SRC, DU } },
+	{ 0xffe0000c, 0, 32, 8, /* INT2PRI3 */ { VIDEO_IN, REMOTE, YUV, USB } },
+	{ 0xffe00010, 0, 32, 8, /* INT2PRI4 */ { ATAPI, CAN, GPS, GFX2D } },
+	{ 0xffe00014, 0, 32, 8, /* INT2PRI5 */ { 0, GFX3D, EXBUS_ATA, SPI } },
+	{ 0xffe00018, 0, 32, 8, /* INT2PRI6 */ { SCIF1234, SCIF567, SCIF089 } },
+	{ 0xffe0001c, 0, 32, 8, /* INT2PRI7 */ { ADC, 0, 0, BBDMAC_0_3 } },
+	{ 0xffe00020, 0, 32, 8, /* INT2PRI8 */
+	  { BBDMAC_4_7, BBDMAC_8_10, BBDMAC_11_14, BBDMAC_15_18 } },
+	{ 0xffe00024, 0, 32, 8, /* INT2PRI9 */
+	  { BBDMAC_19_22, BBDMAC_23_26, BBDMAC_27, BBDMAC_28 } },
+	{ 0xffe00028, 0, 32, 8, /* INT2PRI10 */
+	  { BBDMAC_29, BBDMAC_30, BBDMAC_31 } },
+	{ 0xffe0002c, 0, 32, 8, /* INT2PRI11 */
+	  { TMU1, TMU2, TMU2_TICPI, TMU3 } },
+	{ 0xffe00030, 0, 32, 8, /* INT2PRI12 */
+	  { TMU4, TMU5, TMU5_TICPI, TMU6 } },
+	{ 0xffe00034, 0, 32, 8, /* INT2PRI13 */
+	  { TMU7, TMU8 } },
+};
+
+static DECLARE_INTC_DESC(intc_desc, "sh7770", vectors, groups,
+			 mask_registers, prio_registers, NULL);
+
+/* Support for external interrupt pins in IRQ mode */
+static struct intc_vect irq_vectors[] __initdata = {
+	INTC_VECT(IRQ0, 0x240), INTC_VECT(IRQ1, 0x280),
+	INTC_VECT(IRQ2, 0x2c0), INTC_VECT(IRQ3, 0x300),
+	INTC_VECT(IRQ4, 0x340), INTC_VECT(IRQ5, 0x380),
+};
+
+static struct intc_mask_reg irq_mask_registers[] __initdata = {
+	{ 0xffd00044, 0xffd00064, 32, /* INTMSK0 / INTMSKCLR0 */
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, } },
+};
+
+static struct intc_prio_reg irq_prio_registers[] __initdata = {
+	{ 0xffd00010, 0, 32, 4, /* INTPRI */ { IRQ0, IRQ1, IRQ2, IRQ3,
+					       IRQ4, IRQ5, } },
+};
+
+static struct intc_sense_reg irq_sense_registers[] __initdata = {
+	{ 0xffd0001c, 32, 2, /* ICR1 */   { IRQ0, IRQ1, IRQ2, IRQ3,
+					    IRQ4, IRQ5, } },
+};
+
+static DECLARE_INTC_DESC(intc_irq_desc, "sh7770-irq", irq_vectors,
+			 NULL, irq_mask_registers, irq_prio_registers,
+			 irq_sense_registers);
+
+/* External interrupt pins in IRL mode */
+static struct intc_vect irl_vectors[] __initdata = {
+	INTC_VECT(IRL_LLLL, 0x200), INTC_VECT(IRL_LLLH, 0x220),
+	INTC_VECT(IRL_LLHL, 0x240), INTC_VECT(IRL_LLHH, 0x260),
+	INTC_VECT(IRL_LHLL, 0x280), INTC_VECT(IRL_LHLH, 0x2a0),
+	INTC_VECT(IRL_LHHL, 0x2c0), INTC_VECT(IRL_LHHH, 0x2e0),
+	INTC_VECT(IRL_HLLL, 0x300), INTC_VECT(IRL_HLLH, 0x320),
+	INTC_VECT(IRL_HLHL, 0x340), INTC_VECT(IRL_HLHH, 0x360),
+	INTC_VECT(IRL_HHLL, 0x380), INTC_VECT(IRL_HHLH, 0x3a0),
+	INTC_VECT(IRL_HHHL, 0x3c0),
+};
+
+static struct intc_mask_reg irl3210_mask_registers[] __initdata = {
+	{ 0xffd40080, 0xffd40084, 32, /* INTMSK2 / INTMSKCLR2 */
+	  { IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+	    IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+	    IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+	    IRL_HHLL, IRL_HHLH, IRL_HHHL, } },
+};
+
+static struct intc_mask_reg irl7654_mask_registers[] __initdata = {
+	{ 0xffd40080, 0xffd40084, 32, /* INTMSK2 / INTMSKCLR2 */
+	  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	    IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+	    IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+	    IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+	    IRL_HHLL, IRL_HHLH, IRL_HHHL, } },
+};
+
+static DECLARE_INTC_DESC(intc_irl7654_desc, "sh7780-irl7654", irl_vectors,
+			 NULL, irl7654_mask_registers, NULL, NULL);
+
+static DECLARE_INTC_DESC(intc_irl3210_desc, "sh7780-irl3210", irl_vectors,
+			 NULL, irl3210_mask_registers, NULL, NULL);
+
+#define INTC_ICR0	0xffd00000
+#define INTC_INTMSK0	0xffd00044
+#define INTC_INTMSK1	0xffd00048
+#define INTC_INTMSK2	0xffd40080
+#define INTC_INTMSKCLR1	0xffd00068
+#define INTC_INTMSKCLR2	0xffd40084
+
 void __init plat_irq_setup(void)
 {
+	/* disable IRQ7-0 */
+	ctrl_outl(0xff000000, INTC_INTMSK0);
+
+	/* disable IRL3-0 + IRL7-4 */
+	ctrl_outl(0xc0000000, INTC_INTMSK1);
+	ctrl_outl(0xfffefffe, INTC_INTMSK2);
+
+	/* select IRL mode for IRL3-0 + IRL7-4 */
+	ctrl_outl(ctrl_inl(INTC_ICR0) & ~0x00c00000, INTC_ICR0);
+
+	/* disable holding function, ie enable "SH-4 Mode" */
+	ctrl_outl(ctrl_inl(INTC_ICR0) | 0x00200000, INTC_ICR0);
+
+	register_intc_controller(&intc_desc);
+}
+
+void __init plat_irq_setup_pins(int mode)
+{
+	switch (mode) {
+	case IRQ_MODE_IRQ:
+		/* select IRQ mode for IRL3-0 + IRL7-4 */
+		ctrl_outl(ctrl_inl(INTC_ICR0) | 0x00c00000, INTC_ICR0);
+		register_intc_controller(&intc_irq_desc);
+		break;
+	case IRQ_MODE_IRL7654:
+		/* enable IRL7-4 but don't provide any masking */
+		ctrl_outl(0x40000000, INTC_INTMSKCLR1);
+		ctrl_outl(0x0000fffe, INTC_INTMSKCLR2);
+		break;
+	case IRQ_MODE_IRL3210:
+		/* enable IRL0-3 but don't provide any masking */
+		ctrl_outl(0x80000000, INTC_INTMSKCLR1);
+		ctrl_outl(0xfffe0000, INTC_INTMSKCLR2);
+		break;
+	case IRQ_MODE_IRL7654_MASK:
+		/* enable IRL7-4 and mask using cpu intc controller */
+		ctrl_outl(0x40000000, INTC_INTMSKCLR1);
+		register_intc_controller(&intc_irl7654_desc);
+		break;
+	case IRQ_MODE_IRL3210_MASK:
+		/* enable IRL0-3 and mask using cpu intc controller */
+		ctrl_outl(0x80000000, INTC_INTMSKCLR1);
+		register_intc_controller(&intc_irl3210_desc);
+		break;
+	default:
+		BUG();
+	}
 }
-- 
cgit v0.10.2


From d0da727e025da8b443a4a614dbb7a031b89857d0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 14 May 2009 10:56:04 +0200
Subject: ALSA: ctxfi - Add missing inclusion of linux/delay.h

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index 53572d92..44283bd 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -25,6 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/delay.h>
 
 #define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bits */
 
diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
index cdcb75c..9c2d38b 100644
--- a/sound/pci/ctxfi/cthw20k2.c
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -24,6 +24,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/delay.h>
 
 #define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bits */
 
-- 
cgit v0.10.2


From 14610ce711a363028ffffad98947d57f21fa5372 Mon Sep 17 00:00:00 2001
From: Anuj Aggarwal <anuj.aggarwal@ti.com>
Date: Thu, 14 May 2009 13:59:19 +0530
Subject: ASoC: Added OMAP3 EVM support in ASoC.

Resending the patch after fixing the minor issues.

Signed-off-by: Anuj Aggarwal <anuj.aggarwal@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/Kconfig b/sound/soc/omap/Kconfig
index 675732e..b771238 100644
--- a/sound/soc/omap/Kconfig
+++ b/sound/soc/omap/Kconfig
@@ -39,6 +39,14 @@ config SND_OMAP_SOC_OMAP2EVM
 	help
 	  Say Y if you want to add support for SoC audio on the omap2evm board.
 
+config SND_OMAP_SOC_OMAP3EVM
+	tristate "SoC Audio support for OMAP3EVM board"
+	depends on TWL4030_CORE && SND_OMAP_SOC && MACH_OMAP3EVM
+	select SND_OMAP_SOC_MCBSP
+	select SND_SOC_TWL4030
+	help
+	  Say Y if you want to add support for SoC audio on the omap3evm board.
+
 config SND_OMAP_SOC_SDP3430
 	tristate "SoC Audio support for Texas Instruments SDP3430"
 	depends on TWL4030_CORE && SND_OMAP_SOC && MACH_OMAP_3430SDP
diff --git a/sound/soc/omap/Makefile b/sound/soc/omap/Makefile
index 0c9e4ac..a37f498 100644
--- a/sound/soc/omap/Makefile
+++ b/sound/soc/omap/Makefile
@@ -10,6 +10,7 @@ snd-soc-n810-objs := n810.o
 snd-soc-osk5912-objs := osk5912.o
 snd-soc-overo-objs := overo.o
 snd-soc-omap2evm-objs := omap2evm.o
+snd-soc-omap3evm-objs := omap3evm.o
 snd-soc-sdp3430-objs := sdp3430.o
 snd-soc-omap3pandora-objs := omap3pandora.o
 snd-soc-omap3beagle-objs := omap3beagle.o
@@ -18,6 +19,7 @@ obj-$(CONFIG_SND_OMAP_SOC_N810) += snd-soc-n810.o
 obj-$(CONFIG_SND_OMAP_SOC_OSK5912) += snd-soc-osk5912.o
 obj-$(CONFIG_SND_OMAP_SOC_OVERO) += snd-soc-overo.o
 obj-$(CONFIG_MACH_OMAP2EVM) += snd-soc-omap2evm.o
+obj-$(CONFIG_MACH_OMAP3EVM) += snd-soc-omap3evm.o
 obj-$(CONFIG_SND_OMAP_SOC_SDP3430) += snd-soc-sdp3430.o
 obj-$(CONFIG_SND_OMAP_SOC_OMAP3_PANDORA) += snd-soc-omap3pandora.o
 obj-$(CONFIG_SND_OMAP_SOC_OMAP3_BEAGLE) += snd-soc-omap3beagle.o
diff --git a/sound/soc/omap/omap3evm.c b/sound/soc/omap/omap3evm.c
new file mode 100644
index 0000000..9114c26
--- /dev/null
+++ b/sound/soc/omap/omap3evm.c
@@ -0,0 +1,147 @@
+/*
+ * omap3evm.c  -- ALSA SoC support for OMAP3 EVM
+ *
+ * Author: Anuj Aggarwal <anuj.aggarwal@ti.com>
+ *
+ * Based on sound/soc/omap/beagle.c by Steve Sakoman
+ *
+ * Copyright (C) 2008 Texas Instruments, Incorporated
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+
+#include <asm/mach-types.h>
+#include <mach/hardware.h>
+#include <mach/gpio.h>
+#include <mach/mcbsp.h>
+
+#include "omap-mcbsp.h"
+#include "omap-pcm.h"
+#include "../codecs/twl4030.h"
+
+static int omap3evm_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	int ret;
+
+	/* Set codec DAI configuration */
+	ret = snd_soc_dai_set_fmt(codec_dai,
+				  SND_SOC_DAIFMT_I2S |
+				  SND_SOC_DAIFMT_NB_NF |
+				  SND_SOC_DAIFMT_CBM_CFM);
+	if (ret < 0) {
+		printk(KERN_ERR "Can't set codec DAI configuration\n");
+		return ret;
+	}
+
+	/* Set cpu DAI configuration */
+	ret = snd_soc_dai_set_fmt(cpu_dai,
+				  SND_SOC_DAIFMT_I2S |
+				  SND_SOC_DAIFMT_NB_NF |
+				  SND_SOC_DAIFMT_CBM_CFM);
+	if (ret < 0) {
+		printk(KERN_ERR "Can't set cpu DAI configuration\n");
+		return ret;
+	}
+
+	/* Set the codec system clock for DAC and ADC */
+	ret = snd_soc_dai_set_sysclk(codec_dai, 0, 26000000,
+				     SND_SOC_CLOCK_IN);
+	if (ret < 0) {
+		printk(KERN_ERR "Can't set codec system clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct snd_soc_ops omap3evm_ops = {
+	.hw_params = omap3evm_hw_params,
+};
+
+/* Digital audio interface glue - connects codec <--> CPU */
+static struct snd_soc_dai_link omap3evm_dai = {
+	.name 		= "TWL4030",
+	.stream_name 	= "TWL4030",
+	.cpu_dai 	= &omap_mcbsp_dai[0],
+	.codec_dai 	= &twl4030_dai[TWL4030_DAI_HIFI],
+	.ops 		= &omap3evm_ops,
+};
+
+/* Audio machine driver */
+static struct snd_soc_card snd_soc_omap3evm = {
+	.name = "omap3evm",
+	.platform = &omap_soc_platform,
+	.dai_link = &omap3evm_dai,
+	.num_links = 1,
+};
+
+/* Audio subsystem */
+static struct snd_soc_device omap3evm_snd_devdata = {
+	.card = &snd_soc_omap3evm,
+	.codec_dev = &soc_codec_dev_twl4030,
+};
+
+static struct platform_device *omap3evm_snd_device;
+
+static int __init omap3evm_soc_init(void)
+{
+	int ret;
+
+	if (!machine_is_omap3evm()) {
+		pr_err("Not OMAP3 EVM!\n");
+		return -ENODEV;
+	}
+	pr_info("OMAP3 EVM SoC init\n");
+
+	omap3evm_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!omap3evm_snd_device) {
+		printk(KERN_ERR "Platform device allocation failed\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(omap3evm_snd_device, &omap3evm_snd_devdata);
+	omap3evm_snd_devdata.dev = &omap3evm_snd_device->dev;
+	*(unsigned int *)omap3evm_dai.cpu_dai->private_data = 1;
+
+	ret = platform_device_add(omap3evm_snd_device);
+	if (ret)
+		goto err1;
+
+	return 0;
+
+err1:
+	printk(KERN_ERR "Unable to add platform device\n");
+	platform_device_put(omap3evm_snd_device);
+
+	return ret;
+}
+
+static void __exit omap3evm_soc_exit(void)
+{
+	platform_device_unregister(omap3evm_snd_device);
+}
+
+module_init(omap3evm_soc_init);
+module_exit(omap3evm_soc_exit);
+
+MODULE_AUTHOR("Anuj Aggarwal <anuj.aggarwal@ti.com>");
+MODULE_DESCRIPTION("ALSA SoC OMAP3 EVM");
+MODULE_LICENSE("GPLv2");
-- 
cgit v0.10.2


From d34c43078236b41146877c49af341ab85b5fb8db Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Wed, 13 May 2009 21:59:14 -0400
Subject: ASoC: Add SNDRV_PCM_FMTBIT_S32_BE as a valid AC97 format

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 496dc30..352d7ee 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -79,7 +79,8 @@ struct snd_pcm_substream;
 #define SND_SOC_CLOCK_OUT		1
 
 #define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\
-                               SNDRV_PCM_FMTBIT_S32_LE)
+                               SNDRV_PCM_FMTBIT_S32_LE |\
+                               SNDRV_PCM_FMTBIT_S32_BE)
 
 struct snd_soc_dai_ops;
 struct snd_soc_dai;
-- 
cgit v0.10.2


From 903dba1eae49270a8c2275636071c3a238c8104d Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Thu, 14 May 2009 14:37:21 +0200
Subject: ALSA: keywest: Get rid of useless i2c_device_name() macro

The i2c_device_name() macro is used only once and doesn't have much
value, it hurts redability more than it helps. Get rid of it.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c
index a5afb26..b2e2aac 100644
--- a/sound/ppc/keywest.c
+++ b/sound/ppc/keywest.c
@@ -33,10 +33,6 @@
 static struct pmac_keywest *keywest_ctx;
 
 
-#ifndef i2c_device_name
-#define i2c_device_name(x)	((x)->name)
-#endif
-
 static int keywest_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -56,7 +52,7 @@ static int keywest_attach_adapter(struct i2c_adapter *adapter)
 	if (! keywest_ctx)
 		return -EINVAL;
 
-	if (strncmp(i2c_device_name(adapter), "mac-io", 6))
+	if (strncmp(adapter->name, "mac-io", 6))
 		return 0; /* ignored */
 
 	memset(&info, 0, sizeof(struct i2c_board_info));
-- 
cgit v0.10.2


From 9fc20f030ba457d20eb994e1def7e2ce7d5ae1a8 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 14 May 2009 15:14:18 +0200
Subject: ALSA: ctxfi - Move PCI ID definitions to linux/pci_ids.h

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 06ba90c..6191531 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1314,6 +1314,13 @@
 
 #define PCI_VENDOR_ID_CREATIVE		0x1102 /* duplicate: ECTIVA */
 #define PCI_DEVICE_ID_CREATIVE_EMU10K1	0x0002
+#define PCI_DEVICE_ID_CREATIVE_20K1	0x0005
+#define PCI_DEVICE_ID_CREATIVE_20K2	0x000b
+#define PCI_SUBDEVICE_ID_CREATIVE_SB0760	0x0024
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08801	0x0041
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08802	0x0042
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08803	0x0043
+#define PCI_SUBDEVICE_ID_CREATIVE_HENDRIX	0x6000
 
 #define PCI_VENDOR_ID_ECTIVA		0x1102 /* duplicate: CREATIVE */
 #define PCI_DEVICE_ID_ECTIVA_EV1938	0x8938
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 5f35374..073fe2a 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -18,7 +18,6 @@
 #include "ctatc.h"
 #include "ctpcm.h"
 #include "ctmixer.h"
-#include "ctdrv.h"
 #include "cthardware.h"
 #include "ctsrc.h"
 #include "ctamixer.h"
@@ -40,23 +39,25 @@
 			    | ((IEC958_AES3_CON_FS_48000) << 24))
 
 static const struct ct_atc_chip_sub_details atc_sub_details[NUM_CTCARDS] = {
-	[CTSB0760] = {.subsys = PCI_SUBSYS_CREATIVE_SB0760,
+	[CTSB0760] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB0760,
 		      .nm_model = "SB076x"},
-	[CTHENDRIX] = {.subsys = PCI_SUBSYS_CREATIVE_HENDRIX,
+	[CTHENDRIX] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_HENDRIX,
 		      .nm_model = "Hendrix"},
-	[CTSB08801] = {.subsys = PCI_SUBSYS_CREATIVE_SB08801,
+	[CTSB08801] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08801,
 		      .nm_model = "SB0880"},
-	[CTSB08802] = {.subsys = PCI_SUBSYS_CREATIVE_SB08802,
+	[CTSB08802] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08802,
 		      .nm_model = "SB0880"},
-	[CTSB08803] = {.subsys = PCI_SUBSYS_CREATIVE_SB08803,
+	[CTSB08803] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08803,
 		      .nm_model = "SB0880"}
 };
 
 static struct ct_atc_chip_details atc_chip_details[] = {
-	{.vendor = PCI_VENDOR_CREATIVE, .device = PCI_DEVICE_CREATIVE_20K1,
+	{.vendor = PCI_VENDOR_ID_CREATIVE,
+	 .device = PCI_DEVICE_ID_CREATIVE_20K1,
 	 .sub_details = NULL,
 	 .nm_card = "X-Fi 20k1"},
-	{.vendor = PCI_VENDOR_CREATIVE, .device = PCI_DEVICE_CREATIVE_20K2,
+	{.vendor = PCI_VENDOR_ID_CREATIVE,
+	 .device = PCI_DEVICE_ID_CREATIVE_20K2,
 	 .sub_details = atc_sub_details,
 	 .nm_card = "X-Fi 20k2"},
 	{} /* terminator */
diff --git a/sound/pci/ctxfi/ctdrv.h b/sound/pci/ctxfi/ctdrv.h
deleted file mode 100644
index f776a44..0000000
--- a/sound/pci/ctxfi/ctdrv.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
- *
- * This source file is released under GPL v2 license (no other versions).
- * See the COPYING file included in the main directory of this source
- * distribution for the license terms and conditions.
- *
- * @file    ctdrv.h
- *
- * @breaf
- * This file contains the definition of card IDs supported by this driver.
- *
- * @author Liu Chun
- *
- */
-
-#ifndef CTDRV_H
-#define CTDRV_H
-
-#define PCI_VENDOR_CREATIVE		0x1102
-#define PCI_DEVICE_CREATIVE_20K1	0x0005
-#define PCI_DEVICE_CREATIVE_20K2	0x000B
-#define PCI_SUBVENDOR_CREATIVE		0x1102
-#define PCI_SUBSYS_CREATIVE_SB0760	0x0024
-#define PCI_SUBSYS_CREATIVE_SB08801	0x0041
-#define PCI_SUBSYS_CREATIVE_SB08802	0x0042
-#define PCI_SUBSYS_CREATIVE_SB08803	0x0043
-#define PCI_SUBSYS_CREATIVE_HENDRIX	0x6000
-
-#endif /* CTDRV_H */
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index f911440..b7368fd 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -11,10 +11,10 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/moduleparam.h>
+#include <linux/pci_ids.h>
 #include <sound/core.h>
 #include <sound/initval.h>
 #include "ctatc.h"
-#include "ctdrv.h"
 
 MODULE_AUTHOR("Creative Technology Ltd");
 MODULE_DESCRIPTION("X-Fi driver version 1.03");
@@ -32,8 +32,8 @@ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 
 static struct pci_device_id ct_pci_dev_ids[] = {
 	/* only X-Fi is supported, so... */
-	{ PCI_DEVICE(PCI_VENDOR_CREATIVE, PCI_DEVICE_CREATIVE_20K1) },
-	{ PCI_DEVICE(PCI_VENDOR_CREATIVE, PCI_DEVICE_CREATIVE_20K2) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K1) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K2) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ct_pci_dev_ids);
-- 
cgit v0.10.2


From 35b053becb64eba13f3ea5c8c51023997169ff34 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 14 May 2009 15:17:16 +0200
Subject: ALSA: ctxfi - Avoid unneeded pci_read_config_*() calls

Use struct pci subsystem_device and revision fields instead of
unneeded calls of pci_read_config_*().

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 073fe2a..f523450 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -1190,14 +1190,14 @@ static int atc_dev_free(struct snd_device *dev)
 
 static int atc_identify_card(struct ct_atc *atc)
 {
-	u16 subsys = 0;
-	u8 revision = 0;
+	u16 subsys;
+	u8 revision;
 	struct pci_dev *pci = atc->pci;
 	const struct ct_atc_chip_details *d;
 	enum CTCARDS i;
 
-	pci_read_config_word(pci, PCI_SUBSYSTEM_ID, &subsys);
-	pci_read_config_byte(pci, PCI_REVISION_ID, &revision);
+	subsys = pci->subsystem_device;
+	revision = pci->revision;
 	atc->chip_details = NULL;
 	atc->model = NUM_CTCARDS;
 	for (d = atc_chip_details; d->vendor; d++) {
@@ -1308,7 +1308,7 @@ static int atc_get_resources(struct ct_atc *atc)
 	struct sum_desc sum_dsc = {0};
 	struct sum_mgr *sum_mgr = NULL;
 	int err = 0, i = 0;
-	unsigned short subsys_id = 0;
+	unsigned short subsys_id;
 
 	atc->daios = kzalloc(sizeof(void *)*(DAIONUM), GFP_KERNEL);
 	if (NULL == atc->daios)
@@ -1339,7 +1339,7 @@ static int atc_get_resources(struct ct_atc *atc)
 		}
 		atc->n_daio++;
 	}
-	pci_read_config_word(atc->pci, PCI_SUBSYSTEM_ID, &subsys_id);
+	subsys_id = atc->pci->subsystem_device;
 	if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
 		/* SB073x cards */
 		da_desc.type = SPDIFI1;
-- 
cgit v0.10.2


From b3e0afe61e8271a8d082478752a67e5c279c8f23 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 14 May 2009 15:19:30 +0200
Subject: ALSA: ctxfi - Add prefix to debug prints

Added ctxfi: prefix to each debug print.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctamixer.c b/sound/pci/ctxfi/ctamixer.c
index 119791a..859e996 100644
--- a/sound/pci/ctxfi/ctamixer.c
+++ b/sound/pci/ctxfi/ctamixer.c
@@ -259,7 +259,7 @@ static int get_amixer_rsc(struct amixer_mgr *mgr,
 	}
 	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
 	if (err) {
-		printk(KERN_ERR "Can't meet AMIXER resource request!\n");
+		printk(KERN_ERR "ctxfi: Can't meet AMIXER resource request!\n");
 		goto error;
 	}
 
@@ -413,7 +413,7 @@ static int get_sum_rsc(struct sum_mgr *mgr,
 	}
 	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
 	if (err) {
-		printk(KERN_ERR "Can't meet SUM resource request!\n");
+		printk(KERN_ERR "ctxfi: Can't meet SUM resource request!\n");
 		goto error;
 	}
 
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index f523450..ead104e 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -191,7 +191,7 @@ static unsigned int convert_format(snd_pcm_format_t snd_format)
 	case SNDRV_PCM_FORMAT_S32_LE:
 		return SRC_SF_S32;
 	default:
-		printk(KERN_ERR "not recognized snd format is %d \n",
+		printk(KERN_ERR "ctxfi: not recognized snd format is %d \n",
 			snd_format);
 		return SRC_SF_S16;
 	}
@@ -1254,7 +1254,8 @@ static int ct_create_alsa_devs(struct ct_atc *atc)
 		err = alsa_dev_funcs[i].create(atc, i,
 				alsa_dev_funcs[i].public_name);
 		if (err) {
-			printk(KERN_ERR "Creating alsa device %d failed!\n", i);
+			printk(KERN_ERR "ctxfi: "
+			       "Creating alsa device %d failed!\n", i);
 			return err;
 		}
 	}
@@ -1289,7 +1290,8 @@ static int atc_create_hw_devs(struct ct_atc *atc)
 
 		err = rsc_mgr_funcs[i].create(atc->hw, &atc->rsc_mgrs[i]);
 		if (err) {
-			printk(KERN_ERR "Failed to create rsc_mgr %d!!!\n", i);
+			printk(KERN_ERR "ctxfi: "
+			       "Failed to create rsc_mgr %d!!!\n", i);
 			return err;
 		}
 	}
@@ -1333,7 +1335,7 @@ static int atc_get_resources(struct ct_atc *atc)
 		err = daio_mgr->get_daio(daio_mgr, &da_desc,
 					(struct daio **)&atc->daios[i]);
 		if (err) {
-			printk(KERN_ERR "Failed to get DAIO "
+			printk(KERN_ERR "ctxfi: Failed to get DAIO "
 					"resource %d!!!\n", i);
 			return err;
 		}
@@ -1349,7 +1351,7 @@ static int atc_get_resources(struct ct_atc *atc)
 	err = daio_mgr->get_daio(daio_mgr, &da_desc,
 				(struct daio **)&atc->daios[i]);
 	if (err) {
-		printk(KERN_ERR "Failed to get S/PDIF-in resource!!!\n");
+		printk(KERN_ERR "ctxfi: Failed to get S/PDIF-in resource!!!\n");
 		return err;
 	}
 	atc->n_daio++;
@@ -1400,7 +1402,7 @@ static int atc_get_resources(struct ct_atc *atc)
 
 	err = ct_mixer_create(atc, (struct ct_mixer **)&atc->mixer);
 	if (err) {
-		printk(KERN_ERR "Failed to create mixer obj!!!\n");
+		printk(KERN_ERR "ctxfi: Failed to create mixer obj!!!\n");
 		return err;
 	}
 
@@ -1600,7 +1602,7 @@ int ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 
 error1:
 	ct_atc_destroy(atc);
-	printk(KERN_ERR "Something wrong!!!\n");
+	printk(KERN_ERR "ctxfi: Something wrong!!!\n");
 	return err;
 }
 
diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
index 9c2d38b..7c24c2c 100644
--- a/sound/pci/ctxfi/cthw20k2.c
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -1144,7 +1144,7 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
 		hw_write_20kx(hw, AUDIO_IO_TX_BLRCLK, 0x11111111);
 		hw_write_20kx(hw, AUDIO_IO_RX_BLRCLK, 0);
 	} else {
-		printk(KERN_ALERT "ERROR!!! Invalid sampling rate!!!\n");
+		printk(KERN_ALERT "ctxfi: ERROR!!! Invalid sampling rate!!!\n");
 		return -EINVAL;
 	}
 
@@ -1197,7 +1197,8 @@ static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
 
 	/* Set up device page table */
 	if ((~0UL) == info->vm_pgt_phys) {
-		printk(KERN_ALERT "Wrong device page table page address!!!\n");
+		printk(KERN_ALERT "ctxfi: "
+		       "Wrong device page table page address!!!\n");
 		return -1;
 	}
 
@@ -1314,7 +1315,7 @@ static int hw_pll_init(struct hw *hw, unsigned int rsr)
 		break;
 	}
 	if (i >= 1000) {
-		printk(KERN_ALERT "PLL initialization failed!!!\n");
+		printk(KERN_ALERT "ctxfi: PLL initialization failed!!!\n");
 		return -EBUSY;
 	}
 
@@ -1338,7 +1339,7 @@ static int hw_auto_init(struct hw *hw)
 			break;
 	}
 	if (!get_field(gctl, GCTL_AID)) {
-		printk(KERN_ALERT "Card Auto-init failed!!!\n");
+		printk(KERN_ALERT "ctxfi: Card Auto-init failed!!!\n");
 		return -EBUSY;
 	}
 
@@ -1762,7 +1763,7 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 	/* Initialize I2C */
 	err = I2CInit(hw, 0x1A, 1, 1);
 	if (err < 0) {
-		printk(KERN_ALERT "Failure to acquire I2C!!!\n");
+		printk(KERN_ALERT "ctxfi: Failure to acquire I2C!!!\n");
 		goto error;
 	}
 
@@ -1782,7 +1783,7 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x0A),
 						MAKE_WM8775_DATA(0x0A));
 	} else {
-		printk(KERN_ALERT "Invalid master sampling "
+		printk(KERN_ALERT "ctxfi: Invalid master sampling "
 				  "rate (msr %d)!!!\n", info->msr);
 		err = -EINVAL;
 		goto error;
@@ -1820,7 +1821,7 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
 				MAKE_WM8775_DATA(0xCF)); /* No boost */
 	} else {
-		printk(KERN_ALERT "ERROR!!! Invalid input mux!!!\n");
+		printk(KERN_ALERT "ctxfi: ERROR!!! Invalid input mux!!!\n");
 		err = -EINVAL;
 		goto error;
 	}
@@ -1852,7 +1853,7 @@ static int hw_card_start(struct hw *hw)
 	dma_mask = CT_XFI_DMA_MASK;
 	if (pci_set_dma_mask(pci, dma_mask) < 0 ||
 	    pci_set_consistent_dma_mask(pci, dma_mask) < 0) {
-		printk(KERN_ERR "architecture does not support PCI "
+		printk(KERN_ERR "ctxfi: architecture does not support PCI "
 		"busmaster DMA with mask 0x%x\n", dma_mask);
 		err = -ENXIO;
 		goto error1;
diff --git a/sound/pci/ctxfi/ctmixer.c b/sound/pci/ctxfi/ctmixer.c
index c80d692..b795076 100644
--- a/sound/pci/ctxfi/ctmixer.c
+++ b/sound/pci/ctxfi/ctmixer.c
@@ -760,7 +760,7 @@ static int ct_mixer_get_resources(struct ct_mixer *mixer)
 	for (i = 0; i < (NUM_CT_SUMS * CHN_NUM); i++) {
 		err = sum_mgr->get_sum(sum_mgr, &sum_desc, &sum);
 		if (err) {
-			printk(KERN_ERR "Failed to get sum resources for "
+			printk(KERN_ERR "ctxfi:Failed to get sum resources for "
 					  "front output!\n");
 			break;
 		}
@@ -775,8 +775,8 @@ static int ct_mixer_get_resources(struct ct_mixer *mixer)
 	for (i = 0; i < (NUM_CT_AMIXERS * CHN_NUM); i++) {
 		err = amixer_mgr->get_amixer(amixer_mgr, &am_desc, &amixer);
 		if (err) {
-			printk(KERN_ERR "Failed to get amixer resources for "
-					  "mixer obj!\n");
+			printk(KERN_ERR "ctxfi:Failed to get amixer resources "
+			       "for mixer obj!\n");
 			break;
 		}
 		mixer->amixers[i] = amixer;
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index 73d4fdb..a64cb0e 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -284,7 +284,7 @@ static int ct_pcm_playback_prepare(struct snd_pcm_substream *substream)
 		err = atc->pcm_playback_prepare(atc, apcm);
 
 	if (err < 0) {
-		printk(KERN_ERR "Preparing pcm playback failed!!!\n");
+		printk(KERN_ERR "ctxfi: Preparing pcm playback failed!!!\n");
 		return err;
 	}
 
@@ -389,7 +389,7 @@ static int ct_pcm_capture_prepare(struct snd_pcm_substream *substream)
 
 	err = atc->pcm_capture_prepare(atc, apcm);
 	if (err < 0) {
-		printk(KERN_ERR "Preparing pcm capture failed!!!\n");
+		printk(KERN_ERR "ctxfi: Preparing pcm capture failed!!!\n");
 		return err;
 	}
 
@@ -477,7 +477,7 @@ int ct_alsa_pcm_create(struct ct_atc *atc,
 	err = snd_pcm_new(atc->card, name, device,
 			  playback_count, capture_count, &pcm);
 	if (err < 0) {
-		printk(KERN_ERR "snd_pcm_new failed!! Err=%d\n", err);
+		printk(KERN_ERR "ctxfi: snd_pcm_new failed!! Err=%d\n", err);
 		return err;
 	}
 
diff --git a/sound/pci/ctxfi/ctresource.c b/sound/pci/ctxfi/ctresource.c
index 414fc23..da21a71 100644
--- a/sound/pci/ctxfi/ctresource.c
+++ b/sound/pci/ctxfi/ctresource.c
@@ -162,12 +162,14 @@ int rsc_init(struct rsc *rsc, u32 idx, enum RSCTYP type, u32 msr, void *hw)
 	case DAIO:
 		break;
 	default:
-		printk(KERN_ERR "Invalid resource type value %d!\n", type);
+		printk(KERN_ERR
+		       "ctxfi: Invalid resource type value %d!\n", type);
 		return -EINVAL;
 	}
 
 	if (err) {
-		printk(KERN_ERR "Failed to get resource control block!\n");
+		printk(KERN_ERR
+		       "ctxfi: Failed to get resource control block!\n");
 		return err;
 	}
 
@@ -190,8 +192,8 @@ int rsc_uninit(struct rsc *rsc)
 		case DAIO:
 			break;
 		default:
-			printk(KERN_ERR "Invalid resource type value %d!\n",
-					  rsc->type);
+			printk(KERN_ERR "ctxfi: "
+			       "Invalid resource type value %d!\n", rsc->type);
 			break;
 		}
 
@@ -233,13 +235,15 @@ int rsc_mgr_init(struct rsc_mgr *mgr, enum RSCTYP type,
 	case SUM:
 		break;
 	default:
-		printk(KERN_ERR "Invalid resource type value %d!\n", type);
+		printk(KERN_ERR
+		       "ctxfi: Invalid resource type value %d!\n", type);
 		err = -EINVAL;
 		goto error;
 	}
 
 	if (err) {
-		printk(KERN_ERR "Failed to get manager control block!\n");
+		printk(KERN_ERR
+		       "ctxfi: Failed to get manager control block!\n");
 		goto error;
 	}
 
@@ -282,8 +286,8 @@ int rsc_mgr_uninit(struct rsc_mgr *mgr)
 		case SUM:
 			break;
 		default:
-			printk(KERN_ERR "Invalid resource type value %d!\n",
-					  mgr->type);
+			printk(KERN_ERR "ctxfi: "
+			       "Invalid resource type value %d!\n", mgr->type);
 			break;
 		}
 
diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c
index d3e0ad5..77e118c 100644
--- a/sound/pci/ctxfi/ctsrc.c
+++ b/sound/pci/ctxfi/ctsrc.c
@@ -431,7 +431,7 @@ get_src_rsc(struct src_mgr *mgr, const struct src_desc *desc, struct src **rsrc)
 
 	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
 	if (err) {
-		printk(KERN_ERR "Can't meet SRC resource request!\n");
+		printk(KERN_ERR "ctxfi: Can't meet SRC resource request!\n");
 		return err;
 	}
 
@@ -740,7 +740,7 @@ static int get_srcimp_rsc(struct srcimp_mgr *mgr,
 	}
 	spin_unlock_irqrestore(&mgr->mgr_lock, flags);
 	if (err) {
-		printk(KERN_ERR "Can't meet SRCIMP resource request!\n");
+		printk(KERN_ERR "ctxfi: Can't meet SRCIMP resource request!\n");
 		goto error1;
 	}
 
diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c
index 46ca04c..cecf77e 100644
--- a/sound/pci/ctxfi/ctvmem.c
+++ b/sound/pci/ctxfi/ctvmem.c
@@ -121,12 +121,13 @@ ct_vm_map(struct ct_vm *vm, void *host_addr, int size)
 
 	/* do mapping */
 	if ((unsigned long)host_addr >= VMALLOC_START) {
-		printk(KERN_ERR "Fail! Not support vmalloced addr now!\n");
+		printk(KERN_ERR "ctxfi: "
+		       "Fail! Not support vmalloced addr now!\n");
 		return NULL;
 	}
 
 	if (size > vm->size) {
-		printk(KERN_ERR "Fail! No sufficient device virtural "
+		printk(KERN_ERR "ctxfi: Fail! No sufficient device virtural "
 				  "memory space available!\n");
 		return NULL;
 	}
@@ -139,7 +140,7 @@ ct_vm_map(struct ct_vm *vm, void *host_addr, int size)
 
 	block = get_vm_block(vm, (pages << PAGE_SHIFT));
 	if (block == NULL) {
-		printk(KERN_ERR "No virtual memory block that is big "
+		printk(KERN_ERR "ctxfi: No virtual memory block that is big "
 				  "enough to allocate!\n");
 		return NULL;
 	}
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index b7368fd..19310ed 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -57,16 +57,17 @@ ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 	if (err)
 		return err;
 	if ((reference_rate != 48000) && (reference_rate != 44100)) {
-		printk(KERN_ERR "Invalid reference_rate value %u!!!\n"
-				"The valid values for reference_rate "
-				"are 48000 and 44100.\nValue 48000 is "
-				"assumed.\n", reference_rate);
+		printk(KERN_ERR "ctxfi: Invalid reference_rate value %u!!!\n",
+		       reference_rate);
+		printk(KERN_ERR "ctxfi: The valid values for reference_rate "
+		       "are 48000 and 44100, Value 48000 is assumed.\n");
 		reference_rate = 48000;
 	}
 	if ((multiple != 1) && (multiple != 2)) {
-		printk(KERN_ERR "Invalid multiple value %u!!!\nThe valid "
-				"values for multiple are 1 and 2.\nValue 2 "
-				"is assumed.\n", multiple);
+		printk(KERN_ERR "ctxfi: Invalid multiple value %u!!!\n",
+		       multiple);
+		printk(KERN_ERR "ctxfi: The valid values for multiple are "
+		       "1 and 2, Value 2 is assumed.\n");
 		multiple = 2;
 	}
 	err = ct_atc_create(card, pci, reference_rate, multiple, &atc);
-- 
cgit v0.10.2


From 2fa3cdfb319055fd8b25abdafa413e16f00ad493 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 May 2009 09:29:45 -0400
Subject: ext4: Merge ext4_da_get_block_write() into mpage_da_map_blocks()

The static function ext4_da_get_block_write() was only used by
mpage_da_map_blocks().  So to simplify the code, merge that function
into mpage_da_map_blocks().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e6113c3..bfe50a2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2006,57 +2006,6 @@ static void ext4_print_free_blocks(struct inode *inode)
 }
 
 /*
- * This function is used by mpage_da_map_blocks().  We separate it out
- * as a separate function just to make life easier, and because
- * mpage_da_map_blocks() used to be a generic function that took a
- * get_block_t.
- */
-static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
-				   struct buffer_head *bh_result)
-{
-	int ret;
-	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	loff_t disksize = EXT4_I(inode)->i_disksize;
-	handle_t *handle = NULL;
-
-	handle = ext4_journal_current_handle();
-	BUG_ON(!handle);
-	ret = ext4_get_blocks(handle, inode, iblock, max_blocks,
-			      bh_result, EXT4_GET_BLOCKS_CREATE|
-			      EXT4_GET_BLOCKS_DELALLOC_RESERVE);
-	if (ret <= 0)
-		return ret;
-
-	bh_result->b_size = (ret << inode->i_blkbits);
-
-	if (ext4_should_order_data(inode)) {
-		int retval;
-		retval = ext4_jbd2_file_inode(handle, inode);
-		if (retval)
-			/*
-			 * Failed to add inode for ordered mode. Don't
-			 * update file size
-			 */
-			return retval;
-	}
-
-	/*
-	 * Update on-disk size along with block allocation we don't
-	 * use EXT4_GET_BLOCKS_EXTEND_DISKSIZE as size may change
-	 * within already allocated block -bzzz
-	 */
-	disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
-	if (disksize > i_size_read(inode))
-		disksize = i_size_read(inode);
-	if (disksize > EXT4_I(inode)->i_disksize) {
-		ext4_update_i_disksize(inode, disksize);
-		ret = ext4_mark_inode_dirty(handle, inode);
-		return ret;
-	}
-	return 0;
-}
-
-/*
  * mpage_da_map_blocks - go through given space
  *
  * @mpd - bh describing space
@@ -2066,9 +2015,12 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
  */
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
-	int err = 0;
+	int err, blks;
 	struct buffer_head new;
-	sector_t next;
+	sector_t next = mpd->b_blocknr;
+	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
+	loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
+	handle_t *handle = NULL;
 
 	/*
 	 * We consider only non-mapped and non-allocated blocks
@@ -2077,6 +2029,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 		!(mpd->b_state & (1 << BH_Delay)) &&
 		!(mpd->b_state & (1 << BH_Unwritten)))
 		return 0;
+
+	/*
+	 * If we didn't accumulate anything to write simply return
+	 */
+	if (!mpd->b_size)
+		return 0;
+
+	handle = ext4_journal_current_handle();
+	BUG_ON(!handle);
+
 	/*
 	 * We need to make sure the BH_Delay flag is passed down to
 	 * ext4_da_get_block_write(), since it calls ext4_get_blocks()
@@ -2092,18 +2054,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE flag.
 	 */
 	new.b_state = mpd->b_state & (1 << BH_Delay);
-	new.b_blocknr = 0;
-	new.b_size = mpd->b_size;
-	next = mpd->b_blocknr;
-	/*
-	 * If we didn't accumulate anything
-	 * to write simply return
-	 */
-	if (!new.b_size)
-		return 0;
-
-	err = ext4_da_get_block_write(mpd->inode, next, &new);
-	if (err) {
+	blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
+			       &new, EXT4_GET_BLOCKS_CREATE|
+			       EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+	if (blks < 0) {
+		err = blks;
 		/*
 		 * If get block returns with error we simply
 		 * return. Later writepage will redirty the page and
@@ -2136,12 +2091,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 		if (err == -ENOSPC) {
 			ext4_print_free_blocks(mpd->inode);
 		}
-		/* invlaidate all the pages */
+		/* invalidate all the pages */
 		ext4_da_block_invalidatepages(mpd, next,
 				mpd->b_size >> mpd->inode->i_blkbits);
 		return err;
 	}
-	BUG_ON(new.b_size == 0);
+	BUG_ON(blks == 0);
+
+	new.b_size = (blks << mpd->inode->i_blkbits);
 
 	if (buffer_new(&new))
 		__unmap_underlying_blocks(mpd->inode, &new);
@@ -2154,6 +2111,25 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	    (mpd->b_state & (1 << BH_Unwritten)))
 		mpage_put_bnr_to_bhs(mpd, next, &new);
 
+	if (ext4_should_order_data(mpd->inode)) {
+		err = ext4_jbd2_file_inode(handle, mpd->inode);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * Update on-disk size along with block allocation we don't
+	 * use EXT4_GET_BLOCKS_EXTEND_DISKSIZE as size may change
+	 * within already allocated block -bzzz
+	 */
+	disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
+	if (disksize > i_size_read(mpd->inode))
+		disksize = i_size_read(mpd->inode);
+	if (disksize > EXT4_I(mpd->inode)->i_disksize) {
+		ext4_update_i_disksize(mpd->inode, disksize);
+		return ext4_mark_inode_dirty(handle, mpd->inode);
+	}
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 7442f9dadb8626bd9e7e01445560499560b99f5d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 14 May 2009 18:20:33 +0200
Subject: ALSA: hda - Add a quirk entry for Macbook Pro 5,1

Added the codec SSID for MacBook Pro 5,1 as compatible as MP51.
However, the headphone auto-muting function doesn't work.  So,
this is just a tentative solution.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 51954ba..1bf9d05 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -7293,6 +7293,10 @@ static int patch_alc882(struct hda_codec *codec)
 			board_config = ALC885_MBP3;
 			break;
 		case 0x106b3f00: /* Macbook 5,1 */
+		case 0x106b4000: /* Macbook Pro 5,1 - FIXME: HP jack sense
+				  *   seems not working, so apparently
+				  *   no perfect solution yet
+				  */
 			board_config = ALC885_MB5;
 			break;
 		default:
-- 
cgit v0.10.2


From 2ac3b6e00acb46406c993d57921f86a594aafe08 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 May 2009 13:57:08 -0400
Subject: ext4: Clean up ext4_get_blocks() so it does not depend on
 bh_result->b_state

The ext4_get_blocks() function was depending on the value of
bh_result->b_state as an input parameter to decide whether or not
update the delalloc accounting statistics by calling
ext4_da_update_reserve_space().  We now use a separate flag,
EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE, to requests this update, so that
all callers of ext4_get_blocks() can clear map_bh.b_state before
calling ext4_get_blocks() without worrying about any consistency
issues.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 17feb4a..d164f12 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -318,16 +318,21 @@ struct ext4_new_group_data {
  */
 	/* Allocate any needed blocks and/or convert an unitialized
 	   extent to be an initialized ext4 */
-#define EXT4_GET_BLOCKS_CREATE			1
+#define EXT4_GET_BLOCKS_CREATE			0x0001
 	/* Request the creation of an unitialized extent */
-#define EXT4_GET_BLOCKS_UNINIT_EXT		2
+#define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
 #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
 						 EXT4_GET_BLOCKS_CREATE)
 	/* Update the ext4_inode_info i_disksize field */
-#define EXT4_GET_BLOCKS_EXTEND_DISKSIZE		4
+#define EXT4_GET_BLOCKS_EXTEND_DISKSIZE		0x0004
 	/* Caller is from the delayed allocation writeout path,
-	   so the filesystem blocks have already been accounted for */
-#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	8
+	   so set the magic i_delalloc_reserve_flag after taking the 
+	   inode allocation semaphore for */
+#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0008
+	/* Call ext4_da_update_reserve_space() after successfully 
+	   allocating the blocks */
+#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0010
+
 
 /*
  * ioctl commands
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bfe50a2..d7b7480 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1234,16 +1234,15 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 		}
 	}
 
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
-		/*
-		 * Update reserved blocks/metadata blocks
-		 * after successful block allocation
-		 * which were deferred till now
-		 */
-		if ((retval > 0) && buffer_delay(bh))
-			ext4_da_update_reserve_space(inode, retval);
-	}
+
+	/*
+	 * Update reserved blocks/metadata blocks after successful
+	 * block allocation which had been deferred till now.
+	 */
+	if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
+		ext4_da_update_reserve_space(inode, retval);
 
 	up_write((&EXT4_I(inode)->i_data_sem));
 	return retval;
@@ -2015,7 +2014,7 @@ static void ext4_print_free_blocks(struct inode *inode)
  */
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
-	int err, blks;
+	int err, blks, get_blocks_flags;
 	struct buffer_head new;
 	sector_t next = mpd->b_blocknr;
 	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
@@ -2040,23 +2039,30 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	BUG_ON(!handle);
 
 	/*
-	 * We need to make sure the BH_Delay flag is passed down to
-	 * ext4_da_get_block_write(), since it calls ext4_get_blocks()
-	 * with the EXT4_GET_BLOCKS_DELALLOC_RESERVE flag.  This flag
-	 * causes ext4_get_blocks() to call
-	 * ext4_da_update_reserve_space() if the passed buffer head
-	 * has the BH_Delay flag set.  In the future, once we clean up
-	 * the interfaces to ext4_get_blocks(), we should pass in a
-	 * separate flag which requests that the delayed allocation
-	 * statistics should be updated, instead of depending on the
-	 * state information getting passed down via the map_bh's
-	 * state bitmasks plus the magic
-	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE flag.
+	 * Call ext4_get_blocks() to allocate any delayed allocation
+	 * blocks, or to convert an uninitialized extent to be
+	 * initialized (in the case where we have written into
+	 * one or more preallocated blocks).
+	 *
+	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
+	 * indicate that we are on the delayed allocation path.  This
+	 * affects functions in many different parts of the allocation
+	 * call path.  This flag exists primarily because we don't
+	 * want to change *many* call functions, so ext4_get_blocks()
+	 * will set the magic i_delalloc_reserved_flag once the
+	 * inode's allocation semaphore is taken.
+	 *
+	 * If the blocks in questions were delalloc blocks, set
+	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
+	 * variables are updated after the blocks have been allocated.
 	 */
-	new.b_state = mpd->b_state & (1 << BH_Delay);
+	new.b_state = 0;
+	get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
+			    EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+	if (mpd->b_state & (1 << BH_Delay))
+		get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
 	blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
-			       &new, EXT4_GET_BLOCKS_CREATE|
-			       EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+			       &new, get_blocks_flags);
 	if (blks < 0) {
 		err = blks;
 		/*
-- 
cgit v0.10.2


From b243b77c708665d7af8c5e42611c27c89f918788 Mon Sep 17 00:00:00 2001
From: Karl Beldan <karl.beldan@gmail.com>
Date: Thu, 14 May 2009 10:25:42 +0200
Subject: ASoC: pxa2xx-i2s: Proper hw initialization

Make sure we are in a know good state at end of probe :
Reset FIFO logic and registers, and make sure REC and RPL functions
along with FIFO service are disabled (SACR0_RST enables REC and RPL).

Resetting loses current settings so remove reset from stream startup.
Now reset occurs only at probe.

Signed-off-by: Karl Beldan <karl.beldan@mobile-devices.fr>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index 6014577..bb8630b 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -106,10 +106,8 @@ static int pxa2xx_i2s_startup(struct snd_pcm_substream *substream,
 	if (IS_ERR(clk_i2s))
 		return PTR_ERR(clk_i2s);
 
-	if (!cpu_dai->active) {
-		SACR0 |= SACR0_RST;
+	if (!cpu_dai->active)
 		SACR0 = 0;
-	}
 
 	return 0;
 }
@@ -347,6 +345,19 @@ static int pxa2xx_i2s_probe(struct platform_device *dev)
 	if (ret != 0)
 		clk_put(clk_i2s);
 
+	/*
+	 * PXA Developer's Manual:
+	 * If SACR0[ENB] is toggled in the middle of a normal operation,
+	 * the SACR0[RST] bit must also be set and cleared to reset all
+	 * I2S controller registers.
+	 */
+	SACR0 = SACR0_RST;
+	SACR0 = 0;
+	/* Make sure RPL and REC are disabled */
+	SACR1 = SACR1_DRPL | SACR1_DREC;
+	/* Along with FIFO servicing */
+	SAIMR &= ~(SAIMR_RFS | SAIMR_TFS);
+
 	return ret;
 }
 
-- 
cgit v0.10.2


From 34555c1077ac8f4854e0db9ad11b989a6908d210 Mon Sep 17 00:00:00 2001
From: Karl Beldan <karl.beldan@gmail.com>
Date: Wed, 13 May 2009 22:16:46 +0200
Subject: ASoC: pxa2xx-i2s: Handle SACR1_DRPL and SACR1_DREC separately

- hw_params enables both RPL and REC functions each time : Enable the
	appropriate function in pxa2xx_i2s_trigger.
- pxa2xx_i2s_shutdown disables i2s anytime one of RPL or REC function is
	off : Turn it off only when both functions are off.

Signed-off-by: Karl Beldan <karl.beldan@mobile-devices.fr>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index bb8630b..115b471 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -176,9 +176,7 @@ static int pxa2xx_i2s_hw_params(struct snd_pcm_substream *substream,
 
 	/* is port used by another stream */
 	if (!(SACR0 & SACR0_ENB)) {
-
 		SACR0 = 0;
-		SACR1 = 0;
 		if (pxa_i2s.master)
 			SACR0 |= SACR0_BCKD;
 
@@ -224,6 +222,10 @@ static int pxa2xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
 
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_START:
+		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+			SACR1 &= ~SACR1_DRPL;
+		else
+			SACR1 &= ~SACR1_DREC;
 		SACR0 |= SACR0_ENB;
 		break;
 	case SNDRV_PCM_TRIGGER_RESUME:
@@ -250,7 +252,7 @@ static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream,
 		SAIMR &= ~SAIMR_RFS;
 	}
 
-	if (SACR1 & (SACR1_DREC | SACR1_DRPL)) {
+	if ((SACR1 & (SACR1_DREC | SACR1_DRPL)) == (SACR1_DREC | SACR1_DRPL)) {
 		SACR0 &= ~SACR0_ENB;
 		pxa_i2s_wait();
 		clk_disable(clk_i2s);
-- 
cgit v0.10.2


From 9bc04fd1677a956fdd7c5645a09de34ca9a8f1a6 Mon Sep 17 00:00:00 2001
From: Karl Beldan <karl.beldan@gmail.com>
Date: Wed, 13 May 2009 22:16:52 +0200
Subject: ASoC: pxa2xx-i2s: Fix inappropriate release of i2s clock

i2s_clk is 'put' for no reason in pxa2xx_i2s_shutdown.
Now we 'get' i2s_clk at probe and 'put' it at driver removal or when
probe fails.

Signed-off-by: Karl Beldan <karl.beldan@mobile-devices.fr>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index 115b471..bc12a09 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -257,8 +257,6 @@ static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream,
 		pxa_i2s_wait();
 		clk_disable(clk_i2s);
 	}
-
-	clk_put(clk_i2s);
 }
 
 #ifdef CONFIG_PM
-- 
cgit v0.10.2


From 916465a841937a60baac6144ae3f41b0d1560f3b Mon Sep 17 00:00:00 2001
From: Karl Beldan <karl.beldan@gmail.com>
Date: Wed, 13 May 2009 22:16:59 +0200
Subject: ASoC: pxa2xx-i2s: Fix suspend/resume

pxa2xx_i2s_resume is :
 - unconditionnaly setting SACR0_ENB
 - unsetting SACR0_ENB in saved SACR0 pxa_i2s.sacr0
fix these.

In pxa2xx_i2s_{resume,suspend}, save/restore registers even
when !dai->active.

Signed-off-by: Karl Beldan <karl.beldan@mobile-devices.fr>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index bc12a09..4743e26 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -262,9 +262,6 @@ static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream,
 #ifdef CONFIG_PM
 static int pxa2xx_i2s_suspend(struct snd_soc_dai *dai)
 {
-	if (!dai->active)
-		return 0;
-
 	/* store registers */
 	pxa_i2s.sacr0 = SACR0;
 	pxa_i2s.sacr1 = SACR1;
@@ -279,16 +276,14 @@ static int pxa2xx_i2s_suspend(struct snd_soc_dai *dai)
 
 static int pxa2xx_i2s_resume(struct snd_soc_dai *dai)
 {
-	if (!dai->active)
-		return 0;
-
 	pxa_i2s_wait();
 
-	SACR0 = pxa_i2s.sacr0 &= ~SACR0_ENB;
+	SACR0 = pxa_i2s.sacr0 & ~SACR0_ENB;
 	SACR1 = pxa_i2s.sacr1;
 	SAIMR = pxa_i2s.saimr;
 	SADIV = pxa_i2s.sadiv;
-	SACR0 |= SACR0_ENB;
+
+	SACR0 = pxa_i2s.sacr0;
 
 	return 0;
 }
-- 
cgit v0.10.2


From 63c26baa2aa624b023892d66ed696525fc787560 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Thu, 14 May 2009 20:52:46 +0100
Subject: ASoC: Support AC97 link off by default on WM9712

The WM9712 can be configured by resistor strapping GPIO4 to behave like
the WM9713 and default to leaving the AC97 link disabled after cold
reset until a warm reset occurs.  In this configuration we need to issue
a warm reset after cold to bring the link up so do so.  The warm reset
will be harmless on systems that don't need it.

[Changelog rewritten to document the reasoning. -- broonie]

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c
index 550c903..1fd4e88 100644
--- a/sound/soc/codecs/wm9712.c
+++ b/sound/soc/codecs/wm9712.c
@@ -585,6 +585,8 @@ static int wm9712_reset(struct snd_soc_codec *codec, int try_warm)
 	}
 
 	soc_ac97_ops.reset(codec->ac97);
+	if (soc_ac97_ops.warm_reset)
+		soc_ac97_ops.warm_reset(codec->ac97);
 	if (ac97_read(codec, 0) != wm9712_reg[0])
 		goto err;
 	return 0;
-- 
cgit v0.10.2


From c53a284f8be23735dc6b53929640a987055f2933 Mon Sep 17 00:00:00 2001
From: Edward Goggin <egoggin@vmware.com>
Date: Thu, 9 Apr 2009 10:02:22 -0700
Subject: [SCSI] initialize max_target_blocked in scsi_alloc_target

This patch initializes the max_target_blocked field of a scsi target
structure so that a queuecommand return value of
SCSI_MLQUEUE_TARGET_BUSY will actually result in having the
scsi_queue_insert blocking the device queue before requeuing the
command and running the queue.  Otherwise, can and does cause livelock
on single CPU configurations if/when open-iSCSI software initiator's
command PDU window fills.

Signed-off-by: Ed Goggin <egoggin@vmware.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 6f51ca4..e2b50d8 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -425,6 +425,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	INIT_LIST_HEAD(&starget->devices);
 	starget->state = STARGET_CREATED;
 	starget->scsi_level = SCSI_2;
+	starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
  retry:
 	spin_lock_irqsave(shost->host_lock, flags);
 
-- 
cgit v0.10.2


From f850a7c040d9faafb41bceb0a05d6bb7432c8c7a Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 12 May 2009 15:13:55 -0400
Subject: IMA: remove read permissions on the ima policy file

The IMA policy file does not implement read.  Trying to just open/read/close
the file will load a blank policy and you cannot then change the policy
without a reboot.  This removes the read permission from the file so one must
at least be attempting to write...

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index ffbe259..3305a96 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -15,6 +15,7 @@
  *	implemenents security file system for reporting
  *	current measurement list and IMA statistics
  */
+#include <linux/fcntl.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/rculist.h>
@@ -283,6 +284,9 @@ static atomic_t policy_opencount = ATOMIC_INIT(1);
  */
 int ima_open_policy(struct inode * inode, struct file * filp)
 {
+	/* No point in being allowed to open it if you aren't going to write */
+	if (!(filp->f_flags & O_WRONLY))
+		return -EACCES;
 	if (atomic_dec_and_test(&policy_opencount))
 		return 0;
 	return -EBUSY;
@@ -349,7 +353,7 @@ int ima_fs_init(void)
 		goto out;
 
 	ima_policy = securityfs_create_file("policy",
-					    S_IRUSR | S_IRGRP | S_IWUSR,
+					    S_IWUSR,
 					    ima_dir, NULL,
 					    &ima_measure_policy_ops);
 	if (IS_ERR(ima_policy))
-- 
cgit v0.10.2


From c3d20103d08e5c0b6738fbd0acf3ca004e5356c5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 12 May 2009 15:14:23 -0400
Subject: IMA: do not measure everything opened by root by default

The IMA default policy measures every single file opened by root.  This is
terrible for most users.  Consider a system (like mine) with virtual machine
images.  When those images are touched (which happens at boot for me) those
images are measured.  This is just way too much for the default case.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index b168c1d..dec6dcb 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -61,8 +61,6 @@ static struct ima_measure_rule_entry default_rules[] = {
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC,
 	 .flags = IMA_FUNC | IMA_MASK},
-	{.action = MEASURE,.func = PATH_CHECK,.mask = MAY_READ,.uid = 0,
-	 .flags = IMA_FUNC | IMA_MASK | IMA_UID}
 };
 
 static LIST_HEAD(measure_default_rules);
-- 
cgit v0.10.2


From bec36eca6f5d1d83a9c3733fc40ba173ad849df2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 15 May 2009 12:03:04 +0900
Subject: sh: hd64461: Fix up I/O base register offsets.

hd64461 is mapped in a fixed location, so the I/O base itself is fairly
meaningless as a configuration item. Additionally, this makes it
impossible to share hd64461 code alongside generic drivers (in the case
of sh_dac_audio), so simply make it commonly defined and permit the
mach_is_foo() logic to work out the proper semantics.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/cchips/Kconfig b/arch/sh/cchips/Kconfig
index f43d183..a5ab2ec 100644
--- a/arch/sh/cchips/Kconfig
+++ b/arch/sh/cchips/Kconfig
@@ -34,11 +34,6 @@ config HD64461_IRQ
 
 	  Do not change this unless you know what you are doing.
 
-config HD64461_IOBASE
-	hex "HD64461 start address"
-	depends on HD64461
-	default "0xb0000000"
-
 config HD64461_ENABLER
 	bool "HD64461 PCMCIA enabler"
 	depends on HD64461
diff --git a/arch/sh/include/asm/hd64461.h b/arch/sh/include/asm/hd64461.h
index 52b4b62..c16d545 100644
--- a/arch/sh/include/asm/hd64461.h
+++ b/arch/sh/include/asm/hd64461.h
@@ -13,13 +13,15 @@
 #define	HD64461_PCC_WINDOW	0x01000000
 
 /* Area 6 - Slot 0 - memory and/or IO card */
-#define	HD64461_PCC0_BASE	(CONFIG_HD64461_IOBASE + 0x8000000)
+#define HD64461_IOBASE		0xb0000000
+#define HD64461_IO_OFFSET(x)	(HD64461_IOBASE + (x))
+#define	HD64461_PCC0_BASE	HD64461_IO_OFFSET(0x8000000)
 #define	HD64461_PCC0_ATTR	(HD64461_PCC0_BASE)				/* 0xb80000000 */
 #define	HD64461_PCC0_COMM	(HD64461_PCC0_BASE+HD64461_PCC_WINDOW)		/* 0xb90000000 */
 #define	HD64461_PCC0_IO		(HD64461_PCC0_BASE+2*HD64461_PCC_WINDOW)	/* 0xba0000000 */
 
 /* Area 5 - Slot 1 - memory card only */
-#define	HD64461_PCC1_BASE	(CONFIG_HD64461_IOBASE + 0x4000000)
+#define	HD64461_PCC1_BASE	HD64461_IO_OFFSET(0x4000000)
 #define	HD64461_PCC1_ATTR	(HD64461_PCC1_BASE)				/* 0xb4000000 */
 #define	HD64461_PCC1_COMM	(HD64461_PCC1_BASE+HD64461_PCC_WINDOW)		/* 0xb5000000 */
 
@@ -41,19 +43,19 @@
 #define	HD64461_STBCR_SURTST		0x0001
 
 /* System Configuration Register */
-#define	HD64461_SYSCR		(CONFIG_HD64461_IOBASE + 0x02)
+#define	HD64461_SYSCR		HD64461_IO_OFFSET(0x02)
 
 /* CPU Data Bus Control Register */
-#define	HD64461_SCPUCR		(CONFIG_HD64461_IOBASE + 0x04)
+#define	HD64461_SCPUCR		HD64461_IO_OFFSET(0x04)
 
 /* Base Address Register */
-#define	HD64461_LCDCBAR		(CONFIG_HD64461_IOBASE + 0x1000)
+#define	HD64461_LCDCBAR		HD64461_IO_OFFSET(0x1000)
 
 /* Line increment address */
-#define	HD64461_LCDCLOR		(CONFIG_HD64461_IOBASE + 0x1002)
+#define	HD64461_LCDCLOR		HD64461_IO_OFFSET(0x1002)
 
 /* Controls LCD controller */
-#define	HD64461_LCDCCR		(CONFIG_HD64461_IOBASE + 0x1004)
+#define	HD64461_LCDCCR		HD64461_IO_OFFSET(0x1004)
 
 /* LCCDR control bits */
 #define	HD64461_LCDCCR_STBACK	0x0400	/* Standby Back */
@@ -64,30 +66,30 @@
 #define	HD64461_LCDCCR_SPON	0x0010	/* Start Power On */
 
 /* Controls LCD (1) */
-#define	HD64461_LDR1		(CONFIG_HD64461_IOBASE + 0x1010)
+#define	HD64461_LDR1		HD64461_IO_OFFSET(0x1010)
 #define	HD64461_LDR1_DON	0x01	/* Display On */
 #define	HD64461_LDR1_DINV	0x80	/* Display Invert */
 
 /* Controls LCD (2) */
-#define	HD64461_LDR2		(CONFIG_HD64461_IOBASE + 0x1012)
-#define	HD64461_LDHNCR		(CONFIG_HD64461_IOBASE + 0x1014)	/* Number of horizontal characters */
-#define	HD64461_LDHNSR		(CONFIG_HD64461_IOBASE + 0x1016)	/* Specify output start position + width of CL1 */
-#define	HD64461_LDVNTR		(CONFIG_HD64461_IOBASE + 0x1018)	/* Specify total vertical lines */
-#define	HD64461_LDVNDR		(CONFIG_HD64461_IOBASE + 0x101a)	/* specify number of display vertical lines */
-#define	HD64461_LDVSPR		(CONFIG_HD64461_IOBASE + 0x101c)	/* specify vertical synchronization pos and AC nr */
+#define	HD64461_LDR2		HD64461_IO_OFFSET(0x1012)
+#define	HD64461_LDHNCR		HD64461_IO_OFFSET(0x1014)	/* Number of horizontal characters */
+#define	HD64461_LDHNSR		HD64461_IO_OFFSET(0x1016)	/* Specify output start position + width of CL1 */
+#define	HD64461_LDVNTR		HD64461_IO_OFFSET(0x1018)	/* Specify total vertical lines */
+#define	HD64461_LDVNDR		HD64461_IO_OFFSET(0x101a)	/* specify number of display vertical lines */
+#define	HD64461_LDVSPR		HD64461_IO_OFFSET(0x101c)	/* specify vertical synchronization pos and AC nr */
 
 /* Controls LCD (3) */
-#define	HD64461_LDR3		(CONFIG_HD64461_IOBASE + 0x101e)
+#define	HD64461_LDR3		HD64461_IO_OFFSET(0x101e)
 
 /* Palette Registers */
-#define	HD64461_CPTWAR		(CONFIG_HD64461_IOBASE + 0x1030)	/* Color Palette Write Address Register */
-#define	HD64461_CPTWDR		(CONFIG_HD64461_IOBASE + 0x1032)	/* Color Palette Write Data Register */
-#define	HD64461_CPTRAR		(CONFIG_HD64461_IOBASE + 0x1034)	/* Color Palette Read Address Register */
-#define	HD64461_CPTRDR		(CONFIG_HD64461_IOBASE + 0x1036)	/* Color Palette Read Data Register */
+#define	HD64461_CPTWAR		HD64461_IO_OFFSET(0x1030)	/* Color Palette Write Address Register */
+#define	HD64461_CPTWDR		HD64461_IO_OFFSET(0x1032)	/* Color Palette Write Data Register */
+#define	HD64461_CPTRAR		HD64461_IO_OFFSET(0x1034)	/* Color Palette Read Address Register */
+#define	HD64461_CPTRDR		HD64461_IO_OFFSET(0x1036)	/* Color Palette Read Data Register */
 
-#define	HD64461_GRDOR		(CONFIG_HD64461_IOBASE + 0x1040)	/* Display Resolution Offset Register */
-#define	HD64461_GRSCR		(CONFIG_HD64461_IOBASE + 0x1042)	/* Solid Color Register */
-#define	HD64461_GRCFGR		(CONFIG_HD64461_IOBASE + 0x1044)	/* Accelerator Configuration Register */
+#define	HD64461_GRDOR		HD64461_IO_OFFSET(0x1040)	/* Display Resolution Offset Register */
+#define	HD64461_GRSCR		HD64461_IO_OFFSET(0x1042)	/* Solid Color Register */
+#define	HD64461_GRCFGR		HD64461_IO_OFFSET(0x1044)	/* Accelerator Configuration Register */
 
 #define	HD64461_GRCFGR_ACCSTATUS	0x10	/* Accelerator Status */
 #define	HD64461_GRCFGR_ACCRESET		0x08	/* Accelerator Reset */
@@ -97,41 +99,41 @@
 #define	HD64461_GRCFGR_COLORDEPTH8	0x01	/* Sets Colordepth 8 for Accelerator */
 
 /* Line Drawing Registers */
-#define	HD64461_LNSARH		(CONFIG_HD64461_IOBASE + 0x1046)	/* Line Start Address Register (H) */
-#define	HD64461_LNSARL		(CONFIG_HD64461_IOBASE + 0x1048)	/* Line Start Address Register (L) */
-#define	HD64461_LNAXLR		(CONFIG_HD64461_IOBASE + 0x104a)	/* Axis Pixel Length Register */
-#define	HD64461_LNDGR		(CONFIG_HD64461_IOBASE + 0x104c)	/* Diagonal Register */
-#define	HD64461_LNAXR		(CONFIG_HD64461_IOBASE + 0x104e)	/* Axial Register */
-#define	HD64461_LNERTR		(CONFIG_HD64461_IOBASE + 0x1050)	/* Start Error Term Register */
-#define	HD64461_LNMDR		(CONFIG_HD64461_IOBASE + 0x1052)	/* Line Mode Register */
+#define	HD64461_LNSARH		HD64461_IO_OFFSET(0x1046)	/* Line Start Address Register (H) */
+#define	HD64461_LNSARL		HD64461_IO_OFFSET(0x1048)	/* Line Start Address Register (L) */
+#define	HD64461_LNAXLR		HD64461_IO_OFFSET(0x104a)	/* Axis Pixel Length Register */
+#define	HD64461_LNDGR		HD64461_IO_OFFSET(0x104c)	/* Diagonal Register */
+#define	HD64461_LNAXR		HD64461_IO_OFFSET(0x104e)	/* Axial Register */
+#define	HD64461_LNERTR		HD64461_IO_OFFSET(0x1050)	/* Start Error Term Register */
+#define	HD64461_LNMDR		HD64461_IO_OFFSET(0x1052)	/* Line Mode Register */
 
 /* BitBLT Registers */
-#define	HD64461_BBTSSARH	(CONFIG_HD64461_IOBASE + 0x1054)	/* Source Start Address Register (H) */
-#define	HD64461_BBTSSARL	(CONFIG_HD64461_IOBASE + 0x1056)	/* Source Start Address Register (L) */
-#define	HD64461_BBTDSARH	(CONFIG_HD64461_IOBASE + 0x1058)	/* Destination Start Address Register (H) */
-#define	HD64461_BBTDSARL	(CONFIG_HD64461_IOBASE + 0x105a)	/* Destination Start Address Register (L) */
-#define	HD64461_BBTDWR		(CONFIG_HD64461_IOBASE + 0x105c)	/* Destination Block Width Register */
-#define	HD64461_BBTDHR		(CONFIG_HD64461_IOBASE + 0x105e)	/* Destination Block Height Register */
-#define	HD64461_BBTPARH		(CONFIG_HD64461_IOBASE + 0x1060)	/* Pattern Start Address Register (H) */
-#define	HD64461_BBTPARL		(CONFIG_HD64461_IOBASE + 0x1062)	/* Pattern Start Address Register (L) */
-#define	HD64461_BBTMARH		(CONFIG_HD64461_IOBASE + 0x1064)	/* Mask Start Address Register (H) */
-#define	HD64461_BBTMARL		(CONFIG_HD64461_IOBASE + 0x1066)	/* Mask Start Address Register (L) */
-#define	HD64461_BBTROPR		(CONFIG_HD64461_IOBASE + 0x1068)	/* ROP Register */
-#define	HD64461_BBTMDR		(CONFIG_HD64461_IOBASE + 0x106a)	/* BitBLT Mode Register */
+#define	HD64461_BBTSSARH	HD64461_IO_OFFSET(0x1054)	/* Source Start Address Register (H) */
+#define	HD64461_BBTSSARL	HD64461_IO_OFFSET(0x1056)	/* Source Start Address Register (L) */
+#define	HD64461_BBTDSARH	HD64461_IO_OFFSET(0x1058)	/* Destination Start Address Register (H) */
+#define	HD64461_BBTDSARL	HD64461_IO_OFFSET(0x105a)	/* Destination Start Address Register (L) */
+#define	HD64461_BBTDWR		HD64461_IO_OFFSET(0x105c)	/* Destination Block Width Register */
+#define	HD64461_BBTDHR		HD64461_IO_OFFSET(0x105e)	/* Destination Block Height Register */
+#define	HD64461_BBTPARH		HD64461_IO_OFFSET(0x1060)	/* Pattern Start Address Register (H) */
+#define	HD64461_BBTPARL		HD64461_IO_OFFSET(0x1062)	/* Pattern Start Address Register (L) */
+#define	HD64461_BBTMARH		HD64461_IO_OFFSET(0x1064)	/* Mask Start Address Register (H) */
+#define	HD64461_BBTMARL		HD64461_IO_OFFSET(0x1066)	/* Mask Start Address Register (L) */
+#define	HD64461_BBTROPR		HD64461_IO_OFFSET(0x1068)	/* ROP Register */
+#define	HD64461_BBTMDR		HD64461_IO_OFFSET(0x106a)	/* BitBLT Mode Register */
 
 /* PC Card Controller Registers */
 /* Maps to Physical Area 6 */
-#define	HD64461_PCC0ISR		(CONFIG_HD64461_IOBASE + 0x2000)	/* socket 0 interface status */
-#define	HD64461_PCC0GCR		(CONFIG_HD64461_IOBASE + 0x2002)	/* socket 0 general control */
-#define	HD64461_PCC0CSCR	(CONFIG_HD64461_IOBASE + 0x2004)	/* socket 0 card status change */
-#define	HD64461_PCC0CSCIER	(CONFIG_HD64461_IOBASE + 0x2006)	/* socket 0 card status change interrupt enable */
-#define	HD64461_PCC0SCR		(CONFIG_HD64461_IOBASE + 0x2008)	/* socket 0 software control */
+#define	HD64461_PCC0ISR		HD64461_IO_OFFSET(0x2000)	/* socket 0 interface status */
+#define	HD64461_PCC0GCR		HD64461_IO_OFFSET(0x2002)	/* socket 0 general control */
+#define	HD64461_PCC0CSCR	HD64461_IO_OFFSET(0x2004)	/* socket 0 card status change */
+#define	HD64461_PCC0CSCIER	HD64461_IO_OFFSET(0x2006)	/* socket 0 card status change interrupt enable */
+#define	HD64461_PCC0SCR		HD64461_IO_OFFSET(0x2008)	/* socket 0 software control */
 /* Maps to Physical Area 5 */
-#define	HD64461_PCC1ISR		(CONFIG_HD64461_IOBASE + 0x2010)	/* socket 1 interface status */
-#define	HD64461_PCC1GCR		(CONFIG_HD64461_IOBASE + 0x2012)	/* socket 1 general control */
-#define	HD64461_PCC1CSCR	(CONFIG_HD64461_IOBASE + 0x2014)	/* socket 1 card status change */
-#define	HD64461_PCC1CSCIER	(CONFIG_HD64461_IOBASE + 0x2016)	/* socket 1 card status change interrupt enable */
-#define	HD64461_PCC1SCR		(CONFIG_HD64461_IOBASE + 0x2018)	/* socket 1 software control */
+#define	HD64461_PCC1ISR		HD64461_IO_OFFSET(0x2010)	/* socket 1 interface status */
+#define	HD64461_PCC1GCR		HD64461_IO_OFFSET(0x2012)	/* socket 1 general control */
+#define	HD64461_PCC1CSCR	HD64461_IO_OFFSET(0x2014)	/* socket 1 card status change */
+#define	HD64461_PCC1CSCIER	HD64461_IO_OFFSET(0x2016)	/* socket 1 card status change interrupt enable */
+#define	HD64461_PCC1SCR		HD64461_IO_OFFSET(0x2018)	/* socket 1 software control */
 
 /* PCC Interface Status Register */
 #define	HD64461_PCCISR_READY		0x80	/* card ready */
@@ -189,41 +191,41 @@
 #define	HD64461_PCCSCR_SWP		0x01	/* write protect */
 
 /* PCC0 Output Pins Control Register */
-#define	HD64461_P0OCR		(CONFIG_HD64461_IOBASE + 0x202a)
+#define	HD64461_P0OCR		HD64461_IO_OFFSET(0x202a)
 
 /* PCC1 Output Pins Control Register */
-#define	HD64461_P1OCR		(CONFIG_HD64461_IOBASE + 0x202c)
+#define	HD64461_P1OCR		HD64461_IO_OFFSET(0x202c)
 
 /* PC Card General Control Register */
-#define	HD64461_PGCR		(CONFIG_HD64461_IOBASE + 0x202e)
+#define	HD64461_PGCR		HD64461_IO_OFFSET(0x202e)
 
 /* Port Control Registers */
-#define	HD64461_GPACR		(CONFIG_HD64461_IOBASE + 0x4000)	/* Port A - Handles IRDA/TIMER */
-#define	HD64461_GPBCR		(CONFIG_HD64461_IOBASE + 0x4002)	/* Port B - Handles UART */
-#define	HD64461_GPCCR		(CONFIG_HD64461_IOBASE + 0x4004)	/* Port C - Handles PCMCIA 1 */
-#define	HD64461_GPDCR		(CONFIG_HD64461_IOBASE + 0x4006)	/* Port D - Handles PCMCIA 1 */
+#define	HD64461_GPACR		HD64461_IO_OFFSET(0x4000)	/* Port A - Handles IRDA/TIMER */
+#define	HD64461_GPBCR		HD64461_IO_OFFSET(0x4002)	/* Port B - Handles UART */
+#define	HD64461_GPCCR		HD64461_IO_OFFSET(0x4004)	/* Port C - Handles PCMCIA 1 */
+#define	HD64461_GPDCR		HD64461_IO_OFFSET(0x4006)	/* Port D - Handles PCMCIA 1 */
 
 /* Port Control Data Registers */
-#define	HD64461_GPADR		(CONFIG_HD64461_IOBASE + 0x4010)	/* A */
-#define	HD64461_GPBDR		(CONFIG_HD64461_IOBASE + 0x4012)	/* B */
-#define	HD64461_GPCDR		(CONFIG_HD64461_IOBASE + 0x4014)	/* C */
-#define	HD64461_GPDDR		(CONFIG_HD64461_IOBASE + 0x4016)	/* D */
+#define	HD64461_GPADR		HD64461_IO_OFFSET(0x4010)	/* A */
+#define	HD64461_GPBDR		HD64461_IO_OFFSET(0x4012)	/* B */
+#define	HD64461_GPCDR		HD64461_IO_OFFSET(0x4014)	/* C */
+#define	HD64461_GPDDR		HD64461_IO_OFFSET(0x4016)	/* D */
 
 /* Interrupt Control Registers */
-#define	HD64461_GPAICR		(CONFIG_HD64461_IOBASE + 0x4020)	/* A */
-#define	HD64461_GPBICR		(CONFIG_HD64461_IOBASE + 0x4022)	/* B */
-#define	HD64461_GPCICR		(CONFIG_HD64461_IOBASE + 0x4024)	/* C */
-#define	HD64461_GPDICR		(CONFIG_HD64461_IOBASE + 0x4026)	/* D */
+#define	HD64461_GPAICR		HD64461_IO_OFFSET(0x4020)	/* A */
+#define	HD64461_GPBICR		HD64461_IO_OFFSET(0x4022)	/* B */
+#define	HD64461_GPCICR		HD64461_IO_OFFSET(0x4024)	/* C */
+#define	HD64461_GPDICR		HD64461_IO_OFFSET(0x4026)	/* D */
 
 /* Interrupt Status Registers */
-#define	HD64461_GPAISR		(CONFIG_HD64461_IOBASE + 0x4040)	/* A */
-#define	HD64461_GPBISR		(CONFIG_HD64461_IOBASE + 0x4042)	/* B */
-#define	HD64461_GPCISR		(CONFIG_HD64461_IOBASE + 0x4044)	/* C */
-#define	HD64461_GPDISR		(CONFIG_HD64461_IOBASE + 0x4046)	/* D */
+#define	HD64461_GPAISR		HD64461_IO_OFFSET(0x4040)	/* A */
+#define	HD64461_GPBISR		HD64461_IO_OFFSET(0x4042)	/* B */
+#define	HD64461_GPCISR		HD64461_IO_OFFSET(0x4044)	/* C */
+#define	HD64461_GPDISR		HD64461_IO_OFFSET(0x4046)	/* D */
 
 /* Interrupt Request Register & Interrupt Mask Register */
-#define	HD64461_NIRR		(CONFIG_HD64461_IOBASE + 0x5000)
-#define	HD64461_NIMR		(CONFIG_HD64461_IOBASE + 0x5002)
+#define	HD64461_NIRR		HD64461_IO_OFFSET(0x5000)
+#define	HD64461_NIMR		HD64461_IO_OFFSET(0x5002)
 
 #define	HD64461_IRQBASE		OFFCHIP_IRQ_BASE
 #define	OFFCHIP_IRQ_BASE	64
-- 
cgit v0.10.2


From 639138a991aaf1a3f65cc66700d097edc5f602fe Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 15 May 2009 12:07:17 +0900
Subject: sound: oss: sh_dac_audio timer fixes.

This patch modifies sh_dac_audio in the following ways:

- use high resolution timer instead of TMU1
- fix cpu/dac.h include
- add future rewrite comment

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/sound/oss/Kconfig b/sound/oss/Kconfig
index 1ca7427..bcf2a06 100644
--- a/sound/oss/Kconfig
+++ b/sound/oss/Kconfig
@@ -561,7 +561,7 @@ endif	# SOUND_OSS
 
 config SOUND_SH_DAC_AUDIO
 	tristate "SuperH DAC audio support"
-	depends on CPU_SH3
+	depends on CPU_SH3 && HIGH_RES_TIMERS
 
 config SOUND_SH_DAC_AUDIO_CHANNEL
 	int "DAC channel"
diff --git a/sound/oss/sh_dac_audio.c b/sound/oss/sh_dac_audio.c
index 78cfb66..b2ed875 100644
--- a/sound/oss/sh_dac_audio.c
+++ b/sound/oss/sh_dac_audio.c
@@ -18,47 +18,36 @@
 #include <linux/sound.h>
 #include <linux/soundcard.h>
 #include <linux/interrupt.h>
+#include <linux/hrtimer.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/irq.h>
 #include <asm/delay.h>
 #include <asm/clock.h>
-#include <asm/cpu/dac.h>
-#include <asm/cpu/timer.h>
+#include <cpu/dac.h>
 #include <asm/machvec.h>
 #include <mach/hp6xx.h>
 #include <asm/hd64461.h>
 
 #define MODNAME "sh_dac_audio"
 
-#define TMU_TOCR_INIT	0x00
-
-#define TMU1_TCR_INIT	0x0020	/* Clock/4, rising edge; interrupt on */
-#define TMU1_TSTR_INIT  0x02	/* Bit to turn on TMU1 */
-
 #define BUFFER_SIZE 48000
 
 static int rate;
 static int empty;
 static char *data_buffer, *buffer_begin, *buffer_end;
 static int in_use, device_major;
+static struct hrtimer hrtimer;
+static ktime_t wakeups_per_second;
 
 static void dac_audio_start_timer(void)
 {
-	u8 tstr;
-
-	tstr = ctrl_inb(TMU_TSTR);
-	tstr |= TMU1_TSTR_INIT;
-	ctrl_outb(tstr, TMU_TSTR);
+	hrtimer_start(&hrtimer, wakeups_per_second, HRTIMER_MODE_REL);
 }
 
 static void dac_audio_stop_timer(void)
 {
-	u8 tstr;
-
-	tstr = ctrl_inb(TMU_TSTR);
-	tstr &= ~TMU1_TSTR_INIT;
-	ctrl_outb(tstr, TMU_TSTR);
+	hrtimer_cancel(&hrtimer);
 }
 
 static void dac_audio_reset(void)
@@ -77,38 +66,30 @@ static void dac_audio_sync(void)
 static void dac_audio_start(void)
 {
 	if (mach_is_hp6xx()) {
-		u16 v = inw(HD64461_GPADR);
+		u16 v = __raw_readw(HD64461_GPADR);
 		v &= ~HD64461_GPADR_SPEAKER;
-		outw(v, HD64461_GPADR);
+		__raw_writew(v, HD64461_GPADR);
 	}
 
 	sh_dac_enable(CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
-	ctrl_outw(TMU1_TCR_INIT, TMU1_TCR);
 }
 static void dac_audio_stop(void)
 {
 	dac_audio_stop_timer();
 
 	if (mach_is_hp6xx()) {
-		u16 v = inw(HD64461_GPADR);
+		u16 v = __raw_readw(HD64461_GPADR);
 		v |= HD64461_GPADR_SPEAKER;
-		outw(v, HD64461_GPADR);
+		__raw_writew(v, HD64461_GPADR);
 	}
 
- 	sh_dac_output(0, CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
+	sh_dac_output(0, CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
 	sh_dac_disable(CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
 }
 
 static void dac_audio_set_rate(void)
 {
-	unsigned long interval;
- 	struct clk *clk;
-
- 	clk = clk_get(NULL, "module_clk");
- 	interval = (clk_get_rate(clk) / 4) / rate;
- 	clk_put(clk);
-	ctrl_outl(interval, TMU1_TCOR);
-	ctrl_outl(interval, TMU1_TCNT);
+	wakeups_per_second = ktime_set(0, 1000000000 / rate);
 }
 
 static int dac_audio_ioctl(struct inode *inode, struct file *file,
@@ -265,32 +246,26 @@ const struct file_operations dac_audio_fops = {
       .release =	dac_audio_release,
 };
 
-static irqreturn_t timer1_interrupt(int irq, void *dev)
+static enum hrtimer_restart sh_dac_audio_timer(struct hrtimer *handle)
 {
-	unsigned long timer_status;
-
-	timer_status = ctrl_inw(TMU1_TCR);
-	timer_status &= ~0x100;
-	ctrl_outw(timer_status, TMU1_TCR);
-
 	if (!empty) {
 		sh_dac_output(*buffer_begin, CONFIG_SOUND_SH_DAC_AUDIO_CHANNEL);
 		buffer_begin++;
 
 		if (buffer_begin == data_buffer + BUFFER_SIZE)
 			buffer_begin = data_buffer;
-		if (buffer_begin == buffer_end) {
+		if (buffer_begin == buffer_end)
 			empty = 1;
-			dac_audio_stop_timer();
-		}
 	}
-	return IRQ_HANDLED;
+
+	if (!empty)
+		hrtimer_start(&hrtimer, wakeups_per_second, HRTIMER_MODE_REL);
+
+	return HRTIMER_NORESTART;
 }
 
 static int __init dac_audio_init(void)
 {
-	int retval;
-
 	if ((device_major = register_sound_dsp(&dac_audio_fops, -1)) < 0) {
 		printk(KERN_ERR "Cannot register dsp device");
 		return device_major;
@@ -306,21 +281,25 @@ static int __init dac_audio_init(void)
 	rate = 8000;
 	dac_audio_set_rate();
 
-	retval =
-	    request_irq(TIMER1_IRQ, timer1_interrupt, IRQF_DISABLED, MODNAME, 0);
-	if (retval < 0) {
-		printk(KERN_ERR "sh_dac_audio: IRQ %d request failed\n",
-		       TIMER1_IRQ);
-		return retval;
-	}
+	/* Today: High Resolution Timer driven DAC playback.
+	 * The timer callback gets called once per sample. Ouch.
+	 *
+	 * Future: A much better approach would be to use the
+	 * SH7720 CMT+DMAC+DAC hardware combination like this:
+	 * - Program sample rate using CMT0 or CMT1
+	 * - Program DMAC to use CMT for timing and output to DAC
+	 * - Play sound using DMAC, let CPU sleep.
+	 * - While at it, rewrite this driver to use ALSA.
+	 */
+
+	hrtimer_init(&hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer.function = sh_dac_audio_timer;
 
 	return 0;
 }
 
 static void __exit dac_audio_exit(void)
 {
-	free_irq(TIMER1_IRQ, 0);
-
 	unregister_sound_dsp(device_major);
 	kfree((void *)data_buffer);
 }
-- 
cgit v0.10.2


From 29a679754b1a2581ee456eada6c2de7ce95068bb Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 14 May 2009 23:19:09 -0400
Subject: x86/stacktrace: return 0 instead of -1 for stack ops

If we return -1 in the ops->stack for the stacktrace saving, we end up
breaking out of the loop if the stack we are tracing is in the exception
stack. This causes traces like:

          <idle>-0     [002] 34263.745825: raise_softirq_irqoff <-__blk_complete_request
          <idle>-0     [002] 34263.745826:
 <= 0
 <= 0
 <= 0
 <= 0
 <= 0
 <= 0
 <= 0

By returning "0" instead, the irq stack is saved as well, and we see:

          <idle>-0     [003]   883.280992: raise_softirq_irqoff <-__hrtimer_star
t_range_ns
          <idle>-0     [003]   883.280992:
 <= hrtimer_start_range_ns
 <= tick_nohz_restart_sched_tick
 <= cpu_idle
 <= start_secondary
 <=
 <= 0
 <= 0

[ Impact: record stacks from interrupts ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index f7bddc2..4aaf7e4 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -20,7 +20,7 @@ save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
 
 static int save_stack_stack(void *data, char *name)
 {
-	return -1;
+	return 0;
 }
 
 static void save_stack_address(void *data, unsigned long addr, int reliable)
-- 
cgit v0.10.2


From 1ec7c4849c214fc78b023230264399836ea3b245 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 14 May 2009 23:40:06 -0400
Subject: tracing: stop stack trace on first empty entry

The stack tracer stores eight entries in the ring buffer when an event
traces the stack. The output outputs all eight entries regardless of
how many entries were recorded.

This patch breaks out of the loop when a null entry is discovered.

[ Impact: only print the stack that is recorded ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 8bd9a2c..489c0e8 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -898,6 +898,8 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 	trace_assign_type(field, iter->ent);
 
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+		if (!field->caller[i])
+			break;
 		if (i) {
 			if (!trace_seq_puts(s, " <= "))
 				goto partial;
-- 
cgit v0.10.2


From 8cd995b6deedf98b7694ed32a786ee7f793d1eec Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 15 May 2009 11:07:27 +0800
Subject: tracing/filters: add missing unlock in a failure path

[ Impact: fix deadlock in a rare case we fail to allocate memory ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A0CDC6F.7070200@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 85ad6a8..22c2998 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1079,9 +1079,10 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 		return 0;
 	}
 
+	err = -ENOMEM;
 	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
 	if (!ps)
-		return -ENOMEM;
+		goto out_unlock;
 
 	filter_disable_preds(call);
 	replace_filter_string(call->filter, filter_string);
@@ -1101,7 +1102,7 @@ out:
 	filter_opstack_clear(ps);
 	postfix_clear(ps);
 	kfree(ps);
-
+out_unlock:
 	mutex_unlock(&filter_mutex);
 
 	return err;
@@ -1123,9 +1124,10 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 		return 0;
 	}
 
+	err = -ENOMEM;
 	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
 	if (!ps)
-		return -ENOMEM;
+		goto out_unlock;
 
 	filter_free_subsystem_preds(system);
 	replace_filter_string(system->filter, filter_string);
@@ -1145,7 +1147,7 @@ out:
 	filter_opstack_clear(ps);
 	postfix_clear(ps);
 	kfree(ps);
-
+out_unlock:
 	mutex_unlock(&filter_mutex);
 
 	return err;
-- 
cgit v0.10.2


From 5872144f64b34a5942f6b4acedc90b02de72c58b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 15 May 2009 11:07:56 +0800
Subject: tracing/filters: fix off-by-one bug

We should leave the last slot for the ending '\0'.

[ Impact: fix possible crash when the length of an operand is 128 ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A0CDC8C.30602@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 22c2998..a7430b1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -736,7 +736,7 @@ static inline void clear_operand_string(struct filter_parse_state *ps)
 
 static inline int append_operand_char(struct filter_parse_state *ps, char c)
 {
-	if (ps->operand.tail == MAX_FILTER_STR_VAL)
+	if (ps->operand.tail == MAX_FILTER_STR_VAL - 1)
 		return -EINVAL;
 
 	ps->operand.string[ps->operand.tail++] = c;
-- 
cgit v0.10.2


From 1a853e36871b533ccc3f3c5bdd5cd0d867043a00 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 May 2009 22:50:46 -0300
Subject: perf record: Allow specifying a pid to record

Allow specifying a pid instead of always fork+exec'ing a command.

Because the PERF_EVENT_COMM and PERF_EVENT_MMAP events happened before
we connected, we must synthesize them so that 'perf report' can get what
it needs.

[ Impact: add new command line option ]

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Clark Williams <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <20090515015046.GA13664@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 5f5e6df..efb8759 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -34,6 +34,9 @@
 
 #include "perf.h"
 
+#define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
+#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
+
 static int			nr_counters			=  0;
 static __u64			event_id[MAX_COUNTERS]		= { };
 static int			default_interval = 100000;
@@ -47,6 +50,7 @@ static char 			*output_name			= "output.perf";
 static int			group				= 0;
 static unsigned int		realtime_prio			= 0;
 static int			system_wide			= 0;
+static pid_t			target_pid			= -1;
 static int			inherit				= 1;
 static int			nmi				= 1;
 
@@ -180,6 +184,7 @@ static void display_help(void)
 	" -c CNT    --count=CNT          # event period to sample\n"
 	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
 	" -o file   --output=<file>      # output file\n"
+	" -p pid    --pid=<pid>		 # record events on existing pid\n"
 	" -r prio   --realtime=<prio>    # use RT prio\n"
 	" -s        --system             # system wide profiling\n"
 	);
@@ -199,13 +204,14 @@ static void process_options(int argc, const char *argv[])
 			{"event",	required_argument,	NULL, 'e'},
 			{"mmap_pages",	required_argument,	NULL, 'm'},
 			{"output",	required_argument,	NULL, 'o'},
+			{"pid",		required_argument,	NULL, 'p'},
 			{"realtime",	required_argument,	NULL, 'r'},
 			{"system",	no_argument,		NULL, 's'},
 			{"inherit",	no_argument,		NULL, 'i'},
 			{"nmi",		no_argument,		NULL, 'n'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:c:e:m:o:r:sin",
+		int c = getopt_long(argc, argv, "+:c:e:m:o:p:r:sin",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -215,6 +221,7 @@ static void process_options(int argc, const char *argv[])
 		case 'e': error				= parse_events(optarg); break;
 		case 'm': mmap_pages			=   atoi(optarg); break;
 		case 'o': output_name			= strdup(optarg); break;
+		case 'p': target_pid			=   atoi(optarg); break;
 		case 'r': realtime_prio			=   atoi(optarg); break;
 		case 's': system_wide                   ^=             1; break;
 		case 'i': inherit			^=	       1; break;
@@ -223,7 +230,7 @@ static void process_options(int argc, const char *argv[])
 		}
 	}
 
-	if (argc - optind == 0)
+	if (argc - optind == 0 && target_pid == -1)
 		error = 1;
 
 	if (error)
@@ -350,15 +357,135 @@ static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 static int nr_poll;
 static int nr_cpu;
 
-static void open_counters(int cpu)
+struct mmap_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	__u64 start;
+	__u64 len;
+	__u64 pgoff;
+	char filename[PATH_MAX];
+};
+struct comm_event {
+	struct perf_event_header header;
+	__u32 pid,tid;
+	char comm[16];
+};
+
+static pid_t pid_synthesize_comm_event(pid_t pid)
+{
+	char filename[PATH_MAX];
+	char bf[BUFSIZ];
+	struct comm_event comm_ev;
+	size_t size;
+	int fd;
+
+	snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "couldn't open %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	if (read(fd, bf, sizeof(bf)) < 0) {
+		fprintf(stderr, "couldn't read %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+
+	pid_t spid, ppid;
+	char state;
+	char comm[18];
+
+	memset(&comm_ev, 0, sizeof(comm_ev));
+        int nr = sscanf(bf, "%d %s %c %d %d ",
+			&spid, comm, &state, &ppid, &comm_ev.pid);
+	if (nr != 5) {
+		fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
+			filename);
+		exit(EXIT_FAILURE);
+	}
+	comm_ev.header.type = PERF_EVENT_COMM;
+	comm_ev.tid = pid;
+	size = strlen(comm);
+	comm[--size] = '\0'; /* Remove the ')' at the end */
+	--size; /* Remove the '(' at the begin */
+	memcpy(comm_ev.comm, comm + 1, size);
+	size = ALIGN(size, sizeof(uint64_t));
+	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
+	int ret = write(output, &comm_ev, comm_ev.header.size);
+	if (ret < 0) {
+		perror("failed to write");
+		exit(-1);
+	}
+	return comm_ev.pid;
+}
+
+static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
+{
+	char filename[PATH_MAX];
+	FILE *fp;
+
+	snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
+
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	while (1) {
+		char bf[BUFSIZ];
+		unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
+		struct mmap_event mmap_ev = {
+			.header.type = PERF_EVENT_MMAP,
+		};
+		unsigned long ino;
+		int major, minor;
+		size_t size;
+		if (fgets(bf, sizeof(bf), fp) == NULL)
+			break;
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
+			&mmap_ev.start, &mmap_ev.len,
+                        &vm_read, &vm_write, &vm_exec, &vm_mayshare,
+                        &mmap_ev.pgoff, &major, &minor, &ino);
+		if (vm_exec == 'x') {
+			char *execname = strrchr(bf, ' ');
+
+			if (execname == NULL || execname[1] != '/')
+				continue;
+
+			execname += 1;
+			size = strlen(execname);
+			execname[size - 1] = '\0'; /* Remove \n */
+			memcpy(mmap_ev.filename, execname, size);
+			size = ALIGN(size, sizeof(uint64_t));
+			mmap_ev.len -= mmap_ev.start;
+			mmap_ev.header.size = (sizeof(mmap_ev) -
+					       (sizeof(mmap_ev.filename) - size));
+			mmap_ev.pid = pgid;
+			mmap_ev.tid = pid;
+
+			if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
+				perror("failed to write");
+				exit(-1);
+			}
+		}
+	}
+
+	fclose(fp);
+}
+
+static void open_counters(int cpu, pid_t pid)
 {
 	struct perf_counter_hw_event hw_event;
 	int counter, group_fd;
 	int track = 1;
-	pid_t pid = -1;
 
-	if (cpu < 0)
-		pid = 0;
+	if (pid > 0) {
+		pid_t pgid = pid_synthesize_comm_event(pid);
+		pid_synthesize_mmap_events(pid, pgid);
+	}
 
 	group_fd = -1;
 	for (counter = 0; counter < nr_counters; counter++) {
@@ -435,22 +562,24 @@ int cmd_record(int argc, const char **argv)
 	argc -= optind;
 	argv += optind;
 
-	if (!system_wide)
-		open_counters(-1);
-	else for (i = 0; i < nr_cpus; i++)
-		open_counters(i);
+	if (!system_wide) {
+		open_counters(-1, target_pid != -1 ? target_pid : 0);
+	} else for (i = 0; i < nr_cpus; i++)
+		open_counters(i, target_pid);
 
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
 
-	pid = fork();
-	if (pid < 0)
-		perror("failed to fork");
+	if (target_pid == -1) {
+		pid = fork();
+		if (pid < 0)
+			perror("failed to fork");
 
-	if (!pid) {
-		if (execvp(argv[0], argv)) {
-			perror(argv[0]);
-			exit(-1);
+		if (!pid) {
+			if (execvp(argv[0], argv)) {
+				perror(argv[0]);
+				exit(-1);
+			}
 		}
 	}
 
-- 
cgit v0.10.2


From ec3232bdf8518bea8410f0027f870b24d3aa8753 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 May 2009 09:45:19 +0200
Subject: perf_counter: x86: More accurate counter update

Take the counter width into account instead of assuming 32 bits.

In particular Nehalem has 44 bit wide counters, and all
arithmetics should happen on a 44-bit signed integer basis.

[ Impact: fix rare event imprecision, warning message on Nehalem ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f7772ff..3a92a2b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -138,7 +138,9 @@ static u64
 x86_perf_counter_update(struct perf_counter *counter,
 			struct hw_perf_counter *hwc, int idx)
 {
-	u64 prev_raw_count, new_raw_count, delta;
+	int shift = 64 - x86_pmu.counter_bits;
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
 
 	/*
 	 * Careful: an NMI might modify the previous counter value.
@@ -161,9 +163,10 @@ again:
 	 * (counter-)time and add that to the generic counter.
 	 *
 	 * Careful, not all hw sign-extends above the physical width
-	 * of the count, so we do that by clipping the delta to 32 bits:
+	 * of the count.
 	 */
-	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
 
 	atomic64_add(delta, &counter->count);
 	atomic64_sub(delta, &hwc->period_left);
-- 
cgit v0.10.2


From f5a5a2f6e69e88647ae12da39f0ff3a510bcf0a6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 13 May 2009 12:54:01 +0200
Subject: perf_counter: x86: Fix throttling

If counters are disabled globally when a perfcounter IRQ/NMI hits,
and if we throttle in that case, we'll promote the '0' value to
the next lapic IRQ and disable all perfcounters at that point,
permanently ...

Fix it.

[ Impact: fix hung perfcounters under load ]

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3a92a2b..88ae8ce 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -765,8 +765,13 @@ out:
 	/*
 	 * Restore - do not reenable when global enable is off or throttled:
 	 */
-	if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
-		intel_pmu_restore_all(cpuc->throttle_ctrl);
+	if (cpuc->throttle_ctrl) {
+		if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) {
+			intel_pmu_restore_all(cpuc->throttle_ctrl);
+		} else {
+			pr_info("CPU#%d: perfcounters: max interrupt rate exceeded! Throttle on.\n", smp_processor_id());
+		}
+	}
 
 	return ret;
 }
@@ -817,11 +822,16 @@ void perf_counter_unthrottle(void)
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
 	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
-		if (printk_ratelimit())
-			printk(KERN_WARNING "perfcounters: max interrupts exceeded!\n");
+		pr_info("CPU#%d: perfcounters: throttle off.\n", smp_processor_id());
+
+		/*
+		 * Clear them before re-enabling irqs/NMIs again:
+		 */
+		cpuc->interrupts = 0;
 		hw_perf_restore(cpuc->throttle_ctrl);
+	} else {
+		cpuc->interrupts = 0;
 	}
-	cpuc->interrupts = 0;
 }
 
 void smp_perf_counter_interrupt(struct pt_regs *regs)
-- 
cgit v0.10.2


From a026dfecc035f213c1cfa0bf6407ce3155f6a9df Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 May 2009 10:02:57 +0200
Subject: perf_counter: x86: Allow unpriviliged use of NMIs

Apply sysctl_perf_counter_priv to NMIs. Also, fail the counter
creation instead of silently down-grading to regular interrupts.

[ Impact: allow wider perf-counter usage ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 88ae8ce..c19e927 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -280,8 +280,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	 * If privileged enough, allow NMI events:
 	 */
 	hwc->nmi = 0;
-	if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
+	if (hw_event->nmi) {
+		if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
 		hwc->nmi = 1;
+	}
 
 	hwc->irq_period	= hw_event->irq_period;
 	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
-- 
cgit v0.10.2


From 53020fe81eecd0b7be295868ce5850ef8f41074e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 May 2009 21:26:19 +0200
Subject: perf_counter: Fix perf_output_copy() WARN to account for overflow

The simple reservation test in perf_output_copy() failed to take
unsigned int overflow into account, fix this.

[ Impact: fix false positive warning with more than 4GB of profiling data ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ff166c1..985be0b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1927,7 +1927,11 @@ static void perf_output_copy(struct perf_output_handle *handle,
 
 	handle->offset = offset;
 
-	WARN_ON_ONCE(handle->offset > handle->head);
+	/*
+	 * Check we didn't copy past our reservation window, taking the
+	 * possible unsigned int wrap into account.
+	 */
+	WARN_ON_ONCE(((int)(handle->head - handle->offset)) < 0);
 }
 
 #define perf_output_put(handle, x) \
-- 
cgit v0.10.2


From 962bf7a66edca4d36a730a38ff8410a67f560e40 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 May 2009 13:21:36 +0200
Subject: perf_counter: x86: Fix up the amd NMI/INT throttle

perf_counter_unthrottle() restores throttle_ctrl, buts its never set.
Also, we fail to disable all counters when throttling.

[ Impact: fix rare stuck perf-counters when they are throttled ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index c19e927..7601c01 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -334,6 +334,8 @@ static u64 amd_pmu_save_disable_all(void)
 	 * right thing.
 	 */
 	barrier();
+	if (!enabled)
+		goto out;
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
@@ -347,6 +349,7 @@ static u64 amd_pmu_save_disable_all(void)
 		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
 	}
 
+out:
 	return enabled;
 }
 
@@ -787,32 +790,43 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 	int handled = 0;
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
-	int idx;
+	int idx, throttle = 0;
+
+	cpuc->throttle_ctrl = cpuc->enabled;
+	cpuc->enabled = 0;
+	barrier();
+
+	if (cpuc->throttle_ctrl) {
+		if (++cpuc->interrupts >= PERFMON_MAX_INTERRUPTS)
+			throttle = 1;
+	}
 
-	++cpuc->interrupts;
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		int disable = 0;
+
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
+
 		counter = cpuc->counters[idx];
 		hwc = &counter->hw;
 		val = x86_perf_counter_update(counter, hwc, idx);
 		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
-			continue;
+			goto next;
+
 		/* counter overflow */
 		x86_perf_counter_set_period(counter, hwc, idx);
 		handled = 1;
 		inc_irq_stat(apic_perf_irqs);
-		if (perf_counter_overflow(counter, nmi, regs, 0))
-			amd_pmu_disable_counter(hwc, idx);
-		else if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS)
-			/*
-			 * do not reenable when throttled, but reload
-			 * the register
-			 */
+		disable = perf_counter_overflow(counter, nmi, regs, 0);
+
+next:
+		if (disable || throttle)
 			amd_pmu_disable_counter(hwc, idx);
-		else if (counter->state == PERF_COUNTER_STATE_ACTIVE)
-			amd_pmu_enable_counter(hwc, idx);
 	}
+
+	if (cpuc->throttle_ctrl && !throttle)
+		cpuc->enabled = 1;
+
 	return handled;
 }
 
-- 
cgit v0.10.2


From 9e35ad388bea89f7d6f375af4c0ae98803688666 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 May 2009 16:21:38 +0200
Subject: perf_counter: Rework the perf counter disable/enable

The current disable/enable mechanism is:

	token = hw_perf_save_disable();
	...
	/* do bits */
	...
	hw_perf_restore(token);

This works well, provided that the use nests properly. Except we don't.

x86 NMI/INT throttling has non-nested use of this, breaking things. Therefore
provide a reference counter disable/enable interface, where the first disable
disables the hardware, and the last enable enables the hardware again.

[ Impact: refactor, simplify the PMU disable/enable logic ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 15cdc8e..bb1b463 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -386,7 +386,7 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
  * Disable all counters to prevent PMU interrupts and to allow
  * counters to be added or removed.
  */
-u64 hw_perf_save_disable(void)
+void hw_perf_disable(void)
 {
 	struct cpu_hw_counters *cpuhw;
 	unsigned long ret;
@@ -428,7 +428,6 @@ u64 hw_perf_save_disable(void)
 		mb();
 	}
 	local_irq_restore(flags);
-	return ret;
 }
 
 /*
@@ -436,7 +435,7 @@ u64 hw_perf_save_disable(void)
  * If we were previously disabled and counters were added, then
  * put the new config on the PMU.
  */
-void hw_perf_restore(u64 disable)
+void hw_perf_enable(void)
 {
 	struct perf_counter *counter;
 	struct cpu_hw_counters *cpuhw;
@@ -448,9 +447,12 @@ void hw_perf_restore(u64 disable)
 	int n_lim;
 	int idx;
 
-	if (disable)
-		return;
 	local_irq_save(flags);
+	if (!cpuhw->disabled) {
+		local_irq_restore(flags);
+		return;
+	}
+
 	cpuhw = &__get_cpu_var(cpu_hw_counters);
 	cpuhw->disabled = 0;
 
@@ -649,19 +651,18 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 /*
  * Add a counter to the PMU.
  * If all counters are not already frozen, then we disable and
- * re-enable the PMU in order to get hw_perf_restore to do the
+ * re-enable the PMU in order to get hw_perf_enable to do the
  * actual work of reconfiguring the PMU.
  */
 static int power_pmu_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuhw;
 	unsigned long flags;
-	u64 pmudis;
 	int n0;
 	int ret = -EAGAIN;
 
 	local_irq_save(flags);
-	pmudis = hw_perf_save_disable();
+	perf_disable();
 
 	/*
 	 * Add the counter to the list (if there is room)
@@ -685,7 +686,7 @@ static int power_pmu_enable(struct perf_counter *counter)
 
 	ret = 0;
  out:
-	hw_perf_restore(pmudis);
+	perf_enable();
 	local_irq_restore(flags);
 	return ret;
 }
@@ -697,11 +698,10 @@ static void power_pmu_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuhw;
 	long i;
-	u64 pmudis;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	pmudis = hw_perf_save_disable();
+	perf_disable();
 
 	power_pmu_read(counter);
 
@@ -735,7 +735,7 @@ static void power_pmu_disable(struct perf_counter *counter)
 		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
 	}
 
-	hw_perf_restore(pmudis);
+	perf_enable();
 	local_irq_restore(flags);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7601c01..313638c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -31,7 +31,6 @@ struct cpu_hw_counters {
 	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
-	u64			throttle_ctrl;
 	int			enabled;
 };
 
@@ -42,8 +41,8 @@ struct x86_pmu {
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *, int);
-	u64		(*save_disable_all)(void);
-	void		(*restore_all)(u64);
+	void		(*disable_all)(void);
+	void		(*enable_all)(void);
 	void		(*enable)(struct hw_perf_counter *, int);
 	void		(*disable)(struct hw_perf_counter *, int);
 	unsigned	eventsel;
@@ -56,6 +55,7 @@ struct x86_pmu {
 	int		counter_bits;
 	u64		counter_mask;
 	u64		max_period;
+	u64		intel_ctrl;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -311,22 +311,19 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	return 0;
 }
 
-static u64 intel_pmu_save_disable_all(void)
+static void intel_pmu_disable_all(void)
 {
-	u64 ctrl;
-
-	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-	return ctrl;
 }
 
-static u64 amd_pmu_save_disable_all(void)
+static void amd_pmu_disable_all(void)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	int enabled, idx;
+	int idx;
+
+	if (!cpuc->enabled)
+		return;
 
-	enabled = cpuc->enabled;
 	cpuc->enabled = 0;
 	/*
 	 * ensure we write the disable before we start disabling the
@@ -334,8 +331,6 @@ static u64 amd_pmu_save_disable_all(void)
 	 * right thing.
 	 */
 	barrier();
-	if (!enabled)
-		goto out;
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
@@ -348,37 +343,31 @@ static u64 amd_pmu_save_disable_all(void)
 		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
 		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
 	}
-
-out:
-	return enabled;
 }
 
-u64 hw_perf_save_disable(void)
+void hw_perf_disable(void)
 {
 	if (!x86_pmu_initialized())
-		return 0;
-	return x86_pmu.save_disable_all();
+		return;
+	return x86_pmu.disable_all();
 }
-/*
- * Exported because of ACPI idle
- */
-EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
-static void intel_pmu_restore_all(u64 ctrl)
+static void intel_pmu_enable_all(void)
 {
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 }
 
-static void amd_pmu_restore_all(u64 ctrl)
+static void amd_pmu_enable_all(void)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	int idx;
 
-	cpuc->enabled = ctrl;
-	barrier();
-	if (!ctrl)
+	if (cpuc->enabled)
 		return;
 
+	cpuc->enabled = 1;
+	barrier();
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
@@ -392,16 +381,12 @@ static void amd_pmu_restore_all(u64 ctrl)
 	}
 }
 
-void hw_perf_restore(u64 ctrl)
+void hw_perf_enable(void)
 {
 	if (!x86_pmu_initialized())
 		return;
-	x86_pmu.restore_all(ctrl);
+	x86_pmu.enable_all();
 }
-/*
- * Exported because of ACPI idle
- */
-EXPORT_SYMBOL_GPL(hw_perf_restore);
 
 static inline u64 intel_pmu_get_status(void)
 {
@@ -735,15 +720,14 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 	int bit, cpu = smp_processor_id();
 	u64 ack, status;
 	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
-	int ret = 0;
-
-	cpuc->throttle_ctrl = intel_pmu_save_disable_all();
 
+	perf_disable();
 	status = intel_pmu_get_status();
-	if (!status)
-		goto out;
+	if (!status) {
+		perf_enable();
+		return 0;
+	}
 
-	ret = 1;
 again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
@@ -767,19 +751,11 @@ again:
 	status = intel_pmu_get_status();
 	if (status)
 		goto again;
-out:
-	/*
-	 * Restore - do not reenable when global enable is off or throttled:
-	 */
-	if (cpuc->throttle_ctrl) {
-		if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) {
-			intel_pmu_restore_all(cpuc->throttle_ctrl);
-		} else {
-			pr_info("CPU#%d: perfcounters: max interrupt rate exceeded! Throttle on.\n", smp_processor_id());
-		}
-	}
 
-	return ret;
+	if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS)
+		perf_enable();
+
+	return 1;
 }
 
 static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
@@ -792,13 +768,11 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 	struct hw_perf_counter *hwc;
 	int idx, throttle = 0;
 
-	cpuc->throttle_ctrl = cpuc->enabled;
-	cpuc->enabled = 0;
-	barrier();
-
-	if (cpuc->throttle_ctrl) {
-		if (++cpuc->interrupts >= PERFMON_MAX_INTERRUPTS)
-			throttle = 1;
+	if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) {
+		throttle = 1;
+		__perf_disable();
+		cpuc->enabled = 0;
+		barrier();
 	}
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -824,9 +798,6 @@ next:
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
-	if (cpuc->throttle_ctrl && !throttle)
-		cpuc->enabled = 1;
-
 	return handled;
 }
 
@@ -839,13 +810,11 @@ void perf_counter_unthrottle(void)
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
 	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
-		pr_info("CPU#%d: perfcounters: throttle off.\n", smp_processor_id());
-
 		/*
 		 * Clear them before re-enabling irqs/NMIs again:
 		 */
 		cpuc->interrupts = 0;
-		hw_perf_restore(cpuc->throttle_ctrl);
+		perf_enable();
 	} else {
 		cpuc->interrupts = 0;
 	}
@@ -931,8 +900,8 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 static struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
-	.save_disable_all	= intel_pmu_save_disable_all,
-	.restore_all		= intel_pmu_restore_all,
+	.disable_all		= intel_pmu_disable_all,
+	.enable_all		= intel_pmu_enable_all,
 	.enable			= intel_pmu_enable_counter,
 	.disable		= intel_pmu_disable_counter,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
@@ -951,8 +920,8 @@ static struct x86_pmu intel_pmu = {
 static struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= amd_pmu_handle_irq,
-	.save_disable_all	= amd_pmu_save_disable_all,
-	.restore_all		= amd_pmu_restore_all,
+	.disable_all		= amd_pmu_disable_all,
+	.enable_all		= amd_pmu_enable_all,
 	.enable			= amd_pmu_enable_counter,
 	.disable		= amd_pmu_disable_counter,
 	.eventsel		= MSR_K7_EVNTSEL0,
@@ -1003,6 +972,8 @@ static int intel_pmu_init(void)
 	x86_pmu.counter_bits = eax.split.bit_width;
 	x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
 
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
 	return 0;
 }
 
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index d2830f3..9645758 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -763,11 +763,9 @@ static int acpi_idle_bm_check(void)
  */
 static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
-	u64 perf_flags;
-
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -782,7 +780,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
-	hw_perf_restore(perf_flags);
+	perf_enable();
 	start_critical_timings();
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 614f921..e543ecc 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -544,8 +544,10 @@ extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
 extern void perf_counter_unthrottle(void);
-extern u64 hw_perf_save_disable(void);
-extern void hw_perf_restore(u64 ctrl);
+extern void __perf_disable(void);
+extern bool __perf_enable(void);
+extern void perf_disable(void);
+extern void perf_enable(void);
 extern int perf_counter_task_disable(void);
 extern int perf_counter_task_enable(void);
 extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -600,8 +602,8 @@ static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_counter_unthrottle(void)			{ }
-static inline void hw_perf_restore(u64 ctrl)				{ }
-static inline u64 hw_perf_save_disable(void)		      { return 0; }
+static inline void perf_disable(void)					{ }
+static inline void perf_enable(void)					{ }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 985be0b..e814ff0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -60,8 +60,9 @@ extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counte
 	return NULL;
 }
 
-u64 __weak hw_perf_save_disable(void)		{ return 0; }
-void __weak hw_perf_restore(u64 ctrl)		{ barrier(); }
+void __weak hw_perf_disable(void)		{ barrier(); }
+void __weak hw_perf_enable(void)		{ barrier(); }
+
 void __weak hw_perf_counter_setup(int cpu)	{ barrier(); }
 int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
@@ -72,6 +73,32 @@ int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
 
 void __weak perf_counter_print_debug(void)	{ }
 
+static DEFINE_PER_CPU(int, disable_count);
+
+void __perf_disable(void)
+{
+	__get_cpu_var(disable_count)++;
+}
+
+bool __perf_enable(void)
+{
+	return !--__get_cpu_var(disable_count);
+}
+
+void perf_disable(void)
+{
+	__perf_disable();
+	hw_perf_disable();
+}
+EXPORT_SYMBOL_GPL(perf_disable); /* ACPI idle */
+
+void perf_enable(void)
+{
+	if (__perf_enable())
+		hw_perf_enable();
+}
+EXPORT_SYMBOL_GPL(perf_enable); /* ACPI idle */
+
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -170,7 +197,6 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
-	u64 perf_flags;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -191,9 +217,9 @@ static void __perf_counter_remove_from_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 	list_del_counter(counter, ctx);
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	if (!ctx->task) {
 		/*
@@ -538,7 +564,6 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter *leader = counter->group_leader;
 	int cpu = smp_processor_id();
 	unsigned long flags;
-	u64 perf_flags;
 	int err;
 
 	/*
@@ -556,7 +581,7 @@ static void __perf_install_in_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 
 	add_counter_to_ctx(counter, ctx);
 
@@ -596,7 +621,7 @@ static void __perf_install_in_context(void *info)
 		cpuctx->max_pertask--;
 
  unlock:
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
 }
@@ -663,7 +688,6 @@ static void __perf_counter_enable(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter_context *ctx = counter->ctx;
 	struct perf_counter *leader = counter->group_leader;
-	unsigned long pmuflags;
 	unsigned long flags;
 	int err;
 
@@ -693,14 +717,14 @@ static void __perf_counter_enable(void *info)
 	if (!group_can_go_on(counter, cpuctx, 1)) {
 		err = -EEXIST;
 	} else {
-		pmuflags = hw_perf_save_disable();
+		perf_disable();
 		if (counter == leader)
 			err = group_sched_in(counter, cpuctx, ctx,
 					     smp_processor_id());
 		else
 			err = counter_sched_in(counter, cpuctx, ctx,
 					       smp_processor_id());
-		hw_perf_restore(pmuflags);
+		perf_enable();
 	}
 
 	if (err) {
@@ -795,7 +819,6 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 			      struct perf_cpu_context *cpuctx)
 {
 	struct perf_counter *counter;
-	u64 flags;
 
 	spin_lock(&ctx->lock);
 	ctx->is_active = 0;
@@ -803,12 +826,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 		goto out;
 	update_context_time(ctx);
 
-	flags = hw_perf_save_disable();
+	perf_disable();
 	if (ctx->nr_active) {
 		list_for_each_entry(counter, &ctx->counter_list, list_entry)
 			group_sched_out(counter, cpuctx, ctx);
 	}
-	hw_perf_restore(flags);
+	perf_enable();
  out:
 	spin_unlock(&ctx->lock);
 }
@@ -860,7 +883,6 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
 {
 	struct perf_counter *counter;
-	u64 flags;
 	int can_add_hw = 1;
 
 	spin_lock(&ctx->lock);
@@ -870,7 +892,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 
 	ctx->timestamp = perf_clock();
 
-	flags = hw_perf_save_disable();
+	perf_disable();
 
 	/*
 	 * First go through the list and put on any pinned groups
@@ -917,7 +939,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 				can_add_hw = 0;
 		}
 	}
-	hw_perf_restore(flags);
+	perf_enable();
  out:
 	spin_unlock(&ctx->lock);
 }
@@ -955,7 +977,6 @@ int perf_counter_task_disable(void)
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	unsigned long flags;
-	u64 perf_flags;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
@@ -969,7 +990,7 @@ int perf_counter_task_disable(void)
 	/*
 	 * Disable all the counters:
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		if (counter->state != PERF_COUNTER_STATE_ERROR) {
@@ -978,7 +999,7 @@ int perf_counter_task_disable(void)
 		}
 	}
 
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
 
@@ -991,7 +1012,6 @@ int perf_counter_task_enable(void)
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	unsigned long flags;
-	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
@@ -1007,7 +1027,7 @@ int perf_counter_task_enable(void)
 	/*
 	 * Disable all the counters:
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		if (counter->state > PERF_COUNTER_STATE_OFF)
@@ -1017,7 +1037,7 @@ int perf_counter_task_enable(void)
 			ctx->time - counter->total_time_enabled;
 		counter->hw_event.disabled = 0;
 	}
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	spin_unlock(&ctx->lock);
 
@@ -1034,7 +1054,6 @@ int perf_counter_task_enable(void)
 static void rotate_ctx(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
-	u64 perf_flags;
 
 	if (!ctx->nr_counters)
 		return;
@@ -1043,12 +1062,12 @@ static void rotate_ctx(struct perf_counter_context *ctx)
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		list_move_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	spin_unlock(&ctx->lock);
 }
@@ -3194,7 +3213,6 @@ __perf_counter_exit_task(struct task_struct *child,
 	} else {
 		struct perf_cpu_context *cpuctx;
 		unsigned long flags;
-		u64 perf_flags;
 
 		/*
 		 * Disable and unlink this counter.
@@ -3203,7 +3221,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		 * could still be processing it:
 		 */
 		local_irq_save(flags);
-		perf_flags = hw_perf_save_disable();
+		perf_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
@@ -3214,7 +3232,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 		child_ctx->nr_counters--;
 
-		hw_perf_restore(perf_flags);
+		perf_enable();
 		local_irq_restore(flags);
 	}
 
-- 
cgit v0.10.2


From a4016a79fcbd139e7378944c0d86a39fdbc70ecc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 14 May 2009 14:52:17 +0200
Subject: perf_counter: x86: Robustify interrupt handling

Two consecutive NMIs could daze and confuse the machine when the
first would handle the overflow of both counters.

[ Impact: fix false-positive syslog messages under multi-session profiling ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 313638c..1dcf670 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -783,6 +783,10 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 
 		counter = cpuc->counters[idx];
 		hwc = &counter->hw;
+
+		if (counter->hw_event.nmi != nmi)
+			goto next;
+
 		val = x86_perf_counter_update(counter, hwc, idx);
 		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
 			goto next;
@@ -869,7 +873,6 @@ perf_counter_nmi_handler(struct notifier_block *self,
 {
 	struct die_args *args = __args;
 	struct pt_regs *regs;
-	int ret;
 
 	if (!atomic_read(&active_counters))
 		return NOTIFY_DONE;
@@ -886,9 +889,16 @@ perf_counter_nmi_handler(struct notifier_block *self,
 	regs = args->regs;
 
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	ret = x86_pmu.handle_irq(regs, 1);
+	/*
+	 * Can't rely on the handled return value to say it was our NMI, two
+	 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
+	 *
+	 * If the first NMI handles both, the latter will be empty and daze
+	 * the CPU.
+	 */
+	x86_pmu.handle_irq(regs, 1);
 
-	return ret ? NOTIFY_STOP : NOTIFY_OK;
+	return NOTIFY_STOP;
 }
 
 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
-- 
cgit v0.10.2


From 1c80f4b598d9b075a2a0be694e28be93a6702bcc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 15 May 2009 08:25:22 +0200
Subject: perf_counter: x86: Disallow interval of 1

On certain CPUs i have observed a stuck PMU if interval was set to
1 and NMIs were used. The PMU had PMC0 set in MSR_CORE_PERF_GLOBAL_STATUS,
but it was not possible to ack it via MSR_CORE_PERF_GLOBAL_OVF_CTRL,
and the NMI loop got stuck infinitely.

[ Impact: fix rare hangs during high perfcounter load ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 1dcf670..46a82d1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -473,6 +473,11 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 		left += period;
 		atomic64_set(&hwc->period_left, left);
 	}
+	/*
+	 * Quirk: certain CPUs dont like it if just 1 event is left:
+	 */
+	if (unlikely(left < 2))
+		left = 2;
 
 	per_cpu(prev_left[idx], smp_processor_id()) = left;
 
-- 
cgit v0.10.2


From 9029a5e3801f1cc7cdaab80169d82427acf928d8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 15 May 2009 08:26:20 +0200
Subject: perf_counter: x86: Protect against infinite loops in
 intel_pmu_handle_irq()

intel_pmu_handle_irq() can lock up in an infinite loop if the hardware
does not allow the acking of irqs. Alas, this happened in testing so
make this robust and emit a warning if it happens in the future.

Also, clean up the IRQ handlers a bit.

[ Impact: improve perfcounter irq/nmi handling robustness ]

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 46a82d1..5a7f718 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -722,9 +722,13 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
  */
 static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 {
-	int bit, cpu = smp_processor_id();
+	struct cpu_hw_counters *cpuc;
+	struct cpu_hw_counters;
+	int bit, cpu, loops;
 	u64 ack, status;
-	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+	cpu = smp_processor_id();
+	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
 	perf_disable();
 	status = intel_pmu_get_status();
@@ -733,7 +737,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 		return 0;
 	}
 
+	loops = 0;
 again:
+	if (++loops > 100) {
+		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
+		return 1;
+	}
+
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
@@ -765,13 +775,14 @@ again:
 
 static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 {
-	int cpu = smp_processor_id();
-	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
-	u64 val;
-	int handled = 0;
+	int cpu, idx, throttle = 0, handled = 0;
+	struct cpu_hw_counters *cpuc;
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
-	int idx, throttle = 0;
+	u64 val;
+
+	cpu = smp_processor_id();
+	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
 	if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) {
 		throttle = 1;
-- 
cgit v0.10.2


From 251e8e3c7235f5944805a64f24c79fc4696793f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 14 May 2009 05:16:59 +0200
Subject: perf_counter: Remove ACPI quirk

We had a disable/enable around acpi_idle_do_entry() due to an erratum
in an early prototype CPU i had access to. That erratum has been fixed
in the BIOS so remove the quirk.

The quirk also kept us from profiling interrupts that hit the ACPI idle
instruction - so this is an improvement as well, beyond a cleanup and
a micro-optimization.

[ Impact: improve profiling scope, cleanup, micro-optimization ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 9645758..f7ca8c5 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -765,7 +765,6 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
-	perf_disable();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -780,7 +779,6 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
-	perf_enable();
 	start_critical_timings();
 }
 
-- 
cgit v0.10.2


From 58d7e993b16b62d30f8ef27757614056fe4def11 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 15 May 2009 11:03:23 +0200
Subject: perf stat: handle Ctrl-C

Before this change, if a long-running perf stat workload was Ctrl-C-ed,
the utility exited without displaying statistics.

After the change, the Ctrl-C gets propagated into the workload (and
causes its early exit there), but perf stat itself will still continue
to run and will display counter results.

This is useful to run open-ended workloads, let them run for
a while, then Ctrl-C them to get the stats.

[ Impact: extend perf stat with new functionality ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index cf575c3..03518d7 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -538,8 +538,14 @@ static void process_options(int argc, char **argv)
 	}
 }
 
+static void skip_signal(int signo)
+{
+}
+
 int cmd_stat(int argc, char **argv, const char *prefix)
 {
+	sigset_t blocked;
+
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	process_options(argc, argv);
@@ -548,5 +554,15 @@ int cmd_stat(int argc, char **argv, const char *prefix)
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
 
+	/*
+	 * We dont want to block the signals - that would cause
+	 * child tasks to inherit that and Ctrl-C would not work.
+	 * What we want is for Ctrl-C to work in the exec()-ed
+	 * task, but being ignored by perf stat itself:
+	 */
+	signal(SIGINT,  skip_signal);
+	signal(SIGALRM, skip_signal);
+	signal(SIGABRT, skip_signal);
+
 	return do_perfstat(argc, argv);
 }
-- 
cgit v0.10.2


From 2baaec28068d07db3d4ae6ba885fa07255b2ad79 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 15 May 2009 12:18:47 +0200
Subject: ASoC: Add missing __devexit in wm8940.c

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c
index a66dacc..b8e17d6 100644
--- a/sound/soc/codecs/wm8940.c
+++ b/sound/soc/codecs/wm8940.c
@@ -907,7 +907,7 @@ static int wm8940_i2c_probe(struct i2c_client *i2c,
 	return wm8940_register(wm8940);
 }
 
-static int wm8940_i2c_remove(struct i2c_client *client)
+static int __devexit wm8940_i2c_remove(struct i2c_client *client)
 {
 	struct wm8940_priv *wm8940 = i2c_get_clientdata(client);
 
-- 
cgit v0.10.2


From 2bf2778e0fb38255e55ab5e10022132b0a72420e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 15 May 2009 12:20:52 +0200
Subject: ASoC: Optimize switch/case in magician.c

Use default to optimize the switch/case in magicial_playback_hw_params(),
which also fixes the compile warnings below:
  sound/soc/pxa/magician.c:89: warning: 'acds' may be used uninitialized in this function
  sound/soc/pxa/magician.c:89: warning: 'acps' may be used uninitialized in this function

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c
index 0625c34..c89a3cd 100644
--- a/sound/soc/pxa/magician.c
+++ b/sound/soc/pxa/magician.c
@@ -106,7 +106,7 @@ static int magician_playback_hw_params(struct snd_pcm_substream *substream,
 			/* 513156 Hz ~= _2_ * 8000 Hz * 32 (+0.23%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_16;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 1026312 Hz ~= _2_ * 8000 Hz * 64 (+0.23%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_8;
 		}
@@ -118,7 +118,7 @@ static int magician_playback_hw_params(struct snd_pcm_substream *substream,
 			/* 351375 Hz ~= 11025 Hz * 32 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_4;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 702750 Hz ~= 11025 Hz * 64 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 		}
@@ -130,7 +130,7 @@ static int magician_playback_hw_params(struct snd_pcm_substream *substream,
 			/* 702750 Hz ~= 22050 Hz * 32 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 1405500 Hz ~= 22050 Hz * 64 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_1;
 		}
@@ -142,7 +142,7 @@ static int magician_playback_hw_params(struct snd_pcm_substream *substream,
 			/* 1405500 Hz ~= 44100 Hz * 32 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 2811000 Hz ~= 44100 Hz * 64 (-0.41%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_1;
 		}
@@ -154,19 +154,20 @@ static int magician_playback_hw_params(struct snd_pcm_substream *substream,
 			/* 1529375 Hz ~= 48000 Hz * 32 (-0.44%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 3058750 Hz ~= 48000 Hz * 64 (-0.44%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_1;
 		}
 		break;
 	case 96000:
+	default:
 		acps = 12235000;
 		switch (width) {
 		case 16:
 			/* 3058750 Hz ~= 96000 Hz * 32 (-0.44%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_1;
 			break;
-		case 32:
+		default: /* 32 */
 			/* 6117500 Hz ~= 96000 Hz * 64 (-0.44%) */
 			acds = PXA_SSP_CLK_AUDIO_DIV_2;
 			div4 = PXA_SSP_CLK_SCDB_1;
-- 
cgit v0.10.2


From ceec1c33c36e4783cba41416f327f4bbc05218c3 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 17 Apr 2009 12:29:22 +0100
Subject: [ARM] nwfpe: Add decleration for ExtendedCPDO

Add header file decleration for 'ExtendedCPDO' in fpa11.h
to stop the following sparse warning:

extended_cpdo.c:90:14: warning: symbol 'ExtendedCPDO' was not declared. Should it be static?

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h
index 386cbd1..d3a6f92 100644
--- a/arch/arm/nwfpe/fpa11.h
+++ b/arch/arm/nwfpe/fpa11.h
@@ -114,4 +114,8 @@ extern unsigned int SingleCPDO(struct roundingData *roundData,
 extern unsigned int DoubleCPDO(struct roundingData *roundData,
 			       const unsigned int opcode, FPREG * rFd);
 
+/* extneded_cpdo.c */
+extern unsigned int ExtendedCPDO(struct roundingData *roundData,
+				 const unsigned int opcode, FPREG * rFd);
+
 #endif
-- 
cgit v0.10.2


From 3ea385f061d08a9509a884e347ad1007bb6c3c66 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 17 Apr 2009 12:21:56 +0100
Subject: [ARM] nwfpe: fix 'floatx80_is_nan' sparse warning

The symbol 'floatx80_is_nan' prototype was defined
locally in fpa11_cprt.c when it was built outside the
file in softfloat-specialisze.

Move this into softfloat.h to fix the following sparse
warning:

softfloat-specialize:276:6: warning: symbol 'floatx80_is_nan' was not declared. Should it be static?

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c
index 9843dc5..31c4eee 100644
--- a/arch/arm/nwfpe/fpa11_cprt.c
+++ b/arch/arm/nwfpe/fpa11_cprt.c
@@ -27,10 +27,6 @@
 #include "fpmodule.inl"
 #include "softfloat.h"
 
-#ifdef CONFIG_FPE_NWFPE_XP
-extern flag floatx80_is_nan(floatx80);
-#endif
-
 unsigned int PerformFLT(const unsigned int opcode);
 unsigned int PerformFIX(const unsigned int opcode);
 
diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h
index 260fe29..13e479c 100644
--- a/arch/arm/nwfpe/softfloat.h
+++ b/arch/arm/nwfpe/softfloat.h
@@ -226,6 +226,8 @@ char floatx80_le_quiet( floatx80, floatx80 );
 char floatx80_lt_quiet( floatx80, floatx80 );
 char floatx80_is_signaling_nan( floatx80 );
 
+extern flag floatx80_is_nan(floatx80);
+
 #endif
 
 static inline flag extractFloat32Sign(float32 a)
-- 
cgit v0.10.2


From 6e03a201bbe8137487f340d26aa662110e324b20 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Thu, 9 Apr 2009 22:04:07 -0700
Subject: firmware: speed up request_firmware(), v3

Rather than calling vmalloc() repeatedly to grow the firmware image as
we receive data from userspace, just allocate and fill individual pages.
Then vmap() the whole lot in one go when we're done.

A quick test with a 337KiB iwlagn firmware shows the time taken for
request_firmware() going from ~32ms to ~5ms after I apply this patch.

[v2: define PAGE_KERNEL_RO as PAGE_KERNEL where necessary, use min_t()]
[v3: kunmap() takes the struct page *, not the virtual address]

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Tested-by: Sachin Sant <sachinp@in.ibm.com>

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index d3a59c6..8a267c4 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -17,7 +17,7 @@
 #include <linux/bitops.h>
 #include <linux/mutex.h>
 #include <linux/kthread.h>
-
+#include <linux/highmem.h>
 #include <linux/firmware.h>
 #include "base.h"
 
@@ -45,7 +45,10 @@ struct firmware_priv {
 	struct bin_attribute attr_data;
 	struct firmware *fw;
 	unsigned long status;
-	int alloc_size;
+	struct page **pages;
+	int nr_pages;
+	int page_array_size;
+	const char *vdata;
 	struct timer_list timeout;
 };
 
@@ -122,6 +125,10 @@ static ssize_t firmware_loading_show(struct device *dev,
 	return sprintf(buf, "%d\n", loading);
 }
 
+/* Some architectures don't have PAGE_KERNEL_RO */
+#ifndef PAGE_KERNEL_RO
+#define PAGE_KERNEL_RO PAGE_KERNEL
+#endif
 /**
  * firmware_loading_store - set value in the 'loading' control file
  * @dev: device pointer
@@ -141,6 +148,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 {
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
 	int loading = simple_strtol(buf, NULL, 10);
+	int i;
 
 	switch (loading) {
 	case 1:
@@ -151,13 +159,30 @@ static ssize_t firmware_loading_store(struct device *dev,
 		}
 		vfree(fw_priv->fw->data);
 		fw_priv->fw->data = NULL;
+		for (i = 0; i < fw_priv->nr_pages; i++)
+			__free_page(fw_priv->pages[i]);
+		kfree(fw_priv->pages);
+		fw_priv->pages = NULL;
+		fw_priv->page_array_size = 0;
+		fw_priv->nr_pages = 0;
 		fw_priv->fw->size = 0;
-		fw_priv->alloc_size = 0;
 		set_bit(FW_STATUS_LOADING, &fw_priv->status);
 		mutex_unlock(&fw_lock);
 		break;
 	case 0:
 		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
+			vfree(fw_priv->fw->data);
+			fw_priv->fw->data = vmap(fw_priv->pages,
+						 fw_priv->nr_pages,
+						 0, PAGE_KERNEL_RO);
+			if (!fw_priv->fw->data) {
+				dev_err(dev, "%s: vmap() failed\n", __func__);
+				goto err;
+			}
+			/* Pages will be freed by vfree() */
+			fw_priv->pages = NULL;
+			fw_priv->page_array_size = 0;
+			fw_priv->nr_pages = 0;
 			complete(&fw_priv->completion);
 			clear_bit(FW_STATUS_LOADING, &fw_priv->status);
 			break;
@@ -167,6 +192,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 		dev_err(dev, "%s: unexpected value (%d)\n", __func__, loading);
 		/* fallthrough */
 	case -1:
+	err:
 		fw_load_abort(fw_priv);
 		break;
 	}
@@ -191,8 +217,28 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 		ret_count = -ENODEV;
 		goto out;
 	}
-	ret_count = memory_read_from_buffer(buffer, count, &offset,
-						fw->data, fw->size);
+	if (offset > fw->size)
+		return 0;
+	if (count > fw->size - offset)
+		count = fw->size - offset;
+
+	ret_count = count;
+
+	while (count) {
+		void *page_data;
+		int page_nr = offset >> PAGE_SHIFT;
+		int page_ofs = offset & (PAGE_SIZE-1);
+		int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count);
+
+		page_data = kmap(fw_priv->pages[page_nr]);
+
+		memcpy(buffer, page_data + page_ofs, page_cnt);
+
+		kunmap(fw_priv->pages[page_nr]);
+		buffer += page_cnt;
+		offset += page_cnt;
+		count -= page_cnt;
+	}
 out:
 	mutex_unlock(&fw_lock);
 	return ret_count;
@@ -201,27 +247,39 @@ out:
 static int
 fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
 {
-	u8 *new_data;
-	int new_size = fw_priv->alloc_size;
+	int pages_needed = ALIGN(min_size, PAGE_SIZE) >> PAGE_SHIFT;
+
+	/* If the array of pages is too small, grow it... */
+	if (fw_priv->page_array_size < pages_needed) {
+		int new_array_size = max(pages_needed,
+					 fw_priv->page_array_size * 2);
+		struct page **new_pages;
+
+		new_pages = kmalloc(new_array_size * sizeof(void *),
+				    GFP_KERNEL);
+		if (!new_pages) {
+			fw_load_abort(fw_priv);
+			return -ENOMEM;
+		}
+		memcpy(new_pages, fw_priv->pages,
+		       fw_priv->page_array_size * sizeof(void *));
+		memset(&new_pages[fw_priv->page_array_size], 0, sizeof(void *) *
+		       (new_array_size - fw_priv->page_array_size));
+		kfree(fw_priv->pages);
+		fw_priv->pages = new_pages;
+		fw_priv->page_array_size = new_array_size;
+	}
 
-	if (min_size <= fw_priv->alloc_size)
-		return 0;
+	while (fw_priv->nr_pages < pages_needed) {
+		fw_priv->pages[fw_priv->nr_pages] =
+			alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
 
-	new_size = ALIGN(min_size, PAGE_SIZE);
-	new_data = vmalloc(new_size);
-	if (!new_data) {
-		printk(KERN_ERR "%s: unable to alloc buffer\n", __func__);
-		/* Make sure that we don't keep incomplete data */
-		fw_load_abort(fw_priv);
-		return -ENOMEM;
-	}
-	fw_priv->alloc_size = new_size;
-	if (fw_priv->fw->data) {
-		memcpy(new_data, fw_priv->fw->data, fw_priv->fw->size);
-		vfree(fw_priv->fw->data);
+		if (!fw_priv->pages[fw_priv->nr_pages]) {
+			fw_load_abort(fw_priv);
+			return -ENOMEM;
+		}
+		fw_priv->nr_pages++;
 	}
-	fw_priv->fw->data = new_data;
-	BUG_ON(min_size > fw_priv->alloc_size);
 	return 0;
 }
 
@@ -258,10 +316,25 @@ firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr,
 	if (retval)
 		goto out;
 
-	memcpy((u8 *)fw->data + offset, buffer, count);
-
-	fw->size = max_t(size_t, offset + count, fw->size);
 	retval = count;
+
+	while (count) {
+		void *page_data;
+		int page_nr = offset >> PAGE_SHIFT;
+		int page_ofs = offset & (PAGE_SIZE - 1);
+		int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count);
+
+		page_data = kmap(fw_priv->pages[page_nr]);
+
+		memcpy(page_data + page_ofs, buffer, page_cnt);
+
+		kunmap(fw_priv->pages[page_nr]);
+		buffer += page_cnt;
+		offset += page_cnt;
+		count -= page_cnt;
+	}
+
+	fw->size = max_t(size_t, offset, fw->size);
 out:
 	mutex_unlock(&fw_lock);
 	return retval;
@@ -277,7 +350,11 @@ static struct bin_attribute firmware_attr_data_tmpl = {
 static void fw_dev_release(struct device *dev)
 {
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
+	int i;
 
+	for (i = 0; i < fw_priv->nr_pages; i++)
+		__free_page(fw_priv->pages[i]);
+	kfree(fw_priv->pages);
 	kfree(fw_priv);
 	kfree(dev);
 
-- 
cgit v0.10.2


From fac733f029251a393c42a8313432f2d9fe43bb83 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Wed, 13 May 2009 11:54:38 +0300
Subject: HID: force feedback support for SmartJoy PLUS PS2/USB adapter

This driver adds force feedback support for SmartJoy PLUS PS2/USB adapter. I
made this driver one device spesific instead of making generic 'wisegroup-ff'
because I have another Wisegroup PS2/USB adapter that doesn't work same way as
SmartJoy PLUS. If another device that is compatible pops up, this driver could
be then renamed to something more generic.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 21edf40..4cdf846 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -256,6 +256,21 @@ config GREENASIA_FF
 	(like MANTA Warrior MM816 and SpeedLink Strike2 SL-6635) or adapter
 	and want to enable force feedback support for it.
 
+config HID_SMARTJOYPLUS
+	tristate "SmartJoy PLUS PS2/USB adapter support" if EMBEDDED
+	depends on USB_HID
+	default !EMBEDDED
+	---help---
+	Support for SmartJoy PLUS PS2/USB adapter.
+
+config SMARTJOYPLUS_FF
+	bool "SmartJoy PLUS PS2/USB adapter force feedback support"
+	depends on HID_SMARTJOYPLUS
+	select INPUT_FF_MEMLESS
+	---help---
+	Say Y here if you have a SmartJoy PLUS PS2/USB adapter and want to
+	enable force feedback support for it.
+
 config HID_TOPSEED
 	tristate "TopSeed Cyberlink remote control support" if EMBEDDED
 	depends on USB_HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 39cebcf..eddd5b6 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_HID_NTRIG)		+= hid-ntrig.o
 obj-$(CONFIG_HID_PANTHERLORD)	+= hid-pl.o
 obj-$(CONFIG_HID_PETALYNX)	+= hid-petalynx.o
 obj-$(CONFIG_HID_SAMSUNG)	+= hid-samsung.o
+obj-$(CONFIG_HID_SMARTJOYPLUS)	+= hid-sjoy.o
 obj-$(CONFIG_HID_SONY)		+= hid-sony.o
 obj-$(CONFIG_HID_SUNPLUS)	+= hid-sunplus.o
 obj-$(CONFIG_GREENASIA_FF)	+= hid-gaff.o
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 9f38ab8..f2c21d5 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1312,6 +1312,7 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_WACOM, USB_DEVICE_ID_WACOM_GRAPHIRE_BLUETOOTH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index b519692..6301010 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -417,6 +417,7 @@
 #define USB_DEVICE_ID_WACOM_GRAPHIRE_BLUETOOTH	0x81
 
 #define USB_VENDOR_ID_WISEGROUP		0x0925
+#define USB_DEVICE_ID_SMARTJOY_PLUS	0x0005
 #define USB_DEVICE_ID_1_PHIDGETSERVO_20	0x8101
 #define USB_DEVICE_ID_4_PHIDGETSERVO_20	0x8104
 #define USB_DEVICE_ID_8_8_4_IF_KIT	0x8201
diff --git a/drivers/hid/hid-sjoy.c b/drivers/hid/hid-sjoy.c
new file mode 100644
index 0000000..eab169e
--- /dev/null
+++ b/drivers/hid/hid-sjoy.c
@@ -0,0 +1,180 @@
+/*
+ *  Force feedback support for SmartJoy PLUS PS2->USB adapter
+ *
+ *  Copyright (c) 2009 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ *  Based of hid-pl.c and hid-gaff.c
+ *   Copyright (c) 2007, 2009 Anssi Hannula <anssi.hannula@gmail.com>
+ *   Copyright (c) 2008 Lukasz Lubojanski <lukasz@lubojanski.info>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* #define DEBUG */
+
+#include <linux/input.h>
+#include <linux/usb.h>
+#include <linux/hid.h>
+#include "hid-ids.h"
+
+#ifdef CONFIG_SMARTJOYPLUS_FF
+#include "usbhid/usbhid.h"
+
+struct sjoyff_device {
+	struct hid_report *report;
+};
+
+static int hid_sjoyff_play(struct input_dev *dev, void *data,
+			 struct ff_effect *effect)
+{
+	struct hid_device *hid = input_get_drvdata(dev);
+	struct sjoyff_device *sjoyff = data;
+	u32 left, right;
+
+	left = effect->u.rumble.strong_magnitude;
+	right = effect->u.rumble.weak_magnitude;
+	dev_dbg(&dev->dev, "called with 0x%08x 0x%08x\n", left, right);
+
+	left = left * 0xff / 0xffff;
+	right = (right != 0); /* on/off only */
+
+	sjoyff->report->field[0]->value[1] = right;
+	sjoyff->report->field[0]->value[2] = left;
+	dev_dbg(&dev->dev, "running with 0x%02x 0x%02x\n", left, right);
+	usbhid_submit_report(hid, sjoyff->report, USB_DIR_OUT);
+
+	return 0;
+}
+
+static int sjoyff_init(struct hid_device *hid)
+{
+	struct sjoyff_device *sjoyff;
+	struct hid_report *report;
+	struct hid_input *hidinput = list_entry(hid->inputs.next,
+						struct hid_input, list);
+	struct list_head *report_list =
+			&hid->report_enum[HID_OUTPUT_REPORT].report_list;
+	struct list_head *report_ptr = report_list;
+	struct input_dev *dev;
+	int error;
+
+	if (list_empty(report_list)) {
+		dev_err(&hid->dev, "no output reports found\n");
+		return -ENODEV;
+	}
+
+	report_ptr = report_ptr->next;
+
+	if (report_ptr == report_list) {
+		dev_err(&hid->dev, "required output report is "
+				"missing\n");
+		return -ENODEV;
+	}
+
+	report = list_entry(report_ptr, struct hid_report, list);
+	if (report->maxfield < 1) {
+		dev_err(&hid->dev, "no fields in the report\n");
+		return -ENODEV;
+	}
+
+	if (report->field[0]->report_count < 3) {
+		dev_err(&hid->dev, "not enough values in the field\n");
+		return -ENODEV;
+	}
+
+	sjoyff = kzalloc(sizeof(struct sjoyff_device), GFP_KERNEL);
+	if (!sjoyff)
+		return -ENOMEM;
+
+	dev = hidinput->input;
+
+	set_bit(FF_RUMBLE, dev->ffbit);
+
+	error = input_ff_create_memless(dev, sjoyff, hid_sjoyff_play);
+	if (error) {
+		kfree(sjoyff);
+		return error;
+	}
+
+	sjoyff->report = report;
+	sjoyff->report->field[0]->value[0] = 0x01;
+	sjoyff->report->field[0]->value[1] = 0x00;
+	sjoyff->report->field[0]->value[2] = 0x00;
+	usbhid_submit_report(hid, sjoyff->report, USB_DIR_OUT);
+
+	dev_info(&hid->dev,
+		"Force feedback for SmartJoy PLUS PS2/USB adapter\n");
+
+	return 0;
+}
+#else
+static inline int sjoyff_init(struct hid_device *hid)
+{
+	return 0;
+}
+#endif
+
+static int sjoy_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+	int ret;
+
+	ret = hid_parse(hdev);
+	if (ret) {
+		dev_err(&hdev->dev, "parse failed\n");
+		goto err;
+	}
+
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF);
+	if (ret) {
+		dev_err(&hdev->dev, "hw start failed\n");
+		goto err;
+	}
+
+	sjoyff_init(hdev);
+
+	return 0;
+err:
+	return ret;
+}
+
+static const struct hid_device_id sjoy_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, sjoy_devices);
+
+static struct hid_driver sjoy_driver = {
+	.name = "smartjoyplus",
+	.id_table = sjoy_devices,
+	.probe = sjoy_probe,
+};
+
+static int sjoy_init(void)
+{
+	return hid_register_driver(&sjoy_driver);
+}
+
+static void sjoy_exit(void)
+{
+	hid_unregister_driver(&sjoy_driver);
+}
+
+module_init(sjoy_init);
+module_exit(sjoy_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jussi Kivilinna");
+
-- 
cgit v0.10.2


From f1a11e0576c7a73d759d05d776692b2b2d37172b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 5 May 2009 19:21:40 +0200
Subject: futex: remove the wait queue

The waitqueue which is used in struct futex_q is a leftover from the
futexfd implementation. There is no need to use a waitqueue at all, as
the waiting task is the only user of it. The waitqueue just adds
additional locking and a loop in the wake up path which both can be
avoided.

We have already a task reference in struct futex_q which is used for
PI futexes. Use it for normal futexes as well and just wake up the
task directly.

The logic of signalling the futex wakeup via setting q->lock_ptr to
NULL is kept with the difference that we set it NULL before doing the
wakeup. This opens an exit race window vs. a non futex wake up of the
to be woken up task, which we prevent with get_task_struct /
put_task_struct on the waiter.

[ Impact: simplification ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index aec8bf8..157bfcd 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -100,8 +100,8 @@ struct futex_pi_state {
  */
 struct futex_q {
 	struct plist_node list;
-	/* There can only be a single waiter */
-	wait_queue_head_t waiter;
+	/* Waiter reference */
+	struct task_struct *task;
 
 	/* Which hash list lock to use: */
 	spinlock_t *lock_ptr;
@@ -111,7 +111,6 @@ struct futex_q {
 
 	/* Optional priority inheritance state: */
 	struct futex_pi_state *pi_state;
-	struct task_struct *task;
 
 	/* rt_waiter storage for requeue_pi: */
 	struct rt_mutex_waiter *rt_waiter;
@@ -694,22 +693,29 @@ retry:
  */
 static void wake_futex(struct futex_q *q)
 {
-	plist_del(&q->list, &q->list.plist);
+	struct task_struct *p = q->task;
+
 	/*
-	 * The lock in wake_up_all() is a crucial memory barrier after the
-	 * plist_del() and also before assigning to q->lock_ptr.
+	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
+	 * a non futex wake up happens on another CPU then the task
+	 * might exit and p would dereference a non existing task
+	 * struct. Prevent this by holding a reference on p across the
+	 * wake up.
 	 */
-	wake_up(&q->waiter);
+	get_task_struct(p);
+
+	plist_del(&q->list, &q->list.plist);
 	/*
-	 * The waiting task can free the futex_q as soon as this is written,
-	 * without taking any locks.  This must come last.
-	 *
-	 * A memory barrier is required here to prevent the following store to
-	 * lock_ptr from getting ahead of the wakeup. Clearing the lock at the
-	 * end of wake_up() does not prevent this store from moving.
+	 * The waiting task can free the futex_q as soon as
+	 * q->lock_ptr = NULL is written, without taking any locks. A
+	 * memory barrier is required here to prevent the following
+	 * store to lock_ptr from getting ahead of the plist_del.
 	 */
 	smp_wmb();
 	q->lock_ptr = NULL;
+
+	wake_up_state(p, TASK_NORMAL);
+	put_task_struct(p);
 }
 
 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -1003,7 +1009,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
 	WARN_ON(!q->rt_waiter);
 	q->rt_waiter = NULL;
 
-	wake_up(&q->waiter);
+	wake_up_state(q->task, TASK_NORMAL);
 }
 
 /**
@@ -1280,8 +1286,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 {
 	struct futex_hash_bucket *hb;
 
-	init_waitqueue_head(&q->waiter);
-
 	get_futex_key_refs(&q->key);
 	hb = hash_futex(&q->key);
 	q->lock_ptr = &hb->lock;
@@ -1575,11 +1579,9 @@ out:
  * @hb:		the futex hash bucket, must be locked by the caller
  * @q:		the futex_q to queue up on
  * @timeout:	the prepared hrtimer_sleeper, or null for no timeout
- * @wait:	the wait_queue to add to the futex_q after queueing in the hb
  */
 static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
-				struct hrtimer_sleeper *timeout,
-				wait_queue_t *wait)
+				struct hrtimer_sleeper *timeout)
 {
 	queue_me(q, hb);
 
@@ -1587,19 +1589,11 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 	 * There might have been scheduling since the queue_me(), as we
 	 * cannot hold a spinlock across the get_user() in case it
 	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
-	 * queueing ourselves into the futex hash.  This code thus has to
+	 * queueing ourselves into the futex hash. This code thus has to
 	 * rely on the futex_wake() code removing us from hash when it
 	 * wakes us up.
 	 */
-
-	/* add_wait_queue is the barrier after __set_current_state. */
-	__set_current_state(TASK_INTERRUPTIBLE);
-
-	/*
-	 * Add current as the futex_q waiter.  We don't remove ourselves from
-	 * the wait_queue because we are the only user of it.
-	 */
-	add_wait_queue(&q->waiter, wait);
+	set_current_state(TASK_INTERRUPTIBLE);
 
 	/* Arm the timer */
 	if (timeout) {
@@ -1704,7 +1698,6 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
-	DECLARE_WAITQUEUE(wait, current);
 	struct restart_block *restart;
 	struct futex_hash_bucket *hb;
 	struct futex_q q;
@@ -1733,7 +1726,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 		goto out;
 
 	/* queue_me and wait for wakeup, timeout, or a signal. */
-	futex_wait_queue_me(hb, &q, to, &wait);
+	futex_wait_queue_me(hb, &q, to);
 
 	/* If we were woken (and unqueued), we succeeded, whatever. */
 	ret = 0;
@@ -2147,7 +2140,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct rt_mutex_waiter rt_waiter;
 	struct rt_mutex *pi_mutex = NULL;
-	DECLARE_WAITQUEUE(wait, current);
 	struct restart_block *restart;
 	struct futex_hash_bucket *hb;
 	union futex_key key2;
@@ -2191,7 +2183,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 	}
 
 	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
-	futex_wait_queue_me(hb, &q, to, &wait);
+	futex_wait_queue_me(hb, &q, to);
 
 	spin_lock(&hb->lock);
 	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
-- 
cgit v0.10.2


From 548e1ddf255b4ebfb4ef20c08936fd8d4deb3bd9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 15:19:26 +0200
Subject: perf_counter: remove perf_disable/enable exports

Now that ACPI idle doesn't use it anymore, remove the exports.

[ Impact: remove dead code/data ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.429826617@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e814ff0..0173738 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -90,14 +90,12 @@ void perf_disable(void)
 	__perf_disable();
 	hw_perf_disable();
 }
-EXPORT_SYMBOL_GPL(perf_disable); /* ACPI idle */
 
 void perf_enable(void)
 {
 	if (__perf_enable())
 		hw_perf_enable();
 }
-EXPORT_SYMBOL_GPL(perf_enable); /* ACPI idle */
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
-- 
cgit v0.10.2


From 789f90fcf6b0b54e655740e9396c954378542c79 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 15:19:27 +0200
Subject: perf_counter: per user mlock gift

Instead of a per-process mlock gift for perf-counters, use a
per-user gift so that there is less of a DoS potential.

[ Impact: allow less worst-case unprivileged memory consumption ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.496182835@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d185758..ff59d12 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -674,6 +674,10 @@ struct user_struct {
 	struct work_struct work;
 #endif
 #endif
+
+#ifdef CONFIG_PERF_COUNTERS
+	atomic_long_t locked_vm;
+#endif
 };
 
 extern int uids_sysfs_init(void);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0173738..93f4a0e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -45,7 +45,7 @@ static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
-int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
+int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1522,6 +1522,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
+		struct user_struct *user = current_user();
+
+		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= counter->data->nr_locked;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
@@ -1537,11 +1540,13 @@ static struct vm_operations_struct perf_mmap_vmops = {
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
+	struct user_struct *user = current_user();
 	unsigned long vma_size;
 	unsigned long nr_pages;
+	unsigned long user_locked, user_lock_limit;
 	unsigned long locked, lock_limit;
+	long user_extra, extra;
 	int ret = 0;
-	long extra;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
@@ -1569,15 +1574,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 	}
 
-	extra = nr_pages /* + 1 only account the data pages */;
-	extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
-	if (extra < 0)
-		extra = 0;
+	user_extra = nr_pages + 1;
+	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
-	locked = vma->vm_mm->locked_vm + extra;
+	extra = 0;
+	if (user_locked > user_lock_limit)
+		extra = user_locked - user_lock_limit;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
+	locked = vma->vm_mm->locked_vm + extra;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
@@ -1590,6 +1597,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 
 	atomic_set(&counter->mmap_count, 1);
+	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
 	counter->data->nr_locked = extra;
 unlock:
-- 
cgit v0.10.2


From 60db5e09c13109b13830cc9dcae688003fd39e79 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 15:19:28 +0200
Subject: perf_counter: frequency based adaptive irq_period

Instead of specifying the irq_period for a counter, provide a target interrupt
frequency and dynamically adapt the irq_period to match this frequency.

[ Impact: new perf-counter attribute/feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.646195868@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb1b463..db8d5ca 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -534,7 +534,7 @@ void hw_perf_enable(void)
 			continue;
 		}
 		val = 0;
-		if (counter->hw_event.irq_period) {
+		if (counter->hw.irq_period) {
 			left = atomic64_read(&counter->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if ((s64)counter->hw_event.irq_period < 0)
-		return ERR_PTR(-EINVAL);
 	if (!perf_event_raw(&counter->hw_event)) {
 		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+	atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 static void record_and_restart(struct perf_counter *counter, long val,
 			       struct pt_regs *regs, int nmi)
 {
+	u64 period = counter->hw.irq_period;
 	s64 prev, delta, left;
 	int record = 0;
 
@@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	val = 0;
 	left = atomic64_read(&counter->hw.period_left) - delta;
-	if (counter->hw_event.irq_period) {
+	if (period) {
 		if (left <= 0) {
-			left += counter->hw_event.irq_period;
+			left += period;
 			if (left <= 0)
-				left = counter->hw_event.irq_period;
+				left = period;
 			record = 1;
 		}
 		if (left < 0x80000000L)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a7f718..886dcf3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->nmi = 1;
 	}
 
-	hwc->irq_period	= hw_event->irq_period;
-	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
-		hwc->irq_period = x86_pmu.max_period;
-
-	atomic64_set(&hwc->period_left, hwc->irq_period);
+	atomic64_set(&hwc->period_left,
+			min(x86_pmu.max_period, hwc->irq_period));
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->irq_period;
+	s64 period = min(x86_pmu.max_period, hwc->irq_period);
 	int err;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e543ecc..004b6e1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -130,7 +130,11 @@ struct perf_counter_hw_event {
 	 */
 	__u64			config;
 
-	__u64			irq_period;
+	union {
+		__u64		irq_period;
+		__u64		irq_freq;
+	};
+
 	__u32			record_type;
 	__u32			read_format;
 
@@ -146,8 +150,9 @@ struct perf_counter_hw_event {
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
 				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 52;
+				__reserved_1   : 51;
 
 	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
@@ -337,6 +342,7 @@ struct hw_perf_counter {
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
+	u64				interrupts;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 93f4a0e..0ad1db4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
+void perf_adjust_freq(struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+	u64 irq_period;
+	u64 events, period;
+	s64 delta;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+			continue;
+
+		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+			continue;
+
+		events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+		period = div64_u64(events, counter->hw_event.irq_freq);
+
+		delta = (s64)(1 + period - counter->hw.irq_period);
+		delta >>= 1;
+
+		irq_period = counter->hw.irq_period + delta;
+
+		if (!irq_period)
+			irq_period = 1;
+
+		counter->hw.irq_period = irq_period;
+		counter->hw.interrupts = 0;
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Round-robin a context's counters:
  */
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = &curr->perf_counter_ctx;
 
+	perf_adjust_freq(&cpuctx->ctx);
+	perf_adjust_freq(ctx);
+
 	perf_counter_cpu_sched_out(cpuctx);
 	__perf_counter_task_sched_out(ctx);
 
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_counter *counter,
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	counter->hw.interrupts++;
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_counter *counter;
 	struct pt_regs *regs;
+	u64 period;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
 	counter->pmu->read(counter);
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 			ret = HRTIMER_NORESTART;
 	}
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+	period = max_t(u64, 10000, counter->hw.irq_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
 	return ret;
 }
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
-	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	const struct pmu *pmu = NULL;
-	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
 		/*
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 		else
 			pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 		break;
 	}
 
-	if (pmu)
-		hwc->irq_period = hw_event->irq_period;
-
 	return pmu;
 }
 
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 {
 	const struct pmu *pmu;
 	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
 	long err;
 
 	counter = kzalloc(sizeof(*counter), gfpflags);
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	pmu = NULL;
 
+	hwc = &counter->hw;
+	if (hw_event->freq && hw_event->irq_freq)
+		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+	else
+		hwc->irq_period = hw_event->irq_period;
+
 	/*
 	 * we currently do not support PERF_RECORD_GROUP on inherited counters
 	 */
-- 
cgit v0.10.2


From f5456a6b056b709282e87a68b4c1b81ac2e866fa Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 15:19:29 +0200
Subject: perf top: update to use the new freq interface

Provide perf top -F as alternative to -c.

[ Impact: new 'perf top' feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.707922166@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index b1549dd..814b2e4 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -98,6 +98,7 @@ static unsigned int		page_size;
 static unsigned int		mmap_pages			=  16;
 static int			use_mmap			= 0;
 static int			use_munmap			= 0;
+static int			freq				= 0;
 
 static char			*vmlinux;
 
@@ -846,9 +847,10 @@ static void process_options(int argc, char **argv)
 			{"stat",	no_argument,		NULL, 'S'},
 			{"vmlinux",	required_argument,	NULL, 'x'},
 			{"zero",	no_argument,		NULL, 'z'},
+			{"freq",	required_argument,	NULL, 'F'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -889,6 +891,7 @@ static void process_options(int argc, char **argv)
 		case 'm': mmap_pages			=   atoi(optarg); break;
 		case 'M': use_mmap			=              1; break;
 		case 'U': use_munmap			=              1; break;
+		case 'F': freq = 1; default_interval	=   atoi(optarg); break;
 		default: error = 1; break;
 		}
 	}
@@ -1075,6 +1078,7 @@ int cmd_top(int argc, char **argv, const char *prefix)
 			hw_event.nmi		= nmi;
 			hw_event.mmap		= use_mmap;
 			hw_event.munmap		= use_munmap;
+			hw_event.freq		= freq;
 
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {
-- 
cgit v0.10.2


From dce48a84adf1806676319f6f480e30a6daa012f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 11 Apr 2009 10:43:41 +0200
Subject: sched, timers: move calc_load() to scheduler

Dimitri Sivanich noticed that xtime_lock is held write locked across
calc_load() which iterates over all online CPUs. That can cause long
latencies for xtime_lock readers on large SMP systems.

The load average calculation is an rough estimate anyway so there is
no real need to protect the readers vs. the update. It's not a problem
when the avenrun array is updated while a reader copies the values.

Instead of iterating over all online CPUs let the scheduler_tick code
update the number of active tasks shortly before the avenrun update
happens. The avenrun update itself is handled by the CPU which calls
do_timer().

[ Impact: reduce xtime_lock write locked section ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc..6eb4892 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -135,8 +135,8 @@ DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
-extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern void calc_global_load(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 8908d19..f4eb881 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -630,6 +630,10 @@ struct rq {
 	struct list_head migration_queue;
 #endif
 
+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
 #ifdef CONFIG_SCHED_HRTICK
 #ifdef CONFIG_SMP
 	int hrtick_csd_pending;
@@ -1728,6 +1732,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 }
 #endif
 
+static void calc_load_account_active(struct rq *this_rq);
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -2856,19 +2862,57 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
-unsigned long nr_active(void)
+/* Variables and functions for calc_load */
+static atomic_long_t calc_load_tasks;
+static unsigned long calc_load_update;
+unsigned long avenrun[3];
+EXPORT_SYMBOL(avenrun);
+
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
-	unsigned long i, running = 0, uninterruptible = 0;
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	return load >> FSHIFT;
+}
 
-	for_each_online_cpu(i) {
-		running += cpu_rq(i)->nr_running;
-		uninterruptible += cpu_rq(i)->nr_uninterruptible;
-	}
+/*
+ * calc_load - update the avenrun load estimates 10 ticks after the
+ * CPUs have updated calc_load_tasks.
+ */
+void calc_global_load(void)
+{
+	unsigned long upd = calc_load_update + 10;
+	long active;
+
+	if (time_before(jiffies, upd))
+		return;
 
-	if (unlikely((long)uninterruptible < 0))
-		uninterruptible = 0;
+	active = atomic_long_read(&calc_load_tasks);
+	active = active > 0 ? active * FIXED_1 : 0;
 
-	return running + uninterruptible;
+	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+	calc_load_update += LOAD_FREQ;
+}
+
+/*
+ * Either called from update_cpu_load() or from a cpu going idle
+ */
+static void calc_load_account_active(struct rq *this_rq)
+{
+	long nr_active, delta;
+
+	nr_active = this_rq->nr_running;
+	nr_active += (long) this_rq->nr_uninterruptible;
+
+	if (nr_active != this_rq->calc_load_active) {
+		delta = nr_active - this_rq->calc_load_active;
+		this_rq->calc_load_active = nr_active;
+		atomic_long_add(delta, &calc_load_tasks);
+	}
 }
 
 /*
@@ -2899,6 +2943,11 @@ static void update_cpu_load(struct rq *this_rq)
 			new_load += scale-1;
 		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
 	}
+
+	if (time_after_eq(jiffies, this_rq->calc_load_update)) {
+		this_rq->calc_load_update += LOAD_FREQ;
+		calc_load_account_active(this_rq);
+	}
 }
 
 #ifdef CONFIG_SMP
@@ -7091,6 +7140,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 
 	}
 }
+
+/*
+ * remove the tasks which were accounted by rq from calc_load_tasks.
+ */
+static void calc_global_load_remove(struct rq *rq)
+{
+	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7325,6 +7382,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		/* Update our root-domain */
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
+		rq->calc_load_update = calc_load_update;
+		rq->calc_load_active = 0;
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 
@@ -7364,7 +7423,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
-
+		calc_global_load_remove(rq);
 		/*
 		 * No need to migrate the tasks: it was best-effort if
 		 * they didn't take sched_hotcpu_mutex. Just wake up
@@ -9059,6 +9118,8 @@ void __init sched_init(void)
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
 		rq->nr_running = 0;
+		rq->calc_load_active = 0;
+		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -9166,6 +9227,9 @@ void __init sched_init(void)
 	 * when this runqueue becomes "idle".
 	 */
 	init_idle(current, smp_processor_id());
+
+	calc_load_update = jiffies + LOAD_FREQ;
+
 	/*
 	 * During early bootup we pretend to be a normal task:
 	 */
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 8a21a2e..499672c 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy
 static struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	schedstat_inc(rq, sched_goidle);
-
+	/* adjust the active tasks as we might go into a long sleep */
+	calc_load_account_active(rq);
 	return rq->idle;
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 687dff4..52a8bf8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -22,7 +22,7 @@
 
 /*
  * This read-write spinlock protects us from races in SMP while
- * playing with xtime and avenrun.
+ * playing with xtime.
  */
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 
diff --git a/kernel/timer.c b/kernel/timer.c
index cffffad..6a21d7a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1123,47 +1123,6 @@ void update_process_times(int user_tick)
 }
 
 /*
- * Nr of active tasks - counted in fixed-point numbers
- */
-static unsigned long count_active_tasks(void)
-{
-	return nr_active() * FIXED_1;
-}
-
-/*
- * Hmm.. Changed this, as the GNU make sources (load.c) seems to
- * imply that avenrun[] is the standard name for this kind of thing.
- * Nothing else seems to be standardized: the fractional size etc
- * all seem to differ on different machines.
- *
- * Requires xtime_lock to access.
- */
-unsigned long avenrun[3];
-
-EXPORT_SYMBOL(avenrun);
-
-/*
- * calc_load - given tick count, update the avenrun load estimates.
- * This is called while holding a write_lock on xtime_lock.
- */
-static inline void calc_load(unsigned long ticks)
-{
-	unsigned long active_tasks; /* fixed-point */
-	static int count = LOAD_FREQ;
-
-	count -= ticks;
-	if (unlikely(count < 0)) {
-		active_tasks = count_active_tasks();
-		do {
-			CALC_LOAD(avenrun[0], EXP_1, active_tasks);
-			CALC_LOAD(avenrun[1], EXP_5, active_tasks);
-			CALC_LOAD(avenrun[2], EXP_15, active_tasks);
-			count += LOAD_FREQ;
-		} while (count < 0);
-	}
-}
-
-/*
  * This function runs timers and the timer-tq in bottom half context.
  */
 static void run_timer_softirq(struct softirq_action *h)
@@ -1187,16 +1146,6 @@ void run_local_timers(void)
 }
 
 /*
- * Called by the timer interrupt. xtime_lock must already be taken
- * by the timer IRQ!
- */
-static inline void update_times(unsigned long ticks)
-{
-	update_wall_time();
-	calc_load(ticks);
-}
-
-/*
  * The 64-bit jiffies value is not atomic - you MUST NOT read it
  * without sampling the sequence number in xtime_lock.
  * jiffies is defined in the linker script...
@@ -1205,7 +1154,8 @@ static inline void update_times(unsigned long ticks)
 void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
-	update_times(ticks);
+	update_wall_time();
+	calc_global_load();
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
-- 
cgit v0.10.2


From 2d02494f5a90f2e4b3c4c6acc85ec94674cdc431 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 2 May 2009 20:08:52 +0200
Subject: sched, timers: cleanup avenrun users

avenrun is an rough estimate so we don't have to worry about
consistency of the three avenrun values. Remove the xtime lock
dependency and provide a function to scale the values. Cleanup the
users.

[ Impact: cleanup ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>

diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 9bca39c..1afa4dd 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -12,20 +12,14 @@
 
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
-	int a, b, c;
-	unsigned long seq;
+	unsigned long avnrun[3];
 
-	do {
-		seq = read_seqbegin(&xtime_lock);
-		a = avenrun[0] + (FIXED_1/200);
-		b = avenrun[1] + (FIXED_1/200);
-		c = avenrun[2] + (FIXED_1/200);
-	} while (read_seqretry(&xtime_lock, seq));
+	get_avenrun(avnrun, FIXED_1/200, 0);
 
-	seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
-		LOAD_INT(a), LOAD_FRAC(a),
-		LOAD_INT(b), LOAD_FRAC(b),
-		LOAD_INT(c), LOAD_FRAC(c),
+	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
+		LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
+		LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
+		LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
 		nr_running(), nr_threads,
 		task_active_pid_ns(current)->last_pid);
 	return 0;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6eb4892..de7b3b2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -116,6 +116,7 @@ struct fs_struct;
  *    11 bit fractions.
  */
 extern unsigned long avenrun[];		/* Load averages */
+extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
 
 #define FSHIFT		11		/* nr of bits of precision */
 #define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
diff --git a/kernel/sched.c b/kernel/sched.c
index f4eb881..497c09b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2868,6 +2868,21 @@ static unsigned long calc_load_update;
 unsigned long avenrun[3];
 EXPORT_SYMBOL(avenrun);
 
+/**
+ * get_avenrun - get the load average array
+ * @loads:	pointer to dest load array
+ * @offset:	offset to add
+ * @shift:	shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+{
+	loads[0] = (avenrun[0] + offset) << shift;
+	loads[1] = (avenrun[1] + offset) << shift;
+	loads[2] = (avenrun[2] + offset) << shift;
+}
+
 static unsigned long
 calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
diff --git a/kernel/timer.c b/kernel/timer.c
index 6a21d7a..a26ed29 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1356,37 +1356,17 @@ int do_sysinfo(struct sysinfo *info)
 {
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
-	unsigned long seq;
+	struct timespec tp;
 
 	memset(info, 0, sizeof(struct sysinfo));
 
-	do {
-		struct timespec tp;
-		seq = read_seqbegin(&xtime_lock);
-
-		/*
-		 * This is annoying.  The below is the same thing
-		 * posix_get_clock_monotonic() does, but it wants to
-		 * take the lock which we want to cover the loads stuff
-		 * too.
-		 */
-
-		getnstimeofday(&tp);
-		tp.tv_sec += wall_to_monotonic.tv_sec;
-		tp.tv_nsec += wall_to_monotonic.tv_nsec;
-		monotonic_to_bootbased(&tp);
-		if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
-			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
-			tp.tv_sec++;
-		}
-		info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+	ktime_get_ts(&tp);
+	monotonic_to_bootbased(&tp);
+	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
-		info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
 
-		info->procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
+	info->procs = nr_threads;
 
 	si_meminfo(info);
 	si_swapinfo(info);
-- 
cgit v0.10.2


From 2fc998907947f92b1b9113faad531d7f5a857987 Mon Sep 17 00:00:00 2001
From: Nickolas Lloyd <ultrageek.lloyd@gmail.com>
Date: Fri, 15 May 2009 15:33:30 +0200
Subject: ALSA: hda - add controls to toggle DC bias on mic ports

This patch adds a mixer control for the STAC92XX boards to control the
DC bias of mic ports, allowing recording from both powered and
non-powered sources.  It replaces the "Mic Output Switch" with "Mic Jack
Mode" to switch between Mic, Line In, and Line Out.

Signed-off-by: Nickolas Lloyd <ultrageek.lloyd@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index ecf53f7..0295098 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -634,6 +634,96 @@ static int stac92xx_smux_enum_put(struct snd_kcontrol *kcontrol,
 	return 0;
 }
 
+static unsigned int stac92xx_vref_set(struct hda_codec *codec,
+					hda_nid_t nid, unsigned int new_vref)
+{
+	unsigned int error;
+	unsigned int pincfg;
+	pincfg = snd_hda_codec_read(codec, nid, 0,
+				AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+
+	pincfg &= 0xff;
+	pincfg &= ~(AC_PINCTL_VREFEN | AC_PINCTL_IN_EN | AC_PINCTL_OUT_EN);
+	pincfg |= new_vref;
+
+	if (new_vref == AC_PINCTL_VREF_HIZ)
+		pincfg |= AC_PINCTL_OUT_EN;
+	else
+		pincfg |= AC_PINCTL_IN_EN;
+
+	error = snd_hda_codec_write_cache(codec, nid, 0,
+					AC_VERB_SET_PIN_WIDGET_CONTROL, pincfg);
+	if (error < 0)
+		return error;
+	else
+		return 1;
+}
+
+static unsigned int stac92xx_vref_get(struct hda_codec *codec, hda_nid_t nid)
+{
+	unsigned int vref;
+	vref = snd_hda_codec_read(codec, nid, 0,
+				AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+	vref &= AC_PINCTL_VREFEN;
+	return vref;
+}
+
+static int stac92xx_dc_bias_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int new_vref;
+	unsigned int error;
+
+	if (ucontrol->value.enumerated.item[0] == 0)
+		new_vref = AC_PINCTL_VREF_80;
+	else if (ucontrol->value.enumerated.item[0] == 1)
+		new_vref = AC_PINCTL_VREF_GRD;
+	else
+		new_vref = AC_PINCTL_VREF_HIZ;
+
+	if (new_vref != stac92xx_vref_get(codec, kcontrol->private_value)) {
+		error = stac92xx_vref_set(codec,
+					kcontrol->private_value, new_vref);
+		return error;
+	}
+
+	return 0;
+}
+
+static int stac92xx_dc_bias_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int vref = stac92xx_vref_get(codec, kcontrol->private_value);
+	if (vref == AC_PINCTL_VREF_80)
+		ucontrol->value.enumerated.item[0] = 0;
+	else if (vref == AC_PINCTL_VREF_GRD)
+		ucontrol->value.enumerated.item[0] = 1;
+	else if (vref == AC_PINCTL_VREF_HIZ)
+		ucontrol->value.enumerated.item[0] = 2;
+
+	return 0;
+}
+
+static int stac92xx_dc_bias_info(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[] = {
+		"Mic In", "Line In", "Line Out"
+	};
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->value.enumerated.items = 3;
+	uinfo->count = 1;
+	if (uinfo->value.enumerated.item >= 3)
+		uinfo->value.enumerated.item = 2;
+	strcpy(uinfo->value.enumerated.name,
+		texts[uinfo->value.enumerated.item]);
+
+	return 0;
+}
+
 static int stac92xx_mux_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
@@ -995,6 +1085,17 @@ static struct hda_verb stac9205_core_init[] = {
 		.private_value = verb_read | (verb_write << 16), \
 	}
 
+#define DC_BIAS(xname, idx, nid) \
+	{ \
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+		.name = xname, \
+		.index = idx, \
+		.info = stac92xx_dc_bias_info, \
+		.get = stac92xx_dc_bias_get, \
+		.put = stac92xx_dc_bias_put, \
+		.private_value = nid, \
+	}
+
 static struct snd_kcontrol_new stac9200_mixer[] = {
 	HDA_CODEC_VOLUME("Master Playback Volume", 0xb, 0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Master Playback Switch", 0xb, 0, HDA_OUTPUT),
@@ -2702,7 +2803,8 @@ enum {
 	STAC_CTL_WIDGET_AMP_VOL,
 	STAC_CTL_WIDGET_HP_SWITCH,
 	STAC_CTL_WIDGET_IO_SWITCH,
-	STAC_CTL_WIDGET_CLFE_SWITCH
+	STAC_CTL_WIDGET_CLFE_SWITCH,
+	STAC_CTL_WIDGET_DC_BIAS
 };
 
 static struct snd_kcontrol_new stac92xx_control_templates[] = {
@@ -2714,6 +2816,7 @@ static struct snd_kcontrol_new stac92xx_control_templates[] = {
 	STAC_CODEC_HP_SWITCH(NULL),
 	STAC_CODEC_IO_SWITCH(NULL, 0),
 	STAC_CODEC_CLFE_SWITCH(NULL, 0),
+	DC_BIAS(NULL, 0, 0),
 };
 
 /* add dynamic controls */
@@ -3165,9 +3268,9 @@ static int stac92xx_auto_create_multi_out_ctls(struct hda_codec *codec,
 	}
 
 	if (spec->mic_switch) {
-		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_IO_SWITCH,
-					   "Mic as Output Switch",
-					   (spec->mic_switch << 8) | 1);
+		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_DC_BIAS,
+					   "Mic Jack Mode",
+					   spec->mic_switch);
 		if (err < 0)
 			return err;
 	}
-- 
cgit v0.10.2


From 2e569d36729c8105ae066a9b105068305442cc77 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 15:37:47 +0200
Subject: perf_counter: frequency based adaptive irq_period, 32-bit fix

fix:

  kernel/built-in.o: In function `perf_counter_alloc':
  perf_counter.c:(.text+0x7ddc7): undefined reference to `__udivdi3'

[ Impact: build fix on 32-bit systems ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <1242394667.6642.1887.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0ad1db4..728a595 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2942,7 +2942,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	hwc = &counter->hw;
 	if (hw_event->freq && hw_event->irq_freq)
-		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+		hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
 	else
 		hwc->irq_period = hw_event->irq_period;
 
-- 
cgit v0.10.2


From 0f6f4319a72a2b32d19643ff811f25633d8b0207 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 15 May 2009 15:46:44 +0200
Subject: HID: fix hid-ff drivers so that devices work even without ff support

Currently, the hid-*ff force feedback drivers, which claim the blacklisted
device on a HID bus, are only compiled in if the user selects force feedback
support.

However we want the device to be supported even when the kernel is configured
without force feedback.

This patch fixes the drivers in a way that they get compiled even if force
feedback is turned off; all the force feedback support code is compiled out in
such case, and the driver works as a usual driver on HID bus, claiming and
initializing the device, making it operational without FF effects.

Reported-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 4cdf846..7831a03 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -116,9 +116,16 @@ config HID_CYPRESS
 	---help---
 	Support for cypress mouse and barcode readers.
 
-config DRAGONRISE_FF
-	tristate "DragonRise Inc. force feedback support"
+config HID_DRAGONRISE
+	tristate "DragonRise Inc. support" if EMBEDDED
 	depends on USB_HID
+	default !EMBEDDED
+	---help---
+	Say Y here if you have DragonRise Inc.game controllers.
+
+config DRAGONRISE_FF
+	bool "DragonRise Inc. force feedback support"
+	depends on HID_DRAGONRISE
 	select INPUT_FF_MEMLESS
 	---help---
 	Say Y here if you want to enable force feedback support for DragonRise Inc.
@@ -160,7 +167,7 @@ config HID_LOGITECH
 	Support for Logitech devices that are not fully compliant with HID standard.
 
 config LOGITECH_FF
-	bool "Logitech force feedback"
+	bool "Logitech force feedback support"
 	depends on HID_LOGITECH
 	select INPUT_FF_MEMLESS
 	help
@@ -176,7 +183,7 @@ config LOGITECH_FF
 	  force feedback.
 
 config LOGIRUMBLEPAD2_FF
-	bool "Logitech Rumblepad 2 force feedback"
+	bool "Logitech Rumblepad 2 force feedback support"
 	depends on HID_LOGITECH
 	select INPUT_FF_MEMLESS
 	help
@@ -211,11 +218,19 @@ config HID_PANTHERLORD
 	---help---
 	Support for PantherLord/GreenAsia based device support.
 
+config HID_PANTHERLORD
+	tristate "Pantherlord support" if EMBEDDED
+	depends on USB_HID
+	default !EMBEDDED
+	---help---
+	  Say Y here if you have a PantherLord/GreenAsia based game controller
+	  or adapter.
+
 config PANTHERLORD_FF
 	bool "Pantherlord force feedback support"
 	depends on HID_PANTHERLORD
 	select INPUT_FF_MEMLESS
-	help
+	---help---
 	  Say Y here if you have a PantherLord/GreenAsia based game controller
 	  or adapter and want to enable force feedback support for it.
 
@@ -247,9 +262,17 @@ config HID_SUNPLUS
 	---help---
 	Support for Sunplus wireless desktop.
 
-config GREENASIA_FF
-	tristate "GreenAsia (Product ID 0x12) force feedback support"
+config HID_GREENASIA
+	tristate "GreenAsia (Product ID 0x12) support" if EMBEDDED
 	depends on USB_HID
+	default !EMBEDDED
+	---help---
+	  Say Y here if you have a GreenAsia (Product ID 0x12) based game
+	  controller or adapter.
+
+config GREENASIA_FF
+	bool "GreenAsia (Product ID 0x12) force feedback support"
+	depends on HID_GREENASIA
 	select INPUT_FF_MEMLESS
 	---help---
 	Say Y here if you have a GreenAsia (Product ID 0x12) based game controller
@@ -278,13 +301,22 @@ config HID_TOPSEED
 	---help---
 	Say Y if you have a TopSeed Cyberlink remote control.
 
-config THRUSTMASTER_FF
-	tristate "ThrustMaster devices support"
+config HID_THRUSTMASTER
+	tristate "ThrustMaster devices support" if EMBEDDED
 	depends on USB_HID
+	default !EMBEDDED
+	---help---
+	  Say Y here if you have a THRUSTMASTER FireStore Dual Power 2 or
+	  a THRUSTMASTER Ferrari GT Rumble Wheel.
+
+config THRUSTMASTER_FF
+	bool "ThrustMaster devices force feedback support"
+	depends on HID_THRUSTMASTER
 	select INPUT_FF_MEMLESS
-	help
+	---help---
 	  Say Y here if you have a THRUSTMASTER FireStore Dual Power 2 or
-	  a THRUSTMASTER Ferrari GT Rumble Force or Force Feedback Wheel.
+	  a THRUSTMASTER Ferrari GT Rumble Force or Force Feedback Wheel and
+	  want to enable force feedback support for it.
 
 config HID_WACOM
 	tristate "Wacom Bluetooth devices support" if EMBEDDED
@@ -293,13 +325,21 @@ config HID_WACOM
 	---help---
 	Support for Wacom Graphire Bluetooth tablet.
 
-config ZEROPLUS_FF
-	tristate "Zeroplus based game controller support"
+config HID_ZEROPLUS
+	tristate "Zeroplus based game controller support" if EMBEDDED
 	depends on USB_HID
-	select INPUT_FF_MEMLESS
-	help
+	default !EMBEDDED
+	---help---
 	  Say Y here if you have a Zeroplus based game controller.
 
+config ZEROPLUS_FF
+	bool "Zeroplus based game controller force feedback support"
+	depends on HID_ZEROPLUS
+	select INPUT_FF_MEMLESS
+	---help---
+	  Say Y here if you have a Zeroplus based game controller and want
+	  to have force feedback support for it.
+
 endmenu
 
 endif # HID_SUPPORT
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index eddd5b6..db35151 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_HID_BELKIN)	+= hid-belkin.o
 obj-$(CONFIG_HID_CHERRY)	+= hid-cherry.o
 obj-$(CONFIG_HID_CHICONY)	+= hid-chicony.o
 obj-$(CONFIG_HID_CYPRESS)	+= hid-cypress.o
-obj-$(CONFIG_DRAGONRISE_FF)	+= hid-drff.o
+obj-$(CONFIG_HID_DRAGONRISE)	+= hid-drff.o
 obj-$(CONFIG_HID_EZKEY)		+= hid-ezkey.o
 obj-$(CONFIG_HID_GYRATION)	+= hid-gyration.o
 obj-$(CONFIG_HID_KENSINGTON)	+= hid-kensington.o
@@ -37,10 +37,10 @@ obj-$(CONFIG_HID_SAMSUNG)	+= hid-samsung.o
 obj-$(CONFIG_HID_SMARTJOYPLUS)	+= hid-sjoy.o
 obj-$(CONFIG_HID_SONY)		+= hid-sony.o
 obj-$(CONFIG_HID_SUNPLUS)	+= hid-sunplus.o
-obj-$(CONFIG_GREENASIA_FF)	+= hid-gaff.o
-obj-$(CONFIG_THRUSTMASTER_FF)	+= hid-tmff.o
+obj-$(CONFIG_HID_GREENASIA)	+= hid-gaff.o
+obj-$(CONFIG_HID_THRUSTMASTER)	+= hid-tmff.o
 obj-$(CONFIG_HID_TOPSEED)	+= hid-topseed.o
-obj-$(CONFIG_ZEROPLUS_FF)	+= hid-zpff.o
+obj-$(CONFIG_HID_ZEROPLUS)	+= hid-zpff.o
 obj-$(CONFIG_HID_WACOM)		+= hid-wacom.o
 
 obj-$(CONFIG_USB_HID)		+= usbhid/
diff --git a/drivers/hid/hid-drff.c b/drivers/hid/hid-drff.c
index 34f3eb65..a239d20 100644
--- a/drivers/hid/hid-drff.c
+++ b/drivers/hid/hid-drff.c
@@ -32,6 +32,8 @@
 #include <linux/hid.h>
 
 #include "hid-ids.h"
+
+#ifdef CONFIG_DRAGONRISE_FF
 #include "usbhid/usbhid.h"
 
 struct drff_device {
@@ -135,6 +137,12 @@ static int drff_init(struct hid_device *hid)
 
 	return 0;
 }
+#else
+static inline int drff_init(struct hid_device *hid)
+{
+	return 0;
+}
+#endif
 
 static int dr_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
diff --git a/drivers/hid/hid-gaff.c b/drivers/hid/hid-gaff.c
index 510ad3a..8a11ccd 100644
--- a/drivers/hid/hid-gaff.c
+++ b/drivers/hid/hid-gaff.c
@@ -31,6 +31,8 @@
 #include <linux/usb.h>
 #include <linux/hid.h>
 #include "hid-ids.h"
+
+#ifdef CONFIG_GREENASIA_FF
 #include "usbhid/usbhid.h"
 
 struct gaff_device {
@@ -130,6 +132,12 @@ static int gaff_init(struct hid_device *hid)
 
 	return 0;
 }
+#else
+static inline int gaff_init(struct hid_device *hdev)
+{
+	return 0;
+}
+#endif
 
 static int ga_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
diff --git a/drivers/hid/hid-tmff.c b/drivers/hid/hid-tmff.c
index 7c1f7b5..fcd6ccd 100644
--- a/drivers/hid/hid-tmff.c
+++ b/drivers/hid/hid-tmff.c
@@ -33,11 +33,6 @@
 
 #include "hid-ids.h"
 
-#include "usbhid/usbhid.h"
-
-/* Usages for thrustmaster devices I know about */
-#define THRUSTMASTER_USAGE_FF	(HID_UP_GENDESK | 0xbb)
-
 static const signed short ff_rumble[] = {
 	FF_RUMBLE,
 	-1
@@ -48,6 +43,12 @@ static const signed short ff_joystick[] = {
 	-1
 };
 
+#ifdef CONFIG_THRUSTMASTER_FF
+#include "usbhid/usbhid.h"
+
+/* Usages for thrustmaster devices I know about */
+#define THRUSTMASTER_USAGE_FF	(HID_UP_GENDESK | 0xbb)
+
 struct tmff_device {
 	struct hid_report *report;
 	struct hid_field *ff_field;
@@ -209,6 +210,12 @@ fail:
 	kfree(tmff);
 	return error;
 }
+#else
+static inline int tmff_init(struct hid_device *hid, const signed short *ff_bits)
+{
+	return 0;
+}
+#endif
 
 static int tm_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c
index 85a198a..57f7107 100644
--- a/drivers/hid/hid-zpff.c
+++ b/drivers/hid/hid-zpff.c
@@ -27,6 +27,7 @@
 
 #include "hid-ids.h"
 
+#ifdef CONFIG_ZEROPLUS_FF
 #include "usbhid/usbhid.h"
 
 struct zpff_device {
@@ -108,6 +109,12 @@ static int zpff_init(struct hid_device *hid)
 
 	return 0;
 }
+#else
+static inline int zpff_init(struct hid_device *hid)
+{
+	return 0;
+}
+#endif
 
 static int zp_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
-- 
cgit v0.10.2


From b04b4f7862de8d6e8b536853aaf66a6c1bb05cbd Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Fri, 15 May 2009 18:00:38 +0400
Subject: ALSA: parisc/harmony: fix printk format warning

Fix this warning:
sound/parisc/harmony.c:938: warning: format '%lx' expects type 'long unsigned int',
but argument 2 has type 'resource_size_t'

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/parisc/harmony.c b/sound/parisc/harmony.c
index 6055fd6..63ae0f9 100644
--- a/sound/parisc/harmony.c
+++ b/sound/parisc/harmony.c
@@ -935,7 +935,7 @@ snd_harmony_create(struct snd_card *card,
 	h->iobase = ioremap_nocache(padev->hpa.start, HARMONY_SIZE);
 	if (h->iobase == NULL) {
 		printk(KERN_ERR PFX "unable to remap hpa 0x%lx\n",
-		       padev->hpa.start);
+		       (unsigned long)padev->hpa.start);
 		err = -EBUSY;
 		goto free_and_ret;
 	}
-- 
cgit v0.10.2


From f0bca459829fd33d659c8cd0369ac86a3924a9bc Mon Sep 17 00:00:00 2001
From: Sergey Belyashov <Sergey.Belyashov@gmail.com>
Date: Fri, 15 May 2009 16:05:57 +0200
Subject: HID: autocentering support for Logitech Force 3D Pro

This patch adds autocentering support for Logitech Force 3D Pro.

Signed-off-by: Sergey Belyashov <Sergey.Belyashov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c
index 9735be6..5609970 100644
--- a/drivers/hid/hid-lgff.c
+++ b/drivers/hid/hid-lgff.c
@@ -50,6 +50,12 @@ static const signed short ff_joystick[] = {
 	-1
 };
 
+static const signed short ff_joystick_ac[] = {
+	FF_CONSTANT,
+	FF_AUTOCENTER,
+	-1
+};
+
 static const signed short ff_wheel[] = {
 	FF_CONSTANT,
 	FF_AUTOCENTER,
@@ -60,7 +66,7 @@ static const struct dev_type devices[] = {
 	{ 0x046d, 0xc211, ff_rumble },
 	{ 0x046d, 0xc219, ff_rumble },
 	{ 0x046d, 0xc283, ff_joystick },
-	{ 0x046d, 0xc286, ff_joystick },
+	{ 0x046d, 0xc286, ff_joystick_ac },
 	{ 0x046d, 0xc294, ff_wheel },
 	{ 0x046d, 0xc295, ff_joystick },
 	{ 0x046d, 0xca03, ff_wheel },
-- 
cgit v0.10.2


From f36dd6e7c0c04a056d3441185a17e5d4e3723192 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 May 2009 15:07:47 +0200
Subject: [ARM] S3C64XX: GPIO include cleanup

Cleanup arm/plat-s3c64xx/include/plat/gpio-bank-h.h include file.
Using shift-left operation with value >32 is a bad habit.

Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c64xx/include/plat/gpio-bank-h.h b/arch/arm/plat-s3c64xx/include/plat/gpio-bank-h.h
index 8154951..2ba1767 100644
--- a/arch/arm/plat-s3c64xx/include/plat/gpio-bank-h.h
+++ b/arch/arm/plat-s3c64xx/include/plat/gpio-bank-h.h
@@ -61,14 +61,14 @@
 #define S3C64XX_GPH7_ADDR_CF1		(0x06 << 28)
 #define S3C64XX_GPH7_EINT_G6_7		(0x07 << 28)
 
-#define S3C64XX_GPH8_MMC1_DATA6		(0x02 << 32)
-#define S3C64XX_GPH8_MMC2_DATA2		(0x03 << 32)
-#define S3C64XX_GPH8_I2S_V40_LRCLK	(0x05 << 32)
-#define S3C64XX_GPH8_ADDR_CF2		(0x06 << 32)
-#define S3C64XX_GPH8_EINT_G6_8		(0x07 << 32)
-
-#define S3C64XX_GPH9_MMC1_DATA7		(0x02 << 36)
-#define S3C64XX_GPH9_MMC2_DATA3		(0x03 << 36)
-#define S3C64XX_GPH9_I2S_V40_DI		(0x05 << 36)
-#define S3C64XX_GPH9_EINT_G6_9		(0x07 << 36)
+#define S3C64XX_GPH8_MMC1_DATA6		(0x02 <<  0)
+#define S3C64XX_GPH8_MMC2_DATA2		(0x03 <<  0)
+#define S3C64XX_GPH8_I2S_V40_LRCLK	(0x05 <<  0)
+#define S3C64XX_GPH8_ADDR_CF2		(0x06 <<  0)
+#define S3C64XX_GPH8_EINT_G6_8		(0x07 <<  0)
 
+#define S3C64XX_GPH9_OUTPUT		(0x01 <<  4)
+#define S3C64XX_GPH9_MMC1_DATA7		(0x02 <<  4)
+#define S3C64XX_GPH9_MMC2_DATA3		(0x03 <<  4)
+#define S3C64XX_GPH9_I2S_V40_DI		(0x05 <<  4)
+#define S3C64XX_GPH9_EINT_G6_9		(0x07 <<  4)
-- 
cgit v0.10.2


From beb9f4ed22ee618ffcf5064ff7d5ca4cfc83104b Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 May 2009 15:07:41 +0200
Subject: [ARM] S3C64XX: fix GPIO debug

Fix compilation bug when debug was enabled

Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c64xx/gpiolib.c b/arch/arm/plat-s3c64xx/gpiolib.c
index ee9188a..78ee52c 100644
--- a/arch/arm/plat-s3c64xx/gpiolib.c
+++ b/arch/arm/plat-s3c64xx/gpiolib.c
@@ -57,7 +57,7 @@
 #if 1
 #define gpio_dbg(x...) do { } while(0)
 #else
-#define gpio_dbg(x...) printk(KERN_DEBUG ## x)
+#define gpio_dbg(x...) printk(KERN_DEBUG x)
 #endif
 
 /* The s3c64xx_gpiolib_4bit routines are to control the gpio banks where
-- 
cgit v0.10.2


From 871fcd7cf791b853ae044f813db94caefbf66725 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 15 May 2009 14:20:53 +0100
Subject: [ARM] S3C24XX: Fix unused code warning in arch/arm/plat-s3c24xx/dma.c

Fix unused code warning in arch/arm/plat-s3c24xx/dma.c if there
is no PM support enabled. The function to_dma_chan() should
be marked inline so that the compiler will eliminate it without
warning if it isn't used.

arch/arm/plat-s3c24xx/dma.c:1239: warning: 'to_dma_chan' defined but not used

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index aee2aeb..07326f6 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -1235,7 +1235,7 @@ int s3c2410_dma_getposition(unsigned int channel, dma_addr_t *src, dma_addr_t *d
 
 EXPORT_SYMBOL(s3c2410_dma_getposition);
 
-static struct s3c2410_dma_chan *to_dma_chan(struct sys_device *dev)
+static inline struct s3c2410_dma_chan *to_dma_chan(struct sys_device *dev)
 {
 	return container_of(dev, struct s3c2410_dma_chan, dev);
 }
-- 
cgit v0.10.2


From a8af6de00fafed316fea8f39d87c7e8e19ec1ea0 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 15 May 2009 14:57:09 +0100
Subject: [ARM] S3C2410: mach-bast.c registering i2c data too early

The BAST support code is calling s3c_i2c0_set_platdata() from
the map_io() entry, instead of the bast_init() code. This causes
the registration to fail due to kmalloc() not being available
at the time.

This fixes the following error:
s3c_i2c0_set_platdata: no memory for platform data

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 4389c16..8637dea 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -588,8 +588,6 @@ static void __init bast_map_io(void)
 
 	s3c_device_nand.dev.platform_data = &bast_nand_info;
 
-	s3c_i2c0_set_platdata(&bast_i2c_info);
-
 	s3c24xx_init_io(bast_iodesc, ARRAY_SIZE(bast_iodesc));
 	s3c24xx_init_clocks(0);
 	s3c24xx_init_uarts(bast_uartcfgs, ARRAY_SIZE(bast_uartcfgs));
@@ -602,6 +600,7 @@ static void __init bast_init(void)
 	sysdev_class_register(&bast_pm_sysclass);
 	sysdev_register(&bast_pm_sysdev);
 
+	s3c_i2c0_set_platdata(&bast_i2c_info);
 	s3c24xx_fb_set_platdata(&bast_fb_info);
 	platform_add_devices(bast_devices, ARRAY_SIZE(bast_devices));
 
-- 
cgit v0.10.2


From 3ac19bb443255579b30f05af5e688b6c73b1bb91 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 15 May 2009 15:21:57 +0100
Subject: [ARM] S3C: Do not set clk->owner field if unset

The s3c24xx_register_clock() function has been doing a test
on clk->owner to see if it is NULL, and then setting itself
as the owner if clk->owner == NULL.

This is not needed, arch/arm/plat-s3c/clock.c cannot be
compiled as a module, and even if it was, it should not be
playing with this field if it being registered from somewhere
else.

The best course of action is to remove this bit of
code completely.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/clock.c b/arch/arm/plat-s3c/clock.c
index b6be76e..4d01ef1 100644
--- a/arch/arm/plat-s3c/clock.c
+++ b/arch/arm/plat-s3c/clock.c
@@ -306,8 +306,6 @@ struct clk s3c24xx_uclk = {
 
 int s3c24xx_register_clock(struct clk *clk)
 {
-	clk->owner = THIS_MODULE;
-
 	if (clk->enable == NULL)
 		clk->enable = clk_null_enable;
 
-- 
cgit v0.10.2


From 391fbdc5d527149578490db2f1619951d91f3561 Mon Sep 17 00:00:00 2001
From: Christine Caulfield <ccaulfie@redhat.com>
Date: Thu, 7 May 2009 10:54:16 -0500
Subject: dlm: connect to nodes earlier

Make network connections to other nodes earlier, in the context of
dlm_recoverd.  This avoids connecting to nodes from dlm_send where we
try to avoid allocations which could possibly deadlock if memory reclaim
goes into the cluster fs which may try to do a dlm operation.

Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 609108a..2559a97 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -309,6 +309,20 @@ static void lowcomms_state_change(struct sock *sk)
 		lowcomms_write_space(sk);
 }
 
+int dlm_lowcomms_connect_node(int nodeid)
+{
+	struct connection *con;
+
+	if (nodeid == dlm_our_nodeid())
+		return 0;
+
+	con = nodeid2con(nodeid, GFP_NOFS);
+	if (!con)
+		return -ENOMEM;
+	lowcomms_connect_sock(con);
+	return 0;
+}
+
 /* Make a socket active */
 static int add_sock(struct socket *sock, struct connection *con)
 {
@@ -1421,7 +1435,7 @@ static int work_start(void)
 static void stop_conn(struct connection *con)
 {
 	con->flags |= 0x0F;
-	if (con->sock)
+	if (con->sock && con->sock->sk)
 		con->sock->sk->sk_user_data = NULL;
 }
 
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index a9a9618..1311e64 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -19,6 +19,7 @@ void dlm_lowcomms_stop(void);
 int dlm_lowcomms_close(int nodeid);
 void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
 void dlm_lowcomms_commit_buffer(void *mh);
+int dlm_lowcomms_connect_node(int nodeid);
 
 #endif				/* __LOWCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 26133f0..2afb770 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2009 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
 #include "recover.h"
 #include "rcom.h"
 #include "config.h"
+#include "lowcomms.h"
 
 static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
 {
@@ -45,7 +46,7 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
 static int dlm_add_member(struct dlm_ls *ls, int nodeid)
 {
 	struct dlm_member *memb;
-	int w;
+	int w, error;
 
 	memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
 	if (!memb)
@@ -57,6 +58,12 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
 		return w;
 	}
 
+	error = dlm_lowcomms_connect_node(nodeid);
+	if (error < 0) {
+		kfree(memb);
+		return error;
+	}
+
 	memb->nodeid = nodeid;
 	memb->weight = w;
 	add_ordered_member(ls, memb);
-- 
cgit v0.10.2


From ef923214a4816c289e4af2d67a9ebb1a31e4ac61 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 14 May 2009 13:29:14 +1000
Subject: perf_counter: powerpc: use u64 for event codes internally

Although the perf_counter API allows 63-bit raw event codes,
internally in the powerpc back-end we had been using 32-bit
event codes.  This expands them to 64 bits so that we can add
bits for specifying threshold start/stop events and instruction
sampling modes later.

This also corrects the return value of can_go_on_limited_pmc;
we were returning an event code rather than just a 0/1 value in
some circumstances. That didn't particularly matter while event
codes were 32-bit, but now that event codes are 64-bit it
might, so this fixes it.

[ Impact: extend PowerPC perfcounter interfaces from u32 to u64 ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18955.36874.472452.353104@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index 56d66c3..ceea76a 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -23,13 +23,13 @@ struct power_pmu {
 	int	max_alternatives;
 	u64	add_fields;
 	u64	test_adder;
-	int	(*compute_mmcr)(unsigned int events[], int n_ev,
+	int	(*compute_mmcr)(u64 events[], int n_ev,
 				unsigned int hwc[], u64 mmcr[]);
-	int	(*get_constraint)(unsigned int event, u64 *mskp, u64 *valp);
-	int	(*get_alternatives)(unsigned int event, unsigned int flags,
-				    unsigned int alt[]);
+	int	(*get_constraint)(u64 event, u64 *mskp, u64 *valp);
+	int	(*get_alternatives)(u64 event, unsigned int flags,
+				    u64 alt[]);
 	void	(*disable_pmc)(unsigned int pmc, u64 mmcr[]);
-	int	(*limited_pmc_event)(unsigned int event);
+	int	(*limited_pmc_event)(u64 event);
 	int	limited_pmc5_6;	/* PMC5 and PMC6 have limited function */
 	int	n_generic;
 	int	*generic_events;
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index db8d5ca..8d4cafc 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -26,7 +26,7 @@ struct cpu_hw_counters {
 	int n_limited;
 	u8  pmcs_enabled;
 	struct perf_counter *counter[MAX_HWCOUNTERS];
-	unsigned int events[MAX_HWCOUNTERS];
+	u64 events[MAX_HWCOUNTERS];
 	unsigned int flags[MAX_HWCOUNTERS];
 	u64 mmcr[3];
 	struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
@@ -131,11 +131,11 @@ static void write_pmc(int idx, unsigned long val)
  * and see if any combination of alternative codes is feasible.
  * The feasible set is returned in event[].
  */
-static int power_check_constraints(unsigned int event[], unsigned int cflags[],
+static int power_check_constraints(u64 event[], unsigned int cflags[],
 				   int n_ev)
 {
 	u64 mask, value, nv;
-	unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
 	u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
 	u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
 	u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
@@ -564,7 +564,7 @@ void hw_perf_enable(void)
 }
 
 static int collect_events(struct perf_counter *group, int max_count,
-			  struct perf_counter *ctrs[], unsigned int *events,
+			  struct perf_counter *ctrs[], u64 *events,
 			  unsigned int *flags)
 {
 	int n = 0;
@@ -752,11 +752,11 @@ struct pmu power_pmu = {
  * that a limited PMC can count, doesn't require interrupts, and
  * doesn't exclude any processor mode.
  */
-static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev,
+static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
 				 unsigned int flags)
 {
 	int n;
-	unsigned int alt[MAX_EVENT_ALTERNATIVES];
+	u64 alt[MAX_EVENT_ALTERNATIVES];
 
 	if (counter->hw_event.exclude_user
 	    || counter->hw_event.exclude_kernel
@@ -776,10 +776,8 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev,
 
 	flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
 	n = ppmu->get_alternatives(ev, flags, alt);
-	if (n)
-		return alt[0];
 
-	return 0;
+	return n > 0;
 }
 
 /*
@@ -787,10 +785,9 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev,
  * and return the event code, or 0 if there is no such alternative.
  * (Note: event code 0 is "don't count" on all machines.)
  */
-static unsigned long normal_pmc_alternative(unsigned long ev,
-					    unsigned long flags)
+static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
 {
-	unsigned int alt[MAX_EVENT_ALTERNATIVES];
+	u64 alt[MAX_EVENT_ALTERNATIVES];
 	int n;
 
 	flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
@@ -820,9 +817,10 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
 
 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
-	unsigned long ev, flags;
+	u64 ev;
+	unsigned long flags;
 	struct perf_counter *ctrs[MAX_HWCOUNTERS];
-	unsigned int events[MAX_HWCOUNTERS];
+	u64 events[MAX_HWCOUNTERS];
 	unsigned int cflags[MAX_HWCOUNTERS];
 	int n;
 	int err;
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 744a275..836fa11 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -213,7 +213,7 @@ static unsigned char direct_marked_event[8] = {
  * Returns 1 if event counts things relating to marked instructions
  * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
  */
-static int p4_marked_instr_event(unsigned int event)
+static int p4_marked_instr_event(u64 event)
 {
 	int pmc, psel, unit, byte, bit;
 	unsigned int mask;
@@ -249,7 +249,7 @@ static int p4_marked_instr_event(unsigned int event)
 	return (mask >> (byte * 8 + bit)) & 1;
 }
 
-static int p4_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
 {
 	int pmc, byte, unit, lower, sh;
 	u64 mask = 0, value = 0;
@@ -320,8 +320,7 @@ static unsigned int ppc_inst_cmpl[] = {
 	0x1001, 0x4001, 0x6001, 0x7001, 0x8001
 };
 
-static int p4_get_alternatives(unsigned int event, unsigned int flags,
-			       unsigned int alt[])
+static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
 	int i, j, na;
 
@@ -353,7 +352,7 @@ static int p4_get_alternatives(unsigned int event, unsigned int flags,
 	return na;
 }
 
-static int p4_compute_mmcr(unsigned int event[], int n_ev,
+static int p4_compute_mmcr(u64 event[], int n_ev,
 			   unsigned int hwc[], u64 mmcr[])
 {
 	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 8154eaa..3ac0654 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -135,7 +135,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = {
 	[PM_GRS] =   { 0x0e00000000ull, 0x0c40000000ull },
 };
 
-static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
@@ -188,7 +188,7 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 	return 0;
 }
 
-static int power5p_limited_pmc_event(unsigned int event)
+static int power5p_limited_pmc_event(u64 event)
 {
 	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
 
@@ -273,11 +273,11 @@ static int find_alternative_bdecode(unsigned int event)
 	return -1;
 }
 
-static int power5p_get_alternatives(unsigned int event, unsigned int flags,
-				    unsigned int alt[])
+static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
-	int i, j, ae, nalt = 1;
+	int i, j, nalt = 1;
 	int nlim;
+	u64 ae;
 
 	alt[0] = event;
 	nalt = 1;
@@ -402,7 +402,7 @@ static unsigned char direct_event_is_marked[0x28] = {
  * Returns 1 if event counts things relating to marked instructions
  * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
  */
-static int power5p_marked_instr_event(unsigned int event)
+static int power5p_marked_instr_event(u64 event)
 {
 	int pmc, psel;
 	int bit, byte, unit;
@@ -451,7 +451,7 @@ static int power5p_marked_instr_event(unsigned int event)
 	return (mask >> (byte * 8 + bit)) & 1;
 }
 
-static int power5p_compute_mmcr(unsigned int event[], int n_ev,
+static int power5p_compute_mmcr(u64 event[], int n_ev,
 				unsigned int hwc[], u64 mmcr[])
 {
 	u64 mmcr1 = 0;
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 6e667dc..d534496 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -139,7 +139,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = {
 	[PM_GRS] =   { 0x30002000000000ull, 0x30000400000000ull },
 };
 
-static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
@@ -224,7 +224,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
  * Scan the alternatives table for a match and return the
  * index into the alternatives table if found, else -1.
  */
-static int find_alternative(unsigned int event)
+static int find_alternative(u64 event)
 {
 	int i, j;
 
@@ -250,7 +250,7 @@ static const unsigned char bytedecode_alternatives[4][4] = {
  * PMCSEL values on other counters.  This returns the alternative
  * event code for those that do, or -1 otherwise.
  */
-static int find_alternative_bdecode(unsigned int event)
+static u64 find_alternative_bdecode(u64 event)
 {
 	int pmc, altpmc, pp, j;
 
@@ -269,10 +269,10 @@ static int find_alternative_bdecode(unsigned int event)
 	return -1;
 }
 
-static int power5_get_alternatives(unsigned int event, unsigned int flags,
-				   unsigned int alt[])
+static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
-	int i, j, ae, nalt = 1;
+	int i, j, nalt = 1;
+	u64 ae;
 
 	alt[0] = event;
 	nalt = 1;
@@ -338,7 +338,7 @@ static unsigned char direct_event_is_marked[0x28] = {
  * Returns 1 if event counts things relating to marked instructions
  * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
  */
-static int power5_marked_instr_event(unsigned int event)
+static int power5_marked_instr_event(u64 event)
 {
 	int pmc, psel;
 	int bit, byte, unit;
@@ -382,7 +382,7 @@ static int power5_marked_instr_event(unsigned int event)
 	return (mask >> (byte * 8 + bit)) & 1;
 }
 
-static int power5_compute_mmcr(unsigned int event[], int n_ev,
+static int power5_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], u64 mmcr[])
 {
 	u64 mmcr1 = 0;
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index d44049f..ab7c615 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -134,7 +134,7 @@ static u32 marked_bus_events[16] = {
  * Returns 1 if event counts things relating to marked instructions
  * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
  */
-static int power6_marked_instr_event(unsigned int event)
+static int power6_marked_instr_event(u64 event)
 {
 	int pmc, psel, ptype;
 	int bit, byte, unit;
@@ -172,7 +172,7 @@ static int power6_marked_instr_event(unsigned int event)
 /*
  * Assign PMC numbers and compute MMCR1 value for a set of events
  */
-static int p6_compute_mmcr(unsigned int event[], int n_ev,
+static int p6_compute_mmcr(u64 event[], int n_ev,
 			   unsigned int hwc[], u64 mmcr[])
 {
 	u64 mmcr1 = 0;
@@ -265,7 +265,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
  *	20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
  *	32-34	select field: nest (subunit) event selector
  */
-static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
 {
 	int pmc, byte, sh, subunit;
 	u64 mask = 0, value = 0;
@@ -298,7 +298,7 @@ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 	return 0;
 }
 
-static int p6_limited_pmc_event(unsigned int event)
+static int p6_limited_pmc_event(u64 event)
 {
 	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
 
@@ -337,7 +337,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
  * This could be made more efficient with a binary search on
  * a presorted list, if necessary
  */
-static int find_alternatives_list(unsigned int event)
+static int find_alternatives_list(u64 event)
 {
 	int i, j;
 	unsigned int alt;
@@ -356,12 +356,12 @@ static int find_alternatives_list(unsigned int event)
 	return -1;
 }
 
-static int p6_get_alternatives(unsigned int event, unsigned int flags,
-			       unsigned int alt[])
+static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
 	int i, j, nlim;
-	unsigned int aevent, psel, pmc;
+	unsigned int psel, pmc;
 	unsigned int nalt = 1;
+	u64 aevent;
 
 	alt[0] = event;
 	nlim = p6_limited_pmc_event(event);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index af2d188..eed47c4 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -147,7 +147,7 @@ static unsigned char direct_marked_event[8] = {
  * Returns 1 if event counts things relating to marked instructions
  * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
  */
-static int p970_marked_instr_event(unsigned int event)
+static int p970_marked_instr_event(u64 event)
 {
 	int pmc, psel, unit, byte, bit;
 	unsigned int mask;
@@ -192,7 +192,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = {
 	[PM_STS] =   { 0x380000000000ull, 0x310000000000ull },
 };
 
-static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
+static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
 {
 	int pmc, byte, unit, sh, spcsel;
 	u64 mask = 0, value = 0;
@@ -243,8 +243,7 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
 	return 0;
 }
 
-static int p970_get_alternatives(unsigned int event, unsigned int flags,
-				 unsigned int alt[])
+static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
 	alt[0] = event;
 
@@ -257,7 +256,7 @@ static int p970_get_alternatives(unsigned int event, unsigned int flags,
 	return 1;
 }
 
-static int p970_compute_mmcr(unsigned int event[], int n_ev,
+static int p970_compute_mmcr(u64 event[], int n_ev,
 			     unsigned int hwc[], u64 mmcr[])
 {
 	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
-- 
cgit v0.10.2


From 9d23a90a67261e73b2fcac04d8ca963c6b496afb Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 14 May 2009 21:48:08 +1000
Subject: perf_counter: allow arch to supply event misc flags and instruction
 pointer

At present the values we put in overflow events for the misc
flags indicating processor mode and the instruction pointer are
obtained using the standard user_mode() and
instruction_pointer() functions. Those functions tell you where
the performance monitor interrupt was taken, which might not be
exactly where the counter overflow occurred, for example
because interrupts were disabled at the point where the
overflow occurred, or because the processor had many
instructions in flight and chose to complete some more
instructions beyond the one that caused the counter overflow.

Some architectures (e.g. powerpc) can supply more precise
information about where the counter overflow occurred and the
processor mode at that point.  This introduces new functions,
perf_misc_flags() and perf_instruction_pointer(), which arch
code can override to provide more precise information if
available.  They have default implementations which are
identical to the existing code.

This also adds a new misc flag value,
PERF_EVENT_MISC_HYPERVISOR, for the case where a counter
overflow occurred in the hypervisor.  We encode the processor
mode in the 2 bits previously used to indicate user or kernel
mode; the values for user and kernel mode are unchanged and
hypervisor mode is indicated by both bits being set.

[ Impact: generalize perfcounter core facilities ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18956.1272.818511.561835@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 004b6e1..c8c1dfc2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -215,8 +215,11 @@ struct perf_counter_mmap_page {
 	__u32   data_head;		/* head in the data section */
 };
 
+#define PERF_EVENT_MISC_CPUMODE_MASK	(3 << 0)
+#define PERF_EVENT_MISC_CPUMODE_UNKNOWN	(0 << 0)
 #define PERF_EVENT_MISC_KERNEL		(1 << 0)
-#define PERF_EVENT_MISC_USER		(1 << 1)
+#define PERF_EVENT_MISC_USER		(2 << 0)
+#define PERF_EVENT_MISC_HYPERVISOR	(3 << 0)
 #define PERF_EVENT_MISC_OVERFLOW	(1 << 2)
 
 struct perf_event_header {
@@ -596,6 +599,12 @@ extern int sysctl_perf_counter_mlock;
 
 extern void perf_counter_init(void);
 
+#ifndef perf_misc_flags
+#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_EVENT_MISC_USER : \
+				 PERF_EVENT_MISC_KERNEL)
+#define perf_instruction_pointer(regs)	instruction_pointer(regs)
+#endif
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 728a595..57840a9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2042,11 +2042,10 @@ static void perf_counter_output(struct perf_counter *counter,
 	header.size = sizeof(header);
 
 	header.misc = PERF_EVENT_MISC_OVERFLOW;
-	header.misc |= user_mode(regs) ?
-		PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
+	header.misc |= perf_misc_flags(regs);
 
 	if (record_type & PERF_RECORD_IP) {
-		ip = instruction_pointer(regs);
+		ip = perf_instruction_pointer(regs);
 		header.type |= PERF_RECORD_IP;
 		header.size += sizeof(ip);
 	}
-- 
cgit v0.10.2


From 0bbd0d4be8d5d3676c126e06e3c75c16def00441 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 14 May 2009 13:31:48 +1000
Subject: perf_counter: powerpc: supply more precise information on counter
 overflow events

This uses values from the MMCRA, SIAR and SDAR registers on
powerpc to supply more precise information for overflow events,
including a data address when PERF_RECORD_ADDR is specified.

Since POWER6 uses different bit positions in MMCRA from earlier
processors, this converts the struct power_pmu limited_pmc5_6
field, which only had 0/1 values, into a flags field and
defines bit values for its previous use (PPMU_LIMITED_PMC5_6)
and a new flag (PPMU_ALT_SIPR) to indicate that the processor
uses the POWER6 bit positions rather than the earlier
positions.  It also adds definitions in reg.h for the new and
old positions of the bit that indicates that the SIAR and SDAR
values come from the same instruction.

For the data address, the SDAR value is supplied if we are not
doing instruction sampling.  In that case there is no guarantee
that the address given in the PERF_RECORD_ADDR subrecord will
correspond to the instruction whose address is given in the
PERF_RECORD_IP subrecord.

If instruction sampling is enabled (e.g. because this counter
is counting a marked instruction event), then we only supply
the SDAR value for the PERF_RECORD_ADDR subrecord if it
corresponds to the instruction whose address is in the
PERF_RECORD_IP subrecord.  Otherwise we supply 0.

[ Impact: support more PMU hardware features on PowerPC ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18955.37028.48861.555309@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index ceea76a..1c60f0c 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -30,7 +30,7 @@ struct power_pmu {
 				    u64 alt[]);
 	void	(*disable_pmc)(unsigned int pmc, u64 mmcr[]);
 	int	(*limited_pmc_event)(u64 event);
-	int	limited_pmc5_6;	/* PMC5 and PMC6 have limited function */
+	u32	flags;
 	int	n_generic;
 	int	*generic_events;
 };
@@ -38,12 +38,24 @@ struct power_pmu {
 extern struct power_pmu *ppmu;
 
 /*
+ * Values for power_pmu.flags
+ */
+#define PPMU_LIMITED_PMC5_6	1	/* PMC5/6 have limited function */
+#define PPMU_ALT_SIPR		2	/* uses alternate posn for SIPR/HV */
+
+/*
  * Values for flags to get_alternatives()
  */
 #define PPMU_LIMITED_PMC_OK	1	/* can put this on a limited PMC */
 #define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
 #define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
 
+struct pt_regs;
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
+
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+
 /*
  * The power_pmu.get_constraint function returns a 64-bit value and
  * a 64-bit mask that express the constraints between this event and
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e8018d5..fb359b0 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -492,11 +492,13 @@
 #define   MMCR0_FCHV	0x00000001UL /* freeze conditions in hypervisor mode */
 #define SPRN_MMCR1	798
 #define SPRN_MMCRA	0x312
+#define   MMCRA_SDSYNC	0x80000000UL /* SDAR synced with SIAR */
 #define   MMCRA_SIHV	0x10000000UL /* state of MSR HV when SIAR set */
 #define   MMCRA_SIPR	0x08000000UL /* state of MSR PR when SIAR set */
 #define   MMCRA_SLOT	0x07000000UL /* SLOT bits (37-39) */
 #define   MMCRA_SLOT_SHIFT	24
 #define   MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
+#define   POWER6_MMCRA_SDSYNC 0x0000080000000000ULL	/* SDAR/SIAR synced */
 #define   POWER6_MMCRA_SIHV   0x0000040000000000ULL
 #define   POWER6_MMCRA_SIPR   0x0000020000000000ULL
 #define   POWER6_MMCRA_THRM	0x00000020UL
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 8d4cafc..6baae5a 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -17,6 +17,7 @@
 #include <asm/pmc.h>
 #include <asm/machdep.h>
 #include <asm/firmware.h>
+#include <asm/ptrace.h>
 
 struct cpu_hw_counters {
 	int n_counters;
@@ -310,7 +311,8 @@ static void power_pmu_read(struct perf_counter *counter)
  */
 static int is_limited_pmc(int pmcnum)
 {
-	return ppmu->limited_pmc5_6 && (pmcnum == 5 || pmcnum == 6);
+	return (ppmu->flags & PPMU_LIMITED_PMC5_6)
+		&& (pmcnum == 5 || pmcnum == 6);
 }
 
 static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
@@ -860,7 +862,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	 * If this machine has limited counters, check whether this
 	 * event could go on a limited counter.
 	 */
-	if (ppmu->limited_pmc5_6) {
+	if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
 		if (can_go_on_limited_pmc(counter, ev, flags)) {
 			flags |= PPMU_LIMITED_PMC_OK;
 		} else if (ppmu->limited_pmc_event(ev)) {
@@ -933,6 +935,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	u64 period = counter->hw.irq_period;
 	s64 prev, delta, left;
 	int record = 0;
+	u64 addr, mmcra, sdsync;
 
 	/* we don't have to worry about interrupts here */
 	prev = atomic64_read(&counter->hw.prev_count);
@@ -963,8 +966,76 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	/*
 	 * Finally record data if requested.
 	 */
-	if (record)
-		perf_counter_overflow(counter, nmi, regs, 0);
+	if (record) {
+		addr = 0;
+		if (counter->hw_event.record_type & PERF_RECORD_ADDR) {
+			/*
+			 * The user wants a data address recorded.
+			 * If we're not doing instruction sampling,
+			 * give them the SDAR (sampled data address).
+			 * If we are doing instruction sampling, then only
+			 * give them the SDAR if it corresponds to the
+			 * instruction pointed to by SIAR; this is indicated
+			 * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA.
+			 */
+			mmcra = regs->dsisr;
+			sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
+				POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
+			if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
+				addr = mfspr(SPRN_SDAR);
+		}
+		perf_counter_overflow(counter, nmi, regs, addr);
+	}
+}
+
+/*
+ * Called from generic code to get the misc flags (i.e. processor mode)
+ * for an event.
+ */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	unsigned long mmcra;
+
+	if (TRAP(regs) != 0xf00) {
+		/* not a PMU interrupt */
+		return user_mode(regs) ? PERF_EVENT_MISC_USER :
+			PERF_EVENT_MISC_KERNEL;
+	}
+
+	mmcra = regs->dsisr;
+	if (ppmu->flags & PPMU_ALT_SIPR) {
+		if (mmcra & POWER6_MMCRA_SIHV)
+			return PERF_EVENT_MISC_HYPERVISOR;
+		return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
+			PERF_EVENT_MISC_KERNEL;
+	}
+	if (mmcra & MMCRA_SIHV)
+		return PERF_EVENT_MISC_HYPERVISOR;
+	return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
+			PERF_EVENT_MISC_KERNEL;
+}
+
+/*
+ * Called from generic code to get the instruction pointer
+ * for an event.
+ */
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	unsigned long mmcra;
+	unsigned long ip;
+	unsigned long slot;
+
+	if (TRAP(regs) != 0xf00)
+		return regs->nip;	/* not a PMU interrupt */
+
+	ip = mfspr(SPRN_SIAR);
+	mmcra = regs->dsisr;
+	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
+		slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
+		if (slot > 1)
+			ip += 4 * (slot - 1);
+	}
+	return ip;
 }
 
 /*
@@ -984,6 +1055,11 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 					mfspr(SPRN_PMC6));
 
 	/*
+	 * Overload regs->dsisr to store MMCRA so we only need to read it once.
+	 */
+	regs->dsisr = mfspr(SPRN_MMCRA);
+
+	/*
 	 * If interrupts were soft-disabled when this PMU interrupt
 	 * occurred, treat it as an NMI.
 	 */
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 3ac0654..c6cdfc1 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -625,6 +625,6 @@ struct power_pmu power5p_pmu = {
 	.disable_pmc = power5p_disable_pmc,
 	.n_generic = ARRAY_SIZE(power5p_generic_events),
 	.generic_events = power5p_generic_events,
-	.limited_pmc5_6 = 1,
+	.flags = PPMU_LIMITED_PMC5_6,
 	.limited_pmc_event = power5p_limited_pmc_event,
 };
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index ab7c615..cd4fbe0 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -485,6 +485,6 @@ struct power_pmu power6_pmu = {
 	.disable_pmc = p6_disable_pmc,
 	.n_generic = ARRAY_SIZE(power6_generic_events),
 	.generic_events = power6_generic_events,
-	.limited_pmc5_6 = 1,
+	.flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
 	.limited_pmc_event = p6_limited_pmc_event,
 };
-- 
cgit v0.10.2


From d9bcc01d58d18ed287091707b0b45c6ac888a11a Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Thu, 14 May 2009 12:06:12 +0530
Subject: x86, mtrr: replace MTRRcap_MSR with msr-index's MSR_MTRRcap

Use standard msr-index.h's MSR declaration and no need to declare again.

[ Impact: cleanup, no object code change ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0b776c0..de9c20f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -306,7 +306,7 @@ void __init get_mtrr_state(void)
 
 	vrs = mtrr_state.var_ranges;
 
-	rdmsr(MTRRcap_MSR, lo, dummy);
+	rdmsr(MSR_MTRRcap, lo, dummy);
 	mtrr_state.have_fixed = (lo >> 8) & 1;
 
 	for (i = 0; i < num_var_ranges; i++)
@@ -703,7 +703,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
 static int generic_have_wrcomb(void)
 {
 	unsigned long config, dummy;
-	rdmsr(MTRRcap_MSR, config, dummy);
+	rdmsr(MSR_MTRRcap, config, dummy);
 	return (config & (1 << 10));
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 03cda01..8fc248b 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -104,7 +104,7 @@ static void __init set_num_var_ranges(void)
 	unsigned long config = 0, dummy;
 
 	if (use_intel()) {
-		rdmsr(MTRRcap_MSR, config, dummy);
+		rdmsr(MSR_MTRRcap, config, dummy);
 	} else if (is_cpu(AMD))
 		config = 2;
 	else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 77f67f7..5d37fb1 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -5,7 +5,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 
-#define MTRRcap_MSR     0x0fe
 #define MTRRdefType_MSR 0x2ff
 
 #define MTRRfix64K_00000_MSR 0x250
-- 
cgit v0.10.2


From a036c7a358cc9d7ed28a188480b9a4d709e09b24 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Thu, 14 May 2009 12:10:43 +0530
Subject: x86, mtrr: replace MTRRfix64K_00000_MSR with msr-index's
 MSR_MTRRfix64K_00000

Use standard msr-index.h's MSR declaration and no need to declare again.

[ Impact: cleanup, no object code change ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index de9c20f..8b115c0 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -20,7 +20,7 @@ struct fixed_range_block {
 };
 
 static struct fixed_range_block fixed_range_blocks[] = {
-	{ MTRRfix64K_00000_MSR, 1 }, /* one  64k MTRR  */
+	{ MSR_MTRRfix64K_00000, 1 }, /* one  64k MTRR  */
 	{ MTRRfix16K_80000_MSR, 2 }, /* two  16k MTRRs */
 	{ MTRRfix4K_C0000_MSR,  8 }, /* eight 4k MTRRs */
 	{}
@@ -194,7 +194,7 @@ get_fixed_ranges(mtrr_type * frs)
 
 	k8_check_syscfg_dram_mod_en();
 
-	rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+	rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]);
 
 	for (i = 0; i < 2; i++)
 		rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 5d37fb1..7f23cae 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -7,7 +7,6 @@
 
 #define MTRRdefType_MSR 0x2ff
 
-#define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
 #define MTRRfix4K_C0000_MSR 0x268
-- 
cgit v0.10.2


From 7d9d55e449089df8463bca2045d702ae6cda64a2 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Thu, 14 May 2009 12:15:32 +0530
Subject: x86, mtrr: replace MTRRfix16K_80000_MSR with msr-index's
 MSR_MTRRfix16K_80000

Use standard msr-index.h's MSR declaration and no need to declare again

[ Impact: cleanup, no object code change ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 8b115c0..00437c2 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -21,7 +21,7 @@ struct fixed_range_block {
 
 static struct fixed_range_block fixed_range_blocks[] = {
 	{ MSR_MTRRfix64K_00000, 1 }, /* one  64k MTRR  */
-	{ MTRRfix16K_80000_MSR, 2 }, /* two  16k MTRRs */
+	{ MSR_MTRRfix16K_80000, 2 }, /* two  16k MTRRs */
 	{ MTRRfix4K_C0000_MSR,  8 }, /* eight 4k MTRRs */
 	{}
 };
@@ -197,7 +197,7 @@ get_fixed_ranges(mtrr_type * frs)
 	rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]);
 
 	for (i = 0; i < 2; i++)
-		rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
+		rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
 	for (i = 0; i < 8; i++)
 		rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
 }
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 7f23cae..712b605 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -7,7 +7,6 @@
 
 #define MTRRdefType_MSR 0x2ff
 
-#define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
 #define MTRRfix4K_C0000_MSR 0x268
 #define MTRRfix4K_C8000_MSR 0x269
-- 
cgit v0.10.2


From 654ac05801ae806661c8fdeb3b5c420a31cbc5b1 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Thu, 14 May 2009 12:21:54 +0530
Subject: x86, mtrr: remove mtrr MSRs double declaration

Removed MTRR MSR from mtrr/mtrr.h as these are already declared in
msr-index.h and nobody is using them:
 MTRRfix16K_A0000_MSR
 MTRRfix4K_C8000_MSR
 MTRRfix4K_D0000_MSR
 MTRRfix4K_D8000_MSR
 MTRRfix4K_E0000_MSR
 MTRRfix4K_E8000_MSR
 MTRRfix4K_F0000_MSR
 MTRRfix4K_F8000_MSR

Use standard msr-index.h's MSR declaration and no need to declare again

[ Impact: cleanup, no object code change ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 712b605..5053793 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -7,15 +7,7 @@
 
 #define MTRRdefType_MSR 0x2ff
 
-#define MTRRfix16K_A0000_MSR 0x259
 #define MTRRfix4K_C0000_MSR 0x268
-#define MTRRfix4K_C8000_MSR 0x269
-#define MTRRfix4K_D0000_MSR 0x26a
-#define MTRRfix4K_D8000_MSR 0x26b
-#define MTRRfix4K_E0000_MSR 0x26c
-#define MTRRfix4K_E8000_MSR 0x26d
-#define MTRRfix4K_F0000_MSR 0x26e
-#define MTRRfix4K_F8000_MSR 0x26f
 
 #define MTRR_CHANGE_MASK_FIXED     0x01
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
-- 
cgit v0.10.2


From ba5673ff1ff5f428256db4cedd4b05b7be008bb6 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Thu, 14 May 2009 12:29:25 +0530
Subject: x86, mtrr: replace MTRRfix4K_C0000_MSR with msr-index's
 MSR_MTRRfix4K_C0000

Use standard msr-index.h's MSR declaration and no need to declare again.

[ Impact: cleanup, no object code change ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 00437c2..3cf58e2 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -22,7 +22,7 @@ struct fixed_range_block {
 static struct fixed_range_block fixed_range_blocks[] = {
 	{ MSR_MTRRfix64K_00000, 1 }, /* one  64k MTRR  */
 	{ MSR_MTRRfix16K_80000, 2 }, /* two  16k MTRRs */
-	{ MTRRfix4K_C0000_MSR,  8 }, /* eight 4k MTRRs */
+	{ MSR_MTRRfix4K_C0000,  8 }, /* eight 4k MTRRs */
 	{}
 };
 
@@ -199,7 +199,7 @@ get_fixed_ranges(mtrr_type * frs)
 	for (i = 0; i < 2; i++)
 		rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
 	for (i = 0; i < 8; i++)
-		rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
+		rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
 }
 
 void mtrr_save_fixed_ranges(void *info)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 5053793..e5ee686 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -7,8 +7,6 @@
 
 #define MTRRdefType_MSR 0x2ff
 
-#define MTRRfix4K_C0000_MSR 0x268
-
 #define MTRR_CHANGE_MASK_FIXED     0x01
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
-- 
cgit v0.10.2


From 52650257ea06bb15c2e2bbe854cbdf463726141a Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Thu, 14 May 2009 12:35:46 +0530
Subject: x86, mtrr: replace MTRRdefType_MSR with msr-index's MSR_MTRRdefType

Use standard msr-index.h's MSR declaration and no need to declare again.

[ Impact: cleanup, no object code change ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ce0fe4b..1d584a1 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -808,7 +808,7 @@ int __init mtrr_cleanup(unsigned address_bits)
 
 	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
 		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
+	rdmsr(MSR_MTRRdefType, def, dummy);
 	def &= 0xff;
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
@@ -1003,7 +1003,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 	 */
 	if (!is_cpu(INTEL) || disable_mtrr_trim)
 		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
+	rdmsr(MSR_MTRRdefType, def, dummy);
 	def &= 0xff;
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 3cf58e2..e930a31 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -314,7 +314,7 @@ void __init get_mtrr_state(void)
 	if (mtrr_state.have_fixed)
 		get_fixed_ranges(mtrr_state.fixed_ranges);
 
-	rdmsr(MTRRdefType_MSR, lo, dummy);
+	rdmsr(MSR_MTRRdefType, lo, dummy);
 	mtrr_state.def_type = (lo & 0xff);
 	mtrr_state.enabled = (lo & 0xc00) >> 10;
 
@@ -579,10 +579,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 	__flush_tlb();
 
 	/*  Save MTRR state */
-	rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+	rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
 
 	/*  Disable MTRRs, and set the default type to uncached  */
-	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
 }
 
 static void post_set(void) __releases(set_atomicity_lock)
@@ -591,7 +591,7 @@ static void post_set(void) __releases(set_atomicity_lock)
 	__flush_tlb();
 
 	/* Intel (P6) standard MTRRs */
-	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
 		
 	/*  Enable caches  */
 	write_cr0(read_cr0() & 0xbfffffff);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index e5ee686..7538b76 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -5,8 +5,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 
-#define MTRRdefType_MSR 0x2ff
-
 #define MTRR_CHANGE_MASK_FIXED     0x01
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 7f7e275..1f5fb15 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -35,7 +35,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
 
 		if (use_intel())
 			/*  Save MTRR state */
-			rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+			rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
 		else
 			/* Cyrix ARRs - everything else were excluded at the top */
 			ctxt->ccr3 = getCx86(CX86_CCR3);
@@ -46,7 +46,7 @@ void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
 {
 	if (use_intel())
 		/*  Disable MTRRs, and set the default type to uncached  */
-		mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
+		mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
 		      ctxt->deftype_hi);
 	else if (is_cpu(CYRIX))
 		/* Cyrix ARRs - everything else were excluded at the top */
@@ -64,7 +64,7 @@ void set_mtrr_done(struct set_mtrr_context *ctxt)
 		/*  Restore MTRRdefType  */
 		if (use_intel())
 			/* Intel (P6) standard MTRRs */
-			mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+			mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
 		else
 			/* Cyrix ARRs - everything else was excluded at the top */
 			setCx86(CX86_CCR3, ctxt->ccr3);
-- 
cgit v0.10.2


From 9a1a69a1f41cbefebf3172761f197db6aba71e68 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 29 Apr 2009 13:12:39 -0500
Subject: [SCSI] fc-transport: Close state transition-window during rport
 deletion.

Andrew Vasquez wrote:
> fc-transport: Close state transition-window during rport deletion.
>
> After an rport's state has transitioned to FC_PORTSTATE_BLOCKED,
> but, prior to making the upcall to 'block' the scsi-target
> associated with an rport, queued commands can recycle and
> ultimately run out of retries causing failures to propagate to
> upper-level drivers.  Close this transition-window by returning
> the non-'retries' modifying DID_IMM_RETRY status for submitted
> I/Os.

The same can happen for iscsi when transitioning from logged in
to failed and blocking the sdevs.

This patch converts iscsi and fc's transitions back to use DID_IMM_RETRY
instead of DID_TRANSPORT_DISRUPTED which has a limited number of retries
that we do not want to use for handling this race.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
[Addition of iscsi and fc port online devloss case conversion by Mike Christie]
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 0947954..0a2ce7b 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -357,7 +357,7 @@ int iscsi_session_chkready(struct iscsi_cls_session *session)
 		err = 0;
 		break;
 	case ISCSI_SESSION_FAILED:
-		err = DID_TRANSPORT_DISRUPTED << 16;
+		err = DID_IMM_RETRY << 16;
 		break;
 	case ISCSI_SESSION_FREE:
 		err = DID_TRANSPORT_FAILFAST << 16;
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index c9184f7..68a8d87 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -680,7 +680,7 @@ fc_remote_port_chkready(struct fc_rport *rport)
 		if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
 			result = 0;
 		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
-			result = DID_TRANSPORT_DISRUPTED << 16;
+			result = DID_IMM_RETRY << 16;
 		else
 			result = DID_NO_CONNECT << 16;
 		break;
@@ -688,7 +688,7 @@ fc_remote_port_chkready(struct fc_rport *rport)
 		if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)
 			result = DID_TRANSPORT_FAILFAST << 16;
 		else
-			result = DID_TRANSPORT_DISRUPTED << 16;
+			result = DID_IMM_RETRY << 16;
 		break;
 	default:
 		result = DID_NO_CONNECT << 16;
-- 
cgit v0.10.2


From 5e43754fd949193252ecb470d7fb08b547a1e310 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 30 Apr 2009 19:13:41 -0700
Subject: [SCSI] ses: fix problems caused by empty SES provided name

We use the name provided by SES to name objects.  An empty name is
legal in SES but causes problems in our generic device hierarchy.  Fix
this by falling back to a number if the name is either NULL or empty.

Also fix a secondary bug spotted in that dev_set_name(dev, name) uses
a string format and so would go wrong if name contained a '%'.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index 3cf61ec..348443b 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -119,7 +119,7 @@ enclosure_register(struct device *dev, const char *name, int components,
 	edev->edev.class = &enclosure_class;
 	edev->edev.parent = get_device(dev);
 	edev->cb = cb;
-	dev_set_name(&edev->edev, name);
+	dev_set_name(&edev->edev, "%s", name);
 	err = device_register(&edev->edev);
 	if (err)
 		goto err;
@@ -255,8 +255,8 @@ enclosure_component_register(struct enclosure_device *edev,
 	ecomp->number = number;
 	cdev = &ecomp->cdev;
 	cdev->parent = get_device(&edev->edev);
-	if (name)
-		dev_set_name(cdev, name);
+	if (name && name[0])
+		dev_set_name(cdev, "%s", name);
 	else
 		dev_set_name(cdev, "%u", number);
 
-- 
cgit v0.10.2


From 8454e9888cb0316dd296fd5d47c612248ed5e1d1 Mon Sep 17 00:00:00 2001
From: adam radford <aradford@gmail.com>
Date: Tue, 5 May 2009 11:45:37 -0700
Subject: [SCSI] 3w-9xxx: scsi_dma_unmap fix

This patch fixes the following regression the occurred during the
scsi_dma_map()/unmap() changes:

3w-9xxx 0001:45:00.0: DMA-API: device driver tries to free DMA memory
it has not allocated [device address=0x0000000000000000] [size=36
bytes]

Signed-off-by: Adam Radford <aradford@gmail.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 8b7983a..36c21b1 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1978,7 +1978,8 @@ static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id)
 {
 	struct scsi_cmnd *cmd = tw_dev->srb[request_id];
 
-	scsi_dma_unmap(cmd);
+	if (cmd->SCp.phase == TW_PHASE_SGLIST)
+		scsi_dma_unmap(cmd);
 } /* End twa_unmap_scsi_data() */
 
 /* scsi_host_template initializer */
-- 
cgit v0.10.2


From 7b14f58ad65f9d74e4273fb45360cfea824495aa Mon Sep 17 00:00:00 2001
From: adam radford <aradford@gmail.com>
Date: Mon, 11 May 2009 14:55:55 -0700
Subject: [SCSI] 3w-xxxx: scsi_dma_unmap fix

This patch fixes the following regression that occurred during the
scsi_dma_map()/unmap()
changes when compiling with CONFIG_DMA_API_DEBUG=y :

WARNING: at lib/dma-debug.c:496 check_unmap+0x142/0x542()
Hardware name:
3w-xxxx 0000:02:02.0: DMA-API: device driver tries to free DMA memory
it has not allocated [device address=0x0000000000000000] [size=36
bytes]

Signed-off-by: Adam Radford <aradford@gmail.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index c03f1d2..faa0fcf 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -6,7 +6,7 @@
    		     Arnaldo Carvalho de Melo <acme@conectiva.com.br>
                      Brad Strand <linux@3ware.com>
 
-   Copyright (C) 1999-2007 3ware Inc.
+   Copyright (C) 1999-2009 3ware Inc.
 
    Kernel compatiblity By: 	Andre Hedrick <andre@suse.com>
    Non-Copyright (C) 2000	Andre Hedrick <andre@suse.com>
@@ -1294,7 +1294,8 @@ static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd)
 {
 	dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n");
 
-	scsi_dma_unmap(cmd);
+	if (cmd->SCp.phase == TW_PHASE_SGLIST)
+		scsi_dma_unmap(cmd);
 } /* End tw_unmap_scsi_data() */
 
 /* This function will reset a device extension */
diff --git a/drivers/scsi/3w-xxxx.h b/drivers/scsi/3w-xxxx.h
index 8e71e5e..a5a2ba2 100644
--- a/drivers/scsi/3w-xxxx.h
+++ b/drivers/scsi/3w-xxxx.h
@@ -6,7 +6,7 @@
    		     Arnaldo Carvalho de Melo <acme@conectiva.com.br>
                      Brad Strand <linux@3ware.com>
 
-   Copyright (C) 1999-2007 3ware Inc.
+   Copyright (C) 1999-2009 3ware Inc.
 
    Kernel compatiblity By:	Andre Hedrick <andre@suse.com>
    Non-Copyright (C) 2000	Andre Hedrick <andre@suse.com>
-- 
cgit v0.10.2


From 748285ccf7ea76d3d76d0d5f2945ad6fb91f5329 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Fri, 15 May 2009 10:50:57 -0500
Subject: dlm: use more NOFS allocation

Change some GFP_KERNEL allocations to use either GFP_NOFS or
ls_allocation (when available) which the fs sets to GFP_NOFS.
The point is to prevent allocations from going back into the
cluster fs in places where that might lead to deadlock.

Signed-off-by: David Teigland <teigland@redhat.com>

diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 858fba1..c4dfa1d 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -49,7 +49,8 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
 	spin_unlock(&ls->ls_recover_list_lock);
 
 	if (!found)
-		de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL);
+		de = kzalloc(sizeof(struct dlm_direntry) + len,
+			     ls->ls_allocation);
 	return de;
 }
 
@@ -211,7 +212,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
 
 	dlm_dir_clear(ls);
 
-	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
+	last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation);
 	if (!last_name)
 		goto out;
 
@@ -322,7 +323,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
 	if (namelen > DLM_RESNAME_MAXLEN)
 		return -EINVAL;
 
-	de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL);
+	de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation);
 	if (!de)
 		return -ENOMEM;
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 2559a97..cdb580a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -500,7 +500,7 @@ static void process_sctp_notification(struct connection *con,
 				return;
 			}
 
-			new_con = nodeid2con(nodeid, GFP_KERNEL);
+			new_con = nodeid2con(nodeid, GFP_NOFS);
 			if (!new_con)
 				return;
 
@@ -736,7 +736,7 @@ static int tcp_accept_from_sock(struct connection *con)
 	 *  the same time and the connections cross on the wire.
 	 *  In this case we store the incoming one in "othercon"
 	 */
-	newcon = nodeid2con(nodeid, GFP_KERNEL);
+	newcon = nodeid2con(nodeid, GFP_NOFS);
 	if (!newcon) {
 		result = -ENOMEM;
 		goto accept_err;
@@ -746,7 +746,7 @@ static int tcp_accept_from_sock(struct connection *con)
 		struct connection *othercon = newcon->othercon;
 
 		if (!othercon) {
-			othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
+			othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
 			if (!othercon) {
 				log_print("failed to allocate incoming socket");
 				mutex_unlock(&newcon->sock_mutex);
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 2afb770..b128775 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -48,7 +48,7 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
 	struct dlm_member *memb;
 	int w, error;
 
-	memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+	memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
 	if (!memb)
 		return -ENOMEM;
 
@@ -143,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls)
 
 	ls->ls_total_weight = total;
 
-	array = kmalloc(sizeof(int) * total, GFP_KERNEL);
+	array = kmalloc(sizeof(int) * total, ls->ls_allocation);
 	if (!array)
 		return;
 
@@ -226,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 			continue;
 		log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
 
-		memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+		memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
 		if (!memb)
 			return -ENOMEM;
 		memb->nodeid = rv->new[i];
@@ -341,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls)
 	int *ids = NULL, *new = NULL;
 	int error, ids_count = 0, new_count = 0;
 
-	rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
+	rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation);
 	if (!rv)
 		return -ENOMEM;
 
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index daa4183..7a2307c 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
 	struct rq_entry *e;
 	int length = ms->m_header.h_length - sizeof(struct dlm_message);
 
-	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
+	e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory len %d", length);
 		return;
-- 
cgit v0.10.2


From 28e43a519b9edb8277fc6b490ad17aa38c45a02b Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 15 May 2009 10:16:45 -0700
Subject: RDMA/nes: Fix off-by-one bugs in reset_adapter_ne020() and
 init_serdes()

With a postfix increment, i is incremented one past 10K/5K before the
loop ends, so the error messages will be displayed too soon if the
test succeeds on the last iteration.  Fix the comparisons to be >
instead of >=.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index b832a7b..4a84d02 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -667,7 +667,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
 		i = 0;
 		while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
 			mdelay(1);
-		if (i >= 10000) {
+		if (i > 10000) {
 			nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n");
 			return 0;
 		}
@@ -675,7 +675,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
 		i = 0;
 		while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000)
 			mdelay(1);
-		if (i >= 10000) {
+		if (i > 10000) {
 			printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
 			       nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
 			return 0;
@@ -701,7 +701,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
 	i = 0;
 	while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
 		mdelay(1);
-	if (i >= 10000) {
+	if (i > 10000) {
 		nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n");
 		return 0;
 	}
@@ -711,7 +711,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
 	while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
 			& 0x0000000f)) != 0x0000000f) && i++ < 5000)
 		mdelay(1);
-	if (i >= 5000) {
+	if (i > 5000) {
 		nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp);
 		return 0;
 	}
@@ -722,7 +722,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
 		while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
 				& 0x0000000f)) != 0x0000000f) && i++ < 5000)
 			mdelay(1);
-		if (i >= 5000) {
+		if (i > 5000) {
 			nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp);
 			return 0;
 		}
@@ -792,7 +792,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
 		while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
 				& 0x0000000f)) != 0x0000000f) && i++ < 5000)
 			mdelay(1);
-		if (i >= 5000) {
+		if (i > 5000) {
 			nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp);
 			return 1;
 		}
@@ -815,7 +815,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
 			while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
 				& 0x0000000f)) != 0x0000000f) && (i++ < 5000))
 				mdelay(1);
-			if (i >= 5000) {
+			if (i > 5000) {
 				printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp);
 				/* return 1; */
 			}
-- 
cgit v0.10.2


From 88fc86c283d9c3854e67e4155808027bc2519eb6 Mon Sep 17 00:00:00 2001
From: GeunSik Lim <leemgs1@gmail.com>
Date: Thu, 14 May 2009 17:23:38 +0900
Subject: tracing: Append prompt in /debug/tracing/README file

append prompt in /debug/tracing/README file.

This is trivial issue. Fix typo Mini Howto file(README) for ftrace.

[ Impact: cleanup ]

Signed-off-by: GeunSik Lim <geunsik.lim@samsung.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: williams <williams@redhat.com>
LKML-Reference: <1242289418.31161.45.camel@centos51>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a884c09..cda81ec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2380,7 +2380,7 @@ static const char readme_msg[] =
 	"# echo print-parent > /debug/tracing/trace_options\n"
 	"# echo 1 > /debug/tracing/tracing_enabled\n"
 	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
-	"echo 0 > /debug/tracing/tracing_enabled\n"
+	"# echo 0 > /debug/tracing/tracing_enabled\n"
 ;
 
 static ssize_t
-- 
cgit v0.10.2


From b4ecc126991b30fe5f9a59dfacda046aeac124b2 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 13 May 2009 17:16:55 -0700
Subject: x86: Fix performance regression caused by paravirt_ops on native
 kernels

Xiaohui Xin and some other folks at Intel have been looking into what's
behind the performance hit of paravirt_ops when running native.

It appears that the hit is entirely due to the paravirtualized
spinlocks introduced by:

 | commit 8efcbab674de2bee45a2e4cdf97de16b8e609ac8
 | Date:   Mon Jul 7 12:07:51 2008 -0700
 |
 |     paravirt: introduce a "lock-byte" spinlock implementation

The extra call/return in the spinlock path is somehow
causing an increase in the cycles/instruction of somewhere around 2-7%
(seems to vary quite a lot from test to test).  The working theory is
that the CPU's pipeline is getting upset about the
call->call->locked-op->return->return, and seems to be failing to
speculate (though I haven't seen anything definitive about the precise
reasons).  This doesn't entirely make sense, because the performance
hit is also visible on unlock and other operations which don't involve
locked instructions.  But spinlock operations clearly swamp all the
other pvops operations, even though I can't imagine that they're
nearly as common (there's only a .05% increase in instructions
executed).

If I disable just the pv-spinlock calls, my tests show that pvops is
identical to non-pvops performance on native (my measurements show that
it is actually about .1% faster, but Xiaohui shows a .05% slowdown).

Summary of results, averaging 10 runs of the "mmperf" test, using a
no-pvops build as baseline:

		nopv		Pv-nospin	Pv-spin
CPU cycles	100.00%		99.89%		102.18%
instructions	100.00%		100.10%		100.15%
CPI		100.00%		99.79%		102.03%
cache ref	100.00%		100.84%		100.28%
cache miss	100.00%		90.47%		88.56%
cache miss rate	100.00%		89.72%		88.31%
branches	100.00%		99.93%		100.04%
branch miss	100.00%		103.66%		107.72%
branch miss rt	100.00%		103.73%		107.67%
wallclock	100.00%		99.90%		102.20%

The clear effect here is that the 2% increase in CPI is
directly reflected in the final wallclock time.

(The other interesting effect is that the more ops are
out of line calls via pvops, the lower the cache access
and miss rates.  Not too surprising, but it suggests that
the non-pvops kernel is over-inlined.  On the flipside,
the branch misses go up correspondingly...)

So, what's the fix?

Paravirt patching turns all the pvops calls into direct calls, so
_spin_lock etc do end up having direct calls.  For example, the compiler
generated code for paravirtualized _spin_lock is:

<_spin_lock+0>:		mov    %gs:0xb4c8,%rax
<_spin_lock+9>:		incl   0xffffffffffffe044(%rax)
<_spin_lock+15>:	callq  *0xffffffff805a5b30
<_spin_lock+22>:	retq

The indirect call will get patched to:
<_spin_lock+0>:		mov    %gs:0xb4c8,%rax
<_spin_lock+9>:		incl   0xffffffffffffe044(%rax)
<_spin_lock+15>:	callq <__ticket_spin_lock>
<_spin_lock+20>:	nop; nop		/* or whatever 2-byte nop */
<_spin_lock+22>:	retq

One possibility is to inline _spin_lock, etc, when building an
optimised kernel (ie, when there's no spinlock/preempt
instrumentation/debugging enabled).  That will remove the outer
call/return pair, returning the instruction stream to a single
call/return, which will presumably execute the same as the non-pvops
case.  The downsides arel 1) it will replicate the
preempt_disable/enable code at eack lock/unlock callsite; this code is
fairly small, but not nothing; and 2) the spinlock definitions are
already a very heavily tangled mass of #ifdefs and other preprocessor
magic, and making any changes will be non-trivial.

The other obvious answer is to disable pv-spinlocks.  Making them a
separate config option is fairly easy, and it would be trivial to
enable them only when Xen is enabled (as the only non-default user).
But it doesn't really address the common case of a distro build which
is going to have Xen support enabled, and leaves the open question of
whether the native performance cost of pv-spinlocks is worth the
performance improvement on a loaded Xen system (10% saving of overall
system CPU when guests block rather than spin).  Still it is a
reasonable short-term workaround.

[ Impact: fix pvops performance regression when running native ]

Analysed-by: "Xin Xiaohui" <xiaohui.xin@intel.com>
Analysed-by: "Li Xin" <xin.li@intel.com>
Analysed-by: "Nakajima Jun" <jun.nakajima@intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Xen-devel <xen-devel@lists.xensource.com>
LKML-Reference: <4A0B62F7.5030802@goop.org>
[ fixed the help text ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index df9e885..a6efe0a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -498,6 +498,19 @@ config PARAVIRT
 	  over full virtualization.  However, when run without a hypervisor
 	  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_SPINLOCKS
+	bool "Paravirtualization layer for spinlocks"
+	depends on PARAVIRT && SMP && EXPERIMENTAL
+	---help---
+	  Paravirtualized spinlocks allow a pvops backend to replace the
+	  spinlock implementation with something virtualization-friendly
+	  (for example, block the virtual CPU rather than spinning).
+
+	  Unfortunately the downside is an up to 5% performance hit on
+	  native kernels, with various workloads.
+
+	  If you are unsure how to answer this question, answer N.
+
 config PARAVIRT_CLOCK
 	bool
 	default n
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 378e369..a53da00 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1443,7 +1443,7 @@ u64 _paravirt_ident_64(u64);
 
 #define paravirt_nop	((void *)_paravirt_nop)
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
 static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
 {
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e5e6caf..b7e5db8 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -172,7 +172,7 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
 }
 
-#ifndef CONFIG_PARAVIRT
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
 
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
@@ -206,7 +206,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
 	__raw_spin_lock(lock);
 }
 
-#endif
+#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 145cce7..88d1bfc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -89,7 +89,8 @@ obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8e45f44..9faf43b 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -134,7 +134,9 @@ static void *get_call_destination(u8 type)
 		.pv_irq_ops = pv_irq_ops,
 		.pv_apic_ops = pv_apic_ops,
 		.pv_mmu_ops = pv_mmu_ops,
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
 		.pv_lock_ops = pv_lock_ops,
+#endif
 	};
 	return *((void **)&tmpl + type);
 }
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3b767d0..172438f 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -9,5 +9,6 @@ obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
 			grant-table.o suspend.o
 
-obj-$(CONFIG_SMP)		+= smp.o spinlock.o
-obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
\ No newline at end of file
+obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
+obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2013946..ca6596b 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -62,15 +62,26 @@ void xen_setup_vcpu_info_placement(void);
 #ifdef CONFIG_SMP
 void xen_smp_init(void);
 
-void __init xen_init_spinlocks(void);
-__cpuinit void xen_init_lock_cpu(int cpu);
-void xen_uninit_lock_cpu(int cpu);
-
 extern cpumask_var_t xen_cpu_initialized_map;
 #else
 static inline void xen_smp_init(void) {}
 #endif
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init xen_init_spinlocks(void);
+__cpuinit void xen_init_lock_cpu(int cpu);
+void xen_uninit_lock_cpu(int cpu);
+#else
+static inline void xen_init_spinlocks(void)
+{
+}
+static inline void xen_init_lock_cpu(int cpu)
+{
+}
+static inline void xen_uninit_lock_cpu(int cpu)
+{
+}
+#endif
 
 /* Declare an asm function, along with symbols needed to make it
    inlineable */
-- 
cgit v0.10.2


From a2bd40d215b0997e21a0cfd06c1aa474ec3a0944 Mon Sep 17 00:00:00 2001
From: Hartley Sweeten <hartleys@visionengravers.com>
Date: Mon, 4 May 2009 19:09:25 +0100
Subject: [ARM] 5504/1: ep93xx: Merge all edb93xx platforms

The Cirrus Logic EDB93xx development board platform init files
share redundant code. The only differences are in the flash
memory configuration, MACH_TYPE, and additional on-board
I2C devices. This patch merges all of them into one file.

Cc: Lennert Buytenhek <buytenh@wantstofly.org>
Cc: Herbert Valerio Riedel <hvr@gnu.org>
Cc: Toufeeq Hussain <toufeeq_hussain@infosys.com>
Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Acked-by: Ryan Mallon <ryan@bluewatersys.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/Kconfig b/arch/arm/mach-ep93xx/Kconfig
index 56bddce..9414eb6 100644
--- a/arch/arm/mach-ep93xx/Kconfig
+++ b/arch/arm/mach-ep93xx/Kconfig
@@ -15,44 +15,62 @@ config MACH_ADSSPHERE
 	  Say 'Y' here if you want your kernel to support the ADS
 	  Sphere board.
 
+config MACH_EDB93XX
+	bool
+	default n
+
+config MACH_EDB9301
+	bool "Support Cirrus Logic EDB9301"
+	select MACH_EDB93XX
+	help
+	  Say 'Y' here if you want your kernel to support the Cirrus
+	  Logic EDB9301 Evaluation Board.
+
 config MACH_EDB9302
 	bool "Support Cirrus Logic EDB9302"
+	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
 	  Logic EDB9302 Evaluation Board.
 
 config MACH_EDB9302A
 	bool "Support Cirrus Logic EDB9302A"
+	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
 	  Logic EDB9302A Evaluation Board.
 
 config MACH_EDB9307
 	bool "Support Cirrus Logic EDB9307"
+	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
 	  Logic EDB9307 Evaluation Board.
 
 config MACH_EDB9307A
 	bool "Support Cirrus Logic EDB9307A"
+	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
 	  Logic EDB9307A Evaluation Board.
 
 config MACH_EDB9312
 	bool "Support Cirrus Logic EDB9312"
+	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
 	  Logic EDB9312 Evaluation Board.
 
 config MACH_EDB9315
 	bool "Support Cirrus Logic EDB9315"
+	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
 	  Logic EDB9315 Evaluation Board.
 
 config MACH_EDB9315A
 	bool "Support Cirrus Logic EDB9315A"
+	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
 	  Logic EDB9315A Evaluation Board.
@@ -64,29 +82,29 @@ config MACH_GESBC9312
 	  GESBC-9312-sx board.
 
 config MACH_MICRO9
-        bool
-        default n
+	bool
+	default n
 
 config MACH_MICRO9H
-       bool "Support Contec Hypercontrol Micro9-H"
-       select MACH_MICRO9
-       help
-         Say 'Y' here if you want your kernel to support the
-         Contec Hypercontrol Micro9-H board.
+	bool "Support Contec Hypercontrol Micro9-H"
+	select MACH_MICRO9
+	help
+	  Say 'Y' here if you want your kernel to support the
+	  Contec Hypercontrol Micro9-H board.
 
 config MACH_MICRO9M
-       bool "Support Contec Hypercontrol Micro9-M"
-       select MACH_MICRO9
-       help
-         Say 'Y' here if you want your kernel to support the
-         Contec Hypercontrol Micro9-M board.
+	bool "Support Contec Hypercontrol Micro9-M"
+	select MACH_MICRO9
+	help
+	  Say 'Y' here if you want your kernel to support the
+	  Contec Hypercontrol Micro9-M board.
 
 config MACH_MICRO9L
-       bool "Support Contec Hypercontrol Micro9-L"
-       select MACH_MICRO9
-       help
-         Say 'Y' here if you want your kernel to support the
-         Contec Hypercontrol Micro9-L board.
+	bool "Support Contec Hypercontrol Micro9-L"
+	select MACH_MICRO9
+	help
+	  Say 'Y' here if you want your kernel to support the
+	  Contec Hypercontrol Micro9-L board.
 
 config MACH_TS72XX
 	bool "Support Technologic Systems TS-72xx SBC"
diff --git a/arch/arm/mach-ep93xx/Makefile b/arch/arm/mach-ep93xx/Makefile
index 9522e20..eae6199 100644
--- a/arch/arm/mach-ep93xx/Makefile
+++ b/arch/arm/mach-ep93xx/Makefile
@@ -7,13 +7,7 @@ obj-n			:=
 obj-			:=
 
 obj-$(CONFIG_MACH_ADSSPHERE)	+= adssphere.o
-obj-$(CONFIG_MACH_EDB9302)	+= edb9302.o
-obj-$(CONFIG_MACH_EDB9302A)	+= edb9302a.o
-obj-$(CONFIG_MACH_EDB9307)	+= edb9307.o
-obj-$(CONFIG_MACH_EDB9307A)	+= edb9307a.o
-obj-$(CONFIG_MACH_EDB9312)	+= edb9312.o
-obj-$(CONFIG_MACH_EDB9315)	+= edb9315.o
-obj-$(CONFIG_MACH_EDB9315A)	+= edb9315a.o
+obj-$(CONFIG_MACH_EDB93XX)	+= edb93xx.o
 obj-$(CONFIG_MACH_GESBC9312)	+= gesbc9312.o
 obj-$(CONFIG_MACH_MICRO9)	+= micro9.o
 obj-$(CONFIG_MACH_TS72XX)	+= ts72xx.o
diff --git a/arch/arm/mach-ep93xx/edb9302.c b/arch/arm/mach-ep93xx/edb9302.c
deleted file mode 100644
index 8bf8d7c..0000000
--- a/arch/arm/mach-ep93xx/edb9302.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/edb9302.c
- * Cirrus Logic EDB9302 support.
- *
- * Copyright (C) 2006 George Kashperko <george@chas.com.ua>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/mtd/physmap.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/i2c.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-
-static struct physmap_flash_data edb9302_flash_data = {
-	.width		= 2,
-};
-
-static struct resource edb9302_flash_resource = {
-	.start		= EP93XX_CS6_PHYS_BASE,
-	.end		= EP93XX_CS6_PHYS_BASE + SZ_16M - 1,
-	.flags		= IORESOURCE_MEM,
-};
-
-static struct platform_device edb9302_flash = {
-	.name		= "physmap-flash",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &edb9302_flash_data,
-	},
-	.num_resources	= 1,
-	.resource	= &edb9302_flash_resource,
-};
-
-static struct ep93xx_eth_data edb9302_eth_data = {
-	.phy_id		= 1,
-};
-
-static void __init edb9302_init_machine(void)
-{
-	ep93xx_init_devices();
-	platform_device_register(&edb9302_flash);
-
-	ep93xx_register_eth(&edb9302_eth_data, 1);
-}
-
-MACHINE_START(EDB9302, "Cirrus Logic EDB9302 Evaluation Board")
-	/* Maintainer: George Kashperko <george@chas.com.ua> */
-	.phys_io	= EP93XX_APB_PHYS_BASE,
-	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
-	.map_io		= ep93xx_map_io,
-	.init_irq	= ep93xx_init_irq,
-	.timer		= &ep93xx_timer,
-	.init_machine	= edb9302_init_machine,
-MACHINE_END
diff --git a/arch/arm/mach-ep93xx/edb9302a.c b/arch/arm/mach-ep93xx/edb9302a.c
deleted file mode 100644
index a352c57..0000000
--- a/arch/arm/mach-ep93xx/edb9302a.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/edb9302a.c
- * Cirrus Logic EDB9302A support.
- *
- * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/mtd/physmap.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/i2c.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-
-static struct physmap_flash_data edb9302a_flash_data = {
-	.width		= 2,
-};
-
-static struct resource edb9302a_flash_resource = {
-	.start		= EP93XX_CS6_PHYS_BASE,
-	.end		= EP93XX_CS6_PHYS_BASE + SZ_16M - 1,
-	.flags		= IORESOURCE_MEM,
-};
-
-static struct platform_device edb9302a_flash = {
-	.name		= "physmap-flash",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &edb9302a_flash_data,
-	},
-	.num_resources	= 1,
-	.resource	= &edb9302a_flash_resource,
-};
-
-static struct ep93xx_eth_data edb9302a_eth_data = {
-	.phy_id		= 1,
-};
-
-static void __init edb9302a_init_machine(void)
-{
-	ep93xx_init_devices();
-	platform_device_register(&edb9302a_flash);
-
-	ep93xx_register_eth(&edb9302a_eth_data, 1);
-}
-
-MACHINE_START(EDB9302A, "Cirrus Logic EDB9302A Evaluation Board")
-	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.phys_io	= EP93XX_APB_PHYS_BASE,
-	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
-	.map_io		= ep93xx_map_io,
-	.init_irq	= ep93xx_init_irq,
-	.timer		= &ep93xx_timer,
-	.init_machine	= edb9302a_init_machine,
-MACHINE_END
diff --git a/arch/arm/mach-ep93xx/edb9307.c b/arch/arm/mach-ep93xx/edb9307.c
deleted file mode 100644
index 5ab22f6..0000000
--- a/arch/arm/mach-ep93xx/edb9307.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/edb9307.c
- * Cirrus Logic EDB9307 support.
- *
- * Copyright (C) 2007 Herbert Valerio Riedel <hvr@gnu.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/mtd/physmap.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/i2c.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-
-static struct physmap_flash_data edb9307_flash_data = {
-	.width		= 4,
-};
-
-static struct resource edb9307_flash_resource = {
-	.start		= EP93XX_CS6_PHYS_BASE,
-	.end		= EP93XX_CS6_PHYS_BASE + SZ_32M - 1,
-	.flags		= IORESOURCE_MEM,
-};
-
-static struct platform_device edb9307_flash = {
-	.name		= "physmap-flash",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &edb9307_flash_data,
-	},
-	.num_resources	= 1,
-	.resource	= &edb9307_flash_resource,
-};
-
-static struct ep93xx_eth_data edb9307_eth_data = {
-	.phy_id		= 1,
-};
-
-static void __init edb9307_init_machine(void)
-{
-	ep93xx_init_devices();
-	platform_device_register(&edb9307_flash);
-
-	ep93xx_register_eth(&edb9307_eth_data, 1);
-}
-
-MACHINE_START(EDB9307, "Cirrus Logic EDB9307 Evaluation Board")
-	/* Maintainer: Herbert Valerio Riedel <hvr@gnu.org> */
-	.phys_io	= EP93XX_APB_PHYS_BASE,
-	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
-	.map_io		= ep93xx_map_io,
-	.init_irq	= ep93xx_init_irq,
-	.timer		= &ep93xx_timer,
-	.init_machine	= edb9307_init_machine,
-MACHINE_END
diff --git a/arch/arm/mach-ep93xx/edb9307a.c b/arch/arm/mach-ep93xx/edb9307a.c
deleted file mode 100644
index 6171167..0000000
--- a/arch/arm/mach-ep93xx/edb9307a.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/edb9307a.c
- * Cirrus Logic EDB9307A support.
- *
- * Copyright (C) 2008 H Hartley Sweeten <hsweeten@visionengravers.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/mtd/physmap.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/i2c.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-
-static struct physmap_flash_data edb9307a_flash_data = {
-	.width		= 2,
-};
-
-static struct resource edb9307a_flash_resource = {
-	.start		= EP93XX_CS6_PHYS_BASE,
-	.end		= EP93XX_CS6_PHYS_BASE + SZ_16M - 1,
-	.flags		= IORESOURCE_MEM,
-};
-
-static struct platform_device edb9307a_flash = {
-	.name		= "physmap-flash",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &edb9307a_flash_data,
-	},
-	.num_resources	= 1,
-	.resource	= &edb9307a_flash_resource,
-};
-
-static struct ep93xx_eth_data edb9307a_eth_data = {
-	.phy_id		= 1,
-};
-
-static struct i2c_board_info __initdata edb9307a_i2c_data[] = {
-	{
-		/* On-board battery backed RTC */
-		I2C_BOARD_INFO("isl1208", 0x6f),
-	},
-	/*
-	 * The I2C signals are also routed to the Expansion Connector (J4)
-	 */
-};
-
-static void __init edb9307a_init_machine(void)
-{
-	ep93xx_init_devices();
-	platform_device_register(&edb9307a_flash);
-
-	ep93xx_register_eth(&edb9307a_eth_data, 1);
-
-	ep93xx_init_i2c(edb9307a_i2c_data, ARRAY_SIZE(edb9307a_i2c_data));
-}
-
-MACHINE_START(EDB9307A, "Cirrus Logic EDB9307A Evaluation Board")
-	/* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */
-	.phys_io	= EP93XX_APB_PHYS_BASE,
-	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
-	.map_io		= ep93xx_map_io,
-	.init_irq	= ep93xx_init_irq,
-	.timer		= &ep93xx_timer,
-	.init_machine	= edb9307a_init_machine,
-MACHINE_END
diff --git a/arch/arm/mach-ep93xx/edb9312.c b/arch/arm/mach-ep93xx/edb9312.c
deleted file mode 100644
index d7179f6..0000000
--- a/arch/arm/mach-ep93xx/edb9312.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/edb9312.c
- * Cirrus Logic EDB9312 support.
- *
- * Copyright (C) 2006 Infosys Technologies Limited
- * 	Toufeeq Hussain	<toufeeq_hussain@infosys.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/mtd/physmap.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/i2c.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-
-static struct physmap_flash_data edb9312_flash_data = {
-	.width		= 4,
-};
-
-static struct resource edb9312_flash_resource = {
-	.start		= EP93XX_CS6_PHYS_BASE,
-	.end		= EP93XX_CS6_PHYS_BASE + SZ_32M - 1,
-	.flags		= IORESOURCE_MEM,
-};
-
-static struct platform_device edb9312_flash = {
-	.name		= "physmap-flash",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &edb9312_flash_data,
-	},
-	.num_resources	= 1,
-	.resource	= &edb9312_flash_resource,
-};
-
-static struct ep93xx_eth_data edb9312_eth_data = {
-	.phy_id		= 1,
-};
-
-static void __init edb9312_init_machine(void)
-{
-	ep93xx_init_devices();
-	platform_device_register(&edb9312_flash);
-
-	ep93xx_register_eth(&edb9312_eth_data, 1);
-}
-
-MACHINE_START(EDB9312, "Cirrus Logic EDB9312 Evaluation Board")
-	/* Maintainer: Toufeeq Hussain <toufeeq_hussain@infosys.com> */
-	.phys_io	= EP93XX_APB_PHYS_BASE,
-	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
-	.map_io		= ep93xx_map_io,
-	.init_irq	= ep93xx_init_irq,
-	.timer		= &ep93xx_timer,
-	.init_machine	= edb9312_init_machine,
-MACHINE_END
diff --git a/arch/arm/mach-ep93xx/edb9315.c b/arch/arm/mach-ep93xx/edb9315.c
deleted file mode 100644
index 025af6e..0000000
--- a/arch/arm/mach-ep93xx/edb9315.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/edb9315.c
- * Cirrus Logic EDB9315 support.
- *
- * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/mtd/physmap.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/i2c.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-
-static struct physmap_flash_data edb9315_flash_data = {
-	.width		= 4,
-};
-
-static struct resource edb9315_flash_resource = {
-	.start		= EP93XX_CS6_PHYS_BASE,
-	.end		= EP93XX_CS6_PHYS_BASE + SZ_32M - 1,
-	.flags		= IORESOURCE_MEM,
-};
-
-static struct platform_device edb9315_flash = {
-	.name		= "physmap-flash",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &edb9315_flash_data,
-	},
-	.num_resources	= 1,
-	.resource	= &edb9315_flash_resource,
-};
-
-static struct ep93xx_eth_data edb9315_eth_data = {
-	.phy_id		= 1,
-};
-
-static void __init edb9315_init_machine(void)
-{
-	ep93xx_init_devices();
-	platform_device_register(&edb9315_flash);
-
-	ep93xx_register_eth(&edb9315_eth_data, 1);
-}
-
-MACHINE_START(EDB9315, "Cirrus Logic EDB9315 Evaluation Board")
-	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.phys_io	= EP93XX_APB_PHYS_BASE,
-	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
-	.map_io		= ep93xx_map_io,
-	.init_irq	= ep93xx_init_irq,
-	.timer		= &ep93xx_timer,
-	.init_machine	= edb9315_init_machine,
-MACHINE_END
diff --git a/arch/arm/mach-ep93xx/edb9315a.c b/arch/arm/mach-ep93xx/edb9315a.c
deleted file mode 100644
index 4c9cc8a..0000000
--- a/arch/arm/mach-ep93xx/edb9315a.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * arch/arm/mach-ep93xx/edb9315a.c
- * Cirrus Logic EDB9315A support.
- *
- * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/mtd/physmap.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/i2c.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-
-static struct physmap_flash_data edb9315a_flash_data = {
-	.width		= 2,
-};
-
-static struct resource edb9315a_flash_resource = {
-	.start		= EP93XX_CS6_PHYS_BASE,
-	.end		= EP93XX_CS6_PHYS_BASE + SZ_16M - 1,
-	.flags		= IORESOURCE_MEM,
-};
-
-static struct platform_device edb9315a_flash = {
-	.name		= "physmap-flash",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &edb9315a_flash_data,
-	},
-	.num_resources	= 1,
-	.resource	= &edb9315a_flash_resource,
-};
-
-static struct ep93xx_eth_data edb9315a_eth_data = {
-	.phy_id		= 1,
-};
-
-static void __init edb9315a_init_machine(void)
-{
-	ep93xx_init_devices();
-	platform_device_register(&edb9315a_flash);
-
-	ep93xx_register_eth(&edb9315a_eth_data, 1);
-}
-
-MACHINE_START(EDB9315A, "Cirrus Logic EDB9315A Evaluation Board")
-	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.phys_io	= EP93XX_APB_PHYS_BASE,
-	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
-	.map_io		= ep93xx_map_io,
-	.init_irq	= ep93xx_init_irq,
-	.timer		= &ep93xx_timer,
-	.init_machine	= edb9315a_init_machine,
-MACHINE_END
diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c
new file mode 100644
index 0000000..e9e45b9
--- /dev/null
+++ b/arch/arm/mach-ep93xx/edb93xx.c
@@ -0,0 +1,217 @@
+/*
+ * arch/arm/mach-ep93xx/edb93xx.c
+ * Cirrus Logic EDB93xx Development Board support.
+ *
+ * EDB93XX, EDB9301, EDB9307A
+ * Copyright (C) 2008-2009 H Hartley Sweeten <hsweeten@visionengravers.com>
+ *
+ * EDB9302
+ * Copyright (C) 2006 George Kashperko <george@chas.com.ua>
+ *
+ * EDB9302A, EDB9315, EDB9315A
+ * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org>
+ *
+ * EDB9307
+ * Copyright (C) 2007 Herbert Valerio Riedel <hvr@gnu.org>
+ *
+ * EDB9312
+ * Copyright (C) 2006 Infosys Technologies Limited
+ *                    Toufeeq Hussain <toufeeq_hussain@infosys.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mtd/physmap.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/i2c.h>
+#include <mach/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+
+static struct physmap_flash_data edb93xx_flash_data;
+
+static struct resource edb93xx_flash_resource = {
+	.flags		= IORESOURCE_MEM,
+};
+
+static struct platform_device edb93xx_flash = {
+	.name		= "physmap-flash",
+	.id		= 0,
+	.dev		= {
+		.platform_data	= &edb93xx_flash_data,
+	},
+	.num_resources	= 1,
+	.resource	= &edb93xx_flash_resource,
+};
+
+static void __init __edb93xx_register_flash(unsigned int width,
+			resource_size_t start, resource_size_t size)
+{
+	edb93xx_flash_data.width	= width;
+	edb93xx_flash_resource.start	= start;
+	edb93xx_flash_resource.end	= start + size - 1;
+
+	platform_device_register(&edb93xx_flash);
+}
+
+static void __init edb93xx_register_flash(void)
+{
+	if (machine_is_edb9307() || machine_is_edb9312() ||
+	    machine_is_edb9315()) {
+		__edb93xx_register_flash(4, EP93XX_CS6_PHYS_BASE, SZ_32M);
+	} else {
+		__edb93xx_register_flash(2, EP93XX_CS6_PHYS_BASE, SZ_16M);
+	}
+}
+
+static struct ep93xx_eth_data edb93xx_eth_data = {
+	.phy_id		= 1,
+};
+
+static struct i2c_board_info __initdata edb93xxa_i2c_data[] = {
+	{
+		I2C_BOARD_INFO("isl1208", 0x6f),
+	},
+};
+
+static struct i2c_board_info __initdata edb93xx_i2c_data[] = {
+	{
+		I2C_BOARD_INFO("ds1337", 0x68),
+	},
+};
+
+static void __init edb93xx_register_i2c(void)
+{
+	if (machine_is_edb9302a() || machine_is_edb9307a() ||
+	    machine_is_edb9315a()) {
+		ep93xx_register_i2c(edb93xxa_i2c_data,
+				ARRAY_SIZE(edb93xxa_i2c_data));
+	} else if (machine_is_edb9307() || machine_is_edb9312() ||
+		   machine_is_edb9315()) {
+		ep93xx_register_i2c(edb93xx_i2c_data,
+				ARRAY_SIZE(edb93xx_i2c_data));
+	}
+}
+
+static void __init edb93xx_init_machine(void)
+{
+	ep93xx_init_devices();
+	edb93xx_register_flash();
+	ep93xx_register_eth(&edb93xx_eth_data, 1);
+	edb93xx_register_i2c();
+}
+
+
+#ifdef CONFIG_MACH_EDB9301
+MACHINE_START(EDB9301, "Cirrus Logic EDB9301 Evaluation Board")
+	/* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */
+	.phys_io	= EP93XX_APB_PHYS_BASE,
+	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.map_io		= ep93xx_map_io,
+	.init_irq	= ep93xx_init_irq,
+	.timer		= &ep93xx_timer,
+	.init_machine	= edb93xx_init_machine,
+MACHINE_END
+#endif
+
+#ifdef CONFIG_MACH_EDB9302
+MACHINE_START(EDB9302, "Cirrus Logic EDB9302 Evaluation Board")
+	/* Maintainer: George Kashperko <george@chas.com.ua> */
+	.phys_io	= EP93XX_APB_PHYS_BASE,
+	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.map_io		= ep93xx_map_io,
+	.init_irq	= ep93xx_init_irq,
+	.timer		= &ep93xx_timer,
+	.init_machine	= edb93xx_init_machine,
+MACHINE_END
+#endif
+
+#ifdef CONFIG_MACH_EDB9302A
+MACHINE_START(EDB9302A, "Cirrus Logic EDB9302A Evaluation Board")
+	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
+	.phys_io	= EP93XX_APB_PHYS_BASE,
+	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.map_io		= ep93xx_map_io,
+	.init_irq	= ep93xx_init_irq,
+	.timer		= &ep93xx_timer,
+	.init_machine	= edb93xx_init_machine,
+MACHINE_END
+#endif
+
+#ifdef CONFIG_MACH_EDB9307
+MACHINE_START(EDB9307, "Cirrus Logic EDB9307 Evaluation Board")
+	/* Maintainer: Herbert Valerio Riedel <hvr@gnu.org> */
+	.phys_io	= EP93XX_APB_PHYS_BASE,
+	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.map_io		= ep93xx_map_io,
+	.init_irq	= ep93xx_init_irq,
+	.timer		= &ep93xx_timer,
+	.init_machine	= edb93xx_init_machine,
+MACHINE_END
+#endif
+
+#ifdef CONFIG_MACH_EDB9307A
+MACHINE_START(EDB9307A, "Cirrus Logic EDB9307A Evaluation Board")
+	/* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */
+	.phys_io	= EP93XX_APB_PHYS_BASE,
+	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.map_io		= ep93xx_map_io,
+	.init_irq	= ep93xx_init_irq,
+	.timer		= &ep93xx_timer,
+	.init_machine	= edb93xx_init_machine,
+MACHINE_END
+#endif
+
+#ifdef CONFIG_MACH_EDB9312
+MACHINE_START(EDB9312, "Cirrus Logic EDB9312 Evaluation Board")
+	/* Maintainer: Toufeeq Hussain <toufeeq_hussain@infosys.com> */
+	.phys_io	= EP93XX_APB_PHYS_BASE,
+	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.map_io		= ep93xx_map_io,
+	.init_irq	= ep93xx_init_irq,
+	.timer		= &ep93xx_timer,
+	.init_machine	= edb93xx_init_machine,
+MACHINE_END
+#endif
+
+#ifdef CONFIG_MACH_EDB9315
+MACHINE_START(EDB9315, "Cirrus Logic EDB9315 Evaluation Board")
+	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
+	.phys_io	= EP93XX_APB_PHYS_BASE,
+	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.map_io		= ep93xx_map_io,
+	.init_irq	= ep93xx_init_irq,
+	.timer		= &ep93xx_timer,
+	.init_machine	= edb93xx_init_machine,
+MACHINE_END
+#endif
+
+#ifdef CONFIG_MACH_EDB9315A
+MACHINE_START(EDB9315A, "Cirrus Logic EDB9315A Evaluation Board")
+	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
+	.phys_io	= EP93XX_APB_PHYS_BASE,
+	.io_pg_offst	= ((EP93XX_APB_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.map_io		= ep93xx_map_io,
+	.init_irq	= ep93xx_init_irq,
+	.timer		= &ep93xx_timer,
+	.init_machine	= edb93xx_init_machine,
+MACHINE_END
+#endif
-- 
cgit v0.10.2


From 4484079d517c2b6521621be0b1ea246ccc55c7d7 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 15 May 2009 23:30:50 +0200
Subject: PM: check sysdev_suspend(PMSG_FREEZE) return value

Check the return value of sysdev_suspend().  I think this was a typo.
Without this change, the following "if" check is always false.
I also changed the error message so it's distinguishable from the
similar message a few lines above.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e71ca9c..b0dc9e7 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -241,9 +241,9 @@ static int create_image(int platform_mode)
 
 	local_irq_disable();
 
-	sysdev_suspend(PMSG_FREEZE);
+	error = sysdev_suspend(PMSG_FREEZE);
 	if (error) {
-		printk(KERN_ERR "PM: Some devices failed to power down, "
+		printk(KERN_ERR "PM: Some system devices failed to power down, "
 			"aborting hibernation\n");
 		goto Enable_irqs;
 	}
-- 
cgit v0.10.2


From 812a2cca295ee7f56cd1b988a0f93646285c214a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sat, 16 May 2009 10:00:49 +0200
Subject: ALSA: hda - Split codec->name to vendor and chip name strings

Split the name string in hda_codec struct to vendor_name and chip_name
strings to be stored directly from the preset name.
Since mostly only the chip name is referred in many patch_*.c, this
results in the reduction of many codes in the end.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index b91f6ed..77385de 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -623,7 +623,10 @@ static int get_codec_name(struct hda_codec *codec)
 	const struct hda_vendor_id *c;
 	const char *vendor = NULL;
 	u16 vendor_id = codec->vendor_id >> 16;
-	char tmp[16], name[32];
+	char tmp[16];
+
+	if (codec->vendor_name)
+		goto get_chip_name;
 
 	for (c = hda_vendor_ids; c->id; c++) {
 		if (c->id == vendor_id) {
@@ -635,14 +638,21 @@ static int get_codec_name(struct hda_codec *codec)
 		sprintf(tmp, "Generic %04x", vendor_id);
 		vendor = tmp;
 	}
+	codec->vendor_name = kstrdup(vendor, GFP_KERNEL);
+	if (!codec->vendor_name)
+		return -ENOMEM;
+
+ get_chip_name:
+	if (codec->chip_name)
+		return 0;
+
 	if (codec->preset && codec->preset->name)
-		snprintf(name, sizeof(name), "%s %s", vendor,
-			 codec->preset->name);
-	else
-		snprintf(name, sizeof(name), "%s ID %x", vendor,
-			 codec->vendor_id & 0xffff);
-	codec->name = kstrdup(name, GFP_KERNEL);
-	if (!codec->name)
+		codec->chip_name = kstrdup(codec->preset->name, GFP_KERNEL);
+	else {
+		sprintf(tmp, "ID %x", codec->vendor_id & 0xffff);
+		codec->chip_name = kstrdup(tmp, GFP_KERNEL);
+	}
+	if (!codec->chip_name)
 		return -ENOMEM;
 	return 0;
 }
@@ -848,7 +858,8 @@ static void snd_hda_codec_free(struct hda_codec *codec)
 	module_put(codec->owner);
 	free_hda_cache(&codec->amp_cache);
 	free_hda_cache(&codec->cmd_cache);
-	kfree(codec->name);
+	kfree(codec->vendor_name);
+	kfree(codec->chip_name);
 	kfree(codec->modelname);
 	kfree(codec->wcaps);
 	kfree(codec);
@@ -989,15 +1000,16 @@ int snd_hda_codec_configure(struct hda_codec *codec)
 	int err;
 
 	codec->preset = find_codec_preset(codec);
-	if (!codec->name) {
+	if (!codec->vendor_name || !codec->chip_name) {
 		err = get_codec_name(codec);
 		if (err < 0)
 			return err;
 	}
 	/* audio codec should override the mixer name */
 	if (codec->afg || !*codec->bus->card->mixername)
-		strlcpy(codec->bus->card->mixername, codec->name,
-			sizeof(codec->bus->card->mixername));
+		snprintf(codec->bus->card->mixername,
+			 sizeof(codec->bus->card->mixername),
+			 "%s %s", codec->vendor_name, codec->chip_name);
 
 	if (is_generic_config(codec)) {
 		err = snd_hda_parse_generic_codec(codec);
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index cd8979c..c5bd40f 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -748,7 +748,8 @@ struct hda_codec {
 	/* detected preset */
 	const struct hda_codec_preset *preset;
 	struct module *owner;
-	const char *name;	/* codec name */
+	const char *vendor_name;	/* codec vendor name */
+	const char *chip_name;		/* codec chip name */
 	const char *modelname;	/* model name for preset */
 
 	/* set by patch */
diff --git a/sound/pci/hda/hda_hwdep.c b/sound/pci/hda/hda_hwdep.c
index 1c57505..6812fbe 100644
--- a/sound/pci/hda/hda_hwdep.c
+++ b/sound/pci/hda/hda_hwdep.c
@@ -242,7 +242,8 @@ CODEC_INFO_SHOW(subsystem_id);
 CODEC_INFO_SHOW(revision_id);
 CODEC_INFO_SHOW(afg);
 CODEC_INFO_SHOW(mfg);
-CODEC_INFO_STR_SHOW(name);
+CODEC_INFO_STR_SHOW(vendor_name);
+CODEC_INFO_STR_SHOW(chip_name);
 CODEC_INFO_STR_SHOW(modelname);
 
 #define CODEC_INFO_STORE(type)					\
@@ -275,7 +276,8 @@ static ssize_t type##_store(struct device *dev,			\
 CODEC_INFO_STORE(vendor_id);
 CODEC_INFO_STORE(subsystem_id);
 CODEC_INFO_STORE(revision_id);
-CODEC_INFO_STR_STORE(name);
+CODEC_INFO_STR_STORE(vendor_name);
+CODEC_INFO_STR_STORE(chip_name);
 CODEC_INFO_STR_STORE(modelname);
 
 #define CODEC_ACTION_STORE(type)				\
@@ -499,7 +501,8 @@ static struct device_attribute codec_attrs[] = {
 	CODEC_ATTR_RW(revision_id),
 	CODEC_ATTR_RO(afg),
 	CODEC_ATTR_RO(mfg),
-	CODEC_ATTR_RW(name),
+	CODEC_ATTR_RW(vendor_name),
+	CODEC_ATTR_RW(chip_name),
 	CODEC_ATTR_RW(modelname),
 	CODEC_ATTR_RW(init_verbs),
 	CODEC_ATTR_RW(hints),
diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c
index 93d7499..418c5d1 100644
--- a/sound/pci/hda/hda_proc.c
+++ b/sound/pci/hda/hda_proc.c
@@ -466,8 +466,12 @@ static void print_codec_info(struct snd_info_entry *entry,
 	hda_nid_t nid;
 	int i, nodes;
 
-	snd_iprintf(buffer, "Codec: %s\n",
-		    codec->name ? codec->name : "Not Set");
+	snd_iprintf(buffer, "Codec: ");
+	if (codec->vendor_name && codec->chip_name)
+		snd_iprintf(buffer, "%s %s\n",
+			    codec->vendor_name, codec->chip_name);
+	else
+		snd_iprintf(buffer, "Not Set\n");
 	snd_iprintf(buffer, "Address: %d\n", codec->addr);
 	snd_iprintf(buffer, "Function Id: 0x%x\n", codec->function_id);
 	snd_iprintf(buffer, "Vendor Id: 0x%08x\n", codec->vendor_id);
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1bf9d05..4bc3e7e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -277,13 +277,13 @@ struct alc_spec {
 						 */
 	unsigned int num_init_verbs;
 
-	char *stream_name_analog;	/* analog PCM stream */
+	char stream_name_analog[16];	/* analog PCM stream */
 	struct hda_pcm_stream *stream_analog_playback;
 	struct hda_pcm_stream *stream_analog_capture;
 	struct hda_pcm_stream *stream_analog_alt_playback;
 	struct hda_pcm_stream *stream_analog_alt_capture;
 
-	char *stream_name_digital;	/* digital PCM stream */
+	char stream_name_digital[16];	/* digital PCM stream */
 	struct hda_pcm_stream *stream_digital_playback;
 	struct hda_pcm_stream *stream_digital_capture;
 
@@ -3169,7 +3169,10 @@ static int alc_build_pcms(struct hda_codec *codec)
 	if (spec->no_analog)
 		goto skip_analog;
 
+	snprintf(spec->stream_name_analog, sizeof(spec->stream_name_analog),
+		 "%s Analog", codec->chip_name);
 	info->name = spec->stream_name_analog;
+	
 	if (spec->stream_analog_playback) {
 		if (snd_BUG_ON(!spec->multiout.dac_nids))
 			return -EINVAL;
@@ -3195,6 +3198,9 @@ static int alc_build_pcms(struct hda_codec *codec)
  skip_analog:
 	/* SPDIF for stream index #1 */
 	if (spec->multiout.dig_out_nid || spec->dig_in_nid) {
+		snprintf(spec->stream_name_digital,
+			 sizeof(spec->stream_name_digital),
+			 "%s Digital", codec->chip_name);
 		codec->num_pcms = 2;
 	        codec->slave_dig_outs = spec->multiout.slave_dig_outs;
 		info = spec->pcm_rec + 1;
@@ -4432,12 +4438,10 @@ static int patch_alc880(struct hda_codec *codec)
 	if (board_config != ALC880_AUTO)
 		setup_preset(spec, &alc880_presets[board_config]);
 
-	spec->stream_name_analog = "ALC880 Analog";
 	spec->stream_analog_playback = &alc880_pcm_analog_playback;
 	spec->stream_analog_capture = &alc880_pcm_analog_capture;
 	spec->stream_analog_alt_capture = &alc880_pcm_analog_alt_capture;
 
-	spec->stream_name_digital = "ALC880 Digital";
 	spec->stream_digital_playback = &alc880_pcm_digital_playback;
 	spec->stream_digital_capture = &alc880_pcm_digital_capture;
 
@@ -6078,11 +6082,9 @@ static int patch_alc260(struct hda_codec *codec)
 	if (board_config != ALC260_AUTO)
 		setup_preset(spec, &alc260_presets[board_config]);
 
-	spec->stream_name_analog = "ALC260 Analog";
 	spec->stream_analog_playback = &alc260_pcm_analog_playback;
 	spec->stream_analog_capture = &alc260_pcm_analog_capture;
 
-	spec->stream_name_digital = "ALC260 Digital";
 	spec->stream_digital_playback = &alc260_pcm_digital_playback;
 	spec->stream_digital_capture = &alc260_pcm_digital_capture;
 
@@ -7337,14 +7339,6 @@ static int patch_alc882(struct hda_codec *codec)
 	if (board_config != ALC882_AUTO)
 		setup_preset(spec, &alc882_presets[board_config]);
 
-	if (codec->vendor_id == 0x10ec0885) {
-		spec->stream_name_analog = "ALC885 Analog";
-		spec->stream_name_digital = "ALC885 Digital";
-	} else {
-		spec->stream_name_analog = "ALC882 Analog";
-		spec->stream_name_digital = "ALC882 Digital";
-	}
-
 	spec->stream_analog_playback = &alc882_pcm_analog_playback;
 	spec->stream_analog_capture = &alc882_pcm_analog_capture;
 	/* FIXME: setup DAC5 */
@@ -9232,13 +9226,6 @@ static int patch_alc883(struct hda_codec *codec)
 
 	switch (codec->vendor_id) {
 	case 0x10ec0888:
-		if (codec->revision_id == 0x100101) {
-			spec->stream_name_analog = "ALC1200 Analog";
-			spec->stream_name_digital = "ALC1200 Digital";
-		} else {
-			spec->stream_name_analog = "ALC888 Analog";
-			spec->stream_name_digital = "ALC888 Digital";
-		}
 		if (!spec->num_adc_nids) {
 			spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids);
 			spec->adc_nids = alc883_adc_nids;
@@ -9249,8 +9236,6 @@ static int patch_alc883(struct hda_codec *codec)
 		spec->init_amp = ALC_INIT_DEFAULT; /* always initialize */
 		break;
 	case 0x10ec0889:
-		spec->stream_name_analog = "ALC889 Analog";
-		spec->stream_name_digital = "ALC889 Digital";
 		if (!spec->num_adc_nids) {
 			spec->num_adc_nids = ARRAY_SIZE(alc889_adc_nids);
 			spec->adc_nids = alc889_adc_nids;
@@ -9261,8 +9246,6 @@ static int patch_alc883(struct hda_codec *codec)
 							capture */
 		break;
 	default:
-		spec->stream_name_analog = "ALC883 Analog";
-		spec->stream_name_digital = "ALC883 Digital";
 		if (!spec->num_adc_nids) {
 			spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids);
 			spec->adc_nids = alc883_adc_nids;
@@ -11087,11 +11070,9 @@ static int patch_alc262(struct hda_codec *codec)
 	if (board_config != ALC262_AUTO)
 		setup_preset(spec, &alc262_presets[board_config]);
 
-	spec->stream_name_analog = "ALC262 Analog";
 	spec->stream_analog_playback = &alc262_pcm_analog_playback;
 	spec->stream_analog_capture = &alc262_pcm_analog_capture;
 
-	spec->stream_name_digital = "ALC262 Digital";
 	spec->stream_digital_playback = &alc262_pcm_digital_playback;
 	spec->stream_digital_capture = &alc262_pcm_digital_capture;
 
@@ -12117,14 +12098,6 @@ static int patch_alc268(struct hda_codec *codec)
 	if (board_config != ALC268_AUTO)
 		setup_preset(spec, &alc268_presets[board_config]);
 
-	if (codec->vendor_id == 0x10ec0267) {
-		spec->stream_name_analog = "ALC267 Analog";
-		spec->stream_name_digital = "ALC267 Digital";
-	} else {
-		spec->stream_name_analog = "ALC268 Analog";
-		spec->stream_name_digital = "ALC268 Digital";
-	}
-
 	spec->stream_analog_playback = &alc268_pcm_analog_playback;
 	spec->stream_analog_capture = &alc268_pcm_analog_capture;
 	spec->stream_analog_alt_capture = &alc268_pcm_analog_alt_capture;
@@ -12979,7 +12952,6 @@ static int patch_alc269(struct hda_codec *codec)
 	if (board_config != ALC269_AUTO)
 		setup_preset(spec, &alc269_presets[board_config]);
 
-	spec->stream_name_analog = "ALC269 Analog";
 	if (codec->subsystem_id == 0x17aa3bf8) {
 		/* Due to a hardware problem on Lenovo Ideadpad, we need to
 		 * fix the sample rate of analog I/O to 44.1kHz
@@ -12990,7 +12962,6 @@ static int patch_alc269(struct hda_codec *codec)
 		spec->stream_analog_playback = &alc269_pcm_analog_playback;
 		spec->stream_analog_capture = &alc269_pcm_analog_capture;
 	}
-	spec->stream_name_digital = "ALC269 Digital";
 	spec->stream_digital_playback = &alc269_pcm_digital_playback;
 	spec->stream_digital_capture = &alc269_pcm_digital_capture;
 
@@ -14080,11 +14051,9 @@ static int patch_alc861(struct hda_codec *codec)
 	if (board_config != ALC861_AUTO)
 		setup_preset(spec, &alc861_presets[board_config]);
 
-	spec->stream_name_analog = "ALC861 Analog";
 	spec->stream_analog_playback = &alc861_pcm_analog_playback;
 	spec->stream_analog_capture = &alc861_pcm_analog_capture;
 
-	spec->stream_name_digital = "ALC861 Digital";
 	spec->stream_digital_playback = &alc861_pcm_digital_playback;
 	spec->stream_digital_capture = &alc861_pcm_digital_capture;
 
@@ -15007,13 +14976,8 @@ static int patch_alc861vd(struct hda_codec *codec)
 		setup_preset(spec, &alc861vd_presets[board_config]);
 
 	if (codec->vendor_id == 0x10ec0660) {
-		spec->stream_name_analog = "ALC660-VD Analog";
-		spec->stream_name_digital = "ALC660-VD Digital";
 		/* always turn on EAPD */
 		add_verb(spec, alc660vd_eapd_verbs);
-	} else {
-		spec->stream_name_analog = "ALC861VD Analog";
-		spec->stream_name_digital = "ALC861VD Digital";
 	}
 
 	spec->stream_analog_playback = &alc861vd_pcm_analog_playback;
@@ -16936,17 +16900,6 @@ static int patch_alc662(struct hda_codec *codec)
 	if (board_config != ALC662_AUTO)
 		setup_preset(spec, &alc662_presets[board_config]);
 
-	if (codec->vendor_id == 0x10ec0663) {
-		spec->stream_name_analog = "ALC663 Analog";
-		spec->stream_name_digital = "ALC663 Digital";
-	} else if (codec->vendor_id == 0x10ec0272) {
-		spec->stream_name_analog = "ALC272 Analog";
-		spec->stream_name_digital = "ALC272 Digital";
-	} else {
-		spec->stream_name_analog = "ALC662 Analog";
-		spec->stream_name_digital = "ALC662 Digital";
-	}
-
 	spec->stream_analog_playback = &alc662_pcm_analog_playback;
 	spec->stream_analog_capture = &alc662_pcm_analog_capture;
 
-- 
cgit v0.10.2


From 183c7fff50a6a83234b5dc6673b6906bd7145326 Mon Sep 17 00:00:00 2001
From: Magnus Lilja <lilja.magnus@gmail.com>
Date: Mon, 4 May 2009 22:18:09 +0200
Subject: i.MX31: Add NAND device driver for Litekit board.

Signed-off-by: Magnus Lilja <lilja.magnus@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lite.c b/arch/arm/mach-mx3/mx31lite.c
index e709229..86fe70f 100644
--- a/arch/arm/mach-mx3/mx31lite.c
+++ b/arch/arm/mach-mx3/mx31lite.c
@@ -38,6 +38,7 @@
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
 #include <mach/irqs.h>
+#include <mach/mxc_nand.h>
 #include "devices.h"
 
 /*
@@ -58,6 +59,11 @@ static struct imxuart_platform_data uart_pdata = {
 	.flags = IMXUART_HAVE_RTSCTS,
 };
 
+static struct mxc_nand_platform_data mx31lite_nand_board_info = {
+	.width = 1,
+	.hw_ecc = 1,
+};
+
 static struct smsc911x_platform_config smsc911x_config = {
 	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
 	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
@@ -124,6 +130,7 @@ static void __init mxc_board_init(void)
 				      "mx31lite");
 
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+	mxc_register_device(&mxc_nand_device, &mx31lite_nand_board_info);
 
 	/* SMSC9117 IRQ pin */
 	ret = gpio_request(IOMUX_TO_GPIO(MX31_PIN_SFS6), "sms9117-irq");
-- 
cgit v0.10.2


From 92ab0a50157449c5bcf55ab4aad0a0c2da0be7c7 Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Date: Fri, 15 May 2009 18:48:13 +0200
Subject: ARM MXC:
 Make-sure-ipg_per_clk-is-generated-by-ipg_clk-and-not-usb_pll

From ff1fd9d7015d9b9ad3e0df2016d0415e2719747c Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Date: Fri, 15 May 2009 17:21:21 +0200
Subject: [PATCH] Make sure ipg_per_clk is generated by ipg_clk and not usb_pll

Signed-off-by: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/clock.c b/arch/arm/mach-mx3/clock.c
index 09cbd4a..28bd11d 100644
--- a/arch/arm/mach-mx3/clock.c
+++ b/arch/arm/mach-mx3/clock.c
@@ -579,6 +579,12 @@ int __init mx31_clocks_init(unsigned long fref)
 					   MX32, but still required to be set */
 		     MXC_CCM_CGR2);
 
+	/*
+	 * Before turning off usb_pll make sure ipg_per_clk is generated
+	 * by ipg_clk and not usb_pll.
+	 */
+	__raw_writel(__raw_readl(MXC_CCM_CCMR) | (1 << 24), MXC_CCM_CCMR);
+
 	usb_pll_disable(&usb_pll_clk);
 
 	pr_info("Clock input source is %ld\n", clk_get_rate(&ckih_clk));
-- 
cgit v0.10.2


From 153fa1d8c6cf7b0e552bbd093dbad2e3c9b318ed Mon Sep 17 00:00:00 2001
From: Magnus Lilja <lilja.magnus@gmail.com>
Date: Sat, 16 May 2009 12:43:10 +0200
Subject: i.MX31: Restructure UART setup for PDK board.

Restructure UART pin setup in preparation for adding other pins
in later patches.

Signed-off-by: Magnus Lilja <lilja.magnus@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31pdk.c b/arch/arm/mach-mx3/mx31pdk.c
index 048084b..5345498 100644
--- a/arch/arm/mach-mx3/mx31pdk.c
+++ b/arch/arm/mach-mx3/mx31pdk.c
@@ -41,29 +41,27 @@
  * @ingroup System
  */
 
-static struct imxuart_platform_data uart_pdata = {
-	.flags = IMXUART_HAVE_RTSCTS,
-};
-
-static int uart_pins[] = {
+static int mx31pdk_pins[] = {
+	/* UART1 */
 	MX31_PIN_CTS1__CTS1,
 	MX31_PIN_RTS1__RTS1,
 	MX31_PIN_TXD1__TXD1,
 	MX31_PIN_RXD1__RXD1
 };
 
-static inline void mxc_init_imx_uart(void)
-{
-	mxc_iomux_setup_multiple_pins(uart_pins, ARRAY_SIZE(uart_pins), "uart-0");
-	mxc_register_device(&mxc_uart_device0, &uart_pdata);
-}
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
 
 /*!
  * Board specific initialization.
  */
 static void __init mxc_board_init(void)
 {
-	mxc_init_imx_uart();
+	mxc_iomux_setup_multiple_pins(mx31pdk_pins, ARRAY_SIZE(mx31pdk_pins),
+				      "mx31pdk");
+
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
 }
 
 static void __init mx31pdk_timer_init(void)
-- 
cgit v0.10.2


From e102657ed16bbed49820d9c58509220fc8d9289a Mon Sep 17 00:00:00 2001
From: Vikram Pandita <vikram.pandita@ti.com>
Date: Sat, 16 May 2009 08:28:16 -0700
Subject: ARM: OMAP3: Fix number of GPIO lines for 34xx

As per 3430 TRM, there are 6 banks [0 to 191]

Signed-off-by: Tom Rix <Tom.Rix@windriver.com>
Signed-off-by: Vikram Pandita <vikram.pandita@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index 17d7afe..ee0b21f 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -307,7 +307,7 @@ static inline int gpio_valid(int gpio)
 		return 0;
 	if (cpu_is_omap24xx() && gpio < 128)
 		return 0;
-	if (cpu_is_omap34xx() && gpio < 160)
+	if (cpu_is_omap34xx() && gpio < 192)
 		return 0;
 	return -1;
 }
-- 
cgit v0.10.2


From 8dbe43930a4e9bede88eb67a9c613773a2747caf Mon Sep 17 00:00:00 2001
From: Kalle Jokiniemi <kalle.jokiniemi@digia.com>
Date: Sat, 16 May 2009 08:28:17 -0700
Subject: ARM: OMAP3: Fix HW SAVEANDRESTORE shift define

The OMAP3430ES2_SAVEANDRESTORE_SHIFT macro is used
by powerdomain code in
"1 << OMAP3430ES2_SAVEANDRESTORE_SHIFT" manner, but
the definition was also (1 << 4), meaning we actually
modified bit 16. So the definition needs to be 4.

This fixes also a cold reset HW bug in OMAP3430 ES3.x
where some of the efuse bits are not isolated during
wake-up from off mode. This can cause randomish
cold resets with off mode. Enabling the USBTLL hardware
SAVEANDRESTORE causes the core power up assert to be
delayed in a way that we will not get faulty values
when boot ROM is reading the unisolated registers.

Signed-off-by: Kalle Jokiniemi <kalle.jokiniemi@digia.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Acked-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h
index c6a7940..9fd03a2 100644
--- a/arch/arm/mach-omap2/prm-regbits-34xx.h
+++ b/arch/arm/mach-omap2/prm-regbits-34xx.h
@@ -409,7 +409,7 @@
 /* PM_PREPWSTST_CAM specific bits */
 
 /* PM_PWSTCTRL_USBHOST specific bits */
-#define OMAP3430ES2_SAVEANDRESTORE_SHIFT		(1 << 4)
+#define OMAP3430ES2_SAVEANDRESTORE_SHIFT		4
 
 /* RM_RSTST_PER specific bits */
 
-- 
cgit v0.10.2


From 005187eecaa400b4b43d9f640fbde9fcc50f37c1 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Sat, 16 May 2009 08:28:17 -0700
Subject: ARM: OMAP2/3: Change omapfb to use clkdev for dispc and rfbi, v2

This makes the framebuffer work on omap3.

Also fix the clk_get usage for checkpatch.pl
"ERROR: do not use assignment in if condition".

Cc: Imre Deak <imre.deak@nokia.com>
Cc: linux-fbdev-devel@lists.sourceforge.net
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/clock24xx.c b/arch/arm/mach-omap2/clock24xx.c
index ff9b4c0..e4cef33 100644
--- a/arch/arm/mach-omap2/clock24xx.c
+++ b/arch/arm/mach-omap2/clock24xx.c
@@ -103,10 +103,10 @@ static struct omap_clk omap24xx_clks[] = {
 	CLK(NULL,	"mdm_ick",	&mdm_ick,	CK_243X),
 	CLK(NULL,	"mdm_osc_ck",	&mdm_osc_ck,	CK_243X),
 	/* DSS domain clocks */
-	CLK(NULL,	"dss_ick",	&dss_ick,	CK_243X | CK_242X),
-	CLK(NULL,	"dss1_fck",	&dss1_fck,	CK_243X | CK_242X),
-	CLK(NULL,	"dss2_fck",	&dss2_fck,	CK_243X | CK_242X),
-	CLK(NULL,	"dss_54m_fck",	&dss_54m_fck,	CK_243X | CK_242X),
+	CLK("omapfb",	"ick",		&dss_ick,	CK_243X | CK_242X),
+	CLK("omapfb",	"dss1_fck",	&dss1_fck,	CK_243X | CK_242X),
+	CLK("omapfb",	"dss2_fck",	&dss2_fck,	CK_243X | CK_242X),
+	CLK("omapfb",	"tv_fck",	&dss_54m_fck,	CK_243X | CK_242X),
 	/* L3 domain clocks */
 	CLK(NULL,	"core_l3_ck",	&core_l3_ck,	CK_243X | CK_242X),
 	CLK(NULL,	"ssi_fck",	&ssi_ssr_sst_fck, CK_243X | CK_242X),
diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c
index a057539..ba05aa4 100644
--- a/arch/arm/mach-omap2/clock34xx.c
+++ b/arch/arm/mach-omap2/clock34xx.c
@@ -197,11 +197,11 @@ static struct omap_clk omap34xx_clks[] = {
 	CLK("omap_rng",	"ick",		&rng_ick,	CK_343X),
 	CLK(NULL,	"sha11_ick",	&sha11_ick,	CK_343X),
 	CLK(NULL,	"des1_ick",	&des1_ick,	CK_343X),
-	CLK(NULL,	"dss1_alwon_fck", &dss1_alwon_fck, CK_343X),
-	CLK(NULL,	"dss_tv_fck",	&dss_tv_fck,	CK_343X),
-	CLK(NULL,	"dss_96m_fck",	&dss_96m_fck,	CK_343X),
-	CLK(NULL,	"dss2_alwon_fck", &dss2_alwon_fck, CK_343X),
-	CLK(NULL,	"dss_ick",	&dss_ick,	CK_343X),
+	CLK("omapfb",	"dss1_fck",	&dss1_alwon_fck, CK_343X),
+	CLK("omapfb",	"tv_fck",	&dss_tv_fck,	CK_343X),
+	CLK("omapfb",	"video_fck",	&dss_96m_fck,	CK_343X),
+	CLK("omapfb",	"dss2_fck",	&dss2_alwon_fck, CK_343X),
+	CLK("omapfb",	"ick",		&dss_ick,	CK_343X),
 	CLK(NULL,	"cam_mclk",	&cam_mclk,	CK_343X),
 	CLK(NULL,	"cam_ick",	&cam_ick,	CK_343X),
 	CLK(NULL,	"csi2_96m_fck",	&csi2_96m_fck,	CK_343X),
diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c
index dfb72f5e..148cbcc 100644
--- a/drivers/video/omap/dispc.c
+++ b/drivers/video/omap/dispc.c
@@ -880,20 +880,22 @@ static irqreturn_t omap_dispc_irq_handler(int irq, void *dev)
 
 static int get_dss_clocks(void)
 {
-	if (IS_ERR((dispc.dss_ick = clk_get(dispc.fbdev->dev, "dss_ick")))) {
-		dev_err(dispc.fbdev->dev, "can't get dss_ick\n");
+	dispc.dss_ick = clk_get(dispc.fbdev->dev, "ick");
+	if (IS_ERR(dispc.dss_ick)) {
+		dev_err(dispc.fbdev->dev, "can't get ick\n");
 		return PTR_ERR(dispc.dss_ick);
 	}
 
-	if (IS_ERR((dispc.dss1_fck = clk_get(dispc.fbdev->dev, "dss1_fck")))) {
+	dispc.dss1_fck = clk_get(dispc.fbdev->dev, "dss1_fck");
+	if (IS_ERR(dispc.dss1_fck)) {
 		dev_err(dispc.fbdev->dev, "can't get dss1_fck\n");
 		clk_put(dispc.dss_ick);
 		return PTR_ERR(dispc.dss1_fck);
 	}
 
-	if (IS_ERR((dispc.dss_54m_fck =
-				clk_get(dispc.fbdev->dev, "dss_54m_fck")))) {
-		dev_err(dispc.fbdev->dev, "can't get dss_54m_fck\n");
+	dispc.dss_54m_fck = clk_get(dispc.fbdev->dev, "tv_fck");
+	if (IS_ERR(dispc.dss_54m_fck)) {
+		dev_err(dispc.fbdev->dev, "can't get tv_fck\n");
 		clk_put(dispc.dss_ick);
 		clk_put(dispc.dss1_fck);
 		return PTR_ERR(dispc.dss_54m_fck);
diff --git a/drivers/video/omap/rfbi.c b/drivers/video/omap/rfbi.c
index a13c8dc..9332d6c 100644
--- a/drivers/video/omap/rfbi.c
+++ b/drivers/video/omap/rfbi.c
@@ -83,12 +83,14 @@ static inline u32 rfbi_read_reg(int idx)
 
 static int rfbi_get_clocks(void)
 {
-	if (IS_ERR((rfbi.dss_ick = clk_get(rfbi.fbdev->dev, "dss_ick")))) {
-		dev_err(rfbi.fbdev->dev, "can't get dss_ick\n");
+	rfbi.dss_ick = clk_get(rfbi.fbdev->dev, "ick");
+	if (IS_ERR(rfbi.dss_ick)) {
+		dev_err(rfbi.fbdev->dev, "can't get ick\n");
 		return PTR_ERR(rfbi.dss_ick);
 	}
 
-	if (IS_ERR((rfbi.dss1_fck = clk_get(rfbi.fbdev->dev, "dss1_fck")))) {
+	rfbi.dss1_fck = clk_get(rfbi.fbdev->dev, "dss1_fck");
+	if (IS_ERR(rfbi.dss1_fck)) {
 		dev_err(rfbi.fbdev->dev, "can't get dss1_fck\n");
 		clk_put(rfbi.dss_ick);
 		return PTR_ERR(rfbi.dss1_fck);
-- 
cgit v0.10.2


From ff05c0330b9880f9ccbb7fa40f2ed3b5842f5693 Mon Sep 17 00:00:00 2001
From: Hartley Sweeten <hartleys@visionengravers.com>
Date: Thu, 7 May 2009 18:41:47 +0100
Subject: [ARM] 5509/1: ep93xx: clkdev enable UARTS

Fix the clkdev API support for the ep93xx uart clocks.

The uarts available in the ep93xx have individual clock controls.
The current implementation assumes that the bootloader has enabled
the clocks before the kernel has booted. It also assumes that the
bootloader has set the UARTBAUD bit indicating that the uarts are
running off the 14.7456MHz external crystal.

This fixes both issues. It also allows the uart clocks to be stopped
when there are no users.

Tested-by: Matthias Kaehlcke <matthias@kaehlcke.net>

Cc: Ryan Mallon <ryan@bluewatersys.com>
Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c
index e8ebeae..b2eede5 100644
--- a/arch/arm/mach-ep93xx/clock.c
+++ b/arch/arm/mach-ep93xx/clock.c
@@ -21,15 +21,50 @@
 #include <asm/div64.h>
 #include <mach/hardware.h>
 
+
+/*
+ * The EP93xx has two external crystal oscillators.  To generate the
+ * required high-frequency clocks, the processor uses two phase-locked-
+ * loops (PLLs) to multiply the incoming external clock signal to much
+ * higher frequencies that are then divided down by programmable dividers
+ * to produce the needed clocks.  The PLLs operate independently of one
+ * another.
+ */
+#define EP93XX_EXT_CLK_RATE	14745600
+#define EP93XX_EXT_RTC_RATE	32768
+
+
 struct clk {
 	unsigned long	rate;
 	int		users;
+	int		sw_locked;
 	u32		enable_reg;
 	u32		enable_mask;
+
+	unsigned long	(*get_rate)(struct clk *clk);
 };
 
-static struct clk clk_uart = {
-	.rate		= 14745600,
+
+static unsigned long get_uart_rate(struct clk *clk);
+
+
+static struct clk clk_uart1 = {
+	.sw_locked	= 1,
+	.enable_reg	= EP93XX_SYSCON_DEVICE_CONFIG,
+	.enable_mask	= EP93XX_SYSCON_DEVICE_CONFIG_U1EN,
+	.get_rate	= get_uart_rate,
+};
+static struct clk clk_uart2 = {
+	.sw_locked	= 1,
+	.enable_reg	= EP93XX_SYSCON_DEVICE_CONFIG,
+	.enable_mask	= EP93XX_SYSCON_DEVICE_CONFIG_U2EN,
+	.get_rate	= get_uart_rate,
+};
+static struct clk clk_uart3 = {
+	.sw_locked	= 1,
+	.enable_reg	= EP93XX_SYSCON_DEVICE_CONFIG,
+	.enable_mask	= EP93XX_SYSCON_DEVICE_CONFIG_U3EN,
+	.get_rate	= get_uart_rate,
 };
 static struct clk clk_pll1;
 static struct clk clk_f;
@@ -95,9 +130,9 @@ static struct clk clk_m2m1 = {
 	{ .dev_id = dev, .con_id = con, .clk = ck }
 
 static struct clk_lookup clocks[] = {
-	INIT_CK("apb:uart1", NULL, &clk_uart),
-	INIT_CK("apb:uart2", NULL, &clk_uart),
-	INIT_CK("apb:uart3", NULL, &clk_uart),
+	INIT_CK("apb:uart1", NULL, &clk_uart1),
+	INIT_CK("apb:uart2", NULL, &clk_uart2),
+	INIT_CK("apb:uart3", NULL, &clk_uart3),
 	INIT_CK(NULL, "pll1", &clk_pll1),
 	INIT_CK(NULL, "fclk", &clk_f),
 	INIT_CK(NULL, "hclk", &clk_h),
@@ -125,6 +160,8 @@ int clk_enable(struct clk *clk)
 		u32 value;
 
 		value = __raw_readl(clk->enable_reg);
+		if (clk->sw_locked)
+			__raw_writel(0xaa, EP93XX_SYSCON_SWLOCK);
 		__raw_writel(value | clk->enable_mask, clk->enable_reg);
 	}
 
@@ -138,13 +175,29 @@ void clk_disable(struct clk *clk)
 		u32 value;
 
 		value = __raw_readl(clk->enable_reg);
+		if (clk->sw_locked)
+			__raw_writel(0xaa, EP93XX_SYSCON_SWLOCK);
 		__raw_writel(value & ~clk->enable_mask, clk->enable_reg);
 	}
 }
 EXPORT_SYMBOL(clk_disable);
 
+static unsigned long get_uart_rate(struct clk *clk)
+{
+	u32 value;
+
+	value = __raw_readl(EP93XX_SYSCON_CLOCK_CONTROL);
+	if (value & EP93XX_SYSCON_CLOCK_UARTBAUD)
+		return EP93XX_EXT_CLK_RATE;
+	else
+		return EP93XX_EXT_CLK_RATE / 2;
+}
+
 unsigned long clk_get_rate(struct clk *clk)
 {
+	if (clk->get_rate)
+		return clk->get_rate(clk);
+
 	return clk->rate;
 }
 EXPORT_SYMBOL(clk_get_rate);
@@ -162,7 +215,7 @@ static unsigned long calc_pll_rate(u32 config_word)
 	unsigned long long rate;
 	int i;
 
-	rate = 14745600;
+	rate = EP93XX_EXT_CLK_RATE;
 	rate *= ((config_word >> 11) & 0x1f) + 1;		/* X1FBD */
 	rate *= ((config_word >> 5) & 0x3f) + 1;		/* X2FBD */
 	do_div(rate, (config_word & 0x1f) + 1);			/* X2IPD */
@@ -195,7 +248,7 @@ static int __init ep93xx_clock_init(void)
 
 	value = __raw_readl(EP93XX_SYSCON_CLOCK_SET1);
 	if (!(value & 0x00800000)) {			/* PLL1 bypassed?  */
-		clk_pll1.rate = 14745600;
+		clk_pll1.rate = EP93XX_EXT_CLK_RATE;
 	} else {
 		clk_pll1.rate = calc_pll_rate(value);
 	}
@@ -206,7 +259,7 @@ static int __init ep93xx_clock_init(void)
 
 	value = __raw_readl(EP93XX_SYSCON_CLOCK_SET2);
 	if (!(value & 0x00080000)) {			/* PLL2 bypassed?  */
-		clk_pll2.rate = 14745600;
+		clk_pll2.rate = EP93XX_EXT_CLK_RATE;
 	} else if (value & 0x00040000) {		/* PLL2 enabled?  */
 		clk_pll2.rate = calc_pll_rate(value);
 	} else {
diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
index f66be12..1732de7 100644
--- a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
+++ b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
@@ -159,7 +159,10 @@
 #define EP93XX_SYSCON_CLOCK_SET1	EP93XX_SYSCON_REG(0x20)
 #define EP93XX_SYSCON_CLOCK_SET2	EP93XX_SYSCON_REG(0x24)
 #define EP93XX_SYSCON_DEVICE_CONFIG	EP93XX_SYSCON_REG(0x80)
-#define EP93XX_SYSCON_DEVICE_CONFIG_CRUNCH_ENABLE	0x00800000
+#define EP93XX_SYSCON_DEVICE_CONFIG_U3EN		(1<<24)
+#define EP93XX_SYSCON_DEVICE_CONFIG_CRUNCH_ENABLE	(1<<23)
+#define EP93XX_SYSCON_DEVICE_CONFIG_U2EN		(1<<20)
+#define EP93XX_SYSCON_DEVICE_CONFIG_U1EN		(1<<18)
 #define EP93XX_SYSCON_SWLOCK		EP93XX_SYSCON_REG(0xc0)
 
 #define EP93XX_WATCHDOG_BASE		(EP93XX_APB_VIRT_BASE + 0x00140000)
-- 
cgit v0.10.2


From 776abac81764847338a6a02c34609d4b8dfb4918 Mon Sep 17 00:00:00 2001
From: Ricardo Martins <rasm@fe.up.pt>
Date: Mon, 11 May 2009 00:15:08 +0100
Subject: [ARM] 5513/1: Eurotech VIPER SBC: fix compilation error

Compilation for this board yields the following errors:

arch/arm/mach-pxa/viper.c:511: error: 'FFUART' undeclared here (not in a function)
arch/arm/mach-pxa/viper.c:520: error: 'BTUART' undeclared here (not in a function)
arch/arm/mach-pxa/viper.c:529: error: 'STUART' undeclared here (not in a function)

Fix them by including the necessary header.

Signed-off-by: Ricardo Martins <rasm@fe.up.pt>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index 0e65344..dd031cc 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -46,6 +46,7 @@
 #include <mach/audio.h>
 #include <mach/pxafb.h>
 #include <mach/i2c.h>
+#include <mach/regs-uart.h>
 #include <mach/viper.h>
 
 #include <asm/setup.h>
-- 
cgit v0.10.2


From f0e1fa760099eef43f4450471d795807a1cc43c8 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Sat, 16 May 2009 22:05:27 +0100
Subject: [ARM] S3C: Add USB high-speed/OtG device definitions

Add platform device definitions for the high-speed and OtG
capable device block on the newer Samsung parts.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/include/mach/map.h b/arch/arm/mach-s3c6400/include/mach/map.h
index ea4223b..99d5e3b 100644
--- a/arch/arm/mach-s3c6400/include/mach/map.h
+++ b/arch/arm/mach-s3c6400/include/mach/map.h
@@ -39,6 +39,7 @@
 #define S3C_VA_UART3		S3C_VA_UARTx(3)
 
 #define S3C64XX_PA_FB		(0x77100000)
+#define S3C64XX_PA_USB_HSOTG	(0x7C000000)
 #define S3C64XX_PA_SYSCON	(0x7E00F000)
 #define S3C64XX_PA_TIMER	(0x7F006000)
 #define S3C64XX_PA_IIC0		(0x7F004000)
@@ -70,5 +71,6 @@
 #define S3C_PA_IIC1		S3C64XX_PA_IIC1
 #define S3C_PA_FB		S3C64XX_PA_FB
 #define S3C_PA_USBHOST		S3C64XX_PA_USBHOST
+#define S3C_PA_USB_HSOTG	S3C64XX_PA_USB_HSOTG
 
 #endif /* __ASM_ARCH_6400_MAP_H */
diff --git a/arch/arm/plat-s3c/Kconfig b/arch/arm/plat-s3c/Kconfig
index a4c5027..1403915 100644
--- a/arch/arm/plat-s3c/Kconfig
+++ b/arch/arm/plat-s3c/Kconfig
@@ -186,4 +186,9 @@ config S3C_DEV_USB_HOST
 	help
 	  Compile in platform device definition for USB host.
 
+config S3C_DEV_USB_HSOTG
+	bool
+	help
+	  Compile in platform device definition for USB high-speed OtG
+
 endif
diff --git a/arch/arm/plat-s3c/Makefile b/arch/arm/plat-s3c/Makefile
index bfc8f53..061e204 100644
--- a/arch/arm/plat-s3c/Makefile
+++ b/arch/arm/plat-s3c/Makefile
@@ -32,3 +32,4 @@ obj-y				+= dev-i2c0.o
 obj-$(CONFIG_S3C_DEV_I2C1)	+= dev-i2c1.o
 obj-$(CONFIG_S3C_DEV_FB)	+= dev-fb.o
 obj-$(CONFIG_S3C_DEV_USB_HOST)	+= dev-usb.o
+obj-$(CONFIG_S3C_DEV_USB_HSOTG)	+= dev-usb-hsotg.o
diff --git a/arch/arm/plat-s3c/dev-usb-hsotg.c b/arch/arm/plat-s3c/dev-usb-hsotg.c
new file mode 100644
index 0000000..e2f604b
--- /dev/null
+++ b/arch/arm/plat-s3c/dev-usb-hsotg.c
@@ -0,0 +1,41 @@
+/* linux/arch/arm/plat-s3c/dev-usb-hsotg.c
+ *
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C series device definition for USB high-speed UDC/OtG block
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+
+#include <mach/irqs.h>
+#include <mach/map.h>
+
+#include <plat/devs.h>
+
+static struct resource s3c_usb_hsotg_resources[] = {
+	[0] = {
+		.start	= S3C_PA_USB_HSOTG,
+		.end	= S3C_PA_USB_HSOTG + 0x10000 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= IRQ_OTG,
+		.end	= IRQ_OTG,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device s3c_device_usb_hsotg = {
+	.name		= "s3c-hsotg",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(s3c_usb_hsotg_resources),
+	.resource	= s3c_usb_hsotg_resources,
+};
diff --git a/arch/arm/plat-s3c/include/plat/devs.h b/arch/arm/plat-s3c/include/plat/devs.h
index 26f0cec..a0b6768 100644
--- a/arch/arm/plat-s3c/include/plat/devs.h
+++ b/arch/arm/plat-s3c/include/plat/devs.h
@@ -45,6 +45,7 @@ extern struct platform_device s3c_device_spi1;
 extern struct platform_device s3c_device_nand;
 
 extern struct platform_device s3c_device_usbgadget;
+extern struct platform_device s3c_device_usb_hsotg;
 
 /* s3c2440 specific devices */
 
diff --git a/arch/arm/plat-s3c/include/plat/udc-hs.h b/arch/arm/plat-s3c/include/plat/udc-hs.h
new file mode 100644
index 0000000..dd04db0
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/udc-hs.h
@@ -0,0 +1,29 @@
+/* arch/arm/plat-s3c/include/plat/udc-hs.h
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *      Ben Dooks <ben@simtec.co.uk>
+ *      http://armlinux.simtec.co.uk/
+ *
+ * S3C USB2.0 High-speed / OtG platform information
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+enum s3c_hostg_dmamode {
+	S3C_HSOTG_DMA_NONE,	/* do not use DMA at-all */
+	S3C_HSOTG_DMA_ONLY,	/* always use DMA */
+	S3C_HSOTG_DMA_DRV,	/* DMA is chosen by driver */
+};
+
+/**
+ * struct s3c_hsotg_plat - platform data for high-speed otg/udc
+ * @dma: Whether to use DMA or not.
+ * @is_osc: The clock source is an oscillator, not a crystal
+ */
+struct s3c_hsotg_plat {
+	enum s3c_hostg_dmamode	dma;
+	unsigned int		is_osc : 1;
+};
-- 
cgit v0.10.2


From 06fa1d37ce9a8fab86a5db304ca5b576f0223d18 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Sat, 16 May 2009 22:11:20 +0100
Subject: [ARM] SMDK6410: Add USB high-speed/OtG gadget device

Add the USB gadget support to the SMDK6410.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6410/Kconfig b/arch/arm/mach-s3c6410/Kconfig
index 9c163f3..e63aac7 100644
--- a/arch/arm/mach-s3c6410/Kconfig
+++ b/arch/arm/mach-s3c6410/Kconfig
@@ -36,6 +36,7 @@ config MACH_SMDK6410
 	select S3C_DEV_I2C1
 	select S3C_DEV_FB
 	select S3C_DEV_USB_HOST
+	select S3C_DEV_USB_HSOTG
 	select S3C6410_SETUP_SDHCI
 	select S3C64XX_SETUP_I2C1
 	select S3C64XX_SETUP_FB_24BPP
diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c
index 8fd7632..bc9a7de 100644
--- a/arch/arm/mach-s3c6410/mach-smdk6410.c
+++ b/arch/arm/mach-s3c6410/mach-smdk6410.c
@@ -183,6 +183,7 @@ static struct platform_device *smdk6410_devices[] __initdata = {
 	&s3c_device_i2c1,
 	&s3c_device_fb,
 	&s3c_device_usb,
+	&s3c_device_usb_hsotg,
 	&smdk6410_lcd_powerdev,
 
 	&smdk6410_smsc911x,
-- 
cgit v0.10.2


From b57f0fe10712026d15a9d5a20e31f240ee72ec15 Mon Sep 17 00:00:00 2001
From: Nelson Castillo <arhuaco@freaks-unidos.net>
Date: Fri, 8 May 2009 08:10:04 -0500
Subject: [ARM] S3C: ADC: Fix lines with more than 80 chars in adc.h

Small cleanup.

Signed-off-by: Nelson Castillo <arhuaco@freaks-unidos.net>
[ben-linux@fluff.org: rewrote subject]
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/include/plat/adc.h b/arch/arm/plat-s3c/include/plat/adc.h
index 43df2a4..ef92e50 100644
--- a/arch/arm/plat-s3c/include/plat/adc.h
+++ b/arch/arm/plat-s3c/include/plat/adc.h
@@ -19,10 +19,11 @@ struct s3c_adc_client;
 extern int s3c_adc_start(struct s3c_adc_client *client,
 			 unsigned int channel, unsigned int nr_samples);
 
-extern struct s3c_adc_client *s3c_adc_register(struct platform_device *pdev,
-					       void (*select)(unsigned selected),
-					       void (*conv)(unsigned d0, unsigned d1),
-					       unsigned int is_ts);
+extern struct s3c_adc_client *
+	s3c_adc_register(struct platform_device *pdev,
+			 void (*select)(unsigned selected),
+			 void (*conv)(unsigned d0, unsigned d1),
+			 unsigned int is_ts);
 
 extern void s3c_adc_release(struct s3c_adc_client *client);
 
-- 
cgit v0.10.2


From 3f7ea467be1bad860c0f71ba7373dd3cf76b485a Mon Sep 17 00:00:00 2001
From: Nelson Castillo <arhuaco@freaks-unidos.net>
Date: Fri, 8 May 2009 08:10:12 -0500
Subject: [ARM] S3C: ADC: Expose number of remaining conversions to convert
 callback

This patch allow us to efficiently modify the number of
remaining conversions from the client side. This us useful
when we do not know in advance how many conversions we will
need or when we need to cancel pending conversions.

This change is simple enough to be compatible with existing
code that can just define the new pointer in the callback
and ignore it.

Sample usage:

http://tinyurl.com/s3c2410-ts-c (function stylus_adc_action).

Signed-off-by: Nelson Castillo <arhuaco@freaks-unidos.net>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c/include/plat/adc.h b/arch/arm/plat-s3c/include/plat/adc.h
index ef92e50..d847bd4 100644
--- a/arch/arm/plat-s3c/include/plat/adc.h
+++ b/arch/arm/plat-s3c/include/plat/adc.h
@@ -22,7 +22,8 @@ extern int s3c_adc_start(struct s3c_adc_client *client,
 extern struct s3c_adc_client *
 	s3c_adc_register(struct platform_device *pdev,
 			 void (*select)(unsigned selected),
-			 void (*conv)(unsigned d0, unsigned d1),
+			 void (*conv)(unsigned d0, unsigned d1,
+				      unsigned *samples_left),
 			 unsigned int is_ts);
 
 extern void s3c_adc_release(struct s3c_adc_client *client);
diff --git a/arch/arm/plat-s3c24xx/adc.c b/arch/arm/plat-s3c24xx/adc.c
index 91adfa7..ee1baf1 100644
--- a/arch/arm/plat-s3c24xx/adc.c
+++ b/arch/arm/plat-s3c24xx/adc.c
@@ -45,7 +45,8 @@ struct s3c_adc_client {
 	unsigned char		 channel;
 
 	void	(*select_cb)(unsigned selected);
-	void	(*convert_cb)(unsigned val1, unsigned val2);
+	void	(*convert_cb)(unsigned val1, unsigned val2,
+			      unsigned *samples_left);
 };
 
 struct adc_device {
@@ -158,7 +159,8 @@ static void s3c_adc_default_select(unsigned select)
 
 struct s3c_adc_client *s3c_adc_register(struct platform_device *pdev,
 					void (*select)(unsigned int selected),
-					void (*conv)(unsigned d0, unsigned d1),
+					void (*conv)(unsigned d0, unsigned d1,
+						     unsigned *samples_left),
 					unsigned int is_ts)
 {
 	struct s3c_adc_client *client;
@@ -227,9 +229,10 @@ static irqreturn_t s3c_adc_irq(int irq, void *pw)
 	data1 = readl(adc->regs + S3C2410_ADCDAT1);
 	adc_dbg(adc, "read %d: 0x%04x, 0x%04x\n", client->nr_samples, data0, data1);
 
-	(client->convert_cb)(data0 & 0x3ff, data1 & 0x3ff);
+	client->nr_samples--;
+	(client->convert_cb)(data0 & 0x3ff, data1 & 0x3ff, &client->nr_samples);
 
-	if (--client->nr_samples > 0) {
+	if (client->nr_samples > 0) {
 		/* fire another conversion for this */
 
 		client->select_cb(1);
-- 
cgit v0.10.2


From 8bc2095951517e2c74b8aeeca4685ddd6b16ed4b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 20:45:59 +0200
Subject: perf_counter: Fix inheritance cleanup code

Clean up code that open-coded the list_{add,del}_counter() code in
__perf_counter_exit_task() which consequently diverged. This could
lead to software counter crashes.

Also, fold the ctx->nr_counter inc/dec into those functions and clean
up some of the related code.

[ Impact: fix potential sw counter crash, cleanup ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 57840a9..59a926d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -115,6 +115,7 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	}
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
+	ctx->nr_counters++;
 }
 
 static void
@@ -122,6 +123,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
 	struct perf_counter *sibling, *tmp;
 
+	ctx->nr_counters--;
+
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
 
@@ -209,7 +212,6 @@ static void __perf_counter_remove_from_context(void *info)
 	counter_sched_out(counter, cpuctx, ctx);
 
 	counter->task = NULL;
-	ctx->nr_counters--;
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -276,7 +278,6 @@ retry:
 	 * succeed.
 	 */
 	if (!list_empty(&counter->list_entry)) {
-		ctx->nr_counters--;
 		list_del_counter(counter, ctx);
 		counter->task = NULL;
 	}
@@ -544,7 +545,6 @@ static void add_counter_to_ctx(struct perf_counter *counter,
 			       struct perf_counter_context *ctx)
 {
 	list_add_counter(counter, ctx);
-	ctx->nr_counters++;
 	counter->prev_state = PERF_COUNTER_STATE_OFF;
 	counter->tstamp_enabled = ctx->time;
 	counter->tstamp_running = ctx->time;
@@ -3206,9 +3206,8 @@ static int inherit_group(struct perf_counter *parent_counter,
 static void sync_child_counter(struct perf_counter *child_counter,
 			       struct perf_counter *parent_counter)
 {
-	u64 parent_val, child_val;
+	u64 child_val;
 
-	parent_val = atomic64_read(&parent_counter->count);
 	child_val = atomic64_read(&child_counter->count);
 
 	/*
@@ -3240,7 +3239,6 @@ __perf_counter_exit_task(struct task_struct *child,
 			 struct perf_counter_context *child_ctx)
 {
 	struct perf_counter *parent_counter;
-	struct perf_counter *sub, *tmp;
 
 	/*
 	 * If we do not self-reap then we have to wait for the
@@ -3252,8 +3250,8 @@ __perf_counter_exit_task(struct task_struct *child,
 	 */
 	if (child != current) {
 		wait_task_inactive(child, 0);
-		list_del_init(&child_counter->list_entry);
 		update_counter_times(child_counter);
+		list_del_counter(child_counter, child_ctx);
 	} else {
 		struct perf_cpu_context *cpuctx;
 		unsigned long flags;
@@ -3272,9 +3270,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		group_sched_out(child_counter, cpuctx, child_ctx);
 		update_counter_times(child_counter);
 
-		list_del_init(&child_counter->list_entry);
-
-		child_ctx->nr_counters--;
+		list_del_counter(child_counter, child_ctx);
 
 		perf_enable();
 		local_irq_restore(flags);
@@ -3288,13 +3284,6 @@ __perf_counter_exit_task(struct task_struct *child,
 	 */
 	if (parent_counter) {
 		sync_child_counter(child_counter, parent_counter);
-		list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
-					 list_entry) {
-			if (sub->parent) {
-				sync_child_counter(sub, sub->parent);
-				free_counter(sub);
-			}
-		}
 		free_counter(child_counter);
 	}
 }
@@ -3315,9 +3304,18 @@ void perf_counter_exit_task(struct task_struct *child)
 	if (likely(!child_ctx->nr_counters))
 		return;
 
+again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
 				 list_entry)
 		__perf_counter_exit_task(child, child_counter, child_ctx);
+
+	/*
+	 * If the last counter was a group counter, it will have appended all
+	 * its siblings to the list, but we obtained 'tmp' before that which
+	 * will still point to the list head terminating the iteration.
+	 */
+	if (!list_empty(&child_ctx->counter_list))
+		goto again;
 }
 
 /*
-- 
cgit v0.10.2


From 856d56b9e5de650a64a6c41c17aaed702b55d578 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 20:45:59 +0200
Subject: perf_counter: Fix counter inheritance

Srivatsa Vaddagiri reported that a Java workload triggers this
warning in kernel/exit.c:

   WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));

Add the inherited counter propagation on self-detach, this could
cause counter leaks and incomplete stats in threaded code like
the below:

  #include <pthread.h>
  #include <unistd.h>

  void *thread(void *arg)
  {
          sleep(5);
          return NULL;
  }

  void main(void)
  {
          pthread_t thr;
          pthread_create(&thr, NULL, thread, NULL);
  }

Reported-by: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/exit.c b/kernel/exit.c
index 4741376..16d74f1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -128,6 +128,12 @@ static void __exit_signal(struct task_struct *tsk)
 		sig = NULL; /* Marker for below. */
 	}
 
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(tsk);
+
 	__unhash_process(tsk);
 
 	/*
-- 
cgit v0.10.2


From 0203026b58b4299ba7281c0b4b417207c1f05d0e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 17 May 2009 11:24:08 +0200
Subject: perf_counter: fix threaded task exit

Flushing counters in __exit_signal() with irqs disabled is not
a good idea as perf_counter_exit_task() acquires mutexes. So
flush it before acquiring the tasklist lock.

(Note, we still need a fix for when the PID has been unhashed.)

[ Impact: fix crash with inherited counters ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/exit.c b/kernel/exit.c
index 16d74f1..73affd3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -128,12 +128,6 @@ static void __exit_signal(struct task_struct *tsk)
 		sig = NULL; /* Marker for below. */
 	}
 
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 */
-	perf_counter_exit_task(tsk);
-
 	__unhash_process(tsk);
 
 	/*
@@ -183,6 +177,13 @@ repeat:
 	atomic_dec(&__task_cred(p)->user->processes);
 
 	proc_flush_task(p);
+
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(p);
+
 	write_lock_irq(&tasklist_lock);
 	tracehook_finish_release_task(p);
 	__exit_signal(p);
-- 
cgit v0.10.2


From d2517a49d55536b38c7a87e5289550cfedaa4dcc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 17 May 2009 10:04:45 +0200
Subject: perf_counter, x86: fix zero irq_period counters

The quirk to irq_period unearthed an unrobustness we had in the
hw_counter initialization sequence: we left irq_period at 0, which
was then quirked up to 2 ... which then generated a _lot_ of
interrupts during 'perf stat' runs, slowed them down and skewed
the counter results in general.

Initialize irq_period to the maximum instead.

[ Impact: fix perf stat results ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 886dcf3..5bfd30a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -286,6 +286,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->nmi = 1;
 	}
 
+	if (!hwc->irq_period)
+		hwc->irq_period = x86_pmu.max_period;
+
 	atomic64_set(&hwc->period_left,
 			min(x86_pmu.max_period, hwc->irq_period));
 
-- 
cgit v0.10.2


From 2eec8c318b9bbfe9e0f2a889b4ad3f4b4e5ba429 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 20 Mar 2009 20:27:37 +0100
Subject: mx3fb: Issue prettier log message

Without this patch we end up with a log message like
"mx3_sdc_fb mx3_sdc_fb: mx3fb: fb registered". That's two fb
too much for my taste.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c
index 7a168ba..b7af525 100644
--- a/drivers/video/mx3fb.c
+++ b/drivers/video/mx3fb.c
@@ -1366,7 +1366,7 @@ static int init_fb_chan(struct mx3fb_data *mx3fb, struct idmac_channel *ichan)
 
 	mx3fb_blank(FB_BLANK_UNBLANK, fbi);
 
-	dev_info(dev, "mx3fb: fb registered, using mode %s\n", fb_mode);
+	dev_info(dev, "registered, using mode %s\n", fb_mode);
 
 	ret = register_framebuffer(fbi);
 	if (ret < 0)
-- 
cgit v0.10.2


From 826681043d7184b4d650cab5b007b9a86b628eb5 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 17 May 2009 16:20:18 +0100
Subject: [ARM] smp: fix cpumask usage in ARM SMP code

The ARM SMP code wasn't properly updated for the cpumask changes, which
results in smp_timer_broadcast() broadcasting ticks to non-online CPUs.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index c6884ba..3e1714c 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -253,9 +253,9 @@ void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base)
 }
 
 #ifdef CONFIG_SMP
-void gic_raise_softirq(cpumask_t cpumask, unsigned int irq)
+void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
-	unsigned long map = *cpus_addr(cpumask);
+	unsigned long map = *cpus_addr(*mask);
 
 	/* this always happens on GIC0 */
 	writel(map << 16 | irq, gic_data[0].dist_base + GIC_DIST_SOFTINT);
diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h
index 4924914..7f34333 100644
--- a/arch/arm/include/asm/hardware/gic.h
+++ b/arch/arm/include/asm/hardware/gic.h
@@ -36,7 +36,7 @@
 void gic_dist_init(unsigned int gic_nr, void __iomem *base, unsigned int irq_start);
 void gic_cpu_init(unsigned int gic_nr, void __iomem *base);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
-void gic_raise_softirq(cpumask_t cpumask, unsigned int irq);
+void gic_raise_softirq(const struct cpumask *mask, unsigned int irq);
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index fad70da..5995935 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -53,17 +53,12 @@ extern void smp_store_cpu_info(unsigned int cpuid);
 /*
  * Raise an IPI cross call on CPUs in callmap.
  */
-extern void smp_cross_call(cpumask_t callmap);
-
-/*
- * Broadcast a timer interrupt to the other CPUs.
- */
-extern void smp_send_timer(void);
+extern void smp_cross_call(const struct cpumask *mask);
 
 /*
  * Broadcast a clock event to other CPUs.
  */
-extern void smp_timer_broadcast(cpumask_t mask);
+extern void smp_timer_broadcast(const struct cpumask *mask);
 
 /*
  * Boot a secondary CPU, and assign it the specified idle task.
@@ -102,7 +97,8 @@ extern int platform_cpu_kill(unsigned int cpu);
 extern void platform_cpu_enable(unsigned int cpu);
 
 extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 /*
  * Local timer interrupt handling function (can be IPI'ed).
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 7801aac..6014dfd 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -326,14 +326,14 @@ void __init smp_prepare_boot_cpu(void)
 	per_cpu(cpu_data, cpu).idle = current;
 }
 
-static void send_ipi_message(cpumask_t callmap, enum ipi_msg_type msg)
+static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
 {
 	unsigned long flags;
 	unsigned int cpu;
 
 	local_irq_save(flags);
 
-	for_each_cpu_mask(cpu, callmap) {
+	for_each_cpu(cpu, mask) {
 		struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
 
 		spin_lock(&ipi->lock);
@@ -344,19 +344,19 @@ static void send_ipi_message(cpumask_t callmap, enum ipi_msg_type msg)
 	/*
 	 * Call the platform specific cross-CPU call function.
 	 */
-	smp_cross_call(callmap);
+	smp_cross_call(mask);
 
 	local_irq_restore(flags);
 }
 
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
 	send_ipi_message(mask, IPI_CALL_FUNC);
 }
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE);
+	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
 }
 
 void show_ipi_list(struct seq_file *p)
@@ -498,17 +498,10 @@ asmlinkage void __exception do_IPI(struct pt_regs *regs)
 
 void smp_send_reschedule(int cpu)
 {
-	send_ipi_message(cpumask_of_cpu(cpu), IPI_RESCHEDULE);
+	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 }
 
-void smp_send_timer(void)
-{
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-	send_ipi_message(mask, IPI_TIMER);
-}
-
-void smp_timer_broadcast(cpumask_t mask)
+void smp_timer_broadcast(const struct cpumask *mask)
 {
 	send_ipi_message(mask, IPI_TIMER);
 }
@@ -517,7 +510,7 @@ void smp_send_stop(void)
 {
 	cpumask_t mask = cpu_online_map;
 	cpu_clear(smp_processor_id(), mask);
-	send_ipi_message(mask, IPI_CPU_STOP);
+	send_ipi_message(&mask, IPI_CPU_STOP);
 }
 
 /*
@@ -528,20 +521,17 @@ int setup_profiling_timer(unsigned int multiplier)
 	return -EINVAL;
 }
 
-static int
-on_each_cpu_mask(void (*func)(void *), void *info, int wait, cpumask_t mask)
+static void
+on_each_cpu_mask(void (*func)(void *), void *info, int wait,
+		const struct cpumask *mask)
 {
-	int ret = 0;
-
 	preempt_disable();
 
-	ret = smp_call_function_mask(mask, func, info, wait);
-	if (cpu_isset(smp_processor_id(), mask))
+	smp_call_function_many(mask, func, info, wait);
+	if (cpumask_test_cpu(smp_processor_id(), mask))
 		func(info);
 
 	preempt_enable();
-
-	return ret;
 }
 
 /**********************************************************************/
@@ -602,20 +592,17 @@ void flush_tlb_all(void)
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	cpumask_t mask = mm->cpu_vm_mask;
-
-	on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mask);
+	on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 {
-	cpumask_t mask = vma->vm_mm->cpu_vm_mask;
 	struct tlb_args ta;
 
 	ta.ta_vma = vma;
 	ta.ta_start = uaddr;
 
-	on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mask);
+	on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask);
 }
 
 void flush_tlb_kernel_page(unsigned long kaddr)
@@ -630,14 +617,13 @@ void flush_tlb_kernel_page(unsigned long kaddr)
 void flush_tlb_range(struct vm_area_struct *vma,
                      unsigned long start, unsigned long end)
 {
-	cpumask_t mask = vma->vm_mm->cpu_vm_mask;
 	struct tlb_args ta;
 
 	ta.ta_vma = vma;
 	ta.ta_start = start;
 	ta.ta_end = end;
 
-	on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mask);
+	on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/arm/mach-realview/include/mach/smp.h b/arch/arm/mach-realview/include/mach/smp.h
index 515819e..e2ff534 100644
--- a/arch/arm/mach-realview/include/mach/smp.h
+++ b/arch/arm/mach-realview/include/mach/smp.h
@@ -15,15 +15,15 @@
 /*
  * We use IRQ1 as the IPI
  */
-static inline void smp_cross_call(cpumask_t callmap)
+static inline void smp_cross_call(const struct cpumask *mask)
 {
-	gic_raise_softirq(callmap, 1);
+	gic_raise_softirq(mask, 1);
 }
 
 /*
  * Do nothing on MPcore.
  */
-static inline void smp_cross_call_done(cpumask_t callmap)
+static inline void smp_cross_call_done(const struct cpumask *mask)
 {
 }
 
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index ea3c755..a573046 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -82,7 +82,7 @@ void __cpuinit platform_secondary_init(unsigned int cpu)
 	 * to get this processor out of WFI in the BootMonitor - make
 	 * sure that we are no longer being sent this soft interrupt
 	 */
-	smp_cross_call_done(cpumask_of_cpu(cpu));
+	smp_cross_call_done(cpumask_of(cpu));
 
 	/*
 	 * if any interrupts are already enabled for the primary
@@ -136,7 +136,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * Use smp_cross_call() for this, since there's little
 	 * point duplicating the code here
 	 */
-	smp_cross_call(cpumask_of_cpu(cpu));
+	smp_cross_call(cpumask_of(cpu));
 
 	timeout = jiffies + (1 * HZ);
 	while (time_before(jiffies, timeout)) {
-- 
cgit v0.10.2


From e8e7526c3c0863be25ab03a0871ee0978de5ba50 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Sun, 17 May 2009 17:22:53 +0200
Subject: ide-tape: fix debug call

This error only occurs when IDETAPE_DEBUG_LOG is enabled.

Signed-off-by: Mark de Wever <koraq@xs4all.nl>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 8dfc688..203bbea 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -617,7 +617,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	struct ide_cmd cmd;
 	u8 stat;
 
-	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n"
+	debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n",
 		  (unsigned long long)rq->sector, rq->nr_sectors);
 
 	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
-- 
cgit v0.10.2


From 6029336426a2b43e4bc6f4a84be8789a047d139e Mon Sep 17 00:00:00 2001
From: Joao Ramos <joao.ramos@inov.pt>
Date: Sun, 17 May 2009 17:22:54 +0200
Subject: ide: try to use PIO Mode 0 during probe if possible

Initially set PIO Mode 0 for all host drivers that have a 'set_pio_mode'
method before the IDE core figures out the most suited PIO mode for the
attached device.

Signed-off-by: Joao Ramos <joao.ramos@inov.pt>
Cc: Sergei Shtylyov <sshtylyov@ru.montavista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 7f264ed..b609a58 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1032,6 +1032,15 @@ static void ide_port_init_devices(ide_hwif_t *hwif)
 		if (port_ops && port_ops->init_dev)
 			port_ops->init_dev(drive);
 	}
+
+	ide_port_for_each_dev(i, drive, hwif) {
+		/*
+		 * default to PIO Mode 0 before we figure out
+		 * the most suited mode for the attached device
+		 */
+		if (port_ops && port_ops->set_pio_mode)
+			port_ops->set_pio_mode(drive, 0);
+	}
 }
 
 static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
-- 
cgit v0.10.2


From 78d236c2b30d4712c1fd8c9768b163c94b39e77d Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 17 May 2009 16:23:45 +0100
Subject: [ARM] realview: remove useless smp_cross_call_done()

smp_cross_call_done() is a no-op for MPCore, and since it's only
used by platform code, there's no point in having it unless it's
doing something.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-realview/include/mach/smp.h b/arch/arm/mach-realview/include/mach/smp.h
index e2ff534..dd53892 100644
--- a/arch/arm/mach-realview/include/mach/smp.h
+++ b/arch/arm/mach-realview/include/mach/smp.h
@@ -20,11 +20,4 @@ static inline void smp_cross_call(const struct cpumask *mask)
 	gic_raise_softirq(mask, 1);
 }
 
-/*
- * Do nothing on MPcore.
- */
-static inline void smp_cross_call_done(const struct cpumask *mask)
-{
-}
-
 #endif
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index a573046..b037fd6 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -78,13 +78,6 @@ void __cpuinit platform_secondary_init(unsigned int cpu)
 	trace_hardirqs_off();
 
 	/*
-	 * the primary core may have used a "cross call" soft interrupt
-	 * to get this processor out of WFI in the BootMonitor - make
-	 * sure that we are no longer being sent this soft interrupt
-	 */
-	smp_cross_call_done(cpumask_of(cpu));
-
-	/*
 	 * if any interrupts are already enabled for the primary
 	 * core (e.g. timer irq), then they will not have been enabled
 	 * for us: do so
-- 
cgit v0.10.2


From ee348d5a1d810bc9958cabb7c27302aab235d36e Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 17 May 2009 17:00:47 +0100
Subject: [ARM] realview: fix broadcast tick support

Having discussed broadcast tick support with Thomas Glexiner, the
broadcast tick devices should be registered with a higher rating
than the global tick device, and it should have the ONESHOT and
PERIODIC feature flags set.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Thomas Glexiner <tglx@linutronix.de>

diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 942e1a7..076acbc 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -750,14 +750,6 @@ void __init realview_timer_init(unsigned int timer_irq)
 {
 	u32 val;
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-	/*
-	 * The dummy clock device has to be registered before the main device
-	 * so that the latter will broadcast the clock events
-	 */
-	local_timer_setup();
-#endif
-
 	/* 
 	 * set clock frequency: 
 	 *	REALVIEW_REFCLK is 32KHz
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index d0d39ad..1c01d13 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -189,8 +189,10 @@ void __cpuinit local_timer_setup(void)
 	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
 
 	clk->name		= "dummy_timer";
-	clk->features		= CLOCK_EVT_FEAT_DUMMY;
-	clk->rating		= 200;
+	clk->features		= CLOCK_EVT_FEAT_ONESHOT |
+				  CLOCK_EVT_FEAT_PERIODIC |
+				  CLOCK_EVT_FEAT_DUMMY;
+	clk->rating		= 400;
 	clk->mult               = 1;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index b037fd6..30a9c68 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -217,11 +217,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	if (max_cpus > ncores)
 		max_cpus = ncores;
 
-#ifdef CONFIG_LOCAL_TIMERS
+#if defined(CONFIG_LOCAL_TIMERS) || defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)
 	/*
-	 * Enable the local timer for primary CPU. If the device is
-	 * dummy (!CONFIG_LOCAL_TIMERS), it was already registers in
-	 * realview_timer_init
+	 * Enable the local timer or broadcast device for the boot CPU.
 	 */
 	local_timer_setup();
 #endif
-- 
cgit v0.10.2


From 5f582c8e2193e3848039de87e6a3ace7cbc7ed88 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:18 +0200
Subject: ide: BUG() on unknown flags in ide_disk_special()

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index a0309ea..ef806ab 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -201,12 +201,8 @@ static ide_startstop_t ide_disk_special(ide_drive_t *drive)
 	} else if (s->b.set_multmode) {
 		s->b.set_multmode = 0;
 		ide_tf_set_setmult_cmd(drive, &cmd.tf);
-	} else if (s->all) {
-		int special = s->all;
-		s->all = 0;
-		printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
-		return ide_stopped;
-	}
+	} else
+		BUG();
 
 	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
 	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-- 
cgit v0.10.2


From 582078ee3d7dacd74a7b3fe02ea258cadf32b602 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:18 +0200
Subject: ide: merge ide_disk_special() into do_special() (v2)

While at it:
- change debug printk() level to KERN_DEBUG and use __func__
- update documentation

v2:
- fix DEBUG build (noticed by Sergei)

There should be no functional changes caused by this patch.

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ef806ab..1855768 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -184,11 +184,28 @@ static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
 	tf->command = ATA_CMD_SET_MULTI;
 }
 
-static ide_startstop_t ide_disk_special(ide_drive_t *drive)
+/**
+ *	do_special		-	issue some special commands
+ *	@drive: drive the command is for
+ *
+ *	do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
+ *	ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
+ */
+
+static ide_startstop_t do_special(ide_drive_t *drive)
 {
 	special_t *s = &drive->special;
 	struct ide_cmd cmd;
 
+#ifdef DEBUG
+	printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__, s->all);
+#endif
+	if (drive->media != ide_disk) {
+		s->all = 0;
+		drive->mult_req = 0;
+		return ide_stopped;
+	}
+
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.protocol = ATA_PROT_NODATA;
 
@@ -213,31 +230,6 @@ static ide_startstop_t ide_disk_special(ide_drive_t *drive)
 	return ide_started;
 }
 
-/**
- *	do_special		-	issue some special commands
- *	@drive: drive the command is for
- *
- *	do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
- *	ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
- *
- *	It used to do much more, but has been scaled back.
- */
-
-static ide_startstop_t do_special (ide_drive_t *drive)
-{
-	special_t *s = &drive->special;
-
-#ifdef DEBUG
-	printk("%s: do_special: 0x%02x\n", drive->name, s->all);
-#endif
-	if (drive->media == ide_disk)
-		return ide_disk_special(drive);
-
-	s->all = 0;
-	drive->mult_req = 0;
-	return ide_stopped;
-}
-
 void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
 {
 	ide_hwif_t *hwif = drive->hwif;
-- 
cgit v0.10.2


From ca1b96e00ab5d1b0838965834469a0284c81a517 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:21 +0200
Subject: ide: replace special_t typedef by IDE_SFLAG_* flags

Replace:
- special_t typedef by IDE_SFLAG_* flags
- 'special_t special' ide_drive_t's field by 'u8 special_flags' one

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index c243880..d345f5f 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -428,14 +428,14 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
 		return -EINVAL;
 
-	if (drive->special.b.set_multmode)
+	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
 		return -EBUSY;
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
 	drive->mult_req = arg;
-	drive->special.b.set_multmode = 1;
+	drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 	error = blk_execute_rq(drive->queue, NULL, rq, 0);
 	blk_put_request(rq);
 
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 5d5fb96..39d5892 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -52,7 +52,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
 	}
 
 	if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
-		drive->special.b.recalibrate = 1;
+		drive->special_flags |= IDE_SFLAG_RECALIBRATE;
 
 	++rq->errors;
 
@@ -268,9 +268,8 @@ static void ide_disk_pre_reset(ide_drive_t *drive)
 {
 	int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1;
 
-	drive->special.all = 0;
-	drive->special.b.set_geometry = legacy;
-	drive->special.b.recalibrate  = legacy;
+	drive->special_flags =
+		legacy ? (IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE) : 0;
 
 	drive->mult_count = 0;
 	drive->dev_flags &= ~IDE_DFLAG_PARKED;
@@ -280,7 +279,7 @@ static void ide_disk_pre_reset(ide_drive_t *drive)
 		drive->mult_req = 0;
 
 	if (drive->mult_req != drive->mult_count)
-		drive->special.b.set_multmode = 1;
+		drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 }
 
 static void pre_reset(ide_drive_t *drive)
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 1855768..644d7b4 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -194,14 +194,14 @@ static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
 
 static ide_startstop_t do_special(ide_drive_t *drive)
 {
-	special_t *s = &drive->special;
 	struct ide_cmd cmd;
 
 #ifdef DEBUG
-	printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__, s->all);
+	printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__,
+		drive->special_flags);
 #endif
 	if (drive->media != ide_disk) {
-		s->all = 0;
+		drive->special_flags = 0;
 		drive->mult_req = 0;
 		return ide_stopped;
 	}
@@ -209,14 +209,14 @@ static ide_startstop_t do_special(ide_drive_t *drive)
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.protocol = ATA_PROT_NODATA;
 
-	if (s->b.set_geometry) {
-		s->b.set_geometry = 0;
+	if (drive->special_flags & IDE_SFLAG_SET_GEOMETRY) {
+		drive->special_flags &= ~IDE_SFLAG_SET_GEOMETRY;
 		ide_tf_set_specify_cmd(drive, &cmd.tf);
-	} else if (s->b.recalibrate) {
-		s->b.recalibrate = 0;
+	} else if (drive->special_flags & IDE_SFLAG_RECALIBRATE) {
+		drive->special_flags &= ~IDE_SFLAG_RECALIBRATE;
 		ide_tf_set_restore_cmd(drive, &cmd.tf);
-	} else if (s->b.set_multmode) {
-		s->b.set_multmode = 0;
+	} else if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) {
+		drive->special_flags &= ~IDE_SFLAG_SET_MULTMODE;
 		ide_tf_set_setmult_cmd(drive, &cmd.tf);
 	} else
 		BUG();
@@ -339,7 +339,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 		printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
 		return startstop;
 	}
-	if (!drive->special.all) {
+
+	if (drive->special_flags == 0) {
 		struct ide_driver *drv;
 
 		/*
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index b609a58..727a671 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -97,7 +97,7 @@ static void ide_disk_init_mult_count(ide_drive_t *drive)
 		drive->mult_req = id[ATA_ID_MULTSECT] & 0xff;
 
 		if (drive->mult_req)
-			drive->special.b.set_multmode = 1;
+			drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 	}
 }
 
@@ -1138,8 +1138,8 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
 		drive->hwif			= hwif;
 		drive->ready_stat		= ATA_DRDY;
 		drive->bad_wstat		= BAD_W_STAT;
-		drive->special.b.recalibrate	= 1;
-		drive->special.b.set_geometry	= 1;
+		drive->special_flags		= IDE_SFLAG_RECALIBRATE |
+						  IDE_SFLAG_SET_GEOMETRY;
 		drive->name[0]			= 'h';
 		drive->name[1]			= 'd';
 		drive->name[2]			= 'a' + j;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index f400eb4..8cab3c2 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -166,7 +166,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive)
 	if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
 		if (custom && tf->command == ATA_CMD_SET_MULTI) {
 			drive->mult_req = drive->mult_count = 0;
-			drive->special.b.recalibrate = 1;
+			drive->special_flags |= IDE_SFLAG_RECALIBRATE;
 			(void)ide_dump_status(drive, __func__, stat);
 			return ide_stopped;
 		} else if (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) {
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index e4973cd..bd82d22 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -451,8 +451,8 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
 static void sil_sata_pre_reset(ide_drive_t *drive)
 {
 	if (drive->media == ide_disk) {
-		drive->special.b.set_geometry = 0;
-		drive->special.b.recalibrate = 0;
+		drive->special_flags &=
+			~(IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE);
 	}
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 34c128f..fc61328 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -218,21 +218,12 @@ static inline void ide_std_init_ports(hw_regs_t *hw,
 
 /*
  * Special Driver Flags
- *
- * set_geometry	: respecify drive geometry
- * recalibrate	: seek to cyl 0
- * set_multmode	: set multmode count
- * reserved	: unused
  */
-typedef union {
-	unsigned all			: 8;
-	struct {
-		unsigned set_geometry	: 1;
-		unsigned recalibrate	: 1;
-		unsigned set_multmode	: 1;
-		unsigned reserved	: 5;
-	} b;
-} special_t;
+enum {
+	IDE_SFLAG_SET_GEOMETRY		= (1 << 0),
+	IDE_SFLAG_RECALIBRATE		= (1 << 1),
+	IDE_SFLAG_SET_MULTMODE		= (1 << 2),
+};
 
 /*
  * Status returned from various ide_ functions
@@ -530,7 +521,7 @@ struct ide_drive_s {
 	unsigned long sleep;		/* sleep until this time */
 	unsigned long timeout;		/* max time to wait for irq */
 
-	special_t	special;	/* special action flags */
+	u8	special_flags;		/* special action flags */
 
 	u8	select;			/* basic drive/head select reg value */
 	u8	retry_pio;		/* retrying dma capable host in pio */
-- 
cgit v0.10.2


From 29e52cf793ded6bece50de50e738596f94f07d9f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:22 +0200
Subject: ide: remove chipset field from hw_regs_t

* Convert host drivers that still use hw_regs_t's chipset field to use
  the one in struct ide_port_info instead.

* Move special handling of ide_pci chipset type from ide_hw_configure()
  to ide_init_port().

* Remove chipset field from hw_regs_t.

While at it:
- remove stale comment in delkin_cb.c

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 403d0e4..8d39cc9 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -216,6 +216,7 @@ static const struct ide_port_info at91_ide_port_info __initdata = {
 	.host_flags 	= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA | IDE_HFLAG_SINGLE |
 			  IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_UNMASK_IRQS,
 	.pio_mask 	= ATA_PIO6,
+	.chipset	= ide_generic,
 };
 
 /*
@@ -304,7 +305,6 @@ static int __init at91_ide_probe(struct platform_device *pdev)
 		ide_std_init_ports(&hw, tf_base, ctl_base + 6);
 
 	hw.irq = board->irq_pin;
-	hw.chipset = ide_generic;
 	hw.dev = &pdev->dev;
 
 	host = ide_host_alloc(&at91_ide_port_info, hws);
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 4601364..9b31f83 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -499,6 +499,7 @@ static const struct ide_port_info au1xxx_port_info = {
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 	.mwdma_mask		= ATA_MWDMA2,
 #endif
+	.chipset		= ide_au1xxx,
 };
 
 static int au_ide_probe(struct platform_device *dev)
@@ -548,7 +549,6 @@ static int au_ide_probe(struct platform_device *dev)
 	auide_setup_ports(&hw, ahwif);
 	hw.irq = ahwif->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_au1xxx;
 
 	ret = ide_host_add(&au1xxx_port_info, hws, &host);
 	if (ret)
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index d028f88..9aa2cd9 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -139,13 +139,12 @@ static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base,
 
 	hw->irq = IRQ_AMIGA_PORTS;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info buddha_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
     /*
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index 8890276..e862a25 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -762,11 +762,9 @@ static int __init cmd640x_init(void)
 
 	ide_std_init_ports(&hw[0], 0x1f0, 0x3f6);
 	hw[0].irq = 14;
-	hw[0].chipset = ide_cmd640;
 
 	ide_std_init_ports(&hw[1], 0x170, 0x376);
 	hw[1].irq = 15;
-	hw[1].chipset = ide_cmd640;
 
 	printk(KERN_INFO "cmd640: buggy cmd640%c interface on %s, config=0x%02x"
 			 "\n", 'a' + cmd640_chip_version - 1, bus_type, cfr);
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
index f153b95..a0de834 100644
--- a/drivers/ide/delkin_cb.c
+++ b/drivers/ide/delkin_cb.c
@@ -68,6 +68,7 @@ static const struct ide_port_info delkin_cb_port_info = {
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
 	.init_chipset		= delkin_cb_init_chipset,
+	.chipset		= ide_pci,
 };
 
 static int __devinit
@@ -97,7 +98,6 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_pci;		/* this enables IRQ sharing */
 
 	rc = ide_host_add(&delkin_cb_port_info, hws, &host);
 	if (rc)
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 0e2df67..770cfa6 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -111,6 +111,7 @@ static const struct ide_port_info falconide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
 static void __init falconide_setup_ports(hw_regs_t *hw)
@@ -128,8 +129,6 @@ static void __init falconide_setup_ports(hw_regs_t *hw)
 
 	hw->irq = IRQ_MFP_IDE;
 	hw->ack_intr = NULL;
-
-	hw->chipset = ide_generic;
 }
 
     /*
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index c711951..71db2f9 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -106,14 +106,13 @@ static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base,
 
 	hw->irq = IRQ_AMIGA_PORTS;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info gayle_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
     /*
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 36da913..6352a44 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -398,11 +398,11 @@ static void icside_setup_ports(hw_regs_t *hw, void __iomem *base,
 
 	hw->irq = ec->irq;
 	hw->dev = &ec->dev;
-	hw->chipset = ide_acorn;
 }
 
 static const struct ide_port_info icside_v5_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_acorn,
 };
 
 static int __devinit
@@ -457,6 +457,7 @@ static const struct ide_port_info icside_v6_port_info __initdata = {
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,
 	.mwdma_mask		= ATA_MWDMA2,
 	.swdma_mask		= ATA_SWDMA2,
+	.chipset		= ide_acorn,
 };
 
 static int __devinit
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 78aca75..617ca7a 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -25,6 +25,7 @@ static const struct ide_port_info ide_4drives_port_info = {
 	.port_ops		= &ide_4drives_port_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA |
 				  IDE_HFLAG_4DRIVES,
+	.chipset		= ide_4drives,
 };
 
 static int __init ide_4drives_init(void)
@@ -52,7 +53,6 @@ static int __init ide_4drives_init(void)
 
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = 14;
-	hw.chipset = ide_4drives;
 
 	return ide_host_add(&ide_4drives_port_info, hws, NULL);
 }
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 9e47f35..43d09dc 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -155,6 +155,7 @@ static const struct ide_port_info idecs_port_info = {
 	.port_ops		= &idecs_port_ops,
 	.host_flags		= IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_pci,
 };
 
 static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
@@ -181,7 +182,6 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     memset(&hw, 0, sizeof(hw));
     ide_std_init_ports(&hw, io, ctl);
     hw.irq = irq;
-    hw.chipset = ide_pci;
     hw.dev = &handle->dev;
 
     rc = ide_host_add(&idecs_port_info, hws, &host);
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 7812ca0..0427759 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -29,6 +29,7 @@ MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports");
 
 static const struct ide_port_info ide_generic_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 #ifdef CONFIG_ARM
@@ -132,8 +133,6 @@ static int __init ide_generic_init(void)
 #else
 			hw.irq = legacy_irqs[i];
 #endif
-			hw.chipset = ide_generic;
-
 			rc = ide_host_add(&ide_generic_port_info, hws, NULL);
 			if (rc) {
 				release_region(io_addr + 0x206, 1);
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index c06ebdc..40eff6c 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -73,12 +73,12 @@ static inline void hw_setup(hw_regs_t *hw)
 		hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
 	hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT;
 	hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ;
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info h8300_port_info = {
 	.tp_ops			= &h8300_tp_ops,
 	.host_flags		= IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int __init h8300_ide_init(void)
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
index 8c5dcbf..0c5b29c 100644
--- a/drivers/ide/ide-legacy.c
+++ b/drivers/ide/ide-legacy.c
@@ -33,7 +33,6 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
 
 	ide_std_init_ports(hw, base, ctl);
 	hw->irq = irq;
-	hw->chipset = d->chipset;
 	hw->config = config;
 
 	hws[port_no] = hw;
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 6e80b77..47043fd 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -29,6 +29,7 @@ static struct pnp_device_id idepnp_devices[] = {
 
 static const struct ide_port_info ide_pnp_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
@@ -62,7 +63,6 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = pnp_irq(dev, 0);
-	hw.chipset = ide_generic;
 
 	rc = ide_host_add(&ide_pnp_port_info, hws, &host);
 	if (rc)
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 727a671..f17ba19 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1048,8 +1048,7 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 {
 	hwif->channel = port;
 
-	if (d->chipset)
-		hwif->chipset = d->chipset;
+	hwif->chipset = d->chipset ? d->chipset : ide_pci;
 
 	if (d->init_iops)
 		d->init_iops(hwif);
@@ -1178,7 +1177,6 @@ static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
 {
 	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
 	hwif->irq = hw->irq;
-	hwif->chipset = hw->chipset;
 	hwif->dev = hw->dev;
 	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
 	hwif->ack_intr = hw->ack_intr;
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 051b4ab..8136533 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -40,12 +40,11 @@ static void __devinit plat_ide_setup_ports(hw_regs_t *hw,
 	hw->io_ports.ctl_addr = (unsigned long)ctrl;
 
 	hw->irq = irq;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info platform_ide_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int __devinit plat_ide_probe(struct platform_device *pdev)
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 4b1718e..3af9e96 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -76,13 +76,12 @@ static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base,
 
 	hw->irq = irq;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info macide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
 static const char *mac_ide_name[] =
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 09d813d..a455c25 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -306,6 +306,7 @@ static struct ide_port_info __devinitdata palm_bk3710_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO,
 	.pio_mask		= ATA_PIO4,
 	.mwdma_mask		= ATA_MWDMA2,
+	.chipset		= ide_palm3710,
 };
 
 static int __init palm_bk3710_probe(struct platform_device *pdev)
@@ -363,7 +364,6 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 			(base + IDE_PALM_ATA_PRI_CTL_OFFSET);
 	hw.irq = irq->start;
 	hw.dev = &pdev->dev;
-	hw.chipset = ide_palm3710;
 
 	palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 :
 							     ATA_UDMA5;
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index c793466..7488d4f 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -70,8 +70,6 @@ static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base,
 
 	hw->irq = irq;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
@@ -119,6 +117,7 @@ static const struct ide_port_info q40ide_port_info = {
 	.tp_ops			= &q40ide_tp_ops,
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
 /* 
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
index d5003ca..bd4d7a8 100644
--- a/drivers/ide/rapide.c
+++ b/drivers/ide/rapide.c
@@ -13,6 +13,7 @@
 
 static const struct ide_port_info rapide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base,
@@ -49,7 +50,6 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 
 	memset(&hw, 0, sizeof(hw));
 	rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
-	hw.chipset = ide_generic;
 	hw.dev = &ec->dev;
 
 	ret = ide_host_add(&rapide_port_info, hws, &host);
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 5be41f2..9e3aef3 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -567,7 +567,6 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 		hw.io_ports_array[i] = ports->dma + 0x20 + i * 4;
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_pci;
 
 	rc = ide_host_add(d, hws, &host);
 	if (rc)
@@ -823,6 +822,7 @@ static const struct ide_port_info scc_chipset __devinitdata = {
 	.host_flags	= IDE_HFLAG_SINGLE,
 	.irq_flags	= IRQF_SHARED,
 	.pio_mask	= ATA_PIO4,
+	.chipset	= ide_pci,
 };
 
 /**
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 7a3a12d..82519dd 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -344,7 +344,6 @@ static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
 
 	memset(hw, 0, sizeof(*hw));
 	hw->dev = &dev->dev;
-	hw->chipset = d->chipset ? d->chipset : ide_pci;
 	ide_std_init_ports(hw, base, ctl | 2);
 
 	return 0;
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index e5d2a48..676d41c 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -575,7 +575,6 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	memset(&hw, 0, sizeof(hw));
 	sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport);
 	hw.irq = dev->irq;
-	hw.chipset = ide_pci;
 	hw.dev = &dev->dev;
 
 	/* Initializing chipset IRQ Registers */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index fc61328..9652edb 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -186,7 +186,6 @@ typedef struct hw_regs_s {
 
 	int		irq;			/* our irq number */
 	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
-	hwif_chipset_t  chipset;
 	struct device	*dev, *parent;
 	unsigned long	config;
 } hw_regs_t;
-- 
cgit v0.10.2


From dca3983059a4481e4ae97bbf0ac4b4c21429e1a5 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:24 +0200
Subject: ide: pass number of ports to ide_host_{alloc,add}() (v2)

Pass number of ports to ide_host_{alloc,add}() and then update
all users accordingly.

v2:
- drop no longer needed NULL initializers in buddha.c, cmd640.c and gayle.c
  (noticed by Sergei)

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 8d39cc9..11fe1ff 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -247,8 +247,7 @@ irqreturn_t at91_irq_handler(int irq, void *dev_id)
 static int __init at91_ide_probe(struct platform_device *pdev)
 {
 	int ret;
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	unsigned long tf_base = 0, ctl_base = 0;
@@ -307,7 +306,7 @@ static int __init at91_ide_probe(struct platform_device *pdev)
 	hw.irq = board->irq_pin;
 	hw.dev = &pdev->dev;
 
-	host = ide_host_alloc(&at91_ide_port_info, hws);
+	host = ide_host_alloc(&at91_ide_port_info, hws, 1);
 	if (!host) {
 		perr("failed to allocate ide host\n");
 		return -ENOMEM;
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 9b31f83..32f5be6 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -508,7 +508,7 @@ static int au_ide_probe(struct platform_device *dev)
 	struct resource *res;
 	struct ide_host *host;
 	int ret = 0;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
 	char *mode = "MWDMA2";
@@ -550,7 +550,7 @@ static int au_ide_probe(struct platform_device *dev)
 	hw.irq = ahwif->irq;
 	hw.dev = &dev->dev;
 
-	ret = ide_host_add(&au1xxx_port_info, hws, &host);
+	ret = ide_host_add(&au1xxx_port_info, hws, 1, &host);
 	if (ret)
 		goto out;
 
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index 9aa2cd9..0450652 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -160,7 +160,7 @@ static int __init buddha_init(void)
 
 	while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
 		unsigned long board;
-		hw_regs_t hw[MAX_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+		hw_regs_t hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
 
 		if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) {
 			buddha_num_hwifs = BUDDHA_NUM_HWIFS;
@@ -224,7 +224,7 @@ fail_base2:
 			hws[i] = &hw[i];
 		}
 
-		ide_host_add(&buddha_port_info, hws, NULL);
+		ide_host_add(&buddha_port_info, hws, i, NULL);
 	}
 
 	return 0;
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index e862a25..edb3a7a 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -708,7 +708,7 @@ static int __init cmd640x_init(void)
 	int second_port_cmd640 = 0, rc;
 	const char *bus_type, *port2;
 	u8 b, cfr;
-	hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL };
+	hw_regs_t hw[2], *hws[2];
 
 	if (cmd640_vlb && probe_for_cmd640_vlb()) {
 		bus_type = "VLB";
@@ -822,7 +822,8 @@ static int __init cmd640x_init(void)
 	cmd640_dump_regs();
 #endif
 
-	return ide_host_add(&cmd640_port_info, hws, NULL);
+	return ide_host_add(&cmd640_port_info, hws, second_port_cmd640 ? 2 : 1,
+			    NULL);
 }
 
 module_param_named(probe_vlb, cmd640_vlb, bool, 0);
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 87987a7..a9023d7 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -110,7 +110,7 @@ static const struct ide_port_info cyrix_chipset __devinitdata = {
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	const struct ide_port_info *d = &cyrix_chipset;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { NULL, NULL };
 
 	ide_setup_pci_noise(dev, d);
 
@@ -136,7 +136,7 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic
 	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
 	hw[0].irq = 14;
 
-	return ide_host_add(d, hws, NULL);
+	return ide_host_add(d, hws, 2, NULL);
 }
 
 static const struct pci_device_id cs5520_pci_tbl[] = {
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
index a0de834..d4a76f2 100644
--- a/drivers/ide/delkin_cb.c
+++ b/drivers/ide/delkin_cb.c
@@ -77,7 +77,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	struct ide_host *host;
 	unsigned long base;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	rc = pci_enable_device(dev);
 	if (rc) {
@@ -99,7 +99,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
 
-	rc = ide_host_add(&delkin_cb_port_info, hws, &host);
+	rc = ide_host_add(&delkin_cb_port_info, hws, 1, &host);
 	if (rc)
 		goto out_disable;
 
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 770cfa6..adb5b0c 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -138,7 +138,7 @@ static void __init falconide_setup_ports(hw_regs_t *hw)
 static int __init falconide_init(void)
 {
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	int rc;
 
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE))
@@ -153,7 +153,7 @@ static int __init falconide_init(void)
 
 	falconide_setup_ports(&hw);
 
-	host = ide_host_alloc(&falconide_port_info, hws);
+	host = ide_host_alloc(&falconide_port_info, hws, 1);
 	if (host == NULL) {
 		rc = -ENOMEM;
 		goto err;
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index 71db2f9..253ff34 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -125,7 +125,7 @@ static int __init gayle_init(void)
     unsigned long base, ctrlport, irqport;
     ide_ack_intr_t *ack_intr;
     int a4000, i, rc;
-    hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+    hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
 
     if (!MACH_IS_AMIGA)
 	return -ENODEV;
@@ -170,7 +170,7 @@ found:
 	hws[i] = &hw[i];
     }
 
-    rc = ide_host_add(&gayle_port_info, hws, NULL);
+    rc = ide_host_add(&gayle_port_info, hws, i, NULL);
     if (rc)
 	release_mem_region(res_start, res_n);
 
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 6352a44..6223b80 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -410,7 +410,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 {
 	void __iomem *base;
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	int ret;
 
 	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
@@ -431,7 +431,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 
 	icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec);
 
-	host = ide_host_alloc(&icside_v5_port_info, hws);
+	host = ide_host_alloc(&icside_v5_port_info, hws, 1);
 	if (host == NULL)
 		return -ENODEV;
 
@@ -467,7 +467,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	struct ide_host *host;
 	unsigned int sel = 0;
 	int ret;
-	hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1], NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1] };
 	struct ide_port_info d = icside_v6_port_info;
 
 	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
@@ -507,7 +507,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec);
 	icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec);
 
-	host = ide_host_alloc(&d, hws);
+	host = ide_host_alloc(&d, hws, 2);
 	if (host == NULL)
 		return -ENODEV;
 
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 617ca7a..189b8bd 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -31,7 +31,7 @@ static const struct ide_port_info ide_4drives_port_info = {
 static int __init ide_4drives_init(void)
 {
 	unsigned long base = 0x1f0, ctl = 0x3f6;
-	hw_regs_t hw, *hws[] = { &hw, &hw, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw, &hw };
 
 	if (probe_4drives == 0)
 		return -ENODEV;
@@ -54,7 +54,7 @@ static int __init ide_4drives_init(void)
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = 14;
 
-	return ide_host_add(&ide_4drives_port_info, hws, NULL);
+	return ide_host_add(&ide_4drives_port_info, hws, 2, NULL);
 }
 
 module_init(ide_4drives_init);
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 43d09dc..63309ad 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -164,7 +164,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     struct ide_host *host;
     ide_hwif_t *hwif;
     int i, rc;
-    hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+    hw_regs_t hw, *hws[] = { &hw };
 
     if (!request_region(io, 8, DRV_NAME)) {
 	printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
@@ -184,7 +184,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     hw.irq = irq;
     hw.dev = &handle->dev;
 
-    rc = ide_host_add(&idecs_port_info, hws, &host);
+    rc = ide_host_add(&idecs_port_info, hws, 1, &host);
     if (rc)
 	goto out_release;
 
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 0427759..0d40848 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -86,7 +86,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary)
 
 static int __init ide_generic_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	unsigned long io_addr;
 	int i, rc = 0, primary = 0, secondary = 0;
 
@@ -133,7 +133,7 @@ static int __init ide_generic_init(void)
 #else
 			hw.irq = legacy_irqs[i];
 #endif
-			rc = ide_host_add(&ide_generic_port_info, hws, NULL);
+			rc = ide_host_add(&ide_generic_port_info, hws, 1, NULL);
 			if (rc) {
 				release_region(io_addr + 0x206, 1);
 				release_region(io_addr, 8);
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index 40eff6c..0b5fabe 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -83,7 +83,7 @@ static const struct ide_port_info h8300_port_info = {
 
 static int __init h8300_ide_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n");
 
@@ -96,7 +96,7 @@ static int __init h8300_ide_init(void)
 
 	hw_setup(&hw);
 
-	return ide_host_add(&h8300_port_info, hws, NULL);
+	return ide_host_add(&h8300_port_info, hws, 1, NULL);
 
 out_busy:
 	printk(KERN_ERR "ide-h8300: IDE I/F resource already used.\n");
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
index 0c5b29c..98389e5 100644
--- a/drivers/ide/ide-legacy.c
+++ b/drivers/ide/ide-legacy.c
@@ -40,7 +40,7 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
 
 int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 {
-	hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { NULL, NULL };
 
 	memset(&hw, 0, sizeof(hw));
 
@@ -52,6 +52,6 @@ int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 	    (d->host_flags & IDE_HFLAG_SINGLE))
 		return -ENOENT;
 
-	return ide_host_add(d, hws, NULL);
+	return ide_host_add(d, hws, 2, NULL);
 }
 EXPORT_SYMBOL_GPL(ide_legacy_device_add);
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 47043fd..6bca0f0 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -37,7 +37,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	struct ide_host *host;
 	unsigned long base, ctl;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
 
@@ -64,7 +64,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = pnp_irq(dev, 0);
 
-	rc = ide_host_add(&ide_pnp_port_info, hws, &host);
+	rc = ide_host_add(&ide_pnp_port_info, hws, 1, &host);
 	if (rc)
 		goto out;
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f17ba19..6c7451a 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1261,7 +1261,8 @@ out_nomem:
 	return -ENOMEM;
 }
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
+struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws,
+				unsigned int n_ports)
 {
 	struct ide_host *host;
 	struct device *dev = hws[0] ? hws[0]->dev : NULL;
@@ -1272,7 +1273,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
 	if (host == NULL)
 		return NULL;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
+	for (i = 0; i < n_ports; i++) {
 		ide_hwif_t *hwif;
 		int idx;
 
@@ -1443,12 +1444,12 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 EXPORT_SYMBOL_GPL(ide_host_register);
 
 int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws,
-		 struct ide_host **hostp)
+		 unsigned int n_ports, struct ide_host **hostp)
 {
 	struct ide_host *host;
 	int rc;
 
-	host = ide_host_alloc(d, hws);
+	host = ide_host_alloc(d, hws, n_ports);
 	if (host == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 8136533..47413c2 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -54,7 +54,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	struct pata_platform_info *pdata;
 	struct ide_host *host;
 	int ret = 0, mmio = 0;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	struct ide_port_info d = platform_ide_port_info;
 
 	pdata = pdev->dev.platform_data;
@@ -98,7 +98,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	if (mmio)
 		d.host_flags |= IDE_HFLAG_MMIO;
 
-	ret = ide_host_add(&d, hws, &host);
+	ret = ide_host_add(&d, hws, 1, &host);
 	if (ret)
 		goto out;
 
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 3af9e96..31aa278 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -96,7 +96,7 @@ static int __init macide_init(void)
 	ide_ack_intr_t *ack_intr;
 	unsigned long base;
 	int irq;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	if (!MACH_IS_MAC)
 		return -ENODEV;
@@ -126,7 +126,7 @@ static int __init macide_init(void)
 
 	macide_setup_ports(&hw, base, irq, ack_intr);
 
-	return ide_host_add(&macide_port_info, hws, NULL);
+	return ide_host_add(&macide_port_info, hws, 1, NULL);
 }
 
 module_init(macide_init);
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index a455c25..4507a6d 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -316,7 +316,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 	void __iomem *base;
 	unsigned long rate, mem_size;
 	int i, rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	clk = clk_get(&pdev->dev, "IDECLK");
 	if (IS_ERR(clk))
@@ -369,7 +369,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 							     ATA_UDMA5;
 
 	/* Register the IDE interface with Linux */
-	rc = ide_host_add(&palm_bk3710_port_info, hws, NULL);
+	rc = ide_host_add(&palm_bk3710_port_info, hws, 1, NULL);
 	if (rc)
 		goto out;
 
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index f76e4e6..f4f8064 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1029,7 +1029,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 	const int *bidp;
 	struct ide_host *host;
 	ide_hwif_t *hwif;
-	hw_regs_t *hws[] = { hw, NULL, NULL, NULL };
+	hw_regs_t *hws[] = { hw };
 	struct ide_port_info d = pmac_port_info;
 	int rc;
 
@@ -1077,7 +1077,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 	/* Make sure we have sane timings */
 	sanitize_timings(pmif);
 
-	host = ide_host_alloc(&d, hws);
+	host = ide_host_alloc(&d, hws, 1);
 	if (host == NULL)
 		return -ENOMEM;
 	hwif = host->ports[0];
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index 7488d4f..e46229f 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -135,7 +135,7 @@ static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={
 static int __init q40ide_init(void)
 {
     int i;
-    hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+    hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
 
     if (!MACH_IS_Q40)
       return -ENODEV;
@@ -162,7 +162,7 @@ static int __init q40ide_init(void)
 	hws[i] = &hw[i];
     }
 
-    return ide_host_add(&q40ide_port_info, hws, NULL);
+    return ide_host_add(&q40ide_port_info, hws, Q40IDE_NUM_HWIFS, NULL);
 }
 
 module_init(q40ide_init);
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
index bd4d7a8..c4da3dd 100644
--- a/drivers/ide/rapide.c
+++ b/drivers/ide/rapide.c
@@ -36,7 +36,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 	void __iomem *base;
 	struct ide_host *host;
 	int ret;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	ret = ecard_request_resources(ec);
 	if (ret)
@@ -52,7 +52,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 	rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
 	hw.dev = &ec->dev;
 
-	ret = ide_host_add(&rapide_port_info, hws, &host);
+	ret = ide_host_add(&rapide_port_info, hws, 1, &host);
 	if (ret)
 		goto release;
 
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 9e3aef3..9415f8c 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -559,7 +559,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 {
 	struct scc_ports *ports = pci_get_drvdata(dev);
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	int i, rc;
 
 	memset(&hw, 0, sizeof(hw));
@@ -568,7 +568,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
 
-	rc = ide_host_add(d, hws, &host);
+	rc = ide_host_add(d, hws, 1, &host);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 82519dd..d78f4c9 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -538,7 +538,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
 		     void *priv)
 {
 	struct ide_host *host;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { NULL, NULL };
 	int ret;
 
 	ret = ide_setup_pci_controller(dev, d, 1);
@@ -547,7 +547,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
 
 	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
 
-	host = ide_host_alloc(d, hws);
+	host = ide_host_alloc(d, hws, 2);
 	if (host == NULL) {
 		ret = -ENOMEM;
 		goto out;
@@ -596,7 +596,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 		ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]);
 	}
 
-	host = ide_host_alloc(d, hws);
+	host = ide_host_alloc(d, hws, 4);
 	if (host == NULL) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index 676d41c..3f8ee35 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -546,7 +546,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	unsigned long cmd_base, irqport;
 	unsigned long bar0, cmd_phys_base, ctl;
 	void __iomem *virt_base;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	int rc;
 
 	/*  Get the CmdBlk and CtrlBlk Base Registers */
@@ -580,7 +580,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	/* Initializing chipset IRQ Registers */
 	writel(0x03, (void __iomem *)(irqport + IOC4_INTR_SET * 4));
 
-	rc = ide_host_add(&sgiioc4_port_info, hws, NULL);
+	rc = ide_host_add(&sgiioc4_port_info, hws, 1, NULL);
 	if (!rc)
 		return 0;
 
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index e33d764..16adc18 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -130,8 +130,7 @@ static const struct ide_port_info tx4938ide_port_info __initdata = {
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	struct tx4938ide_platform_info *pdata = pdev->dev.platform_data;
@@ -183,7 +182,7 @@ static int __init tx4938ide_probe(struct platform_device *pdev)
 		tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, 0);
 	else
 		d.port_ops = NULL;
-	ret = ide_host_add(&d, hws, &host);
+	ret = ide_host_add(&d, hws, 1, &host);
 	if (!ret)
 		platform_set_drvdata(pdev, host);
 	return ret;
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 564422d..fa57920 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -537,8 +537,7 @@ static const struct ide_port_info tx4939ide_port_info __initdata = {
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	int irq, ret;
@@ -581,7 +580,7 @@ static int __init tx4939ide_probe(struct platform_device *pdev)
 	hw.dev = &pdev->dev;
 
 	pr_info("TX4939 IDE interface (base %#lx, irq %d)\n", mapbase, irq);
-	host = ide_host_alloc(&tx4939ide_port_info, hws);
+	host = ide_host_alloc(&tx4939ide_port_info, hws, 1);
 	if (!host)
 		return -ENOMEM;
 	/* use extra_base for base address of the all registers */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9652edb..a3cd568 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1456,11 +1456,12 @@ void ide_undecoded_slave(ide_drive_t *);
 void ide_port_apply_params(ide_hwif_t *);
 int ide_sysfs_register_port(ide_hwif_t *);
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **);
+struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **,
+				unsigned int);
 void ide_host_free(struct ide_host *);
 int ide_host_register(struct ide_host *, const struct ide_port_info *,
 		      hw_regs_t **);
-int ide_host_add(const struct ide_port_info *, hw_regs_t **,
+int ide_host_add(const struct ide_port_info *, hw_regs_t **, unsigned int,
 		 struct ide_host **);
 void ide_host_remove(struct ide_host *);
 int ide_legacy_device_add(const struct ide_port_info *, unsigned long);
-- 
cgit v0.10.2


From 9f36d31437922354d104a2db407f397e79e4027e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:25 +0200
Subject: ide: remove hw_regs_t typedef

Remove hw_regs_t typedef and rename struct hw_regs_s to struct ide_hw.

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 11fe1ff..fc0949a 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -247,7 +247,7 @@ irqreturn_t at91_irq_handler(int irq, void *dev_id)
 static int __init at91_ide_probe(struct platform_device *pdev)
 {
 	int ret;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	unsigned long tf_base = 0, ctl_base = 0;
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 32f5be6..58121bd 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -449,7 +449,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
 }
 #endif
 
-static void auide_setup_ports(hw_regs_t *hw, _auide_hwif *ahwif)
+static void auide_setup_ports(struct ide_hw *hw, _auide_hwif *ahwif)
 {
 	int i;
 	unsigned long *ata_regs = hw->io_ports_array;
@@ -508,7 +508,7 @@ static int au_ide_probe(struct platform_device *dev)
 	struct resource *res;
 	struct ide_host *host;
 	int ret = 0;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
 	char *mode = "MWDMA2";
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index 0450652..e3c6a59 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -121,7 +121,7 @@ static int xsurf_ack_intr(ide_hwif_t *hwif)
     return 1;
 }
 
-static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
 				      unsigned long ctl, unsigned long irq_port,
 				      ide_ack_intr_t *ack_intr)
 {
@@ -160,7 +160,7 @@ static int __init buddha_init(void)
 
 	while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
 		unsigned long board;
-		hw_regs_t hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
+		struct ide_hw hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
 
 		if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) {
 			buddha_num_hwifs = BUDDHA_NUM_HWIFS;
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index edb3a7a..1683ed5 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -708,7 +708,7 @@ static int __init cmd640x_init(void)
 	int second_port_cmd640 = 0, rc;
 	const char *bus_type, *port2;
 	u8 b, cfr;
-	hw_regs_t hw[2], *hws[2];
+	struct ide_hw hw[2], *hws[2];
 
 	if (cmd640_vlb && probe_for_cmd640_vlb()) {
 		bus_type = "VLB";
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index a9023d7..bd066bb 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -110,7 +110,7 @@ static const struct ide_port_info cyrix_chipset __devinitdata = {
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	const struct ide_port_info *d = &cyrix_chipset;
-	hw_regs_t hw[2], *hws[] = { NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { NULL, NULL };
 
 	ide_setup_pci_noise(dev, d);
 
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
index d4a76f2..1e10eba 100644
--- a/drivers/ide/delkin_cb.c
+++ b/drivers/ide/delkin_cb.c
@@ -77,7 +77,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	struct ide_host *host;
 	unsigned long base;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	rc = pci_enable_device(dev);
 	if (rc) {
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index adb5b0c..22fa273 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -114,7 +114,7 @@ static const struct ide_port_info falconide_port_info = {
 	.chipset		= ide_generic,
 };
 
-static void __init falconide_setup_ports(hw_regs_t *hw)
+static void __init falconide_setup_ports(struct ide_hw *hw)
 {
 	int i;
 
@@ -138,7 +138,7 @@ static void __init falconide_setup_ports(hw_regs_t *hw)
 static int __init falconide_init(void)
 {
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	int rc;
 
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE))
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index 253ff34..4451a6a 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -88,7 +88,7 @@ static int gayle_ack_intr_a1200(ide_hwif_t *hwif)
     return 1;
 }
 
-static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
 				     unsigned long ctl, unsigned long irq_port,
 				     ide_ack_intr_t *ack_intr)
 {
@@ -125,7 +125,7 @@ static int __init gayle_init(void)
     unsigned long base, ctrlport, irqport;
     ide_ack_intr_t *ack_intr;
     int a4000, i, rc;
-    hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
+    struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
 
     if (!MACH_IS_AMIGA)
 	return -ENODEV;
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 6223b80..c5269fa 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -381,7 +381,7 @@ static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d)
 	return -EOPNOTSUPP;
 }
 
-static void icside_setup_ports(hw_regs_t *hw, void __iomem *base,
+static void icside_setup_ports(struct ide_hw *hw, void __iomem *base,
 			       struct cardinfo *info, struct expansion_card *ec)
 {
 	unsigned long port = (unsigned long)base + info->dataoffset;
@@ -410,7 +410,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 {
 	void __iomem *base;
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	int ret;
 
 	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
@@ -467,7 +467,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	struct ide_host *host;
 	unsigned int sel = 0;
 	int ret;
-	hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1] };
+	struct ide_hw hw[2], *hws[] = { &hw[0], &hw[1] };
 	struct ide_port_info d = icside_v6_port_info;
 
 	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 189b8bd..979d342 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -31,7 +31,7 @@ static const struct ide_port_info ide_4drives_port_info = {
 static int __init ide_4drives_init(void)
 {
 	unsigned long base = 0x1f0, ctl = 0x3f6;
-	hw_regs_t hw, *hws[] = { &hw, &hw };
+	struct ide_hw hw, *hws[] = { &hw, &hw };
 
 	if (probe_4drives == 0)
 		return -ENODEV;
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 63309ad..527908f 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -164,7 +164,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     struct ide_host *host;
     ide_hwif_t *hwif;
     int i, rc;
-    hw_regs_t hw, *hws[] = { &hw };
+    struct ide_hw hw, *hws[] = { &hw };
 
     if (!request_region(io, 8, DRV_NAME)) {
 	printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 0d40848..54d7c46 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -86,7 +86,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary)
 
 static int __init ide_generic_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	unsigned long io_addr;
 	int i, rc = 0, primary = 0, secondary = 0;
 
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index 0b5fabe..520f42c 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -64,11 +64,11 @@ static const struct ide_tp_ops h8300_tp_ops = {
 
 #define H8300_IDE_GAP (2)
 
-static inline void hw_setup(hw_regs_t *hw)
+static inline void hw_setup(struct ide_hw *hw)
 {
 	int i;
 
-	memset(hw, 0, sizeof(hw_regs_t));
+	memset(hw, 0, sizeof(*hw));
 	for (i = 0; i <= 7; i++)
 		hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
 	hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT;
@@ -83,7 +83,7 @@ static const struct ide_port_info h8300_port_info = {
 
 static int __init h8300_ide_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n");
 
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
index 98389e5..b9654a7 100644
--- a/drivers/ide/ide-legacy.c
+++ b/drivers/ide/ide-legacy.c
@@ -1,7 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/ide.h>
 
-static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
+static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw,
 				u8 port_no, const struct ide_port_info *d,
 				unsigned long config)
 {
@@ -40,7 +40,7 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
 
 int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 {
-	hw_regs_t hw[2], *hws[] = { NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { NULL, NULL };
 
 	memset(&hw, 0, sizeof(hw));
 
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 6bca0f0..017b1df 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -37,7 +37,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	struct ide_host *host;
 	unsigned long base, ctl;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 6c7451a..2936382 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1173,7 +1173,7 @@ static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
 	ide_port_init_devices_data(hwif);
 }
 
-static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
+static void ide_init_port_hw(ide_hwif_t *hwif, struct ide_hw *hw)
 {
 	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
 	hwif->irq = hw->irq;
@@ -1261,8 +1261,8 @@ out_nomem:
 	return -ENOMEM;
 }
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws,
-				unsigned int n_ports)
+struct ide_host *ide_host_alloc(const struct ide_port_info *d,
+				struct ide_hw **hws, unsigned int n_ports)
 {
 	struct ide_host *host;
 	struct device *dev = hws[0] ? hws[0]->dev : NULL;
@@ -1349,7 +1349,7 @@ static void ide_disable_port(ide_hwif_t *hwif)
 }
 
 int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
-		      hw_regs_t **hws)
+		      struct ide_hw **hws)
 {
 	ide_hwif_t *hwif, *mate = NULL;
 	int i, j = 0;
@@ -1443,7 +1443,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 }
 EXPORT_SYMBOL_GPL(ide_host_register);
 
-int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws,
+int ide_host_add(const struct ide_port_info *d, struct ide_hw **hws,
 		 unsigned int n_ports, struct ide_host **hostp)
 {
 	struct ide_host *host;
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 47413c2..ee9b55e 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -21,7 +21,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
-static void __devinit plat_ide_setup_ports(hw_regs_t *hw,
+static void __devinit plat_ide_setup_ports(struct ide_hw *hw,
 					   void __iomem *base,
 					   void __iomem *ctrl,
 					   struct pata_platform_info *pdata,
@@ -54,7 +54,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	struct pata_platform_info *pdata;
 	struct ide_host *host;
 	int ret = 0, mmio = 0;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_port_info d = platform_ide_port_info;
 
 	pdata = pdev->dev.platform_data;
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 31aa278..1447c8c 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -62,7 +62,7 @@ int macide_ack_intr(ide_hwif_t* hwif)
 	return 0;
 }
 
-static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
 				      int irq, ide_ack_intr_t *ack_intr)
 {
 	int i;
@@ -96,7 +96,7 @@ static int __init macide_init(void)
 	ide_ack_intr_t *ack_intr;
 	unsigned long base;
 	int irq;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	if (!MACH_IS_MAC)
 		return -ENODEV;
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 4507a6d..3c1dc01 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -316,7 +316,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 	void __iomem *base;
 	unsigned long rate, mem_size;
 	int i, rc;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	clk = clk_get(&pdev->dev, "IDECLK");
 	if (IS_ERR(clk))
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index f4f8064..97642a7a 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1023,13 +1023,14 @@ static const struct ide_port_info pmac_port_info = {
  * Setup, register & probe an IDE channel driven by this driver, this is
  * called by one of the 2 probe functions (macio or PCI).
  */
-static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
+static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif,
+					   struct ide_hw *hw)
 {
 	struct device_node *np = pmif->node;
 	const int *bidp;
 	struct ide_host *host;
 	ide_hwif_t *hwif;
-	hw_regs_t *hws[] = { hw };
+	struct ide_hw *hws[] = { hw };
 	struct ide_port_info d = pmac_port_info;
 	int rc;
 
@@ -1124,7 +1125,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 	return 0;
 }
 
-static void __devinit pmac_ide_init_ports(hw_regs_t *hw, unsigned long base)
+static void __devinit pmac_ide_init_ports(struct ide_hw *hw, unsigned long base)
 {
 	int i;
 
@@ -1144,7 +1145,7 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match)
 	unsigned long regbase;
 	pmac_ide_hwif_t *pmif;
 	int irq, rc;
-	hw_regs_t hw;
+	struct ide_hw hw;
 
 	pmif = kzalloc(sizeof(*pmif), GFP_KERNEL);
 	if (pmif == NULL)
@@ -1268,7 +1269,7 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	void __iomem *base;
 	unsigned long rbase, rlen;
 	int rc;
-	hw_regs_t hw;
+	struct ide_hw hw;
 
 	np = pci_device_to_OF_node(pdev);
 	if (np == NULL) {
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index e46229f..ab49a97 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -51,11 +51,11 @@ static int q40ide_default_irq(unsigned long base)
 /*
  * Addresses are pretranslated for Q40 ISA access.
  */
-static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base,
+static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base,
 			ide_ack_intr_t *ack_intr,
 			int irq)
 {
-	memset(hw, 0, sizeof(hw_regs_t));
+	memset(hw, 0, sizeof(*hw));
 	/* BIG FAT WARNING: 
 	   assumption: only DATA port is ever used in 16 bit mode */
 	hw->io_ports.data_addr = Q40_ISA_IO_W(base);
@@ -135,7 +135,7 @@ static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={
 static int __init q40ide_init(void)
 {
     int i;
-    hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
+    struct ide_hw hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
 
     if (!MACH_IS_Q40)
       return -ENODEV;
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
index c4da3dd..00f5424 100644
--- a/drivers/ide/rapide.c
+++ b/drivers/ide/rapide.c
@@ -16,7 +16,7 @@ static const struct ide_port_info rapide_port_info = {
 	.chipset		= ide_generic,
 };
 
-static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base,
+static void rapide_setup_ports(struct ide_hw *hw, void __iomem *base,
 			       void __iomem *ctrl, unsigned int sz, int irq)
 {
 	unsigned long port = (unsigned long)base;
@@ -36,7 +36,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 	void __iomem *base;
 	struct ide_host *host;
 	int ret;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	ret = ecard_request_resources(ec);
 	if (ret)
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 9415f8c..1104bb3 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -559,7 +559,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 {
 	struct scc_ports *ports = pci_get_drvdata(dev);
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	int i, rc;
 
 	memset(&hw, 0, sizeof(hw));
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index d78f4c9..5314edf 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -301,11 +301,11 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *
 }
 
 /**
- *	ide_hw_configure	-	configure a hw_regs_t instance
+ *	ide_hw_configure	-	configure a struct ide_hw instance
  *	@dev: PCI device holding interface
  *	@d: IDE port info
  *	@port: port number
- *	@hw: hw_regs_t instance corresponding to this port
+ *	@hw: struct ide_hw instance corresponding to this port
  *
  *	Perform the initial set up for the hardware interface structure. This
  *	is done per interface port rather than per PCI device. There may be
@@ -315,7 +315,7 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *
  */
 
 static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
-			    unsigned int port, hw_regs_t *hw)
+			    unsigned int port, struct ide_hw *hw)
 {
 	unsigned long ctl = 0, base = 0;
 
@@ -445,8 +445,8 @@ out:
  *	ide_pci_setup_ports	-	configure ports/devices on PCI IDE
  *	@dev: PCI device
  *	@d: IDE port info
- *	@hw: hw_regs_t instances corresponding to this PCI IDE device
- *	@hws: hw_regs_t pointers table to update
+ *	@hw: struct ide_hw instances corresponding to this PCI IDE device
+ *	@hws: struct ide_hw pointers table to update
  *
  *	Scan the interfaces attached to this device and do any
  *	necessary per port setup. Attach the devices and ask the
@@ -458,7 +458,7 @@ out:
  */
 
 void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
-			 hw_regs_t *hw, hw_regs_t **hws)
+			 struct ide_hw *hw, struct ide_hw **hws)
 {
 	int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port;
 	u8 tmp;
@@ -538,7 +538,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
 		     void *priv)
 {
 	struct ide_host *host;
-	hw_regs_t hw[2], *hws[] = { NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { NULL, NULL };
 	int ret;
 
 	ret = ide_setup_pci_controller(dev, d, 1);
@@ -586,7 +586,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 	struct pci_dev *pdev[] = { dev1, dev2 };
 	struct ide_host *host;
 	int ret, i;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
 	for (i = 0; i < 2; i++) {
 		ret = ide_setup_pci_controller(pdev[i], d, !i);
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index 3f8ee35..5f37f16 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -91,7 +91,7 @@ typedef struct {
 
 
 static void
-sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
+sgiioc4_init_hwif_ports(struct ide_hw *hw, unsigned long data_port,
 			unsigned long ctrl_port, unsigned long irq_port)
 {
 	unsigned long reg = data_port;
@@ -546,7 +546,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	unsigned long cmd_base, irqport;
 	unsigned long bar0, cmd_phys_base, ctl;
 	void __iomem *virt_base;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	int rc;
 
 	/*  Get the CmdBlk and CtrlBlk Base Registers */
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index 16adc18..ea89fdd 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -130,7 +130,7 @@ static const struct ide_port_info tx4938ide_port_info __initdata = {
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	struct tx4938ide_platform_info *pdata = pdev->dev.platform_data;
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index fa57920..9f73fd4 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -537,7 +537,7 @@ static const struct ide_port_info tx4939ide_port_info __initdata = {
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	int irq, ret;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a3cd568..b1b903a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -178,7 +178,7 @@ typedef u8 hwif_chipset_t;
 /*
  * Structure to hold all information about the location of this port
  */
-typedef struct hw_regs_s {
+struct ide_hw {
 	union {
 		struct ide_io_ports	io_ports;
 		unsigned long		io_ports_array[IDE_NR_PORTS];
@@ -188,9 +188,9 @@ typedef struct hw_regs_s {
 	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
 	struct device	*dev, *parent;
 	unsigned long	config;
-} hw_regs_t;
+};
 
-static inline void ide_std_init_ports(hw_regs_t *hw,
+static inline void ide_std_init_ports(struct ide_hw *hw,
 				      unsigned long io_addr,
 				      unsigned long ctl_addr)
 {
@@ -1212,7 +1212,7 @@ static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev)
 }
 
 void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *,
-			 hw_regs_t *, hw_regs_t **);
+			 struct ide_hw *, struct ide_hw **);
 void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
 
 #ifdef CONFIG_BLK_DEV_IDEDMA_PCI
@@ -1456,12 +1456,12 @@ void ide_undecoded_slave(ide_drive_t *);
 void ide_port_apply_params(ide_hwif_t *);
 int ide_sysfs_register_port(ide_hwif_t *);
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **,
+struct ide_host *ide_host_alloc(const struct ide_port_info *, struct ide_hw **,
 				unsigned int);
 void ide_host_free(struct ide_host *);
 int ide_host_register(struct ide_host *, const struct ide_port_info *,
-		      hw_regs_t **);
-int ide_host_add(const struct ide_port_info *, hw_regs_t **, unsigned int,
+		      struct ide_hw **);
+int ide_host_add(const struct ide_port_info *, struct ide_hw **, unsigned int,
 		 struct ide_host **);
 void ide_host_remove(struct ide_host *);
 int ide_legacy_device_add(const struct ide_port_info *, unsigned long);
-- 
cgit v0.10.2


From 5b740ea975c4ce3da12ac21b56f9e43354ca4327 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sun, 17 May 2009 11:29:21 +0200
Subject: sound: use dev_set_drvdata

Eliminate direct accesses to the driver_data field.
cf 82ab13b26f15f49be45f15ccc96bfa0b81dfd015

The semantic patch that makes this change is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
struct device *dev;
expression E;
type T;
@@

- dev->driver_data = (T)E
+ dev_set_drvdata(dev, E)

@@
struct device *dev;
type T;
@@

- (T)dev->driver_data
+ dev_get_drvdata(dev)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 3711d84..47afaa9 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -674,7 +674,7 @@ struct snd_soc_dai *fsl_ssi_create_dai(struct fsl_ssi_info *ssi_info)
 	ssi_private->dev = ssi_info->dev;
 	ssi_private->asynchronous = ssi_info->asynchronous;
 
-	ssi_private->dev->driver_data = fsl_ssi_dai;
+	dev_set_drvdata(ssi_private->dev, fsl_ssi_dai);
 
 	/* Initialize the the device_attribute structure */
 	dev_attr->attr.name = "ssi-stats";
-- 
cgit v0.10.2


From bc28248ee25e5c239cbe6afca35a100b08401de5 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 17 May 2009 18:58:34 +0100
Subject: [ARM] smp: move core localtimer support out of platform specific
 files

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e60ec54..ef4f860 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -859,6 +859,7 @@ source "kernel/time/Kconfig"
 config SMP
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP)
+	depends on GENERIC_CLOCKEVENTS
 	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
diff --git a/arch/arm/include/asm/localtimer.h b/arch/arm/include/asm/localtimer.h
new file mode 100644
index 0000000..3f8c9eb
--- /dev/null
+++ b/arch/arm/include/asm/localtimer.h
@@ -0,0 +1,51 @@
+/*
+ *  arch/arm/include/asm/localtimer.h
+ *
+ *  Copyright (C) 2004-2005 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_ARM_LOCALTIMER_H
+#define __ASM_ARM_LOCALTIMER_H
+
+struct clock_event_device;
+
+/*
+ * Setup a per-cpu timer, whether it be a local timer or dummy broadcast
+ */
+void percpu_timer_setup(void);
+
+/*
+ * Called from assembly, this is the local timer IRQ handler
+ */
+asmlinkage void do_local_timer(struct pt_regs *);
+
+
+#ifdef CONFIG_LOCAL_TIMERS
+/*
+ * Platform provides this to acknowledge a local timer IRQ.
+ * Returns true if the local timer IRQ is to be processed.
+ */
+int local_timer_ack(void);
+
+/*
+ * Stop a local timer interrupt.
+ */
+void local_timer_stop(void);
+
+/*
+ * Setup a local timer interrupt for a CPU.
+ */
+void local_timer_setup(struct clock_event_device *);
+
+#else
+
+static inline void local_timer_stop(void)
+{
+}
+
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 5995935..608f2d5 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -56,11 +56,6 @@ extern void smp_store_cpu_info(unsigned int cpuid);
 extern void smp_cross_call(const struct cpumask *mask);
 
 /*
- * Broadcast a clock event to other CPUs.
- */
-extern void smp_timer_broadcast(const struct cpumask *mask);
-
-/*
  * Boot a secondary CPU, and assign it the specified idle task.
  * This also gives us the initial stack to use for this CPU.
  */
@@ -101,43 +96,8 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 #define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 /*
- * Local timer interrupt handling function (can be IPI'ed).
- */
-extern void local_timer_interrupt(void);
-
-#ifdef CONFIG_LOCAL_TIMERS
-
-/*
- * Stop a local timer interrupt.
- */
-extern void local_timer_stop(void);
-
-/*
- * Platform provides this to acknowledge a local timer IRQ
- */
-extern int local_timer_ack(void);
-
-#else
-
-static inline void local_timer_stop(void)
-{
-}
-
-#endif
-
-/*
- * Setup a local timer interrupt for a CPU.
- */
-extern void local_timer_setup(void);
-
-/*
  * show local interrupt info
  */
 extern void show_local_irqs(struct seq_file *);
 
-/*
- * Called from assembly, this is the local timer IRQ handler
- */
-asmlinkage void do_local_timer(struct pt_regs *);
-
 #endif /* ifndef __ASM_ARM_SMP_H */
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 6014dfd..91130e2 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -22,6 +22,8 @@
 #include <linux/smp.h>
 #include <linux/seq_file.h>
 #include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/clockchips.h>
 
 #include <asm/atomic.h>
 #include <asm/cacheflush.h>
@@ -32,6 +34,7 @@
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/ptrace.h>
+#include <asm/localtimer.h>
 
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
@@ -274,9 +277,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	local_fiq_enable();
 
 	/*
-	 * Setup local timer for this CPU.
+	 * Setup the percpu timer for this CPU.
 	 */
-	local_timer_setup();
+	percpu_timer_setup();
 
 	calibrate_delay();
 
@@ -383,10 +386,16 @@ void show_local_irqs(struct seq_file *p)
 	seq_putc(p, '\n');
 }
 
+/*
+ * Timer (local or broadcast) support
+ */
+static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent);
+
 static void ipi_timer(void)
 {
+	struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent);
 	irq_enter();
-	local_timer_interrupt();
+	evt->event_handler(evt);
 	irq_exit();
 }
 
@@ -405,6 +414,42 @@ asmlinkage void __exception do_local_timer(struct pt_regs *regs)
 }
 #endif
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+static void smp_timer_broadcast(const struct cpumask *mask)
+{
+	send_ipi_message(mask, IPI_TIMER);
+}
+
+static void broadcast_timer_set_mode(enum clock_event_mode mode,
+	struct clock_event_device *evt)
+{
+}
+
+static void local_timer_setup(struct clock_event_device *evt)
+{
+	evt->name	= "dummy_timer";
+	evt->features	= CLOCK_EVT_FEAT_ONESHOT |
+			  CLOCK_EVT_FEAT_PERIODIC |
+			  CLOCK_EVT_FEAT_DUMMY;
+	evt->rating	= 400;
+	evt->mult	= 1;
+	evt->set_mode	= broadcast_timer_set_mode;
+	evt->broadcast	= smp_timer_broadcast;
+
+	clockevents_register_device(evt);
+}
+#endif
+
+void __cpuinit percpu_timer_setup(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu);
+
+	evt->cpumask = cpumask_of(cpu);
+
+	local_timer_setup(evt);
+}
+
 static DEFINE_SPINLOCK(stop_lock);
 
 /*
@@ -501,11 +546,6 @@ void smp_send_reschedule(int cpu)
 	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 }
 
-void smp_timer_broadcast(const struct cpumask *mask)
-{
-	send_ipi_message(mask, IPI_TIMER);
-}
-
 void smp_send_stop(void)
 {
 	cpumask_t mask = cpu_online_map;
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index 1c01d13..cd98e7a 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -11,10 +11,8 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
-#include <linux/device.h>
 #include <linux/smp.h>
 #include <linux/jiffies.h>
-#include <linux/percpu.h>
 #include <linux/clockchips.h>
 #include <linux/irq.h>
 #include <linux/io.h>
@@ -24,18 +22,6 @@
 #include <mach/hardware.h>
 #include <asm/irq.h>
 
-static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
-
-/*
- * Used on SMP for either the local timer or IPI_TIMER
- */
-void local_timer_interrupt(void)
-{
-	struct clock_event_device *clk = &__get_cpu_var(local_clockevent);
-
-	clk->event_handler(clk);
-}
-
 #ifdef CONFIG_LOCAL_TIMERS
 
 /* set up by the platform code */
@@ -44,7 +30,7 @@ void __iomem *twd_base;
 static unsigned long mpcore_timer_rate;
 
 static void local_timer_set_mode(enum clock_event_mode mode,
-				 struct clock_event_device *clk)
+				 struct clock_event_device *evt)
 {
 	unsigned long ctrl;
 
@@ -140,32 +126,29 @@ static void __cpuinit twd_calibrate_rate(void)
 /*
  * Setup the local clock events for a CPU.
  */
-void __cpuinit local_timer_setup(void)
+void __cpuinit local_timer_setup(struct clock_event_device *evt)
 {
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
 	unsigned long flags;
 
 	twd_calibrate_rate();
 
-	clk->name		= "local_timer";
-	clk->features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
-	clk->rating		= 350;
-	clk->set_mode		= local_timer_set_mode;
-	clk->set_next_event	= local_timer_set_next_event;
-	clk->irq		= IRQ_LOCALTIMER;
-	clk->cpumask		= cpumask_of(cpu);
-	clk->shift		= 20;
-	clk->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
-	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
-	clk->min_delta_ns	= clockevent_delta2ns(0xf, clk);
+	evt->name		= "local_timer";
+	evt->features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+	evt->rating		= 350;
+	evt->set_mode		= local_timer_set_mode;
+	evt->set_next_event	= local_timer_set_next_event;
+	evt->irq		= IRQ_LOCALTIMER;
+	evt->shift		= 20;
+	evt->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, evt->shift);
+	evt->max_delta_ns	= clockevent_delta2ns(0xffffffff, evt);
+	evt->min_delta_ns	= clockevent_delta2ns(0xf, evt);
 
 	/* Make sure our local interrupt controller has this enabled */
 	local_irq_save(flags);
 	get_irq_chip(IRQ_LOCALTIMER)->unmask(IRQ_LOCALTIMER);
 	local_irq_restore(flags);
 
-	clockevents_register_device(clk);
+	clockevents_register_device(evt);
 }
 
 /*
@@ -176,29 +159,4 @@ void __cpuexit local_timer_stop(void)
 	__raw_writel(0, twd_base + TWD_TIMER_CONTROL);
 }
 
-#else	/* CONFIG_LOCAL_TIMERS */
-
-static void dummy_timer_set_mode(enum clock_event_mode mode,
-				 struct clock_event_device *clk)
-{
-}
-
-void __cpuinit local_timer_setup(void)
-{
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
-
-	clk->name		= "dummy_timer";
-	clk->features		= CLOCK_EVT_FEAT_ONESHOT |
-				  CLOCK_EVT_FEAT_PERIODIC |
-				  CLOCK_EVT_FEAT_DUMMY;
-	clk->rating		= 400;
-	clk->mult               = 1;
-	clk->set_mode		= dummy_timer_set_mode;
-	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of(cpu);
-
-	clockevents_register_device(clk);
-}
-
-#endif	/* !CONFIG_LOCAL_TIMERS */
+#endif	/* CONFIG_LOCAL_TIMERS */
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index 30a9c68..b34d3a5 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -19,6 +19,7 @@
 #include <asm/cacheflush.h>
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
+#include <asm/localtimer.h>
 
 #include <mach/board-eb.h>
 #include <mach/board-pb11mp.h>
@@ -217,13 +218,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	if (max_cpus > ncores)
 		max_cpus = ncores;
 
-#if defined(CONFIG_LOCAL_TIMERS) || defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)
-	/*
-	 * Enable the local timer or broadcast device for the boot CPU.
-	 */
-	local_timer_setup();
-#endif
-
 	/*
 	 * Initialise the present map, which describes the set of CPUs
 	 * actually populated at the present time.
@@ -239,6 +233,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	 * WFI
 	 */
 	if (max_cpus > 1) {
+		/*
+		 * Enable the local timer or broadcast device for the
+		 * boot CPU, but only if we have more than one CPU.
+		 */
+		percpu_timer_setup();
+
 		scu_enable();
 		poke_milo();
 	}
-- 
cgit v0.10.2


From 49613d4d9ae759193915823e67de546fca58c951 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 16 May 2009 11:41:53 +0100
Subject: [ARM] smp: SCU is used on non-realview platforms

The SCU can be used by non-realview platforms, so make it visible
for other people to use rather than having them copy the header file.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
new file mode 100644
index 0000000..d55802d
--- /dev/null
+++ b/arch/arm/include/asm/smp_scu.h
@@ -0,0 +1,13 @@
+#ifndef __ASMARM_ARCH_SCU_H
+#define __ASMARM_ARCH_SCU_H
+
+/*
+ * SCU registers
+ */
+#define SCU_CTRL		0x00
+#define SCU_CONFIG		0x04
+#define SCU_CPU_STATUS		0x08
+#define SCU_INVALIDATE		0x0c
+#define SCU_FPGA_REVISION	0x10
+
+#endif
diff --git a/arch/arm/mach-realview/include/mach/scu.h b/arch/arm/mach-realview/include/mach/scu.h
deleted file mode 100644
index d55802d..0000000
--- a/arch/arm/mach-realview/include/mach/scu.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASMARM_ARCH_SCU_H
-#define __ASMARM_ARCH_SCU_H
-
-/*
- * SCU registers
- */
-#define SCU_CTRL		0x00
-#define SCU_CONFIG		0x04
-#define SCU_CPU_STATUS		0x08
-#define SCU_INVALIDATE		0x0c
-#define SCU_FPGA_REVISION	0x10
-
-#endif
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index b34d3a5..c567d3e 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -23,7 +23,7 @@
 
 #include <mach/board-eb.h>
 #include <mach/board-pb11mp.h>
-#include <mach/scu.h>
+#include <asm/smp_scu.h>
 
 #include "core.h"
 
-- 
cgit v0.10.2


From a8cbcd92bd4bf893085eddf7f58e63ea98503d94 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 16 May 2009 11:51:14 +0100
Subject: [ARM] smp: separate SCU support code from realview

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ef4f860..f19a951 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -861,6 +861,7 @@ config SMP
 	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP)
 	depends on GENERIC_CLOCKEVENTS
 	select USE_GENERIC_SMP_HELPERS
+	select HAVE_ARM_SCU if ARCH_REALVIEW
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
@@ -878,6 +879,12 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config HAVE_ARM_SCU
+	bool
+	depends on SMP
+	help
+	  This option enables support for the ARM system coherency unit
+
 choice
 	prompt "Memory split"
 	default VMSPLIT_3G
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index d55802d..2376835 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -1,13 +1,7 @@
 #ifndef __ASMARM_ARCH_SCU_H
 #define __ASMARM_ARCH_SCU_H
 
-/*
- * SCU registers
- */
-#define SCU_CTRL		0x00
-#define SCU_CONFIG		0x04
-#define SCU_CPU_STATUS		0x08
-#define SCU_INVALIDATE		0x0c
-#define SCU_FPGA_REVISION	0x10
+unsigned int scu_get_core_count(void __iomem *);
+void scu_enable(void __iomem *);
 
 #endif
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 11a5197..90ffbaf 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
 obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-decode.o
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
new file mode 100644
index 0000000..7f24ee9
--- /dev/null
+++ b/arch/arm/kernel/smp_scu.c
@@ -0,0 +1,41 @@
+/*
+ *  linux/arch/arm/kernel/smp_scu.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/io.h>
+
+#include <asm/smp_scu.h>
+
+#define SCU_CTRL		0x00
+#define SCU_CONFIG		0x04
+#define SCU_CPU_STATUS		0x08
+#define SCU_INVALIDATE		0x0c
+#define SCU_FPGA_REVISION	0x10
+
+/*
+ * Get the number of CPU cores from the SCU configuration
+ */
+unsigned int __init scu_get_core_count(void __iomem *scu_base)
+{
+	unsigned int ncores = __raw_readl(scu_base + SCU_CONFIG);
+	return (ncores & 0x03) + 1;
+}
+
+/*
+ * Enable the SCU
+ */
+void __init scu_enable(void __iomem *scu_base)
+{
+	u32 scu_ctrl;
+
+	scu_ctrl = __raw_readl(scu_base + SCU_CTRL);
+	scu_ctrl |= 1;
+	__raw_writel(scu_ctrl, scu_base + SCU_CTRL);
+}
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index c567d3e..c862ce1 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -45,31 +45,12 @@ static void __iomem *scu_base_addr(void)
 		return (void __iomem *)0;
 }
 
-static unsigned int __init get_core_count(void)
+static inline unsigned int get_core_count(void)
 {
-	unsigned int ncores;
 	void __iomem *scu_base = scu_base_addr();
-
-	if (scu_base) {
-		ncores = __raw_readl(scu_base + SCU_CONFIG);
-		ncores = (ncores & 0x03) + 1;
-	} else
-		ncores = 1;
-
-	return ncores;
-}
-
-/*
- * Setup the SCU
- */
-static void scu_enable(void)
-{
-	u32 scu_ctrl;
-	void __iomem *scu_base = scu_base_addr();
-
-	scu_ctrl = __raw_readl(scu_base + SCU_CTRL);
-	scu_ctrl |= 1;
-	__raw_writel(scu_ctrl, scu_base + SCU_CTRL);
+	if (scu_base)
+		return scu_get_core_count(scu_base);
+	return 1;
 }
 
 static DEFINE_SPINLOCK(boot_lock);
@@ -239,7 +220,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		 */
 		percpu_timer_setup();
 
-		scu_enable();
+		scu_enable(scu_base_addr());
 		poke_milo();
 	}
 }
-- 
cgit v0.10.2


From f32f4ce25745209f16a5a6cef7442144b596c68a Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 16 May 2009 12:14:21 +0100
Subject: [ARM] smp: allow re-use of realview localtimer TWD support

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f19a951..93d63bf 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -885,6 +885,12 @@ config HAVE_ARM_SCU
 	help
 	  This option enables support for the ARM system coherency unit
 
+config HAVE_ARM_TWD
+	bool
+	depends on SMP
+	help
+	  This options enables support for the ARM timer and watchdog unit
+
 choice
 	prompt "Memory split"
 	default VMSPLIT_3G
@@ -925,6 +931,7 @@ config LOCAL_TIMERS
 	bool "Use local timer interrupts"
 	depends on SMP && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || REALVIEW_EB_A9MP)
 	default y
+	select HAVE_ARM_TWD if ARCH_REALVIEW
 	help
 	  Enable support for local timers on SMP platforms, rather then the
 	  legacy IPI broadcast method.  Local timers allows the system
diff --git a/arch/arm/include/asm/hardware/arm_twd.h b/arch/arm/include/asm/hardware/arm_twd.h
deleted file mode 100644
index e521b70..0000000
--- a/arch/arm/include/asm/hardware/arm_twd.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef __ASM_HARDWARE_TWD_H
-#define __ASM_HARDWARE_TWD_H
-
-#define TWD_TIMER_LOAD 			0x00
-#define TWD_TIMER_COUNTER		0x04
-#define TWD_TIMER_CONTROL		0x08
-#define TWD_TIMER_INTSTAT		0x0C
-
-#define TWD_WDOG_LOAD			0x20
-#define TWD_WDOG_COUNTER		0x24
-#define TWD_WDOG_CONTROL		0x28
-#define TWD_WDOG_INTSTAT		0x2C
-#define TWD_WDOG_RESETSTAT		0x30
-#define TWD_WDOG_DISABLE		0x34
-
-#define TWD_TIMER_CONTROL_ENABLE	(1 << 0)
-#define TWD_TIMER_CONTROL_ONESHOT	(0 << 1)
-#define TWD_TIMER_CONTROL_PERIODIC	(1 << 1)
-#define TWD_TIMER_CONTROL_IT_ENABLE	(1 << 2)
-
-#endif
diff --git a/arch/arm/include/asm/localtimer.h b/arch/arm/include/asm/localtimer.h
index 3f8c9eb..50c7e7c 100644
--- a/arch/arm/include/asm/localtimer.h
+++ b/arch/arm/include/asm/localtimer.h
@@ -24,6 +24,16 @@ asmlinkage void do_local_timer(struct pt_regs *);
 
 
 #ifdef CONFIG_LOCAL_TIMERS
+
+#ifdef CONFIG_HAVE_ARM_TWD
+
+#include "smp_twd.h"
+
+#define local_timer_ack()	twd_timer_ack()
+#define local_timer_stop()	twd_timer_stop()
+
+#else
+
 /*
  * Platform provides this to acknowledge a local timer IRQ.
  * Returns true if the local timer IRQ is to be processed.
@@ -35,6 +45,8 @@ int local_timer_ack(void);
  */
 void local_timer_stop(void);
 
+#endif
+
 /*
  * Setup a local timer interrupt for a CPU.
  */
diff --git a/arch/arm/include/asm/smp_twd.h b/arch/arm/include/asm/smp_twd.h
new file mode 100644
index 0000000..7be0978
--- /dev/null
+++ b/arch/arm/include/asm/smp_twd.h
@@ -0,0 +1,12 @@
+#ifndef __ASMARM_SMP_TWD_H
+#define __ASMARM_SMP_TWD_H
+
+struct clock_event_device;
+
+extern void __iomem *twd_base;
+
+void twd_timer_stop(void);
+int twd_timer_ack(void);
+void twd_timer_setup(struct clock_event_device *);
+
+#endif
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 90ffbaf..ff89d0b 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
+obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-decode.o
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
new file mode 100644
index 0000000..aabd62d
--- /dev/null
+++ b/arch/arm/kernel/smp_twd.c
@@ -0,0 +1,173 @@
+/*
+ *  linux/arch/arm/kernel/smp_twd.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/smp.h>
+#include <linux/jiffies.h>
+#include <linux/clockchips.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include <asm/smp_twd.h>
+#include <asm/hardware/gic.h>
+
+#define TWD_TIMER_LOAD 			0x00
+#define TWD_TIMER_COUNTER		0x04
+#define TWD_TIMER_CONTROL		0x08
+#define TWD_TIMER_INTSTAT		0x0C
+
+#define TWD_WDOG_LOAD			0x20
+#define TWD_WDOG_COUNTER		0x24
+#define TWD_WDOG_CONTROL		0x28
+#define TWD_WDOG_INTSTAT		0x2C
+#define TWD_WDOG_RESETSTAT		0x30
+#define TWD_WDOG_DISABLE		0x34
+
+#define TWD_TIMER_CONTROL_ENABLE	(1 << 0)
+#define TWD_TIMER_CONTROL_ONESHOT	(0 << 1)
+#define TWD_TIMER_CONTROL_PERIODIC	(1 << 1)
+#define TWD_TIMER_CONTROL_IT_ENABLE	(1 << 2)
+
+/* set up by the platform code */
+void __iomem *twd_base;
+
+static unsigned long twd_timer_rate;
+
+static void twd_set_mode(enum clock_event_mode mode,
+			struct clock_event_device *clk)
+{
+	unsigned long ctrl;
+
+	switch(mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		/* timer load already set up */
+		ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE
+			| TWD_TIMER_CONTROL_PERIODIC;
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* period set, and timer enabled in 'next_event' hook */
+		ctrl = TWD_TIMER_CONTROL_IT_ENABLE | TWD_TIMER_CONTROL_ONESHOT;
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		ctrl = 0;
+	}
+
+	__raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
+}
+
+static int twd_set_next_event(unsigned long evt,
+			struct clock_event_device *unused)
+{
+	unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);
+
+	__raw_writel(evt, twd_base + TWD_TIMER_COUNTER);
+	__raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, twd_base + TWD_TIMER_CONTROL);
+
+	return 0;
+}
+
+/*
+ * local_timer_ack: checks for a local timer interrupt.
+ *
+ * If a local timer interrupt has occurred, acknowledge and return 1.
+ * Otherwise, return 0.
+ */
+int twd_timer_ack(void)
+{
+	if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) {
+		__raw_writel(1, twd_base + TWD_TIMER_INTSTAT);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void __cpuinit twd_calibrate_rate(void)
+{
+	unsigned long load, count;
+	u64 waitjiffies;
+
+	/*
+	 * If this is the first time round, we need to work out how fast
+	 * the timer ticks
+	 */
+	if (twd_timer_rate == 0) {
+		printk("Calibrating local timer... ");
+
+		/* Wait for a tick to start */
+		waitjiffies = get_jiffies_64() + 1;
+
+		while (get_jiffies_64() < waitjiffies)
+			udelay(10);
+
+		/* OK, now the tick has started, let's get the timer going */
+		waitjiffies += 5;
+
+				 /* enable, no interrupt or reload */
+		__raw_writel(0x1, twd_base + TWD_TIMER_CONTROL);
+
+				 /* maximum value */
+		__raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
+
+		while (get_jiffies_64() < waitjiffies)
+			udelay(10);
+
+		count = __raw_readl(twd_base + TWD_TIMER_COUNTER);
+
+		twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
+
+		printk("%lu.%02luMHz.\n", twd_timer_rate / 1000000,
+			(twd_timer_rate / 100000) % 100);
+	}
+
+	load = twd_timer_rate / HZ;
+
+	__raw_writel(load, twd_base + TWD_TIMER_LOAD);
+}
+
+/*
+ * Setup the local clock events for a CPU.
+ */
+void __cpuinit twd_timer_setup(struct clock_event_device *clk)
+{
+	unsigned long flags;
+
+	twd_calibrate_rate();
+
+	clk->name		= "local_timer";
+	clk->features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+	clk->rating		= 350;
+	clk->set_mode		= twd_set_mode;
+	clk->set_next_event	= twd_set_next_event;
+	clk->shift		= 20;
+	clk->mult		= div_sc(twd_timer_rate, NSEC_PER_SEC, clk->shift);
+	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
+	clk->min_delta_ns	= clockevent_delta2ns(0xf, clk);
+
+	/* Make sure our local interrupt controller has this enabled */
+	local_irq_save(flags);
+	get_irq_chip(clk->irq)->unmask(clk->irq);
+	local_irq_restore(flags);
+
+	clockevents_register_device(clk);
+}
+
+/*
+ * take a local timer down
+ */
+void __cpuexit twd_timer_stop(void)
+{
+	__raw_writel(0, twd_base + TWD_TIMER_CONTROL);
+}
diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile
index 7bea8ff..e13d094 100644
--- a/arch/arm/mach-realview/Makefile
+++ b/arch/arm/mach-realview/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_MACH_REALVIEW_EB)		+= realview_eb.o
 obj-$(CONFIG_MACH_REALVIEW_PB11MP)	+= realview_pb11mp.o
 obj-$(CONFIG_MACH_REALVIEW_PB1176)	+= realview_pb1176.o
 obj-$(CONFIG_MACH_REALVIEW_PBA8)	+= realview_pba8.o
-obj-$(CONFIG_SMP)			+= platsmp.o headsmp.o localtimer.o
+obj-$(CONFIG_SMP)			+= platsmp.o headsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
+obj-$(CONFIG_LOCAL_TIMERS)		+= localtimer.o
diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h
index 21c0863..59a337b 100644
--- a/arch/arm/mach-realview/core.h
+++ b/arch/arm/mach-realview/core.h
@@ -51,9 +51,6 @@ extern struct mmc_platform_data realview_mmc0_plat_data;
 extern struct mmc_platform_data realview_mmc1_plat_data;
 extern struct clcd_board clcd_plat_data;
 extern void __iomem *gic_cpu_base_addr;
-#ifdef CONFIG_LOCAL_TIMERS
-extern void __iomem *twd_base;
-#endif
 extern void __iomem *timer0_va_base;
 extern void __iomem *timer1_va_base;
 extern void __iomem *timer2_va_base;
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index cd98e7a..60b4e11 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -9,154 +9,18 @@
  * published by the Free Software Foundation.
  */
 #include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
 #include <linux/smp.h>
-#include <linux/jiffies.h>
 #include <linux/clockchips.h>
-#include <linux/irq.h>
-#include <linux/io.h>
 
-#include <asm/hardware/arm_twd.h>
-#include <asm/hardware/gic.h>
-#include <mach/hardware.h>
 #include <asm/irq.h>
-
-#ifdef CONFIG_LOCAL_TIMERS
-
-/* set up by the platform code */
-void __iomem *twd_base;
-
-static unsigned long mpcore_timer_rate;
-
-static void local_timer_set_mode(enum clock_event_mode mode,
-				 struct clock_event_device *evt)
-{
-	unsigned long ctrl;
-
-	switch(mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		/* timer load already set up */
-		ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE
-			| TWD_TIMER_CONTROL_PERIODIC;
-		break;
-	case CLOCK_EVT_MODE_ONESHOT:
-		/* period set, and timer enabled in 'next_event' hook */
-		ctrl = TWD_TIMER_CONTROL_IT_ENABLE | TWD_TIMER_CONTROL_ONESHOT;
-		break;
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	default:
-		ctrl = 0;
-	}
-
-	__raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
-}
-
-static int local_timer_set_next_event(unsigned long evt,
-				      struct clock_event_device *unused)
-{
-	unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);
-
-	__raw_writel(evt, twd_base + TWD_TIMER_COUNTER);
-	__raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, twd_base + TWD_TIMER_CONTROL);
-
-	return 0;
-}
-
-/*
- * local_timer_ack: checks for a local timer interrupt.
- *
- * If a local timer interrupt has occurred, acknowledge and return 1.
- * Otherwise, return 0.
- */
-int local_timer_ack(void)
-{
-	if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) {
-		__raw_writel(1, twd_base + TWD_TIMER_INTSTAT);
-		return 1;
-	}
-
-	return 0;
-}
-
-static void __cpuinit twd_calibrate_rate(void)
-{
-	unsigned long load, count;
-	u64 waitjiffies;
-
-	/*
-	 * If this is the first time round, we need to work out how fast
-	 * the timer ticks
-	 */
-	if (mpcore_timer_rate == 0) {
-		printk("Calibrating local timer... ");
-
-		/* Wait for a tick to start */
-		waitjiffies = get_jiffies_64() + 1;
-
-		while (get_jiffies_64() < waitjiffies)
-			udelay(10);
-
-		/* OK, now the tick has started, let's get the timer going */
-		waitjiffies += 5;
-
-				 /* enable, no interrupt or reload */
-		__raw_writel(0x1, twd_base + TWD_TIMER_CONTROL);
-
-				 /* maximum value */
-		__raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
-
-		while (get_jiffies_64() < waitjiffies)
-			udelay(10);
-
-		count = __raw_readl(twd_base + TWD_TIMER_COUNTER);
-
-		mpcore_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
-
-		printk("%lu.%02luMHz.\n", mpcore_timer_rate / 1000000,
-			(mpcore_timer_rate / 100000) % 100);
-	}
-
-	load = mpcore_timer_rate / HZ;
-
-	__raw_writel(load, twd_base + TWD_TIMER_LOAD);
-}
+#include <asm/smp_twd.h>
+#include <asm/localtimer.h>
 
 /*
  * Setup the local clock events for a CPU.
  */
 void __cpuinit local_timer_setup(struct clock_event_device *evt)
 {
-	unsigned long flags;
-
-	twd_calibrate_rate();
-
-	evt->name		= "local_timer";
-	evt->features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
-	evt->rating		= 350;
-	evt->set_mode		= local_timer_set_mode;
-	evt->set_next_event	= local_timer_set_next_event;
-	evt->irq		= IRQ_LOCALTIMER;
-	evt->shift		= 20;
-	evt->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, evt->shift);
-	evt->max_delta_ns	= clockevent_delta2ns(0xffffffff, evt);
-	evt->min_delta_ns	= clockevent_delta2ns(0xf, evt);
-
-	/* Make sure our local interrupt controller has this enabled */
-	local_irq_save(flags);
-	get_irq_chip(IRQ_LOCALTIMER)->unmask(IRQ_LOCALTIMER);
-	local_irq_restore(flags);
-
-	clockevents_register_device(evt);
+	evt->irq = IRQ_LOCALTIMER;
+	twd_timer_setup(evt);
 }
-
-/*
- * take a local timer down
- */
-void __cpuexit local_timer_stop(void)
-{
-	__raw_writel(0, twd_base + TWD_TIMER_CONTROL);
-}
-
-#endif	/* CONFIG_LOCAL_TIMERS */
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index c20fbef..8dfa44e 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -32,6 +32,7 @@
 #include <asm/hardware/gic.h>
 #include <asm/hardware/icst307.h>
 #include <asm/hardware/cache-l2x0.h>
+#include <asm/localtimer.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c
index ea1e60e..dc4b169 100644
--- a/arch/arm/mach-realview/realview_pb11mp.c
+++ b/arch/arm/mach-realview/realview_pb11mp.c
@@ -32,6 +32,7 @@
 #include <asm/hardware/gic.h>
 #include <asm/hardware/icst307.h>
 #include <asm/hardware/cache-l2x0.h>
+#include <asm/localtimer.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
-- 
cgit v0.10.2


From 4c5158d4c3ab1f2927a740372a0ee9c3fed7ba47 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 17 May 2009 10:58:54 +0100
Subject: [ARM] smp: fix style issues in smp_twd.c

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index aabd62d..d8c88c6 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -48,7 +48,7 @@ static void twd_set_mode(enum clock_event_mode mode,
 {
 	unsigned long ctrl;
 
-	switch(mode) {
+	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 		/* timer load already set up */
 		ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE
@@ -72,8 +72,10 @@ static int twd_set_next_event(unsigned long evt,
 {
 	unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);
 
+	ctrl |= TWD_TIMER_CONTROL_ENABLE;
+
 	__raw_writel(evt, twd_base + TWD_TIMER_COUNTER);
-	__raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, twd_base + TWD_TIMER_CONTROL);
+	__raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
 
 	return 0;
 }
@@ -104,7 +106,7 @@ static void __cpuinit twd_calibrate_rate(void)
 	 * the timer ticks
 	 */
 	if (twd_timer_rate == 0) {
-		printk("Calibrating local timer... ");
+		printk(KERN_INFO "Calibrating local timer... ");
 
 		/* Wait for a tick to start */
 		waitjiffies = get_jiffies_64() + 1;
@@ -146,15 +148,15 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 
 	twd_calibrate_rate();
 
-	clk->name		= "local_timer";
-	clk->features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
-	clk->rating		= 350;
-	clk->set_mode		= twd_set_mode;
-	clk->set_next_event	= twd_set_next_event;
-	clk->shift		= 20;
-	clk->mult		= div_sc(twd_timer_rate, NSEC_PER_SEC, clk->shift);
-	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
-	clk->min_delta_ns	= clockevent_delta2ns(0xf, clk);
+	clk->name = "local_timer";
+	clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+	clk->rating = 350;
+	clk->set_mode = twd_set_mode;
+	clk->set_next_event = twd_set_next_event;
+	clk->shift = 20;
+	clk->mult = div_sc(twd_timer_rate, NSEC_PER_SEC, clk->shift);
+	clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk);
+	clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
 
 	/* Make sure our local interrupt controller has this enabled */
 	local_irq_save(flags);
-- 
cgit v0.10.2


From 12abb35a03e32c97235fcefdcf2d851be9f82dc2 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Sun, 17 May 2009 01:02:01 -0400
Subject: reiserfs: clean up ifdefs

With xattr cleanup even with xattrs disabled, much of the initial setup
is still performed.  Some #ifdefs are just not needed since the options
they protect wouldn't be available anyway.

This cleans those up.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 2237e10..cf94964 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -685,20 +685,6 @@ out:
 	return err;
 }
 
-/* Actual operations that are exported to VFS-land */
-struct xattr_handler *reiserfs_xattr_handlers[] = {
-	&reiserfs_xattr_user_handler,
-	&reiserfs_xattr_trusted_handler,
-#ifdef CONFIG_REISERFS_FS_SECURITY
-	&reiserfs_xattr_security_handler,
-#endif
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	&reiserfs_posix_acl_access_handler,
-	&reiserfs_posix_acl_default_handler,
-#endif
-	NULL
-};
-
 /*
  * In order to implement different sets of xattr operations for each xattr
  * prefix with the generic xattr API, a filesystem should create a
@@ -922,6 +908,28 @@ static int create_privroot(struct dentry *dentry)
 	return 0;
 }
 
+#else
+int __init reiserfs_xattr_register_handlers(void) { return 0; }
+void reiserfs_xattr_unregister_handlers(void) {}
+static int create_privroot(struct dentry *dentry) { return 0; }
+#endif
+
+/* Actual operations that are exported to VFS-land */
+struct xattr_handler *reiserfs_xattr_handlers[] = {
+#ifdef CONFIG_REISERFS_FS_XATTR
+	&reiserfs_xattr_user_handler,
+	&reiserfs_xattr_trusted_handler,
+#endif
+#ifdef CONFIG_REISERFS_FS_SECURITY
+	&reiserfs_xattr_security_handler,
+#endif
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
+	&reiserfs_posix_acl_access_handler,
+	&reiserfs_posix_acl_default_handler,
+#endif
+	NULL
+};
+
 static int xattr_mount_check(struct super_block *s)
 {
 	/* We need generation numbers to ensure that the oid mapping is correct
@@ -941,11 +949,6 @@ static int xattr_mount_check(struct super_block *s)
 	return 0;
 }
 
-#else
-int __init reiserfs_xattr_register_handlers(void) { return 0; }
-void reiserfs_xattr_unregister_handlers(void) {}
-#endif
-
 /* This will catch lookups from the fs root to .reiserfs_priv */
 static int
 xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
@@ -992,7 +995,6 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 	int err = 0;
 	struct dentry *privroot = REISERFS_SB(s)->priv_root;
 
-#ifdef CONFIG_REISERFS_FS_XATTR
 	err = xattr_mount_check(s);
 	if (err)
 		goto error;
@@ -1023,14 +1025,11 @@ error:
 		clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
 		clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
 	}
-#endif
 
 	/* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
 	if (reiserfs_posixacl(s))
 		s->s_flags |= MS_POSIXACL;
 	else
-#endif
 		s->s_flags &= ~MS_POSIXACL;
 
 	return err;
-- 
cgit v0.10.2


From ceb5edc457f07956c82dccfb54ca8ae7e2a399f0 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Sun, 17 May 2009 01:02:02 -0400
Subject: reiserfs: deal with NULL xattr root w/ xattrs disabled

This avoids an Oops in open_xa_root that can occur when deleting a file
with xattrs disabled.  It assumes that the xattr root will be there, and
that is not guaranteed.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1215a4f..3567fb9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -448,13 +448,11 @@ int remove_save_link(struct inode *inode, int truncate)
 static void reiserfs_kill_sb(struct super_block *s)
 {
 	if (REISERFS_SB(s)) {
-#ifdef CONFIG_REISERFS_FS_XATTR
 		if (REISERFS_SB(s)->xattr_root) {
 			d_invalidate(REISERFS_SB(s)->xattr_root);
 			dput(REISERFS_SB(s)->xattr_root);
 			REISERFS_SB(s)->xattr_root = NULL;
 		}
-#endif
 		if (REISERFS_SB(s)->priv_root) {
 			d_invalidate(REISERFS_SB(s)->priv_root);
 			dput(REISERFS_SB(s)->priv_root);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index cf94964..628075c 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -123,7 +123,9 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
 	mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR);
 
 	xaroot = dget(REISERFS_SB(sb)->xattr_root);
-	if (!xaroot->d_inode) {
+	if (!xaroot)
+		xaroot = ERR_PTR(-ENODATA);
+	else if (!xaroot->d_inode) {
 		int err = -ENODATA;
 		if (xattr_may_create(flags))
 			err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
-- 
cgit v0.10.2


From b83674c0da6558e357c6b482ccf299eeea77d8ef Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Sun, 17 May 2009 01:02:03 -0400
Subject: reiserfs: fixup perms when xattrs are disabled

This adds CONFIG_REISERFS_FS_XATTR protection from reiserfs_permission.

This is needed to avoid warnings during file deletions and chowns with
xattrs disabled.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 628075c..8e7deb0 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -871,23 +871,6 @@ static int reiserfs_check_acl(struct inode *inode, int mask)
 	return error;
 }
 
-int reiserfs_permission(struct inode *inode, int mask)
-{
-	/*
-	 * We don't do permission checks on the internal objects.
-	 * Permissions are determined by the "owning" object.
-	 */
-	if (IS_PRIVATE(inode))
-		return 0;
-	/*
-	 * Stat data v1 doesn't support ACLs.
-	 */
-	if (get_inode_sd_version(inode) == STAT_DATA_V1)
-		return generic_permission(inode, mask, NULL);
-	else
-		return generic_permission(inode, mask, reiserfs_check_acl);
-}
-
 static int create_privroot(struct dentry *dentry)
 {
 	int err;
@@ -951,6 +934,25 @@ static int xattr_mount_check(struct super_block *s)
 	return 0;
 }
 
+int reiserfs_permission(struct inode *inode, int mask)
+{
+	/*
+	 * We don't do permission checks on the internal objects.
+	 * Permissions are determined by the "owning" object.
+	 */
+	if (IS_PRIVATE(inode))
+		return 0;
+
+#ifdef CONFIG_REISERFS_FS_XATTR
+	/*
+	 * Stat data v1 doesn't support ACLs.
+	 */
+	if (get_inode_sd_version(inode) != STAT_DATA_V1)
+		return generic_permission(inode, mask, reiserfs_check_acl);
+#endif
+	return generic_permission(inode, mask, NULL);
+}
+
 /* This will catch lookups from the fs root to .reiserfs_priv */
 static int
 xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index cdedc01..99928dc 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -41,6 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
 int reiserfs_lookup_privroot(struct super_block *sb);
 int reiserfs_delete_xattrs(struct inode *inode);
 int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
+int reiserfs_permission(struct inode *inode, int mask);
 
 #ifdef CONFIG_REISERFS_FS_XATTR
 #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
@@ -50,7 +51,6 @@ int reiserfs_setxattr(struct dentry *dentry, const char *name,
 		      const void *value, size_t size, int flags);
 ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int reiserfs_removexattr(struct dentry *dentry, const char *name);
-int reiserfs_permission(struct inode *inode, int mask);
 
 int reiserfs_xattr_get(struct inode *, const char *, void *, size_t);
 int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int);
@@ -117,8 +117,6 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
 #define reiserfs_listxattr NULL
 #define reiserfs_removexattr NULL
 
-#define reiserfs_permission NULL
-
 static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
 {
 }
-- 
cgit v0.10.2


From 6fd058f7791087648c683eb8572edf3be3c4c23c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 17 May 2009 15:38:01 -0400
Subject: ext4: Add a comprehensive block validity check to ext4_get_blocks()

To catch filesystem bugs or corruption which could lead to the
filesystem getting severly damaged, this patch adds a facility for
tracking all of the filesystem metadata blocks by contiguous regions
in a red-black tree.  This allows quick searching of the tree to
locate extents which might overlap with filesystem metadata blocks.

This facility is also used by the multi-block allocator to assure that
it is not allocating blocks out of the system zone, as well as by the
routines used when reading indirect blocks and extents information
from disk to make sure their contents are valid.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a8ff003..8a34710 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,8 +5,8 @@
 obj-$(CONFIG_EXT4_FS) += ext4.o
 
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		   ext4_jbd2.o migrate.o mballoc.o
+		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
new file mode 100644
index 0000000..50784ef
--- /dev/null
+++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
+/*
+ *  linux/fs/ext4/block_validity.c
+ *
+ * Copyright (C) 2009
+ * Theodore Ts'o (tytso@mit.edu)
+ *
+ * Track which blocks in the filesystem are metadata blocks that
+ * should never be used as data blocks by files or directories.
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include "ext4.h"
+
+struct ext4_system_zone {
+	struct rb_node	node;
+	ext4_fsblk_t	start_blk;
+	unsigned int	count;
+};
+
+static struct kmem_cache *ext4_system_zone_cachep;
+
+int __init init_ext4_system_zone(void)
+{
+	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
+					     SLAB_RECLAIM_ACCOUNT);
+	if (ext4_system_zone_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void exit_ext4_system_zone(void)
+{
+	kmem_cache_destroy(ext4_system_zone_cachep);
+}
+
+static inline int can_merge(struct ext4_system_zone *entry1,
+		     struct ext4_system_zone *entry2)
+{
+	if ((entry1->start_blk + entry1->count) == entry2->start_blk)
+		return 1;
+	return 0;
+}
+
+/*
+ * Mark a range of blocks as belonging to the "system zone" --- that
+ * is, filesystem metadata blocks which should never be used by
+ * inodes.
+ */
+static int add_system_zone(struct ext4_sb_info *sbi,
+			   ext4_fsblk_t start_blk,
+			   unsigned int count)
+{
+	struct ext4_system_zone *new_entry = NULL, *entry;
+	struct rb_node **n = &sbi->system_blks.rb_node, *node;
+	struct rb_node *parent = NULL, *new_node = NULL;
+
+	while (*n) {
+		parent = *n;
+		entry = rb_entry(parent, struct ext4_system_zone, node);
+		if (start_blk < entry->start_blk)
+			n = &(*n)->rb_left;
+		else if (start_blk >= (entry->start_blk + entry->count))
+			n = &(*n)->rb_right;
+		else {
+			if (start_blk + count > (entry->start_blk + 
+						 entry->count))
+				entry->count = (start_blk + count - 
+						entry->start_blk);
+			new_node = *n;
+			new_entry = rb_entry(new_node, struct ext4_system_zone,
+					     node);
+			break;
+		}
+	}
+
+	if (!new_entry) {
+		new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+					     GFP_KERNEL);
+		if (!new_entry)
+			return -ENOMEM;
+		new_entry->start_blk = start_blk;
+		new_entry->count = count;
+		new_node = &new_entry->node;
+
+		rb_link_node(new_node, parent, n);
+		rb_insert_color(new_node, &sbi->system_blks);
+	}
+
+	/* Can we merge to the left? */
+	node = rb_prev(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		if (can_merge(entry, new_entry)) {
+			new_entry->start_blk = entry->start_blk;
+			new_entry->count += entry->count;
+			rb_erase(node, &sbi->system_blks);
+			kmem_cache_free(ext4_system_zone_cachep, entry);
+		}
+	}
+
+	/* Can we merge to the right? */
+	node = rb_next(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		if (can_merge(new_entry, entry)) {
+			new_entry->count += entry->count;
+			rb_erase(node, &sbi->system_blks);
+			kmem_cache_free(ext4_system_zone_cachep, entry);
+		}
+	}
+	return 0;
+}
+
+static void debug_print_tree(struct ext4_sb_info *sbi)
+{
+	struct rb_node *node;
+	struct ext4_system_zone *entry;
+	int first = 1;
+
+	printk(KERN_INFO "System zones: ");
+	node = rb_first(&sbi->system_blks);
+	while (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		printk("%s%llu-%llu", first ? "" : ", ",
+		       entry->start_blk, entry->start_blk + entry->count - 1);
+		first = 0;
+		node = rb_next(node);
+	}
+	printk("\n");
+}
+
+int ext4_setup_system_zone(struct super_block *sb)
+{
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp;
+	ext4_group_t i;
+	int flex_size = ext4_flex_bg_size(sbi);
+	int ret;
+
+	if (!test_opt(sb, BLOCK_VALIDITY)) {
+		if (EXT4_SB(sb)->system_blks.rb_node)
+			ext4_release_system_zone(sb);
+		return 0;
+	}
+	if (EXT4_SB(sb)->system_blks.rb_node)
+		return 0;
+
+	for (i=0; i < ngroups; i++) {
+		if (ext4_bg_has_super(sb, i) &&
+		    ((i < 5) || ((i % flex_size) == 0)))
+			add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+					sbi->s_gdb_count + 1);
+		gdp = ext4_get_group_desc(sb, i, NULL);
+		ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+		if (ret)
+			return ret;
+		ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+		if (ret)
+			return ret;
+		ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
+				sbi->s_itb_per_group);
+		if (ret)
+			return ret;
+	}
+
+	if (test_opt(sb, DEBUG))
+		debug_print_tree(EXT4_SB(sb));
+	return 0;
+}
+
+/* Called when the filesystem is unmounted */
+void ext4_release_system_zone(struct super_block *sb)
+{
+	struct rb_node	*n = EXT4_SB(sb)->system_blks.rb_node;
+	struct rb_node	*parent;
+	struct ext4_system_zone	*entry;
+
+	while (n) {
+		/* Do the node's children first */
+		if (n->rb_left) {
+			n = n->rb_left;
+			continue;
+		}
+		if (n->rb_right) {
+			n = n->rb_right;
+			continue;
+		}
+		/*
+		 * The node has no children; free it, and then zero
+		 * out parent's link to it.  Finally go to the
+		 * beginning of the loop and try to free the parent
+		 * node.
+		 */
+		parent = rb_parent(n);
+		entry = rb_entry(n, struct ext4_system_zone, node);
+		kmem_cache_free(ext4_system_zone_cachep, entry);
+		if (!parent)
+			EXT4_SB(sb)->system_blks.rb_node = NULL;
+		else if (parent->rb_left == n)
+			parent->rb_left = NULL;
+		else if (parent->rb_right == n)
+			parent->rb_right = NULL;
+		n = parent;
+	}
+	EXT4_SB(sb)->system_blks.rb_node = NULL;
+}
+
+/*
+ * Returns 1 if the passed-in block region (start_blk,
+ * start_blk+count) is valid; 0 if some part of the block region
+ * overlaps with filesystem metadata blocks.
+ */
+int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
+			  unsigned int count)
+{
+	struct ext4_system_zone *entry;
+	struct rb_node *n = sbi->system_blks.rb_node;
+
+	if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+	    (start_blk + count > ext4_blocks_count(sbi->s_es)))
+		return 0;
+	while (n) {
+		entry = rb_entry(n, struct ext4_system_zone, node);
+		if (start_blk + count - 1 < entry->start_blk)
+			n = n->rb_left;
+		else if (start_blk >= (entry->start_blk + entry->count))
+			n = n->rb_right;
+		else
+			return 0;
+	}
+	return 1;
+}
+
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d164f12..4311cc8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -696,6 +696,7 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
+#define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 
 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
@@ -887,6 +888,7 @@ struct ext4_sb_info {
 	int s_jquota_fmt;			/* Format of quota to use */
 #endif
 	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+	struct rb_root system_blks;
 
 #ifdef EXTENTS_STATS
 	/* ext4 extents stats */
@@ -1618,6 +1620,15 @@ extern struct dentry *ext4_get_parent(struct dentry *child);
 extern const struct inode_operations ext4_symlink_inode_operations;
 extern const struct inode_operations ext4_fast_symlink_inode_operations;
 
+/* block_validity */
+extern void ext4_release_system_zone(struct super_block *sb);
+extern int ext4_setup_system_zone(struct super_block *sb);
+extern int __init init_ext4_system_zone(void);
+extern void exit_ext4_system_zone(void);
+extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
+				 ext4_fsblk_t start_blk,
+				 unsigned int count);
+
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 27c383c..d04b779 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth)
 
 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 {
-	ext4_fsblk_t block = ext_pblock(ext), valid_block;
+	ext4_fsblk_t block = ext_pblock(ext);
 	int len = ext4_ext_get_actual_len(ext);
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 
-	valid_block = le32_to_cpu(es->s_first_data_block) +
-		EXT4_SB(inode->i_sb)->s_gdb_count;
-	if (unlikely(block <= valid_block ||
-		     ((block + len) > ext4_blocks_count(es))))
-		return 0;
-	else
-		return 1;
+	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
 
 static int ext4_valid_extent_idx(struct inode *inode,
 				struct ext4_extent_idx *ext_idx)
 {
-	ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+	ext4_fsblk_t block = idx_pblock(ext_idx);
 
-	valid_block = le32_to_cpu(es->s_first_data_block) +
-		EXT4_SB(inode->i_sb)->s_gdb_count;
-	if (unlikely(block <= valid_block ||
-		     (block >= ext4_blocks_count(es))))
-		return 0;
-	else
-		return 1;
+	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
 }
 
 static int ext4_valid_extent_entries(struct inode *inode,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d7b7480..dadd3f9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode,
 }
 
 static int __ext4_check_blockref(const char *function, struct inode *inode,
-				 __le32 *p, unsigned int max) {
-
-	unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
+				 __le32 *p, unsigned int max)
+{
 	__le32 *bref = p;
+	unsigned int blk;
+
 	while (bref < p+max) {
-		if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
+		blk = le32_to_cpu(*bref++);
+		if (blk && 
+		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 
+						    blk, 1))) {
 			ext4_error(inode->i_sb, function,
-				   "block reference %u >= max (%u) "
-				   "in inode #%lu, offset=%d",
-				   le32_to_cpu(*bref), maxblocks,
-				   inode->i_ino, (int)(bref-p));
+				   "invalid block reference %u "
+				   "in inode #%lu", blk, inode->i_ino);
  			return -EIO;
  		}
-		bref++;
  	}
  	return 0;
 }
@@ -1125,6 +1126,21 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 		ext4_discard_preallocations(inode);
 }
 
+static int check_block_validity(struct inode *inode, sector_t logical,
+				sector_t phys, int len)
+{
+	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
+		ext4_error(inode->i_sb, "check_block_validity",
+			   "inode #%lu logical block %llu mapped to %llu "
+			   "(size %d)", inode->i_ino,
+			   (unsigned long long) logical,
+			   (unsigned long long) phys, len);
+		WARN_ON(1);
+		return -EIO;
+	}
+	return 0;
+}
+
 /*
  * The ext4_get_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
@@ -1170,6 +1186,13 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
+	if (retval > 0 && buffer_mapped(bh)) {
+		int ret = check_block_validity(inode, block, 
+					       bh->b_blocknr, retval);
+		if (ret != 0)
+			return ret;
+	}
+
 	/* If it is only a block(s) look up */
 	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
 		return retval;
@@ -1245,6 +1268,12 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 		ext4_da_update_reserve_space(inode, retval);
 
 	up_write((&EXT4_I(inode)->i_data_sem));
+	if (retval > 0 && buffer_mapped(bh)) {
+		int ret = check_block_validity(inode, block, 
+					       bh->b_blocknr, retval);
+		if (ret != 0)
+			return ret;
+	}
 	return retval;
 }
 
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 541bd9a..ed8482e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2961,15 +2961,10 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		+ le32_to_cpu(es->s_first_data_block);
 
 	len = ac->ac_b_ex.fe_len;
-	if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
-	    in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
-	    in_range(block, ext4_inode_table(sb, gdp),
-		     EXT4_SB(sb)->s_itb_per_group) ||
-	    in_range(block + len - 1, ext4_inode_table(sb, gdp),
-		     EXT4_SB(sb)->s_itb_per_group)) {
+	if (!ext4_data_block_valid(sbi, block, len)) {
 		ext4_error(sb, __func__,
-			   "Allocating block %llu in system zone of %d group\n",
-			   block, ac->ac_b_ex.fe_group);
+			   "Allocating blocks %llu-%llu which overlap "
+			   "fs metadata\n", block, block+len);
 		/* File system mounted not to panic on error
 		 * Fix the bitmap and repeat the block allocation
 		 * We leak some of the blocks here.
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dc34ed3..600b7ad 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -568,6 +568,7 @@ static void ext4_put_super(struct super_block *sb)
 	struct ext4_super_block *es = sbi->s_es;
 	int i, err;
 
+	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
 	ext4_xattr_put_super(sb);
@@ -1055,6 +1056,7 @@ enum {
 	Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
 	Opt_usrquota, Opt_grpquota, Opt_i_version,
 	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+	Opt_block_validity, Opt_noblock_validity,
 	Opt_inode_readahead_blks, Opt_journal_ioprio
 };
 
@@ -1114,6 +1116,8 @@ static const match_table_t tokens = {
 	{Opt_resize, "resize"},
 	{Opt_delalloc, "delalloc"},
 	{Opt_nodelalloc, "nodelalloc"},
+	{Opt_block_validity, "block_validity"},
+	{Opt_noblock_validity, "noblock_validity"},
 	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
 	{Opt_journal_ioprio, "journal_ioprio=%u"},
 	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
@@ -1508,6 +1512,12 @@ set_qf_format:
 		case Opt_delalloc:
 			set_opt(sbi->s_mount_opt, DELALLOC);
 			break;
+		case Opt_block_validity:
+			set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+			break;
+		case Opt_noblock_validity:
+			clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+			break;
 		case Opt_inode_readahead_blks:
 			if (match_int(&args[0], &option))
 				return 0;
@@ -2826,6 +2836,13 @@ no_journal:
 	} else if (test_opt(sb, DELALLOC))
 		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
 
+	err = ext4_setup_system_zone(sb);
+	if (err) {
+		printk(KERN_ERR "EXT4-fs: failed to initialize system "
+		       "zone (%d)\n", err);
+		goto failed_mount4;
+	}
+
 	ext4_ext_init(sb);
 	err = ext4_mb_init(sb, needs_recovery);
 	if (err) {
@@ -2875,6 +2892,7 @@ cantfind_ext4:
 
 failed_mount4:
 	printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
+	ext4_release_system_zone(sb);
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
@@ -3515,6 +3533,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 				sb->s_flags &= ~MS_RDONLY;
 		}
 	}
+	ext4_setup_system_zone(sb);
 	if (sbi->s_journal == NULL)
 		ext4_commit_super(sb, 1);
 
@@ -3927,13 +3946,16 @@ static int __init init_ext4_fs(void)
 {
 	int err;
 
+	err = init_ext4_system_zone();
+	if (err)
+		return err;
 	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
 	if (!ext4_kset)
-		return -ENOMEM;
+		goto out4;
 	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
 	err = init_ext4_mballoc();
 	if (err)
-		return err;
+		goto out3;
 
 	err = init_ext4_xattr();
 	if (err)
@@ -3958,6 +3980,11 @@ out1:
 	exit_ext4_xattr();
 out2:
 	exit_ext4_mballoc();
+out3:
+	remove_proc_entry("fs/ext4", NULL);
+	kset_unregister(ext4_kset);
+out4:
+	exit_ext4_system_zone();
 	return err;
 }
 
@@ -3972,6 +3999,7 @@ static void __exit exit_ext4_fs(void)
 	exit_ext4_mballoc();
 	remove_proc_entry("fs/ext4", NULL);
 	kset_unregister(ext4_kset);
+	exit_ext4_system_zone();
 }
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
-- 
cgit v0.10.2


From ec7f4d5d67ef63c724ab6d4bdc7d2ffa8861071a Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Wed, 13 May 2009 22:20:35 +0100
Subject: [ARM] S3C24XX: GPIO: Remove s3c2410_gpio_irq2pin() call

Remove the s3c2410_gpio_irq2pin() function as it is not being
used in any in kernel driver and the function is probably not
being used anywhere else.

This is also part of the effort to remove any of the s3c24xx gpio
specific code that cannot be recreated by using the gpiolib
framework now in the kernel.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/include/mach/hardware.h b/arch/arm/mach-s3c2410/include/mach/hardware.h
index 74d5a1a..d7745d8 100644
--- a/arch/arm/mach-s3c2410/include/mach/hardware.h
+++ b/arch/arm/mach-s3c2410/include/mach/hardware.h
@@ -46,17 +46,6 @@ extern unsigned int s3c2410_gpio_getcfg(unsigned int pin);
 
 extern int s3c2410_gpio_getirq(unsigned int pin);
 
-/* s3c2410_gpio_irq2pin
- *
- * turn the given irq number into the corresponding GPIO number
- *
- * returns:
- *	< 0 = no pin
- *	>=0 = gpio pin number
-*/
-
-extern int s3c2410_gpio_irq2pin(unsigned int irq);
-
 #ifdef CONFIG_CPU_S3C2400
 
 extern int s3c2400_gpio_getirq(unsigned int pin);
diff --git a/arch/arm/plat-s3c24xx/gpio.c b/arch/arm/plat-s3c24xx/gpio.c
index 4a899c2..65ebbaf 100644
--- a/arch/arm/plat-s3c24xx/gpio.c
+++ b/arch/arm/plat-s3c24xx/gpio.c
@@ -199,19 +199,3 @@ int s3c2410_gpio_getirq(unsigned int pin)
 }
 
 EXPORT_SYMBOL(s3c2410_gpio_getirq);
-
-int s3c2410_gpio_irq2pin(unsigned int irq)
-{
-	if (irq >= IRQ_EINT0 && irq <= IRQ_EINT3)
-		return S3C2410_GPF0 + (irq - IRQ_EINT0);
-
-	if (irq >= IRQ_EINT4 && irq <= IRQ_EINT7)
-		return S3C2410_GPF4 + (irq - IRQ_EINT4);
-
-	if (irq >= IRQ_EINT8 && irq <= IRQ_EINT23)
-		return S3C2410_GPG0 + (irq - IRQ_EINT8);
-
-	return -EINVAL;
-}
-
-EXPORT_SYMBOL(s3c2410_gpio_irq2pin);
-- 
cgit v0.10.2


From 9f05f6a921353f4293cda37f221b9bfa532d3c57 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 13 May 2009 21:46:15 +0100
Subject: [ARM] S3C24XX: GPIO: Remove pin specific input and output defines

The use of S3C2410_GP[A-Z]x_INP and S3C2410_GP[A-Z]x_OUTP are
very rare and are taking up large amounts of space in the
regs-gpio.h header.

The GPIO layer has had generic input and out defines called
S3C2410_GPIO_INPUT and S3C2410_GPIO_OUTPUT for a while which work
for all S3C24XX GPIOs.

Do the following replacements:

   S3C2410_GP[A-Z][0-9]*_\OUTP => S3C2410_GPIO_OUTPUT
   S3C2410_GP[A-Z][0-9]*_\INP  => /S3C2410_GPIO_INPUT
   S3C2410_GPA[0-9]*_OUT       => S3C2410_GPIO_OUTPUT

to remove any usages of these and prepare the header for
the removal of these.

The following command was used to acheive this:

find . -type f -writable ! -name regs-gpio.h ! -name "*~" | xargs sed -i~ -e 's/S3C2410_GP[A-Z][0-9]*_\OUTP/S3C2410_GPIO_OUTPUT/g' -e 's/S3C2410_GP[A-Z][0-9]*_\INP/S3C2410_GPIO_INPUT/g' -e 's/S3C2410_GPA[0-9]*_OUT/S3C2410_GPIO_OUTPUT/g'

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/h1940-bluetooth.c b/arch/arm/mach-s3c2410/h1940-bluetooth.c
index 5a6bc56..1f332b3 100644
--- a/arch/arm/mach-s3c2410/h1940-bluetooth.c
+++ b/arch/arm/mach-s3c2410/h1940-bluetooth.c
@@ -89,7 +89,7 @@ static int __init h1940bt_probe(struct platform_device *pdev)
 	/* Configures BT serial port GPIOs */
 	s3c2410_gpio_cfgpin(S3C2410_GPH0, S3C2410_GPH0_nCTS0);
 	s3c2410_gpio_pullup(S3C2410_GPH0, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPH1, S3C2410_GPH1_OUTP);
+	s3c2410_gpio_cfgpin(S3C2410_GPH1, S3C2410_GPIO_OUTPUT);
 	s3c2410_gpio_pullup(S3C2410_GPH1, 1);
 	s3c2410_gpio_cfgpin(S3C2410_GPH2, S3C2410_GPH2_TXD0);
 	s3c2410_gpio_pullup(S3C2410_GPH2, 1);
diff --git a/arch/arm/mach-s3c2410/mach-amlm5900.c b/arch/arm/mach-s3c2410/mach-amlm5900.c
index 6d6995a..79428ed 100644
--- a/arch/arm/mach-s3c2410/mach-amlm5900.c
+++ b/arch/arm/mach-s3c2410/mach-amlm5900.c
@@ -224,7 +224,7 @@ static void amlm5900_init_pm(void)
 	} else {
 		enable_irq_wake(IRQ_EINT9);
 		/* configure the suspend/resume status pin */
-		s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPF2_OUTP);
+		s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPIO_OUTPUT);
 		s3c2410_gpio_pullup(S3C2410_GPF2, 0);
 	}
 }
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 4389c16..10ba91b 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -213,7 +213,7 @@ static int bast_pm_suspend(struct sys_device *sd, pm_message_t state)
 {
 	/* ensure that an nRESET is not generated on resume. */
 	s3c2410_gpio_setpin(S3C2410_GPA21, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPA21, S3C2410_GPA21_OUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPA21, S3C2410_GPIO_OUTPUT);
 
 	return 0;
 }
diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c
index 61a1ea9..7e002f9 100644
--- a/arch/arm/mach-s3c2410/mach-vr1000.c
+++ b/arch/arm/mach-s3c2410/mach-vr1000.c
@@ -355,7 +355,7 @@ static struct clk *vr1000_clocks[] __initdata = {
 
 static void vr1000_power_off(void)
 {
-	s3c2410_gpio_cfgpin(S3C2410_GPB9, S3C2410_GPB9_OUTP);
+	s3c2410_gpio_cfgpin(S3C2410_GPB9, S3C2410_GPIO_OUTPUT);
 	s3c2410_gpio_setpin(S3C2410_GPB9, 1);
 }
 
diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c
index 8331e8d..b209749b 100644
--- a/arch/arm/mach-s3c2410/usb-simtec.c
+++ b/arch/arm/mach-s3c2410/usb-simtec.c
@@ -109,7 +109,7 @@ int usb_simtec_init(void)
 	printk("USB Power Control, (c) 2004 Simtec Electronics\n");
 	s3c_device_usb.dev.platform_data = &usb_simtec_info;
 
-	s3c2410_gpio_cfgpin(S3C2410_GPB4, S3C2410_GPB4_OUTP);
+	s3c2410_gpio_cfgpin(S3C2410_GPB4, S3C2410_GPIO_OUTPUT);
 	s3c2410_gpio_setpin(S3C2410_GPB4, 1);
 	return 0;
 }
diff --git a/arch/arm/mach-s3c2440/mach-nexcoder.c b/arch/arm/mach-s3c2440/mach-nexcoder.c
index 7aeaa97..3d4c61e 100644
--- a/arch/arm/mach-s3c2440/mach-nexcoder.c
+++ b/arch/arm/mach-s3c2440/mach-nexcoder.c
@@ -121,15 +121,15 @@ static void __init nexcoder_sensorboard_init(void)
 {
 	// Initialize SCCB bus
 	s3c2410_gpio_setpin(S3C2410_GPE14, 1); // IICSCL
-	s3c2410_gpio_cfgpin(S3C2410_GPE14, S3C2410_GPE14_OUTP);
+	s3c2410_gpio_cfgpin(S3C2410_GPE14, S3C2410_GPIO_OUTPUT);
 	s3c2410_gpio_setpin(S3C2410_GPE15, 1); // IICSDA
-	s3c2410_gpio_cfgpin(S3C2410_GPE15, S3C2410_GPE15_OUTP);
+	s3c2410_gpio_cfgpin(S3C2410_GPE15, S3C2410_GPIO_OUTPUT);
 
 	// Power up the sensor board
 	s3c2410_gpio_setpin(S3C2410_GPF1, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPF1, S3C2410_GPF1_OUTP); // CAM_GPIO7 => nLDO_PWRDN
+	s3c2410_gpio_cfgpin(S3C2410_GPF1, S3C2410_GPIO_OUTPUT); // CAM_GPIO7 => nLDO_PWRDN
 	s3c2410_gpio_setpin(S3C2410_GPF2, 0);
-	s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPF2_OUTP); // CAM_GPIO6 => CAM_PWRDN
+	s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPIO_OUTPUT); // CAM_GPIO6 => CAM_PWRDN
 }
 
 static void __init nexcoder_map_io(void)
diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c
index c8a4668..e05f955 100644
--- a/arch/arm/mach-s3c2440/mach-osiris.c
+++ b/arch/arm/mach-s3c2440/mach-osiris.c
@@ -292,7 +292,7 @@ static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state)
 
 	/* ensure that an nRESET is not generated on resume. */
 	s3c2410_gpio_setpin(S3C2410_GPA21, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPA21, S3C2410_GPA21_OUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPA21, S3C2410_GPIO_OUTPUT);
 
 	return 0;
 }
diff --git a/arch/arm/plat-s3c24xx/common-smdk.c b/arch/arm/plat-s3c24xx/common-smdk.c
index 1a8347c..ef4c8fd 100644
--- a/arch/arm/plat-s3c24xx/common-smdk.c
+++ b/arch/arm/plat-s3c24xx/common-smdk.c
@@ -184,10 +184,10 @@ void __init smdk_machine_init(void)
 {
 	/* Configure the LEDs (even if we have no LED support)*/
 
-	s3c2410_gpio_cfgpin(S3C2410_GPF4, S3C2410_GPF4_OUTP);
-	s3c2410_gpio_cfgpin(S3C2410_GPF5, S3C2410_GPF5_OUTP);
-	s3c2410_gpio_cfgpin(S3C2410_GPF6, S3C2410_GPF6_OUTP);
-	s3c2410_gpio_cfgpin(S3C2410_GPF7, S3C2410_GPF7_OUTP);
+	s3c2410_gpio_cfgpin(S3C2410_GPF4, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPF5, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPF6, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPF7, S3C2410_GPIO_OUTPUT);
 
 	s3c2410_gpio_setpin(S3C2410_GPF4, 1);
 	s3c2410_gpio_setpin(S3C2410_GPF5, 1);
-- 
cgit v0.10.2


From 3cba5ef862bc2c2a0cf2841d993bfb99b9a6c476 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 17 May 2009 23:30:45 +0100
Subject: [ARM] S3C: Move watchdog system reset to own file.

Move the watchdog reset code from <mach/system-reset.h> to
a new file <plat/watchdog-reset.h> as this code is needed
by both s3c2410, s3c64xx and soon-to-be added s3c24a0.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/include/mach/system-reset.h b/arch/arm/mach-s3c2410/include/mach/system-reset.h
index b8687f7..6faadce 100644
--- a/arch/arm/mach-s3c2410/include/mach/system-reset.h
+++ b/arch/arm/mach-s3c2410/include/mach/system-reset.h
@@ -11,21 +11,13 @@
 */
 
 #include <mach/hardware.h>
-#include <linux/io.h>
-
-#include <plat/regs-watchdog.h>
-#include <mach/regs-clock.h>
-
-#include <linux/clk.h>
-#include <linux/err.h>
+#include <plat/watchdog-reset.h>
 
 extern void (*s3c24xx_reset_hook)(void);
 
 static void
 arch_reset(char mode, const char *cmd)
 {
-	struct clk *wdtclk;
-
 	if (mode == 's') {
 		cpu_reset(0);
 	}
@@ -33,31 +25,7 @@ arch_reset(char mode, const char *cmd)
 	if (s3c24xx_reset_hook)
 		s3c24xx_reset_hook();
 
-	printk("arch_reset: attempting watchdog reset\n");
-
-	__raw_writel(0, S3C2410_WTCON);	  /* disable watchdog, to be safe  */
-
-	wdtclk = clk_get(NULL, "watchdog");
-	if (!IS_ERR(wdtclk)) {
-		clk_enable(wdtclk);
-	} else
-		printk(KERN_WARNING "%s: warning: cannot get watchdog clock\n", __func__);
-
-	/* put initial values into count and data */
-	__raw_writel(0x80, S3C2410_WTCNT);
-	__raw_writel(0x80, S3C2410_WTDAT);
-
-	/* set the watchdog to go and reset... */
-	__raw_writel(S3C2410_WTCON_ENABLE|S3C2410_WTCON_DIV16|S3C2410_WTCON_RSTEN |
-		     S3C2410_WTCON_PRESCALE(0x20), S3C2410_WTCON);
-
-	/* wait for reset to assert... */
-	mdelay(500);
-
-	printk(KERN_ERR "Watchdog reset failed to assert reset\n");
-
-	/* delay to allow the serial port to show the message */
-	mdelay(50);
+	arch_wdt_reset();
 
 	/* we'll take a jump through zero as a poor second */
 	cpu_reset(0);
diff --git a/arch/arm/plat-s3c/include/plat/watchdog-reset.h b/arch/arm/plat-s3c/include/plat/watchdog-reset.h
new file mode 100644
index 0000000..277506a
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/watchdog-reset.h
@@ -0,0 +1,48 @@
+/* arch/arm/plat-s3c/include/plat/watchdog-reset.h
+ *
+ * Copyright (c) 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * S3C2410 - System define for arch_reset() function
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <plat/regs-watchdog.h>
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+static inline void arch_wdt_reset(void)
+{
+	struct clk *wdtclk;
+
+	printk("arch_reset: attempting watchdog reset\n");
+
+	__raw_writel(0, S3C2410_WTCON);	  /* disable watchdog, to be safe  */
+
+	wdtclk = clk_get(NULL, "watchdog");
+	if (!IS_ERR(wdtclk)) {
+		clk_enable(wdtclk);
+	} else
+		printk(KERN_WARNING "%s: warning: cannot get watchdog clock\n", __func__);
+
+	/* put initial values into count and data */
+	__raw_writel(0x80, S3C2410_WTCNT);
+	__raw_writel(0x80, S3C2410_WTDAT);
+
+	/* set the watchdog to go and reset... */
+	__raw_writel(S3C2410_WTCON_ENABLE|S3C2410_WTCON_DIV16|S3C2410_WTCON_RSTEN |
+		     S3C2410_WTCON_PRESCALE(0x20), S3C2410_WTCON);
+
+	/* wait for reset to assert... */
+	mdelay(500);
+
+	printk(KERN_ERR "Watchdog reset failed to assert reset\n");
+
+	/* delay to allow the serial port to show the message */
+	mdelay(50);
+}
-- 
cgit v0.10.2


From 543899f610799426babb5313682fd9c249e34677 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 17 May 2009 23:40:30 +0100
Subject: [ARM] S3C64XX: Use common watchdog reset for system reset.

Use the newly moved <plat/watchdog-reset.h> to perform the
arch_reset() call which has been unimplemented for a while.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c6400/include/mach/map.h b/arch/arm/mach-s3c6400/include/mach/map.h
index 99d5e3b..7a1e5a2 100644
--- a/arch/arm/mach-s3c6400/include/mach/map.h
+++ b/arch/arm/mach-s3c6400/include/mach/map.h
@@ -40,6 +40,7 @@
 
 #define S3C64XX_PA_FB		(0x77100000)
 #define S3C64XX_PA_USB_HSOTG	(0x7C000000)
+#define S3C64XX_PA_WATCHDOG	(0x7E004000)
 #define S3C64XX_PA_SYSCON	(0x7E00F000)
 #define S3C64XX_PA_TIMER	(0x7F006000)
 #define S3C64XX_PA_IIC0		(0x7F004000)
diff --git a/arch/arm/mach-s3c6400/include/mach/system.h b/arch/arm/mach-s3c6400/include/mach/system.h
index 090cfd9..2e58cb7 100644
--- a/arch/arm/mach-s3c6400/include/mach/system.h
+++ b/arch/arm/mach-s3c6400/include/mach/system.h
@@ -11,6 +11,8 @@
 #ifndef __ASM_ARCH_SYSTEM_H
 #define __ASM_ARCH_SYSTEM_H __FILE__
 
+#include <plat/watchdog-reset.h>
+
 static void arch_idle(void)
 {
 	/* nothing here yet */
@@ -18,7 +20,11 @@ static void arch_idle(void)
 
 static void arch_reset(char mode, const char *cmd)
 {
-	/* nothing here yet */
+	if (mode != 's')
+		arch_wdt_reset();
+
+	/* if all else fails, or mode was for soft, jump to 0 */
+	cpu_reset(0);
 }
 
 #endif /* __ASM_ARCH_IRQ_H */
diff --git a/arch/arm/plat-s3c/include/plat/watchdog-reset.h b/arch/arm/plat-s3c/include/plat/watchdog-reset.h
index 277506a..54b762a 100644
--- a/arch/arm/plat-s3c/include/plat/watchdog-reset.h
+++ b/arch/arm/plat-s3c/include/plat/watchdog-reset.h
@@ -11,6 +11,7 @@
 */
 
 #include <plat/regs-watchdog.h>
+#include <mach/map.h>
 
 #include <linux/clk.h>
 #include <linux/err.h>
diff --git a/arch/arm/plat-s3c64xx/cpu.c b/arch/arm/plat-s3c64xx/cpu.c
index 19f68b5..b1fdd83 100644
--- a/arch/arm/plat-s3c64xx/cpu.c
+++ b/arch/arm/plat-s3c64xx/cpu.c
@@ -102,6 +102,11 @@ static struct map_desc s3c_iodesc[] __initdata = {
 		.pfn		= __phys_to_pfn(S3C64XX_PA_MODEM),
 		.length		= SZ_4K,
 		.type		= MT_DEVICE,
+	}, {
+		.virtual	= (unsigned long)S3C_VA_WATCHDOG,
+		.pfn		= __phys_to_pfn(S3C64XX_PA_WATCHDOG),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
 	},
 };
 
-- 
cgit v0.10.2


From 22ef37eed673587ac984965dc88ba94c68873291 Mon Sep 17 00:00:00 2001
From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Date: Sat, 16 May 2009 22:56:28 -0700
Subject: page-writeback: fix the calculation of the oldest_jif in wb_kupdate()

wb_kupdate() function has a bug on linux-2.6.30-rc5.  This bug causes
generic_sync_sb_inodes() to start to write inodes back much earlier than
our expectations because it miscalculates oldest_jif in wb_kupdate().

This bug was introduced in 704503d836042d4a4c7685b7036e7de0418fbc0f
('mm: fix proc_dointvec_userhz_jiffies "breakage"').

Signed-off-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 30351f0..bb553c3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -94,12 +94,12 @@ unsigned long vm_dirty_bytes;
 /*
  * The interval between `kupdate'-style writebacks
  */
-unsigned int dirty_writeback_interval = 5 * 100; /* sentiseconds */
+unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
 
 /*
  * The longest time for which data is allowed to remain dirty
  */
-unsigned int dirty_expire_interval = 30 * 100; /* sentiseconds */
+unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
 
 /*
  * Flag that makes the machine dump writes/reads and block dirtyings.
@@ -770,7 +770,7 @@ static void wb_kupdate(unsigned long arg)
 
 	sync_supers();
 
-	oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval);
+	oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
 	start_jif = jiffies;
 	next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
 	nr_to_write = global_page_state(NR_FILE_DIRTY) +
-- 
cgit v0.10.2


From 0568c518937ee3a9b6a94d18bae9c150fe5d6832 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 17 May 2009 23:31:23 -0400
Subject: ext4: down i_data_sem only for read when walking tree for fiemap

Not sure why I put this in as down_write originally; all we are
doing is walking the tree, nothing will change under us and
concurrent reads should be no problem.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d04b779..d4e99e9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3312,10 +3312,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		 * Walk the extent tree gathering extent information.
 		 * ext4_ext_fiemap_cb will push extents back to user.
 		 */
-		down_write(&EXT4_I(inode)->i_data_sem);
+		down_read(&EXT4_I(inode)->i_data_sem);
 		error = ext4_ext_walk_space(inode, start_blk, len_blks,
 					  ext4_ext_fiemap_cb, fieinfo);
-		up_write(&EXT4_I(inode)->i_data_sem);
+		up_read(&EXT4_I(inode)->i_data_sem);
 	}
 
 	return error;
-- 
cgit v0.10.2


From f68301656b5f5d2de104f2687add6beeb8f3c3b9 Mon Sep 17 00:00:00 2001
From: Manish Katiyar <mkatiyar@gmail.com>
Date: Sun, 17 May 2009 23:52:44 -0400
Subject: ext4: Fix memory leak in ext4_fill_super() in case of a failed mount

Signed-off-by: Manish Katiyar <mkatiyar@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 600b7ad..eca6c05 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2924,6 +2924,7 @@ failed_mount:
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	lock_kernel();
 	return ret;
-- 
cgit v0.10.2


From de5ce037304f2c88a319b1c3b808ab0c4c618c1c Mon Sep 17 00:00:00 2001
From: Manish Katiyar <mkatiyar@gmail.com>
Date: Sun, 17 May 2009 23:52:47 -0400
Subject: ext3: Fix memory leak in ext3_fill_super() in case of a failed mount

Signed-off-by: Manish Katiyar <mkatiyar@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 599dbfe..d8b73d4 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2021,6 +2021,7 @@ failed_mount:
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	lock_kernel();
 	return ret;
-- 
cgit v0.10.2


From 0f7ee7c17241915fdaff49d1a36f5aafd80a7dce Mon Sep 17 00:00:00 2001
From: Manish Katiyar <mkatiyar@gmail.com>
Date: Sun, 17 May 2009 23:52:51 -0400
Subject: ext2: Fix memory leak in ext2_fill_super() in case of a failed mount

Signed-off-by: Manish Katiyar <mkatiyar@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 5c4afe6..e3c748f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1093,6 +1093,7 @@ failed_mount:
 	brelse(bh);
 failed_sbi:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	return ret;
 }
-- 
cgit v0.10.2


From af3e4aca47d2e05a545a5e10ba5c7193e0b665e0 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 30 Apr 2009 10:59:19 +0000
Subject: powerpc: Do not assert pte_locked for hugepage PTE entries

With CONFIG_DEBUG_VM, an assertion is made when changing the protection
flags of a PTE that the PTE is locked. Huge pages use a different pagetable
format and the assertion is bogus and will always trigger with a bug looking
something like

 Unable to handle kernel paging request for data at address 0xf1a00235800006f8
 Faulting instruction address: 0xc000000000034a80
 Oops: Kernel access of bad area, sig: 11 [#1]
 SMP NR_CPUS=32 NUMA Maple
 Modules linked in: dm_snapshot dm_mirror dm_region_hash
  dm_log dm_mod loop evdev ext3 jbd mbcache sg sd_mod ide_pci_generic
  pata_amd ata_generic ipr libata tg3 libphy scsi_mod windfarm_pid
  windfarm_smu_sat windfarm_max6690_sensor windfarm_lm75_sensor
  windfarm_cpufreq_clamp windfarm_core i2c_powermac
 NIP: c000000000034a80 LR: c000000000034b18 CTR: 0000000000000003
 REGS: c000000003037600 TRAP: 0300   Not tainted (2.6.30-rc3-autokern1)
 MSR: 9000000000009032 <EE,ME,IR,DR>  CR: 28002484  XER: 200fffff
 DAR: f1a00235800006f8, DSISR: 0000000040010000
 TASK = c0000002e54cc740[2960] 'map_high_trunca' THREAD: c000000003034000 CPU: 2
 GPR00: 4000000000000000 c000000003037880 c000000000895d30 c0000002e5a2e500
 GPR04: 00000000a0000000 c0000002edc40880 0000005700000393 0000000000000001
 GPR08: f000000011ac0000 01a00235800006e8 00000000000000f5 f1a00235800006e8
 GPR12: 0000000028000484 c0000000008dd780 0000000000001000 0000000000000000
 GPR16: fffffffffffff000 0000000000000000 00000000a0000000 c000000003037a20
 GPR20: c0000002e5f4ece8 0000000000001000 c0000002edc40880 0000000000000000
 GPR24: c0000002e5f4ece8 0000000000000000 00000000a0000000 c0000002e5f4ece8
 GPR28: 0000005700000393 c0000002e5a2e500 00000000a0000000 c000000003037880
 NIP [c000000000034a80] .assert_pte_locked+0xa4/0xd0
 LR [c000000000034b18] .ptep_set_access_flags+0x6c/0xb4
 Call Trace:
 [c000000003037880] [c000000003037990] 0xc000000003037990 (unreliable)
 [c000000003037910] [c000000000034b18] .ptep_set_access_flags+0x6c/0xb4
 [c0000000030379b0] [c00000000014bef8] .hugetlb_cow+0x124/0x674
 [c000000003037b00] [c00000000014c930] .hugetlb_fault+0x4e8/0x6f8
 [c000000003037c00] [c00000000013443c] .handle_mm_fault+0xac/0x828
 [c000000003037cf0] [c0000000000340a8] .do_page_fault+0x39c/0x584
 [c000000003037e30] [c0000000000057b0] handle_page_fault+0x20/0x5c
 Instruction dump:
 7d29582a 7d200074 7800d182 0b000000 3c004000 3960ffff 780007c6 796b00c4
 7d290214 7929a302 1d290068 7d6b4a14 <800b0010> 7c000074 7800d182 0b000000

This patch fixes the problem by not asseting the PTE is locked for VMAs
backed by huge pages.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index f5c6fd4..ae1d67c 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -219,7 +219,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 		entry = do_dcache_icache_coherency(entry);
 	changed = !pte_same(*(ptep), entry);
 	if (changed) {
-		assert_pte_locked(vma->vm_mm, address);
+		if (!(vma->vm_flags & VM_HUGETLB))
+			assert_pte_locked(vma->vm_mm, address);
 		__ptep_set_access_flags(ptep, entry);
 		flush_tlb_page_nohash(vma, address);
 	}
-- 
cgit v0.10.2


From 021376a3b655364c92c10be544a3319946a792e8 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Wed, 13 May 2009 20:30:24 +0000
Subject: powerpc/ftrace: Use pr_devel() in ftrace.c

pr_debug() can now result in code being generated even when #DEBUG
is not defined. That's not really desirable in the ftrace code
which we want to be snappy.

With CONFIG_DYNAMIC_DEBUG=y:

size before:
   text	   data	    bss	    dec	    hex	filename
   3334	    672	      4	   4010	    faa	arch/powerpc/kernel/ftrace.o

size after:
   text	   data	    bss	    dec	    hex	filename
   2616	    360	      4	   2980	    ba4	arch/powerpc/kernel/ftrace.o

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 70e2a73..5b078ee 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -157,7 +157,7 @@ __ftrace_make_nop(struct module *mod,
 	 * 0xe8, 0x4c, 0x00, 0x28,    ld      r2,40(r12)
 	 */
 
-	pr_debug("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
+	pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
 
 	/* Find where the trampoline jumps to */
 	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
@@ -165,7 +165,7 @@ __ftrace_make_nop(struct module *mod,
 		return -EFAULT;
 	}
 
-	pr_debug(" %08x %08x", jmp[0], jmp[1]);
+	pr_devel(" %08x %08x", jmp[0], jmp[1]);
 
 	/* verify that this is what we expect it to be */
 	if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
@@ -181,18 +181,18 @@ __ftrace_make_nop(struct module *mod,
 	offset = ((unsigned)((unsigned short)jmp[0]) << 16) +
 		(int)((short)jmp[1]);
 
-	pr_debug(" %x ", offset);
+	pr_devel(" %x ", offset);
 
 	/* get the address this jumps too */
 	tramp = mod->arch.toc + offset + 32;
-	pr_debug("toc: %lx", tramp);
+	pr_devel("toc: %lx", tramp);
 
 	if (probe_kernel_read(jmp, (void *)tramp, 8)) {
 		printk(KERN_ERR "Failed to read %lx\n", tramp);
 		return -EFAULT;
 	}
 
-	pr_debug(" %08x %08x\n", jmp[0], jmp[1]);
+	pr_devel(" %08x %08x\n", jmp[0], jmp[1]);
 
 	ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
 
@@ -269,7 +269,7 @@ __ftrace_make_nop(struct module *mod,
 	 *  0x4e, 0x80, 0x04, 0x20  bctr
 	 */
 
-	pr_debug("ip:%lx jumps to %lx", ip, tramp);
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
 
 	/* Find where the trampoline jumps to */
 	if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
@@ -277,7 +277,7 @@ __ftrace_make_nop(struct module *mod,
 		return -EFAULT;
 	}
 
-	pr_debug(" %08x %08x ", jmp[0], jmp[1]);
+	pr_devel(" %08x %08x ", jmp[0], jmp[1]);
 
 	/* verify that this is what we expect it to be */
 	if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
@@ -293,7 +293,7 @@ __ftrace_make_nop(struct module *mod,
 	if (tramp & 0x8000)
 		tramp -= 0x10000;
 
-	pr_debug(" %lx ", tramp);
+	pr_devel(" %lx ", tramp);
 
 	if (tramp != addr) {
 		printk(KERN_ERR
@@ -402,7 +402,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	/* ld r2,40(r1) */
 	op[1] = 0xe8410028;
 
-	pr_debug("write to %lx\n", rec->ip);
+	pr_devel("write to %lx\n", rec->ip);
 
 	if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
 		return -EPERM;
@@ -442,7 +442,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		return -EINVAL;
 	}
 
-	pr_debug("write to %lx\n", rec->ip);
+	pr_devel("write to %lx\n", rec->ip);
 
 	if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
 		return -EPERM;
-- 
cgit v0.10.2


From c3cf8667ed7db58c1960958cbb0a9098d513cc60 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 15 May 2009 04:33:54 +0000
Subject: powerpc/ftrace: Fix constraint to be early clobber

After upgrading my distcc boxes from gcc 4.2.2 to 4.4.0, the function
graph tracer broke. This was discovered on my x86 boxes.

The issue is that gcc used the same register for an output as it did for
an input in an asm statement. I first thought this was a bug in gcc and
reported it. I was notified that gcc was correct and that the output had
to be flagged as an "early clobber".

I noticed that powerpc had the same issue and this patch fixes it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 5b078ee..2d182f1 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -594,7 +594,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 			PPC_LONG "2b,4b\n"
 		".previous"
 
-		: [old] "=r" (old), [faulted] "=r" (faulted)
+		: [old] "=&r" (old), [faulted] "=r" (faulted)
 		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
 		: "memory"
 	);
-- 
cgit v0.10.2


From dc892288f42661a140124ecbf9d44850a95de222 Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoffrey.levand@am.sony.com>
Date: Fri, 15 May 2009 08:01:59 +0000
Subject: powerpc/ps3: Update ps3_defconfig

Refresh and set these options:

 CONFIG_SYSFS_DEPRECATED_V2: y -> n
 CONFIG_INPUT_JOYSTICK:      y -> n
 CONFIG_HID_SONY:            n -> m
 CONFIG_RTC_DRV_PS3:         - -> m

Signed-off-by: Geoff Levand <geoffrey.levand@am.sony.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index ac14f52..e28e65e 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -1,13 +1,14 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc8
-# Fri Mar 13 09:28:45 2009
+# Linux kernel version: 2.6.30-rc5
+# Fri May 15 10:37:00 2009
 #
 CONFIG_PPC64=y
 
 #
 # Processor support
 #
+CONFIG_PPC_BOOK3S=y
 # CONFIG_POWER4_ONLY is not set
 CONFIG_POWER3=y
 CONFIG_POWER4=y
@@ -55,9 +56,11 @@ CONFIG_OF=y
 # CONFIG_GENERIC_TBSYNC is not set
 CONFIG_AUDIT_ARCH=y
 CONFIG_GENERIC_BUG=y
+CONFIG_DTC=y
 # CONFIG_DEFAULT_UIMAGE is not set
 # CONFIG_PPC_DCR_NATIVE is not set
 # CONFIG_PPC_DCR_MMIO is not set
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
@@ -72,6 +75,7 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
@@ -88,8 +92,7 @@ CONFIG_CLASSIC_RCU=y
 CONFIG_LOG_BUF_SHIFT=17
 # CONFIG_GROUP_SCHED is not set
 # CONFIG_CGROUPS is not set
-CONFIG_SYSFS_DEPRECATED=y
-CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
 # CONFIG_RELAY is not set
 CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
@@ -99,6 +102,9 @@ CONFIG_NAMESPACES=y
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -107,6 +113,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -138,6 +145,7 @@ CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_DMA_ATTRS=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
+# CONFIG_SLOW_WORK is not set
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -150,7 +158,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
-# CONFIG_BLK_DEV_IO_TRACE is not set
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 CONFIG_BLOCK_COMPAT=y
@@ -172,7 +179,6 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
 #
 # Platform support
 #
-CONFIG_PPC_MULTIPLATFORM=y
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_ISERIES is not set
 # CONFIG_PPC_PMAC is not set
@@ -209,6 +215,7 @@ CONFIG_SPU_FS_64K_LS=y
 # CONFIG_SPU_TRACE is not set
 CONFIG_SPU_BASE=y
 # CONFIG_PQ2ADS is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
 # CONFIG_IPIC is not set
 # CONFIG_MPIC is not set
 # CONFIG_MPIC_WEIRD is not set
@@ -279,11 +286,14 @@ CONFIG_PHYS_ADDR_T_64BIT=y
 CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_ARCH_MEMORY_PROBE=y
 CONFIG_PPC_HAS_HASH_64K=y
 CONFIG_PPC_4K_PAGES=y
 # CONFIG_PPC_16K_PAGES is not set
 # CONFIG_PPC_64K_PAGES is not set
+# CONFIG_PPC_256K_PAGES is not set
 CONFIG_FORCE_MAX_ZONEORDER=13
 CONFIG_SCHED_SMT=y
 CONFIG_PROC_DEVICETREE=y
@@ -316,7 +326,6 @@ CONFIG_NET=y
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -389,6 +398,7 @@ CONFIG_IPV6_NDISC_NODETYPE=y
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 # CONFIG_NET_SCHED is not set
 # CONFIG_DCB is not set
 
@@ -396,6 +406,7 @@ CONFIG_IPV6_NDISC_NODETYPE=y
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_DROP_MONITOR is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_CAN is not set
 # CONFIG_IRDA is not set
@@ -419,11 +430,9 @@ CONFIG_BT_HCIBTUSB=m
 # CONFIG_BT_HCIBFUSB is not set
 # CONFIG_BT_HCIVHCI is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=m
 # CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
 # CONFIG_WIRELESS_OLD_REGULATORY is not set
 CONFIG_WIRELESS_EXT=y
 # CONFIG_WIRELESS_EXT_SYSFS is not set
@@ -602,6 +611,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 # CONFIG_SCSI_SRP_ATTRS is not set
 # CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 # CONFIG_ATA is not set
 CONFIG_MD=y
 # CONFIG_BLK_DEV_MD is not set
@@ -616,6 +626,7 @@ CONFIG_BLK_DEV_DM=m
 # CONFIG_DM_UEVENT is not set
 # CONFIG_MACINTOSH_DRIVERS is not set
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
 # CONFIG_MACVLAN is not set
@@ -625,6 +636,8 @@ CONFIG_NETDEVICES=y
 # CONFIG_PHYLIB is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=m
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
 # CONFIG_IBM_NEW_EMAC_TAH is not set
@@ -646,12 +659,13 @@ CONFIG_GELIC_WIRELESS_OLD_PSK_INTERFACE=y
 CONFIG_WLAN_80211=y
 # CONFIG_LIBERTAS is not set
 # CONFIG_LIBERTAS_THINFIRM is not set
+# CONFIG_AT76C50X_USB is not set
 # CONFIG_USB_ZD1201 is not set
 # CONFIG_USB_NET_RNDIS_WLAN is not set
 # CONFIG_RTL8187 is not set
 # CONFIG_MAC80211_HWSIM is not set
 # CONFIG_P54_COMMON is not set
-# CONFIG_IWLWIFI_LEDS is not set
+# CONFIG_AR9170_USB is not set
 # CONFIG_HOSTAP is not set
 # CONFIG_B43 is not set
 # CONFIG_B43LEGACY is not set
@@ -673,6 +687,7 @@ CONFIG_USB_PEGASUS=m
 CONFIG_USB_USBNET=m
 CONFIG_USB_NET_AX8817X=m
 # CONFIG_USB_NET_CDCETHER is not set
+# CONFIG_USB_NET_CDC_EEM is not set
 # CONFIG_USB_NET_DM9601 is not set
 # CONFIG_USB_NET_SMSC95XX is not set
 # CONFIG_USB_NET_GL620A is not set
@@ -724,28 +739,7 @@ CONFIG_INPUT_EVDEV=m
 #
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
-CONFIG_INPUT_JOYSTICK=y
-# CONFIG_JOYSTICK_ANALOG is not set
-# CONFIG_JOYSTICK_A3D is not set
-# CONFIG_JOYSTICK_ADI is not set
-# CONFIG_JOYSTICK_COBRA is not set
-# CONFIG_JOYSTICK_GF2K is not set
-# CONFIG_JOYSTICK_GRIP is not set
-# CONFIG_JOYSTICK_GRIP_MP is not set
-# CONFIG_JOYSTICK_GUILLEMOT is not set
-# CONFIG_JOYSTICK_INTERACT is not set
-# CONFIG_JOYSTICK_SIDEWINDER is not set
-# CONFIG_JOYSTICK_TMDC is not set
-# CONFIG_JOYSTICK_IFORCE is not set
-# CONFIG_JOYSTICK_WARRIOR is not set
-# CONFIG_JOYSTICK_MAGELLAN is not set
-# CONFIG_JOYSTICK_SPACEORB is not set
-# CONFIG_JOYSTICK_SPACEBALL is not set
-# CONFIG_JOYSTICK_STINGER is not set
-# CONFIG_JOYSTICK_TWIDJOY is not set
-# CONFIG_JOYSTICK_ZHENHUA is not set
-# CONFIG_JOYSTICK_JOYDUMP is not set
-# CONFIG_JOYSTICK_XPAD is not set
+# CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 # CONFIG_INPUT_TOUCHSCREEN is not set
 # CONFIG_INPUT_MISC is not set
@@ -864,6 +858,7 @@ CONFIG_FB_PS3_DEFAULT_SIZE_M=9
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
 # CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
@@ -934,15 +929,17 @@ CONFIG_USB_HIDDEV=y
 #
 # Special HID drivers
 #
-# CONFIG_HID_COMPAT is not set
 # CONFIG_HID_A4TECH is not set
 # CONFIG_HID_APPLE is not set
 # CONFIG_HID_BELKIN is not set
 # CONFIG_HID_CHERRY is not set
 # CONFIG_HID_CHICONY is not set
 # CONFIG_HID_CYPRESS is not set
+# CONFIG_DRAGONRISE_FF is not set
 # CONFIG_HID_EZKEY is not set
+# CONFIG_HID_KYE is not set
 # CONFIG_HID_GYRATION is not set
+# CONFIG_HID_KENSINGTON is not set
 # CONFIG_HID_LOGITECH is not set
 # CONFIG_HID_MICROSOFT is not set
 # CONFIG_HID_MONTEREY is not set
@@ -950,7 +947,7 @@ CONFIG_USB_HIDDEV=y
 # CONFIG_HID_PANTHERLORD is not set
 # CONFIG_HID_PETALYNX is not set
 # CONFIG_HID_SAMSUNG is not set
-# CONFIG_HID_SONY is not set
+CONFIG_HID_SONY=m
 # CONFIG_HID_SUNPLUS is not set
 # CONFIG_GREENASIA_FF is not set
 # CONFIG_HID_TOPSEED is not set
@@ -1012,11 +1009,11 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=m
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1058,7 +1055,6 @@ CONFIG_USB_STORAGE=m
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
@@ -1074,6 +1070,7 @@ CONFIG_USB_STORAGE=m
 #
 # OTG and related infrastructure
 #
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
@@ -1113,8 +1110,10 @@ CONFIG_RTC_INTF_DEV=y
 #
 # on-CPU RTC drivers
 #
-CONFIG_RTC_DRV_PPC=m
+# CONFIG_RTC_DRV_GENERIC is not set
+CONFIG_RTC_DRV_PS3=m
 # CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 
@@ -1125,6 +1124,7 @@ CONFIG_EXT2_FS=m
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=m
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 # CONFIG_EXT3_FS_POSIX_ACL is not set
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1161,6 +1161,11 @@ CONFIG_AUTOFS4_FS=m
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=m
@@ -1211,6 +1216,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1223,7 +1229,6 @@ CONFIG_LOCKD_V4=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -1283,6 +1288,7 @@ CONFIG_NLS_ISO8859_1=y
 # CONFIG_NLS_KOI8_U is not set
 # CONFIG_NLS_UTF8 is not set
 # CONFIG_DLM is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
@@ -1296,15 +1302,16 @@ CONFIG_CRC_ITU_T=m
 CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
-CONFIG_ZLIB_INFLATE=m
+CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
 CONFIG_LZO_COMPRESS=m
 CONFIG_LZO_DECOMPRESS=m
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
 CONFIG_HAVE_LMB=y
+CONFIG_NLATTR=y
 
 #
 # Kernel hacking
@@ -1322,6 +1329,9 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1357,12 +1367,15 @@ CONFIG_DEBUG_LIST=y
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
 CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -1371,18 +1384,21 @@ CONFIG_TRACING=y
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_FTRACE_STARTUP_TEST is not set
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
 CONFIG_PRINT_STACK_DEPTH=64
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_CODE_PATCHING_SELFTEST is not set
 # CONFIG_FTR_FIXUP_SELFTEST is not set
 # CONFIG_MSI_BITMAP_SELFTEST is not set
@@ -1415,10 +1431,12 @@ CONFIG_CRYPTO_HASH=y
 CONFIG_CRYPTO_HASH2=y
 CONFIG_CRYPTO_RNG=m
 CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_MANAGER2=y
 CONFIG_CRYPTO_GF128MUL=m
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 # CONFIG_CRYPTO_AUTHENC is not set
 # CONFIG_CRYPTO_TEST is not set
@@ -1487,6 +1505,7 @@ CONFIG_CRYPTO_SALSA20=m
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 CONFIG_CRYPTO_LZO=m
 
 #
-- 
cgit v0.10.2


From 0e337b42d620ca7c45fe64e64dd71957c56216c9 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Sun, 17 May 2009 18:29:03 +0000
Subject: powerpc: Explicit alignment for .data.cacheline_aligned

I don't think anything guarantees that the objects in data.page_aligned
are a multiple of PAGE_SIZE, thus the section may end on any boundary.

So the following section, .data.cacheline_aligned needs an explicit
alignment.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index a047a6c..8ef8a14 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -264,6 +264,7 @@ SECTIONS
 		*(.data.page_aligned)
 	}
 
+	. = ALIGN(L1_CACHE_BYTES);
 	.data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
 		*(.data.cacheline_aligned)
 	}
-- 
cgit v0.10.2


From c0daaf3f1f672defa3a45ca449b76d0e86c55892 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 18 May 2009 14:02:12 +1000
Subject: perf_counter: powerpc: initialize cpuhw pointer before use

Commit 9e35ad38 ("perf_counter: Rework the perf counter
disable/enable") added code to the powerpc hw_perf_enable (renamed
from hw_perf_restore) to test cpuhw->disabled and return immediately
if it is not set (i.e. if the PMU is already enabled).

Unfortunately the test got added before cpuhw was initialized,
resulting in an oops the first time hw_perf_enable got called.
This fixes it by moving the initialization of cpuhw to before
cpuhw->disabled is tested.

[ Impact: fix oops-causing bug on powerpc ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <18960.56772.869734.304631@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 6baae5a..fe21b24 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -450,12 +450,11 @@ void hw_perf_enable(void)
 	int idx;
 
 	local_irq_save(flags);
+	cpuhw = &__get_cpu_var(cpu_hw_counters);
 	if (!cpuhw->disabled) {
 		local_irq_restore(flags);
 		return;
 	}
-
-	cpuhw = &__get_cpu_var(cpu_hw_counters);
 	cpuhw->disabled = 0;
 
 	/*
-- 
cgit v0.10.2


From 135cad366b4e7d6a79f6369f6cb5b721985aa62f Mon Sep 17 00:00:00 2001
From: Magnus Lilja <lilja.magnus@gmail.com>
Date: Sun, 17 May 2009 20:18:08 +0200
Subject: i.MX31: Add support for the CPLD on PDK Debug board.

The i.MX31 PDK consists of several boards, one of them is a debug
board containing a CPLD which controls some debug leds, switch
buttons, an interrupt chip and an Ethernet controller.
This patch adds support for detecting if the PDK board is present
(during boot) and adds the interrupt chip to the kernel.

Signed-off-by: Magnus Lilja <lilja.magnus@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31pdk.c b/arch/arm/mach-mx3/mx31pdk.c
index 5345498..32599e5 100644
--- a/arch/arm/mach-mx3/mx31pdk.c
+++ b/arch/arm/mach-mx3/mx31pdk.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/clk.h>
 #include <linux/irq.h>
+#include <linux/gpio.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -46,13 +47,156 @@ static int mx31pdk_pins[] = {
 	MX31_PIN_CTS1__CTS1,
 	MX31_PIN_RTS1__RTS1,
 	MX31_PIN_TXD1__TXD1,
-	MX31_PIN_RXD1__RXD1
+	MX31_PIN_RXD1__RXD1,
+	IOMUX_MODE(MX31_PIN_GPIO1_1, IOMUX_CONFIG_GPIO),
 };
 
 static struct imxuart_platform_data uart_pdata = {
 	.flags = IMXUART_HAVE_RTSCTS,
 };
 
+/*
+ * Routines for the CPLD on the debug board. It contains a CPLD handling
+ * LEDs, switches, interrupts for Ethernet.
+ */
+
+static void mx31pdk_expio_irq_handler(uint32_t irq, struct irq_desc *desc)
+{
+	uint32_t imr_val;
+	uint32_t int_valid;
+	uint32_t expio_irq;
+
+	imr_val = __raw_readw(CPLD_INT_MASK_REG);
+	int_valid = __raw_readw(CPLD_INT_STATUS_REG) & ~imr_val;
+
+	expio_irq = MXC_EXP_IO_BASE;
+	for (; int_valid != 0; int_valid >>= 1, expio_irq++) {
+		if ((int_valid & 1) == 0)
+			continue;
+		generic_handle_irq(expio_irq);
+	}
+}
+
+/*
+ * Disable an expio pin's interrupt by setting the bit in the imr.
+ * @param irq           an expio virtual irq number
+ */
+static void expio_mask_irq(uint32_t irq)
+{
+	uint16_t reg;
+	uint32_t expio = MXC_IRQ_TO_EXPIO(irq);
+
+	/* mask the interrupt */
+	reg = __raw_readw(CPLD_INT_MASK_REG);
+	reg |= 1 << expio;
+	__raw_writew(reg, CPLD_INT_MASK_REG);
+}
+
+/*
+ * Acknowledge an expanded io pin's interrupt by clearing the bit in the isr.
+ * @param irq           an expanded io virtual irq number
+ */
+static void expio_ack_irq(uint32_t irq)
+{
+	uint32_t expio = MXC_IRQ_TO_EXPIO(irq);
+
+	/* clear the interrupt status */
+	__raw_writew(1 << expio, CPLD_INT_RESET_REG);
+	__raw_writew(0, CPLD_INT_RESET_REG);
+	/* mask the interrupt */
+	expio_mask_irq(irq);
+}
+
+/*
+ * Enable a expio pin's interrupt by clearing the bit in the imr.
+ * @param irq           a expio virtual irq number
+ */
+static void expio_unmask_irq(uint32_t irq)
+{
+	uint16_t reg;
+	uint32_t expio = MXC_IRQ_TO_EXPIO(irq);
+
+	/* unmask the interrupt */
+	reg = __raw_readw(CPLD_INT_MASK_REG);
+	reg &= ~(1 << expio);
+	__raw_writew(reg, CPLD_INT_MASK_REG);
+}
+
+static struct irq_chip expio_irq_chip = {
+	.ack = expio_ack_irq,
+	.mask = expio_mask_irq,
+	.unmask = expio_unmask_irq,
+};
+
+static int __init mx31pdk_init_expio(void)
+{
+	int i;
+	int ret;
+
+	/* Check if there's a debug board connected */
+	if ((__raw_readw(CPLD_MAGIC_NUMBER1_REG) != 0xAAAA) ||
+	    (__raw_readw(CPLD_MAGIC_NUMBER2_REG) != 0x5555) ||
+	    (__raw_readw(CPLD_MAGIC_NUMBER3_REG) != 0xCAFE)) {
+		/* No Debug board found */
+		return -ENODEV;
+	}
+
+	pr_info("i.MX31PDK Debug board detected, rev = 0x%04X\n",
+		__raw_readw(CPLD_CODE_VER_REG));
+
+	/*
+	 * Configure INT line as GPIO input
+	 */
+	ret = gpio_request(IOMUX_TO_GPIO(MX31_PIN_GPIO1_1), "sms9217-irq");
+	if (ret)
+		pr_warning("could not get LAN irq gpio\n");
+	else
+		gpio_direction_input(IOMUX_TO_GPIO(MX31_PIN_GPIO1_1));
+
+	/* Disable the interrupts and clear the status */
+	__raw_writew(0, CPLD_INT_MASK_REG);
+	__raw_writew(0xFFFF, CPLD_INT_RESET_REG);
+	__raw_writew(0, CPLD_INT_RESET_REG);
+	__raw_writew(0x1F, CPLD_INT_MASK_REG);
+	for (i = MXC_EXP_IO_BASE;
+	     i < (MXC_EXP_IO_BASE + MXC_MAX_EXP_IO_LINES);
+	     i++) {
+		set_irq_chip(i, &expio_irq_chip);
+		set_irq_handler(i, handle_level_irq);
+		set_irq_flags(i, IRQF_VALID);
+	}
+	set_irq_type(EXPIO_PARENT_INT, IRQ_TYPE_LEVEL_LOW);
+	set_irq_chained_handler(EXPIO_PARENT_INT, mx31pdk_expio_irq_handler);
+
+	return 0;
+}
+
+/*
+ * This structure defines the MX31 memory map.
+ */
+static struct map_desc mx31pdk_io_desc[] __initdata = {
+	{
+		.virtual = SPBA0_BASE_ADDR_VIRT,
+		.pfn = __phys_to_pfn(SPBA0_BASE_ADDR),
+		.length = SPBA0_SIZE,
+		.type = MT_DEVICE_NONSHARED,
+	}, {
+		.virtual = CS5_BASE_ADDR_VIRT,
+		.pfn = __phys_to_pfn(CS5_BASE_ADDR),
+		.length = CS5_SIZE,
+		.type = MT_DEVICE,
+	},
+};
+
+/*
+ * Set up static virtual mappings.
+ */
+static void __init mx31pdk_map_io(void)
+{
+	mx31_map_io();
+	iotable_init(mx31pdk_io_desc, ARRAY_SIZE(mx31pdk_io_desc));
+}
+
 /*!
  * Board specific initialization.
  */
@@ -62,6 +206,8 @@ static void __init mxc_board_init(void)
 				      "mx31pdk");
 
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+
+	mx31pdk_init_expio();
 }
 
 static void __init mx31pdk_timer_init(void)
@@ -82,7 +228,7 @@ MACHINE_START(MX31_3DS, "Freescale MX31PDK (3DS)")
 	.phys_io	= AIPS1_BASE_ADDR,
 	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params    = PHYS_OFFSET + 0x100,
-	.map_io         = mx31_map_io,
+	.map_io         = mx31pdk_map_io,
 	.init_irq       = mxc_init_irq,
 	.init_machine   = mxc_board_init,
 	.timer          = &mx31pdk_timer,
diff --git a/arch/arm/plat-mxc/include/mach/board-mx31pdk.h b/arch/arm/plat-mxc/include/mach/board-mx31pdk.h
index b1e6463..519bab3 100644
--- a/arch/arm/plat-mxc/include/mach/board-mx31pdk.h
+++ b/arch/arm/plat-mxc/include/mach/board-mx31pdk.h
@@ -16,4 +16,49 @@
 #define MXC_LL_UART_PADDR	UART1_BASE_ADDR
 #define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
 
+/* Definitions for components on the Debug board */
+
+/* Base address of CPLD controller on the Debug board */
+#define DEBUG_BASE_ADDRESS		CS5_IO_ADDRESS(CS5_BASE_ADDR)
+
+/* LAN9217 ethernet base address */
+#define LAN9217_BASE_ADDR		CS5_BASE_ADDR
+
+/* CPLD config and interrupt base address */
+#define CPLD_ADDR			(DEBUG_BASE_ADDRESS + 0x20000)
+
+/* LED switchs */
+#define CPLD_LED_REG			(CPLD_ADDR + 0x00)
+/* buttons */
+#define CPLD_SWITCH_BUTTONS_REG	(EXPIO_ADDR + 0x08)
+/* status, interrupt */
+#define CPLD_INT_STATUS_REG		(CPLD_ADDR + 0x10)
+#define CPLD_INT_MASK_REG		(CPLD_ADDR + 0x38)
+#define CPLD_INT_RESET_REG		(CPLD_ADDR + 0x20)
+/* magic word for debug CPLD */
+#define CPLD_MAGIC_NUMBER1_REG		(CPLD_ADDR + 0x40)
+#define CPLD_MAGIC_NUMBER2_REG		(CPLD_ADDR + 0x48)
+/* CPLD code version */
+#define CPLD_CODE_VER_REG		(CPLD_ADDR + 0x50)
+/* magic word for debug CPLD */
+#define CPLD_MAGIC_NUMBER3_REG		(CPLD_ADDR + 0x58)
+/* module reset register */
+#define CPLD_MODULE_RESET_REG		(CPLD_ADDR + 0x60)
+/* CPU ID and Personality ID */
+#define CPLD_MCU_BOARD_ID_REG		(CPLD_ADDR + 0x68)
+
+/* CPLD IRQ line for external uart, external ethernet etc */
+#define EXPIO_PARENT_INT	IOMUX_TO_IRQ(MX31_PIN_GPIO1_1)
+
+#define MXC_EXP_IO_BASE		(MXC_BOARD_IRQ_START)
+#define MXC_IRQ_TO_EXPIO(irq)	((irq) - MXC_EXP_IO_BASE)
+
+#define EXPIO_INT_ENET		(MXC_EXP_IO_BASE + 0)
+#define EXPIO_INT_XUART_A	(MXC_EXP_IO_BASE + 1)
+#define EXPIO_INT_XUART_B	(MXC_EXP_IO_BASE + 2)
+#define EXPIO_INT_BUTTON_A	(MXC_EXP_IO_BASE + 3)
+#define EXPIO_INT_BUTTON_B	(MXC_EXP_IO_BASE + 4)
+
+#define MXC_MAX_EXP_IO_LINES	16
+
 #endif /* __ASM_ARCH_MXC_BOARD_MX31PDK_H__ */
diff --git a/arch/arm/plat-mxc/include/mach/mx3x.h b/arch/arm/plat-mxc/include/mach/mx3x.h
index 3878c60..b559a4b 100644
--- a/arch/arm/plat-mxc/include/mach/mx3x.h
+++ b/arch/arm/plat-mxc/include/mach/mx3x.h
@@ -48,6 +48,9 @@
 #define CS4_SIZE		SZ_32M
 
 #define CS5_BASE_ADDR		0xB6000000
+#define CS5_BASE_ADDR_VIRT	0xF6000000
+#define CS5_SIZE		SZ_32M
+
 #define PCMCIA_MEM_BASE_ADDR	0xBC000000
 
 /*
@@ -191,6 +194,9 @@
 #define CS4_IO_ADDRESS(x)  \
 	(((x) - CS4_BASE_ADDR) + CS4_BASE_ADDR_VIRT)
 
+#define CS5_IO_ADDRESS(x)  \
+	(((x) - CS5_BASE_ADDR) + CS5_BASE_ADDR_VIRT)
+
 #define X_MEMC_IO_ADDRESS(x)  \
 	(((x) - X_MEMC_BASE_ADDR) + X_MEMC_BASE_ADDR_VIRT)
 
-- 
cgit v0.10.2


From e5198075c67a22ec9a09565b1ce88d3d3f5ba855 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 15 May 2009 13:05:16 -0700
Subject: x86, apic: introduce io_apic_irq_attr

according to Ingo, io_apic irq-setup related functions have too many
parameters with a repetitive signature.

So reduce related funcs to get less params by passing a pointer
to a newly defined io_apic_irq_attr structure.

v2: io_apic_irq ==> irq_attr
    triggering ==> trigger

v3: add set_io_apic_irq_attr

[ Impact: cleanup ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Len Brown <lenb@kernel.org>
LKML-Reference: <4A08ACD3.2070401@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 26a40ab..a7d14bb 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -63,9 +63,26 @@ extern unsigned long io_apic_irqs;
 extern void init_VISWS_APIC_irqs(void);
 extern void setup_IO_APIC(void);
 extern void disable_IO_APIC(void);
+
+struct io_apic_irq_attr {
+	int ioapic;
+	int ioapic_pin;
+	int trigger;
+	int polarity;
+};
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+					int ioapic, int ioapic_pin,
+					int trigger, int polarity)
+{
+	irq_attr->ioapic     = ioapic;
+	irq_attr->ioapic_pin = ioapic_pin;
+	irq_attr->trigger    = trigger;
+	irq_attr->polarity   = polarity;
+}
+
 extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
-					int *ioapic, int *ioapic_pin,
-					int *trigger, int *polarity);
+					struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);
 
 extern void enable_IO_APIC(void);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6fd99f9..daf866e 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -156,8 +156,9 @@ extern int io_apic_get_version(int ioapic);
 extern int io_apic_get_redir_entries(int ioapic);
 #endif /* CONFIG_ACPI */
 
-extern int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin,
-				  int irq, int edge_level, int active_high_low);
+struct io_apic_irq_attr;
+extern int io_apic_set_pci_routing(struct device *dev, int irq,
+		 struct io_apic_irq_attr *irq_attr);
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index dcfbc3a..4af63df 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -523,7 +523,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 {
 	unsigned int irq;
 	unsigned int plat_gsi = gsi;
@@ -533,14 +533,14 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
 	 */
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		if (triggering == ACPI_LEVEL_SENSITIVE)
+		if (trigger == ACPI_LEVEL_SENSITIVE)
 			eisa_set_level_irq(gsi);
 	}
 #endif
 
 #ifdef CONFIG_X86_IO_APIC
 	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
-		plat_gsi = mp_register_gsi(dev, gsi, triggering, polarity);
+		plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
 	}
 #endif
 	acpi_gsi_to_irq(plat_gsi, &irq);
@@ -1156,7 +1156,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	}
 }
 
-static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int triggering,
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
 			int polarity)
 {
 #ifdef CONFIG_X86_MPPARSE
@@ -1181,7 +1181,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int triggering,
 	/* print the entry should happen on mptable identically */
 	mp_irq.type = MP_INTSRC;
 	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+	mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
 				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
 	mp_irq.srcbus = number;
 	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
@@ -1194,10 +1194,11 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int triggering,
 	return 0;
 }
 
-int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
+int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 {
 	int ioapic;
 	int ioapic_pin;
+	struct io_apic_irq_attr irq_attr;
 
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return gsi;
@@ -1225,11 +1226,12 @@ int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 		       ioapic_pin);
 		return gsi;
 	}
-	mp_config_acpi_gsi(dev, gsi, triggering, polarity);
+	mp_config_acpi_gsi(dev, gsi, trigger, polarity);
 
-	io_apic_set_pci_routing(dev, ioapic, ioapic_pin, gsi,
-				triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
-				polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+	set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
+			     trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
+			     polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+	io_apic_set_pci_routing(dev, gsi, &irq_attr);
 
 	return gsi;
 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 74d2b48..ce1ac74 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1096,8 +1096,7 @@ static int pin_2_irq(int idx, int apic, int pin)
  * Not an __init, possibly needed by modules
  */
 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
-				int *ioapic, int *ioapic_pin,
-				int *trigger, int *polarity)
+				struct io_apic_irq_attr *irq_attr)
 {
 	int apic, i, best_guess = -1;
 
@@ -1127,10 +1126,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 				continue;
 
 			if (pin == (mp_irqs[i].srcbusirq & 3)) {
-				*ioapic = apic;
-				*ioapic_pin = mp_irqs[i].dstirq;
-				*trigger = irq_trigger(i);
-				*polarity = irq_polarity(i);
+				set_io_apic_irq_attr(irq_attr, apic,
+						     mp_irqs[i].dstirq,
+						     irq_trigger(i),
+						     irq_polarity(i));
 				return irq;
 			}
 			/*
@@ -1138,10 +1137,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 			 * best-guess fuzzy result for broken mptables.
 			 */
 			if (best_guess < 0) {
-				*ioapic = apic;
-				*ioapic_pin = mp_irqs[i].dstirq;
-				*trigger = irq_trigger(i);
-				*polarity = irq_polarity(i);
+				set_io_apic_irq_attr(irq_attr, apic,
+						     mp_irqs[i].dstirq,
+						     irq_trigger(i),
+						     irq_polarity(i));
 				best_guess = irq;
 			}
 		}
@@ -3865,13 +3864,16 @@ int __init arch_probe_nr_irqs(void)
 }
 #endif
 
-static int __io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
-				 int triggering, int polarity)
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+				struct io_apic_irq_attr *irq_attr)
 {
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
 	int node;
+	int ioapic, pin;
+	int trigger, polarity;
 
+	ioapic = irq_attr->ioapic;
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 			ioapic);
@@ -3889,6 +3891,10 @@ static int __io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, in
 		return 0;
 	}
 
+	pin = irq_attr->ioapic_pin;
+	trigger = irq_attr->trigger;
+	polarity = irq_attr->polarity;
+
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
@@ -3897,20 +3903,22 @@ static int __io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, in
 		add_pin_to_irq_node(cfg, node, ioapic, pin);
 	}
 
-	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
+	setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
 
 	return 0;
 }
 
-int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
-				 int triggering, int polarity)
+int io_apic_set_pci_routing(struct device *dev, int irq,
+				struct io_apic_irq_attr *irq_attr)
 {
-
+	int ioapic, pin;
 	/*
 	 * Avoid pin reprogramming.  PRTs typically include entries
 	 * with redundant pin->gsi mappings (but unique PCI devices);
 	 * we only program the IOAPIC on the first.
 	 */
+	ioapic = irq_attr->ioapic;
+	pin = irq_attr->ioapic_pin;
 	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
 		pr_debug("Pin %d-%d already programmed\n",
 			 mp_ioapics[ioapic].apicid, pin);
@@ -3918,8 +3926,7 @@ int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
 	}
 	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
 
-	return __io_apic_set_pci_routing(dev, ioapic, pin, irq,
-					 triggering, polarity);
+	return __io_apic_set_pci_routing(dev, irq, irq_attr);
 }
 
 /* --------------------------------------------------------------------------
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 2f3e192..0696d50 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1200,14 +1200,11 @@ static int pirq_enable_irq(struct pci_dev *dev)
 #ifdef CONFIG_X86_IO_APIC
 			struct pci_dev *temp_dev;
 			int irq;
-			int ioapic = -1, ioapic_pin = -1;
-			int triggering, polarity;
+			struct io_apic_irq_attr irq_attr;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
 						PCI_SLOT(dev->devfn),
-						pin - 1,
-						&ioapic, &ioapic_pin,
-						&triggering, &polarity);
+						pin - 1, &irq_attr);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1221,9 +1218,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn),
-						pin - 1,
-						&ioapic, &ioapic_pin,
-						&triggering, &polarity);
+						pin - 1, &irq_attr);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
@@ -1233,9 +1228,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				io_apic_set_pci_routing(&dev->dev, ioapic,
-							ioapic_pin, irq,
-							triggering, polarity);
+				io_apic_set_pci_routing(&dev->dev, irq,
+							 &irq_attr);
 				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 79901a0..42e4260 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -155,15 +155,13 @@ int ibmphp_init_devno(struct slot **cur_slot)
 	for (loop = 0; loop < len; loop++) {
 		if ((*cur_slot)->number == rtable->slots[loop].slot &&
 		    (*cur_slot)->bus == rtable->slots[loop].bus) {
-			int ioapic = -1, ioapic_pin = -1;
-			int triggering, polarity;
+			struct io_apic_irq_attr irq_attr;
 
 			(*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
 			for (i = 0; i < 4; i++)
 				(*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
 						(int) (*cur_slot)->device, i,
-						&ioapic, &ioapic_pin,
-						&triggering, &polarity);
+						&irq_attr);
 
 			debug("(*cur_slot)->irq[0] = %x\n",
 					(*cur_slot)->irq[0]);
-- 
cgit v0.10.2


From 2759c3287de27266e06f1f4e82cbd2d65f6a044c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 15 May 2009 13:05:16 -0700
Subject: x86: don't call read_apic_id if !cpu_has_apic

should not call that if apic is disabled.

[ Impact: fix crash on certain UP configs ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
LKML-Reference: <4A09CCBB.2000306@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 744e6d8..d0c99ab 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -161,7 +161,7 @@ static int flat_apic_id_registered(void)
 
 static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
 {
-	return hard_smp_processor_id() >> index_msb;
+	return initial_apic_id >> index_msb;
 }
 
 struct apic apic_flat =  {
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7e4a459..728b375 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -272,7 +272,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	int cpu = smp_processor_id();
 	int node;
-	unsigned apicid = hard_smp_processor_id();
+	unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	node = c->phys_proc_id;
 	if (apicid_to_node[apicid] != NUMA_NO_NODE)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c1caefc..017c600 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -761,6 +761,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	if (this_cpu->c_identify)
 		this_cpu->c_identify(c);
 
+	/* Clear/Set all flags overriden by options, after probe */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 #ifdef CONFIG_X86_64
 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 #endif
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7437fa1..daed39b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -229,12 +229,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 }
 #endif
 
-static void __cpuinit srat_detect_node(void)
+static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	unsigned node;
 	int cpu = smp_processor_id();
-	int apicid = hard_smp_processor_id();
+	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
@@ -400,7 +400,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	}
 
 	/* Work around errata */
-	srat_detect_node();
+	srat_detect_node(c);
 
 	if (cpu_has(c, X86_FEATURE_VMX))
 		detect_vmx_virtcap(c);
-- 
cgit v0.10.2


From 888a589f6be07d624e21e2174d98375e9f95911b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 15 May 2009 13:59:37 -0700
Subject: mm, x86: remove MEMORY_HOTPLUG_RESERVE related code

after:

 | commit b263295dbffd33b0fbff670720fa178c30e3392a
 | Author: Christoph Lameter <clameter@sgi.com>
 | Date:   Wed Jan 30 13:30:47 2008 +0100
 |
 |    x86: 64-bit, make sparsemem vmemmap the only memory model

we don't have MEMORY_HOTPLUG_RESERVE anymore.

Historically, x86-64 had an architecture-specific method for memory hotplug
whereby it scanned the SRAT for physical memory ranges that could be
potentially used for memory hot-add later. By reserving those ranges
without physical memory, the memmap would be allocated and left dormant
until needed. This depended on the DISCONTIG memory model which has been
removed so the code implementing HOTPLUG_RESERVE is now dead.

This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE.

(Changelog authored by Mel.)

v2: updated changelog, and remove hotadd= in doc

[ Impact: remove dead code ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Mel Gorman <mel@csn.ul.ie>
Workflow-found-OK-by: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A0C4910.7090508@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c1304..2db5893 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -150,11 +150,6 @@ NUMA
 		Otherwise, the remaining system RAM is allocated to an
 		additional node.
 
-  numa=hotadd=percent
-		Only allow hotadd memory to preallocate page structures upto
-		percent of already available memory.
-		numa=hotadd=0 will disable hotadd memory.
-
 ACPI
 
   acpi=off	Don't enable ACPI
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 064ed6d..7feff06 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index fb61d81..a6a93c3 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -591,8 +588,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 87b45bf..b0dbbd4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdata;
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
@@ -66,9 +64,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +81,6 @@ static __init void bad_srat(void)
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node]))  {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+				 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 				     end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-						nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -510,26 +491,6 @@ static int null_slit_node_compare(int a, int b)
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-				"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d..511b098 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1031,8 +1031,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe753ec..474c7e9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve;
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3103,64 +3099,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
 }
 
 /**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
  * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void)
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */
-- 
cgit v0.10.2


From 7c43769a9776141ec23ca81a1bdd5a9c0512f165 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 15 May 2009 13:59:37 -0700
Subject: x86, mm: Fix node_possible_map logic

Recently there were some changes to the meaning of node_possible_map,
and it is quite strange:

- the node without memory would be set in node_possible_map
- but some node with less NODE_MIN_SIZE will be kicked out of node_possible_map.

fix it by adding strict_setup_node_bootmem().

Also, remove unparse_node().

so result will be:

1. cpu_to_node() will return online node only (nearest one)
2. apicid_to_node() still returns the node that could be not online but is set
   in node_possible_map.
3. node_possible_map will include nodes that mem on it are less NODE_MIN_SIZE

v2: after move_cpus_to_node change.

[ Impact: get node_possible_map right ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Jack Steiner <steiner@sgi.com>
LKML-Reference: <4A0C49BE.6080800@kernel.org>
[ v3: various small cleanups and comment clarifications ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 7feff06..c4ae822 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -24,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end);
 
 #ifdef CONFIG_NUMA
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So dont recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a6a93c3..459913b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 }
 
 /* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start,
-			       unsigned long end)
+void __init
+setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 {
 	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
+	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
-	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	int nid;
 
 	if (!end)
 		return;
 
+	/*
+	 * Don't confuse VM with a node that doesn't have the
+	 * minimum amount of memory:
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
 	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index b0dbbd4..2dfcbf9 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -36,10 +36,6 @@ static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
 static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
 
-/* Too small nodes confuse the VM badly. Usually they result
-   from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -338,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 	return 1;
 }
 
-static void __init unparse_node(int node)
-{
-	int i;
-	node_clear(node, nodes_parsed);
-	node_clear(node, cpu_nodes_parsed);
-	for (i = 0; i < MAX_LOCAL_APIC; i++) {
-		if (apicid_to_node[i] == node)
-			apicid_to_node[i] = NUMA_NO_NODE;
-	}
-}
-
 void __init acpi_numa_arch_fixup(void) {}
 
 /* Use the information discovered above to actually set up the nodes. */
@@ -360,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		return -1;
 
 	/* First clean up the node list */
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++)
 		cutoff_node(i, start, end);
-		/*
-		 * don't confuse VM with a node that doesn't have the
-		 * minimum memory.
-		 */
-		if (nodes[i].end &&
-			(nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
-			unparse_node(i);
-			node_set_offline(i);
-		}
-	}
 
 	if (!nodes_cover_memory(nodes)) {
 		bad_srat();
@@ -404,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 
 		if (node == NUMA_NO_NODE)
 			continue;
-		if (!node_isset(node, node_possible_map))
+		if (!node_online(node))
 			numa_clear_node(i);
 	}
 	numa_init_array();
-- 
cgit v0.10.2


From 35d5a9a61490bf39d2e48d7f499c8c801a39ebe9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 15 May 2009 13:59:37 -0700
Subject: x86: fix system without memory on node0

Jack found a boot crash on a system which doesn't have memory on node0.

It turns out with recent per_cpu changes, node_number for BSP will always
be 0, and it is not consistent to cpu_to_node() that might set it to a
different (nearer) node already.

aka when numa_set_node() for node0 is called early before per_cpu area is
setup:

two places touched that per_cpu(node_number,):

1. in cpu/common.c::cpu_init() and it is not for BP
| #ifdef CONFIG_NUMA
|        if (cpu != 0 && percpu_read(node_number) == 0 &&
|            cpu_to_node(cpu) != NUMA_NO_NODE)
|                percpu_write(node_number, cpu_to_node(cpu));
| #endif
for BP: traps_init ==> cpu_init
for AP: start_secondary ==> cpu_init

2. cpu/intel.c or amd.c::srat_detect_node via numa_set_node()
for BP: check_bugs ==> identify_boot_cpu ==> identify_cpu()
	 that is rather later before numa_node_id() is used for BP...
for AP: start_secondary => smp_callin => smp_store_cpu_info() =>
	=> identify_secondary_cpu => identify_cpu()

so try to set that for BP earlier in setup_per_cpu_areas(), and
don't bother to set that for APs there (it will be updated later
and will be used later)

(and don't mess the 0 before the copying BP per_cpu data to APs)

[ Impact: fix boot crash on memoryless node-0 ]

Reported-and-tested-by: Jack Steiner <steiner@sgi.com>
Cc: Tejun Heo <htejun@gmail.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4A0C4A02.7050401@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 3a97a4c..3b5f327 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -423,6 +423,14 @@ void __init setup_per_cpu_areas(void)
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+	/*
+	 * make sure boot cpu node_number is right, when boot cpu is on the
+	 * node that doesn't have mem installed
+	 */
+	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
+#endif
+
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
 
-- 
cgit v0.10.2


From 629e15d245f46bef9d26199b450f882f9437a8fe Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 15 May 2009 13:05:16 -0700
Subject: x86, irq: update_mptable needs pci_routeirq

To get all device irq routing and to save them.

This is basically an implicit pci=routeirq enablement if (and on if)
the update_mptable boot option (which is off by default) has been
specified.

[ Impact: extend the update_mptable boot opion's scope ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
LKML-Reference: <4A0DB7B4.4060702@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index cd2a41a..e6bf9d0 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -17,6 +17,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/smp.h>
+#include <linux/pci.h>
 
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
@@ -961,6 +962,9 @@ static int __initdata enable_update_mptable;
 static int __init update_mptable_setup(char *str)
 {
 	enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+	pci_routeirq = 1;
+#endif
 	return 0;
 }
 early_param("update_mptable", update_mptable_setup);
@@ -973,6 +977,9 @@ static int __initdata alloc_mptable;
 static int __init parse_alloc_mptable_opt(char *p)
 {
 	enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+	pci_routeirq = 1;
+#endif
 	alloc_mptable = 1;
 	if (!p)
 		return 0;
-- 
cgit v0.10.2


From f1bdb523880c7f6990e9e8e50b0fc972ca475e84 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 15 May 2009 13:05:16 -0700
Subject: x86, irq: don't call mp_config_acpi_gsi() if update_mptable is not
 enabled

Len expressed concern that the update_mptable feature has
side-effects on the ACPI code.

Make it sure explicitly that the code only ever gets called if
the (default disabled) update_mptable boot quirk option is
disabled.

[ Impact: isolate the update_mptable feature from ACPI code more ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Len Brown <lenb@kernel.org>
LKML-Reference: <4A0DC832.5090200@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c34961a..3dcbaaa 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -87,6 +87,15 @@ static inline int acpi_probe_gsi(void)
 }
 #endif /* CONFIG_ACPI */
 
+#ifdef CONFIG_X86_MPPARSE
+extern int enable_update_mptable;
+#else
+static inline int enable_update_mptable(void)
+{
+	return 0;
+}
+#endif
+
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
 
 struct physid_mask {
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4af63df..844e5e2 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1226,7 +1226,9 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 		       ioapic_pin);
 		return gsi;
 	}
-	mp_config_acpi_gsi(dev, gsi, trigger, polarity);
+
+	if (enable_update_mptable)
+		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
 
 	set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
 			     trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index e6bf9d0..651c93b 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -957,7 +957,7 @@ out:
 	return 0;
 }
 
-static int __initdata enable_update_mptable;
+int enable_update_mptable;
 
 static int __init update_mptable_setup(char *str)
 {
-- 
cgit v0.10.2


From b68f1d2e7aa21029d73c7d453a8046e95d351740 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 17 May 2009 19:37:25 +0200
Subject: perf_counter, x86: speed up the scheduling fast-path

We have to set up the LVT entry only at counter init time, not at
every switch-in time.

There's friction between NMI and non-NMI use here - we'll probably
remove the per counter configurability of it - but until then, dont
slow down things ...

[ Impact: micro-optimization ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5bfd30a..c109819 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -285,6 +285,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 			return -EACCES;
 		hwc->nmi = 1;
 	}
+	perf_counters_lapic_init(hwc->nmi);
 
 	if (!hwc->irq_period)
 		hwc->irq_period = x86_pmu.max_period;
@@ -603,8 +604,6 @@ try_generic:
 		hwc->counter_base = x86_pmu.perfctr;
 	}
 
-	perf_counters_lapic_init(hwc->nmi);
-
 	x86_pmu.disable(hwc, idx);
 
 	cpuc->counters[idx] = counter;
@@ -1054,7 +1053,7 @@ void __init init_hw_perf_counters(void)
 
 	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
 
-	perf_counters_lapic_init(0);
+	perf_counters_lapic_init(1);
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
 
-- 
cgit v0.10.2


From 24ed0c4bfc7d2d7507bb9d50f7f3bbdcd85d76dd Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Sun, 17 May 2009 15:31:38 +0800
Subject: tracing: fix check for return value of register_module_notifier

return zero should be correct, so fix it.

[ Impact: eliminate incorrect syslog message ]

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: rostedt@goodmis.org
LKML-Reference: <1242545498-7285-1-git-send-email-tom.leiming@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5b606f4..140699a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2826,7 +2826,7 @@ void __init ftrace_init(void)
 				  __stop_mcount_loc);
 
 	ret = register_module_notifier(&ftrace_module_nb);
-	if (!ret)
+	if (ret)
 		pr_warning("Failed to register trace ftrace module notifier\n");
 
 	return;
-- 
cgit v0.10.2


From b7a755a8a145a7e34e735bda9c452317de7a538a Mon Sep 17 00:00:00 2001
From: Misael Lopez Cruz <x0052729@ti.com>
Date: Sun, 17 May 2009 20:02:31 -0500
Subject: ASoC: TWL4030: Enable/disable voice digital filters

Enable TWL4030 VTXL/VTXR and VRX digital filters for uplink
and downlink paths, respectively.

This patch also corrects voice 8/16kHz mode selection bit
(SEL_16K) of CODEC_MODE register.

Signed-off-by: Misael Lopez Cruz <x0052729@ti.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index eaf91ab..e4d683d 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -1629,6 +1629,28 @@ static int twl4030_set_dai_fmt(struct snd_soc_dai *codec_dai,
 	return 0;
 }
 
+/* In case of voice mode, the RX1 L(VRX) for downlink and the TX2 L/R
+ * (VTXL, VTXR) for uplink has to be enabled/disabled. */
+static void twl4030_voice_enable(struct snd_soc_codec *codec, int direction,
+				int enable)
+{
+	u8 reg, mask;
+
+	reg = twl4030_read_reg_cache(codec, TWL4030_REG_OPTION);
+
+	if (direction == SNDRV_PCM_STREAM_PLAYBACK)
+		mask = TWL4030_ARXL1_VRX_EN;
+	else
+		mask = TWL4030_ATXL2_VTXL_EN | TWL4030_ATXR2_VTXR_EN;
+
+	if (enable)
+		reg |= mask;
+	else
+		reg &= ~mask;
+
+	twl4030_write(codec, TWL4030_REG_OPTION, reg);
+}
+
 static int twl4030_voice_startup(struct snd_pcm_substream *substream,
 		struct snd_soc_dai *dai)
 {
@@ -1665,6 +1687,17 @@ static int twl4030_voice_startup(struct snd_pcm_substream *substream,
 	return 0;
 }
 
+static void twl4030_voice_shutdown(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	/* Enable voice digital filters */
+	twl4030_voice_enable(codec, substream->stream, 0);
+}
+
 static int twl4030_voice_hw_params(struct snd_pcm_substream *substream,
 		struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
 {
@@ -1673,6 +1706,9 @@ static int twl4030_voice_hw_params(struct snd_pcm_substream *substream,
 	struct snd_soc_codec *codec = socdev->card->codec;
 	u8 old_mode, mode;
 
+	/* Enable voice digital filters */
+	twl4030_voice_enable(codec, substream->stream, 1);
+
 	/* bit rate */
 	old_mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE)
 		& ~(TWL4030_CODECPDZ);
@@ -1780,6 +1816,7 @@ static struct snd_soc_dai_ops twl4030_dai_ops = {
 
 static struct snd_soc_dai_ops twl4030_dai_voice_ops = {
 	.startup	= twl4030_voice_startup,
+	.shutdown	= twl4030_voice_shutdown,
 	.hw_params	= twl4030_voice_hw_params,
 	.set_sysclk	= twl4030_voice_set_dai_sysclk,
 	.set_fmt	= twl4030_voice_set_dai_fmt,
diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h
index 3441115..9668bdf 100644
--- a/sound/soc/codecs/twl4030.h
+++ b/sound/soc/codecs/twl4030.h
@@ -110,7 +110,7 @@
 #define TWL4030_APLL_RATE_44100		0x90
 #define TWL4030_APLL_RATE_48000		0xA0
 #define TWL4030_APLL_RATE_96000		0xE0
-#define TWL4030_SEL_16K			0x04
+#define TWL4030_SEL_16K			0x08
 #define TWL4030_CODECPDZ		0x02
 #define TWL4030_OPT_MODE		0x01
 #define TWL4030_OPTION_1		(1 << 0)
-- 
cgit v0.10.2


From 5b4662f098b47f68d7fcea9b065d1513547fef12 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 15 May 2009 14:50:33 -0400
Subject: regulator: da903x: add missing __devexit_p()

The remove function uses __devexit, so the .remove assignment needs
__devexit_p() to fix a build error with hotplug disabled.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
CC: Liam Girdwood <lrg@slimlogic.co.uk>
CC: Mike Rapoport <mike@compulab.co.il>
CC: Eric Miao <eric.miao@marvell.com>
Acked-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>

diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index 72b1549..c6628f5 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -497,7 +497,7 @@ static struct platform_driver da903x_regulator_driver = {
 		.owner	= THIS_MODULE,
 	},
 	.probe		= da903x_regulator_probe,
-	.remove		= da903x_regulator_remove,
+	.remove		= __devexit_p(da903x_regulator_remove),
 };
 
 static int __init da903x_regulator_init(void)
-- 
cgit v0.10.2


From eb33575cf67d3f35fa2510210ef92631266e2465 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 13 May 2009 17:34:48 +0100
Subject: [ARM] Double check memmap is actually valid with a memmap has
 unexpected holes V2

pfn_valid() is meant to be able to tell if a given PFN has valid memmap
associated with it or not. In FLATMEM, it is expected that holes always
have valid memmap as long as there is valid PFNs either side of the hole.
In SPARSEMEM, it is assumed that a valid section has a memmap for the
entire section.

However, ARM and maybe other embedded architectures in the future free
memmap backing holes to save memory on the assumption the memmap is never
used. The page_zone linkages are then broken even though pfn_valid()
returns true. A walker of the full memmap must then do this additional
check to ensure the memmap they are looking at is sane by making sure the
zone and PFN linkages are still valid. This is expensive, but walkers of
the full memmap are extremely rare.

This was caught before for FLATMEM and hacked around but it hits again for
SPARSEMEM because the page_zone linkages can look ok where the PFN linkages
are totally screwed. This looks like a hatchet job but the reality is that
any clean solution would end up consumning all the memory saved by punching
these unexpected holes in the memmap. For example, we tried marking the
memmap within the section invalid but the section size exceeds the size of
the hole in most cases so pfn_valid() starts returning false where valid
memmap exists. Shrinking the size of the section would increase memory
consumption offsetting the gains.

This patch identifies when an architecture is punching unexpected holes
in the memmap that the memory model cannot automatically detect and sets
ARCH_HAS_HOLES_MEMORYMODEL. At the moment, this is restricted to EP93xx
which is the model sub-architecture this has been reported on but may expand
later. When set, walkers of the full memmap must call memmap_valid_within()
for each PFN and passing in what it expects the page and zone to be for
that PFN. If it finds the linkages to be broken, it assumes the memmap is
invalid for that PFN.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e60ec54..9d02cdb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -273,6 +273,7 @@ config ARCH_EP93XX
 	select HAVE_CLK
 	select COMMON_CLKDEV
 	select ARCH_REQUIRE_GPIOLIB
+	select ARCH_HAS_HOLES_MEMORYMODEL
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
 
@@ -976,10 +977,9 @@ config OABI_COMPAT
 	  UNPREDICTABLE (in fact it can be predicted that it won't work
 	  at all). If in doubt say Y.
 
-config ARCH_FLATMEM_HAS_HOLES
+config ARCH_HAS_HOLES_MEMORYMODEL
 	bool
-	default y
-	depends on FLATMEM
+	default n
 
 # Discontigmem is deprecated
 config ARCH_DISCONTIGMEM_ENABLE
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 186ec6a..a47c879 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1097,6 +1097,32 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
 #define pfn_valid_within(pfn) (1)
 #endif
 
+#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
+/*
+ * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
+ * associated with it or not. In FLATMEM, it is expected that holes always
+ * have valid memmap as long as there is valid PFNs either side of the hole.
+ * In SPARSEMEM, it is assumed that a valid section has a memmap for the
+ * entire section.
+ *
+ * However, an ARM, and maybe other embedded architectures in the future
+ * free memmap backing holes to save memory on the assumption the memmap is
+ * never used. The page_zone linkages are then broken even though pfn_valid()
+ * returns true. A walker of the full memmap must then do this additional
+ * check to ensure the memmap they are looking at is sane by making sure
+ * the zone and PFN linkages are still valid. This is expensive, but walkers
+ * of the full memmap are extremely rare.
+ */
+int memmap_valid_within(unsigned long pfn,
+					struct page *page, struct zone *zone);
+#else
+static inline int memmap_valid_within(unsigned long pfn,
+					struct page *page, struct zone *zone)
+{
+	return 1;
+}
+#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
+
 #endif /* !__GENERATING_BOUNDS.H */
 #endif /* !__ASSEMBLY__ */
 #endif /* _LINUX_MMZONE_H */
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 16ce8b9..f5b7d17 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -6,6 +6,7 @@
 
 
 #include <linux/stddef.h>
+#include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
 
@@ -72,3 +73,17 @@ struct zoneref *next_zones_zonelist(struct zoneref *z,
 	*zone = zonelist_zone(z);
 	return z;
 }
+
+#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
+int memmap_valid_within(unsigned long pfn,
+					struct page *page, struct zone *zone)
+{
+	if (page_to_pfn(page) != pfn)
+		return 0;
+
+	if (page_zone(page) != zone)
+		return 0;
+
+	return 1;
+}
+#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 66f6130..74d66db 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -509,22 +509,11 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
 			continue;
 
 		page = pfn_to_page(pfn);
-#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES
-		/*
-		 * Ordinarily, memory holes in flatmem still have a valid
-		 * memmap for the PFN range. However, an architecture for
-		 * embedded systems (e.g. ARM) can free up the memmap backing
-		 * holes to save memory on the assumption the memmap is
-		 * never used. The page_zone linkages are then broken even
-		 * though pfn_valid() returns true. Skip the page if the
-		 * linkages are broken. Even if this test passed, the impact
-		 * is that the counters for the movable type are off but
-		 * fragmentation monitoring is likely meaningless on small
-		 * systems.
-		 */
-		if (page_zone(page) != zone)
+
+		/* Watch for unexpected holes punched in the memmap */
+		if (!memmap_valid_within(pfn, page, zone))
 			continue;
-#endif
+
 		mtype = get_pageblock_migratetype(page);
 
 		if (mtype < MIGRATE_TYPES)
-- 
cgit v0.10.2


From 8190b37f6483fedc530110d8c7d710d1c0317ba4 Mon Sep 17 00:00:00 2001
From: Pavel Roskin <proski@gnu.org>
Date: Tue, 12 May 2009 13:36:46 -0700
Subject: [ARM] mach-l7200: fix spelling of SYS_CLOCK_OFF

Signed-off-by: Pavel Roskin <proski@gnu.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-l7200/include/mach/sys-clock.h b/arch/arm/mach-l7200/include/mach/sys-clock.h
index 2d7722b..e9729a3 100644
--- a/arch/arm/mach-l7200/include/mach/sys-clock.h
+++ b/arch/arm/mach-l7200/include/mach/sys-clock.h
@@ -18,7 +18,7 @@
 
 /* IO_START and IO_BASE are defined in hardware.h */
 
-#define SYS_CLOCK_START (IO_START + SYS_CLCOK_OFF)  /* Physical address */
+#define SYS_CLOCK_START (IO_START + SYS_CLOCK_OFF)  /* Physical address */
 #define SYS_CLOCK_BASE  (IO_BASE  + SYS_CLOCK_OFF)  /* Virtual address  */
 
 /* Define the interface to the SYS_CLOCK */
-- 
cgit v0.10.2


From 6c627f3978bf3059d45713e2e1240b7c43cc85f5 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 18 May 2009 12:33:36 +0200
Subject: ALSA: hda - Show the actual chip name in 'unkown model' messages

Show the actual chip name in 'unknown model..' info messages for
Realtek codecs.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4bc3e7e..62f4d0c 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4410,8 +4410,8 @@ static int patch_alc880(struct hda_codec *codec)
 						  alc880_models,
 						  alc880_cfg_tbl);
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC880, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC880_AUTO;
 	}
 
@@ -6054,8 +6054,9 @@ static int patch_alc260(struct hda_codec *codec)
 						  alc260_models,
 						  alc260_cfg_tbl);
 	if (board_config < 0) {
-		snd_printd(KERN_INFO "hda_codec: Unknown model for ALC260, "
-			   "trying auto-probe from BIOS...\n");
+		snd_printd(KERN_INFO "hda_codec: Unknown model for %s, "
+			   "trying auto-probe from BIOS...\n",
+			   codec->chip_name);
 		board_config = ALC260_AUTO;
 	}
 
@@ -7308,8 +7309,9 @@ static int patch_alc882(struct hda_codec *codec)
 				alc_free(codec);
 				return patch_alc883(codec);
 			}
-			printk(KERN_INFO "hda_codec: Unknown model for ALC882, "
-		       			 "trying auto-probe from BIOS...\n");
+			printk(KERN_INFO "hda_codec: Unknown model for %s, "
+			       "trying auto-probe from BIOS...\n",
+			       codec->chip_name);
 			board_config = ALC882_AUTO;
 		}
 	}
@@ -9196,8 +9198,8 @@ static int patch_alc883(struct hda_codec *codec)
 						  alc883_models,
 						  alc883_cfg_tbl);
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC883, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC883_AUTO;
 	}
 
@@ -11040,8 +11042,8 @@ static int patch_alc262(struct hda_codec *codec)
 						  alc262_cfg_tbl);
 
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC262, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC262_AUTO;
 	}
 
@@ -12076,8 +12078,8 @@ static int patch_alc268(struct hda_codec *codec)
 						  alc268_cfg_tbl);
 
 	if (board_config < 0 || board_config >= ALC268_MODEL_LAST) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC268, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC268_AUTO;
 	}
 
@@ -12924,8 +12926,8 @@ static int patch_alc269(struct hda_codec *codec)
 						  alc269_cfg_tbl);
 
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC269, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC269_AUTO;
 	}
 
@@ -14023,8 +14025,8 @@ static int patch_alc861(struct hda_codec *codec)
 						  alc861_cfg_tbl);
 
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC861, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC861_AUTO;
 	}
 
@@ -14947,8 +14949,8 @@ static int patch_alc861vd(struct hda_codec *codec)
 						  alc861vd_cfg_tbl);
 
 	if (board_config < 0 || board_config >= ALC861VD_MODEL_LAST) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC660VD/"
-			"ALC861VD, trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC861VD_AUTO;
 	}
 
@@ -16872,8 +16874,8 @@ static int patch_alc662(struct hda_codec *codec)
 						  alc662_models,
 			  	                  alc662_cfg_tbl);
 	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for ALC662, "
-		       "trying auto-probe from BIOS...\n");
+		printk(KERN_INFO "hda_codec: Unknown model for %s, "
+		       "trying auto-probe from BIOS...\n", codec->chip_name);
 		board_config = ALC662_AUTO;
 	}
 
-- 
cgit v0.10.2


From 313f6e2d40bd67e394a65e7d64acd0cf9c9d248d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 18 May 2009 12:40:52 +0200
Subject: ALSA: hda - Avoid conflicts with snd-ctxfi driver

The PCI entries of Creative with HD-audio class can be the devices
with emu20k1/emu20k2 chips.  These are supported better by snd-ctxfi
driver.  With that driver, the device will mutate from HD-audio to
its native class.

This patch adds a simple ifdef to avoid the conflict of device probe
between snd-hda-intel and snd-ctxfi drivers.  1102:0009 seems still
OK to be added as it has no emu20kx chip, and is a pure HD-audio
device.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 04f19f8..7bb6dd2 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2517,10 +2517,19 @@ static struct pci_device_id azx_ids[] = {
 	/* Teradici */
 	{ PCI_DEVICE(0x6549, 0x1200), .driver_data = AZX_DRIVER_TERA },
 	/* Creative X-Fi (CA0110-IBG) */
+#if !defined(CONFIG_SND_CTXFI) && !defined(CONFIG_SND_CTXFI_MODULE)
+	/* the following entry conflicts with snd-ctxfi driver,
+	 * as ctxfi driver mutates from HD-audio to native mode with
+	 * a special command sequence.
+	 */
 	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_ANY_ID),
 	  .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8,
 	  .class_mask = 0xffffff,
 	  .driver_data = AZX_DRIVER_GENERIC },
+#else
+	/* this entry seems still valid -- i.e. without emu20kx chip */
+	{ PCI_DEVICE(0x1102, 0x0009), .driver_data = AZX_DRIVER_GENERIC },
+#endif
 	/* AMD Generic, PCI class code and Vendor ID for HD Audio */
 	{ PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_ANY_ID),
 	  .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8,
-- 
cgit v0.10.2


From 6c3b46f74587d46e71b8c2b78fdca626a3aca280 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 May 2009 14:38:28 +0200
Subject: virtio_blk: don't blindly derefence req->rq_disk

request->rq_disk is only set for FS requests or BLOCK_PC requests
originating from the generic block layer scsi ioctls.  It's not set
for requests origination from other soures or internal cache flush
commands implemented by the patch I'll send after this.

So instead of using it to get at the private data in do_virtblk_request
setup queue->queuedata and use it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 29a9daf..dfe9ee5 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -124,12 +124,11 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 
 static void do_virtblk_request(struct request_queue *q)
 {
-	struct virtio_blk *vblk = NULL;
+	struct virtio_blk *vblk = q->queuedata;
 	struct request *req;
 	unsigned int issued = 0;
 
 	while ((req = blk_peek_request(q)) != NULL) {
-		vblk = req->rq_disk->private_data;
 		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
 		/* If this request fails, stop queue and wait for something to
@@ -249,6 +248,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 		goto out_put_disk;
 	}
 
+	vblk->disk->queue->queuedata = vblk;
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, vblk->disk->queue);
 
 	if (index < 26) {
-- 
cgit v0.10.2


From 1cde26f928863d90e9e7c1217880c8450464d305 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Mon, 18 May 2009 14:41:30 +0200
Subject: virtio_blk: SG_IO passthru support

Add support for SG_IO passthru to virtio_blk.  We add the scsi command
block after the normal outhdr, and the scsi inhdr with full status
information aswell as the sense buffer before the regular inhdr.

[hch: forward ported, added the VIRTIO_BLK_F_SCSI flags, some comments
 and tested the whole beast]
[axboe: updated to use ->resid and not dual-path the byte count]

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (+ checkpatch.pl tweak)
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index dfe9ee5..62275db 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -37,6 +37,7 @@ struct virtblk_req
 	struct list_head list;
 	struct request *req;
 	struct virtio_blk_outhdr out_hdr;
+	struct virtio_scsi_inhdr in_hdr;
 	u8 status;
 };
 
@@ -49,7 +50,9 @@ static void blk_done(struct virtqueue *vq)
 
 	spin_lock_irqsave(&vblk->lock, flags);
 	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
+		unsigned int nr_bytes;
 		int error;
+
 		switch (vbr->status) {
 		case VIRTIO_BLK_S_OK:
 			error = 0;
@@ -62,6 +65,12 @@ static void blk_done(struct virtqueue *vq)
 			break;
 		}
 
+		if (blk_pc_request(vbr->req)) {
+			vbr->req->resid_len = vbr->in_hdr.residual;
+			vbr->req->sense_len = vbr->in_hdr.sense_len;
+			vbr->req->errors = vbr->in_hdr.errors;
+		}
+
 		__blk_end_request_all(vbr->req, error);
 		list_del(&vbr->list);
 		mempool_free(vbr, vblk->pool);
@@ -74,7 +83,7 @@ static void blk_done(struct virtqueue *vq)
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		   struct request *req)
 {
-	unsigned long num, out, in;
+	unsigned long num, out = 0, in = 0;
 	struct virtblk_req *vbr;
 
 	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
@@ -99,18 +108,36 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	if (blk_barrier_rq(vbr->req))
 		vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
 
-	sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
-	num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
-	sg_set_buf(&vblk->sg[num+1], &vbr->status, sizeof(vbr->status));
+	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
 
-	if (rq_data_dir(vbr->req) == WRITE) {
-		vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-		out = 1 + num;
-		in = 1;
-	} else {
-		vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-		out = 1;
-		in = 1 + num;
+	/*
+	 * If this is a packet command we need a couple of additional headers.
+	 * Behind the normal outhdr we put a segment with the scsi command
+	 * block, and before the normal inhdr we put the sense data and the
+	 * inhdr with additional status information before the normal inhdr.
+	 */
+	if (blk_pc_request(vbr->req))
+		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
+
+	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
+
+	if (blk_pc_request(vbr->req)) {
+		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
+		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
+			   sizeof(vbr->in_hdr));
+	}
+
+	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
+		   sizeof(vbr->status));
+
+	if (num) {
+		if (rq_data_dir(vbr->req) == WRITE) {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+			out += num;
+		} else {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+			in += num;
+		}
 	}
 
 	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
@@ -148,8 +175,16 @@ static void do_virtblk_request(struct request_queue *q)
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
-	return scsi_cmd_ioctl(bdev->bd_disk->queue,
-			      bdev->bd_disk, mode, cmd,
+	struct gendisk *disk = bdev->bd_disk;
+	struct virtio_blk *vblk = disk->private_data;
+
+	/*
+	 * Only allow the generic SCSI ioctls if the host can support it.
+	 */
+	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
+		return -ENOIOCTLCMD;
+
+	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
 			      (void __user *)data);
 }
 
@@ -356,6 +391,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
+	VIRTIO_BLK_F_SCSI,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 94c56d2..4dbcbc1 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -15,6 +15,7 @@
 #define VIRTIO_BLK_F_GEOMETRY	4	/* Legacy geometry available  */
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
+#define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
 
 struct virtio_blk_config
 {
@@ -55,6 +56,13 @@ struct virtio_blk_outhdr
 	__u64 sector;
 };
 
+struct virtio_scsi_inhdr {
+	__u32 errors;
+	__u32 data_len;
+	__u32 sense_len;
+	__u32 residual;
+};
+
 /* And this is the final byte of the write scatter-gather list. */
 #define VIRTIO_BLK_S_OK		0
 #define VIRTIO_BLK_S_IOERR	1
-- 
cgit v0.10.2


From f831cc03490c78a76e2d35ad77ec2292d0323728 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 18 May 2009 14:44:45 +0200
Subject: virtio_blk: get rid of unused variable

drivers/block/virtio_blk.c: In function 'blk_done':
drivers/block/virtio_blk.c:53: warning: unused variable 'nr_bytes'

Leftover from commit 1cde26f928863d90e9e7c1217880c8450464d305

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 62275db..511d4ae 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -50,7 +50,6 @@ static void blk_done(struct virtqueue *vq)
 
 	spin_lock_irqsave(&vblk->lock, flags);
 	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
-		unsigned int nr_bytes;
 		int error;
 
 		switch (vbr->status) {
-- 
cgit v0.10.2


From 3026589c1bc17b0c389b95afec127e92e2a745e2 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Mon, 11 May 2009 09:24:47 +0200
Subject: microblaze: Update Microblaze defconfig

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index beb7ecd..4ef6af0 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Tue Mar 24 10:23:20 2009
+# Linux kernel version: 2.6.30-rc5
+# Mon May 11 09:01:02 2009
 #
 CONFIG_MICROBLAZE=y
 # CONFIG_SWAP is not set
@@ -32,6 +32,7 @@ CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 # CONFIG_TASKSTATS is not set
@@ -63,6 +64,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -80,6 +82,8 @@ CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+# CONFIG_SLOW_WORK is not set
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -92,7 +96,6 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -166,6 +169,8 @@ CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=0
 CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_NOMMU_INITIAL_TRIM_EXCESS=1
 
 #
 # Exectuable file formats
@@ -180,7 +185,6 @@ CONFIG_NET=y
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
 CONFIG_UNIX=y
@@ -232,6 +236,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 # CONFIG_NET_SCHED is not set
 # CONFIG_DCB is not set
 
@@ -244,7 +249,6 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_WIRELESS=y
 # CONFIG_CFG80211 is not set
 CONFIG_WIRELESS_OLD_REGULATORY=y
@@ -379,6 +383,7 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_ATA is not set
 # CONFIG_MD is not set
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
 # CONFIG_MACVLAN is not set
@@ -388,6 +393,7 @@ CONFIG_NETDEVICES=y
 # CONFIG_PHYLIB is not set
 CONFIG_NET_ETHERNET=y
 # CONFIG_MII is not set
+# CONFIG_ETHOC is not set
 # CONFIG_DNET is not set
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
@@ -405,7 +411,6 @@ CONFIG_NETDEV_10000=y
 #
 # CONFIG_WLAN_PRE80211 is not set
 # CONFIG_WLAN_80211 is not set
-# CONFIG_IWLWIFI_LEDS is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -455,6 +460,7 @@ CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 # CONFIG_RTC is not set
 # CONFIG_GEN_RTC is not set
 # CONFIG_R3964 is not set
@@ -525,7 +531,7 @@ CONFIG_USB_SUPPORT=y
 #
 
 #
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 # CONFIG_USB_GADGET is not set
 
@@ -538,6 +544,7 @@ CONFIG_USB_SUPPORT=y
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_RTC_CLASS is not set
 # CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 
@@ -563,6 +570,11 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -601,8 +613,13 @@ CONFIG_CRAMFS=y
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -614,7 +631,6 @@ CONFIG_LOCKD_V4=y
 CONFIG_NFS_ACL_SUPPORT=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -647,6 +663,9 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
@@ -678,15 +697,8 @@ CONFIG_DEBUG_SG=y
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
-
-#
-# Tracers
-#
-# CONFIG_SCHED_TRACER is not set
-# CONFIG_CONTEXT_SWITCH_TRACER is not set
-# CONFIG_BOOT_TRACER is not set
-# CONFIG_TRACE_BRANCH_PROFILING is not set
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_PAGE_POISONING is not set
+# CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_HEART_BEAT=y
@@ -777,6 +789,7 @@ CONFIG_CRYPTO=y
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
@@ -784,6 +797,7 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
@@ -797,8 +811,8 @@ CONFIG_GENERIC_FIND_LAST_BIT=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
 CONFIG_HAVE_LMB=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From 7b7210d7a9d260becef4dba98b9075e2c9b41f93 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Thu, 14 May 2009 13:35:52 +0200
Subject: microblaze: Fix kind-of-intr checking against number of interrupts

+ Fix typographic fault.

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/intc.c b/arch/microblaze/kernel/intc.c
index a69d3e3..b156052 100644
--- a/arch/microblaze/kernel/intc.c
+++ b/arch/microblaze/kernel/intc.c
@@ -137,8 +137,8 @@ void __init init_IRQ(void)
 
 	intr_type =
 		*(int *) of_get_property(intc, "xlnx,kind-of-intr", NULL);
-	if (intr_type >= (1 << nr_irq))
-		printk(KERN_INFO " ERROR: Mishmash in king-of-intr param\n");
+	if (intr_type >= (1 << (nr_irq + 1)))
+		printk(KERN_INFO " ERROR: Mismatch in kind-of-intr param\n");
 
 #ifdef CONFIG_SELFMOD_INTC
 	selfmod_function((int *) arr_func, intc_baseaddr);
-- 
cgit v0.10.2


From eb4c41d30ba1f973c8b10be3644571ab997ae0d6 Mon Sep 17 00:00:00 2001
From: Torben Schulz <public@letorbi.de>
Date: Mon, 18 May 2009 15:02:35 +0200
Subject: ALSA: hda - Improved MacBook 3,1 support

This patch adds support for MacBook 3,1 sound by adding a model new
"mb31" with the appropriate init verbs, mixers and channel modes to
the ALC883 configuration. patch_alc882() and patch_alc883() are
modified to handle the MacBook 3,1 sound-chip (Realtek ALC889A)
correctly.

Signed-off-by: Torben Schulz <public@letorbi.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 14ed1d1..ebb444d 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -157,6 +157,7 @@ ALC883/888
   fujitsu-xa3530 Fujitsu AMILO XA3530
   3stack-6ch-intel Intel DG33* boards
   asus-p5q	ASUS P5Q-EM boards
+  mb31		MacBook 3,1
   auto		auto-config reading BIOS (default)
 
 ALC861/660
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 62f4d0c..b942019 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -240,6 +240,7 @@ enum {
 	ALC883_3ST_6ch_INTEL,
 	ALC888_ASUS_M90V,
 	ALC888_ASUS_EEE1601,
+	ALC889A_MB31,
 	ALC1200_ASUS_P5Q,
 	ALC883_AUTO,
 	ALC883_MODEL_LAST,
@@ -7291,7 +7292,7 @@ static int patch_alc882(struct hda_codec *codec)
 		case 0x106b00a1: /* Macbook (might be wrong - PCI SSID?) */
 		case 0x106b00a4: /* MacbookPro4,1 */
 		case 0x106b2c00: /* Macbook Pro rev3 */
-		case 0x106b3600: /* Macbook 3.1 */
+		/* Macbook 3.1 (0x106b3600) is handled by patch_alc883() */
 		case 0x106b3800: /* MacbookPro4,1 - latter revision */
 			board_config = ALC885_MBP3;
 			break;
@@ -7493,6 +7494,17 @@ static struct hda_input_mux alc883_asus_eee1601_capture_source = {
 	},
 };
 
+static struct hda_input_mux alc889A_mb31_capture_source = {
+	.num_items = 2,
+	.items = {
+		{ "Mic", 0x0 },
+		/* Front Mic (0x01) unused */
+		{ "Line", 0x2 },
+		/* Line 2 (0x03) unused */
+		/* CD (0x04) unsused? */
+	},
+};
+
 /*
  * 2ch mode
  */
@@ -7611,6 +7623,49 @@ static struct hda_channel_mode alc883_sixstack_modes[2] = {
 	{ 8, alc883_sixstack_ch8_init },
 };
 
+/* 2ch mode (Speaker:front, Subwoofer:CLFE, Line:input, Headphones:front) */
+static struct hda_verb alc889A_mb31_ch2_init[] = {
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},             /* HP as front */
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Subwoofer on */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},    /* Line as input */
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},   /* Line off */
+	{ } /* end */
+};
+
+/* 4ch mode (Speaker:front, Subwoofer:CLFE, Line:CLFE, Headphones:front) */
+static struct hda_verb alc889A_mb31_ch4_init[] = {
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},             /* HP as front */
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Subwoofer on */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},   /* Line as output */
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Line on */
+	{ } /* end */
+};
+
+/* 5ch mode (Speaker:front, Subwoofer:CLFE, Line:input, Headphones:rear) */
+static struct hda_verb alc889A_mb31_ch5_init[] = {
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x01},             /* HP as rear */
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Subwoofer on */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},    /* Line as input */
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},   /* Line off */
+	{ } /* end */
+};
+
+/* 6ch mode (Speaker:front, Subwoofer:off, Line:CLFE, Headphones:Rear) */
+static struct hda_verb alc889A_mb31_ch6_init[] = {
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x01},             /* HP as front */
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},   /* Subwoofer off */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},   /* Line as output */
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Line on */
+	{ } /* end */
+};
+
+static struct hda_channel_mode alc889A_mb31_6ch_modes[4] = {
+	{ 2, alc889A_mb31_ch2_init },
+	{ 4, alc889A_mb31_ch4_init },
+	{ 5, alc889A_mb31_ch5_init },
+	{ 6, alc889A_mb31_ch6_init },
+};
+
 static struct hda_verb alc883_medion_eapd_verbs[] = {
         /* eanable EAPD on medion laptop */
 	{0x20, AC_VERB_SET_COEF_INDEX, 0x07},
@@ -7889,6 +7944,32 @@ static struct snd_kcontrol_new alc888_lenovo_sky_mixer[] = {
 	{ } /* end */
 };
 
+static struct snd_kcontrol_new alc889A_mb31_mixer[] = {
+	/* Output mixers */
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE("Surround Playback Switch", 0x0d, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0e, 1, 0x00,
+		HDA_OUTPUT),
+	HDA_BIND_MUTE_MONO("Center Playback Switch", 0x0e, 1, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0e, 2, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0e, 2, 0x02, HDA_INPUT),
+	/* Output switches */
+	HDA_CODEC_MUTE("Enable Speaker", 0x14, 0x00, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Enable Headphones", 0x15, 0x00, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("Enable LFE", 0x16, 2, 0x00, HDA_OUTPUT),
+	/* Boost mixers */
+	HDA_CODEC_VOLUME("Mic Boost", 0x18, 0x00, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Boost", 0x1a, 0x00, HDA_INPUT),
+	/* Input mixers */
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x00, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x00, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+	{ } /* end */
+};
+
 static struct hda_bind_ctls alc883_bind_cap_vol = {
 	.ops = &snd_hda_bind_vol,
 	.values = {
@@ -8561,6 +8642,42 @@ static void alc883_eee1601_inithook(struct hda_codec *codec)
 	alc_automute_pin(codec);
 }
 
+static struct hda_verb alc889A_mb31_verbs[] = {
+	/* Init rear pin (used as headphone output) */
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc4},    /* Apple Headphones */
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},           /* Connect to front */
+	{0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
+	/* Init line pin (used as output in 4ch and 6ch mode) */
+	{0x1a, AC_VERB_SET_CONNECT_SEL, 0x02},           /* Connect to CLFE */
+	/* Init line 2 pin (used as headphone out by default) */
+	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},  /* Use as input */
+	{0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, /* Mute output */
+	{ } /* end */
+};
+
+/* Mute speakers according to the headphone jack state */
+static void alc889A_mb31_automute(struct hda_codec *codec)
+{
+	unsigned int present;
+
+	/* Mute only in 2ch or 4ch mode */
+	if (snd_hda_codec_read(codec, 0x15, 0, AC_VERB_GET_CONNECT_SEL, 0)
+	    == 0x00) {
+		present = snd_hda_codec_read(codec, 0x15, 0,
+			AC_VERB_GET_PIN_SENSE, 0) & AC_PINSENSE_PRESENCE;
+		snd_hda_codec_amp_stereo(codec, 0x14,  HDA_OUTPUT, 0,
+			HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+		snd_hda_codec_amp_stereo(codec, 0x16, HDA_OUTPUT, 0,
+			HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+	}
+}
+
+static void alc889A_mb31_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+	if ((res >> 26) == ALC880_HP_EVENT)
+		alc889A_mb31_automute(codec);
+}
+
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 #define alc883_loopbacks	alc880_loopbacks
 #endif
@@ -8601,6 +8718,7 @@ static const char *alc883_models[ALC883_MODEL_LAST] = {
 	[ALC888_FUJITSU_XA3530] = "fujitsu-xa3530",
 	[ALC883_3ST_6ch_INTEL]	= "3stack-6ch-intel",
 	[ALC1200_ASUS_P5Q]	= "asus-p5q",
+	[ALC889A_MB31]		= "mb31",
 	[ALC883_AUTO]		= "auto",
 };
 
@@ -9052,6 +9170,21 @@ static struct alc_config_preset alc883_presets[] = {
 		.channel_mode = alc883_sixstack_modes,
 		.input_mux = &alc883_capture_source,
 	},
+	[ALC889A_MB31] = {
+		.mixers = { alc889A_mb31_mixer, alc883_chmode_mixer},
+		.init_verbs = { alc883_init_verbs, alc889A_mb31_verbs,
+			alc880_gpio1_init_verbs },
+		.adc_nids = alc883_adc_nids,
+		.num_adc_nids = ARRAY_SIZE(alc883_adc_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.channel_mode = alc889A_mb31_6ch_modes,
+		.num_channel_mode = ARRAY_SIZE(alc889A_mb31_6ch_modes),
+		.input_mux = &alc889A_mb31_capture_source,
+		.dig_out_nid = ALC883_DIGOUT_NID,
+		.unsol_event = alc889A_mb31_unsol_event,
+		.init_hook = alc889A_mb31_automute,
+	},
 };
 
 
@@ -9197,10 +9330,18 @@ static int patch_alc883(struct hda_codec *codec)
 	board_config = snd_hda_check_board_config(codec, ALC883_MODEL_LAST,
 						  alc883_models,
 						  alc883_cfg_tbl);
-	if (board_config < 0) {
-		printk(KERN_INFO "hda_codec: Unknown model for %s, "
-		       "trying auto-probe from BIOS...\n", codec->chip_name);
-		board_config = ALC883_AUTO;
+	if (board_config < 0 || board_config >= ALC883_MODEL_LAST) {
+		/* Pick up systems that don't supply PCI SSID */
+		switch (codec->subsystem_id) {
+		case 0x106b3600: /* Macbook 3.1 */
+			board_config = ALC889A_MB31;
+			break;
+		default:
+			printk(KERN_INFO
+				"hda_codec: Unknown model for %s, trying "
+				"auto-probe from BIOS...\n", codec->chip_name);
+			board_config = ALC883_AUTO;
+		}
 	}
 
 	if (board_config == ALC883_AUTO) {
-- 
cgit v0.10.2


From 818bc814447a35350ae90a329133e474bf1a2bd7 Mon Sep 17 00:00:00 2001
From: Daniel Ribeiro <drwyrm@gmail.com>
Date: Sat, 2 May 2009 15:05:59 -0300
Subject: [ARM] pxa: save/restore PGSR on suspend/resume.

Signed-off-by: Daniel Ribeiro <drwyrm@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c
index 7ffb91d..6ae50604 100644
--- a/arch/arm/mach-pxa/mfp-pxa2xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa2xx.c
@@ -322,6 +322,7 @@ static inline void pxa27x_mfp_init(void) {}
 #ifdef CONFIG_PM
 static unsigned long saved_gafr[2][4];
 static unsigned long saved_gpdr[4];
+static unsigned long saved_pgsr[4];
 
 static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
 {
@@ -332,6 +333,7 @@ static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
 		saved_gafr[0][i] = GAFR_L(i);
 		saved_gafr[1][i] = GAFR_U(i);
 		saved_gpdr[i] = GPDR(i * 32);
+		saved_pgsr[i] = PGSR(i);
 
 		GPDR(i * 32) = gpdr_lpm[i];
 	}
@@ -346,6 +348,7 @@ static int pxa2xx_mfp_resume(struct sys_device *d)
 		GAFR_L(i) = saved_gafr[0][i];
 		GAFR_U(i) = saved_gafr[1][i];
 		GPDR(i * 32) = saved_gpdr[i];
+		PGSR(i) = saved_pgsr[i];
 	}
 	PSSR = PSSR_RDH | PSSR_PH;
 	return 0;
-- 
cgit v0.10.2


From 216e3b7abbd05c35d2d1a3f86629ade485351f0d Mon Sep 17 00:00:00 2001
From: Daniel Ribeiro <drwyrm@gmail.com>
Date: Tue, 5 May 2009 22:43:18 -0300
Subject: [ARM] pxa: allow gpio_reset drive high during normal work

I want to reuse tosa/spitz gpio_reset code, but my board needs the reset
gpio to be driven high during normal operation.

Signed-off-by: Daniel Ribeiro <drwyrm@gmail.com>
Acked-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/include/mach/reset.h b/arch/arm/mach-pxa/include/mach/reset.h
index 31e6a7b..b6c1055 100644
--- a/arch/arm/mach-pxa/include/mach/reset.h
+++ b/arch/arm/mach-pxa/include/mach/reset.h
@@ -13,8 +13,9 @@ extern void clear_reset_status(unsigned int mask);
 /**
  * init_gpio_reset() - register GPIO as reset generator
  * @gpio: gpio nr
- * @output: set gpio as out/low instead of input during normal work
+ * @output: set gpio as output instead of input during normal work
+ * @level: output level
  */
-extern int init_gpio_reset(int gpio, int output);
+extern int init_gpio_reset(int gpio, int output, int level);
 
 #endif /* __ASM_ARCH_RESET_H */
diff --git a/arch/arm/mach-pxa/reset.c b/arch/arm/mach-pxa/reset.c
index df29d45..01e9d64 100644
--- a/arch/arm/mach-pxa/reset.c
+++ b/arch/arm/mach-pxa/reset.c
@@ -20,7 +20,7 @@ static void do_hw_reset(void);
 
 static int reset_gpio = -1;
 
-int init_gpio_reset(int gpio, int output)
+int init_gpio_reset(int gpio, int output, int level)
 {
 	int rc;
 
@@ -31,7 +31,7 @@ int init_gpio_reset(int gpio, int output)
 	}
 
 	if (output)
-		rc = gpio_direction_output(gpio, 0);
+		rc = gpio_direction_output(gpio, level);
 	else
 		rc = gpio_direction_input(gpio);
 	if (rc) {
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index c18e34a..cdacea0 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -731,7 +731,7 @@ static void spitz_restart(char mode, const char *cmd)
 
 static void __init common_init(void)
 {
-	init_gpio_reset(SPITZ_GPIO_ON_RESET, 1);
+	init_gpio_reset(SPITZ_GPIO_ON_RESET, 1, 0);
 	pm_power_off = spitz_poweroff;
 	arm_pm_restart = spitz_restart;
 
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index afac5b6..a0bd46e 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -897,7 +897,7 @@ static void __init tosa_init(void)
 	gpio_set_wake(MFP_PIN_GPIO1, 1);
 	/* We can't pass to gpio-keys since it will drop the Reset altfunc */
 
-	init_gpio_reset(TOSA_GPIO_ON_RESET, 0);
+	init_gpio_reset(TOSA_GPIO_ON_RESET, 0, 0);
 
 	pm_power_off = tosa_poweroff;
 	arm_pm_restart = tosa_restart;
-- 
cgit v0.10.2


From 866bd435819df8d97767c407f8828a7a2ff971e6 Mon Sep 17 00:00:00 2001
From: Timothy Clacy <tcl@phaseone.com>
Date: Thu, 7 May 2009 19:40:33 +0200
Subject: [ARM] pxa: enable GPIO receivers after configuring pins

'mach-pxa' platforms currently rely on a bootloader to setup GPIO pins
and clear RDH (to enable inputs).

A kernel loaded by a 'minimal' bootloader, that doesn't touch any pins,
will not function correctly; inputs will remain disabled, even after the
pins are configured. The following change fixes the issue and has been
verified on Gumstix Verdex XL6P and a custom PXA270 platform.

Signed-off-by: Timothy Clacy <tcl@phaseone.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c
index 6ae50604..cf6b720 100644
--- a/arch/arm/mach-pxa/mfp-pxa2xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa2xx.c
@@ -377,6 +377,9 @@ static int __init pxa2xx_mfp_init(void)
 	if (cpu_is_pxa27x())
 		pxa27x_mfp_init();
 
+	/* clear RDH bit to enable GPIO receivers after reset/sleep exit */
+	PSSR = PSSR_RDH;
+
 	/* initialize gafr_run[], pgsr_lpm[] from existing values */
 	for (i = 0; i <= gpio_to_bank(pxa_last_gpio); i++)
 		gpdr_lpm[i] = GPDR(i * 32);
-- 
cgit v0.10.2


From a81b38688f50f51123490d45d51f4a10e8dc1184 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Fri, 15 May 2009 10:11:22 +0400
Subject: [ARM] pxa/spitz: provide spitz_ohci_exit() that unregisters USB_HOST
 GPIO

Currently spitz_ohci_init() that requests GPIO doesn't have
corresponding spitz_ohci_exit() which will gpio_free(). This causes
minor problems e.g. during resume when the OHCI device can't be resumed.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index cdacea0..5a45fe3 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -531,9 +531,15 @@ static int spitz_ohci_init(struct device *dev)
 	return gpio_direction_output(SPITZ_GPIO_USB_HOST, 1);
 }
 
+static void spitz_ohci_exit(struct device *dev)
+{
+	gpio_free(SPITZ_GPIO_USB_HOST);
+}
+
 static struct pxaohci_platform_data spitz_ohci_platform_data = {
 	.port_mode	= PMM_NPS_MODE,
 	.init		= spitz_ohci_init,
+	.exit		= spitz_ohci_exit,
 	.flags		= ENABLE_PORT_ALL | NO_OC_PROTECTION,
 	.power_budget	= 150,
 };
-- 
cgit v0.10.2


From 6d3ddc81f5762d54ce7d1db70eb757c6c12fabbc Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 16 May 2009 17:47:29 +0100
Subject: ASoC: Split DAPM power checks from sequencing of power changes

DAPM has always applied any changes to the power state of widgets as soon
as it has determined that they are required. Instead of doing this store
all the changes that are required on lists of widgets to power up and
down, then iterate over those lists and apply the changes. This changes
the sequence in which changes are implemented, doing all power downs
before power ups and always using the up/down sequences (previously they
were only used when changes were due to DAC/ADC power events). The error
handling is also changed so that we continue attempting to power widgets
if some changes fail.

The main benefit of this is to allow future changes to do optimisations
over the whole power sequence and to reduce the number of walks of the
widget graph required to check the power status of widgets.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 533f9f2..b3f789d 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -385,6 +385,9 @@ struct snd_soc_dapm_widget {
 	/* widget input and outputs */
 	struct list_head sources;
 	struct list_head sinks;
+
+	/* used during DAPM updates */
+	struct list_head power_list;
 };
 
 #endif
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 6ab80bf..8309ce8 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -372,6 +372,8 @@ struct snd_soc_codec {
 	enum snd_soc_bias_level bias_level;
 	enum snd_soc_bias_level suspend_bias_level;
 	struct delayed_work delayed_work;
+	struct list_head up_list;
+	struct list_head down_list;
 
 	/* codec DAI's */
 	struct snd_soc_dai *dai;
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 7847f80..04ef841 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -658,7 +658,7 @@ static int dapm_supply_check_power(struct snd_soc_dapm_widget *w)
 static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 			     struct snd_soc_dapm_widget *w)
 {
-	int power, ret;
+	int ret;
 
 	switch (w->id) {
 	case snd_soc_dapm_pre:
@@ -696,18 +696,8 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 		return 0;
 
 	default:
-		break;
+		return dapm_generic_apply_power(w);
 	}
-
-	if (!w->power_check)
-		return 0;
-
-	power = w->power_check(w);
-	if (w->power == power)
-		return 0;
-	w->power = power;
-
-	return dapm_generic_apply_power(w);
 }
 
 /*
@@ -722,27 +712,68 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 {
 	struct snd_soc_dapm_widget *w;
-	int i, c = 1, *seq = NULL, ret = 0;
-
-	/* do we have a sequenced stream event */
-	if (event == SND_SOC_DAPM_STREAM_START) {
-		c = ARRAY_SIZE(dapm_up_seq);
-		seq = dapm_up_seq;
-	} else if (event == SND_SOC_DAPM_STREAM_STOP) {
-		c = ARRAY_SIZE(dapm_down_seq);
-		seq = dapm_down_seq;
+	int ret = 0;
+	int i, power;
+
+	INIT_LIST_HEAD(&codec->up_list);
+	INIT_LIST_HEAD(&codec->down_list);
+
+	/* Check which widgets we need to power and store them in
+	 * lists indicating if they should be powered up or down.
+	 */
+	list_for_each_entry(w, &codec->dapm_widgets, list) {
+		switch (w->id) {
+		case snd_soc_dapm_pre:
+			list_add_tail(&codec->down_list, &w->power_list);
+			break;
+		case snd_soc_dapm_post:
+			list_add_tail(&codec->up_list, &w->power_list);
+			break;
+
+		default:
+			if (!w->power_check)
+				continue;
+
+			power = w->power_check(w);
+			if (w->power == power)
+				continue;
+
+			if (power)
+				list_add_tail(&w->power_list, &codec->up_list);
+			else
+				list_add_tail(&w->power_list,
+					      &codec->down_list);
+
+			w->power = power;
+			break;
+		}
 	}
 
-	for (i = 0; i < c; i++) {
-		list_for_each_entry(w, &codec->dapm_widgets, list) {
+	/* Power down widgets first; try to avoid amplifying pops. */
+	for (i = 0; i < ARRAY_SIZE(dapm_down_seq); i++) {
+		list_for_each_entry(w, &codec->down_list, power_list) {
+			/* is widget in stream order */
+			if (w->id != dapm_down_seq[i])
+				continue;
+
+			ret = dapm_power_widget(codec, event, w);
+			if (ret != 0)
+				pr_err("Failed to power down %s: %d\n",
+				       w->name, ret);
+		}
+	}
 
+	/* Now power up. */
+	for (i = 0; i < ARRAY_SIZE(dapm_up_seq); i++) {
+		list_for_each_entry(w, &codec->up_list, power_list) {
 			/* is widget in stream order */
-			if (seq && seq[i] && w->id != seq[i])
+			if (w->id != dapm_up_seq[i])
 				continue;
 
 			ret = dapm_power_widget(codec, event, w);
 			if (ret != 0)
-				return ret;
+				pr_err("Failed to power up %s: %d\n",
+				       w->name, ret);
 		}
 	}
 
-- 
cgit v0.10.2


From aef908434cd24dd5529065bf5d781773fad21125 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 16 May 2009 17:53:16 +0100
Subject: ASoC: Make DAPM sysfs entries non-optional

sysfs is so standard these days there's no point.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 04ef841..d130602 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -67,10 +67,6 @@ static int dapm_down_seq[] = {
 	snd_soc_dapm_post
 };
 
-static int dapm_status = 1;
-module_param(dapm_status, int, 0);
-MODULE_PARM_DESC(dapm_status, "enable DPM sysfs entries");
-
 static void pop_wait(u32 pop_time)
 {
 	if (pop_time)
@@ -974,16 +970,12 @@ static DEVICE_ATTR(dapm_widget, 0444, dapm_widget_show, NULL);
 
 int snd_soc_dapm_sys_add(struct device *dev)
 {
-	if (!dapm_status)
-		return 0;
 	return device_create_file(dev, &dev_attr_dapm_widget);
 }
 
 static void snd_soc_dapm_sys_remove(struct device *dev)
 {
-	if (dapm_status) {
-		device_remove_file(dev, &dev_attr_dapm_widget);
-	}
+	device_remove_file(dev, &dev_attr_dapm_widget);
 }
 
 /* free all dapm widgets and resources */
-- 
cgit v0.10.2


From 452c5eaa0d5162e02ffee742ea17540887bc2904 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 17 May 2009 21:41:23 +0100
Subject: ASoC: Integrate bias management with DAPM power management

Rather than managing the bias level of the system based on if there is
an active audio stream manage it based on there being an active DAPM
widget. This simplifies the code a little, moving the power handling
into one place, and improves audio performance for bypass paths when no
playbacks or captures are active.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index b3f789d..ec8a45f 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -279,8 +279,6 @@ int snd_soc_dapm_add_routes(struct snd_soc_codec *codec,
 /* dapm events */
 int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream,
 	int event);
-int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
-	enum snd_soc_bias_level level);
 
 /* dapm sys fs - used by the core */
 int snd_soc_dapm_sys_add(struct device *dev);
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 8309ce8..2af3213 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -339,6 +339,7 @@ struct snd_soc_codec {
 	struct module *owner;
 	struct mutex mutex;
 	struct device *dev;
+	struct snd_soc_device *socdev;
 
 	struct list_head list;
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index c0e7066..4aa8e2d 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -299,7 +299,6 @@ static void close_delayed_work(struct work_struct *work)
 {
 	struct snd_soc_card *card = container_of(work, struct snd_soc_card,
 						 delayed_work.work);
-	struct snd_soc_device *socdev = card->socdev;
 	struct snd_soc_codec *codec = card->codec;
 	struct snd_soc_dai *codec_dai;
 	int i;
@@ -315,27 +314,10 @@ static void close_delayed_work(struct work_struct *work)
 
 		/* are we waiting on this codec DAI stream */
 		if (codec_dai->pop_wait == 1) {
-
-			/* Reduce power if no longer active */
-			if (codec->active == 0) {
-				pr_debug("pop wq D1 %s %s\n", codec->name,
-					 codec_dai->playback.stream_name);
-				snd_soc_dapm_set_bias_level(socdev,
-					SND_SOC_BIAS_PREPARE);
-			}
-
 			codec_dai->pop_wait = 0;
 			snd_soc_dapm_stream_event(codec,
 				codec_dai->playback.stream_name,
 				SND_SOC_DAPM_STREAM_STOP);
-
-			/* Fall into standby if no longer active */
-			if (codec->active == 0) {
-				pr_debug("pop wq D3 %s %s\n", codec->name,
-					 codec_dai->playback.stream_name);
-				snd_soc_dapm_set_bias_level(socdev,
-					SND_SOC_BIAS_STANDBY);
-			}
 		}
 	}
 	mutex_unlock(&pcm_mutex);
@@ -399,10 +381,6 @@ static int soc_codec_close(struct snd_pcm_substream *substream)
 		snd_soc_dapm_stream_event(codec,
 			codec_dai->capture.stream_name,
 			SND_SOC_DAPM_STREAM_STOP);
-
-		if (codec->active == 0 && codec_dai->pop_wait == 0)
-			snd_soc_dapm_set_bias_level(socdev,
-						SND_SOC_BIAS_STANDBY);
 	}
 
 	mutex_unlock(&pcm_mutex);
@@ -467,36 +445,16 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 		cancel_delayed_work(&card->delayed_work);
 	}
 
-	/* do we need to power up codec */
-	if (codec->bias_level != SND_SOC_BIAS_ON) {
-		snd_soc_dapm_set_bias_level(socdev,
-					    SND_SOC_BIAS_PREPARE);
-
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->playback.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-		else
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->capture.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-
-		snd_soc_dapm_set_bias_level(socdev, SND_SOC_BIAS_ON);
-		snd_soc_dai_digital_mute(codec_dai, 0);
-
-	} else {
-		/* codec already powered - power on widgets */
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->playback.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-		else
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->capture.stream_name,
-					SND_SOC_DAPM_STREAM_START);
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		snd_soc_dapm_stream_event(codec,
+					  codec_dai->playback.stream_name,
+					  SND_SOC_DAPM_STREAM_START);
+	else
+		snd_soc_dapm_stream_event(codec,
+					  codec_dai->capture.stream_name,
+					  SND_SOC_DAPM_STREAM_START);
 
-		snd_soc_dai_digital_mute(codec_dai, 0);
-	}
+	snd_soc_dai_digital_mute(codec_dai, 0);
 
 out:
 	mutex_unlock(&pcm_mutex);
@@ -1372,6 +1330,7 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid)
 		return ret;
 	}
 
+	codec->socdev = socdev;
 	codec->card->dev = socdev->dev;
 	codec->card->private_data = codec;
 	strncpy(codec->card->driver, codec->name, sizeof(codec->card->driver));
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index d130602..4ca5e56 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -94,6 +94,30 @@ static inline struct snd_soc_dapm_widget *dapm_cnew_widget(
 	return kmemdup(_widget, sizeof(*_widget), GFP_KERNEL);
 }
 
+/**
+ * snd_soc_dapm_set_bias_level - set the bias level for the system
+ * @socdev: audio device
+ * @level: level to configure
+ *
+ * Configure the bias (power) levels for the SoC audio device.
+ *
+ * Returns 0 for success else error.
+ */
+static int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
+				       enum snd_soc_bias_level level)
+{
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int ret = 0;
+
+	if (card->set_bias_level)
+		ret = card->set_bias_level(card, level);
+	if (ret == 0 && codec->set_bias_level)
+		ret = codec->set_bias_level(codec, level);
+
+	return ret;
+}
+
 /* set up initial codec paths */
 static void dapm_set_path_status(struct snd_soc_dapm_widget *w,
 	struct snd_soc_dapm_path *p, int i)
@@ -707,9 +731,11 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
  */
 static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 {
+	struct snd_soc_device *socdev = codec->socdev;
 	struct snd_soc_dapm_widget *w;
 	int ret = 0;
 	int i, power;
+	int sys_power = 0;
 
 	INIT_LIST_HEAD(&codec->up_list);
 	INIT_LIST_HEAD(&codec->down_list);
@@ -731,6 +757,9 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 				continue;
 
 			power = w->power_check(w);
+			if (power)
+				sys_power = 1;
+
 			if (w->power == power)
 				continue;
 
@@ -745,6 +774,15 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 		}
 	}
 
+	/* If we're changing to all on or all off then prepare */
+	if ((sys_power && codec->bias_level == SND_SOC_BIAS_STANDBY) ||
+	    (!sys_power && codec->bias_level == SND_SOC_BIAS_ON)) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_PREPARE);
+		if (ret != 0)
+			pr_err("Failed to prepare bias: %d\n", ret);
+	}
+
 	/* Power down widgets first; try to avoid amplifying pops. */
 	for (i = 0; i < ARRAY_SIZE(dapm_down_seq); i++) {
 		list_for_each_entry(w, &codec->down_list, power_list) {
@@ -773,6 +811,22 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 		}
 	}
 
+	/* If we just powered the last thing off drop to standby bias */
+	if (codec->bias_level == SND_SOC_BIAS_PREPARE && !sys_power) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_STANDBY);
+		if (ret != 0)
+			pr_err("Failed to apply standby bias: %d\n", ret);
+	}
+
+	/* If we just powered up then move to active bias */
+	if (codec->bias_level == SND_SOC_BIAS_PREPARE && sys_power) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_ON);
+		if (ret != 0)
+			pr_err("Failed to apply active bias: %d\n", ret);
+	}
+
 	return 0;
 }
 
@@ -1721,30 +1775,6 @@ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec,
 EXPORT_SYMBOL_GPL(snd_soc_dapm_stream_event);
 
 /**
- * snd_soc_dapm_set_bias_level - set the bias level for the system
- * @socdev: audio device
- * @level: level to configure
- *
- * Configure the bias (power) levels for the SoC audio device.
- *
- * Returns 0 for success else error.
- */
-int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
-				enum snd_soc_bias_level level)
-{
-	struct snd_soc_card *card = socdev->card;
-	struct snd_soc_codec *codec = socdev->card->codec;
-	int ret = 0;
-
-	if (card->set_bias_level)
-		ret = card->set_bias_level(card, level);
-	if (ret == 0 && codec->set_bias_level)
-		ret = codec->set_bias_level(codec, level);
-
-	return ret;
-}
-
-/**
  * snd_soc_dapm_enable_pin - enable pin.
  * @codec: SoC codec
  * @pin: pin name
-- 
cgit v0.10.2


From f83fba8baab9e95fff0fe2be0e1e32a1650bdd7f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 18 May 2009 15:44:43 +0100
Subject: ASoC: Add debug trace for bias level transitions

A standard way of making sure we know when the bias level changes.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 4ca5e56..39a63f9 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -110,6 +110,24 @@ static int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
 	struct snd_soc_codec *codec = socdev->card->codec;
 	int ret = 0;
 
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		dev_dbg(socdev->dev, "Setting full bias\n");
+		break;
+	case SND_SOC_BIAS_PREPARE:
+		dev_dbg(socdev->dev, "Setting bias prepare\n");
+		break;
+	case SND_SOC_BIAS_STANDBY:
+		dev_dbg(socdev->dev, "Setting standby bias\n");
+		break;
+	case SND_SOC_BIAS_OFF:
+		dev_dbg(socdev->dev, "Setting bias off\n");
+		break;
+	default:
+		dev_err(socdev->dev, "Setting invalid bias %d\n", level);
+		return -EINVAL;
+	}
+
 	if (card->set_bias_level)
 		ret = card->set_bias_level(card, level);
 	if (ret == 0 && codec->set_bias_level)
-- 
cgit v0.10.2


From ec976d6eb021dc8f2994248c310a41540f4756bd Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Wed, 13 May 2009 22:52:24 +0100
Subject: [ARM] S3C24XX: GPIO: Move gpio functions out of <mach/hardware.h>

Move all the gpio functions out of <mach/hardware.h> as
this file is for defining the generic IO base addresses
for the kernel IO calls.

Make a new header <mach/gpio-fns.h> to take this and
include it via the chain from <linux/gpio.h> which is
what most of these files should be using (and will be
changed as soon as possible).

Note, this does make minor changes to some drivers but
should not mess up any pending merges.

CC: Richard Purdie <rpurdie@rpsys.net>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
CC: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/h1940-bluetooth.c b/arch/arm/mach-s3c2410/h1940-bluetooth.c
index 1f332b3..9bbea8a 100644
--- a/arch/arm/mach-s3c2410/h1940-bluetooth.c
+++ b/arch/arm/mach-s3c2410/h1940-bluetooth.c
@@ -16,6 +16,8 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/leds.h>
+#include <linux/gpio.h>
+
 #include <mach/regs-gpio.h>
 #include <mach/hardware.h>
 #include <mach/h1940-latch.h>
diff --git a/arch/arm/mach-s3c2410/include/mach/gpio-fns.h b/arch/arm/mach-s3c2410/include/mach/gpio-fns.h
new file mode 100644
index 0000000..b4f3955
--- /dev/null
+++ b/arch/arm/mach-s3c2410/include/mach/gpio-fns.h
@@ -0,0 +1,103 @@
+/* arch/arm/mach-s3c2410/include/mach/gpio-fns.h
+ *
+ * Copyright (c) 2003,2009 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * S3C2410 - hardware
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+/* These functions are in the to-be-removed category and it is strongly
+ * encouraged not to use these in new code. They will be marked deprecated
+ * very soon.
+ *
+ * Most of the functionality can be either replaced by the gpiocfg calls
+ * for the s3c platform or by the generic GPIOlib API.
+*/
+
+/* external functions for GPIO support
+ *
+ * These allow various different clients to access the same GPIO
+ * registers without conflicting. If your driver only owns the entire
+ * GPIO register, then it is safe to ioremap/__raw_{read|write} to it.
+*/
+
+/* s3c2410_gpio_cfgpin
+ *
+ * set the configuration of the given pin to the value passed.
+ *
+ * eg:
+ *    s3c2410_gpio_cfgpin(S3C2410_GPA0, S3C2410_GPA0_ADDR0);
+ *    s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1);
+*/
+
+extern void s3c2410_gpio_cfgpin(unsigned int pin, unsigned int function);
+
+extern unsigned int s3c2410_gpio_getcfg(unsigned int pin);
+
+/* s3c2410_gpio_getirq
+ *
+ * turn the given pin number into the corresponding IRQ number
+ *
+ * returns:
+ *	< 0 = no interrupt for this pin
+ *	>=0 = interrupt number for the pin
+*/
+
+extern int s3c2410_gpio_getirq(unsigned int pin);
+
+#ifdef CONFIG_CPU_S3C2400
+
+extern int s3c2400_gpio_getirq(unsigned int pin);
+
+#endif /* CONFIG_CPU_S3C2400 */
+
+/* s3c2410_gpio_irqfilter
+ *
+ * set the irq filtering on the given pin
+ *
+ * on = 0 => disable filtering
+ *      1 => enable filtering
+ *
+ * config = S3C2410_EINTFLT_PCLK or S3C2410_EINTFLT_EXTCLK orred with
+ *          width of filter (0 through 63)
+ *
+ *
+*/
+
+extern int s3c2410_gpio_irqfilter(unsigned int pin, unsigned int on,
+				  unsigned int config);
+
+/* s3c2410_gpio_pullup
+ *
+ * configure the pull-up control on the given pin
+ *
+ * to = 1 => disable the pull-up
+ *      0 => enable the pull-up
+ *
+ * eg;
+ *
+ *   s3c2410_gpio_pullup(S3C2410_GPB0, 0);
+ *   s3c2410_gpio_pullup(S3C2410_GPE8, 0);
+*/
+
+extern void s3c2410_gpio_pullup(unsigned int pin, unsigned int to);
+
+/* s3c2410_gpio_getpull
+ *
+ * Read the state of the pull-up on a given pin
+ *
+ * return:
+ *	< 0 => error code
+ *	  0 => enabled
+ *	  1 => disabled
+*/
+
+extern int s3c2410_gpio_getpull(unsigned int pin);
+
+extern void s3c2410_gpio_setpin(unsigned int pin, unsigned int to);
+
+extern unsigned int s3c2410_gpio_getpin(unsigned int pin);
diff --git a/arch/arm/mach-s3c2410/include/mach/gpio.h b/arch/arm/mach-s3c2410/include/mach/gpio.h
index 51a88cf..15f0b3e 100644
--- a/arch/arm/mach-s3c2410/include/mach/gpio.h
+++ b/arch/arm/mach-s3c2410/include/mach/gpio.h
@@ -24,5 +24,6 @@
 
 #include <asm-generic/gpio.h>
 #include <mach/gpio-nrs.h>
+#include <mach/gpio-fns.h>
 
 #define S3C_GPIO_END	(S3C2410_GPIO_BANKH + 32)
diff --git a/arch/arm/mach-s3c2410/include/mach/hardware.h b/arch/arm/mach-s3c2410/include/mach/hardware.h
index d7745d8..aef5631 100644
--- a/arch/arm/mach-s3c2410/include/mach/hardware.h
+++ b/arch/arm/mach-s3c2410/include/mach/hardware.h
@@ -15,90 +15,6 @@
 
 #ifndef __ASSEMBLY__
 
-/* external functions for GPIO support
- *
- * These allow various different clients to access the same GPIO
- * registers without conflicting. If your driver only owns the entire
- * GPIO register, then it is safe to ioremap/__raw_{read|write} to it.
-*/
-
-/* s3c2410_gpio_cfgpin
- *
- * set the configuration of the given pin to the value passed.
- *
- * eg:
- *    s3c2410_gpio_cfgpin(S3C2410_GPA0, S3C2410_GPA0_ADDR0);
- *    s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1);
-*/
-
-extern void s3c2410_gpio_cfgpin(unsigned int pin, unsigned int function);
-
-extern unsigned int s3c2410_gpio_getcfg(unsigned int pin);
-
-/* s3c2410_gpio_getirq
- *
- * turn the given pin number into the corresponding IRQ number
- *
- * returns:
- *	< 0 = no interrupt for this pin
- *	>=0 = interrupt number for the pin
-*/
-
-extern int s3c2410_gpio_getirq(unsigned int pin);
-
-#ifdef CONFIG_CPU_S3C2400
-
-extern int s3c2400_gpio_getirq(unsigned int pin);
-
-#endif /* CONFIG_CPU_S3C2400 */
-
-/* s3c2410_gpio_irqfilter
- *
- * set the irq filtering on the given pin
- *
- * on = 0 => disable filtering
- *      1 => enable filtering
- *
- * config = S3C2410_EINTFLT_PCLK or S3C2410_EINTFLT_EXTCLK orred with
- *          width of filter (0 through 63)
- *
- *
-*/
-
-extern int s3c2410_gpio_irqfilter(unsigned int pin, unsigned int on,
-				  unsigned int config);
-
-/* s3c2410_gpio_pullup
- *
- * configure the pull-up control on the given pin
- *
- * to = 1 => disable the pull-up
- *      0 => enable the pull-up
- *
- * eg;
- *
- *   s3c2410_gpio_pullup(S3C2410_GPB0, 0);
- *   s3c2410_gpio_pullup(S3C2410_GPE8, 0);
-*/
-
-extern void s3c2410_gpio_pullup(unsigned int pin, unsigned int to);
-
-/* s3c2410_gpio_getpull
- *
- * Read the state of the pull-up on a given pin
- *
- * return:
- *	< 0 => error code
- *	  0 => enabled
- *	  1 => disabled
-*/
-
-extern int s3c2410_gpio_getpull(unsigned int pin);
-
-extern void s3c2410_gpio_setpin(unsigned int pin, unsigned int to);
-
-extern unsigned int s3c2410_gpio_getpin(unsigned int pin);
-
 extern unsigned int s3c2410_modify_misccr(unsigned int clr, unsigned int chg);
 
 #ifdef CONFIG_CPU_S3C2440
diff --git a/arch/arm/mach-s3c2410/mach-amlm5900.c b/arch/arm/mach-s3c2410/mach-amlm5900.c
index 79428ed..43f7536 100644
--- a/arch/arm/mach-s3c2410/mach-amlm5900.c
+++ b/arch/arm/mach-s3c2410/mach-amlm5900.c
@@ -32,6 +32,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/proc_fs.h>
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 10ba91b..3410cae 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -16,6 +16,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/sysdev.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c
index 2b83f87..8370503 100644
--- a/arch/arm/mach-s3c2410/mach-n30.c
+++ b/arch/arm/mach-s3c2410/mach-n30.c
@@ -19,6 +19,7 @@
 
 #include <linux/gpio_keys.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
diff --git a/arch/arm/mach-s3c2410/mach-qt2410.c b/arch/arm/mach-s3c2410/mach-qt2410.c
index 9f1ba9b..7520aee 100644
--- a/arch/arm/mach-s3c2410/mach-qt2410.c
+++ b/arch/arm/mach-s3c2410/mach-qt2410.c
@@ -27,6 +27,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/sysdev.h>
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c
index 7e002f9..0507a7e 100644
--- a/arch/arm/mach-s3c2410/mach-vr1000.c
+++ b/arch/arm/mach-s3c2410/mach-vr1000.c
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/dm9000.h>
 #include <linux/i2c.h>
 
diff --git a/arch/arm/mach-s3c2410/pm.c b/arch/arm/mach-s3c2410/pm.c
index 87fc481..726cfc0 100644
--- a/arch/arm/mach-s3c2410/pm.c
+++ b/arch/arm/mach-s3c2410/pm.c
@@ -25,6 +25,7 @@
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/sysdev.h>
+#include <linux/gpio.h>
 #include <linux/io.h>
 
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c
index b209749b..506252b 100644
--- a/arch/arm/mach-s3c2410/usb-simtec.c
+++ b/arch/arm/mach-s3c2410/usb-simtec.c
@@ -18,6 +18,7 @@
 #include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
+#include <linux/gpio.h>
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/device.h>
diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c
index 8f0d37d..56d12b5 100644
--- a/arch/arm/mach-s3c2412/mach-jive.c
+++ b/arch/arm/mach-s3c2412/mach-jive.c
@@ -16,6 +16,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/sysdev.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
diff --git a/arch/arm/mach-s3c2412/mach-smdk2413.c b/arch/arm/mach-s3c2412/mach-smdk2413.c
index eba66aa..753aaed 100644
--- a/arch/arm/mach-s3c2412/mach-smdk2413.c
+++ b/arch/arm/mach-s3c2412/mach-smdk2413.c
@@ -17,6 +17,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
diff --git a/arch/arm/mach-s3c2440/mach-anubis.c b/arch/arm/mach-s3c2440/mach-anubis.c
index 9c6abf9..d363b6f 100644
--- a/arch/arm/mach-s3c2440/mach-anubis.c
+++ b/arch/arm/mach-s3c2440/mach-anubis.c
@@ -15,6 +15,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/ata_platform.h>
diff --git a/arch/arm/mach-s3c2440/mach-nexcoder.c b/arch/arm/mach-s3c2440/mach-nexcoder.c
index 3d4c61e..4b2088c 100644
--- a/arch/arm/mach-s3c2440/mach-nexcoder.c
+++ b/arch/arm/mach-s3c2440/mach-nexcoder.c
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/string.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c
index e05f955..ad0fdc4 100644
--- a/arch/arm/mach-s3c2440/mach-osiris.c
+++ b/arch/arm/mach-s3c2440/mach-osiris.c
@@ -15,6 +15,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/device.h>
 #include <linux/sysdev.h>
 #include <linux/serial_core.h>
diff --git a/arch/arm/plat-s3c24xx/common-smdk.c b/arch/arm/plat-s3c24xx/common-smdk.c
index ef4c8fd..fe658c4 100644
--- a/arch/arm/plat-s3c24xx/common-smdk.c
+++ b/arch/arm/plat-s3c24xx/common-smdk.c
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/sysdev.h>
 #include <linux/platform_device.h>
 
diff --git a/arch/arm/plat-s3c24xx/pm.c b/arch/arm/plat-s3c24xx/pm.c
index 5135c40..16aaf36 100644
--- a/arch/arm/plat-s3c24xx/pm.c
+++ b/arch/arm/plat-s3c24xx/pm.c
@@ -30,6 +30,7 @@
 #include <linux/suspend.h>
 #include <linux/errno.h>
 #include <linux/time.h>
+#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/serial_core.h>
 #include <linux/io.h>
diff --git a/arch/arm/plat-s3c24xx/setup-i2c.c b/arch/arm/plat-s3c24xx/setup-i2c.c
index d62b7e7..e0d1853 100644
--- a/arch/arm/plat-s3c24xx/setup-i2c.c
+++ b/arch/arm/plat-s3c24xx/setup-i2c.c
@@ -11,6 +11,7 @@
 */
 
 #include <linux/kernel.h>
+#include <linux/gpio.h>
 
 struct platform_device;
 
diff --git a/drivers/leds/leds-h1940.c b/drivers/leds/leds-h1940.c
index 1aa46a3..173d104 100644
--- a/drivers/leds/leds-h1940.c
+++ b/drivers/leds/leds-h1940.c
@@ -16,6 +16,8 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/leds.h>
+#include <linux/gpio.h>
+
 #include <mach/regs-gpio.h>
 #include <mach/hardware.h>
 #include <mach/h1940-latch.h>
diff --git a/drivers/leds/leds-s3c24xx.c b/drivers/leds/leds-s3c24xx.c
index aa2e7ae..aa7acf3 100644
--- a/drivers/leds/leds-s3c24xx.c
+++ b/drivers/leds/leds-s3c24xx.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/leds.h>
+#include <linux/gpio.h>
 
 #include <mach/hardware.h>
 #include <mach/regs-gpio.h>
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 2db166b..2e7da8e 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -17,6 +17,7 @@
 #include <linux/mmc/host.h>
 #include <linux/platform_device.h>
 #include <linux/cpufreq.h>
+#include <linux/gpio.h>
 #include <linux/irq.h>
 #include <linux/io.h>
 
@@ -1121,7 +1122,7 @@ static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	case MMC_POWER_OFF:
 	default:
 		s3c2410_gpio_setpin(S3C2410_GPE5, 0);
-		s3c2410_gpio_cfgpin(S3C2410_GPE5, S3C2410_GPE5_OUTP);
+		s3c2410_gpio_cfgpin(S3C2410_GPE5, S3C2410_GPIO_OUTPUT);
 
 		if (host->is2440)
 			mci_con |= S3C2440_SDICON_SDRESET;
diff --git a/drivers/spi/spi_s3c24xx_gpio.c b/drivers/spi/spi_s3c24xx_gpio.c
index f2447a5..bbf9371 100644
--- a/drivers/spi/spi_s3c24xx_gpio.c
+++ b/drivers/spi/spi_s3c24xx_gpio.c
@@ -17,6 +17,7 @@
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
 #include <linux/platform_device.h>
+#include <linux/gpio.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c
index b7e0b3f..c35b74b 100644
--- a/sound/soc/s3c24xx/s3c2412-i2s.c
+++ b/sound/soc/s3c24xx/s3c2412-i2s.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c
index 3698f70..3f03d5d 100644
--- a/sound/soc/s3c24xx/s3c2443-ac97.c
+++ b/sound/soc/s3c24xx/s3c2443-ac97.c
@@ -19,6 +19,7 @@
 #include <linux/io.h>
 #include <linux/wait.h>
 #include <linux/delay.h>
+#include <linux/gpio.h>
 #include <linux/clk.h>
 
 #include <sound/core.h>
diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c
index cc06696..556e35f 100644
--- a/sound/soc/s3c24xx/s3c24xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c24xx-i2s.c
@@ -21,6 +21,8 @@
 #include <linux/clk.h>
 #include <linux/jiffies.h>
 #include <linux/io.h>
+#include <linux/gpio.h>
+
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
-- 
cgit v0.10.2


From 86c03c526e2b282846c09509a48ab8be68fe7168 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 17 May 2009 22:47:07 +0100
Subject: [ARM] S3C24XX: Fix missing <linux/sysdev.h>

In our recent changes, arch/arm/plat-s3c24xx/gpiolib.c needs
to have <linux/sysdev.h> included for it to build.

This fixes the following error/warnings:

arch/arm/plat-s3c/include/plat/pm.h:104: error: expected declaration specifiers or '...' before 'pm_message_t'
arch/arm/plat-s3c/include/plat/pm.h:104: warning: 'struct sys_device' declared inside parameter list
arch/arm/plat-s3c/include/plat/pm.h:104: warning: its scope is only this definition or declaration, which is probably not what you want
arch/arm/plat-s3c/include/plat/pm.h:105: warning: 'struct sys_device' declared inside parameter list

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c24xx/gpiolib.c b/arch/arm/plat-s3c24xx/gpiolib.c
index 4bac12d..3cbec6d 100644
--- a/arch/arm/plat-s3c24xx/gpiolib.c
+++ b/arch/arm/plat-s3c24xx/gpiolib.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/sysdev.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
-- 
cgit v0.10.2


From 9c7099ca7519268f6ec79782bc06faa27a714d95 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 17 May 2009 22:08:32 +0100
Subject: [ARM] S3C24XX: GPIO: Clean out unused definitions in
 <mach/regs-gpio.h>

The <mach/regs-gpio.h> really does not need the input and output
pin configurations as these are standard and have a generic
representation (plus the s3c24xx gpio specific code is going to
be phased out soon).

The following sed was applied to remove the lines:

    sed -i~ -e '/S3C2410_GP[A-Z][0-9]*_\INP/d' \
	    -e '/S3C2410_GP[A-Z][0-9]*_\OUTP/d' \
	    -e '/S3C2410_GPA[0-9]*_OUT/d'

to remove these.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/include/mach/regs-gpio.h b/arch/arm/mach-s3c2410/include/mach/regs-gpio.h
index 35a03df..90cefeb 100644
--- a/arch/arm/mach-s3c2410/include/mach/regs-gpio.h
+++ b/arch/arm/mach-s3c2410/include/mach/regs-gpio.h
@@ -70,103 +70,80 @@
 #define S3C2400_GPADAT	   S3C2410_GPIOREG(0x04)
 
 #define S3C2410_GPA0         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 0)
-#define S3C2410_GPA0_OUT     (0<<0)
 #define S3C2410_GPA0_ADDR0   (1<<0)
 
 #define S3C2410_GPA1         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 1)
-#define S3C2410_GPA1_OUT     (0<<1)
 #define S3C2410_GPA1_ADDR16  (1<<1)
 
 #define S3C2410_GPA2         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 2)
-#define S3C2410_GPA2_OUT     (0<<2)
 #define S3C2410_GPA2_ADDR17  (1<<2)
 
 #define S3C2410_GPA3         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 3)
-#define S3C2410_GPA3_OUT     (0<<3)
 #define S3C2410_GPA3_ADDR18  (1<<3)
 
 #define S3C2410_GPA4         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 4)
-#define S3C2410_GPA4_OUT     (0<<4)
 #define S3C2410_GPA4_ADDR19  (1<<4)
 
 #define S3C2410_GPA5         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 5)
-#define S3C2410_GPA5_OUT     (0<<5)
 #define S3C2410_GPA5_ADDR20  (1<<5)
 
 #define S3C2410_GPA6         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 6)
-#define S3C2410_GPA6_OUT     (0<<6)
 #define S3C2410_GPA6_ADDR21  (1<<6)
 
 #define S3C2410_GPA7         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 7)
-#define S3C2410_GPA7_OUT     (0<<7)
 #define S3C2410_GPA7_ADDR22  (1<<7)
 
 #define S3C2410_GPA8         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 8)
-#define S3C2410_GPA8_OUT     (0<<8)
 #define S3C2410_GPA8_ADDR23  (1<<8)
 
 #define S3C2410_GPA9         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 9)
-#define S3C2410_GPA9_OUT     (0<<9)
 #define S3C2410_GPA9_ADDR24  (1<<9)
 
 #define S3C2410_GPA10        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 10)
-#define S3C2410_GPA10_OUT    (0<<10)
 #define S3C2410_GPA10_ADDR25 (1<<10)
 #define S3C2400_GPA10_SCKE   (1<<10)
 
 #define S3C2410_GPA11        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 11)
-#define S3C2410_GPA11_OUT    (0<<11)
 #define S3C2410_GPA11_ADDR26 (1<<11)
 #define S3C2400_GPA11_nCAS0  (1<<11)
 
 #define S3C2410_GPA12        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 12)
-#define S3C2410_GPA12_OUT    (0<<12)
 #define S3C2410_GPA12_nGCS1  (1<<12)
 #define S3C2400_GPA12_nCAS1  (1<<12)
 
 #define S3C2410_GPA13        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 13)
-#define S3C2410_GPA13_OUT    (0<<13)
 #define S3C2410_GPA13_nGCS2  (1<<13)
 #define S3C2400_GPA13_nGCS1  (1<<13)
 
 #define S3C2410_GPA14        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 14)
-#define S3C2410_GPA14_OUT    (0<<14)
 #define S3C2410_GPA14_nGCS3  (1<<14)
 #define S3C2400_GPA14_nGCS2  (1<<14)
 
 #define S3C2410_GPA15        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 15)
-#define S3C2410_GPA15_OUT    (0<<15)
 #define S3C2410_GPA15_nGCS4  (1<<15)
 #define S3C2400_GPA15_nGCS3  (1<<15)
 
 #define S3C2410_GPA16        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 16)
-#define S3C2410_GPA16_OUT    (0<<16)
 #define S3C2410_GPA16_nGCS5  (1<<16)
 #define S3C2400_GPA16_nGCS4  (1<<16)
 
 #define S3C2410_GPA17        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 17)
-#define S3C2410_GPA17_OUT    (0<<17)
 #define S3C2410_GPA17_CLE    (1<<17)
 #define S3C2400_GPA17_nGCS5  (1<<17)
 
 #define S3C2410_GPA18        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 18)
-#define S3C2410_GPA18_OUT    (0<<18)
 #define S3C2410_GPA18_ALE    (1<<18)
 
 #define S3C2410_GPA19        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 19)
-#define S3C2410_GPA19_OUT    (0<<19)
 #define S3C2410_GPA19_nFWE   (1<<19)
 
 #define S3C2410_GPA20        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 20)
-#define S3C2410_GPA20_OUT    (0<<20)
 #define S3C2410_GPA20_nFRE   (1<<20)
 
 #define S3C2410_GPA21        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 21)
-#define S3C2410_GPA21_OUT    (0<<21)
 #define S3C2410_GPA21_nRSTOUT (1<<21)
 
 #define S3C2410_GPA22        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 22)
-#define S3C2410_GPA22_OUT    (0<<22)
 #define S3C2410_GPA22_nFCE   (1<<22)
 
 /* 0x08 and 0x0c are reserved on S3C2410 */
@@ -195,34 +172,24 @@
 /* no i/o pin in port b can have value 3 (unless it is a s3c2443) ! */
 
 #define S3C2410_GPB0         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 0)
-#define S3C2410_GPB0_INP     (0x00 << 0)
-#define S3C2410_GPB0_OUTP    (0x01 << 0)
 #define S3C2410_GPB0_TOUT0   (0x02 << 0)
 #define S3C2400_GPB0_DATA16  (0x02 << 0)
 
 #define S3C2410_GPB1         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 1)
-#define S3C2410_GPB1_INP     (0x00 << 2)
-#define S3C2410_GPB1_OUTP    (0x01 << 2)
 #define S3C2410_GPB1_TOUT1   (0x02 << 2)
 #define S3C2400_GPB1_DATA17  (0x02 << 2)
 
 #define S3C2410_GPB2         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 2)
-#define S3C2410_GPB2_INP     (0x00 << 4)
-#define S3C2410_GPB2_OUTP    (0x01 << 4)
 #define S3C2410_GPB2_TOUT2   (0x02 << 4)
 #define S3C2400_GPB2_DATA18  (0x02 << 4)
 #define S3C2400_GPB2_TCLK1   (0x03 << 4)
 
 #define S3C2410_GPB3         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 3)
-#define S3C2410_GPB3_INP     (0x00 << 6)
-#define S3C2410_GPB3_OUTP    (0x01 << 6)
 #define S3C2410_GPB3_TOUT3   (0x02 << 6)
 #define S3C2400_GPB3_DATA19  (0x02 << 6)
 #define S3C2400_GPB3_TXD1    (0x03 << 6)
 
 #define S3C2410_GPB4         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 4)
-#define S3C2410_GPB4_INP     (0x00 << 8)
-#define S3C2410_GPB4_OUTP    (0x01 << 8)
 #define S3C2410_GPB4_TCLK0   (0x02 << 8)
 #define S3C2400_GPB4_DATA20  (0x02 << 8)
 #define S3C2410_GPB4_MASK    (0x03 << 8)
@@ -230,45 +197,33 @@
 #define S3C2400_GPB4_MASK    (0x03 << 8)
 
 #define S3C2410_GPB5         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 5)
-#define S3C2410_GPB5_INP     (0x00 << 10)
-#define S3C2410_GPB5_OUTP    (0x01 << 10)
 #define S3C2410_GPB5_nXBACK  (0x02 << 10)
 #define S3C2443_GPB5_XBACK   (0x03 << 10)
 #define S3C2400_GPB5_DATA21  (0x02 << 10)
 #define S3C2400_GPB5_nCTS1   (0x03 << 10)
 
 #define S3C2410_GPB6         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 6)
-#define S3C2410_GPB6_INP     (0x00 << 12)
-#define S3C2410_GPB6_OUTP    (0x01 << 12)
 #define S3C2410_GPB6_nXBREQ  (0x02 << 12)
 #define S3C2443_GPB6_XBREQ   (0x03 << 12)
 #define S3C2400_GPB6_DATA22  (0x02 << 12)
 #define S3C2400_GPB6_nRTS1   (0x03 << 12)
 
 #define S3C2410_GPB7         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 7)
-#define S3C2410_GPB7_INP     (0x00 << 14)
-#define S3C2410_GPB7_OUTP    (0x01 << 14)
 #define S3C2410_GPB7_nXDACK1 (0x02 << 14)
 #define S3C2443_GPB7_XDACK1  (0x03 << 14)
 #define S3C2400_GPB7_DATA23  (0x02 << 14)
 
 #define S3C2410_GPB8         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 8)
-#define S3C2410_GPB8_INP     (0x00 << 16)
-#define S3C2410_GPB8_OUTP    (0x01 << 16)
 #define S3C2410_GPB8_nXDREQ1 (0x02 << 16)
 #define S3C2400_GPB8_DATA24  (0x02 << 16)
 
 #define S3C2410_GPB9         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 9)
-#define S3C2410_GPB9_INP     (0x00 << 18)
-#define S3C2410_GPB9_OUTP    (0x01 << 18)
 #define S3C2410_GPB9_nXDACK0 (0x02 << 18)
 #define S3C2443_GPB9_XDACK0  (0x03 << 18)
 #define S3C2400_GPB9_DATA25  (0x02 << 18)
 #define S3C2400_GPB9_I2SSDI  (0x03 << 18)
 
 #define S3C2410_GPB10        S3C2410_GPIONO(S3C2410_GPIO_BANKB, 10)
-#define S3C2410_GPB10_INP    (0x00 << 20)
-#define S3C2410_GPB10_OUTP   (0x01 << 20)
 #define S3C2410_GPB10_nXDRE0 (0x02 << 20)
 #define S3C2443_GPB10_XDREQ0 (0x03 << 20)
 #define S3C2400_GPB10_DATA26 (0x02 << 20)
@@ -316,98 +271,66 @@
 #define S3C2400_GPCUP	   S3C2410_GPIOREG(0x1C)
 
 #define S3C2410_GPC0            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 0)
-#define S3C2410_GPC0_INP	(0x00 << 0)
-#define S3C2410_GPC0_OUTP	(0x01 << 0)
 #define S3C2410_GPC0_LEND	(0x02 << 0)
 #define S3C2400_GPC0_VD0 	(0x02 << 0)
 
 #define S3C2410_GPC1            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 1)
-#define S3C2410_GPC1_INP	(0x00 << 2)
-#define S3C2410_GPC1_OUTP	(0x01 << 2)
 #define S3C2410_GPC1_VCLK	(0x02 << 2)
 #define S3C2400_GPC1_VD1 	(0x02 << 2)
 
 #define S3C2410_GPC2            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 2)
-#define S3C2410_GPC2_INP	(0x00 << 4)
-#define S3C2410_GPC2_OUTP	(0x01 << 4)
 #define S3C2410_GPC2_VLINE	(0x02 << 4)
 #define S3C2400_GPC2_VD2  	(0x02 << 4)
 
 #define S3C2410_GPC3            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 3)
-#define S3C2410_GPC3_INP	(0x00 << 6)
-#define S3C2410_GPC3_OUTP	(0x01 << 6)
 #define S3C2410_GPC3_VFRAME	(0x02 << 6)
 #define S3C2400_GPC3_VD3   	(0x02 << 6)
 
 #define S3C2410_GPC4            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 4)
-#define S3C2410_GPC4_INP	(0x00 << 8)
-#define S3C2410_GPC4_OUTP	(0x01 << 8)
 #define S3C2410_GPC4_VM		(0x02 << 8)
 #define S3C2400_GPC4_VD4	(0x02 << 8)
 
 #define S3C2410_GPC5            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 5)
-#define S3C2410_GPC5_INP	(0x00 << 10)
-#define S3C2410_GPC5_OUTP	(0x01 << 10)
 #define S3C2410_GPC5_LCDVF0	(0x02 << 10)
 #define S3C2400_GPC5_VD5   	(0x02 << 10)
 
 #define S3C2410_GPC6            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 6)
-#define S3C2410_GPC6_INP	(0x00 << 12)
-#define S3C2410_GPC6_OUTP	(0x01 << 12)
 #define S3C2410_GPC6_LCDVF1	(0x02 << 12)
 #define S3C2400_GPC6_VD6   	(0x02 << 12)
 
 #define S3C2410_GPC7            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 7)
-#define S3C2410_GPC7_INP	(0x00 << 14)
-#define S3C2410_GPC7_OUTP	(0x01 << 14)
 #define S3C2410_GPC7_LCDVF2	(0x02 << 14)
 #define S3C2400_GPC7_VD7   	(0x02 << 14)
 
 #define S3C2410_GPC8            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 8)
-#define S3C2410_GPC8_INP	(0x00 << 16)
-#define S3C2410_GPC8_OUTP	(0x01 << 16)
 #define S3C2410_GPC8_VD0	(0x02 << 16)
 #define S3C2400_GPC8_VD8	(0x02 << 16)
 
 #define S3C2410_GPC9            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 9)
-#define S3C2410_GPC9_INP	(0x00 << 18)
-#define S3C2410_GPC9_OUTP	(0x01 << 18)
 #define S3C2410_GPC9_VD1	(0x02 << 18)
 #define S3C2400_GPC9_VD9	(0x02 << 18)
 
 #define S3C2410_GPC10           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 10)
-#define S3C2410_GPC10_INP	(0x00 << 20)
-#define S3C2410_GPC10_OUTP	(0x01 << 20)
 #define S3C2410_GPC10_VD2	(0x02 << 20)
 #define S3C2400_GPC10_VD10	(0x02 << 20)
 
 #define S3C2410_GPC11           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 11)
-#define S3C2410_GPC11_INP	(0x00 << 22)
-#define S3C2410_GPC11_OUTP	(0x01 << 22)
 #define S3C2410_GPC11_VD3	(0x02 << 22)
 #define S3C2400_GPC11_VD11	(0x02 << 22)
 
 #define S3C2410_GPC12           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 12)
-#define S3C2410_GPC12_INP	(0x00 << 24)
-#define S3C2410_GPC12_OUTP	(0x01 << 24)
 #define S3C2410_GPC12_VD4	(0x02 << 24)
 #define S3C2400_GPC12_VD12	(0x02 << 24)
 
 #define S3C2410_GPC13           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 13)
-#define S3C2410_GPC13_INP	(0x00 << 26)
-#define S3C2410_GPC13_OUTP	(0x01 << 26)
 #define S3C2410_GPC13_VD5	(0x02 << 26)
 #define S3C2400_GPC13_VD13	(0x02 << 26)
 
 #define S3C2410_GPC14           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 14)
-#define S3C2410_GPC14_INP	(0x00 << 28)
-#define S3C2410_GPC14_OUTP	(0x01 << 28)
 #define S3C2410_GPC14_VD6	(0x02 << 28)
 #define S3C2400_GPC14_VD14	(0x02 << 28)
 
 #define S3C2410_GPC15           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 15)
-#define S3C2410_GPC15_INP	(0x00 << 30)
-#define S3C2410_GPC15_OUTP	(0x01 << 30)
 #define S3C2410_GPC15_VD7	(0x02 << 30)
 #define S3C2400_GPC15_VD15	(0x02 << 30)
 
@@ -433,98 +356,66 @@
 #define S3C2400_GPDUP	   S3C2410_GPIOREG(0x28)
 
 #define S3C2410_GPD0            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 0)
-#define S3C2410_GPD0_INP	(0x00 << 0)
-#define S3C2410_GPD0_OUTP	(0x01 << 0)
 #define S3C2410_GPD0_VD8	(0x02 << 0)
 #define S3C2400_GPD0_VFRAME	(0x02 << 0)
 #define S3C2442_GPD0_nSPICS1	(0x03 << 0)
 
 #define S3C2410_GPD1            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 1)
-#define S3C2410_GPD1_INP	(0x00 << 2)
-#define S3C2410_GPD1_OUTP	(0x01 << 2)
 #define S3C2410_GPD1_VD9	(0x02 << 2)
 #define S3C2400_GPD1_VM		(0x02 << 2)
 #define S3C2442_GPD1_SPICLK1	(0x03 << 2)
 
 #define S3C2410_GPD2            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 2)
-#define S3C2410_GPD2_INP	(0x00 << 4)
-#define S3C2410_GPD2_OUTP	(0x01 << 4)
 #define S3C2410_GPD2_VD10	(0x02 << 4)
 #define S3C2400_GPD2_VLINE	(0x02 << 4)
 
 #define S3C2410_GPD3            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 3)
-#define S3C2410_GPD3_INP	(0x00 << 6)
-#define S3C2410_GPD3_OUTP	(0x01 << 6)
 #define S3C2410_GPD3_VD11	(0x02 << 6)
 #define S3C2400_GPD3_VCLK	(0x02 << 6)
 
 #define S3C2410_GPD4            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 4)
-#define S3C2410_GPD4_INP	(0x00 << 8)
-#define S3C2410_GPD4_OUTP	(0x01 << 8)
 #define S3C2410_GPD4_VD12	(0x02 << 8)
 #define S3C2400_GPD4_LEND	(0x02 << 8)
 
 #define S3C2410_GPD5            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 5)
-#define S3C2410_GPD5_INP	(0x00 << 10)
-#define S3C2410_GPD5_OUTP	(0x01 << 10)
 #define S3C2410_GPD5_VD13	(0x02 << 10)
 #define S3C2400_GPD5_TOUT0	(0x02 << 10)
 
 #define S3C2410_GPD6            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 6)
-#define S3C2410_GPD6_INP	(0x00 << 12)
-#define S3C2410_GPD6_OUTP	(0x01 << 12)
 #define S3C2410_GPD6_VD14	(0x02 << 12)
 #define S3C2400_GPD6_TOUT1	(0x02 << 12)
 
 #define S3C2410_GPD7            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 7)
-#define S3C2410_GPD7_INP	(0x00 << 14)
-#define S3C2410_GPD7_OUTP	(0x01 << 14)
 #define S3C2410_GPD7_VD15	(0x02 << 14)
 #define S3C2400_GPD7_TOUT2	(0x02 << 14)
 
 #define S3C2410_GPD8            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 8)
-#define S3C2410_GPD8_INP	(0x00 << 16)
-#define S3C2410_GPD8_OUTP	(0x01 << 16)
 #define S3C2410_GPD8_VD16	(0x02 << 16)
 #define S3C2400_GPD8_TOUT3	(0x02 << 16)
 
 #define S3C2410_GPD9            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 9)
-#define S3C2410_GPD9_INP	(0x00 << 18)
-#define S3C2410_GPD9_OUTP	(0x01 << 18)
 #define S3C2410_GPD9_VD17	(0x02 << 18)
 #define S3C2400_GPD9_TCLK0	(0x02 << 18)
 #define S3C2410_GPD9_MASK       (0x03 << 18)
 
 #define S3C2410_GPD10           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 10)
-#define S3C2410_GPD10_INP	(0x00 << 20)
-#define S3C2410_GPD10_OUTP	(0x01 << 20)
 #define S3C2410_GPD10_VD18	(0x02 << 20)
 #define S3C2400_GPD10_nWAIT	(0x02 << 20)
 
 #define S3C2410_GPD11           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 11)
-#define S3C2410_GPD11_INP	(0x00 << 22)
-#define S3C2410_GPD11_OUTP	(0x01 << 22)
 #define S3C2410_GPD11_VD19	(0x02 << 22)
 
 #define S3C2410_GPD12           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 12)
-#define S3C2410_GPD12_INP	(0x00 << 24)
-#define S3C2410_GPD12_OUTP	(0x01 << 24)
 #define S3C2410_GPD12_VD20	(0x02 << 24)
 
 #define S3C2410_GPD13           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 13)
-#define S3C2410_GPD13_INP	(0x00 << 26)
-#define S3C2410_GPD13_OUTP	(0x01 << 26)
 #define S3C2410_GPD13_VD21	(0x02 << 26)
 
 #define S3C2410_GPD14           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 14)
-#define S3C2410_GPD14_INP	(0x00 << 28)
-#define S3C2410_GPD14_OUTP	(0x01 << 28)
 #define S3C2410_GPD14_VD22	(0x02 << 28)
 #define S3C2410_GPD14_nSS1	(0x03 << 28)
 
 #define S3C2410_GPD15           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 15)
-#define S3C2410_GPD15_INP	(0x00 << 30)
-#define S3C2410_GPD15_OUTP	(0x01 << 30)
 #define S3C2410_GPD15_VD23	(0x02 << 30)
 #define S3C2410_GPD15_nSS0	(0x03 << 30)
 
@@ -551,16 +442,12 @@
 #define S3C2400_GPEUP	   S3C2410_GPIOREG(0x34)
 
 #define S3C2410_GPE0           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 0)
-#define S3C2410_GPE0_INP       (0x00 << 0)
-#define S3C2410_GPE0_OUTP      (0x01 << 0)
 #define S3C2410_GPE0_I2SLRCK   (0x02 << 0)
 #define S3C2443_GPE0_AC_nRESET (0x03 << 0)
 #define S3C2400_GPE0_EINT0     (0x02 << 0)
 #define S3C2410_GPE0_MASK      (0x03 << 0)
 
 #define S3C2410_GPE1           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 1)
-#define S3C2410_GPE1_INP       (0x00 << 2)
-#define S3C2410_GPE1_OUTP      (0x01 << 2)
 #define S3C2410_GPE1_I2SSCLK   (0x02 << 2)
 #define S3C2443_GPE1_AC_SYNC   (0x03 << 2)
 #define S3C2400_GPE1_EINT1     (0x02 << 2)
@@ -568,16 +455,12 @@
 #define S3C2410_GPE1_MASK      (0x03 << 2)
 
 #define S3C2410_GPE2           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 2)
-#define S3C2410_GPE2_INP       (0x00 << 4)
-#define S3C2410_GPE2_OUTP      (0x01 << 4)
 #define S3C2410_GPE2_CDCLK     (0x02 << 4)
 #define S3C2443_GPE2_AC_BITCLK (0x03 << 4)
 #define S3C2400_GPE2_EINT2     (0x02 << 4)
 #define S3C2400_GPE2_I2SSDI    (0x03 << 4)
 
 #define S3C2410_GPE3           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 3)
-#define S3C2410_GPE3_INP       (0x00 << 6)
-#define S3C2410_GPE3_OUTP      (0x01 << 6)
 #define S3C2410_GPE3_I2SSDI    (0x02 << 6)
 #define S3C2443_GPE3_AC_SDI    (0x03 << 6)
 #define S3C2400_GPE3_EINT3     (0x02 << 6)
@@ -586,8 +469,6 @@
 #define S3C2410_GPE3_MASK      (0x03 << 6)
 
 #define S3C2410_GPE4           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 4)
-#define S3C2410_GPE4_INP       (0x00 << 8)
-#define S3C2410_GPE4_OUTP      (0x01 << 8)
 #define S3C2410_GPE4_I2SSDO    (0x02 << 8)
 #define S3C2443_GPE4_AC_SDO    (0x03 << 8)
 #define S3C2400_GPE4_EINT4     (0x02 << 8)
@@ -596,40 +477,30 @@
 #define S3C2410_GPE4_MASK      (0x03 << 8)
 
 #define S3C2410_GPE5           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 5)
-#define S3C2410_GPE5_INP       (0x00 << 10)
-#define S3C2410_GPE5_OUTP      (0x01 << 10)
 #define S3C2410_GPE5_SDCLK     (0x02 << 10)
 #define S3C2443_GPE5_SD1_CLK   (0x02 << 10)
 #define S3C2400_GPE5_EINT5     (0x02 << 10)
 #define S3C2400_GPE5_TCLK1     (0x03 << 10)
 
 #define S3C2410_GPE6           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 6)
-#define S3C2410_GPE6_INP       (0x00 << 12)
-#define S3C2410_GPE6_OUTP      (0x01 << 12)
 #define S3C2410_GPE6_SDCMD     (0x02 << 12)
 #define S3C2443_GPE6_SD1_CMD   (0x02 << 12)
 #define S3C2443_GPE6_AC_BITCLK (0x03 << 12)
 #define S3C2400_GPE6_EINT6     (0x02 << 12)
 
 #define S3C2410_GPE7           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 7)
-#define S3C2410_GPE7_INP       (0x00 << 14)
-#define S3C2410_GPE7_OUTP      (0x01 << 14)
 #define S3C2410_GPE7_SDDAT0    (0x02 << 14)
 #define S3C2443_GPE5_SD1_DAT0  (0x02 << 14)
 #define S3C2443_GPE7_AC_SDI    (0x03 << 14)
 #define S3C2400_GPE7_EINT7     (0x02 << 14)
 
 #define S3C2410_GPE8           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 8)
-#define S3C2410_GPE8_INP       (0x00 << 16)
-#define S3C2410_GPE8_OUTP      (0x01 << 16)
 #define S3C2410_GPE8_SDDAT1    (0x02 << 16)
 #define S3C2443_GPE8_SD1_DAT1  (0x02 << 16)
 #define S3C2443_GPE8_AC_SDO    (0x03 << 16)
 #define S3C2400_GPE8_nXDACK0   (0x02 << 16)
 
 #define S3C2410_GPE9           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 9)
-#define S3C2410_GPE9_INP       (0x00 << 18)
-#define S3C2410_GPE9_OUTP      (0x01 << 18)
 #define S3C2410_GPE9_SDDAT2    (0x02 << 18)
 #define S3C2443_GPE9_SD1_DAT2  (0x02 << 18)
 #define S3C2443_GPE9_AC_SYNC   (0x03 << 18)
@@ -637,39 +508,27 @@
 #define S3C2400_GPE9_nXBACK    (0x03 << 18)
 
 #define S3C2410_GPE10          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 10)
-#define S3C2410_GPE10_INP      (0x00 << 20)
-#define S3C2410_GPE10_OUTP     (0x01 << 20)
 #define S3C2410_GPE10_SDDAT3   (0x02 << 20)
 #define S3C2443_GPE10_SD1_DAT3 (0x02 << 20)
 #define S3C2443_GPE10_AC_nRESET (0x03 << 20)
 #define S3C2400_GPE10_nXDREQ0  (0x02 << 20)
 
 #define S3C2410_GPE11          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 11)
-#define S3C2410_GPE11_INP      (0x00 << 22)
-#define S3C2410_GPE11_OUTP     (0x01 << 22)
 #define S3C2410_GPE11_SPIMISO0 (0x02 << 22)
 #define S3C2400_GPE11_nXDREQ1  (0x02 << 22)
 #define S3C2400_GPE11_nXBREQ   (0x03 << 22)
 
 #define S3C2410_GPE12          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 12)
-#define S3C2410_GPE12_INP      (0x00 << 24)
-#define S3C2410_GPE12_OUTP     (0x01 << 24)
 #define S3C2410_GPE12_SPIMOSI0 (0x02 << 24)
 
 #define S3C2410_GPE13          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 13)
-#define S3C2410_GPE13_INP      (0x00 << 26)
-#define S3C2410_GPE13_OUTP     (0x01 << 26)
 #define S3C2410_GPE13_SPICLK0  (0x02 << 26)
 
 #define S3C2410_GPE14          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 14)
-#define S3C2410_GPE14_INP      (0x00 << 28)
-#define S3C2410_GPE14_OUTP     (0x01 << 28)
 #define S3C2410_GPE14_IICSCL   (0x02 << 28)
 #define S3C2410_GPE14_MASK     (0x03 << 28)
 
 #define S3C2410_GPE15          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 15)
-#define S3C2410_GPE15_INP      (0x00 << 30)
-#define S3C2410_GPE15_OUTP     (0x01 << 30)
 #define S3C2410_GPE15_IICSDA   (0x02 << 30)
 #define S3C2410_GPE15_MASK     (0x03 << 30)
 
@@ -706,54 +565,38 @@
 #define S3C2400_GPFUP	   S3C2410_GPIOREG(0x40)
 
 #define S3C2410_GPF0        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 0)
-#define S3C2410_GPF0_INP    (0x00 << 0)
-#define S3C2410_GPF0_OUTP   (0x01 << 0)
 #define S3C2410_GPF0_EINT0  (0x02 << 0)
 #define S3C2400_GPF0_RXD0   (0x02 << 0)
 
 #define S3C2410_GPF1        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 1)
-#define S3C2410_GPF1_INP    (0x00 << 2)
-#define S3C2410_GPF1_OUTP   (0x01 << 2)
 #define S3C2410_GPF1_EINT1  (0x02 << 2)
 #define S3C2400_GPF1_RXD1   (0x02 << 2)
 #define S3C2400_GPF1_IICSDA (0x03 << 2)
 
 #define S3C2410_GPF2        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 2)
-#define S3C2410_GPF2_INP    (0x00 << 4)
-#define S3C2410_GPF2_OUTP   (0x01 << 4)
 #define S3C2410_GPF2_EINT2  (0x02 << 4)
 #define S3C2400_GPF2_TXD0   (0x02 << 4)
 
 #define S3C2410_GPF3        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 3)
-#define S3C2410_GPF3_INP    (0x00 << 6)
-#define S3C2410_GPF3_OUTP   (0x01 << 6)
 #define S3C2410_GPF3_EINT3  (0x02 << 6)
 #define S3C2400_GPF3_TXD1   (0x02 << 6)
 #define S3C2400_GPF3_IICSCL (0x03 << 6)
 
 #define S3C2410_GPF4        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 4)
-#define S3C2410_GPF4_INP    (0x00 << 8)
-#define S3C2410_GPF4_OUTP   (0x01 << 8)
 #define S3C2410_GPF4_EINT4  (0x02 << 8)
 #define S3C2400_GPF4_nRTS0  (0x02 << 8)
 #define S3C2400_GPF4_nXBACK (0x03 << 8)
 
 #define S3C2410_GPF5        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 5)
-#define S3C2410_GPF5_INP    (0x00 << 10)
-#define S3C2410_GPF5_OUTP   (0x01 << 10)
 #define S3C2410_GPF5_EINT5  (0x02 << 10)
 #define S3C2400_GPF5_nCTS0  (0x02 << 10)
 #define S3C2400_GPF5_nXBREQ (0x03 << 10)
 
 #define S3C2410_GPF6        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 6)
-#define S3C2410_GPF6_INP    (0x00 << 12)
-#define S3C2410_GPF6_OUTP   (0x01 << 12)
 #define S3C2410_GPF6_EINT6  (0x02 << 12)
 #define S3C2400_GPF6_CLKOUT (0x02 << 12)
 
 #define S3C2410_GPF7        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 7)
-#define S3C2410_GPF7_INP    (0x00 << 14)
-#define S3C2410_GPF7_OUTP   (0x01 << 14)
 #define S3C2410_GPF7_EINT7  (0x02 << 14)
 
 #define S3C2410_GPF_PUPDIS(x)  (1<<(x))
@@ -779,35 +622,25 @@
 #define S3C2400_GPGUP	   S3C2410_GPIOREG(0x4C)
 
 #define S3C2410_GPG0          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 0)
-#define S3C2410_GPG0_INP      (0x00 << 0)
-#define S3C2410_GPG0_OUTP     (0x01 << 0)
 #define S3C2410_GPG0_EINT8    (0x02 << 0)
 #define S3C2400_GPG0_I2SLRCK  (0x02 << 0)
 
 #define S3C2410_GPG1          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 1)
-#define S3C2410_GPG1_INP      (0x00 << 2)
-#define S3C2410_GPG1_OUTP     (0x01 << 2)
 #define S3C2410_GPG1_EINT9    (0x02 << 2)
 #define S3C2400_GPG1_I2SSCLK  (0x02 << 2)
 
 #define S3C2410_GPG2          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 2)
-#define S3C2410_GPG2_INP      (0x00 << 4)
-#define S3C2410_GPG2_OUTP     (0x01 << 4)
 #define S3C2410_GPG2_EINT10   (0x02 << 4)
 #define S3C2410_GPG2_nSS0     (0x03 << 4)
 #define S3C2400_GPG2_CDCLK    (0x02 << 4)
 
 #define S3C2410_GPG3          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 3)
-#define S3C2410_GPG3_INP      (0x00 << 6)
-#define S3C2410_GPG3_OUTP     (0x01 << 6)
 #define S3C2410_GPG3_EINT11   (0x02 << 6)
 #define S3C2410_GPG3_nSS1     (0x03 << 6)
 #define S3C2400_GPG3_I2SSDO   (0x02 << 6)
 #define S3C2400_GPG3_I2SSDI   (0x03 << 6)
 
 #define S3C2410_GPG4          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 4)
-#define S3C2410_GPG4_INP      (0x00 << 8)
-#define S3C2410_GPG4_OUTP     (0x01 << 8)
 #define S3C2410_GPG4_EINT12   (0x02 << 8)
 #define S3C2400_GPG4_MMCCLK   (0x02 << 8)
 #define S3C2400_GPG4_I2SSDI   (0x03 << 8)
@@ -815,80 +648,58 @@
 #define S3C2443_GPG4_LCDPWRDN (0x03 << 8)
 
 #define S3C2410_GPG5          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 5)
-#define S3C2410_GPG5_INP      (0x00 << 10)
-#define S3C2410_GPG5_OUTP     (0x01 << 10)
 #define S3C2410_GPG5_EINT13   (0x02 << 10)
 #define S3C2400_GPG5_MMCCMD   (0x02 << 10)
 #define S3C2400_GPG5_IICSDA   (0x03 << 10)
 #define S3C2410_GPG5_SPIMISO1 (0x03 << 10)	/* not s3c2443 */
 
 #define S3C2410_GPG6          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 6)
-#define S3C2410_GPG6_INP      (0x00 << 12)
-#define S3C2410_GPG6_OUTP     (0x01 << 12)
 #define S3C2410_GPG6_EINT14   (0x02 << 12)
 #define S3C2400_GPG6_MMCDAT   (0x02 << 12)
 #define S3C2400_GPG6_IICSCL   (0x03 << 12)
 #define S3C2410_GPG6_SPIMOSI1 (0x03 << 12)
 
 #define S3C2410_GPG7          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 7)
-#define S3C2410_GPG7_INP      (0x00 << 14)
-#define S3C2410_GPG7_OUTP     (0x01 << 14)
 #define S3C2410_GPG7_EINT15   (0x02 << 14)
 #define S3C2410_GPG7_SPICLK1  (0x03 << 14)
 #define S3C2400_GPG7_SPIMISO  (0x02 << 14)
 #define S3C2400_GPG7_IICSDA   (0x03 << 14)
 
 #define S3C2410_GPG8          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 8)
-#define S3C2410_GPG8_INP      (0x00 << 16)
-#define S3C2410_GPG8_OUTP     (0x01 << 16)
 #define S3C2410_GPG8_EINT16   (0x02 << 16)
 #define S3C2400_GPG8_SPIMOSI  (0x02 << 16)
 #define S3C2400_GPG8_IICSCL   (0x03 << 16)
 
 #define S3C2410_GPG9          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 9)
-#define S3C2410_GPG9_INP      (0x00 << 18)
-#define S3C2410_GPG9_OUTP     (0x01 << 18)
 #define S3C2410_GPG9_EINT17   (0x02 << 18)
 #define S3C2400_GPG9_SPICLK   (0x02 << 18)
 #define S3C2400_GPG9_MMCCLK   (0x03 << 18)
 
 #define S3C2410_GPG10         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 10)
-#define S3C2410_GPG10_INP     (0x00 << 20)
-#define S3C2410_GPG10_OUTP    (0x01 << 20)
 #define S3C2410_GPG10_EINT18  (0x02 << 20)
 
 #define S3C2410_GPG11         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 11)
-#define S3C2410_GPG11_INP     (0x00 << 22)
-#define S3C2410_GPG11_OUTP    (0x01 << 22)
 #define S3C2410_GPG11_EINT19  (0x02 << 22)
 #define S3C2410_GPG11_TCLK1   (0x03 << 22)
 #define S3C2443_GPG11_CF_nIREQ (0x03 << 22)
 
 #define S3C2410_GPG12         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 12)
-#define S3C2410_GPG12_INP     (0x00 << 24)
-#define S3C2410_GPG12_OUTP    (0x01 << 24)
 #define S3C2410_GPG12_EINT20  (0x02 << 24)
 #define S3C2410_GPG12_XMON    (0x03 << 24)
 #define S3C2442_GPG12_nSPICS0 (0x03 << 24)
 #define S3C2443_GPG12_nINPACK (0x03 << 24)
 
 #define S3C2410_GPG13         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 13)
-#define S3C2410_GPG13_INP     (0x00 << 26)
-#define S3C2410_GPG13_OUTP    (0x01 << 26)
 #define S3C2410_GPG13_EINT21  (0x02 << 26)
 #define S3C2410_GPG13_nXPON   (0x03 << 26)
 #define S3C2443_GPG13_CF_nREG (0x03 << 26)
 
 #define S3C2410_GPG14         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 14)
-#define S3C2410_GPG14_INP     (0x00 << 28)
-#define S3C2410_GPG14_OUTP    (0x01 << 28)
 #define S3C2410_GPG14_EINT22  (0x02 << 28)
 #define S3C2410_GPG14_YMON    (0x03 << 28)
 #define S3C2443_GPG14_CF_RESET (0x03 << 28)
 
 #define S3C2410_GPG15         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 15)
-#define S3C2410_GPG15_INP     (0x00 << 30)
-#define S3C2410_GPG15_OUTP    (0x01 << 30)
 #define S3C2410_GPG15_EINT23  (0x02 << 30)
 #define S3C2410_GPG15_nYPON   (0x03 << 30)
 #define S3C2443_GPG15_CF_PWR  (0x03 << 30)
@@ -908,61 +719,39 @@
 #define S3C2410_GPHUP	   S3C2410_GPIOREG(0x78)
 
 #define S3C2410_GPH0        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 0)
-#define S3C2410_GPH0_INP    (0x00 << 0)
-#define S3C2410_GPH0_OUTP   (0x01 << 0)
 #define S3C2410_GPH0_nCTS0  (0x02 << 0)
 
 #define S3C2410_GPH1        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 1)
-#define S3C2410_GPH1_INP    (0x00 << 2)
-#define S3C2410_GPH1_OUTP   (0x01 << 2)
 #define S3C2410_GPH1_nRTS0  (0x02 << 2)
 
 #define S3C2410_GPH2        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 2)
-#define S3C2410_GPH2_INP    (0x00 << 4)
-#define S3C2410_GPH2_OUTP   (0x01 << 4)
 #define S3C2410_GPH2_TXD0   (0x02 << 4)
 
 #define S3C2410_GPH3        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 3)
-#define S3C2410_GPH3_INP    (0x00 << 6)
-#define S3C2410_GPH3_OUTP   (0x01 << 6)
 #define S3C2410_GPH3_RXD0   (0x02 << 6)
 
 #define S3C2410_GPH4        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 4)
-#define S3C2410_GPH4_INP    (0x00 << 8)
-#define S3C2410_GPH4_OUTP   (0x01 << 8)
 #define S3C2410_GPH4_TXD1   (0x02 << 8)
 
 #define S3C2410_GPH5        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 5)
-#define S3C2410_GPH5_INP    (0x00 << 10)
-#define S3C2410_GPH5_OUTP   (0x01 << 10)
 #define S3C2410_GPH5_RXD1   (0x02 << 10)
 
 #define S3C2410_GPH6        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 6)
-#define S3C2410_GPH6_INP    (0x00 << 12)
-#define S3C2410_GPH6_OUTP   (0x01 << 12)
 #define S3C2410_GPH6_TXD2   (0x02 << 12)
 #define S3C2410_GPH6_nRTS1  (0x03 << 12)
 
 #define S3C2410_GPH7        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 7)
-#define S3C2410_GPH7_INP    (0x00 << 14)
-#define S3C2410_GPH7_OUTP   (0x01 << 14)
 #define S3C2410_GPH7_RXD2   (0x02 << 14)
 #define S3C2410_GPH7_nCTS1  (0x03 << 14)
 
 #define S3C2410_GPH8        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 8)
-#define S3C2410_GPH8_INP    (0x00 << 16)
-#define S3C2410_GPH8_OUTP   (0x01 << 16)
 #define S3C2410_GPH8_UCLK   (0x02 << 16)
 
 #define S3C2410_GPH9          S3C2410_GPIONO(S3C2410_GPIO_BANKH, 9)
-#define S3C2410_GPH9_INP      (0x00 << 18)
-#define S3C2410_GPH9_OUTP     (0x01 << 18)
 #define S3C2410_GPH9_CLKOUT0  (0x02 << 18)
 #define S3C2442_GPH9_nSPICS0  (0x03 << 18)
 
 #define S3C2410_GPH10         S3C2410_GPIONO(S3C2410_GPIO_BANKH, 10)
-#define S3C2410_GPH10_INP     (0x00 << 20)
-#define S3C2410_GPH10_OUTP    (0x01 << 20)
 #define S3C2410_GPH10_CLKOUT1 (0x02 << 20)
 
 /* The S3C2412 and S3C2413 move the GPJ register set to after
-- 
cgit v0.10.2


From fda7b2b097fed9f88bc93ed3de0caea87ffe778e Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 17 May 2009 22:18:27 +0100
Subject: [ARM] S3C24XX: GPIO: Start removal of S3C24XX_GPIO_BASE

The S3C24XX_GPIO_BASE makes it difficult to compress the
GPIO number space, and is only used in a few places of
which everything outside arch/arm/plat-s3c24xx/gpiolib.c
will be removed as soon as possible.

Change gpiolib.c to use the S3C2410_GPxCON register addresses
as the base for each bank, thus eliminating S3C24XX_GPIO_BASE.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c24xx/gpiolib.c b/arch/arm/plat-s3c24xx/gpiolib.c
index 3cbec6d..60a9f72 100644
--- a/arch/arm/plat-s3c24xx/gpiolib.c
+++ b/arch/arm/plat-s3c24xx/gpiolib.c
@@ -79,7 +79,7 @@ static int s3c24xx_gpiolib_bankg_toirq(struct gpio_chip *chip, unsigned offset)
 
 struct s3c_gpio_chip s3c24xx_gpios[] = {
 	[0] = {
-		.base	= S3C24XX_GPIO_BASE(S3C2410_GPA0),
+		.base	= S3C2410_GPACON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_1bit),
 		.chip	= {
 			.base			= S3C2410_GPA0,
@@ -91,7 +91,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		},
 	},
 	[1] = {
-		.base	= S3C24XX_GPIO_BASE(S3C2410_GPB0),
+		.base	= S3C2410_GPBCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPB0,
@@ -101,7 +101,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		},
 	},
 	[2] = {
-		.base	= S3C24XX_GPIO_BASE(S3C2410_GPC0),
+		.base	= S3C2410_GPCCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPC0,
@@ -111,7 +111,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		},
 	},
 	[3] = {
-		.base	= S3C24XX_GPIO_BASE(S3C2410_GPD0),
+		.base	= S3C2410_GPDCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPD0,
@@ -121,7 +121,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		},
 	},
 	[4] = {
-		.base	= S3C24XX_GPIO_BASE(S3C2410_GPE0),
+		.base	= S3C2410_GPECON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPE0,
@@ -131,7 +131,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		},
 	},
 	[5] = {
-		.base	= S3C24XX_GPIO_BASE(S3C2410_GPF0),
+		.base	= S3C2410_GPFCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPF0,
@@ -142,7 +142,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		},
 	},
 	[6] = {
-		.base	= S3C24XX_GPIO_BASE(S3C2410_GPG0),
+		.base	= S3C2410_GPGCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
 			.base			= S3C2410_GPG0,
-- 
cgit v0.10.2


From 75cbcff3729fe2568dff38d16d6494f8fb7f59fe Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 17 May 2009 22:44:33 +0100
Subject: [ARM] S3C24XX: GPIO: Add S3C64XX style GPIO numbering

Move the new style of GPIO numbering by using a single
macro for each GPIO bank. This means S3C2410_GPA0 becomes
S3C2410_GPA(0), and so on.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/include/mach/gpio-nrs.h b/arch/arm/mach-s3c2410/include/mach/gpio-nrs.h
index ce1ec69..5213e45 100644
--- a/arch/arm/mach-s3c2410/include/mach/gpio-nrs.h
+++ b/arch/arm/mach-s3c2410/include/mach/gpio-nrs.h
@@ -11,6 +11,9 @@
  * published by the Free Software Foundation.
 */
 
+#ifndef __MACH_GPIONRS_H
+#define __MACH_GPIONRS_H
+
 #define S3C2410_GPIONO(bank,offset) ((bank) + (offset))
 
 #define S3C2410_GPIO_BANKA   (32*0)
@@ -21,3 +24,49 @@
 #define S3C2410_GPIO_BANKF   (32*5)
 #define S3C2410_GPIO_BANKG   (32*6)
 #define S3C2410_GPIO_BANKH   (32*7)
+
+/* GPIO bank sizes */
+#define S3C2410_GPIO_A_NR	(32)
+#define S3C2410_GPIO_B_NR	(32)
+#define S3C2410_GPIO_C_NR	(32)
+#define S3C2410_GPIO_D_NR	(32)
+#define S3C2410_GPIO_E_NR	(32)
+#define S3C2410_GPIO_F_NR	(32)
+#define S3C2410_GPIO_G_NR	(32)
+#define S3C2410_GPIO_H_NR	(32)
+
+#if CONFIG_S3C_GPIO_SPACE != 0
+#error CONFIG_S3C_GPIO_SPACE cannot be zero at the moment
+#endif
+
+#define S3C2410_GPIO_NEXT(__gpio) \
+	((__gpio##_START) + (__gpio##_NR) + CONFIG_S3C_GPIO_SPACE + 0)
+
+#ifndef __ASSEMBLY__
+
+enum s3c_gpio_number {
+	S3C2410_GPIO_A_START = 0,
+	S3C2410_GPIO_B_START = S3C2410_GPIO_NEXT(S3C2410_GPIO_A),
+	S3C2410_GPIO_C_START = S3C2410_GPIO_NEXT(S3C2410_GPIO_B),
+	S3C2410_GPIO_D_START = S3C2410_GPIO_NEXT(S3C2410_GPIO_C),
+	S3C2410_GPIO_E_START = S3C2410_GPIO_NEXT(S3C2410_GPIO_D),
+	S3C2410_GPIO_F_START = S3C2410_GPIO_NEXT(S3C2410_GPIO_E),
+	S3C2410_GPIO_G_START = S3C2410_GPIO_NEXT(S3C2410_GPIO_F),
+	S3C2410_GPIO_H_START = S3C2410_GPIO_NEXT(S3C2410_GPIO_G),
+};
+
+#endif /* __ASSEMBLY__ */
+
+/* S3C2410 GPIO number definitions. */
+
+#define S3C2410_GPA(_nr)	(S3C2410_GPIO_A_START + (_nr))
+#define S3C2410_GPB(_nr)	(S3C2410_GPIO_B_START + (_nr))
+#define S3C2410_GPC(_nr)	(S3C2410_GPIO_C_START + (_nr))
+#define S3C2410_GPD(_nr)	(S3C2410_GPIO_D_START + (_nr))
+#define S3C2410_GPE(_nr)	(S3C2410_GPIO_E_START + (_nr))
+#define S3C2410_GPF(_nr)	(S3C2410_GPIO_F_START + (_nr))
+#define S3C2410_GPG(_nr)	(S3C2410_GPIO_G_START + (_nr))
+#define S3C2410_GPH(_nr)	(S3C2410_GPIO_H_START + (_nr))
+
+#endif /* __MACH_GPIONRS_H */
+
-- 
cgit v0.10.2


From 070276d5d049f385763dee19112bea08f56c9a0d Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 17 May 2009 22:32:23 +0100
Subject: [ARM] S3C24XX: GPIO: Change to macros for GPIO numbering

Prepare to remove the large number of S3C2410_GPxn defines
by moving to S3C2410_GPx(n) in arch/arm.

The following perl was used to change the files:

    perl -pi~ -e 's/S3C2410_GP([A-Z])([0-9]+)([^_^0-9])/S3C2410_GP\1\(\2\)\3/g'

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
index ea7ccfc..948c871 100644
--- a/Documentation/arm/Samsung-S3C24XX/GPIO.txt
+++ b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
@@ -51,7 +51,7 @@ PIN Numbers
 -----------
 
   Each pin has an unique number associated with it in regs-gpio.h,
-  eg S3C2410_GPA0 or S3C2410_GPF1. These defines are used to tell
+  eg S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell
   the GPIO functions which pin is to be used.
 
 
@@ -65,11 +65,11 @@ Configuring a pin
 
   Eg:
 
-     s3c2410_gpio_cfgpin(S3C2410_GPA0, S3C2410_GPA0_ADDR0);
-     s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1);
+     s3c2410_gpio_cfgpin(S3C2410_GPA(0), S3C2410_GPA0_ADDR0);
+     s3c2410_gpio_cfgpin(S3C2410_GPE(8), S3C2410_GPE8_SDDAT1);
 
-   which would turn GPA0 into the lowest Address line A0, and set
-   GPE8 to be connected to the SDIO/MMC controller's SDDAT1 line.
+   which would turn GPA(0) into the lowest Address line A0, and set
+   GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line.
 
 
 Reading the current configuration
diff --git a/arch/arm/mach-s3c2400/gpio.c b/arch/arm/mach-s3c2400/gpio.c
index 7a7ed417..81ebe2f 100644
--- a/arch/arm/mach-s3c2400/gpio.c
+++ b/arch/arm/mach-s3c2400/gpio.c
@@ -33,10 +33,10 @@
 
 int s3c2400_gpio_getirq(unsigned int pin)
 {
-	if (pin < S3C2410_GPE0 || pin > S3C2400_GPE7_EINT7)
+	if (pin < S3C2410_GPE(0) || pin > S3C2400_GPE7_EINT7)
 		return -1;  /* not valid interrupts */
 
-	return (pin - S3C2410_GPE0) + IRQ_EINT0;
+	return (pin - S3C2410_GPE(0)) + IRQ_EINT0;
 }
 
 EXPORT_SYMBOL(s3c2400_gpio_getirq);
diff --git a/arch/arm/mach-s3c2410/gpio.c b/arch/arm/mach-s3c2410/gpio.c
index 36a3132..bf7fbfe 100644
--- a/arch/arm/mach-s3c2410/gpio.c
+++ b/arch/arm/mach-s3c2410/gpio.c
@@ -39,12 +39,12 @@ int s3c2410_gpio_irqfilter(unsigned int pin, unsigned int on,
 	unsigned long flags;
 	unsigned long val;
 
-	if (pin < S3C2410_GPG8 || pin > S3C2410_GPG15)
+	if (pin < S3C2410_GPG(8) || pin > S3C2410_GPG(15))
 		return -1;
 
 	config &= 0xff;
 
-	pin -= S3C2410_GPG8;
+	pin -= S3C2410_GPG(8);
 	reg += pin & ~3;
 
 	local_irq_save(flags);
diff --git a/arch/arm/mach-s3c2410/h1940-bluetooth.c b/arch/arm/mach-s3c2410/h1940-bluetooth.c
index 9bbea8a..5aabf11 100644
--- a/arch/arm/mach-s3c2410/h1940-bluetooth.c
+++ b/arch/arm/mach-s3c2410/h1940-bluetooth.c
@@ -43,9 +43,9 @@ static void h1940bt_enable(int on)
 		h1940_latch_control(0, H1940_LATCH_BLUETOOTH_POWER);
 		/* Reset the chip */
 		mdelay(10);
-		s3c2410_gpio_setpin(S3C2410_GPH1, 1);
+		s3c2410_gpio_setpin(S3C2410_GPH(1), 1);
 		mdelay(10);
-		s3c2410_gpio_setpin(S3C2410_GPH1, 0);
+		s3c2410_gpio_setpin(S3C2410_GPH(1), 0);
 
 		state = 1;
 	}
@@ -54,9 +54,9 @@ static void h1940bt_enable(int on)
 		led_trigger_event(bt_led_trigger, 0);
 #endif
 
-		s3c2410_gpio_setpin(S3C2410_GPH1, 1);
+		s3c2410_gpio_setpin(S3C2410_GPH(1), 1);
 		mdelay(10);
-		s3c2410_gpio_setpin(S3C2410_GPH1, 0);
+		s3c2410_gpio_setpin(S3C2410_GPH(1), 0);
 		mdelay(10);
 		h1940_latch_control(H1940_LATCH_BLUETOOTH_POWER, 0);
 
@@ -89,14 +89,14 @@ static DEVICE_ATTR(enable, 0644,
 static int __init h1940bt_probe(struct platform_device *pdev)
 {
 	/* Configures BT serial port GPIOs */
-	s3c2410_gpio_cfgpin(S3C2410_GPH0, S3C2410_GPH0_nCTS0);
-	s3c2410_gpio_pullup(S3C2410_GPH0, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPH1, S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_pullup(S3C2410_GPH1, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPH2, S3C2410_GPH2_TXD0);
-	s3c2410_gpio_pullup(S3C2410_GPH2, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPH3, S3C2410_GPH3_RXD0);
-	s3c2410_gpio_pullup(S3C2410_GPH3, 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0);
+	s3c2410_gpio_pullup(S3C2410_GPH(0), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPH(1), S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_pullup(S3C2410_GPH(1), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPH(2), S3C2410_GPH2_TXD0);
+	s3c2410_gpio_pullup(S3C2410_GPH(2), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPH(3), S3C2410_GPH3_RXD0);
+	s3c2410_gpio_pullup(S3C2410_GPH(3), 1);
 
 #ifdef CONFIG_LEDS_H1940
 	led_trigger_register_simple("h1940-bluetooth", &bt_led_trigger);
diff --git a/arch/arm/mach-s3c2410/include/mach/gpio-core.h b/arch/arm/mach-s3c2410/include/mach/gpio-core.h
index 6c9fbb9..8fe1920 100644
--- a/arch/arm/mach-s3c2410/include/mach/gpio-core.h
+++ b/arch/arm/mach-s3c2410/include/mach/gpio-core.h
@@ -24,7 +24,7 @@ static inline struct s3c_gpio_chip *s3c_gpiolib_getchip(unsigned int pin)
 {
 	struct s3c_gpio_chip *chip;
 
-	if (pin > S3C2410_GPG10)
+	if (pin > S3C2410_GPG(10))
 		return NULL;
 
 	chip = &s3c24xx_gpios[pin/32];
diff --git a/arch/arm/mach-s3c2410/include/mach/gpio-fns.h b/arch/arm/mach-s3c2410/include/mach/gpio-fns.h
index b4f3955..801dff1 100644
--- a/arch/arm/mach-s3c2410/include/mach/gpio-fns.h
+++ b/arch/arm/mach-s3c2410/include/mach/gpio-fns.h
@@ -30,8 +30,8 @@
  * set the configuration of the given pin to the value passed.
  *
  * eg:
- *    s3c2410_gpio_cfgpin(S3C2410_GPA0, S3C2410_GPA0_ADDR0);
- *    s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1);
+ *    s3c2410_gpio_cfgpin(S3C2410_GPA(0), S3C2410_GPA0_ADDR0);
+ *    s3c2410_gpio_cfgpin(S3C2410_GPE(8), S3C2410_GPE8_SDDAT1);
 */
 
 extern void s3c2410_gpio_cfgpin(unsigned int pin, unsigned int function);
@@ -80,8 +80,8 @@ extern int s3c2410_gpio_irqfilter(unsigned int pin, unsigned int on,
  *
  * eg;
  *
- *   s3c2410_gpio_pullup(S3C2410_GPB0, 0);
- *   s3c2410_gpio_pullup(S3C2410_GPE8, 0);
+ *   s3c2410_gpio_pullup(S3C2410_GPB(0), 0);
+ *   s3c2410_gpio_pullup(S3C2410_GPE(8), 0);
 */
 
 extern void s3c2410_gpio_pullup(unsigned int pin, unsigned int to);
diff --git a/arch/arm/mach-s3c2410/include/mach/gpio-nrs.h b/arch/arm/mach-s3c2410/include/mach/gpio-nrs.h
index 5213e45..2edbb9c 100644
--- a/arch/arm/mach-s3c2410/include/mach/gpio-nrs.h
+++ b/arch/arm/mach-s3c2410/include/mach/gpio-nrs.h
@@ -68,5 +68,26 @@ enum s3c_gpio_number {
 #define S3C2410_GPG(_nr)	(S3C2410_GPIO_G_START + (_nr))
 #define S3C2410_GPH(_nr)	(S3C2410_GPIO_H_START + (_nr))
 
+/* compatibility until drivers can be modified */
+
+#define S3C2410_GPA0	S3C2410_GPA(0)
+#define S3C2410_GPA1	S3C2410_GPA(1)
+#define S3C2410_GPA3	S3C2410_GPA(3)
+#define S3C2410_GPA7	S3C2410_GPA(7)
+
+#define S3C2410_GPE0	S3C2410_GPE(0)
+#define S3C2410_GPE1	S3C2410_GPE(1)
+#define S3C2410_GPE2	S3C2410_GPE(2)
+#define S3C2410_GPE3	S3C2410_GPE(3)
+#define S3C2410_GPE4	S3C2410_GPE(4)
+#define S3C2410_GPE5	S3C2410_GPE(5)
+#define S3C2410_GPE6	S3C2410_GPE(6)
+#define S3C2410_GPE7	S3C2410_GPE(7)
+#define S3C2410_GPE8	S3C2410_GPE(8)
+#define S3C2410_GPE9	S3C2410_GPE(9)
+#define S3C2410_GPE10	S3C2410_GPE(10)
+
+#define S3C2410_GPH10	S3C2410_GPH(10)
+
 #endif /* __MACH_GPIONRS_H */
 
diff --git a/arch/arm/mach-s3c2410/include/mach/regs-gpio.h b/arch/arm/mach-s3c2410/include/mach/regs-gpio.h
index 90cefeb..b278d0c 100644
--- a/arch/arm/mach-s3c2410/include/mach/regs-gpio.h
+++ b/arch/arm/mach-s3c2410/include/mach/regs-gpio.h
@@ -69,81 +69,58 @@
 #define S3C2400_GPACON	   S3C2410_GPIOREG(0x00)
 #define S3C2400_GPADAT	   S3C2410_GPIOREG(0x04)
 
-#define S3C2410_GPA0         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 0)
 #define S3C2410_GPA0_ADDR0   (1<<0)
 
-#define S3C2410_GPA1         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 1)
 #define S3C2410_GPA1_ADDR16  (1<<1)
 
-#define S3C2410_GPA2         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 2)
 #define S3C2410_GPA2_ADDR17  (1<<2)
 
-#define S3C2410_GPA3         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 3)
 #define S3C2410_GPA3_ADDR18  (1<<3)
 
-#define S3C2410_GPA4         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 4)
 #define S3C2410_GPA4_ADDR19  (1<<4)
 
-#define S3C2410_GPA5         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 5)
 #define S3C2410_GPA5_ADDR20  (1<<5)
 
-#define S3C2410_GPA6         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 6)
 #define S3C2410_GPA6_ADDR21  (1<<6)
 
-#define S3C2410_GPA7         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 7)
 #define S3C2410_GPA7_ADDR22  (1<<7)
 
-#define S3C2410_GPA8         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 8)
 #define S3C2410_GPA8_ADDR23  (1<<8)
 
-#define S3C2410_GPA9         S3C2410_GPIONO(S3C2410_GPIO_BANKA, 9)
 #define S3C2410_GPA9_ADDR24  (1<<9)
 
-#define S3C2410_GPA10        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 10)
 #define S3C2410_GPA10_ADDR25 (1<<10)
 #define S3C2400_GPA10_SCKE   (1<<10)
 
-#define S3C2410_GPA11        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 11)
 #define S3C2410_GPA11_ADDR26 (1<<11)
 #define S3C2400_GPA11_nCAS0  (1<<11)
 
-#define S3C2410_GPA12        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 12)
 #define S3C2410_GPA12_nGCS1  (1<<12)
 #define S3C2400_GPA12_nCAS1  (1<<12)
 
-#define S3C2410_GPA13        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 13)
 #define S3C2410_GPA13_nGCS2  (1<<13)
 #define S3C2400_GPA13_nGCS1  (1<<13)
 
-#define S3C2410_GPA14        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 14)
 #define S3C2410_GPA14_nGCS3  (1<<14)
 #define S3C2400_GPA14_nGCS2  (1<<14)
 
-#define S3C2410_GPA15        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 15)
 #define S3C2410_GPA15_nGCS4  (1<<15)
 #define S3C2400_GPA15_nGCS3  (1<<15)
 
-#define S3C2410_GPA16        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 16)
 #define S3C2410_GPA16_nGCS5  (1<<16)
 #define S3C2400_GPA16_nGCS4  (1<<16)
 
-#define S3C2410_GPA17        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 17)
 #define S3C2410_GPA17_CLE    (1<<17)
 #define S3C2400_GPA17_nGCS5  (1<<17)
 
-#define S3C2410_GPA18        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 18)
 #define S3C2410_GPA18_ALE    (1<<18)
 
-#define S3C2410_GPA19        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 19)
 #define S3C2410_GPA19_nFWE   (1<<19)
 
-#define S3C2410_GPA20        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 20)
 #define S3C2410_GPA20_nFRE   (1<<20)
 
-#define S3C2410_GPA21        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 21)
 #define S3C2410_GPA21_nRSTOUT (1<<21)
 
-#define S3C2410_GPA22        S3C2410_GPIONO(S3C2410_GPIO_BANKA, 22)
 #define S3C2410_GPA22_nFCE   (1<<22)
 
 /* 0x08 and 0x0c are reserved on S3C2410 */
@@ -171,85 +148,69 @@
 
 /* no i/o pin in port b can have value 3 (unless it is a s3c2443) ! */
 
-#define S3C2410_GPB0         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 0)
 #define S3C2410_GPB0_TOUT0   (0x02 << 0)
 #define S3C2400_GPB0_DATA16  (0x02 << 0)
 
-#define S3C2410_GPB1         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 1)
 #define S3C2410_GPB1_TOUT1   (0x02 << 2)
 #define S3C2400_GPB1_DATA17  (0x02 << 2)
 
-#define S3C2410_GPB2         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 2)
 #define S3C2410_GPB2_TOUT2   (0x02 << 4)
 #define S3C2400_GPB2_DATA18  (0x02 << 4)
 #define S3C2400_GPB2_TCLK1   (0x03 << 4)
 
-#define S3C2410_GPB3         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 3)
 #define S3C2410_GPB3_TOUT3   (0x02 << 6)
 #define S3C2400_GPB3_DATA19  (0x02 << 6)
 #define S3C2400_GPB3_TXD1    (0x03 << 6)
 
-#define S3C2410_GPB4         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 4)
 #define S3C2410_GPB4_TCLK0   (0x02 << 8)
 #define S3C2400_GPB4_DATA20  (0x02 << 8)
 #define S3C2410_GPB4_MASK    (0x03 << 8)
 #define S3C2400_GPB4_RXD1    (0x03 << 8)
 #define S3C2400_GPB4_MASK    (0x03 << 8)
 
-#define S3C2410_GPB5         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 5)
 #define S3C2410_GPB5_nXBACK  (0x02 << 10)
 #define S3C2443_GPB5_XBACK   (0x03 << 10)
 #define S3C2400_GPB5_DATA21  (0x02 << 10)
 #define S3C2400_GPB5_nCTS1   (0x03 << 10)
 
-#define S3C2410_GPB6         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 6)
 #define S3C2410_GPB6_nXBREQ  (0x02 << 12)
 #define S3C2443_GPB6_XBREQ   (0x03 << 12)
 #define S3C2400_GPB6_DATA22  (0x02 << 12)
 #define S3C2400_GPB6_nRTS1   (0x03 << 12)
 
-#define S3C2410_GPB7         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 7)
 #define S3C2410_GPB7_nXDACK1 (0x02 << 14)
 #define S3C2443_GPB7_XDACK1  (0x03 << 14)
 #define S3C2400_GPB7_DATA23  (0x02 << 14)
 
-#define S3C2410_GPB8         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 8)
 #define S3C2410_GPB8_nXDREQ1 (0x02 << 16)
 #define S3C2400_GPB8_DATA24  (0x02 << 16)
 
-#define S3C2410_GPB9         S3C2410_GPIONO(S3C2410_GPIO_BANKB, 9)
 #define S3C2410_GPB9_nXDACK0 (0x02 << 18)
 #define S3C2443_GPB9_XDACK0  (0x03 << 18)
 #define S3C2400_GPB9_DATA25  (0x02 << 18)
 #define S3C2400_GPB9_I2SSDI  (0x03 << 18)
 
-#define S3C2410_GPB10        S3C2410_GPIONO(S3C2410_GPIO_BANKB, 10)
 #define S3C2410_GPB10_nXDRE0 (0x02 << 20)
 #define S3C2443_GPB10_XDREQ0 (0x03 << 20)
 #define S3C2400_GPB10_DATA26 (0x02 << 20)
 #define S3C2400_GPB10_nSS    (0x03 << 20)
 
-#define S3C2400_GPB11        S3C2410_GPIONO(S3C2410_GPIO_BANKB, 11)
 #define S3C2400_GPB11_INP    (0x00 << 22)
 #define S3C2400_GPB11_OUTP   (0x01 << 22)
 #define S3C2400_GPB11_DATA27 (0x02 << 22)
 
-#define S3C2400_GPB12        S3C2410_GPIONO(S3C2410_GPIO_BANKB, 12)
 #define S3C2400_GPB12_INP    (0x00 << 24)
 #define S3C2400_GPB12_OUTP   (0x01 << 24)
 #define S3C2400_GPB12_DATA28 (0x02 << 24)
 
-#define S3C2400_GPB13        S3C2410_GPIONO(S3C2410_GPIO_BANKB, 13)
 #define S3C2400_GPB13_INP    (0x00 << 26)
 #define S3C2400_GPB13_OUTP   (0x01 << 26)
 #define S3C2400_GPB13_DATA29 (0x02 << 26)
 
-#define S3C2400_GPB14        S3C2410_GPIONO(S3C2410_GPIO_BANKB, 14)
 #define S3C2400_GPB14_INP    (0x00 << 28)
 #define S3C2400_GPB14_OUTP   (0x01 << 28)
 #define S3C2400_GPB14_DATA30 (0x02 << 28)
 
-#define S3C2400_GPB15        S3C2410_GPIONO(S3C2410_GPIO_BANKB, 15)
 #define S3C2400_GPB15_INP    (0x00 << 30)
 #define S3C2400_GPB15_OUTP   (0x01 << 30)
 #define S3C2400_GPB15_DATA31 (0x02 << 30)
@@ -270,67 +231,51 @@
 #define S3C2400_GPCDAT	   S3C2410_GPIOREG(0x18)
 #define S3C2400_GPCUP	   S3C2410_GPIOREG(0x1C)
 
-#define S3C2410_GPC0            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 0)
 #define S3C2410_GPC0_LEND	(0x02 << 0)
 #define S3C2400_GPC0_VD0 	(0x02 << 0)
 
-#define S3C2410_GPC1            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 1)
 #define S3C2410_GPC1_VCLK	(0x02 << 2)
 #define S3C2400_GPC1_VD1 	(0x02 << 2)
 
-#define S3C2410_GPC2            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 2)
 #define S3C2410_GPC2_VLINE	(0x02 << 4)
 #define S3C2400_GPC2_VD2  	(0x02 << 4)
 
-#define S3C2410_GPC3            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 3)
 #define S3C2410_GPC3_VFRAME	(0x02 << 6)
 #define S3C2400_GPC3_VD3   	(0x02 << 6)
 
-#define S3C2410_GPC4            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 4)
 #define S3C2410_GPC4_VM		(0x02 << 8)
 #define S3C2400_GPC4_VD4	(0x02 << 8)
 
-#define S3C2410_GPC5            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 5)
 #define S3C2410_GPC5_LCDVF0	(0x02 << 10)
 #define S3C2400_GPC5_VD5   	(0x02 << 10)
 
-#define S3C2410_GPC6            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 6)
 #define S3C2410_GPC6_LCDVF1	(0x02 << 12)
 #define S3C2400_GPC6_VD6   	(0x02 << 12)
 
-#define S3C2410_GPC7            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 7)
 #define S3C2410_GPC7_LCDVF2	(0x02 << 14)
 #define S3C2400_GPC7_VD7   	(0x02 << 14)
 
-#define S3C2410_GPC8            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 8)
 #define S3C2410_GPC8_VD0	(0x02 << 16)
 #define S3C2400_GPC8_VD8	(0x02 << 16)
 
-#define S3C2410_GPC9            S3C2410_GPIONO(S3C2410_GPIO_BANKC, 9)
 #define S3C2410_GPC9_VD1	(0x02 << 18)
 #define S3C2400_GPC9_VD9	(0x02 << 18)
 
-#define S3C2410_GPC10           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 10)
 #define S3C2410_GPC10_VD2	(0x02 << 20)
 #define S3C2400_GPC10_VD10	(0x02 << 20)
 
-#define S3C2410_GPC11           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 11)
 #define S3C2410_GPC11_VD3	(0x02 << 22)
 #define S3C2400_GPC11_VD11	(0x02 << 22)
 
-#define S3C2410_GPC12           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 12)
 #define S3C2410_GPC12_VD4	(0x02 << 24)
 #define S3C2400_GPC12_VD12	(0x02 << 24)
 
-#define S3C2410_GPC13           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 13)
 #define S3C2410_GPC13_VD5	(0x02 << 26)
 #define S3C2400_GPC13_VD13	(0x02 << 26)
 
-#define S3C2410_GPC14           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 14)
 #define S3C2410_GPC14_VD6	(0x02 << 28)
 #define S3C2400_GPC14_VD14	(0x02 << 28)
 
-#define S3C2410_GPC15           S3C2410_GPIONO(S3C2410_GPIO_BANKC, 15)
 #define S3C2410_GPC15_VD7	(0x02 << 30)
 #define S3C2400_GPC15_VD15	(0x02 << 30)
 
@@ -355,67 +300,51 @@
 #define S3C2400_GPDDAT	   S3C2410_GPIOREG(0x24)
 #define S3C2400_GPDUP	   S3C2410_GPIOREG(0x28)
 
-#define S3C2410_GPD0            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 0)
 #define S3C2410_GPD0_VD8	(0x02 << 0)
 #define S3C2400_GPD0_VFRAME	(0x02 << 0)
 #define S3C2442_GPD0_nSPICS1	(0x03 << 0)
 
-#define S3C2410_GPD1            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 1)
 #define S3C2410_GPD1_VD9	(0x02 << 2)
 #define S3C2400_GPD1_VM		(0x02 << 2)
 #define S3C2442_GPD1_SPICLK1	(0x03 << 2)
 
-#define S3C2410_GPD2            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 2)
 #define S3C2410_GPD2_VD10	(0x02 << 4)
 #define S3C2400_GPD2_VLINE	(0x02 << 4)
 
-#define S3C2410_GPD3            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 3)
 #define S3C2410_GPD3_VD11	(0x02 << 6)
 #define S3C2400_GPD3_VCLK	(0x02 << 6)
 
-#define S3C2410_GPD4            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 4)
 #define S3C2410_GPD4_VD12	(0x02 << 8)
 #define S3C2400_GPD4_LEND	(0x02 << 8)
 
-#define S3C2410_GPD5            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 5)
 #define S3C2410_GPD5_VD13	(0x02 << 10)
 #define S3C2400_GPD5_TOUT0	(0x02 << 10)
 
-#define S3C2410_GPD6            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 6)
 #define S3C2410_GPD6_VD14	(0x02 << 12)
 #define S3C2400_GPD6_TOUT1	(0x02 << 12)
 
-#define S3C2410_GPD7            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 7)
 #define S3C2410_GPD7_VD15	(0x02 << 14)
 #define S3C2400_GPD7_TOUT2	(0x02 << 14)
 
-#define S3C2410_GPD8            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 8)
 #define S3C2410_GPD8_VD16	(0x02 << 16)
 #define S3C2400_GPD8_TOUT3	(0x02 << 16)
 
-#define S3C2410_GPD9            S3C2410_GPIONO(S3C2410_GPIO_BANKD, 9)
 #define S3C2410_GPD9_VD17	(0x02 << 18)
 #define S3C2400_GPD9_TCLK0	(0x02 << 18)
 #define S3C2410_GPD9_MASK       (0x03 << 18)
 
-#define S3C2410_GPD10           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 10)
 #define S3C2410_GPD10_VD18	(0x02 << 20)
 #define S3C2400_GPD10_nWAIT	(0x02 << 20)
 
-#define S3C2410_GPD11           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 11)
 #define S3C2410_GPD11_VD19	(0x02 << 22)
 
-#define S3C2410_GPD12           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 12)
 #define S3C2410_GPD12_VD20	(0x02 << 24)
 
-#define S3C2410_GPD13           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 13)
 #define S3C2410_GPD13_VD21	(0x02 << 26)
 
-#define S3C2410_GPD14           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 14)
 #define S3C2410_GPD14_VD22	(0x02 << 28)
 #define S3C2410_GPD14_nSS1	(0x03 << 28)
 
-#define S3C2410_GPD15           S3C2410_GPIONO(S3C2410_GPIO_BANKD, 15)
 #define S3C2410_GPD15_VD23	(0x02 << 30)
 #define S3C2410_GPD15_nSS0	(0x03 << 30)
 
@@ -441,26 +370,22 @@
 #define S3C2400_GPEDAT	   S3C2410_GPIOREG(0x30)
 #define S3C2400_GPEUP	   S3C2410_GPIOREG(0x34)
 
-#define S3C2410_GPE0           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 0)
 #define S3C2410_GPE0_I2SLRCK   (0x02 << 0)
 #define S3C2443_GPE0_AC_nRESET (0x03 << 0)
 #define S3C2400_GPE0_EINT0     (0x02 << 0)
 #define S3C2410_GPE0_MASK      (0x03 << 0)
 
-#define S3C2410_GPE1           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 1)
 #define S3C2410_GPE1_I2SSCLK   (0x02 << 2)
 #define S3C2443_GPE1_AC_SYNC   (0x03 << 2)
 #define S3C2400_GPE1_EINT1     (0x02 << 2)
 #define S3C2400_GPE1_nSS       (0x03 << 2)
 #define S3C2410_GPE1_MASK      (0x03 << 2)
 
-#define S3C2410_GPE2           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 2)
 #define S3C2410_GPE2_CDCLK     (0x02 << 4)
 #define S3C2443_GPE2_AC_BITCLK (0x03 << 4)
 #define S3C2400_GPE2_EINT2     (0x02 << 4)
 #define S3C2400_GPE2_I2SSDI    (0x03 << 4)
 
-#define S3C2410_GPE3           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 3)
 #define S3C2410_GPE3_I2SSDI    (0x02 << 6)
 #define S3C2443_GPE3_AC_SDI    (0x03 << 6)
 #define S3C2400_GPE3_EINT3     (0x02 << 6)
@@ -468,7 +393,6 @@
 #define S3C2410_GPE3_nSS0      (0x03 << 6)
 #define S3C2410_GPE3_MASK      (0x03 << 6)
 
-#define S3C2410_GPE4           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 4)
 #define S3C2410_GPE4_I2SSDO    (0x02 << 8)
 #define S3C2443_GPE4_AC_SDO    (0x03 << 8)
 #define S3C2400_GPE4_EINT4     (0x02 << 8)
@@ -476,59 +400,48 @@
 #define S3C2410_GPE4_I2SSDI    (0x03 << 8)
 #define S3C2410_GPE4_MASK      (0x03 << 8)
 
-#define S3C2410_GPE5           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 5)
 #define S3C2410_GPE5_SDCLK     (0x02 << 10)
 #define S3C2443_GPE5_SD1_CLK   (0x02 << 10)
 #define S3C2400_GPE5_EINT5     (0x02 << 10)
 #define S3C2400_GPE5_TCLK1     (0x03 << 10)
 
-#define S3C2410_GPE6           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 6)
 #define S3C2410_GPE6_SDCMD     (0x02 << 12)
 #define S3C2443_GPE6_SD1_CMD   (0x02 << 12)
 #define S3C2443_GPE6_AC_BITCLK (0x03 << 12)
 #define S3C2400_GPE6_EINT6     (0x02 << 12)
 
-#define S3C2410_GPE7           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 7)
 #define S3C2410_GPE7_SDDAT0    (0x02 << 14)
 #define S3C2443_GPE5_SD1_DAT0  (0x02 << 14)
 #define S3C2443_GPE7_AC_SDI    (0x03 << 14)
 #define S3C2400_GPE7_EINT7     (0x02 << 14)
 
-#define S3C2410_GPE8           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 8)
 #define S3C2410_GPE8_SDDAT1    (0x02 << 16)
 #define S3C2443_GPE8_SD1_DAT1  (0x02 << 16)
 #define S3C2443_GPE8_AC_SDO    (0x03 << 16)
 #define S3C2400_GPE8_nXDACK0   (0x02 << 16)
 
-#define S3C2410_GPE9           S3C2410_GPIONO(S3C2410_GPIO_BANKE, 9)
 #define S3C2410_GPE9_SDDAT2    (0x02 << 18)
 #define S3C2443_GPE9_SD1_DAT2  (0x02 << 18)
 #define S3C2443_GPE9_AC_SYNC   (0x03 << 18)
 #define S3C2400_GPE9_nXDACK1   (0x02 << 18)
 #define S3C2400_GPE9_nXBACK    (0x03 << 18)
 
-#define S3C2410_GPE10          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 10)
 #define S3C2410_GPE10_SDDAT3   (0x02 << 20)
 #define S3C2443_GPE10_SD1_DAT3 (0x02 << 20)
 #define S3C2443_GPE10_AC_nRESET (0x03 << 20)
 #define S3C2400_GPE10_nXDREQ0  (0x02 << 20)
 
-#define S3C2410_GPE11          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 11)
 #define S3C2410_GPE11_SPIMISO0 (0x02 << 22)
 #define S3C2400_GPE11_nXDREQ1  (0x02 << 22)
 #define S3C2400_GPE11_nXBREQ   (0x03 << 22)
 
-#define S3C2410_GPE12          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 12)
 #define S3C2410_GPE12_SPIMOSI0 (0x02 << 24)
 
-#define S3C2410_GPE13          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 13)
 #define S3C2410_GPE13_SPICLK0  (0x02 << 26)
 
-#define S3C2410_GPE14          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 14)
 #define S3C2410_GPE14_IICSCL   (0x02 << 28)
 #define S3C2410_GPE14_MASK     (0x03 << 28)
 
-#define S3C2410_GPE15          S3C2410_GPIONO(S3C2410_GPIO_BANKE, 15)
 #define S3C2410_GPE15_IICSDA   (0x02 << 30)
 #define S3C2410_GPE15_MASK     (0x03 << 30)
 
@@ -564,39 +477,31 @@
 #define S3C2400_GPFDAT	   S3C2410_GPIOREG(0x3C)
 #define S3C2400_GPFUP	   S3C2410_GPIOREG(0x40)
 
-#define S3C2410_GPF0        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 0)
 #define S3C2410_GPF0_EINT0  (0x02 << 0)
 #define S3C2400_GPF0_RXD0   (0x02 << 0)
 
-#define S3C2410_GPF1        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 1)
 #define S3C2410_GPF1_EINT1  (0x02 << 2)
 #define S3C2400_GPF1_RXD1   (0x02 << 2)
 #define S3C2400_GPF1_IICSDA (0x03 << 2)
 
-#define S3C2410_GPF2        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 2)
 #define S3C2410_GPF2_EINT2  (0x02 << 4)
 #define S3C2400_GPF2_TXD0   (0x02 << 4)
 
-#define S3C2410_GPF3        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 3)
 #define S3C2410_GPF3_EINT3  (0x02 << 6)
 #define S3C2400_GPF3_TXD1   (0x02 << 6)
 #define S3C2400_GPF3_IICSCL (0x03 << 6)
 
-#define S3C2410_GPF4        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 4)
 #define S3C2410_GPF4_EINT4  (0x02 << 8)
 #define S3C2400_GPF4_nRTS0  (0x02 << 8)
 #define S3C2400_GPF4_nXBACK (0x03 << 8)
 
-#define S3C2410_GPF5        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 5)
 #define S3C2410_GPF5_EINT5  (0x02 << 10)
 #define S3C2400_GPF5_nCTS0  (0x02 << 10)
 #define S3C2400_GPF5_nXBREQ (0x03 << 10)
 
-#define S3C2410_GPF6        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 6)
 #define S3C2410_GPF6_EINT6  (0x02 << 12)
 #define S3C2400_GPF6_CLKOUT (0x02 << 12)
 
-#define S3C2410_GPF7        S3C2410_GPIONO(S3C2410_GPIO_BANKF, 7)
 #define S3C2410_GPF7_EINT7  (0x02 << 14)
 
 #define S3C2410_GPF_PUPDIS(x)  (1<<(x))
@@ -621,85 +526,69 @@
 #define S3C2400_GPGDAT	   S3C2410_GPIOREG(0x48)
 #define S3C2400_GPGUP	   S3C2410_GPIOREG(0x4C)
 
-#define S3C2410_GPG0          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 0)
 #define S3C2410_GPG0_EINT8    (0x02 << 0)
 #define S3C2400_GPG0_I2SLRCK  (0x02 << 0)
 
-#define S3C2410_GPG1          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 1)
 #define S3C2410_GPG1_EINT9    (0x02 << 2)
 #define S3C2400_GPG1_I2SSCLK  (0x02 << 2)
 
-#define S3C2410_GPG2          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 2)
 #define S3C2410_GPG2_EINT10   (0x02 << 4)
 #define S3C2410_GPG2_nSS0     (0x03 << 4)
 #define S3C2400_GPG2_CDCLK    (0x02 << 4)
 
-#define S3C2410_GPG3          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 3)
 #define S3C2410_GPG3_EINT11   (0x02 << 6)
 #define S3C2410_GPG3_nSS1     (0x03 << 6)
 #define S3C2400_GPG3_I2SSDO   (0x02 << 6)
 #define S3C2400_GPG3_I2SSDI   (0x03 << 6)
 
-#define S3C2410_GPG4          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 4)
 #define S3C2410_GPG4_EINT12   (0x02 << 8)
 #define S3C2400_GPG4_MMCCLK   (0x02 << 8)
 #define S3C2400_GPG4_I2SSDI   (0x03 << 8)
 #define S3C2410_GPG4_LCDPWREN (0x03 << 8)
 #define S3C2443_GPG4_LCDPWRDN (0x03 << 8)
 
-#define S3C2410_GPG5          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 5)
 #define S3C2410_GPG5_EINT13   (0x02 << 10)
 #define S3C2400_GPG5_MMCCMD   (0x02 << 10)
 #define S3C2400_GPG5_IICSDA   (0x03 << 10)
 #define S3C2410_GPG5_SPIMISO1 (0x03 << 10)	/* not s3c2443 */
 
-#define S3C2410_GPG6          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 6)
 #define S3C2410_GPG6_EINT14   (0x02 << 12)
 #define S3C2400_GPG6_MMCDAT   (0x02 << 12)
 #define S3C2400_GPG6_IICSCL   (0x03 << 12)
 #define S3C2410_GPG6_SPIMOSI1 (0x03 << 12)
 
-#define S3C2410_GPG7          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 7)
 #define S3C2410_GPG7_EINT15   (0x02 << 14)
 #define S3C2410_GPG7_SPICLK1  (0x03 << 14)
 #define S3C2400_GPG7_SPIMISO  (0x02 << 14)
 #define S3C2400_GPG7_IICSDA   (0x03 << 14)
 
-#define S3C2410_GPG8          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 8)
 #define S3C2410_GPG8_EINT16   (0x02 << 16)
 #define S3C2400_GPG8_SPIMOSI  (0x02 << 16)
 #define S3C2400_GPG8_IICSCL   (0x03 << 16)
 
-#define S3C2410_GPG9          S3C2410_GPIONO(S3C2410_GPIO_BANKG, 9)
 #define S3C2410_GPG9_EINT17   (0x02 << 18)
 #define S3C2400_GPG9_SPICLK   (0x02 << 18)
 #define S3C2400_GPG9_MMCCLK   (0x03 << 18)
 
-#define S3C2410_GPG10         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 10)
 #define S3C2410_GPG10_EINT18  (0x02 << 20)
 
-#define S3C2410_GPG11         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 11)
 #define S3C2410_GPG11_EINT19  (0x02 << 22)
 #define S3C2410_GPG11_TCLK1   (0x03 << 22)
 #define S3C2443_GPG11_CF_nIREQ (0x03 << 22)
 
-#define S3C2410_GPG12         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 12)
 #define S3C2410_GPG12_EINT20  (0x02 << 24)
 #define S3C2410_GPG12_XMON    (0x03 << 24)
 #define S3C2442_GPG12_nSPICS0 (0x03 << 24)
 #define S3C2443_GPG12_nINPACK (0x03 << 24)
 
-#define S3C2410_GPG13         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 13)
 #define S3C2410_GPG13_EINT21  (0x02 << 26)
 #define S3C2410_GPG13_nXPON   (0x03 << 26)
 #define S3C2443_GPG13_CF_nREG (0x03 << 26)
 
-#define S3C2410_GPG14         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 14)
 #define S3C2410_GPG14_EINT22  (0x02 << 28)
 #define S3C2410_GPG14_YMON    (0x03 << 28)
 #define S3C2443_GPG14_CF_RESET (0x03 << 28)
 
-#define S3C2410_GPG15         S3C2410_GPIONO(S3C2410_GPIO_BANKG, 15)
 #define S3C2410_GPG15_EINT23  (0x02 << 30)
 #define S3C2410_GPG15_nYPON   (0x03 << 30)
 #define S3C2443_GPG15_CF_PWR  (0x03 << 30)
@@ -718,40 +607,29 @@
 #define S3C2410_GPHDAT	   S3C2410_GPIOREG(0x74)
 #define S3C2410_GPHUP	   S3C2410_GPIOREG(0x78)
 
-#define S3C2410_GPH0        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 0)
 #define S3C2410_GPH0_nCTS0  (0x02 << 0)
 
-#define S3C2410_GPH1        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 1)
 #define S3C2410_GPH1_nRTS0  (0x02 << 2)
 
-#define S3C2410_GPH2        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 2)
 #define S3C2410_GPH2_TXD0   (0x02 << 4)
 
-#define S3C2410_GPH3        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 3)
 #define S3C2410_GPH3_RXD0   (0x02 << 6)
 
-#define S3C2410_GPH4        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 4)
 #define S3C2410_GPH4_TXD1   (0x02 << 8)
 
-#define S3C2410_GPH5        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 5)
 #define S3C2410_GPH5_RXD1   (0x02 << 10)
 
-#define S3C2410_GPH6        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 6)
 #define S3C2410_GPH6_TXD2   (0x02 << 12)
 #define S3C2410_GPH6_nRTS1  (0x03 << 12)
 
-#define S3C2410_GPH7        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 7)
 #define S3C2410_GPH7_RXD2   (0x02 << 14)
 #define S3C2410_GPH7_nCTS1  (0x03 << 14)
 
-#define S3C2410_GPH8        S3C2410_GPIONO(S3C2410_GPIO_BANKH, 8)
 #define S3C2410_GPH8_UCLK   (0x02 << 16)
 
-#define S3C2410_GPH9          S3C2410_GPIONO(S3C2410_GPIO_BANKH, 9)
 #define S3C2410_GPH9_CLKOUT0  (0x02 << 18)
 #define S3C2442_GPH9_nSPICS0  (0x03 << 18)
 
-#define S3C2410_GPH10         S3C2410_GPIONO(S3C2410_GPIO_BANKH, 10)
 #define S3C2410_GPH10_CLKOUT1 (0x02 << 20)
 
 /* The S3C2412 and S3C2413 move the GPJ register set to after
diff --git a/arch/arm/mach-s3c2410/mach-amlm5900.c b/arch/arm/mach-s3c2410/mach-amlm5900.c
index 43f7536..06a84ad 100644
--- a/arch/arm/mach-s3c2410/mach-amlm5900.c
+++ b/arch/arm/mach-s3c2410/mach-amlm5900.c
@@ -225,8 +225,8 @@ static void amlm5900_init_pm(void)
 	} else {
 		enable_irq_wake(IRQ_EINT9);
 		/* configure the suspend/resume status pin */
-		s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPIO_OUTPUT);
-		s3c2410_gpio_pullup(S3C2410_GPF2, 0);
+		s3c2410_gpio_cfgpin(S3C2410_GPF(2), S3C2410_GPIO_OUTPUT);
+		s3c2410_gpio_pullup(S3C2410_GPF(2), 0);
 	}
 }
 static void __init amlm5900_init(void)
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 3410cae..4964637 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -213,15 +213,15 @@ static struct s3c2410_uartcfg bast_uartcfgs[] __initdata = {
 static int bast_pm_suspend(struct sys_device *sd, pm_message_t state)
 {
 	/* ensure that an nRESET is not generated on resume. */
-	s3c2410_gpio_setpin(S3C2410_GPA21, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPA21, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPA(21), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPIO_OUTPUT);
 
 	return 0;
 }
 
 static int bast_pm_resume(struct sys_device *sd)
 {
-	s3c2410_gpio_cfgpin(S3C2410_GPA21, S3C2410_GPA21_nRSTOUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT);
 	return 0;
 }
 
diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c
index 7a7c4da..d9cd5dd 100644
--- a/arch/arm/mach-s3c2410/mach-h1940.c
+++ b/arch/arm/mach-s3c2410/mach-h1940.c
@@ -127,7 +127,7 @@ static void h1940_udc_pullup(enum s3c2410_udc_cmd_e cmd)
 
 static struct s3c2410_udc_mach_info h1940_udc_cfg __initdata = {
 	.udc_command		= h1940_udc_pullup,
-	.vbus_pin		= S3C2410_GPG5,
+	.vbus_pin		= S3C2410_GPG(5),
 	.vbus_pin_inverted	= 1,
 };
 
diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c
index 8370503..0f6ed61 100644
--- a/arch/arm/mach-s3c2410/mach-n30.c
+++ b/arch/arm/mach-s3c2410/mach-n30.c
@@ -86,10 +86,10 @@ static void n30_udc_pullup(enum s3c2410_udc_cmd_e cmd)
 {
 	switch (cmd) {
 	case S3C2410_UDC_P_ENABLE :
-		s3c2410_gpio_setpin(S3C2410_GPB3, 1);
+		s3c2410_gpio_setpin(S3C2410_GPB(3), 1);
 		break;
 	case S3C2410_UDC_P_DISABLE :
-		s3c2410_gpio_setpin(S3C2410_GPB3, 0);
+		s3c2410_gpio_setpin(S3C2410_GPB(3), 0);
 		break;
 	case S3C2410_UDC_P_RESET :
 		break;
@@ -100,55 +100,55 @@ static void n30_udc_pullup(enum s3c2410_udc_cmd_e cmd)
 
 static struct s3c2410_udc_mach_info n30_udc_cfg __initdata = {
 	.udc_command		= n30_udc_pullup,
-	.vbus_pin		= S3C2410_GPG1,
+	.vbus_pin		= S3C2410_GPG(1),
 	.vbus_pin_inverted	= 0,
 };
 
 static struct gpio_keys_button n30_buttons[] = {
 	{
-		.gpio		= S3C2410_GPF0,
+		.gpio		= S3C2410_GPF(0),
 		.code		= KEY_POWER,
 		.desc		= "Power",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG9,
+		.gpio		= S3C2410_GPG(9),
 		.code		= KEY_UP,
 		.desc		= "Thumbwheel Up",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG8,
+		.gpio		= S3C2410_GPG(8),
 		.code		= KEY_DOWN,
 		.desc		= "Thumbwheel Down",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG7,
+		.gpio		= S3C2410_GPG(7),
 		.code		= KEY_ENTER,
 		.desc		= "Thumbwheel Press",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF7,
+		.gpio		= S3C2410_GPF(7),
 		.code		= KEY_HOMEPAGE,
 		.desc		= "Home",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF6,
+		.gpio		= S3C2410_GPF(6),
 		.code		= KEY_CALENDAR,
 		.desc		= "Calendar",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF5,
+		.gpio		= S3C2410_GPF(5),
 		.code		= KEY_ADDRESSBOOK,
 		.desc		= "Contacts",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF4,
+		.gpio		= S3C2410_GPF(4),
 		.code		= KEY_MAIL,
 		.desc		= "Mail",
 		.active_low	= 0,
@@ -170,73 +170,73 @@ static struct platform_device n30_button_device = {
 
 static struct gpio_keys_button n35_buttons[] = {
 	{
-		.gpio		= S3C2410_GPF0,
+		.gpio		= S3C2410_GPF(0),
 		.code		= KEY_POWER,
 		.desc		= "Power",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG9,
+		.gpio		= S3C2410_GPG(9),
 		.code		= KEY_UP,
 		.desc		= "Joystick Up",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG8,
+		.gpio		= S3C2410_GPG(8),
 		.code		= KEY_DOWN,
 		.desc		= "Joystick Down",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG6,
+		.gpio		= S3C2410_GPG(6),
 		.code		= KEY_DOWN,
 		.desc		= "Joystick Left",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG5,
+		.gpio		= S3C2410_GPG(5),
 		.code		= KEY_DOWN,
 		.desc		= "Joystick Right",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG7,
+		.gpio		= S3C2410_GPG(7),
 		.code		= KEY_ENTER,
 		.desc		= "Joystick Press",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF7,
+		.gpio		= S3C2410_GPF(7),
 		.code		= KEY_HOMEPAGE,
 		.desc		= "Home",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF6,
+		.gpio		= S3C2410_GPF(6),
 		.code		= KEY_CALENDAR,
 		.desc		= "Calendar",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF5,
+		.gpio		= S3C2410_GPF(5),
 		.code		= KEY_ADDRESSBOOK,
 		.desc		= "Contacts",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF4,
+		.gpio		= S3C2410_GPF(4),
 		.code		= KEY_MAIL,
 		.desc		= "Mail",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPF3,
+		.gpio		= S3C2410_GPF(3),
 		.code		= SW_RADIO,
 		.desc		= "GPS Antenna",
 		.active_low	= 0,
 	},
 	{
-		.gpio		= S3C2410_GPG2,
+		.gpio		= S3C2410_GPG(2),
 		.code		= SW_HEADPHONE_INSERT,
 		.desc		= "Headphone",
 		.active_low	= 0,
@@ -260,7 +260,7 @@ static struct platform_device n35_button_device = {
 /* This is the bluetooth LED on the device. */
 static struct s3c24xx_led_platdata n30_blue_led_pdata = {
 	.name		= "blue_led",
-	.gpio		= S3C2410_GPG6,
+	.gpio		= S3C2410_GPG(6),
 	.def_trigger	= "",
 };
 
@@ -271,7 +271,7 @@ static struct s3c24xx_led_platdata n30_blue_led_pdata = {
 static struct s3c24xx_led_platdata n30_warning_led_pdata = {
 	.name		= "warning_led",
 	.flags          = S3C24XX_LEDF_ACTLOW,
-	.gpio		= S3C2410_GPD9,
+	.gpio		= S3C2410_GPD(9),
 	.def_trigger	= "",
 };
 
diff --git a/arch/arm/mach-s3c2410/mach-qt2410.c b/arch/arm/mach-s3c2410/mach-qt2410.c
index 7520aee..2cc9849 100644
--- a/arch/arm/mach-s3c2410/mach-qt2410.c
+++ b/arch/arm/mach-s3c2410/mach-qt2410.c
@@ -199,7 +199,7 @@ static struct platform_device qt2410_cs89x0 = {
 /* LED */
 
 static struct s3c24xx_led_platdata qt2410_pdata_led = {
-	.gpio		= S3C2410_GPB0,
+	.gpio		= S3C2410_GPB(0),
 	.flags		= S3C24XX_LEDF_ACTLOW | S3C24XX_LEDF_TRISTATE,
 	.name		= "led",
 	.def_trigger	= "timer",
@@ -219,18 +219,18 @@ static void spi_gpio_cs(struct s3c2410_spigpio_info *spi, int cs)
 {
 	switch (cs) {
 	case BITBANG_CS_ACTIVE:
-		s3c2410_gpio_setpin(S3C2410_GPB5, 0);
+		s3c2410_gpio_setpin(S3C2410_GPB(5), 0);
 		break;
 	case BITBANG_CS_INACTIVE:
-		s3c2410_gpio_setpin(S3C2410_GPB5, 1);
+		s3c2410_gpio_setpin(S3C2410_GPB(5), 1);
 		break;
 	}
 }
 
 static struct s3c2410_spigpio_info spi_gpio_cfg = {
-	.pin_clk	= S3C2410_GPG7,
-	.pin_mosi	= S3C2410_GPG6,
-	.pin_miso	= S3C2410_GPG5,
+	.pin_clk	= S3C2410_GPG(7),
+	.pin_mosi	= S3C2410_GPG(6),
+	.pin_miso	= S3C2410_GPG(5),
 	.chip_select	= &spi_gpio_cs,
 };
 
@@ -347,13 +347,13 @@ static void __init qt2410_machine_init(void)
 	}
 	s3c24xx_fb_set_platdata(&qt2410_fb_info);
 
-	s3c2410_gpio_cfgpin(S3C2410_GPB0, S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_setpin(S3C2410_GPB0, 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPB(0), S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPB(0), 1);
 
 	s3c24xx_udc_set_platdata(&qt2410_udc_cfg);
 	s3c_i2c0_set_platdata(NULL);
 
-	s3c2410_gpio_cfgpin(S3C2410_GPB5, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPB(5), S3C2410_GPIO_OUTPUT);
 
 	platform_add_devices(qt2410_devices, ARRAY_SIZE(qt2410_devices));
 	s3c_pm_init();
diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c
index 0507a7e..1628cc7 100644
--- a/arch/arm/mach-s3c2410/mach-vr1000.c
+++ b/arch/arm/mach-s3c2410/mach-vr1000.c
@@ -278,19 +278,19 @@ static struct platform_device vr1000_dm9k1 = {
 
 static struct s3c24xx_led_platdata vr1000_led1_pdata = {
 	.name		= "led1",
-	.gpio		= S3C2410_GPB0,
+	.gpio		= S3C2410_GPB(0),
 	.def_trigger	= "",
 };
 
 static struct s3c24xx_led_platdata vr1000_led2_pdata = {
 	.name		= "led2",
-	.gpio		= S3C2410_GPB1,
+	.gpio		= S3C2410_GPB(1),
 	.def_trigger	= "",
 };
 
 static struct s3c24xx_led_platdata vr1000_led3_pdata = {
 	.name		= "led3",
-	.gpio		= S3C2410_GPB2,
+	.gpio		= S3C2410_GPB(2),
 	.def_trigger	= "",
 };
 
@@ -356,8 +356,8 @@ static struct clk *vr1000_clocks[] __initdata = {
 
 static void vr1000_power_off(void)
 {
-	s3c2410_gpio_cfgpin(S3C2410_GPB9, S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_setpin(S3C2410_GPB9, 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPB(9), S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPB(9), 1);
 }
 
 static void __init vr1000_map_io(void)
diff --git a/arch/arm/mach-s3c2410/pm.c b/arch/arm/mach-s3c2410/pm.c
index 726cfc0..143e08a 100644
--- a/arch/arm/mach-s3c2410/pm.c
+++ b/arch/arm/mach-s3c2410/pm.c
@@ -77,7 +77,7 @@ static void s3c2410_pm_prepare(void)
 	}
 
 	if ( machine_is_aml_m5900() )
-		s3c2410_gpio_setpin(S3C2410_GPF2, 1);
+		s3c2410_gpio_setpin(S3C2410_GPF(2), 1);
 
 }
 
@@ -92,7 +92,7 @@ static int s3c2410_pm_resume(struct sys_device *dev)
 	__raw_writel(tmp, S3C2410_GSTATUS2);
 
 	if ( machine_is_aml_m5900() )
-		s3c2410_gpio_setpin(S3C2410_GPF2, 0);
+		s3c2410_gpio_setpin(S3C2410_GPF(2), 0);
 
 	return 0;
 }
diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c
index 506252b..dd45eb4 100644
--- a/arch/arm/mach-s3c2410/usb-simtec.c
+++ b/arch/arm/mach-s3c2410/usb-simtec.c
@@ -54,9 +54,9 @@ usb_simtec_powercontrol(int port, int to)
 	power_state[port] = to;
 
 	if (power_state[0] && power_state[1])
-		s3c2410_gpio_setpin(S3C2410_GPB4, 0);
+		s3c2410_gpio_setpin(S3C2410_GPB(4), 0);
 	else
-		s3c2410_gpio_setpin(S3C2410_GPB4, 1);
+		s3c2410_gpio_setpin(S3C2410_GPB(4), 1);
 }
 
 static irqreturn_t
@@ -64,7 +64,7 @@ usb_simtec_ocirq(int irq, void *pw)
 {
 	struct s3c2410_hcd_info *info = pw;
 
-	if (s3c2410_gpio_getpin(S3C2410_GPG10) == 0) {
+	if (s3c2410_gpio_getpin(S3C2410_GPG(10)) == 0) {
 		pr_debug("usb_simtec: over-current irq (oc detected)\n");
 		s3c2410_usb_report_oc(info, 3);
 	} else {
@@ -110,7 +110,7 @@ int usb_simtec_init(void)
 	printk("USB Power Control, (c) 2004 Simtec Electronics\n");
 	s3c_device_usb.dev.platform_data = &usb_simtec_info;
 
-	s3c2410_gpio_cfgpin(S3C2410_GPB4, S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_setpin(S3C2410_GPB4, 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPB(4), S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPB(4), 1);
 	return 0;
 }
diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c
index 56d12b5..8df506e 100644
--- a/arch/arm/mach-s3c2412/mach-jive.c
+++ b/arch/arm/mach-s3c2412/mach-jive.c
@@ -357,8 +357,8 @@ static void jive_lcm_reset(unsigned int set)
 {
 	printk(KERN_DEBUG "%s(%d)\n", __func__, set);
 
-	s3c2410_gpio_setpin(S3C2410_GPG13, set);
-	s3c2410_gpio_cfgpin(S3C2410_GPG13, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPG(13), set);
+	s3c2410_gpio_cfgpin(S3C2410_GPG(13), S3C2410_GPIO_OUTPUT);
 }
 
 #undef LCD_UPPER_MARGIN
@@ -391,13 +391,13 @@ static struct ili9320_platdata jive_lcm_config = {
 
 static void jive_lcd_spi_chipselect(struct s3c2410_spigpio_info *spi, int cs)
 {
-	s3c2410_gpio_setpin(S3C2410_GPB7, cs ? 0 : 1);
+	s3c2410_gpio_setpin(S3C2410_GPB(7), cs ? 0 : 1);
 }
 
 static struct s3c2410_spigpio_info jive_lcd_spi = {
 	.bus_num	= 1,
-	.pin_clk	= S3C2410_GPG8,
-	.pin_mosi	= S3C2410_GPB8,
+	.pin_clk	= S3C2410_GPG(8),
+	.pin_mosi	= S3C2410_GPB(8),
 	.num_chipselect	= 1,
 	.chip_select	= jive_lcd_spi_chipselect,
 };
@@ -413,13 +413,13 @@ static struct platform_device jive_device_lcdspi = {
 
 static void jive_wm8750_chipselect(struct s3c2410_spigpio_info *spi, int cs)
 {
-	s3c2410_gpio_setpin(S3C2410_GPH10, cs ? 0 : 1);
+	s3c2410_gpio_setpin(S3C2410_GPH(10), cs ? 0 : 1);
 }
 
 static struct s3c2410_spigpio_info jive_wm8750_spi = {
 	.bus_num	= 2,
-	.pin_clk	= S3C2410_GPB4,
-	.pin_mosi	= S3C2410_GPB9,
+	.pin_clk	= S3C2410_GPB(4),
+	.pin_mosi	= S3C2410_GPB(9),
 	.num_chipselect	= 1,
 	.chip_select	= jive_wm8750_chipselect,
 };
@@ -480,7 +480,7 @@ static struct platform_device *jive_devices[] __initdata = {
 };
 
 static struct s3c2410_udc_mach_info jive_udc_cfg __initdata = {
-	.vbus_pin	= S3C2410_GPG1,		/* detect is on GPG1 */
+	.vbus_pin	= S3C2410_GPG(1),		/* detect is on GPG1 */
 };
 
 /* Jive power management device */
@@ -530,8 +530,8 @@ static void jive_power_off(void)
 {
 	printk(KERN_INFO "powering system down...\n");
 
-	s3c2410_gpio_setpin(S3C2410_GPC5, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPC5, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPC(5), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPC(5), S3C2410_GPIO_OUTPUT);
 }
 
 static void __init jive_machine_init(void)
@@ -635,22 +635,22 @@ static void __init jive_machine_init(void)
 
 	/* initialise the spi */
 
-	s3c2410_gpio_setpin(S3C2410_GPG13, 0);
-	s3c2410_gpio_cfgpin(S3C2410_GPG13, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPG(13), 0);
+	s3c2410_gpio_cfgpin(S3C2410_GPG(13), S3C2410_GPIO_OUTPUT);
 
-	s3c2410_gpio_setpin(S3C2410_GPB7, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPB7, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPB(7), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPB(7), S3C2410_GPIO_OUTPUT);
 
-	s3c2410_gpio_setpin(S3C2410_GPB6, 0);
-	s3c2410_gpio_cfgpin(S3C2410_GPB6, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPB(6), 0);
+	s3c2410_gpio_cfgpin(S3C2410_GPB(6), S3C2410_GPIO_OUTPUT);
 
-	s3c2410_gpio_setpin(S3C2410_GPG8, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPG8, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPG(8), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPG(8), S3C2410_GPIO_OUTPUT);
 
 	/* initialise the WM8750 spi */
 
-	s3c2410_gpio_setpin(S3C2410_GPH10, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPH10, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPH(10), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPH(10), S3C2410_GPIO_OUTPUT);
 
 	/* Turn off suspend on both USB ports, and switch the
 	 * selectable USB port to USB device mode. */
diff --git a/arch/arm/mach-s3c2412/mach-smdk2413.c b/arch/arm/mach-s3c2412/mach-smdk2413.c
index 753aaed..9a5e434 100644
--- a/arch/arm/mach-s3c2412/mach-smdk2413.c
+++ b/arch/arm/mach-s3c2412/mach-smdk2413.c
@@ -85,10 +85,10 @@ static void smdk2413_udc_pullup(enum s3c2410_udc_cmd_e cmd)
 	switch (cmd)
 	{
 		case S3C2410_UDC_P_ENABLE :
-			s3c2410_gpio_setpin(S3C2410_GPF2, 1);
+			s3c2410_gpio_setpin(S3C2410_GPF(2), 1);
 			break;
 		case S3C2410_UDC_P_DISABLE :
-			s3c2410_gpio_setpin(S3C2410_GPF2, 0);
+			s3c2410_gpio_setpin(S3C2410_GPF(2), 0);
 			break;
 		case S3C2410_UDC_P_RESET :
 			break;
@@ -135,8 +135,8 @@ static void __init smdk2413_machine_init(void)
 {	/* Turn off suspend on both USB ports, and switch the
 	 * selectable USB port to USB device mode. */
 
-	s3c2410_gpio_setpin(S3C2410_GPF2, 0);
-	s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPF(2), 0);
+	s3c2410_gpio_cfgpin(S3C2410_GPF(2), S3C2410_GPIO_OUTPUT);
 
 	s3c2410_modify_misccr(S3C2410_MISCCR_USBHOST |
 			      S3C2410_MISCCR_USBSUSPND0 |
diff --git a/arch/arm/mach-s3c2440/mach-anubis.c b/arch/arm/mach-s3c2440/mach-anubis.c
index d363b6f..68f3870 100644
--- a/arch/arm/mach-s3c2440/mach-anubis.c
+++ b/arch/arm/mach-s3c2440/mach-anubis.c
@@ -469,7 +469,7 @@ static void __init anubis_map_io(void)
 		anubis_nand_sets[0].nr_partitions = ARRAY_SIZE(anubis_default_nand_part_large);
 	} else {
 		/* ensure that the GPIO is setup */
-		s3c2410_gpio_setpin(S3C2410_GPA0, 1);
+		s3c2410_gpio_setpin(S3C2410_GPA(0), 1);
 	}
 }
 
diff --git a/arch/arm/mach-s3c2440/mach-at2440evb.c b/arch/arm/mach-s3c2440/mach-at2440evb.c
index 315c42e..dfc7010 100644
--- a/arch/arm/mach-s3c2440/mach-at2440evb.c
+++ b/arch/arm/mach-s3c2440/mach-at2440evb.c
@@ -166,7 +166,7 @@ static struct platform_device at2440evb_device_eth = {
 };
 
 static struct s3c24xx_mci_pdata at2440evb_mci_pdata = {
-	.gpio_detect	= S3C2410_GPG10,
+	.gpio_detect	= S3C2410_GPG(10),
 };
 
 /* 7" LCD panel */
diff --git a/arch/arm/mach-s3c2440/mach-nexcoder.c b/arch/arm/mach-s3c2440/mach-nexcoder.c
index 4b2088c..d43eded 100644
--- a/arch/arm/mach-s3c2440/mach-nexcoder.c
+++ b/arch/arm/mach-s3c2440/mach-nexcoder.c
@@ -121,16 +121,16 @@ static struct platform_device *nexcoder_devices[] __initdata = {
 static void __init nexcoder_sensorboard_init(void)
 {
 	// Initialize SCCB bus
-	s3c2410_gpio_setpin(S3C2410_GPE14, 1); // IICSCL
-	s3c2410_gpio_cfgpin(S3C2410_GPE14, S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_setpin(S3C2410_GPE15, 1); // IICSDA
-	s3c2410_gpio_cfgpin(S3C2410_GPE15, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPE(14), 1); // IICSCL
+	s3c2410_gpio_cfgpin(S3C2410_GPE(14), S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPE(15), 1); // IICSDA
+	s3c2410_gpio_cfgpin(S3C2410_GPE(15), S3C2410_GPIO_OUTPUT);
 
 	// Power up the sensor board
-	s3c2410_gpio_setpin(S3C2410_GPF1, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPF1, S3C2410_GPIO_OUTPUT); // CAM_GPIO7 => nLDO_PWRDN
-	s3c2410_gpio_setpin(S3C2410_GPF2, 0);
-	s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPIO_OUTPUT); // CAM_GPIO6 => CAM_PWRDN
+	s3c2410_gpio_setpin(S3C2410_GPF(1), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPF(1), S3C2410_GPIO_OUTPUT); // CAM_GPIO7 => nLDO_PWRDN
+	s3c2410_gpio_setpin(S3C2410_GPF(2), 0);
+	s3c2410_gpio_cfgpin(S3C2410_GPF(2), S3C2410_GPIO_OUTPUT); // CAM_GPIO6 => CAM_PWRDN
 }
 
 static void __init nexcoder_map_io(void)
diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c
index ad0fdc4..cba064b 100644
--- a/arch/arm/mach-s3c2440/mach-osiris.c
+++ b/arch/arm/mach-s3c2440/mach-osiris.c
@@ -292,8 +292,8 @@ static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state)
 	__raw_writeb(tmp, OSIRIS_VA_CTRL0);
 
 	/* ensure that an nRESET is not generated on resume. */
-	s3c2410_gpio_setpin(S3C2410_GPA21, 1);
-	s3c2410_gpio_cfgpin(S3C2410_GPA21, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_setpin(S3C2410_GPA(21), 1);
+	s3c2410_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPIO_OUTPUT);
 
 	return 0;
 }
@@ -305,7 +305,7 @@ static int osiris_pm_resume(struct sys_device *sd)
 
 	__raw_writeb(pm_osiris_ctrl0, OSIRIS_VA_CTRL0);
 
-	s3c2410_gpio_cfgpin(S3C2410_GPA21, S3C2410_GPA21_nRSTOUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT);
 
 	return 0;
 }
@@ -385,7 +385,7 @@ static void __init osiris_map_io(void)
 		osiris_nand_sets[0].nr_partitions = ARRAY_SIZE(osiris_default_nand_part_large);
 	} else {
 		/* write-protect line to the NAND */
-		s3c2410_gpio_setpin(S3C2410_GPA0, 1);
+		s3c2410_gpio_setpin(S3C2410_GPA(0), 1);
 	}
 
 	/* fix bus configuration (nBE settings wrong on ABLE pre v2.20) */
diff --git a/arch/arm/plat-s3c24xx/common-smdk.c b/arch/arm/plat-s3c24xx/common-smdk.c
index fe658c4..aa11986 100644
--- a/arch/arm/plat-s3c24xx/common-smdk.c
+++ b/arch/arm/plat-s3c24xx/common-smdk.c
@@ -48,27 +48,27 @@
 /* LED devices */
 
 static struct s3c24xx_led_platdata smdk_pdata_led4 = {
-	.gpio		= S3C2410_GPF4,
+	.gpio		= S3C2410_GPF(4),
 	.flags		= S3C24XX_LEDF_ACTLOW | S3C24XX_LEDF_TRISTATE,
 	.name		= "led4",
 	.def_trigger	= "timer",
 };
 
 static struct s3c24xx_led_platdata smdk_pdata_led5 = {
-	.gpio		= S3C2410_GPF5,
+	.gpio		= S3C2410_GPF(5),
 	.flags		= S3C24XX_LEDF_ACTLOW | S3C24XX_LEDF_TRISTATE,
 	.name		= "led5",
 	.def_trigger	= "nand-disk",
 };
 
 static struct s3c24xx_led_platdata smdk_pdata_led6 = {
-	.gpio		= S3C2410_GPF6,
+	.gpio		= S3C2410_GPF(6),
 	.flags		= S3C24XX_LEDF_ACTLOW | S3C24XX_LEDF_TRISTATE,
 	.name		= "led6",
 };
 
 static struct s3c24xx_led_platdata smdk_pdata_led7 = {
-	.gpio		= S3C2410_GPF7,
+	.gpio		= S3C2410_GPF(7),
 	.flags		= S3C24XX_LEDF_ACTLOW | S3C24XX_LEDF_TRISTATE,
 	.name		= "led7",
 };
@@ -185,15 +185,15 @@ void __init smdk_machine_init(void)
 {
 	/* Configure the LEDs (even if we have no LED support)*/
 
-	s3c2410_gpio_cfgpin(S3C2410_GPF4, S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_cfgpin(S3C2410_GPF5, S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_cfgpin(S3C2410_GPF6, S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_cfgpin(S3C2410_GPF7, S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPF(4), S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPF(5), S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPF(6), S3C2410_GPIO_OUTPUT);
+	s3c2410_gpio_cfgpin(S3C2410_GPF(7), S3C2410_GPIO_OUTPUT);
 
-	s3c2410_gpio_setpin(S3C2410_GPF4, 1);
-	s3c2410_gpio_setpin(S3C2410_GPF5, 1);
-	s3c2410_gpio_setpin(S3C2410_GPF6, 1);
-	s3c2410_gpio_setpin(S3C2410_GPF7, 1);
+	s3c2410_gpio_setpin(S3C2410_GPF(4), 1);
+	s3c2410_gpio_setpin(S3C2410_GPF(5), 1);
+	s3c2410_gpio_setpin(S3C2410_GPF(6), 1);
+	s3c2410_gpio_setpin(S3C2410_GPF(7), 1);
 
 	if (machine_is_smdk2443())
 		smdk_nand_info.twrph0 = 50;
diff --git a/arch/arm/plat-s3c24xx/gpio.c b/arch/arm/plat-s3c24xx/gpio.c
index 65ebbaf..ae0b9d6 100644
--- a/arch/arm/plat-s3c24xx/gpio.c
+++ b/arch/arm/plat-s3c24xx/gpio.c
@@ -183,19 +183,19 @@ EXPORT_SYMBOL(s3c2410_modify_misccr);
 
 int s3c2410_gpio_getirq(unsigned int pin)
 {
-	if (pin < S3C2410_GPF0 || pin > S3C2410_GPG15)
+	if (pin < S3C2410_GPF(0) || pin > S3C2410_GPG(15))
 		return -1;	/* not valid interrupts */
 
-	if (pin < S3C2410_GPG0 && pin > S3C2410_GPF7)
+	if (pin < S3C2410_GPG(0) && pin > S3C2410_GPF(7))
 		return -1;	/* not valid pin */
 
-	if (pin < S3C2410_GPF4)
-		return (pin - S3C2410_GPF0) + IRQ_EINT0;
+	if (pin < S3C2410_GPF(4))
+		return (pin - S3C2410_GPF(0)) + IRQ_EINT0;
 
-	if (pin < S3C2410_GPG0)
-		return (pin - S3C2410_GPF4) + IRQ_EINT4;
+	if (pin < S3C2410_GPG(0))
+		return (pin - S3C2410_GPF(4)) + IRQ_EINT4;
 
-	return (pin - S3C2410_GPG0) + IRQ_EINT8;
+	return (pin - S3C2410_GPG(0)) + IRQ_EINT8;
 }
 
 EXPORT_SYMBOL(s3c2410_gpio_getirq);
diff --git a/arch/arm/plat-s3c24xx/gpiolib.c b/arch/arm/plat-s3c24xx/gpiolib.c
index 60a9f72..a82adc3 100644
--- a/arch/arm/plat-s3c24xx/gpiolib.c
+++ b/arch/arm/plat-s3c24xx/gpiolib.c
@@ -82,7 +82,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		.base	= S3C2410_GPACON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_1bit),
 		.chip	= {
-			.base			= S3C2410_GPA0,
+			.base			= S3C2410_GPA(0),
 			.owner			= THIS_MODULE,
 			.label			= "GPIOA",
 			.ngpio			= 24,
@@ -94,7 +94,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		.base	= S3C2410_GPBCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
-			.base			= S3C2410_GPB0,
+			.base			= S3C2410_GPB(0),
 			.owner			= THIS_MODULE,
 			.label			= "GPIOB",
 			.ngpio			= 16,
@@ -104,7 +104,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		.base	= S3C2410_GPCCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
-			.base			= S3C2410_GPC0,
+			.base			= S3C2410_GPC(0),
 			.owner			= THIS_MODULE,
 			.label			= "GPIOC",
 			.ngpio			= 16,
@@ -114,7 +114,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		.base	= S3C2410_GPDCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
-			.base			= S3C2410_GPD0,
+			.base			= S3C2410_GPD(0),
 			.owner			= THIS_MODULE,
 			.label			= "GPIOD",
 			.ngpio			= 16,
@@ -124,7 +124,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		.base	= S3C2410_GPECON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
-			.base			= S3C2410_GPE0,
+			.base			= S3C2410_GPE(0),
 			.label			= "GPIOE",
 			.owner			= THIS_MODULE,
 			.ngpio			= 16,
@@ -134,7 +134,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		.base	= S3C2410_GPFCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
-			.base			= S3C2410_GPF0,
+			.base			= S3C2410_GPF(0),
 			.owner			= THIS_MODULE,
 			.label			= "GPIOF",
 			.ngpio			= 8,
@@ -145,7 +145,7 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 		.base	= S3C2410_GPGCON,
 		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
 		.chip	= {
-			.base			= S3C2410_GPG0,
+			.base			= S3C2410_GPG(0),
 			.owner			= THIS_MODULE,
 			.label			= "GPIOG",
 			.ngpio			= 10,
diff --git a/arch/arm/plat-s3c24xx/pm.c b/arch/arm/plat-s3c24xx/pm.c
index 16aaf36..56e5253 100644
--- a/arch/arm/plat-s3c24xx/pm.c
+++ b/arch/arm/plat-s3c24xx/pm.c
@@ -124,12 +124,12 @@ void s3c_pm_configure_extint(void)
 	 * and then configure it as an input if it is not
 	*/
 
-	for (pin = S3C2410_GPF0; pin <= S3C2410_GPF7; pin++) {
-		s3c_pm_check_resume_pin(pin, pin - S3C2410_GPF0);
+	for (pin = S3C2410_GPF(0); pin <= S3C2410_GPF(7); pin++) {
+		s3c_pm_check_resume_pin(pin, pin - S3C2410_GPF(0));
 	}
 
-	for (pin = S3C2410_GPG0; pin <= S3C2410_GPG7; pin++) {
-		s3c_pm_check_resume_pin(pin, (pin - S3C2410_GPG0)+8);
+	for (pin = S3C2410_GPG(0); pin <= S3C2410_GPG(7); pin++) {
+		s3c_pm_check_resume_pin(pin, (pin - S3C2410_GPG(0))+8);
 	}
 }
 
diff --git a/arch/arm/plat-s3c24xx/setup-i2c.c b/arch/arm/plat-s3c24xx/setup-i2c.c
index e0d1853..71a6acc 100644
--- a/arch/arm/plat-s3c24xx/setup-i2c.c
+++ b/arch/arm/plat-s3c24xx/setup-i2c.c
@@ -21,6 +21,6 @@ struct platform_device;
 
 void s3c_i2c0_cfg_gpio(struct platform_device *dev)
 {
-	s3c2410_gpio_cfgpin(S3C2410_GPE15, S3C2410_GPE15_IICSDA);
-	s3c2410_gpio_cfgpin(S3C2410_GPE14, S3C2410_GPE14_IICSCL);
+	s3c2410_gpio_cfgpin(S3C2410_GPE(15), S3C2410_GPE15_IICSDA);
+	s3c2410_gpio_cfgpin(S3C2410_GPE(14), S3C2410_GPE14_IICSCL);
 }
diff --git a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
index 8b403cb..9edf789 100644
--- a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
+++ b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
@@ -22,16 +22,16 @@ void s3c24xx_spi_gpiocfg_bus0_gpe11_12_13(struct s3c2410_spi_info *spi,
 					  int enable)
 {
 	if (enable) {
-		s3c2410_gpio_cfgpin(S3C2410_GPE13, S3C2410_GPE13_SPICLK0);
-		s3c2410_gpio_cfgpin(S3C2410_GPE12, S3C2410_GPE12_SPIMOSI0);
-		s3c2410_gpio_cfgpin(S3C2410_GPE11, S3C2410_GPE11_SPIMISO0);
-		s3c2410_gpio_pullup(S3C2410_GPE11, 0);
-		s3c2410_gpio_pullup(S3C2410_GPE13, 0);
+		s3c2410_gpio_cfgpin(S3C2410_GPE(13), S3C2410_GPE13_SPICLK0);
+		s3c2410_gpio_cfgpin(S3C2410_GPE(12), S3C2410_GPE12_SPIMOSI0);
+		s3c2410_gpio_cfgpin(S3C2410_GPE(11), S3C2410_GPE11_SPIMISO0);
+		s3c2410_gpio_pullup(S3C2410_GPE(11), 0);
+		s3c2410_gpio_pullup(S3C2410_GPE(13), 0);
 	} else {
-		s3c2410_gpio_cfgpin(S3C2410_GPE13, S3C2410_GPIO_INPUT);
-		s3c2410_gpio_cfgpin(S3C2410_GPE11, S3C2410_GPIO_INPUT);
-		s3c2410_gpio_pullup(S3C2410_GPE11, 1);
-		s3c2410_gpio_pullup(S3C2410_GPE12, 1);
-		s3c2410_gpio_pullup(S3C2410_GPE13, 1);
+		s3c2410_gpio_cfgpin(S3C2410_GPE(13), S3C2410_GPIO_INPUT);
+		s3c2410_gpio_cfgpin(S3C2410_GPE(11), S3C2410_GPIO_INPUT);
+		s3c2410_gpio_pullup(S3C2410_GPE(11), 1);
+		s3c2410_gpio_pullup(S3C2410_GPE(12), 1);
+		s3c2410_gpio_pullup(S3C2410_GPE(13), 1);
 	}
 }
diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
index 8fccd4e..f34d0fc 100644
--- a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
+++ b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
@@ -22,16 +22,16 @@ void s3c24xx_spi_gpiocfg_bus1_gpg5_6_7(struct s3c2410_spi_info *spi,
 				       int enable)
 {
 	if (enable) {
-		s3c2410_gpio_cfgpin(S3C2410_GPG7, S3C2410_GPG7_SPICLK1);
-		s3c2410_gpio_cfgpin(S3C2410_GPG6, S3C2410_GPG6_SPIMOSI1);
-		s3c2410_gpio_cfgpin(S3C2410_GPG5, S3C2410_GPG5_SPIMISO1);
-		s3c2410_gpio_pullup(S3C2410_GPG5, 0);
-		s3c2410_gpio_pullup(S3C2410_GPG6, 0);
+		s3c2410_gpio_cfgpin(S3C2410_GPG(7), S3C2410_GPG7_SPICLK1);
+		s3c2410_gpio_cfgpin(S3C2410_GPG(6), S3C2410_GPG6_SPIMOSI1);
+		s3c2410_gpio_cfgpin(S3C2410_GPG(5), S3C2410_GPG5_SPIMISO1);
+		s3c2410_gpio_pullup(S3C2410_GPG(5), 0);
+		s3c2410_gpio_pullup(S3C2410_GPG(6), 0);
 	} else {
-		s3c2410_gpio_cfgpin(S3C2410_GPG7, S3C2410_GPIO_INPUT);
-		s3c2410_gpio_cfgpin(S3C2410_GPG5, S3C2410_GPIO_INPUT);
-		s3c2410_gpio_pullup(S3C2410_GPG5, 1);
-		s3c2410_gpio_pullup(S3C2410_GPG6, 1);
-		s3c2410_gpio_pullup(S3C2410_GPG7, 1);
+		s3c2410_gpio_cfgpin(S3C2410_GPG(7), S3C2410_GPIO_INPUT);
+		s3c2410_gpio_cfgpin(S3C2410_GPG(5), S3C2410_GPIO_INPUT);
+		s3c2410_gpio_pullup(S3C2410_GPG(5), 1);
+		s3c2410_gpio_pullup(S3C2410_GPG(6), 1);
+		s3c2410_gpio_pullup(S3C2410_GPG(7), 1);
 	}
 }
-- 
cgit v0.10.2


From fa7a7883fe5c647decee57a9c1b86a96408c5b26 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 10 Mar 2009 23:57:26 +0000
Subject: [ARM] S3C64XX: DMA support

Add support for the DMA blocks in the S3C64XX series of CPUS,
which are based on the ARM PL080 PrimeCell system.

Unfortunately, these DMA controllers diverge from the PL080
design by adding another DMA controller register and
configuration for OneNAND.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/include/asm/hardware/pl080.h b/arch/arm/include/asm/hardware/pl080.h
new file mode 100644
index 0000000..6a6c66b
--- /dev/null
+++ b/arch/arm/include/asm/hardware/pl080.h
@@ -0,0 +1,138 @@
+/* arch/arm/include/asm/hardware/pl080.h
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *      http://armlinux.simtec.co.uk/
+ *      Ben Dooks <ben@simtec.co.uk>
+ *
+ * ARM PrimeCell PL080 DMA controller
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+/* Note, there are some Samsung updates to this controller block which
+ * make it not entierly compatible with the PL080 specification from
+ * ARM. When in doubt, check the Samsung documentation first.
+ *
+ * The Samsung defines are PL080S, and add an extra controll register,
+ * the ability to move more than 2^11 counts of data and some extra
+ * OneNAND features.
+*/
+
+#define PL080_INT_STATUS			(0x00)
+#define PL080_TC_STATUS				(0x04)
+#define PL080_TC_CLEAR				(0x08)
+#define PL080_ERR_STATUS			(0x0C)
+#define PL080_ERR_CLEAR				(0x10)
+#define PL080_RAW_TC_STATUS			(0x14)
+#define PL080_RAW_ERR_STATUS			(0x18)
+#define PL080_EN_CHAN				(0x1c)
+#define PL080_SOFT_BREQ				(0x20)
+#define PL080_SOFT_SREQ				(0x24)
+#define PL080_SOFT_LBREQ			(0x28)
+#define PL080_SOFT_LSREQ			(0x2C)
+
+#define PL080_CONFIG				(0x30)
+#define PL080_CONFIG_M2_BE			(1 << 2)
+#define PL080_CONFIG_M1_BE			(1 << 1)
+#define PL080_CONFIG_ENABLE			(1 << 0)
+
+#define PL080_SYNC				(0x34)
+
+/* Per channel configuration registers */
+
+#define PL008_Cx_STRIDE				(0x20)
+#define PL080_Cx_BASE(x)			((0x100 + (x * 0x20)))
+#define PL080_Cx_SRC_ADDR(x)			((0x100 + (x * 0x20)))
+#define PL080_Cx_DST_ADDR(x)			((0x104 + (x * 0x20)))
+#define PL080_Cx_LLI(x)				((0x108 + (x * 0x20)))
+#define PL080_Cx_CONTROL(x)			((0x10C + (x * 0x20)))
+#define PL080_Cx_CONFIG(x)			((0x110 + (x * 0x20)))
+#define PL080S_Cx_CONTROL2(x)			((0x110 + (x * 0x20)))
+#define PL080S_Cx_CONFIG(x)			((0x114 + (x * 0x20)))
+
+#define PL080_CH_SRC_ADDR			(0x00)
+#define PL080_CH_DST_ADDR			(0x04)
+#define PL080_CH_LLI				(0x08)
+#define PL080_CH_CONTROL			(0x0C)
+#define PL080_CH_CONFIG				(0x10)
+#define PL080S_CH_CONTROL2			(0x10)
+#define PL080S_CH_CONFIG			(0x14)
+
+#define PL080_LLI_ADDR_MASK			(0x3fffffff << 2)
+#define PL080_LLI_ADDR_SHIFT			(2)
+#define PL080_LLI_LM_AHB2			(1 << 0)
+
+#define PL080_CONTROL_TC_IRQ_EN			(1 << 31)
+#define PL080_CONTROL_PROT_MASK			(0x7 << 28)
+#define PL080_CONTROL_PROT_SHIFT		(28)
+#define PL080_CONTROL_PROT_SYS			(1 << 28)
+#define PL080_CONTROL_DST_INCR			(1 << 27)
+#define PL080_CONTROL_SRC_INCR			(1 << 26)
+#define PL080_CONTROL_DST_AHB2			(1 << 25)
+#define PL080_CONTROL_SRC_AHB2			(1 << 24)
+#define PL080_CONTROL_DWIDTH_MASK		(0x7 << 21)
+#define PL080_CONTROL_DWIDTH_SHIFT		(21)
+#define PL080_CONTROL_SWIDTH_MASK		(0x7 << 18)
+#define PL080_CONTROL_SWIDTH_SHIFT		(18)
+#define PL080_CONTROL_DB_SIZE_MASK		(0x7 << 15)
+#define PL080_CONTROL_DB_SIZE_SHIFT		(15)
+#define PL080_CONTROL_SB_SIZE_MASK		(0x7 << 12)
+#define PL080_CONTROL_SB_SIZE_SHIFT		(12)
+#define PL080_CONTROL_TRANSFER_SIZE_MASK	(0xfff << 0)
+#define PL080_CONTROL_TRANSFER_SIZE_SHIFT	(0)
+
+#define PL080_BSIZE_1				(0x0)
+#define PL080_BSIZE_4				(0x1)
+#define PL080_BSIZE_8				(0x2)
+#define PL080_BSIZE_16				(0x3)
+#define PL080_BSIZE_32				(0x4)
+#define PL080_BSIZE_64				(0x5)
+#define PL080_BSIZE_128				(0x6)
+#define PL080_BSIZE_256				(0x7)
+
+#define PL080_WIDTH_8BIT			(0x0)
+#define PL080_WIDTH_16BIT			(0x1)
+#define PL080_WIDTH_32BIT			(0x2)
+
+#define PL080_CONFIG_HALT			(1 << 18)
+#define PL080_CONFIG_ACTIVE			(1 << 17)  /* RO */
+#define PL080_CONFIG_LOCK			(1 << 16)
+#define PL080_CONFIG_TC_IRQ_MASK		(1 << 15)
+#define PL080_CONFIG_ERR_IRQ_MASK		(1 << 14)
+#define PL080_CONFIG_FLOW_CONTROL_MASK		(0x7 << 11)
+#define PL080_CONFIG_FLOW_CONTROL_SHIFT		(11)
+#define PL080_CONFIG_DST_SEL_MASK		(0xf << 6)
+#define PL080_CONFIG_DST_SEL_SHIFT		(6)
+#define PL080_CONFIG_SRC_SEL_MASK		(0xf << 1)
+#define PL080_CONFIG_SRC_SEL_SHIFT		(1)
+#define PL080_CONFIG_ENABLE			(1 << 0)
+
+#define PL080_FLOW_MEM2MEM			(0x0)
+#define PL080_FLOW_MEM2PER			(0x1)
+#define PL080_FLOW_PER2MEM			(0x2)
+#define PL080_FLOW_SRC2DST			(0x3)
+#define PL080_FLOW_SRC2DST_DST			(0x4)
+#define PL080_FLOW_MEM2PER_PER			(0x5)
+#define PL080_FLOW_PER2MEM_PER			(0x6)
+#define PL080_FLOW_SRC2DST_SRC			(0x7)
+
+/* DMA linked list chain structure */
+
+struct pl080_lli {
+	u32	src_addr;
+	u32	dst_addr;
+	u32	next_lli;
+	u32	control0;
+};
+
+struct pl080s_lli {
+	u32	src_addr;
+	u32	dst_addr;
+	u32	next_lli;
+	u32	control0;
+	u32	control1;
+};
+
diff --git a/arch/arm/mach-s3c6400/include/mach/dma.h b/arch/arm/mach-s3c6400/include/mach/dma.h
index 9771ac2..1067619 100644
--- a/arch/arm/mach-s3c6400/include/mach/dma.h
+++ b/arch/arm/mach-s3c6400/include/mach/dma.h
@@ -11,6 +11,63 @@
 #ifndef __ASM_ARCH_DMA_H
 #define __ASM_ARCH_DMA_H __FILE__
 
-/* currently nothing here, placeholder */
+#define S3C_DMA_CHANNELS	(16)
+
+/* see mach-s3c2410/dma.h for notes on dma channel numbers */
+
+/* Note, for the S3C64XX architecture we keep the DMACH_
+ * defines in the order they are allocated to [S]DMA0/[S]DMA1
+ * so that is easy to do DHACH_ -> DMA controller conversion
+ */
+enum dma_ch {
+	/* DMA0/SDMA0 */
+	DMACH_UART0 = 0,
+	DMACH_UART0_SRC2,
+	DMACH_UART1,
+	DMACH_UART1_SRC2,
+	DMACH_UART2,
+	DMACH_UART2_SRC2,
+	DMACH_UART3,
+	DMACH_UART3_SRC2,
+	DMACH_PCM0_TX,
+	DMACH_PCM0_RX,
+	DMACH_I2S0_OUT,
+	DMACH_I2S0_IN,
+	DMACH_SPI0_TX,
+	DMACH_SPI0_RX,
+	DMACH_HSI_I2SV40_TX,
+	DMACH_HSI_I2SV40_RX,
+
+	/* DMA1/SDMA1 */
+	DMACH_PCM1_TX = 16,
+	DMACH_PCM1_RX,
+	DMACH_I2S1_OUT,
+	DMACH_I2S1_IN,
+	DMACH_SPI1_TX,
+	DMACH_SPI1_RX,
+	DMACH_AC97_PCMOUT,
+	DMACH_AC97_PCMIN,
+	DMACH_AC97_MICIN,
+	DMACH_PWM,
+	DMACH_IRDA,
+	DMACH_EXTERNAL,
+	DMACH_RES1,
+	DMACH_RES2,
+	DMACH_SECURITY_RX,	/* SDMA1 only */
+	DMACH_SECURITY_TX,	/* SDMA1 only */
+	DMACH_MAX		/* the end */
+};
+
+static __inline__ int s3c_dma_has_circular(void)
+{
+	/* we will be supporting ciruclar buffers as soon as we have DMA
+	 * engine support.
+	 */
+	return 1;
+}
+
+#define S3C2410_DMAF_CIRCULAR		(1 << 0)
+
+#include <plat/dma.h>
 
 #endif /* __ASM_ARCH_IRQ_H */
diff --git a/arch/arm/plat-s3c64xx/Kconfig b/arch/arm/plat-s3c64xx/Kconfig
index 54375a0..9ce6615 100644
--- a/arch/arm/plat-s3c64xx/Kconfig
+++ b/arch/arm/plat-s3c64xx/Kconfig
@@ -38,6 +38,10 @@ config CPU_S3C6400_CLOCK
 	  Common clock support code for the S3C6400 that is shared
 	  by other CPUs in the series, such as the S3C6410.
 
+config S3C64XX_DMA
+	bool "S3C64XX DMA"
+	select S3C_DMA
+
 # platform specific device setup
 
 config S3C64XX_SETUP_I2C0
diff --git a/arch/arm/plat-s3c64xx/Makefile b/arch/arm/plat-s3c64xx/Makefile
index 2e6d79b..61bdb67 100644
--- a/arch/arm/plat-s3c64xx/Makefile
+++ b/arch/arm/plat-s3c64xx/Makefile
@@ -24,6 +24,10 @@ obj-y				+= gpiolib.o
 obj-$(CONFIG_CPU_S3C6400_INIT)	+= s3c6400-init.o
 obj-$(CONFIG_CPU_S3C6400_CLOCK)	+= s3c6400-clock.o
 
+# DMA support
+
+obj-$(CONFIG_S3C64XX_DMA)	+= dma.o
+
 # Device setup
 
 obj-$(CONFIG_S3C64XX_SETUP_I2C0) += setup-i2c0.o
diff --git a/arch/arm/plat-s3c64xx/dma.c b/arch/arm/plat-s3c64xx/dma.c
new file mode 100644
index 0000000..1c0f51c
--- /dev/null
+++ b/arch/arm/plat-s3c64xx/dma.c
@@ -0,0 +1,722 @@
+/* linux/arch/arm/plat-s3c64xx/dma.c
+ *
+ * Copyright 2009 Openmoko, Inc.
+ * Copyright 2009 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C64XX DMA core
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dmapool.h>
+#include <linux/sysdev.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#include <mach/dma.h>
+#include <mach/map.h>
+#include <mach/irqs.h>
+
+#include <plat/dma-plat.h>
+#include <plat/regs-sys.h>
+
+#include <asm/hardware/pl080.h>
+
+/* dma channel state information */
+
+struct s3c64xx_dmac {
+	struct sys_device	 sysdev;
+	struct clk		*clk;
+	void __iomem		*regs;
+	struct s3c2410_dma_chan *channels;
+	enum dma_ch		 chanbase;
+};
+
+/* pool to provide LLI buffers */
+static struct dma_pool *dma_pool;
+
+/* Debug configuration and code */
+
+static unsigned char debug_show_buffs = 0;
+
+static void dbg_showchan(struct s3c2410_dma_chan *chan)
+{
+	pr_debug("DMA%d: %08x->%08x L %08x C %08x,%08x S %08x\n",
+		 chan->number,
+		 readl(chan->regs + PL080_CH_SRC_ADDR),
+		 readl(chan->regs + PL080_CH_DST_ADDR),
+		 readl(chan->regs + PL080_CH_LLI),
+		 readl(chan->regs + PL080_CH_CONTROL),
+		 readl(chan->regs + PL080S_CH_CONTROL2),
+		 readl(chan->regs + PL080S_CH_CONFIG));
+}
+
+static void show_lli(struct pl080s_lli *lli)
+{
+	pr_debug("LLI[%p] %08x->%08x, NL %08x C %08x,%08x\n",
+		 lli, lli->src_addr, lli->dst_addr, lli->next_lli,
+		 lli->control0, lli->control1);
+}
+
+static void dbg_showbuffs(struct s3c2410_dma_chan *chan)
+{
+	struct s3c64xx_dma_buff *ptr;
+	struct s3c64xx_dma_buff *end;
+
+	pr_debug("DMA%d: buffs next %p, curr %p, end %p\n",
+		 chan->number, chan->next, chan->curr, chan->end);
+
+	ptr = chan->next;
+	end = chan->end;
+
+	if (debug_show_buffs) {
+		for (; ptr != NULL; ptr = ptr->next) {
+			pr_debug("DMA%d: %08x ",
+				 chan->number, ptr->lli_dma);
+			show_lli(ptr->lli);
+		}
+	}
+}
+
+/* End of Debug */
+
+static struct s3c2410_dma_chan *s3c64xx_dma_map_channel(unsigned int channel)
+{
+	struct s3c2410_dma_chan *chan;
+	unsigned int start, offs;
+
+	start = 0;
+
+	if (channel >= DMACH_PCM1_TX)
+		start = 8;
+
+	for (offs = 0; offs < 8; offs++) {
+		chan = &s3c2410_chans[start + offs];
+		if (!chan->in_use)
+			goto found;
+	}
+
+	return NULL;
+
+found:
+	s3c_dma_chan_map[channel] = chan;
+	return chan;
+}
+
+int s3c2410_dma_config(unsigned int channel, int xferunit)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+
+	if (chan == NULL)
+		return -EINVAL;
+
+	switch (xferunit) {
+	case 1:
+		chan->hw_width = 0;
+		break;
+	case 2:
+		chan->hw_width = 1;
+		break;
+	case 4:
+		chan->hw_width = 2;
+		break;
+	default:
+		printk(KERN_ERR "%s: illegal width %d\n", __func__, xferunit);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(s3c2410_dma_config);
+
+static void s3c64xx_dma_fill_lli(struct s3c2410_dma_chan *chan,
+				 struct pl080s_lli *lli,
+				 dma_addr_t data, int size)
+{
+	dma_addr_t src, dst;
+	u32 control0, control1;
+
+	switch (chan->source) {
+	case S3C2410_DMASRC_HW:
+		src = chan->dev_addr;
+		dst = data;
+		control0 = PL080_CONTROL_SRC_AHB2;
+		control0 |= (u32)chan->hw_width << PL080_CONTROL_SWIDTH_SHIFT;
+		control0 |= 2 << PL080_CONTROL_DWIDTH_SHIFT;
+		control0 |= PL080_CONTROL_DST_INCR;
+		break;
+
+	case S3C2410_DMASRC_MEM:
+		src = data;
+		dst = chan->dev_addr;
+		control0 = PL080_CONTROL_DST_AHB2;
+		control0 |= (u32)chan->hw_width << PL080_CONTROL_DWIDTH_SHIFT;
+		control0 |= 2 << PL080_CONTROL_SWIDTH_SHIFT;
+		control0 |= PL080_CONTROL_SRC_INCR;
+		break;
+	default:
+		BUG();
+	}
+
+	/* note, we do not currently setup any of the burst controls */
+
+	control1 = size >> chan->hw_width;	/* size in no of xfers */
+	control0 |= PL080_CONTROL_PROT_SYS;	/* always in priv. mode */
+	control0 |= PL080_CONTROL_TC_IRQ_EN;	/* always fire IRQ */
+
+	lli->src_addr = src;
+	lli->dst_addr = dst;
+	lli->next_lli = 0;
+	lli->control0 = control0;
+	lli->control1 = control1;
+}
+
+static void s3c64xx_lli_to_regs(struct s3c2410_dma_chan *chan,
+				struct pl080s_lli *lli)
+{
+	void __iomem *regs = chan->regs;
+
+	pr_debug("%s: LLI %p => regs\n", __func__, lli);
+	show_lli(lli);
+
+	writel(lli->src_addr, regs + PL080_CH_SRC_ADDR);
+	writel(lli->dst_addr, regs + PL080_CH_DST_ADDR);
+	writel(lli->next_lli, regs + PL080_CH_LLI);
+	writel(lli->control0, regs + PL080_CH_CONTROL);
+	writel(lli->control1, regs + PL080S_CH_CONTROL2);
+}
+
+static int s3c64xx_dma_start(struct s3c2410_dma_chan *chan)
+{
+	struct s3c64xx_dmac *dmac = chan->dmac;
+	u32 config;
+	u32 bit = chan->bit;
+
+	dbg_showchan(chan);
+
+	pr_debug("%s: clearing interrupts\n", __func__);
+
+	/* clear interrupts */
+	writel(bit, dmac->regs + PL080_TC_CLEAR);
+	writel(bit, dmac->regs + PL080_ERR_CLEAR);
+
+	pr_debug("%s: starting channel\n", __func__);
+
+	config = readl(chan->regs + PL080S_CH_CONFIG);
+	config |= PL080_CONFIG_ENABLE;
+
+	pr_debug("%s: writing config %08x\n", __func__, config);
+	writel(config, chan->regs + PL080S_CH_CONFIG);
+
+	return 0;
+}
+
+static int s3c64xx_dma_stop(struct s3c2410_dma_chan *chan)
+{
+	u32 config;
+	int timeout;
+
+	pr_debug("%s: stopping channel\n", __func__);
+
+	dbg_showchan(chan);
+
+	config = readl(chan->regs + PL080S_CH_CONFIG);
+	config |= PL080_CONFIG_HALT;
+	writel(config, chan->regs + PL080S_CH_CONFIG);
+
+	timeout = 1000;
+	do {
+		config = readl(chan->regs + PL080S_CH_CONFIG);
+		pr_debug("%s: %d - config %08x\n", __func__, timeout, config);
+		if (config & PL080_CONFIG_ACTIVE)
+			udelay(10);
+		else
+			break;
+		} while (--timeout > 0);
+
+	if (config & PL080_CONFIG_ACTIVE) {
+		printk(KERN_ERR "%s: channel still active\n", __func__);
+		return -EFAULT;
+	}
+
+	config = readl(chan->regs + PL080S_CH_CONFIG);
+	config &= ~PL080_CONFIG_ENABLE;
+	writel(config, chan->regs + PL080S_CH_CONFIG);
+
+	return 0;
+}
+
+static inline void s3c64xx_dma_bufffdone(struct s3c2410_dma_chan *chan,
+					 struct s3c64xx_dma_buff *buf,
+					 enum s3c2410_dma_buffresult result)
+{
+	if (chan->callback_fn != NULL)
+		(chan->callback_fn)(chan, buf->pw, 0, result);
+}
+
+static void s3c64xx_dma_freebuff(struct s3c64xx_dma_buff *buff)
+{
+	dma_pool_free(dma_pool, buff->lli, buff->lli_dma);
+	kfree(buff);
+}
+
+static int s3c64xx_dma_flush(struct s3c2410_dma_chan *chan)
+{
+	struct s3c64xx_dma_buff *buff, *next;
+	u32 config;
+
+	dbg_showchan(chan);
+
+	pr_debug("%s: flushing channel\n", __func__);
+
+	config = readl(chan->regs + PL080S_CH_CONFIG);
+	config &= ~PL080_CONFIG_ENABLE;
+	writel(config, chan->regs + PL080S_CH_CONFIG);
+
+	/* dump all the buffers associated with this channel */
+
+	for (buff = chan->curr; buff != NULL; buff = next) {
+		next = buff->next;
+		pr_debug("%s: buff %p (next %p)\n", __func__, buff, buff->next);
+
+		s3c64xx_dma_bufffdone(chan, buff, S3C2410_RES_ABORT);
+		s3c64xx_dma_freebuff(buff);
+	}
+
+	chan->curr = chan->next = chan->end = NULL;
+
+	return 0;
+}
+
+int s3c2410_dma_ctrl(unsigned int channel, enum s3c2410_chan_op op)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+
+	WARN_ON(!chan);
+	if (!chan)
+		return -EINVAL;
+
+	switch (op) {
+	case S3C2410_DMAOP_START:
+		return s3c64xx_dma_start(chan);
+
+	case S3C2410_DMAOP_STOP:
+		return s3c64xx_dma_stop(chan);
+
+	case S3C2410_DMAOP_FLUSH:
+		return s3c64xx_dma_flush(chan);
+
+	/* belive PAUSE/RESUME are no-ops */
+	case S3C2410_DMAOP_PAUSE:
+	case S3C2410_DMAOP_RESUME:
+	case S3C2410_DMAOP_STARTED:
+	case S3C2410_DMAOP_TIMEOUT:
+		return 0;
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(s3c2410_dma_ctrl);
+
+/* s3c2410_dma_enque
+ *
+ */
+
+int s3c2410_dma_enqueue(unsigned int channel, void *id,
+			dma_addr_t data, int size)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+	struct s3c64xx_dma_buff *next;
+	struct s3c64xx_dma_buff *buff;
+	struct pl080s_lli *lli;
+	int ret;
+
+	WARN_ON(!chan);
+	if (!chan)
+		return -EINVAL;
+
+	buff = kzalloc(sizeof(struct s3c64xx_dma_buff), GFP_KERNEL);
+	if (!buff) {
+		printk(KERN_ERR "%s: no memory for buffer\n", __func__);
+		return -ENOMEM;
+	}
+
+	lli = dma_pool_alloc(dma_pool, GFP_KERNEL, &buff->lli_dma);
+	if (!lli) {
+		printk(KERN_ERR "%s: no memory for lli\n", __func__);
+		ret = -ENOMEM;
+		goto err_buff;
+	}
+
+	pr_debug("%s: buff %p, dp %08x lli (%p, %08x) %d\n",
+		 __func__, buff, data, lli, (u32)buff->lli_dma, size);
+
+	buff->lli = lli;
+	buff->pw = id;
+
+	s3c64xx_dma_fill_lli(chan, lli, data, size);
+
+	if ((next = chan->next) != NULL) {
+		struct s3c64xx_dma_buff *end = chan->end;
+		struct pl080s_lli *endlli = end->lli;
+
+		pr_debug("enquing onto channel\n");
+
+		end->next = buff;
+		endlli->next_lli = buff->lli_dma;
+
+		if (chan->flags & S3C2410_DMAF_CIRCULAR) {
+			struct s3c64xx_dma_buff *curr = chan->curr;
+			lli->next_lli = curr->lli_dma;
+		}
+
+		if (next == chan->curr) {
+			writel(buff->lli_dma, chan->regs + PL080_CH_LLI);
+			chan->next = buff;
+		}
+
+		show_lli(endlli);
+		chan->end = buff;
+	} else {
+		pr_debug("enquing onto empty channel\n");
+
+		chan->curr = buff;
+		chan->next = buff;
+		chan->end = buff;
+
+		s3c64xx_lli_to_regs(chan, lli);
+	}
+
+	show_lli(lli);
+
+	dbg_showchan(chan);
+	dbg_showbuffs(chan);
+	return 0;
+
+err_buff:
+	kfree(buff);
+	return ret;
+}
+
+EXPORT_SYMBOL(s3c2410_dma_enqueue);
+
+
+int s3c2410_dma_devconfig(int channel,
+			  enum s3c2410_dmasrc source,
+			  unsigned long devaddr)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+	u32 peripheral;
+	u32 config = 0;
+
+	printk("%s: channel %d, source %d, dev %08lx, chan %p\n",
+		 __func__, channel, source, devaddr, chan);
+
+	WARN_ON(!chan);
+	if (!chan)
+		return -EINVAL;
+
+	peripheral = (chan->peripheral & 0xf);
+	chan->source = source;
+	chan->dev_addr = devaddr;
+
+	pr_debug("%s: peripheral %d\n", __func__, peripheral);
+
+	switch (source) {
+	case S3C2410_DMASRC_HW:
+		config = 2 << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		config |= peripheral << PL080_CONFIG_SRC_SEL_SHIFT;
+		break;
+	case S3C2410_DMASRC_MEM:
+		config = 1 << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		config |= peripheral << PL080_CONFIG_DST_SEL_SHIFT;
+		break;
+	default:
+		printk(KERN_ERR "%s: bad source\n", __func__);
+		return -EINVAL;
+	}
+
+	/* allow TC and ERR interrupts */
+	config |= PL080_CONFIG_TC_IRQ_MASK;
+	config |= PL080_CONFIG_ERR_IRQ_MASK;
+
+	pr_debug("%s: config %08x\n", __func__, config);
+
+	writel(config, chan->regs + PL080S_CH_CONFIG);
+
+	return 0;
+}
+EXPORT_SYMBOL(s3c2410_dma_devconfig);
+
+
+int s3c2410_dma_getposition(unsigned int channel,
+			    dma_addr_t *src, dma_addr_t *dst)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+
+	WARN_ON(!chan);
+	if (!chan)
+		return -EINVAL;
+
+	if (src != NULL)
+		*src = readl(chan->regs + PL080_CH_SRC_ADDR);
+
+	if (dst != NULL)
+		*dst = readl(chan->regs + PL080_CH_DST_ADDR);
+
+	return 0;
+}
+EXPORT_SYMBOL(s3c2410_dma_getposition);
+
+/* s3c2410_request_dma
+ *
+ * get control of an dma channel
+*/
+
+int s3c2410_dma_request(unsigned int channel,
+			struct s3c2410_dma_client *client,
+			void *dev)
+{
+	struct s3c2410_dma_chan *chan;
+	unsigned long flags;
+
+	pr_debug("dma%d: s3c2410_request_dma: client=%s, dev=%p\n",
+		 channel, client->name, dev);
+
+	local_irq_save(flags);
+
+	chan = s3c64xx_dma_map_channel(channel);
+	if (chan == NULL) {
+		local_irq_restore(flags);
+		return -EBUSY;
+	}
+
+	dbg_showchan(chan);
+
+	chan->client = client;
+	chan->in_use = 1;
+	chan->peripheral = channel;
+
+	local_irq_restore(flags);
+
+	/* need to setup */
+
+	pr_debug("%s: channel initialised, %p\n", __func__, chan);
+
+	return chan->number | DMACH_LOW_LEVEL;
+}
+
+EXPORT_SYMBOL(s3c2410_dma_request);
+
+/* s3c2410_dma_free
+ *
+ * release the given channel back to the system, will stop and flush
+ * any outstanding transfers, and ensure the channel is ready for the
+ * next claimant.
+ *
+ * Note, although a warning is currently printed if the freeing client
+ * info is not the same as the registrant's client info, the free is still
+ * allowed to go through.
+*/
+
+int s3c2410_dma_free(unsigned int channel, struct s3c2410_dma_client *client)
+{
+	struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel);
+	unsigned long flags;
+
+	if (chan == NULL)
+		return -EINVAL;
+
+	local_irq_save(flags);
+
+	if (chan->client != client) {
+		printk(KERN_WARNING "dma%d: possible free from different client (channel %p, passed %p)\n",
+		       channel, chan->client, client);
+	}
+
+	/* sort out stopping and freeing the channel */
+
+
+	chan->client = NULL;
+	chan->in_use = 0;
+
+	if (!(channel & DMACH_LOW_LEVEL))
+		s3c_dma_chan_map[channel] = NULL;
+
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(s3c2410_dma_free);
+
+
+static void s3c64xx_dma_tcirq(struct s3c64xx_dmac *dmac, int offs)
+{
+	struct s3c2410_dma_chan *chan = dmac->channels + offs;
+
+	/* note, we currently do not bother to work out which buffer
+	 * or buffers have been completed since the last tc-irq. */
+
+	if (chan->callback_fn)
+		(chan->callback_fn)(chan, chan->curr->pw, 0, S3C2410_RES_OK);
+}
+
+static void s3c64xx_dma_errirq(struct s3c64xx_dmac *dmac, int offs)
+{
+	printk(KERN_DEBUG "%s: offs %d\n", __func__, offs);
+}
+
+static irqreturn_t s3c64xx_dma_irq(int irq, void *pw)
+{
+	struct s3c64xx_dmac *dmac = pw;
+	u32 tcstat, errstat;
+	u32 bit;
+	int offs;
+
+	tcstat = readl(dmac->regs + PL080_TC_STATUS);
+	errstat = readl(dmac->regs + PL080_ERR_STATUS);
+
+	for (offs = 0, bit = 1; offs < 8; offs++, bit <<= 1) {
+		if (tcstat & bit) {
+			writel(bit, dmac->regs + PL080_TC_CLEAR);
+			s3c64xx_dma_tcirq(dmac, offs);
+		}
+
+		if (errstat & bit) {
+			s3c64xx_dma_errirq(dmac, offs);
+			writel(bit, dmac->regs + PL080_ERR_CLEAR);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct sysdev_class dma_sysclass = {
+	.name		= "s3c64xx-dma",
+};
+
+static int s3c64xx_dma_init1(int chno, enum dma_ch chbase,
+			     int irq, unsigned int base)
+{
+	struct s3c2410_dma_chan *chptr = &s3c2410_chans[chno];
+	struct s3c64xx_dmac *dmac;
+	char clkname[16];
+	void __iomem *regs;
+	void __iomem *regptr;
+	int err, ch;
+
+	dmac = kzalloc(sizeof(struct s3c64xx_dmac), GFP_KERNEL);
+	if (!dmac) {
+		printk(KERN_ERR "%s: failed to alloc mem\n", __func__);
+		return -ENOMEM;
+	}
+
+	dmac->sysdev.id = chno / 8;
+	dmac->sysdev.cls = &dma_sysclass;
+
+	err = sysdev_register(&dmac->sysdev);
+	if (err) {
+		printk(KERN_ERR "%s: failed to register sysdevice\n", __func__);
+		goto err_alloc;
+	}
+
+	regs = ioremap(base, 0x200);
+	if (!regs) {
+		printk(KERN_ERR "%s: failed to ioremap()\n", __func__);
+		err = -ENXIO;
+		goto err_dev;
+	}
+
+	snprintf(clkname, sizeof(clkname), "dma%d", dmac->sysdev.id);
+
+	dmac->clk = clk_get(NULL, clkname);
+	if (IS_ERR(dmac->clk)) {
+		printk(KERN_ERR "%s: failed to get clock %s\n", __func__, clkname);
+		err = PTR_ERR(dmac->clk);
+		goto err_map;
+	}
+
+	clk_enable(dmac->clk);
+
+	dmac->regs = regs;
+	dmac->chanbase = chbase;
+	dmac->channels = chptr;
+
+	err = request_irq(irq, s3c64xx_dma_irq, 0, "DMA", dmac);
+	if (err < 0) {
+		printk(KERN_ERR "%s: failed to get irq\n", __func__);
+		goto err_clk;
+	}
+
+	regptr = regs + PL080_Cx_BASE(0);
+
+	for (ch = 0; ch < 8; ch++, chno++, chptr++) {
+		printk(KERN_INFO "%s: registering DMA %d (%p)\n",
+		       __func__, chno, regptr);
+
+		chptr->bit = 1 << ch;
+		chptr->number = chno;
+		chptr->dmac = dmac;
+		chptr->regs = regptr;
+		regptr += PL008_Cx_STRIDE;
+	}
+
+	/* for the moment, permanently enable the controller */
+	writel(PL080_CONFIG_ENABLE, regs + PL080_CONFIG);
+
+	printk(KERN_INFO "PL080: IRQ %d, at %p\n", irq, regs);
+
+	return 0;
+
+err_clk:
+	clk_disable(dmac->clk);
+	clk_put(dmac->clk);
+err_map:
+	iounmap(regs);
+err_dev:
+	sysdev_unregister(&dmac->sysdev);
+err_alloc:
+	kfree(dmac);
+	return err;
+}
+
+static int __init s3c64xx_dma_init(void)
+{
+	int ret;
+
+	printk(KERN_INFO "%s: Registering DMA channels\n", __func__);
+
+	dma_pool = dma_pool_create("DMA-LLI", NULL, 32, 16, 0);
+	if (!dma_pool) {
+		printk(KERN_ERR "%s: failed to create pool\n", __func__);
+		return -ENOMEM;
+	}
+
+	ret = sysdev_class_register(&dma_sysclass);
+	if (ret) {
+		printk(KERN_ERR "%s: failed to create sysclass\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Set all DMA configuration to be DMA, not SDMA */
+	writel(0xffffff, S3C_SYSREG(0x110));
+
+	/* Register standard DMA controlers */
+	s3c64xx_dma_init1(0, DMACH_UART0, IRQ_DMA0, 0x75000000);
+	s3c64xx_dma_init1(8, DMACH_PCM1_TX, IRQ_DMA1, 0x75100000);
+
+	return 0;
+}
+
+arch_initcall(s3c64xx_dma_init);
diff --git a/arch/arm/plat-s3c64xx/include/plat/dma-plat.h b/arch/arm/plat-s3c64xx/include/plat/dma-plat.h
new file mode 100644
index 0000000..0c30dd9
--- /dev/null
+++ b/arch/arm/plat-s3c64xx/include/plat/dma-plat.h
@@ -0,0 +1,70 @@
+/* linux/arch/arm/plat-s3c64xx/include/plat/dma-plat.h
+ *
+ * Copyright 2009 Openmoko, Inc.
+ * Copyright 2009 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * S3C64XX DMA core
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#define DMACH_LOW_LEVEL (1<<28) /* use this to specifiy hardware ch no */
+
+struct s3c64xx_dma_buff;
+
+/** s3c64xx_dma_buff - S3C64XX DMA buffer descriptor
+ * @next: Pointer to next buffer in queue or ring.
+ * @pw: Client provided identifier
+ * @lli: Pointer to hardware descriptor this buffer is associated with.
+ * @lli_dma: Hardare address of the descriptor.
+ */
+struct s3c64xx_dma_buff {
+	struct s3c64xx_dma_buff *next;
+
+	void			*pw;
+	struct pl080_lli	*lli;
+	dma_addr_t		 lli_dma;
+};
+
+struct s3c64xx_dmac;
+
+struct s3c2410_dma_chan {
+	unsigned char		 number;      /* number of this dma channel */
+	unsigned char		 in_use;      /* channel allocated */
+	unsigned char		 bit;	      /* bit for enable/disable/etc */
+	unsigned char		 hw_width;
+	unsigned char		 peripheral;
+
+	unsigned int		 flags;
+	enum s3c2410_dmasrc	 source;
+
+
+	dma_addr_t		dev_addr;
+
+	struct s3c2410_dma_client *client;
+	struct s3c64xx_dmac	*dmac;		/* pointer to controller */
+
+	void __iomem		*regs;
+
+	/* cdriver callbacks */
+	s3c2410_dma_cbfn_t	 callback_fn;	/* buffer done callback */
+	s3c2410_dma_opfn_t	 op_fn;		/* channel op callback */
+
+	/* buffer list and information */
+	struct s3c64xx_dma_buff	*curr;		/* current dma buffer */
+	struct s3c64xx_dma_buff	*next;		/* next buffer to load */
+	struct s3c64xx_dma_buff	*end;		/* end of queue */
+
+	/* note, when channel is running in circular mode, curr is the
+	 * first buffer enqueued, end is the last and curr is where the
+	 * last buffer-done event is set-at. The buffers are not freed
+	 * and the last buffer hardware descriptor points back to the
+	 * first.
+	 */
+};
+
+#include <plat/dma-core.h>
-- 
cgit v0.10.2


From 0b13406a1f1928ec71e81dde52cb62d72ffd28ef Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 30 Apr 2009 14:58:40 +0100
Subject: [ARM] S3C64XX: Lower severity of DMA logging

The message was missing a severity macro so pick pr_debug().

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c64xx/dma.c b/arch/arm/plat-s3c64xx/dma.c
index 1c0f51c..67aa93d 100644
--- a/arch/arm/plat-s3c64xx/dma.c
+++ b/arch/arm/plat-s3c64xx/dma.c
@@ -419,7 +419,7 @@ int s3c2410_dma_devconfig(int channel,
 	u32 peripheral;
 	u32 config = 0;
 
-	printk("%s: channel %d, source %d, dev %08lx, chan %p\n",
+	pr_debug("%s: channel %d, source %d, dev %08lx, chan %p\n",
 		 __func__, channel, source, devaddr, chan);
 
 	WARN_ON(!chan);
-- 
cgit v0.10.2


From bac9caf016bf147af7d3afbe7580a7f773cb1566 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Sun, 17 May 2009 18:18:58 -0700
Subject: asm-generic: fix local_add_unless macro

`local_add_unless(x, y, z)' will be expanded to `(&(x)->y, (y), (x))', but
`&(x)->y' should be `&(x)->a'

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h
index dbd6150..fc21844 100644
--- a/include/asm-generic/local.h
+++ b/include/asm-generic/local.h
@@ -42,7 +42,7 @@ typedef struct
 
 #define local_cmpxchg(l, o, n) atomic_long_cmpxchg((&(l)->a), (o), (n))
 #define local_xchg(l, n) atomic_long_xchg((&(l)->a), (n))
-#define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u))
+#define local_add_unless(l, _a, u) atomic_long_add_unless((&(l)->a), (_a), (u))
 #define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a)
 
 /* Non-atomic variants, ie. preemption disabled and won't be touched
-- 
cgit v0.10.2


From dbf8c11f821b6ff83302c34f2403b4f7231f50ae Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <jacmet@sunsite.dk>
Date: Mon, 18 May 2009 11:13:54 +0100
Subject: mtd_dataflash: unbreak erase support

Commit 5b7f3a50 (fix dataflash 64-bit divisions) unfortunately
introduced a typo. Erase addr and len were swapped in the pageaddr
calculation, causing the wrong sectors to get erased.

Signed-off-by: Peter Korsgaard <jacmet@sunsite.dk>
Acked-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 62dee54..43976aa 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -178,7 +178,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 		/* Calculate flash page address; use block erase (for speed) if
 		 * we're at a block boundary and need to erase the whole block.
 		 */
-		pageaddr = div_u64(instr->len, priv->page_size);
+		pageaddr = div_u64(instr->addr, priv->page_size);
 		do_block = (pageaddr & 0x7) == 0 && instr->len >= blocksize;
 		pageaddr = pageaddr << priv->page_offset;
 
-- 
cgit v0.10.2


From 80193195f87ebca6d7417516d6edeb3969631c15 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 18 May 2009 13:07:35 +0100
Subject: Fix oops on close of hot-unplugged FTDI serial converter

Commit c45d6320 ("fix reference counting of ftdi_private") stopped
ftdi_sio_port_remove() from directly freeing the port-private data, with
the intention if the port was still open, it would be freed when
ftdi_close() is eventually called and releases the last refcount on the
structure.

That's all very well, but ftdi_sio_port_remove() still contains a call
to usb_set_serial_port_data(port, NULL) -- so by the time we get to
ftdi_close() for the port which was unplugged, it _still_ oopses on
dereferencing that NULL pointer, as it did before (and does in 2.6.29).

The fix is just not to clear the private data in ftdi_sio_port_remove().
Then the refcount is properly reduced to zero when the final kref_put()
happens in ftdi_close().

Remove a bogus comment too, while we're at it. And stop doing things
inside "if (priv)" -- it must _always_ be there.

Based loosely on an earlier patch by Daniel Mack, and suggestions by
Alan Stern.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Tested-by: Daniel Mack <daniel@caiaq.de>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 0ab8474b..d9fcdae 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1487,14 +1487,7 @@ static int ftdi_sio_port_remove(struct usb_serial_port *port)
 
 	remove_sysfs_attrs(port);
 
-	/* all open ports are closed at this point
-	 *    (by usbserial.c:__serial_close, which calls ftdi_close)
-	 */
-
-	if (priv) {
-		usb_set_serial_port_data(port, NULL);
-		kref_put(&priv->kref, ftdi_sio_priv_release);
-	}
+	kref_put(&priv->kref, ftdi_sio_priv_release);
 
 	return 0;
 }
-- 
cgit v0.10.2


From f7a9a4d782628b4bd2392d931b95bd49732a2918 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Fri, 8 May 2009 08:59:51 +0100
Subject: [ARM] 5510/1: U300 GPIO debug and init fixes

This moves the GPIO driver away from using __devinit and __devexit
It also removes all printk() in favor of using dev_* print macros
on pdev->dev struct instead. Surplus prints are removed, and the
platform_device_probe() function is used instead of putting a
.probe function in the platform driver struct.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-u300/gpio.c b/arch/arm/mach-u300/gpio.c
index 2d5ae8e..308cdb1 100644
--- a/arch/arm/mach-u300/gpio.c
+++ b/arch/arm/mach-u300/gpio.c
@@ -36,6 +36,7 @@ static struct clk *clk;
 /* Memory resource */
 static struct resource *memres;
 static void __iomem *virtbase;
+static struct device *gpiodev;
 
 struct u300_gpio_port {
 	const char *name;
@@ -264,8 +265,8 @@ static struct gpio_struct gpio_pin[U300_GPIO_MAX];
 int gpio_register_callback(unsigned gpio, int (*func)(void *arg), void *data)
 {
 	if (gpio_pin[gpio].callback)
-		printk(KERN_WARNING "GPIO: %s: WARNING: callback already " \
-		       "registered for gpio pin#%d\n", __func__, gpio);
+		dev_warn(gpiodev, "%s: WARNING: callback already "
+			 "registered for gpio pin#%d\n", __func__, gpio);
 	gpio_pin[gpio].callback = func;
 	gpio_pin[gpio].data = data;
 
@@ -276,8 +277,8 @@ EXPORT_SYMBOL(gpio_register_callback);
 int gpio_unregister_callback(unsigned gpio)
 {
 	if (!gpio_pin[gpio].callback)
-		printk(KERN_WARNING "GPIO: %s: WARNING: callback already " \
-		       "unregistered for gpio pin#%d\n", __func__, gpio);
+		dev_warn(gpiodev, "%s: WARNING: callback already "
+			 "unregistered for gpio pin#%d\n", __func__, gpio);
 	gpio_pin[gpio].callback = NULL;
 	gpio_pin[gpio].data = NULL;
 
@@ -303,8 +304,8 @@ void gpio_free(unsigned gpio)
 	gpio_users--;
 	gpio_pin[gpio].users--;
 	if (unlikely(gpio_pin[gpio].users < 0)) {
-		printk(KERN_WARNING "GPIO: Warning: gpio#%d release mismatch\n",
-				gpio);
+		dev_warn(gpiodev, "warning: gpio#%d release mismatch\n",
+			 gpio);
 		gpio_pin[gpio].users = 0;
 	}
 
@@ -492,7 +493,7 @@ static irqreturn_t gpio_irq_handler(int irq, void *dev_id)
 		if (gpio_pin[gpio].callback)
 			(void)gpio_pin[gpio].callback(gpio_pin[gpio].data);
 		else
-			printk(KERN_DEBUG "GPIO: Stray GPIO IRQ on line %d\n",
+			dev_dbg(gpiodev, "stray GPIO IRQ on line %d\n",
 			       gpio);
 	}
 	return IRQ_HANDLED;
@@ -548,25 +549,26 @@ static void gpio_set_initial_values(void)
 #endif
 }
 
-static int __devinit gpio_probe(struct platform_device *pdev)
+static int __init gpio_probe(struct platform_device *pdev)
 {
 	u32 val;
 	int err = 0;
 	int i;
 	int num_irqs;
 
+	gpiodev = &pdev->dev;
 	memset(gpio_pin, 0, sizeof(gpio_pin));
 
 	/* Get GPIO clock */
 	clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(clk)) {
 		err = PTR_ERR(clk);
-		printk(KERN_ERR "GPIO: could not get GPIO clock\n");
+		dev_err(gpiodev, "could not get GPIO clock\n");
 		goto err_no_clk;
 	}
 	err = clk_enable(clk);
 	if (err) {
-		printk(KERN_ERR "GPIO: could not enable GPIO clock\n");
+		dev_err(gpiodev, "could not enable GPIO clock\n");
 		goto err_no_clk_enable;
 	}
 
@@ -580,22 +582,24 @@ static int __devinit gpio_probe(struct platform_device *pdev)
 		goto err_no_ioregion;
 	}
 
-	virtbase = ioremap(memres->start, memres->end - memres->start + 1);
+	virtbase = ioremap(memres->start, resource_size(memres));
 	if (!virtbase) {
 		err = -ENOMEM;
 		goto err_no_ioremap;
 	}
+	dev_info(gpiodev, "remapped 0x%08x to %p\n",
+		 memres->start, virtbase);
 
 #ifdef U300_COH901335
-	printk(KERN_INFO "GPIO: Initializing GPIO Controller COH 901 335\n");
+	dev_info(gpiodev, "initializing GPIO Controller COH 901 335\n");
 	/* Turn on the GPIO block */
 	writel(U300_GPIO_CR_BLOCK_CLOCK_ENABLE, virtbase + U300_GPIO_CR);
 #endif
 
 #ifdef U300_COH901571_3
-	printk(KERN_INFO "GPIO: Initializing GPIO Controller COH 901 571/3\n");
+	dev_info(gpiodev, "initializing GPIO Controller COH 901 571/3\n");
 	val = readl(virtbase + U300_GPIO_CR);
-	printk(KERN_INFO "GPIO: COH901571/3 block version: %d, " \
+	dev_info(gpiodev, "COH901571/3 block version: %d, " \
 	       "number of cores: %d\n",
 	       ((val & 0x0000FE00) >> 9),
 	       ((val & 0x000001FC) >> 2));
@@ -623,15 +627,14 @@ static int __devinit gpio_probe(struct platform_device *pdev)
 				  gpio_ports[num_irqs].name,
 				  &gpio_ports[num_irqs]);
 		if (err) {
-			printk(KERN_CRIT "GPIO: Cannot allocate IRQ for %s!\n",
-			       gpio_ports[num_irqs].name);
+			dev_err(gpiodev, "cannot allocate IRQ for %s!\n",
+				gpio_ports[num_irqs].name);
 			goto err_no_irq;
 		}
 		/* Turns off PortX_irq_force */
 		writel(0x0, virtbase + U300_GPIO_PXIFR +
 				 num_irqs * U300_GPIO_PORTX_SPACING);
 	}
-	printk(KERN_INFO "GPIO: U300 gpio module loaded\n");
 
 	return 0;
 
@@ -647,11 +650,11 @@ static int __devinit gpio_probe(struct platform_device *pdev)
  err_no_clk_enable:
 	clk_put(clk);
  err_no_clk:
-	printk(KERN_INFO "GPIO: module ERROR:%d\n", err);
+	dev_info(gpiodev, "module ERROR:%d\n", err);
 	return err;
 }
 
-static int __devexit gpio_remove(struct platform_device *pdev)
+static int __exit gpio_remove(struct platform_device *pdev)
 {
 	int i;
 
@@ -670,14 +673,13 @@ static struct platform_driver gpio_driver = {
 	.driver		= {
 		.name	= "u300-gpio",
 	},
-	.probe		= gpio_probe,
-	.remove		= __devexit_p(gpio_remove),
+	.remove		= __exit_p(gpio_remove),
 };
 
 
 static int __init u300_gpio_init(void)
 {
-	return platform_driver_register(&gpio_driver);
+	return platform_driver_probe(&gpio_driver, gpio_probe);
 }
 
 static void __exit u300_gpio_exit(void)
-- 
cgit v0.10.2


From 0351c506af1443570194278a7f17414e212a4a74 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Sat, 9 May 2009 02:39:41 +0100
Subject: [ARM] 5511/1: Add w90p910 rtc,kpi and usbd device map_desc define.

Add rtc,kpi and usbd device map_desc define in
/arch/arm/mach-w90x900/w90p910.c.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/w90p910.c b/arch/arm/mach-w90x900/w90p910.c
index d2e4a16..87214b8 100644
--- a/arch/arm/mach-w90x900/w90p910.c
+++ b/arch/arm/mach-w90x900/w90p910.c
@@ -46,6 +46,9 @@ static struct map_desc w90p910_iodesc[] __initdata = {
 	IODESC_ENT(USBEHCIHOST),
 	IODESC_ENT(USBOHCIHOST),
 	IODESC_ENT(ADC),
+	IODESC_ENT(RTC),
+	IODESC_ENT(KPI),
+	IODESC_ENT(USBDEV),
 	/*IODESC_ENT(LCD),*/
 };
 
-- 
cgit v0.10.2


From e1baa011a7160b491896b577d4f01b696be308fa Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Sat, 9 May 2009 02:41:01 +0100
Subject: [ARM] 5512/1: Add w90p910 rtc,kpi and usbd device platform_device
 define.

Add rtc,kpi and usbd device platform_device define
in arch/arm/mach-w90x900/mach-w90p910evb.c.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/mach-w90p910evb.c b/arch/arm/mach-w90x900/mach-w90p910evb.c
index 26de71f..1968ed4 100644
--- a/arch/arm/mach-w90x900/mach-w90p910evb.c
+++ b/arch/arm/mach-w90x900/mach-w90p910evb.c
@@ -158,6 +158,76 @@ struct platform_device w90x900_device_ts = {
 };
 EXPORT_SYMBOL(w90x900_device_ts);
 
+/* RTC controller*/
+
+static struct resource w90x900_rtc_resource[] = {
+	[0] = {
+		.start = W90X900_PA_RTC,
+		.end   = W90X900_PA_RTC + 0xff,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_RTC,
+		.end   = IRQ_RTC,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device w90x900_device_rtc = {
+	.name		= "w90x900-rtc",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(w90x900_rtc_resource),
+	.resource	= w90x900_rtc_resource,
+};
+EXPORT_SYMBOL(w90x900_device_rtc);
+
+/* KPI controller*/
+
+static struct resource w90x900_kpi_resource[] = {
+	[0] = {
+		.start = W90X900_PA_KPI,
+		.end   = W90X900_PA_KPI + W90X900_SZ_KPI - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_KPI,
+		.end   = IRQ_KPI,
+		.flags = IORESOURCE_IRQ,
+	}
+
+};
+
+struct platform_device w90x900_device_kpi = {
+	.name		= "w90x900-kpi",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(w90x900_kpi_resource),
+	.resource	= w90x900_kpi_resource,
+};
+EXPORT_SYMBOL(w90x900_device_kpi);
+
+/* USB Device (Gadget)*/
+
+static struct resource w90x900_usbgadget_resource[] = {
+	[0] = {
+		.start = W90X900_PA_USBDEV,
+		.end   = W90X900_PA_USBDEV + W90X900_SZ_USBDEV - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_USBD,
+		.end   = IRQ_USBD,
+		.flags = IORESOURCE_IRQ,
+	}
+};
+
+struct platform_device w90x900_device_usbgadget = {
+	.name		= "w90x900-usbgadget",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(w90x900_usbgadget_resource),
+	.resource	= w90x900_usbgadget_resource,
+};
+EXPORT_SYMBOL(w90x900_device_usbgadget);
+
 static struct map_desc w90p910_iodesc[] __initdata = {
 };
 
@@ -169,6 +239,9 @@ static struct platform_device *w90p910evb_dev[] __initdata = {
 	&w90x900_device_usb_ehci,
 	&w90x900_device_usb_ohci,
 	&w90x900_device_ts,
+	&w90x900_device_rtc,
+	&w90x900_device_kpi,
+	&w90x900_device_usbgadget,
 };
 
 static void __init w90p910evb_map_io(void)
-- 
cgit v0.10.2


From 982db66352d31892f624390cfb64a1cea5df765a Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Mon, 18 May 2009 17:29:30 +0100
Subject: [ARM] 5518/1: versatile: don't put clock lookups in __initdata

Remove the __initdata annotation for the clock lookups, since they
will be needed when loading modules which use clk_get().

Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 1f929c3..b3bebcc 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -413,7 +413,7 @@ static struct clk ref24_clk = {
 	.rate	= 24000000,
 };
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 	{	/* UART0 */
 		.dev_id		= "dev:f1",
 		.clk		= &ref24_clk,
-- 
cgit v0.10.2


From a93ea9b357a4d4fce9a1f65bf9c152fb67c30716 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Mon, 18 May 2009 17:26:08 +0100
Subject: [ARM] 5517/1: integrator: don't put clock lookups in __initdata

Remove the __initdata annotation for the clock lookups, since they
will be needed when loading modules which use clk_get().

Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c
index 6f88729..a0f60e5 100644
--- a/arch/arm/mach-integrator/core.c
+++ b/arch/arm/mach-integrator/core.c
@@ -121,7 +121,7 @@ static struct clk uartclk = {
 	.rate	= 14745600,
 };
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 	{	/* UART0 */
 		.dev_id		= "mb:16",
 		.clk		= &uartclk,
-- 
cgit v0.10.2


From 373e9644c5f557bc8992036f9a9281e9d98aef40 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Mon, 18 May 2009 19:40:18 +0100
Subject: [ARM] S3C24XX: GPIO: Fix error returns from gpio functions

Several GPIO functions have been returning -1 to indicate
an error instead of returning a proper error code. Change
to return -EINVAL for invalid argument(s).

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2400/gpio.c b/arch/arm/mach-s3c2400/gpio.c
index 81ebe2f..6c68e78 100644
--- a/arch/arm/mach-s3c2400/gpio.c
+++ b/arch/arm/mach-s3c2400/gpio.c
@@ -33,8 +33,8 @@
 
 int s3c2400_gpio_getirq(unsigned int pin)
 {
-	if (pin < S3C2410_GPE(0) || pin > S3C2400_GPE7_EINT7)
-		return -1;  /* not valid interrupts */
+	if (pin < S3C2410_GPE(0) || pin > S3C2400_GPE(7))
+		return -EINVAL;  /* not valid interrupts */
 
 	return (pin - S3C2410_GPE(0)) + IRQ_EINT0;
 }
diff --git a/arch/arm/mach-s3c2410/gpio.c b/arch/arm/mach-s3c2410/gpio.c
index bf7fbfe..7974afc 100644
--- a/arch/arm/mach-s3c2410/gpio.c
+++ b/arch/arm/mach-s3c2410/gpio.c
@@ -40,7 +40,7 @@ int s3c2410_gpio_irqfilter(unsigned int pin, unsigned int on,
 	unsigned long val;
 
 	if (pin < S3C2410_GPG(8) || pin > S3C2410_GPG(15))
-		return -1;
+		return -EINVAL;
 
 	config &= 0xff;
 
diff --git a/arch/arm/plat-s3c24xx/gpio.c b/arch/arm/plat-s3c24xx/gpio.c
index ae0b9d6..95df059 100644
--- a/arch/arm/plat-s3c24xx/gpio.c
+++ b/arch/arm/plat-s3c24xx/gpio.c
@@ -184,10 +184,10 @@ EXPORT_SYMBOL(s3c2410_modify_misccr);
 int s3c2410_gpio_getirq(unsigned int pin)
 {
 	if (pin < S3C2410_GPF(0) || pin > S3C2410_GPG(15))
-		return -1;	/* not valid interrupts */
+		return -EINVAL;	/* not valid interrupts */
 
 	if (pin < S3C2410_GPG(0) && pin > S3C2410_GPF(7))
-		return -1;	/* not valid pin */
+		return -EINVAL;	/* not valid pin */
 
 	if (pin < S3C2410_GPF(4))
 		return (pin - S3C2410_GPF(0)) + IRQ_EINT0;
-- 
cgit v0.10.2


From 9c0ec95797052af84b7579e2a00ea212cebf71a8 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Mon, 18 May 2009 20:03:23 +0100
Subject: [ARM] S3C24XX: GPIO: Move gpiolib initialisation earlier

The arch_initcall() is too late for board initialisation to use
gpiolib when doing their machine specific initilisation via the
.init_machine callback.

Bring the file into line with the s3c64xx implementation and use
the core_initcall() to register the GPIO chips.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c24xx/gpiolib.c b/arch/arm/plat-s3c24xx/gpiolib.c
index a82adc3..9a2d114 100644
--- a/arch/arm/plat-s3c24xx/gpiolib.c
+++ b/arch/arm/plat-s3c24xx/gpiolib.c
@@ -165,4 +165,4 @@ static __init int s3c24xx_gpiolib_init(void)
 	return 0;
 }
 
-arch_initcall(s3c24xx_gpiolib_init);
+core_initcall(s3c24xx_gpiolib_init);
-- 
cgit v0.10.2


From 2b69a8a2b6e5f5d26a038a6494a88a1a776ac88f Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsi.com>
Date: Mon, 18 May 2009 12:57:24 -0600
Subject: [SCSI] mpt2sas: fix driver version inconsistency

In Commit

commit 3b8b5c9b1f08660583e5dfe095c24170df62f1d2
Author: Eric Moore <eric.moore@lsi.com>
Date:   Tue Apr 21 15:44:27 2009 -0600

    [SCSI] mpt2sas : bump driver version to 01.100.02.00

The MPT2SAS_MAJOR_VERSION didn't get bumped from 00 to 01 so
applications will see it incorrectly as 00.100.02.00 driver instead of
01.100.02.00.  Fix by making MPT2SAS_MAJOR_VERSION match the major
number in MPT2SAS_DRIVER_VERSION

Signed-off-by: Eric Moore <eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index babd4cc..36b1d10 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -69,7 +69,7 @@
 #define MPT2SAS_AUTHOR	"LSI Corporation <DL-MPTFusionLinux@lsi.com>"
 #define MPT2SAS_DESCRIPTION	"LSI MPT Fusion SAS 2.0 Device Driver"
 #define MPT2SAS_DRIVER_VERSION		"01.100.02.00"
-#define MPT2SAS_MAJOR_VERSION		00
+#define MPT2SAS_MAJOR_VERSION		01
 #define MPT2SAS_MINOR_VERSION		100
 #define MPT2SAS_BUILD_VERSION		02
 #define MPT2SAS_RELEASE_VERSION		00
-- 
cgit v0.10.2


From 5233c178d4cedad0acd0eb0f24b2e09f7fafaa1c Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Mon, 18 May 2009 20:10:43 +0100
Subject: [ARM] S3C24XX: GPIO: Fix missing GPIOs in gpiolib

The GPG bank has 16 IOs, not 10. Add the missing GPH bank.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/plat-s3c24xx/gpiolib.c b/arch/arm/plat-s3c24xx/gpiolib.c
index 9a2d114..6d7a961 100644
--- a/arch/arm/plat-s3c24xx/gpiolib.c
+++ b/arch/arm/plat-s3c24xx/gpiolib.c
@@ -148,9 +148,18 @@ struct s3c_gpio_chip s3c24xx_gpios[] = {
 			.base			= S3C2410_GPG(0),
 			.owner			= THIS_MODULE,
 			.label			= "GPIOG",
-			.ngpio			= 10,
+			.ngpio			= 16,
 			.to_irq			= s3c24xx_gpiolib_bankg_toirq,
 		},
+	}, {
+		.base	= S3C2410_GPHCON,
+		.pm	= __gpio_pm(&s3c_gpio_pm_2bit),
+		.chip	= {
+			.base			= S3C2410_GPH(0),
+			.owner			= THIS_MODULE,
+			.label			= "GPIOH",
+			.ngpio			= 11,
+		},
 	},
 };
 
-- 
cgit v0.10.2


From 7a05a2cbd317cbd16e6d1689bb1e5e208eaaba6f Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Mon, 18 May 2009 20:15:01 +0100
Subject: [ARM] S3C24XX: GPIO: Change usb-simtec.c to use gpiolib.

Make arch/arm/mach-s3c2410/usb-simtec.c use gpiolib to
manage gpio access.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 4964637..31af898 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -594,8 +594,6 @@ static void __init bast_map_io(void)
 	s3c24xx_init_io(bast_iodesc, ARRAY_SIZE(bast_iodesc));
 	s3c24xx_init_clocks(0);
 	s3c24xx_init_uarts(bast_uartcfgs, ARRAY_SIZE(bast_uartcfgs));
-
-	usb_simtec_init();
 }
 
 static void __init bast_init(void)
@@ -609,6 +607,7 @@ static void __init bast_init(void)
 	i2c_register_board_info(0, bast_i2c_devs,
 				ARRAY_SIZE(bast_i2c_devs));
 
+	usb_simtec_init();
 	nor_simtec_init();
 }
 
diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c
index dd45eb4..6cd9377 100644
--- a/arch/arm/mach-s3c2410/usb-simtec.c
+++ b/arch/arm/mach-s3c2410/usb-simtec.c
@@ -22,6 +22,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/gpio.h>
 #include <linux/io.h>
 
 #include <asm/mach/arch.h>
@@ -30,7 +31,6 @@
 
 #include <mach/bast-map.h>
 #include <mach/bast-irq.h>
-#include <mach/regs-gpio.h>
 
 #include <mach/hardware.h>
 #include <asm/irq.h>
@@ -54,9 +54,9 @@ usb_simtec_powercontrol(int port, int to)
 	power_state[port] = to;
 
 	if (power_state[0] && power_state[1])
-		s3c2410_gpio_setpin(S3C2410_GPB(4), 0);
+		gpio_set_value(S3C2410_GPB(4), 0);
 	else
-		s3c2410_gpio_setpin(S3C2410_GPB(4), 1);
+		gpio_set_value(S3C2410_GPB(4), 1);
 }
 
 static irqreturn_t
@@ -64,7 +64,7 @@ usb_simtec_ocirq(int irq, void *pw)
 {
 	struct s3c2410_hcd_info *info = pw;
 
-	if (s3c2410_gpio_getpin(S3C2410_GPG(10)) == 0) {
+	if (gpio_get_value(S3C2410_GPG(10)) == 0) {
 		pr_debug("usb_simtec: over-current irq (oc detected)\n");
 		s3c2410_usb_report_oc(info, 3);
 	} else {
@@ -107,10 +107,27 @@ static struct s3c2410_hcd_info usb_simtec_info = {
 
 int usb_simtec_init(void)
 {
+	int ret;
+
 	printk("USB Power Control, (c) 2004 Simtec Electronics\n");
-	s3c_device_usb.dev.platform_data = &usb_simtec_info;
 
-	s3c2410_gpio_cfgpin(S3C2410_GPB(4), S3C2410_GPIO_OUTPUT);
-	s3c2410_gpio_setpin(S3C2410_GPB(4), 1);
+	ret = gpio_request(S3C2410_GPB(4), "USB power control");
+	if (ret < 0) {
+		pr_err("%s: failed to get GPB4\n", __func__);
+		return ret;
+	}
+
+	ret = gpio_request(S3C2410_GPG(10), "USB overcurrent");
+	if (ret < 0) {
+		pr_err("%s: failed to get GPG10\n", __func__);
+		gpio_free(S3C2410_GPB(4));
+		return ret;
+	}
+
+	/* turn power on */
+	gpio_direction_output(S3C2410_GPB(4), 1);
+	gpio_direction_input(S3C2410_GPG(10));
+
+	s3c_device_usb.dev.platform_data = &usb_simtec_info;
 	return 0;
 }
-- 
cgit v0.10.2


From 75834fc3b6fcff00327f5d2a18760c1e8e0179c5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 18 May 2009 10:26:10 -0400
Subject: SELinux: move SELINUX_MAGIC into magic.h

The selinuxfs superblock magic is used inside the IMA code, but is being
defined in two places and could someday get out of sync.  This patch moves the
declaration into magic.h so it is only done once.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/include/linux/magic.h b/include/linux/magic.h
index 5b4e28b..927138c 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -9,6 +9,7 @@
 #define DEBUGFS_MAGIC          0x64626720
 #define SYSFS_MAGIC		0x62656572
 #define SECURITYFS_MAGIC	0x73636673
+#define SELINUX_MAGIC		0xf97cff8c
 #define TMPFS_MAGIC		0x01021994
 #define SQUASHFS_MAGIC		0x73717368
 #define EFS_SUPER_MAGIC		0x414A53
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index dec6dcb..31d677f 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -49,14 +49,12 @@ struct ima_measure_rule_entry {
  * written in terms of .action, .func, .mask, .fsmagic, and .uid
  */
 static struct ima_measure_rule_entry default_rules[] = {
-	{.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,
-	 .flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC},
-	{.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,
-	 .flags = IMA_FSMAGIC},
-	{.action = DONT_MEASURE,.fsmagic = 0xF97CFF8C,.flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = SELINUX_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = MEASURE,.func = FILE_MMAP,.mask = MAY_EXEC,
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC,
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index a7be3f0..ca83579 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -8,14 +8,13 @@
 #ifndef _SELINUX_SECURITY_H_
 #define _SELINUX_SECURITY_H_
 
+#include <linux/magic.h>
 #include "flask.h"
 
 #define SECSID_NULL			0x00000000 /* unspecified SID */
 #define SECSID_WILD			0xffffffff /* wildcard SID */
 #define SECCLASS_NULL			0x0000 /* no class */
 
-#define SELINUX_MAGIC 0xf97cff8c
-
 /* Identify specific policy version changes */
 #define POLICYDB_VERSION_BASE		15
 #define POLICYDB_VERSION_BOOL		16
-- 
cgit v0.10.2


From 7ee2cb7f32b299c2b06a31fde155457203e4b7dd Mon Sep 17 00:00:00 2001
From: Frank Filz <ffilzlnx@us.ibm.com>
Date: Mon, 18 May 2009 17:41:40 -0400
Subject: nfs: Fix NFS v4 client handling of MAY_EXEC in nfs_permission.

The problem is that permission checking is skipped if atomic open is
possible, but when exec opens a file, it just opens it O_READONLY which
means EXEC permission will not be checked at that time.

This problem is observed by the following sequence (executed as root):

  mount -t nfs4 server:/ /mnt4
  echo "ls" >/mnt4/foo
  chmod 744 /mnt4/foo
  su guest -c "mnt4/foo"

Signed-off-by: Frank Filz <ffilzlnx@us.ibm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
Tested-by: Eugene Teo <eugeneteo@kernel.sg>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 370b190..89f98e9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1943,7 +1943,8 @@ int nfs_permission(struct inode *inode, int mask)
 		case S_IFREG:
 			/* NFSv4 has atomic_open... */
 			if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
-					&& (mask & MAY_OPEN))
+					&& (mask & MAY_OPEN)
+					&& !(mask & MAY_EXEC))
 				goto out;
 			break;
 		case S_IFDIR:
-- 
cgit v0.10.2


From 066aa9c1e3d0af52619fe26f05810990ff381d8c Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Date: Wed, 28 Jan 2009 21:32:04 +0200
Subject: omap iommu: omap3 iommu device registration

Signed-off-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>

diff --git a/arch/arm/mach-omap2/omap3-iommu.c b/arch/arm/mach-omap2/omap3-iommu.c
new file mode 100644
index 0000000..91ee38a
--- /dev/null
+++ b/arch/arm/mach-omap2/omap3-iommu.c
@@ -0,0 +1,103 @@
+/*
+ * omap iommu: omap3 device registration
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/platform_device.h>
+
+#include <mach/iommu.h>
+
+#define OMAP3_MMU1_BASE	0x480bd400
+#define OMAP3_MMU2_BASE	0x5d000000
+#define OMAP3_MMU1_IRQ	24
+#define OMAP3_MMU2_IRQ	28
+
+
+static unsigned long iommu_base[] __initdata = {
+	OMAP3_MMU1_BASE,
+	OMAP3_MMU2_BASE,
+};
+
+static int iommu_irq[] __initdata = {
+	OMAP3_MMU1_IRQ,
+	OMAP3_MMU2_IRQ,
+};
+
+static const struct iommu_platform_data omap3_iommu_pdata[] __initconst = {
+	{
+		.name = "isp",
+		.nr_tlb_entries = 8,
+		.clk_name = "cam_ick",
+	},
+	{
+		.name = "iva2",
+		.nr_tlb_entries = 32,
+		.clk_name = "iva2_ck",
+	},
+};
+#define NR_IOMMU_DEVICES ARRAY_SIZE(omap3_iommu_pdata)
+
+static struct platform_device *omap3_iommu_pdev[NR_IOMMU_DEVICES];
+
+static int __init omap3_iommu_init(void)
+{
+	int i, err;
+
+	for (i = 0; i < NR_IOMMU_DEVICES; i++) {
+		struct platform_device *pdev;
+		struct resource res[2];
+
+		pdev = platform_device_alloc("omap-iommu", i);
+		if (!pdev) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+
+		memset(res, 0,  sizeof(res));
+		res[0].start = iommu_base[i];
+		res[0].end = iommu_base[i] + MMU_REG_SIZE - 1;
+		res[0].flags = IORESOURCE_MEM;
+		res[1].start = res[1].end = iommu_irq[i];
+		res[1].flags = IORESOURCE_IRQ;
+
+		err = platform_device_add_resources(pdev, res,
+						    ARRAY_SIZE(res));
+		if (err)
+			goto err_out;
+		err = platform_device_add_data(pdev, &omap3_iommu_pdata[i],
+					       sizeof(omap3_iommu_pdata[0]));
+		if (err)
+			goto err_out;
+		err = platform_device_add(pdev);
+		if (err)
+			goto err_out;
+		omap3_iommu_pdev[i] = pdev;
+	}
+	return 0;
+
+err_out:
+	while (i--)
+		platform_device_put(omap3_iommu_pdev[i]);
+	return err;
+}
+module_init(omap3_iommu_init);
+
+static void __exit omap3_iommu_exit(void)
+{
+	int i;
+
+	for (i = 0; i < NR_IOMMU_DEVICES; i++)
+		platform_device_unregister(omap3_iommu_pdev[i]);
+}
+module_exit(omap3_iommu_exit);
+
+MODULE_AUTHOR("Hiroshi DOYU");
+MODULE_DESCRIPTION("omap iommu: omap3 device registration");
+MODULE_LICENSE("GPL v2");
-- 
cgit v0.10.2


From 69d3a84a646d6ad6cd693a7a3d5b9af414113d2c Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Date: Wed, 28 Jan 2009 21:32:08 +0200
Subject: omap iommu: simple virtual address space management

This patch provides a device drivers, which has a omap iommu, with
address mapping APIs between device virtual address(iommu), physical
address and MPU virtual address.

There are 4 possible patterns for iommu virtual address(iova/da) mapping.

    |iova/			  mapping		iommu_		page
    | da	pa	va	(d)-(p)-(v)		function	type
  ---------------------------------------------------------------------------
  1 | c		c	c	 1 - 1 - 1	  _kmap() / _kunmap()	s
  2 | c		c,a	c	 1 - 1 - 1	_kmalloc()/ _kfree()	s
  3 | c		d	c	 1 - n - 1	  _vmap() / _vunmap()	s
  4 | c		d,a	c	 1 - n - 1	_vmalloc()/ _vfree()	n*

    'iova':	device iommu virtual address
    'da':	alias of 'iova'
    'pa':	physical address
    'va':	mpu virtual address

    'c':	contiguous memory area
    'd':	dicontiguous memory area
    'a':	anonymous memory allocation
    '()':	optional feature

    'n':	a normal page(4KB) size is used.
    's':	multiple iommu superpage(16MB, 1MB, 64KB, 4KB) size is used.

    '*':	not yet, but feasible.

Signed-off-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>

diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index 58cf91f..742c2aa 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -30,6 +30,14 @@ struct map_desc {
 
 #ifdef CONFIG_MMU
 extern void iotable_init(struct map_desc *, int);
+
+struct mem_type;
+extern const struct mem_type *get_mem_type(unsigned int type);
+/*
+ * external interface to remap single page with appropriate type
+ */
+extern int ioremap_page(unsigned long virt, unsigned long phys,
+			const struct mem_type *mtype);
 #else
 #define iotable_init(map,num)	do { } while (0)
 #endif
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 9f88dd3..0ab75c6 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -110,6 +110,12 @@ static int remap_area_pages(unsigned long start, unsigned long pfn,
 	return err;
 }
 
+int ioremap_page(unsigned long virt, unsigned long phys,
+		 const struct mem_type *mtype)
+{
+	return remap_area_pages(virt, __phys_to_pfn(phys), PAGE_SIZE, mtype);
+}
+EXPORT_SYMBOL(ioremap_page);
 
 void __check_kvm_seq(struct mm_struct *mm)
 {
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e6344ec..70974d7 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -255,6 +255,7 @@ const struct mem_type *get_mem_type(unsigned int type)
 {
 	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
 }
+EXPORT_SYMBOL(get_mem_type);
 
 /*
  * Adjust the PMD section entries according to the CPU in use.
diff --git a/arch/arm/plat-omap/include/mach/iovmm.h b/arch/arm/plat-omap/include/mach/iovmm.h
new file mode 100644
index 0000000..bdc7ce5
--- /dev/null
+++ b/arch/arm/plat-omap/include/mach/iovmm.h
@@ -0,0 +1,94 @@
+/*
+ * omap iommu: simple virtual address space management
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __IOMMU_MMAP_H
+#define __IOMMU_MMAP_H
+
+struct iovm_struct {
+	struct iommu		*iommu;	/* iommu object which this belongs to */
+	u32			da_start; /* area definition */
+	u32			da_end;
+	u32			flags; /* IOVMF_: see below */
+	struct list_head	list; /* linked in ascending order */
+	const struct sg_table	*sgt; /* keep 'page' <-> 'da' mapping */
+	void			*va; /* mpu side mapped address */
+};
+
+/*
+ * IOVMF_FLAGS: attribute for iommu virtual memory area(iovma)
+ *
+ * lower 16 bit is used for h/w and upper 16 bit is for s/w.
+ */
+#define IOVMF_SW_SHIFT		16
+#define IOVMF_HW_SIZE		(1 << IOVMF_SW_SHIFT)
+#define IOVMF_HW_MASK		(IOVMF_HW_SIZE - 1)
+#define IOVMF_SW_MASK		(~IOVMF_HW_MASK)UL
+
+/*
+ * iovma: h/w flags derived from cam and ram attribute
+ */
+#define IOVMF_CAM_MASK		(~((1 << 10) - 1))
+#define IOVMF_RAM_MASK		(~IOVMF_CAM_MASK)
+
+#define IOVMF_PGSZ_MASK		(3 << 0)
+#define IOVMF_PGSZ_1M		MMU_CAM_PGSZ_1M
+#define IOVMF_PGSZ_64K		MMU_CAM_PGSZ_64K
+#define IOVMF_PGSZ_4K		MMU_CAM_PGSZ_4K
+#define IOVMF_PGSZ_16M		MMU_CAM_PGSZ_16M
+
+#define IOVMF_ENDIAN_MASK	(1 << 9)
+#define IOVMF_ENDIAN_BIG	MMU_RAM_ENDIAN_BIG
+#define IOVMF_ENDIAN_LITTLE	MMU_RAM_ENDIAN_LITTLE
+
+#define IOVMF_ELSZ_MASK		(3 << 7)
+#define IOVMF_ELSZ_8		MMU_RAM_ELSZ_8
+#define IOVMF_ELSZ_16		MMU_RAM_ELSZ_16
+#define IOVMF_ELSZ_32		MMU_RAM_ELSZ_32
+#define IOVMF_ELSZ_NONE		MMU_RAM_ELSZ_NONE
+
+#define IOVMF_MIXED_MASK	(1 << 6)
+#define IOVMF_MIXED		MMU_RAM_MIXED
+
+/*
+ * iovma: s/w flags, used for mapping and umapping internally.
+ */
+#define IOVMF_MMIO		(1 << IOVMF_SW_SHIFT)
+#define IOVMF_ALLOC		(2 << IOVMF_SW_SHIFT)
+#define IOVMF_ALLOC_MASK	(3 << IOVMF_SW_SHIFT)
+
+/* "superpages" is supported just with physically linear pages */
+#define IOVMF_DISCONT		(1 << (2 + IOVMF_SW_SHIFT))
+#define IOVMF_LINEAR		(2 << (2 + IOVMF_SW_SHIFT))
+#define IOVMF_LINEAR_MASK	(3 << (2 + IOVMF_SW_SHIFT))
+
+#define IOVMF_DA_FIXED		(1 << (4 + IOVMF_SW_SHIFT))
+#define IOVMF_DA_ANON		(2 << (4 + IOVMF_SW_SHIFT))
+#define IOVMF_DA_MASK		(3 << (4 + IOVMF_SW_SHIFT))
+
+
+extern struct iovm_struct *find_iovm_area(struct iommu *obj, u32 da);
+extern u32 iommu_vmap(struct iommu *obj, u32 da,
+			const struct sg_table *sgt, u32 flags);
+extern struct sg_table *iommu_vunmap(struct iommu *obj, u32 da);
+extern u32 iommu_vmalloc(struct iommu *obj, u32 da, size_t bytes,
+			   u32 flags);
+extern void iommu_vfree(struct iommu *obj, const u32 da);
+extern u32 iommu_kmap(struct iommu *obj, u32 da, u32 pa, size_t bytes,
+			u32 flags);
+extern void iommu_kunmap(struct iommu *obj, u32 da);
+extern u32 iommu_kmalloc(struct iommu *obj, u32 da, size_t bytes,
+			   u32 flags);
+extern void iommu_kfree(struct iommu *obj, u32 da);
+
+extern void *da_to_va(struct iommu *obj, u32 da);
+
+#endif /* __IOMMU_MMAP_H */
diff --git a/arch/arm/plat-omap/iovmm.c b/arch/arm/plat-omap/iovmm.c
new file mode 100644
index 0000000..2fce2c1
--- /dev/null
+++ b/arch/arm/plat-omap/iovmm.c
@@ -0,0 +1,896 @@
+/*
+ * omap iommu: simple virtual address space management
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/scatterlist.h>
+
+#include <asm/cacheflush.h>
+#include <asm/mach/map.h>
+
+#include <mach/iommu.h>
+#include <mach/iovmm.h>
+
+#include "iopgtable.h"
+
+/*
+ * A device driver needs to create address mappings between:
+ *
+ * - iommu/device address
+ * - physical address
+ * - mpu virtual address
+ *
+ * There are 4 possible patterns for them:
+ *
+ *    |iova/			  mapping		iommu_		page
+ *    | da	pa	va	(d)-(p)-(v)		function	type
+ *  ---------------------------------------------------------------------------
+ *  1 | c	c	c	 1 - 1 - 1	  _kmap() / _kunmap()	s
+ *  2 | c	c,a	c	 1 - 1 - 1	_kmalloc()/ _kfree()	s
+ *  3 | c	d	c	 1 - n - 1	  _vmap() / _vunmap()	s
+ *  4 | c	d,a	c	 1 - n - 1	_vmalloc()/ _vfree()	n*
+ *
+ *
+ *	'iova':	device iommu virtual address
+ *	'da':	alias of 'iova'
+ *	'pa':	physical address
+ *	'va':	mpu virtual address
+ *
+ *	'c':	contiguous memory area
+ *	'd':	dicontiguous memory area
+ *	'a':	anonymous memory allocation
+ *	'()':	optional feature
+ *
+ *	'n':	a normal page(4KB) size is used.
+ *	's':	multiple iommu superpage(16MB, 1MB, 64KB, 4KB) size is used.
+ *
+ *	'*':	not yet, but feasible.
+ */
+
+static struct kmem_cache *iovm_area_cachep;
+
+/* return total bytes of sg buffers */
+static size_t sgtable_len(const struct sg_table *sgt)
+{
+	unsigned int i, total = 0;
+	struct scatterlist *sg;
+
+	if (!sgt)
+		return 0;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes;
+
+		bytes = sg_dma_len(sg);
+
+		if (!iopgsz_ok(bytes)) {
+			pr_err("%s: sg[%d] not iommu pagesize(%x)\n",
+			       __func__, i, bytes);
+			return 0;
+		}
+
+		total += bytes;
+	}
+
+	return total;
+}
+#define sgtable_ok(x)	(!!sgtable_len(x))
+
+/*
+ * calculate the optimal number sg elements from total bytes based on
+ * iommu superpages
+ */
+static unsigned int sgtable_nents(size_t bytes)
+{
+	int i;
+	unsigned int nr_entries;
+	const unsigned long pagesize[] = { SZ_16M, SZ_1M, SZ_64K, SZ_4K, };
+
+	if (!IS_ALIGNED(bytes, PAGE_SIZE)) {
+		pr_err("%s: wrong size %08x\n", __func__, bytes);
+		return 0;
+	}
+
+	nr_entries = 0;
+	for (i = 0; i < ARRAY_SIZE(pagesize); i++) {
+		if (bytes >= pagesize[i]) {
+			nr_entries += (bytes / pagesize[i]);
+			bytes %= pagesize[i];
+		}
+	}
+	BUG_ON(bytes);
+
+	return nr_entries;
+}
+
+/* allocate and initialize sg_table header(a kind of 'superblock') */
+static struct sg_table *sgtable_alloc(const size_t bytes, u32 flags)
+{
+	unsigned int nr_entries;
+	int err;
+	struct sg_table *sgt;
+
+	if (!bytes)
+		return ERR_PTR(-EINVAL);
+
+	if (!IS_ALIGNED(bytes, PAGE_SIZE))
+		return ERR_PTR(-EINVAL);
+
+	/* FIXME: IOVMF_DA_FIXED should support 'superpages' */
+	if ((flags & IOVMF_LINEAR) && (flags & IOVMF_DA_ANON)) {
+		nr_entries = sgtable_nents(bytes);
+		if (!nr_entries)
+			return ERR_PTR(-EINVAL);
+	} else
+		nr_entries =  bytes / PAGE_SIZE;
+
+	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return ERR_PTR(-ENOMEM);
+
+	err = sg_alloc_table(sgt, nr_entries, GFP_KERNEL);
+	if (err)
+		return ERR_PTR(err);
+
+	pr_debug("%s: sgt:%p(%d entries)\n", __func__, sgt, nr_entries);
+
+	return sgt;
+}
+
+/* free sg_table header(a kind of superblock) */
+static void sgtable_free(struct sg_table *sgt)
+{
+	if (!sgt)
+		return;
+
+	sg_free_table(sgt);
+	kfree(sgt);
+
+	pr_debug("%s: sgt:%p\n", __func__, sgt);
+}
+
+/* map 'sglist' to a contiguous mpu virtual area and return 'va' */
+static void *vmap_sg(const struct sg_table *sgt)
+{
+	u32 va;
+	size_t total;
+	unsigned int i;
+	struct scatterlist *sg;
+	struct vm_struct *new;
+	const struct mem_type *mtype;
+
+	mtype = get_mem_type(MT_DEVICE);
+	if (!mtype)
+		return ERR_PTR(-EINVAL);
+
+	total = sgtable_len(sgt);
+	if (!total)
+		return ERR_PTR(-EINVAL);
+
+	new = __get_vm_area(total, VM_IOREMAP, VMALLOC_START, VMALLOC_END);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+	va = (u32)new->addr;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes;
+		u32 pa;
+		int err;
+
+		pa = sg_phys(sg);
+		bytes = sg_dma_len(sg);
+
+		BUG_ON(bytes != PAGE_SIZE);
+
+		err = ioremap_page(va,  pa, mtype);
+		if (err)
+			goto err_out;
+
+		va += bytes;
+	}
+
+	flush_cache_vmap(new->addr, total);
+	return new->addr;
+
+err_out:
+	WARN_ON(1); /* FIXME: cleanup some mpu mappings */
+	vunmap(new->addr);
+	return ERR_PTR(-EAGAIN);
+}
+
+static inline void vunmap_sg(const void *va)
+{
+	vunmap(va);
+}
+
+static struct iovm_struct *__find_iovm_area(struct iommu *obj, const u32 da)
+{
+	struct iovm_struct *tmp;
+
+	list_for_each_entry(tmp, &obj->mmap, list) {
+		if ((da >= tmp->da_start) && (da < tmp->da_end)) {
+			size_t len;
+
+			len = tmp->da_end - tmp->da_start;
+
+			dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n",
+				__func__, tmp->da_start, da, tmp->da_end, len,
+				tmp->flags);
+
+			return tmp;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * find_iovm_area  -  find iovma which includes @da
+ * @da:		iommu device virtual address
+ *
+ * Find the existing iovma starting at @da
+ */
+struct iovm_struct *find_iovm_area(struct iommu *obj, u32 da)
+{
+	struct iovm_struct *area;
+
+	mutex_lock(&obj->mmap_lock);
+	area = __find_iovm_area(obj, da);
+	mutex_unlock(&obj->mmap_lock);
+
+	return area;
+}
+EXPORT_SYMBOL_GPL(find_iovm_area);
+
+/*
+ * This finds the hole(area) which fits the requested address and len
+ * in iovmas mmap, and returns the new allocated iovma.
+ */
+static struct iovm_struct *alloc_iovm_area(struct iommu *obj, u32 da,
+					   size_t bytes, u32 flags)
+{
+	struct iovm_struct *new, *tmp;
+	u32 start, prev_end, alignement;
+
+	if (!obj || !bytes)
+		return ERR_PTR(-EINVAL);
+
+	start = da;
+	alignement = PAGE_SIZE;
+
+	if (flags & IOVMF_DA_ANON) {
+		/*
+		 * Reserve the first page for NULL
+		 */
+		start = PAGE_SIZE;
+		if (flags & IOVMF_LINEAR)
+			alignement = iopgsz_max(bytes);
+		start = roundup(start, alignement);
+	}
+
+	tmp = NULL;
+	if (list_empty(&obj->mmap))
+		goto found;
+
+	prev_end = 0;
+	list_for_each_entry(tmp, &obj->mmap, list) {
+
+		if ((prev_end <= start) && (start + bytes < tmp->da_start))
+			goto found;
+
+		if (flags & IOVMF_DA_ANON)
+			start = roundup(tmp->da_end, alignement);
+
+		prev_end = tmp->da_end;
+	}
+
+	if ((start >= prev_end) && (ULONG_MAX - start >= bytes))
+		goto found;
+
+	dev_dbg(obj->dev, "%s: no space to fit %08x(%x) flags: %08x\n",
+		__func__, da, bytes, flags);
+
+	return ERR_PTR(-EINVAL);
+
+found:
+	new = kmem_cache_zalloc(iovm_area_cachep, GFP_KERNEL);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+
+	new->iommu = obj;
+	new->da_start = start;
+	new->da_end = start + bytes;
+	new->flags = flags;
+
+	/*
+	 * keep ascending order of iovmas
+	 */
+	if (tmp)
+		list_add_tail(&new->list, &tmp->list);
+	else
+		list_add(&new->list, &obj->mmap);
+
+	dev_dbg(obj->dev, "%s: found %08x-%08x-%08x(%x) %08x\n",
+		__func__, new->da_start, start, new->da_end, bytes, flags);
+
+	return new;
+}
+
+static void free_iovm_area(struct iommu *obj, struct iovm_struct *area)
+{
+	size_t bytes;
+
+	BUG_ON(!obj || !area);
+
+	bytes = area->da_end - area->da_start;
+
+	dev_dbg(obj->dev, "%s: %08x-%08x(%x) %08x\n",
+		__func__, area->da_start, area->da_end, bytes, area->flags);
+
+	list_del(&area->list);
+	kmem_cache_free(iovm_area_cachep, area);
+}
+
+/**
+ * da_to_va - convert (d) to (v)
+ * @obj:	objective iommu
+ * @da:		iommu device virtual address
+ * @va:		mpu virtual address
+ *
+ * Returns mpu virtual addr which corresponds to a given device virtual addr
+ */
+void *da_to_va(struct iommu *obj, u32 da)
+{
+	void *va = NULL;
+	struct iovm_struct *area;
+
+	mutex_lock(&obj->mmap_lock);
+
+	area = __find_iovm_area(obj, da);
+	if (!area) {
+		dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
+		goto out;
+	}
+	va = area->va;
+	mutex_unlock(&obj->mmap_lock);
+out:
+	return va;
+}
+EXPORT_SYMBOL_GPL(da_to_va);
+
+static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va)
+{
+	unsigned int i;
+	struct scatterlist *sg;
+	void *va = _va;
+	void *va_end;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		struct page *pg;
+		const size_t bytes = PAGE_SIZE;
+
+		/*
+		 * iommu 'superpage' isn't supported with 'iommu_vmalloc()'
+		 */
+		pg = vmalloc_to_page(va);
+		BUG_ON(!pg);
+		sg_set_page(sg, pg, bytes, 0);
+
+		va += bytes;
+	}
+
+	va_end = _va + PAGE_SIZE * i;
+	flush_cache_vmap(_va, va_end);
+}
+
+static inline void sgtable_drain_vmalloc(struct sg_table *sgt)
+{
+	/*
+	 * Actually this is not necessary at all, just exists for
+	 * consistency of the code readibility.
+	 */
+	BUG_ON(!sgt);
+}
+
+static void sgtable_fill_kmalloc(struct sg_table *sgt, u32 pa, size_t len)
+{
+	unsigned int i;
+	struct scatterlist *sg;
+	void *va;
+
+	va = phys_to_virt(pa);
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes;
+
+		bytes = iopgsz_max(len);
+
+		BUG_ON(!iopgsz_ok(bytes));
+
+		sg_set_buf(sg, phys_to_virt(pa), bytes);
+		/*
+		 * 'pa' is cotinuous(linear).
+		 */
+		pa += bytes;
+		len -= bytes;
+	}
+	BUG_ON(len);
+
+	clean_dcache_area(va, len);
+}
+
+static inline void sgtable_drain_kmalloc(struct sg_table *sgt)
+{
+	/*
+	 * Actually this is not necessary at all, just exists for
+	 * consistency of the code readibility
+	 */
+	BUG_ON(!sgt);
+}
+
+/* create 'da' <-> 'pa' mapping from 'sgt' */
+static int map_iovm_area(struct iommu *obj, struct iovm_struct *new,
+			 const struct sg_table *sgt, u32 flags)
+{
+	int err;
+	unsigned int i, j;
+	struct scatterlist *sg;
+	u32 da = new->da_start;
+
+	if (!obj || !new || !sgt)
+		return -EINVAL;
+
+	BUG_ON(!sgtable_ok(sgt));
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		u32 pa;
+		int pgsz;
+		size_t bytes;
+		struct iotlb_entry e;
+
+		pa = sg_phys(sg);
+		bytes = sg_dma_len(sg);
+
+		flags &= ~IOVMF_PGSZ_MASK;
+		pgsz = bytes_to_iopgsz(bytes);
+		if (pgsz < 0)
+			goto err_out;
+		flags |= pgsz;
+
+		pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
+			 i, da, pa, bytes);
+
+		iotlb_init_entry(&e, da, pa, flags);
+		err = iopgtable_store_entry(obj, &e);
+		if (err)
+			goto err_out;
+
+		da += bytes;
+	}
+	return 0;
+
+err_out:
+	da = new->da_start;
+
+	for_each_sg(sgt->sgl, sg, i, j) {
+		size_t bytes;
+
+		bytes = iopgtable_clear_entry(obj, da);
+
+		BUG_ON(!iopgsz_ok(bytes));
+
+		da += bytes;
+	}
+	return err;
+}
+
+/* release 'da' <-> 'pa' mapping */
+static void unmap_iovm_area(struct iommu *obj, struct iovm_struct *area)
+{
+	u32 start;
+	size_t total = area->da_end - area->da_start;
+
+	BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
+
+	start = area->da_start;
+	while (total > 0) {
+		size_t bytes;
+
+		bytes = iopgtable_clear_entry(obj, start);
+		if (bytes == 0)
+			bytes = PAGE_SIZE;
+		else
+			dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
+				__func__, start, bytes, area->flags);
+
+		BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
+
+		total -= bytes;
+		start += bytes;
+	}
+	BUG_ON(total);
+}
+
+/* template function for all unmapping */
+static struct sg_table *unmap_vm_area(struct iommu *obj, const u32 da,
+				      void (*fn)(const void *), u32 flags)
+{
+	struct sg_table *sgt = NULL;
+	struct iovm_struct *area;
+
+	if (!IS_ALIGNED(da, PAGE_SIZE)) {
+		dev_err(obj->dev, "%s: alignment err(%08x)\n", __func__, da);
+		return NULL;
+	}
+
+	mutex_lock(&obj->mmap_lock);
+
+	area = __find_iovm_area(obj, da);
+	if (!area) {
+		dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
+		goto out;
+	}
+
+	if ((area->flags & flags) != flags) {
+		dev_err(obj->dev, "%s: wrong flags(%08x)\n", __func__,
+			area->flags);
+		goto out;
+	}
+	sgt = (struct sg_table *)area->sgt;
+
+	unmap_iovm_area(obj, area);
+
+	fn(area->va);
+
+	dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", __func__,
+		area->da_start, da, area->da_end,
+		area->da_end - area->da_start, area->flags);
+
+	free_iovm_area(obj, area);
+out:
+	mutex_unlock(&obj->mmap_lock);
+
+	return sgt;
+}
+
+static u32 map_iommu_region(struct iommu *obj, u32 da,
+	      const struct sg_table *sgt, void *va, size_t bytes, u32 flags)
+{
+	int err = -ENOMEM;
+	struct iovm_struct *new;
+
+	mutex_lock(&obj->mmap_lock);
+
+	new = alloc_iovm_area(obj, da, bytes, flags);
+	if (IS_ERR(new)) {
+		err = PTR_ERR(new);
+		goto err_alloc_iovma;
+	}
+	new->va = va;
+	new->sgt = sgt;
+
+	if (map_iovm_area(obj, new, sgt, new->flags))
+		goto err_map;
+
+	mutex_unlock(&obj->mmap_lock);
+
+	dev_dbg(obj->dev, "%s: da:%08x(%x) flags:%08x va:%p\n",
+		__func__, new->da_start, bytes, new->flags, va);
+
+	return new->da_start;
+
+err_map:
+	free_iovm_area(obj, new);
+err_alloc_iovma:
+	mutex_unlock(&obj->mmap_lock);
+	return err;
+}
+
+static inline u32 __iommu_vmap(struct iommu *obj, u32 da,
+		 const struct sg_table *sgt, void *va, size_t bytes, u32 flags)
+{
+	return map_iommu_region(obj, da, sgt, va, bytes, flags);
+}
+
+/**
+ * iommu_vmap  -  (d)-(p)-(v) address mapper
+ * @obj:	objective iommu
+ * @sgt:	address of scatter gather table
+ * @flags:	iovma and page property
+ *
+ * Creates 1-n-1 mapping with given @sgt and returns @da.
+ * All @sgt element must be io page size aligned.
+ */
+u32 iommu_vmap(struct iommu *obj, u32 da, const struct sg_table *sgt,
+		 u32 flags)
+{
+	size_t bytes;
+	void *va;
+
+	if (!obj || !obj->dev || !sgt)
+		return -EINVAL;
+
+	bytes = sgtable_len(sgt);
+	if (!bytes)
+		return -EINVAL;
+	bytes = PAGE_ALIGN(bytes);
+
+	va = vmap_sg(sgt);
+	if (IS_ERR(va))
+		return PTR_ERR(va);
+
+	flags &= IOVMF_HW_MASK;
+	flags |= IOVMF_DISCONT;
+	flags |= IOVMF_MMIO;
+	flags |= (da ? IOVMF_DA_FIXED : IOVMF_DA_ANON);
+
+	da = __iommu_vmap(obj, da, sgt, va, bytes, flags);
+	if (IS_ERR_VALUE(da))
+		vunmap_sg(va);
+
+	return da;
+}
+EXPORT_SYMBOL_GPL(iommu_vmap);
+
+/**
+ * iommu_vunmap  -  release virtual mapping obtained by 'iommu_vmap()'
+ * @obj:	objective iommu
+ * @da:		iommu device virtual address
+ *
+ * Free the iommu virtually contiguous memory area starting at
+ * @da, which was returned by 'iommu_vmap()'.
+ */
+struct sg_table *iommu_vunmap(struct iommu *obj, u32 da)
+{
+	struct sg_table *sgt;
+	/*
+	 * 'sgt' is allocated before 'iommu_vmalloc()' is called.
+	 * Just returns 'sgt' to the caller to free
+	 */
+	sgt = unmap_vm_area(obj, da, vunmap_sg, IOVMF_DISCONT | IOVMF_MMIO);
+	if (!sgt)
+		dev_dbg(obj->dev, "%s: No sgt\n", __func__);
+	return sgt;
+}
+EXPORT_SYMBOL_GPL(iommu_vunmap);
+
+/**
+ * iommu_vmalloc  -  (d)-(p)-(v) address allocator and mapper
+ * @obj:	objective iommu
+ * @da:		contiguous iommu virtual memory
+ * @bytes:	allocation size
+ * @flags:	iovma and page property
+ *
+ * Allocate @bytes linearly and creates 1-n-1 mapping and returns
+ * @da again, which might be adjusted if 'IOVMF_DA_ANON' is set.
+ */
+u32 iommu_vmalloc(struct iommu *obj, u32 da, size_t bytes, u32 flags)
+{
+	void *va;
+	struct sg_table *sgt;
+
+	if (!obj || !obj->dev || !bytes)
+		return -EINVAL;
+
+	bytes = PAGE_ALIGN(bytes);
+
+	va = vmalloc(bytes);
+	if (!va)
+		return -ENOMEM;
+
+	sgt = sgtable_alloc(bytes, flags);
+	if (IS_ERR(sgt)) {
+		da = PTR_ERR(sgt);
+		goto err_sgt_alloc;
+	}
+	sgtable_fill_vmalloc(sgt, va);
+
+	flags &= IOVMF_HW_MASK;
+	flags |= IOVMF_DISCONT;
+	flags |= IOVMF_ALLOC;
+	flags |= (da ? IOVMF_DA_FIXED : IOVMF_DA_ANON);
+
+	da = __iommu_vmap(obj, da, sgt, va, bytes, flags);
+	if (IS_ERR_VALUE(da))
+		goto err_iommu_vmap;
+
+	return da;
+
+err_iommu_vmap:
+	sgtable_drain_vmalloc(sgt);
+	sgtable_free(sgt);
+err_sgt_alloc:
+	vfree(va);
+	return da;
+}
+EXPORT_SYMBOL_GPL(iommu_vmalloc);
+
+/**
+ * iommu_vfree  -  release memory allocated by 'iommu_vmalloc()'
+ * @obj:	objective iommu
+ * @da:		iommu device virtual address
+ *
+ * Frees the iommu virtually continuous memory area starting at
+ * @da, as obtained from 'iommu_vmalloc()'.
+ */
+void iommu_vfree(struct iommu *obj, const u32 da)
+{
+	struct sg_table *sgt;
+
+	sgt = unmap_vm_area(obj, da, vfree, IOVMF_DISCONT | IOVMF_ALLOC);
+	if (!sgt)
+		dev_dbg(obj->dev, "%s: No sgt\n", __func__);
+	sgtable_free(sgt);
+}
+EXPORT_SYMBOL_GPL(iommu_vfree);
+
+static u32 __iommu_kmap(struct iommu *obj, u32 da, u32 pa, void *va,
+			  size_t bytes, u32 flags)
+{
+	struct sg_table *sgt;
+
+	sgt = sgtable_alloc(bytes, flags);
+	if (IS_ERR(sgt))
+		return PTR_ERR(sgt);
+
+	sgtable_fill_kmalloc(sgt, pa, bytes);
+
+	da = map_iommu_region(obj, da, sgt, va, bytes, flags);
+	if (IS_ERR_VALUE(da)) {
+		sgtable_drain_kmalloc(sgt);
+		sgtable_free(sgt);
+	}
+
+	return da;
+}
+
+/**
+ * iommu_kmap  -  (d)-(p)-(v) address mapper
+ * @obj:	objective iommu
+ * @da:		contiguous iommu virtual memory
+ * @pa:		contiguous physical memory
+ * @flags:	iovma and page property
+ *
+ * Creates 1-1-1 mapping and returns @da again, which can be
+ * adjusted if 'IOVMF_DA_ANON' is set.
+ */
+u32 iommu_kmap(struct iommu *obj, u32 da, u32 pa, size_t bytes,
+		 u32 flags)
+{
+	void *va;
+
+	if (!obj || !obj->dev || !bytes)
+		return -EINVAL;
+
+	bytes = PAGE_ALIGN(bytes);
+
+	va = ioremap(pa, bytes);
+	if (!va)
+		return -ENOMEM;
+
+	flags &= IOVMF_HW_MASK;
+	flags |= IOVMF_LINEAR;
+	flags |= IOVMF_MMIO;
+	flags |= (da ? IOVMF_DA_FIXED : IOVMF_DA_ANON);
+
+	da = __iommu_kmap(obj, da, pa, va, bytes, flags);
+	if (IS_ERR_VALUE(da))
+		iounmap(va);
+
+	return da;
+}
+EXPORT_SYMBOL_GPL(iommu_kmap);
+
+/**
+ * iommu_kunmap  -  release virtual mapping obtained by 'iommu_kmap()'
+ * @obj:	objective iommu
+ * @da:		iommu device virtual address
+ *
+ * Frees the iommu virtually contiguous memory area starting at
+ * @da, which was passed to and was returned by'iommu_kmap()'.
+ */
+void iommu_kunmap(struct iommu *obj, u32 da)
+{
+	struct sg_table *sgt;
+	typedef void (*func_t)(const void *);
+
+	sgt = unmap_vm_area(obj, da, (func_t)__iounmap,
+			    IOVMF_LINEAR | IOVMF_MMIO);
+	if (!sgt)
+		dev_dbg(obj->dev, "%s: No sgt\n", __func__);
+	sgtable_free(sgt);
+}
+EXPORT_SYMBOL_GPL(iommu_kunmap);
+
+/**
+ * iommu_kmalloc  -  (d)-(p)-(v) address allocator and mapper
+ * @obj:	objective iommu
+ * @da:		contiguous iommu virtual memory
+ * @bytes:	bytes for allocation
+ * @flags:	iovma and page property
+ *
+ * Allocate @bytes linearly and creates 1-1-1 mapping and returns
+ * @da again, which might be adjusted if 'IOVMF_DA_ANON' is set.
+ */
+u32 iommu_kmalloc(struct iommu *obj, u32 da, size_t bytes, u32 flags)
+{
+	void *va;
+	u32 pa;
+
+	if (!obj || !obj->dev || !bytes)
+		return -EINVAL;
+
+	bytes = PAGE_ALIGN(bytes);
+
+	va = kmalloc(bytes, GFP_KERNEL | GFP_DMA);
+	if (!va)
+		return -ENOMEM;
+	pa = virt_to_phys(va);
+
+	flags &= IOVMF_HW_MASK;
+	flags |= IOVMF_LINEAR;
+	flags |= IOVMF_ALLOC;
+	flags |= (da ? IOVMF_DA_FIXED : IOVMF_DA_ANON);
+
+	da = __iommu_kmap(obj, da, pa, va, bytes, flags);
+	if (IS_ERR_VALUE(da))
+		kfree(va);
+
+	return da;
+}
+EXPORT_SYMBOL_GPL(iommu_kmalloc);
+
+/**
+ * iommu_kfree  -  release virtual mapping obtained by 'iommu_kmalloc()'
+ * @obj:	objective iommu
+ * @da:		iommu device virtual address
+ *
+ * Frees the iommu virtually contiguous memory area starting at
+ * @da, which was passed to and was returned by'iommu_kmalloc()'.
+ */
+void iommu_kfree(struct iommu *obj, u32 da)
+{
+	struct sg_table *sgt;
+
+	sgt = unmap_vm_area(obj, da, kfree, IOVMF_LINEAR | IOVMF_ALLOC);
+	if (!sgt)
+		dev_dbg(obj->dev, "%s: No sgt\n", __func__);
+	sgtable_free(sgt);
+}
+EXPORT_SYMBOL_GPL(iommu_kfree);
+
+
+static int __init iovmm_init(void)
+{
+	const unsigned long flags = SLAB_HWCACHE_ALIGN;
+	struct kmem_cache *p;
+
+	p = kmem_cache_create("iovm_area_cache", sizeof(struct iovm_struct), 0,
+			      flags, NULL);
+	if (!p)
+		return -ENOMEM;
+	iovm_area_cachep = p;
+
+	return 0;
+}
+module_init(iovmm_init);
+
+static void __exit iovmm_exit(void)
+{
+	kmem_cache_destroy(iovm_area_cachep);
+}
+module_exit(iovmm_exit);
+
+MODULE_DESCRIPTION("omap iommu: simple virtual address space management");
+MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
+MODULE_LICENSE("GPL v2");
-- 
cgit v0.10.2


From 28afea5b2ffaa48f4f43d22ae8edcc384c05df80 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 19 May 2009 08:25:48 +0200
Subject: xen/blkfront: allow xenbus state transition to Closing->Closed  when
 not Connected

This situation can occur when attempting to attach a block device whose
backend is an empty physical CD-ROM driver. The backend in this case
will go directly from the Initialising state to Closing->Closed.
Previously this would result in a NULL pointer deref on info->gd
(xenbus_dev_fatal does not return as a1a15ac5 seems to expect)

Cc: stable@kernel.org
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8f90508..6971a12 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -977,8 +977,10 @@ static void backend_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateClosing:
-		if (info->gd == NULL)
-			xenbus_dev_fatal(dev, -ENODEV, "gd is NULL");
+		if (info->gd == NULL) {
+			xenbus_frontend_closed(dev);
+			break;
+		}
 		bd = bdget_disk(info->gd, 0);
 		if (bd == NULL)
 			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
-- 
cgit v0.10.2


From 31a14400e8f365e5d4d1a0b2a1ef4d5405d6091e Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 19 May 2009 08:27:42 +0200
Subject: xen/blkfront: fix warning when deleting gendisk on unplug/shutdown

Currently blkfront gives a warning when hot unplugging due to calling
del_gendisk() with interrupts disabled (due to blkif_io_lock).

WARNING: at kernel/softirq.c:124 local_bh_enable+0x36/0x84()
Modules linked in: xenfs xen_netfront ext3 jbd mbcache xen_blkfront
Pid: 13, comm: xenwatch Not tainted 2.6.29-xs5.5.0.13 #3
Call Trace:
 [<c012611c>] warn_slowpath+0x80/0xb6
 [<c0104cf1>] xen_sched_clock+0x16/0x63
 [<c0104710>] xen_force_evtchn_callback+0xc/0x10
 [<c0104e32>] check_events+0x8/0xe
 [<c0104d9b>] xen_restore_fl_direct_end+0x0/0x1
 [<c0103749>] xen_mc_flush+0x10a/0x13f
 [<c0105bd2>] __switch_to+0x114/0x14e
 [<c011d92b>] dequeue_task+0x62/0x70
 [<c0123b6f>] finish_task_switch+0x2b/0x84
 [<c0299877>] schedule+0x66d/0x6e7
 [<c0104710>] xen_force_evtchn_callback+0xc/0x10
 [<c0104710>] xen_force_evtchn_callback+0xc/0x10
 [<c012a642>] local_bh_enable+0x36/0x84
 [<c022f9a7>] sk_filter+0x57/0x5c
 [<c0233dae>] netlink_broadcast+0x1d5/0x315
 [<c01c6371>] kobject_uevent_env+0x28d/0x331
 [<c01e7ead>] device_del+0x10f/0x120
 [<c01e7ec6>] device_unregister+0x8/0x10
 [<c015f86d>] bdi_unregister+0x2d/0x39
 [<c01bf6f4>] unlink_gendisk+0x23/0x3e
 [<c01ac946>] del_gendisk+0x7b/0xe7
 [<d0828c19>] blkfront_closing+0x28/0x6e [xen_blkfront]
 [<d082900c>] backend_changed+0x3ad/0x41d [xen_blkfront]

We can fix this by calling del_gendisk() later in blkfront_closing, after
releasing blkif_io_lock. Since the queue is stopped during the interrupts
disabled phase I don't think there is any danger of an event occuring between
releasing the blkif_io_lock and deleting the disk.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 6971a12..a6cbf7b 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -934,8 +934,6 @@ static void blkfront_closing(struct xenbus_device *dev)
 
 	spin_lock_irqsave(&blkif_io_lock, flags);
 
-	del_gendisk(info->gd);
-
 	/* No more blkif_request(). */
 	blk_stop_queue(info->rq);
 
@@ -949,6 +947,8 @@ static void blkfront_closing(struct xenbus_device *dev)
 	blk_cleanup_queue(info->rq);
 	info->rq = NULL;
 
+	del_gendisk(info->gd);
+
  out:
 	xenbus_frontend_closed(dev);
 }
-- 
cgit v0.10.2


From 4200efd9acda4accf24640f1e77d24fdcdb524df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 May 2009 09:22:19 +0200
Subject: sched: properly define the sched_group::cpumask and
 sched_domain::span fields

Properly document the variable-size structure tricks we are doing
wrt. struct sched_group and sched_domain, and use the field[0] GCC
extension instead of defining a vla array.

Dont use unions for this, as pointed out by Linus.

[ Impact: cleanup, un-confuse Sparse and LLVM ]

Reported-by: Jeff Garzik <jeff@garzik.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.01.0905180850110.3301@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index de7b3b2..dbb1043 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -839,7 +839,17 @@ struct sched_group {
 	 */
 	u32 reciprocal_cpu_power;
 
-	unsigned long cpumask[];
+	/*
+	 * The CPUs this group covers.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 *
+	 * It is also be embedded into static data structures at build
+	 * time. (See 'struct static_sched_group' in kernel/sched.c)
+	 */
+	unsigned long cpumask[0];
 };
 
 static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
@@ -925,8 +935,17 @@ struct sched_domain {
 	char *name;
 #endif
 
-	/* span of all CPUs in this domain */
-	unsigned long span[];
+	/*
+	 * Span of all CPUs in this domain.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 *
+	 * It is also be embedded into static data structures at build
+	 * time. (See 'struct static_sched_domain' in kernel/sched.c)
+	 */
+	unsigned long span[0];
 };
 
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
diff --git a/kernel/sched.c b/kernel/sched.c
index 497c09b..228acae 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7948,8 +7948,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 
 /*
  * The cpus mask in sched_group and sched_domain hangs off the end.
- * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
- * for nr_cpu_ids < CONFIG_NR_CPUS.
+ *
+ * ( See the the comments in include/linux/sched.h:struct sched_group
+ *   and struct sched_domain. )
  */
 struct static_sched_group {
 	struct sched_group sg;
-- 
cgit v0.10.2


From 143c145e3a475065a4be661468d0df1bd0b25f74 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 19 May 2009 14:43:15 +0800
Subject: tracing/events: Documentation updates

- fix some typos
- document the difference between '>' and '>>'
- document the 'enable' toggle
- remove section "Defining an event-enabled tracepoint", since it's
  out-dated and sample/trace_events/ already serves this purpose.

v2: add "Updated by Li Zefan"

[ Impact: make documentation up-to-date ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
LKML-Reference: <4A125503.5060406@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index abdee66..f157d75 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -1,9 +1,10 @@
 			     Event Tracing
 
 		Documentation written by Theodore Ts'o
+			Updated by Li Zefan
 
-Introduction
-============
+1. Introduction
+===============
 
 Tracepoints (see Documentation/trace/tracepoints.txt) can be used
 without creating custom kernel modules to register probe functions
@@ -12,30 +13,37 @@ using the event tracing infrastructure.
 Not all tracepoints can be traced using the event tracing system;
 the kernel developer must provide code snippets which define how the
 tracing information is saved into the tracing buffer, and how the
-the tracing information should be printed.
+tracing information should be printed.
 
-Using Event Tracing
-===================
+2. Using Event Tracing
+======================
+
+2.1 Via the 'set_event' interface
+---------------------------------
 
 The events which are available for tracing can be found in the file
-/sys/kernel/debug/tracing/available_events.
+/debug/tracing/available_events.
 
 To enable a particular event, such as 'sched_wakeup', simply echo it
-to /sys/debug/tracing/set_event. For example:
+to /debug/tracing/set_event. For example:
 
-	# echo sched_wakeup > /sys/kernel/debug/tracing/set_event
+	# echo sched_wakeup >> /debug/tracing/set_event
 
-[ Note: events can also be enabled/disabled via the 'enabled' toggle
-  found in the /sys/kernel/tracing/events/ hierarchy of directories. ]
+[ Note: '>>' is necessary, otherwise it will firstly disable
+  all the events. ]
 
 To disable an event, echo the event name to the set_event file prefixed
 with an exclamation point:
 
-	# echo '!sched_wakeup' >> /sys/kernel/debug/tracing/set_event
+	# echo '!sched_wakeup' >> /debug/tracing/set_event
+
+To disable all events, echo an empty line to the set_event file:
+
+	# echo > /debug/tracing/set_event
 
-To disable events, echo an empty line to the set_event file:
+To enable all events, echo '*:*' or '*:' to the set_event file:
 
-	# echo > /sys/kernel/debug/tracing/set_event
+	# echo *:* > /debug/tracing/set_event
 
 The events are organized into subsystems, such as ext4, irq, sched,
 etc., and a full event name looks like this: <subsystem>:<event>.  The
@@ -44,92 +52,39 @@ file.  All of the events in a subsystem can be specified via the syntax
 "<subsystem>:*"; for example, to enable all irq events, you can use the
 command:
 
-	# echo 'irq:*' > /sys/kernel/debug/tracing/set_event
-
-Defining an event-enabled tracepoint
-------------------------------------
-
-A kernel developer which wishes to define an event-enabled tracepoint
-must declare the tracepoint using TRACE_EVENT instead of DECLARE_TRACE.
-This is done via two header files in include/trace.  For example, to
-event-enable the jbd2 subsystem, we must create two files,
-include/trace/jbd2.h and include/trace/jbd2_event_types.h.  The
-include/trace/jbd2.h file should be included by kernel source files that
-will have a tracepoint inserted, and might look like this:
-
-#ifndef _TRACE_JBD2_H
-#define _TRACE_JBD2_H
-
-#include <linux/jbd2.h>
-#include <linux/tracepoint.h>
-
-#include <trace/jbd2_event_types.h>
-
-#endif
-
-In a file that utilizes a jbd2 tracepoint, this header file would be
-included.  Note that you still have to use DEFINE_TRACE().  So for
-example, if fs/jbd2/commit.c planned to use the jbd2_start_commit
-tracepoint, it would have the following near the beginning of the file:
-
-#include <trace/jbd2.h>
-
-DEFINE_TRACE(jbd2_start_commit);
-
-Then in the function that would call the tracepoint, it would call the
-tracepoint function.  (For more information, please see the tracepoint
-documentation in Documentation/trace/tracepoints.txt):
-
-	trace_jbd2_start_commit(journal, commit_transaction);
-
-The code snippets which allow jbd2_start_commit to be an event-enabled
-tracepoint are placed in the file include/trace/jbd2_event_types.h:
-
-/* use <trace/jbd2.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM jbd2
-
-#include <linux/jbd2.h>
-
-TRACE_EVENT(jbd2_start_commit,
-	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
-	TP_ARGS(journal, commit_transaction),
-	TP_STRUCT__entry(
-		__array(	char,	devname, BDEVNAME_SIZE+24 )
-		__field(	int,	transaction		  )
-	),
-	TP_fast_assign(
-		memcpy(__entry->devname, journal->j_devname, BDEVNAME_SIZE+24);
-		__entry->transaction	= commit_transaction->t_tid;
-	),
-	TP_printk("dev %s transaction %d",
-		  __entry->devname, __entry->transaction)
-);
-
-The TP_PROTO and TP_ARGS are unchanged from DECLARE_TRACE.  The new
-arguments to TRACE_EVENT are TP_STRUCT__entry, TP_fast_assign, and
-TP_printk.
-
-TP_STRUCT__entry defines the data structure which will be stored in the
-trace buffer.  Normally, fields in __entry will be arrays or simple
-types.  It is possible to place data structures in __entry --- however,
-pointers in the data structure can not be trusted, since they will be
-accessed sometime later by TP_printk, and if the data structure contains
-fields that will not or cannot be used by TP_printk, this will waste
-space in the trace buffer.  In general, data structures should be
-avoided, unless they do only contain non-pointer types and all of the
-fields will be used by TP_printk.
-
-TP_fast_assign defines the code snippet which saves information into the
-__entry data structure, using the passed-in arguments defined in
-TP_PROTO and TP_ARGS.
-
-Finally, TP_printk will print the __entry data structure.  At the time
-when the code snippet defined by TP_printk is executed, it will not have
-access to the TP_ARGS arguments; it can only use the information saved
-in the __entry data structure.
+	# echo 'irq:*' > /debug/tracing/set_event
+
+2.2 Via the 'enable' toggle
+---------------------------
+
+The events available are also listed in /debug/tracing/events/ hierarchy
+of directories.
+
+To enable event 'sched_wakeup':
+
+	# echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To disable it:
+
+	# echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To enable all events in sched subsystem:
+
+	# echo 1 > /debug/tracing/events/sched/enable
+
+To eanble all events:
+
+	# echo 1 > /debug/tracing/events/enable
+
+When reading one of these enable files, there are four results:
+
+ 0 - all events this file affects are disabled
+ 1 - all events this file affects are enabled
+ X - there is a mixture of events enabled and disabled
+ ? - this file does not affect any event
+
+3. Defining an event-enabled tracepoint
+=======================================
+
+See The example provided in samples/trace_events
+
-- 
cgit v0.10.2


From fd51d251e4cdb21f68e9dbc4336514d64a105a79 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.vnet.ibm.com>
Date: Tue, 19 May 2009 09:59:08 +0200
Subject: blktrace: remove debugfs entries on bad path

debugfs directory entries for devices are not removed on some
of the failure pathes in do_blk_trace_setup().
One way to reproduce is to start blktrace on multiple devices
with insufficient Vmalloc space: Devices will fail with
a message like this:

	BLKTRACESETUP(2) /dev/sdu failed: 5/Input/output error

If so, the respective entries in debugfs
(e.g. /sys/kernel/debug/block/sdu) will remain and subsequent
attempts to start blktrace on the respective devices will not
succeed due to existing directories.

[ Impact: fix /debug/tracing file cleanup corner case ]

Signed-off-by: Stefan Raspl <stefan.raspl@linux.vnet.ibm.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: schwidefsky@de.ibm.com
Cc: heiko.carstens@de.ibm.com
LKML-Reference: <4A1266CC.5040801@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 05b4747..e3abf55 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -262,6 +262,7 @@ static void blk_trace_free(struct blk_trace *bt)
 {
 	debugfs_remove(bt->msg_file);
 	debugfs_remove(bt->dropped_file);
+	debugfs_remove(bt->dir);
 	relay_close(bt->rchan);
 	free_percpu(bt->sequence);
 	free_percpu(bt->msg_data);
-- 
cgit v0.10.2


From 181da78cd048ce866b05a2e0208ea09d2f80e721 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Tue, 19 May 2009 10:51:03 +0300
Subject: ASoC: TWL4030: Fix Analog capture path for AUXR

AUXR is selected by bit 2 and not by bit 1 in the ANAMICR register.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index e4d683d..abf6914 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -432,7 +432,7 @@ static const struct snd_kcontrol_new twl4030_dapm_analoglmic_controls[] = {
 /* Right analog microphone selection */
 static const struct snd_kcontrol_new twl4030_dapm_analogrmic_controls[] = {
 	SOC_DAPM_SINGLE("Sub mic", TWL4030_REG_ANAMICR, 0, 1, 0),
-	SOC_DAPM_SINGLE("AUXR", TWL4030_REG_ANAMICR, 1, 1, 0),
+	SOC_DAPM_SINGLE("AUXR", TWL4030_REG_ANAMICR, 2, 1, 0),
 };
 
 /* TX1 L/R Analog/Digital microphone selection */
-- 
cgit v0.10.2


From fe64d517df0970a68417184a12fcd4ba0589cc28 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 19 May 2009 10:01:18 +0100
Subject: GFS2: Umount recovery race fix

This patch fixes a race condition where we can receive recovery
requests part way through processing a umount. This was causing
problems since the recovery thread had already gone away.

Looking in more detail at the recovery code, it was really trying
to implement a slight variation on a work queue, and that happens to
align nicely with the recently introduced slow-work subsystem. As a
result I've updated the code to use slow-work, rather than its own home
grown variety of work queue.

When using the wait_on_bit() function, I noticed that the wait function
that was supplied as an argument was appearing in the WCHAN field, so
I've updated the function names in order to produce more meaningful
output.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 3a981b7..cad957c 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -7,6 +7,7 @@ config GFS2_FS
 	select IP_SCTP if DLM_SCTP
 	select FS_POSIX_ACL
 	select CRC32
+	select SLOW_WORK
 	help
 	  A cluster filesystem.
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ff49810..2bf62bc 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -796,22 +796,37 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 	gh->gh_ip = 0;
 }
 
-static int just_schedule(void *word)
+/**
+ * gfs2_glock_holder_wait
+ * @word: unused
+ *
+ * This function and gfs2_glock_demote_wait both show up in the WCHAN
+ * field. Thus I've separated these otherwise identical functions in
+ * order to be more informative to the user.
+ */
+
+static int gfs2_glock_holder_wait(void *word)
 {
         schedule();
         return 0;
 }
 
+static int gfs2_glock_demote_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
 static void wait_on_holder(struct gfs2_holder *gh)
 {
 	might_sleep();
-	wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
 }
 
 static void wait_on_demote(struct gfs2_glock *gl)
 {
 	might_sleep();
-	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
 }
 
 /**
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 65f438e..0060e95 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -12,6 +12,7 @@
 
 #include <linux/fs.h>
 #include <linux/workqueue.h>
+#include <linux/slow-work.h>
 #include <linux/dlm.h>
 #include <linux/buffer_head.h>
 
@@ -376,11 +377,11 @@ struct gfs2_journal_extent {
 struct gfs2_jdesc {
 	struct list_head jd_list;
 	struct list_head extent_list;
-
+	struct slow_work jd_work;
 	struct inode *jd_inode;
+	unsigned long jd_flags;
+#define JDF_RECOVERY 1
 	unsigned int jd_jid;
-	int jd_dirty;
-
 	unsigned int jd_blocks;
 };
 
@@ -390,9 +391,6 @@ struct gfs2_statfs_change_host {
 	s64 sc_dinodes;
 };
 
-#define GFS2_GLOCKD_DEFAULT	1
-#define GFS2_GLOCKD_MAX		16
-
 #define GFS2_QUOTA_DEFAULT	GFS2_QUOTA_OFF
 #define GFS2_QUOTA_OFF		0
 #define GFS2_QUOTA_ACCOUNT	1
@@ -427,7 +425,6 @@ struct gfs2_tune {
 	unsigned int gt_incore_log_blocks;
 	unsigned int gt_log_flush_secs;
 
-	unsigned int gt_recoverd_secs;
 	unsigned int gt_logd_secs;
 
 	unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
@@ -448,6 +445,7 @@ enum {
 	SDF_JOURNAL_LIVE	= 1,
 	SDF_SHUTDOWN		= 2,
 	SDF_NOBARRIERS		= 3,
+	SDF_NORECOVERY		= 4,
 };
 
 #define GFS2_FSNAME_LEN		256
@@ -494,7 +492,6 @@ struct lm_lockstruct {
 	unsigned long ls_flags;
 	dlm_lockspace_t *ls_dlm;
 
-	int ls_recover_jid;
 	int ls_recover_jid_done;
 	int ls_recover_jid_status;
 };
@@ -583,7 +580,6 @@ struct gfs2_sbd {
 
 	/* Daemon stuff */
 
-	struct task_struct *sd_recoverd_process;
 	struct task_struct *sd_logd_process;
 	struct task_struct *sd_quotad_process;
 
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a6892ed..eacd78a 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/gfs2_ondisk.h>
 #include <asm/atomic.h>
+#include <linux/slow-work.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -113,12 +114,18 @@ static int __init init_gfs2_fs(void)
 	if (error)
 		goto fail_unregister;
 
+	error = slow_work_register_user();
+	if (error)
+		goto fail_slow;
+
 	gfs2_register_debugfs();
 
 	printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
 
 	return 0;
 
+fail_slow:
+	unregister_filesystem(&gfs2meta_fs_type);
 fail_unregister:
 	unregister_filesystem(&gfs2_fs_type);
 fail:
@@ -156,6 +163,7 @@ static void __exit exit_gfs2_fs(void)
 	gfs2_unregister_debugfs();
 	unregister_filesystem(&gfs2_fs_type);
 	unregister_filesystem(&gfs2meta_fs_type);
+	slow_work_unregister_user();
 
 	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 7981fbc..2cd1164 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -17,6 +17,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/slow-work.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -55,7 +56,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
 	spin_lock_init(&gt->gt_spin);
 
 	gt->gt_incore_log_blocks = 1024;
-	gt->gt_recoverd_secs = 60;
 	gt->gt_logd_secs = 1;
 	gt->gt_quota_simul_sync = 64;
 	gt->gt_quota_warn_period = 10;
@@ -675,6 +675,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 			break;
 
 		INIT_LIST_HEAD(&jd->extent_list);
+		slow_work_init(&jd->jd_work, &gfs2_recover_ops);
 		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
 		if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
 			if (!jd->jd_inode)
@@ -700,14 +701,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
 	struct inode *master = sdp->sd_master_dir->d_inode;
 	struct gfs2_holder ji_gh;
-	struct task_struct *p;
 	struct gfs2_inode *ip;
 	int jindex = 1;
 	int error = 0;
 
 	if (undo) {
 		jindex = 0;
-		goto fail_recoverd;
+		goto fail_jinode_gh;
 	}
 
 	sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
@@ -800,18 +800,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 	gfs2_glock_dq_uninit(&ji_gh);
 	jindex = 0;
 
-	p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
-	error = IS_ERR(p);
-	if (error) {
-		fs_err(sdp, "can't start recoverd thread: %d\n", error);
-		goto fail_jinode_gh;
-	}
-	sdp->sd_recoverd_process = p;
-
 	return 0;
 
-fail_recoverd:
-	kthread_stop(sdp->sd_recoverd_process);
 fail_jinode_gh:
 	if (!sdp->sd_args.ar_spectator)
 		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
@@ -1172,8 +1162,10 @@ static int fill_super(struct super_block *sb, void *data, int silent)
 		goto fail;
 	}
 
-	if (sdp->sd_args.ar_spectator)
+	if (sdp->sd_args.ar_spectator) {
                 sb->s_flags |= MS_RDONLY;
+		set_bit(SDF_NORECOVERY, &sdp->sd_flags);
+	}
 	if (sdp->sd_args.ar_posix_acl)
 		sb->s_flags |= MS_POSIXACL;
 
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 0677a83..a3c2272 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -121,6 +121,12 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	return error;
 }
 
+static int gfs2_umount_recovery_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
 /**
  * gfs2_put_super - Unmount the filesystem
  * @sb: The VFS superblock
@@ -131,6 +137,7 @@ static void gfs2_put_super(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	int error;
+	struct gfs2_jdesc *jd;
 
 	/*  Unfreeze the filesystem, if we need to  */
 
@@ -139,9 +146,25 @@ static void gfs2_put_super(struct super_block *sb)
 		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
 	mutex_unlock(&sdp->sd_freeze_lock);
 
+	/* No more recovery requests */
+	set_bit(SDF_NORECOVERY, &sdp->sd_flags);
+	smp_mb();
+
+	/* Wait on outstanding recovery */
+restart:
+	spin_lock(&sdp->sd_jindex_spin);
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
+			continue;
+		spin_unlock(&sdp->sd_jindex_spin);
+		wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
+			    gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE);
+		goto restart;
+	}
+	spin_unlock(&sdp->sd_jindex_spin);
+
 	kthread_stop(sdp->sd_quotad_process);
 	kthread_stop(sdp->sd_logd_process);
-	kthread_stop(sdp->sd_recoverd_process);
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		error = gfs2_make_fs_ro(sdp);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 247e8f7..59d2695 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -13,8 +13,7 @@
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
+#include <linux/slow-work.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -441,18 +440,25 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
         kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 }
 
-/**
- * gfs2_recover_journal - recover a given journal
- * @jd: the struct gfs2_jdesc describing the journal
- *
- * Acquire the journal's lock, check to see if the journal is clean, and
- * do recovery if necessary.
- *
- * Returns: errno
- */
+static int gfs2_recover_get_ref(struct slow_work *work)
+{
+	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
+	if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
+		return -EBUSY;
+	return 0;
+}
 
-int gfs2_recover_journal(struct gfs2_jdesc *jd)
+static void gfs2_recover_put_ref(struct slow_work *work)
+{
+	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
+	clear_bit(JDF_RECOVERY, &jd->jd_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
+}
+
+static void gfs2_recover_work(struct slow_work *work)
 {
+	struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 	struct gfs2_log_header_host head;
@@ -569,7 +575,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
 		gfs2_glock_dq_uninit(&j_gh);
 
 	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
-	return 0;
+	return;
 
 fail_gunlock_tr:
 	gfs2_glock_dq_uninit(&t_gh);
@@ -584,70 +590,28 @@ fail_gunlock_j:
 
 fail:
 	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
-	return error;
 }
 
-static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
-{
-	struct gfs2_jdesc *jd;
-	int found = 0;
-
-	spin_lock(&sdp->sd_jindex_spin);
+struct slow_work_ops gfs2_recover_ops = {
+	.get_ref = gfs2_recover_get_ref,
+	.put_ref = gfs2_recover_put_ref,
+	.execute = gfs2_recover_work,
+};
 
-	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
-		if (jd->jd_dirty) {
-			jd->jd_dirty = 0;
-			found = 1;
-			break;
-		}
-	}
-	spin_unlock(&sdp->sd_jindex_spin);
-
-	if (!found)
-		jd = NULL;
 
-	return jd;
-}
-
-/**
- * gfs2_check_journals - Recover any dirty journals
- * @sdp: the filesystem
- *
- */
-
-static void gfs2_check_journals(struct gfs2_sbd *sdp)
+static int gfs2_recovery_wait(void *word)
 {
-	struct gfs2_jdesc *jd;
-
-	for (;;) {
-		jd = gfs2_jdesc_find_dirty(sdp);
-		if (!jd)
-			break;
-
-		if (jd != sdp->sd_jdesc)
-			gfs2_recover_journal(jd);
-	}
+	schedule();
+	return 0;
 }
 
-/**
- * gfs2_recoverd - Recover dead machine's journals
- * @sdp: Pointer to GFS2 superblock
- *
- */
-
-int gfs2_recoverd(void *data)
+int gfs2_recover_journal(struct gfs2_jdesc *jd)
 {
-	struct gfs2_sbd *sdp = data;
-	unsigned long t;
-
-	while (!kthread_should_stop()) {
-		gfs2_check_journals(sdp);
-		t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
-		if (freezing(current))
-			refrigerator();
-		schedule_timeout_interruptible(t);
-	}
-
+	int rv;
+	rv = slow_work_enqueue(&jd->jd_work);
+	if (rv)
+		return rv;
+	wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE);
 	return 0;
 }
 
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index a8218ea..1616ac2 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -28,7 +28,7 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp);
 extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
 		    struct gfs2_log_header_host *head);
 extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
-extern int gfs2_recoverd(void *data);
+extern struct slow_work_ops gfs2_recover_ops;
 
 #endif /* __RECOVERY_DOT_H__ */
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 894bf77..9f6d48b 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -356,34 +356,33 @@ static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
 	return sprintf(buf, "%d\n", ls->ls_first_done);
 }
 
-static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf)
-{
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	return sprintf(buf, "%d\n", ls->ls_recover_jid);
-}
-
-static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
+static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 {
+	unsigned jid;
 	struct gfs2_jdesc *jd;
+	int rv;
+
+	rv = sscanf(buf, "%u", &jid);
+	if (rv != 1)
+		return -EINVAL;
 
+	rv = -ESHUTDOWN;
 	spin_lock(&sdp->sd_jindex_spin);
+	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
+		goto out;
+	rv = -EBUSY;
+	if (sdp->sd_jdesc->jd_jid == jid)
+		goto out;
+	rv = -ENOENT;
 	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
 		if (jd->jd_jid != jid)
 			continue;
-		jd->jd_dirty = 1;
+		rv = slow_work_enqueue(&jd->jd_work);
 		break;
 	}
+out:
 	spin_unlock(&sdp->sd_jindex_spin);
-}
-
-static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
-{
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	ls->ls_recover_jid = simple_strtol(buf, NULL, 0);
-	gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid);
-	if (sdp->sd_recoverd_process)
-		wake_up_process(sdp->sd_recoverd_process);
-	return len;
+	return rv ? rv : len;
 }
 
 static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
@@ -401,15 +400,15 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
 #define GDLM_ATTR(_name,_mode,_show,_store) \
 static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
-GDLM_ATTR(proto_name,     0444, proto_name_show,     NULL);
-GDLM_ATTR(block,          0644, block_show,          block_store);
-GDLM_ATTR(withdraw,       0644, withdraw_show,       withdraw_store);
-GDLM_ATTR(id,             0444, lkid_show,           NULL);
-GDLM_ATTR(first,          0444, lkfirst_show,        NULL);
-GDLM_ATTR(first_done,     0444, first_done_show,     NULL);
-GDLM_ATTR(recover,        0644, recover_show,        recover_store);
-GDLM_ATTR(recover_done,   0444, recover_done_show,   NULL);
-GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
+GDLM_ATTR(proto_name,     0444, proto_name_show,	NULL);
+GDLM_ATTR(block,          0644, block_show,		block_store);
+GDLM_ATTR(withdraw,       0644, withdraw_show,		withdraw_store);
+GDLM_ATTR(id,             0444, lkid_show,		NULL);
+GDLM_ATTR(first,          0444, lkfirst_show,		NULL);
+GDLM_ATTR(first_done,     0444, first_done_show,	NULL);
+GDLM_ATTR(recover,        0200, NULL,			recover_store);
+GDLM_ATTR(recover_done,   0444, recover_done_show,	NULL);
+GDLM_ATTR(recover_status, 0444, recover_status_show,	NULL);
 
 static struct attribute *lock_module_attrs[] = {
 	&gdlm_attr_proto_name.attr,
-- 
cgit v0.10.2


From e9ccb73ab57ada469602506496c42e5b4468ac3e Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 19 May 2009 10:23:23 +0100
Subject: GFS2: Update docs

Update a few things which were out of date, and fix a typo.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
index 4dae9a3..0494f78 100644
--- a/Documentation/filesystems/gfs2-glocks.txt
+++ b/Documentation/filesystems/gfs2-glocks.txt
@@ -60,7 +60,7 @@ go_lock          | Called for the first local holder of a lock
 go_unlock        | Called on the final local unlock of a lock
 go_dump          | Called to print content of object for debugfs file, or on
                  | error to dump glock to the log.
-go_type;         | The type of the glock, LM_TYPE_.....
+go_type          | The type of the glock, LM_TYPE_.....
 go_min_hold_time | The minimum hold time
 
 The minimum hold time for each lock is the time after a remote lock
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt
index 593004b..5e3ab8f 100644
--- a/Documentation/filesystems/gfs2.txt
+++ b/Documentation/filesystems/gfs2.txt
@@ -11,18 +11,15 @@ their I/O so file system consistency is maintained.  One of the nifty
 features of GFS is perfect consistency -- changes made to the file system
 on one machine show up immediately on all other machines in the cluster.
 
-GFS uses interchangable inter-node locking mechanisms.  Different lock
-modules can plug into GFS and each file system selects the appropriate
-lock module at mount time.  Lock modules include:
+GFS uses interchangable inter-node locking mechanisms, the currently
+supported mechanisms are:
 
   lock_nolock -- allows gfs to be used as a local file system
 
   lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking
   The dlm is found at linux/fs/dlm/
 
-In addition to interfacing with an external locking manager, a gfs lock
-module is responsible for interacting with external cluster management
-systems.  Lock_dlm depends on user space cluster management systems found
+Lock_dlm depends on user space cluster management systems found
 at the URL above.
 
 To use gfs as a local file system, no external clustering systems are
@@ -31,13 +28,19 @@ needed, simply:
   $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device
   $ mount -t gfs2 /dev/block_device /dir
 
-GFS2 is not on-disk compatible with previous versions of GFS.
+If you are using Fedora, you need to install the gfs2-utils package
+and, for lock_dlm, you will also need to install the cman package
+and write a cluster.conf as per the documentation.
+
+GFS2 is not on-disk compatible with previous versions of GFS, but it
+is pretty close.
 
 The following man pages can be found at the URL above:
-  gfs2_fsck	to repair a filesystem
+  fsck.gfs2	to repair a filesystem
   gfs2_grow	to expand a filesystem online
   gfs2_jadd	to add journals to a filesystem online
   gfs2_tool	to manipulate, examine and tune a filesystem
   gfs2_quota	to examine and change quota values in a filesystem
+  gfs2_convert	to convert a gfs filesystem to gfs2 in-place
   mount.gfs2	to help mount(8) mount a filesystem
   mkfs.gfs2	to make a filesystem
-- 
cgit v0.10.2


From 5e9145edcc368295cb353872b4a6026e13f5565e Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Date: Tue, 19 May 2009 10:01:03 +0200
Subject: Atmark Armadillo 500 board support.

Signed-off-by: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/Kconfig b/arch/arm/mach-mx3/Kconfig
index 32e4515..ebab18a 100644
--- a/arch/arm/mach-mx3/Kconfig
+++ b/arch/arm/mach-mx3/Kconfig
@@ -75,4 +75,11 @@ config MACH_PCM043
 	  Include support for Phytec pcm043 platform. This includes
 	  specific configurations for the board and its peripherals.
 
+config MACH_ARMADILLO5X0
+	bool "Support Atmark Armadillo-500 Development Base Board"
+	select ARCH_MX31
+	help
+	  Include support for Atmark Armadillo-500 platform. This includes
+	  specific configurations for the board and its peripherals.
+
 endif
diff --git a/arch/arm/mach-mx3/Makefile b/arch/arm/mach-mx3/Makefile
index cd6547b..611812b 100644
--- a/arch/arm/mach-mx3/Makefile
+++ b/arch/arm/mach-mx3/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_MACH_MX31MOBOARD)	+= mx31moboard.o mx31moboard-devboard.o \
 				   mx31moboard-marxbot.o
 obj-$(CONFIG_MACH_QONG)		+= qong.o
 obj-$(CONFIG_MACH_PCM043)	+= pcm043.o
+obj-$(CONFIG_MACH_ARMADILLO5X0) += armadillo5x0.o
diff --git a/arch/arm/mach-mx3/armadillo5x0.c b/arch/arm/mach-mx3/armadillo5x0.c
new file mode 100644
index 0000000..cac4538
--- /dev/null
+++ b/arch/arm/mach-mx3/armadillo5x0.c
@@ -0,0 +1,309 @@
+/*
+ * armadillo5x0.c
+ *
+ * Copyright 2009 Alberto Panizzo <maramaopercheseimorto@gmail.com>
+ * updates in http://alberdroid.blogspot.com/
+ *
+ * Based on Atmark Techno, Inc. armadillo 500 BSP 2008
+ * Based on mx31ads.c and pcm037.c Great Work!
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/smsc911x.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include <mach/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+#include <asm/memory.h>
+#include <asm/mach/map.h>
+
+#include <mach/common.h>
+#include <mach/imx-uart.h>
+#include <mach/iomux-mx3.h>
+#include <mach/board-armadillo5x0.h>
+#include <mach/mmc.h>
+#include <mach/ipu.h>
+#include <mach/mx3fb.h>
+
+#include "devices.h"
+
+static int armadillo5x0_pins[] = {
+	/* UART1 */
+	MX31_PIN_CTS1__CTS1,
+	MX31_PIN_RTS1__RTS1,
+	MX31_PIN_TXD1__TXD1,
+	MX31_PIN_RXD1__RXD1,
+	/* UART2 */
+	MX31_PIN_CTS2__CTS2,
+	MX31_PIN_RTS2__RTS2,
+	MX31_PIN_TXD2__TXD2,
+	MX31_PIN_RXD2__RXD2,
+	/* LAN9118_IRQ */
+	IOMUX_MODE(MX31_PIN_GPIO1_0, IOMUX_CONFIG_GPIO),
+	/* SDHC1 */
+	MX31_PIN_SD1_DATA3__SD1_DATA3,
+	MX31_PIN_SD1_DATA2__SD1_DATA2,
+	MX31_PIN_SD1_DATA1__SD1_DATA1,
+	MX31_PIN_SD1_DATA0__SD1_DATA0,
+	MX31_PIN_SD1_CLK__SD1_CLK,
+	MX31_PIN_SD1_CMD__SD1_CMD,
+	/* Framebuffer */
+	MX31_PIN_LD0__LD0,
+	MX31_PIN_LD1__LD1,
+	MX31_PIN_LD2__LD2,
+	MX31_PIN_LD3__LD3,
+	MX31_PIN_LD4__LD4,
+	MX31_PIN_LD5__LD5,
+	MX31_PIN_LD6__LD6,
+	MX31_PIN_LD7__LD7,
+	MX31_PIN_LD8__LD8,
+	MX31_PIN_LD9__LD9,
+	MX31_PIN_LD10__LD10,
+	MX31_PIN_LD11__LD11,
+	MX31_PIN_LD12__LD12,
+	MX31_PIN_LD13__LD13,
+	MX31_PIN_LD14__LD14,
+	MX31_PIN_LD15__LD15,
+	MX31_PIN_LD16__LD16,
+	MX31_PIN_LD17__LD17,
+	MX31_PIN_VSYNC3__VSYNC3,
+	MX31_PIN_HSYNC__HSYNC,
+	MX31_PIN_FPSHIFT__FPSHIFT,
+	MX31_PIN_DRDY0__DRDY0,
+	IOMUX_MODE(MX31_PIN_LCS1, IOMUX_CONFIG_GPIO), /*ADV7125_PSAVE*/
+
+};
+
+/*
+ * FB support
+ */
+static const struct fb_videomode fb_modedb[] = {
+	{	/* 640x480 @ 60 Hz */
+		.name		= "CRT-VGA",
+		.refresh	= 60,
+		.xres		= 640,
+		.yres		= 480,
+		.pixclock	= 39721,
+		.left_margin	= 35,
+		.right_margin	= 115,
+		.upper_margin	= 43,
+		.lower_margin	= 1,
+		.hsync_len	= 10,
+		.vsync_len	= 1,
+		.sync		= FB_SYNC_OE_ACT_HIGH,
+		.vmode		= FB_VMODE_NONINTERLACED,
+		.flag		= 0,
+	}, {/* 800x600 @ 56 Hz */
+		.name		= "CRT-SVGA",
+		.refresh	= 56,
+		.xres		= 800,
+		.yres		= 600,
+		.pixclock	= 30000,
+		.left_margin	= 30,
+		.right_margin	= 108,
+		.upper_margin	= 13,
+		.lower_margin	= 10,
+		.hsync_len	= 10,
+		.vsync_len	= 1,
+		.sync		= FB_SYNC_OE_ACT_HIGH | FB_SYNC_HOR_HIGH_ACT |
+				  FB_SYNC_VERT_HIGH_ACT,
+		.vmode		= FB_VMODE_NONINTERLACED,
+		.flag		= 0,
+	},
+};
+
+static struct ipu_platform_data mx3_ipu_data = {
+	.irq_base = MXC_IPU_IRQ_START,
+};
+
+static struct mx3fb_platform_data mx3fb_pdata = {
+	.dma_dev	= &mx3_ipu.dev,
+	.name		= "CRT-VGA",
+	.mode		= fb_modedb,
+	.num_modes	= ARRAY_SIZE(fb_modedb),
+};
+
+/*
+ * SDHC 1
+ * MMC support
+ */
+static int armadillo5x0_sdhc1_get_ro(struct device *dev)
+{
+	return gpio_get_value(IOMUX_TO_GPIO(MX31_PIN_ATA_RESET_B));
+}
+
+static int armadillo5x0_sdhc1_init(struct device *dev,
+				   irq_handler_t detect_irq, void *data)
+{
+	int ret;
+	int gpio_det, gpio_wp;
+
+	gpio_det = IOMUX_TO_GPIO(MX31_PIN_ATA_DMACK);
+	gpio_wp = IOMUX_TO_GPIO(MX31_PIN_ATA_RESET_B);
+
+	ret = gpio_request(gpio_det, "sdhc-card-detect");
+	if (ret)
+		return ret;
+
+	gpio_direction_input(gpio_det);
+
+	ret = gpio_request(gpio_wp, "sdhc-write-protect");
+	if (ret)
+		goto err_gpio_free;
+
+	gpio_direction_input(gpio_wp);
+
+	/* When supported the trigger type have to be BOTH */
+	ret = request_irq(IOMUX_TO_IRQ(MX31_PIN_ATA_DMACK), detect_irq,
+			  IRQF_DISABLED | IRQF_TRIGGER_FALLING,
+			  "sdhc-detect", data);
+
+	if (ret)
+		goto err_gpio_free_2;
+
+	return 0;
+
+err_gpio_free_2:
+	gpio_free(gpio_wp);
+
+err_gpio_free:
+	gpio_free(gpio_det);
+
+	return ret;
+
+}
+
+static void armadillo5x0_sdhc1_exit(struct device *dev, void *data)
+{
+	free_irq(IOMUX_TO_IRQ(MX31_PIN_ATA_DMACK), data);
+	gpio_free(IOMUX_TO_GPIO(MX31_PIN_ATA_DMACK));
+	gpio_free(IOMUX_TO_GPIO(MX31_PIN_ATA_RESET_B));
+}
+
+static struct imxmmc_platform_data sdhc_pdata = {
+	.get_ro = armadillo5x0_sdhc1_get_ro,
+	.init = armadillo5x0_sdhc1_init,
+	.exit = armadillo5x0_sdhc1_exit,
+};
+
+/*
+ * SMSC 9118
+ * Network support
+ */
+static struct resource armadillo5x0_smc911x_resources[] = {
+	{
+		.start	= CS3_BASE_ADDR,
+		.end	= CS3_BASE_ADDR + SZ_32M - 1,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IOMUX_TO_IRQ(MX31_PIN_GPIO1_0),
+		.end	= IOMUX_TO_IRQ(MX31_PIN_GPIO1_0),
+		.flags	= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
+	},
+};
+
+static struct smsc911x_platform_config smsc911x_info = {
+	.flags		= SMSC911X_USE_32BIT,
+	.irq_polarity   = SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type       = SMSC911X_IRQ_TYPE_PUSH_PULL,
+};
+
+static struct platform_device armadillo5x0_smc911x_device = {
+	.name           = "smsc911x",
+	.id             = -1,
+	.num_resources  = ARRAY_SIZE(armadillo5x0_smc911x_resources),
+	.resource       = armadillo5x0_smc911x_resources,
+	.dev            = {
+		.platform_data = &smsc911x_info,
+	},
+};
+
+/* UART device data */
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
+static struct platform_device *devices[] __initdata = {
+	&armadillo5x0_smc911x_device,
+};
+
+/*
+ * Perform board specific initializations
+ */
+static void __init armadillo5x0_init(void)
+{
+	mxc_iomux_setup_multiple_pins(armadillo5x0_pins,
+			ARRAY_SIZE(armadillo5x0_pins), "armadillo5x0");
+
+	platform_add_devices(devices, ARRAY_SIZE(devices));
+
+	/* Register UART */
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+	mxc_register_device(&mxc_uart_device1, &uart_pdata);
+
+	/* SMSC9118 IRQ pin */
+	gpio_direction_input(MX31_PIN_GPIO1_0);
+
+	/* Register SDHC */
+	mxc_register_device(&mxcsdhc_device0, &sdhc_pdata);
+
+	/* Register FB */
+	mxc_register_device(&mx3_ipu, &mx3_ipu_data);
+	mxc_register_device(&mx3_fb, &mx3fb_pdata);
+}
+
+/*
+ * Set up static virtual mappings.
+ */
+static void __init armadillo5x0_map_io(void)
+{
+	/*
+	 * Maps:
+	 * - NAND, SDRAM, WEIM, M3IF, EMI controllers
+	 * - AVIC (Vectored Interrupt controller)
+	 * - AIPS1 and AIPS2
+	 **/
+	mxc_map_io();
+}
+
+static void __init armadillo5x0_timer_init(void)
+{
+	mx31_clocks_init(26000000);
+}
+
+static struct sys_timer armadillo5x0_timer = {
+	.init	= armadillo5x0_timer_init,
+};
+
+MACHINE_START(ARMADILLO5X0, "Armadillo-500")
+	/* Maintainer: Alberto Panizzo  */
+	.phys_io	= AIPS1_BASE_ADDR,
+	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
+	.boot_params	= PHYS_OFFSET + 0x00000100,
+	.map_io		= armadillo5x0_map_io,
+	.init_irq	= mxc_init_irq,
+	.timer		= &armadillo5x0_timer,
+	.init_machine	= armadillo5x0_init,
+MACHINE_END
diff --git a/arch/arm/plat-mxc/include/mach/board-armadillo5x0.h b/arch/arm/plat-mxc/include/mach/board-armadillo5x0.h
new file mode 100644
index 0000000..8769e91
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/board-armadillo5x0.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2009 Alberto Panizzo <maramaopercheseimorto@gmail.com>.
+ * All Rights Reserved.
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_MXC_BOARD_ARMADILLO5X0_H__
+#define __ASM_ARCH_MXC_BOARD_ARMADILLO5X0_H__
+
+#include <mach/hardware.h>
+
+/* mandatory for CONFIG_DEBUG_LL */
+
+#define MXC_LL_UART_PADDR	UART1_BASE_ADDR
+#define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
+
+#endif
diff --git a/arch/arm/plat-mxc/include/mach/debug-macro.S b/arch/arm/plat-mxc/include/mach/debug-macro.S
index e6b841b..57d3e3f 100644
--- a/arch/arm/plat-mxc/include/mach/debug-macro.S
+++ b/arch/arm/plat-mxc/include/mach/debug-macro.S
@@ -43,6 +43,9 @@
 #ifdef CONFIG_MACH_MX27_3DS
 #include <mach/board-mx27pdk.h>
 #endif
+#ifdef CONFIG_MACH_ARMADILLO5X0
+#include <mach/board-armadillo5x0.h>
+#endif
 		.macro	addruart,rx
 		mrc	p15, 0, \rx, c1, c0
 		tst	\rx, #1			@ MMU enabled?
-- 
cgit v0.10.2


From b74bd40fa4ae018898c8a6429c2a7daf61516524 Mon Sep 17 00:00:00 2001
From: "Lopez Cruz, Misael" <x0052729@ti.com>
Date: Mon, 18 May 2009 11:52:55 -0500
Subject: ASoC: TWL4030: Add control for selecting codec operation mode

Add a control for selecting the codec operation mode. TWL4030 codec
has two modes:
- Option 1. Audio only (4 audio DACs)
- Option 2. Voice/Audio (2 audio DACs and voice ADC/DAC)

Control is restricted when a stream is ongoing, since codec's
operation mode cannot be changed on-the-fly.

Signed-off-by: Misael Lopez Cruz <x0052729@ti.com>
Acked-by: Peter Ujflausi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index abf6914..731534c 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -814,6 +814,48 @@ static int snd_soc_put_volsw_r2_twl4030(struct snd_kcontrol *kcontrol,
 	return err;
 }
 
+/* Codec operation modes */
+static const char *twl4030_op_modes_texts[] = {
+	"Option 2 (voice/audio)", "Option 1 (audio)"
+};
+
+static const struct soc_enum twl4030_op_modes_enum =
+	SOC_ENUM_SINGLE(TWL4030_REG_CODEC_MODE, 0,
+			ARRAY_SIZE(twl4030_op_modes_texts),
+			twl4030_op_modes_texts);
+
+int snd_soc_put_twl4030_opmode_enum_double(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct twl4030_priv *twl4030 = codec->private_data;
+	struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+	unsigned short val;
+	unsigned short mask, bitmask;
+
+	if (twl4030->configured) {
+		printk(KERN_ERR "twl4030 operation mode cannot be "
+			"changed on-the-fly\n");
+		return -EBUSY;
+	}
+
+	for (bitmask = 1; bitmask < e->max; bitmask <<= 1)
+		;
+	if (ucontrol->value.enumerated.item[0] > e->max - 1)
+		return -EINVAL;
+
+	val = ucontrol->value.enumerated.item[0] << e->shift_l;
+	mask = (bitmask - 1) << e->shift_l;
+	if (e->shift_l != e->shift_r) {
+		if (ucontrol->value.enumerated.item[1] > e->max - 1)
+			return -EINVAL;
+		val |= ucontrol->value.enumerated.item[1] << e->shift_r;
+		mask |= (bitmask - 1) << e->shift_r;
+	}
+
+	return snd_soc_update_bits(codec, e->reg, mask, val);
+}
+
 /*
  * FGAIN volume control:
  * from -62 to 0 dB in 1 dB steps (mute instead of -63 dB)
@@ -895,6 +937,11 @@ static const struct soc_enum twl4030_vibradir_enum =
 			twl4030_vibradir_texts);
 
 static const struct snd_kcontrol_new twl4030_snd_controls[] = {
+	/* Codec operation mode control */
+	SOC_ENUM_EXT("Codec Operation Mode", twl4030_op_modes_enum,
+		snd_soc_get_enum_double,
+		snd_soc_put_twl4030_opmode_enum_double),
+
 	/* Common playback gain controls */
 	SOC_DOUBLE_R_TLV("DAC1 Digital Fine Playback Volume",
 		TWL4030_REG_ARXL1PGA, TWL4030_REG_ARXR1PGA,
-- 
cgit v0.10.2


From 11a728110633320d95935a1ba79c038db303596f Mon Sep 17 00:00:00 2001
From: "Lopez Cruz, Misael" <x0052729@ti.com>
Date: Mon, 18 May 2009 11:53:04 -0500
Subject: ASoC: SDP3430: Connect twl4030 voice DAI to McBSP3

Connect twl4030 voice DAI to McBSP3 in sdp3430 machine driver.
Voice DAI init function enables corresponding interface by
writting directly to VOICE_IF codec register.

Signed-off-by: Misael Lopez Cruz <x0052729@ti.com>
Acked-by: Peter Ujflausi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/sdp3430.c b/sound/soc/omap/sdp3430.c
index 1c79741..19966a7 100644
--- a/sound/soc/omap/sdp3430.c
+++ b/sound/soc/omap/sdp3430.c
@@ -84,6 +84,49 @@ static struct snd_soc_ops sdp3430_ops = {
 	.hw_params = sdp3430_hw_params,
 };
 
+static int sdp3430_hw_voice_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	int ret;
+
+	/* Set codec DAI configuration */
+	ret = snd_soc_dai_set_fmt(codec_dai,
+				SND_SOC_DAIFMT_DSP_A |
+				SND_SOC_DAIFMT_IB_NF |
+				SND_SOC_DAIFMT_CBS_CFM);
+	if (ret) {
+		printk(KERN_ERR "can't set codec DAI configuration\n");
+		return ret;
+	}
+
+	/* Set cpu DAI configuration */
+	ret = snd_soc_dai_set_fmt(cpu_dai,
+				SND_SOC_DAIFMT_DSP_A |
+				SND_SOC_DAIFMT_IB_NF |
+				SND_SOC_DAIFMT_CBM_CFM);
+	if (ret < 0) {
+		printk(KERN_ERR "can't set cpu DAI configuration\n");
+		return ret;
+	}
+
+	/* Set the codec system clock for DAC and ADC */
+	ret = snd_soc_dai_set_sysclk(codec_dai, 0, 26000000,
+					    SND_SOC_CLOCK_IN);
+	if (ret < 0) {
+		printk(KERN_ERR "can't set codec system clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct snd_soc_ops sdp3430_voice_ops = {
+	.hw_params = sdp3430_hw_voice_params,
+};
+
 /* Headset jack */
 static struct snd_soc_jack hs_jack;
 
@@ -192,22 +235,45 @@ static int sdp3430_twl4030_init(struct snd_soc_codec *codec)
 	return ret;
 }
 
+static int sdp3430_twl4030_voice_init(struct snd_soc_codec *codec)
+{
+	unsigned short reg;
+
+	/* Enable voice interface */
+	reg = codec->read(codec, TWL4030_REG_VOICE_IF);
+	reg |= TWL4030_VIF_DIN_EN | TWL4030_VIF_DOUT_EN | TWL4030_VIF_EN;
+	codec->write(codec, TWL4030_REG_VOICE_IF, reg);
+
+	return 0;
+}
+
+
 /* Digital audio interface glue - connects codec <--> CPU */
-static struct snd_soc_dai_link sdp3430_dai = {
-	.name = "TWL4030",
-	.stream_name = "TWL4030",
-	.cpu_dai = &omap_mcbsp_dai[0],
-	.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
-	.init = sdp3430_twl4030_init,
-	.ops = &sdp3430_ops,
+static struct snd_soc_dai_link sdp3430_dai[] = {
+	{
+		.name = "TWL4030 I2S",
+		.stream_name = "TWL4030 Audio",
+		.cpu_dai = &omap_mcbsp_dai[0],
+		.codec_dai = &twl4030_dai[TWL4030_DAI_HIFI],
+		.init = sdp3430_twl4030_init,
+		.ops = &sdp3430_ops,
+	},
+	{
+		.name = "TWL4030 PCM",
+		.stream_name = "TWL4030 Voice",
+		.cpu_dai = &omap_mcbsp_dai[1],
+		.codec_dai = &twl4030_dai[TWL4030_DAI_VOICE],
+		.init = sdp3430_twl4030_voice_init,
+		.ops = &sdp3430_voice_ops,
+	},
 };
 
 /* Audio machine driver */
 static struct snd_soc_card snd_soc_sdp3430 = {
 	.name = "SDP3430",
 	.platform = &omap_soc_platform,
-	.dai_link = &sdp3430_dai,
-	.num_links = 1,
+	.dai_link = sdp3430_dai,
+	.num_links = ARRAY_SIZE(sdp3430_dai),
 };
 
 /* Audio subsystem */
@@ -236,7 +302,8 @@ static int __init sdp3430_soc_init(void)
 
 	platform_set_drvdata(sdp3430_snd_device, &sdp3430_snd_devdata);
 	sdp3430_snd_devdata.dev = &sdp3430_snd_device->dev;
-	*(unsigned int *)sdp3430_dai.cpu_dai->private_data = 1; /* McBSP2 */
+	*(unsigned int *)sdp3430_dai[0].cpu_dai->private_data = 1; /* McBSP2 */
+	*(unsigned int *)sdp3430_dai[1].cpu_dai->private_data = 2; /* McBSP3 */
 
 	ret = platform_device_add(sdp3430_snd_device);
 	if (ret)
-- 
cgit v0.10.2


From 3755100dd5f66761aaaa7ae44c70b319a7c78a56 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 19 May 2009 18:33:04 +0900
Subject: ub: use __blk_end_request_all()

ub_end_rq() always tries to complete full request.  The @cmd_len
parameter was there because rq->data_len used to be overwritten with
residue count.  Drop @cmd_len and use __blk_end_request_all().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 178f459..f32781c 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -360,8 +360,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
     struct ub_scsi_cmd *cmd, struct ub_request *urq);
 static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
-static void ub_end_rq(struct request *rq, unsigned int status,
-    unsigned int cmd_len);
+static void ub_end_rq(struct request *rq, unsigned int status);
 static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun,
     struct ub_request *urq, struct ub_scsi_cmd *cmd);
 static int ub_submit_scsi(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
@@ -644,13 +643,13 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
 
 	if (atomic_read(&sc->poison)) {
 		blk_start_request(rq);
-		ub_end_rq(rq, DID_NO_CONNECT << 16, blk_rq_bytes(rq));
+		ub_end_rq(rq, DID_NO_CONNECT << 16);
 		return 0;
 	}
 
 	if (lun->changed && !blk_pc_request(rq)) {
 		blk_start_request(rq);
-		ub_end_rq(rq, SAM_STAT_CHECK_CONDITION, blk_rq_bytes(rq));
+		ub_end_rq(rq, SAM_STAT_CHECK_CONDITION);
 		return 0;
 	}
 
@@ -702,7 +701,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
 
 drop:
 	ub_put_cmd(lun, cmd);
-	ub_end_rq(rq, DID_ERROR << 16, blk_rq_bytes(rq));
+	ub_end_rq(rq, DID_ERROR << 16);
 	return 0;
 }
 
@@ -777,7 +776,6 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	struct ub_request *urq = cmd->back;
 	struct request *rq;
 	unsigned int scsi_status;
-	unsigned int cmd_len;
 
 	rq = urq->rq;
 
@@ -816,17 +814,14 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 
 	urq->rq = NULL;
 
-	cmd_len = cmd->len;
 	ub_put_cmd(lun, cmd);
-	ub_end_rq(rq, scsi_status, cmd_len);
+	ub_end_rq(rq, scsi_status);
 	blk_start_queue(lun->disk->queue);
 }
 
-static void ub_end_rq(struct request *rq, unsigned int scsi_status,
-    unsigned int cmd_len)
+static void ub_end_rq(struct request *rq, unsigned int scsi_status)
 {
 	int error;
-	long rqlen;
 
 	if (scsi_status == 0) {
 		error = 0;
@@ -834,12 +829,7 @@ static void ub_end_rq(struct request *rq, unsigned int scsi_status,
 		error = -EIO;
 		rq->errors = scsi_status;
 	}
-	rqlen = blk_rq_bytes(rq);    /* Oddly enough, this is the residue. */
-	if (__blk_end_request(rq, error, cmd_len)) {
-		printk(KERN_WARNING DRV_NAME
-		    ": __blk_end_request blew, %s-cmd total %u rqlen %ld\n",
-		    blk_pc_request(rq)? "pc": "fs", cmd_len, rqlen);
-	}
+	__blk_end_request_all(rq, error);
 }
 
 static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun,
-- 
cgit v0.10.2


From 5f49f63178360b07a095bd33b0d850d60edf7590 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 19 May 2009 18:33:05 +0900
Subject: block: set rq->resid_len to blk_rq_bytes() on issue

In commit c3a4d78c580de4edc9ef0f7c59812fb02ceb037f, while introducing
rq->resid_len, the default value of residue count was changed from
full count to zero.  The conversion was done under the assumption that
when a request fails residue count wasn't defined.  However, Boaz and
James pointed out that this wasn't true and the residue count should
be preserved for failed requests too.

This patchset restores the original behavior by setting rq->resid_len
to blk_rq_bytes(rq) on request start and restoring explicit clearing
in affected drivers.  While at it, take advantage of the fact that
rq->resid_len is set to full count where applicable.

* ide-cd: rq->resid_len cleared on pc success

* mptsas: req->resid_len cleared on success

* sas_expander: rsp/req->resid_len cleared on success

* mpt2sas_transport: req->resid_len cleared on success

* ide-cd, ide-tape, mptsas, sas_host_smp, mpt2sas_transport, ub: take
  advantage of initial full count to simplify code

Boaz Harrosh spotted bug in resid_len initialization.  Fixed as
suggested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Borislav Petkov <petkovbb@googlemail.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index a2d97de..e3f7e6a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1783,9 +1783,10 @@ void blk_start_request(struct request *req)
 	blk_dequeue_request(req);
 
 	/*
-	 * We are now handing the request to the hardware, add the
-	 * timeout handler.
+	 * We are now handing the request to the hardware, initialize
+	 * resid_len to full count and add the timeout handler.
 	 */
+	req->resid_len = blk_rq_bytes(req);
 	blk_add_timer(req);
 }
 EXPORT_SYMBOL(blk_start_request);
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index f32781c..e67bbae 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -781,8 +781,10 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 
 	if (cmd->error == 0) {
 		if (blk_pc_request(rq)) {
-			if (cmd->act_len < blk_rq_bytes(rq))
-				rq->resid_len = blk_rq_bytes(rq) - cmd->act_len;
+			if (cmd->act_len >= rq->resid_len)
+				rq->resid_len = 0;
+			else
+				rq->resid_len -= cmd->act_len;
 			scsi_status = 0;
 		} else {
 			if (cmd->act_len != cmd->len) {
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 4c7792f..081aed6 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -699,6 +699,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
 out_end:
 	if (blk_pc_request(rq) && rc == 0) {
+		rq->resid_len = 0;
 		blk_end_request_all(rq, 0);
 		hwif->rq = NULL;
 	} else {
@@ -718,8 +719,7 @@ out_end:
 
 		/* make sure it's fully ended */
 		if (blk_fs_request(rq) == 0) {
-			rq->resid_len = blk_rq_bytes(rq) -
-				(cmd->nbytes - cmd->nleft);
+			rq->resid_len -= cmd->nbytes - cmd->nleft;
 			if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
 				rq->resid_len += cmd->last_xfer_len;
 		}
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index e166045..683ff37 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -380,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		}
 
 		tape->first_frame += blocks;
-		rq->resid_len = blk_rq_bytes(rq) - blocks * tape->blk_size;
+		rq->resid_len -= blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 4e6fcf0..79f5433 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1357,7 +1357,8 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply;
 		memcpy(req->sense, smprep, sizeof(*smprep));
 		req->sense_len = sizeof(*smprep);
-		rsp->resid_len = blk_rq_bytes(rsp) - smprep->ResponseDataLength;
+		req->resid_len = 0;
+		rsp->resid_len -= smprep->ResponseDataLength;
 	} else {
 		printk(MYIOC_s_ERR_FMT "%s: smp passthru reply failed to be returned\n",
 		    ioc->name, __func__);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 531af9e..54fa1e4 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1937,7 +1937,11 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	if (ret > 0) {
 		/* positive number is the untransferred residual */
 		rsp->resid_len = ret;
+		req->resid_len = 0;
 		ret = 0;
+	} else if (ret == 0) {
+		rsp->resid_len = 0;
+		req->resid_len = 0;
 	}
 
 	return ret;
diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c
index be9a951..1bc3b75 100644
--- a/drivers/scsi/libsas/sas_host_smp.c
+++ b/drivers/scsi/libsas/sas_host_smp.c
@@ -176,9 +176,6 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 	resp_data[1] = req_data[1];
 	resp_data[2] = SMP_RESP_FUNC_UNK;
 
-	req->resid_len = blk_rq_bytes(req);
-	rsp->resid_len = blk_rq_bytes(rsp);
-
 	switch (req_data[1]) {
 	case SMP_REPORT_GENERAL:
 		req->resid_len -= 8;
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index af95a44..5c65da5 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1170,8 +1170,8 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 
 		memcpy(req->sense, mpi_reply, sizeof(*mpi_reply));
 		req->sense_len = sizeof(*mpi_reply);
-		rsp->resid_len = blk_rq_bytes(rsp) -
-				 mpi_reply->ResponseDataLength;
+		req->resid_len = 0;
+		rsp->resid_len -= mpi_reply->ResponseDataLength;
 	} else {
 		dtransportprintk(ioc, printk(MPT2SAS_DEBUG_FMT
 		    "%s - no reply\n", ioc->name, __func__));
-- 
cgit v0.10.2


From 4fc981ef9e7c0953d5c4896ce088b19c50cb018f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 19 May 2009 18:33:06 +0900
Subject: bio: always copy back data for copied kernel requests

When a read bio_copy_kern() request fails, the content of the bounce
buffer is not copied back.  However, as request failure doesn't
necessarily mean complete failure, the buffer state can be useful.
This behavior is also inconsistent with the user map counterpart and
causes the subtle difference between bounced and unbounced IO causes
confusion.

This patch makes bio_copy_kern_endio() ignore @err and always copy
back data on request completion.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/bio.c b/fs/bio.c
index 7bbc98f..ee3bc67 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1198,7 +1198,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
 		char *addr = page_address(bvec->bv_page);
 		int len = bmd->iovecs[i].bv_len;
 
-		if (read && !err)
+		if (read)
 			memcpy(p, addr, len);
 
 		__free_page(bvec->bv_page);
-- 
cgit v0.10.2


From b2858d7d1639c04ca3c54988d76c5f7300b76f1c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 19 May 2009 11:37:46 +0200
Subject: splice: fix kmaps in default_file_splice_write()

Unfortunately multiple kmap() within a single thread are deadlockable,
so writing out multiple buffers with writev() isn't possible.

Change the implementation so that it does a separate write() for each
buffer.  This actually simplifies the code a lot since the
splice_from_pipe() helper can be used.

This limitation is caused by HIGHMEM pages, and so only affects a
subset of architectures and configurations.  In the future it may be
worth to implement default_file_splice_write() in a more efficient way
on configs that allow it.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/splice.c b/fs/splice.c
index 41179c0..73766d2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -535,8 +535,8 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
 	return res;
 }
 
-static ssize_t kernel_writev(struct file *file, const struct iovec *vec,
-			    unsigned long vlen, loff_t *ppos)
+static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
+			    loff_t pos)
 {
 	mm_segment_t old_fs;
 	ssize_t res;
@@ -544,7 +544,7 @@ static ssize_t kernel_writev(struct file *file, const struct iovec *vec,
 	old_fs = get_fs();
 	set_fs(get_ds());
 	/* The cast to a user pointer is valid due to the set_fs() */
-	res = vfs_writev(file, (const struct iovec __user *)vec, vlen, ppos);
+	res = vfs_write(file, (const char __user *)buf, count, &pos);
 	set_fs(old_fs);
 
 	return res;
@@ -1003,120 +1003,34 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 EXPORT_SYMBOL(generic_file_splice_write);
 
-static struct pipe_buffer *nth_pipe_buf(struct pipe_inode_info *pipe, int n)
+static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+			  struct splice_desc *sd)
 {
-	return &pipe->bufs[(pipe->curbuf + n) % PIPE_BUFFERS];
+	int ret;
+	void *data;
+
+	ret = buf->ops->confirm(pipe, buf);
+	if (ret)
+		return ret;
+
+	data = buf->ops->map(pipe, buf, 0);
+	ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
+	buf->ops->unmap(pipe, buf, data);
+
+	return ret;
 }
 
 static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
 					 struct file *out, loff_t *ppos,
 					 size_t len, unsigned int flags)
 {
-	ssize_t ret = 0;
-	ssize_t total_len = 0;
-	int do_wakeup = 0;
-
-	pipe_lock(pipe);
-	while (len) {
-		struct pipe_buffer *buf;
-		void *data[PIPE_BUFFERS];
-		struct iovec vec[PIPE_BUFFERS];
-		unsigned int nr_pages = 0;
-		unsigned int write_len = 0;
-		unsigned int now_len = len;
-		unsigned int this_len;
-		int i;
-
-		BUG_ON(pipe->nrbufs > PIPE_BUFFERS);
-		for (i = 0; i < pipe->nrbufs && now_len; i++) {
-			buf = nth_pipe_buf(pipe, i);
-
-			ret = buf->ops->confirm(pipe, buf);
-			if (ret)
-				break;
-
-			data[i] = buf->ops->map(pipe, buf, 0);
-			this_len = min(buf->len, now_len);
-			vec[i].iov_base = (void __user *) data[i] + buf->offset;
-			vec[i].iov_len = this_len;
-			now_len -= this_len;
-			write_len += this_len;
-			nr_pages++;
-		}
-
-		if (nr_pages) {
-			ret = kernel_writev(out, vec, nr_pages, ppos);
-			if (ret == 0)
-				ret = -EIO;
-			if (ret > 0) {
-				len -= ret;
-				total_len += ret;
-			}
-		}
-
-		for (i = 0; i < nr_pages; i++) {
-			buf = nth_pipe_buf(pipe, i);
-			buf->ops->unmap(pipe, buf, data[i]);
-
-			if (ret > 0) {
-				this_len = min_t(unsigned, vec[i].iov_len, ret);
-				buf->offset += this_len;
-				buf->len -= this_len;
-				ret -= this_len;
-			}
-		}
-
-		if (ret < 0)
-			break;
-
-		while (pipe->nrbufs) {
-			const struct pipe_buf_operations *ops;
-
-			buf = nth_pipe_buf(pipe, 0);
-			if (buf->len)
-				break;
-
-			ops = buf->ops;
-			buf->ops = NULL;
-			ops->release(pipe, buf);
-			pipe->curbuf = (pipe->curbuf + 1) % PIPE_BUFFERS;
-			pipe->nrbufs--;
-			if (pipe->inode)
-				do_wakeup = 1;
-		}
-
-		if (pipe->nrbufs)
-			continue;
-		if (!pipe->writers)
-			break;
-		if (!pipe->waiting_writers) {
-			if (total_len)
-				break;
-		}
-
-		if (flags & SPLICE_F_NONBLOCK) {
-			ret = -EAGAIN;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		if (do_wakeup) {
-			wakeup_pipe_writers(pipe);
-			do_wakeup = 0;
-		}
-
-		pipe_wait(pipe);
-	}
-	pipe_unlock(pipe);
+	ssize_t ret;
 
-	if (do_wakeup)
-		wakeup_pipe_writers(pipe);
+	ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
+	if (ret > 0)
+		*ppos += ret;
 
-	return total_len ? total_len : ret;
+	return ret;
 }
 
 /**
-- 
cgit v0.10.2


From 3a5a39276d2a32b05b1ee25b384516805b17cf87 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 17 May 2009 18:55:18 +0300
Subject: block: allow blk_rq_map_kern to append to requests

Use blk_rq_append_bio() internally instead of blk_rq_bio_prep()
so blk_rq_map_kern can be called multiple times, to map multiple
buffers.

This is in the effort to un-export blk_rq_append_bio()

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-map.c b/block/blk-map.c
index 56082be..caa05a6 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -282,7 +282,8 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
  *
  * Description:
  *    Data will be mapped directly if possible. Otherwise a bounce
- *    buffer is used.
+ *    buffer is used. Can be called multple times to append multple
+ *    buffers.
  */
 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		    unsigned int len, gfp_t gfp_mask)
@@ -290,6 +291,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	int reading = rq_data_dir(rq) == READ;
 	int do_copy = 0;
 	struct bio *bio;
+	int ret;
 
 	if (len > (q->max_hw_sectors << 9))
 		return -EINVAL;
@@ -311,7 +313,13 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	if (do_copy)
 		rq->cmd_flags |= REQ_COPY_USER;
 
-	blk_rq_bio_prep(q, rq, bio);
+	ret = blk_rq_append_bio(q, rq, bio);
+	if (unlikely(ret)) {
+		/* request is too big */
+		bio_put(bio);
+		return ret;
+	}
+
 	blk_queue_bounce(q, &rq->bio);
 	rq->buffer = NULL;
 	return 0;
-- 
cgit v0.10.2


From bc38bf106c967389a465d926be22c7371abba69d Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 17 May 2009 18:56:17 +0300
Subject: libosd: Use new blk_rq_map_kern

Now that blk_rq_map_kern will append the buffer onto the
request we can use it easily for adding extra segments
(eg. attributes)

This patch is dependent on a block layer patch titled:
   [BLOCK] allow blk_rq_map_kern to append to requests

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index d178a8b..bf66e30 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -826,26 +826,6 @@ int osd_req_add_set_attr_list(struct osd_request *or,
 }
 EXPORT_SYMBOL(osd_req_add_set_attr_list);
 
-static int _append_map_kern(struct request *req,
-	void *buff, unsigned len, gfp_t flags)
-{
-	struct bio *bio;
-	int ret;
-
-	bio = bio_map_kern(req->q, buff, len, flags);
-	if (IS_ERR(bio)) {
-		OSD_ERR("Failed bio_map_kern(%p, %d) => %ld\n", buff, len,
-			PTR_ERR(bio));
-		return PTR_ERR(bio);
-	}
-	ret = blk_rq_append_bio(req->q, req, bio);
-	if (ret) {
-		OSD_ERR("Failed blk_rq_append_bio(%p) => %d\n", bio, ret);
-		bio_put(bio);
-	}
-	return ret;
-}
-
 static int _req_append_segment(struct osd_request *or,
 	unsigned padding, struct _osd_req_data_segment *seg,
 	struct _osd_req_data_segment *last_seg, struct _osd_io_info *io)
@@ -861,14 +841,14 @@ static int _req_append_segment(struct osd_request *or,
 		else
 			pad_buff = io->pad_buff;
 
-		ret = _append_map_kern(io->req, pad_buff, padding,
+		ret = blk_rq_map_kern(io->req->q, io->req, pad_buff, padding,
 				       or->alloc_flags);
 		if (ret)
 			return ret;
 		io->total_bytes += padding;
 	}
 
-	ret = _append_map_kern(io->req, seg->buff, seg->total_bytes,
+	ret = blk_rq_map_kern(io->req->q, io->req, seg->buff, seg->total_bytes,
 			       or->alloc_flags);
 	if (ret)
 		return ret;
-- 
cgit v0.10.2


From 79eb63e9e5875b84341a3a05f8e6ae9cdb4bb6f6 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 17 May 2009 18:57:15 +0300
Subject: block: Add blk_make_request(), takes bio, returns a request

New block API:
given a struct bio allocates a new request. This is the parallel of
generic_make_request for BLOCK_PC commands users.

The passed bio may be a chained-bio. The bio is bounced if needed
inside the call to this member.

This is in the effort of un-exporting blk_rq_append_bio().

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index e3f7e6a..bec1d69 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -891,6 +891,51 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 EXPORT_SYMBOL(blk_get_request);
 
 /**
+ * blk_make_request - given a bio, allocate a corresponding struct request.
+ *
+ * @bio:  The bio describing the memory mappings that will be submitted for IO.
+ *        It may be a chained-bio properly constructed by block/bio layer.
+ *
+ * blk_make_request is the parallel of generic_make_request for BLOCK_PC
+ * type commands. Where the struct request needs to be farther initialized by
+ * the caller. It is passed a &struct bio, which describes the memory info of
+ * the I/O transfer.
+ *
+ * The caller of blk_make_request must make sure that bi_io_vec
+ * are set to describe the memory buffers. That bio_data_dir() will return
+ * the needed direction of the request. (And all bio's in the passed bio-chain
+ * are properly set accordingly)
+ *
+ * If called under none-sleepable conditions, mapped bio buffers must not
+ * need bouncing, by calling the appropriate masked or flagged allocator,
+ * suitable for the target device. Otherwise the call to blk_queue_bounce will
+ * BUG.
+ */
+struct request *blk_make_request(struct request_queue *q, struct bio *bio,
+				 gfp_t gfp_mask)
+{
+	struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
+
+	if (unlikely(!rq))
+		return ERR_PTR(-ENOMEM);
+
+	for_each_bio(bio) {
+		struct bio *bounce_bio = bio;
+		int ret;
+
+		blk_queue_bounce(q, &bounce_bio);
+		ret = blk_rq_append_bio(q, rq, bounce_bio);
+		if (unlikely(ret)) {
+			blk_put_request(rq);
+			return ERR_PTR(ret);
+		}
+	}
+
+	return rq;
+}
+EXPORT_SYMBOL(blk_make_request);
+
+/**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f9d60a7..88a83e1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -740,6 +740,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_make_request(struct request_queue *, struct bio *,
+					gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
-- 
cgit v0.10.2


From c29b70f6ee4f2fa3ef07f55bc9082945861e5391 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 17 May 2009 18:58:41 +0300
Subject: libosd: Use of new blk_make_request

Use new blk_make_request() to allocate a request from bio
and avoid using deprecated blk_rq_append_bio().

This patch is dependent on a block layer patch titled:
    [BLOCK] New blk_make_request() takes bio returns request

This is the last usage of blk_rq_append_bio in osd, it can now
be un-exported.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Jeff Garzik <jeff@garzik.org>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index bf66e30..865ec0f 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1200,6 +1200,21 @@ static int _osd_req_finalize_data_integrity(struct osd_request *or,
 /*
  * osd_finalize_request and helpers
  */
+static struct request *_make_request(struct request_queue *q, bool has_write,
+			      struct _osd_io_info *oii, gfp_t flags)
+{
+	if (oii->bio)
+		return blk_make_request(q, oii->bio, flags);
+	else {
+		struct request *req;
+
+		req = blk_get_request(q, has_write ? WRITE : READ, flags);
+		if (unlikely(!req))
+			return ERR_PTR(-ENOMEM);
+
+		return req;
+	}
+}
 
 static int _init_blk_request(struct osd_request *or,
 	bool has_in, bool has_out)
@@ -1208,11 +1223,13 @@ static int _init_blk_request(struct osd_request *or,
 	struct scsi_device *scsi_device = or->osd_dev->scsi_device;
 	struct request_queue *q = scsi_device->request_queue;
 	struct request *req;
-	int ret = -ENOMEM;
+	int ret;
 
-	req = blk_get_request(q, has_out, flags);
-	if (!req)
+	req = _make_request(q, has_out, has_out ? &or->out : &or->in, flags);
+	if (IS_ERR(req)) {
+		ret = PTR_ERR(req);
 		goto out;
+	}
 
 	or->request = req;
 	req->cmd_type = REQ_TYPE_BLOCK_PC;
@@ -1225,9 +1242,10 @@ static int _init_blk_request(struct osd_request *or,
 		or->out.req = req;
 		if (has_in) {
 			/* allocate bidi request */
-			req = blk_get_request(q, READ, flags);
-			if (!req) {
+			req = _make_request(q, false, &or->in, flags);
+			if (IS_ERR(req)) {
 				OSD_DEBUG("blk_get_request for bidi failed\n");
+				ret = PTR_ERR(req);
 				goto out;
 			}
 			req->cmd_type = REQ_TYPE_BLOCK_PC;
@@ -1271,26 +1289,6 @@ int osd_finalize_request(struct osd_request *or,
 		return ret;
 	}
 
-	if (or->out.bio) {
-		ret = blk_rq_append_bio(or->request->q, or->out.req,
-					or->out.bio);
-		if (ret) {
-			OSD_DEBUG("blk_rq_append_bio out failed\n");
-			return ret;
-		}
-		OSD_DEBUG("out bytes=%llu (bytes_req=%u)\n",
-			_LLU(or->out.total_bytes), blk_rq_bytes(or->out.req));
-	}
-	if (or->in.bio) {
-		ret = blk_rq_append_bio(or->request->q, or->in.req, or->in.bio);
-		if (ret) {
-			OSD_DEBUG("blk_rq_append_bio in failed\n");
-			return ret;
-		}
-		OSD_DEBUG("in bytes=%llu (bytes_req=%u)\n",
-			_LLU(or->in.total_bytes), blk_rq_bytes(or->in.req));
-	}
-
 	or->out.pad_buff = sg_out_pad_buffer;
 	or->in.pad_buff = sg_in_pad_buffer;
 
-- 
cgit v0.10.2


From a411f4bbb89f1f08687b344064d6775bce1e4658 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 17 May 2009 19:00:01 +0300
Subject: block: Un-export blk_rq_append_bio

OSD was the last in-tree user of blk_rq_append_bio(). Now
that it is fixed blk_rq_append_bio is un-exported and
is only used internally by block layer.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-map.c b/block/blk-map.c
index caa05a6..ef2492a 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -24,7 +24,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 	}
 	return 0;
 }
-EXPORT_SYMBOL(blk_rq_append_bio);
 
 static int __blk_rq_unmap_user(struct bio *bio)
 {
diff --git a/block/blk.h b/block/blk.h
index 9e0042c..c863ec2 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,8 @@ extern struct kobj_type blk_queue_ktype;
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
+int blk_rq_append_bio(struct request_queue *q, struct request *rq,
+		      struct bio *bio);
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 88a83e1..564445b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -758,12 +758,6 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			 struct scsi_ioctl_command __user *);
 
 /*
- * Temporary export, until SCSI gets fixed up.
- */
-extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
-			     struct bio *bio);
-
-/*
  * A queue has just exitted congestion.  Note this in the global counter of
  * congested queues, and wake up anyone who was waiting for requests to be
  * put back.
-- 
cgit v0.10.2


From 4abc1cc2f9fe4b6bb3acc1d78e2c15af47b8133d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 19 May 2009 12:16:46 +0200
Subject: ALSA: hda - Add prefix to kernel messages

Add proper prefix to each kernel message in hda_intel.c.
Also, avoid the unneeded prefix when CONFIG_SND_VERBOSE_PRINTK is used
together with snd_print*().

Reference: bko#13207
	http://bugzilla.kernel.org/show_bug.cgi?id=13207

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 7bb6dd2..49fd973 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -128,8 +128,11 @@ MODULE_SUPPORTED_DEVICE("{{Intel, ICH6},"
 			 "{ULI, M5461}}");
 MODULE_DESCRIPTION("Intel HDA driver");
 
+#ifdef CONFIG_SND_VERBOSE_PRINTK
+#define SFX	/* nop */
+#else
 #define SFX	"hda-intel: "
-
+#endif
 
 /*
  * registers
@@ -620,7 +623,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 	}
 
 	if (chip->msi) {
-		snd_printk(KERN_WARNING "hda_intel: No response from codec, "
+		snd_printk(KERN_WARNING SFX "No response from codec, "
 			   "disabling MSI: last cmd=0x%08x\n", chip->last_cmd);
 		free_irq(chip->irq, chip);
 		chip->irq = -1;
@@ -634,7 +637,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 	}
 
 	if (!chip->polling_mode) {
-		snd_printk(KERN_WARNING "hda_intel: azx_get_response timeout, "
+		snd_printk(KERN_WARNING SFX "azx_get_response timeout, "
 			   "switching to polling mode: last cmd=0x%08x\n",
 			   chip->last_cmd);
 		chip->polling_mode = 1;
@@ -649,7 +652,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 		return -1;
 	}
 
-	snd_printk(KERN_ERR "hda_intel: azx_get_response timeout (ERROR): "
+	snd_printk(KERN_ERR SFX "azx_get_response timeout (ERROR): "
 		   "last cmd=0x%08x\n", chip->last_cmd);
 	spin_lock_irq(&chip->reg_lock);
 	chip->rirb.cmds = 0; /* reset the index */
@@ -776,7 +779,7 @@ static int azx_reset(struct azx *chip)
 
 	/* check to see if controller is ready */
 	if (!azx_readb(chip, GCTL)) {
-		snd_printd("azx_reset: controller not ready!\n");
+		snd_printd(SFX "azx_reset: controller not ready!\n");
 		return -EBUSY;
 	}
 
@@ -786,7 +789,7 @@ static int azx_reset(struct azx *chip)
 	/* detect codecs */
 	if (!chip->codec_mask) {
 		chip->codec_mask = azx_readw(chip, STATESTS);
-		snd_printdd("codec_mask = 0x%x\n", chip->codec_mask);
+		snd_printdd(SFX "codec_mask = 0x%x\n", chip->codec_mask);
 	}
 
 	return 0;
@@ -954,12 +957,12 @@ static void azx_init_pci(struct azx *chip)
 	case AZX_DRIVER_SCH:
 		pci_read_config_word(chip->pci, INTEL_SCH_HDA_DEVC, &snoop);
 		if (snoop & INTEL_SCH_HDA_DEVC_NOSNOOP) {
-			pci_write_config_word(chip->pci, INTEL_SCH_HDA_DEVC, \
+			pci_write_config_word(chip->pci, INTEL_SCH_HDA_DEVC,
 				snoop & (~INTEL_SCH_HDA_DEVC_NOSNOOP));
 			pci_read_config_word(chip->pci,
 				INTEL_SCH_HDA_DEVC, &snoop);
-			snd_printdd("HDA snoop disabled, enabling ... %s\n",\
-				(snoop & INTEL_SCH_HDA_DEVC_NOSNOOP) \
+			snd_printdd(SFX "HDA snoop disabled, enabling ... %s\n",
+				(snoop & INTEL_SCH_HDA_DEVC_NOSNOOP)
 				? "Failed" : "OK");
 		}
 		break;
@@ -1099,7 +1102,7 @@ static int azx_setup_periods(struct azx *chip,
 				pos_align;
 		pos_adj = frames_to_bytes(runtime, pos_adj);
 		if (pos_adj >= period_bytes) {
-			snd_printk(KERN_WARNING "Too big adjustment %d\n",
+			snd_printk(KERN_WARNING SFX "Too big adjustment %d\n",
 				   bdl_pos_adj[chip->dev_index]);
 			pos_adj = 0;
 		} else {
@@ -1123,7 +1126,7 @@ static int azx_setup_periods(struct azx *chip,
 	return 0;
 
  error:
-	snd_printk(KERN_ERR "Too many BDL entries: buffer=%d, period=%d\n",
+	snd_printk(KERN_ERR SFX "Too many BDL entries: buffer=%d, period=%d\n",
 		   azx_dev->bufsize, period_bytes);
 	return -EINVAL;
 }
@@ -1216,7 +1219,7 @@ static int probe_codec(struct azx *chip, int addr)
 	chip->probing = 0;
 	if (res == -1)
 		return -EIO;
-	snd_printdd("hda_intel: codec #%d probed OK\n", addr);
+	snd_printdd(SFX "codec #%d probed OK\n", addr);
 	return 0;
 }
 
@@ -1271,8 +1274,8 @@ static int __devinit azx_codec_create(struct azx *chip, const char *model,
 				/* Some BIOSen give you wrong codec addresses
 				 * that don't exist
 				 */
-				snd_printk(KERN_WARNING
-					   "hda_intel: Codec #%d probe error; "
+				snd_printk(KERN_WARNING SFX
+					   "Codec #%d probe error; "
 					   "disabling it...\n", c);
 				chip->codec_mask &= ~(1 << c);
 				/* More badly, accessing to a non-existing
@@ -1488,7 +1491,7 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream)
 	bufsize = snd_pcm_lib_buffer_bytes(substream);
 	period_bytes = snd_pcm_lib_period_bytes(substream);
 
-	snd_printdd("azx_pcm_prepare: bufsize=0x%x, format=0x%x\n",
+	snd_printdd(SFX "azx_pcm_prepare: bufsize=0x%x, format=0x%x\n",
 		    bufsize, format_val);
 
 	if (bufsize != azx_dev->bufsize ||
@@ -2265,7 +2268,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 	synchronize_irq(chip->irq);
 
 	gcap = azx_readw(chip, GCAP);
-	snd_printdd("chipset global capabilities = 0x%x\n", gcap);
+	snd_printdd(SFX "chipset global capabilities = 0x%x\n", gcap);
 
 	/* ATI chips seems buggy about 64bit DMA addresses */
 	if (chip->driver_type == AZX_DRIVER_ATI)
@@ -2309,7 +2312,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 	chip->azx_dev = kcalloc(chip->num_streams, sizeof(*chip->azx_dev),
 				GFP_KERNEL);
 	if (!chip->azx_dev) {
-		snd_printk(KERN_ERR "cannot malloc azx_dev\n");
+		snd_printk(KERN_ERR SFX "cannot malloc azx_dev\n");
 		goto errout;
 	}
 
-- 
cgit v0.10.2


From fa7979663190240b838ab8c8bad7f59e618bf77c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 19 May 2009 12:50:04 +0200
Subject: ALSA: hda - Fix digital beep tone calculation

The digital beep tone is calculated in two different ways depending
on the codec chip.  The standard one is using a divider, and another
one is a linear tone for IDT/STAC codecs.  Currently, only the
latter type is used for all codecs, which resulted in a wrong tone
pitch.

This patch adds the calculation of the standard HD-audio type.
Also clean-up the fields in hda_beep struct.

Reference: bko#13162
	http://bugzilla.kernel.org/show_bug.cgi?id=13162

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_beep.c b/sound/pci/hda/hda_beep.c
index 4de5bac..29272f2 100644
--- a/sound/pci/hda/hda_beep.c
+++ b/sound/pci/hda/hda_beep.c
@@ -45,6 +45,46 @@ static void snd_hda_generate_beep(struct work_struct *work)
 			AC_VERB_SET_BEEP_CONTROL, beep->tone);
 }
 
+/* (non-standard) Linear beep tone calculation for IDT/STAC codecs 
+ *
+ * The tone frequency of beep generator on IDT/STAC codecs is
+ * defined from the 8bit tone parameter, in Hz,
+ *    freq = 48000 * (257 - tone) / 1024
+ * that is from 12kHz to 93.75kHz in step of 46.875 hz
+ */
+static int beep_linear_tone(struct hda_beep *beep, int hz)
+{
+	hz *= 1000; /* fixed point */
+	hz = hz - DIGBEEP_HZ_MIN;
+	if (hz < 0)
+		hz = 0; /* turn off PC beep*/
+	else if (hz >= (DIGBEEP_HZ_MAX - DIGBEEP_HZ_MIN))
+		hz = 0xff;
+	else {
+		hz /= DIGBEEP_HZ_STEP;
+		hz++;
+	}
+	return hz;
+}
+
+/* HD-audio standard beep tone parameter calculation
+ *
+ * The tone frequency in Hz is calculated as
+ *   freq = 48000 / (tone * 4)
+ * from 47Hz to 12kHz
+ */
+static int beep_standard_tone(struct hda_beep *beep, int hz)
+{
+	if (hz <= 0)
+		return 0; /* disabled */
+	hz = 12000 / hz;
+	if (hz > 0xff)
+		return 0xff;
+	if (hz <= 0)
+		return 1;
+	return hz;
+}
+
 static int snd_hda_beep_event(struct input_dev *dev, unsigned int type,
 				unsigned int code, int hz)
 {
@@ -55,21 +95,14 @@ static int snd_hda_beep_event(struct input_dev *dev, unsigned int type,
 		if (hz)
 			hz = 1000;
 	case SND_TONE:
-		hz *= 1000; /* fixed point */
-		hz = hz - DIGBEEP_HZ_MIN;
-		if (hz < 0)
-			hz = 0; /* turn off PC beep*/
-		else if (hz >= (DIGBEEP_HZ_MAX - DIGBEEP_HZ_MIN))
-			hz = 0xff;
-		else {
-			hz /= DIGBEEP_HZ_STEP;
-			hz++;
-		}
+		if (beep->linear_tone)
+			beep->tone = beep_linear_tone(beep, hz);
+		else
+			beep->tone = beep_standard_tone(beep, hz);
 		break;
 	default:
 		return -1;
 	}
-	beep->tone = hz;
 
 	/* schedule beep event */
 	schedule_work(&beep->beep_work);
diff --git a/sound/pci/hda/hda_beep.h b/sound/pci/hda/hda_beep.h
index 51bf6a5..0c3de78 100644
--- a/sound/pci/hda/hda_beep.h
+++ b/sound/pci/hda/hda_beep.h
@@ -30,8 +30,9 @@ struct hda_beep {
 	struct hda_codec *codec;
 	char phys[32];
 	int tone;
-	int nid;
-	int enabled;
+	hda_nid_t nid;
+	unsigned int enabled:1;
+	unsigned int linear_tone:1;	/* linear tone for IDT/STAC codec */
 	struct work_struct beep_work; /* scheduled task for beep event */
 };
 
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 0295098..1731081 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -3737,6 +3737,8 @@ static int stac92xx_parse_auto_config(struct hda_codec *codec, hda_nid_t dig_out
 		err = snd_hda_attach_beep_device(codec, nid);
 		if (err < 0)
 			return err;
+		/* IDT/STAC codecs have linear beep tone parameter */
+		codec->beep->linear_tone = 1;
 		/* if no beep switch is available, make its own one */
 		caps = query_amp_caps(codec, nid, HDA_OUTPUT);
 		if (codec->beep &&
-- 
cgit v0.10.2


From 4aee2ad461889132bfb5a1518a9580d00e17008c Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Tue, 19 May 2009 17:07:01 +0530
Subject: x86: asm/processor.h: remove double declaration

Remove double declaration of:

 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;

they are already defined in the same file.

[ Impact: cleanup ]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
LKML-Reference: <1242733021.3377.1.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 34c5237..85628ea 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -403,9 +403,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary);
 extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
 
 struct thread_struct {
 	/* Cached TLS descriptors: */
-- 
cgit v0.10.2


From 4c6f18fc81565967da20f2d4a3922cdba33f8e2b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 18 May 2009 10:23:28 -0700
Subject: x86, io-apic: Don't mark pin_programmed early

Peter bisected that:

| commit b9c61b70075c87a8612624736faf4a2de5b1ed30
| Date:   Wed May 6 10:10:06 2009 -0700
|
|     x86/pci: update pirq_enable_irq() to setup io apic routing
|
|     So we can set io apic routing only when enabling the device irq.

wrecked his opteron box, ata1 interrupts fail to get through.

ata1 is using irq 11:

[    1.451839] sata_svw 0000:01:0e.0: version 2.3
[    1.456333] sata_svw 0000:01:0e.0: PCI INT A -> GSI 11 (level, low) -> IRQ 11
[    1.463639] scsi0 : sata_svw
[    1.466949] scsi1 : sata_svw
[    1.470022] scsi2 : sata_svw
[    1.473090] scsi3 : sata_svw
[    1.476112] ata1: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe000 irq 11
[    1.483490] ata2: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe100 irq 11
[    1.490870] ata3: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe200 irq 11
[    1.498247] ata4: SATA max UDMA/133 mmio m8192@0xff3fe000 port 0xff3fe300 irq 11

that pin is overlapped with pin with legacy ones.

We should not set bits in pin_programmed here, so that those bit could
be set later via io_apic_set_pci_routing().

[ Impact: fix boot hang on certain systems ]

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Yinghai Lu <yinghai.lu@kernel.org>
Tested-by: Peter Zijlstra <peterz@infradead.org>
Cc: Jack Steiner <steiner@sgi.com>
LKML-Reference: <4A119990.9020606@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ce1ac74..ac7f3b6 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1537,7 +1537,10 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 		cfg = desc->chip_data;
 		add_pin_to_irq_node(cfg, node, apic_id, pin);
-		set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+		/*
+		 * don't mark it in pin_programmed, so later acpi could
+		 * set it correctly when irq < 16
+		 */
 		setup_IO_APIC_irq(apic_id, pin, irq, desc,
 				irq_trigger(idx), irq_polarity(idx));
 	}
-- 
cgit v0.10.2


From ef9e8b14a5c1d0afbaf12b4c3b271188ddfc52a4 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 19 May 2009 14:25:16 +0100
Subject: GFS2: Don't warn when delete inode fails on ro filesystem

If the filesystem is read-only, then we expect that delete inode
will fail, so there is no need to warn about it.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index a3c2272..2fd1dcb 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -714,7 +714,7 @@ out_unlock:
 		gfs2_glock_dq(&ip->i_iopen_gh);
 	gfs2_holder_uninit(&ip->i_iopen_gh);
 	gfs2_glock_dq_uninit(&gh);
-	if (error && error != GLR_TRYFAILED)
+	if (error && error != GLR_TRYFAILED && error != -EROFS)
 		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
 out:
 	truncate_inode_pages(&inode->i_data, 0);
-- 
cgit v0.10.2


From c5642f4bbae30122beb696e723f6da273caa570e Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Tue, 19 May 2009 09:02:23 -0400
Subject: selinux: remove obsolete read buffer limit from sel_read_bool

On Tue, 2009-05-19 at 00:05 -0400, Eamon Walsh wrote:
> Recent versions of coreutils have bumped the read buffer size from 4K to
> 32K in several of the utilities.
>
> This means that "cat /selinux/booleans/xserver_object_manager" no longer
> works, it returns "Invalid argument" on F11.  getsebool works fine.
>
> sel_read_bool has a check for "count > PAGE_SIZE" that doesn't seem to
> be present in the other read functions.  Maybe it could be removed?

Yes, that check is obsoleted by the conversion of those functions to
using simple_read_from_buffer(), which will reduce count if necessary to
what is available in the buffer.

Signed-off-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 8d4007f..b4fc506 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -803,10 +803,6 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf,
 		goto out;
 	}
 
-	if (count > PAGE_SIZE) {
-		ret = -EINVAL;
-		goto out;
-	}
 	page = (char *)get_zeroed_page(GFP_KERNEL);
 	if (!page) {
 		ret = -ENOMEM;
-- 
cgit v0.10.2


From 8b6427a2a8f7dd43e9208fb33a3b116d66db4979 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 19 May 2009 09:57:03 -0400
Subject: cifs: fix pointer initialization and checks in cifs_follow_symlink
 (try #4)

This is the third respin of the patch posted yesterday to fix the error
handling in cifs_follow_symlink. It also includes a fix for a bogus NULL
pointer check in CIFSSMBQueryUnixSymLink that Jeff Moyer spotted.

It's possible for CIFSSMBQueryUnixSymLink to return without setting
target_path to a valid pointer. If that happens then the current value
to which we're initializing this pointer could cause an oops when it's
kfree'd.

This patch is a little more comprehensive than the last patches. It
reorganizes cifs_follow_link a bit for (hopefully) better readability.
It should also eliminate the uneeded allocation of full_path on servers
without unix extensions (assuming they can get to this point anyway, of
which I'm not convinced).

On a side note, I'm not sure I agree with the logic of enabling this
query even when unix extensions are disabled on the client. It seems
like that should disable this as well. But, changing that is outside the
scope of this fix, so I've left it alone for now.

Reported-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Reviewed-by: Christoph Hellwig <hch@inraded.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5759ba5..d062602 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2475,7 +2475,7 @@ querySymLinkRetry:
 			/* BB FIXME investigate remapping reserved chars here */
 			*symlinkinfo = cifs_strndup_from_ucs(data_start, count,
 						    is_unicode, nls_codepage);
-			if (!symlinkinfo)
+			if (!*symlinkinfo)
 				rc = -ENOMEM;
 		}
 	}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index ea9d11e..cd83c53 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -107,48 +107,48 @@ void *
 cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
 {
 	struct inode *inode = direntry->d_inode;
-	int rc = -EACCES;
+	int rc = -ENOMEM;
 	int xid;
 	char *full_path = NULL;
-	char *target_path = ERR_PTR(-ENOMEM);
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
+	char *target_path = NULL;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsTconInfo *tcon = cifs_sb->tcon;
 
 	xid = GetXid();
 
-	full_path = build_path_from_dentry(direntry);
+	/*
+	 * For now, we just handle symlinks with unix extensions enabled.
+	 * Eventually we should handle NTFS reparse points, and MacOS
+	 * symlink support. For instance...
+	 *
+	 * rc = CIFSSMBQueryReparseLinkInfo(...)
+	 *
+	 * For now, just return -EACCES when the server doesn't support posix
+	 * extensions. Note that we still allow querying symlinks when posix
+	 * extensions are manually disabled. We could disable these as well
+	 * but there doesn't seem to be any harm in allowing the client to
+	 * read them.
+	 */
+	if (!(tcon->ses->capabilities & CAP_UNIX)) {
+		rc = -EACCES;
+		goto out;
+	}
 
+	full_path = build_path_from_dentry(direntry);
 	if (!full_path)
 		goto out;
 
 	cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode));
-	cifs_sb = CIFS_SB(inode->i_sb);
-	pTcon = cifs_sb->tcon;
-
-	/* We could change this to:
-		if (pTcon->unix_ext)
-	   but there does not seem any point in refusing to
-	   get symlink info if we can, even if unix extensions
-	   turned off for this mount */
-
-	if (pTcon->ses->capabilities & CAP_UNIX)
-		rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path,
-					     &target_path,
-					     cifs_sb->local_nls);
-	else {
-		/* BB add read reparse point symlink code here */
-		/* rc = CIFSSMBQueryReparseLinkInfo */
-		/* BB Add code to Query ReparsePoint info */
-		/* BB Add MAC style xsymlink check here if enabled */
-	}
 
+	rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path,
+				     cifs_sb->local_nls);
+	kfree(full_path);
+out:
 	if (rc != 0) {
 		kfree(target_path);
 		target_path = ERR_PTR(rc);
 	}
 
-	kfree(full_path);
-out:
 	FreeXid(xid);
 	nd_set_link(nd, target_path);
 	return NULL;
-- 
cgit v0.10.2


From 8e7d2b2c6ecd3c21a54b877eae3d5be48292e6b5 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Fri, 8 May 2009 16:13:25 -0700
Subject: drm/i915: allocate large pointer arrays with vmalloc

For awhile now, many of the GEM code paths have allocated page or
object arrays with the slab allocator.  This is nice and fast, but
won't work well if memory is fragmented, since the slab allocator works
with physically contiguous memory (i.e. order > 2 allocations are
likely to fail fairly early after booting and doing some work).

This patch works around the issue by falling back to vmalloc for
>PAGE_SIZE allocations.  This is ugly, but much less work than chaining
a bunch of pages together by hand (suprisingly there's not a bunch of
generic kernel helpers for this yet afaik).  vmalloc space is somewhat
precious on 32 bit kernels, but our allocations shouldn't be big enough
to cause problems, though they're routinely more than a page.

Note that this patch doesn't address the unchecked
alloc-based-on-ioctl-args in GEM; that needs to be fixed in a separate
patch.

Also, I've deliberately ignored the DRM's "area" junk.  I don't think
anyone actually uses it anymore and I'm hoping it gets ripped out soon.

[Updated: removed size arg to new free function.  We could unify the
free functions as well once the DRM mem tracking is ripped out.]

fd.o bug #20152 (part 1/3)

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b189b49..4a24c90 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -349,7 +349,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
 	if (user_pages == NULL)
 		return -ENOMEM;
 
@@ -429,7 +429,7 @@ fail_put_user_pages:
 		SetPageDirty(user_pages[i]);
 		page_cache_release(user_pages[i]);
 	}
-	kfree(user_pages);
+	drm_free_large(user_pages);
 
 	return ret;
 }
@@ -649,7 +649,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
 	if (user_pages == NULL)
 		return -ENOMEM;
 
@@ -719,7 +719,7 @@ out_unlock:
 out_unpin_pages:
 	for (i = 0; i < pinned_pages; i++)
 		page_cache_release(user_pages[i]);
-	kfree(user_pages);
+	drm_free_large(user_pages);
 
 	return ret;
 }
@@ -824,7 +824,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
 	if (user_pages == NULL)
 		return -ENOMEM;
 
@@ -902,7 +902,7 @@ fail_unlock:
 fail_put_user_pages:
 	for (i = 0; i < pinned_pages; i++)
 		page_cache_release(user_pages[i]);
-	kfree(user_pages);
+	drm_free_large(user_pages);
 
 	return ret;
 }
@@ -1408,9 +1408,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
 		}
 	obj_priv->dirty = 0;
 
-	drm_free(obj_priv->pages,
-		 page_count * sizeof(struct page *),
-		 DRM_MEM_DRIVER);
+	drm_free_large(obj_priv->pages);
 	obj_priv->pages = NULL;
 }
 
@@ -2024,8 +2022,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
 	 */
 	page_count = obj->size / PAGE_SIZE;
 	BUG_ON(obj_priv->pages != NULL);
-	obj_priv->pages = drm_calloc(page_count, sizeof(struct page *),
-				     DRM_MEM_DRIVER);
+	obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
 	if (obj_priv->pages == NULL) {
 		DRM_ERROR("Faled to allocate page list\n");
 		obj_priv->pages_refcount--;
@@ -3111,7 +3108,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
 		reloc_count += exec_list[i].relocation_count;
 	}
 
-	*relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER);
+	*relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
 	if (*relocs == NULL)
 		return -ENOMEM;
 
@@ -3125,8 +3122,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
 				     exec_list[i].relocation_count *
 				     sizeof(**relocs));
 		if (ret != 0) {
-			drm_free(*relocs, reloc_count * sizeof(**relocs),
-				 DRM_MEM_DRIVER);
+			drm_free_large(*relocs);
 			*relocs = NULL;
 			return -EFAULT;
 		}
@@ -3165,7 +3161,7 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
 	}
 
 err:
-	drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER);
+	drm_free_large(relocs);
 
 	return ret;
 }
@@ -3198,10 +3194,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 	/* Copy in the exec list from userland */
-	exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
-			       DRM_MEM_DRIVER);
-	object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
-				 DRM_MEM_DRIVER);
+	exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count);
+	object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count);
 	if (exec_list == NULL || object_list == NULL) {
 		DRM_ERROR("Failed to allocate exec or object list "
 			  "for %d buffers\n",
@@ -3462,10 +3456,8 @@ err:
 	}
 
 pre_mutex_err:
-	drm_free(object_list, sizeof(*object_list) * args->buffer_count,
-		 DRM_MEM_DRIVER);
-	drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
-		 DRM_MEM_DRIVER);
+	drm_free_large(object_list);
+	drm_free_large(exec_list);
 	drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects,
 		 DRM_MEM_DRIVER);
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index c8c4221..b84d8ae 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1519,6 +1519,30 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area)
 {
 	return kcalloc(nmemb, size, GFP_KERNEL);
 }
+
+static __inline__ void *drm_calloc_large(size_t nmemb, size_t size)
+{
+	u8 *addr;
+
+	if (size <= PAGE_SIZE)
+	    return kcalloc(nmemb, size, GFP_KERNEL);
+
+	addr = vmalloc(nmemb * size);
+	if (!addr)
+		return NULL;
+
+	memset(addr, 0, nmemb * size);
+
+	return addr;
+}
+
+static __inline void drm_free_large(void *ptr)
+{
+	if (!is_vmalloc_addr(ptr))
+		return kfree(ptr);
+
+	vfree(ptr);
+}
 #else
 extern void *drm_alloc(size_t size, int area);
 extern void drm_free(void *pt, size_t size, int area);
-- 
cgit v0.10.2


From 53674ac5a997a8eedbb2dfdc308b895170746c8b Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 19 May 2009 19:52:35 +0200
Subject: block: add warning to blk_make_request()

Add a note about how one needs to be careful when setting up these bio
chains.

Extracted from Boaz's updated patch.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index bec1d69..4906507 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -910,6 +910,15 @@ EXPORT_SYMBOL(blk_get_request);
  * need bouncing, by calling the appropriate masked or flagged allocator,
  * suitable for the target device. Otherwise the call to blk_queue_bounce will
  * BUG.
+ *
+ * WARNING: When allocating/cloning a bio-chain, careful consideration should be
+ * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
+ * anything but the first bio in the chain. Otherwise you risk waiting for IO
+ * completion of a bio that hasn't been submitted yet, thus resulting in a
+ * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
+ * of bio_alloc(), as that avoids the mempool deadlock.
+ * If possible a big IO should be split into smaller parts when allocation
+ * fails. Partial allocation should not be an error, or you risk a live-lock.
  */
 struct request *blk_make_request(struct request_queue *q, struct bio *bio,
 				 gfp_t gfp_mask)
-- 
cgit v0.10.2


From ac36552a52a6ec8563ac0a109e2a0935673f4abb Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Tue, 19 May 2009 19:54:09 +0200
Subject: scsi_lib: remove unused variable

The last request completion cleanup in scsi_lib left an unused
this_count variable in scsi_io_completion().
(It was used before in a code segment that now uses blk_end_request_all())

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e410d66..d7c6c75 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -706,7 +706,6 @@ EXPORT_SYMBOL(scsi_release_buffers);
 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 {
 	int result = cmd->result;
-	int this_count;
 	struct request_queue *q = cmd->device->request_queue;
 	struct request *req = cmd->request;
 	int error = 0;
@@ -789,7 +788,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	 */
 	if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
 		return;
-	this_count = blk_rq_bytes(req);
 
 	error = -EIO;
 
-- 
cgit v0.10.2


From 26a9a418237c0b06528941bca693c49c8d97edbe Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 19 May 2009 11:25:35 -0700
Subject: Avoid ICE in get_random_int() with gcc-3.4.5

Martin Knoblauch reports that trying to build 2.6.30-rc6-git3 with
RHEL4.3 userspace (gcc (GCC) 3.4.5 20051201 (Red Hat 3.4.5-2)) causes an
internal compiler error (ICE):

    drivers/char/random.c: In function `get_random_int':
    drivers/char/random.c:1672: error: unrecognizable insn:
    (insn 202 148 150 0 /scratch/build/linux-2.6.30-rc6-git3/arch/x86/include/asm/tsc.h:23 (set (reg:SI 0 ax [91])
            (subreg:SI (plus:DI (plus:DI (reg:DI 0 ax [88])
                        (subreg:DI (reg:SI 6 bp) 0))
                    (const_int -4 [0xfffffffffffffffc])) 0)) -1 (nil)
        (nil))
    drivers/char/random.c:1672: internal compiler error: in extract_insn, at recog.c:2083

and after some debugging it turns out that it's due to the code trying
to figure out the rough value of the current stack pointer by taking an
address of an uninitialized variable and casting that to an integer.

This is clearly a compiler bug, but it's not worth fighting - while the
current stack kernel pointer might be somewhat hard to predict in user
space, it's also not generally going to change for a lot of the call
chains for a particular process.

So just drop it, and mumble some incoherent curses at the compiler.

Tested-by: Martin Knoblauch <spamtrap@knobisoft.de>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/random.c b/drivers/char/random.c
index b2ced39..8c74448 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1673,7 +1673,7 @@ unsigned int get_random_int(void)
 	int ret;
 
 	keyptr = get_keyptr();
-	hash[0] += current->pid + jiffies + get_cycles() + (int)(long)&ret;
+	hash[0] += current->pid + jiffies + get_cycles();
 
 	ret = half_md4_transform(hash, keyptr->secret);
 	put_cpu_var(get_random_int_hash);
-- 
cgit v0.10.2


From e24805dd85283ac0912b9c400768a4d171b400ff Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Tue, 19 May 2009 22:12:15 +0900
Subject: ASoC: Add TXx9 AC link controller driver (v3)

This patch adds support for the integrated ACLC of the TXx9 family.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig
index 3304f9d..d3e786a 100644
--- a/sound/soc/Kconfig
+++ b/sound/soc/Kconfig
@@ -34,6 +34,7 @@ source "sound/soc/pxa/Kconfig"
 source "sound/soc/s3c24xx/Kconfig"
 source "sound/soc/s6000/Kconfig"
 source "sound/soc/sh/Kconfig"
+source "sound/soc/txx9/Kconfig"
 
 # Supported codecs
 source "sound/soc/codecs/Kconfig"
diff --git a/sound/soc/Makefile b/sound/soc/Makefile
index 8943a14..6f1e28d 100644
--- a/sound/soc/Makefile
+++ b/sound/soc/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_SND_SOC)	+= pxa/
 obj-$(CONFIG_SND_SOC)	+= s3c24xx/
 obj-$(CONFIG_SND_SOC)	+= s6000/
 obj-$(CONFIG_SND_SOC)	+= sh/
+obj-$(CONFIG_SND_SOC)	+= txx9/
diff --git a/sound/soc/txx9/Kconfig b/sound/soc/txx9/Kconfig
new file mode 100644
index 0000000..ebc9327
--- /dev/null
+++ b/sound/soc/txx9/Kconfig
@@ -0,0 +1,29 @@
+##
+## TXx9 ACLC
+##
+config SND_SOC_TXX9ACLC
+	tristate "SoC Audio for TXx9"
+	depends on HAS_TXX9_ACLC && TXX9_DMAC
+	help
+	  This option enables support for the AC Link Controllers in TXx9 SoC.
+
+config HAS_TXX9_ACLC
+	bool
+
+config SND_SOC_TXX9ACLC_AC97
+	tristate
+	select AC97_BUS
+	select SND_AC97_CODEC
+	select SND_SOC_AC97_BUS
+
+
+##
+## Boards
+##
+config SND_SOC_TXX9ACLC_GENERIC
+	tristate "Generic TXx9 ACLC sound machine"
+	depends on SND_SOC_TXX9ACLC
+	select SND_SOC_TXX9ACLC_AC97
+	select SND_SOC_AC97_CODEC
+	help
+	  This is a generic AC97 sound machine for use in TXx9 based systems.
diff --git a/sound/soc/txx9/Makefile b/sound/soc/txx9/Makefile
new file mode 100644
index 0000000..551f16c
--- /dev/null
+++ b/sound/soc/txx9/Makefile
@@ -0,0 +1,11 @@
+# Platform
+snd-soc-txx9aclc-objs := txx9aclc.o
+snd-soc-txx9aclc-ac97-objs := txx9aclc-ac97.o
+
+obj-$(CONFIG_SND_SOC_TXX9ACLC) += snd-soc-txx9aclc.o
+obj-$(CONFIG_SND_SOC_TXX9ACLC_AC97) += snd-soc-txx9aclc-ac97.o
+
+# Machine
+snd-soc-txx9aclc-generic-objs := txx9aclc-generic.o
+
+obj-$(CONFIG_SND_SOC_TXX9ACLC_GENERIC) += snd-soc-txx9aclc-generic.o
diff --git a/sound/soc/txx9/txx9aclc-ac97.c b/sound/soc/txx9/txx9aclc-ac97.c
new file mode 100644
index 0000000..0f83bdb
--- /dev/null
+++ b/sound/soc/txx9/txx9aclc-ac97.c
@@ -0,0 +1,255 @@
+/*
+ * TXx9 ACLC AC97 driver
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * Based on RBTX49xx patch from CELF patch archive.
+ * (C) Copyright TOSHIBA CORPORATION 2004-2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include "txx9aclc.h"
+
+#define AC97_DIR	\
+	(SND_SOC_DAIDIR_PLAYBACK | SND_SOC_DAIDIR_CAPTURE)
+
+#define AC97_RATES	\
+	SNDRV_PCM_RATE_8000_48000
+
+#ifdef __BIG_ENDIAN
+#define AC97_FMTS	SNDRV_PCM_FMTBIT_S16_BE
+#else
+#define AC97_FMTS	SNDRV_PCM_FMTBIT_S16_LE
+#endif
+
+static DECLARE_WAIT_QUEUE_HEAD(ac97_waitq);
+
+/* REVISIT: How to find txx9aclc_soc_device from snd_ac97? */
+static struct txx9aclc_soc_device *txx9aclc_soc_dev;
+
+static int txx9aclc_regready(struct txx9aclc_soc_device *dev)
+{
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+
+	return __raw_readl(drvdata->base + ACINTSTS) & ACINT_REGACCRDY;
+}
+
+/* AC97 controller reads codec register */
+static unsigned short txx9aclc_ac97_read(struct snd_ac97 *ac97,
+					 unsigned short reg)
+{
+	struct txx9aclc_soc_device *dev = txx9aclc_soc_dev;
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+	u32 dat;
+
+	if (!(__raw_readl(base + ACINTSTS) & ACINT_CODECRDY(ac97->num)))
+		return 0xffff;
+	reg |= ac97->num << 7;
+	dat = (reg << ACREGACC_REG_SHIFT) | ACREGACC_READ;
+	__raw_writel(dat, base + ACREGACC);
+	__raw_writel(ACINT_REGACCRDY, base + ACINTEN);
+	if (!wait_event_timeout(ac97_waitq, txx9aclc_regready(dev), HZ)) {
+		__raw_writel(ACINT_REGACCRDY, base + ACINTDIS);
+		dev_err(dev->soc_dev.dev, "ac97 read timeout (reg %#x)\n", reg);
+		dat = 0xffff;
+		goto done;
+	}
+	dat = __raw_readl(base + ACREGACC);
+	if (((dat >> ACREGACC_REG_SHIFT) & 0xff) != reg) {
+		dev_err(dev->soc_dev.dev, "reg mismatch %x with %x\n",
+			dat, reg);
+		dat = 0xffff;
+		goto done;
+	}
+	dat = (dat >> ACREGACC_DAT_SHIFT) & 0xffff;
+done:
+	__raw_writel(ACINT_REGACCRDY, base + ACINTDIS);
+	return dat;
+}
+
+/* AC97 controller writes to codec register */
+static void txx9aclc_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
+				unsigned short val)
+{
+	struct txx9aclc_soc_device *dev = txx9aclc_soc_dev;
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+
+	__raw_writel(((reg | (ac97->num << 7)) << ACREGACC_REG_SHIFT) |
+		     (val << ACREGACC_DAT_SHIFT),
+		     base + ACREGACC);
+	__raw_writel(ACINT_REGACCRDY, base + ACINTEN);
+	if (!wait_event_timeout(ac97_waitq, txx9aclc_regready(dev), HZ)) {
+		dev_err(dev->soc_dev.dev,
+			"ac97 write timeout (reg %#x)\n", reg);
+	}
+	__raw_writel(ACINT_REGACCRDY, base + ACINTDIS);
+}
+
+static void txx9aclc_ac97_cold_reset(struct snd_ac97 *ac97)
+{
+	struct txx9aclc_soc_device *dev = txx9aclc_soc_dev;
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+	u32 ready = ACINT_CODECRDY(ac97->num) | ACINT_REGACCRDY;
+
+	__raw_writel(ACCTL_ENLINK, base + ACCTLDIS);
+	mmiowb();
+	udelay(1);
+	__raw_writel(ACCTL_ENLINK, base + ACCTLEN);
+	/* wait for primary codec ready status */
+	__raw_writel(ready, base + ACINTEN);
+	if (!wait_event_timeout(ac97_waitq,
+				(__raw_readl(base + ACINTSTS) & ready) == ready,
+				HZ)) {
+		dev_err(&ac97->dev, "primary codec is not ready "
+			"(status %#x)\n",
+			__raw_readl(base + ACINTSTS));
+	}
+	__raw_writel(ACINT_REGACCRDY, base + ACINTSTS);
+	__raw_writel(ready, base + ACINTDIS);
+}
+
+/* AC97 controller operations */
+struct snd_ac97_bus_ops soc_ac97_ops = {
+	.read		= txx9aclc_ac97_read,
+	.write		= txx9aclc_ac97_write,
+	.reset		= txx9aclc_ac97_cold_reset,
+};
+EXPORT_SYMBOL_GPL(soc_ac97_ops);
+
+static irqreturn_t txx9aclc_ac97_irq(int irq, void *dev_id)
+{
+	struct txx9aclc_plat_drvdata *drvdata = dev_id;
+	void __iomem *base = drvdata->base;
+
+	__raw_writel(__raw_readl(base + ACINTMSTS), base + ACINTDIS);
+	wake_up(&ac97_waitq);
+	return IRQ_HANDLED;
+}
+
+static int txx9aclc_ac97_probe(struct platform_device *pdev,
+			       struct snd_soc_dai *dai)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct txx9aclc_soc_device *dev =
+		container_of(socdev, struct txx9aclc_soc_device, soc_dev);
+
+	dev->aclc_pdev = to_platform_device(dai->dev);
+	txx9aclc_soc_dev = dev;
+	return 0;
+}
+
+static void txx9aclc_ac97_remove(struct platform_device *pdev,
+				 struct snd_soc_dai *dai)
+{
+	struct platform_device *aclc_pdev = to_platform_device(dai->dev);
+	struct txx9aclc_plat_drvdata *drvdata = platform_get_drvdata(aclc_pdev);
+
+	/* disable AC-link */
+	__raw_writel(ACCTL_ENLINK, drvdata->base + ACCTLDIS);
+	txx9aclc_soc_dev = NULL;
+}
+
+struct snd_soc_dai txx9aclc_ac97_dai = {
+	.name			= "txx9aclc_ac97",
+	.ac97_control		= 1,
+	.probe			= txx9aclc_ac97_probe,
+	.remove			= txx9aclc_ac97_remove,
+	.playback = {
+		.rates		= AC97_RATES,
+		.formats	= AC97_FMTS,
+		.channels_min	= 2,
+		.channels_max	= 2,
+	},
+	.capture = {
+		.rates		= AC97_RATES,
+		.formats	= AC97_FMTS,
+		.channels_min	= 2,
+		.channels_max	= 2,
+	},
+};
+EXPORT_SYMBOL_GPL(txx9aclc_ac97_dai);
+
+static int __devinit txx9aclc_ac97_dev_probe(struct platform_device *pdev)
+{
+	struct txx9aclc_plat_drvdata *drvdata;
+	struct resource *r;
+	int err;
+	int irq;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r)
+		return -EBUSY;
+
+	if (!devm_request_mem_region(&pdev->dev, r->start, resource_size(r),
+				     dev_name(&pdev->dev)))
+		return -EBUSY;
+
+	drvdata = devm_kzalloc(&pdev->dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, drvdata);
+	drvdata->physbase = r->start;
+	if (sizeof(drvdata->physbase) > sizeof(r->start) &&
+	    r->start >= TXX9_DIRECTMAP_BASE &&
+	    r->start < TXX9_DIRECTMAP_BASE + 0x400000)
+		drvdata->physbase |= 0xf00000000ull;
+	drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
+	if (!drvdata->base)
+		return -EBUSY;
+	err = devm_request_irq(&pdev->dev, irq, txx9aclc_ac97_irq,
+			       IRQF_DISABLED, dev_name(&pdev->dev), drvdata);
+	if (err < 0)
+		return err;
+
+	txx9aclc_ac97_dai.dev = &pdev->dev;
+	return snd_soc_register_dai(&txx9aclc_ac97_dai);
+}
+
+static int __devexit txx9aclc_ac97_dev_remove(struct platform_device *pdev)
+{
+	snd_soc_unregister_dai(&txx9aclc_ac97_dai);
+	return 0;
+}
+
+static struct platform_driver txx9aclc_ac97_driver = {
+	.probe		= txx9aclc_ac97_dev_probe,
+	.remove		= __devexit_p(txx9aclc_ac97_dev_remove),
+	.driver		= {
+		.name	= "txx9aclc-ac97",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init txx9aclc_ac97_init(void)
+{
+	return platform_driver_register(&txx9aclc_ac97_driver);
+}
+
+static void __exit txx9aclc_ac97_exit(void)
+{
+	platform_driver_unregister(&txx9aclc_ac97_driver);
+}
+
+module_init(txx9aclc_ac97_init);
+module_exit(txx9aclc_ac97_exit);
+
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_DESCRIPTION("TXx9 ACLC AC97 driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/txx9/txx9aclc-generic.c b/sound/soc/txx9/txx9aclc-generic.c
new file mode 100644
index 0000000..3175de9
--- /dev/null
+++ b/sound/soc/txx9/txx9aclc-generic.c
@@ -0,0 +1,98 @@
+/*
+ * Generic TXx9 ACLC machine driver
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * Based on RBTX49xx patch from CELF patch archive.
+ * (C) Copyright TOSHIBA CORPORATION 2004-2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This is a very generic AC97 sound machine driver for boards which
+ * have (AC97) audio at ACLC (e.g. RBTX49XX boards).
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include "../codecs/ac97.h"
+#include "txx9aclc.h"
+
+static struct snd_soc_dai_link txx9aclc_generic_dai = {
+	.name = "AC97",
+	.stream_name = "AC97 HiFi",
+	.cpu_dai = &txx9aclc_ac97_dai,
+	.codec_dai = &ac97_dai,
+};
+
+static struct snd_soc_card txx9aclc_generic_card = {
+	.name		= "Generic TXx9 ACLC Audio",
+	.platform	= &txx9aclc_soc_platform,
+	.dai_link	= &txx9aclc_generic_dai,
+	.num_links	= 1,
+};
+
+static struct txx9aclc_soc_device txx9aclc_generic_soc_device = {
+	.soc_dev = {
+		.card		= &txx9aclc_generic_card,
+		.codec_dev	= &soc_codec_dev_ac97,
+	},
+};
+
+static int __init txx9aclc_generic_probe(struct platform_device *pdev)
+{
+	struct txx9aclc_soc_device *dev = &txx9aclc_generic_soc_device;
+	struct platform_device *soc_pdev;
+	int ret;
+
+	soc_pdev = platform_device_alloc("soc-audio", -1);
+	if (!soc_pdev)
+		return -ENOMEM;
+	platform_set_drvdata(soc_pdev, &dev->soc_dev);
+	dev->soc_dev.dev = &soc_pdev->dev;
+	ret = platform_device_add(soc_pdev);
+	if (ret) {
+		platform_device_put(soc_pdev);
+		return ret;
+	}
+	platform_set_drvdata(pdev, soc_pdev);
+	return 0;
+}
+
+static int __exit txx9aclc_generic_remove(struct platform_device *pdev)
+{
+	struct platform_device *soc_pdev = platform_get_drvdata(pdev);
+
+	platform_device_unregister(soc_pdev);
+	return 0;
+}
+
+static struct platform_driver txx9aclc_generic_driver = {
+	.remove = txx9aclc_generic_remove,
+	.driver = {
+		.name = "txx9aclc-generic",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init txx9aclc_generic_init(void)
+{
+	return platform_driver_probe(&txx9aclc_generic_driver,
+				     txx9aclc_generic_probe);
+}
+
+static void __exit txx9aclc_generic_exit(void)
+{
+	platform_driver_unregister(&txx9aclc_generic_driver);
+}
+
+module_init(txx9aclc_generic_init);
+module_exit(txx9aclc_generic_exit);
+
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_DESCRIPTION("Generic TXx9 ACLC ALSA SoC audio driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c
new file mode 100644
index 0000000..fa33661
--- /dev/null
+++ b/sound/soc/txx9/txx9aclc.c
@@ -0,0 +1,430 @@
+/*
+ * Generic TXx9 ACLC platform driver
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * Based on RBTX49xx patch from CELF patch archive.
+ * (C) Copyright TOSHIBA CORPORATION 2004-2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include "txx9aclc.h"
+
+static const struct snd_pcm_hardware txx9aclc_pcm_hardware = {
+	/*
+	 * REVISIT: SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID
+	 * needs more works for noncoherent MIPS.
+	 */
+	.info		  = SNDRV_PCM_INFO_INTERLEAVED |
+			    SNDRV_PCM_INFO_BATCH |
+			    SNDRV_PCM_INFO_PAUSE,
+#ifdef __BIG_ENDIAN
+	.formats	  = SNDRV_PCM_FMTBIT_S16_BE,
+#else
+	.formats	  = SNDRV_PCM_FMTBIT_S16_LE,
+#endif
+	.period_bytes_min = 1024,
+	.period_bytes_max = 8 * 1024,
+	.periods_min	  = 2,
+	.periods_max	  = 4096,
+	.buffer_bytes_max = 32 * 1024,
+};
+
+static int txx9aclc_pcm_hw_params(struct snd_pcm_substream *substream,
+				  struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream);
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct txx9aclc_dmadata *dmadata = runtime->private_data;
+	int ret;
+
+	ret = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params));
+	if (ret < 0)
+		return ret;
+
+	dev_dbg(socdev->dev,
+		"runtime->dma_area = %#lx dma_addr = %#lx dma_bytes = %zd "
+		"runtime->min_align %ld\n",
+		(unsigned long)runtime->dma_area,
+		(unsigned long)runtime->dma_addr, runtime->dma_bytes,
+		runtime->min_align);
+	dev_dbg(socdev->dev,
+		"periods %d period_bytes %d stream %d\n",
+		params_periods(params), params_period_bytes(params),
+		substream->stream);
+
+	dmadata->substream = substream;
+	dmadata->pos = 0;
+	return 0;
+}
+
+static int txx9aclc_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	return snd_pcm_lib_free_pages(substream);
+}
+
+static int txx9aclc_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct txx9aclc_dmadata *dmadata = runtime->private_data;
+
+	dmadata->dma_addr = runtime->dma_addr;
+	dmadata->buffer_bytes = snd_pcm_lib_buffer_bytes(substream);
+	dmadata->period_bytes = snd_pcm_lib_period_bytes(substream);
+
+	if (dmadata->buffer_bytes == dmadata->period_bytes) {
+		dmadata->frag_bytes = dmadata->period_bytes >> 1;
+		dmadata->frags = 2;
+	} else {
+		dmadata->frag_bytes = dmadata->period_bytes;
+		dmadata->frags = dmadata->buffer_bytes / dmadata->period_bytes;
+	}
+	dmadata->frag_count = 0;
+	dmadata->pos = 0;
+	return 0;
+}
+
+static void txx9aclc_dma_complete(void *arg)
+{
+	struct txx9aclc_dmadata *dmadata = arg;
+	unsigned long flags;
+
+	/* dma completion handler cannot submit new operations */
+	spin_lock_irqsave(&dmadata->dma_lock, flags);
+	if (dmadata->frag_count >= 0) {
+		dmadata->dmacount--;
+		BUG_ON(dmadata->dmacount < 0);
+		tasklet_schedule(&dmadata->tasklet);
+	}
+	spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+}
+
+static struct dma_async_tx_descriptor *
+txx9aclc_dma_submit(struct txx9aclc_dmadata *dmadata, dma_addr_t buf_dma_addr)
+{
+	struct dma_chan *chan = dmadata->dma_chan;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist sg;
+
+	sg_init_table(&sg, 1);
+	sg_set_page(&sg, pfn_to_page(PFN_DOWN(buf_dma_addr)),
+		    dmadata->frag_bytes, buf_dma_addr & (PAGE_SIZE - 1));
+	sg_dma_address(&sg) = buf_dma_addr;
+	desc = chan->device->device_prep_slave_sg(chan, &sg, 1,
+		dmadata->substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+		DMA_TO_DEVICE : DMA_FROM_DEVICE,
+		DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc) {
+		dev_err(&chan->dev->device, "cannot prepare slave dma\n");
+		return NULL;
+	}
+	desc->callback = txx9aclc_dma_complete;
+	desc->callback_param = dmadata;
+	desc->tx_submit(desc);
+	return desc;
+}
+
+#define NR_DMA_CHAIN		2
+
+static void txx9aclc_dma_tasklet(unsigned long data)
+{
+	struct txx9aclc_dmadata *dmadata = (struct txx9aclc_dmadata *)data;
+	struct dma_chan *chan = dmadata->dma_chan;
+	struct dma_async_tx_descriptor *desc;
+	struct snd_pcm_substream *substream = dmadata->substream;
+	u32 ctlbit = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+		ACCTL_AUDODMA : ACCTL_AUDIDMA;
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dmadata->dma_lock, flags);
+	if (dmadata->frag_count < 0) {
+		struct txx9aclc_soc_device *dev =
+			container_of(dmadata, struct txx9aclc_soc_device,
+				     dmadata[substream->stream]);
+		struct txx9aclc_plat_drvdata *drvdata =
+			txx9aclc_get_plat_drvdata(dev);
+		void __iomem *base = drvdata->base;
+
+		spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+		chan->device->device_terminate_all(chan);
+		/* first time */
+		for (i = 0; i < NR_DMA_CHAIN; i++) {
+			desc = txx9aclc_dma_submit(dmadata,
+				dmadata->dma_addr + i * dmadata->frag_bytes);
+			if (!desc)
+				return;
+		}
+		dmadata->dmacount = NR_DMA_CHAIN;
+		chan->device->device_issue_pending(chan);
+		spin_lock_irqsave(&dmadata->dma_lock, flags);
+		__raw_writel(ctlbit, base + ACCTLEN);
+		dmadata->frag_count = NR_DMA_CHAIN % dmadata->frags;
+		spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+		return;
+	}
+	BUG_ON(dmadata->dmacount >= NR_DMA_CHAIN);
+	while (dmadata->dmacount < NR_DMA_CHAIN) {
+		dmadata->dmacount++;
+		spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+		desc = txx9aclc_dma_submit(dmadata,
+			dmadata->dma_addr +
+			dmadata->frag_count * dmadata->frag_bytes);
+		if (!desc)
+			return;
+		chan->device->device_issue_pending(chan);
+
+		spin_lock_irqsave(&dmadata->dma_lock, flags);
+		dmadata->frag_count++;
+		dmadata->frag_count %= dmadata->frags;
+		dmadata->pos += dmadata->frag_bytes;
+		dmadata->pos %= dmadata->buffer_bytes;
+		if ((dmadata->frag_count * dmadata->frag_bytes) %
+		    dmadata->period_bytes == 0)
+			snd_pcm_period_elapsed(substream);
+	}
+	spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+}
+
+static int txx9aclc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct txx9aclc_dmadata *dmadata = substream->runtime->private_data;
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct txx9aclc_soc_device *dev =
+		container_of(rtd->socdev, struct txx9aclc_soc_device, soc_dev);
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+	unsigned long flags;
+	int ret = 0;
+	u32 ctlbit = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+		ACCTL_AUDODMA : ACCTL_AUDIDMA;
+
+	spin_lock_irqsave(&dmadata->dma_lock, flags);
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		dmadata->frag_count = -1;
+		tasklet_schedule(&dmadata->tasklet);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+		__raw_writel(ctlbit, base + ACCTLDIS);
+		break;
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+	case SNDRV_PCM_TRIGGER_RESUME:
+		__raw_writel(ctlbit, base + ACCTLEN);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&dmadata->dma_lock, flags);
+	return ret;
+}
+
+static snd_pcm_uframes_t
+txx9aclc_pcm_pointer(struct snd_pcm_substream *substream)
+{
+	struct txx9aclc_dmadata *dmadata = substream->runtime->private_data;
+
+	return bytes_to_frames(substream->runtime, dmadata->pos);
+}
+
+static int txx9aclc_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct txx9aclc_soc_device *dev =
+		container_of(rtd->socdev, struct txx9aclc_soc_device, soc_dev);
+	struct txx9aclc_dmadata *dmadata = &dev->dmadata[substream->stream];
+	int ret;
+
+	ret = snd_soc_set_runtime_hwparams(substream, &txx9aclc_pcm_hardware);
+	if (ret)
+		return ret;
+	/* ensure that buffer size is a multiple of period size */
+	ret = snd_pcm_hw_constraint_integer(substream->runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (ret < 0)
+		return ret;
+	substream->runtime->private_data = dmadata;
+	return 0;
+}
+
+static int txx9aclc_pcm_close(struct snd_pcm_substream *substream)
+{
+	struct txx9aclc_dmadata *dmadata = substream->runtime->private_data;
+	struct dma_chan *chan = dmadata->dma_chan;
+
+	dmadata->frag_count = -1;
+	chan->device->device_terminate_all(chan);
+	return 0;
+}
+
+static struct snd_pcm_ops txx9aclc_pcm_ops = {
+	.open		= txx9aclc_pcm_open,
+	.close		= txx9aclc_pcm_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= txx9aclc_pcm_hw_params,
+	.hw_free	= txx9aclc_pcm_hw_free,
+	.prepare	= txx9aclc_pcm_prepare,
+	.trigger	= txx9aclc_pcm_trigger,
+	.pointer	= txx9aclc_pcm_pointer,
+};
+
+static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm)
+{
+	snd_pcm_lib_preallocate_free_for_all(pcm);
+}
+
+static int txx9aclc_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
+			    struct snd_pcm *pcm)
+{
+	return snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+		card->dev, 64 * 1024, 4 * 1024 * 1024);
+}
+
+static bool filter(struct dma_chan *chan, void *param)
+{
+	struct txx9aclc_dmadata *dmadata = param;
+	char devname[BUS_ID_SIZE + 2];
+
+	sprintf(devname, "%s.%d", dmadata->dma_res->name,
+		(int)dmadata->dma_res->start);
+	if (strcmp(dev_name(chan->device->dev), devname) == 0) {
+		chan->private = &dmadata->dma_slave;
+		return true;
+	}
+	return false;
+}
+
+static int txx9aclc_dma_init(struct txx9aclc_soc_device *dev,
+			     struct txx9aclc_dmadata *dmadata)
+{
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	struct txx9dmac_slave *ds = &dmadata->dma_slave;
+	dma_cap_mask_t mask;
+
+	spin_lock_init(&dmadata->dma_lock);
+
+	ds->reg_width = sizeof(u32);
+	if (dmadata->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		ds->tx_reg = drvdata->physbase + ACAUDODAT;
+		ds->rx_reg = 0;
+	} else {
+		ds->tx_reg = 0;
+		ds->rx_reg = drvdata->physbase + ACAUDIDAT;
+	}
+
+	/* Try to grab a DMA channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dmadata->dma_chan = dma_request_channel(mask, filter, dmadata);
+	if (!dmadata->dma_chan) {
+		dev_err(dev->soc_dev.dev,
+			"DMA channel for %s is not available\n",
+			dmadata->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+			"playback" : "capture");
+		return -EBUSY;
+	}
+	tasklet_init(&dmadata->tasklet, txx9aclc_dma_tasklet,
+		     (unsigned long)dmadata);
+	return 0;
+}
+
+static int txx9aclc_pcm_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct txx9aclc_soc_device *dev =
+		container_of(socdev, struct txx9aclc_soc_device, soc_dev);
+	struct resource *r;
+	int i;
+	int ret;
+
+	dev->dmadata[0].stream = SNDRV_PCM_STREAM_PLAYBACK;
+	dev->dmadata[1].stream = SNDRV_PCM_STREAM_CAPTURE;
+	for (i = 0; i < 2; i++) {
+		r = platform_get_resource(dev->aclc_pdev, IORESOURCE_DMA, i);
+		if (!r) {
+			ret = -EBUSY;
+			goto exit;
+		}
+		dev->dmadata[i].dma_res = r;
+		ret = txx9aclc_dma_init(dev, &dev->dmadata[i]);
+		if (ret)
+			goto exit;
+	}
+	return 0;
+
+exit:
+	for (i = 0; i < 2; i++) {
+		if (dev->dmadata[i].dma_chan)
+			dma_release_channel(dev->dmadata[i].dma_chan);
+		dev->dmadata[i].dma_chan = NULL;
+	}
+	return ret;
+}
+
+static int txx9aclc_pcm_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct txx9aclc_soc_device *dev =
+		container_of(socdev, struct txx9aclc_soc_device, soc_dev);
+	struct txx9aclc_plat_drvdata *drvdata = txx9aclc_get_plat_drvdata(dev);
+	void __iomem *base = drvdata->base;
+	int i;
+
+	/* disable all FIFO DMAs */
+	__raw_writel(ACCTL_AUDODMA | ACCTL_AUDIDMA, base + ACCTLDIS);
+	/* dummy R/W to clear pending DMAREQ if any */
+	__raw_writel(__raw_readl(base + ACAUDIDAT), base + ACAUDODAT);
+
+	for (i = 0; i < 2; i++) {
+		struct txx9aclc_dmadata *dmadata = &dev->dmadata[i];
+		struct dma_chan *chan = dmadata->dma_chan;
+		if (chan) {
+			dmadata->frag_count = -1;
+			chan->device->device_terminate_all(chan);
+			dma_release_channel(chan);
+		}
+		dev->dmadata[i].dma_chan = NULL;
+	}
+	return 0;
+}
+
+struct snd_soc_platform txx9aclc_soc_platform = {
+	.name		= "txx9aclc-audio",
+	.probe		= txx9aclc_pcm_probe,
+	.remove		= txx9aclc_pcm_remove,
+	.pcm_ops 	= &txx9aclc_pcm_ops,
+	.pcm_new	= txx9aclc_pcm_new,
+	.pcm_free	= txx9aclc_pcm_free_dma_buffers,
+};
+EXPORT_SYMBOL_GPL(txx9aclc_soc_platform);
+
+static int __init txx9aclc_soc_platform_init(void)
+{
+	return snd_soc_register_platform(&txx9aclc_soc_platform);
+}
+
+static void __exit txx9aclc_soc_platform_exit(void)
+{
+	snd_soc_unregister_platform(&txx9aclc_soc_platform);
+}
+
+module_init(txx9aclc_soc_platform_init);
+module_exit(txx9aclc_soc_platform_exit);
+
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_DESCRIPTION("TXx9 ACLC Audio DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/txx9/txx9aclc.h b/sound/soc/txx9/txx9aclc.h
new file mode 100644
index 0000000..6769aab
--- /dev/null
+++ b/sound/soc/txx9/txx9aclc.h
@@ -0,0 +1,83 @@
+/*
+ * TXx9 SoC AC Link Controller
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __TXX9ACLC_H
+#define __TXX9ACLC_H
+
+#include <linux/interrupt.h>
+#include <asm/txx9/dmac.h>
+
+#define ACCTLEN			0x00	/* control enable */
+#define ACCTLDIS		0x04	/* control disable */
+#define   ACCTL_ENLINK		0x00000001	/* enable/disable AC-link */
+#define   ACCTL_AUDODMA		0x00000100	/* AUDODMA enable/disable */
+#define   ACCTL_AUDIDMA		0x00001000	/* AUDIDMA enable/disable */
+#define   ACCTL_AUDOEHLT	0x00010000	/* AUDO error halt
+						   enable/disable */
+#define   ACCTL_AUDIEHLT	0x00100000	/* AUDI error halt
+						   enable/disable */
+#define ACREGACC		0x08	/* codec register access */
+#define   ACREGACC_DAT_SHIFT	0	/* data field */
+#define   ACREGACC_REG_SHIFT	16	/* address field */
+#define   ACREGACC_CODECID_SHIFT	24	/* CODEC ID field */
+#define   ACREGACC_READ		0x80000000	/* CODEC read */
+#define   ACREGACC_WRITE	0x00000000	/* CODEC write */
+#define ACINTSTS		0x10	/* interrupt status */
+#define ACINTMSTS		0x14	/* interrupt masked status */
+#define ACINTEN			0x18	/* interrupt enable */
+#define ACINTDIS		0x1c	/* interrupt disable */
+#define   ACINT_CODECRDY(n)	(0x00000001 << (n))	/* CODECn ready */
+#define   ACINT_REGACCRDY	0x00000010	/* ACREGACC ready */
+#define   ACINT_AUDOERR		0x00000100	/* AUDO underrun error */
+#define   ACINT_AUDIERR		0x00001000	/* AUDI overrun error */
+#define ACDMASTS		0x80	/* DMA request status */
+#define   ACDMA_AUDO		0x00000001	/* AUDODMA pending */
+#define   ACDMA_AUDI		0x00000010	/* AUDIDMA pending */
+#define ACAUDODAT		0xa0	/* audio out data */
+#define ACAUDIDAT		0xb0	/* audio in data */
+#define ACREVID			0xfc	/* revision ID */
+
+struct txx9aclc_dmadata {
+	struct resource *dma_res;
+	struct txx9dmac_slave dma_slave;
+	struct dma_chan *dma_chan;
+	struct tasklet_struct tasklet;
+	spinlock_t dma_lock;
+	int stream; /* SNDRV_PCM_STREAM_PLAYBACK or SNDRV_PCM_STREAM_CAPTURE */
+	struct snd_pcm_substream *substream;
+	unsigned long pos;
+	dma_addr_t dma_addr;
+	unsigned long buffer_bytes;
+	unsigned long period_bytes;
+	unsigned long frag_bytes;
+	int frags;
+	int frag_count;
+	int dmacount;
+};
+
+struct txx9aclc_plat_drvdata {
+	void __iomem *base;
+	u64 physbase;
+};
+
+struct txx9aclc_soc_device {
+	struct snd_soc_device soc_dev;
+	struct platform_device *aclc_pdev;	/* for ioresources, drvdata */
+	struct txx9aclc_dmadata dmadata[2];
+};
+
+static inline struct txx9aclc_plat_drvdata *txx9aclc_get_plat_drvdata(
+	struct txx9aclc_soc_device *sdev)
+{
+	return platform_get_drvdata(sdev->aclc_pdev);
+}
+
+extern struct snd_soc_platform txx9aclc_soc_platform;
+extern struct snd_soc_dai txx9aclc_ac97_dai;
+
+#endif /* __TXX9ACLC_H */
-- 
cgit v0.10.2


From 2b0c3677bfcf0c21bcaf2130e6e094a92ab93ae8 Mon Sep 17 00:00:00 2001
From: Magnus Lilja <lilja.magnus@gmail.com>
Date: Mon, 18 May 2009 18:46:33 +0200
Subject: i.MX31: Add support for LAN9217 on PDK debug board.

Signed-off-by: Magnus Lilja <lilja.magnus@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31pdk.c b/arch/arm/mach-mx3/mx31pdk.c
index 32599e5..c19838d 100644
--- a/arch/arm/mach-mx3/mx31pdk.c
+++ b/arch/arm/mach-mx3/mx31pdk.c
@@ -21,6 +21,8 @@
 #include <linux/clk.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
+#include <linux/smsc911x.h>
+#include <linux/platform_device.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -56,6 +58,39 @@ static struct imxuart_platform_data uart_pdata = {
 };
 
 /*
+ * Support for the SMSC9217 on the Debug board.
+ */
+
+static struct smsc911x_platform_config smsc911x_config = {
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
+	.flags		= SMSC911X_USE_16BIT | SMSC911X_FORCE_INTERNAL_PHY,
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
+};
+
+static struct resource smsc911x_resources[] = {
+	{
+		.start		= LAN9217_BASE_ADDR,
+		.end		= LAN9217_BASE_ADDR + 0xff,
+		.flags		= IORESOURCE_MEM,
+	}, {
+		.start		= EXPIO_INT_ENET,
+		.end		= EXPIO_INT_ENET,
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device smsc911x_device = {
+	.name		= "smsc911x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(smsc911x_resources),
+	.resource	= smsc911x_resources,
+	.dev		= {
+		.platform_data = &smsc911x_config,
+	},
+};
+
+/*
  * Routines for the CPLD on the debug board. It contains a CPLD handling
  * LEDs, switches, interrupts for Ethernet.
  */
@@ -207,7 +242,8 @@ static void __init mxc_board_init(void)
 
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
 
-	mx31pdk_init_expio();
+	if (!mx31pdk_init_expio())
+		platform_device_register(&smsc911x_device);
 }
 
 static void __init mx31pdk_timer_init(void)
-- 
cgit v0.10.2


From b126d113484886d900179b4ae55ea7fabe15f936 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 30 Apr 2009 14:48:36 +0100
Subject: mfd: Keep a cache of WM8350 volatile values

Due to the way that the WM8350 audio driver handles CODEC_ENA many of
the WM8350 audio registers are marked as volatile when they aren't
actually so. Allow the audio driver to see a cache of these values for
inspection during interrupt context.

To do this we need to stop satisfying any bits from volatile registers
from cache - there's no real benefit from doing so anyway, we did the
read already.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index c2be308..fe24079 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -79,10 +79,6 @@ static int wm8350_phys_read(struct wm8350 *wm8350, u8 reg, int num_regs,
 		/* Cache is CPU endian */
 		dest[i - reg] = be16_to_cpu(dest[i - reg]);
 
-		/* Satisfy non-volatile bits from cache */
-		dest[i - reg] &= wm8350_reg_io_map[i].vol;
-		dest[i - reg] |= wm8350->reg_cache[i];
-
 		/* Mask out non-readable bits */
 		dest[i - reg] &= wm8350_reg_io_map[i].readable;
 	}
@@ -182,9 +178,6 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src)
 			(wm8350->reg_cache[i] & ~wm8350_reg_io_map[i].writable)
 			| src[i - reg];
 
-		/* Don't store volatile bits */
-		wm8350->reg_cache[i] &= ~wm8350_reg_io_map[i].vol;
-
 		src[i - reg] = cpu_to_be16(src[i - reg]);
 	}
 
@@ -1261,7 +1254,6 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode)
 		    (i < WM8350_CLOCK_CONTROL_1 || i > WM8350_AIF_TEST)) {
 			value = be16_to_cpu(wm8350->reg_cache[i]);
 			value &= wm8350_reg_io_map[i].readable;
-			value &= ~wm8350_reg_io_map[i].vol;
 			wm8350->reg_cache[i] = value;
 		} else
 			wm8350->reg_cache[i] = reg_map[i];
-- 
cgit v0.10.2


From f43ab901005c2bb3c5440e91b6efae9f5db02e7c Mon Sep 17 00:00:00 2001
From: Nelson Castillo <arhuaco@freaks-unidos.net>
Date: Tue, 12 May 2009 13:26:47 -0700
Subject: mfd: pcf50633: fix unsafe disable_irq()

Without this change Openmoko Freerunner (GTA02) bootstrap will deadlock.
As pointed out in other patches this issue is in the wild since the merge
of:

: commit 3aa551c9b4c40018f0e261a178e3d25478dc04a9
: Author: Thomas Gleixner <tglx@linutronix.de>
: Date:   Mon Mar 23 18:28:15 2009 +0100
:
:    genirq: add threaded interrupt handler support
:
:    Add support for threaded interrupt handlers

Signed-off-by: Nelson Castillo <arhuaco@freaks-unidos.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <balajirrao@openmoko.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>

diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c
index 7793932..11a6248 100644
--- a/drivers/mfd/pcf50633-core.c
+++ b/drivers/mfd/pcf50633-core.c
@@ -443,7 +443,7 @@ static irqreturn_t pcf50633_irq(int irq, void *data)
 	dev_dbg(pcf->dev, "pcf50633_irq\n");
 
 	get_device(pcf->dev);
-	disable_irq(pcf->irq);
+	disable_irq_nosync(pcf->irq);
 	schedule_work(&pcf->irq_work);
 
 	return IRQ_HANDLED;
-- 
cgit v0.10.2


From 33252572e727ccdcc54efdb67157e7ab3d6942db Mon Sep 17 00:00:00 2001
From: Nico Schottelius <nico-linux-20090330@schottelius.org>
Date: Sat, 16 May 2009 14:00:56 +0200
Subject: Fix scripts/setlocalversion with tagged git commit

Produce correct output for
- tagged commit (v2.6.30-rc6)
- past tagged commit (v2.6.30-rc5-299-g7c7327d)
- no tag

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 32c8554..0079047 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -1,5 +1,13 @@
 #!/bin/sh
-# Print additional version information for non-release trees.
+#
+# This scripts adds local version information from the version
+# control systems git, mercurial (hg) and subversion (svn).
+#
+# If something goes wrong, send a mail the kernel build mailinglist
+# (see MAINTAINERS) and CC Nico Schottelius
+# <nico-linuxsetlocalversion -at- schottelius.org>.
+#
+#
 
 usage() {
 	echo "Usage: $0 [srctree]" >&2
@@ -10,12 +18,20 @@ cd "${1:-.}" || usage
 
 # Check for git and a git repo.
 if head=`git rev-parse --verify --short HEAD 2>/dev/null`; then
-	# Do we have an untagged tag?
-	if atag=`git describe 2>/dev/null`; then
-		echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}'
-	# add -g${head}, if there is no usable tag
-	else
-		printf '%s%s' -g $head
+
+	# If we are at a tagged commit (like "v2.6.30-rc6"), we ignore it,
+	# because this version is defined in the top level Makefile.
+	if [ -z "`git describe --exact-match 2>/dev/null`" ]; then
+
+		# If we are past a tagged commit (like "v2.6.30-rc5-302-g72357d5"),
+		# we pretty print it.
+		if atag="`git describe 2>/dev/null`"; then
+			echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}'
+
+		# If we don't have a tag at all we print -g{commitish}.
+		else
+			printf '%s%s' -g $head
+		fi
 	fi
 
 	# Is this git on svn?
-- 
cgit v0.10.2


From 64d1304a64477629cb16b75491a77bafe6f86963 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 18 May 2009 21:20:10 +0200
Subject: futex: setup writeable mapping for futex ops which modify user space
 data

The futex code installs a read only mapping via get_user_pages_fast()
even if the futex op function has to modify user space data. The
eventual fault was fixed up by futex_handle_fault() which walked the
VMA with mmap_sem held.

After the cleanup patches which removed the mmap_sem dependency of the
futex code commit 4dc5b7a36a49eff97050894cf1b3a9a02523717 (futex:
clean up fault logic) removed the private VMA walk logic from the
futex code. This change results in a stale RO mapping which is not
fixed up.

Instead of reintroducing the previous fault logic we set up the
mapping in get_user_pages_fast() read/write for all operations which
modify user space data. Also handle private futexes in the same way
and make the current unconditional access_ok(VERIFY_WRITE) depend on
the futex op.

Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
CC: stable@kernel.org

diff --git a/kernel/futex.c b/kernel/futex.c
index eef8cd2..d546b2d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -193,6 +193,7 @@ static void drop_futex_key_refs(union futex_key *key)
  * @uaddr: virtual address of the futex
  * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
  * @key: address where result is stored.
+ * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE)
  *
  * Returns a negative error code or 0
  * The key words are stored in *key on success.
@@ -203,7 +204,8 @@ static void drop_futex_key_refs(union futex_key *key)
  *
  * lock_page() might sleep, the caller should not hold a spinlock.
  */
-static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
+static int
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
@@ -226,7 +228,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 	 *        but access_ok() should be faster than find_vma()
 	 */
 	if (!fshared) {
-		if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
+		if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
 			return -EFAULT;
 		key->private.mm = mm;
 		key->private.address = address;
@@ -235,7 +237,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 	}
 
 again:
-	err = get_user_pages_fast(address, 1, 0, &page);
+	err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page);
 	if (err < 0)
 		return err;
 
@@ -677,7 +679,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 	if (!bitset)
 		return -EINVAL;
 
-	ret = get_futex_key(uaddr, fshared, &key);
+	ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -723,10 +725,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
 	int ret, op_ret;
 
 retry:
-	ret = get_futex_key(uaddr1, fshared, &key1);
+	ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
 	if (unlikely(ret != 0))
 		goto out;
-	ret = get_futex_key(uaddr2, fshared, &key2);
+	ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
 	if (unlikely(ret != 0))
 		goto out_put_key1;
 
@@ -814,10 +816,10 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
 	int ret, drop_count = 0;
 
 retry:
-	ret = get_futex_key(uaddr1, fshared, &key1);
+	ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
 	if (unlikely(ret != 0))
 		goto out;
-	ret = get_futex_key(uaddr2, fshared, &key2);
+	ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_READ);
 	if (unlikely(ret != 0))
 		goto out_put_key1;
 
@@ -1140,7 +1142,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 	q.bitset = bitset;
 retry:
 	q.key = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr, fshared, &q.key);
+	ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -1330,7 +1332,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 	q.pi_state = NULL;
 retry:
 	q.key = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr, fshared, &q.key);
+	ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -1594,7 +1596,7 @@ retry:
 	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
 		return -EPERM;
 
-	ret = get_futex_key(uaddr, fshared, &key);
+	ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE);
 	if (unlikely(ret != 0))
 		goto out;
 
-- 
cgit v0.10.2


From 33b2fb303fe7f6b08bbb32f708e67b96eaa94a7a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 17 May 2009 11:08:41 +0200
Subject: perf_counter: fix counter freeing logic

Fix counter lifetime bugs which explain the crashes reported by
Marcelo Tosatti and Arnaldo Carvalho de Melo.

The new rule is: flushing + freeing is only done for a task's
own counters, never for other tasks.

[ Impact: fix crashes/lockups with inherited counters ]

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/exit.c b/kernel/exit.c
index 73affd3..f9dfedd 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -178,12 +178,6 @@ repeat:
 
 	proc_flush_task(p);
 
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 */
-	perf_counter_exit_task(p);
-
 	write_lock_irq(&tasklist_lock);
 	tracehook_finish_release_task(p);
 	__exit_signal(p);
@@ -985,6 +979,13 @@ NORET_TYPE void do_exit(long code)
 		module_put(tsk->binfmt->module);
 
 	proc_exit_connector(tsk);
+
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(tsk);
+
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
 	mpol_put(tsk->mempolicy);
@@ -1257,12 +1258,6 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	 */
 	read_unlock(&tasklist_lock);
 
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 */
-	perf_counter_exit_task(p);
-
 	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 		? p->signal->group_exit_code : p->exit_code;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 59a926d..7af16d1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3299,6 +3299,8 @@ void perf_counter_exit_task(struct task_struct *child)
 	struct perf_counter *child_counter, *tmp;
 	struct perf_counter_context *child_ctx;
 
+	WARN_ON_ONCE(child != current);
+
 	child_ctx = &child->perf_counter_ctx;
 
 	if (likely(!child_ctx->nr_counters))
-- 
cgit v0.10.2


From c44d70a340554a33071339064a303ac0f1a31623 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 17 May 2009 11:24:08 +0200
Subject: perf_counter: fix counter inheritance race

Context rotation should not occur when we are in the middle of
walking the counter list when inheriting counters ...

[ Impact: fix occasionally incorrect perf stat results ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c8c1dfc2..13cb2fb 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -508,6 +508,7 @@ struct perf_counter_context {
 	int			nr_counters;
 	int			nr_active;
 	int			is_active;
+	int			rr_allowed;
 	struct task_struct	*task;
 
 	/*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7af16d1..4d8f973 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1120,7 +1120,8 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
-	rotate_ctx(ctx);
+	if (ctx->rr_allowed)
+		rotate_ctx(ctx);
 
 	perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
@@ -3108,6 +3109,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->counter_list);
 	INIT_LIST_HEAD(&ctx->event_list);
+	ctx->rr_allowed = 1;
 	ctx->task = task;
 }
 
@@ -3348,6 +3350,9 @@ void perf_counter_init_task(struct task_struct *child)
 	 */
 	mutex_lock(&parent_ctx->mutex);
 
+	parent_ctx->rr_allowed = 0;
+	barrier(); /* irqs */
+
 	/*
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
@@ -3361,6 +3366,9 @@ void perf_counter_init_task(struct task_struct *child)
 			break;
 	}
 
+	barrier(); /* irqs */
+	parent_ctx->rr_allowed = 1;
+
 	mutex_unlock(&parent_ctx->mutex);
 }
 
-- 
cgit v0.10.2


From b3bad72e494fb2ff0c81be4ca2ddb94adf6a47c2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 17 May 2009 20:17:06 +0200
Subject: PCI PM: Fix initialization and kexec breakage for some devices

Recent PCI PM changes introduced a bug that causes some devices to be
mishandled after kexec and during early initialization.  The failure
scenario in the kexec case is the following:

* Assume a PCI device is not power-manageable by the platform and has
  PCI_PM_CTRL_NO_SOFT_RESET set in PMCSR.
* The device is put into D3 before kexec (using the native PCI PM).
* After kexec, pci_setup_device() sets the device's power state to
  PCI_UNKNOWN.
* pci_set_power_state(dev, PCI_D0) is called by the device's driver.
* __pci_start_power_transition(dev, PCI_D0) is called and since the
  device is not power-manageable by the platform, it causes
  pci_update_current_state(dev, PCI_D0) to be called.  As a result
  the device's current_state field is updated to PCI_D3, in
  accordance with the contents of its PCI PM registers.
* pci_raw_set_power_state() is called and it changes the device power
  state to D0.  *However*, it should also call pci_restore_bars() to
  reinitialize the device, but it doesn't, because the device's
  current_state field has been modified earlier.

To prevent this from happening, modify pci_platform_power_transition()
so that it doesn't use pci_update_current_state() to update the
current_state field for devices that aren't power-manageable by the
platform.  Instead, this field should be updated directly for devices
that don't support the native PCI PM.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 34bf0fd..1a91bf9 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -557,7 +557,8 @@ static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
 	} else {
 		error = -ENODEV;
 		/* Fall back to PCI_D0 if native PM is not supported */
-		pci_update_current_state(dev, PCI_D0);
+		if (!dev->pm_cap)
+			dev->current_state = PCI_D0;
 	}
 
 	return error;
-- 
cgit v0.10.2


From fbaa58696cef848de818768783ef185bd3f05158 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 13 May 2009 12:50:40 -0400
Subject: TPM: get_event_name stack corruption

get_event_name uses sprintf to fill a buffer declared on the stack.  It fills
the buffer 2 bytes at a time.  What the code doesn't take into account is that
sprintf(buf, "%02x", data) actually writes 3 bytes.  2 bytes for the data and
then it nul terminates the string.  Since we declare buf to be 40 characters
long and then we write 40 bytes of data into buf sprintf is going to write 41
characters.  The fix is to leave room in buf for the nul terminator.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c
index ed306eb..0c2f55a 100644
--- a/drivers/char/tpm/tpm_bios.c
+++ b/drivers/char/tpm/tpm_bios.c
@@ -212,7 +212,8 @@ static int get_event_name(char *dest, struct tcpa_event *event,
 			unsigned char * event_entry)
 {
 	const char *name = "";
-	char data[40] = "";
+	/* 41 so there is room for 40 data and 1 nul */
+	char data[41] = "";
 	int i, n_len = 0, d_len = 0;
 	struct tcpa_pc_event *pc_event;
 
-- 
cgit v0.10.2


From b674137755bbe2750f997a2a1264db3cdf8abcb3 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 18 May 2009 11:56:16 +1000
Subject: drm: Round size of SHM maps to PAGE_SIZE

Currently, userspace can fail to obtain the SAREA mapping (among other
reasons) if it passes SAREA_MAX to drmAddMap without aligning it to the
page size. This breaks for example on PowerPC with 64K pages and radeon
despite the kernel radeon actually doing the right rouding in the first
place.

The way SAREA_MAX is defined with a bunch of ifdef's and duplicated
between libdrm and the X server is gross, ultimately it should be
retrieved by userspace from the kernel, but in the meantime, we have
plenty of existing userspace built with bad values that need to work.

This patch works around broken userspace by rounding the requested size
in drm_addmap_core() of any SHM map to the page size. Since the backing
memory for SHM maps is also allocated within addmap_core, there is no
danger of adjacent memory being exposed due to the increased map size.
The only side effect is that drivers that previously tried to create or
access SHM maps using a size < PAGE_SIZE and failed (getting -EINVAL),
will now succeed at the cost of a little bit more memory used if that
happens to be when the map is created.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 6d80d17..0411d91 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -170,6 +170,14 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	}
 	DRM_DEBUG("offset = 0x%08llx, size = 0x%08lx, type = %d\n",
 		  (unsigned long long)map->offset, map->size, map->type);
+
+	/* page-align _DRM_SHM maps. They are allocated here so there is no security
+	 * hole created by that and it works around various broken drivers that use
+	 * a non-aligned quantity to map the SAREA. --BenH
+	 */
+	if (map->type == _DRM_SHM)
+		map->size = PAGE_ALIGN(map->size);
+
 	if ((map->offset & (~(resource_size_t)PAGE_MASK)) || (map->size & (~PAGE_MASK))) {
 		drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 		return -EINVAL;
-- 
cgit v0.10.2


From ff71338ed31398384b2e5992623d52f9aaba1da1 Mon Sep 17 00:00:00 2001
From: Daniel Ribeiro <drwyrm@gmail.com>
Date: Fri, 15 May 2009 06:33:50 -0300
Subject: [ARM] pxa/ezx: fix pin configuration for low power mode

Fix LPM configuration on ezx.c

Signed-off-by: Daniel Ribeiro <drwyrm@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 92ba16e..7db966d 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -111,9 +111,9 @@ static unsigned long ezx_pin_config[] __initdata = {
 	GPIO25_SSP1_TXD,
 	GPIO26_SSP1_RXD,
 	GPIO24_GPIO,				/* pcap chip select */
-	GPIO1_GPIO,				/* pcap interrupt */
-	GPIO4_GPIO,				/* WDI_AP */
-	GPIO55_GPIO,				/* SYS_RESTART */
+	GPIO1_GPIO | WAKEUP_ON_EDGE_RISE,	/* pcap interrupt */
+	GPIO4_GPIO | MFP_LPM_DRIVE_HIGH,	/* WDI_AP */
+	GPIO55_GPIO | MFP_LPM_DRIVE_HIGH,	/* SYS_RESTART */
 
 	/* MMC */
 	GPIO32_MMC_CLK,
@@ -144,20 +144,20 @@ static unsigned long ezx_pin_config[] __initdata = {
 #if defined(CONFIG_MACH_EZX_A780) || defined(CONFIG_MACH_EZX_E680)
 static unsigned long gen1_pin_config[] __initdata = {
 	/* flip / lockswitch */
-	GPIO12_GPIO,
+	GPIO12_GPIO | WAKEUP_ON_EDGE_BOTH,
 
 	/* bluetooth (bcm2035) */
-	GPIO14_GPIO | WAKEUP_ON_LEVEL_HIGH,	/* HOSTWAKE */
+	GPIO14_GPIO | WAKEUP_ON_EDGE_RISE,	/* HOSTWAKE */
 	GPIO48_GPIO,				/* RESET */
 	GPIO28_GPIO,				/* WAKEUP */
 
 	/* Neptune handshake */
-	GPIO0_GPIO | WAKEUP_ON_LEVEL_HIGH,	/* BP_RDY */
-	GPIO57_GPIO,				/* AP_RDY */
-	GPIO13_GPIO | WAKEUP_ON_LEVEL_HIGH,	/* WDI */
-	GPIO3_GPIO | WAKEUP_ON_LEVEL_HIGH,	/* WDI2 */
-	GPIO82_GPIO,				/* RESET */
-	GPIO99_GPIO,				/* TC_MM_EN */
+	GPIO0_GPIO | WAKEUP_ON_EDGE_FALL,	/* BP_RDY */
+	GPIO57_GPIO | MFP_LPM_DRIVE_HIGH,	/* AP_RDY */
+	GPIO13_GPIO | WAKEUP_ON_EDGE_BOTH,	/* WDI */
+	GPIO3_GPIO | WAKEUP_ON_EDGE_BOTH,	/* WDI2 */
+	GPIO82_GPIO | MFP_LPM_DRIVE_HIGH,	/* RESET */
+	GPIO99_GPIO | MFP_LPM_DRIVE_HIGH,	/* TC_MM_EN */
 
 	/* sound */
 	GPIO52_SSP3_SCLK,
@@ -199,21 +199,21 @@ static unsigned long gen1_pin_config[] __initdata = {
 	defined(CONFIG_MACH_EZX_E2) || defined(CONFIG_MACH_EZX_E6)
 static unsigned long gen2_pin_config[] __initdata = {
 	/* flip / lockswitch */
-	GPIO15_GPIO,
+	GPIO15_GPIO | WAKEUP_ON_EDGE_BOTH,
 
 	/* EOC */
-	GPIO10_GPIO,
+	GPIO10_GPIO | WAKEUP_ON_EDGE_RISE,
 
 	/* bluetooth (bcm2045) */
-	GPIO13_GPIO | WAKEUP_ON_LEVEL_HIGH,	/* HOSTWAKE */
+	GPIO13_GPIO | WAKEUP_ON_EDGE_RISE,	/* HOSTWAKE */
 	GPIO37_GPIO,				/* RESET */
 	GPIO57_GPIO,				/* WAKEUP */
 
 	/* Neptune handshake */
-	GPIO0_GPIO | WAKEUP_ON_LEVEL_HIGH,	/* BP_RDY */
-	GPIO96_GPIO,				/* AP_RDY */
-	GPIO3_GPIO | WAKEUP_ON_LEVEL_HIGH,	/* WDI */
-	GPIO116_GPIO,				/* RESET */
+	GPIO0_GPIO | WAKEUP_ON_EDGE_FALL,	/* BP_RDY */
+	GPIO96_GPIO | MFP_LPM_DRIVE_HIGH,	/* AP_RDY */
+	GPIO3_GPIO | WAKEUP_ON_EDGE_FALL,	/* WDI */
+	GPIO116_GPIO | MFP_LPM_DRIVE_HIGH,	/* RESET */
 	GPIO41_GPIO,				/* BP_FLASH */
 
 	/* sound */
-- 
cgit v0.10.2


From 62669e61a5f559826b1d2e863649a6005eee629b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 20 May 2009 11:27:13 +0900
Subject: sh: mach-hp6xx: Fix up the hp6xx build for hd64461 changes.

Fixes several compile errors due to the recent hd64461 I/O base changes.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/cchips/hd6446x/hd64461.c b/arch/sh/cchips/hd6446x/hd64461.c
index 25ef910..50aa0c1 100644
--- a/arch/sh/cchips/hd6446x/hd64461.c
+++ b/arch/sh/cchips/hd6446x/hd64461.c
@@ -80,7 +80,7 @@ int __init setup_hd64461(void)
 
 	printk(KERN_INFO
 	       "HD64461 configured at 0x%x on irq %d(mapped into %d to %d)\n",
-	       CONFIG_HD64461_IOBASE, CONFIG_HD64461_IRQ, HD64461_IRQBASE,
+	       HD64461_IOBASE, CONFIG_HD64461_IRQ, HD64461_IRQBASE,
 	       HD64461_IRQBASE + 15);
 
 /* Should be at processor specific part.. */
diff --git a/arch/sh/include/asm/hd64461.h b/arch/sh/include/asm/hd64461.h
index c16d545..977355f 100644
--- a/arch/sh/include/asm/hd64461.h
+++ b/arch/sh/include/asm/hd64461.h
@@ -26,7 +26,7 @@
 #define	HD64461_PCC1_COMM	(HD64461_PCC1_BASE+HD64461_PCC_WINDOW)		/* 0xb5000000 */
 
 /* Standby Control Register for HD64461 */
-#define	HD64461_STBCR			CONFIG_HD64461_IOBASE
+#define	HD64461_STBCR			HD64461_IO_OFFSET(0x00000000)
 #define	HD64461_STBCR_CKIO_STBY		0x2000
 #define	HD64461_STBCR_SAFECKE_IST	0x1000
 #define	HD64461_STBCR_SLCKE_IST		0x0800
diff --git a/drivers/video/hitfb.c b/drivers/video/hitfb.c
index e6467cf..020db7f 100644
--- a/drivers/video/hitfb.c
+++ b/drivers/video/hitfb.c
@@ -335,9 +335,9 @@ static int __init hitfb_probe(struct platform_device *dev)
 	if (fb_get_options("hitfb", NULL))
 		return -ENODEV;
 
-	hitfb_fix.mmio_start = CONFIG_HD64461_IOBASE+0x1000;
+	hitfb_fix.mmio_start = HD64461_IO_OFFSET(0x1000);
 	hitfb_fix.mmio_len = 0x1000;
-	hitfb_fix.smem_start = CONFIG_HD64461_IOBASE + 0x02000000;
+	hitfb_fix.smem_start = HD64461_IO_OFFSET(0x02000000);
 	hitfb_fix.smem_len = 512 * 1024;
 
 	lcdclor = fb_readw(HD64461_LCDCLOR);
-- 
cgit v0.10.2


From d3375ea7213d09284ee3683d8bc075b9d3786a03 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Wed, 20 May 2009 08:42:06 +0200
Subject: cdrom: beyond ARRAY_SIZE of viocd_diskinfo

Do not go beyond ARRAY_SIZE of viocd_diskinfo

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 1392935..9b1624e 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -587,7 +587,7 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	struct device_node *node = vdev->dev.archdata.of_node;
 
 	deviceno = vdev->unit_address;
-	if (deviceno > VIOCD_MAX_CD)
+	if (deviceno >= VIOCD_MAX_CD)
 		return -ENODEV;
 	if (!node)
 		return -ENODEV;
-- 
cgit v0.10.2


From 0a7ae2ff0d29bb3b327edff4c8ab67b3834fa811 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 20 May 2009 08:54:31 +0200
Subject: block: change the tag sync vs async restriction logic

Make them fully share the tag space, but disallow async requests using
the last any two slots.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 0ab81a0..0d98054 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -218,7 +218,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	} else
 		skip |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
+	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
 		rq = NULL;
 	else
 		skip |= QUEUE_ORDSEQ_DRAIN;
diff --git a/block/blk-core.c b/block/blk-core.c
index 4906507..1c74840 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1815,7 +1815,7 @@ void blk_dequeue_request(struct request *rq)
 	 * the driver side.
 	 */
 	if (blk_account_rq(rq))
-		q->in_flight++;
+		q->in_flight[rq_is_sync(rq)]++;
 }
 
 /**
diff --git a/block/blk-tag.c b/block/blk-tag.c
index c260f7c..2e5cfeb 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
-	unsigned max_depth, offset;
+	unsigned max_depth;
 	int tag;
 
 	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -355,13 +355,16 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	 * to starve sync IO on behalf of flooding async IO.
 	 */
 	max_depth = bqt->max_depth;
-	if (rq_is_sync(rq))
-		offset = 0;
-	else
-		offset = max_depth >> 2;
+	if (!rq_is_sync(rq) && max_depth > 1) {
+		max_depth -= 2;
+		if (!max_depth)
+			max_depth = 1;
+		if (q->in_flight[0] > max_depth)
+			return 1;
+	}
 
 	do {
-		tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
+		tag = find_first_zero_bit(bqt->tag_map, max_depth);
 		if (tag >= max_depth)
 			return 1;
 
diff --git a/block/elevator.c b/block/elevator.c
index 9189200..ebee948 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -546,7 +546,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
 	 * in_flight count again
 	 */
 	if (blk_account_rq(rq)) {
-		q->in_flight--;
+		q->in_flight[rq_is_sync(rq)]--;
 		if (blk_sorted_rq(rq))
 			elv_deactivate_rq(q, rq);
 	}
@@ -685,7 +685,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 
 	if (unplug_it && blk_queue_plugged(q)) {
 		int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
-			- q->in_flight;
+				- queue_in_flight(q);
 
 		if (nrq >= q->unplug_thresh)
 			__generic_unplug_device(q);
@@ -823,7 +823,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	 * request is released from the driver, io must be done
 	 */
 	if (blk_account_rq(rq)) {
-		q->in_flight--;
+		q->in_flight[rq_is_sync(rq)]--;
 		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
 			e->ops->elevator_completed_req_fn(q, rq);
 	}
@@ -838,7 +838,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 		if (!list_empty(&q->queue_head))
 			next = list_entry_rq(q->queue_head.next);
 
-		if (!q->in_flight &&
+		if (!queue_in_flight(q) &&
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
 		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 564445b..a967dd7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -404,7 +404,7 @@ struct request_queue
 	struct list_head	tag_busy_list;
 
 	unsigned int		nr_sorted;
-	unsigned int		in_flight;
+	unsigned int		in_flight[2];
 
 	unsigned int		rq_timeout;
 	struct timer_list	timeout;
@@ -511,6 +511,11 @@ static inline void queue_flag_clear_unlocked(unsigned int flag,
 	__clear_bit(flag, &q->queue_flags);
 }
 
+static inline int queue_in_flight(struct request_queue *q)
+{
+	return q->in_flight[0] + q->in_flight[1];
+}
+
 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 {
 	WARN_ON_ONCE(!queue_is_locked(q));
-- 
cgit v0.10.2


From c8b15a706d921baed3195407e4f55270112bb3c6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 20 May 2009 09:18:50 +0200
Subject: futex: cleanup error exit

Reuse the put_key_ref(key2) call in the exit path.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 476603a..381125a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2185,10 +2185,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 
 	/* Prepare to wait on uaddr. */
 	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
-	if (ret) {
-		put_futex_key(fshared, &key2);
-		goto out;
-	}
+	if (ret)
+		goto out_key2;
 
 	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
 	futex_wait_queue_me(hb, &q, to);
@@ -2282,6 +2280,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 
 out_put_keys:
 	put_futex_key(fshared, &q.key);
+out_key2:
 	put_futex_key(fshared, &key2);
 
 out:
-- 
cgit v0.10.2


From 1c840c14906d4ddf66c1f4f5daea059aad951c82 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 20 May 2009 09:22:40 +0200
Subject: futex: fix restart for early wakeup in futex_wait_requeue_pi()

The futex_wait_requeue_pi op should restart unconditionally like
futex_lock_pi. The user of that function e.g. pthread_cond_wait can
not be interrupted so we do not care about the SA_RESTART flag of the
signal. Clean up the FIXMEs.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 381125a..2aa216e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2060,7 +2060,7 @@ pi_faulted:
  *
  * Returns
  *  0 - no early wakeup detected
- * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?)
+ * <0 - -ETIMEDOUT or -ERESTARTNOINTR
  */
 static inline
 int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
@@ -2087,15 +2087,8 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 
 		if (timeout && !timeout->task)
 			ret = -ETIMEDOUT;
-		else {
-			/*
-			 * We expect signal_pending(current), but another
-			 * thread may have handled it for us already.
-			 */
-			/* FIXME: ERESTARTSYS or ERESTARTNOINTR?  Do we care if
-			 * the user specified SA_RESTART or not? */
-			ret = -ERESTARTSYS;
-		}
+		else
+			ret = -ERESTARTNOINTR;
 	}
 	return ret;
 }
-- 
cgit v0.10.2


From 2070887fdeacd9c13f3e805e3f0086c9f22a4d93 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2009 23:04:59 +0200
Subject: futex: fix restart in wait_requeue_pi

If the waiter has been requeued to the outer PI futex and is
interrupted by a signal and the thread handles the signal then
ERESTART_RESTARTBLOCK is changed to EINTR and the restart block is
discarded. That way we return an unexcpected EINTR to user space
instead of ending up in futex_lock_pi_restart.

But we do not need to restart the syscall because we know that the
condition has changed since we have been requeued. If we would simply
restart the syscall then we would drop out via the comparison of the
user space value with EWOULDBLOCK.

The user space side needs to handle EWOULDBLOCK anyway as the
enqueueing on the inner futex can race with a requeue/wake. So we can
simply return EWOULDBLOCK to user space which also signals that we did
not take the outer futex and let user space handle it in the same way
it has to handle the requeue/wake race.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/futex.c b/kernel/futex.c
index 2aa216e..80b5ce7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1507,7 +1507,6 @@ handle_fault:
 #define FLAGS_HAS_TIMEOUT	0x04
 
 static long futex_wait_restart(struct restart_block *restart);
-static long futex_lock_pi_restart(struct restart_block *restart);
 
 /**
  * fixup_owner() - Post lock pi_state and corner case management
@@ -1930,21 +1929,6 @@ uaddr_faulted:
 	goto retry;
 }
 
-static long futex_lock_pi_restart(struct restart_block *restart)
-{
-	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-	ktime_t t, *tp = NULL;
-	int fshared = restart->futex.flags & FLAGS_SHARED;
-
-	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
-		t.tv64 = restart->futex.time;
-		tp = &t;
-	}
-	restart->fn = do_no_restart_syscall;
-
-	return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0);
-}
-
 /*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
  * This is the in-kernel slowpath: we look up the PI state (if any),
@@ -2141,12 +2125,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct rt_mutex_waiter rt_waiter;
 	struct rt_mutex *pi_mutex = NULL;
-	struct restart_block *restart;
 	struct futex_hash_bucket *hb;
 	union futex_key key2;
 	struct futex_q q;
 	int res, ret;
-	u32 uval;
 
 	if (!bitset)
 		return -EINVAL;
@@ -2245,30 +2227,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 		if (rt_mutex_owner(pi_mutex) == current)
 			rt_mutex_unlock(pi_mutex);
 	} else if (ret == -EINTR) {
-		ret = -EFAULT;
-		if (get_user(uval, uaddr2))
-			goto out_put_keys;
-
 		/*
-		 * We've already been requeued, so restart by calling
-		 * futex_lock_pi() directly, rather then returning to this
-		 * function.
+		 * We've already been requeued, but we have no way to
+		 * restart by calling futex_lock_pi() directly. We
+		 * could restart the syscall, but that will look at
+		 * the user space value and return right away. So we
+		 * drop back with EWOULDBLOCK to tell user space that
+		 * "val" has been changed. That's the same what the
+		 * restart of the syscall would do in
+		 * futex_wait_setup().
 		 */
-		ret = -ERESTART_RESTARTBLOCK;
-		restart = &current_thread_info()->restart_block;
-		restart->fn = futex_lock_pi_restart;
-		restart->futex.uaddr = (u32 *)uaddr2;
-		restart->futex.val = uval;
-		restart->futex.flags = 0;
-		if (abs_time) {
-			restart->futex.flags |= FLAGS_HAS_TIMEOUT;
-			restart->futex.time = abs_time->tv64;
-		}
-
-		if (fshared)
-			restart->futex.flags |= FLAGS_SHARED;
-		if (clockrt)
-			restart->futex.flags |= FLAGS_CLOCKRT;
+		ret = -EWOULDBLOCK;
 	}
 
 out_put_keys:
-- 
cgit v0.10.2


From 4005d39a5f5549f1f6afe88abceed78b2ab225b6 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Mon, 18 May 2009 16:02:04 +0300
Subject: ASoC: TWL4030: Change DAPM routings and controls for DACs and PGAs

Restructuring the twl4030 codec's DAPM routing to be able to handle the power
sequences correctly.

The twl4030 codec internal implementation have this order:
DAC -> Analog PGA -> Mixer/Mux

While the ASoC framework expects the following order:
DAC -> Mixer -> Analog PGA

This patch moves the Analog PGA handling from SND_SOC_DAPM_PGA to _MIXER and
adds two levels of mixer to handle the digital and analog loopback
functionality.

Now the analog loopback does not powers on any of the DACs.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Tested-by: Anuj Aggarwal <anuj.aggarwal@ti.com>
Tested-by: Jarkko Nikula <jhnikula@gmail.com>
Tested-by: Misael Lopez Cruz  <x0052729@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 731534c..99fe44f 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -1051,18 +1051,6 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	SND_SOC_DAPM_DAC("DAC Voice", "Voice Playback",
 			SND_SOC_NOPM, 0, 0),
 
-	/* Analog PGAs */
-	SND_SOC_DAPM_PGA("ARXR1_APGA", TWL4030_REG_ARXR1_APGA_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_PGA("ARXL1_APGA", TWL4030_REG_ARXL1_APGA_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_PGA("ARXR2_APGA", TWL4030_REG_ARXR2_APGA_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_PGA("ARXL2_APGA", TWL4030_REG_ARXL2_APGA_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_PGA("VDL_APGA", TWL4030_REG_VDL_APGA_CTL,
-			0, 0, NULL, 0),
-
 	/* Analog bypasses */
 	SND_SOC_DAPM_SWITCH_E("Right1 Analog Loopback", SND_SOC_NOPM, 0, 0,
 			&twl4030_dapm_abypassr1_control, bypass_event,
@@ -1091,16 +1079,29 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 			&twl4030_dapm_dbypassv_control, bypass_event,
 			SND_SOC_DAPM_POST_REG),
 
-	SND_SOC_DAPM_MIXER("Analog R1 Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			0, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("Analog L1 Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			1, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("Analog R2 Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			2, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("Analog L2 Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			3, 0, NULL, 0),
-	SND_SOC_DAPM_MIXER("Analog Voice Playback Mixer", TWL4030_REG_AVDAC_CTL,
-			4, 0, NULL, 0),
+	/* Digital mixers, power control for the physical DACs */
+	SND_SOC_DAPM_MIXER("Digital R1 Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Digital L1 Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 1, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Digital R2 Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 2, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Digital L2 Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 3, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Digital Voice Playback Mixer",
+			TWL4030_REG_AVDAC_CTL, 4, 0, NULL, 0),
+
+	/* Analog mixers, power control for the physical PGAs */
+	SND_SOC_DAPM_MIXER("Analog R1 Playback Mixer",
+			TWL4030_REG_ARXR1_APGA_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog L1 Playback Mixer",
+			TWL4030_REG_ARXL1_APGA_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog R2 Playback Mixer",
+			TWL4030_REG_ARXR2_APGA_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog L2 Playback Mixer",
+			TWL4030_REG_ARXL2_APGA_CTL, 0, 0, NULL, 0),
+	SND_SOC_DAPM_MIXER("Analog Voice Playback Mixer",
+			TWL4030_REG_VDL_APGA_CTL, 0, 0, NULL, 0),
 
 	/* Output MIXER controls */
 	/* Earpiece */
@@ -1194,60 +1195,60 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 };
 
 static const struct snd_soc_dapm_route intercon[] = {
-	{"Analog L1 Playback Mixer", NULL, "DAC Left1"},
-	{"Analog R1 Playback Mixer", NULL, "DAC Right1"},
-	{"Analog L2 Playback Mixer", NULL, "DAC Left2"},
-	{"Analog R2 Playback Mixer", NULL, "DAC Right2"},
-	{"Analog Voice Playback Mixer", NULL, "DAC Voice"},
-
-	{"ARXL1_APGA", NULL, "Analog L1 Playback Mixer"},
-	{"ARXR1_APGA", NULL, "Analog R1 Playback Mixer"},
-	{"ARXL2_APGA", NULL, "Analog L2 Playback Mixer"},
-	{"ARXR2_APGA", NULL, "Analog R2 Playback Mixer"},
-	{"VDL_APGA", NULL, "Analog Voice Playback Mixer"},
+	{"Digital L1 Playback Mixer", NULL, "DAC Left1"},
+	{"Digital R1 Playback Mixer", NULL, "DAC Right1"},
+	{"Digital L2 Playback Mixer", NULL, "DAC Left2"},
+	{"Digital R2 Playback Mixer", NULL, "DAC Right2"},
+	{"Digital Voice Playback Mixer", NULL, "DAC Voice"},
+
+	{"Analog L1 Playback Mixer", NULL, "Digital L1 Playback Mixer"},
+	{"Analog R1 Playback Mixer", NULL, "Digital R1 Playback Mixer"},
+	{"Analog L2 Playback Mixer", NULL, "Digital L2 Playback Mixer"},
+	{"Analog R2 Playback Mixer", NULL, "Digital R2 Playback Mixer"},
+	{"Analog Voice Playback Mixer", NULL, "Digital Voice Playback Mixer"},
 
 	/* Internal playback routings */
 	/* Earpiece */
-	{"Earpiece Mixer", "Voice", "VDL_APGA"},
-	{"Earpiece Mixer", "AudioL1", "ARXL1_APGA"},
-	{"Earpiece Mixer", "AudioL2", "ARXL2_APGA"},
-	{"Earpiece Mixer", "AudioR1", "ARXR1_APGA"},
+	{"Earpiece Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"Earpiece Mixer", "AudioL1", "Analog L1 Playback Mixer"},
+	{"Earpiece Mixer", "AudioL2", "Analog L2 Playback Mixer"},
+	{"Earpiece Mixer", "AudioR1", "Analog R1 Playback Mixer"},
 	/* PreDrivL */
-	{"PredriveL Mixer", "Voice", "VDL_APGA"},
-	{"PredriveL Mixer", "AudioL1", "ARXL1_APGA"},
-	{"PredriveL Mixer", "AudioL2", "ARXL2_APGA"},
-	{"PredriveL Mixer", "AudioR2", "ARXR2_APGA"},
+	{"PredriveL Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"PredriveL Mixer", "AudioL1", "Analog L1 Playback Mixer"},
+	{"PredriveL Mixer", "AudioL2", "Analog L2 Playback Mixer"},
+	{"PredriveL Mixer", "AudioR2", "Analog R2 Playback Mixer"},
 	/* PreDrivR */
-	{"PredriveR Mixer", "Voice", "VDL_APGA"},
-	{"PredriveR Mixer", "AudioR1", "ARXR1_APGA"},
-	{"PredriveR Mixer", "AudioR2", "ARXR2_APGA"},
-	{"PredriveR Mixer", "AudioL2", "ARXL2_APGA"},
+	{"PredriveR Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"PredriveR Mixer", "AudioR1", "Analog R1 Playback Mixer"},
+	{"PredriveR Mixer", "AudioR2", "Analog R2 Playback Mixer"},
+	{"PredriveR Mixer", "AudioL2", "Analog L2 Playback Mixer"},
 	/* HeadsetL */
-	{"HeadsetL Mixer", "Voice", "VDL_APGA"},
-	{"HeadsetL Mixer", "AudioL1", "ARXL1_APGA"},
-	{"HeadsetL Mixer", "AudioL2", "ARXL2_APGA"},
+	{"HeadsetL Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"HeadsetL Mixer", "AudioL1", "Analog L1 Playback Mixer"},
+	{"HeadsetL Mixer", "AudioL2", "Analog L2 Playback Mixer"},
 	/* HeadsetR */
-	{"HeadsetR Mixer", "Voice", "VDL_APGA"},
-	{"HeadsetR Mixer", "AudioR1", "ARXR1_APGA"},
-	{"HeadsetR Mixer", "AudioR2", "ARXR2_APGA"},
+	{"HeadsetR Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"HeadsetR Mixer", "AudioR1", "Analog R1 Playback Mixer"},
+	{"HeadsetR Mixer", "AudioR2", "Analog R2 Playback Mixer"},
 	/* CarkitL */
-	{"CarkitL Mixer", "Voice", "VDL_APGA"},
-	{"CarkitL Mixer", "AudioL1", "ARXL1_APGA"},
-	{"CarkitL Mixer", "AudioL2", "ARXL2_APGA"},
+	{"CarkitL Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"CarkitL Mixer", "AudioL1", "Analog L1 Playback Mixer"},
+	{"CarkitL Mixer", "AudioL2", "Analog L2 Playback Mixer"},
 	/* CarkitR */
-	{"CarkitR Mixer", "Voice", "VDL_APGA"},
-	{"CarkitR Mixer", "AudioR1", "ARXR1_APGA"},
-	{"CarkitR Mixer", "AudioR2", "ARXR2_APGA"},
+	{"CarkitR Mixer", "Voice", "Analog Voice Playback Mixer"},
+	{"CarkitR Mixer", "AudioR1", "Analog R1 Playback Mixer"},
+	{"CarkitR Mixer", "AudioR2", "Analog R2 Playback Mixer"},
 	/* HandsfreeL */
-	{"HandsfreeL Mux", "Voice", "VDL_APGA"},
-	{"HandsfreeL Mux", "AudioL1", "ARXL1_APGA"},
-	{"HandsfreeL Mux", "AudioL2", "ARXL2_APGA"},
-	{"HandsfreeL Mux", "AudioR2", "ARXR2_APGA"},
+	{"HandsfreeL Mux", "Voice", "Analog Voice Playback Mixer"},
+	{"HandsfreeL Mux", "AudioL1", "Analog L1 Playback Mixer"},
+	{"HandsfreeL Mux", "AudioL2", "Analog L2 Playback Mixer"},
+	{"HandsfreeL Mux", "AudioR2", "Analog R2 Playback Mixer"},
 	/* HandsfreeR */
-	{"HandsfreeR Mux", "Voice", "VDL_APGA"},
-	{"HandsfreeR Mux", "AudioR1", "ARXR1_APGA"},
-	{"HandsfreeR Mux", "AudioR2", "ARXR2_APGA"},
-	{"HandsfreeR Mux", "AudioL2", "ARXL2_APGA"},
+	{"HandsfreeR Mux", "Voice", "Analog Voice Playback Mixer"},
+	{"HandsfreeR Mux", "AudioR1", "Analog R1 Playback Mixer"},
+	{"HandsfreeR Mux", "AudioR2", "Analog R2 Playback Mixer"},
+	{"HandsfreeR Mux", "AudioL2", "Analog L2 Playback Mixer"},
 	/* Vibra */
 	{"Vibra Mux", "AudioL1", "DAC Left1"},
 	{"Vibra Mux", "AudioR1", "DAC Right1"},
@@ -1255,8 +1256,8 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"Vibra Mux", "AudioR2", "DAC Right2"},
 
 	/* outputs */
-	{"OUTL", NULL, "ARXL2_APGA"},
-	{"OUTR", NULL, "ARXR2_APGA"},
+	{"OUTL", NULL, "Analog L2 Playback Mixer"},
+	{"OUTR", NULL, "Analog R2 Playback Mixer"},
 	{"EARPIECE", NULL, "Earpiece Mixer"},
 	{"PREDRIVEL", NULL, "PredriveL Mixer"},
 	{"PREDRIVER", NULL, "PredriveR Mixer"},
@@ -1320,9 +1321,9 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"Left Digital Loopback", "Volume", "TX1 Capture Route"},
 	{"Voice Digital Loopback", "Volume", "TX2 Capture Route"},
 
-	{"Analog R2 Playback Mixer", NULL, "Right Digital Loopback"},
-	{"Analog L2 Playback Mixer", NULL, "Left Digital Loopback"},
-	{"Analog Voice Playback Mixer", NULL, "Voice Digital Loopback"},
+	{"Digital R2 Playback Mixer", NULL, "Right Digital Loopback"},
+	{"Digital L2 Playback Mixer", NULL, "Left Digital Loopback"},
+	{"Digital Voice Playback Mixer", NULL, "Voice Digital Loopback"},
 
 };
 
-- 
cgit v0.10.2


From 6943c92e87c4aa2a6d7a1f4dbd79cf4a0b5fd67b Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Mon, 18 May 2009 16:02:05 +0300
Subject: ASoC: TWL4030: Move the Headset pop-attenuation code to PGA event

This patch adds SND_SOC_DAPM_PGA_E to the headset path, which handles
the headset ramp up and down sequences needed for the pop noise
removal.

With this patch the order of the internal components in the twl4030
codec is turned on and off in a correct order.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Tested-by: Anuj Aggarwal <anuj.aggarwal@ti.com>
Tested-by: Jarkko Nikula <jhnikula@gmail.com>
Tested-by: Misael Lopez Cruz  <x0052729@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 99fe44f..f554672 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -130,6 +130,12 @@ struct twl4030_priv {
 	unsigned int rate;
 	unsigned int sample_bits;
 	unsigned int channels;
+
+	unsigned int sysclk;
+
+	/* Headset output state handling */
+	unsigned int hsl_enabled;
+	unsigned int hsr_enabled;
 };
 
 /*
@@ -564,39 +570,85 @@ static int handsfree_event(struct snd_soc_dapm_widget *w,
 	return 0;
 }
 
-static int headsetl_event(struct snd_soc_dapm_widget *w,
-		struct snd_kcontrol *kcontrol, int event)
+static void headset_ramp(struct snd_soc_codec *codec, int ramp)
 {
 	unsigned char hs_gain, hs_pop;
+	struct twl4030_priv *twl4030 = codec->private_data;
+	/* Base values for ramp delay calculation: 2^19 - 2^26 */
+	unsigned int ramp_base[] = {524288, 1048576, 2097152, 4194304,
+				    8388608, 16777216, 33554432, 67108864};
 
-	/* Save the current volume */
-	hs_gain = twl4030_read_reg_cache(w->codec, TWL4030_REG_HS_GAIN_SET);
-	hs_pop = twl4030_read_reg_cache(w->codec, TWL4030_REG_HS_POPN_SET);
+	hs_gain = twl4030_read_reg_cache(codec, TWL4030_REG_HS_GAIN_SET);
+	hs_pop = twl4030_read_reg_cache(codec, TWL4030_REG_HS_POPN_SET);
 
-	switch (event) {
-	case SND_SOC_DAPM_POST_PMU:
-		/* Do the anti-pop/bias ramp enable according to the TRM */
+	if (ramp) {
+		/* Headset ramp-up according to the TRM */
 		hs_pop |= TWL4030_VMID_EN;
-		twl4030_write(w->codec, TWL4030_REG_HS_POPN_SET, hs_pop);
-		/* Is this needed? Can we just use whatever gain here? */
-		twl4030_write(w->codec, TWL4030_REG_HS_GAIN_SET,
-				(hs_gain & (~0x0f)) | 0x0a);
+		twl4030_write(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+		twl4030_write(codec, TWL4030_REG_HS_GAIN_SET, hs_gain);
 		hs_pop |= TWL4030_RAMP_EN;
-		twl4030_write(w->codec, TWL4030_REG_HS_POPN_SET, hs_pop);
-
-		/* Restore the original volume */
-		twl4030_write(w->codec, TWL4030_REG_HS_GAIN_SET, hs_gain);
-		break;
-	case SND_SOC_DAPM_POST_PMD:
-		/* Do the anti-pop/bias ramp disable according to the TRM */
+		twl4030_write(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+	} else {
+		/* Headset ramp-down _not_ according to
+		 * the TRM, but in a way that it is working */
 		hs_pop &= ~TWL4030_RAMP_EN;
-		twl4030_write(w->codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+		twl4030_write(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+		/* Wait ramp delay time + 1, so the VMID can settle */
+		mdelay((ramp_base[(hs_pop & TWL4030_RAMP_DELAY) >> 2] /
+			twl4030->sysclk) + 1);
 		/* Bypass the reg_cache to mute the headset */
 		twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
 					hs_gain & (~0x0f),
 					TWL4030_REG_HS_GAIN_SET);
+
 		hs_pop &= ~TWL4030_VMID_EN;
-		twl4030_write(w->codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+		twl4030_write(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+	}
+}
+
+static int headsetlpga_event(struct snd_soc_dapm_widget *w,
+		struct snd_kcontrol *kcontrol, int event)
+{
+	struct twl4030_priv *twl4030 = w->codec->private_data;
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		/* Do the ramp-up only once */
+		if (!twl4030->hsr_enabled)
+			headset_ramp(w->codec, 1);
+
+		twl4030->hsl_enabled = 1;
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		/* Do the ramp-down only if both headsetL/R is disabled */
+		if (!twl4030->hsr_enabled)
+			headset_ramp(w->codec, 0);
+
+		twl4030->hsl_enabled = 0;
+		break;
+	}
+	return 0;
+}
+
+static int headsetrpga_event(struct snd_soc_dapm_widget *w,
+		struct snd_kcontrol *kcontrol, int event)
+{
+	struct twl4030_priv *twl4030 = w->codec->private_data;
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		/* Do the ramp-up only once */
+		if (!twl4030->hsl_enabled)
+			headset_ramp(w->codec, 1);
+
+		twl4030->hsr_enabled = 1;
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		/* Do the ramp-down only if both headsetL/R is disabled */
+		if (!twl4030->hsl_enabled)
+			headset_ramp(w->codec, 0);
+
+		twl4030->hsr_enabled = 0;
 		break;
 	}
 	return 0;
@@ -1116,13 +1168,18 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 			&twl4030_dapm_predriver_controls[0],
 			ARRAY_SIZE(twl4030_dapm_predriver_controls)),
 	/* HeadsetL/R */
-	SND_SOC_DAPM_MIXER_E("HeadsetL Mixer", SND_SOC_NOPM, 0, 0,
+	SND_SOC_DAPM_MIXER("HeadsetL Mixer", SND_SOC_NOPM, 0, 0,
 			&twl4030_dapm_hsol_controls[0],
-			ARRAY_SIZE(twl4030_dapm_hsol_controls), headsetl_event,
+			ARRAY_SIZE(twl4030_dapm_hsol_controls)),
+	SND_SOC_DAPM_PGA_E("HeadsetL PGA", SND_SOC_NOPM,
+			0, 0, NULL, 0, headsetlpga_event,
 			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
 	SND_SOC_DAPM_MIXER("HeadsetR Mixer", SND_SOC_NOPM, 0, 0,
 			&twl4030_dapm_hsor_controls[0],
 			ARRAY_SIZE(twl4030_dapm_hsor_controls)),
+	SND_SOC_DAPM_PGA_E("HeadsetR PGA", SND_SOC_NOPM,
+			0, 0, NULL, 0, headsetrpga_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
 	/* CarkitL/R */
 	SND_SOC_DAPM_MIXER("CarkitL Mixer", SND_SOC_NOPM, 0, 0,
 			&twl4030_dapm_carkitl_controls[0],
@@ -1227,10 +1284,12 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"HeadsetL Mixer", "Voice", "Analog Voice Playback Mixer"},
 	{"HeadsetL Mixer", "AudioL1", "Analog L1 Playback Mixer"},
 	{"HeadsetL Mixer", "AudioL2", "Analog L2 Playback Mixer"},
+	{"HeadsetL PGA", NULL, "HeadsetL Mixer"},
 	/* HeadsetR */
 	{"HeadsetR Mixer", "Voice", "Analog Voice Playback Mixer"},
 	{"HeadsetR Mixer", "AudioR1", "Analog R1 Playback Mixer"},
 	{"HeadsetR Mixer", "AudioR2", "Analog R2 Playback Mixer"},
+	{"HeadsetR PGA", NULL, "HeadsetR Mixer"},
 	/* CarkitL */
 	{"CarkitL Mixer", "Voice", "Analog Voice Playback Mixer"},
 	{"CarkitL Mixer", "AudioL1", "Analog L1 Playback Mixer"},
@@ -1261,8 +1320,8 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"EARPIECE", NULL, "Earpiece Mixer"},
 	{"PREDRIVEL", NULL, "PredriveL Mixer"},
 	{"PREDRIVER", NULL, "PredriveR Mixer"},
-	{"HSOL", NULL, "HeadsetL Mixer"},
-	{"HSOR", NULL, "HeadsetR Mixer"},
+	{"HSOL", NULL, "HeadsetL PGA"},
+	{"HSOR", NULL, "HeadsetR PGA"},
 	{"CARKITL", NULL, "CarkitL Mixer"},
 	{"CARKITR", NULL, "CarkitR Mixer"},
 	{"HFL", NULL, "HandsfreeL Mux"},
@@ -1601,17 +1660,21 @@ static int twl4030_set_dai_sysclk(struct snd_soc_dai *codec_dai,
 		int clk_id, unsigned int freq, int dir)
 {
 	struct snd_soc_codec *codec = codec_dai->codec;
+	struct twl4030_priv *twl4030 = codec->private_data;
 	u8 infreq;
 
 	switch (freq) {
 	case 19200000:
 		infreq = TWL4030_APLL_INFREQ_19200KHZ;
+		twl4030->sysclk = 19200;
 		break;
 	case 26000000:
 		infreq = TWL4030_APLL_INFREQ_26000KHZ;
+		twl4030->sysclk = 26000;
 		break;
 	case 38400000:
 		infreq = TWL4030_APLL_INFREQ_38400KHZ;
+		twl4030->sysclk = 38400;
 		break;
 	default:
 		printk(KERN_ERR "TWL4030 set sysclk: unknown rate %d\n",
@@ -2000,6 +2063,9 @@ static int twl4030_probe(struct platform_device *pdev)
 		kfree(codec);
 		return -ENOMEM;
 	}
+	/* Set default sysclk (used by the headsetl/rpga_event callback for
+	 * pop-attenuation) */
+	twl4030->sysclk = 26000;
 
 	codec->private_data = twl4030;
 	socdev->card->codec = codec;
-- 
cgit v0.10.2


From 09010978345e8883003bf411bb99753710eb5a3a Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 20 May 2009 10:48:47 +0100
Subject: GFS2: Improve resource group error handling

This patch improves the error handling in the case where we
discover that the summary information in the resource group
doesn't match the bitmap information while in the process of
allocating blocks. Originally this resulted in a kernel bug,
but this patch changes that so that we return -EIO and print
some messages explaining what went wrong, and how to fix it.

We also remember locally not to try and allocate from the
same rgrp again, so that a subsequent allocation in a
different rgrp should succeed.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3a5d3f8..253e1a3 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -136,7 +136,9 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 		   and write it out to disk */
 
 		unsigned int n = 1;
-		block = gfs2_alloc_block(ip, &n);
+		error = gfs2_alloc_block(ip, &block, &n);
+		if (error)
+			goto out_brelse;
 		if (isdir) {
 			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
 			error = gfs2_dir_get_new_buffer(ip, block, &bh);
@@ -476,8 +478,11 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 	blks = dblks + iblks;
 	i = sheight;
 	do {
+		int error;
 		n = blks - alloced;
-		bn = gfs2_alloc_block(ip, &n);
+		error = gfs2_alloc_block(ip, &bn, &n);
+		if (error)
+			return error;
 		alloced += n;
 		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 			gfs2_trans_add_unrevoke(sdp, bn, n);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index aef4d0c..297d7e5 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -803,13 +803,20 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	unsigned int n = 1;
-	u64 bn = gfs2_alloc_block(ip, &n);
-	struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn);
+	u64 bn;
+	int error;
+	struct buffer_head *bh;
 	struct gfs2_leaf *leaf;
 	struct gfs2_dirent *dent;
 	struct qstr name = { .name = "", .len = 0, .hash = 0 };
+
+	error = gfs2_alloc_block(ip, &bn, &n);
+	if (error)
+		return NULL;
+	bh = gfs2_meta_new(ip->i_gl, bn);
 	if (!bh)
 		return NULL;
+
 	gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
 	gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 899763a..07ea952 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -582,8 +582,11 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
 	struct gfs2_ea_header *ea;
 	unsigned int n = 1;
 	u64 block;
+	int error;
 
-	block = gfs2_alloc_block(ip, &n);
+	error = gfs2_alloc_block(ip, &block, &n);
+	if (error)
+		return error;
 	gfs2_trans_add_unrevoke(sdp, block, 1);
 	*bhp = gfs2_meta_new(ip->i_gl, block);
 	gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
@@ -617,6 +620,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 		    struct gfs2_ea_request *er)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	int error;
 
 	ea->ea_data_len = cpu_to_be32(er->er_data_len);
 	ea->ea_name_len = er->er_name_len;
@@ -642,7 +646,9 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 			int mh_size = sizeof(struct gfs2_meta_header);
 			unsigned int n = 1;
 
-			block = gfs2_alloc_block(ip, &n);
+			error = gfs2_alloc_block(ip, &block, &n);
+			if (error)
+				return error;
 			gfs2_trans_add_unrevoke(sdp, block, 1);
 			bh = gfs2_meta_new(ip->i_gl, block);
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
@@ -963,7 +969,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	} else {
 		u64 blk;
 		unsigned int n = 1;
-		blk = gfs2_alloc_block(ip, &n);
+		error = gfs2_alloc_block(ip, &blk, &n);
+		if (error)
+			return error;
 		gfs2_trans_add_unrevoke(sdp, blk, 1);
 		indbh = gfs2_meta_new(ip->i_gl, blk);
 		gfs2_trans_add_bh(ip->i_gl, indbh, 1);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 70f87f4..d5e4ab1 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -310,24 +310,6 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
 }
 
 /**
- * rgrp_go_dump - print out an rgrp
- * @seq: The iterator
- * @gl: The glock in question
- *
- */
-
-static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
-{
-	const struct gfs2_rgrpd *rgd = gl->gl_object;
-	if (rgd == NULL)
-		return 0;
-	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
-		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
-	return 0;
-}
-
-/**
  * trans_go_sync - promote/demote the transaction glock
  * @gl: the glock
  * @state: the requested state
@@ -410,7 +392,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
 	.go_demote_ok = rgrp_go_demote_ok,
 	.go_lock = rgrp_go_lock,
 	.go_unlock = rgrp_go_unlock,
-	.go_dump = rgrp_go_dump,
+	.go_dump = gfs2_rgrp_dump,
 	.go_type = LM_TYPE_RGRP,
 	.go_min_hold_time = HZ / 5,
 };
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0060e95..de50d86 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -92,9 +92,10 @@ struct gfs2_rgrpd {
 	unsigned int rd_bh_count;
 	u32 rd_last_alloc;
 	unsigned char rd_flags;
-#define GFS2_RDF_CHECK        0x01      /* Need to check for unlinked inodes */
-#define GFS2_RDF_NOALLOC      0x02      /* rg prohibits allocation */
-#define GFS2_RDF_UPTODATE     0x04      /* rg is up to date */
+#define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
+#define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
+#define GFS2_RDF_ERROR		0x40000000 /* error in rg */
+#define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
 };
 
 enum gfs2_state_bits {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5650382..fbacf09 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -701,10 +701,7 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 	u32 rg_flags;
 
 	rg_flags = be32_to_cpu(str->rg_flags);
-	if (rg_flags & GFS2_RGF_NOALLOC)
-		rgd->rd_flags |= GFS2_RDF_NOALLOC;
-	else
-		rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
+	rg_flags &= ~GFS2_RDF_MASK;
 	rgd->rd_free = be32_to_cpu(str->rg_free);
 	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
 	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
@@ -713,11 +710,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 {
 	struct gfs2_rgrp *str = buf;
-	u32 rg_flags = 0;
 
-	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
-		rg_flags |= GFS2_RGF_NOALLOC;
-	str->rg_flags = cpu_to_be32(rg_flags);
+	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
 	str->rg_free = cpu_to_be32(rgd->rd_free);
 	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
 	str->__pad = cpu_to_be32(0);
@@ -942,7 +936,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	int ret = 0;
 
-	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
+	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
 		return 0;
 
 	spin_lock(&sdp->sd_rindex_spin);
@@ -1435,13 +1429,33 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 }
 
 /**
- * gfs2_alloc_block - Allocate a block
+ * gfs2_rgrp_dump - print out an rgrp
+ * @seq: The iterator
+ * @gl: The glock in question
+ *
+ */
+
+int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
+{
+	const struct gfs2_rgrpd *rgd = gl->gl_object;
+	if (rgd == NULL)
+		return 0;
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
+		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
+	return 0;
+}
+
+/**
+ * gfs2_alloc_block - Allocate one or more blocks
  * @ip: the inode to allocate the block for
+ * @bn: Used to return the starting block number
+ * @n: requested number of blocks/extent length (value/result)
  *
- * Returns: the allocated block
+ * Returns: 0 or error
  */
 
-u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
+int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *dibh;
@@ -1457,7 +1471,10 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
 		goal = rgd->rd_last_alloc;
 
 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
-	BUG_ON(blk == BFITNOENT);
+
+	/* Since all blocks are reserved in advance, this shouldn't happen */
+	if (blk == BFITNOENT)
+		goto rgrp_error;
 
 	rgd->rd_last_alloc = blk;
 	block = rgd->rd_data0 + blk;
@@ -1469,7 +1486,9 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
 		di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
 		brelse(dibh);
 	}
-	gfs2_assert_withdraw(sdp, rgd->rd_free >= *n);
+	if (rgd->rd_free < *n)
+		goto rgrp_error;
+
 	rgd->rd_free -= *n;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1484,7 +1503,16 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
 	rgd->rd_free_clone -= *n;
 	spin_unlock(&sdp->sd_rindex_spin);
 
-	return block;
+	*bn = block;
+	return 0;
+
+rgrp_error:
+	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
+	        (unsigned long long)rgd->rd_addr);
+	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
+	gfs2_rgrp_dump(NULL, rgd->rd_gl);
+	rgd->rd_flags |= GFS2_RDF_ERROR;
+	return -EIO;
 }
 
 /**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 3181c7e..1e76ff0 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -14,22 +14,22 @@ struct gfs2_rgrpd;
 struct gfs2_sbd;
 struct gfs2_holder;
 
-void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
+extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
 
 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
 
-void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
-int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
+extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
+extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
 
-int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
-void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
-void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
+extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
+extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
+extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
 
-void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
+extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
 
-struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
+extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
 static inline void gfs2_alloc_put(struct gfs2_inode *ip)
 {
 	BUG_ON(ip->i_alloc == NULL);
@@ -37,22 +37,22 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip)
 	ip->i_alloc = NULL;
 }
 
-int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
-			 char *file, unsigned int line);
+extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file,
+				  unsigned int line);
 #define gfs2_inplace_reserve(ip) \
 gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
 
-void gfs2_inplace_release(struct gfs2_inode *ip);
+extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
-unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
+extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
 
-u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n);
-u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
+extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
+extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
 
-void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
-void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
-void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
-void gfs2_unlink_di(struct inode *inode);
+extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
+extern void gfs2_unlink_di(struct inode *inode);
 
 struct gfs2_rgrp_list {
 	unsigned int rl_rgrps;
@@ -61,10 +61,11 @@ struct gfs2_rgrp_list {
 	struct gfs2_holder *rl_ghs;
 };
 
-void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
-		    u64 block);
-void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
-void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
-u64 gfs2_ri_total(struct gfs2_sbd *sdp);
+extern void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
+			   u64 block);
+extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
+extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
+extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
 
 #endif /* __RGRP_DOT_H__ */
-- 
cgit v0.10.2


From d7b629a34fc4134a43c730b5f0197855dc4948d0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 20 May 2009 12:21:19 +0200
Subject: perf_counter: Solve the rotate_ctx vs inherit race differently

Instead of disabling RR scheduling of the counters, use a different list
that does not get rotated to iterate the counters on inheritance.

[ Impact: cleanup, optimization ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090520102553.237504544@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 13cb2fb..c8c1dfc2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -508,7 +508,6 @@ struct perf_counter_context {
 	int			nr_counters;
 	int			nr_active;
 	int			is_active;
-	int			rr_allowed;
 	struct task_struct	*task;
 
 	/*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4d8f973..64113e6 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1120,8 +1120,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
-	if (ctx->rr_allowed)
-		rotate_ctx(ctx);
+	rotate_ctx(ctx);
 
 	perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
@@ -3109,7 +3108,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->counter_list);
 	INIT_LIST_HEAD(&ctx->event_list);
-	ctx->rr_allowed = 1;
 	ctx->task = task;
 }
 
@@ -3350,14 +3348,14 @@ void perf_counter_init_task(struct task_struct *child)
 	 */
 	mutex_lock(&parent_ctx->mutex);
 
-	parent_ctx->rr_allowed = 0;
-	barrier(); /* irqs */
-
 	/*
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
+	list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) {
+		if (counter != counter->group_leader)
+			continue;
+
 		if (!counter->hw_event.inherit)
 			continue;
 
@@ -3366,9 +3364,6 @@ void perf_counter_init_task(struct task_struct *child)
 			break;
 	}
 
-	barrier(); /* irqs */
-	parent_ctx->rr_allowed = 1;
-
 	mutex_unlock(&parent_ctx->mutex);
 }
 
-- 
cgit v0.10.2


From 26b119bc811a73bac6ecf95bdf284bf31c7955f0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 20 May 2009 12:21:20 +0200
Subject: perf_counter: Log irq_period changes

For the dynamic irq_period code, log whenever we change the period so that
analyzing code can normalize the event flow.

[ Impact: add new feature to allow more precise profiling ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090520102553.298769743@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c8c1dfc2..f612941 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -258,6 +258,14 @@ enum perf_event_type {
 	PERF_EVENT_COMM			= 3,
 
 	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u64				irq_period;
+	 * };
+	 */
+	PERF_EVENT_PERIOD		= 4,
+
+	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
 	 * will be PERF_RECORD_*
 	 *
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 64113e6..db02eb1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1046,7 +1046,9 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
-void perf_adjust_freq(struct perf_counter_context *ctx)
+static void perf_log_period(struct perf_counter *counter, u64 period);
+
+static void perf_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
 	u64 irq_period;
@@ -1072,6 +1074,8 @@ void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (!irq_period)
 			irq_period = 1;
 
+		perf_log_period(counter, irq_period);
+
 		counter->hw.irq_period = irq_period;
 		counter->hw.interrupts = 0;
 	}
@@ -2407,6 +2411,40 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 }
 
 /*
+ *
+ */
+
+static void perf_log_period(struct perf_counter *counter, u64 period)
+{
+	struct perf_output_handle handle;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				time;
+		u64				period;
+	} freq_event = {
+		.header = {
+			.type = PERF_EVENT_PERIOD,
+			.misc = 0,
+			.size = sizeof(freq_event),
+		},
+		.time = sched_clock(),
+		.period = period,
+	};
+
+	if (counter->hw.irq_period == period)
+		return;
+
+	ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, freq_event);
+	perf_output_end(&handle);
+}
+
+/*
  * Generic counter overflow handling.
  */
 
-- 
cgit v0.10.2


From b986d7ec0f8b7ea3cc7366d80a137fbe839df227 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 20 May 2009 12:21:21 +0200
Subject: perf_counter: Optimize disable of time based sw counters

Currently we call hrtimer_cancel() unconditionally on disable of time based
software counters. Avoid when possible.

[ Impact: micro-optimize the code ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090520102553.388185031@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index db02eb1..473ed2c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2716,7 +2716,8 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	hrtimer_cancel(&counter->hw.hrtimer);
+	if (counter->hw.irq_period)
+		hrtimer_cancel(&counter->hw.hrtimer);
 	cpu_clock_perf_counter_update(counter);
 }
 
@@ -2767,7 +2768,8 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	hrtimer_cancel(&counter->hw.hrtimer);
+	if (counter->hw.irq_period)
+		hrtimer_cancel(&counter->hw.hrtimer);
 	task_clock_perf_counter_update(counter, counter->ctx->time);
 
 }
-- 
cgit v0.10.2


From afedadf23a2c90f3ba0d963282cbe6a6be129494 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 20 May 2009 12:21:22 +0200
Subject: perf_counter: Optimize sched in/out of counters

Avoid a function call for !group counters by directly calling the counter
function.

[ Impact: micro-optimize the code ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090520102553.511933670@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 473ed2c..69d4de8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -826,8 +826,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 
 	perf_disable();
 	if (ctx->nr_active) {
-		list_for_each_entry(counter, &ctx->counter_list, list_entry)
-			group_sched_out(counter, cpuctx, ctx);
+		list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+			if (counter != counter->group_leader)
+				counter_sched_out(counter, cpuctx, ctx);
+			else
+				group_sched_out(counter, cpuctx, ctx);
+		}
 	}
 	perf_enable();
  out:
@@ -903,8 +907,12 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		if (group_can_go_on(counter, cpuctx, 1))
-			group_sched_in(counter, cpuctx, ctx, cpu);
+		if (counter != counter->group_leader)
+			counter_sched_in(counter, cpuctx, ctx, cpu);
+		else {
+			if (group_can_go_on(counter, cpuctx, 1))
+				group_sched_in(counter, cpuctx, ctx, cpu);
+		}
 
 		/*
 		 * If this pinned group hasn't been scheduled,
@@ -932,9 +940,14 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		if (group_can_go_on(counter, cpuctx, can_add_hw)) {
-			if (group_sched_in(counter, cpuctx, ctx, cpu))
+		if (counter != counter->group_leader) {
+			if (counter_sched_in(counter, cpuctx, ctx, cpu))
 				can_add_hw = 0;
+		} else {
+			if (group_can_go_on(counter, cpuctx, can_add_hw)) {
+				if (group_sched_in(counter, cpuctx, ctx, cpu))
+					can_add_hw = 0;
+			}
 		}
 	}
 	perf_enable();
-- 
cgit v0.10.2


From a6a9fb857b5599b3cefef5576967389c1c43eb4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Fri, 20 Feb 2009 01:01:33 +0100
Subject: IXP4xx: Add support for the second half of the 64 hardware queues.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/include/mach/qmgr.h b/arch/arm/mach-ixp4xx/include/mach/qmgr.h
index 0cbe6ce..32077e1 100644
--- a/arch/arm/mach-ixp4xx/include/mach/qmgr.h
+++ b/arch/arm/mach-ixp4xx/include/mach/qmgr.h
@@ -15,7 +15,7 @@
 #define DEBUG_QMGR	0
 
 #define HALF_QUEUES	32
-#define QUEUES		64	/* only 32 lower queues currently supported */
+#define QUEUES		64
 #define MAX_QUEUE_LENGTH 4	/* in dwords */
 
 #define QUEUE_STAT1_EMPTY		1 /* queue status bits */
@@ -110,48 +110,95 @@ static inline u32 qmgr_get_entry(unsigned int queue)
 	return val;
 }
 
-static inline int qmgr_get_stat1(unsigned int queue)
+static inline int __qmgr_get_stat1(unsigned int queue)
 {
 	extern struct qmgr_regs __iomem *qmgr_regs;
 	return (__raw_readl(&qmgr_regs->stat1[queue >> 3])
 		>> ((queue & 7) << 2)) & 0xF;
 }
 
-static inline int qmgr_get_stat2(unsigned int queue)
+static inline int __qmgr_get_stat2(unsigned int queue)
 {
 	extern struct qmgr_regs __iomem *qmgr_regs;
+	BUG_ON(queue >= HALF_QUEUES);
 	return (__raw_readl(&qmgr_regs->stat2[queue >> 4])
 		>> ((queue & 0xF) << 1)) & 0x3;
 }
 
+/**
+ * qmgr_stat_empty() - checks if a hardware queue is empty
+ * @queue:	queue number
+ *
+ * Returns non-zero value if the queue is empty.
+ */
 static inline int qmgr_stat_empty(unsigned int queue)
 {
-	return !!(qmgr_get_stat1(queue) & QUEUE_STAT1_EMPTY);
+	BUG_ON(queue >= HALF_QUEUES);
+	return __qmgr_get_stat1(queue) & QUEUE_STAT1_EMPTY;
 }
 
+/**
+ * qmgr_stat_empty() - checks if a hardware queue is nearly empty
+ * @queue:	queue number
+ *
+ * Returns non-zero value if the queue is nearly or completely empty.
+ */
 static inline int qmgr_stat_nearly_empty(unsigned int queue)
 {
-	return !!(qmgr_get_stat1(queue) & QUEUE_STAT1_NEARLY_EMPTY);
+	extern struct qmgr_regs __iomem *qmgr_regs;
+	if (queue >= HALF_QUEUES)
+		return (__raw_readl(&qmgr_regs->statne_h) >>
+			(queue - HALF_QUEUES)) & 0x01;
+	return __qmgr_get_stat1(queue) & QUEUE_STAT1_NEARLY_EMPTY;
 }
 
+/**
+ * qmgr_stat_empty() - checks if a hardware queue is nearly full
+ * @queue:	queue number
+ *
+ * Returns non-zero value if the queue is nearly or completely full.
+ */
 static inline int qmgr_stat_nearly_full(unsigned int queue)
 {
-	return !!(qmgr_get_stat1(queue) & QUEUE_STAT1_NEARLY_FULL);
+	BUG_ON(queue >= HALF_QUEUES);
+	return __qmgr_get_stat1(queue) & QUEUE_STAT1_NEARLY_FULL;
 }
 
+/**
+ * qmgr_stat_empty() - checks if a hardware queue is full
+ * @queue:	queue number
+ *
+ * Returns non-zero value if the queue is full.
+ */
 static inline int qmgr_stat_full(unsigned int queue)
 {
-	return !!(qmgr_get_stat1(queue) & QUEUE_STAT1_FULL);
+	extern struct qmgr_regs __iomem *qmgr_regs;
+	if (queue >= HALF_QUEUES)
+		return (__raw_readl(&qmgr_regs->statf_h) >>
+			(queue - HALF_QUEUES)) & 0x01;
+	return __qmgr_get_stat1(queue) & QUEUE_STAT1_FULL;
 }
 
+/**
+ * qmgr_stat_empty() - checks if a hardware queue experienced underflow
+ * @queue:	queue number
+ *
+ * Returns non-zero value if empty.
+ */
 static inline int qmgr_stat_underflow(unsigned int queue)
 {
-	return !!(qmgr_get_stat2(queue) & QUEUE_STAT2_UNDERFLOW);
+	return __qmgr_get_stat2(queue) & QUEUE_STAT2_UNDERFLOW;
 }
 
+/**
+ * qmgr_stat_empty() - checks if a hardware queue experienced overflow
+ * @queue:	queue number
+ *
+ * Returns non-zero value if empty.
+ */
 static inline int qmgr_stat_overflow(unsigned int queue)
 {
-	return !!(qmgr_get_stat2(queue) & QUEUE_STAT2_OVERFLOW);
+	return __qmgr_get_stat2(queue) & QUEUE_STAT2_OVERFLOW;
 }
 
 #endif
diff --git a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
index bfddc73..7531bfd 100644
--- a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
+++ b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
@@ -18,8 +18,8 @@ struct qmgr_regs __iomem *qmgr_regs;
 static struct resource *mem_res;
 static spinlock_t qmgr_lock;
 static u32 used_sram_bitmap[4]; /* 128 16-dword pages */
-static void (*irq_handlers[HALF_QUEUES])(void *pdev);
-static void *irq_pdevs[HALF_QUEUES];
+static void (*irq_handlers[QUEUES])(void *pdev);
+static void *irq_pdevs[QUEUES];
 
 #if DEBUG_QMGR
 char qmgr_queue_descs[QUEUES][32];
@@ -28,29 +28,38 @@ char qmgr_queue_descs[QUEUES][32];
 void qmgr_set_irq(unsigned int queue, int src,
 		  void (*handler)(void *pdev), void *pdev)
 {
-	u32 __iomem *reg = &qmgr_regs->irqsrc[queue / 8]; /* 8 queues / u32 */
-	int bit = (queue % 8) * 4; /* 3 bits + 1 reserved bit per queue */
 	unsigned long flags;
 
-	src &= 7;
 	spin_lock_irqsave(&qmgr_lock, flags);
-	__raw_writel((__raw_readl(reg) & ~(7 << bit)) | (src << bit), reg);
+	if (queue < HALF_QUEUES) {
+		u32 __iomem *reg;
+		int bit;
+		BUG_ON(src > QUEUE_IRQ_SRC_NOT_FULL);
+		reg = &qmgr_regs->irqsrc[queue >> 3]; /* 8 queues per u32 */
+		bit = (queue % 8) * 4; /* 3 bits + 1 reserved bit per queue */
+		__raw_writel((__raw_readl(reg) & ~(7 << bit)) | (src << bit),
+			     reg);
+	} else
+		/* IRQ source for queues 32-63 is fixed */
+		BUG_ON(src != QUEUE_IRQ_SRC_NOT_NEARLY_EMPTY);
+
 	irq_handlers[queue] = handler;
 	irq_pdevs[queue] = pdev;
 	spin_unlock_irqrestore(&qmgr_lock, flags);
 }
 
 
-static irqreturn_t qmgr_irq1(int irq, void *pdev)
+static irqreturn_t qmgr_irq(int irq, void *pdev)
 {
-	int i;
-	u32 val = __raw_readl(&qmgr_regs->irqstat[0]);
-	__raw_writel(val, &qmgr_regs->irqstat[0]); /* ACK */
+	int i, half = (irq == IRQ_IXP4XX_QM1 ? 0 : 1);
+	u32 val = __raw_readl(&qmgr_regs->irqstat[half]);
+	__raw_writel(val, &qmgr_regs->irqstat[half]); /* ACK */
 
 	for (i = 0; i < HALF_QUEUES; i++)
-		if (val & (1 << i))
-			irq_handlers[i](irq_pdevs[i]);
-
+		if (val & (1 << i)) {
+			int irq = half * HALF_QUEUES + i;
+			irq_handlers[irq](irq_pdevs[irq]);
+		}
 	return val ? IRQ_HANDLED : 0;
 }
 
@@ -58,21 +67,25 @@ static irqreturn_t qmgr_irq1(int irq, void *pdev)
 void qmgr_enable_irq(unsigned int queue)
 {
 	unsigned long flags;
+	int half = queue / 32;
+	u32 mask = 1 << (queue & (HALF_QUEUES - 1));
 
 	spin_lock_irqsave(&qmgr_lock, flags);
-	__raw_writel(__raw_readl(&qmgr_regs->irqen[0]) | (1 << queue),
-		     &qmgr_regs->irqen[0]);
+	__raw_writel(__raw_readl(&qmgr_regs->irqen[half]) | mask,
+		     &qmgr_regs->irqen[half]);
 	spin_unlock_irqrestore(&qmgr_lock, flags);
 }
 
 void qmgr_disable_irq(unsigned int queue)
 {
 	unsigned long flags;
+	int half = queue / 32;
+	u32 mask = 1 << (queue & (HALF_QUEUES - 1));
 
 	spin_lock_irqsave(&qmgr_lock, flags);
-	__raw_writel(__raw_readl(&qmgr_regs->irqen[0]) & ~(1 << queue),
-		     &qmgr_regs->irqen[0]);
-	__raw_writel(1 << queue, &qmgr_regs->irqstat[0]); /* clear */
+	__raw_writel(__raw_readl(&qmgr_regs->irqen[half]) & ~mask,
+		     &qmgr_regs->irqen[half]);
+	__raw_writel(mask, &qmgr_regs->irqstat[half]); /* clear */
 	spin_unlock_irqrestore(&qmgr_lock, flags);
 }
 
@@ -98,8 +111,7 @@ int __qmgr_request_queue(unsigned int queue, unsigned int len /* dwords */,
 	u32 cfg, addr = 0, mask[4]; /* in 16-dwords */
 	int err;
 
-	if (queue >= HALF_QUEUES)
-		return -ERANGE;
+	BUG_ON(queue >= QUEUES);
 
 	if ((nearly_empty_watermark | nearly_full_watermark) & ~7)
 		return -EINVAL;
@@ -180,7 +192,7 @@ void qmgr_release_queue(unsigned int queue)
 {
 	u32 cfg, addr, mask[4];
 
-	BUG_ON(queue >= HALF_QUEUES); /* not in valid range */
+	BUG_ON(queue >= QUEUES); /* not in valid range */
 
 	spin_lock_irq(&qmgr_lock);
 	cfg = __raw_readl(&qmgr_regs->sram[queue]);
@@ -247,10 +259,13 @@ static int qmgr_init(void)
 		__raw_writel(0, &qmgr_regs->irqen[i]);
 	}
 
+	__raw_writel(0xFFFFFFFF, &qmgr_regs->statne_h);
+	__raw_writel(0, &qmgr_regs->statf_h);
+
 	for (i = 0; i < QUEUES; i++)
 		__raw_writel(0, &qmgr_regs->sram[i]);
 
-	err = request_irq(IRQ_IXP4XX_QM1, qmgr_irq1, 0,
+	err = request_irq(IRQ_IXP4XX_QM1, qmgr_irq, 0,
 			  "IXP4xx Queue Manager", NULL);
 	if (err) {
 		printk(KERN_ERR "qmgr: failed to request IRQ%i\n",
@@ -258,12 +273,22 @@ static int qmgr_init(void)
 		goto error_irq;
 	}
 
+	err = request_irq(IRQ_IXP4XX_QM2, qmgr_irq, 0,
+			  "IXP4xx Queue Manager", NULL);
+	if (err) {
+		printk(KERN_ERR "qmgr: failed to request IRQ%i\n",
+		       IRQ_IXP4XX_QM2);
+		goto error_irq2;
+	}
+
 	used_sram_bitmap[0] = 0xF; /* 4 first pages reserved for config */
 	spin_lock_init(&qmgr_lock);
 
 	printk(KERN_INFO "IXP4xx Queue Manager initialized.\n");
 	return 0;
 
+error_irq2:
+	free_irq(IRQ_IXP4XX_QM1, NULL);
 error_irq:
 	iounmap(qmgr_regs);
 error_map:
@@ -274,7 +299,9 @@ error_map:
 static void qmgr_remove(void)
 {
 	free_irq(IRQ_IXP4XX_QM1, NULL);
+	free_irq(IRQ_IXP4XX_QM2, NULL);
 	synchronize_irq(IRQ_IXP4XX_QM1);
+	synchronize_irq(IRQ_IXP4XX_QM2);
 	iounmap(qmgr_regs);
 	release_mem_region(IXP4XX_QMGR_BASE_PHYS, IXP4XX_QMGR_REGION_SIZE);
 }
-- 
cgit v0.10.2


From 89f536ccfa8b370ff4d054f4061858ca9322c25a Mon Sep 17 00:00:00 2001
From: Stephane Chatty <chatty@enac.fr>
Date: Wed, 20 May 2009 15:41:24 +0200
Subject: HID: add new multitouch and digitizer contants

Added constants to hid.h for all digitizer usages (including the new multitouch
ones that are not yet in the official USB spec but are being pushed by Microsft
as described in their paper "Digitizer Drivers for Windows Touch and Pen-Based
Computers"). Updated hid-debug.c to support the new MT input constants such as
ABS_MT_POSITION_X.

Signed-off-by: Stephane Chatty <chatty@enac.fr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 47ac1a7..04359ed 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -137,6 +137,14 @@ static const struct hid_usage_entry hid_usage_table[] = {
     {0, 0x44, "BarrelSwitch"},
     {0, 0x45, "Eraser"},
     {0, 0x46, "TabletPick"},
+    {0, 0x47, "Confidence"},
+    {0, 0x48, "Width"},
+    {0, 0x49, "Height"},
+    {0, 0x51, "ContactID"},
+    {0, 0x52, "InputMode"},
+    {0, 0x53, "DeviceIndex"},
+    {0, 0x54, "ContactCount"},
+    {0, 0x55, "ContactMaximumNumber"},
   { 15, 0, "PhysicalInterfaceDevice" },
     {0, 0x00, "Undefined"},
     {0, 0x01, "Physical_Interface_Device"},
@@ -514,9 +522,11 @@ static const char *events[EV_MAX + 1] = {
 	[EV_FF_STATUS] = "ForceFeedbackStatus",
 };
 
-static const char *syncs[2] = {
+static const char *syncs[3] = {
 	[SYN_REPORT] = "Report",		[SYN_CONFIG] = "Config",
+	[SYN_MT_REPORT] = "MT Report",
 };
+
 static const char *keys[KEY_MAX + 1] = {
 	[KEY_RESERVED] = "Reserved",		[KEY_ESC] = "Esc",
 	[KEY_1] = "1",				[KEY_2] = "2",
@@ -734,8 +744,17 @@ static const char *absolutes[ABS_MAX + 1] = {
 	[ABS_HAT2Y] = "Hat2Y",		[ABS_HAT3X] = "Hat3X",
 	[ABS_HAT3Y] = "Hat 3Y",		[ABS_PRESSURE] = "Pressure",
 	[ABS_DISTANCE] = "Distance",	[ABS_TILT_X] = "XTilt",
-	[ABS_TILT_Y] = "YTilt",		[ABS_TOOL_WIDTH] = "Tool Width",
+	[ABS_TILT_Y] = "YTilt",		[ABS_TOOL_WIDTH] = "ToolWidth",
 	[ABS_VOLUME] = "Volume",	[ABS_MISC] = "Misc",
+	[ABS_MT_TOUCH_MAJOR] = "MTMajor",
+	[ABS_MT_TOUCH_MINOR] = "MTMinor",
+	[ABS_MT_WIDTH_MAJOR] = "MTMajorW",
+	[ABS_MT_WIDTH_MINOR] = "MTMinorW",
+	[ABS_MT_ORIENTATION] = "MTOrientation",
+	[ABS_MT_POSITION_X] = "MTPositionX",
+	[ABS_MT_POSITION_Y] = "MTPositionY",
+	[ABS_MT_TOOL_TYPE] = "MTToolType",
+	[ABS_MT_BLOB_ID] = "MTBlobID",
 };
 
 static const char *misc[MSC_MAX + 1] = {
diff --git a/include/linux/hid.h b/include/linux/hid.h
index a72876e..53489fd 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -238,6 +238,42 @@ struct hid_item {
 #define HID_GD_RIGHT		0x00010092
 #define HID_GD_LEFT		0x00010093
 
+#define HID_DG_DIGITIZER	0x000d0001
+#define HID_DG_PEN		0x000d0002
+#define HID_DG_LIGHTPEN		0x000d0003
+#define HID_DG_TOUCHSCREEN	0x000d0004
+#define HID_DG_TOUCHPAD		0x000d0005
+#define HID_DG_STYLUS		0x000d0020
+#define HID_DG_PUCK		0x000d0021
+#define HID_DG_FINGER		0x000d0022
+#define HID_DG_TIPPRESSURE	0x000d0030
+#define HID_DG_BARRELPRESSURE	0x000d0031
+#define HID_DG_INRANGE		0x000d0032
+#define HID_DG_TOUCH		0x000d0033
+#define HID_DG_UNTOUCH		0x000d0034
+#define HID_DG_TAP		0x000d0035
+#define HID_DG_TABLETFUNCTIONKEY	0x000d0039
+#define HID_DG_PROGRAMCHANGEKEY	0x000d003a
+#define HID_DG_INVERT		0x000d003c
+#define HID_DG_TIPSWITCH	0x000d0042
+#define HID_DG_TIPSWITCH2	0x000d0043
+#define HID_DG_BARRELSWITCH	0x000d0044
+#define HID_DG_ERASER		0x000d0045
+#define HID_DG_TABLETPICK	0x000d0046
+/*
+ * as of May 20, 2009 the usages below are not yet in the official USB spec
+ * but are being pushed by Microsft as described in their paper "Digitizer
+ * Drivers for Windows Touch and Pen-Based Computers"
+ */
+#define HID_DG_CONFIDENCE	0x000d0047
+#define HID_DG_WIDTH		0x000d0048
+#define HID_DG_HEIGHT		0x000d0049
+#define HID_DG_CONTACTID	0x000d0051
+#define HID_DG_INPUTMODE	0x000d0052
+#define HID_DG_DEVICEINDEX	0x000d0053
+#define HID_DG_CONTACTCOUNT	0x000d0054
+#define HID_DG_CONTACTMAX	0x000d0055
+
 /*
  * HID report types --- Ouch! HID spec says 1 2 3!
  */
-- 
cgit v0.10.2


From 57fd637ad9ac6b13c1c47b9a0ced4ee99bb26e76 Mon Sep 17 00:00:00 2001
From: Stephane Chatty <chatty@enac.fr>
Date: Wed, 20 May 2009 15:49:35 +0200
Subject: HID: Multitouch support for the N-Trig touchscreen

Adds support for multitouch interaction on the N-Trig touchscreen, using the
new ABS_MT_* input constants. Single touch support works as previously. This
code was tested against two versions of the N- Trig firmware: one that supports
dual pen/finger single touch, and one that supports finger multitouch but no
pen at all. Copyright notices that looked wrong were removed, as it seems that
there is only code written in 2009 by Rafin Rubin and Stephane Chatty in this
file.

Signed-off-by: Stephane Chatty <chatty@enac.fr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c
index c5b252b..75ed9d2 100644
--- a/drivers/hid/hid-ntrig.c
+++ b/drivers/hid/hid-ntrig.c
@@ -1,13 +1,8 @@
 /*
- *  HID driver for some ntrig "special" devices
+ *  HID driver for N-Trig touchscreens
  *
- *  Copyright (c) 1999 Andreas Gal
- *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
- *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
- *  Copyright (c) 2006-2007 Jiri Kosina
- *  Copyright (c) 2007 Paul Walmsley
- *  Copyright (c) 2008 Jiri Slaby
  *  Copyright (c) 2008 Rafi Rubin
+ *  Copyright (c) 2009 Stephane Chatty
  *
  */
 
@@ -29,15 +24,79 @@
 #define nt_map_key_clear(c)	hid_map_usage_clear(hi, usage, bit, max, \
 					EV_KEY, (c))
 
+struct ntrig_data {
+	__s32 x, y, id, w, h;
+	char reading_a_point, found_contact_id;
+};
+
+/*
+ * this driver is aimed at two firmware versions in circulation:
+ *  - dual pen/finger single touch
+ *  - finger multitouch, pen not working
+ */
+
 static int ntrig_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 		struct hid_field *field, struct hid_usage *usage,
 		unsigned long **bit, int *max)
 {
-	if ((usage->hid & HID_USAGE_PAGE) == HID_UP_DIGITIZER &&
-			(usage->hid & 0xff) == 0x47) {
-		nt_map_key_clear(BTN_TOOL_DOUBLETAP);
-		return 1;
+	switch (usage->hid & HID_USAGE_PAGE) {
+
+	case HID_UP_GENDESK:
+		switch (usage->hid) {
+		case HID_GD_X:
+			hid_map_usage(hi, usage, bit, max,
+					EV_ABS, ABS_MT_POSITION_X);
+			input_set_abs_params(hi->input, ABS_X,
+					field->logical_minimum,
+					field->logical_maximum, 0, 0);
+			return 1;
+		case HID_GD_Y:
+			hid_map_usage(hi, usage, bit, max,
+					EV_ABS, ABS_MT_POSITION_Y);
+			input_set_abs_params(hi->input, ABS_Y,
+					field->logical_minimum,
+					field->logical_maximum, 0, 0);
+			return 1;
+		}
+		return 0;
+
+	case HID_UP_DIGITIZER:
+		switch (usage->hid) {
+		/* we do not want to map these for now */
+		case HID_DG_INVERT: /* value is always 0 */
+		case HID_DG_ERASER: /* value is always 0 */
+		case HID_DG_CONTACTID: /* value is useless */
+		case HID_DG_BARRELSWITCH:  /* doubtful */
+		case HID_DG_INPUTMODE:
+		case HID_DG_DEVICEINDEX:
+		case HID_DG_CONTACTCOUNT:
+		case HID_DG_CONTACTMAX:
+			return -1;
+
+		/* original mapping by Rafi Rubin */
+		case HID_DG_CONFIDENCE:
+			nt_map_key_clear(BTN_TOOL_DOUBLETAP);
+			return 1;
+
+		/* width/height mapped on TouchMajor/TouchMinor/Orientation */
+		case HID_DG_WIDTH:
+			hid_map_usage(hi, usage, bit, max,
+					EV_ABS, ABS_MT_TOUCH_MAJOR);
+			return 1;
+		case HID_DG_HEIGHT:
+			hid_map_usage(hi, usage, bit, max,
+					EV_ABS, ABS_MT_TOUCH_MINOR);
+			input_set_abs_params(hi->input, ABS_MT_ORIENTATION,
+					0, 1, 0, 0);
+			return 1;
+		}
+		return 0;
+
+	case 0xff000000:
+		/* we do not want to map these: no input-oriented meaning */
+		return -1;
 	}
+
 	return 0;
 }
 
@@ -51,6 +110,138 @@ static int ntrig_input_mapped(struct hid_device *hdev, struct hid_input *hi,
 
 	return 0;
 }
+
+/*
+ * this function is called upon all reports
+ * so that we can filter contact point information,
+ * decide whether we are in multi or single touch mode
+ * and call input_mt_sync after each point if necessary
+ */
+static int ntrig_event (struct hid_device *hid, struct hid_field *field,
+		                        struct hid_usage *usage, __s32 value)
+{
+	struct input_dev *input = field->hidinput->input;
+	struct ntrig_data *nd = hid_get_drvdata(hid);
+
+        if (hid->claimed & HID_CLAIMED_INPUT) {
+		switch (usage->hid) {
+		case HID_GD_X:
+			nd->x = value;
+			nd->reading_a_point = 1;
+			break;
+		case HID_GD_Y:
+			nd->y = value;
+			break;
+		case HID_DG_CONTACTID:
+			nd->id = value;
+			/* we receive this only when in multitouch mode */
+			nd->found_contact_id = 1;
+			break;
+		case HID_DG_WIDTH:
+			nd->w = value;
+			break;
+		case HID_DG_HEIGHT:
+			nd->h = value;
+			/*
+			 * when in single touch mode, this is the last
+			 * report received in a finger event. We want
+			 * to emit a normal (X, Y) position
+			 */
+			if (! nd->found_contact_id) {
+				input_event(input, EV_ABS, ABS_X, nd->x);
+				input_event(input, EV_ABS, ABS_Y, nd->y);
+			}
+			break;
+		case HID_DG_TIPPRESSURE:
+			/*
+			 * when in single touch mode, this is the last
+			 * report received in a pen event. We want
+			 * to emit a normal (X, Y) position
+			 */
+			if (! nd->found_contact_id) {
+				input_event(input, EV_ABS, ABS_X, nd->x);
+				input_event(input, EV_ABS, ABS_Y, nd->y);
+				input_event(input, EV_ABS, ABS_PRESSURE, value);
+			}
+			break;
+		case 0xff000002:
+			/*
+			 * we receive this when the device is in multitouch
+			 * mode. The first of the three values tagged with
+			 * this usage tells if the contact point is real
+			 * or a placeholder
+			 */
+			if (!nd->reading_a_point || value != 1)
+				break;
+			/* emit a normal (X, Y) for the first point only */
+			if (nd->id == 0) {
+				input_event(input, EV_ABS, ABS_X, nd->x);
+				input_event(input, EV_ABS, ABS_Y, nd->y);
+			}
+			input_event(input, EV_ABS, ABS_MT_POSITION_X, nd->x);
+			input_event(input, EV_ABS, ABS_MT_POSITION_Y, nd->y);
+			if (nd->w > nd->h) {
+				input_event(input, EV_ABS,
+						ABS_MT_ORIENTATION, 1);
+				input_event(input, EV_ABS,
+						ABS_MT_TOUCH_MAJOR, nd->w);
+				input_event(input, EV_ABS,
+						ABS_MT_TOUCH_MINOR, nd->h);
+			} else {
+				input_event(input, EV_ABS,
+						ABS_MT_ORIENTATION, 0);
+				input_event(input, EV_ABS,
+						ABS_MT_TOUCH_MAJOR, nd->h);
+				input_event(input, EV_ABS,
+						ABS_MT_TOUCH_MINOR, nd->w);
+			}
+			input_mt_sync(field->hidinput->input);
+			nd->reading_a_point = 0;
+			nd->found_contact_id = 0;
+			break;
+
+		default:
+			/* fallback to the generic hidinput handling */
+			return 0;
+		}
+	}
+
+	/* we have handled the hidinput part, now remains hiddev */
+        if (hid->claimed & HID_CLAIMED_HIDDEV && hid->hiddev_hid_event)
+                hid->hiddev_hid_event(hid, field, usage, value);
+
+	return 1;
+}
+
+static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+	int ret;
+	struct ntrig_data *nd;
+
+	nd = kmalloc(sizeof(struct ntrig_data), GFP_KERNEL);
+	if (!nd) {
+		dev_err(&hdev->dev, "cannot allocate N-Trig data\n");
+		return -ENOMEM;
+	}
+	nd->reading_a_point = 0;
+	nd->found_contact_id = 0;
+	hid_set_drvdata(hdev, nd);
+
+	ret = hid_parse(hdev);
+	if (!ret)
+		ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+
+	if (ret)
+		kfree (nd);
+	return ret;
+}
+
+static void ntrig_remove(struct hid_device *hdev)
+{
+	hid_hw_stop(hdev);
+	kfree(hid_get_drvdata(hdev));
+}
+
 static const struct hid_device_id ntrig_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN),
 		.driver_data = NTRIG_DUPLICATE_USAGES },
@@ -58,11 +249,20 @@ static const struct hid_device_id ntrig_devices[] = {
 };
 MODULE_DEVICE_TABLE(hid, ntrig_devices);
 
+static const struct hid_usage_id ntrig_grabbed_usages[] = {
+	{ HID_ANY_ID, HID_ANY_ID, HID_ANY_ID },
+	{ HID_ANY_ID - 1, HID_ANY_ID - 1, HID_ANY_ID - 1}
+};
+
 static struct hid_driver ntrig_driver = {
 	.name = "ntrig",
 	.id_table = ntrig_devices,
+	.probe = ntrig_probe,
+	.remove = ntrig_remove,
 	.input_mapping = ntrig_input_mapping,
 	.input_mapped = ntrig_input_mapped,
+	.usage_table = ntrig_grabbed_usages,
+	.event = ntrig_event,
 };
 
 static int ntrig_init(void)
-- 
cgit v0.10.2


From 3e3ee6dc949d25423f72bb5ee5f58dfa79ac85c7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 20 May 2009 16:47:59 +0200
Subject: ALSA: ctxfi - Add depends on X86

The ctxfi driver requires explicitly the 4k page size, and gives a
build error on architectures with non-4k pages.
As a workaround, just add the kconfig dependency on X86, which is
the only architecture ever tested.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 3a7640f..2d7fef5 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -277,6 +277,7 @@ config SND_CS5535AUDIO
 
 config SND_CTXFI
 	tristate "Creative Sound Blaster X-Fi"
+	depends on X86
 	select SND_PCM
 	help
 	  If you want to use soundcards based on Creative Sound Blastr X-Fi
-- 
cgit v0.10.2


From b3b778b387ed3849ebc4a51baf8617be90df6625 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 20 May 2009 17:05:52 +0200
Subject: ALSA: pcsp - fix printk format warning again

The commit 5a641bcd6398841cc4606b0a732d41a09256fd94 changed the
printk format to '%lu', but the value passed seems to be dependent
on the architecture.  On x86-64, I got a new warning now because an
int value is passed actaully.

As a workaround, just cast the value always to unsigned long.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/drivers/pcsp/pcsp_mixer.c b/sound/drivers/pcsp/pcsp_mixer.c
index 771955a..199b033 100644
--- a/sound/drivers/pcsp/pcsp_mixer.c
+++ b/sound/drivers/pcsp/pcsp_mixer.c
@@ -51,7 +51,7 @@ static int pcsp_treble_info(struct snd_kcontrol *kcontrol,
 	if (uinfo->value.enumerated.item > chip->max_treble)
 		uinfo->value.enumerated.item = chip->max_treble;
 	sprintf(uinfo->value.enumerated.name, "%lu",
-			PCSP_CALC_RATE(uinfo->value.enumerated.item));
+		(unsigned long)PCSP_CALC_RATE(uinfo->value.enumerated.item));
 	return 0;
 }
 
-- 
cgit v0.10.2


From 0003b795c310da83501fcf0329f6be7a0984647d Mon Sep 17 00:00:00 2001
From: Eric Lammerts <linux-arm@lists.lammerts.org>
Date: Tue, 19 May 2009 20:53:20 -0400
Subject: fix oops when using console=ttymxcN with N > 0

Signed-off-by: Eric Lammerts <eric@lammerts.org>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 3f5d5a2..738c8a5 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -1024,6 +1024,8 @@ imx_console_setup(struct console *co, char *options)
 	if (co->index == -1 || co->index >= ARRAY_SIZE(imx_ports))
 		co->index = 0;
 	sport = imx_ports[co->index];
+	if(sport == NULL)
+		return -ENODEV;
 
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
-- 
cgit v0.10.2


From 5537937696c55530447c20aa27daccb8d0d29b33 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Mon, 18 May 2009 23:04:46 +0800
Subject: ftrace: fix check for return value of register_module_notifier in
 event_trace_init

register_module_notifier() returns zero in the success case.
So fix the inverted fail case check in trace events modules
handler.

[ Impact: fix spurious warning on ftrace initialization]

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0eec0c5..9e91c4a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1174,7 +1174,7 @@ static __init int event_trace_init(void)
 	}
 
 	ret = register_module_notifier(&trace_module_nb);
-	if (!ret)
+	if (ret)
 		pr_warning("Failed to register trace events module notifier\n");
 
 	return 0;
-- 
cgit v0.10.2


From 950312ce22696ddfa42a957bdadaa9c24151e679 Mon Sep 17 00:00:00 2001
From: Andrew Randrianasulu <randrik_a@yahoo.com>
Date: Thu, 14 May 2009 11:29:27 -0700
Subject: MIPS: IP32: Fix hang on shutdown in power button interrupt handler.

The hang was caused by the use of disable_irq() from the interrupt handler
itself.  Fixed by the use of disable_irq_nosync().  The issue was
triggered by:

    commit 3aa551c9b4c40018f0e261a178e3d25478dc04a9
    Author: Thomas Gleixner <tglx@linutronix.de>
    Date:   Mon Mar 23 18:28:15 2009 +0100

        genirq: add threaded interrupt handler support

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index b6cab08..667da93 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -145,7 +145,7 @@ static irqreturn_t ip32_rtc_int(int irq, void *dev_id)
 			"%s: RTC IRQ without RTC_IRQF\n", __func__);
 	}
 	/* Wait until interrupt goes away */
-	disable_irq(MACEISA_RTC_IRQ);
+	disable_irq_nosync(MACEISA_RTC_IRQ);
 	init_timer(&debounce_timer);
 	debounce_timer.function = debounce;
 	debounce_timer.expires = jiffies + 50;
-- 
cgit v0.10.2


From 7e9e05cad94217498e4d9bd6ef7137b4e9e7ed64 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Sat, 16 May 2009 12:23:45 +0100
Subject: MIPS: IP22: Fix hang in power button interrupt handler

The hang was caused by the use of disable_irq() from the interrupt handler
itself.  Fixed by the use of disable_irq_nosync().  The issue was
triggered by:

    commit 3aa551c9b4c40018f0e261a178e3d25478dc04a9
    Author: Thomas Gleixner <tglx@linutronix.de>
    Date:   Mon Mar 23 18:28:15 2009 +0100

        genirq: add threaded interrupt handler support

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c
index 4ad5c33..45b6694 100644
--- a/arch/mips/sgi-ip22/ip22-reset.c
+++ b/arch/mips/sgi-ip22/ip22-reset.c
@@ -148,7 +148,7 @@ static irqreturn_t panel_int(int irq, void *dev_id)
 
 	if (sgint->istat1 & SGINT_ISTAT1_PWR) {
 		/* Wait until interrupt goes away */
-		disable_irq(SGI_PANEL_IRQ);
+		disable_irq_nosync(SGI_PANEL_IRQ);
 		init_timer(&debounce_timer);
 		debounce_timer.function = debounce;
 		debounce_timer.expires = jiffies + 5;
-- 
cgit v0.10.2


From 195d1a96ae5fdfbedb8dc4b97afee578921fa99e Mon Sep 17 00:00:00 2001
From: peter fuerst <post@pfrst.de>
Date: Sun, 17 May 2009 23:49:45 +0200
Subject: MIPS: IP28: Change to build with -mr10k-cache-barrier=store

Richard Sandiford's new code for inserting the cache-barriers, for GCC
4.3 and above and already incorporated in the current GCC-release, uses
a slightly different option-syntax.

Signed-off-by: peter fuerst <post@pfrst.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 26947ab..c4cae9e 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -473,12 +473,12 @@ endif
 # Simplified: what IP22 does at 128MB+ in ksegN, IP28 does at 512MB+ in xkphys
 #
 ifdef CONFIG_SGI_IP28
-  ifeq ($(call cc-option-yn,-mr10k-cache-barrier=1), n)
-      $(error gcc doesn't support needed option -mr10k-cache-barrier=1)
+  ifeq ($(call cc-option-yn,-mr10k-cache-barrier=store), n)
+      $(error gcc doesn't support needed option -mr10k-cache-barrier=store)
   endif
 endif
 core-$(CONFIG_SGI_IP28)		+= arch/mips/sgi-ip22/
-cflags-$(CONFIG_SGI_IP28)	+= -mr10k-cache-barrier=1 -I$(srctree)/arch/mips/include/asm/mach-ip28
+cflags-$(CONFIG_SGI_IP28)	+= -mr10k-cache-barrier=store -I$(srctree)/arch/mips/include/asm/mach-ip28
 load-$(CONFIG_SGI_IP28)		+= 0xa800000020004000
 
 #
-- 
cgit v0.10.2


From a5e696e5d0f1377ff6beb10d2f40edb6a3d1de18 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@snapgear.com>
Date: Wed, 20 May 2009 16:12:32 +1000
Subject: MIPS: 64-bit: Fix system lockup.

The address range size calculation inside local_flush_tlb_kernel_range()
is being truncated by a too small size variable holder on 64-bit systems.
The truncated size can result in an erroneous tlbsize check that means we
sit spinning inside a loop trying to flush a hige number of TLB entries.
This is for all intents and purposes a system hang. Fix by using an
appropriately sized valiable to hold the size.

[Ralf: Greg's original patch submission identified the issue and fixed one
instance in tlb-r4k.c but there there were several more.  For consistency
I also modified tlb-r3k.c even though that file is only used on 32-bit.]

Signed-off-by: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
index f0cf46a..1c0048a 100644
--- a/arch/mips/mm/tlb-r3k.c
+++ b/arch/mips/mm/tlb-r3k.c
@@ -82,8 +82,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 	int cpu = smp_processor_id();
 
 	if (cpu_context(cpu, mm) != 0) {
-		unsigned long flags;
-		int size;
+		unsigned long size, flags;
 
 #ifdef DEBUG_TLB
 		printk("[tlbrange<%lu,0x%08lx,0x%08lx>]",
@@ -121,8 +120,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	unsigned long flags;
-	int size;
+	unsigned long size, flags;
 
 #ifdef DEBUG_TLB
 	printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end);
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 9619f66..892be42 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -117,8 +117,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 	int cpu = smp_processor_id();
 
 	if (cpu_context(cpu, mm) != 0) {
-		unsigned long flags;
-		int size;
+		unsigned long size, flags;
 
 		ENTER_CRITICAL(flags);
 		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
@@ -160,8 +159,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	unsigned long flags;
-	int size;
+	unsigned long size, flags;
 
 	ENTER_CRITICAL(flags);
 	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c
index 4f01a3b..4ec95cc 100644
--- a/arch/mips/mm/tlb-r8k.c
+++ b/arch/mips/mm/tlb-r8k.c
@@ -111,8 +111,7 @@ out_restore:
 /* Usable for KV1 addresses only! */
 void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	unsigned long flags;
-	int size;
+	unsigned long size, flags;
 
 	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 	size = (size + 1) >> 1;
-- 
cgit v0.10.2


From 34adc8062227f41b04ade0ff3fbd1dbe3002669e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 20 May 2009 20:13:28 +0200
Subject: perf_counter: Fix context removal deadlock

Disable the PMU globally before removing a counter from a
context. This fixes the following lockup:

[22081.741922] ------------[ cut here ]------------
[22081.746668] WARNING: at arch/x86/kernel/cpu/perf_counter.c:803 intel_pmu_handle_irq+0x9b/0x24e()
[22081.755624] Hardware name: X8DTN
[22081.758903] perfcounters: irq loop stuck!
[22081.762985] Modules linked in:
[22081.766136] Pid: 11082, comm: perf Not tainted 2.6.30-rc6-tip #226
[22081.772432] Call Trace:
[22081.774940]  <NMI>  [<ffffffff81019aed>] ? intel_pmu_handle_irq+0x9b/0x24e
[22081.781993]  [<ffffffff81019aed>] ? intel_pmu_handle_irq+0x9b/0x24e
[22081.788368]  [<ffffffff8104505c>] ? warn_slowpath_common+0x77/0xa3
[22081.794649]  [<ffffffff810450d3>] ? warn_slowpath_fmt+0x40/0x45
[22081.800696]  [<ffffffff81019aed>] ? intel_pmu_handle_irq+0x9b/0x24e
[22081.807080]  [<ffffffff814d1a72>] ? perf_counter_nmi_handler+0x3f/0x4a
[22081.813751]  [<ffffffff814d2d09>] ? notifier_call_chain+0x58/0x86
[22081.819951]  [<ffffffff8105b250>] ? notify_die+0x2d/0x32
[22081.825392]  [<ffffffff814d1414>] ? do_nmi+0x8e/0x242
[22081.830538]  [<ffffffff814d0f0a>] ? nmi+0x1a/0x20
[22081.835342]  [<ffffffff8117e102>] ? selinux_file_free_security+0x0/0x1a
[22081.842105]  [<ffffffff81018793>] ? x86_pmu_disable_counter+0x15/0x41
[22081.848673]  <<EOE>>  [<ffffffff81018f3d>] ? x86_pmu_disable+0x86/0x103
[22081.855512]  [<ffffffff8108fedd>] ? __perf_counter_remove_from_context+0x0/0xfe
[22081.862926]  [<ffffffff8108fcbc>] ? counter_sched_out+0x30/0xce
[22081.868909]  [<ffffffff8108ff36>] ? __perf_counter_remove_from_context+0x59/0xfe
[22081.876382]  [<ffffffff8106808a>] ? smp_call_function_single+0x6c/0xe6
[22081.882955]  [<ffffffff81091b96>] ? perf_release+0x86/0x14c
[22081.888600]  [<ffffffff810c4c84>] ? __fput+0xe7/0x195
[22081.893718]  [<ffffffff810c213e>] ? filp_close+0x5b/0x62
[22081.899107]  [<ffffffff81046a70>] ? put_files_struct+0x64/0xc2
[22081.905031]  [<ffffffff8104841a>] ? do_exit+0x1e2/0x6ef
[22081.910360]  [<ffffffff814d0a60>] ? _spin_lock_irqsave+0x9/0xe
[22081.916292]  [<ffffffff8104898e>] ? do_group_exit+0x67/0x93
[22081.921953]  [<ffffffff810489cc>] ? sys_exit_group+0x12/0x16
[22081.927759]  [<ffffffff8100baab>] ? system_call_fastpath+0x16/0x1b
[22081.934076] ---[ end trace 3a3936ce3e1b4505 ]---

And could potentially also fix the lockup reported by Marcelo Tosatti.

Also, print more debug info in case of a detected lockup.

[ Impact: fix lockup ]

Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index c109819..6cc1660 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -740,6 +740,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 again:
 	if (++loops > 100) {
 		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
+		perf_counter_print_debug();
 		return 1;
 	}
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 69d4de8..08584c1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -208,18 +208,17 @@ static void __perf_counter_remove_from_context(void *info)
 		return;
 
 	spin_lock_irqsave(&ctx->lock, flags);
+	/*
+	 * Protect the list operation against NMI by disabling the
+	 * counters on a global level.
+	 */
+	perf_disable();
 
 	counter_sched_out(counter, cpuctx, ctx);
 
 	counter->task = NULL;
 
-	/*
-	 * Protect the list operation against NMI by disabling the
-	 * counters on a global level. NOP for non NMI based counters.
-	 */
-	perf_disable();
 	list_del_counter(counter, ctx);
-	perf_enable();
 
 	if (!ctx->task) {
 		/*
@@ -231,6 +230,7 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
+	perf_enable();
 	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
-- 
cgit v0.10.2


From c6ac4c18fbc92a26df71ece609b082bc3099676b Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 20 May 2009 11:26:09 -0700
Subject: x86, boot: correct the calculation of ZO_INIT_SIZE

Correct the calculation of ZO_INIT_SIZE (the amount of memory we need
during decompression).  One symbol (ZO_startup_32) was missing from
zoffset.h, and another (ZO_z_extract_offset) was misspelled.

[ Impact: build fix ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 75e0301..619d297 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -95,7 +95,7 @@ targets += voffset.h
 $(obj)/voffset.h: vmlinux FORCE
 	$(call if_changed,voffset)
 
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
 
 quiet_cmd_zoffset = ZOFFSET $@
       cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 68c3bfb..1040f6e 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -226,7 +226,7 @@ setup_data:		.quad 0			# 64-bit physical pointer to
 
 pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 
-#define ZO_INIT_SIZE	(ZO__end - ZO_startup_32 + ZO_extract_offset)
+#define ZO_INIT_SIZE	(ZO__end - ZO_startup_32 + ZO_z_extract_offset)
 #define VO_INIT_SIZE	(VO__end - VO__text)
 #if ZO_INIT_SIZE > VO_INIT_SIZE
 #define INIT_SIZE ZO_INIT_SIZE
-- 
cgit v0.10.2


From 9b6fe313bfce27d4a261257da70196be0ac2bef5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <daenzer@vmware.com>
Date: Wed, 20 May 2009 13:32:00 +0200
Subject: drm: Copy back ioctl data to userspace regardless of return code.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes a regression from commit 9d5b3ffc42f7820e8ee07705496955e4c2c38dd9
('drm: fixup some of the ioctl function exit paths'): The vblank ioctl
needs to update the userspace parameters when interrupted by a signal,
which was prevented by the return code check. This could cause the X
server to hang in drmWaitVBlank().

Signed-off-by: Michel Dänzer <daenzer@vmware.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index f01def1..019b7c5 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -481,7 +481,7 @@ int drm_ioctl(struct inode *inode, struct file *filp,
 		}
 		retcode = func(dev, kdata, file_priv);
 
-		if ((retcode == 0) && (cmd & IOC_OUT)) {
+		if (cmd & IOC_OUT) {
 			if (copy_to_user((void __user *)arg, kdata,
 					 _IOC_SIZE(cmd)) != 0)
 				retcode = -EFAULT;
-- 
cgit v0.10.2


From d53b48d512ef477c939aba09c7e258b8dc331b6a Mon Sep 17 00:00:00 2001
From: Seokmann Ju <seokmann.ju@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:37 -0700
Subject: [SCSI] qla2xxx: Correct bus-reset behaviour with recent ISPs.

The short-circuit to skip the non-applicable 'full-login-lip'
process on 81xx ISPs was nested too deeply in the 'bus-reset'
routine, as the code in qla2x00_loop_reset() should skip the
whole enable_lip_full_login process.  The original code could
cause device tear-down due to the qla2x00_wait_for_loop_ready()
call taking a large amount of time.

Signed-off-by: Seokmann Ju <seokmann.ju@qlogic.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index e67c166..1458438 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -1864,9 +1864,6 @@ qla2x00_full_login_lip(scsi_qla_host_t *vha)
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	if (IS_QLA81XX(vha->hw))
-	    return QLA_SUCCESS;
-
 	DEBUG11(printk("qla2x00_full_login_lip(%ld): entered.\n",
 	    vha->host_no));
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index e4fdcda..29234ba 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1037,7 +1037,8 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
 	struct fc_port *fcport;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (ha->flags.enable_lip_full_login && !vha->vp_idx) {
+	if (ha->flags.enable_lip_full_login && !vha->vp_idx &&
+	    !IS_QLA81XX(ha)) {
 		ret = qla2x00_full_login_lip(vha);
 		if (ret != QLA_SUCCESS) {
 			DEBUG2_3(printk("%s(%ld): failed: "
-- 
cgit v0.10.2


From bad7001c200458c24864df6f2b1b66548bca7c75 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:38 -0700
Subject: [SCSI] qla2xxx: Export additional FCoE attributes for application
 support.

Cull and export VN_Port MAC address and VLAN_ID information on
supported FCoE ISPs.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index b09993a..5d44e3e 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1088,6 +1088,33 @@ qla2x00_flash_block_size_show(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "0x%x\n", ha->fdt_block_size);
 }
 
+static ssize_t
+qla2x00_vlan_id_show(struct device *dev, struct device_attribute *attr,
+    char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+	if (!IS_QLA81XX(vha->hw))
+		return snprintf(buf, PAGE_SIZE, "\n");
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vha->fcoe_vlan_id);
+}
+
+static ssize_t
+qla2x00_vn_port_mac_address_show(struct device *dev,
+    struct device_attribute *attr, char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+	if (!IS_QLA81XX(vha->hw))
+		return snprintf(buf, PAGE_SIZE, "\n");
+
+	return snprintf(buf, PAGE_SIZE, "%02x:%02x:%02x:%02x:%02x:%02x\n",
+	    vha->fcoe_vn_port_mac[5], vha->fcoe_vn_port_mac[4],
+	    vha->fcoe_vn_port_mac[3], vha->fcoe_vn_port_mac[2],
+	    vha->fcoe_vn_port_mac[1], vha->fcoe_vn_port_mac[0]);
+}
+
 static DEVICE_ATTR(driver_version, S_IRUGO, qla2x00_drvr_version_show, NULL);
 static DEVICE_ATTR(fw_version, S_IRUGO, qla2x00_fw_version_show, NULL);
 static DEVICE_ATTR(serial_num, S_IRUGO, qla2x00_serial_num_show, NULL);
@@ -1116,6 +1143,9 @@ static DEVICE_ATTR(mpi_version, S_IRUGO, qla2x00_mpi_version_show, NULL);
 static DEVICE_ATTR(phy_version, S_IRUGO, qla2x00_phy_version_show, NULL);
 static DEVICE_ATTR(flash_block_size, S_IRUGO, qla2x00_flash_block_size_show,
 		   NULL);
+static DEVICE_ATTR(vlan_id, S_IRUGO, qla2x00_vlan_id_show, NULL);
+static DEVICE_ATTR(vn_port_mac_address, S_IRUGO,
+		   qla2x00_vn_port_mac_address_show, NULL);
 
 struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_driver_version,
@@ -1138,6 +1168,8 @@ struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_mpi_version,
 	&dev_attr_phy_version,
 	&dev_attr_flash_block_size,
+	&dev_attr_vlan_id,
+	&dev_attr_vn_port_mac_address,
 	NULL,
 };
 
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 714ee67..645cfd9 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2618,6 +2618,11 @@ typedef struct scsi_qla_host {
 	uint8_t		node_name[WWN_SIZE];
 	uint8_t		port_name[WWN_SIZE];
 	uint8_t		fabric_node_name[WWN_SIZE];
+
+	uint16_t	fcoe_vlan_id;
+	uint16_t	fcoe_fcf_idx;
+	uint8_t		fcoe_vn_port_mac[6];
+
 	uint32_t   	vp_abort_cnt;
 
 	struct fc_vport	*fc_vport;	/* holds fc_vport * for each vport */
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 1458438..dc5a1fe 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -931,6 +931,8 @@ qla2x00_get_adapter_id(scsi_qla_host_t *vha, uint16_t *id, uint8_t *al_pa,
 	mcp->mb[9] = vha->vp_idx;
 	mcp->out_mb = MBX_9|MBX_0;
 	mcp->in_mb = MBX_9|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
+	if (IS_QLA81XX(vha->hw))
+		mcp->in_mb |= MBX_13|MBX_12|MBX_11|MBX_10;
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
@@ -952,9 +954,19 @@ qla2x00_get_adapter_id(scsi_qla_host_t *vha, uint16_t *id, uint8_t *al_pa,
 		DEBUG2_3_11(printk("qla2x00_get_adapter_id(%ld): failed=%x.\n",
 		    vha->host_no, rval));
 	} else {
-		/*EMPTY*/
 		DEBUG11(printk("qla2x00_get_adapter_id(%ld): done.\n",
 		    vha->host_no));
+
+		if (IS_QLA81XX(vha->hw)) {
+			vha->fcoe_vlan_id = mcp->mb[9] & 0xfff;
+			vha->fcoe_fcf_idx = mcp->mb[10];
+			vha->fcoe_vn_port_mac[5] = mcp->mb[11] >> 8;
+			vha->fcoe_vn_port_mac[4] = mcp->mb[11] & 0xff;
+			vha->fcoe_vn_port_mac[3] = mcp->mb[12] >> 8;
+			vha->fcoe_vn_port_mac[2] = mcp->mb[12] & 0xff;
+			vha->fcoe_vn_port_mac[1] = mcp->mb[13] >> 8;
+			vha->fcoe_vn_port_mac[0] = mcp->mb[13] & 0xff;
+		}
 	}
 
 	return rval;
-- 
cgit v0.10.2


From 7640335ea5b1a2da0d64303e6003012c619ae01a Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:39 -0700
Subject: [SCSI] qla2xxx: Correct compilation failures when DEBUG'n' options
 are enabled.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index bd7dd84..9a343ec 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -634,7 +634,7 @@ qla2x00_chip_diag(scsi_qla_host_t *vha)
 		goto chip_diag_failed;
 
 	DEBUG3(printk("scsi(%ld): Reset register cleared by chip reset\n",
-	    ha->host_no));
+	    vha->host_no));
 
 	/* Reset RISC processor. */
 	WRT_REG_WORD(&reg->hccr, HCCR_RESET_RISC);
@@ -655,7 +655,7 @@ qla2x00_chip_diag(scsi_qla_host_t *vha)
 		goto chip_diag_failed;
 
 	/* Check product ID of chip */
-	DEBUG3(printk("scsi(%ld): Checking product ID of chip\n", ha->host_no));
+	DEBUG3(printk("scsi(%ld): Checking product ID of chip\n", vha->host_no));
 
 	mb[1] = RD_MAILBOX_REG(ha, reg, 1);
 	mb[2] = RD_MAILBOX_REG(ha, reg, 2);
@@ -2110,7 +2110,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
 		goto cleanup_allocation;
 
 	DEBUG3(printk("scsi(%ld): Entries in ID list (%d)\n",
-	    ha->host_no, entries));
+	    vha->host_no, entries));
 	DEBUG3(qla2x00_dump_buffer((uint8_t *)ha->gid_list,
 	    entries * sizeof(struct gid_list_info)));
 
@@ -3587,7 +3587,7 @@ qla24xx_nvram_config(scsi_qla_host_t *vha)
 	for (cnt = 0, chksum = 0; cnt < ha->nvram_size >> 2; cnt++)
 		chksum += le32_to_cpu(*dptr++);
 
-	DEBUG5(printk("scsi(%ld): Contents of NVRAM\n", ha->host_no));
+	DEBUG5(printk("scsi(%ld): Contents of NVRAM\n", vha->host_no));
 	DEBUG5(qla2x00_dump_buffer((uint8_t *)nv, ha->nvram_size));
 
 	/* Bad NVRAM data, set defaults parameters. */
@@ -4305,7 +4305,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
 	for (cnt = 0, chksum = 0; cnt < ha->nvram_size >> 2; cnt++)
 		chksum += le32_to_cpu(*dptr++);
 
-	DEBUG5(printk("scsi(%ld): Contents of NVRAM\n", ha->host_no));
+	DEBUG5(printk("scsi(%ld): Contents of NVRAM\n", vha->host_no));
 	DEBUG5(qla2x00_dump_buffer((uint8_t *)nv, ha->nvram_size));
 
 	/* Bad NVRAM data, set defaults parameters. */
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index dc5a1fe..4f7e94c 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3393,7 +3393,7 @@ qla2x00_read_edc(scsi_qla_host_t *vha, uint16_t dev, uint16_t adr,
 		DEBUG2_3_11(printk("%s(%ld): failed=%x (%x).\n", __func__,
 		    vha->host_no, rval, mcp->mb[0]));
 	} else {
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
 	}
 
 	return rval;
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 152ecfc..81187a0 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -219,8 +219,8 @@ qla2x00_write_nvram_word(struct qla_hw_data *ha, uint32_t addr, uint16_t data)
 	wait_cnt = NVR_WAIT_CNT;
 	do {
 		if (!--wait_cnt) {
-			DEBUG9_10(printk("%s(%ld): NVRAM didn't go ready...\n",
-			    __func__, vha->host_no));
+			DEBUG9_10(qla_printk(KERN_WARNING, ha,
+			    "NVRAM didn't go ready...\n"));
 			break;
 		}
 		NVRAM_DELAY();
@@ -349,7 +349,7 @@ qla2x00_clear_nvram_protection(struct qla_hw_data *ha)
 		wait_cnt = NVR_WAIT_CNT;
 		do {
 			if (!--wait_cnt) {
-				DEBUG9_10(qla_printk(
+				DEBUG9_10(qla_printk(KERN_WARNING, ha,
 				    "NVRAM didn't go ready...\n"));
 				break;
 			}
@@ -408,7 +408,8 @@ qla2x00_set_nvram_protection(struct qla_hw_data *ha, int stat)
 	wait_cnt = NVR_WAIT_CNT;
 	do {
 		if (!--wait_cnt) {
-			DEBUG9_10(qla_printk("NVRAM didn't go ready...\n"));
+			DEBUG9_10(qla_printk(KERN_WARNING, ha,
+			    "NVRAM didn't go ready...\n"));
 			break;
 		}
 		NVRAM_DELAY();
@@ -1079,8 +1080,9 @@ qla24xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr,
 				    0xff0000) | ((fdata >> 16) & 0xff));
 			ret = qla24xx_erase_sector(vha, fdata);
 			if (ret != QLA_SUCCESS) {
-				DEBUG9(qla_printk("Unable to erase sector: "
-				    "address=%x.\n", faddr));
+				DEBUG9(qla_printk(KERN_WARNING, ha,
+				    "Unable to erase sector: address=%x.\n",
+				    faddr));
 				break;
 			}
 		}
@@ -1240,8 +1242,9 @@ qla24xx_write_nvram_data(scsi_qla_host_t *vha, uint8_t *buf, uint32_t naddr,
 		ret = qla24xx_write_flash_dword(ha,
 		    nvram_data_addr(ha, naddr), cpu_to_le32(*dwptr));
 		if (ret != QLA_SUCCESS) {
-			DEBUG9(qla_printk("Unable to program nvram address=%x "
-			    "data=%x.\n", naddr, *dwptr));
+			DEBUG9(qla_printk(KERN_WARNING, ha,
+			    "Unable to program nvram address=%x data=%x.\n",
+			    naddr, *dwptr));
 			break;
 		}
 	}
-- 
cgit v0.10.2


From 2afa19a9377ca61b9489e44bf50029574fbe63be Mon Sep 17 00:00:00 2001
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:40 -0700
Subject: [SCSI] qla2xxx: Add QoS support.

Set the number of request queues to the module paramater
ql2xmaxqueues.  Each vport gets a request queue. The QoS value
set to the request queues determines priority control for queued
IOs. If QoS value is not specified, the vports use the default
queue 0.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 5d44e3e..bda6658 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1469,11 +1469,12 @@ static int
 qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 {
 	int	ret = 0;
-	int	cnt = 0;
-	uint8_t	qos = QLA_DEFAULT_QUE_QOS;
+	uint8_t	qos = 0;
 	scsi_qla_host_t *base_vha = shost_priv(fc_vport->shost);
 	scsi_qla_host_t *vha = NULL;
 	struct qla_hw_data *ha = base_vha->hw;
+	uint16_t options = 0;
+	int	cnt;
 
 	ret = qla24xx_vport_create_req_sanity_check(fc_vport);
 	if (ret) {
@@ -1529,23 +1530,35 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 
 	qla24xx_vport_disable(fc_vport, disable);
 
-	/* Create a queue pair for the vport */
-	if (ha->mqenable) {
-		if (ha->npiv_info) {
-			for (; cnt < ha->nvram_npiv_size; cnt++) {
-				if (ha->npiv_info[cnt].port_name ==
-					vha->port_name &&
-					ha->npiv_info[cnt].node_name ==
-					vha->node_name) {
-					qos = ha->npiv_info[cnt].q_qos;
-					break;
-				}
-			}
+	ret = 0;
+	if (ha->cur_vport_count <= ha->flex_port_count
+		|| ha->max_req_queues == 1 || !ha->npiv_info)
+		goto vport_queue;
+	/* Create a request queue in QoS mode for the vport */
+	for (cnt = ha->flex_port_count; cnt < ha->nvram_npiv_size; cnt++) {
+		if (ha->npiv_info[cnt].port_name == vha->port_name &&
+			ha->npiv_info[cnt].node_name == vha->node_name) {
+			qos = ha->npiv_info[cnt].q_qos;
+			break;
 		}
-		qla25xx_create_queues(vha, qos);
+	}
+	if (qos) {
+		ret = qla25xx_create_req_que(ha, options, vha->vp_idx, 0, 0,
+			qos);
+		if (!ret)
+			qla_printk(KERN_WARNING, ha,
+			"Can't create request queue for vp_idx:%d\n",
+			vha->vp_idx);
+		else
+			DEBUG2(qla_printk(KERN_INFO, ha,
+			"Request Que:%d created for vp_idx:%d\n",
+			ret, vha->vp_idx));
 	}
 
+vport_queue:
+	vha->req = ha->req_q_map[ret];
 	return 0;
+
 vport_create_failed_2:
 	qla24xx_disable_vp(vha);
 	qla24xx_deallocate_vp_id(vha);
@@ -1586,8 +1599,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
 		    vha->host_no, vha->vp_idx, vha));
         }
 
-	if (ha->mqenable) {
-		if (qla25xx_delete_queues(vha, 0) != QLA_SUCCESS)
+	if (vha->req->id) {
+		if (qla25xx_delete_req_que(vha, vha->req) != QLA_SUCCESS)
 			qla_printk(KERN_WARNING, ha,
 				"Queue delete failed.\n");
 	}
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 34760f8..68671a2 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -351,7 +351,7 @@ static inline void *
 qla25xx_copy_mq(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain)
 {
 	uint32_t cnt, que_idx;
-	uint8_t req_cnt, rsp_cnt, que_cnt;
+	uint8_t que_cnt;
 	struct qla2xxx_mq_chain *mq = ptr;
 	struct device_reg_25xxmq __iomem *reg;
 
@@ -363,9 +363,8 @@ qla25xx_copy_mq(struct qla_hw_data *ha, void *ptr, uint32_t **last_chain)
 	mq->type = __constant_htonl(DUMP_CHAIN_MQ);
 	mq->chain_size = __constant_htonl(sizeof(struct qla2xxx_mq_chain));
 
-	req_cnt = find_first_zero_bit(ha->req_qid_map, ha->max_queues);
-	rsp_cnt = find_first_zero_bit(ha->rsp_qid_map, ha->max_queues);
-	que_cnt = req_cnt > rsp_cnt ? req_cnt : rsp_cnt;
+	que_cnt = ha->max_req_queues > ha->max_rsp_queues ?
+		ha->max_req_queues : ha->max_rsp_queues;
 	mq->count = htonl(que_cnt);
 	for (cnt = 0; cnt < que_cnt; cnt++) {
 		reg = (struct device_reg_25xxmq *) ((void *)
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 645cfd9..57d659c 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -93,6 +93,7 @@
 #define LSD(x)	((uint32_t)((uint64_t)(x)))
 #define MSD(x)	((uint32_t)((((uint64_t)(x)) >> 16) >> 16))
 
+#define MAKE_HANDLE(x, y) ((uint32_t)((((uint32_t)(x)) << 16) | (uint32_t)(y)))
 
 /*
  * I/O register
@@ -179,6 +180,7 @@
 #define REQUEST_ENTRY_CNT_24XX		2048	/* Number of request entries. */
 #define RESPONSE_ENTRY_CNT_2100		64	/* Number of response entries.*/
 #define RESPONSE_ENTRY_CNT_2300		512	/* Number of response entries.*/
+#define RESPONSE_ENTRY_CNT_MQ		128	/* Number of response entries.*/
 
 struct req_que;
 
@@ -2008,7 +2010,8 @@ typedef struct vport_params {
 #define VP_RET_CODE_NOT_FOUND		6
 
 struct qla_hw_data;
-
+struct req_que;
+struct rsp_que;
 /*
  * ISP operations
  */
@@ -2030,10 +2033,9 @@ struct isp_operations {
 	void (*enable_intrs) (struct qla_hw_data *);
 	void (*disable_intrs) (struct qla_hw_data *);
 
-	int (*abort_command) (struct scsi_qla_host *, srb_t *,
-		struct req_que *);
-	int (*target_reset) (struct fc_port *, unsigned int);
-	int (*lun_reset) (struct fc_port *, unsigned int);
+	int (*abort_command) (srb_t *);
+	int (*target_reset) (struct fc_port *, unsigned int, int);
+	int (*lun_reset) (struct fc_port *, unsigned int, int);
 	int (*fabric_login) (struct scsi_qla_host *, uint16_t, uint8_t,
 		uint8_t, uint8_t, uint16_t *, uint8_t);
 	int (*fabric_logout) (struct scsi_qla_host *, uint16_t, uint8_t,
@@ -2079,7 +2081,6 @@ struct isp_operations {
 #define QLA_PCI_MSIX_CONTROL	0xa2
 
 struct scsi_qla_host;
-struct rsp_que;
 
 struct qla_msix_entry {
 	int have_irq;
@@ -2140,7 +2141,6 @@ struct qla_statistics {
 #define MBC_INITIALIZE_MULTIQ 0x1f
 #define QLA_QUE_PAGE 0X1000
 #define QLA_MQ_SIZE 32
-#define QLA_MAX_HOST_QUES 16
 #define QLA_MAX_QUEUES 256
 #define ISP_QUE_REG(ha, id) \
 	((ha->mqenable) ? \
@@ -2170,6 +2170,7 @@ struct rsp_que {
 	struct qla_hw_data *hw;
 	struct qla_msix_entry *msix;
 	struct req_que *req;
+	srb_t *status_srb; /* status continuation entry */
 };
 
 /* Request queue data structure */
@@ -2246,7 +2247,8 @@ struct qla_hw_data {
 	struct rsp_que **rsp_q_map;
 	unsigned long req_qid_map[(QLA_MAX_QUEUES / 8) / sizeof(unsigned long)];
 	unsigned long rsp_qid_map[(QLA_MAX_QUEUES / 8) / sizeof(unsigned long)];
-	uint16_t 	max_queues;
+	uint8_t 	max_req_queues;
+	uint8_t 	max_rsp_queues;
 	struct qla_npiv_entry *npiv_info;
 	uint16_t	nvram_npiv_size;
 
@@ -2532,6 +2534,7 @@ struct qla_hw_data {
 	uint16_t        num_vsans;      /* number of vsan created */
 	uint16_t        max_npiv_vports;        /* 63 or 125 per topoloty */
 	int             cur_vport_count;
+	uint16_t	flex_port_count;
 
 	struct qla_chip_state_84xx *cs84xx;
 	struct qla_statistics qla_stats;
@@ -2591,8 +2594,6 @@ typedef struct scsi_qla_host {
 #define SWITCH_FOUND		BIT_0
 #define DFLG_NO_CABLE		BIT_1
 
-	srb_t		*status_srb;	/* Status continuation entry. */
-
 	/* ISP configuration data. */
 	uint16_t	loop_id;		/* Host adapter loop id */
 
@@ -2648,7 +2649,7 @@ typedef struct scsi_qla_host {
 #define VP_ERR_FAB_LOGOUT	4
 #define VP_ERR_ADAP_NORESOURCES	5
 	struct qla_hw_data *hw;
-	int	req_ques[QLA_MAX_HOST_QUES];
+	struct req_que *req;
 } scsi_qla_host_t;
 
 /*
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 528913f..b12de01 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -67,6 +67,7 @@ extern int ql2xextended_error_logging;
 extern int ql2xqfullrampup;
 extern int ql2xiidmaenable;
 extern int ql2xmaxqueues;
+extern int ql2xmultique_tag;
 
 extern int qla2x00_loop_reset(scsi_qla_host_t *);
 extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
@@ -165,13 +166,13 @@ extern int
 qla2x00_issue_iocb(scsi_qla_host_t *, void *, dma_addr_t, size_t);
 
 extern int
-qla2x00_abort_command(scsi_qla_host_t *, srb_t *, struct req_que *);
+qla2x00_abort_command(srb_t *);
 
 extern int
-qla2x00_abort_target(struct fc_port *, unsigned int);
+qla2x00_abort_target(struct fc_port *, unsigned int, int);
 
 extern int
-qla2x00_lun_reset(struct fc_port *, unsigned int);
+qla2x00_lun_reset(struct fc_port *, unsigned int, int);
 
 extern int
 qla2x00_get_adapter_id(scsi_qla_host_t *, uint16_t *, uint8_t *, uint8_t *,
@@ -236,9 +237,11 @@ extern int
 qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *,
     dma_addr_t);
 
-extern int qla24xx_abort_command(scsi_qla_host_t *, srb_t *, struct req_que *);
-extern int qla24xx_abort_target(struct fc_port *, unsigned int);
-extern int qla24xx_lun_reset(struct fc_port *, unsigned int);
+extern int qla24xx_abort_command(srb_t *);
+extern int
+qla24xx_abort_target(struct fc_port *, unsigned int, int);
+extern int
+qla24xx_lun_reset(struct fc_port *, unsigned int, int);
 
 extern int
 qla2x00_system_error(scsi_qla_host_t *);
@@ -295,8 +298,8 @@ extern irqreturn_t qla2100_intr_handler(int, void *);
 extern irqreturn_t qla2300_intr_handler(int, void *);
 extern irqreturn_t qla24xx_intr_handler(int, void *);
 extern void qla2x00_process_response_queue(struct rsp_que *);
-extern void qla24xx_process_response_queue(struct rsp_que *);
-
+extern void
+qla24xx_process_response_queue(struct scsi_qla_host *, struct rsp_que *);
 extern int qla2x00_request_irqs(struct qla_hw_data *, struct rsp_que *);
 extern void qla2x00_free_irqs(scsi_qla_host_t *);
 
@@ -401,19 +404,21 @@ extern int qla25xx_request_irq(struct rsp_que *);
 extern int qla25xx_init_req_que(struct scsi_qla_host *, struct req_que *);
 extern int qla25xx_init_rsp_que(struct scsi_qla_host *, struct rsp_que *);
 extern int qla25xx_create_req_que(struct qla_hw_data *, uint16_t, uint8_t,
-	uint16_t, uint8_t, uint8_t);
+	uint16_t, int, uint8_t);
 extern int qla25xx_create_rsp_que(struct qla_hw_data *, uint16_t, uint8_t,
-	uint16_t);
+	uint16_t, int);
 extern int qla25xx_update_req_que(struct scsi_qla_host *, uint8_t, uint8_t);
 extern void qla2x00_init_response_q_entries(struct rsp_que *);
 extern int qla25xx_delete_req_que(struct scsi_qla_host *, struct req_que *);
 extern int qla25xx_delete_rsp_que(struct scsi_qla_host *, struct rsp_que *);
 extern int qla25xx_create_queues(struct scsi_qla_host *, uint8_t);
-extern int qla25xx_delete_queues(struct scsi_qla_host *, uint8_t);
+extern int qla25xx_delete_queues(struct scsi_qla_host *);
 extern uint16_t qla24xx_rd_req_reg(struct qla_hw_data *, uint16_t);
 extern uint16_t qla25xx_rd_req_reg(struct qla_hw_data *, uint16_t);
 extern void qla24xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla25xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla25xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla24xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
+extern struct scsi_qla_host * qla25xx_get_host(struct rsp_que *);
+
 #endif /* _QLA_GBL_H */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 9a343ec..059909c 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -786,7 +786,6 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
 		    sizeof(uint32_t);
 		if (ha->mqenable)
 			mq_size = sizeof(struct qla2xxx_mq_chain);
-
 		/* Allocate memory for Fibre Channel Event Buffer. */
 		if (!IS_QLA25XX(ha) && !IS_QLA81XX(ha))
 			goto try_eft;
@@ -850,8 +849,7 @@ cont_alloc:
 	rsp_q_size = rsp->length * sizeof(response_t);
 
 	dump_size = offsetof(struct qla2xxx_fw_dump, isp);
-	dump_size += fixed_size + mem_size + req_q_size + rsp_q_size +
-	    eft_size;
+	dump_size += fixed_size + mem_size + req_q_size + rsp_q_size + eft_size;
 	ha->chain_offset = dump_size;
 	dump_size += mq_size + fce_size;
 
@@ -1013,12 +1011,14 @@ qla2x00_init_response_q_entries(struct rsp_que *rsp)
 	uint16_t cnt;
 	response_t *pkt;
 
+	rsp->ring_ptr = rsp->ring;
+	rsp->ring_index    = 0;
+	rsp->status_srb = NULL;
 	pkt = rsp->ring_ptr;
 	for (cnt = 0; cnt < rsp->length; cnt++) {
 		pkt->signature = RESPONSE_PROCESSED;
 		pkt++;
 	}
-
 }
 
 /**
@@ -1176,7 +1176,7 @@ qla24xx_config_rings(struct scsi_qla_host *vha)
 		if (ha->flags.msix_enabled) {
 			msix = &ha->msix_entries[1];
 			DEBUG2_17(printk(KERN_INFO
-			"Reistering vector 0x%x for base que\n", msix->entry));
+			"Registering vector 0x%x for base que\n", msix->entry));
 			icb->msix = cpu_to_le16(msix->entry);
 		}
 		/* Use alternate PCI bus number */
@@ -1230,14 +1230,14 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 
 	/* Clear outstanding commands array. */
-	for (que = 0; que < ha->max_queues; que++) {
+	for (que = 0; que < ha->max_req_queues; que++) {
 		req = ha->req_q_map[que];
 		if (!req)
 			continue;
-		for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++)
+		for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++)
 			req->outstanding_cmds[cnt] = NULL;
 
-		req->current_outstanding_cmd = 0;
+		req->current_outstanding_cmd = 1;
 
 		/* Initialize firmware. */
 		req->ring_ptr  = req->ring;
@@ -1245,13 +1245,10 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
 		req->cnt      = req->length;
 	}
 
-	for (que = 0; que < ha->max_queues; que++) {
+	for (que = 0; que < ha->max_rsp_queues; que++) {
 		rsp = ha->rsp_q_map[que];
 		if (!rsp)
 			continue;
-		rsp->ring_ptr = rsp->ring;
-		rsp->ring_index    = 0;
-
 		/* Initialize response queue entries */
 		qla2x00_init_response_q_entries(rsp);
 	}
@@ -3180,8 +3177,7 @@ qla2x00_loop_resync(scsi_qla_host_t *vha)
 {
 	int rval = QLA_SUCCESS;
 	uint32_t wait_time;
-	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req = ha->req_q_map[vha->req_ques[0]];
+	struct req_que *req = vha->req;
 	struct rsp_que *rsp = req->rsp;
 
 	atomic_set(&vha->loop_state, LOOP_UPDATE);
@@ -3448,7 +3444,7 @@ qla25xx_init_queues(struct qla_hw_data *ha)
 	int ret = -1;
 	int i;
 
-	for (i = 1; i < ha->max_queues; i++) {
+	for (i = 1; i < ha->max_rsp_queues; i++) {
 		rsp = ha->rsp_q_map[i];
 		if (rsp) {
 			rsp->options &= ~BIT_0;
@@ -3462,6 +3458,8 @@ qla25xx_init_queues(struct qla_hw_data *ha)
 					"%s Rsp que:%d inited\n", __func__,
 						rsp->id));
 		}
+	}
+	for (i = 1; i < ha->max_req_queues; i++) {
 		req = ha->req_q_map[i];
 		if (req) {
 		/* Clear outstanding commands array. */
@@ -4165,7 +4163,7 @@ qla24xx_configure_vhba(scsi_qla_host_t *vha)
 	uint16_t mb[MAILBOX_REGISTER_COUNT];
 	struct qla_hw_data *ha = vha->hw;
 	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
-	struct req_que *req = ha->req_q_map[vha->req_ques[0]];
+	struct req_que *req = vha->req;
 	struct rsp_que *rsp = req->rsp;
 
 	if (!vha->vp_idx)
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index a8abbb9..94b69d8 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -453,6 +453,7 @@ __qla2x00_marker(struct scsi_qla_host *vha, struct req_que *req,
 			mrk24->lun[2] = MSB(lun);
 			host_to_fcp_swap(mrk24->lun, sizeof(mrk24->lun));
 			mrk24->vp_index = vha->vp_idx;
+			mrk24->handle = MAKE_HANDLE(req->id, mrk24->handle);
 		} else {
 			SET_TARGET_ID(ha, mrk->target, loop_id);
 			mrk->lun = cpu_to_le16(lun);
@@ -531,9 +532,6 @@ qla2x00_req_pkt(struct scsi_qla_host *vha, struct req_que *req,
 			for (cnt = 0; cnt < REQUEST_ENTRY_SIZE / 4; cnt++)
 				*dword_ptr++ = 0;
 
-			/* Set system defined field. */
-			pkt->sys_define = (uint8_t)req->ring_index;
-
 			/* Set entry count. */
 			pkt->entry_count = 1;
 
@@ -724,19 +722,14 @@ qla24xx_start_scsi(srb_t *sp)
 	struct scsi_cmnd *cmd = sp->cmd;
 	struct scsi_qla_host *vha = sp->fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
-	uint16_t que_id;
 
 	/* Setup device pointers. */
 	ret = 0;
-	que_id = vha->req_ques[0];
 
-	req = ha->req_q_map[que_id];
+	req = vha->req;
+	rsp = ha->rsp_q_map[0];
 	sp->que = req;
 
-	if (req->rsp)
-		rsp = req->rsp;
-	else
-		rsp = ha->rsp_q_map[que_id];
 	/* So we know we haven't pci_map'ed anything yet */
 	tot_dsds = 0;
 
@@ -794,7 +787,7 @@ qla24xx_start_scsi(srb_t *sp)
 	req->cnt -= req_cnt;
 
 	cmd_pkt = (struct cmd_type_7 *)req->ring_ptr;
-	cmd_pkt->handle = handle;
+	cmd_pkt->handle = MAKE_HANDLE(req->id, handle);
 
 	/* Zero out remaining portion of packet. */
 	/*    tagged queuing modifier -- default is TSK_SIMPLE (0). */
@@ -823,6 +816,8 @@ qla24xx_start_scsi(srb_t *sp)
 
 	/* Set total data segment count. */
 	cmd_pkt->entry_count = (uint8_t)req_cnt;
+	/* Specify response queue number where completion should happen */
+	cmd_pkt->entry_status = (uint8_t) rsp->id;
 	wmb();
 
 	/* Adjust ring index. */
@@ -842,7 +837,7 @@ qla24xx_start_scsi(srb_t *sp)
 	/* Manage unprocessed RIO/ZIO commands in response queue. */
 	if (vha->flags.process_response_queue &&
 		rsp->ring_ptr->signature != RESPONSE_PROCESSED)
-		qla24xx_process_response_queue(rsp);
+		qla24xx_process_response_queue(vha, rsp);
 
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	return QLA_SUCCESS;
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d049818..c8e906c 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -13,10 +13,9 @@ static void qla2x00_mbx_completion(scsi_qla_host_t *, uint16_t);
 static void qla2x00_process_completed_request(struct scsi_qla_host *,
 	struct req_que *, uint32_t);
 static void qla2x00_status_entry(scsi_qla_host_t *, struct rsp_que *, void *);
-static void qla2x00_status_cont_entry(scsi_qla_host_t *, sts_cont_entry_t *);
+static void qla2x00_status_cont_entry(struct rsp_que *, sts_cont_entry_t *);
 static void qla2x00_error_entry(scsi_qla_host_t *, struct rsp_que *,
 	sts_entry_t *);
-static struct scsi_qla_host *qla2x00_get_rsp_host(struct rsp_que *);
 
 /**
  * qla2100_intr_handler() - Process interrupts for the ISP2100 and ISP2200.
@@ -51,7 +50,7 @@ qla2100_intr_handler(int irq, void *dev_id)
 	status = 0;
 
 	spin_lock(&ha->hardware_lock);
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	for (iter = 50; iter--; ) {
 		hccr = RD_REG_WORD(&reg->hccr);
 		if (hccr & HCCR_RISC_PAUSE) {
@@ -147,7 +146,7 @@ qla2300_intr_handler(int irq, void *dev_id)
 	status = 0;
 
 	spin_lock(&ha->hardware_lock);
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	for (iter = 50; iter--; ) {
 		stat = RD_REG_DWORD(&reg->u.isp2300.host_status);
 		if (stat & HSR_RISC_PAUSED) {
@@ -685,7 +684,7 @@ skip_rio:
 		    vha->host_no));
 
 		if (IS_FWI2_CAPABLE(ha))
-			qla24xx_process_response_queue(rsp);
+			qla24xx_process_response_queue(vha, rsp);
 		else
 			qla2x00_process_response_queue(rsp);
 		break;
@@ -766,7 +765,7 @@ qla2x00_adjust_sdev_qdepth_up(struct scsi_device *sdev, void *data)
 	struct qla_hw_data *ha = vha->hw;
 	struct req_que *req = NULL;
 
-	req = ha->req_q_map[vha->req_ques[0]];
+	req = vha->req;
 	if (!req)
 		return;
 	if (req->max_q_depth <= sdev->queue_depth)
@@ -858,8 +857,8 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha,
 		qla2x00_ramp_up_queue_depth(vha, req, sp);
 		qla2x00_sp_compl(ha, sp);
 	} else {
-		DEBUG2(printk("scsi(%ld): Invalid ISP SCSI completion handle\n",
-		    vha->host_no));
+		DEBUG2(printk("scsi(%ld) Req:%d: Invalid ISP SCSI completion"
+			" handle(%d)\n", vha->host_no, req->id, index));
 		qla_printk(KERN_WARNING, ha,
 		    "Invalid ISP SCSI completion handle\n");
 
@@ -881,7 +880,7 @@ qla2x00_process_response_queue(struct rsp_que *rsp)
 	uint16_t        handle_cnt;
 	uint16_t        cnt;
 
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 
 	if (!vha->flags.online)
 		return;
@@ -926,7 +925,7 @@ qla2x00_process_response_queue(struct rsp_que *rsp)
 			}
 			break;
 		case STATUS_CONT_TYPE:
-			qla2x00_status_cont_entry(vha, (sts_cont_entry_t *)pkt);
+			qla2x00_status_cont_entry(rsp, (sts_cont_entry_t *)pkt);
 			break;
 		default:
 			/* Type Not Supported. */
@@ -945,7 +944,8 @@ qla2x00_process_response_queue(struct rsp_que *rsp)
 }
 
 static inline void
-qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t sense_len)
+qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t sense_len,
+	struct rsp_que *rsp)
 {
 	struct scsi_cmnd *cp = sp->cmd;
 
@@ -962,7 +962,7 @@ qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t sense_len)
 	sp->request_sense_ptr += sense_len;
 	sp->request_sense_length -= sense_len;
 	if (sp->request_sense_length != 0)
-		sp->fcport->vha->status_srb = sp;
+		rsp->status_srb = sp;
 
 	DEBUG5(printk("%s(): Check condition Sense data, scsi(%ld:%d:%d:%d) "
 	    "cmd=%p pid=%ld\n", __func__, sp->fcport->vha->host_no,
@@ -992,7 +992,9 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 	uint32_t	sense_len, rsp_info_len, resid_len, fw_resid_len;
 	uint8_t		*rsp_info, *sense_data;
 	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req = rsp->req;
+	uint32_t handle;
+	uint16_t que;
+	struct req_que *req;
 
 	sts = (sts_entry_t *) pkt;
 	sts24 = (struct sts_entry_24xx *) pkt;
@@ -1003,18 +1005,20 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 		comp_status = le16_to_cpu(sts->comp_status);
 		scsi_status = le16_to_cpu(sts->scsi_status) & SS_MASK;
 	}
-
+	handle = (uint32_t) LSW(sts->handle);
+	que = MSW(sts->handle);
+	req = ha->req_q_map[que];
 	/* Fast path completion. */
 	if (comp_status == CS_COMPLETE && scsi_status == 0) {
-		qla2x00_process_completed_request(vha, req, sts->handle);
+		qla2x00_process_completed_request(vha, req, handle);
 
 		return;
 	}
 
 	/* Validate handle. */
-	if (sts->handle < MAX_OUTSTANDING_COMMANDS) {
-		sp = req->outstanding_cmds[sts->handle];
-		req->outstanding_cmds[sts->handle] = NULL;
+	if (handle < MAX_OUTSTANDING_COMMANDS) {
+		sp = req->outstanding_cmds[handle];
+		req->outstanding_cmds[handle] = NULL;
 	} else
 		sp = NULL;
 
@@ -1030,7 +1034,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 	cp = sp->cmd;
 	if (cp == NULL) {
 		DEBUG2(printk("scsi(%ld): Command already returned back to OS "
-		    "pkt->handle=%d sp=%p.\n", vha->host_no, sts->handle, sp));
+		    "pkt->handle=%d sp=%p.\n", vha->host_no, handle, sp));
 		qla_printk(KERN_WARNING, ha,
 		    "Command is NULL: already returned to OS (sp=%p)\n", sp);
 
@@ -1133,7 +1137,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 		if (!(scsi_status & SS_SENSE_LEN_VALID))
 			break;
 
-		qla2x00_handle_sense(sp, sense_data, sense_len);
+		qla2x00_handle_sense(sp, sense_data, sense_len, rsp);
 		break;
 
 	case CS_DATA_UNDERRUN:
@@ -1192,7 +1196,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 			if (!(scsi_status & SS_SENSE_LEN_VALID))
 				break;
 
-			qla2x00_handle_sense(sp, sense_data, sense_len);
+			qla2x00_handle_sense(sp, sense_data, sense_len, rsp);
 		} else {
 			/*
 			 * If RISC reports underrun and target does not report
@@ -1334,7 +1338,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 	}
 
 	/* Place command on done queue. */
-	if (vha->status_srb == NULL)
+	if (rsp->status_srb == NULL)
 		qla2x00_sp_compl(ha, sp);
 }
 
@@ -1346,11 +1350,11 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
  * Extended sense data.
  */
 static void
-qla2x00_status_cont_entry(scsi_qla_host_t *vha, sts_cont_entry_t *pkt)
+qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
 {
 	uint8_t		sense_sz = 0;
-	struct qla_hw_data *ha = vha->hw;
-	srb_t		*sp = vha->status_srb;
+	struct qla_hw_data *ha = rsp->hw;
+	srb_t		*sp = rsp->status_srb;
 	struct scsi_cmnd *cp;
 
 	if (sp != NULL && sp->request_sense_length != 0) {
@@ -1362,7 +1366,7 @@ qla2x00_status_cont_entry(scsi_qla_host_t *vha, sts_cont_entry_t *pkt)
 			    "cmd is NULL: already returned to OS (sp=%p)\n",
 			    sp);
 
-			vha->status_srb = NULL;
+			rsp->status_srb = NULL;
 			return;
 		}
 
@@ -1383,7 +1387,7 @@ qla2x00_status_cont_entry(scsi_qla_host_t *vha, sts_cont_entry_t *pkt)
 
 		/* Place command on done queue. */
 		if (sp->request_sense_length == 0) {
-			vha->status_srb = NULL;
+			rsp->status_srb = NULL;
 			qla2x00_sp_compl(ha, sp);
 		}
 	}
@@ -1399,7 +1403,9 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt)
 {
 	srb_t *sp;
 	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req = rsp->req;
+	uint32_t handle = LSW(pkt->handle);
+	uint16_t que = MSW(pkt->handle);
+	struct req_que *req = ha->req_q_map[que];
 #if defined(QL_DEBUG_LEVEL_2)
 	if (pkt->entry_status & RF_INV_E_ORDER)
 		qla_printk(KERN_ERR, ha, "%s: Invalid Entry Order\n", __func__);
@@ -1417,14 +1423,14 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt)
 #endif
 
 	/* Validate handle. */
-	if (pkt->handle < MAX_OUTSTANDING_COMMANDS)
-		sp = req->outstanding_cmds[pkt->handle];
+	if (handle < MAX_OUTSTANDING_COMMANDS)
+		sp = req->outstanding_cmds[handle];
 	else
 		sp = NULL;
 
 	if (sp) {
 		/* Free outstanding command slot. */
-		req->outstanding_cmds[pkt->handle] = NULL;
+		req->outstanding_cmds[handle] = NULL;
 
 		/* Bad payload or header */
 		if (pkt->entry_status &
@@ -1486,13 +1492,10 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
  * qla24xx_process_response_queue() - Process response queue entries.
  * @ha: SCSI driver HA context
  */
-void
-qla24xx_process_response_queue(struct rsp_que *rsp)
+void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+	struct rsp_que *rsp)
 {
 	struct sts_entry_24xx *pkt;
-	struct scsi_qla_host *vha;
-
-	vha = qla2x00_get_rsp_host(rsp);
 
 	if (!vha->flags.online)
 		return;
@@ -1523,7 +1526,7 @@ qla24xx_process_response_queue(struct rsp_que *rsp)
 			qla2x00_status_entry(vha, rsp, pkt);
 			break;
 		case STATUS_CONT_TYPE:
-			qla2x00_status_cont_entry(vha, (sts_cont_entry_t *)pkt);
+			qla2x00_status_cont_entry(rsp, (sts_cont_entry_t *)pkt);
 			break;
 		case VP_RPT_ID_IOCB_TYPE:
 			qla24xx_report_id_acquisition(vha,
@@ -1626,7 +1629,7 @@ qla24xx_intr_handler(int irq, void *dev_id)
 	status = 0;
 
 	spin_lock(&ha->hardware_lock);
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	for (iter = 50; iter--; ) {
 		stat = RD_REG_DWORD(&reg->host_status);
 		if (stat & HSRX_RISC_PAUSED) {
@@ -1664,7 +1667,7 @@ qla24xx_intr_handler(int irq, void *dev_id)
 			break;
 		case 0x13:
 		case 0x14:
-			qla24xx_process_response_queue(rsp);
+			qla24xx_process_response_queue(vha, rsp);
 			break;
 		default:
 			DEBUG2(printk("scsi(%ld): Unrecognized interrupt type "
@@ -1692,6 +1695,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id)
 	struct qla_hw_data *ha;
 	struct rsp_que *rsp;
 	struct device_reg_24xx __iomem *reg;
+	struct scsi_qla_host *vha;
 
 	rsp = (struct rsp_que *) dev_id;
 	if (!rsp) {
@@ -1704,7 +1708,8 @@ qla24xx_msix_rsp_q(int irq, void *dev_id)
 
 	spin_lock_irq(&ha->hardware_lock);
 
-	qla24xx_process_response_queue(rsp);
+	vha = qla25xx_get_host(rsp);
+	qla24xx_process_response_queue(vha, rsp);
 	WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT);
 
 	spin_unlock_irq(&ha->hardware_lock);
@@ -1713,31 +1718,6 @@ qla24xx_msix_rsp_q(int irq, void *dev_id)
 }
 
 static irqreturn_t
-qla25xx_msix_rsp_q(int irq, void *dev_id)
-{
-	struct qla_hw_data *ha;
-	struct rsp_que *rsp;
-	struct device_reg_24xx __iomem *reg;
-
-	rsp = (struct rsp_que *) dev_id;
-	if (!rsp) {
-		printk(KERN_INFO
-			"%s(): NULL response queue pointer\n", __func__);
-		return IRQ_NONE;
-	}
-	ha = rsp->hw;
-	reg = &ha->iobase->isp24;
-
-	spin_lock_irq(&ha->hardware_lock);
-
-	qla24xx_process_response_queue(rsp);
-
-	spin_unlock_irq(&ha->hardware_lock);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t
 qla24xx_msix_default(int irq, void *dev_id)
 {
 	scsi_qla_host_t	*vha;
@@ -1760,7 +1740,7 @@ qla24xx_msix_default(int irq, void *dev_id)
 	status = 0;
 
 	spin_lock_irq(&ha->hardware_lock);
-	vha = qla2x00_get_rsp_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	do {
 		stat = RD_REG_DWORD(&reg->host_status);
 		if (stat & HSRX_RISC_PAUSED) {
@@ -1798,7 +1778,7 @@ qla24xx_msix_default(int irq, void *dev_id)
 			break;
 		case 0x13:
 		case 0x14:
-			qla24xx_process_response_queue(rsp);
+			qla24xx_process_response_queue(vha, rsp);
 			break;
 		default:
 			DEBUG2(printk("scsi(%ld): Unrecognized interrupt type "
@@ -1822,31 +1802,13 @@ qla24xx_msix_default(int irq, void *dev_id)
 /* Interrupt handling helpers. */
 
 struct qla_init_msix_entry {
-	uint16_t entry;
-	uint16_t index;
 	const char *name;
 	irq_handler_t handler;
 };
 
-static struct qla_init_msix_entry base_queue = {
-	.entry = 0,
-	.index = 0,
-	.name = "qla2xxx (default)",
-	.handler = qla24xx_msix_default,
-};
-
-static struct qla_init_msix_entry base_rsp_queue = {
-	.entry = 1,
-	.index = 1,
-	.name = "qla2xxx (rsp_q)",
-	.handler = qla24xx_msix_rsp_q,
-};
-
-static struct qla_init_msix_entry multi_rsp_queue = {
-	.entry = 1,
-	.index = 1,
-	.name = "qla2xxx (multi_q)",
-	.handler = qla25xx_msix_rsp_q,
+static struct qla_init_msix_entry msix_entries[2] = {
+	{ "qla2xxx (default)", qla24xx_msix_default },
+	{ "qla2xxx (rsp_q)", qla24xx_msix_rsp_q },
 };
 
 static void
@@ -1873,7 +1835,6 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
 	int i, ret;
 	struct msix_entry *entries;
 	struct qla_msix_entry *qentry;
-	struct qla_init_msix_entry *msix_queue;
 
 	entries = kzalloc(sizeof(struct msix_entry) * ha->msix_count,
 					GFP_KERNEL);
@@ -1900,7 +1861,7 @@ msix_failed:
 				ha->msix_count, ret);
 			goto msix_out;
 		}
-		ha->max_queues = ha->msix_count - 1;
+		ha->max_rsp_queues = ha->msix_count - 1;
 	}
 	ha->msix_entries = kzalloc(sizeof(struct qla_msix_entry) *
 				ha->msix_count, GFP_KERNEL);
@@ -1918,45 +1879,27 @@ msix_failed:
 		qentry->rsp = NULL;
 	}
 
-	/* Enable MSI-X for AENs for queue 0 */
-	qentry = &ha->msix_entries[0];
-	ret = request_irq(qentry->vector, base_queue.handler, 0,
-					base_queue.name, rsp);
-	if (ret) {
-		qla_printk(KERN_WARNING, ha,
+	/* Enable MSI-X vectors for the base queue */
+	for (i = 0; i < 2; i++) {
+		qentry = &ha->msix_entries[i];
+		ret = request_irq(qentry->vector, msix_entries[i].handler,
+					0, msix_entries[i].name, rsp);
+		if (ret) {
+			qla_printk(KERN_WARNING, ha,
 			"MSI-X: Unable to register handler -- %x/%d.\n",
 			qentry->vector, ret);
-		qla24xx_disable_msix(ha);
-		goto msix_out;
+			qla24xx_disable_msix(ha);
+			ha->mqenable = 0;
+			goto msix_out;
+		}
+		qentry->have_irq = 1;
+		qentry->rsp = rsp;
+		rsp->msix = qentry;
 	}
-	qentry->have_irq = 1;
-	qentry->rsp = rsp;
 
 	/* Enable MSI-X vector for response queue update for queue 0 */
-	if (ha->max_queues > 1 && ha->mqiobase) {
+	if (ha->mqiobase &&  (ha->max_rsp_queues > 1 || ha->max_req_queues > 1))
 		ha->mqenable = 1;
-		msix_queue = &multi_rsp_queue;
-		qla_printk(KERN_INFO, ha,
-				"MQ enabled, Number of Queue Resources: %d \n",
-				ha->max_queues);
-	} else {
-		ha->mqenable = 0;
-		msix_queue = &base_rsp_queue;
-	}
-
-	qentry = &ha->msix_entries[1];
-	ret = request_irq(qentry->vector, msix_queue->handler, 0,
-						msix_queue->name, rsp);
-	if (ret) {
-		qla_printk(KERN_WARNING, ha,
-			"MSI-X: Unable to register handler -- %x/%d.\n",
-			qentry->vector, ret);
-		qla24xx_disable_msix(ha);
-		ha->mqenable = 0;
-		goto msix_out;
-	}
-	qentry->have_irq = 1;
-	qentry->rsp = rsp;
 
 msix_out:
 	kfree(entries);
@@ -2063,35 +2006,11 @@ qla2x00_free_irqs(scsi_qla_host_t *vha)
 	}
 }
 
-static struct scsi_qla_host *
-qla2x00_get_rsp_host(struct rsp_que *rsp)
-{
-	srb_t *sp;
-	struct qla_hw_data *ha = rsp->hw;
-	struct scsi_qla_host *vha = NULL;
-	struct sts_entry_24xx *pkt;
-	struct req_que *req;
-
-	if (rsp->id) {
-		pkt = (struct sts_entry_24xx *) rsp->ring_ptr;
-		req = rsp->req;
-		if (pkt && pkt->handle < MAX_OUTSTANDING_COMMANDS) {
-			sp = req->outstanding_cmds[pkt->handle];
-			if (sp)
-				vha = sp->fcport->vha;
-		}
-	}
-	if (!vha)
-	/* handle it in base queue */
-		vha = pci_get_drvdata(ha->pdev);
-
-	return vha;
-}
 
 int qla25xx_request_irq(struct rsp_que *rsp)
 {
 	struct qla_hw_data *ha = rsp->hw;
-	struct qla_init_msix_entry *intr = &multi_rsp_queue;
+	struct qla_init_msix_entry *intr = &msix_entries[2];
 	struct qla_msix_entry *msix = rsp->msix;
 	int ret;
 
@@ -2106,3 +2025,30 @@ int qla25xx_request_irq(struct rsp_que *rsp)
 	msix->rsp = rsp;
 	return ret;
 }
+
+struct scsi_qla_host *
+qla25xx_get_host(struct rsp_que *rsp)
+{
+	srb_t *sp;
+	struct qla_hw_data *ha = rsp->hw;
+	struct scsi_qla_host *vha = NULL;
+	struct sts_entry_24xx *pkt;
+	struct req_que *req;
+	uint16_t que;
+	uint32_t handle;
+
+	pkt = (struct sts_entry_24xx *) rsp->ring_ptr;
+	que = MSW(pkt->handle);
+	handle = (uint32_t) LSW(pkt->handle);
+	req = ha->req_q_map[que];
+	if (handle < MAX_OUTSTANDING_COMMANDS) {
+		sp = req->outstanding_cmds[handle];
+		if (sp)
+			return  sp->fcport->vha;
+		else
+			goto base_que;
+	}
+base_que:
+	vha = pci_get_drvdata(ha->pdev);
+	return vha;
+}
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 4f7e94c..bfdc89f 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -748,20 +748,20 @@ qla2x00_issue_iocb(scsi_qla_host_t *vha, void *buffer, dma_addr_t phys_addr,
  *	Kernel context.
  */
 int
-qla2x00_abort_command(scsi_qla_host_t *vha, srb_t *sp, struct req_que *req)
+qla2x00_abort_command(srb_t *sp)
 {
 	unsigned long   flags = 0;
-	fc_port_t	*fcport;
 	int		rval;
 	uint32_t	handle = 0;
 	mbx_cmd_t	mc;
 	mbx_cmd_t	*mcp = &mc;
+	fc_port_t	*fcport = sp->fcport;
+	scsi_qla_host_t *vha = fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
+	struct req_que *req = vha->req;
 
 	DEBUG11(printk("qla2x00_abort_command(%ld): entered.\n", vha->host_no));
 
-	fcport = sp->fcport;
-
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) {
 		if (req->outstanding_cmds[handle] == sp)
@@ -800,7 +800,7 @@ qla2x00_abort_command(scsi_qla_host_t *vha, srb_t *sp, struct req_que *req)
 }
 
 int
-qla2x00_abort_target(struct fc_port *fcport, unsigned int l)
+qla2x00_abort_target(struct fc_port *fcport, unsigned int l, int tag)
 {
 	int rval, rval2;
 	mbx_cmd_t  mc;
@@ -813,8 +813,8 @@ qla2x00_abort_target(struct fc_port *fcport, unsigned int l)
 
 	l = l;
 	vha = fcport->vha;
-	req = vha->hw->req_q_map[0];
-	rsp = vha->hw->rsp_q_map[0];
+	req = vha->hw->req_q_map[tag];
+	rsp = vha->hw->rsp_q_map[tag];
 	mcp->mb[0] = MBC_ABORT_TARGET;
 	mcp->out_mb = MBX_9|MBX_2|MBX_1|MBX_0;
 	if (HAS_EXTENDED_IDS(vha->hw)) {
@@ -850,7 +850,7 @@ qla2x00_abort_target(struct fc_port *fcport, unsigned int l)
 }
 
 int
-qla2x00_lun_reset(struct fc_port *fcport, unsigned int l)
+qla2x00_lun_reset(struct fc_port *fcport, unsigned int l, int tag)
 {
 	int rval, rval2;
 	mbx_cmd_t  mc;
@@ -862,8 +862,8 @@ qla2x00_lun_reset(struct fc_port *fcport, unsigned int l)
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, fcport->vha->host_no));
 
 	vha = fcport->vha;
-	req = vha->hw->req_q_map[0];
-	rsp = vha->hw->rsp_q_map[0];
+	req = vha->hw->req_q_map[tag];
+	rsp = vha->hw->rsp_q_map[tag];
 	mcp->mb[0] = MBC_LUN_RESET;
 	mcp->out_mb = MBX_9|MBX_3|MBX_2|MBX_1|MBX_0;
 	if (HAS_EXTENDED_IDS(vha->hw))
@@ -1492,9 +1492,14 @@ qla24xx_login_fabric(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t domain,
 	dma_addr_t	lg_dma;
 	uint32_t	iop[2];
 	struct qla_hw_data *ha = vha->hw;
+	struct req_que *req;
+	struct rsp_que *rsp;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
+	req = vha->req;
+	rsp = req->rsp;
+
 	lg = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &lg_dma);
 	if (lg == NULL) {
 		DEBUG2_3(printk("%s(%ld): failed to allocate Login IOCB.\n",
@@ -1505,6 +1510,7 @@ qla24xx_login_fabric(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t domain,
 
 	lg->entry_type = LOGINOUT_PORT_IOCB_TYPE;
 	lg->entry_count = 1;
+	lg->handle = MAKE_HANDLE(req->id, lg->handle);
 	lg->nport_handle = cpu_to_le16(loop_id);
 	lg->control_flags = __constant_cpu_to_le16(LCF_COMMAND_PLOGI);
 	if (opt & BIT_0)
@@ -1753,6 +1759,8 @@ qla24xx_fabric_logout(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t domain,
 	struct logio_entry_24xx *lg;
 	dma_addr_t	lg_dma;
 	struct qla_hw_data *ha = vha->hw;
+	struct req_que *req;
+	struct rsp_que *rsp;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
@@ -1764,8 +1772,14 @@ qla24xx_fabric_logout(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t domain,
 	}
 	memset(lg, 0, sizeof(struct logio_entry_24xx));
 
+	if (ql2xmaxqueues > 1)
+		req = ha->req_q_map[0];
+	else
+		req = vha->req;
+	rsp = req->rsp;
 	lg->entry_type = LOGINOUT_PORT_IOCB_TYPE;
 	lg->entry_count = 1;
+	lg->handle = MAKE_HANDLE(req->id, lg->handle);
 	lg->nport_handle = cpu_to_le16(loop_id);
 	lg->control_flags =
 	    __constant_cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO);
@@ -2204,21 +2218,21 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats,
 }
 
 int
-qla24xx_abort_command(scsi_qla_host_t *vha, srb_t *sp, struct req_que *req)
+qla24xx_abort_command(srb_t *sp)
 {
 	int		rval;
-	fc_port_t	*fcport;
 	unsigned long   flags = 0;
 
 	struct abort_entry_24xx *abt;
 	dma_addr_t	abt_dma;
 	uint32_t	handle;
+	fc_port_t	*fcport = sp->fcport;
+	struct scsi_qla_host *vha = fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
+	struct req_que *req = sp->que;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
-	fcport = sp->fcport;
-
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) {
 		if (req->outstanding_cmds[handle] == sp)
@@ -2240,6 +2254,7 @@ qla24xx_abort_command(scsi_qla_host_t *vha, srb_t *sp, struct req_que *req)
 
 	abt->entry_type = ABORT_IOCB_TYPE;
 	abt->entry_count = 1;
+	abt->handle = MAKE_HANDLE(req->id, abt->handle);
 	abt->nport_handle = cpu_to_le16(fcport->loop_id);
 	abt->handle_to_abort = handle;
 	abt->port_id[0] = fcport->d_id.b.al_pa;
@@ -2281,7 +2296,7 @@ struct tsk_mgmt_cmd {
 
 static int
 __qla24xx_issue_tmf(char *name, uint32_t type, struct fc_port *fcport,
-    unsigned int l)
+    unsigned int l, int tag)
 {
 	int		rval, rval2;
 	struct tsk_mgmt_cmd *tsk;
@@ -2295,8 +2310,8 @@ __qla24xx_issue_tmf(char *name, uint32_t type, struct fc_port *fcport,
 
 	vha = fcport->vha;
 	ha = vha->hw;
-	req = ha->req_q_map[0];
-	rsp = ha->rsp_q_map[0];
+	req = vha->req;
+	rsp = req->rsp;
 	tsk = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &tsk_dma);
 	if (tsk == NULL) {
 		DEBUG2_3(printk("%s(%ld): failed to allocate Task Management "
@@ -2307,6 +2322,7 @@ __qla24xx_issue_tmf(char *name, uint32_t type, struct fc_port *fcport,
 
 	tsk->p.tsk.entry_type = TSK_MGMT_IOCB_TYPE;
 	tsk->p.tsk.entry_count = 1;
+	tsk->p.tsk.handle = MAKE_HANDLE(req->id, tsk->p.tsk.handle);
 	tsk->p.tsk.nport_handle = cpu_to_le16(fcport->loop_id);
 	tsk->p.tsk.timeout = cpu_to_le16(ha->r_a_tov / 10 * 2);
 	tsk->p.tsk.control_flags = cpu_to_le32(type);
@@ -2353,15 +2369,15 @@ __qla24xx_issue_tmf(char *name, uint32_t type, struct fc_port *fcport,
 }
 
 int
-qla24xx_abort_target(struct fc_port *fcport, unsigned int l)
+qla24xx_abort_target(struct fc_port *fcport, unsigned int l, int tag)
 {
-	return __qla24xx_issue_tmf("Target", TCF_TARGET_RESET, fcport, l);
+	return __qla24xx_issue_tmf("Target", TCF_TARGET_RESET, fcport, l, tag);
 }
 
 int
-qla24xx_lun_reset(struct fc_port *fcport, unsigned int l)
+qla24xx_lun_reset(struct fc_port *fcport, unsigned int l, int tag)
 {
-	return __qla24xx_issue_tmf("Lun", TCF_LUN_RESET, fcport, l);
+	return __qla24xx_issue_tmf("Lun", TCF_LUN_RESET, fcport, l, tag);
 }
 
 int
@@ -3150,6 +3166,8 @@ qla25xx_init_req_que(struct scsi_qla_host *vha, struct req_que *req)
 		WRT_REG_DWORD(&reg->req_q_in, 0);
 		WRT_REG_DWORD(&reg->req_q_out, 0);
 	}
+	req->req_q_in = &reg->req_q_in;
+	req->req_q_out = &reg->req_q_out;
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	rval = qla2x00_mailbox_command(vha, mcp);
@@ -3176,7 +3194,6 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp)
 	mcp->mb[6] = MSW(MSD(rsp->dma));
 	mcp->mb[7] = LSW(MSD(rsp->dma));
 	mcp->mb[5] = rsp->length;
-	mcp->mb[11] = rsp->vp_idx;
 	mcp->mb[14] = rsp->msix->entry;
 	mcp->mb[13] = rsp->rid;
 
@@ -3188,7 +3205,7 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp)
 	mcp->mb[8] = 0;
 	/* que out ptr index */
 	mcp->mb[9] = 0;
-	mcp->out_mb = MBX_14|MBX_13|MBX_12|MBX_11|MBX_10|MBX_9|MBX_8|MBX_7
+	mcp->out_mb = MBX_14|MBX_13|MBX_9|MBX_8|MBX_7
 			|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1|MBX_0;
 	mcp->in_mb = MBX_0;
 	mcp->flags = MBX_DMA_OUT;
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 51716c7..9c08479 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -398,9 +398,8 @@ qla24xx_create_vhost(struct fc_vport *fc_vport)
 
 	qla2x00_start_timer(vha, qla2x00_timer, WATCH_INTERVAL);
 
-	memset(vha->req_ques, 0, sizeof(vha->req_ques));
-	vha->req_ques[0] = ha->req_q_map[0]->id;
-	host->can_queue = ha->req_q_map[0]->length + 128;
+	vha->req = base_vha->req;
+	host->can_queue = base_vha->req->length + 128;
 	host->this_id = 255;
 	host->cmd_per_lun = 3;
 	host->max_cmd_len = MAX_CMDSZ;
@@ -515,76 +514,53 @@ int qla25xx_update_req_que(struct scsi_qla_host *vha, uint8_t que, uint8_t qos)
 
 /* Delete all queues for a given vhost */
 int
-qla25xx_delete_queues(struct scsi_qla_host *vha, uint8_t que_no)
+qla25xx_delete_queues(struct scsi_qla_host *vha)
 {
 	int cnt, ret = 0;
 	struct req_que *req = NULL;
 	struct rsp_que *rsp = NULL;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (que_no) {
-	/* Delete request queue */
-		req = ha->req_q_map[que_no];
+	/* Delete request queues */
+	for (cnt = 1; cnt < ha->max_req_queues; cnt++) {
+		req = ha->req_q_map[cnt];
 		if (req) {
-			rsp = req->rsp;
 			ret = qla25xx_delete_req_que(vha, req);
 			if (ret != QLA_SUCCESS) {
 				qla_printk(KERN_WARNING, ha,
-				"Couldn't delete req que %d\n", req->id);
+				"Couldn't delete req que %d\n",
+				req->id);
 				return ret;
 			}
-			/* Delete associated response queue */
-			if (rsp) {
-				ret = qla25xx_delete_rsp_que(vha, rsp);
-				if (ret != QLA_SUCCESS) {
-					qla_printk(KERN_WARNING, ha,
-						"Couldn't delete rsp que %d\n",
-						rsp->id);
-					return ret;
-				}
-			}
 		}
-	} else {  /* delete all queues of this host */
-		for (cnt = 0; cnt < QLA_MAX_HOST_QUES; cnt++) {
-			/* Delete request queues */
-			req = ha->req_q_map[vha->req_ques[cnt]];
-			if (req && req->id) {
-				rsp = req->rsp;
-				ret = qla25xx_delete_req_que(vha, req);
-				if (ret != QLA_SUCCESS) {
-					qla_printk(KERN_WARNING, ha,
-						"Couldn't delete req que %d\n",
-						vha->req_ques[cnt]);
-					return ret;
-				}
-				vha->req_ques[cnt] = ha->req_q_map[0]->id;
-			/* Delete associated response queue */
-				if (rsp && rsp->id) {
-					ret = qla25xx_delete_rsp_que(vha, rsp);
-					if (ret != QLA_SUCCESS) {
-						qla_printk(KERN_WARNING, ha,
-						"Couldn't delete rsp que %d\n",
-						rsp->id);
-						return ret;
-					}
-				}
+	}
+
+	/* Delete response queues */
+	for (cnt = 1; cnt < ha->max_rsp_queues; cnt++) {
+		rsp = ha->rsp_q_map[cnt];
+		if (rsp) {
+			ret = qla25xx_delete_rsp_que(vha, rsp);
+			if (ret != QLA_SUCCESS) {
+				qla_printk(KERN_WARNING, ha,
+				"Couldn't delete rsp que %d\n",
+				rsp->id);
+				return ret;
 			}
 		}
 	}
-	qla_printk(KERN_INFO, ha, "Queues deleted for vport:%d\n",
-		vha->vp_idx);
 	return ret;
 }
 
 int
 qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options,
-	uint8_t vp_idx, uint16_t rid, uint8_t rsp_que, uint8_t qos)
+	uint8_t vp_idx, uint16_t rid, int rsp_que, uint8_t qos)
 {
 	int ret = 0;
 	struct req_que *req = NULL;
 	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
 	uint16_t que_id = 0;
 	device_reg_t __iomem *reg;
+	uint32_t cnt;
 
 	req = kzalloc(sizeof(struct req_que), GFP_KERNEL);
 	if (req == NULL) {
@@ -604,8 +580,8 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options,
 	}
 
 	mutex_lock(&ha->vport_lock);
-	que_id = find_first_zero_bit(ha->req_qid_map, ha->max_queues);
-	if (que_id >= ha->max_queues) {
+	que_id = find_first_zero_bit(ha->req_qid_map, ha->max_req_queues);
+	if (que_id >= ha->max_req_queues) {
 		mutex_unlock(&ha->vport_lock);
 		qla_printk(KERN_INFO, ha, "No resources to create "
 			 "additional request queue\n");
@@ -617,10 +593,10 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options,
 	req->vp_idx = vp_idx;
 	req->qos = qos;
 
-	if (ha->rsp_q_map[rsp_que]) {
+	if (rsp_que < 0)
+		req->rsp = NULL;
+	else
 		req->rsp = ha->rsp_q_map[rsp_que];
-		req->rsp->req = req;
-	}
 	/* Use alternate PCI bus number */
 	if (MSB(req->rid))
 		options |= BIT_4;
@@ -628,13 +604,16 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options,
 	if (LSB(req->rid))
 		options |= BIT_5;
 	req->options = options;
+
+	for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++)
+		req->outstanding_cmds[cnt] = NULL;
+	req->current_outstanding_cmd = 1;
+
 	req->ring_ptr = req->ring;
 	req->ring_index = 0;
 	req->cnt = req->length;
 	req->id = que_id;
 	reg = ISP_QUE_REG(ha, que_id);
-	req->req_q_in = &reg->isp25mq.req_q_in;
-	req->req_q_out = &reg->isp25mq.req_q_out;
 	req->max_q_depth = ha->req_q_map[0]->max_q_depth;
 	mutex_unlock(&ha->vport_lock);
 
@@ -657,7 +636,7 @@ que_failed:
 /* create response queue */
 int
 qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options,
-	uint8_t vp_idx, uint16_t rid)
+	uint8_t vp_idx, uint16_t rid, int req)
 {
 	int ret = 0;
 	struct rsp_que *rsp = NULL;
@@ -672,7 +651,7 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options,
 		goto que_failed;
 	}
 
-	rsp->length = RESPONSE_ENTRY_CNT_2300;
+	rsp->length = RESPONSE_ENTRY_CNT_MQ;
 	rsp->ring = dma_alloc_coherent(&ha->pdev->dev,
 			(rsp->length + 1) * sizeof(response_t),
 			&rsp->dma, GFP_KERNEL);
@@ -683,8 +662,8 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options,
 	}
 
 	mutex_lock(&ha->vport_lock);
-	que_id = find_first_zero_bit(ha->rsp_qid_map, ha->max_queues);
-	if (que_id >= ha->max_queues) {
+	que_id = find_first_zero_bit(ha->rsp_qid_map, ha->max_rsp_queues);
+	if (que_id >= ha->max_rsp_queues) {
 		mutex_unlock(&ha->vport_lock);
 		qla_printk(KERN_INFO, ha, "No resources to create "
 			 "additional response queue\n");
@@ -708,8 +687,6 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options,
 	if (LSB(rsp->rid))
 		options |= BIT_5;
 	rsp->options = options;
-	rsp->ring_ptr = rsp->ring;
-	rsp->ring_index = 0;
 	rsp->id = que_id;
 	reg = ISP_QUE_REG(ha, que_id);
 	rsp->rsp_q_in = &reg->isp25mq.rsp_q_in;
@@ -728,9 +705,12 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options,
 		mutex_unlock(&ha->vport_lock);
 		goto que_failed;
 	}
+	if (req >= 0)
+		rsp->req = ha->req_q_map[req];
+	else
+		rsp->req = NULL;
 
 	qla2x00_init_response_q_entries(rsp);
-
 	return rsp->id;
 
 que_failed:
@@ -744,14 +724,16 @@ qla25xx_create_queues(struct scsi_qla_host *vha, uint8_t qos)
 	uint16_t options = 0;
 	uint8_t ret = 0;
 	struct qla_hw_data *ha = vha->hw;
+	struct rsp_que *rsp;
 
 	options |= BIT_1;
-	ret = qla25xx_create_rsp_que(ha, options, vha->vp_idx, 0);
+	ret = qla25xx_create_rsp_que(ha, options, vha->vp_idx, 0, -1);
 	if (!ret) {
 		qla_printk(KERN_WARNING, ha, "Response Que create failed\n");
 		return ret;
 	} else
 		qla_printk(KERN_INFO, ha, "Response Que:%d created.\n", ret);
+	rsp = ha->rsp_q_map[ret];
 
 	options = 0;
 	if (qos & BIT_7)
@@ -759,10 +741,11 @@ qla25xx_create_queues(struct scsi_qla_host *vha, uint8_t qos)
 	ret = qla25xx_create_req_que(ha, options, vha->vp_idx, 0, ret,
 					qos & ~BIT_7);
 	if (ret) {
-		vha->req_ques[0] = ret;
+		vha->req = ha->req_q_map[ret];
 		qla_printk(KERN_INFO, ha, "Request Que:%d created.\n", ret);
 	} else
 		qla_printk(KERN_WARNING, ha, "Request Que create failed\n");
+	rsp->req = ha->req_q_map[ret];
 
 	return ret;
 }
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 29234ba..e2647e0 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -187,7 +187,7 @@ static void qla2x00_sp_free_dma(srb_t *);
 /* -------------------------------------------------------------------------- */
 static int qla2x00_alloc_queues(struct qla_hw_data *ha)
 {
-	ha->req_q_map = kzalloc(sizeof(struct req_que *) * ha->max_queues,
+	ha->req_q_map = kzalloc(sizeof(struct req_que *) * ha->max_req_queues,
 				GFP_KERNEL);
 	if (!ha->req_q_map) {
 		qla_printk(KERN_WARNING, ha,
@@ -195,7 +195,7 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha)
 		goto fail_req_map;
 	}
 
-	ha->rsp_q_map = kzalloc(sizeof(struct rsp_que *) * ha->max_queues,
+	ha->rsp_q_map = kzalloc(sizeof(struct rsp_que *) * ha->max_rsp_queues,
 				GFP_KERNEL);
 	if (!ha->rsp_q_map) {
 		qla_printk(KERN_WARNING, ha,
@@ -213,16 +213,8 @@ fail_req_map:
 	return -ENOMEM;
 }
 
-static void qla2x00_free_que(struct qla_hw_data *ha, struct req_que *req,
-	struct rsp_que *rsp)
+static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
 {
-	if (rsp && rsp->ring)
-		dma_free_coherent(&ha->pdev->dev,
-		(rsp->length + 1) * sizeof(response_t),
-		rsp->ring, rsp->dma);
-
-	kfree(rsp);
-	rsp = NULL;
 	if (req && req->ring)
 		dma_free_coherent(&ha->pdev->dev,
 		(req->length + 1) * sizeof(request_t),
@@ -232,22 +224,36 @@ static void qla2x00_free_que(struct qla_hw_data *ha, struct req_que *req,
 	req = NULL;
 }
 
+static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
+{
+	if (rsp && rsp->ring)
+		dma_free_coherent(&ha->pdev->dev,
+		(rsp->length + 1) * sizeof(response_t),
+		rsp->ring, rsp->dma);
+
+	kfree(rsp);
+	rsp = NULL;
+}
+
 static void qla2x00_free_queues(struct qla_hw_data *ha)
 {
 	struct req_que *req;
 	struct rsp_que *rsp;
 	int cnt;
 
-	for (cnt = 0; cnt < ha->max_queues; cnt++) {
-		rsp = ha->rsp_q_map[cnt];
+	for (cnt = 0; cnt < ha->max_req_queues; cnt++) {
 		req = ha->req_q_map[cnt];
-		qla2x00_free_que(ha, req, rsp);
+		qla2x00_free_req_que(ha, req);
 	}
-	kfree(ha->rsp_q_map);
-	ha->rsp_q_map = NULL;
-
 	kfree(ha->req_q_map);
 	ha->req_q_map = NULL;
+
+	for (cnt = 0; cnt < ha->max_rsp_queues; cnt++) {
+		rsp = ha->rsp_q_map[cnt];
+		qla2x00_free_rsp_que(ha, rsp);
+	}
+	kfree(ha->rsp_q_map);
+	ha->rsp_q_map = NULL;
 }
 
 static char *
@@ -612,7 +618,7 @@ qla2x00_wait_for_loop_ready(scsi_qla_host_t *vha)
 void
 qla2x00_abort_fcport_cmds(fc_port_t *fcport)
 {
-	int cnt, que, id;
+	int cnt;
 	unsigned long flags;
 	srb_t *sp;
 	scsi_qla_host_t *vha = fcport->vha;
@@ -620,32 +626,27 @@ qla2x00_abort_fcport_cmds(fc_port_t *fcport)
 	struct req_que *req;
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	for (que = 0; que < QLA_MAX_HOST_QUES; que++) {
-		id = vha->req_ques[que];
-		req = ha->req_q_map[id];
-		if (!req)
+	req = vha->req;
+	for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
+		sp = req->outstanding_cmds[cnt];
+		if (!sp)
+			continue;
+		if (sp->fcport != fcport)
 			continue;
-		for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
-			sp = req->outstanding_cmds[cnt];
-			if (!sp)
-				continue;
-			if (sp->fcport != fcport)
-				continue;
 
-			spin_unlock_irqrestore(&ha->hardware_lock, flags);
-			if (ha->isp_ops->abort_command(vha, sp, req)) {
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+		if (ha->isp_ops->abort_command(sp)) {
+			DEBUG2(qla_printk(KERN_WARNING, ha,
+			"Abort failed --  %lx\n",
+			sp->cmd->serial_number));
+		} else {
+			if (qla2x00_eh_wait_on_command(sp->cmd) !=
+				QLA_SUCCESS)
 				DEBUG2(qla_printk(KERN_WARNING, ha,
-				"Abort failed --  %lx\n",
+				"Abort failed while waiting --  %lx\n",
 				sp->cmd->serial_number));
-			} else {
-				if (qla2x00_eh_wait_on_command(sp->cmd) !=
-					QLA_SUCCESS)
-					DEBUG2(qla_printk(KERN_WARNING, ha,
-					"Abort failed while waiting --  %lx\n",
-					sp->cmd->serial_number));
-			}
-			spin_lock_irqsave(&ha->hardware_lock, flags);
 		}
+		spin_lock_irqsave(&ha->hardware_lock, flags);
 	}
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
@@ -726,7 +727,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 		" pid=%ld.\n", __func__, vha->host_no, sp, serial));
 
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
-		if (ha->isp_ops->abort_command(vha, sp, req)) {
+		if (ha->isp_ops->abort_command(sp)) {
 			DEBUG2(printk("%s(%ld): abort_command "
 			"mbx failed.\n", __func__, vha->host_no));
 			ret = FAILED;
@@ -820,7 +821,7 @@ static char *reset_errors[] = {
 
 static int
 __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
-    struct scsi_cmnd *cmd, int (*do_reset)(struct fc_port *, unsigned int))
+    struct scsi_cmnd *cmd, int (*do_reset)(struct fc_port *, unsigned int, int))
 {
 	scsi_qla_host_t *vha = shost_priv(cmd->device->host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
@@ -841,7 +842,8 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
 	if (qla2x00_wait_for_loop_ready(vha) != QLA_SUCCESS)
 		goto eh_reset_failed;
 	err = 2;
-	if (do_reset(fcport, cmd->device->lun) != QLA_SUCCESS)
+	if (do_reset(fcport, cmd->device->lun, cmd->request->cpu + 1)
+		!= QLA_SUCCESS)
 		goto eh_reset_failed;
 	err = 3;
 	if (qla2x00_eh_wait_for_pending_commands(vha, cmd->device->id,
@@ -1065,7 +1067,7 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
 			if (fcport->port_type != FCT_TARGET)
 				continue;
 
-			ret = ha->isp_ops->target_reset(fcport, 0);
+			ret = ha->isp_ops->target_reset(fcport, 0, 0);
 			if (ret != QLA_SUCCESS) {
 				DEBUG2_3(printk("%s(%ld): bus_reset failed: "
 				    "target_reset=%d d_id=%x.\n", __func__,
@@ -1089,7 +1091,7 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 	struct req_que *req;
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	for (que = 0; que < ha->max_queues; que++) {
+	for (que = 0; que < ha->max_req_queues; que++) {
 		req = ha->req_q_map[que];
 		if (!req)
 			continue;
@@ -1124,7 +1126,7 @@ qla2xxx_slave_configure(struct scsi_device *sdev)
 	scsi_qla_host_t *vha = shost_priv(sdev->host);
 	struct qla_hw_data *ha = vha->hw;
 	struct fc_rport *rport = starget_to_rport(sdev->sdev_target);
-	struct req_que *req = ha->req_q_map[vha->req_ques[0]];
+	struct req_que *req = vha->req;
 
 	if (sdev->tagged_supported)
 		scsi_activate_tcq(sdev, req->max_q_depth);
@@ -1572,8 +1574,9 @@ skip_pio:
 	}
 
 	/* Determine queue resources */
-	ha->max_queues = 1;
-	if (ql2xmaxqueues <= 1 || (!IS_QLA25XX(ha) && !IS_QLA81XX(ha)))
+	ha->max_req_queues = ha->max_rsp_queues = 1;
+	if (ql2xmaxqueues <= 1  &&
+		(!IS_QLA25XX(ha) && !IS_QLA81XX(ha)))
 		goto mqiobase_exit;
 	ha->mqiobase = ioremap(pci_resource_start(ha->pdev, 3),
 			pci_resource_len(ha->pdev, 3));
@@ -1581,20 +1584,17 @@ skip_pio:
 		/* Read MSIX vector size of the board */
 		pci_read_config_word(ha->pdev, QLA_PCI_MSIX_CONTROL, &msix);
 		ha->msix_count = msix;
-		/* Max queues are bounded by available msix vectors */
-		/* queue 0 uses two msix vectors */
-		if (ha->msix_count - 1 < ql2xmaxqueues)
-			ha->max_queues = ha->msix_count - 1;
-		else if (ql2xmaxqueues > QLA_MQ_SIZE)
-			ha->max_queues = QLA_MQ_SIZE;
-		else
-			ha->max_queues = ql2xmaxqueues;
-		qla_printk(KERN_INFO, ha,
-			"MSI-X vector count: %d\n", msix);
-	}
+		if (ql2xmaxqueues > 1) {
+			ha->max_req_queues = ql2xmaxqueues > QLA_MQ_SIZE ?
+						QLA_MQ_SIZE : ql2xmaxqueues;
+			DEBUG2(qla_printk(KERN_INFO, ha, "QoS mode set, max no"
+			" of request queues:%d\n", ha->max_req_queues));
+		}
+	} else
+		qla_printk(KERN_INFO, ha, "BAR 3 not enabled\n");
 
 mqiobase_exit:
-	ha->msix_count = ha->max_queues + 1;
+	ha->msix_count = ha->max_rsp_queues + 1;
 	return (0);
 
 iospace_error_exit:
@@ -1804,14 +1804,15 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
 		ret = -ENOMEM;
 		qla2x00_mem_free(ha);
-		qla2x00_free_que(ha, req, rsp);
+		qla2x00_free_req_que(ha, req);
+		qla2x00_free_rsp_que(ha, rsp);
 		goto probe_hw_failed;
 	}
 
 	pci_set_drvdata(pdev, base_vha);
 
 	host = base_vha->host;
-	base_vha->req_ques[0] = req->id;
+	base_vha->req = req;
 	host->can_queue = req->length + 128;
 	if (IS_QLA2XXX_MIDTYPE(ha))
 		base_vha->mgmt_svr_loop_id = 10 + base_vha->vp_idx;
@@ -1842,7 +1843,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 	ha->rsp_q_map[0] = rsp;
 	ha->req_q_map[0] = req;
-
+	rsp->req = req;
+	req->rsp = rsp;
+	set_bit(0, ha->req_qid_map);
+	set_bit(0, ha->rsp_qid_map);
 	/* FWI2-capable only. */
 	req->req_q_in = &ha->iobase->isp24.req_q_in;
 	req->req_q_out = &ha->iobase->isp24.req_q_out;
@@ -1918,8 +1922,9 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	return 0;
 
 probe_init_failed:
-	qla2x00_free_que(ha, req, rsp);
-	ha->max_queues = 0;
+	qla2x00_free_req_que(ha, req);
+	qla2x00_free_rsp_que(ha, rsp);
+	ha->max_req_queues = ha->max_rsp_queues = 0;
 
 probe_failed:
 	if (base_vha->timer_active)
@@ -2018,6 +2023,8 @@ qla2x00_free_device(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
 
+	qla25xx_delete_queues(vha);
+
 	if (ha->flags.fce_enabled)
 		qla2x00_disable_fce_trace(vha, NULL, NULL);
 
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 81187a0..2a9b3f8 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -920,6 +920,7 @@ qla2xxx_flash_npiv_conf(scsi_qla_host_t *vha)
 
 	entry = data + sizeof(struct qla_npiv_header);
 	cnt = le16_to_cpu(hdr.entries);
+	ha->flex_port_count = cnt;
 	for (i = 0; cnt; cnt--, entry++, i++) {
 		uint16_t flags;
 		struct fc_vport_identifiers vid;
-- 
cgit v0.10.2


From 68ca949cdb04b4dc71451a999148fbc5f187a220 Mon Sep 17 00:00:00 2001
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:41 -0700
Subject: [SCSI] qla2xxx: Add CPU affinity support.

Set the module parameter ql2xmultique_tag to 1 to enable this
feature. In this mode, the total number of response queues
created is equal to the number of online cpus. Turning the block
layer's rq_affinity mode on enables requests to be routed to the
proper cpu and at the same time it enables completion of the IO
in a response queue that is affined to the cpu in the request
path.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index bda6658..f3536e5 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1531,7 +1531,7 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 	qla24xx_vport_disable(fc_vport, disable);
 
 	ret = 0;
-	if (ha->cur_vport_count <= ha->flex_port_count
+	if (ha->cur_vport_count <= ha->flex_port_count || ql2xmultique_tag
 		|| ha->max_req_queues == 1 || !ha->npiv_info)
 		goto vport_queue;
 	/* Create a request queue in QoS mode for the vport */
@@ -1599,7 +1599,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
 		    vha->host_no, vha->vp_idx, vha));
         }
 
-	if (vha->req->id) {
+	if (vha->req->id && !ql2xmultique_tag) {
 		if (qla25xx_delete_req_que(vha, vha->req) != QLA_SUCCESS)
 			qla_printk(KERN_WARNING, ha,
 				"Queue delete failed.\n");
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 57d659c..09190ba 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2171,6 +2171,7 @@ struct rsp_que {
 	struct qla_msix_entry *msix;
 	struct req_que *req;
 	srb_t *status_srb; /* status continuation entry */
+	struct work_struct q_work;
 };
 
 /* Request queue data structure */
@@ -2539,6 +2540,7 @@ struct qla_hw_data {
 	struct qla_chip_state_84xx *cs84xx;
 	struct qla_statistics qla_stats;
 	struct isp_operations *isp_ops;
+	struct workqueue_struct *wq;
 };
 
 /*
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 94b69d8..7b15ded 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -15,6 +15,7 @@ static request_t *qla2x00_req_pkt(struct scsi_qla_host *, struct req_que *,
 							struct rsp_que *rsp);
 static void qla2x00_isp_cmd(struct scsi_qla_host *, struct req_que *);
 
+static void qla25xx_set_que(srb_t *, struct req_que **, struct rsp_que **);
 /**
  * qla2x00_get_cmd_direction() - Determine control_flag data direction.
  * @cmd: SCSI command
@@ -726,8 +727,7 @@ qla24xx_start_scsi(srb_t *sp)
 	/* Setup device pointers. */
 	ret = 0;
 
-	req = vha->req;
-	rsp = ha->rsp_q_map[0];
+	qla25xx_set_que(sp, &req, &rsp);
 	sp->que = req;
 
 	/* So we know we haven't pci_map'ed anything yet */
@@ -850,3 +850,21 @@ queuing_error:
 
 	return QLA_FUNCTION_FAILED;
 }
+
+static void qla25xx_set_que(srb_t *sp, struct req_que **req,
+	struct rsp_que **rsp)
+{
+	struct scsi_cmnd *cmd = sp->cmd;
+	struct scsi_qla_host *vha = sp->fcport->vha;
+	struct qla_hw_data *ha = sp->fcport->vha->hw;
+	int affinity = cmd->request->cpu;
+
+	if (ql2xmultique_tag && affinity >= 0 &&
+		affinity < ha->max_rsp_queues - 1) {
+		*rsp = ha->rsp_q_map[affinity + 1];
+		*req = ha->req_q_map[1];
+	} else {
+		*req = vha->req;
+		*rsp = ha->rsp_q_map[0];
+	}
+}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index c8e906c..41e50c2 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1718,6 +1718,25 @@ qla24xx_msix_rsp_q(int irq, void *dev_id)
 }
 
 static irqreturn_t
+qla25xx_msix_rsp_q(int irq, void *dev_id)
+{
+	struct qla_hw_data *ha;
+	struct rsp_que *rsp;
+
+	rsp = (struct rsp_que *) dev_id;
+	if (!rsp) {
+		printk(KERN_INFO
+			"%s(): NULL response queue pointer\n", __func__);
+		return IRQ_NONE;
+	}
+	ha = rsp->hw;
+
+	queue_work_on((int) (rsp->id - 1), ha->wq, &rsp->q_work);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t
 qla24xx_msix_default(int irq, void *dev_id)
 {
 	scsi_qla_host_t	*vha;
@@ -1806,9 +1825,10 @@ struct qla_init_msix_entry {
 	irq_handler_t handler;
 };
 
-static struct qla_init_msix_entry msix_entries[2] = {
+static struct qla_init_msix_entry msix_entries[3] = {
 	{ "qla2xxx (default)", qla24xx_msix_default },
 	{ "qla2xxx (rsp_q)", qla24xx_msix_rsp_q },
+	{ "qla2xxx (multiq)", qla25xx_msix_rsp_q },
 };
 
 static void
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index bfdc89f..366522e 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -1497,7 +1497,10 @@ qla24xx_login_fabric(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t domain,
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
-	req = vha->req;
+	if (ql2xmultique_tag)
+		req = ha->req_q_map[0];
+	else
+		req = vha->req;
 	rsp = req->rsp;
 
 	lg = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &lg_dma);
@@ -2311,7 +2314,10 @@ __qla24xx_issue_tmf(char *name, uint32_t type, struct fc_port *fcport,
 	vha = fcport->vha;
 	ha = vha->hw;
 	req = vha->req;
-	rsp = req->rsp;
+	if (ql2xmultique_tag)
+		rsp = ha->rsp_q_map[tag + 1];
+	else
+		rsp = req->rsp;
 	tsk = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &tsk_dma);
 	if (tsk == NULL) {
 		DEBUG2_3(printk("%s(%ld): failed to allocate Task Management "
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 9c08479..650bcef 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -633,6 +633,15 @@ que_failed:
 	return 0;
 }
 
+static void qla_do_work(struct work_struct *work)
+{
+	struct rsp_que *rsp = container_of(work, struct rsp_que, q_work);
+	struct scsi_qla_host *vha;
+
+	vha = qla25xx_get_host(rsp);
+	qla24xx_process_response_queue(vha, rsp);
+}
+
 /* create response queue */
 int
 qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options,
@@ -711,6 +720,8 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options,
 		rsp->req = NULL;
 
 	qla2x00_init_response_q_entries(rsp);
+	if (rsp->hw->wq)
+		INIT_WORK(&rsp->q_work, qla_do_work);
 	return rsp->id;
 
 que_failed:
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index e2647e0..d6817df 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -96,6 +96,13 @@ MODULE_PARM_DESC(ql2xmaxqueues,
 		"Enables MQ settings "
 		"Default is 1 for single queue. Set it to number \
 			of queues in MQ mode.");
+
+int ql2xmultique_tag;
+module_param(ql2xmultique_tag, int, S_IRUGO|S_IRUSR);
+MODULE_PARM_DESC(ql2xmultique_tag,
+		"Enables CPU affinity settings for the driver "
+		"Default is 0 for no affinity of request and response IO. "
+		"Set it to 1 to turn on the cpu affinity.");
 /*
  * SCSI host template entry points
  */
@@ -256,6 +263,47 @@ static void qla2x00_free_queues(struct qla_hw_data *ha)
 	ha->rsp_q_map = NULL;
 }
 
+static int qla25xx_setup_mode(struct scsi_qla_host *vha)
+{
+	uint16_t options = 0;
+	int ques, req, ret;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (ql2xmultique_tag) {
+		/* CPU affinity mode */
+		ha->wq = create_workqueue("qla2xxx_wq");
+		/* create a request queue for IO */
+		options |= BIT_7;
+		req = qla25xx_create_req_que(ha, options, 0, 0, -1,
+			QLA_DEFAULT_QUE_QOS);
+		if (!req) {
+			qla_printk(KERN_WARNING, ha,
+				"Can't create request queue\n");
+			goto fail;
+		}
+		vha->req = ha->req_q_map[req];
+		options |= BIT_1;
+		for (ques = 1; ques < ha->max_rsp_queues; ques++) {
+			ret = qla25xx_create_rsp_que(ha, options, 0, 0, req);
+			if (!ret) {
+				qla_printk(KERN_WARNING, ha,
+					"Response Queue create failed\n");
+				goto fail2;
+			}
+		}
+		DEBUG2(qla_printk(KERN_INFO, ha,
+			"CPU affinity mode enabled, no. of response"
+			" queues:%d, no. of request queues:%d\n",
+			ha->max_rsp_queues, ha->max_req_queues));
+	}
+	return 0;
+fail2:
+	qla25xx_delete_queues(vha);
+fail:
+	ha->mqenable = 0;
+	return 1;
+}
+
 static char *
 qla2x00_pci_info_str(struct scsi_qla_host *vha, char *str)
 {
@@ -998,6 +1046,9 @@ qla2xxx_eh_host_reset(struct scsi_cmnd *cmd)
 		if (qla2x00_vp_abort_isp(vha))
 			goto eh_host_reset_lock;
 	} else {
+		if (ha->wq)
+			flush_workqueue(ha->wq);
+
 		set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
 		if (qla2x00_abort_isp(base_vha)) {
 			clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
@@ -1521,6 +1572,7 @@ qla2x00_iospace_config(struct qla_hw_data *ha)
 {
 	resource_size_t pio;
 	uint16_t msix;
+	int cpus;
 
 	if (pci_request_selected_regions(ha->pdev, ha->bars,
 	    QLA2XXX_DRIVER_NAME)) {
@@ -1575,7 +1627,7 @@ skip_pio:
 
 	/* Determine queue resources */
 	ha->max_req_queues = ha->max_rsp_queues = 1;
-	if (ql2xmaxqueues <= 1  &&
+	if ((ql2xmaxqueues <= 1 || ql2xmultique_tag < 1) &&
 		(!IS_QLA25XX(ha) && !IS_QLA81XX(ha)))
 		goto mqiobase_exit;
 	ha->mqiobase = ioremap(pci_resource_start(ha->pdev, 3),
@@ -1584,12 +1636,21 @@ skip_pio:
 		/* Read MSIX vector size of the board */
 		pci_read_config_word(ha->pdev, QLA_PCI_MSIX_CONTROL, &msix);
 		ha->msix_count = msix;
-		if (ql2xmaxqueues > 1) {
+		/* Max queues are bounded by available msix vectors */
+		/* queue 0 uses two msix vectors */
+		if (ql2xmultique_tag) {
+			cpus = num_online_cpus();
+			ha->max_rsp_queues = (ha->msix_count - 1 - cpus) ?
+				(cpus + 1) : (ha->msix_count - 1);
+			ha->max_req_queues = 2;
+		} else if (ql2xmaxqueues > 1) {
 			ha->max_req_queues = ql2xmaxqueues > QLA_MQ_SIZE ?
 						QLA_MQ_SIZE : ql2xmaxqueues;
 			DEBUG2(qla_printk(KERN_INFO, ha, "QoS mode set, max no"
 			" of request queues:%d\n", ha->max_req_queues));
 		}
+		qla_printk(KERN_INFO, ha,
+			"MSI-X vector count: %d\n", msix);
 	} else
 		qla_printk(KERN_INFO, ha, "BAR 3 not enabled\n");
 
@@ -1871,6 +1932,12 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto probe_failed;
 	}
 
+	if (ha->mqenable)
+		if (qla25xx_setup_mode(base_vha))
+			qla_printk(KERN_WARNING, ha,
+				"Can't create queues, falling back to single"
+				" queue mode\n");
+
 	/*
 	 * Startup the kernel thread for this host adapter
 	 */
@@ -1982,6 +2049,13 @@ qla2x00_remove_one(struct pci_dev *pdev)
 
 	base_vha->flags.online = 0;
 
+	/* Flush the work queue and remove it */
+	if (ha->wq) {
+		flush_workqueue(ha->wq);
+		destroy_workqueue(ha->wq);
+		ha->wq = NULL;
+	}
+
 	/* Kill the kernel thread for this host */
 	if (ha->dpc_thread) {
 		struct task_struct *t = ha->dpc_thread;
-- 
cgit v0.10.2


From 67c2e93ae7465a3e279503ceddd7bd153d74bcf8 Mon Sep 17 00:00:00 2001
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:42 -0700
Subject: [SCSI] qla2xxx: Remove reference to request queue from scsi request
 block.

srbs used to maintain a reference to the request queue on which
it was enqueued. This is no longer required as the request queue
pointer is now maintained in the scsi host that issues the srb.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 09190ba..6911b9b 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -188,7 +188,6 @@ struct req_que;
  * SCSI Request Block
  */
 typedef struct srb {
-	struct req_que *que;
 	struct fc_port *fcport;
 
 	struct scsi_cmnd *cmd;		/* Linux SCSI command pkt */
@@ -2010,7 +2009,6 @@ typedef struct vport_params {
 #define VP_RET_CODE_NOT_FOUND		6
 
 struct qla_hw_data;
-struct req_que;
 struct rsp_que;
 /*
  * ISP operations
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 059909c..4c14cde 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3177,8 +3177,14 @@ qla2x00_loop_resync(scsi_qla_host_t *vha)
 {
 	int rval = QLA_SUCCESS;
 	uint32_t wait_time;
-	struct req_que *req = vha->req;
-	struct rsp_que *rsp = req->rsp;
+	struct req_que *req;
+	struct rsp_que *rsp;
+
+	if (ql2xmultique_tag)
+		req = vha->hw->req_q_map[0];
+	else
+		req = vha->req;
+	rsp = req->rsp;
 
 	atomic_set(&vha->loop_state, LOOP_UPDATE);
 	clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags);
@@ -4163,13 +4169,19 @@ qla24xx_configure_vhba(scsi_qla_host_t *vha)
 	uint16_t mb[MAILBOX_REGISTER_COUNT];
 	struct qla_hw_data *ha = vha->hw;
 	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
-	struct req_que *req = vha->req;
-	struct rsp_que *rsp = req->rsp;
+	struct req_que *req;
+	struct rsp_que *rsp;
 
 	if (!vha->vp_idx)
 		return -EINVAL;
 
 	rval = qla2x00_fw_ready(base_vha);
+	if (ql2xmultique_tag)
+		req = ha->req_q_map[0];
+	else
+		req = vha->req;
+	rsp = req->rsp;
+
 	if (rval == QLA_SUCCESS) {
 		clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags);
 		qla2x00_marker(vha, req, rsp, 0, 0, MK_SYNC_ALL);
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 7b15ded..b4c6010 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -93,9 +93,10 @@ qla2x00_calc_iocbs_64(uint16_t dsds)
  * Returns a pointer to the Continuation Type 0 IOCB packet.
  */
 static inline cont_entry_t *
-qla2x00_prep_cont_type0_iocb(struct req_que *req, struct scsi_qla_host *vha)
+qla2x00_prep_cont_type0_iocb(struct scsi_qla_host *vha)
 {
 	cont_entry_t *cont_pkt;
+	struct req_que *req = vha->req;
 	/* Adjust ring index. */
 	req->ring_index++;
 	if (req->ring_index == req->length) {
@@ -121,10 +122,11 @@ qla2x00_prep_cont_type0_iocb(struct req_que *req, struct scsi_qla_host *vha)
  * Returns a pointer to the continuation type 1 IOCB packet.
  */
 static inline cont_a64_entry_t *
-qla2x00_prep_cont_type1_iocb(struct req_que *req, scsi_qla_host_t *vha)
+qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *vha)
 {
 	cont_a64_entry_t *cont_pkt;
 
+	struct req_que *req = vha->req;
 	/* Adjust ring index. */
 	req->ring_index++;
 	if (req->ring_index == req->length) {
@@ -160,7 +162,6 @@ void qla2x00_build_scsi_iocbs_32(srb_t *sp, cmd_entry_t *cmd_pkt,
 	struct scsi_cmnd *cmd;
 	struct scatterlist *sg;
 	int i;
-	struct req_que *req;
 
 	cmd = sp->cmd;
 
@@ -175,8 +176,6 @@ void qla2x00_build_scsi_iocbs_32(srb_t *sp, cmd_entry_t *cmd_pkt,
 	}
 
 	vha = sp->fcport->vha;
-	req = sp->que;
-
 	cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp));
 
 	/* Three DSDs are available in the Command Type 2 IOCB */
@@ -193,7 +192,7 @@ void qla2x00_build_scsi_iocbs_32(srb_t *sp, cmd_entry_t *cmd_pkt,
 			 * Seven DSDs are available in the Continuation
 			 * Type 0 IOCB.
 			 */
-			cont_pkt = qla2x00_prep_cont_type0_iocb(req, vha);
+			cont_pkt = qla2x00_prep_cont_type0_iocb(vha);
 			cur_dsd = (uint32_t *)&cont_pkt->dseg_0_address;
 			avail_dsds = 7;
 		}
@@ -221,7 +220,6 @@ void qla2x00_build_scsi_iocbs_64(srb_t *sp, cmd_entry_t *cmd_pkt,
 	struct scsi_cmnd *cmd;
 	struct scatterlist *sg;
 	int i;
-	struct req_que *req;
 
 	cmd = sp->cmd;
 
@@ -236,8 +234,6 @@ void qla2x00_build_scsi_iocbs_64(srb_t *sp, cmd_entry_t *cmd_pkt,
 	}
 
 	vha = sp->fcport->vha;
-	req = sp->que;
-
 	cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp));
 
 	/* Two DSDs are available in the Command Type 3 IOCB */
@@ -255,7 +251,7 @@ void qla2x00_build_scsi_iocbs_64(srb_t *sp, cmd_entry_t *cmd_pkt,
 			 * Five DSDs are available in the Continuation
 			 * Type 1 IOCB.
 			 */
-			cont_pkt = qla2x00_prep_cont_type1_iocb(req, vha);
+			cont_pkt = qla2x00_prep_cont_type1_iocb(vha);
 			cur_dsd = (uint32_t *)cont_pkt->dseg_0_address;
 			avail_dsds = 5;
 		}
@@ -354,7 +350,6 @@ qla2x00_start_scsi(srb_t *sp)
 	/* Build command packet */
 	req->current_outstanding_cmd = handle;
 	req->outstanding_cmds[handle] = sp;
-	sp->que = req;
 	sp->cmd->host_scribble = (unsigned char *)(unsigned long)handle;
 	req->cnt -= req_cnt;
 
@@ -655,7 +650,7 @@ qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt,
 	}
 
 	vha = sp->fcport->vha;
-	req = sp->que;
+	req = vha->req;
 
 	/* Set transfer direction */
 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
@@ -686,7 +681,7 @@ qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt,
 			 * Five DSDs are available in the Continuation
 			 * Type 1 IOCB.
 			 */
-			cont_pkt = qla2x00_prep_cont_type1_iocb(req, vha);
+			cont_pkt = qla2x00_prep_cont_type1_iocb(vha);
 			cur_dsd = (uint32_t *)cont_pkt->dseg_0_address;
 			avail_dsds = 5;
 		}
@@ -728,7 +723,6 @@ qla24xx_start_scsi(srb_t *sp)
 	ret = 0;
 
 	qla25xx_set_que(sp, &req, &rsp);
-	sp->que = req;
 
 	/* So we know we haven't pci_map'ed anything yet */
 	tot_dsds = 0;
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 366522e..258c39d 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -2232,7 +2232,7 @@ qla24xx_abort_command(srb_t *sp)
 	fc_port_t	*fcport = sp->fcport;
 	struct scsi_qla_host *vha = fcport->vha;
 	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req = sp->que;
+	struct req_que *req = vha->req;
 
 	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index d6817df..94e53a5 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -441,7 +441,6 @@ qla2x00_get_new_sp(scsi_qla_host_t *vha, fc_port_t *fcport,
 
 	sp->fcport = fcport;
 	sp->cmd = cmd;
-	sp->que = ha->req_q_map[0];
 	sp->flags = 0;
 	CMD_SP(cmd) = (void *)sp;
 	cmd->scsi_done = done;
@@ -742,7 +741,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 	unsigned long flags;
 	int wait = 0;
 	struct qla_hw_data *ha = vha->hw;
-	struct req_que *req;
+	struct req_que *req = vha->req;
 	srb_t *spt;
 
 	qla2x00_block_error_handler(cmd);
@@ -758,7 +757,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 	spt = (srb_t *) CMD_SP(cmd);
 	if (!spt)
 		return SUCCESS;
-	req = spt->que;
 
 	/* Check active list for command command. */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -826,7 +824,7 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
 		return status;
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	req = sp->que;
+	req = vha->req;
 	for (cnt = 1; status == QLA_SUCCESS &&
 		cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
 		sp = req->outstanding_cmds[cnt];
-- 
cgit v0.10.2


From 8f979751367b9975fe606bce6a64b9d871dcfcfa Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:43 -0700
Subject: [SCSI] qla2xxx: Correct typo in read_nvram() callback.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index f3536e5..fc30f8e 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -97,7 +97,7 @@ qla2x00_sysfs_read_nvram(struct kobject *kobj,
 		return 0;
 
 	if (IS_NOCACHE_VPD_TYPE(ha))
-		ha->isp_ops->read_optrom(vha, ha->vpd, ha->flt_region_nvram << 2,
+		ha->isp_ops->read_optrom(vha, ha->nvram, ha->flt_region_nvram << 2,
 		    ha->nvram_size);
 	return memory_read_from_buffer(buf, count, &off, ha->nvram,
 					ha->nvram_size);
-- 
cgit v0.10.2


From fc3ea9bcb86a1c5126807f747291563e08405944 Mon Sep 17 00:00:00 2001
From: Harish Zunjarrao <harish.zunjarrao@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:44 -0700
Subject: [SCSI] qla2xxx: Correct hard-coded address of a second-port's NVRAM.

Signed-off-by: Harish Zunjarrao <harish.zunjarrao@qlogic.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 96ccb96..f389f3d 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -1711,7 +1711,7 @@ struct ex_init_cb_81xx {
 #define FA_VPD0_ADDR_81		0xD0000
 #define FA_VPD1_ADDR_81		0xD0400
 #define FA_NVRAM0_ADDR_81	0xD0080
-#define FA_NVRAM1_ADDR_81	0xD0480
+#define FA_NVRAM1_ADDR_81	0xD0180
 #define FA_FEATURE_ADDR_81	0xD4000
 #define FA_FLASH_DESCR_ADDR_81	0xD8000
 #define FA_FLASH_LAYOUT_ADDR_81	0xD8400
-- 
cgit v0.10.2


From 7d0dba174af217c73931532adf6bffb91d16c40f Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:45 -0700
Subject: [SCSI] qla2xxx: Restrict model-name/description device-table usage.

Information present in static table is only valid for pre-ISP25xx
adapters.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 4c14cde..a4a6a14 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1538,6 +1538,7 @@ qla2x00_set_model_info(scsi_qla_host_t *vha, uint8_t *model, size_t len,
 	char *st, *en;
 	uint16_t index;
 	struct qla_hw_data *ha = vha->hw;
+	int use_tbl = !IS_QLA25XX(ha) && IS_QLA81XX(ha);
 
 	if (memcmp(model, BINZERO, len) != 0) {
 		strncpy(ha->model_number, model, len);
@@ -1550,14 +1551,16 @@ qla2x00_set_model_info(scsi_qla_host_t *vha, uint8_t *model, size_t len,
 		}
 
 		index = (ha->pdev->subsystem_device & 0xff);
-		if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
+		if (use_tbl &&
+		    ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
 		    index < QLA_MODEL_NAMES)
 			strncpy(ha->model_desc,
 			    qla2x00_model_name[index * 2 + 1],
 			    sizeof(ha->model_desc) - 1);
 	} else {
 		index = (ha->pdev->subsystem_device & 0xff);
-		if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
+		if (use_tbl &&
+		    ha->pdev->subsystem_vendor == PCI_VENDOR_ID_QLOGIC &&
 		    index < QLA_MODEL_NAMES) {
 			strcpy(ha->model_number,
 			    qla2x00_model_name[index * 2]);
-- 
cgit v0.10.2


From e1f916035f149540e5090207ceafca9ba779084e Mon Sep 17 00:00:00 2001
From: Joe Carnuccio <joe.carnuccio@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:46 -0700
Subject: [SCSI] qla2xxx: Perform an implicit login to the Management Server.

Set the conditional plogi option bit whenever logging in the
fabric management server (if it is already logged in, it does not
need an explicit login; an implicit login suffices).

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 557f58d..3dbb9e7 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -1107,7 +1107,7 @@ qla2x00_mgmt_svr_login(scsi_qla_host_t *vha)
 		return ret;
 
 	ha->isp_ops->fabric_login(vha, vha->mgmt_svr_loop_id, 0xff, 0xff, 0xfa,
-	    mb, BIT_1);
+	    mb, BIT_1|BIT_0);
 	if (mb[0] != MBS_COMMAND_COMPLETE) {
 		DEBUG2_13(printk("%s(%ld): Failed MANAGEMENT_SERVER login: "
 		    "loop_id=%x mb[0]=%x mb[1]=%x mb[2]=%x mb[6]=%x mb[7]=%x\n",
-- 
cgit v0.10.2


From a13d8ac057705c479b8bf15e5303f18f899502f9 Mon Sep 17 00:00:00 2001
From: Michael Reed <mdr@sgi.com>
Date: Mon, 6 Apr 2009 22:33:47 -0700
Subject: [SCSI] qla2xxx: Conditionally disable automatic queue full tracking.

Changing a lun's queue depth (/sys/block/sdX/device/queue_depth)
isn't sticky when the device is connected via a QLogic fibre
channel adapter.

The QLogic qla2xxx fibre channel driver dynamically adjusts a
lun's queue depth.  If a user has a specific need to limit the
number of commands issued to a lun (say a tape drive, or a shared
raid where the total commands issued to all luns is limited at
the controller level, for example) and writes a limiting value to
/sys/block/sdXX/device/queue_depth, the qla2xxx driver will
silently and gradually increase the queue depth back to the
driver limit of ql2xmaxqdepth.  While reducing this value (module
parameter) or increasing the interval between ramp ups
(ql2xqfullrampup) offers the potential for a work around it would
be better to have the option of just disabling the dynamic
adjustment of queue depth.

This patch implements an "off switch" as a module parameter.

Signed-off-by: Michael Reed <mdr@sgi.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index b12de01..5347e35 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -65,6 +65,7 @@ extern int ql2xfdmienable;
 extern int ql2xallocfwdump;
 extern int ql2xextended_error_logging;
 extern int ql2xqfullrampup;
+extern int ql2xqfulltracking;
 extern int ql2xiidmaenable;
 extern int ql2xmaxqueues;
 extern int ql2xmultique_tag;
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 41e50c2..eb35d20 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -765,6 +765,9 @@ qla2x00_adjust_sdev_qdepth_up(struct scsi_device *sdev, void *data)
 	struct qla_hw_data *ha = vha->hw;
 	struct req_que *req = NULL;
 
+	if (!ql2xqfulltracking)
+		return;
+
 	req = vha->req;
 	if (!req)
 		return;
@@ -807,6 +810,9 @@ qla2x00_ramp_up_queue_depth(scsi_qla_host_t *vha, struct req_que *req,
 	fc_port_t *fcport;
 	struct scsi_device *sdev;
 
+	if (!ql2xqfulltracking)
+		return;
+
 	sdev = sp->cmd->device;
 	if (sdev->queue_depth >= req->max_q_depth)
 		return;
@@ -1125,6 +1131,8 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 			    scsi_status));
 
 			/* Adjust queue depth for all luns on the port. */
+			if (!ql2xqfulltracking)
+				break;
 			fcport->last_queue_full = jiffies;
 			starget_for_each_device(cp->device->sdev_target,
 			    fcport, qla2x00_adjust_sdev_qdepth_down);
@@ -1183,6 +1191,8 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 				 * Adjust queue depth for all luns on the
 				 * port.
 				 */
+				if (!ql2xqfulltracking)
+					break;
 				fcport->last_queue_full = jiffies;
 				starget_for_each_device(
 				    cp->device->sdev_target, fcport,
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 94e53a5..155a204e 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -77,6 +77,14 @@ module_param(ql2xmaxqdepth, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xmaxqdepth,
 		"Maximum queue depth to report for target devices.");
 
+int ql2xqfulltracking = 1;
+module_param(ql2xqfulltracking, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(ql2xqfulltracking,
+		"Controls whether the driver tracks queue full status "
+		"returns and dynamically adjusts a scsi device's queue "
+		"depth.  Default is 1, perform tracking.  Set to 0 to "
+		"disable dynamic tracking and adjustment of queue depth.");
+
 int ql2xqfullrampup = 120;
 module_param(ql2xqfullrampup, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xqfullrampup,
-- 
cgit v0.10.2


From b469a7cbe9bf68939c90f4ac6bc2bb99e47d7229 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:48 -0700
Subject: [SCSI] qla2xxx: Don't try to 'stop' firmware if already in ROM code.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index a4a6a14..138ae88 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -4153,7 +4153,7 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
 
 	ret = qla2x00_stop_firmware(vha);
 	for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT &&
-	    retries ; retries--) {
+	    ret != QLA_INVALID_COMMAND && retries ; retries--) {
 		ha->isp_ops->reset_chip(vha);
 		if (ha->isp_ops->chip_diag(vha) != QLA_SUCCESS)
 			continue;
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 258c39d..7d0eeec 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -2477,6 +2477,8 @@ qla2x00_stop_firmware(scsi_qla_host_t *vha)
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed=%x.\n", __func__,
 		    vha->host_no, rval));
+		if (mcp->mb[0] == MBS_INVALID_COMMAND)
+			rval = QLA_INVALID_COMMAND;
 	} else {
 		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
 	}
-- 
cgit v0.10.2


From e337d9070e5821e7c8e5973679bdd34376263bd1 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:49 -0700
Subject: [SCSI] qla2xxx: Add an override option to specify ISP firmware load
 semantics.

As it may be useful during debugging to use a specific firmware
image.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 5347e35..f17d525 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -69,6 +69,7 @@ extern int ql2xqfulltracking;
 extern int ql2xiidmaenable;
 extern int ql2xmaxqueues;
 extern int ql2xmultique_tag;
+extern int ql2xfwloadbin;
 
 extern int qla2x00_loop_reset(scsi_qla_host_t *);
 extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 138ae88..4952fc1 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -4111,6 +4111,9 @@ qla24xx_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr)
 {
 	int rval;
 
+	if (ql2xfwloadbin == 1)
+		return qla81xx_load_risc(vha, srisc_addr);
+
 	/*
 	 * FW Load priority:
 	 * 1) Firmware via request-firmware interface (.bin file).
@@ -4128,6 +4131,9 @@ qla81xx_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr)
 {
 	int rval;
 
+	if (ql2xfwloadbin == 2)
+		return qla24xx_load_risc(vha, srisc_addr);
+
 	/*
 	 * FW Load priority:
 	 * 1) Firmware residing in flash.
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 155a204e..88a75d0 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -111,6 +111,16 @@ MODULE_PARM_DESC(ql2xmultique_tag,
 		"Enables CPU affinity settings for the driver "
 		"Default is 0 for no affinity of request and response IO. "
 		"Set it to 1 to turn on the cpu affinity.");
+
+int ql2xfwloadbin;
+module_param(ql2xfwloadbin, int, S_IRUGO|S_IRUSR);
+MODULE_PARM_DESC(ql2xfwloadbin,
+		"Option to specify location from which to load ISP firmware:\n"
+		" 2 -- load firmware via the request_firmware() (hotplug)\n"
+		"      interface.\n"
+		" 1 -- load firmware from flash.\n"
+		" 0 -- use default semantics.\n");
+
 /*
  * SCSI host template entry points
  */
-- 
cgit v0.10.2


From e5b68a61e15ca8e200c60cfd4dbe1818e6beb4e1 Mon Sep 17 00:00:00 2001
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:50 -0700
Subject: [SCSI] qla2xxx: Use port number to compute nvram/vpd parameter
 offsets.

Read adapter's physical port number from interrupt pin register
and use it instead of pci function number to offset into the
nvram to obtain the port's configuration parameters.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 6911b9b..721bae9 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2222,6 +2222,7 @@ struct qla_hw_data {
 		uint32_t	fce_enabled		:1;
 		uint32_t	fac_supported		:1;
 		uint32_t	chip_reset_done		:1;
+		uint32_t	port0			:1;
 	} flags;
 
 	/* This spinlock is used to protect "io transactions", you must
@@ -2256,6 +2257,9 @@ struct qla_hw_data {
 #define FLOGI_MID_SUPPORT       BIT_10
 #define FLOGI_VSAN_SUPPORT      BIT_12
 #define FLOGI_SP_SUPPORT        BIT_13
+
+	uint8_t		port_no;		/* Physical port of adapter */
+
 	/* Timeout timers. */
 	uint8_t 	loop_down_abort_time;    /* port down timer */
 	atomic_t	loop_down_timer;         /* loop down timer */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 4952fc1..33e9248 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3573,14 +3573,15 @@ qla24xx_nvram_config(scsi_qla_host_t *vha)
 	nv = ha->nvram;
 
 	/* Determine NVRAM starting address. */
-	ha->nvram_size = sizeof(struct nvram_24xx);
-	ha->nvram_base = FA_NVRAM_FUNC0_ADDR;
-	ha->vpd_size = FA_NVRAM_VPD_SIZE;
-	ha->vpd_base = FA_NVRAM_VPD0_ADDR;
-	if (PCI_FUNC(ha->pdev->devfn)) {
+	if (ha->flags.port0) {
+		ha->nvram_base = FA_NVRAM_FUNC0_ADDR;
+		ha->vpd_base = FA_NVRAM_VPD0_ADDR;
+	} else {
 		ha->nvram_base = FA_NVRAM_FUNC1_ADDR;
 		ha->vpd_base = FA_NVRAM_VPD1_ADDR;
 	}
+	ha->nvram_size = sizeof(struct nvram_24xx);
+	ha->vpd_size = FA_NVRAM_VPD_SIZE;
 
 	/* Get VPD data into cache */
 	ha->vpd = ha->nvram + VPD_OFFSET;
@@ -3619,7 +3620,7 @@ qla24xx_nvram_config(scsi_qla_host_t *vha)
 		nv->exchange_count = __constant_cpu_to_le16(0);
 		nv->hard_address = __constant_cpu_to_le16(124);
 		nv->port_name[0] = 0x21;
-		nv->port_name[1] = 0x00 + PCI_FUNC(ha->pdev->devfn);
+		nv->port_name[1] = 0x00 + ha->port_no;
 		nv->port_name[2] = 0x00;
 		nv->port_name[3] = 0xe0;
 		nv->port_name[4] = 0x8b;
@@ -4348,7 +4349,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
 		nv->execution_throttle = __constant_cpu_to_le16(0xFFFF);
 		nv->exchange_count = __constant_cpu_to_le16(0);
 		nv->port_name[0] = 0x21;
-		nv->port_name[1] = 0x00 + PCI_FUNC(ha->pdev->devfn);
+		nv->port_name[1] = 0x00 + ha->port_no;
 		nv->port_name[2] = 0x00;
 		nv->port_name[3] = 0xe0;
 		nv->port_name[4] = 0x8b;
@@ -4382,7 +4383,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
 		nv->enode_mac[2] = 0x03;
 		nv->enode_mac[3] = 0x04;
 		nv->enode_mac[4] = 0x05;
-		nv->enode_mac[5] = 0x06 + PCI_FUNC(ha->pdev->devfn);
+		nv->enode_mac[5] = 0x06 + ha->port_no;
 
 		rval = 1;
 	}
@@ -4415,7 +4416,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
 		icb->enode_mac[2] = 0x03;
 		icb->enode_mac[3] = 0x04;
 		icb->enode_mac[4] = 0x05;
-		icb->enode_mac[5] = 0x06 + PCI_FUNC(ha->pdev->devfn);
+		icb->enode_mac[5] = 0x06 + ha->port_no;
 	}
 
 	/* Use extended-initialization control block. */
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 88a75d0..f4f5355 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1581,6 +1581,13 @@ qla2x00_set_isp_flags(struct qla_hw_data *ha)
 		ha->fw_srisc_address = RISC_START_ADDRESS_2400;
 		break;
 	}
+
+	/* Get adapter physical port no from interrupt pin register. */
+	pci_read_config_byte(ha->pdev, PCI_INTERRUPT_PIN, &ha->port_no);
+	if (ha->port_no & 1)
+		ha->flags.port0 = 1;
+	else
+		ha->flags.port0 = 0;
 }
 
 static int
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 2a9b3f8..22f97eb 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -702,30 +702,30 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr)
 			break;
 		case FLT_REG_VPD_0:
 			ha->flt_region_vpd_nvram = start;
-			if (!(PCI_FUNC(ha->pdev->devfn) & 1))
+			if (ha->flags.port0)
 				ha->flt_region_vpd = start;
 			break;
 		case FLT_REG_VPD_1:
-			if (PCI_FUNC(ha->pdev->devfn) & 1)
+			if (!ha->flags.port0)
 				ha->flt_region_vpd = start;
 			break;
 		case FLT_REG_NVRAM_0:
-			if (!(PCI_FUNC(ha->pdev->devfn) & 1))
+			if (ha->flags.port0)
 				ha->flt_region_nvram = start;
 			break;
 		case FLT_REG_NVRAM_1:
-			if (PCI_FUNC(ha->pdev->devfn) & 1)
+			if (!ha->flags.port0)
 				ha->flt_region_nvram = start;
 			break;
 		case FLT_REG_FDT:
 			ha->flt_region_fdt = start;
 			break;
 		case FLT_REG_NPIV_CONF_0:
-			if (!(PCI_FUNC(ha->pdev->devfn) & 1))
+			if (ha->flags.port0)
 				ha->flt_region_npiv_conf = start;
 			break;
 		case FLT_REG_NPIV_CONF_1:
-			if (PCI_FUNC(ha->pdev->devfn) & 1)
+			if (!ha->flags.port0)
 				ha->flt_region_npiv_conf = start;
 			break;
 		}
@@ -745,12 +745,12 @@ no_flash_data:
 	ha->flt_region_fw = def_fw[def];
 	ha->flt_region_boot = def_boot[def];
 	ha->flt_region_vpd_nvram = def_vpd_nvram[def];
-	ha->flt_region_vpd = !(PCI_FUNC(ha->pdev->devfn) & 1) ?
+	ha->flt_region_vpd = ha->flags.port0 ?
 	    def_vpd0[def]: def_vpd1[def];
-	ha->flt_region_nvram = !(PCI_FUNC(ha->pdev->devfn) & 1) ?
+	ha->flt_region_nvram = ha->flags.port0 ?
 	    def_nvram0[def]: def_nvram1[def];
 	ha->flt_region_fdt = def_fdt[def];
-	ha->flt_region_npiv_conf = !(PCI_FUNC(ha->pdev->devfn) & 1) ?
+	ha->flt_region_npiv_conf = ha->flags.port0 ?
 	    def_npiv_conf0[def]: def_npiv_conf1[def];
 done:
 	DEBUG2(qla_printk(KERN_DEBUG, ha, "FLT[%s]: boot=0x%x fw=0x%x "
-- 
cgit v0.10.2


From a03706017e443ced6e354d434142989c9e8653d7 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Mon, 6 Apr 2009 22:33:51 -0700
Subject: [SCSI] qla2xxx: Update version number to 8.03.01-k2.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 19d1afc..a1094e7 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.03.01-k1"
+#define QLA2XXX_VERSION      "8.03.01-k2"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	3
-- 
cgit v0.10.2


From 2ad52f473bbc1aa5b33c4a329b8a359f125e19d1 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Fri, 8 May 2009 16:35:37 -0400
Subject: [SCSI] mvsas: move into new directory drivers/scsi/mvsas/

Zero functional changes, just file movement.

This commit prepares for the upcoming integration of the
Marvell-provided driver update that splits the driver into support
for both 64xx and 94xx chip families.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index fb27407..6e8106a 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -508,6 +508,7 @@ config SCSI_AIC7XXX_OLD
 
 source "drivers/scsi/aic7xxx/Kconfig.aic79xx"
 source "drivers/scsi/aic94xx/Kconfig"
+source "drivers/scsi/mvsas/Kconfig"
 
 config SCSI_DPT_I2O
 	tristate "Adaptec I2O RAID support "
@@ -1050,16 +1051,6 @@ config SCSI_IZIP_SLOW_CTR
 
 	  Generally, saying N is fine.
 
-config SCSI_MVSAS
-	tristate "Marvell 88SE6440 SAS/SATA support"
-	depends on PCI && SCSI
-	select SCSI_SAS_LIBSAS
-	help
-	  This driver supports Marvell SAS/SATA PCI devices.
-
-	  To compiler this driver as a module, choose M here: the module
-	  will be called mvsas.
-
 config SCSI_NCR53C406A
 	tristate "NCR53c406a SCSI support"
 	depends on ISA && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index a5049cf..8795c30 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -126,7 +126,7 @@ obj-$(CONFIG_SCSI_IBMVSCSIS)	+= ibmvscsi/
 obj-$(CONFIG_SCSI_IBMVFC)	+= ibmvscsi/
 obj-$(CONFIG_SCSI_HPTIOP)	+= hptiop.o
 obj-$(CONFIG_SCSI_STEX)		+= stex.o
-obj-$(CONFIG_SCSI_MVSAS)	+= mvsas.o
+obj-$(CONFIG_SCSI_MVSAS)	+= mvsas/
 obj-$(CONFIG_PS3_ROM)		+= ps3rom.o
 obj-$(CONFIG_SCSI_CXGB3_ISCSI)	+= libiscsi.o libiscsi_tcp.o cxgb3i/
 
diff --git a/drivers/scsi/mvsas.c b/drivers/scsi/mvsas.c
deleted file mode 100644
index e4acebd..0000000
--- a/drivers/scsi/mvsas.c
+++ /dev/null
@@ -1,3222 +0,0 @@
-/*
-	mvsas.c - Marvell 88SE6440 SAS/SATA support
-
-	Copyright 2007 Red Hat, Inc.
-	Copyright 2008 Marvell. <kewei@marvell.com>
-
-	This program is free software; you can redistribute it and/or
-	modify it under the terms of the GNU General Public License as
-	published by the Free Software Foundation; either version 2,
-	or (at your option) any later version.
-
-	This program is distributed in the hope that it will be useful,
-	but WITHOUT ANY WARRANTY; without even the implied warranty
-	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-	See the GNU General Public License for more details.
-
-	You should have received a copy of the GNU General Public
-	License along with this program; see the file COPYING.	If not,
-	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
-	MA 02139, USA.
-
-	---------------------------------------------------------------
-
-	Random notes:
-	* hardware supports controlling the endian-ness of data
-	  structures.  this permits elimination of all the le32_to_cpu()
-	  and cpu_to_le32() conversions.
-
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/ctype.h>
-#include <scsi/libsas.h>
-#include <scsi/scsi_tcq.h>
-#include <scsi/sas_ata.h>
-#include <asm/io.h>
-
-#define DRV_NAME	"mvsas"
-#define DRV_VERSION	"0.5.2"
-#define _MV_DUMP	0
-#define MVS_DISABLE_NVRAM
-#define MVS_DISABLE_MSI
-
-#define mr32(reg)	readl(regs + MVS_##reg)
-#define mw32(reg,val)	writel((val), regs + MVS_##reg)
-#define mw32_f(reg,val)	do {			\
-	writel((val), regs + MVS_##reg);	\
-	readl(regs + MVS_##reg);		\
-	} while (0)
-
-#define MVS_ID_NOT_MAPPED	0x7f
-#define MVS_CHIP_SLOT_SZ	(1U << mvi->chip->slot_width)
-
-/* offset for D2H FIS in the Received FIS List Structure */
-#define SATA_RECEIVED_D2H_FIS(reg_set)	\
-	((void *) mvi->rx_fis + 0x400 + 0x100 * reg_set + 0x40)
-#define SATA_RECEIVED_PIO_FIS(reg_set)	\
-	((void *) mvi->rx_fis + 0x400 + 0x100 * reg_set + 0x20)
-#define UNASSOC_D2H_FIS(id)		\
-	((void *) mvi->rx_fis + 0x100 * id)
-
-#define for_each_phy(__lseq_mask, __mc, __lseq, __rest)			\
-	for ((__mc) = (__lseq_mask), (__lseq) = 0;			\
-					(__mc) != 0 && __rest;		\
-					(++__lseq), (__mc) >>= 1)
-
-/* driver compile-time configuration */
-enum driver_configuration {
-	MVS_TX_RING_SZ		= 1024,	/* TX ring size (12-bit) */
-	MVS_RX_RING_SZ		= 1024, /* RX ring size (12-bit) */
-					/* software requires power-of-2
-					   ring size */
-
-	MVS_SLOTS		= 512,	/* command slots */
-	MVS_SLOT_BUF_SZ		= 8192, /* cmd tbl + IU + status + PRD */
-	MVS_SSP_CMD_SZ		= 64,	/* SSP command table buffer size */
-	MVS_ATA_CMD_SZ		= 96,	/* SATA command table buffer size */
-	MVS_OAF_SZ		= 64,	/* Open address frame buffer size */
-
-	MVS_RX_FIS_COUNT	= 17,	/* Optional rx'd FISs (max 17) */
-
-	MVS_QUEUE_SIZE		= 30,	/* Support Queue depth */
-	MVS_CAN_QUEUE		= MVS_SLOTS - 1,	/* SCSI Queue depth */
-};
-
-/* unchangeable hardware details */
-enum hardware_details {
-	MVS_MAX_PHYS		= 8,	/* max. possible phys */
-	MVS_MAX_PORTS		= 8,	/* max. possible ports */
-	MVS_RX_FISL_SZ		= 0x400 + (MVS_RX_FIS_COUNT * 0x100),
-};
-
-/* peripheral registers (BAR2) */
-enum peripheral_registers {
-	SPI_CTL			= 0x10,	/* EEPROM control */
-	SPI_CMD			= 0x14,	/* EEPROM command */
-	SPI_DATA		= 0x18, /* EEPROM data */
-};
-
-enum peripheral_register_bits {
-	TWSI_RDY		= (1U << 7),	/* EEPROM interface ready */
-	TWSI_RD			= (1U << 4),	/* EEPROM read access */
-
-	SPI_ADDR_MASK		= 0x3ffff,	/* bits 17:0 */
-};
-
-/* enhanced mode registers (BAR4) */
-enum hw_registers {
-	MVS_GBL_CTL		= 0x04,  /* global control */
-	MVS_GBL_INT_STAT	= 0x08,  /* global irq status */
-	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
-	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
-
-	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
-	MVS_PCS			= 0x104, /* SAS/SATA port control/status */
-	MVS_CMD_LIST_LO		= 0x108, /* cmd list addr */
-	MVS_CMD_LIST_HI		= 0x10C,
-	MVS_RX_FIS_LO		= 0x110, /* RX FIS list addr */
-	MVS_RX_FIS_HI		= 0x114,
-
-	MVS_TX_CFG		= 0x120, /* TX configuration */
-	MVS_TX_LO		= 0x124, /* TX (delivery) ring addr */
-	MVS_TX_HI		= 0x128,
-
-	MVS_TX_PROD_IDX		= 0x12C, /* TX producer pointer */
-	MVS_TX_CONS_IDX		= 0x130, /* TX consumer pointer (RO) */
-	MVS_RX_CFG		= 0x134, /* RX configuration */
-	MVS_RX_LO		= 0x138, /* RX (completion) ring addr */
-	MVS_RX_HI		= 0x13C,
-	MVS_RX_CONS_IDX		= 0x140, /* RX consumer pointer (RO) */
-
-	MVS_INT_COAL		= 0x148, /* Int coalescing config */
-	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
-	MVS_INT_STAT		= 0x150, /* Central int status */
-	MVS_INT_MASK		= 0x154, /* Central int enable */
-	MVS_INT_STAT_SRS	= 0x158, /* SATA register set status */
-	MVS_INT_MASK_SRS	= 0x15C,
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_INT_STAT		= 0x160, /* port0 interrupt status */
-	MVS_P0_INT_MASK		= 0x164, /* port0 interrupt mask */
-	MVS_P4_INT_STAT		= 0x200, /* Port 4 interrupt status */
-	MVS_P4_INT_MASK		= 0x204, /* Port 4 interrupt enable mask */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_SER_CTLSTAT	= 0x180, /* port0 serial control/status */
-	MVS_P4_SER_CTLSTAT	= 0x220, /* port4 serial control/status */
-
-	MVS_CMD_ADDR		= 0x1B8, /* Command register port (addr) */
-	MVS_CMD_DATA		= 0x1BC, /* Command register port (data) */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_CFG_ADDR		= 0x1C0, /* port0 phy register address */
-	MVS_P0_CFG_DATA		= 0x1C4, /* port0 phy register data */
-	MVS_P4_CFG_ADDR		= 0x230, /* Port 4 config address */
-	MVS_P4_CFG_DATA		= 0x234, /* Port 4 config data */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_VSR_ADDR		= 0x1E0, /* port0 VSR address */
-	MVS_P0_VSR_DATA		= 0x1E4, /* port0 VSR data */
-	MVS_P4_VSR_ADDR		= 0x250, /* port 4 VSR addr */
-	MVS_P4_VSR_DATA		= 0x254, /* port 4 VSR data */
-};
-
-enum hw_register_bits {
-	/* MVS_GBL_CTL */
-	INT_EN			= (1U << 1),	/* Global int enable */
-	HBA_RST			= (1U << 0),	/* HBA reset */
-
-	/* MVS_GBL_INT_STAT */
-	INT_XOR			= (1U << 4),	/* XOR engine event */
-	INT_SAS_SATA		= (1U << 0),	/* SAS/SATA event */
-
-	/* MVS_GBL_PORT_TYPE */			/* shl for ports 1-3 */
-	SATA_TARGET		= (1U << 16),	/* port0 SATA target enable */
-	MODE_AUTO_DET_PORT7 = (1U << 15),	/* port0 SAS/SATA autodetect */
-	MODE_AUTO_DET_PORT6 = (1U << 14),
-	MODE_AUTO_DET_PORT5 = (1U << 13),
-	MODE_AUTO_DET_PORT4 = (1U << 12),
-	MODE_AUTO_DET_PORT3 = (1U << 11),
-	MODE_AUTO_DET_PORT2 = (1U << 10),
-	MODE_AUTO_DET_PORT1 = (1U << 9),
-	MODE_AUTO_DET_PORT0 = (1U << 8),
-	MODE_AUTO_DET_EN    =	MODE_AUTO_DET_PORT0 | MODE_AUTO_DET_PORT1 |
-				MODE_AUTO_DET_PORT2 | MODE_AUTO_DET_PORT3 |
-				MODE_AUTO_DET_PORT4 | MODE_AUTO_DET_PORT5 |
-				MODE_AUTO_DET_PORT6 | MODE_AUTO_DET_PORT7,
-	MODE_SAS_PORT7_MASK = (1U << 7),  /* port0 SAS(1), SATA(0) mode */
-	MODE_SAS_PORT6_MASK = (1U << 6),
-	MODE_SAS_PORT5_MASK = (1U << 5),
-	MODE_SAS_PORT4_MASK = (1U << 4),
-	MODE_SAS_PORT3_MASK = (1U << 3),
-	MODE_SAS_PORT2_MASK = (1U << 2),
-	MODE_SAS_PORT1_MASK = (1U << 1),
-	MODE_SAS_PORT0_MASK = (1U << 0),
-	MODE_SAS_SATA	=	MODE_SAS_PORT0_MASK | MODE_SAS_PORT1_MASK |
-				MODE_SAS_PORT2_MASK | MODE_SAS_PORT3_MASK |
-				MODE_SAS_PORT4_MASK | MODE_SAS_PORT5_MASK |
-				MODE_SAS_PORT6_MASK | MODE_SAS_PORT7_MASK,
-
-				/* SAS_MODE value may be
-				 * dictated (in hw) by values
-				 * of SATA_TARGET & AUTO_DET
-				 */
-
-	/* MVS_TX_CFG */
-	TX_EN			= (1U << 16),	/* Enable TX */
-	TX_RING_SZ_MASK		= 0xfff,	/* TX ring size, bits 11:0 */
-
-	/* MVS_RX_CFG */
-	RX_EN			= (1U << 16),	/* Enable RX */
-	RX_RING_SZ_MASK		= 0xfff,	/* RX ring size, bits 11:0 */
-
-	/* MVS_INT_COAL */
-	COAL_EN			= (1U << 16),	/* Enable int coalescing */
-
-	/* MVS_INT_STAT, MVS_INT_MASK */
-	CINT_I2C		= (1U << 31),	/* I2C event */
-	CINT_SW0		= (1U << 30),	/* software event 0 */
-	CINT_SW1		= (1U << 29),	/* software event 1 */
-	CINT_PRD_BC		= (1U << 28),	/* PRD BC err for read cmd */
-	CINT_DMA_PCIE		= (1U << 27),	/* DMA to PCIE timeout */
-	CINT_MEM		= (1U << 26),	/* int mem parity err */
-	CINT_I2C_SLAVE		= (1U << 25),	/* slave I2C event */
-	CINT_SRS		= (1U << 3),	/* SRS event */
-	CINT_CI_STOP		= (1U << 1),	/* cmd issue stopped */
-	CINT_DONE		= (1U << 0),	/* cmd completion */
-
-						/* shl for ports 1-3 */
-	CINT_PORT_STOPPED	= (1U << 16),	/* port0 stopped */
-	CINT_PORT		= (1U << 8),	/* port0 event */
-	CINT_PORT_MASK_OFFSET	= 8,
-	CINT_PORT_MASK		= (0xFF << CINT_PORT_MASK_OFFSET),
-
-	/* TX (delivery) ring bits */
-	TXQ_CMD_SHIFT		= 29,
-	TXQ_CMD_SSP		= 1,		/* SSP protocol */
-	TXQ_CMD_SMP		= 2,		/* SMP protocol */
-	TXQ_CMD_STP		= 3,		/* STP/SATA protocol */
-	TXQ_CMD_SSP_FREE_LIST	= 4,		/* add to SSP targ free list */
-	TXQ_CMD_SLOT_RESET	= 7,		/* reset command slot */
-	TXQ_MODE_I		= (1U << 28),	/* mode: 0=target,1=initiator */
-	TXQ_PRIO_HI		= (1U << 27),	/* priority: 0=normal, 1=high */
-	TXQ_SRS_SHIFT		= 20,		/* SATA register set */
-	TXQ_SRS_MASK		= 0x7f,
-	TXQ_PHY_SHIFT		= 12,		/* PHY bitmap */
-	TXQ_PHY_MASK		= 0xff,
-	TXQ_SLOT_MASK		= 0xfff,	/* slot number */
-
-	/* RX (completion) ring bits */
-	RXQ_GOOD		= (1U << 23),	/* Response good */
-	RXQ_SLOT_RESET		= (1U << 21),	/* Slot reset complete */
-	RXQ_CMD_RX		= (1U << 20),	/* target cmd received */
-	RXQ_ATTN		= (1U << 19),	/* attention */
-	RXQ_RSP			= (1U << 18),	/* response frame xfer'd */
-	RXQ_ERR			= (1U << 17),	/* err info rec xfer'd */
-	RXQ_DONE		= (1U << 16),	/* cmd complete */
-	RXQ_SLOT_MASK		= 0xfff,	/* slot number */
-
-	/* mvs_cmd_hdr bits */
-	MCH_PRD_LEN_SHIFT	= 16,		/* 16-bit PRD table len */
-	MCH_SSP_FR_TYPE_SHIFT	= 13,		/* SSP frame type */
-
-						/* SSP initiator only */
-	MCH_SSP_FR_CMD		= 0x0,		/* COMMAND frame */
-
-						/* SSP initiator or target */
-	MCH_SSP_FR_TASK		= 0x1,		/* TASK frame */
-
-						/* SSP target only */
-	MCH_SSP_FR_XFER_RDY	= 0x4,		/* XFER_RDY frame */
-	MCH_SSP_FR_RESP		= 0x5,		/* RESPONSE frame */
-	MCH_SSP_FR_READ		= 0x6,		/* Read DATA frame(s) */
-	MCH_SSP_FR_READ_RESP	= 0x7,		/* ditto, plus RESPONSE */
-
-	MCH_PASSTHRU		= (1U << 12),	/* pass-through (SSP) */
-	MCH_FBURST		= (1U << 11),	/* first burst (SSP) */
-	MCH_CHK_LEN		= (1U << 10),	/* chk xfer len (SSP) */
-	MCH_RETRY		= (1U << 9),	/* tport layer retry (SSP) */
-	MCH_PROTECTION		= (1U << 8),	/* protection info rec (SSP) */
-	MCH_RESET		= (1U << 7),	/* Reset (STP/SATA) */
-	MCH_FPDMA		= (1U << 6),	/* First party DMA (STP/SATA) */
-	MCH_ATAPI		= (1U << 5),	/* ATAPI (STP/SATA) */
-	MCH_BIST		= (1U << 4),	/* BIST activate (STP/SATA) */
-	MCH_PMP_MASK		= 0xf,		/* PMP from cmd FIS (STP/SATA)*/
-
-	CCTL_RST		= (1U << 5),	/* port logic reset */
-
-						/* 0(LSB first), 1(MSB first) */
-	CCTL_ENDIAN_DATA	= (1U << 3),	/* PRD data */
-	CCTL_ENDIAN_RSP		= (1U << 2),	/* response frame */
-	CCTL_ENDIAN_OPEN	= (1U << 1),	/* open address frame */
-	CCTL_ENDIAN_CMD		= (1U << 0),	/* command table */
-
-	/* MVS_Px_SER_CTLSTAT (per-phy control) */
-	PHY_SSP_RST		= (1U << 3),	/* reset SSP link layer */
-	PHY_BCAST_CHG		= (1U << 2),	/* broadcast(change) notif */
-	PHY_RST_HARD		= (1U << 1),	/* hard reset + phy reset */
-	PHY_RST			= (1U << 0),	/* phy reset */
-	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0xF << 8),
-	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0xF << 12),
-	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (16),
-	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
-			(0xF << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
-	PHY_READY_MASK		= (1U << 20),
-
-	/* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */
-	PHYEV_DEC_ERR		= (1U << 24),	/* Phy Decoding Error */
-	PHYEV_UNASSOC_FIS	= (1U << 19),	/* unassociated FIS rx'd */
-	PHYEV_AN		= (1U << 18),	/* SATA async notification */
-	PHYEV_BIST_ACT		= (1U << 17),	/* BIST activate FIS */
-	PHYEV_SIG_FIS		= (1U << 16),	/* signature FIS */
-	PHYEV_POOF		= (1U << 12),	/* phy ready from 1 -> 0 */
-	PHYEV_IU_BIG		= (1U << 11),	/* IU too long err */
-	PHYEV_IU_SMALL		= (1U << 10),	/* IU too short err */
-	PHYEV_UNK_TAG		= (1U << 9),	/* unknown tag */
-	PHYEV_BROAD_CH		= (1U << 8),	/* broadcast(CHANGE) */
-	PHYEV_COMWAKE		= (1U << 7),	/* COMWAKE rx'd */
-	PHYEV_PORT_SEL		= (1U << 6),	/* port selector present */
-	PHYEV_HARD_RST		= (1U << 5),	/* hard reset rx'd */
-	PHYEV_ID_TMOUT		= (1U << 4),	/* identify timeout */
-	PHYEV_ID_FAIL		= (1U << 3),	/* identify failed */
-	PHYEV_ID_DONE		= (1U << 2),	/* identify done */
-	PHYEV_HARD_RST_DONE	= (1U << 1),	/* hard reset done */
-	PHYEV_RDY_CH		= (1U << 0),	/* phy ready changed state */
-
-	/* MVS_PCS */
-	PCS_EN_SATA_REG_SHIFT	= (16),		/* Enable SATA Register Set */
-	PCS_EN_PORT_XMT_SHIFT	= (12),		/* Enable Port Transmit */
-	PCS_EN_PORT_XMT_SHIFT2	= (8),		/* For 6480 */
-	PCS_SATA_RETRY		= (1U << 8),	/* retry ctl FIS on R_ERR */
-	PCS_RSP_RX_EN		= (1U << 7),	/* raw response rx */
-	PCS_SELF_CLEAR		= (1U << 5),	/* self-clearing int mode */
-	PCS_FIS_RX_EN		= (1U << 4),	/* FIS rx enable */
-	PCS_CMD_STOP_ERR	= (1U << 3),	/* cmd stop-on-err enable */
-	PCS_CMD_RST		= (1U << 1),	/* reset cmd issue */
-	PCS_CMD_EN		= (1U << 0),	/* enable cmd issue */
-
-	/* Port n Attached Device Info */
-	PORT_DEV_SSP_TRGT	= (1U << 19),
-	PORT_DEV_SMP_TRGT	= (1U << 18),
-	PORT_DEV_STP_TRGT	= (1U << 17),
-	PORT_DEV_SSP_INIT	= (1U << 11),
-	PORT_DEV_SMP_INIT	= (1U << 10),
-	PORT_DEV_STP_INIT	= (1U << 9),
-	PORT_PHY_ID_MASK	= (0xFFU << 24),
-	PORT_DEV_TRGT_MASK	= (0x7U << 17),
-	PORT_DEV_INIT_MASK	= (0x7U << 9),
-	PORT_DEV_TYPE_MASK	= (0x7U << 0),
-
-	/* Port n PHY Status */
-	PHY_RDY			= (1U << 2),
-	PHY_DW_SYNC		= (1U << 1),
-	PHY_OOB_DTCTD		= (1U << 0),
-
-	/* VSR */
-	/* PHYMODE 6 (CDB) */
-	PHY_MODE6_LATECLK	= (1U << 29),	/* Lock Clock */
-	PHY_MODE6_DTL_SPEED	= (1U << 27),	/* Digital Loop Speed */
-	PHY_MODE6_FC_ORDER	= (1U << 26),	/* Fibre Channel Mode Order*/
-	PHY_MODE6_MUCNT_EN	= (1U << 24),	/* u Count Enable */
-	PHY_MODE6_SEL_MUCNT_LEN	= (1U << 22),	/* Training Length Select */
-	PHY_MODE6_SELMUPI	= (1U << 20),	/* Phase Multi Select (init) */
-	PHY_MODE6_SELMUPF	= (1U << 18),	/* Phase Multi Select (final) */
-	PHY_MODE6_SELMUFF	= (1U << 16),	/* Freq Loop Multi Sel(final) */
-	PHY_MODE6_SELMUFI	= (1U << 14),	/* Freq Loop Multi Sel(init) */
-	PHY_MODE6_FREEZE_LOOP	= (1U << 12),	/* Freeze Rx CDR Loop */
-	PHY_MODE6_INT_RXFOFFS	= (1U << 3),	/* Rx CDR Freq Loop Enable */
-	PHY_MODE6_FRC_RXFOFFS	= (1U << 2),	/* Initial Rx CDR Offset */
-	PHY_MODE6_STAU_0D8	= (1U << 1),	/* Rx CDR Freq Loop Saturate */
-	PHY_MODE6_RXSAT_DIS	= (1U << 0),	/* Saturate Ctl */
-};
-
-enum mvs_info_flags {
-	MVF_MSI			= (1U << 0),	/* MSI is enabled */
-	MVF_PHY_PWR_FIX		= (1U << 1),	/* bug workaround */
-};
-
-enum sas_cmd_port_registers {
-	CMD_CMRST_OOB_DET	= 0x100, /* COMRESET OOB detect register */
-	CMD_CMWK_OOB_DET	= 0x104, /* COMWAKE OOB detect register */
-	CMD_CMSAS_OOB_DET	= 0x108, /* COMSAS OOB detect register */
-	CMD_BRST_OOB_DET	= 0x10c, /* burst OOB detect register */
-	CMD_OOB_SPACE		= 0x110, /* OOB space control register */
-	CMD_OOB_BURST		= 0x114, /* OOB burst control register */
-	CMD_PHY_TIMER		= 0x118, /* PHY timer control register */
-	CMD_PHY_CONFIG0		= 0x11c, /* PHY config register 0 */
-	CMD_PHY_CONFIG1		= 0x120, /* PHY config register 1 */
-	CMD_SAS_CTL0		= 0x124, /* SAS control register 0 */
-	CMD_SAS_CTL1		= 0x128, /* SAS control register 1 */
-	CMD_SAS_CTL2		= 0x12c, /* SAS control register 2 */
-	CMD_SAS_CTL3		= 0x130, /* SAS control register 3 */
-	CMD_ID_TEST		= 0x134, /* ID test register */
-	CMD_PL_TIMER		= 0x138, /* PL timer register */
-	CMD_WD_TIMER		= 0x13c, /* WD timer register */
-	CMD_PORT_SEL_COUNT	= 0x140, /* port selector count register */
-	CMD_APP_MEM_CTL		= 0x144, /* Application Memory Control */
-	CMD_XOR_MEM_CTL		= 0x148, /* XOR Block Memory Control */
-	CMD_DMA_MEM_CTL		= 0x14c, /* DMA Block Memory Control */
-	CMD_PORT_MEM_CTL0	= 0x150, /* Port Memory Control 0 */
-	CMD_PORT_MEM_CTL1	= 0x154, /* Port Memory Control 1 */
-	CMD_SATA_PORT_MEM_CTL0	= 0x158, /* SATA Port Memory Control 0 */
-	CMD_SATA_PORT_MEM_CTL1	= 0x15c, /* SATA Port Memory Control 1 */
-	CMD_XOR_MEM_BIST_CTL	= 0x160, /* XOR Memory BIST Control */
-	CMD_XOR_MEM_BIST_STAT	= 0x164, /* XOR Memroy BIST Status */
-	CMD_DMA_MEM_BIST_CTL	= 0x168, /* DMA Memory BIST Control */
-	CMD_DMA_MEM_BIST_STAT	= 0x16c, /* DMA Memory BIST Status */
-	CMD_PORT_MEM_BIST_CTL	= 0x170, /* Port Memory BIST Control */
-	CMD_PORT_MEM_BIST_STAT0 = 0x174, /* Port Memory BIST Status 0 */
-	CMD_PORT_MEM_BIST_STAT1 = 0x178, /* Port Memory BIST Status 1 */
-	CMD_STP_MEM_BIST_CTL	= 0x17c, /* STP Memory BIST Control */
-	CMD_STP_MEM_BIST_STAT0	= 0x180, /* STP Memory BIST Status 0 */
-	CMD_STP_MEM_BIST_STAT1	= 0x184, /* STP Memory BIST Status 1 */
-	CMD_RESET_COUNT		= 0x188, /* Reset Count */
-	CMD_MONTR_DATA_SEL	= 0x18C, /* Monitor Data/Select */
-	CMD_PLL_PHY_CONFIG	= 0x190, /* PLL/PHY Configuration */
-	CMD_PHY_CTL		= 0x194, /* PHY Control and Status */
-	CMD_PHY_TEST_COUNT0	= 0x198, /* Phy Test Count 0 */
-	CMD_PHY_TEST_COUNT1	= 0x19C, /* Phy Test Count 1 */
-	CMD_PHY_TEST_COUNT2	= 0x1A0, /* Phy Test Count 2 */
-	CMD_APP_ERR_CONFIG	= 0x1A4, /* Application Error Configuration */
-	CMD_PND_FIFO_CTL0	= 0x1A8, /* Pending FIFO Control 0 */
-	CMD_HOST_CTL		= 0x1AC, /* Host Control Status */
-	CMD_HOST_WR_DATA	= 0x1B0, /* Host Write Data */
-	CMD_HOST_RD_DATA	= 0x1B4, /* Host Read Data */
-	CMD_PHY_MODE_21		= 0x1B8, /* Phy Mode 21 */
-	CMD_SL_MODE0		= 0x1BC, /* SL Mode 0 */
-	CMD_SL_MODE1		= 0x1C0, /* SL Mode 1 */
-	CMD_PND_FIFO_CTL1	= 0x1C4, /* Pending FIFO Control 1 */
-};
-
-/* SAS/SATA configuration port registers, aka phy registers */
-enum sas_sata_config_port_regs {
-	PHYR_IDENTIFY		= 0x00,	/* info for IDENTIFY frame */
-	PHYR_ADDR_LO		= 0x04,	/* my SAS address (low) */
-	PHYR_ADDR_HI		= 0x08,	/* my SAS address (high) */
-	PHYR_ATT_DEV_INFO	= 0x0C,	/* attached device info */
-	PHYR_ATT_ADDR_LO	= 0x10,	/* attached dev SAS addr (low) */
-	PHYR_ATT_ADDR_HI	= 0x14,	/* attached dev SAS addr (high) */
-	PHYR_SATA_CTL		= 0x18,	/* SATA control */
-	PHYR_PHY_STAT		= 0x1C,	/* PHY status */
-	PHYR_SATA_SIG0		= 0x20,	/*port SATA signature FIS(Byte 0-3) */
-	PHYR_SATA_SIG1		= 0x24,	/*port SATA signature FIS(Byte 4-7) */
-	PHYR_SATA_SIG2		= 0x28,	/*port SATA signature FIS(Byte 8-11) */
-	PHYR_SATA_SIG3		= 0x2c,	/*port SATA signature FIS(Byte 12-15) */
-	PHYR_R_ERR_COUNT	= 0x30, /* port R_ERR count register */
-	PHYR_CRC_ERR_COUNT	= 0x34, /* port CRC error count register */
-	PHYR_WIDE_PORT		= 0x38,	/* wide port participating */
-	PHYR_CURRENT0		= 0x80,	/* current connection info 0 */
-	PHYR_CURRENT1		= 0x84,	/* current connection info 1 */
-	PHYR_CURRENT2		= 0x88,	/* current connection info 2 */
-};
-
-/*  SAS/SATA Vendor Specific Port Registers */
-enum sas_sata_vsp_regs {
-	VSR_PHY_STAT		= 0x00, /* Phy Status */
-	VSR_PHY_MODE1		= 0x01, /* phy tx */
-	VSR_PHY_MODE2		= 0x02, /* tx scc */
-	VSR_PHY_MODE3		= 0x03, /* pll */
-	VSR_PHY_MODE4		= 0x04, /* VCO */
-	VSR_PHY_MODE5		= 0x05, /* Rx */
-	VSR_PHY_MODE6		= 0x06, /* CDR */
-	VSR_PHY_MODE7		= 0x07, /* Impedance */
-	VSR_PHY_MODE8		= 0x08, /* Voltage */
-	VSR_PHY_MODE9		= 0x09, /* Test */
-	VSR_PHY_MODE10		= 0x0A, /* Power */
-	VSR_PHY_MODE11		= 0x0B, /* Phy Mode */
-	VSR_PHY_VS0		= 0x0C, /* Vednor Specific 0 */
-	VSR_PHY_VS1		= 0x0D, /* Vednor Specific 1 */
-};
-
-enum pci_cfg_registers {
-	PCR_PHY_CTL	= 0x40,
-	PCR_PHY_CTL2	= 0x90,
-	PCR_DEV_CTRL	= 0xE8,
-};
-
-enum pci_cfg_register_bits {
-	PCTL_PWR_ON	= (0xFU << 24),
-	PCTL_OFF	= (0xFU << 12),
-	PRD_REQ_SIZE	= (0x4000),
-	PRD_REQ_MASK	= (0x00007000),
-};
-
-enum nvram_layout_offsets {
-	NVR_SIG		= 0x00,		/* 0xAA, 0x55 */
-	NVR_SAS_ADDR	= 0x02,		/* 8-byte SAS address */
-};
-
-enum chip_flavors {
-	chip_6320,
-	chip_6440,
-	chip_6480,
-};
-
-enum port_type {
-	PORT_TYPE_SAS	=  (1L << 1),
-	PORT_TYPE_SATA	=  (1L << 0),
-};
-
-/* Command Table Format */
-enum ct_format {
-	/* SSP */
-	SSP_F_H		=  0x00,
-	SSP_F_IU	=  0x18,
-	SSP_F_MAX	=  0x4D,
-	/* STP */
-	STP_CMD_FIS	=  0x00,
-	STP_ATAPI_CMD	=  0x40,
-	STP_F_MAX	=  0x10,
-	/* SMP */
-	SMP_F_T		=  0x00,
-	SMP_F_DEP	=  0x01,
-	SMP_F_MAX	=  0x101,
-};
-
-enum status_buffer {
-	SB_EIR_OFF	=  0x00,	/* Error Information Record */
-	SB_RFB_OFF	=  0x08,	/* Response Frame Buffer */
-	SB_RFB_MAX	=  0x400,	/* RFB size*/
-};
-
-enum error_info_rec {
-	CMD_ISS_STPD	= (1U << 31),	/* Cmd Issue Stopped */
-	CMD_PI_ERR	= (1U << 30),	/* Protection info error.  see flags2 */
-	RSP_OVER	= (1U << 29),	/* rsp buffer overflow */
-	RETRY_LIM	= (1U << 28),	/* FIS/frame retry limit exceeded */
-	UNK_FIS 	= (1U << 27),	/* unknown FIS */
-	DMA_TERM	= (1U << 26),	/* DMA terminate primitive rx'd */
-	SYNC_ERR	= (1U << 25),	/* SYNC rx'd during frame xmit */
-	TFILE_ERR	= (1U << 24),	/* SATA taskfile Error bit set */
-	R_ERR		= (1U << 23),	/* SATA returned R_ERR prim */
-	RD_OFS		= (1U << 20),	/* Read DATA frame invalid offset */
-	XFER_RDY_OFS	= (1U << 19),	/* XFER_RDY offset error */
-	UNEXP_XFER_RDY	= (1U << 18),	/* unexpected XFER_RDY error */
-	DATA_OVER_UNDER = (1U << 16),	/* data overflow/underflow */
-	INTERLOCK	= (1U << 15),	/* interlock error */
-	NAK		= (1U << 14),	/* NAK rx'd */
-	ACK_NAK_TO	= (1U << 13),	/* ACK/NAK timeout */
-	CXN_CLOSED	= (1U << 12),	/* cxn closed w/out ack/nak */
-	OPEN_TO 	= (1U << 11),	/* I_T nexus lost, open cxn timeout */
-	PATH_BLOCKED	= (1U << 10),	/* I_T nexus lost, pathway blocked */
-	NO_DEST 	= (1U << 9),	/* I_T nexus lost, no destination */
-	STP_RES_BSY	= (1U << 8),	/* STP resources busy */
-	BREAK		= (1U << 7),	/* break received */
-	BAD_DEST	= (1U << 6),	/* bad destination */
-	BAD_PROTO	= (1U << 5),	/* protocol not supported */
-	BAD_RATE	= (1U << 4),	/* cxn rate not supported */
-	WRONG_DEST	= (1U << 3),	/* wrong destination error */
-	CREDIT_TO	= (1U << 2),	/* credit timeout */
-	WDOG_TO 	= (1U << 1),	/* watchdog timeout */
-	BUF_PAR 	= (1U << 0),	/* buffer parity error */
-};
-
-enum error_info_rec_2 {
-	SLOT_BSY_ERR	= (1U << 31),	/* Slot Busy Error */
-	GRD_CHK_ERR	= (1U << 14),	/* Guard Check Error */
-	APP_CHK_ERR	= (1U << 13),	/* Application Check error */
-	REF_CHK_ERR	= (1U << 12),	/* Reference Check Error */
-	USR_BLK_NM	= (1U << 0),	/* User Block Number */
-};
-
-struct mvs_chip_info {
-	u32		n_phy;
-	u32		srs_sz;
-	u32		slot_width;
-};
-
-struct mvs_err_info {
-	__le32			flags;
-	__le32			flags2;
-};
-
-struct mvs_prd {
-	__le64			addr;		/* 64-bit buffer address */
-	__le32			reserved;
-	__le32			len;		/* 16-bit length */
-};
-
-struct mvs_cmd_hdr {
-	__le32			flags;		/* PRD tbl len; SAS, SATA ctl */
-	__le32			lens;		/* cmd, max resp frame len */
-	__le32			tags;		/* targ port xfer tag; tag */
-	__le32			data_len;	/* data xfer len */
-	__le64			cmd_tbl;	/* command table address */
-	__le64			open_frame;	/* open addr frame address */
-	__le64			status_buf;	/* status buffer address */
-	__le64			prd_tbl;	/* PRD tbl address */
-	__le32			reserved[4];
-};
-
-struct mvs_port {
-	struct asd_sas_port	sas_port;
-	u8			port_attached;
-	u8			taskfileset;
-	u8			wide_port_phymap;
-	struct list_head	list;
-};
-
-struct mvs_phy {
-	struct mvs_port		*port;
-	struct asd_sas_phy	sas_phy;
-	struct sas_identify	identify;
-	struct scsi_device	*sdev;
-	u64		dev_sas_addr;
-	u64		att_dev_sas_addr;
-	u32		att_dev_info;
-	u32		dev_info;
-	u32		phy_type;
-	u32		phy_status;
-	u32		irq_status;
-	u32		frame_rcvd_size;
-	u8		frame_rcvd[32];
-	u8		phy_attached;
-	enum sas_linkrate	minimum_linkrate;
-	enum sas_linkrate	maximum_linkrate;
-};
-
-struct mvs_slot_info {
-	struct list_head	list;
-	struct sas_task		*task;
-	u32			n_elem;
-	u32			tx;
-
-	/* DMA buffer for storing cmd tbl, open addr frame, status buffer,
-	 * and PRD table
-	 */
-	void			*buf;
-	dma_addr_t		buf_dma;
-#if _MV_DUMP
-	u32			cmd_size;
-#endif
-
-	void			*response;
-	struct mvs_port		*port;
-};
-
-struct mvs_info {
-	unsigned long		flags;
-
-	spinlock_t		lock;		/* host-wide lock */
-	struct pci_dev		*pdev;		/* our device */
-	void __iomem		*regs;		/* enhanced mode registers */
-	void __iomem		*peri_regs;	/* peripheral registers */
-
-	u8			sas_addr[SAS_ADDR_SIZE];
-	struct sas_ha_struct	sas;		/* SCSI/SAS glue */
-	struct Scsi_Host	*shost;
-
-	__le32			*tx;		/* TX (delivery) DMA ring */
-	dma_addr_t		tx_dma;
-	u32			tx_prod;	/* cached next-producer idx */
-
-	__le32			*rx;		/* RX (completion) DMA ring */
-	dma_addr_t		rx_dma;
-	u32			rx_cons;	/* RX consumer idx */
-
-	__le32			*rx_fis;	/* RX'd FIS area */
-	dma_addr_t		rx_fis_dma;
-
-	struct mvs_cmd_hdr	*slot;	/* DMA command header slots */
-	dma_addr_t		slot_dma;
-
-	const struct mvs_chip_info *chip;
-
-	u8			tags[MVS_SLOTS];
-	struct mvs_slot_info	slot_info[MVS_SLOTS];
-				/* further per-slot information */
-	struct mvs_phy		phy[MVS_MAX_PHYS];
-	struct mvs_port		port[MVS_MAX_PHYS];
-#ifdef MVS_USE_TASKLET
-	struct tasklet_struct	tasklet;
-#endif
-};
-
-static int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
-			   void *funcdata);
-static u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port);
-static void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val);
-static u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port);
-static void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val);
-static void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val);
-static u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port);
-
-static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i);
-static void mvs_detect_porttype(struct mvs_info *mvi, int i);
-static void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st);
-static void mvs_release_task(struct mvs_info *mvi, int phy_no);
-
-static int mvs_scan_finished(struct Scsi_Host *, unsigned long);
-static void mvs_scan_start(struct Scsi_Host *);
-static int mvs_slave_configure(struct scsi_device *sdev);
-
-static struct scsi_transport_template *mvs_stt;
-
-static const struct mvs_chip_info mvs_chips[] = {
-	[chip_6320] =		{ 2, 16, 9  },
-	[chip_6440] =		{ 4, 16, 9  },
-	[chip_6480] =		{ 8, 32, 10 },
-};
-
-static struct scsi_host_template mvs_sht = {
-	.module			= THIS_MODULE,
-	.name			= DRV_NAME,
-	.queuecommand		= sas_queuecommand,
-	.target_alloc		= sas_target_alloc,
-	.slave_configure	= mvs_slave_configure,
-	.slave_destroy		= sas_slave_destroy,
-	.scan_finished		= mvs_scan_finished,
-	.scan_start		= mvs_scan_start,
-	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
-	.bios_param		= sas_bios_param,
-	.can_queue		= 1,
-	.cmd_per_lun		= 1,
-	.this_id		= -1,
-	.sg_tablesize		= SG_ALL,
-	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
-	.use_clustering		= ENABLE_CLUSTERING,
-	.eh_device_reset_handler	= sas_eh_device_reset_handler,
-	.eh_bus_reset_handler	= sas_eh_bus_reset_handler,
-	.slave_alloc		= sas_slave_alloc,
-	.target_destroy		= sas_target_destroy,
-	.ioctl			= sas_ioctl,
-};
-
-static void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
-{
-	u32 i;
-	u32 run;
-	u32 offset;
-
-	offset = 0;
-	while (size) {
-		printk("%08X : ", baseaddr + offset);
-		if (size >= 16)
-			run = 16;
-		else
-			run = size;
-		size -= run;
-		for (i = 0; i < 16; i++) {
-			if (i < run)
-				printk("%02X ", (u32)data[i]);
-			else
-				printk("   ");
-		}
-		printk(": ");
-		for (i = 0; i < run; i++)
-			printk("%c", isalnum(data[i]) ? data[i] : '.');
-		printk("\n");
-		data = &data[16];
-		offset += run;
-	}
-	printk("\n");
-}
-
-#if _MV_DUMP
-static void mvs_hba_sb_dump(struct mvs_info *mvi, u32 tag,
-				   enum sas_protocol proto)
-{
-	u32 offset;
-	struct pci_dev *pdev = mvi->pdev;
-	struct mvs_slot_info *slot = &mvi->slot_info[tag];
-
-	offset = slot->cmd_size + MVS_OAF_SZ +
-	    sizeof(struct mvs_prd) * slot->n_elem;
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Status buffer[%d] :\n",
-			tag);
-	mvs_hexdump(32, (u8 *) slot->response,
-		    (u32) slot->buf_dma + offset);
-}
-#endif
-
-static void mvs_hba_memory_dump(struct mvs_info *mvi, u32 tag,
-				enum sas_protocol proto)
-{
-#if _MV_DUMP
-	u32 sz, w_ptr;
-	u64 addr;
-	void __iomem *regs = mvi->regs;
-	struct pci_dev *pdev = mvi->pdev;
-	struct mvs_slot_info *slot = &mvi->slot_info[tag];
-
-	/*Delivery Queue */
-	sz = mr32(TX_CFG) & TX_RING_SZ_MASK;
-	w_ptr = slot->tx;
-	addr = mr32(TX_HI) << 16 << 16 | mr32(TX_LO);
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Delivery Queue Size=%04d , WRT_PTR=%04X\n", sz, w_ptr);
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Delivery Queue Base Address=0x%llX (PA)"
-		"(tx_dma=0x%llX), Entry=%04d\n",
-		addr, mvi->tx_dma, w_ptr);
-	mvs_hexdump(sizeof(u32), (u8 *)(&mvi->tx[mvi->tx_prod]),
-			(u32) mvi->tx_dma + sizeof(u32) * w_ptr);
-	/*Command List */
-	addr = mvi->slot_dma;
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Command List Base Address=0x%llX (PA)"
-		"(slot_dma=0x%llX), Header=%03d\n",
-		addr, slot->buf_dma, tag);
-	dev_printk(KERN_DEBUG, &pdev->dev, "Command Header[%03d]:\n", tag);
-	/*mvs_cmd_hdr */
-	mvs_hexdump(sizeof(struct mvs_cmd_hdr), (u8 *)(&mvi->slot[tag]),
-		(u32) mvi->slot_dma + tag * sizeof(struct mvs_cmd_hdr));
-	/*1.command table area */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Command Table :\n");
-	mvs_hexdump(slot->cmd_size, (u8 *) slot->buf, (u32) slot->buf_dma);
-	/*2.open address frame area */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Open Address Frame :\n");
-	mvs_hexdump(MVS_OAF_SZ, (u8 *) slot->buf + slot->cmd_size,
-				(u32) slot->buf_dma + slot->cmd_size);
-	/*3.status buffer */
-	mvs_hba_sb_dump(mvi, tag, proto);
-	/*4.PRD table */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->PRD table :\n");
-	mvs_hexdump(sizeof(struct mvs_prd) * slot->n_elem,
-		(u8 *) slot->buf + slot->cmd_size + MVS_OAF_SZ,
-		(u32) slot->buf_dma + slot->cmd_size + MVS_OAF_SZ);
-#endif
-}
-
-static void mvs_hba_cq_dump(struct mvs_info *mvi)
-{
-#if (_MV_DUMP > 2)
-	u64 addr;
-	void __iomem *regs = mvi->regs;
-	struct pci_dev *pdev = mvi->pdev;
-	u32 entry = mvi->rx_cons + 1;
-	u32 rx_desc = le32_to_cpu(mvi->rx[entry]);
-
-	/*Completion Queue */
-	addr = mr32(RX_HI) << 16 << 16 | mr32(RX_LO);
-	dev_printk(KERN_DEBUG, &pdev->dev, "Completion Task = 0x%p\n",
-		   mvi->slot_info[rx_desc & RXQ_SLOT_MASK].task);
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Completion List Base Address=0x%llX (PA), "
-		"CQ_Entry=%04d, CQ_WP=0x%08X\n",
-		addr, entry - 1, mvi->rx[0]);
-	mvs_hexdump(sizeof(u32), (u8 *)(&rx_desc),
-		    mvi->rx_dma + sizeof(u32) * entry);
-#endif
-}
-
-static void mvs_hba_interrupt_enable(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
-
-	tmp = mr32(GBL_CTL);
-
-	mw32(GBL_CTL, tmp | INT_EN);
-}
-
-static void mvs_hba_interrupt_disable(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
-
-	tmp = mr32(GBL_CTL);
-
-	mw32(GBL_CTL, tmp & ~INT_EN);
-}
-
-static int mvs_int_rx(struct mvs_info *mvi, bool self_clear);
-
-/* move to PCI layer or libata core? */
-static int pci_go_64(struct pci_dev *pdev)
-{
-	int rc;
-
-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-		if (rc) {
-			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-			if (rc) {
-				dev_printk(KERN_ERR, &pdev->dev,
-					   "64-bit DMA enable failed\n");
-				return rc;
-			}
-		}
-	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc) {
-			dev_printk(KERN_ERR, &pdev->dev,
-				   "32-bit DMA enable failed\n");
-			return rc;
-		}
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc) {
-			dev_printk(KERN_ERR, &pdev->dev,
-				   "32-bit consistent DMA enable failed\n");
-			return rc;
-		}
-	}
-
-	return rc;
-}
-
-static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
-{
-	if (task->lldd_task) {
-		struct mvs_slot_info *slot;
-		slot = (struct mvs_slot_info *) task->lldd_task;
-		*tag = slot - mvi->slot_info;
-		return 1;
-	}
-	return 0;
-}
-
-static void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
-{
-	void *bitmap = (void *) &mvi->tags;
-	clear_bit(tag, bitmap);
-}
-
-static void mvs_tag_free(struct mvs_info *mvi, u32 tag)
-{
-	mvs_tag_clear(mvi, tag);
-}
-
-static void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
-{
-	void *bitmap = (void *) &mvi->tags;
-	set_bit(tag, bitmap);
-}
-
-static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
-{
-	unsigned int index, tag;
-	void *bitmap = (void *) &mvi->tags;
-
-	index = find_first_zero_bit(bitmap, MVS_SLOTS);
-	tag = index;
-	if (tag >= MVS_SLOTS)
-		return -SAS_QUEUE_FULL;
-	mvs_tag_set(mvi, tag);
-	*tag_out = tag;
-	return 0;
-}
-
-static void mvs_tag_init(struct mvs_info *mvi)
-{
-	int i;
-	for (i = 0; i < MVS_SLOTS; ++i)
-		mvs_tag_clear(mvi, i);
-}
-
-#ifndef MVS_DISABLE_NVRAM
-static int mvs_eep_read(void __iomem *regs, u32 addr, u32 *data)
-{
-	int timeout = 1000;
-
-	if (addr & ~SPI_ADDR_MASK)
-		return -EINVAL;
-
-	writel(addr, regs + SPI_CMD);
-	writel(TWSI_RD, regs + SPI_CTL);
-
-	while (timeout-- > 0) {
-		if (readl(regs + SPI_CTL) & TWSI_RDY) {
-			*data = readl(regs + SPI_DATA);
-			return 0;
-		}
-
-		udelay(10);
-	}
-
-	return -EBUSY;
-}
-
-static int mvs_eep_read_buf(void __iomem *regs, u32 addr,
-			    void *buf, u32 buflen)
-{
-	u32 addr_end, tmp_addr, i, j;
-	u32 tmp = 0;
-	int rc;
-	u8 *tmp8, *buf8 = buf;
-
-	addr_end = addr + buflen;
-	tmp_addr = ALIGN(addr, 4);
-	if (addr > 0xff)
-		return -EINVAL;
-
-	j = addr & 0x3;
-	if (j) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
-
-		tmp8 = (u8 *)&tmp;
-		for (i = j; i < 4; i++)
-			*buf8++ = tmp8[i];
-
-		tmp_addr += 4;
-	}
-
-	for (j = ALIGN(addr_end, 4); tmp_addr < j; tmp_addr += 4) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
-
-		memcpy(buf8, &tmp, 4);
-		buf8 += 4;
-	}
-
-	if (tmp_addr < addr_end) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
-
-		tmp8 = (u8 *)&tmp;
-		j = addr_end - tmp_addr;
-		for (i = 0; i < j; i++)
-			*buf8++ = tmp8[i];
-
-		tmp_addr += 4;
-	}
-
-	return 0;
-}
-#endif
-
-static int mvs_nvram_read(struct mvs_info *mvi, u32 addr,
-			  void *buf, u32 buflen)
-{
-#ifndef MVS_DISABLE_NVRAM
-	void __iomem *regs = mvi->regs;
-	int rc, i;
-	u32 sum;
-	u8 hdr[2], *tmp;
-	const char *msg;
-
-	rc = mvs_eep_read_buf(regs, addr, &hdr, 2);
-	if (rc) {
-		msg = "nvram hdr read failed";
-		goto err_out;
-	}
-	rc = mvs_eep_read_buf(regs, addr + 2, buf, buflen);
-	if (rc) {
-		msg = "nvram read failed";
-		goto err_out;
-	}
-
-	if (hdr[0] != 0x5A) {
-		/* entry id */
-		msg = "invalid nvram entry id";
-		rc = -ENOENT;
-		goto err_out;
-	}
-
-	tmp = buf;
-	sum = ((u32)hdr[0]) + ((u32)hdr[1]);
-	for (i = 0; i < buflen; i++)
-		sum += ((u32)tmp[i]);
-
-	if (sum) {
-		msg = "nvram checksum failure";
-		rc = -EILSEQ;
-		goto err_out;
-	}
-
-	return 0;
-
-err_out:
-	dev_printk(KERN_ERR, &mvi->pdev->dev, "%s", msg);
-	return rc;
-#else
-	/* FIXME , For SAS target mode */
-	memcpy(buf, "\x50\x05\x04\x30\x11\xab\x00\x00", 8);
-	return 0;
-#endif
-}
-
-static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
-{
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
-
-	if (!phy->phy_attached)
-		return;
-
-	if (sas_phy->phy) {
-		struct sas_phy *sphy = sas_phy->phy;
-
-		sphy->negotiated_linkrate = sas_phy->linkrate;
-		sphy->minimum_linkrate = phy->minimum_linkrate;
-		sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
-		sphy->maximum_linkrate = phy->maximum_linkrate;
-		sphy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
-	}
-
-	if (phy->phy_type & PORT_TYPE_SAS) {
-		struct sas_identify_frame *id;
-
-		id = (struct sas_identify_frame *)phy->frame_rcvd;
-		id->dev_type = phy->identify.device_type;
-		id->initiator_bits = SAS_PROTOCOL_ALL;
-		id->target_bits = phy->identify.target_port_protocols;
-	} else if (phy->phy_type & PORT_TYPE_SATA) {
-		/* TODO */
-	}
-	mvi->sas.sas_phy[i]->frame_rcvd_size = phy->frame_rcvd_size;
-	mvi->sas.notify_port_event(mvi->sas.sas_phy[i],
-				   PORTE_BYTES_DMAED);
-}
-
-static int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time)
-{
-	/* give the phy enabling interrupt event time to come in (1s
-	 * is empirically about all it takes) */
-	if (time < HZ)
-		return 0;
-	/* Wait for discovery to finish */
-	scsi_flush_work(shost);
-	return 1;
-}
-
-static void mvs_scan_start(struct Scsi_Host *shost)
-{
-	int i;
-	struct mvs_info *mvi = SHOST_TO_SAS_HA(shost)->lldd_ha;
-
-	for (i = 0; i < mvi->chip->n_phy; ++i) {
-		mvs_bytes_dmaed(mvi, i);
-	}
-}
-
-static int mvs_slave_configure(struct scsi_device *sdev)
-{
-	struct domain_device *dev = sdev_to_domain_dev(sdev);
-	int ret = sas_slave_configure(sdev);
-
-	if (ret)
-		return ret;
-
-	if (dev_is_sata(dev)) {
-		/* struct ata_port *ap = dev->sata_dev.ap; */
-		/* struct ata_device *adev = ap->link.device; */
-
-		/* clamp at no NCQ for the time being */
-		/* adev->flags |= ATA_DFLAG_NCQ_OFF; */
-		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1);
-	}
-	return 0;
-}
-
-static void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
-{
-	struct pci_dev *pdev = mvi->pdev;
-	struct sas_ha_struct *sas_ha = &mvi->sas;
-	struct mvs_phy *phy = &mvi->phy[phy_no];
-	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-
-	phy->irq_status = mvs_read_port_irq_stat(mvi, phy_no);
-	/*
-	* events is port event now ,
-	* we need check the interrupt status which belongs to per port.
-	*/
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Port %d Event = %X\n",
-		phy_no, phy->irq_status);
-
-	if (phy->irq_status & (PHYEV_POOF | PHYEV_DEC_ERR)) {
-		mvs_release_task(mvi, phy_no);
-		if (!mvs_is_phy_ready(mvi, phy_no)) {
-			sas_phy_disconnected(sas_phy);
-			sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
-			dev_printk(KERN_INFO, &pdev->dev,
-				"Port %d Unplug Notice\n", phy_no);
-
-		} else
-			mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET, NULL);
-	}
-	if (!(phy->irq_status & PHYEV_DEC_ERR)) {
-		if (phy->irq_status & PHYEV_COMWAKE) {
-			u32 tmp = mvs_read_port_irq_mask(mvi, phy_no);
-			mvs_write_port_irq_mask(mvi, phy_no,
-						tmp | PHYEV_SIG_FIS);
-		}
-		if (phy->irq_status & (PHYEV_SIG_FIS | PHYEV_ID_DONE)) {
-			phy->phy_status = mvs_is_phy_ready(mvi, phy_no);
-			if (phy->phy_status) {
-				mvs_detect_porttype(mvi, phy_no);
-
-				if (phy->phy_type & PORT_TYPE_SATA) {
-					u32 tmp = mvs_read_port_irq_mask(mvi,
-								phy_no);
-					tmp &= ~PHYEV_SIG_FIS;
-					mvs_write_port_irq_mask(mvi,
-								phy_no, tmp);
-				}
-
-				mvs_update_phyinfo(mvi, phy_no, 0);
-				sas_ha->notify_phy_event(sas_phy,
-							PHYE_OOB_DONE);
-				mvs_bytes_dmaed(mvi, phy_no);
-			} else {
-				dev_printk(KERN_DEBUG, &pdev->dev,
-					"plugin interrupt but phy is gone\n");
-				mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET,
-							NULL);
-			}
-		} else if (phy->irq_status & PHYEV_BROAD_CH) {
-			mvs_release_task(mvi, phy_no);
-			sas_ha->notify_port_event(sas_phy,
-						PORTE_BROADCAST_RCVD);
-		}
-	}
-	mvs_write_port_irq_stat(mvi, phy_no, phy->irq_status);
-}
-
-static void mvs_int_sata(struct mvs_info *mvi)
-{
-	u32 tmp;
-	void __iomem *regs = mvi->regs;
-	tmp = mr32(INT_STAT_SRS);
-	mw32(INT_STAT_SRS, tmp & 0xFFFF);
-}
-
-static void mvs_slot_reset(struct mvs_info *mvi, struct sas_task *task,
-				u32 slot_idx)
-{
-	void __iomem *regs = mvi->regs;
-	struct domain_device *dev = task->dev;
-	struct asd_sas_port *sas_port = dev->port;
-	struct mvs_port *port = mvi->slot_info[slot_idx].port;
-	u32 reg_set, phy_mask;
-
-	if (!sas_protocol_ata(task->task_proto)) {
-		reg_set = 0;
-		phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
-				sas_port->phy_mask;
-	} else {
-		reg_set = port->taskfileset;
-		phy_mask = sas_port->phy_mask;
-	}
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | slot_idx |
-					(TXQ_CMD_SLOT_RESET << TXQ_CMD_SHIFT) |
-					(phy_mask << TXQ_PHY_SHIFT) |
-					(reg_set << TXQ_SRS_SHIFT));
-
-	mw32(TX_PROD_IDX, mvi->tx_prod);
-	mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
-}
-
-static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
-			u32 slot_idx, int err)
-{
-	struct mvs_port *port = mvi->slot_info[slot_idx].port;
-	struct task_status_struct *tstat = &task->task_status;
-	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
-	int stat = SAM_GOOD;
-
-	resp->frame_len = sizeof(struct dev_to_host_fis);
-	memcpy(&resp->ending_fis[0],
-	       SATA_RECEIVED_D2H_FIS(port->taskfileset),
-	       sizeof(struct dev_to_host_fis));
-	tstat->buf_valid_size = sizeof(*resp);
-	if (unlikely(err))
-		stat = SAS_PROTO_RESPONSE;
-	return stat;
-}
-
-static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
-{
-	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
-	mvs_tag_clear(mvi, slot_idx);
-}
-
-static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
-			  struct mvs_slot_info *slot, u32 slot_idx)
-{
-	if (!sas_protocol_ata(task->task_proto))
-		if (slot->n_elem)
-			pci_unmap_sg(mvi->pdev, task->scatter,
-				     slot->n_elem, task->data_dir);
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SMP:
-		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_resp, 1,
-			     PCI_DMA_FROMDEVICE);
-		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_req, 1,
-			     PCI_DMA_TODEVICE);
-		break;
-
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SSP:
-	default:
-		/* do nothing */
-		break;
-	}
-	list_del(&slot->list);
-	task->lldd_task = NULL;
-	slot->task = NULL;
-	slot->port = NULL;
-}
-
-static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
-			 u32 slot_idx)
-{
-	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
-	u32 err_dw0 = le32_to_cpu(*(u32 *) (slot->response));
-	u32 err_dw1 = le32_to_cpu(*(u32 *) (slot->response + 4));
-	int stat = SAM_CHECK_COND;
-
-	if (err_dw1 & SLOT_BSY_ERR) {
-		stat = SAS_QUEUE_FULL;
-		mvs_slot_reset(mvi, task, slot_idx);
-	}
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SSP:
-		break;
-	case SAS_PROTOCOL_SMP:
-		break;
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
-		if (err_dw0 & TFILE_ERR)
-			stat = mvs_sata_done(mvi, task, slot_idx, 1);
-		break;
-	default:
-		break;
-	}
-
-	mvs_hexdump(16, (u8 *) slot->response, 0);
-	return stat;
-}
-
-static int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
-{
-	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
-	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
-	struct sas_task *task = slot->task;
-	struct task_status_struct *tstat;
-	struct mvs_port *port;
-	bool aborted;
-	void *to;
-
-	if (unlikely(!task || !task->lldd_task))
-		return -1;
-
-	mvs_hba_cq_dump(mvi);
-
-	spin_lock(&task->task_state_lock);
-	aborted = task->task_state_flags & SAS_TASK_STATE_ABORTED;
-	if (!aborted) {
-		task->task_state_flags &=
-		    ~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
-		task->task_state_flags |= SAS_TASK_STATE_DONE;
-	}
-	spin_unlock(&task->task_state_lock);
-
-	if (aborted) {
-		mvs_slot_task_free(mvi, task, slot, slot_idx);
-		mvs_slot_free(mvi, rx_desc);
-		return -1;
-	}
-
-	port = slot->port;
-	tstat = &task->task_status;
-	memset(tstat, 0, sizeof(*tstat));
-	tstat->resp = SAS_TASK_COMPLETE;
-
-	if (unlikely(!port->port_attached || flags)) {
-		mvs_slot_err(mvi, task, slot_idx);
-		if (!sas_protocol_ata(task->task_proto))
-			tstat->stat = SAS_PHY_DOWN;
-		goto out;
-	}
-
-	/* error info record present */
-	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
-		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
-		goto out;
-	}
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SSP:
-		/* hw says status == 0, datapres == 0 */
-		if (rx_desc & RXQ_GOOD) {
-			tstat->stat = SAM_GOOD;
-			tstat->resp = SAS_TASK_COMPLETE;
-		}
-		/* response frame present */
-		else if (rx_desc & RXQ_RSP) {
-			struct ssp_response_iu *iu =
-			    slot->response + sizeof(struct mvs_err_info);
-			sas_ssp_task_response(&mvi->pdev->dev, task, iu);
-		}
-
-		/* should never happen? */
-		else
-			tstat->stat = SAM_CHECK_COND;
-		break;
-
-	case SAS_PROTOCOL_SMP: {
-			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
-			tstat->stat = SAM_GOOD;
-			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
-			memcpy(to + sg_resp->offset,
-				slot->response + sizeof(struct mvs_err_info),
-				sg_dma_len(sg_resp));
-			kunmap_atomic(to, KM_IRQ0);
-			break;
-		}
-
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: {
-			tstat->stat = mvs_sata_done(mvi, task, slot_idx, 0);
-			break;
-		}
-
-	default:
-		tstat->stat = SAM_CHECK_COND;
-		break;
-	}
-
-out:
-	mvs_slot_task_free(mvi, task, slot, slot_idx);
-	if (unlikely(tstat->stat != SAS_QUEUE_FULL))
-		mvs_slot_free(mvi, rx_desc);
-
-	spin_unlock(&mvi->lock);
-	task->task_done(task);
-	spin_lock(&mvi->lock);
-	return tstat->stat;
-}
-
-static void mvs_release_task(struct mvs_info *mvi, int phy_no)
-{
-	struct list_head *pos, *n;
-	struct mvs_slot_info *slot;
-	struct mvs_phy *phy = &mvi->phy[phy_no];
-	struct mvs_port *port = phy->port;
-	u32 rx_desc;
-
-	if (!port)
-		return;
-
-	list_for_each_safe(pos, n, &port->list) {
-		slot = container_of(pos, struct mvs_slot_info, list);
-		rx_desc = (u32) (slot - mvi->slot_info);
-		mvs_slot_complete(mvi, rx_desc, 1);
-	}
-}
-
-static void mvs_int_full(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp, stat;
-	int i;
-
-	stat = mr32(INT_STAT);
-
-	mvs_int_rx(mvi, false);
-
-	for (i = 0; i < MVS_MAX_PORTS; i++) {
-		tmp = (stat >> i) & (CINT_PORT | CINT_PORT_STOPPED);
-		if (tmp)
-			mvs_int_port(mvi, i, tmp);
-	}
-
-	if (stat & CINT_SRS)
-		mvs_int_sata(mvi);
-
-	mw32(INT_STAT, stat);
-}
-
-static int mvs_int_rx(struct mvs_info *mvi, bool self_clear)
-{
-	void __iomem *regs = mvi->regs;
-	u32 rx_prod_idx, rx_desc;
-	bool attn = false;
-	struct pci_dev *pdev = mvi->pdev;
-
-	/* the first dword in the RX ring is special: it contains
-	 * a mirror of the hardware's RX producer index, so that
-	 * we don't have to stall the CPU reading that register.
-	 * The actual RX ring is offset by one dword, due to this.
-	 */
-	rx_prod_idx = mvi->rx_cons;
-	mvi->rx_cons = le32_to_cpu(mvi->rx[0]);
-	if (mvi->rx_cons == 0xfff)	/* h/w hasn't touched RX ring yet */
-		return 0;
-
-	/* The CMPL_Q may come late, read from register and try again
-	* note: if coalescing is enabled,
-	* it will need to read from register every time for sure
-	*/
-	if (mvi->rx_cons == rx_prod_idx)
-		mvi->rx_cons = mr32(RX_CONS_IDX) & RX_RING_SZ_MASK;
-
-	if (mvi->rx_cons == rx_prod_idx)
-		return 0;
-
-	while (mvi->rx_cons != rx_prod_idx) {
-
-		/* increment our internal RX consumer pointer */
-		rx_prod_idx = (rx_prod_idx + 1) & (MVS_RX_RING_SZ - 1);
-
-		rx_desc = le32_to_cpu(mvi->rx[rx_prod_idx + 1]);
-
-		if (likely(rx_desc & RXQ_DONE))
-			mvs_slot_complete(mvi, rx_desc, 0);
-		if (rx_desc & RXQ_ATTN) {
-			attn = true;
-			dev_printk(KERN_DEBUG, &pdev->dev, "ATTN %X\n",
-				rx_desc);
-		} else if (rx_desc & RXQ_ERR) {
-			if (!(rx_desc & RXQ_DONE))
-				mvs_slot_complete(mvi, rx_desc, 0);
-			dev_printk(KERN_DEBUG, &pdev->dev, "RXQ_ERR %X\n",
-				rx_desc);
-		} else if (rx_desc & RXQ_SLOT_RESET) {
-			dev_printk(KERN_DEBUG, &pdev->dev, "Slot reset[%X]\n",
-				rx_desc);
-			mvs_slot_free(mvi, rx_desc);
-		}
-	}
-
-	if (attn && self_clear)
-		mvs_int_full(mvi);
-
-	return 0;
-}
-
-#ifdef MVS_USE_TASKLET
-static void mvs_tasklet(unsigned long data)
-{
-	struct mvs_info *mvi = (struct mvs_info *) data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mvi->lock, flags);
-
-#ifdef MVS_DISABLE_MSI
-	mvs_int_full(mvi);
-#else
-	mvs_int_rx(mvi, true);
-#endif
-	spin_unlock_irqrestore(&mvi->lock, flags);
-}
-#endif
-
-static irqreturn_t mvs_interrupt(int irq, void *opaque)
-{
-	struct mvs_info *mvi = opaque;
-	void __iomem *regs = mvi->regs;
-	u32 stat;
-
-	stat = mr32(GBL_INT_STAT);
-
-	if (stat == 0 || stat == 0xffffffff)
-		return IRQ_NONE;
-
-	/* clear CMD_CMPLT ASAP */
-	mw32_f(INT_STAT, CINT_DONE);
-
-#ifndef MVS_USE_TASKLET
-	spin_lock(&mvi->lock);
-
-	mvs_int_full(mvi);
-
-	spin_unlock(&mvi->lock);
-#else
-	tasklet_schedule(&mvi->tasklet);
-#endif
-	return IRQ_HANDLED;
-}
-
-#ifndef MVS_DISABLE_MSI
-static irqreturn_t mvs_msi_interrupt(int irq, void *opaque)
-{
-	struct mvs_info *mvi = opaque;
-
-#ifndef MVS_USE_TASKLET
-	spin_lock(&mvi->lock);
-
-	mvs_int_rx(mvi, true);
-
-	spin_unlock(&mvi->lock);
-#else
-	tasklet_schedule(&mvi->tasklet);
-#endif
-	return IRQ_HANDLED;
-}
-#endif
-
-struct mvs_task_exec_info {
-	struct sas_task *task;
-	struct mvs_cmd_hdr *hdr;
-	struct mvs_port *port;
-	u32 tag;
-	int n_elem;
-};
-
-static int mvs_task_prep_smp(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
-{
-	int elem, rc, i;
-	struct sas_task *task = tei->task;
-	struct mvs_cmd_hdr *hdr = tei->hdr;
-	struct scatterlist *sg_req, *sg_resp;
-	u32 req_len, resp_len, tag = tei->tag;
-	void *buf_tmp;
-	u8 *buf_oaf;
-	dma_addr_t buf_tmp_dma;
-	struct mvs_prd *buf_prd;
-	struct scatterlist *sg;
-	struct mvs_slot_info *slot = &mvi->slot_info[tag];
-	struct asd_sas_port *sas_port = task->dev->port;
-	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
-#if _MV_DUMP
-	u8 *buf_cmd;
-	void *from;
-#endif
-	/*
-	 * DMA-map SMP request, response buffers
-	 */
-	sg_req = &task->smp_task.smp_req;
-	elem = pci_map_sg(mvi->pdev, sg_req, 1, PCI_DMA_TODEVICE);
-	if (!elem)
-		return -ENOMEM;
-	req_len = sg_dma_len(sg_req);
-
-	sg_resp = &task->smp_task.smp_resp;
-	elem = pci_map_sg(mvi->pdev, sg_resp, 1, PCI_DMA_FROMDEVICE);
-	if (!elem) {
-		rc = -ENOMEM;
-		goto err_out;
-	}
-	resp_len = sg_dma_len(sg_resp);
-
-	/* must be in dwords */
-	if ((req_len & 0x3) || (resp_len & 0x3)) {
-		rc = -EINVAL;
-		goto err_out_2;
-	}
-
-	/*
-	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
-	 */
-
-	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
-	buf_tmp = slot->buf;
-	buf_tmp_dma = slot->buf_dma;
-
-#if _MV_DUMP
-	buf_cmd = buf_tmp;
-	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
-	buf_tmp += req_len;
-	buf_tmp_dma += req_len;
-	slot->cmd_size = req_len;
-#else
-	hdr->cmd_tbl = cpu_to_le64(sg_dma_address(sg_req));
-#endif
-
-	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
-	buf_oaf = buf_tmp;
-	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_OAF_SZ;
-	buf_tmp_dma += MVS_OAF_SZ;
-
-	/* region 3: PRD table ********************************************* */
-	buf_prd = buf_tmp;
-	if (tei->n_elem)
-		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
-	else
-		hdr->prd_tbl = 0;
-
-	i = sizeof(struct mvs_prd) * tei->n_elem;
-	buf_tmp += i;
-	buf_tmp_dma += i;
-
-	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
-	slot->response = buf_tmp;
-	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
-
-	/*
-	 * Fill in TX ring and command slot header
-	 */
-	slot->tx = mvi->tx_prod;
-	mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) |
-					TXQ_MODE_I | tag |
-					(sas_port->phy_mask << TXQ_PHY_SHIFT));
-
-	hdr->flags |= flags;
-	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4));
-	hdr->tags = cpu_to_le32(tag);
-	hdr->data_len = 0;
-
-	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (0 << 4) | 0x01; /* initiator, SMP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = 0xFFFF;		/* SAS SPEC */
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
-
-	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
-
-#if _MV_DUMP
-	/* copy cmd table */
-	from = kmap_atomic(sg_page(sg_req), KM_IRQ0);
-	memcpy(buf_cmd, from + sg_req->offset, req_len);
-	kunmap_atomic(from, KM_IRQ0);
-#endif
-	return 0;
-
-err_out_2:
-	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_resp, 1,
-		     PCI_DMA_FROMDEVICE);
-err_out:
-	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_req, 1,
-		     PCI_DMA_TODEVICE);
-	return rc;
-}
-
-static void mvs_free_reg_set(struct mvs_info *mvi, struct mvs_port *port)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp, offs;
-	u8 *tfs = &port->taskfileset;
-
-	if (*tfs == MVS_ID_NOT_MAPPED)
-		return;
-
-	offs = 1U << ((*tfs & 0x0f) + PCS_EN_SATA_REG_SHIFT);
-	if (*tfs < 16) {
-		tmp = mr32(PCS);
-		mw32(PCS, tmp & ~offs);
-	} else {
-		tmp = mr32(CTL);
-		mw32(CTL, tmp & ~offs);
-	}
-
-	tmp = mr32(INT_STAT_SRS) & (1U << *tfs);
-	if (tmp)
-		mw32(INT_STAT_SRS, tmp);
-
-	*tfs = MVS_ID_NOT_MAPPED;
-}
-
-static u8 mvs_assign_reg_set(struct mvs_info *mvi, struct mvs_port *port)
-{
-	int i;
-	u32 tmp, offs;
-	void __iomem *regs = mvi->regs;
-
-	if (port->taskfileset != MVS_ID_NOT_MAPPED)
-		return 0;
-
-	tmp = mr32(PCS);
-
-	for (i = 0; i < mvi->chip->srs_sz; i++) {
-		if (i == 16)
-			tmp = mr32(CTL);
-		offs = 1U << ((i & 0x0f) + PCS_EN_SATA_REG_SHIFT);
-		if (!(tmp & offs)) {
-			port->taskfileset = i;
-
-			if (i < 16)
-				mw32(PCS, tmp | offs);
-			else
-				mw32(CTL, tmp | offs);
-			tmp = mr32(INT_STAT_SRS) & (1U << i);
-			if (tmp)
-				mw32(INT_STAT_SRS, tmp);
-			return 0;
-		}
-	}
-	return MVS_ID_NOT_MAPPED;
-}
-
-static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag)
-{
-	struct ata_queued_cmd *qc = task->uldd_task;
-
-	if (qc) {
-		if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
-			qc->tf.command == ATA_CMD_FPDMA_READ) {
-			*tag = qc->tag;
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int mvs_task_prep_ata(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
-{
-	struct sas_task *task = tei->task;
-	struct domain_device *dev = task->dev;
-	struct mvs_cmd_hdr *hdr = tei->hdr;
-	struct asd_sas_port *sas_port = dev->port;
-	struct mvs_slot_info *slot;
-	struct scatterlist *sg;
-	struct mvs_prd *buf_prd;
-	struct mvs_port *port = tei->port;
-	u32 tag = tei->tag;
-	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
-	void *buf_tmp;
-	u8 *buf_cmd, *buf_oaf;
-	dma_addr_t buf_tmp_dma;
-	u32 i, req_len, resp_len;
-	const u32 max_resp_len = SB_RFB_MAX;
-
-	if (mvs_assign_reg_set(mvi, port) == MVS_ID_NOT_MAPPED)
-		return -EBUSY;
-
-	slot = &mvi->slot_info[tag];
-	slot->tx = mvi->tx_prod;
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
-					(TXQ_CMD_STP << TXQ_CMD_SHIFT) |
-					(sas_port->phy_mask << TXQ_PHY_SHIFT) |
-					(port->taskfileset << TXQ_SRS_SHIFT));
-
-	if (task->ata_task.use_ncq)
-		flags |= MCH_FPDMA;
-	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET) {
-		if (task->ata_task.fis.command != ATA_CMD_ID_ATAPI)
-			flags |= MCH_ATAPI;
-	}
-
-	/* FIXME: fill in port multiplier number */
-
-	hdr->flags = cpu_to_le32(flags);
-
-	/* FIXME: the low order order 5 bits for the TAG if enable NCQ */
-	if (task->ata_task.use_ncq && mvs_get_ncq_tag(task, &hdr->tags))
-		task->ata_task.fis.sector_count |= hdr->tags << 3;
-	else
-		hdr->tags = cpu_to_le32(tag);
-	hdr->data_len = cpu_to_le32(task->total_xfer_len);
-
-	/*
-	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
-	 */
-
-	/* region 1: command table area (MVS_ATA_CMD_SZ bytes) ************** */
-	buf_cmd = buf_tmp = slot->buf;
-	buf_tmp_dma = slot->buf_dma;
-
-	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_ATA_CMD_SZ;
-	buf_tmp_dma += MVS_ATA_CMD_SZ;
-#if _MV_DUMP
-	slot->cmd_size = MVS_ATA_CMD_SZ;
-#endif
-
-	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
-	/* used for STP.  unused for SATA? */
-	buf_oaf = buf_tmp;
-	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_OAF_SZ;
-	buf_tmp_dma += MVS_OAF_SZ;
-
-	/* region 3: PRD table ********************************************* */
-	buf_prd = buf_tmp;
-	if (tei->n_elem)
-		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
-	else
-		hdr->prd_tbl = 0;
-
-	i = sizeof(struct mvs_prd) * tei->n_elem;
-	buf_tmp += i;
-	buf_tmp_dma += i;
-
-	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
-	/* FIXME: probably unused, for SATA.  kept here just in case
-	 * we get a STP/SATA error information record
-	 */
-	slot->response = buf_tmp;
-	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
-
-	req_len = sizeof(struct host_to_dev_fis);
-	resp_len = MVS_SLOT_BUF_SZ - MVS_ATA_CMD_SZ -
-	    sizeof(struct mvs_err_info) - i;
-
-	/* request, response lengths */
-	resp_len = min(resp_len, max_resp_len);
-	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
-
-	task->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */
-	/* fill in command FIS and ATAPI CDB */
-	memcpy(buf_cmd, &task->ata_task.fis, sizeof(struct host_to_dev_fis));
-	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET)
-		memcpy(buf_cmd + STP_ATAPI_CMD,
-			task->ata_task.atapi_packet, 16);
-
-	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (2 << 4) | 0x1;	/* initiator, STP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = cpu_to_be16(tag);
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
-
-	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
-
-	return 0;
-}
-
-static int mvs_task_prep_ssp(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
-{
-	struct sas_task *task = tei->task;
-	struct mvs_cmd_hdr *hdr = tei->hdr;
-	struct mvs_port *port = tei->port;
-	struct mvs_slot_info *slot;
-	struct scatterlist *sg;
-	struct mvs_prd *buf_prd;
-	struct ssp_frame_hdr *ssp_hdr;
-	void *buf_tmp;
-	u8 *buf_cmd, *buf_oaf, fburst = 0;
-	dma_addr_t buf_tmp_dma;
-	u32 flags;
-	u32 resp_len, req_len, i, tag = tei->tag;
-	const u32 max_resp_len = SB_RFB_MAX;
-	u8 phy_mask;
-
-	slot = &mvi->slot_info[tag];
-
-	phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
-		task->dev->port->phy_mask;
-	slot->tx = mvi->tx_prod;
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
-				(TXQ_CMD_SSP << TXQ_CMD_SHIFT) |
-				(phy_mask << TXQ_PHY_SHIFT));
-
-	flags = MCH_RETRY;
-	if (task->ssp_task.enable_first_burst) {
-		flags |= MCH_FBURST;
-		fburst = (1 << 7);
-	}
-	hdr->flags = cpu_to_le32(flags |
-				 (tei->n_elem << MCH_PRD_LEN_SHIFT) |
-				 (MCH_SSP_FR_CMD << MCH_SSP_FR_TYPE_SHIFT));
-
-	hdr->tags = cpu_to_le32(tag);
-	hdr->data_len = cpu_to_le32(task->total_xfer_len);
-
-	/*
-	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
-	 */
-
-	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
-	buf_cmd = buf_tmp = slot->buf;
-	buf_tmp_dma = slot->buf_dma;
-
-	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_SSP_CMD_SZ;
-	buf_tmp_dma += MVS_SSP_CMD_SZ;
-#if _MV_DUMP
-	slot->cmd_size = MVS_SSP_CMD_SZ;
-#endif
-
-	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
-	buf_oaf = buf_tmp;
-	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_OAF_SZ;
-	buf_tmp_dma += MVS_OAF_SZ;
-
-	/* region 3: PRD table ********************************************* */
-	buf_prd = buf_tmp;
-	if (tei->n_elem)
-		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
-	else
-		hdr->prd_tbl = 0;
-
-	i = sizeof(struct mvs_prd) * tei->n_elem;
-	buf_tmp += i;
-	buf_tmp_dma += i;
-
-	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
-	slot->response = buf_tmp;
-	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
-
-	resp_len = MVS_SLOT_BUF_SZ - MVS_SSP_CMD_SZ - MVS_OAF_SZ -
-	    sizeof(struct mvs_err_info) - i;
-	resp_len = min(resp_len, max_resp_len);
-
-	req_len = sizeof(struct ssp_frame_hdr) + 28;
-
-	/* request, response lengths */
-	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
-
-	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (1 << 4) | 0x1;	/* initiator, SSP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = cpu_to_be16(tag);
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
-
-	/* fill in SSP frame header (Command Table.SSP frame header) */
-	ssp_hdr = (struct ssp_frame_hdr *)buf_cmd;
-	ssp_hdr->frame_type = SSP_COMMAND;
-	memcpy(ssp_hdr->hashed_dest_addr, task->dev->hashed_sas_addr,
-	       HASHED_SAS_ADDR_SIZE);
-	memcpy(ssp_hdr->hashed_src_addr,
-	       task->dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
-	ssp_hdr->tag = cpu_to_be16(tag);
-
-	/* fill in command frame IU */
-	buf_cmd += sizeof(*ssp_hdr);
-	memcpy(buf_cmd, &task->ssp_task.LUN, 8);
-	buf_cmd[9] = fburst | task->ssp_task.task_attr |
-			(task->ssp_task.task_prio << 3);
-	memcpy(buf_cmd + 12, &task->ssp_task.cdb, 16);
-
-	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
-
-	return 0;
-}
-
-static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
-{
-	struct domain_device *dev = task->dev;
-	struct mvs_info *mvi = dev->port->ha->lldd_ha;
-	struct pci_dev *pdev = mvi->pdev;
-	void __iomem *regs = mvi->regs;
-	struct mvs_task_exec_info tei;
-	struct sas_task *t = task;
-	struct mvs_slot_info *slot;
-	u32 tag = 0xdeadbeef, rc, n_elem = 0;
-	unsigned long flags;
-	u32 n = num, pass = 0;
-
-	spin_lock_irqsave(&mvi->lock, flags);
-	do {
-		dev = t->dev;
-		tei.port = &mvi->port[dev->port->id];
-
-		if (!tei.port->port_attached) {
-			if (sas_protocol_ata(t->task_proto)) {
-				rc = SAS_PHY_DOWN;
-				goto out_done;
-			} else {
-				struct task_status_struct *ts = &t->task_status;
-				ts->resp = SAS_TASK_UNDELIVERED;
-				ts->stat = SAS_PHY_DOWN;
-				t->task_done(t);
-				if (n > 1)
-					t = list_entry(t->list.next,
-							struct sas_task, list);
-				continue;
-			}
-		}
-
-		if (!sas_protocol_ata(t->task_proto)) {
-			if (t->num_scatter) {
-				n_elem = pci_map_sg(mvi->pdev, t->scatter,
-						    t->num_scatter,
-						    t->data_dir);
-				if (!n_elem) {
-					rc = -ENOMEM;
-					goto err_out;
-				}
-			}
-		} else {
-			n_elem = t->num_scatter;
-		}
-
-		rc = mvs_tag_alloc(mvi, &tag);
-		if (rc)
-			goto err_out;
-
-		slot = &mvi->slot_info[tag];
-		t->lldd_task = NULL;
-		slot->n_elem = n_elem;
-		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
-		tei.task = t;
-		tei.hdr = &mvi->slot[tag];
-		tei.tag = tag;
-		tei.n_elem = n_elem;
-
-		switch (t->task_proto) {
-		case SAS_PROTOCOL_SMP:
-			rc = mvs_task_prep_smp(mvi, &tei);
-			break;
-		case SAS_PROTOCOL_SSP:
-			rc = mvs_task_prep_ssp(mvi, &tei);
-			break;
-		case SAS_PROTOCOL_SATA:
-		case SAS_PROTOCOL_STP:
-		case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
-			rc = mvs_task_prep_ata(mvi, &tei);
-			break;
-		default:
-			dev_printk(KERN_ERR, &pdev->dev,
-				"unknown sas_task proto: 0x%x\n",
-				t->task_proto);
-			rc = -EINVAL;
-			break;
-		}
-
-		if (rc)
-			goto err_out_tag;
-
-		slot->task = t;
-		slot->port = tei.port;
-		t->lldd_task = (void *) slot;
-		list_add_tail(&slot->list, &slot->port->list);
-		/* TODO: select normal or high priority */
-
-		spin_lock(&t->task_state_lock);
-		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
-		spin_unlock(&t->task_state_lock);
-
-		mvs_hba_memory_dump(mvi, tag, t->task_proto);
-
-		++pass;
-		mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
-		if (n > 1)
-			t = list_entry(t->list.next, struct sas_task, list);
-	} while (--n);
-
-	rc = 0;
-	goto out_done;
-
-err_out_tag:
-	mvs_tag_free(mvi, tag);
-err_out:
-	dev_printk(KERN_ERR, &pdev->dev, "mvsas exec failed[%d]!\n", rc);
-	if (!sas_protocol_ata(t->task_proto))
-		if (n_elem)
-			pci_unmap_sg(mvi->pdev, t->scatter, n_elem,
-				     t->data_dir);
-out_done:
-	if (pass)
-		mw32(TX_PROD_IDX, (mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
-	spin_unlock_irqrestore(&mvi->lock, flags);
-	return rc;
-}
-
-static int mvs_task_abort(struct sas_task *task)
-{
-	int rc;
-	unsigned long flags;
-	struct mvs_info *mvi = task->dev->port->ha->lldd_ha;
-	struct pci_dev *pdev = mvi->pdev;
-	int tag;
-
-	spin_lock_irqsave(&task->task_state_lock, flags);
-	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
-		rc = TMF_RESP_FUNC_COMPLETE;
-		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		goto out_done;
-	}
-	spin_unlock_irqrestore(&task->task_state_lock, flags);
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SMP:
-		dev_printk(KERN_DEBUG, &pdev->dev, "SMP Abort! \n");
-		break;
-	case SAS_PROTOCOL_SSP:
-		dev_printk(KERN_DEBUG, &pdev->dev, "SSP Abort! \n");
-		break;
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:{
-		dev_printk(KERN_DEBUG, &pdev->dev, "STP Abort! \n");
-#if _MV_DUMP
-		dev_printk(KERN_DEBUG, &pdev->dev, "Dump D2H FIS: \n");
-		mvs_hexdump(sizeof(struct host_to_dev_fis),
-				(void *)&task->ata_task.fis, 0);
-		dev_printk(KERN_DEBUG, &pdev->dev, "Dump ATAPI Cmd : \n");
-		mvs_hexdump(16, task->ata_task.atapi_packet, 0);
-#endif
-		spin_lock_irqsave(&task->task_state_lock, flags);
-		if (task->task_state_flags & SAS_TASK_NEED_DEV_RESET) {
-			/* TODO */
-			;
-		}
-		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		break;
-	}
-	default:
-		break;
-	}
-
-	if (mvs_find_tag(mvi, task, &tag)) {
-		spin_lock_irqsave(&mvi->lock, flags);
-		mvs_slot_task_free(mvi, task, &mvi->slot_info[tag], tag);
-		spin_unlock_irqrestore(&mvi->lock, flags);
-	}
-	if (!mvs_task_exec(task, 1, GFP_ATOMIC))
-		rc = TMF_RESP_FUNC_COMPLETE;
-	else
-		rc = TMF_RESP_FUNC_FAILED;
-out_done:
-	return rc;
-}
-
-static void mvs_free(struct mvs_info *mvi)
-{
-	int i;
-
-	if (!mvi)
-		return;
-
-	for (i = 0; i < MVS_SLOTS; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
-
-		if (slot->buf)
-			dma_free_coherent(&mvi->pdev->dev, MVS_SLOT_BUF_SZ,
-					  slot->buf, slot->buf_dma);
-	}
-
-	if (mvi->tx)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
-				  mvi->tx, mvi->tx_dma);
-	if (mvi->rx_fis)
-		dma_free_coherent(&mvi->pdev->dev, MVS_RX_FISL_SZ,
-				  mvi->rx_fis, mvi->rx_fis_dma);
-	if (mvi->rx)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
-				  mvi->rx, mvi->rx_dma);
-	if (mvi->slot)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->slot) * MVS_SLOTS,
-				  mvi->slot, mvi->slot_dma);
-#ifdef MVS_ENABLE_PERI
-	if (mvi->peri_regs)
-		iounmap(mvi->peri_regs);
-#endif
-	if (mvi->regs)
-		iounmap(mvi->regs);
-	if (mvi->shost)
-		scsi_host_put(mvi->shost);
-	kfree(mvi->sas.sas_port);
-	kfree(mvi->sas.sas_phy);
-	kfree(mvi);
-}
-
-/* FIXME: locking? */
-static int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
-			   void *funcdata)
-{
-	struct mvs_info *mvi = sas_phy->ha->lldd_ha;
-	int rc = 0, phy_id = sas_phy->id;
-	u32 tmp;
-
-	tmp = mvs_read_phy_ctl(mvi, phy_id);
-
-	switch (func) {
-	case PHY_FUNC_SET_LINK_RATE:{
-			struct sas_phy_linkrates *rates = funcdata;
-			u32 lrmin = 0, lrmax = 0;
-
-			lrmin = (rates->minimum_linkrate << 8);
-			lrmax = (rates->maximum_linkrate << 12);
-
-			if (lrmin) {
-				tmp &= ~(0xf << 8);
-				tmp |= lrmin;
-			}
-			if (lrmax) {
-				tmp &= ~(0xf << 12);
-				tmp |= lrmax;
-			}
-			mvs_write_phy_ctl(mvi, phy_id, tmp);
-			break;
-		}
-
-	case PHY_FUNC_HARD_RESET:
-		if (tmp & PHY_RST_HARD)
-			break;
-		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST_HARD);
-		break;
-
-	case PHY_FUNC_LINK_RESET:
-		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST);
-		break;
-
-	case PHY_FUNC_DISABLE:
-	case PHY_FUNC_RELEASE_SPINUP_HOLD:
-	default:
-		rc = -EOPNOTSUPP;
-	}
-
-	return rc;
-}
-
-static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
-{
-	struct mvs_phy *phy = &mvi->phy[phy_id];
-	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-
-	sas_phy->enabled = (phy_id < mvi->chip->n_phy) ? 1 : 0;
-	sas_phy->class = SAS;
-	sas_phy->iproto = SAS_PROTOCOL_ALL;
-	sas_phy->tproto = 0;
-	sas_phy->type = PHY_TYPE_PHYSICAL;
-	sas_phy->role = PHY_ROLE_INITIATOR;
-	sas_phy->oob_mode = OOB_NOT_CONNECTED;
-	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
-
-	sas_phy->id = phy_id;
-	sas_phy->sas_addr = &mvi->sas_addr[0];
-	sas_phy->frame_rcvd = &phy->frame_rcvd[0];
-	sas_phy->ha = &mvi->sas;
-	sas_phy->lldd_phy = phy;
-}
-
-static struct mvs_info *__devinit mvs_alloc(struct pci_dev *pdev,
-					    const struct pci_device_id *ent)
-{
-	struct mvs_info *mvi;
-	unsigned long res_start, res_len, res_flag;
-	struct asd_sas_phy **arr_phy;
-	struct asd_sas_port **arr_port;
-	const struct mvs_chip_info *chip = &mvs_chips[ent->driver_data];
-	int i;
-
-	/*
-	 * alloc and init our per-HBA mvs_info struct
-	 */
-
-	mvi = kzalloc(sizeof(*mvi), GFP_KERNEL);
-	if (!mvi)
-		return NULL;
-
-	spin_lock_init(&mvi->lock);
-#ifdef MVS_USE_TASKLET
-	tasklet_init(&mvi->tasklet, mvs_tasklet, (unsigned long)mvi);
-#endif
-	mvi->pdev = pdev;
-	mvi->chip = chip;
-
-	if (pdev->device == 0x6440 && pdev->revision == 0)
-		mvi->flags |= MVF_PHY_PWR_FIX;
-
-	/*
-	 * alloc and init SCSI, SAS glue
-	 */
-
-	mvi->shost = scsi_host_alloc(&mvs_sht, sizeof(void *));
-	if (!mvi->shost)
-		goto err_out;
-
-	arr_phy = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
-	arr_port = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
-	if (!arr_phy || !arr_port)
-		goto err_out;
-
-	for (i = 0; i < MVS_MAX_PHYS; i++) {
-		mvs_phy_init(mvi, i);
-		arr_phy[i] = &mvi->phy[i].sas_phy;
-		arr_port[i] = &mvi->port[i].sas_port;
-		mvi->port[i].taskfileset = MVS_ID_NOT_MAPPED;
-		mvi->port[i].wide_port_phymap = 0;
-		mvi->port[i].port_attached = 0;
-		INIT_LIST_HEAD(&mvi->port[i].list);
-	}
-
-	SHOST_TO_SAS_HA(mvi->shost) = &mvi->sas;
-	mvi->shost->transportt = mvs_stt;
-	mvi->shost->max_id = 21;
-	mvi->shost->max_lun = ~0;
-	mvi->shost->max_channel = 0;
-	mvi->shost->max_cmd_len = 16;
-
-	mvi->sas.sas_ha_name = DRV_NAME;
-	mvi->sas.dev = &pdev->dev;
-	mvi->sas.lldd_module = THIS_MODULE;
-	mvi->sas.sas_addr = &mvi->sas_addr[0];
-	mvi->sas.sas_phy = arr_phy;
-	mvi->sas.sas_port = arr_port;
-	mvi->sas.num_phys = chip->n_phy;
-	mvi->sas.lldd_max_execute_num = 1;
-	mvi->sas.lldd_queue_size = MVS_QUEUE_SIZE;
-	mvi->shost->can_queue = MVS_CAN_QUEUE;
-	mvi->shost->cmd_per_lun = MVS_SLOTS / mvi->sas.num_phys;
-	mvi->sas.lldd_ha = mvi;
-	mvi->sas.core.shost = mvi->shost;
-
-	mvs_tag_init(mvi);
-
-	/*
-	 * ioremap main and peripheral registers
-	 */
-
-#ifdef MVS_ENABLE_PERI
-	res_start = pci_resource_start(pdev, 2);
-	res_len = pci_resource_len(pdev, 2);
-	if (!res_start || !res_len)
-		goto err_out;
-
-	mvi->peri_regs = ioremap_nocache(res_start, res_len);
-	if (!mvi->peri_regs)
-		goto err_out;
-#endif
-
-	res_start = pci_resource_start(pdev, 4);
-	res_len = pci_resource_len(pdev, 4);
-	if (!res_start || !res_len)
-		goto err_out;
-
-	res_flag = pci_resource_flags(pdev, 4);
-	if (res_flag & IORESOURCE_CACHEABLE)
-		mvi->regs = ioremap(res_start, res_len);
-	else
-		mvi->regs = ioremap_nocache(res_start, res_len);
-
-	if (!mvi->regs)
-		goto err_out;
-
-	/*
-	 * alloc and init our DMA areas
-	 */
-
-	mvi->tx = dma_alloc_coherent(&pdev->dev,
-				     sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
-				     &mvi->tx_dma, GFP_KERNEL);
-	if (!mvi->tx)
-		goto err_out;
-	memset(mvi->tx, 0, sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ);
-
-	mvi->rx_fis = dma_alloc_coherent(&pdev->dev, MVS_RX_FISL_SZ,
-					 &mvi->rx_fis_dma, GFP_KERNEL);
-	if (!mvi->rx_fis)
-		goto err_out;
-	memset(mvi->rx_fis, 0, MVS_RX_FISL_SZ);
-
-	mvi->rx = dma_alloc_coherent(&pdev->dev,
-				     sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
-				     &mvi->rx_dma, GFP_KERNEL);
-	if (!mvi->rx)
-		goto err_out;
-	memset(mvi->rx, 0, sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1));
-
-	mvi->rx[0] = cpu_to_le32(0xfff);
-	mvi->rx_cons = 0xfff;
-
-	mvi->slot = dma_alloc_coherent(&pdev->dev,
-				       sizeof(*mvi->slot) * MVS_SLOTS,
-				       &mvi->slot_dma, GFP_KERNEL);
-	if (!mvi->slot)
-		goto err_out;
-	memset(mvi->slot, 0, sizeof(*mvi->slot) * MVS_SLOTS);
-
-	for (i = 0; i < MVS_SLOTS; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
-
-		slot->buf = dma_alloc_coherent(&pdev->dev, MVS_SLOT_BUF_SZ,
-					       &slot->buf_dma, GFP_KERNEL);
-		if (!slot->buf)
-			goto err_out;
-		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
-	}
-
-	/* finally, read NVRAM to get our SAS address */
-	if (mvs_nvram_read(mvi, NVR_SAS_ADDR, &mvi->sas_addr, 8))
-		goto err_out;
-	return mvi;
-
-err_out:
-	mvs_free(mvi);
-	return NULL;
-}
-
-static u32 mvs_cr32(void __iomem *regs, u32 addr)
-{
-	mw32(CMD_ADDR, addr);
-	return mr32(CMD_DATA);
-}
-
-static void mvs_cw32(void __iomem *regs, u32 addr, u32 val)
-{
-	mw32(CMD_ADDR, addr);
-	mw32(CMD_DATA, val);
-}
-
-static u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port)
-{
-	void __iomem *regs = mvi->regs;
-	return (port < 4)?mr32(P0_SER_CTLSTAT + port * 4):
-		mr32(P4_SER_CTLSTAT + (port - 4) * 4);
-}
-
-static void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val)
-{
-	void __iomem *regs = mvi->regs;
-	if (port < 4)
-		mw32(P0_SER_CTLSTAT + port * 4, val);
-	else
-		mw32(P4_SER_CTLSTAT + (port - 4) * 4, val);
-}
-
-static u32 mvs_read_port(struct mvs_info *mvi, u32 off, u32 off2, u32 port)
-{
-	void __iomem *regs = mvi->regs + off;
-	void __iomem *regs2 = mvi->regs + off2;
-	return (port < 4)?readl(regs + port * 8):
-		readl(regs2 + (port - 4) * 8);
-}
-
-static void mvs_write_port(struct mvs_info *mvi, u32 off, u32 off2,
-				u32 port, u32 val)
-{
-	void __iomem *regs = mvi->regs + off;
-	void __iomem *regs2 = mvi->regs + off2;
-	if (port < 4)
-		writel(val, regs + port * 8);
-	else
-		writel(val, regs2 + (port - 4) * 8);
-}
-
-static u32 mvs_read_port_cfg_data(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_CFG_DATA, MVS_P4_CFG_DATA, port);
-}
-
-static void mvs_write_port_cfg_data(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_CFG_DATA, MVS_P4_CFG_DATA, port, val);
-}
-
-static void mvs_write_port_cfg_addr(struct mvs_info *mvi, u32 port, u32 addr)
-{
-	mvs_write_port(mvi, MVS_P0_CFG_ADDR, MVS_P4_CFG_ADDR, port, addr);
-}
-
-static u32 mvs_read_port_vsr_data(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_VSR_DATA, MVS_P4_VSR_DATA, port);
-}
-
-static void mvs_write_port_vsr_data(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_VSR_DATA, MVS_P4_VSR_DATA, port, val);
-}
-
-static void mvs_write_port_vsr_addr(struct mvs_info *mvi, u32 port, u32 addr)
-{
-	mvs_write_port(mvi, MVS_P0_VSR_ADDR, MVS_P4_VSR_ADDR, port, addr);
-}
-
-static u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_INT_STAT, MVS_P4_INT_STAT, port);
-}
-
-static void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_INT_STAT, MVS_P4_INT_STAT, port, val);
-}
-
-static u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_INT_MASK, MVS_P4_INT_MASK, port);
-}
-
-static void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_INT_MASK, MVS_P4_INT_MASK, port, val);
-}
-
-static void __devinit mvs_phy_hacks(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
-
-	/* workaround for SATA R-ERR, to ignore phy glitch */
-	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
-	tmp &= ~(1 << 9);
-	tmp |= (1 << 10);
-	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
-
-	/* enable retry 127 times */
-	mvs_cw32(regs, CMD_SAS_CTL1, 0x7f7f);
-
-	/* extend open frame timeout to max */
-	tmp = mvs_cr32(regs, CMD_SAS_CTL0);
-	tmp &= ~0xffff;
-	tmp |= 0x3fff;
-	mvs_cw32(regs, CMD_SAS_CTL0, tmp);
-
-	/* workaround for WDTIMEOUT , set to 550 ms */
-	mvs_cw32(regs, CMD_WD_TIMER, 0x86470);
-
-	/* not to halt for different port op during wideport link change */
-	mvs_cw32(regs, CMD_APP_ERR_CONFIG, 0xffefbf7d);
-
-	/* workaround for Seagate disk not-found OOB sequence, recv
-	 * COMINIT before sending out COMWAKE */
-	tmp = mvs_cr32(regs, CMD_PHY_MODE_21);
-	tmp &= 0x0000ffff;
-	tmp |= 0x00fa0000;
-	mvs_cw32(regs, CMD_PHY_MODE_21, tmp);
-
-	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
-	tmp &= 0x1fffffff;
-	tmp |= (2U << 29);	/* 8 ms retry */
-	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
-
-	/* TEST - for phy decoding error, adjust voltage levels */
-	mw32(P0_VSR_ADDR + 0, 0x8);
-	mw32(P0_VSR_DATA + 0, 0x2F0);
-
-	mw32(P0_VSR_ADDR + 8, 0x8);
-	mw32(P0_VSR_DATA + 8, 0x2F0);
-
-	mw32(P0_VSR_ADDR + 16, 0x8);
-	mw32(P0_VSR_DATA + 16, 0x2F0);
-
-	mw32(P0_VSR_ADDR + 24, 0x8);
-	mw32(P0_VSR_DATA + 24, 0x2F0);
-
-}
-
-static void mvs_enable_xmt(struct mvs_info *mvi, int PhyId)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
-
-	tmp = mr32(PCS);
-	if (mvi->chip->n_phy <= 4)
-		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT);
-	else
-		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT2);
-	mw32(PCS, tmp);
-}
-
-static void mvs_detect_porttype(struct mvs_info *mvi, int i)
-{
-	void __iomem *regs = mvi->regs;
-	u32 reg;
-	struct mvs_phy *phy = &mvi->phy[i];
-
-	/* TODO check & save device type */
-	reg = mr32(GBL_PORT_TYPE);
-
-	if (reg & MODE_SAS_SATA & (1 << i))
-		phy->phy_type |= PORT_TYPE_SAS;
-	else
-		phy->phy_type |= PORT_TYPE_SATA;
-}
-
-static void *mvs_get_d2h_reg(struct mvs_info *mvi, int i, void *buf)
-{
-	u32 *s = (u32 *) buf;
-
-	if (!s)
-		return NULL;
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG3);
-	s[3] = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG2);
-	s[2] = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG1);
-	s[1] = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG0);
-	s[0] = mvs_read_port_cfg_data(mvi, i);
-
-	return (void *)s;
-}
-
-static u32 mvs_is_sig_fis_received(u32 irq_status)
-{
-	return irq_status & PHYEV_SIG_FIS;
-}
-
-static void mvs_update_wideport(struct mvs_info *mvi, int i)
-{
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct mvs_port *port = phy->port;
-	int j, no;
-
-	for_each_phy(port->wide_port_phymap, no, j, mvi->chip->n_phy)
-		if (no & 1) {
-			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
-			mvs_write_port_cfg_data(mvi, no,
-						port->wide_port_phymap);
-		} else {
-			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
-			mvs_write_port_cfg_data(mvi, no, 0);
-		}
-}
-
-static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i)
-{
-	u32 tmp;
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct mvs_port *port = phy->port;;
-
-	tmp = mvs_read_phy_ctl(mvi, i);
-
-	if ((tmp & PHY_READY_MASK) && !(phy->irq_status & PHYEV_POOF)) {
-		if (!port)
-			phy->phy_attached = 1;
-		return tmp;
-	}
-
-	if (port) {
-		if (phy->phy_type & PORT_TYPE_SAS) {
-			port->wide_port_phymap &= ~(1U << i);
-			if (!port->wide_port_phymap)
-				port->port_attached = 0;
-			mvs_update_wideport(mvi, i);
-		} else if (phy->phy_type & PORT_TYPE_SATA)
-			port->port_attached = 0;
-		mvs_free_reg_set(mvi, phy->port);
-		phy->port = NULL;
-		phy->phy_attached = 0;
-		phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
-	}
-	return 0;
-}
-
-static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
-					int get_st)
-{
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct pci_dev *pdev = mvi->pdev;
-	u32 tmp;
-	u64 tmp64;
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_IDENTIFY);
-	phy->dev_info = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
-	phy->dev_sas_addr = (u64) mvs_read_port_cfg_data(mvi, i) << 32;
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
-	phy->dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
-
-	if (get_st) {
-		phy->irq_status = mvs_read_port_irq_stat(mvi, i);
-		phy->phy_status = mvs_is_phy_ready(mvi, i);
-	}
-
-	if (phy->phy_status) {
-		u32 phy_st;
-		struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
-
-		mvs_write_port_cfg_addr(mvi, i, PHYR_PHY_STAT);
-		phy_st = mvs_read_port_cfg_data(mvi, i);
-
-		sas_phy->linkrate =
-			(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
-				PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
-		phy->minimum_linkrate =
-			(phy->phy_status &
-				PHY_MIN_SPP_PHYS_LINK_RATE_MASK) >> 8;
-		phy->maximum_linkrate =
-			(phy->phy_status &
-				PHY_MAX_SPP_PHYS_LINK_RATE_MASK) >> 12;
-
-		if (phy->phy_type & PORT_TYPE_SAS) {
-			/* Updated attached_sas_addr */
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_HI);
-			phy->att_dev_sas_addr =
-				(u64) mvs_read_port_cfg_data(mvi, i) << 32;
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_LO);
-			phy->att_dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_DEV_INFO);
-			phy->att_dev_info = mvs_read_port_cfg_data(mvi, i);
-			phy->identify.device_type =
-			    phy->att_dev_info & PORT_DEV_TYPE_MASK;
-
-			if (phy->identify.device_type == SAS_END_DEV)
-				phy->identify.target_port_protocols =
-							SAS_PROTOCOL_SSP;
-			else if (phy->identify.device_type != NO_DEVICE)
-				phy->identify.target_port_protocols =
-							SAS_PROTOCOL_SMP;
-			if (phy_st & PHY_OOB_DTCTD)
-				sas_phy->oob_mode = SAS_OOB_MODE;
-			phy->frame_rcvd_size =
-			    sizeof(struct sas_identify_frame);
-		} else if (phy->phy_type & PORT_TYPE_SATA) {
-			phy->identify.target_port_protocols = SAS_PROTOCOL_STP;
-			if (mvs_is_sig_fis_received(phy->irq_status)) {
-				phy->att_dev_sas_addr = i;	/* temp */
-				if (phy_st & PHY_OOB_DTCTD)
-					sas_phy->oob_mode = SATA_OOB_MODE;
-				phy->frame_rcvd_size =
-				    sizeof(struct dev_to_host_fis);
-				mvs_get_d2h_reg(mvi, i,
-						(void *)sas_phy->frame_rcvd);
-			} else {
-				dev_printk(KERN_DEBUG, &pdev->dev,
-					"No sig fis\n");
-				phy->phy_type &= ~(PORT_TYPE_SATA);
-				goto out_done;
-			}
-		}
-		tmp64 = cpu_to_be64(phy->att_dev_sas_addr);
-		memcpy(sas_phy->attached_sas_addr, &tmp64, SAS_ADDR_SIZE);
-
-		dev_printk(KERN_DEBUG, &pdev->dev,
-			"phy[%d] Get Attached Address 0x%llX ,"
-			" SAS Address 0x%llX\n",
-			i,
-			(unsigned long long)phy->att_dev_sas_addr,
-			(unsigned long long)phy->dev_sas_addr);
-		dev_printk(KERN_DEBUG, &pdev->dev,
-			"Rate = %x , type = %d\n",
-			sas_phy->linkrate, phy->phy_type);
-
-		/* workaround for HW phy decoding error on 1.5g disk drive */
-		mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE6);
-		tmp = mvs_read_port_vsr_data(mvi, i);
-		if (((phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
-		     PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET) ==
-			SAS_LINK_RATE_1_5_GBPS)
-			tmp &= ~PHY_MODE6_LATECLK;
-		else
-			tmp |= PHY_MODE6_LATECLK;
-		mvs_write_port_vsr_data(mvi, i, tmp);
-
-	}
-out_done:
-	if (get_st)
-		mvs_write_port_irq_stat(mvi, i, phy->irq_status);
-}
-
-static void mvs_port_formed(struct asd_sas_phy *sas_phy)
-{
-	struct sas_ha_struct *sas_ha = sas_phy->ha;
-	struct mvs_info *mvi = sas_ha->lldd_ha;
-	struct asd_sas_port *sas_port = sas_phy->port;
-	struct mvs_phy *phy = sas_phy->lldd_phy;
-	struct mvs_port *port = &mvi->port[sas_port->id];
-	unsigned long flags;
-
-	spin_lock_irqsave(&mvi->lock, flags);
-	port->port_attached = 1;
-	phy->port = port;
-	port->taskfileset = MVS_ID_NOT_MAPPED;
-	if (phy->phy_type & PORT_TYPE_SAS) {
-		port->wide_port_phymap = sas_port->phy_mask;
-		mvs_update_wideport(mvi, sas_phy->id);
-	}
-	spin_unlock_irqrestore(&mvi->lock, flags);
-}
-
-static int mvs_I_T_nexus_reset(struct domain_device *dev)
-{
-	return TMF_RESP_FUNC_FAILED;
-}
-
-static int __devinit mvs_hw_init(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	int i;
-	u32 tmp, cctl;
-
-	/* make sure interrupts are masked immediately (paranoia) */
-	mw32(GBL_CTL, 0);
-	tmp = mr32(GBL_CTL);
-
-	/* Reset Controller */
-	if (!(tmp & HBA_RST)) {
-		if (mvi->flags & MVF_PHY_PWR_FIX) {
-			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
-			tmp &= ~PCTL_PWR_ON;
-			tmp |= PCTL_OFF;
-			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
-
-			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
-			tmp &= ~PCTL_PWR_ON;
-			tmp |= PCTL_OFF;
-			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
-		}
-
-		/* global reset, incl. COMRESET/H_RESET_N (self-clearing) */
-		mw32_f(GBL_CTL, HBA_RST);
-	}
-
-	/* wait for reset to finish; timeout is just a guess */
-	i = 1000;
-	while (i-- > 0) {
-		msleep(10);
-
-		if (!(mr32(GBL_CTL) & HBA_RST))
-			break;
-	}
-	if (mr32(GBL_CTL) & HBA_RST) {
-		dev_printk(KERN_ERR, &mvi->pdev->dev, "HBA reset failed\n");
-		return -EBUSY;
-	}
-
-	/* Init Chip */
-	/* make sure RST is set; HBA_RST /should/ have done that for us */
-	cctl = mr32(CTL);
-	if (cctl & CCTL_RST)
-		cctl &= ~CCTL_RST;
-	else
-		mw32_f(CTL, cctl | CCTL_RST);
-
-	/* write to device control _AND_ device status register? - A.C. */
-	pci_read_config_dword(mvi->pdev, PCR_DEV_CTRL, &tmp);
-	tmp &= ~PRD_REQ_MASK;
-	tmp |= PRD_REQ_SIZE;
-	pci_write_config_dword(mvi->pdev, PCR_DEV_CTRL, tmp);
-
-	pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
-	tmp |= PCTL_PWR_ON;
-	tmp &= ~PCTL_OFF;
-	pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
-
-	pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
-	tmp |= PCTL_PWR_ON;
-	tmp &= ~PCTL_OFF;
-	pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
-
-	mw32_f(CTL, cctl);
-
-	/* reset control */
-	mw32(PCS, 0);		/*MVS_PCS */
-
-	mvs_phy_hacks(mvi);
-
-	mw32(CMD_LIST_LO, mvi->slot_dma);
-	mw32(CMD_LIST_HI, (mvi->slot_dma >> 16) >> 16);
-
-	mw32(RX_FIS_LO, mvi->rx_fis_dma);
-	mw32(RX_FIS_HI, (mvi->rx_fis_dma >> 16) >> 16);
-
-	mw32(TX_CFG, MVS_CHIP_SLOT_SZ);
-	mw32(TX_LO, mvi->tx_dma);
-	mw32(TX_HI, (mvi->tx_dma >> 16) >> 16);
-
-	mw32(RX_CFG, MVS_RX_RING_SZ);
-	mw32(RX_LO, mvi->rx_dma);
-	mw32(RX_HI, (mvi->rx_dma >> 16) >> 16);
-
-	/* enable auto port detection */
-	mw32(GBL_PORT_TYPE, MODE_AUTO_DET_EN);
-	msleep(1100);
-	/* init and reset phys */
-	for (i = 0; i < mvi->chip->n_phy; i++) {
-		u32 lo = be32_to_cpu(*(u32 *)&mvi->sas_addr[4]);
-		u32 hi = be32_to_cpu(*(u32 *)&mvi->sas_addr[0]);
-
-		mvs_detect_porttype(mvi, i);
-
-		/* set phy local SAS address */
-		mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
-		mvs_write_port_cfg_data(mvi, i, lo);
-		mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
-		mvs_write_port_cfg_data(mvi, i, hi);
-
-		/* reset phy */
-		tmp = mvs_read_phy_ctl(mvi, i);
-		tmp |= PHY_RST;
-		mvs_write_phy_ctl(mvi, i, tmp);
-	}
-
-	msleep(100);
-
-	for (i = 0; i < mvi->chip->n_phy; i++) {
-		/* clear phy int status */
-		tmp = mvs_read_port_irq_stat(mvi, i);
-		tmp &= ~PHYEV_SIG_FIS;
-		mvs_write_port_irq_stat(mvi, i, tmp);
-
-		/* set phy int mask */
-		tmp = PHYEV_RDY_CH | PHYEV_BROAD_CH | PHYEV_UNASSOC_FIS |
-			PHYEV_ID_DONE | PHYEV_DEC_ERR;
-		mvs_write_port_irq_mask(mvi, i, tmp);
-
-		msleep(100);
-		mvs_update_phyinfo(mvi, i, 1);
-		mvs_enable_xmt(mvi, i);
-	}
-
-	/* FIXME: update wide port bitmaps */
-
-	/* little endian for open address and command table, etc. */
-	/* A.C.
-	 * it seems that ( from the spec ) turning on big-endian won't
-	 * do us any good on big-endian machines, need further confirmation
-	 */
-	cctl = mr32(CTL);
-	cctl |= CCTL_ENDIAN_CMD;
-	cctl |= CCTL_ENDIAN_DATA;
-	cctl &= ~CCTL_ENDIAN_OPEN;
-	cctl |= CCTL_ENDIAN_RSP;
-	mw32_f(CTL, cctl);
-
-	/* reset CMD queue */
-	tmp = mr32(PCS);
-	tmp |= PCS_CMD_RST;
-	mw32(PCS, tmp);
-	/* interrupt coalescing may cause missing HW interrput in some case,
-	 * and the max count is 0x1ff, while our max slot is 0x200,
-	 * it will make count 0.
-	 */
-	tmp = 0;
-	mw32(INT_COAL, tmp);
-
-	tmp = 0x100;
-	mw32(INT_COAL_TMOUT, tmp);
-
-	/* ladies and gentlemen, start your engines */
-	mw32(TX_CFG, 0);
-	mw32(TX_CFG, MVS_CHIP_SLOT_SZ | TX_EN);
-	mw32(RX_CFG, MVS_RX_RING_SZ | RX_EN);
-	/* enable CMD/CMPL_Q/RESP mode */
-	mw32(PCS, PCS_SATA_RETRY | PCS_FIS_RX_EN | PCS_CMD_EN);
-
-	/* enable completion queue interrupt */
-	tmp = (CINT_PORT_MASK | CINT_DONE | CINT_MEM | CINT_SRS);
-	mw32(INT_MASK, tmp);
-
-	/* Enable SRS interrupt */
-	mw32(INT_MASK_SRS, 0xFF);
-	return 0;
-}
-
-static void __devinit mvs_print_info(struct mvs_info *mvi)
-{
-	struct pci_dev *pdev = mvi->pdev;
-	static int printed_version;
-
-	if (!printed_version++)
-		dev_printk(KERN_INFO, &pdev->dev, "version " DRV_VERSION "\n");
-
-	dev_printk(KERN_INFO, &pdev->dev, "%u phys, addr %llx\n",
-		   mvi->chip->n_phy, SAS_ADDR(mvi->sas_addr));
-}
-
-static int __devinit mvs_pci_init(struct pci_dev *pdev,
-				  const struct pci_device_id *ent)
-{
-	int rc;
-	struct mvs_info *mvi;
-	irq_handler_t irq_handler = mvs_interrupt;
-
-	rc = pci_enable_device(pdev);
-	if (rc)
-		return rc;
-
-	pci_set_master(pdev);
-
-	rc = pci_request_regions(pdev, DRV_NAME);
-	if (rc)
-		goto err_out_disable;
-
-	rc = pci_go_64(pdev);
-	if (rc)
-		goto err_out_regions;
-
-	mvi = mvs_alloc(pdev, ent);
-	if (!mvi) {
-		rc = -ENOMEM;
-		goto err_out_regions;
-	}
-
-	rc = mvs_hw_init(mvi);
-	if (rc)
-		goto err_out_mvi;
-
-#ifndef MVS_DISABLE_MSI
-	if (!pci_enable_msi(pdev)) {
-		u32 tmp;
-		void __iomem *regs = mvi->regs;
-		mvi->flags |= MVF_MSI;
-		irq_handler = mvs_msi_interrupt;
-		tmp = mr32(PCS);
-		mw32(PCS, tmp | PCS_SELF_CLEAR);
-	}
-#endif
-
-	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRV_NAME, mvi);
-	if (rc)
-		goto err_out_msi;
-
-	rc = scsi_add_host(mvi->shost, &pdev->dev);
-	if (rc)
-		goto err_out_irq;
-
-	rc = sas_register_ha(&mvi->sas);
-	if (rc)
-		goto err_out_shost;
-
-	pci_set_drvdata(pdev, mvi);
-
-	mvs_print_info(mvi);
-
-	mvs_hba_interrupt_enable(mvi);
-
-	scsi_scan_host(mvi->shost);
-
-	return 0;
-
-err_out_shost:
-	scsi_remove_host(mvi->shost);
-err_out_irq:
-	free_irq(pdev->irq, mvi);
-err_out_msi:
-	if (mvi->flags |= MVF_MSI)
-		pci_disable_msi(pdev);
-err_out_mvi:
-	mvs_free(mvi);
-err_out_regions:
-	pci_release_regions(pdev);
-err_out_disable:
-	pci_disable_device(pdev);
-	return rc;
-}
-
-static void __devexit mvs_pci_remove(struct pci_dev *pdev)
-{
-	struct mvs_info *mvi = pci_get_drvdata(pdev);
-
-	pci_set_drvdata(pdev, NULL);
-
-	if (mvi) {
-		sas_unregister_ha(&mvi->sas);
-		mvs_hba_interrupt_disable(mvi);
-		sas_remove_host(mvi->shost);
-		scsi_remove_host(mvi->shost);
-
-		free_irq(pdev->irq, mvi);
-		if (mvi->flags & MVF_MSI)
-			pci_disable_msi(pdev);
-		mvs_free(mvi);
-		pci_release_regions(pdev);
-	}
-	pci_disable_device(pdev);
-}
-
-static struct sas_domain_function_template mvs_transport_ops = {
-	.lldd_execute_task	= mvs_task_exec,
-	.lldd_control_phy	= mvs_phy_control,
-	.lldd_abort_task	= mvs_task_abort,
-	.lldd_port_formed	= mvs_port_formed,
-	.lldd_I_T_nexus_reset	= mvs_I_T_nexus_reset,
-};
-
-static struct pci_device_id __devinitdata mvs_pci_table[] = {
-	{ PCI_VDEVICE(MARVELL, 0x6320), chip_6320 },
-	{ PCI_VDEVICE(MARVELL, 0x6340), chip_6440 },
-	{
-		.vendor 	= PCI_VENDOR_ID_MARVELL,
-		.device 	= 0x6440,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= 0x6480,
-		.class		= 0,
-		.class_mask	= 0,
-		.driver_data	= chip_6480,
-	},
-	{ PCI_VDEVICE(MARVELL, 0x6440), chip_6440 },
-	{ PCI_VDEVICE(MARVELL, 0x6480), chip_6480 },
-
-	{ }	/* terminate list */
-};
-
-static struct pci_driver mvs_pci_driver = {
-	.name		= DRV_NAME,
-	.id_table	= mvs_pci_table,
-	.probe		= mvs_pci_init,
-	.remove		= __devexit_p(mvs_pci_remove),
-};
-
-static int __init mvs_init(void)
-{
-	int rc;
-
-	mvs_stt = sas_domain_attach_transport(&mvs_transport_ops);
-	if (!mvs_stt)
-		return -ENOMEM;
-
-	rc = pci_register_driver(&mvs_pci_driver);
-	if (rc)
-		goto err_out;
-
-	return 0;
-
-err_out:
-	sas_release_transport(mvs_stt);
-	return rc;
-}
-
-static void __exit mvs_exit(void)
-{
-	pci_unregister_driver(&mvs_pci_driver);
-	sas_release_transport(mvs_stt);
-}
-
-module_init(mvs_init);
-module_exit(mvs_exit);
-
-MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
-MODULE_DESCRIPTION("Marvell 88SE6440 SAS/SATA controller driver");
-MODULE_VERSION(DRV_VERSION);
-MODULE_LICENSE("GPL");
-MODULE_DEVICE_TABLE(pci, mvs_pci_table);
diff --git a/drivers/scsi/mvsas/Kconfig b/drivers/scsi/mvsas/Kconfig
new file mode 100644
index 0000000..f83f368
--- /dev/null
+++ b/drivers/scsi/mvsas/Kconfig
@@ -0,0 +1,35 @@
+#
+# Kernel configuration file for 88SE64XX SAS/SATA driver.
+#
+# Copyright 2007 Red Hat, Inc.
+# Copyright 2008 Marvell. <kewei@marvell.com>
+#
+# This file is licensed under GPLv2.
+#
+# This file is part of the 88SE64XX driver.
+#
+# The 88SE64XX driver is free software; you can redistribute
+# it and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2 of the
+# License.
+#
+# The 88SE64XX driver is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with 88SE64XX Driver; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#
+#
+
+config SCSI_MVSAS
+	tristate "Marvell 88SE64XX SAS/SATA support"
+	depends on PCI
+	select SCSI_SAS_LIBSAS
+	select FW_LOADER
+	help
+		This driver supports Marvell's SAS/SATA 3Gb/s PCI-E 88SE64XX
+		chip based host adapters.
+
diff --git a/drivers/scsi/mvsas/Makefile b/drivers/scsi/mvsas/Makefile
new file mode 100644
index 0000000..1ac6ed9
--- /dev/null
+++ b/drivers/scsi/mvsas/Makefile
@@ -0,0 +1,26 @@
+#
+# Makefile for Marvell 88SE64xx SAS/SATA driver.
+#
+# Copyright 2007 Red Hat, Inc.
+# Copyright 2008 Marvell. <kewei@marvell.com>
+#
+# This file is licensed under GPLv2.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+obj-$(CONFIG_SCSI_MVSAS) += mvsas.o
+mvsas-y +=  mv_sas.o
+
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
new file mode 100644
index 0000000..e4acebd
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -0,0 +1,3222 @@
+/*
+	mvsas.c - Marvell 88SE6440 SAS/SATA support
+
+	Copyright 2007 Red Hat, Inc.
+	Copyright 2008 Marvell. <kewei@marvell.com>
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License as
+	published by the Free Software Foundation; either version 2,
+	or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty
+	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this program; see the file COPYING.	If not,
+	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
+	MA 02139, USA.
+
+	---------------------------------------------------------------
+
+	Random notes:
+	* hardware supports controlling the endian-ness of data
+	  structures.  this permits elimination of all the le32_to_cpu()
+	  and cpu_to_le32() conversions.
+
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/ctype.h>
+#include <scsi/libsas.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/sas_ata.h>
+#include <asm/io.h>
+
+#define DRV_NAME	"mvsas"
+#define DRV_VERSION	"0.5.2"
+#define _MV_DUMP	0
+#define MVS_DISABLE_NVRAM
+#define MVS_DISABLE_MSI
+
+#define mr32(reg)	readl(regs + MVS_##reg)
+#define mw32(reg,val)	writel((val), regs + MVS_##reg)
+#define mw32_f(reg,val)	do {			\
+	writel((val), regs + MVS_##reg);	\
+	readl(regs + MVS_##reg);		\
+	} while (0)
+
+#define MVS_ID_NOT_MAPPED	0x7f
+#define MVS_CHIP_SLOT_SZ	(1U << mvi->chip->slot_width)
+
+/* offset for D2H FIS in the Received FIS List Structure */
+#define SATA_RECEIVED_D2H_FIS(reg_set)	\
+	((void *) mvi->rx_fis + 0x400 + 0x100 * reg_set + 0x40)
+#define SATA_RECEIVED_PIO_FIS(reg_set)	\
+	((void *) mvi->rx_fis + 0x400 + 0x100 * reg_set + 0x20)
+#define UNASSOC_D2H_FIS(id)		\
+	((void *) mvi->rx_fis + 0x100 * id)
+
+#define for_each_phy(__lseq_mask, __mc, __lseq, __rest)			\
+	for ((__mc) = (__lseq_mask), (__lseq) = 0;			\
+					(__mc) != 0 && __rest;		\
+					(++__lseq), (__mc) >>= 1)
+
+/* driver compile-time configuration */
+enum driver_configuration {
+	MVS_TX_RING_SZ		= 1024,	/* TX ring size (12-bit) */
+	MVS_RX_RING_SZ		= 1024, /* RX ring size (12-bit) */
+					/* software requires power-of-2
+					   ring size */
+
+	MVS_SLOTS		= 512,	/* command slots */
+	MVS_SLOT_BUF_SZ		= 8192, /* cmd tbl + IU + status + PRD */
+	MVS_SSP_CMD_SZ		= 64,	/* SSP command table buffer size */
+	MVS_ATA_CMD_SZ		= 96,	/* SATA command table buffer size */
+	MVS_OAF_SZ		= 64,	/* Open address frame buffer size */
+
+	MVS_RX_FIS_COUNT	= 17,	/* Optional rx'd FISs (max 17) */
+
+	MVS_QUEUE_SIZE		= 30,	/* Support Queue depth */
+	MVS_CAN_QUEUE		= MVS_SLOTS - 1,	/* SCSI Queue depth */
+};
+
+/* unchangeable hardware details */
+enum hardware_details {
+	MVS_MAX_PHYS		= 8,	/* max. possible phys */
+	MVS_MAX_PORTS		= 8,	/* max. possible ports */
+	MVS_RX_FISL_SZ		= 0x400 + (MVS_RX_FIS_COUNT * 0x100),
+};
+
+/* peripheral registers (BAR2) */
+enum peripheral_registers {
+	SPI_CTL			= 0x10,	/* EEPROM control */
+	SPI_CMD			= 0x14,	/* EEPROM command */
+	SPI_DATA		= 0x18, /* EEPROM data */
+};
+
+enum peripheral_register_bits {
+	TWSI_RDY		= (1U << 7),	/* EEPROM interface ready */
+	TWSI_RD			= (1U << 4),	/* EEPROM read access */
+
+	SPI_ADDR_MASK		= 0x3ffff,	/* bits 17:0 */
+};
+
+/* enhanced mode registers (BAR4) */
+enum hw_registers {
+	MVS_GBL_CTL		= 0x04,  /* global control */
+	MVS_GBL_INT_STAT	= 0x08,  /* global irq status */
+	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
+	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
+
+	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
+	MVS_PCS			= 0x104, /* SAS/SATA port control/status */
+	MVS_CMD_LIST_LO		= 0x108, /* cmd list addr */
+	MVS_CMD_LIST_HI		= 0x10C,
+	MVS_RX_FIS_LO		= 0x110, /* RX FIS list addr */
+	MVS_RX_FIS_HI		= 0x114,
+
+	MVS_TX_CFG		= 0x120, /* TX configuration */
+	MVS_TX_LO		= 0x124, /* TX (delivery) ring addr */
+	MVS_TX_HI		= 0x128,
+
+	MVS_TX_PROD_IDX		= 0x12C, /* TX producer pointer */
+	MVS_TX_CONS_IDX		= 0x130, /* TX consumer pointer (RO) */
+	MVS_RX_CFG		= 0x134, /* RX configuration */
+	MVS_RX_LO		= 0x138, /* RX (completion) ring addr */
+	MVS_RX_HI		= 0x13C,
+	MVS_RX_CONS_IDX		= 0x140, /* RX consumer pointer (RO) */
+
+	MVS_INT_COAL		= 0x148, /* Int coalescing config */
+	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
+	MVS_INT_STAT		= 0x150, /* Central int status */
+	MVS_INT_MASK		= 0x154, /* Central int enable */
+	MVS_INT_STAT_SRS	= 0x158, /* SATA register set status */
+	MVS_INT_MASK_SRS	= 0x15C,
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_INT_STAT		= 0x160, /* port0 interrupt status */
+	MVS_P0_INT_MASK		= 0x164, /* port0 interrupt mask */
+	MVS_P4_INT_STAT		= 0x200, /* Port 4 interrupt status */
+	MVS_P4_INT_MASK		= 0x204, /* Port 4 interrupt enable mask */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_SER_CTLSTAT	= 0x180, /* port0 serial control/status */
+	MVS_P4_SER_CTLSTAT	= 0x220, /* port4 serial control/status */
+
+	MVS_CMD_ADDR		= 0x1B8, /* Command register port (addr) */
+	MVS_CMD_DATA		= 0x1BC, /* Command register port (data) */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_CFG_ADDR		= 0x1C0, /* port0 phy register address */
+	MVS_P0_CFG_DATA		= 0x1C4, /* port0 phy register data */
+	MVS_P4_CFG_ADDR		= 0x230, /* Port 4 config address */
+	MVS_P4_CFG_DATA		= 0x234, /* Port 4 config data */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_VSR_ADDR		= 0x1E0, /* port0 VSR address */
+	MVS_P0_VSR_DATA		= 0x1E4, /* port0 VSR data */
+	MVS_P4_VSR_ADDR		= 0x250, /* port 4 VSR addr */
+	MVS_P4_VSR_DATA		= 0x254, /* port 4 VSR data */
+};
+
+enum hw_register_bits {
+	/* MVS_GBL_CTL */
+	INT_EN			= (1U << 1),	/* Global int enable */
+	HBA_RST			= (1U << 0),	/* HBA reset */
+
+	/* MVS_GBL_INT_STAT */
+	INT_XOR			= (1U << 4),	/* XOR engine event */
+	INT_SAS_SATA		= (1U << 0),	/* SAS/SATA event */
+
+	/* MVS_GBL_PORT_TYPE */			/* shl for ports 1-3 */
+	SATA_TARGET		= (1U << 16),	/* port0 SATA target enable */
+	MODE_AUTO_DET_PORT7 = (1U << 15),	/* port0 SAS/SATA autodetect */
+	MODE_AUTO_DET_PORT6 = (1U << 14),
+	MODE_AUTO_DET_PORT5 = (1U << 13),
+	MODE_AUTO_DET_PORT4 = (1U << 12),
+	MODE_AUTO_DET_PORT3 = (1U << 11),
+	MODE_AUTO_DET_PORT2 = (1U << 10),
+	MODE_AUTO_DET_PORT1 = (1U << 9),
+	MODE_AUTO_DET_PORT0 = (1U << 8),
+	MODE_AUTO_DET_EN    =	MODE_AUTO_DET_PORT0 | MODE_AUTO_DET_PORT1 |
+				MODE_AUTO_DET_PORT2 | MODE_AUTO_DET_PORT3 |
+				MODE_AUTO_DET_PORT4 | MODE_AUTO_DET_PORT5 |
+				MODE_AUTO_DET_PORT6 | MODE_AUTO_DET_PORT7,
+	MODE_SAS_PORT7_MASK = (1U << 7),  /* port0 SAS(1), SATA(0) mode */
+	MODE_SAS_PORT6_MASK = (1U << 6),
+	MODE_SAS_PORT5_MASK = (1U << 5),
+	MODE_SAS_PORT4_MASK = (1U << 4),
+	MODE_SAS_PORT3_MASK = (1U << 3),
+	MODE_SAS_PORT2_MASK = (1U << 2),
+	MODE_SAS_PORT1_MASK = (1U << 1),
+	MODE_SAS_PORT0_MASK = (1U << 0),
+	MODE_SAS_SATA	=	MODE_SAS_PORT0_MASK | MODE_SAS_PORT1_MASK |
+				MODE_SAS_PORT2_MASK | MODE_SAS_PORT3_MASK |
+				MODE_SAS_PORT4_MASK | MODE_SAS_PORT5_MASK |
+				MODE_SAS_PORT6_MASK | MODE_SAS_PORT7_MASK,
+
+				/* SAS_MODE value may be
+				 * dictated (in hw) by values
+				 * of SATA_TARGET & AUTO_DET
+				 */
+
+	/* MVS_TX_CFG */
+	TX_EN			= (1U << 16),	/* Enable TX */
+	TX_RING_SZ_MASK		= 0xfff,	/* TX ring size, bits 11:0 */
+
+	/* MVS_RX_CFG */
+	RX_EN			= (1U << 16),	/* Enable RX */
+	RX_RING_SZ_MASK		= 0xfff,	/* RX ring size, bits 11:0 */
+
+	/* MVS_INT_COAL */
+	COAL_EN			= (1U << 16),	/* Enable int coalescing */
+
+	/* MVS_INT_STAT, MVS_INT_MASK */
+	CINT_I2C		= (1U << 31),	/* I2C event */
+	CINT_SW0		= (1U << 30),	/* software event 0 */
+	CINT_SW1		= (1U << 29),	/* software event 1 */
+	CINT_PRD_BC		= (1U << 28),	/* PRD BC err for read cmd */
+	CINT_DMA_PCIE		= (1U << 27),	/* DMA to PCIE timeout */
+	CINT_MEM		= (1U << 26),	/* int mem parity err */
+	CINT_I2C_SLAVE		= (1U << 25),	/* slave I2C event */
+	CINT_SRS		= (1U << 3),	/* SRS event */
+	CINT_CI_STOP		= (1U << 1),	/* cmd issue stopped */
+	CINT_DONE		= (1U << 0),	/* cmd completion */
+
+						/* shl for ports 1-3 */
+	CINT_PORT_STOPPED	= (1U << 16),	/* port0 stopped */
+	CINT_PORT		= (1U << 8),	/* port0 event */
+	CINT_PORT_MASK_OFFSET	= 8,
+	CINT_PORT_MASK		= (0xFF << CINT_PORT_MASK_OFFSET),
+
+	/* TX (delivery) ring bits */
+	TXQ_CMD_SHIFT		= 29,
+	TXQ_CMD_SSP		= 1,		/* SSP protocol */
+	TXQ_CMD_SMP		= 2,		/* SMP protocol */
+	TXQ_CMD_STP		= 3,		/* STP/SATA protocol */
+	TXQ_CMD_SSP_FREE_LIST	= 4,		/* add to SSP targ free list */
+	TXQ_CMD_SLOT_RESET	= 7,		/* reset command slot */
+	TXQ_MODE_I		= (1U << 28),	/* mode: 0=target,1=initiator */
+	TXQ_PRIO_HI		= (1U << 27),	/* priority: 0=normal, 1=high */
+	TXQ_SRS_SHIFT		= 20,		/* SATA register set */
+	TXQ_SRS_MASK		= 0x7f,
+	TXQ_PHY_SHIFT		= 12,		/* PHY bitmap */
+	TXQ_PHY_MASK		= 0xff,
+	TXQ_SLOT_MASK		= 0xfff,	/* slot number */
+
+	/* RX (completion) ring bits */
+	RXQ_GOOD		= (1U << 23),	/* Response good */
+	RXQ_SLOT_RESET		= (1U << 21),	/* Slot reset complete */
+	RXQ_CMD_RX		= (1U << 20),	/* target cmd received */
+	RXQ_ATTN		= (1U << 19),	/* attention */
+	RXQ_RSP			= (1U << 18),	/* response frame xfer'd */
+	RXQ_ERR			= (1U << 17),	/* err info rec xfer'd */
+	RXQ_DONE		= (1U << 16),	/* cmd complete */
+	RXQ_SLOT_MASK		= 0xfff,	/* slot number */
+
+	/* mvs_cmd_hdr bits */
+	MCH_PRD_LEN_SHIFT	= 16,		/* 16-bit PRD table len */
+	MCH_SSP_FR_TYPE_SHIFT	= 13,		/* SSP frame type */
+
+						/* SSP initiator only */
+	MCH_SSP_FR_CMD		= 0x0,		/* COMMAND frame */
+
+						/* SSP initiator or target */
+	MCH_SSP_FR_TASK		= 0x1,		/* TASK frame */
+
+						/* SSP target only */
+	MCH_SSP_FR_XFER_RDY	= 0x4,		/* XFER_RDY frame */
+	MCH_SSP_FR_RESP		= 0x5,		/* RESPONSE frame */
+	MCH_SSP_FR_READ		= 0x6,		/* Read DATA frame(s) */
+	MCH_SSP_FR_READ_RESP	= 0x7,		/* ditto, plus RESPONSE */
+
+	MCH_PASSTHRU		= (1U << 12),	/* pass-through (SSP) */
+	MCH_FBURST		= (1U << 11),	/* first burst (SSP) */
+	MCH_CHK_LEN		= (1U << 10),	/* chk xfer len (SSP) */
+	MCH_RETRY		= (1U << 9),	/* tport layer retry (SSP) */
+	MCH_PROTECTION		= (1U << 8),	/* protection info rec (SSP) */
+	MCH_RESET		= (1U << 7),	/* Reset (STP/SATA) */
+	MCH_FPDMA		= (1U << 6),	/* First party DMA (STP/SATA) */
+	MCH_ATAPI		= (1U << 5),	/* ATAPI (STP/SATA) */
+	MCH_BIST		= (1U << 4),	/* BIST activate (STP/SATA) */
+	MCH_PMP_MASK		= 0xf,		/* PMP from cmd FIS (STP/SATA)*/
+
+	CCTL_RST		= (1U << 5),	/* port logic reset */
+
+						/* 0(LSB first), 1(MSB first) */
+	CCTL_ENDIAN_DATA	= (1U << 3),	/* PRD data */
+	CCTL_ENDIAN_RSP		= (1U << 2),	/* response frame */
+	CCTL_ENDIAN_OPEN	= (1U << 1),	/* open address frame */
+	CCTL_ENDIAN_CMD		= (1U << 0),	/* command table */
+
+	/* MVS_Px_SER_CTLSTAT (per-phy control) */
+	PHY_SSP_RST		= (1U << 3),	/* reset SSP link layer */
+	PHY_BCAST_CHG		= (1U << 2),	/* broadcast(change) notif */
+	PHY_RST_HARD		= (1U << 1),	/* hard reset + phy reset */
+	PHY_RST			= (1U << 0),	/* phy reset */
+	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0xF << 8),
+	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0xF << 12),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (16),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
+			(0xF << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
+	PHY_READY_MASK		= (1U << 20),
+
+	/* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */
+	PHYEV_DEC_ERR		= (1U << 24),	/* Phy Decoding Error */
+	PHYEV_UNASSOC_FIS	= (1U << 19),	/* unassociated FIS rx'd */
+	PHYEV_AN		= (1U << 18),	/* SATA async notification */
+	PHYEV_BIST_ACT		= (1U << 17),	/* BIST activate FIS */
+	PHYEV_SIG_FIS		= (1U << 16),	/* signature FIS */
+	PHYEV_POOF		= (1U << 12),	/* phy ready from 1 -> 0 */
+	PHYEV_IU_BIG		= (1U << 11),	/* IU too long err */
+	PHYEV_IU_SMALL		= (1U << 10),	/* IU too short err */
+	PHYEV_UNK_TAG		= (1U << 9),	/* unknown tag */
+	PHYEV_BROAD_CH		= (1U << 8),	/* broadcast(CHANGE) */
+	PHYEV_COMWAKE		= (1U << 7),	/* COMWAKE rx'd */
+	PHYEV_PORT_SEL		= (1U << 6),	/* port selector present */
+	PHYEV_HARD_RST		= (1U << 5),	/* hard reset rx'd */
+	PHYEV_ID_TMOUT		= (1U << 4),	/* identify timeout */
+	PHYEV_ID_FAIL		= (1U << 3),	/* identify failed */
+	PHYEV_ID_DONE		= (1U << 2),	/* identify done */
+	PHYEV_HARD_RST_DONE	= (1U << 1),	/* hard reset done */
+	PHYEV_RDY_CH		= (1U << 0),	/* phy ready changed state */
+
+	/* MVS_PCS */
+	PCS_EN_SATA_REG_SHIFT	= (16),		/* Enable SATA Register Set */
+	PCS_EN_PORT_XMT_SHIFT	= (12),		/* Enable Port Transmit */
+	PCS_EN_PORT_XMT_SHIFT2	= (8),		/* For 6480 */
+	PCS_SATA_RETRY		= (1U << 8),	/* retry ctl FIS on R_ERR */
+	PCS_RSP_RX_EN		= (1U << 7),	/* raw response rx */
+	PCS_SELF_CLEAR		= (1U << 5),	/* self-clearing int mode */
+	PCS_FIS_RX_EN		= (1U << 4),	/* FIS rx enable */
+	PCS_CMD_STOP_ERR	= (1U << 3),	/* cmd stop-on-err enable */
+	PCS_CMD_RST		= (1U << 1),	/* reset cmd issue */
+	PCS_CMD_EN		= (1U << 0),	/* enable cmd issue */
+
+	/* Port n Attached Device Info */
+	PORT_DEV_SSP_TRGT	= (1U << 19),
+	PORT_DEV_SMP_TRGT	= (1U << 18),
+	PORT_DEV_STP_TRGT	= (1U << 17),
+	PORT_DEV_SSP_INIT	= (1U << 11),
+	PORT_DEV_SMP_INIT	= (1U << 10),
+	PORT_DEV_STP_INIT	= (1U << 9),
+	PORT_PHY_ID_MASK	= (0xFFU << 24),
+	PORT_DEV_TRGT_MASK	= (0x7U << 17),
+	PORT_DEV_INIT_MASK	= (0x7U << 9),
+	PORT_DEV_TYPE_MASK	= (0x7U << 0),
+
+	/* Port n PHY Status */
+	PHY_RDY			= (1U << 2),
+	PHY_DW_SYNC		= (1U << 1),
+	PHY_OOB_DTCTD		= (1U << 0),
+
+	/* VSR */
+	/* PHYMODE 6 (CDB) */
+	PHY_MODE6_LATECLK	= (1U << 29),	/* Lock Clock */
+	PHY_MODE6_DTL_SPEED	= (1U << 27),	/* Digital Loop Speed */
+	PHY_MODE6_FC_ORDER	= (1U << 26),	/* Fibre Channel Mode Order*/
+	PHY_MODE6_MUCNT_EN	= (1U << 24),	/* u Count Enable */
+	PHY_MODE6_SEL_MUCNT_LEN	= (1U << 22),	/* Training Length Select */
+	PHY_MODE6_SELMUPI	= (1U << 20),	/* Phase Multi Select (init) */
+	PHY_MODE6_SELMUPF	= (1U << 18),	/* Phase Multi Select (final) */
+	PHY_MODE6_SELMUFF	= (1U << 16),	/* Freq Loop Multi Sel(final) */
+	PHY_MODE6_SELMUFI	= (1U << 14),	/* Freq Loop Multi Sel(init) */
+	PHY_MODE6_FREEZE_LOOP	= (1U << 12),	/* Freeze Rx CDR Loop */
+	PHY_MODE6_INT_RXFOFFS	= (1U << 3),	/* Rx CDR Freq Loop Enable */
+	PHY_MODE6_FRC_RXFOFFS	= (1U << 2),	/* Initial Rx CDR Offset */
+	PHY_MODE6_STAU_0D8	= (1U << 1),	/* Rx CDR Freq Loop Saturate */
+	PHY_MODE6_RXSAT_DIS	= (1U << 0),	/* Saturate Ctl */
+};
+
+enum mvs_info_flags {
+	MVF_MSI			= (1U << 0),	/* MSI is enabled */
+	MVF_PHY_PWR_FIX		= (1U << 1),	/* bug workaround */
+};
+
+enum sas_cmd_port_registers {
+	CMD_CMRST_OOB_DET	= 0x100, /* COMRESET OOB detect register */
+	CMD_CMWK_OOB_DET	= 0x104, /* COMWAKE OOB detect register */
+	CMD_CMSAS_OOB_DET	= 0x108, /* COMSAS OOB detect register */
+	CMD_BRST_OOB_DET	= 0x10c, /* burst OOB detect register */
+	CMD_OOB_SPACE		= 0x110, /* OOB space control register */
+	CMD_OOB_BURST		= 0x114, /* OOB burst control register */
+	CMD_PHY_TIMER		= 0x118, /* PHY timer control register */
+	CMD_PHY_CONFIG0		= 0x11c, /* PHY config register 0 */
+	CMD_PHY_CONFIG1		= 0x120, /* PHY config register 1 */
+	CMD_SAS_CTL0		= 0x124, /* SAS control register 0 */
+	CMD_SAS_CTL1		= 0x128, /* SAS control register 1 */
+	CMD_SAS_CTL2		= 0x12c, /* SAS control register 2 */
+	CMD_SAS_CTL3		= 0x130, /* SAS control register 3 */
+	CMD_ID_TEST		= 0x134, /* ID test register */
+	CMD_PL_TIMER		= 0x138, /* PL timer register */
+	CMD_WD_TIMER		= 0x13c, /* WD timer register */
+	CMD_PORT_SEL_COUNT	= 0x140, /* port selector count register */
+	CMD_APP_MEM_CTL		= 0x144, /* Application Memory Control */
+	CMD_XOR_MEM_CTL		= 0x148, /* XOR Block Memory Control */
+	CMD_DMA_MEM_CTL		= 0x14c, /* DMA Block Memory Control */
+	CMD_PORT_MEM_CTL0	= 0x150, /* Port Memory Control 0 */
+	CMD_PORT_MEM_CTL1	= 0x154, /* Port Memory Control 1 */
+	CMD_SATA_PORT_MEM_CTL0	= 0x158, /* SATA Port Memory Control 0 */
+	CMD_SATA_PORT_MEM_CTL1	= 0x15c, /* SATA Port Memory Control 1 */
+	CMD_XOR_MEM_BIST_CTL	= 0x160, /* XOR Memory BIST Control */
+	CMD_XOR_MEM_BIST_STAT	= 0x164, /* XOR Memroy BIST Status */
+	CMD_DMA_MEM_BIST_CTL	= 0x168, /* DMA Memory BIST Control */
+	CMD_DMA_MEM_BIST_STAT	= 0x16c, /* DMA Memory BIST Status */
+	CMD_PORT_MEM_BIST_CTL	= 0x170, /* Port Memory BIST Control */
+	CMD_PORT_MEM_BIST_STAT0 = 0x174, /* Port Memory BIST Status 0 */
+	CMD_PORT_MEM_BIST_STAT1 = 0x178, /* Port Memory BIST Status 1 */
+	CMD_STP_MEM_BIST_CTL	= 0x17c, /* STP Memory BIST Control */
+	CMD_STP_MEM_BIST_STAT0	= 0x180, /* STP Memory BIST Status 0 */
+	CMD_STP_MEM_BIST_STAT1	= 0x184, /* STP Memory BIST Status 1 */
+	CMD_RESET_COUNT		= 0x188, /* Reset Count */
+	CMD_MONTR_DATA_SEL	= 0x18C, /* Monitor Data/Select */
+	CMD_PLL_PHY_CONFIG	= 0x190, /* PLL/PHY Configuration */
+	CMD_PHY_CTL		= 0x194, /* PHY Control and Status */
+	CMD_PHY_TEST_COUNT0	= 0x198, /* Phy Test Count 0 */
+	CMD_PHY_TEST_COUNT1	= 0x19C, /* Phy Test Count 1 */
+	CMD_PHY_TEST_COUNT2	= 0x1A0, /* Phy Test Count 2 */
+	CMD_APP_ERR_CONFIG	= 0x1A4, /* Application Error Configuration */
+	CMD_PND_FIFO_CTL0	= 0x1A8, /* Pending FIFO Control 0 */
+	CMD_HOST_CTL		= 0x1AC, /* Host Control Status */
+	CMD_HOST_WR_DATA	= 0x1B0, /* Host Write Data */
+	CMD_HOST_RD_DATA	= 0x1B4, /* Host Read Data */
+	CMD_PHY_MODE_21		= 0x1B8, /* Phy Mode 21 */
+	CMD_SL_MODE0		= 0x1BC, /* SL Mode 0 */
+	CMD_SL_MODE1		= 0x1C0, /* SL Mode 1 */
+	CMD_PND_FIFO_CTL1	= 0x1C4, /* Pending FIFO Control 1 */
+};
+
+/* SAS/SATA configuration port registers, aka phy registers */
+enum sas_sata_config_port_regs {
+	PHYR_IDENTIFY		= 0x00,	/* info for IDENTIFY frame */
+	PHYR_ADDR_LO		= 0x04,	/* my SAS address (low) */
+	PHYR_ADDR_HI		= 0x08,	/* my SAS address (high) */
+	PHYR_ATT_DEV_INFO	= 0x0C,	/* attached device info */
+	PHYR_ATT_ADDR_LO	= 0x10,	/* attached dev SAS addr (low) */
+	PHYR_ATT_ADDR_HI	= 0x14,	/* attached dev SAS addr (high) */
+	PHYR_SATA_CTL		= 0x18,	/* SATA control */
+	PHYR_PHY_STAT		= 0x1C,	/* PHY status */
+	PHYR_SATA_SIG0		= 0x20,	/*port SATA signature FIS(Byte 0-3) */
+	PHYR_SATA_SIG1		= 0x24,	/*port SATA signature FIS(Byte 4-7) */
+	PHYR_SATA_SIG2		= 0x28,	/*port SATA signature FIS(Byte 8-11) */
+	PHYR_SATA_SIG3		= 0x2c,	/*port SATA signature FIS(Byte 12-15) */
+	PHYR_R_ERR_COUNT	= 0x30, /* port R_ERR count register */
+	PHYR_CRC_ERR_COUNT	= 0x34, /* port CRC error count register */
+	PHYR_WIDE_PORT		= 0x38,	/* wide port participating */
+	PHYR_CURRENT0		= 0x80,	/* current connection info 0 */
+	PHYR_CURRENT1		= 0x84,	/* current connection info 1 */
+	PHYR_CURRENT2		= 0x88,	/* current connection info 2 */
+};
+
+/*  SAS/SATA Vendor Specific Port Registers */
+enum sas_sata_vsp_regs {
+	VSR_PHY_STAT		= 0x00, /* Phy Status */
+	VSR_PHY_MODE1		= 0x01, /* phy tx */
+	VSR_PHY_MODE2		= 0x02, /* tx scc */
+	VSR_PHY_MODE3		= 0x03, /* pll */
+	VSR_PHY_MODE4		= 0x04, /* VCO */
+	VSR_PHY_MODE5		= 0x05, /* Rx */
+	VSR_PHY_MODE6		= 0x06, /* CDR */
+	VSR_PHY_MODE7		= 0x07, /* Impedance */
+	VSR_PHY_MODE8		= 0x08, /* Voltage */
+	VSR_PHY_MODE9		= 0x09, /* Test */
+	VSR_PHY_MODE10		= 0x0A, /* Power */
+	VSR_PHY_MODE11		= 0x0B, /* Phy Mode */
+	VSR_PHY_VS0		= 0x0C, /* Vednor Specific 0 */
+	VSR_PHY_VS1		= 0x0D, /* Vednor Specific 1 */
+};
+
+enum pci_cfg_registers {
+	PCR_PHY_CTL	= 0x40,
+	PCR_PHY_CTL2	= 0x90,
+	PCR_DEV_CTRL	= 0xE8,
+};
+
+enum pci_cfg_register_bits {
+	PCTL_PWR_ON	= (0xFU << 24),
+	PCTL_OFF	= (0xFU << 12),
+	PRD_REQ_SIZE	= (0x4000),
+	PRD_REQ_MASK	= (0x00007000),
+};
+
+enum nvram_layout_offsets {
+	NVR_SIG		= 0x00,		/* 0xAA, 0x55 */
+	NVR_SAS_ADDR	= 0x02,		/* 8-byte SAS address */
+};
+
+enum chip_flavors {
+	chip_6320,
+	chip_6440,
+	chip_6480,
+};
+
+enum port_type {
+	PORT_TYPE_SAS	=  (1L << 1),
+	PORT_TYPE_SATA	=  (1L << 0),
+};
+
+/* Command Table Format */
+enum ct_format {
+	/* SSP */
+	SSP_F_H		=  0x00,
+	SSP_F_IU	=  0x18,
+	SSP_F_MAX	=  0x4D,
+	/* STP */
+	STP_CMD_FIS	=  0x00,
+	STP_ATAPI_CMD	=  0x40,
+	STP_F_MAX	=  0x10,
+	/* SMP */
+	SMP_F_T		=  0x00,
+	SMP_F_DEP	=  0x01,
+	SMP_F_MAX	=  0x101,
+};
+
+enum status_buffer {
+	SB_EIR_OFF	=  0x00,	/* Error Information Record */
+	SB_RFB_OFF	=  0x08,	/* Response Frame Buffer */
+	SB_RFB_MAX	=  0x400,	/* RFB size*/
+};
+
+enum error_info_rec {
+	CMD_ISS_STPD	= (1U << 31),	/* Cmd Issue Stopped */
+	CMD_PI_ERR	= (1U << 30),	/* Protection info error.  see flags2 */
+	RSP_OVER	= (1U << 29),	/* rsp buffer overflow */
+	RETRY_LIM	= (1U << 28),	/* FIS/frame retry limit exceeded */
+	UNK_FIS 	= (1U << 27),	/* unknown FIS */
+	DMA_TERM	= (1U << 26),	/* DMA terminate primitive rx'd */
+	SYNC_ERR	= (1U << 25),	/* SYNC rx'd during frame xmit */
+	TFILE_ERR	= (1U << 24),	/* SATA taskfile Error bit set */
+	R_ERR		= (1U << 23),	/* SATA returned R_ERR prim */
+	RD_OFS		= (1U << 20),	/* Read DATA frame invalid offset */
+	XFER_RDY_OFS	= (1U << 19),	/* XFER_RDY offset error */
+	UNEXP_XFER_RDY	= (1U << 18),	/* unexpected XFER_RDY error */
+	DATA_OVER_UNDER = (1U << 16),	/* data overflow/underflow */
+	INTERLOCK	= (1U << 15),	/* interlock error */
+	NAK		= (1U << 14),	/* NAK rx'd */
+	ACK_NAK_TO	= (1U << 13),	/* ACK/NAK timeout */
+	CXN_CLOSED	= (1U << 12),	/* cxn closed w/out ack/nak */
+	OPEN_TO 	= (1U << 11),	/* I_T nexus lost, open cxn timeout */
+	PATH_BLOCKED	= (1U << 10),	/* I_T nexus lost, pathway blocked */
+	NO_DEST 	= (1U << 9),	/* I_T nexus lost, no destination */
+	STP_RES_BSY	= (1U << 8),	/* STP resources busy */
+	BREAK		= (1U << 7),	/* break received */
+	BAD_DEST	= (1U << 6),	/* bad destination */
+	BAD_PROTO	= (1U << 5),	/* protocol not supported */
+	BAD_RATE	= (1U << 4),	/* cxn rate not supported */
+	WRONG_DEST	= (1U << 3),	/* wrong destination error */
+	CREDIT_TO	= (1U << 2),	/* credit timeout */
+	WDOG_TO 	= (1U << 1),	/* watchdog timeout */
+	BUF_PAR 	= (1U << 0),	/* buffer parity error */
+};
+
+enum error_info_rec_2 {
+	SLOT_BSY_ERR	= (1U << 31),	/* Slot Busy Error */
+	GRD_CHK_ERR	= (1U << 14),	/* Guard Check Error */
+	APP_CHK_ERR	= (1U << 13),	/* Application Check error */
+	REF_CHK_ERR	= (1U << 12),	/* Reference Check Error */
+	USR_BLK_NM	= (1U << 0),	/* User Block Number */
+};
+
+struct mvs_chip_info {
+	u32		n_phy;
+	u32		srs_sz;
+	u32		slot_width;
+};
+
+struct mvs_err_info {
+	__le32			flags;
+	__le32			flags2;
+};
+
+struct mvs_prd {
+	__le64			addr;		/* 64-bit buffer address */
+	__le32			reserved;
+	__le32			len;		/* 16-bit length */
+};
+
+struct mvs_cmd_hdr {
+	__le32			flags;		/* PRD tbl len; SAS, SATA ctl */
+	__le32			lens;		/* cmd, max resp frame len */
+	__le32			tags;		/* targ port xfer tag; tag */
+	__le32			data_len;	/* data xfer len */
+	__le64			cmd_tbl;	/* command table address */
+	__le64			open_frame;	/* open addr frame address */
+	__le64			status_buf;	/* status buffer address */
+	__le64			prd_tbl;	/* PRD tbl address */
+	__le32			reserved[4];
+};
+
+struct mvs_port {
+	struct asd_sas_port	sas_port;
+	u8			port_attached;
+	u8			taskfileset;
+	u8			wide_port_phymap;
+	struct list_head	list;
+};
+
+struct mvs_phy {
+	struct mvs_port		*port;
+	struct asd_sas_phy	sas_phy;
+	struct sas_identify	identify;
+	struct scsi_device	*sdev;
+	u64		dev_sas_addr;
+	u64		att_dev_sas_addr;
+	u32		att_dev_info;
+	u32		dev_info;
+	u32		phy_type;
+	u32		phy_status;
+	u32		irq_status;
+	u32		frame_rcvd_size;
+	u8		frame_rcvd[32];
+	u8		phy_attached;
+	enum sas_linkrate	minimum_linkrate;
+	enum sas_linkrate	maximum_linkrate;
+};
+
+struct mvs_slot_info {
+	struct list_head	list;
+	struct sas_task		*task;
+	u32			n_elem;
+	u32			tx;
+
+	/* DMA buffer for storing cmd tbl, open addr frame, status buffer,
+	 * and PRD table
+	 */
+	void			*buf;
+	dma_addr_t		buf_dma;
+#if _MV_DUMP
+	u32			cmd_size;
+#endif
+
+	void			*response;
+	struct mvs_port		*port;
+};
+
+struct mvs_info {
+	unsigned long		flags;
+
+	spinlock_t		lock;		/* host-wide lock */
+	struct pci_dev		*pdev;		/* our device */
+	void __iomem		*regs;		/* enhanced mode registers */
+	void __iomem		*peri_regs;	/* peripheral registers */
+
+	u8			sas_addr[SAS_ADDR_SIZE];
+	struct sas_ha_struct	sas;		/* SCSI/SAS glue */
+	struct Scsi_Host	*shost;
+
+	__le32			*tx;		/* TX (delivery) DMA ring */
+	dma_addr_t		tx_dma;
+	u32			tx_prod;	/* cached next-producer idx */
+
+	__le32			*rx;		/* RX (completion) DMA ring */
+	dma_addr_t		rx_dma;
+	u32			rx_cons;	/* RX consumer idx */
+
+	__le32			*rx_fis;	/* RX'd FIS area */
+	dma_addr_t		rx_fis_dma;
+
+	struct mvs_cmd_hdr	*slot;	/* DMA command header slots */
+	dma_addr_t		slot_dma;
+
+	const struct mvs_chip_info *chip;
+
+	u8			tags[MVS_SLOTS];
+	struct mvs_slot_info	slot_info[MVS_SLOTS];
+				/* further per-slot information */
+	struct mvs_phy		phy[MVS_MAX_PHYS];
+	struct mvs_port		port[MVS_MAX_PHYS];
+#ifdef MVS_USE_TASKLET
+	struct tasklet_struct	tasklet;
+#endif
+};
+
+static int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
+			   void *funcdata);
+static u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port);
+static void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val);
+static u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port);
+static void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val);
+static void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val);
+static u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port);
+
+static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i);
+static void mvs_detect_porttype(struct mvs_info *mvi, int i);
+static void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st);
+static void mvs_release_task(struct mvs_info *mvi, int phy_no);
+
+static int mvs_scan_finished(struct Scsi_Host *, unsigned long);
+static void mvs_scan_start(struct Scsi_Host *);
+static int mvs_slave_configure(struct scsi_device *sdev);
+
+static struct scsi_transport_template *mvs_stt;
+
+static const struct mvs_chip_info mvs_chips[] = {
+	[chip_6320] =		{ 2, 16, 9  },
+	[chip_6440] =		{ 4, 16, 9  },
+	[chip_6480] =		{ 8, 32, 10 },
+};
+
+static struct scsi_host_template mvs_sht = {
+	.module			= THIS_MODULE,
+	.name			= DRV_NAME,
+	.queuecommand		= sas_queuecommand,
+	.target_alloc		= sas_target_alloc,
+	.slave_configure	= mvs_slave_configure,
+	.slave_destroy		= sas_slave_destroy,
+	.scan_finished		= mvs_scan_finished,
+	.scan_start		= mvs_scan_start,
+	.change_queue_depth	= sas_change_queue_depth,
+	.change_queue_type	= sas_change_queue_type,
+	.bios_param		= sas_bios_param,
+	.can_queue		= 1,
+	.cmd_per_lun		= 1,
+	.this_id		= -1,
+	.sg_tablesize		= SG_ALL,
+	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.eh_device_reset_handler	= sas_eh_device_reset_handler,
+	.eh_bus_reset_handler	= sas_eh_bus_reset_handler,
+	.slave_alloc		= sas_slave_alloc,
+	.target_destroy		= sas_target_destroy,
+	.ioctl			= sas_ioctl,
+};
+
+static void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
+{
+	u32 i;
+	u32 run;
+	u32 offset;
+
+	offset = 0;
+	while (size) {
+		printk("%08X : ", baseaddr + offset);
+		if (size >= 16)
+			run = 16;
+		else
+			run = size;
+		size -= run;
+		for (i = 0; i < 16; i++) {
+			if (i < run)
+				printk("%02X ", (u32)data[i]);
+			else
+				printk("   ");
+		}
+		printk(": ");
+		for (i = 0; i < run; i++)
+			printk("%c", isalnum(data[i]) ? data[i] : '.');
+		printk("\n");
+		data = &data[16];
+		offset += run;
+	}
+	printk("\n");
+}
+
+#if _MV_DUMP
+static void mvs_hba_sb_dump(struct mvs_info *mvi, u32 tag,
+				   enum sas_protocol proto)
+{
+	u32 offset;
+	struct pci_dev *pdev = mvi->pdev;
+	struct mvs_slot_info *slot = &mvi->slot_info[tag];
+
+	offset = slot->cmd_size + MVS_OAF_SZ +
+	    sizeof(struct mvs_prd) * slot->n_elem;
+	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Status buffer[%d] :\n",
+			tag);
+	mvs_hexdump(32, (u8 *) slot->response,
+		    (u32) slot->buf_dma + offset);
+}
+#endif
+
+static void mvs_hba_memory_dump(struct mvs_info *mvi, u32 tag,
+				enum sas_protocol proto)
+{
+#if _MV_DUMP
+	u32 sz, w_ptr;
+	u64 addr;
+	void __iomem *regs = mvi->regs;
+	struct pci_dev *pdev = mvi->pdev;
+	struct mvs_slot_info *slot = &mvi->slot_info[tag];
+
+	/*Delivery Queue */
+	sz = mr32(TX_CFG) & TX_RING_SZ_MASK;
+	w_ptr = slot->tx;
+	addr = mr32(TX_HI) << 16 << 16 | mr32(TX_LO);
+	dev_printk(KERN_DEBUG, &pdev->dev,
+		"Delivery Queue Size=%04d , WRT_PTR=%04X\n", sz, w_ptr);
+	dev_printk(KERN_DEBUG, &pdev->dev,
+		"Delivery Queue Base Address=0x%llX (PA)"
+		"(tx_dma=0x%llX), Entry=%04d\n",
+		addr, mvi->tx_dma, w_ptr);
+	mvs_hexdump(sizeof(u32), (u8 *)(&mvi->tx[mvi->tx_prod]),
+			(u32) mvi->tx_dma + sizeof(u32) * w_ptr);
+	/*Command List */
+	addr = mvi->slot_dma;
+	dev_printk(KERN_DEBUG, &pdev->dev,
+		"Command List Base Address=0x%llX (PA)"
+		"(slot_dma=0x%llX), Header=%03d\n",
+		addr, slot->buf_dma, tag);
+	dev_printk(KERN_DEBUG, &pdev->dev, "Command Header[%03d]:\n", tag);
+	/*mvs_cmd_hdr */
+	mvs_hexdump(sizeof(struct mvs_cmd_hdr), (u8 *)(&mvi->slot[tag]),
+		(u32) mvi->slot_dma + tag * sizeof(struct mvs_cmd_hdr));
+	/*1.command table area */
+	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Command Table :\n");
+	mvs_hexdump(slot->cmd_size, (u8 *) slot->buf, (u32) slot->buf_dma);
+	/*2.open address frame area */
+	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Open Address Frame :\n");
+	mvs_hexdump(MVS_OAF_SZ, (u8 *) slot->buf + slot->cmd_size,
+				(u32) slot->buf_dma + slot->cmd_size);
+	/*3.status buffer */
+	mvs_hba_sb_dump(mvi, tag, proto);
+	/*4.PRD table */
+	dev_printk(KERN_DEBUG, &pdev->dev, "+---->PRD table :\n");
+	mvs_hexdump(sizeof(struct mvs_prd) * slot->n_elem,
+		(u8 *) slot->buf + slot->cmd_size + MVS_OAF_SZ,
+		(u32) slot->buf_dma + slot->cmd_size + MVS_OAF_SZ);
+#endif
+}
+
+static void mvs_hba_cq_dump(struct mvs_info *mvi)
+{
+#if (_MV_DUMP > 2)
+	u64 addr;
+	void __iomem *regs = mvi->regs;
+	struct pci_dev *pdev = mvi->pdev;
+	u32 entry = mvi->rx_cons + 1;
+	u32 rx_desc = le32_to_cpu(mvi->rx[entry]);
+
+	/*Completion Queue */
+	addr = mr32(RX_HI) << 16 << 16 | mr32(RX_LO);
+	dev_printk(KERN_DEBUG, &pdev->dev, "Completion Task = 0x%p\n",
+		   mvi->slot_info[rx_desc & RXQ_SLOT_MASK].task);
+	dev_printk(KERN_DEBUG, &pdev->dev,
+		"Completion List Base Address=0x%llX (PA), "
+		"CQ_Entry=%04d, CQ_WP=0x%08X\n",
+		addr, entry - 1, mvi->rx[0]);
+	mvs_hexdump(sizeof(u32), (u8 *)(&rx_desc),
+		    mvi->rx_dma + sizeof(u32) * entry);
+#endif
+}
+
+static void mvs_hba_interrupt_enable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(GBL_CTL);
+
+	mw32(GBL_CTL, tmp | INT_EN);
+}
+
+static void mvs_hba_interrupt_disable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(GBL_CTL);
+
+	mw32(GBL_CTL, tmp & ~INT_EN);
+}
+
+static int mvs_int_rx(struct mvs_info *mvi, bool self_clear);
+
+/* move to PCI layer or libata core? */
+static int pci_go_64(struct pci_dev *pdev)
+{
+	int rc;
+
+	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+		if (rc) {
+			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+			if (rc) {
+				dev_printk(KERN_ERR, &pdev->dev,
+					   "64-bit DMA enable failed\n");
+				return rc;
+			}
+		}
+	} else {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_printk(KERN_ERR, &pdev->dev,
+				   "32-bit DMA enable failed\n");
+			return rc;
+		}
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_printk(KERN_ERR, &pdev->dev,
+				   "32-bit consistent DMA enable failed\n");
+			return rc;
+		}
+	}
+
+	return rc;
+}
+
+static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
+{
+	if (task->lldd_task) {
+		struct mvs_slot_info *slot;
+		slot = (struct mvs_slot_info *) task->lldd_task;
+		*tag = slot - mvi->slot_info;
+		return 1;
+	}
+	return 0;
+}
+
+static void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
+{
+	void *bitmap = (void *) &mvi->tags;
+	clear_bit(tag, bitmap);
+}
+
+static void mvs_tag_free(struct mvs_info *mvi, u32 tag)
+{
+	mvs_tag_clear(mvi, tag);
+}
+
+static void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
+{
+	void *bitmap = (void *) &mvi->tags;
+	set_bit(tag, bitmap);
+}
+
+static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
+{
+	unsigned int index, tag;
+	void *bitmap = (void *) &mvi->tags;
+
+	index = find_first_zero_bit(bitmap, MVS_SLOTS);
+	tag = index;
+	if (tag >= MVS_SLOTS)
+		return -SAS_QUEUE_FULL;
+	mvs_tag_set(mvi, tag);
+	*tag_out = tag;
+	return 0;
+}
+
+static void mvs_tag_init(struct mvs_info *mvi)
+{
+	int i;
+	for (i = 0; i < MVS_SLOTS; ++i)
+		mvs_tag_clear(mvi, i);
+}
+
+#ifndef MVS_DISABLE_NVRAM
+static int mvs_eep_read(void __iomem *regs, u32 addr, u32 *data)
+{
+	int timeout = 1000;
+
+	if (addr & ~SPI_ADDR_MASK)
+		return -EINVAL;
+
+	writel(addr, regs + SPI_CMD);
+	writel(TWSI_RD, regs + SPI_CTL);
+
+	while (timeout-- > 0) {
+		if (readl(regs + SPI_CTL) & TWSI_RDY) {
+			*data = readl(regs + SPI_DATA);
+			return 0;
+		}
+
+		udelay(10);
+	}
+
+	return -EBUSY;
+}
+
+static int mvs_eep_read_buf(void __iomem *regs, u32 addr,
+			    void *buf, u32 buflen)
+{
+	u32 addr_end, tmp_addr, i, j;
+	u32 tmp = 0;
+	int rc;
+	u8 *tmp8, *buf8 = buf;
+
+	addr_end = addr + buflen;
+	tmp_addr = ALIGN(addr, 4);
+	if (addr > 0xff)
+		return -EINVAL;
+
+	j = addr & 0x3;
+	if (j) {
+		rc = mvs_eep_read(regs, tmp_addr, &tmp);
+		if (rc)
+			return rc;
+
+		tmp8 = (u8 *)&tmp;
+		for (i = j; i < 4; i++)
+			*buf8++ = tmp8[i];
+
+		tmp_addr += 4;
+	}
+
+	for (j = ALIGN(addr_end, 4); tmp_addr < j; tmp_addr += 4) {
+		rc = mvs_eep_read(regs, tmp_addr, &tmp);
+		if (rc)
+			return rc;
+
+		memcpy(buf8, &tmp, 4);
+		buf8 += 4;
+	}
+
+	if (tmp_addr < addr_end) {
+		rc = mvs_eep_read(regs, tmp_addr, &tmp);
+		if (rc)
+			return rc;
+
+		tmp8 = (u8 *)&tmp;
+		j = addr_end - tmp_addr;
+		for (i = 0; i < j; i++)
+			*buf8++ = tmp8[i];
+
+		tmp_addr += 4;
+	}
+
+	return 0;
+}
+#endif
+
+static int mvs_nvram_read(struct mvs_info *mvi, u32 addr,
+			  void *buf, u32 buflen)
+{
+#ifndef MVS_DISABLE_NVRAM
+	void __iomem *regs = mvi->regs;
+	int rc, i;
+	u32 sum;
+	u8 hdr[2], *tmp;
+	const char *msg;
+
+	rc = mvs_eep_read_buf(regs, addr, &hdr, 2);
+	if (rc) {
+		msg = "nvram hdr read failed";
+		goto err_out;
+	}
+	rc = mvs_eep_read_buf(regs, addr + 2, buf, buflen);
+	if (rc) {
+		msg = "nvram read failed";
+		goto err_out;
+	}
+
+	if (hdr[0] != 0x5A) {
+		/* entry id */
+		msg = "invalid nvram entry id";
+		rc = -ENOENT;
+		goto err_out;
+	}
+
+	tmp = buf;
+	sum = ((u32)hdr[0]) + ((u32)hdr[1]);
+	for (i = 0; i < buflen; i++)
+		sum += ((u32)tmp[i]);
+
+	if (sum) {
+		msg = "nvram checksum failure";
+		rc = -EILSEQ;
+		goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	dev_printk(KERN_ERR, &mvi->pdev->dev, "%s", msg);
+	return rc;
+#else
+	/* FIXME , For SAS target mode */
+	memcpy(buf, "\x50\x05\x04\x30\x11\xab\x00\x00", 8);
+	return 0;
+#endif
+}
+
+static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
+
+	if (!phy->phy_attached)
+		return;
+
+	if (sas_phy->phy) {
+		struct sas_phy *sphy = sas_phy->phy;
+
+		sphy->negotiated_linkrate = sas_phy->linkrate;
+		sphy->minimum_linkrate = phy->minimum_linkrate;
+		sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+		sphy->maximum_linkrate = phy->maximum_linkrate;
+		sphy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
+	}
+
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		struct sas_identify_frame *id;
+
+		id = (struct sas_identify_frame *)phy->frame_rcvd;
+		id->dev_type = phy->identify.device_type;
+		id->initiator_bits = SAS_PROTOCOL_ALL;
+		id->target_bits = phy->identify.target_port_protocols;
+	} else if (phy->phy_type & PORT_TYPE_SATA) {
+		/* TODO */
+	}
+	mvi->sas.sas_phy[i]->frame_rcvd_size = phy->frame_rcvd_size;
+	mvi->sas.notify_port_event(mvi->sas.sas_phy[i],
+				   PORTE_BYTES_DMAED);
+}
+
+static int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time)
+{
+	/* give the phy enabling interrupt event time to come in (1s
+	 * is empirically about all it takes) */
+	if (time < HZ)
+		return 0;
+	/* Wait for discovery to finish */
+	scsi_flush_work(shost);
+	return 1;
+}
+
+static void mvs_scan_start(struct Scsi_Host *shost)
+{
+	int i;
+	struct mvs_info *mvi = SHOST_TO_SAS_HA(shost)->lldd_ha;
+
+	for (i = 0; i < mvi->chip->n_phy; ++i) {
+		mvs_bytes_dmaed(mvi, i);
+	}
+}
+
+static int mvs_slave_configure(struct scsi_device *sdev)
+{
+	struct domain_device *dev = sdev_to_domain_dev(sdev);
+	int ret = sas_slave_configure(sdev);
+
+	if (ret)
+		return ret;
+
+	if (dev_is_sata(dev)) {
+		/* struct ata_port *ap = dev->sata_dev.ap; */
+		/* struct ata_device *adev = ap->link.device; */
+
+		/* clamp at no NCQ for the time being */
+		/* adev->flags |= ATA_DFLAG_NCQ_OFF; */
+		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1);
+	}
+	return 0;
+}
+
+static void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
+{
+	struct pci_dev *pdev = mvi->pdev;
+	struct sas_ha_struct *sas_ha = &mvi->sas;
+	struct mvs_phy *phy = &mvi->phy[phy_no];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+	phy->irq_status = mvs_read_port_irq_stat(mvi, phy_no);
+	/*
+	* events is port event now ,
+	* we need check the interrupt status which belongs to per port.
+	*/
+	dev_printk(KERN_DEBUG, &pdev->dev,
+		"Port %d Event = %X\n",
+		phy_no, phy->irq_status);
+
+	if (phy->irq_status & (PHYEV_POOF | PHYEV_DEC_ERR)) {
+		mvs_release_task(mvi, phy_no);
+		if (!mvs_is_phy_ready(mvi, phy_no)) {
+			sas_phy_disconnected(sas_phy);
+			sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
+			dev_printk(KERN_INFO, &pdev->dev,
+				"Port %d Unplug Notice\n", phy_no);
+
+		} else
+			mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET, NULL);
+	}
+	if (!(phy->irq_status & PHYEV_DEC_ERR)) {
+		if (phy->irq_status & PHYEV_COMWAKE) {
+			u32 tmp = mvs_read_port_irq_mask(mvi, phy_no);
+			mvs_write_port_irq_mask(mvi, phy_no,
+						tmp | PHYEV_SIG_FIS);
+		}
+		if (phy->irq_status & (PHYEV_SIG_FIS | PHYEV_ID_DONE)) {
+			phy->phy_status = mvs_is_phy_ready(mvi, phy_no);
+			if (phy->phy_status) {
+				mvs_detect_porttype(mvi, phy_no);
+
+				if (phy->phy_type & PORT_TYPE_SATA) {
+					u32 tmp = mvs_read_port_irq_mask(mvi,
+								phy_no);
+					tmp &= ~PHYEV_SIG_FIS;
+					mvs_write_port_irq_mask(mvi,
+								phy_no, tmp);
+				}
+
+				mvs_update_phyinfo(mvi, phy_no, 0);
+				sas_ha->notify_phy_event(sas_phy,
+							PHYE_OOB_DONE);
+				mvs_bytes_dmaed(mvi, phy_no);
+			} else {
+				dev_printk(KERN_DEBUG, &pdev->dev,
+					"plugin interrupt but phy is gone\n");
+				mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET,
+							NULL);
+			}
+		} else if (phy->irq_status & PHYEV_BROAD_CH) {
+			mvs_release_task(mvi, phy_no);
+			sas_ha->notify_port_event(sas_phy,
+						PORTE_BROADCAST_RCVD);
+		}
+	}
+	mvs_write_port_irq_stat(mvi, phy_no, phy->irq_status);
+}
+
+static void mvs_int_sata(struct mvs_info *mvi)
+{
+	u32 tmp;
+	void __iomem *regs = mvi->regs;
+	tmp = mr32(INT_STAT_SRS);
+	mw32(INT_STAT_SRS, tmp & 0xFFFF);
+}
+
+static void mvs_slot_reset(struct mvs_info *mvi, struct sas_task *task,
+				u32 slot_idx)
+{
+	void __iomem *regs = mvi->regs;
+	struct domain_device *dev = task->dev;
+	struct asd_sas_port *sas_port = dev->port;
+	struct mvs_port *port = mvi->slot_info[slot_idx].port;
+	u32 reg_set, phy_mask;
+
+	if (!sas_protocol_ata(task->task_proto)) {
+		reg_set = 0;
+		phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
+				sas_port->phy_mask;
+	} else {
+		reg_set = port->taskfileset;
+		phy_mask = sas_port->phy_mask;
+	}
+	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | slot_idx |
+					(TXQ_CMD_SLOT_RESET << TXQ_CMD_SHIFT) |
+					(phy_mask << TXQ_PHY_SHIFT) |
+					(reg_set << TXQ_SRS_SHIFT));
+
+	mw32(TX_PROD_IDX, mvi->tx_prod);
+	mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
+}
+
+static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
+			u32 slot_idx, int err)
+{
+	struct mvs_port *port = mvi->slot_info[slot_idx].port;
+	struct task_status_struct *tstat = &task->task_status;
+	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
+	int stat = SAM_GOOD;
+
+	resp->frame_len = sizeof(struct dev_to_host_fis);
+	memcpy(&resp->ending_fis[0],
+	       SATA_RECEIVED_D2H_FIS(port->taskfileset),
+	       sizeof(struct dev_to_host_fis));
+	tstat->buf_valid_size = sizeof(*resp);
+	if (unlikely(err))
+		stat = SAS_PROTO_RESPONSE;
+	return stat;
+}
+
+static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
+{
+	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
+	mvs_tag_clear(mvi, slot_idx);
+}
+
+static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
+			  struct mvs_slot_info *slot, u32 slot_idx)
+{
+	if (!sas_protocol_ata(task->task_proto))
+		if (slot->n_elem)
+			pci_unmap_sg(mvi->pdev, task->scatter,
+				     slot->n_elem, task->data_dir);
+
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SMP:
+		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_resp, 1,
+			     PCI_DMA_FROMDEVICE);
+		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_req, 1,
+			     PCI_DMA_TODEVICE);
+		break;
+
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SSP:
+	default:
+		/* do nothing */
+		break;
+	}
+	list_del(&slot->list);
+	task->lldd_task = NULL;
+	slot->task = NULL;
+	slot->port = NULL;
+}
+
+static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
+			 u32 slot_idx)
+{
+	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
+	u32 err_dw0 = le32_to_cpu(*(u32 *) (slot->response));
+	u32 err_dw1 = le32_to_cpu(*(u32 *) (slot->response + 4));
+	int stat = SAM_CHECK_COND;
+
+	if (err_dw1 & SLOT_BSY_ERR) {
+		stat = SAS_QUEUE_FULL;
+		mvs_slot_reset(mvi, task, slot_idx);
+	}
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SSP:
+		break;
+	case SAS_PROTOCOL_SMP:
+		break;
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+		if (err_dw0 & TFILE_ERR)
+			stat = mvs_sata_done(mvi, task, slot_idx, 1);
+		break;
+	default:
+		break;
+	}
+
+	mvs_hexdump(16, (u8 *) slot->response, 0);
+	return stat;
+}
+
+static int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
+{
+	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
+	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
+	struct sas_task *task = slot->task;
+	struct task_status_struct *tstat;
+	struct mvs_port *port;
+	bool aborted;
+	void *to;
+
+	if (unlikely(!task || !task->lldd_task))
+		return -1;
+
+	mvs_hba_cq_dump(mvi);
+
+	spin_lock(&task->task_state_lock);
+	aborted = task->task_state_flags & SAS_TASK_STATE_ABORTED;
+	if (!aborted) {
+		task->task_state_flags &=
+		    ~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
+		task->task_state_flags |= SAS_TASK_STATE_DONE;
+	}
+	spin_unlock(&task->task_state_lock);
+
+	if (aborted) {
+		mvs_slot_task_free(mvi, task, slot, slot_idx);
+		mvs_slot_free(mvi, rx_desc);
+		return -1;
+	}
+
+	port = slot->port;
+	tstat = &task->task_status;
+	memset(tstat, 0, sizeof(*tstat));
+	tstat->resp = SAS_TASK_COMPLETE;
+
+	if (unlikely(!port->port_attached || flags)) {
+		mvs_slot_err(mvi, task, slot_idx);
+		if (!sas_protocol_ata(task->task_proto))
+			tstat->stat = SAS_PHY_DOWN;
+		goto out;
+	}
+
+	/* error info record present */
+	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
+		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
+		goto out;
+	}
+
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SSP:
+		/* hw says status == 0, datapres == 0 */
+		if (rx_desc & RXQ_GOOD) {
+			tstat->stat = SAM_GOOD;
+			tstat->resp = SAS_TASK_COMPLETE;
+		}
+		/* response frame present */
+		else if (rx_desc & RXQ_RSP) {
+			struct ssp_response_iu *iu =
+			    slot->response + sizeof(struct mvs_err_info);
+			sas_ssp_task_response(&mvi->pdev->dev, task, iu);
+		}
+
+		/* should never happen? */
+		else
+			tstat->stat = SAM_CHECK_COND;
+		break;
+
+	case SAS_PROTOCOL_SMP: {
+			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
+			tstat->stat = SAM_GOOD;
+			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
+			memcpy(to + sg_resp->offset,
+				slot->response + sizeof(struct mvs_err_info),
+				sg_dma_len(sg_resp));
+			kunmap_atomic(to, KM_IRQ0);
+			break;
+		}
+
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: {
+			tstat->stat = mvs_sata_done(mvi, task, slot_idx, 0);
+			break;
+		}
+
+	default:
+		tstat->stat = SAM_CHECK_COND;
+		break;
+	}
+
+out:
+	mvs_slot_task_free(mvi, task, slot, slot_idx);
+	if (unlikely(tstat->stat != SAS_QUEUE_FULL))
+		mvs_slot_free(mvi, rx_desc);
+
+	spin_unlock(&mvi->lock);
+	task->task_done(task);
+	spin_lock(&mvi->lock);
+	return tstat->stat;
+}
+
+static void mvs_release_task(struct mvs_info *mvi, int phy_no)
+{
+	struct list_head *pos, *n;
+	struct mvs_slot_info *slot;
+	struct mvs_phy *phy = &mvi->phy[phy_no];
+	struct mvs_port *port = phy->port;
+	u32 rx_desc;
+
+	if (!port)
+		return;
+
+	list_for_each_safe(pos, n, &port->list) {
+		slot = container_of(pos, struct mvs_slot_info, list);
+		rx_desc = (u32) (slot - mvi->slot_info);
+		mvs_slot_complete(mvi, rx_desc, 1);
+	}
+}
+
+static void mvs_int_full(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp, stat;
+	int i;
+
+	stat = mr32(INT_STAT);
+
+	mvs_int_rx(mvi, false);
+
+	for (i = 0; i < MVS_MAX_PORTS; i++) {
+		tmp = (stat >> i) & (CINT_PORT | CINT_PORT_STOPPED);
+		if (tmp)
+			mvs_int_port(mvi, i, tmp);
+	}
+
+	if (stat & CINT_SRS)
+		mvs_int_sata(mvi);
+
+	mw32(INT_STAT, stat);
+}
+
+static int mvs_int_rx(struct mvs_info *mvi, bool self_clear)
+{
+	void __iomem *regs = mvi->regs;
+	u32 rx_prod_idx, rx_desc;
+	bool attn = false;
+	struct pci_dev *pdev = mvi->pdev;
+
+	/* the first dword in the RX ring is special: it contains
+	 * a mirror of the hardware's RX producer index, so that
+	 * we don't have to stall the CPU reading that register.
+	 * The actual RX ring is offset by one dword, due to this.
+	 */
+	rx_prod_idx = mvi->rx_cons;
+	mvi->rx_cons = le32_to_cpu(mvi->rx[0]);
+	if (mvi->rx_cons == 0xfff)	/* h/w hasn't touched RX ring yet */
+		return 0;
+
+	/* The CMPL_Q may come late, read from register and try again
+	* note: if coalescing is enabled,
+	* it will need to read from register every time for sure
+	*/
+	if (mvi->rx_cons == rx_prod_idx)
+		mvi->rx_cons = mr32(RX_CONS_IDX) & RX_RING_SZ_MASK;
+
+	if (mvi->rx_cons == rx_prod_idx)
+		return 0;
+
+	while (mvi->rx_cons != rx_prod_idx) {
+
+		/* increment our internal RX consumer pointer */
+		rx_prod_idx = (rx_prod_idx + 1) & (MVS_RX_RING_SZ - 1);
+
+		rx_desc = le32_to_cpu(mvi->rx[rx_prod_idx + 1]);
+
+		if (likely(rx_desc & RXQ_DONE))
+			mvs_slot_complete(mvi, rx_desc, 0);
+		if (rx_desc & RXQ_ATTN) {
+			attn = true;
+			dev_printk(KERN_DEBUG, &pdev->dev, "ATTN %X\n",
+				rx_desc);
+		} else if (rx_desc & RXQ_ERR) {
+			if (!(rx_desc & RXQ_DONE))
+				mvs_slot_complete(mvi, rx_desc, 0);
+			dev_printk(KERN_DEBUG, &pdev->dev, "RXQ_ERR %X\n",
+				rx_desc);
+		} else if (rx_desc & RXQ_SLOT_RESET) {
+			dev_printk(KERN_DEBUG, &pdev->dev, "Slot reset[%X]\n",
+				rx_desc);
+			mvs_slot_free(mvi, rx_desc);
+		}
+	}
+
+	if (attn && self_clear)
+		mvs_int_full(mvi);
+
+	return 0;
+}
+
+#ifdef MVS_USE_TASKLET
+static void mvs_tasklet(unsigned long data)
+{
+	struct mvs_info *mvi = (struct mvs_info *) data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mvi->lock, flags);
+
+#ifdef MVS_DISABLE_MSI
+	mvs_int_full(mvi);
+#else
+	mvs_int_rx(mvi, true);
+#endif
+	spin_unlock_irqrestore(&mvi->lock, flags);
+}
+#endif
+
+static irqreturn_t mvs_interrupt(int irq, void *opaque)
+{
+	struct mvs_info *mvi = opaque;
+	void __iomem *regs = mvi->regs;
+	u32 stat;
+
+	stat = mr32(GBL_INT_STAT);
+
+	if (stat == 0 || stat == 0xffffffff)
+		return IRQ_NONE;
+
+	/* clear CMD_CMPLT ASAP */
+	mw32_f(INT_STAT, CINT_DONE);
+
+#ifndef MVS_USE_TASKLET
+	spin_lock(&mvi->lock);
+
+	mvs_int_full(mvi);
+
+	spin_unlock(&mvi->lock);
+#else
+	tasklet_schedule(&mvi->tasklet);
+#endif
+	return IRQ_HANDLED;
+}
+
+#ifndef MVS_DISABLE_MSI
+static irqreturn_t mvs_msi_interrupt(int irq, void *opaque)
+{
+	struct mvs_info *mvi = opaque;
+
+#ifndef MVS_USE_TASKLET
+	spin_lock(&mvi->lock);
+
+	mvs_int_rx(mvi, true);
+
+	spin_unlock(&mvi->lock);
+#else
+	tasklet_schedule(&mvi->tasklet);
+#endif
+	return IRQ_HANDLED;
+}
+#endif
+
+struct mvs_task_exec_info {
+	struct sas_task *task;
+	struct mvs_cmd_hdr *hdr;
+	struct mvs_port *port;
+	u32 tag;
+	int n_elem;
+};
+
+static int mvs_task_prep_smp(struct mvs_info *mvi,
+			     struct mvs_task_exec_info *tei)
+{
+	int elem, rc, i;
+	struct sas_task *task = tei->task;
+	struct mvs_cmd_hdr *hdr = tei->hdr;
+	struct scatterlist *sg_req, *sg_resp;
+	u32 req_len, resp_len, tag = tei->tag;
+	void *buf_tmp;
+	u8 *buf_oaf;
+	dma_addr_t buf_tmp_dma;
+	struct mvs_prd *buf_prd;
+	struct scatterlist *sg;
+	struct mvs_slot_info *slot = &mvi->slot_info[tag];
+	struct asd_sas_port *sas_port = task->dev->port;
+	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+#if _MV_DUMP
+	u8 *buf_cmd;
+	void *from;
+#endif
+	/*
+	 * DMA-map SMP request, response buffers
+	 */
+	sg_req = &task->smp_task.smp_req;
+	elem = pci_map_sg(mvi->pdev, sg_req, 1, PCI_DMA_TODEVICE);
+	if (!elem)
+		return -ENOMEM;
+	req_len = sg_dma_len(sg_req);
+
+	sg_resp = &task->smp_task.smp_resp;
+	elem = pci_map_sg(mvi->pdev, sg_resp, 1, PCI_DMA_FROMDEVICE);
+	if (!elem) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+	resp_len = sg_dma_len(sg_resp);
+
+	/* must be in dwords */
+	if ((req_len & 0x3) || (resp_len & 0x3)) {
+		rc = -EINVAL;
+		goto err_out_2;
+	}
+
+	/*
+	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
+	 */
+
+	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
+	buf_tmp = slot->buf;
+	buf_tmp_dma = slot->buf_dma;
+
+#if _MV_DUMP
+	buf_cmd = buf_tmp;
+	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
+	buf_tmp += req_len;
+	buf_tmp_dma += req_len;
+	slot->cmd_size = req_len;
+#else
+	hdr->cmd_tbl = cpu_to_le64(sg_dma_address(sg_req));
+#endif
+
+	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
+	buf_oaf = buf_tmp;
+	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_OAF_SZ;
+	buf_tmp_dma += MVS_OAF_SZ;
+
+	/* region 3: PRD table ********************************************* */
+	buf_prd = buf_tmp;
+	if (tei->n_elem)
+		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
+	else
+		hdr->prd_tbl = 0;
+
+	i = sizeof(struct mvs_prd) * tei->n_elem;
+	buf_tmp += i;
+	buf_tmp_dma += i;
+
+	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
+	slot->response = buf_tmp;
+	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+
+	/*
+	 * Fill in TX ring and command slot header
+	 */
+	slot->tx = mvi->tx_prod;
+	mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) |
+					TXQ_MODE_I | tag |
+					(sas_port->phy_mask << TXQ_PHY_SHIFT));
+
+	hdr->flags |= flags;
+	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4));
+	hdr->tags = cpu_to_le32(tag);
+	hdr->data_len = 0;
+
+	/* generate open address frame hdr (first 12 bytes) */
+	buf_oaf[0] = (1 << 7) | (0 << 4) | 0x01; /* initiator, SMP, ftype 1h */
+	buf_oaf[1] = task->dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = 0xFFFF;		/* SAS SPEC */
+	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
+
+	/* fill in PRD (scatter/gather) table, if any */
+	for_each_sg(task->scatter, sg, tei->n_elem, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd++;
+	}
+
+#if _MV_DUMP
+	/* copy cmd table */
+	from = kmap_atomic(sg_page(sg_req), KM_IRQ0);
+	memcpy(buf_cmd, from + sg_req->offset, req_len);
+	kunmap_atomic(from, KM_IRQ0);
+#endif
+	return 0;
+
+err_out_2:
+	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_resp, 1,
+		     PCI_DMA_FROMDEVICE);
+err_out:
+	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_req, 1,
+		     PCI_DMA_TODEVICE);
+	return rc;
+}
+
+static void mvs_free_reg_set(struct mvs_info *mvi, struct mvs_port *port)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp, offs;
+	u8 *tfs = &port->taskfileset;
+
+	if (*tfs == MVS_ID_NOT_MAPPED)
+		return;
+
+	offs = 1U << ((*tfs & 0x0f) + PCS_EN_SATA_REG_SHIFT);
+	if (*tfs < 16) {
+		tmp = mr32(PCS);
+		mw32(PCS, tmp & ~offs);
+	} else {
+		tmp = mr32(CTL);
+		mw32(CTL, tmp & ~offs);
+	}
+
+	tmp = mr32(INT_STAT_SRS) & (1U << *tfs);
+	if (tmp)
+		mw32(INT_STAT_SRS, tmp);
+
+	*tfs = MVS_ID_NOT_MAPPED;
+}
+
+static u8 mvs_assign_reg_set(struct mvs_info *mvi, struct mvs_port *port)
+{
+	int i;
+	u32 tmp, offs;
+	void __iomem *regs = mvi->regs;
+
+	if (port->taskfileset != MVS_ID_NOT_MAPPED)
+		return 0;
+
+	tmp = mr32(PCS);
+
+	for (i = 0; i < mvi->chip->srs_sz; i++) {
+		if (i == 16)
+			tmp = mr32(CTL);
+		offs = 1U << ((i & 0x0f) + PCS_EN_SATA_REG_SHIFT);
+		if (!(tmp & offs)) {
+			port->taskfileset = i;
+
+			if (i < 16)
+				mw32(PCS, tmp | offs);
+			else
+				mw32(CTL, tmp | offs);
+			tmp = mr32(INT_STAT_SRS) & (1U << i);
+			if (tmp)
+				mw32(INT_STAT_SRS, tmp);
+			return 0;
+		}
+	}
+	return MVS_ID_NOT_MAPPED;
+}
+
+static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag)
+{
+	struct ata_queued_cmd *qc = task->uldd_task;
+
+	if (qc) {
+		if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
+			qc->tf.command == ATA_CMD_FPDMA_READ) {
+			*tag = qc->tag;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int mvs_task_prep_ata(struct mvs_info *mvi,
+			     struct mvs_task_exec_info *tei)
+{
+	struct sas_task *task = tei->task;
+	struct domain_device *dev = task->dev;
+	struct mvs_cmd_hdr *hdr = tei->hdr;
+	struct asd_sas_port *sas_port = dev->port;
+	struct mvs_slot_info *slot;
+	struct scatterlist *sg;
+	struct mvs_prd *buf_prd;
+	struct mvs_port *port = tei->port;
+	u32 tag = tei->tag;
+	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+	void *buf_tmp;
+	u8 *buf_cmd, *buf_oaf;
+	dma_addr_t buf_tmp_dma;
+	u32 i, req_len, resp_len;
+	const u32 max_resp_len = SB_RFB_MAX;
+
+	if (mvs_assign_reg_set(mvi, port) == MVS_ID_NOT_MAPPED)
+		return -EBUSY;
+
+	slot = &mvi->slot_info[tag];
+	slot->tx = mvi->tx_prod;
+	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
+					(TXQ_CMD_STP << TXQ_CMD_SHIFT) |
+					(sas_port->phy_mask << TXQ_PHY_SHIFT) |
+					(port->taskfileset << TXQ_SRS_SHIFT));
+
+	if (task->ata_task.use_ncq)
+		flags |= MCH_FPDMA;
+	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET) {
+		if (task->ata_task.fis.command != ATA_CMD_ID_ATAPI)
+			flags |= MCH_ATAPI;
+	}
+
+	/* FIXME: fill in port multiplier number */
+
+	hdr->flags = cpu_to_le32(flags);
+
+	/* FIXME: the low order order 5 bits for the TAG if enable NCQ */
+	if (task->ata_task.use_ncq && mvs_get_ncq_tag(task, &hdr->tags))
+		task->ata_task.fis.sector_count |= hdr->tags << 3;
+	else
+		hdr->tags = cpu_to_le32(tag);
+	hdr->data_len = cpu_to_le32(task->total_xfer_len);
+
+	/*
+	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
+	 */
+
+	/* region 1: command table area (MVS_ATA_CMD_SZ bytes) ************** */
+	buf_cmd = buf_tmp = slot->buf;
+	buf_tmp_dma = slot->buf_dma;
+
+	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_ATA_CMD_SZ;
+	buf_tmp_dma += MVS_ATA_CMD_SZ;
+#if _MV_DUMP
+	slot->cmd_size = MVS_ATA_CMD_SZ;
+#endif
+
+	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
+	/* used for STP.  unused for SATA? */
+	buf_oaf = buf_tmp;
+	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_OAF_SZ;
+	buf_tmp_dma += MVS_OAF_SZ;
+
+	/* region 3: PRD table ********************************************* */
+	buf_prd = buf_tmp;
+	if (tei->n_elem)
+		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
+	else
+		hdr->prd_tbl = 0;
+
+	i = sizeof(struct mvs_prd) * tei->n_elem;
+	buf_tmp += i;
+	buf_tmp_dma += i;
+
+	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
+	/* FIXME: probably unused, for SATA.  kept here just in case
+	 * we get a STP/SATA error information record
+	 */
+	slot->response = buf_tmp;
+	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+
+	req_len = sizeof(struct host_to_dev_fis);
+	resp_len = MVS_SLOT_BUF_SZ - MVS_ATA_CMD_SZ -
+	    sizeof(struct mvs_err_info) - i;
+
+	/* request, response lengths */
+	resp_len = min(resp_len, max_resp_len);
+	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
+
+	task->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */
+	/* fill in command FIS and ATAPI CDB */
+	memcpy(buf_cmd, &task->ata_task.fis, sizeof(struct host_to_dev_fis));
+	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET)
+		memcpy(buf_cmd + STP_ATAPI_CMD,
+			task->ata_task.atapi_packet, 16);
+
+	/* generate open address frame hdr (first 12 bytes) */
+	buf_oaf[0] = (1 << 7) | (2 << 4) | 0x1;	/* initiator, STP, ftype 1h */
+	buf_oaf[1] = task->dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = cpu_to_be16(tag);
+	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
+
+	/* fill in PRD (scatter/gather) table, if any */
+	for_each_sg(task->scatter, sg, tei->n_elem, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd++;
+	}
+
+	return 0;
+}
+
+static int mvs_task_prep_ssp(struct mvs_info *mvi,
+			     struct mvs_task_exec_info *tei)
+{
+	struct sas_task *task = tei->task;
+	struct mvs_cmd_hdr *hdr = tei->hdr;
+	struct mvs_port *port = tei->port;
+	struct mvs_slot_info *slot;
+	struct scatterlist *sg;
+	struct mvs_prd *buf_prd;
+	struct ssp_frame_hdr *ssp_hdr;
+	void *buf_tmp;
+	u8 *buf_cmd, *buf_oaf, fburst = 0;
+	dma_addr_t buf_tmp_dma;
+	u32 flags;
+	u32 resp_len, req_len, i, tag = tei->tag;
+	const u32 max_resp_len = SB_RFB_MAX;
+	u8 phy_mask;
+
+	slot = &mvi->slot_info[tag];
+
+	phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
+		task->dev->port->phy_mask;
+	slot->tx = mvi->tx_prod;
+	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
+				(TXQ_CMD_SSP << TXQ_CMD_SHIFT) |
+				(phy_mask << TXQ_PHY_SHIFT));
+
+	flags = MCH_RETRY;
+	if (task->ssp_task.enable_first_burst) {
+		flags |= MCH_FBURST;
+		fburst = (1 << 7);
+	}
+	hdr->flags = cpu_to_le32(flags |
+				 (tei->n_elem << MCH_PRD_LEN_SHIFT) |
+				 (MCH_SSP_FR_CMD << MCH_SSP_FR_TYPE_SHIFT));
+
+	hdr->tags = cpu_to_le32(tag);
+	hdr->data_len = cpu_to_le32(task->total_xfer_len);
+
+	/*
+	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
+	 */
+
+	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
+	buf_cmd = buf_tmp = slot->buf;
+	buf_tmp_dma = slot->buf_dma;
+
+	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_SSP_CMD_SZ;
+	buf_tmp_dma += MVS_SSP_CMD_SZ;
+#if _MV_DUMP
+	slot->cmd_size = MVS_SSP_CMD_SZ;
+#endif
+
+	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
+	buf_oaf = buf_tmp;
+	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
+
+	buf_tmp += MVS_OAF_SZ;
+	buf_tmp_dma += MVS_OAF_SZ;
+
+	/* region 3: PRD table ********************************************* */
+	buf_prd = buf_tmp;
+	if (tei->n_elem)
+		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
+	else
+		hdr->prd_tbl = 0;
+
+	i = sizeof(struct mvs_prd) * tei->n_elem;
+	buf_tmp += i;
+	buf_tmp_dma += i;
+
+	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
+	slot->response = buf_tmp;
+	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+
+	resp_len = MVS_SLOT_BUF_SZ - MVS_SSP_CMD_SZ - MVS_OAF_SZ -
+	    sizeof(struct mvs_err_info) - i;
+	resp_len = min(resp_len, max_resp_len);
+
+	req_len = sizeof(struct ssp_frame_hdr) + 28;
+
+	/* request, response lengths */
+	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
+
+	/* generate open address frame hdr (first 12 bytes) */
+	buf_oaf[0] = (1 << 7) | (1 << 4) | 0x1;	/* initiator, SSP, ftype 1h */
+	buf_oaf[1] = task->dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = cpu_to_be16(tag);
+	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
+
+	/* fill in SSP frame header (Command Table.SSP frame header) */
+	ssp_hdr = (struct ssp_frame_hdr *)buf_cmd;
+	ssp_hdr->frame_type = SSP_COMMAND;
+	memcpy(ssp_hdr->hashed_dest_addr, task->dev->hashed_sas_addr,
+	       HASHED_SAS_ADDR_SIZE);
+	memcpy(ssp_hdr->hashed_src_addr,
+	       task->dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	ssp_hdr->tag = cpu_to_be16(tag);
+
+	/* fill in command frame IU */
+	buf_cmd += sizeof(*ssp_hdr);
+	memcpy(buf_cmd, &task->ssp_task.LUN, 8);
+	buf_cmd[9] = fburst | task->ssp_task.task_attr |
+			(task->ssp_task.task_prio << 3);
+	memcpy(buf_cmd + 12, &task->ssp_task.cdb, 16);
+
+	/* fill in PRD (scatter/gather) table, if any */
+	for_each_sg(task->scatter, sg, tei->n_elem, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd++;
+	}
+
+	return 0;
+}
+
+static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
+{
+	struct domain_device *dev = task->dev;
+	struct mvs_info *mvi = dev->port->ha->lldd_ha;
+	struct pci_dev *pdev = mvi->pdev;
+	void __iomem *regs = mvi->regs;
+	struct mvs_task_exec_info tei;
+	struct sas_task *t = task;
+	struct mvs_slot_info *slot;
+	u32 tag = 0xdeadbeef, rc, n_elem = 0;
+	unsigned long flags;
+	u32 n = num, pass = 0;
+
+	spin_lock_irqsave(&mvi->lock, flags);
+	do {
+		dev = t->dev;
+		tei.port = &mvi->port[dev->port->id];
+
+		if (!tei.port->port_attached) {
+			if (sas_protocol_ata(t->task_proto)) {
+				rc = SAS_PHY_DOWN;
+				goto out_done;
+			} else {
+				struct task_status_struct *ts = &t->task_status;
+				ts->resp = SAS_TASK_UNDELIVERED;
+				ts->stat = SAS_PHY_DOWN;
+				t->task_done(t);
+				if (n > 1)
+					t = list_entry(t->list.next,
+							struct sas_task, list);
+				continue;
+			}
+		}
+
+		if (!sas_protocol_ata(t->task_proto)) {
+			if (t->num_scatter) {
+				n_elem = pci_map_sg(mvi->pdev, t->scatter,
+						    t->num_scatter,
+						    t->data_dir);
+				if (!n_elem) {
+					rc = -ENOMEM;
+					goto err_out;
+				}
+			}
+		} else {
+			n_elem = t->num_scatter;
+		}
+
+		rc = mvs_tag_alloc(mvi, &tag);
+		if (rc)
+			goto err_out;
+
+		slot = &mvi->slot_info[tag];
+		t->lldd_task = NULL;
+		slot->n_elem = n_elem;
+		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+		tei.task = t;
+		tei.hdr = &mvi->slot[tag];
+		tei.tag = tag;
+		tei.n_elem = n_elem;
+
+		switch (t->task_proto) {
+		case SAS_PROTOCOL_SMP:
+			rc = mvs_task_prep_smp(mvi, &tei);
+			break;
+		case SAS_PROTOCOL_SSP:
+			rc = mvs_task_prep_ssp(mvi, &tei);
+			break;
+		case SAS_PROTOCOL_SATA:
+		case SAS_PROTOCOL_STP:
+		case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+			rc = mvs_task_prep_ata(mvi, &tei);
+			break;
+		default:
+			dev_printk(KERN_ERR, &pdev->dev,
+				"unknown sas_task proto: 0x%x\n",
+				t->task_proto);
+			rc = -EINVAL;
+			break;
+		}
+
+		if (rc)
+			goto err_out_tag;
+
+		slot->task = t;
+		slot->port = tei.port;
+		t->lldd_task = (void *) slot;
+		list_add_tail(&slot->list, &slot->port->list);
+		/* TODO: select normal or high priority */
+
+		spin_lock(&t->task_state_lock);
+		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
+		spin_unlock(&t->task_state_lock);
+
+		mvs_hba_memory_dump(mvi, tag, t->task_proto);
+
+		++pass;
+		mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
+		if (n > 1)
+			t = list_entry(t->list.next, struct sas_task, list);
+	} while (--n);
+
+	rc = 0;
+	goto out_done;
+
+err_out_tag:
+	mvs_tag_free(mvi, tag);
+err_out:
+	dev_printk(KERN_ERR, &pdev->dev, "mvsas exec failed[%d]!\n", rc);
+	if (!sas_protocol_ata(t->task_proto))
+		if (n_elem)
+			pci_unmap_sg(mvi->pdev, t->scatter, n_elem,
+				     t->data_dir);
+out_done:
+	if (pass)
+		mw32(TX_PROD_IDX, (mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
+	spin_unlock_irqrestore(&mvi->lock, flags);
+	return rc;
+}
+
+static int mvs_task_abort(struct sas_task *task)
+{
+	int rc;
+	unsigned long flags;
+	struct mvs_info *mvi = task->dev->port->ha->lldd_ha;
+	struct pci_dev *pdev = mvi->pdev;
+	int tag;
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		rc = TMF_RESP_FUNC_COMPLETE;
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		goto out_done;
+	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SMP:
+		dev_printk(KERN_DEBUG, &pdev->dev, "SMP Abort! \n");
+		break;
+	case SAS_PROTOCOL_SSP:
+		dev_printk(KERN_DEBUG, &pdev->dev, "SSP Abort! \n");
+		break;
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:{
+		dev_printk(KERN_DEBUG, &pdev->dev, "STP Abort! \n");
+#if _MV_DUMP
+		dev_printk(KERN_DEBUG, &pdev->dev, "Dump D2H FIS: \n");
+		mvs_hexdump(sizeof(struct host_to_dev_fis),
+				(void *)&task->ata_task.fis, 0);
+		dev_printk(KERN_DEBUG, &pdev->dev, "Dump ATAPI Cmd : \n");
+		mvs_hexdump(16, task->ata_task.atapi_packet, 0);
+#endif
+		spin_lock_irqsave(&task->task_state_lock, flags);
+		if (task->task_state_flags & SAS_TASK_NEED_DEV_RESET) {
+			/* TODO */
+			;
+		}
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		break;
+	}
+	default:
+		break;
+	}
+
+	if (mvs_find_tag(mvi, task, &tag)) {
+		spin_lock_irqsave(&mvi->lock, flags);
+		mvs_slot_task_free(mvi, task, &mvi->slot_info[tag], tag);
+		spin_unlock_irqrestore(&mvi->lock, flags);
+	}
+	if (!mvs_task_exec(task, 1, GFP_ATOMIC))
+		rc = TMF_RESP_FUNC_COMPLETE;
+	else
+		rc = TMF_RESP_FUNC_FAILED;
+out_done:
+	return rc;
+}
+
+static void mvs_free(struct mvs_info *mvi)
+{
+	int i;
+
+	if (!mvi)
+		return;
+
+	for (i = 0; i < MVS_SLOTS; i++) {
+		struct mvs_slot_info *slot = &mvi->slot_info[i];
+
+		if (slot->buf)
+			dma_free_coherent(&mvi->pdev->dev, MVS_SLOT_BUF_SZ,
+					  slot->buf, slot->buf_dma);
+	}
+
+	if (mvi->tx)
+		dma_free_coherent(&mvi->pdev->dev,
+				  sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
+				  mvi->tx, mvi->tx_dma);
+	if (mvi->rx_fis)
+		dma_free_coherent(&mvi->pdev->dev, MVS_RX_FISL_SZ,
+				  mvi->rx_fis, mvi->rx_fis_dma);
+	if (mvi->rx)
+		dma_free_coherent(&mvi->pdev->dev,
+				  sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
+				  mvi->rx, mvi->rx_dma);
+	if (mvi->slot)
+		dma_free_coherent(&mvi->pdev->dev,
+				  sizeof(*mvi->slot) * MVS_SLOTS,
+				  mvi->slot, mvi->slot_dma);
+#ifdef MVS_ENABLE_PERI
+	if (mvi->peri_regs)
+		iounmap(mvi->peri_regs);
+#endif
+	if (mvi->regs)
+		iounmap(mvi->regs);
+	if (mvi->shost)
+		scsi_host_put(mvi->shost);
+	kfree(mvi->sas.sas_port);
+	kfree(mvi->sas.sas_phy);
+	kfree(mvi);
+}
+
+/* FIXME: locking? */
+static int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
+			   void *funcdata)
+{
+	struct mvs_info *mvi = sas_phy->ha->lldd_ha;
+	int rc = 0, phy_id = sas_phy->id;
+	u32 tmp;
+
+	tmp = mvs_read_phy_ctl(mvi, phy_id);
+
+	switch (func) {
+	case PHY_FUNC_SET_LINK_RATE:{
+			struct sas_phy_linkrates *rates = funcdata;
+			u32 lrmin = 0, lrmax = 0;
+
+			lrmin = (rates->minimum_linkrate << 8);
+			lrmax = (rates->maximum_linkrate << 12);
+
+			if (lrmin) {
+				tmp &= ~(0xf << 8);
+				tmp |= lrmin;
+			}
+			if (lrmax) {
+				tmp &= ~(0xf << 12);
+				tmp |= lrmax;
+			}
+			mvs_write_phy_ctl(mvi, phy_id, tmp);
+			break;
+		}
+
+	case PHY_FUNC_HARD_RESET:
+		if (tmp & PHY_RST_HARD)
+			break;
+		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST_HARD);
+		break;
+
+	case PHY_FUNC_LINK_RESET:
+		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST);
+		break;
+
+	case PHY_FUNC_DISABLE:
+	case PHY_FUNC_RELEASE_SPINUP_HOLD:
+	default:
+		rc = -EOPNOTSUPP;
+	}
+
+	return rc;
+}
+
+static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
+{
+	struct mvs_phy *phy = &mvi->phy[phy_id];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+	sas_phy->enabled = (phy_id < mvi->chip->n_phy) ? 1 : 0;
+	sas_phy->class = SAS;
+	sas_phy->iproto = SAS_PROTOCOL_ALL;
+	sas_phy->tproto = 0;
+	sas_phy->type = PHY_TYPE_PHYSICAL;
+	sas_phy->role = PHY_ROLE_INITIATOR;
+	sas_phy->oob_mode = OOB_NOT_CONNECTED;
+	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
+
+	sas_phy->id = phy_id;
+	sas_phy->sas_addr = &mvi->sas_addr[0];
+	sas_phy->frame_rcvd = &phy->frame_rcvd[0];
+	sas_phy->ha = &mvi->sas;
+	sas_phy->lldd_phy = phy;
+}
+
+static struct mvs_info *__devinit mvs_alloc(struct pci_dev *pdev,
+					    const struct pci_device_id *ent)
+{
+	struct mvs_info *mvi;
+	unsigned long res_start, res_len, res_flag;
+	struct asd_sas_phy **arr_phy;
+	struct asd_sas_port **arr_port;
+	const struct mvs_chip_info *chip = &mvs_chips[ent->driver_data];
+	int i;
+
+	/*
+	 * alloc and init our per-HBA mvs_info struct
+	 */
+
+	mvi = kzalloc(sizeof(*mvi), GFP_KERNEL);
+	if (!mvi)
+		return NULL;
+
+	spin_lock_init(&mvi->lock);
+#ifdef MVS_USE_TASKLET
+	tasklet_init(&mvi->tasklet, mvs_tasklet, (unsigned long)mvi);
+#endif
+	mvi->pdev = pdev;
+	mvi->chip = chip;
+
+	if (pdev->device == 0x6440 && pdev->revision == 0)
+		mvi->flags |= MVF_PHY_PWR_FIX;
+
+	/*
+	 * alloc and init SCSI, SAS glue
+	 */
+
+	mvi->shost = scsi_host_alloc(&mvs_sht, sizeof(void *));
+	if (!mvi->shost)
+		goto err_out;
+
+	arr_phy = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
+	arr_port = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
+	if (!arr_phy || !arr_port)
+		goto err_out;
+
+	for (i = 0; i < MVS_MAX_PHYS; i++) {
+		mvs_phy_init(mvi, i);
+		arr_phy[i] = &mvi->phy[i].sas_phy;
+		arr_port[i] = &mvi->port[i].sas_port;
+		mvi->port[i].taskfileset = MVS_ID_NOT_MAPPED;
+		mvi->port[i].wide_port_phymap = 0;
+		mvi->port[i].port_attached = 0;
+		INIT_LIST_HEAD(&mvi->port[i].list);
+	}
+
+	SHOST_TO_SAS_HA(mvi->shost) = &mvi->sas;
+	mvi->shost->transportt = mvs_stt;
+	mvi->shost->max_id = 21;
+	mvi->shost->max_lun = ~0;
+	mvi->shost->max_channel = 0;
+	mvi->shost->max_cmd_len = 16;
+
+	mvi->sas.sas_ha_name = DRV_NAME;
+	mvi->sas.dev = &pdev->dev;
+	mvi->sas.lldd_module = THIS_MODULE;
+	mvi->sas.sas_addr = &mvi->sas_addr[0];
+	mvi->sas.sas_phy = arr_phy;
+	mvi->sas.sas_port = arr_port;
+	mvi->sas.num_phys = chip->n_phy;
+	mvi->sas.lldd_max_execute_num = 1;
+	mvi->sas.lldd_queue_size = MVS_QUEUE_SIZE;
+	mvi->shost->can_queue = MVS_CAN_QUEUE;
+	mvi->shost->cmd_per_lun = MVS_SLOTS / mvi->sas.num_phys;
+	mvi->sas.lldd_ha = mvi;
+	mvi->sas.core.shost = mvi->shost;
+
+	mvs_tag_init(mvi);
+
+	/*
+	 * ioremap main and peripheral registers
+	 */
+
+#ifdef MVS_ENABLE_PERI
+	res_start = pci_resource_start(pdev, 2);
+	res_len = pci_resource_len(pdev, 2);
+	if (!res_start || !res_len)
+		goto err_out;
+
+	mvi->peri_regs = ioremap_nocache(res_start, res_len);
+	if (!mvi->peri_regs)
+		goto err_out;
+#endif
+
+	res_start = pci_resource_start(pdev, 4);
+	res_len = pci_resource_len(pdev, 4);
+	if (!res_start || !res_len)
+		goto err_out;
+
+	res_flag = pci_resource_flags(pdev, 4);
+	if (res_flag & IORESOURCE_CACHEABLE)
+		mvi->regs = ioremap(res_start, res_len);
+	else
+		mvi->regs = ioremap_nocache(res_start, res_len);
+
+	if (!mvi->regs)
+		goto err_out;
+
+	/*
+	 * alloc and init our DMA areas
+	 */
+
+	mvi->tx = dma_alloc_coherent(&pdev->dev,
+				     sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
+				     &mvi->tx_dma, GFP_KERNEL);
+	if (!mvi->tx)
+		goto err_out;
+	memset(mvi->tx, 0, sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ);
+
+	mvi->rx_fis = dma_alloc_coherent(&pdev->dev, MVS_RX_FISL_SZ,
+					 &mvi->rx_fis_dma, GFP_KERNEL);
+	if (!mvi->rx_fis)
+		goto err_out;
+	memset(mvi->rx_fis, 0, MVS_RX_FISL_SZ);
+
+	mvi->rx = dma_alloc_coherent(&pdev->dev,
+				     sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
+				     &mvi->rx_dma, GFP_KERNEL);
+	if (!mvi->rx)
+		goto err_out;
+	memset(mvi->rx, 0, sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1));
+
+	mvi->rx[0] = cpu_to_le32(0xfff);
+	mvi->rx_cons = 0xfff;
+
+	mvi->slot = dma_alloc_coherent(&pdev->dev,
+				       sizeof(*mvi->slot) * MVS_SLOTS,
+				       &mvi->slot_dma, GFP_KERNEL);
+	if (!mvi->slot)
+		goto err_out;
+	memset(mvi->slot, 0, sizeof(*mvi->slot) * MVS_SLOTS);
+
+	for (i = 0; i < MVS_SLOTS; i++) {
+		struct mvs_slot_info *slot = &mvi->slot_info[i];
+
+		slot->buf = dma_alloc_coherent(&pdev->dev, MVS_SLOT_BUF_SZ,
+					       &slot->buf_dma, GFP_KERNEL);
+		if (!slot->buf)
+			goto err_out;
+		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+	}
+
+	/* finally, read NVRAM to get our SAS address */
+	if (mvs_nvram_read(mvi, NVR_SAS_ADDR, &mvi->sas_addr, 8))
+		goto err_out;
+	return mvi;
+
+err_out:
+	mvs_free(mvi);
+	return NULL;
+}
+
+static u32 mvs_cr32(void __iomem *regs, u32 addr)
+{
+	mw32(CMD_ADDR, addr);
+	return mr32(CMD_DATA);
+}
+
+static void mvs_cw32(void __iomem *regs, u32 addr, u32 val)
+{
+	mw32(CMD_ADDR, addr);
+	mw32(CMD_DATA, val);
+}
+
+static u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port)
+{
+	void __iomem *regs = mvi->regs;
+	return (port < 4)?mr32(P0_SER_CTLSTAT + port * 4):
+		mr32(P4_SER_CTLSTAT + (port - 4) * 4);
+}
+
+static void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val)
+{
+	void __iomem *regs = mvi->regs;
+	if (port < 4)
+		mw32(P0_SER_CTLSTAT + port * 4, val);
+	else
+		mw32(P4_SER_CTLSTAT + (port - 4) * 4, val);
+}
+
+static u32 mvs_read_port(struct mvs_info *mvi, u32 off, u32 off2, u32 port)
+{
+	void __iomem *regs = mvi->regs + off;
+	void __iomem *regs2 = mvi->regs + off2;
+	return (port < 4)?readl(regs + port * 8):
+		readl(regs2 + (port - 4) * 8);
+}
+
+static void mvs_write_port(struct mvs_info *mvi, u32 off, u32 off2,
+				u32 port, u32 val)
+{
+	void __iomem *regs = mvi->regs + off;
+	void __iomem *regs2 = mvi->regs + off2;
+	if (port < 4)
+		writel(val, regs + port * 8);
+	else
+		writel(val, regs2 + (port - 4) * 8);
+}
+
+static u32 mvs_read_port_cfg_data(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_CFG_DATA, MVS_P4_CFG_DATA, port);
+}
+
+static void mvs_write_port_cfg_data(struct mvs_info *mvi, u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_CFG_DATA, MVS_P4_CFG_DATA, port, val);
+}
+
+static void mvs_write_port_cfg_addr(struct mvs_info *mvi, u32 port, u32 addr)
+{
+	mvs_write_port(mvi, MVS_P0_CFG_ADDR, MVS_P4_CFG_ADDR, port, addr);
+}
+
+static u32 mvs_read_port_vsr_data(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_VSR_DATA, MVS_P4_VSR_DATA, port);
+}
+
+static void mvs_write_port_vsr_data(struct mvs_info *mvi, u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_VSR_DATA, MVS_P4_VSR_DATA, port, val);
+}
+
+static void mvs_write_port_vsr_addr(struct mvs_info *mvi, u32 port, u32 addr)
+{
+	mvs_write_port(mvi, MVS_P0_VSR_ADDR, MVS_P4_VSR_ADDR, port, addr);
+}
+
+static u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_INT_STAT, MVS_P4_INT_STAT, port);
+}
+
+static void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_INT_STAT, MVS_P4_INT_STAT, port, val);
+}
+
+static u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_INT_MASK, MVS_P4_INT_MASK, port);
+}
+
+static void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_INT_MASK, MVS_P4_INT_MASK, port, val);
+}
+
+static void __devinit mvs_phy_hacks(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	/* workaround for SATA R-ERR, to ignore phy glitch */
+	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
+	tmp &= ~(1 << 9);
+	tmp |= (1 << 10);
+	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
+
+	/* enable retry 127 times */
+	mvs_cw32(regs, CMD_SAS_CTL1, 0x7f7f);
+
+	/* extend open frame timeout to max */
+	tmp = mvs_cr32(regs, CMD_SAS_CTL0);
+	tmp &= ~0xffff;
+	tmp |= 0x3fff;
+	mvs_cw32(regs, CMD_SAS_CTL0, tmp);
+
+	/* workaround for WDTIMEOUT , set to 550 ms */
+	mvs_cw32(regs, CMD_WD_TIMER, 0x86470);
+
+	/* not to halt for different port op during wideport link change */
+	mvs_cw32(regs, CMD_APP_ERR_CONFIG, 0xffefbf7d);
+
+	/* workaround for Seagate disk not-found OOB sequence, recv
+	 * COMINIT before sending out COMWAKE */
+	tmp = mvs_cr32(regs, CMD_PHY_MODE_21);
+	tmp &= 0x0000ffff;
+	tmp |= 0x00fa0000;
+	mvs_cw32(regs, CMD_PHY_MODE_21, tmp);
+
+	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
+	tmp &= 0x1fffffff;
+	tmp |= (2U << 29);	/* 8 ms retry */
+	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
+
+	/* TEST - for phy decoding error, adjust voltage levels */
+	mw32(P0_VSR_ADDR + 0, 0x8);
+	mw32(P0_VSR_DATA + 0, 0x2F0);
+
+	mw32(P0_VSR_ADDR + 8, 0x8);
+	mw32(P0_VSR_DATA + 8, 0x2F0);
+
+	mw32(P0_VSR_ADDR + 16, 0x8);
+	mw32(P0_VSR_DATA + 16, 0x2F0);
+
+	mw32(P0_VSR_ADDR + 24, 0x8);
+	mw32(P0_VSR_DATA + 24, 0x2F0);
+
+}
+
+static void mvs_enable_xmt(struct mvs_info *mvi, int PhyId)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(PCS);
+	if (mvi->chip->n_phy <= 4)
+		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT);
+	else
+		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT2);
+	mw32(PCS, tmp);
+}
+
+static void mvs_detect_porttype(struct mvs_info *mvi, int i)
+{
+	void __iomem *regs = mvi->regs;
+	u32 reg;
+	struct mvs_phy *phy = &mvi->phy[i];
+
+	/* TODO check & save device type */
+	reg = mr32(GBL_PORT_TYPE);
+
+	if (reg & MODE_SAS_SATA & (1 << i))
+		phy->phy_type |= PORT_TYPE_SAS;
+	else
+		phy->phy_type |= PORT_TYPE_SATA;
+}
+
+static void *mvs_get_d2h_reg(struct mvs_info *mvi, int i, void *buf)
+{
+	u32 *s = (u32 *) buf;
+
+	if (!s)
+		return NULL;
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG3);
+	s[3] = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG2);
+	s[2] = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG1);
+	s[1] = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG0);
+	s[0] = mvs_read_port_cfg_data(mvi, i);
+
+	return (void *)s;
+}
+
+static u32 mvs_is_sig_fis_received(u32 irq_status)
+{
+	return irq_status & PHYEV_SIG_FIS;
+}
+
+static void mvs_update_wideport(struct mvs_info *mvi, int i)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct mvs_port *port = phy->port;
+	int j, no;
+
+	for_each_phy(port->wide_port_phymap, no, j, mvi->chip->n_phy)
+		if (no & 1) {
+			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
+			mvs_write_port_cfg_data(mvi, no,
+						port->wide_port_phymap);
+		} else {
+			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
+			mvs_write_port_cfg_data(mvi, no, 0);
+		}
+}
+
+static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i)
+{
+	u32 tmp;
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct mvs_port *port = phy->port;;
+
+	tmp = mvs_read_phy_ctl(mvi, i);
+
+	if ((tmp & PHY_READY_MASK) && !(phy->irq_status & PHYEV_POOF)) {
+		if (!port)
+			phy->phy_attached = 1;
+		return tmp;
+	}
+
+	if (port) {
+		if (phy->phy_type & PORT_TYPE_SAS) {
+			port->wide_port_phymap &= ~(1U << i);
+			if (!port->wide_port_phymap)
+				port->port_attached = 0;
+			mvs_update_wideport(mvi, i);
+		} else if (phy->phy_type & PORT_TYPE_SATA)
+			port->port_attached = 0;
+		mvs_free_reg_set(mvi, phy->port);
+		phy->port = NULL;
+		phy->phy_attached = 0;
+		phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
+	}
+	return 0;
+}
+
+static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
+					int get_st)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct pci_dev *pdev = mvi->pdev;
+	u32 tmp;
+	u64 tmp64;
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_IDENTIFY);
+	phy->dev_info = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
+	phy->dev_sas_addr = (u64) mvs_read_port_cfg_data(mvi, i) << 32;
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
+	phy->dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
+
+	if (get_st) {
+		phy->irq_status = mvs_read_port_irq_stat(mvi, i);
+		phy->phy_status = mvs_is_phy_ready(mvi, i);
+	}
+
+	if (phy->phy_status) {
+		u32 phy_st;
+		struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
+
+		mvs_write_port_cfg_addr(mvi, i, PHYR_PHY_STAT);
+		phy_st = mvs_read_port_cfg_data(mvi, i);
+
+		sas_phy->linkrate =
+			(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+				PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
+		phy->minimum_linkrate =
+			(phy->phy_status &
+				PHY_MIN_SPP_PHYS_LINK_RATE_MASK) >> 8;
+		phy->maximum_linkrate =
+			(phy->phy_status &
+				PHY_MAX_SPP_PHYS_LINK_RATE_MASK) >> 12;
+
+		if (phy->phy_type & PORT_TYPE_SAS) {
+			/* Updated attached_sas_addr */
+			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_HI);
+			phy->att_dev_sas_addr =
+				(u64) mvs_read_port_cfg_data(mvi, i) << 32;
+			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_LO);
+			phy->att_dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
+			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_DEV_INFO);
+			phy->att_dev_info = mvs_read_port_cfg_data(mvi, i);
+			phy->identify.device_type =
+			    phy->att_dev_info & PORT_DEV_TYPE_MASK;
+
+			if (phy->identify.device_type == SAS_END_DEV)
+				phy->identify.target_port_protocols =
+							SAS_PROTOCOL_SSP;
+			else if (phy->identify.device_type != NO_DEVICE)
+				phy->identify.target_port_protocols =
+							SAS_PROTOCOL_SMP;
+			if (phy_st & PHY_OOB_DTCTD)
+				sas_phy->oob_mode = SAS_OOB_MODE;
+			phy->frame_rcvd_size =
+			    sizeof(struct sas_identify_frame);
+		} else if (phy->phy_type & PORT_TYPE_SATA) {
+			phy->identify.target_port_protocols = SAS_PROTOCOL_STP;
+			if (mvs_is_sig_fis_received(phy->irq_status)) {
+				phy->att_dev_sas_addr = i;	/* temp */
+				if (phy_st & PHY_OOB_DTCTD)
+					sas_phy->oob_mode = SATA_OOB_MODE;
+				phy->frame_rcvd_size =
+				    sizeof(struct dev_to_host_fis);
+				mvs_get_d2h_reg(mvi, i,
+						(void *)sas_phy->frame_rcvd);
+			} else {
+				dev_printk(KERN_DEBUG, &pdev->dev,
+					"No sig fis\n");
+				phy->phy_type &= ~(PORT_TYPE_SATA);
+				goto out_done;
+			}
+		}
+		tmp64 = cpu_to_be64(phy->att_dev_sas_addr);
+		memcpy(sas_phy->attached_sas_addr, &tmp64, SAS_ADDR_SIZE);
+
+		dev_printk(KERN_DEBUG, &pdev->dev,
+			"phy[%d] Get Attached Address 0x%llX ,"
+			" SAS Address 0x%llX\n",
+			i,
+			(unsigned long long)phy->att_dev_sas_addr,
+			(unsigned long long)phy->dev_sas_addr);
+		dev_printk(KERN_DEBUG, &pdev->dev,
+			"Rate = %x , type = %d\n",
+			sas_phy->linkrate, phy->phy_type);
+
+		/* workaround for HW phy decoding error on 1.5g disk drive */
+		mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE6);
+		tmp = mvs_read_port_vsr_data(mvi, i);
+		if (((phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+		     PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET) ==
+			SAS_LINK_RATE_1_5_GBPS)
+			tmp &= ~PHY_MODE6_LATECLK;
+		else
+			tmp |= PHY_MODE6_LATECLK;
+		mvs_write_port_vsr_data(mvi, i, tmp);
+
+	}
+out_done:
+	if (get_st)
+		mvs_write_port_irq_stat(mvi, i, phy->irq_status);
+}
+
+static void mvs_port_formed(struct asd_sas_phy *sas_phy)
+{
+	struct sas_ha_struct *sas_ha = sas_phy->ha;
+	struct mvs_info *mvi = sas_ha->lldd_ha;
+	struct asd_sas_port *sas_port = sas_phy->port;
+	struct mvs_phy *phy = sas_phy->lldd_phy;
+	struct mvs_port *port = &mvi->port[sas_port->id];
+	unsigned long flags;
+
+	spin_lock_irqsave(&mvi->lock, flags);
+	port->port_attached = 1;
+	phy->port = port;
+	port->taskfileset = MVS_ID_NOT_MAPPED;
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		port->wide_port_phymap = sas_port->phy_mask;
+		mvs_update_wideport(mvi, sas_phy->id);
+	}
+	spin_unlock_irqrestore(&mvi->lock, flags);
+}
+
+static int mvs_I_T_nexus_reset(struct domain_device *dev)
+{
+	return TMF_RESP_FUNC_FAILED;
+}
+
+static int __devinit mvs_hw_init(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	int i;
+	u32 tmp, cctl;
+
+	/* make sure interrupts are masked immediately (paranoia) */
+	mw32(GBL_CTL, 0);
+	tmp = mr32(GBL_CTL);
+
+	/* Reset Controller */
+	if (!(tmp & HBA_RST)) {
+		if (mvi->flags & MVF_PHY_PWR_FIX) {
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
+			tmp &= ~PCTL_PWR_ON;
+			tmp |= PCTL_OFF;
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
+			tmp &= ~PCTL_PWR_ON;
+			tmp |= PCTL_OFF;
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+		}
+
+		/* global reset, incl. COMRESET/H_RESET_N (self-clearing) */
+		mw32_f(GBL_CTL, HBA_RST);
+	}
+
+	/* wait for reset to finish; timeout is just a guess */
+	i = 1000;
+	while (i-- > 0) {
+		msleep(10);
+
+		if (!(mr32(GBL_CTL) & HBA_RST))
+			break;
+	}
+	if (mr32(GBL_CTL) & HBA_RST) {
+		dev_printk(KERN_ERR, &mvi->pdev->dev, "HBA reset failed\n");
+		return -EBUSY;
+	}
+
+	/* Init Chip */
+	/* make sure RST is set; HBA_RST /should/ have done that for us */
+	cctl = mr32(CTL);
+	if (cctl & CCTL_RST)
+		cctl &= ~CCTL_RST;
+	else
+		mw32_f(CTL, cctl | CCTL_RST);
+
+	/* write to device control _AND_ device status register? - A.C. */
+	pci_read_config_dword(mvi->pdev, PCR_DEV_CTRL, &tmp);
+	tmp &= ~PRD_REQ_MASK;
+	tmp |= PRD_REQ_SIZE;
+	pci_write_config_dword(mvi->pdev, PCR_DEV_CTRL, tmp);
+
+	pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
+	tmp |= PCTL_PWR_ON;
+	tmp &= ~PCTL_OFF;
+	pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+
+	pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
+	tmp |= PCTL_PWR_ON;
+	tmp &= ~PCTL_OFF;
+	pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+
+	mw32_f(CTL, cctl);
+
+	/* reset control */
+	mw32(PCS, 0);		/*MVS_PCS */
+
+	mvs_phy_hacks(mvi);
+
+	mw32(CMD_LIST_LO, mvi->slot_dma);
+	mw32(CMD_LIST_HI, (mvi->slot_dma >> 16) >> 16);
+
+	mw32(RX_FIS_LO, mvi->rx_fis_dma);
+	mw32(RX_FIS_HI, (mvi->rx_fis_dma >> 16) >> 16);
+
+	mw32(TX_CFG, MVS_CHIP_SLOT_SZ);
+	mw32(TX_LO, mvi->tx_dma);
+	mw32(TX_HI, (mvi->tx_dma >> 16) >> 16);
+
+	mw32(RX_CFG, MVS_RX_RING_SZ);
+	mw32(RX_LO, mvi->rx_dma);
+	mw32(RX_HI, (mvi->rx_dma >> 16) >> 16);
+
+	/* enable auto port detection */
+	mw32(GBL_PORT_TYPE, MODE_AUTO_DET_EN);
+	msleep(1100);
+	/* init and reset phys */
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		u32 lo = be32_to_cpu(*(u32 *)&mvi->sas_addr[4]);
+		u32 hi = be32_to_cpu(*(u32 *)&mvi->sas_addr[0]);
+
+		mvs_detect_porttype(mvi, i);
+
+		/* set phy local SAS address */
+		mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
+		mvs_write_port_cfg_data(mvi, i, lo);
+		mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
+		mvs_write_port_cfg_data(mvi, i, hi);
+
+		/* reset phy */
+		tmp = mvs_read_phy_ctl(mvi, i);
+		tmp |= PHY_RST;
+		mvs_write_phy_ctl(mvi, i, tmp);
+	}
+
+	msleep(100);
+
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		/* clear phy int status */
+		tmp = mvs_read_port_irq_stat(mvi, i);
+		tmp &= ~PHYEV_SIG_FIS;
+		mvs_write_port_irq_stat(mvi, i, tmp);
+
+		/* set phy int mask */
+		tmp = PHYEV_RDY_CH | PHYEV_BROAD_CH | PHYEV_UNASSOC_FIS |
+			PHYEV_ID_DONE | PHYEV_DEC_ERR;
+		mvs_write_port_irq_mask(mvi, i, tmp);
+
+		msleep(100);
+		mvs_update_phyinfo(mvi, i, 1);
+		mvs_enable_xmt(mvi, i);
+	}
+
+	/* FIXME: update wide port bitmaps */
+
+	/* little endian for open address and command table, etc. */
+	/* A.C.
+	 * it seems that ( from the spec ) turning on big-endian won't
+	 * do us any good on big-endian machines, need further confirmation
+	 */
+	cctl = mr32(CTL);
+	cctl |= CCTL_ENDIAN_CMD;
+	cctl |= CCTL_ENDIAN_DATA;
+	cctl &= ~CCTL_ENDIAN_OPEN;
+	cctl |= CCTL_ENDIAN_RSP;
+	mw32_f(CTL, cctl);
+
+	/* reset CMD queue */
+	tmp = mr32(PCS);
+	tmp |= PCS_CMD_RST;
+	mw32(PCS, tmp);
+	/* interrupt coalescing may cause missing HW interrput in some case,
+	 * and the max count is 0x1ff, while our max slot is 0x200,
+	 * it will make count 0.
+	 */
+	tmp = 0;
+	mw32(INT_COAL, tmp);
+
+	tmp = 0x100;
+	mw32(INT_COAL_TMOUT, tmp);
+
+	/* ladies and gentlemen, start your engines */
+	mw32(TX_CFG, 0);
+	mw32(TX_CFG, MVS_CHIP_SLOT_SZ | TX_EN);
+	mw32(RX_CFG, MVS_RX_RING_SZ | RX_EN);
+	/* enable CMD/CMPL_Q/RESP mode */
+	mw32(PCS, PCS_SATA_RETRY | PCS_FIS_RX_EN | PCS_CMD_EN);
+
+	/* enable completion queue interrupt */
+	tmp = (CINT_PORT_MASK | CINT_DONE | CINT_MEM | CINT_SRS);
+	mw32(INT_MASK, tmp);
+
+	/* Enable SRS interrupt */
+	mw32(INT_MASK_SRS, 0xFF);
+	return 0;
+}
+
+static void __devinit mvs_print_info(struct mvs_info *mvi)
+{
+	struct pci_dev *pdev = mvi->pdev;
+	static int printed_version;
+
+	if (!printed_version++)
+		dev_printk(KERN_INFO, &pdev->dev, "version " DRV_VERSION "\n");
+
+	dev_printk(KERN_INFO, &pdev->dev, "%u phys, addr %llx\n",
+		   mvi->chip->n_phy, SAS_ADDR(mvi->sas_addr));
+}
+
+static int __devinit mvs_pci_init(struct pci_dev *pdev,
+				  const struct pci_device_id *ent)
+{
+	int rc;
+	struct mvs_info *mvi;
+	irq_handler_t irq_handler = mvs_interrupt;
+
+	rc = pci_enable_device(pdev);
+	if (rc)
+		return rc;
+
+	pci_set_master(pdev);
+
+	rc = pci_request_regions(pdev, DRV_NAME);
+	if (rc)
+		goto err_out_disable;
+
+	rc = pci_go_64(pdev);
+	if (rc)
+		goto err_out_regions;
+
+	mvi = mvs_alloc(pdev, ent);
+	if (!mvi) {
+		rc = -ENOMEM;
+		goto err_out_regions;
+	}
+
+	rc = mvs_hw_init(mvi);
+	if (rc)
+		goto err_out_mvi;
+
+#ifndef MVS_DISABLE_MSI
+	if (!pci_enable_msi(pdev)) {
+		u32 tmp;
+		void __iomem *regs = mvi->regs;
+		mvi->flags |= MVF_MSI;
+		irq_handler = mvs_msi_interrupt;
+		tmp = mr32(PCS);
+		mw32(PCS, tmp | PCS_SELF_CLEAR);
+	}
+#endif
+
+	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRV_NAME, mvi);
+	if (rc)
+		goto err_out_msi;
+
+	rc = scsi_add_host(mvi->shost, &pdev->dev);
+	if (rc)
+		goto err_out_irq;
+
+	rc = sas_register_ha(&mvi->sas);
+	if (rc)
+		goto err_out_shost;
+
+	pci_set_drvdata(pdev, mvi);
+
+	mvs_print_info(mvi);
+
+	mvs_hba_interrupt_enable(mvi);
+
+	scsi_scan_host(mvi->shost);
+
+	return 0;
+
+err_out_shost:
+	scsi_remove_host(mvi->shost);
+err_out_irq:
+	free_irq(pdev->irq, mvi);
+err_out_msi:
+	if (mvi->flags |= MVF_MSI)
+		pci_disable_msi(pdev);
+err_out_mvi:
+	mvs_free(mvi);
+err_out_regions:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+	return rc;
+}
+
+static void __devexit mvs_pci_remove(struct pci_dev *pdev)
+{
+	struct mvs_info *mvi = pci_get_drvdata(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+
+	if (mvi) {
+		sas_unregister_ha(&mvi->sas);
+		mvs_hba_interrupt_disable(mvi);
+		sas_remove_host(mvi->shost);
+		scsi_remove_host(mvi->shost);
+
+		free_irq(pdev->irq, mvi);
+		if (mvi->flags & MVF_MSI)
+			pci_disable_msi(pdev);
+		mvs_free(mvi);
+		pci_release_regions(pdev);
+	}
+	pci_disable_device(pdev);
+}
+
+static struct sas_domain_function_template mvs_transport_ops = {
+	.lldd_execute_task	= mvs_task_exec,
+	.lldd_control_phy	= mvs_phy_control,
+	.lldd_abort_task	= mvs_task_abort,
+	.lldd_port_formed	= mvs_port_formed,
+	.lldd_I_T_nexus_reset	= mvs_I_T_nexus_reset,
+};
+
+static struct pci_device_id __devinitdata mvs_pci_table[] = {
+	{ PCI_VDEVICE(MARVELL, 0x6320), chip_6320 },
+	{ PCI_VDEVICE(MARVELL, 0x6340), chip_6440 },
+	{
+		.vendor 	= PCI_VENDOR_ID_MARVELL,
+		.device 	= 0x6440,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= 0x6480,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= chip_6480,
+	},
+	{ PCI_VDEVICE(MARVELL, 0x6440), chip_6440 },
+	{ PCI_VDEVICE(MARVELL, 0x6480), chip_6480 },
+
+	{ }	/* terminate list */
+};
+
+static struct pci_driver mvs_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= mvs_pci_table,
+	.probe		= mvs_pci_init,
+	.remove		= __devexit_p(mvs_pci_remove),
+};
+
+static int __init mvs_init(void)
+{
+	int rc;
+
+	mvs_stt = sas_domain_attach_transport(&mvs_transport_ops);
+	if (!mvs_stt)
+		return -ENOMEM;
+
+	rc = pci_register_driver(&mvs_pci_driver);
+	if (rc)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	sas_release_transport(mvs_stt);
+	return rc;
+}
+
+static void __exit mvs_exit(void)
+{
+	pci_unregister_driver(&mvs_pci_driver);
+	sas_release_transport(mvs_stt);
+}
+
+module_init(mvs_init);
+module_exit(mvs_exit);
+
+MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
+MODULE_DESCRIPTION("Marvell 88SE6440 SAS/SATA controller driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, mvs_pci_table);
-- 
cgit v0.10.2


From dd4969a892ea522ecf9d7d826ba1531ce044d46f Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Fri, 8 May 2009 17:44:01 -0400
Subject: [SCSI] mvsas: split driver into multiple files

Split mvsas driver into multiple source codes, based on the split
and function distribution found in Marvell's mvsas update.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mvsas/Makefile b/drivers/scsi/mvsas/Makefile
index 1ac6ed9..a1ca681 100644
--- a/drivers/scsi/mvsas/Makefile
+++ b/drivers/scsi/mvsas/Makefile
@@ -22,5 +22,6 @@
 # USA
 
 obj-$(CONFIG_SCSI_MVSAS) += mvsas.o
-mvsas-y +=  mv_sas.o
-
+mvsas-y +=  mv_init.o  \
+           mv_sas.o   \
+           mv_64xx.o
diff --git a/drivers/scsi/mvsas/mv_64xx.c b/drivers/scsi/mvsas/mv_64xx.c
new file mode 100644
index 0000000..697806c8
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_64xx.c
@@ -0,0 +1,184 @@
+/*
+	mv_64xx.c - Marvell 88SE6440 SAS/SATA support
+
+	Copyright 2007 Red Hat, Inc.
+	Copyright 2008 Marvell. <kewei@marvell.com>
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License as
+	published by the Free Software Foundation; either version 2,
+	or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty
+	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this program; see the file COPYING.	If not,
+	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
+	MA 02139, USA.
+
+ */
+
+#include "mv_sas.h"
+#include "mv_64xx.h"
+#include "mv_chips.h"
+
+void mvs_detect_porttype(struct mvs_info *mvi, int i)
+{
+	void __iomem *regs = mvi->regs;
+	u32 reg;
+	struct mvs_phy *phy = &mvi->phy[i];
+
+	/* TODO check & save device type */
+	reg = mr32(GBL_PORT_TYPE);
+
+	if (reg & MODE_SAS_SATA & (1 << i))
+		phy->phy_type |= PORT_TYPE_SAS;
+	else
+		phy->phy_type |= PORT_TYPE_SATA;
+}
+
+void mvs_enable_xmt(struct mvs_info *mvi, int PhyId)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(PCS);
+	if (mvi->chip->n_phy <= 4)
+		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT);
+	else
+		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT2);
+	mw32(PCS, tmp);
+}
+
+void __devinit mvs_phy_hacks(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	/* workaround for SATA R-ERR, to ignore phy glitch */
+	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
+	tmp &= ~(1 << 9);
+	tmp |= (1 << 10);
+	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
+
+	/* enable retry 127 times */
+	mvs_cw32(regs, CMD_SAS_CTL1, 0x7f7f);
+
+	/* extend open frame timeout to max */
+	tmp = mvs_cr32(regs, CMD_SAS_CTL0);
+	tmp &= ~0xffff;
+	tmp |= 0x3fff;
+	mvs_cw32(regs, CMD_SAS_CTL0, tmp);
+
+	/* workaround for WDTIMEOUT , set to 550 ms */
+	mvs_cw32(regs, CMD_WD_TIMER, 0x86470);
+
+	/* not to halt for different port op during wideport link change */
+	mvs_cw32(regs, CMD_APP_ERR_CONFIG, 0xffefbf7d);
+
+	/* workaround for Seagate disk not-found OOB sequence, recv
+	 * COMINIT before sending out COMWAKE */
+	tmp = mvs_cr32(regs, CMD_PHY_MODE_21);
+	tmp &= 0x0000ffff;
+	tmp |= 0x00fa0000;
+	mvs_cw32(regs, CMD_PHY_MODE_21, tmp);
+
+	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
+	tmp &= 0x1fffffff;
+	tmp |= (2U << 29);	/* 8 ms retry */
+	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
+
+	/* TEST - for phy decoding error, adjust voltage levels */
+	mw32(P0_VSR_ADDR + 0, 0x8);
+	mw32(P0_VSR_DATA + 0, 0x2F0);
+
+	mw32(P0_VSR_ADDR + 8, 0x8);
+	mw32(P0_VSR_DATA + 8, 0x2F0);
+
+	mw32(P0_VSR_ADDR + 16, 0x8);
+	mw32(P0_VSR_DATA + 16, 0x2F0);
+
+	mw32(P0_VSR_ADDR + 24, 0x8);
+	mw32(P0_VSR_DATA + 24, 0x2F0);
+
+}
+
+void mvs_hba_interrupt_enable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(GBL_CTL);
+
+	mw32(GBL_CTL, tmp | INT_EN);
+}
+
+void mvs_hba_interrupt_disable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(GBL_CTL);
+
+	mw32(GBL_CTL, tmp & ~INT_EN);
+}
+
+void mvs_free_reg_set(struct mvs_info *mvi, struct mvs_port *port)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp, offs;
+	u8 *tfs = &port->taskfileset;
+
+	if (*tfs == MVS_ID_NOT_MAPPED)
+		return;
+
+	offs = 1U << ((*tfs & 0x0f) + PCS_EN_SATA_REG_SHIFT);
+	if (*tfs < 16) {
+		tmp = mr32(PCS);
+		mw32(PCS, tmp & ~offs);
+	} else {
+		tmp = mr32(CTL);
+		mw32(CTL, tmp & ~offs);
+	}
+
+	tmp = mr32(INT_STAT_SRS) & (1U << *tfs);
+	if (tmp)
+		mw32(INT_STAT_SRS, tmp);
+
+	*tfs = MVS_ID_NOT_MAPPED;
+}
+
+u8 mvs_assign_reg_set(struct mvs_info *mvi, struct mvs_port *port)
+{
+	int i;
+	u32 tmp, offs;
+	void __iomem *regs = mvi->regs;
+
+	if (port->taskfileset != MVS_ID_NOT_MAPPED)
+		return 0;
+
+	tmp = mr32(PCS);
+
+	for (i = 0; i < mvi->chip->srs_sz; i++) {
+		if (i == 16)
+			tmp = mr32(CTL);
+		offs = 1U << ((i & 0x0f) + PCS_EN_SATA_REG_SHIFT);
+		if (!(tmp & offs)) {
+			port->taskfileset = i;
+
+			if (i < 16)
+				mw32(PCS, tmp | offs);
+			else
+				mw32(CTL, tmp | offs);
+			tmp = mr32(INT_STAT_SRS) & (1U << i);
+			if (tmp)
+				mw32(INT_STAT_SRS, tmp);
+			return 0;
+		}
+	}
+	return MVS_ID_NOT_MAPPED;
+}
+
diff --git a/drivers/scsi/mvsas/mv_64xx.h b/drivers/scsi/mvsas/mv_64xx.h
new file mode 100644
index 0000000..c9f399e
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_64xx.h
@@ -0,0 +1,92 @@
+#ifndef _MVS64XX_REG_H_
+#define _MVS64XX_REG_H_
+
+/* enhanced mode registers (BAR4) */
+enum hw_registers {
+	MVS_GBL_CTL		= 0x04,  /* global control */
+	MVS_GBL_INT_STAT	= 0x08,  /* global irq status */
+	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
+	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
+
+	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
+	MVS_PCS			= 0x104, /* SAS/SATA port control/status */
+	MVS_CMD_LIST_LO		= 0x108, /* cmd list addr */
+	MVS_CMD_LIST_HI		= 0x10C,
+	MVS_RX_FIS_LO		= 0x110, /* RX FIS list addr */
+	MVS_RX_FIS_HI		= 0x114,
+
+	MVS_TX_CFG		= 0x120, /* TX configuration */
+	MVS_TX_LO		= 0x124, /* TX (delivery) ring addr */
+	MVS_TX_HI		= 0x128,
+
+	MVS_TX_PROD_IDX		= 0x12C, /* TX producer pointer */
+	MVS_TX_CONS_IDX		= 0x130, /* TX consumer pointer (RO) */
+	MVS_RX_CFG		= 0x134, /* RX configuration */
+	MVS_RX_LO		= 0x138, /* RX (completion) ring addr */
+	MVS_RX_HI		= 0x13C,
+	MVS_RX_CONS_IDX		= 0x140, /* RX consumer pointer (RO) */
+
+	MVS_INT_COAL		= 0x148, /* Int coalescing config */
+	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
+	MVS_INT_STAT		= 0x150, /* Central int status */
+	MVS_INT_MASK		= 0x154, /* Central int enable */
+	MVS_INT_STAT_SRS	= 0x158, /* SATA register set status */
+	MVS_INT_MASK_SRS	= 0x15C,
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_INT_STAT		= 0x160, /* port0 interrupt status */
+	MVS_P0_INT_MASK		= 0x164, /* port0 interrupt mask */
+	MVS_P4_INT_STAT		= 0x200, /* Port 4 interrupt status */
+	MVS_P4_INT_MASK		= 0x204, /* Port 4 interrupt enable mask */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_SER_CTLSTAT	= 0x180, /* port0 serial control/status */
+	MVS_P4_SER_CTLSTAT	= 0x220, /* port4 serial control/status */
+
+	MVS_CMD_ADDR		= 0x1B8, /* Command register port (addr) */
+	MVS_CMD_DATA		= 0x1BC, /* Command register port (data) */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_CFG_ADDR		= 0x1C0, /* port0 phy register address */
+	MVS_P0_CFG_DATA		= 0x1C4, /* port0 phy register data */
+	MVS_P4_CFG_ADDR		= 0x230, /* Port 4 config address */
+	MVS_P4_CFG_DATA		= 0x234, /* Port 4 config data */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_VSR_ADDR		= 0x1E0, /* port0 VSR address */
+	MVS_P0_VSR_DATA		= 0x1E4, /* port0 VSR data */
+	MVS_P4_VSR_ADDR		= 0x250, /* port 4 VSR addr */
+	MVS_P4_VSR_DATA		= 0x254, /* port 4 VSR data */
+};
+
+enum pci_cfg_registers {
+	PCR_PHY_CTL		= 0x40,
+	PCR_PHY_CTL2		= 0x90,
+	PCR_DEV_CTRL		= 0xE8,
+};
+
+/*  SAS/SATA Vendor Specific Port Registers */
+enum sas_sata_vsp_regs {
+	VSR_PHY_STAT		= 0x00, /* Phy Status */
+	VSR_PHY_MODE1		= 0x01, /* phy tx */
+	VSR_PHY_MODE2		= 0x02, /* tx scc */
+	VSR_PHY_MODE3		= 0x03, /* pll */
+	VSR_PHY_MODE4		= 0x04, /* VCO */
+	VSR_PHY_MODE5		= 0x05, /* Rx */
+	VSR_PHY_MODE6		= 0x06, /* CDR */
+	VSR_PHY_MODE7		= 0x07, /* Impedance */
+	VSR_PHY_MODE8		= 0x08, /* Voltage */
+	VSR_PHY_MODE9		= 0x09, /* Test */
+	VSR_PHY_MODE10		= 0x0A, /* Power */
+	VSR_PHY_MODE11		= 0x0B, /* Phy Mode */
+	VSR_PHY_VS0		= 0x0C, /* Vednor Specific 0 */
+	VSR_PHY_VS1		= 0x0D, /* Vednor Specific 1 */
+};
+
+struct mvs_prd {
+	__le64			addr;		/* 64-bit buffer address */
+	__le32			reserved;
+	__le32			len;		/* 16-bit length */
+};
+
+#endif
diff --git a/drivers/scsi/mvsas/mv_chips.h b/drivers/scsi/mvsas/mv_chips.h
new file mode 100644
index 0000000..cf74b7a
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_chips.h
@@ -0,0 +1,118 @@
+#ifndef _MV_CHIPS_H_
+#define _MV_CHIPS_H_
+
+#define mr32(reg)	readl(regs + MVS_##reg)
+#define mw32(reg,val)	writel((val), regs + MVS_##reg)
+#define mw32_f(reg,val)	do {			\
+	writel((val), regs + MVS_##reg);	\
+	readl(regs + MVS_##reg);		\
+	} while (0)
+
+static inline u32 mvs_cr32(void __iomem *regs, u32 addr)
+{
+	mw32(CMD_ADDR, addr);
+	return mr32(CMD_DATA);
+}
+
+static inline void mvs_cw32(void __iomem *regs, u32 addr, u32 val)
+{
+	mw32(CMD_ADDR, addr);
+	mw32(CMD_DATA, val);
+}
+
+static inline u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port)
+{
+	void __iomem *regs = mvi->regs;
+	return (port < 4)?mr32(P0_SER_CTLSTAT + port * 4):
+		mr32(P4_SER_CTLSTAT + (port - 4) * 4);
+}
+
+static inline void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val)
+{
+	void __iomem *regs = mvi->regs;
+	if (port < 4)
+		mw32(P0_SER_CTLSTAT + port * 4, val);
+	else
+		mw32(P4_SER_CTLSTAT + (port - 4) * 4, val);
+}
+
+static inline u32 mvs_read_port(struct mvs_info *mvi, u32 off, u32 off2, u32 port)
+{
+	void __iomem *regs = mvi->regs + off;
+	void __iomem *regs2 = mvi->regs + off2;
+	return (port < 4)?readl(regs + port * 8):
+		readl(regs2 + (port - 4) * 8);
+}
+
+static inline void mvs_write_port(struct mvs_info *mvi, u32 off, u32 off2,
+				u32 port, u32 val)
+{
+	void __iomem *regs = mvi->regs + off;
+	void __iomem *regs2 = mvi->regs + off2;
+	if (port < 4)
+		writel(val, regs + port * 8);
+	else
+		writel(val, regs2 + (port - 4) * 8);
+}
+
+static inline u32 mvs_read_port_cfg_data(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_CFG_DATA,
+			MVS_P4_CFG_DATA, port);
+}
+
+static inline void mvs_write_port_cfg_data(struct mvs_info *mvi, u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_CFG_DATA,
+			MVS_P4_CFG_DATA, port, val);
+}
+
+static inline void mvs_write_port_cfg_addr(struct mvs_info *mvi, u32 port, u32 addr)
+{
+	mvs_write_port(mvi, MVS_P0_CFG_ADDR,
+			MVS_P4_CFG_ADDR, port, addr);
+}
+
+static inline u32 mvs_read_port_vsr_data(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_VSR_DATA,
+			MVS_P4_VSR_DATA, port);
+}
+
+static inline void mvs_write_port_vsr_data(struct mvs_info *mvi, u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_VSR_DATA,
+			MVS_P4_VSR_DATA, port, val);
+}
+
+static inline void mvs_write_port_vsr_addr(struct mvs_info *mvi, u32 port, u32 addr)
+{
+	mvs_write_port(mvi, MVS_P0_VSR_ADDR,
+			MVS_P4_VSR_ADDR, port, addr);
+}
+
+static inline u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_INT_STAT,
+			MVS_P4_INT_STAT, port);
+}
+
+static inline void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_INT_STAT,
+			MVS_P4_INT_STAT, port, val);
+}
+
+static inline u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port)
+{
+	return mvs_read_port(mvi, MVS_P0_INT_MASK,
+			MVS_P4_INT_MASK, port);
+}
+
+static inline void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val)
+{
+	mvs_write_port(mvi, MVS_P0_INT_MASK,
+			MVS_P4_INT_MASK, port, val);
+}
+
+#endif
diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h
new file mode 100644
index 0000000..d8e96a3
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_defs.h
@@ -0,0 +1,441 @@
+/*
+	mv_defs.h - Marvell 88SE6440 SAS/SATA support
+
+	Copyright 2007 Red Hat, Inc.
+	Copyright 2008 Marvell. <kewei@marvell.com>
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License as
+	published by the Free Software Foundation; either version 2,
+	or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty
+	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this program; see the file COPYING.	If not,
+	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
+	MA 02139, USA.
+
+ */
+
+#ifndef _MV_DEFS_H_
+#define _MV_DEFS_H_
+
+/* driver compile-time configuration */
+enum driver_configuration {
+	MVS_TX_RING_SZ		= 1024,	/* TX ring size (12-bit) */
+	MVS_RX_RING_SZ		= 1024, /* RX ring size (12-bit) */
+					/* software requires power-of-2
+					   ring size */
+
+	MVS_SLOTS		= 512,	/* command slots */
+	MVS_SLOT_BUF_SZ		= 8192, /* cmd tbl + IU + status + PRD */
+	MVS_SSP_CMD_SZ		= 64,	/* SSP command table buffer size */
+	MVS_ATA_CMD_SZ		= 96,	/* SATA command table buffer size */
+	MVS_OAF_SZ		= 64,	/* Open address frame buffer size */
+
+	MVS_RX_FIS_COUNT	= 17,	/* Optional rx'd FISs (max 17) */
+
+	MVS_QUEUE_SIZE		= 30,	/* Support Queue depth */
+	MVS_CAN_QUEUE		= MVS_SLOTS - 1,	/* SCSI Queue depth */
+};
+
+/* unchangeable hardware details */
+enum hardware_details {
+	MVS_MAX_PHYS		= 8,	/* max. possible phys */
+	MVS_MAX_PORTS		= 8,	/* max. possible ports */
+	MVS_RX_FISL_SZ		= 0x400 + (MVS_RX_FIS_COUNT * 0x100),
+};
+
+/* peripheral registers (BAR2) */
+enum peripheral_registers {
+	SPI_CTL			= 0x10,	/* EEPROM control */
+	SPI_CMD			= 0x14,	/* EEPROM command */
+	SPI_DATA		= 0x18, /* EEPROM data */
+};
+
+enum peripheral_register_bits {
+	TWSI_RDY		= (1U << 7),	/* EEPROM interface ready */
+	TWSI_RD			= (1U << 4),	/* EEPROM read access */
+
+	SPI_ADDR_MASK		= 0x3ffff,	/* bits 17:0 */
+};
+
+enum hw_register_bits {
+	/* MVS_GBL_CTL */
+	INT_EN			= (1U << 1),	/* Global int enable */
+	HBA_RST			= (1U << 0),	/* HBA reset */
+
+	/* MVS_GBL_INT_STAT */
+	INT_XOR			= (1U << 4),	/* XOR engine event */
+	INT_SAS_SATA		= (1U << 0),	/* SAS/SATA event */
+
+	/* MVS_GBL_PORT_TYPE */			/* shl for ports 1-3 */
+	SATA_TARGET		= (1U << 16),	/* port0 SATA target enable */
+	MODE_AUTO_DET_PORT7 = (1U << 15),	/* port0 SAS/SATA autodetect */
+	MODE_AUTO_DET_PORT6 = (1U << 14),
+	MODE_AUTO_DET_PORT5 = (1U << 13),
+	MODE_AUTO_DET_PORT4 = (1U << 12),
+	MODE_AUTO_DET_PORT3 = (1U << 11),
+	MODE_AUTO_DET_PORT2 = (1U << 10),
+	MODE_AUTO_DET_PORT1 = (1U << 9),
+	MODE_AUTO_DET_PORT0 = (1U << 8),
+	MODE_AUTO_DET_EN    =	MODE_AUTO_DET_PORT0 | MODE_AUTO_DET_PORT1 |
+				MODE_AUTO_DET_PORT2 | MODE_AUTO_DET_PORT3 |
+				MODE_AUTO_DET_PORT4 | MODE_AUTO_DET_PORT5 |
+				MODE_AUTO_DET_PORT6 | MODE_AUTO_DET_PORT7,
+	MODE_SAS_PORT7_MASK = (1U << 7),  /* port0 SAS(1), SATA(0) mode */
+	MODE_SAS_PORT6_MASK = (1U << 6),
+	MODE_SAS_PORT5_MASK = (1U << 5),
+	MODE_SAS_PORT4_MASK = (1U << 4),
+	MODE_SAS_PORT3_MASK = (1U << 3),
+	MODE_SAS_PORT2_MASK = (1U << 2),
+	MODE_SAS_PORT1_MASK = (1U << 1),
+	MODE_SAS_PORT0_MASK = (1U << 0),
+	MODE_SAS_SATA	=	MODE_SAS_PORT0_MASK | MODE_SAS_PORT1_MASK |
+				MODE_SAS_PORT2_MASK | MODE_SAS_PORT3_MASK |
+				MODE_SAS_PORT4_MASK | MODE_SAS_PORT5_MASK |
+				MODE_SAS_PORT6_MASK | MODE_SAS_PORT7_MASK,
+
+				/* SAS_MODE value may be
+				 * dictated (in hw) by values
+				 * of SATA_TARGET & AUTO_DET
+				 */
+
+	/* MVS_TX_CFG */
+	TX_EN			= (1U << 16),	/* Enable TX */
+	TX_RING_SZ_MASK		= 0xfff,	/* TX ring size, bits 11:0 */
+
+	/* MVS_RX_CFG */
+	RX_EN			= (1U << 16),	/* Enable RX */
+	RX_RING_SZ_MASK		= 0xfff,	/* RX ring size, bits 11:0 */
+
+	/* MVS_INT_COAL */
+	COAL_EN			= (1U << 16),	/* Enable int coalescing */
+
+	/* MVS_INT_STAT, MVS_INT_MASK */
+	CINT_I2C		= (1U << 31),	/* I2C event */
+	CINT_SW0		= (1U << 30),	/* software event 0 */
+	CINT_SW1		= (1U << 29),	/* software event 1 */
+	CINT_PRD_BC		= (1U << 28),	/* PRD BC err for read cmd */
+	CINT_DMA_PCIE		= (1U << 27),	/* DMA to PCIE timeout */
+	CINT_MEM		= (1U << 26),	/* int mem parity err */
+	CINT_I2C_SLAVE		= (1U << 25),	/* slave I2C event */
+	CINT_SRS		= (1U << 3),	/* SRS event */
+	CINT_CI_STOP		= (1U << 1),	/* cmd issue stopped */
+	CINT_DONE		= (1U << 0),	/* cmd completion */
+
+						/* shl for ports 1-3 */
+	CINT_PORT_STOPPED	= (1U << 16),	/* port0 stopped */
+	CINT_PORT		= (1U << 8),	/* port0 event */
+	CINT_PORT_MASK_OFFSET	= 8,
+	CINT_PORT_MASK		= (0xFF << CINT_PORT_MASK_OFFSET),
+
+	/* TX (delivery) ring bits */
+	TXQ_CMD_SHIFT		= 29,
+	TXQ_CMD_SSP		= 1,		/* SSP protocol */
+	TXQ_CMD_SMP		= 2,		/* SMP protocol */
+	TXQ_CMD_STP		= 3,		/* STP/SATA protocol */
+	TXQ_CMD_SSP_FREE_LIST	= 4,		/* add to SSP targ free list */
+	TXQ_CMD_SLOT_RESET	= 7,		/* reset command slot */
+	TXQ_MODE_I		= (1U << 28),	/* mode: 0=target,1=initiator */
+	TXQ_PRIO_HI		= (1U << 27),	/* priority: 0=normal, 1=high */
+	TXQ_SRS_SHIFT		= 20,		/* SATA register set */
+	TXQ_SRS_MASK		= 0x7f,
+	TXQ_PHY_SHIFT		= 12,		/* PHY bitmap */
+	TXQ_PHY_MASK		= 0xff,
+	TXQ_SLOT_MASK		= 0xfff,	/* slot number */
+
+	/* RX (completion) ring bits */
+	RXQ_GOOD		= (1U << 23),	/* Response good */
+	RXQ_SLOT_RESET		= (1U << 21),	/* Slot reset complete */
+	RXQ_CMD_RX		= (1U << 20),	/* target cmd received */
+	RXQ_ATTN		= (1U << 19),	/* attention */
+	RXQ_RSP			= (1U << 18),	/* response frame xfer'd */
+	RXQ_ERR			= (1U << 17),	/* err info rec xfer'd */
+	RXQ_DONE		= (1U << 16),	/* cmd complete */
+	RXQ_SLOT_MASK		= 0xfff,	/* slot number */
+
+	/* mvs_cmd_hdr bits */
+	MCH_PRD_LEN_SHIFT	= 16,		/* 16-bit PRD table len */
+	MCH_SSP_FR_TYPE_SHIFT	= 13,		/* SSP frame type */
+
+						/* SSP initiator only */
+	MCH_SSP_FR_CMD		= 0x0,		/* COMMAND frame */
+
+						/* SSP initiator or target */
+	MCH_SSP_FR_TASK		= 0x1,		/* TASK frame */
+
+						/* SSP target only */
+	MCH_SSP_FR_XFER_RDY	= 0x4,		/* XFER_RDY frame */
+	MCH_SSP_FR_RESP		= 0x5,		/* RESPONSE frame */
+	MCH_SSP_FR_READ		= 0x6,		/* Read DATA frame(s) */
+	MCH_SSP_FR_READ_RESP	= 0x7,		/* ditto, plus RESPONSE */
+
+	MCH_PASSTHRU		= (1U << 12),	/* pass-through (SSP) */
+	MCH_FBURST		= (1U << 11),	/* first burst (SSP) */
+	MCH_CHK_LEN		= (1U << 10),	/* chk xfer len (SSP) */
+	MCH_RETRY		= (1U << 9),	/* tport layer retry (SSP) */
+	MCH_PROTECTION		= (1U << 8),	/* protection info rec (SSP) */
+	MCH_RESET		= (1U << 7),	/* Reset (STP/SATA) */
+	MCH_FPDMA		= (1U << 6),	/* First party DMA (STP/SATA) */
+	MCH_ATAPI		= (1U << 5),	/* ATAPI (STP/SATA) */
+	MCH_BIST		= (1U << 4),	/* BIST activate (STP/SATA) */
+	MCH_PMP_MASK		= 0xf,		/* PMP from cmd FIS (STP/SATA)*/
+
+	CCTL_RST		= (1U << 5),	/* port logic reset */
+
+						/* 0(LSB first), 1(MSB first) */
+	CCTL_ENDIAN_DATA	= (1U << 3),	/* PRD data */
+	CCTL_ENDIAN_RSP		= (1U << 2),	/* response frame */
+	CCTL_ENDIAN_OPEN	= (1U << 1),	/* open address frame */
+	CCTL_ENDIAN_CMD		= (1U << 0),	/* command table */
+
+	/* MVS_Px_SER_CTLSTAT (per-phy control) */
+	PHY_SSP_RST		= (1U << 3),	/* reset SSP link layer */
+	PHY_BCAST_CHG		= (1U << 2),	/* broadcast(change) notif */
+	PHY_RST_HARD		= (1U << 1),	/* hard reset + phy reset */
+	PHY_RST			= (1U << 0),	/* phy reset */
+	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0xF << 8),
+	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0xF << 12),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (16),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
+			(0xF << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
+	PHY_READY_MASK		= (1U << 20),
+
+	/* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */
+	PHYEV_DEC_ERR		= (1U << 24),	/* Phy Decoding Error */
+	PHYEV_UNASSOC_FIS	= (1U << 19),	/* unassociated FIS rx'd */
+	PHYEV_AN		= (1U << 18),	/* SATA async notification */
+	PHYEV_BIST_ACT		= (1U << 17),	/* BIST activate FIS */
+	PHYEV_SIG_FIS		= (1U << 16),	/* signature FIS */
+	PHYEV_POOF		= (1U << 12),	/* phy ready from 1 -> 0 */
+	PHYEV_IU_BIG		= (1U << 11),	/* IU too long err */
+	PHYEV_IU_SMALL		= (1U << 10),	/* IU too short err */
+	PHYEV_UNK_TAG		= (1U << 9),	/* unknown tag */
+	PHYEV_BROAD_CH		= (1U << 8),	/* broadcast(CHANGE) */
+	PHYEV_COMWAKE		= (1U << 7),	/* COMWAKE rx'd */
+	PHYEV_PORT_SEL		= (1U << 6),	/* port selector present */
+	PHYEV_HARD_RST		= (1U << 5),	/* hard reset rx'd */
+	PHYEV_ID_TMOUT		= (1U << 4),	/* identify timeout */
+	PHYEV_ID_FAIL		= (1U << 3),	/* identify failed */
+	PHYEV_ID_DONE		= (1U << 2),	/* identify done */
+	PHYEV_HARD_RST_DONE	= (1U << 1),	/* hard reset done */
+	PHYEV_RDY_CH		= (1U << 0),	/* phy ready changed state */
+
+	/* MVS_PCS */
+	PCS_EN_SATA_REG_SHIFT	= (16),		/* Enable SATA Register Set */
+	PCS_EN_PORT_XMT_SHIFT	= (12),		/* Enable Port Transmit */
+	PCS_EN_PORT_XMT_SHIFT2	= (8),		/* For 6480 */
+	PCS_SATA_RETRY		= (1U << 8),	/* retry ctl FIS on R_ERR */
+	PCS_RSP_RX_EN		= (1U << 7),	/* raw response rx */
+	PCS_SELF_CLEAR		= (1U << 5),	/* self-clearing int mode */
+	PCS_FIS_RX_EN		= (1U << 4),	/* FIS rx enable */
+	PCS_CMD_STOP_ERR	= (1U << 3),	/* cmd stop-on-err enable */
+	PCS_CMD_RST		= (1U << 1),	/* reset cmd issue */
+	PCS_CMD_EN		= (1U << 0),	/* enable cmd issue */
+
+	/* Port n Attached Device Info */
+	PORT_DEV_SSP_TRGT	= (1U << 19),
+	PORT_DEV_SMP_TRGT	= (1U << 18),
+	PORT_DEV_STP_TRGT	= (1U << 17),
+	PORT_DEV_SSP_INIT	= (1U << 11),
+	PORT_DEV_SMP_INIT	= (1U << 10),
+	PORT_DEV_STP_INIT	= (1U << 9),
+	PORT_PHY_ID_MASK	= (0xFFU << 24),
+	PORT_DEV_TRGT_MASK	= (0x7U << 17),
+	PORT_DEV_INIT_MASK	= (0x7U << 9),
+	PORT_DEV_TYPE_MASK	= (0x7U << 0),
+
+	/* Port n PHY Status */
+	PHY_RDY			= (1U << 2),
+	PHY_DW_SYNC		= (1U << 1),
+	PHY_OOB_DTCTD		= (1U << 0),
+
+	/* VSR */
+	/* PHYMODE 6 (CDB) */
+	PHY_MODE6_LATECLK	= (1U << 29),	/* Lock Clock */
+	PHY_MODE6_DTL_SPEED	= (1U << 27),	/* Digital Loop Speed */
+	PHY_MODE6_FC_ORDER	= (1U << 26),	/* Fibre Channel Mode Order*/
+	PHY_MODE6_MUCNT_EN	= (1U << 24),	/* u Count Enable */
+	PHY_MODE6_SEL_MUCNT_LEN	= (1U << 22),	/* Training Length Select */
+	PHY_MODE6_SELMUPI	= (1U << 20),	/* Phase Multi Select (init) */
+	PHY_MODE6_SELMUPF	= (1U << 18),	/* Phase Multi Select (final) */
+	PHY_MODE6_SELMUFF	= (1U << 16),	/* Freq Loop Multi Sel(final) */
+	PHY_MODE6_SELMUFI	= (1U << 14),	/* Freq Loop Multi Sel(init) */
+	PHY_MODE6_FREEZE_LOOP	= (1U << 12),	/* Freeze Rx CDR Loop */
+	PHY_MODE6_INT_RXFOFFS	= (1U << 3),	/* Rx CDR Freq Loop Enable */
+	PHY_MODE6_FRC_RXFOFFS	= (1U << 2),	/* Initial Rx CDR Offset */
+	PHY_MODE6_STAU_0D8	= (1U << 1),	/* Rx CDR Freq Loop Saturate */
+	PHY_MODE6_RXSAT_DIS	= (1U << 0),	/* Saturate Ctl */
+};
+
+/* SAS/SATA configuration port registers, aka phy registers */
+enum sas_sata_config_port_regs {
+	PHYR_IDENTIFY		= 0x00,	/* info for IDENTIFY frame */
+	PHYR_ADDR_LO		= 0x04,	/* my SAS address (low) */
+	PHYR_ADDR_HI		= 0x08,	/* my SAS address (high) */
+	PHYR_ATT_DEV_INFO	= 0x0C,	/* attached device info */
+	PHYR_ATT_ADDR_LO	= 0x10,	/* attached dev SAS addr (low) */
+	PHYR_ATT_ADDR_HI	= 0x14,	/* attached dev SAS addr (high) */
+	PHYR_SATA_CTL		= 0x18,	/* SATA control */
+	PHYR_PHY_STAT		= 0x1C,	/* PHY status */
+	PHYR_SATA_SIG0		= 0x20,	/*port SATA signature FIS(Byte 0-3) */
+	PHYR_SATA_SIG1		= 0x24,	/*port SATA signature FIS(Byte 4-7) */
+	PHYR_SATA_SIG2		= 0x28,	/*port SATA signature FIS(Byte 8-11) */
+	PHYR_SATA_SIG3		= 0x2c,	/*port SATA signature FIS(Byte 12-15) */
+	PHYR_R_ERR_COUNT	= 0x30, /* port R_ERR count register */
+	PHYR_CRC_ERR_COUNT	= 0x34, /* port CRC error count register */
+	PHYR_WIDE_PORT		= 0x38,	/* wide port participating */
+	PHYR_CURRENT0		= 0x80,	/* current connection info 0 */
+	PHYR_CURRENT1		= 0x84,	/* current connection info 1 */
+	PHYR_CURRENT2		= 0x88,	/* current connection info 2 */
+};
+
+enum mvs_info_flags {
+	MVF_MSI			= (1U << 0),	/* MSI is enabled */
+	MVF_PHY_PWR_FIX		= (1U << 1),	/* bug workaround */
+};
+
+enum sas_cmd_port_registers {
+	CMD_CMRST_OOB_DET	= 0x100, /* COMRESET OOB detect register */
+	CMD_CMWK_OOB_DET	= 0x104, /* COMWAKE OOB detect register */
+	CMD_CMSAS_OOB_DET	= 0x108, /* COMSAS OOB detect register */
+	CMD_BRST_OOB_DET	= 0x10c, /* burst OOB detect register */
+	CMD_OOB_SPACE		= 0x110, /* OOB space control register */
+	CMD_OOB_BURST		= 0x114, /* OOB burst control register */
+	CMD_PHY_TIMER		= 0x118, /* PHY timer control register */
+	CMD_PHY_CONFIG0		= 0x11c, /* PHY config register 0 */
+	CMD_PHY_CONFIG1		= 0x120, /* PHY config register 1 */
+	CMD_SAS_CTL0		= 0x124, /* SAS control register 0 */
+	CMD_SAS_CTL1		= 0x128, /* SAS control register 1 */
+	CMD_SAS_CTL2		= 0x12c, /* SAS control register 2 */
+	CMD_SAS_CTL3		= 0x130, /* SAS control register 3 */
+	CMD_ID_TEST		= 0x134, /* ID test register */
+	CMD_PL_TIMER		= 0x138, /* PL timer register */
+	CMD_WD_TIMER		= 0x13c, /* WD timer register */
+	CMD_PORT_SEL_COUNT	= 0x140, /* port selector count register */
+	CMD_APP_MEM_CTL		= 0x144, /* Application Memory Control */
+	CMD_XOR_MEM_CTL		= 0x148, /* XOR Block Memory Control */
+	CMD_DMA_MEM_CTL		= 0x14c, /* DMA Block Memory Control */
+	CMD_PORT_MEM_CTL0	= 0x150, /* Port Memory Control 0 */
+	CMD_PORT_MEM_CTL1	= 0x154, /* Port Memory Control 1 */
+	CMD_SATA_PORT_MEM_CTL0	= 0x158, /* SATA Port Memory Control 0 */
+	CMD_SATA_PORT_MEM_CTL1	= 0x15c, /* SATA Port Memory Control 1 */
+	CMD_XOR_MEM_BIST_CTL	= 0x160, /* XOR Memory BIST Control */
+	CMD_XOR_MEM_BIST_STAT	= 0x164, /* XOR Memroy BIST Status */
+	CMD_DMA_MEM_BIST_CTL	= 0x168, /* DMA Memory BIST Control */
+	CMD_DMA_MEM_BIST_STAT	= 0x16c, /* DMA Memory BIST Status */
+	CMD_PORT_MEM_BIST_CTL	= 0x170, /* Port Memory BIST Control */
+	CMD_PORT_MEM_BIST_STAT0 = 0x174, /* Port Memory BIST Status 0 */
+	CMD_PORT_MEM_BIST_STAT1 = 0x178, /* Port Memory BIST Status 1 */
+	CMD_STP_MEM_BIST_CTL	= 0x17c, /* STP Memory BIST Control */
+	CMD_STP_MEM_BIST_STAT0	= 0x180, /* STP Memory BIST Status 0 */
+	CMD_STP_MEM_BIST_STAT1	= 0x184, /* STP Memory BIST Status 1 */
+	CMD_RESET_COUNT		= 0x188, /* Reset Count */
+	CMD_MONTR_DATA_SEL	= 0x18C, /* Monitor Data/Select */
+	CMD_PLL_PHY_CONFIG	= 0x190, /* PLL/PHY Configuration */
+	CMD_PHY_CTL		= 0x194, /* PHY Control and Status */
+	CMD_PHY_TEST_COUNT0	= 0x198, /* Phy Test Count 0 */
+	CMD_PHY_TEST_COUNT1	= 0x19C, /* Phy Test Count 1 */
+	CMD_PHY_TEST_COUNT2	= 0x1A0, /* Phy Test Count 2 */
+	CMD_APP_ERR_CONFIG	= 0x1A4, /* Application Error Configuration */
+	CMD_PND_FIFO_CTL0	= 0x1A8, /* Pending FIFO Control 0 */
+	CMD_HOST_CTL		= 0x1AC, /* Host Control Status */
+	CMD_HOST_WR_DATA	= 0x1B0, /* Host Write Data */
+	CMD_HOST_RD_DATA	= 0x1B4, /* Host Read Data */
+	CMD_PHY_MODE_21		= 0x1B8, /* Phy Mode 21 */
+	CMD_SL_MODE0		= 0x1BC, /* SL Mode 0 */
+	CMD_SL_MODE1		= 0x1C0, /* SL Mode 1 */
+	CMD_PND_FIFO_CTL1	= 0x1C4, /* Pending FIFO Control 1 */
+};
+
+enum pci_cfg_register_bits {
+	PCTL_PWR_ON	= (0xFU << 24),
+	PCTL_OFF	= (0xFU << 12),
+	PRD_REQ_SIZE	= (0x4000),
+	PRD_REQ_MASK	= (0x00007000),
+};
+
+enum nvram_layout_offsets {
+	NVR_SIG		= 0x00,		/* 0xAA, 0x55 */
+	NVR_SAS_ADDR	= 0x02,		/* 8-byte SAS address */
+};
+
+enum chip_flavors {
+	chip_6320,
+	chip_6440,
+	chip_6480,
+};
+
+enum port_type {
+	PORT_TYPE_SAS	=  (1L << 1),
+	PORT_TYPE_SATA	=  (1L << 0),
+};
+
+/* Command Table Format */
+enum ct_format {
+	/* SSP */
+	SSP_F_H		=  0x00,
+	SSP_F_IU	=  0x18,
+	SSP_F_MAX	=  0x4D,
+	/* STP */
+	STP_CMD_FIS	=  0x00,
+	STP_ATAPI_CMD	=  0x40,
+	STP_F_MAX	=  0x10,
+	/* SMP */
+	SMP_F_T		=  0x00,
+	SMP_F_DEP	=  0x01,
+	SMP_F_MAX	=  0x101,
+};
+
+enum status_buffer {
+	SB_EIR_OFF	=  0x00,	/* Error Information Record */
+	SB_RFB_OFF	=  0x08,	/* Response Frame Buffer */
+	SB_RFB_MAX	=  0x400,	/* RFB size*/
+};
+
+enum error_info_rec {
+	CMD_ISS_STPD	= (1U << 31),	/* Cmd Issue Stopped */
+	CMD_PI_ERR	= (1U << 30),	/* Protection info error.  see flags2 */
+	RSP_OVER	= (1U << 29),	/* rsp buffer overflow */
+	RETRY_LIM	= (1U << 28),	/* FIS/frame retry limit exceeded */
+	UNK_FIS 	= (1U << 27),	/* unknown FIS */
+	DMA_TERM	= (1U << 26),	/* DMA terminate primitive rx'd */
+	SYNC_ERR	= (1U << 25),	/* SYNC rx'd during frame xmit */
+	TFILE_ERR	= (1U << 24),	/* SATA taskfile Error bit set */
+	R_ERR		= (1U << 23),	/* SATA returned R_ERR prim */
+	RD_OFS		= (1U << 20),	/* Read DATA frame invalid offset */
+	XFER_RDY_OFS	= (1U << 19),	/* XFER_RDY offset error */
+	UNEXP_XFER_RDY	= (1U << 18),	/* unexpected XFER_RDY error */
+	DATA_OVER_UNDER = (1U << 16),	/* data overflow/underflow */
+	INTERLOCK	= (1U << 15),	/* interlock error */
+	NAK		= (1U << 14),	/* NAK rx'd */
+	ACK_NAK_TO	= (1U << 13),	/* ACK/NAK timeout */
+	CXN_CLOSED	= (1U << 12),	/* cxn closed w/out ack/nak */
+	OPEN_TO 	= (1U << 11),	/* I_T nexus lost, open cxn timeout */
+	PATH_BLOCKED	= (1U << 10),	/* I_T nexus lost, pathway blocked */
+	NO_DEST 	= (1U << 9),	/* I_T nexus lost, no destination */
+	STP_RES_BSY	= (1U << 8),	/* STP resources busy */
+	BREAK		= (1U << 7),	/* break received */
+	BAD_DEST	= (1U << 6),	/* bad destination */
+	BAD_PROTO	= (1U << 5),	/* protocol not supported */
+	BAD_RATE	= (1U << 4),	/* cxn rate not supported */
+	WRONG_DEST	= (1U << 3),	/* wrong destination error */
+	CREDIT_TO	= (1U << 2),	/* credit timeout */
+	WDOG_TO 	= (1U << 1),	/* watchdog timeout */
+	BUF_PAR 	= (1U << 0),	/* buffer parity error */
+};
+
+enum error_info_rec_2 {
+	SLOT_BSY_ERR	= (1U << 31),	/* Slot Busy Error */
+	GRD_CHK_ERR	= (1U << 14),	/* Guard Check Error */
+	APP_CHK_ERR	= (1U << 13),	/* Application Check error */
+	REF_CHK_ERR	= (1U << 12),	/* Reference Check Error */
+	USR_BLK_NM	= (1U << 0),	/* User Block Number */
+};
+
+#endif
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
new file mode 100644
index 0000000..258a1a9
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -0,0 +1,524 @@
+/*
+	mv_init.c - Marvell 88SE6440 SAS/SATA init support
+
+	Copyright 2007 Red Hat, Inc.
+	Copyright 2008 Marvell. <kewei@marvell.com>
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License as
+	published by the Free Software Foundation; either version 2,
+	or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty
+	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this program; see the file COPYING.	If not,
+	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
+	MA 02139, USA.
+
+ */
+
+#include "mv_sas.h"
+#include "mv_64xx.h"
+#include "mv_chips.h"
+
+static struct scsi_transport_template *mvs_stt;
+
+static const struct mvs_chip_info mvs_chips[] = {
+	[chip_6320] =		{ 2, 16, 9  },
+	[chip_6440] =		{ 4, 16, 9  },
+	[chip_6480] =		{ 8, 32, 10 },
+};
+
+static struct scsi_host_template mvs_sht = {
+	.module			= THIS_MODULE,
+	.name			= DRV_NAME,
+	.queuecommand		= sas_queuecommand,
+	.target_alloc		= sas_target_alloc,
+	.slave_configure	= mvs_slave_configure,
+	.slave_destroy		= sas_slave_destroy,
+	.scan_finished		= mvs_scan_finished,
+	.scan_start		= mvs_scan_start,
+	.change_queue_depth	= sas_change_queue_depth,
+	.change_queue_type	= sas_change_queue_type,
+	.bios_param		= sas_bios_param,
+	.can_queue		= 1,
+	.cmd_per_lun		= 1,
+	.this_id		= -1,
+	.sg_tablesize		= SG_ALL,
+	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.eh_device_reset_handler	= sas_eh_device_reset_handler,
+	.eh_bus_reset_handler	= sas_eh_bus_reset_handler,
+	.slave_alloc		= sas_slave_alloc,
+	.target_destroy		= sas_target_destroy,
+	.ioctl			= sas_ioctl,
+};
+
+static struct sas_domain_function_template mvs_transport_ops = {
+	.lldd_execute_task	= mvs_task_exec,
+	.lldd_control_phy	= mvs_phy_control,
+	.lldd_abort_task	= mvs_task_abort,
+	.lldd_port_formed	= mvs_port_formed,
+	.lldd_I_T_nexus_reset	= mvs_I_T_nexus_reset,
+};
+
+static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
+{
+	struct mvs_phy *phy = &mvi->phy[phy_id];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+	sas_phy->enabled = (phy_id < mvi->chip->n_phy) ? 1 : 0;
+	sas_phy->class = SAS;
+	sas_phy->iproto = SAS_PROTOCOL_ALL;
+	sas_phy->tproto = 0;
+	sas_phy->type = PHY_TYPE_PHYSICAL;
+	sas_phy->role = PHY_ROLE_INITIATOR;
+	sas_phy->oob_mode = OOB_NOT_CONNECTED;
+	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
+
+	sas_phy->id = phy_id;
+	sas_phy->sas_addr = &mvi->sas_addr[0];
+	sas_phy->frame_rcvd = &phy->frame_rcvd[0];
+	sas_phy->ha = &mvi->sas;
+	sas_phy->lldd_phy = phy;
+}
+
+static void mvs_free(struct mvs_info *mvi)
+{
+	int i;
+
+	if (!mvi)
+		return;
+
+	for (i = 0; i < MVS_SLOTS; i++) {
+		struct mvs_slot_info *slot = &mvi->slot_info[i];
+
+		if (slot->buf)
+			dma_free_coherent(&mvi->pdev->dev, MVS_SLOT_BUF_SZ,
+					  slot->buf, slot->buf_dma);
+	}
+
+	if (mvi->tx)
+		dma_free_coherent(&mvi->pdev->dev,
+				  sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
+				  mvi->tx, mvi->tx_dma);
+	if (mvi->rx_fis)
+		dma_free_coherent(&mvi->pdev->dev, MVS_RX_FISL_SZ,
+				  mvi->rx_fis, mvi->rx_fis_dma);
+	if (mvi->rx)
+		dma_free_coherent(&mvi->pdev->dev,
+				  sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
+				  mvi->rx, mvi->rx_dma);
+	if (mvi->slot)
+		dma_free_coherent(&mvi->pdev->dev,
+				  sizeof(*mvi->slot) * MVS_SLOTS,
+				  mvi->slot, mvi->slot_dma);
+#ifdef MVS_ENABLE_PERI
+	if (mvi->peri_regs)
+		iounmap(mvi->peri_regs);
+#endif
+	if (mvi->regs)
+		iounmap(mvi->regs);
+	if (mvi->shost)
+		scsi_host_put(mvi->shost);
+	kfree(mvi->sas.sas_port);
+	kfree(mvi->sas.sas_phy);
+	kfree(mvi);
+}
+
+#ifdef MVS_USE_TASKLET
+static void mvs_tasklet(unsigned long data)
+{
+	struct mvs_info *mvi = (struct mvs_info *) data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mvi->lock, flags);
+
+#ifdef MVS_DISABLE_MSI
+	mvs_int_full(mvi);
+#else
+	mvs_int_rx(mvi, true);
+#endif
+	spin_unlock_irqrestore(&mvi->lock, flags);
+}
+#endif
+
+static irqreturn_t mvs_interrupt(int irq, void *opaque)
+{
+	struct mvs_info *mvi = opaque;
+	void __iomem *regs = mvi->regs;
+	u32 stat;
+
+	stat = mr32(GBL_INT_STAT);
+
+	if (stat == 0 || stat == 0xffffffff)
+		return IRQ_NONE;
+
+	/* clear CMD_CMPLT ASAP */
+	mw32_f(INT_STAT, CINT_DONE);
+
+#ifndef MVS_USE_TASKLET
+	spin_lock(&mvi->lock);
+
+	mvs_int_full(mvi);
+
+	spin_unlock(&mvi->lock);
+#else
+	tasklet_schedule(&mvi->tasklet);
+#endif
+	return IRQ_HANDLED;
+}
+
+static struct mvs_info *__devinit mvs_alloc(struct pci_dev *pdev,
+					    const struct pci_device_id *ent)
+{
+	struct mvs_info *mvi;
+	unsigned long res_start, res_len, res_flag;
+	struct asd_sas_phy **arr_phy;
+	struct asd_sas_port **arr_port;
+	const struct mvs_chip_info *chip = &mvs_chips[ent->driver_data];
+	int i;
+
+	/*
+	 * alloc and init our per-HBA mvs_info struct
+	 */
+
+	mvi = kzalloc(sizeof(*mvi), GFP_KERNEL);
+	if (!mvi)
+		return NULL;
+
+	spin_lock_init(&mvi->lock);
+#ifdef MVS_USE_TASKLET
+	tasklet_init(&mvi->tasklet, mvs_tasklet, (unsigned long)mvi);
+#endif
+	mvi->pdev = pdev;
+	mvi->chip = chip;
+
+	if (pdev->device == 0x6440 && pdev->revision == 0)
+		mvi->flags |= MVF_PHY_PWR_FIX;
+
+	/*
+	 * alloc and init SCSI, SAS glue
+	 */
+
+	mvi->shost = scsi_host_alloc(&mvs_sht, sizeof(void *));
+	if (!mvi->shost)
+		goto err_out;
+
+	arr_phy = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
+	arr_port = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
+	if (!arr_phy || !arr_port)
+		goto err_out;
+
+	for (i = 0; i < MVS_MAX_PHYS; i++) {
+		mvs_phy_init(mvi, i);
+		arr_phy[i] = &mvi->phy[i].sas_phy;
+		arr_port[i] = &mvi->port[i].sas_port;
+		mvi->port[i].taskfileset = MVS_ID_NOT_MAPPED;
+		mvi->port[i].wide_port_phymap = 0;
+		mvi->port[i].port_attached = 0;
+		INIT_LIST_HEAD(&mvi->port[i].list);
+	}
+
+	SHOST_TO_SAS_HA(mvi->shost) = &mvi->sas;
+	mvi->shost->transportt = mvs_stt;
+	mvi->shost->max_id = 21;
+	mvi->shost->max_lun = ~0;
+	mvi->shost->max_channel = 0;
+	mvi->shost->max_cmd_len = 16;
+
+	mvi->sas.sas_ha_name = DRV_NAME;
+	mvi->sas.dev = &pdev->dev;
+	mvi->sas.lldd_module = THIS_MODULE;
+	mvi->sas.sas_addr = &mvi->sas_addr[0];
+	mvi->sas.sas_phy = arr_phy;
+	mvi->sas.sas_port = arr_port;
+	mvi->sas.num_phys = chip->n_phy;
+	mvi->sas.lldd_max_execute_num = 1;
+	mvi->sas.lldd_queue_size = MVS_QUEUE_SIZE;
+	mvi->shost->can_queue = MVS_CAN_QUEUE;
+	mvi->shost->cmd_per_lun = MVS_SLOTS / mvi->sas.num_phys;
+	mvi->sas.lldd_ha = mvi;
+	mvi->sas.core.shost = mvi->shost;
+
+	mvs_tag_init(mvi);
+
+	/*
+	 * ioremap main and peripheral registers
+	 */
+
+#ifdef MVS_ENABLE_PERI
+	res_start = pci_resource_start(pdev, 2);
+	res_len = pci_resource_len(pdev, 2);
+	if (!res_start || !res_len)
+		goto err_out;
+
+	mvi->peri_regs = ioremap_nocache(res_start, res_len);
+	if (!mvi->peri_regs)
+		goto err_out;
+#endif
+
+	res_start = pci_resource_start(pdev, 4);
+	res_len = pci_resource_len(pdev, 4);
+	if (!res_start || !res_len)
+		goto err_out;
+
+	res_flag = pci_resource_flags(pdev, 4);
+	if (res_flag & IORESOURCE_CACHEABLE)
+		mvi->regs = ioremap(res_start, res_len);
+	else
+		mvi->regs = ioremap_nocache(res_start, res_len);
+
+	if (!mvi->regs)
+		goto err_out;
+
+	/*
+	 * alloc and init our DMA areas
+	 */
+
+	mvi->tx = dma_alloc_coherent(&pdev->dev,
+				     sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
+				     &mvi->tx_dma, GFP_KERNEL);
+	if (!mvi->tx)
+		goto err_out;
+	memset(mvi->tx, 0, sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ);
+
+	mvi->rx_fis = dma_alloc_coherent(&pdev->dev, MVS_RX_FISL_SZ,
+					 &mvi->rx_fis_dma, GFP_KERNEL);
+	if (!mvi->rx_fis)
+		goto err_out;
+	memset(mvi->rx_fis, 0, MVS_RX_FISL_SZ);
+
+	mvi->rx = dma_alloc_coherent(&pdev->dev,
+				     sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
+				     &mvi->rx_dma, GFP_KERNEL);
+	if (!mvi->rx)
+		goto err_out;
+	memset(mvi->rx, 0, sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1));
+
+	mvi->rx[0] = cpu_to_le32(0xfff);
+	mvi->rx_cons = 0xfff;
+
+	mvi->slot = dma_alloc_coherent(&pdev->dev,
+				       sizeof(*mvi->slot) * MVS_SLOTS,
+				       &mvi->slot_dma, GFP_KERNEL);
+	if (!mvi->slot)
+		goto err_out;
+	memset(mvi->slot, 0, sizeof(*mvi->slot) * MVS_SLOTS);
+
+	for (i = 0; i < MVS_SLOTS; i++) {
+		struct mvs_slot_info *slot = &mvi->slot_info[i];
+
+		slot->buf = dma_alloc_coherent(&pdev->dev, MVS_SLOT_BUF_SZ,
+					       &slot->buf_dma, GFP_KERNEL);
+		if (!slot->buf)
+			goto err_out;
+		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+	}
+
+	/* finally, read NVRAM to get our SAS address */
+	if (mvs_nvram_read(mvi, NVR_SAS_ADDR, &mvi->sas_addr, 8))
+		goto err_out;
+	return mvi;
+
+err_out:
+	mvs_free(mvi);
+	return NULL;
+}
+
+/* move to PCI layer or libata core? */
+static int pci_go_64(struct pci_dev *pdev)
+{
+	int rc;
+
+	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+		if (rc) {
+			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+			if (rc) {
+				dev_printk(KERN_ERR, &pdev->dev,
+					   "64-bit DMA enable failed\n");
+				return rc;
+			}
+		}
+	} else {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_printk(KERN_ERR, &pdev->dev,
+				   "32-bit DMA enable failed\n");
+			return rc;
+		}
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_printk(KERN_ERR, &pdev->dev,
+				   "32-bit consistent DMA enable failed\n");
+			return rc;
+		}
+	}
+
+	return rc;
+}
+
+static int __devinit mvs_pci_init(struct pci_dev *pdev,
+				  const struct pci_device_id *ent)
+{
+	int rc;
+	struct mvs_info *mvi;
+	irq_handler_t irq_handler = mvs_interrupt;
+
+	rc = pci_enable_device(pdev);
+	if (rc)
+		return rc;
+
+	pci_set_master(pdev);
+
+	rc = pci_request_regions(pdev, DRV_NAME);
+	if (rc)
+		goto err_out_disable;
+
+	rc = pci_go_64(pdev);
+	if (rc)
+		goto err_out_regions;
+
+	mvi = mvs_alloc(pdev, ent);
+	if (!mvi) {
+		rc = -ENOMEM;
+		goto err_out_regions;
+	}
+
+	rc = mvs_hw_init(mvi);
+	if (rc)
+		goto err_out_mvi;
+
+#ifndef MVS_DISABLE_MSI
+	if (!pci_enable_msi(pdev)) {
+		u32 tmp;
+		void __iomem *regs = mvi->regs;
+		mvi->flags |= MVF_MSI;
+		irq_handler = mvs_msi_interrupt;
+		tmp = mr32(PCS);
+		mw32(PCS, tmp | PCS_SELF_CLEAR);
+	}
+#endif
+
+	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRV_NAME, mvi);
+	if (rc)
+		goto err_out_msi;
+
+	rc = scsi_add_host(mvi->shost, &pdev->dev);
+	if (rc)
+		goto err_out_irq;
+
+	rc = sas_register_ha(&mvi->sas);
+	if (rc)
+		goto err_out_shost;
+
+	pci_set_drvdata(pdev, mvi);
+
+	mvs_print_info(mvi);
+
+	mvs_hba_interrupt_enable(mvi);
+
+	scsi_scan_host(mvi->shost);
+
+	return 0;
+
+err_out_shost:
+	scsi_remove_host(mvi->shost);
+err_out_irq:
+	free_irq(pdev->irq, mvi);
+err_out_msi:
+	if (mvi->flags |= MVF_MSI)
+		pci_disable_msi(pdev);
+err_out_mvi:
+	mvs_free(mvi);
+err_out_regions:
+	pci_release_regions(pdev);
+err_out_disable:
+	pci_disable_device(pdev);
+	return rc;
+}
+
+static void __devexit mvs_pci_remove(struct pci_dev *pdev)
+{
+	struct mvs_info *mvi = pci_get_drvdata(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+
+	if (mvi) {
+		sas_unregister_ha(&mvi->sas);
+		mvs_hba_interrupt_disable(mvi);
+		sas_remove_host(mvi->shost);
+		scsi_remove_host(mvi->shost);
+
+		free_irq(pdev->irq, mvi);
+		if (mvi->flags & MVF_MSI)
+			pci_disable_msi(pdev);
+		mvs_free(mvi);
+		pci_release_regions(pdev);
+	}
+	pci_disable_device(pdev);
+}
+
+static struct pci_device_id __devinitdata mvs_pci_table[] = {
+	{ PCI_VDEVICE(MARVELL, 0x6320), chip_6320 },
+	{ PCI_VDEVICE(MARVELL, 0x6340), chip_6440 },
+	{
+		.vendor 	= PCI_VENDOR_ID_MARVELL,
+		.device 	= 0x6440,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= 0x6480,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= chip_6480,
+	},
+	{ PCI_VDEVICE(MARVELL, 0x6440), chip_6440 },
+	{ PCI_VDEVICE(MARVELL, 0x6480), chip_6480 },
+
+	{ }	/* terminate list */
+};
+
+static struct pci_driver mvs_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= mvs_pci_table,
+	.probe		= mvs_pci_init,
+	.remove		= __devexit_p(mvs_pci_remove),
+};
+
+static int __init mvs_init(void)
+{
+	int rc;
+
+	mvs_stt = sas_domain_attach_transport(&mvs_transport_ops);
+	if (!mvs_stt)
+		return -ENOMEM;
+
+	rc = pci_register_driver(&mvs_pci_driver);
+	if (rc)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	sas_release_transport(mvs_stt);
+	return rc;
+}
+
+static void __exit mvs_exit(void)
+{
+	pci_unregister_driver(&mvs_pci_driver);
+	sas_release_transport(mvs_stt);
+}
+
+module_init(mvs_init);
+module_exit(mvs_exit);
+
+MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
+MODULE_DESCRIPTION("Marvell 88SE6440 SAS/SATA controller driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, mvs_pci_table);
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index e4acebd..6a583c1 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1,5 +1,5 @@
 /*
-	mvsas.c - Marvell 88SE6440 SAS/SATA support
+	mv_sas.c - Marvell 88SE6440 SAS/SATA support
 
 	Copyright 2007 Red Hat, Inc.
 	Copyright 2008 Marvell. <kewei@marvell.com>
@@ -28,34 +28,9 @@
 
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/ctype.h>
-#include <scsi/libsas.h>
-#include <scsi/scsi_tcq.h>
-#include <scsi/sas_ata.h>
-#include <asm/io.h>
-
-#define DRV_NAME	"mvsas"
-#define DRV_VERSION	"0.5.2"
-#define _MV_DUMP	0
-#define MVS_DISABLE_NVRAM
-#define MVS_DISABLE_MSI
-
-#define mr32(reg)	readl(regs + MVS_##reg)
-#define mw32(reg,val)	writel((val), regs + MVS_##reg)
-#define mw32_f(reg,val)	do {			\
-	writel((val), regs + MVS_##reg);	\
-	readl(regs + MVS_##reg);		\
-	} while (0)
-
-#define MVS_ID_NOT_MAPPED	0x7f
-#define MVS_CHIP_SLOT_SZ	(1U << mvi->chip->slot_width)
+#include "mv_sas.h"
+#include "mv_64xx.h"
+#include "mv_chips.h"
 
 /* offset for D2H FIS in the Received FIS List Structure */
 #define SATA_RECEIVED_D2H_FIS(reg_set)	\
@@ -65,670 +40,70 @@
 #define UNASSOC_D2H_FIS(id)		\
 	((void *) mvi->rx_fis + 0x100 * id)
 
-#define for_each_phy(__lseq_mask, __mc, __lseq, __rest)			\
-	for ((__mc) = (__lseq_mask), (__lseq) = 0;			\
-					(__mc) != 0 && __rest;		\
-					(++__lseq), (__mc) >>= 1)
-
-/* driver compile-time configuration */
-enum driver_configuration {
-	MVS_TX_RING_SZ		= 1024,	/* TX ring size (12-bit) */
-	MVS_RX_RING_SZ		= 1024, /* RX ring size (12-bit) */
-					/* software requires power-of-2
-					   ring size */
-
-	MVS_SLOTS		= 512,	/* command slots */
-	MVS_SLOT_BUF_SZ		= 8192, /* cmd tbl + IU + status + PRD */
-	MVS_SSP_CMD_SZ		= 64,	/* SSP command table buffer size */
-	MVS_ATA_CMD_SZ		= 96,	/* SATA command table buffer size */
-	MVS_OAF_SZ		= 64,	/* Open address frame buffer size */
-
-	MVS_RX_FIS_COUNT	= 17,	/* Optional rx'd FISs (max 17) */
-
-	MVS_QUEUE_SIZE		= 30,	/* Support Queue depth */
-	MVS_CAN_QUEUE		= MVS_SLOTS - 1,	/* SCSI Queue depth */
-};
-
-/* unchangeable hardware details */
-enum hardware_details {
-	MVS_MAX_PHYS		= 8,	/* max. possible phys */
-	MVS_MAX_PORTS		= 8,	/* max. possible ports */
-	MVS_RX_FISL_SZ		= 0x400 + (MVS_RX_FIS_COUNT * 0x100),
-};
-
-/* peripheral registers (BAR2) */
-enum peripheral_registers {
-	SPI_CTL			= 0x10,	/* EEPROM control */
-	SPI_CMD			= 0x14,	/* EEPROM command */
-	SPI_DATA		= 0x18, /* EEPROM data */
-};
-
-enum peripheral_register_bits {
-	TWSI_RDY		= (1U << 7),	/* EEPROM interface ready */
-	TWSI_RD			= (1U << 4),	/* EEPROM read access */
-
-	SPI_ADDR_MASK		= 0x3ffff,	/* bits 17:0 */
-};
-
-/* enhanced mode registers (BAR4) */
-enum hw_registers {
-	MVS_GBL_CTL		= 0x04,  /* global control */
-	MVS_GBL_INT_STAT	= 0x08,  /* global irq status */
-	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
-	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
-
-	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
-	MVS_PCS			= 0x104, /* SAS/SATA port control/status */
-	MVS_CMD_LIST_LO		= 0x108, /* cmd list addr */
-	MVS_CMD_LIST_HI		= 0x10C,
-	MVS_RX_FIS_LO		= 0x110, /* RX FIS list addr */
-	MVS_RX_FIS_HI		= 0x114,
-
-	MVS_TX_CFG		= 0x120, /* TX configuration */
-	MVS_TX_LO		= 0x124, /* TX (delivery) ring addr */
-	MVS_TX_HI		= 0x128,
-
-	MVS_TX_PROD_IDX		= 0x12C, /* TX producer pointer */
-	MVS_TX_CONS_IDX		= 0x130, /* TX consumer pointer (RO) */
-	MVS_RX_CFG		= 0x134, /* RX configuration */
-	MVS_RX_LO		= 0x138, /* RX (completion) ring addr */
-	MVS_RX_HI		= 0x13C,
-	MVS_RX_CONS_IDX		= 0x140, /* RX consumer pointer (RO) */
-
-	MVS_INT_COAL		= 0x148, /* Int coalescing config */
-	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
-	MVS_INT_STAT		= 0x150, /* Central int status */
-	MVS_INT_MASK		= 0x154, /* Central int enable */
-	MVS_INT_STAT_SRS	= 0x158, /* SATA register set status */
-	MVS_INT_MASK_SRS	= 0x15C,
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_INT_STAT		= 0x160, /* port0 interrupt status */
-	MVS_P0_INT_MASK		= 0x164, /* port0 interrupt mask */
-	MVS_P4_INT_STAT		= 0x200, /* Port 4 interrupt status */
-	MVS_P4_INT_MASK		= 0x204, /* Port 4 interrupt enable mask */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_SER_CTLSTAT	= 0x180, /* port0 serial control/status */
-	MVS_P4_SER_CTLSTAT	= 0x220, /* port4 serial control/status */
-
-	MVS_CMD_ADDR		= 0x1B8, /* Command register port (addr) */
-	MVS_CMD_DATA		= 0x1BC, /* Command register port (data) */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_CFG_ADDR		= 0x1C0, /* port0 phy register address */
-	MVS_P0_CFG_DATA		= 0x1C4, /* port0 phy register data */
-	MVS_P4_CFG_ADDR		= 0x230, /* Port 4 config address */
-	MVS_P4_CFG_DATA		= 0x234, /* Port 4 config data */
-
-					 /* ports 1-3 follow after this */
-	MVS_P0_VSR_ADDR		= 0x1E0, /* port0 VSR address */
-	MVS_P0_VSR_DATA		= 0x1E4, /* port0 VSR data */
-	MVS_P4_VSR_ADDR		= 0x250, /* port 4 VSR addr */
-	MVS_P4_VSR_DATA		= 0x254, /* port 4 VSR data */
-};
-
-enum hw_register_bits {
-	/* MVS_GBL_CTL */
-	INT_EN			= (1U << 1),	/* Global int enable */
-	HBA_RST			= (1U << 0),	/* HBA reset */
-
-	/* MVS_GBL_INT_STAT */
-	INT_XOR			= (1U << 4),	/* XOR engine event */
-	INT_SAS_SATA		= (1U << 0),	/* SAS/SATA event */
-
-	/* MVS_GBL_PORT_TYPE */			/* shl for ports 1-3 */
-	SATA_TARGET		= (1U << 16),	/* port0 SATA target enable */
-	MODE_AUTO_DET_PORT7 = (1U << 15),	/* port0 SAS/SATA autodetect */
-	MODE_AUTO_DET_PORT6 = (1U << 14),
-	MODE_AUTO_DET_PORT5 = (1U << 13),
-	MODE_AUTO_DET_PORT4 = (1U << 12),
-	MODE_AUTO_DET_PORT3 = (1U << 11),
-	MODE_AUTO_DET_PORT2 = (1U << 10),
-	MODE_AUTO_DET_PORT1 = (1U << 9),
-	MODE_AUTO_DET_PORT0 = (1U << 8),
-	MODE_AUTO_DET_EN    =	MODE_AUTO_DET_PORT0 | MODE_AUTO_DET_PORT1 |
-				MODE_AUTO_DET_PORT2 | MODE_AUTO_DET_PORT3 |
-				MODE_AUTO_DET_PORT4 | MODE_AUTO_DET_PORT5 |
-				MODE_AUTO_DET_PORT6 | MODE_AUTO_DET_PORT7,
-	MODE_SAS_PORT7_MASK = (1U << 7),  /* port0 SAS(1), SATA(0) mode */
-	MODE_SAS_PORT6_MASK = (1U << 6),
-	MODE_SAS_PORT5_MASK = (1U << 5),
-	MODE_SAS_PORT4_MASK = (1U << 4),
-	MODE_SAS_PORT3_MASK = (1U << 3),
-	MODE_SAS_PORT2_MASK = (1U << 2),
-	MODE_SAS_PORT1_MASK = (1U << 1),
-	MODE_SAS_PORT0_MASK = (1U << 0),
-	MODE_SAS_SATA	=	MODE_SAS_PORT0_MASK | MODE_SAS_PORT1_MASK |
-				MODE_SAS_PORT2_MASK | MODE_SAS_PORT3_MASK |
-				MODE_SAS_PORT4_MASK | MODE_SAS_PORT5_MASK |
-				MODE_SAS_PORT6_MASK | MODE_SAS_PORT7_MASK,
-
-				/* SAS_MODE value may be
-				 * dictated (in hw) by values
-				 * of SATA_TARGET & AUTO_DET
-				 */
-
-	/* MVS_TX_CFG */
-	TX_EN			= (1U << 16),	/* Enable TX */
-	TX_RING_SZ_MASK		= 0xfff,	/* TX ring size, bits 11:0 */
-
-	/* MVS_RX_CFG */
-	RX_EN			= (1U << 16),	/* Enable RX */
-	RX_RING_SZ_MASK		= 0xfff,	/* RX ring size, bits 11:0 */
-
-	/* MVS_INT_COAL */
-	COAL_EN			= (1U << 16),	/* Enable int coalescing */
-
-	/* MVS_INT_STAT, MVS_INT_MASK */
-	CINT_I2C		= (1U << 31),	/* I2C event */
-	CINT_SW0		= (1U << 30),	/* software event 0 */
-	CINT_SW1		= (1U << 29),	/* software event 1 */
-	CINT_PRD_BC		= (1U << 28),	/* PRD BC err for read cmd */
-	CINT_DMA_PCIE		= (1U << 27),	/* DMA to PCIE timeout */
-	CINT_MEM		= (1U << 26),	/* int mem parity err */
-	CINT_I2C_SLAVE		= (1U << 25),	/* slave I2C event */
-	CINT_SRS		= (1U << 3),	/* SRS event */
-	CINT_CI_STOP		= (1U << 1),	/* cmd issue stopped */
-	CINT_DONE		= (1U << 0),	/* cmd completion */
-
-						/* shl for ports 1-3 */
-	CINT_PORT_STOPPED	= (1U << 16),	/* port0 stopped */
-	CINT_PORT		= (1U << 8),	/* port0 event */
-	CINT_PORT_MASK_OFFSET	= 8,
-	CINT_PORT_MASK		= (0xFF << CINT_PORT_MASK_OFFSET),
-
-	/* TX (delivery) ring bits */
-	TXQ_CMD_SHIFT		= 29,
-	TXQ_CMD_SSP		= 1,		/* SSP protocol */
-	TXQ_CMD_SMP		= 2,		/* SMP protocol */
-	TXQ_CMD_STP		= 3,		/* STP/SATA protocol */
-	TXQ_CMD_SSP_FREE_LIST	= 4,		/* add to SSP targ free list */
-	TXQ_CMD_SLOT_RESET	= 7,		/* reset command slot */
-	TXQ_MODE_I		= (1U << 28),	/* mode: 0=target,1=initiator */
-	TXQ_PRIO_HI		= (1U << 27),	/* priority: 0=normal, 1=high */
-	TXQ_SRS_SHIFT		= 20,		/* SATA register set */
-	TXQ_SRS_MASK		= 0x7f,
-	TXQ_PHY_SHIFT		= 12,		/* PHY bitmap */
-	TXQ_PHY_MASK		= 0xff,
-	TXQ_SLOT_MASK		= 0xfff,	/* slot number */
-
-	/* RX (completion) ring bits */
-	RXQ_GOOD		= (1U << 23),	/* Response good */
-	RXQ_SLOT_RESET		= (1U << 21),	/* Slot reset complete */
-	RXQ_CMD_RX		= (1U << 20),	/* target cmd received */
-	RXQ_ATTN		= (1U << 19),	/* attention */
-	RXQ_RSP			= (1U << 18),	/* response frame xfer'd */
-	RXQ_ERR			= (1U << 17),	/* err info rec xfer'd */
-	RXQ_DONE		= (1U << 16),	/* cmd complete */
-	RXQ_SLOT_MASK		= 0xfff,	/* slot number */
-
-	/* mvs_cmd_hdr bits */
-	MCH_PRD_LEN_SHIFT	= 16,		/* 16-bit PRD table len */
-	MCH_SSP_FR_TYPE_SHIFT	= 13,		/* SSP frame type */
-
-						/* SSP initiator only */
-	MCH_SSP_FR_CMD		= 0x0,		/* COMMAND frame */
-
-						/* SSP initiator or target */
-	MCH_SSP_FR_TASK		= 0x1,		/* TASK frame */
-
-						/* SSP target only */
-	MCH_SSP_FR_XFER_RDY	= 0x4,		/* XFER_RDY frame */
-	MCH_SSP_FR_RESP		= 0x5,		/* RESPONSE frame */
-	MCH_SSP_FR_READ		= 0x6,		/* Read DATA frame(s) */
-	MCH_SSP_FR_READ_RESP	= 0x7,		/* ditto, plus RESPONSE */
-
-	MCH_PASSTHRU		= (1U << 12),	/* pass-through (SSP) */
-	MCH_FBURST		= (1U << 11),	/* first burst (SSP) */
-	MCH_CHK_LEN		= (1U << 10),	/* chk xfer len (SSP) */
-	MCH_RETRY		= (1U << 9),	/* tport layer retry (SSP) */
-	MCH_PROTECTION		= (1U << 8),	/* protection info rec (SSP) */
-	MCH_RESET		= (1U << 7),	/* Reset (STP/SATA) */
-	MCH_FPDMA		= (1U << 6),	/* First party DMA (STP/SATA) */
-	MCH_ATAPI		= (1U << 5),	/* ATAPI (STP/SATA) */
-	MCH_BIST		= (1U << 4),	/* BIST activate (STP/SATA) */
-	MCH_PMP_MASK		= 0xf,		/* PMP from cmd FIS (STP/SATA)*/
-
-	CCTL_RST		= (1U << 5),	/* port logic reset */
-
-						/* 0(LSB first), 1(MSB first) */
-	CCTL_ENDIAN_DATA	= (1U << 3),	/* PRD data */
-	CCTL_ENDIAN_RSP		= (1U << 2),	/* response frame */
-	CCTL_ENDIAN_OPEN	= (1U << 1),	/* open address frame */
-	CCTL_ENDIAN_CMD		= (1U << 0),	/* command table */
-
-	/* MVS_Px_SER_CTLSTAT (per-phy control) */
-	PHY_SSP_RST		= (1U << 3),	/* reset SSP link layer */
-	PHY_BCAST_CHG		= (1U << 2),	/* broadcast(change) notif */
-	PHY_RST_HARD		= (1U << 1),	/* hard reset + phy reset */
-	PHY_RST			= (1U << 0),	/* phy reset */
-	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0xF << 8),
-	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0xF << 12),
-	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (16),
-	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
-			(0xF << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
-	PHY_READY_MASK		= (1U << 20),
-
-	/* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */
-	PHYEV_DEC_ERR		= (1U << 24),	/* Phy Decoding Error */
-	PHYEV_UNASSOC_FIS	= (1U << 19),	/* unassociated FIS rx'd */
-	PHYEV_AN		= (1U << 18),	/* SATA async notification */
-	PHYEV_BIST_ACT		= (1U << 17),	/* BIST activate FIS */
-	PHYEV_SIG_FIS		= (1U << 16),	/* signature FIS */
-	PHYEV_POOF		= (1U << 12),	/* phy ready from 1 -> 0 */
-	PHYEV_IU_BIG		= (1U << 11),	/* IU too long err */
-	PHYEV_IU_SMALL		= (1U << 10),	/* IU too short err */
-	PHYEV_UNK_TAG		= (1U << 9),	/* unknown tag */
-	PHYEV_BROAD_CH		= (1U << 8),	/* broadcast(CHANGE) */
-	PHYEV_COMWAKE		= (1U << 7),	/* COMWAKE rx'd */
-	PHYEV_PORT_SEL		= (1U << 6),	/* port selector present */
-	PHYEV_HARD_RST		= (1U << 5),	/* hard reset rx'd */
-	PHYEV_ID_TMOUT		= (1U << 4),	/* identify timeout */
-	PHYEV_ID_FAIL		= (1U << 3),	/* identify failed */
-	PHYEV_ID_DONE		= (1U << 2),	/* identify done */
-	PHYEV_HARD_RST_DONE	= (1U << 1),	/* hard reset done */
-	PHYEV_RDY_CH		= (1U << 0),	/* phy ready changed state */
-
-	/* MVS_PCS */
-	PCS_EN_SATA_REG_SHIFT	= (16),		/* Enable SATA Register Set */
-	PCS_EN_PORT_XMT_SHIFT	= (12),		/* Enable Port Transmit */
-	PCS_EN_PORT_XMT_SHIFT2	= (8),		/* For 6480 */
-	PCS_SATA_RETRY		= (1U << 8),	/* retry ctl FIS on R_ERR */
-	PCS_RSP_RX_EN		= (1U << 7),	/* raw response rx */
-	PCS_SELF_CLEAR		= (1U << 5),	/* self-clearing int mode */
-	PCS_FIS_RX_EN		= (1U << 4),	/* FIS rx enable */
-	PCS_CMD_STOP_ERR	= (1U << 3),	/* cmd stop-on-err enable */
-	PCS_CMD_RST		= (1U << 1),	/* reset cmd issue */
-	PCS_CMD_EN		= (1U << 0),	/* enable cmd issue */
-
-	/* Port n Attached Device Info */
-	PORT_DEV_SSP_TRGT	= (1U << 19),
-	PORT_DEV_SMP_TRGT	= (1U << 18),
-	PORT_DEV_STP_TRGT	= (1U << 17),
-	PORT_DEV_SSP_INIT	= (1U << 11),
-	PORT_DEV_SMP_INIT	= (1U << 10),
-	PORT_DEV_STP_INIT	= (1U << 9),
-	PORT_PHY_ID_MASK	= (0xFFU << 24),
-	PORT_DEV_TRGT_MASK	= (0x7U << 17),
-	PORT_DEV_INIT_MASK	= (0x7U << 9),
-	PORT_DEV_TYPE_MASK	= (0x7U << 0),
-
-	/* Port n PHY Status */
-	PHY_RDY			= (1U << 2),
-	PHY_DW_SYNC		= (1U << 1),
-	PHY_OOB_DTCTD		= (1U << 0),
-
-	/* VSR */
-	/* PHYMODE 6 (CDB) */
-	PHY_MODE6_LATECLK	= (1U << 29),	/* Lock Clock */
-	PHY_MODE6_DTL_SPEED	= (1U << 27),	/* Digital Loop Speed */
-	PHY_MODE6_FC_ORDER	= (1U << 26),	/* Fibre Channel Mode Order*/
-	PHY_MODE6_MUCNT_EN	= (1U << 24),	/* u Count Enable */
-	PHY_MODE6_SEL_MUCNT_LEN	= (1U << 22),	/* Training Length Select */
-	PHY_MODE6_SELMUPI	= (1U << 20),	/* Phase Multi Select (init) */
-	PHY_MODE6_SELMUPF	= (1U << 18),	/* Phase Multi Select (final) */
-	PHY_MODE6_SELMUFF	= (1U << 16),	/* Freq Loop Multi Sel(final) */
-	PHY_MODE6_SELMUFI	= (1U << 14),	/* Freq Loop Multi Sel(init) */
-	PHY_MODE6_FREEZE_LOOP	= (1U << 12),	/* Freeze Rx CDR Loop */
-	PHY_MODE6_INT_RXFOFFS	= (1U << 3),	/* Rx CDR Freq Loop Enable */
-	PHY_MODE6_FRC_RXFOFFS	= (1U << 2),	/* Initial Rx CDR Offset */
-	PHY_MODE6_STAU_0D8	= (1U << 1),	/* Rx CDR Freq Loop Saturate */
-	PHY_MODE6_RXSAT_DIS	= (1U << 0),	/* Saturate Ctl */
-};
-
-enum mvs_info_flags {
-	MVF_MSI			= (1U << 0),	/* MSI is enabled */
-	MVF_PHY_PWR_FIX		= (1U << 1),	/* bug workaround */
-};
-
-enum sas_cmd_port_registers {
-	CMD_CMRST_OOB_DET	= 0x100, /* COMRESET OOB detect register */
-	CMD_CMWK_OOB_DET	= 0x104, /* COMWAKE OOB detect register */
-	CMD_CMSAS_OOB_DET	= 0x108, /* COMSAS OOB detect register */
-	CMD_BRST_OOB_DET	= 0x10c, /* burst OOB detect register */
-	CMD_OOB_SPACE		= 0x110, /* OOB space control register */
-	CMD_OOB_BURST		= 0x114, /* OOB burst control register */
-	CMD_PHY_TIMER		= 0x118, /* PHY timer control register */
-	CMD_PHY_CONFIG0		= 0x11c, /* PHY config register 0 */
-	CMD_PHY_CONFIG1		= 0x120, /* PHY config register 1 */
-	CMD_SAS_CTL0		= 0x124, /* SAS control register 0 */
-	CMD_SAS_CTL1		= 0x128, /* SAS control register 1 */
-	CMD_SAS_CTL2		= 0x12c, /* SAS control register 2 */
-	CMD_SAS_CTL3		= 0x130, /* SAS control register 3 */
-	CMD_ID_TEST		= 0x134, /* ID test register */
-	CMD_PL_TIMER		= 0x138, /* PL timer register */
-	CMD_WD_TIMER		= 0x13c, /* WD timer register */
-	CMD_PORT_SEL_COUNT	= 0x140, /* port selector count register */
-	CMD_APP_MEM_CTL		= 0x144, /* Application Memory Control */
-	CMD_XOR_MEM_CTL		= 0x148, /* XOR Block Memory Control */
-	CMD_DMA_MEM_CTL		= 0x14c, /* DMA Block Memory Control */
-	CMD_PORT_MEM_CTL0	= 0x150, /* Port Memory Control 0 */
-	CMD_PORT_MEM_CTL1	= 0x154, /* Port Memory Control 1 */
-	CMD_SATA_PORT_MEM_CTL0	= 0x158, /* SATA Port Memory Control 0 */
-	CMD_SATA_PORT_MEM_CTL1	= 0x15c, /* SATA Port Memory Control 1 */
-	CMD_XOR_MEM_BIST_CTL	= 0x160, /* XOR Memory BIST Control */
-	CMD_XOR_MEM_BIST_STAT	= 0x164, /* XOR Memroy BIST Status */
-	CMD_DMA_MEM_BIST_CTL	= 0x168, /* DMA Memory BIST Control */
-	CMD_DMA_MEM_BIST_STAT	= 0x16c, /* DMA Memory BIST Status */
-	CMD_PORT_MEM_BIST_CTL	= 0x170, /* Port Memory BIST Control */
-	CMD_PORT_MEM_BIST_STAT0 = 0x174, /* Port Memory BIST Status 0 */
-	CMD_PORT_MEM_BIST_STAT1 = 0x178, /* Port Memory BIST Status 1 */
-	CMD_STP_MEM_BIST_CTL	= 0x17c, /* STP Memory BIST Control */
-	CMD_STP_MEM_BIST_STAT0	= 0x180, /* STP Memory BIST Status 0 */
-	CMD_STP_MEM_BIST_STAT1	= 0x184, /* STP Memory BIST Status 1 */
-	CMD_RESET_COUNT		= 0x188, /* Reset Count */
-	CMD_MONTR_DATA_SEL	= 0x18C, /* Monitor Data/Select */
-	CMD_PLL_PHY_CONFIG	= 0x190, /* PLL/PHY Configuration */
-	CMD_PHY_CTL		= 0x194, /* PHY Control and Status */
-	CMD_PHY_TEST_COUNT0	= 0x198, /* Phy Test Count 0 */
-	CMD_PHY_TEST_COUNT1	= 0x19C, /* Phy Test Count 1 */
-	CMD_PHY_TEST_COUNT2	= 0x1A0, /* Phy Test Count 2 */
-	CMD_APP_ERR_CONFIG	= 0x1A4, /* Application Error Configuration */
-	CMD_PND_FIFO_CTL0	= 0x1A8, /* Pending FIFO Control 0 */
-	CMD_HOST_CTL		= 0x1AC, /* Host Control Status */
-	CMD_HOST_WR_DATA	= 0x1B0, /* Host Write Data */
-	CMD_HOST_RD_DATA	= 0x1B4, /* Host Read Data */
-	CMD_PHY_MODE_21		= 0x1B8, /* Phy Mode 21 */
-	CMD_SL_MODE0		= 0x1BC, /* SL Mode 0 */
-	CMD_SL_MODE1		= 0x1C0, /* SL Mode 1 */
-	CMD_PND_FIFO_CTL1	= 0x1C4, /* Pending FIFO Control 1 */
-};
-
-/* SAS/SATA configuration port registers, aka phy registers */
-enum sas_sata_config_port_regs {
-	PHYR_IDENTIFY		= 0x00,	/* info for IDENTIFY frame */
-	PHYR_ADDR_LO		= 0x04,	/* my SAS address (low) */
-	PHYR_ADDR_HI		= 0x08,	/* my SAS address (high) */
-	PHYR_ATT_DEV_INFO	= 0x0C,	/* attached device info */
-	PHYR_ATT_ADDR_LO	= 0x10,	/* attached dev SAS addr (low) */
-	PHYR_ATT_ADDR_HI	= 0x14,	/* attached dev SAS addr (high) */
-	PHYR_SATA_CTL		= 0x18,	/* SATA control */
-	PHYR_PHY_STAT		= 0x1C,	/* PHY status */
-	PHYR_SATA_SIG0		= 0x20,	/*port SATA signature FIS(Byte 0-3) */
-	PHYR_SATA_SIG1		= 0x24,	/*port SATA signature FIS(Byte 4-7) */
-	PHYR_SATA_SIG2		= 0x28,	/*port SATA signature FIS(Byte 8-11) */
-	PHYR_SATA_SIG3		= 0x2c,	/*port SATA signature FIS(Byte 12-15) */
-	PHYR_R_ERR_COUNT	= 0x30, /* port R_ERR count register */
-	PHYR_CRC_ERR_COUNT	= 0x34, /* port CRC error count register */
-	PHYR_WIDE_PORT		= 0x38,	/* wide port participating */
-	PHYR_CURRENT0		= 0x80,	/* current connection info 0 */
-	PHYR_CURRENT1		= 0x84,	/* current connection info 1 */
-	PHYR_CURRENT2		= 0x88,	/* current connection info 2 */
-};
-
-/*  SAS/SATA Vendor Specific Port Registers */
-enum sas_sata_vsp_regs {
-	VSR_PHY_STAT		= 0x00, /* Phy Status */
-	VSR_PHY_MODE1		= 0x01, /* phy tx */
-	VSR_PHY_MODE2		= 0x02, /* tx scc */
-	VSR_PHY_MODE3		= 0x03, /* pll */
-	VSR_PHY_MODE4		= 0x04, /* VCO */
-	VSR_PHY_MODE5		= 0x05, /* Rx */
-	VSR_PHY_MODE6		= 0x06, /* CDR */
-	VSR_PHY_MODE7		= 0x07, /* Impedance */
-	VSR_PHY_MODE8		= 0x08, /* Voltage */
-	VSR_PHY_MODE9		= 0x09, /* Test */
-	VSR_PHY_MODE10		= 0x0A, /* Power */
-	VSR_PHY_MODE11		= 0x0B, /* Phy Mode */
-	VSR_PHY_VS0		= 0x0C, /* Vednor Specific 0 */
-	VSR_PHY_VS1		= 0x0D, /* Vednor Specific 1 */
-};
-
-enum pci_cfg_registers {
-	PCR_PHY_CTL	= 0x40,
-	PCR_PHY_CTL2	= 0x90,
-	PCR_DEV_CTRL	= 0xE8,
-};
-
-enum pci_cfg_register_bits {
-	PCTL_PWR_ON	= (0xFU << 24),
-	PCTL_OFF	= (0xFU << 12),
-	PRD_REQ_SIZE	= (0x4000),
-	PRD_REQ_MASK	= (0x00007000),
-};
-
-enum nvram_layout_offsets {
-	NVR_SIG		= 0x00,		/* 0xAA, 0x55 */
-	NVR_SAS_ADDR	= 0x02,		/* 8-byte SAS address */
-};
-
-enum chip_flavors {
-	chip_6320,
-	chip_6440,
-	chip_6480,
-};
-
-enum port_type {
-	PORT_TYPE_SAS	=  (1L << 1),
-	PORT_TYPE_SATA	=  (1L << 0),
-};
-
-/* Command Table Format */
-enum ct_format {
-	/* SSP */
-	SSP_F_H		=  0x00,
-	SSP_F_IU	=  0x18,
-	SSP_F_MAX	=  0x4D,
-	/* STP */
-	STP_CMD_FIS	=  0x00,
-	STP_ATAPI_CMD	=  0x40,
-	STP_F_MAX	=  0x10,
-	/* SMP */
-	SMP_F_T		=  0x00,
-	SMP_F_DEP	=  0x01,
-	SMP_F_MAX	=  0x101,
-};
-
-enum status_buffer {
-	SB_EIR_OFF	=  0x00,	/* Error Information Record */
-	SB_RFB_OFF	=  0x08,	/* Response Frame Buffer */
-	SB_RFB_MAX	=  0x400,	/* RFB size*/
-};
-
-enum error_info_rec {
-	CMD_ISS_STPD	= (1U << 31),	/* Cmd Issue Stopped */
-	CMD_PI_ERR	= (1U << 30),	/* Protection info error.  see flags2 */
-	RSP_OVER	= (1U << 29),	/* rsp buffer overflow */
-	RETRY_LIM	= (1U << 28),	/* FIS/frame retry limit exceeded */
-	UNK_FIS 	= (1U << 27),	/* unknown FIS */
-	DMA_TERM	= (1U << 26),	/* DMA terminate primitive rx'd */
-	SYNC_ERR	= (1U << 25),	/* SYNC rx'd during frame xmit */
-	TFILE_ERR	= (1U << 24),	/* SATA taskfile Error bit set */
-	R_ERR		= (1U << 23),	/* SATA returned R_ERR prim */
-	RD_OFS		= (1U << 20),	/* Read DATA frame invalid offset */
-	XFER_RDY_OFS	= (1U << 19),	/* XFER_RDY offset error */
-	UNEXP_XFER_RDY	= (1U << 18),	/* unexpected XFER_RDY error */
-	DATA_OVER_UNDER = (1U << 16),	/* data overflow/underflow */
-	INTERLOCK	= (1U << 15),	/* interlock error */
-	NAK		= (1U << 14),	/* NAK rx'd */
-	ACK_NAK_TO	= (1U << 13),	/* ACK/NAK timeout */
-	CXN_CLOSED	= (1U << 12),	/* cxn closed w/out ack/nak */
-	OPEN_TO 	= (1U << 11),	/* I_T nexus lost, open cxn timeout */
-	PATH_BLOCKED	= (1U << 10),	/* I_T nexus lost, pathway blocked */
-	NO_DEST 	= (1U << 9),	/* I_T nexus lost, no destination */
-	STP_RES_BSY	= (1U << 8),	/* STP resources busy */
-	BREAK		= (1U << 7),	/* break received */
-	BAD_DEST	= (1U << 6),	/* bad destination */
-	BAD_PROTO	= (1U << 5),	/* protocol not supported */
-	BAD_RATE	= (1U << 4),	/* cxn rate not supported */
-	WRONG_DEST	= (1U << 3),	/* wrong destination error */
-	CREDIT_TO	= (1U << 2),	/* credit timeout */
-	WDOG_TO 	= (1U << 1),	/* watchdog timeout */
-	BUF_PAR 	= (1U << 0),	/* buffer parity error */
-};
-
-enum error_info_rec_2 {
-	SLOT_BSY_ERR	= (1U << 31),	/* Slot Busy Error */
-	GRD_CHK_ERR	= (1U << 14),	/* Guard Check Error */
-	APP_CHK_ERR	= (1U << 13),	/* Application Check error */
-	REF_CHK_ERR	= (1U << 12),	/* Reference Check Error */
-	USR_BLK_NM	= (1U << 0),	/* User Block Number */
-};
-
-struct mvs_chip_info {
-	u32		n_phy;
-	u32		srs_sz;
-	u32		slot_width;
-};
-
-struct mvs_err_info {
-	__le32			flags;
-	__le32			flags2;
-};
-
-struct mvs_prd {
-	__le64			addr;		/* 64-bit buffer address */
-	__le32			reserved;
-	__le32			len;		/* 16-bit length */
-};
-
-struct mvs_cmd_hdr {
-	__le32			flags;		/* PRD tbl len; SAS, SATA ctl */
-	__le32			lens;		/* cmd, max resp frame len */
-	__le32			tags;		/* targ port xfer tag; tag */
-	__le32			data_len;	/* data xfer len */
-	__le64			cmd_tbl;	/* command table address */
-	__le64			open_frame;	/* open addr frame address */
-	__le64			status_buf;	/* status buffer address */
-	__le64			prd_tbl;	/* PRD tbl address */
-	__le32			reserved[4];
-};
-
-struct mvs_port {
-	struct asd_sas_port	sas_port;
-	u8			port_attached;
-	u8			taskfileset;
-	u8			wide_port_phymap;
-	struct list_head	list;
-};
-
-struct mvs_phy {
-	struct mvs_port		*port;
-	struct asd_sas_phy	sas_phy;
-	struct sas_identify	identify;
-	struct scsi_device	*sdev;
-	u64		dev_sas_addr;
-	u64		att_dev_sas_addr;
-	u32		att_dev_info;
-	u32		dev_info;
-	u32		phy_type;
-	u32		phy_status;
-	u32		irq_status;
-	u32		frame_rcvd_size;
-	u8		frame_rcvd[32];
-	u8		phy_attached;
-	enum sas_linkrate	minimum_linkrate;
-	enum sas_linkrate	maximum_linkrate;
-};
-
-struct mvs_slot_info {
-	struct list_head	list;
-	struct sas_task		*task;
-	u32			n_elem;
-	u32			tx;
-
-	/* DMA buffer for storing cmd tbl, open addr frame, status buffer,
-	 * and PRD table
-	 */
-	void			*buf;
-	dma_addr_t		buf_dma;
-#if _MV_DUMP
-	u32			cmd_size;
-#endif
-
-	void			*response;
-	struct mvs_port		*port;
+struct mvs_task_exec_info {
+	struct sas_task *task;
+	struct mvs_cmd_hdr *hdr;
+	struct mvs_port *port;
+	u32 tag;
+	int n_elem;
 };
 
-struct mvs_info {
-	unsigned long		flags;
-
-	spinlock_t		lock;		/* host-wide lock */
-	struct pci_dev		*pdev;		/* our device */
-	void __iomem		*regs;		/* enhanced mode registers */
-	void __iomem		*peri_regs;	/* peripheral registers */
-
-	u8			sas_addr[SAS_ADDR_SIZE];
-	struct sas_ha_struct	sas;		/* SCSI/SAS glue */
-	struct Scsi_Host	*shost;
-
-	__le32			*tx;		/* TX (delivery) DMA ring */
-	dma_addr_t		tx_dma;
-	u32			tx_prod;	/* cached next-producer idx */
-
-	__le32			*rx;		/* RX (completion) DMA ring */
-	dma_addr_t		rx_dma;
-	u32			rx_cons;	/* RX consumer idx */
-
-	__le32			*rx_fis;	/* RX'd FIS area */
-	dma_addr_t		rx_fis_dma;
-
-	struct mvs_cmd_hdr	*slot;	/* DMA command header slots */
-	dma_addr_t		slot_dma;
-
-	const struct mvs_chip_info *chip;
+static void mvs_release_task(struct mvs_info *mvi, int phy_no);
+static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i);
+static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
+					int get_st);
+static int mvs_int_rx(struct mvs_info *mvi, bool self_clear);
+static void mvs_slot_reset(struct mvs_info *mvi, struct sas_task *task,
+				u32 slot_idx);
 
-	u8			tags[MVS_SLOTS];
-	struct mvs_slot_info	slot_info[MVS_SLOTS];
-				/* further per-slot information */
-	struct mvs_phy		phy[MVS_MAX_PHYS];
-	struct mvs_port		port[MVS_MAX_PHYS];
-#ifdef MVS_USE_TASKLET
-	struct tasklet_struct	tasklet;
-#endif
-};
+static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
+{
+	if (task->lldd_task) {
+		struct mvs_slot_info *slot;
+		slot = (struct mvs_slot_info *) task->lldd_task;
+		*tag = slot - mvi->slot_info;
+		return 1;
+	}
+	return 0;
+}
 
-static int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
-			   void *funcdata);
-static u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port);
-static void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val);
-static u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port);
-static void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val);
-static void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val);
-static u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port);
+static void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
+{
+	void *bitmap = (void *) &mvi->tags;
+	clear_bit(tag, bitmap);
+}
 
-static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i);
-static void mvs_detect_porttype(struct mvs_info *mvi, int i);
-static void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st);
-static void mvs_release_task(struct mvs_info *mvi, int phy_no);
+static void mvs_tag_free(struct mvs_info *mvi, u32 tag)
+{
+	mvs_tag_clear(mvi, tag);
+}
 
-static int mvs_scan_finished(struct Scsi_Host *, unsigned long);
-static void mvs_scan_start(struct Scsi_Host *);
-static int mvs_slave_configure(struct scsi_device *sdev);
+static void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
+{
+	void *bitmap = (void *) &mvi->tags;
+	set_bit(tag, bitmap);
+}
 
-static struct scsi_transport_template *mvs_stt;
+static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
+{
+	unsigned int index, tag;
+	void *bitmap = (void *) &mvi->tags;
 
-static const struct mvs_chip_info mvs_chips[] = {
-	[chip_6320] =		{ 2, 16, 9  },
-	[chip_6440] =		{ 4, 16, 9  },
-	[chip_6480] =		{ 8, 32, 10 },
-};
+	index = find_first_zero_bit(bitmap, MVS_SLOTS);
+	tag = index;
+	if (tag >= MVS_SLOTS)
+		return -SAS_QUEUE_FULL;
+	mvs_tag_set(mvi, tag);
+	*tag_out = tag;
+	return 0;
+}
 
-static struct scsi_host_template mvs_sht = {
-	.module			= THIS_MODULE,
-	.name			= DRV_NAME,
-	.queuecommand		= sas_queuecommand,
-	.target_alloc		= sas_target_alloc,
-	.slave_configure	= mvs_slave_configure,
-	.slave_destroy		= sas_slave_destroy,
-	.scan_finished		= mvs_scan_finished,
-	.scan_start		= mvs_scan_start,
-	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
-	.bios_param		= sas_bios_param,
-	.can_queue		= 1,
-	.cmd_per_lun		= 1,
-	.this_id		= -1,
-	.sg_tablesize		= SG_ALL,
-	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
-	.use_clustering		= ENABLE_CLUSTERING,
-	.eh_device_reset_handler	= sas_eh_device_reset_handler,
-	.eh_bus_reset_handler	= sas_eh_bus_reset_handler,
-	.slave_alloc		= sas_slave_alloc,
-	.target_destroy		= sas_target_destroy,
-	.ioctl			= sas_ioctl,
-};
+void mvs_tag_init(struct mvs_info *mvi)
+{
+	int i;
+	for (i = 0; i < MVS_SLOTS; ++i)
+		mvs_tag_clear(mvi, i);
+}
 
 static void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
 {
@@ -848,959 +223,270 @@ static void mvs_hba_cq_dump(struct mvs_info *mvi)
 #endif
 }
 
-static void mvs_hba_interrupt_enable(struct mvs_info *mvi)
+/* FIXME: locking? */
+int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, void *funcdata)
 {
-	void __iomem *regs = mvi->regs;
+	struct mvs_info *mvi = sas_phy->ha->lldd_ha;
+	int rc = 0, phy_id = sas_phy->id;
 	u32 tmp;
 
-	tmp = mr32(GBL_CTL);
-
-	mw32(GBL_CTL, tmp | INT_EN);
-}
+	tmp = mvs_read_phy_ctl(mvi, phy_id);
 
-static void mvs_hba_interrupt_disable(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
+	switch (func) {
+	case PHY_FUNC_SET_LINK_RATE:{
+			struct sas_phy_linkrates *rates = funcdata;
+			u32 lrmin = 0, lrmax = 0;
 
-	tmp = mr32(GBL_CTL);
+			lrmin = (rates->minimum_linkrate << 8);
+			lrmax = (rates->maximum_linkrate << 12);
 
-	mw32(GBL_CTL, tmp & ~INT_EN);
-}
+			if (lrmin) {
+				tmp &= ~(0xf << 8);
+				tmp |= lrmin;
+			}
+			if (lrmax) {
+				tmp &= ~(0xf << 12);
+				tmp |= lrmax;
+			}
+			mvs_write_phy_ctl(mvi, phy_id, tmp);
+			break;
+		}
 
-static int mvs_int_rx(struct mvs_info *mvi, bool self_clear);
+	case PHY_FUNC_HARD_RESET:
+		if (tmp & PHY_RST_HARD)
+			break;
+		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST_HARD);
+		break;
 
-/* move to PCI layer or libata core? */
-static int pci_go_64(struct pci_dev *pdev)
-{
-	int rc;
+	case PHY_FUNC_LINK_RESET:
+		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST);
+		break;
 
-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-		if (rc) {
-			rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-			if (rc) {
-				dev_printk(KERN_ERR, &pdev->dev,
-					   "64-bit DMA enable failed\n");
-				return rc;
-			}
-		}
-	} else {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc) {
-			dev_printk(KERN_ERR, &pdev->dev,
-				   "32-bit DMA enable failed\n");
-			return rc;
-		}
-		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc) {
-			dev_printk(KERN_ERR, &pdev->dev,
-				   "32-bit consistent DMA enable failed\n");
-			return rc;
-		}
+	case PHY_FUNC_DISABLE:
+	case PHY_FUNC_RELEASE_SPINUP_HOLD:
+	default:
+		rc = -EOPNOTSUPP;
 	}
 
 	return rc;
 }
 
-static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
+static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
 {
-	if (task->lldd_task) {
-		struct mvs_slot_info *slot;
-		slot = (struct mvs_slot_info *) task->lldd_task;
-		*tag = slot - mvi->slot_info;
-		return 1;
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
+
+	if (!phy->phy_attached)
+		return;
+
+	if (sas_phy->phy) {
+		struct sas_phy *sphy = sas_phy->phy;
+
+		sphy->negotiated_linkrate = sas_phy->linkrate;
+		sphy->minimum_linkrate = phy->minimum_linkrate;
+		sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+		sphy->maximum_linkrate = phy->maximum_linkrate;
+		sphy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
 	}
-	return 0;
-}
 
-static void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
-{
-	void *bitmap = (void *) &mvi->tags;
-	clear_bit(tag, bitmap);
-}
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		struct sas_identify_frame *id;
 
-static void mvs_tag_free(struct mvs_info *mvi, u32 tag)
-{
-	mvs_tag_clear(mvi, tag);
+		id = (struct sas_identify_frame *)phy->frame_rcvd;
+		id->dev_type = phy->identify.device_type;
+		id->initiator_bits = SAS_PROTOCOL_ALL;
+		id->target_bits = phy->identify.target_port_protocols;
+	} else if (phy->phy_type & PORT_TYPE_SATA) {
+		/* TODO */
+	}
+	mvi->sas.sas_phy[i]->frame_rcvd_size = phy->frame_rcvd_size;
+	mvi->sas.notify_port_event(mvi->sas.sas_phy[i],
+				   PORTE_BYTES_DMAED);
 }
 
-static void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
+int mvs_slave_configure(struct scsi_device *sdev)
 {
-	void *bitmap = (void *) &mvi->tags;
-	set_bit(tag, bitmap);
-}
+	struct domain_device *dev = sdev_to_domain_dev(sdev);
+	int ret = sas_slave_configure(sdev);
 
-static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
-{
-	unsigned int index, tag;
-	void *bitmap = (void *) &mvi->tags;
+	if (ret)
+		return ret;
 
-	index = find_first_zero_bit(bitmap, MVS_SLOTS);
-	tag = index;
-	if (tag >= MVS_SLOTS)
-		return -SAS_QUEUE_FULL;
-	mvs_tag_set(mvi, tag);
-	*tag_out = tag;
+	if (dev_is_sata(dev)) {
+		/* struct ata_port *ap = dev->sata_dev.ap; */
+		/* struct ata_device *adev = ap->link.device; */
+
+		/* clamp at no NCQ for the time being */
+		/* adev->flags |= ATA_DFLAG_NCQ_OFF; */
+		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1);
+	}
 	return 0;
 }
 
-static void mvs_tag_init(struct mvs_info *mvi)
+void mvs_scan_start(struct Scsi_Host *shost)
 {
 	int i;
-	for (i = 0; i < MVS_SLOTS; ++i)
-		mvs_tag_clear(mvi, i);
+	struct mvs_info *mvi = SHOST_TO_SAS_HA(shost)->lldd_ha;
+
+	for (i = 0; i < mvi->chip->n_phy; ++i) {
+		mvs_bytes_dmaed(mvi, i);
+	}
 }
 
-#ifndef MVS_DISABLE_NVRAM
-static int mvs_eep_read(void __iomem *regs, u32 addr, u32 *data)
+int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time)
 {
-	int timeout = 1000;
-
-	if (addr & ~SPI_ADDR_MASK)
-		return -EINVAL;
-
-	writel(addr, regs + SPI_CMD);
-	writel(TWSI_RD, regs + SPI_CTL);
-
-	while (timeout-- > 0) {
-		if (readl(regs + SPI_CTL) & TWSI_RDY) {
-			*data = readl(regs + SPI_DATA);
-			return 0;
-		}
-
-		udelay(10);
-	}
-
-	return -EBUSY;
+	/* give the phy enabling interrupt event time to come in (1s
+	 * is empirically about all it takes) */
+	if (time < HZ)
+		return 0;
+	/* Wait for discovery to finish */
+	scsi_flush_work(shost);
+	return 1;
 }
 
-static int mvs_eep_read_buf(void __iomem *regs, u32 addr,
-			    void *buf, u32 buflen)
+static int mvs_task_prep_smp(struct mvs_info *mvi,
+			     struct mvs_task_exec_info *tei)
 {
-	u32 addr_end, tmp_addr, i, j;
-	u32 tmp = 0;
-	int rc;
-	u8 *tmp8, *buf8 = buf;
-
-	addr_end = addr + buflen;
-	tmp_addr = ALIGN(addr, 4);
-	if (addr > 0xff)
-		return -EINVAL;
-
-	j = addr & 0x3;
-	if (j) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
+	int elem, rc, i;
+	struct sas_task *task = tei->task;
+	struct mvs_cmd_hdr *hdr = tei->hdr;
+	struct scatterlist *sg_req, *sg_resp;
+	u32 req_len, resp_len, tag = tei->tag;
+	void *buf_tmp;
+	u8 *buf_oaf;
+	dma_addr_t buf_tmp_dma;
+	struct mvs_prd *buf_prd;
+	struct scatterlist *sg;
+	struct mvs_slot_info *slot = &mvi->slot_info[tag];
+	struct asd_sas_port *sas_port = task->dev->port;
+	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+#if _MV_DUMP
+	u8 *buf_cmd;
+	void *from;
+#endif
+	/*
+	 * DMA-map SMP request, response buffers
+	 */
+	sg_req = &task->smp_task.smp_req;
+	elem = pci_map_sg(mvi->pdev, sg_req, 1, PCI_DMA_TODEVICE);
+	if (!elem)
+		return -ENOMEM;
+	req_len = sg_dma_len(sg_req);
 
-		tmp8 = (u8 *)&tmp;
-		for (i = j; i < 4; i++)
-			*buf8++ = tmp8[i];
+	sg_resp = &task->smp_task.smp_resp;
+	elem = pci_map_sg(mvi->pdev, sg_resp, 1, PCI_DMA_FROMDEVICE);
+	if (!elem) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+	resp_len = sg_dma_len(sg_resp);
 
-		tmp_addr += 4;
+	/* must be in dwords */
+	if ((req_len & 0x3) || (resp_len & 0x3)) {
+		rc = -EINVAL;
+		goto err_out_2;
 	}
 
-	for (j = ALIGN(addr_end, 4); tmp_addr < j; tmp_addr += 4) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
+	/*
+	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
+	 */
 
-		memcpy(buf8, &tmp, 4);
-		buf8 += 4;
-	}
+	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
+	buf_tmp = slot->buf;
+	buf_tmp_dma = slot->buf_dma;
 
-	if (tmp_addr < addr_end) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
+#if _MV_DUMP
+	buf_cmd = buf_tmp;
+	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
+	buf_tmp += req_len;
+	buf_tmp_dma += req_len;
+	slot->cmd_size = req_len;
+#else
+	hdr->cmd_tbl = cpu_to_le64(sg_dma_address(sg_req));
+#endif
 
-		tmp8 = (u8 *)&tmp;
-		j = addr_end - tmp_addr;
-		for (i = 0; i < j; i++)
-			*buf8++ = tmp8[i];
+	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
+	buf_oaf = buf_tmp;
+	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
 
-		tmp_addr += 4;
-	}
+	buf_tmp += MVS_OAF_SZ;
+	buf_tmp_dma += MVS_OAF_SZ;
 
-	return 0;
-}
-#endif
+	/* region 3: PRD table ********************************************* */
+	buf_prd = buf_tmp;
+	if (tei->n_elem)
+		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
+	else
+		hdr->prd_tbl = 0;
 
-static int mvs_nvram_read(struct mvs_info *mvi, u32 addr,
-			  void *buf, u32 buflen)
-{
-#ifndef MVS_DISABLE_NVRAM
-	void __iomem *regs = mvi->regs;
-	int rc, i;
-	u32 sum;
-	u8 hdr[2], *tmp;
-	const char *msg;
+	i = sizeof(struct mvs_prd) * tei->n_elem;
+	buf_tmp += i;
+	buf_tmp_dma += i;
 
-	rc = mvs_eep_read_buf(regs, addr, &hdr, 2);
-	if (rc) {
-		msg = "nvram hdr read failed";
-		goto err_out;
-	}
-	rc = mvs_eep_read_buf(regs, addr + 2, buf, buflen);
-	if (rc) {
-		msg = "nvram read failed";
-		goto err_out;
-	}
+	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
+	slot->response = buf_tmp;
+	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
 
-	if (hdr[0] != 0x5A) {
-		/* entry id */
-		msg = "invalid nvram entry id";
-		rc = -ENOENT;
-		goto err_out;
-	}
+	/*
+	 * Fill in TX ring and command slot header
+	 */
+	slot->tx = mvi->tx_prod;
+	mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) |
+					TXQ_MODE_I | tag |
+					(sas_port->phy_mask << TXQ_PHY_SHIFT));
 
-	tmp = buf;
-	sum = ((u32)hdr[0]) + ((u32)hdr[1]);
-	for (i = 0; i < buflen; i++)
-		sum += ((u32)tmp[i]);
+	hdr->flags |= flags;
+	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4));
+	hdr->tags = cpu_to_le32(tag);
+	hdr->data_len = 0;
 
-	if (sum) {
-		msg = "nvram checksum failure";
-		rc = -EILSEQ;
-		goto err_out;
+	/* generate open address frame hdr (first 12 bytes) */
+	buf_oaf[0] = (1 << 7) | (0 << 4) | 0x01; /* initiator, SMP, ftype 1h */
+	buf_oaf[1] = task->dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = 0xFFFF;		/* SAS SPEC */
+	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
+
+	/* fill in PRD (scatter/gather) table, if any */
+	for_each_sg(task->scatter, sg, tei->n_elem, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd++;
 	}
 
+#if _MV_DUMP
+	/* copy cmd table */
+	from = kmap_atomic(sg_page(sg_req), KM_IRQ0);
+	memcpy(buf_cmd, from + sg_req->offset, req_len);
+	kunmap_atomic(from, KM_IRQ0);
+#endif
 	return 0;
 
+err_out_2:
+	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_resp, 1,
+		     PCI_DMA_FROMDEVICE);
 err_out:
-	dev_printk(KERN_ERR, &mvi->pdev->dev, "%s", msg);
+	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_req, 1,
+		     PCI_DMA_TODEVICE);
 	return rc;
-#else
-	/* FIXME , For SAS target mode */
-	memcpy(buf, "\x50\x05\x04\x30\x11\xab\x00\x00", 8);
-	return 0;
-#endif
 }
 
-static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
+static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag)
 {
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
-
-	if (!phy->phy_attached)
-		return;
-
-	if (sas_phy->phy) {
-		struct sas_phy *sphy = sas_phy->phy;
-
-		sphy->negotiated_linkrate = sas_phy->linkrate;
-		sphy->minimum_linkrate = phy->minimum_linkrate;
-		sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
-		sphy->maximum_linkrate = phy->maximum_linkrate;
-		sphy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
-	}
-
-	if (phy->phy_type & PORT_TYPE_SAS) {
-		struct sas_identify_frame *id;
+	struct ata_queued_cmd *qc = task->uldd_task;
 
-		id = (struct sas_identify_frame *)phy->frame_rcvd;
-		id->dev_type = phy->identify.device_type;
-		id->initiator_bits = SAS_PROTOCOL_ALL;
-		id->target_bits = phy->identify.target_port_protocols;
-	} else if (phy->phy_type & PORT_TYPE_SATA) {
-		/* TODO */
+	if (qc) {
+		if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
+			qc->tf.command == ATA_CMD_FPDMA_READ) {
+			*tag = qc->tag;
+			return 1;
+		}
 	}
-	mvi->sas.sas_phy[i]->frame_rcvd_size = phy->frame_rcvd_size;
-	mvi->sas.notify_port_event(mvi->sas.sas_phy[i],
-				   PORTE_BYTES_DMAED);
-}
 
-static int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time)
-{
-	/* give the phy enabling interrupt event time to come in (1s
-	 * is empirically about all it takes) */
-	if (time < HZ)
-		return 0;
-	/* Wait for discovery to finish */
-	scsi_flush_work(shost);
-	return 1;
+	return 0;
 }
 
-static void mvs_scan_start(struct Scsi_Host *shost)
-{
-	int i;
-	struct mvs_info *mvi = SHOST_TO_SAS_HA(shost)->lldd_ha;
-
-	for (i = 0; i < mvi->chip->n_phy; ++i) {
-		mvs_bytes_dmaed(mvi, i);
-	}
-}
-
-static int mvs_slave_configure(struct scsi_device *sdev)
-{
-	struct domain_device *dev = sdev_to_domain_dev(sdev);
-	int ret = sas_slave_configure(sdev);
-
-	if (ret)
-		return ret;
-
-	if (dev_is_sata(dev)) {
-		/* struct ata_port *ap = dev->sata_dev.ap; */
-		/* struct ata_device *adev = ap->link.device; */
-
-		/* clamp at no NCQ for the time being */
-		/* adev->flags |= ATA_DFLAG_NCQ_OFF; */
-		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1);
-	}
-	return 0;
-}
-
-static void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
-{
-	struct pci_dev *pdev = mvi->pdev;
-	struct sas_ha_struct *sas_ha = &mvi->sas;
-	struct mvs_phy *phy = &mvi->phy[phy_no];
-	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-
-	phy->irq_status = mvs_read_port_irq_stat(mvi, phy_no);
-	/*
-	* events is port event now ,
-	* we need check the interrupt status which belongs to per port.
-	*/
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Port %d Event = %X\n",
-		phy_no, phy->irq_status);
-
-	if (phy->irq_status & (PHYEV_POOF | PHYEV_DEC_ERR)) {
-		mvs_release_task(mvi, phy_no);
-		if (!mvs_is_phy_ready(mvi, phy_no)) {
-			sas_phy_disconnected(sas_phy);
-			sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
-			dev_printk(KERN_INFO, &pdev->dev,
-				"Port %d Unplug Notice\n", phy_no);
-
-		} else
-			mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET, NULL);
-	}
-	if (!(phy->irq_status & PHYEV_DEC_ERR)) {
-		if (phy->irq_status & PHYEV_COMWAKE) {
-			u32 tmp = mvs_read_port_irq_mask(mvi, phy_no);
-			mvs_write_port_irq_mask(mvi, phy_no,
-						tmp | PHYEV_SIG_FIS);
-		}
-		if (phy->irq_status & (PHYEV_SIG_FIS | PHYEV_ID_DONE)) {
-			phy->phy_status = mvs_is_phy_ready(mvi, phy_no);
-			if (phy->phy_status) {
-				mvs_detect_porttype(mvi, phy_no);
-
-				if (phy->phy_type & PORT_TYPE_SATA) {
-					u32 tmp = mvs_read_port_irq_mask(mvi,
-								phy_no);
-					tmp &= ~PHYEV_SIG_FIS;
-					mvs_write_port_irq_mask(mvi,
-								phy_no, tmp);
-				}
-
-				mvs_update_phyinfo(mvi, phy_no, 0);
-				sas_ha->notify_phy_event(sas_phy,
-							PHYE_OOB_DONE);
-				mvs_bytes_dmaed(mvi, phy_no);
-			} else {
-				dev_printk(KERN_DEBUG, &pdev->dev,
-					"plugin interrupt but phy is gone\n");
-				mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET,
-							NULL);
-			}
-		} else if (phy->irq_status & PHYEV_BROAD_CH) {
-			mvs_release_task(mvi, phy_no);
-			sas_ha->notify_port_event(sas_phy,
-						PORTE_BROADCAST_RCVD);
-		}
-	}
-	mvs_write_port_irq_stat(mvi, phy_no, phy->irq_status);
-}
-
-static void mvs_int_sata(struct mvs_info *mvi)
-{
-	u32 tmp;
-	void __iomem *regs = mvi->regs;
-	tmp = mr32(INT_STAT_SRS);
-	mw32(INT_STAT_SRS, tmp & 0xFFFF);
-}
-
-static void mvs_slot_reset(struct mvs_info *mvi, struct sas_task *task,
-				u32 slot_idx)
-{
-	void __iomem *regs = mvi->regs;
-	struct domain_device *dev = task->dev;
-	struct asd_sas_port *sas_port = dev->port;
-	struct mvs_port *port = mvi->slot_info[slot_idx].port;
-	u32 reg_set, phy_mask;
-
-	if (!sas_protocol_ata(task->task_proto)) {
-		reg_set = 0;
-		phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
-				sas_port->phy_mask;
-	} else {
-		reg_set = port->taskfileset;
-		phy_mask = sas_port->phy_mask;
-	}
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | slot_idx |
-					(TXQ_CMD_SLOT_RESET << TXQ_CMD_SHIFT) |
-					(phy_mask << TXQ_PHY_SHIFT) |
-					(reg_set << TXQ_SRS_SHIFT));
-
-	mw32(TX_PROD_IDX, mvi->tx_prod);
-	mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
-}
-
-static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
-			u32 slot_idx, int err)
-{
-	struct mvs_port *port = mvi->slot_info[slot_idx].port;
-	struct task_status_struct *tstat = &task->task_status;
-	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
-	int stat = SAM_GOOD;
-
-	resp->frame_len = sizeof(struct dev_to_host_fis);
-	memcpy(&resp->ending_fis[0],
-	       SATA_RECEIVED_D2H_FIS(port->taskfileset),
-	       sizeof(struct dev_to_host_fis));
-	tstat->buf_valid_size = sizeof(*resp);
-	if (unlikely(err))
-		stat = SAS_PROTO_RESPONSE;
-	return stat;
-}
-
-static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
-{
-	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
-	mvs_tag_clear(mvi, slot_idx);
-}
-
-static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
-			  struct mvs_slot_info *slot, u32 slot_idx)
-{
-	if (!sas_protocol_ata(task->task_proto))
-		if (slot->n_elem)
-			pci_unmap_sg(mvi->pdev, task->scatter,
-				     slot->n_elem, task->data_dir);
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SMP:
-		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_resp, 1,
-			     PCI_DMA_FROMDEVICE);
-		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_req, 1,
-			     PCI_DMA_TODEVICE);
-		break;
-
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SSP:
-	default:
-		/* do nothing */
-		break;
-	}
-	list_del(&slot->list);
-	task->lldd_task = NULL;
-	slot->task = NULL;
-	slot->port = NULL;
-}
-
-static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
-			 u32 slot_idx)
-{
-	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
-	u32 err_dw0 = le32_to_cpu(*(u32 *) (slot->response));
-	u32 err_dw1 = le32_to_cpu(*(u32 *) (slot->response + 4));
-	int stat = SAM_CHECK_COND;
-
-	if (err_dw1 & SLOT_BSY_ERR) {
-		stat = SAS_QUEUE_FULL;
-		mvs_slot_reset(mvi, task, slot_idx);
-	}
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SSP:
-		break;
-	case SAS_PROTOCOL_SMP:
-		break;
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
-		if (err_dw0 & TFILE_ERR)
-			stat = mvs_sata_done(mvi, task, slot_idx, 1);
-		break;
-	default:
-		break;
-	}
-
-	mvs_hexdump(16, (u8 *) slot->response, 0);
-	return stat;
-}
-
-static int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
-{
-	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
-	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
-	struct sas_task *task = slot->task;
-	struct task_status_struct *tstat;
-	struct mvs_port *port;
-	bool aborted;
-	void *to;
-
-	if (unlikely(!task || !task->lldd_task))
-		return -1;
-
-	mvs_hba_cq_dump(mvi);
-
-	spin_lock(&task->task_state_lock);
-	aborted = task->task_state_flags & SAS_TASK_STATE_ABORTED;
-	if (!aborted) {
-		task->task_state_flags &=
-		    ~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
-		task->task_state_flags |= SAS_TASK_STATE_DONE;
-	}
-	spin_unlock(&task->task_state_lock);
-
-	if (aborted) {
-		mvs_slot_task_free(mvi, task, slot, slot_idx);
-		mvs_slot_free(mvi, rx_desc);
-		return -1;
-	}
-
-	port = slot->port;
-	tstat = &task->task_status;
-	memset(tstat, 0, sizeof(*tstat));
-	tstat->resp = SAS_TASK_COMPLETE;
-
-	if (unlikely(!port->port_attached || flags)) {
-		mvs_slot_err(mvi, task, slot_idx);
-		if (!sas_protocol_ata(task->task_proto))
-			tstat->stat = SAS_PHY_DOWN;
-		goto out;
-	}
-
-	/* error info record present */
-	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
-		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
-		goto out;
-	}
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SSP:
-		/* hw says status == 0, datapres == 0 */
-		if (rx_desc & RXQ_GOOD) {
-			tstat->stat = SAM_GOOD;
-			tstat->resp = SAS_TASK_COMPLETE;
-		}
-		/* response frame present */
-		else if (rx_desc & RXQ_RSP) {
-			struct ssp_response_iu *iu =
-			    slot->response + sizeof(struct mvs_err_info);
-			sas_ssp_task_response(&mvi->pdev->dev, task, iu);
-		}
-
-		/* should never happen? */
-		else
-			tstat->stat = SAM_CHECK_COND;
-		break;
-
-	case SAS_PROTOCOL_SMP: {
-			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
-			tstat->stat = SAM_GOOD;
-			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
-			memcpy(to + sg_resp->offset,
-				slot->response + sizeof(struct mvs_err_info),
-				sg_dma_len(sg_resp));
-			kunmap_atomic(to, KM_IRQ0);
-			break;
-		}
-
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: {
-			tstat->stat = mvs_sata_done(mvi, task, slot_idx, 0);
-			break;
-		}
-
-	default:
-		tstat->stat = SAM_CHECK_COND;
-		break;
-	}
-
-out:
-	mvs_slot_task_free(mvi, task, slot, slot_idx);
-	if (unlikely(tstat->stat != SAS_QUEUE_FULL))
-		mvs_slot_free(mvi, rx_desc);
-
-	spin_unlock(&mvi->lock);
-	task->task_done(task);
-	spin_lock(&mvi->lock);
-	return tstat->stat;
-}
-
-static void mvs_release_task(struct mvs_info *mvi, int phy_no)
-{
-	struct list_head *pos, *n;
-	struct mvs_slot_info *slot;
-	struct mvs_phy *phy = &mvi->phy[phy_no];
-	struct mvs_port *port = phy->port;
-	u32 rx_desc;
-
-	if (!port)
-		return;
-
-	list_for_each_safe(pos, n, &port->list) {
-		slot = container_of(pos, struct mvs_slot_info, list);
-		rx_desc = (u32) (slot - mvi->slot_info);
-		mvs_slot_complete(mvi, rx_desc, 1);
-	}
-}
-
-static void mvs_int_full(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp, stat;
-	int i;
-
-	stat = mr32(INT_STAT);
-
-	mvs_int_rx(mvi, false);
-
-	for (i = 0; i < MVS_MAX_PORTS; i++) {
-		tmp = (stat >> i) & (CINT_PORT | CINT_PORT_STOPPED);
-		if (tmp)
-			mvs_int_port(mvi, i, tmp);
-	}
-
-	if (stat & CINT_SRS)
-		mvs_int_sata(mvi);
-
-	mw32(INT_STAT, stat);
-}
-
-static int mvs_int_rx(struct mvs_info *mvi, bool self_clear)
-{
-	void __iomem *regs = mvi->regs;
-	u32 rx_prod_idx, rx_desc;
-	bool attn = false;
-	struct pci_dev *pdev = mvi->pdev;
-
-	/* the first dword in the RX ring is special: it contains
-	 * a mirror of the hardware's RX producer index, so that
-	 * we don't have to stall the CPU reading that register.
-	 * The actual RX ring is offset by one dword, due to this.
-	 */
-	rx_prod_idx = mvi->rx_cons;
-	mvi->rx_cons = le32_to_cpu(mvi->rx[0]);
-	if (mvi->rx_cons == 0xfff)	/* h/w hasn't touched RX ring yet */
-		return 0;
-
-	/* The CMPL_Q may come late, read from register and try again
-	* note: if coalescing is enabled,
-	* it will need to read from register every time for sure
-	*/
-	if (mvi->rx_cons == rx_prod_idx)
-		mvi->rx_cons = mr32(RX_CONS_IDX) & RX_RING_SZ_MASK;
-
-	if (mvi->rx_cons == rx_prod_idx)
-		return 0;
-
-	while (mvi->rx_cons != rx_prod_idx) {
-
-		/* increment our internal RX consumer pointer */
-		rx_prod_idx = (rx_prod_idx + 1) & (MVS_RX_RING_SZ - 1);
-
-		rx_desc = le32_to_cpu(mvi->rx[rx_prod_idx + 1]);
-
-		if (likely(rx_desc & RXQ_DONE))
-			mvs_slot_complete(mvi, rx_desc, 0);
-		if (rx_desc & RXQ_ATTN) {
-			attn = true;
-			dev_printk(KERN_DEBUG, &pdev->dev, "ATTN %X\n",
-				rx_desc);
-		} else if (rx_desc & RXQ_ERR) {
-			if (!(rx_desc & RXQ_DONE))
-				mvs_slot_complete(mvi, rx_desc, 0);
-			dev_printk(KERN_DEBUG, &pdev->dev, "RXQ_ERR %X\n",
-				rx_desc);
-		} else if (rx_desc & RXQ_SLOT_RESET) {
-			dev_printk(KERN_DEBUG, &pdev->dev, "Slot reset[%X]\n",
-				rx_desc);
-			mvs_slot_free(mvi, rx_desc);
-		}
-	}
-
-	if (attn && self_clear)
-		mvs_int_full(mvi);
-
-	return 0;
-}
-
-#ifdef MVS_USE_TASKLET
-static void mvs_tasklet(unsigned long data)
-{
-	struct mvs_info *mvi = (struct mvs_info *) data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mvi->lock, flags);
-
-#ifdef MVS_DISABLE_MSI
-	mvs_int_full(mvi);
-#else
-	mvs_int_rx(mvi, true);
-#endif
-	spin_unlock_irqrestore(&mvi->lock, flags);
-}
-#endif
-
-static irqreturn_t mvs_interrupt(int irq, void *opaque)
-{
-	struct mvs_info *mvi = opaque;
-	void __iomem *regs = mvi->regs;
-	u32 stat;
-
-	stat = mr32(GBL_INT_STAT);
-
-	if (stat == 0 || stat == 0xffffffff)
-		return IRQ_NONE;
-
-	/* clear CMD_CMPLT ASAP */
-	mw32_f(INT_STAT, CINT_DONE);
-
-#ifndef MVS_USE_TASKLET
-	spin_lock(&mvi->lock);
-
-	mvs_int_full(mvi);
-
-	spin_unlock(&mvi->lock);
-#else
-	tasklet_schedule(&mvi->tasklet);
-#endif
-	return IRQ_HANDLED;
-}
-
-#ifndef MVS_DISABLE_MSI
-static irqreturn_t mvs_msi_interrupt(int irq, void *opaque)
-{
-	struct mvs_info *mvi = opaque;
-
-#ifndef MVS_USE_TASKLET
-	spin_lock(&mvi->lock);
-
-	mvs_int_rx(mvi, true);
-
-	spin_unlock(&mvi->lock);
-#else
-	tasklet_schedule(&mvi->tasklet);
-#endif
-	return IRQ_HANDLED;
-}
-#endif
-
-struct mvs_task_exec_info {
-	struct sas_task *task;
-	struct mvs_cmd_hdr *hdr;
-	struct mvs_port *port;
-	u32 tag;
-	int n_elem;
-};
-
-static int mvs_task_prep_smp(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
-{
-	int elem, rc, i;
-	struct sas_task *task = tei->task;
-	struct mvs_cmd_hdr *hdr = tei->hdr;
-	struct scatterlist *sg_req, *sg_resp;
-	u32 req_len, resp_len, tag = tei->tag;
-	void *buf_tmp;
-	u8 *buf_oaf;
-	dma_addr_t buf_tmp_dma;
-	struct mvs_prd *buf_prd;
-	struct scatterlist *sg;
-	struct mvs_slot_info *slot = &mvi->slot_info[tag];
-	struct asd_sas_port *sas_port = task->dev->port;
-	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
-#if _MV_DUMP
-	u8 *buf_cmd;
-	void *from;
-#endif
-	/*
-	 * DMA-map SMP request, response buffers
-	 */
-	sg_req = &task->smp_task.smp_req;
-	elem = pci_map_sg(mvi->pdev, sg_req, 1, PCI_DMA_TODEVICE);
-	if (!elem)
-		return -ENOMEM;
-	req_len = sg_dma_len(sg_req);
-
-	sg_resp = &task->smp_task.smp_resp;
-	elem = pci_map_sg(mvi->pdev, sg_resp, 1, PCI_DMA_FROMDEVICE);
-	if (!elem) {
-		rc = -ENOMEM;
-		goto err_out;
-	}
-	resp_len = sg_dma_len(sg_resp);
-
-	/* must be in dwords */
-	if ((req_len & 0x3) || (resp_len & 0x3)) {
-		rc = -EINVAL;
-		goto err_out_2;
-	}
-
-	/*
-	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
-	 */
-
-	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
-	buf_tmp = slot->buf;
-	buf_tmp_dma = slot->buf_dma;
-
-#if _MV_DUMP
-	buf_cmd = buf_tmp;
-	hdr->cmd_tbl = cpu_to_le64(buf_tmp_dma);
-	buf_tmp += req_len;
-	buf_tmp_dma += req_len;
-	slot->cmd_size = req_len;
-#else
-	hdr->cmd_tbl = cpu_to_le64(sg_dma_address(sg_req));
-#endif
-
-	/* region 2: open address frame area (MVS_OAF_SZ bytes) ********* */
-	buf_oaf = buf_tmp;
-	hdr->open_frame = cpu_to_le64(buf_tmp_dma);
-
-	buf_tmp += MVS_OAF_SZ;
-	buf_tmp_dma += MVS_OAF_SZ;
-
-	/* region 3: PRD table ********************************************* */
-	buf_prd = buf_tmp;
-	if (tei->n_elem)
-		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
-	else
-		hdr->prd_tbl = 0;
-
-	i = sizeof(struct mvs_prd) * tei->n_elem;
-	buf_tmp += i;
-	buf_tmp_dma += i;
-
-	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
-	slot->response = buf_tmp;
-	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
-
-	/*
-	 * Fill in TX ring and command slot header
-	 */
-	slot->tx = mvi->tx_prod;
-	mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) |
-					TXQ_MODE_I | tag |
-					(sas_port->phy_mask << TXQ_PHY_SHIFT));
-
-	hdr->flags |= flags;
-	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4));
-	hdr->tags = cpu_to_le32(tag);
-	hdr->data_len = 0;
-
-	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (0 << 4) | 0x01; /* initiator, SMP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = 0xFFFF;		/* SAS SPEC */
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
-
-	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
-
-#if _MV_DUMP
-	/* copy cmd table */
-	from = kmap_atomic(sg_page(sg_req), KM_IRQ0);
-	memcpy(buf_cmd, from + sg_req->offset, req_len);
-	kunmap_atomic(from, KM_IRQ0);
-#endif
-	return 0;
-
-err_out_2:
-	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_resp, 1,
-		     PCI_DMA_FROMDEVICE);
-err_out:
-	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_req, 1,
-		     PCI_DMA_TODEVICE);
-	return rc;
-}
-
-static void mvs_free_reg_set(struct mvs_info *mvi, struct mvs_port *port)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp, offs;
-	u8 *tfs = &port->taskfileset;
-
-	if (*tfs == MVS_ID_NOT_MAPPED)
-		return;
-
-	offs = 1U << ((*tfs & 0x0f) + PCS_EN_SATA_REG_SHIFT);
-	if (*tfs < 16) {
-		tmp = mr32(PCS);
-		mw32(PCS, tmp & ~offs);
-	} else {
-		tmp = mr32(CTL);
-		mw32(CTL, tmp & ~offs);
-	}
-
-	tmp = mr32(INT_STAT_SRS) & (1U << *tfs);
-	if (tmp)
-		mw32(INT_STAT_SRS, tmp);
-
-	*tfs = MVS_ID_NOT_MAPPED;
-}
-
-static u8 mvs_assign_reg_set(struct mvs_info *mvi, struct mvs_port *port)
-{
-	int i;
-	u32 tmp, offs;
-	void __iomem *regs = mvi->regs;
-
-	if (port->taskfileset != MVS_ID_NOT_MAPPED)
-		return 0;
-
-	tmp = mr32(PCS);
-
-	for (i = 0; i < mvi->chip->srs_sz; i++) {
-		if (i == 16)
-			tmp = mr32(CTL);
-		offs = 1U << ((i & 0x0f) + PCS_EN_SATA_REG_SHIFT);
-		if (!(tmp & offs)) {
-			port->taskfileset = i;
-
-			if (i < 16)
-				mw32(PCS, tmp | offs);
-			else
-				mw32(CTL, tmp | offs);
-			tmp = mr32(INT_STAT_SRS) & (1U << i);
-			if (tmp)
-				mw32(INT_STAT_SRS, tmp);
-			return 0;
-		}
-	}
-	return MVS_ID_NOT_MAPPED;
-}
-
-static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag)
-{
-	struct ata_queued_cmd *qc = task->uldd_task;
-
-	if (qc) {
-		if (qc->tf.command == ATA_CMD_FPDMA_WRITE ||
-			qc->tf.command == ATA_CMD_FPDMA_READ) {
-			*tag = qc->tag;
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static int mvs_task_prep_ata(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
+static int mvs_task_prep_ata(struct mvs_info *mvi,
+			     struct mvs_task_exec_info *tei)
 {
 	struct sas_task *task = tei->task;
 	struct domain_device *dev = task->dev;
@@ -2037,7 +723,7 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 	return 0;
 }
 
-static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
+int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
 {
 	struct domain_device *dev = task->dev;
 	struct mvs_info *mvi = dev->port->ha->lldd_ha;
@@ -2055,823 +741,900 @@ static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
 		dev = t->dev;
 		tei.port = &mvi->port[dev->port->id];
 
-		if (!tei.port->port_attached) {
-			if (sas_protocol_ata(t->task_proto)) {
-				rc = SAS_PHY_DOWN;
-				goto out_done;
-			} else {
-				struct task_status_struct *ts = &t->task_status;
-				ts->resp = SAS_TASK_UNDELIVERED;
-				ts->stat = SAS_PHY_DOWN;
-				t->task_done(t);
-				if (n > 1)
-					t = list_entry(t->list.next,
-							struct sas_task, list);
-				continue;
-			}
-		}
+		if (!tei.port->port_attached) {
+			if (sas_protocol_ata(t->task_proto)) {
+				rc = SAS_PHY_DOWN;
+				goto out_done;
+			} else {
+				struct task_status_struct *ts = &t->task_status;
+				ts->resp = SAS_TASK_UNDELIVERED;
+				ts->stat = SAS_PHY_DOWN;
+				t->task_done(t);
+				if (n > 1)
+					t = list_entry(t->list.next,
+							struct sas_task, list);
+				continue;
+			}
+		}
+
+		if (!sas_protocol_ata(t->task_proto)) {
+			if (t->num_scatter) {
+				n_elem = pci_map_sg(mvi->pdev, t->scatter,
+						    t->num_scatter,
+						    t->data_dir);
+				if (!n_elem) {
+					rc = -ENOMEM;
+					goto err_out;
+				}
+			}
+		} else {
+			n_elem = t->num_scatter;
+		}
+
+		rc = mvs_tag_alloc(mvi, &tag);
+		if (rc)
+			goto err_out;
+
+		slot = &mvi->slot_info[tag];
+		t->lldd_task = NULL;
+		slot->n_elem = n_elem;
+		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+		tei.task = t;
+		tei.hdr = &mvi->slot[tag];
+		tei.tag = tag;
+		tei.n_elem = n_elem;
+
+		switch (t->task_proto) {
+		case SAS_PROTOCOL_SMP:
+			rc = mvs_task_prep_smp(mvi, &tei);
+			break;
+		case SAS_PROTOCOL_SSP:
+			rc = mvs_task_prep_ssp(mvi, &tei);
+			break;
+		case SAS_PROTOCOL_SATA:
+		case SAS_PROTOCOL_STP:
+		case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+			rc = mvs_task_prep_ata(mvi, &tei);
+			break;
+		default:
+			dev_printk(KERN_ERR, &pdev->dev,
+				"unknown sas_task proto: 0x%x\n",
+				t->task_proto);
+			rc = -EINVAL;
+			break;
+		}
+
+		if (rc)
+			goto err_out_tag;
+
+		slot->task = t;
+		slot->port = tei.port;
+		t->lldd_task = (void *) slot;
+		list_add_tail(&slot->list, &slot->port->list);
+		/* TODO: select normal or high priority */
+
+		spin_lock(&t->task_state_lock);
+		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
+		spin_unlock(&t->task_state_lock);
+
+		mvs_hba_memory_dump(mvi, tag, t->task_proto);
+
+		++pass;
+		mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
+		if (n > 1)
+			t = list_entry(t->list.next, struct sas_task, list);
+	} while (--n);
+
+	rc = 0;
+	goto out_done;
+
+err_out_tag:
+	mvs_tag_free(mvi, tag);
+err_out:
+	dev_printk(KERN_ERR, &pdev->dev, "mvsas exec failed[%d]!\n", rc);
+	if (!sas_protocol_ata(t->task_proto))
+		if (n_elem)
+			pci_unmap_sg(mvi->pdev, t->scatter, n_elem,
+				     t->data_dir);
+out_done:
+	if (pass)
+		mw32(TX_PROD_IDX, (mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
+	spin_unlock_irqrestore(&mvi->lock, flags);
+	return rc;
+}
+
+static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
+{
+	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
+	mvs_tag_clear(mvi, slot_idx);
+}
+
+static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
+			  struct mvs_slot_info *slot, u32 slot_idx)
+{
+	if (!sas_protocol_ata(task->task_proto))
+		if (slot->n_elem)
+			pci_unmap_sg(mvi->pdev, task->scatter,
+				     slot->n_elem, task->data_dir);
+
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SMP:
+		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_resp, 1,
+			     PCI_DMA_FROMDEVICE);
+		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_req, 1,
+			     PCI_DMA_TODEVICE);
+		break;
+
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SSP:
+	default:
+		/* do nothing */
+		break;
+	}
+	list_del(&slot->list);
+	task->lldd_task = NULL;
+	slot->task = NULL;
+	slot->port = NULL;
+}
+
+static void mvs_update_wideport(struct mvs_info *mvi, int i)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct mvs_port *port = phy->port;
+	int j, no;
+
+	for_each_phy(port->wide_port_phymap, no, j, mvi->chip->n_phy)
+		if (no & 1) {
+			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
+			mvs_write_port_cfg_data(mvi, no,
+						port->wide_port_phymap);
+		} else {
+			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
+			mvs_write_port_cfg_data(mvi, no, 0);
+		}
+}
+
+static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i)
+{
+	u32 tmp;
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct mvs_port *port = phy->port;;
+
+	tmp = mvs_read_phy_ctl(mvi, i);
+
+	if ((tmp & PHY_READY_MASK) && !(phy->irq_status & PHYEV_POOF)) {
+		if (!port)
+			phy->phy_attached = 1;
+		return tmp;
+	}
+
+	if (port) {
+		if (phy->phy_type & PORT_TYPE_SAS) {
+			port->wide_port_phymap &= ~(1U << i);
+			if (!port->wide_port_phymap)
+				port->port_attached = 0;
+			mvs_update_wideport(mvi, i);
+		} else if (phy->phy_type & PORT_TYPE_SATA)
+			port->port_attached = 0;
+		mvs_free_reg_set(mvi, phy->port);
+		phy->port = NULL;
+		phy->phy_attached = 0;
+		phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
+	}
+	return 0;
+}
+
+static void *mvs_get_d2h_reg(struct mvs_info *mvi, int i, void *buf)
+{
+	u32 *s = (u32 *) buf;
+
+	if (!s)
+		return NULL;
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG3);
+	s[3] = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG2);
+	s[2] = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG1);
+	s[1] = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG0);
+	s[0] = mvs_read_port_cfg_data(mvi, i);
+
+	return (void *)s;
+}
+
+static u32 mvs_is_sig_fis_received(u32 irq_status)
+{
+	return irq_status & PHYEV_SIG_FIS;
+}
+
+static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
+					int get_st)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct pci_dev *pdev = mvi->pdev;
+	u32 tmp;
+	u64 tmp64;
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_IDENTIFY);
+	phy->dev_info = mvs_read_port_cfg_data(mvi, i);
 
-		if (!sas_protocol_ata(t->task_proto)) {
-			if (t->num_scatter) {
-				n_elem = pci_map_sg(mvi->pdev, t->scatter,
-						    t->num_scatter,
-						    t->data_dir);
-				if (!n_elem) {
-					rc = -ENOMEM;
-					goto err_out;
-				}
-			}
-		} else {
-			n_elem = t->num_scatter;
-		}
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
+	phy->dev_sas_addr = (u64) mvs_read_port_cfg_data(mvi, i) << 32;
 
-		rc = mvs_tag_alloc(mvi, &tag);
-		if (rc)
-			goto err_out;
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
+	phy->dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
 
-		slot = &mvi->slot_info[tag];
-		t->lldd_task = NULL;
-		slot->n_elem = n_elem;
-		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
-		tei.task = t;
-		tei.hdr = &mvi->slot[tag];
-		tei.tag = tag;
-		tei.n_elem = n_elem;
+	if (get_st) {
+		phy->irq_status = mvs_read_port_irq_stat(mvi, i);
+		phy->phy_status = mvs_is_phy_ready(mvi, i);
+	}
 
-		switch (t->task_proto) {
-		case SAS_PROTOCOL_SMP:
-			rc = mvs_task_prep_smp(mvi, &tei);
-			break;
-		case SAS_PROTOCOL_SSP:
-			rc = mvs_task_prep_ssp(mvi, &tei);
-			break;
-		case SAS_PROTOCOL_SATA:
-		case SAS_PROTOCOL_STP:
-		case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
-			rc = mvs_task_prep_ata(mvi, &tei);
-			break;
-		default:
-			dev_printk(KERN_ERR, &pdev->dev,
-				"unknown sas_task proto: 0x%x\n",
-				t->task_proto);
-			rc = -EINVAL;
-			break;
-		}
+	if (phy->phy_status) {
+		u32 phy_st;
+		struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
 
-		if (rc)
-			goto err_out_tag;
+		mvs_write_port_cfg_addr(mvi, i, PHYR_PHY_STAT);
+		phy_st = mvs_read_port_cfg_data(mvi, i);
 
-		slot->task = t;
-		slot->port = tei.port;
-		t->lldd_task = (void *) slot;
-		list_add_tail(&slot->list, &slot->port->list);
-		/* TODO: select normal or high priority */
+		sas_phy->linkrate =
+			(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+				PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
+		phy->minimum_linkrate =
+			(phy->phy_status &
+				PHY_MIN_SPP_PHYS_LINK_RATE_MASK) >> 8;
+		phy->maximum_linkrate =
+			(phy->phy_status &
+				PHY_MAX_SPP_PHYS_LINK_RATE_MASK) >> 12;
 
-		spin_lock(&t->task_state_lock);
-		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
-		spin_unlock(&t->task_state_lock);
+		if (phy->phy_type & PORT_TYPE_SAS) {
+			/* Updated attached_sas_addr */
+			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_HI);
+			phy->att_dev_sas_addr =
+				(u64) mvs_read_port_cfg_data(mvi, i) << 32;
+			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_LO);
+			phy->att_dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
+			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_DEV_INFO);
+			phy->att_dev_info = mvs_read_port_cfg_data(mvi, i);
+			phy->identify.device_type =
+			    phy->att_dev_info & PORT_DEV_TYPE_MASK;
 
-		mvs_hba_memory_dump(mvi, tag, t->task_proto);
+			if (phy->identify.device_type == SAS_END_DEV)
+				phy->identify.target_port_protocols =
+							SAS_PROTOCOL_SSP;
+			else if (phy->identify.device_type != NO_DEVICE)
+				phy->identify.target_port_protocols =
+							SAS_PROTOCOL_SMP;
+			if (phy_st & PHY_OOB_DTCTD)
+				sas_phy->oob_mode = SAS_OOB_MODE;
+			phy->frame_rcvd_size =
+			    sizeof(struct sas_identify_frame);
+		} else if (phy->phy_type & PORT_TYPE_SATA) {
+			phy->identify.target_port_protocols = SAS_PROTOCOL_STP;
+			if (mvs_is_sig_fis_received(phy->irq_status)) {
+				phy->att_dev_sas_addr = i;	/* temp */
+				if (phy_st & PHY_OOB_DTCTD)
+					sas_phy->oob_mode = SATA_OOB_MODE;
+				phy->frame_rcvd_size =
+				    sizeof(struct dev_to_host_fis);
+				mvs_get_d2h_reg(mvi, i,
+						(void *)sas_phy->frame_rcvd);
+			} else {
+				dev_printk(KERN_DEBUG, &pdev->dev,
+					"No sig fis\n");
+				phy->phy_type &= ~(PORT_TYPE_SATA);
+				goto out_done;
+			}
+		}
+		tmp64 = cpu_to_be64(phy->att_dev_sas_addr);
+		memcpy(sas_phy->attached_sas_addr, &tmp64, SAS_ADDR_SIZE);
 
-		++pass;
-		mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
-		if (n > 1)
-			t = list_entry(t->list.next, struct sas_task, list);
-	} while (--n);
+		dev_printk(KERN_DEBUG, &pdev->dev,
+			"phy[%d] Get Attached Address 0x%llX ,"
+			" SAS Address 0x%llX\n",
+			i,
+			(unsigned long long)phy->att_dev_sas_addr,
+			(unsigned long long)phy->dev_sas_addr);
+		dev_printk(KERN_DEBUG, &pdev->dev,
+			"Rate = %x , type = %d\n",
+			sas_phy->linkrate, phy->phy_type);
 
-	rc = 0;
-	goto out_done;
+		/* workaround for HW phy decoding error on 1.5g disk drive */
+		mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE6);
+		tmp = mvs_read_port_vsr_data(mvi, i);
+		if (((phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+		     PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET) ==
+			SAS_LINK_RATE_1_5_GBPS)
+			tmp &= ~PHY_MODE6_LATECLK;
+		else
+			tmp |= PHY_MODE6_LATECLK;
+		mvs_write_port_vsr_data(mvi, i, tmp);
 
-err_out_tag:
-	mvs_tag_free(mvi, tag);
-err_out:
-	dev_printk(KERN_ERR, &pdev->dev, "mvsas exec failed[%d]!\n", rc);
-	if (!sas_protocol_ata(t->task_proto))
-		if (n_elem)
-			pci_unmap_sg(mvi->pdev, t->scatter, n_elem,
-				     t->data_dir);
+	}
 out_done:
-	if (pass)
-		mw32(TX_PROD_IDX, (mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
-	spin_unlock_irqrestore(&mvi->lock, flags);
-	return rc;
+	if (get_st)
+		mvs_write_port_irq_stat(mvi, i, phy->irq_status);
 }
 
-static int mvs_task_abort(struct sas_task *task)
+void mvs_port_formed(struct asd_sas_phy *sas_phy)
 {
-	int rc;
+	struct sas_ha_struct *sas_ha = sas_phy->ha;
+	struct mvs_info *mvi = sas_ha->lldd_ha;
+	struct asd_sas_port *sas_port = sas_phy->port;
+	struct mvs_phy *phy = sas_phy->lldd_phy;
+	struct mvs_port *port = &mvi->port[sas_port->id];
 	unsigned long flags;
-	struct mvs_info *mvi = task->dev->port->ha->lldd_ha;
-	struct pci_dev *pdev = mvi->pdev;
-	int tag;
 
-	spin_lock_irqsave(&task->task_state_lock, flags);
-	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
-		rc = TMF_RESP_FUNC_COMPLETE;
-		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		goto out_done;
+	spin_lock_irqsave(&mvi->lock, flags);
+	port->port_attached = 1;
+	phy->port = port;
+	port->taskfileset = MVS_ID_NOT_MAPPED;
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		port->wide_port_phymap = sas_port->phy_mask;
+		mvs_update_wideport(mvi, sas_phy->id);
 	}
-	spin_unlock_irqrestore(&task->task_state_lock, flags);
+	spin_unlock_irqrestore(&mvi->lock, flags);
+}
+
+int mvs_I_T_nexus_reset(struct domain_device *dev)
+{
+	return TMF_RESP_FUNC_FAILED;
+}
+
+static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
+			u32 slot_idx, int err)
+{
+	struct mvs_port *port = mvi->slot_info[slot_idx].port;
+	struct task_status_struct *tstat = &task->task_status;
+	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
+	int stat = SAM_GOOD;
+
+	resp->frame_len = sizeof(struct dev_to_host_fis);
+	memcpy(&resp->ending_fis[0],
+	       SATA_RECEIVED_D2H_FIS(port->taskfileset),
+	       sizeof(struct dev_to_host_fis));
+	tstat->buf_valid_size = sizeof(*resp);
+	if (unlikely(err))
+		stat = SAS_PROTO_RESPONSE;
+	return stat;
+}
+
+static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
+			 u32 slot_idx)
+{
+	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
+	u32 err_dw0 = le32_to_cpu(*(u32 *) (slot->response));
+	u32 err_dw1 = le32_to_cpu(*(u32 *) (slot->response + 4));
+	int stat = SAM_CHECK_COND;
 
+	if (err_dw1 & SLOT_BSY_ERR) {
+		stat = SAS_QUEUE_FULL;
+		mvs_slot_reset(mvi, task, slot_idx);
+	}
 	switch (task->task_proto) {
-	case SAS_PROTOCOL_SMP:
-		dev_printk(KERN_DEBUG, &pdev->dev, "SMP Abort! \n");
-		break;
 	case SAS_PROTOCOL_SSP:
-		dev_printk(KERN_DEBUG, &pdev->dev, "SSP Abort! \n");
+		break;
+	case SAS_PROTOCOL_SMP:
 		break;
 	case SAS_PROTOCOL_SATA:
 	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:{
-		dev_printk(KERN_DEBUG, &pdev->dev, "STP Abort! \n");
-#if _MV_DUMP
-		dev_printk(KERN_DEBUG, &pdev->dev, "Dump D2H FIS: \n");
-		mvs_hexdump(sizeof(struct host_to_dev_fis),
-				(void *)&task->ata_task.fis, 0);
-		dev_printk(KERN_DEBUG, &pdev->dev, "Dump ATAPI Cmd : \n");
-		mvs_hexdump(16, task->ata_task.atapi_packet, 0);
-#endif
-		spin_lock_irqsave(&task->task_state_lock, flags);
-		if (task->task_state_flags & SAS_TASK_NEED_DEV_RESET) {
-			/* TODO */
-			;
-		}
-		spin_unlock_irqrestore(&task->task_state_lock, flags);
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+		if (err_dw0 & TFILE_ERR)
+			stat = mvs_sata_done(mvi, task, slot_idx, 1);
 		break;
-	}
 	default:
 		break;
 	}
 
-	if (mvs_find_tag(mvi, task, &tag)) {
-		spin_lock_irqsave(&mvi->lock, flags);
-		mvs_slot_task_free(mvi, task, &mvi->slot_info[tag], tag);
-		spin_unlock_irqrestore(&mvi->lock, flags);
-	}
-	if (!mvs_task_exec(task, 1, GFP_ATOMIC))
-		rc = TMF_RESP_FUNC_COMPLETE;
-	else
-		rc = TMF_RESP_FUNC_FAILED;
-out_done:
-	return rc;
+	mvs_hexdump(16, (u8 *) slot->response, 0);
+	return stat;
 }
 
-static void mvs_free(struct mvs_info *mvi)
+static int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
 {
-	int i;
+	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
+	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
+	struct sas_task *task = slot->task;
+	struct task_status_struct *tstat;
+	struct mvs_port *port;
+	bool aborted;
+	void *to;
 
-	if (!mvi)
-		return;
+	if (unlikely(!task || !task->lldd_task))
+		return -1;
 
-	for (i = 0; i < MVS_SLOTS; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
+	mvs_hba_cq_dump(mvi);
 
-		if (slot->buf)
-			dma_free_coherent(&mvi->pdev->dev, MVS_SLOT_BUF_SZ,
-					  slot->buf, slot->buf_dma);
+	spin_lock(&task->task_state_lock);
+	aborted = task->task_state_flags & SAS_TASK_STATE_ABORTED;
+	if (!aborted) {
+		task->task_state_flags &=
+		    ~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
+		task->task_state_flags |= SAS_TASK_STATE_DONE;
 	}
+	spin_unlock(&task->task_state_lock);
 
-	if (mvi->tx)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
-				  mvi->tx, mvi->tx_dma);
-	if (mvi->rx_fis)
-		dma_free_coherent(&mvi->pdev->dev, MVS_RX_FISL_SZ,
-				  mvi->rx_fis, mvi->rx_fis_dma);
-	if (mvi->rx)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
-				  mvi->rx, mvi->rx_dma);
-	if (mvi->slot)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->slot) * MVS_SLOTS,
-				  mvi->slot, mvi->slot_dma);
-#ifdef MVS_ENABLE_PERI
-	if (mvi->peri_regs)
-		iounmap(mvi->peri_regs);
-#endif
-	if (mvi->regs)
-		iounmap(mvi->regs);
-	if (mvi->shost)
-		scsi_host_put(mvi->shost);
-	kfree(mvi->sas.sas_port);
-	kfree(mvi->sas.sas_phy);
-	kfree(mvi);
-}
+	if (aborted) {
+		mvs_slot_task_free(mvi, task, slot, slot_idx);
+		mvs_slot_free(mvi, rx_desc);
+		return -1;
+	}
 
-/* FIXME: locking? */
-static int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
-			   void *funcdata)
-{
-	struct mvs_info *mvi = sas_phy->ha->lldd_ha;
-	int rc = 0, phy_id = sas_phy->id;
-	u32 tmp;
+	port = slot->port;
+	tstat = &task->task_status;
+	memset(tstat, 0, sizeof(*tstat));
+	tstat->resp = SAS_TASK_COMPLETE;
 
-	tmp = mvs_read_phy_ctl(mvi, phy_id);
+	if (unlikely(!port->port_attached || flags)) {
+		mvs_slot_err(mvi, task, slot_idx);
+		if (!sas_protocol_ata(task->task_proto))
+			tstat->stat = SAS_PHY_DOWN;
+		goto out;
+	}
 
-	switch (func) {
-	case PHY_FUNC_SET_LINK_RATE:{
-			struct sas_phy_linkrates *rates = funcdata;
-			u32 lrmin = 0, lrmax = 0;
+	/* error info record present */
+	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
+		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
+		goto out;
+	}
 
-			lrmin = (rates->minimum_linkrate << 8);
-			lrmax = (rates->maximum_linkrate << 12);
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SSP:
+		/* hw says status == 0, datapres == 0 */
+		if (rx_desc & RXQ_GOOD) {
+			tstat->stat = SAM_GOOD;
+			tstat->resp = SAS_TASK_COMPLETE;
+		}
+		/* response frame present */
+		else if (rx_desc & RXQ_RSP) {
+			struct ssp_response_iu *iu =
+			    slot->response + sizeof(struct mvs_err_info);
+			sas_ssp_task_response(&mvi->pdev->dev, task, iu);
+		}
 
-			if (lrmin) {
-				tmp &= ~(0xf << 8);
-				tmp |= lrmin;
-			}
-			if (lrmax) {
-				tmp &= ~(0xf << 12);
-				tmp |= lrmax;
-			}
-			mvs_write_phy_ctl(mvi, phy_id, tmp);
+		/* should never happen? */
+		else
+			tstat->stat = SAM_CHECK_COND;
+		break;
+
+	case SAS_PROTOCOL_SMP: {
+			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
+			tstat->stat = SAM_GOOD;
+			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
+			memcpy(to + sg_resp->offset,
+				slot->response + sizeof(struct mvs_err_info),
+				sg_dma_len(sg_resp));
+			kunmap_atomic(to, KM_IRQ0);
 			break;
 		}
 
-	case PHY_FUNC_HARD_RESET:
-		if (tmp & PHY_RST_HARD)
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: {
+			tstat->stat = mvs_sata_done(mvi, task, slot_idx, 0);
 			break;
-		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST_HARD);
-		break;
-
-	case PHY_FUNC_LINK_RESET:
-		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST);
-		break;
+		}
 
-	case PHY_FUNC_DISABLE:
-	case PHY_FUNC_RELEASE_SPINUP_HOLD:
 	default:
-		rc = -EOPNOTSUPP;
+		tstat->stat = SAM_CHECK_COND;
+		break;
 	}
 
-	return rc;
-}
-
-static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
-{
-	struct mvs_phy *phy = &mvi->phy[phy_id];
-	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+out:
+	mvs_slot_task_free(mvi, task, slot, slot_idx);
+	if (unlikely(tstat->stat != SAS_QUEUE_FULL))
+		mvs_slot_free(mvi, rx_desc);
 
-	sas_phy->enabled = (phy_id < mvi->chip->n_phy) ? 1 : 0;
-	sas_phy->class = SAS;
-	sas_phy->iproto = SAS_PROTOCOL_ALL;
-	sas_phy->tproto = 0;
-	sas_phy->type = PHY_TYPE_PHYSICAL;
-	sas_phy->role = PHY_ROLE_INITIATOR;
-	sas_phy->oob_mode = OOB_NOT_CONNECTED;
-	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
-
-	sas_phy->id = phy_id;
-	sas_phy->sas_addr = &mvi->sas_addr[0];
-	sas_phy->frame_rcvd = &phy->frame_rcvd[0];
-	sas_phy->ha = &mvi->sas;
-	sas_phy->lldd_phy = phy;
+	spin_unlock(&mvi->lock);
+	task->task_done(task);
+	spin_lock(&mvi->lock);
+	return tstat->stat;
 }
 
-static struct mvs_info *__devinit mvs_alloc(struct pci_dev *pdev,
-					    const struct pci_device_id *ent)
+static void mvs_release_task(struct mvs_info *mvi, int phy_no)
 {
-	struct mvs_info *mvi;
-	unsigned long res_start, res_len, res_flag;
-	struct asd_sas_phy **arr_phy;
-	struct asd_sas_port **arr_port;
-	const struct mvs_chip_info *chip = &mvs_chips[ent->driver_data];
-	int i;
-
-	/*
-	 * alloc and init our per-HBA mvs_info struct
-	 */
-
-	mvi = kzalloc(sizeof(*mvi), GFP_KERNEL);
-	if (!mvi)
-		return NULL;
-
-	spin_lock_init(&mvi->lock);
-#ifdef MVS_USE_TASKLET
-	tasklet_init(&mvi->tasklet, mvs_tasklet, (unsigned long)mvi);
-#endif
-	mvi->pdev = pdev;
-	mvi->chip = chip;
-
-	if (pdev->device == 0x6440 && pdev->revision == 0)
-		mvi->flags |= MVF_PHY_PWR_FIX;
-
-	/*
-	 * alloc and init SCSI, SAS glue
-	 */
-
-	mvi->shost = scsi_host_alloc(&mvs_sht, sizeof(void *));
-	if (!mvi->shost)
-		goto err_out;
+	struct list_head *pos, *n;
+	struct mvs_slot_info *slot;
+	struct mvs_phy *phy = &mvi->phy[phy_no];
+	struct mvs_port *port = phy->port;
+	u32 rx_desc;
 
-	arr_phy = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
-	arr_port = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
-	if (!arr_phy || !arr_port)
-		goto err_out;
+	if (!port)
+		return;
 
-	for (i = 0; i < MVS_MAX_PHYS; i++) {
-		mvs_phy_init(mvi, i);
-		arr_phy[i] = &mvi->phy[i].sas_phy;
-		arr_port[i] = &mvi->port[i].sas_port;
-		mvi->port[i].taskfileset = MVS_ID_NOT_MAPPED;
-		mvi->port[i].wide_port_phymap = 0;
-		mvi->port[i].port_attached = 0;
-		INIT_LIST_HEAD(&mvi->port[i].list);
+	list_for_each_safe(pos, n, &port->list) {
+		slot = container_of(pos, struct mvs_slot_info, list);
+		rx_desc = (u32) (slot - mvi->slot_info);
+		mvs_slot_complete(mvi, rx_desc, 1);
 	}
+}
 
-	SHOST_TO_SAS_HA(mvi->shost) = &mvi->sas;
-	mvi->shost->transportt = mvs_stt;
-	mvi->shost->max_id = 21;
-	mvi->shost->max_lun = ~0;
-	mvi->shost->max_channel = 0;
-	mvi->shost->max_cmd_len = 16;
-
-	mvi->sas.sas_ha_name = DRV_NAME;
-	mvi->sas.dev = &pdev->dev;
-	mvi->sas.lldd_module = THIS_MODULE;
-	mvi->sas.sas_addr = &mvi->sas_addr[0];
-	mvi->sas.sas_phy = arr_phy;
-	mvi->sas.sas_port = arr_port;
-	mvi->sas.num_phys = chip->n_phy;
-	mvi->sas.lldd_max_execute_num = 1;
-	mvi->sas.lldd_queue_size = MVS_QUEUE_SIZE;
-	mvi->shost->can_queue = MVS_CAN_QUEUE;
-	mvi->shost->cmd_per_lun = MVS_SLOTS / mvi->sas.num_phys;
-	mvi->sas.lldd_ha = mvi;
-	mvi->sas.core.shost = mvi->shost;
-
-	mvs_tag_init(mvi);
-
-	/*
-	 * ioremap main and peripheral registers
-	 */
-
-#ifdef MVS_ENABLE_PERI
-	res_start = pci_resource_start(pdev, 2);
-	res_len = pci_resource_len(pdev, 2);
-	if (!res_start || !res_len)
-		goto err_out;
-
-	mvi->peri_regs = ioremap_nocache(res_start, res_len);
-	if (!mvi->peri_regs)
-		goto err_out;
-#endif
-
-	res_start = pci_resource_start(pdev, 4);
-	res_len = pci_resource_len(pdev, 4);
-	if (!res_start || !res_len)
-		goto err_out;
-
-	res_flag = pci_resource_flags(pdev, 4);
-	if (res_flag & IORESOURCE_CACHEABLE)
-		mvi->regs = ioremap(res_start, res_len);
-	else
-		mvi->regs = ioremap_nocache(res_start, res_len);
-
-	if (!mvi->regs)
-		goto err_out;
+static void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
+{
+	struct pci_dev *pdev = mvi->pdev;
+	struct sas_ha_struct *sas_ha = &mvi->sas;
+	struct mvs_phy *phy = &mvi->phy[phy_no];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
 
+	phy->irq_status = mvs_read_port_irq_stat(mvi, phy_no);
 	/*
-	 * alloc and init our DMA areas
-	 */
-
-	mvi->tx = dma_alloc_coherent(&pdev->dev,
-				     sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
-				     &mvi->tx_dma, GFP_KERNEL);
-	if (!mvi->tx)
-		goto err_out;
-	memset(mvi->tx, 0, sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ);
-
-	mvi->rx_fis = dma_alloc_coherent(&pdev->dev, MVS_RX_FISL_SZ,
-					 &mvi->rx_fis_dma, GFP_KERNEL);
-	if (!mvi->rx_fis)
-		goto err_out;
-	memset(mvi->rx_fis, 0, MVS_RX_FISL_SZ);
-
-	mvi->rx = dma_alloc_coherent(&pdev->dev,
-				     sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
-				     &mvi->rx_dma, GFP_KERNEL);
-	if (!mvi->rx)
-		goto err_out;
-	memset(mvi->rx, 0, sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1));
-
-	mvi->rx[0] = cpu_to_le32(0xfff);
-	mvi->rx_cons = 0xfff;
-
-	mvi->slot = dma_alloc_coherent(&pdev->dev,
-				       sizeof(*mvi->slot) * MVS_SLOTS,
-				       &mvi->slot_dma, GFP_KERNEL);
-	if (!mvi->slot)
-		goto err_out;
-	memset(mvi->slot, 0, sizeof(*mvi->slot) * MVS_SLOTS);
+	* events is port event now ,
+	* we need check the interrupt status which belongs to per port.
+	*/
+	dev_printk(KERN_DEBUG, &pdev->dev,
+		"Port %d Event = %X\n",
+		phy_no, phy->irq_status);
 
-	for (i = 0; i < MVS_SLOTS; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
+	if (phy->irq_status & (PHYEV_POOF | PHYEV_DEC_ERR)) {
+		mvs_release_task(mvi, phy_no);
+		if (!mvs_is_phy_ready(mvi, phy_no)) {
+			sas_phy_disconnected(sas_phy);
+			sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
+			dev_printk(KERN_INFO, &pdev->dev,
+				"Port %d Unplug Notice\n", phy_no);
 
-		slot->buf = dma_alloc_coherent(&pdev->dev, MVS_SLOT_BUF_SZ,
-					       &slot->buf_dma, GFP_KERNEL);
-		if (!slot->buf)
-			goto err_out;
-		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+		} else
+			mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET, NULL);
 	}
+	if (!(phy->irq_status & PHYEV_DEC_ERR)) {
+		if (phy->irq_status & PHYEV_COMWAKE) {
+			u32 tmp = mvs_read_port_irq_mask(mvi, phy_no);
+			mvs_write_port_irq_mask(mvi, phy_no,
+						tmp | PHYEV_SIG_FIS);
+		}
+		if (phy->irq_status & (PHYEV_SIG_FIS | PHYEV_ID_DONE)) {
+			phy->phy_status = mvs_is_phy_ready(mvi, phy_no);
+			if (phy->phy_status) {
+				mvs_detect_porttype(mvi, phy_no);
 
-	/* finally, read NVRAM to get our SAS address */
-	if (mvs_nvram_read(mvi, NVR_SAS_ADDR, &mvi->sas_addr, 8))
-		goto err_out;
-	return mvi;
-
-err_out:
-	mvs_free(mvi);
-	return NULL;
-}
-
-static u32 mvs_cr32(void __iomem *regs, u32 addr)
-{
-	mw32(CMD_ADDR, addr);
-	return mr32(CMD_DATA);
-}
-
-static void mvs_cw32(void __iomem *regs, u32 addr, u32 val)
-{
-	mw32(CMD_ADDR, addr);
-	mw32(CMD_DATA, val);
-}
+				if (phy->phy_type & PORT_TYPE_SATA) {
+					u32 tmp = mvs_read_port_irq_mask(mvi,
+								phy_no);
+					tmp &= ~PHYEV_SIG_FIS;
+					mvs_write_port_irq_mask(mvi,
+								phy_no, tmp);
+				}
 
-static u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port)
-{
-	void __iomem *regs = mvi->regs;
-	return (port < 4)?mr32(P0_SER_CTLSTAT + port * 4):
-		mr32(P4_SER_CTLSTAT + (port - 4) * 4);
+				mvs_update_phyinfo(mvi, phy_no, 0);
+				sas_ha->notify_phy_event(sas_phy,
+							PHYE_OOB_DONE);
+				mvs_bytes_dmaed(mvi, phy_no);
+			} else {
+				dev_printk(KERN_DEBUG, &pdev->dev,
+					"plugin interrupt but phy is gone\n");
+				mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET,
+							NULL);
+			}
+		} else if (phy->irq_status & PHYEV_BROAD_CH) {
+			mvs_release_task(mvi, phy_no);
+			sas_ha->notify_port_event(sas_phy,
+						PORTE_BROADCAST_RCVD);
+		}
+	}
+	mvs_write_port_irq_stat(mvi, phy_no, phy->irq_status);
 }
 
-static void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val)
+static int mvs_int_rx(struct mvs_info *mvi, bool self_clear)
 {
 	void __iomem *regs = mvi->regs;
-	if (port < 4)
-		mw32(P0_SER_CTLSTAT + port * 4, val);
-	else
-		mw32(P4_SER_CTLSTAT + (port - 4) * 4, val);
-}
-
-static u32 mvs_read_port(struct mvs_info *mvi, u32 off, u32 off2, u32 port)
-{
-	void __iomem *regs = mvi->regs + off;
-	void __iomem *regs2 = mvi->regs + off2;
-	return (port < 4)?readl(regs + port * 8):
-		readl(regs2 + (port - 4) * 8);
-}
+	u32 rx_prod_idx, rx_desc;
+	bool attn = false;
+	struct pci_dev *pdev = mvi->pdev;
 
-static void mvs_write_port(struct mvs_info *mvi, u32 off, u32 off2,
-				u32 port, u32 val)
-{
-	void __iomem *regs = mvi->regs + off;
-	void __iomem *regs2 = mvi->regs + off2;
-	if (port < 4)
-		writel(val, regs + port * 8);
-	else
-		writel(val, regs2 + (port - 4) * 8);
-}
+	/* the first dword in the RX ring is special: it contains
+	 * a mirror of the hardware's RX producer index, so that
+	 * we don't have to stall the CPU reading that register.
+	 * The actual RX ring is offset by one dword, due to this.
+	 */
+	rx_prod_idx = mvi->rx_cons;
+	mvi->rx_cons = le32_to_cpu(mvi->rx[0]);
+	if (mvi->rx_cons == 0xfff)	/* h/w hasn't touched RX ring yet */
+		return 0;
 
-static u32 mvs_read_port_cfg_data(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_CFG_DATA, MVS_P4_CFG_DATA, port);
-}
+	/* The CMPL_Q may come late, read from register and try again
+	* note: if coalescing is enabled,
+	* it will need to read from register every time for sure
+	*/
+	if (mvi->rx_cons == rx_prod_idx)
+		mvi->rx_cons = mr32(RX_CONS_IDX) & RX_RING_SZ_MASK;
 
-static void mvs_write_port_cfg_data(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_CFG_DATA, MVS_P4_CFG_DATA, port, val);
-}
+	if (mvi->rx_cons == rx_prod_idx)
+		return 0;
 
-static void mvs_write_port_cfg_addr(struct mvs_info *mvi, u32 port, u32 addr)
-{
-	mvs_write_port(mvi, MVS_P0_CFG_ADDR, MVS_P4_CFG_ADDR, port, addr);
-}
+	while (mvi->rx_cons != rx_prod_idx) {
 
-static u32 mvs_read_port_vsr_data(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_VSR_DATA, MVS_P4_VSR_DATA, port);
-}
+		/* increment our internal RX consumer pointer */
+		rx_prod_idx = (rx_prod_idx + 1) & (MVS_RX_RING_SZ - 1);
 
-static void mvs_write_port_vsr_data(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_VSR_DATA, MVS_P4_VSR_DATA, port, val);
-}
+		rx_desc = le32_to_cpu(mvi->rx[rx_prod_idx + 1]);
 
-static void mvs_write_port_vsr_addr(struct mvs_info *mvi, u32 port, u32 addr)
-{
-	mvs_write_port(mvi, MVS_P0_VSR_ADDR, MVS_P4_VSR_ADDR, port, addr);
-}
+		if (likely(rx_desc & RXQ_DONE))
+			mvs_slot_complete(mvi, rx_desc, 0);
+		if (rx_desc & RXQ_ATTN) {
+			attn = true;
+			dev_printk(KERN_DEBUG, &pdev->dev, "ATTN %X\n",
+				rx_desc);
+		} else if (rx_desc & RXQ_ERR) {
+			if (!(rx_desc & RXQ_DONE))
+				mvs_slot_complete(mvi, rx_desc, 0);
+			dev_printk(KERN_DEBUG, &pdev->dev, "RXQ_ERR %X\n",
+				rx_desc);
+		} else if (rx_desc & RXQ_SLOT_RESET) {
+			dev_printk(KERN_DEBUG, &pdev->dev, "Slot reset[%X]\n",
+				rx_desc);
+			mvs_slot_free(mvi, rx_desc);
+		}
+	}
 
-static u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port)
-{
-	return mvs_read_port(mvi, MVS_P0_INT_STAT, MVS_P4_INT_STAT, port);
-}
+	if (attn && self_clear)
+		mvs_int_full(mvi);
 
-static void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_INT_STAT, MVS_P4_INT_STAT, port, val);
+	return 0;
 }
 
-static u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port)
+#ifndef MVS_DISABLE_NVRAM
+static int mvs_eep_read(void __iomem *regs, u32 addr, u32 *data)
 {
-	return mvs_read_port(mvi, MVS_P0_INT_MASK, MVS_P4_INT_MASK, port);
-}
+	int timeout = 1000;
 
-static void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val)
-{
-	mvs_write_port(mvi, MVS_P0_INT_MASK, MVS_P4_INT_MASK, port, val);
-}
+	if (addr & ~SPI_ADDR_MASK)
+		return -EINVAL;
 
-static void __devinit mvs_phy_hacks(struct mvs_info *mvi)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
+	writel(addr, regs + SPI_CMD);
+	writel(TWSI_RD, regs + SPI_CTL);
 
-	/* workaround for SATA R-ERR, to ignore phy glitch */
-	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
-	tmp &= ~(1 << 9);
-	tmp |= (1 << 10);
-	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
+	while (timeout-- > 0) {
+		if (readl(regs + SPI_CTL) & TWSI_RDY) {
+			*data = readl(regs + SPI_DATA);
+			return 0;
+		}
 
-	/* enable retry 127 times */
-	mvs_cw32(regs, CMD_SAS_CTL1, 0x7f7f);
+		udelay(10);
+	}
 
-	/* extend open frame timeout to max */
-	tmp = mvs_cr32(regs, CMD_SAS_CTL0);
-	tmp &= ~0xffff;
-	tmp |= 0x3fff;
-	mvs_cw32(regs, CMD_SAS_CTL0, tmp);
+	return -EBUSY;
+}
 
-	/* workaround for WDTIMEOUT , set to 550 ms */
-	mvs_cw32(regs, CMD_WD_TIMER, 0x86470);
+static int mvs_eep_read_buf(void __iomem *regs, u32 addr,
+			    void *buf, u32 buflen)
+{
+	u32 addr_end, tmp_addr, i, j;
+	u32 tmp = 0;
+	int rc;
+	u8 *tmp8, *buf8 = buf;
 
-	/* not to halt for different port op during wideport link change */
-	mvs_cw32(regs, CMD_APP_ERR_CONFIG, 0xffefbf7d);
+	addr_end = addr + buflen;
+	tmp_addr = ALIGN(addr, 4);
+	if (addr > 0xff)
+		return -EINVAL;
 
-	/* workaround for Seagate disk not-found OOB sequence, recv
-	 * COMINIT before sending out COMWAKE */
-	tmp = mvs_cr32(regs, CMD_PHY_MODE_21);
-	tmp &= 0x0000ffff;
-	tmp |= 0x00fa0000;
-	mvs_cw32(regs, CMD_PHY_MODE_21, tmp);
+	j = addr & 0x3;
+	if (j) {
+		rc = mvs_eep_read(regs, tmp_addr, &tmp);
+		if (rc)
+			return rc;
 
-	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
-	tmp &= 0x1fffffff;
-	tmp |= (2U << 29);	/* 8 ms retry */
-	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
+		tmp8 = (u8 *)&tmp;
+		for (i = j; i < 4; i++)
+			*buf8++ = tmp8[i];
 
-	/* TEST - for phy decoding error, adjust voltage levels */
-	mw32(P0_VSR_ADDR + 0, 0x8);
-	mw32(P0_VSR_DATA + 0, 0x2F0);
+		tmp_addr += 4;
+	}
 
-	mw32(P0_VSR_ADDR + 8, 0x8);
-	mw32(P0_VSR_DATA + 8, 0x2F0);
+	for (j = ALIGN(addr_end, 4); tmp_addr < j; tmp_addr += 4) {
+		rc = mvs_eep_read(regs, tmp_addr, &tmp);
+		if (rc)
+			return rc;
 
-	mw32(P0_VSR_ADDR + 16, 0x8);
-	mw32(P0_VSR_DATA + 16, 0x2F0);
+		memcpy(buf8, &tmp, 4);
+		buf8 += 4;
+	}
 
-	mw32(P0_VSR_ADDR + 24, 0x8);
-	mw32(P0_VSR_DATA + 24, 0x2F0);
+	if (tmp_addr < addr_end) {
+		rc = mvs_eep_read(regs, tmp_addr, &tmp);
+		if (rc)
+			return rc;
 
-}
+		tmp8 = (u8 *)&tmp;
+		j = addr_end - tmp_addr;
+		for (i = 0; i < j; i++)
+			*buf8++ = tmp8[i];
 
-static void mvs_enable_xmt(struct mvs_info *mvi, int PhyId)
-{
-	void __iomem *regs = mvi->regs;
-	u32 tmp;
+		tmp_addr += 4;
+	}
 
-	tmp = mr32(PCS);
-	if (mvi->chip->n_phy <= 4)
-		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT);
-	else
-		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT2);
-	mw32(PCS, tmp);
+	return 0;
 }
+#endif
 
-static void mvs_detect_porttype(struct mvs_info *mvi, int i)
+int mvs_nvram_read(struct mvs_info *mvi, u32 addr, void *buf, u32 buflen)
 {
+#ifndef MVS_DISABLE_NVRAM
 	void __iomem *regs = mvi->regs;
-	u32 reg;
-	struct mvs_phy *phy = &mvi->phy[i];
-
-	/* TODO check & save device type */
-	reg = mr32(GBL_PORT_TYPE);
-
-	if (reg & MODE_SAS_SATA & (1 << i))
-		phy->phy_type |= PORT_TYPE_SAS;
-	else
-		phy->phy_type |= PORT_TYPE_SATA;
-}
-
-static void *mvs_get_d2h_reg(struct mvs_info *mvi, int i, void *buf)
-{
-	u32 *s = (u32 *) buf;
-
-	if (!s)
-		return NULL;
+	int rc, i;
+	u32 sum;
+	u8 hdr[2], *tmp;
+	const char *msg;
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG3);
-	s[3] = mvs_read_port_cfg_data(mvi, i);
+	rc = mvs_eep_read_buf(regs, addr, &hdr, 2);
+	if (rc) {
+		msg = "nvram hdr read failed";
+		goto err_out;
+	}
+	rc = mvs_eep_read_buf(regs, addr + 2, buf, buflen);
+	if (rc) {
+		msg = "nvram read failed";
+		goto err_out;
+	}
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG2);
-	s[2] = mvs_read_port_cfg_data(mvi, i);
+	if (hdr[0] != 0x5A) {
+		/* entry id */
+		msg = "invalid nvram entry id";
+		rc = -ENOENT;
+		goto err_out;
+	}
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG1);
-	s[1] = mvs_read_port_cfg_data(mvi, i);
+	tmp = buf;
+	sum = ((u32)hdr[0]) + ((u32)hdr[1]);
+	for (i = 0; i < buflen; i++)
+		sum += ((u32)tmp[i]);
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG0);
-	s[0] = mvs_read_port_cfg_data(mvi, i);
+	if (sum) {
+		msg = "nvram checksum failure";
+		rc = -EILSEQ;
+		goto err_out;
+	}
 
-	return (void *)s;
-}
+	return 0;
 
-static u32 mvs_is_sig_fis_received(u32 irq_status)
-{
-	return irq_status & PHYEV_SIG_FIS;
+err_out:
+	dev_printk(KERN_ERR, &mvi->pdev->dev, "%s", msg);
+	return rc;
+#else
+	/* FIXME , For SAS target mode */
+	memcpy(buf, "\x50\x05\x04\x30\x11\xab\x00\x00", 8);
+	return 0;
+#endif
 }
 
-static void mvs_update_wideport(struct mvs_info *mvi, int i)
+static void mvs_int_sata(struct mvs_info *mvi)
 {
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct mvs_port *port = phy->port;
-	int j, no;
-
-	for_each_phy(port->wide_port_phymap, no, j, mvi->chip->n_phy)
-		if (no & 1) {
-			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
-			mvs_write_port_cfg_data(mvi, no,
-						port->wide_port_phymap);
-		} else {
-			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
-			mvs_write_port_cfg_data(mvi, no, 0);
-		}
+	u32 tmp;
+	void __iomem *regs = mvi->regs;
+	tmp = mr32(INT_STAT_SRS);
+	mw32(INT_STAT_SRS, tmp & 0xFFFF);
 }
 
-static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i)
+static void mvs_slot_reset(struct mvs_info *mvi, struct sas_task *task,
+				u32 slot_idx)
 {
-	u32 tmp;
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct mvs_port *port = phy->port;;
-
-	tmp = mvs_read_phy_ctl(mvi, i);
-
-	if ((tmp & PHY_READY_MASK) && !(phy->irq_status & PHYEV_POOF)) {
-		if (!port)
-			phy->phy_attached = 1;
-		return tmp;
-	}
+	void __iomem *regs = mvi->regs;
+	struct domain_device *dev = task->dev;
+	struct asd_sas_port *sas_port = dev->port;
+	struct mvs_port *port = mvi->slot_info[slot_idx].port;
+	u32 reg_set, phy_mask;
 
-	if (port) {
-		if (phy->phy_type & PORT_TYPE_SAS) {
-			port->wide_port_phymap &= ~(1U << i);
-			if (!port->wide_port_phymap)
-				port->port_attached = 0;
-			mvs_update_wideport(mvi, i);
-		} else if (phy->phy_type & PORT_TYPE_SATA)
-			port->port_attached = 0;
-		mvs_free_reg_set(mvi, phy->port);
-		phy->port = NULL;
-		phy->phy_attached = 0;
-		phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
+	if (!sas_protocol_ata(task->task_proto)) {
+		reg_set = 0;
+		phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
+				sas_port->phy_mask;
+	} else {
+		reg_set = port->taskfileset;
+		phy_mask = sas_port->phy_mask;
 	}
-	return 0;
+	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | slot_idx |
+					(TXQ_CMD_SLOT_RESET << TXQ_CMD_SHIFT) |
+					(phy_mask << TXQ_PHY_SHIFT) |
+					(reg_set << TXQ_SRS_SHIFT));
+
+	mw32(TX_PROD_IDX, mvi->tx_prod);
+	mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
 }
 
-static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
-					int get_st)
+void mvs_int_full(struct mvs_info *mvi)
 {
-	struct mvs_phy *phy = &mvi->phy[i];
-	struct pci_dev *pdev = mvi->pdev;
-	u32 tmp;
-	u64 tmp64;
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_IDENTIFY);
-	phy->dev_info = mvs_read_port_cfg_data(mvi, i);
+	void __iomem *regs = mvi->regs;
+	u32 tmp, stat;
+	int i;
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
-	phy->dev_sas_addr = (u64) mvs_read_port_cfg_data(mvi, i) << 32;
+	stat = mr32(INT_STAT);
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
-	phy->dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
+	mvs_int_rx(mvi, false);
 
-	if (get_st) {
-		phy->irq_status = mvs_read_port_irq_stat(mvi, i);
-		phy->phy_status = mvs_is_phy_ready(mvi, i);
+	for (i = 0; i < MVS_MAX_PORTS; i++) {
+		tmp = (stat >> i) & (CINT_PORT | CINT_PORT_STOPPED);
+		if (tmp)
+			mvs_int_port(mvi, i, tmp);
 	}
 
-	if (phy->phy_status) {
-		u32 phy_st;
-		struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
-
-		mvs_write_port_cfg_addr(mvi, i, PHYR_PHY_STAT);
-		phy_st = mvs_read_port_cfg_data(mvi, i);
-
-		sas_phy->linkrate =
-			(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
-				PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
-		phy->minimum_linkrate =
-			(phy->phy_status &
-				PHY_MIN_SPP_PHYS_LINK_RATE_MASK) >> 8;
-		phy->maximum_linkrate =
-			(phy->phy_status &
-				PHY_MAX_SPP_PHYS_LINK_RATE_MASK) >> 12;
+	if (stat & CINT_SRS)
+		mvs_int_sata(mvi);
 
-		if (phy->phy_type & PORT_TYPE_SAS) {
-			/* Updated attached_sas_addr */
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_HI);
-			phy->att_dev_sas_addr =
-				(u64) mvs_read_port_cfg_data(mvi, i) << 32;
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_LO);
-			phy->att_dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_DEV_INFO);
-			phy->att_dev_info = mvs_read_port_cfg_data(mvi, i);
-			phy->identify.device_type =
-			    phy->att_dev_info & PORT_DEV_TYPE_MASK;
+	mw32(INT_STAT, stat);
+}
 
-			if (phy->identify.device_type == SAS_END_DEV)
-				phy->identify.target_port_protocols =
-							SAS_PROTOCOL_SSP;
-			else if (phy->identify.device_type != NO_DEVICE)
-				phy->identify.target_port_protocols =
-							SAS_PROTOCOL_SMP;
-			if (phy_st & PHY_OOB_DTCTD)
-				sas_phy->oob_mode = SAS_OOB_MODE;
-			phy->frame_rcvd_size =
-			    sizeof(struct sas_identify_frame);
-		} else if (phy->phy_type & PORT_TYPE_SATA) {
-			phy->identify.target_port_protocols = SAS_PROTOCOL_STP;
-			if (mvs_is_sig_fis_received(phy->irq_status)) {
-				phy->att_dev_sas_addr = i;	/* temp */
-				if (phy_st & PHY_OOB_DTCTD)
-					sas_phy->oob_mode = SATA_OOB_MODE;
-				phy->frame_rcvd_size =
-				    sizeof(struct dev_to_host_fis);
-				mvs_get_d2h_reg(mvi, i,
-						(void *)sas_phy->frame_rcvd);
-			} else {
-				dev_printk(KERN_DEBUG, &pdev->dev,
-					"No sig fis\n");
-				phy->phy_type &= ~(PORT_TYPE_SATA);
-				goto out_done;
-			}
-		}
-		tmp64 = cpu_to_be64(phy->att_dev_sas_addr);
-		memcpy(sas_phy->attached_sas_addr, &tmp64, SAS_ADDR_SIZE);
+#ifndef MVS_DISABLE_MSI
+static irqreturn_t mvs_msi_interrupt(int irq, void *opaque)
+{
+	struct mvs_info *mvi = opaque;
 
-		dev_printk(KERN_DEBUG, &pdev->dev,
-			"phy[%d] Get Attached Address 0x%llX ,"
-			" SAS Address 0x%llX\n",
-			i,
-			(unsigned long long)phy->att_dev_sas_addr,
-			(unsigned long long)phy->dev_sas_addr);
-		dev_printk(KERN_DEBUG, &pdev->dev,
-			"Rate = %x , type = %d\n",
-			sas_phy->linkrate, phy->phy_type);
+#ifndef MVS_USE_TASKLET
+	spin_lock(&mvi->lock);
 
-		/* workaround for HW phy decoding error on 1.5g disk drive */
-		mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE6);
-		tmp = mvs_read_port_vsr_data(mvi, i);
-		if (((phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
-		     PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET) ==
-			SAS_LINK_RATE_1_5_GBPS)
-			tmp &= ~PHY_MODE6_LATECLK;
-		else
-			tmp |= PHY_MODE6_LATECLK;
-		mvs_write_port_vsr_data(mvi, i, tmp);
+	mvs_int_rx(mvi, true);
 
-	}
-out_done:
-	if (get_st)
-		mvs_write_port_irq_stat(mvi, i, phy->irq_status);
+	spin_unlock(&mvi->lock);
+#else
+	tasklet_schedule(&mvi->tasklet);
+#endif
+	return IRQ_HANDLED;
 }
+#endif
 
-static void mvs_port_formed(struct asd_sas_phy *sas_phy)
+int mvs_task_abort(struct sas_task *task)
 {
-	struct sas_ha_struct *sas_ha = sas_phy->ha;
-	struct mvs_info *mvi = sas_ha->lldd_ha;
-	struct asd_sas_port *sas_port = sas_phy->port;
-	struct mvs_phy *phy = sas_phy->lldd_phy;
-	struct mvs_port *port = &mvi->port[sas_port->id];
+	int rc;
 	unsigned long flags;
+	struct mvs_info *mvi = task->dev->port->ha->lldd_ha;
+	struct pci_dev *pdev = mvi->pdev;
+	int tag;
 
-	spin_lock_irqsave(&mvi->lock, flags);
-	port->port_attached = 1;
-	phy->port = port;
-	port->taskfileset = MVS_ID_NOT_MAPPED;
-	if (phy->phy_type & PORT_TYPE_SAS) {
-		port->wide_port_phymap = sas_port->phy_mask;
-		mvs_update_wideport(mvi, sas_phy->id);
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		rc = TMF_RESP_FUNC_COMPLETE;
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		goto out_done;
 	}
-	spin_unlock_irqrestore(&mvi->lock, flags);
-}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
 
-static int mvs_I_T_nexus_reset(struct domain_device *dev)
-{
-	return TMF_RESP_FUNC_FAILED;
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SMP:
+		dev_printk(KERN_DEBUG, &pdev->dev, "SMP Abort! \n");
+		break;
+	case SAS_PROTOCOL_SSP:
+		dev_printk(KERN_DEBUG, &pdev->dev, "SSP Abort! \n");
+		break;
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:{
+		dev_printk(KERN_DEBUG, &pdev->dev, "STP Abort! \n");
+#if _MV_DUMP
+		dev_printk(KERN_DEBUG, &pdev->dev, "Dump D2H FIS: \n");
+		mvs_hexdump(sizeof(struct host_to_dev_fis),
+				(void *)&task->ata_task.fis, 0);
+		dev_printk(KERN_DEBUG, &pdev->dev, "Dump ATAPI Cmd : \n");
+		mvs_hexdump(16, task->ata_task.atapi_packet, 0);
+#endif
+		spin_lock_irqsave(&task->task_state_lock, flags);
+		if (task->task_state_flags & SAS_TASK_NEED_DEV_RESET) {
+			/* TODO */
+			;
+		}
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		break;
+	}
+	default:
+		break;
+	}
+
+	if (mvs_find_tag(mvi, task, &tag)) {
+		spin_lock_irqsave(&mvi->lock, flags);
+		mvs_slot_task_free(mvi, task, &mvi->slot_info[tag], tag);
+		spin_unlock_irqrestore(&mvi->lock, flags);
+	}
+	if (!mvs_task_exec(task, 1, GFP_ATOMIC))
+		rc = TMF_RESP_FUNC_COMPLETE;
+	else
+		rc = TMF_RESP_FUNC_FAILED;
+out_done:
+	return rc;
 }
 
-static int __devinit mvs_hw_init(struct mvs_info *mvi)
+int __devinit mvs_hw_init(struct mvs_info *mvi)
 {
 	void __iomem *regs = mvi->regs;
 	int i;
@@ -3041,7 +1804,7 @@ static int __devinit mvs_hw_init(struct mvs_info *mvi)
 	return 0;
 }
 
-static void __devinit mvs_print_info(struct mvs_info *mvi)
+void __devinit mvs_print_info(struct mvs_info *mvi)
 {
 	struct pci_dev *pdev = mvi->pdev;
 	static int printed_version;
@@ -3053,170 +1816,3 @@ static void __devinit mvs_print_info(struct mvs_info *mvi)
 		   mvi->chip->n_phy, SAS_ADDR(mvi->sas_addr));
 }
 
-static int __devinit mvs_pci_init(struct pci_dev *pdev,
-				  const struct pci_device_id *ent)
-{
-	int rc;
-	struct mvs_info *mvi;
-	irq_handler_t irq_handler = mvs_interrupt;
-
-	rc = pci_enable_device(pdev);
-	if (rc)
-		return rc;
-
-	pci_set_master(pdev);
-
-	rc = pci_request_regions(pdev, DRV_NAME);
-	if (rc)
-		goto err_out_disable;
-
-	rc = pci_go_64(pdev);
-	if (rc)
-		goto err_out_regions;
-
-	mvi = mvs_alloc(pdev, ent);
-	if (!mvi) {
-		rc = -ENOMEM;
-		goto err_out_regions;
-	}
-
-	rc = mvs_hw_init(mvi);
-	if (rc)
-		goto err_out_mvi;
-
-#ifndef MVS_DISABLE_MSI
-	if (!pci_enable_msi(pdev)) {
-		u32 tmp;
-		void __iomem *regs = mvi->regs;
-		mvi->flags |= MVF_MSI;
-		irq_handler = mvs_msi_interrupt;
-		tmp = mr32(PCS);
-		mw32(PCS, tmp | PCS_SELF_CLEAR);
-	}
-#endif
-
-	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRV_NAME, mvi);
-	if (rc)
-		goto err_out_msi;
-
-	rc = scsi_add_host(mvi->shost, &pdev->dev);
-	if (rc)
-		goto err_out_irq;
-
-	rc = sas_register_ha(&mvi->sas);
-	if (rc)
-		goto err_out_shost;
-
-	pci_set_drvdata(pdev, mvi);
-
-	mvs_print_info(mvi);
-
-	mvs_hba_interrupt_enable(mvi);
-
-	scsi_scan_host(mvi->shost);
-
-	return 0;
-
-err_out_shost:
-	scsi_remove_host(mvi->shost);
-err_out_irq:
-	free_irq(pdev->irq, mvi);
-err_out_msi:
-	if (mvi->flags |= MVF_MSI)
-		pci_disable_msi(pdev);
-err_out_mvi:
-	mvs_free(mvi);
-err_out_regions:
-	pci_release_regions(pdev);
-err_out_disable:
-	pci_disable_device(pdev);
-	return rc;
-}
-
-static void __devexit mvs_pci_remove(struct pci_dev *pdev)
-{
-	struct mvs_info *mvi = pci_get_drvdata(pdev);
-
-	pci_set_drvdata(pdev, NULL);
-
-	if (mvi) {
-		sas_unregister_ha(&mvi->sas);
-		mvs_hba_interrupt_disable(mvi);
-		sas_remove_host(mvi->shost);
-		scsi_remove_host(mvi->shost);
-
-		free_irq(pdev->irq, mvi);
-		if (mvi->flags & MVF_MSI)
-			pci_disable_msi(pdev);
-		mvs_free(mvi);
-		pci_release_regions(pdev);
-	}
-	pci_disable_device(pdev);
-}
-
-static struct sas_domain_function_template mvs_transport_ops = {
-	.lldd_execute_task	= mvs_task_exec,
-	.lldd_control_phy	= mvs_phy_control,
-	.lldd_abort_task	= mvs_task_abort,
-	.lldd_port_formed	= mvs_port_formed,
-	.lldd_I_T_nexus_reset	= mvs_I_T_nexus_reset,
-};
-
-static struct pci_device_id __devinitdata mvs_pci_table[] = {
-	{ PCI_VDEVICE(MARVELL, 0x6320), chip_6320 },
-	{ PCI_VDEVICE(MARVELL, 0x6340), chip_6440 },
-	{
-		.vendor 	= PCI_VENDOR_ID_MARVELL,
-		.device 	= 0x6440,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= 0x6480,
-		.class		= 0,
-		.class_mask	= 0,
-		.driver_data	= chip_6480,
-	},
-	{ PCI_VDEVICE(MARVELL, 0x6440), chip_6440 },
-	{ PCI_VDEVICE(MARVELL, 0x6480), chip_6480 },
-
-	{ }	/* terminate list */
-};
-
-static struct pci_driver mvs_pci_driver = {
-	.name		= DRV_NAME,
-	.id_table	= mvs_pci_table,
-	.probe		= mvs_pci_init,
-	.remove		= __devexit_p(mvs_pci_remove),
-};
-
-static int __init mvs_init(void)
-{
-	int rc;
-
-	mvs_stt = sas_domain_attach_transport(&mvs_transport_ops);
-	if (!mvs_stt)
-		return -ENOMEM;
-
-	rc = pci_register_driver(&mvs_pci_driver);
-	if (rc)
-		goto err_out;
-
-	return 0;
-
-err_out:
-	sas_release_transport(mvs_stt);
-	return rc;
-}
-
-static void __exit mvs_exit(void)
-{
-	pci_unregister_driver(&mvs_pci_driver);
-	sas_release_transport(mvs_stt);
-}
-
-module_init(mvs_init);
-module_exit(mvs_exit);
-
-MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
-MODULE_DESCRIPTION("Marvell 88SE6440 SAS/SATA controller driver");
-MODULE_VERSION(DRV_VERSION);
-MODULE_LICENSE("GPL");
-MODULE_DEVICE_TABLE(pci, mvs_pci_table);
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
new file mode 100644
index 0000000..7a954a9
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -0,0 +1,205 @@
+/*
+	mv_sas.h - Marvell 88SE6440 SAS/SATA support
+
+	Copyright 2007 Red Hat, Inc.
+	Copyright 2008 Marvell. <kewei@marvell.com>
+
+	This program is free software; you can redistribute it and/or
+	modify it under the terms of the GNU General Public License as
+	published by the Free Software Foundation; either version 2,
+	or (at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty
+	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+	See the GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public
+	License along with this program; see the file COPYING.	If not,
+	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
+	MA 02139, USA.
+
+ */
+
+#ifndef _MV_SAS_H_
+#define _MV_SAS_H_
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+#include <scsi/libsas.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/sas_ata.h>
+#include <linux/version.h>
+#include "mv_defs.h"
+
+#define DRV_NAME	"mvsas"
+#define DRV_VERSION	"0.5.2"
+#define _MV_DUMP	0
+#define MVS_DISABLE_NVRAM
+#define MVS_DISABLE_MSI
+
+#define MVS_ID_NOT_MAPPED	0x7f
+#define MVS_CHIP_SLOT_SZ	(1U << mvi->chip->slot_width)
+
+#define for_each_phy(__lseq_mask, __mc, __lseq, __rest)			\
+	for ((__mc) = (__lseq_mask), (__lseq) = 0;			\
+					(__mc) != 0 && __rest;		\
+					(++__lseq), (__mc) >>= 1)
+
+struct mvs_chip_info {
+	u32		n_phy;
+	u32		srs_sz;
+	u32		slot_width;
+};
+
+struct mvs_err_info {
+	__le32			flags;
+	__le32			flags2;
+};
+
+struct mvs_cmd_hdr {
+	__le32			flags;	/* PRD tbl len; SAS, SATA ctl */
+	__le32			lens;	/* cmd, max resp frame len */
+	__le32			tags;	/* targ port xfer tag; tag */
+	__le32			data_len;	/* data xfer len */
+	__le64			cmd_tbl;	/* command table address */
+	__le64			open_frame;	/* open addr frame address */
+	__le64			status_buf;	/* status buffer address */
+	__le64			prd_tbl;		/* PRD tbl address */
+	__le32			reserved[4];
+};
+
+struct mvs_port {
+	struct asd_sas_port	sas_port;
+	u8			port_attached;
+	u8			taskfileset;
+	u8			wide_port_phymap;
+	struct list_head	list;
+};
+
+struct mvs_phy {
+	struct mvs_port		*port;
+	struct asd_sas_phy	sas_phy;
+	struct sas_identify	identify;
+	struct scsi_device	*sdev;
+	u64		dev_sas_addr;
+	u64		att_dev_sas_addr;
+	u32		att_dev_info;
+	u32		dev_info;
+	u32		phy_type;
+	u32		phy_status;
+	u32		irq_status;
+	u32		frame_rcvd_size;
+	u8		frame_rcvd[32];
+	u8		phy_attached;
+	enum sas_linkrate	minimum_linkrate;
+	enum sas_linkrate	maximum_linkrate;
+};
+
+struct mvs_slot_info {
+	struct list_head list;
+	struct sas_task *task;
+	u32 n_elem;
+	u32 tx;
+
+	/* DMA buffer for storing cmd tbl, open addr frame, status buffer,
+	 * and PRD table
+	 */
+	void *buf;
+	dma_addr_t buf_dma;
+#if _MV_DUMP
+	u32 cmd_size;
+#endif
+
+	void *response;
+	struct mvs_port *port;
+};
+
+struct mvs_info {
+	unsigned long flags;
+
+	/* host-wide lock */
+	spinlock_t lock;
+
+	/* our device */
+	struct pci_dev *pdev;
+
+	/* enhanced mode registers */
+	void __iomem *regs;
+
+	/* peripheral registers */
+	void __iomem *peri_regs;
+
+	u8 sas_addr[SAS_ADDR_SIZE];
+
+	/* SCSI/SAS glue */
+	struct sas_ha_struct sas;
+	struct Scsi_Host *shost;
+
+	/* TX (delivery) DMA ring */
+	__le32 *tx;
+	dma_addr_t tx_dma;
+
+	/* cached next-producer idx */
+	u32 tx_prod;
+
+	/* RX (completion) DMA ring */
+	__le32 *rx;
+	dma_addr_t rx_dma;
+
+	/* RX consumer idx */
+	u32 rx_cons;
+
+	/* RX'd FIS area */
+	__le32 *rx_fis;
+	dma_addr_t rx_fis_dma;
+
+	/* DMA command header slots */
+	struct mvs_cmd_hdr *slot;
+	dma_addr_t slot_dma;
+
+	const struct mvs_chip_info *chip;
+
+	u8 tags[MVS_SLOTS];
+	struct mvs_slot_info slot_info[MVS_SLOTS];
+				/* further per-slot information */
+	struct mvs_phy phy[MVS_MAX_PHYS];
+	struct mvs_port port[MVS_MAX_PHYS];
+#ifdef MVS_USE_TASKLET
+	struct tasklet_struct tasklet;
+#endif
+};
+
+int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
+			void *funcdata);
+int mvs_slave_configure(struct scsi_device *sdev);
+void mvs_scan_start(struct Scsi_Host *shost);
+int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time);
+int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags);
+int mvs_task_abort(struct sas_task *task);
+void mvs_port_formed(struct asd_sas_phy *sas_phy);
+int mvs_I_T_nexus_reset(struct domain_device *dev);
+void mvs_int_full(struct mvs_info *mvi);
+void mvs_tag_init(struct mvs_info *mvi);
+int mvs_nvram_read(struct mvs_info *mvi, u32 addr, void *buf, u32 buflen);
+int __devinit mvs_hw_init(struct mvs_info *mvi);
+void __devinit mvs_print_info(struct mvs_info *mvi);
+void mvs_hba_interrupt_enable(struct mvs_info *mvi);
+void mvs_hba_interrupt_disable(struct mvs_info *mvi);
+void mvs_detect_porttype(struct mvs_info *mvi, int i);
+u8 mvs_assign_reg_set(struct mvs_info *mvi, struct mvs_port *port);
+void mvs_enable_xmt(struct mvs_info *mvi, int PhyId);
+void __devinit mvs_phy_hacks(struct mvs_info *mvi);
+void mvs_free_reg_set(struct mvs_info *mvi, struct mvs_port *port);
+
+#endif
-- 
cgit v0.10.2


From 20b09c2992fefbe78f8cede7b404fb143a413c52 Mon Sep 17 00:00:00 2001
From: Andy Yan <ayan@marvell.com>
Date: Fri, 8 May 2009 17:46:40 -0400
Subject: [SCSI] mvsas: add support for 94xx; layout change; bug fixes

This version contains following main changes
  - Switch to new layout to support more types of ASIC.
  - SSP TMF supported and related Error Handing enhanced.
  - Support flash feature with delay 2*HZ when PHY changed.
  - Support Marvell 94xx series ASIC for 6G SAS/SATA, which has 2
88SE64xx chips but any different register description.
  - Support SPI flash for HBA-related configuration info.
  - Other patch enhanced from kernel side such as increasing PHY type

[jejb: fold back in DMA_BIT_MASK changes]
Signed-off-by: Ying Chu <jasonchu@marvell.com>
Signed-off-by: Andy Yan <ayan@marvell.com>
Signed-off-by: Ke Wei <kewei@marvell.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mvsas/Kconfig b/drivers/scsi/mvsas/Kconfig
index f83f368..6de7af2 100644
--- a/drivers/scsi/mvsas/Kconfig
+++ b/drivers/scsi/mvsas/Kconfig
@@ -1,35 +1,42 @@
 #
-# Kernel configuration file for 88SE64XX SAS/SATA driver.
+# Kernel configuration file for 88SE64XX/88SE94XX SAS/SATA driver.
 #
 # Copyright 2007 Red Hat, Inc.
 # Copyright 2008 Marvell. <kewei@marvell.com>
 #
 # This file is licensed under GPLv2.
 #
-# This file is part of the 88SE64XX driver.
+# This file is part of the 88SE64XX/88SE94XX driver.
 #
-# The 88SE64XX driver is free software; you can redistribute
+# The 88SE64XX/88SE94XX driver is free software; you can redistribute
 # it and/or modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; version 2 of the
 # License.
 #
-# The 88SE64XX driver is distributed in the hope that it will be
+# The 88SE64XX/88SE94XX driver is distributed in the hope that it will be
 # useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with 88SE64XX Driver; if not, write to the Free Software
+# along with 88SE64XX/88SE94XX Driver; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 #
 
 config SCSI_MVSAS
-	tristate "Marvell 88SE64XX SAS/SATA support"
+	tristate "Marvell 88SE64XX/88SE94XX SAS/SATA support"
 	depends on PCI
 	select SCSI_SAS_LIBSAS
 	select FW_LOADER
 	help
-		This driver supports Marvell's SAS/SATA 3Gb/s PCI-E 88SE64XX
-		chip based host adapters.
+		This driver supports Marvell's SAS/SATA 3Gb/s PCI-E 88SE64XX and 6Gb/s
+		PCI-E 88SE94XX chip based host adapters.
 
+config SCSI_MVSAS_DEBUG
+	bool "Compile in debug mode"
+	default y
+	depends on SCSI_MVSAS
+	help
+		Compiles the 88SE64XX/88SE94XX driver in debug mode.  In debug mode,
+		the driver prints some messages to the console.
diff --git a/drivers/scsi/mvsas/Makefile b/drivers/scsi/mvsas/Makefile
index a1ca681..52ac426 100644
--- a/drivers/scsi/mvsas/Makefile
+++ b/drivers/scsi/mvsas/Makefile
@@ -1,5 +1,5 @@
 #
-# Makefile for Marvell 88SE64xx SAS/SATA driver.
+# Makefile for Marvell 88SE64xx/88SE84xx SAS/SATA driver.
 #
 # Copyright 2007 Red Hat, Inc.
 # Copyright 2008 Marvell. <kewei@marvell.com>
@@ -21,7 +21,12 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 # USA
 
+ifeq ($(CONFIG_SCSI_MVSAS_DEBUG),y)
+	EXTRA_CFLAGS += -DMV_DEBUG
+endif
+
 obj-$(CONFIG_SCSI_MVSAS) += mvsas.o
 mvsas-y +=  mv_init.o  \
            mv_sas.o   \
-           mv_64xx.o
+           mv_64xx.o  \
+           mv_94xx.o
diff --git a/drivers/scsi/mvsas/mv_64xx.c b/drivers/scsi/mvsas/mv_64xx.c
index 697806c8..10a5077 100644
--- a/drivers/scsi/mvsas/mv_64xx.c
+++ b/drivers/scsi/mvsas/mv_64xx.c
@@ -1,184 +1,793 @@
 /*
-	mv_64xx.c - Marvell 88SE6440 SAS/SATA support
-
-	Copyright 2007 Red Hat, Inc.
-	Copyright 2008 Marvell. <kewei@marvell.com>
-
-	This program is free software; you can redistribute it and/or
-	modify it under the terms of the GNU General Public License as
-	published by the Free Software Foundation; either version 2,
-	or (at your option) any later version.
-
-	This program is distributed in the hope that it will be useful,
-	but WITHOUT ANY WARRANTY; without even the implied warranty
-	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-	See the GNU General Public License for more details.
-
-	You should have received a copy of the GNU General Public
-	License along with this program; see the file COPYING.	If not,
-	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
-	MA 02139, USA.
-
- */
+ * Marvell 88SE64xx hardware specific
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
 
 #include "mv_sas.h"
 #include "mv_64xx.h"
 #include "mv_chips.h"
 
-void mvs_detect_porttype(struct mvs_info *mvi, int i)
+static void mvs_64xx_detect_porttype(struct mvs_info *mvi, int i)
 {
 	void __iomem *regs = mvi->regs;
 	u32 reg;
 	struct mvs_phy *phy = &mvi->phy[i];
 
 	/* TODO check & save device type */
-	reg = mr32(GBL_PORT_TYPE);
-
+	reg = mr32(MVS_GBL_PORT_TYPE);
+	phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
 	if (reg & MODE_SAS_SATA & (1 << i))
 		phy->phy_type |= PORT_TYPE_SAS;
 	else
 		phy->phy_type |= PORT_TYPE_SATA;
 }
 
-void mvs_enable_xmt(struct mvs_info *mvi, int PhyId)
+static void __devinit mvs_64xx_enable_xmt(struct mvs_info *mvi, int phy_id)
 {
 	void __iomem *regs = mvi->regs;
 	u32 tmp;
 
-	tmp = mr32(PCS);
+	tmp = mr32(MVS_PCS);
 	if (mvi->chip->n_phy <= 4)
-		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT);
+		tmp |= 1 << (phy_id + PCS_EN_PORT_XMT_SHIFT);
+	else
+		tmp |= 1 << (phy_id + PCS_EN_PORT_XMT_SHIFT2);
+	mw32(MVS_PCS, tmp);
+}
+
+static void __devinit mvs_64xx_phy_hacks(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+
+	mvs_phy_hacks(mvi);
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		/* TEST - for phy decoding error, adjust voltage levels */
+		mw32(MVS_P0_VSR_ADDR + 0, 0x8);
+		mw32(MVS_P0_VSR_DATA + 0, 0x2F0);
+
+		mw32(MVS_P0_VSR_ADDR + 8, 0x8);
+		mw32(MVS_P0_VSR_DATA + 8, 0x2F0);
+
+		mw32(MVS_P0_VSR_ADDR + 16, 0x8);
+		mw32(MVS_P0_VSR_DATA + 16, 0x2F0);
+
+		mw32(MVS_P0_VSR_ADDR + 24, 0x8);
+		mw32(MVS_P0_VSR_DATA + 24, 0x2F0);
+	} else {
+		int i;
+		/* disable auto port detection */
+		mw32(MVS_GBL_PORT_TYPE, 0);
+		for (i = 0; i < mvi->chip->n_phy; i++) {
+			mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE7);
+			mvs_write_port_vsr_data(mvi, i, 0x90000000);
+			mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE9);
+			mvs_write_port_vsr_data(mvi, i, 0x50f2);
+			mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE11);
+			mvs_write_port_vsr_data(mvi, i, 0x0e);
+		}
+	}
+}
+
+static void mvs_64xx_stp_reset(struct mvs_info *mvi, u32 phy_id)
+{
+	void __iomem *regs = mvi->regs;
+	u32 reg, tmp;
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		if (phy_id < 4)
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &reg);
+		else
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &reg);
+
+	} else
+		reg = mr32(MVS_PHY_CTL);
+
+	tmp = reg;
+	if (phy_id < 4)
+		tmp |= (1U << phy_id) << PCTL_LINK_OFFS;
 	else
-		tmp |= 1 << (PhyId + PCS_EN_PORT_XMT_SHIFT2);
-	mw32(PCS, tmp);
+		tmp |= (1U << (phy_id - 4)) << PCTL_LINK_OFFS;
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		if (phy_id < 4) {
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+			mdelay(10);
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, reg);
+		} else {
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+			mdelay(10);
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, reg);
+		}
+	} else {
+		mw32(MVS_PHY_CTL, tmp);
+		mdelay(10);
+		mw32(MVS_PHY_CTL, reg);
+	}
+}
+
+static void mvs_64xx_phy_reset(struct mvs_info *mvi, u32 phy_id, int hard)
+{
+	u32 tmp;
+	tmp = mvs_read_port_irq_stat(mvi, phy_id);
+	tmp &= ~PHYEV_RDY_CH;
+	mvs_write_port_irq_stat(mvi, phy_id, tmp);
+	tmp = mvs_read_phy_ctl(mvi, phy_id);
+	if (hard)
+		tmp |= PHY_RST_HARD;
+	else
+		tmp |= PHY_RST;
+	mvs_write_phy_ctl(mvi, phy_id, tmp);
+	if (hard) {
+		do {
+			tmp = mvs_read_phy_ctl(mvi, phy_id);
+		} while (tmp & PHY_RST_HARD);
+	}
+}
+
+static int __devinit mvs_64xx_chip_reset(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+	int i;
+
+	/* make sure interrupts are masked immediately (paranoia) */
+	mw32(MVS_GBL_CTL, 0);
+	tmp = mr32(MVS_GBL_CTL);
+
+	/* Reset Controller */
+	if (!(tmp & HBA_RST)) {
+		if (mvi->flags & MVF_PHY_PWR_FIX) {
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
+			tmp &= ~PCTL_PWR_OFF;
+			tmp |= PCTL_PHY_DSBL;
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+
+			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
+			tmp &= ~PCTL_PWR_OFF;
+			tmp |= PCTL_PHY_DSBL;
+			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+		}
+	}
+
+	/* make sure interrupts are masked immediately (paranoia) */
+	mw32(MVS_GBL_CTL, 0);
+	tmp = mr32(MVS_GBL_CTL);
+
+	/* Reset Controller */
+	if (!(tmp & HBA_RST)) {
+		/* global reset, incl. COMRESET/H_RESET_N (self-clearing) */
+		mw32_f(MVS_GBL_CTL, HBA_RST);
+	}
+
+	/* wait for reset to finish; timeout is just a guess */
+	i = 1000;
+	while (i-- > 0) {
+		msleep(10);
+
+		if (!(mr32(MVS_GBL_CTL) & HBA_RST))
+			break;
+	}
+	if (mr32(MVS_GBL_CTL) & HBA_RST) {
+		dev_printk(KERN_ERR, mvi->dev, "HBA reset failed\n");
+		return -EBUSY;
+	}
+	return 0;
 }
 
-void __devinit mvs_phy_hacks(struct mvs_info *mvi)
+static void mvs_64xx_phy_disable(struct mvs_info *mvi, u32 phy_id)
 {
 	void __iomem *regs = mvi->regs;
 	u32 tmp;
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		u32 offs;
+		if (phy_id < 4)
+			offs = PCR_PHY_CTL;
+		else {
+			offs = PCR_PHY_CTL2;
+			phy_id -= 4;
+		}
+		pci_read_config_dword(mvi->pdev, offs, &tmp);
+		tmp |= 1U << (PCTL_PHY_DSBL_OFFS + phy_id);
+		pci_write_config_dword(mvi->pdev, offs, tmp);
+	} else {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp |= 1U << (PCTL_PHY_DSBL_OFFS + phy_id);
+		mw32(MVS_PHY_CTL, tmp);
+	}
+}
+
+static void mvs_64xx_phy_enable(struct mvs_info *mvi, u32 phy_id)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		u32 offs;
+		if (phy_id < 4)
+			offs = PCR_PHY_CTL;
+		else {
+			offs = PCR_PHY_CTL2;
+			phy_id -= 4;
+		}
+		pci_read_config_dword(mvi->pdev, offs, &tmp);
+		tmp &= ~(1U << (PCTL_PHY_DSBL_OFFS + phy_id));
+		pci_write_config_dword(mvi->pdev, offs, tmp);
+	} else {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~(1U << (PCTL_PHY_DSBL_OFFS + phy_id));
+		mw32(MVS_PHY_CTL, tmp);
+	}
+}
 
-	/* workaround for SATA R-ERR, to ignore phy glitch */
-	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
-	tmp &= ~(1 << 9);
-	tmp |= (1 << 10);
-	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
+static int __devinit mvs_64xx_init(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	int i;
+	u32 tmp, cctl;
+
+	if (mvi->pdev && mvi->pdev->revision == 0)
+		mvi->flags |= MVF_PHY_PWR_FIX;
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		mvs_show_pcie_usage(mvi);
+		tmp = mvs_64xx_chip_reset(mvi);
+		if (tmp)
+			return tmp;
+	} else {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp |= PCTL_PHY_DSBL;
+		mw32(MVS_PHY_CTL, tmp);
+	}
 
-	/* enable retry 127 times */
-	mvs_cw32(regs, CMD_SAS_CTL1, 0x7f7f);
+	/* Init Chip */
+	/* make sure RST is set; HBA_RST /should/ have done that for us */
+	cctl = mr32(MVS_CTL) & 0xFFFF;
+	if (cctl & CCTL_RST)
+		cctl &= ~CCTL_RST;
+	else
+		mw32_f(MVS_CTL, cctl | CCTL_RST);
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		/* write to device control _AND_ device status register */
+		pci_read_config_dword(mvi->pdev, PCR_DEV_CTRL, &tmp);
+		tmp &= ~PRD_REQ_MASK;
+		tmp |= PRD_REQ_SIZE;
+		pci_write_config_dword(mvi->pdev, PCR_DEV_CTRL, tmp);
+
+		pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp &= ~PCTL_PHY_DSBL;
+		pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+
+		pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
+		tmp &= PCTL_PWR_OFF;
+		tmp &= ~PCTL_PHY_DSBL;
+		pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+	} else {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp |= PCTL_COM_ON;
+		tmp &= ~PCTL_PHY_DSBL;
+		tmp |= PCTL_LINK_RST;
+		mw32(MVS_PHY_CTL, tmp);
+		msleep(100);
+		tmp &= ~PCTL_LINK_RST;
+		mw32(MVS_PHY_CTL, tmp);
+		msleep(100);
+	}
 
-	/* extend open frame timeout to max */
-	tmp = mvs_cr32(regs, CMD_SAS_CTL0);
-	tmp &= ~0xffff;
-	tmp |= 0x3fff;
-	mvs_cw32(regs, CMD_SAS_CTL0, tmp);
+	/* reset control */
+	mw32(MVS_PCS, 0);		/* MVS_PCS */
+	/* init phys */
+	mvs_64xx_phy_hacks(mvi);
 
-	/* workaround for WDTIMEOUT , set to 550 ms */
-	mvs_cw32(regs, CMD_WD_TIMER, 0x86470);
+	/* enable auto port detection */
+	mw32(MVS_GBL_PORT_TYPE, MODE_AUTO_DET_EN);
 
-	/* not to halt for different port op during wideport link change */
-	mvs_cw32(regs, CMD_APP_ERR_CONFIG, 0xffefbf7d);
+	mw32(MVS_CMD_LIST_LO, mvi->slot_dma);
+	mw32(MVS_CMD_LIST_HI, (mvi->slot_dma >> 16) >> 16);
 
-	/* workaround for Seagate disk not-found OOB sequence, recv
-	 * COMINIT before sending out COMWAKE */
-	tmp = mvs_cr32(regs, CMD_PHY_MODE_21);
-	tmp &= 0x0000ffff;
-	tmp |= 0x00fa0000;
-	mvs_cw32(regs, CMD_PHY_MODE_21, tmp);
+	mw32(MVS_RX_FIS_LO, mvi->rx_fis_dma);
+	mw32(MVS_RX_FIS_HI, (mvi->rx_fis_dma >> 16) >> 16);
 
-	tmp = mvs_cr32(regs, CMD_PHY_TIMER);
-	tmp &= 0x1fffffff;
-	tmp |= (2U << 29);	/* 8 ms retry */
-	mvs_cw32(regs, CMD_PHY_TIMER, tmp);
+	mw32(MVS_TX_CFG, MVS_CHIP_SLOT_SZ);
+	mw32(MVS_TX_LO, mvi->tx_dma);
+	mw32(MVS_TX_HI, (mvi->tx_dma >> 16) >> 16);
 
-	/* TEST - for phy decoding error, adjust voltage levels */
-	mw32(P0_VSR_ADDR + 0, 0x8);
-	mw32(P0_VSR_DATA + 0, 0x2F0);
+	mw32(MVS_RX_CFG, MVS_RX_RING_SZ);
+	mw32(MVS_RX_LO, mvi->rx_dma);
+	mw32(MVS_RX_HI, (mvi->rx_dma >> 16) >> 16);
 
-	mw32(P0_VSR_ADDR + 8, 0x8);
-	mw32(P0_VSR_DATA + 8, 0x2F0);
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		/* set phy local SAS address */
+		/* should set little endian SAS address to 64xx chip */
+		mvs_set_sas_addr(mvi, i, PHYR_ADDR_LO, PHYR_ADDR_HI,
+				cpu_to_be64(mvi->phy[i].dev_sas_addr));
 
-	mw32(P0_VSR_ADDR + 16, 0x8);
-	mw32(P0_VSR_DATA + 16, 0x2F0);
+		mvs_64xx_enable_xmt(mvi, i);
 
-	mw32(P0_VSR_ADDR + 24, 0x8);
-	mw32(P0_VSR_DATA + 24, 0x2F0);
+		mvs_64xx_phy_reset(mvi, i, 1);
+		msleep(500);
+		mvs_64xx_detect_porttype(mvi, i);
+	}
+	if (mvi->flags & MVF_FLAG_SOC) {
+		/* set select registers */
+		writel(0x0E008000, regs + 0x000);
+		writel(0x59000008, regs + 0x004);
+		writel(0x20, regs + 0x008);
+		writel(0x20, regs + 0x00c);
+		writel(0x20, regs + 0x010);
+		writel(0x20, regs + 0x014);
+		writel(0x20, regs + 0x018);
+		writel(0x20, regs + 0x01c);
+	}
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		/* clear phy int status */
+		tmp = mvs_read_port_irq_stat(mvi, i);
+		tmp &= ~PHYEV_SIG_FIS;
+		mvs_write_port_irq_stat(mvi, i, tmp);
+
+		/* set phy int mask */
+		tmp = PHYEV_RDY_CH | PHYEV_BROAD_CH | PHYEV_UNASSOC_FIS |
+			PHYEV_ID_DONE | PHYEV_DCDR_ERR | PHYEV_CRC_ERR |
+			PHYEV_DEC_ERR;
+		mvs_write_port_irq_mask(mvi, i, tmp);
+
+		msleep(100);
+		mvs_update_phyinfo(mvi, i, 1);
+	}
 
+	/* FIXME: update wide port bitmaps */
+
+	/* little endian for open address and command table, etc. */
+	/*
+	 * it seems that ( from the spec ) turning on big-endian won't
+	 * do us any good on big-endian machines, need further confirmation
+	 */
+	cctl = mr32(MVS_CTL);
+	cctl |= CCTL_ENDIAN_CMD;
+	cctl |= CCTL_ENDIAN_DATA;
+	cctl &= ~CCTL_ENDIAN_OPEN;
+	cctl |= CCTL_ENDIAN_RSP;
+	mw32_f(MVS_CTL, cctl);
+
+	/* reset CMD queue */
+	tmp = mr32(MVS_PCS);
+	tmp |= PCS_CMD_RST;
+	mw32(MVS_PCS, tmp);
+	/* interrupt coalescing may cause missing HW interrput in some case,
+	 * and the max count is 0x1ff, while our max slot is 0x200,
+	 * it will make count 0.
+	 */
+	tmp = 0;
+	mw32(MVS_INT_COAL, tmp);
+
+	tmp = 0x100;
+	mw32(MVS_INT_COAL_TMOUT, tmp);
+
+	/* ladies and gentlemen, start your engines */
+	mw32(MVS_TX_CFG, 0);
+	mw32(MVS_TX_CFG, MVS_CHIP_SLOT_SZ | TX_EN);
+	mw32(MVS_RX_CFG, MVS_RX_RING_SZ | RX_EN);
+	/* enable CMD/CMPL_Q/RESP mode */
+	mw32(MVS_PCS, PCS_SATA_RETRY | PCS_FIS_RX_EN |
+		PCS_CMD_EN | PCS_CMD_STOP_ERR);
+
+	/* enable completion queue interrupt */
+	tmp = (CINT_PORT_MASK | CINT_DONE | CINT_MEM | CINT_SRS | CINT_CI_STOP |
+		CINT_DMA_PCIE);
+
+	mw32(MVS_INT_MASK, tmp);
+
+	/* Enable SRS interrupt */
+	mw32(MVS_INT_MASK_SRS_0, 0xFFFF);
+
+	return 0;
 }
 
-void mvs_hba_interrupt_enable(struct mvs_info *mvi)
+static int mvs_64xx_ioremap(struct mvs_info *mvi)
+{
+	if (!mvs_ioremap(mvi, 4, 2))
+		return 0;
+	return -1;
+}
+
+static void mvs_64xx_iounmap(struct mvs_info *mvi)
+{
+	mvs_iounmap(mvi->regs);
+	mvs_iounmap(mvi->regs_ex);
+}
+
+static void mvs_64xx_interrupt_enable(struct mvs_info *mvi)
 {
 	void __iomem *regs = mvi->regs;
 	u32 tmp;
 
-	tmp = mr32(GBL_CTL);
+	tmp = mr32(MVS_GBL_CTL);
+	mw32(MVS_GBL_CTL, tmp | INT_EN);
+}
 
-	mw32(GBL_CTL, tmp | INT_EN);
+static void mvs_64xx_interrupt_disable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(MVS_GBL_CTL);
+	mw32(MVS_GBL_CTL, tmp & ~INT_EN);
 }
 
-void mvs_hba_interrupt_disable(struct mvs_info *mvi)
+static u32 mvs_64xx_isr_status(struct mvs_info *mvi, int irq)
 {
 	void __iomem *regs = mvi->regs;
+	u32 stat;
+
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		stat = mr32(MVS_GBL_INT_STAT);
+
+		if (stat == 0 || stat == 0xffffffff)
+			return 0;
+	} else
+		stat = 1;
+	return stat;
+}
+
+static irqreturn_t mvs_64xx_isr(struct mvs_info *mvi, int irq, u32 stat)
+{
+	void __iomem *regs = mvi->regs;
+
+	/* clear CMD_CMPLT ASAP */
+	mw32_f(MVS_INT_STAT, CINT_DONE);
+#ifndef MVS_USE_TASKLET
+	spin_lock(&mvi->lock);
+#endif
+	mvs_int_full(mvi);
+#ifndef MVS_USE_TASKLET
+	spin_unlock(&mvi->lock);
+#endif
+	return IRQ_HANDLED;
+}
+
+static void mvs_64xx_command_active(struct mvs_info *mvi, u32 slot_idx)
+{
 	u32 tmp;
+	mvs_cw32(mvi, 0x40 + (slot_idx >> 3), 1 << (slot_idx % 32));
+	mvs_cw32(mvi, 0x00 + (slot_idx >> 3), 1 << (slot_idx % 32));
+	do {
+		tmp = mvs_cr32(mvi, 0x00 + (slot_idx >> 3));
+	} while (tmp & 1 << (slot_idx % 32));
+	do {
+		tmp = mvs_cr32(mvi, 0x40 + (slot_idx >> 3));
+	} while (tmp & 1 << (slot_idx % 32));
+}
 
-	tmp = mr32(GBL_CTL);
+static void mvs_64xx_issue_stop(struct mvs_info *mvi, enum mvs_port_type type,
+				u32 tfs)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
 
-	mw32(GBL_CTL, tmp & ~INT_EN);
+	if (type == PORT_TYPE_SATA) {
+		tmp = mr32(MVS_INT_STAT_SRS_0) | (1U << tfs);
+		mw32(MVS_INT_STAT_SRS_0, tmp);
+	}
+	mw32(MVS_INT_STAT, CINT_CI_STOP);
+	tmp = mr32(MVS_PCS) | 0xFF00;
+	mw32(MVS_PCS, tmp);
 }
 
-void mvs_free_reg_set(struct mvs_info *mvi, struct mvs_port *port)
+static void mvs_64xx_free_reg_set(struct mvs_info *mvi, u8 *tfs)
 {
 	void __iomem *regs = mvi->regs;
 	u32 tmp, offs;
-	u8 *tfs = &port->taskfileset;
 
 	if (*tfs == MVS_ID_NOT_MAPPED)
 		return;
 
 	offs = 1U << ((*tfs & 0x0f) + PCS_EN_SATA_REG_SHIFT);
 	if (*tfs < 16) {
-		tmp = mr32(PCS);
-		mw32(PCS, tmp & ~offs);
+		tmp = mr32(MVS_PCS);
+		mw32(MVS_PCS, tmp & ~offs);
 	} else {
-		tmp = mr32(CTL);
-		mw32(CTL, tmp & ~offs);
+		tmp = mr32(MVS_CTL);
+		mw32(MVS_CTL, tmp & ~offs);
 	}
 
-	tmp = mr32(INT_STAT_SRS) & (1U << *tfs);
+	tmp = mr32(MVS_INT_STAT_SRS_0) & (1U << *tfs);
 	if (tmp)
-		mw32(INT_STAT_SRS, tmp);
+		mw32(MVS_INT_STAT_SRS_0, tmp);
 
 	*tfs = MVS_ID_NOT_MAPPED;
+	return;
 }
 
-u8 mvs_assign_reg_set(struct mvs_info *mvi, struct mvs_port *port)
+static u8 mvs_64xx_assign_reg_set(struct mvs_info *mvi, u8 *tfs)
 {
 	int i;
 	u32 tmp, offs;
 	void __iomem *regs = mvi->regs;
 
-	if (port->taskfileset != MVS_ID_NOT_MAPPED)
+	if (*tfs != MVS_ID_NOT_MAPPED)
 		return 0;
 
-	tmp = mr32(PCS);
+	tmp = mr32(MVS_PCS);
 
 	for (i = 0; i < mvi->chip->srs_sz; i++) {
 		if (i == 16)
-			tmp = mr32(CTL);
+			tmp = mr32(MVS_CTL);
 		offs = 1U << ((i & 0x0f) + PCS_EN_SATA_REG_SHIFT);
 		if (!(tmp & offs)) {
-			port->taskfileset = i;
+			*tfs = i;
 
 			if (i < 16)
-				mw32(PCS, tmp | offs);
+				mw32(MVS_PCS, tmp | offs);
 			else
-				mw32(CTL, tmp | offs);
-			tmp = mr32(INT_STAT_SRS) & (1U << i);
+				mw32(MVS_CTL, tmp | offs);
+			tmp = mr32(MVS_INT_STAT_SRS_0) & (1U << i);
 			if (tmp)
-				mw32(INT_STAT_SRS, tmp);
+				mw32(MVS_INT_STAT_SRS_0, tmp);
 			return 0;
 		}
 	}
 	return MVS_ID_NOT_MAPPED;
 }
 
+void mvs_64xx_make_prd(struct scatterlist *scatter, int nr, void *prd)
+{
+	int i;
+	struct scatterlist *sg;
+	struct mvs_prd *buf_prd = prd;
+	for_each_sg(scatter, sg, nr, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd++;
+	}
+}
+
+static int mvs_64xx_oob_done(struct mvs_info *mvi, int i)
+{
+	u32 phy_st;
+	mvs_write_port_cfg_addr(mvi, i,
+			PHYR_PHY_STAT);
+	phy_st = mvs_read_port_cfg_data(mvi, i);
+	if (phy_st & PHY_OOB_DTCTD)
+		return 1;
+	return 0;
+}
+
+static void mvs_64xx_fix_phy_info(struct mvs_info *mvi, int i,
+				struct sas_identify_frame *id)
+
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+	sas_phy->linkrate =
+		(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+			PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
+
+	phy->minimum_linkrate =
+		(phy->phy_status &
+			PHY_MIN_SPP_PHYS_LINK_RATE_MASK) >> 8;
+	phy->maximum_linkrate =
+		(phy->phy_status &
+			PHY_MAX_SPP_PHYS_LINK_RATE_MASK) >> 12;
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_IDENTIFY);
+	phy->dev_info = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_DEV_INFO);
+	phy->att_dev_info = mvs_read_port_cfg_data(mvi, i);
+
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_HI);
+	phy->att_dev_sas_addr =
+	     (u64) mvs_read_port_cfg_data(mvi, i) << 32;
+	mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_LO);
+	phy->att_dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
+	phy->att_dev_sas_addr = SAS_ADDR(&phy->att_dev_sas_addr);
+}
+
+static void mvs_64xx_phy_work_around(struct mvs_info *mvi, int i)
+{
+	u32 tmp;
+	struct mvs_phy *phy = &mvi->phy[i];
+	/* workaround for HW phy decoding error on 1.5g disk drive */
+	mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE6);
+	tmp = mvs_read_port_vsr_data(mvi, i);
+	if (((phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+	     PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET) ==
+		SAS_LINK_RATE_1_5_GBPS)
+		tmp &= ~PHY_MODE6_LATECLK;
+	else
+		tmp |= PHY_MODE6_LATECLK;
+	mvs_write_port_vsr_data(mvi, i, tmp);
+}
+
+void mvs_64xx_phy_set_link_rate(struct mvs_info *mvi, u32 phy_id,
+			struct sas_phy_linkrates *rates)
+{
+	u32 lrmin = 0, lrmax = 0;
+	u32 tmp;
+
+	tmp = mvs_read_phy_ctl(mvi, phy_id);
+	lrmin = (rates->minimum_linkrate << 8);
+	lrmax = (rates->maximum_linkrate << 12);
+
+	if (lrmin) {
+		tmp &= ~(0xf << 8);
+		tmp |= lrmin;
+	}
+	if (lrmax) {
+		tmp &= ~(0xf << 12);
+		tmp |= lrmax;
+	}
+	mvs_write_phy_ctl(mvi, phy_id, tmp);
+	mvs_64xx_phy_reset(mvi, phy_id, 1);
+}
+
+static void mvs_64xx_clear_active_cmds(struct mvs_info *mvi)
+{
+	u32 tmp;
+	void __iomem *regs = mvi->regs;
+	tmp = mr32(MVS_PCS);
+	mw32(MVS_PCS, tmp & 0xFFFF);
+	mw32(MVS_PCS, tmp);
+	tmp = mr32(MVS_CTL);
+	mw32(MVS_CTL, tmp & 0xFFFF);
+	mw32(MVS_CTL, tmp);
+}
+
+
+u32 mvs_64xx_spi_read_data(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs_ex;
+	return ior32(SPI_DATA_REG_64XX);
+}
+
+void mvs_64xx_spi_write_data(struct mvs_info *mvi, u32 data)
+{
+	void __iomem *regs = mvi->regs_ex;
+	 iow32(SPI_DATA_REG_64XX, data);
+}
+
+
+int mvs_64xx_spi_buildcmd(struct mvs_info *mvi,
+			u32      *dwCmd,
+			u8       cmd,
+			u8       read,
+			u8       length,
+			u32      addr
+			)
+{
+	u32  dwTmp;
+
+	dwTmp = ((u32)cmd << 24) | ((u32)length << 19);
+	if (read)
+		dwTmp |= 1U<<23;
+
+	if (addr != MV_MAX_U32) {
+		dwTmp |= 1U<<22;
+		dwTmp |= (addr & 0x0003FFFF);
+	}
+
+	*dwCmd = dwTmp;
+	return 0;
+}
+
+
+int mvs_64xx_spi_issuecmd(struct mvs_info *mvi, u32 cmd)
+{
+	void __iomem *regs = mvi->regs_ex;
+	int     retry;
+
+	for (retry = 0; retry < 1; retry++) {
+		iow32(SPI_CTRL_REG_64XX, SPI_CTRL_VENDOR_ENABLE);
+		iow32(SPI_CMD_REG_64XX, cmd);
+		iow32(SPI_CTRL_REG_64XX,
+			SPI_CTRL_VENDOR_ENABLE | SPI_CTRL_SPISTART);
+	}
+
+	return 0;
+}
+
+int mvs_64xx_spi_waitdataready(struct mvs_info *mvi, u32 timeout)
+{
+	void __iomem *regs = mvi->regs_ex;
+	u32 i, dwTmp;
+
+	for (i = 0; i < timeout; i++) {
+		dwTmp = ior32(SPI_CTRL_REG_64XX);
+		if (!(dwTmp & SPI_CTRL_SPISTART))
+			return 0;
+		msleep(10);
+	}
+
+	return -1;
+}
+
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+void mvs_64xx_fix_dma(dma_addr_t buf_dma, int buf_len, int from, void *prd)
+{
+	int i;
+	struct mvs_prd *buf_prd = prd;
+	buf_prd	+= from;
+	for (i = 0; i < MAX_SG_ENTRY - from; i++) {
+		buf_prd->addr = cpu_to_le64(buf_dma);
+		buf_prd->len = cpu_to_le32(buf_len);
+		++buf_prd;
+	}
+}
+#endif
+
+const struct mvs_dispatch mvs_64xx_dispatch = {
+	"mv64xx",
+	mvs_64xx_init,
+	NULL,
+	mvs_64xx_ioremap,
+	mvs_64xx_iounmap,
+	mvs_64xx_isr,
+	mvs_64xx_isr_status,
+	mvs_64xx_interrupt_enable,
+	mvs_64xx_interrupt_disable,
+	mvs_read_phy_ctl,
+	mvs_write_phy_ctl,
+	mvs_read_port_cfg_data,
+	mvs_write_port_cfg_data,
+	mvs_write_port_cfg_addr,
+	mvs_read_port_vsr_data,
+	mvs_write_port_vsr_data,
+	mvs_write_port_vsr_addr,
+	mvs_read_port_irq_stat,
+	mvs_write_port_irq_stat,
+	mvs_read_port_irq_mask,
+	mvs_write_port_irq_mask,
+	mvs_get_sas_addr,
+	mvs_64xx_command_active,
+	mvs_64xx_issue_stop,
+	mvs_start_delivery,
+	mvs_rx_update,
+	mvs_int_full,
+	mvs_64xx_assign_reg_set,
+	mvs_64xx_free_reg_set,
+	mvs_get_prd_size,
+	mvs_get_prd_count,
+	mvs_64xx_make_prd,
+	mvs_64xx_detect_porttype,
+	mvs_64xx_oob_done,
+	mvs_64xx_fix_phy_info,
+	mvs_64xx_phy_work_around,
+	mvs_64xx_phy_set_link_rate,
+	mvs_hw_max_link_rate,
+	mvs_64xx_phy_disable,
+	mvs_64xx_phy_enable,
+	mvs_64xx_phy_reset,
+	mvs_64xx_stp_reset,
+	mvs_64xx_clear_active_cmds,
+	mvs_64xx_spi_read_data,
+	mvs_64xx_spi_write_data,
+	mvs_64xx_spi_buildcmd,
+	mvs_64xx_spi_issuecmd,
+	mvs_64xx_spi_waitdataready,
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	mvs_64xx_fix_dma,
+#endif
+};
+
diff --git a/drivers/scsi/mvsas/mv_64xx.h b/drivers/scsi/mvsas/mv_64xx.h
index c9f399e..42e947d 100644
--- a/drivers/scsi/mvsas/mv_64xx.h
+++ b/drivers/scsi/mvsas/mv_64xx.h
@@ -1,11 +1,43 @@
+/*
+ * Marvell 88SE64xx hardware specific head file
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
 #ifndef _MVS64XX_REG_H_
 #define _MVS64XX_REG_H_
 
+#include <linux/types.h>
+
+#define MAX_LINK_RATE		SAS_LINK_RATE_3_0_GBPS
+
 /* enhanced mode registers (BAR4) */
 enum hw_registers {
 	MVS_GBL_CTL		= 0x04,  /* global control */
 	MVS_GBL_INT_STAT	= 0x08,  /* global irq status */
 	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
+
+	MVS_PHY_CTL		= 0x40,  /* SOC PHY Control */
+	MVS_PORTS_IMP		= 0x9C,  /* SOC Port Implemented */
+
 	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
 
 	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
@@ -30,17 +62,19 @@ enum hw_registers {
 	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
 	MVS_INT_STAT		= 0x150, /* Central int status */
 	MVS_INT_MASK		= 0x154, /* Central int enable */
-	MVS_INT_STAT_SRS	= 0x158, /* SATA register set status */
-	MVS_INT_MASK_SRS	= 0x15C,
+	MVS_INT_STAT_SRS_0	= 0x158, /* SATA register set status */
+	MVS_INT_MASK_SRS_0	= 0x15C,
 
 					 /* ports 1-3 follow after this */
 	MVS_P0_INT_STAT		= 0x160, /* port0 interrupt status */
 	MVS_P0_INT_MASK		= 0x164, /* port0 interrupt mask */
-	MVS_P4_INT_STAT		= 0x200, /* Port 4 interrupt status */
-	MVS_P4_INT_MASK		= 0x204, /* Port 4 interrupt enable mask */
+					 /* ports 5-7 follow after this */
+	MVS_P4_INT_STAT		= 0x200, /* Port4 interrupt status */
+	MVS_P4_INT_MASK		= 0x204, /* Port4 interrupt enable mask */
 
 					 /* ports 1-3 follow after this */
 	MVS_P0_SER_CTLSTAT	= 0x180, /* port0 serial control/status */
+					 /* ports 5-7 follow after this */
 	MVS_P4_SER_CTLSTAT	= 0x220, /* port4 serial control/status */
 
 	MVS_CMD_ADDR		= 0x1B8, /* Command register port (addr) */
@@ -49,20 +83,23 @@ enum hw_registers {
 					 /* ports 1-3 follow after this */
 	MVS_P0_CFG_ADDR		= 0x1C0, /* port0 phy register address */
 	MVS_P0_CFG_DATA		= 0x1C4, /* port0 phy register data */
-	MVS_P4_CFG_ADDR		= 0x230, /* Port 4 config address */
-	MVS_P4_CFG_DATA		= 0x234, /* Port 4 config data */
+					 /* ports 5-7 follow after this */
+	MVS_P4_CFG_ADDR		= 0x230, /* Port4 config address */
+	MVS_P4_CFG_DATA		= 0x234, /* Port4 config data */
 
 					 /* ports 1-3 follow after this */
 	MVS_P0_VSR_ADDR		= 0x1E0, /* port0 VSR address */
 	MVS_P0_VSR_DATA		= 0x1E4, /* port0 VSR data */
-	MVS_P4_VSR_ADDR		= 0x250, /* port 4 VSR addr */
-	MVS_P4_VSR_DATA		= 0x254, /* port 4 VSR data */
+					 /* ports 5-7 follow after this */
+	MVS_P4_VSR_ADDR		= 0x250, /* port4 VSR addr */
+	MVS_P4_VSR_DATA		= 0x254, /* port4 VSR data */
 };
 
 enum pci_cfg_registers {
 	PCR_PHY_CTL		= 0x40,
 	PCR_PHY_CTL2		= 0x90,
 	PCR_DEV_CTRL		= 0xE8,
+	PCR_LINK_STAT		= 0xF2,
 };
 
 /*  SAS/SATA Vendor Specific Port Registers */
@@ -83,10 +120,32 @@ enum sas_sata_vsp_regs {
 	VSR_PHY_VS1		= 0x0D, /* Vednor Specific 1 */
 };
 
+enum chip_register_bits {
+	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0xF << 8),
+	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0xF << 12),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (16),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
+			(0xF << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
+};
+
+#define MAX_SG_ENTRY		64
+
 struct mvs_prd {
 	__le64			addr;		/* 64-bit buffer address */
 	__le32			reserved;
 	__le32			len;		/* 16-bit length */
 };
 
+#define SPI_CTRL_REG				0xc0
+#define SPI_CTRL_VENDOR_ENABLE		(1U<<29)
+#define SPI_CTRL_SPIRDY         		(1U<<22)
+#define SPI_CTRL_SPISTART			(1U<<20)
+
+#define SPI_CMD_REG		0xc4
+#define SPI_DATA_REG		0xc8
+
+#define SPI_CTRL_REG_64XX		0x10
+#define SPI_CMD_REG_64XX		0x14
+#define SPI_DATA_REG_64XX		0x18
+
 #endif
diff --git a/drivers/scsi/mvsas/mv_94xx.c b/drivers/scsi/mvsas/mv_94xx.c
new file mode 100644
index 0000000..0940fae
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_94xx.c
@@ -0,0 +1,672 @@
+/*
+ * Marvell 88SE94xx hardware specific
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#include "mv_sas.h"
+#include "mv_94xx.h"
+#include "mv_chips.h"
+
+static void mvs_94xx_detect_porttype(struct mvs_info *mvi, int i)
+{
+	u32 reg;
+	struct mvs_phy *phy = &mvi->phy[i];
+	u32 phy_status;
+
+	mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE3);
+	reg = mvs_read_port_vsr_data(mvi, i);
+	phy_status = ((reg & 0x3f0000) >> 16) & 0xff;
+	phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
+	switch (phy_status) {
+	case 0x10:
+		phy->phy_type |= PORT_TYPE_SAS;
+		break;
+	case 0x1d:
+	default:
+		phy->phy_type |= PORT_TYPE_SATA;
+		break;
+	}
+}
+
+static void __devinit mvs_94xx_enable_xmt(struct mvs_info *mvi, int phy_id)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	tmp = mr32(MVS_PCS);
+	tmp |= 1 << (phy_id + PCS_EN_PORT_XMT_SHIFT2);
+	mw32(MVS_PCS, tmp);
+}
+
+static void mvs_94xx_phy_reset(struct mvs_info *mvi, u32 phy_id, int hard)
+{
+	u32 tmp;
+
+	tmp = mvs_read_port_irq_stat(mvi, phy_id);
+	tmp &= ~PHYEV_RDY_CH;
+	mvs_write_port_irq_stat(mvi, phy_id, tmp);
+	if (hard) {
+		tmp = mvs_read_phy_ctl(mvi, phy_id);
+		tmp |= PHY_RST_HARD;
+		mvs_write_phy_ctl(mvi, phy_id, tmp);
+		do {
+			tmp = mvs_read_phy_ctl(mvi, phy_id);
+		} while (tmp & PHY_RST_HARD);
+	} else {
+		mvs_write_port_vsr_addr(mvi, phy_id, VSR_PHY_STAT);
+		tmp = mvs_read_port_vsr_data(mvi, phy_id);
+		tmp |= PHY_RST;
+		mvs_write_port_vsr_data(mvi, phy_id, tmp);
+	}
+}
+
+static void mvs_94xx_phy_disable(struct mvs_info *mvi, u32 phy_id)
+{
+	u32 tmp;
+	mvs_write_port_vsr_addr(mvi, phy_id, VSR_PHY_MODE2);
+	tmp = mvs_read_port_vsr_data(mvi, phy_id);
+	mvs_write_port_vsr_data(mvi, phy_id, tmp | 0x00800000);
+}
+
+static void mvs_94xx_phy_enable(struct mvs_info *mvi, u32 phy_id)
+{
+	mvs_write_port_vsr_addr(mvi, phy_id, 0x1B4);
+	mvs_write_port_vsr_data(mvi, phy_id, 0x8300ffc1);
+	mvs_write_port_vsr_addr(mvi, phy_id, 0x104);
+	mvs_write_port_vsr_data(mvi, phy_id, 0x00018080);
+	mvs_write_port_vsr_addr(mvi, phy_id, VSR_PHY_MODE2);
+	mvs_write_port_vsr_data(mvi, phy_id, 0x00207fff);
+}
+
+static int __devinit mvs_94xx_init(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	int i;
+	u32 tmp, cctl;
+
+	mvs_show_pcie_usage(mvi);
+	if (mvi->flags & MVF_FLAG_SOC) {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp |= PCTL_PHY_DSBL;
+		mw32(MVS_PHY_CTL, tmp);
+	}
+
+	/* Init Chip */
+	/* make sure RST is set; HBA_RST /should/ have done that for us */
+	cctl = mr32(MVS_CTL) & 0xFFFF;
+	if (cctl & CCTL_RST)
+		cctl &= ~CCTL_RST;
+	else
+		mw32_f(MVS_CTL, cctl | CCTL_RST);
+
+	if (mvi->flags & MVF_FLAG_SOC) {
+		tmp = mr32(MVS_PHY_CTL);
+		tmp &= ~PCTL_PWR_OFF;
+		tmp |= PCTL_COM_ON;
+		tmp &= ~PCTL_PHY_DSBL;
+		tmp |= PCTL_LINK_RST;
+		mw32(MVS_PHY_CTL, tmp);
+		msleep(100);
+		tmp &= ~PCTL_LINK_RST;
+		mw32(MVS_PHY_CTL, tmp);
+		msleep(100);
+	}
+
+	/* reset control */
+	mw32(MVS_PCS, 0);		/* MVS_PCS */
+	mw32(MVS_STP_REG_SET_0, 0);
+	mw32(MVS_STP_REG_SET_1, 0);
+
+	/* init phys */
+	mvs_phy_hacks(mvi);
+
+	/* disable Multiplexing, enable phy implemented */
+	mw32(MVS_PORTS_IMP, 0xFF);
+
+
+	mw32(MVS_PA_VSR_ADDR, 0x00000104);
+	mw32(MVS_PA_VSR_PORT, 0x00018080);
+	mw32(MVS_PA_VSR_ADDR, VSR_PHY_MODE8);
+	mw32(MVS_PA_VSR_PORT, 0x0084ffff);
+
+	/* set LED blink when IO*/
+	mw32(MVS_PA_VSR_ADDR, 0x00000030);
+	tmp = mr32(MVS_PA_VSR_PORT);
+	tmp &= 0xFFFF00FF;
+	tmp |= 0x00003300;
+	mw32(MVS_PA_VSR_PORT, tmp);
+
+	mw32(MVS_CMD_LIST_LO, mvi->slot_dma);
+	mw32(MVS_CMD_LIST_HI, (mvi->slot_dma >> 16) >> 16);
+
+	mw32(MVS_RX_FIS_LO, mvi->rx_fis_dma);
+	mw32(MVS_RX_FIS_HI, (mvi->rx_fis_dma >> 16) >> 16);
+
+	mw32(MVS_TX_CFG, MVS_CHIP_SLOT_SZ);
+	mw32(MVS_TX_LO, mvi->tx_dma);
+	mw32(MVS_TX_HI, (mvi->tx_dma >> 16) >> 16);
+
+	mw32(MVS_RX_CFG, MVS_RX_RING_SZ);
+	mw32(MVS_RX_LO, mvi->rx_dma);
+	mw32(MVS_RX_HI, (mvi->rx_dma >> 16) >> 16);
+
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		mvs_94xx_phy_disable(mvi, i);
+		/* set phy local SAS address */
+		mvs_set_sas_addr(mvi, i, CONFIG_ID_FRAME3, CONFIG_ID_FRAME4,
+						(mvi->phy[i].dev_sas_addr));
+
+		mvs_94xx_enable_xmt(mvi, i);
+		mvs_94xx_phy_enable(mvi, i);
+
+		mvs_94xx_phy_reset(mvi, i, 1);
+		msleep(500);
+		mvs_94xx_detect_porttype(mvi, i);
+	}
+
+	if (mvi->flags & MVF_FLAG_SOC) {
+		/* set select registers */
+		writel(0x0E008000, regs + 0x000);
+		writel(0x59000008, regs + 0x004);
+		writel(0x20, regs + 0x008);
+		writel(0x20, regs + 0x00c);
+		writel(0x20, regs + 0x010);
+		writel(0x20, regs + 0x014);
+		writel(0x20, regs + 0x018);
+		writel(0x20, regs + 0x01c);
+	}
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		/* clear phy int status */
+		tmp = mvs_read_port_irq_stat(mvi, i);
+		tmp &= ~PHYEV_SIG_FIS;
+		mvs_write_port_irq_stat(mvi, i, tmp);
+
+		/* set phy int mask */
+		tmp = PHYEV_RDY_CH | PHYEV_BROAD_CH |
+			PHYEV_ID_DONE  | PHYEV_DCDR_ERR | PHYEV_CRC_ERR ;
+		mvs_write_port_irq_mask(mvi, i, tmp);
+
+		msleep(100);
+		mvs_update_phyinfo(mvi, i, 1);
+	}
+
+	/* FIXME: update wide port bitmaps */
+
+	/* little endian for open address and command table, etc. */
+	/*
+	 * it seems that ( from the spec ) turning on big-endian won't
+	 * do us any good on big-endian machines, need further confirmation
+	 */
+	cctl = mr32(MVS_CTL);
+	cctl |= CCTL_ENDIAN_CMD;
+	cctl |= CCTL_ENDIAN_DATA;
+	cctl &= ~CCTL_ENDIAN_OPEN;
+	cctl |= CCTL_ENDIAN_RSP;
+	mw32_f(MVS_CTL, cctl);
+
+	/* reset CMD queue */
+	tmp = mr32(MVS_PCS);
+	tmp |= PCS_CMD_RST;
+	mw32(MVS_PCS, tmp);
+	/* interrupt coalescing may cause missing HW interrput in some case,
+	 * and the max count is 0x1ff, while our max slot is 0x200,
+	 * it will make count 0.
+	 */
+	tmp = 0;
+	mw32(MVS_INT_COAL, tmp);
+
+	tmp = 0x100;
+	mw32(MVS_INT_COAL_TMOUT, tmp);
+
+	/* ladies and gentlemen, start your engines */
+	mw32(MVS_TX_CFG, 0);
+	mw32(MVS_TX_CFG, MVS_CHIP_SLOT_SZ | TX_EN);
+	mw32(MVS_RX_CFG, MVS_RX_RING_SZ | RX_EN);
+	/* enable CMD/CMPL_Q/RESP mode */
+	mw32(MVS_PCS, PCS_SATA_RETRY_2 | PCS_FIS_RX_EN |
+		PCS_CMD_EN | PCS_CMD_STOP_ERR);
+
+	/* enable completion queue interrupt */
+	tmp = (CINT_PORT_MASK | CINT_DONE | CINT_MEM | CINT_SRS | CINT_CI_STOP |
+		CINT_DMA_PCIE);
+	tmp |= CINT_PHY_MASK;
+	mw32(MVS_INT_MASK, tmp);
+
+	/* Enable SRS interrupt */
+	mw32(MVS_INT_MASK_SRS_0, 0xFFFF);
+
+	return 0;
+}
+
+static int mvs_94xx_ioremap(struct mvs_info *mvi)
+{
+	if (!mvs_ioremap(mvi, 2, -1)) {
+		mvi->regs_ex = mvi->regs + 0x10200;
+		mvi->regs += 0x20000;
+		if (mvi->id == 1)
+			mvi->regs += 0x4000;
+		return 0;
+	}
+	return -1;
+}
+
+static void mvs_94xx_iounmap(struct mvs_info *mvi)
+{
+	if (mvi->regs) {
+		mvi->regs -= 0x20000;
+		if (mvi->id == 1)
+			mvi->regs -= 0x4000;
+		mvs_iounmap(mvi->regs);
+	}
+}
+
+static void mvs_94xx_interrupt_enable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs_ex;
+	u32 tmp;
+
+	tmp = mr32(MVS_GBL_CTL);
+	tmp |= (IRQ_SAS_A | IRQ_SAS_B);
+	mw32(MVS_GBL_INT_STAT, tmp);
+	writel(tmp, regs + 0x0C);
+	writel(tmp, regs + 0x10);
+	writel(tmp, regs + 0x14);
+	writel(tmp, regs + 0x18);
+	mw32(MVS_GBL_CTL, tmp);
+}
+
+static void mvs_94xx_interrupt_disable(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs_ex;
+	u32 tmp;
+
+	tmp = mr32(MVS_GBL_CTL);
+
+	tmp &= ~(IRQ_SAS_A | IRQ_SAS_B);
+	mw32(MVS_GBL_INT_STAT, tmp);
+	writel(tmp, regs + 0x0C);
+	writel(tmp, regs + 0x10);
+	writel(tmp, regs + 0x14);
+	writel(tmp, regs + 0x18);
+	mw32(MVS_GBL_CTL, tmp);
+}
+
+static u32 mvs_94xx_isr_status(struct mvs_info *mvi, int irq)
+{
+	void __iomem *regs = mvi->regs_ex;
+	u32 stat = 0;
+	if (!(mvi->flags & MVF_FLAG_SOC)) {
+		stat = mr32(MVS_GBL_INT_STAT);
+
+		if (!(stat & (IRQ_SAS_A | IRQ_SAS_B)))
+			return 0;
+	}
+	return stat;
+}
+
+static irqreturn_t mvs_94xx_isr(struct mvs_info *mvi, int irq, u32 stat)
+{
+	void __iomem *regs = mvi->regs;
+
+	if (((stat & IRQ_SAS_A) && mvi->id == 0) ||
+			((stat & IRQ_SAS_B) && mvi->id == 1)) {
+		mw32_f(MVS_INT_STAT, CINT_DONE);
+	#ifndef MVS_USE_TASKLET
+		spin_lock(&mvi->lock);
+	#endif
+		mvs_int_full(mvi);
+	#ifndef MVS_USE_TASKLET
+		spin_unlock(&mvi->lock);
+	#endif
+	}
+	return IRQ_HANDLED;
+}
+
+static void mvs_94xx_command_active(struct mvs_info *mvi, u32 slot_idx)
+{
+	u32 tmp;
+	mvs_cw32(mvi, 0x300 + (slot_idx >> 3), 1 << (slot_idx % 32));
+	do {
+		tmp = mvs_cr32(mvi, 0x300 + (slot_idx >> 3));
+	} while (tmp & 1 << (slot_idx % 32));
+}
+
+static void mvs_94xx_issue_stop(struct mvs_info *mvi, enum mvs_port_type type,
+				u32 tfs)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+
+	if (type == PORT_TYPE_SATA) {
+		tmp = mr32(MVS_INT_STAT_SRS_0) | (1U << tfs);
+		mw32(MVS_INT_STAT_SRS_0, tmp);
+	}
+	mw32(MVS_INT_STAT, CINT_CI_STOP);
+	tmp = mr32(MVS_PCS) | 0xFF00;
+	mw32(MVS_PCS, tmp);
+}
+
+static void mvs_94xx_free_reg_set(struct mvs_info *mvi, u8 *tfs)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp;
+	u8 reg_set = *tfs;
+
+	if (*tfs == MVS_ID_NOT_MAPPED)
+		return;
+
+	mvi->sata_reg_set &= ~bit(reg_set);
+	if (reg_set < 32) {
+		w_reg_set_enable(reg_set, (u32)mvi->sata_reg_set);
+		tmp = mr32(MVS_INT_STAT_SRS_0) & (u32)mvi->sata_reg_set;
+		if (tmp)
+			mw32(MVS_INT_STAT_SRS_0, tmp);
+	} else {
+		w_reg_set_enable(reg_set, mvi->sata_reg_set);
+		tmp = mr32(MVS_INT_STAT_SRS_1) & mvi->sata_reg_set;
+		if (tmp)
+			mw32(MVS_INT_STAT_SRS_1, tmp);
+	}
+
+	*tfs = MVS_ID_NOT_MAPPED;
+
+	return;
+}
+
+static u8 mvs_94xx_assign_reg_set(struct mvs_info *mvi, u8 *tfs)
+{
+	int i;
+	void __iomem *regs = mvi->regs;
+
+	if (*tfs != MVS_ID_NOT_MAPPED)
+		return 0;
+
+	i = mv_ffc64(mvi->sata_reg_set);
+	if (i > 32) {
+		mvi->sata_reg_set |= bit(i);
+		w_reg_set_enable(i, (u32)(mvi->sata_reg_set >> 32));
+		*tfs = i;
+		return 0;
+	} else if (i >= 0) {
+		mvi->sata_reg_set |= bit(i);
+		w_reg_set_enable(i, (u32)mvi->sata_reg_set);
+		*tfs = i;
+		return 0;
+	}
+	return MVS_ID_NOT_MAPPED;
+}
+
+static void mvs_94xx_make_prd(struct scatterlist *scatter, int nr, void *prd)
+{
+	int i;
+	struct scatterlist *sg;
+	struct mvs_prd *buf_prd = prd;
+	for_each_sg(scatter, sg, nr, i) {
+		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
+		buf_prd->im_len.len = cpu_to_le32(sg_dma_len(sg));
+		buf_prd++;
+	}
+}
+
+static int mvs_94xx_oob_done(struct mvs_info *mvi, int i)
+{
+	u32 phy_st;
+	phy_st = mvs_read_phy_ctl(mvi, i);
+	if (phy_st & PHY_READY_MASK)	/* phy ready */
+		return 1;
+	return 0;
+}
+
+static void mvs_94xx_get_dev_identify_frame(struct mvs_info *mvi, int port_id,
+					struct sas_identify_frame *id)
+{
+	int i;
+	u32 id_frame[7];
+
+	for (i = 0; i < 7; i++) {
+		mvs_write_port_cfg_addr(mvi, port_id,
+					CONFIG_ID_FRAME0 + i * 4);
+		id_frame[i] = mvs_read_port_cfg_data(mvi, port_id);
+	}
+	memcpy(id, id_frame, 28);
+}
+
+static void mvs_94xx_get_att_identify_frame(struct mvs_info *mvi, int port_id,
+					struct sas_identify_frame *id)
+{
+	int i;
+	u32 id_frame[7];
+
+	/* mvs_hexdump(28, (u8 *)id_frame, 0); */
+	for (i = 0; i < 7; i++) {
+		mvs_write_port_cfg_addr(mvi, port_id,
+					CONFIG_ATT_ID_FRAME0 + i * 4);
+		id_frame[i] = mvs_read_port_cfg_data(mvi, port_id);
+		mv_dprintk("94xx phy %d atta frame %d %x.\n",
+			port_id + mvi->id * mvi->chip->n_phy, i, id_frame[i]);
+	}
+	/* mvs_hexdump(28, (u8 *)id_frame, 0); */
+	memcpy(id, id_frame, 28);
+}
+
+static u32 mvs_94xx_make_dev_info(struct sas_identify_frame *id)
+{
+	u32 att_dev_info = 0;
+
+	att_dev_info |= id->dev_type;
+	if (id->stp_iport)
+		att_dev_info |= PORT_DEV_STP_INIT;
+	if (id->smp_iport)
+		att_dev_info |= PORT_DEV_SMP_INIT;
+	if (id->ssp_iport)
+		att_dev_info |= PORT_DEV_SSP_INIT;
+	if (id->stp_tport)
+		att_dev_info |= PORT_DEV_STP_TRGT;
+	if (id->smp_tport)
+		att_dev_info |= PORT_DEV_SMP_TRGT;
+	if (id->ssp_tport)
+		att_dev_info |= PORT_DEV_SSP_TRGT;
+
+	att_dev_info |= (u32)id->phy_id<<24;
+	return att_dev_info;
+}
+
+static u32 mvs_94xx_make_att_info(struct sas_identify_frame *id)
+{
+	return mvs_94xx_make_dev_info(id);
+}
+
+static void mvs_94xx_fix_phy_info(struct mvs_info *mvi, int i,
+				struct sas_identify_frame *id)
+{
+	struct mvs_phy *phy = &mvi->phy[i];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+	mv_dprintk("get all reg link rate is 0x%x\n", phy->phy_status);
+	sas_phy->linkrate =
+		(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
+			PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
+	sas_phy->linkrate += 0x8;
+	mv_dprintk("get link rate is %d\n", sas_phy->linkrate);
+	phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+	phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS;
+	mvs_94xx_get_dev_identify_frame(mvi, i, id);
+	phy->dev_info = mvs_94xx_make_dev_info(id);
+
+	if (phy->phy_type & PORT_TYPE_SAS) {
+		mvs_94xx_get_att_identify_frame(mvi, i, id);
+		phy->att_dev_info = mvs_94xx_make_att_info(id);
+		phy->att_dev_sas_addr = *(u64 *)id->sas_addr;
+	} else {
+		phy->att_dev_info = PORT_DEV_STP_TRGT | 1;
+	}
+
+}
+
+void mvs_94xx_phy_set_link_rate(struct mvs_info *mvi, u32 phy_id,
+			struct sas_phy_linkrates *rates)
+{
+	/* TODO */
+}
+
+static void mvs_94xx_clear_active_cmds(struct mvs_info *mvi)
+{
+	u32 tmp;
+	void __iomem *regs = mvi->regs;
+	tmp = mr32(MVS_STP_REG_SET_0);
+	mw32(MVS_STP_REG_SET_0, 0);
+	mw32(MVS_STP_REG_SET_0, tmp);
+	tmp = mr32(MVS_STP_REG_SET_1);
+	mw32(MVS_STP_REG_SET_1, 0);
+	mw32(MVS_STP_REG_SET_1, tmp);
+}
+
+
+u32 mvs_94xx_spi_read_data(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	return mr32(SPI_RD_DATA_REG_94XX);
+}
+
+void mvs_94xx_spi_write_data(struct mvs_info *mvi, u32 data)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	 mw32(SPI_RD_DATA_REG_94XX, data);
+}
+
+
+int mvs_94xx_spi_buildcmd(struct mvs_info *mvi,
+				u32      *dwCmd,
+				u8       cmd,
+				u8       read,
+				u8       length,
+				u32      addr
+				)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	u32  dwTmp;
+
+	dwTmp = ((u32)cmd << 8) | ((u32)length << 4);
+	if (read)
+		dwTmp |= SPI_CTRL_READ_94XX;
+
+	if (addr != MV_MAX_U32) {
+		mw32(SPI_ADDR_REG_94XX, (addr & 0x0003FFFFL));
+		dwTmp |= SPI_ADDR_VLD_94XX;
+	}
+
+	*dwCmd = dwTmp;
+	return 0;
+}
+
+
+int mvs_94xx_spi_issuecmd(struct mvs_info *mvi, u32 cmd)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	mw32(SPI_CTRL_REG_94XX, cmd | SPI_CTRL_SpiStart_94XX);
+
+	return 0;
+}
+
+int mvs_94xx_spi_waitdataready(struct mvs_info *mvi, u32 timeout)
+{
+	void __iomem *regs = mvi->regs_ex - 0x10200;
+	u32   i, dwTmp;
+
+	for (i = 0; i < timeout; i++) {
+		dwTmp = mr32(SPI_CTRL_REG_94XX);
+		if (!(dwTmp & SPI_CTRL_SpiStart_94XX))
+			return 0;
+		msleep(10);
+	}
+
+	return -1;
+}
+
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+void mvs_94xx_fix_dma(dma_addr_t buf_dma, int buf_len, int from, void *prd)
+{
+	int i;
+	struct mvs_prd *buf_prd = prd;
+	buf_prd += from;
+	for (i = 0; i < MAX_SG_ENTRY - from; i++) {
+		buf_prd->addr = cpu_to_le64(buf_dma);
+		buf_prd->im_len.len = cpu_to_le32(buf_len);
+		++buf_prd;
+	}
+}
+#endif
+
+const struct mvs_dispatch mvs_94xx_dispatch = {
+	"mv94xx",
+	mvs_94xx_init,
+	NULL,
+	mvs_94xx_ioremap,
+	mvs_94xx_iounmap,
+	mvs_94xx_isr,
+	mvs_94xx_isr_status,
+	mvs_94xx_interrupt_enable,
+	mvs_94xx_interrupt_disable,
+	mvs_read_phy_ctl,
+	mvs_write_phy_ctl,
+	mvs_read_port_cfg_data,
+	mvs_write_port_cfg_data,
+	mvs_write_port_cfg_addr,
+	mvs_read_port_vsr_data,
+	mvs_write_port_vsr_data,
+	mvs_write_port_vsr_addr,
+	mvs_read_port_irq_stat,
+	mvs_write_port_irq_stat,
+	mvs_read_port_irq_mask,
+	mvs_write_port_irq_mask,
+	mvs_get_sas_addr,
+	mvs_94xx_command_active,
+	mvs_94xx_issue_stop,
+	mvs_start_delivery,
+	mvs_rx_update,
+	mvs_int_full,
+	mvs_94xx_assign_reg_set,
+	mvs_94xx_free_reg_set,
+	mvs_get_prd_size,
+	mvs_get_prd_count,
+	mvs_94xx_make_prd,
+	mvs_94xx_detect_porttype,
+	mvs_94xx_oob_done,
+	mvs_94xx_fix_phy_info,
+	NULL,
+	mvs_94xx_phy_set_link_rate,
+	mvs_hw_max_link_rate,
+	mvs_94xx_phy_disable,
+	mvs_94xx_phy_enable,
+	mvs_94xx_phy_reset,
+	NULL,
+	mvs_94xx_clear_active_cmds,
+	mvs_94xx_spi_read_data,
+	mvs_94xx_spi_write_data,
+	mvs_94xx_spi_buildcmd,
+	mvs_94xx_spi_issuecmd,
+	mvs_94xx_spi_waitdataready,
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	mvs_94xx_fix_dma,
+#endif
+};
+
diff --git a/drivers/scsi/mvsas/mv_94xx.h b/drivers/scsi/mvsas/mv_94xx.h
new file mode 100644
index 0000000..23ed9b1
--- /dev/null
+++ b/drivers/scsi/mvsas/mv_94xx.h
@@ -0,0 +1,222 @@
+/*
+ * Marvell 88SE94xx hardware specific head file
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+#ifndef _MVS94XX_REG_H_
+#define _MVS94XX_REG_H_
+
+#include <linux/types.h>
+
+#define MAX_LINK_RATE		SAS_LINK_RATE_6_0_GBPS
+
+enum hw_registers {
+	MVS_GBL_CTL		= 0x04,  /* global control */
+	MVS_GBL_INT_STAT	= 0x00,  /* global irq status */
+	MVS_GBL_PI		= 0x0C,  /* ports implemented bitmask */
+
+	MVS_PHY_CTL		= 0x40,  /* SOC PHY Control */
+	MVS_PORTS_IMP		= 0x9C,  /* SOC Port Implemented */
+
+	MVS_GBL_PORT_TYPE	= 0xa0,  /* port type */
+
+	MVS_CTL			= 0x100, /* SAS/SATA port configuration */
+	MVS_PCS			= 0x104, /* SAS/SATA port control/status */
+	MVS_CMD_LIST_LO		= 0x108, /* cmd list addr */
+	MVS_CMD_LIST_HI		= 0x10C,
+	MVS_RX_FIS_LO		= 0x110, /* RX FIS list addr */
+	MVS_RX_FIS_HI		= 0x114,
+	MVS_STP_REG_SET_0	= 0x118, /* STP/SATA Register Set Enable */
+	MVS_STP_REG_SET_1	= 0x11C,
+	MVS_TX_CFG		= 0x120, /* TX configuration */
+	MVS_TX_LO		= 0x124, /* TX (delivery) ring addr */
+	MVS_TX_HI		= 0x128,
+
+	MVS_TX_PROD_IDX		= 0x12C, /* TX producer pointer */
+	MVS_TX_CONS_IDX		= 0x130, /* TX consumer pointer (RO) */
+	MVS_RX_CFG		= 0x134, /* RX configuration */
+	MVS_RX_LO		= 0x138, /* RX (completion) ring addr */
+	MVS_RX_HI		= 0x13C,
+	MVS_RX_CONS_IDX		= 0x140, /* RX consumer pointer (RO) */
+
+	MVS_INT_COAL		= 0x148, /* Int coalescing config */
+	MVS_INT_COAL_TMOUT	= 0x14C, /* Int coalescing timeout */
+	MVS_INT_STAT		= 0x150, /* Central int status */
+	MVS_INT_MASK		= 0x154, /* Central int enable */
+	MVS_INT_STAT_SRS_0	= 0x158, /* SATA register set status */
+	MVS_INT_MASK_SRS_0	= 0x15C,
+	MVS_INT_STAT_SRS_1	= 0x160,
+	MVS_INT_MASK_SRS_1	= 0x164,
+	MVS_NON_NCQ_ERR_0	= 0x168, /* SRS Non-specific NCQ Error */
+	MVS_NON_NCQ_ERR_1	= 0x16C,
+	MVS_CMD_ADDR		= 0x170, /* Command register port (addr) */
+	MVS_CMD_DATA		= 0x174, /* Command register port (data) */
+	MVS_MEM_PARITY_ERR	= 0x178, /* Memory parity error */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_INT_STAT		= 0x180, /* port0 interrupt status */
+	MVS_P0_INT_MASK		= 0x184, /* port0 interrupt mask */
+					 /* ports 5-7 follow after this */
+	MVS_P4_INT_STAT		= 0x1A0, /* Port4 interrupt status */
+	MVS_P4_INT_MASK		= 0x1A4, /* Port4 interrupt enable mask */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_SER_CTLSTAT	= 0x1D0, /* port0 serial control/status */
+					 /* ports 5-7 follow after this */
+	MVS_P4_SER_CTLSTAT	= 0x1E0, /* port4 serial control/status */
+
+					 /* ports 1-3 follow after this */
+	MVS_P0_CFG_ADDR		= 0x200, /* port0 phy register address */
+	MVS_P0_CFG_DATA		= 0x204, /* port0 phy register data */
+					 /* ports 5-7 follow after this */
+	MVS_P4_CFG_ADDR		= 0x220, /* Port4 config address */
+	MVS_P4_CFG_DATA		= 0x224, /* Port4 config data */
+
+					 /* phys 1-3 follow after this */
+	MVS_P0_VSR_ADDR		= 0x250, /* phy0 VSR address */
+	MVS_P0_VSR_DATA		= 0x254, /* phy0 VSR data */
+					 /* phys 1-3 follow after this */
+					 /* multiplexing */
+	MVS_P4_VSR_ADDR 	= 0x250, /* phy4 VSR address */
+	MVS_P4_VSR_DATA 	= 0x254, /* phy4 VSR data */
+	MVS_PA_VSR_ADDR		= 0x290, /* All port VSR addr */
+	MVS_PA_VSR_PORT		= 0x294, /* All port VSR data */
+};
+
+enum pci_cfg_registers {
+	PCR_PHY_CTL		= 0x40,
+	PCR_PHY_CTL2		= 0x90,
+	PCR_DEV_CTRL		= 0x78,
+	PCR_LINK_STAT		= 0x82,
+};
+
+/*  SAS/SATA Vendor Specific Port Registers */
+enum sas_sata_vsp_regs {
+	VSR_PHY_STAT		= 0x00 * 4, /* Phy Status */
+	VSR_PHY_MODE1		= 0x01 * 4, /* phy tx */
+	VSR_PHY_MODE2		= 0x02 * 4, /* tx scc */
+	VSR_PHY_MODE3		= 0x03 * 4, /* pll */
+	VSR_PHY_MODE4		= 0x04 * 4, /* VCO */
+	VSR_PHY_MODE5		= 0x05 * 4, /* Rx */
+	VSR_PHY_MODE6		= 0x06 * 4, /* CDR */
+	VSR_PHY_MODE7		= 0x07 * 4, /* Impedance */
+	VSR_PHY_MODE8		= 0x08 * 4, /* Voltage */
+	VSR_PHY_MODE9		= 0x09 * 4, /* Test */
+	VSR_PHY_MODE10		= 0x0A * 4, /* Power */
+	VSR_PHY_MODE11		= 0x0B * 4, /* Phy Mode */
+	VSR_PHY_VS0		= 0x0C * 4, /* Vednor Specific 0 */
+	VSR_PHY_VS1		= 0x0D * 4, /* Vednor Specific 1 */
+};
+
+enum chip_register_bits {
+	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0x7 << 8),
+	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0x7 << 8),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (12),
+	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
+			(0x3 << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
+};
+
+enum pci_interrupt_cause {
+	/*  MAIN_IRQ_CAUSE (R10200) Bits*/
+	IRQ_COM_IN_I2O_IOP0            = (1 << 0),
+	IRQ_COM_IN_I2O_IOP1            = (1 << 1),
+	IRQ_COM_IN_I2O_IOP2            = (1 << 2),
+	IRQ_COM_IN_I2O_IOP3            = (1 << 3),
+	IRQ_COM_OUT_I2O_HOS0           = (1 << 4),
+	IRQ_COM_OUT_I2O_HOS1           = (1 << 5),
+	IRQ_COM_OUT_I2O_HOS2           = (1 << 6),
+	IRQ_COM_OUT_I2O_HOS3           = (1 << 7),
+	IRQ_PCIF_TO_CPU_DRBL0          = (1 << 8),
+	IRQ_PCIF_TO_CPU_DRBL1          = (1 << 9),
+	IRQ_PCIF_TO_CPU_DRBL2          = (1 << 10),
+	IRQ_PCIF_TO_CPU_DRBL3          = (1 << 11),
+	IRQ_PCIF_DRBL0                 = (1 << 12),
+	IRQ_PCIF_DRBL1                 = (1 << 13),
+	IRQ_PCIF_DRBL2                 = (1 << 14),
+	IRQ_PCIF_DRBL3                 = (1 << 15),
+	IRQ_XOR_A                      = (1 << 16),
+	IRQ_XOR_B                      = (1 << 17),
+	IRQ_SAS_A                      = (1 << 18),
+	IRQ_SAS_B                      = (1 << 19),
+	IRQ_CPU_CNTRL                  = (1 << 20),
+	IRQ_GPIO                       = (1 << 21),
+	IRQ_UART                       = (1 << 22),
+	IRQ_SPI                        = (1 << 23),
+	IRQ_I2C                        = (1 << 24),
+	IRQ_SGPIO                      = (1 << 25),
+	IRQ_COM_ERR                    = (1 << 29),
+	IRQ_I2O_ERR                    = (1 << 30),
+	IRQ_PCIE_ERR                   = (1 << 31),
+};
+
+#define MAX_SG_ENTRY		255
+
+struct mvs_prd_imt {
+	__le32			len:22;
+	u8			_r_a:2;
+	u8			misc_ctl:4;
+	u8			inter_sel:4;
+};
+
+struct mvs_prd {
+	/* 64-bit buffer address */
+	__le64			addr;
+	/* 22-bit length */
+	struct mvs_prd_imt	im_len;
+} __attribute__ ((packed));
+
+#define SPI_CTRL_REG_94XX           	0xc800
+#define SPI_ADDR_REG_94XX            	0xc804
+#define SPI_WR_DATA_REG_94XX         0xc808
+#define SPI_RD_DATA_REG_94XX         	0xc80c
+#define SPI_CTRL_READ_94XX         	(1U << 2)
+#define SPI_ADDR_VLD_94XX         	(1U << 1)
+#define SPI_CTRL_SpiStart_94XX     	(1U << 0)
+
+#define mv_ffc(x)   ffz(x)
+
+static inline int
+mv_ffc64(u64 v)
+{
+	int i;
+	i = mv_ffc((u32)v);
+	if (i >= 0)
+		return i;
+	i = mv_ffc((u32)(v>>32));
+
+	if (i != 0)
+		return 32 + i;
+
+	return -1;
+}
+
+#define r_reg_set_enable(i) \
+	(((i) > 31) ? mr32(MVS_STP_REG_SET_1) : \
+	mr32(MVS_STP_REG_SET_0))
+
+#define w_reg_set_enable(i, tmp) \
+	(((i) > 31) ? mw32(MVS_STP_REG_SET_1, tmp) : \
+	mw32(MVS_STP_REG_SET_0, tmp))
+
+extern const struct mvs_dispatch mvs_94xx_dispatch;
+#endif
+
diff --git a/drivers/scsi/mvsas/mv_chips.h b/drivers/scsi/mvsas/mv_chips.h
index cf74b7a..a67e1c4 100644
--- a/drivers/scsi/mvsas/mv_chips.h
+++ b/drivers/scsi/mvsas/mv_chips.h
@@ -1,46 +1,81 @@
+/*
+ * Marvell 88SE64xx/88SE94xx register IO interface
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
+
+
 #ifndef _MV_CHIPS_H_
 #define _MV_CHIPS_H_
 
-#define mr32(reg)	readl(regs + MVS_##reg)
-#define mw32(reg,val)	writel((val), regs + MVS_##reg)
-#define mw32_f(reg,val)	do {			\
-	writel((val), regs + MVS_##reg);	\
-	readl(regs + MVS_##reg);		\
-	} while (0)
+#define mr32(reg)	readl(regs + reg)
+#define mw32(reg, val)	writel((val), regs + reg)
+#define mw32_f(reg, val)	do {			\
+				mw32(reg, val);	\
+				mr32(reg);	\
+			} while (0)
 
-static inline u32 mvs_cr32(void __iomem *regs, u32 addr)
+#define iow32(reg, val) 	outl(val, (unsigned long)(regs + reg))
+#define ior32(reg) 		inl((unsigned long)(regs + reg))
+#define iow16(reg, val) 	outw((unsigned long)(val, regs + reg))
+#define ior16(reg) 		inw((unsigned long)(regs + reg))
+#define iow8(reg, val) 		outb((unsigned long)(val, regs + reg))
+#define ior8(reg) 		inb((unsigned long)(regs + reg))
+
+static inline u32 mvs_cr32(struct mvs_info *mvi, u32 addr)
 {
-	mw32(CMD_ADDR, addr);
-	return mr32(CMD_DATA);
+	void __iomem *regs = mvi->regs;
+	mw32(MVS_CMD_ADDR, addr);
+	return mr32(MVS_CMD_DATA);
 }
 
-static inline void mvs_cw32(void __iomem *regs, u32 addr, u32 val)
+static inline void mvs_cw32(struct mvs_info *mvi, u32 addr, u32 val)
 {
-	mw32(CMD_ADDR, addr);
-	mw32(CMD_DATA, val);
+	void __iomem *regs = mvi->regs;
+	mw32(MVS_CMD_ADDR, addr);
+	mw32(MVS_CMD_DATA, val);
 }
 
 static inline u32 mvs_read_phy_ctl(struct mvs_info *mvi, u32 port)
 {
 	void __iomem *regs = mvi->regs;
-	return (port < 4)?mr32(P0_SER_CTLSTAT + port * 4):
-		mr32(P4_SER_CTLSTAT + (port - 4) * 4);
+	return (port < 4) ? mr32(MVS_P0_SER_CTLSTAT + port * 4) :
+		mr32(MVS_P4_SER_CTLSTAT + (port - 4) * 4);
 }
 
 static inline void mvs_write_phy_ctl(struct mvs_info *mvi, u32 port, u32 val)
 {
 	void __iomem *regs = mvi->regs;
 	if (port < 4)
-		mw32(P0_SER_CTLSTAT + port * 4, val);
+		mw32(MVS_P0_SER_CTLSTAT + port * 4, val);
 	else
-		mw32(P4_SER_CTLSTAT + (port - 4) * 4, val);
+		mw32(MVS_P4_SER_CTLSTAT + (port - 4) * 4, val);
 }
 
-static inline u32 mvs_read_port(struct mvs_info *mvi, u32 off, u32 off2, u32 port)
+static inline u32 mvs_read_port(struct mvs_info *mvi, u32 off,
+				u32 off2, u32 port)
 {
 	void __iomem *regs = mvi->regs + off;
 	void __iomem *regs2 = mvi->regs + off2;
-	return (port < 4)?readl(regs + port * 8):
+	return (port < 4) ? readl(regs + port * 8) :
 		readl(regs2 + (port - 4) * 8);
 }
 
@@ -61,16 +96,19 @@ static inline u32 mvs_read_port_cfg_data(struct mvs_info *mvi, u32 port)
 			MVS_P4_CFG_DATA, port);
 }
 
-static inline void mvs_write_port_cfg_data(struct mvs_info *mvi, u32 port, u32 val)
+static inline void mvs_write_port_cfg_data(struct mvs_info *mvi,
+						u32 port, u32 val)
 {
 	mvs_write_port(mvi, MVS_P0_CFG_DATA,
 			MVS_P4_CFG_DATA, port, val);
 }
 
-static inline void mvs_write_port_cfg_addr(struct mvs_info *mvi, u32 port, u32 addr)
+static inline void mvs_write_port_cfg_addr(struct mvs_info *mvi,
+						u32 port, u32 addr)
 {
 	mvs_write_port(mvi, MVS_P0_CFG_ADDR,
 			MVS_P4_CFG_ADDR, port, addr);
+	mdelay(10);
 }
 
 static inline u32 mvs_read_port_vsr_data(struct mvs_info *mvi, u32 port)
@@ -79,16 +117,19 @@ static inline u32 mvs_read_port_vsr_data(struct mvs_info *mvi, u32 port)
 			MVS_P4_VSR_DATA, port);
 }
 
-static inline void mvs_write_port_vsr_data(struct mvs_info *mvi, u32 port, u32 val)
+static inline void mvs_write_port_vsr_data(struct mvs_info *mvi,
+						u32 port, u32 val)
 {
 	mvs_write_port(mvi, MVS_P0_VSR_DATA,
 			MVS_P4_VSR_DATA, port, val);
 }
 
-static inline void mvs_write_port_vsr_addr(struct mvs_info *mvi, u32 port, u32 addr)
+static inline void mvs_write_port_vsr_addr(struct mvs_info *mvi,
+						u32 port, u32 addr)
 {
 	mvs_write_port(mvi, MVS_P0_VSR_ADDR,
 			MVS_P4_VSR_ADDR, port, addr);
+	mdelay(10);
 }
 
 static inline u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port)
@@ -97,7 +138,8 @@ static inline u32 mvs_read_port_irq_stat(struct mvs_info *mvi, u32 port)
 			MVS_P4_INT_STAT, port);
 }
 
-static inline void mvs_write_port_irq_stat(struct mvs_info *mvi, u32 port, u32 val)
+static inline void mvs_write_port_irq_stat(struct mvs_info *mvi,
+						u32 port, u32 val)
 {
 	mvs_write_port(mvi, MVS_P0_INT_STAT,
 			MVS_P4_INT_STAT, port, val);
@@ -107,12 +149,132 @@ static inline u32 mvs_read_port_irq_mask(struct mvs_info *mvi, u32 port)
 {
 	return mvs_read_port(mvi, MVS_P0_INT_MASK,
 			MVS_P4_INT_MASK, port);
+
 }
 
-static inline void mvs_write_port_irq_mask(struct mvs_info *mvi, u32 port, u32 val)
+static inline void mvs_write_port_irq_mask(struct mvs_info *mvi,
+						u32 port, u32 val)
 {
 	mvs_write_port(mvi, MVS_P0_INT_MASK,
 			MVS_P4_INT_MASK, port, val);
 }
 
-#endif
+static inline void __devinit mvs_phy_hacks(struct mvs_info *mvi)
+{
+	u32 tmp;
+
+	/* workaround for SATA R-ERR, to ignore phy glitch */
+	tmp = mvs_cr32(mvi, CMD_PHY_TIMER);
+	tmp &= ~(1 << 9);
+	tmp |= (1 << 10);
+	mvs_cw32(mvi, CMD_PHY_TIMER, tmp);
+
+	/* enable retry 127 times */
+	mvs_cw32(mvi, CMD_SAS_CTL1, 0x7f7f);
+
+	/* extend open frame timeout to max */
+	tmp = mvs_cr32(mvi, CMD_SAS_CTL0);
+	tmp &= ~0xffff;
+	tmp |= 0x3fff;
+	mvs_cw32(mvi, CMD_SAS_CTL0, tmp);
+
+	/* workaround for WDTIMEOUT , set to 550 ms */
+	mvs_cw32(mvi, CMD_WD_TIMER, 0x7a0000);
+
+	/* not to halt for different port op during wideport link change */
+	mvs_cw32(mvi, CMD_APP_ERR_CONFIG, 0xffefbf7d);
+
+	/* workaround for Seagate disk not-found OOB sequence, recv
+	 * COMINIT before sending out COMWAKE */
+	tmp = mvs_cr32(mvi, CMD_PHY_MODE_21);
+	tmp &= 0x0000ffff;
+	tmp |= 0x00fa0000;
+	mvs_cw32(mvi, CMD_PHY_MODE_21, tmp);
+
+	tmp = mvs_cr32(mvi, CMD_PHY_TIMER);
+	tmp &= 0x1fffffff;
+	tmp |= (2U << 29);	/* 8 ms retry */
+	mvs_cw32(mvi, CMD_PHY_TIMER, tmp);
+}
+
+static inline void mvs_int_sata(struct mvs_info *mvi)
+{
+	u32 tmp;
+	void __iomem *regs = mvi->regs;
+	tmp = mr32(MVS_INT_STAT_SRS_0);
+	if (tmp)
+		mw32(MVS_INT_STAT_SRS_0, tmp);
+	MVS_CHIP_DISP->clear_active_cmds(mvi);
+}
+
+static inline void mvs_int_full(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	u32 tmp, stat;
+	int i;
+
+	stat = mr32(MVS_INT_STAT);
+	mvs_int_rx(mvi, false);
+
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		tmp = (stat >> i) & (CINT_PORT | CINT_PORT_STOPPED);
+		if (tmp)
+			mvs_int_port(mvi, i, tmp);
+	}
+
+	if (stat & CINT_SRS)
+		mvs_int_sata(mvi);
+
+	mw32(MVS_INT_STAT, stat);
+}
+
+static inline void mvs_start_delivery(struct mvs_info *mvi, u32 tx)
+{
+	void __iomem *regs = mvi->regs;
+	mw32(MVS_TX_PROD_IDX, tx);
+}
+
+static inline u32 mvs_rx_update(struct mvs_info *mvi)
+{
+	void __iomem *regs = mvi->regs;
+	return mr32(MVS_RX_CONS_IDX);
+}
+
+static inline u32 mvs_get_prd_size(void)
+{
+	return sizeof(struct mvs_prd);
+}
+
+static inline u32 mvs_get_prd_count(void)
+{
+	return MAX_SG_ENTRY;
+}
+
+static inline void mvs_show_pcie_usage(struct mvs_info *mvi)
+{
+	u16 link_stat, link_spd;
+	const char *spd[] = {
+		"UnKnown",
+		"2.5",
+		"5.0",
+	};
+	if (mvi->flags & MVF_FLAG_SOC || mvi->id > 0)
+		return;
+
+	pci_read_config_word(mvi->pdev, PCR_LINK_STAT, &link_stat);
+	link_spd = (link_stat & PLS_LINK_SPD) >> PLS_LINK_SPD_OFFS;
+	if (link_spd >= 3)
+		link_spd = 0;
+	dev_printk(KERN_INFO, mvi->dev,
+		"mvsas: PCI-E x%u, Bandwidth Usage: %s Gbps\n",
+	       (link_stat & PLS_NEG_LINK_WD) >> PLS_NEG_LINK_WD_OFFS,
+	       spd[link_spd]);
+}
+
+static inline u32 mvs_hw_max_link_rate(void)
+{
+	return MAX_LINK_RATE;
+}
+
+#endif  /* _MV_CHIPS_H_ */
+
diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h
index d8e96a3..f8cb9de 100644
--- a/drivers/scsi/mvsas/mv_defs.h
+++ b/drivers/scsi/mvsas/mv_defs.h
@@ -1,53 +1,66 @@
 /*
-	mv_defs.h - Marvell 88SE6440 SAS/SATA support
-
-	Copyright 2007 Red Hat, Inc.
-	Copyright 2008 Marvell. <kewei@marvell.com>
-
-	This program is free software; you can redistribute it and/or
-	modify it under the terms of the GNU General Public License as
-	published by the Free Software Foundation; either version 2,
-	or (at your option) any later version.
-
-	This program is distributed in the hope that it will be useful,
-	but WITHOUT ANY WARRANTY; without even the implied warranty
-	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-	See the GNU General Public License for more details.
-
-	You should have received a copy of the GNU General Public
-	License along with this program; see the file COPYING.	If not,
-	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
-	MA 02139, USA.
-
- */
+ * Marvell 88SE64xx/88SE94xx const head file
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
 
 #ifndef _MV_DEFS_H_
 #define _MV_DEFS_H_
 
+
+enum chip_flavors {
+	chip_6320,
+	chip_6440,
+	chip_6485,
+	chip_9480,
+	chip_9180,
+};
+
 /* driver compile-time configuration */
 enum driver_configuration {
+	MVS_SLOTS		= 512,	/* command slots */
 	MVS_TX_RING_SZ		= 1024,	/* TX ring size (12-bit) */
 	MVS_RX_RING_SZ		= 1024, /* RX ring size (12-bit) */
 					/* software requires power-of-2
 					   ring size */
+	MVS_SOC_SLOTS		= 64,
+	MVS_SOC_TX_RING_SZ	= MVS_SOC_SLOTS * 2,
+	MVS_SOC_RX_RING_SZ	= MVS_SOC_SLOTS * 2,
 
-	MVS_SLOTS		= 512,	/* command slots */
 	MVS_SLOT_BUF_SZ		= 8192, /* cmd tbl + IU + status + PRD */
 	MVS_SSP_CMD_SZ		= 64,	/* SSP command table buffer size */
 	MVS_ATA_CMD_SZ		= 96,	/* SATA command table buffer size */
 	MVS_OAF_SZ		= 64,	/* Open address frame buffer size */
-
-	MVS_RX_FIS_COUNT	= 17,	/* Optional rx'd FISs (max 17) */
-
-	MVS_QUEUE_SIZE		= 30,	/* Support Queue depth */
-	MVS_CAN_QUEUE		= MVS_SLOTS - 1,	/* SCSI Queue depth */
+	MVS_QUEUE_SIZE	= 32,	/* Support Queue depth */
+	MVS_CAN_QUEUE		= MVS_SLOTS - 2,	/* SCSI Queue depth */
+	MVS_SOC_CAN_QUEUE	= MVS_SOC_SLOTS - 2,
 };
 
 /* unchangeable hardware details */
 enum hardware_details {
 	MVS_MAX_PHYS		= 8,	/* max. possible phys */
 	MVS_MAX_PORTS		= 8,	/* max. possible ports */
-	MVS_RX_FISL_SZ		= 0x400 + (MVS_RX_FIS_COUNT * 0x100),
+	MVS_SOC_PHYS		= 4,	/* soc phys */
+	MVS_SOC_PORTS		= 4,	/* soc phys */
+	MVS_MAX_DEVICES	= 1024,	/* max supported device */
 };
 
 /* peripheral registers (BAR2) */
@@ -133,6 +146,8 @@ enum hw_register_bits {
 	CINT_PORT		= (1U << 8),	/* port0 event */
 	CINT_PORT_MASK_OFFSET	= 8,
 	CINT_PORT_MASK		= (0xFF << CINT_PORT_MASK_OFFSET),
+	CINT_PHY_MASK_OFFSET	= 4,
+	CINT_PHY_MASK		= (0x0F << CINT_PHY_MASK_OFFSET),
 
 	/* TX (delivery) ring bits */
 	TXQ_CMD_SHIFT		= 29,
@@ -142,7 +157,11 @@ enum hw_register_bits {
 	TXQ_CMD_SSP_FREE_LIST	= 4,		/* add to SSP targ free list */
 	TXQ_CMD_SLOT_RESET	= 7,		/* reset command slot */
 	TXQ_MODE_I		= (1U << 28),	/* mode: 0=target,1=initiator */
+	TXQ_MODE_TARGET 	= 0,
+	TXQ_MODE_INITIATOR	= 1,
 	TXQ_PRIO_HI		= (1U << 27),	/* priority: 0=normal, 1=high */
+	TXQ_PRI_NORMAL		= 0,
+	TXQ_PRI_HIGH		= 1,
 	TXQ_SRS_SHIFT		= 20,		/* SATA register set */
 	TXQ_SRS_MASK		= 0x7f,
 	TXQ_PHY_SHIFT		= 12,		/* PHY bitmap */
@@ -175,6 +194,8 @@ enum hw_register_bits {
 	MCH_SSP_FR_READ		= 0x6,		/* Read DATA frame(s) */
 	MCH_SSP_FR_READ_RESP	= 0x7,		/* ditto, plus RESPONSE */
 
+	MCH_SSP_MODE_PASSTHRU	= 1,
+	MCH_SSP_MODE_NORMAL	= 0,
 	MCH_PASSTHRU		= (1U << 12),	/* pass-through (SSP) */
 	MCH_FBURST		= (1U << 11),	/* first burst (SSP) */
 	MCH_CHK_LEN		= (1U << 10),	/* chk xfer len (SSP) */
@@ -199,15 +220,12 @@ enum hw_register_bits {
 	PHY_BCAST_CHG		= (1U << 2),	/* broadcast(change) notif */
 	PHY_RST_HARD		= (1U << 1),	/* hard reset + phy reset */
 	PHY_RST			= (1U << 0),	/* phy reset */
-	PHY_MIN_SPP_PHYS_LINK_RATE_MASK = (0xF << 8),
-	PHY_MAX_SPP_PHYS_LINK_RATE_MASK = (0xF << 12),
-	PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET = (16),
-	PHY_NEG_SPP_PHYS_LINK_RATE_MASK =
-			(0xF << PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET),
 	PHY_READY_MASK		= (1U << 20),
 
 	/* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */
 	PHYEV_DEC_ERR		= (1U << 24),	/* Phy Decoding Error */
+	PHYEV_DCDR_ERR		= (1U << 23),	/* STP Deocder Error */
+	PHYEV_CRC_ERR		= (1U << 22),	/* STP CRC Error */
 	PHYEV_UNASSOC_FIS	= (1U << 19),	/* unassociated FIS rx'd */
 	PHYEV_AN		= (1U << 18),	/* SATA async notification */
 	PHYEV_BIST_ACT		= (1U << 17),	/* BIST activate FIS */
@@ -229,9 +247,10 @@ enum hw_register_bits {
 	/* MVS_PCS */
 	PCS_EN_SATA_REG_SHIFT	= (16),		/* Enable SATA Register Set */
 	PCS_EN_PORT_XMT_SHIFT	= (12),		/* Enable Port Transmit */
-	PCS_EN_PORT_XMT_SHIFT2	= (8),		/* For 6480 */
+	PCS_EN_PORT_XMT_SHIFT2	= (8),		/* For 6485 */
 	PCS_SATA_RETRY		= (1U << 8),	/* retry ctl FIS on R_ERR */
 	PCS_RSP_RX_EN		= (1U << 7),	/* raw response rx */
+	PCS_SATA_RETRY_2	= (1U << 6),	/* For 9180 */
 	PCS_SELF_CLEAR		= (1U << 5),	/* self-clearing int mode */
 	PCS_FIS_RX_EN		= (1U << 4),	/* FIS rx enable */
 	PCS_CMD_STOP_ERR	= (1U << 3),	/* cmd stop-on-err enable */
@@ -246,6 +265,8 @@ enum hw_register_bits {
 	PORT_DEV_SMP_INIT	= (1U << 10),
 	PORT_DEV_STP_INIT	= (1U << 9),
 	PORT_PHY_ID_MASK	= (0xFFU << 24),
+	PORT_SSP_TRGT_MASK	= (0x1U << 19),
+	PORT_SSP_INIT_MASK	= (0x1U << 11),
 	PORT_DEV_TRGT_MASK	= (0x7U << 17),
 	PORT_DEV_INIT_MASK	= (0x7U << 9),
 	PORT_DEV_TYPE_MASK	= (0x7U << 0),
@@ -283,21 +304,30 @@ enum sas_sata_config_port_regs {
 	PHYR_ATT_ADDR_HI	= 0x14,	/* attached dev SAS addr (high) */
 	PHYR_SATA_CTL		= 0x18,	/* SATA control */
 	PHYR_PHY_STAT		= 0x1C,	/* PHY status */
-	PHYR_SATA_SIG0		= 0x20,	/*port SATA signature FIS(Byte 0-3) */
-	PHYR_SATA_SIG1		= 0x24,	/*port SATA signature FIS(Byte 4-7) */
-	PHYR_SATA_SIG2		= 0x28,	/*port SATA signature FIS(Byte 8-11) */
-	PHYR_SATA_SIG3		= 0x2c,	/*port SATA signature FIS(Byte 12-15) */
+	PHYR_SATA_SIG0	= 0x20,	/*port SATA signature FIS(Byte 0-3) */
+	PHYR_SATA_SIG1	= 0x24,	/*port SATA signature FIS(Byte 4-7) */
+	PHYR_SATA_SIG2	= 0x28,	/*port SATA signature FIS(Byte 8-11) */
+	PHYR_SATA_SIG3	= 0x2c,	/*port SATA signature FIS(Byte 12-15) */
 	PHYR_R_ERR_COUNT	= 0x30, /* port R_ERR count register */
 	PHYR_CRC_ERR_COUNT	= 0x34, /* port CRC error count register */
-	PHYR_WIDE_PORT		= 0x38,	/* wide port participating */
+	PHYR_WIDE_PORT	= 0x38,	/* wide port participating */
 	PHYR_CURRENT0		= 0x80,	/* current connection info 0 */
 	PHYR_CURRENT1		= 0x84,	/* current connection info 1 */
 	PHYR_CURRENT2		= 0x88,	/* current connection info 2 */
-};
-
-enum mvs_info_flags {
-	MVF_MSI			= (1U << 0),	/* MSI is enabled */
-	MVF_PHY_PWR_FIX		= (1U << 1),	/* bug workaround */
+	CONFIG_ID_FRAME0       = 0x100, /* Port device ID frame register 0 */
+	CONFIG_ID_FRAME1       = 0x104, /* Port device ID frame register 1 */
+	CONFIG_ID_FRAME2       = 0x108, /* Port device ID frame register 2 */
+	CONFIG_ID_FRAME3       = 0x10c, /* Port device ID frame register 3 */
+	CONFIG_ID_FRAME4       = 0x110, /* Port device ID frame register 4 */
+	CONFIG_ID_FRAME5       = 0x114, /* Port device ID frame register 5 */
+	CONFIG_ID_FRAME6       = 0x118, /* Port device ID frame register 6 */
+	CONFIG_ATT_ID_FRAME0   = 0x11c, /* attached ID frame register 0 */
+	CONFIG_ATT_ID_FRAME1   = 0x120, /* attached ID frame register 1 */
+	CONFIG_ATT_ID_FRAME2   = 0x124, /* attached ID frame register 2 */
+	CONFIG_ATT_ID_FRAME3   = 0x128, /* attached ID frame register 3 */
+	CONFIG_ATT_ID_FRAME4   = 0x12c, /* attached ID frame register 4 */
+	CONFIG_ATT_ID_FRAME5   = 0x130, /* attached ID frame register 5 */
+	CONFIG_ATT_ID_FRAME6   = 0x134, /* attached ID frame register 6 */
 };
 
 enum sas_cmd_port_registers {
@@ -305,11 +335,11 @@ enum sas_cmd_port_registers {
 	CMD_CMWK_OOB_DET	= 0x104, /* COMWAKE OOB detect register */
 	CMD_CMSAS_OOB_DET	= 0x108, /* COMSAS OOB detect register */
 	CMD_BRST_OOB_DET	= 0x10c, /* burst OOB detect register */
-	CMD_OOB_SPACE		= 0x110, /* OOB space control register */
-	CMD_OOB_BURST		= 0x114, /* OOB burst control register */
+	CMD_OOB_SPACE	= 0x110, /* OOB space control register */
+	CMD_OOB_BURST	= 0x114, /* OOB burst control register */
 	CMD_PHY_TIMER		= 0x118, /* PHY timer control register */
-	CMD_PHY_CONFIG0		= 0x11c, /* PHY config register 0 */
-	CMD_PHY_CONFIG1		= 0x120, /* PHY config register 1 */
+	CMD_PHY_CONFIG0	= 0x11c, /* PHY config register 0 */
+	CMD_PHY_CONFIG1	= 0x120, /* PHY config register 1 */
 	CMD_SAS_CTL0		= 0x124, /* SAS control register 0 */
 	CMD_SAS_CTL1		= 0x128, /* SAS control register 1 */
 	CMD_SAS_CTL2		= 0x12c, /* SAS control register 2 */
@@ -318,9 +348,9 @@ enum sas_cmd_port_registers {
 	CMD_PL_TIMER		= 0x138, /* PL timer register */
 	CMD_WD_TIMER		= 0x13c, /* WD timer register */
 	CMD_PORT_SEL_COUNT	= 0x140, /* port selector count register */
-	CMD_APP_MEM_CTL		= 0x144, /* Application Memory Control */
-	CMD_XOR_MEM_CTL		= 0x148, /* XOR Block Memory Control */
-	CMD_DMA_MEM_CTL		= 0x14c, /* DMA Block Memory Control */
+	CMD_APP_MEM_CTL	= 0x144, /* Application Memory Control */
+	CMD_XOR_MEM_CTL	= 0x148, /* XOR Block Memory Control */
+	CMD_DMA_MEM_CTL	= 0x14c, /* DMA Block Memory Control */
 	CMD_PORT_MEM_CTL0	= 0x150, /* Port Memory Control 0 */
 	CMD_PORT_MEM_CTL1	= 0x154, /* Port Memory Control 1 */
 	CMD_SATA_PORT_MEM_CTL0	= 0x158, /* SATA Port Memory Control 0 */
@@ -353,27 +383,25 @@ enum sas_cmd_port_registers {
 	CMD_PND_FIFO_CTL1	= 0x1C4, /* Pending FIFO Control 1 */
 };
 
-enum pci_cfg_register_bits {
-	PCTL_PWR_ON	= (0xFU << 24),
-	PCTL_OFF	= (0xFU << 12),
-	PRD_REQ_SIZE	= (0x4000),
-	PRD_REQ_MASK	= (0x00007000),
+enum mvs_info_flags {
+	MVF_MSI		= (1U << 0),	/* MSI is enabled */
+	MVF_PHY_PWR_FIX	= (1U << 1),	/* bug workaround */
+	MVF_FLAG_SOC		= (1U << 2),	/* SoC integrated controllers */
 };
 
-enum nvram_layout_offsets {
-	NVR_SIG		= 0x00,		/* 0xAA, 0x55 */
-	NVR_SAS_ADDR	= 0x02,		/* 8-byte SAS address */
+enum mvs_event_flags {
+	PHY_PLUG_EVENT	= (3U),
+	PHY_PLUG_IN		= (1U << 0),	/* phy plug in */
+	PHY_PLUG_OUT		= (1U << 1),	/* phy plug out */
 };
 
-enum chip_flavors {
-	chip_6320,
-	chip_6440,
-	chip_6480,
-};
-
-enum port_type {
-	PORT_TYPE_SAS	=  (1L << 1),
-	PORT_TYPE_SATA	=  (1L << 0),
+enum mvs_port_type {
+	PORT_TGT_MASK	=  (1U << 5),
+	PORT_INIT_PORT	=  (1U << 4),
+	PORT_TGT_PORT	=  (1U << 3),
+	PORT_INIT_TGT_PORT = (PORT_INIT_PORT | PORT_TGT_PORT),
+	PORT_TYPE_SAS	=  (1U << 1),
+	PORT_TYPE_SATA	=  (1U << 0),
 };
 
 /* Command Table Format */
@@ -438,4 +466,37 @@ enum error_info_rec_2 {
 	USR_BLK_NM	= (1U << 0),	/* User Block Number */
 };
 
+enum pci_cfg_register_bits {
+	PCTL_PWR_OFF	= (0xFU << 24),
+	PCTL_COM_ON	= (0xFU << 20),
+	PCTL_LINK_RST	= (0xFU << 16),
+	PCTL_LINK_OFFS	= (16),
+	PCTL_PHY_DSBL	= (0xFU << 12),
+	PCTL_PHY_DSBL_OFFS	= (12),
+	PRD_REQ_SIZE	= (0x4000),
+	PRD_REQ_MASK	= (0x00007000),
+	PLS_NEG_LINK_WD		= (0x3FU << 4),
+	PLS_NEG_LINK_WD_OFFS	= 4,
+	PLS_LINK_SPD		= (0x0FU << 0),
+	PLS_LINK_SPD_OFFS	= 0,
+};
+
+enum open_frame_protocol {
+	PROTOCOL_SMP	= 0x0,
+	PROTOCOL_SSP	= 0x1,
+	PROTOCOL_STP	= 0x2,
+};
+
+/* define for response frame datapres field */
+enum datapres_field {
+	NO_DATA		= 0,
+	RESPONSE_DATA	= 1,
+	SENSE_DATA	= 2,
+};
+
+/* define task management IU */
+struct mvs_tmf_task{
+	u8 tmf;
+	u16 tag_of_task_to_be_managed;
+};
 #endif
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index 258a1a9..8646a19 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -1,38 +1,41 @@
 /*
-	mv_init.c - Marvell 88SE6440 SAS/SATA init support
+ * Marvell 88SE64xx/88SE94xx pci init
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
 
-	Copyright 2007 Red Hat, Inc.
-	Copyright 2008 Marvell. <kewei@marvell.com>
-
-	This program is free software; you can redistribute it and/or
-	modify it under the terms of the GNU General Public License as
-	published by the Free Software Foundation; either version 2,
-	or (at your option) any later version.
-
-	This program is distributed in the hope that it will be useful,
-	but WITHOUT ANY WARRANTY; without even the implied warranty
-	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-	See the GNU General Public License for more details.
-
-	You should have received a copy of the GNU General Public
-	License along with this program; see the file COPYING.	If not,
-	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
-	MA 02139, USA.
-
- */
 
 #include "mv_sas.h"
-#include "mv_64xx.h"
-#include "mv_chips.h"
 
 static struct scsi_transport_template *mvs_stt;
-
 static const struct mvs_chip_info mvs_chips[] = {
-	[chip_6320] =		{ 2, 16, 9  },
-	[chip_6440] =		{ 4, 16, 9  },
-	[chip_6480] =		{ 8, 32, 10 },
+	[chip_6320] =	{ 1, 2, 0x400, 17, 16,  9, &mvs_64xx_dispatch, },
+	[chip_6440] =	{ 1, 4, 0x400, 17, 16,  9, &mvs_64xx_dispatch, },
+	[chip_6485] =	{ 1, 8, 0x800, 33, 32, 10, &mvs_64xx_dispatch, },
+	[chip_9180] =	{ 2, 4, 0x800, 17, 64,  9, &mvs_94xx_dispatch, },
+	[chip_9480] =	{ 2, 4, 0x800, 17, 64,  9, &mvs_94xx_dispatch, },
 };
 
+#define SOC_SAS_NUM 2
+
 static struct scsi_host_template mvs_sht = {
 	.module			= THIS_MODULE,
 	.name			= DRV_NAME,
@@ -53,17 +56,29 @@ static struct scsi_host_template mvs_sht = {
 	.use_clustering		= ENABLE_CLUSTERING,
 	.eh_device_reset_handler	= sas_eh_device_reset_handler,
 	.eh_bus_reset_handler	= sas_eh_bus_reset_handler,
-	.slave_alloc		= sas_slave_alloc,
+	.slave_alloc		= mvs_slave_alloc,
 	.target_destroy		= sas_target_destroy,
 	.ioctl			= sas_ioctl,
 };
 
 static struct sas_domain_function_template mvs_transport_ops = {
-	.lldd_execute_task	= mvs_task_exec,
+	.lldd_dev_found 	= mvs_dev_found,
+	.lldd_dev_gone	= mvs_dev_gone,
+
+	.lldd_execute_task	= mvs_queue_command,
 	.lldd_control_phy	= mvs_phy_control,
-	.lldd_abort_task	= mvs_task_abort,
-	.lldd_port_formed	= mvs_port_formed,
+
+	.lldd_abort_task	= mvs_abort_task,
+	.lldd_abort_task_set    = mvs_abort_task_set,
+	.lldd_clear_aca         = mvs_clear_aca,
+       .lldd_clear_task_set    = mvs_clear_task_set,
 	.lldd_I_T_nexus_reset	= mvs_I_T_nexus_reset,
+	.lldd_lu_reset 		= mvs_lu_reset,
+	.lldd_query_task	= mvs_query_task,
+
+	.lldd_port_formed	= mvs_port_formed,
+	.lldd_port_deformed     = mvs_port_deformed,
+
 };
 
 static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
@@ -71,6 +86,8 @@ static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
 	struct mvs_phy *phy = &mvi->phy[phy_id];
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
 
+	phy->mvi = mvi;
+	init_timer(&phy->timer);
 	sas_phy->enabled = (phy_id < mvi->chip->n_phy) ? 1 : 0;
 	sas_phy->class = SAS;
 	sas_phy->iproto = SAS_PROTOCOL_ALL;
@@ -83,248 +100,283 @@ static void __devinit mvs_phy_init(struct mvs_info *mvi, int phy_id)
 	sas_phy->id = phy_id;
 	sas_phy->sas_addr = &mvi->sas_addr[0];
 	sas_phy->frame_rcvd = &phy->frame_rcvd[0];
-	sas_phy->ha = &mvi->sas;
+	sas_phy->ha = (struct sas_ha_struct *)mvi->shost->hostdata;
 	sas_phy->lldd_phy = phy;
 }
 
 static void mvs_free(struct mvs_info *mvi)
 {
 	int i;
+	struct mvs_wq *mwq;
+	int slot_nr;
 
 	if (!mvi)
 		return;
 
-	for (i = 0; i < MVS_SLOTS; i++) {
-		struct mvs_slot_info *slot = &mvi->slot_info[i];
+	if (mvi->flags & MVF_FLAG_SOC)
+		slot_nr = MVS_SOC_SLOTS;
+	else
+		slot_nr = MVS_SLOTS;
 
+	for (i = 0; i < mvi->tags_num; i++) {
+		struct mvs_slot_info *slot = &mvi->slot_info[i];
 		if (slot->buf)
-			dma_free_coherent(&mvi->pdev->dev, MVS_SLOT_BUF_SZ,
+			dma_free_coherent(mvi->dev, MVS_SLOT_BUF_SZ,
 					  slot->buf, slot->buf_dma);
 	}
 
 	if (mvi->tx)
-		dma_free_coherent(&mvi->pdev->dev,
+		dma_free_coherent(mvi->dev,
 				  sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
 				  mvi->tx, mvi->tx_dma);
 	if (mvi->rx_fis)
-		dma_free_coherent(&mvi->pdev->dev, MVS_RX_FISL_SZ,
+		dma_free_coherent(mvi->dev, MVS_RX_FISL_SZ,
 				  mvi->rx_fis, mvi->rx_fis_dma);
 	if (mvi->rx)
-		dma_free_coherent(&mvi->pdev->dev,
+		dma_free_coherent(mvi->dev,
 				  sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
 				  mvi->rx, mvi->rx_dma);
 	if (mvi->slot)
-		dma_free_coherent(&mvi->pdev->dev,
-				  sizeof(*mvi->slot) * MVS_SLOTS,
+		dma_free_coherent(mvi->dev,
+				  sizeof(*mvi->slot) * slot_nr,
 				  mvi->slot, mvi->slot_dma);
-#ifdef MVS_ENABLE_PERI
-	if (mvi->peri_regs)
-		iounmap(mvi->peri_regs);
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	if (mvi->bulk_buffer)
+		dma_free_coherent(mvi->dev, TRASH_BUCKET_SIZE,
+				  mvi->bulk_buffer, mvi->bulk_buffer_dma);
 #endif
-	if (mvi->regs)
-		iounmap(mvi->regs);
+
+	MVS_CHIP_DISP->chip_iounmap(mvi);
 	if (mvi->shost)
 		scsi_host_put(mvi->shost);
-	kfree(mvi->sas.sas_port);
-	kfree(mvi->sas.sas_phy);
+	list_for_each_entry(mwq, &mvi->wq_list, entry)
+		cancel_delayed_work(&mwq->work_q);
 	kfree(mvi);
 }
 
 #ifdef MVS_USE_TASKLET
-static void mvs_tasklet(unsigned long data)
+struct tasklet_struct	mv_tasklet;
+static void mvs_tasklet(unsigned long opaque)
 {
-	struct mvs_info *mvi = (struct mvs_info *) data;
 	unsigned long flags;
+	u32 stat;
+	u16 core_nr, i = 0;
 
-	spin_lock_irqsave(&mvi->lock, flags);
+	struct mvs_info *mvi;
+	struct sas_ha_struct *sha = (struct sas_ha_struct *)opaque;
+
+	core_nr = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[0];
+
+	if (unlikely(!mvi))
+		BUG_ON(1);
+
+	for (i = 0; i < core_nr; i++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[i];
+		stat = MVS_CHIP_DISP->isr_status(mvi, mvi->irq);
+		if (stat)
+			MVS_CHIP_DISP->isr(mvi, mvi->irq, stat);
+	}
 
-#ifdef MVS_DISABLE_MSI
-	mvs_int_full(mvi);
-#else
-	mvs_int_rx(mvi, true);
-#endif
-	spin_unlock_irqrestore(&mvi->lock, flags);
 }
 #endif
 
 static irqreturn_t mvs_interrupt(int irq, void *opaque)
 {
-	struct mvs_info *mvi = opaque;
-	void __iomem *regs = mvi->regs;
+	u32 core_nr, i = 0;
 	u32 stat;
+	struct mvs_info *mvi;
+	struct sas_ha_struct *sha = opaque;
 
-	stat = mr32(GBL_INT_STAT);
+	core_nr = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[0];
 
-	if (stat == 0 || stat == 0xffffffff)
+	if (unlikely(!mvi))
 		return IRQ_NONE;
 
-	/* clear CMD_CMPLT ASAP */
-	mw32_f(INT_STAT, CINT_DONE);
-
-#ifndef MVS_USE_TASKLET
-	spin_lock(&mvi->lock);
-
-	mvs_int_full(mvi);
+	stat = MVS_CHIP_DISP->isr_status(mvi, irq);
+	if (!stat)
+		return IRQ_NONE;
 
-	spin_unlock(&mvi->lock);
+#ifdef MVS_USE_TASKLET
+	tasklet_schedule(&mv_tasklet);
 #else
-	tasklet_schedule(&mvi->tasklet);
+	for (i = 0; i < core_nr; i++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[i];
+		MVS_CHIP_DISP->isr(mvi, irq, stat);
+	}
 #endif
 	return IRQ_HANDLED;
 }
 
-static struct mvs_info *__devinit mvs_alloc(struct pci_dev *pdev,
-					    const struct pci_device_id *ent)
+static int __devinit mvs_alloc(struct mvs_info *mvi, struct Scsi_Host *shost)
 {
-	struct mvs_info *mvi;
-	unsigned long res_start, res_len, res_flag;
-	struct asd_sas_phy **arr_phy;
-	struct asd_sas_port **arr_port;
-	const struct mvs_chip_info *chip = &mvs_chips[ent->driver_data];
-	int i;
+	int i, slot_nr;
 
-	/*
-	 * alloc and init our per-HBA mvs_info struct
-	 */
-
-	mvi = kzalloc(sizeof(*mvi), GFP_KERNEL);
-	if (!mvi)
-		return NULL;
+	if (mvi->flags & MVF_FLAG_SOC)
+		slot_nr = MVS_SOC_SLOTS;
+	else
+		slot_nr = MVS_SLOTS;
 
 	spin_lock_init(&mvi->lock);
-#ifdef MVS_USE_TASKLET
-	tasklet_init(&mvi->tasklet, mvs_tasklet, (unsigned long)mvi);
-#endif
-	mvi->pdev = pdev;
-	mvi->chip = chip;
-
-	if (pdev->device == 0x6440 && pdev->revision == 0)
-		mvi->flags |= MVF_PHY_PWR_FIX;
-
-	/*
-	 * alloc and init SCSI, SAS glue
-	 */
-
-	mvi->shost = scsi_host_alloc(&mvs_sht, sizeof(void *));
-	if (!mvi->shost)
-		goto err_out;
-
-	arr_phy = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
-	arr_port = kcalloc(MVS_MAX_PHYS, sizeof(void *), GFP_KERNEL);
-	if (!arr_phy || !arr_port)
-		goto err_out;
-
-	for (i = 0; i < MVS_MAX_PHYS; i++) {
+	for (i = 0; i < mvi->chip->n_phy; i++) {
 		mvs_phy_init(mvi, i);
-		arr_phy[i] = &mvi->phy[i].sas_phy;
-		arr_port[i] = &mvi->port[i].sas_port;
-		mvi->port[i].taskfileset = MVS_ID_NOT_MAPPED;
 		mvi->port[i].wide_port_phymap = 0;
 		mvi->port[i].port_attached = 0;
 		INIT_LIST_HEAD(&mvi->port[i].list);
 	}
-
-	SHOST_TO_SAS_HA(mvi->shost) = &mvi->sas;
-	mvi->shost->transportt = mvs_stt;
-	mvi->shost->max_id = 21;
-	mvi->shost->max_lun = ~0;
-	mvi->shost->max_channel = 0;
-	mvi->shost->max_cmd_len = 16;
-
-	mvi->sas.sas_ha_name = DRV_NAME;
-	mvi->sas.dev = &pdev->dev;
-	mvi->sas.lldd_module = THIS_MODULE;
-	mvi->sas.sas_addr = &mvi->sas_addr[0];
-	mvi->sas.sas_phy = arr_phy;
-	mvi->sas.sas_port = arr_port;
-	mvi->sas.num_phys = chip->n_phy;
-	mvi->sas.lldd_max_execute_num = 1;
-	mvi->sas.lldd_queue_size = MVS_QUEUE_SIZE;
-	mvi->shost->can_queue = MVS_CAN_QUEUE;
-	mvi->shost->cmd_per_lun = MVS_SLOTS / mvi->sas.num_phys;
-	mvi->sas.lldd_ha = mvi;
-	mvi->sas.core.shost = mvi->shost;
-
-	mvs_tag_init(mvi);
-
-	/*
-	 * ioremap main and peripheral registers
-	 */
-
-#ifdef MVS_ENABLE_PERI
-	res_start = pci_resource_start(pdev, 2);
-	res_len = pci_resource_len(pdev, 2);
-	if (!res_start || !res_len)
-		goto err_out;
-
-	mvi->peri_regs = ioremap_nocache(res_start, res_len);
-	if (!mvi->peri_regs)
-		goto err_out;
-#endif
-
-	res_start = pci_resource_start(pdev, 4);
-	res_len = pci_resource_len(pdev, 4);
-	if (!res_start || !res_len)
-		goto err_out;
-
-	res_flag = pci_resource_flags(pdev, 4);
-	if (res_flag & IORESOURCE_CACHEABLE)
-		mvi->regs = ioremap(res_start, res_len);
-	else
-		mvi->regs = ioremap_nocache(res_start, res_len);
-
-	if (!mvi->regs)
-		goto err_out;
+	for (i = 0; i < MVS_MAX_DEVICES; i++) {
+		mvi->devices[i].taskfileset = MVS_ID_NOT_MAPPED;
+		mvi->devices[i].dev_type = NO_DEVICE;
+		mvi->devices[i].device_id = i;
+		mvi->devices[i].dev_status = MVS_DEV_NORMAL;
+	}
 
 	/*
 	 * alloc and init our DMA areas
 	 */
-
-	mvi->tx = dma_alloc_coherent(&pdev->dev,
+	mvi->tx = dma_alloc_coherent(mvi->dev,
 				     sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ,
 				     &mvi->tx_dma, GFP_KERNEL);
 	if (!mvi->tx)
 		goto err_out;
 	memset(mvi->tx, 0, sizeof(*mvi->tx) * MVS_CHIP_SLOT_SZ);
-
-	mvi->rx_fis = dma_alloc_coherent(&pdev->dev, MVS_RX_FISL_SZ,
+	mvi->rx_fis = dma_alloc_coherent(mvi->dev, MVS_RX_FISL_SZ,
 					 &mvi->rx_fis_dma, GFP_KERNEL);
 	if (!mvi->rx_fis)
 		goto err_out;
 	memset(mvi->rx_fis, 0, MVS_RX_FISL_SZ);
 
-	mvi->rx = dma_alloc_coherent(&pdev->dev,
+	mvi->rx = dma_alloc_coherent(mvi->dev,
 				     sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1),
 				     &mvi->rx_dma, GFP_KERNEL);
 	if (!mvi->rx)
 		goto err_out;
 	memset(mvi->rx, 0, sizeof(*mvi->rx) * (MVS_RX_RING_SZ + 1));
-
 	mvi->rx[0] = cpu_to_le32(0xfff);
 	mvi->rx_cons = 0xfff;
 
-	mvi->slot = dma_alloc_coherent(&pdev->dev,
-				       sizeof(*mvi->slot) * MVS_SLOTS,
+	mvi->slot = dma_alloc_coherent(mvi->dev,
+				       sizeof(*mvi->slot) * slot_nr,
 				       &mvi->slot_dma, GFP_KERNEL);
 	if (!mvi->slot)
 		goto err_out;
-	memset(mvi->slot, 0, sizeof(*mvi->slot) * MVS_SLOTS);
+	memset(mvi->slot, 0, sizeof(*mvi->slot) * slot_nr);
 
-	for (i = 0; i < MVS_SLOTS; i++) {
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	mvi->bulk_buffer = dma_alloc_coherent(mvi->dev,
+				       TRASH_BUCKET_SIZE,
+				       &mvi->bulk_buffer_dma, GFP_KERNEL);
+	if (!mvi->bulk_buffer)
+		goto err_out;
+#endif
+	for (i = 0; i < slot_nr; i++) {
 		struct mvs_slot_info *slot = &mvi->slot_info[i];
 
-		slot->buf = dma_alloc_coherent(&pdev->dev, MVS_SLOT_BUF_SZ,
+		slot->buf = dma_alloc_coherent(mvi->dev, MVS_SLOT_BUF_SZ,
 					       &slot->buf_dma, GFP_KERNEL);
-		if (!slot->buf)
+		if (!slot->buf) {
+			printk(KERN_DEBUG"failed to allocate slot->buf.\n");
 			goto err_out;
+		}
 		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+		++mvi->tags_num;
 	}
+	/* Initialize tags */
+	mvs_tag_init(mvi);
+	return 0;
+err_out:
+	return 1;
+}
+
 
-	/* finally, read NVRAM to get our SAS address */
-	if (mvs_nvram_read(mvi, NVR_SAS_ADDR, &mvi->sas_addr, 8))
+int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex)
+{
+	unsigned long res_start, res_len, res_flag, res_flag_ex = 0;
+	struct pci_dev *pdev = mvi->pdev;
+	if (bar_ex != -1) {
+		/*
+		 * ioremap main and peripheral registers
+		 */
+		res_start = pci_resource_start(pdev, bar_ex);
+		res_len = pci_resource_len(pdev, bar_ex);
+		if (!res_start || !res_len)
+			goto err_out;
+
+		res_flag_ex = pci_resource_flags(pdev, bar_ex);
+		if (res_flag_ex & IORESOURCE_MEM) {
+			if (res_flag_ex & IORESOURCE_CACHEABLE)
+				mvi->regs_ex = ioremap(res_start, res_len);
+			else
+				mvi->regs_ex = ioremap_nocache(res_start,
+						res_len);
+		} else
+			mvi->regs_ex = (void *)res_start;
+		if (!mvi->regs_ex)
+			goto err_out;
+	}
+
+	res_start = pci_resource_start(pdev, bar);
+	res_len = pci_resource_len(pdev, bar);
+	if (!res_start || !res_len)
+		goto err_out;
+
+	res_flag = pci_resource_flags(pdev, bar);
+	if (res_flag & IORESOURCE_CACHEABLE)
+		mvi->regs = ioremap(res_start, res_len);
+	else
+		mvi->regs = ioremap_nocache(res_start, res_len);
+
+	if (!mvi->regs) {
+		if (mvi->regs_ex && (res_flag_ex & IORESOURCE_MEM))
+			iounmap(mvi->regs_ex);
+		mvi->regs_ex = NULL;
 		goto err_out;
-	return mvi;
+	}
+
+	return 0;
+err_out:
+	return -1;
+}
+
+void mvs_iounmap(void __iomem *regs)
+{
+	iounmap(regs);
+}
+
+static struct mvs_info *__devinit mvs_pci_alloc(struct pci_dev *pdev,
+				const struct pci_device_id *ent,
+				struct Scsi_Host *shost, unsigned int id)
+{
+	struct mvs_info *mvi;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+	mvi = kzalloc(sizeof(*mvi) + MVS_SLOTS * sizeof(struct mvs_slot_info),
+			GFP_KERNEL);
+	if (!mvi)
+		return NULL;
 
+	mvi->pdev = pdev;
+	mvi->dev = &pdev->dev;
+	mvi->chip_id = ent->driver_data;
+	mvi->chip = &mvs_chips[mvi->chip_id];
+	INIT_LIST_HEAD(&mvi->wq_list);
+	mvi->irq = pdev->irq;
+
+	((struct mvs_prv_info *)sha->lldd_ha)->mvi[id] = mvi;
+	((struct mvs_prv_info *)sha->lldd_ha)->n_phy = mvi->chip->n_phy;
+
+	mvi->id = id;
+	mvi->sas = sha;
+	mvi->shost = shost;
+#ifdef MVS_USE_TASKLET
+	tasklet_init(&mv_tasklet, mvs_tasklet, (unsigned long)sha);
+#endif
+
+	if (MVS_CHIP_DISP->chip_ioremap(mvi))
+		goto err_out;
+	if (!mvs_alloc(mvi, shost))
+		return mvi;
 err_out:
 	mvs_free(mvi);
 	return NULL;
@@ -363,16 +415,111 @@ static int pci_go_64(struct pci_dev *pdev)
 	return rc;
 }
 
+static int __devinit mvs_prep_sas_ha_init(struct Scsi_Host *shost,
+				const struct mvs_chip_info *chip_info)
+{
+	int phy_nr, port_nr; unsigned short core_nr;
+	struct asd_sas_phy **arr_phy;
+	struct asd_sas_port **arr_port;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+	core_nr = chip_info->n_host;
+	phy_nr  = core_nr * chip_info->n_phy;
+	port_nr = phy_nr;
+
+	memset(sha, 0x00, sizeof(struct sas_ha_struct));
+	arr_phy  = kcalloc(phy_nr, sizeof(void *), GFP_KERNEL);
+	arr_port = kcalloc(port_nr, sizeof(void *), GFP_KERNEL);
+	if (!arr_phy || !arr_port)
+		goto exit_free;
+
+	sha->sas_phy = arr_phy;
+	sha->sas_port = arr_port;
+
+	sha->lldd_ha = kzalloc(sizeof(struct mvs_prv_info), GFP_KERNEL);
+	if (!sha->lldd_ha)
+		goto exit_free;
+
+	((struct mvs_prv_info *)sha->lldd_ha)->n_host = core_nr;
+
+	shost->transportt = mvs_stt;
+	shost->max_id = 128;
+	shost->max_lun = ~0;
+	shost->max_channel = 1;
+	shost->max_cmd_len = 16;
+
+	return 0;
+exit_free:
+	kfree(arr_phy);
+	kfree(arr_port);
+	return -1;
+
+}
+
+static void  __devinit mvs_post_sas_ha_init(struct Scsi_Host *shost,
+			const struct mvs_chip_info *chip_info)
+{
+	int can_queue, i = 0, j = 0;
+	struct mvs_info *mvi = NULL;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	unsigned short nr_core = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+
+	for (j = 0; j < nr_core; j++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[j];
+		for (i = 0; i < chip_info->n_phy; i++) {
+			sha->sas_phy[j * chip_info->n_phy  + i] =
+				&mvi->phy[i].sas_phy;
+			sha->sas_port[j * chip_info->n_phy + i] =
+				&mvi->port[i].sas_port;
+		}
+	}
+
+	sha->sas_ha_name = DRV_NAME;
+	sha->dev = mvi->dev;
+	sha->lldd_module = THIS_MODULE;
+	sha->sas_addr = &mvi->sas_addr[0];
+
+	sha->num_phys = nr_core * chip_info->n_phy;
+
+	sha->lldd_max_execute_num = 1;
+
+	if (mvi->flags & MVF_FLAG_SOC)
+		can_queue = MVS_SOC_CAN_QUEUE;
+	else
+		can_queue = MVS_CAN_QUEUE;
+
+	sha->lldd_queue_size = can_queue;
+	shost->can_queue = can_queue;
+	mvi->shost->cmd_per_lun = MVS_SLOTS/sha->num_phys;
+	sha->core.shost = mvi->shost;
+}
+
+static void mvs_init_sas_add(struct mvs_info *mvi)
+{
+	u8 i;
+	for (i = 0; i < mvi->chip->n_phy; i++) {
+		mvi->phy[i].dev_sas_addr = 0x5005043011ab0000ULL;
+		mvi->phy[i].dev_sas_addr =
+			cpu_to_be64((u64)(*(u64 *)&mvi->phy[i].dev_sas_addr));
+	}
+
+	memcpy(mvi->sas_addr, &mvi->phy[0].dev_sas_addr, SAS_ADDR_SIZE);
+}
+
 static int __devinit mvs_pci_init(struct pci_dev *pdev,
 				  const struct pci_device_id *ent)
 {
-	int rc;
+	unsigned int rc, nhost = 0;
 	struct mvs_info *mvi;
 	irq_handler_t irq_handler = mvs_interrupt;
+	struct Scsi_Host *shost = NULL;
+	const struct mvs_chip_info *chip;
 
+	dev_printk(KERN_INFO, &pdev->dev,
+		"mvsas: driver version %s\n", DRV_VERSION);
 	rc = pci_enable_device(pdev);
 	if (rc)
-		return rc;
+		goto err_out_enable;
 
 	pci_set_master(pdev);
 
@@ -384,84 +531,110 @@ static int __devinit mvs_pci_init(struct pci_dev *pdev,
 	if (rc)
 		goto err_out_regions;
 
-	mvi = mvs_alloc(pdev, ent);
-	if (!mvi) {
+	shost = scsi_host_alloc(&mvs_sht, sizeof(void *));
+	if (!shost) {
 		rc = -ENOMEM;
 		goto err_out_regions;
 	}
 
-	rc = mvs_hw_init(mvi);
-	if (rc)
-		goto err_out_mvi;
-
-#ifndef MVS_DISABLE_MSI
-	if (!pci_enable_msi(pdev)) {
-		u32 tmp;
-		void __iomem *regs = mvi->regs;
-		mvi->flags |= MVF_MSI;
-		irq_handler = mvs_msi_interrupt;
-		tmp = mr32(PCS);
-		mw32(PCS, tmp | PCS_SELF_CLEAR);
+	chip = &mvs_chips[ent->driver_data];
+	SHOST_TO_SAS_HA(shost) =
+		kcalloc(1, sizeof(struct sas_ha_struct), GFP_KERNEL);
+	if (!SHOST_TO_SAS_HA(shost)) {
+		kfree(shost);
+		rc = -ENOMEM;
+		goto err_out_regions;
 	}
-#endif
 
-	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRV_NAME, mvi);
-	if (rc)
-		goto err_out_msi;
+	rc = mvs_prep_sas_ha_init(shost, chip);
+	if (rc) {
+		kfree(shost);
+		rc = -ENOMEM;
+		goto err_out_regions;
+	}
 
-	rc = scsi_add_host(mvi->shost, &pdev->dev);
-	if (rc)
-		goto err_out_irq;
+	pci_set_drvdata(pdev, SHOST_TO_SAS_HA(shost));
 
-	rc = sas_register_ha(&mvi->sas);
+	do {
+		mvi = mvs_pci_alloc(pdev, ent, shost, nhost);
+		if (!mvi) {
+			rc = -ENOMEM;
+			goto err_out_regions;
+		}
+
+		mvs_init_sas_add(mvi);
+
+		mvi->instance = nhost;
+		rc = MVS_CHIP_DISP->chip_init(mvi);
+		if (rc) {
+			mvs_free(mvi);
+			goto err_out_regions;
+		}
+		nhost++;
+	} while (nhost < chip->n_host);
+
+	mvs_post_sas_ha_init(shost, chip);
+
+	rc = scsi_add_host(shost, &pdev->dev);
 	if (rc)
 		goto err_out_shost;
 
-	pci_set_drvdata(pdev, mvi);
-
-	mvs_print_info(mvi);
+	rc = sas_register_ha(SHOST_TO_SAS_HA(shost));
+	if (rc)
+		goto err_out_shost;
+	rc = request_irq(pdev->irq, irq_handler, IRQF_SHARED,
+		DRV_NAME, SHOST_TO_SAS_HA(shost));
+	if (rc)
+		goto err_not_sas;
 
-	mvs_hba_interrupt_enable(mvi);
+	MVS_CHIP_DISP->interrupt_enable(mvi);
 
 	scsi_scan_host(mvi->shost);
 
 	return 0;
 
+err_not_sas:
+	sas_unregister_ha(SHOST_TO_SAS_HA(shost));
 err_out_shost:
 	scsi_remove_host(mvi->shost);
-err_out_irq:
-	free_irq(pdev->irq, mvi);
-err_out_msi:
-	if (mvi->flags |= MVF_MSI)
-		pci_disable_msi(pdev);
-err_out_mvi:
-	mvs_free(mvi);
 err_out_regions:
 	pci_release_regions(pdev);
 err_out_disable:
 	pci_disable_device(pdev);
+err_out_enable:
 	return rc;
 }
 
 static void __devexit mvs_pci_remove(struct pci_dev *pdev)
 {
-	struct mvs_info *mvi = pci_get_drvdata(pdev);
+	unsigned short core_nr, i = 0;
+	struct sas_ha_struct *sha = pci_get_drvdata(pdev);
+	struct mvs_info *mvi = NULL;
 
-	pci_set_drvdata(pdev, NULL);
+	core_nr = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[0];
 
-	if (mvi) {
-		sas_unregister_ha(&mvi->sas);
-		mvs_hba_interrupt_disable(mvi);
-		sas_remove_host(mvi->shost);
-		scsi_remove_host(mvi->shost);
+#ifdef MVS_USE_TASKLET
+	tasklet_kill(&mv_tasklet);
+#endif
 
-		free_irq(pdev->irq, mvi);
-		if (mvi->flags & MVF_MSI)
-			pci_disable_msi(pdev);
+	pci_set_drvdata(pdev, NULL);
+	sas_unregister_ha(sha);
+	sas_remove_host(mvi->shost);
+	scsi_remove_host(mvi->shost);
+
+	MVS_CHIP_DISP->interrupt_disable(mvi);
+	free_irq(mvi->irq, sha);
+	for (i = 0; i < core_nr; i++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[i];
 		mvs_free(mvi);
-		pci_release_regions(pdev);
 	}
+	kfree(sha->sas_phy);
+	kfree(sha->sas_port);
+	kfree(sha);
+	pci_release_regions(pdev);
 	pci_disable_device(pdev);
+	return;
 }
 
 static struct pci_device_id __devinitdata mvs_pci_table[] = {
@@ -474,10 +647,12 @@ static struct pci_device_id __devinitdata mvs_pci_table[] = {
 		.subdevice	= 0x6480,
 		.class		= 0,
 		.class_mask	= 0,
-		.driver_data	= chip_6480,
+		.driver_data	= chip_6485,
 	},
 	{ PCI_VDEVICE(MARVELL, 0x6440), chip_6440 },
-	{ PCI_VDEVICE(MARVELL, 0x6480), chip_6480 },
+	{ PCI_VDEVICE(MARVELL, 0x6485), chip_6485 },
+	{ PCI_VDEVICE(MARVELL, 0x9480), chip_9480 },
+	{ PCI_VDEVICE(MARVELL, 0x9180), chip_9180 },
 
 	{ }	/* terminate list */
 };
@@ -489,15 +664,17 @@ static struct pci_driver mvs_pci_driver = {
 	.remove		= __devexit_p(mvs_pci_remove),
 };
 
+/* task handler */
+struct task_struct *mvs_th;
 static int __init mvs_init(void)
 {
 	int rc;
-
 	mvs_stt = sas_domain_attach_transport(&mvs_transport_ops);
 	if (!mvs_stt)
 		return -ENOMEM;
 
 	rc = pci_register_driver(&mvs_pci_driver);
+
 	if (rc)
 		goto err_out;
 
@@ -521,4 +698,6 @@ MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>");
 MODULE_DESCRIPTION("Marvell 88SE6440 SAS/SATA controller driver");
 MODULE_VERSION(DRV_VERSION);
 MODULE_LICENSE("GPL");
+#ifdef CONFIG_PCI
 MODULE_DEVICE_TABLE(pci, mvs_pci_table);
+#endif
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 6a583c1..d79ac17 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1,97 +1,65 @@
 /*
-	mv_sas.c - Marvell 88SE6440 SAS/SATA support
-
-	Copyright 2007 Red Hat, Inc.
-	Copyright 2008 Marvell. <kewei@marvell.com>
-
-	This program is free software; you can redistribute it and/or
-	modify it under the terms of the GNU General Public License as
-	published by the Free Software Foundation; either version 2,
-	or (at your option) any later version.
-
-	This program is distributed in the hope that it will be useful,
-	but WITHOUT ANY WARRANTY; without even the implied warranty
-	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-	See the GNU General Public License for more details.
-
-	You should have received a copy of the GNU General Public
-	License along with this program; see the file COPYING.	If not,
-	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
-	MA 02139, USA.
-
-	---------------------------------------------------------------
-
-	Random notes:
-	* hardware supports controlling the endian-ness of data
-	  structures.  this permits elimination of all the le32_to_cpu()
-	  and cpu_to_le32() conversions.
-
- */
+ * Marvell 88SE64xx/88SE94xx main function
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
 
 #include "mv_sas.h"
-#include "mv_64xx.h"
-#include "mv_chips.h"
-
-/* offset for D2H FIS in the Received FIS List Structure */
-#define SATA_RECEIVED_D2H_FIS(reg_set)	\
-	((void *) mvi->rx_fis + 0x400 + 0x100 * reg_set + 0x40)
-#define SATA_RECEIVED_PIO_FIS(reg_set)	\
-	((void *) mvi->rx_fis + 0x400 + 0x100 * reg_set + 0x20)
-#define UNASSOC_D2H_FIS(id)		\
-	((void *) mvi->rx_fis + 0x100 * id)
-
-struct mvs_task_exec_info {
-	struct sas_task *task;
-	struct mvs_cmd_hdr *hdr;
-	struct mvs_port *port;
-	u32 tag;
-	int n_elem;
-};
-
-static void mvs_release_task(struct mvs_info *mvi, int phy_no);
-static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i);
-static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
-					int get_st);
-static int mvs_int_rx(struct mvs_info *mvi, bool self_clear);
-static void mvs_slot_reset(struct mvs_info *mvi, struct sas_task *task,
-				u32 slot_idx);
 
 static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
 {
 	if (task->lldd_task) {
 		struct mvs_slot_info *slot;
 		slot = (struct mvs_slot_info *) task->lldd_task;
-		*tag = slot - mvi->slot_info;
+		*tag = slot->slot_tag;
 		return 1;
 	}
 	return 0;
 }
 
-static void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
+void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
 {
 	void *bitmap = (void *) &mvi->tags;
 	clear_bit(tag, bitmap);
 }
 
-static void mvs_tag_free(struct mvs_info *mvi, u32 tag)
+void mvs_tag_free(struct mvs_info *mvi, u32 tag)
 {
 	mvs_tag_clear(mvi, tag);
 }
 
-static void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
+void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
 {
 	void *bitmap = (void *) &mvi->tags;
 	set_bit(tag, bitmap);
 }
 
-static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
+inline int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
 {
 	unsigned int index, tag;
 	void *bitmap = (void *) &mvi->tags;
 
-	index = find_first_zero_bit(bitmap, MVS_SLOTS);
+	index = find_first_zero_bit(bitmap, mvi->tags_num);
 	tag = index;
-	if (tag >= MVS_SLOTS)
+	if (tag >= mvi->tags_num)
 		return -SAS_QUEUE_FULL;
 	mvs_tag_set(mvi, tag);
 	*tag_out = tag;
@@ -101,11 +69,11 @@ static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
 void mvs_tag_init(struct mvs_info *mvi)
 {
 	int i;
-	for (i = 0; i < MVS_SLOTS; ++i)
+	for (i = 0; i < mvi->tags_num; ++i)
 		mvs_tag_clear(mvi, i);
 }
 
-static void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
+void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
 {
 	u32 i;
 	u32 run;
@@ -113,7 +81,7 @@ static void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
 
 	offset = 0;
 	while (size) {
-		printk("%08X : ", baseaddr + offset);
+		printk(KERN_DEBUG"%08X : ", baseaddr + offset);
 		if (size >= 16)
 			run = 16;
 		else
@@ -121,31 +89,31 @@ static void mvs_hexdump(u32 size, u8 *data, u32 baseaddr)
 		size -= run;
 		for (i = 0; i < 16; i++) {
 			if (i < run)
-				printk("%02X ", (u32)data[i]);
+				printk(KERN_DEBUG"%02X ", (u32)data[i]);
 			else
-				printk("   ");
+				printk(KERN_DEBUG"   ");
 		}
-		printk(": ");
+		printk(KERN_DEBUG": ");
 		for (i = 0; i < run; i++)
-			printk("%c", isalnum(data[i]) ? data[i] : '.');
-		printk("\n");
+			printk(KERN_DEBUG"%c",
+				isalnum(data[i]) ? data[i] : '.');
+		printk(KERN_DEBUG"\n");
 		data = &data[16];
 		offset += run;
 	}
-	printk("\n");
+	printk(KERN_DEBUG"\n");
 }
 
-#if _MV_DUMP
+#if (_MV_DUMP > 1)
 static void mvs_hba_sb_dump(struct mvs_info *mvi, u32 tag,
 				   enum sas_protocol proto)
 {
 	u32 offset;
-	struct pci_dev *pdev = mvi->pdev;
 	struct mvs_slot_info *slot = &mvi->slot_info[tag];
 
 	offset = slot->cmd_size + MVS_OAF_SZ +
-	    sizeof(struct mvs_prd) * slot->n_elem;
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Status buffer[%d] :\n",
+	    MVS_CHIP_DISP->prd_size() * slot->n_elem;
+	dev_printk(KERN_DEBUG, mvi->dev, "+---->Status buffer[%d] :\n",
 			tag);
 	mvs_hexdump(32, (u8 *) slot->response,
 		    (u32) slot->buf_dma + offset);
@@ -155,47 +123,45 @@ static void mvs_hba_sb_dump(struct mvs_info *mvi, u32 tag,
 static void mvs_hba_memory_dump(struct mvs_info *mvi, u32 tag,
 				enum sas_protocol proto)
 {
-#if _MV_DUMP
+#if (_MV_DUMP > 1)
 	u32 sz, w_ptr;
 	u64 addr;
-	void __iomem *regs = mvi->regs;
-	struct pci_dev *pdev = mvi->pdev;
 	struct mvs_slot_info *slot = &mvi->slot_info[tag];
 
 	/*Delivery Queue */
-	sz = mr32(TX_CFG) & TX_RING_SZ_MASK;
+	sz = MVS_CHIP_SLOT_SZ;
 	w_ptr = slot->tx;
-	addr = mr32(TX_HI) << 16 << 16 | mr32(TX_LO);
-	dev_printk(KERN_DEBUG, &pdev->dev,
+	addr = mvi->tx_dma;
+	dev_printk(KERN_DEBUG, mvi->dev,
 		"Delivery Queue Size=%04d , WRT_PTR=%04X\n", sz, w_ptr);
-	dev_printk(KERN_DEBUG, &pdev->dev,
+	dev_printk(KERN_DEBUG, mvi->dev,
 		"Delivery Queue Base Address=0x%llX (PA)"
 		"(tx_dma=0x%llX), Entry=%04d\n",
-		addr, mvi->tx_dma, w_ptr);
+		addr, (unsigned long long)mvi->tx_dma, w_ptr);
 	mvs_hexdump(sizeof(u32), (u8 *)(&mvi->tx[mvi->tx_prod]),
 			(u32) mvi->tx_dma + sizeof(u32) * w_ptr);
 	/*Command List */
 	addr = mvi->slot_dma;
-	dev_printk(KERN_DEBUG, &pdev->dev,
+	dev_printk(KERN_DEBUG, mvi->dev,
 		"Command List Base Address=0x%llX (PA)"
 		"(slot_dma=0x%llX), Header=%03d\n",
-		addr, slot->buf_dma, tag);
-	dev_printk(KERN_DEBUG, &pdev->dev, "Command Header[%03d]:\n", tag);
+		addr, (unsigned long long)slot->buf_dma, tag);
+	dev_printk(KERN_DEBUG, mvi->dev, "Command Header[%03d]:\n", tag);
 	/*mvs_cmd_hdr */
 	mvs_hexdump(sizeof(struct mvs_cmd_hdr), (u8 *)(&mvi->slot[tag]),
 		(u32) mvi->slot_dma + tag * sizeof(struct mvs_cmd_hdr));
 	/*1.command table area */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Command Table :\n");
+	dev_printk(KERN_DEBUG, mvi->dev, "+---->Command Table :\n");
 	mvs_hexdump(slot->cmd_size, (u8 *) slot->buf, (u32) slot->buf_dma);
 	/*2.open address frame area */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->Open Address Frame :\n");
+	dev_printk(KERN_DEBUG, mvi->dev, "+---->Open Address Frame :\n");
 	mvs_hexdump(MVS_OAF_SZ, (u8 *) slot->buf + slot->cmd_size,
 				(u32) slot->buf_dma + slot->cmd_size);
 	/*3.status buffer */
 	mvs_hba_sb_dump(mvi, tag, proto);
 	/*4.PRD table */
-	dev_printk(KERN_DEBUG, &pdev->dev, "+---->PRD table :\n");
-	mvs_hexdump(sizeof(struct mvs_prd) * slot->n_elem,
+	dev_printk(KERN_DEBUG, mvi->dev, "+---->PRD table :\n");
+	mvs_hexdump(MVS_CHIP_DISP->prd_size() * slot->n_elem,
 		(u8 *) slot->buf + slot->cmd_size + MVS_OAF_SZ,
 		(u32) slot->buf_dma + slot->cmd_size + MVS_OAF_SZ);
 #endif
@@ -206,15 +172,14 @@ static void mvs_hba_cq_dump(struct mvs_info *mvi)
 #if (_MV_DUMP > 2)
 	u64 addr;
 	void __iomem *regs = mvi->regs;
-	struct pci_dev *pdev = mvi->pdev;
 	u32 entry = mvi->rx_cons + 1;
 	u32 rx_desc = le32_to_cpu(mvi->rx[entry]);
 
 	/*Completion Queue */
 	addr = mr32(RX_HI) << 16 << 16 | mr32(RX_LO);
-	dev_printk(KERN_DEBUG, &pdev->dev, "Completion Task = 0x%p\n",
+	dev_printk(KERN_DEBUG, mvi->dev, "Completion Task = 0x%p\n",
 		   mvi->slot_info[rx_desc & RXQ_SLOT_MASK].task);
-	dev_printk(KERN_DEBUG, &pdev->dev,
+	dev_printk(KERN_DEBUG, mvi->dev,
 		"Completion List Base Address=0x%llX (PA), "
 		"CQ_Entry=%04d, CQ_WP=0x%08X\n",
 		addr, entry - 1, mvi->rx[0]);
@@ -223,62 +188,174 @@ static void mvs_hba_cq_dump(struct mvs_info *mvi)
 #endif
 }
 
-/* FIXME: locking? */
-int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, void *funcdata)
+void mvs_get_sas_addr(void *buf, u32 buflen)
 {
-	struct mvs_info *mvi = sas_phy->ha->lldd_ha;
-	int rc = 0, phy_id = sas_phy->id;
-	u32 tmp;
+	/*memcpy(buf, "\x50\x05\x04\x30\x11\xab\x64\x40", 8);*/
+}
 
-	tmp = mvs_read_phy_ctl(mvi, phy_id);
+struct mvs_info *mvs_find_dev_mvi(struct domain_device *dev)
+{
+	unsigned long i = 0, j = 0, hi = 0;
+	struct sas_ha_struct *sha = dev->port->ha;
+	struct mvs_info *mvi = NULL;
+	struct asd_sas_phy *phy;
+
+	while (sha->sas_port[i]) {
+		if (sha->sas_port[i] == dev->port) {
+			phy =  container_of(sha->sas_port[i]->phy_list.next,
+				struct asd_sas_phy, port_phy_el);
+			j = 0;
+			while (sha->sas_phy[j]) {
+				if (sha->sas_phy[j] == phy)
+					break;
+				j++;
+			}
+			break;
+		}
+		i++;
+	}
+	hi = j/((struct mvs_prv_info *)sha->lldd_ha)->n_phy;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[hi];
 
-	switch (func) {
-	case PHY_FUNC_SET_LINK_RATE:{
-			struct sas_phy_linkrates *rates = funcdata;
-			u32 lrmin = 0, lrmax = 0;
+	return mvi;
 
-			lrmin = (rates->minimum_linkrate << 8);
-			lrmax = (rates->maximum_linkrate << 12);
+}
 
-			if (lrmin) {
-				tmp &= ~(0xf << 8);
-				tmp |= lrmin;
-			}
-			if (lrmax) {
-				tmp &= ~(0xf << 12);
-				tmp |= lrmax;
+/* FIXME */
+int mvs_find_dev_phyno(struct domain_device *dev, int *phyno)
+{
+	unsigned long i = 0, j = 0, n = 0, num = 0;
+	struct mvs_info *mvi = mvs_find_dev_mvi(dev);
+	struct sas_ha_struct *sha = dev->port->ha;
+
+	while (sha->sas_port[i]) {
+		if (sha->sas_port[i] == dev->port) {
+			struct asd_sas_phy *phy;
+			list_for_each_entry(phy,
+				&sha->sas_port[i]->phy_list, port_phy_el) {
+				j = 0;
+				while (sha->sas_phy[j]) {
+					if (sha->sas_phy[j] == phy)
+						break;
+					j++;
+				}
+				phyno[n] = (j >= mvi->chip->n_phy) ?
+					(j - mvi->chip->n_phy) : j;
+				num++;
+				n++;
 			}
-			mvs_write_phy_ctl(mvi, phy_id, tmp);
 			break;
 		}
+		i++;
+	}
+	return num;
+}
+
+static inline void mvs_free_reg_set(struct mvs_info *mvi,
+				struct mvs_device *dev)
+{
+	if (!dev) {
+		mv_printk("device has been free.\n");
+		return;
+	}
+	if (dev->runing_req != 0)
+		return;
+	if (dev->taskfileset == MVS_ID_NOT_MAPPED)
+		return;
+	MVS_CHIP_DISP->free_reg_set(mvi, &dev->taskfileset);
+}
+
+static inline u8 mvs_assign_reg_set(struct mvs_info *mvi,
+				struct mvs_device *dev)
+{
+	if (dev->taskfileset != MVS_ID_NOT_MAPPED)
+		return 0;
+	return MVS_CHIP_DISP->assign_reg_set(mvi, &dev->taskfileset);
+}
+
+void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard)
+{
+	u32 no;
+	for_each_phy(phy_mask, phy_mask, no) {
+		if (!(phy_mask & 1))
+			continue;
+		MVS_CHIP_DISP->phy_reset(mvi, no, hard);
+	}
+}
+
+/* FIXME: locking? */
+int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
+			void *funcdata)
+{
+	int rc = 0, phy_id = sas_phy->id;
+	u32 tmp, i = 0, hi;
+	struct sas_ha_struct *sha = sas_phy->ha;
+	struct mvs_info *mvi = NULL;
+
+	while (sha->sas_phy[i]) {
+		if (sha->sas_phy[i] == sas_phy)
+			break;
+		i++;
+	}
+	hi = i/((struct mvs_prv_info *)sha->lldd_ha)->n_phy;
+	mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[hi];
+
+	switch (func) {
+	case PHY_FUNC_SET_LINK_RATE:
+		MVS_CHIP_DISP->phy_set_link_rate(mvi, phy_id, funcdata);
+		break;
 
 	case PHY_FUNC_HARD_RESET:
+		tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, phy_id);
 		if (tmp & PHY_RST_HARD)
 			break;
-		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST_HARD);
+		MVS_CHIP_DISP->phy_reset(mvi, phy_id, 1);
 		break;
 
 	case PHY_FUNC_LINK_RESET:
-		mvs_write_phy_ctl(mvi, phy_id, tmp | PHY_RST);
+		MVS_CHIP_DISP->phy_enable(mvi, phy_id);
+		MVS_CHIP_DISP->phy_reset(mvi, phy_id, 0);
 		break;
 
 	case PHY_FUNC_DISABLE:
+		MVS_CHIP_DISP->phy_disable(mvi, phy_id);
+		break;
 	case PHY_FUNC_RELEASE_SPINUP_HOLD:
 	default:
 		rc = -EOPNOTSUPP;
 	}
-
+	msleep(200);
 	return rc;
 }
 
+void __devinit mvs_set_sas_addr(struct mvs_info *mvi, int port_id,
+				u32 off_lo, u32 off_hi, u64 sas_addr)
+{
+	u32 lo = (u32)sas_addr;
+	u32 hi = (u32)(sas_addr>>32);
+
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, port_id, off_lo);
+	MVS_CHIP_DISP->write_port_cfg_data(mvi, port_id, lo);
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, port_id, off_hi);
+	MVS_CHIP_DISP->write_port_cfg_data(mvi, port_id, hi);
+}
+
 static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
 {
 	struct mvs_phy *phy = &mvi->phy[i];
-	struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
-
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+	struct sas_ha_struct *sas_ha;
 	if (!phy->phy_attached)
 		return;
 
+	if (!(phy->att_dev_info & PORT_DEV_TRGT_MASK)
+		&& phy->phy_type & PORT_TYPE_SAS) {
+		return;
+	}
+
+	sas_ha = mvi->sas;
+	sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE);
+
 	if (sas_phy->phy) {
 		struct sas_phy *sphy = sas_phy->phy;
 
@@ -286,7 +363,7 @@ static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
 		sphy->minimum_linkrate = phy->minimum_linkrate;
 		sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
 		sphy->maximum_linkrate = phy->maximum_linkrate;
-		sphy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
+		sphy->maximum_linkrate_hw = MVS_CHIP_DISP->phy_max_link_rate();
 	}
 
 	if (phy->phy_type & PORT_TYPE_SAS) {
@@ -297,13 +374,31 @@ static void mvs_bytes_dmaed(struct mvs_info *mvi, int i)
 		id->initiator_bits = SAS_PROTOCOL_ALL;
 		id->target_bits = phy->identify.target_port_protocols;
 	} else if (phy->phy_type & PORT_TYPE_SATA) {
-		/* TODO */
+		/*Nothing*/
 	}
-	mvi->sas.sas_phy[i]->frame_rcvd_size = phy->frame_rcvd_size;
-	mvi->sas.notify_port_event(mvi->sas.sas_phy[i],
+	mv_dprintk("phy %d byte dmaded.\n", i + mvi->id * mvi->chip->n_phy);
+
+	sas_phy->frame_rcvd_size = phy->frame_rcvd_size;
+
+	mvi->sas->notify_port_event(sas_phy,
 				   PORTE_BYTES_DMAED);
 }
 
+int mvs_slave_alloc(struct scsi_device *scsi_dev)
+{
+	struct domain_device *dev = sdev_to_domain_dev(scsi_dev);
+	if (dev_is_sata(dev)) {
+		/* We don't need to rescan targets
+		 * if REPORT_LUNS request is failed
+		 */
+		if (scsi_dev->lun > 0)
+			return -ENXIO;
+		scsi_dev->tagged_supported = 1;
+	}
+
+	return sas_slave_alloc(scsi_dev);
+}
+
 int mvs_slave_configure(struct scsi_device *sdev)
 {
 	struct domain_device *dev = sdev_to_domain_dev(sdev);
@@ -311,25 +406,31 @@ int mvs_slave_configure(struct scsi_device *sdev)
 
 	if (ret)
 		return ret;
-
 	if (dev_is_sata(dev)) {
-		/* struct ata_port *ap = dev->sata_dev.ap; */
-		/* struct ata_device *adev = ap->link.device; */
-
-		/* clamp at no NCQ for the time being */
-		/* adev->flags |= ATA_DFLAG_NCQ_OFF; */
+		/* may set PIO mode */
+	#if MV_DISABLE_NCQ
+		struct ata_port *ap = dev->sata_dev.ap;
+		struct ata_device *adev = ap->link.device;
+		adev->flags |= ATA_DFLAG_NCQ_OFF;
 		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, 1);
+	#endif
 	}
 	return 0;
 }
 
 void mvs_scan_start(struct Scsi_Host *shost)
 {
-	int i;
-	struct mvs_info *mvi = SHOST_TO_SAS_HA(shost)->lldd_ha;
+	int i, j;
+	unsigned short core_nr;
+	struct mvs_info *mvi;
+	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+
+	core_nr = ((struct mvs_prv_info *)sha->lldd_ha)->n_host;
 
-	for (i = 0; i < mvi->chip->n_phy; ++i) {
-		mvs_bytes_dmaed(mvi, i);
+	for (j = 0; j < core_nr; j++) {
+		mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[j];
+		for (i = 0; i < mvi->chip->n_phy; ++i)
+			mvs_bytes_dmaed(mvi, i);
 	}
 }
 
@@ -350,15 +451,15 @@ static int mvs_task_prep_smp(struct mvs_info *mvi,
 	int elem, rc, i;
 	struct sas_task *task = tei->task;
 	struct mvs_cmd_hdr *hdr = tei->hdr;
+	struct domain_device *dev = task->dev;
+	struct asd_sas_port *sas_port = dev->port;
 	struct scatterlist *sg_req, *sg_resp;
 	u32 req_len, resp_len, tag = tei->tag;
 	void *buf_tmp;
 	u8 *buf_oaf;
 	dma_addr_t buf_tmp_dma;
-	struct mvs_prd *buf_prd;
-	struct scatterlist *sg;
+	void *buf_prd;
 	struct mvs_slot_info *slot = &mvi->slot_info[tag];
-	struct asd_sas_port *sas_port = task->dev->port;
 	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
 #if _MV_DUMP
 	u8 *buf_cmd;
@@ -368,18 +469,18 @@ static int mvs_task_prep_smp(struct mvs_info *mvi,
 	 * DMA-map SMP request, response buffers
 	 */
 	sg_req = &task->smp_task.smp_req;
-	elem = pci_map_sg(mvi->pdev, sg_req, 1, PCI_DMA_TODEVICE);
+	elem = dma_map_sg(mvi->dev, sg_req, 1, PCI_DMA_TODEVICE);
 	if (!elem)
 		return -ENOMEM;
 	req_len = sg_dma_len(sg_req);
 
 	sg_resp = &task->smp_task.smp_resp;
-	elem = pci_map_sg(mvi->pdev, sg_resp, 1, PCI_DMA_FROMDEVICE);
+	elem = dma_map_sg(mvi->dev, sg_resp, 1, PCI_DMA_FROMDEVICE);
 	if (!elem) {
 		rc = -ENOMEM;
 		goto err_out;
 	}
-	resp_len = sg_dma_len(sg_resp);
+	resp_len = SB_RFB_MAX;
 
 	/* must be in dwords */
 	if ((req_len & 0x3) || (resp_len & 0x3)) {
@@ -391,7 +492,7 @@ static int mvs_task_prep_smp(struct mvs_info *mvi,
 	 * arrange MVS_SLOT_BUF_SZ-sized DMA buffer according to our needs
 	 */
 
-	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ************** */
+	/* region 1: command table area (MVS_SSP_CMD_SZ bytes) ***** */
 	buf_tmp = slot->buf;
 	buf_tmp_dma = slot->buf_dma;
 
@@ -412,20 +513,22 @@ static int mvs_task_prep_smp(struct mvs_info *mvi,
 	buf_tmp += MVS_OAF_SZ;
 	buf_tmp_dma += MVS_OAF_SZ;
 
-	/* region 3: PRD table ********************************************* */
+	/* region 3: PRD table *********************************** */
 	buf_prd = buf_tmp;
 	if (tei->n_elem)
 		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
 	else
 		hdr->prd_tbl = 0;
 
-	i = sizeof(struct mvs_prd) * tei->n_elem;
+	i = MVS_CHIP_DISP->prd_size() * tei->n_elem;
 	buf_tmp += i;
 	buf_tmp_dma += i;
 
 	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
 	slot->response = buf_tmp;
 	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+	if (mvi->flags & MVF_FLAG_SOC)
+		hdr->reserved[0] = 0;
 
 	/*
 	 * Fill in TX ring and command slot header
@@ -441,17 +544,14 @@ static int mvs_task_prep_smp(struct mvs_info *mvi,
 	hdr->data_len = 0;
 
 	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (0 << 4) | 0x01; /* initiator, SMP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
+	/* initiator, SMP, ftype 1h */
+	buf_oaf[0] = (1 << 7) | (PROTOCOL_SMP << 4) | 0x01;
+	buf_oaf[1] = dev->linkrate & 0xf;
 	*(u16 *)(buf_oaf + 2) = 0xFFFF;		/* SAS SPEC */
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
+	memcpy(buf_oaf + 4, dev->sas_addr, SAS_ADDR_SIZE);
 
 	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
+	MVS_CHIP_DISP->make_prd(task->scatter, tei->n_elem, buf_prd);
 
 #if _MV_DUMP
 	/* copy cmd table */
@@ -462,10 +562,10 @@ static int mvs_task_prep_smp(struct mvs_info *mvi,
 	return 0;
 
 err_out_2:
-	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_resp, 1,
+	dma_unmap_sg(mvi->dev, &tei->task->smp_task.smp_resp, 1,
 		     PCI_DMA_FROMDEVICE);
 err_out:
-	pci_unmap_sg(mvi->pdev, &tei->task->smp_task.smp_req, 1,
+	dma_unmap_sg(mvi->dev, &tei->task->smp_task.smp_req, 1,
 		     PCI_DMA_TODEVICE);
 	return rc;
 }
@@ -490,30 +590,41 @@ static int mvs_task_prep_ata(struct mvs_info *mvi,
 {
 	struct sas_task *task = tei->task;
 	struct domain_device *dev = task->dev;
+	struct mvs_device *mvi_dev =
+		(struct mvs_device *)dev->lldd_dev;
 	struct mvs_cmd_hdr *hdr = tei->hdr;
 	struct asd_sas_port *sas_port = dev->port;
 	struct mvs_slot_info *slot;
-	struct scatterlist *sg;
-	struct mvs_prd *buf_prd;
-	struct mvs_port *port = tei->port;
-	u32 tag = tei->tag;
-	u32 flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+	void *buf_prd;
+	u32 tag = tei->tag, hdr_tag;
+	u32 flags, del_q;
 	void *buf_tmp;
 	u8 *buf_cmd, *buf_oaf;
 	dma_addr_t buf_tmp_dma;
 	u32 i, req_len, resp_len;
 	const u32 max_resp_len = SB_RFB_MAX;
 
-	if (mvs_assign_reg_set(mvi, port) == MVS_ID_NOT_MAPPED)
+	if (mvs_assign_reg_set(mvi, mvi_dev) == MVS_ID_NOT_MAPPED) {
+		mv_dprintk("Have not enough regiset for dev %d.\n",
+			mvi_dev->device_id);
 		return -EBUSY;
-
+	}
 	slot = &mvi->slot_info[tag];
 	slot->tx = mvi->tx_prod;
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
-					(TXQ_CMD_STP << TXQ_CMD_SHIFT) |
-					(sas_port->phy_mask << TXQ_PHY_SHIFT) |
-					(port->taskfileset << TXQ_SRS_SHIFT));
-
+	del_q = TXQ_MODE_I | tag |
+		(TXQ_CMD_STP << TXQ_CMD_SHIFT) |
+		(sas_port->phy_mask << TXQ_PHY_SHIFT) |
+		(mvi_dev->taskfileset << TXQ_SRS_SHIFT);
+	mvi->tx[mvi->tx_prod] = cpu_to_le32(del_q);
+
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	if (task->data_dir == DMA_FROM_DEVICE)
+		flags = (MVS_CHIP_DISP->prd_count() << MCH_PRD_LEN_SHIFT);
+	else
+		flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+#else
+	flags = (tei->n_elem << MCH_PRD_LEN_SHIFT);
+#endif
 	if (task->ata_task.use_ncq)
 		flags |= MCH_FPDMA;
 	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET) {
@@ -526,10 +637,13 @@ static int mvs_task_prep_ata(struct mvs_info *mvi,
 	hdr->flags = cpu_to_le32(flags);
 
 	/* FIXME: the low order order 5 bits for the TAG if enable NCQ */
-	if (task->ata_task.use_ncq && mvs_get_ncq_tag(task, &hdr->tags))
-		task->ata_task.fis.sector_count |= hdr->tags << 3;
+	if (task->ata_task.use_ncq && mvs_get_ncq_tag(task, &hdr_tag))
+		task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3);
 	else
-		hdr->tags = cpu_to_le32(tag);
+		hdr_tag = tag;
+
+	hdr->tags = cpu_to_le32(hdr_tag);
+
 	hdr->data_len = cpu_to_le32(task->total_xfer_len);
 
 	/*
@@ -558,12 +672,13 @@ static int mvs_task_prep_ata(struct mvs_info *mvi,
 
 	/* region 3: PRD table ********************************************* */
 	buf_prd = buf_tmp;
+
 	if (tei->n_elem)
 		hdr->prd_tbl = cpu_to_le64(buf_tmp_dma);
 	else
 		hdr->prd_tbl = 0;
+	i = MVS_CHIP_DISP->prd_size() * MVS_CHIP_DISP->prd_count();
 
-	i = sizeof(struct mvs_prd) * tei->n_elem;
 	buf_tmp += i;
 	buf_tmp_dma += i;
 
@@ -573,6 +688,8 @@ static int mvs_task_prep_ata(struct mvs_info *mvi,
 	 */
 	slot->response = buf_tmp;
 	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+	if (mvi->flags & MVF_FLAG_SOC)
+		hdr->reserved[0] = 0;
 
 	req_len = sizeof(struct host_to_dev_fis);
 	resp_len = MVS_SLOT_BUF_SZ - MVS_ATA_CMD_SZ -
@@ -582,7 +699,8 @@ static int mvs_task_prep_ata(struct mvs_info *mvi,
 	resp_len = min(resp_len, max_resp_len);
 	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
 
-	task->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */
+	if (likely(!task->ata_task.device_control_reg_update))
+		task->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */
 	/* fill in command FIS and ATAPI CDB */
 	memcpy(buf_cmd, &task->ata_task.fis, sizeof(struct host_to_dev_fis));
 	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET)
@@ -590,30 +708,35 @@ static int mvs_task_prep_ata(struct mvs_info *mvi,
 			task->ata_task.atapi_packet, 16);
 
 	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (2 << 4) | 0x1;	/* initiator, STP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = cpu_to_be16(tag);
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
+	/* initiator, STP, ftype 1h */
+	buf_oaf[0] = (1 << 7) | (PROTOCOL_STP << 4) | 0x1;
+	buf_oaf[1] = dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = cpu_to_be16(mvi_dev->device_id + 1);
+	memcpy(buf_oaf + 4, dev->sas_addr, SAS_ADDR_SIZE);
 
 	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
-	}
-
+	MVS_CHIP_DISP->make_prd(task->scatter, tei->n_elem, buf_prd);
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	if (task->data_dir == DMA_FROM_DEVICE)
+		MVS_CHIP_DISP->dma_fix(mvi->bulk_buffer_dma,
+				TRASH_BUCKET_SIZE, tei->n_elem, buf_prd);
+#endif
 	return 0;
 }
 
 static int mvs_task_prep_ssp(struct mvs_info *mvi,
-			     struct mvs_task_exec_info *tei)
+			     struct mvs_task_exec_info *tei, int is_tmf,
+			     struct mvs_tmf_task *tmf)
 {
 	struct sas_task *task = tei->task;
 	struct mvs_cmd_hdr *hdr = tei->hdr;
 	struct mvs_port *port = tei->port;
+	struct domain_device *dev = task->dev;
+	struct mvs_device *mvi_dev =
+		(struct mvs_device *)dev->lldd_dev;
+	struct asd_sas_port *sas_port = dev->port;
 	struct mvs_slot_info *slot;
-	struct scatterlist *sg;
-	struct mvs_prd *buf_prd;
+	void *buf_prd;
 	struct ssp_frame_hdr *ssp_hdr;
 	void *buf_tmp;
 	u8 *buf_cmd, *buf_oaf, fburst = 0;
@@ -621,12 +744,13 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 	u32 flags;
 	u32 resp_len, req_len, i, tag = tei->tag;
 	const u32 max_resp_len = SB_RFB_MAX;
-	u8 phy_mask;
+	u32 phy_mask;
 
 	slot = &mvi->slot_info[tag];
 
-	phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
-		task->dev->port->phy_mask;
+	phy_mask = ((port->wide_port_phymap) ? port->wide_port_phymap :
+		sas_port->phy_mask) & TXQ_PHY_MASK;
+
 	slot->tx = mvi->tx_prod;
 	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | tag |
 				(TXQ_CMD_SSP << TXQ_CMD_SHIFT) |
@@ -640,7 +764,6 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 	hdr->flags = cpu_to_le32(flags |
 				 (tei->n_elem << MCH_PRD_LEN_SHIFT) |
 				 (MCH_SSP_FR_CMD << MCH_SSP_FR_TYPE_SHIFT));
-
 	hdr->tags = cpu_to_le32(tag);
 	hdr->data_len = cpu_to_le32(task->total_xfer_len);
 
@@ -674,13 +797,15 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 	else
 		hdr->prd_tbl = 0;
 
-	i = sizeof(struct mvs_prd) * tei->n_elem;
+	i = MVS_CHIP_DISP->prd_size() * tei->n_elem;
 	buf_tmp += i;
 	buf_tmp_dma += i;
 
 	/* region 4: status buffer (larger the PRD, smaller this buf) ****** */
 	slot->response = buf_tmp;
 	hdr->status_buf = cpu_to_le64(buf_tmp_dma);
+	if (mvi->flags & MVF_FLAG_SOC)
+		hdr->reserved[0] = 0;
 
 	resp_len = MVS_SLOT_BUF_SZ - MVS_SSP_CMD_SZ - MVS_OAF_SZ -
 	    sizeof(struct mvs_err_info) - i;
@@ -692,57 +817,105 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | (req_len / 4));
 
 	/* generate open address frame hdr (first 12 bytes) */
-	buf_oaf[0] = (1 << 7) | (1 << 4) | 0x1;	/* initiator, SSP, ftype 1h */
-	buf_oaf[1] = task->dev->linkrate & 0xf;
-	*(u16 *)(buf_oaf + 2) = cpu_to_be16(tag);
-	memcpy(buf_oaf + 4, task->dev->sas_addr, SAS_ADDR_SIZE);
+	/* initiator, SSP, ftype 1h */
+	buf_oaf[0] = (1 << 7) | (PROTOCOL_SSP << 4) | 0x1;
+	buf_oaf[1] = dev->linkrate & 0xf;
+	*(u16 *)(buf_oaf + 2) = cpu_to_be16(mvi_dev->device_id + 1);
+	memcpy(buf_oaf + 4, dev->sas_addr, SAS_ADDR_SIZE);
 
 	/* fill in SSP frame header (Command Table.SSP frame header) */
 	ssp_hdr = (struct ssp_frame_hdr *)buf_cmd;
-	ssp_hdr->frame_type = SSP_COMMAND;
-	memcpy(ssp_hdr->hashed_dest_addr, task->dev->hashed_sas_addr,
+
+	if (is_tmf)
+		ssp_hdr->frame_type = SSP_TASK;
+	else
+		ssp_hdr->frame_type = SSP_COMMAND;
+
+	memcpy(ssp_hdr->hashed_dest_addr, dev->hashed_sas_addr,
 	       HASHED_SAS_ADDR_SIZE);
 	memcpy(ssp_hdr->hashed_src_addr,
-	       task->dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	       dev->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
 	ssp_hdr->tag = cpu_to_be16(tag);
 
-	/* fill in command frame IU */
+	/* fill in IU for TASK and Command Frame */
 	buf_cmd += sizeof(*ssp_hdr);
 	memcpy(buf_cmd, &task->ssp_task.LUN, 8);
-	buf_cmd[9] = fburst | task->ssp_task.task_attr |
-			(task->ssp_task.task_prio << 3);
-	memcpy(buf_cmd + 12, &task->ssp_task.cdb, 16);
 
-	/* fill in PRD (scatter/gather) table, if any */
-	for_each_sg(task->scatter, sg, tei->n_elem, i) {
-		buf_prd->addr = cpu_to_le64(sg_dma_address(sg));
-		buf_prd->len = cpu_to_le32(sg_dma_len(sg));
-		buf_prd++;
+	if (ssp_hdr->frame_type != SSP_TASK) {
+		buf_cmd[9] = fburst | task->ssp_task.task_attr |
+				(task->ssp_task.task_prio << 3);
+		memcpy(buf_cmd + 12, &task->ssp_task.cdb, 16);
+	} else{
+		buf_cmd[10] = tmf->tmf;
+		switch (tmf->tmf) {
+		case TMF_ABORT_TASK:
+		case TMF_QUERY_TASK:
+			buf_cmd[12] =
+				(tmf->tag_of_task_to_be_managed >> 8) & 0xff;
+			buf_cmd[13] =
+				tmf->tag_of_task_to_be_managed & 0xff;
+			break;
+		default:
+			break;
+		}
 	}
-
+	/* fill in PRD (scatter/gather) table, if any */
+	MVS_CHIP_DISP->make_prd(task->scatter, tei->n_elem, buf_prd);
 	return 0;
 }
 
-int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
+#define	DEV_IS_GONE(mvi_dev)	((!mvi_dev || (mvi_dev->dev_type == NO_DEVICE)))
+static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
+				struct completion *completion, int lock,
+				int is_tmf, struct mvs_tmf_task *tmf)
 {
 	struct domain_device *dev = task->dev;
-	struct mvs_info *mvi = dev->port->ha->lldd_ha;
-	struct pci_dev *pdev = mvi->pdev;
-	void __iomem *regs = mvi->regs;
+	struct mvs_info *mvi;
+	struct mvs_device *mvi_dev;
 	struct mvs_task_exec_info tei;
 	struct sas_task *t = task;
 	struct mvs_slot_info *slot;
 	u32 tag = 0xdeadbeef, rc, n_elem = 0;
-	unsigned long flags;
 	u32 n = num, pass = 0;
+	unsigned long flags = 0;
 
-	spin_lock_irqsave(&mvi->lock, flags);
+	if (!dev->port) {
+		struct task_status_struct *tsm = &t->task_status;
+
+		tsm->resp = SAS_TASK_UNDELIVERED;
+		tsm->stat = SAS_PHY_DOWN;
+		t->task_done(t);
+		return 0;
+	}
+
+	mvi = mvs_find_dev_mvi(task->dev);
+
+	if (lock)
+		spin_lock_irqsave(&mvi->lock, flags);
 	do {
 		dev = t->dev;
-		tei.port = &mvi->port[dev->port->id];
+		mvi_dev = (struct mvs_device *)dev->lldd_dev;
+		if (DEV_IS_GONE(mvi_dev)) {
+			if (mvi_dev)
+				mv_dprintk("device %d not ready.\n",
+					mvi_dev->device_id);
+			else
+				mv_dprintk("device %016llx not ready.\n",
+					SAS_ADDR(dev->sas_addr));
+
+			rc = SAS_PHY_DOWN;
+			goto out_done;
+		}
+
+		if (dev->port->id >= mvi->chip->n_phy)
+			tei.port = &mvi->port[dev->port->id - mvi->chip->n_phy];
+		else
+			tei.port = &mvi->port[dev->port->id];
 
 		if (!tei.port->port_attached) {
 			if (sas_protocol_ata(t->task_proto)) {
+				mv_dprintk("port %d does not"
+					"attached device.\n", dev->port->id);
 				rc = SAS_PHY_DOWN;
 				goto out_done;
 			} else {
@@ -759,7 +932,8 @@ int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
 
 		if (!sas_protocol_ata(t->task_proto)) {
 			if (t->num_scatter) {
-				n_elem = pci_map_sg(mvi->pdev, t->scatter,
+				n_elem = dma_map_sg(mvi->dev,
+						    t->scatter,
 						    t->num_scatter,
 						    t->data_dir);
 				if (!n_elem) {
@@ -776,20 +950,23 @@ int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
 			goto err_out;
 
 		slot = &mvi->slot_info[tag];
+
+
 		t->lldd_task = NULL;
 		slot->n_elem = n_elem;
+		slot->slot_tag = tag;
 		memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
+
 		tei.task = t;
 		tei.hdr = &mvi->slot[tag];
 		tei.tag = tag;
 		tei.n_elem = n_elem;
-
 		switch (t->task_proto) {
 		case SAS_PROTOCOL_SMP:
 			rc = mvs_task_prep_smp(mvi, &tei);
 			break;
 		case SAS_PROTOCOL_SSP:
-			rc = mvs_task_prep_ssp(mvi, &tei);
+			rc = mvs_task_prep_ssp(mvi, &tei, is_tmf, tmf);
 			break;
 		case SAS_PROTOCOL_SATA:
 		case SAS_PROTOCOL_STP:
@@ -797,52 +974,61 @@ int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags)
 			rc = mvs_task_prep_ata(mvi, &tei);
 			break;
 		default:
-			dev_printk(KERN_ERR, &pdev->dev,
+			dev_printk(KERN_ERR, mvi->dev,
 				"unknown sas_task proto: 0x%x\n",
 				t->task_proto);
 			rc = -EINVAL;
 			break;
 		}
 
-		if (rc)
+		if (rc) {
+			mv_dprintk("rc is %x\n", rc);
 			goto err_out_tag;
-
+		}
 		slot->task = t;
 		slot->port = tei.port;
 		t->lldd_task = (void *) slot;
-		list_add_tail(&slot->list, &slot->port->list);
+		list_add_tail(&slot->entry, &tei.port->list);
 		/* TODO: select normal or high priority */
-
 		spin_lock(&t->task_state_lock);
 		t->task_state_flags |= SAS_TASK_AT_INITIATOR;
 		spin_unlock(&t->task_state_lock);
 
 		mvs_hba_memory_dump(mvi, tag, t->task_proto);
-
+		mvi_dev->runing_req++;
 		++pass;
 		mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
 		if (n > 1)
 			t = list_entry(t->list.next, struct sas_task, list);
 	} while (--n);
-
 	rc = 0;
 	goto out_done;
 
 err_out_tag:
 	mvs_tag_free(mvi, tag);
 err_out:
-	dev_printk(KERN_ERR, &pdev->dev, "mvsas exec failed[%d]!\n", rc);
+
+	dev_printk(KERN_ERR, mvi->dev, "mvsas exec failed[%d]!\n", rc);
 	if (!sas_protocol_ata(t->task_proto))
 		if (n_elem)
-			pci_unmap_sg(mvi->pdev, t->scatter, n_elem,
+			dma_unmap_sg(mvi->dev, t->scatter, n_elem,
 				     t->data_dir);
 out_done:
-	if (pass)
-		mw32(TX_PROD_IDX, (mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
-	spin_unlock_irqrestore(&mvi->lock, flags);
+	if (likely(pass)) {
+		MVS_CHIP_DISP->start_delivery(mvi,
+			(mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
+	}
+	if (lock)
+		spin_unlock_irqrestore(&mvi->lock, flags);
 	return rc;
 }
 
+int mvs_queue_command(struct sas_task *task, const int num,
+			gfp_t gfp_flags)
+{
+	return mvs_task_exec(task, num, gfp_flags, NULL, 1, 0, NULL);
+}
+
 static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
 {
 	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
@@ -852,16 +1038,18 @@ static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
 static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
 			  struct mvs_slot_info *slot, u32 slot_idx)
 {
+	if (!slot->task)
+		return;
 	if (!sas_protocol_ata(task->task_proto))
 		if (slot->n_elem)
-			pci_unmap_sg(mvi->pdev, task->scatter,
+			dma_unmap_sg(mvi->dev, task->scatter,
 				     slot->n_elem, task->data_dir);
 
 	switch (task->task_proto) {
 	case SAS_PROTOCOL_SMP:
-		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_resp, 1,
+		dma_unmap_sg(mvi->dev, &task->smp_task.smp_resp, 1,
 			     PCI_DMA_FROMDEVICE);
-		pci_unmap_sg(mvi->pdev, &task->smp_task.smp_req, 1,
+		dma_unmap_sg(mvi->dev, &task->smp_task.smp_req, 1,
 			     PCI_DMA_TODEVICE);
 		break;
 
@@ -872,10 +1060,12 @@ static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
 		/* do nothing */
 		break;
 	}
-	list_del(&slot->list);
+	list_del_init(&slot->entry);
 	task->lldd_task = NULL;
 	slot->task = NULL;
 	slot->port = NULL;
+	slot->slot_tag = 0xFFFFFFFF;
+	mvs_slot_free(mvi, slot_idx);
 }
 
 static void mvs_update_wideport(struct mvs_info *mvi, int i)
@@ -884,25 +1074,28 @@ static void mvs_update_wideport(struct mvs_info *mvi, int i)
 	struct mvs_port *port = phy->port;
 	int j, no;
 
-	for_each_phy(port->wide_port_phymap, no, j, mvi->chip->n_phy)
-		if (no & 1) {
-			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
-			mvs_write_port_cfg_data(mvi, no,
+	for_each_phy(port->wide_port_phymap, j, no) {
+		if (j & 1) {
+			MVS_CHIP_DISP->write_port_cfg_addr(mvi, no,
+						PHYR_WIDE_PORT);
+			MVS_CHIP_DISP->write_port_cfg_data(mvi, no,
 						port->wide_port_phymap);
 		} else {
-			mvs_write_port_cfg_addr(mvi, no, PHYR_WIDE_PORT);
-			mvs_write_port_cfg_data(mvi, no, 0);
+			MVS_CHIP_DISP->write_port_cfg_addr(mvi, no,
+						PHYR_WIDE_PORT);
+			MVS_CHIP_DISP->write_port_cfg_data(mvi, no,
+						0);
 		}
+	}
 }
 
 static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i)
 {
 	u32 tmp;
 	struct mvs_phy *phy = &mvi->phy[i];
-	struct mvs_port *port = phy->port;;
-
-	tmp = mvs_read_phy_ctl(mvi, i);
+	struct mvs_port *port = phy->port;
 
+	tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, i);
 	if ((tmp & PHY_READY_MASK) && !(phy->irq_status & PHYEV_POOF)) {
 		if (!port)
 			phy->phy_attached = 1;
@@ -917,7 +1110,6 @@ static u32 mvs_is_phy_ready(struct mvs_info *mvi, int i)
 			mvs_update_wideport(mvi, i);
 		} else if (phy->phy_type & PORT_TYPE_SATA)
 			port->port_attached = 0;
-		mvs_free_reg_set(mvi, phy->port);
 		phy->port = NULL;
 		phy->phy_attached = 0;
 		phy->phy_type &= ~(PORT_TYPE_SAS | PORT_TYPE_SATA);
@@ -932,17 +1124,21 @@ static void *mvs_get_d2h_reg(struct mvs_info *mvi, int i, void *buf)
 	if (!s)
 		return NULL;
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG3);
-	s[3] = mvs_read_port_cfg_data(mvi, i);
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, i, PHYR_SATA_SIG3);
+	s[3] = MVS_CHIP_DISP->read_port_cfg_data(mvi, i);
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG2);
-	s[2] = mvs_read_port_cfg_data(mvi, i);
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, i, PHYR_SATA_SIG2);
+	s[2] = MVS_CHIP_DISP->read_port_cfg_data(mvi, i);
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG1);
-	s[1] = mvs_read_port_cfg_data(mvi, i);
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, i, PHYR_SATA_SIG1);
+	s[1] = MVS_CHIP_DISP->read_port_cfg_data(mvi, i);
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_SATA_SIG0);
-	s[0] = mvs_read_port_cfg_data(mvi, i);
+	MVS_CHIP_DISP->write_port_cfg_addr(mvi, i, PHYR_SATA_SIG0);
+	s[0] = MVS_CHIP_DISP->read_port_cfg_data(mvi, i);
+
+	/* Workaround: take some ATAPI devices for ATA */
+	if (((s[1] & 0x00FFFFFF) == 0x00EB1401) && (*(u8 *)&s[3] == 0x01))
+		s[1] = 0x00EB1401 | (*((u8 *)&s[1] + 3) & 0x10);
 
 	return (void *)s;
 }
@@ -952,56 +1148,53 @@ static u32 mvs_is_sig_fis_received(u32 irq_status)
 	return irq_status & PHYEV_SIG_FIS;
 }
 
-static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
-					int get_st)
+void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st)
 {
 	struct mvs_phy *phy = &mvi->phy[i];
-	struct pci_dev *pdev = mvi->pdev;
-	u32 tmp;
-	u64 tmp64;
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_IDENTIFY);
-	phy->dev_info = mvs_read_port_cfg_data(mvi, i);
-
-	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
-	phy->dev_sas_addr = (u64) mvs_read_port_cfg_data(mvi, i) << 32;
+	struct sas_identify_frame *id;
 
-	mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
-	phy->dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
+	id = (struct sas_identify_frame *)phy->frame_rcvd;
 
 	if (get_st) {
-		phy->irq_status = mvs_read_port_irq_stat(mvi, i);
+		phy->irq_status = MVS_CHIP_DISP->read_port_irq_stat(mvi, i);
 		phy->phy_status = mvs_is_phy_ready(mvi, i);
 	}
 
 	if (phy->phy_status) {
-		u32 phy_st;
-		struct asd_sas_phy *sas_phy = mvi->sas.sas_phy[i];
-
-		mvs_write_port_cfg_addr(mvi, i, PHYR_PHY_STAT);
-		phy_st = mvs_read_port_cfg_data(mvi, i);
-
-		sas_phy->linkrate =
-			(phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
-				PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET;
-		phy->minimum_linkrate =
-			(phy->phy_status &
-				PHY_MIN_SPP_PHYS_LINK_RATE_MASK) >> 8;
-		phy->maximum_linkrate =
-			(phy->phy_status &
-				PHY_MAX_SPP_PHYS_LINK_RATE_MASK) >> 12;
+		int oob_done = 0;
+		struct asd_sas_phy *sas_phy = &mvi->phy[i].sas_phy;
 
-		if (phy->phy_type & PORT_TYPE_SAS) {
-			/* Updated attached_sas_addr */
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_HI);
-			phy->att_dev_sas_addr =
-				(u64) mvs_read_port_cfg_data(mvi, i) << 32;
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_ADDR_LO);
-			phy->att_dev_sas_addr |= mvs_read_port_cfg_data(mvi, i);
-			mvs_write_port_cfg_addr(mvi, i, PHYR_ATT_DEV_INFO);
-			phy->att_dev_info = mvs_read_port_cfg_data(mvi, i);
+		oob_done = MVS_CHIP_DISP->oob_done(mvi, i);
+
+		MVS_CHIP_DISP->fix_phy_info(mvi, i, id);
+		if (phy->phy_type & PORT_TYPE_SATA) {
+			phy->identify.target_port_protocols = SAS_PROTOCOL_STP;
+			if (mvs_is_sig_fis_received(phy->irq_status)) {
+				phy->phy_attached = 1;
+				phy->att_dev_sas_addr =
+					i + mvi->id * mvi->chip->n_phy;
+				if (oob_done)
+					sas_phy->oob_mode = SATA_OOB_MODE;
+				phy->frame_rcvd_size =
+				    sizeof(struct dev_to_host_fis);
+				mvs_get_d2h_reg(mvi, i, (void *)id);
+			} else {
+				u32 tmp;
+				dev_printk(KERN_DEBUG, mvi->dev,
+					"Phy%d : No sig fis\n", i);
+				tmp = MVS_CHIP_DISP->read_port_irq_mask(mvi, i);
+				MVS_CHIP_DISP->write_port_irq_mask(mvi, i,
+						tmp | PHYEV_SIG_FIS);
+				phy->phy_attached = 0;
+				phy->phy_type &= ~PORT_TYPE_SATA;
+				MVS_CHIP_DISP->phy_reset(mvi, i, 0);
+				goto out_done;
+			}
+		}		else if (phy->phy_type & PORT_TYPE_SAS
+			|| phy->att_dev_info & PORT_SSP_INIT_MASK) {
+			phy->phy_attached = 1;
 			phy->identify.device_type =
-			    phy->att_dev_info & PORT_DEV_TYPE_MASK;
+				phy->att_dev_info & PORT_DEV_TYPE_MASK;
 
 			if (phy->identify.device_type == SAS_END_DEV)
 				phy->identify.target_port_protocols =
@@ -1009,810 +1202,956 @@ static void mvs_update_phyinfo(struct mvs_info *mvi, int i,
 			else if (phy->identify.device_type != NO_DEVICE)
 				phy->identify.target_port_protocols =
 							SAS_PROTOCOL_SMP;
-			if (phy_st & PHY_OOB_DTCTD)
+			if (oob_done)
 				sas_phy->oob_mode = SAS_OOB_MODE;
 			phy->frame_rcvd_size =
 			    sizeof(struct sas_identify_frame);
-		} else if (phy->phy_type & PORT_TYPE_SATA) {
-			phy->identify.target_port_protocols = SAS_PROTOCOL_STP;
-			if (mvs_is_sig_fis_received(phy->irq_status)) {
-				phy->att_dev_sas_addr = i;	/* temp */
-				if (phy_st & PHY_OOB_DTCTD)
-					sas_phy->oob_mode = SATA_OOB_MODE;
-				phy->frame_rcvd_size =
-				    sizeof(struct dev_to_host_fis);
-				mvs_get_d2h_reg(mvi, i,
-						(void *)sas_phy->frame_rcvd);
-			} else {
-				dev_printk(KERN_DEBUG, &pdev->dev,
-					"No sig fis\n");
-				phy->phy_type &= ~(PORT_TYPE_SATA);
-				goto out_done;
-			}
 		}
-		tmp64 = cpu_to_be64(phy->att_dev_sas_addr);
-		memcpy(sas_phy->attached_sas_addr, &tmp64, SAS_ADDR_SIZE);
-
-		dev_printk(KERN_DEBUG, &pdev->dev,
-			"phy[%d] Get Attached Address 0x%llX ,"
-			" SAS Address 0x%llX\n",
-			i,
-			(unsigned long long)phy->att_dev_sas_addr,
-			(unsigned long long)phy->dev_sas_addr);
-		dev_printk(KERN_DEBUG, &pdev->dev,
-			"Rate = %x , type = %d\n",
-			sas_phy->linkrate, phy->phy_type);
-
-		/* workaround for HW phy decoding error on 1.5g disk drive */
-		mvs_write_port_vsr_addr(mvi, i, VSR_PHY_MODE6);
-		tmp = mvs_read_port_vsr_data(mvi, i);
-		if (((phy->phy_status & PHY_NEG_SPP_PHYS_LINK_RATE_MASK) >>
-		     PHY_NEG_SPP_PHYS_LINK_RATE_MASK_OFFSET) ==
-			SAS_LINK_RATE_1_5_GBPS)
-			tmp &= ~PHY_MODE6_LATECLK;
-		else
-			tmp |= PHY_MODE6_LATECLK;
-		mvs_write_port_vsr_data(mvi, i, tmp);
+		memcpy(sas_phy->attached_sas_addr,
+			&phy->att_dev_sas_addr, SAS_ADDR_SIZE);
 
+		if (MVS_CHIP_DISP->phy_work_around)
+			MVS_CHIP_DISP->phy_work_around(mvi, i);
 	}
+	mv_dprintk("port %d attach dev info is %x\n",
+		i + mvi->id * mvi->chip->n_phy, phy->att_dev_info);
+	mv_dprintk("port %d attach sas addr is %llx\n",
+		i + mvi->id * mvi->chip->n_phy, phy->att_dev_sas_addr);
 out_done:
 	if (get_st)
-		mvs_write_port_irq_stat(mvi, i, phy->irq_status);
+		MVS_CHIP_DISP->write_port_irq_stat(mvi, i, phy->irq_status);
 }
 
-void mvs_port_formed(struct asd_sas_phy *sas_phy)
+static void mvs_port_notify_formed(struct asd_sas_phy *sas_phy, int lock)
 {
 	struct sas_ha_struct *sas_ha = sas_phy->ha;
-	struct mvs_info *mvi = sas_ha->lldd_ha;
-	struct asd_sas_port *sas_port = sas_phy->port;
+	struct mvs_info *mvi = NULL; int i = 0, hi;
 	struct mvs_phy *phy = sas_phy->lldd_phy;
-	struct mvs_port *port = &mvi->port[sas_port->id];
-	unsigned long flags;
+	struct asd_sas_port *sas_port = sas_phy->port;
+	struct mvs_port *port;
+	unsigned long flags = 0;
+	if (!sas_port)
+		return;
 
-	spin_lock_irqsave(&mvi->lock, flags);
+	while (sas_ha->sas_phy[i]) {
+		if (sas_ha->sas_phy[i] == sas_phy)
+			break;
+		i++;
+	}
+	hi = i/((struct mvs_prv_info *)sas_ha->lldd_ha)->n_phy;
+	mvi = ((struct mvs_prv_info *)sas_ha->lldd_ha)->mvi[hi];
+	if (sas_port->id >= mvi->chip->n_phy)
+		port = &mvi->port[sas_port->id - mvi->chip->n_phy];
+	else
+		port = &mvi->port[sas_port->id];
+	if (lock)
+		spin_lock_irqsave(&mvi->lock, flags);
 	port->port_attached = 1;
 	phy->port = port;
-	port->taskfileset = MVS_ID_NOT_MAPPED;
 	if (phy->phy_type & PORT_TYPE_SAS) {
 		port->wide_port_phymap = sas_port->phy_mask;
+		mv_printk("set wide port phy map %x\n", sas_port->phy_mask);
 		mvs_update_wideport(mvi, sas_phy->id);
 	}
-	spin_unlock_irqrestore(&mvi->lock, flags);
+	if (lock)
+		spin_unlock_irqrestore(&mvi->lock, flags);
 }
 
-int mvs_I_T_nexus_reset(struct domain_device *dev)
+static void mvs_port_notify_deformed(struct asd_sas_phy *sas_phy, int lock)
 {
-	return TMF_RESP_FUNC_FAILED;
+	/*Nothing*/
 }
 
-static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
-			u32 slot_idx, int err)
-{
-	struct mvs_port *port = mvi->slot_info[slot_idx].port;
-	struct task_status_struct *tstat = &task->task_status;
-	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
-	int stat = SAM_GOOD;
 
-	resp->frame_len = sizeof(struct dev_to_host_fis);
-	memcpy(&resp->ending_fis[0],
-	       SATA_RECEIVED_D2H_FIS(port->taskfileset),
-	       sizeof(struct dev_to_host_fis));
-	tstat->buf_valid_size = sizeof(*resp);
-	if (unlikely(err))
-		stat = SAS_PROTO_RESPONSE;
-	return stat;
+void mvs_port_formed(struct asd_sas_phy *sas_phy)
+{
+	mvs_port_notify_formed(sas_phy, 1);
 }
 
-static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
-			 u32 slot_idx)
+void mvs_port_deformed(struct asd_sas_phy *sas_phy)
 {
-	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
-	u32 err_dw0 = le32_to_cpu(*(u32 *) (slot->response));
-	u32 err_dw1 = le32_to_cpu(*(u32 *) (slot->response + 4));
-	int stat = SAM_CHECK_COND;
+	mvs_port_notify_deformed(sas_phy, 1);
+}
 
-	if (err_dw1 & SLOT_BSY_ERR) {
-		stat = SAS_QUEUE_FULL;
-		mvs_slot_reset(mvi, task, slot_idx);
-	}
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SSP:
-		break;
-	case SAS_PROTOCOL_SMP:
-		break;
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
-		if (err_dw0 & TFILE_ERR)
-			stat = mvs_sata_done(mvi, task, slot_idx, 1);
-		break;
-	default:
-		break;
+struct mvs_device *mvs_alloc_dev(struct mvs_info *mvi)
+{
+	u32 dev;
+	for (dev = 0; dev < MVS_MAX_DEVICES; dev++) {
+		if (mvi->devices[dev].dev_type == NO_DEVICE) {
+			mvi->devices[dev].device_id = dev;
+			return &mvi->devices[dev];
+		}
 	}
 
-	mvs_hexdump(16, (u8 *) slot->response, 0);
-	return stat;
+	if (dev == MVS_MAX_DEVICES)
+		mv_printk("max support %d devices, ignore ..\n",
+			MVS_MAX_DEVICES);
+
+	return NULL;
 }
 
-static int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
+void mvs_free_dev(struct mvs_device *mvi_dev)
 {
-	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
-	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
-	struct sas_task *task = slot->task;
-	struct task_status_struct *tstat;
-	struct mvs_port *port;
-	bool aborted;
-	void *to;
+	u32 id = mvi_dev->device_id;
+	memset(mvi_dev, 0, sizeof(*mvi_dev));
+	mvi_dev->device_id = id;
+	mvi_dev->dev_type = NO_DEVICE;
+	mvi_dev->dev_status = MVS_DEV_NORMAL;
+	mvi_dev->taskfileset = MVS_ID_NOT_MAPPED;
+}
 
-	if (unlikely(!task || !task->lldd_task))
-		return -1;
+int mvs_dev_found_notify(struct domain_device *dev, int lock)
+{
+	unsigned long flags = 0;
+	int res = 0;
+	struct mvs_info *mvi = NULL;
+	struct domain_device *parent_dev = dev->parent;
+	struct mvs_device *mvi_device;
 
-	mvs_hba_cq_dump(mvi);
+	mvi = mvs_find_dev_mvi(dev);
 
-	spin_lock(&task->task_state_lock);
-	aborted = task->task_state_flags & SAS_TASK_STATE_ABORTED;
-	if (!aborted) {
-		task->task_state_flags &=
-		    ~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
-		task->task_state_flags |= SAS_TASK_STATE_DONE;
+	if (lock)
+		spin_lock_irqsave(&mvi->lock, flags);
+
+	mvi_device = mvs_alloc_dev(mvi);
+	if (!mvi_device) {
+		res = -1;
+		goto found_out;
 	}
-	spin_unlock(&task->task_state_lock);
+	dev->lldd_dev = (void *)mvi_device;
+	mvi_device->dev_type = dev->dev_type;
+
+	if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) {
+		int phy_id;
+		u8 phy_num = parent_dev->ex_dev.num_phys;
+		struct ex_phy *phy;
+		for (phy_id = 0; phy_id < phy_num; phy_id++) {
+			phy = &parent_dev->ex_dev.ex_phy[phy_id];
+			if (SAS_ADDR(phy->attached_sas_addr) ==
+				SAS_ADDR(dev->sas_addr)) {
+				mvi_device->attached_phy = phy_id;
+				break;
+			}
+		}
 
-	if (aborted) {
-		mvs_slot_task_free(mvi, task, slot, slot_idx);
-		mvs_slot_free(mvi, rx_desc);
-		return -1;
+		if (phy_id == phy_num) {
+			mv_printk("Error: no attached dev:%016llx"
+				"at ex:%016llx.\n",
+				SAS_ADDR(dev->sas_addr),
+				SAS_ADDR(parent_dev->sas_addr));
+			res = -1;
+		}
 	}
 
-	port = slot->port;
-	tstat = &task->task_status;
-	memset(tstat, 0, sizeof(*tstat));
-	tstat->resp = SAS_TASK_COMPLETE;
+found_out:
+	if (lock)
+		spin_unlock_irqrestore(&mvi->lock, flags);
+	return res;
+}
 
-	if (unlikely(!port->port_attached || flags)) {
-		mvs_slot_err(mvi, task, slot_idx);
-		if (!sas_protocol_ata(task->task_proto))
-			tstat->stat = SAS_PHY_DOWN;
-		goto out;
-	}
+int mvs_dev_found(struct domain_device *dev)
+{
+	return mvs_dev_found_notify(dev, 1);
+}
 
-	/* error info record present */
-	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
-		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
-		goto out;
-	}
-
-	switch (task->task_proto) {
-	case SAS_PROTOCOL_SSP:
-		/* hw says status == 0, datapres == 0 */
-		if (rx_desc & RXQ_GOOD) {
-			tstat->stat = SAM_GOOD;
-			tstat->resp = SAS_TASK_COMPLETE;
-		}
-		/* response frame present */
-		else if (rx_desc & RXQ_RSP) {
-			struct ssp_response_iu *iu =
-			    slot->response + sizeof(struct mvs_err_info);
-			sas_ssp_task_response(&mvi->pdev->dev, task, iu);
-		}
-
-		/* should never happen? */
-		else
-			tstat->stat = SAM_CHECK_COND;
-		break;
+void mvs_dev_gone_notify(struct domain_device *dev, int lock)
+{
+	unsigned long flags = 0;
+	struct mvs_info *mvi;
+	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
 
-	case SAS_PROTOCOL_SMP: {
-			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
-			tstat->stat = SAM_GOOD;
-			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
-			memcpy(to + sg_resp->offset,
-				slot->response + sizeof(struct mvs_err_info),
-				sg_dma_len(sg_resp));
-			kunmap_atomic(to, KM_IRQ0);
-			break;
-		}
+	mvi = mvs_find_dev_mvi(dev);
 
-	case SAS_PROTOCOL_SATA:
-	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: {
-			tstat->stat = mvs_sata_done(mvi, task, slot_idx, 0);
-			break;
-		}
+	if (lock)
+		spin_lock_irqsave(&mvi->lock, flags);
 
-	default:
-		tstat->stat = SAM_CHECK_COND;
-		break;
+	if (mvi_dev) {
+		mv_dprintk("found dev[%d:%x] is gone.\n",
+			mvi_dev->device_id, mvi_dev->dev_type);
+		mvs_free_reg_set(mvi, mvi_dev);
+		mvs_free_dev(mvi_dev);
+	} else {
+		mv_dprintk("found dev has gone.\n");
 	}
+	dev->lldd_dev = NULL;
 
-out:
-	mvs_slot_task_free(mvi, task, slot, slot_idx);
-	if (unlikely(tstat->stat != SAS_QUEUE_FULL))
-		mvs_slot_free(mvi, rx_desc);
-
-	spin_unlock(&mvi->lock);
-	task->task_done(task);
-	spin_lock(&mvi->lock);
-	return tstat->stat;
+	if (lock)
+		spin_unlock_irqrestore(&mvi->lock, flags);
 }
 
-static void mvs_release_task(struct mvs_info *mvi, int phy_no)
-{
-	struct list_head *pos, *n;
-	struct mvs_slot_info *slot;
-	struct mvs_phy *phy = &mvi->phy[phy_no];
-	struct mvs_port *port = phy->port;
-	u32 rx_desc;
 
-	if (!port)
-		return;
+void mvs_dev_gone(struct domain_device *dev)
+{
+	mvs_dev_gone_notify(dev, 1);
+}
 
-	list_for_each_safe(pos, n, &port->list) {
-		slot = container_of(pos, struct mvs_slot_info, list);
-		rx_desc = (u32) (slot - mvi->slot_info);
-		mvs_slot_complete(mvi, rx_desc, 1);
+static  struct sas_task *mvs_alloc_task(void)
+{
+	struct sas_task *task = kzalloc(sizeof(struct sas_task), GFP_KERNEL);
+
+	if (task) {
+		INIT_LIST_HEAD(&task->list);
+		spin_lock_init(&task->task_state_lock);
+		task->task_state_flags = SAS_TASK_STATE_PENDING;
+		init_timer(&task->timer);
+		init_completion(&task->completion);
 	}
+	return task;
 }
 
-static void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
+static  void mvs_free_task(struct sas_task *task)
 {
-	struct pci_dev *pdev = mvi->pdev;
-	struct sas_ha_struct *sas_ha = &mvi->sas;
-	struct mvs_phy *phy = &mvi->phy[phy_no];
-	struct asd_sas_phy *sas_phy = &phy->sas_phy;
-
-	phy->irq_status = mvs_read_port_irq_stat(mvi, phy_no);
-	/*
-	* events is port event now ,
-	* we need check the interrupt status which belongs to per port.
-	*/
-	dev_printk(KERN_DEBUG, &pdev->dev,
-		"Port %d Event = %X\n",
-		phy_no, phy->irq_status);
-
-	if (phy->irq_status & (PHYEV_POOF | PHYEV_DEC_ERR)) {
-		mvs_release_task(mvi, phy_no);
-		if (!mvs_is_phy_ready(mvi, phy_no)) {
-			sas_phy_disconnected(sas_phy);
-			sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL);
-			dev_printk(KERN_INFO, &pdev->dev,
-				"Port %d Unplug Notice\n", phy_no);
-
-		} else
-			mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET, NULL);
+	if (task) {
+		BUG_ON(!list_empty(&task->list));
+		kfree(task);
 	}
-	if (!(phy->irq_status & PHYEV_DEC_ERR)) {
-		if (phy->irq_status & PHYEV_COMWAKE) {
-			u32 tmp = mvs_read_port_irq_mask(mvi, phy_no);
-			mvs_write_port_irq_mask(mvi, phy_no,
-						tmp | PHYEV_SIG_FIS);
-		}
-		if (phy->irq_status & (PHYEV_SIG_FIS | PHYEV_ID_DONE)) {
-			phy->phy_status = mvs_is_phy_ready(mvi, phy_no);
-			if (phy->phy_status) {
-				mvs_detect_porttype(mvi, phy_no);
-
-				if (phy->phy_type & PORT_TYPE_SATA) {
-					u32 tmp = mvs_read_port_irq_mask(mvi,
-								phy_no);
-					tmp &= ~PHYEV_SIG_FIS;
-					mvs_write_port_irq_mask(mvi,
-								phy_no, tmp);
-				}
+}
 
-				mvs_update_phyinfo(mvi, phy_no, 0);
-				sas_ha->notify_phy_event(sas_phy,
-							PHYE_OOB_DONE);
-				mvs_bytes_dmaed(mvi, phy_no);
-			} else {
-				dev_printk(KERN_DEBUG, &pdev->dev,
-					"plugin interrupt but phy is gone\n");
-				mvs_phy_control(sas_phy, PHY_FUNC_LINK_RESET,
-							NULL);
-			}
-		} else if (phy->irq_status & PHYEV_BROAD_CH) {
-			mvs_release_task(mvi, phy_no);
-			sas_ha->notify_port_event(sas_phy,
-						PORTE_BROADCAST_RCVD);
-		}
-	}
-	mvs_write_port_irq_stat(mvi, phy_no, phy->irq_status);
+static void mvs_task_done(struct sas_task *task)
+{
+	if (!del_timer(&task->timer))
+		return;
+	complete(&task->completion);
 }
 
-static int mvs_int_rx(struct mvs_info *mvi, bool self_clear)
+static void mvs_tmf_timedout(unsigned long data)
 {
-	void __iomem *regs = mvi->regs;
-	u32 rx_prod_idx, rx_desc;
-	bool attn = false;
-	struct pci_dev *pdev = mvi->pdev;
+	struct sas_task *task = (struct sas_task *)data;
 
-	/* the first dword in the RX ring is special: it contains
-	 * a mirror of the hardware's RX producer index, so that
-	 * we don't have to stall the CPU reading that register.
-	 * The actual RX ring is offset by one dword, due to this.
-	 */
-	rx_prod_idx = mvi->rx_cons;
-	mvi->rx_cons = le32_to_cpu(mvi->rx[0]);
-	if (mvi->rx_cons == 0xfff)	/* h/w hasn't touched RX ring yet */
-		return 0;
+	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	complete(&task->completion);
+}
 
-	/* The CMPL_Q may come late, read from register and try again
-	* note: if coalescing is enabled,
-	* it will need to read from register every time for sure
-	*/
-	if (mvi->rx_cons == rx_prod_idx)
-		mvi->rx_cons = mr32(RX_CONS_IDX) & RX_RING_SZ_MASK;
+/* XXX */
+#define MVS_TASK_TIMEOUT 20
+static int mvs_exec_internal_tmf_task(struct domain_device *dev,
+			void *parameter, u32 para_len, struct mvs_tmf_task *tmf)
+{
+	int res, retry;
+	struct sas_task *task = NULL;
 
-	if (mvi->rx_cons == rx_prod_idx)
-		return 0;
+	for (retry = 0; retry < 3; retry++) {
+		task = mvs_alloc_task();
+		if (!task)
+			return -ENOMEM;
 
-	while (mvi->rx_cons != rx_prod_idx) {
+		task->dev = dev;
+		task->task_proto = dev->tproto;
 
-		/* increment our internal RX consumer pointer */
-		rx_prod_idx = (rx_prod_idx + 1) & (MVS_RX_RING_SZ - 1);
+		memcpy(&task->ssp_task, parameter, para_len);
+		task->task_done = mvs_task_done;
 
-		rx_desc = le32_to_cpu(mvi->rx[rx_prod_idx + 1]);
+		task->timer.data = (unsigned long) task;
+		task->timer.function = mvs_tmf_timedout;
+		task->timer.expires = jiffies + MVS_TASK_TIMEOUT*HZ;
+		add_timer(&task->timer);
 
-		if (likely(rx_desc & RXQ_DONE))
-			mvs_slot_complete(mvi, rx_desc, 0);
-		if (rx_desc & RXQ_ATTN) {
-			attn = true;
-			dev_printk(KERN_DEBUG, &pdev->dev, "ATTN %X\n",
-				rx_desc);
-		} else if (rx_desc & RXQ_ERR) {
-			if (!(rx_desc & RXQ_DONE))
-				mvs_slot_complete(mvi, rx_desc, 0);
-			dev_printk(KERN_DEBUG, &pdev->dev, "RXQ_ERR %X\n",
-				rx_desc);
-		} else if (rx_desc & RXQ_SLOT_RESET) {
-			dev_printk(KERN_DEBUG, &pdev->dev, "Slot reset[%X]\n",
-				rx_desc);
-			mvs_slot_free(mvi, rx_desc);
-		}
-	}
+		res = mvs_task_exec(task, 1, GFP_KERNEL, NULL, 0, 1, tmf);
 
-	if (attn && self_clear)
-		mvs_int_full(mvi);
+		if (res) {
+			del_timer(&task->timer);
+			mv_printk("executing internel task failed:%d\n", res);
+			goto ex_err;
+		}
 
-	return 0;
-}
+		wait_for_completion(&task->completion);
+		res = -TMF_RESP_FUNC_FAILED;
+		/* Even TMF timed out, return direct. */
+		if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
+			if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+				mv_printk("TMF task[%x] timeout.\n", tmf->tmf);
+				goto ex_err;
+			}
+		}
 
-#ifndef MVS_DISABLE_NVRAM
-static int mvs_eep_read(void __iomem *regs, u32 addr, u32 *data)
-{
-	int timeout = 1000;
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+		    task->task_status.stat == SAM_GOOD) {
+			res = TMF_RESP_FUNC_COMPLETE;
+			break;
+		}
 
-	if (addr & ~SPI_ADDR_MASK)
-		return -EINVAL;
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+		      task->task_status.stat == SAS_DATA_UNDERRUN) {
+			/* no error, but return the number of bytes of
+			 * underrun */
+			res = task->task_status.residual;
+			break;
+		}
 
-	writel(addr, regs + SPI_CMD);
-	writel(TWSI_RD, regs + SPI_CTL);
+		if (task->task_status.resp == SAS_TASK_COMPLETE &&
+		      task->task_status.stat == SAS_DATA_OVERRUN) {
+			mv_dprintk("blocked task error.\n");
+			res = -EMSGSIZE;
+			break;
+		} else {
+			mv_dprintk(" task to dev %016llx response: 0x%x "
+				    "status 0x%x\n",
+				    SAS_ADDR(dev->sas_addr),
+				    task->task_status.resp,
+				    task->task_status.stat);
+			mvs_free_task(task);
+			task = NULL;
 
-	while (timeout-- > 0) {
-		if (readl(regs + SPI_CTL) & TWSI_RDY) {
-			*data = readl(regs + SPI_DATA);
-			return 0;
 		}
-
-		udelay(10);
 	}
-
-	return -EBUSY;
+ex_err:
+	BUG_ON(retry == 3 && task != NULL);
+	if (task != NULL)
+		mvs_free_task(task);
+	return res;
 }
 
-static int mvs_eep_read_buf(void __iomem *regs, u32 addr,
-			    void *buf, u32 buflen)
+static int mvs_debug_issue_ssp_tmf(struct domain_device *dev,
+				u8 *lun, struct mvs_tmf_task *tmf)
 {
-	u32 addr_end, tmp_addr, i, j;
-	u32 tmp = 0;
-	int rc;
-	u8 *tmp8, *buf8 = buf;
+	struct sas_ssp_task ssp_task;
+	DECLARE_COMPLETION_ONSTACK(completion);
+	if (!(dev->tproto & SAS_PROTOCOL_SSP))
+		return TMF_RESP_FUNC_ESUPP;
 
-	addr_end = addr + buflen;
-	tmp_addr = ALIGN(addr, 4);
-	if (addr > 0xff)
-		return -EINVAL;
+	strncpy((u8 *)&ssp_task.LUN, lun, 8);
 
-	j = addr & 0x3;
-	if (j) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
-
-		tmp8 = (u8 *)&tmp;
-		for (i = j; i < 4; i++)
-			*buf8++ = tmp8[i];
+	return mvs_exec_internal_tmf_task(dev, &ssp_task,
+				sizeof(ssp_task), tmf);
+}
 
-		tmp_addr += 4;
-	}
 
-	for (j = ALIGN(addr_end, 4); tmp_addr < j; tmp_addr += 4) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
-			return rc;
+/*  Standard mandates link reset for ATA  (type 0)
+    and hard reset for SSP (type 1) , only for RECOVERY */
+static int mvs_debug_I_T_nexus_reset(struct domain_device *dev)
+{
+	int rc;
+	struct sas_phy *phy = sas_find_local_phy(dev);
+	int reset_type = (dev->dev_type == SATA_DEV ||
+			(dev->tproto & SAS_PROTOCOL_STP)) ? 0 : 1;
+	rc = sas_phy_reset(phy, reset_type);
+	msleep(2000);
+	return rc;
+}
 
-		memcpy(buf8, &tmp, 4);
-		buf8 += 4;
+/* mandatory SAM-3 */
+int mvs_lu_reset(struct domain_device *dev, u8 *lun)
+{
+	unsigned long flags;
+	int i, phyno[WIDE_PORT_MAX_PHY], num , rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_tmf_task tmf_task;
+	struct mvs_info *mvi = mvs_find_dev_mvi(dev);
+	struct mvs_device * mvi_dev = (struct mvs_device *)dev->lldd_dev;
+
+	tmf_task.tmf = TMF_LU_RESET;
+	mvi_dev->dev_status = MVS_DEV_EH;
+	rc = mvs_debug_issue_ssp_tmf(dev, lun, &tmf_task);
+	if (rc == TMF_RESP_FUNC_COMPLETE) {
+		num = mvs_find_dev_phyno(dev, phyno);
+		spin_lock_irqsave(&mvi->lock, flags);
+		for (i = 0; i < num; i++)
+			mvs_release_task(mvi, phyno[i], dev);
+		spin_unlock_irqrestore(&mvi->lock, flags);
 	}
+	/* If failed, fall-through I_T_Nexus reset */
+	mv_printk("%s for device[%x]:rc= %d\n", __func__,
+			mvi_dev->device_id, rc);
+	return rc;
+}
 
-	if (tmp_addr < addr_end) {
-		rc = mvs_eep_read(regs, tmp_addr, &tmp);
-		if (rc)
+int mvs_I_T_nexus_reset(struct domain_device *dev)
+{
+	unsigned long flags;
+	int i, phyno[WIDE_PORT_MAX_PHY], num , rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_info *mvi = mvs_find_dev_mvi(dev);
+	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+
+	if (mvi_dev->dev_status != MVS_DEV_EH)
+		return TMF_RESP_FUNC_COMPLETE;
+	rc = mvs_debug_I_T_nexus_reset(dev);
+	mv_printk("%s for device[%x]:rc= %d\n",
+		__func__, mvi_dev->device_id, rc);
+
+	/* housekeeper */
+	num = mvs_find_dev_phyno(dev, phyno);
+	spin_lock_irqsave(&mvi->lock, flags);
+	for (i = 0; i < num; i++)
+		mvs_release_task(mvi, phyno[i], dev);
+	spin_unlock_irqrestore(&mvi->lock, flags);
+
+	return rc;
+}
+/* optional SAM-3 */
+int mvs_query_task(struct sas_task *task)
+{
+	u32 tag;
+	struct scsi_lun lun;
+	struct mvs_tmf_task tmf_task;
+	int rc = TMF_RESP_FUNC_FAILED;
+
+	if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) {
+		struct scsi_cmnd * cmnd = (struct scsi_cmnd *)task->uldd_task;
+		struct domain_device *dev = task->dev;
+		struct mvs_info *mvi = mvs_find_dev_mvi(dev);
+
+		int_to_scsilun(cmnd->device->lun, &lun);
+		rc = mvs_find_tag(mvi, task, &tag);
+		if (rc == 0) {
+			rc = TMF_RESP_FUNC_FAILED;
 			return rc;
+		}
 
-		tmp8 = (u8 *)&tmp;
-		j = addr_end - tmp_addr;
-		for (i = 0; i < j; i++)
-			*buf8++ = tmp8[i];
+		tmf_task.tmf = TMF_QUERY_TASK;
+		tmf_task.tag_of_task_to_be_managed = cpu_to_le16(tag);
 
-		tmp_addr += 4;
+		rc = mvs_debug_issue_ssp_tmf(dev, lun.scsi_lun, &tmf_task);
+		switch (rc) {
+		/* The task is still in Lun, release it then */
+		case TMF_RESP_FUNC_SUCC:
+		/* The task is not in Lun or failed, reset the phy */
+		case TMF_RESP_FUNC_FAILED:
+		case TMF_RESP_FUNC_COMPLETE:
+			break;
+		}
 	}
-
-	return 0;
+	mv_printk("%s:rc= %d\n", __func__, rc);
+	return rc;
 }
-#endif
 
-int mvs_nvram_read(struct mvs_info *mvi, u32 addr, void *buf, u32 buflen)
+/*  mandatory SAM-3, still need free task/slot info */
+int mvs_abort_task(struct sas_task *task)
 {
-#ifndef MVS_DISABLE_NVRAM
-	void __iomem *regs = mvi->regs;
-	int rc, i;
-	u32 sum;
-	u8 hdr[2], *tmp;
-	const char *msg;
-
-	rc = mvs_eep_read_buf(regs, addr, &hdr, 2);
-	if (rc) {
-		msg = "nvram hdr read failed";
-		goto err_out;
-	}
-	rc = mvs_eep_read_buf(regs, addr + 2, buf, buflen);
-	if (rc) {
-		msg = "nvram read failed";
-		goto err_out;
+	struct scsi_lun lun;
+	struct mvs_tmf_task tmf_task;
+	struct domain_device *dev = task->dev;
+	struct mvs_info *mvi = mvs_find_dev_mvi(dev);
+	int rc = TMF_RESP_FUNC_FAILED;
+	unsigned long flags;
+	u32 tag;
+	if (mvi->exp_req)
+		mvi->exp_req--;
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		rc = TMF_RESP_FUNC_COMPLETE;
+		goto out;
 	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+	if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) {
+		struct scsi_cmnd * cmnd = (struct scsi_cmnd *)task->uldd_task;
+
+		int_to_scsilun(cmnd->device->lun, &lun);
+		rc = mvs_find_tag(mvi, task, &tag);
+		if (rc == 0) {
+			mv_printk("No such tag in %s\n", __func__);
+			rc = TMF_RESP_FUNC_FAILED;
+			return rc;
+		}
 
-	if (hdr[0] != 0x5A) {
-		/* entry id */
-		msg = "invalid nvram entry id";
-		rc = -ENOENT;
-		goto err_out;
-	}
+		tmf_task.tmf = TMF_ABORT_TASK;
+		tmf_task.tag_of_task_to_be_managed = cpu_to_le16(tag);
 
-	tmp = buf;
-	sum = ((u32)hdr[0]) + ((u32)hdr[1]);
-	for (i = 0; i < buflen; i++)
-		sum += ((u32)tmp[i]);
+		rc = mvs_debug_issue_ssp_tmf(dev, lun.scsi_lun, &tmf_task);
 
-	if (sum) {
-		msg = "nvram checksum failure";
-		rc = -EILSEQ;
-		goto err_out;
-	}
+		/* if successful, clear the task and callback forwards.*/
+		if (rc == TMF_RESP_FUNC_COMPLETE) {
+			u32 slot_no;
+			struct mvs_slot_info *slot;
+			struct mvs_info *mvi = mvs_find_dev_mvi(dev);
 
-	return 0;
+			if (task->lldd_task) {
+				slot = (struct mvs_slot_info *)task->lldd_task;
+				slot_no = (u32) (slot - mvi->slot_info);
+				mvs_slot_complete(mvi, slot_no, 1);
+			}
+		}
+	} else if (task->task_proto & SAS_PROTOCOL_SATA ||
+		task->task_proto & SAS_PROTOCOL_STP) {
+		/* to do free register_set */
+	} else {
+		/* SMP */
 
-err_out:
-	dev_printk(KERN_ERR, &mvi->pdev->dev, "%s", msg);
+	}
+out:
+	if (rc != TMF_RESP_FUNC_COMPLETE)
+		mv_printk("%s:rc= %d\n", __func__, rc);
 	return rc;
-#else
-	/* FIXME , For SAS target mode */
-	memcpy(buf, "\x50\x05\x04\x30\x11\xab\x00\x00", 8);
-	return 0;
-#endif
 }
 
-static void mvs_int_sata(struct mvs_info *mvi)
+int mvs_abort_task_set(struct domain_device *dev, u8 *lun)
 {
-	u32 tmp;
-	void __iomem *regs = mvi->regs;
-	tmp = mr32(INT_STAT_SRS);
-	mw32(INT_STAT_SRS, tmp & 0xFFFF);
-}
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_tmf_task tmf_task;
 
-static void mvs_slot_reset(struct mvs_info *mvi, struct sas_task *task,
-				u32 slot_idx)
-{
-	void __iomem *regs = mvi->regs;
-	struct domain_device *dev = task->dev;
-	struct asd_sas_port *sas_port = dev->port;
-	struct mvs_port *port = mvi->slot_info[slot_idx].port;
-	u32 reg_set, phy_mask;
-
-	if (!sas_protocol_ata(task->task_proto)) {
-		reg_set = 0;
-		phy_mask = (port->wide_port_phymap) ? port->wide_port_phymap :
-				sas_port->phy_mask;
-	} else {
-		reg_set = port->taskfileset;
-		phy_mask = sas_port->phy_mask;
-	}
-	mvi->tx[mvi->tx_prod] = cpu_to_le32(TXQ_MODE_I | slot_idx |
-					(TXQ_CMD_SLOT_RESET << TXQ_CMD_SHIFT) |
-					(phy_mask << TXQ_PHY_SHIFT) |
-					(reg_set << TXQ_SRS_SHIFT));
+	tmf_task.tmf = TMF_ABORT_TASK_SET;
+	rc = mvs_debug_issue_ssp_tmf(dev, lun, &tmf_task);
 
-	mw32(TX_PROD_IDX, mvi->tx_prod);
-	mvi->tx_prod = (mvi->tx_prod + 1) & (MVS_CHIP_SLOT_SZ - 1);
+	return rc;
 }
 
-void mvs_int_full(struct mvs_info *mvi)
+int mvs_clear_aca(struct domain_device *dev, u8 *lun)
 {
-	void __iomem *regs = mvi->regs;
-	u32 tmp, stat;
-	int i;
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_tmf_task tmf_task;
 
-	stat = mr32(INT_STAT);
+	tmf_task.tmf = TMF_CLEAR_ACA;
+	rc = mvs_debug_issue_ssp_tmf(dev, lun, &tmf_task);
 
-	mvs_int_rx(mvi, false);
+	return rc;
+}
 
-	for (i = 0; i < MVS_MAX_PORTS; i++) {
-		tmp = (stat >> i) & (CINT_PORT | CINT_PORT_STOPPED);
-		if (tmp)
-			mvs_int_port(mvi, i, tmp);
-	}
+int mvs_clear_task_set(struct domain_device *dev, u8 *lun)
+{
+	int rc = TMF_RESP_FUNC_FAILED;
+	struct mvs_tmf_task tmf_task;
 
-	if (stat & CINT_SRS)
-		mvs_int_sata(mvi);
+	tmf_task.tmf = TMF_CLEAR_TASK_SET;
+	rc = mvs_debug_issue_ssp_tmf(dev, lun, &tmf_task);
 
-	mw32(INT_STAT, stat);
+	return rc;
 }
 
-#ifndef MVS_DISABLE_MSI
-static irqreturn_t mvs_msi_interrupt(int irq, void *opaque)
+static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
+			u32 slot_idx, int err)
 {
-	struct mvs_info *mvi = opaque;
-
-#ifndef MVS_USE_TASKLET
-	spin_lock(&mvi->lock);
+	struct mvs_device *mvi_dev = (struct mvs_device *)task->dev->lldd_dev;
+	struct task_status_struct *tstat = &task->task_status;
+	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
+	int stat = SAM_GOOD;
 
-	mvs_int_rx(mvi, true);
 
-	spin_unlock(&mvi->lock);
-#else
-	tasklet_schedule(&mvi->tasklet);
-#endif
-	return IRQ_HANDLED;
+	resp->frame_len = sizeof(struct dev_to_host_fis);
+	memcpy(&resp->ending_fis[0],
+	       SATA_RECEIVED_D2H_FIS(mvi_dev->taskfileset),
+	       sizeof(struct dev_to_host_fis));
+	tstat->buf_valid_size = sizeof(*resp);
+	if (unlikely(err))
+		stat = SAS_PROTO_RESPONSE;
+	return stat;
 }
-#endif
 
-int mvs_task_abort(struct sas_task *task)
+static int mvs_slot_err(struct mvs_info *mvi, struct sas_task *task,
+			 u32 slot_idx)
 {
-	int rc;
-	unsigned long flags;
-	struct mvs_info *mvi = task->dev->port->ha->lldd_ha;
-	struct pci_dev *pdev = mvi->pdev;
-	int tag;
+	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
+	int stat;
+	u32 err_dw0 = le32_to_cpu(*(u32 *) (slot->response));
+	u32 tfs = 0;
+	enum mvs_port_type type = PORT_TYPE_SAS;
 
-	spin_lock_irqsave(&task->task_state_lock, flags);
-	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
-		rc = TMF_RESP_FUNC_COMPLETE;
-		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		goto out_done;
-	}
-	spin_unlock_irqrestore(&task->task_state_lock, flags);
+	if (err_dw0 & CMD_ISS_STPD)
+		MVS_CHIP_DISP->issue_stop(mvi, type, tfs);
+
+	MVS_CHIP_DISP->command_active(mvi, slot_idx);
 
+	stat = SAM_CHECK_COND;
 	switch (task->task_proto) {
-	case SAS_PROTOCOL_SMP:
-		dev_printk(KERN_DEBUG, &pdev->dev, "SMP Abort! \n");
-		break;
 	case SAS_PROTOCOL_SSP:
-		dev_printk(KERN_DEBUG, &pdev->dev, "SSP Abort! \n");
+		stat = SAS_ABORTED_TASK;
+		break;
+	case SAS_PROTOCOL_SMP:
+		stat = SAM_CHECK_COND;
 		break;
+
 	case SAS_PROTOCOL_SATA:
 	case SAS_PROTOCOL_STP:
-	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:{
-		dev_printk(KERN_DEBUG, &pdev->dev, "STP Abort! \n");
-#if _MV_DUMP
-		dev_printk(KERN_DEBUG, &pdev->dev, "Dump D2H FIS: \n");
-		mvs_hexdump(sizeof(struct host_to_dev_fis),
-				(void *)&task->ata_task.fis, 0);
-		dev_printk(KERN_DEBUG, &pdev->dev, "Dump ATAPI Cmd : \n");
-		mvs_hexdump(16, task->ata_task.atapi_packet, 0);
-#endif
-		spin_lock_irqsave(&task->task_state_lock, flags);
-		if (task->task_state_flags & SAS_TASK_NEED_DEV_RESET) {
-			/* TODO */
-			;
-		}
-		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		break;
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
+	{
+		if (err_dw0 == 0x80400002)
+			mv_printk("find reserved error, why?\n");
+
+		task->ata_task.use_ncq = 0;
+		stat = SAS_PROTO_RESPONSE;
+		mvs_sata_done(mvi, task, slot_idx, 1);
+
 	}
+		break;
 	default:
 		break;
 	}
 
-	if (mvs_find_tag(mvi, task, &tag)) {
-		spin_lock_irqsave(&mvi->lock, flags);
-		mvs_slot_task_free(mvi, task, &mvi->slot_info[tag], tag);
-		spin_unlock_irqrestore(&mvi->lock, flags);
-	}
-	if (!mvs_task_exec(task, 1, GFP_ATOMIC))
-		rc = TMF_RESP_FUNC_COMPLETE;
-	else
-		rc = TMF_RESP_FUNC_FAILED;
-out_done:
-	return rc;
+	return stat;
 }
 
-int __devinit mvs_hw_init(struct mvs_info *mvi)
+int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
 {
-	void __iomem *regs = mvi->regs;
-	int i;
-	u32 tmp, cctl;
-
-	/* make sure interrupts are masked immediately (paranoia) */
-	mw32(GBL_CTL, 0);
-	tmp = mr32(GBL_CTL);
-
-	/* Reset Controller */
-	if (!(tmp & HBA_RST)) {
-		if (mvi->flags & MVF_PHY_PWR_FIX) {
-			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
-			tmp &= ~PCTL_PWR_ON;
-			tmp |= PCTL_OFF;
-			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
-
-			pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
-			tmp &= ~PCTL_PWR_ON;
-			tmp |= PCTL_OFF;
-			pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
-		}
+	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
+	struct mvs_slot_info *slot = &mvi->slot_info[slot_idx];
+	struct sas_task *task = slot->task;
+	struct mvs_device *mvi_dev = NULL;
+	struct task_status_struct *tstat;
+
+	bool aborted;
+	void *to;
+	enum exec_status sts;
+
+	if (mvi->exp_req)
+		mvi->exp_req--;
+	if (unlikely(!task || !task->lldd_task))
+		return -1;
+
+	tstat = &task->task_status;
+	mvi_dev = (struct mvs_device *)task->dev->lldd_dev;
 
-		/* global reset, incl. COMRESET/H_RESET_N (self-clearing) */
-		mw32_f(GBL_CTL, HBA_RST);
+	mvs_hba_cq_dump(mvi);
+
+	spin_lock(&task->task_state_lock);
+	task->task_state_flags &=
+		~(SAS_TASK_STATE_PENDING | SAS_TASK_AT_INITIATOR);
+	task->task_state_flags |= SAS_TASK_STATE_DONE;
+	/* race condition*/
+	aborted = task->task_state_flags & SAS_TASK_STATE_ABORTED;
+	spin_unlock(&task->task_state_lock);
+
+	memset(tstat, 0, sizeof(*tstat));
+	tstat->resp = SAS_TASK_COMPLETE;
+
+	if (unlikely(aborted)) {
+		tstat->stat = SAS_ABORTED_TASK;
+		if (mvi_dev)
+			mvi_dev->runing_req--;
+		if (sas_protocol_ata(task->task_proto))
+			mvs_free_reg_set(mvi, mvi_dev);
+
+		mvs_slot_task_free(mvi, task, slot, slot_idx);
+		return -1;
 	}
 
-	/* wait for reset to finish; timeout is just a guess */
-	i = 1000;
-	while (i-- > 0) {
-		msleep(10);
+	if (unlikely(!mvi_dev || !slot->port->port_attached || flags)) {
+		mv_dprintk("port has not device.\n");
+		tstat->stat = SAS_PHY_DOWN;
+		goto out;
+	}
 
-		if (!(mr32(GBL_CTL) & HBA_RST))
-			break;
+	/*
+	if (unlikely((rx_desc & RXQ_ERR) || (*(u64 *) slot->response))) {
+		 mv_dprintk("Find device[%016llx] RXQ_ERR %X,
+		 err info:%016llx\n",
+		 SAS_ADDR(task->dev->sas_addr),
+		 rx_desc, (u64)(*(u64 *) slot->response));
 	}
-	if (mr32(GBL_CTL) & HBA_RST) {
-		dev_printk(KERN_ERR, &mvi->pdev->dev, "HBA reset failed\n");
-		return -EBUSY;
+	*/
+
+	/* error info record present */
+	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
+		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
+		goto out;
 	}
 
-	/* Init Chip */
-	/* make sure RST is set; HBA_RST /should/ have done that for us */
-	cctl = mr32(CTL);
-	if (cctl & CCTL_RST)
-		cctl &= ~CCTL_RST;
-	else
-		mw32_f(CTL, cctl | CCTL_RST);
+	switch (task->task_proto) {
+	case SAS_PROTOCOL_SSP:
+		/* hw says status == 0, datapres == 0 */
+		if (rx_desc & RXQ_GOOD) {
+			tstat->stat = SAM_GOOD;
+			tstat->resp = SAS_TASK_COMPLETE;
+		}
+		/* response frame present */
+		else if (rx_desc & RXQ_RSP) {
+			struct ssp_response_iu *iu = slot->response +
+						sizeof(struct mvs_err_info);
+			sas_ssp_task_response(mvi->dev, task, iu);
+		} else
+			tstat->stat = SAM_CHECK_COND;
+		break;
 
-	/* write to device control _AND_ device status register? - A.C. */
-	pci_read_config_dword(mvi->pdev, PCR_DEV_CTRL, &tmp);
-	tmp &= ~PRD_REQ_MASK;
-	tmp |= PRD_REQ_SIZE;
-	pci_write_config_dword(mvi->pdev, PCR_DEV_CTRL, tmp);
+	case SAS_PROTOCOL_SMP: {
+			struct scatterlist *sg_resp = &task->smp_task.smp_resp;
+			tstat->stat = SAM_GOOD;
+			to = kmap_atomic(sg_page(sg_resp), KM_IRQ0);
+			memcpy(to + sg_resp->offset,
+				slot->response + sizeof(struct mvs_err_info),
+				sg_dma_len(sg_resp));
+			kunmap_atomic(to, KM_IRQ0);
+			break;
+		}
 
-	pci_read_config_dword(mvi->pdev, PCR_PHY_CTL, &tmp);
-	tmp |= PCTL_PWR_ON;
-	tmp &= ~PCTL_OFF;
-	pci_write_config_dword(mvi->pdev, PCR_PHY_CTL, tmp);
+	case SAS_PROTOCOL_SATA:
+	case SAS_PROTOCOL_STP:
+	case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: {
+			tstat->stat = mvs_sata_done(mvi, task, slot_idx, 0);
+			break;
+		}
 
-	pci_read_config_dword(mvi->pdev, PCR_PHY_CTL2, &tmp);
-	tmp |= PCTL_PWR_ON;
-	tmp &= ~PCTL_OFF;
-	pci_write_config_dword(mvi->pdev, PCR_PHY_CTL2, tmp);
+	default:
+		tstat->stat = SAM_CHECK_COND;
+		break;
+	}
 
-	mw32_f(CTL, cctl);
+out:
+	if (mvi_dev)
+		mvi_dev->runing_req--;
+	if (sas_protocol_ata(task->task_proto))
+		mvs_free_reg_set(mvi, mvi_dev);
 
-	/* reset control */
-	mw32(PCS, 0);		/*MVS_PCS */
+	mvs_slot_task_free(mvi, task, slot, slot_idx);
+	sts = tstat->stat;
 
-	mvs_phy_hacks(mvi);
+	spin_unlock(&mvi->lock);
+	if (task->task_done)
+		task->task_done(task);
+	else
+		mv_dprintk("why has not task_done.\n");
+	spin_lock(&mvi->lock);
 
-	mw32(CMD_LIST_LO, mvi->slot_dma);
-	mw32(CMD_LIST_HI, (mvi->slot_dma >> 16) >> 16);
+	return sts;
+}
 
-	mw32(RX_FIS_LO, mvi->rx_fis_dma);
-	mw32(RX_FIS_HI, (mvi->rx_fis_dma >> 16) >> 16);
+void mvs_release_task(struct mvs_info *mvi,
+		int phy_no, struct domain_device *dev)
+{
+	int i = 0; u32 slot_idx;
+	struct mvs_phy *phy;
+	struct mvs_port *port;
+	struct mvs_slot_info *slot, *slot2;
 
-	mw32(TX_CFG, MVS_CHIP_SLOT_SZ);
-	mw32(TX_LO, mvi->tx_dma);
-	mw32(TX_HI, (mvi->tx_dma >> 16) >> 16);
+	phy = &mvi->phy[phy_no];
+	port = phy->port;
+	if (!port)
+		return;
 
-	mw32(RX_CFG, MVS_RX_RING_SZ);
-	mw32(RX_LO, mvi->rx_dma);
-	mw32(RX_HI, (mvi->rx_dma >> 16) >> 16);
+	list_for_each_entry_safe(slot, slot2, &port->list, entry) {
+		struct sas_task *task;
+		slot_idx = (u32) (slot - mvi->slot_info);
+		task = slot->task;
 
-	/* enable auto port detection */
-	mw32(GBL_PORT_TYPE, MODE_AUTO_DET_EN);
-	msleep(1100);
-	/* init and reset phys */
-	for (i = 0; i < mvi->chip->n_phy; i++) {
-		u32 lo = be32_to_cpu(*(u32 *)&mvi->sas_addr[4]);
-		u32 hi = be32_to_cpu(*(u32 *)&mvi->sas_addr[0]);
+		if (dev && task->dev != dev)
+			continue;
 
-		mvs_detect_porttype(mvi, i);
+		mv_printk("Release slot [%x] tag[%x], task [%p]:\n",
+			slot_idx, slot->slot_tag, task);
 
-		/* set phy local SAS address */
-		mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_LO);
-		mvs_write_port_cfg_data(mvi, i, lo);
-		mvs_write_port_cfg_addr(mvi, i, PHYR_ADDR_HI);
-		mvs_write_port_cfg_data(mvi, i, hi);
+		if (task->task_proto & SAS_PROTOCOL_SSP) {
+			mv_printk("attached with SSP task CDB[");
+			for (i = 0; i < 16; i++)
+				mv_printk(" %02x", task->ssp_task.cdb[i]);
+			mv_printk(" ]\n");
+		}
 
-		/* reset phy */
-		tmp = mvs_read_phy_ctl(mvi, i);
-		tmp |= PHY_RST;
-		mvs_write_phy_ctl(mvi, i, tmp);
+		mvs_slot_complete(mvi, slot_idx, 1);
 	}
+}
 
-	msleep(100);
+static void mvs_phy_disconnected(struct mvs_phy *phy)
+{
+	phy->phy_attached = 0;
+	phy->att_dev_info = 0;
+	phy->att_dev_sas_addr = 0;
+}
+
+static void mvs_work_queue(struct work_struct *work)
+{
+	struct delayed_work *dw = container_of(work, struct delayed_work, work);
+	struct mvs_wq *mwq = container_of(dw, struct mvs_wq, work_q);
+	struct mvs_info *mvi = mwq->mvi;
+	unsigned long flags;
 
-	for (i = 0; i < mvi->chip->n_phy; i++) {
-		/* clear phy int status */
-		tmp = mvs_read_port_irq_stat(mvi, i);
-		tmp &= ~PHYEV_SIG_FIS;
-		mvs_write_port_irq_stat(mvi, i, tmp);
+	spin_lock_irqsave(&mvi->lock, flags);
+	if (mwq->handler & PHY_PLUG_EVENT) {
+		u32 phy_no = (unsigned long) mwq->data;
+		struct sas_ha_struct *sas_ha = mvi->sas;
+		struct mvs_phy *phy = &mvi->phy[phy_no];
+		struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+		if (phy->phy_event & PHY_PLUG_OUT) {
+			u32 tmp;
+			struct sas_identify_frame *id;
+			id = (struct sas_identify_frame *)phy->frame_rcvd;
+			tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, phy_no);
+			phy->phy_event &= ~PHY_PLUG_OUT;
+			if (!(tmp & PHY_READY_MASK)) {
+				sas_phy_disconnected(sas_phy);
+				mvs_phy_disconnected(phy);
+				sas_ha->notify_phy_event(sas_phy,
+					PHYE_LOSS_OF_SIGNAL);
+				mv_dprintk("phy%d Removed Device\n", phy_no);
+			} else {
+				MVS_CHIP_DISP->detect_porttype(mvi, phy_no);
+				mvs_update_phyinfo(mvi, phy_no, 1);
+				mvs_bytes_dmaed(mvi, phy_no);
+				mvs_port_notify_formed(sas_phy, 0);
+				mv_dprintk("phy%d Attached Device\n", phy_no);
+			}
+		}
+	}
+	list_del(&mwq->entry);
+	spin_unlock_irqrestore(&mvi->lock, flags);
+	kfree(mwq);
+}
 
-		/* set phy int mask */
-		tmp = PHYEV_RDY_CH | PHYEV_BROAD_CH | PHYEV_UNASSOC_FIS |
-			PHYEV_ID_DONE | PHYEV_DEC_ERR;
-		mvs_write_port_irq_mask(mvi, i, tmp);
+static int mvs_handle_event(struct mvs_info *mvi, void *data, int handler)
+{
+	struct mvs_wq *mwq;
+	int ret = 0;
+
+	mwq = kmalloc(sizeof(struct mvs_wq), GFP_ATOMIC);
+	if (mwq) {
+		mwq->mvi = mvi;
+		mwq->data = data;
+		mwq->handler = handler;
+		MV_INIT_DELAYED_WORK(&mwq->work_q, mvs_work_queue, mwq);
+		list_add_tail(&mwq->entry, &mvi->wq_list);
+		schedule_delayed_work(&mwq->work_q, HZ * 2);
+	} else
+		ret = -ENOMEM;
+
+	return ret;
+}
 
-		msleep(100);
-		mvs_update_phyinfo(mvi, i, 1);
-		mvs_enable_xmt(mvi, i);
+static void mvs_sig_time_out(unsigned long tphy)
+{
+	struct mvs_phy *phy = (struct mvs_phy *)tphy;
+	struct mvs_info *mvi = phy->mvi;
+	u8 phy_no;
+
+	for (phy_no = 0; phy_no < mvi->chip->n_phy; phy_no++) {
+		if (&mvi->phy[phy_no] == phy) {
+			mv_dprintk("Get signature time out, reset phy %d\n",
+				phy_no+mvi->id*mvi->chip->n_phy);
+			MVS_CHIP_DISP->phy_reset(mvi, phy_no, 1);
+		}
 	}
+}
 
-	/* FIXME: update wide port bitmaps */
+static void mvs_sig_remove_timer(struct mvs_phy *phy)
+{
+	if (phy->timer.function)
+		del_timer(&phy->timer);
+	phy->timer.function = NULL;
+}
 
-	/* little endian for open address and command table, etc. */
-	/* A.C.
-	 * it seems that ( from the spec ) turning on big-endian won't
-	 * do us any good on big-endian machines, need further confirmation
-	 */
-	cctl = mr32(CTL);
-	cctl |= CCTL_ENDIAN_CMD;
-	cctl |= CCTL_ENDIAN_DATA;
-	cctl &= ~CCTL_ENDIAN_OPEN;
-	cctl |= CCTL_ENDIAN_RSP;
-	mw32_f(CTL, cctl);
-
-	/* reset CMD queue */
-	tmp = mr32(PCS);
-	tmp |= PCS_CMD_RST;
-	mw32(PCS, tmp);
-	/* interrupt coalescing may cause missing HW interrput in some case,
-	 * and the max count is 0x1ff, while our max slot is 0x200,
-	 * it will make count 0.
-	 */
-	tmp = 0;
-	mw32(INT_COAL, tmp);
+void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events)
+{
+	u32 tmp;
+	struct sas_ha_struct *sas_ha = mvi->sas;
+	struct mvs_phy *phy = &mvi->phy[phy_no];
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
 
-	tmp = 0x100;
-	mw32(INT_COAL_TMOUT, tmp);
+	phy->irq_status = MVS_CHIP_DISP->read_port_irq_stat(mvi, phy_no);
+	mv_dprintk("port %d ctrl sts=0x%X.\n", phy_no+mvi->id*mvi->chip->n_phy,
+		MVS_CHIP_DISP->read_phy_ctl(mvi, phy_no));
+	mv_dprintk("Port %d irq sts = 0x%X\n", phy_no+mvi->id*mvi->chip->n_phy,
+		phy->irq_status);
 
-	/* ladies and gentlemen, start your engines */
-	mw32(TX_CFG, 0);
-	mw32(TX_CFG, MVS_CHIP_SLOT_SZ | TX_EN);
-	mw32(RX_CFG, MVS_RX_RING_SZ | RX_EN);
-	/* enable CMD/CMPL_Q/RESP mode */
-	mw32(PCS, PCS_SATA_RETRY | PCS_FIS_RX_EN | PCS_CMD_EN);
+	/*
+	* events is port event now ,
+	* we need check the interrupt status which belongs to per port.
+	*/
 
-	/* enable completion queue interrupt */
-	tmp = (CINT_PORT_MASK | CINT_DONE | CINT_MEM | CINT_SRS);
-	mw32(INT_MASK, tmp);
+	if (phy->irq_status & PHYEV_DCDR_ERR)
+		mv_dprintk("port %d STP decoding error.\n",
+		phy_no+mvi->id*mvi->chip->n_phy);
+
+	if (phy->irq_status & PHYEV_POOF) {
+		if (!(phy->phy_event & PHY_PLUG_OUT)) {
+			int dev_sata = phy->phy_type & PORT_TYPE_SATA;
+			int ready;
+			mvs_release_task(mvi, phy_no, NULL);
+			phy->phy_event |= PHY_PLUG_OUT;
+			mvs_handle_event(mvi,
+				(void *)(unsigned long)phy_no,
+				PHY_PLUG_EVENT);
+			ready = mvs_is_phy_ready(mvi, phy_no);
+			if (!ready)
+				mv_dprintk("phy%d Unplug Notice\n",
+					phy_no +
+					mvi->id * mvi->chip->n_phy);
+			if (ready || dev_sata) {
+				if (MVS_CHIP_DISP->stp_reset)
+					MVS_CHIP_DISP->stp_reset(mvi,
+							phy_no);
+				else
+					MVS_CHIP_DISP->phy_reset(mvi,
+							phy_no, 0);
+				return;
+			}
+		}
+	}
 
-	/* Enable SRS interrupt */
-	mw32(INT_MASK_SRS, 0xFF);
-	return 0;
+	if (phy->irq_status & PHYEV_COMWAKE) {
+		tmp = MVS_CHIP_DISP->read_port_irq_mask(mvi, phy_no);
+		MVS_CHIP_DISP->write_port_irq_mask(mvi, phy_no,
+					tmp | PHYEV_SIG_FIS);
+		if (phy->timer.function == NULL) {
+			phy->timer.data = (unsigned long)phy;
+			phy->timer.function = mvs_sig_time_out;
+			phy->timer.expires = jiffies + 10*HZ;
+			add_timer(&phy->timer);
+		}
+	}
+	if (phy->irq_status & (PHYEV_SIG_FIS | PHYEV_ID_DONE)) {
+		phy->phy_status = mvs_is_phy_ready(mvi, phy_no);
+		mvs_sig_remove_timer(phy);
+		mv_dprintk("notify plug in on phy[%d]\n", phy_no);
+		if (phy->phy_status) {
+			mdelay(10);
+			MVS_CHIP_DISP->detect_porttype(mvi, phy_no);
+			if (phy->phy_type & PORT_TYPE_SATA) {
+				tmp = MVS_CHIP_DISP->read_port_irq_mask(
+						mvi, phy_no);
+				tmp &= ~PHYEV_SIG_FIS;
+				MVS_CHIP_DISP->write_port_irq_mask(mvi,
+							phy_no, tmp);
+			}
+			mvs_update_phyinfo(mvi, phy_no, 0);
+			mvs_bytes_dmaed(mvi, phy_no);
+			/* whether driver is going to handle hot plug */
+			if (phy->phy_event & PHY_PLUG_OUT) {
+				mvs_port_notify_formed(sas_phy, 0);
+				phy->phy_event &= ~PHY_PLUG_OUT;
+			}
+		} else {
+			mv_dprintk("plugin interrupt but phy%d is gone\n",
+				phy_no + mvi->id*mvi->chip->n_phy);
+		}
+	} else if (phy->irq_status & PHYEV_BROAD_CH) {
+		mv_dprintk("port %d broadcast change.\n",
+			phy_no + mvi->id*mvi->chip->n_phy);
+		/* exception for Samsung disk drive*/
+		mdelay(1000);
+		sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD);
+	}
+	MVS_CHIP_DISP->write_port_irq_stat(mvi, phy_no, phy->irq_status);
 }
 
-void __devinit mvs_print_info(struct mvs_info *mvi)
+int mvs_int_rx(struct mvs_info *mvi, bool self_clear)
 {
-	struct pci_dev *pdev = mvi->pdev;
-	static int printed_version;
+	u32 rx_prod_idx, rx_desc;
+	bool attn = false;
 
-	if (!printed_version++)
-		dev_printk(KERN_INFO, &pdev->dev, "version " DRV_VERSION "\n");
+	/* the first dword in the RX ring is special: it contains
+	 * a mirror of the hardware's RX producer index, so that
+	 * we don't have to stall the CPU reading that register.
+	 * The actual RX ring is offset by one dword, due to this.
+	 */
+	rx_prod_idx = mvi->rx_cons;
+	mvi->rx_cons = le32_to_cpu(mvi->rx[0]);
+	if (mvi->rx_cons == 0xfff)	/* h/w hasn't touched RX ring yet */
+		return 0;
 
-	dev_printk(KERN_INFO, &pdev->dev, "%u phys, addr %llx\n",
-		   mvi->chip->n_phy, SAS_ADDR(mvi->sas_addr));
+	/* The CMPL_Q may come late, read from register and try again
+	* note: if coalescing is enabled,
+	* it will need to read from register every time for sure
+	*/
+	if (unlikely(mvi->rx_cons == rx_prod_idx))
+		mvi->rx_cons = MVS_CHIP_DISP->rx_update(mvi) & RX_RING_SZ_MASK;
+
+	if (mvi->rx_cons == rx_prod_idx)
+		return 0;
+
+	while (mvi->rx_cons != rx_prod_idx) {
+		/* increment our internal RX consumer pointer */
+		rx_prod_idx = (rx_prod_idx + 1) & (MVS_RX_RING_SZ - 1);
+		rx_desc = le32_to_cpu(mvi->rx[rx_prod_idx + 1]);
+
+		if (likely(rx_desc & RXQ_DONE))
+			mvs_slot_complete(mvi, rx_desc, 0);
+		if (rx_desc & RXQ_ATTN) {
+			attn = true;
+		} else if (rx_desc & RXQ_ERR) {
+			if (!(rx_desc & RXQ_DONE))
+				mvs_slot_complete(mvi, rx_desc, 0);
+		} else if (rx_desc & RXQ_SLOT_RESET) {
+			mvs_slot_free(mvi, rx_desc);
+		}
+	}
+
+	if (attn && self_clear)
+		MVS_CHIP_DISP->int_full(mvi);
+	return 0;
 }
 
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index 7a954a9..75b9748 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -1,25 +1,26 @@
 /*
-	mv_sas.h - Marvell 88SE6440 SAS/SATA support
-
-	Copyright 2007 Red Hat, Inc.
-	Copyright 2008 Marvell. <kewei@marvell.com>
-
-	This program is free software; you can redistribute it and/or
-	modify it under the terms of the GNU General Public License as
-	published by the Free Software Foundation; either version 2,
-	or (at your option) any later version.
-
-	This program is distributed in the hope that it will be useful,
-	but WITHOUT ANY WARRANTY; without even the implied warranty
-	of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-	See the GNU General Public License for more details.
-
-	You should have received a copy of the GNU General Public
-	License along with this program; see the file COPYING.	If not,
-	write to the Free Software Foundation, 675 Mass Ave, Cambridge,
-	MA 02139, USA.
-
- */
+ * Marvell 88SE64xx/88SE94xx main function head file
+ *
+ * Copyright 2007 Red Hat, Inc.
+ * Copyright 2008 Marvell. <kewei@marvell.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+*/
 
 #ifndef _MV_SAS_H_
 #define _MV_SAS_H_
@@ -42,25 +43,144 @@
 #include <linux/version.h>
 #include "mv_defs.h"
 
-#define DRV_NAME	"mvsas"
-#define DRV_VERSION	"0.5.2"
-#define _MV_DUMP	0
-#define MVS_DISABLE_NVRAM
-#define MVS_DISABLE_MSI
-
+#define DRV_NAME		"mvsas"
+#define DRV_VERSION		"0.8.2"
+#define _MV_DUMP		0
 #define MVS_ID_NOT_MAPPED	0x7f
-#define MVS_CHIP_SLOT_SZ	(1U << mvi->chip->slot_width)
+/* #define DISABLE_HOTPLUG_DMA_FIX */
+#define MAX_EXP_RUNNING_REQ	2
+#define WIDE_PORT_MAX_PHY		4
+#define	MV_DISABLE_NCQ	0
+#define mv_printk(fmt, arg ...)	\
+	printk(KERN_DEBUG"%s %d:" fmt, __FILE__, __LINE__, ## arg)
+#ifdef MV_DEBUG
+#define mv_dprintk(format, arg...)	\
+	printk(KERN_DEBUG"%s %d:" format, __FILE__, __LINE__, ## arg)
+#else
+#define mv_dprintk(format, arg...)
+#endif
+#define MV_MAX_U32			0xffffffff
+
+extern struct mvs_tgt_initiator mvs_tgt;
+extern struct mvs_info *tgt_mvi;
+extern const struct mvs_dispatch mvs_64xx_dispatch;
+extern const struct mvs_dispatch mvs_94xx_dispatch;
+
+#define DEV_IS_EXPANDER(type)	\
+	((type == EDGE_DEV) || (type == FANOUT_DEV))
 
-#define for_each_phy(__lseq_mask, __mc, __lseq, __rest)			\
-	for ((__mc) = (__lseq_mask), (__lseq) = 0;			\
-					(__mc) != 0 && __rest;		\
+#define bit(n) ((u32)1 << n)
+
+#define for_each_phy(__lseq_mask, __mc, __lseq)			\
+	for ((__mc) = (__lseq_mask), (__lseq) = 0;		\
+					(__mc) != 0 ;		\
 					(++__lseq), (__mc) >>= 1)
 
+#define MV_INIT_DELAYED_WORK(w, f, d)	INIT_DELAYED_WORK(w, f)
+#define UNASSOC_D2H_FIS(id)		\
+	((void *) mvi->rx_fis + 0x100 * id)
+#define SATA_RECEIVED_FIS_LIST(reg_set)	\
+	((void *) mvi->rx_fis + mvi->chip->fis_offs + 0x100 * reg_set)
+#define SATA_RECEIVED_SDB_FIS(reg_set)	\
+	(SATA_RECEIVED_FIS_LIST(reg_set) + 0x58)
+#define SATA_RECEIVED_D2H_FIS(reg_set)	\
+	(SATA_RECEIVED_FIS_LIST(reg_set) + 0x40)
+#define SATA_RECEIVED_PIO_FIS(reg_set)	\
+	(SATA_RECEIVED_FIS_LIST(reg_set) + 0x20)
+#define SATA_RECEIVED_DMA_FIS(reg_set)	\
+	(SATA_RECEIVED_FIS_LIST(reg_set) + 0x00)
+
+enum dev_status {
+	MVS_DEV_NORMAL = 0x0,
+	MVS_DEV_EH	= 0x1,
+};
+
+
+struct mvs_info;
+
+struct mvs_dispatch {
+	char *name;
+	int (*chip_init)(struct mvs_info *mvi);
+	int (*spi_init)(struct mvs_info *mvi);
+	int (*chip_ioremap)(struct mvs_info *mvi);
+	void (*chip_iounmap)(struct mvs_info *mvi);
+	irqreturn_t (*isr)(struct mvs_info *mvi, int irq, u32 stat);
+	u32 (*isr_status)(struct mvs_info *mvi, int irq);
+	void (*interrupt_enable)(struct mvs_info *mvi);
+	void (*interrupt_disable)(struct mvs_info *mvi);
+
+	u32 (*read_phy_ctl)(struct mvs_info *mvi, u32 port);
+	void (*write_phy_ctl)(struct mvs_info *mvi, u32 port, u32 val);
+
+	u32 (*read_port_cfg_data)(struct mvs_info *mvi, u32 port);
+	void (*write_port_cfg_data)(struct mvs_info *mvi, u32 port, u32 val);
+	void (*write_port_cfg_addr)(struct mvs_info *mvi, u32 port, u32 addr);
+
+	u32 (*read_port_vsr_data)(struct mvs_info *mvi, u32 port);
+	void (*write_port_vsr_data)(struct mvs_info *mvi, u32 port, u32 val);
+	void (*write_port_vsr_addr)(struct mvs_info *mvi, u32 port, u32 addr);
+
+	u32 (*read_port_irq_stat)(struct mvs_info *mvi, u32 port);
+	void (*write_port_irq_stat)(struct mvs_info *mvi, u32 port, u32 val);
+
+	u32 (*read_port_irq_mask)(struct mvs_info *mvi, u32 port);
+	void (*write_port_irq_mask)(struct mvs_info *mvi, u32 port, u32 val);
+
+	void (*get_sas_addr)(void *buf, u32 buflen);
+	void (*command_active)(struct mvs_info *mvi, u32 slot_idx);
+	void (*issue_stop)(struct mvs_info *mvi, enum mvs_port_type type,
+				u32 tfs);
+	void (*start_delivery)(struct mvs_info *mvi, u32 tx);
+	u32 (*rx_update)(struct mvs_info *mvi);
+	void (*int_full)(struct mvs_info *mvi);
+	u8 (*assign_reg_set)(struct mvs_info *mvi, u8 *tfs);
+	void (*free_reg_set)(struct mvs_info *mvi, u8 *tfs);
+	u32 (*prd_size)(void);
+	u32 (*prd_count)(void);
+	void (*make_prd)(struct scatterlist *scatter, int nr, void *prd);
+	void (*detect_porttype)(struct mvs_info *mvi, int i);
+	int (*oob_done)(struct mvs_info *mvi, int i);
+	void (*fix_phy_info)(struct mvs_info *mvi, int i,
+				struct sas_identify_frame *id);
+	void (*phy_work_around)(struct mvs_info *mvi, int i);
+	void (*phy_set_link_rate)(struct mvs_info *mvi, u32 phy_id,
+				struct sas_phy_linkrates *rates);
+	u32 (*phy_max_link_rate)(void);
+	void (*phy_disable)(struct mvs_info *mvi, u32 phy_id);
+	void (*phy_enable)(struct mvs_info *mvi, u32 phy_id);
+	void (*phy_reset)(struct mvs_info *mvi, u32 phy_id, int hard);
+	void (*stp_reset)(struct mvs_info *mvi, u32 phy_id);
+	void (*clear_active_cmds)(struct mvs_info *mvi);
+	u32 (*spi_read_data)(struct mvs_info *mvi);
+	void (*spi_write_data)(struct mvs_info *mvi, u32 data);
+	int (*spi_buildcmd)(struct mvs_info *mvi,
+						u32      *dwCmd,
+						u8       cmd,
+						u8       read,
+						u8       length,
+						u32      addr
+						);
+	int (*spi_issuecmd)(struct mvs_info *mvi, u32 cmd);
+	int (*spi_waitdataready)(struct mvs_info *mvi, u32 timeout);
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	void (*dma_fix)(dma_addr_t buf_dma, int buf_len, int from, void *prd);
+#endif
+
+};
+
 struct mvs_chip_info {
-	u32		n_phy;
-	u32		srs_sz;
-	u32		slot_width;
+	u32 		n_host;
+	u32 		n_phy;
+	u32 		fis_offs;
+	u32 		fis_count;
+	u32 		srs_sz;
+	u32 		slot_width;
+	const struct mvs_dispatch *dispatch;
 };
+#define MVS_CHIP_SLOT_SZ	(1U << mvi->chip->slot_width)
+#define MVS_RX_FISL_SZ		\
+	(mvi->chip->fis_offs + (mvi->chip->fis_count * 0x100))
+#define MVS_CHIP_DISP		(mvi->chip->dispatch)
 
 struct mvs_err_info {
 	__le32			flags;
@@ -72,7 +192,7 @@ struct mvs_cmd_hdr {
 	__le32			lens;	/* cmd, max resp frame len */
 	__le32			tags;	/* targ port xfer tag; tag */
 	__le32			data_len;	/* data xfer len */
-	__le64			cmd_tbl;	/* command table address */
+	__le64			cmd_tbl;  	/* command table address */
 	__le64			open_frame;	/* open addr frame address */
 	__le64			status_buf;	/* status buffer address */
 	__le64			prd_tbl;		/* PRD tbl address */
@@ -82,16 +202,17 @@ struct mvs_cmd_hdr {
 struct mvs_port {
 	struct asd_sas_port	sas_port;
 	u8			port_attached;
-	u8			taskfileset;
 	u8			wide_port_phymap;
 	struct list_head	list;
 };
 
 struct mvs_phy {
+	struct mvs_info 		*mvi;
 	struct mvs_port		*port;
 	struct asd_sas_phy	sas_phy;
 	struct sas_identify	identify;
 	struct scsi_device	*sdev;
+	struct timer_list timer;
 	u64		dev_sas_addr;
 	u64		att_dev_sas_addr;
 	u32		att_dev_info;
@@ -102,15 +223,34 @@ struct mvs_phy {
 	u32		frame_rcvd_size;
 	u8		frame_rcvd[32];
 	u8		phy_attached;
+	u8		phy_mode;
+	u8		reserved[2];
+	u32		phy_event;
 	enum sas_linkrate	minimum_linkrate;
 	enum sas_linkrate	maximum_linkrate;
 };
 
+struct mvs_device {
+	enum sas_dev_type dev_type;
+	struct domain_device *sas_device;
+	u32 attached_phy;
+	u32 device_id;
+	u32 runing_req;
+	u8 taskfileset;
+	u8 dev_status;
+	u16 reserved;
+	struct list_head		dev_entry;
+};
+
 struct mvs_slot_info {
-	struct list_head list;
-	struct sas_task *task;
+	struct list_head entry;
+	union {
+		struct sas_task *task;
+		void *tdata;
+	};
 	u32 n_elem;
 	u32 tx;
+	u32 slot_tag;
 
 	/* DMA buffer for storing cmd tbl, open addr frame, status buffer,
 	 * and PRD table
@@ -120,9 +260,10 @@ struct mvs_slot_info {
 #if _MV_DUMP
 	u32 cmd_size;
 #endif
-
 	void *response;
 	struct mvs_port *port;
+	struct mvs_device	*device;
+	void *open_frame;
 };
 
 struct mvs_info {
@@ -133,17 +274,17 @@ struct mvs_info {
 
 	/* our device */
 	struct pci_dev *pdev;
+	struct device *dev;
 
 	/* enhanced mode registers */
 	void __iomem *regs;
 
-	/* peripheral registers */
-	void __iomem *peri_regs;
-
+	/* peripheral or soc registers */
+	void __iomem *regs_ex;
 	u8 sas_addr[SAS_ADDR_SIZE];
 
 	/* SCSI/SAS glue */
-	struct sas_ha_struct sas;
+	struct sas_ha_struct *sas;
 	struct Scsi_Host *shost;
 
 	/* TX (delivery) DMA ring */
@@ -154,7 +295,7 @@ struct mvs_info {
 	u32 tx_prod;
 
 	/* RX (completion) DMA ring */
-	__le32 *rx;
+	__le32	*rx;
 	dma_addr_t rx_dma;
 
 	/* RX consumer idx */
@@ -168,38 +309,98 @@ struct mvs_info {
 	struct mvs_cmd_hdr *slot;
 	dma_addr_t slot_dma;
 
+	u32 chip_id;
 	const struct mvs_chip_info *chip;
 
-	u8 tags[MVS_SLOTS];
-	struct mvs_slot_info slot_info[MVS_SLOTS];
-				/* further per-slot information */
+	int tags_num;
+	u8 tags[MVS_SLOTS >> 3];
+
+	/* further per-slot information */
 	struct mvs_phy phy[MVS_MAX_PHYS];
 	struct mvs_port port[MVS_MAX_PHYS];
-#ifdef MVS_USE_TASKLET
-	struct tasklet_struct tasklet;
+	u32 irq;
+	u32 exp_req;
+	u32 id;
+	u64 sata_reg_set;
+	struct list_head *hba_list;
+	struct list_head soc_entry;
+	struct list_head wq_list;
+	unsigned long instance;
+	u16 flashid;
+	u32 flashsize;
+	u32 flashsectSize;
+
+	void *addon;
+	struct mvs_device	devices[MVS_MAX_DEVICES];
+#ifndef DISABLE_HOTPLUG_DMA_FIX
+	void *bulk_buffer;
+	dma_addr_t bulk_buffer_dma;
+#define TRASH_BUCKET_SIZE    	0x20000
 #endif
+	struct mvs_slot_info slot_info[0];
+};
+
+struct mvs_prv_info{
+	u8 n_host;
+	u8 n_phy;
+	u16 reserve;
+	struct mvs_info *mvi[2];
+};
+
+struct mvs_wq {
+	struct delayed_work work_q;
+	struct mvs_info *mvi;
+	void *data;
+	int handler;
+	struct list_head entry;
 };
 
+struct mvs_task_exec_info {
+	struct sas_task *task;
+	struct mvs_cmd_hdr *hdr;
+	struct mvs_port *port;
+	u32 tag;
+	int n_elem;
+};
+
+
+/******************** function prototype *********************/
+void mvs_get_sas_addr(void *buf, u32 buflen);
+void mvs_tag_clear(struct mvs_info *mvi, u32 tag);
+void mvs_tag_free(struct mvs_info *mvi, u32 tag);
+void mvs_tag_set(struct mvs_info *mvi, unsigned int tag);
+int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out);
+void mvs_tag_init(struct mvs_info *mvi);
+void mvs_iounmap(void __iomem *regs);
+int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex);
+void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard);
 int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func,
 			void *funcdata);
+void __devinit mvs_set_sas_addr(struct mvs_info *mvi, int port_id,
+				u32 off_lo, u32 off_hi, u64 sas_addr);
+int mvs_slave_alloc(struct scsi_device *scsi_dev);
 int mvs_slave_configure(struct scsi_device *sdev);
 void mvs_scan_start(struct Scsi_Host *shost);
 int mvs_scan_finished(struct Scsi_Host *shost, unsigned long time);
-int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags);
-int mvs_task_abort(struct sas_task *task);
+int mvs_queue_command(struct sas_task *task, const int num,
+			gfp_t gfp_flags);
+int mvs_abort_task(struct sas_task *task);
+int mvs_abort_task_set(struct domain_device *dev, u8 *lun);
+int mvs_clear_aca(struct domain_device *dev, u8 *lun);
+int mvs_clear_task_set(struct domain_device *dev, u8 * lun);
 void mvs_port_formed(struct asd_sas_phy *sas_phy);
+void mvs_port_deformed(struct asd_sas_phy *sas_phy);
+int mvs_dev_found(struct domain_device *dev);
+void mvs_dev_gone(struct domain_device *dev);
+int mvs_lu_reset(struct domain_device *dev, u8 *lun);
+int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags);
 int mvs_I_T_nexus_reset(struct domain_device *dev);
-void mvs_int_full(struct mvs_info *mvi);
-void mvs_tag_init(struct mvs_info *mvi);
-int mvs_nvram_read(struct mvs_info *mvi, u32 addr, void *buf, u32 buflen);
-int __devinit mvs_hw_init(struct mvs_info *mvi);
-void __devinit mvs_print_info(struct mvs_info *mvi);
-void mvs_hba_interrupt_enable(struct mvs_info *mvi);
-void mvs_hba_interrupt_disable(struct mvs_info *mvi);
-void mvs_detect_porttype(struct mvs_info *mvi, int i);
-u8 mvs_assign_reg_set(struct mvs_info *mvi, struct mvs_port *port);
-void mvs_enable_xmt(struct mvs_info *mvi, int PhyId);
-void __devinit mvs_phy_hacks(struct mvs_info *mvi);
-void mvs_free_reg_set(struct mvs_info *mvi, struct mvs_port *port);
-
+int mvs_query_task(struct sas_task *task);
+void mvs_release_task(struct mvs_info *mvi, int phy_no,
+			struct domain_device *dev);
+void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events);
+void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st);
+int mvs_int_rx(struct mvs_info *mvi, bool self_clear);
+void mvs_hexdump(u32 size, u8 *data, u32 baseaddr);
 #endif
+
-- 
cgit v0.10.2


From 14faf12f7df8404c3e8e54baad8d178c327a2f87 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 12 Mar 2009 11:08:51 -0400
Subject: [SCSI] Increase default timeout for INQUIRY

This patch (as1224) changes the default timeout for INQUIRY commands
from 3 seconds to 20 seconds, which is the value used by Windows for
USB Mass-Storage devices.  Some of these devices, like the Corsair
Flash Voyager (see Bugzilla #12188) really do need a long timeout.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index e2b50d8..c447838 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -115,12 +115,12 @@ MODULE_PARM_DESC(max_report_luns,
 		 "REPORT LUNS maximum number of LUNS received (should be"
 		 " between 1 and 16384)");
 
-static unsigned int scsi_inq_timeout = SCSI_TIMEOUT/HZ+3;
+static unsigned int scsi_inq_timeout = SCSI_TIMEOUT/HZ + 18;
 
 module_param_named(inq_timeout, scsi_inq_timeout, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(inq_timeout, 
 		 "Timeout (in seconds) waiting for devices to answer INQUIRY."
-		 " Default is 5. Some non-compliant devices need more.");
+		 " Default is 20. Some devices may need more; most need less.");
 
 /* This lock protects only this list */
 static DEFINE_SPINLOCK(async_scan_lock);
-- 
cgit v0.10.2


From fd65e5e93cbd9d2f34bbb0f0b2f46a30a1d20915 Mon Sep 17 00:00:00 2001
From: Michael Reed <mdr@sgi.com>
Date: Wed, 8 Apr 2009 14:33:48 -0500
Subject: [SCSI] qla1280: driver clean up

Remove some unneeded, inactive and unused code, make some trivial
corrections to comments and a printk, and return a proper status
in qla1280_queuecommand.  No fundamental logic changes are made.

Signed-off-by: Michael Reed <mdr@sgi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 5defe5e..0cbad49 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -435,7 +435,6 @@ static int qla1280_mailbox_command(struct scsi_qla_host *,
 				   uint8_t, uint16_t *);
 static int qla1280_bus_reset(struct scsi_qla_host *, int);
 static int qla1280_device_reset(struct scsi_qla_host *, int, int);
-static int qla1280_abort_device(struct scsi_qla_host *, int, int, int);
 static int qla1280_abort_command(struct scsi_qla_host *, struct srb *, int);
 static int qla1280_abort_isp(struct scsi_qla_host *);
 #ifdef QLA_64BIT_PTR
@@ -698,7 +697,7 @@ qla1280_info(struct Scsi_Host *host)
 }
 
 /**************************************************************************
- *   qla1200_queuecommand
+ *   qla1280_queuecommand
  *     Queue a command to the controller.
  *
  * Note:
@@ -713,7 +712,7 @@ qla1280_queuecommand(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct scsi_qla_host *ha = (struct scsi_qla_host *)host->hostdata;
-	struct srb *sp = (struct srb *)&cmd->SCp;
+	struct srb *sp = (struct srb *)CMD_SP(cmd);
 	int status;
 
 	cmd->scsi_done = fn;
@@ -738,11 +737,9 @@ qla1280_queuecommand(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
 
 enum action {
 	ABORT_COMMAND,
-	ABORT_DEVICE,
 	DEVICE_RESET,
 	BUS_RESET,
 	ADAPTER_RESET,
-	FAIL
 };
 
 /* timer action for error action processor */
@@ -768,7 +765,7 @@ static void qla1280_mailbox_timeout(unsigned long __data)
 }
 
 /**************************************************************************
- * qla1200_error_action
+ * qla1280_error_action
  *    The function will attempt to perform a specified error action and
  *    wait for the results (or time out).
  *
@@ -798,6 +795,8 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct timer_list timer;
 
+	ENTER("qla1280_error_action");
+
 	ha = (struct scsi_qla_host *)(CMD_HOST(cmd)->hostdata);
 
 	dprintk(4, "error_action %i, istatus 0x%04x\n", action,
@@ -807,20 +806,11 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
 		RD_REG_WORD(&ha->iobase->host_cmd),
 		RD_REG_WORD(&ha->iobase->ictrl), jiffies);
 
-	ENTER("qla1280_error_action");
 	if (qla1280_verbose)
 		printk(KERN_INFO "scsi(%li): Resetting Cmnd=0x%p, "
 		       "Handle=0x%p, action=0x%x\n",
 		       ha->host_no, cmd, CMD_HANDLE(cmd), action);
 
-	if (cmd == NULL) {
-		printk(KERN_WARNING "(scsi?:?:?:?) Reset called with NULL "
-		       "si_Cmnd pointer, failing.\n");
-		LEAVE("qla1280_error_action");
-		return FAILED;
-	}
-
-	ha = (struct scsi_qla_host *)cmd->device->host->hostdata;
 	sp = (struct srb *)CMD_SP(cmd);
 	handle = CMD_HANDLE(cmd);
 
@@ -857,9 +847,6 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
 	 * mean the actual success or fail of the action */
 	result = FAILED;
 	switch (action) {
-	case FAIL:
-		break;
-
 	case ABORT_COMMAND:
 		if ((sp->flags & SRB_ABORT_PENDING)) {
 			printk(KERN_WARNING
@@ -893,15 +880,6 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
 		}
 		break;
 
-	case ABORT_DEVICE:
-		if (qla1280_verbose)
-			printk(KERN_INFO
-			       "scsi(%ld:%d:%d:%d): Queueing abort device "
-			       "command.\n", ha->host_no, bus, target, lun);
-		if (qla1280_abort_device(ha, bus, target, lun) == 0)
-			result = SUCCESS;
-		break;
-
 	case DEVICE_RESET:
 		if (qla1280_verbose)
 			printk(KERN_INFO
@@ -1285,8 +1263,6 @@ qla1280_done(struct scsi_qla_host *ha)
 		case DID_ABORT:
 			sp->flags &= ~SRB_ABORT_PENDING;
 			sp->flags |= SRB_ABORTED;
-			if (sp->flags & SRB_TIMEOUT)
-				CMD_RESULT(sp->cmd) = DID_TIME_OUT << 16;
 			break;
 		default:
 			break;
@@ -2417,9 +2393,6 @@ static int
 qla1280_mailbox_command(struct scsi_qla_host *ha, uint8_t mr, uint16_t *mb)
 {
 	struct device_reg __iomem *reg = ha->iobase;
-#if 0
-	LIST_HEAD(done_q);
-#endif
 	int status = 0;
 	int cnt;
 	uint16_t *optr, *iptr;
@@ -2493,19 +2466,9 @@ qla1280_mailbox_command(struct scsi_qla_host *ha, uint8_t mr, uint16_t *mb)
 	mr = MAILBOX_REGISTER_COUNT;
 	memcpy(optr, iptr, MAILBOX_REGISTER_COUNT * sizeof(uint16_t));
 
-#if 0
-	/* Go check for any response interrupts pending. */
-	qla1280_isr(ha, &done_q);
-#endif
-
 	if (ha->flags.reset_marker)
 		qla1280_rst_aen(ha);
 
-#if 0
-	if (!list_empty(&done_q))
-		qla1280_done(ha, &done_q);
-#endif
-
 	if (status)
 		dprintk(2, "qla1280_mailbox_command: **** FAILED, mailbox0 = "
 			"0x%x ****\n", mb[0]);
@@ -2641,41 +2604,6 @@ qla1280_device_reset(struct scsi_qla_host *ha, int bus, int target)
 }
 
 /*
- * qla1280_abort_device
- *      Issue an abort message to the device
- *
- * Input:
- *      ha     = adapter block pointer.
- *      bus    = SCSI BUS.
- *      target = SCSI ID.
- *      lun    = SCSI LUN.
- *
- * Returns:
- *      0 = success
- */
-static int
-qla1280_abort_device(struct scsi_qla_host *ha, int bus, int target, int lun)
-{
-	uint16_t mb[MAILBOX_REGISTER_COUNT];
-	int status;
-
-	ENTER("qla1280_abort_device");
-
-	mb[0] = MBC_ABORT_DEVICE;
-	mb[1] = (bus ? target | BIT_7 : target) << 8 | lun;
-	status = qla1280_mailbox_command(ha, BIT_1 | BIT_0, &mb[0]);
-
-	/* Issue marker command. */
-	qla1280_marker(ha, bus, target, lun, MK_SYNC_ID_LUN);
-
-	if (status)
-		dprintk(2, "qla1280_abort_device: **** FAILED ****\n");
-
-	LEAVE("qla1280_abort_device");
-	return status;
-}
-
-/*
  * qla1280_abort_command
  *      Abort command aborts a specified IOCB.
  *
@@ -2833,7 +2761,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 
 	/* If room for request in request ring. */
 	if ((req_cnt + 2) >= ha->req_q_cnt) {
-		status = 1;
+		status = SCSI_MLQUEUE_HOST_BUSY;
 		dprintk(2, "qla1280_start_scsi: in-ptr=0x%x  req_q_cnt="
 			"0x%xreq_cnt=0x%x", ha->req_ring_index, ha->req_q_cnt,
 			req_cnt);
@@ -2845,7 +2773,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 		     ha->outstanding_cmds[cnt] != NULL; cnt++);
 
 	if (cnt >= MAX_OUTSTANDING_COMMANDS) {
-		status = 1;
+		status = SCSI_MLQUEUE_HOST_BUSY;
 		dprintk(2, "qla1280_start_scsi: NO ROOM IN "
 			"OUTSTANDING ARRAY, req_q_cnt=0x%x", ha->req_q_cnt);
 		goto out;
@@ -3108,7 +3036,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 		ha->req_q_cnt, seg_cnt);
 	/* If room for request in request ring. */
 	if ((req_cnt + 2) >= ha->req_q_cnt) {
-		status = 1;
+		status = SCSI_MLQUEUE_HOST_BUSY;
 		dprintk(2, "qla1280_32bit_start_scsi: in-ptr=0x%x, "
 			"req_q_cnt=0x%x, req_cnt=0x%x", ha->req_ring_index,
 			ha->req_q_cnt, req_cnt);
@@ -3120,7 +3048,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 		     (ha->outstanding_cmds[cnt] != 0); cnt++) ;
 
 	if (cnt >= MAX_OUTSTANDING_COMMANDS) {
-		status = 1;
+		status = SCSI_MLQUEUE_HOST_BUSY;
 		dprintk(2, "qla1280_32bit_start_scsi: NO ROOM IN OUTSTANDING "
 			"ARRAY, req_q_cnt=0x%x\n", ha->req_q_cnt);
 		goto out;
@@ -3495,7 +3423,7 @@ qla1280_isr(struct scsi_qla_host *ha, struct list_head *done_q)
 					 * If we get here we have a real problem!
 					 */
 					printk(KERN_WARNING
-					       "qla1280: ISP invalid handle");
+					       "qla1280: ISP invalid handle\n");
 				}
 			}
 			break;
@@ -3955,13 +3883,6 @@ qla1280_check_for_dead_scsi_bus(struct scsi_qla_host *ha, unsigned int bus)
 
 		if (scsi_control == SCSI_PHASE_INVALID) {
 			ha->bus_settings[bus].scsi_bus_dead = 1;
-#if 0
-			CMD_RESULT(cp) = DID_NO_CONNECT << 16;
-			CMD_HANDLE(cp) = INVALID_HANDLE;
-			/* ha->actthreads--; */
-
-			(*(cp)->scsi_done)(cp);
-#endif
 			return 1;	/* bus is dead */
 		} else {
 			ha->bus_settings[bus].scsi_bus_dead = 0;
-- 
cgit v0.10.2


From 413e6e18b483de272bdafa56e5c086c75f11d681 Mon Sep 17 00:00:00 2001
From: Michael Reed <mdr@sgi.com>
Date: Wed, 8 Apr 2009 14:34:33 -0500
Subject: [SCSI] qla1280: error recovery rewrite

The driver now waits for the scsi commands associated with a
particular error recovery step to be returned to the mid-layer,
and returns the appropriate SUCCESS or FAILED status.  Removes
unneeded polling of chip for interrupts.

This patch also bumps the driver version number.

Signed-off-by: Michael Reed <mdr@sgi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 0cbad49..8371d91 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -17,9 +17,12 @@
 * General Public License for more details.
 *
 ******************************************************************************/
-#define QLA1280_VERSION      "3.26"
+#define QLA1280_VERSION      "3.27"
 /*****************************************************************************
     Revision History:
+    Rev  3.27, February 10, 2009, Michael Reed
+	- General code cleanup.
+	- Improve error recovery.
     Rev  3.26, January 16, 2006 Jes Sorensen
 	- Ditch all < 2.6 support
     Rev  3.25.1, February 10, 2005 Christoph Hellwig
@@ -718,6 +721,8 @@ qla1280_queuecommand(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
 	cmd->scsi_done = fn;
 	sp->cmd = cmd;
 	sp->flags = 0;
+	sp->wait = NULL;
+	CMD_HANDLE(cmd) = (unsigned char *)NULL;
 
 	qla1280_print_scsi_cmd(5, cmd);
 
@@ -742,14 +747,6 @@ enum action {
 	ADAPTER_RESET,
 };
 
-/* timer action for error action processor */
-static void qla1280_error_wait_timeout(unsigned long __data)
-{
-	struct scsi_cmnd *cmd = (struct scsi_cmnd *)__data;
-	struct srb *sp = (struct srb *)CMD_SP(cmd);
-
-	complete(sp->wait);
-}
 
 static void qla1280_mailbox_timeout(unsigned long __data)
 {
@@ -764,6 +761,65 @@ static void qla1280_mailbox_timeout(unsigned long __data)
 	complete(ha->mailbox_wait);
 }
 
+static int
+_qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp,
+				 struct completion *wait)
+{
+	int	status = FAILED;
+	struct scsi_cmnd *cmd = sp->cmd;
+
+	spin_unlock_irq(ha->host->host_lock);
+	wait_for_completion_timeout(wait, 4*HZ);
+	spin_lock_irq(ha->host->host_lock);
+	sp->wait = NULL;
+	if(CMD_HANDLE(cmd) == COMPLETED_HANDLE) {
+		status = SUCCESS;
+		(*cmd->scsi_done)(cmd);
+	}
+	return status;
+}
+
+static int
+qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	sp->wait = &wait;
+	return _qla1280_wait_for_single_command(ha, sp, &wait);
+}
+
+static int
+qla1280_wait_for_pending_commands(struct scsi_qla_host *ha, int bus, int target)
+{
+	int		cnt;
+	int		status;
+	struct srb	*sp;
+	struct scsi_cmnd *cmd;
+
+	status = SUCCESS;
+
+	/*
+	 * Wait for all commands with the designated bus/target
+	 * to be completed by the firmware
+	 */
+	for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
+		sp = ha->outstanding_cmds[cnt];
+		if (sp) {
+			cmd = sp->cmd;
+
+			if (bus >= 0 && SCSI_BUS_32(cmd) != bus)
+				continue;
+			if (target >= 0 && SCSI_TCN_32(cmd) != target)
+				continue;
+
+			status = qla1280_wait_for_single_command(ha, sp);
+			if (status == FAILED)
+				break;
+		}
+	}
+	return status;
+}
+
 /**************************************************************************
  * qla1280_error_action
  *    The function will attempt to perform a specified error action and
@@ -777,11 +833,6 @@ static void qla1280_mailbox_timeout(unsigned long __data)
  * Returns:
  *      SUCCESS or FAILED
  *
- * Note:
- *      Resetting the bus always succeeds - is has to, otherwise the
- *      kernel will panic! Try a surgical technique - sending a BUS
- *      DEVICE RESET message - on the offending target before pulling
- *      the SCSI bus reset line.
  **************************************************************************/
 static int
 qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
@@ -789,15 +840,19 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
 	struct scsi_qla_host *ha;
 	int bus, target, lun;
 	struct srb *sp;
-	uint16_t data;
-	unsigned char *handle;
-	int result, i;
+	int i, found;
+	int result=FAILED;
+	int wait_for_bus=-1;
+	int wait_for_target = -1;
 	DECLARE_COMPLETION_ONSTACK(wait);
-	struct timer_list timer;
 
 	ENTER("qla1280_error_action");
 
 	ha = (struct scsi_qla_host *)(CMD_HOST(cmd)->hostdata);
+	sp = (struct srb *)CMD_SP(cmd);
+	bus = SCSI_BUS_32(cmd);
+	target = SCSI_TCN_32(cmd);
+	lun = SCSI_LUN_32(cmd);
 
 	dprintk(4, "error_action %i, istatus 0x%04x\n", action,
 		RD_REG_WORD(&ha->iobase->istatus));
@@ -811,73 +866,42 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
 		       "Handle=0x%p, action=0x%x\n",
 		       ha->host_no, cmd, CMD_HANDLE(cmd), action);
 
-	sp = (struct srb *)CMD_SP(cmd);
-	handle = CMD_HANDLE(cmd);
-
-	/* Check for pending interrupts. */
-	data = qla1280_debounce_register(&ha->iobase->istatus);
-	/*
-	 * The io_request_lock is held when the reset handler is called, hence
-	 * the interrupt handler cannot be running in parallel as it also
-	 * grabs the lock. /Jes
-	 */
-	if (data & RISC_INT)
-		qla1280_isr(ha, &ha->done_q);
-
 	/*
-	 * Determine the suggested action that the mid-level driver wants
-	 * us to perform.
+	 * Check to see if we have the command in the outstanding_cmds[]
+	 * array.  If not then it must have completed before this error
+	 * action was initiated.  If the error_action isn't ABORT_COMMAND
+	 * then the driver must proceed with the requested action.
 	 */
-	if (handle == (unsigned char *)INVALID_HANDLE || handle == NULL) {
-		if(action == ABORT_COMMAND) {
-			/* we never got this command */
-			printk(KERN_INFO "qla1280: Aborting a NULL handle\n");
-			return SUCCESS;	/* no action - we don't have command */
+	found = -1;
+	for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+		if (sp == ha->outstanding_cmds[i]) {
+			found = i;
+			sp->wait = &wait; /* we'll wait for it to complete */
+			break;
 		}
-	} else {
-		sp->wait = &wait;
 	}
 
-	bus = SCSI_BUS_32(cmd);
-	target = SCSI_TCN_32(cmd);
-	lun = SCSI_LUN_32(cmd);
+	if (found < 0) {	/* driver doesn't have command */
+		result = SUCCESS;
+		if (qla1280_verbose) {
+			printk(KERN_INFO
+			       "scsi(%ld:%d:%d:%d): specified command has "
+			       "already completed.\n", ha->host_no, bus,
+				target, lun);
+		}
+	}
 
-	/* Overloading result.  Here it means the success or fail of the
-	 * *issue* of the action.  When we return from the routine, it must
-	 * mean the actual success or fail of the action */
-	result = FAILED;
 	switch (action) {
-	case ABORT_COMMAND:
-		if ((sp->flags & SRB_ABORT_PENDING)) {
-			printk(KERN_WARNING
-			       "scsi(): Command has a pending abort "
-			       "message - ABORT_PENDING.\n");
-			/* This should technically be impossible since we
-			 * now wait for abort completion */
-			break;
-		}
 
-		for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
-			if (sp == ha->outstanding_cmds[i]) {
-				dprintk(1, "qla1280: RISC aborting command\n");
-				if (qla1280_abort_command(ha, sp, i) == 0)
-					result = SUCCESS;
-				else {
-					/*
-					 * Since we don't know what might
-					 * have happend to the command, it
-					 * is unsafe to remove it from the
-					 * device's queue at this point.
-					 * Wait and let the escalation
-					 * process take care of it.
-					 */
-					printk(KERN_WARNING
-					       "scsi(%li:%i:%i:%i): Unable"
-					       " to abort command!\n",
-					       ha->host_no, bus, target, lun);
-				}
-			}
-		}
+	case ABORT_COMMAND:
+		dprintk(1, "qla1280: RISC aborting command\n");
+		/*
+		 * The abort might fail due to race when the host_lock
+		 * is released to issue the abort.  As such, we
+		 * don't bother to check the return status.
+		 */
+		if (found >= 0)
+			qla1280_abort_command(ha, sp, found);
 		break;
 
 	case DEVICE_RESET:
@@ -885,16 +909,21 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
 			printk(KERN_INFO
 			       "scsi(%ld:%d:%d:%d): Queueing device reset "
 			       "command.\n", ha->host_no, bus, target, lun);
-		if (qla1280_device_reset(ha, bus, target) == 0)
-			result = SUCCESS;
+		if (qla1280_device_reset(ha, bus, target) == 0) {
+			/* issued device reset, set wait conditions */
+			wait_for_bus = bus;
+			wait_for_target = target;
+		}
 		break;
 
 	case BUS_RESET:
 		if (qla1280_verbose)
 			printk(KERN_INFO "qla1280(%ld:%d): Issued bus "
 			       "reset.\n", ha->host_no, bus);
-		if (qla1280_bus_reset(ha, bus) == 0)
-			result = SUCCESS;
+		if (qla1280_bus_reset(ha, bus) == 0) {
+			/* issued bus reset, set wait conditions */
+			wait_for_bus = bus;
+		}
 		break;
 
 	case ADAPTER_RESET:
@@ -907,55 +936,48 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
 			       "continue automatically\n", ha->host_no);
 		}
 		ha->flags.reset_active = 1;
-		/*
-		 * We restarted all of the commands automatically, so the
-		 * mid-level code can expect completions momentitarily.
-		 */
-		if (qla1280_abort_isp(ha) == 0)
-			result = SUCCESS;
+
+		if (qla1280_abort_isp(ha) != 0) {	/* it's dead */
+			result = FAILED;
+		}
 
 		ha->flags.reset_active = 0;
 	}
 
-	if (!list_empty(&ha->done_q))
-		qla1280_done(ha);
-
-	/* If we didn't manage to issue the action, or we have no
-	 * command to wait for, exit here */
-	if (result == FAILED || handle == NULL ||
-	    handle == (unsigned char *)INVALID_HANDLE) {
-		/*
-		 * Clear completion queue to avoid qla1280_done() trying
-		 * to complete the command at a later stage after we
-		 * have exited the current context
-		 */
-		sp->wait = NULL;
-		goto leave;
-	}
+	/*
+	 * At this point, the host_lock has been released and retaken
+	 * by the issuance of the mailbox command.
+	 * Wait for the command passed in by the mid-layer if it
+	 * was found by the driver.  It might have been returned
+	 * between eh recovery steps, hence the check of the "found"
+	 * variable.
+	 */
 
-	/* set up a timer just in case we're really jammed */
-	init_timer(&timer);
-	timer.expires = jiffies + 4*HZ;
-	timer.data = (unsigned long)cmd;
-	timer.function = qla1280_error_wait_timeout;
-	add_timer(&timer);
+	if (found >= 0)
+		result = _qla1280_wait_for_single_command(ha, sp, &wait);
 
-	/* wait for the action to complete (or the timer to expire) */
-	spin_unlock_irq(ha->host->host_lock);
-	wait_for_completion(&wait);
-	del_timer_sync(&timer);
-	spin_lock_irq(ha->host->host_lock);
-	sp->wait = NULL;
+	if (action == ABORT_COMMAND && result != SUCCESS) {
+		printk(KERN_WARNING
+		       "scsi(%li:%i:%i:%i): "
+		       "Unable to abort command!\n",
+		       ha->host_no, bus, target, lun);
+	}
 
-	/* the only action we might get a fail for is abort */
-	if (action == ABORT_COMMAND) {
-		if(sp->flags & SRB_ABORTED)
-			result = SUCCESS;
-		else
-			result = FAILED;
+	/*
+	 * If the command passed in by the mid-layer has been
+	 * returned by the board, then wait for any additional
+	 * commands which are supposed to complete based upon
+	 * the error action.
+	 *
+	 * All commands are unconditionally returned during a
+	 * call to qla1280_abort_isp(), ADAPTER_RESET.  No need
+	 * to wait for them.
+	 */
+	if (result == SUCCESS && wait_for_bus >= 0) {
+		result = qla1280_wait_for_pending_commands(ha,
+					wait_for_bus, wait_for_target);
 	}
 
- leave:
 	dprintk(1, "RESET returning %d\n", result);
 
 	LEAVE("qla1280_error_action");
@@ -1258,7 +1280,8 @@ qla1280_done(struct scsi_qla_host *ha)
 		switch ((CMD_RESULT(cmd) >> 16)) {
 		case DID_RESET:
 			/* Issue marker command. */
-			qla1280_marker(ha, bus, target, 0, MK_SYNC_ID);
+			if (!ha->flags.abort_isp_active)
+				qla1280_marker(ha, bus, target, 0, MK_SYNC_ID);
 			break;
 		case DID_ABORT:
 			sp->flags &= ~SRB_ABORT_PENDING;
@@ -1272,12 +1295,11 @@ qla1280_done(struct scsi_qla_host *ha)
 		scsi_dma_unmap(cmd);
 
 		/* Call the mid-level driver interrupt handler */
-		CMD_HANDLE(sp->cmd) = (unsigned char *)INVALID_HANDLE;
 		ha->actthreads--;
 
-		(*(cmd)->scsi_done)(cmd);
-
-		if(sp->wait != NULL)
+		if (sp->wait == NULL)
+			(*(cmd)->scsi_done)(cmd);
+		else
 			complete(sp->wait);
 	}
 	LEAVE("qla1280_done");
@@ -3415,6 +3437,7 @@ qla1280_isr(struct scsi_qla_host *ha, struct list_head *done_q)
 
 					/* Save ISP completion status */
 					CMD_RESULT(sp->cmd) = 0;
+					CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
 
 					/* Place block on done queue */
 					list_add_tail(&sp->list, done_q);
@@ -3681,6 +3704,8 @@ qla1280_status_entry(struct scsi_qla_host *ha, struct response *pkt,
 		}
 	}
 
+	CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
+
 	/* Place command on done queue. */
 	list_add_tail(&sp->list, done_q);
  out:
@@ -3736,6 +3761,8 @@ qla1280_error_entry(struct scsi_qla_host *ha, struct response *pkt,
 			CMD_RESULT(sp->cmd) = DID_ERROR << 16;
 		}
 
+		CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
+
 		/* Place command on done queue. */
 		list_add_tail(&sp->list, done_q);
 	}
@@ -3786,19 +3813,16 @@ qla1280_abort_isp(struct scsi_qla_host *ha)
 		struct scsi_cmnd *cmd;
 		sp = ha->outstanding_cmds[cnt];
 		if (sp) {
-
 			cmd = sp->cmd;
 			CMD_RESULT(cmd) = DID_RESET << 16;
-
-			sp->cmd = NULL;
+			CMD_HANDLE(cmd) = COMPLETED_HANDLE;
 			ha->outstanding_cmds[cnt] = NULL;
-
-			(*cmd->scsi_done)(cmd);
-
-			sp->flags = 0;
+			list_add_tail(&sp->list, &ha->done_q);
 		}
 	}
 
+	qla1280_done(ha);
+
 	status = qla1280_load_firmware(ha);
 	if (status)
 		goto out;
diff --git a/drivers/scsi/qla1280.h b/drivers/scsi/qla1280.h
index d7c44b8..834884b 100644
--- a/drivers/scsi/qla1280.h
+++ b/drivers/scsi/qla1280.h
@@ -88,7 +88,8 @@
 
 /* Maximum outstanding commands in ISP queues */
 #define MAX_OUTSTANDING_COMMANDS	512
-#define INVALID_HANDLE			(MAX_OUTSTANDING_COMMANDS + 2)
+#define COMPLETED_HANDLE		((unsigned char *) \
+					(MAX_OUTSTANDING_COMMANDS + 2))
 
 /* ISP request and response entry counts (37-65535) */
 #define REQUEST_ENTRY_CNT		255 /* Number of request entries. */
-- 
cgit v0.10.2


From 410604d25faddb1b4f0f9667b7452c06cc06cea1 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <Aaro.Koskinen@nokia.com>
Date: Tue, 14 Apr 2009 15:46:59 -0500
Subject: [SCSI] sym53c8xx_2: lun to_clear flag not re-initialized (2.6.27.5)

(Resent with proper formatting)

Fix for the sym53c8xx_2 driver to initialize lun's to_clear flag after
a bus reset (a failed clear can trigger a bus reset and it should not
be attemped again after that).

Signed-off-by: Aaro Koskinen <Aaro.Koskinen@nokia.com>
Tested-by: Tony Battersby <tonyb@cybernetics.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index ffa70d1..60d6a6d 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -1896,6 +1896,15 @@ void sym_start_up(struct Scsi_Host *shost, int reason)
 		tp->head.sval = 0;
 		tp->head.wval = np->rv_scntl3;
 		tp->head.uval = 0;
+		if (tp->lun0p)
+			tp->lun0p->to_clear = 0;
+		if (tp->lunmp) {
+			int ln;
+
+			for (ln = 1; ln < SYM_CONF_MAX_LUN; ln++)
+				if (tp->lunmp[ln])
+					tp->lunmp[ln]->to_clear = 0;
+		}
 	}
 
 	/*
-- 
cgit v0.10.2


From fa8584566cc9cdaf067dbc12132792887a521da9 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <Aaro.Koskinen@nokia.com>
Date: Tue, 14 Apr 2009 15:47:00 -0500
Subject: [SCSI] sym53c8xx_2: slave_alloc/destroy safety (2.6.27.5)

Make the sym53c8xx_2 driver slave_alloc/destroy less unsafe. References
to the destroyed LCB are cleared from the target structure (instead of
leaving a dangling pointer), and when the last LCB for the target is
destroyed the reference to the upper layer target data is cleared. The
host lock is used to prevent a race with the interrupt handler. Also
user commands are prevented for targets with all LCBs destroyed.

Signed-off-by: Aaro Koskinen <Aaro.Koskinen@nokia.com>
Tested-by: Tony Battersby <tonyb@cybernetics.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 583966e..45374d6 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -737,11 +737,14 @@ static int sym53c8xx_slave_alloc(struct scsi_device *sdev)
 	struct sym_hcb *np = sym_get_hcb(sdev->host);
 	struct sym_tcb *tp = &np->target[sdev->id];
 	struct sym_lcb *lp;
+	unsigned long flags;
+	int error;
 
 	if (sdev->id >= SYM_CONF_MAX_TARGET || sdev->lun >= SYM_CONF_MAX_LUN)
 		return -ENXIO;
 
-	tp->starget = sdev->sdev_target;
+	spin_lock_irqsave(np->s.host->host_lock, flags);
+
 	/*
 	 * Fail the device init if the device is flagged NOSCAN at BOOT in
 	 * the NVRAM.  This may speed up boot and maintain coherency with
@@ -753,26 +756,37 @@ static int sym53c8xx_slave_alloc(struct scsi_device *sdev)
 
 	if (tp->usrflags & SYM_SCAN_BOOT_DISABLED) {
 		tp->usrflags &= ~SYM_SCAN_BOOT_DISABLED;
-		starget_printk(KERN_INFO, tp->starget,
+		starget_printk(KERN_INFO, sdev->sdev_target,
 				"Scan at boot disabled in NVRAM\n");
-		return -ENXIO;
+		error = -ENXIO;
+		goto out;
 	}
 
 	if (tp->usrflags & SYM_SCAN_LUNS_DISABLED) {
-		if (sdev->lun != 0)
-			return -ENXIO;
-		starget_printk(KERN_INFO, tp->starget,
+		if (sdev->lun != 0) {
+			error = -ENXIO;
+			goto out;
+		}
+		starget_printk(KERN_INFO, sdev->sdev_target,
 				"Multiple LUNs disabled in NVRAM\n");
 	}
 
 	lp = sym_alloc_lcb(np, sdev->id, sdev->lun);
-	if (!lp)
-		return -ENOMEM;
+	if (!lp) {
+		error = -ENOMEM;
+		goto out;
+	}
+	if (tp->nlcb == 1)
+		tp->starget = sdev->sdev_target;
 
 	spi_min_period(tp->starget) = tp->usr_period;
 	spi_max_width(tp->starget) = tp->usr_width;
 
-	return 0;
+	error = 0;
+out:
+	spin_unlock_irqrestore(np->s.host->host_lock, flags);
+
+	return error;
 }
 
 /*
@@ -819,12 +833,34 @@ static int sym53c8xx_slave_configure(struct scsi_device *sdev)
 static void sym53c8xx_slave_destroy(struct scsi_device *sdev)
 {
 	struct sym_hcb *np = sym_get_hcb(sdev->host);
-	struct sym_lcb *lp = sym_lp(&np->target[sdev->id], sdev->lun);
+	struct sym_tcb *tp = &np->target[sdev->id];
+	struct sym_lcb *lp = sym_lp(tp, sdev->lun);
+	unsigned long flags;
+
+	spin_lock_irqsave(np->s.host->host_lock, flags);
+
+	if (lp->busy_itlq || lp->busy_itl) {
+		/*
+		 * This really shouldn't happen, but we can't return an error
+		 * so let's try to stop all on-going I/O.
+		 */
+		starget_printk(KERN_WARNING, tp->starget,
+			       "Removing busy LCB (%d)\n", sdev->lun);
+		sym_reset_scsi_bus(np, 1);
+	}
 
-	if (lp->itlq_tbl)
-		sym_mfree_dma(lp->itlq_tbl, SYM_CONF_MAX_TASK * 4, "ITLQ_TBL");
-	kfree(lp->cb_tags);
-	sym_mfree_dma(lp, sizeof(*lp), "LCB");
+	if (sym_free_lcb(np, sdev->id, sdev->lun) == 0) {
+		/*
+		 * It was the last unit for this target.
+		 */
+		tp->head.sval        = 0;
+		tp->head.wval        = np->rv_scntl3;
+		tp->head.uval        = 0;
+		tp->tgoal.check_nego = 1;
+		tp->starget	     = NULL;
+	}
+
+	spin_unlock_irqrestore(np->s.host->host_lock, flags);
 }
 
 /*
@@ -890,6 +926,8 @@ static void sym_exec_user_command (struct sym_hcb *np, struct sym_usrcmd *uc)
 			if (!((uc->target >> t) & 1))
 				continue;
 			tp = &np->target[t];
+			if (!tp->nlcb)
+				continue;
 
 			switch (uc->cmd) {
 
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index 60d6a6d..69ad494 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -4997,7 +4997,7 @@ struct sym_lcb *sym_alloc_lcb (struct sym_hcb *np, u_char tn, u_char ln)
 	 */
 	if (ln && !tp->lunmp) {
 		tp->lunmp = kcalloc(SYM_CONF_MAX_LUN, sizeof(struct sym_lcb *),
-				GFP_KERNEL);
+				GFP_ATOMIC);
 		if (!tp->lunmp)
 			goto fail;
 	}
@@ -5017,6 +5017,7 @@ struct sym_lcb *sym_alloc_lcb (struct sym_hcb *np, u_char tn, u_char ln)
 		tp->lun0p = lp;
 		tp->head.lun0_sa = cpu_to_scr(vtobus(lp));
 	}
+	tp->nlcb++;
 
 	/*
 	 *  Let the itl task point to error handling.
@@ -5094,6 +5095,43 @@ fail:
 }
 
 /*
+ *  Lun control block deallocation. Returns the number of valid remaing LCBs
+ *  for the target.
+ */
+int sym_free_lcb(struct sym_hcb *np, u_char tn, u_char ln)
+{
+	struct sym_tcb *tp = &np->target[tn];
+	struct sym_lcb *lp = sym_lp(tp, ln);
+
+	tp->nlcb--;
+
+	if (ln) {
+		if (!tp->nlcb) {
+			kfree(tp->lunmp);
+			sym_mfree_dma(tp->luntbl, 256, "LUNTBL");
+			tp->lunmp = NULL;
+			tp->luntbl = NULL;
+			tp->head.luntbl_sa = cpu_to_scr(vtobus(np->badluntbl));
+		} else {
+			tp->luntbl[ln] = cpu_to_scr(vtobus(&np->badlun_sa));
+			tp->lunmp[ln] = NULL;
+		}
+	} else {
+		tp->lun0p = NULL;
+		tp->head.lun0_sa = cpu_to_scr(vtobus(&np->badlun_sa));
+	}
+
+	if (lp->itlq_tbl) {
+		sym_mfree_dma(lp->itlq_tbl, SYM_CONF_MAX_TASK*4, "ITLQ_TBL");
+		kfree(lp->cb_tags);
+	}
+
+	sym_mfree_dma(lp, sizeof(*lp), "LCB");
+
+	return tp->nlcb;
+}
+
+/*
  *  Queue a SCSI IO to the controller.
  */
 int sym_queue_scsiio(struct sym_hcb *np, struct scsi_cmnd *cmd, struct sym_ccb *cp)
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.h b/drivers/scsi/sym53c8xx_2/sym_hipd.h
index 9ebc870..053e63c 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.h
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.h
@@ -401,6 +401,7 @@ struct sym_tcb {
 	 *  An array of bus addresses is used on reselection.
 	 */
 	u32	*luntbl;	/* LCBs bus address table	*/
+	int	nlcb;		/* Number of valid LCBs (including LUN #0) */
 
 	/*
 	 *  LUN table used by the C code.
@@ -1065,6 +1066,7 @@ int sym_clear_tasks(struct sym_hcb *np, int cam_status, int target, int lun, int
 struct sym_ccb *sym_get_ccb(struct sym_hcb *np, struct scsi_cmnd *cmd, u_char tag_order);
 void sym_free_ccb(struct sym_hcb *np, struct sym_ccb *cp);
 struct sym_lcb *sym_alloc_lcb(struct sym_hcb *np, u_char tn, u_char ln);
+int sym_free_lcb(struct sym_hcb *np, u_char tn, u_char ln);
 int sym_queue_scsiio(struct sym_hcb *np, struct scsi_cmnd *csio, struct sym_ccb *cp);
 int sym_abort_scsiio(struct sym_hcb *np, struct scsi_cmnd *ccb, int timed_out);
 int sym_reset_scsi_target(struct sym_hcb *np, int target);
-- 
cgit v0.10.2


From 8f03226358972f93cd45be0a710927cbb7fd5127 Mon Sep 17 00:00:00 2001
From: "Chauhan, Vijay" <Vijay.Chauhan@lsi.com>
Date: Mon, 20 Apr 2009 18:14:23 +0530
Subject: [SCSI] scsi_dh_rdac: Retry for NOT_READY(02/04/01) in rdac device
 handler

During device discovery read capacity fails with 0x020401 and sets the
device size to 0. As a reason any I/O submitted to this path gets
killed at sd_prep_fn with BLKPREP_KILL. This patch is to retry for
0x020401. NEED_RETRY in scsi_decide_disposition does not give
sufficient time for the device to become ready.

Signed-off-by: Vijay Chauhan <vijay.chauhan@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 43b8c51..fd0544f 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -561,6 +561,12 @@ static int rdac_check_sense(struct scsi_device *sdev,
 	struct rdac_dh_data *h = get_rdac_data(sdev);
 	switch (sense_hdr->sense_key) {
 	case NOT_READY:
+		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x01)
+			/* LUN Not Ready - Logical Unit Not Ready and is in
+			* the process of becoming ready
+			* Just retry.
+			*/
+			return ADD_TO_MLQUEUE;
 		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x81)
 			/* LUN Not Ready - Storage firmware incompatible
 			 * Manual code synchonisation required.
-- 
cgit v0.10.2


From f381642d8f8963e62f5d45774505fd936f2b6072 Mon Sep 17 00:00:00 2001
From: "Kleber S. Souza" <klebers@linux.vnet.ibm.com>
Date: Wed, 22 Apr 2009 10:50:28 -0300
Subject: [SCSI] ipr: ipr_remove() marked __devexit

Marking the ipr clean up function ipr_remove() as __devexit and using
__devexit_p() macro in its address reference.

Signed-off-by: Kleber Sacilotto de Souza <kleber@linux.vnet.ibm.com>
Reported-by: Breno Leitao <leitao@linux.vnet.ibm.com>
Acked-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index dd689de..764cfcc 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7688,7 +7688,7 @@ static void __ipr_remove(struct pci_dev *pdev)
  * Return value:
  * 	none
  **/
-static void ipr_remove(struct pci_dev *pdev)
+static void __devexit ipr_remove(struct pci_dev *pdev)
 {
 	struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
 
@@ -7864,7 +7864,7 @@ static struct pci_driver ipr_driver = {
 	.name = IPR_NAME,
 	.id_table = ipr_pci_table,
 	.probe = ipr_probe,
-	.remove = ipr_remove,
+	.remove = __devexit_p(ipr_remove),
 	.shutdown = ipr_shutdown,
 	.err_handler = &ipr_err_handler,
 };
-- 
cgit v0.10.2


From 73da9c13d4df3c1715029aa45edc78d71b617dfd Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 22 Apr 2009 17:42:25 -0700
Subject: [SCSI] scsi_debug: fix virtual disk larger than 1TB

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 213123b..41a2177 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -887,7 +887,7 @@ static int resp_start_stop(struct scsi_cmnd * scp,
 static sector_t get_sdebug_capacity(void)
 {
 	if (scsi_debug_virtual_gb > 0)
-		return 2048 * 1024 * scsi_debug_virtual_gb;
+		return 2048 * 1024 * (sector_t)scsi_debug_virtual_gb;
 	else
 		return sdebug_store_sectors;
 }
-- 
cgit v0.10.2


From b0d428adebe9f1232c72bf4c686a6f0eed047cc2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 27 Apr 2009 21:49:31 -0700
Subject: [SCSI] fcoe, libfc: fix function declarations to be ANSI-compliant

Fix function declarations:

drivers/scsi/fcoe/fcoe.c:1356:28: warning: non-ANSI function declaration of function 'fcoe_dev_setup'
drivers/scsi/libfc/fc_rport.c:1293:20: warning: non-ANSI function declaration of function 'fc_setup_rport'
drivers/scsi/libfc/fc_rport.c:1302:23: warning: non-ANSI function declaration of function 'fc_destroy_rport'

[jejb: fixed wrong doc in comment noticed during inspection]
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 03e1926..d08121f 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1350,13 +1350,13 @@ out:
 /**
  * fcoe_dev_setup() - setup link change notification interface
  */
-static void fcoe_dev_setup()
+static void fcoe_dev_setup(void)
 {
 	register_netdevice_notifier(&fcoe_notifier);
 }
 
 /**
- * fcoe_dev_setup() - cleanup link change notification interface
+ * fcoe_dev_cleanup() - cleanup link change notification interface
  */
 static void fcoe_dev_cleanup(void)
 {
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 747d73c..3f5094e 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -1330,7 +1330,7 @@ int fc_rport_init(struct fc_lport *lport)
 }
 EXPORT_SYMBOL(fc_rport_init);
 
-int fc_setup_rport()
+int fc_setup_rport(void)
 {
 	rport_event_queue = create_singlethread_workqueue("fc_rport_eq");
 	if (!rport_event_queue)
@@ -1339,7 +1339,7 @@ int fc_setup_rport()
 }
 EXPORT_SYMBOL(fc_setup_rport);
 
-void fc_destroy_rport()
+void fc_destroy_rport(void)
 {
 	destroy_workqueue(rport_event_queue);
 }
-- 
cgit v0.10.2


From 03fbdb15c14e9746c63168e3ff2c64b9c8336d33 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Wed, 20 May 2009 22:39:08 +0100
Subject: [ARM] 5519/1: amba probe: pass "struct amba_id *" instead of void *

The second argument of the probe method points to the amba_id
structure, so it's better passed with the correct type. None of the
current in-tree drivers uses the pointer, so they have only been
checked for a clean compile.

Change suggested by Russell King.

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c
index e29cdc1..a28c06d 100644
--- a/drivers/input/serio/ambakmi.c
+++ b/drivers/input/serio/ambakmi.c
@@ -107,7 +107,7 @@ static void amba_kmi_close(struct serio *io)
 	clk_disable(kmi->clk);
 }
 
-static int amba_kmi_probe(struct amba_device *dev, void *id)
+static int amba_kmi_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct amba_kmi_port *kmi;
 	struct serio *io;
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 36875dc..7d4febd 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -490,7 +490,7 @@ static void mmci_check_status(unsigned long data)
 	mod_timer(&host->timer, jiffies + HZ);
 }
 
-static int __devinit mmci_probe(struct amba_device *dev, void *id)
+static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct mmc_platform_data *plat = dev->dev.platform_data;
 	struct mmci_host *host;
diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c
index 8261535..aaf1f75 100644
--- a/drivers/rtc/rtc-pl030.c
+++ b/drivers/rtc/rtc-pl030.c
@@ -102,7 +102,7 @@ static const struct rtc_class_ops pl030_ops = {
 	.set_alarm	= pl030_set_alarm,
 };
 
-static int pl030_probe(struct amba_device *dev, void *id)
+static int pl030_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct pl030_rtc *rtc;
 	int ret;
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index 333eec6..451fc13 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -127,7 +127,7 @@ static int pl031_remove(struct amba_device *adev)
 	return 0;
 }
 
-static int pl031_probe(struct amba_device *adev, void *id)
+static int pl031_probe(struct amba_device *adev, struct amba_id *id)
 {
 	int ret;
 	struct pl031_local *ldata;
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index e3a5ad5..cdc049d 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -665,7 +665,7 @@ static struct uart_driver amba_reg = {
 	.cons			= AMBA_CONSOLE,
 };
 
-static int pl010_probe(struct amba_device *dev, void *id)
+static int pl010_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct uart_amba_port *uap;
 	void __iomem *base;
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 8b2b970..88fdac5 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -729,7 +729,7 @@ static struct uart_driver amba_reg = {
 	.cons			= AMBA_CONSOLE,
 };
 
-static int pl011_probe(struct amba_device *dev, void *id)
+static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct uart_amba_port *uap;
 	void __iomem *base;
diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index 61050ab..d1f80ba 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -437,7 +437,7 @@ static int clcdfb_register(struct clcd_fb *fb)
 	return ret;
 }
 
-static int clcdfb_probe(struct amba_device *dev, void *id)
+static int clcdfb_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct clcd_board *board = dev->dev.platform_data;
 	struct clcd_fb *fb;
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 51e6e54..9b93caf 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -28,7 +28,7 @@ struct amba_id {
 
 struct amba_driver {
 	struct device_driver	drv;
-	int			(*probe)(struct amba_device *, void *);
+	int			(*probe)(struct amba_device *, struct amba_id *);
 	int			(*remove)(struct amba_device *);
 	void			(*shutdown)(struct amba_device *);
 	int			(*suspend)(struct amba_device *, pm_message_t);
diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c
index 7fbd68f..5c48e36 100644
--- a/sound/arm/aaci.c
+++ b/sound/arm/aaci.c
@@ -1074,7 +1074,7 @@ static unsigned int __devinit aaci_size_fifo(struct aaci *aaci)
 	return i;
 }
 
-static int __devinit aaci_probe(struct amba_device *dev, void *id)
+static int __devinit aaci_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct aaci *aaci;
 	int ret, i;
-- 
cgit v0.10.2


From 385aa9e7012d35b017981e67b3464aef4e1e7108 Mon Sep 17 00:00:00 2001
From: Thomas Reitmayr <treitmayr@devbase.at>
Date: Tue, 19 May 2009 19:35:26 +0200
Subject: [ARM] Kirkwood: Correct MPP for SATA activity/presence LEDs of QNAP
 TS-119/TS-219.

For the QNAP TS-119 and TS-219 the wrong MPPs were used for the SATA
activity/presence LEDs. The new settings make these LEDs work as
expected.

Signed-off-by: Thomas Reitmayr <treitmayr@devbase.at>
Tested-by: Martin Michlmayr <tbm@cyrius.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/ts219-setup.c b/arch/arm/mach-kirkwood/ts219-setup.c
index dda5743..01aa213 100644
--- a/arch/arm/mach-kirkwood/ts219-setup.c
+++ b/arch/arm/mach-kirkwood/ts219-setup.c
@@ -142,6 +142,8 @@ static unsigned int qnap_ts219_mpp_config[] __initdata = {
 	MPP1_SPI_MOSI,
 	MPP2_SPI_SCK,
 	MPP3_SPI_MISO,
+	MPP4_SATA1_ACTn,
+	MPP5_SATA0_ACTn,
 	MPP8_TW_SDA,
 	MPP9_TW_SCK,
 	MPP10_UART0_TXD,
@@ -150,10 +152,6 @@ static unsigned int qnap_ts219_mpp_config[] __initdata = {
 	MPP14_UART1_RXD,	/* PIC controller */
 	MPP15_GPIO,		/* USB Copy button */
 	MPP16_GPIO,		/* Reset button */
-	MPP20_SATA1_ACTn,
-	MPP21_SATA0_ACTn,
-	MPP22_SATA1_PRESENTn,
-	MPP23_SATA0_PRESENTn,
 	0
 };
 
-- 
cgit v0.10.2


From 60a0b8f93664621a07b93273fc8ebc29590c62f5 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 21 May 2009 12:23:12 +0100
Subject: GFS2: Add a rgrp bitmap full flag

During block allocation, it is useful to know if sections of disk
are full on a finer grained basis than a single resource group.
This can make a performance difference when resource groups have
larger numbers of bitmap blocks, since we no longer have to search
them all block by block in each individual bitmap.

The full flag is set on a per-bitmap basis when it has been
searched and found to have no free space. It is then skipped in
subsequent searches until the flag is reset. The resetting
occurs if we have to drop the glock on the resource group for any
reason, or if we deallocate some blocks within that resource
group and thus free up some space.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index de50d86..dd87379 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -64,9 +64,12 @@ struct gfs2_log_element {
 	const struct gfs2_log_operations *le_ops;
 };
 
+#define GBF_FULL 1
+
 struct gfs2_bitmap {
 	struct buffer_head *bi_bh;
 	char *bi_clone;
+	unsigned long bi_flags;
 	u32 bi_offset;
 	u32 bi_start;
 	u32 bi_len;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index fbacf09..23637b9 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -442,6 +442,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 	for (x = 0; x < length; x++) {
 		bi = rgd->rd_bits + x;
 
+		bi->bi_flags = 0;
 		/* small rgrp; bitmap stored completely in header block */
 		if (length == 1) {
 			bytes = bytes_left;
@@ -769,6 +770,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 	}
 
 	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
+		for (x = 0; x < length; x++)
+			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
 		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
 		rgd->rd_flags |= GFS2_RDF_UPTODATE;
 	}
@@ -897,6 +900,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 			continue;
 		if (sdp->sd_args.ar_discard)
 			gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
+		clear_bit(GBF_FULL, &bi->bi_flags);
 		memcpy(bi->bi_clone + bi->bi_offset,
 		       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
 	}
@@ -1309,30 +1313,37 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
 {
 	struct gfs2_bitmap *bi = NULL;
 	const u32 length = rgd->rd_length;
-	u32 blk = 0;
+	u32 blk = BFITNOENT;
 	unsigned int buf, x;
 	const unsigned int elen = *n;
-	const u8 *buffer;
+	const u8 *buffer = NULL;
 
 	*n = 0;
 	/* Find bitmap block that contains bits for goal block */
 	for (buf = 0; buf < length; buf++) {
 		bi = rgd->rd_bits + buf;
-		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
-			break;
+		/* Convert scope of "goal" from rgrp-wide to within found bit block */
+		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
+			goal -= bi->bi_start * GFS2_NBBY;
+			goto do_search;
+		}
 	}
+	buf = 0;
+	goal = 0;
 
-	gfs2_assert(rgd->rd_sbd, buf < length);
-
-	/* Convert scope of "goal" from rgrp-wide to within found bit block */
-	goal -= bi->bi_start * GFS2_NBBY;
-
+do_search:
 	/* Search (up to entire) bitmap in this rgrp for allocatable block.
 	   "x <= length", instead of "x < length", because we typically start
 	   the search in the middle of a bit block, but if we can't find an
 	   allocatable block anywhere else, we want to be able wrap around and
 	   search in the first part of our first-searched bit block.  */
 	for (x = 0; x <= length; x++) {
+		bi = rgd->rd_bits + buf;
+
+		if (test_bit(GBF_FULL, &bi->bi_flags) &&
+		    (old_state == GFS2_BLKST_FREE))
+			goto skip;
+
 		/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
 		   bitmaps, so we must search the originals for that. */
 		buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1343,33 +1354,39 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
 		if (blk != BFITNOENT)
 			break;
 
+		if ((goal == 0) && (old_state == GFS2_BLKST_FREE))
+			set_bit(GBF_FULL, &bi->bi_flags);
+
 		/* Try next bitmap block (wrap back to rgrp header if at end) */
-		buf = (buf + 1) % length;
-		bi = rgd->rd_bits + buf;
+skip:
+		buf++;
+		buf %= length;
 		goal = 0;
 	}
 
-	if (blk != BFITNOENT && old_state != new_state) {
-		*n = 1;
-		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+	if (blk == BFITNOENT)
+		return blk;
+	*n = 1;
+	if (old_state == new_state)
+		goto out;
+
+	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+	gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
+		    bi->bi_len, blk, new_state);
+	goal = blk;
+	while (*n < elen) {
+		goal++;
+		if (goal >= (bi->bi_len * GFS2_NBBY))
+			break;
+		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
+		    GFS2_BLKST_FREE)
+			break;
 		gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-			    bi->bi_len, blk, new_state);
-		goal = blk;
-		while (*n < elen) {
-			goal++;
-			if (goal >= (bi->bi_len * GFS2_NBBY))
-				break;
-			if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
-			    GFS2_BLKST_FREE)
-				break;
-			gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone,
-				    bi->bi_offset, bi->bi_len, goal,
-				    new_state);
-			(*n)++;
-		}
+			    bi->bi_len, goal, new_state);
+		(*n)++;
 	}
-
-	return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk;
+out:
+	return (bi->bi_start * GFS2_NBBY) + blk;
 }
 
 /**
-- 
cgit v0.10.2


From 873a2e89c5e32a5c72a9ece76fcb871358ae22d2 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Mon, 11 May 2009 15:37:26 +0200
Subject: microblaze: Remove POWERPC reference from Microblaze gpio.h

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/gpio.h b/arch/microblaze/include/asm/gpio.h
index ea04632..2345ac3 100644
--- a/arch/microblaze/include/asm/gpio.h
+++ b/arch/microblaze/include/asm/gpio.h
@@ -11,8 +11,8 @@
  * (at your option) any later version.
  */
 
-#ifndef __ASM_POWERPC_GPIO_H
-#define __ASM_POWERPC_GPIO_H
+#ifndef _ASM_MICROBLAZE_GPIO_H
+#define _ASM_MICROBLAZE_GPIO_H
 
 #include <linux/errno.h>
 #include <asm-generic/gpio.h>
@@ -53,4 +53,4 @@ static inline int irq_to_gpio(unsigned int irq)
 
 #endif /* CONFIG_GPIOLIB */
 
-#endif /* __ASM_POWERPC_GPIO_H */
+#endif /* _ASM_MICROBLAZE_GPIO_H */
-- 
cgit v0.10.2


From 6fa612b56c575a5235568593eab4240c90608630 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Mon, 11 May 2009 15:49:12 +0200
Subject: microblaze: Kconfig: Enable drivers for Microblaze

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 8cc312b..e6af7dd 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -6,6 +6,7 @@ mainmenu "Linux/Microblaze Kernel Configuration"
 config MICROBLAZE
 	def_bool y
 	select HAVE_LMB
+	select ARCH_WANT_OPTIONAL_GPIOLIB
 
 config SWAP
 	def_bool n
@@ -49,6 +50,9 @@ config GENERIC_CLOCKEVENTS
 config GENERIC_HARDIRQS_NO__DO_IRQ
 	def_bool y
 
+config GENERIC_GPIO
+	def_bool y
+
 config PCI
 	depends on !MMU
 	def_bool n
@@ -105,9 +109,6 @@ config CMDLINE_FORCE
 config OF
 	def_bool y
 
-config OF_DEVICE
-	def_bool y
-
 config PROC_DEVICETREE
 	bool "Support for device tree in /proc"
 	depends on PROC_FS
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index ddea8e4..9f1665f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -438,7 +438,7 @@ source "drivers/s390/block/Kconfig"
 
 config XILINX_SYSACE
 	tristate "Xilinx SystemACE support"
-	depends on 4xx
+	depends on 4xx || MICROBLAZE
 	help
 	  Include support for the Xilinx SystemACE CompactFlash interface
 
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 735bbe2..bb1a071 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -893,7 +893,7 @@ config DTLK
 
 config XILINX_HWICAP
 	tristate "Xilinx HWICAP Support"
-	depends on XILINX_VIRTEX
+	depends on XILINX_VIRTEX || MICROBLAZE
 	help
 	  This option enables support for Xilinx Internal Configuration
 	  Access Port (ICAP) driver.  The ICAP is used on Xilinx Virtex
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index edb0253..11f3739 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -69,7 +69,7 @@ comment "Memory mapped GPIO expanders:"
 
 config GPIO_XILINX
 	bool "Xilinx GPIO support"
-	depends on PPC_OF
+	depends on PPC_OF || MICROBLAZE
 	help
 	  Say yes here to support the Xilinx FPGA GPIO device
 
diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig
index da3c3a5..c4b3fbd 100644
--- a/drivers/input/serio/Kconfig
+++ b/drivers/input/serio/Kconfig
@@ -192,7 +192,7 @@ config SERIO_RAW
 
 config SERIO_XILINX_XPS_PS2
 	tristate "Xilinx XPS PS/2 Controller Support"
-	depends on PPC
+	depends on PPC || MICROBLAZE
 	help
 	  This driver supports XPS PS/2 IP from the Xilinx EDK on
 	  PowerPC platform.
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index f821dbc..27f3b81 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -1,21 +1,21 @@
 config OF_DEVICE
 	def_bool y
-	depends on OF && (SPARC || PPC_OF)
+	depends on OF && (SPARC || PPC_OF || MICROBLAZE)
 
 config OF_GPIO
 	def_bool y
-	depends on OF && PPC_OF && GPIOLIB
+	depends on OF && (PPC_OF || MICROBLAZE) && GPIOLIB
 	help
 	  OpenFirmware GPIO accessors
 
 config OF_I2C
 	def_tristate I2C
-	depends on PPC_OF && I2C
+	depends on (PPC_OF || MICROBLAZE) && I2C
 	help
 	  OpenFirmware I2C accessors
 
 config OF_SPI
 	def_tristate SPI
-	depends on OF && PPC_OF && SPI
+	depends on OF && (PPC_OF || MICROBLAZE) && SPI
 	help
 	  OpenFirmware SPI accessors
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 83a185d..9574947 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -212,7 +212,7 @@ config SPI_TXX9
 
 config SPI_XILINX
 	tristate "Xilinx SPI controller"
-	depends on XILINX_VIRTEX && EXPERIMENTAL
+	depends on (XILINX_VIRTEX || MICROBLAZE) && EXPERIMENTAL
 	select SPI_BITBANG
 	help
 	  This exposes the SPI controller IP from the Xilinx EDK.
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index c6c816b..5eee3f8 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -22,6 +22,7 @@ config USB_ARCH_HAS_HCD
 	default y if PCMCIA && !M32R			# sl811_cs
 	default y if ARM				# SL-811
 	default y if SUPERH				# r8a66597-hcd
+	default y if MICROBLAZE
 	default PCI
 
 # many non-PCI SOC chips embed OHCI
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 0048f11..74712cb 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1996,7 +1996,7 @@ config FB_PS3_DEFAULT_SIZE_M
 
 config FB_XILINX
 	tristate "Xilinx frame buffer support"
-	depends on FB && XILINX_VIRTEX
+	depends on FB && (XILINX_VIRTEX || MICROBLAZE)
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
-- 
cgit v0.10.2


From 5af7fa68103e7b2efb0fd9d901b1c25bad96fd21 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 1 May 2009 21:48:15 +0000
Subject: microblaze: export some symbols

Some device drivers require the symbols _ebss, kernel_thread,
__page_offset or ___range_ok, so export them.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c
index 5f71790..59ff20e 100644
--- a/arch/microblaze/kernel/microblaze_ksyms.c
+++ b/arch/microblaze/kernel/microblaze_ksyms.c
@@ -45,3 +45,5 @@ extern void __udivsi3(void);
 EXPORT_SYMBOL(__udivsi3);
 extern void __umodsi3(void);
 EXPORT_SYMBOL(__umodsi3);
+extern char *_ebss;
+EXPORT_SYMBOL_GPL(_ebss);
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 07d4fa3..685ad71 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -173,6 +173,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
 			&regs, 0, NULL, NULL);
 }
+EXPORT_SYMBOL_GPL(kernel_thread);
 
 unsigned long get_wchan(struct task_struct *p)
 {
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index b0c8213..31ec053 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -24,7 +24,7 @@
 #include <asm/tlb.h>
 
 unsigned int __page_offset;
-/* EXPORT_SYMBOL(__page_offset); */
+EXPORT_SYMBOL(__page_offset);
 
 char *klimit = _end;
 
@@ -199,3 +199,4 @@ int ___range_ok(unsigned long addr, unsigned long size)
 	return ((addr < memory_start) ||
 		((addr + size) > memory_end));
 }
+EXPORT_SYMBOL(___range_ok);
-- 
cgit v0.10.2


From 353b431bafc42ed8782c7aec7fb819ca4e385ab1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 1 May 2009 13:37:46 +0000
Subject: microblaze: fix __user annotations

The microblaze signal handling code gets some __user
pointers wrong, as shown by sparse.
This adds the annotations where appropriate and
change sys_rt_sigreturn to correctly pass a user
stack down to do_sigaltstack instead of a kernel
structure.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 40d3693..804a074 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -152,8 +152,8 @@ struct rt_sigframe {
 	unsigned long tramp[2];	/* signal trampoline */
 };
 
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *rval_p)
+static int restore_sigcontext(struct pt_regs *regs,
+				struct sigcontext __user *sc, int *rval_p)
 {
 	unsigned int err = 0;
 
@@ -211,11 +211,10 @@ badframe:
 
 asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
 {
-	struct rt_sigframe *frame =
-			(struct rt_sigframe *)(regs->r1 + STATE_SAVE_ARG_SPACE);
+	struct rt_sigframe __user *frame =
+		(struct rt_sigframe __user *)(regs->r1 + STATE_SAVE_ARG_SPACE);
 
 	sigset_t set;
-	stack_t st;
 	int rval;
 
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -233,11 +232,10 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &rval))
 		goto badframe;
 
-	if (__copy_from_user((void *)&st, &frame->uc.uc_stack, sizeof(st)))
-		goto badframe;
 	/* It is more difficult to avoid calling this function than to
 	 call it and ignore errors. */
-	do_sigaltstack(&st, NULL, regs->r1);
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->r1))
+		goto badframe;
 
 	return rval;
 
@@ -251,7 +249,7 @@ badframe:
  */
 
 static int
-setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 		unsigned long mask)
 {
 	int err = 0;
@@ -278,7 +276,7 @@ setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
 /*
  * Determine which stack to use..
  */
-static inline void *
+static inline void __user *
 get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
 {
 	/* Default to using normal stack */
@@ -287,7 +285,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
 	if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack(sp))
 		sp = current->sas_ss_sp + current->sas_ss_size;
 
-	return (void *)((sp - frame_size) & -8UL);
+	return (void __user *)((sp - frame_size) & -8UL);
 }
 
 static void setup_frame(int sig, struct k_sigaction *ka,
@@ -367,7 +365,7 @@ give_sigsegv:
 static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			sigset_t *set, struct pt_regs *regs)
 {
-	struct rt_sigframe *frame;
+	struct rt_sigframe __user *frame;
 	int err = 0;
 	int signal;
 
-- 
cgit v0.10.2


From 70f4cc29350222ff6baf70265f6482fc01565d48 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 10 May 2009 14:26:52 +0000
Subject: microblaze: kill incorrect __bad_xchg definition

The whole point of the __bad_xchg declaration in
system.h is to give a linker error when a variable
of invalid size is passed to __xchg. The out
of line definition in traps.c defeats this purpose
and does not any value, so remove it here.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c
index 293ef48..6322cc1 100644
--- a/arch/microblaze/kernel/traps.c
+++ b/arch/microblaze/kernel/traps.c
@@ -22,14 +22,6 @@ void trap_init(void)
 	__enable_hw_exceptions();
 }
 
-void __bad_xchg(volatile void *ptr, int size)
-{
-	printk(KERN_INFO "xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
-		__builtin_return_address(0), ptr, size);
-	BUG();
-}
-EXPORT_SYMBOL(__bad_xchg);
-
 static int kstack_depth_to_print = 24;
 
 static int __init kstack_setup(char *s)
-- 
cgit v0.10.2


From 122eec2f023f25fdd491ee9eb8eface4ded70728 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 10 May 2009 14:32:11 +0000
Subject: microblaze: remove cacheable_memcpy

This function is neither declared nor used anywhere
outside of ppc32, so remove it from microblaze.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c
index 5880119..6a907c5 100644
--- a/arch/microblaze/lib/memcpy.c
+++ b/arch/microblaze/lib/memcpy.c
@@ -154,8 +154,3 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 }
 EXPORT_SYMBOL(memcpy);
 #endif /* __HAVE_ARCH_MEMCPY */
-
-void *cacheable_memcpy(void *d, const void *s, __kernel_size_t c)
-{
-	return memcpy(d, s, c);
-}
-- 
cgit v0.10.2


From 6b4374261a067e2e1b78602eb5e631a4e027dacf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 1 May 2009 21:36:44 +0000
Subject: microblaze: add security initcalls

The security subsystem has its own initcalls, which
need support in vmlinux.lds.S.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index 840385e..910f7c09 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -132,6 +132,8 @@ SECTIONS {
 		__con_initcall_end = .;
 	}
 
+	SECURITY_INIT
+
 	__init_end_before_initramfs = .;
 
 	.init.ramfs ALIGN(4096) : {
-- 
cgit v0.10.2


From 0c60155e14eb00fa13836a710a2d2efb63d8861c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 1 May 2009 21:44:51 +0000
Subject: microblaze: add a dummy pgprot_noncached

Some device drivers call this, so add a macro
that pretends to do this. Since there is no
MMU support, it won't actually result in an
uncached mapping, though.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 4df31e4..254fd4b 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -27,6 +27,8 @@
 #define PAGE_READONLY		__pgprot(0) /* these mean nothing to non MMU */
 #define PAGE_KERNEL		__pgprot(0) /* these mean nothing to non MMU */
 
+#define pgprot_noncached(x)	(x)
+
 #define __swp_type(x)		(0)
 #define __swp_offset(x)		(0)
 #define __swp_entry(typ, off)	((swp_entry_t) { ((typ) | ((off) << 7)) })
-- 
cgit v0.10.2


From bb09791344d02c8caff596dc084b1542dcb70efe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 30 Apr 2009 22:38:58 +0000
Subject: microblaze: do not include types.h in ptrace.h

linux/types.h breaks the uclibc build, so don't
include it here.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h
index 55015bc..a917dc5 100644
--- a/arch/microblaze/include/asm/ptrace.h
+++ b/arch/microblaze/include/asm/ptrace.h
@@ -10,7 +10,6 @@
 #define _ASM_MICROBLAZE_PTRACE_H
 
 #ifndef __ASSEMBLY__
-#include <linux/types.h>
 
 typedef unsigned long microblaze_reg_t;
 
-- 
cgit v0.10.2


From 838d2406ee62595c1b40d1d03b48bc9a2102258b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 1 May 2009 13:36:13 +0000
Subject: microblaze: remove bad_user_access_length

This function was actually causing harm, by hiding
errors about invalid sized get_user/put_user accesses.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 5a3ffc3..a4e171d 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -34,10 +34,9 @@ extern int ___range_ok(unsigned long addr, unsigned long size);
 #define access_ok(type, addr, size) (__range_ok((addr), (size)) == 0)
 #define __access_ok(add, size) (__range_ok((addr), (size)) == 0)
 
-extern inline int bad_user_access_length(void)
-{
-	return 0;
-}
+/* Undefined function to trigger linker error */
+extern int bad_user_access_length(void);
+
 /* FIXME this is function for optimalization -> memcpy */
 #define __get_user(var, ptr)					\
 	({							\
-- 
cgit v0.10.2


From 732703af9c3478c3f935dd5ae80140b9b12bda09 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 31 Mar 2009 15:30:31 +0200
Subject: microblaze: clean up checksum.c

This changes the function prototypes in the checksum code
to have the usual prototypes, typically by turning int
arguments into __wsum.

Also change csum_partial_copy_from_user() to operate
on the right address space and export ip_fast_csum,
which is used in modular networking code.

The new version is now sparse-clean including endianess
checks.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/checksum.h b/arch/microblaze/include/asm/checksum.h
index 92b3076..97ea46b 100644
--- a/arch/microblaze/include/asm/checksum.h
+++ b/arch/microblaze/include/asm/checksum.h
@@ -51,7 +51,8 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
  * here even more important to align src and dst on a 32-bit (or even
  * better 64-bit) boundary
  */
-extern __wsum csum_partial_copy(const char *src, char *dst, int len, int sum);
+extern __wsum csum_partial_copy(const void *src, void *dst, int len,
+								__wsum sum);
 
 /*
  * the same as csum_partial_copy, but copies from user space.
@@ -59,8 +60,8 @@ extern __wsum csum_partial_copy(const char *src, char *dst, int len, int sum);
  * here even more important to align src and dst on a 32-bit (or even
  * better 64-bit) boundary
  */
-extern __wsum csum_partial_copy_from_user(const char *src, char *dst,
-					int len, int sum, int *csum_err);
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+					int len, __wsum sum, int *csum_err);
 
 #define csum_partial_copy_nocheck(src, dst, len, sum)	\
 	csum_partial_copy((src), (dst), (len), (sum))
@@ -75,11 +76,12 @@ extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
 /*
  *	Fold a partial checksum
  */
-static inline __sum16 csum_fold(unsigned int sum)
+static inline __sum16 csum_fold(__wsum csum)
 {
+	u32 sum = (__force u32)csum;
 	sum = (sum & 0xffff) + (sum >> 16);
 	sum = (sum & 0xffff) + (sum >> 16);
-	return ~sum;
+	return (__force __sum16)~sum;
 }
 
 static inline __sum16
@@ -93,6 +95,6 @@ csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
  * this routine is used for miscellaneous IP-like checksums, mainly
  * in icmp.c
  */
-extern __sum16 ip_compute_csum(const unsigned char *buff, int len);
+extern __sum16 ip_compute_csum(const void *buff, int len);
 
 #endif /* _ASM_MICROBLAZE_CHECKSUM_H */
diff --git a/arch/microblaze/lib/checksum.c b/arch/microblaze/lib/checksum.c
index 8093400..f08e745 100644
--- a/arch/microblaze/lib/checksum.c
+++ b/arch/microblaze/lib/checksum.c
@@ -32,9 +32,10 @@
 /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
  kills, so most of the assembly has to go. */
 
-#include <net/checksum.h>
-#include <asm/checksum.h>
 #include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
 
 static inline unsigned short from32to16(unsigned long x)
 {
@@ -102,6 +103,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
 	return (__force __sum16)~do_csum(iph, ihl*4);
 }
+EXPORT_SYMBOL(ip_fast_csum);
 
 /*
  * computes the checksum of a memory block at buff, length len,
@@ -115,15 +117,16 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
  *
  * it's best to have buff aligned on a 32-bit boundary
  */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
+__wsum csum_partial(const void *buff, int len, __wsum wsum)
 {
+	unsigned int sum = (__force unsigned int)wsum;
 	unsigned int result = do_csum(buff, len);
 
 	/* add in old sum, and carry.. */
 	result += sum;
 	if (sum > result)
 		result += 1;
-	return result;
+	return (__force __wsum)result;
 }
 EXPORT_SYMBOL(csum_partial);
 
@@ -131,9 +134,9 @@ EXPORT_SYMBOL(csum_partial);
  * this routine is used for miscellaneous IP-like checksums, mainly
  * in icmp.c
  */
-__sum16 ip_compute_csum(const unsigned char *buff, int len)
+__sum16 ip_compute_csum(const void *buff, int len)
 {
-	return ~do_csum(buff, len);
+	return (__force __sum16)~do_csum(buff, len);
 }
 EXPORT_SYMBOL(ip_compute_csum);
 
@@ -141,12 +144,18 @@ EXPORT_SYMBOL(ip_compute_csum);
  * copy from fs while checksumming, otherwise like csum_partial
  */
 __wsum
-csum_partial_copy_from_user(const char __user *src, char *dst, int len,
-						int sum, int *csum_err)
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+						__wsum sum, int *csum_err)
 {
-	if (csum_err)
+	int missing;
+
+	missing = __copy_from_user(dst, src, len);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*csum_err = -EFAULT;
+	} else
 		*csum_err = 0;
-	memcpy(dst, src, len);
+
 	return csum_partial(dst, len, sum);
 }
 EXPORT_SYMBOL(csum_partial_copy_from_user);
@@ -155,7 +164,7 @@ EXPORT_SYMBOL(csum_partial_copy_from_user);
  * copy from ds while checksumming, otherwise like csum_partial
  */
 __wsum
-csum_partial_copy(const char *src, char *dst, int len, int sum)
+csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
 {
 	memcpy(dst, src, len);
 	return csum_partial(dst, len, sum);
-- 
cgit v0.10.2


From a6029d1c8cfc9ac2609195f31c2e70b584d3496e Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 12 May 2009 12:10:52 +0200
Subject: microblaze: prepare signal handling for generic unistd.h

We need to define set_restore_sigmask() in order to
get pselect and ppoll. Also, the setup_frame function
can not be used when __NR_sigreturn is not defined.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/syscalls.h b/arch/microblaze/include/asm/syscalls.h
index 9cb4ff0..ddea9eb 100644
--- a/arch/microblaze/include/asm/syscalls.h
+++ b/arch/microblaze/include/asm/syscalls.h
@@ -34,6 +34,9 @@ asmlinkage int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize,
 asmlinkage int sys_sigaction(int sig, const struct old_sigaction *act,
 		struct old_sigaction *oact);
 
+asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act,
+		struct sigaction __user *oact, size_t sigsetsize);
+
 asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs);
 
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index 4c3943e..a0401d2 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -154,6 +154,17 @@ static inline struct thread_info *current_thread_info(void)
  */
 /* FPU was used by this task this quantum (SMP) */
 #define TS_USEDFPU		0x0001
+#define TS_RESTORE_SIGMASK	0x0002
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK 1
+static inline void set_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->status |= TS_RESTORE_SIGMASK;
+	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+}
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_MICROBLAZE_THREAD_INFO_H */
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 804a074..3cdcbfe 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -288,80 +288,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
 	return (void __user *)((sp - frame_size) & -8UL);
 }
 
-static void setup_frame(int sig, struct k_sigaction *ka,
-			sigset_t *set, struct pt_regs *regs)
-{
-	struct sigframe *frame;
-	int err = 0;
-	int signal;
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	signal = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
-
-	if (_NSIG_WORDS > 1) {
-		err |= __copy_to_user(frame->extramask, &set->sig[1],
-					sizeof(frame->extramask));
-	}
-
-	/* Set up to return from userspace. If provided, use a stub
-	 already in userspace. */
-	/* minus 8 is offset to cater for "rtsd r15,8" offset */
-	if (ka->sa.sa_flags & SA_RESTORER) {
-		regs->r15 = ((unsigned long)ka->sa.sa_restorer)-8;
-	} else {
-		/* Note, these encodings are _big endian_! */
-
-		/* addi r12, r0, __NR_sigreturn */
-		err |= __put_user(0x31800000 | __NR_sigreturn ,
-				frame->tramp + 0);
-		/* brki r14, 0x8 */
-		err |= __put_user(0xb9cc0008, frame->tramp + 1);
-
-		/* Return from sighandler will jump to the tramp.
-		 Negative 8 offset because return is rtsd r15, 8 */
-		regs->r15 = ((unsigned long)frame->tramp)-8;
-
-		__invalidate_cache_sigtramp((unsigned long)frame->tramp);
-	}
-
-	if (err)
-		goto give_sigsegv;
-
-	/* Set up registers for signal handler */
-	regs->r1 = (unsigned long) frame - STATE_SAVE_ARG_SPACE;
-
-	/* Signal handler args: */
-	regs->r5 = signal; /* Arg 0: signum */
-	regs->r6 = (unsigned long) &frame->sc; /* arg 1: sigcontext */
-
-	/* Offset of 4 to handle microblaze rtid r14, 0 */
-	regs->pc = (unsigned long)ka->sa.sa_handler;
-
-	set_fs(USER_DS);
-
-#ifdef DEBUG_SIG
-	printk(KERN_INFO "SIG deliver (%s:%d): sp=%p pc=%08lx\n",
-		current->comm, current->pid, frame, regs->pc);
-#endif
-
-	return;
-
-give_sigsegv:
-	if (sig == SIGSEGV)
-		ka->sa.sa_handler = SIG_DFL;
-	force_sig(SIGSEGV, current);
-}
-
 static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			sigset_t *set, struct pt_regs *regs)
 {
@@ -380,7 +306,8 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		? current_thread_info()->exec_domain->signal_invmap[sig]
 		: sig;
 
-	err |= copy_siginfo_to_user(&frame->info, info);
+	if (info)
+		err |= copy_siginfo_to_user(&frame->info, info);
 
 	/* Create the ucontext. */
 	err |= __put_user(0, &frame->uc.uc_flags);
@@ -478,7 +405,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 	if (ka->sa.sa_flags & SA_SIGINFO)
 		setup_rt_frame(sig, ka, info, oldset, regs);
 	else
-		setup_frame(sig, ka, oldset, regs);
+		setup_rt_frame(sig, ka, NULL, oldset, regs);
 
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
-- 
cgit v0.10.2


From f2224ff07f345f3f9716071cc90ee50e29af7497 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 27 Apr 2009 17:38:25 +0200
Subject: microblaze: use generic dma-mapping-broken.h

Microblaze does not support the Linux DMA mapping API
at this point, so disable CONFIG_NO_DMA. This lets
us use the generic dma-mapping-broken.h implementation
instead of providing a different copy.

Any drivers that try to use DMA mapping now get
omitted from Kconfig or produce a link error, rather
than failing silently at run time.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index e6af7dd..f8e0722 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -59,7 +59,7 @@ config PCI
 
 config NO_DMA
 	depends on !MMU
-	def_bool n
+	def_bool y
 
 source "init/Kconfig"
 
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index 1733625..d00e400 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -1,129 +1 @@
-/*
- * Copyright (C) 2006 Atmark Techno, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_DMA_MAPPING_H
-#define _ASM_MICROBLAZE_DMA_MAPPING_H
-
-#include <asm/cacheflush.h>
-#include <linux/io.h>
-#include <linux/bug.h>
-
-struct scatterlist;
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-/* FIXME */
-static inline int
-dma_supported(struct device *dev, u64 mask)
-{
-	return 1;
-}
-
-static inline dma_addr_t
-dma_map_page(struct device *dev, struct page *page,
-	unsigned long offset, size_t size,
-	enum dma_data_direction direction)
-{
-	BUG();
-	return 0;
-}
-
-static inline void
-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-	enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-	enum dma_data_direction direction)
-{
-	BUG();
-	return 0;
-}
-
-static inline void
-dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-	enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline void
-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
-			enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline void
-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
-		size_t size, enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline void
-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
-		enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline void
-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
-		enum dma_data_direction direction)
-{
-	BUG();
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return 0;
-}
-
-static inline void *dma_alloc_coherent(struct device *dev, size_t size,
-				dma_addr_t *dma_handle, int flag)
-{
-	return NULL; /* consistent_alloc(flag, size, dma_handle); */
-}
-
-static inline void dma_free_coherent(struct device *dev, size_t size,
-			void *vaddr, dma_addr_t dma_handle)
-{
-	BUG();
-}
-
-static inline dma_addr_t
-dma_map_single(struct device *dev, void *ptr, size_t size,
-	enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-
-	return virt_to_bus(ptr);
-}
-
-static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
-				    size_t size,
-				    enum dma_data_direction direction)
-{
-	switch (direction) {
-	case DMA_FROM_DEVICE:
-		flush_dcache_range((unsigned)dma_addr,
-			(unsigned)dma_addr + size);
-			/* Fall through */
-	case DMA_TO_DEVICE:
-		break;
-	default:
-		BUG();
-	}
-}
-
-#endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */
+#include <asm-generic/dma-mapping-broken.h>
-- 
cgit v0.10.2


From 5af90438023786e27178cc542f9a775594f8a126 Mon Sep 17 00:00:00 2001
From: Steve Magnani <steve@digidescorp.com>
Date: Mon, 18 May 2009 03:22:40 +0200
Subject: microblaze: Fix paging init-zone initialization

This patch fix problem with bad zone initialization.
This bug wasn't perform because Microblaze doesn't
define CONFIG_ZONE_DMA and ZONE_NORMAL was 0 for this case
that's why free_area_init works with correct values.

Original message:
I believe that the switch from ZONE_DMA (== 0) to ZONE_NORMAL
broke the free area initialization.

Signed-off-by: Steven J. Magnani <steve@digidescorp.com>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 31ec053..6ef5088 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -44,16 +44,15 @@ static void __init paging_init(void)
 	int i;
 	unsigned long zones_size[MAX_NR_ZONES];
 
+	/* Clean every zones */
+	memset(zones_size, 0, sizeof(zones_size));
+
 	/*
 	 * old: we can DMA to/from any address.put all page into ZONE_DMA
 	 * We use only ZONE_NORMAL
 	 */
 	zones_size[ZONE_NORMAL] = max_mapnr;
 
-	/* every other zones are empty */
-	for (i = 1; i < MAX_NR_ZONES; i++)
-		zones_size[i] = 0;
-
 	free_area_init(zones_size);
 }
 
-- 
cgit v0.10.2


From c3055d14565516867e7a45553b90cb88d6277ec9 Mon Sep 17 00:00:00 2001
From: Thomas Chou <thomas@wytron.com.tw>
Date: Tue, 19 May 2009 22:48:10 +0800
Subject: microblaze: clean LDFLAGS to build kernel

Extra LDFLAGS from user space building may cause kernel failed
to compile.

Signed-off-by: Thomas Chou <thomas@wytron.com.tw>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index aaadfa7..ab731b7 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -36,6 +36,8 @@ CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
 # r31 holds current when in kernel mode
 CFLAGS_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
 
+LDFLAGS		:=
+LDFLAGS_vmlinux	:=
 LDFLAGS_BLOB := --format binary --oformat elf32-microblaze
 
 LIBGCC := $(shell $(CC) $(CFLAGS_KERNEL) -print-libgcc-file-name)
-- 
cgit v0.10.2


From 6e3d4e1d16a19f8462beb5bfe5f2c464770e795e Mon Sep 17 00:00:00 2001
From: Steve Magnani <steve@digidescorp.com>
Date: Sun, 17 May 2009 20:32:07 -0500
Subject: microblaze: Guard __HAVE_ARCH macros with __KERNEL__ in string.h

A polarity reversal in the __KERNEL__ guard prevents the __HAVE_ARCH
flags from being defined in kernel compilation.

I noticed that there's now an option for assembly-optimized versions of
memcpy and memmove. I believe this may be buggy; when I turn it on, all
my printk output gets smashed together, as if the newlines aren't getting
copied.

Signed-off-by: Steven J. Magnani <steve@digidescorp.com>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/string.h b/arch/microblaze/include/asm/string.h
index f7728c9..aec2f59 100644
--- a/arch/microblaze/include/asm/string.h
+++ b/arch/microblaze/include/asm/string.h
@@ -9,7 +9,7 @@
 #ifndef _ASM_MICROBLAZE_STRING_H
 #define _ASM_MICROBLAZE_STRING_H
 
-#ifndef __KERNEL__
+#ifdef __KERNEL__
 
 #define __HAVE_ARCH_MEMSET
 #define __HAVE_ARCH_MEMCPY
-- 
cgit v0.10.2


From 1ce97e564b628bee30b8dbb64e5e653a484308f6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 21 May 2009 15:18:19 +0100
Subject: GFS2: Be more aggressive in reclaiming unlinked inodes

This patch increases the frequency with which gfs2 looks
for unlinked, but still allocated inodes. Its the equivalent
operation to ext3's orphan list, but done with bitmaps in
the resource groups.

This also fixes a bug where a field in the rgrp was too small.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index dd87379..225347f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -94,7 +94,7 @@ struct gfs2_rgrpd {
 	struct gfs2_sbd *rd_sbd;
 	unsigned int rd_bh_count;
 	u32 rd_last_alloc;
-	unsigned char rd_flags;
+	u32 rd_flags;
 #define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 23637b9..ee3d5c1 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -581,7 +581,6 @@ static int read_rindex_entry(struct gfs2_inode *ip,
 
 	rgd->rd_gl->gl_object = rgd;
 	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
-	rgd->rd_flags |= GFS2_RDF_CHECK;
 	return error;
 }
 
@@ -703,6 +702,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 
 	rg_flags = be32_to_cpu(str->rg_flags);
 	rg_flags &= ~GFS2_RDF_MASK;
+	rgd->rd_flags &= GFS2_RDF_MASK;
+	rgd->rd_flags |= rg_flags;
 	rgd->rd_free = be32_to_cpu(str->rg_free);
 	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
 	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
@@ -773,7 +774,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 		for (x = 0; x < length; x++)
 			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
 		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
-		rgd->rd_flags |= GFS2_RDF_UPTODATE;
+		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 	}
 
 	spin_lock(&sdp->sd_rindex_spin);
-- 
cgit v0.10.2


From 1dff89a9c7fab71b43ba79cc1aa6c6dbad582a35 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Thu, 21 May 2009 08:20:30 +0200
Subject: microblaze: Fix early cmdline for CMDLINE_FORCE

This patch fixed parsing early parameters because
current implementation does that early parse DTS
command line and then parse CMDLINE line which is compiled-in.

For case that DTS doesn't contain command line is
copied command line from kernel with is done in prom.c
that's why I can remove it from machine_early_init.

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 34c4871..003d373 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -563,7 +563,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 		strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE));
 
 #ifdef CONFIG_CMDLINE
+#ifndef CONFIG_CMDLINE_FORCE
 	if (p == NULL || l == 0 || (l == 1 && (*p) == 0))
+#endif
 		strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 #endif /* CONFIG_CMDLINE */
 
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index eb6b417..c156b16 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -42,10 +42,6 @@ char cmd_line[COMMAND_LINE_SIZE];
 
 void __init setup_arch(char **cmdline_p)
 {
-#ifdef CONFIG_CMDLINE_FORCE
-	strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-	strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif
 	*cmdline_p = cmd_line;
 
 	console_verbose();
@@ -106,10 +102,7 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
 	memset(__bss_start, 0, __bss_stop-__bss_start);
 	memset(_ssbss, 0, _esbss-_ssbss);
 
-	/*
-	 * Copy command line passed from bootloader, or use default
-	 * if none provided, or forced
-	 */
+	/* Copy command line passed from bootloader */
 #ifndef CONFIG_CMDLINE_BOOL
 	if (cmdline && cmdline[0] != '\0')
 		strlcpy(cmd_line, cmdline, COMMAND_LINE_SIZE);
-- 
cgit v0.10.2


From cda1fd5a60e97a1a1bf96606f201818b207b2c5c Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 19 May 2009 14:33:47 +0200
Subject: microblaze: Cleanup compiled-in rootfs in BSS section

This patch is based on patch from Steve Magnani.

There were bug for compiled-in rootfs. We have to move
moving rootfs which is in BSS section to _ebss section
which is at the end of kernel and then clear bss section
not vice-versa.

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index c156b16..8709bea 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -98,6 +98,29 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
 {
 	unsigned long *src, *dst = (unsigned long *)0x0;
 
+	/* If CONFIG_MTD_UCLINUX is defined, assume ROMFS is at the
+	 * end of kernel. There are two position which we want to check.
+	 * The first is __init_end and the second __bss_start.
+	 */
+#ifdef CONFIG_MTD_UCLINUX
+	int romfs_size;
+	unsigned int romfs_base;
+	char *old_klimit = klimit;
+
+	romfs_base = (ram ? ram : (unsigned int)&__init_end);
+	romfs_size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base));
+	if (!romfs_size) {
+		romfs_base = (unsigned int)&__bss_start;
+		romfs_size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base));
+	}
+
+	/* Move ROMFS out of BSS before clearing it */
+	if (romfs_size > 0) {
+		memmove(&_ebss, (int *)romfs_base, romfs_size);
+		klimit += romfs_size;
+	}
+#endif
+
 /* clearing bss section */
 	memset(__bss_start, 0, __bss_stop-__bss_start);
 	memset(_ssbss, 0, _esbss-_ssbss);
@@ -119,27 +142,15 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
 	printk(KERN_NOTICE "Found FDT at 0x%08x\n", fdt);
 
 #ifdef CONFIG_MTD_UCLINUX
-	{
-		int size;
-		unsigned int romfs_base;
-		romfs_base = (ram ? ram : (unsigned int)&__init_end);
-		/* if CONFIG_MTD_UCLINUX_EBSS is defined, assume ROMFS is at the
-		 * end of kernel, which is ROMFS_LOCATION defined above. */
-		size = PAGE_ALIGN(get_romfs_len((unsigned *)romfs_base));
-		early_printk("Found romfs @ 0x%08x (0x%08x)\n",
-				romfs_base, size);
-		early_printk("#### klimit %p ####\n", klimit);
-		BUG_ON(size < 0); /* What else can we do? */
-
-		/* Use memmove to handle likely case of memory overlap */
-		early_printk("Moving 0x%08x bytes from 0x%08x to 0x%08x\n",
-			size, romfs_base, (unsigned)&_ebss);
-		memmove(&_ebss, (int *)romfs_base, size);
-
-		/* update klimit */
-		klimit += PAGE_ALIGN(size);
-		early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
-	}
+	early_printk("Found romfs @ 0x%08x (0x%08x)\n",
+			romfs_base, romfs_size);
+	early_printk("#### klimit %p ####\n", old_klimit);
+	BUG_ON(romfs_size < 0); /* What else can we do? */
+
+	early_printk("Moved 0x%08x bytes from 0x%08x to 0x%08x\n",
+			romfs_size, romfs_base, (unsigned)&_ebss);
+
+	early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
 #endif
 
 	for (src = __ivt_start; src < __ivt_end; src++, dst++)
-- 
cgit v0.10.2


From e93b55bfb3ead1cb4d9df7a5e0686318cfa8690c Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Thu, 21 May 2009 13:53:56 +0200
Subject: Microblaze: Remove unused variable from paging init

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 6ef5088..af789e2 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -41,7 +41,6 @@ unsigned int memory_size;
  */
 static void __init paging_init(void)
 {
-	int i;
 	unsigned long zones_size[MAX_NR_ZONES];
 
 	/* Clean every zones */
-- 
cgit v0.10.2


From b9479e666563d5c28eb861d2a8f6334666025384 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Thu, 21 May 2009 16:33:07 +0200
Subject: microblaze: Fix cast warning for __va in prom.c

__va expect 32bit value but of_read_ulong(of_read_number)
returns 64bit value

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 003d373..c005cc6 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -509,12 +509,13 @@ static void __init early_init_dt_check_for_initrd(unsigned long node)
 
 	prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l);
 	if (prop) {
-		initrd_start = (unsigned long)__va(of_read_ulong(prop, l/4));
+		initrd_start = (unsigned long)
+					__va((u32)of_read_ulong(prop, l/4));
 
 		prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l);
 		if (prop) {
 			initrd_end = (unsigned long)
-					__va(of_read_ulong(prop, l/4));
+					__va((u32)of_read_ulong(prop, 1/4));
 			initrd_below_start_ok = 1;
 		} else {
 			initrd_start = 0;
-- 
cgit v0.10.2


From 55620c86ebc29013c0d26c5b3bc423faea299fee Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 01:16:17 +0900
Subject: sh: irq: Rework the SR.IMASK bitmap handling.

This tidies up how the SR.IMASK bitmap is managed, using the bitmap API
directly instead. At the same time, tidy up the irq_chip conversion a
bit.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/irq/imask.c b/arch/sh/kernel/cpu/irq/imask.c
index f43753b..3bef3c2 100644
--- a/arch/sh/kernel/cpu/irq/imask.c
+++ b/arch/sh/kernel/cpu/irq/imask.c
@@ -18,38 +18,17 @@
 #include <linux/spinlock.h>
 #include <linux/cache.h>
 #include <linux/irq.h>
+#include <linux/bitmap.h>
 #include <asm/system.h>
 #include <asm/irq.h>
 
 /* Bitmap of IRQ masked */
-static unsigned long imask_mask = 0x7fff;
-static int interrupt_priority = 0;
-
-static void enable_imask_irq(unsigned int irq);
-static void disable_imask_irq(unsigned int irq);
-static void shutdown_imask_irq(unsigned int irq);
-static void mask_and_ack_imask(unsigned int);
-static void end_imask_irq(unsigned int irq);
-
 #define IMASK_PRIORITY	15
 
-static unsigned int startup_imask_irq(unsigned int irq)
-{
-	/* Nothing to do */
-	return 0; /* never anything pending */
-}
+static DECLARE_BITMAP(imask_mask, IMASK_PRIORITY);
+static int interrupt_priority;
 
-static struct irq_chip imask_irq_type = {
-	.typename = "SR.IMASK",
-	.startup = startup_imask_irq,
-	.shutdown = shutdown_imask_irq,
-	.enable = enable_imask_irq,
-	.disable = disable_imask_irq,
-	.ack = mask_and_ack_imask,
-	.end = end_imask_irq
-};
-
-void static inline set_interrupt_registers(int ip)
+static inline void set_interrupt_registers(int ip)
 {
 	unsigned long __dummy;
 
@@ -72,42 +51,31 @@ void static inline set_interrupt_registers(int ip)
 		     : "t");
 }
 
-static void disable_imask_irq(unsigned int irq)
+static void mask_imask_irq(unsigned int irq)
 {
 	clear_bit(irq, &imask_mask);
 	if (interrupt_priority < IMASK_PRIORITY - irq)
 		interrupt_priority = IMASK_PRIORITY - irq;
-
 	set_interrupt_registers(interrupt_priority);
 }
 
-static void enable_imask_irq(unsigned int irq)
+static void unmask_imask_irq(unsigned int irq)
 {
 	set_bit(irq, &imask_mask);
-	interrupt_priority = IMASK_PRIORITY - ffz(imask_mask);
-
+	interrupt_priority = IMASK_PRIORITY -
+		find_first_zero_bit(imask_mask, IMASK_PRIORITY);
 	set_interrupt_registers(interrupt_priority);
 }
 
-static void mask_and_ack_imask(unsigned int irq)
-{
-	disable_imask_irq(irq);
-}
-
-static void end_imask_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		enable_imask_irq(irq);
-}
-
-static void shutdown_imask_irq(unsigned int irq)
-{
-	/* Nothing to do */
-}
+static struct irq_chip imask_irq_chip = {
+	.typename	= "SR.IMASK",
+	.mask		= mask_imask_irq,
+	.unmask		= unmask_imask_irq,
+	.mask_ack	= mask_imask_irq,
+};
 
 void make_imask_irq(unsigned int irq)
 {
-	disable_irq_nosync(irq);
-	irq_desc[irq].chip = &imask_irq_type;
-	enable_irq(irq);
+	set_irq_chip_and_handler_name(irq, &imask_irq_chip,
+				      handle_level_irq, "level");
 }
-- 
cgit v0.10.2


From fa1d43ab451084785153d37ae559c4fdd1546a5b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 01:26:16 +0900
Subject: sh: irq: Convert from irq_desc[] to irq_to_desc().

This converts a few places that were using the old irq_desc[] array over
to the shiny new irq_to_desc() helper. Preperatory work for sparse irq
support.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/irq/intc-sh5.c b/arch/sh/kernel/cpu/irq/intc-sh5.c
index a619eaf..6c092f1 100644
--- a/arch/sh/kernel/cpu/irq/intc-sh5.c
+++ b/arch/sh/kernel/cpu/irq/intc-sh5.c
@@ -152,14 +152,6 @@ static void end_intc_irq(unsigned int irq)
 	enable_intc_irq(irq);
 }
 
-/* For future use, if we ever support IRLM=0) */
-void make_intc_irq(unsigned int irq)
-{
-	disable_irq_nosync(irq);
-	irq_desc[irq].chip = &intc_irq_type;
-	disable_intc_irq(irq);
-}
-
 void __init plat_irq_setup(void)
 {
 	unsigned long long __dummy0, __dummy1=~0x00000000100000f0;
@@ -174,7 +166,7 @@ void __init plat_irq_setup(void)
 
 	/* Set default: per-line enable/disable, priority driven ack/eoi */
 	for (i = 0; i < NR_INTC_IRQS; i++)
-		irq_desc[i].chip = &intc_irq_type;
+		set_irq_chip_and_handler(i, &intc_irq_type, handle_level_irq);
 
 
 	/* Disable all interrupts and set all priorities to 0 to avoid trouble */
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 3f1372e..878532b 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -31,39 +31,64 @@ void ack_bad_irq(unsigned int irq)
 }
 
 #if defined(CONFIG_PROC_FS)
+/*
+ * /proc/interrupts printing:
+ */
+static int show_other_interrupts(struct seq_file *p, int prec)
+{
+	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
+	return 0;
+}
+
 int show_interrupts(struct seq_file *p, void *v)
 {
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *)v, j, prec;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
+	for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
+		j *= 10;
+
+	if (i == nr_irqs)
+		return show_other_interrupts(p, prec);
 
 	if (i == 0) {
-		seq_puts(p, "           ");
+		seq_printf(p, "%*s", prec + 8, "");
 		for_each_online_cpu(j)
-			seq_printf(p, "CPU%d       ",j);
+			seq_printf(p, "CPU%-8d", j);
 		seq_putc(p, '\n');
 	}
 
-	if (i < sh_mv.mv_nr_irqs) {
-		spin_lock_irqsave(&irq_desc[i].lock, flags);
-		action = irq_desc[i].action;
-		if (!action)
-			goto unlock;
-		seq_printf(p, "%3d: ",i);
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-		seq_printf(p, " %14s", irq_desc[i].chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
-		seq_printf(p, "  %s", action->name);
+	desc = irq_to_desc(i);
+	if (!desc)
+		return 0;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
 
-		for (action=action->next; action; action = action->next)
+	seq_printf(p, "%*d: ", prec, i);
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+	seq_printf(p, " %14s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
+
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
 			seq_printf(p, ", %s", action->name);
-		seq_putc(p, '\n');
-unlock:
-		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == sh_mv.mv_nr_irqs)
-		seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count));
+	}
 
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
 #endif
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 2d868c6..fcc5de3 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -23,9 +23,6 @@ extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
 /* platform dependent support */
 EXPORT_SYMBOL(dump_fpu);
 EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(irq_desc);
-EXPORT_SYMBOL(no_irq_chip);
-
 EXPORT_SYMBOL(strlen);
 
 /* PCI exports */
@@ -40,11 +37,6 @@ EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(__copy_user);
-
-#ifdef CONFIG_MMU
-EXPORT_SYMBOL(get_vm_area);
-#endif
-
 EXPORT_SYMBOL(__udelay);
 EXPORT_SYMBOL(__ndelay);
 EXPORT_SYMBOL(__const_udelay);
-- 
cgit v0.10.2


From 05ff3004d278b54760abd71530506d803182c71d Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 01:28:33 +0900
Subject: sh: irq: Teach ipr and intc about dynamically allocating irq_descs.

This hooks in irq_to_desc_alloc_cpu() to the necessary code paths in the
intc and ipr controller registration paths. As these are the primary call
paths for all SH CPUs, this alone will make all CPUs sparse IRQ ready.

There is the added benefit now that each CPU contains specific IPR and
INTC tables, so only the vectors with interrupt sources backing them will
ever see an irq_desc instantiation. This effectively packs irq_desc
down to match the CPU, rather than padding NR_IRQS out to cover the valid
vector range.

Boards with extra sources will still have to fiddle with the nr_irqs
setting, but they can continue doing so through the machvec as before.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/irq/ipr.c b/arch/sh/kernel/cpu/irq/ipr.c
index 3eb17ee..fa0c846 100644
--- a/arch/sh/kernel/cpu/irq/ipr.c
+++ b/arch/sh/kernel/cpu/irq/ipr.c
@@ -59,10 +59,18 @@ void register_ipr_controller(struct ipr_desc *desc)
 
 	for (i = 0; i < desc->nr_irqs; i++) {
 		struct ipr_data *p = desc->ipr_data + i;
+		struct irq_desc *irq_desc;
 
 		BUG_ON(p->ipr_idx >= desc->nr_offsets);
 		BUG_ON(!desc->ipr_offsets[p->ipr_idx]);
 
+		irq_desc = irq_to_desc_alloc_cpu(p->irq, smp_processor_id());
+		if (unlikely(!irq_desc)) {
+			printk(KERN_INFO "can not get irq_desc for %d\n",
+			       p->irq);
+			continue;
+		}
+
 		disable_irq_nosync(p->irq);
 		set_irq_chip_and_handler_name(p->irq, &desc->chip,
 				      handle_level_irq, "level");
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 12d13d9..098b767 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -671,7 +671,7 @@ unsigned int intc_evt2irq(unsigned int vector)
 
 void __init register_intc_controller(struct intc_desc *desc)
 {
-	unsigned int i, k, smp;
+	unsigned int i, k, smp, cpu = smp_processor_id();
 	struct intc_desc_int *d;
 
 	d = alloc_bootmem(sizeof(*d));
@@ -770,11 +770,19 @@ void __init register_intc_controller(struct intc_desc *desc)
 	/* register the vectors one by one */
 	for (i = 0; i < desc->nr_vectors; i++) {
 		struct intc_vect *vect = desc->vectors + i;
+		unsigned int irq = evt2irq(vect->vect);
+		struct irq_desc *irq_desc;
 
 		if (!vect->enum_id)
 			continue;
 
-		intc_register_irq(desc, d, vect->enum_id, evt2irq(vect->vect));
+		irq_desc = irq_to_desc_alloc_cpu(irq, cpu);
+		if (unlikely(!irq_desc)) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+			continue;
+		}
+
+		intc_register_irq(desc, d, vect->enum_id, irq);
 	}
 }
 
-- 
cgit v0.10.2


From d8586ba6e1415150e1bab89f0a05447bb6f2d6d5 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 01:36:13 +0900
Subject: sh: irq: Provide an arch_probe_nr_irqs() that wraps the machvec def.

This is just a simple arch_probe_nr_irqs() stub that wraps to the
platform defined number of IRQs. This can be made gradually more
intelligent based on what we can infer from the INTC tables and so on.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 878532b..3d09062 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -279,3 +279,11 @@ void __init init_IRQ(void)
 
 	irq_ctx_init(smp_processor_id());
 }
+
+#ifdef CONFIG_SPARSE_IRQ
+int __init arch_probe_nr_irqs(void)
+{
+	nr_irqs = sh_mv.mv_nr_irqs;
+	return 0;
+}
+#endif
-- 
cgit v0.10.2


From 30cff215b54b20d201a2bd7d50361c4c14700281 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 20 May 2009 13:44:40 +0000
Subject: sh: clkfwk branch compile fix for clock-sh7722

Fix clkfwk branch compile error in clock-sh7722.c.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index a98d7da..ae78efd 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -892,7 +892,7 @@ int __init arch_clk_init(void)
 	struct clk *clk;
 	int i;
 
-	clk_cpg_init();
+	cpg_clk_init();
 
 	clk = clk_get(NULL, "master_clk");
 	for (i = 0; i < ARRAY_SIZE(sh7722_clocks); i++) {
-- 
cgit v0.10.2


From 98f32602d42951e61a059685f842aa7d778ffab0 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Thu, 21 May 2009 20:33:58 +0100
Subject: hugh: update email address

My old address will shut down in a few days time: remove it from the tree,
and add a tmpfs (shmem filesystem) maintainer entry with the new address.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index 222437e..3015da0 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -133,4 +133,4 @@ RAM/SWAP in 10240 inodes and it is only accessible by root.
 Author:
    Christoph Rohland <cr@sap.com>, 1.12.01
 Updated:
-   Hugh Dickins <hugh@veritas.com>, 4 June 2007
+   Hugh Dickins, 4 June 2007
diff --git a/MAINTAINERS b/MAINTAINERS
index 2b349ba..64ea80e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5579,6 +5579,14 @@ M:	ian@mnementh.co.uk
 S:	Maintained
 F:	drivers/mmc/host/tmio_mmc.*
 
+TMPFS (SHMEM FILESYSTEM)
+P:	Hugh Dickins
+M:	hugh.dickins@tiscali.co.uk
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	include/linux/shmem_fs.h
+F:	mm/shmem.c
+
 TPM DEVICE DRIVER
 P:	Debora Velarde
 M:	debora@linux.vnet.ibm.com
diff --git a/mm/rmap.c b/mm/rmap.c
index 1652166..23122af 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -14,7 +14,7 @@
  * Original design by Rik van Riel <riel@conectiva.com.br> 2001
  * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
  * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
- * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004
+ * Contributions by Hugh Dickins 2003, 2004
  */
 
 /*
-- 
cgit v0.10.2


From 85bc26211c6a2c6e82c2403697f8ce44e9587215 Mon Sep 17 00:00:00 2001
From: Martin Michlmayr <tbm@cyrius.com>
Date: Tue, 19 May 2009 12:30:52 +0200
Subject: [ARM] Orion: Remove explicit name for platform device resources

Remove explicit names from platform device resources since they will
automatically be named after the platform device they're associated
with.

Signed-off-by: Martin Michlmayr <tbm@cyrius.com>
Acked-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index eeb0024..3fab82a 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -386,12 +386,10 @@ static struct mv64xxx_i2c_pdata kirkwood_i2c_pdata = {
 
 static struct resource kirkwood_i2c_resources[] = {
 	{
-		.name	= "i2c",
 		.start	= I2C_PHYS_BASE,
 		.end	= I2C_PHYS_BASE + 0x1f,
 		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "i2c",
 		.start	= IRQ_KIRKWOOD_TWSI,
 		.end	= IRQ_KIRKWOOD_TWSI,
 		.flags	= IORESOURCE_IRQ,
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index 9ba5950..0d88eea 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -532,12 +532,10 @@ static struct mv64xxx_i2c_pdata mv78xx0_i2c_0_pdata = {
 
 static struct resource mv78xx0_i2c_0_resources[] = {
 	{
-		.name   = "i2c 0 base",
 		.start  = I2C_0_PHYS_BASE,
 		.end    = I2C_0_PHYS_BASE + 0x1f,
 		.flags  = IORESOURCE_MEM,
 	}, {
-		.name   = "i2c 0 irq",
 		.start  = IRQ_MV78XX0_I2C_0,
 		.end    = IRQ_MV78XX0_I2C_0,
 		.flags  = IORESOURCE_IRQ,
@@ -567,12 +565,10 @@ static struct mv64xxx_i2c_pdata mv78xx0_i2c_1_pdata = {
 
 static struct resource mv78xx0_i2c_1_resources[] = {
 	{
-		.name   = "i2c 1 base",
 		.start  = I2C_1_PHYS_BASE,
 		.end    = I2C_1_PHYS_BASE + 0x1f,
 		.flags  = IORESOURCE_MEM,
 	}, {
-		.name   = "i2c 1 irq",
 		.start  = IRQ_MV78XX0_I2C_1,
 		.end    = IRQ_MV78XX0_I2C_1,
 		.flags  = IORESOURCE_IRQ,
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 6af99dd..a51fb9d 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -248,12 +248,10 @@ static struct mv64xxx_i2c_pdata orion5x_i2c_pdata = {
 
 static struct resource orion5x_i2c_resources[] = {
 	{
-		.name	= "i2c base",
 		.start	= I2C_PHYS_BASE,
 		.end	= I2C_PHYS_BASE + 0x1f,
 		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "i2c irq",
 		.start	= IRQ_ORION5X_I2C,
 		.end	= IRQ_ORION5X_I2C,
 		.flags	= IORESOURCE_IRQ,
-- 
cgit v0.10.2


From 703a3b8e5c01cf6fb33c6d8dc99905f889a4e992 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 21 May 2009 22:21:53 +0000
Subject: [CIFS] fix posix open regression

Posix open code was not properly adding the file to the
list of open files.  Fix  allocating cifsFileInfo
more than once, and adding twice to flist and tlist.
Also fix mode setting to be done in one place in these
paths.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Reviewed-by: Shirish Pargaonkar <shirishp@us.ibm.com>
Tested-by: Jeff Layton <jlayton@redhat.com>
Tested-by: Luca Tettamanti <kronos.it@gmail.com>

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 11431ed..f49d684 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -225,6 +225,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 	if (!(oflags & FMODE_READ))
 		write_only = true;
 
+	mode &= ~current_umask();
 	rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode,
 			pnetfid, presp_data, &oplock, full_path,
 			cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
@@ -310,7 +311,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 		return -ENOMEM;
 	}
 
-	mode &= ~current_umask();
 	if (oplockEnabled)
 		oplock = REQ_OPLOCK;
 
@@ -336,7 +336,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			else /* success, no need to query */
 				goto cifs_create_set_dentry;
 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
-			 (rc != -EOPNOTSUPP)) /* path not found or net err */
+			 (rc != -EOPNOTSUPP) && (rc != -EINVAL))
 			goto cifs_create_out;
 		/* else fallthrough to retry, using older open call, this is
 		   case where server does not support this SMB level, and
@@ -609,7 +609,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	int xid;
 	int rc = 0; /* to get around spurious gcc warning, set to zero here */
 	int oplock = 0;
-	int mode;
 	__u16 fileHandle = 0;
 	bool posix_open = false;
 	struct cifs_sb_info *cifs_sb;
@@ -660,13 +659,12 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 
 	if (pTcon->unix_ext) {
 		if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
-				(nd->flags & LOOKUP_OPEN)) {
+		     (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open) {
 			if (!((nd->intent.open.flags & O_CREAT) &&
 					(nd->intent.open.flags & O_EXCL))) {
-				mode = nd->intent.open.create_mode &
-						~current_umask();
 				rc = cifs_posix_open(full_path, &newInode,
-					parent_dir_inode->i_sb, mode,
+					parent_dir_inode->i_sb,
+					nd->intent.open.create_mode,
 					nd->intent.open.flags, &oplock,
 					&fileHandle, xid);
 				/*
@@ -681,6 +679,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 				 */
 				if ((rc != -EINVAL) && (rc != -EOPNOTSUPP))
 					posix_open = true;
+				else
+					pTcon->broken_posix_open = true;
 			}
 		}
 		if (!posix_open)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 38c06f8..302ea15 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -130,10 +130,6 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode,
 			struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
 {
 
-	file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
-	if (file->private_data == NULL)
-		return -ENOMEM;
-	pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
 	write_lock(&GlobalSMBSeslock);
 
 	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
@@ -184,6 +180,38 @@ psx_client_can_cache:
 	return 0;
 }
 
+static struct cifsFileInfo *
+cifs_fill_filedata(struct file *file)
+{
+	struct list_head *tmp;
+	struct cifsFileInfo *pCifsFile = NULL;
+	struct cifsInodeInfo *pCifsInode = NULL;
+
+	/* search inode for this file and fill in file->private_data */
+	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
+	read_lock(&GlobalSMBSeslock);
+	list_for_each(tmp, &pCifsInode->openFileList) {
+		pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
+		if ((pCifsFile->pfile == NULL) &&
+		    (pCifsFile->pid == current->tgid)) {
+			/* mode set in cifs_create */
+
+			/* needed for writepage */
+			pCifsFile->pfile = file;
+			file->private_data = pCifsFile;
+			break;
+		}
+	}
+	read_unlock(&GlobalSMBSeslock);
+
+	if (file->private_data != NULL) {
+		return pCifsFile;
+	} else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
+			cERROR(1, ("could not find file instance for "
+				   "new file %p", file));
+	return NULL;
+}
+
 /* all arguments to this function must be checked for validity in caller */
 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 	struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
@@ -258,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file)
 	struct cifsTconInfo *tcon;
 	struct cifsFileInfo *pCifsFile;
 	struct cifsInodeInfo *pCifsInode;
-	struct list_head *tmp;
 	char *full_path = NULL;
 	int desiredAccess;
 	int disposition;
@@ -270,32 +297,12 @@ int cifs_open(struct inode *inode, struct file *file)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	tcon = cifs_sb->tcon;
 
-	/* search inode for this file and fill in file->private_data */
 	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-	read_lock(&GlobalSMBSeslock);
-	list_for_each(tmp, &pCifsInode->openFileList) {
-		pCifsFile = list_entry(tmp, struct cifsFileInfo,
-				       flist);
-		if ((pCifsFile->pfile == NULL) &&
-		    (pCifsFile->pid == current->tgid)) {
-			/* mode set in cifs_create */
-
-			/* needed for writepage */
-			pCifsFile->pfile = file;
-
-			file->private_data = pCifsFile;
-			break;
-		}
-	}
-	read_unlock(&GlobalSMBSeslock);
-
-	if (file->private_data != NULL) {
-		rc = 0;
+	pCifsFile = cifs_fill_filedata(file);
+	if (pCifsFile) {
 		FreeXid(xid);
-		return rc;
-	} else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
-			cERROR(1, ("could not find file instance for "
-				   "new file %p", file));
+		return 0;
+	}
 
 	full_path = build_path_from_dentry(file->f_path.dentry);
 	if (full_path == NULL) {
@@ -325,6 +332,7 @@ int cifs_open(struct inode *inode, struct file *file)
 			/* no need for special case handling of setting mode
 			   on read only files needed here */
 
+			pCifsFile = cifs_fill_filedata(file);
 			cifs_posix_open_inode_helper(inode, file, pCifsInode,
 						     pCifsFile, oplock, netfid);
 			goto out;
-- 
cgit v0.10.2


From 5789ba3bd0a3cd20df5980ebf03358f2eb44fd67 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 15:47:06 -0400
Subject: IMA: Minimal IMA policy and boot param for TCB IMA policy

The IMA TCB policy is dangerous.  A normal use can use all of a system's
memory (which cannot be freed) simply by building and running lots of
executables.  The TCB policy is also nearly useless because logging in as root
often causes a policy violation when dealing with utmp, thus rendering the
measurements meaningless.

There is no good fix for this in the kernel.  A full TCB policy would need to
be loaded in userspace using LSM rule matching to get both a protected and
useful system.  But, if too little is measured before userspace can load a real
policy one again ends up with a meaningless set of measurements.  One option
would be to put the policy load inside the initrd in order to get it early
enough in the boot sequence to be useful, but this runs into trouble with the
LSM.  For IMA to measure the LSM policy and the LSM policy loading mechanism
it needs rules to do so, but we already talked about problems with defaulting
to such broad rules....

IMA also depends on the files being measured to be on an FS which implements
and supports i_version.  Since the only FS with this support (ext4) doesn't
even use it by default it seems silly to have any IMA rules by default.

This should reduce the performance overhead of IMA to near 0 while still
letting users who choose to configure their machine as such to inclue the
ima_tcb kernel paramenter and get measurements during boot before they can
load a customized, reasonable policy in userspace.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbf..d9a24a0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -914,6 +914,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			Formt: { "sha1" | "md5" }
 			default: "sha1"
 
+	ima_tcb		[IMA]
+			Load a policy which meets the needs of the Trusted
+			Computing Base.  This means IMA will measure all
+			programs exec'd, files mmap'd for exec, and all files
+			opened for read by uid=0.
+
 	in2000=		[HW,SCSI]
 			See header of drivers/scsi/in2000.c.
 
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 31d677f..4719bbf 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -45,9 +45,17 @@ struct ima_measure_rule_entry {
 	} lsm[MAX_LSM_RULES];
 };
 
-/* Without LSM specific knowledge, the default policy can only be
+/*
+ * Without LSM specific knowledge, the default policy can only be
  * written in terms of .action, .func, .mask, .fsmagic, and .uid
  */
+
+/*
+ * The minimum rule set to allow for full TCB coverage.  Measures all files
+ * opened or mmap for exec and everything read by root.  Dangerous because
+ * normal users can easily run the machine out of memory simply building
+ * and running executables.
+ */
 static struct ima_measure_rule_entry default_rules[] = {
 	{.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC},
@@ -59,6 +67,8 @@ static struct ima_measure_rule_entry default_rules[] = {
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC,
 	 .flags = IMA_FUNC | IMA_MASK},
+	{.action = MEASURE,.func = PATH_CHECK,.mask = MAY_READ,.uid = 0,
+	 .flags = IMA_FUNC | IMA_MASK | IMA_UID},
 };
 
 static LIST_HEAD(measure_default_rules);
@@ -67,6 +77,14 @@ static struct list_head *ima_measure;
 
 static DEFINE_MUTEX(ima_measure_mutex);
 
+static bool ima_use_tcb __initdata;
+static int __init default_policy_setup(char *str)
+{
+	ima_use_tcb = 1;
+	return 1;
+}
+__setup("ima_tcb", default_policy_setup);
+
 /**
  * ima_match_rules - determine whether an inode matches the measure rule.
  * @rule: a pointer to a rule
@@ -162,9 +180,15 @@ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask)
  */
 void ima_init_policy(void)
 {
-	int i;
+	int i, entries;
+
+	/* if !ima_use_tcb set entries = 0 so we load NO default rules */
+	if (ima_use_tcb)
+		entries = ARRAY_SIZE(default_rules);
+	else
+		entries = 0;
 
-	for (i = 0; i < ARRAY_SIZE(default_rules); i++)
+	for (i = 0; i < entries; i++)
 		list_add_tail(&default_rules[i].list, &measure_default_rules);
 	ima_measure = &measure_default_rules;
 }
-- 
cgit v0.10.2


From 932995f0ce52525b32ff5127b522c2c164de3810 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 15:43:32 -0400
Subject: IMA: Add __init notation to ima functions

A number of IMA functions only used during init are not marked with __init.
Add those notations so they are freed automatically.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 50d572b..63003a6 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -103,7 +103,7 @@ int ima_calc_template_hash(int template_len, void *template, char *digest)
 	return rc;
 }
 
-static void ima_pcrread(int idx, u8 *pcr)
+static void __init ima_pcrread(int idx, u8 *pcr)
 {
 	if (!ima_used_chip)
 		return;
@@ -115,7 +115,7 @@ static void ima_pcrread(int idx, u8 *pcr)
 /*
  * Calculate the boot aggregate hash
  */
-int ima_calc_boot_aggregate(char *digest)
+int __init ima_calc_boot_aggregate(char *digest)
 {
 	struct hash_desc desc;
 	struct scatterlist sg;
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 3305a96..7039b14 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -319,7 +319,7 @@ static struct file_operations ima_measure_policy_ops = {
 	.release = ima_release_policy
 };
 
-int ima_fs_init(void)
+int __init ima_fs_init(void)
 {
 	ima_dir = securityfs_create_dir("ima", NULL);
 	if (IS_ERR(ima_dir))
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index ec79f1e..b8dd693 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -196,7 +196,7 @@ static void init_once(void *foo)
 	kref_set(&iint->refcount, 1);
 }
 
-void ima_iintcache_init(void)
+void __init ima_iintcache_init(void)
 {
 	iint_cache =
 	    kmem_cache_create("iint_cache", sizeof(struct ima_iint_cache), 0,
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 0b0bb8c..a40da7a 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -38,7 +38,7 @@ int ima_used_chip;
  * a different value.) Violations add a zero entry to the measurement
  * list and extend the aggregate PCR value with ff...ff's.
  */
-static void ima_add_boot_aggregate(void)
+static void __init ima_add_boot_aggregate(void)
 {
 	struct ima_template_entry *entry;
 	const char *op = "add_boot_aggregate";
@@ -71,7 +71,7 @@ err_out:
 			    audit_cause, result, 0);
 }
 
-int ima_init(void)
+int __init ima_init(void)
 {
 	u8 pcr_i[IMA_DIGEST_SIZE];
 	int rc;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 4719bbf..e127839 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -178,7 +178,7 @@ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask)
  * ima_measure points to either the measure_default_rules or the
  * the new measure_policy_rules.
  */
-void ima_init_policy(void)
+void __init ima_init_policy(void)
 {
 	int i, entries;
 
-- 
cgit v0.10.2


From b9fc745db833bbf74b4988493b8cd902a84c9415 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 19 May 2009 13:25:57 -0400
Subject: integrity: path_check update

- Add support in ima_path_check() for integrity checking without
incrementing the counts. (Required for nfsd.)
- rename and export opencount_get to ima_counts_get
- replace ima_shm_check calls with ima_counts_get
- export ima_path_check

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/fs/exec.c b/fs/exec.c
index 998e856..618d6d1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -130,7 +130,8 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 				 MAY_READ | MAY_EXEC | MAY_OPEN);
 	if (error)
 		goto exit;
-	error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN);
+	error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN,
+			       IMA_COUNT_UPDATE);
 	if (error)
 		goto exit;
 
@@ -680,7 +681,7 @@ struct file *open_exec(const char *name)
 	err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
 	if (err)
 		goto out_path_put;
-	err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN);
+	err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN, IMA_COUNT_UPDATE);
 	if (err)
 		goto out_path_put;
 
diff --git a/fs/namei.c b/fs/namei.c
index 78f253c..b05a2b1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -853,7 +853,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 			err = inode_permission(nd->path.dentry->d_inode,
 					       MAY_EXEC);
 		if (!err)
-			err = ima_path_check(&nd->path, MAY_EXEC);
+			err = ima_path_check(&nd->path, MAY_EXEC,
+				             IMA_COUNT_UPDATE);
  		if (err)
 			break;
 
@@ -1515,7 +1516,8 @@ int may_open(struct path *path, int acc_mode, int flag)
 		return error;
 
 	error = ima_path_check(path,
-			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
+			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC),
+			       IMA_COUNT_UPDATE);
 	if (error)
 		return error;
 	/*
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e2aa45..b1b827d 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -13,14 +13,17 @@
 #include <linux/fs.h>
 struct linux_binprm;
 
+#define IMA_COUNT_UPDATE 1
+#define IMA_COUNT_LEAVE 0
+
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask);
+extern int ima_path_check(struct path *path, int mask, int update_counts);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
-extern void ima_shm_check(struct file *file);
+extern void ima_counts_get(struct file *file);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -38,7 +41,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask)
+static inline int ima_path_check(struct path *path, int mask, int update_counts)
 {
 	return 0;
 }
@@ -53,7 +56,7 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
-static inline void ima_shm_check(struct file *file)
+static inline void ima_counts_get(struct file *file)
 {
 	return;
 }
diff --git a/ipc/shm.c b/ipc/shm.c
index faa46da..47b4642 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -384,7 +384,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto no_file;
-	ima_shm_check(file);
+	ima_counts_get(file);
 
 	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
 	if (id < 0) {
@@ -891,7 +891,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
 	if (!file)
 		goto out_free;
-	ima_shm_check(file);
+	ima_counts_get(file);
 
 	file->private_data = sfd;
 	file->f_mapping = shp->shm_file->f_mapping;
diff --git a/mm/shmem.c b/mm/shmem.c
index b25f95c..a817f75 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2684,7 +2684,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	ima_shm_check(file);
+	ima_counts_get(file);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	vma->vm_file = file;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index c4228c0..a2eb233 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -125,6 +125,15 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
 	return rc;
 }
 
+static void ima_update_counts(struct ima_iint_cache *iint, int mask)
+{
+	iint->opencount++;
+	if ((mask & MAY_WRITE) || (mask == 0))
+		iint->writecount++;
+	else if (mask & (MAY_READ | MAY_EXEC))
+		iint->readcount++;
+}
+
 /**
  * ima_path_check - based on policy, collect/store measurement.
  * @path: contains a pointer to the path to be measured
@@ -143,7 +152,7 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
  * Return 0 on success, an error code on failure.
  * (Based on the results of appraise_measurement().)
  */
-int ima_path_check(struct path *path, int mask)
+int ima_path_check(struct path *path, int mask, int update_counts)
 {
 	struct inode *inode = path->dentry->d_inode;
 	struct ima_iint_cache *iint;
@@ -157,11 +166,8 @@ int ima_path_check(struct path *path, int mask)
 		return 0;
 
 	mutex_lock(&iint->mutex);
-	iint->opencount++;
-	if ((mask & MAY_WRITE) || (mask == 0))
-		iint->writecount++;
-	else if (mask & (MAY_READ | MAY_EXEC))
-		iint->readcount++;
+	if (update_counts)
+		ima_update_counts(iint, mask);
 
 	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
 	if (rc < 0)
@@ -197,6 +203,7 @@ out:
 	kref_put(&iint->refcount, iint_free);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ima_path_check);
 
 static int process_measurement(struct file *file, const unsigned char *filename,
 			       int mask, int function)
@@ -225,7 +232,16 @@ out:
 	return rc;
 }
 
-static void opencount_get(struct file *file)
+/*
+ * ima_opens_get - increment file counts
+ *
+ * - for IPC shm and shmat file.
+ * - for nfsd exported files.
+ *
+ * Increment the counts for these files to prevent unnecessary
+ * imbalance messages.
+ */
+void ima_counts_get(struct file *file)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ima_iint_cache *iint;
@@ -237,8 +253,14 @@ static void opencount_get(struct file *file)
 		return;
 	mutex_lock(&iint->mutex);
 	iint->opencount++;
+	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+		iint->readcount++;
+
+	if (file->f_mode & FMODE_WRITE)
+		iint->writecount++;
 	mutex_unlock(&iint->mutex);
 }
+EXPORT_SYMBOL_GPL(ima_counts_get);
 
 /**
  * ima_file_mmap - based on policy, collect/store measurement.
@@ -263,18 +285,6 @@ int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
-/*
- * ima_shm_check - IPC shm and shmat create/fput a file
- *
- * Maintain the opencount for these files to prevent unnecessary
- * imbalance messages.
- */
-void ima_shm_check(struct file *file)
-{
-	opencount_get(file);
-	return;
-}
-
 /**
  * ima_bprm_check - based on policy, collect/store measurement.
  * @bprm: contains the linux_binprm structure
-- 
cgit v0.10.2


From c9d9ac525a0285a5b5ad9c3f9aa8b7c1753e6121 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 19 May 2009 13:25:58 -0400
Subject: integrity: move ima_counts_get

Based on discussion on lkml (Andrew Morton and Eric Paris),
move ima_counts_get down a layer into shmem/hugetlb__file_setup().
Resolves drm shmem_file_setup() usage case as well.

HD comment:
  I still think you're doing this at the wrong level, but recognize
  that you probably won't be persuaded until a few more users of
  alloc_file() emerge, all wanting your ima_counts_get().

  Resolving GEM's shmem_file_setup() is an improvement, so I'll say

Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 153d968..ccc62de 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -30,6 +30,7 @@
 #include <linux/dnotify.h>
 #include <linux/statfs.h>
 #include <linux/security.h>
+#include <linux/ima.h>
 
 #include <asm/uaccess.h>
 
@@ -997,6 +998,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
 			&hugetlbfs_file_operations);
 	if (!file)
 		goto out_dentry; /* inode is already attached */
+	ima_counts_get(file);
 
 	return file;
 
diff --git a/ipc/shm.c b/ipc/shm.c
index 47b4642..5608183 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -384,7 +384,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto no_file;
-	ima_counts_get(file);
 
 	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
 	if (id < 0) {
diff --git a/mm/shmem.c b/mm/shmem.c
index a817f75..0132fbd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2659,6 +2659,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
 	if (error)
 		goto close_file;
 #endif
+	ima_counts_get(file);
 	return file;
 
 close_file:
@@ -2684,7 +2685,6 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	ima_counts_get(file);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	vma->vm_file = file;
-- 
cgit v0.10.2


From 6470c077cae12227318f40f3e6d756caadcce4b0 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Thu, 21 May 2009 18:42:54 +0200
Subject: smack: do not beyond ARRAY_SIZE of data

Do not go beyond ARRAY_SIZE of data

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 904af34..8d3c2a0 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -776,7 +776,7 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf,
 	struct sockaddr_in newname;
 	char smack[SMK_LABELLEN];
 	char *sp;
-	char data[SMK_NETLBLADDRMAX];
+	char data[SMK_NETLBLADDRMAX + 1];
 	char *host = (char *)&newname.sin_addr.s_addr;
 	int rc;
 	struct netlbl_audit audit_info;
-- 
cgit v0.10.2


From 7fc1e5c15fde0fa9d2c08441f6898a9e51593d47 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Thu, 21 May 2009 18:25:23 +0000
Subject: sh: clkfwk: beyond ARRAY_SIZE of onchip_ops for sh7722.

Do not go beyond ARRAY_SIZE of onchip_ops

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index ae78efd..c090c9a 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -883,7 +883,7 @@ struct clk_ops *onchip_ops[] = {
 void __init
 arch_init_clk_ops(struct clk_ops **ops, int type)
 {
-	BUG_ON(type < 0 || type > ARRAY_SIZE(onchip_ops));
+	BUG_ON(type < 0 || type >= ARRAY_SIZE(onchip_ops));
 	*ops = onchip_ops[type];
 }
 
-- 
cgit v0.10.2


From 37869fa2da1a952ed736858f8b9e45c0b9131ae9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 20 May 2009 14:30:06 +0000
Subject: sh: ap325 camera without i2c driver fix

This patch fixes the ap325rxa ncm03j camera code to handle
the case where no i2c driver is present. Without this fix
i2c_transfer() may be passed NULL as adapter which results
in a crash.

Triggered when i2c-sh_mobile.c failed to probe() due to
missing MSTP clocks.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index 39e4691..f2a2964 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -263,6 +263,9 @@ static int camera_probe(void)
 	struct i2c_msg msg;
 	int ret;
 
+	if (!a)
+		return -ENODEV;
+
 	camera_power(1);
 	msg.addr = 0x6e;
 	msg.buf = camera_ncm03j_magic;
-- 
cgit v0.10.2


From 8e9bb19ef97d6594e735bee64b6d72103e350854 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 20 May 2009 14:34:43 +0000
Subject: video: stop sh_mobile_lcdcfb only if started

This patch fixes the LCDC driver to avoid calling the
function sh_mobile_lcdc_start_stop(priv, 0) unless the
same function has been called before to start the LCDC
hardware.

Triggered when sh_mobile_lcdcfb.c failed to probe() due to
missing MSTP clocks.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index 92ea0ab..f10d2fb 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -47,6 +47,7 @@ struct sh_mobile_lcdc_priv {
 #endif
 	unsigned long lddckr;
 	struct sh_mobile_lcdc_chan ch[2];
+	int started;
 };
 
 /* shared registers */
@@ -451,6 +452,7 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
 
 	/* start the lcdc */
 	sh_mobile_lcdc_start_stop(priv, 1);
+	priv->started = 1;
 
 	/* tell the board code to enable the panel */
 	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
@@ -493,7 +495,10 @@ static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv)
 	}
 
 	/* stop the lcdc */
-	sh_mobile_lcdc_start_stop(priv, 0);
+	if (priv->started) {
+		sh_mobile_lcdc_start_stop(priv, 0);
+		priv->started = 0;
+	}
 
 	/* stop clocks */
 	for (k = 0; k < ARRAY_SIZE(priv->ch); k++)
-- 
cgit v0.10.2


From 2f3ed17e010e8c0873094016f93c1afbb4adb666 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 13:47:52 +0900
Subject: sh: Wrap irq_to_desc_alloc_cpu() around CONFIG_SPARSE_IRQ
 temporarily.

irq_to_desc_alloc_cpu() has been renamed to irq_to_desc_alloc_node() in
-next, but as we can not presently enable SPARSE_IRQ without the early
irq_desc alloc patch, protect it with an ifdef until the interface has
settled and we are ready to enable it system-wide.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/irq/ipr.c b/arch/sh/kernel/cpu/irq/ipr.c
index fa0c846..6ad40db 100644
--- a/arch/sh/kernel/cpu/irq/ipr.c
+++ b/arch/sh/kernel/cpu/irq/ipr.c
@@ -59,17 +59,21 @@ void register_ipr_controller(struct ipr_desc *desc)
 
 	for (i = 0; i < desc->nr_irqs; i++) {
 		struct ipr_data *p = desc->ipr_data + i;
+#ifdef CONFIG_SPARSE_IRQ
 		struct irq_desc *irq_desc;
+#endif
 
 		BUG_ON(p->ipr_idx >= desc->nr_offsets);
 		BUG_ON(!desc->ipr_offsets[p->ipr_idx]);
 
+#ifdef CONFIG_SPARSE_IRQ
 		irq_desc = irq_to_desc_alloc_cpu(p->irq, smp_processor_id());
 		if (unlikely(!irq_desc)) {
 			printk(KERN_INFO "can not get irq_desc for %d\n",
 			       p->irq);
 			continue;
 		}
+#endif
 
 		disable_irq_nosync(p->irq);
 		set_irq_chip_and_handler_name(p->irq, &desc->chip,
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 098b767..caf0656 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -771,16 +771,19 @@ void __init register_intc_controller(struct intc_desc *desc)
 	for (i = 0; i < desc->nr_vectors; i++) {
 		struct intc_vect *vect = desc->vectors + i;
 		unsigned int irq = evt2irq(vect->vect);
+#ifdef CONFIG_SPARSE_IRQ
 		struct irq_desc *irq_desc;
-
+#endif
 		if (!vect->enum_id)
 			continue;
 
+#ifdef CONFIG_SPARSE_IRQ
 		irq_desc = irq_to_desc_alloc_cpu(irq, cpu);
 		if (unlikely(!irq_desc)) {
 			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 			continue;
 		}
+#endif
 
 		intc_register_irq(desc, d, vect->enum_id, irq);
 	}
-- 
cgit v0.10.2


From 62fad39be0662a924b60e4354b802525ceda0bb1 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 13:50:18 +0900
Subject: sh: Add a NR_IRQS_LEGACY for external IRQ0-7.

This adds a NR_IRQS_LEGACY definition, which will be used by sparse irq.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index d319baa..a2b8c99 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -8,7 +8,8 @@
  * advised to cap this at the hard limit that they're interested in
  * through the machvec.
  */
-#define NR_IRQS 256
+#define NR_IRQS			256
+#define NR_IRQS_LEGACY		8	/* Legacy external IRQ0-7 */
 
 /*
  * Convert back and forth between INTEVT and IRQ values.
-- 
cgit v0.10.2


From 36aa1e32f451b664adaf3fc9a77d8279b7a833b2 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 14:00:34 +0900
Subject: sh: clkfwk: Make clock-cpg usable for non-legacy platforms.

This adds a new SH_CLK_CPG for parts that have CPG support.
SH_CLK_CPG_LEGACY is made to depend on this, and still needs to be set
for platforms that want clock-cpg to register the legacy clocks. With
this new config item in place, it is now possible to start layering more
generic CPG code in place while other platforms transition off of the
legacy clocks.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index df764c5..c815975 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -513,7 +513,11 @@ config SH_PCLK_FREQ
 	  This is necessary for determining the reference clock value on
 	  platforms lacking an RTC.
 
+config SH_CLK_CPG
+	def_bool y
+
 config SH_CLK_CPG_LEGACY
+	depends on SH_CLK_CPG
 	def_bool y if !CPU_SUBTYPE_SH7785
 
 config SH_CLK_MD
diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile
index 05105b9..eecad7c 100644
--- a/arch/sh/kernel/cpu/Makefile
+++ b/arch/sh/kernel/cpu/Makefile
@@ -17,6 +17,6 @@ obj-$(CONFIG_ARCH_SHMOBILE)	+= shmobile/
 
 obj-$(CONFIG_UBC_WAKEUP)	+= ubc.o
 obj-$(CONFIG_SH_ADC)		+= adc.o
-obj-$(CONFIG_SH_CLK_CPG_LEGACY)	+= clock-cpg.o
+obj-$(CONFIG_SH_CLK_CPG)	+= clock-cpg.o
 
 obj-y	+= irq/ init.o clock.o
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
index b4ca200..b78c237a 100644
--- a/arch/sh/kernel/cpu/clock-cpg.c
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -2,6 +2,7 @@
 #include <linux/compiler.h>
 #include <asm/clock.h>
 
+#ifdef CONFIG_SH_CLK_CPG_LEGACY
 static struct clk master_clk = {
 	.name		= "master_clk",
 	.flags		= CLK_ENABLE_ON_INIT,
@@ -58,3 +59,4 @@ int __init __weak arch_clk_init(void)
 {
 	return cpg_clk_init();
 }
+#endif /* CONFIG_SH_CPG_CLK_LEGACY */
-- 
cgit v0.10.2


From 8fc40238b4ebf07cd11ca9707843338be22af72f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 14:21:03 +0900
Subject: sh: Prefer slab_is_available() over after_bootmem.

This kills off after_bootmem and switches to using slab_is_available()
instead. Presently the only place this is used is by the sh64 ioremap,
and there's not much point in keeping the reference around otherwise.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 3edf297..ee8e6bb 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -184,7 +184,6 @@ void __init paging_init(void)
 }
 
 static struct kcore_list kcore_mem, kcore_vmalloc;
-int after_bootmem = 0;
 
 void __init mem_init(void)
 {
@@ -217,8 +216,6 @@ void __init mem_init(void)
 	memset(empty_zero_page, 0, PAGE_SIZE);
 	__flush_wback_region(empty_zero_page, PAGE_SIZE);
 
-	after_bootmem = 1;
-
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
diff --git a/arch/sh/mm/ioremap_64.c b/arch/sh/mm/ioremap_64.c
index 2331229..828c859 100644
--- a/arch/sh/mm/ioremap_64.c
+++ b/arch/sh/mm/ioremap_64.c
@@ -20,6 +20,7 @@
 #include <linux/io.h>
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
+#include <linux/slab.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/addrspace.h>
@@ -175,7 +176,7 @@ static __init_refok void *sh64_get_page(void)
 {
 	void *page;
 
-	if (after_bootmem)
+	if (slab_is_available())
 		page = (void *)get_zeroed_page(GFP_KERNEL);
 	else
 		page = alloc_bootmem_pages(PAGE_SIZE);
-- 
cgit v0.10.2


From 6eb0ac03899a1363ba176abe0830a9e6698c0503 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 21 May 2009 19:10:23 +0000
Subject: powerpc/maple: Add a quirk to disable MSI for IPR on Bimini

Something in the HW or FW setup is busted and MSIs aren't working with
IPR on Bimini, so until we figure out exaxtly what's up, we quirk them
out

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index 3018552..04296ff 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -592,3 +592,17 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
 	}
 	return irq;
 }
+
+static void __devinit quirk_ipr_msi(struct pci_dev *dev)
+{
+	/* Something prevents MSIs from the IPR from working on Bimini,
+	 * and the driver has no smarts to recover. So disable MSI
+	 * on it for now. */
+
+	if (machine_is(maple)) {
+		dev->no_msi = 1;
+		dev_info(&dev->dev, "Quirk disabled MSI\n");
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN,
+			quirk_ipr_msi);
-- 
cgit v0.10.2


From 87488957a68293357a94c8142de7d0ae17914912 Mon Sep 17 00:00:00 2001
From: Adam Williamson <awilliam@redhat.com>
Date: Thu, 21 May 2009 18:32:59 -0400
Subject: ALSA: hda - fix audio on HP TX25xx series notebooks

Fixes https://bugtrack.alsa-project.org/alsa-bug/view.php?id=4121

Taken from https://bugzilla.redhat.com/show_bug.cgi?id=498060

Signed-off-by: Adam Williamson <awilliam@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b8a0d3e..bcbb736 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -12058,6 +12058,7 @@ static struct snd_pci_quirk alc268_cfg_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x0253, "Dell OEM", ALC268_DELL),
 	SND_PCI_QUIRK(0x1028, 0x02b0, "Dell Inspiron Mini9", ALC268_DELL),
 	SND_PCI_QUIRK(0x103c, 0x30cc, "TOSHIBA", ALC268_TOSHIBA),
+	SND_PCI_QUIRK(0x103c, 0x30f1, "HP TX25xx series", ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST),
 	SND_PCI_QUIRK(0x1179, 0xff10, "TOSHIBA A205", ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x1179, 0xff50, "TOSHIBA A305", ALC268_TOSHIBA),
-- 
cgit v0.10.2


From 88dff4936c0a5fa53080cca68dc963a8a2a674b0 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Fri, 22 May 2009 11:35:50 +0800
Subject: x86: DMI match for the Sony VGN-Z540N as it needs BIOS reboot

x86: DMI match for the Sony VGN-Z540N as it needs BIOS reboot,
see:

  http://bugzilla.kernel.org/show_bug.cgi?id=12901

[ Impact: fix hung reboot on certain systems ]

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Cc: Len Brown <lenb@kernel.org>
LKML-Reference: <1242963350.32574.53.camel@rzhang-dt>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1340dad..667188e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -232,6 +232,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
 		},
 	},
+	{	/* Handle problems with rebooting on Sony VGN-Z540N */
+		.callback = set_bios_reboot,
+		.ident = "Sony VGN-Z540N",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
+		},
+	},
 	{ }
 };
 
-- 
cgit v0.10.2


From e769421de22422461f0becae0ec6e792ec6f4476 Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Date: Wed, 28 Jan 2009 21:32:08 +0200
Subject: omap iommu: entries for Kconfig and Makefile

Signed-off-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>

diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index 9dd68fa..d483b25 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -115,6 +115,9 @@ config OMAP_MBOX_FWK
 	  Say Y here if you want to use OMAP Mailbox framework support for
 	  DSP, IVA1.0 and IVA2 in OMAP1/2/3.
 
+config OMAP_IOMMU
+	tristate
+
 choice
         prompt "System timer"
 	default OMAP_MPU_TIMER
diff --git a/arch/arm/plat-omap/Makefile b/arch/arm/plat-omap/Makefile
index 04a100c..a832795 100644
--- a/arch/arm/plat-omap/Makefile
+++ b/arch/arm/plat-omap/Makefile
@@ -13,6 +13,7 @@ obj-  :=
 obj-$(CONFIG_ARCH_OMAP16XX) += ocpi.o
 
 obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o
+obj-$(CONFIG_OMAP_IOMMU) += iommu.o iovmm.o
 
 obj-$(CONFIG_CPU_FREQ) += cpu-omap.o
 obj-$(CONFIG_OMAP_DM_TIMER) += dmtimer.o
-- 
cgit v0.10.2


From caf60779a6c5795340767ccf2f73ed7d5c7e0486 Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Date: Wed, 28 Jan 2009 21:32:09 +0200
Subject: omap2 iommu: entries for Kconfig and Makefile

Signed-off-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index c49d9bf..88629a7 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -28,6 +28,11 @@ endif
 obj-$(CONFIG_ARCH_OMAP2)		+= clock24xx.o
 obj-$(CONFIG_ARCH_OMAP3)		+= clock34xx.o
 
+iommu-y					+= iommu2.o
+iommu-$(CONFIG_ARCH_OMAP3)		+= omap3-iommu.o
+
+obj-$(CONFIG_OMAP_IOMMU)		+= $(iommu-y)
+
 # Specific board support
 obj-$(CONFIG_MACH_OMAP_GENERIC)		+= board-generic.o
 obj-$(CONFIG_MACH_OMAP_H4)		+= board-h4.o
-- 
cgit v0.10.2


From 5c651ffaee6f07aa1f5e6d0763845a2ee606b6bd Mon Sep 17 00:00:00 2001
From: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Date: Tue, 19 May 2009 09:07:55 +0300
Subject: omap iommu: add MPU_BRIDGE_IOMMU for tidspbridge migration

Currently "tidspbridge" driver uses its own mmu implementation and
will migrate to use this "omap iommu" eventually. This config is
provided to make this migration happen smoothly.

Signed-off-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>

diff --git a/arch/arm/mach-omap2/omap3-iommu.c b/arch/arm/mach-omap2/omap3-iommu.c
index 91ee38a..194189c 100644
--- a/arch/arm/mach-omap2/omap3-iommu.c
+++ b/arch/arm/mach-omap2/omap3-iommu.c
@@ -36,11 +36,13 @@ static const struct iommu_platform_data omap3_iommu_pdata[] __initconst = {
 		.nr_tlb_entries = 8,
 		.clk_name = "cam_ick",
 	},
+#if defined(CONFIG_MPU_BRIDGE_IOMMU)
 	{
 		.name = "iva2",
 		.nr_tlb_entries = 32,
 		.clk_name = "iva2_ck",
 	},
+#endif
 };
 #define NR_IOMMU_DEVICES ARRAY_SIZE(omap3_iommu_pdata)
 
-- 
cgit v0.10.2


From aaf22af46ff2b3f0a5c4fcaf1287ac6c28d4078e Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Thu, 21 May 2009 17:27:58 +0200
Subject: microblaze: Kbuild update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 31820df..db5294c 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,26 +1,3 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += auxvec.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += linkage.h
-header-y += msgbuf.h
-header-y += poll.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += socket.h
-header-y += sockios.h
-header-y += statfs.h
-header-y += stat.h
-header-y += termbits.h
-header-y += ucontext.h
-
-unifdef-y += cputable.h
-unifdef-y += elf.h
-unifdef-y += termios.h
+header-y  += elf.h
-- 
cgit v0.10.2


From b9ed7252d219c1c663944bf03846eabb515dbe75 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 22 May 2009 09:25:32 +0200
Subject: xen-blkfront: beyond ARRAY_SIZE of info->shadow

Do not go beyond ARRAY_SIZE of info->shadow
Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 6d4ac76..6d59508 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -122,7 +122,7 @@ static DEFINE_SPINLOCK(blkif_io_lock);
 static int get_id_from_freelist(struct blkfront_info *info)
 {
 	unsigned long free = info->shadow_free;
-	BUG_ON(free > BLK_RING_SIZE);
+	BUG_ON(free >= BLK_RING_SIZE);
 	info->shadow_free = info->shadow[free].req.id;
 	info->shadow[free].req.id = 0x0fffffee; /* debug */
 	return free;
-- 
cgit v0.10.2


From 0899d6349c60e4021224b51c8c97f49b829dfefd Mon Sep 17 00:00:00 2001
From: Li Yang <leoli@freescale.com>
Date: Fri, 22 May 2009 16:39:59 +0800
Subject: fsldma: update mailling list address in MAINTAINERS

linuxppc-embedded has been merged into linuxppc-dev.

Signed-off-by: Li Yang <leoli@freescale.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 2b349ba..cac3e3b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2241,7 +2241,7 @@ P:	Li Yang
 M:	leoli@freescale.com
 P:	Zhang Wei
 M:	zw@zh-kernel.org
-L:	linuxppc-embedded@ozlabs.org
+L:	linuxppc-dev@ozlabs.org
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	drivers/dma/fsldma.*
-- 
cgit v0.10.2


From f47edc6dab11801c2e97088ba7bbce042ded867c Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 22 May 2009 16:46:52 +0800
Subject: fsldma: fix check on potential fdev->chan[] overflow

Fix the check of potential array overflow when using corrupted channel
device tree nodes.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Li Yang <leoli@freescale.com>

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index da8a8ed..391b1bd 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -830,7 +830,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
 			new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
 
 	new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
-	if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
+	if (new_fsl_chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
 		dev_err(fdev->dev, "There is no %d channel!\n",
 				new_fsl_chan->id);
 		err = -EINVAL;
-- 
cgit v0.10.2


From 138ef0185177a6d221d24b6aa8f12d867fbbef90 Mon Sep 17 00:00:00 2001
From: Ira Snyder <iws@ovro.caltech.edu>
Date: Tue, 19 May 2009 15:42:13 -0700
Subject: fsldma: fix "DMA halt timeout!" errors

When using the DMA controller from multiple threads at the same time, it is
possible to get lots of "DMA halt timeout!" errors printed to the kernel
log.

This occurs due to a race between fsl_dma_memcpy_issue_pending() and the
interrupt handler, fsl_dma_chan_do_interrupt(). Both call the
fsl_chan_xfer_ld_queue() function, which does not protect against
concurrent accesses to dma_halt() and dma_start().

The existing spinlock is moved to cover the dma_halt() and dma_start()
functions. Testing shows that the "DMA halt timeout!" errors disappear.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Li Yang <leoli@freescale.com>

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 391b1bd..a4151c3 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -598,15 +598,16 @@ static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
 	dma_addr_t next_dest_addr;
 	unsigned long flags;
 
+	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
 	if (!dma_is_idle(fsl_chan))
-		return;
+		goto out_unlock;
 
 	dma_halt(fsl_chan);
 
 	/* If there are some link descriptors
 	 * not transfered in queue. We need to start it.
 	 */
-	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
 
 	/* Find the first un-transfer desciptor */
 	for (ld_node = fsl_chan->ld_queue.next;
@@ -617,8 +618,6 @@ static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
 				fsl_chan->common.cookie) == DMA_SUCCESS);
 		ld_node = ld_node->next);
 
-	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
-
 	if (ld_node != &fsl_chan->ld_queue) {
 		/* Get the ld start address from ld_queue */
 		next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys;
@@ -630,6 +629,9 @@ static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
 		set_cdar(fsl_chan, 0);
 		set_ndar(fsl_chan, 0);
 	}
+
+out_unlock:
+	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 }
 
 /**
-- 
cgit v0.10.2


From bcfb7465c03a8c62c89da374677df56f6b894d44 Mon Sep 17 00:00:00 2001
From: Ira Snyder <iws@ovro.caltech.edu>
Date: Fri, 15 May 2009 14:27:16 -0700
Subject: fsldma: fix infinite loop on multi-descriptor DMA chain completion

When creating a DMA transaction with multiple descriptors, the async_tx
cookie is set to 0 for each descriptor in the chain, excluding the last
descriptor, whose cookie is set to -EBUSY.

When fsl_dma_tx_submit() is run, it only assigns a cookie to the first
descriptor. All of the remaining descriptors keep their original value,
including the last descriptor, which is set to -EBUSY.

After the DMA completes, the driver will update the last completed cookie
to be -EBUSY, which is an error code instead of a valid cookie. This causes
dma_async_is_complete() to always return DMA_IN_PROGRESS.

This causes the fsldma driver to never cleanup the queue of link
descriptors, and the driver will re-run the DMA transaction on the hardware
each time it receives the End-of-Chain interrupt. This causes an infinite
loop.

With this patch, fsl_dma_tx_submit() is changed to assign a cookie to every
descriptor in the chain. The rest of the code then works without problems.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Li Yang <leoli@freescale.com>

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index a4151c3..7313a1a 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -313,8 +313,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
 
 static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
-	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
 	struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
+	struct fsl_desc_sw *desc;
 	unsigned long flags;
 	dma_cookie_t cookie;
 
@@ -322,14 +322,17 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
 
 	cookie = fsl_chan->common.cookie;
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	desc->async_tx.cookie = cookie;
-	fsl_chan->common.cookie = desc->async_tx.cookie;
-
-	append_ld_queue(fsl_chan, desc);
-	list_splice_init(&desc->async_tx.tx_list, fsl_chan->ld_queue.prev);
+	list_for_each_entry(desc, &tx->tx_list, node) {
+		cookie++;
+		if (cookie < 0)
+			cookie = 1;
+
+		desc->async_tx.cookie = cookie;
+	}
+
+	fsl_chan->common.cookie = cookie;
+	append_ld_queue(fsl_chan, tx_to_fsl_desc(tx));
+	list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev);
 
 	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 
-- 
cgit v0.10.2


From 776c8943f2766f2819fafd88fdfbaf418ecd6e41 Mon Sep 17 00:00:00 2001
From: Ira Snyder <iws@ovro.caltech.edu>
Date: Fri, 15 May 2009 11:33:20 -0700
Subject: fsldma: snooping is not enabled for last entry in descriptor chain

On the 83xx controller, snooping is necessary for the DMA controller to
ensure cache coherence with the CPU when transferring to/from RAM.

The last descriptor in a chain will always have the End-of-Chain interrupt
bit set, so we can set the snoop bit while adding the End-of-Chain
interrupt bit.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Li Yang <leoli@freescale.com>

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 7313a1a..ff9194d 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -179,9 +179,14 @@ static void dma_halt(struct fsl_dma_chan *fsl_chan)
 static void set_ld_eol(struct fsl_dma_chan *fsl_chan,
 			struct fsl_desc_sw *desc)
 {
+	u64 snoop_bits;
+
+	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+		? FSL_DMA_SNEN : 0;
+
 	desc->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
-		DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64)	| FSL_DMA_EOL,
-		64);
+		DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
+			| snoop_bits, 64);
 }
 
 static void append_ld_queue(struct fsl_dma_chan *fsl_chan,
-- 
cgit v0.10.2


From 2e077f8e8337e52eef3c39c24c31e103b11a0326 Mon Sep 17 00:00:00 2001
From: Ira Snyder <iws@ovro.caltech.edu>
Date: Fri, 15 May 2009 09:59:46 -0700
Subject: fsldma: fix memory leak on error path in fsl_dma_prep_memcpy()

When preparing a memcpy operation, if the kernel fails to allocate memory
for a link descriptor after the first link descriptor has already been
allocated, then some memory will never be released. Fix the problem by
walking the list of allocated descriptors backwards, and freeing the
allocated descriptors back into the DMA pool.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Li Yang <leoli@freescale.com>

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index ff9194d..1578310 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -462,8 +462,8 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 {
 	struct fsl_dma_chan *fsl_chan;
 	struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
+	struct list_head *list;
 	size_t copy;
-	LIST_HEAD(link_chain);
 
 	if (!chan)
 		return NULL;
@@ -480,7 +480,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 		if (!new) {
 			dev_err(fsl_chan->dev,
 					"No free memory for link descriptor\n");
-			return NULL;
+			goto fail;
 		}
 #ifdef FSL_DMA_LD_DEBUG
 		dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
@@ -515,7 +515,19 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 	/* Set End-of-link to the last link descriptor of new list*/
 	set_ld_eol(fsl_chan, new);
 
-	return first ? &first->async_tx : NULL;
+	return &first->async_tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	list = &first->async_tx.tx_list;
+	list_for_each_entry_safe_reverse(new, prev, list, node) {
+		list_del(&new->node);
+		dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
+	}
+
+	return NULL;
 }
 
 /**
-- 
cgit v0.10.2


From b1e71b0622974953e46a284aa986504a90869a9b Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 22 May 2009 10:01:55 +0100
Subject: GFS2: Clean up some file names

This patch renames the ops_*.c files which have no counterpart
without the ops_ prefix in order to shorten the name and make
it more readable. In addition, ops_address.h (which was very
small) is moved into inode.h and inode.h is cleaned up by
adding extern where required.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index a851ea4..4f7332c 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
 	glops.o inode.o log.o lops.o main.o meta_io.o \
-	mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
+	mount.o aops.o dentry.o export.o file.o \
 	ops_fstype.o ops_inode.o ops_super.o quota.o \
 	recovery.o rgrp.o super.o sys.o trans.o util.o
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
new file mode 100644
index 0000000..03ebb43
--- /dev/null
+++ b/fs/gfs2/aops.c
@@ -0,0 +1,1145 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/pagevec.h>
+#include <linux/mpage.h>
+#include <linux/fs.h>
+#include <linux/writeback.h>
+#include <linux/swap.h>
+#include <linux/gfs2_ondisk.h>
+#include <linux/backing-dev.h>
+
+#include "gfs2.h"
+#include "incore.h"
+#include "bmap.h"
+#include "glock.h"
+#include "inode.h"
+#include "log.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "trans.h"
+#include "rgrp.h"
+#include "super.h"
+#include "util.h"
+#include "glops.h"
+
+
+static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
+				   unsigned int from, unsigned int to)
+{
+	struct buffer_head *head = page_buffers(page);
+	unsigned int bsize = head->b_size;
+	struct buffer_head *bh;
+	unsigned int start, end;
+
+	for (bh = head, start = 0; bh != head || !start;
+	     bh = bh->b_this_page, start = end) {
+		end = start + bsize;
+		if (end <= from || start >= to)
+			continue;
+		if (gfs2_is_jdata(ip))
+			set_buffer_uptodate(bh);
+		gfs2_trans_add_bh(ip->i_gl, bh, 0);
+	}
+}
+
+/**
+ * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
+ * @inode: The inode
+ * @lblock: The block number to look up
+ * @bh_result: The buffer head to return the result in
+ * @create: Non-zero if we may add block to the file
+ *
+ * Returns: errno
+ */
+
+static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
+				  struct buffer_head *bh_result, int create)
+{
+	int error;
+
+	error = gfs2_block_map(inode, lblock, bh_result, 0);
+	if (error)
+		return error;
+	if (!buffer_mapped(bh_result))
+		return -EIO;
+	return 0;
+}
+
+static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
+				 struct buffer_head *bh_result, int create)
+{
+	return gfs2_block_map(inode, lblock, bh_result, 0);
+}
+
+/**
+ * gfs2_writepage_common - Common bits of writepage
+ * @page: The page to be written
+ * @wbc: The writeback control
+ *
+ * Returns: 1 if writepage is ok, otherwise an error code or zero if no error.
+ */
+
+static int gfs2_writepage_common(struct page *page,
+				 struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	loff_t i_size = i_size_read(inode);
+	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	unsigned offset;
+
+	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
+		goto out;
+	if (current->journal_info)
+		goto redirty;
+	/* Is the page fully outside i_size? (truncate in progress) */
+	offset = i_size & (PAGE_CACHE_SIZE-1);
+	if (page->index > end_index || (page->index == end_index && !offset)) {
+		page->mapping->a_ops->invalidatepage(page, 0);
+		goto out;
+	}
+	return 1;
+redirty:
+	redirty_page_for_writepage(wbc, page);
+out:
+	unlock_page(page);
+	return 0;
+}
+
+/**
+ * gfs2_writeback_writepage - Write page for writeback mappings
+ * @page: The page
+ * @wbc: The writeback control
+ *
+ */
+
+static int gfs2_writeback_writepage(struct page *page,
+				    struct writeback_control *wbc)
+{
+	int ret;
+
+	ret = gfs2_writepage_common(page, wbc);
+	if (ret <= 0)
+		return ret;
+
+	ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc);
+	if (ret == -EAGAIN)
+		ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
+	return ret;
+}
+
+/**
+ * gfs2_ordered_writepage - Write page for ordered data files
+ * @page: The page to write
+ * @wbc: The writeback control
+ *
+ */
+
+static int gfs2_ordered_writepage(struct page *page,
+				  struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	int ret;
+
+	ret = gfs2_writepage_common(page, wbc);
+	if (ret <= 0)
+		return ret;
+
+	if (!page_has_buffers(page)) {
+		create_empty_buffers(page, inode->i_sb->s_blocksize,
+				     (1 << BH_Dirty)|(1 << BH_Uptodate));
+	}
+	gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1);
+	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
+}
+
+/**
+ * __gfs2_jdata_writepage - The core of jdata writepage
+ * @page: The page to write
+ * @wbc: The writeback control
+ *
+ * This is shared between writepage and writepages and implements the
+ * core of the writepage operation. If a transaction is required then
+ * PageChecked will have been set and the transaction will have
+ * already been started before this is called.
+ */
+
+static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+	if (PageChecked(page)) {
+		ClearPageChecked(page);
+		if (!page_has_buffers(page)) {
+			create_empty_buffers(page, inode->i_sb->s_blocksize,
+					     (1 << BH_Dirty)|(1 << BH_Uptodate));
+		}
+		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
+	}
+	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
+}
+
+/**
+ * gfs2_jdata_writepage - Write complete page
+ * @page: Page to write
+ *
+ * Returns: errno
+ *
+ */
+
+static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	int ret;
+	int done_trans = 0;
+
+	if (PageChecked(page)) {
+		if (wbc->sync_mode != WB_SYNC_ALL)
+			goto out_ignore;
+		ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
+		if (ret)
+			goto out_ignore;
+		done_trans = 1;
+	}
+	ret = gfs2_writepage_common(page, wbc);
+	if (ret > 0)
+		ret = __gfs2_jdata_writepage(page, wbc);
+	if (done_trans)
+		gfs2_trans_end(sdp);
+	return ret;
+
+out_ignore:
+	redirty_page_for_writepage(wbc, page);
+	unlock_page(page);
+	return 0;
+}
+
+/**
+ * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk
+ * @mapping: The mapping to write
+ * @wbc: Write-back control
+ *
+ * For the data=writeback case we can already ignore buffer heads
+ * and write whole extents at once. This is a big reduction in the
+ * number of I/O requests we send and the bmap calls we make in this case.
+ */
+static int gfs2_writeback_writepages(struct address_space *mapping,
+				     struct writeback_control *wbc)
+{
+	return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+}
+
+/**
+ * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
+ * @mapping: The mapping
+ * @wbc: The writeback control
+ * @writepage: The writepage function to call for each page
+ * @pvec: The vector of pages
+ * @nr_pages: The number of pages to write
+ *
+ * Returns: non-zero if loop should terminate, zero otherwise
+ */
+
+static int gfs2_write_jdata_pagevec(struct address_space *mapping,
+				    struct writeback_control *wbc,
+				    struct pagevec *pvec,
+				    int nr_pages, pgoff_t end)
+{
+	struct inode *inode = mapping->host;
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	loff_t i_size = i_size_read(inode);
+	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
+	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	int i;
+	int ret;
+
+	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
+	if (ret < 0)
+		return ret;
+
+	for(i = 0; i < nr_pages; i++) {
+		struct page *page = pvec->pages[i];
+
+		lock_page(page);
+
+		if (unlikely(page->mapping != mapping)) {
+			unlock_page(page);
+			continue;
+		}
+
+		if (!wbc->range_cyclic && page->index > end) {
+			ret = 1;
+			unlock_page(page);
+			continue;
+		}
+
+		if (wbc->sync_mode != WB_SYNC_NONE)
+			wait_on_page_writeback(page);
+
+		if (PageWriteback(page) ||
+		    !clear_page_dirty_for_io(page)) {
+			unlock_page(page);
+			continue;
+		}
+
+		/* Is the page fully outside i_size? (truncate in progress) */
+		if (page->index > end_index || (page->index == end_index && !offset)) {
+			page->mapping->a_ops->invalidatepage(page, 0);
+			unlock_page(page);
+			continue;
+		}
+
+		ret = __gfs2_jdata_writepage(page, wbc);
+
+		if (ret || (--(wbc->nr_to_write) <= 0))
+			ret = 1;
+		if (wbc->nonblocking && bdi_write_congested(bdi)) {
+			wbc->encountered_congestion = 1;
+			ret = 1;
+		}
+
+	}
+	gfs2_trans_end(sdp);
+	return ret;
+}
+
+/**
+ * gfs2_write_cache_jdata - Like write_cache_pages but different
+ * @mapping: The mapping to write
+ * @wbc: The writeback control
+ * @writepage: The writepage function to call
+ * @data: The data to pass to writepage
+ *
+ * The reason that we use our own function here is that we need to
+ * start transactions before we grab page locks. This allows us
+ * to get the ordering right.
+ */
+
+static int gfs2_write_cache_jdata(struct address_space *mapping,
+				  struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	int ret = 0;
+	int done = 0;
+	struct pagevec pvec;
+	int nr_pages;
+	pgoff_t index;
+	pgoff_t end;
+	int scanned = 0;
+	int range_whole = 0;
+
+	if (wbc->nonblocking && bdi_write_congested(bdi)) {
+		wbc->encountered_congestion = 1;
+		return 0;
+	}
+
+	pagevec_init(&pvec, 0);
+	if (wbc->range_cyclic) {
+		index = mapping->writeback_index; /* Start from prev offset */
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		scanned = 1;
+	}
+
+retry:
+	 while (!done && (index <= end) &&
+		(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+					       PAGECACHE_TAG_DIRTY,
+					       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+		scanned = 1;
+		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
+		if (ret)
+			done = 1;
+		if (ret > 0)
+			ret = 0;
+
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	if (!scanned && !done) {
+		/*
+		 * We hit the last page and there is more work to be done: wrap
+		 * back to the start of the file
+		 */
+		scanned = 1;
+		index = 0;
+		goto retry;
+	}
+
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = index;
+	return ret;
+}
+
+
+/**
+ * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
+ * @mapping: The mapping to write
+ * @wbc: The writeback control
+ * 
+ */
+
+static int gfs2_jdata_writepages(struct address_space *mapping,
+				 struct writeback_control *wbc)
+{
+	struct gfs2_inode *ip = GFS2_I(mapping->host);
+	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
+	int ret;
+
+	ret = gfs2_write_cache_jdata(mapping, wbc);
+	if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
+		gfs2_log_flush(sdp, ip->i_gl);
+		ret = gfs2_write_cache_jdata(mapping, wbc);
+	}
+	return ret;
+}
+
+/**
+ * stuffed_readpage - Fill in a Linux page with stuffed file data
+ * @ip: the inode
+ * @page: the page
+ *
+ * Returns: errno
+ */
+
+static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+{
+	struct buffer_head *dibh;
+	void *kaddr;
+	int error;
+
+	/*
+	 * Due to the order of unstuffing files and ->fault(), we can be
+	 * asked for a zero page in the case of a stuffed file being extended,
+	 * so we need to supply one here. It doesn't happen often.
+	 */
+	if (unlikely(page->index)) {
+		zero_user(page, 0, PAGE_CACHE_SIZE);
+		SetPageUptodate(page);
+		return 0;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
+	       ip->i_disksize);
+	memset(kaddr + ip->i_disksize, 0, PAGE_CACHE_SIZE - ip->i_disksize);
+	kunmap_atomic(kaddr, KM_USER0);
+	flush_dcache_page(page);
+	brelse(dibh);
+	SetPageUptodate(page);
+
+	return 0;
+}
+
+
+/**
+ * __gfs2_readpage - readpage
+ * @file: The file to read a page for
+ * @page: The page to read
+ *
+ * This is the core of gfs2's readpage. Its used by the internal file
+ * reading code as in that case we already hold the glock. Also its
+ * called by gfs2_readpage() once the required lock has been granted.
+ *
+ */
+
+static int __gfs2_readpage(void *file, struct page *page)
+{
+	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+	int error;
+
+	if (gfs2_is_stuffed(ip)) {
+		error = stuffed_readpage(ip, page);
+		unlock_page(page);
+	} else {
+		error = mpage_readpage(page, gfs2_block_map);
+	}
+
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		return -EIO;
+
+	return error;
+}
+
+/**
+ * gfs2_readpage - read a page of a file
+ * @file: The file to read
+ * @page: The page of the file
+ *
+ * This deals with the locking required. We have to unlock and
+ * relock the page in order to get the locking in the right
+ * order.
+ */
+
+static int gfs2_readpage(struct file *file, struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct gfs2_inode *ip = GFS2_I(mapping->host);
+	struct gfs2_holder gh;
+	int error;
+
+	unlock_page(page);
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	error = gfs2_glock_nq(&gh);
+	if (unlikely(error))
+		goto out;
+	error = AOP_TRUNCATED_PAGE;
+	lock_page(page);
+	if (page->mapping == mapping && !PageUptodate(page))
+		error = __gfs2_readpage(file, page);
+	else
+		unlock_page(page);
+	gfs2_glock_dq(&gh);
+out:
+	gfs2_holder_uninit(&gh);
+	if (error && error != AOP_TRUNCATED_PAGE)
+		lock_page(page);
+	return error;
+}
+
+/**
+ * gfs2_internal_read - read an internal file
+ * @ip: The gfs2 inode
+ * @ra_state: The readahead state (or NULL for no readahead)
+ * @buf: The buffer to fill
+ * @pos: The file position
+ * @size: The amount to read
+ *
+ */
+
+int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
+                       char *buf, loff_t *pos, unsigned size)
+{
+	struct address_space *mapping = ip->i_inode.i_mapping;
+	unsigned long index = *pos / PAGE_CACHE_SIZE;
+	unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
+	unsigned copied = 0;
+	unsigned amt;
+	struct page *page;
+	void *p;
+
+	do {
+		amt = size - copied;
+		if (offset + size > PAGE_CACHE_SIZE)
+			amt = PAGE_CACHE_SIZE - offset;
+		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+		p = kmap_atomic(page, KM_USER0);
+		memcpy(buf + copied, p + offset, amt);
+		kunmap_atomic(p, KM_USER0);
+		mark_page_accessed(page);
+		page_cache_release(page);
+		copied += amt;
+		index++;
+		offset = 0;
+	} while(copied < size);
+	(*pos) += size;
+	return size;
+}
+
+/**
+ * gfs2_readpages - Read a bunch of pages at once
+ *
+ * Some notes:
+ * 1. This is only for readahead, so we can simply ignore any things
+ *    which are slightly inconvenient (such as locking conflicts between
+ *    the page lock and the glock) and return having done no I/O. Its
+ *    obviously not something we'd want to do on too regular a basis.
+ *    Any I/O we ignore at this time will be done via readpage later.
+ * 2. We don't handle stuffed files here we let readpage do the honours.
+ * 3. mpage_readpages() does most of the heavy lifting in the common case.
+ * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
+ */
+
+static int gfs2_readpages(struct file *file, struct address_space *mapping,
+			  struct list_head *pages, unsigned nr_pages)
+{
+	struct inode *inode = mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (unlikely(ret))
+		goto out_uninit;
+	if (!gfs2_is_stuffed(ip))
+		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
+	gfs2_glock_dq(&gh);
+out_uninit:
+	gfs2_holder_uninit(&gh);
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		ret = -EIO;
+	return ret;
+}
+
+/**
+ * gfs2_write_begin - Begin to write to a file
+ * @file: The file to write to
+ * @mapping: The mapping in which to write
+ * @pos: The file offset at which to start writing
+ * @len: Length of the write
+ * @flags: Various flags
+ * @pagep: Pointer to return the page
+ * @fsdata: Pointer to return fs data (unused by GFS2)
+ *
+ * Returns: errno
+ */
+
+static int gfs2_write_begin(struct file *file, struct address_space *mapping,
+			    loff_t pos, unsigned len, unsigned flags,
+			    struct page **pagep, void **fsdata)
+{
+	struct gfs2_inode *ip = GFS2_I(mapping->host);
+	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
+	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+	int alloc_required;
+	int error = 0;
+	struct gfs2_alloc *al;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned to = from + len;
+	struct page *page;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+	error = gfs2_glock_nq(&ip->i_gh);
+	if (unlikely(error))
+		goto out_uninit;
+
+	error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
+	if (error)
+		goto out_unlock;
+
+	if (alloc_required || gfs2_is_jdata(ip))
+		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
+
+	if (alloc_required) {
+		al = gfs2_alloc_get(ip);
+		if (!al) {
+			error = -ENOMEM;
+			goto out_unlock;
+		}
+
+		error = gfs2_quota_lock_check(ip);
+		if (error)
+			goto out_alloc_put;
+
+		al->al_requested = data_blocks + ind_blocks;
+		error = gfs2_inplace_reserve(ip);
+		if (error)
+			goto out_qunlock;
+	}
+
+	rblocks = RES_DINODE + ind_blocks;
+	if (gfs2_is_jdata(ip))
+		rblocks += data_blocks ? data_blocks : 1;
+	if (ind_blocks || data_blocks)
+		rblocks += RES_STATFS + RES_QUOTA;
+
+	error = gfs2_trans_begin(sdp, rblocks,
+				 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+	if (error)
+		goto out_trans_fail;
+
+	error = -ENOMEM;
+	flags |= AOP_FLAG_NOFS;
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	*pagep = page;
+	if (unlikely(!page))
+		goto out_endtrans;
+
+	if (gfs2_is_stuffed(ip)) {
+		error = 0;
+		if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
+			error = gfs2_unstuff_dinode(ip, page);
+			if (error == 0)
+				goto prepare_write;
+		} else if (!PageUptodate(page)) {
+			error = stuffed_readpage(ip, page);
+		}
+		goto out;
+	}
+
+prepare_write:
+	error = block_prepare_write(page, from, to, gfs2_block_map);
+out:
+	if (error == 0)
+		return 0;
+
+	page_cache_release(page);
+	if (pos + len > ip->i_inode.i_size)
+		vmtruncate(&ip->i_inode, ip->i_inode.i_size);
+out_endtrans:
+	gfs2_trans_end(sdp);
+out_trans_fail:
+	if (alloc_required) {
+		gfs2_inplace_release(ip);
+out_qunlock:
+		gfs2_quota_unlock(ip);
+out_alloc_put:
+		gfs2_alloc_put(ip);
+	}
+out_unlock:
+	gfs2_glock_dq(&ip->i_gh);
+out_uninit:
+	gfs2_holder_uninit(&ip->i_gh);
+	return error;
+}
+
+/**
+ * adjust_fs_space - Adjusts the free space available due to gfs2_grow
+ * @inode: the rindex inode
+ */
+static void adjust_fs_space(struct inode *inode)
+{
+	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+	u64 fs_total, new_free;
+
+	/* Total up the file system space, according to the latest rindex. */
+	fs_total = gfs2_ri_total(sdp);
+
+	spin_lock(&sdp->sd_statfs_spin);
+	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
+		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
+	else
+		new_free = 0;
+	spin_unlock(&sdp->sd_statfs_spin);
+	fs_warn(sdp, "File system extended by %llu blocks.\n",
+		(unsigned long long)new_free);
+	gfs2_statfs_change(sdp, new_free, new_free, 0);
+}
+
+/**
+ * gfs2_stuffed_write_end - Write end for stuffed files
+ * @inode: The inode
+ * @dibh: The buffer_head containing the on-disk inode
+ * @pos: The file position
+ * @len: The length of the write
+ * @copied: How much was actually copied by the VFS
+ * @page: The page
+ *
+ * This copies the data from the page into the inode block after
+ * the inode data structure itself.
+ *
+ * Returns: errno
+ */
+static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
+				  loff_t pos, unsigned len, unsigned copied,
+				  struct page *page)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	u64 to = pos + copied;
+	void *kaddr;
+	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
+	struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
+
+	BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
+	kaddr = kmap_atomic(page, KM_USER0);
+	memcpy(buf + pos, kaddr + pos, copied);
+	memset(kaddr + pos + copied, 0, len - copied);
+	flush_dcache_page(page);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	if (!PageUptodate(page))
+		SetPageUptodate(page);
+	unlock_page(page);
+	page_cache_release(page);
+
+	if (copied) {
+		if (inode->i_size < to) {
+			i_size_write(inode, to);
+			ip->i_disksize = inode->i_size;
+		}
+		gfs2_dinode_out(ip, di);
+		mark_inode_dirty(inode);
+	}
+
+	if (inode == sdp->sd_rindex)
+		adjust_fs_space(inode);
+
+	brelse(dibh);
+	gfs2_trans_end(sdp);
+	gfs2_glock_dq(&ip->i_gh);
+	gfs2_holder_uninit(&ip->i_gh);
+	return copied;
+}
+
+/**
+ * gfs2_write_end
+ * @file: The file to write to
+ * @mapping: The address space to write to
+ * @pos: The file position
+ * @len: The length of the data
+ * @copied:
+ * @page: The page that has been written
+ * @fsdata: The fsdata (unused in GFS2)
+ *
+ * The main write_end function for GFS2. We have a separate one for
+ * stuffed files as they are slightly different, otherwise we just
+ * put our locking around the VFS provided functions.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_write_end(struct file *file, struct address_space *mapping,
+			  loff_t pos, unsigned len, unsigned copied,
+			  struct page *page, void *fsdata)
+{
+	struct inode *inode = page->mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct buffer_head *dibh;
+	struct gfs2_alloc *al = ip->i_alloc;
+	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned int to = from + len;
+	int ret;
+
+	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
+
+	ret = gfs2_meta_inode_buffer(ip, &dibh);
+	if (unlikely(ret)) {
+		unlock_page(page);
+		page_cache_release(page);
+		goto failed;
+	}
+
+	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+
+	if (gfs2_is_stuffed(ip))
+		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
+
+	if (!gfs2_is_writeback(ip))
+		gfs2_page_add_databufs(ip, page, from, to);
+
+	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (ret > 0) {
+		if (inode->i_size > ip->i_disksize)
+			ip->i_disksize = inode->i_size;
+		gfs2_dinode_out(ip, dibh->b_data);
+		mark_inode_dirty(inode);
+	}
+
+	if (inode == sdp->sd_rindex)
+		adjust_fs_space(inode);
+
+	brelse(dibh);
+	gfs2_trans_end(sdp);
+failed:
+	if (al) {
+		gfs2_inplace_release(ip);
+		gfs2_quota_unlock(ip);
+		gfs2_alloc_put(ip);
+	}
+	gfs2_glock_dq(&ip->i_gh);
+	gfs2_holder_uninit(&ip->i_gh);
+	return ret;
+}
+
+/**
+ * gfs2_set_page_dirty - Page dirtying function
+ * @page: The page to dirty
+ *
+ * Returns: 1 if it dirtyed the page, or 0 otherwise
+ */
+ 
+static int gfs2_set_page_dirty(struct page *page)
+{
+	SetPageChecked(page);
+	return __set_page_dirty_buffers(page);
+}
+
+/**
+ * gfs2_bmap - Block map function
+ * @mapping: Address space info
+ * @lblock: The block to map
+ *
+ * Returns: The disk address for the block or 0 on hole or error
+ */
+
+static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
+{
+	struct gfs2_inode *ip = GFS2_I(mapping->host);
+	struct gfs2_holder i_gh;
+	sector_t dblock = 0;
+	int error;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		return 0;
+
+	if (!gfs2_is_stuffed(ip))
+		dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+	return dblock;
+}
+
+static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
+{
+	struct gfs2_bufdata *bd;
+
+	lock_buffer(bh);
+	gfs2_log_lock(sdp);
+	clear_buffer_dirty(bh);
+	bd = bh->b_private;
+	if (bd) {
+		if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
+			list_del_init(&bd->bd_le.le_list);
+		else
+			gfs2_remove_from_journal(bh, current->journal_info, 0);
+	}
+	bh->b_bdev = NULL;
+	clear_buffer_mapped(bh);
+	clear_buffer_req(bh);
+	clear_buffer_new(bh);
+	gfs2_log_unlock(sdp);
+	unlock_buffer(bh);
+}
+
+static void gfs2_invalidatepage(struct page *page, unsigned long offset)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+	struct buffer_head *bh, *head;
+	unsigned long pos = 0;
+
+	BUG_ON(!PageLocked(page));
+	if (offset == 0)
+		ClearPageChecked(page);
+	if (!page_has_buffers(page))
+		goto out;
+
+	bh = head = page_buffers(page);
+	do {
+		if (offset <= pos)
+			gfs2_discard(sdp, bh);
+		pos += bh->b_size;
+		bh = bh->b_this_page;
+	} while (bh != head);
+out:
+	if (offset == 0)
+		try_to_release_page(page, 0);
+}
+
+/**
+ * gfs2_ok_for_dio - check that dio is valid on this file
+ * @ip: The inode
+ * @rw: READ or WRITE
+ * @offset: The offset at which we are reading or writing
+ *
+ * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
+ *          1 (to accept the i/o request)
+ */
+static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
+{
+	/*
+	 * Should we return an error here? I can't see that O_DIRECT for
+	 * a stuffed file makes any sense. For now we'll silently fall
+	 * back to buffered I/O
+	 */
+	if (gfs2_is_stuffed(ip))
+		return 0;
+
+	if (offset >= i_size_read(&ip->i_inode))
+		return 0;
+	return 1;
+}
+
+
+
+static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
+			      const struct iovec *iov, loff_t offset,
+			      unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int rv;
+
+	/*
+	 * Deferred lock, even if its a write, since we do no allocation
+	 * on this path. All we need change is atime, and this lock mode
+	 * ensures that other nodes have flushed their buffered read caches
+	 * (i.e. their page cache entries for this inode). We do not,
+	 * unfortunately have the option of only flushing a range like
+	 * the VFS does.
+	 */
+	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
+	rv = gfs2_glock_nq(&gh);
+	if (rv)
+		return rv;
+	rv = gfs2_ok_for_dio(ip, rw, offset);
+	if (rv != 1)
+		goto out; /* dio not valid, fall back to buffered i/o */
+
+	rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
+					   iov, offset, nr_segs,
+					   gfs2_get_block_direct, NULL);
+out:
+	gfs2_glock_dq_m(1, &gh);
+	gfs2_holder_uninit(&gh);
+	return rv;
+}
+
+/**
+ * gfs2_releasepage - free the metadata associated with a page
+ * @page: the page that's being released
+ * @gfp_mask: passed from Linux VFS, ignored by us
+ *
+ * Call try_to_free_buffers() if the buffers in this page can be
+ * released.
+ *
+ * Returns: 0
+ */
+
+int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
+{
+	struct inode *aspace = page->mapping->host;
+	struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
+	struct buffer_head *bh, *head;
+	struct gfs2_bufdata *bd;
+
+	if (!page_has_buffers(page))
+		return 0;
+
+	gfs2_log_lock(sdp);
+	head = bh = page_buffers(page);
+	do {
+		if (atomic_read(&bh->b_count))
+			goto cannot_release;
+		bd = bh->b_private;
+		if (bd && bd->bd_ail)
+			goto cannot_release;
+		gfs2_assert_warn(sdp, !buffer_pinned(bh));
+		gfs2_assert_warn(sdp, !buffer_dirty(bh));
+		bh = bh->b_this_page;
+	} while(bh != head);
+	gfs2_log_unlock(sdp);
+
+	head = bh = page_buffers(page);
+	do {
+		gfs2_log_lock(sdp);
+		bd = bh->b_private;
+		if (bd) {
+			gfs2_assert_warn(sdp, bd->bd_bh == bh);
+			gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
+			if (!list_empty(&bd->bd_le.le_list)) {
+				if (!buffer_pinned(bh))
+					list_del_init(&bd->bd_le.le_list);
+				else
+					bd = NULL;
+			}
+			if (bd)
+				bd->bd_bh = NULL;
+			bh->b_private = NULL;
+		}
+		gfs2_log_unlock(sdp);
+		if (bd)
+			kmem_cache_free(gfs2_bufdata_cachep, bd);
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+
+	return try_to_free_buffers(page);
+cannot_release:
+	gfs2_log_unlock(sdp);
+	return 0;
+}
+
+static const struct address_space_operations gfs2_writeback_aops = {
+	.writepage = gfs2_writeback_writepage,
+	.writepages = gfs2_writeback_writepages,
+	.readpage = gfs2_readpage,
+	.readpages = gfs2_readpages,
+	.sync_page = block_sync_page,
+	.write_begin = gfs2_write_begin,
+	.write_end = gfs2_write_end,
+	.bmap = gfs2_bmap,
+	.invalidatepage = gfs2_invalidatepage,
+	.releasepage = gfs2_releasepage,
+	.direct_IO = gfs2_direct_IO,
+	.migratepage = buffer_migrate_page,
+	.is_partially_uptodate = block_is_partially_uptodate,
+};
+
+static const struct address_space_operations gfs2_ordered_aops = {
+	.writepage = gfs2_ordered_writepage,
+	.readpage = gfs2_readpage,
+	.readpages = gfs2_readpages,
+	.sync_page = block_sync_page,
+	.write_begin = gfs2_write_begin,
+	.write_end = gfs2_write_end,
+	.set_page_dirty = gfs2_set_page_dirty,
+	.bmap = gfs2_bmap,
+	.invalidatepage = gfs2_invalidatepage,
+	.releasepage = gfs2_releasepage,
+	.direct_IO = gfs2_direct_IO,
+	.migratepage = buffer_migrate_page,
+	.is_partially_uptodate = block_is_partially_uptodate,
+};
+
+static const struct address_space_operations gfs2_jdata_aops = {
+	.writepage = gfs2_jdata_writepage,
+	.writepages = gfs2_jdata_writepages,
+	.readpage = gfs2_readpage,
+	.readpages = gfs2_readpages,
+	.sync_page = block_sync_page,
+	.write_begin = gfs2_write_begin,
+	.write_end = gfs2_write_end,
+	.set_page_dirty = gfs2_set_page_dirty,
+	.bmap = gfs2_bmap,
+	.invalidatepage = gfs2_invalidatepage,
+	.releasepage = gfs2_releasepage,
+	.is_partially_uptodate = block_is_partially_uptodate,
+};
+
+void gfs2_set_aops(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+
+	if (gfs2_is_writeback(ip))
+		inode->i_mapping->a_ops = &gfs2_writeback_aops;
+	else if (gfs2_is_ordered(ip))
+		inode->i_mapping->a_ops = &gfs2_ordered_aops;
+	else if (gfs2_is_jdata(ip))
+		inode->i_mapping->a_ops = &gfs2_jdata_aops;
+	else
+		BUG();
+}
+
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 253e1a3..1153a07 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -25,7 +25,6 @@
 #include "trans.h"
 #include "dir.h"
 #include "util.h"
-#include "ops_address.h"
 
 /* This doesn't need to be that large as max 64 bit pointers in a 4k
  * block is 512, so __u16 is fine for that. It saves stack space to
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
new file mode 100644
index 0000000..022c66c
--- /dev/null
+++ b/fs/gfs2/dentry.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/gfs2_ondisk.h>
+#include <linux/crc32.h>
+
+#include "gfs2.h"
+#include "incore.h"
+#include "dir.h"
+#include "glock.h"
+#include "super.h"
+#include "util.h"
+#include "inode.h"
+
+/**
+ * gfs2_drevalidate - Check directory lookup consistency
+ * @dentry: the mapping to check
+ * @nd:
+ *
+ * Check to make sure the lookup necessary to arrive at this inode from its
+ * parent is still good.
+ *
+ * Returns: 1 if the dentry is ok, 0 if it isn't
+ */
+
+static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *parent = dget_parent(dentry);
+	struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode);
+	struct gfs2_inode *dip = GFS2_I(parent->d_inode);
+	struct inode *inode = dentry->d_inode;
+	struct gfs2_holder d_gh;
+	struct gfs2_inode *ip = NULL;
+	int error;
+	int had_lock = 0;
+
+	if (inode) {
+		if (is_bad_inode(inode))
+			goto invalid;
+		ip = GFS2_I(inode);
+	}
+
+	if (sdp->sd_args.ar_localcaching)
+		goto valid;
+
+	had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
+	if (!had_lock) {
+		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+		if (error)
+			goto fail;
+	} 
+
+	error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
+	switch (error) {
+	case 0:
+		if (!inode)
+			goto invalid_gunlock;
+		break;
+	case -ENOENT:
+		if (!inode)
+			goto valid_gunlock;
+		goto invalid_gunlock;
+	default:
+		goto fail_gunlock;
+	}
+
+valid_gunlock:
+	if (!had_lock)
+		gfs2_glock_dq_uninit(&d_gh);
+valid:
+	dput(parent);
+	return 1;
+
+invalid_gunlock:
+	if (!had_lock)
+		gfs2_glock_dq_uninit(&d_gh);
+invalid:
+	if (inode && S_ISDIR(inode->i_mode)) {
+		if (have_submounts(dentry))
+			goto valid;
+		shrink_dcache_parent(dentry);
+	}
+	d_drop(dentry);
+	dput(parent);
+	return 0;
+
+fail_gunlock:
+	gfs2_glock_dq_uninit(&d_gh);
+fail:
+	dput(parent);
+	return 0;
+}
+
+static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
+{
+	str->hash = gfs2_disk_hash(str->name, str->len);
+	return 0;
+}
+
+const struct dentry_operations gfs2_dops = {
+	.d_revalidate = gfs2_drevalidate,
+	.d_hash = gfs2_dhash,
+};
+
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
new file mode 100644
index 0000000..9200ef2
--- /dev/null
+++ b/fs/gfs2/export.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/exportfs.h>
+#include <linux/gfs2_ondisk.h>
+#include <linux/crc32.h>
+
+#include "gfs2.h"
+#include "incore.h"
+#include "dir.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "super.h"
+#include "rgrp.h"
+#include "util.h"
+
+#define GFS2_SMALL_FH_SIZE 4
+#define GFS2_LARGE_FH_SIZE 8
+#define GFS2_OLD_FH_SIZE 10
+
+static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
+			  int connectable)
+{
+	__be32 *fh = (__force __be32 *)p;
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
+	struct gfs2_inode *ip = GFS2_I(inode);
+
+	if (*len < GFS2_SMALL_FH_SIZE ||
+	    (connectable && *len < GFS2_LARGE_FH_SIZE))
+		return 255;
+
+	fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+	fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+	fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
+	fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
+	*len = GFS2_SMALL_FH_SIZE;
+
+	if (!connectable || inode == sb->s_root->d_inode)
+		return *len;
+
+	spin_lock(&dentry->d_lock);
+	inode = dentry->d_parent->d_inode;
+	ip = GFS2_I(inode);
+	igrab(inode);
+	spin_unlock(&dentry->d_lock);
+
+	fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+	fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+	fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
+	fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
+	*len = GFS2_LARGE_FH_SIZE;
+
+	iput(inode);
+
+	return *len;
+}
+
+struct get_name_filldir {
+	struct gfs2_inum_host inum;
+	char *name;
+};
+
+static int get_name_filldir(void *opaque, const char *name, int length,
+			    loff_t offset, u64 inum, unsigned int type)
+{
+	struct get_name_filldir *gnfd = opaque;
+
+	if (inum != gnfd->inum.no_addr)
+		return 0;
+
+	memcpy(gnfd->name, name, length);
+	gnfd->name[length] = 0;
+
+	return 1;
+}
+
+static int gfs2_get_name(struct dentry *parent, char *name,
+			 struct dentry *child)
+{
+	struct inode *dir = parent->d_inode;
+	struct inode *inode = child->d_inode;
+	struct gfs2_inode *dip, *ip;
+	struct get_name_filldir gnfd;
+	struct gfs2_holder gh;
+	u64 offset = 0;
+	int error;
+
+	if (!dir)
+		return -EINVAL;
+
+	if (!S_ISDIR(dir->i_mode) || !inode)
+		return -EINVAL;
+
+	dip = GFS2_I(dir);
+	ip = GFS2_I(inode);
+
+	*name = 0;
+	gnfd.inum.no_addr = ip->i_no_addr;
+	gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
+	gnfd.name = name;
+
+	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
+	if (error)
+		return error;
+
+	error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir);
+
+	gfs2_glock_dq_uninit(&gh);
+
+	if (!error && !*name)
+		error = -ENOENT;
+
+	return error;
+}
+
+static struct dentry *gfs2_get_parent(struct dentry *child)
+{
+	struct qstr dotdot;
+	struct dentry *dentry;
+
+	/*
+	 * XXX(hch): it would be a good idea to keep this around as a
+	 *	     static variable.
+	 */
+	gfs2_str2qstr(&dotdot, "..");
+
+	dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1));
+	if (!IS_ERR(dentry))
+		dentry->d_op = &gfs2_dops;
+	return dentry;
+}
+
+static struct dentry *gfs2_get_dentry(struct super_block *sb,
+		struct gfs2_inum_host *inum)
+{
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	struct gfs2_holder i_gh, ri_gh, rgd_gh;
+	struct gfs2_rgrpd *rgd;
+	struct inode *inode;
+	struct dentry *dentry;
+	int error;
+
+	/* System files? */
+
+	inode = gfs2_ilookup(sb, inum->no_addr);
+	if (inode) {
+		if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
+			iput(inode);
+			return ERR_PTR(-ESTALE);
+		}
+		goto out_inode;
+	}
+
+	error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
+				  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		return ERR_PTR(error);
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		goto fail;
+
+	error = -EINVAL;
+	rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
+	if (!rgd)
+		goto fail_rindex;
+
+	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
+	if (error)
+		goto fail_rindex;
+
+	error = -ESTALE;
+	if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
+		goto fail_rgd;
+
+	gfs2_glock_dq_uninit(&rgd_gh);
+	gfs2_glock_dq_uninit(&ri_gh);
+
+	inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
+					inum->no_addr,
+					0, 0);
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
+		goto fail;
+	}
+
+	error = gfs2_inode_refresh(GFS2_I(inode));
+	if (error) {
+		iput(inode);
+		goto fail;
+	}
+
+	/* Pick up the works we bypass in gfs2_inode_lookup */
+	if (inode->i_state & I_NEW) 
+		gfs2_set_iop(inode);
+
+	if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
+		iput(inode);
+		goto fail;
+	}
+
+	error = -EIO;
+	if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
+		iput(inode);
+		goto fail;
+	}
+
+	gfs2_glock_dq_uninit(&i_gh);
+
+out_inode:
+	dentry = d_obtain_alias(inode);
+	if (!IS_ERR(dentry))
+		dentry->d_op = &gfs2_dops;
+	return dentry;
+
+fail_rgd:
+	gfs2_glock_dq_uninit(&rgd_gh);
+
+fail_rindex:
+	gfs2_glock_dq_uninit(&ri_gh);
+
+fail:
+	gfs2_glock_dq_uninit(&i_gh);
+	return ERR_PTR(error);
+}
+
+static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
+		int fh_len, int fh_type)
+{
+	struct gfs2_inum_host this;
+	__be32 *fh = (__force __be32 *)fid->raw;
+
+	switch (fh_type) {
+	case GFS2_SMALL_FH_SIZE:
+	case GFS2_LARGE_FH_SIZE:
+	case GFS2_OLD_FH_SIZE:
+		this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
+		this.no_formal_ino |= be32_to_cpu(fh[1]);
+		this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
+		this.no_addr |= be32_to_cpu(fh[3]);
+		return gfs2_get_dentry(sb, &this);
+	default:
+		return NULL;
+	}
+}
+
+static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid,
+		int fh_len, int fh_type)
+{
+	struct gfs2_inum_host parent;
+	__be32 *fh = (__force __be32 *)fid->raw;
+
+	switch (fh_type) {
+	case GFS2_LARGE_FH_SIZE:
+	case GFS2_OLD_FH_SIZE:
+		parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
+		parent.no_formal_ino |= be32_to_cpu(fh[5]);
+		parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
+		parent.no_addr |= be32_to_cpu(fh[7]);
+		return gfs2_get_dentry(sb, &parent);
+	default:
+		return NULL;
+	}
+}
+
+const struct export_operations gfs2_export_ops = {
+	.encode_fh = gfs2_encode_fh,
+	.fh_to_dentry = gfs2_fh_to_dentry,
+	.fh_to_parent = gfs2_fh_to_parent,
+	.get_name = gfs2_get_name,
+	.get_parent = gfs2_get_parent,
+};
+
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
new file mode 100644
index 0000000..73b6f55
--- /dev/null
+++ b/fs/gfs2/file.c
@@ -0,0 +1,765 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/uio.h>
+#include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
+#include <linux/gfs2_ondisk.h>
+#include <linux/ext2_fs.h>
+#include <linux/crc32.h>
+#include <linux/writeback.h>
+#include <asm/uaccess.h>
+#include <linux/dlm.h>
+#include <linux/dlm_plock.h>
+
+#include "gfs2.h"
+#include "incore.h"
+#include "bmap.h"
+#include "dir.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+#include "util.h"
+#include "eaops.h"
+
+/**
+ * gfs2_llseek - seek to a location in a file
+ * @file: the file
+ * @offset: the offset
+ * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
+ *
+ * SEEK_END requires the glock for the file because it references the
+ * file's size.
+ *
+ * Returns: The new offset, or errno
+ */
+
+static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+	struct gfs2_holder i_gh;
+	loff_t error;
+
+	if (origin == 2) {
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
+					   &i_gh);
+		if (!error) {
+			error = generic_file_llseek_unlocked(file, offset, origin);
+			gfs2_glock_dq_uninit(&i_gh);
+		}
+	} else
+		error = generic_file_llseek_unlocked(file, offset, origin);
+
+	return error;
+}
+
+/**
+ * gfs2_readdir - Read directory entries from a directory
+ * @file: The directory to read from
+ * @dirent: Buffer for dirents
+ * @filldir: Function used to do the copying
+ *
+ * Returns: errno
+ */
+
+static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+	struct inode *dir = file->f_mapping->host;
+	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_holder d_gh;
+	u64 offset = file->f_pos;
+	int error;
+
+	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+	error = gfs2_glock_nq(&d_gh);
+	if (error) {
+		gfs2_holder_uninit(&d_gh);
+		return error;
+	}
+
+	error = gfs2_dir_read(dir, &offset, dirent, filldir);
+
+	gfs2_glock_dq_uninit(&d_gh);
+
+	file->f_pos = offset;
+
+	return error;
+}
+
+/**
+ * fsflags_cvt
+ * @table: A table of 32 u32 flags
+ * @val: a 32 bit value to convert
+ *
+ * This function can be used to convert between fsflags values and
+ * GFS2's own flags values.
+ *
+ * Returns: the converted flags
+ */
+static u32 fsflags_cvt(const u32 *table, u32 val)
+{
+	u32 res = 0;
+	while(val) {
+		if (val & 1)
+			res |= *table;
+		table++;
+		val >>= 1;
+	}
+	return res;
+}
+
+static const u32 fsflags_to_gfs2[32] = {
+	[3] = GFS2_DIF_SYNC,
+	[4] = GFS2_DIF_IMMUTABLE,
+	[5] = GFS2_DIF_APPENDONLY,
+	[7] = GFS2_DIF_NOATIME,
+	[12] = GFS2_DIF_EXHASH,
+	[14] = GFS2_DIF_INHERIT_JDATA,
+};
+
+static const u32 gfs2_to_fsflags[32] = {
+	[gfs2fl_Sync] = FS_SYNC_FL,
+	[gfs2fl_Immutable] = FS_IMMUTABLE_FL,
+	[gfs2fl_AppendOnly] = FS_APPEND_FL,
+	[gfs2fl_NoAtime] = FS_NOATIME_FL,
+	[gfs2fl_ExHash] = FS_INDEX_FL,
+	[gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
+};
+
+static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int error;
+	u32 fsflags;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	error = gfs2_glock_nq(&gh);
+	if (error)
+		return error;
+
+	fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags);
+	if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA)
+		fsflags |= FS_JOURNAL_DATA_FL;
+	if (put_user(fsflags, ptr))
+		error = -EFAULT;
+
+	gfs2_glock_dq(&gh);
+	gfs2_holder_uninit(&gh);
+	return error;
+}
+
+void gfs2_set_inode_flags(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	unsigned int flags = inode->i_flags;
+
+	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
+		flags |= S_IMMUTABLE;
+	if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
+		flags |= S_APPEND;
+	if (ip->i_diskflags & GFS2_DIF_NOATIME)
+		flags |= S_NOATIME;
+	if (ip->i_diskflags & GFS2_DIF_SYNC)
+		flags |= S_SYNC;
+	inode->i_flags = flags;
+}
+
+/* Flags that can be set by user space */
+#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA|			\
+			     GFS2_DIF_IMMUTABLE|		\
+			     GFS2_DIF_APPENDONLY|		\
+			     GFS2_DIF_NOATIME|			\
+			     GFS2_DIF_SYNC|			\
+			     GFS2_DIF_SYSTEM|			\
+			     GFS2_DIF_INHERIT_JDATA)
+
+/**
+ * gfs2_set_flags - set flags on an inode
+ * @inode: The inode
+ * @flags: The flags to set
+ * @mask: Indicates which flags are valid
+ *
+ */
+static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct buffer_head *bh;
+	struct gfs2_holder gh;
+	int error;
+	u32 new_flags, flags;
+
+	error = mnt_want_write(filp->f_path.mnt);
+	if (error)
+		return error;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	if (error)
+		goto out_drop_write;
+
+	flags = ip->i_diskflags;
+	new_flags = (flags & ~mask) | (reqflags & mask);
+	if ((new_flags ^ flags) == 0)
+		goto out;
+
+	error = -EINVAL;
+	if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
+		goto out;
+
+	error = -EPERM;
+	if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
+		goto out;
+	if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
+		goto out;
+	if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
+	    !capable(CAP_LINUX_IMMUTABLE))
+		goto out;
+	if (!IS_IMMUTABLE(inode)) {
+		error = gfs2_permission(inode, MAY_WRITE);
+		if (error)
+			goto out;
+	}
+	if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
+		if (flags & GFS2_DIF_JDATA)
+			gfs2_log_flush(sdp, ip->i_gl);
+		error = filemap_fdatawrite(inode->i_mapping);
+		if (error)
+			goto out;
+		error = filemap_fdatawait(inode->i_mapping);
+		if (error)
+			goto out;
+	}
+	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (error)
+		goto out;
+	error = gfs2_meta_inode_buffer(ip, &bh);
+	if (error)
+		goto out_trans_end;
+	gfs2_trans_add_bh(ip->i_gl, bh, 1);
+	ip->i_diskflags = new_flags;
+	gfs2_dinode_out(ip, bh->b_data);
+	brelse(bh);
+	gfs2_set_inode_flags(inode);
+	gfs2_set_aops(inode);
+out_trans_end:
+	gfs2_trans_end(sdp);
+out:
+	gfs2_glock_dq_uninit(&gh);
+out_drop_write:
+	mnt_drop_write(filp->f_path.mnt);
+	return error;
+}
+
+static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	u32 fsflags, gfsflags;
+	if (get_user(fsflags, ptr))
+		return -EFAULT;
+	gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
+	if (!S_ISDIR(inode->i_mode)) {
+		if (gfsflags & GFS2_DIF_INHERIT_JDATA)
+			gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
+		return do_gfs2_set_flags(filp, gfsflags, ~0);
+	}
+	return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
+}
+
+static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	switch(cmd) {
+	case FS_IOC_GETFLAGS:
+		return gfs2_get_flags(filp, (u32 __user *)arg);
+	case FS_IOC_SETFLAGS:
+		return gfs2_set_flags(filp, (u32 __user *)arg);
+	}
+	return -ENOTTY;
+}
+
+/**
+ * gfs2_allocate_page_backing - Use bmap to allocate blocks
+ * @page: The (locked) page to allocate backing for
+ *
+ * We try to allocate all the blocks required for the page in
+ * one go. This might fail for various reasons, so we keep
+ * trying until all the blocks to back this page are allocated.
+ * If some of the blocks are already allocated, thats ok too.
+ */
+
+static int gfs2_allocate_page_backing(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct buffer_head bh;
+	unsigned long size = PAGE_CACHE_SIZE;
+	u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	do {
+		bh.b_state = 0;
+		bh.b_size = size;
+		gfs2_block_map(inode, lblock, &bh, 1);
+		if (!buffer_mapped(&bh))
+			return -EIO;
+		size -= bh.b_size;
+		lblock += (bh.b_size >> inode->i_blkbits);
+	} while(size > 0);
+	return 0;
+}
+
+/**
+ * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
+ * @vma: The virtual memory area
+ * @page: The page which is about to become writable
+ *
+ * When the page becomes writable, we need to ensure that we have
+ * blocks allocated on disk to back that page.
+ */
+
+static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *page = vmf->page;
+	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	unsigned long last_index;
+	u64 pos = page->index << PAGE_CACHE_SHIFT;
+	unsigned int data_blocks, ind_blocks, rblocks;
+	int alloc_required = 0;
+	struct gfs2_holder gh;
+	struct gfs2_alloc *al;
+	int ret;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (ret)
+		goto out;
+
+	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
+	set_bit(GIF_SW_PAGED, &ip->i_flags);
+
+	ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
+	if (ret || !alloc_required)
+		goto out_unlock;
+	ret = -ENOMEM;
+	al = gfs2_alloc_get(ip);
+	if (al == NULL)
+		goto out_unlock;
+
+	ret = gfs2_quota_lock_check(ip);
+	if (ret)
+		goto out_alloc_put;
+	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
+	al->al_requested = data_blocks + ind_blocks;
+	ret = gfs2_inplace_reserve(ip);
+	if (ret)
+		goto out_quota_unlock;
+
+	rblocks = RES_DINODE + ind_blocks;
+	if (gfs2_is_jdata(ip))
+		rblocks += data_blocks ? data_blocks : 1;
+	if (ind_blocks || data_blocks)
+		rblocks += RES_STATFS + RES_QUOTA;
+	ret = gfs2_trans_begin(sdp, rblocks, 0);
+	if (ret)
+		goto out_trans_fail;
+
+	lock_page(page);
+	ret = -EINVAL;
+	last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT;
+	if (page->index > last_index)
+		goto out_unlock_page;
+	ret = 0;
+	if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping)
+		goto out_unlock_page;
+	if (gfs2_is_stuffed(ip)) {
+		ret = gfs2_unstuff_dinode(ip, page);
+		if (ret)
+			goto out_unlock_page;
+	}
+	ret = gfs2_allocate_page_backing(page);
+
+out_unlock_page:
+	unlock_page(page);
+	gfs2_trans_end(sdp);
+out_trans_fail:
+	gfs2_inplace_release(ip);
+out_quota_unlock:
+	gfs2_quota_unlock(ip);
+out_alloc_put:
+	gfs2_alloc_put(ip);
+out_unlock:
+	gfs2_glock_dq(&gh);
+out:
+	gfs2_holder_uninit(&gh);
+	if (ret == -ENOMEM)
+		ret = VM_FAULT_OOM;
+	else if (ret)
+		ret = VM_FAULT_SIGBUS;
+	return ret;
+}
+
+static struct vm_operations_struct gfs2_vm_ops = {
+	.fault = filemap_fault,
+	.page_mkwrite = gfs2_page_mkwrite,
+};
+
+/**
+ * gfs2_mmap -
+ * @file: The file to map
+ * @vma: The VMA which described the mapping
+ *
+ * There is no need to get a lock here unless we should be updating
+ * atime. We ignore any locking errors since the only consequence is
+ * a missed atime update (which will just be deferred until later).
+ *
+ * Returns: 0
+ */
+
+static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+
+	if (!(file->f_flags & O_NOATIME)) {
+		struct gfs2_holder i_gh;
+		int error;
+
+		gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+		error = gfs2_glock_nq(&i_gh);
+		file_accessed(file);
+		if (error == 0)
+			gfs2_glock_dq_uninit(&i_gh);
+	}
+	vma->vm_ops = &gfs2_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
+
+	return 0;
+}
+
+/**
+ * gfs2_open - open a file
+ * @inode: the inode to open
+ * @file: the struct file for this opening
+ *
+ * Returns: errno
+ */
+
+static int gfs2_open(struct inode *inode, struct file *file)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder i_gh;
+	struct gfs2_file *fp;
+	int error;
+
+	fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
+	if (!fp)
+		return -ENOMEM;
+
+	mutex_init(&fp->f_fl_mutex);
+
+	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
+	file->private_data = fp;
+
+	if (S_ISREG(ip->i_inode.i_mode)) {
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
+					   &i_gh);
+		if (error)
+			goto fail;
+
+		if (!(file->f_flags & O_LARGEFILE) &&
+		    ip->i_disksize > MAX_NON_LFS) {
+			error = -EOVERFLOW;
+			goto fail_gunlock;
+		}
+
+		gfs2_glock_dq_uninit(&i_gh);
+	}
+
+	return 0;
+
+fail_gunlock:
+	gfs2_glock_dq_uninit(&i_gh);
+fail:
+	file->private_data = NULL;
+	kfree(fp);
+	return error;
+}
+
+/**
+ * gfs2_close - called to close a struct file
+ * @inode: the inode the struct file belongs to
+ * @file: the struct file being closed
+ *
+ * Returns: errno
+ */
+
+static int gfs2_close(struct inode *inode, struct file *file)
+{
+	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+	struct gfs2_file *fp;
+
+	fp = file->private_data;
+	file->private_data = NULL;
+
+	if (gfs2_assert_warn(sdp, fp))
+		return -EIO;
+
+	kfree(fp);
+
+	return 0;
+}
+
+/**
+ * gfs2_fsync - sync the dirty data for a file (across the cluster)
+ * @file: the file that points to the dentry (we ignore this)
+ * @dentry: the dentry that points to the inode to sync
+ *
+ * The VFS will flush "normal" data for us. We only need to worry
+ * about metadata here. For journaled data, we just do a log flush
+ * as we can't avoid it. Otherwise we can just bale out if datasync
+ * is set. For stuffed inodes we must flush the log in order to
+ * ensure that all data is on disk.
+ *
+ * The call to write_inode_now() is there to write back metadata and
+ * the inode itself. It does also try and write the data, but thats
+ * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
+ * for us.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct inode *inode = dentry->d_inode;
+	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
+	int ret = 0;
+
+	if (gfs2_is_jdata(GFS2_I(inode))) {
+		gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+		return 0;
+	}
+
+	if (sync_state != 0) {
+		if (!datasync)
+			ret = write_inode_now(inode, 0);
+
+		if (gfs2_is_stuffed(GFS2_I(inode)))
+			gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_GFS2_FS_LOCKING_DLM
+
+/**
+ * gfs2_setlease - acquire/release a file lease
+ * @file: the file pointer
+ * @arg: lease type
+ * @fl: file lock
+ *
+ * We don't currently have a way to enforce a lease across the whole
+ * cluster; until we do, disable leases (by just returning -EINVAL),
+ * unless the administrator has requested purely local locking.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
+{
+	return -EINVAL;
+}
+
+/**
+ * gfs2_lock - acquire/release a posix lock on a file
+ * @file: the file pointer
+ * @cmd: either modify or retrieve lock state, possibly wait
+ * @fl: type and range of lock
+ *
+ * Returns: errno
+ */
+
+static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	if (!(fl->fl_flags & FL_POSIX))
+		return -ENOLCK;
+	if (__mandatory_lock(&ip->i_inode))
+		return -ENOLCK;
+
+	if (cmd == F_CANCELLK) {
+		/* Hack: */
+		cmd = F_SETLK;
+		fl->fl_type = F_UNLCK;
+	}
+	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+		return -EIO;
+	if (IS_GETLK(cmd))
+		return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
+	else if (fl->fl_type == F_UNLCK)
+		return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
+	else
+		return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
+}
+
+static int do_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct gfs2_file *fp = file->private_data;
+	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
+	struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode);
+	struct gfs2_glock *gl;
+	unsigned int state;
+	int flags;
+	int error = 0;
+
+	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
+	flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
+
+	mutex_lock(&fp->f_fl_mutex);
+
+	gl = fl_gh->gh_gl;
+	if (gl) {
+		if (fl_gh->gh_state == state)
+			goto out;
+		flock_lock_file_wait(file,
+				     &(struct file_lock){.fl_type = F_UNLCK});
+		gfs2_glock_dq_wait(fl_gh);
+		gfs2_holder_reinit(state, flags, fl_gh);
+	} else {
+		error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
+				       &gfs2_flock_glops, CREATE, &gl);
+		if (error)
+			goto out;
+		gfs2_holder_init(gl, state, flags, fl_gh);
+		gfs2_glock_put(gl);
+	}
+	error = gfs2_glock_nq(fl_gh);
+	if (error) {
+		gfs2_holder_uninit(fl_gh);
+		if (error == GLR_TRYFAILED)
+			error = -EAGAIN;
+	} else {
+		error = flock_lock_file_wait(file, fl);
+		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
+	}
+
+out:
+	mutex_unlock(&fp->f_fl_mutex);
+	return error;
+}
+
+static void do_unflock(struct file *file, struct file_lock *fl)
+{
+	struct gfs2_file *fp = file->private_data;
+	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
+
+	mutex_lock(&fp->f_fl_mutex);
+	flock_lock_file_wait(file, fl);
+	if (fl_gh->gh_gl)
+		gfs2_glock_dq_uninit(fl_gh);
+	mutex_unlock(&fp->f_fl_mutex);
+}
+
+/**
+ * gfs2_flock - acquire/release a flock lock on a file
+ * @file: the file pointer
+ * @cmd: either modify or retrieve lock state, possibly wait
+ * @fl: type and range of lock
+ *
+ * Returns: errno
+ */
+
+static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+
+	if (!(fl->fl_flags & FL_FLOCK))
+		return -ENOLCK;
+	if (__mandatory_lock(&ip->i_inode))
+		return -ENOLCK;
+
+	if (fl->fl_type == F_UNLCK) {
+		do_unflock(file, fl);
+		return 0;
+	} else {
+		return do_flock(file, cmd, fl);
+	}
+}
+
+const struct file_operations gfs2_file_fops = {
+	.llseek		= gfs2_llseek,
+	.read		= do_sync_read,
+	.aio_read	= generic_file_aio_read,
+	.write		= do_sync_write,
+	.aio_write	= generic_file_aio_write,
+	.unlocked_ioctl	= gfs2_ioctl,
+	.mmap		= gfs2_mmap,
+	.open		= gfs2_open,
+	.release	= gfs2_close,
+	.fsync		= gfs2_fsync,
+	.lock		= gfs2_lock,
+	.flock		= gfs2_flock,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
+	.setlease	= gfs2_setlease,
+};
+
+const struct file_operations gfs2_dir_fops = {
+	.readdir	= gfs2_readdir,
+	.unlocked_ioctl	= gfs2_ioctl,
+	.open		= gfs2_open,
+	.release	= gfs2_close,
+	.fsync		= gfs2_fsync,
+	.lock		= gfs2_lock,
+	.flock		= gfs2_flock,
+};
+
+#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
+
+const struct file_operations gfs2_file_fops_nolock = {
+	.llseek		= gfs2_llseek,
+	.read		= do_sync_read,
+	.aio_read	= generic_file_aio_read,
+	.write		= do_sync_write,
+	.aio_write	= generic_file_aio_write,
+	.unlocked_ioctl	= gfs2_ioctl,
+	.mmap		= gfs2_mmap,
+	.open		= gfs2_open,
+	.release	= gfs2_close,
+	.fsync		= gfs2_fsync,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
+	.setlease	= generic_setlease,
+};
+
+const struct file_operations gfs2_dir_fops_nolock = {
+	.readdir	= gfs2_readdir,
+	.unlocked_ioctl	= gfs2_ioctl,
+	.open		= gfs2_open,
+	.release	= gfs2_close,
+	.fsync		= gfs2_fsync,
+};
+
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 5a31d42..c03a1a3 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -30,7 +30,6 @@
 #include "inode.h"
 #include "log.h"
 #include "meta_io.h"
-#include "ops_address.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index c30be2b..2c3ec07 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -11,8 +11,16 @@
 #define __INODE_DOT_H__
 
 #include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/mm.h>
 #include "util.h"
 
+extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
+extern int gfs2_internal_read(struct gfs2_inode *ip,
+			      struct file_ra_state *ra_state,
+			      char *buf, loff_t *pos, unsigned size);
+extern void gfs2_set_aops(struct inode *inode);
+
 static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
 {
 	return !ip->i_height;
@@ -73,30 +81,31 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
 }
 
 
-void gfs2_set_iop(struct inode *inode);
-struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
-				u64 no_addr, u64 no_formal_ino,
-				int skip_freeing);
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
-
-int gfs2_inode_refresh(struct gfs2_inode *ip);
-
-int gfs2_dinode_dealloc(struct gfs2_inode *inode);
-int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
-struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
-			   int is_root);
-struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
-			   unsigned int mode, dev_t dev);
-int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
-		struct gfs2_inode *ip);
-int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-		   const struct gfs2_inode *ip);
-int gfs2_permission(struct inode *inode, int mask);
-int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
-int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
-struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
-void gfs2_dinode_print(const struct gfs2_inode *ip);
+extern void gfs2_set_iop(struct inode *inode);
+extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
+				       u64 no_addr, u64 no_formal_ino,
+				       int skip_freeing);
+extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
+
+extern int gfs2_inode_refresh(struct gfs2_inode *ip);
+
+extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
+extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
+extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
+				  int is_root);
+extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
+				  const struct qstr *name,
+				  unsigned int mode, dev_t dev);
+extern int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
+		       struct gfs2_inode *ip);
+extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
+			  const struct gfs2_inode *ip);
+extern int gfs2_permission(struct inode *inode, int mask);
+extern int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
+extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
+extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
+extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+extern void gfs2_dinode_print(const struct gfs2_inode *ip);
 
 extern const struct inode_operations gfs2_file_iops;
 extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 78a5f43..cb8d7a9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,7 +31,6 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
-#include "ops_address.h"
 
 static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
 {
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
deleted file mode 100644
index e566421..0000000
--- a/fs/gfs2/ops_address.c
+++ /dev/null
@@ -1,1146 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/pagemap.h>
-#include <linux/pagevec.h>
-#include <linux/mpage.h>
-#include <linux/fs.h>
-#include <linux/writeback.h>
-#include <linux/swap.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/backing-dev.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "bmap.h"
-#include "glock.h"
-#include "inode.h"
-#include "log.h"
-#include "meta_io.h"
-#include "ops_address.h"
-#include "quota.h"
-#include "trans.h"
-#include "rgrp.h"
-#include "super.h"
-#include "util.h"
-#include "glops.h"
-
-
-static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
-				   unsigned int from, unsigned int to)
-{
-	struct buffer_head *head = page_buffers(page);
-	unsigned int bsize = head->b_size;
-	struct buffer_head *bh;
-	unsigned int start, end;
-
-	for (bh = head, start = 0; bh != head || !start;
-	     bh = bh->b_this_page, start = end) {
-		end = start + bsize;
-		if (end <= from || start >= to)
-			continue;
-		if (gfs2_is_jdata(ip))
-			set_buffer_uptodate(bh);
-		gfs2_trans_add_bh(ip->i_gl, bh, 0);
-	}
-}
-
-/**
- * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
- * @inode: The inode
- * @lblock: The block number to look up
- * @bh_result: The buffer head to return the result in
- * @create: Non-zero if we may add block to the file
- *
- * Returns: errno
- */
-
-static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
-				  struct buffer_head *bh_result, int create)
-{
-	int error;
-
-	error = gfs2_block_map(inode, lblock, bh_result, 0);
-	if (error)
-		return error;
-	if (!buffer_mapped(bh_result))
-		return -EIO;
-	return 0;
-}
-
-static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
-				 struct buffer_head *bh_result, int create)
-{
-	return gfs2_block_map(inode, lblock, bh_result, 0);
-}
-
-/**
- * gfs2_writepage_common - Common bits of writepage
- * @page: The page to be written
- * @wbc: The writeback control
- *
- * Returns: 1 if writepage is ok, otherwise an error code or zero if no error.
- */
-
-static int gfs2_writepage_common(struct page *page,
-				 struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	loff_t i_size = i_size_read(inode);
-	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
-	unsigned offset;
-
-	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
-		goto out;
-	if (current->journal_info)
-		goto redirty;
-	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
-	if (page->index > end_index || (page->index == end_index && !offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0);
-		goto out;
-	}
-	return 1;
-redirty:
-	redirty_page_for_writepage(wbc, page);
-out:
-	unlock_page(page);
-	return 0;
-}
-
-/**
- * gfs2_writeback_writepage - Write page for writeback mappings
- * @page: The page
- * @wbc: The writeback control
- *
- */
-
-static int gfs2_writeback_writepage(struct page *page,
-				    struct writeback_control *wbc)
-{
-	int ret;
-
-	ret = gfs2_writepage_common(page, wbc);
-	if (ret <= 0)
-		return ret;
-
-	ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc);
-	if (ret == -EAGAIN)
-		ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
-	return ret;
-}
-
-/**
- * gfs2_ordered_writepage - Write page for ordered data files
- * @page: The page to write
- * @wbc: The writeback control
- *
- */
-
-static int gfs2_ordered_writepage(struct page *page,
-				  struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	int ret;
-
-	ret = gfs2_writepage_common(page, wbc);
-	if (ret <= 0)
-		return ret;
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, inode->i_sb->s_blocksize,
-				     (1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
-	gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1);
-	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
-}
-
-/**
- * __gfs2_jdata_writepage - The core of jdata writepage
- * @page: The page to write
- * @wbc: The writeback control
- *
- * This is shared between writepage and writepages and implements the
- * core of the writepage operation. If a transaction is required then
- * PageChecked will have been set and the transaction will have
- * already been started before this is called.
- */
-
-static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-
-	if (PageChecked(page)) {
-		ClearPageChecked(page);
-		if (!page_has_buffers(page)) {
-			create_empty_buffers(page, inode->i_sb->s_blocksize,
-					     (1 << BH_Dirty)|(1 << BH_Uptodate));
-		}
-		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
-	}
-	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
-}
-
-/**
- * gfs2_jdata_writepage - Write complete page
- * @page: Page to write
- *
- * Returns: errno
- *
- */
-
-static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	int ret;
-	int done_trans = 0;
-
-	if (PageChecked(page)) {
-		if (wbc->sync_mode != WB_SYNC_ALL)
-			goto out_ignore;
-		ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
-		if (ret)
-			goto out_ignore;
-		done_trans = 1;
-	}
-	ret = gfs2_writepage_common(page, wbc);
-	if (ret > 0)
-		ret = __gfs2_jdata_writepage(page, wbc);
-	if (done_trans)
-		gfs2_trans_end(sdp);
-	return ret;
-
-out_ignore:
-	redirty_page_for_writepage(wbc, page);
-	unlock_page(page);
-	return 0;
-}
-
-/**
- * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk
- * @mapping: The mapping to write
- * @wbc: Write-back control
- *
- * For the data=writeback case we can already ignore buffer heads
- * and write whole extents at once. This is a big reduction in the
- * number of I/O requests we send and the bmap calls we make in this case.
- */
-static int gfs2_writeback_writepages(struct address_space *mapping,
-				     struct writeback_control *wbc)
-{
-	return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
-}
-
-/**
- * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
- * @mapping: The mapping
- * @wbc: The writeback control
- * @writepage: The writepage function to call for each page
- * @pvec: The vector of pages
- * @nr_pages: The number of pages to write
- *
- * Returns: non-zero if loop should terminate, zero otherwise
- */
-
-static int gfs2_write_jdata_pagevec(struct address_space *mapping,
-				    struct writeback_control *wbc,
-				    struct pagevec *pvec,
-				    int nr_pages, pgoff_t end)
-{
-	struct inode *inode = mapping->host;
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	loff_t i_size = i_size_read(inode);
-	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
-	unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
-	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
-	int i;
-	int ret;
-
-	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
-	if (ret < 0)
-		return ret;
-
-	for(i = 0; i < nr_pages; i++) {
-		struct page *page = pvec->pages[i];
-
-		lock_page(page);
-
-		if (unlikely(page->mapping != mapping)) {
-			unlock_page(page);
-			continue;
-		}
-
-		if (!wbc->range_cyclic && page->index > end) {
-			ret = 1;
-			unlock_page(page);
-			continue;
-		}
-
-		if (wbc->sync_mode != WB_SYNC_NONE)
-			wait_on_page_writeback(page);
-
-		if (PageWriteback(page) ||
-		    !clear_page_dirty_for_io(page)) {
-			unlock_page(page);
-			continue;
-		}
-
-		/* Is the page fully outside i_size? (truncate in progress) */
-		if (page->index > end_index || (page->index == end_index && !offset)) {
-			page->mapping->a_ops->invalidatepage(page, 0);
-			unlock_page(page);
-			continue;
-		}
-
-		ret = __gfs2_jdata_writepage(page, wbc);
-
-		if (ret || (--(wbc->nr_to_write) <= 0))
-			ret = 1;
-		if (wbc->nonblocking && bdi_write_congested(bdi)) {
-			wbc->encountered_congestion = 1;
-			ret = 1;
-		}
-
-	}
-	gfs2_trans_end(sdp);
-	return ret;
-}
-
-/**
- * gfs2_write_cache_jdata - Like write_cache_pages but different
- * @mapping: The mapping to write
- * @wbc: The writeback control
- * @writepage: The writepage function to call
- * @data: The data to pass to writepage
- *
- * The reason that we use our own function here is that we need to
- * start transactions before we grab page locks. This allows us
- * to get the ordering right.
- */
-
-static int gfs2_write_cache_jdata(struct address_space *mapping,
-				  struct writeback_control *wbc)
-{
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
-	int ret = 0;
-	int done = 0;
-	struct pagevec pvec;
-	int nr_pages;
-	pgoff_t index;
-	pgoff_t end;
-	int scanned = 0;
-	int range_whole = 0;
-
-	if (wbc->nonblocking && bdi_write_congested(bdi)) {
-		wbc->encountered_congestion = 1;
-		return 0;
-	}
-
-	pagevec_init(&pvec, 0);
-	if (wbc->range_cyclic) {
-		index = mapping->writeback_index; /* Start from prev offset */
-		end = -1;
-	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-			range_whole = 1;
-		scanned = 1;
-	}
-
-retry:
-	 while (!done && (index <= end) &&
-		(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-					       PAGECACHE_TAG_DIRTY,
-					       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
-		scanned = 1;
-		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
-		if (ret)
-			done = 1;
-		if (ret > 0)
-			ret = 0;
-
-		pagevec_release(&pvec);
-		cond_resched();
-	}
-
-	if (!scanned && !done) {
-		/*
-		 * We hit the last page and there is more work to be done: wrap
-		 * back to the start of the file
-		 */
-		scanned = 1;
-		index = 0;
-		goto retry;
-	}
-
-	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-		mapping->writeback_index = index;
-	return ret;
-}
-
-
-/**
- * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
- * @mapping: The mapping to write
- * @wbc: The writeback control
- * 
- */
-
-static int gfs2_jdata_writepages(struct address_space *mapping,
-				 struct writeback_control *wbc)
-{
-	struct gfs2_inode *ip = GFS2_I(mapping->host);
-	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
-	int ret;
-
-	ret = gfs2_write_cache_jdata(mapping, wbc);
-	if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
-		gfs2_log_flush(sdp, ip->i_gl);
-		ret = gfs2_write_cache_jdata(mapping, wbc);
-	}
-	return ret;
-}
-
-/**
- * stuffed_readpage - Fill in a Linux page with stuffed file data
- * @ip: the inode
- * @page: the page
- *
- * Returns: errno
- */
-
-static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
-{
-	struct buffer_head *dibh;
-	void *kaddr;
-	int error;
-
-	/*
-	 * Due to the order of unstuffing files and ->fault(), we can be
-	 * asked for a zero page in the case of a stuffed file being extended,
-	 * so we need to supply one here. It doesn't happen often.
-	 */
-	if (unlikely(page->index)) {
-		zero_user(page, 0, PAGE_CACHE_SIZE);
-		SetPageUptodate(page);
-		return 0;
-	}
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		return error;
-
-	kaddr = kmap_atomic(page, KM_USER0);
-	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
-	       ip->i_disksize);
-	memset(kaddr + ip->i_disksize, 0, PAGE_CACHE_SIZE - ip->i_disksize);
-	kunmap_atomic(kaddr, KM_USER0);
-	flush_dcache_page(page);
-	brelse(dibh);
-	SetPageUptodate(page);
-
-	return 0;
-}
-
-
-/**
- * __gfs2_readpage - readpage
- * @file: The file to read a page for
- * @page: The page to read
- *
- * This is the core of gfs2's readpage. Its used by the internal file
- * reading code as in that case we already hold the glock. Also its
- * called by gfs2_readpage() once the required lock has been granted.
- *
- */
-
-static int __gfs2_readpage(void *file, struct page *page)
-{
-	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
-	int error;
-
-	if (gfs2_is_stuffed(ip)) {
-		error = stuffed_readpage(ip, page);
-		unlock_page(page);
-	} else {
-		error = mpage_readpage(page, gfs2_block_map);
-	}
-
-	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-		return -EIO;
-
-	return error;
-}
-
-/**
- * gfs2_readpage - read a page of a file
- * @file: The file to read
- * @page: The page of the file
- *
- * This deals with the locking required. We have to unlock and
- * relock the page in order to get the locking in the right
- * order.
- */
-
-static int gfs2_readpage(struct file *file, struct page *page)
-{
-	struct address_space *mapping = page->mapping;
-	struct gfs2_inode *ip = GFS2_I(mapping->host);
-	struct gfs2_holder gh;
-	int error;
-
-	unlock_page(page);
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-	error = gfs2_glock_nq(&gh);
-	if (unlikely(error))
-		goto out;
-	error = AOP_TRUNCATED_PAGE;
-	lock_page(page);
-	if (page->mapping == mapping && !PageUptodate(page))
-		error = __gfs2_readpage(file, page);
-	else
-		unlock_page(page);
-	gfs2_glock_dq(&gh);
-out:
-	gfs2_holder_uninit(&gh);
-	if (error && error != AOP_TRUNCATED_PAGE)
-		lock_page(page);
-	return error;
-}
-
-/**
- * gfs2_internal_read - read an internal file
- * @ip: The gfs2 inode
- * @ra_state: The readahead state (or NULL for no readahead)
- * @buf: The buffer to fill
- * @pos: The file position
- * @size: The amount to read
- *
- */
-
-int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
-                       char *buf, loff_t *pos, unsigned size)
-{
-	struct address_space *mapping = ip->i_inode.i_mapping;
-	unsigned long index = *pos / PAGE_CACHE_SIZE;
-	unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
-	unsigned copied = 0;
-	unsigned amt;
-	struct page *page;
-	void *p;
-
-	do {
-		amt = size - copied;
-		if (offset + size > PAGE_CACHE_SIZE)
-			amt = PAGE_CACHE_SIZE - offset;
-		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
-		if (IS_ERR(page))
-			return PTR_ERR(page);
-		p = kmap_atomic(page, KM_USER0);
-		memcpy(buf + copied, p + offset, amt);
-		kunmap_atomic(p, KM_USER0);
-		mark_page_accessed(page);
-		page_cache_release(page);
-		copied += amt;
-		index++;
-		offset = 0;
-	} while(copied < size);
-	(*pos) += size;
-	return size;
-}
-
-/**
- * gfs2_readpages - Read a bunch of pages at once
- *
- * Some notes:
- * 1. This is only for readahead, so we can simply ignore any things
- *    which are slightly inconvenient (such as locking conflicts between
- *    the page lock and the glock) and return having done no I/O. Its
- *    obviously not something we'd want to do on too regular a basis.
- *    Any I/O we ignore at this time will be done via readpage later.
- * 2. We don't handle stuffed files here we let readpage do the honours.
- * 3. mpage_readpages() does most of the heavy lifting in the common case.
- * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
- */
-
-static int gfs2_readpages(struct file *file, struct address_space *mapping,
-			  struct list_head *pages, unsigned nr_pages)
-{
-	struct inode *inode = mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_holder gh;
-	int ret;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-	ret = gfs2_glock_nq(&gh);
-	if (unlikely(ret))
-		goto out_uninit;
-	if (!gfs2_is_stuffed(ip))
-		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
-	gfs2_glock_dq(&gh);
-out_uninit:
-	gfs2_holder_uninit(&gh);
-	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-		ret = -EIO;
-	return ret;
-}
-
-/**
- * gfs2_write_begin - Begin to write to a file
- * @file: The file to write to
- * @mapping: The mapping in which to write
- * @pos: The file offset at which to start writing
- * @len: Length of the write
- * @flags: Various flags
- * @pagep: Pointer to return the page
- * @fsdata: Pointer to return fs data (unused by GFS2)
- *
- * Returns: errno
- */
-
-static int gfs2_write_begin(struct file *file, struct address_space *mapping,
-			    loff_t pos, unsigned len, unsigned flags,
-			    struct page **pagep, void **fsdata)
-{
-	struct gfs2_inode *ip = GFS2_I(mapping->host);
-	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
-	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-	int alloc_required;
-	int error = 0;
-	struct gfs2_alloc *al;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
-	unsigned to = from + len;
-	struct page *page;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
-	error = gfs2_glock_nq(&ip->i_gh);
-	if (unlikely(error))
-		goto out_uninit;
-
-	error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
-	if (error)
-		goto out_unlock;
-
-	if (alloc_required || gfs2_is_jdata(ip))
-		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
-
-	if (alloc_required) {
-		al = gfs2_alloc_get(ip);
-		if (!al) {
-			error = -ENOMEM;
-			goto out_unlock;
-		}
-
-		error = gfs2_quota_lock_check(ip);
-		if (error)
-			goto out_alloc_put;
-
-		al->al_requested = data_blocks + ind_blocks;
-		error = gfs2_inplace_reserve(ip);
-		if (error)
-			goto out_qunlock;
-	}
-
-	rblocks = RES_DINODE + ind_blocks;
-	if (gfs2_is_jdata(ip))
-		rblocks += data_blocks ? data_blocks : 1;
-	if (ind_blocks || data_blocks)
-		rblocks += RES_STATFS + RES_QUOTA;
-
-	error = gfs2_trans_begin(sdp, rblocks,
-				 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
-	if (error)
-		goto out_trans_fail;
-
-	error = -ENOMEM;
-	flags |= AOP_FLAG_NOFS;
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	*pagep = page;
-	if (unlikely(!page))
-		goto out_endtrans;
-
-	if (gfs2_is_stuffed(ip)) {
-		error = 0;
-		if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
-			error = gfs2_unstuff_dinode(ip, page);
-			if (error == 0)
-				goto prepare_write;
-		} else if (!PageUptodate(page)) {
-			error = stuffed_readpage(ip, page);
-		}
-		goto out;
-	}
-
-prepare_write:
-	error = block_prepare_write(page, from, to, gfs2_block_map);
-out:
-	if (error == 0)
-		return 0;
-
-	page_cache_release(page);
-	if (pos + len > ip->i_inode.i_size)
-		vmtruncate(&ip->i_inode, ip->i_inode.i_size);
-out_endtrans:
-	gfs2_trans_end(sdp);
-out_trans_fail:
-	if (alloc_required) {
-		gfs2_inplace_release(ip);
-out_qunlock:
-		gfs2_quota_unlock(ip);
-out_alloc_put:
-		gfs2_alloc_put(ip);
-	}
-out_unlock:
-	gfs2_glock_dq(&ip->i_gh);
-out_uninit:
-	gfs2_holder_uninit(&ip->i_gh);
-	return error;
-}
-
-/**
- * adjust_fs_space - Adjusts the free space available due to gfs2_grow
- * @inode: the rindex inode
- */
-static void adjust_fs_space(struct inode *inode)
-{
-	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
-	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
-	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
-	u64 fs_total, new_free;
-
-	/* Total up the file system space, according to the latest rindex. */
-	fs_total = gfs2_ri_total(sdp);
-
-	spin_lock(&sdp->sd_statfs_spin);
-	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
-		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
-	else
-		new_free = 0;
-	spin_unlock(&sdp->sd_statfs_spin);
-	fs_warn(sdp, "File system extended by %llu blocks.\n",
-		(unsigned long long)new_free);
-	gfs2_statfs_change(sdp, new_free, new_free, 0);
-}
-
-/**
- * gfs2_stuffed_write_end - Write end for stuffed files
- * @inode: The inode
- * @dibh: The buffer_head containing the on-disk inode
- * @pos: The file position
- * @len: The length of the write
- * @copied: How much was actually copied by the VFS
- * @page: The page
- *
- * This copies the data from the page into the inode block after
- * the inode data structure itself.
- *
- * Returns: errno
- */
-static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
-				  loff_t pos, unsigned len, unsigned copied,
-				  struct page *page)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	u64 to = pos + copied;
-	void *kaddr;
-	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
-	struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
-
-	BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
-	kaddr = kmap_atomic(page, KM_USER0);
-	memcpy(buf + pos, kaddr + pos, copied);
-	memset(kaddr + pos + copied, 0, len - copied);
-	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
-
-	if (!PageUptodate(page))
-		SetPageUptodate(page);
-	unlock_page(page);
-	page_cache_release(page);
-
-	if (copied) {
-		if (inode->i_size < to) {
-			i_size_write(inode, to);
-			ip->i_disksize = inode->i_size;
-		}
-		gfs2_dinode_out(ip, di);
-		mark_inode_dirty(inode);
-	}
-
-	if (inode == sdp->sd_rindex)
-		adjust_fs_space(inode);
-
-	brelse(dibh);
-	gfs2_trans_end(sdp);
-	gfs2_glock_dq(&ip->i_gh);
-	gfs2_holder_uninit(&ip->i_gh);
-	return copied;
-}
-
-/**
- * gfs2_write_end
- * @file: The file to write to
- * @mapping: The address space to write to
- * @pos: The file position
- * @len: The length of the data
- * @copied:
- * @page: The page that has been written
- * @fsdata: The fsdata (unused in GFS2)
- *
- * The main write_end function for GFS2. We have a separate one for
- * stuffed files as they are slightly different, otherwise we just
- * put our locking around the VFS provided functions.
- *
- * Returns: errno
- */
-
-static int gfs2_write_end(struct file *file, struct address_space *mapping,
-			  loff_t pos, unsigned len, unsigned copied,
-			  struct page *page, void *fsdata)
-{
-	struct inode *inode = page->mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct buffer_head *dibh;
-	struct gfs2_alloc *al = ip->i_alloc;
-	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
-	unsigned int to = from + len;
-	int ret;
-
-	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
-
-	ret = gfs2_meta_inode_buffer(ip, &dibh);
-	if (unlikely(ret)) {
-		unlock_page(page);
-		page_cache_release(page);
-		goto failed;
-	}
-
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
-	if (gfs2_is_stuffed(ip))
-		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
-
-	if (!gfs2_is_writeback(ip))
-		gfs2_page_add_databufs(ip, page, from, to);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (ret > 0) {
-		if (inode->i_size > ip->i_disksize)
-			ip->i_disksize = inode->i_size;
-		gfs2_dinode_out(ip, dibh->b_data);
-		mark_inode_dirty(inode);
-	}
-
-	if (inode == sdp->sd_rindex)
-		adjust_fs_space(inode);
-
-	brelse(dibh);
-	gfs2_trans_end(sdp);
-failed:
-	if (al) {
-		gfs2_inplace_release(ip);
-		gfs2_quota_unlock(ip);
-		gfs2_alloc_put(ip);
-	}
-	gfs2_glock_dq(&ip->i_gh);
-	gfs2_holder_uninit(&ip->i_gh);
-	return ret;
-}
-
-/**
- * gfs2_set_page_dirty - Page dirtying function
- * @page: The page to dirty
- *
- * Returns: 1 if it dirtyed the page, or 0 otherwise
- */
- 
-static int gfs2_set_page_dirty(struct page *page)
-{
-	SetPageChecked(page);
-	return __set_page_dirty_buffers(page);
-}
-
-/**
- * gfs2_bmap - Block map function
- * @mapping: Address space info
- * @lblock: The block to map
- *
- * Returns: The disk address for the block or 0 on hole or error
- */
-
-static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
-{
-	struct gfs2_inode *ip = GFS2_I(mapping->host);
-	struct gfs2_holder i_gh;
-	sector_t dblock = 0;
-	int error;
-
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-	if (error)
-		return 0;
-
-	if (!gfs2_is_stuffed(ip))
-		dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
-
-	gfs2_glock_dq_uninit(&i_gh);
-
-	return dblock;
-}
-
-static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
-{
-	struct gfs2_bufdata *bd;
-
-	lock_buffer(bh);
-	gfs2_log_lock(sdp);
-	clear_buffer_dirty(bh);
-	bd = bh->b_private;
-	if (bd) {
-		if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
-			list_del_init(&bd->bd_le.le_list);
-		else
-			gfs2_remove_from_journal(bh, current->journal_info, 0);
-	}
-	bh->b_bdev = NULL;
-	clear_buffer_mapped(bh);
-	clear_buffer_req(bh);
-	clear_buffer_new(bh);
-	gfs2_log_unlock(sdp);
-	unlock_buffer(bh);
-}
-
-static void gfs2_invalidatepage(struct page *page, unsigned long offset)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
-	struct buffer_head *bh, *head;
-	unsigned long pos = 0;
-
-	BUG_ON(!PageLocked(page));
-	if (offset == 0)
-		ClearPageChecked(page);
-	if (!page_has_buffers(page))
-		goto out;
-
-	bh = head = page_buffers(page);
-	do {
-		if (offset <= pos)
-			gfs2_discard(sdp, bh);
-		pos += bh->b_size;
-		bh = bh->b_this_page;
-	} while (bh != head);
-out:
-	if (offset == 0)
-		try_to_release_page(page, 0);
-}
-
-/**
- * gfs2_ok_for_dio - check that dio is valid on this file
- * @ip: The inode
- * @rw: READ or WRITE
- * @offset: The offset at which we are reading or writing
- *
- * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
- *          1 (to accept the i/o request)
- */
-static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
-{
-	/*
-	 * Should we return an error here? I can't see that O_DIRECT for
-	 * a stuffed file makes any sense. For now we'll silently fall
-	 * back to buffered I/O
-	 */
-	if (gfs2_is_stuffed(ip))
-		return 0;
-
-	if (offset >= i_size_read(&ip->i_inode))
-		return 0;
-	return 1;
-}
-
-
-
-static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int rv;
-
-	/*
-	 * Deferred lock, even if its a write, since we do no allocation
-	 * on this path. All we need change is atime, and this lock mode
-	 * ensures that other nodes have flushed their buffered read caches
-	 * (i.e. their page cache entries for this inode). We do not,
-	 * unfortunately have the option of only flushing a range like
-	 * the VFS does.
-	 */
-	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
-	rv = gfs2_glock_nq(&gh);
-	if (rv)
-		return rv;
-	rv = gfs2_ok_for_dio(ip, rw, offset);
-	if (rv != 1)
-		goto out; /* dio not valid, fall back to buffered i/o */
-
-	rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
-					   iov, offset, nr_segs,
-					   gfs2_get_block_direct, NULL);
-out:
-	gfs2_glock_dq_m(1, &gh);
-	gfs2_holder_uninit(&gh);
-	return rv;
-}
-
-/**
- * gfs2_releasepage - free the metadata associated with a page
- * @page: the page that's being released
- * @gfp_mask: passed from Linux VFS, ignored by us
- *
- * Call try_to_free_buffers() if the buffers in this page can be
- * released.
- *
- * Returns: 0
- */
-
-int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
-{
-	struct inode *aspace = page->mapping->host;
-	struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
-	struct buffer_head *bh, *head;
-	struct gfs2_bufdata *bd;
-
-	if (!page_has_buffers(page))
-		return 0;
-
-	gfs2_log_lock(sdp);
-	head = bh = page_buffers(page);
-	do {
-		if (atomic_read(&bh->b_count))
-			goto cannot_release;
-		bd = bh->b_private;
-		if (bd && bd->bd_ail)
-			goto cannot_release;
-		gfs2_assert_warn(sdp, !buffer_pinned(bh));
-		gfs2_assert_warn(sdp, !buffer_dirty(bh));
-		bh = bh->b_this_page;
-	} while(bh != head);
-	gfs2_log_unlock(sdp);
-
-	head = bh = page_buffers(page);
-	do {
-		gfs2_log_lock(sdp);
-		bd = bh->b_private;
-		if (bd) {
-			gfs2_assert_warn(sdp, bd->bd_bh == bh);
-			gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
-			if (!list_empty(&bd->bd_le.le_list)) {
-				if (!buffer_pinned(bh))
-					list_del_init(&bd->bd_le.le_list);
-				else
-					bd = NULL;
-			}
-			if (bd)
-				bd->bd_bh = NULL;
-			bh->b_private = NULL;
-		}
-		gfs2_log_unlock(sdp);
-		if (bd)
-			kmem_cache_free(gfs2_bufdata_cachep, bd);
-
-		bh = bh->b_this_page;
-	} while (bh != head);
-
-	return try_to_free_buffers(page);
-cannot_release:
-	gfs2_log_unlock(sdp);
-	return 0;
-}
-
-static const struct address_space_operations gfs2_writeback_aops = {
-	.writepage = gfs2_writeback_writepage,
-	.writepages = gfs2_writeback_writepages,
-	.readpage = gfs2_readpage,
-	.readpages = gfs2_readpages,
-	.sync_page = block_sync_page,
-	.write_begin = gfs2_write_begin,
-	.write_end = gfs2_write_end,
-	.bmap = gfs2_bmap,
-	.invalidatepage = gfs2_invalidatepage,
-	.releasepage = gfs2_releasepage,
-	.direct_IO = gfs2_direct_IO,
-	.migratepage = buffer_migrate_page,
-	.is_partially_uptodate = block_is_partially_uptodate,
-};
-
-static const struct address_space_operations gfs2_ordered_aops = {
-	.writepage = gfs2_ordered_writepage,
-	.readpage = gfs2_readpage,
-	.readpages = gfs2_readpages,
-	.sync_page = block_sync_page,
-	.write_begin = gfs2_write_begin,
-	.write_end = gfs2_write_end,
-	.set_page_dirty = gfs2_set_page_dirty,
-	.bmap = gfs2_bmap,
-	.invalidatepage = gfs2_invalidatepage,
-	.releasepage = gfs2_releasepage,
-	.direct_IO = gfs2_direct_IO,
-	.migratepage = buffer_migrate_page,
-	.is_partially_uptodate = block_is_partially_uptodate,
-};
-
-static const struct address_space_operations gfs2_jdata_aops = {
-	.writepage = gfs2_jdata_writepage,
-	.writepages = gfs2_jdata_writepages,
-	.readpage = gfs2_readpage,
-	.readpages = gfs2_readpages,
-	.sync_page = block_sync_page,
-	.write_begin = gfs2_write_begin,
-	.write_end = gfs2_write_end,
-	.set_page_dirty = gfs2_set_page_dirty,
-	.bmap = gfs2_bmap,
-	.invalidatepage = gfs2_invalidatepage,
-	.releasepage = gfs2_releasepage,
-	.is_partially_uptodate = block_is_partially_uptodate,
-};
-
-void gfs2_set_aops(struct inode *inode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	if (gfs2_is_writeback(ip))
-		inode->i_mapping->a_ops = &gfs2_writeback_aops;
-	else if (gfs2_is_ordered(ip))
-		inode->i_mapping->a_ops = &gfs2_ordered_aops;
-	else if (gfs2_is_jdata(ip))
-		inode->i_mapping->a_ops = &gfs2_jdata_aops;
-	else
-		BUG();
-}
-
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
deleted file mode 100644
index 5da2128..0000000
--- a/fs/gfs2/ops_address.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_ADDRESS_DOT_H__
-#define __OPS_ADDRESS_DOT_H__
-
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/mm.h>
-
-extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
-extern int gfs2_internal_read(struct gfs2_inode *ip,
-			      struct file_ra_state *ra_state,
-			      char *buf, loff_t *pos, unsigned size);
-extern void gfs2_set_aops(struct inode *inode);
-
-#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
deleted file mode 100644
index 022c66c..0000000
--- a/fs/gfs2/ops_dentry.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/crc32.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "dir.h"
-#include "glock.h"
-#include "super.h"
-#include "util.h"
-#include "inode.h"
-
-/**
- * gfs2_drevalidate - Check directory lookup consistency
- * @dentry: the mapping to check
- * @nd:
- *
- * Check to make sure the lookup necessary to arrive at this inode from its
- * parent is still good.
- *
- * Returns: 1 if the dentry is ok, 0 if it isn't
- */
-
-static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
-{
-	struct dentry *parent = dget_parent(dentry);
-	struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode);
-	struct gfs2_inode *dip = GFS2_I(parent->d_inode);
-	struct inode *inode = dentry->d_inode;
-	struct gfs2_holder d_gh;
-	struct gfs2_inode *ip = NULL;
-	int error;
-	int had_lock = 0;
-
-	if (inode) {
-		if (is_bad_inode(inode))
-			goto invalid;
-		ip = GFS2_I(inode);
-	}
-
-	if (sdp->sd_args.ar_localcaching)
-		goto valid;
-
-	had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
-	if (!had_lock) {
-		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
-		if (error)
-			goto fail;
-	} 
-
-	error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
-	switch (error) {
-	case 0:
-		if (!inode)
-			goto invalid_gunlock;
-		break;
-	case -ENOENT:
-		if (!inode)
-			goto valid_gunlock;
-		goto invalid_gunlock;
-	default:
-		goto fail_gunlock;
-	}
-
-valid_gunlock:
-	if (!had_lock)
-		gfs2_glock_dq_uninit(&d_gh);
-valid:
-	dput(parent);
-	return 1;
-
-invalid_gunlock:
-	if (!had_lock)
-		gfs2_glock_dq_uninit(&d_gh);
-invalid:
-	if (inode && S_ISDIR(inode->i_mode)) {
-		if (have_submounts(dentry))
-			goto valid;
-		shrink_dcache_parent(dentry);
-	}
-	d_drop(dentry);
-	dput(parent);
-	return 0;
-
-fail_gunlock:
-	gfs2_glock_dq_uninit(&d_gh);
-fail:
-	dput(parent);
-	return 0;
-}
-
-static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
-{
-	str->hash = gfs2_disk_hash(str->name, str->len);
-	return 0;
-}
-
-const struct dentry_operations gfs2_dops = {
-	.d_revalidate = gfs2_drevalidate,
-	.d_hash = gfs2_dhash,
-};
-
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
deleted file mode 100644
index 9200ef2..0000000
--- a/fs/gfs2/ops_export.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/exportfs.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/crc32.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "dir.h"
-#include "glock.h"
-#include "glops.h"
-#include "inode.h"
-#include "super.h"
-#include "rgrp.h"
-#include "util.h"
-
-#define GFS2_SMALL_FH_SIZE 4
-#define GFS2_LARGE_FH_SIZE 8
-#define GFS2_OLD_FH_SIZE 10
-
-static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
-			  int connectable)
-{
-	__be32 *fh = (__force __be32 *)p;
-	struct inode *inode = dentry->d_inode;
-	struct super_block *sb = inode->i_sb;
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	if (*len < GFS2_SMALL_FH_SIZE ||
-	    (connectable && *len < GFS2_LARGE_FH_SIZE))
-		return 255;
-
-	fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
-	fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
-	fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
-	fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
-	*len = GFS2_SMALL_FH_SIZE;
-
-	if (!connectable || inode == sb->s_root->d_inode)
-		return *len;
-
-	spin_lock(&dentry->d_lock);
-	inode = dentry->d_parent->d_inode;
-	ip = GFS2_I(inode);
-	igrab(inode);
-	spin_unlock(&dentry->d_lock);
-
-	fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
-	fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
-	fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
-	fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
-	*len = GFS2_LARGE_FH_SIZE;
-
-	iput(inode);
-
-	return *len;
-}
-
-struct get_name_filldir {
-	struct gfs2_inum_host inum;
-	char *name;
-};
-
-static int get_name_filldir(void *opaque, const char *name, int length,
-			    loff_t offset, u64 inum, unsigned int type)
-{
-	struct get_name_filldir *gnfd = opaque;
-
-	if (inum != gnfd->inum.no_addr)
-		return 0;
-
-	memcpy(gnfd->name, name, length);
-	gnfd->name[length] = 0;
-
-	return 1;
-}
-
-static int gfs2_get_name(struct dentry *parent, char *name,
-			 struct dentry *child)
-{
-	struct inode *dir = parent->d_inode;
-	struct inode *inode = child->d_inode;
-	struct gfs2_inode *dip, *ip;
-	struct get_name_filldir gnfd;
-	struct gfs2_holder gh;
-	u64 offset = 0;
-	int error;
-
-	if (!dir)
-		return -EINVAL;
-
-	if (!S_ISDIR(dir->i_mode) || !inode)
-		return -EINVAL;
-
-	dip = GFS2_I(dir);
-	ip = GFS2_I(inode);
-
-	*name = 0;
-	gnfd.inum.no_addr = ip->i_no_addr;
-	gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
-	gnfd.name = name;
-
-	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
-	if (error)
-		return error;
-
-	error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir);
-
-	gfs2_glock_dq_uninit(&gh);
-
-	if (!error && !*name)
-		error = -ENOENT;
-
-	return error;
-}
-
-static struct dentry *gfs2_get_parent(struct dentry *child)
-{
-	struct qstr dotdot;
-	struct dentry *dentry;
-
-	/*
-	 * XXX(hch): it would be a good idea to keep this around as a
-	 *	     static variable.
-	 */
-	gfs2_str2qstr(&dotdot, "..");
-
-	dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1));
-	if (!IS_ERR(dentry))
-		dentry->d_op = &gfs2_dops;
-	return dentry;
-}
-
-static struct dentry *gfs2_get_dentry(struct super_block *sb,
-		struct gfs2_inum_host *inum)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_holder i_gh, ri_gh, rgd_gh;
-	struct gfs2_rgrpd *rgd;
-	struct inode *inode;
-	struct dentry *dentry;
-	int error;
-
-	/* System files? */
-
-	inode = gfs2_ilookup(sb, inum->no_addr);
-	if (inode) {
-		if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
-			iput(inode);
-			return ERR_PTR(-ESTALE);
-		}
-		goto out_inode;
-	}
-
-	error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
-				  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-	if (error)
-		return ERR_PTR(error);
-
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		goto fail;
-
-	error = -EINVAL;
-	rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
-	if (!rgd)
-		goto fail_rindex;
-
-	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
-	if (error)
-		goto fail_rindex;
-
-	error = -ESTALE;
-	if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
-		goto fail_rgd;
-
-	gfs2_glock_dq_uninit(&rgd_gh);
-	gfs2_glock_dq_uninit(&ri_gh);
-
-	inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
-					inum->no_addr,
-					0, 0);
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
-		goto fail;
-	}
-
-	error = gfs2_inode_refresh(GFS2_I(inode));
-	if (error) {
-		iput(inode);
-		goto fail;
-	}
-
-	/* Pick up the works we bypass in gfs2_inode_lookup */
-	if (inode->i_state & I_NEW) 
-		gfs2_set_iop(inode);
-
-	if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
-		iput(inode);
-		goto fail;
-	}
-
-	error = -EIO;
-	if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
-		iput(inode);
-		goto fail;
-	}
-
-	gfs2_glock_dq_uninit(&i_gh);
-
-out_inode:
-	dentry = d_obtain_alias(inode);
-	if (!IS_ERR(dentry))
-		dentry->d_op = &gfs2_dops;
-	return dentry;
-
-fail_rgd:
-	gfs2_glock_dq_uninit(&rgd_gh);
-
-fail_rindex:
-	gfs2_glock_dq_uninit(&ri_gh);
-
-fail:
-	gfs2_glock_dq_uninit(&i_gh);
-	return ERR_PTR(error);
-}
-
-static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
-		int fh_len, int fh_type)
-{
-	struct gfs2_inum_host this;
-	__be32 *fh = (__force __be32 *)fid->raw;
-
-	switch (fh_type) {
-	case GFS2_SMALL_FH_SIZE:
-	case GFS2_LARGE_FH_SIZE:
-	case GFS2_OLD_FH_SIZE:
-		this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
-		this.no_formal_ino |= be32_to_cpu(fh[1]);
-		this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
-		this.no_addr |= be32_to_cpu(fh[3]);
-		return gfs2_get_dentry(sb, &this);
-	default:
-		return NULL;
-	}
-}
-
-static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid,
-		int fh_len, int fh_type)
-{
-	struct gfs2_inum_host parent;
-	__be32 *fh = (__force __be32 *)fid->raw;
-
-	switch (fh_type) {
-	case GFS2_LARGE_FH_SIZE:
-	case GFS2_OLD_FH_SIZE:
-		parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
-		parent.no_formal_ino |= be32_to_cpu(fh[5]);
-		parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
-		parent.no_addr |= be32_to_cpu(fh[7]);
-		return gfs2_get_dentry(sb, &parent);
-	default:
-		return NULL;
-	}
-}
-
-const struct export_operations gfs2_export_ops = {
-	.encode_fh = gfs2_encode_fh,
-	.fh_to_dentry = gfs2_fh_to_dentry,
-	.fh_to_parent = gfs2_fh_to_parent,
-	.get_name = gfs2_get_name,
-	.get_parent = gfs2_get_parent,
-};
-
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
deleted file mode 100644
index 0ee7bd2..0000000
--- a/fs/gfs2/ops_file.c
+++ /dev/null
@@ -1,766 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/pagemap.h>
-#include <linux/uio.h>
-#include <linux/blkdev.h>
-#include <linux/mm.h>
-#include <linux/mount.h>
-#include <linux/fs.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/ext2_fs.h>
-#include <linux/crc32.h>
-#include <linux/writeback.h>
-#include <asm/uaccess.h>
-#include <linux/dlm.h>
-#include <linux/dlm_plock.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "bmap.h"
-#include "dir.h"
-#include "glock.h"
-#include "glops.h"
-#include "inode.h"
-#include "log.h"
-#include "meta_io.h"
-#include "quota.h"
-#include "rgrp.h"
-#include "trans.h"
-#include "util.h"
-#include "eaops.h"
-#include "ops_address.h"
-
-/**
- * gfs2_llseek - seek to a location in a file
- * @file: the file
- * @offset: the offset
- * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
- *
- * SEEK_END requires the glock for the file because it references the
- * file's size.
- *
- * Returns: The new offset, or errno
- */
-
-static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
-{
-	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-	struct gfs2_holder i_gh;
-	loff_t error;
-
-	if (origin == 2) {
-		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
-					   &i_gh);
-		if (!error) {
-			error = generic_file_llseek_unlocked(file, offset, origin);
-			gfs2_glock_dq_uninit(&i_gh);
-		}
-	} else
-		error = generic_file_llseek_unlocked(file, offset, origin);
-
-	return error;
-}
-
-/**
- * gfs2_readdir - Read directory entries from a directory
- * @file: The directory to read from
- * @dirent: Buffer for dirents
- * @filldir: Function used to do the copying
- *
- * Returns: errno
- */
-
-static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
-{
-	struct inode *dir = file->f_mapping->host;
-	struct gfs2_inode *dip = GFS2_I(dir);
-	struct gfs2_holder d_gh;
-	u64 offset = file->f_pos;
-	int error;
-
-	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
-	error = gfs2_glock_nq(&d_gh);
-	if (error) {
-		gfs2_holder_uninit(&d_gh);
-		return error;
-	}
-
-	error = gfs2_dir_read(dir, &offset, dirent, filldir);
-
-	gfs2_glock_dq_uninit(&d_gh);
-
-	file->f_pos = offset;
-
-	return error;
-}
-
-/**
- * fsflags_cvt
- * @table: A table of 32 u32 flags
- * @val: a 32 bit value to convert
- *
- * This function can be used to convert between fsflags values and
- * GFS2's own flags values.
- *
- * Returns: the converted flags
- */
-static u32 fsflags_cvt(const u32 *table, u32 val)
-{
-	u32 res = 0;
-	while(val) {
-		if (val & 1)
-			res |= *table;
-		table++;
-		val >>= 1;
-	}
-	return res;
-}
-
-static const u32 fsflags_to_gfs2[32] = {
-	[3] = GFS2_DIF_SYNC,
-	[4] = GFS2_DIF_IMMUTABLE,
-	[5] = GFS2_DIF_APPENDONLY,
-	[7] = GFS2_DIF_NOATIME,
-	[12] = GFS2_DIF_EXHASH,
-	[14] = GFS2_DIF_INHERIT_JDATA,
-};
-
-static const u32 gfs2_to_fsflags[32] = {
-	[gfs2fl_Sync] = FS_SYNC_FL,
-	[gfs2fl_Immutable] = FS_IMMUTABLE_FL,
-	[gfs2fl_AppendOnly] = FS_APPEND_FL,
-	[gfs2fl_NoAtime] = FS_NOATIME_FL,
-	[gfs2fl_ExHash] = FS_INDEX_FL,
-	[gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
-};
-
-static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
-{
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int error;
-	u32 fsflags;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-	error = gfs2_glock_nq(&gh);
-	if (error)
-		return error;
-
-	fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags);
-	if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA)
-		fsflags |= FS_JOURNAL_DATA_FL;
-	if (put_user(fsflags, ptr))
-		error = -EFAULT;
-
-	gfs2_glock_dq(&gh);
-	gfs2_holder_uninit(&gh);
-	return error;
-}
-
-void gfs2_set_inode_flags(struct inode *inode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	unsigned int flags = inode->i_flags;
-
-	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
-	if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
-		flags |= S_IMMUTABLE;
-	if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
-		flags |= S_APPEND;
-	if (ip->i_diskflags & GFS2_DIF_NOATIME)
-		flags |= S_NOATIME;
-	if (ip->i_diskflags & GFS2_DIF_SYNC)
-		flags |= S_SYNC;
-	inode->i_flags = flags;
-}
-
-/* Flags that can be set by user space */
-#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA|			\
-			     GFS2_DIF_IMMUTABLE|		\
-			     GFS2_DIF_APPENDONLY|		\
-			     GFS2_DIF_NOATIME|			\
-			     GFS2_DIF_SYNC|			\
-			     GFS2_DIF_SYSTEM|			\
-			     GFS2_DIF_INHERIT_JDATA)
-
-/**
- * gfs2_set_flags - set flags on an inode
- * @inode: The inode
- * @flags: The flags to set
- * @mask: Indicates which flags are valid
- *
- */
-static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
-{
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct buffer_head *bh;
-	struct gfs2_holder gh;
-	int error;
-	u32 new_flags, flags;
-
-	error = mnt_want_write(filp->f_path.mnt);
-	if (error)
-		return error;
-
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	if (error)
-		goto out_drop_write;
-
-	flags = ip->i_diskflags;
-	new_flags = (flags & ~mask) | (reqflags & mask);
-	if ((new_flags ^ flags) == 0)
-		goto out;
-
-	error = -EINVAL;
-	if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
-		goto out;
-
-	error = -EPERM;
-	if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
-		goto out;
-	if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
-		goto out;
-	if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
-	    !capable(CAP_LINUX_IMMUTABLE))
-		goto out;
-	if (!IS_IMMUTABLE(inode)) {
-		error = gfs2_permission(inode, MAY_WRITE);
-		if (error)
-			goto out;
-	}
-	if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
-		if (flags & GFS2_DIF_JDATA)
-			gfs2_log_flush(sdp, ip->i_gl);
-		error = filemap_fdatawrite(inode->i_mapping);
-		if (error)
-			goto out;
-		error = filemap_fdatawait(inode->i_mapping);
-		if (error)
-			goto out;
-	}
-	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
-	if (error)
-		goto out;
-	error = gfs2_meta_inode_buffer(ip, &bh);
-	if (error)
-		goto out_trans_end;
-	gfs2_trans_add_bh(ip->i_gl, bh, 1);
-	ip->i_diskflags = new_flags;
-	gfs2_dinode_out(ip, bh->b_data);
-	brelse(bh);
-	gfs2_set_inode_flags(inode);
-	gfs2_set_aops(inode);
-out_trans_end:
-	gfs2_trans_end(sdp);
-out:
-	gfs2_glock_dq_uninit(&gh);
-out_drop_write:
-	mnt_drop_write(filp->f_path.mnt);
-	return error;
-}
-
-static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
-{
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	u32 fsflags, gfsflags;
-	if (get_user(fsflags, ptr))
-		return -EFAULT;
-	gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
-	if (!S_ISDIR(inode->i_mode)) {
-		if (gfsflags & GFS2_DIF_INHERIT_JDATA)
-			gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
-		return do_gfs2_set_flags(filp, gfsflags, ~0);
-	}
-	return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
-}
-
-static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	switch(cmd) {
-	case FS_IOC_GETFLAGS:
-		return gfs2_get_flags(filp, (u32 __user *)arg);
-	case FS_IOC_SETFLAGS:
-		return gfs2_set_flags(filp, (u32 __user *)arg);
-	}
-	return -ENOTTY;
-}
-
-/**
- * gfs2_allocate_page_backing - Use bmap to allocate blocks
- * @page: The (locked) page to allocate backing for
- *
- * We try to allocate all the blocks required for the page in
- * one go. This might fail for various reasons, so we keep
- * trying until all the blocks to back this page are allocated.
- * If some of the blocks are already allocated, thats ok too.
- */
-
-static int gfs2_allocate_page_backing(struct page *page)
-{
-	struct inode *inode = page->mapping->host;
-	struct buffer_head bh;
-	unsigned long size = PAGE_CACHE_SIZE;
-	u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	do {
-		bh.b_state = 0;
-		bh.b_size = size;
-		gfs2_block_map(inode, lblock, &bh, 1);
-		if (!buffer_mapped(&bh))
-			return -EIO;
-		size -= bh.b_size;
-		lblock += (bh.b_size >> inode->i_blkbits);
-	} while(size > 0);
-	return 0;
-}
-
-/**
- * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
- * @vma: The virtual memory area
- * @page: The page which is about to become writable
- *
- * When the page becomes writable, we need to ensure that we have
- * blocks allocated on disk to back that page.
- */
-
-static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct page *page = vmf->page;
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	unsigned long last_index;
-	u64 pos = page->index << PAGE_CACHE_SHIFT;
-	unsigned int data_blocks, ind_blocks, rblocks;
-	int alloc_required = 0;
-	struct gfs2_holder gh;
-	struct gfs2_alloc *al;
-	int ret;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	ret = gfs2_glock_nq(&gh);
-	if (ret)
-		goto out;
-
-	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
-	set_bit(GIF_SW_PAGED, &ip->i_flags);
-
-	ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
-	if (ret || !alloc_required)
-		goto out_unlock;
-	ret = -ENOMEM;
-	al = gfs2_alloc_get(ip);
-	if (al == NULL)
-		goto out_unlock;
-
-	ret = gfs2_quota_lock_check(ip);
-	if (ret)
-		goto out_alloc_put;
-	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
-	al->al_requested = data_blocks + ind_blocks;
-	ret = gfs2_inplace_reserve(ip);
-	if (ret)
-		goto out_quota_unlock;
-
-	rblocks = RES_DINODE + ind_blocks;
-	if (gfs2_is_jdata(ip))
-		rblocks += data_blocks ? data_blocks : 1;
-	if (ind_blocks || data_blocks)
-		rblocks += RES_STATFS + RES_QUOTA;
-	ret = gfs2_trans_begin(sdp, rblocks, 0);
-	if (ret)
-		goto out_trans_fail;
-
-	lock_page(page);
-	ret = -EINVAL;
-	last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT;
-	if (page->index > last_index)
-		goto out_unlock_page;
-	ret = 0;
-	if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping)
-		goto out_unlock_page;
-	if (gfs2_is_stuffed(ip)) {
-		ret = gfs2_unstuff_dinode(ip, page);
-		if (ret)
-			goto out_unlock_page;
-	}
-	ret = gfs2_allocate_page_backing(page);
-
-out_unlock_page:
-	unlock_page(page);
-	gfs2_trans_end(sdp);
-out_trans_fail:
-	gfs2_inplace_release(ip);
-out_quota_unlock:
-	gfs2_quota_unlock(ip);
-out_alloc_put:
-	gfs2_alloc_put(ip);
-out_unlock:
-	gfs2_glock_dq(&gh);
-out:
-	gfs2_holder_uninit(&gh);
-	if (ret == -ENOMEM)
-		ret = VM_FAULT_OOM;
-	else if (ret)
-		ret = VM_FAULT_SIGBUS;
-	return ret;
-}
-
-static struct vm_operations_struct gfs2_vm_ops = {
-	.fault = filemap_fault,
-	.page_mkwrite = gfs2_page_mkwrite,
-};
-
-/**
- * gfs2_mmap -
- * @file: The file to map
- * @vma: The VMA which described the mapping
- *
- * There is no need to get a lock here unless we should be updating
- * atime. We ignore any locking errors since the only consequence is
- * a missed atime update (which will just be deferred until later).
- *
- * Returns: 0
- */
-
-static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-
-	if (!(file->f_flags & O_NOATIME)) {
-		struct gfs2_holder i_gh;
-		int error;
-
-		gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
-		error = gfs2_glock_nq(&i_gh);
-		file_accessed(file);
-		if (error == 0)
-			gfs2_glock_dq_uninit(&i_gh);
-	}
-	vma->vm_ops = &gfs2_vm_ops;
-	vma->vm_flags |= VM_CAN_NONLINEAR;
-
-	return 0;
-}
-
-/**
- * gfs2_open - open a file
- * @inode: the inode to open
- * @file: the struct file for this opening
- *
- * Returns: errno
- */
-
-static int gfs2_open(struct inode *inode, struct file *file)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder i_gh;
-	struct gfs2_file *fp;
-	int error;
-
-	fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
-	if (!fp)
-		return -ENOMEM;
-
-	mutex_init(&fp->f_fl_mutex);
-
-	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
-	file->private_data = fp;
-
-	if (S_ISREG(ip->i_inode.i_mode)) {
-		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
-					   &i_gh);
-		if (error)
-			goto fail;
-
-		if (!(file->f_flags & O_LARGEFILE) &&
-		    ip->i_disksize > MAX_NON_LFS) {
-			error = -EOVERFLOW;
-			goto fail_gunlock;
-		}
-
-		gfs2_glock_dq_uninit(&i_gh);
-	}
-
-	return 0;
-
-fail_gunlock:
-	gfs2_glock_dq_uninit(&i_gh);
-fail:
-	file->private_data = NULL;
-	kfree(fp);
-	return error;
-}
-
-/**
- * gfs2_close - called to close a struct file
- * @inode: the inode the struct file belongs to
- * @file: the struct file being closed
- *
- * Returns: errno
- */
-
-static int gfs2_close(struct inode *inode, struct file *file)
-{
-	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
-	struct gfs2_file *fp;
-
-	fp = file->private_data;
-	file->private_data = NULL;
-
-	if (gfs2_assert_warn(sdp, fp))
-		return -EIO;
-
-	kfree(fp);
-
-	return 0;
-}
-
-/**
- * gfs2_fsync - sync the dirty data for a file (across the cluster)
- * @file: the file that points to the dentry (we ignore this)
- * @dentry: the dentry that points to the inode to sync
- *
- * The VFS will flush "normal" data for us. We only need to worry
- * about metadata here. For journaled data, we just do a log flush
- * as we can't avoid it. Otherwise we can just bale out if datasync
- * is set. For stuffed inodes we must flush the log in order to
- * ensure that all data is on disk.
- *
- * The call to write_inode_now() is there to write back metadata and
- * the inode itself. It does also try and write the data, but thats
- * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
- * for us.
- *
- * Returns: errno
- */
-
-static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
-	int ret = 0;
-
-	if (gfs2_is_jdata(GFS2_I(inode))) {
-		gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
-		return 0;
-	}
-
-	if (sync_state != 0) {
-		if (!datasync)
-			ret = write_inode_now(inode, 0);
-
-		if (gfs2_is_stuffed(GFS2_I(inode)))
-			gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
-	}
-
-	return ret;
-}
-
-#ifdef CONFIG_GFS2_FS_LOCKING_DLM
-
-/**
- * gfs2_setlease - acquire/release a file lease
- * @file: the file pointer
- * @arg: lease type
- * @fl: file lock
- *
- * We don't currently have a way to enforce a lease across the whole
- * cluster; until we do, disable leases (by just returning -EINVAL),
- * unless the administrator has requested purely local locking.
- *
- * Returns: errno
- */
-
-static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
-{
-	return -EINVAL;
-}
-
-/**
- * gfs2_lock - acquire/release a posix lock on a file
- * @file: the file pointer
- * @cmd: either modify or retrieve lock state, possibly wait
- * @fl: type and range of lock
- *
- * Returns: errno
- */
-
-static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
-{
-	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-
-	if (!(fl->fl_flags & FL_POSIX))
-		return -ENOLCK;
-	if (__mandatory_lock(&ip->i_inode))
-		return -ENOLCK;
-
-	if (cmd == F_CANCELLK) {
-		/* Hack: */
-		cmd = F_SETLK;
-		fl->fl_type = F_UNLCK;
-	}
-	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-		return -EIO;
-	if (IS_GETLK(cmd))
-		return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
-	else if (fl->fl_type == F_UNLCK)
-		return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
-	else
-		return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
-}
-
-static int do_flock(struct file *file, int cmd, struct file_lock *fl)
-{
-	struct gfs2_file *fp = file->private_data;
-	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
-	struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode);
-	struct gfs2_glock *gl;
-	unsigned int state;
-	int flags;
-	int error = 0;
-
-	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
-	flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
-
-	mutex_lock(&fp->f_fl_mutex);
-
-	gl = fl_gh->gh_gl;
-	if (gl) {
-		if (fl_gh->gh_state == state)
-			goto out;
-		flock_lock_file_wait(file,
-				     &(struct file_lock){.fl_type = F_UNLCK});
-		gfs2_glock_dq_wait(fl_gh);
-		gfs2_holder_reinit(state, flags, fl_gh);
-	} else {
-		error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
-				       &gfs2_flock_glops, CREATE, &gl);
-		if (error)
-			goto out;
-		gfs2_holder_init(gl, state, flags, fl_gh);
-		gfs2_glock_put(gl);
-	}
-	error = gfs2_glock_nq(fl_gh);
-	if (error) {
-		gfs2_holder_uninit(fl_gh);
-		if (error == GLR_TRYFAILED)
-			error = -EAGAIN;
-	} else {
-		error = flock_lock_file_wait(file, fl);
-		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-	}
-
-out:
-	mutex_unlock(&fp->f_fl_mutex);
-	return error;
-}
-
-static void do_unflock(struct file *file, struct file_lock *fl)
-{
-	struct gfs2_file *fp = file->private_data;
-	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
-
-	mutex_lock(&fp->f_fl_mutex);
-	flock_lock_file_wait(file, fl);
-	if (fl_gh->gh_gl)
-		gfs2_glock_dq_uninit(fl_gh);
-	mutex_unlock(&fp->f_fl_mutex);
-}
-
-/**
- * gfs2_flock - acquire/release a flock lock on a file
- * @file: the file pointer
- * @cmd: either modify or retrieve lock state, possibly wait
- * @fl: type and range of lock
- *
- * Returns: errno
- */
-
-static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
-{
-	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-
-	if (!(fl->fl_flags & FL_FLOCK))
-		return -ENOLCK;
-	if (__mandatory_lock(&ip->i_inode))
-		return -ENOLCK;
-
-	if (fl->fl_type == F_UNLCK) {
-		do_unflock(file, fl);
-		return 0;
-	} else {
-		return do_flock(file, cmd, fl);
-	}
-}
-
-const struct file_operations gfs2_file_fops = {
-	.llseek		= gfs2_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
-	.unlocked_ioctl	= gfs2_ioctl,
-	.mmap		= gfs2_mmap,
-	.open		= gfs2_open,
-	.release	= gfs2_close,
-	.fsync		= gfs2_fsync,
-	.lock		= gfs2_lock,
-	.flock		= gfs2_flock,
-	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
-	.setlease	= gfs2_setlease,
-};
-
-const struct file_operations gfs2_dir_fops = {
-	.readdir	= gfs2_readdir,
-	.unlocked_ioctl	= gfs2_ioctl,
-	.open		= gfs2_open,
-	.release	= gfs2_close,
-	.fsync		= gfs2_fsync,
-	.lock		= gfs2_lock,
-	.flock		= gfs2_flock,
-};
-
-#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
-
-const struct file_operations gfs2_file_fops_nolock = {
-	.llseek		= gfs2_llseek,
-	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
-	.unlocked_ioctl	= gfs2_ioctl,
-	.mmap		= gfs2_mmap,
-	.open		= gfs2_open,
-	.release	= gfs2_close,
-	.fsync		= gfs2_fsync,
-	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
-	.setlease	= generic_setlease,
-};
-
-const struct file_operations gfs2_dir_fops_nolock = {
-	.readdir	= gfs2_readdir,
-	.unlocked_ioctl	= gfs2_ioctl,
-	.open		= gfs2_open,
-	.release	= gfs2_close,
-	.fsync		= gfs2_fsync,
-};
-
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 152e6c4..2e9b932 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -60,7 +60,6 @@
 #include "super.h"
 #include "trans.h"
 #include "inode.h"
-#include "ops_address.h"
 #include "util.h"
 
 #define QUOTA_USER 1
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ee3d5c1..6122c7e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -29,7 +29,6 @@
 #include "util.h"
 #include "log.h"
 #include "inode.h"
-#include "ops_address.h"
 
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
-- 
cgit v0.10.2


From 5c82f56736e4c3a9eaf53c94366b056c8622d79e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 22 May 2009 09:41:30 +0100
Subject: AsoC: Make snd_soc_read() and snd_soc_write() functions

Should be no impact on the generated code but it helps the compiler
print clearer messages.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 2af3213..cf6111d 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -214,10 +214,6 @@ void snd_soc_jack_free_gpios(struct snd_soc_jack *jack, int count,
 			struct snd_soc_jack_gpio *gpios);
 #endif
 
-/* codec IO */
-#define snd_soc_read(codec, reg) codec->read(codec, reg)
-#define snd_soc_write(codec, reg, value) codec->write(codec, reg, value)
-
 /* codec register bit access */
 int snd_soc_update_bits(struct snd_soc_codec *codec, unsigned short reg,
 				unsigned short mask, unsigned short value);
@@ -507,6 +503,19 @@ struct soc_enum {
 	void *dapm;
 };
 
+/* codec IO */
+static inline unsigned int snd_soc_read(struct snd_soc_codec *codec,
+					unsigned int reg)
+{
+	return codec->read(codec, reg);
+}
+
+static inline unsigned int snd_soc_write(struct snd_soc_codec *codec,
+					 unsigned int reg, unsigned int val)
+{
+	return codec->write(codec, reg, val);
+}
+
 #include <sound/soc-dai.h>
 
 #endif
-- 
cgit v0.10.2


From 9da28c7b38170882b1c43d7d133ddce34e25f161 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Fri, 22 May 2009 10:13:15 +0300
Subject: ASoC: TWL4030: Add support for platform dependent configuration

twl4030_setup_data structure can be passed from platform drivers to
the codec via the snd_soc_device->codec_data pointer.

Currently the setup data has support for the Headset pop-removal
related configuration, which differs from board to board.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index f554672..584507f 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -1997,6 +1997,8 @@ static int twl4030_resume(struct platform_device *pdev)
 static int twl4030_init(struct snd_soc_device *socdev)
 {
 	struct snd_soc_codec *codec = socdev->card->codec;
+	struct twl4030_setup_data *setup = socdev->codec_data;
+	struct twl4030_priv *twl4030 = codec->private_data;
 	int ret = 0;
 
 	printk(KERN_INFO "TWL4030 Audio Codec init \n");
@@ -2014,6 +2016,23 @@ static int twl4030_init(struct snd_soc_device *socdev)
 	if (codec->reg_cache == NULL)
 		return -ENOMEM;
 
+	/* Configuration for headset ramp delay from setup data */
+	if (setup) {
+		unsigned char hs_pop;
+
+		if (setup->sysclk)
+			twl4030->sysclk = setup->sysclk;
+		else
+			twl4030->sysclk = 26000;
+
+		hs_pop = twl4030_read_reg_cache(codec, TWL4030_REG_HS_POPN_SET);
+		hs_pop &= ~TWL4030_RAMP_DELAY;
+		hs_pop |= (setup->ramp_delay_value << 2);
+		twl4030_write_reg_cache(codec, TWL4030_REG_HS_POPN_SET, hs_pop);
+	} else {
+		twl4030->sysclk = 26000;
+	}
+
 	/* register pcms */
 	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
 	if (ret < 0) {
@@ -2063,9 +2082,6 @@ static int twl4030_probe(struct platform_device *pdev)
 		kfree(codec);
 		return -ENOMEM;
 	}
-	/* Set default sysclk (used by the headsetl/rpga_event callback for
-	 * pop-attenuation) */
-	twl4030->sysclk = 26000;
 
 	codec->private_data = twl4030;
 	socdev->card->codec = codec;
diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h
index 9668bdf..48326e2 100644
--- a/sound/soc/codecs/twl4030.h
+++ b/sound/soc/codecs/twl4030.h
@@ -266,4 +266,9 @@
 extern struct snd_soc_dai twl4030_dai[2];
 extern struct snd_soc_codec_device soc_codec_dev_twl4030;
 
+struct twl4030_setup_data {
+	unsigned int ramp_delay_value;
+	unsigned int sysclk;
+};
+
 #endif	/* End of __TWL4030_AUDIO_H__ */
-- 
cgit v0.10.2


From 7385ba44f8bcea15bf0d75ae2814f0cec63140b9 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Fri, 22 May 2009 10:13:16 +0300
Subject: ASoC: SDP4030: Use the twl4030_setup_data for headset pop-removal

With this patch the initial headset pop-removal related values are
configured for the twl4030 codec (ramp delay and sysclk).

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/omap/sdp3430.c b/sound/soc/omap/sdp3430.c
index 19966a7..b719e5d 100644
--- a/sound/soc/omap/sdp3430.c
+++ b/sound/soc/omap/sdp3430.c
@@ -276,10 +276,17 @@ static struct snd_soc_card snd_soc_sdp3430 = {
 	.num_links = ARRAY_SIZE(sdp3430_dai),
 };
 
+/* twl4030 setup */
+static struct twl4030_setup_data twl4030_setup = {
+	.ramp_delay_value = 3,
+	.sysclk = 26000,
+};
+
 /* Audio subsystem */
 static struct snd_soc_device sdp3430_snd_devdata = {
 	.card = &snd_soc_sdp3430,
 	.codec_dev = &soc_codec_dev_twl4030,
+	.codec_data = &twl4030_setup,
 };
 
 static struct platform_device *sdp3430_snd_device;
-- 
cgit v0.10.2


From 9e6e0a128bca0a151d8d3fbd9459b22fc21cfebb Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 22 May 2009 10:36:01 +0100
Subject: GFS2: Merge mount.c and ops_super.c into super.c

mount.c only contained a single function, so is not really
worth retaining on its own. All of the super related code
is now either in super.c or ops_fstype.c

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 4f7332c..d53a9be 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,8 +1,8 @@
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
 	glops.o inode.o log.o lops.o main.o meta_io.o \
-	mount.o aops.o dentry.o export.o file.o \
-	ops_fstype.o ops_inode.o ops_super.o quota.o \
+	aops.o dentry.o export.o file.o \
+	ops_fstype.o ops_inode.o quota.o \
 	recovery.o rgrp.o super.o sys.o trans.o util.o
 
 gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
deleted file mode 100644
index 947af15..0000000
--- a/fs/gfs2/mount.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/parser.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "super.h"
-#include "sys.h"
-#include "util.h"
-
-enum {
-	Opt_lockproto,
-	Opt_locktable,
-	Opt_hostdata,
-	Opt_spectator,
-	Opt_ignore_local_fs,
-	Opt_localflocks,
-	Opt_localcaching,
-	Opt_debug,
-	Opt_nodebug,
-	Opt_upgrade,
-	Opt_acl,
-	Opt_noacl,
-	Opt_quota_off,
-	Opt_quota_account,
-	Opt_quota_on,
-	Opt_quota,
-	Opt_noquota,
-	Opt_suiddir,
-	Opt_nosuiddir,
-	Opt_data_writeback,
-	Opt_data_ordered,
-	Opt_meta,
-	Opt_discard,
-	Opt_nodiscard,
-	Opt_commit,
-	Opt_err,
-};
-
-static const match_table_t tokens = {
-	{Opt_lockproto, "lockproto=%s"},
-	{Opt_locktable, "locktable=%s"},
-	{Opt_hostdata, "hostdata=%s"},
-	{Opt_spectator, "spectator"},
-	{Opt_ignore_local_fs, "ignore_local_fs"},
-	{Opt_localflocks, "localflocks"},
-	{Opt_localcaching, "localcaching"},
-	{Opt_debug, "debug"},
-	{Opt_nodebug, "nodebug"},
-	{Opt_upgrade, "upgrade"},
-	{Opt_acl, "acl"},
-	{Opt_noacl, "noacl"},
-	{Opt_quota_off, "quota=off"},
-	{Opt_quota_account, "quota=account"},
-	{Opt_quota_on, "quota=on"},
-	{Opt_quota, "quota"},
-	{Opt_noquota, "noquota"},
-	{Opt_suiddir, "suiddir"},
-	{Opt_nosuiddir, "nosuiddir"},
-	{Opt_data_writeback, "data=writeback"},
-	{Opt_data_ordered, "data=ordered"},
-	{Opt_meta, "meta"},
-	{Opt_discard, "discard"},
-	{Opt_nodiscard, "nodiscard"},
-	{Opt_commit, "commit=%d"},
-	{Opt_err, NULL}
-};
-
-/**
- * gfs2_mount_args - Parse mount options
- * @sdp:
- * @data:
- *
- * Return: errno
- */
-
-int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
-{
-	char *o;
-	int token;
-	substring_t tmp[MAX_OPT_ARGS];
-	int rv;
-
-	/* Split the options into tokens with the "," character and
-	   process them */
-
-	while (1) {
-		o = strsep(&options, ",");
-		if (o == NULL)
-			break;
-		if (*o == '\0')
-			continue;
-
-		token = match_token(o, tokens, tmp);
-		switch (token) {
-		case Opt_lockproto:
-			match_strlcpy(args->ar_lockproto, &tmp[0],
-				      GFS2_LOCKNAME_LEN);
-			break;
-		case Opt_locktable:
-			match_strlcpy(args->ar_locktable, &tmp[0],
-				      GFS2_LOCKNAME_LEN);
-			break;
-		case Opt_hostdata:
-			match_strlcpy(args->ar_hostdata, &tmp[0],
-				      GFS2_LOCKNAME_LEN);
-			break;
-		case Opt_spectator:
-			args->ar_spectator = 1;
-			break;
-		case Opt_ignore_local_fs:
-			args->ar_ignore_local_fs = 1;
-			break;
-		case Opt_localflocks:
-			args->ar_localflocks = 1;
-			break;
-		case Opt_localcaching:
-			args->ar_localcaching = 1;
-			break;
-		case Opt_debug:
-			args->ar_debug = 1;
-			break;
-		case Opt_nodebug:
-			args->ar_debug = 0;
-			break;
-		case Opt_upgrade:
-			args->ar_upgrade = 1;
-			break;
-		case Opt_acl:
-			args->ar_posix_acl = 1;
-			break;
-		case Opt_noacl:
-			args->ar_posix_acl = 0;
-			break;
-		case Opt_quota_off:
-		case Opt_noquota:
-			args->ar_quota = GFS2_QUOTA_OFF;
-			break;
-		case Opt_quota_account:
-			args->ar_quota = GFS2_QUOTA_ACCOUNT;
-			break;
-		case Opt_quota_on:
-		case Opt_quota:
-			args->ar_quota = GFS2_QUOTA_ON;
-			break;
-		case Opt_suiddir:
-			args->ar_suiddir = 1;
-			break;
-		case Opt_nosuiddir:
-			args->ar_suiddir = 0;
-			break;
-		case Opt_data_writeback:
-			args->ar_data = GFS2_DATA_WRITEBACK;
-			break;
-		case Opt_data_ordered:
-			args->ar_data = GFS2_DATA_ORDERED;
-			break;
-		case Opt_meta:
-			args->ar_meta = 1;
-			break;
-		case Opt_discard:
-			args->ar_discard = 1;
-			break;
-		case Opt_nodiscard:
-			args->ar_discard = 0;
-			break;
-		case Opt_commit:
-			rv = match_int(&tmp[0], &args->ar_commit);
-			if (rv || args->ar_commit <= 0) {
-				fs_info(sdp, "commit mount option requires a positive numeric argument\n");
-				return rv ? rv : -EINVAL;
-			}
-			break;
-		case Opt_err:
-		default:
-			fs_info(sdp, "invalid mount option: %s\n", o);
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
deleted file mode 100644
index 2fd1dcb..0000000
--- a/fs/gfs2/ops_super.c
+++ /dev/null
@@ -1,757 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/statfs.h>
-#include <linux/seq_file.h>
-#include <linux/mount.h>
-#include <linux/kthread.h>
-#include <linux/delay.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/crc32.h>
-#include <linux/time.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "glock.h"
-#include "inode.h"
-#include "log.h"
-#include "quota.h"
-#include "recovery.h"
-#include "rgrp.h"
-#include "super.h"
-#include "sys.h"
-#include "util.h"
-#include "trans.h"
-#include "dir.h"
-#include "eattr.h"
-#include "bmap.h"
-#include "meta_io.h"
-
-#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
-
-/**
- * gfs2_write_inode - Make sure the inode is stable on the disk
- * @inode: The inode
- * @sync: synchronous write flag
- *
- * Returns: errno
- */
-
-static int gfs2_write_inode(struct inode *inode, int sync)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_holder gh;
-	struct buffer_head *bh;
-	struct timespec atime;
-	struct gfs2_dinode *di;
-	int ret = 0;
-
-	/* Check this is a "normal" inode, etc */
-	if (!test_bit(GIF_USER, &ip->i_flags) ||
-	    (current->flags & PF_MEMALLOC))
-		return 0;
-	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	if (ret)
-		goto do_flush;
-	ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
-	if (ret)
-		goto do_unlock;
-	ret = gfs2_meta_inode_buffer(ip, &bh);
-	if (ret == 0) {
-		di = (struct gfs2_dinode *)bh->b_data;
-		atime.tv_sec = be64_to_cpu(di->di_atime);
-		atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
-		if (timespec_compare(&inode->i_atime, &atime) > 0) {
-			gfs2_trans_add_bh(ip->i_gl, bh, 1);
-			gfs2_dinode_out(ip, bh->b_data);
-		}
-		brelse(bh);
-	}
-	gfs2_trans_end(sdp);
-do_unlock:
-	gfs2_glock_dq_uninit(&gh);
-do_flush:
-	if (sync != 0)
-		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
-	return ret;
-}
-
-/**
- * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
- * @sdp: the filesystem
- *
- * Returns: errno
- */
-
-static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
-{
-	struct gfs2_holder t_gh;
-	int error;
-
-	gfs2_quota_sync(sdp);
-	gfs2_statfs_sync(sdp);
-
-	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
-				   &t_gh);
-	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-		return error;
-
-	gfs2_meta_syncfs(sdp);
-	gfs2_log_shutdown(sdp);
-
-	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-
-	if (t_gh.gh_gl)
-		gfs2_glock_dq_uninit(&t_gh);
-
-	gfs2_quota_cleanup(sdp);
-
-	return error;
-}
-
-static int gfs2_umount_recovery_wait(void *word)
-{
-	schedule();
-	return 0;
-}
-
-/**
- * gfs2_put_super - Unmount the filesystem
- * @sb: The VFS superblock
- *
- */
-
-static void gfs2_put_super(struct super_block *sb)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	int error;
-	struct gfs2_jdesc *jd;
-
-	/*  Unfreeze the filesystem, if we need to  */
-
-	mutex_lock(&sdp->sd_freeze_lock);
-	if (sdp->sd_freeze_count)
-		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
-	mutex_unlock(&sdp->sd_freeze_lock);
-
-	/* No more recovery requests */
-	set_bit(SDF_NORECOVERY, &sdp->sd_flags);
-	smp_mb();
-
-	/* Wait on outstanding recovery */
-restart:
-	spin_lock(&sdp->sd_jindex_spin);
-	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
-		if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
-			continue;
-		spin_unlock(&sdp->sd_jindex_spin);
-		wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
-			    gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE);
-		goto restart;
-	}
-	spin_unlock(&sdp->sd_jindex_spin);
-
-	kthread_stop(sdp->sd_quotad_process);
-	kthread_stop(sdp->sd_logd_process);
-
-	if (!(sb->s_flags & MS_RDONLY)) {
-		error = gfs2_make_fs_ro(sdp);
-		if (error)
-			gfs2_io_error(sdp);
-	}
-	/*  At this point, we're through modifying the disk  */
-
-	/*  Release stuff  */
-
-	iput(sdp->sd_jindex);
-	iput(sdp->sd_inum_inode);
-	iput(sdp->sd_statfs_inode);
-	iput(sdp->sd_rindex);
-	iput(sdp->sd_quota_inode);
-
-	gfs2_glock_put(sdp->sd_rename_gl);
-	gfs2_glock_put(sdp->sd_trans_gl);
-
-	if (!sdp->sd_args.ar_spectator) {
-		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
-		iput(sdp->sd_ir_inode);
-		iput(sdp->sd_sc_inode);
-		iput(sdp->sd_qc_inode);
-	}
-
-	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
-	gfs2_clear_rgrpd(sdp);
-	gfs2_jindex_free(sdp);
-	/*  Take apart glock structures and buffer lists  */
-	gfs2_gl_hash_clear(sdp);
-	/*  Unmount the locking protocol  */
-	gfs2_lm_unmount(sdp);
-
-	/*  At this point, we're through participating in the lockspace  */
-	gfs2_sys_fs_del(sdp);
-}
-
-/**
- * gfs2_write_super
- * @sb: the superblock
- *
- */
-
-static void gfs2_write_super(struct super_block *sb)
-{
-	sb->s_dirt = 0;
-}
-
-/**
- * gfs2_sync_fs - sync the filesystem
- * @sb: the superblock
- *
- * Flushes the log to disk.
- */
-
-static int gfs2_sync_fs(struct super_block *sb, int wait)
-{
-	sb->s_dirt = 0;
-	if (wait && sb->s_fs_info)
-		gfs2_log_flush(sb->s_fs_info, NULL);
-	return 0;
-}
-
-/**
- * gfs2_freeze - prevent further writes to the filesystem
- * @sb: the VFS structure for the filesystem
- *
- */
-
-static int gfs2_freeze(struct super_block *sb)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	int error;
-
-	if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-		return -EINVAL;
-
-	for (;;) {
-		error = gfs2_freeze_fs(sdp);
-		if (!error)
-			break;
-
-		switch (error) {
-		case -EBUSY:
-			fs_err(sdp, "waiting for recovery before freeze\n");
-			break;
-
-		default:
-			fs_err(sdp, "error freezing FS: %d\n", error);
-			break;
-		}
-
-		fs_err(sdp, "retrying...\n");
-		msleep(1000);
-	}
-	return 0;
-}
-
-/**
- * gfs2_unfreeze - reallow writes to the filesystem
- * @sb: the VFS structure for the filesystem
- *
- */
-
-static int gfs2_unfreeze(struct super_block *sb)
-{
-	gfs2_unfreeze_fs(sb->s_fs_info);
-	return 0;
-}
-
-/**
- * statfs_fill - fill in the sg for a given RG
- * @rgd: the RG
- * @sc: the sc structure
- *
- * Returns: 0 on success, -ESTALE if the LVB is invalid
- */
-
-static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
-			    struct gfs2_statfs_change_host *sc)
-{
-	gfs2_rgrp_verify(rgd);
-	sc->sc_total += rgd->rd_data;
-	sc->sc_free += rgd->rd_free;
-	sc->sc_dinodes += rgd->rd_dinodes;
-	return 0;
-}
-
-/**
- * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
- * @sdp: the filesystem
- * @sc: the sc info that will be returned
- *
- * Any error (other than a signal) will cause this routine to fall back
- * to the synchronous version.
- *
- * FIXME: This really shouldn't busy wait like this.
- *
- * Returns: errno
- */
-
-static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
-{
-	struct gfs2_holder ri_gh;
-	struct gfs2_rgrpd *rgd_next;
-	struct gfs2_holder *gha, *gh;
-	unsigned int slots = 64;
-	unsigned int x;
-	int done;
-	int error = 0, err;
-
-	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
-	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
-	if (!gha)
-		return -ENOMEM;
-
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		goto out;
-
-	rgd_next = gfs2_rgrpd_get_first(sdp);
-
-	for (;;) {
-		done = 1;
-
-		for (x = 0; x < slots; x++) {
-			gh = gha + x;
-
-			if (gh->gh_gl && gfs2_glock_poll(gh)) {
-				err = gfs2_glock_wait(gh);
-				if (err) {
-					gfs2_holder_uninit(gh);
-					error = err;
-				} else {
-					if (!error)
-						error = statfs_slow_fill(
-							gh->gh_gl->gl_object, sc);
-					gfs2_glock_dq_uninit(gh);
-				}
-			}
-
-			if (gh->gh_gl)
-				done = 0;
-			else if (rgd_next && !error) {
-				error = gfs2_glock_nq_init(rgd_next->rd_gl,
-							   LM_ST_SHARED,
-							   GL_ASYNC,
-							   gh);
-				rgd_next = gfs2_rgrpd_get_next(rgd_next);
-				done = 0;
-			}
-
-			if (signal_pending(current))
-				error = -ERESTARTSYS;
-		}
-
-		if (done)
-			break;
-
-		yield();
-	}
-
-	gfs2_glock_dq_uninit(&ri_gh);
-
-out:
-	kfree(gha);
-	return error;
-}
-
-/**
- * gfs2_statfs_i - Do a statfs
- * @sdp: the filesystem
- * @sg: the sg structure
- *
- * Returns: errno
- */
-
-static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
-{
-	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
-	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
-
-	spin_lock(&sdp->sd_statfs_spin);
-
-	*sc = *m_sc;
-	sc->sc_total += l_sc->sc_total;
-	sc->sc_free += l_sc->sc_free;
-	sc->sc_dinodes += l_sc->sc_dinodes;
-
-	spin_unlock(&sdp->sd_statfs_spin);
-
-	if (sc->sc_free < 0)
-		sc->sc_free = 0;
-	if (sc->sc_free > sc->sc_total)
-		sc->sc_free = sc->sc_total;
-	if (sc->sc_dinodes < 0)
-		sc->sc_dinodes = 0;
-
-	return 0;
-}
-
-/**
- * gfs2_statfs - Gather and return stats about the filesystem
- * @sb: The superblock
- * @statfsbuf: The buffer
- *
- * Returns: 0 on success or error code
- */
-
-static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
-	struct super_block *sb = dentry->d_inode->i_sb;
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_statfs_change_host sc;
-	int error;
-
-	if (gfs2_tune_get(sdp, gt_statfs_slow))
-		error = gfs2_statfs_slow(sdp, &sc);
-	else
-		error = gfs2_statfs_i(sdp, &sc);
-
-	if (error)
-		return error;
-
-	buf->f_type = GFS2_MAGIC;
-	buf->f_bsize = sdp->sd_sb.sb_bsize;
-	buf->f_blocks = sc.sc_total;
-	buf->f_bfree = sc.sc_free;
-	buf->f_bavail = sc.sc_free;
-	buf->f_files = sc.sc_dinodes + sc.sc_free;
-	buf->f_ffree = sc.sc_free;
-	buf->f_namelen = GFS2_FNAMESIZE;
-
-	return 0;
-}
-
-/**
- * gfs2_remount_fs - called when the FS is remounted
- * @sb:  the filesystem
- * @flags:  the remount flags
- * @data:  extra data passed in (not used right now)
- *
- * Returns: errno
- */
-
-static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_args args = sdp->sd_args; /* Default to current settings */
-	struct gfs2_tune *gt = &sdp->sd_tune;
-	int error;
-
-	spin_lock(&gt->gt_spin);
-	args.ar_commit = gt->gt_log_flush_secs;
-	spin_unlock(&gt->gt_spin);
-	error = gfs2_mount_args(sdp, &args, data);
-	if (error)
-		return error;
-
-	/* Not allowed to change locking details */
-	if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
-	    strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
-	    strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
-		return -EINVAL;
-
-	/* Some flags must not be changed */
-	if (args_neq(&args, &sdp->sd_args, spectator) ||
-	    args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
-	    args_neq(&args, &sdp->sd_args, localflocks) ||
-	    args_neq(&args, &sdp->sd_args, localcaching) ||
-	    args_neq(&args, &sdp->sd_args, meta))
-		return -EINVAL;
-
-	if (sdp->sd_args.ar_spectator)
-		*flags |= MS_RDONLY;
-
-	if ((sb->s_flags ^ *flags) & MS_RDONLY) {
-		if (*flags & MS_RDONLY)
-			error = gfs2_make_fs_ro(sdp);
-		else
-			error = gfs2_make_fs_rw(sdp);
-		if (error)
-			return error;
-	}
-
-	sdp->sd_args = args;
-	if (sdp->sd_args.ar_posix_acl)
-		sb->s_flags |= MS_POSIXACL;
-	else
-		sb->s_flags &= ~MS_POSIXACL;
-	spin_lock(&gt->gt_spin);
-	gt->gt_log_flush_secs = args.ar_commit;
-	spin_unlock(&gt->gt_spin);
-
-	return 0;
-}
-
-/**
- * gfs2_drop_inode - Drop an inode (test for remote unlink)
- * @inode: The inode to drop
- *
- * If we've received a callback on an iopen lock then its because a
- * remote node tried to deallocate the inode but failed due to this node
- * still having the inode open. Here we mark the link count zero
- * since we know that it must have reached zero if the GLF_DEMOTE flag
- * is set on the iopen glock. If we didn't do a disk read since the
- * remote node removed the final link then we might otherwise miss
- * this event. This check ensures that this node will deallocate the
- * inode's blocks, or alternatively pass the baton on to another
- * node for later deallocation.
- */
-
-static void gfs2_drop_inode(struct inode *inode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) {
-		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
-		if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
-			clear_nlink(inode);
-	}
-	generic_drop_inode(inode);
-}
-
-/**
- * gfs2_clear_inode - Deallocate an inode when VFS is done with it
- * @inode: The VFS inode
- *
- */
-
-static void gfs2_clear_inode(struct inode *inode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	/* This tells us its a "real" inode and not one which only
-	 * serves to contain an address space (see rgrp.c, meta_io.c)
-	 * which therefore doesn't have its own glocks.
-	 */
-	if (test_bit(GIF_USER, &ip->i_flags)) {
-		ip->i_gl->gl_object = NULL;
-		gfs2_glock_put(ip->i_gl);
-		ip->i_gl = NULL;
-		if (ip->i_iopen_gh.gh_gl) {
-			ip->i_iopen_gh.gh_gl->gl_object = NULL;
-			gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-		}
-	}
-}
-
-static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
-{
-	do {
-		if (d1 == d2)
-			return 1;
-		d1 = d1->d_parent;
-	} while (!IS_ROOT(d1));
-	return 0;
-}
-
-/**
- * gfs2_show_options - Show mount options for /proc/mounts
- * @s: seq_file structure
- * @mnt: vfsmount
- *
- * Returns: 0 on success or error code
- */
-
-static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
-{
-	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
-	struct gfs2_args *args = &sdp->sd_args;
-	int lfsecs;
-
-	if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
-		seq_printf(s, ",meta");
-	if (args->ar_lockproto[0])
-		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
-	if (args->ar_locktable[0])
-		seq_printf(s, ",locktable=%s", args->ar_locktable);
-	if (args->ar_hostdata[0])
-		seq_printf(s, ",hostdata=%s", args->ar_hostdata);
-	if (args->ar_spectator)
-		seq_printf(s, ",spectator");
-	if (args->ar_ignore_local_fs)
-		seq_printf(s, ",ignore_local_fs");
-	if (args->ar_localflocks)
-		seq_printf(s, ",localflocks");
-	if (args->ar_localcaching)
-		seq_printf(s, ",localcaching");
-	if (args->ar_debug)
-		seq_printf(s, ",debug");
-	if (args->ar_upgrade)
-		seq_printf(s, ",upgrade");
-	if (args->ar_posix_acl)
-		seq_printf(s, ",acl");
-	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
-		char *state;
-		switch (args->ar_quota) {
-		case GFS2_QUOTA_OFF:
-			state = "off";
-			break;
-		case GFS2_QUOTA_ACCOUNT:
-			state = "account";
-			break;
-		case GFS2_QUOTA_ON:
-			state = "on";
-			break;
-		default:
-			state = "unknown";
-			break;
-		}
-		seq_printf(s, ",quota=%s", state);
-	}
-	if (args->ar_suiddir)
-		seq_printf(s, ",suiddir");
-	if (args->ar_data != GFS2_DATA_DEFAULT) {
-		char *state;
-		switch (args->ar_data) {
-		case GFS2_DATA_WRITEBACK:
-			state = "writeback";
-			break;
-		case GFS2_DATA_ORDERED:
-			state = "ordered";
-			break;
-		default:
-			state = "unknown";
-			break;
-		}
-		seq_printf(s, ",data=%s", state);
-	}
-	if (args->ar_discard)
-		seq_printf(s, ",discard");
-	lfsecs = sdp->sd_tune.gt_log_flush_secs;
-	if (lfsecs != 60)
-		seq_printf(s, ",commit=%d", lfsecs);
-	return 0;
-}
-
-/*
- * We have to (at the moment) hold the inodes main lock to cover
- * the gap between unlocking the shared lock on the iopen lock and
- * taking the exclusive lock. I'd rather do a shared -> exclusive
- * conversion on the iopen lock, but we can change that later. This
- * is safe, just less efficient.
- */
-
-static void gfs2_delete_inode(struct inode *inode)
-{
-	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
-	int error;
-
-	if (!test_bit(GIF_USER, &ip->i_flags))
-		goto out;
-
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	if (unlikely(error)) {
-		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-		goto out;
-	}
-
-	gfs2_glock_dq_wait(&ip->i_iopen_gh);
-	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
-	error = gfs2_glock_nq(&ip->i_iopen_gh);
-	if (error)
-		goto out_truncate;
-
-	if (S_ISDIR(inode->i_mode) &&
-	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
-		error = gfs2_dir_exhash_dealloc(ip);
-		if (error)
-			goto out_unlock;
-	}
-
-	if (ip->i_eattr) {
-		error = gfs2_ea_dealloc(ip);
-		if (error)
-			goto out_unlock;
-	}
-
-	if (!gfs2_is_stuffed(ip)) {
-		error = gfs2_file_dealloc(ip);
-		if (error)
-			goto out_unlock;
-	}
-
-	error = gfs2_dinode_dealloc(ip);
-	if (error)
-		goto out_unlock;
-
-out_truncate:
-	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
-	if (error)
-		goto out_unlock;
-	/* Needs to be done before glock release & also in a transaction */
-	truncate_inode_pages(&inode->i_data, 0);
-	gfs2_trans_end(sdp);
-
-out_unlock:
-	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
-		gfs2_glock_dq(&ip->i_iopen_gh);
-	gfs2_holder_uninit(&ip->i_iopen_gh);
-	gfs2_glock_dq_uninit(&gh);
-	if (error && error != GLR_TRYFAILED && error != -EROFS)
-		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
-out:
-	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
-}
-
-static struct inode *gfs2_alloc_inode(struct super_block *sb)
-{
-	struct gfs2_inode *ip;
-
-	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
-	if (ip) {
-		ip->i_flags = 0;
-		ip->i_gl = NULL;
-	}
-	return &ip->i_inode;
-}
-
-static void gfs2_destroy_inode(struct inode *inode)
-{
-	kmem_cache_free(gfs2_inode_cachep, inode);
-}
-
-const struct super_operations gfs2_super_ops = {
-	.alloc_inode		= gfs2_alloc_inode,
-	.destroy_inode		= gfs2_destroy_inode,
-	.write_inode		= gfs2_write_inode,
-	.delete_inode		= gfs2_delete_inode,
-	.put_super		= gfs2_put_super,
-	.write_super		= gfs2_write_super,
-	.sync_fs		= gfs2_sync_fs,
-	.freeze_fs 		= gfs2_freeze,
-	.unfreeze_fs		= gfs2_unfreeze,
-	.statfs			= gfs2_statfs,
-	.remount_fs		= gfs2_remount_fs,
-	.clear_inode		= gfs2_clear_inode,
-	.drop_inode		= gfs2_drop_inode,
-	.show_options		= gfs2_show_options,
-};
-
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 601913e..40bcc37 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -7,14 +7,20 @@
  * of the GNU General Public License version 2.
  */
 
+#include <linux/bio.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
-#include <linux/crc32.h>
+#include <linux/statfs.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
-#include <linux/bio.h>
+#include <linux/crc32.h>
+#include <linux/time.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -31,6 +37,183 @@
 #include "super.h"
 #include "trans.h"
 #include "util.h"
+#include "sys.h"
+#include "eattr.h"
+
+#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
+
+enum {
+	Opt_lockproto,
+	Opt_locktable,
+	Opt_hostdata,
+	Opt_spectator,
+	Opt_ignore_local_fs,
+	Opt_localflocks,
+	Opt_localcaching,
+	Opt_debug,
+	Opt_nodebug,
+	Opt_upgrade,
+	Opt_acl,
+	Opt_noacl,
+	Opt_quota_off,
+	Opt_quota_account,
+	Opt_quota_on,
+	Opt_quota,
+	Opt_noquota,
+	Opt_suiddir,
+	Opt_nosuiddir,
+	Opt_data_writeback,
+	Opt_data_ordered,
+	Opt_meta,
+	Opt_discard,
+	Opt_nodiscard,
+	Opt_commit,
+	Opt_error,
+};
+
+static const match_table_t tokens = {
+	{Opt_lockproto, "lockproto=%s"},
+	{Opt_locktable, "locktable=%s"},
+	{Opt_hostdata, "hostdata=%s"},
+	{Opt_spectator, "spectator"},
+	{Opt_ignore_local_fs, "ignore_local_fs"},
+	{Opt_localflocks, "localflocks"},
+	{Opt_localcaching, "localcaching"},
+	{Opt_debug, "debug"},
+	{Opt_nodebug, "nodebug"},
+	{Opt_upgrade, "upgrade"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
+	{Opt_quota_off, "quota=off"},
+	{Opt_quota_account, "quota=account"},
+	{Opt_quota_on, "quota=on"},
+	{Opt_quota, "quota"},
+	{Opt_noquota, "noquota"},
+	{Opt_suiddir, "suiddir"},
+	{Opt_nosuiddir, "nosuiddir"},
+	{Opt_data_writeback, "data=writeback"},
+	{Opt_data_ordered, "data=ordered"},
+	{Opt_meta, "meta"},
+	{Opt_discard, "discard"},
+	{Opt_nodiscard, "nodiscard"},
+	{Opt_commit, "commit=%d"},
+	{Opt_error, NULL}
+};
+
+/**
+ * gfs2_mount_args - Parse mount options
+ * @sdp:
+ * @data:
+ *
+ * Return: errno
+ */
+
+int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
+{
+	char *o;
+	int token;
+	substring_t tmp[MAX_OPT_ARGS];
+	int rv;
+
+	/* Split the options into tokens with the "," character and
+	   process them */
+
+	while (1) {
+		o = strsep(&options, ",");
+		if (o == NULL)
+			break;
+		if (*o == '\0')
+			continue;
+
+		token = match_token(o, tokens, tmp);
+		switch (token) {
+		case Opt_lockproto:
+			match_strlcpy(args->ar_lockproto, &tmp[0],
+				      GFS2_LOCKNAME_LEN);
+			break;
+		case Opt_locktable:
+			match_strlcpy(args->ar_locktable, &tmp[0],
+				      GFS2_LOCKNAME_LEN);
+			break;
+		case Opt_hostdata:
+			match_strlcpy(args->ar_hostdata, &tmp[0],
+				      GFS2_LOCKNAME_LEN);
+			break;
+		case Opt_spectator:
+			args->ar_spectator = 1;
+			break;
+		case Opt_ignore_local_fs:
+			args->ar_ignore_local_fs = 1;
+			break;
+		case Opt_localflocks:
+			args->ar_localflocks = 1;
+			break;
+		case Opt_localcaching:
+			args->ar_localcaching = 1;
+			break;
+		case Opt_debug:
+			args->ar_debug = 1;
+			break;
+		case Opt_nodebug:
+			args->ar_debug = 0;
+			break;
+		case Opt_upgrade:
+			args->ar_upgrade = 1;
+			break;
+		case Opt_acl:
+			args->ar_posix_acl = 1;
+			break;
+		case Opt_noacl:
+			args->ar_posix_acl = 0;
+			break;
+		case Opt_quota_off:
+		case Opt_noquota:
+			args->ar_quota = GFS2_QUOTA_OFF;
+			break;
+		case Opt_quota_account:
+			args->ar_quota = GFS2_QUOTA_ACCOUNT;
+			break;
+		case Opt_quota_on:
+		case Opt_quota:
+			args->ar_quota = GFS2_QUOTA_ON;
+			break;
+		case Opt_suiddir:
+			args->ar_suiddir = 1;
+			break;
+		case Opt_nosuiddir:
+			args->ar_suiddir = 0;
+			break;
+		case Opt_data_writeback:
+			args->ar_data = GFS2_DATA_WRITEBACK;
+			break;
+		case Opt_data_ordered:
+			args->ar_data = GFS2_DATA_ORDERED;
+			break;
+		case Opt_meta:
+			args->ar_meta = 1;
+			break;
+		case Opt_discard:
+			args->ar_discard = 1;
+			break;
+		case Opt_nodiscard:
+			args->ar_discard = 0;
+			break;
+		case Opt_commit:
+			rv = match_int(&tmp[0], &args->ar_commit);
+			if (rv || args->ar_commit <= 0) {
+				fs_info(sdp, "commit mount option requires a positive numeric argument\n");
+				return rv ? rv : -EINVAL;
+			}
+			break;
+		case Opt_error:
+		default:
+			fs_info(sdp, "invalid mount option: %s\n", o);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
 
 /**
  * gfs2_jindex_free - Clear all the journal index information
@@ -436,3 +619,719 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
 	mutex_unlock(&sdp->sd_freeze_lock);
 }
 
+
+/**
+ * gfs2_write_inode - Make sure the inode is stable on the disk
+ * @inode: The inode
+ * @sync: synchronous write flag
+ *
+ * Returns: errno
+ */
+
+static int gfs2_write_inode(struct inode *inode, int sync)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct gfs2_holder gh;
+	struct buffer_head *bh;
+	struct timespec atime;
+	struct gfs2_dinode *di;
+	int ret = 0;
+
+	/* Check this is a "normal" inode, etc */
+	if (!test_bit(GIF_USER, &ip->i_flags) ||
+	    (current->flags & PF_MEMALLOC))
+		return 0;
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	if (ret)
+		goto do_flush;
+	ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (ret)
+		goto do_unlock;
+	ret = gfs2_meta_inode_buffer(ip, &bh);
+	if (ret == 0) {
+		di = (struct gfs2_dinode *)bh->b_data;
+		atime.tv_sec = be64_to_cpu(di->di_atime);
+		atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
+		if (timespec_compare(&inode->i_atime, &atime) > 0) {
+			gfs2_trans_add_bh(ip->i_gl, bh, 1);
+			gfs2_dinode_out(ip, bh->b_data);
+		}
+		brelse(bh);
+	}
+	gfs2_trans_end(sdp);
+do_unlock:
+	gfs2_glock_dq_uninit(&gh);
+do_flush:
+	if (sync != 0)
+		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+	return ret;
+}
+
+/**
+ * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+{
+	struct gfs2_holder t_gh;
+	int error;
+
+	gfs2_quota_sync(sdp);
+	gfs2_statfs_sync(sdp);
+
+	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
+				   &t_gh);
+	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+		return error;
+
+	gfs2_meta_syncfs(sdp);
+	gfs2_log_shutdown(sdp);
+
+	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+	if (t_gh.gh_gl)
+		gfs2_glock_dq_uninit(&t_gh);
+
+	gfs2_quota_cleanup(sdp);
+
+	return error;
+}
+
+static int gfs2_umount_recovery_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
+/**
+ * gfs2_put_super - Unmount the filesystem
+ * @sb: The VFS superblock
+ *
+ */
+
+static void gfs2_put_super(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	int error;
+	struct gfs2_jdesc *jd;
+
+	/*  Unfreeze the filesystem, if we need to  */
+
+	mutex_lock(&sdp->sd_freeze_lock);
+	if (sdp->sd_freeze_count)
+		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+	mutex_unlock(&sdp->sd_freeze_lock);
+
+	/* No more recovery requests */
+	set_bit(SDF_NORECOVERY, &sdp->sd_flags);
+	smp_mb();
+
+	/* Wait on outstanding recovery */
+restart:
+	spin_lock(&sdp->sd_jindex_spin);
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
+			continue;
+		spin_unlock(&sdp->sd_jindex_spin);
+		wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
+			    gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE);
+		goto restart;
+	}
+	spin_unlock(&sdp->sd_jindex_spin);
+
+	kthread_stop(sdp->sd_quotad_process);
+	kthread_stop(sdp->sd_logd_process);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		error = gfs2_make_fs_ro(sdp);
+		if (error)
+			gfs2_io_error(sdp);
+	}
+	/*  At this point, we're through modifying the disk  */
+
+	/*  Release stuff  */
+
+	iput(sdp->sd_jindex);
+	iput(sdp->sd_inum_inode);
+	iput(sdp->sd_statfs_inode);
+	iput(sdp->sd_rindex);
+	iput(sdp->sd_quota_inode);
+
+	gfs2_glock_put(sdp->sd_rename_gl);
+	gfs2_glock_put(sdp->sd_trans_gl);
+
+	if (!sdp->sd_args.ar_spectator) {
+		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
+		iput(sdp->sd_ir_inode);
+		iput(sdp->sd_sc_inode);
+		iput(sdp->sd_qc_inode);
+	}
+
+	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
+	gfs2_clear_rgrpd(sdp);
+	gfs2_jindex_free(sdp);
+	/*  Take apart glock structures and buffer lists  */
+	gfs2_gl_hash_clear(sdp);
+	/*  Unmount the locking protocol  */
+	gfs2_lm_unmount(sdp);
+
+	/*  At this point, we're through participating in the lockspace  */
+	gfs2_sys_fs_del(sdp);
+}
+
+/**
+ * gfs2_write_super
+ * @sb: the superblock
+ *
+ */
+
+static void gfs2_write_super(struct super_block *sb)
+{
+	sb->s_dirt = 0;
+}
+
+/**
+ * gfs2_sync_fs - sync the filesystem
+ * @sb: the superblock
+ *
+ * Flushes the log to disk.
+ */
+
+static int gfs2_sync_fs(struct super_block *sb, int wait)
+{
+	sb->s_dirt = 0;
+	if (wait && sb->s_fs_info)
+		gfs2_log_flush(sb->s_fs_info, NULL);
+	return 0;
+}
+
+/**
+ * gfs2_freeze - prevent further writes to the filesystem
+ * @sb: the VFS structure for the filesystem
+ *
+ */
+
+static int gfs2_freeze(struct super_block *sb)
+{
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	int error;
+
+	if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+		return -EINVAL;
+
+	for (;;) {
+		error = gfs2_freeze_fs(sdp);
+		if (!error)
+			break;
+
+		switch (error) {
+		case -EBUSY:
+			fs_err(sdp, "waiting for recovery before freeze\n");
+			break;
+
+		default:
+			fs_err(sdp, "error freezing FS: %d\n", error);
+			break;
+		}
+
+		fs_err(sdp, "retrying...\n");
+		msleep(1000);
+	}
+	return 0;
+}
+
+/**
+ * gfs2_unfreeze - reallow writes to the filesystem
+ * @sb: the VFS structure for the filesystem
+ *
+ */
+
+static int gfs2_unfreeze(struct super_block *sb)
+{
+	gfs2_unfreeze_fs(sb->s_fs_info);
+	return 0;
+}
+
+/**
+ * statfs_fill - fill in the sg for a given RG
+ * @rgd: the RG
+ * @sc: the sc structure
+ *
+ * Returns: 0 on success, -ESTALE if the LVB is invalid
+ */
+
+static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
+			    struct gfs2_statfs_change_host *sc)
+{
+	gfs2_rgrp_verify(rgd);
+	sc->sc_total += rgd->rd_data;
+	sc->sc_free += rgd->rd_free;
+	sc->sc_dinodes += rgd->rd_dinodes;
+	return 0;
+}
+
+/**
+ * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
+ * @sdp: the filesystem
+ * @sc: the sc info that will be returned
+ *
+ * Any error (other than a signal) will cause this routine to fall back
+ * to the synchronous version.
+ *
+ * FIXME: This really shouldn't busy wait like this.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
+{
+	struct gfs2_holder ri_gh;
+	struct gfs2_rgrpd *rgd_next;
+	struct gfs2_holder *gha, *gh;
+	unsigned int slots = 64;
+	unsigned int x;
+	int done;
+	int error = 0, err;
+
+	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
+	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
+	if (!gha)
+		return -ENOMEM;
+
+	error = gfs2_rindex_hold(sdp, &ri_gh);
+	if (error)
+		goto out;
+
+	rgd_next = gfs2_rgrpd_get_first(sdp);
+
+	for (;;) {
+		done = 1;
+
+		for (x = 0; x < slots; x++) {
+			gh = gha + x;
+
+			if (gh->gh_gl && gfs2_glock_poll(gh)) {
+				err = gfs2_glock_wait(gh);
+				if (err) {
+					gfs2_holder_uninit(gh);
+					error = err;
+				} else {
+					if (!error)
+						error = statfs_slow_fill(
+							gh->gh_gl->gl_object, sc);
+					gfs2_glock_dq_uninit(gh);
+				}
+			}
+
+			if (gh->gh_gl)
+				done = 0;
+			else if (rgd_next && !error) {
+				error = gfs2_glock_nq_init(rgd_next->rd_gl,
+							   LM_ST_SHARED,
+							   GL_ASYNC,
+							   gh);
+				rgd_next = gfs2_rgrpd_get_next(rgd_next);
+				done = 0;
+			}
+
+			if (signal_pending(current))
+				error = -ERESTARTSYS;
+		}
+
+		if (done)
+			break;
+
+		yield();
+	}
+
+	gfs2_glock_dq_uninit(&ri_gh);
+
+out:
+	kfree(gha);
+	return error;
+}
+
+/**
+ * gfs2_statfs_i - Do a statfs
+ * @sdp: the filesystem
+ * @sg: the sg structure
+ *
+ * Returns: errno
+ */
+
+static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
+{
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+
+	spin_lock(&sdp->sd_statfs_spin);
+
+	*sc = *m_sc;
+	sc->sc_total += l_sc->sc_total;
+	sc->sc_free += l_sc->sc_free;
+	sc->sc_dinodes += l_sc->sc_dinodes;
+
+	spin_unlock(&sdp->sd_statfs_spin);
+
+	if (sc->sc_free < 0)
+		sc->sc_free = 0;
+	if (sc->sc_free > sc->sc_total)
+		sc->sc_free = sc->sc_total;
+	if (sc->sc_dinodes < 0)
+		sc->sc_dinodes = 0;
+
+	return 0;
+}
+
+/**
+ * gfs2_statfs - Gather and return stats about the filesystem
+ * @sb: The superblock
+ * @statfsbuf: The buffer
+ *
+ * Returns: 0 on success or error code
+ */
+
+static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_inode->i_sb;
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	struct gfs2_statfs_change_host sc;
+	int error;
+
+	if (gfs2_tune_get(sdp, gt_statfs_slow))
+		error = gfs2_statfs_slow(sdp, &sc);
+	else
+		error = gfs2_statfs_i(sdp, &sc);
+
+	if (error)
+		return error;
+
+	buf->f_type = GFS2_MAGIC;
+	buf->f_bsize = sdp->sd_sb.sb_bsize;
+	buf->f_blocks = sc.sc_total;
+	buf->f_bfree = sc.sc_free;
+	buf->f_bavail = sc.sc_free;
+	buf->f_files = sc.sc_dinodes + sc.sc_free;
+	buf->f_ffree = sc.sc_free;
+	buf->f_namelen = GFS2_FNAMESIZE;
+
+	return 0;
+}
+
+/**
+ * gfs2_remount_fs - called when the FS is remounted
+ * @sb:  the filesystem
+ * @flags:  the remount flags
+ * @data:  extra data passed in (not used right now)
+ *
+ * Returns: errno
+ */
+
+static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	struct gfs2_args args = sdp->sd_args; /* Default to current settings */
+	struct gfs2_tune *gt = &sdp->sd_tune;
+	int error;
+
+	spin_lock(&gt->gt_spin);
+	args.ar_commit = gt->gt_log_flush_secs;
+	spin_unlock(&gt->gt_spin);
+	error = gfs2_mount_args(sdp, &args, data);
+	if (error)
+		return error;
+
+	/* Not allowed to change locking details */
+	if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
+	    strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
+	    strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
+		return -EINVAL;
+
+	/* Some flags must not be changed */
+	if (args_neq(&args, &sdp->sd_args, spectator) ||
+	    args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
+	    args_neq(&args, &sdp->sd_args, localflocks) ||
+	    args_neq(&args, &sdp->sd_args, localcaching) ||
+	    args_neq(&args, &sdp->sd_args, meta))
+		return -EINVAL;
+
+	if (sdp->sd_args.ar_spectator)
+		*flags |= MS_RDONLY;
+
+	if ((sb->s_flags ^ *flags) & MS_RDONLY) {
+		if (*flags & MS_RDONLY)
+			error = gfs2_make_fs_ro(sdp);
+		else
+			error = gfs2_make_fs_rw(sdp);
+		if (error)
+			return error;
+	}
+
+	sdp->sd_args = args;
+	if (sdp->sd_args.ar_posix_acl)
+		sb->s_flags |= MS_POSIXACL;
+	else
+		sb->s_flags &= ~MS_POSIXACL;
+	spin_lock(&gt->gt_spin);
+	gt->gt_log_flush_secs = args.ar_commit;
+	spin_unlock(&gt->gt_spin);
+
+	return 0;
+}
+
+/**
+ * gfs2_drop_inode - Drop an inode (test for remote unlink)
+ * @inode: The inode to drop
+ *
+ * If we've received a callback on an iopen lock then its because a
+ * remote node tried to deallocate the inode but failed due to this node
+ * still having the inode open. Here we mark the link count zero
+ * since we know that it must have reached zero if the GLF_DEMOTE flag
+ * is set on the iopen glock. If we didn't do a disk read since the
+ * remote node removed the final link then we might otherwise miss
+ * this event. This check ensures that this node will deallocate the
+ * inode's blocks, or alternatively pass the baton on to another
+ * node for later deallocation.
+ */
+
+static void gfs2_drop_inode(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+
+	if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) {
+		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+		if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
+			clear_nlink(inode);
+	}
+	generic_drop_inode(inode);
+}
+
+/**
+ * gfs2_clear_inode - Deallocate an inode when VFS is done with it
+ * @inode: The VFS inode
+ *
+ */
+
+static void gfs2_clear_inode(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+
+	/* This tells us its a "real" inode and not one which only
+	 * serves to contain an address space (see rgrp.c, meta_io.c)
+	 * which therefore doesn't have its own glocks.
+	 */
+	if (test_bit(GIF_USER, &ip->i_flags)) {
+		ip->i_gl->gl_object = NULL;
+		gfs2_glock_put(ip->i_gl);
+		ip->i_gl = NULL;
+		if (ip->i_iopen_gh.gh_gl) {
+			ip->i_iopen_gh.gh_gl->gl_object = NULL;
+			gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		}
+	}
+}
+
+static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
+{
+	do {
+		if (d1 == d2)
+			return 1;
+		d1 = d1->d_parent;
+	} while (!IS_ROOT(d1));
+	return 0;
+}
+
+/**
+ * gfs2_show_options - Show mount options for /proc/mounts
+ * @s: seq_file structure
+ * @mnt: vfsmount
+ *
+ * Returns: 0 on success or error code
+ */
+
+static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+{
+	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
+	struct gfs2_args *args = &sdp->sd_args;
+	int lfsecs;
+
+	if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
+		seq_printf(s, ",meta");
+	if (args->ar_lockproto[0])
+		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
+	if (args->ar_locktable[0])
+		seq_printf(s, ",locktable=%s", args->ar_locktable);
+	if (args->ar_hostdata[0])
+		seq_printf(s, ",hostdata=%s", args->ar_hostdata);
+	if (args->ar_spectator)
+		seq_printf(s, ",spectator");
+	if (args->ar_ignore_local_fs)
+		seq_printf(s, ",ignore_local_fs");
+	if (args->ar_localflocks)
+		seq_printf(s, ",localflocks");
+	if (args->ar_localcaching)
+		seq_printf(s, ",localcaching");
+	if (args->ar_debug)
+		seq_printf(s, ",debug");
+	if (args->ar_upgrade)
+		seq_printf(s, ",upgrade");
+	if (args->ar_posix_acl)
+		seq_printf(s, ",acl");
+	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
+		char *state;
+		switch (args->ar_quota) {
+		case GFS2_QUOTA_OFF:
+			state = "off";
+			break;
+		case GFS2_QUOTA_ACCOUNT:
+			state = "account";
+			break;
+		case GFS2_QUOTA_ON:
+			state = "on";
+			break;
+		default:
+			state = "unknown";
+			break;
+		}
+		seq_printf(s, ",quota=%s", state);
+	}
+	if (args->ar_suiddir)
+		seq_printf(s, ",suiddir");
+	if (args->ar_data != GFS2_DATA_DEFAULT) {
+		char *state;
+		switch (args->ar_data) {
+		case GFS2_DATA_WRITEBACK:
+			state = "writeback";
+			break;
+		case GFS2_DATA_ORDERED:
+			state = "ordered";
+			break;
+		default:
+			state = "unknown";
+			break;
+		}
+		seq_printf(s, ",data=%s", state);
+	}
+	if (args->ar_discard)
+		seq_printf(s, ",discard");
+	lfsecs = sdp->sd_tune.gt_log_flush_secs;
+	if (lfsecs != 60)
+		seq_printf(s, ",commit=%d", lfsecs);
+	return 0;
+}
+
+/*
+ * We have to (at the moment) hold the inodes main lock to cover
+ * the gap between unlocking the shared lock on the iopen lock and
+ * taking the exclusive lock. I'd rather do a shared -> exclusive
+ * conversion on the iopen lock, but we can change that later. This
+ * is safe, just less efficient.
+ */
+
+static void gfs2_delete_inode(struct inode *inode)
+{
+	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int error;
+
+	if (!test_bit(GIF_USER, &ip->i_flags))
+		goto out;
+
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	if (unlikely(error)) {
+		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		goto out;
+	}
+
+	gfs2_glock_dq_wait(&ip->i_iopen_gh);
+	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
+	error = gfs2_glock_nq(&ip->i_iopen_gh);
+	if (error)
+		goto out_truncate;
+
+	if (S_ISDIR(inode->i_mode) &&
+	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
+		error = gfs2_dir_exhash_dealloc(ip);
+		if (error)
+			goto out_unlock;
+	}
+
+	if (ip->i_eattr) {
+		error = gfs2_ea_dealloc(ip);
+		if (error)
+			goto out_unlock;
+	}
+
+	if (!gfs2_is_stuffed(ip)) {
+		error = gfs2_file_dealloc(ip);
+		if (error)
+			goto out_unlock;
+	}
+
+	error = gfs2_dinode_dealloc(ip);
+	if (error)
+		goto out_unlock;
+
+out_truncate:
+	error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
+	if (error)
+		goto out_unlock;
+	/* Needs to be done before glock release & also in a transaction */
+	truncate_inode_pages(&inode->i_data, 0);
+	gfs2_trans_end(sdp);
+
+out_unlock:
+	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
+		gfs2_glock_dq(&ip->i_iopen_gh);
+	gfs2_holder_uninit(&ip->i_iopen_gh);
+	gfs2_glock_dq_uninit(&gh);
+	if (error && error != GLR_TRYFAILED && error != -EROFS)
+		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
+out:
+	truncate_inode_pages(&inode->i_data, 0);
+	clear_inode(inode);
+}
+
+static struct inode *gfs2_alloc_inode(struct super_block *sb)
+{
+	struct gfs2_inode *ip;
+
+	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
+	if (ip) {
+		ip->i_flags = 0;
+		ip->i_gl = NULL;
+	}
+	return &ip->i_inode;
+}
+
+static void gfs2_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(gfs2_inode_cachep, inode);
+}
+
+const struct super_operations gfs2_super_ops = {
+	.alloc_inode		= gfs2_alloc_inode,
+	.destroy_inode		= gfs2_destroy_inode,
+	.write_inode		= gfs2_write_inode,
+	.delete_inode		= gfs2_delete_inode,
+	.put_super		= gfs2_put_super,
+	.write_super		= gfs2_write_super,
+	.sync_fs		= gfs2_sync_fs,
+	.freeze_fs 		= gfs2_freeze,
+	.unfreeze_fs		= gfs2_unfreeze,
+	.statfs			= gfs2_statfs,
+	.remount_fs		= gfs2_remount_fs,
+	.clear_inode		= gfs2_clear_inode,
+	.drop_inode		= gfs2_drop_inode,
+	.show_options		= gfs2_show_options,
+};
+
-- 
cgit v0.10.2


From 2286dbfad1fb622ee2691537e5caaedee4618860 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 22 May 2009 10:45:09 +0100
Subject: GFS2: Move gfs2_rmdiri into ops_inode.c

Move gfs2_rmdiri() into ops_inode.c and make it static.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c03a1a3..9b17447 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1046,58 +1046,6 @@ fail:
 	return ERR_PTR(error);
 }
 
-/**
- * gfs2_rmdiri - Remove a directory
- * @dip: The parent directory of the directory to be removed
- * @name: The name of the directory to be removed
- * @ip: The GFS2 inode of the directory to be removed
- *
- * Assumes Glocks on dip and ip are held
- *
- * Returns: errno
- */
-
-int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
-		struct gfs2_inode *ip)
-{
-	struct qstr dotname;
-	int error;
-
-	if (ip->i_entries != 2) {
-		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(ip);
-		return -EIO;
-	}
-
-	error = gfs2_dir_del(dip, name);
-	if (error)
-		return error;
-
-	error = gfs2_change_nlink(dip, -1);
-	if (error)
-		return error;
-
-	gfs2_str2qstr(&dotname, ".");
-	error = gfs2_dir_del(ip, &dotname);
-	if (error)
-		return error;
-
-	gfs2_str2qstr(&dotname, "..");
-	error = gfs2_dir_del(ip, &dotname);
-	if (error)
-		return error;
-
-	/* It looks odd, but it really should be done twice */
-	error = gfs2_change_nlink(ip, -1);
-	if (error)
-		return error;
-
-	error = gfs2_change_nlink(ip, -1);
-	if (error)
-		return error;
-
-	return error;
-}
 
 /*
  * gfs2_unlink_ok - check to see that a inode is still in a directory
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 2c3ec07..6cd3928 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -96,8 +96,6 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
 				  const struct qstr *name,
 				  unsigned int mode, dev_t dev);
-extern int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
-		       struct gfs2_inode *ip);
 extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 			  const struct gfs2_inode *ip);
 extern int gfs2_permission(struct inode *inode, int mask);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1c70fa5..5dacd64 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -473,6 +473,59 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 }
 
 /**
+ * gfs2_rmdiri - Remove a directory
+ * @dip: The parent directory of the directory to be removed
+ * @name: The name of the directory to be removed
+ * @ip: The GFS2 inode of the directory to be removed
+ *
+ * Assumes Glocks on dip and ip are held
+ *
+ * Returns: errno
+ */
+
+static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
+		       struct gfs2_inode *ip)
+{
+	struct qstr dotname;
+	int error;
+
+	if (ip->i_entries != 2) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(ip);
+		return -EIO;
+	}
+
+	error = gfs2_dir_del(dip, name);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(dip, -1);
+	if (error)
+		return error;
+
+	gfs2_str2qstr(&dotname, ".");
+	error = gfs2_dir_del(ip, &dotname);
+	if (error)
+		return error;
+
+	gfs2_str2qstr(&dotname, "..");
+	error = gfs2_dir_del(ip, &dotname);
+	if (error)
+		return error;
+
+	/* It looks odd, but it really should be done twice */
+	error = gfs2_change_nlink(ip, -1);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(ip, -1);
+	if (error)
+		return error;
+
+	return error;
+}
+
+/**
  * gfs2_rmdir - Remove a directory
  * @dir: The parent directory of the directory to be removed
  * @dentry: The dentry of the directory to remove
-- 
cgit v0.10.2


From 536baf02f650f4547f105386878b4736fbc181e8 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 22 May 2009 10:48:59 +0100
Subject: GFS2: Move gfs2_readlinki into ops_inode.c

Move gfs2_readlinki into ops_inode.c and make it static

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9b17447..676e750 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1085,63 +1085,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	return 0;
 }
 
-/**
- * gfs2_readlinki - return the contents of a symlink
- * @ip: the symlink's inode
- * @buf: a pointer to the buffer to be filled
- * @len: a pointer to the length of @buf
- *
- * If @buf is too small, a piece of memory is kmalloc()ed and needs
- * to be freed by the caller.
- *
- * Returns: errno
- */
-
-int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
-{
-	struct gfs2_holder i_gh;
-	struct buffer_head *dibh;
-	unsigned int x;
-	int error;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
-	error = gfs2_glock_nq(&i_gh);
-	if (error) {
-		gfs2_holder_uninit(&i_gh);
-		return error;
-	}
-
-	if (!ip->i_disksize) {
-		gfs2_consist_inode(ip);
-		error = -EIO;
-		goto out;
-	}
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		goto out;
-
-	x = ip->i_disksize + 1;
-	if (x > *len) {
-		*buf = kmalloc(x, GFP_NOFS);
-		if (!*buf) {
-			error = -ENOMEM;
-			goto out_brelse;
-		}
-	}
-
-	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
-	*len = x;
-
-out_brelse:
-	brelse(dibh);
-out:
-	gfs2_glock_dq_uninit(&i_gh);
-	return error;
-}
-
-static int
-__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 {
 	struct buffer_head *dibh;
 	int error;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6cd3928..fc9a08f 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,6 @@ extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
 extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 			  const struct gfs2_inode *ip);
 extern int gfs2_permission(struct inode *inode, int mask);
-extern int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
 extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 5dacd64..f607f09 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -938,6 +938,61 @@ out:
 }
 
 /**
+ * gfs2_readlinki - return the contents of a symlink
+ * @ip: the symlink's inode
+ * @buf: a pointer to the buffer to be filled
+ * @len: a pointer to the length of @buf
+ *
+ * If @buf is too small, a piece of memory is kmalloc()ed and needs
+ * to be freed by the caller.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
+{
+	struct gfs2_holder i_gh;
+	struct buffer_head *dibh;
+	unsigned int x;
+	int error;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+	error = gfs2_glock_nq(&i_gh);
+	if (error) {
+		gfs2_holder_uninit(&i_gh);
+		return error;
+	}
+
+	if (!ip->i_disksize) {
+		gfs2_consist_inode(ip);
+		error = -EIO;
+		goto out;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		goto out;
+
+	x = ip->i_disksize + 1;
+	if (x > *len) {
+		*buf = kmalloc(x, GFP_NOFS);
+		if (!*buf) {
+			error = -ENOMEM;
+			goto out_brelse;
+		}
+	}
+
+	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
+	*len = x;
+
+out_brelse:
+	brelse(dibh);
+out:
+	gfs2_glock_dq_uninit(&i_gh);
+	return error;
+}
+
+/**
  * gfs2_readlink - Read the value of a symlink
  * @dentry: the symlink
  * @buf: the buffer to read the symlink data into
-- 
cgit v0.10.2


From 87ec21741138bb42e7f943bb142b1d8567c10925 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 22 May 2009 10:54:50 +0100
Subject: GFS2: Move gfs2_unlink_ok into ops_inode.c

Another function which is only called from one ops_inode.c so
we move it and make it static.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 676e750..2f94bd7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1046,45 +1046,6 @@ fail:
 	return ERR_PTR(error);
 }
 
-
-/*
- * gfs2_unlink_ok - check to see that a inode is still in a directory
- * @dip: the directory
- * @name: the name of the file
- * @ip: the inode
- *
- * Assumes that the lock on (at least) @dip is held.
- *
- * Returns: 0 if the parent/child relationship is correct, errno if it isn't
- */
-
-int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-		   const struct gfs2_inode *ip)
-{
-	int error;
-
-	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
-		return -EPERM;
-
-	if ((dip->i_inode.i_mode & S_ISVTX) &&
-	    dip->i_inode.i_uid != current_fsuid() &&
-	    ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
-		return -EPERM;
-
-	if (IS_APPEND(&dip->i_inode))
-		return -EPERM;
-
-	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
-	if (error)
-		return error;
-
-	error = gfs2_dir_check(&dip->i_inode, name, ip);
-	if (error)
-		return error;
-
-	return 0;
-}
-
 static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 {
 	struct buffer_head *dibh;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index fc9a08f..c341aaf 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -96,8 +96,6 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
 				  const struct qstr *name,
 				  unsigned int mode, dev_t dev);
-extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-			  const struct gfs2_inode *ip);
 extern int gfs2_permission(struct inode *inode, int mask);
 extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index f607f09..f8bd20b 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -262,6 +262,44 @@ out_parent:
 	return error;
 }
 
+/*
+ * gfs2_unlink_ok - check to see that a inode is still in a directory
+ * @dip: the directory
+ * @name: the name of the file
+ * @ip: the inode
+ *
+ * Assumes that the lock on (at least) @dip is held.
+ *
+ * Returns: 0 if the parent/child relationship is correct, errno if it isn't
+ */
+
+static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
+			  const struct gfs2_inode *ip)
+{
+	int error;
+
+	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
+		return -EPERM;
+
+	if ((dip->i_inode.i_mode & S_ISVTX) &&
+	    dip->i_inode.i_uid != current_fsuid() &&
+	    ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (IS_APPEND(&dip->i_inode))
+		return -EPERM;
+
+	error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+	if (error)
+		return error;
+
+	error = gfs2_dir_check(&dip->i_inode, name, ip);
+	if (error)
+		return error;
+
+	return 0;
+}
+
 /**
  * gfs2_unlink - Unlink a file
  * @dir: The inode of the directory containing the file to unlink
-- 
cgit v0.10.2


From 0e1b74df992c1ef92213ab26f952befda2087f59 Mon Sep 17 00:00:00 2001
From: Mingwei Wang <mwwang@marvell.com>
Date: Wed, 20 May 2009 16:49:57 +0800
Subject: [ARM] pxa: fix the incorrectly defined drive strength macros for
 pxa{168,910}

Signed-off-by: Mingwei Wang <mwwang@marvell.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
index d0bdb6e..2e91464 100644
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
@@ -3,6 +3,11 @@
 
 #include <mach/mfp.h>
 
+#define MFP_DRIVE_VERY_SLOW	(0x0 << 13)
+#define MFP_DRIVE_SLOW		(0x1 << 13)
+#define MFP_DRIVE_MEDIUM	(0x2 << 13)
+#define MFP_DRIVE_FAST		(0x3 << 13)
+
 /* GPIO */
 #define GPIO0_GPIO		MFP_CFG(GPIO0, AF5)
 #define GPIO1_GPIO		MFP_CFG(GPIO1, AF5)
diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa910.h b/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
index 48a1cbc..d97de36 100644
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
@@ -3,6 +3,11 @@
 
 #include <mach/mfp.h>
 
+#define MFP_DRIVE_VERY_SLOW	(0x0 << 13)
+#define MFP_DRIVE_SLOW		(0x2 << 13)
+#define MFP_DRIVE_MEDIUM	(0x4 << 13)
+#define MFP_DRIVE_FAST		(0x8 << 13)
+
 /* UART2 */
 #define GPIO47_UART2_RXD	MFP_CFG(GPIO47, AF6)
 #define GPIO48_UART2_TXD	MFP_CFG(GPIO48, AF6)
diff --git a/arch/arm/mach-mmp/include/mach/mfp.h b/arch/arm/mach-mmp/include/mach/mfp.h
index 277ea4c..62e510e 100644
--- a/arch/arm/mach-mmp/include/mach/mfp.h
+++ b/arch/arm/mach-mmp/include/mach/mfp.h
@@ -12,16 +12,13 @@
  * possible, we make the following compromise:
  *
  * 1. SLEEP_OE_N will always be programmed to '1' (by MFP_LPM_FLOAT)
- * 2. DRIVE strength definitions redefined to include the reserved bit10
+ * 2. DRIVE strength definitions redefined to include the reserved bit
+ *    - the reserved bit differs between pxa168 and pxa910, and the
+ *      MFP_DRIVE_* macros are individually defined in mfp-pxa{168,910}.h
  * 3. Override MFP_CFG() and MFP_CFG_DRV()
  * 4. Drop the use of MFP_CFG_LPM() and MFP_CFG_X()
  */
 
-#define MFP_DRIVE_VERY_SLOW	(0x0 << 13)
-#define MFP_DRIVE_SLOW		(0x2 << 13)
-#define MFP_DRIVE_MEDIUM	(0x4 << 13)
-#define MFP_DRIVE_FAST		(0x8 << 13)
-
 #undef MFP_CFG
 #undef MFP_CFG_DRV
 #undef MFP_CFG_LPM
-- 
cgit v0.10.2


From f5c81a327015844eb91087dd102648b5d984f33c Mon Sep 17 00:00:00 2001
From: Coly Li <coly.li@suse.de>
Date: Thu, 23 Apr 2009 03:04:45 +0800
Subject: [ARM] pxa: add parameter to clksrc_read() for pxa168/910

This patch modifies parameter of clksrc_read() from 'void' to
'struct clocksource *cs', which fixes compile warning for
incompatible parameter type.

Signed-off-by: Coly Li <coly.li@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index b03a6ed..a8400bb 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -136,7 +136,7 @@ static struct clock_event_device ckevt = {
 	.set_mode	= timer_set_mode,
 };
 
-static cycle_t clksrc_read(void)
+static cycle_t clksrc_read(struct clocksource *cs)
 {
 	return timer_read();
 }
-- 
cgit v0.10.2


From 6ec04f434d29aed33608e0ca4d8b100190e71e96 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Fri, 22 May 2009 01:39:10 +0200
Subject: [ARM] pxa/palm: fix PalmLD/T5/TX AC97 MFP

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index 1cec180..471a853 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -62,6 +62,8 @@ static unsigned long palmld_pin_config[] __initdata = {
 	GPIO29_AC97_SDATA_IN_0,
 	GPIO30_AC97_SDATA_OUT,
 	GPIO31_AC97_SYNC,
+	GPIO89_AC97_SYSCLK,
+	GPIO95_AC97_nRESET,
 
 	/* IrDA */
 	GPIO108_GPIO,	/* ir disable */
diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c
index 3066236..05bf979 100644
--- a/arch/arm/mach-pxa/palmt5.c
+++ b/arch/arm/mach-pxa/palmt5.c
@@ -64,6 +64,7 @@ static unsigned long palmt5_pin_config[] __initdata = {
 	GPIO29_AC97_SDATA_IN_0,
 	GPIO30_AC97_SDATA_OUT,
 	GPIO31_AC97_SYNC,
+	GPIO89_AC97_SYSCLK,
 	GPIO95_AC97_nRESET,
 
 	/* IrDA */
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index e2d44b1..e99a893 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -65,6 +65,7 @@ static unsigned long palmtx_pin_config[] __initdata = {
 	GPIO29_AC97_SDATA_IN_0,
 	GPIO30_AC97_SDATA_OUT,
 	GPIO31_AC97_SYNC,
+	GPIO89_AC97_SYSCLK,
 	GPIO95_AC97_nRESET,
 
 	/* IrDA */
-- 
cgit v0.10.2


From a63eaf34ae60bdb067a354cc8def2e8f4a01f5f4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 22 May 2009 14:17:31 +1000
Subject: perf_counter: Dynamically allocate tasks' perf_counter_context struct

This replaces the struct perf_counter_context in the task_struct with
a pointer to a dynamically allocated perf_counter_context struct.  The
main reason for doing is this is to allow us to transfer a
perf_counter_context from one task to another when we do lazy PMU
switching in a later patch.

This has a few side-benefits: the task_struct becomes a little smaller,
we save some memory because only tasks that have perf_counters attached
get a perf_counter_context allocated for them, and we can remove the
inclusion of <linux/perf_counter.h> in sched.h, meaning that we don't
end up recompiling nearly everything whenever perf_counter.h changes.

The perf_counter_context structures are reference-counted and freed
when the last reference is dropped.  A context can have references
from its task and the counters on its task.  Counters can outlive the
task so it is possible that a context will be freed well after its
task has exited.

Contexts are allocated on fork if the parent had a context, or
otherwise the first time that a per-task counter is created on a task.
In the latter case, we set the context pointer in the task struct
locklessly using an atomic compare-and-exchange operation in case we
raced with some other task in creating a context for the subject task.

This also removes the task pointer from the perf_counter struct.  The
task pointer was not used anywhere and would make it harder to move a
context from one task to another.  Anything that needed to know which
task a counter was attached to was already using counter->ctx->task.

The __perf_counter_init_context function moves up in perf_counter.c
so that it can be called from find_get_context, and now initializes
the refcount, but is otherwise unchanged.

We were potentially calling list_del_counter twice: once from
__perf_counter_exit_task when the task exits and once from
__perf_counter_remove_from_context when the counter's fd gets closed.
This adds a check in list_del_counter so it doesn't do anything if
the counter has already been removed from the lists.

Since perf_counter_task_sched_in doesn't do anything if the task doesn't
have a context, and leaves cpuctx->task_ctx = NULL, this adds code to
__perf_install_in_context to set cpuctx->task_ctx if necessary, i.e. in
the case where the current task adds the first counter to itself and
thus creates a context for itself.

This also adds similar code to __perf_counter_enable to handle a
similar situation which can arise when the counters have been disabled
using prctl; that also leaves cpuctx->task_ctx = NULL.

[ Impact: refactor counter context management to prepare for new feature ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10075.781053.231153@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e9021a9..b4f6440 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,6 +14,7 @@
  *	Mikael Pettersson	:	PM converted to driver model.
  */
 
+#include <linux/perf_counter.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi_pmtmr.h>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 503afaa..d87247d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,18 +108,6 @@ extern struct group_info init_groups;
 
 extern struct cred init_cred;
 
-#ifdef CONFIG_PERF_COUNTERS
-# define INIT_PERF_COUNTERS(tsk)					\
-	.perf_counter_ctx.counter_list =				\
-		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),	\
-	.perf_counter_ctx.event_list =					\
-		LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list),	\
-	.perf_counter_ctx.lock =					\
-		__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
-#else
-# define INIT_PERF_COUNTERS(tsk)
-#endif
-
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -183,7 +171,6 @@ extern struct cred init_cred;
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_IDS							\
-	INIT_PERF_COUNTERS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f612941..0713090 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -449,7 +449,6 @@ struct perf_counter {
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
-	struct task_struct		*task;
 	struct file			*filp;
 
 	struct perf_counter		*parent;
@@ -498,7 +497,6 @@ struct perf_counter {
  * Used as a container for task counters and CPU counters as well:
  */
 struct perf_counter_context {
-#ifdef CONFIG_PERF_COUNTERS
 	/*
 	 * Protect the states of the counters in the list,
 	 * nr_active, and the list:
@@ -516,6 +514,7 @@ struct perf_counter_context {
 	int			nr_counters;
 	int			nr_active;
 	int			is_active;
+	atomic_t		refcount;
 	struct task_struct	*task;
 
 	/*
@@ -523,7 +522,6 @@ struct perf_counter_context {
 	 */
 	u64			time;
 	u64			timestamp;
-#endif
 };
 
 /**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff59d12..9714d45 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,7 +71,6 @@ struct sched_param {
 #include <linux/path.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
-#include <linux/perf_counter.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
@@ -99,6 +98,7 @@ struct robust_list_head;
 struct bio;
 struct bts_tracer;
 struct fs_struct;
+struct perf_counter_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1387,7 +1387,9 @@ struct task_struct {
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
-	struct perf_counter_context perf_counter_ctx;
+#ifdef CONFIG_PERF_COUNTERS
+	struct perf_counter_context *perf_counter_ctxp;
+#endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
 	short il_next;
diff --git a/kernel/exit.c b/kernel/exit.c
index f9dfedd..99ad406 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,6 +48,7 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
+#include <linux/perf_counter.h>
 #include <trace/sched.h>
 
 #include <asm/uaccess.h>
@@ -159,7 +160,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
 #ifdef CONFIG_PERF_COUNTERS
-	WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
+	WARN_ON_ONCE(tsk->perf_counter_ctxp);
 #endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index d32fef4..e72a09f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -63,6 +63,7 @@
 #include <linux/fs_struct.h>
 #include <trace/sched.h>
 #include <linux/magic.h>
+#include <linux/perf_counter.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 08584c1..06ea3ea 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -97,6 +97,17 @@ void perf_enable(void)
 		hw_perf_enable();
 }
 
+static void get_ctx(struct perf_counter_context *ctx)
+{
+	atomic_inc(&ctx->refcount);
+}
+
+static void put_ctx(struct perf_counter_context *ctx)
+{
+	if (atomic_dec_and_test(&ctx->refcount))
+		kfree(ctx);
+}
+
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -118,11 +129,17 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	ctx->nr_counters++;
 }
 
+/*
+ * Remove a counter from the lists for its context.
+ * Must be called with counter->mutex and ctx->mutex held.
+ */
 static void
 list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
 	struct perf_counter *sibling, *tmp;
 
+	if (list_empty(&counter->list_entry))
+		return;
 	ctx->nr_counters--;
 
 	list_del_init(&counter->list_entry);
@@ -216,8 +233,6 @@ static void __perf_counter_remove_from_context(void *info)
 
 	counter_sched_out(counter, cpuctx, ctx);
 
-	counter->task = NULL;
-
 	list_del_counter(counter, ctx);
 
 	if (!ctx->task) {
@@ -279,7 +294,6 @@ retry:
 	 */
 	if (!list_empty(&counter->list_entry)) {
 		list_del_counter(counter, ctx);
-		counter->task = NULL;
 	}
 	spin_unlock_irq(&ctx->lock);
 }
@@ -568,11 +582,17 @@ static void __perf_install_in_context(void *info)
 	 * If this is a task context, we need to check whether it is
 	 * the current task context of this cpu. If not it has been
 	 * scheduled out before the smp call arrived.
+	 * Or possibly this is the right context but it isn't
+	 * on this cpu because it had no counters.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
 
 	spin_lock_irqsave(&ctx->lock, flags);
+	ctx->is_active = 1;
 	update_context_time(ctx);
 
 	/*
@@ -653,7 +673,6 @@ perf_install_in_context(struct perf_counter_context *ctx,
 		return;
 	}
 
-	counter->task = task;
 retry:
 	task_oncpu_function_call(task, __perf_install_in_context,
 				 counter);
@@ -693,10 +712,14 @@ static void __perf_counter_enable(void *info)
 	 * If this is a per-task counter, need to check whether this
 	 * counter's task is the current task on this cpu.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
 
 	spin_lock_irqsave(&ctx->lock, flags);
+	ctx->is_active = 1;
 	update_context_time(ctx);
 
 	counter->prev_state = counter->state;
@@ -852,10 +875,10 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
 	struct pt_regs *regs;
 
-	if (likely(!cpuctx->task_ctx))
+	if (likely(!ctx || !cpuctx->task_ctx))
 		return;
 
 	update_context_time(ctx);
@@ -871,6 +894,8 @@ static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
+	if (!cpuctx->task_ctx)
+		return;
 	__perf_counter_sched_out(ctx, cpuctx);
 	cpuctx->task_ctx = NULL;
 }
@@ -969,8 +994,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
 
+	if (likely(!ctx))
+		return;
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
@@ -985,11 +1012,11 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 int perf_counter_task_disable(void)
 {
 	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
 	unsigned long flags;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx || !ctx->nr_counters)
 		return 0;
 
 	local_irq_save(flags);
@@ -1020,12 +1047,12 @@ int perf_counter_task_disable(void)
 int perf_counter_task_enable(void)
 {
 	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
 	unsigned long flags;
 	int cpu;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx || !ctx->nr_counters)
 		return 0;
 
 	local_irq_save(flags);
@@ -1128,19 +1155,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 		return;
 
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
-	ctx = &curr->perf_counter_ctx;
+	ctx = curr->perf_counter_ctxp;
 
 	perf_adjust_freq(&cpuctx->ctx);
-	perf_adjust_freq(ctx);
+	if (ctx)
+		perf_adjust_freq(ctx);
 
 	perf_counter_cpu_sched_out(cpuctx);
-	__perf_counter_task_sched_out(ctx);
+	if (ctx)
+		__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
-	rotate_ctx(ctx);
+	if (ctx)
+		rotate_ctx(ctx);
 
 	perf_counter_cpu_sched_in(cpuctx, cpu);
-	perf_counter_task_sched_in(curr, cpu);
+	if (ctx)
+		perf_counter_task_sched_in(curr, cpu);
 }
 
 /*
@@ -1176,6 +1207,22 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	return atomic64_read(&counter->count);
 }
 
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	INIT_LIST_HEAD(&ctx->event_list);
+	atomic_set(&ctx->refcount, 1);
+	ctx->task = task;
+}
+
 static void put_context(struct perf_counter_context *ctx)
 {
 	if (ctx->task)
@@ -1186,6 +1233,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;
+	struct perf_counter_context *tctx;
 	struct task_struct *task;
 
 	/*
@@ -1225,15 +1273,36 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
-	ctx = &task->perf_counter_ctx;
-	ctx->task = task;
-
 	/* Reuse ptrace permission checks for now. */
 	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
-		put_context(ctx);
+		put_task_struct(task);
 		return ERR_PTR(-EACCES);
 	}
 
+	ctx = task->perf_counter_ctxp;
+	if (!ctx) {
+		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
+		if (!ctx) {
+			put_task_struct(task);
+			return ERR_PTR(-ENOMEM);
+		}
+		__perf_counter_init_context(ctx, task);
+		/*
+		 * Make sure other cpus see correct values for *ctx
+		 * once task->perf_counter_ctxp is visible to them.
+		 */
+		smp_wmb();
+		tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx);
+		if (tctx) {
+			/*
+			 * We raced with some other task; use
+			 * the context they set.
+			 */
+			kfree(ctx);
+			ctx = tctx;
+		}
+	}
+
 	return ctx;
 }
 
@@ -1242,6 +1311,7 @@ static void free_counter_rcu(struct rcu_head *head)
 	struct perf_counter *counter;
 
 	counter = container_of(head, struct perf_counter, rcu_head);
+	put_ctx(counter->ctx);
 	kfree(counter);
 }
 
@@ -2247,7 +2317,7 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
 	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
 	put_cpu_var(perf_cpu_context);
 
-	perf_counter_comm_ctx(&current->perf_counter_ctx, comm_event);
+	perf_counter_comm_ctx(current->perf_counter_ctxp, comm_event);
 }
 
 void perf_counter_comm(struct task_struct *task)
@@ -2256,7 +2326,9 @@ void perf_counter_comm(struct task_struct *task)
 
 	if (!atomic_read(&nr_comm_tracking))
 		return;
-       
+	if (!current->perf_counter_ctxp)
+		return;
+
 	comm_event = (struct perf_comm_event){
 		.task	= task,
 		.event  = {
@@ -2372,7 +2444,7 @@ got_name:
 	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
 	put_cpu_var(perf_cpu_context);
 
-	perf_counter_mmap_ctx(&current->perf_counter_ctx, mmap_event);
+	perf_counter_mmap_ctx(current->perf_counter_ctxp, mmap_event);
 
 	kfree(buf);
 }
@@ -2384,6 +2456,8 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 
 	if (!atomic_read(&nr_mmap_tracking))
 		return;
+	if (!current->perf_counter_ctxp)
+		return;
 
 	mmap_event = (struct perf_mmap_event){
 		.file   = file,
@@ -2985,6 +3059,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader		= group_leader;
 	counter->pmu			= NULL;
 	counter->ctx			= ctx;
+	get_ctx(ctx);
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
@@ -3150,21 +3225,6 @@ err_put_context:
 }
 
 /*
- * Initialize the perf_counter context in a task_struct:
- */
-static void
-__perf_counter_init_context(struct perf_counter_context *ctx,
-			    struct task_struct *task)
-{
-	memset(ctx, 0, sizeof(*ctx));
-	spin_lock_init(&ctx->lock);
-	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->counter_list);
-	INIT_LIST_HEAD(&ctx->event_list);
-	ctx->task = task;
-}
-
-/*
  * inherit a counter from parent task to child task:
  */
 static struct perf_counter *
@@ -3195,7 +3255,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * Link it up in the child's context:
 	 */
-	child_counter->task = child;
 	add_counter_to_ctx(child_counter, child_ctx);
 
 	child_counter->parent = parent_counter;
@@ -3294,40 +3353,15 @@ __perf_counter_exit_task(struct task_struct *child,
 	struct perf_counter *parent_counter;
 
 	/*
-	 * If we do not self-reap then we have to wait for the
-	 * child task to unschedule (it will happen for sure),
-	 * so that its counter is at its final count. (This
-	 * condition triggers rarely - child tasks usually get
-	 * off their CPU before the parent has a chance to
-	 * get this far into the reaping action)
+	 * Protect against concurrent operations on child_counter
+	 * due its fd getting closed, etc.
 	 */
-	if (child != current) {
-		wait_task_inactive(child, 0);
-		update_counter_times(child_counter);
-		list_del_counter(child_counter, child_ctx);
-	} else {
-		struct perf_cpu_context *cpuctx;
-		unsigned long flags;
-
-		/*
-		 * Disable and unlink this counter.
-		 *
-		 * Be careful about zapping the list - IRQ/NMI context
-		 * could still be processing it:
-		 */
-		local_irq_save(flags);
-		perf_disable();
-
-		cpuctx = &__get_cpu_var(perf_cpu_context);
+	mutex_lock(&child_counter->mutex);
 
-		group_sched_out(child_counter, cpuctx, child_ctx);
-		update_counter_times(child_counter);
+	update_counter_times(child_counter);
+	list_del_counter(child_counter, child_ctx);
 
-		list_del_counter(child_counter, child_ctx);
-
-		perf_enable();
-		local_irq_restore(flags);
-	}
+	mutex_unlock(&child_counter->mutex);
 
 	parent_counter = child_counter->parent;
 	/*
@@ -3346,19 +3380,29 @@ __perf_counter_exit_task(struct task_struct *child,
  *
  * Note: we may be running in child context, but the PID is not hashed
  * anymore so new counters will not be added.
+ * (XXX not sure that is true when we get called from flush_old_exec.
+ *  -- paulus)
  */
 void perf_counter_exit_task(struct task_struct *child)
 {
 	struct perf_counter *child_counter, *tmp;
 	struct perf_counter_context *child_ctx;
+	unsigned long flags;
 
 	WARN_ON_ONCE(child != current);
 
-	child_ctx = &child->perf_counter_ctx;
+	child_ctx = child->perf_counter_ctxp;
 
-	if (likely(!child_ctx->nr_counters))
+	if (likely(!child_ctx))
 		return;
 
+	local_irq_save(flags);
+	__perf_counter_task_sched_out(child_ctx);
+	child->perf_counter_ctxp = NULL;
+	local_irq_restore(flags);
+
+	mutex_lock(&child_ctx->mutex);
+
 again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
 				 list_entry)
@@ -3371,6 +3415,10 @@ again:
 	 */
 	if (!list_empty(&child_ctx->counter_list))
 		goto again;
+
+	mutex_unlock(&child_ctx->mutex);
+
+	put_ctx(child_ctx);
 }
 
 /*
@@ -3382,19 +3430,25 @@ void perf_counter_init_task(struct task_struct *child)
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
 
-	child_ctx  =  &child->perf_counter_ctx;
-	parent_ctx = &parent->perf_counter_ctx;
-
-	__perf_counter_init_context(child_ctx, child);
+	child->perf_counter_ctxp = NULL;
 
 	/*
 	 * This is executed from the parent task context, so inherit
-	 * counters that have been marked for cloning:
+	 * counters that have been marked for cloning.
+	 * First allocate and initialize a context for the child.
 	 */
 
-	if (likely(!parent_ctx->nr_counters))
+	child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
+	if (!child_ctx)
+		return;
+
+	parent_ctx = parent->perf_counter_ctxp;
+	if (likely(!parent_ctx || !parent_ctx->nr_counters))
 		return;
 
+	__perf_counter_init_context(child_ctx, child);
+	child->perf_counter_ctxp = child_ctx;
+
 	/*
 	 * Lock the parent list. No need to lock the child - not PID
 	 * hashed yet and not running, so nobody can access it.
-- 
cgit v0.10.2


From 564c2b210add41df9a3a5aaa365c1d97cff6110d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 22 May 2009 14:27:22 +1000
Subject: perf_counter: Optimize context switch between identical inherited
 contexts

When monitoring a process and its descendants with a set of inherited
counters, we can often get the situation in a context switch where
both the old (outgoing) and new (incoming) process have the same set
of counters, and their values are ultimately going to be added together.
In that situation it doesn't matter which set of counters are used to
count the activity for the new process, so there is really no need to
go through the process of reading the hardware counters and updating
the old task's counters and then setting up the PMU for the new task.

This optimizes the context switch in this situation.  Instead of
scheduling out the perf_counter_context for the old task and
scheduling in the new context, we simply transfer the old context
to the new task and keep using it without interruption.  The new
context gets transferred to the old task.  This means that both
tasks still have a valid perf_counter_context, so no special case
is introduced when the old task gets scheduled in again, either on
this CPU or another CPU.

The equivalence of contexts is detected by keeping a pointer in
each cloned context pointing to the context it was cloned from.
To cope with the situation where a context is changed by adding
or removing counters after it has been cloned, we also keep a
generation number on each context which is incremented every time
a context is changed.  When a context is cloned we take a copy
of the parent's generation number, and two cloned contexts are
equivalent only if they have the same parent and the same
generation number.  In order that the parent context pointer
remains valid (and is not reused), we increment the parent
context's reference count for each context cloned from it.

Since we don't have individual fds for the counters in a cloned
context, the only thing that can make two clones of a given parent
different after they have been cloned is enabling or disabling all
counters with prctl.  To account for this, we keep a count of the
number of enabled counters in each context.  Two contexts must have
the same number of enabled counters to be considered equivalent.

Here are some measurements of the context switch time as measured with
the lat_ctx benchmark from lmbench, comparing the times obtained with
and without this patch series:

		-----Unmodified-----		With this patch series
Counters:	none	2 HW	4H+4S	none	2 HW	4H+4S

2 processes:
Average		3.44	6.45	11.24	3.12	3.39	3.60
St dev		0.04	0.04	0.13	0.05	0.17	0.19

8 processes:
Average		6.45	8.79	14.00	5.57	6.23	7.57
St dev		1.27	1.04	0.88	1.42	1.46	1.42

32 processes:
Average		5.56	8.43	13.78	5.28	5.55	7.15
St dev		0.41	0.47	0.53	0.54	0.57	0.81

The numbers are the mean and standard deviation of 20 runs of
lat_ctx.  The "none" columns are lat_ctx run directly without any
counters.  The "2 HW" columns are with lat_ctx run under perfstat,
counting cycles and instructions.  The "4H+4S" columns are lat_ctx run
under perfstat with 4 hardware counters and 4 software counters
(cycles, instructions, cache references, cache misses, task
clock, context switch, cpu migrations, and page faults).

[ Impact: performance optimization of counter context-switches ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0713090..4cae01a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -513,6 +513,7 @@ struct perf_counter_context {
 	struct list_head	event_list;
 	int			nr_counters;
 	int			nr_active;
+	int			nr_enabled;
 	int			is_active;
 	atomic_t		refcount;
 	struct task_struct	*task;
@@ -522,6 +523,14 @@ struct perf_counter_context {
 	 */
 	u64			time;
 	u64			timestamp;
+
+	/*
+	 * These fields let us detect when two contexts have both
+	 * been cloned (inherited) from a common ancestor.
+	 */
+	struct perf_counter_context *parent_ctx;
+	u32			parent_gen;
+	u32			generation;
 };
 
 /**
@@ -552,7 +561,8 @@ extern int perf_max_counters;
 extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
 
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
-extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
+extern void perf_counter_task_sched_out(struct task_struct *task,
+					struct task_struct *next, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern void perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 06ea3ea..c100554 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -104,8 +104,11 @@ static void get_ctx(struct perf_counter_context *ctx)
 
 static void put_ctx(struct perf_counter_context *ctx)
 {
-	if (atomic_dec_and_test(&ctx->refcount))
+	if (atomic_dec_and_test(&ctx->refcount)) {
+		if (ctx->parent_ctx)
+			put_ctx(ctx->parent_ctx);
 		kfree(ctx);
+	}
 }
 
 static void
@@ -127,6 +130,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 	ctx->nr_counters++;
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		ctx->nr_enabled++;
 }
 
 /*
@@ -141,6 +146,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	if (list_empty(&counter->list_entry))
 		return;
 	ctx->nr_counters--;
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		ctx->nr_enabled--;
 
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
@@ -204,6 +211,22 @@ group_sched_out(struct perf_counter *group_counter,
 }
 
 /*
+ * Mark this context as not being a clone of another.
+ * Called when counters are added to or removed from this context.
+ * We also increment our generation number so that anything that
+ * was cloned from this context before this will not match anything
+ * cloned from this context after this.
+ */
+static void unclone_ctx(struct perf_counter_context *ctx)
+{
+	++ctx->generation;
+	if (!ctx->parent_ctx)
+		return;
+	put_ctx(ctx->parent_ctx);
+	ctx->parent_ctx = NULL;
+}
+
+/*
  * Cross CPU call to remove a performance counter
  *
  * We disable the counter on the hardware level first. After that we
@@ -263,6 +286,7 @@ static void perf_counter_remove_from_context(struct perf_counter *counter)
 	struct perf_counter_context *ctx = counter->ctx;
 	struct task_struct *task = ctx->task;
 
+	unclone_ctx(ctx);
 	if (!task) {
 		/*
 		 * Per cpu counters are removed via an smp call and
@@ -378,6 +402,7 @@ static void __perf_counter_disable(void *info)
 		else
 			counter_sched_out(counter, cpuctx, ctx);
 		counter->state = PERF_COUNTER_STATE_OFF;
+		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
@@ -419,6 +444,7 @@ static void perf_counter_disable(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
 		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
+		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irq(&ctx->lock);
@@ -727,6 +753,7 @@ static void __perf_counter_enable(void *info)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+	ctx->nr_enabled++;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -817,6 +844,7 @@ static void perf_counter_enable(struct perf_counter *counter)
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->tstamp_enabled =
 			ctx->time - counter->total_time_enabled;
+		ctx->nr_enabled++;
 	}
  out:
 	spin_unlock_irq(&ctx->lock);
@@ -862,6 +890,25 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 }
 
 /*
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled counters.
+ * If the number of enabled counters is the same, then the set
+ * of enabled counters should be the same, because these are both
+ * inherited contexts, therefore we can't access individual counters
+ * in them directly with an fd; we can only enable/disable all
+ * counters via prctl, or enable/disable all counters in a family
+ * via ioctl, which will have the same effect on both contexts.
+ */
+static int context_equiv(struct perf_counter_context *ctx1,
+			 struct perf_counter_context *ctx2)
+{
+	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& ctx1->nr_enabled == ctx2->nr_enabled;
+}
+
+/*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
  *
@@ -872,10 +919,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
  * accessing the counter control register. If a NMI hits, then it will
  * not restart the counter.
  */
-void perf_counter_task_sched_out(struct task_struct *task, int cpu)
+void perf_counter_task_sched_out(struct task_struct *task,
+				 struct task_struct *next, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+	struct perf_counter_context *next_ctx;
 	struct pt_regs *regs;
 
 	if (likely(!ctx || !cpuctx->task_ctx))
@@ -885,6 +934,16 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 
 	regs = task_pt_regs(task);
 	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
+	next_ctx = next->perf_counter_ctxp;
+	if (next_ctx && context_equiv(ctx, next_ctx)) {
+		task->perf_counter_ctxp = next_ctx;
+		next->perf_counter_ctxp = ctx;
+		ctx->task = next;
+		next_ctx->task = task;
+		return;
+	}
+
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
@@ -998,6 +1057,8 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 
 	if (likely(!ctx))
 		return;
+	if (cpuctx->task_ctx == ctx)
+		return;
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
@@ -3253,6 +3314,16 @@ inherit_counter(struct perf_counter *parent_counter,
 		return child_counter;
 
 	/*
+	 * Make the child state follow the state of the parent counter,
+	 * not its hw_event.disabled bit.  We hold the parent's mutex,
+	 * so we won't race with perf_counter_{en,dis}able_family.
+	 */
+	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+	else
+		child_counter->state = PERF_COUNTER_STATE_OFF;
+
+	/*
 	 * Link it up in the child's context:
 	 */
 	add_counter_to_ctx(child_counter, child_ctx);
@@ -3277,16 +3348,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	mutex_lock(&parent_counter->mutex);
 	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
 
-	/*
-	 * Make the child state follow the state of the parent counter,
-	 * not its hw_event.disabled bit.  We hold the parent's mutex,
-	 * so we won't race with perf_counter_{en,dis}able_family.
-	 */
-	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-	else
-		child_counter->state = PERF_COUNTER_STATE_OFF;
-
 	mutex_unlock(&parent_counter->mutex);
 
 	return child_counter;
@@ -3429,6 +3490,7 @@ void perf_counter_init_task(struct task_struct *child)
 	struct perf_counter_context *child_ctx, *parent_ctx;
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
+	int inherited_all = 1;
 
 	child->perf_counter_ctxp = NULL;
 
@@ -3463,12 +3525,31 @@ void perf_counter_init_task(struct task_struct *child)
 		if (counter != counter->group_leader)
 			continue;
 
-		if (!counter->hw_event.inherit)
+		if (!counter->hw_event.inherit) {
+			inherited_all = 0;
 			continue;
+		}
 
 		if (inherit_group(counter, parent,
-				  parent_ctx, child, child_ctx))
+				  parent_ctx, child, child_ctx)) {
+			inherited_all = 0;
 			break;
+		}
+	}
+
+	if (inherited_all) {
+		/*
+		 * Mark the child context as a clone of the parent
+		 * context, or of whatever the parent is a clone of.
+		 */
+		if (parent_ctx->parent_ctx) {
+			child_ctx->parent_ctx = parent_ctx->parent_ctx;
+			child_ctx->parent_gen = parent_ctx->parent_gen;
+		} else {
+			child_ctx->parent_ctx = parent_ctx;
+			child_ctx->parent_gen = parent_ctx->generation;
+		}
+		get_ctx(child_ctx->parent_ctx);
 	}
 
 	mutex_unlock(&parent_ctx->mutex);
diff --git a/kernel/sched.c b/kernel/sched.c
index 419a39d0..4c0d58b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5091,7 +5091,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_counter_task_sched_out(prev, cpu);
+		perf_counter_task_sched_out(prev, next, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
-- 
cgit v0.10.2


From 910431c7f2e963017d767b29c80ae706421e569f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 22 May 2009 12:32:15 +0200
Subject: perf_counter: fix !PERF_COUNTERS build failure

Update the !CONFIG_PERF_COUNTERS prototype too, for
perf_counter_task_sched_out().

[ Impact: build fix ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4cae01a..2eedae8 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -625,7 +625,8 @@ extern void perf_counter_init(void);
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
 static inline void
-perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
+perf_counter_task_sched_out(struct task_struct *task,
+			    struct task_struct *next, int cpu)		{ }
 static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline void perf_counter_init_task(struct task_struct *child)	{ }
-- 
cgit v0.10.2


From d5046853634a8d73f28bad3cf68d182c4a99035d Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Fri, 22 May 2009 20:36:21 +0900
Subject: nilfs2: fix memory leak in nilfs_ioctl_clean_segments

This fixes a new memory leak problem in garbage collection.  The
problem was brought by the bugfix patch ("nilfs2: fix lock order
reversal in nilfs_clean_segments ioctl").

Thanks to Kentaro Suzuki for finding this problem.

Reported-by: Kentaro Suzuki <k_suzuki@ms.sylc.co.jp>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>

diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 50ff3f2..d6759b9 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -576,7 +576,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
 	ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
 
  out_free:
-	while (--n > 0)
+	while (--n >= 0)
 		vfree(kbufs[n]);
 	kfree(kbufs[4]);
 	return ret;
-- 
cgit v0.10.2


From 63d3892379f93b73ef905fb3449f4e4438a53b40 Mon Sep 17 00:00:00 2001
From: Wu Zhangjin <wuzhangjin@gmail.com>
Date: Thu, 21 May 2009 05:50:01 +0800
Subject: MIPS: Fix sparse warning in incompatiable argument type of
 clear_user.

The type of the second argument of access_ok should be (void __user *).
The unnecessary conversion of the clear_user address argument was causing
sparse to emit warnings on the __chk_user_ptr check.

Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 8de858f..c2d53c1 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -956,7 +956,7 @@ __clear_user(void __user *addr, __kernel_size_t size)
 	void __user * __cl_addr = (addr);				\
 	unsigned long __cl_size = (n);					\
 	if (__cl_size && access_ok(VERIFY_WRITE,			\
-		((unsigned long)(__cl_addr)), __cl_size))		\
+					__cl_addr, __cl_size))		\
 		__cl_size = __clear_user(__cl_addr, __cl_size);		\
 	__cl_size;							\
 })
-- 
cgit v0.10.2


From 63c901c7e6fb878805cd2f8f14fa3eee8c03ee84 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 22 May 2009 10:48:17 +0100
Subject: MIPS: IP32: Fix build error due to uninitialized variable.

  CC      arch/mips/sgi-ip32/ip32-reset.o
cc1: warnings being treated as errors
arch/mips/sgi-ip32/ip32-reset.c: In function 'debounce':
arch/mips/sgi-ip32/ip32-reset.c:97: error: 'reg_a' is used uninitialized in this function

The issues is old but due to the volatile keyword gcc older than 4.4 did
not warn about this obvious bug.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 667da93..cc549a9 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -94,6 +94,7 @@ static void debounce(unsigned long data)
 	volatile unsigned char reg_a, reg_c, xctrl_a;
 
 	reg_c = CMOS_READ(RTC_INTR_FLAGS);
+	reg_a = CMOS_READ(RTC_REG_A);
 	CMOS_WRITE(reg_a | DS_REGA_DV0, RTC_REG_A);
 	wbflush();
 	xctrl_a = CMOS_READ(DS_B1_XCTRL4A);
-- 
cgit v0.10.2


From d2f82c2f70d56ba4623de25edb383fec01f43b89 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 22 May 2009 10:58:43 +0100
Subject: MIPS: IP32: Remove unnecessary if not even harmful volatile keywords.

They are unneeded and as the issue fixed in lmo commit
63f7ec59053e3f850ab67a9938e631bcba64c6ce shows even harmful.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index cc549a9..9b95d80 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -53,7 +53,7 @@ static inline void ip32_machine_halt(void)
 
 static void ip32_machine_power_off(void)
 {
-	volatile unsigned char reg_a, xctrl_a, xctrl_b;
+	unsigned char reg_a, xctrl_a, xctrl_b;
 
 	disable_irq(MACEISA_RTC_IRQ);
 	reg_a = CMOS_READ(RTC_REG_A);
@@ -91,7 +91,7 @@ static void blink_timeout(unsigned long data)
 
 static void debounce(unsigned long data)
 {
-	volatile unsigned char reg_a, reg_c, xctrl_a;
+	unsigned char reg_a, reg_c, xctrl_a;
 
 	reg_c = CMOS_READ(RTC_INTR_FLAGS);
 	reg_a = CMOS_READ(RTC_REG_A);
@@ -138,7 +138,7 @@ static inline void ip32_power_button(void)
 
 static irqreturn_t ip32_rtc_int(int irq, void *dev_id)
 {
-	volatile unsigned char reg_c;
+	unsigned char reg_c;
 
 	reg_c = CMOS_READ(RTC_INTR_FLAGS);
 	if (!(reg_c & RTC_IRQF)) {
-- 
cgit v0.10.2


From b4852b793a1dd74ccde5572d8a8f73e948a5b1a1 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Fri, 22 May 2009 15:12:15 +0300
Subject: ASoC: TWL4030: Differentiate the playback streams

Give unique stream names for the two playback streams so
DAPM can figure out which codec_dai is in use.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 584507f..9197fdd 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -1092,13 +1092,13 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	SND_SOC_DAPM_OUTPUT("VIBRA"),
 
 	/* DACs */
-	SND_SOC_DAPM_DAC("DAC Right1", "Right Front Playback",
+	SND_SOC_DAPM_DAC("DAC Right1", "Right Front HiFi Playback",
 			SND_SOC_NOPM, 0, 0),
-	SND_SOC_DAPM_DAC("DAC Left1", "Left Front Playback",
+	SND_SOC_DAPM_DAC("DAC Left1", "Left Front HiFi Playback",
 			SND_SOC_NOPM, 0, 0),
-	SND_SOC_DAPM_DAC("DAC Right2", "Right Rear Playback",
+	SND_SOC_DAPM_DAC("DAC Right2", "Right Rear HiFi Playback",
 			SND_SOC_NOPM, 0, 0),
-	SND_SOC_DAPM_DAC("DAC Left2", "Left Rear Playback",
+	SND_SOC_DAPM_DAC("DAC Left2", "Left Rear HiFi Playback",
 			SND_SOC_NOPM, 0, 0),
 	SND_SOC_DAPM_DAC("DAC Voice", "Voice Playback",
 			SND_SOC_NOPM, 0, 0),
@@ -1937,7 +1937,7 @@ struct snd_soc_dai twl4030_dai[] = {
 {
 	.name = "twl4030",
 	.playback = {
-		.stream_name = "Playback",
+		.stream_name = "HiFi Playback",
 		.channels_min = 2,
 		.channels_max = 4,
 		.rates = TWL4030_RATES | SNDRV_PCM_RATE_96000,
@@ -1953,7 +1953,7 @@ struct snd_soc_dai twl4030_dai[] = {
 {
 	.name = "twl4030 Voice",
 	.playback = {
-		.stream_name = "Playback",
+		.stream_name = "Voice Playback",
 		.channels_min = 1,
 		.channels_max = 1,
 		.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000,
-- 
cgit v0.10.2


From 86ed3669f068b514ab85ffd548456a342b3fb8d3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 22 May 2009 15:01:19 +0100
Subject: ASoC: WM9081 mono DAC with integrated 2.6W class AB/D amplifier
 driver

The WM9081 is designed to provide high power output at low distortion
levels in space-constrained portable applications.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/include/sound/wm9081.h b/include/sound/wm9081.h
new file mode 100644
index 0000000..e173ddb
--- /dev/null
+++ b/include/sound/wm9081.h
@@ -0,0 +1,25 @@
+/*
+ * linux/sound/wm9081.h -- Platform data for WM9081
+ *
+ * Copyright 2009 Wolfson Microelectronics. PLC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_SND_WM_9081_H
+#define __LINUX_SND_WM_9081_H
+
+struct wm9081_retune_mobile_setting {
+	const char *name;
+	unsigned int rate;
+	u16 config[20];
+};
+
+struct wm9081_retune_mobile_config {
+	struct wm9081_retune_mobile_setting *configs;
+	int num_configs;
+};
+
+#endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 1c19ad5..7f78b65 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -40,6 +40,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_WM8971 if I2C
 	select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI
 	select SND_SOC_WM8990 if I2C
+	select SND_SOC_WM9081 if I2C
 	select SND_SOC_WM9705 if SND_SOC_AC97_BUS
 	select SND_SOC_WM9712 if SND_SOC_AC97_BUS
 	select SND_SOC_WM9713 if SND_SOC_AC97_BUS
@@ -156,6 +157,9 @@ config SND_SOC_WM8988
 config SND_SOC_WM8990
 	tristate
 
+config SND_SOC_WM9081
+	tristate
+
 config SND_SOC_WM9705
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 3d31b6b..70c55fa 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -28,6 +28,7 @@ snd-soc-wm8960-objs := wm8960.o
 snd-soc-wm8971-objs := wm8971.o
 snd-soc-wm8988-objs := wm8988.o
 snd-soc-wm8990-objs := wm8990.o
+snd-soc-wm9081-objs := wm9081.o
 snd-soc-wm9705-objs := wm9705.o
 snd-soc-wm9712-objs := wm9712.o
 snd-soc-wm9713-objs := wm9713.o
@@ -62,6 +63,7 @@ obj-$(CONFIG_SND_SOC_WM8940)	+= snd-soc-wm8940.o
 obj-$(CONFIG_SND_SOC_WM8960)	+= snd-soc-wm8960.o
 obj-$(CONFIG_SND_SOC_WM8988)	+= snd-soc-wm8988.o
 obj-$(CONFIG_SND_SOC_WM8990)	+= snd-soc-wm8990.o
+obj-$(CONFIG_SND_SOC_WM9081)	+= snd-soc-wm9081.o
 obj-$(CONFIG_SND_SOC_WM9705)	+= snd-soc-wm9705.o
 obj-$(CONFIG_SND_SOC_WM9712)	+= snd-soc-wm9712.o
 obj-$(CONFIG_SND_SOC_WM9713)	+= snd-soc-wm9713.o
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
new file mode 100644
index 0000000..83e3148
--- /dev/null
+++ b/sound/soc/codecs/wm9081.c
@@ -0,0 +1,1532 @@
+/*
+ * wm9081.c  --  WM9081 ALSA SoC Audio driver
+ *
+ * Author: Mark Brown
+ *
+ * Copyright 2009 Wolfson Microelectronics plc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include <sound/wm9081.h>
+#include "wm9081.h"
+
+static u16 wm9081_reg_defaults[] = {
+	0x0000,     /* R0  - Software Reset */
+	0x0000,     /* R1 */
+	0x00B9,     /* R2  - Analogue Lineout */
+	0x00B9,     /* R3  - Analogue Speaker PGA */
+	0x0001,     /* R4  - VMID Control */
+	0x0068,     /* R5  - Bias Control 1 */
+	0x0000,     /* R6 */
+	0x0000,     /* R7  - Analogue Mixer */
+	0x0000,     /* R8  - Anti Pop Control */
+	0x01DB,     /* R9  - Analogue Speaker 1 */
+	0x0018,     /* R10 - Analogue Speaker 2 */
+	0x0180,     /* R11 - Power Management */
+	0x0000,     /* R12 - Clock Control 1 */
+	0x0038,     /* R13 - Clock Control 2 */
+	0x4000,     /* R14 - Clock Control 3 */
+	0x0000,     /* R15 */
+	0x0000,     /* R16 - FLL Control 1 */
+	0x0200,     /* R17 - FLL Control 2 */
+	0x0000,     /* R18 - FLL Control 3 */
+	0x0204,     /* R19 - FLL Control 4 */
+	0x0000,     /* R20 - FLL Control 5 */
+	0x0000,     /* R21 */
+	0x0000,     /* R22 - Audio Interface 1 */
+	0x0002,     /* R23 - Audio Interface 2 */
+	0x0008,     /* R24 - Audio Interface 3 */
+	0x0022,     /* R25 - Audio Interface 4 */
+	0x0000,     /* R26 - Interrupt Status */
+	0x0006,     /* R27 - Interrupt Status Mask */
+	0x0000,     /* R28 - Interrupt Polarity */
+	0x0000,     /* R29 - Interrupt Control */
+	0x00C0,     /* R30 - DAC Digital 1 */
+	0x0008,     /* R31 - DAC Digital 2 */
+	0x09AF,     /* R32 - DRC 1 */
+	0x4201,     /* R33 - DRC 2 */
+	0x0000,     /* R34 - DRC 3 */
+	0x0000,     /* R35 - DRC 4 */
+	0x0000,     /* R36 */
+	0x0000,     /* R37 */
+	0x0000,     /* R38 - Write Sequencer 1 */
+	0x0000,     /* R39 - Write Sequencer 2 */
+	0x0002,     /* R40 - MW Slave 1 */
+	0x0000,     /* R41 */
+	0x0000,     /* R42 - EQ 1 */
+	0x0000,     /* R43 - EQ 2 */
+	0x0FCA,     /* R44 - EQ 3 */
+	0x0400,     /* R45 - EQ 4 */
+	0x00B8,     /* R46 - EQ 5 */
+	0x1EB5,     /* R47 - EQ 6 */
+	0xF145,     /* R48 - EQ 7 */
+	0x0B75,     /* R49 - EQ 8 */
+	0x01C5,     /* R50 - EQ 9 */
+	0x169E,     /* R51 - EQ 10 */
+	0xF829,     /* R52 - EQ 11 */
+	0x07AD,     /* R53 - EQ 12 */
+	0x1103,     /* R54 - EQ 13 */
+	0x1C58,     /* R55 - EQ 14 */
+	0xF373,     /* R56 - EQ 15 */
+	0x0A54,     /* R57 - EQ 16 */
+	0x0558,     /* R58 - EQ 17 */
+	0x0564,     /* R59 - EQ 18 */
+	0x0559,     /* R60 - EQ 19 */
+	0x4000,     /* R61 - EQ 20 */
+};
+
+static struct {
+	int ratio;
+	int clk_sys_rate;
+} clk_sys_rates[] = {
+	{ 64,   0 },
+	{ 128,  1 },
+	{ 192,  2 },
+	{ 256,  3 },
+	{ 384,  4 },
+	{ 512,  5 },
+	{ 768,  6 },
+	{ 1024, 7 },
+	{ 1408, 8 },
+	{ 1536, 9 },
+};
+
+static struct {
+	int rate;
+	int sample_rate;
+} sample_rates[] = {
+	{ 8000,  0  },
+	{ 11025, 1  },
+	{ 12000, 2  },
+	{ 16000, 3  },
+	{ 22050, 4  },
+	{ 24000, 5  },
+	{ 32000, 6  },
+	{ 44100, 7  },
+	{ 48000, 8  },
+	{ 88200, 9  },
+	{ 96000, 10 },
+};
+
+static struct {
+	int div; /* *10 due to .5s */
+	int bclk_div;
+} bclk_divs[] = {
+	{ 10,  0  },
+	{ 15,  1  },
+	{ 20,  2  },
+	{ 30,  3  },
+	{ 40,  4  },
+	{ 50,  5  },
+	{ 55,  6  },
+	{ 60,  7  },
+	{ 80,  8  },
+	{ 100, 9  },
+	{ 110, 10 },
+	{ 120, 11 },
+	{ 160, 12 },
+	{ 200, 13 },
+	{ 220, 14 },
+	{ 240, 15 },
+	{ 250, 16 },
+	{ 300, 17 },
+	{ 320, 18 },
+	{ 440, 19 },
+	{ 480, 20 },
+};
+
+struct wm9081_priv {
+	struct snd_soc_codec codec;
+	u16 reg_cache[WM9081_MAX_REGISTER + 1];
+	int sysclk_source;
+	int mclk_rate;
+	int sysclk_rate;
+	int fs;
+	int bclk;
+	int master;
+	int fll_fref;
+	int fll_fout;
+	struct wm9081_retune_mobile_config *retune;
+};
+
+static int wm9081_reg_is_volatile(int reg)
+{
+	switch (reg) {
+	default:
+		return 0;
+	}
+}
+
+static unsigned int wm9081_read_reg_cache(struct snd_soc_codec *codec,
+					  unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+	return cache[reg];
+}
+
+static unsigned int wm9081_read_hw(struct snd_soc_codec *codec, u8 reg)
+{
+	struct i2c_msg xfer[2];
+	u16 data;
+	int ret;
+	struct i2c_client *client = codec->control_data;
+
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+
+	/* Write register */
+	xfer[0].addr = client->addr;
+	xfer[0].flags = 0;
+	xfer[0].len = 1;
+	xfer[0].buf = &reg;
+
+	/* Read data */
+	xfer[1].addr = client->addr;
+	xfer[1].flags = I2C_M_RD;
+	xfer[1].len = 2;
+	xfer[1].buf = (u8 *)&data;
+
+	ret = i2c_transfer(client->adapter, xfer, 2);
+	if (ret != 2) {
+		dev_err(&client->dev, "i2c_transfer() returned %d\n", ret);
+		return 0;
+	}
+
+	return (data >> 8) | ((data & 0xff) << 8);
+}
+
+static unsigned int wm9081_read(struct snd_soc_codec *codec, unsigned int reg)
+{
+	if (wm9081_reg_is_volatile(reg))
+		return wm9081_read_hw(codec, reg);
+	else
+		return wm9081_read_reg_cache(codec, reg);
+}
+
+static int wm9081_write(struct snd_soc_codec *codec, unsigned int reg,
+			unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+	u8 data[3];
+
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+
+	if (!wm9081_reg_is_volatile(reg))
+		cache[reg] = value;
+
+	data[0] = reg;
+	data[1] = value >> 8;
+	data[2] = value & 0x00ff;
+
+	if (codec->hw_write(codec->control_data, data, 3) == 3)
+		return 0;
+	else
+		return -EIO;
+}
+
+static int wm9081_reset(struct snd_soc_codec *codec)
+{
+	return wm9081_write(codec, WM9081_SOFTWARE_RESET, 0);
+}
+
+static const DECLARE_TLV_DB_SCALE(drc_in_tlv, -4500, 75, 0);
+static const DECLARE_TLV_DB_SCALE(drc_out_tlv, -2250, 75, 0);
+static const DECLARE_TLV_DB_SCALE(drc_min_tlv, -1800, 600, 0);
+static unsigned int drc_max_tlv[] = {
+	TLV_DB_RANGE_HEAD(4),
+	0, 0, TLV_DB_SCALE_ITEM(1200, 0, 0),
+	1, 1, TLV_DB_SCALE_ITEM(1800, 0, 0),
+	2, 2, TLV_DB_SCALE_ITEM(2400, 0, 0),
+	3, 3, TLV_DB_SCALE_ITEM(3600, 0, 0),
+};
+static const DECLARE_TLV_DB_SCALE(drc_qr_tlv, 1200, 600, 0);
+static const DECLARE_TLV_DB_SCALE(drc_startup_tlv, -300, 50, 0);
+
+static const DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0);
+
+static const DECLARE_TLV_DB_SCALE(in_tlv, -600, 600, 0);
+static const DECLARE_TLV_DB_SCALE(dac_tlv, -7200, 75, 1);
+static const DECLARE_TLV_DB_SCALE(out_tlv, -5700, 100, 0);
+
+static const char *drc_high_text[] = {
+	"1",
+	"1/2",
+	"1/4",
+	"1/8",
+	"1/16",
+	"0",
+};
+
+static const struct soc_enum drc_high =
+	SOC_ENUM_SINGLE(WM9081_DRC_3, 3, 6, drc_high_text);
+
+static const char *drc_low_text[] = {
+	"1",
+	"1/2",
+	"1/4",
+	"1/8",
+	"0",
+};
+
+static const struct soc_enum drc_low =
+	SOC_ENUM_SINGLE(WM9081_DRC_3, 0, 5, drc_low_text);
+
+static const char *drc_atk_text[] = {
+	"181us",
+	"181us",
+	"363us",
+	"726us",
+	"1.45ms",
+	"2.9ms",
+	"5.8ms",
+	"11.6ms",
+	"23.2ms",
+	"46.4ms",
+	"92.8ms",
+	"185.6ms",
+};
+
+static const struct soc_enum drc_atk =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 12, 12, drc_atk_text);
+
+static const char *drc_dcy_text[] = {
+	"186ms",
+	"372ms",
+	"743ms",
+	"1.49s",
+	"2.97s",
+	"5.94s",
+	"11.89s",
+	"23.78s",
+	"47.56s",
+};
+
+static const struct soc_enum drc_dcy =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 8, 9, drc_dcy_text);
+
+static const char *drc_qr_dcy_text[] = {
+	"0.725ms",
+	"1.45ms",
+	"5.8ms",
+};
+
+static const struct soc_enum drc_qr_dcy =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 4, 3, drc_qr_dcy_text);
+
+static const char *dac_deemph_text[] = {
+	"None",
+	"32kHz",
+	"44.1kHz",
+	"48kHz",
+};
+
+static const struct soc_enum dac_deemph =
+	SOC_ENUM_SINGLE(WM9081_DAC_DIGITAL_2, 1, 4, dac_deemph_text);
+
+static const char *speaker_mode_text[] = {
+	"Class D",
+	"Class AB",
+};
+
+static const struct soc_enum speaker_mode =
+	SOC_ENUM_SINGLE(WM9081_ANALOGUE_SPEAKER_2, 6, 2, speaker_mode_text);
+
+static int speaker_mode_get(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int reg;
+
+	reg = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_2);
+	if (reg & WM9081_SPK_MODE)
+		ucontrol->value.integer.value[0] = 1;
+	else
+		ucontrol->value.integer.value[0] = 0;
+
+	return 0;
+}
+
+/*
+ * Stop any attempts to change speaker mode while the speaker is enabled.
+ *
+ * We also have some special anti-pop controls dependant on speaker
+ * mode which must be changed along with the mode.
+ */
+static int speaker_mode_put(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int reg_pwr = wm9081_read(codec, WM9081_POWER_MANAGEMENT);
+	unsigned int reg2 = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_2);
+
+	/* Are we changing anything? */
+	if (ucontrol->value.integer.value[0] ==
+	    ((reg2 & WM9081_SPK_MODE) != 0))
+		return 0;
+
+	/* Don't try to change modes while enabled */
+	if (reg_pwr & WM9081_SPK_ENA)
+		return -EINVAL;
+
+	if (ucontrol->value.integer.value[0]) {
+		/* Class AB */
+		reg2 &= ~(WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL);
+		reg2 |= WM9081_SPK_MODE;
+	} else {
+		/* Class D */
+		reg2 |= WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL;
+		reg2 &= ~WM9081_SPK_MODE;
+	}
+
+	wm9081_write(codec, WM9081_ANALOGUE_SPEAKER_2, reg2);
+
+	return 0;
+}
+
+static const struct snd_kcontrol_new wm9081_snd_controls[] = {
+SOC_SINGLE_TLV("IN1 Volume", WM9081_ANALOGUE_MIXER, 1, 1, 1, in_tlv),
+SOC_SINGLE_TLV("IN2 Volume", WM9081_ANALOGUE_MIXER, 3, 1, 1, in_tlv),
+
+SOC_SINGLE_TLV("Playback Volume", WM9081_DAC_DIGITAL_1, 1, 96, 0, dac_tlv),
+
+SOC_SINGLE("LINEOUT Switch", WM9081_ANALOGUE_LINEOUT, 7, 1, 1),
+SOC_SINGLE("LINEOUT ZC Switch", WM9081_ANALOGUE_LINEOUT, 6, 1, 0),
+SOC_SINGLE_TLV("LINEOUT Volume", WM9081_ANALOGUE_LINEOUT, 0, 63, 0, out_tlv),
+
+SOC_SINGLE("DRC Switch", WM9081_DRC_1, 15, 1, 0),
+SOC_ENUM("DRC High Slope", drc_high),
+SOC_ENUM("DRC Low Slope", drc_low),
+SOC_SINGLE_TLV("DRC Input Volume", WM9081_DRC_4, 5, 60, 1, drc_in_tlv),
+SOC_SINGLE_TLV("DRC Output Volume", WM9081_DRC_4, 0, 30, 1, drc_out_tlv),
+SOC_SINGLE_TLV("DRC Minimum Volume", WM9081_DRC_2, 2, 3, 1, drc_min_tlv),
+SOC_SINGLE_TLV("DRC Maximum Volume", WM9081_DRC_2, 0, 3, 0, drc_max_tlv),
+SOC_ENUM("DRC Attack", drc_atk),
+SOC_ENUM("DRC Decay", drc_dcy),
+SOC_SINGLE("DRC Quick Release Switch", WM9081_DRC_1, 2, 1, 0),
+SOC_SINGLE_TLV("DRC Quick Release Volume", WM9081_DRC_2, 6, 3, 0, drc_qr_tlv),
+SOC_ENUM("DRC Quick Release Decay", drc_qr_dcy),
+SOC_SINGLE_TLV("DRC Startup Volume", WM9081_DRC_1, 6, 18, 0, drc_startup_tlv),
+
+SOC_SINGLE("EQ Switch", WM9081_EQ_1, 0, 1, 0),
+
+SOC_SINGLE("Speaker DC Volume", WM9081_ANALOGUE_SPEAKER_1, 3, 5, 0),
+SOC_SINGLE("Speaker AC Volume", WM9081_ANALOGUE_SPEAKER_1, 0, 5, 0),
+SOC_SINGLE("Speaker Switch", WM9081_ANALOGUE_SPEAKER_PGA, 7, 1, 1),
+SOC_SINGLE("Speaker ZC Switch", WM9081_ANALOGUE_SPEAKER_PGA, 6, 1, 0),
+SOC_SINGLE_TLV("Speaker Volume", WM9081_ANALOGUE_SPEAKER_PGA, 0, 63, 0,
+	       out_tlv),
+SOC_ENUM("DAC Deemphasis", dac_deemph),
+SOC_ENUM_EXT("Speaker Mode", speaker_mode, speaker_mode_get, speaker_mode_put),
+};
+
+static const struct snd_kcontrol_new wm9081_eq_controls[] = {
+SOC_SINGLE_TLV("EQ1 Volume", WM9081_EQ_1, 11, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ2 Volume", WM9081_EQ_1, 6, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ3 Volume", WM9081_EQ_1, 1, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ4 Volume", WM9081_EQ_2, 11, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ5 Volume", WM9081_EQ_2, 6, 24, 0, eq_tlv),
+};
+
+static const struct snd_kcontrol_new mixer[] = {
+SOC_DAPM_SINGLE("IN1 Switch", WM9081_ANALOGUE_MIXER, 0, 1, 0),
+SOC_DAPM_SINGLE("IN2 Switch", WM9081_ANALOGUE_MIXER, 2, 1, 0),
+SOC_DAPM_SINGLE("Playback Switch", WM9081_ANALOGUE_MIXER, 4, 1, 0),
+};
+
+static int speaker_event(struct snd_soc_dapm_widget *w,
+			 struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	unsigned int reg = wm9081_read(codec, WM9081_POWER_MANAGEMENT);
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		reg |= WM9081_SPK_ENA;
+		break;
+
+	case SND_SOC_DAPM_PRE_PMD:
+		reg &= ~WM9081_SPK_ENA;
+		break;
+	}
+
+	wm9081_write(codec, WM9081_POWER_MANAGEMENT, reg);
+
+	return 0;
+}
+
+struct _fll_div {
+	u16 fll_fratio;
+	u16 fll_outdiv;
+	u16 fll_clk_ref_div;
+	u16 n;
+	u16 k;
+};
+
+/* The size in bits of the FLL divide multiplied by 10
+ * to allow rounding later */
+#define FIXED_FLL_SIZE ((1 << 16) * 10)
+
+static struct {
+	unsigned int min;
+	unsigned int max;
+	u16 fll_fratio;
+	int ratio;
+} fll_fratios[] = {
+	{       0,    64000, 4, 16 },
+	{   64000,   128000, 3,  8 },
+	{  128000,   256000, 2,  4 },
+	{  256000,  1000000, 1,  2 },
+	{ 1000000, 13500000, 0,  1 },
+};
+
+static int fll_factors(struct _fll_div *fll_div, unsigned int Fref,
+		       unsigned int Fout)
+{
+	u64 Kpart;
+	unsigned int K, Ndiv, Nmod, target;
+	unsigned int div;
+	int i;
+
+	/* Fref must be <=13.5MHz */
+	div = 1;
+	while ((Fref / div) > 13500000) {
+		div *= 2;
+
+		if (div > 8) {
+			pr_err("Can't scale %dMHz input down to <=13.5MHz\n",
+			       Fref);
+			return -EINVAL;
+		}
+	}
+	fll_div->fll_clk_ref_div = div / 2;
+
+	pr_debug("Fref=%u Fout=%u\n", Fref, Fout);
+
+	/* Apply the division for our remaining calculations */
+	Fref /= div;
+
+	/* Fvco should be 90-100MHz; don't check the upper bound */
+	div = 0;
+	target = Fout * 2;
+	while (target < 90000000) {
+		div++;
+		target *= 2;
+		if (div > 7) {
+			pr_err("Unable to find FLL_OUTDIV for Fout=%uHz\n",
+			       Fout);
+			return -EINVAL;
+		}
+	}
+	fll_div->fll_outdiv = div;
+
+	pr_debug("Fvco=%dHz\n", target);
+
+	/* Find an appropraite FLL_FRATIO and factor it out of the target */
+	for (i = 0; i < ARRAY_SIZE(fll_fratios); i++) {
+		if (fll_fratios[i].min <= Fref && Fref <= fll_fratios[i].max) {
+			fll_div->fll_fratio = fll_fratios[i].fll_fratio;
+			target /= fll_fratios[i].ratio;
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(fll_fratios)) {
+		pr_err("Unable to find FLL_FRATIO for Fref=%uHz\n", Fref);
+		return -EINVAL;
+	}
+
+	/* Now, calculate N.K */
+	Ndiv = target / Fref;
+
+	fll_div->n = Ndiv;
+	Nmod = target % Fref;
+	pr_debug("Nmod=%d\n", Nmod);
+
+	/* Calculate fractional part - scale up so we can round. */
+	Kpart = FIXED_FLL_SIZE * (long long)Nmod;
+
+	do_div(Kpart, Fref);
+
+	K = Kpart & 0xFFFFFFFF;
+
+	if ((K % 10) >= 5)
+		K += 5;
+
+	/* Move down to proper range now rounding is done */
+	fll_div->k = K / 10;
+
+	pr_debug("N=%x K=%x FLL_FRATIO=%x FLL_OUTDIV=%x FLL_CLK_REF_DIV=%x\n",
+		 fll_div->n, fll_div->k,
+		 fll_div->fll_fratio, fll_div->fll_outdiv,
+		 fll_div->fll_clk_ref_div);
+
+	return 0;
+}
+
+static int wm9081_set_fll(struct snd_soc_codec *codec, int fll_id,
+			  unsigned int Fref, unsigned int Fout)
+{
+	struct wm9081_priv *wm9081 = codec->private_data;
+	u16 reg1, reg4, reg5;
+	struct _fll_div fll_div;
+	int ret;
+	int clk_sys_reg;
+
+	/* Any change? */
+	if (Fref == wm9081->fll_fref && Fout == wm9081->fll_fout)
+		return 0;
+
+	/* Disable the FLL */
+	if (Fout == 0) {
+		dev_dbg(codec->dev, "FLL disabled\n");
+		wm9081->fll_fref = 0;
+		wm9081->fll_fout = 0;
+
+		return 0;
+	}
+
+	ret = fll_factors(&fll_div, Fref, Fout);
+	if (ret != 0)
+		return ret;
+
+	reg5 = wm9081_read(codec, WM9081_FLL_CONTROL_5);
+	reg5 &= ~WM9081_FLL_CLK_SRC_MASK;
+
+	switch (fll_id) {
+	case WM9081_SYSCLK_FLL_MCLK:
+		reg5 |= 0x1;
+		break;
+
+	default:
+		dev_err(codec->dev, "Unknown FLL ID %d\n", fll_id);
+		return -EINVAL;
+	}
+
+	/* Disable CLK_SYS while we reconfigure */
+	clk_sys_reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_3);
+	if (clk_sys_reg & WM9081_CLK_SYS_ENA)
+		wm9081_write(codec, WM9081_CLOCK_CONTROL_3,
+			     clk_sys_reg & ~WM9081_CLK_SYS_ENA);
+
+	/* Any FLL configuration change requires that the FLL be
+	 * disabled first. */
+	reg1 = wm9081_read(codec, WM9081_FLL_CONTROL_1);
+	reg1 &= ~WM9081_FLL_ENA;
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1);
+
+	/* Apply the configuration */
+	if (fll_div.k)
+		reg1 |= WM9081_FLL_FRAC_MASK;
+	else
+		reg1 &= ~WM9081_FLL_FRAC_MASK;
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1);
+
+	wm9081_write(codec, WM9081_FLL_CONTROL_2,
+		     (fll_div.fll_outdiv << WM9081_FLL_OUTDIV_SHIFT) |
+		     (fll_div.fll_fratio << WM9081_FLL_FRATIO_SHIFT));
+	wm9081_write(codec, WM9081_FLL_CONTROL_3, fll_div.k);
+
+	reg4 = wm9081_read(codec, WM9081_FLL_CONTROL_4);
+	reg4 &= ~WM9081_FLL_N_MASK;
+	reg4 |= fll_div.n << WM9081_FLL_N_SHIFT;
+	wm9081_write(codec, WM9081_FLL_CONTROL_4, reg4);
+
+	reg5 &= ~WM9081_FLL_CLK_REF_DIV_MASK;
+	reg5 |= fll_div.fll_clk_ref_div << WM9081_FLL_CLK_REF_DIV_SHIFT;
+	wm9081_write(codec, WM9081_FLL_CONTROL_5, reg5);
+
+	/* Enable the FLL */
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1 | WM9081_FLL_ENA);
+
+	/* Then bring CLK_SYS up again if it was disabled */
+	if (clk_sys_reg & WM9081_CLK_SYS_ENA)
+		wm9081_write(codec, WM9081_CLOCK_CONTROL_3, clk_sys_reg);
+
+	dev_dbg(codec->dev, "FLL enabled at %dHz->%dHz\n", Fref, Fout);
+
+	wm9081->fll_fref = Fref;
+	wm9081->fll_fout = Fout;
+
+	return 0;
+}
+
+static int configure_clock(struct snd_soc_codec *codec)
+{
+	struct wm9081_priv *wm9081 = codec->private_data;
+	int new_sysclk, i, target;
+	unsigned int reg;
+	int ret = 0;
+	int mclkdiv = 0;
+	int fll = 0;
+
+	switch (wm9081->sysclk_source) {
+	case WM9081_SYSCLK_MCLK:
+		if (wm9081->mclk_rate > 12225000) {
+			mclkdiv = 1;
+			wm9081->sysclk_rate = wm9081->mclk_rate / 2;
+		} else {
+			wm9081->sysclk_rate = wm9081->mclk_rate;
+		}
+		wm9081_set_fll(codec, WM9081_SYSCLK_FLL_MCLK, 0, 0);
+		break;
+
+	case WM9081_SYSCLK_FLL_MCLK:
+		/* If we have a sample rate calculate a CLK_SYS that
+		 * gives us a suitable DAC configuration, plus BCLK.
+		 * Ideally we would check to see if we can clock
+		 * directly from MCLK and only use the FLL if this is
+		 * not the case, though care must be taken with free
+		 * running mode.
+		 */
+		if (wm9081->master && wm9081->bclk) {
+			/* Make sure we can generate CLK_SYS and BCLK
+			 * and that we've got 3MHz for optimal
+			 * performance. */
+			for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) {
+				target = wm9081->fs * clk_sys_rates[i].ratio;
+				if (target >= wm9081->bclk &&
+				    target > 3000000)
+					new_sysclk = target;
+			}
+		} else if (wm9081->fs) {
+			for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) {
+				new_sysclk = clk_sys_rates[i].ratio
+					* wm9081->fs;
+				if (new_sysclk > 3000000)
+					break;
+			}
+		} else {
+			new_sysclk = 12288000;
+		}
+
+		ret = wm9081_set_fll(codec, WM9081_SYSCLK_FLL_MCLK,
+				     wm9081->mclk_rate, new_sysclk);
+		if (ret == 0) {
+			wm9081->sysclk_rate = new_sysclk;
+
+			/* Switch SYSCLK over to FLL */
+			fll = 1;
+		} else {
+			wm9081->sysclk_rate = wm9081->mclk_rate;
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_1);
+	if (mclkdiv)
+		reg |= WM9081_MCLKDIV2;
+	else
+		reg &= ~WM9081_MCLKDIV2;
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_1, reg);
+
+	reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_3);
+	if (fll)
+		reg |= WM9081_CLK_SRC_SEL;
+	else
+		reg &= ~WM9081_CLK_SRC_SEL;
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_3, reg);
+
+	dev_dbg(codec->dev, "CLK_SYS is %dHz\n", wm9081->sysclk_rate);
+
+	return ret;
+}
+
+static int clk_sys_event(struct snd_soc_dapm_widget *w,
+			 struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+
+	/* This should be done on init() for bypass paths */
+	switch (wm9081->sysclk_source) {
+	case WM9081_SYSCLK_MCLK:
+		dev_dbg(codec->dev, "Using %dHz MCLK\n", wm9081->mclk_rate);
+		break;
+	case WM9081_SYSCLK_FLL_MCLK:
+		dev_dbg(codec->dev, "Using %dHz MCLK with FLL\n",
+			wm9081->mclk_rate);
+		break;
+	default:
+		dev_err(codec->dev, "System clock not configured\n");
+		return -EINVAL;
+	}
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		configure_clock(codec);
+		break;
+
+	case SND_SOC_DAPM_POST_PMD:
+		/* Disable the FLL if it's running */
+		wm9081_set_fll(codec, 0, 0, 0);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct snd_soc_dapm_widget wm9081_dapm_widgets[] = {
+SND_SOC_DAPM_INPUT("IN1"),
+SND_SOC_DAPM_INPUT("IN2"),
+
+SND_SOC_DAPM_DAC("DAC", "HiFi Playback", WM9081_POWER_MANAGEMENT, 0, 0),
+
+SND_SOC_DAPM_MIXER_NAMED_CTL("Mixer", SND_SOC_NOPM, 0, 0,
+			     mixer, ARRAY_SIZE(mixer)),
+
+SND_SOC_DAPM_PGA("LINEOUT PGA", WM9081_POWER_MANAGEMENT, 4, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA_E("Speaker PGA", WM9081_POWER_MANAGEMENT, 2, 0, NULL, 0,
+		   speaker_event,
+		   SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+
+SND_SOC_DAPM_OUTPUT("LINEOUT"),
+SND_SOC_DAPM_OUTPUT("SPKN"),
+SND_SOC_DAPM_OUTPUT("SPKP"),
+
+SND_SOC_DAPM_SUPPLY("CLK_SYS", WM9081_CLOCK_CONTROL_3, 0, 0, clk_sys_event,
+		    SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+SND_SOC_DAPM_SUPPLY("CLK_DSP", WM9081_CLOCK_CONTROL_3, 1, 0, NULL, 0),
+SND_SOC_DAPM_SUPPLY("TOCLK", WM9081_CLOCK_CONTROL_3, 2, 0, NULL, 0),
+};
+
+
+static const struct snd_soc_dapm_route audio_paths[] = {
+	{ "DAC", NULL, "CLK_SYS" },
+	{ "DAC", NULL, "CLK_DSP" },
+
+	{ "Mixer", "IN1 Switch", "IN1" },
+	{ "Mixer", "IN2 Switch", "IN2" },
+	{ "Mixer", "Playback Switch", "DAC" },
+
+	{ "LINEOUT PGA", NULL, "Mixer" },
+	{ "LINEOUT PGA", NULL, "TOCLK" },
+	{ "LINEOUT PGA", NULL, "CLK_SYS" },
+
+	{ "LINEOUT", NULL, "LINEOUT PGA" },
+
+	{ "Speaker PGA", NULL, "Mixer" },
+	{ "Speaker PGA", NULL, "TOCLK" },
+	{ "Speaker PGA", NULL, "CLK_SYS" },
+
+	{ "SPKN", NULL, "Speaker PGA" },
+	{ "SPKP", NULL, "Speaker PGA" },
+};
+
+static int wm9081_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	u16 reg;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+
+	case SND_SOC_BIAS_PREPARE:
+		/* VMID=2*40k */
+		reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+		reg &= ~WM9081_VMID_SEL_MASK;
+		reg |= 0x2;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Normal bias current */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg &= ~WM9081_STBY_BIAS_ENA;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		/* Initial cold start */
+		if (codec->bias_level == SND_SOC_BIAS_OFF) {
+			/* Disable LINEOUT discharge */
+			reg = wm9081_read(codec, WM9081_ANTI_POP_CONTROL);
+			reg &= ~WM9081_LINEOUT_DISCH;
+			wm9081_write(codec, WM9081_ANTI_POP_CONTROL, reg);
+
+			/* Select startup bias source */
+			reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+			reg |= WM9081_BIAS_SRC | WM9081_BIAS_ENA;
+			wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+
+			/* VMID 2*4k; Soft VMID ramp enable */
+			reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+			reg |= WM9081_VMID_RAMP | 0x6;
+			wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+			mdelay(100);
+
+			/* Normal bias enable & soft start off */
+			reg |= WM9081_BIAS_ENA;
+			reg &= ~WM9081_VMID_RAMP;
+			wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+			/* Standard bias source */
+			reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+			reg &= ~WM9081_BIAS_SRC;
+			wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		}
+
+		/* VMID 2*240k */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg &= ~WM9081_VMID_SEL_MASK;
+		reg |= 0x40;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Standby bias current on */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg |= WM9081_STBY_BIAS_ENA;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		break;
+
+	case SND_SOC_BIAS_OFF:
+		/* Startup bias source */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg |= WM9081_BIAS_SRC;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+
+		/* Disable VMID and biases with soft ramping */
+		reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+		reg &= ~(WM9081_VMID_SEL_MASK | WM9081_BIAS_ENA);
+		reg |= WM9081_VMID_RAMP;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Actively discharge LINEOUT */
+		reg = wm9081_read(codec, WM9081_ANTI_POP_CONTROL);
+		reg |= WM9081_LINEOUT_DISCH;
+		wm9081_write(codec, WM9081_ANTI_POP_CONTROL, reg);
+		break;
+	}
+
+	codec->bias_level = level;
+
+	return 0;
+}
+
+static int wm9081_set_dai_fmt(struct snd_soc_dai *dai,
+			      unsigned int fmt)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+	unsigned int aif2 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_2);
+
+	aif2 &= ~(WM9081_AIF_BCLK_INV | WM9081_AIF_LRCLK_INV |
+		  WM9081_BCLK_DIR | WM9081_LRCLK_DIR | WM9081_AIF_FMT_MASK);
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBS_CFS:
+		wm9081->master = 0;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFM:
+		aif2 |= WM9081_LRCLK_DIR;
+		wm9081->master = 1;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFS:
+		aif2 |= WM9081_BCLK_DIR;
+		wm9081->master = 1;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFM:
+		aif2 |= WM9081_LRCLK_DIR | WM9081_BCLK_DIR;
+		wm9081->master = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_DSP_B:
+		aif2 |= WM9081_AIF_LRCLK_INV;
+	case SND_SOC_DAIFMT_DSP_A:
+		aif2 |= 0x3;
+		break;
+	case SND_SOC_DAIFMT_I2S:
+		aif2 |= 0x2;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		aif2 |= 0x1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_DSP_A:
+	case SND_SOC_DAIFMT_DSP_B:
+		/* frame inversion not valid for DSP modes */
+		switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+		case SND_SOC_DAIFMT_NB_NF:
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			aif2 |= WM9081_AIF_BCLK_INV;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+
+	case SND_SOC_DAIFMT_I2S:
+	case SND_SOC_DAIFMT_RIGHT_J:
+	case SND_SOC_DAIFMT_LEFT_J:
+		switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+		case SND_SOC_DAIFMT_NB_NF:
+			break;
+		case SND_SOC_DAIFMT_IB_IF:
+			aif2 |= WM9081_AIF_BCLK_INV | WM9081_AIF_LRCLK_INV;
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			aif2 |= WM9081_AIF_BCLK_INV;
+			break;
+		case SND_SOC_DAIFMT_NB_IF:
+			aif2 |= WM9081_AIF_LRCLK_INV;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_2, aif2);
+
+	return 0;
+}
+
+static int wm9081_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+	int ret, i, best, best_val, cur_val;
+	unsigned int clk_ctrl2, aif1, aif2, aif3, aif4;
+
+	clk_ctrl2 = wm9081_read(codec, WM9081_CLOCK_CONTROL_2);
+	clk_ctrl2 &= ~(WM9081_CLK_SYS_RATE_MASK | WM9081_SAMPLE_RATE_MASK);
+
+	aif1 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_1);
+
+	aif2 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_2);
+	aif2 &= ~WM9081_AIF_WL_MASK;
+
+	aif3 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_3);
+	aif3 &= ~WM9081_BCLK_DIV_MASK;
+
+	aif4 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_4);
+	aif4 &= ~WM9081_LRCLK_RATE_MASK;
+
+	/* What BCLK do we need? */
+	wm9081->fs = params_rate(params);
+	wm9081->bclk = 2 * wm9081->fs;
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		wm9081->bclk *= 16;
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		wm9081->bclk *= 20;
+		aif2 |= 0x4;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		wm9081->bclk *= 24;
+		aif2 |= 0x8;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		wm9081->bclk *= 32;
+		aif2 |= 0xc;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (aif1 & WM9081_AIFDAC_TDM_MODE_MASK) {
+		int slots = ((aif1 & WM9081_AIFDAC_TDM_MODE_MASK) >>
+			     WM9081_AIFDAC_TDM_MODE_SHIFT) + 1;
+		wm9081->bclk *= slots;
+	}
+
+	dev_dbg(codec->dev, "Target BCLK is %dHz\n", wm9081->bclk);
+
+	ret = configure_clock(codec);
+	if (ret != 0)
+		return ret;
+
+	/* Select nearest CLK_SYS_RATE */
+	best = 0;
+	best_val = abs((wm9081->sysclk_rate / clk_sys_rates[0].ratio)
+		       - wm9081->fs);
+	for (i = 1; i < ARRAY_SIZE(clk_sys_rates); i++) {
+		cur_val = abs((wm9081->sysclk_rate /
+			       clk_sys_rates[i].ratio) - wm9081->fs);;
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	dev_dbg(codec->dev, "Selected CLK_SYS_RATIO of %d\n",
+		clk_sys_rates[best].ratio);
+	clk_ctrl2 |= (clk_sys_rates[best].clk_sys_rate
+		      << WM9081_CLK_SYS_RATE_SHIFT);
+
+	/* SAMPLE_RATE */
+	best = 0;
+	best_val = abs(wm9081->fs - sample_rates[0].rate);
+	for (i = 1; i < ARRAY_SIZE(sample_rates); i++) {
+		/* Closest match */
+		cur_val = abs(wm9081->fs - sample_rates[i].rate);
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	dev_dbg(codec->dev, "Selected SAMPLE_RATE of %dHz\n",
+		sample_rates[best].rate);
+	clk_ctrl2 |= (sample_rates[i].sample_rate << WM9081_SAMPLE_RATE_SHIFT);
+
+	/* BCLK_DIV */
+	best = 0;
+	best_val = INT_MAX;
+	for (i = 0; i < ARRAY_SIZE(bclk_divs); i++) {
+		cur_val = ((wm9081->sysclk_rate * 10) / bclk_divs[i].div)
+			- wm9081->bclk;
+		if (cur_val < 0) /* Table is sorted */
+			break;
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	wm9081->bclk = (wm9081->sysclk_rate * 10) / bclk_divs[best].div;
+	dev_dbg(codec->dev, "Selected BCLK_DIV of %d for %dHz BCLK\n",
+		bclk_divs[best].div, wm9081->bclk);
+	aif3 |= bclk_divs[best].bclk_div;
+
+	/* LRCLK is a simple fraction of BCLK */
+	dev_dbg(codec->dev, "LRCLK_RATE is %d\n", wm9081->bclk / wm9081->fs);
+	aif4 |= wm9081->bclk / wm9081->fs;
+
+	/* Apply a ReTune Mobile configuration if it's in use */
+	if (wm9081->retune) {
+		struct wm9081_retune_mobile_config *retune = wm9081->retune;
+		struct wm9081_retune_mobile_setting *s;
+		int eq1;
+
+		best = 0;
+		best_val = abs(retune->configs[0].rate - wm9081->fs);
+		for (i = 0; i < retune->num_configs; i++) {
+			cur_val = abs(retune->configs[i].rate - wm9081->fs);
+			if (cur_val < best_val) {
+				best_val = cur_val;
+				best = i;
+			}
+		}
+		s = &retune->configs[best];
+
+		dev_dbg(codec->dev, "ReTune Mobile %s tuned for %dHz\n",
+			s->name, s->rate);
+
+		/* If the EQ is enabled then disable it while we write out */
+		eq1 = wm9081_read(codec, WM9081_EQ_1) & WM9081_EQ_ENA;
+		if (eq1 & WM9081_EQ_ENA)
+			wm9081_write(codec, WM9081_EQ_1, 0);
+
+		/* Write out the other values */
+		for (i = 1; i < ARRAY_SIZE(s->config); i++)
+			wm9081_write(codec, WM9081_EQ_1 + i, s->config[i]);
+
+		eq1 |= (s->config[0] & ~WM9081_EQ_ENA);
+		wm9081_write(codec, WM9081_EQ_1, eq1);
+	}
+
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_2, clk_ctrl2);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_2, aif2);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_3, aif3);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_4, aif4);
+
+	return 0;
+}
+
+static int wm9081_digital_mute(struct snd_soc_dai *codec_dai, int mute)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	unsigned int reg;
+
+	reg = wm9081_read(codec, WM9081_DAC_DIGITAL_2);
+
+	if (mute)
+		reg |= WM9081_DAC_MUTE;
+	else
+		reg &= ~WM9081_DAC_MUTE;
+
+	wm9081_write(codec, WM9081_DAC_DIGITAL_2, reg);
+
+	return 0;
+}
+
+static int wm9081_set_sysclk(struct snd_soc_dai *codec_dai,
+			     int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+
+	switch (clk_id) {
+	case WM9081_SYSCLK_MCLK:
+	case WM9081_SYSCLK_FLL_MCLK:
+		wm9081->sysclk_source = clk_id;
+		wm9081->mclk_rate = freq;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int wm9081_set_tdm_slot(struct snd_soc_dai *dai,
+			       unsigned int mask, int slots)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	unsigned int aif1 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_1);
+
+	aif1 &= ~(WM9081_AIFDAC_TDM_SLOT_MASK | WM9081_AIFDAC_TDM_MODE_MASK);
+
+	if (slots < 1 || slots > 4)
+		return -EINVAL;
+
+	aif1 |= (slots - 1) << WM9081_AIFDAC_TDM_MODE_SHIFT;
+
+	switch (mask) {
+	case 1:
+		break;
+	case 2:
+		aif1 |= 0x10;
+		break;
+	case 4:
+		aif1 |= 0x20;
+		break;
+	case 8:
+		aif1 |= 0x30;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_1, aif1);
+
+	return 0;
+}
+
+#define WM9081_RATES SNDRV_PCM_RATE_8000_96000
+
+#define WM9081_FORMATS \
+	(SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \
+	 SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_ops wm9081_dai_ops = {
+	.hw_params = wm9081_hw_params,
+	.set_sysclk = wm9081_set_sysclk,
+	.set_fmt = wm9081_set_dai_fmt,
+	.digital_mute = wm9081_digital_mute,
+	.set_tdm_slot = wm9081_set_tdm_slot,
+};
+
+/* We report two channels because the CODEC processes a stereo signal, even
+ * though it is only capable of handling a mono output.
+ */
+struct snd_soc_dai wm9081_dai = {
+	.name = "WM9081",
+	.playback = {
+		.stream_name = "HiFi Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM9081_RATES,
+		.formats = WM9081_FORMATS,
+	},
+	.ops = &wm9081_dai_ops,
+};
+EXPORT_SYMBOL_GPL(wm9081_dai);
+
+
+static struct snd_soc_codec *wm9081_codec;
+
+static int wm9081_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	struct wm9081_priv *wm9081;
+	int ret = 0;
+
+	if (wm9081_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm9081_codec;
+	codec = wm9081_codec;
+	wm9081 = codec->private_data;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	snd_soc_add_controls(codec, wm9081_snd_controls,
+			     ARRAY_SIZE(wm9081_snd_controls));
+	if (!wm9081->retune) {
+		dev_dbg(codec->dev,
+			"No ReTune Mobile data, using normal EQ\n");
+		snd_soc_add_controls(codec, wm9081_eq_controls,
+				     ARRAY_SIZE(wm9081_eq_controls));
+	}
+
+	snd_soc_dapm_new_controls(codec, wm9081_dapm_widgets,
+				  ARRAY_SIZE(wm9081_dapm_widgets));
+	snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths));
+	snd_soc_dapm_new_widgets(codec);
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto card_err;
+	}
+
+	return ret;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+static int wm9081_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int wm9081_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_OFF);
+
+	return 0;
+}
+
+static int wm9081_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 *reg_cache = codec->reg_cache;
+	int i;
+
+	for (i = 0; i < codec->reg_cache_size; i++) {
+		if (i == WM9081_SOFTWARE_RESET)
+			continue;
+
+		wm9081_write(codec, i, reg_cache[i]);
+	}
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	return 0;
+}
+#else
+#define wm9081_suspend NULL
+#define wm9081_resume NULL
+#endif
+
+struct snd_soc_codec_device soc_codec_dev_wm9081 = {
+	.probe = 	wm9081_probe,
+	.remove = 	wm9081_remove,
+	.suspend =	wm9081_suspend,
+	.resume =	wm9081_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm9081);
+
+static int wm9081_register(struct wm9081_priv *wm9081)
+{
+	struct snd_soc_codec *codec = &wm9081->codec;
+	int ret;
+	u16 reg;
+
+	if (wm9081_codec) {
+		dev_err(codec->dev, "Another WM9081 is registered\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	mutex_init(&codec->mutex);
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm9081;
+	codec->name = "WM9081";
+	codec->owner = THIS_MODULE;
+	codec->read = wm9081_read;
+	codec->write = wm9081_write;
+	codec->dai = &wm9081_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = ARRAY_SIZE(wm9081->reg_cache);
+	codec->reg_cache = &wm9081->reg_cache;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm9081_set_bias_level;
+
+	memcpy(codec->reg_cache, wm9081_reg_defaults,
+	       sizeof(wm9081_reg_defaults));
+
+	reg = wm9081_read_hw(codec, WM9081_SOFTWARE_RESET);
+	if (reg != 0x9081) {
+		dev_err(codec->dev, "Device is not a WM9081: ID=0x%x\n", reg);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = wm9081_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	/* Enable zero cross by default */
+	reg = wm9081_read(codec, WM9081_ANALOGUE_LINEOUT);
+	wm9081_write(codec, WM9081_ANALOGUE_LINEOUT, reg | WM9081_LINEOUTZC);
+	reg = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_PGA);
+	wm9081_write(codec, WM9081_ANALOGUE_SPEAKER_PGA,
+		     reg | WM9081_SPKPGAZC);
+
+	wm9081_dai.dev = codec->dev;
+
+	wm9081_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm9081_dai);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+
+err:
+	kfree(wm9081);
+	return ret;
+}
+
+static void wm9081_unregister(struct wm9081_priv *wm9081)
+{
+	wm9081_set_bias_level(&wm9081->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm9081_dai);
+	snd_soc_unregister_codec(&wm9081->codec);
+	kfree(wm9081);
+	wm9081_codec = NULL;
+}
+
+static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
+				      const struct i2c_device_id *id)
+{
+	struct wm9081_priv *wm9081;
+	struct snd_soc_codec *codec;
+
+	wm9081 = kzalloc(sizeof(struct wm9081_priv), GFP_KERNEL);
+	if (wm9081 == NULL)
+		return -ENOMEM;
+
+	codec = &wm9081->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+	wm9081->retune = i2c->dev.platform_data;
+
+	i2c_set_clientdata(i2c, wm9081);
+	codec->control_data = i2c;
+
+	codec->dev = &i2c->dev;
+
+	return wm9081_register(wm9081);
+}
+
+static __devexit int wm9081_i2c_remove(struct i2c_client *client)
+{
+	struct wm9081_priv *wm9081 = i2c_get_clientdata(client);
+	wm9081_unregister(wm9081);
+	return 0;
+}
+
+static const struct i2c_device_id wm9081_i2c_id[] = {
+	{ "wm9081", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm9081_i2c_id);
+
+static struct i2c_driver wm9081_i2c_driver = {
+	.driver = {
+		.name = "wm9081",
+		.owner = THIS_MODULE,
+	},
+	.probe =    wm9081_i2c_probe,
+	.remove =   __devexit_p(wm9081_i2c_remove),
+	.id_table = wm9081_i2c_id,
+};
+
+static int __init wm9081_modinit(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&wm9081_i2c_driver);
+	if (ret != 0) {
+		printk(KERN_ERR "Failed to register WM9081 I2C driver: %d\n",
+		       ret);
+	}
+
+	return ret;
+}
+module_init(wm9081_modinit);
+
+static void __exit wm9081_exit(void)
+{
+	i2c_del_driver(&wm9081_i2c_driver);
+}
+module_exit(wm9081_exit);
+
+
+MODULE_DESCRIPTION("ASoC WM9081 driver");
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm9081.h b/sound/soc/codecs/wm9081.h
new file mode 100644
index 0000000..42d3bc7
--- /dev/null
+++ b/sound/soc/codecs/wm9081.h
@@ -0,0 +1,787 @@
+#ifndef WM9081_H
+#define WM9081_H
+
+/*
+ * wm9081.c  --  WM9081 ALSA SoC Audio driver
+ *
+ * Author: Mark Brown
+ *
+ * Copyright 2009 Wolfson Microelectronics plc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <sound/soc.h>
+
+extern struct snd_soc_dai wm9081_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm9081;
+
+/*
+ * SYSCLK sources
+ */
+#define WM9081_SYSCLK_MCLK      1   /* Use MCLK without FLL */
+#define WM9081_SYSCLK_FLL_MCLK  2   /* Use MCLK, enabling FLL if required */
+
+/*
+ * Register values.
+ */
+#define WM9081_SOFTWARE_RESET                   0x00
+#define WM9081_ANALOGUE_LINEOUT                 0x02
+#define WM9081_ANALOGUE_SPEAKER_PGA             0x03
+#define WM9081_VMID_CONTROL                     0x04
+#define WM9081_BIAS_CONTROL_1                   0x05
+#define WM9081_ANALOGUE_MIXER                   0x07
+#define WM9081_ANTI_POP_CONTROL                 0x08
+#define WM9081_ANALOGUE_SPEAKER_1               0x09
+#define WM9081_ANALOGUE_SPEAKER_2               0x0A
+#define WM9081_POWER_MANAGEMENT                 0x0B
+#define WM9081_CLOCK_CONTROL_1                  0x0C
+#define WM9081_CLOCK_CONTROL_2                  0x0D
+#define WM9081_CLOCK_CONTROL_3                  0x0E
+#define WM9081_FLL_CONTROL_1                    0x10
+#define WM9081_FLL_CONTROL_2                    0x11
+#define WM9081_FLL_CONTROL_3                    0x12
+#define WM9081_FLL_CONTROL_4                    0x13
+#define WM9081_FLL_CONTROL_5                    0x14
+#define WM9081_AUDIO_INTERFACE_1                0x16
+#define WM9081_AUDIO_INTERFACE_2                0x17
+#define WM9081_AUDIO_INTERFACE_3                0x18
+#define WM9081_AUDIO_INTERFACE_4                0x19
+#define WM9081_INTERRUPT_STATUS                 0x1A
+#define WM9081_INTERRUPT_STATUS_MASK            0x1B
+#define WM9081_INTERRUPT_POLARITY               0x1C
+#define WM9081_INTERRUPT_CONTROL                0x1D
+#define WM9081_DAC_DIGITAL_1                    0x1E
+#define WM9081_DAC_DIGITAL_2                    0x1F
+#define WM9081_DRC_1                            0x20
+#define WM9081_DRC_2                            0x21
+#define WM9081_DRC_3                            0x22
+#define WM9081_DRC_4                            0x23
+#define WM9081_WRITE_SEQUENCER_1                0x26
+#define WM9081_WRITE_SEQUENCER_2                0x27
+#define WM9081_MW_SLAVE_1                       0x28
+#define WM9081_EQ_1                             0x2A
+#define WM9081_EQ_2                             0x2B
+#define WM9081_EQ_3                             0x2C
+#define WM9081_EQ_4                             0x2D
+#define WM9081_EQ_5                             0x2E
+#define WM9081_EQ_6                             0x2F
+#define WM9081_EQ_7                             0x30
+#define WM9081_EQ_8                             0x31
+#define WM9081_EQ_9                             0x32
+#define WM9081_EQ_10                            0x33
+#define WM9081_EQ_11                            0x34
+#define WM9081_EQ_12                            0x35
+#define WM9081_EQ_13                            0x36
+#define WM9081_EQ_14                            0x37
+#define WM9081_EQ_15                            0x38
+#define WM9081_EQ_16                            0x39
+#define WM9081_EQ_17                            0x3A
+#define WM9081_EQ_18                            0x3B
+#define WM9081_EQ_19                            0x3C
+#define WM9081_EQ_20                            0x3D
+
+#define WM9081_REGISTER_COUNT                   55
+#define WM9081_MAX_REGISTER                     0x3D
+
+/*
+ * Field Definitions.
+ */
+
+/*
+ * R0 (0x00) - Software Reset
+ */
+#define WM9081_SW_RST_DEV_ID1_MASK              0xFFFF  /* SW_RST_DEV_ID1 - [15:0] */
+#define WM9081_SW_RST_DEV_ID1_SHIFT                  0  /* SW_RST_DEV_ID1 - [15:0] */
+#define WM9081_SW_RST_DEV_ID1_WIDTH                 16  /* SW_RST_DEV_ID1 - [15:0] */
+
+/*
+ * R2 (0x02) - Analogue Lineout
+ */
+#define WM9081_LINEOUT_MUTE                     0x0080  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_MASK                0x0080  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_SHIFT                    7  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_WIDTH                    1  /* LINEOUT_MUTE */
+#define WM9081_LINEOUTZC                        0x0040  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_MASK                   0x0040  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_SHIFT                       6  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_WIDTH                       1  /* LINEOUTZC */
+#define WM9081_LINEOUT_VOL_MASK                 0x003F  /* LINEOUT_VOL - [5:0] */
+#define WM9081_LINEOUT_VOL_SHIFT                     0  /* LINEOUT_VOL - [5:0] */
+#define WM9081_LINEOUT_VOL_WIDTH                     6  /* LINEOUT_VOL - [5:0] */
+
+/*
+ * R3 (0x03) - Analogue Speaker PGA
+ */
+#define WM9081_SPKPGA_MUTE                      0x0080  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_MASK                 0x0080  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_SHIFT                     7  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_WIDTH                     1  /* SPKPGA_MUTE */
+#define WM9081_SPKPGAZC                         0x0040  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_MASK                    0x0040  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_SHIFT                        6  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_WIDTH                        1  /* SPKPGAZC */
+#define WM9081_SPKPGA_VOL_MASK                  0x003F  /* SPKPGA_VOL - [5:0] */
+#define WM9081_SPKPGA_VOL_SHIFT                      0  /* SPKPGA_VOL - [5:0] */
+#define WM9081_SPKPGA_VOL_WIDTH                      6  /* SPKPGA_VOL - [5:0] */
+
+/*
+ * R4 (0x04) - VMID Control
+ */
+#define WM9081_VMID_BUF_ENA                     0x0020  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_MASK                0x0020  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_SHIFT                    5  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_WIDTH                    1  /* VMID_BUF_ENA */
+#define WM9081_VMID_RAMP                        0x0008  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_MASK                   0x0008  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_SHIFT                       3  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_WIDTH                       1  /* VMID_RAMP */
+#define WM9081_VMID_SEL_MASK                    0x0006  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_SEL_SHIFT                        1  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_SEL_WIDTH                        2  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_FAST_ST                     0x0001  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_MASK                0x0001  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_SHIFT                    0  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_WIDTH                    1  /* VMID_FAST_ST */
+
+/*
+ * R5 (0x05) - Bias Control 1
+ */
+#define WM9081_BIAS_SRC                         0x0040  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_MASK                    0x0040  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_SHIFT                        6  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_WIDTH                        1  /* BIAS_SRC */
+#define WM9081_STBY_BIAS_LVL                    0x0020  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_MASK               0x0020  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_SHIFT                   5  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_WIDTH                   1  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_ENA                    0x0010  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_MASK               0x0010  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_SHIFT                   4  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_WIDTH                   1  /* STBY_BIAS_ENA */
+#define WM9081_BIAS_LVL_MASK                    0x000C  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_LVL_SHIFT                        2  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_LVL_WIDTH                        2  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_ENA                         0x0002  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_MASK                    0x0002  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_SHIFT                        1  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_WIDTH                        1  /* BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA                 0x0001  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_MASK            0x0001  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_SHIFT                0  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_WIDTH                1  /* STARTUP_BIAS_ENA */
+
+/*
+ * R7 (0x07) - Analogue Mixer
+ */
+#define WM9081_DAC_SEL                          0x0010  /* DAC_SEL */
+#define WM9081_DAC_SEL_MASK                     0x0010  /* DAC_SEL */
+#define WM9081_DAC_SEL_SHIFT                         4  /* DAC_SEL */
+#define WM9081_DAC_SEL_WIDTH                         1  /* DAC_SEL */
+#define WM9081_IN2_VOL                          0x0008  /* IN2_VOL */
+#define WM9081_IN2_VOL_MASK                     0x0008  /* IN2_VOL */
+#define WM9081_IN2_VOL_SHIFT                         3  /* IN2_VOL */
+#define WM9081_IN2_VOL_WIDTH                         1  /* IN2_VOL */
+#define WM9081_IN2_ENA                          0x0004  /* IN2_ENA */
+#define WM9081_IN2_ENA_MASK                     0x0004  /* IN2_ENA */
+#define WM9081_IN2_ENA_SHIFT                         2  /* IN2_ENA */
+#define WM9081_IN2_ENA_WIDTH                         1  /* IN2_ENA */
+#define WM9081_IN1_VOL                          0x0002  /* IN1_VOL */
+#define WM9081_IN1_VOL_MASK                     0x0002  /* IN1_VOL */
+#define WM9081_IN1_VOL_SHIFT                         1  /* IN1_VOL */
+#define WM9081_IN1_VOL_WIDTH                         1  /* IN1_VOL */
+#define WM9081_IN1_ENA                          0x0001  /* IN1_ENA */
+#define WM9081_IN1_ENA_MASK                     0x0001  /* IN1_ENA */
+#define WM9081_IN1_ENA_SHIFT                         0  /* IN1_ENA */
+#define WM9081_IN1_ENA_WIDTH                         1  /* IN1_ENA */
+
+/*
+ * R8 (0x08) - Anti Pop Control
+ */
+#define WM9081_LINEOUT_DISCH                    0x0004  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_MASK               0x0004  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_SHIFT                   2  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_WIDTH                   1  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_VROI                     0x0002  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_MASK                0x0002  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_SHIFT                    1  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_WIDTH                    1  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_CLAMP                    0x0001  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_MASK               0x0001  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_SHIFT                   0  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_WIDTH                   1  /* LINEOUT_CLAMP */
+
+/*
+ * R9 (0x09) - Analogue Speaker 1
+ */
+#define WM9081_SPK_DCGAIN_MASK                  0x0038  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_DCGAIN_SHIFT                      3  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_DCGAIN_WIDTH                      3  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_ACGAIN_MASK                  0x0007  /* SPK_ACGAIN - [2:0] */
+#define WM9081_SPK_ACGAIN_SHIFT                      0  /* SPK_ACGAIN - [2:0] */
+#define WM9081_SPK_ACGAIN_WIDTH                      3  /* SPK_ACGAIN - [2:0] */
+
+/*
+ * R10 (0x0A) - Analogue Speaker 2
+ */
+#define WM9081_SPK_MODE                         0x0040  /* SPK_MODE */
+#define WM9081_SPK_MODE_MASK                    0x0040  /* SPK_MODE */
+#define WM9081_SPK_MODE_SHIFT                        6  /* SPK_MODE */
+#define WM9081_SPK_MODE_WIDTH                        1  /* SPK_MODE */
+#define WM9081_SPK_INV_MUTE                     0x0010  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_MASK                0x0010  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_SHIFT                    4  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_WIDTH                    1  /* SPK_INV_MUTE */
+#define WM9081_OUT_SPK_CTRL                     0x0008  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_MASK                0x0008  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_SHIFT                    3  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_WIDTH                    1  /* OUT_SPK_CTRL */
+
+/*
+ * R11 (0x0B) - Power Management
+ */
+#define WM9081_TSHUT_ENA                        0x0100  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_MASK                   0x0100  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_SHIFT                       8  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_WIDTH                       1  /* TSHUT_ENA */
+#define WM9081_TSENSE_ENA                       0x0080  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_MASK                  0x0080  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_SHIFT                      7  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_WIDTH                      1  /* TSENSE_ENA */
+#define WM9081_TEMP_SHUT                        0x0040  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_MASK                   0x0040  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_SHIFT                       6  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_WIDTH                       1  /* TEMP_SHUT */
+#define WM9081_LINEOUT_ENA                      0x0010  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_MASK                 0x0010  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_SHIFT                     4  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_WIDTH                     1  /* LINEOUT_ENA */
+#define WM9081_SPKPGA_ENA                       0x0004  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_MASK                  0x0004  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_SHIFT                      2  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_WIDTH                      1  /* SPKPGA_ENA */
+#define WM9081_SPK_ENA                          0x0002  /* SPK_ENA */
+#define WM9081_SPK_ENA_MASK                     0x0002  /* SPK_ENA */
+#define WM9081_SPK_ENA_SHIFT                         1  /* SPK_ENA */
+#define WM9081_SPK_ENA_WIDTH                         1  /* SPK_ENA */
+#define WM9081_DAC_ENA                          0x0001  /* DAC_ENA */
+#define WM9081_DAC_ENA_MASK                     0x0001  /* DAC_ENA */
+#define WM9081_DAC_ENA_SHIFT                         0  /* DAC_ENA */
+#define WM9081_DAC_ENA_WIDTH                         1  /* DAC_ENA */
+
+/*
+ * R12 (0x0C) - Clock Control 1
+ */
+#define WM9081_CLK_OP_DIV_MASK                  0x1C00  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_OP_DIV_SHIFT                     10  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_OP_DIV_WIDTH                      3  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_TO_DIV_MASK                  0x0300  /* CLK_TO_DIV - [9:8] */
+#define WM9081_CLK_TO_DIV_SHIFT                      8  /* CLK_TO_DIV - [9:8] */
+#define WM9081_CLK_TO_DIV_WIDTH                      2  /* CLK_TO_DIV - [9:8] */
+#define WM9081_MCLKDIV2                         0x0080  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_MASK                    0x0080  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_SHIFT                        7  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_WIDTH                        1  /* MCLKDIV2 */
+
+/*
+ * R13 (0x0D) - Clock Control 2
+ */
+#define WM9081_CLK_SYS_RATE_MASK                0x00F0  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_CLK_SYS_RATE_SHIFT                    4  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_CLK_SYS_RATE_WIDTH                    4  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_SAMPLE_RATE_MASK                 0x000F  /* SAMPLE_RATE - [3:0] */
+#define WM9081_SAMPLE_RATE_SHIFT                     0  /* SAMPLE_RATE - [3:0] */
+#define WM9081_SAMPLE_RATE_WIDTH                     4  /* SAMPLE_RATE - [3:0] */
+
+/*
+ * R14 (0x0E) - Clock Control 3
+ */
+#define WM9081_CLK_SRC_SEL                      0x2000  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_MASK                 0x2000  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_SHIFT                    13  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_WIDTH                     1  /* CLK_SRC_SEL */
+#define WM9081_CLK_OP_ENA                       0x0020  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_MASK                  0x0020  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_SHIFT                      5  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_WIDTH                      1  /* CLK_OP_ENA */
+#define WM9081_CLK_TO_ENA                       0x0004  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_MASK                  0x0004  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_SHIFT                      2  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_WIDTH                      1  /* CLK_TO_ENA */
+#define WM9081_CLK_DSP_ENA                      0x0002  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_MASK                 0x0002  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_SHIFT                     1  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_WIDTH                     1  /* CLK_DSP_ENA */
+#define WM9081_CLK_SYS_ENA                      0x0001  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_MASK                 0x0001  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_SHIFT                     0  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_WIDTH                     1  /* CLK_SYS_ENA */
+
+/*
+ * R16 (0x10) - FLL Control 1
+ */
+#define WM9081_FLL_HOLD                         0x0008  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_MASK                    0x0008  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_SHIFT                        3  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_WIDTH                        1  /* FLL_HOLD */
+#define WM9081_FLL_FRAC                         0x0004  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_MASK                    0x0004  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_SHIFT                        2  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_WIDTH                        1  /* FLL_FRAC */
+#define WM9081_FLL_ENA                          0x0001  /* FLL_ENA */
+#define WM9081_FLL_ENA_MASK                     0x0001  /* FLL_ENA */
+#define WM9081_FLL_ENA_SHIFT                         0  /* FLL_ENA */
+#define WM9081_FLL_ENA_WIDTH                         1  /* FLL_ENA */
+
+/*
+ * R17 (0x11) - FLL Control 2
+ */
+#define WM9081_FLL_OUTDIV_MASK                  0x0700  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_OUTDIV_SHIFT                      8  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_OUTDIV_WIDTH                      3  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_CTRL_RATE_MASK               0x0070  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_CTRL_RATE_SHIFT                   4  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_CTRL_RATE_WIDTH                   3  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_FRATIO_MASK                  0x0007  /* FLL_FRATIO - [2:0] */
+#define WM9081_FLL_FRATIO_SHIFT                      0  /* FLL_FRATIO - [2:0] */
+#define WM9081_FLL_FRATIO_WIDTH                      3  /* FLL_FRATIO - [2:0] */
+
+/*
+ * R18 (0x12) - FLL Control 3
+ */
+#define WM9081_FLL_K_MASK                       0xFFFF  /* FLL_K - [15:0] */
+#define WM9081_FLL_K_SHIFT                           0  /* FLL_K - [15:0] */
+#define WM9081_FLL_K_WIDTH                          16  /* FLL_K - [15:0] */
+
+/*
+ * R19 (0x13) - FLL Control 4
+ */
+#define WM9081_FLL_N_MASK                       0x7FE0  /* FLL_N - [14:5] */
+#define WM9081_FLL_N_SHIFT                           5  /* FLL_N - [14:5] */
+#define WM9081_FLL_N_WIDTH                          10  /* FLL_N - [14:5] */
+#define WM9081_FLL_GAIN_MASK                    0x000F  /* FLL_GAIN - [3:0] */
+#define WM9081_FLL_GAIN_SHIFT                        0  /* FLL_GAIN - [3:0] */
+#define WM9081_FLL_GAIN_WIDTH                        4  /* FLL_GAIN - [3:0] */
+
+/*
+ * R20 (0x14) - FLL Control 5
+ */
+#define WM9081_FLL_CLK_REF_DIV_MASK             0x0018  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_REF_DIV_SHIFT                 3  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_REF_DIV_WIDTH                 2  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_SRC_MASK                 0x0003  /* FLL_CLK_SRC - [1:0] */
+#define WM9081_FLL_CLK_SRC_SHIFT                     0  /* FLL_CLK_SRC - [1:0] */
+#define WM9081_FLL_CLK_SRC_WIDTH                     2  /* FLL_CLK_SRC - [1:0] */
+
+/*
+ * R22 (0x16) - Audio Interface 1
+ */
+#define WM9081_AIFDAC_CHAN                      0x0040  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_MASK                 0x0040  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_SHIFT                     6  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_WIDTH                     1  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_TDM_SLOT_MASK             0x0030  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_SLOT_SHIFT                 4  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_SLOT_WIDTH                 2  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_MODE_MASK             0x000C  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_AIFDAC_TDM_MODE_SHIFT                 2  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_AIFDAC_TDM_MODE_WIDTH                 2  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_DAC_COMP                         0x0002  /* DAC_COMP */
+#define WM9081_DAC_COMP_MASK                    0x0002  /* DAC_COMP */
+#define WM9081_DAC_COMP_SHIFT                        1  /* DAC_COMP */
+#define WM9081_DAC_COMP_WIDTH                        1  /* DAC_COMP */
+#define WM9081_DAC_COMPMODE                     0x0001  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_MASK                0x0001  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_SHIFT                    0  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_WIDTH                    1  /* DAC_COMPMODE */
+
+/*
+ * R23 (0x17) - Audio Interface 2
+ */
+#define WM9081_AIF_TRIS                         0x0200  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_MASK                    0x0200  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_SHIFT                        9  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_WIDTH                        1  /* AIF_TRIS */
+#define WM9081_DAC_DAT_INV                      0x0100  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_MASK                 0x0100  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_SHIFT                     8  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_WIDTH                     1  /* DAC_DAT_INV */
+#define WM9081_AIF_BCLK_INV                     0x0080  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_MASK                0x0080  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_SHIFT                    7  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_WIDTH                    1  /* AIF_BCLK_INV */
+#define WM9081_BCLK_DIR                         0x0040  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_MASK                    0x0040  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_SHIFT                        6  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_WIDTH                        1  /* BCLK_DIR */
+#define WM9081_LRCLK_DIR                        0x0020  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_MASK                   0x0020  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_SHIFT                       5  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_WIDTH                       1  /* LRCLK_DIR */
+#define WM9081_AIF_LRCLK_INV                    0x0010  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_MASK               0x0010  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_SHIFT                   4  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_WIDTH                   1  /* AIF_LRCLK_INV */
+#define WM9081_AIF_WL_MASK                      0x000C  /* AIF_WL - [3:2] */
+#define WM9081_AIF_WL_SHIFT                          2  /* AIF_WL - [3:2] */
+#define WM9081_AIF_WL_WIDTH                          2  /* AIF_WL - [3:2] */
+#define WM9081_AIF_FMT_MASK                     0x0003  /* AIF_FMT - [1:0] */
+#define WM9081_AIF_FMT_SHIFT                         0  /* AIF_FMT - [1:0] */
+#define WM9081_AIF_FMT_WIDTH                         2  /* AIF_FMT - [1:0] */
+
+/*
+ * R24 (0x18) - Audio Interface 3
+ */
+#define WM9081_BCLK_DIV_MASK                    0x001F  /* BCLK_DIV - [4:0] */
+#define WM9081_BCLK_DIV_SHIFT                        0  /* BCLK_DIV - [4:0] */
+#define WM9081_BCLK_DIV_WIDTH                        5  /* BCLK_DIV - [4:0] */
+
+/*
+ * R25 (0x19) - Audio Interface 4
+ */
+#define WM9081_LRCLK_RATE_MASK                  0x07FF  /* LRCLK_RATE - [10:0] */
+#define WM9081_LRCLK_RATE_SHIFT                      0  /* LRCLK_RATE - [10:0] */
+#define WM9081_LRCLK_RATE_WIDTH                     11  /* LRCLK_RATE - [10:0] */
+
+/*
+ * R26 (0x1A) - Interrupt Status
+ */
+#define WM9081_WSEQ_BUSY_EINT                   0x0004  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_MASK              0x0004  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_SHIFT                  2  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_WIDTH                  1  /* WSEQ_BUSY_EINT */
+#define WM9081_TSHUT_EINT                       0x0001  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_MASK                  0x0001  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_SHIFT                      0  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_WIDTH                      1  /* TSHUT_EINT */
+
+/*
+ * R27 (0x1B) - Interrupt Status Mask
+ */
+#define WM9081_IM_WSEQ_BUSY_EINT                0x0004  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_MASK           0x0004  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_SHIFT               2  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_WIDTH               1  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_TSHUT_EINT                    0x0001  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_MASK               0x0001  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_SHIFT                   0  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_WIDTH                   1  /* IM_TSHUT_EINT */
+
+/*
+ * R28 (0x1C) - Interrupt Polarity
+ */
+#define WM9081_TSHUT_INV                        0x0001  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_MASK                   0x0001  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_SHIFT                       0  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_WIDTH                       1  /* TSHUT_INV */
+
+/*
+ * R29 (0x1D) - Interrupt Control
+ */
+#define WM9081_IRQ_POL                          0x8000  /* IRQ_POL */
+#define WM9081_IRQ_POL_MASK                     0x8000  /* IRQ_POL */
+#define WM9081_IRQ_POL_SHIFT                        15  /* IRQ_POL */
+#define WM9081_IRQ_POL_WIDTH                         1  /* IRQ_POL */
+#define WM9081_IRQ_OP_CTRL                      0x0001  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_MASK                 0x0001  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_SHIFT                     0  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_WIDTH                     1  /* IRQ_OP_CTRL */
+
+/*
+ * R30 (0x1E) - DAC Digital 1
+ */
+#define WM9081_DAC_VOL_MASK                     0x00FF  /* DAC_VOL - [7:0] */
+#define WM9081_DAC_VOL_SHIFT                         0  /* DAC_VOL - [7:0] */
+#define WM9081_DAC_VOL_WIDTH                         8  /* DAC_VOL - [7:0] */
+
+/*
+ * R31 (0x1F) - DAC Digital 2
+ */
+#define WM9081_DAC_MUTERATE                     0x0400  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_MASK                0x0400  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_SHIFT                   10  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_WIDTH                    1  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTEMODE                     0x0200  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_MASK                0x0200  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_SHIFT                    9  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_WIDTH                    1  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTE                         0x0008  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_MASK                    0x0008  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_SHIFT                        3  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_WIDTH                        1  /* DAC_MUTE */
+#define WM9081_DEEMPH_MASK                      0x0006  /* DEEMPH - [2:1] */
+#define WM9081_DEEMPH_SHIFT                          1  /* DEEMPH - [2:1] */
+#define WM9081_DEEMPH_WIDTH                          2  /* DEEMPH - [2:1] */
+
+/*
+ * R32 (0x20) - DRC 1
+ */
+#define WM9081_DRC_ENA                          0x8000  /* DRC_ENA */
+#define WM9081_DRC_ENA_MASK                     0x8000  /* DRC_ENA */
+#define WM9081_DRC_ENA_SHIFT                        15  /* DRC_ENA */
+#define WM9081_DRC_ENA_WIDTH                         1  /* DRC_ENA */
+#define WM9081_DRC_STARTUP_GAIN_MASK            0x07C0  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_STARTUP_GAIN_SHIFT                6  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_STARTUP_GAIN_WIDTH                5  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_FF_DLY                       0x0020  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_MASK                  0x0020  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_SHIFT                      5  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_WIDTH                      1  /* DRC_FF_DLY */
+#define WM9081_DRC_QR                           0x0004  /* DRC_QR */
+#define WM9081_DRC_QR_MASK                      0x0004  /* DRC_QR */
+#define WM9081_DRC_QR_SHIFT                          2  /* DRC_QR */
+#define WM9081_DRC_QR_WIDTH                          1  /* DRC_QR */
+#define WM9081_DRC_ANTICLIP                     0x0002  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_MASK                0x0002  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_SHIFT                    1  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_WIDTH                    1  /* DRC_ANTICLIP */
+
+/*
+ * R33 (0x21) - DRC 2
+ */
+#define WM9081_DRC_ATK_MASK                     0xF000  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_ATK_SHIFT                        12  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_ATK_WIDTH                         4  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_DCY_MASK                     0x0F00  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_DCY_SHIFT                         8  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_DCY_WIDTH                         4  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_QR_THR_MASK                  0x00C0  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_THR_SHIFT                      6  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_THR_WIDTH                      2  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_DCY_MASK                  0x0030  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_QR_DCY_SHIFT                      4  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_QR_DCY_WIDTH                      2  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_MINGAIN_MASK                 0x000C  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MINGAIN_SHIFT                     2  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MINGAIN_WIDTH                     2  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MAXGAIN_MASK                 0x0003  /* DRC_MAXGAIN - [1:0] */
+#define WM9081_DRC_MAXGAIN_SHIFT                     0  /* DRC_MAXGAIN - [1:0] */
+#define WM9081_DRC_MAXGAIN_WIDTH                     2  /* DRC_MAXGAIN - [1:0] */
+
+/*
+ * R34 (0x22) - DRC 3
+ */
+#define WM9081_DRC_HI_COMP_MASK                 0x0038  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_HI_COMP_SHIFT                     3  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_HI_COMP_WIDTH                     3  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_LO_COMP_MASK                 0x0007  /* DRC_LO_COMP - [2:0] */
+#define WM9081_DRC_LO_COMP_SHIFT                     0  /* DRC_LO_COMP - [2:0] */
+#define WM9081_DRC_LO_COMP_WIDTH                     3  /* DRC_LO_COMP - [2:0] */
+
+/*
+ * R35 (0x23) - DRC 4
+ */
+#define WM9081_DRC_KNEE_IP_MASK                 0x07E0  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_IP_SHIFT                     5  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_IP_WIDTH                     6  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_OP_MASK                 0x001F  /* DRC_KNEE_OP - [4:0] */
+#define WM9081_DRC_KNEE_OP_SHIFT                     0  /* DRC_KNEE_OP - [4:0] */
+#define WM9081_DRC_KNEE_OP_WIDTH                     5  /* DRC_KNEE_OP - [4:0] */
+
+/*
+ * R38 (0x26) - Write Sequencer 1
+ */
+#define WM9081_WSEQ_ENA                         0x8000  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_MASK                    0x8000  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_SHIFT                       15  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_WIDTH                        1  /* WSEQ_ENA */
+#define WM9081_WSEQ_ABORT                       0x0200  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_MASK                  0x0200  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_SHIFT                      9  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_WIDTH                      1  /* WSEQ_ABORT */
+#define WM9081_WSEQ_START                       0x0100  /* WSEQ_START */
+#define WM9081_WSEQ_START_MASK                  0x0100  /* WSEQ_START */
+#define WM9081_WSEQ_START_SHIFT                      8  /* WSEQ_START */
+#define WM9081_WSEQ_START_WIDTH                      1  /* WSEQ_START */
+#define WM9081_WSEQ_START_INDEX_MASK            0x007F  /* WSEQ_START_INDEX - [6:0] */
+#define WM9081_WSEQ_START_INDEX_SHIFT                0  /* WSEQ_START_INDEX - [6:0] */
+#define WM9081_WSEQ_START_INDEX_WIDTH                7  /* WSEQ_START_INDEX - [6:0] */
+
+/*
+ * R39 (0x27) - Write Sequencer 2
+ */
+#define WM9081_WSEQ_CURRENT_INDEX_MASK          0x07F0  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_CURRENT_INDEX_SHIFT              4  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_CURRENT_INDEX_WIDTH              7  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_BUSY                        0x0001  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_MASK                   0x0001  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_SHIFT                       0  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_WIDTH                       1  /* WSEQ_BUSY */
+
+/*
+ * R40 (0x28) - MW Slave 1
+ */
+#define WM9081_SPI_CFG                          0x0020  /* SPI_CFG */
+#define WM9081_SPI_CFG_MASK                     0x0020  /* SPI_CFG */
+#define WM9081_SPI_CFG_SHIFT                         5  /* SPI_CFG */
+#define WM9081_SPI_CFG_WIDTH                         1  /* SPI_CFG */
+#define WM9081_SPI_4WIRE                        0x0010  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_MASK                   0x0010  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_SHIFT                       4  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_WIDTH                       1  /* SPI_4WIRE */
+#define WM9081_ARA_ENA                          0x0008  /* ARA_ENA */
+#define WM9081_ARA_ENA_MASK                     0x0008  /* ARA_ENA */
+#define WM9081_ARA_ENA_SHIFT                         3  /* ARA_ENA */
+#define WM9081_ARA_ENA_WIDTH                         1  /* ARA_ENA */
+#define WM9081_AUTO_INC                         0x0002  /* AUTO_INC */
+#define WM9081_AUTO_INC_MASK                    0x0002  /* AUTO_INC */
+#define WM9081_AUTO_INC_SHIFT                        1  /* AUTO_INC */
+#define WM9081_AUTO_INC_WIDTH                        1  /* AUTO_INC */
+
+/*
+ * R42 (0x2A) - EQ 1
+ */
+#define WM9081_EQ_B1_GAIN_MASK                  0xF800  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B1_GAIN_SHIFT                     11  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B1_GAIN_WIDTH                      5  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B2_GAIN_MASK                  0x07C0  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B2_GAIN_SHIFT                      6  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B2_GAIN_WIDTH                      5  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B4_GAIN_MASK                  0x003E  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_B4_GAIN_SHIFT                      1  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_B4_GAIN_WIDTH                      5  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_ENA                           0x0001  /* EQ_ENA */
+#define WM9081_EQ_ENA_MASK                      0x0001  /* EQ_ENA */
+#define WM9081_EQ_ENA_SHIFT                          0  /* EQ_ENA */
+#define WM9081_EQ_ENA_WIDTH                          1  /* EQ_ENA */
+
+/*
+ * R43 (0x2B) - EQ 2
+ */
+#define WM9081_EQ_B3_GAIN_MASK                  0xF800  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B3_GAIN_SHIFT                     11  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B3_GAIN_WIDTH                      5  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B5_GAIN_MASK                  0x07C0  /* EQ_B5_GAIN - [10:6] */
+#define WM9081_EQ_B5_GAIN_SHIFT                      6  /* EQ_B5_GAIN - [10:6] */
+#define WM9081_EQ_B5_GAIN_WIDTH                      5  /* EQ_B5_GAIN - [10:6] */
+
+/*
+ * R44 (0x2C) - EQ 3
+ */
+#define WM9081_EQ_B1_A_MASK                     0xFFFF  /* EQ_B1_A - [15:0] */
+#define WM9081_EQ_B1_A_SHIFT                         0  /* EQ_B1_A - [15:0] */
+#define WM9081_EQ_B1_A_WIDTH                        16  /* EQ_B1_A - [15:0] */
+
+/*
+ * R45 (0x2D) - EQ 4
+ */
+#define WM9081_EQ_B1_B_MASK                     0xFFFF  /* EQ_B1_B - [15:0] */
+#define WM9081_EQ_B1_B_SHIFT                         0  /* EQ_B1_B - [15:0] */
+#define WM9081_EQ_B1_B_WIDTH                        16  /* EQ_B1_B - [15:0] */
+
+/*
+ * R46 (0x2E) - EQ 5
+ */
+#define WM9081_EQ_B1_PG_MASK                    0xFFFF  /* EQ_B1_PG - [15:0] */
+#define WM9081_EQ_B1_PG_SHIFT                        0  /* EQ_B1_PG - [15:0] */
+#define WM9081_EQ_B1_PG_WIDTH                       16  /* EQ_B1_PG - [15:0] */
+
+/*
+ * R47 (0x2F) - EQ 6
+ */
+#define WM9081_EQ_B2_A_MASK                     0xFFFF  /* EQ_B2_A - [15:0] */
+#define WM9081_EQ_B2_A_SHIFT                         0  /* EQ_B2_A - [15:0] */
+#define WM9081_EQ_B2_A_WIDTH                        16  /* EQ_B2_A - [15:0] */
+
+/*
+ * R48 (0x30) - EQ 7
+ */
+#define WM9081_EQ_B2_B_MASK                     0xFFFF  /* EQ_B2_B - [15:0] */
+#define WM9081_EQ_B2_B_SHIFT                         0  /* EQ_B2_B - [15:0] */
+#define WM9081_EQ_B2_B_WIDTH                        16  /* EQ_B2_B - [15:0] */
+
+/*
+ * R49 (0x31) - EQ 8
+ */
+#define WM9081_EQ_B2_C_MASK                     0xFFFF  /* EQ_B2_C - [15:0] */
+#define WM9081_EQ_B2_C_SHIFT                         0  /* EQ_B2_C - [15:0] */
+#define WM9081_EQ_B2_C_WIDTH                        16  /* EQ_B2_C - [15:0] */
+
+/*
+ * R50 (0x32) - EQ 9
+ */
+#define WM9081_EQ_B2_PG_MASK                    0xFFFF  /* EQ_B2_PG - [15:0] */
+#define WM9081_EQ_B2_PG_SHIFT                        0  /* EQ_B2_PG - [15:0] */
+#define WM9081_EQ_B2_PG_WIDTH                       16  /* EQ_B2_PG - [15:0] */
+
+/*
+ * R51 (0x33) - EQ 10
+ */
+#define WM9081_EQ_B4_A_MASK                     0xFFFF  /* EQ_B4_A - [15:0] */
+#define WM9081_EQ_B4_A_SHIFT                         0  /* EQ_B4_A - [15:0] */
+#define WM9081_EQ_B4_A_WIDTH                        16  /* EQ_B4_A - [15:0] */
+
+/*
+ * R52 (0x34) - EQ 11
+ */
+#define WM9081_EQ_B4_B_MASK                     0xFFFF  /* EQ_B4_B - [15:0] */
+#define WM9081_EQ_B4_B_SHIFT                         0  /* EQ_B4_B - [15:0] */
+#define WM9081_EQ_B4_B_WIDTH                        16  /* EQ_B4_B - [15:0] */
+
+/*
+ * R53 (0x35) - EQ 12
+ */
+#define WM9081_EQ_B4_C_MASK                     0xFFFF  /* EQ_B4_C - [15:0] */
+#define WM9081_EQ_B4_C_SHIFT                         0  /* EQ_B4_C - [15:0] */
+#define WM9081_EQ_B4_C_WIDTH                        16  /* EQ_B4_C - [15:0] */
+
+/*
+ * R54 (0x36) - EQ 13
+ */
+#define WM9081_EQ_B4_PG_MASK                    0xFFFF  /* EQ_B4_PG - [15:0] */
+#define WM9081_EQ_B4_PG_SHIFT                        0  /* EQ_B4_PG - [15:0] */
+#define WM9081_EQ_B4_PG_WIDTH                       16  /* EQ_B4_PG - [15:0] */
+
+/*
+ * R55 (0x37) - EQ 14
+ */
+#define WM9081_EQ_B3_A_MASK                     0xFFFF  /* EQ_B3_A - [15:0] */
+#define WM9081_EQ_B3_A_SHIFT                         0  /* EQ_B3_A - [15:0] */
+#define WM9081_EQ_B3_A_WIDTH                        16  /* EQ_B3_A - [15:0] */
+
+/*
+ * R56 (0x38) - EQ 15
+ */
+#define WM9081_EQ_B3_B_MASK                     0xFFFF  /* EQ_B3_B - [15:0] */
+#define WM9081_EQ_B3_B_SHIFT                         0  /* EQ_B3_B - [15:0] */
+#define WM9081_EQ_B3_B_WIDTH                        16  /* EQ_B3_B - [15:0] */
+
+/*
+ * R57 (0x39) - EQ 16
+ */
+#define WM9081_EQ_B3_C_MASK                     0xFFFF  /* EQ_B3_C - [15:0] */
+#define WM9081_EQ_B3_C_SHIFT                         0  /* EQ_B3_C - [15:0] */
+#define WM9081_EQ_B3_C_WIDTH                        16  /* EQ_B3_C - [15:0] */
+
+/*
+ * R58 (0x3A) - EQ 17
+ */
+#define WM9081_EQ_B3_PG_MASK                    0xFFFF  /* EQ_B3_PG - [15:0] */
+#define WM9081_EQ_B3_PG_SHIFT                        0  /* EQ_B3_PG - [15:0] */
+#define WM9081_EQ_B3_PG_WIDTH                       16  /* EQ_B3_PG - [15:0] */
+
+/*
+ * R59 (0x3B) - EQ 18
+ */
+#define WM9081_EQ_B5_A_MASK                     0xFFFF  /* EQ_B5_A - [15:0] */
+#define WM9081_EQ_B5_A_SHIFT                         0  /* EQ_B5_A - [15:0] */
+#define WM9081_EQ_B5_A_WIDTH                        16  /* EQ_B5_A - [15:0] */
+
+/*
+ * R60 (0x3C) - EQ 19
+ */
+#define WM9081_EQ_B5_B_MASK                     0xFFFF  /* EQ_B5_B - [15:0] */
+#define WM9081_EQ_B5_B_SHIFT                         0  /* EQ_B5_B - [15:0] */
+#define WM9081_EQ_B5_B_WIDTH                        16  /* EQ_B5_B - [15:0] */
+
+/*
+ * R61 (0x3D) - EQ 20
+ */
+#define WM9081_EQ_B5_PG_MASK                    0xFFFF  /* EQ_B5_PG - [15:0] */
+#define WM9081_EQ_B5_PG_SHIFT                        0  /* EQ_B5_PG - [15:0] */
+#define WM9081_EQ_B5_PG_WIDTH                       16  /* EQ_B5_PG - [15:0] */
+
+
+#endif
-- 
cgit v0.10.2


From 8369d5fa63260cc54464b4687aa6a0f78402d98e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 22 May 2009 16:23:36 +0200
Subject: ide: fix 40-wire cable detection for TSST SH-S202* ATAPI devices (v2)

Since 2.6.26 we support UDMA66 on ATAPI devices requiring IVB quirk:

  commit 8588a2b732928b343233af9b1855705b8286bed4
  ("ide: add SH-S202J to ivb_list[]")

We also later added support for more such devices in:

  commit e97564f362a93f8c248246c19828895950341252
  ("ide: More TSST drives with broken cable detection")

and in:

  commit 3ced5c49bd2d1f2c7f769e3a54385883de63a652
  ("ide: add TSSTcorp CDDVDW SH-S202H to ivb_list[]")

It turns out that such devices lack cable detection altogether
(which in turn results in incorrect detection of 40-wire cables
by our current cable detection strategy) so always handle them
by trusting host-side cable detection only.

v2:
Model detection fixup from Martin.

Reported-and-tested-by: Martin Lottermoser <Martin.Lottermoser@t-online.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index c19a221..06fe002 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -206,8 +206,6 @@ EXPORT_SYMBOL_GPL(ide_in_drive_list);
 
 /*
  * Early UDMA66 devices don't set bit14 to 1, only bit13 is valid.
- * We list them here and depend on the device side cable detection for them.
- *
  * Some optical devices with the buggy firmwares have the same problem.
  */
 static const struct drive_list_entry ivb_list[] = {
@@ -251,10 +249,25 @@ u8 eighty_ninty_three(ide_drive_t *drive)
 	 * - force bit13 (80c cable present) check also for !ivb devices
 	 *   (unless the slave device is pre-ATA3)
 	 */
-	if ((id[ATA_ID_HW_CONFIG] & 0x4000) ||
-	    (ivb && (id[ATA_ID_HW_CONFIG] & 0x2000)))
+	if (id[ATA_ID_HW_CONFIG] & 0x4000)
 		return 1;
 
+	if (ivb) {
+		const char *model = (char *)&id[ATA_ID_PROD];
+
+		if (strstr(model, "TSSTcorp CDDVDW SH-S202")) {
+			/*
+			 * These ATAPI devices always report 80c cable
+			 * so we have to depend on the host in this case.
+			 */
+			if (hwif->cbl == ATA_CBL_PATA80)
+				return 1;
+		} else {
+			/* Depend on the device side cable detection. */
+			if (id[ATA_ID_HW_CONFIG] & 0x2000)
+				return 1;
+		}
+	}
 no_80w:
 	if (drive->dev_flags & IDE_DFLAG_UDMA33_WARNED)
 		return 0;
-- 
cgit v0.10.2


From e3b29f05124b07303088795396ff858811d2acb8 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 22 May 2009 16:23:37 +0200
Subject: ide: fix OOPS during ide-cd error recovery

On Tuesday 19 May 2009 20:29:28 Martin Lottermoser wrote:

>   hdc: cdrom_decode_status: error=0x40 <3>{ LastFailedSense=0x04 }
>   ide: failed opcode was: unknown
>   hdc: DMA disabled
>   ------------[ cut here ]------------
>   kernel BUG at drivers/ide/ide-io.c:872!

It is possible for ide-cd to ignore ide_error()'s return value under
some circumstances.  Workaround it in ide_intr() and ide_timer_expiry()
by checking if there is a device/port reset pending currently.

Fixes bug #13345:

	http://bugzilla.kernel.org/show_bug.cgi?id=13345

Reported-by: Martin Lottermoser <Martin.Lottermoser@t-online.de>
Reported-and-tested-by: Modestas Vainius <modestas@vainius.eu>
Cc: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 35dc38d..6415a2e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -696,7 +696,7 @@ void ide_timer_expiry (unsigned long data)
 		}
 		spin_lock_irq(&hwif->lock);
 		enable_irq(hwif->irq);
-		if (startstop == ide_stopped) {
+		if (startstop == ide_stopped && hwif->polling == 0) {
 			ide_unlock_port(hwif);
 			plug_device = 1;
 		}
@@ -868,7 +868,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	 * same irq as is currently being serviced here, and Linux
 	 * won't allow another of the same (on any CPU) until we return.
 	 */
-	if (startstop == ide_stopped) {
+	if (startstop == ide_stopped && hwif->polling == 0) {
 		BUG_ON(hwif->handler);
 		ide_unlock_port(hwif);
 		plug_device = 1;
-- 
cgit v0.10.2


From 26bfcf21e25fa090f099fa0ccf201424989cbd7b Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 22 May 2009 16:23:37 +0200
Subject: ide: fix printk() levels in ide_dump_ata[pi]_error()

Fixes "<3>" in error messages like this one:

hdc: cdrom_decode_status: error=0x40 <3>{ LastFailedSense=0x04 }

Reported-by: Martin Lottermoser <Martin.Lottermoser@t-online.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 56ff8c4..85b9bae 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -91,7 +91,7 @@ static void ide_dump_sector(ide_drive_t *drive)
 
 static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
 {
-	printk(KERN_ERR "{ ");
+	printk(KERN_CONT "{ ");
 	if (err & ATA_ABORTED)
 		printk(KERN_CONT "DriveStatusError ");
 	if (err & ATA_ICRC)
@@ -121,7 +121,7 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
 
 static void ide_dump_atapi_error(ide_drive_t *drive, u8 err)
 {
-	printk(KERN_ERR "{ ");
+	printk(KERN_CONT "{ ");
 	if (err & ATAPI_ILI)
 		printk(KERN_CONT "IllegalLengthIndication ");
 	if (err & ATAPI_EOM)
-- 
cgit v0.10.2


From cc30137a221372c67a943ad9ea68121a2bd57a6e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 22 May 2009 16:23:38 +0200
Subject: ide: improve failed opcode reporting

Nowadays we (almost) always store the currently executing command
in hwif->cmd so we can use it for the failed opcode reporting.

Cc: Martin Lottermoser <Martin.Lottermoser@t-online.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 85b9bae..2148df8 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -31,24 +31,6 @@ void ide_toggle_bounce(ide_drive_t *drive, int on)
 		blk_queue_bounce_limit(drive->queue, addr);
 }
 
-static void ide_dump_opcode(ide_drive_t *drive)
-{
-	struct request *rq = drive->hwif->rq;
-	struct ide_cmd *cmd = NULL;
-
-	if (!rq)
-		return;
-
-	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
-		cmd = rq->special;
-
-	printk(KERN_ERR "ide: failed opcode was: ");
-	if (cmd == NULL)
-		printk(KERN_CONT "unknown\n");
-	else
-		printk(KERN_CONT "0x%02x\n", cmd->tf.command);
-}
-
 u64 ide_get_lba_addr(struct ide_cmd *cmd, int lba48)
 {
 	struct ide_taskfile *tf = &cmd->tf;
@@ -179,7 +161,10 @@ u8 ide_dump_status(ide_drive_t *drive, const char *msg, u8 stat)
 		else
 			ide_dump_atapi_error(drive, err);
 	}
-	ide_dump_opcode(drive);
+
+	printk(KERN_ERR "%s: possibly failed opcode: 0x%02x\n",
+		drive->name, drive->hwif->cmd.tf.command);
+
 	return err;
 }
 EXPORT_SYMBOL(ide_dump_status);
-- 
cgit v0.10.2


From 28ee9bc5cc42776e0364399b401a64906ac1ac8e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 22 May 2009 16:23:38 +0200
Subject: ide: report timeouts in ide_busy_sleep()

* change 'hwif' argument to 'drive'
* report an error on timeout

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 7f264ed..c895ed5 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -295,7 +295,7 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id)
 
 	timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
 
-	if (ide_busy_sleep(hwif, timeout, use_altstatus))
+	if (ide_busy_sleep(drive, timeout, use_altstatus))
 		return 1;
 
 	/* wait for IRQ and ATA_DRQ */
@@ -316,8 +316,9 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id)
 	return rc;
 }
 
-int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus)
+int ide_busy_sleep(ide_drive_t *drive, unsigned long timeout, int altstatus)
 {
+	ide_hwif_t *hwif = drive->hwif;
 	u8 stat;
 
 	timeout += jiffies;
@@ -330,6 +331,8 @@ int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus)
 			return 0;
 	} while (time_before(jiffies, timeout));
 
+	printk(KERN_ERR "%s: timeout in %s\n", drive->name, __func__);
+
 	return 1;	/* drive timed-out */
 }
 
@@ -420,7 +423,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
 			tp_ops->dev_select(drive);
 			msleep(50);
 			tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
-			(void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0);
+			(void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0);
 			rc = ide_dev_read_id(drive, cmd, id);
 		}
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ff65fff..9fed365 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1109,7 +1109,7 @@ void ide_fix_driveid(u16 *);
 
 extern void ide_fixstring(u8 *, const int, const int);
 
-int ide_busy_sleep(ide_hwif_t *, unsigned long, int);
+int ide_busy_sleep(ide_drive_t *, unsigned long, int);
 
 int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long);
 
-- 
cgit v0.10.2


From 5993856e53fbc4b4f28e2d481deaebeb715b1267 Mon Sep 17 00:00:00 2001
From: Harald Welte <HaraldWelte@viatech.com>
Date: Fri, 22 May 2009 16:23:39 +0200
Subject: via82cxxx: Add VIA VX855 PCI Device ID

This patch adds the PCI Device ID 0xc409 to the PCI ID table of via82cxxx.c,
as well as the 0x8409 south bridge ID.

This is required to make the IDE driver work on the VX855/VX875 integrated
chipset.

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Cc: Joseph Chan <JosephChan@via.com.tw>
Cc: Bruce Chang <BruceChang@via.com.tw>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
index 3ff7231..028de26 100644
--- a/drivers/ide/via82cxxx.c
+++ b/drivers/ide/via82cxxx.c
@@ -67,6 +67,7 @@ static struct via_isa_bridge {
 	u8 udma_mask;
 	u8 flags;
 } via_isa_bridges[] = {
+	{ "vx855",	PCI_DEVICE_ID_VIA_VX855,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "vx800",	PCI_DEVICE_ID_VIA_VX800,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "cx700",	PCI_DEVICE_ID_VIA_CX700,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "vt8237s",	PCI_DEVICE_ID_VIA_8237S,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
@@ -474,6 +475,7 @@ static const struct pci_device_id via_pci_tbl[] = {
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C576_1),  0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C586_1),  0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_CX700_IDE), 0 },
+	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_VX855_IDE), 0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_6410),      1 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_SATA_EIDE), 1 },
 	{ 0, },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 06ba90c..0f71812 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1406,7 +1406,7 @@
 #define PCI_DEVICE_ID_VIA_82C598_1	0x8598
 #define PCI_DEVICE_ID_VIA_838X_1	0xB188
 #define PCI_DEVICE_ID_VIA_83_87XX_1	0xB198
-#define PCI_DEVICE_ID_VIA_C409_IDE	0XC409
+#define PCI_DEVICE_ID_VIA_VX855_IDE	0xC409
 #define PCI_DEVICE_ID_VIA_ANON		0xFFFF
 
 #define PCI_VENDOR_ID_SIEMENS           0x110A
-- 
cgit v0.10.2


From 9a2845c453d170e4e9b1437fa671dbf39b0e7bd8 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Wed, 20 May 2009 13:36:17 -0500
Subject: ipmi: fix ipmi_si modprobe hang

Instead of queuing IPMB messages before channel initialization, just
throw them away.  Nobody will be listening for them at this point,
anyway, and they will clog up the queue and nothing will be delivered
if we queue them.

Also set the current channel to the number of channels, as this value
is used to tell if the channel information has been initialized.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Cc: Ferenc Wagner <wferi@niif.hu>
Cc: Dan Frazier <dannf@hp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index aa83a08..0905079 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -2856,6 +2856,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers,
 		/* Assume a single IPMB channel at zero. */
 		intf->channels[0].medium = IPMI_CHANNEL_MEDIUM_IPMB;
 		intf->channels[0].protocol = IPMI_CHANNEL_PROTOCOL_IPMB;
+		intf->curr_channel = IPMI_MAX_CHANNELS;
 	}
 
 	if (rv == 0)
@@ -3648,13 +3649,13 @@ static int handle_new_recv_msg(ipmi_smi_t          intf,
 		}
 
 		/*
-		** We need to make sure the channels have been initialized.
-		** The channel_handler routine will set the "curr_channel"
-		** equal to or greater than IPMI_MAX_CHANNELS when all the
-		** channels for this interface have been initialized.
-		*/
+		 * We need to make sure the channels have been initialized.
+		 * The channel_handler routine will set the "curr_channel"
+		 * equal to or greater than IPMI_MAX_CHANNELS when all the
+		 * channels for this interface have been initialized.
+		 */
 		if (intf->curr_channel < IPMI_MAX_CHANNELS) {
-			requeue = 1;     /* Just put the message back for now */
+			requeue = 0; /* Throw the message away */
 			goto out;
 		}
 
-- 
cgit v0.10.2


From c6eb13847ba081552d2af644219bddeff7110caf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 22 May 2009 18:18:28 +0200
Subject: perf_counter tools: increase limits

I tried to run with 300 active counters and the tools bailed out
because our limit was at 64. So increase the counter limit to 1024
and the CPU limit to 4096.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
index 6fa3656..81a7374 100644
--- a/Documentation/perf_counter/perf.h
+++ b/Documentation/perf_counter/perf.h
@@ -54,8 +54,8 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
 		       group_fd, flags);
 }
 
-#define MAX_COUNTERS			64
-#define MAX_NR_CPUS			256
+#define MAX_COUNTERS			1024
+#define MAX_NR_CPUS			4096
 
 #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
 
-- 
cgit v0.10.2


From afe6d7e3c4a9aba020637f4ae15527a89ba31f21 Mon Sep 17 00:00:00 2001
From: Andreas Mohr <andi@lisas.de>
Date: Fri, 22 May 2009 17:48:58 +0200
Subject: ALSA: Kill truncate warning by shortening Sigmatel-specific AC97
 control name

ALSA sound/core/control.c:232: Control name 'Sigmatel Surround Phase
Inversion Playback Switch' truncated to 'Sigmatel Surround Phase
Inversion Playback ' bootup message by omitting weird Sigmatel prefix
in this case; also fix up the related ca0106 mixer control removal
part by using identical naming there.

Signed-off-by: Andreas Mohr <andi@lisas.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 81bc93e..7337abd 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -958,10 +958,13 @@ static int patch_sigmatel_stac9708_3d(struct snd_ac97 * ac97)
 }
 
 static const struct snd_kcontrol_new snd_ac97_sigmatel_4speaker =
-AC97_SINGLE("Sigmatel 4-Speaker Stereo Playback Switch", AC97_SIGMATEL_DAC2INVERT, 2, 1, 0);
+AC97_SINGLE("Sigmatel 4-Speaker Stereo Playback Switch",
+		AC97_SIGMATEL_DAC2INVERT, 2, 1, 0);
 
+/* "Sigmatel " removed due to excessive name length: */
 static const struct snd_kcontrol_new snd_ac97_sigmatel_phaseinvert =
-AC97_SINGLE("Sigmatel Surround Phase Inversion Playback Switch", AC97_SIGMATEL_DAC2INVERT, 3, 1, 0);
+AC97_SINGLE("Surround Phase Inversion Playback Switch",
+		AC97_SIGMATEL_DAC2INVERT, 3, 1, 0);
 
 static const struct snd_kcontrol_new snd_ac97_sigmatel_controls[] = {
 AC97_SINGLE("Sigmatel DAC 6dB Attenuate", AC97_SIGMATEL_ANALOG, 1, 1, 0),
diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index ad28887..c111efe 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -800,7 +800,7 @@ int __devinit snd_ca0106_mixer(struct snd_ca0106 *emu)
 		"Capture Volume",
 		"External Amplifier",
 		"Sigmatel 4-Speaker Stereo Playback Switch",
-		"Sigmatel Surround Phase Inversion Playback ",
+		"Surround Phase Inversion Playback Switch",
 		NULL
 	};
 	static char *ca0106_rename_ctls[] = {
-- 
cgit v0.10.2


From bca23dba760d6705c013f89113c46570378fb626 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 21 May 2009 11:46:16 -0700
Subject: x86, setup: revert ACPI 3 E820 extended attributes support

Remove ACPI 3 E820 extended memory attributes support.  At least one
vendor actively set all the flags to zero, but left ECX on return at
24.  This bug may be present in other BIOSes.

The breakage functionally means the ACPI 3 flags are probably
completely useless, and that no OS any time soon is going to rely on
their existence.  Therefore, drop support completely.  We may want to
revisit this question in the future, if we find ourselves actually
needing the flags.

This reverts all or part of the following checkins:

     cd670599b7b00d9263f6f11a05c0edeb9cbedaf3
     c549e71d073a6e9a4847497344db28a784061455

However, retain the part from the latter commit that copies e820 into
a temporary buffer; that is an unrelated BIOS workaround.  Put in a
comment to explain that part.

See https://bugzilla.redhat.com/show_bug.cgi?id=499396 for some
additional information.

[ Impact: detect all memory on affected machines ]

Reported-by: Thomas J. Baker <tjb@unh.edu>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Len Brown <len.brown@intel.com>
Cc: Chuck Ebbert <cebbert@redhat.com>
Cc: Kyle McMartin <kmcmartin@redhat.com>
Cc: Matt Domsch <matt_domsch@dell.com>

diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 5054c2d..74b3d2b 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -17,11 +17,6 @@
 
 #define SMAP	0x534d4150	/* ASCII "SMAP" */
 
-struct e820_ext_entry {
-	struct e820entry std;
-	u32 ext_flags;
-} __attribute__((packed));
-
 static int detect_memory_e820(void)
 {
 	int count = 0;
@@ -29,13 +24,21 @@ static int detect_memory_e820(void)
 	u32 size, id, edi;
 	u8 err;
 	struct e820entry *desc = boot_params.e820_map;
-	static struct e820_ext_entry buf; /* static so it is zeroed */
+	static struct e820entry buf; /* static so it is zeroed */
 
 	/*
-	 * Set this here so that if the BIOS doesn't change this field
-	 * but still doesn't change %ecx, we're still okay...
+	 * Note: at least one BIOS is known which assumes that the
+	 * buffer pointed to by one e820 call is the same one as
+	 * the previous call, and only changes modified fields.  Therefore,
+	 * we use a temporary buffer and copy the results entry by entry.
+	 *
+	 * This routine deliberately does not try to account for
+	 * ACPI 3+ extended attributes.  This is because there are
+	 * BIOSes in the field which report zero for the valid bit for
+	 * all ranges, and we don't currently make any use of the
+	 * other attribute bits.  Revisit this if we see the extended
+	 * attribute bits deployed in a meaningful way in the future.
 	 */
-	buf.ext_flags = 1;
 
 	do {
 		size = sizeof buf;
@@ -66,13 +69,7 @@ static int detect_memory_e820(void)
 			break;
 		}
 
-		/* ACPI 3.0 added the extended flags support.  If bit 0
-		   in the extended flags is zero, we're supposed to simply
-		   ignore the entry -- a backwards incompatible change! */
-		if (size > 20 && !(buf.ext_flags & 1))
-			continue;
-
-		*desc++ = buf.std;
+		*desc++ = buf;
 		count++;
 	} while (next && count < ARRAY_SIZE(boot_params.e820_map));
 
-- 
cgit v0.10.2


From 14b60391587ab9b2207c4fb6281763a93ae85e0f Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Wed, 20 May 2009 16:47:08 -0400
Subject: i915: support 8xx desktop cursors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For some reason we never added 8xx desktop cursor support to the
kernel.  This patch fixes that.

[krh: Also set the size on pre-i915 hw.]
Tested-by: Kristian Høgsberg <krh@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4a24c90..717b6a8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1145,7 +1145,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			mutex_unlock(&dev->struct_mutex);
 			return VM_FAULT_SIGBUS;
 		}
-		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
 	}
 
 	/* Need a new fence register? */
@@ -1375,7 +1375,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 			mutex_unlock(&dev->struct_mutex);
 			return ret;
 		}
-		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
 	}
 
 	drm_gem_object_unreference(obj);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 15da44c..9668cc0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1410,9 +1410,25 @@
 
 /* Cursor A & B regs */
 #define CURACNTR		0x70080
+/* Old style CUR*CNTR flags (desktop 8xx) */
+#define   CURSOR_ENABLE		0x80000000
+#define   CURSOR_GAMMA_ENABLE	0x40000000
+#define   CURSOR_STRIDE_MASK	0x30000000
+#define   CURSOR_FORMAT_SHIFT	24
+#define   CURSOR_FORMAT_MASK	(0x07 << CURSOR_FORMAT_SHIFT)
+#define   CURSOR_FORMAT_2C	(0x00 << CURSOR_FORMAT_SHIFT)
+#define   CURSOR_FORMAT_3C	(0x01 << CURSOR_FORMAT_SHIFT)
+#define   CURSOR_FORMAT_4C	(0x02 << CURSOR_FORMAT_SHIFT)
+#define   CURSOR_FORMAT_ARGB	(0x04 << CURSOR_FORMAT_SHIFT)
+#define   CURSOR_FORMAT_XRGB	(0x05 << CURSOR_FORMAT_SHIFT)
+/* New style CUR*CNTR flags */
+#define   CURSOR_MODE		0x27
 #define   CURSOR_MODE_DISABLE   0x00
 #define   CURSOR_MODE_64_32B_AX 0x07
 #define   CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX)
+#define   MCURSOR_PIPE_SELECT	(1 << 28)
+#define   MCURSOR_PIPE_A	0x00
+#define   MCURSOR_PIPE_B	(1 << 28)
 #define   MCURSOR_GAMMA_ENABLE  (1 << 26)
 #define CURABASE		0x70084
 #define CURAPOS			0x70088
@@ -1420,6 +1436,7 @@
 #define   CURSOR_POS_SIGN       0x8000
 #define   CURSOR_X_SHIFT        0
 #define   CURSOR_Y_SHIFT        16
+#define CURSIZE			0x700a0
 #define CURBCNTR		0x700c0
 #define CURBBASE		0x700c4
 #define CURBPOS			0x700c8
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3387cf3..c9d6f10 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1357,7 +1357,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 	int pipe = intel_crtc->pipe;
 	uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
 	uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
-	uint32_t temp;
+	uint32_t temp = I915_READ(control);
 	size_t addr;
 	int ret;
 
@@ -1366,7 +1366,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 	/* if we want to turn off the cursor ignore width and height */
 	if (!handle) {
 		DRM_DEBUG("cursor off\n");
-		temp = CURSOR_MODE_DISABLE;
+		if (IS_MOBILE(dev) || IS_I9XX(dev)) {
+			temp &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE);
+			temp |= CURSOR_MODE_DISABLE;
+		} else {
+			temp &= ~(CURSOR_ENABLE | CURSOR_GAMMA_ENABLE);
+		}
 		addr = 0;
 		bo = NULL;
 		mutex_lock(&dev->struct_mutex);
@@ -1409,10 +1414,19 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 		addr = obj_priv->phys_obj->handle->busaddr;
 	}
 
-	temp = 0;
-	/* set the pipe for the cursor */
-	temp |= (pipe << 28);
-	temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
+	if (!IS_I9XX(dev))
+		I915_WRITE(CURSIZE, (height << 12) | width);
+
+	/* Hooray for CUR*CNTR differences */
+	if (IS_MOBILE(dev) || IS_I9XX(dev)) {
+		temp &= ~(CURSOR_MODE | MCURSOR_PIPE_SELECT);
+		temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
+		temp |= (pipe << 28); /* Connect to correct pipe */
+	} else {
+		temp &= ~(CURSOR_FORMAT_MASK);
+		temp |= CURSOR_ENABLE;
+		temp |= CURSOR_FORMAT_ARGB | CURSOR_GAMMA_ENABLE;
+	}
 
  finish:
 	I915_WRITE(control, temp);
-- 
cgit v0.10.2


From 8863170628da4b0b461eb96bf797df1dca0bd03e Mon Sep 17 00:00:00 2001
From: Ma Ling <ling.ma@intel.com>
Date: Wed, 13 May 2009 11:19:55 +0800
Subject: drm/i915: Fetch SDVO LVDS mode lines from VBT, then reserve them

Signed-off-by: Ma Ling <ling.ma@intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9b149fe..c431fa5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -180,7 +180,8 @@ typedef struct drm_i915_private {
 	int backlight_duty_cycle;  /* restore backlight to this value */
 	bool panel_wants_dither;
 	struct drm_display_mode *panel_fixed_mode;
-	struct drm_display_mode *vbt_mode; /* if any */
+	struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
+	struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
 
 	/* Feature bits from the VBIOS */
 	unsigned int int_tv_support:1;
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index fc28e2b..9d78cff 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -57,9 +57,43 @@ find_section(struct bdb_header *bdb, int section_id)
 	return NULL;
 }
 
-/* Try to find panel data */
 static void
-parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode,
+			struct lvds_dvo_timing *dvo_timing)
+{
+	panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) |
+		dvo_timing->hactive_lo;
+	panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay +
+		((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo);
+	panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start +
+		dvo_timing->hsync_pulse_width;
+	panel_fixed_mode->htotal = panel_fixed_mode->hdisplay +
+		((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo);
+
+	panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) |
+		dvo_timing->vactive_lo;
+	panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay +
+		dvo_timing->vsync_off;
+	panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start +
+		dvo_timing->vsync_pulse_width;
+	panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay +
+		((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo);
+	panel_fixed_mode->clock = dvo_timing->clock * 10;
+	panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED;
+
+	/* Some VBTs have bogus h/vtotal values */
+	if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal)
+		panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1;
+	if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal)
+		panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1;
+
+	drm_mode_set_name(panel_fixed_mode);
+}
+
+/* Try to find integrated panel data */
+static void
+parse_lfp_panel_data(struct drm_i915_private *dev_priv,
+			    struct bdb_header *bdb)
 {
 	struct bdb_lvds_options *lvds_options;
 	struct bdb_lvds_lfp_data *lvds_lfp_data;
@@ -91,38 +125,45 @@ parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
 	panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode),
 				      DRM_MEM_DRIVER);
 
-	panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) |
-		dvo_timing->hactive_lo;
-	panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay +
-		((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo);
-	panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start +
-		dvo_timing->hsync_pulse_width;
-	panel_fixed_mode->htotal = panel_fixed_mode->hdisplay +
-		((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo);
+	fill_detail_timing_data(panel_fixed_mode, dvo_timing);
 
-	panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) |
-		dvo_timing->vactive_lo;
-	panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay +
-		dvo_timing->vsync_off;
-	panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start +
-		dvo_timing->vsync_pulse_width;
-	panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay +
-		((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo);
-	panel_fixed_mode->clock = dvo_timing->clock * 10;
-	panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED;
+	dev_priv->lfp_lvds_vbt_mode = panel_fixed_mode;
 
-	/* Some VBTs have bogus h/vtotal values */
-	if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal)
-		panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1;
-	if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal)
-		panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1;
+	DRM_DEBUG("Found panel mode in BIOS VBT tables:\n");
+	drm_mode_debug_printmodeline(panel_fixed_mode);
 
-	drm_mode_set_name(panel_fixed_mode);
+	return;
+}
+
+/* Try to find sdvo panel data */
+static void
+parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
+		      struct bdb_header *bdb)
+{
+	struct bdb_sdvo_lvds_options *sdvo_lvds_options;
+	struct lvds_dvo_timing *dvo_timing;
+	struct drm_display_mode *panel_fixed_mode;
 
-	dev_priv->vbt_mode = panel_fixed_mode;
+	dev_priv->sdvo_lvds_vbt_mode = NULL;
 
-	DRM_DEBUG("Found panel mode in BIOS VBT tables:\n");
-	drm_mode_debug_printmodeline(panel_fixed_mode);
+	sdvo_lvds_options = find_section(bdb, BDB_SDVO_LVDS_OPTIONS);
+	if (!sdvo_lvds_options)
+		return;
+
+	dvo_timing = find_section(bdb, BDB_SDVO_PANEL_DTDS);
+	if (!dvo_timing)
+		return;
+
+	panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode),
+				      DRM_MEM_DRIVER);
+
+	if (!panel_fixed_mode)
+		return;
+
+	fill_detail_timing_data(panel_fixed_mode,
+			dvo_timing + sdvo_lvds_options->panel_type);
+
+	dev_priv->sdvo_lvds_vbt_mode = panel_fixed_mode;
 
 	return;
 }
@@ -199,7 +240,8 @@ intel_init_bios(struct drm_device *dev)
 
 	/* Grab useful general definitions */
 	parse_general_features(dev_priv, bdb);
-	parse_panel_data(dev_priv, bdb);
+	parse_lfp_panel_data(dev_priv, bdb);
+	parse_sdvo_panel_data(dev_priv, bdb);
 
 	pci_unmap_rom(pdev, bios);
 
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index de621aa..8ca2cde 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -279,6 +279,23 @@ struct vch_bdb_22 {
 	struct vch_panel_data panels[16];
 } __attribute__((packed));
 
+struct bdb_sdvo_lvds_options {
+	u8 panel_backlight;
+	u8 h40_set_panel_type;
+	u8 panel_type;
+	u8 ssc_clk_freq;
+	u16 als_low_trip;
+	u16 als_high_trip;
+	u8 sclalarcoeff_tab_row_num;
+	u8 sclalarcoeff_tab_row_size;
+	u8 coefficient[8];
+	u8 panel_misc_bits_1;
+	u8 panel_misc_bits_2;
+	u8 panel_misc_bits_3;
+	u8 panel_misc_bits_4;
+} __attribute__((packed));
+
+
 bool intel_init_bios(struct drm_device *dev);
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 439a865..53731f0 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -511,10 +511,10 @@ void intel_lvds_init(struct drm_device *dev)
 	}
 
 	/* Failed to get EDID, what about VBT? */
-	if (dev_priv->vbt_mode) {
+	if (dev_priv->lfp_lvds_vbt_mode) {
 		mutex_lock(&dev->mode_config.mutex);
 		dev_priv->panel_fixed_mode =
-			drm_mode_duplicate(dev, dev_priv->vbt_mode);
+			drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
 		mutex_unlock(&dev->mode_config.mutex);
 		if (dev_priv->panel_fixed_mode) {
 			dev_priv->panel_fixed_mode->type |=
-- 
cgit v0.10.2


From 7086c87fb1446ceb37918ffa0941359a7c2ec6cf Mon Sep 17 00:00:00 2001
From: Ma Ling <ling.ma@intel.com>
Date: Wed, 13 May 2009 11:20:06 +0800
Subject: drm/i915: Return SDVO LVDS VBT mode if no EDID modes are detected.

Some new SDVO LVDS hardware doesn't have DDC available, and this should
fix the display on it.

Signed-off-by: Ma Ling <ling.ma@intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 9913651..f79ebf4 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -69,6 +69,10 @@ struct intel_sdvo_priv {
 	 * This is set if we treat the device as HDMI, instead of DVI.
 	 */
 	bool is_hdmi;
+	/**
+	 * This is set if we detect output of sdvo device as LVDS.
+	 */
+	bool is_lvds;
 
 	/**
 	 * Returned SDTV resolutions allowed for the current format, if the
@@ -1543,6 +1547,37 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector)
 		}
 }
 
+static void intel_sdvo_get_lvds_modes(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+
+	/*
+	 * Attempt to get the mode list from DDC.
+	 * Assume that the preferred modes are
+	 * arranged in priority order.
+	 */
+	/* set the bus switch and get the modes */
+	intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus);
+	intel_ddc_get_modes(intel_output);
+	if (list_empty(&connector->probed_modes) == false)
+		return;
+
+	/* Fetch modes from VBT */
+	if (dev_priv->sdvo_lvds_vbt_mode != NULL) {
+		struct drm_display_mode *newmode;
+		newmode = drm_mode_duplicate(connector->dev,
+					     dev_priv->sdvo_lvds_vbt_mode);
+		if (newmode != NULL) {
+			/* Guarantee the mode is preferred */
+			newmode->type = (DRM_MODE_TYPE_PREFERRED |
+					 DRM_MODE_TYPE_DRIVER);
+			drm_mode_probed_add(connector, newmode);
+		}
+	}
+}
+
 static int intel_sdvo_get_modes(struct drm_connector *connector)
 {
 	struct intel_output *output = to_intel_output(connector);
@@ -1550,6 +1585,8 @@ static int intel_sdvo_get_modes(struct drm_connector *connector)
 
 	if (sdvo_priv->is_tv)
 		intel_sdvo_get_tv_modes(connector);
+	else if (sdvo_priv->is_lvds == true)
+		intel_sdvo_get_lvds_modes(connector);
 	else
 		intel_sdvo_get_ddc_modes(connector);
 
@@ -1720,6 +1757,8 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		}
 	}
 
+	/* In defaut case sdvo lvds is false */
+	sdvo_priv->is_lvds = false;
 	intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps);
 
 	if (sdvo_priv->caps.output_flags &
@@ -1773,6 +1812,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 		encoder_type = DRM_MODE_ENCODER_LVDS;
 		connector_type = DRM_MODE_CONNECTOR_LVDS;
+		sdvo_priv->is_lvds = true;
 	}
 	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1)
 	{
@@ -1780,6 +1820,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 		encoder_type = DRM_MODE_ENCODER_LVDS;
 		connector_type = DRM_MODE_CONNECTOR_LVDS;
+		sdvo_priv->is_lvds = true;
 	}
 	else
 	{
-- 
cgit v0.10.2


From ad5b2a6db3eddc41358d8a73f5cfe1c38e7e3a19 Mon Sep 17 00:00:00 2001
From: Jonas Bonn <jonas@southpole.se>
Date: Fri, 15 May 2009 09:10:41 +0200
Subject: drm/i915: Determine type before initialising connector

drm_connector_init sets both the connector type and the connector type_id
on the newly initialised connector.  As the connector type_id is coupled to
the connector type, the connector type cannot simply be modified on an
initialised connector.

This patch changes the order of operations on intel_sdvo_init so that the
type is determined before the connector is intialised.

This fixes a bug whereby the name card0-VGA-1 would be allocted to both a
CRT and an SDVO connector since the SDVO connector would be initialised
with type 'unknown' and hence have its type_id assigned from the wrong pool.

Signed-off-by: Jonas Bonn <jonas@southpole.se>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index f79ebf4..ded122c 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1713,17 +1713,9 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		return false;
 	}
 
-	connector = &intel_output->base;
-
-	drm_connector_init(dev, connector, &intel_sdvo_connector_funcs,
-			   DRM_MODE_CONNECTOR_Unknown);
-	drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs);
 	sdvo_priv = (struct intel_sdvo_priv *)(intel_output + 1);
 	intel_output->type = INTEL_OUTPUT_SDVO;
 
-	connector->interlace_allowed = 0;
-	connector->doublescan_allowed = 0;
-
 	/* setup the DDC bus. */
 	if (output_device == SDVOB)
 		i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOB");
@@ -1731,7 +1723,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOC");
 
 	if (!i2cbus)
-		goto err_connector;
+		goto err_inteloutput;
 
 	sdvo_priv->i2c_bus = i2cbus;
 
@@ -1747,7 +1739,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 	intel_output->i2c_bus = i2cbus;
 	intel_output->dev_priv = sdvo_priv;
 
-
 	/* Read the regs to test if we can talk to the device */
 	for (i = 0; i < 0x40; i++) {
 		if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) {
@@ -1768,7 +1759,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		else
 			sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS1;
 
-		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 		encoder_type = DRM_MODE_ENCODER_TMDS;
 		connector_type = DRM_MODE_CONNECTOR_DVID;
 
@@ -1786,7 +1776,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_SVID0)
 	{
 		sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0;
-		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 		encoder_type = DRM_MODE_ENCODER_TVDAC;
 		connector_type = DRM_MODE_CONNECTOR_SVIDEO;
 		sdvo_priv->is_tv = true;
@@ -1795,21 +1784,18 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0)
 	{
 		sdvo_priv->controlled_output = SDVO_OUTPUT_RGB0;
-		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 		encoder_type = DRM_MODE_ENCODER_DAC;
 		connector_type = DRM_MODE_CONNECTOR_VGA;
 	}
 	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB1)
 	{
 		sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1;
-		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 		encoder_type = DRM_MODE_ENCODER_DAC;
 		connector_type = DRM_MODE_CONNECTOR_VGA;
 	}
 	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS0)
 	{
 		sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0;
-		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 		encoder_type = DRM_MODE_ENCODER_LVDS;
 		connector_type = DRM_MODE_CONNECTOR_LVDS;
 		sdvo_priv->is_lvds = true;
@@ -1817,7 +1803,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1)
 	{
 		sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS1;
-		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 		encoder_type = DRM_MODE_ENCODER_LVDS;
 		connector_type = DRM_MODE_CONNECTOR_LVDS;
 		sdvo_priv->is_lvds = true;
@@ -1836,9 +1821,16 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		goto err_i2c;
 	}
 
+	connector = &intel_output->base;
+	drm_connector_init(dev, connector, &intel_sdvo_connector_funcs,
+			   connector_type);
+	drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs);
+	connector->interlace_allowed = 0;
+	connector->doublescan_allowed = 0;
+	connector->display_info.subpixel_order = SubPixelHorizontalRGB;
+
 	drm_encoder_init(dev, &intel_output->enc, &intel_sdvo_enc_funcs, encoder_type);
 	drm_encoder_helper_add(&intel_output->enc, &intel_sdvo_helper_funcs);
-	connector->connector_type = connector_type;
 
 	drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
 	drm_sysfs_connector_add(connector);
@@ -1876,8 +1868,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 
 err_i2c:
 	intel_i2c_destroy(intel_output->i2c_bus);
-err_connector:
-	drm_connector_cleanup(connector);
+err_inteloutput:
 	kfree(intel_output);
 
 	return false;
-- 
cgit v0.10.2


From 0c752a93353d9b17dbe148312d732fbe06d235e1 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 22 May 2009 12:17:45 -0700
Subject: x86: introduce noxsave boot parameter

Introduce "noxsave" boot parameter which will disable the cpu's xsave/xrstor
capabilities. Useful for debugging and working around xsave related issues.

[ Impact: make it possible to debug problems in the field ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbf..fd5cac0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1535,6 +1535,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			register save and restore. The kernel will only save
 			legacy floating-point registers on task switch.
 
+	noxsave		[BUGS=X86] Disables x86 extended register state save
+			and restore using xsave. The kernel will fallback to
+			enabling legacy floating-point and sse state.
+
 	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
 			wfi(ARM) instruction doesn't work correctly and not to
 			use it. This is also useful when using JTAG debugger.
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c1caefc..77848d9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -114,6 +114,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
+static int __init x86_xsave_setup(char *s)
+{
+	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+	return 1;
+}
+__setup("noxsave", x86_xsave_setup);
+
 #ifdef CONFIG_X86_32
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_serial_nr __cpuinitdata = 1;
-- 
cgit v0.10.2


From 619ac3b75a1e9b2df66857f6a0fb466f1da5fa9e Mon Sep 17 00:00:00 2001
From: Ma Ling <ling.ma@intel.com>
Date: Mon, 18 May 2009 16:12:46 +0800
Subject: drm/i915: Use an I2C algo to do the flip to SDVO DDC bus.

Previously, we would set the control bus switch before calls were made
to request EDID information over DDC.  But recently the DDC code started
doing multiple I2C transfers to get the EDID extensions as well.  This
tripped up SDVO, because the control bus switch is only in effect until
the next STOP after a START.  By doing our own algo, we can wrap each i2c
transaction on the DDC I2C bus with the control bus switch it requires.

freedesktop.org bug #21042

Signed-off-by: Ma Ling <ling.ma@intel.com>
[anholt: Hand application for conflict, fixed error path]
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index ded122c..f3ef6bf 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1402,10 +1402,8 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect
 static void intel_sdvo_get_ddc_modes(struct drm_connector *connector)
 {
 	struct intel_output *intel_output = to_intel_output(connector);
-	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
 
 	/* set the bus switch and get the modes */
-	intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus);
 	intel_ddc_get_modes(intel_output);
 
 #if 0
@@ -1601,6 +1599,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector)
 
 	if (intel_output->i2c_bus)
 		intel_i2c_destroy(intel_output->i2c_bus);
+	if (intel_output->ddc_bus)
+		intel_i2c_destroy(intel_output->ddc_bus);
+
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(intel_output);
@@ -1697,12 +1698,56 @@ intel_sdvo_get_digital_encoding_mode(struct intel_output *output)
 	return true;
 }
 
+static struct intel_output *
+intel_sdvo_chan_to_intel_output(struct intel_i2c_chan *chan)
+{
+	struct drm_device *dev = chan->drm_dev;
+	struct drm_connector *connector;
+	struct intel_output *intel_output = NULL;
+
+	list_for_each_entry(connector,
+			&dev->mode_config.connector_list, head) {
+		if (to_intel_output(connector)->ddc_bus == chan) {
+			intel_output = to_intel_output(connector);
+			break;
+		}
+	}
+	return intel_output;
+}
+
+static int intel_sdvo_master_xfer(struct i2c_adapter *i2c_adap,
+				  struct i2c_msg msgs[], int num)
+{
+	struct intel_output *intel_output;
+	struct intel_sdvo_priv *sdvo_priv;
+	struct i2c_algo_bit_data *algo_data;
+	struct i2c_algorithm *algo;
+
+	algo_data = (struct i2c_algo_bit_data *)i2c_adap->algo_data;
+	intel_output =
+		intel_sdvo_chan_to_intel_output(
+				(struct intel_i2c_chan *)(algo_data->data));
+	if (intel_output == NULL)
+		return -EINVAL;
+
+	sdvo_priv = intel_output->dev_priv;
+	algo = (struct i2c_algorithm *)intel_output->i2c_bus->adapter.algo;
+
+	intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus);
+	return algo->master_xfer(i2c_adap, msgs, num);
+}
+
+static struct i2c_algorithm intel_sdvo_i2c_bit_algo = {
+	.master_xfer	= intel_sdvo_master_xfer,
+};
+
 bool intel_sdvo_init(struct drm_device *dev, int output_device)
 {
 	struct drm_connector *connector;
 	struct intel_output *intel_output;
 	struct intel_sdvo_priv *sdvo_priv;
 	struct intel_i2c_chan *i2cbus = NULL;
+	struct intel_i2c_chan *ddcbus = NULL;
 	int connector_type;
 	u8 ch[0x40];
 	int i;
@@ -1748,6 +1793,20 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		}
 	}
 
+	/* setup the DDC bus. */
+	if (output_device == SDVOB)
+		ddcbus = intel_i2c_create(dev, GPIOE, "SDVOB DDC BUS");
+	else
+		ddcbus = intel_i2c_create(dev, GPIOE, "SDVOC DDC BUS");
+
+	if (ddcbus == NULL)
+		goto err_i2c;
+
+	intel_sdvo_i2c_bit_algo.functionality =
+		intel_output->i2c_bus->adapter.algo->functionality;
+	ddcbus->adapter.algo = &intel_sdvo_i2c_bit_algo;
+	intel_output->ddc_bus = ddcbus;
+
 	/* In defaut case sdvo lvds is false */
 	sdvo_priv->is_lvds = false;
 	intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps);
@@ -1862,11 +1921,11 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 		  sdvo_priv->caps.output_flags &
 			(SDVO_OUTPUT_TMDS1 | SDVO_OUTPUT_RGB1) ? 'Y' : 'N');
 
-	intel_output->ddc_bus = i2cbus;
-
 	return true;
 
 err_i2c:
+	if (ddcbus != NULL)
+		intel_i2c_destroy(intel_output->ddc_bus);
 	intel_i2c_destroy(intel_output->i2c_bus);
 err_inteloutput:
 	kfree(intel_output);
-- 
cgit v0.10.2


From 0b827537e339c084ac9384df588969d400be9e0d Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 22 May 2009 13:23:37 -0700
Subject: x86: bugfix wbinvd() model check instead of family check

wbinvd is supported on all CPUs 486 or later. But,
pageattr.c is checking x86_model >= 4 before wbinvd(), which looks like
an oversight bug. It was first introduced at one place by changeset
d7c8f21a8cad0228c7c5ce2bb6dbd95d1ee49d13 and got copied over to second
place in the same file later.

[ Impact: fix missing cache flush on early-model CPUs, potential data corruption ]

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 797f9f1..2cc019a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -153,7 +153,7 @@ static void __cpa_flush_all(void *arg)
 	 */
 	__flush_tlb_all();
 
-	if (cache && boot_cpu_data.x86_model >= 4)
+	if (cache && boot_cpu_data.x86 >= 4)
 		wbinvd();
 }
 
@@ -218,7 +218,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 
 	/* 4M threshold */
 	if (numpages >= 1024) {
-		if (boot_cpu_data.x86_model >= 4)
+		if (boot_cpu_data.x86 >= 4)
 			wbinvd();
 		return;
 	}
-- 
cgit v0.10.2


From 0af48f42df15b97080b450d24219dd95db7b929a Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 22 May 2009 13:23:38 -0700
Subject: x86: cpa_flush_array wbinvd should be done on all CPUs

cpa_flush_array seems to prefer wbinvd() over clflush at 4M threshold.
clflush needs to be done on only one CPU as per instruction definition.
wbinvd() however, should be done on all CPUs.

[ Impact: fix missing flush which could cause data corruption ]

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2cc019a..0f9052b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -204,6 +204,11 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	}
 }
 
+static void wbinvd_local(void *unused)
+{
+	wbinvd();
+}
+
 static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 			    int in_flags, struct page **pages)
 {
@@ -219,7 +224,8 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 	/* 4M threshold */
 	if (numpages >= 1024) {
 		if (boot_cpu_data.x86 >= 4)
-			wbinvd();
+			on_each_cpu(wbinvd_local, NULL, 1);
+
 		return;
 	}
 	/*
-- 
cgit v0.10.2


From a49a018a6ea6d73742a81d673fe5ec4a7d2137b3 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 22 May 2009 16:53:40 -0400
Subject: [ARM] add coherent DMA mask for mv643xx_eth

Since commit eb0519b5a1cf, mv643xx_eth is non functional on ARM because
the platform device declaration does not include any coherent DMA mask
and coherent memory allocations fail.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 3fab82a..be1ca28 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -144,6 +144,9 @@ static struct platform_device kirkwood_ge00 = {
 	.id		= 0,
 	.num_resources	= 1,
 	.resource	= kirkwood_ge00_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data)
@@ -202,6 +205,9 @@ static struct platform_device kirkwood_ge01 = {
 	.id		= 1,
 	.num_resources	= 1,
 	.resource	= kirkwood_ge01_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data)
diff --git a/arch/arm/mach-loki/common.c b/arch/arm/mach-loki/common.c
index c0d2d9d..818f19d 100644
--- a/arch/arm/mach-loki/common.c
+++ b/arch/arm/mach-loki/common.c
@@ -82,6 +82,9 @@ static struct platform_device loki_ge0 = {
 	.id		= 0,
 	.num_resources	= 1,
 	.resource	= loki_ge0_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init loki_ge0_init(struct mv643xx_eth_platform_data *eth_data)
@@ -136,6 +139,9 @@ static struct platform_device loki_ge1 = {
 	.id		= 1,
 	.num_resources	= 1,
 	.resource	= loki_ge1_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init loki_ge1_init(struct mv643xx_eth_platform_data *eth_data)
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index 0d88eea..1b22e4a 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -321,6 +321,9 @@ static struct platform_device mv78xx0_ge00 = {
 	.id		= 0,
 	.num_resources	= 1,
 	.resource	= mv78xx0_ge00_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data)
@@ -375,6 +378,9 @@ static struct platform_device mv78xx0_ge01 = {
 	.id		= 1,
 	.num_resources	= 1,
 	.resource	= mv78xx0_ge01_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data)
@@ -429,6 +435,9 @@ static struct platform_device mv78xx0_ge10 = {
 	.id		= 2,
 	.num_resources	= 1,
 	.resource	= mv78xx0_ge10_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init mv78xx0_ge10_init(struct mv643xx_eth_platform_data *eth_data)
@@ -496,6 +505,9 @@ static struct platform_device mv78xx0_ge11 = {
 	.id		= 3,
 	.num_resources	= 1,
 	.resource	= mv78xx0_ge11_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init mv78xx0_ge11_init(struct mv643xx_eth_platform_data *eth_data)
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index a51fb9d..b1c7778 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -188,6 +188,9 @@ static struct platform_device orion5x_eth = {
 	.id		= 0,
 	.num_resources	= 1,
 	.resource	= orion5x_eth_resources,
+	.dev		= {
+		.coherent_dma_mask	= 0xffffffff,
+	},
 };
 
 void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data)
-- 
cgit v0.10.2


From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:49 -0400
Subject: block: Do away with the notion of hardsect_size

Until now we have had a 1:1 mapping between storage device physical
block size and the logical block sized used when addressing the device.
With SATA 4KB drives coming out that will no longer be the case.  The
sector size will be 4KB but the logical block size will remain
512-bytes.  Hence we need to distinguish between the physical block size
and the logical ditto.

This patch renames hardsect_size to logical_block_size.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 9e105cb..a477991 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -250,7 +250,7 @@ axon_ram_probe(struct of_device *device, const struct of_device_id *device_id)
 
 	set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
 	blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
-	blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
+	blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
 	add_disk(bank->disk);
 
 	bank->irq_id = irq_of_parse_and_map(device->node, 0);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 91fa8e0..73e28d3 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -340,7 +340,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 		kobject_uevent(&bi->kobj, KOBJ_ADD);
 
 		bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
-		bi->sector_size = disk->queue->hardsect_size;
+		bi->sector_size = queue_logical_block_size(disk->queue);
 		disk->integrity = bi;
 	} else
 		bi = disk->integrity;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 57af728..15c3164 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -134,7 +134,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
-	blk_queue_hardsect_size(q, 512);
+	blk_queue_logical_block_size(q, 512);
 	blk_queue_dma_alignment(q, 511);
 	blk_queue_congestion_threshold(q);
 	q->nr_batching = BLK_BATCH_REQ;
@@ -288,21 +288,20 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 EXPORT_SYMBOL(blk_queue_max_segment_size);
 
 /**
- * blk_queue_hardsect_size - set hardware sector size for the queue
+ * blk_queue_logical_block_size - set logical block size for the queue
  * @q:  the request queue for the device
- * @size:  the hardware sector size, in bytes
+ * @size:  the logical block size, in bytes
  *
  * Description:
- *   This should typically be set to the lowest possible sector size
- *   that the hardware can operate on (possible without reverting to
- *   even internal read-modify-write operations). Usually the default
- *   of 512 covers most hardware.
+ *   This should be set to the lowest possible block size that the
+ *   storage device can address.  The default of 512 covers most
+ *   hardware.
  **/
-void blk_queue_hardsect_size(struct request_queue *q, unsigned short size)
+void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
-	q->hardsect_size = size;
+	q->logical_block_size = size;
 }
-EXPORT_SYMBOL(blk_queue_hardsect_size);
+EXPORT_SYMBOL(blk_queue_logical_block_size);
 
 /*
  * Returns the minimum that is _not_ zero, unless both are zero.
@@ -324,7 +323,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 	t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
 	t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
 	t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
-	t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
+	t->logical_block_size = max(t->logical_block_size, b->logical_block_size);
 	if (!t->queue_lock)
 		WARN_ON_ONCE(1);
 	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba..13d38b7 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -100,9 +100,9 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_sectors_kb, (page));
 }
 
-static ssize_t queue_hw_sector_size_show(struct request_queue *q, char *page)
+static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->hardsect_size, page);
+	return queue_var_show(queue_logical_block_size(q), page);
 }
 
 static ssize_t
@@ -249,7 +249,12 @@ static struct queue_sysfs_entry queue_iosched_entry = {
 
 static struct queue_sysfs_entry queue_hw_sector_size_entry = {
 	.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
-	.show = queue_hw_sector_size_show,
+	.show = queue_logical_block_size_show,
+};
+
+static struct queue_sysfs_entry queue_logical_block_size_entry = {
+	.attr = {.name = "logical_block_size", .mode = S_IRUGO },
+	.show = queue_logical_block_size_show,
 };
 
 static struct queue_sysfs_entry queue_nonrot_entry = {
@@ -283,6 +288,7 @@ static struct attribute *default_attrs[] = {
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
+	&queue_logical_block_size_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f87615d..9eaa194 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -763,7 +763,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
 		return compat_put_int(arg, block_size(bdev));
 	case BLKSSZGET: /* get block device hardware sector size */
-		return compat_put_int(arg, bdev_hardsect_size(bdev));
+		return compat_put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
 		return compat_put_ushort(arg,
 					 bdev_get_queue(bdev)->max_sectors);
diff --git a/block/ioctl.c b/block/ioctl.c
index ad474d4..7aa97f6 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -311,7 +311,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
 		return put_int(arg, block_size(bdev));
 	case BLKSSZGET: /* get block device hardware sector size */
-		return put_int(arg, bdev_hardsect_size(bdev));
+		return put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
 		return put_ushort(arg, bdev_get_queue(bdev)->max_sectors);
 	case BLKRASET:
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e714e7c..94474f5 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1389,8 +1389,8 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 
 	disk->queue->queuedata = h;
 
-	blk_queue_hardsect_size(disk->queue,
-				h->drv[drv_index].block_size);
+	blk_queue_logical_block_size(disk->queue,
+				     h->drv[drv_index].block_size);
 
 	/* Make sure all queue data is written out before */
 	/* setting h->drv[drv_index].queue, as setting this */
@@ -2298,7 +2298,7 @@ static int cciss_revalidate(struct gendisk *disk)
 	cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
 			       inq_buff, drv);
 
-	blk_queue_hardsect_size(drv->queue, drv->block_size);
+	blk_queue_logical_block_size(drv->queue, drv->block_size);
 	set_capacity(disk, drv->nr_blocks);
 
 	kfree(inq_buff);
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index a02dcfc..44fa201 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -474,7 +474,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
 		disk->fops = &ida_fops;
 		if (j && !drv->nr_blks)
 			continue;
-		blk_queue_hardsect_size(hba[i]->queue, drv->blk_size);
+		blk_queue_logical_block_size(hba[i]->queue, drv->blk_size);
 		set_capacity(disk, drv->nr_blks);
 		disk->queue = hba[i]->queue;
 		disk->private_data = drv;
@@ -1546,7 +1546,7 @@ static int revalidate_allvol(ctlr_info_t *host)
 		drv_info_t *drv = &host->drv[i];
 		if (i && !drv->nr_blks)
 			continue;
-		blk_queue_hardsect_size(host->queue, drv->blk_size);
+		blk_queue_logical_block_size(host->queue, drv->blk_size);
 		set_capacity(disk, drv->nr_blks);
 		disk->queue = host->queue;
 		disk->private_data = drv;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 961de56..f65b3f3 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -724,7 +724,7 @@ static int __init hd_init(void)
 	blk_queue_max_sectors(hd_queue, 255);
 	init_timer(&device_timer);
 	device_timer.function = hd_times_out;
-	blk_queue_hardsect_size(hd_queue, 512);
+	blk_queue_logical_block_size(hd_queue, 512);
 
 	if (!NR_HD) {
 		/*
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index c0cd0a0..60de5a0 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -996,7 +996,7 @@ static int mg_probe(struct platform_device *plat_dev)
 		goto probe_err_6;
 	}
 	blk_queue_max_sectors(host->breq, MG_MAX_SECTS);
-	blk_queue_hardsect_size(host->breq, MG_SECTOR_SIZE);
+	blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE);
 
 	init_timer(&host->timer);
 	host->timer.function = mg_times_out;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index dc7a8c3..293f585 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2657,7 +2657,7 @@ static void pkt_init_queue(struct pktcdvd_device *pd)
 	struct request_queue *q = pd->disk->queue;
 
 	blk_queue_make_request(q, pkt_make_request);
-	blk_queue_hardsect_size(q, CD_FRAMESIZE);
+	blk_queue_logical_block_size(q, CD_FRAMESIZE);
 	blk_queue_max_sectors(q, PACKET_MAX_SECTORS);
 	blk_queue_merge_bvec(q, pkt_merge_bvec);
 	q->queuedata = pd;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 338cee4..aaeeb54 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -477,7 +477,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
 	blk_queue_max_sectors(queue, dev->bounce_size >> 9);
 	blk_queue_segment_boundary(queue, -1UL);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
-	blk_queue_hardsect_size(queue, dev->blk_size);
+	blk_queue_logical_block_size(queue, dev->blk_size);
 
 	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH,
 			  ps3disk_prepare_flush);
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index e67bbae..cc54473 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -722,7 +722,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 	/*
 	 * build the command
 	 *
-	 * The call to blk_queue_hardsect_size() guarantees that request
+	 * The call to blk_queue_logical_block_size() guarantees that request
 	 * is aligned, but it is given in terms of 512 byte units, always.
 	 */
 	block = blk_rq_pos(rq) >> lun->capacity.bshift;
@@ -1749,7 +1749,7 @@ static int ub_bd_revalidate(struct gendisk *disk)
 	ub_revalidate(lun->udev, lun);
 
 	/* XXX Support sector size switching like in sr.c */
-	blk_queue_hardsect_size(disk->queue, lun->capacity.bsize);
+	blk_queue_logical_block_size(disk->queue, lun->capacity.bsize);
 	set_capacity(disk, lun->capacity.nsec);
 	// set_disk_ro(sdkp->disk, lun->readonly);
 
@@ -2324,7 +2324,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
 	blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
 	blk_queue_segment_boundary(q, 0xffffffff);	/* Dubious. */
 	blk_queue_max_sectors(q, UB_MAX_SECTORS);
-	blk_queue_hardsect_size(q, lun->capacity.bsize);
+	blk_queue_logical_block_size(q, lun->capacity.bsize);
 
 	lun->disk = disk;
 	q->queuedata = lun;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 511d4ae..c4845b1 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -347,7 +347,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 				offsetof(struct virtio_blk_config, blk_size),
 				&blk_size);
 	if (!err)
-		blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+		blk_queue_logical_block_size(vblk->disk->queue, blk_size);
 
 	add_disk(vblk->disk);
 	return 0;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 132120a..c199682 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -344,7 +344,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_hardsect_size(rq, sector_size);
+	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_max_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 3a4397e..f08491a 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -984,7 +984,7 @@ static int __devinit ace_setup(struct ace_device *ace)
 	ace->queue = blk_init_queue(ace_request, &ace->lock);
 	if (ace->queue == NULL)
 		goto err_blk_initq;
-	blk_queue_hardsect_size(ace->queue, 512);
+	blk_queue_logical_block_size(ace->queue, 512);
 
 	/*
 	 * Allocate and initialize GD structure
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1e366ad..b5621f2 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -739,7 +739,7 @@ static void __devinit probe_gdrom_setupdisk(void)
 
 static int __devinit probe_gdrom_setupqueue(void)
 {
-	blk_queue_hardsect_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
+	blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
 	/* using DMA so memory will need to be contiguous */
 	blk_queue_max_hw_segments(gd.gdrom_rq, 1);
 	/* set a large max size to get most from DMA */
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index f177c2d..0fff646 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -469,8 +469,8 @@ static void vio_handle_cd_event(struct HvLpEvent *event)
 	case viocdopen:
 		if (event->xRc == 0) {
 			di = &viocd_diskinfo[bevent->disk];
-			blk_queue_hardsect_size(di->viocd_disk->queue,
-					bevent->block_size);
+			blk_queue_logical_block_size(di->viocd_disk->queue,
+						     bevent->block_size);
 			set_capacity(di->viocd_disk,
 					bevent->media_size *
 					bevent->block_size / 512);
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 20d90e6..db32f0e 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -71,7 +71,7 @@ static int raw_open(struct inode *inode, struct file *filp)
 	err = bd_claim(bdev, raw_open);
 	if (err)
 		goto out1;
-	err = set_blocksize(bdev, bdev_hardsect_size(bdev));
+	err = set_blocksize(bdev, bdev_logical_block_size(bdev));
 	if (err)
 		goto out2;
 	filp->f_flags |= O_DIRECT;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 1799328..424140c 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -182,7 +182,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
 				 (sense->information[2] <<  8) |
 				 (sense->information[3]);
 
-			if (drive->queue->hardsect_size == 2048)
+			if (queue_logical_block_size(drive->queue) == 2048)
 				/* device sector size is 2K */
 				sector <<= 2;
 
@@ -737,7 +737,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
 	struct request_queue *q = drive->queue;
 	int write = rq_data_dir(rq) == WRITE;
 	unsigned short sectors_per_frame =
-		queue_hardsect_size(q) >> SECTOR_BITS;
+		queue_logical_block_size(q) >> SECTOR_BITS;
 
 	ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, "
 				  "secs_per_frame: %u",
@@ -1021,8 +1021,8 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense)
 	/* save a private copy of the TOC capacity for error handling */
 	drive->probed_capacity = toc->capacity * sectors_per_frame;
 
-	blk_queue_hardsect_size(drive->queue,
-				sectors_per_frame << SECTOR_BITS);
+	blk_queue_logical_block_size(drive->queue,
+				     sectors_per_frame << SECTOR_BITS);
 
 	/* first read just the header, so we know how long the TOC is */
 	stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
@@ -1338,7 +1338,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 /* standard prep_rq_fn that builds 10 byte cmds */
 static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
 {
-	int hard_sect = queue_hardsect_size(q);
+	int hard_sect = queue_logical_block_size(q);
 	long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
 	unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
 
@@ -1543,7 +1543,7 @@ static int ide_cdrom_setup(ide_drive_t *drive)
 
 	nslots = ide_cdrom_probe_capabilities(drive);
 
-	blk_queue_hardsect_size(q, CD_FRAMESIZE);
+	blk_queue_logical_block_size(q, CD_FRAMESIZE);
 
 	if (ide_cdrom_register(drive, nslots)) {
 		printk(KERN_ERR PFX "%s: %s failed to register device with the"
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 47c68bc..06b0ded 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -232,7 +232,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
 		target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
 
 		if (sync_page_io(rdev->bdev, target,
-				 roundup(size, bdev_hardsect_size(rdev->bdev)),
+				 roundup(size, bdev_logical_block_size(rdev->bdev)),
 				 page, READ)) {
 			page->index = index;
 			attach_page_buffers(page, NULL); /* so that free_buffer will
@@ -287,7 +287,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 			int size = PAGE_SIZE;
 			if (page->index == bitmap->file_pages-1)
 				size = roundup(bitmap->last_page_size,
-					       bdev_hardsect_size(rdev->bdev));
+					       bdev_logical_block_size(rdev->bdev));
 			/* Just make sure we aren't corrupting data or
 			 * metadata
 			 */
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index a2e26c2..75d8081 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -178,7 +178,7 @@ static int set_chunk_size(struct dm_exception_store *store,
 	}
 
 	/* Validate the chunk size against the device block size */
-	if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
+	if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) {
 		*error = "Chunk size is not a multiple of device blocksize";
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index be233bc..6fa8ccf 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -413,7 +413,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		 * Buffer holds both header and bitset.
 		 */
 		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
-				       bitset_size, ti->limits.hardsect_size);
+				       bitset_size,
+				       ti->limits.logical_block_size);
 
 		if (buf_size > dev->bdev->bd_inode->i_size) {
 			DMWARN("log device %s too small: need %llu bytes",
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index e75c6dd..2662a41 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -282,7 +282,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	 */
 	if (!ps->store->chunk_size) {
 		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->store->cow->bdev) >> 9);
+		    bdev_logical_block_size(ps->store->cow->bdev) >> 9);
 		ps->store->chunk_mask = ps->store->chunk_size - 1;
 		ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
 		chunk_size_supplied = 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 429b50b..65e2d97 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -108,7 +108,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
 	lhs->max_hw_segments =
 		min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
 
-	lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
+	lhs->logical_block_size = max(lhs->logical_block_size,
+				      rhs->logical_block_size);
 
 	lhs->max_segment_size =
 		min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
@@ -529,7 +530,8 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 	rs->max_hw_segments =
 		min_not_zero(rs->max_hw_segments, q->max_hw_segments);
 
-	rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
+	rs->logical_block_size = max(rs->logical_block_size,
+				     queue_logical_block_size(q));
 
 	rs->max_segment_size =
 		min_not_zero(rs->max_segment_size, q->max_segment_size);
@@ -683,8 +685,8 @@ static void check_for_valid_limits(struct io_restrictions *rs)
 		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
 	if (!rs->max_hw_segments)
 		rs->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!rs->hardsect_size)
-		rs->hardsect_size = 1 << SECTOR_SHIFT;
+	if (!rs->logical_block_size)
+		rs->logical_block_size = 1 << SECTOR_SHIFT;
 	if (!rs->max_segment_size)
 		rs->max_segment_size = MAX_SEGMENT_SIZE;
 	if (!rs->seg_boundary_mask)
@@ -914,7 +916,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	blk_queue_max_sectors(q, t->limits.max_sectors);
 	q->max_phys_segments = t->limits.max_phys_segments;
 	q->max_hw_segments = t->limits.max_hw_segments;
-	q->hardsect_size = t->limits.hardsect_size;
+	q->logical_block_size = t->limits.logical_block_size;
 	q->max_segment_size = t->limits.max_segment_size;
 	q->max_hw_sectors = t->limits.max_hw_sectors;
 	q->seg_boundary_mask = t->limits.seg_boundary_mask;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fccc834..4cbc19f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1202,7 +1202,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
 
 	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
-	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
+	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
 	if (rdev->sb_size & bmask)
 		rdev->sb_size = (rdev->sb_size | bmask) + 1;
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c0bebc6..7847bbc 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1242,7 +1242,7 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 
 	sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
 
-	blk_queue_hardsect_size(msb->queue, msb->page_size);
+	blk_queue_logical_block_size(msb->queue, msb->page_size);
 
 	capacity = be16_to_cpu(sys_info->user_block_count);
 	capacity *= be16_to_cpu(sys_info->block_size);
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 6573ef4..335d4c7 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -794,8 +794,9 @@ static int i2o_block_transfer(struct request *req)
 	if (c->adaptec) {
 		u8 cmd[10];
 		u32 scsi_flags;
-		u16 hwsec = queue_hardsect_size(req->q) >> KERNEL_SECTOR_SHIFT;
+		u16 hwsec;
 
+		hwsec = queue_logical_block_size(req->q) >> KERNEL_SECTOR_SHIFT;
 		memset(cmd, 0, 10);
 
 		sgl_offset = SGL_OFFSET_12;
@@ -1078,7 +1079,7 @@ static int i2o_block_probe(struct device *dev)
 	 */
 	if (!i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) ||
 	    !i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) {
-		blk_queue_hardsect_size(queue, le32_to_cpu(blocksize));
+		blk_queue_logical_block_size(queue, le32_to_cpu(blocksize));
 	} else
 		osm_warn("unable to get blocksize of %s\n", gd->disk_name);
 
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index c5df865..98ffc41e 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -521,7 +521,7 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 
 	sprintf(md->disk->disk_name, "mmcblk%d", devidx);
 
-	blk_queue_hardsect_size(md->queue.queue, 512);
+	blk_queue_logical_block_size(md->queue.queue, 512);
 
 	if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) {
 		/*
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 502622f..aaac3b6 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -378,7 +378,7 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
 	}
 
 	tr->blkcore_priv->rq->queuedata = tr;
-	blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize);
+	blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
 	if (tr->discard)
 		blk_queue_set_discard(tr->blkcore_priv->rq,
 				      blktrans_discard_request);
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index e64f62d..27a1be0 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1990,7 +1990,7 @@ static void dasd_setup_queue(struct dasd_block *block)
 {
 	int max;
 
-	blk_queue_hardsect_size(block->request_queue, block->bp_block);
+	blk_queue_logical_block_size(block->request_queue, block->bp_block);
 	max = block->base->discipline->max_blocks << block->s2b_shift;
 	blk_queue_max_sectors(block->request_queue, max);
 	blk_queue_max_phys_segments(block->request_queue, -1L);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index cfdcf1a..a4c7ffc 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -602,7 +602,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	dev_info->gd->private_data = dev_info;
 	dev_info->gd->driverfs_dev = &dev_info->dev;
 	blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
-	blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096);
+	blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
 
 	seg_byte_size = (dev_info->end - dev_info->start + 1);
 	set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 76814f3..0ae0c83 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -343,7 +343,7 @@ static int __init xpram_setup_blkdev(void)
 			goto out;
 		}
 		blk_queue_make_request(xpram_queues[i], xpram_make_request);
-		blk_queue_hardsect_size(xpram_queues[i], 4096);
+		blk_queue_logical_block_size(xpram_queues[i], 4096);
 	}
 
 	/*
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 1e79676..47ff695 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -222,7 +222,7 @@ tapeblock_setup_device(struct tape_device * device)
 	if (rc)
 		goto cleanup_queue;
 
-	blk_queue_hardsect_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
+	blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
 	blk_queue_max_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC);
 	blk_queue_max_phys_segments(blkdat->request_queue, -1L);
 	blk_queue_max_hw_segments(blkdat->request_queue, -1L);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 40d2860..bcf3bd4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1510,7 +1510,7 @@ got_data:
 		 */
 		sector_size = 512;
 	}
-	blk_queue_hardsect_size(sdp->request_queue, sector_size);
+	blk_queue_logical_block_size(sdp->request_queue, sector_size);
 
 	{
 		char cap_str_2[10], cap_str_10[10];
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index fddba53..cd350df 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -727,7 +727,7 @@ static void get_sectorsize(struct scsi_cd *cd)
 	}
 
 	queue = cd->device->request_queue;
-	blk_queue_hardsect_size(queue, sector_size);
+	blk_queue_logical_block_size(queue, sector_size);
 
 	return;
 }
diff --git a/fs/bio.c b/fs/bio.c
index 81dc93e..4445c38 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1490,11 +1490,12 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 sector_t bio_sector_offset(struct bio *bio, unsigned short index,
 			   unsigned int offset)
 {
-	unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+	unsigned int sector_sz;
 	struct bio_vec *bv;
 	sector_t sectors;
 	int i;
 
+	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
 	sectors = 0;
 
 	if (index >= bio->bi_idx)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a85fe31..a29b4dc 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -76,7 +76,7 @@ int set_blocksize(struct block_device *bdev, int size)
 		return -EINVAL;
 
 	/* Size cannot be smaller than the size supported by the device */
-	if (size < bdev_hardsect_size(bdev))
+	if (size < bdev_logical_block_size(bdev))
 		return -EINVAL;
 
 	/* Don't change the size if it is same as current */
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(sb_set_blocksize);
 
 int sb_min_blocksize(struct super_block *sb, int size)
 {
-	int minsize = bdev_hardsect_size(sb->s_bdev);
+	int minsize = bdev_logical_block_size(sb->s_bdev);
 	if (size < minsize)
 		size = minsize;
 	return sb_set_blocksize(sb, size);
@@ -1117,7 +1117,7 @@ EXPORT_SYMBOL(check_disk_change);
 
 void bd_set_size(struct block_device *bdev, loff_t size)
 {
-	unsigned bsize = bdev_hardsect_size(bdev);
+	unsigned bsize = bdev_logical_block_size(bdev);
 
 	bdev->bd_inode->i_size = size;
 	while (bsize < PAGE_CACHE_SIZE) {
diff --git a/fs/buffer.c b/fs/buffer.c
index aed2977..36e2bbc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1085,12 +1085,12 @@ static struct buffer_head *
 __getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
 	/* Size must be multiple of hard sectorsize */
-	if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
+	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
 			(size < 512 || size > PAGE_SIZE))) {
 		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
 					size);
-		printk(KERN_ERR "hardsect size: %d\n",
-					bdev_hardsect_size(bdev));
+		printk(KERN_ERR "logical block size: %d\n",
+					bdev_logical_block_size(bdev));
 
 		dump_stack();
 		return NULL;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 05763bb..8b10b87 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1127,7 +1127,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 		rw = WRITE_ODIRECT;
 
 	if (bdev)
-		bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
+		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
 
 	if (offset & blocksize_mask) {
 		if (bdev)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 599dbfe..acbb94f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1696,7 +1696,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
-	hblock = bdev_hardsect_size(sb->s_bdev);
+	hblock = bdev_logical_block_size(sb->s_bdev);
 	if (sb->s_blocksize != blocksize) {
 		/*
 		 * Make sure the blocksize for the filesystem is larger
@@ -2119,7 +2119,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	}
 
 	blocksize = sb->s_blocksize;
-	hblock = bdev_hardsect_size(bdev);
+	hblock = bdev_logical_block_size(bdev);
 	if (blocksize < hblock) {
 		printk(KERN_ERR
 			"EXT3-fs: blocksize too small for journal device.\n");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e..a30549f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2962,7 +2962,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	}
 
 	blocksize = sb->s_blocksize;
-	hblock = bdev_hardsect_size(bdev);
+	hblock = bdev_logical_block_size(bdev);
 	if (blocksize < hblock) {
 		printk(KERN_ERR
 			"EXT4-fs: blocksize too small for journal device.\n");
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1ff9473..a3b2ac9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -526,11 +526,11 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
 	}
 
 	/* Set up the buffer cache and SB for real */
-	if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
+	if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) {
 		ret = -EINVAL;
 		fs_err(sdp, "FS block size (%u) is too small for device "
 		       "block size (%u)\n",
-		       sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
+		       sdp->sd_sb.sb_bsize, bdev_logical_block_size(sb->s_bdev));
 		goto out;
 	}
 	if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5650382..a971d24 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -845,7 +845,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 	struct super_block *sb = sdp->sd_vfs;
 	struct block_device *bdev = sb->s_bdev;
 	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
-					   bdev_hardsect_size(sb->s_bdev);
+					   bdev_logical_block_size(sb->s_bdev);
 	u64 blk;
 	sector_t start = 0;
 	sector_t nr_sects = 0;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 7f65b3b..a91f15b 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -515,7 +515,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
 
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
 	if (sb->s_blocksize != blocksize) {
-		int hw_blocksize = bdev_hardsect_size(sb->s_bdev);
+		int hw_blocksize = bdev_logical_block_size(sb->s_bdev);
 
 		if (blocksize < hw_blocksize) {
 			printk(KERN_ERR
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index f76951d..6aa7c47 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/spinlock.h>
-#include <linux/blkdev.h>	/* For bdev_hardsect_size(). */
+#include <linux/blkdev.h>	/* For bdev_logical_block_size(). */
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
@@ -2785,13 +2785,13 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		goto err_out_now;
 
 	/* We support sector sizes up to the PAGE_CACHE_SIZE. */
-	if (bdev_hardsect_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
+	if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
 		if (!silent)
 			ntfs_error(sb, "Device has unsupported sector size "
 					"(%i).  The maximum supported sector "
 					"size on this architecture is %lu "
 					"bytes.",
-					bdev_hardsect_size(sb->s_bdev),
+					bdev_logical_block_size(sb->s_bdev),
 					PAGE_CACHE_SIZE);
 		goto err_out_now;
 	}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 4f85ece..09cc25d 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1371,7 +1371,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 
 	bdevname(reg->hr_bdev, reg->hr_dev_name);
 
-	sectsize = bdev_hardsect_size(reg->hr_bdev);
+	sectsize = bdev_logical_block_size(reg->hr_bdev);
 	if (sectsize != reg->hr_block_bytes) {
 		mlog(ML_ERROR,
 		     "blocksize %u incorrect for device, expected %d",
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 79ff8d9..5c6163f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -713,7 +713,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
 	*bh = NULL;
 
 	/* may be > 512 */
-	*sector_size = bdev_hardsect_size(sb->s_bdev);
+	*sector_size = bdev_logical_block_size(sb->s_bdev);
 	if (*sector_size > OCFS2_MAX_BLOCKSIZE) {
 		mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n",
 		     *sector_size, OCFS2_MAX_BLOCKSIZE);
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 4629768..fc71aab 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -76,7 +76,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	Sector sect;
 
 	res = 0;
-	blocksize = bdev_hardsect_size(bdev);
+	blocksize = bdev_logical_block_size(bdev);
 	if (blocksize <= 0)
 		goto out_exit;
 	i_size = i_size_read(bdev->bd_inode);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 7965118..0028d2e 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -110,7 +110,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
 	Sector sect;
 	unsigned char *data;
 	u32 this_sector, this_size;
-	int sector_size = bdev_hardsect_size(bdev) / 512;
+	int sector_size = bdev_logical_block_size(bdev) / 512;
 	int loopct = 0;		/* number of links followed
 				   without finding a data partition */
 	int i;
@@ -415,7 +415,7 @@ static struct {
  
 int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
-	int sector_size = bdev_hardsect_size(bdev) / 512;
+	int sector_size = bdev_logical_block_size(bdev) / 512;
 	Sector sect;
 	unsigned char *data;
 	struct partition *p;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 72348cc..0ba4410 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1915,7 +1915,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) {
 		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
 	} else {
-		uopt.blocksize = bdev_hardsect_size(sb->s_bdev);
+		uopt.blocksize = bdev_logical_block_size(sb->s_bdev);
 		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
 		if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
 			if (!silent)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e28800a..1418b91 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1501,7 +1501,7 @@ xfs_setsize_buftarg_early(
 	struct block_device	*bdev)
 {
 	return xfs_setsize_buftarg_flags(btp,
-			PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
+			PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0);
 }
 
 int
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 56ce53f..872b78b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,7 +391,7 @@ struct request_queue
 	unsigned int		max_hw_sectors;
 	unsigned short		max_phys_segments;
 	unsigned short		max_hw_segments;
-	unsigned short		hardsect_size;
+	unsigned short		logical_block_size;
 	unsigned int		max_segment_size;
 
 	unsigned long		seg_boundary_mask;
@@ -901,7 +901,7 @@ extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
-extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
+extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
@@ -988,19 +988,19 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-static inline int queue_hardsect_size(struct request_queue *q)
+static inline unsigned short queue_logical_block_size(struct request_queue *q)
 {
 	int retval = 512;
 
-	if (q && q->hardsect_size)
-		retval = q->hardsect_size;
+	if (q && q->logical_block_size)
+		retval = q->logical_block_size;
 
 	return retval;
 }
 
-static inline int bdev_hardsect_size(struct block_device *bdev)
+static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
 {
-	return queue_hardsect_size(bdev_get_queue(bdev));
+	return queue_logical_block_size(bdev_get_queue(bdev));
 }
 
 static inline int queue_dma_alignment(struct request_queue *q)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ded2d7c..49c2362 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -149,7 +149,7 @@ struct io_restrictions {
 	unsigned max_hw_sectors;
 	unsigned max_sectors;
 	unsigned max_segment_size;
-	unsigned short hardsect_size;
+	unsigned short logical_block_size;
 	unsigned short max_hw_segments;
 	unsigned short max_phys_segments;
 	unsigned char no_cluster; /* inverted so that 0 is default */
-- 
cgit v0.10.2


From ae03bf639a5027d27270123f5f6e3ee6a412781d Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:50 -0400
Subject: block: Use accessor functions for queue limits

Convert all external users of queue limits to using wrapper functions
instead of poking the request queue variables directly.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 0d98054..30022b4 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -388,10 +388,10 @@ int blkdev_issue_discard(struct block_device *bdev,
 
 		bio->bi_sector = sector;
 
-		if (nr_sects > q->max_hw_sectors) {
-			bio->bi_size = q->max_hw_sectors << 9;
-			nr_sects -= q->max_hw_sectors;
-			sector += q->max_hw_sectors;
+		if (nr_sects > queue_max_hw_sectors(q)) {
+			bio->bi_size = queue_max_hw_sectors(q) << 9;
+			nr_sects -= queue_max_hw_sectors(q);
+			sector += queue_max_hw_sectors(q);
 		} else {
 			bio->bi_size = nr_sects << 9;
 			nr_sects = 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 59c4af5..7a4c401 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1437,11 +1437,11 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 		}
 
-		if (unlikely(nr_sectors > q->max_hw_sectors)) {
+		if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
 			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
-				bdevname(bio->bi_bdev, b),
-				bio_sectors(bio),
-				q->max_hw_sectors);
+			       bdevname(bio->bi_bdev, b),
+			       bio_sectors(bio),
+			       queue_max_hw_sectors(q));
 			goto end_io;
 		}
 
@@ -1608,8 +1608,8 @@ EXPORT_SYMBOL(submit_bio);
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
-	if (blk_rq_sectors(rq) > q->max_sectors ||
-	    blk_rq_bytes(rq) > q->max_hw_sectors << 9) {
+	if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
+	    blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
 	}
@@ -1621,8 +1621,8 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 	 * limitation.
 	 */
 	blk_recalc_rq_segments(rq);
-	if (rq->nr_phys_segments > q->max_phys_segments ||
-	    rq->nr_phys_segments > q->max_hw_segments) {
+	if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
+	    rq->nr_phys_segments > queue_max_hw_segments(q)) {
 		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
 		return -EIO;
 	}
diff --git a/block/blk-map.c b/block/blk-map.c
index ef2492a..9083cf0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -115,7 +115,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 	struct bio *bio = NULL;
 	int ret;
 
-	if (len > (q->max_hw_sectors << 9))
+	if (len > (queue_max_hw_sectors(q) << 9))
 		return -EINVAL;
 	if (!len)
 		return -EINVAL;
@@ -292,7 +292,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	struct bio *bio;
 	int ret;
 
-	if (len > (q->max_hw_sectors << 9))
+	if (len > (queue_max_hw_sectors(q) << 9))
 		return -EINVAL;
 	if (!len || !kbuf)
 		return -EINVAL;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4974dd5..39ce644 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -32,11 +32,12 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 			 * never considered part of another segment, since that
 			 * might change with the bounce page.
 			 */
-			high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
+			high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q);
 			if (high || highprv)
 				goto new_segment;
 			if (cluster) {
-				if (seg_size + bv->bv_len > q->max_segment_size)
+				if (seg_size + bv->bv_len
+				    > queue_max_segment_size(q))
 					goto new_segment;
 				if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
 					goto new_segment;
@@ -91,7 +92,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 		return 0;
 
 	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
-	    q->max_segment_size)
+	    queue_max_segment_size(q))
 		return 0;
 
 	if (!bio_has_data(bio))
@@ -134,7 +135,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 		int nbytes = bvec->bv_len;
 
 		if (bvprv && cluster) {
-			if (sg->length + nbytes > q->max_segment_size)
+			if (sg->length + nbytes > queue_max_segment_size(q))
 				goto new_segment;
 
 			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
@@ -205,8 +206,8 @@ static inline int ll_new_hw_segment(struct request_queue *q,
 {
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
-	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
-	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
+	if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) ||
+	    req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -227,9 +228,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
 	unsigned short max_sectors;
 
 	if (unlikely(blk_pc_request(req)))
-		max_sectors = q->max_hw_sectors;
+		max_sectors = queue_max_hw_sectors(q);
 	else
-		max_sectors = q->max_sectors;
+		max_sectors = queue_max_sectors(q);
 
 	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
 		req->cmd_flags |= REQ_NOMERGE;
@@ -251,9 +252,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 	unsigned short max_sectors;
 
 	if (unlikely(blk_pc_request(req)))
-		max_sectors = q->max_hw_sectors;
+		max_sectors = queue_max_hw_sectors(q);
 	else
-		max_sectors = q->max_sectors;
+		max_sectors = queue_max_sectors(q);
 
 
 	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
@@ -287,7 +288,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	/*
 	 * Will it become too large?
 	 */
-	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > q->max_sectors)
+	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
 		return 0;
 
 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -299,10 +300,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 		total_phys_segments--;
 	}
 
-	if (total_phys_segments > q->max_phys_segments)
+	if (total_phys_segments > queue_max_phys_segments(q))
 		return 0;
 
-	if (total_phys_segments > q->max_hw_segments)
+	if (total_phys_segments > queue_max_hw_segments(q))
 		return 0;
 
 	/* Merge is OK... */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 15c3164..0b32f98 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -219,6 +219,15 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
 }
 EXPORT_SYMBOL(blk_queue_max_sectors);
 
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
+{
+	if (BLK_DEF_MAX_SECTORS > max_sectors)
+		q->max_hw_sectors = BLK_DEF_MAX_SECTORS;
+	else
+		q->max_hw_sectors = max_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_hw_sectors);
+
 /**
  * blk_queue_max_phys_segments - set max phys segments for a request for this queue
  * @q:  the request queue for the device
@@ -395,11 +404,11 @@ int blk_queue_dma_drain(struct request_queue *q,
 			       dma_drain_needed_fn *dma_drain_needed,
 			       void *buf, unsigned int size)
 {
-	if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+	if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2)
 		return -EINVAL;
 	/* make room for appending the drain */
-	--q->max_hw_segments;
-	--q->max_phys_segments;
+	blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1);
+	blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1);
 	q->dma_drain_needed = dma_drain_needed;
 	q->dma_drain_buffer = buf;
 	q->dma_drain_size = size;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 13d38b7..142a4ac 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -95,7 +95,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
 
 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
 {
-	int max_sectors_kb = q->max_sectors >> 1;
+	int max_sectors_kb = queue_max_sectors(q) >> 1;
 
 	return queue_var_show(max_sectors_kb, (page));
 }
@@ -109,7 +109,7 @@ static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
 	unsigned long max_sectors_kb,
-			max_hw_sectors_kb = q->max_hw_sectors >> 1,
+		max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
 			page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
 	ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
 
@@ -117,7 +117,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 		return -EINVAL;
 
 	spin_lock_irq(q->queue_lock);
-	q->max_sectors = max_sectors_kb << 1;
+	blk_queue_max_sectors(q, max_sectors_kb << 1);
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
@@ -125,7 +125,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 
 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 {
-	int max_hw_sectors_kb = q->max_hw_sectors >> 1;
+	int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
 
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 9eaa194..df18a15 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -766,7 +766,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 		return compat_put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
 		return compat_put_ushort(arg,
-					 bdev_get_queue(bdev)->max_sectors);
+					 queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET: /* compatible, but no compat_ptr (!) */
 	case BLKFRASET:
 		if (!capable(CAP_SYS_ADMIN))
diff --git a/block/ioctl.c b/block/ioctl.c
index 7aa97f6..500e4c7 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -152,10 +152,10 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 		bio->bi_private = &wait;
 		bio->bi_sector = start;
 
-		if (len > q->max_hw_sectors) {
-			bio->bi_size = q->max_hw_sectors << 9;
-			len -= q->max_hw_sectors;
-			start += q->max_hw_sectors;
+		if (len > queue_max_hw_sectors(q)) {
+			bio->bi_size = queue_max_hw_sectors(q) << 9;
+			len -= queue_max_hw_sectors(q);
+			start += queue_max_hw_sectors(q);
 		} else {
 			bio->bi_size = len << 9;
 			len = 0;
@@ -313,7 +313,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKSSZGET: /* get block device hardware sector size */
 		return put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
-		return put_ushort(arg, bdev_get_queue(bdev)->max_sectors);
+		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET:
 	case BLKFRASET:
 		if(!capable(CAP_SYS_ADMIN))
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index a9670dd..5f8e798 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -75,7 +75,7 @@ static int sg_set_timeout(struct request_queue *q, int __user *p)
 
 static int sg_get_reserved_size(struct request_queue *q, int __user *p)
 {
-	unsigned val = min(q->sg_reserved_size, q->max_sectors << 9);
+	unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9);
 
 	return put_user(val, p);
 }
@@ -89,8 +89,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p)
 
 	if (size < 0)
 		return -EINVAL;
-	if (size > (q->max_sectors << 9))
-		size = q->max_sectors << 9;
+	if (size > (queue_max_sectors(q) << 9))
+		size = queue_max_sectors(q) << 9;
 
 	q->sg_reserved_size = size;
 	return 0;
@@ -264,7 +264,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	if (hdr->cmd_len > BLK_MAX_CDB)
 		return -EINVAL;
 
-	if (hdr->dxfer_len > (q->max_hw_sectors << 9))
+	if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9))
 		return -EIO;
 
 	if (hdr->dxfer_len)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 293f585..d57f117 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -991,13 +991,15 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
  */
 static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
 {
-	if ((pd->settings.size << 9) / CD_FRAMESIZE <= q->max_phys_segments) {
+	if ((pd->settings.size << 9) / CD_FRAMESIZE
+	    <= queue_max_phys_segments(q)) {
 		/*
 		 * The cdrom device can handle one segment/frame
 		 */
 		clear_bit(PACKET_MERGE_SEGS, &pd->flags);
 		return 0;
-	} else if ((pd->settings.size << 9) / PAGE_SIZE <= q->max_phys_segments) {
+	} else if ((pd->settings.size << 9) / PAGE_SIZE
+		   <= queue_max_phys_segments(q)) {
 		/*
 		 * We can handle this case at the expense of some extra memory
 		 * copies during write operations
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index cceace6..71d1b9b 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2101,8 +2101,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		nr = nframes;
 		if (cdi->cdda_method == CDDA_BPC_SINGLE)
 			nr = 1;
-		if (nr * CD_FRAMESIZE_RAW > (q->max_sectors << 9))
-			nr = (q->max_sectors << 9) / CD_FRAMESIZE_RAW;
+		if (nr * CD_FRAMESIZE_RAW > (queue_max_sectors(q) << 9))
+			nr = (queue_max_sectors(q) << 9) / CD_FRAMESIZE_RAW;
 
 		len = nr * CD_FRAMESIZE_RAW;
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 65e2d97..e9a73bb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -510,7 +510,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 	 *        combine_restrictions_low()
 	 */
 	rs->max_sectors =
-		min_not_zero(rs->max_sectors, q->max_sectors);
+		min_not_zero(rs->max_sectors, queue_max_sectors(q));
 
 	/*
 	 * Check if merge fn is supported.
@@ -525,25 +525,25 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 
 	rs->max_phys_segments =
 		min_not_zero(rs->max_phys_segments,
-			     q->max_phys_segments);
+			     queue_max_phys_segments(q));
 
 	rs->max_hw_segments =
-		min_not_zero(rs->max_hw_segments, q->max_hw_segments);
+		min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
 
 	rs->logical_block_size = max(rs->logical_block_size,
 				     queue_logical_block_size(q));
 
 	rs->max_segment_size =
-		min_not_zero(rs->max_segment_size, q->max_segment_size);
+		min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
 
 	rs->max_hw_sectors =
-		min_not_zero(rs->max_hw_sectors, q->max_hw_sectors);
+		min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
 
 	rs->seg_boundary_mask =
 		min_not_zero(rs->seg_boundary_mask,
-			     q->seg_boundary_mask);
+			     queue_segment_boundary(q));
 
-	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
+	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
 
 	rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 }
@@ -914,13 +914,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	 * restrictions.
 	 */
 	blk_queue_max_sectors(q, t->limits.max_sectors);
-	q->max_phys_segments = t->limits.max_phys_segments;
-	q->max_hw_segments = t->limits.max_hw_segments;
-	q->logical_block_size = t->limits.logical_block_size;
-	q->max_segment_size = t->limits.max_segment_size;
-	q->max_hw_sectors = t->limits.max_hw_sectors;
-	q->seg_boundary_mask = t->limits.seg_boundary_mask;
-	q->bounce_pfn = t->limits.bounce_pfn;
+	blk_queue_max_phys_segments(q, t->limits.max_phys_segments);
+	blk_queue_max_hw_segments(q, t->limits.max_hw_segments);
+	blk_queue_logical_block_size(q, t->limits.logical_block_size);
+	blk_queue_max_segment_size(q, t->limits.max_segment_size);
+	blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
+	blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
+	blk_queue_bounce_limit(q, t->limits.bounce_pfn);
 
 	if (t->limits.no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 7a36e38..64f1f3e 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->num_sectors = rdev->sectors;
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 41ced0c..4ee31aa 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -303,7 +303,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 		 * merge_bvec_fn will be involved in multipath.)
 		 */
 			if (q->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+			    queue_max_sectors(q) > (PAGE_SIZE>>9))
 				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			conf->working_disks++;
@@ -467,7 +467,7 @@ static int multipath_run (mddev_t *mddev)
 		 * violating it, not that we ever expect a device with
 		 * a merge_bvec_fn to be involved in multipath */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		if (!test_bit(Faulty, &rdev->flags))
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c08d755..925507e 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -144,7 +144,7 @@ static int create_strip_zones (mddev_t *mddev)
 		 */
 
 		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		if (!smallest || (rdev1->sectors < smallest->sectors))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 36df910..e23758b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1130,7 +1130,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			 * a one page request is never in violation.
 			 */
 			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+			    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			p->head_position = 0;
@@ -1996,7 +1996,7 @@ static int run(mddev_t *mddev)
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 499620a..750550c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1158,8 +1158,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			 * a one page request is never in violation.
 			 */
 			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
-				mddev->queue->max_sectors = (PAGE_SIZE>>9);
+			    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -2145,8 +2145,8 @@ static int run(mddev_t *mddev)
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
-			mddev->queue->max_sectors = (PAGE_SIZE>>9);
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4616bc3..7970dc8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3463,10 +3463,10 @@ static int bio_fits_rdev(struct bio *bi)
 {
 	struct request_queue *q = bdev_get_queue(bi->bi_bdev);
 
-	if ((bi->bi_size>>9) > q->max_sectors)
+	if ((bi->bi_size>>9) > queue_max_sectors(q))
 		return 0;
 	blk_recount_segments(q, bi);
-	if (bi->bi_phys_segments > q->max_phys_segments)
+	if (bi->bi_phys_segments > queue_max_phys_segments(q))
 		return 0;
 
 	if (q->merge_bvec_fn)
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 0fc2c0a..9bd407f 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -289,8 +289,8 @@ sg_open(struct inode *inode, struct file *filp)
 	if (list_empty(&sdp->sfds)) {	/* no existing opens on this device */
 		sdp->sgdebug = 0;
 		q = sdp->device->request_queue;
-		sdp->sg_tablesize = min(q->max_hw_segments,
-					q->max_phys_segments);
+		sdp->sg_tablesize = min(queue_max_hw_segments(q),
+					queue_max_phys_segments(q));
 	}
 	if ((sfp = sg_add_sfp(sdp, dev)))
 		filp->private_data = sfp;
@@ -909,7 +909,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
                 if (val < 0)
                         return -EINVAL;
 		val = min_t(int, val,
-				sdp->device->request_queue->max_sectors * 512);
+			    queue_max_sectors(sdp->device->request_queue) * 512);
 		if (val != sfp->reserve.bufflen) {
 			if (sg_res_in_use(sfp) || sfp->mmap_called)
 				return -EBUSY;
@@ -919,7 +919,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
 		return 0;
 	case SG_GET_RESERVED_SIZE:
 		val = min_t(int, sfp->reserve.bufflen,
-				sdp->device->request_queue->max_sectors * 512);
+			    queue_max_sectors(sdp->device->request_queue) * 512);
 		return put_user(val, ip);
 	case SG_SET_COMMAND_Q:
 		result = get_user(val, ip);
@@ -1059,7 +1059,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
 			return -ENODEV;
 		return scsi_ioctl(sdp->device, cmd_in, p);
 	case BLKSECTGET:
-		return put_user(sdp->device->request_queue->max_sectors * 512,
+		return put_user(queue_max_sectors(sdp->device->request_queue) * 512,
 				ip);
 	case BLKTRACESETUP:
 		return blk_trace_setup(sdp->device->request_queue,
@@ -1377,7 +1377,8 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
 	sdp->device = scsidp;
 	INIT_LIST_HEAD(&sdp->sfds);
 	init_waitqueue_head(&sdp->o_excl_wait);
-	sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments);
+	sdp->sg_tablesize = min(queue_max_hw_segments(q),
+				queue_max_phys_segments(q));
 	sdp->index = k;
 	kref_init(&sdp->d_ref);
 
@@ -2055,7 +2056,7 @@ sg_add_sfp(Sg_device * sdp, int dev)
 		sg_big_buff = def_reserved_size;
 
 	bufflen = min_t(int, sg_big_buff,
-			sdp->device->request_queue->max_sectors * 512);
+			queue_max_sectors(sdp->device->request_queue) * 512);
 	sg_build_reserve(sfp, bufflen);
 	SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp:   bufflen=%d, k_use_sg=%d\n",
 			   sfp->reserve.bufflen, sfp->reserve.k_use_sg));
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 8681b7083..89bd438 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3983,8 +3983,8 @@ static int st_probe(struct device *dev)
 		return -ENODEV;
 	}
 
-	i = min(SDp->request_queue->max_hw_segments,
-		SDp->request_queue->max_phys_segments);
+	i = min(queue_max_hw_segments(SDp->request_queue),
+		queue_max_phys_segments(SDp->request_queue));
 	if (st_max_sg_segs < i)
 		i = st_max_sg_segs;
 	buffer = new_tape_buffer((SDp->host)->unchecked_isa_dma, i);
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 4ca3b58..cfa26d5 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -132,7 +132,7 @@ static int slave_configure(struct scsi_device *sdev)
 
 		if (us->fflags & US_FL_MAX_SECTORS_MIN)
 			max_sectors = PAGE_CACHE_SIZE >> 9;
-		if (sdev->request_queue->max_sectors > max_sectors)
+		if (queue_max_sectors(sdev->request_queue) > max_sectors)
 			blk_queue_max_sectors(sdev->request_queue,
 					      max_sectors);
 	} else if (sdev->type == TYPE_TAPE) {
@@ -483,7 +483,7 @@ static ssize_t show_max_sectors(struct device *dev, struct device_attribute *att
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
 
-	return sprintf(buf, "%u\n", sdev->request_queue->max_sectors);
+	return sprintf(buf, "%u\n", queue_max_sectors(sdev->request_queue));
 }
 
 /* Input routine for the sysfs max_sectors file */
diff --git a/fs/bio.c b/fs/bio.c
index 4445c38..ab423a1 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -499,11 +499,11 @@ int bio_get_nr_vecs(struct block_device *bdev)
 	struct request_queue *q = bdev_get_queue(bdev);
 	int nr_pages;
 
-	nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages > q->max_phys_segments)
-		nr_pages = q->max_phys_segments;
-	if (nr_pages > q->max_hw_segments)
-		nr_pages = q->max_hw_segments;
+	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (nr_pages > queue_max_phys_segments(q))
+		nr_pages = queue_max_phys_segments(q);
+	if (nr_pages > queue_max_hw_segments(q))
+		nr_pages = queue_max_hw_segments(q);
 
 	return nr_pages;
 }
@@ -562,8 +562,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	 * make this too complex.
 	 */
 
-	while (bio->bi_phys_segments >= q->max_phys_segments
-	       || bio->bi_phys_segments >= q->max_hw_segments) {
+	while (bio->bi_phys_segments >= queue_max_phys_segments(q)
+	       || bio->bi_phys_segments >= queue_max_hw_segments(q)) {
 
 		if (retried_segments)
 			return 0;
@@ -634,7 +634,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
 		    unsigned int len, unsigned int offset)
 {
-	return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors);
+	return __bio_add_page(q, bio, page, len, offset,
+			      queue_max_hw_sectors(q));
 }
 
 /**
@@ -654,7 +655,7 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 		 unsigned int offset)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	return __bio_add_page(q, bio, page, len, offset, q->max_sectors);
+	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
 }
 
 struct bio_map_data {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d30ec6f..12737be 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -279,7 +279,7 @@ static inline int bio_has_allocated_vec(struct bio *bio)
 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
 	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
-	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask)
+	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
 #define BIO_SEG_BOUNDARY(q, b1, b2) \
 	BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 872b78b..29b48f7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -898,6 +898,7 @@ extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
@@ -988,6 +989,41 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
+static inline unsigned long queue_bounce_pfn(struct request_queue *q)
+{
+	return q->bounce_pfn;
+}
+
+static inline unsigned long queue_segment_boundary(struct request_queue *q)
+{
+	return q->seg_boundary_mask;
+}
+
+static inline unsigned int queue_max_sectors(struct request_queue *q)
+{
+	return q->max_sectors;
+}
+
+static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
+{
+	return q->max_hw_sectors;
+}
+
+static inline unsigned short queue_max_hw_segments(struct request_queue *q)
+{
+	return q->max_hw_segments;
+}
+
+static inline unsigned short queue_max_phys_segments(struct request_queue *q)
+{
+	return q->max_phys_segments;
+}
+
+static inline unsigned int queue_max_segment_size(struct request_queue *q)
+{
+	return q->max_segment_size;
+}
+
 static inline unsigned short queue_logical_block_size(struct request_queue *q)
 {
 	int retval = 512;
diff --git a/mm/bounce.c b/mm/bounce.c
index e590272..8dcd431 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -192,7 +192,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 		/*
 		 * is destination page below bounce pfn?
 		 */
-		if (page_to_pfn(page) <= q->bounce_pfn)
+		if (page_to_pfn(page) <= queue_bounce_pfn(q))
 			continue;
 
 		/*
@@ -284,7 +284,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	 * don't waste time iterating over bio segments
 	 */
 	if (!(q->bounce_gfp & GFP_DMA)) {
-		if (q->bounce_pfn >= blk_max_pfn)
+		if (queue_bounce_pfn(q) >= blk_max_pfn)
 			return;
 		pool = page_pool;
 	} else {
-- 
cgit v0.10.2


From 025146e13b63483add912706c101fb0fb6f015cc Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:51 -0400
Subject: block: Move queue limits to an embedded struct

To accommodate stacking drivers that do not have an associated request
queue we're moving the limits to a separate, embedded structure.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 0b32f98..b0f547c 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -179,16 +179,16 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 	 */
 	if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
 		dma = 1;
-	q->bounce_pfn = max_low_pfn;
+	q->limits.bounce_pfn = max_low_pfn;
 #else
 	if (b_pfn < blk_max_low_pfn)
 		dma = 1;
-	q->bounce_pfn = b_pfn;
+	q->limits.bounce_pfn = b_pfn;
 #endif
 	if (dma) {
 		init_emergency_isa_pool();
 		q->bounce_gfp = GFP_NOIO | GFP_DMA;
-		q->bounce_pfn = b_pfn;
+		q->limits.bounce_pfn = b_pfn;
 	}
 }
 EXPORT_SYMBOL(blk_queue_bounce_limit);
@@ -211,10 +211,10 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
 	}
 
 	if (BLK_DEF_MAX_SECTORS > max_sectors)
-		q->max_hw_sectors = q->max_sectors = max_sectors;
+		q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors;
 	else {
-		q->max_sectors = BLK_DEF_MAX_SECTORS;
-		q->max_hw_sectors = max_sectors;
+		q->limits.max_sectors = BLK_DEF_MAX_SECTORS;
+		q->limits.max_hw_sectors = max_sectors;
 	}
 }
 EXPORT_SYMBOL(blk_queue_max_sectors);
@@ -222,9 +222,9 @@ EXPORT_SYMBOL(blk_queue_max_sectors);
 void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
 {
 	if (BLK_DEF_MAX_SECTORS > max_sectors)
-		q->max_hw_sectors = BLK_DEF_MAX_SECTORS;
+		q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS;
 	else
-		q->max_hw_sectors = max_sectors;
+		q->limits.max_hw_sectors = max_sectors;
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
@@ -247,7 +247,7 @@ void blk_queue_max_phys_segments(struct request_queue *q,
 		       __func__, max_segments);
 	}
 
-	q->max_phys_segments = max_segments;
+	q->limits.max_phys_segments = max_segments;
 }
 EXPORT_SYMBOL(blk_queue_max_phys_segments);
 
@@ -271,7 +271,7 @@ void blk_queue_max_hw_segments(struct request_queue *q,
 		       __func__, max_segments);
 	}
 
-	q->max_hw_segments = max_segments;
+	q->limits.max_hw_segments = max_segments;
 }
 EXPORT_SYMBOL(blk_queue_max_hw_segments);
 
@@ -292,7 +292,7 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 		       __func__, max_size);
 	}
 
-	q->max_segment_size = max_size;
+	q->limits.max_segment_size = max_size;
 }
 EXPORT_SYMBOL(blk_queue_max_segment_size);
 
@@ -308,7 +308,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size);
  **/
 void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
-	q->logical_block_size = size;
+	q->limits.logical_block_size = size;
 }
 EXPORT_SYMBOL(blk_queue_logical_block_size);
 
@@ -325,14 +325,27 @@ EXPORT_SYMBOL(blk_queue_logical_block_size);
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
 	/* zero is "infinity" */
-	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
-	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
-	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
-
-	t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
-	t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
-	t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
-	t->logical_block_size = max(t->logical_block_size, b->logical_block_size);
+	t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
+					     queue_max_sectors(b));
+
+	t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
+						queue_max_hw_sectors(b));
+
+	t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
+						   queue_segment_boundary(b));
+
+	t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
+						   queue_max_phys_segments(b));
+
+	t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
+						 queue_max_hw_segments(b));
+
+	t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
+						  queue_max_segment_size(b));
+
+	t->limits.logical_block_size = max(queue_logical_block_size(t),
+					   queue_logical_block_size(b));
+
 	if (!t->queue_lock)
 		WARN_ON_ONCE(1);
 	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
@@ -430,7 +443,7 @@ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
 		       __func__, mask);
 	}
 
-	q->seg_boundary_mask = mask;
+	q->limits.seg_boundary_mask = mask;
 }
 EXPORT_SYMBOL(blk_queue_segment_boundary);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 29b48f7..b7bb6fd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -307,6 +307,21 @@ struct blk_cmd_filter {
 	struct kobject kobj;
 };
 
+struct queue_limits {
+	unsigned long		bounce_pfn;
+	unsigned long		seg_boundary_mask;
+
+	unsigned int		max_hw_sectors;
+	unsigned int		max_sectors;
+	unsigned int		max_segment_size;
+
+	unsigned short		logical_block_size;
+	unsigned short		max_hw_segments;
+	unsigned short		max_phys_segments;
+
+	unsigned char		no_cluster;
+};
+
 struct request_queue
 {
 	/*
@@ -358,7 +373,6 @@ struct request_queue
 	/*
 	 * queue needs bounce pages for pages above this limit
 	 */
-	unsigned long		bounce_pfn;
 	gfp_t			bounce_gfp;
 
 	/*
@@ -387,14 +401,6 @@ struct request_queue
 	unsigned int		nr_congestion_off;
 	unsigned int		nr_batching;
 
-	unsigned int		max_sectors;
-	unsigned int		max_hw_sectors;
-	unsigned short		max_phys_segments;
-	unsigned short		max_hw_segments;
-	unsigned short		logical_block_size;
-	unsigned int		max_segment_size;
-
-	unsigned long		seg_boundary_mask;
 	void			*dma_drain_buffer;
 	unsigned int		dma_drain_size;
 	unsigned int		dma_pad_mask;
@@ -410,6 +416,8 @@ struct request_queue
 	struct timer_list	timeout;
 	struct list_head	timeout_list;
 
+	struct queue_limits	limits;
+
 	/*
 	 * sg stuff
 	 */
@@ -991,45 +999,45 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
 
 static inline unsigned long queue_bounce_pfn(struct request_queue *q)
 {
-	return q->bounce_pfn;
+	return q->limits.bounce_pfn;
 }
 
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
-	return q->seg_boundary_mask;
+	return q->limits.seg_boundary_mask;
 }
 
 static inline unsigned int queue_max_sectors(struct request_queue *q)
 {
-	return q->max_sectors;
+	return q->limits.max_sectors;
 }
 
 static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
 {
-	return q->max_hw_sectors;
+	return q->limits.max_hw_sectors;
 }
 
 static inline unsigned short queue_max_hw_segments(struct request_queue *q)
 {
-	return q->max_hw_segments;
+	return q->limits.max_hw_segments;
 }
 
 static inline unsigned short queue_max_phys_segments(struct request_queue *q)
 {
-	return q->max_phys_segments;
+	return q->limits.max_phys_segments;
 }
 
 static inline unsigned int queue_max_segment_size(struct request_queue *q)
 {
-	return q->max_segment_size;
+	return q->limits.max_segment_size;
 }
 
 static inline unsigned short queue_logical_block_size(struct request_queue *q)
 {
 	int retval = 512;
 
-	if (q && q->logical_block_size)
-		retval = q->logical_block_size;
+	if (q && q->limits.logical_block_size)
+		retval = q->limits.logical_block_size;
 
 	return retval;
 }
-- 
cgit v0.10.2


From cd43e26f071524647e660706b784ebcbefbd2e44 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:52 -0400
Subject: block: Expose stacked device queues in sysfs

Currently stacking devices do not have a queue directory in sysfs.
However, many of the I/O characteristics like sector size, maximum
request size, etc. are queue properties.

This patch enables the queue directory for MD/DM devices.  The elevator
code has been modified to deal with queues that do not have an I/O
scheduler.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 142a4ac..3ccdadb 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -395,9 +395,6 @@ int blk_register_queue(struct gendisk *disk)
 	if (WARN_ON(!q))
 		return -ENXIO;
 
-	if (!q->request_fn)
-		return 0;
-
 	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
 			  "%s", "queue");
 	if (ret < 0)
@@ -405,6 +402,9 @@ int blk_register_queue(struct gendisk *disk)
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
+	if (!q->request_fn)
+		return 0;
+
 	ret = elv_register_queue(q);
 	if (ret) {
 		kobject_uevent(&q->kobj, KOBJ_REMOVE);
diff --git a/block/elevator.c b/block/elevator.c
index ebee948..bd78f69 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -575,6 +575,9 @@ void elv_drain_elevator(struct request_queue *q)
  */
 void elv_quiesce_start(struct request_queue *q)
 {
+	if (!q->elevator)
+		return;
+
 	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
 
 	/*
@@ -1050,6 +1053,9 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 	char elevator_name[ELV_NAME_MAX];
 	struct elevator_type *e;
 
+	if (!q->elevator)
+		return count;
+
 	strlcpy(elevator_name, name, sizeof(elevator_name));
 	strstrip(elevator_name);
 
@@ -1073,10 +1079,15 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 ssize_t elv_iosched_show(struct request_queue *q, char *name)
 {
 	struct elevator_queue *e = q->elevator;
-	struct elevator_type *elv = e->elevator_type;
+	struct elevator_type *elv;
 	struct elevator_type *__e;
 	int len = 0;
 
+	if (!q->elevator)
+		return sprintf(name, "none\n");
+
+	elv = e->elevator_type;
+
 	spin_lock(&elv_list_lock);
 	list_for_each_entry(__e, &elv_list, list) {
 		if (!strcmp(elv->elevator_name, __e->elevator_name))
-- 
cgit v0.10.2


From c72758f33784e5e2a1a4bb9421ef3e6de8f9fcf3 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:53 -0400
Subject: block: Export I/O topology for block devices and partitions

To support devices with physical block sizes bigger than 512 bytes we
need to ensure proper alignment.  This patch adds support for exposing
I/O topology characteristics as devices are stacked.

  logical_block_size is the smallest unit the device can address.

  physical_block_size indicates the smallest I/O the device can write
  without incurring a read-modify-write penalty.

  The io_min parameter is the smallest preferred I/O size reported by
  the device.  In many cases this is the same as the physical block
  size.  However, the io_min parameter can be scaled up when stacking
  (RAID5 chunk size > physical block size).

  The io_opt characteristic indicates the optimal I/O size reported by
  the device.  This is usually the stripe width for arrays.

  The alignment_offset parameter indicates the number of bytes the start
  of the device/partition is offset from the device's natural alignment.
  Partition tools and MD/DM utilities can use this to pad their offsets
  so filesystems start on proper boundaries.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 44f52a4..cbbd3e0 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -60,3 +60,62 @@ Description:
 		Indicates whether the block layer should automatically
 		generate checksums for write requests bound for
 		devices that support receiving integrity metadata.
+
+What:		/sys/block/<disk>/alignment_offset
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a physical block size that is
+		bigger than the logical block size (for instance a drive
+		with 4KB physical sectors exposing 512-byte logical
+		blocks to the operating system).  This parameter
+		indicates how many bytes the beginning of the device is
+		offset from the disk's natural alignment.
+
+What:		/sys/block/<disk>/<partition>/alignment_offset
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a physical block size that is
+		bigger than the logical block size (for instance a drive
+		with 4KB physical sectors exposing 512-byte logical
+		blocks to the operating system).  This parameter
+		indicates how many bytes the beginning of the partition
+		is offset from the disk's natural alignment.
+
+What:		/sys/block/<disk>/queue/logical_block_size
+Date:		May 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		This is the smallest unit the storage device can
+		address.  It is typically 512 bytes.
+
+What:		/sys/block/<disk>/queue/physical_block_size
+Date:		May 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		This is the smallest unit the storage device can write
+		without resorting to read-modify-write operation.  It is
+		usually the same as the logical block size but may be
+		bigger.  One example is SATA drives with 4KB sectors
+		that expose a 512-byte logical block size to the
+		operating system.
+
+What:		/sys/block/<disk>/queue/minimum_io_size
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a preferred minimum I/O size,
+		which is the smallest request the device can perform
+		without incurring a read-modify-write penalty.  For disk
+		drives this is often the physical block size.  For RAID
+		arrays it is often the stripe chunk size.
+
+What:		/sys/block/<disk>/queue/optimal_io_size
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report an optimal I/O size, which is
+		the device's preferred unit of receiving I/O.  This is
+		rarely reported for disk drives.  For RAID devices it is
+		usually the stripe width or the internal block size.
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b0f547c..5649f34 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -309,9 +309,94 @@ EXPORT_SYMBOL(blk_queue_max_segment_size);
 void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
 	q->limits.logical_block_size = size;
+
+	if (q->limits.physical_block_size < size)
+		q->limits.physical_block_size = size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
 }
 EXPORT_SYMBOL(blk_queue_logical_block_size);
 
+/**
+ * blk_queue_physical_block_size - set physical block size for the queue
+ * @q:  the request queue for the device
+ * @size:  the physical block size, in bytes
+ *
+ * Description:
+ *   This should be set to the lowest possible sector size that the
+ *   hardware can operate on without reverting to read-modify-write
+ *   operations.
+ */
+void blk_queue_physical_block_size(struct request_queue *q, unsigned short size)
+{
+	q->limits.physical_block_size = size;
+
+	if (q->limits.physical_block_size < q->limits.logical_block_size)
+		q->limits.physical_block_size = q->limits.logical_block_size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
+}
+EXPORT_SYMBOL(blk_queue_physical_block_size);
+
+/**
+ * blk_queue_alignment_offset - set physical block alignment offset
+ * @q:	the request queue for the device
+ * @alignment:	alignment offset in bytes
+ *
+ * Description:
+ *   Some devices are naturally misaligned to compensate for things like
+ *   the legacy DOS partition table 63-sector offset.  Low-level drivers
+ *   should call this function for devices whose first sector is not
+ *   naturally aligned.
+ */
+void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
+{
+	q->limits.alignment_offset =
+		offset & (q->limits.physical_block_size - 1);
+	q->limits.misaligned = 0;
+}
+EXPORT_SYMBOL(blk_queue_alignment_offset);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q:	the request queue for the device
+ * @io_min:  smallest I/O size in bytes
+ *
+ * Description:
+ *   Some devices have an internal block size bigger than the reported
+ *   hardware sector size.  This function can be used to signal the
+ *   smallest I/O the device can perform without incurring a performance
+ *   penalty.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+	q->limits.io_min = min;
+
+	if (q->limits.io_min < q->limits.logical_block_size)
+		q->limits.io_min = q->limits.logical_block_size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
+}
+EXPORT_SYMBOL(blk_queue_io_min);
+
+/**
+ * blk_queue_io_opt - set optimal request size for the queue
+ * @q:	the request queue for the device
+ * @io_opt:  optimal request size in bytes
+ *
+ * Description:
+ *   Drivers can call this function to set the preferred I/O request
+ *   size for devices that report such a value.
+ */
+void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
+{
+	q->limits.io_opt = opt;
+}
+EXPORT_SYMBOL(blk_queue_io_opt);
+
 /*
  * Returns the minimum that is _not_ zero, unless both are zero.
  */
@@ -358,6 +443,107 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
 /**
+ * blk_stack_limits - adjust queue_limits for stacked devices
+ * @t:	the stacking driver limits (top)
+ * @bdev:  the underlying queue limits (bottom)
+ * @offset:  offset to beginning of data within component device
+ *
+ * Description:
+ *    Merges two queue_limit structs.  Returns 0 if alignment didn't
+ *    change.  Returns -1 if adding the bottom device caused
+ *    misalignment.
+ */
+int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
+		     sector_t offset)
+{
+	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
+	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+
+	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
+					    b->seg_boundary_mask);
+
+	t->max_phys_segments = min_not_zero(t->max_phys_segments,
+					    b->max_phys_segments);
+
+	t->max_hw_segments = min_not_zero(t->max_hw_segments,
+					  b->max_hw_segments);
+
+	t->max_segment_size = min_not_zero(t->max_segment_size,
+					   b->max_segment_size);
+
+	t->logical_block_size = max(t->logical_block_size,
+				    b->logical_block_size);
+
+	t->physical_block_size = max(t->physical_block_size,
+				     b->physical_block_size);
+
+	t->io_min = max(t->io_min, b->io_min);
+	t->no_cluster |= b->no_cluster;
+
+	/* Bottom device offset aligned? */
+	if (offset &&
+	    (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+		t->misaligned = 1;
+		return -1;
+	}
+
+	/* If top has no alignment offset, inherit from bottom */
+	if (!t->alignment_offset)
+		t->alignment_offset =
+			b->alignment_offset & (b->physical_block_size - 1);
+
+	/* Top device aligned on logical block boundary? */
+	if (t->alignment_offset & (t->logical_block_size - 1)) {
+		t->misaligned = 1;
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * disk_stack_limits - adjust queue limits for stacked drivers
+ * @t:	MD/DM gendisk (top)
+ * @bdev:  the underlying block device (bottom)
+ * @offset:  offset to beginning of data within component device
+ *
+ * Description:
+ *    Merges the limits for two queues.  Returns 0 if alignment
+ *    didn't change.  Returns -1 if adding the bottom device caused
+ *    misalignment.
+ */
+void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
+		       sector_t offset)
+{
+	struct request_queue *t = disk->queue;
+	struct request_queue *b = bdev_get_queue(bdev);
+
+	offset += get_start_sect(bdev) << 9;
+
+	if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
+		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
+
+		disk_name(disk, 0, top);
+		bdevname(bdev, bottom);
+
+		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
+		       top, bottom);
+	}
+
+	if (!t->queue_lock)
+		WARN_ON_ONCE(1);
+	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(t->queue_lock, flags);
+		if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+			queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
+		spin_unlock_irqrestore(t->queue_lock, flags);
+	}
+}
+EXPORT_SYMBOL(disk_stack_limits);
+
+/**
  * blk_queue_dma_pad - set pad mask
  * @q:     the request queue for the device
  * @mask:  pad mask
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ccdadb..9337e17 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -105,6 +105,21 @@ static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page
 	return queue_var_show(queue_logical_block_size(q), page);
 }
 
+static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_physical_block_size(q), page);
+}
+
+static ssize_t queue_io_min_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_io_min(q), page);
+}
+
+static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_io_opt(q), page);
+}
+
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
@@ -257,6 +272,21 @@ static struct queue_sysfs_entry queue_logical_block_size_entry = {
 	.show = queue_logical_block_size_show,
 };
 
+static struct queue_sysfs_entry queue_physical_block_size_entry = {
+	.attr = {.name = "physical_block_size", .mode = S_IRUGO },
+	.show = queue_physical_block_size_show,
+};
+
+static struct queue_sysfs_entry queue_io_min_entry = {
+	.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
+	.show = queue_io_min_show,
+};
+
+static struct queue_sysfs_entry queue_io_opt_entry = {
+	.attr = {.name = "optimal_io_size", .mode = S_IRUGO },
+	.show = queue_io_opt_show,
+};
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
 	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_nonrot_show,
@@ -289,6 +319,9 @@ static struct attribute *default_attrs[] = {
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
 	&queue_logical_block_size_entry.attr,
+	&queue_physical_block_size_entry.attr,
+	&queue_io_min_entry.attr,
+	&queue_io_opt_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
diff --git a/block/genhd.c b/block/genhd.c
index 1a4916e..fe7ccc0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -852,11 +852,21 @@ static ssize_t disk_capability_show(struct device *dev,
 	return sprintf(buf, "%x\n", disk->flags);
 }
 
+static ssize_t disk_alignment_offset_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -875,6 +885,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_removable.attr,
 	&dev_attr_ro.attr,
 	&dev_attr_size.attr,
+	&dev_attr_alignment_offset.attr,
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 99e33ef..0af3608 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -219,6 +219,13 @@ ssize_t part_size_show(struct device *dev,
 	return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
 }
 
+ssize_t part_alignment_offset_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct hd_struct *p = dev_to_part(dev);
+	return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
+}
+
 ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
@@ -272,6 +279,7 @@ ssize_t part_fail_store(struct device *dev,
 static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
 static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
@@ -282,6 +290,7 @@ static struct attribute *part_attrs[] = {
 	&dev_attr_partition.attr,
 	&dev_attr_start.attr,
 	&dev_attr_size.attr,
+	&dev_attr_alignment_offset.attr,
 	&dev_attr_stat.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
@@ -383,6 +392,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	pdev = part_to_dev(p);
 
 	p->start_sect = start;
+	p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b7bb6fd..5e740a1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -314,11 +314,16 @@ struct queue_limits {
 	unsigned int		max_hw_sectors;
 	unsigned int		max_sectors;
 	unsigned int		max_segment_size;
+	unsigned int		physical_block_size;
+	unsigned int		alignment_offset;
+	unsigned int		io_min;
+	unsigned int		io_opt;
 
 	unsigned short		logical_block_size;
 	unsigned short		max_hw_segments;
 	unsigned short		max_phys_segments;
 
+	unsigned char		misaligned;
 	unsigned char		no_cluster;
 };
 
@@ -911,6 +916,15 @@ extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
+extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
+extern void blk_queue_alignment_offset(struct request_queue *q,
+				       unsigned int alignment);
+extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
+extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
+			    sector_t offset);
+extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
+			      sector_t offset);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
@@ -1047,6 +1061,39 @@ static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
 	return queue_logical_block_size(bdev_get_queue(bdev));
 }
 
+static inline unsigned int queue_physical_block_size(struct request_queue *q)
+{
+	return q->limits.physical_block_size;
+}
+
+static inline unsigned int queue_io_min(struct request_queue *q)
+{
+	return q->limits.io_min;
+}
+
+static inline unsigned int queue_io_opt(struct request_queue *q)
+{
+	return q->limits.io_opt;
+}
+
+static inline int queue_alignment_offset(struct request_queue *q)
+{
+	if (q && q->limits.misaligned)
+		return -1;
+
+	if (q && q->limits.alignment_offset)
+		return q->limits.alignment_offset;
+
+	return 0;
+}
+
+static inline int queue_sector_alignment_offset(struct request_queue *q,
+						sector_t sector)
+{
+	return ((sector << 9) - q->limits.alignment_offset)
+		& (q->limits.io_min - 1);
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a1a28ca..149fda2 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -90,6 +90,7 @@ struct disk_stats {
 struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
+	sector_t alignment_offset;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
-- 
cgit v0.10.2


From 95caa0a9bdaf93607bd0cc8932f53112496f2f22 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@linux.vnet.ibm.com>
Date: Fri, 22 May 2009 21:30:39 -0300
Subject: icom: fix rmmod crash

Actually the icom driver is crashing when is being removed because
the driver is kfreeing the adapter structure before calling
pci_release_regions(), which result in the following error:

  Unable to handle kernel paging request for data at address 0x6b6b6b6b6b6b6d33
  Faulting instruction address: 0xc000000000246b80
  Oops: Kernel access of bad area, sig: 11 [#1]
  ....
  [c000000012d436a0] [c0000000001002d0] .kfree+0x120/0x34c (unreliable)
  [c000000012d43730] [c000000000246d60] .pci_release_selected_regions+0x3c/0x68
  [c000000012d437c0] [d000000002d54700] .icom_kref_release+0xf4/0x118 [icom]
  [c000000012d43850] [c000000000232e50] .kref_put+0x74/0x94
  [c000000012d438d0] [d000000002d56c58] .icom_remove+0x40/0xa4 [icom]
  [c000000012d43960] [c000000000249e48] .pci_device_remove+0x50/0x90
  [c000000012d439e0] [c0000000002d68d8] .__device_release_driver+0x94/0xd4
  [c000000012d43a70] [c0000000002d7104] .driver_detach+0xf8/0x12c
  [c000000012d43b00] [c0000000002d549c] .bus_remove_driver+0xbc/0x11c
  [c000000012d43b90] [c0000000002d71dc] .driver_unregister+0x60/0x80
  [c000000012d43c20] [c00000000024a07c] .pci_unregister_driver+0x44/0xe8
  [c000000012d43cb0] [d000000002d56bf4] .icom_exit+0x1c/0x40 [icom]
  [c000000012d43d30] [c000000000095fa8] .SyS_delete_module+0x214/0x2a8
  [c000000012d43e30] [c00000000000852c] syscall_exit+0x0/0x40

Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com>
Cc: stable@kernel.org
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index 6579e2b..a461b3b 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -1472,8 +1472,8 @@ static void icom_remove_adapter(struct icom_adapter *icom_adapter)
 
 	free_irq(icom_adapter->pci_dev->irq, (void *) icom_adapter);
 	iounmap(icom_adapter->base_addr);
-	icom_free_adapter(icom_adapter);
 	pci_release_regions(icom_adapter->pci_dev);
+	icom_free_adapter(icom_adapter);
 }
 
 static void icom_kref_release(struct kref *kref)
-- 
cgit v0.10.2


From 3b77f777b8f1c001b63e317c4ce317292ff0ff94 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Sat, 23 May 2009 08:23:16 +0200
Subject: ide-disk: fix missing max_sectors accessor function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The recent move to accessor functions for querying queue limits
missed an entry in ide-disk.c:

drivers/ide/ide-disk.c: In function ‘ide_disk_setup’:
drivers/ide/ide-disk.c:642: error: ‘struct request_queue’ has no member named ‘max_sectors’

Fix it.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index ad18e14..c6f7fcf 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -639,7 +639,7 @@ static void ide_disk_setup(ide_drive_t *drive)
 	}
 
 	printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name,
-		q->max_sectors / 2);
+	       queue_max_sectors(q) / 2);
 
 	if (ata_id_is_ssd(id))
 		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
-- 
cgit v0.10.2


From 0154724d487586241c1ad57cfd348ed2ff2274e2 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 23 May 2009 00:01:05 +0100
Subject: ASoC: Fix WM9081 PowerPC compiler issues

Ensure that we always set a new sysclk when using the FLL in master mode
and pick out the correct value for the sample rate in hw_params().

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index 83e3148..86fc57e 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c
@@ -702,9 +702,10 @@ static int configure_clock(struct snd_soc_codec *codec)
 			 * performance. */
 			for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) {
 				target = wm9081->fs * clk_sys_rates[i].ratio;
+				new_sysclk = target;
 				if (target >= wm9081->bclk &&
 				    target > 3000000)
-					new_sysclk = target;
+					break;
 			}
 		} else if (wm9081->fs) {
 			for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) {
@@ -1102,7 +1103,8 @@ static int wm9081_hw_params(struct snd_pcm_substream *substream,
 	}
 	dev_dbg(codec->dev, "Selected SAMPLE_RATE of %dHz\n",
 		sample_rates[best].rate);
-	clk_ctrl2 |= (sample_rates[i].sample_rate << WM9081_SAMPLE_RATE_SHIFT);
+	clk_ctrl2 |= (sample_rates[best].sample_rate
+			<< WM9081_SAMPLE_RATE_SHIFT);
 
 	/* BCLK_DIV */
 	best = 0;
-- 
cgit v0.10.2


From 14f0aa359365e8a93a77b71e3b840274b9b4dcb1 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 23 May 2009 11:36:20 +0100
Subject: [ARM] disable NX support for OABI-supporting kernels

Our signal syscall restart handling for these kernels still uses
the userspace stack to build code for restarting the syscall.
Unfortunately, fixing this is non-trivial, and so for the time
being, we resolve the problem by disabling NX support.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c
index d4a0da1..950391f 100644
--- a/arch/arm/kernel/elf.c
+++ b/arch/arm/kernel/elf.c
@@ -78,6 +78,15 @@ int arm_elf_read_implies_exec(const struct elf32_hdr *x, int executable_stack)
 		return 1;
 	if (cpu_architecture() < CPU_ARCH_ARMv6)
 		return 1;
+#if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
+	/*
+	 * If we have support for OABI programs, we can never allow NX
+	 * support - our signal syscall restart mechanism relies upon
+	 * being able to execute code placed on the user stack.
+	 */
+	return 1;
+#else
 	return 0;
+#endif
 }
 EXPORT_SYMBOL(arm_elf_read_implies_exec);
-- 
cgit v0.10.2


From 948cd52906baf1f92aeea2f9b5c515db1b2e592a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 22 May 2009 10:40:09 +0900
Subject: sparseirq: Allow early irq_desc allocation

Presently non-legacy IRQs have their irq_desc allocated with
kzalloc_node(). This assumes that all callers of irq_to_desc_node_alloc()
will be sufficiently late in the boot process that kmalloc is available.

While porting sparseirq support to sh this blew up immediately, as at the
time that we register the CPU's interrupt vector map only bootmem is
available. Check slab_is_available() to work out which path to use.

[ Impact: fix SH early boot crash with sparseirq enabled ]

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
LKML-Reference: <20090522014008.GA2806@linux-sh.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a3c671e..18041a2 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/irq.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/interrupt.h>
@@ -81,11 +82,16 @@ static struct irq_desc irq_desc_init = {
 	.lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
 };
 
-void init_kstat_irqs(struct irq_desc *desc, int node, int nr)
+void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
 {
 	void *ptr;
 
-	ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
+	if (slab_is_available())
+		ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
+				   GFP_ATOMIC, node);
+	else
+		ptr = alloc_bootmem_node(NODE_DATA(node),
+				nr * sizeof(*desc->kstat_irqs));
 
 	/*
 	 * don't overwite if can not get new one
@@ -186,7 +192,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	return NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
+struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
 	unsigned long flags;
@@ -208,7 +214,11 @@ struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 	if (desc)
 		goto out_unlock;
 
-	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+	if (slab_is_available())
+		desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+	else
+		desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
+
 	printk(KERN_DEBUG "  alloc irq_desc for %d on node %d\n", irq, node);
 	if (!desc) {
 		printk(KERN_ERR "can not alloc irq_desc\n");
-- 
cgit v0.10.2


From df391e0eda1e678add56a8e34226edf05d89af6a Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 23 May 2009 09:51:20 -0700
Subject: Input: multitouch - add tracking ID to the protocol
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are a few multi-touch devices that support finger tracking
well in hardware, Stantum being the prime example. By exposing the
tracking ID in the MT protocol, evdev bandwidth and cpu usage in
user space can be reduced.

This patch adds the ABS_MT_TRACKING_ID to the MT protocol.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Tested-by: Stéphane Chatty <chatty@enac.fr>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>

diff --git a/drivers/input/input.c b/drivers/input/input.c
index e54e002..5d445f4 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -42,6 +42,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = {
 	ABS_MT_POSITION_Y,
 	ABS_MT_TOOL_TYPE,
 	ABS_MT_BLOB_ID,
+	ABS_MT_TRACKING_ID,
 	0
 };
 static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)];
diff --git a/include/linux/input.h b/include/linux/input.h
index 0e6ff5d..6fed4f6 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -656,6 +656,7 @@ struct input_absinfo {
 #define ABS_MT_POSITION_Y	0x36	/* Center Y ellipse position */
 #define ABS_MT_TOOL_TYPE	0x37	/* Type of touching device */
 #define ABS_MT_BLOB_ID		0x38	/* Group a set of packets as a blob */
+#define ABS_MT_TRACKING_ID	0x39	/* Unique ID of initiated contact */
 
 #define ABS_MAX			0x3f
 #define ABS_CNT			(ABS_MAX+1)
-- 
cgit v0.10.2


From f9fcfc3b4627a1ec9b50411060f1b384926d6610 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 23 May 2009 09:51:21 -0700
Subject: Input: multitouch - augment event semantics documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Through the collaboration to adapt the N-trig and Stantum HID
drivers to the MT protocol, some semantic clarifications to the
protocol have been made. This patch adds them to the MT documentation.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Tested-by: Stéphane Chatty <chatty@enac.fr>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>

diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt
index 9f09557..a12ea3b 100644
--- a/Documentation/input/multi-touch-protocol.txt
+++ b/Documentation/input/multi-touch-protocol.txt
@@ -18,8 +18,12 @@ Usage
 Anonymous finger details are sent sequentially as separate packets of ABS
 events. Only the ABS_MT events are recognized as part of a finger
 packet. The end of a packet is marked by calling the input_mt_sync()
-function, which generates a SYN_MT_REPORT event. The end of multi-touch
-transfer is marked by calling the usual input_sync() function.
+function, which generates a SYN_MT_REPORT event. This instructs the
+receiver to accept the data for the current finger and prepare to receive
+another. The end of a multi-touch transfer is marked by calling the usual
+input_sync() function. This instructs the receiver to act upon events
+accumulated since last EV_SYN/SYN_REPORT and prepare to receive a new
+set of events/packets.
 
 A set of ABS_MT events with the desired properties is defined. The events
 are divided into categories, to allow for partial implementation.  The
@@ -27,11 +31,26 @@ minimum set consists of ABS_MT_TOUCH_MAJOR, ABS_MT_POSITION_X and
 ABS_MT_POSITION_Y, which allows for multiple fingers to be tracked.  If the
 device supports it, the ABS_MT_WIDTH_MAJOR may be used to provide the size
 of the approaching finger. Anisotropy and direction may be specified with
-ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. Devices with
-more granular information may specify general shapes as blobs, i.e., as a
-sequence of rectangular shapes grouped together by an
-ABS_MT_BLOB_ID. Finally, the ABS_MT_TOOL_TYPE may be used to specify
-whether the touching tool is a finger or a pen or something else.
+ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION.  The
+ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a
+finger or a pen or something else.  Devices with more granular information
+may specify general shapes as blobs, i.e., as a sequence of rectangular
+shapes grouped together by an ABS_MT_BLOB_ID. Finally, for the few devices
+that currently support it, the ABS_MT_TRACKING_ID event may be used to
+report finger tracking from hardware [5].
+
+Here is what a minimal event sequence for a two-finger touch would look
+like:
+
+   ABS_MT_TOUCH_MAJOR
+   ABS_MT_POSITION_X
+   ABS_MT_POSITION_Y
+   SYN_MT_REPORT
+   ABS_MT_TOUCH_MAJOR
+   ABS_MT_POSITION_X
+   ABS_MT_POSITION_Y
+   SYN_MT_REPORT
+   SYN_REPORT
 
 
 Event Semantics
@@ -44,24 +63,24 @@ ABS_MT_TOUCH_MAJOR
 
 The length of the major axis of the contact. The length should be given in
 surface units. If the surface has an X times Y resolution, the largest
-possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal.
+possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal [4].
 
 ABS_MT_TOUCH_MINOR
 
 The length, in surface units, of the minor axis of the contact. If the
-contact is circular, this event can be omitted.
+contact is circular, this event can be omitted [4].
 
 ABS_MT_WIDTH_MAJOR
 
 The length, in surface units, of the major axis of the approaching
 tool. This should be understood as the size of the tool itself. The
 orientation of the contact and the approaching tool are assumed to be the
-same.
+same [4].
 
 ABS_MT_WIDTH_MINOR
 
 The length, in surface units, of the minor axis of the approaching
-tool. Omit if circular.
+tool. Omit if circular [4].
 
 The above four values can be used to derive additional information about
 the contact. The ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR approximates
@@ -70,14 +89,17 @@ different characteristic widths [1].
 
 ABS_MT_ORIENTATION
 
-The orientation of the ellipse. The value should describe half a revolution
-clockwise around the touch center. The scale of the value is arbitrary, but
-zero should be returned for an ellipse aligned along the Y axis of the
-surface. As an example, an index finger placed straight onto the axis could
-return zero orientation, something negative when twisted to the left, and
-something positive when twisted to the right. This value can be omitted if
-the touching object is circular, or if the information is not available in
-the kernel driver.
+The orientation of the ellipse. The value should describe a signed quarter
+of a revolution clockwise around the touch center. The signed value range
+is arbitrary, but zero should be returned for a finger aligned along the Y
+axis of the surface, a negative value when finger is turned to the left, and
+a positive value when finger turned to the right. When completely aligned with
+the X axis, the range max should be returned.  Orientation can be omitted
+if the touching object is circular, or if the information is not available
+in the kernel driver. Partial orientation support is possible if the device
+can distinguish between the two axis, but not (uniquely) any values in
+between. In such cases, the range of ABS_MT_ORIENTATION should be [0, 1]
+[4].
 
 ABS_MT_POSITION_X
 
@@ -98,8 +120,35 @@ ABS_MT_BLOB_ID
 
 The BLOB_ID groups several packets together into one arbitrarily shaped
 contact. This is a low-level anonymous grouping, and should not be confused
-with the high-level contactID, explained below. Most kernel drivers will
-not have this capability, and can safely omit the event.
+with the high-level trackingID [5]. Most kernel drivers will not have blob
+capability, and can safely omit the event.
+
+ABS_MT_TRACKING_ID
+
+The TRACKING_ID identifies an initiated contact throughout its life cycle
+[5]. There are currently only a few devices that support it, so this event
+should normally be omitted.
+
+
+Event Computation
+-----------------
+
+The flora of different hardware unavoidably leads to some devices fitting
+better to the MT protocol than others. To simplify and unify the mapping,
+this section gives recipes for how to compute certain events.
+
+For devices reporting contacts as rectangular shapes, signed orientation
+cannot be obtained. Assuming X and Y are the lengths of the sides of the
+touching rectangle, here is a simple formula that retains the most
+information possible:
+
+   ABS_MT_TOUCH_MAJOR := max(X, Y)
+   ABS_MT_TOUCH_MINOR := min(X, Y)
+   ABS_MT_ORIENTATION := bool(X > Y)
+
+The range of ABS_MT_ORIENTATION should be set to [0, 1], to indicate that
+the device can distinguish between a finger along the Y axis (0) and a
+finger along the X axis (1).
 
 
 Finger Tracking
@@ -109,14 +158,18 @@ The kernel driver should generate an arbitrary enumeration of the set of
 anonymous contacts currently on the surface. The order in which the packets
 appear in the event stream is not important.
 
-The process of finger tracking, i.e., to assign a unique contactID to each
+The process of finger tracking, i.e., to assign a unique trackingID to each
 initiated contact on the surface, is left to user space; preferably the
-multi-touch X driver [3]. In that driver, the contactID stays the same and
+multi-touch X driver [3]. In that driver, the trackingID stays the same and
 unique until the contact vanishes (when the finger leaves the surface). The
 problem of assigning a set of anonymous fingers to a set of identified
 fingers is a euclidian bipartite matching problem at each event update, and
 relies on a sufficiently rapid update rate.
 
+There are a few devices that support trackingID in hardware. User space can
+make use of these native identifiers to reduce bandwidth and cpu usage.
+
+
 Notes
 -----
 
@@ -136,5 +189,7 @@ could be used to derive tilt.
 time of writing (April 2009), the MT protocol is not yet merged, and the
 prototype implements finger matching, basic mouse support and two-finger
 scrolling. The project aims at improving the quality of current multi-touch
-functionality available in the synaptics X driver, and in addition
+functionality available in the Synaptics X driver, and in addition
 implement more advanced gestures.
+[4] See the section on event computation.
+[5] See the section on finger tracking.
-- 
cgit v0.10.2


From e220d2dcb944c5c488b6855d15ec66d76900514f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:28:55 +0200
Subject: perf_counter: Fix dynamic irq_period logging

We call perf_adjust_freq() from perf_counter_task_tick() which
is is called under the rq->lock causing lock recursion.
However, it's no longer required to be called under the
rq->lock, so remove it from under it.

Also, fix up some related comments.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.476197912@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2eedae8..23ddd29 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -260,6 +260,7 @@ enum perf_event_type {
 	/*
 	 * struct {
 	 * 	struct perf_event_header	header;
+	 * 	u64				time;
 	 * 	u64				irq_period;
 	 * };
 	 */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c100554..2f410ea 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2559,7 +2559,8 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 }
 
 /*
- *
+ * Log irq_period changes so that analyzing tools can re-normalize the
+ * event flow.
  */
 
 static void perf_log_period(struct perf_counter *counter, u64 period)
diff --git a/kernel/sched.c b/kernel/sched.c
index 4c0d58b..ad079f0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4875,9 +4875,10 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
-	perf_counter_task_tick(curr, cpu);
 	spin_unlock(&rq->lock);
 
+	perf_counter_task_tick(curr, cpu);
+
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
-- 
cgit v0.10.2


From fccc714b3148ab9741fafc1e90c3876d50df6093 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:28:56 +0200
Subject: perf_counter: Sanitize counter->mutex

s/counter->mutex/counter->child_mutex/ and make sure its only
used to protect child_list.

The usage in __perf_counter_exit_task() doesn't appear to be
problematic since ctx->mutex also covers anything related to fd
tear-down.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.533186528@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 23ddd29..4ab8050 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -452,9 +452,6 @@ struct perf_counter {
 	struct perf_counter_context	*ctx;
 	struct file			*filp;
 
-	struct perf_counter		*parent;
-	struct list_head		child_list;
-
 	/*
 	 * These accumulate total time (in nanoseconds) that children
 	 * counters have been enabled and running, respectively.
@@ -465,7 +462,9 @@ struct perf_counter {
 	/*
 	 * Protect attach/detach and child_list:
 	 */
-	struct mutex			mutex;
+	struct mutex			child_mutex;
+	struct list_head		child_list;
+	struct perf_counter		*parent;
 
 	int				oncpu;
 	int				cpu;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2f410ea..679c3b5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -111,6 +111,10 @@ static void put_ctx(struct perf_counter_context *ctx)
 	}
 }
 
+/*
+ * Add a counter from the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -136,7 +140,7 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 /*
  * Remove a counter from the lists for its context.
- * Must be called with counter->mutex and ctx->mutex held.
+ * Must be called with ctx->mutex and ctx->lock held.
  */
 static void
 list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
@@ -276,7 +280,7 @@ static void __perf_counter_remove_from_context(void *info)
 /*
  * Remove the counter from a task's (or a CPU's) list of counters.
  *
- * Must be called with counter->mutex and ctx->mutex held.
+ * Must be called with ctx->mutex held.
  *
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
@@ -1407,11 +1411,7 @@ static int perf_release(struct inode *inode, struct file *file)
 	file->private_data = NULL;
 
 	mutex_lock(&ctx->mutex);
-	mutex_lock(&counter->mutex);
-
 	perf_counter_remove_from_context(counter);
-
-	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
 	free_counter(counter);
@@ -1437,7 +1437,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (counter->state == PERF_COUNTER_STATE_ERROR)
 		return 0;
 
-	mutex_lock(&counter->mutex);
+	mutex_lock(&counter->child_mutex);
 	values[0] = perf_counter_read(counter);
 	n = 1;
 	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -1446,7 +1446,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = counter->total_time_running +
 			atomic64_read(&counter->child_total_time_running);
-	mutex_unlock(&counter->mutex);
+	mutex_unlock(&counter->child_mutex);
 
 	if (count < n * sizeof(u64))
 		return -EINVAL;
@@ -1510,11 +1510,11 @@ static void perf_counter_for_each_child(struct perf_counter *counter,
 {
 	struct perf_counter *child;
 
-	mutex_lock(&counter->mutex);
+	mutex_lock(&counter->child_mutex);
 	func(counter);
 	list_for_each_entry(child, &counter->child_list, child_list)
 		func(child);
-	mutex_unlock(&counter->mutex);
+	mutex_unlock(&counter->child_mutex);
 }
 
 static void perf_counter_for_each(struct perf_counter *counter,
@@ -1522,11 +1522,11 @@ static void perf_counter_for_each(struct perf_counter *counter,
 {
 	struct perf_counter *child;
 
-	mutex_lock(&counter->mutex);
+	mutex_lock(&counter->child_mutex);
 	perf_counter_for_each_sibling(counter, func);
 	list_for_each_entry(child, &counter->child_list, child_list)
 		perf_counter_for_each_sibling(child, func);
-	mutex_unlock(&counter->mutex);
+	mutex_unlock(&counter->child_mutex);
 }
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -3106,7 +3106,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	if (!group_leader)
 		group_leader = counter;
 
-	mutex_init(&counter->mutex);
+	mutex_init(&counter->child_mutex);
+	INIT_LIST_HEAD(&counter->child_list);
+
 	INIT_LIST_HEAD(&counter->list_entry);
 	INIT_LIST_HEAD(&counter->event_entry);
 	INIT_LIST_HEAD(&counter->sibling_list);
@@ -3114,8 +3116,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	mutex_init(&counter->mmap_mutex);
 
-	INIT_LIST_HEAD(&counter->child_list);
-
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
 	counter->group_leader		= group_leader;
@@ -3346,10 +3346,9 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * Link this into the parent counter's child list
 	 */
-	mutex_lock(&parent_counter->mutex);
+	mutex_lock(&parent_counter->child_mutex);
 	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
-
-	mutex_unlock(&parent_counter->mutex);
+	mutex_unlock(&parent_counter->child_mutex);
 
 	return child_counter;
 }
@@ -3396,9 +3395,9 @@ static void sync_child_counter(struct perf_counter *child_counter,
 	/*
 	 * Remove this counter from the parent's list
 	 */
-	mutex_lock(&parent_counter->mutex);
+	mutex_lock(&parent_counter->child_mutex);
 	list_del_init(&child_counter->child_list);
-	mutex_unlock(&parent_counter->mutex);
+	mutex_unlock(&parent_counter->child_mutex);
 
 	/*
 	 * Release the parent counter, if this was the last
@@ -3414,17 +3413,9 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 
-	/*
-	 * Protect against concurrent operations on child_counter
-	 * due its fd getting closed, etc.
-	 */
-	mutex_lock(&child_counter->mutex);
-
 	update_counter_times(child_counter);
 	list_del_counter(child_counter, child_ctx);
 
-	mutex_unlock(&child_counter->mutex);
-
 	parent_counter = child_counter->parent;
 	/*
 	 * It can happen that parent exits first, and has counters
-- 
cgit v0.10.2


From 682076ae1de0aba9c2da509f7b19dc03e30a6e1f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:28:57 +0200
Subject: perf_counter: Sanitize context locking

Ensure we're consistent with the context locks.

 context->mutex
   context->lock
     list_{add,del}_counter();

so that either lock is sufficient to stabilize the context.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.618790733@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 679c3b5..d162d2f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -597,6 +597,8 @@ static void add_counter_to_ctx(struct perf_counter *counter,
 
 /*
  * Cross CPU call to install and enable a performance counter
+ *
+ * Must be called with ctx->mutex held
  */
 static void __perf_install_in_context(void *info)
 {
@@ -1496,13 +1498,13 @@ static void perf_counter_for_each_sibling(struct perf_counter *counter,
 	struct perf_counter_context *ctx = counter->ctx;
 	struct perf_counter *sibling;
 
-	spin_lock_irq(&ctx->lock);
+	mutex_lock(&ctx->mutex);
 	counter = counter->group_leader;
 
 	func(counter);
 	list_for_each_entry(sibling, &counter->sibling_list, list_entry)
 		func(sibling);
-	spin_unlock_irq(&ctx->lock);
+	mutex_unlock(&ctx->mutex);
 }
 
 static void perf_counter_for_each_child(struct perf_counter *counter,
@@ -3414,7 +3416,10 @@ __perf_counter_exit_task(struct task_struct *child,
 	struct perf_counter *parent_counter;
 
 	update_counter_times(child_counter);
+
+	spin_lock_irq(&child_ctx->lock);
 	list_del_counter(child_counter, child_ctx);
+	spin_unlock_irq(&child_ctx->lock);
 
 	parent_counter = child_counter->parent;
 	/*
-- 
cgit v0.10.2


From 1a482f38c5aafeb3576079a38a5b21b46619f3d2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:28:58 +0200
Subject: perf_counter: Fix userspace build

recent userspace (F11) seems to already include the
linux/unistd.h bits which means we cannot include the version
in the kernel sources due to the header guards being the same.

Ensure we include the kernel version first.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.739756497@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index efb8759..1b19f18 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -1,5 +1,6 @@
 
 
+#include "perf.h"
 #include "util/util.h"
 
 #include <sys/types.h>
@@ -30,9 +31,7 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-#include "../../include/linux/perf_counter.h"
 
-#include "perf.h"
 
 #define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
 #define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 03518d7..8ae01d5 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -61,6 +61,7 @@
   * Released under the GPL v2. (and only v2, not any later version)
   */
 
+#include "perf.h"
 #include "util/util.h"
 
 #include <getopt.h>
@@ -83,10 +84,6 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-#include "../../include/linux/perf_counter.h"
-
-#include "perf.h"
-
 #define EVENT_MASK_KERNEL		1
 #define EVENT_MASK_USER			2
 
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 814b2e4..a3216a6 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -42,6 +42,7 @@
   * Released under the GPL v2. (and only v2, not any later version)
   */
 
+#include "perf.h"
 #include "util/util.h"
 
 #include <getopt.h>
@@ -64,10 +65,6 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-#include "../../include/linux/perf_counter.h"
-
-#include "perf.h"
-
 static int			system_wide			=  0;
 
 static int			nr_counters			=  0;
diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
index 81a7374..a517683 100644
--- a/Documentation/perf_counter/perf.h
+++ b/Documentation/perf_counter/perf.h
@@ -1,6 +1,25 @@
 #ifndef _PERF_PERF_H
 #define _PERF_PERF_H
 
+#if defined(__x86_64__) || defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __powerpc__
+#include "../../arch/powerpc/include/asm/unistd.h"
+#define rmb()		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
+#endif
+
+#include <time.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+
+#include "../../include/linux/perf_counter.h"
+
 /*
  * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
  * counters in the current task.
@@ -26,18 +45,6 @@ static inline unsigned long long rdclock(void)
 #define __user
 #define asmlinkage
 
-#if defined(__x86_64__) || defined(__i386__)
-#include "../../arch/x86/include/asm/unistd.h"
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __powerpc__
-#include "../../arch/powerpc/include/asm/unistd.h"
-#define rmb()		asm volatile ("sync" ::: "memory")
-#define cpu_relax()	asm volatile ("" ::: "memory");
-#endif
-
 #define unlikely(x)	__builtin_expect(!!(x), 0)
 #define min(x, y) ({				\
 	typeof(x) _min1 = (x);			\
-- 
cgit v0.10.2


From aa9c67f53d1969cf1db4c9c2db3a78c4ceb96469 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:28:59 +0200
Subject: perf_counter: Simplify context cleanup

Use perf_counter_remove_from_context() to remove counters from
the context.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.796275849@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d162d2f..0e97f89 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3416,10 +3416,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	struct perf_counter *parent_counter;
 
 	update_counter_times(child_counter);
-
-	spin_lock_irq(&child_ctx->lock);
-	list_del_counter(child_counter, child_ctx);
-	spin_unlock_irq(&child_ctx->lock);
+	perf_counter_remove_from_context(child_counter);
 
 	parent_counter = child_counter->parent;
 	/*
-- 
cgit v0.10.2


From 8db14ca12569fe885694bd3d5ff84c2d973d3cb0 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 23 May 2009 18:57:25 +0000
Subject: [CIFS] Avoid open on possible directories since Samba now rejects
 them

Small change (mostly formatting) to limit lookup based open calls to
file create only.

After discussion yesteday on samba-technical about the posix lookup
regression,  and looking at a problem with cifs posix open to one
particular Samba version, Jeff and JRA realized that Samba server's
behavior changed in this area (posix open behavior on files vs.
directories).   To make this behavior consistent, JRA just made a
fix to Samba server to alter how it handles open of directories (now
returning the equivalent of EISDIR instead of success). Since we don't
know at lookup time whether the inode is a directory or file (and
thus whether posix open will succeed with most current Samba server),
this change avoids the posix open code on lookup open (just issues
posix open on creates).    This gets the semantic benefits we want
(atomicity, posix byte range locks, improved write semantics on newly
created files) and file create still is fast, and we avoid the problem
that Jeff noticed yesterday with "openat" (and some open directory
calls) of non-cached directories to one version of Samba server, and
will work with future Samba versions (which include the fix jra just
pushed into Samba server).  I confirmed this approach with jra
yesterday and with Shirish today.

Posix open is only called (at lookup time) for file create now.
For opens (rather than creates), because we do not know if it
is a file or directory yet, and current Samba no longer allows
us to do posix open on dirs, we could end up wasting an open call
on what turns out to be a dir. For file opens, we wait to call posix
open till cifs_open.  It could be added here (lookup) in the future
but the performance tradeoff of the extra network request when EISDIR
or EACCES is returned would have to be weighed against the 50%
reduction in network traffic in the other paths.

Reviewed-by: Shirish Pargaonkar <shirishp@us.ibm.com>
Tested-by: Jeff Layton <jlayton@redhat.com>
CC: Jeremy Allison <jra@samba.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index f49d684..3758965 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -657,31 +657,36 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	}
 	cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode));
 
+	/* Posix open is only called (at lookup time) for file create now.
+	 * For opens (rather than creates), because we do not know if it
+	 * is a file or directory yet, and current Samba no longer allows
+	 * us to do posix open on dirs, we could end up wasting an open call
+	 * on what turns out to be a dir. For file opens, we wait to call posix
+	 * open till cifs_open.  It could be added here (lookup) in the future
+	 * but the performance tradeoff of the extra network request when EISDIR
+	 * or EACCES is returned would have to be weighed against the 50%
+	 * reduction in network traffic in the other paths.
+	 */
 	if (pTcon->unix_ext) {
 		if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
-		     (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open) {
-			if (!((nd->intent.open.flags & O_CREAT) &&
-					(nd->intent.open.flags & O_EXCL))) {
-				rc = cifs_posix_open(full_path, &newInode,
+		     (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
+		     (nd->intent.open.flags & O_CREAT)) {
+			rc = cifs_posix_open(full_path, &newInode,
 					parent_dir_inode->i_sb,
 					nd->intent.open.create_mode,
 					nd->intent.open.flags, &oplock,
 					&fileHandle, xid);
-				/*
-				 * This code works around a bug in
-				 * samba posix open in samba versions 3.3.1
-				 * and earlier where create works
-				 * but open fails with invalid parameter.
-				 * If either of these error codes are
-				 * returned, follow the normal lookup.
-				 * Otherwise, the error during posix open
-				 * is handled.
-				 */
-				if ((rc != -EINVAL) && (rc != -EOPNOTSUPP))
-					posix_open = true;
-				else
-					pTcon->broken_posix_open = true;
-			}
+			/*
+			 * The check below works around a bug in POSIX
+			 * open in samba versions 3.3.1 and earlier where
+			 * open could incorrectly fail with invalid parameter.
+			 * If either that or op not supported returned, follow
+			 * the normal lookup.
+			 */
+			if ((rc == 0) || (rc == -ENOENT))
+				posix_open = true;
+			else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP))
+				pTcon->broken_posix_open = true;
 		}
 		if (!posix_open)
 			rc = cifs_get_inode_info_unix(&newInode, full_path,
-- 
cgit v0.10.2


From 5c10e63c943b4c67561ddc6bf61e01d4141f881f Mon Sep 17 00:00:00 2001
From: Takahiro Yasui <tyasui@redhat.com>
Date: Wed, 29 Apr 2009 12:13:02 -0400
Subject: [SCSI] limit state transitions in scsi_internal_device_unblock

scsi timeout on two or more devices may cause extremely long execution
time for user applications because SDEV_OFFLINE state is changed to
SDEV_RUNNING state during scsi error recovery procedures triggered by
a bus reset or a host reset of scsi LLD, and scsi timeout can happens
on the same devices many times.

This happens because scsi_internal_device_unblock() changes device's
state to SDEV_RUNNING even if a device in other states than SDEV_BLOCK,
while the following two transitions are required in this function.

  SDEV_BLOCK -> SDEV_RUNNING
  SDEV_CREATED_BLOCK -> SDEV_CREATED

Otherwise, it returns -EINVAL.

Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
[matthew@wil.cx: supplied rewritten base for patch]
Signed-off-by: Matthew Wilcox <matthew@wil.cx>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index bb218c8..27dbf2e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2441,20 +2441,18 @@ int
 scsi_internal_device_unblock(struct scsi_device *sdev)
 {
 	struct request_queue *q = sdev->request_queue; 
-	int err;
 	unsigned long flags;
 	
 	/* 
 	 * Try to transition the scsi device to SDEV_RUNNING
 	 * and goose the device queue if successful.  
 	 */
-	err = scsi_device_set_state(sdev, SDEV_RUNNING);
-	if (err) {
-		err = scsi_device_set_state(sdev, SDEV_CREATED);
-
-		if (err)
-			return err;
-	}
+	if (sdev->sdev_state == SDEV_BLOCK)
+		sdev->sdev_state = SDEV_RUNNING;
+	else if (sdev->sdev_state == SDEV_CREATED_BLOCK)
+		sdev->sdev_state = SDEV_CREATED;
+	else
+		return -EINVAL;
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_start_queue(q);
-- 
cgit v0.10.2


From 1da2019fffc65c02a613305919bac28c9bdfaf99 Mon Sep 17 00:00:00 2001
From: Kai Makisara <Kai.Makisara@kolumbus.fi>
Date: Sat, 2 May 2009 08:49:34 +0300
Subject: [SCSI] st: fix gcc 4.4 warning

This patch fixes the GCC 4.4 warning reported by David Binderman and Sergey
Senozhatsky. The old version was working correctly but was not easy to read.

Signed-off-by: Kai Makisara <kai.makisara@kolumbus.fi>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index eb24efe..6f46e62 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -2964,7 +2964,7 @@ static int st_int_ioctl(struct scsi_tape *STp, unsigned int cmd_in, unsigned lon
 			    !(STp->use_pf & PF_TESTED)) {
 				/* Try the other possible state of Page Format if not
 				   already tried */
-				STp->use_pf = !STp->use_pf | PF_TESTED;
+				STp->use_pf = (STp->use_pf ^ USE_PF) | PF_TESTED;
 				st_release_request(SRpnt);
 				SRpnt = NULL;
 				return st_int_ioctl(STp, cmd_in, arg);
-- 
cgit v0.10.2


From 16b3858ec967f4d9817f6958cc7a0bf1222355f3 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Sat, 2 May 2009 22:14:54 +0200
Subject: [SCSI] ibmvscsi: Remove redundant test on unsigned.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index c9aa761..8d3925f 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -910,9 +910,6 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 
 	dev_info(hostdata->dev, "SRP_LOGIN succeeded\n");
 
-	if (evt_struct->xfer_iu->srp.login_rsp.req_lim_delta < 0)
-		dev_err(hostdata->dev, "Invalid request_limit.\n");
-
 	/* Now we know what the real request-limit is.
 	 * This value is set rather than added to request_limit because
 	 * request_limit could have been set to -1 by this client.
-- 
cgit v0.10.2


From a3ec723a949d65bf0349cdf60958036454927729 Mon Sep 17 00:00:00 2001
From: Eric Piel <eric.piel@tremplin-utc.net>
Date: Mon, 4 May 2009 12:43:02 +0200
Subject: [SCSI] Update wording of CONFIG_SCSI_MULTI_LUN help

I had to set CONFIG_SCSI_MULTI_LUN to y in order to get my SE W595
working when plugging it as a mass storage. Looking at SCSI option to
get a phone behaving correctly was convoluted to say the least. There
are quite a few other reports about USB card readers needing this option
as well. This patch improves the help text to make the use of the option
more obvious.

Signed-off-by: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 6e8106a..759e150 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -191,20 +191,19 @@ config SCSI_ENCLOSURE
 	  it has an enclosure device.  Selecting this option will just allow
 	  certain enclosure conditions to be reported and is not required.
 
-comment "Some SCSI devices (e.g. CD jukebox) support multiple LUNs"
-	depends on SCSI
-
 config SCSI_MULTI_LUN
 	bool "Probe all LUNs on each SCSI device"
 	depends on SCSI
 	help
-	  If you have a SCSI device that supports more than one LUN (Logical
-	  Unit Number), e.g. a CD jukebox, and only one LUN is detected, you
-	  can say Y here to force the SCSI driver to probe for multiple LUNs.
-	  A SCSI device with multiple LUNs acts logically like multiple SCSI
-	  devices. The vast majority of SCSI devices have only one LUN, and
-	  so most people can say N here. The max_luns boot/module parameter 
-	  allows to override this setting.
+	  Some devices support more than one LUN (Logical Unit Number) in order
+	  to allow access to several media, e.g. CD jukebox, USB card reader,
+	  mobile phone in mass storage mode. This option forces the kernel to
+	  probe for all LUNs by default. This setting can be overriden by
+	  max_luns boot/module parameter. Note that this option does not affect
+	  devices conforming to SCSI-3 or higher as they can explicitely report
+	  their number of LUNs. It is safe to say Y here unless you have one of
+	  those rare devices which reacts in an unexpected way when probed for
+	  multiple LUNs.
 
 config SCSI_CONSTANTS
 	bool "Verbose SCSI error reporting (kernel size +=12K)"
-- 
cgit v0.10.2


From 6ff63896e5bd624d8563f4b67fe2fe06ce99c8dc Mon Sep 17 00:00:00 2001
From: "Kleber S. Souza" <klebers@linux.vnet.ibm.com>
Date: Mon, 4 May 2009 10:41:02 -0300
Subject: [SCSI] ipr: fix PCI permanent error handler

The ipr driver can hang if it encounters enough PCI errors
to trigger the permanent error handler. The driver will attempt
to initiate a "bringdown" of the adapter and fail all pending
ops back. However, this bringdown is unlike any other bringdown
of the adapter in the code as the driver. In this code path we
end up failing back ops with allow_cmds still set to 1. This results
in some commands, the HCAM commands in particular, getting immediately
re-issued to the adapter on the done call, which results in
an infinite loop in ipr_fail_all_ops. Fix this by setting allow_cmds
to zero in this path.

Signed-off-by: Kleber S. Souza <klebers@linux.vnet.ibm.com>
[brking@linux.vnet.ibm.com: alternate patch substituted]
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 764cfcc..0f8bc77 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7003,6 +7003,7 @@ static void ipr_pci_perm_failure(struct pci_dev *pdev)
 		ioa_cfg->sdt_state = ABORT_DUMP;
 	ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES;
 	ioa_cfg->in_ioa_bringdown = 1;
+	ioa_cfg->allow_cmds = 0;
 	ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 }
-- 
cgit v0.10.2


From 2b288133ab6306b1761e0a2ef943b944ead6ad69 Mon Sep 17 00:00:00 2001
From: Andy Yan <ayan@marvell.com>
Date: Mon, 11 May 2009 20:01:55 +0800
Subject: [SCSI] mvsas: bug fix with setting task management frame type

Correct frame type setting according to parameter.

Signed-off-by: Ying Chu <jasonchu@marvell.com>
Signed-off-by: Andy Yan <ayan@marvell.com>
Signed-off-by: Ke Wei <kewei@marvell.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index d79ac17..f709319 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -761,9 +761,11 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 		flags |= MCH_FBURST;
 		fburst = (1 << 7);
 	}
-	hdr->flags = cpu_to_le32(flags |
-				 (tei->n_elem << MCH_PRD_LEN_SHIFT) |
-				 (MCH_SSP_FR_CMD << MCH_SSP_FR_TYPE_SHIFT));
+	if (is_tmf)
+		flags |= (MCH_SSP_FR_TASK << MCH_SSP_FR_TYPE_SHIFT);
+	else
+		flags |= (MCH_SSP_FR_CMD << MCH_SSP_FR_TYPE_SHIFT);
+	hdr->flags = cpu_to_le32(flags | (tei->n_elem << MCH_PRD_LEN_SHIFT));
 	hdr->tags = cpu_to_le32(tag);
 	hdr->data_len = cpu_to_le32(task->total_xfer_len);
 
-- 
cgit v0.10.2


From 0b84b7094e87769120def1e703b8b4d037281038 Mon Sep 17 00:00:00 2001
From: Andy Yan <ayan@marvell.com>
Date: Mon, 11 May 2009 20:05:26 +0800
Subject: [SCSI] mvsas: bug fix of dead lock

TMF task should be issued with Interrupt Disabled, or Deadlock may take place.
Clean-up unused parameters and conditonal lock.

Signed-off-by: Ying Chu <jasonchu@marvell.com>
Signed-off-by: Andy Yan <ayan@marvell.com>
Signed-off-by: Ke Wei <kewei@marvell.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index f709319..4279b5e 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -868,8 +868,8 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 
 #define	DEV_IS_GONE(mvi_dev)	((!mvi_dev || (mvi_dev->dev_type == NO_DEVICE)))
 static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
-				struct completion *completion, int lock,
-				int is_tmf, struct mvs_tmf_task *tmf)
+				struct completion *completion,int is_tmf,
+				struct mvs_tmf_task *tmf)
 {
 	struct domain_device *dev = task->dev;
 	struct mvs_info *mvi;
@@ -892,8 +892,7 @@ static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
 
 	mvi = mvs_find_dev_mvi(task->dev);
 
-	if (lock)
-		spin_lock_irqsave(&mvi->lock, flags);
+	spin_lock_irqsave(&mvi->lock, flags);
 	do {
 		dev = t->dev;
 		mvi_dev = (struct mvs_device *)dev->lldd_dev;
@@ -1020,15 +1019,14 @@ out_done:
 		MVS_CHIP_DISP->start_delivery(mvi,
 			(mvi->tx_prod - 1) & (MVS_CHIP_SLOT_SZ - 1));
 	}
-	if (lock)
-		spin_unlock_irqrestore(&mvi->lock, flags);
+	spin_unlock_irqrestore(&mvi->lock, flags);
 	return rc;
 }
 
 int mvs_queue_command(struct sas_task *task, const int num,
 			gfp_t gfp_flags)
 {
-	return mvs_task_exec(task, num, gfp_flags, NULL, 1, 0, NULL);
+	return mvs_task_exec(task, num, gfp_flags, NULL, 0, NULL);
 }
 
 static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
@@ -1448,7 +1446,7 @@ static int mvs_exec_internal_tmf_task(struct domain_device *dev,
 		task->timer.expires = jiffies + MVS_TASK_TIMEOUT*HZ;
 		add_timer(&task->timer);
 
-		res = mvs_task_exec(task, 1, GFP_KERNEL, NULL, 0, 1, tmf);
+		res = mvs_task_exec(task, 1, GFP_KERNEL, NULL, 1, tmf);
 
 		if (res) {
 			del_timer(&task->timer);
-- 
cgit v0.10.2


From 0f980a871678b7ec143fcb45b31bf9234e4585c8 Mon Sep 17 00:00:00 2001
From: Andy Yan <ayan@marvell.com>
Date: Mon, 11 May 2009 21:49:52 +0800
Subject: [SCSI] mvsas: bug fix, null pointer may be used

Null pointer check to avoid corruption.

Signed-off-by: Ying Chu <jasonchu@marvell.com>
Signed-off-by: Andy Yan <ayan@marvell.com>
Signed-off-by: Ke Wei <kewei@marvell.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 4279b5e..3fc396f 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1873,11 +1873,11 @@ int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
 	}
 
 out:
-	if (mvi_dev)
+	if (mvi_dev) {
 		mvi_dev->runing_req--;
-	if (sas_protocol_ata(task->task_proto))
-		mvs_free_reg_set(mvi, mvi_dev);
-
+		if (sas_protocol_ata(task->task_proto))
+			mvs_free_reg_set(mvi, mvi_dev);
+	}
 	mvs_slot_task_free(mvi, task, slot, slot_idx);
 	sts = tstat->stat;
 
-- 
cgit v0.10.2


From 77db27cdcbc8ed371fd2f154cbadc7ff32ae8901 Mon Sep 17 00:00:00 2001
From: Andy Yan <ayan@marvell.com>
Date: Mon, 11 May 2009 21:56:31 +0800
Subject: [SCSI] mvsas: correct bit map usage

Utilize DECLARE_BITMAP to define the tags array.

Signed-off-by: Ying Chu <jasonchu@marvell.com>
Signed-off-by: Andy Yan <ayan@marvell.com>
Signed-off-by: Ke Wei <kewei@marvell.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index 75b9748..93735ed 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -313,8 +313,7 @@ struct mvs_info {
 	const struct mvs_chip_info *chip;
 
 	int tags_num;
-	u8 tags[MVS_SLOTS >> 3];
-
+	DECLARE_BITMAP(tags, MVS_SLOTS);
 	/* further per-slot information */
 	struct mvs_phy phy[MVS_MAX_PHYS];
 	struct mvs_port port[MVS_MAX_PHYS];
-- 
cgit v0.10.2


From 9870d9a2428550e7ac3164a26306ad07a99051ae Mon Sep 17 00:00:00 2001
From: Andy Yan <ayan@marvell.com>
Date: Mon, 11 May 2009 22:19:25 +0800
Subject: [SCSI] mvsas: performance improvement using domain_device->lldd_dev

Using sticky field to improve retrieve performance by eliminating some
lookups in . Remove some spurious casts.

Signed-off-by: Ying Chu <jasonchu@marvell.com>
Signed-off-by: Andy Yan <ayan@marvell.com>
Signed-off-by: Ke Wei <kewei@marvell.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 3fc396f..c05e4c0 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -225,7 +225,8 @@ struct mvs_info *mvs_find_dev_mvi(struct domain_device *dev)
 int mvs_find_dev_phyno(struct domain_device *dev, int *phyno)
 {
 	unsigned long i = 0, j = 0, n = 0, num = 0;
-	struct mvs_info *mvi = mvs_find_dev_mvi(dev);
+	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
 	struct sas_ha_struct *sha = dev->port->ha;
 
 	while (sha->sas_port[i]) {
@@ -872,8 +873,8 @@ static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
 				struct mvs_tmf_task *tmf)
 {
 	struct domain_device *dev = task->dev;
-	struct mvs_info *mvi;
-	struct mvs_device *mvi_dev;
+	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
 	struct mvs_task_exec_info tei;
 	struct sas_task *t = task;
 	struct mvs_slot_info *slot;
@@ -890,8 +891,6 @@ static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
 		return 0;
 	}
 
-	mvi = mvs_find_dev_mvi(task->dev);
-
 	spin_lock_irqsave(&mvi->lock, flags);
 	do {
 		dev = t->dev;
@@ -1320,7 +1319,7 @@ int mvs_dev_found_notify(struct domain_device *dev, int lock)
 	}
 	dev->lldd_dev = (void *)mvi_device;
 	mvi_device->dev_type = dev->dev_type;
-
+	mvi_device->mvi_info = mvi;
 	if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) {
 		int phy_id;
 		u8 phy_num = parent_dev->ex_dev.num_phys;
@@ -1357,10 +1356,8 @@ int mvs_dev_found(struct domain_device *dev)
 void mvs_dev_gone_notify(struct domain_device *dev, int lock)
 {
 	unsigned long flags = 0;
-	struct mvs_info *mvi;
 	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
-
-	mvi = mvs_find_dev_mvi(dev);
+	struct mvs_info *mvi = mvi_dev->mvi_info;
 
 	if (lock)
 		spin_lock_irqsave(&mvi->lock, flags);
@@ -1535,8 +1532,8 @@ int mvs_lu_reset(struct domain_device *dev, u8 *lun)
 	unsigned long flags;
 	int i, phyno[WIDE_PORT_MAX_PHY], num , rc = TMF_RESP_FUNC_FAILED;
 	struct mvs_tmf_task tmf_task;
-	struct mvs_info *mvi = mvs_find_dev_mvi(dev);
 	struct mvs_device * mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
 
 	tmf_task.tmf = TMF_LU_RESET;
 	mvi_dev->dev_status = MVS_DEV_EH;
@@ -1558,8 +1555,8 @@ int mvs_I_T_nexus_reset(struct domain_device *dev)
 {
 	unsigned long flags;
 	int i, phyno[WIDE_PORT_MAX_PHY], num , rc = TMF_RESP_FUNC_FAILED;
-	struct mvs_info *mvi = mvs_find_dev_mvi(dev);
-	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_device * mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
 
 	if (mvi_dev->dev_status != MVS_DEV_EH)
 		return TMF_RESP_FUNC_COMPLETE;
@@ -1587,7 +1584,8 @@ int mvs_query_task(struct sas_task *task)
 	if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) {
 		struct scsi_cmnd * cmnd = (struct scsi_cmnd *)task->uldd_task;
 		struct domain_device *dev = task->dev;
-		struct mvs_info *mvi = mvs_find_dev_mvi(dev);
+		struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+		struct mvs_info *mvi = mvi_dev->mvi_info;
 
 		int_to_scsilun(cmnd->device->lun, &lun);
 		rc = mvs_find_tag(mvi, task, &tag);
@@ -1619,10 +1617,12 @@ int mvs_abort_task(struct sas_task *task)
 	struct scsi_lun lun;
 	struct mvs_tmf_task tmf_task;
 	struct domain_device *dev = task->dev;
-	struct mvs_info *mvi = mvs_find_dev_mvi(dev);
+	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_info *mvi = mvi_dev->mvi_info;
 	int rc = TMF_RESP_FUNC_FAILED;
 	unsigned long flags;
 	u32 tag;
+
 	if (mvi->exp_req)
 		mvi->exp_req--;
 	spin_lock_irqsave(&task->task_state_lock, flags);
@@ -1652,7 +1652,6 @@ int mvs_abort_task(struct sas_task *task)
 		if (rc == TMF_RESP_FUNC_COMPLETE) {
 			u32 slot_no;
 			struct mvs_slot_info *slot;
-			struct mvs_info *mvi = mvs_find_dev_mvi(dev);
 
 			if (task->lldd_task) {
 				slot = (struct mvs_slot_info *)task->lldd_task;
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index 93735ed..aa2270a 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -231,7 +231,9 @@ struct mvs_phy {
 };
 
 struct mvs_device {
+	struct list_head		dev_entry;
 	enum sas_dev_type dev_type;
+	struct mvs_info *mvi_info;
 	struct domain_device *sas_device;
 	u32 attached_phy;
 	u32 device_id;
@@ -239,7 +241,6 @@ struct mvs_device {
 	u8 taskfileset;
 	u8 dev_status;
 	u16 reserved;
-	struct list_head		dev_entry;
 };
 
 struct mvs_slot_info {
-- 
cgit v0.10.2


From 5a2537959fa8781012e8c286fc1614e0f6991327 Mon Sep 17 00:00:00 2001
From: Zhenwen Xu <helight.xu@gmail.com>
Date: Tue, 12 May 2009 13:29:13 -0700
Subject: [SCSI] NCR_D700: fix IRQ handler return type

drivers/scsi/NCR_D700.c: In function `NCR_D700_probe':
drivers/scsi/NCR_D700.c:322: warning: passing argument 2 of `request_irq' from incompatible pointer type
drivers/scsi/NCR_D700.c:322: warning: passing argument 2 of `request_irq' from incompatible pointer type
drivers/scsi/NCR_D700.c:322: warning: passing argument 2 of `request_irq' from incompatible pointer type

Signed-off-by: Zhenwen Xu <helight.xu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/NCR_D700.c b/drivers/scsi/NCR_D700.c
index c889d84..1cdf09a 100644
--- a/drivers/scsi/NCR_D700.c
+++ b/drivers/scsi/NCR_D700.c
@@ -224,7 +224,7 @@ NCR_D700_probe_one(struct NCR_D700_private *p, int siop, int irq,
 	return ret;
 }
 
-static int
+static irqreturn_t
 NCR_D700_intr(int irq, void *data)
 {
 	struct NCR_D700_private *p = (struct NCR_D700_private *)data;
-- 
cgit v0.10.2


From 10eb0f013c63c71c82ede77945a5f390c10cfda6 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:38 -0500
Subject: [SCSI] iscsi: pass ep connect shost

When we create the tcp/ip connection by calling ep_connect, we currently
just go by the routing table info.

I think there are two problems with this.

1. Some drivers do not have access to a routing table. Some drivers like
qla4xxx do not even know about other ports.

2. If you have two initiator ports on the same subnet, the user may have
set things up so that session1 was supposed to be run through port1. and
session2 was supposed to be run through port2. It looks like we could
end with both sessions going through one of the ports.

Fixes for cxgb3i from Karen Xie.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 75223f5..ffbe0c7 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -517,7 +517,8 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
 }
 
 static struct iscsi_endpoint *
-iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking)
+iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
+		      int non_blocking)
 {
 	int err;
 	struct iser_conn *ib_conn;
diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h
index 59b0958..e3133b5 100644
--- a/drivers/scsi/cxgb3i/cxgb3i.h
+++ b/drivers/scsi/cxgb3i/cxgb3i.h
@@ -144,7 +144,6 @@ struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *);
 void cxgb3i_adapter_open(struct t3cdev *);
 void cxgb3i_adapter_close(struct t3cdev *);
 
-struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *);
 struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *,
 				       struct net_device *);
 void cxgb3i_hba_host_remove(struct cxgb3i_hba *);
diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
index 9212400..04a4374 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
@@ -178,7 +178,7 @@ void cxgb3i_adapter_close(struct t3cdev *t3dev)
  * cxgb3i_hba_find_by_netdev - find the cxgb3i_hba structure via net_device
  * @t3dev: t3cdev adapter
  */
-struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
+static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
 {
 	struct cxgb3i_adapter *snic;
 	int i;
@@ -261,20 +261,27 @@ void cxgb3i_hba_host_remove(struct cxgb3i_hba *hba)
 
 /**
  * cxgb3i_ep_connect - establish TCP connection to target portal
+ * @shost:		scsi host to use
  * @dst_addr:		target IP address
  * @non_blocking:	blocking or non-blocking call
  *
  * Initiates a TCP/IP connection to the dst_addr
  */
-static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr,
+static struct iscsi_endpoint *cxgb3i_ep_connect(struct Scsi_Host *shost,
+						struct sockaddr *dst_addr,
 						int non_blocking)
 {
 	struct iscsi_endpoint *ep;
 	struct cxgb3i_endpoint *cep;
-	struct cxgb3i_hba *hba;
+	struct cxgb3i_hba *hba = NULL;
 	struct s3_conn *c3cn = NULL;
 	int err = 0;
 
+	if (shost)
+		hba = iscsi_host_priv(shost);
+
+	cxgb3i_api_debug("shost 0x%p, hba 0x%p.\n", shost, hba);
+
 	c3cn = cxgb3i_c3cn_create();
 	if (!c3cn) {
 		cxgb3i_log_info("ep connect OOM.\n");
@@ -282,17 +289,27 @@ static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr,
 		goto release_conn;
 	}
 
-	err = cxgb3i_c3cn_connect(c3cn, (struct sockaddr_in *)dst_addr);
+	err = cxgb3i_c3cn_connect(hba ? hba->ndev : NULL, c3cn,
+				 (struct sockaddr_in *)dst_addr);
 	if (err < 0) {
 		cxgb3i_log_info("ep connect failed.\n");
 		goto release_conn;
 	}
+
 	hba = cxgb3i_hba_find_by_netdev(c3cn->dst_cache->dev);
 	if (!hba) {
 		err = -ENOSPC;
 		cxgb3i_log_info("NOT going through cxgbi device.\n");
 		goto release_conn;
 	}
+
+	if (shost && hba != iscsi_host_priv(shost)) {
+		err = -ENOSPC;
+		cxgb3i_log_info("Could not connect through request host%u\n",
+				shost->host_no);
+		goto release_conn;
+	}
+
 	if (c3cn_is_closing(c3cn)) {
 		err = -ENOSPC;
 		cxgb3i_log_info("ep connect unable to connect.\n");
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
index e11c9c1..c1d5be4 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
@@ -1479,12 +1479,13 @@ static struct net_device *cxgb3_egress_dev(struct net_device *root_dev,
 	return NULL;
 }
 
-static struct rtable *find_route(__be32 saddr, __be32 daddr,
+static struct rtable *find_route(struct net_device *dev,
+				 __be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
 {
 	struct rtable *rt;
 	struct flowi fl = {
-		.oif = 0,
+		.oif = dev ? dev->ifindex : 0,
 		.nl_u = {
 			 .ip4_u = {
 				   .daddr = daddr,
@@ -1573,36 +1574,40 @@ out_err:
  *
  * return 0 if active open request is sent, < 0 otherwise.
  */
-int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin)
+int cxgb3i_c3cn_connect(struct net_device *dev, struct s3_conn *c3cn,
+			struct sockaddr_in *usin)
 {
 	struct rtable *rt;
-	struct net_device *dev;
 	struct cxgb3i_sdev_data *cdata;
 	struct t3cdev *cdev;
 	__be32 sipv4;
 	int err;
 
+	c3cn_conn_debug("c3cn 0x%p, dev 0x%p.\n", c3cn, dev);
+
 	if (usin->sin_family != AF_INET)
 		return -EAFNOSUPPORT;
 
 	c3cn->daddr.sin_port = usin->sin_port;
 	c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr;
 
-	rt = find_route(c3cn->saddr.sin_addr.s_addr,
+	rt = find_route(dev, c3cn->saddr.sin_addr.s_addr,
 			c3cn->daddr.sin_addr.s_addr,
 			c3cn->saddr.sin_port,
 			c3cn->daddr.sin_port);
 	if (rt == NULL) {
-		c3cn_conn_debug("NO route to 0x%x, port %u.\n",
+		c3cn_conn_debug("NO route to 0x%x, port %u, dev %s.\n",
 				c3cn->daddr.sin_addr.s_addr,
-				ntohs(c3cn->daddr.sin_port));
+				ntohs(c3cn->daddr.sin_port),
+				dev ? dev->name : "any");
 		return -ENETUNREACH;
 	}
 
 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
-		c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n",
+		c3cn_conn_debug("multi-cast route to 0x%x, port %u, dev %s.\n",
 				c3cn->daddr.sin_addr.s_addr,
-				ntohs(c3cn->daddr.sin_port));
+				ntohs(c3cn->daddr.sin_port),
+				dev ? dev->name : "any");
 		ip_rt_put(rt);
 		return -ENETUNREACH;
 	}
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h
index ebfca96..6a1d86b 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.h
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h
@@ -169,7 +169,8 @@ void cxgb3i_sdev_add(struct t3cdev *, struct cxgb3_client *);
 void cxgb3i_sdev_remove(struct t3cdev *);
 
 struct s3_conn *cxgb3i_c3cn_create(void);
-int cxgb3i_c3cn_connect(struct s3_conn *, struct sockaddr_in *);
+int cxgb3i_c3cn_connect(struct net_device *, struct s3_conn *,
+			struct sockaddr_in *);
 void cxgb3i_c3cn_rx_credits(struct s3_conn *, int);
 int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *);
 void cxgb3i_c3cn_release(struct s3_conn *);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 0a2ce7b..d69a53a 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1268,26 +1268,54 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 	return err;
 }
 
+static int iscsi_if_ep_connect(struct iscsi_transport *transport,
+			       struct iscsi_uevent *ev, int msg_type)
+{
+	struct iscsi_endpoint *ep;
+	struct sockaddr *dst_addr;
+	struct Scsi_Host *shost = NULL;
+	int non_blocking, err = 0;
+
+	if (!transport->ep_connect)
+		return -EINVAL;
+
+	if (msg_type == ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST) {
+		shost = scsi_host_lookup(ev->u.ep_connect_through_host.host_no);
+		if (!shost) {
+			printk(KERN_ERR "ep connect failed. Could not find "
+			       "host no %u\n",
+			       ev->u.ep_connect_through_host.host_no);
+			return -ENODEV;
+		}
+		non_blocking = ev->u.ep_connect_through_host.non_blocking;
+	} else
+		non_blocking = ev->u.ep_connect.non_blocking;
+
+	dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
+	ep = transport->ep_connect(shost, dst_addr, non_blocking);
+	if (IS_ERR(ep)) {
+		err = PTR_ERR(ep);
+		goto release_host;
+	}
+
+	ev->r.ep_connect_ret.handle = ep->id;
+release_host:
+	if (shost)
+		scsi_host_put(shost);
+	return err;
+}
+
 static int
 iscsi_if_transport_ep(struct iscsi_transport *transport,
 		      struct iscsi_uevent *ev, int msg_type)
 {
 	struct iscsi_endpoint *ep;
-	struct sockaddr *dst_addr;
 	int rc = 0;
 
 	switch (msg_type) {
+	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
 	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
-		if (!transport->ep_connect)
-			return -EINVAL;
-
-		dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
-		ep = transport->ep_connect(dst_addr,
-					   ev->u.ep_connect.non_blocking);
-		if (IS_ERR(ep))
-			return PTR_ERR(ep);
-
-		ev->r.ep_connect_ret.handle = ep->id;
+		rc = iscsi_if_ep_connect(transport, ev, msg_type);
 		break;
 	case ISCSI_UEVENT_TRANSPORT_EP_POLL:
 		if (!transport->ep_poll)
@@ -1469,6 +1497,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
 	case ISCSI_UEVENT_TRANSPORT_EP_POLL:
 	case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
+	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
 		err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type);
 		break;
 	case ISCSI_UEVENT_TGT_DSCVR:
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index d0ed522..2c1a4af 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -50,7 +50,8 @@ enum iscsi_uevent_e {
 	ISCSI_UEVENT_TGT_DSCVR		= UEVENT_BASE + 15,
 	ISCSI_UEVENT_SET_HOST_PARAM	= UEVENT_BASE + 16,
 	ISCSI_UEVENT_UNBIND_SESSION	= UEVENT_BASE + 17,
-	ISCSI_UEVENT_CREATE_BOUND_SESSION	= UEVENT_BASE + 18,
+	ISCSI_UEVENT_CREATE_BOUND_SESSION		= UEVENT_BASE + 18,
+	ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST	= UEVENT_BASE + 19,
 
 	/* up events */
 	ISCSI_KEVENT_RECV_PDU		= KEVENT_BASE + 1,
@@ -131,6 +132,10 @@ struct iscsi_uevent {
 		struct msg_transport_connect {
 			uint32_t	non_blocking;
 		} ep_connect;
+		struct msg_transport_connect_through_host {
+			uint32_t	host_no;
+			uint32_t	non_blocking;
+		} ep_connect_through_host;
 		struct msg_transport_poll {
 			uint64_t	ep_handle;
 			uint32_t	timeout_ms;
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 457588e..8cb7a31 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -126,7 +126,8 @@ struct iscsi_transport {
 			       int *index, int *age);
 
 	void (*session_recovery_timedout) (struct iscsi_cls_session *session);
-	struct iscsi_endpoint *(*ep_connect) (struct sockaddr *dst_addr,
+	struct iscsi_endpoint *(*ep_connect) (struct Scsi_Host *shost,
+					      struct sockaddr *dst_addr,
 					      int non_blocking);
 	int (*ep_poll) (struct iscsi_endpoint *ep, int timeout_ms);
 	void (*ep_disconnect) (struct iscsi_endpoint *ep);
-- 
cgit v0.10.2


From 184b57c630c86d35b7f92d4b6545fdf07647c5d5 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:39 -0500
Subject: [SCSI] libiscsi: check of LLD has a alloc pdu callout.

bnx2i does not have one. It currently preallocates the bdt
when the session is setup.

We probably want to change that to a dma pool, then allocate from
the pool in the alloc pdu. Until then check if there is a alloc
pdu callout.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index e72b4ad..11bc3e1 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -257,9 +257,11 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	itt_t itt;
 	int rc;
 
-	rc = conn->session->tt->alloc_pdu(task, ISCSI_OP_SCSI_CMD);
-	if (rc)
-		return rc;
+	if (conn->session->tt->alloc_pdu) {
+		rc = conn->session->tt->alloc_pdu(task, ISCSI_OP_SCSI_CMD);
+		if (rc)
+			return rc;
+	}
 	hdr = (struct iscsi_cmd *) task->hdr;
 	itt = hdr->itt;
 	memset(hdr, 0, sizeof(*hdr));
@@ -566,11 +568,14 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 	} else
 		task->data_count = 0;
 
-	if (conn->session->tt->alloc_pdu(task, hdr->opcode)) {
-		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
-				 "pdu for mgmt task.\n");
-		goto requeue_task;
+	if (conn->session->tt->alloc_pdu) {
+		if (conn->session->tt->alloc_pdu(task, hdr->opcode)) {
+			iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
+					 "pdu for mgmt task.\n");
+			goto requeue_task;
+		}
 	}
+
 	itt = task->hdr->itt;
 	task->hdr_len = sizeof(struct iscsi_hdr);
 	memcpy(task->hdr, hdr, sizeof(struct iscsi_hdr));
-- 
cgit v0.10.2


From 5700b1af93388544843a453e3c68f8f928bd1e88 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:40 -0500
Subject: [SCSI] libiscsi: handle param allocation failures

If we could not allocate the initiator name or some other id like
the hwaddress or netdev, then userspace could deal with the failure
by just running in a dregraded mode.

Now we want to be able to switch values for the params and we
want some feedback, so this patch will check if a string like
the initiatorname could not be allocated and return an error.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 11bc3e1..b4aaf2e 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2656,6 +2656,23 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_bind);
 
+static int iscsi_switch_str_param(char **param, char *new_val_buf)
+{
+	char *new_val;
+
+	if (*param) {
+		if (!strcmp(*param, new_val_buf))
+			return 0;
+	}
+
+	new_val = kstrdup(new_val_buf, GFP_NOIO);
+	if (!new_val)
+		return -ENOMEM;
+
+	kfree(*param);
+	*param = new_val;
+	return 0;
+}
 
 int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
 		    enum iscsi_param param, char *buf, int buflen)
@@ -2728,38 +2745,15 @@ int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
 		sscanf(buf, "%u", &conn->exp_statsn);
 		break;
 	case ISCSI_PARAM_USERNAME:
-		kfree(session->username);
-		session->username = kstrdup(buf, GFP_KERNEL);
-		if (!session->username)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->username, buf);
 	case ISCSI_PARAM_USERNAME_IN:
-		kfree(session->username_in);
-		session->username_in = kstrdup(buf, GFP_KERNEL);
-		if (!session->username_in)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->username_in, buf);
 	case ISCSI_PARAM_PASSWORD:
-		kfree(session->password);
-		session->password = kstrdup(buf, GFP_KERNEL);
-		if (!session->password)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->password, buf);
 	case ISCSI_PARAM_PASSWORD_IN:
-		kfree(session->password_in);
-		session->password_in = kstrdup(buf, GFP_KERNEL);
-		if (!session->password_in)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->password_in, buf);
 	case ISCSI_PARAM_TARGET_NAME:
-		/* this should not change between logins */
-		if (session->targetname)
-			break;
-
-		session->targetname = kstrdup(buf, GFP_KERNEL);
-		if (!session->targetname)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&session->targetname, buf);
 	case ISCSI_PARAM_TPGT:
 		sscanf(buf, "%d", &session->tpgt);
 		break;
@@ -2767,25 +2761,11 @@ int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
 		sscanf(buf, "%d", &conn->persistent_port);
 		break;
 	case ISCSI_PARAM_PERSISTENT_ADDRESS:
-		/*
-		 * this is the address returned in discovery so it should
-		 * not change between logins.
-		 */
-		if (conn->persistent_address)
-			break;
-
-		conn->persistent_address = kstrdup(buf, GFP_KERNEL);
-		if (!conn->persistent_address)
-			return -ENOMEM;
-		break;
+		return iscsi_switch_str_param(&conn->persistent_address, buf);
 	case ISCSI_PARAM_IFACE_NAME:
-		if (!session->ifacename)
-			session->ifacename = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&session->ifacename, buf);
 	case ISCSI_PARAM_INITIATOR_NAME:
-		if (!session->initiatorname)
-			session->initiatorname = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&session->initiatorname, buf);
 	default:
 		return -ENOSYS;
 	}
@@ -2856,10 +2836,7 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
 		len = sprintf(buf, "%s\n", session->ifacename);
 		break;
 	case ISCSI_PARAM_INITIATOR_NAME:
-		if (!session->initiatorname)
-			len = sprintf(buf, "%s\n", "unknown");
-		else
-			len = sprintf(buf, "%s\n", session->initiatorname);
+		len = sprintf(buf, "%s\n", session->initiatorname);
 		break;
 	default:
 		return -ENOSYS;
@@ -2925,29 +2902,16 @@ int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
 
 	switch (param) {
 	case ISCSI_HOST_PARAM_NETDEV_NAME:
-		if (!ihost->netdev)
-			len = sprintf(buf, "%s\n", "default");
-		else
-			len = sprintf(buf, "%s\n", ihost->netdev);
+		len = sprintf(buf, "%s\n", ihost->netdev);
 		break;
 	case ISCSI_HOST_PARAM_HWADDRESS:
-		if (!ihost->hwaddress)
-			len = sprintf(buf, "%s\n", "default");
-		else
-			len = sprintf(buf, "%s\n", ihost->hwaddress);
+		len = sprintf(buf, "%s\n", ihost->hwaddress);
 		break;
 	case ISCSI_HOST_PARAM_INITIATOR_NAME:
-		if (!ihost->initiatorname)
-			len = sprintf(buf, "%s\n", "unknown");
-		else
-			len = sprintf(buf, "%s\n", ihost->initiatorname);
+		len = sprintf(buf, "%s\n", ihost->initiatorname);
 		break;
 	case ISCSI_HOST_PARAM_IPADDRESS:
-		if (!strlen(ihost->local_address))
-			len = sprintf(buf, "%s\n", "unknown");
-		else
-			len = sprintf(buf, "%s\n",
-				      ihost->local_address);
+		len = sprintf(buf, "%s\n", ihost->local_address);
 		break;
 	default:
 		return -ENOSYS;
@@ -2964,17 +2928,11 @@ int iscsi_host_set_param(struct Scsi_Host *shost, enum iscsi_host_param param,
 
 	switch (param) {
 	case ISCSI_HOST_PARAM_NETDEV_NAME:
-		if (!ihost->netdev)
-			ihost->netdev = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&ihost->netdev, buf);
 	case ISCSI_HOST_PARAM_HWADDRESS:
-		if (!ihost->hwaddress)
-			ihost->hwaddress = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&ihost->hwaddress, buf);
 	case ISCSI_HOST_PARAM_INITIATOR_NAME:
-		if (!ihost->initiatorname)
-			ihost->initiatorname = kstrdup(buf, GFP_KERNEL);
-		break;
+		return iscsi_switch_str_param(&ihost->initiatorname, buf);
 	default:
 		return -ENOSYS;
 	}
-- 
cgit v0.10.2


From 8f9256cea10ca43ac80f66e176643eb41db34244 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:41 -0500
Subject: [SCSI] libiscsi: export iscsi_itt_to_task for bnx2i

bnx2i needs to be able to look up mgmt task like login and nop, because
it does some processing of them on the completion path. This exports
iscsi_itt_to_task so it can look up the task.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index b4aaf2e..a6e6eef 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -828,7 +828,7 @@ static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
  *
  * The session lock must be held.
  */
-static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
+struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
 {
 	struct iscsi_session *session = conn->session;
 	int i;
@@ -845,6 +845,7 @@ static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
 
 	return session->cmds[i];
 }
+EXPORT_SYMBOL_GPL(iscsi_itt_to_task);
 
 /**
  * __iscsi_complete_pdu - complete pdu
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 0289f57..88d33a4 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -406,6 +406,7 @@ extern int __iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *,
 				char *, int);
 extern int iscsi_verify_itt(struct iscsi_conn *, itt_t);
 extern struct iscsi_task *iscsi_itt_to_ctask(struct iscsi_conn *, itt_t);
+extern struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *, itt_t);
 extern void iscsi_requeue_task(struct iscsi_task *task);
 extern void iscsi_put_task(struct iscsi_task *task);
 extern void __iscsi_get_task(struct iscsi_task *task);
-- 
cgit v0.10.2


From edbc9aa0580c0aca96ac8d11bfb2defa81d91bb3 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:42 -0500
Subject: [SCSI] libiscsi: have iscsi_data_in_rsp call iscsi_update_cmdsn

This has iscsi_data_in_rsp call iscsi_update_cmdsn when a pdu is
completed like is done for other pdu's that are don.

For libiscsi_tcp, this means that it calls iscsi_update_cmdsn when
it is handling the pdu internally to only transfer data, but if there is
status then it does not need to call it since the completion handling
will do it.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index a6e6eef..047543c 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -729,6 +729,7 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 	if (!(rhdr->flags & ISCSI_FLAG_DATA_STATUS))
 		return;
 
+	iscsi_update_cmdsn(conn->session, (struct iscsi_nopin *)hdr);
 	sc->result = (DID_OK << 16) | rhdr->cmd_status;
 	conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
 	if (rhdr->flags & (ISCSI_FLAG_DATA_UNDERFLOW |
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index b579ca9..db93cd0 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -473,7 +473,13 @@ static int iscsi_tcp_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
 	int datasn = be32_to_cpu(rhdr->datasn);
 	unsigned total_in_length = scsi_in(task->sc)->length;
 
-	iscsi_update_cmdsn(conn->session, (struct iscsi_nopin*)rhdr);
+	/*
+	 * lib iscsi will update this in the completion handling if there
+	 * is status.
+	 */
+	if (!(rhdr->flags & ISCSI_FLAG_DATA_STATUS))
+		iscsi_update_cmdsn(conn->session, (struct iscsi_nopin*)rhdr);
+
 	if (tcp_conn->in.datalen == 0)
 		return 0;
 
-- 
cgit v0.10.2


From 26013ad4c43f49a038a6489c35e9b901491339fe Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:43 -0500
Subject: [SCSI] libiscsi: fix nop response/reply and session cleanup race

If we are responding to a nop from the target by sending our nop,
and the session is getting torn down, then iscsi_start_session_recovery
could set the conn stop bits while the recv path is sending the nop
response and we will hit the bug ons in __iscsi_conn_send_pdu.

This has us check the state in __iscsi_conn_send_pdu and fail all
incoming mgmt IO if we are  not logged in and if the pdu is not login
related. It also changes the ordering of the setting of conn stop state
bits so they are set after the session state is set (both are set under
the session lock).

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 047543c..212fe20 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -546,6 +546,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		 */
 		task = conn->login_task;
 	else {
+		if (session->state != ISCSI_STATE_LOGGED_IN)
+			return NULL;
+
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
@@ -2566,8 +2569,6 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 {
 	int old_stop_stage;
 
-	del_timer_sync(&conn->transport_timer);
-
 	mutex_lock(&session->eh_mutex);
 	spin_lock_bh(&session->lock);
 	if (conn->stop_stage == STOP_CONN_TERM) {
@@ -2585,13 +2586,17 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 		session->state = ISCSI_STATE_TERMINATE;
 	else if (conn->stop_stage != STOP_CONN_RECOVER)
 		session->state = ISCSI_STATE_IN_RECOVERY;
+	spin_unlock_bh(&session->lock);
 
+	del_timer_sync(&conn->transport_timer);
+	iscsi_suspend_tx(conn);
+
+	spin_lock_bh(&session->lock);
 	old_stop_stage = conn->stop_stage;
 	conn->stop_stage = flag;
 	conn->c_stage = ISCSI_CONN_STOPPED;
 	spin_unlock_bh(&session->lock);
 
-	iscsi_suspend_tx(conn);
 	/*
 	 * for connection level recovery we should not calculate
 	 * header digest. conn->hdr_size used for optimization
-- 
cgit v0.10.2


From d1acfae514425d680912907c6554852f1e258551 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:44 -0500
Subject: [SCSI] libiscsi_tcp: update recv tracking for each skb instead of
 iscsi pdu

Everytime we read in a pdu libiscsi will update a tracking field.
It uses this to decide when to check if the transport might be bad.
If we have not got data in recv_timeout seconds then we will
send a iscsi ping/nop.

If we are on a slow link then it could take a while to read in all
the data for a data_in. In that case we might send a ping/nop when
we do not need to or we might drop a session thinking it is bad
when the lower layer is making forward progress on it.

This patch has libiscsi_tcp update the recv tracking for each skb
(basically network packet from our point of view) instead of the
entire iscsi pdu+data, so we account for these cases where data is
coming in slowly.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index db93cd0..b84a1d8 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -863,6 +863,12 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
 	int rc = 0;
 
 	ISCSI_DBG_TCP(conn, "in %d bytes\n", skb->len - offset);
+	/*
+	 * Update for each skb instead of pdu, because over slow networks a
+	 * data_in's data could take a while to read in. We also want to
+	 * account for r2ts.
+	 */
+	conn->last_recv = jiffies;
 
 	if (unlikely(conn->suspend_rx)) {
 		ISCSI_DBG_TCP(conn, "Rx suspended!\n");
-- 
cgit v0.10.2


From 4c48a82935f833d94fcf44c2b0c5d2922acfc77a Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:45 -0500
Subject: [SCSI] libiscsi: fix iscsi transport checks to account for slower
 links

If we have not got any pdus for recv_timeout seconds, then we will
send a iscsi ping/nop to make sure the target is still around. The
problem is if this is a slow link, and the ping got queued after
the data for a data_out (read), then the transport code could think
the ping has failed when it is just slowly making its way through
the network. This patch has us check if we are making progress while
the nop is outstanding. If we are still reading in data, then we
do not fail the session at that time.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 212fe20..c648bd3 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1677,6 +1677,22 @@ static void iscsi_start_tx(struct iscsi_conn *conn)
 		iscsi_conn_queue_work(conn);
 }
 
+/*
+ * We want to make sure a ping is in flight. It has timed out.
+ * And we are not busy processing a pdu that is making
+ * progress but got started before the ping and is taking a while
+ * to complete so the ping is just stuck behind it in a queue.
+ */
+static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
+{
+	if (conn->ping_task &&
+	    time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
+			   (conn->ping_timeout * HZ), jiffies))
+		return 1;
+	else
+		return 0;
+}
+
 static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
 {
 	struct iscsi_cls_session *cls_session;
@@ -1712,16 +1728,20 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
 	 * if the ping timedout then we are in the middle of cleaning up
 	 * and can let the iscsi eh handle it
 	 */
-	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
-			    (conn->ping_timeout * HZ), jiffies))
+	if (iscsi_has_ping_timed_out(conn)) {
 		rc = BLK_EH_RESET_TIMER;
+		goto done;
+	}
 	/*
 	 * if we are about to check the transport then give the command
 	 * more time
 	 */
 	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ),
-			   jiffies))
+			   jiffies)) {
 		rc = BLK_EH_RESET_TIMER;
+		goto done;
+	}
+
 	/* if in the middle of checking the transport then give us more time */
 	if (conn->ping_task)
 		rc = BLK_EH_RESET_TIMER;
@@ -1748,13 +1768,13 @@ static void iscsi_check_transport_timeouts(unsigned long data)
 
 	recv_timeout *= HZ;
 	last_recv = conn->last_recv;
-	if (conn->ping_task &&
-	    time_before_eq(conn->last_ping + (conn->ping_timeout * HZ),
-			   jiffies)) {
+
+	if (iscsi_has_ping_timed_out(conn)) {
 		iscsi_conn_printk(KERN_ERR, conn, "ping timeout of %d secs "
-				  "expired, last rx %lu, last ping %lu, "
-				  "now %lu\n", conn->ping_timeout, last_recv,
-				  conn->last_ping, jiffies);
+				  "expired, recv timeout %d, last rx %lu, "
+				  "last ping %lu, now %lu\n",
+				  conn->ping_timeout, conn->recv_timeout,
+				  last_recv, conn->last_ping, jiffies);
 		spin_unlock(&session->lock);
 		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 		return;
-- 
cgit v0.10.2


From 3bbaaad95fd38dedb7c66a601f14825b4e0c5a59 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:46 -0500
Subject: [SCSI] libiscsi: handle cleanup task races

bnx2i needs to send a hardware specific cleanup command if
a command has not completed normally (iscsi/scsi response from
target), and the session is still ok (this is the case when we
send a TMF to stop the command).

At this time it will need to drop the session lock. The problem
with the current code is that fail_all_commands assumes we
will hold the lock the entire time, so it uses list_for_each_entry_safe.
If while bnx2i drops the session lock multiple cmds complete then
list_for_each_entry_safe will not handle this correctly.

This patch removes the running lists and just has us loop over
the cmds array (in later patches we will then replace that
array with a block tag map at the session level). It also fixes
up the completion path so that if the TMF code and the normal recv
path were completing the same command then they both do not try
to do release the refcount taken when the task is queued.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index c648bd3..a9d7e52 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -109,7 +109,7 @@ iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
 		 * if the window closed with IO queued, then kick the
 		 * xmit thread
 		 */
-		if (!list_empty(&session->leadconn->xmitqueue) ||
+		if (!list_empty(&session->leadconn->cmdqueue) ||
 		    !list_empty(&session->leadconn->mgmtqueue)) {
 			if (!(session->tt->caps & CAP_DATA_PATH_OFFLOAD))
 				iscsi_conn_queue_work(session->leadconn);
@@ -366,7 +366,6 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 		return -EIO;
 
 	task->state = ISCSI_TASK_RUNNING;
-	list_move_tail(&task->running, &conn->run_list);
 
 	conn->scsicmd_pdus_cnt++;
 	ISCSI_DBG_SESSION(session, "iscsi prep [%s cid %d sc %p cdb 0x%x "
@@ -382,26 +381,23 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 }
 
 /**
- * iscsi_complete_command - finish a task
+ * iscsi_free_task - free a task
  * @task: iscsi cmd task
  *
  * Must be called with session lock.
  * This function returns the scsi command to scsi-ml or cleans
  * up mgmt tasks then returns the task to the pool.
  */
-static void iscsi_complete_command(struct iscsi_task *task)
+static void iscsi_free_task(struct iscsi_task *task)
 {
 	struct iscsi_conn *conn = task->conn;
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
 
 	session->tt->cleanup_task(task);
-	list_del_init(&task->running);
-	task->state = ISCSI_TASK_COMPLETED;
+	task->state = ISCSI_TASK_FREE;
 	task->sc = NULL;
 
-	if (conn->task == task)
-		conn->task = NULL;
 	/*
 	 * login task is preallocated so do not free
 	 */
@@ -410,9 +406,6 @@ static void iscsi_complete_command(struct iscsi_task *task)
 
 	__kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*));
 
-	if (conn->ping_task == task)
-		conn->ping_task = NULL;
-
 	if (sc) {
 		task->sc = NULL;
 		/* SCSI eh reuses commands to verify us */
@@ -435,7 +428,7 @@ EXPORT_SYMBOL_GPL(__iscsi_get_task);
 static void __iscsi_put_task(struct iscsi_task *task)
 {
 	if (atomic_dec_and_test(&task->refcount))
-		iscsi_complete_command(task);
+		iscsi_free_task(task);
 }
 
 void iscsi_put_task(struct iscsi_task *task)
@@ -448,14 +441,50 @@ void iscsi_put_task(struct iscsi_task *task)
 }
 EXPORT_SYMBOL_GPL(iscsi_put_task);
 
+/**
+ * iscsi_complete_task - finish a task
+ * @task: iscsi cmd task
+ *
+ * Must be called with session lock.
+ */
+static void iscsi_complete_task(struct iscsi_task *task)
+{
+	struct iscsi_conn *conn = task->conn;
+
+	if (task->state == ISCSI_TASK_COMPLETED)
+		return;
+	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
+
+	task->state = ISCSI_TASK_COMPLETED;
+
+	if (!list_empty(&task->running))
+		list_del_init(&task->running);
+
+	if (conn->task == task)
+		conn->task = NULL;
+
+	if (conn->ping_task == task)
+		conn->ping_task = NULL;
+
+	/* release get from queueing */
+	__iscsi_put_task(task);
+}
+
 /*
- * session lock must be held
+ * session lock must be held and if not called for a task that is
+ * still pending or from the xmit thread, then xmit thread must
+ * be suspended.
  */
-static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task,
-			 int err)
+static void fail_scsi_task(struct iscsi_task *task, int err)
 {
+	struct iscsi_conn *conn = task->conn;
 	struct scsi_cmnd *sc;
 
+	/*
+	 * if a command completes and we get a successful tmf response
+	 * we will hit this because the scsi eh abort code does not take
+	 * a ref to the task.
+	 */
 	sc = task->sc;
 	if (!sc)
 		return;
@@ -475,10 +504,7 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task,
 		scsi_in(sc)->resid = scsi_in(sc)->length;
 	}
 
-	if (conn->task == task)
-		conn->task = NULL;
-	/* release ref from queuecommand */
-	__iscsi_put_task(task);
+	iscsi_complete_task(task);
 }
 
 static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
@@ -518,7 +544,6 @@ static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
 		session->state = ISCSI_STATE_LOGGING_OUT;
 
 	task->state = ISCSI_TASK_RUNNING;
-	list_move_tail(&task->running, &conn->mgmt_run_list);
 	ISCSI_DBG_SESSION(session, "mgmtpdu [op 0x%x hdr->itt 0x%x "
 			  "datalen %d]\n", hdr->opcode & ISCSI_OPCODE_MASK,
 			  hdr->itt, task->data_count);
@@ -564,6 +589,8 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 	atomic_set(&task->refcount, 1);
 	task->conn = conn;
 	task->sc = NULL;
+	INIT_LIST_HEAD(&task->running);
+	task->state = ISCSI_TASK_PENDING;
 
 	if (data_size) {
 		memcpy(task->data, data, data_size);
@@ -575,7 +602,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		if (conn->session->tt->alloc_pdu(task, hdr->opcode)) {
 			iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
 					 "pdu for mgmt task.\n");
-			goto requeue_task;
+			goto free_task;
 		}
 	}
 
@@ -591,30 +618,22 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 						   task->conn->session->age);
 	}
 
-	INIT_LIST_HEAD(&task->running);
-	list_add_tail(&task->running, &conn->mgmtqueue);
-
 	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
 		if (iscsi_prep_mgmt_task(conn, task))
 			goto free_task;
 
 		if (session->tt->xmit_task(task))
 			goto free_task;
-
-	} else
+	} else {
+		list_add_tail(&task->running, &conn->mgmtqueue);
 		iscsi_conn_queue_work(conn);
+	}
 
 	return task;
 
 free_task:
 	__iscsi_put_task(task);
 	return NULL;
-
-requeue_task:
-	if (task != conn->login_task)
-		__kfifo_put(session->cmdpool.queue, (void*)&task,
-			    sizeof(void*));
-	return NULL;
 }
 
 int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
@@ -709,11 +728,10 @@ invalid_datalen:
 			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
 	}
 out:
-	ISCSI_DBG_SESSION(session, "done [sc %p res %d itt 0x%x]\n",
+	ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-
-	__iscsi_put_task(task);
+	iscsi_complete_task(task);
 }
 
 /**
@@ -747,8 +765,11 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
 	}
 
+	ISCSI_DBG_SESSION(conn->session, "data in with status done "
+			  "[sc %p res %d itt 0x%x]\n",
+			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-	__iscsi_put_task(task);
+	iscsi_complete_task(task);
 }
 
 static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
@@ -969,7 +990,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		}
 
 		iscsi_tmf_rsp(conn, hdr);
-		__iscsi_put_task(task);
+		iscsi_complete_task(task);
 		break;
 	case ISCSI_OP_NOOP_IN:
 		iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -987,7 +1008,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			goto recv_pdu;
 
 		mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout);
-		__iscsi_put_task(task);
+		iscsi_complete_task(task);
 		break;
 	default:
 		rc = ISCSI_ERR_BAD_OPCODE;
@@ -999,7 +1020,7 @@ out:
 recv_pdu:
 	if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
 		rc = ISCSI_ERR_CONN_FAILED;
-	__iscsi_put_task(task);
+	iscsi_complete_task(task);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
@@ -1176,7 +1197,12 @@ void iscsi_requeue_task(struct iscsi_task *task)
 {
 	struct iscsi_conn *conn = task->conn;
 
-	list_move_tail(&task->running, &conn->requeue);
+	/*
+	 * this may be on the requeue list already if the xmit_task callout
+	 * is handling the r2ts while we are adding new ones
+	 */
+	if (list_empty(&task->running))
+		list_add_tail(&task->running, &conn->requeue);
 	iscsi_conn_queue_work(conn);
 }
 EXPORT_SYMBOL_GPL(iscsi_requeue_task);
@@ -1216,6 +1242,7 @@ check_mgmt:
 	while (!list_empty(&conn->mgmtqueue)) {
 		conn->task = list_entry(conn->mgmtqueue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		if (iscsi_prep_mgmt_task(conn, conn->task)) {
 			__iscsi_put_task(conn->task);
 			conn->task = NULL;
@@ -1227,23 +1254,26 @@ check_mgmt:
 	}
 
 	/* process pending command queue */
-	while (!list_empty(&conn->xmitqueue)) {
+	while (!list_empty(&conn->cmdqueue)) {
 		if (conn->tmf_state == TMF_QUEUED)
 			break;
 
-		conn->task = list_entry(conn->xmitqueue.next,
+		conn->task = list_entry(conn->cmdqueue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
-			fail_command(conn, conn->task, DID_IMM_RETRY << 16);
+			fail_scsi_task(conn->task, DID_IMM_RETRY << 16);
 			continue;
 		}
 		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
 		if (rc) {
 			if (rc == -ENOMEM) {
+				list_add_tail(&conn->task->running,
+					      &conn->cmdqueue);
 				conn->task = NULL;
 				goto again;
 			} else
-				fail_command(conn, conn->task, DID_ABORT << 16);
+				fail_scsi_task(conn->task, DID_ABORT << 16);
 			continue;
 		}
 		rc = iscsi_xmit_task(conn);
@@ -1270,8 +1300,8 @@ check_mgmt:
 
 		conn->task = list_entry(conn->requeue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		conn->task->state = ISCSI_TASK_RUNNING;
-		list_move_tail(conn->requeue.next, &conn->run_list);
 		rc = iscsi_xmit_task(conn);
 		if (rc)
 			goto again;
@@ -1412,7 +1442,6 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 		reason = FAILURE_OOM;
 		goto reject;
 	}
-	list_add_tail(&task->running, &conn->xmitqueue);
 
 	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
 		reason = iscsi_prep_scsi_cmd_pdu(task);
@@ -1429,8 +1458,10 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			reason = FAILURE_SESSION_NOT_READY;
 			goto prepd_reject;
 		}
-	} else
+	} else {
+		list_add_tail(&task->running, &conn->cmdqueue);
 		iscsi_conn_queue_work(conn);
+	}
 
 	session->queued_cmdsn++;
 	spin_unlock(&session->lock);
@@ -1439,7 +1470,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
 prepd_reject:
 	sc->scsi_done = NULL;
-	iscsi_complete_command(task);
+	iscsi_complete_task(task);
 reject:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n",
@@ -1449,7 +1480,7 @@ reject:
 
 prepd_fault:
 	sc->scsi_done = NULL;
-	iscsi_complete_command(task);
+	iscsi_complete_task(task);
 fault:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n",
@@ -1618,44 +1649,24 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,
  * Fail commands. session lock held and recv side suspended and xmit
  * thread flushed
  */
-static void fail_all_commands(struct iscsi_conn *conn, unsigned lun,
-			      int error)
+static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun,
+			    int error)
 {
-	struct iscsi_task *task, *tmp;
-
-	if (conn->task) {
-		if (lun == -1 ||
-		    (conn->task->sc && conn->task->sc->device->lun == lun))
-			conn->task = NULL;
-	}
+	struct iscsi_task *task;
+	int i;
 
-	/* flush pending */
-	list_for_each_entry_safe(task, tmp, &conn->xmitqueue, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					  "failing pending sc %p itt 0x%x\n",
-					  task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
-	}
+	for (i = 0; i < conn->session->cmds_max; i++) {
+		task = conn->session->cmds[i];
+		if (!task->sc || task->state == ISCSI_TASK_FREE)
+			continue;
 
-	list_for_each_entry_safe(task, tmp, &conn->requeue, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					  "failing requeued sc %p itt 0x%x\n",
-					  task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
-	}
+		if (lun != -1 && lun != task->sc->device->lun)
+			continue;
 
-	/* fail all other running */
-	list_for_each_entry_safe(task, tmp, &conn->run_list, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					 "failing in progress sc %p itt 0x%x\n",
-					 task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
+		ISCSI_DBG_SESSION(conn->session,
+				  "failing sc %p itt 0x%x state %d\n",
+				  task->sc, task->itt, task->state);
+		fail_scsi_task(task, error << 16);
 	}
 }
 
@@ -1859,7 +1870,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	}
 
 	if (task->state == ISCSI_TASK_PENDING) {
-		fail_command(conn, task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT << 16);
 		goto success;
 	}
 
@@ -1890,7 +1901,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 		 * then sent more data for the cmd.
 		 */
 		spin_lock(&session->lock);
-		fail_command(conn, task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT << 16);
 		conn->tmf_state = TMF_INITIAL;
 		spin_unlock(&session->lock);
 		iscsi_start_tx(conn);
@@ -1997,7 +2008,7 @@ int iscsi_eh_device_reset(struct scsi_cmnd *sc)
 	iscsi_suspend_tx(conn);
 
 	spin_lock_bh(&session->lock);
-	fail_all_commands(conn, sc->device->lun, DID_ERROR);
+	fail_scsi_tasks(conn, sc->device->lun, DID_ERROR);
 	conn->tmf_state = TMF_INITIAL;
 	spin_unlock_bh(&session->lock);
 
@@ -2304,6 +2315,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
 		if (cmd_task_size)
 			task->dd_data = &task[1];
 		task->itt = cmd_i;
+		task->state = ISCSI_TASK_FREE;
 		INIT_LIST_HEAD(&task->running);
 	}
 
@@ -2390,10 +2402,8 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	conn->transport_timer.data = (unsigned long)conn;
 	conn->transport_timer.function = iscsi_check_transport_timeouts;
 
-	INIT_LIST_HEAD(&conn->run_list);
-	INIT_LIST_HEAD(&conn->mgmt_run_list);
 	INIT_LIST_HEAD(&conn->mgmtqueue);
-	INIT_LIST_HEAD(&conn->xmitqueue);
+	INIT_LIST_HEAD(&conn->cmdqueue);
 	INIT_LIST_HEAD(&conn->requeue);
 	INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
@@ -2561,27 +2571,24 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn)
 EXPORT_SYMBOL_GPL(iscsi_conn_start);
 
 static void
-flush_control_queues(struct iscsi_session *session, struct iscsi_conn *conn)
+fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 {
-	struct iscsi_task *task, *tmp;
+	struct iscsi_task *task;
+	int i;
 
-	/* handle pending */
-	list_for_each_entry_safe(task, tmp, &conn->mgmtqueue, running) {
-		ISCSI_DBG_SESSION(session, "flushing pending mgmt task "
-				  "itt 0x%x\n", task->itt);
-		/* release ref from prep task */
-		__iscsi_put_task(task);
-	}
+	for (i = 0; i < conn->session->cmds_max; i++) {
+		task = conn->session->cmds[i];
+		if (task->sc)
+			continue;
 
-	/* handle running */
-	list_for_each_entry_safe(task, tmp, &conn->mgmt_run_list, running) {
-		ISCSI_DBG_SESSION(session, "flushing running mgmt task "
-				  "itt 0x%x\n", task->itt);
-		/* release ref from prep task */
-		__iscsi_put_task(task);
-	}
+		if (task->state == ISCSI_TASK_FREE)
+			continue;
 
-	conn->task = NULL;
+		ISCSI_DBG_SESSION(conn->session,
+				  "failing mgmt itt 0x%x state %d\n",
+				  task->itt, task->state);
+		iscsi_complete_task(task);
+	}
 }
 
 static void iscsi_start_session_recovery(struct iscsi_session *session,
@@ -2638,10 +2645,10 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 	 */
 	spin_lock_bh(&session->lock);
 	if (flag == STOP_CONN_RECOVER)
-		fail_all_commands(conn, -1, DID_TRANSPORT_DISRUPTED);
+		fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED);
 	else
-		fail_all_commands(conn, -1, DID_ERROR);
-	flush_control_queues(session, conn);
+		fail_scsi_tasks(conn, -1, DID_ERROR);
+	fail_mgmt_tasks(session, conn);
 	spin_unlock_bh(&session->lock);
 	mutex_unlock(&session->eh_mutex);
 }
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 88d33a4..facae71 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -82,6 +82,7 @@ enum {
 
 
 enum {
+	ISCSI_TASK_FREE,
 	ISCSI_TASK_COMPLETED,
 	ISCSI_TASK_PENDING,
 	ISCSI_TASK_RUNNING,
@@ -181,9 +182,7 @@ struct iscsi_conn {
 
 	/* xmit */
 	struct list_head	mgmtqueue;	/* mgmt (control) xmit queue */
-	struct list_head	mgmt_run_list;	/* list of control tasks */
-	struct list_head	xmitqueue;	/* data-path cmd queue */
-	struct list_head	run_list;	/* list of cmds in progress */
+	struct list_head	cmdqueue;	/* data-path cmd queue */
 	struct list_head	requeue;	/* tasks needing another run */
 	struct work_struct	xmitwork;	/* per-conn. xmit workqueue */
 	unsigned long		suspend_tx;	/* suspend Tx */
-- 
cgit v0.10.2


From 301e0f7e4d78e956c58b66888e134dbdb44ea28e Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:47 -0500
Subject: [SCSI] libiscsi: don't let io sit in queue when session has failed

If the session is failed, but we have not yet fully transitioned
to the recovery stage we were still queueuing IO. The idea is
that for some failures we can recvover at the command level
and still continue to execute other IO. Well, we never have
added the recovery within a command code, so queueing up IO here
just creates the possibility that it might time time out so
this just has us requeue the IO the scsi layer for now.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index a9d7e52..57eb3af 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1390,13 +1390,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 		goto fault;
 	}
 
-	/*
-	 * ISCSI_STATE_FAILED is a temp. state. The recovery
-	 * code will decide what is best to do with command queued
-	 * during this time
-	 */
-	if (session->state != ISCSI_STATE_LOGGED_IN &&
-	    session->state != ISCSI_STATE_FAILED) {
+	if (session->state != ISCSI_STATE_LOGGED_IN) {
 		/*
 		 * to handle the race between when we set the recovery state
 		 * and block the session we requeue here (commands could
@@ -1404,12 +1398,15 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 		 * up because the block code is not locked)
 		 */
 		switch (session->state) {
+		case ISCSI_STATE_FAILED:
 		case ISCSI_STATE_IN_RECOVERY:
 			reason = FAILURE_SESSION_IN_RECOVERY;
-			goto reject;
+			sc->result = DID_IMM_RETRY << 16;
+			break;
 		case ISCSI_STATE_LOGGING_OUT:
 			reason = FAILURE_SESSION_LOGGING_OUT;
-			goto reject;
+			sc->result = DID_IMM_RETRY << 16;
+			break;
 		case ISCSI_STATE_RECOVERY_FAILED:
 			reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
 			sc->result = DID_TRANSPORT_FAILFAST << 16;
-- 
cgit v0.10.2


From 1336aed10b8af791378b017f0fa8da4e5b827b8d Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:48 -0500
Subject: [SCSI] libiscsi: check if iscsi host has work queue before queueing
 work

Instead of having libiscsi check if the offload bit is set, have
it check if the lld created a work queue. I think this is more
clear.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 57eb3af..dafa054 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -81,7 +81,8 @@ inline void iscsi_conn_queue_work(struct iscsi_conn *conn)
 	struct Scsi_Host *shost = conn->session->host;
 	struct iscsi_host *ihost = shost_priv(shost);
 
-	queue_work(ihost->workq, &conn->xmitwork);
+	if (ihost->workq)
+		queue_work(ihost->workq, &conn->xmitwork);
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_queue_work);
 
@@ -110,10 +111,8 @@ iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
 		 * xmit thread
 		 */
 		if (!list_empty(&session->leadconn->cmdqueue) ||
-		    !list_empty(&session->leadconn->mgmtqueue)) {
-			if (!(session->tt->caps & CAP_DATA_PATH_OFFLOAD))
-				iscsi_conn_queue_work(session->leadconn);
-		}
+		    !list_empty(&session->leadconn->mgmtqueue))
+			iscsi_conn_queue_work(session->leadconn);
 	}
 }
 EXPORT_SYMBOL_GPL(iscsi_update_cmdsn);
@@ -555,6 +554,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		      char *data, uint32_t data_size)
 {
 	struct iscsi_session *session = conn->session;
+	struct iscsi_host *ihost = shost_priv(session->host);
 	struct iscsi_task *task;
 	itt_t itt;
 
@@ -618,7 +618,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 						   task->conn->session->age);
 	}
 
-	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
+	if (!ihost->workq) {
 		if (iscsi_prep_mgmt_task(conn, task))
 			goto free_task;
 
@@ -1368,6 +1368,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
 	struct iscsi_cls_session *cls_session;
 	struct Scsi_Host *host;
+	struct iscsi_host *ihost;
 	int reason = 0;
 	struct iscsi_session *session;
 	struct iscsi_conn *conn;
@@ -1378,6 +1379,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 	sc->SCp.ptr = NULL;
 
 	host = sc->device->host;
+	ihost = shost_priv(host);
 	spin_unlock(host->host_lock);
 
 	cls_session = starget_to_session(scsi_target(sc->device));
@@ -1440,7 +1442,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 		goto reject;
 	}
 
-	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
+	if (!ihost->workq) {
 		reason = iscsi_prep_scsi_cmd_pdu(task);
 		if (reason) {
 			if (reason == -ENOMEM) {
@@ -1673,7 +1675,7 @@ void iscsi_suspend_tx(struct iscsi_conn *conn)
 	struct iscsi_host *ihost = shost_priv(shost);
 
 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-	if (!(conn->session->tt->caps & CAP_DATA_PATH_OFFLOAD))
+	if (ihost->workq)
 		flush_workqueue(ihost->workq);
 }
 EXPORT_SYMBOL_GPL(iscsi_suspend_tx);
@@ -1681,8 +1683,7 @@ EXPORT_SYMBOL_GPL(iscsi_suspend_tx);
 static void iscsi_start_tx(struct iscsi_conn *conn)
 {
 	clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-	if (!(conn->session->tt->caps & CAP_DATA_PATH_OFFLOAD))
-		iscsi_conn_queue_work(conn);
+	iscsi_conn_queue_work(conn);
 }
 
 /*
-- 
cgit v0.10.2


From b3cd5050bf8eb32ceecee129cac7c59e6f1668c4 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:49 -0500
Subject: [SCSI] libiscsi: add task aborted state

If a task did not complete normally due to a TMF, libiscsi will
now complete the task with the state ISCSI_TASK_ABRT_TMF. Drivers
like bnx2i that need to free resources if a command did not complete normally
can then check the task state. If a driver does not need to send
a special command if we have dropped the session then they can check
for ISCSI_TASK_ABRT_SESS_RECOV.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index ffbe0c7..0ba6ec8 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -257,11 +257,8 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
 
-	/*
-	 * mgmt tasks do not need special cleanup and we do not
-	 * allocate anything in the init task callout
-	 */
-	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+	/* mgmt tasks do not need special cleanup */
+	if (!task->sc)
 		return;
 
 	if (iser_task->status == ISER_TASK_STATUS_STARTED) {
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index dafa054..b00be6c 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -443,18 +443,20 @@ EXPORT_SYMBOL_GPL(iscsi_put_task);
 /**
  * iscsi_complete_task - finish a task
  * @task: iscsi cmd task
+ * @state: state to complete task with
  *
  * Must be called with session lock.
  */
-static void iscsi_complete_task(struct iscsi_task *task)
+static void iscsi_complete_task(struct iscsi_task *task, int state)
 {
 	struct iscsi_conn *conn = task->conn;
 
-	if (task->state == ISCSI_TASK_COMPLETED)
+	if (task->state == ISCSI_TASK_COMPLETED ||
+	    task->state == ISCSI_TASK_ABRT_TMF ||
+	    task->state == ISCSI_TASK_ABRT_SESS_RECOV)
 		return;
 	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
-
-	task->state = ISCSI_TASK_COMPLETED;
+	task->state = state;
 
 	if (!list_empty(&task->running))
 		list_del_init(&task->running);
@@ -478,6 +480,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 {
 	struct iscsi_conn *conn = task->conn;
 	struct scsi_cmnd *sc;
+	int state;
 
 	/*
 	 * if a command completes and we get a successful tmf response
@@ -488,14 +491,20 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 	if (!sc)
 		return;
 
-	if (task->state == ISCSI_TASK_PENDING)
+	if (task->state == ISCSI_TASK_PENDING) {
 		/*
 		 * cmd never made it to the xmit thread, so we should not count
 		 * the cmd in the sequencing
 		 */
 		conn->session->queued_cmdsn--;
+		/* it was never sent so just complete like normal */
+		state = ISCSI_TASK_COMPLETED;
+	} else if (err == DID_TRANSPORT_DISRUPTED)
+		state = ISCSI_TASK_ABRT_SESS_RECOV;
+	else
+		state = ISCSI_TASK_ABRT_TMF;
 
-	sc->result = err;
+	sc->result = err << 16;
 	if (!scsi_bidi_cmnd(sc))
 		scsi_set_resid(sc, scsi_bufflen(sc));
 	else {
@@ -503,7 +512,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 		scsi_in(sc)->resid = scsi_in(sc)->length;
 	}
 
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, state);
 }
 
 static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
@@ -731,7 +740,7 @@ out:
 	ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 }
 
 /**
@@ -769,7 +778,7 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			  "[sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 }
 
 static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
@@ -990,7 +999,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		}
 
 		iscsi_tmf_rsp(conn, hdr);
-		iscsi_complete_task(task);
+		iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 		break;
 	case ISCSI_OP_NOOP_IN:
 		iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -1008,7 +1017,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			goto recv_pdu;
 
 		mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout);
-		iscsi_complete_task(task);
+		iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 		break;
 	default:
 		rc = ISCSI_ERR_BAD_OPCODE;
@@ -1020,7 +1029,7 @@ out:
 recv_pdu:
 	if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
 		rc = ISCSI_ERR_CONN_FAILED;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
@@ -1262,7 +1271,7 @@ check_mgmt:
 					 struct iscsi_task, running);
 		list_del_init(&conn->task->running);
 		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
-			fail_scsi_task(conn->task, DID_IMM_RETRY << 16);
+			fail_scsi_task(conn->task, DID_IMM_RETRY);
 			continue;
 		}
 		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
@@ -1273,7 +1282,7 @@ check_mgmt:
 				conn->task = NULL;
 				goto again;
 			} else
-				fail_scsi_task(conn->task, DID_ABORT << 16);
+				fail_scsi_task(conn->task, DID_ABORT);
 			continue;
 		}
 		rc = iscsi_xmit_task(conn);
@@ -1469,7 +1478,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
 prepd_reject:
 	sc->scsi_done = NULL;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 reject:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n",
@@ -1479,7 +1488,7 @@ reject:
 
 prepd_fault:
 	sc->scsi_done = NULL;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 fault:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n",
@@ -1665,7 +1674,7 @@ static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun,
 		ISCSI_DBG_SESSION(conn->session,
 				  "failing sc %p itt 0x%x state %d\n",
 				  task->sc, task->itt, task->state);
-		fail_scsi_task(task, error << 16);
+		fail_scsi_task(task, error);
 	}
 }
 
@@ -1868,7 +1877,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	}
 
 	if (task->state == ISCSI_TASK_PENDING) {
-		fail_scsi_task(task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT);
 		goto success;
 	}
 
@@ -1899,7 +1908,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 		 * then sent more data for the cmd.
 		 */
 		spin_lock(&session->lock);
-		fail_scsi_task(task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT);
 		conn->tmf_state = TMF_INITIAL;
 		spin_unlock(&session->lock);
 		iscsi_start_tx(conn);
@@ -2572,7 +2581,7 @@ static void
 fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 {
 	struct iscsi_task *task;
-	int i;
+	int i, state;
 
 	for (i = 0; i < conn->session->cmds_max; i++) {
 		task = conn->session->cmds[i];
@@ -2585,7 +2594,11 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 		ISCSI_DBG_SESSION(conn->session,
 				  "failing mgmt itt 0x%x state %d\n",
 				  task->itt, task->state);
-		iscsi_complete_task(task);
+		state = ISCSI_TASK_ABRT_SESS_RECOV;
+		if (task->state == ISCSI_TASK_PENDING)
+			state = ISCSI_TASK_COMPLETED;
+		iscsi_complete_task(task, state);
+
 	}
 }
 
@@ -2642,10 +2655,7 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 	 * flush queues.
 	 */
 	spin_lock_bh(&session->lock);
-	if (flag == STOP_CONN_RECOVER)
-		fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED);
-	else
-		fail_scsi_tasks(conn, -1, DID_ERROR);
+	fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED);
 	fail_mgmt_tasks(session, conn);
 	spin_unlock_bh(&session->lock);
 	mutex_unlock(&session->eh_mutex);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index b84a1d8..2bc0709 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -440,8 +440,8 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 	struct iscsi_tcp_task *tcp_task = task->dd_data;
 	struct iscsi_r2t_info *r2t;
 
-	/* nothing to do for mgmt or pending tasks */
-	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+	/* nothing to do for mgmt */
+	if (!task->sc)
 		return;
 
 	/* flush task's r2t queues */
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index facae71..196525c 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -86,6 +86,8 @@ enum {
 	ISCSI_TASK_COMPLETED,
 	ISCSI_TASK_PENDING,
 	ISCSI_TASK_RUNNING,
+	ISCSI_TASK_ABRT_TMF,		/* aborted due to TMF */
+	ISCSI_TASK_ABRT_SESS_RECOV,	/* aborted due to session recovery */
 };
 
 struct iscsi_r2t_info {
-- 
cgit v0.10.2


From 4421c9ebeeacf3d9c4e6aa558e1a777178e71add Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:50 -0500
Subject: [SCSI] libiscsi: add debug printks for iscsi command completion path

This patch just adds some debug statements for the abort
and completion paths.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index b00be6c..59908ae 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -393,10 +393,12 @@ static void iscsi_free_task(struct iscsi_task *task)
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
 
+	ISCSI_DBG_SESSION(session, "freeing task itt 0x%x state %d sc %p\n",
+			  task->itt, task->state, task->sc);
+
 	session->tt->cleanup_task(task);
 	task->state = ISCSI_TASK_FREE;
 	task->sc = NULL;
-
 	/*
 	 * login task is preallocated so do not free
 	 */
@@ -451,6 +453,9 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
 {
 	struct iscsi_conn *conn = task->conn;
 
+	ISCSI_DBG_SESSION(conn->session,
+			  "complete task itt 0x%x state %d sc %p\n",
+			  task->itt, task->state, task->sc);
 	if (task->state == ISCSI_TASK_COMPLETED ||
 	    task->state == ISCSI_TASK_ABRT_TMF ||
 	    task->state == ISCSI_TASK_ABRT_SESS_RECOV)
@@ -1836,6 +1841,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	cls_session = starget_to_session(scsi_target(sc->device));
 	session = cls_session->dd_data;
 
+	ISCSI_DBG_SESSION(session, "aborting sc %p\n", sc);
+
 	mutex_lock(&session->eh_mutex);
 	spin_lock_bh(&session->lock);
 	/*
@@ -1858,6 +1865,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	    sc->SCp.phase != session->age) {
 		spin_unlock_bh(&session->lock);
 		mutex_unlock(&session->eh_mutex);
+		ISCSI_DBG_SESSION(session, "failing abort due to dropped "
+				  "session.\n");
 		return FAILED;
 	}
 
-- 
cgit v0.10.2


From f9da3be5afc08c40e7f7a395c8935d500a6898b1 Mon Sep 17 00:00:00 2001
From: Andy Yan <ayan@marvell.com>
Date: Thu, 14 May 2009 20:41:21 -0400
Subject: [SCSI] mvsas: remove all the casts from void * or to void *

Signed-off-by: Ying Chu <jasonchu@marvell.com>
Signed-off-by: Andy Yan <ayan@marvell.com>
Signed-off-by: Ke Wei <kewei@marvell.com
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index c05e4c0..0d21386 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -28,7 +28,7 @@ static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
 {
 	if (task->lldd_task) {
 		struct mvs_slot_info *slot;
-		slot = (struct mvs_slot_info *) task->lldd_task;
+		slot = task->lldd_task;
 		*tag = slot->slot_tag;
 		return 1;
 	}
@@ -37,7 +37,7 @@ static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
 
 void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
 {
-	void *bitmap = (void *) &mvi->tags;
+	void *bitmap = &mvi->tags;
 	clear_bit(tag, bitmap);
 }
 
@@ -48,14 +48,14 @@ void mvs_tag_free(struct mvs_info *mvi, u32 tag)
 
 void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
 {
-	void *bitmap = (void *) &mvi->tags;
+	void *bitmap = &mvi->tags;
 	set_bit(tag, bitmap);
 }
 
 inline int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
 {
 	unsigned int index, tag;
-	void *bitmap = (void *) &mvi->tags;
+	void *bitmap = &mvi->tags;
 
 	index = find_first_zero_bit(bitmap, mvi->tags_num);
 	tag = index;
@@ -591,8 +591,7 @@ static int mvs_task_prep_ata(struct mvs_info *mvi,
 {
 	struct sas_task *task = tei->task;
 	struct domain_device *dev = task->dev;
-	struct mvs_device *mvi_dev =
-		(struct mvs_device *)dev->lldd_dev;
+	struct mvs_device *mvi_dev = dev->lldd_dev;
 	struct mvs_cmd_hdr *hdr = tei->hdr;
 	struct asd_sas_port *sas_port = dev->port;
 	struct mvs_slot_info *slot;
@@ -733,8 +732,7 @@ static int mvs_task_prep_ssp(struct mvs_info *mvi,
 	struct mvs_cmd_hdr *hdr = tei->hdr;
 	struct mvs_port *port = tei->port;
 	struct domain_device *dev = task->dev;
-	struct mvs_device *mvi_dev =
-		(struct mvs_device *)dev->lldd_dev;
+	struct mvs_device *mvi_dev = dev->lldd_dev;
 	struct asd_sas_port *sas_port = dev->port;
 	struct mvs_slot_info *slot;
 	void *buf_prd;
@@ -894,7 +892,7 @@ static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
 	spin_lock_irqsave(&mvi->lock, flags);
 	do {
 		dev = t->dev;
-		mvi_dev = (struct mvs_device *)dev->lldd_dev;
+		mvi_dev = dev->lldd_dev;
 		if (DEV_IS_GONE(mvi_dev)) {
 			if (mvi_dev)
 				mv_dprintk("device %d not ready.\n",
@@ -987,7 +985,7 @@ static int mvs_task_exec(struct sas_task *task, const int num, gfp_t gfp_flags,
 		}
 		slot->task = t;
 		slot->port = tei.port;
-		t->lldd_task = (void *) slot;
+		t->lldd_task = slot;
 		list_add_tail(&slot->entry, &tei.port->list);
 		/* TODO: select normal or high priority */
 		spin_lock(&t->task_state_lock);
@@ -1139,7 +1137,7 @@ static void *mvs_get_d2h_reg(struct mvs_info *mvi, int i, void *buf)
 	if (((s[1] & 0x00FFFFFF) == 0x00EB1401) && (*(u8 *)&s[3] == 0x01))
 		s[1] = 0x00EB1401 | (*((u8 *)&s[1] + 3) & 0x10);
 
-	return (void *)s;
+	return s;
 }
 
 static u32 mvs_is_sig_fis_received(u32 irq_status)
@@ -1176,7 +1174,7 @@ void mvs_update_phyinfo(struct mvs_info *mvi, int i, int get_st)
 					sas_phy->oob_mode = SATA_OOB_MODE;
 				phy->frame_rcvd_size =
 				    sizeof(struct dev_to_host_fis);
-				mvs_get_d2h_reg(mvi, i, (void *)id);
+				mvs_get_d2h_reg(mvi, i, id);
 			} else {
 				u32 tmp;
 				dev_printk(KERN_DEBUG, mvi->dev,
@@ -1317,7 +1315,7 @@ int mvs_dev_found_notify(struct domain_device *dev, int lock)
 		res = -1;
 		goto found_out;
 	}
-	dev->lldd_dev = (void *)mvi_device;
+	dev->lldd_dev = mvi_device;
 	mvi_device->dev_type = dev->dev_type;
 	mvi_device->mvi_info = mvi;
 	if (parent_dev && DEV_IS_EXPANDER(parent_dev->dev_type)) {
@@ -1356,7 +1354,7 @@ int mvs_dev_found(struct domain_device *dev)
 void mvs_dev_gone_notify(struct domain_device *dev, int lock)
 {
 	unsigned long flags = 0;
-	struct mvs_device *mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_device *mvi_dev = dev->lldd_dev;
 	struct mvs_info *mvi = mvi_dev->mvi_info;
 
 	if (lock)
@@ -1532,7 +1530,7 @@ int mvs_lu_reset(struct domain_device *dev, u8 *lun)
 	unsigned long flags;
 	int i, phyno[WIDE_PORT_MAX_PHY], num , rc = TMF_RESP_FUNC_FAILED;
 	struct mvs_tmf_task tmf_task;
-	struct mvs_device * mvi_dev = (struct mvs_device *)dev->lldd_dev;
+	struct mvs_device * mvi_dev = dev->lldd_dev;
 	struct mvs_info *mvi = mvi_dev->mvi_info;
 
 	tmf_task.tmf = TMF_LU_RESET;
@@ -1654,7 +1652,7 @@ int mvs_abort_task(struct sas_task *task)
 			struct mvs_slot_info *slot;
 
 			if (task->lldd_task) {
-				slot = (struct mvs_slot_info *)task->lldd_task;
+				slot = task->lldd_task;
 				slot_no = (u32) (slot - mvi->slot_info);
 				mvs_slot_complete(mvi, slot_no, 1);
 			}
@@ -1708,7 +1706,7 @@ int mvs_clear_task_set(struct domain_device *dev, u8 *lun)
 static int mvs_sata_done(struct mvs_info *mvi, struct sas_task *task,
 			u32 slot_idx, int err)
 {
-	struct mvs_device *mvi_dev = (struct mvs_device *)task->dev->lldd_dev;
+	struct mvs_device *mvi_dev = task->dev->lldd_dev;
 	struct task_status_struct *tstat = &task->task_status;
 	struct ata_task_resp *resp = (struct ata_task_resp *)tstat->buf;
 	int stat = SAM_GOOD;
@@ -1785,7 +1783,7 @@ int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
 		return -1;
 
 	tstat = &task->task_status;
-	mvi_dev = (struct mvs_device *)task->dev->lldd_dev;
+	mvi_dev = task->dev->lldd_dev;
 
 	mvs_hba_cq_dump(mvi);
 
-- 
cgit v0.10.2


From f0216ae9bd0ea8a45736f386a3b8058aefc64dd8 Mon Sep 17 00:00:00 2001
From: Christof Schmitt <christof.schmitt@de.ibm.com>
Date: Fri, 15 May 2009 13:18:14 +0200
Subject: [SCSI] zfcp: Use correct req_id for traces

The zfcp traces used the fsf_req address in place of the req_id.
Change this to save the correct req_id.

Reviewed-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index 0a1a5dd..b99b87c 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -163,7 +163,7 @@ void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *fsf_req)
 	}
 
 	response->fsf_command = fsf_req->fsf_command;
-	response->fsf_reqid = (unsigned long)fsf_req;
+	response->fsf_reqid = fsf_req->req_id;
 	response->fsf_seqno = fsf_req->seq_no;
 	response->fsf_issued = fsf_req->issued;
 	response->fsf_prot_status = qtcb->prefix.prot_status;
@@ -737,7 +737,7 @@ void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *fsf_req)
 	spin_lock_irqsave(&adapter->san_dbf_lock, flags);
 	memset(r, 0, sizeof(*r));
 	strncpy(r->tag, "octc", ZFCP_DBF_TAG_SIZE);
-	r->fsf_reqid = (unsigned long)fsf_req;
+	r->fsf_reqid = fsf_req->req_id;
 	r->fsf_seqno = fsf_req->seq_no;
 	r->s_id = fc_host_port_id(adapter->scsi_host);
 	r->d_id = wka_port->d_id;
@@ -773,7 +773,7 @@ void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *fsf_req)
 	spin_lock_irqsave(&adapter->san_dbf_lock, flags);
 	memset(r, 0, sizeof(*r));
 	strncpy(r->tag, "rctc", ZFCP_DBF_TAG_SIZE);
-	r->fsf_reqid = (unsigned long)fsf_req;
+	r->fsf_reqid = fsf_req->req_id;
 	r->fsf_seqno = fsf_req->seq_no;
 	r->s_id = wka_port->d_id;
 	r->d_id = fc_host_port_id(adapter->scsi_host);
@@ -803,7 +803,7 @@ static void zfcp_san_dbf_event_els(const char *tag, int level,
 	spin_lock_irqsave(&adapter->san_dbf_lock, flags);
 	memset(rec, 0, sizeof(*rec));
 	strncpy(rec->tag, tag, ZFCP_DBF_TAG_SIZE);
-	rec->fsf_reqid = (unsigned long)fsf_req;
+	rec->fsf_reqid = fsf_req->req_id;
 	rec->fsf_seqno = fsf_req->seq_no;
 	rec->s_id = s_id;
 	rec->d_id = d_id;
@@ -965,7 +965,7 @@ static void zfcp_scsi_dbf_event(const char *tag, const char *tag2, int level,
 						      ZFCP_DBF_SCSI_FCP_SNS_INFO);
 				}
 
-				rec->fsf_reqid = (unsigned long)fsf_req;
+				rec->fsf_reqid = fsf_req->req_id;
 				rec->fsf_seqno = fsf_req->seq_no;
 				rec->fsf_issued = fsf_req->issued;
 			}
-- 
cgit v0.10.2


From 955a21555dc8dc4a81da7063d595cd7558f413ce Mon Sep 17 00:00:00 2001
From: Christof Schmitt <christof.schmitt@de.ibm.com>
Date: Fri, 15 May 2009 13:18:15 +0200
Subject: [SCSI] zfcp: Update message and add description

Update the newly introduced message for the boxed status to conform to
match the style of s390 and zfcp messages.

Reviewed-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index 733fe3b..4f19b28 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -176,8 +176,8 @@ static int zfcp_ccw_notify(struct ccw_device *ccw_device, int event)
 					"ccnoti4", NULL);
 		break;
 	case CIO_BOXED:
-		dev_warn(&adapter->ccw_device->dev,
-			 "The ccw device did not respond in time.\n");
+		dev_warn(&adapter->ccw_device->dev, "The FCP device "
+			 "did not respond within the specified time\n");
 		zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti5", NULL);
 		break;
 	}
-- 
cgit v0.10.2


From a40a1bafe7da0afe61b1c20fc50e18c07ce724f9 Mon Sep 17 00:00:00 2001
From: Christof Schmitt <christof.schmitt@de.ibm.com>
Date: Fri, 15 May 2009 13:18:16 +0200
Subject: [SCSI] zfcp: Make queue_depth adjustable

zfcp did always set the queue_depth for SCSI devices to 32, not
allowing to change this. Introduce a kernel parameter zfcp.queue_depth
and the change_queue_depth callback to allow changing the queue_depth
when it is required.

Reviewed-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index e8fbeae..7d0da23 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -12,6 +12,10 @@
 #include "zfcp_ext.h"
 #include <asm/atomic.h>
 
+static unsigned int default_depth = 32;
+module_param_named(queue_depth, default_depth, uint, 0600);
+MODULE_PARM_DESC(queue_depth, "Default queue depth for new SCSI devices");
+
 /* Find start of Sense Information in FCP response unit*/
 char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *fcp_rsp_iu)
 {
@@ -24,6 +28,12 @@ char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *fcp_rsp_iu)
 	return fcp_sns_info_ptr;
 }
 
+static int zfcp_scsi_change_queue_depth(struct scsi_device *sdev, int depth)
+{
+	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
+	return sdev->queue_depth;
+}
+
 static void zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
 {
 	struct zfcp_unit *unit = (struct zfcp_unit *) sdpnt->hostdata;
@@ -34,7 +44,7 @@ static void zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
 static int zfcp_scsi_slave_configure(struct scsi_device *sdp)
 {
 	if (sdp->tagged_supported)
-		scsi_adjust_queue_depth(sdp, MSG_SIMPLE_TAG, 32);
+		scsi_adjust_queue_depth(sdp, MSG_SIMPLE_TAG, default_depth);
 	else
 		scsi_adjust_queue_depth(sdp, 0, 1);
 	return 0;
@@ -647,6 +657,7 @@ struct zfcp_data zfcp_data = {
 		.name			 = "zfcp",
 		.module			 = THIS_MODULE,
 		.proc_name		 = "zfcp",
+		.change_queue_depth	 = zfcp_scsi_change_queue_depth,
 		.slave_alloc		 = zfcp_scsi_slave_alloc,
 		.slave_configure	 = zfcp_scsi_slave_configure,
 		.slave_destroy		 = zfcp_scsi_slave_destroy,
-- 
cgit v0.10.2


From bc90c8632f63cd94246e3fb6f1b6d7ecba48f1a0 Mon Sep 17 00:00:00 2001
From: Christof Schmitt <christof.schmitt@de.ibm.com>
Date: Fri, 15 May 2009 13:18:17 +0200
Subject: [SCSI] zfcp: Remove unnecessary default case and assignments

enum dma_data_direction only has the 4 values DMA_BIDIRECTIONAL,
DMA_TO_DEVICE, DMA_FROM_DEVICE and DMA_NONE. No need to have the
default case. While changing this, setup sbtype in one place to make
sparse happy.

The default value of retval is already -EIO, so remove the
additional assignment for these two cases.

Reviewed-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 74dee32..8411730 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -2314,7 +2314,7 @@ int zfcp_fsf_send_fcp_command_task(struct zfcp_unit *unit,
 {
 	struct zfcp_fsf_req *req;
 	struct fcp_cmnd_iu *fcp_cmnd_iu;
-	unsigned int sbtype;
+	unsigned int sbtype = SBAL_FLAGS0_TYPE_READ;
 	int real_bytes, retval = -EIO;
 	struct zfcp_adapter *adapter = unit->port->adapter;
 
@@ -2356,11 +2356,9 @@ int zfcp_fsf_send_fcp_command_task(struct zfcp_unit *unit,
 	switch (scsi_cmnd->sc_data_direction) {
 	case DMA_NONE:
 		req->qtcb->bottom.io.data_direction = FSF_DATADIR_CMND;
-		sbtype = SBAL_FLAGS0_TYPE_READ;
 		break;
 	case DMA_FROM_DEVICE:
 		req->qtcb->bottom.io.data_direction = FSF_DATADIR_READ;
-		sbtype = SBAL_FLAGS0_TYPE_READ;
 		fcp_cmnd_iu->rddata = 1;
 		break;
 	case DMA_TO_DEVICE:
@@ -2369,8 +2367,6 @@ int zfcp_fsf_send_fcp_command_task(struct zfcp_unit *unit,
 		fcp_cmnd_iu->wddata = 1;
 		break;
 	case DMA_BIDIRECTIONAL:
-	default:
-		retval = -EIO;
 		goto failed_scsi_cmnd;
 	}
 
@@ -2394,9 +2390,7 @@ int zfcp_fsf_send_fcp_command_task(struct zfcp_unit *unit,
 					     scsi_sglist(scsi_cmnd),
 					     FSF_MAX_SBALS_PER_REQ);
 	if (unlikely(real_bytes < 0)) {
-		if (req->sbal_number < FSF_MAX_SBALS_PER_REQ)
-			retval = -EIO;
-		else {
+		if (req->sbal_number >= FSF_MAX_SBALS_PER_REQ) {
 			dev_err(&adapter->ccw_device->dev,
 				"Oversize data package, unit 0x%016Lx "
 				"on port 0x%016Lx closed\n",
-- 
cgit v0.10.2


From dceab655d9f7d99881c2033c8ff4e1c7b444e104 Mon Sep 17 00:00:00 2001
From: Christof Schmitt <christof.schmitt@de.ibm.com>
Date: Fri, 15 May 2009 13:18:18 +0200
Subject: [SCSI] zfcp: Add comments to switch/case fallthroughs

Add comments where there is a deliberate fall through in switch/case
statements. This makes some code checkers happy and makes it clear
that there is no missing break statement.

Reviewed-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index fdc9b43..3f64ade 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -880,6 +880,7 @@ static int zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *act)
 				zfcp_port_put(port);
 			return ZFCP_ERP_CONTINUES;
 		}
+		/* fall through */
 	case ZFCP_ERP_STEP_NAMESERVER_LOOKUP:
 		if (!port->d_id)
 			return ZFCP_ERP_FAILED;
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 8411730..b550c24 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -526,6 +526,7 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req)
 		break;
 	case FSF_TOPO_AL:
 		fc_host_port_type(shost) = FC_PORTTYPE_NLPORT;
+		/* fall through */
 	default:
 		dev_err(&adapter->ccw_device->dev,
 			"Unknown or unsupported arbitrated loop "
@@ -897,6 +898,7 @@ static void zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *req)
 		switch (fsq->word[0]) {
 		case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE:
 			zfcp_test_link(unit->port);
+			/* fall through */
 		case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED:
 			req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 			break;
@@ -993,6 +995,7 @@ static void zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *req)
 		break;
 	case FSF_PORT_HANDLE_NOT_VALID:
 		zfcp_erp_adapter_reopen(adapter, 0, "fsscth1", req);
+		/* fall through */
 	case FSF_GENERIC_COMMAND_REJECTED:
 	case FSF_PAYLOAD_SIZE_MISMATCH:
 	case FSF_REQUEST_SIZE_TOO_LARGE:
@@ -1590,8 +1593,10 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req)
 	case FSF_MAXIMUM_NUMBER_OF_PORTS_EXCEEDED:
 		dev_warn(&req->adapter->ccw_device->dev,
 			 "Opening WKA port 0x%x failed\n", wka_port->d_id);
+		/* fall through */
 	case FSF_ADAPTER_STATUS_AVAILABLE:
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
+		/* fall through */
 	case FSF_ACCESS_DENIED:
 		wka_port->status = ZFCP_WKA_PORT_OFFLINE;
 		break;
-- 
cgit v0.10.2


From a17c5855643afa7838f542cbd0a1ed9a73968cef Mon Sep 17 00:00:00 2001
From: Martin Petermann <martin.petermann@de.ibm.com>
Date: Fri, 15 May 2009 13:18:19 +0200
Subject: [SCSI] zfcp: Increase ref counter for port open requests

In rare cases, open port request might timeout, erp calls
zfcp_port_put, port gets dequeued. Now, the late returning (or
dismissed) fsf-port-open calls the fsf_port_open_handler that tries to
reference the port data structure leading to a kernel oops.

Signed-off-by: Martin Petermann <martin.petermann@de.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index b550c24..6fae268 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -1402,7 +1402,7 @@ static void zfcp_fsf_open_port_handler(struct zfcp_fsf_req *req)
 	struct fsf_plogi *plogi;
 
 	if (req->status & ZFCP_STATUS_FSFREQ_ERROR)
-		return;
+		goto out;
 
 	switch (header->fsf_status) {
 	case FSF_PORT_ALREADY_OPEN:
@@ -1464,6 +1464,9 @@ static void zfcp_fsf_open_port_handler(struct zfcp_fsf_req *req)
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	}
+
+out:
+	zfcp_port_put(port);
 }
 
 /**
@@ -1476,6 +1479,7 @@ int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
 	struct qdio_buffer_element *sbale;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_fsf_req *req;
+	struct zfcp_port *port = erp_action->port;
 	int retval = -EIO;
 
 	spin_lock_bh(&adapter->req_q_lock);
@@ -1496,16 +1500,18 @@ int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
 	req->handler = zfcp_fsf_open_port_handler;
-	req->qtcb->bottom.support.d_id = erp_action->port->d_id;
-	req->data = erp_action->port;
+	req->qtcb->bottom.support.d_id = port->d_id;
+	req->data = port;
 	req->erp_action = erp_action;
 	erp_action->fsf_req = req;
+	zfcp_port_get(port);
 
 	zfcp_fsf_start_erp_timer(req);
 	retval = zfcp_fsf_req_send(req);
 	if (retval) {
 		zfcp_fsf_req_free(req);
 		erp_action->fsf_req = NULL;
+		zfcp_port_put(port);
 	}
 out:
 	spin_unlock_bh(&adapter->req_q_lock);
-- 
cgit v0.10.2


From ea460a81919f2b3410e7fb30183c0256d068d87a Mon Sep 17 00:00:00 2001
From: Swen Schillig <swen@vnet.ibm.com>
Date: Fri, 15 May 2009 13:18:20 +0200
Subject: [SCSI] zfcp: Changed D_ID left port disabled

If the destination ID (D_ID) of a remote storage port changed, e.g.
re-plugged cable on the switch in a different switch port, the port
was never (re-)attached within Linux. This patch fixes the broken
mapping between the WWPN and the D_ID.

Signed-off-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 3f64ade..e50ea46 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -895,8 +895,13 @@ static int zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *act)
 				act->step = ZFCP_ERP_STEP_PORT_CLOSING;
 				return ZFCP_ERP_CONTINUES;
 			}
-		/* fall through otherwise */
 		}
+		if (port->d_id && !(p_status & ZFCP_STATUS_COMMON_NOESC)) {
+			port->d_id = 0;
+			_zfcp_erp_port_reopen(port, 0, "erpsoc1", NULL);
+			return ZFCP_ERP_EXIT;
+		}
+		/* fall through otherwise */
 	}
 	return ZFCP_ERP_FAILED;
 }
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 19ae084..bb2752b 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -150,9 +150,14 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range,
 	struct zfcp_port *port;
 
 	read_lock_irqsave(&zfcp_data.config_lock, flags);
-	list_for_each_entry(port, &fsf_req->adapter->port_list_head, list)
+	list_for_each_entry(port, &fsf_req->adapter->port_list_head, list) {
 		if ((port->d_id & range) == (elem->nport_did & range))
 			zfcp_test_link(port);
+		if (!port->d_id)
+			zfcp_erp_port_reopen(port,
+					     ZFCP_STATUS_COMMON_ERP_FAILED,
+					     "fcrscn1", NULL);
+	}
 
 	read_unlock_irqrestore(&zfcp_data.config_lock, flags);
 }
-- 
cgit v0.10.2


From 6fcf41d1d8796f41b893754324704e23971c2ea1 Mon Sep 17 00:00:00 2001
From: Christof Schmitt <christof.schmitt@de.ibm.com>
Date: Fri, 15 May 2009 13:18:21 +0200
Subject: [SCSI] zfcp: Keep ccw device and model id in zfcp_ccw.c

Keep the information about the device and model id in zfcp_ccw. This
requires an additional helper function to check for the privileged
cfdc subchannel, but it allows the removal of the redundant defines
from the zfcp_def header file.

Reviewed-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index 4f19b28..b2fe5cd 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -11,6 +11,24 @@
 
 #include "zfcp_ext.h"
 
+#define ZFCP_MODEL_PRIV 0x4
+
+static struct ccw_device_id zfcp_ccw_device_id[] = {
+	{ CCW_DEVICE_DEVTYPE(0x1731, 0x3, 0x1732, 0x3) },
+	{ CCW_DEVICE_DEVTYPE(0x1731, 0x3, 0x1732, ZFCP_MODEL_PRIV) },
+	{},
+};
+MODULE_DEVICE_TABLE(ccw, zfcp_ccw_device_id);
+
+/**
+ * zfcp_ccw_priv_sch - check if subchannel is privileged
+ * @adapter: Adapter/Subchannel to check
+ */
+int zfcp_ccw_priv_sch(struct zfcp_adapter *adapter)
+{
+	return adapter->ccw_device->id.dev_model == ZFCP_MODEL_PRIV;
+}
+
 /**
  * zfcp_ccw_probe - probe function of zfcp driver
  * @ccw_device: pointer to belonging ccw device
@@ -199,14 +217,6 @@ static void zfcp_ccw_shutdown(struct ccw_device *cdev)
 	up(&zfcp_data.config_sema);
 }
 
-static struct ccw_device_id zfcp_ccw_device_id[] = {
-	{ CCW_DEVICE_DEVTYPE(0x1731, 0x3, 0x1732, 0x3) },
-	{ CCW_DEVICE_DEVTYPE(0x1731, 0x3, 0x1732, 0x4) }, /* priv. */
-	{},
-};
-
-MODULE_DEVICE_TABLE(ccw, zfcp_ccw_device_id);
-
 static struct ccw_driver zfcp_ccw_driver = {
 	.owner       = THIS_MODULE,
 	.name        = "zfcp",
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 4c362a9..2074d45 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -47,13 +47,6 @@
 
 /********************* CIO/QDIO SPECIFIC DEFINES *****************************/
 
-/* Adapter Identification Parameters */
-#define ZFCP_CONTROL_UNIT_TYPE  0x1731
-#define ZFCP_CONTROL_UNIT_MODEL 0x03
-#define ZFCP_DEVICE_TYPE        0x1732
-#define ZFCP_DEVICE_MODEL       0x03
-#define ZFCP_DEVICE_MODEL_PRIV	0x04
-
 /* DMQ bug workaround: don't use last SBALE */
 #define ZFCP_MAX_SBALES_PER_SBAL	(QDIO_MAX_ELEMENTS_PER_BUFFER - 1)
 
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 2e31b53..120a9a1 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -27,6 +27,7 @@ extern int zfcp_sg_setup_table(struct scatterlist *, int);
 
 /* zfcp_ccw.c */
 extern int zfcp_ccw_register(void);
+extern int zfcp_ccw_priv_sch(struct zfcp_adapter *);
 extern struct zfcp_adapter *zfcp_get_adapter_by_busid(char *);
 
 /* zfcp_cfdc.c */
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 6fae268..e6dae37 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -1887,7 +1887,7 @@ static void zfcp_fsf_open_unit_handler(struct zfcp_fsf_req *req)
 
 		if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE) &&
 		    (adapter->adapter_features & FSF_FEATURE_LUN_SHARING) &&
-		    (adapter->ccw_device->id.dev_model != ZFCP_DEVICE_MODEL_PRIV)) {
+		    !zfcp_ccw_priv_sch(adapter)) {
 			exclusive = (bottom->lun_access_info &
 					FSF_UNIT_ACCESS_EXCLUSIVE);
 			readwrite = (bottom->lun_access_info &
-- 
cgit v0.10.2


From 3c621b3ee1432e7a2aca4a3b670b1d05f19ecf9c Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsi.com>
Date: Mon, 18 May 2009 12:59:41 -0600
Subject: [SCSI] mpt2sas: T10 DIF Support

This add support for type 1 and 3 DIF support per the Oracle API.

Signed-off-by: Eric Moore <eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 36b1d10..aba81fb 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -61,6 +61,7 @@
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_transport_sas.h>
 #include <scsi/scsi_dbg.h>
+#include <scsi/scsi_eh.h>
 
 #include "mpt2sas_debug.h"
 
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index e3a7967..5394f61 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -2390,6 +2390,106 @@ mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase)
 }
 
 /**
+ * _scsih_setup_eedp - setup MPI request for EEDP transfer
+ * @scmd: pointer to scsi command object
+ * @mpi_request: pointer to the SCSI_IO reqest message frame
+ *
+ * Supporting protection 1 and 3.
+ *
+ * Returns nothing
+ */
+static void
+_scsih_setup_eedp(struct scsi_cmnd *scmd, Mpi2SCSIIORequest_t *mpi_request)
+{
+	u16 eedp_flags;
+	unsigned char prot_op = scsi_get_prot_op(scmd);
+	unsigned char prot_type = scsi_get_prot_type(scmd);
+
+	if (prot_type == SCSI_PROT_DIF_TYPE0 ||
+	   prot_type == SCSI_PROT_DIF_TYPE2 ||
+	   prot_op == SCSI_PROT_NORMAL)
+		return;
+
+	if (prot_op ==  SCSI_PROT_READ_STRIP)
+		eedp_flags = MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP;
+	else if (prot_op ==  SCSI_PROT_WRITE_INSERT)
+		eedp_flags = MPI2_SCSIIO_EEDPFLAGS_INSERT_OP;
+	else
+		return;
+
+	mpi_request->EEDPBlockSize = scmd->device->sector_size;
+
+	switch (prot_type) {
+	case SCSI_PROT_DIF_TYPE1:
+
+		/*
+		* enable ref/guard checking
+		* auto increment ref tag
+		*/
+		mpi_request->EEDPFlags = eedp_flags |
+		    MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG |
+		    MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
+		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
+		mpi_request->CDB.EEDP32.PrimaryReferenceTag =
+		    cpu_to_be32(scsi_get_lba(scmd));
+
+		break;
+
+	case SCSI_PROT_DIF_TYPE3:
+
+		/*
+		* enable guard checking
+		*/
+		mpi_request->EEDPFlags = eedp_flags |
+		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
+
+		break;
+	}
+}
+
+/**
+ * _scsih_eedp_error_handling - return sense code for EEDP errors
+ * @scmd: pointer to scsi command object
+ * @ioc_status: ioc status
+ *
+ * Returns nothing
+ */
+static void
+_scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
+{
+	u8 ascq;
+	u8 sk;
+	u8 host_byte;
+
+	switch (ioc_status) {
+	case MPI2_IOCSTATUS_EEDP_GUARD_ERROR:
+		ascq = 0x01;
+		break;
+	case MPI2_IOCSTATUS_EEDP_APP_TAG_ERROR:
+		ascq = 0x02;
+		break;
+	case MPI2_IOCSTATUS_EEDP_REF_TAG_ERROR:
+		ascq = 0x03;
+		break;
+	default:
+		ascq = 0x00;
+		break;
+	}
+
+	if (scmd->sc_data_direction == DMA_TO_DEVICE) {
+		sk = ILLEGAL_REQUEST;
+		host_byte = DID_ABORT;
+	} else {
+		sk = ABORTED_COMMAND;
+		host_byte = DID_OK;
+	}
+
+	scsi_build_sense_buffer(0, scmd->sense_buffer, sk, 0x10, ascq);
+	scmd->result = DRIVER_SENSE << 24 | (host_byte << 16) |
+	    SAM_STAT_CHECK_CONDITION;
+}
+
+/**
  * scsih_qcmd - main scsi request entry point
  * @scmd: pointer to scsi command object
  * @done: function pointer to be invoked on completion
@@ -2470,6 +2570,7 @@ scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
 	}
 	mpi_request = mpt2sas_base_get_msg_frame(ioc, smid);
 	memset(mpi_request, 0, sizeof(Mpi2SCSIIORequest_t));
+	_scsih_setup_eedp(scmd, mpi_request);
 	mpi_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
 	if (sas_device_priv_data->sas_target->flags &
 	    MPT_TARGET_FLAGS_RAID_COMPONENT)
@@ -2604,6 +2705,15 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
 	case MPI2_IOCSTATUS_SCSI_EXT_TERMINATED:
 		desc_ioc_state = "scsi ext terminated";
 		break;
+	case MPI2_IOCSTATUS_EEDP_GUARD_ERROR:
+		desc_ioc_state = "eedp guard error";
+		break;
+	case MPI2_IOCSTATUS_EEDP_REF_TAG_ERROR:
+		desc_ioc_state = "eedp ref tag error";
+		break;
+	case MPI2_IOCSTATUS_EEDP_APP_TAG_ERROR:
+		desc_ioc_state = "eedp app tag error";
+		break;
 	default:
 		desc_ioc_state = "unknown";
 		break;
@@ -2939,6 +3049,11 @@ scsih_io_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
 			scmd->result = DID_RESET << 16;
 		break;
 
+	case MPI2_IOCSTATUS_EEDP_GUARD_ERROR:
+	case MPI2_IOCSTATUS_EEDP_REF_TAG_ERROR:
+	case MPI2_IOCSTATUS_EEDP_APP_TAG_ERROR:
+		_scsih_eedp_error_handling(scmd, ioc_status);
+		break;
 	case MPI2_IOCSTATUS_SCSI_PROTOCOL_ERROR:
 	case MPI2_IOCSTATUS_INVALID_FUNCTION:
 	case MPI2_IOCSTATUS_INVALID_SGL:
@@ -5503,6 +5618,9 @@ scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto out_add_shost_fail;
 	}
 
+	scsi_host_set_prot(shost, SHOST_DIF_TYPE1_PROTECTION
+	    | SHOST_DIF_TYPE3_PROTECTION);
+
 	/* event thread */
 	snprintf(ioc->firmware_event_name, sizeof(ioc->firmware_event_name),
 	    "fw_event%d", ioc->id);
-- 
cgit v0.10.2


From 993e0da7b767c0a7c1fd0079b16f3d28e6f25a48 Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsi.com>
Date: Mon, 18 May 2009 13:00:45 -0600
Subject: [SCSI] mpt2sas: LUN Reset Support

Adding new eh_target_reset_handler for target reset. Change the
eh_device_reset_handler so its sending
MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, instead of
MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET.  Add new function
_scsih_scsi_lookup_find_by_lun as a sanity check to insure I_T_L commands are
completed upon completing lun reset.

Signed-off-by: Eric Moore <eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 5394f61..f458376 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -884,6 +884,41 @@ _scsih_scsi_lookup_find_by_target(struct MPT2SAS_ADAPTER *ioc, int id,
 }
 
 /**
+ * _scsih_scsi_lookup_find_by_lun - search for matching channel:id:lun
+ * @ioc: per adapter object
+ * @id: target id
+ * @lun: lun number
+ * @channel: channel
+ * Context: This function will acquire ioc->scsi_lookup_lock.
+ *
+ * This will search for a matching channel:id:lun in the scsi_lookup array,
+ * returning 1 if found.
+ */
+static u8
+_scsih_scsi_lookup_find_by_lun(struct MPT2SAS_ADAPTER *ioc, int id,
+    unsigned int lun, int channel)
+{
+	u8 found;
+	unsigned long	flags;
+	int i;
+
+	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+	found = 0;
+	for (i = 0 ; i < ioc->request_depth; i++) {
+		if (ioc->scsi_lookup[i].scmd &&
+		    (ioc->scsi_lookup[i].scmd->device->id == id &&
+		    ioc->scsi_lookup[i].scmd->device->channel == channel &&
+		    ioc->scsi_lookup[i].scmd->device->lun == lun)) {
+			found = 1;
+			goto out;
+		}
+	}
+ out:
+	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+	return found;
+}
+
+/**
  * _scsih_get_chain_buffer_dma - obtain block of chains (dma address)
  * @ioc: per adapter object
  * @smid: system request message index
@@ -1889,7 +1924,6 @@ scsih_abort(struct scsi_cmnd *scmd)
 	return r;
 }
 
-
 /**
  * scsih_dev_reset - eh threads main device reset routine
  * @sdev: scsi device struct
@@ -1906,7 +1940,7 @@ scsih_dev_reset(struct scsi_cmnd *scmd)
 	u16	handle;
 	int r;
 
-	printk(MPT2SAS_INFO_FMT "attempting target reset! scmd(%p)\n",
+	printk(MPT2SAS_INFO_FMT "attempting device reset! scmd(%p)\n",
 	    ioc->name, scmd);
 	scsi_print_command(scmd);
 
@@ -1941,6 +1975,78 @@ scsih_dev_reset(struct scsi_cmnd *scmd)
 
 	mutex_lock(&ioc->tm_cmds.mutex);
 	mpt2sas_scsih_issue_tm(ioc, handle, 0,
+	    MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, scmd->device->lun,
+	    30);
+
+	/*
+	 *  sanity check see whether all commands to this device been
+	 *  completed
+	 */
+	if (_scsih_scsi_lookup_find_by_lun(ioc, scmd->device->id,
+	    scmd->device->lun, scmd->device->channel))
+		r = FAILED;
+	else
+		r = SUCCESS;
+	ioc->tm_cmds.status = MPT2_CMD_NOT_USED;
+	mutex_unlock(&ioc->tm_cmds.mutex);
+
+ out:
+	printk(MPT2SAS_INFO_FMT "device reset: %s scmd(%p)\n",
+	    ioc->name, ((r == SUCCESS) ? "SUCCESS" : "FAILED"), scmd);
+	return r;
+}
+
+/**
+ * scsih_target_reset - eh threads main target reset routine
+ * @sdev: scsi device struct
+ *
+ * Returns SUCCESS if command aborted else FAILED
+ */
+static int
+scsih_target_reset(struct scsi_cmnd *scmd)
+{
+	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
+	struct MPT2SAS_DEVICE *sas_device_priv_data;
+	struct _sas_device *sas_device;
+	unsigned long flags;
+	u16	handle;
+	int r;
+
+	printk(MPT2SAS_INFO_FMT "attempting target reset! scmd(%p)\n",
+	    ioc->name, scmd);
+	scsi_print_command(scmd);
+
+	sas_device_priv_data = scmd->device->hostdata;
+	if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
+		printk(MPT2SAS_INFO_FMT "target been deleted! scmd(%p)\n",
+		    ioc->name, scmd);
+		scmd->result = DID_NO_CONNECT << 16;
+		scmd->scsi_done(scmd);
+		r = SUCCESS;
+		goto out;
+	}
+
+	/* for hidden raid components obtain the volume_handle */
+	handle = 0;
+	if (sas_device_priv_data->sas_target->flags &
+	    MPT_TARGET_FLAGS_RAID_COMPONENT) {
+		spin_lock_irqsave(&ioc->sas_device_lock, flags);
+		sas_device = _scsih_sas_device_find_by_handle(ioc,
+		   sas_device_priv_data->sas_target->handle);
+		if (sas_device)
+			handle = sas_device->volume_handle;
+		spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+	} else
+		handle = sas_device_priv_data->sas_target->handle;
+
+	if (!handle) {
+		scmd->result = DID_RESET << 16;
+		r = FAILED;
+		goto out;
+	}
+
+	mutex_lock(&ioc->tm_cmds.mutex);
+	mpt2sas_scsih_issue_tm(ioc, handle, 0,
 	    MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0, 30);
 
 	/*
@@ -5255,6 +5361,7 @@ static struct scsi_host_template scsih_driver_template = {
 	.change_queue_type		= scsih_change_queue_type,
 	.eh_abort_handler		= scsih_abort,
 	.eh_device_reset_handler	= scsih_dev_reset,
+	.eh_target_reset_handler	= scsih_target_reset,
 	.eh_host_reset_handler		= scsih_host_reset,
 	.bios_param			= scsih_bios_param,
 	.can_queue			= 1,
-- 
cgit v0.10.2


From ddf59a35e98aa12255ed64c892271339504cc65c Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsi.com>
Date: Mon, 18 May 2009 13:01:29 -0600
Subject: [SCSI] mpt2sas: add query task support for MPT2COMMAND ioctl

This patch will find an active mid for a query_task request via the ioctl path.

This code is already there for task_abort, so this patch combining code using
the same fuction _ctl_set_task_mid(), previously _ctl_do_task_abort().

Signed-off-by: Eric Moore <eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index ba6ab17..14e473d 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
@@ -473,7 +473,7 @@ _ctl_poll(struct file *filep, poll_table *wait)
 }
 
 /**
- * _ctl_do_task_abort - assign an active smid to the abort_task
+ * _ctl_set_task_mid - assign an active smid to tm request
  * @ioc: per adapter object
  * @karg - (struct mpt2_ioctl_command)
  * @tm_request - pointer to mf from user space
@@ -482,7 +482,7 @@ _ctl_poll(struct file *filep, poll_table *wait)
  * during failure, the reply frame is filled.
  */
 static int
-_ctl_do_task_abort(struct MPT2SAS_ADAPTER *ioc, struct mpt2_ioctl_command *karg,
+_ctl_set_task_mid(struct MPT2SAS_ADAPTER *ioc, struct mpt2_ioctl_command *karg,
     Mpi2SCSITaskManagementRequest_t *tm_request)
 {
 	u8 found = 0;
@@ -494,6 +494,14 @@ _ctl_do_task_abort(struct MPT2SAS_ADAPTER *ioc, struct mpt2_ioctl_command *karg,
 	Mpi2SCSITaskManagementReply_t *tm_reply;
 	u32 sz;
 	u32 lun;
+	char *desc = NULL;
+
+	if (tm_request->TaskType == MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK)
+		desc = "abort_task";
+	else if (tm_request->TaskType == MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK)
+		desc = "query_task";
+	else
+		return 0;
 
 	lun = scsilun_to_int((struct scsi_lun *)tm_request->LUN);
 
@@ -517,13 +525,13 @@ _ctl_do_task_abort(struct MPT2SAS_ADAPTER *ioc, struct mpt2_ioctl_command *karg,
 	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
 
 	if (!found) {
-		dctlprintk(ioc, printk(MPT2SAS_DEBUG_FMT "ABORT_TASK: "
-		    "DevHandle(0x%04x), lun(%d), no active mid!!\n", ioc->name,
-		    tm_request->DevHandle, lun));
+		dctlprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: "
+		    "handle(0x%04x), lun(%d), no active mid!!\n", ioc->name,
+		    desc, tm_request->DevHandle, lun));
 		tm_reply = ioc->ctl_cmds.reply;
 		tm_reply->DevHandle = tm_request->DevHandle;
 		tm_reply->Function = MPI2_FUNCTION_SCSI_TASK_MGMT;
-		tm_reply->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK;
+		tm_reply->TaskType = tm_request->TaskType;
 		tm_reply->MsgLength = sizeof(Mpi2SCSITaskManagementReply_t)/4;
 		tm_reply->VP_ID = tm_request->VP_ID;
 		tm_reply->VF_ID = tm_request->VF_ID;
@@ -535,9 +543,9 @@ _ctl_do_task_abort(struct MPT2SAS_ADAPTER *ioc, struct mpt2_ioctl_command *karg,
 		return 1;
 	}
 
-	dctlprintk(ioc, printk(MPT2SAS_DEBUG_FMT "ABORT_TASK: "
-	    "DevHandle(0x%04x), lun(%d), smid(%d)\n", ioc->name,
-	    tm_request->DevHandle, lun, tm_request->TaskMID));
+	dctlprintk(ioc, printk(MPT2SAS_DEBUG_FMT "%s: "
+	    "handle(0x%04x), lun(%d), task_mid(%d)\n", ioc->name,
+	    desc, tm_request->DevHandle, lun, tm_request->TaskMID));
 	return 0;
 }
 
@@ -739,8 +747,10 @@ _ctl_do_mpt_command(struct MPT2SAS_ADAPTER *ioc,
 		    (Mpi2SCSITaskManagementRequest_t *)mpi_request;
 
 		if (tm_request->TaskType ==
-		    MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK) {
-			if (_ctl_do_task_abort(ioc, &karg, tm_request)) {
+		    MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK ||
+		    tm_request->TaskType ==
+		    MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK) {
+			if (_ctl_set_task_mid(ioc, &karg, tm_request)) {
 				mpt2sas_base_free_smid(ioc, smid);
 				goto out;
 			}
-- 
cgit v0.10.2


From d5d135b3a76750df250e18956476b6b2c4ad97c2 Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsi.com>
Date: Mon, 18 May 2009 13:02:08 -0600
Subject: [SCSI] mpt2sas: using the same naming convention for all static
 function

This fix's is for all local function so their name has the "_" preceeding
the module name, then function name.  Most the code is already is using this
naming convention.

Signed-off-by: Eric Moore <eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index f458376..2a01a5f 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -197,12 +197,12 @@ static struct pci_device_id scsih_pci_table[] = {
 MODULE_DEVICE_TABLE(pci, scsih_pci_table);
 
 /**
- * scsih_set_debug_level - global setting of ioc->logging_level.
+ * _scsih_set_debug_level - global setting of ioc->logging_level.
  *
  * Note: The logging levels are defined in mpt2sas_debug.h.
  */
 static int
-scsih_set_debug_level(const char *val, struct kernel_param *kp)
+_scsih_set_debug_level(const char *val, struct kernel_param *kp)
 {
 	int ret = param_set_int(val, kp);
 	struct MPT2SAS_ADAPTER *ioc;
@@ -215,7 +215,7 @@ scsih_set_debug_level(const char *val, struct kernel_param *kp)
 		ioc->logging_level = logging_level;
 	return 0;
 }
-module_param_call(logging_level, scsih_set_debug_level, param_get_int,
+module_param_call(logging_level, _scsih_set_debug_level, param_get_int,
     &logging_level, 0644);
 
 /**
@@ -1082,14 +1082,14 @@ _scsih_build_scatter_gather(struct MPT2SAS_ADAPTER *ioc,
 }
 
 /**
- * scsih_change_queue_depth - setting device queue depth
+ * _scsih_change_queue_depth - setting device queue depth
  * @sdev: scsi device struct
  * @qdepth: requested queue depth
  *
  * Returns queue depth.
  */
 static int
-scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
+_scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
 {
 	struct Scsi_Host *shost = sdev->host;
 	int max_depth;
@@ -1114,14 +1114,14 @@ scsih_change_queue_depth(struct scsi_device *sdev, int qdepth)
 }
 
 /**
- * scsih_change_queue_depth - changing device queue tag type
+ * _scsih_change_queue_depth - changing device queue tag type
  * @sdev: scsi device struct
  * @tag_type: requested tag type
  *
  * Returns queue tag type.
  */
 static int
-scsih_change_queue_type(struct scsi_device *sdev, int tag_type)
+_scsih_change_queue_type(struct scsi_device *sdev, int tag_type)
 {
 	if (sdev->tagged_supported) {
 		scsi_set_tag_type(sdev, tag_type);
@@ -1136,14 +1136,14 @@ scsih_change_queue_type(struct scsi_device *sdev, int tag_type)
 }
 
 /**
- * scsih_target_alloc - target add routine
+ * _scsih_target_alloc - target add routine
  * @starget: scsi target struct
  *
  * Returns 0 if ok. Any other return is assumed to be an error and
  * the device is ignored.
  */
 static int
-scsih_target_alloc(struct scsi_target *starget)
+_scsih_target_alloc(struct scsi_target *starget)
 {
 	struct Scsi_Host *shost = dev_to_shost(&starget->dev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -1198,13 +1198,13 @@ scsih_target_alloc(struct scsi_target *starget)
 }
 
 /**
- * scsih_target_destroy - target destroy routine
+ * _scsih_target_destroy - target destroy routine
  * @starget: scsi target struct
  *
  * Returns nothing.
  */
 static void
-scsih_target_destroy(struct scsi_target *starget)
+_scsih_target_destroy(struct scsi_target *starget)
 {
 	struct Scsi_Host *shost = dev_to_shost(&starget->dev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -1247,14 +1247,14 @@ scsih_target_destroy(struct scsi_target *starget)
 }
 
 /**
- * scsih_slave_alloc - device add routine
+ * _scsih_slave_alloc - device add routine
  * @sdev: scsi device struct
  *
  * Returns 0 if ok. Any other return is assumed to be an error and
  * the device is ignored.
  */
 static int
-scsih_slave_alloc(struct scsi_device *sdev)
+_scsih_slave_alloc(struct scsi_device *sdev)
 {
 	struct Scsi_Host *shost;
 	struct MPT2SAS_ADAPTER *ioc;
@@ -1308,13 +1308,13 @@ scsih_slave_alloc(struct scsi_device *sdev)
 }
 
 /**
- * scsih_slave_destroy - device destroy routine
+ * _scsih_slave_destroy - device destroy routine
  * @sdev: scsi device struct
  *
  * Returns nothing.
  */
 static void
-scsih_slave_destroy(struct scsi_device *sdev)
+_scsih_slave_destroy(struct scsi_device *sdev)
 {
 	struct MPT2SAS_TARGET *sas_target_priv_data;
 	struct scsi_target *starget;
@@ -1330,13 +1330,13 @@ scsih_slave_destroy(struct scsi_device *sdev)
 }
 
 /**
- * scsih_display_sata_capabilities - sata capabilities
+ * _scsih_display_sata_capabilities - sata capabilities
  * @ioc: per adapter object
  * @sas_device: the sas_device object
  * @sdev: scsi device struct
  */
 static void
-scsih_display_sata_capabilities(struct MPT2SAS_ADAPTER *ioc,
+_scsih_display_sata_capabilities(struct MPT2SAS_ADAPTER *ioc,
     struct _sas_device *sas_device, struct scsi_device *sdev)
 {
 	Mpi2ConfigReply_t mpi_reply;
@@ -1436,14 +1436,14 @@ _scsih_get_volume_capabilities(struct MPT2SAS_ADAPTER *ioc,
 }
 
 /**
- * scsih_slave_configure - device configure routine.
+ * _scsih_slave_configure - device configure routine.
  * @sdev: scsi device struct
  *
  * Returns 0 if ok. Any other return is assumed to be an error and
  * the device is ignored.
  */
 static int
-scsih_slave_configure(struct scsi_device *sdev)
+_scsih_slave_configure(struct scsi_device *sdev)
 {
 	struct Scsi_Host *shost = sdev->host;
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -1524,7 +1524,7 @@ scsih_slave_configure(struct scsi_device *sdev)
 		    r_level, raid_device->handle,
 		    (unsigned long long)raid_device->wwid,
 		    raid_device->num_pds, ds);
-		scsih_change_queue_depth(sdev, qdepth);
+		_scsih_change_queue_depth(sdev, qdepth);
 		return 0;
 	}
 
@@ -1567,10 +1567,10 @@ scsih_slave_configure(struct scsi_device *sdev)
 		    sas_device->slot);
 
 		if (!ssp_target)
-			scsih_display_sata_capabilities(ioc, sas_device, sdev);
+			_scsih_display_sata_capabilities(ioc, sas_device, sdev);
 	}
 
-	scsih_change_queue_depth(sdev, qdepth);
+	_scsih_change_queue_depth(sdev, qdepth);
 
 	if (ssp_target)
 		sas_read_port_mode_page(sdev);
@@ -1578,7 +1578,7 @@ scsih_slave_configure(struct scsi_device *sdev)
 }
 
 /**
- * scsih_bios_param - fetch head, sector, cylinder info for a disk
+ * _scsih_bios_param - fetch head, sector, cylinder info for a disk
  * @sdev: scsi device struct
  * @bdev: pointer to block device context
  * @capacity: device size (in 512 byte sectors)
@@ -1590,7 +1590,7 @@ scsih_slave_configure(struct scsi_device *sdev)
  * Return nothing.
  */
 static int
-scsih_bios_param(struct scsi_device *sdev, struct block_device *bdev,
+_scsih_bios_param(struct scsi_device *sdev, struct block_device *bdev,
     sector_t capacity, int params[])
 {
 	int		heads;
@@ -1671,7 +1671,7 @@ _scsih_response_code(struct MPT2SAS_ADAPTER *ioc, u8 response_code)
 }
 
 /**
- * scsih_tm_done - tm completion routine
+ * _scsih_tm_done - tm completion routine
  * @ioc: per adapter object
  * @smid: system request message index
  * @VF_ID: virtual function id
@@ -1683,7 +1683,7 @@ _scsih_response_code(struct MPT2SAS_ADAPTER *ioc, u8 response_code)
  * Return nothing.
  */
 static void
-scsih_tm_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
+_scsih_tm_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
 {
 	MPI2DefaultReply_t *mpi_reply;
 
@@ -1858,13 +1858,13 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint lun,
 }
 
 /**
- * scsih_abort - eh threads main abort routine
+ * _scsih_abort - eh threads main abort routine
  * @sdev: scsi device struct
  *
  * Returns SUCCESS if command aborted else FAILED
  */
 static int
-scsih_abort(struct scsi_cmnd *scmd)
+_scsih_abort(struct scsi_cmnd *scmd)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
@@ -1925,13 +1925,13 @@ scsih_abort(struct scsi_cmnd *scmd)
 }
 
 /**
- * scsih_dev_reset - eh threads main device reset routine
+ * _scsih_dev_reset - eh threads main device reset routine
  * @sdev: scsi device struct
  *
  * Returns SUCCESS if command aborted else FAILED
  */
 static int
-scsih_dev_reset(struct scsi_cmnd *scmd)
+_scsih_dev_reset(struct scsi_cmnd *scmd)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
@@ -1997,13 +1997,13 @@ scsih_dev_reset(struct scsi_cmnd *scmd)
 }
 
 /**
- * scsih_target_reset - eh threads main target reset routine
+ * _scsih_target_reset - eh threads main target reset routine
  * @sdev: scsi device struct
  *
  * Returns SUCCESS if command aborted else FAILED
  */
 static int
-scsih_target_reset(struct scsi_cmnd *scmd)
+_scsih_target_reset(struct scsi_cmnd *scmd)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
@@ -2068,13 +2068,13 @@ scsih_target_reset(struct scsi_cmnd *scmd)
 }
 
 /**
- * scsih_abort - eh threads main host reset routine
+ * _scsih_abort - eh threads main host reset routine
  * @sdev: scsi device struct
  *
  * Returns SUCCESS if command aborted else FAILED
  */
 static int
-scsih_host_reset(struct scsi_cmnd *scmd)
+_scsih_host_reset(struct scsi_cmnd *scmd)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	int r, retval;
@@ -2596,7 +2596,7 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
 }
 
 /**
- * scsih_qcmd - main scsi request entry point
+ * _scsih_qcmd - main scsi request entry point
  * @scmd: pointer to scsi command object
  * @done: function pointer to be invoked on completion
  *
@@ -2607,7 +2607,7 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
  * SCSI_MLQUEUE_HOST_BUSY if the entire host queue is full
  */
 static int
-scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
+_scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host);
 	struct MPT2SAS_DEVICE *sas_device_priv_data;
@@ -2999,7 +2999,7 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle)
 }
 
 /**
- * scsih_io_done - scsi request callback
+ * _scsih_io_done - scsi request callback
  * @ioc: per adapter object
  * @smid: system request message index
  * @VF_ID: virtual function id
@@ -3010,7 +3010,7 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle)
  * Return nothing.
  */
 static void
-scsih_io_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
+_scsih_io_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 VF_ID, u32 reply)
 {
 	Mpi2SCSIIORequest_t *mpi_request;
 	Mpi2SCSIIOReply_t *mpi_reply;
@@ -5351,19 +5351,19 @@ static struct scsi_host_template scsih_driver_template = {
 	.module				= THIS_MODULE,
 	.name				= "Fusion MPT SAS Host",
 	.proc_name			= MPT2SAS_DRIVER_NAME,
-	.queuecommand			= scsih_qcmd,
-	.target_alloc			= scsih_target_alloc,
-	.slave_alloc			= scsih_slave_alloc,
-	.slave_configure		= scsih_slave_configure,
-	.target_destroy			= scsih_target_destroy,
-	.slave_destroy			= scsih_slave_destroy,
-	.change_queue_depth 		= scsih_change_queue_depth,
-	.change_queue_type		= scsih_change_queue_type,
-	.eh_abort_handler		= scsih_abort,
-	.eh_device_reset_handler	= scsih_dev_reset,
-	.eh_target_reset_handler	= scsih_target_reset,
-	.eh_host_reset_handler		= scsih_host_reset,
-	.bios_param			= scsih_bios_param,
+	.queuecommand			= _scsih_qcmd,
+	.target_alloc			= _scsih_target_alloc,
+	.slave_alloc			= _scsih_slave_alloc,
+	.slave_configure		= _scsih_slave_configure,
+	.target_destroy			= _scsih_target_destroy,
+	.slave_destroy			= _scsih_slave_destroy,
+	.change_queue_depth 		= _scsih_change_queue_depth,
+	.change_queue_type		= _scsih_change_queue_type,
+	.eh_abort_handler		= _scsih_abort,
+	.eh_device_reset_handler	= _scsih_dev_reset,
+	.eh_target_reset_handler	= _scsih_target_reset,
+	.eh_host_reset_handler		= _scsih_host_reset,
+	.bios_param			= _scsih_bios_param,
 	.can_queue			= 1,
 	.this_id			= -1,
 	.sg_tablesize			= MPT2SAS_SG_DEPTH,
@@ -5450,13 +5450,13 @@ _scsih_expander_node_remove(struct MPT2SAS_ADAPTER *ioc,
 }
 
 /**
- * scsih_remove - detach and remove add host
+ * _scsih_remove - detach and remove add host
  * @pdev: PCI device struct
  *
  * Return nothing.
  */
 static void __devexit
-scsih_remove(struct pci_dev *pdev)
+_scsih_remove(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -5664,14 +5664,14 @@ _scsih_probe_devices(struct MPT2SAS_ADAPTER *ioc)
 }
 
 /**
- * scsih_probe - attach and add scsi host
+ * _scsih_probe - attach and add scsi host
  * @pdev: PCI device struct
  * @id: pci device id
  *
  * Returns 0 success, anything else error.
  */
 static int
-scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+_scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct MPT2SAS_ADAPTER *ioc;
 	struct Scsi_Host *shost;
@@ -5761,14 +5761,14 @@ scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 #ifdef CONFIG_PM
 /**
- * scsih_suspend - power management suspend main entry point
+ * _scsih_suspend - power management suspend main entry point
  * @pdev: PCI device struct
  * @state: PM state change to (usually PCI_D3)
  *
  * Returns 0 success, anything else error.
  */
 static int
-scsih_suspend(struct pci_dev *pdev, pm_message_t state)
+_scsih_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -5789,13 +5789,13 @@ scsih_suspend(struct pci_dev *pdev, pm_message_t state)
 }
 
 /**
- * scsih_resume - power management resume main entry point
+ * _scsih_resume - power management resume main entry point
  * @pdev: PCI device struct
  *
  * Returns 0 success, anything else error.
  */
 static int
-scsih_resume(struct pci_dev *pdev)
+_scsih_resume(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -5824,22 +5824,22 @@ scsih_resume(struct pci_dev *pdev)
 static struct pci_driver scsih_driver = {
 	.name		= MPT2SAS_DRIVER_NAME,
 	.id_table	= scsih_pci_table,
-	.probe		= scsih_probe,
-	.remove		= __devexit_p(scsih_remove),
+	.probe		= _scsih_probe,
+	.remove		= __devexit_p(_scsih_remove),
 #ifdef CONFIG_PM
-	.suspend	= scsih_suspend,
-	.resume		= scsih_resume,
+	.suspend	= _scsih_suspend,
+	.resume		= _scsih_resume,
 #endif
 };
 
 
 /**
- * scsih_init - main entry point for this driver.
+ * _scsih_init - main entry point for this driver.
  *
  * Returns 0 success, anything else error.
  */
 static int __init
-scsih_init(void)
+_scsih_init(void)
 {
 	int error;
 
@@ -5855,10 +5855,10 @@ scsih_init(void)
 	mpt2sas_base_initialize_callback_handler();
 
 	 /* queuecommand callback hander */
-	scsi_io_cb_idx = mpt2sas_base_register_callback_handler(scsih_io_done);
+	scsi_io_cb_idx = mpt2sas_base_register_callback_handler(_scsih_io_done);
 
 	/* task managment callback handler */
-	tm_cb_idx = mpt2sas_base_register_callback_handler(scsih_tm_done);
+	tm_cb_idx = mpt2sas_base_register_callback_handler(_scsih_tm_done);
 
 	/* base internal commands callback handler */
 	base_cb_idx = mpt2sas_base_register_callback_handler(mpt2sas_base_done);
@@ -5884,12 +5884,12 @@ scsih_init(void)
 }
 
 /**
- * scsih_exit - exit point for this driver (when it is a module).
+ * _scsih_exit - exit point for this driver (when it is a module).
  *
  * Returns 0 success, anything else error.
  */
 static void __exit
-scsih_exit(void)
+_scsih_exit(void)
 {
 	printk(KERN_INFO "mpt2sas version %s unloading\n",
 	    MPT2SAS_DRIVER_VERSION);
@@ -5907,5 +5907,5 @@ scsih_exit(void)
 	mpt2sas_ctl_exit();
 }
 
-module_init(scsih_init);
-module_exit(scsih_exit);
+module_init(_scsih_init);
+module_exit(_scsih_exit);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index e03dc0b..e4d8586 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -264,7 +264,7 @@ struct rep_manu_reply{
 };
 
 /**
- * transport_expander_report_manufacture - obtain SMP report_manufacture
+ * _transport_expander_report_manufacture - obtain SMP report_manufacture
  * @ioc: per adapter object
  * @sas_address: expander sas address
  * @edev: the sas_expander_device object
@@ -274,7 +274,7 @@ struct rep_manu_reply{
  * Returns 0 for success, non-zero for failure.
  */
 static int
-transport_expander_report_manufacture(struct MPT2SAS_ADAPTER *ioc,
+_transport_expander_report_manufacture(struct MPT2SAS_ADAPTER *ioc,
     u64 sas_address, struct sas_expander_device *edev)
 {
 	Mpi2SmpPassthroughRequest_t *mpi_request;
@@ -578,7 +578,7 @@ mpt2sas_transport_port_add(struct MPT2SAS_ADAPTER *ioc, u16 handle,
 	    MPI2_SAS_DEVICE_INFO_EDGE_EXPANDER ||
 	    mpt2sas_port->remote_identify.device_type ==
 	    MPI2_SAS_DEVICE_INFO_FANOUT_EXPANDER)
-		transport_expander_report_manufacture(ioc,
+		_transport_expander_report_manufacture(ioc,
 		    mpt2sas_port->remote_identify.sas_address,
 		    rphy_to_expander_device(rphy));
 
@@ -852,7 +852,7 @@ rphy_to_ioc(struct sas_rphy *rphy)
 }
 
 /**
- * transport_get_linkerrors -
+ * _transport_get_linkerrors -
  * @phy: The sas phy object
  *
  * Only support sas_host direct attached phys.
@@ -860,7 +860,7 @@ rphy_to_ioc(struct sas_rphy *rphy)
  *
  */
 static int
-transport_get_linkerrors(struct sas_phy *phy)
+_transport_get_linkerrors(struct sas_phy *phy)
 {
 	struct MPT2SAS_ADAPTER *ioc = phy_to_ioc(phy);
 	struct _sas_phy *mpt2sas_phy;
@@ -903,14 +903,14 @@ transport_get_linkerrors(struct sas_phy *phy)
 }
 
 /**
- * transport_get_enclosure_identifier -
+ * _transport_get_enclosure_identifier -
  * @phy: The sas phy object
  *
  * Obtain the enclosure logical id for an expander.
  * Returns 0 for success, non-zero for failure.
  */
 static int
-transport_get_enclosure_identifier(struct sas_rphy *rphy, u64 *identifier)
+_transport_get_enclosure_identifier(struct sas_rphy *rphy, u64 *identifier)
 {
 	struct MPT2SAS_ADAPTER *ioc = rphy_to_ioc(rphy);
 	struct _sas_node *sas_expander;
@@ -929,13 +929,13 @@ transport_get_enclosure_identifier(struct sas_rphy *rphy, u64 *identifier)
 }
 
 /**
- * transport_get_bay_identifier -
+ * _transport_get_bay_identifier -
  * @phy: The sas phy object
  *
  * Returns the slot id for a device that resides inside an enclosure.
  */
 static int
-transport_get_bay_identifier(struct sas_rphy *rphy)
+_transport_get_bay_identifier(struct sas_rphy *rphy)
 {
 	struct MPT2SAS_ADAPTER *ioc = rphy_to_ioc(rphy);
 	struct _sas_device *sas_device;
@@ -953,7 +953,7 @@ transport_get_bay_identifier(struct sas_rphy *rphy)
 }
 
 /**
- * transport_phy_reset -
+ * _transport_phy_reset -
  * @phy: The sas phy object
  * @hard_reset:
  *
@@ -961,7 +961,7 @@ transport_get_bay_identifier(struct sas_rphy *rphy)
  * Returns 0 for success, non-zero for failure.
  */
 static int
-transport_phy_reset(struct sas_phy *phy, int hard_reset)
+_transport_phy_reset(struct sas_phy *phy, int hard_reset)
 {
 	struct MPT2SAS_ADAPTER *ioc = phy_to_ioc(phy);
 	struct _sas_phy *mpt2sas_phy;
@@ -1002,7 +1002,7 @@ transport_phy_reset(struct sas_phy *phy, int hard_reset)
 }
 
 /**
- * transport_smp_handler - transport portal for smp passthru
+ * _transport_smp_handler - transport portal for smp passthru
  * @shost: shost object
  * @rphy: sas transport rphy object
  * @req:
@@ -1012,7 +1012,7 @@ transport_phy_reset(struct sas_phy *phy, int hard_reset)
  *           smp_rep_general /sys/class/bsg/expander-5:0
  */
 static int
-transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
+_transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
     struct request *req)
 {
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
@@ -1201,11 +1201,11 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 }
 
 struct sas_function_template mpt2sas_transport_functions = {
-	.get_linkerrors		= transport_get_linkerrors,
-	.get_enclosure_identifier = transport_get_enclosure_identifier,
-	.get_bay_identifier	= transport_get_bay_identifier,
-	.phy_reset		= transport_phy_reset,
-	.smp_handler		= transport_smp_handler,
+	.get_linkerrors		= _transport_get_linkerrors,
+	.get_enclosure_identifier = _transport_get_enclosure_identifier,
+	.get_bay_identifier	= _transport_get_bay_identifier,
+	.phy_reset		= _transport_phy_reset,
+	.smp_handler		= _transport_smp_handler,
 };
 
 struct scsi_transport_template *mpt2sas_transport_template;
-- 
cgit v0.10.2


From d17bf602fbfe4a5ab134a90f6c082fa161d398ee Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsi.com>
Date: Mon, 18 May 2009 13:02:49 -0600
Subject: [SCSI] mpt2sas: bump driver version to 01.100.03.00

Bump driver version.

Signed-off-by: Eric Moore <eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index aba81fb..286c185 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -69,10 +69,10 @@
 #define MPT2SAS_DRIVER_NAME		"mpt2sas"
 #define MPT2SAS_AUTHOR	"LSI Corporation <DL-MPTFusionLinux@lsi.com>"
 #define MPT2SAS_DESCRIPTION	"LSI MPT Fusion SAS 2.0 Device Driver"
-#define MPT2SAS_DRIVER_VERSION		"01.100.02.00"
+#define MPT2SAS_DRIVER_VERSION		"01.100.03.00"
 #define MPT2SAS_MAJOR_VERSION		01
 #define MPT2SAS_MINOR_VERSION		100
-#define MPT2SAS_BUILD_VERSION		02
+#define MPT2SAS_BUILD_VERSION		03
 #define MPT2SAS_RELEASE_VERSION		00
 
 /*
-- 
cgit v0.10.2


From a366695592ebc9151dd5a248681270f0925d8324 Mon Sep 17 00:00:00 2001
From: Abhijeet Joglekar <abjoglek@cisco.com>
Date: Fri, 1 May 2009 10:01:26 -0700
Subject: [SCSI] libfc,fcoe,fnic: Separate rport and lport max retry counts

This allows fnic to configure number of retries for lport and rport
separately.

Signed-off-by: Abhijeet Joglekar <abjoglek@cisco.com>
Acked-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index d08121f..6acb777 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -146,6 +146,7 @@ static int fcoe_lport_config(struct fc_lport *lp)
 	lp->link_up = 0;
 	lp->qfull = 0;
 	lp->max_retry_count = 3;
+	lp->max_rport_retry_count = 3;
 	lp->e_d_tov = 2 * 1000;	/* FC-FS default */
 	lp->r_a_tov = 2 * 2 * 1000;
 	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 32ef6b8..a840728 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -680,6 +680,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
 	}
 
 	lp->max_retry_count = fnic->config.flogi_retries;
+	lp->max_rport_retry_count = fnic->config.plogi_retries;
 	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
 			      FCP_SPPF_CONF_COMPL);
 	if (fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR)
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 3f5094e..7bfbff7 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -478,7 +478,7 @@ static void fc_rport_error_retry(struct fc_rport *rport, struct fc_frame *fp)
 	if (PTR_ERR(fp) == -FC_EX_CLOSED)
 		return fc_rport_error(rport, fp);
 
-	if (rdata->retries < rdata->local_port->max_retry_count) {
+	if (rdata->retries < rdata->local_port->max_rport_retry_count) {
 		FC_DEBUG_RPORT("Error %ld in state %s, retrying\n",
 			       PTR_ERR(fp), fc_rport_state(rport));
 		rdata->retries++;
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 45f9cc6..ebdd9f4 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -679,6 +679,7 @@ struct fc_lport {
 	unsigned int		e_d_tov;
 	unsigned int		r_a_tov;
 	u8			max_retry_count;
+	u8			max_rport_retry_count;
 	u16			link_speed;
 	u16			link_supported_speeds;
 	u16			lro_xid;	/* max xid for fcoe lro */
-- 
cgit v0.10.2


From 6a68afe3a2971953e218e509b16eae0ece43f9ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Sat, 23 May 2009 23:14:10 +0200
Subject: IXP4xx: Ethernet and WAN drivers now support "high" hardware queues.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index a740053..d304c73 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -456,7 +456,8 @@ static inline void queue_put_desc(unsigned int queue, u32 phys,
 	debug_desc(phys, desc);
 	BUG_ON(phys & 0x1F);
 	qmgr_put_entry(queue, phys);
-	BUG_ON(qmgr_stat_overflow(queue));
+	/* Don't check for queue overflow here, we've allocated sufficient
+	   length and queues >= 32 don't support this check anyway. */
 }
 
 
@@ -512,8 +513,8 @@ static int eth_poll(struct napi_struct *napi, int budget)
 #endif
 			napi_complete(napi);
 			qmgr_enable_irq(rxq);
-			if (!qmgr_stat_empty(rxq) &&
-			    napi_reschedule(napi)) {
+			if (!qmgr_stat_nearly_empty(rxq) &&
+			    napi_reschedule(napi)) { /* really empty in fact */
 #if DEBUG_RX
 				printk(KERN_DEBUG "%s: eth_poll"
 				       " napi_reschedule successed\n",
@@ -630,7 +631,8 @@ static void eth_txdone_irq(void *unused)
 			port->tx_buff_tab[n_desc] = NULL;
 		}
 
-		start = qmgr_stat_empty(port->plat->txreadyq);
+		/* really empty in fact */
+		start = qmgr_stat_nearly_empty(port->plat->txreadyq);
 		queue_put_desc(port->plat->txreadyq, phys, desc);
 		if (start) {
 #if DEBUG_TX
@@ -708,13 +710,14 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	queue_put_desc(TX_QUEUE(port->id), tx_desc_phys(port, n), desc);
 	dev->trans_start = jiffies;
 
-	if (qmgr_stat_empty(txreadyq)) {
+	if (qmgr_stat_nearly_empty(txreadyq)) { /* really empty in fact */
 #if DEBUG_TX
 		printk(KERN_DEBUG "%s: eth_xmit queue full\n", dev->name);
 #endif
 		netif_stop_queue(dev);
 		/* we could miss TX ready interrupt */
-		if (!qmgr_stat_empty(txreadyq)) {
+		/* really empty in fact */
+		if (!qmgr_stat_nearly_empty(txreadyq)) {
 #if DEBUG_TX
 			printk(KERN_DEBUG "%s: eth_xmit ready again\n",
 			       dev->name);
diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 765a7f5..1e56e58 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -579,7 +579,8 @@ static inline void queue_put_desc(unsigned int queue, u32 phys,
 	debug_desc(phys, desc);
 	BUG_ON(phys & 0x1F);
 	qmgr_put_entry(queue, phys);
-	BUG_ON(qmgr_stat_overflow(queue));
+	/* Don't check for queue overflow here, we've allocated sufficient
+	   length and queues >= 32 don't support this check anyway. */
 }
 
 
@@ -789,7 +790,8 @@ static void hss_hdlc_txdone_irq(void *pdev)
 		free_buffer_irq(port->tx_buff_tab[n_desc]);
 		port->tx_buff_tab[n_desc] = NULL;
 
-		start = qmgr_stat_empty(port->plat->txreadyq);
+		/* really empty in fact */
+		start = qmgr_stat_nearly_empty(port->plat->txreadyq);
 		queue_put_desc(port->plat->txreadyq,
 			       tx_desc_phys(port, n_desc), desc);
 		if (start) {
@@ -867,13 +869,13 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev)
 	queue_put_desc(queue_ids[port->id].tx, tx_desc_phys(port, n), desc);
 	dev->trans_start = jiffies;
 
-	if (qmgr_stat_empty(txreadyq)) {
+	if (qmgr_stat_nearly_empty(txreadyq)) { /* really empty in fact */
 #if DEBUG_TX
 		printk(KERN_DEBUG "%s: hss_hdlc_xmit queue full\n", dev->name);
 #endif
 		netif_stop_queue(dev);
 		/* we could miss TX ready interrupt */
-		if (!qmgr_stat_empty(txreadyq)) {
+		if (!qmgr_stat_nearly_empty(txreadyq)) {
 #if DEBUG_TX
 			printk(KERN_DEBUG "%s: hss_hdlc_xmit ready again\n",
 			       dev->name);
-- 
cgit v0.10.2


From 2e418400728a9fcacb2ab75f0547584a56b8a584 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Sat, 23 May 2009 23:14:59 +0200
Subject: IXP4xx: Whitespace fixes in the Ethernet driver.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index d304c73..672c962 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -817,29 +817,29 @@ static int request_queues(struct port *port)
 	int err;
 
 	err = qmgr_request_queue(RXFREE_QUEUE(port->id), RX_DESCS, 0, 0,
-			    "%s:RX-free", port->netdev->name);
+				 "%s:RX-free", port->netdev->name);
 	if (err)
 		return err;
 
 	err = qmgr_request_queue(port->plat->rxq, RX_DESCS, 0, 0,
-			    "%s:RX", port->netdev->name);
+				 "%s:RX", port->netdev->name);
 	if (err)
 		goto rel_rxfree;
 
 	err = qmgr_request_queue(TX_QUEUE(port->id), TX_DESCS, 0, 0,
-			    "%s:TX", port->netdev->name);
+				 "%s:TX", port->netdev->name);
 	if (err)
 		goto rel_rx;
 
 	err = qmgr_request_queue(port->plat->txreadyq, TX_DESCS, 0, 0,
-			    "%s:TX-ready", port->netdev->name);
+				 "%s:TX-ready", port->netdev->name);
 	if (err)
 		goto rel_tx;
 
 	/* TX-done queue handles skbs sent out by the NPEs */
 	if (!ports_open) {
 		err = qmgr_request_queue(TXDONE_QUEUE, TXDONE_QUEUE_LEN, 0, 0,
-				    "%s:TX-done", DRV_NAME);
+					 "%s:TX-done", DRV_NAME);
 		if (err)
 			goto rel_txready;
 	}
-- 
cgit v0.10.2


From 8a4fe82497a52b6da45b5b3179cce7dd6a28a532 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Tue, 28 Apr 2009 14:48:43 +0200
Subject: IXP42x: Identify Intel IXP425 rev. A0 processors.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/include/mach/cpu.h b/arch/arm/mach-ixp4xx/include/mach/cpu.h
index def7773..b2ef65d 100644
--- a/arch/arm/mach-ixp4xx/include/mach/cpu.h
+++ b/arch/arm/mach-ixp4xx/include/mach/cpu.h
@@ -26,6 +26,8 @@
 #define IXP46X_PROCESSOR_ID_VALUE	0x69054200 /* including IXP455 */
 #define IXP46X_PROCESSOR_ID_MASK	0xfffffff0
 
+#define cpu_is_ixp42x_rev_a0() ((read_cpuid_id() & (IXP42X_PROCESSOR_ID_MASK | 0xF)) == \
+				IXP42X_PROCESSOR_ID_VALUE)
 #define cpu_is_ixp42x()	((read_cpuid_id() & IXP42X_PROCESSOR_ID_MASK) == \
 			 IXP42X_PROCESSOR_ID_VALUE)
 #define cpu_is_ixp43x()	((read_cpuid_id() & IXP43X_PROCESSOR_ID_MASK) == \
@@ -35,8 +37,11 @@
 
 static inline u32 ixp4xx_read_feature_bits(void)
 {
-	unsigned int val = ~*IXP4XX_EXP_CFG2;
+	u32 val = ~*IXP4XX_EXP_CFG2;
 
+	if (cpu_is_ixp42x_rev_a0())
+		return IXP42X_FEATURE_MASK & ~(IXP4XX_FEATURE_RCOMP |
+					       IXP4XX_FEATURE_AES);
 	if (cpu_is_ixp42x())
 		return val & IXP42X_FEATURE_MASK;
 	if (cpu_is_ixp43x())
-- 
cgit v0.10.2


From 61a5ccc85a98f5eced89a3fda19b0ee708535c1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Tue, 28 Apr 2009 14:52:23 +0200
Subject: IXP42x: add NPE support for IXP425 rev. A0 processors.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/ixp4xx_npe.c b/arch/arm/mach-ixp4xx/ixp4xx_npe.c
index 7bb8e77..47ac69c 100644
--- a/arch/arm/mach-ixp4xx/ixp4xx_npe.c
+++ b/arch/arm/mach-ixp4xx/ixp4xx_npe.c
@@ -386,15 +386,6 @@ static int npe_reset(struct npe *npe)
 	/* reset the NPE */
 	ixp4xx_write_feature_bits(val &
 				  ~(IXP4XX_FEATURE_RESET_NPEA << npe->id));
-	for (i = 0; i < MAX_RETRIES; i++) {
-		if (!(ixp4xx_read_feature_bits() &
-		      (IXP4XX_FEATURE_RESET_NPEA << npe->id)))
-			break;	/* reset completed */
-		udelay(1);
-	}
-	if (i == MAX_RETRIES)
-		return -ETIMEDOUT;
-
 	/* deassert reset */
 	ixp4xx_write_feature_bits(val |
 				  (IXP4XX_FEATURE_RESET_NPEA << npe->id));
-- 
cgit v0.10.2


From d4c9e9fc97515588529e7fe48c7d5725292c6734 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Sat, 23 May 2009 23:36:03 +0200
Subject: IXP42x: Add QMgr support for IXP425 rev. A0 processors.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
index 7531bfd..6e6dc4c 100644
--- a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
+++ b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
@@ -49,6 +49,49 @@ void qmgr_set_irq(unsigned int queue, int src,
 }
 
 
+static irqreturn_t qmgr_irq1_a0(int irq, void *pdev)
+{
+	int i, ret = 0;
+
+	/* ACK - it may clear any bits so don't rely on it */
+	__raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[0]);
+
+	for (i = 0; i < HALF_QUEUES; i++) {
+		u32 src, stat;
+		if (!(qmgr_regs->irqen[0] & BIT(i)))
+			continue;
+		src = qmgr_regs->irqsrc[i >> 3];
+		stat = qmgr_regs->stat1[i >> 3];
+		if (src & 4) /* the IRQ condition is inverted */
+			stat = ~stat;
+		if (stat & BIT(src & 3)) {
+			irq_handlers[i](irq_pdevs[i]);
+			ret = IRQ_HANDLED;
+		}
+	}
+	return ret;
+}
+
+
+static irqreturn_t qmgr_irq2_a0(int irq, void *pdev)
+{
+	int i, ret = 0;
+	u32 req_bitmap;
+
+	/* ACK - it may clear any bits so don't rely on it */
+	__raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[1]);
+
+	req_bitmap = qmgr_regs->irqen[1] & qmgr_regs->statne_h;
+	for (i = 0; i < HALF_QUEUES; i++) {
+		if (!(req_bitmap & BIT(i)))
+			continue;
+		irq_handlers[HALF_QUEUES + i](irq_pdevs[HALF_QUEUES + i]);
+		ret = IRQ_HANDLED;
+	}
+	return ret;
+}
+
+
 static irqreturn_t qmgr_irq(int irq, void *pdev)
 {
 	int i, half = (irq == IRQ_IXP4XX_QM1 ? 0 : 1);
@@ -236,6 +279,8 @@ void qmgr_release_queue(unsigned int queue)
 static int qmgr_init(void)
 {
 	int i, err;
+	irq_handler_t handler1, handler2;
+
 	mem_res = request_mem_region(IXP4XX_QMGR_BASE_PHYS,
 				     IXP4XX_QMGR_REGION_SIZE,
 				     "IXP4xx Queue Manager");
@@ -265,19 +310,25 @@ static int qmgr_init(void)
 	for (i = 0; i < QUEUES; i++)
 		__raw_writel(0, &qmgr_regs->sram[i]);
 
-	err = request_irq(IRQ_IXP4XX_QM1, qmgr_irq, 0,
-			  "IXP4xx Queue Manager", NULL);
+	if (cpu_is_ixp42x_rev_a0()) {
+		handler1 = qmgr_irq1_a0;
+		handler2 = qmgr_irq2_a0;
+	} else
+		handler1 = handler2 = qmgr_irq;
+
+	err = request_irq(IRQ_IXP4XX_QM1, handler1, 0, "IXP4xx Queue Manager",
+			  NULL);
 	if (err) {
-		printk(KERN_ERR "qmgr: failed to request IRQ%i\n",
-		       IRQ_IXP4XX_QM1);
+		printk(KERN_ERR "qmgr: failed to request IRQ%i (%i)\n",
+		       IRQ_IXP4XX_QM1, err);
 		goto error_irq;
 	}
 
-	err = request_irq(IRQ_IXP4XX_QM2, qmgr_irq, 0,
-			  "IXP4xx Queue Manager", NULL);
+	err = request_irq(IRQ_IXP4XX_QM2, handler2, 0, "IXP4xx Queue Manager",
+			  NULL);
 	if (err) {
-		printk(KERN_ERR "qmgr: failed to request IRQ%i\n",
-		       IRQ_IXP4XX_QM2);
+		printk(KERN_ERR "qmgr: failed to request IRQ%i (%i)\n",
+		       IRQ_IXP4XX_QM2, err);
 		goto error_irq2;
 	}
 
-- 
cgit v0.10.2


From 0771c6939484d2ebe0ec28257c2570aecd9911e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Tue, 28 Apr 2009 19:32:55 +0200
Subject: IXP42x: Use __fls() in QMgr interrupt handlers.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
index 6e6dc4c..bfdbe4b 100644
--- a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
+++ b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c
@@ -52,14 +52,15 @@ void qmgr_set_irq(unsigned int queue, int src,
 static irqreturn_t qmgr_irq1_a0(int irq, void *pdev)
 {
 	int i, ret = 0;
+	u32 en_bitmap, src, stat;
 
 	/* ACK - it may clear any bits so don't rely on it */
 	__raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[0]);
 
-	for (i = 0; i < HALF_QUEUES; i++) {
-		u32 src, stat;
-		if (!(qmgr_regs->irqen[0] & BIT(i)))
-			continue;
+	en_bitmap = qmgr_regs->irqen[0];
+	while (en_bitmap) {
+		i = __fls(en_bitmap); /* number of the last "low" queue */
+		en_bitmap &= ~BIT(i);
 		src = qmgr_regs->irqsrc[i >> 3];
 		stat = qmgr_regs->stat1[i >> 3];
 		if (src & 4) /* the IRQ condition is inverted */
@@ -82,9 +83,9 @@ static irqreturn_t qmgr_irq2_a0(int irq, void *pdev)
 	__raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[1]);
 
 	req_bitmap = qmgr_regs->irqen[1] & qmgr_regs->statne_h;
-	for (i = 0; i < HALF_QUEUES; i++) {
-		if (!(req_bitmap & BIT(i)))
-			continue;
+	while (req_bitmap) {
+		i = __fls(req_bitmap); /* number of the last "high" queue */
+		req_bitmap &= ~BIT(i);
 		irq_handlers[HALF_QUEUES + i](irq_pdevs[HALF_QUEUES + i]);
 		ret = IRQ_HANDLED;
 	}
@@ -95,15 +96,19 @@ static irqreturn_t qmgr_irq2_a0(int irq, void *pdev)
 static irqreturn_t qmgr_irq(int irq, void *pdev)
 {
 	int i, half = (irq == IRQ_IXP4XX_QM1 ? 0 : 1);
-	u32 val = __raw_readl(&qmgr_regs->irqstat[half]);
-	__raw_writel(val, &qmgr_regs->irqstat[half]); /* ACK */
+	u32 req_bitmap = __raw_readl(&qmgr_regs->irqstat[half]);
 
-	for (i = 0; i < HALF_QUEUES; i++)
-		if (val & (1 << i)) {
-			int irq = half * HALF_QUEUES + i;
-			irq_handlers[irq](irq_pdevs[irq]);
-		}
-	return val ? IRQ_HANDLED : 0;
+	if (!req_bitmap)
+		return 0;
+	__raw_writel(req_bitmap, &qmgr_regs->irqstat[half]); /* ACK */
+
+	while (req_bitmap) {
+		i = __fls(req_bitmap); /* number of the last queue */
+		req_bitmap &= ~BIT(i);
+		i += half * HALF_QUEUES;
+		irq_handlers[i](irq_pdevs[i]);
+	}
+	return IRQ_HANDLED;
 }
 
 
-- 
cgit v0.10.2


From 59a3759d0fe8d969888c741bb33f4946e4d3750d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 23 May 2009 14:47:00 -0700
Subject: Linux 2.6.30-rc7


diff --git a/Makefile b/Makefile
index b57e1f5..739fd34 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 30
-EXTRAVERSION = -rc6
-NAME = Vindictive Armadillo
+EXTRAVERSION = -rc7
+NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
-- 
cgit v0.10.2


From 11c79740d3c03cb81f84e98cf2e2dbd8d9bb53cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Sat, 23 May 2009 23:58:30 +0200
Subject: IXP4xx: support for Goramo MultiLink router platform.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig
index 2c5a02b..264f4d5 100644
--- a/arch/arm/mach-ixp4xx/Kconfig
+++ b/arch/arm/mach-ixp4xx/Kconfig
@@ -78,6 +78,12 @@ config MACH_IXDP465
 	  IXDP465 Development Platform (Also known as BMP).
 	  For more information on this platform, see <file:Documentation/arm/IXP4xx>.
 
+config MACH_GORAMO_MLR
+	bool "GORAMO Multi Link Router"
+	help
+	  Say 'Y' here if you want your kernel to support GORAMO
+	  MultiLink router.
+
 config MACH_KIXRP435
 	bool "KIXRP435"
 	help
diff --git a/arch/arm/mach-ixp4xx/Makefile b/arch/arm/mach-ixp4xx/Makefile
index 2e6bbf9..47d1f60 100644
--- a/arch/arm/mach-ixp4xx/Makefile
+++ b/arch/arm/mach-ixp4xx/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_MACH_DSMG600)      += dsmg600-setup.o
 obj-$(CONFIG_MACH_GATEWAY7001)	+= gateway7001-setup.o
 obj-$(CONFIG_MACH_WG302V2)	+= wg302v2-setup.o
 obj-$(CONFIG_MACH_FSG)		+= fsg-setup.o
+obj-$(CONFIG_MACH_GORAMO_MLR)	+= goramo_mlr.o
 
 obj-$(CONFIG_PCI)		+= $(obj-pci-$(CONFIG_PCI)) common-pci.o
 obj-$(CONFIG_IXP4XX_QMGR)	+= ixp4xx_qmgr.o
diff --git a/arch/arm/mach-ixp4xx/goramo_mlr.c b/arch/arm/mach-ixp4xx/goramo_mlr.c
new file mode 100644
index 0000000..a733b8f
--- /dev/null
+++ b/arch/arm/mach-ixp4xx/goramo_mlr.c
@@ -0,0 +1,507 @@
+/*
+ * Goramo MultiLink router platform code
+ * Copyright (C) 2006-2009 Krzysztof Halasa <khc@pm.waw.pl>
+ */
+
+#include <linux/delay.h>
+#include <linux/hdlc.h>
+#include <linux/i2c-gpio.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/serial_8250.h>
+#include <asm/mach-types.h>
+#include <asm/system.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/flash.h>
+#include <asm/mach/pci.h>
+
+#define xgpio_irq(n)		(IRQ_IXP4XX_GPIO ## n)
+#define gpio_irq(n)		xgpio_irq(n)
+
+#define SLOT_ETHA		0x0B	/* IDSEL = AD21 */
+#define SLOT_ETHB		0x0C	/* IDSEL = AD20 */
+#define SLOT_MPCI		0x0D	/* IDSEL = AD19 */
+#define SLOT_NEC		0x0E	/* IDSEL = AD18 */
+
+#define IRQ_ETHA		IRQ_IXP4XX_GPIO4
+#define IRQ_ETHB		IRQ_IXP4XX_GPIO5
+#define IRQ_NEC			IRQ_IXP4XX_GPIO3
+#define IRQ_MPCI		IRQ_IXP4XX_GPIO12
+
+/* GPIO lines */
+#define GPIO_SCL		0
+#define GPIO_SDA		1
+#define GPIO_STR		2
+#define GPIO_HSS0_DCD_N		6
+#define GPIO_HSS1_DCD_N		7
+#define GPIO_HSS0_CTS_N		10
+#define GPIO_HSS1_CTS_N		11
+#define GPIO_HSS1_RTS_N		13
+#define GPIO_HSS0_RTS_N		14
+
+/* Control outputs from 74HC4094 */
+#define CONTROL_HSS0_CLK_INT	0
+#define CONTROL_HSS1_CLK_INT	1
+#define CONTROL_HSS0_DTR_N	2
+#define CONTROL_HSS1_DTR_N	3
+#define CONTROL_EXT		4
+#define CONTROL_AUTO_RESET	5
+#define CONTROL_PCI_RESET_N	6
+#define CONTROL_EEPROM_WC_N	7
+
+/* offsets from start of flash ROM = 0x50000000 */
+#define CFG_ETH0_ADDRESS	0x40 /* 6 bytes */
+#define CFG_ETH1_ADDRESS	0x46 /* 6 bytes */
+#define CFG_REV			0x4C /* u32 */
+#define CFG_SDRAM_SIZE		0x50 /* u32 */
+#define CFG_SDRAM_CONF		0x54 /* u32 */
+#define CFG_SDRAM_MODE		0x58 /* u32 */
+#define CFG_SDRAM_REFRESH	0x5C /* u32 */
+
+#define CFG_HW_BITS		0x60 /* u32 */
+#define  CFG_HW_USB_PORTS	0x00000007 /* 0 = no NEC chip, 1-5 = ports # */
+#define  CFG_HW_HAS_PCI_SLOT	0x00000008
+#define  CFG_HW_HAS_ETH0	0x00000010
+#define  CFG_HW_HAS_ETH1	0x00000020
+#define  CFG_HW_HAS_HSS0	0x00000040
+#define  CFG_HW_HAS_HSS1	0x00000080
+#define  CFG_HW_HAS_UART0	0x00000100
+#define  CFG_HW_HAS_UART1	0x00000200
+#define  CFG_HW_HAS_EEPROM	0x00000400
+
+#define FLASH_CMD_READ_ARRAY	0xFF
+#define FLASH_CMD_READ_ID	0x90
+#define FLASH_SER_OFF		0x102 /* 0x81 in 16-bit mode */
+
+static u32 hw_bits = 0xFFFFFFFD;    /* assume all hardware present */;
+static u8 control_value;
+
+static void set_scl(u8 value)
+{
+	gpio_line_set(GPIO_SCL, !!value);
+	udelay(3);
+}
+
+static void set_sda(u8 value)
+{
+	gpio_line_set(GPIO_SDA, !!value);
+	udelay(3);
+}
+
+static void set_str(u8 value)
+{
+	gpio_line_set(GPIO_STR, !!value);
+	udelay(3);
+}
+
+static inline void set_control(int line, int value)
+{
+	if (value)
+		control_value |=  (1 << line);
+	else
+		control_value &= ~(1 << line);
+}
+
+
+static void output_control(void)
+{
+	int i;
+
+	gpio_line_config(GPIO_SCL, IXP4XX_GPIO_OUT);
+	gpio_line_config(GPIO_SDA, IXP4XX_GPIO_OUT);
+
+	for (i = 0; i < 8; i++) {
+		set_scl(0);
+		set_sda(control_value & (0x80 >> i)); /* MSB first */
+		set_scl(1);	/* active edge */
+	}
+
+	set_str(1);
+	set_str(0);
+
+	set_scl(0);
+	set_sda(1);		/* Be ready for START */
+	set_scl(1);
+}
+
+
+static void (*set_carrier_cb_tab[2])(void *pdev, int carrier);
+
+static int hss_set_clock(int port, unsigned int clock_type)
+{
+	int ctrl_int = port ? CONTROL_HSS1_CLK_INT : CONTROL_HSS0_CLK_INT;
+
+	switch (clock_type) {
+	case CLOCK_DEFAULT:
+	case CLOCK_EXT:
+		set_control(ctrl_int, 0);
+		output_control();
+		return CLOCK_EXT;
+
+	case CLOCK_INT:
+		set_control(ctrl_int, 1);
+		output_control();
+		return CLOCK_INT;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static irqreturn_t hss_dcd_irq(int irq, void *pdev)
+{
+	int i, port = (irq == gpio_irq(GPIO_HSS1_DCD_N));
+	gpio_line_get(port ? GPIO_HSS1_DCD_N : GPIO_HSS0_DCD_N, &i);
+	set_carrier_cb_tab[port](pdev, !i);
+	return IRQ_HANDLED;
+}
+
+
+static int hss_open(int port, void *pdev,
+		    void (*set_carrier_cb)(void *pdev, int carrier))
+{
+	int i, irq;
+
+	if (!port)
+		irq = gpio_irq(GPIO_HSS0_DCD_N);
+	else
+		irq = gpio_irq(GPIO_HSS1_DCD_N);
+
+	gpio_line_get(port ? GPIO_HSS1_DCD_N : GPIO_HSS0_DCD_N, &i);
+	set_carrier_cb(pdev, !i);
+
+	set_carrier_cb_tab[!!port] = set_carrier_cb;
+
+	if ((i = request_irq(irq, hss_dcd_irq, 0, "IXP4xx HSS", pdev)) != 0) {
+		printk(KERN_ERR "ixp4xx_hss: failed to request IRQ%i (%i)\n",
+		       irq, i);
+		return i;
+	}
+
+	set_control(port ? CONTROL_HSS1_DTR_N : CONTROL_HSS0_DTR_N, 0);
+	output_control();
+	gpio_line_set(port ? GPIO_HSS1_RTS_N : GPIO_HSS0_RTS_N, 0);
+	return 0;
+}
+
+static void hss_close(int port, void *pdev)
+{
+	free_irq(port ? gpio_irq(GPIO_HSS1_DCD_N) : gpio_irq(GPIO_HSS0_DCD_N),
+		 pdev);
+	set_carrier_cb_tab[!!port] = NULL; /* catch bugs */
+
+	set_control(port ? CONTROL_HSS1_DTR_N : CONTROL_HSS0_DTR_N, 1);
+	output_control();
+	gpio_line_set(port ? GPIO_HSS1_RTS_N : GPIO_HSS0_RTS_N, 1);
+}
+
+
+/* Flash memory */
+static struct flash_platform_data flash_data = {
+	.map_name	= "cfi_probe",
+	.width		= 2,
+};
+
+static struct resource flash_resource = {
+	.flags		= IORESOURCE_MEM,
+};
+
+static struct platform_device device_flash = {
+	.name		= "IXP4XX-Flash",
+	.id		= 0,
+	.dev		= { .platform_data = &flash_data },
+	.num_resources	= 1,
+	.resource	= &flash_resource,
+};
+
+
+/* I^2C interface */
+static struct i2c_gpio_platform_data i2c_data = {
+	.sda_pin	= GPIO_SDA,
+	.scl_pin	= GPIO_SCL,
+};
+
+static struct platform_device device_i2c = {
+	.name		= "i2c-gpio",
+	.id		= 0,
+	.dev		= { .platform_data = &i2c_data },
+};
+
+
+/* IXP425 2 UART ports */
+static struct resource uart_resources[] = {
+	{
+		.start		= IXP4XX_UART1_BASE_PHYS,
+		.end		= IXP4XX_UART1_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	},
+	{
+		.start		= IXP4XX_UART2_BASE_PHYS,
+		.end		= IXP4XX_UART2_BASE_PHYS + 0x0fff,
+		.flags		= IORESOURCE_MEM,
+	}
+};
+
+static struct plat_serial8250_port uart_data[] = {
+	{
+		.mapbase	= IXP4XX_UART1_BASE_PHYS,
+		.membase	= (char __iomem *)IXP4XX_UART1_BASE_VIRT +
+			REG_OFFSET,
+		.irq		= IRQ_IXP4XX_UART1,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+		.uartclk	= IXP4XX_UART_XTAL,
+	},
+	{
+		.mapbase	= IXP4XX_UART2_BASE_PHYS,
+		.membase	= (char __iomem *)IXP4XX_UART2_BASE_VIRT +
+			REG_OFFSET,
+		.irq		= IRQ_IXP4XX_UART2,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+		.uartclk	= IXP4XX_UART_XTAL,
+	},
+	{ },
+};
+
+static struct platform_device device_uarts = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_PLATFORM,
+	.dev.platform_data	= uart_data,
+	.num_resources		= 2,
+	.resource		= uart_resources,
+};
+
+
+/* Built-in 10/100 Ethernet MAC interfaces */
+static struct eth_plat_info eth_plat[] = {
+	{
+		.phy		= 0,
+		.rxq		= 3,
+		.txreadyq	= 32,
+	}, {
+		.phy		= 1,
+		.rxq		= 4,
+		.txreadyq	= 33,
+	}
+};
+
+static struct platform_device device_eth_tab[] = {
+	{
+		.name			= "ixp4xx_eth",
+		.id			= IXP4XX_ETH_NPEB,
+		.dev.platform_data	= eth_plat,
+	}, {
+		.name			= "ixp4xx_eth",
+		.id			= IXP4XX_ETH_NPEC,
+		.dev.platform_data	= eth_plat + 1,
+	}
+};
+
+
+/* IXP425 2 synchronous serial ports */
+static struct hss_plat_info hss_plat[] = {
+	{
+		.set_clock	= hss_set_clock,
+		.open		= hss_open,
+		.close		= hss_close,
+		.txreadyq	= 34,
+	}, {
+		.set_clock	= hss_set_clock,
+		.open		= hss_open,
+		.close		= hss_close,
+		.txreadyq	= 35,
+	}
+};
+
+static struct platform_device device_hss_tab[] = {
+	{
+		.name			= "ixp4xx_hss",
+		.id			= 0,
+		.dev.platform_data	= hss_plat,
+	}, {
+		.name			= "ixp4xx_hss",
+		.id			= 1,
+		.dev.platform_data	= hss_plat + 1,
+	}
+};
+
+
+static struct platform_device *device_tab[6] __initdata = {
+	&device_flash,		/* index 0 */
+};
+
+static inline u8 __init flash_readb(u8 __iomem *flash, u32 addr)
+{
+#ifdef __ARMEB__
+	return __raw_readb(flash + addr);
+#else
+	return __raw_readb(flash + (addr ^ 3));
+#endif
+}
+
+static inline u16 __init flash_readw(u8 __iomem *flash, u32 addr)
+{
+#ifdef __ARMEB__
+	return __raw_readw(flash + addr);
+#else
+	return __raw_readw(flash + (addr ^ 2));
+#endif
+}
+
+static void __init gmlr_init(void)
+{
+	u8 __iomem *flash;
+	int i, devices = 1; /* flash */
+
+	ixp4xx_sys_init();
+
+	if ((flash = ioremap(IXP4XX_EXP_BUS_BASE_PHYS, 0x80)) == NULL)
+		printk(KERN_ERR "goramo-mlr: unable to access system"
+		       " configuration data\n");
+	else {
+		system_rev = __raw_readl(flash + CFG_REV);
+		hw_bits = __raw_readl(flash + CFG_HW_BITS);
+
+		for (i = 0; i < ETH_ALEN; i++) {
+			eth_plat[0].hwaddr[i] =
+				flash_readb(flash, CFG_ETH0_ADDRESS + i);
+			eth_plat[1].hwaddr[i] =
+				flash_readb(flash, CFG_ETH1_ADDRESS + i);
+		}
+
+		__raw_writew(FLASH_CMD_READ_ID, flash);
+		system_serial_high = flash_readw(flash, FLASH_SER_OFF);
+		system_serial_high <<= 16;
+		system_serial_high |= flash_readw(flash, FLASH_SER_OFF + 2);
+		system_serial_low = flash_readw(flash, FLASH_SER_OFF + 4);
+		system_serial_low <<= 16;
+		system_serial_low |= flash_readw(flash, FLASH_SER_OFF + 6);
+		__raw_writew(FLASH_CMD_READ_ARRAY, flash);
+
+		iounmap(flash);
+	}
+
+	switch (hw_bits & (CFG_HW_HAS_UART0 | CFG_HW_HAS_UART1)) {
+	case CFG_HW_HAS_UART0:
+		memset(&uart_data[1], 0, sizeof(uart_data[1]));
+		device_uarts.num_resources = 1;
+		break;
+
+	case CFG_HW_HAS_UART1:
+		device_uarts.dev.platform_data = &uart_data[1];
+		device_uarts.resource = &uart_resources[1];
+		device_uarts.num_resources = 1;
+		break;
+	}
+	if (hw_bits & (CFG_HW_HAS_UART0 | CFG_HW_HAS_UART1))
+		device_tab[devices++] = &device_uarts; /* max index 1 */
+
+	if (hw_bits & CFG_HW_HAS_ETH0)
+		device_tab[devices++] = &device_eth_tab[0]; /* max index 2 */
+	if (hw_bits & CFG_HW_HAS_ETH1)
+		device_tab[devices++] = &device_eth_tab[1]; /* max index 3 */
+
+	if (hw_bits & CFG_HW_HAS_HSS0)
+		device_tab[devices++] = &device_hss_tab[0]; /* max index 4 */
+	if (hw_bits & CFG_HW_HAS_HSS1)
+		device_tab[devices++] = &device_hss_tab[1]; /* max index 5 */
+
+	if (hw_bits & CFG_HW_HAS_EEPROM)
+		device_tab[devices++] = &device_i2c; /* max index 6 */
+
+	gpio_line_config(GPIO_SCL, IXP4XX_GPIO_OUT);
+	gpio_line_config(GPIO_SDA, IXP4XX_GPIO_OUT);
+	gpio_line_config(GPIO_STR, IXP4XX_GPIO_OUT);
+	gpio_line_config(GPIO_HSS0_RTS_N, IXP4XX_GPIO_OUT);
+	gpio_line_config(GPIO_HSS1_RTS_N, IXP4XX_GPIO_OUT);
+	gpio_line_config(GPIO_HSS0_DCD_N, IXP4XX_GPIO_IN);
+	gpio_line_config(GPIO_HSS1_DCD_N, IXP4XX_GPIO_IN);
+	set_irq_type(gpio_irq(GPIO_HSS0_DCD_N), IRQ_TYPE_EDGE_BOTH);
+	set_irq_type(gpio_irq(GPIO_HSS1_DCD_N), IRQ_TYPE_EDGE_BOTH);
+
+	set_control(CONTROL_HSS0_DTR_N, 1);
+	set_control(CONTROL_HSS1_DTR_N, 1);
+	set_control(CONTROL_EEPROM_WC_N, 1);
+	set_control(CONTROL_PCI_RESET_N, 1);
+	output_control();
+
+	msleep(1);	      /* Wait for PCI devices to initialize */
+
+	flash_resource.start = IXP4XX_EXP_BUS_BASE(0);
+	flash_resource.end = IXP4XX_EXP_BUS_BASE(0) + ixp4xx_exp_bus_size - 1;
+
+	platform_add_devices(device_tab, devices);
+}
+
+
+#ifdef CONFIG_PCI
+static void __init gmlr_pci_preinit(void)
+{
+	set_irq_type(IRQ_ETHA, IRQ_TYPE_LEVEL_LOW);
+	set_irq_type(IRQ_ETHB, IRQ_TYPE_LEVEL_LOW);
+	set_irq_type(IRQ_NEC, IRQ_TYPE_LEVEL_LOW);
+	set_irq_type(IRQ_MPCI, IRQ_TYPE_LEVEL_LOW);
+	ixp4xx_pci_preinit();
+}
+
+static void __init gmlr_pci_postinit(void)
+{
+	if ((hw_bits & CFG_HW_USB_PORTS) >= 2 &&
+	    (hw_bits & CFG_HW_USB_PORTS) < 5) {
+		/* need to adjust number of USB ports on NEC chip */
+		u32 value, addr = BIT(32 - SLOT_NEC) | 0xE0;
+		if (!ixp4xx_pci_read(addr, NP_CMD_CONFIGREAD, &value)) {
+			value &= ~7;
+			value |= (hw_bits & CFG_HW_USB_PORTS);
+			ixp4xx_pci_write(addr, NP_CMD_CONFIGWRITE, value);
+		}
+	}
+}
+
+static int __init gmlr_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+{
+	switch(slot) {
+	case SLOT_ETHA:	return IRQ_ETHA;
+	case SLOT_ETHB:	return IRQ_ETHB;
+	case SLOT_NEC:	return IRQ_NEC;
+	default:	return IRQ_MPCI;
+	}
+}
+
+static struct hw_pci gmlr_hw_pci __initdata = {
+	.nr_controllers = 1,
+	.preinit	= gmlr_pci_preinit,
+	.postinit	= gmlr_pci_postinit,
+	.swizzle	= pci_std_swizzle,
+	.setup		= ixp4xx_setup,
+	.scan		= ixp4xx_scan_bus,
+	.map_irq	= gmlr_map_irq,
+};
+
+static int __init gmlr_pci_init(void)
+{
+	if (machine_is_goramo_mlr() &&
+	    (hw_bits & (CFG_HW_USB_PORTS | CFG_HW_HAS_PCI_SLOT)))
+		pci_common_init(&gmlr_hw_pci);
+	return 0;
+}
+
+subsys_initcall(gmlr_pci_init);
+#endif /* CONFIG_PCI */
+
+
+MACHINE_START(GORAMO_MLR, "MultiLink")
+	/* Maintainer: Krzysztof Halasa */
+	.phys_io	= IXP4XX_PERIPHERAL_BASE_PHYS,
+	.io_pg_offst	= ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xFFFC,
+	.map_io		= ixp4xx_map_io,
+	.init_irq	= ixp4xx_init_irq,
+	.timer		= &ixp4xx_timer,
+	.boot_params	= 0x0100,
+	.init_machine	= gmlr_init,
+MACHINE_END
-- 
cgit v0.10.2


From 79f52b77b89e8b7aa9fbe62135eea198a2ecbd5b Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Date: Sat, 23 May 2009 20:28:41 -0500
Subject: jfs: Add missing mutex_unlock call to error path

Jan Kucera found an missing call to mutex_unlock() with his static code
checker.  It's an unlikely error path to hit in the real world, but it
should be fixed.

Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Reported-by: Jan Kucera <kucera.jan.cz@gmail.com>

diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 3460572..0fc3040 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -2571,6 +2571,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 
 			txAbort(tid, 0);
 			txEnd(tid);
+			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
 
 			/* release the inode map lock */
 			IWRITE_UNLOCK(ipimap);
-- 
cgit v0.10.2


From 082ff5a2767a0679ee543f14883adbafb631ffbe Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:29:00 +0200
Subject: perf_counter: Change pctrl() behaviour

Instead of en/dis-abling all counters acting on a particular
task, en/dis- able all counters we created.

[ v2: fix crash on first counter enable ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.916937244@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d87247d..353c0ac 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,6 +108,15 @@ extern struct group_info init_groups;
 
 extern struct cred init_cred;
 
+#ifdef CONFIG_PERF_COUNTERS
+# define INIT_PERF_COUNTERS(tsk)					\
+	.perf_counter_mutex = 						\
+		 __MUTEX_INITIALIZER(tsk.perf_counter_mutex),		\
+	.perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list),
+#else
+# define INIT_PERF_COUNTERS(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -171,6 +180,7 @@ extern struct cred init_cred;
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_IDS							\
+	INIT_PERF_COUNTERS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4ab8050..4159ee5 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -469,6 +469,9 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
+	struct list_head		owner_entry;
+	struct task_struct		*owner;
+
 	/* mmap bits */
 	struct mutex			mmap_mutex;
 	atomic_t			mmap_count;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9714d45..bc9326d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1389,6 +1389,8 @@ struct task_struct {
 #endif
 #ifdef CONFIG_PERF_COUNTERS
 	struct perf_counter_context *perf_counter_ctxp;
+	struct mutex perf_counter_mutex;
+	struct list_head perf_counter_list;
 #endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0e97f89..4c86a63 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1076,79 +1076,26 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 }
 
-int perf_counter_task_disable(void)
+int perf_counter_task_enable(void)
 {
-	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
-	unsigned long flags;
-
-	if (!ctx || !ctx->nr_counters)
-		return 0;
-
-	local_irq_save(flags);
 
-	__perf_counter_task_sched_out(ctx);
-
-	spin_lock(&ctx->lock);
-
-	/*
-	 * Disable all the counters:
-	 */
-	perf_disable();
-
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ERROR) {
-			update_group_times(counter);
-			counter->state = PERF_COUNTER_STATE_OFF;
-		}
-	}
-
-	perf_enable();
-
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	mutex_lock(&current->perf_counter_mutex);
+	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
+		perf_counter_enable(counter);
+	mutex_unlock(&current->perf_counter_mutex);
 
 	return 0;
 }
 
-int perf_counter_task_enable(void)
+int perf_counter_task_disable(void)
 {
-	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
-	unsigned long flags;
-	int cpu;
-
-	if (!ctx || !ctx->nr_counters)
-		return 0;
-
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-
-	__perf_counter_task_sched_out(ctx);
-
-	spin_lock(&ctx->lock);
 
-	/*
-	 * Disable all the counters:
-	 */
-	perf_disable();
-
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state > PERF_COUNTER_STATE_OFF)
-			continue;
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled =
-			ctx->time - counter->total_time_enabled;
-		counter->hw_event.disabled = 0;
-	}
-	perf_enable();
-
-	spin_unlock(&ctx->lock);
-
-	perf_counter_task_sched_in(curr, cpu);
-
-	local_irq_restore(flags);
+	mutex_lock(&current->perf_counter_mutex);
+	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
+		perf_counter_disable(counter);
+	mutex_unlock(&current->perf_counter_mutex);
 
 	return 0;
 }
@@ -1416,6 +1363,11 @@ static int perf_release(struct inode *inode, struct file *file)
 	perf_counter_remove_from_context(counter);
 	mutex_unlock(&ctx->mutex);
 
+	mutex_lock(&counter->owner->perf_counter_mutex);
+	list_del_init(&counter->owner_entry);
+	mutex_unlock(&counter->owner->perf_counter_mutex);
+	put_task_struct(counter->owner);
+
 	free_counter(counter);
 	put_context(ctx);
 
@@ -3272,6 +3224,12 @@ SYSCALL_DEFINE5(perf_counter_open,
 	perf_install_in_context(ctx, counter, cpu);
 	mutex_unlock(&ctx->mutex);
 
+	counter->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_counter_mutex);
+	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
+	mutex_unlock(&current->perf_counter_mutex);
+
 	fput_light(counter_file, fput_needed2);
 
 out_fput:
@@ -3488,6 +3446,9 @@ void perf_counter_init_task(struct task_struct *child)
 
 	child->perf_counter_ctxp = NULL;
 
+	mutex_init(&child->perf_counter_mutex);
+	INIT_LIST_HEAD(&child->perf_counter_list);
+
 	/*
 	 * This is executed from the parent task context, so inherit
 	 * counters that have been marked for cloning.
-- 
cgit v0.10.2


From 475c55797323b67435083f6e2eb8ee670f6410ec Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:29:01 +0200
Subject: perf_counter: Remove perf_counter_context::nr_enabled

now that pctrl() no longer disables other people's counters,
remove the PMU cache code that deals with that.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163013.032998331@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4159ee5..2ddf5e3 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -516,7 +516,6 @@ struct perf_counter_context {
 	struct list_head	event_list;
 	int			nr_counters;
 	int			nr_active;
-	int			nr_enabled;
 	int			is_active;
 	atomic_t		refcount;
 	struct task_struct	*task;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4c86a63..cb40625 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -134,8 +134,6 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 	ctx->nr_counters++;
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		ctx->nr_enabled++;
 }
 
 /*
@@ -150,8 +148,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	if (list_empty(&counter->list_entry))
 		return;
 	ctx->nr_counters--;
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		ctx->nr_enabled--;
 
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
@@ -406,7 +402,6 @@ static void __perf_counter_disable(void *info)
 		else
 			counter_sched_out(counter, cpuctx, ctx);
 		counter->state = PERF_COUNTER_STATE_OFF;
-		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
@@ -448,7 +443,6 @@ static void perf_counter_disable(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
 		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
-		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irq(&ctx->lock);
@@ -759,7 +753,6 @@ static void __perf_counter_enable(void *info)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
-	ctx->nr_enabled++;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -850,7 +843,6 @@ static void perf_counter_enable(struct perf_counter *counter)
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->tstamp_enabled =
 			ctx->time - counter->total_time_enabled;
-		ctx->nr_enabled++;
 	}
  out:
 	spin_unlock_irq(&ctx->lock);
@@ -910,8 +902,7 @@ static int context_equiv(struct perf_counter_context *ctx1,
 			 struct perf_counter_context *ctx2)
 {
 	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
-		&& ctx1->parent_gen == ctx2->parent_gen
-		&& ctx1->nr_enabled == ctx2->nr_enabled;
+		&& ctx1->parent_gen == ctx2->parent_gen;
 }
 
 /*
-- 
cgit v0.10.2


From c2990a2a582d73562d4dcf2502c39892a19a691d Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Sun, 24 May 2009 08:35:49 +0200
Subject: perf top: fix segfault

c6eb13 increased stack usage such that perf-top now croaks on startup.

Take event_array and mmap_array off the stack to prevent segfault on boxen
with smallish ulimit -s setting.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index a3216a6..74021ac 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -1035,10 +1035,11 @@ static void mmap_read(struct mmap_data *md)
 	md->prev = old;
 }
 
+static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
+static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+
 int cmd_top(int argc, char **argv, const char *prefix)
 {
-	struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
-	struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 	struct perf_counter_hw_event hw_event;
 	pthread_t thread;
 	int i, counter, group_fd, nr_poll = 0;
-- 
cgit v0.10.2


From a3862d3f814ce7dfca9eed56ac23d29db3aee8d5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 24 May 2009 09:02:37 +0200
Subject: perf_counter: Increase mmap limit

In a default 'perf top' run the tool will create a counter for
each online CPU. With enough CPUs this will eventually exhaust
the default limit.

So scale it up with the number of online CPUs.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index cb40625..6cdf824 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1704,6 +1704,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	user_extra = nr_pages + 1;
 	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+
+	/*
+	 * Increase the limit linearly with more CPUs:
+	 */
+	user_lock_limit *= num_online_cpus();
+
 	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
 	extra = 0;
-- 
cgit v0.10.2


From 3c166c7f1828f226c7f478758bf6c8ce8be1623f Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Sat, 23 May 2009 19:13:07 -0400
Subject: ASoC: Codec for STAC9766 used on the Efika

Datasheet: http://www.idt.com/products/getDoc.cfm?docID=13134007

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 7f78b65..cb07d9b 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -19,6 +19,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_CS4270 if I2C
 	select SND_SOC_PCM3008
 	select SND_SOC_SSM2602 if I2C
+	select SND_SOC_STAC9766 if SND_SOC_AC97_BUS
 	select SND_SOC_TLV320AIC23 if I2C
 	select SND_SOC_TLV320AIC26 if SPI_MASTER
 	select SND_SOC_TLV320AIC3X if I2C
@@ -93,6 +94,9 @@ config SND_SOC_PCM3008
 config SND_SOC_SSM2602
 	tristate
 
+config SND_SOC_STAC9766
+	tristate
+
 config SND_SOC_TLV320AIC23
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 70c55fa..46c007c 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -7,6 +7,7 @@ snd-soc-cs4270-objs := cs4270.o
 snd-soc-l3-objs := l3.o
 snd-soc-pcm3008-objs := pcm3008.o
 snd-soc-ssm2602-objs := ssm2602.o
+snd-soc-stac9766-objs := stac9766.o
 snd-soc-tlv320aic23-objs := tlv320aic23.o
 snd-soc-tlv320aic26-objs := tlv320aic26.o
 snd-soc-tlv320aic3x-objs := tlv320aic3x.o
@@ -42,6 +43,7 @@ obj-$(CONFIG_SND_SOC_CS4270)	+= snd-soc-cs4270.o
 obj-$(CONFIG_SND_SOC_L3)	+= snd-soc-l3.o
 obj-$(CONFIG_SND_SOC_PCM3008)	+= snd-soc-pcm3008.o
 obj-$(CONFIG_SND_SOC_SSM2602)	+= snd-soc-ssm2602.o
+obj-$(CONFIG_SND_SOC_STAC9766)	+= snd-soc-stac9766.o
 obj-$(CONFIG_SND_SOC_TLV320AIC23)	+= snd-soc-tlv320aic23.o
 obj-$(CONFIG_SND_SOC_TLV320AIC26)	+= snd-soc-tlv320aic26.o
 obj-$(CONFIG_SND_SOC_TLV320AIC3X)	+= snd-soc-tlv320aic3x.o
diff --git a/sound/soc/codecs/stac9766.c b/sound/soc/codecs/stac9766.c
new file mode 100644
index 0000000..7740cd5
--- /dev/null
+++ b/sound/soc/codecs/stac9766.c
@@ -0,0 +1,470 @@
+/*
+ * stac9766.c  --  ALSA SoC STAC9766 codec support
+ *
+ * Copyright 2009 Jon Smirl, Digispeaker
+ * Author: Jon Smirl <jonsmirl@gmail.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  Features:-
+ *
+ *   o Support for AC97 Codec, S/PDIF
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/ac97_codec.h>
+#include <sound/initval.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/tlv.h>
+#include <sound/soc-of-simple.h>
+
+#include "stac9766.h"
+
+#define STAC9766_VERSION "0.10"
+
+/*
+ * STAC9766 register cache
+ */
+static const u16 stac9766_reg[] = {
+	0x6A90, 0x8000, 0x8000, 0x8000, /* 6 */
+	0x0000, 0x0000, 0x8008, 0x8008, /* e */
+	0x8808, 0x8808, 0x8808, 0x8808, /* 16 */
+	0x8808, 0x0000, 0x8000, 0x0000, /* 1e */
+	0x0000, 0x0000, 0x0000, 0x000f, /* 26 */
+	0x0a05, 0x0400, 0xbb80, 0x0000, /* 2e */
+	0x0000, 0xbb80, 0x0000, 0x0000, /* 36 */
+	0x0000, 0x2000, 0x0000, 0x0100, /* 3e */
+	0x0000, 0x0000, 0x0080, 0x0000, /* 46 */
+	0x0000, 0x0000, 0x0003, 0xffff, /* 4e */
+	0x0000, 0x0000, 0x0000, 0x0000, /* 56 */
+	0x4000, 0x0000, 0x0000, 0x0000, /* 5e */
+	0x1201, 0xFFFF, 0xFFFF, 0x0000, /* 66 */
+	0x0000, 0x0000, 0x0000, 0x0000, /* 6e */
+	0x0000, 0x0000, 0x0000, 0x0006, /* 76 */
+	0x0000, 0x0000, 0x0000, 0x0000, /* 7e */
+};
+
+static const char *stac9766_record_mux[] = {"Mic", "CD", "Video", "AUX", "Line", "Stereo Mix", "Mono Mix", "Phone"};
+static const char *stac9766_mono_mux[] = {"Mix", "Mic"};
+static const char *stac9766_mic_mux[] = {"Mic1", "Mic2"};
+static const char *stac9766_SPDIF_mux[] = {"PCM", "ADC Record"};
+static const char *stac9766_popbypass_mux[] = {"Normal", "Bypass Mixer"};
+static const char *stac9766_record_all_mux[] = {"All analog", "Analog plus DAC"};
+static const char *stac9766_boost1[] = {"0dB", "10dB"};
+static const char *stac9766_boost2[] = {"0dB", "20dB"};
+static const char *stac9766_stereo_mic[] = {"Off", "On"};
+
+static const struct soc_enum stac9766_record_enum =
+	SOC_ENUM_DOUBLE(AC97_REC_SEL, 8, 0, 8, stac9766_record_mux);
+static const struct soc_enum stac9766_mono_enum =
+	SOC_ENUM_SINGLE(AC97_GENERAL_PURPOSE, 9, 2, stac9766_mono_mux);
+static const struct soc_enum stac9766_mic_enum =
+	SOC_ENUM_SINGLE(AC97_GENERAL_PURPOSE, 8, 2, stac9766_mic_mux);
+static const struct soc_enum stac9766_SPDIF_enum =
+	SOC_ENUM_SINGLE(AC97_STAC_DA_CONTROL, 1, 2, stac9766_SPDIF_mux);
+static const struct soc_enum stac9766_popbypass_enum =
+	SOC_ENUM_SINGLE(AC97_GENERAL_PURPOSE, 15, 2, stac9766_popbypass_mux);
+static const struct soc_enum stac9766_record_all_enum =
+	SOC_ENUM_SINGLE(AC97_STAC_ANALOG_SPECIAL, 12, 2, stac9766_record_all_mux);
+static const struct soc_enum stac9766_boost1_enum =
+	SOC_ENUM_SINGLE(AC97_MIC, 6, 2, stac9766_boost1); /* 0/10dB */
+static const struct soc_enum stac9766_boost2_enum =
+	SOC_ENUM_SINGLE(AC97_STAC_ANALOG_SPECIAL, 2, 2, stac9766_boost2); /* 0/20dB */
+static const struct soc_enum stac9766_stereo_mic_enum =
+	SOC_ENUM_SINGLE(AC97_STAC_STEREO_MIC, 2, 1, stac9766_stereo_mic);
+
+static const DECLARE_TLV_DB_LINEAR(master_tlv, -4600, 0);
+static const DECLARE_TLV_DB_LINEAR(record_tlv, 0, 2250);
+static const DECLARE_TLV_DB_LINEAR(beep_tlv, -4500, 0);
+static const DECLARE_TLV_DB_LINEAR(mix_tlv, -3450, 1200);
+
+static const struct snd_kcontrol_new stac9766_snd_ac97_controls[] = {
+	SOC_DOUBLE_TLV("Speaker Volume", AC97_MASTER, 8, 0, 31, 1, master_tlv),
+	SOC_SINGLE("Speaker Switch", AC97_MASTER, 15, 1, 1),
+	SOC_DOUBLE_TLV("Headphone Volume", AC97_HEADPHONE, 8, 0, 31, 1, master_tlv),
+	SOC_SINGLE("Headphone Switch", AC97_HEADPHONE, 15, 1, 1),
+	SOC_SINGLE_TLV("Mono Out Volume", AC97_MASTER_MONO, 0, 31, 1, master_tlv),
+	SOC_SINGLE("Mono Out Switch", AC97_MASTER_MONO, 15, 1, 1),
+
+	SOC_DOUBLE_TLV("Record Volume", AC97_REC_GAIN, 8, 0, 15, 0, record_tlv),
+	SOC_SINGLE("Record Switch", AC97_REC_GAIN, 15, 1, 1),
+
+
+	SOC_SINGLE_TLV("Beep Volume", AC97_PC_BEEP, 1, 15, 1, beep_tlv),
+	SOC_SINGLE("Beep Switch", AC97_PC_BEEP, 15, 1, 1),
+	SOC_SINGLE("Beep Frequency", AC97_PC_BEEP, 5, 127, 1),
+	SOC_SINGLE_TLV("Phone Volume", AC97_PHONE, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("Phone Switch", AC97_PHONE, 15, 1, 1),
+
+	SOC_ENUM("Mic Boost1", stac9766_boost1_enum),
+	SOC_ENUM("Mic Boost2", stac9766_boost2_enum),
+	SOC_SINGLE_TLV("Mic Volume", AC97_MIC, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("Mic Switch", AC97_MIC, 15, 1, 1),
+	SOC_ENUM("Stereo Mic", stac9766_stereo_mic_enum),
+
+	SOC_DOUBLE_TLV("Line Volume", AC97_LINE, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("Line Switch", AC97_LINE, 15, 1, 1),
+	SOC_DOUBLE_TLV("CD Volume", AC97_CD, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("CD Switch", AC97_CD, 15, 1, 1),
+	SOC_DOUBLE_TLV("AUX Volume", AC97_AUX, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("AUX Switch", AC97_AUX, 15, 1, 1),
+	SOC_DOUBLE_TLV("Video Volume", AC97_VIDEO, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("Video Switch", AC97_VIDEO, 15, 1, 1),
+
+	SOC_DOUBLE_TLV("DAC Volume", AC97_PCM, 8, 0, 31, 1, mix_tlv),
+	SOC_SINGLE("DAC Switch", AC97_PCM, 15, 1, 1),
+	SOC_SINGLE("Loopback Test Switch", AC97_GENERAL_PURPOSE, 7, 1, 0),
+	SOC_SINGLE("3D Volume", AC97_3D_CONTROL, 3, 2, 1),
+	SOC_SINGLE("3D Switch", AC97_GENERAL_PURPOSE, 13, 1, 0),
+
+	SOC_ENUM("SPDIF Mux", stac9766_SPDIF_enum),
+	SOC_ENUM("Mic1/2 Mux", stac9766_mic_enum),
+	SOC_ENUM("Record All Mux", stac9766_record_all_enum),
+	SOC_ENUM("Record Mux", stac9766_record_enum),
+	SOC_ENUM("Mono Mux", stac9766_mono_enum),
+	SOC_ENUM("Pop Bypass Mux", stac9766_popbypass_enum),
+};
+
+int stac9766_ac97_write(struct snd_soc_codec *codec, unsigned int reg,
+                                unsigned int val)
+{
+	u16 *cache = codec->reg_cache;
+
+	if (reg > AC97_STAC_PAGE0) {
+		stac9766_ac97_write(codec, AC97_INT_PAGING, 0);
+		soc_ac97_ops.write(codec->ac97, reg, val);
+		stac9766_ac97_write(codec, AC97_INT_PAGING, 1);
+		return 0;
+	}
+	if (reg / 2 > ARRAY_SIZE(stac9766_reg))
+		return -EIO;
+
+	soc_ac97_ops.write(codec->ac97, reg, val);
+	cache[reg / 2] = val;
+	return 0;
+}
+
+unsigned int stac9766_ac97_read(struct snd_soc_codec *codec, unsigned int reg)
+{
+	u16 val = 0, *cache = codec->reg_cache;
+
+	if (reg > AC97_STAC_PAGE0) {
+		stac9766_ac97_write(codec, AC97_INT_PAGING, 0);
+		val = soc_ac97_ops.read(codec->ac97, reg - AC97_STAC_PAGE0);
+		stac9766_ac97_write(codec, AC97_INT_PAGING, 1);
+		return val;
+	}
+	if (reg / 2 > ARRAY_SIZE(stac9766_reg))
+		return -EIO;
+
+	if (reg == AC97_RESET || reg == AC97_GPIO_STATUS ||
+		reg == AC97_INT_PAGING || reg == AC97_VENDOR_ID1 ||
+		reg == AC97_VENDOR_ID2) {
+
+		val = soc_ac97_ops.read(codec->ac97, reg);
+		return val;
+	}
+	return cache[reg / 2];
+}
+
+static int ac97_analog_prepare(struct snd_pcm_substream *substream,
+                                struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	unsigned short reg, vra;
+
+	vra = stac9766_ac97_read(codec, AC97_EXTENDED_STATUS);
+
+	vra |= 0x1; /* enable variable rate audio */
+
+	stac9766_ac97_write(codec, AC97_EXTENDED_STATUS, vra);
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		reg = AC97_PCM_FRONT_DAC_RATE;
+	else
+		reg = AC97_PCM_LR_ADC_RATE;
+
+	return stac9766_ac97_write(codec, reg, runtime->rate);
+}
+
+static int ac97_digital_prepare(struct snd_pcm_substream *substream,
+                                struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	unsigned short reg, vra;
+
+	stac9766_ac97_write(codec, AC97_SPDIF, 0x2002);
+
+	vra = stac9766_ac97_read(codec, AC97_EXTENDED_STATUS);
+	vra |= 0x5; /* Enable VRA and SPDIF out */
+
+	stac9766_ac97_write(codec, AC97_EXTENDED_STATUS, vra);
+
+	reg = AC97_PCM_FRONT_DAC_RATE;
+
+	return stac9766_ac97_write(codec, reg, runtime->rate);
+}
+
+static int ac97_digital_trigger(struct snd_pcm_substream *substream,
+								int cmd, struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	unsigned short vra;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_STOP:
+		vra = stac9766_ac97_read(codec, AC97_EXTENDED_STATUS);
+		vra &= !0x04;
+		stac9766_ac97_write(codec, AC97_EXTENDED_STATUS, vra);
+		break;
+	}
+	return 0;
+}
+
+static int stac9766_set_bias_level(struct snd_soc_codec *codec,
+                                enum snd_soc_bias_level level)
+{
+	switch (level) {
+	case SND_SOC_BIAS_ON: /* full On */
+	case SND_SOC_BIAS_PREPARE: /* partial On */
+	case SND_SOC_BIAS_STANDBY: /* Off, with power */
+		stac9766_ac97_write(codec, AC97_POWERDOWN, 0x0000);
+		break;
+	case SND_SOC_BIAS_OFF: /* Off, without power */
+		/* disable everything including AC link */
+		stac9766_ac97_write(codec, AC97_POWERDOWN, 0xffff);
+		break;
+	}
+	codec->bias_level = level;
+	return 0;
+}
+
+int stac9766_reset(struct snd_soc_codec *codec, int try_warm)
+{
+	if (try_warm && soc_ac97_ops.warm_reset) {
+		soc_ac97_ops.warm_reset(codec->ac97);
+		if (stac9766_ac97_read(codec, 0) == stac9766_reg[0])
+			return 1;
+	}
+
+	soc_ac97_ops.reset(codec->ac97);
+	if (soc_ac97_ops.warm_reset)
+		soc_ac97_ops.warm_reset(codec->ac97);
+	if (stac9766_ac97_read(codec, 0) != stac9766_reg[0])
+		return -EIO;
+	return 0;
+}
+
+static int stac9766_codec_suspend(struct platform_device *pdev,
+                                pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	stac9766_set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+
+static int stac9766_codec_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 id, reset;
+
+	reset = 0;
+	/* give the codec an AC97 warm reset to start the link */
+reset:
+	if (reset > 5) {
+		printk(KERN_ERR "stac9766 failed to resume");
+		return -EIO;
+	}
+	codec->ac97->bus->ops->warm_reset(codec->ac97);
+	id = soc_ac97_ops.read(codec->ac97, AC97_VENDOR_ID2);
+	if (id != 0x4c13) {
+		stac9766_reset(codec, 0);
+		reset++;
+		goto reset;
+	}
+	stac9766_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	if (codec->suspend_bias_level == SND_SOC_BIAS_ON)
+		stac9766_set_bias_level(codec, SND_SOC_BIAS_ON);
+
+	return 0;
+}
+
+static struct snd_soc_dai_ops stac9766_dai_ops_analog =
+{
+	.prepare = ac97_analog_prepare,
+};
+
+static struct snd_soc_dai_ops stac9766_dai_ops_digital =
+{
+	.prepare = ac97_digital_prepare,
+	.trigger = ac97_digital_trigger,
+};
+
+struct snd_soc_dai stac9766_dai[] = {
+{
+	.name = "stac9766 analog",
+	.id = 0,
+	.ac97_control = 1,
+
+	/* stream cababilities */
+	.playback = {
+		.stream_name = "stac9766 analog",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_8000_48000,
+		.formats = SND_SOC_STD_AC97_FMTS,
+	},
+	.capture = {
+		.stream_name = "stac9766 analog",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_8000_48000,
+		.formats = SND_SOC_STD_AC97_FMTS,
+	},
+	/* alsa ops */
+	.ops = &stac9766_dai_ops_analog,
+},
+{
+	.name = "stac9766 IEC958",
+	.id = 1,
+	.ac97_control = 1,
+
+	/* stream cababilities */
+	.playback = {
+		.stream_name = "stac9766 IEC958",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = SNDRV_PCM_RATE_32000 | \
+			SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE,
+	},
+	/* alsa ops */
+	.ops = &stac9766_dai_ops_digital,
+}};
+EXPORT_SYMBOL_GPL(stac9766_dai);
+
+static int stac9766_codec_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	int ret = 0;
+
+	printk(KERN_INFO "STAC9766 SoC Audio Codec %s\n", STAC9766_VERSION);
+
+	socdev->card->codec = kzalloc(sizeof(struct snd_soc_codec), GFP_KERNEL);
+	if (socdev->card->codec == NULL)
+		return -ENOMEM;
+	codec = socdev->card->codec;
+	mutex_init(&codec->mutex);
+
+	codec->reg_cache = kmemdup(stac9766_reg, sizeof(stac9766_reg), GFP_KERNEL);
+	if (codec->reg_cache == NULL) {
+		ret = -ENOMEM;
+		goto cache_err;
+	}
+	codec->reg_cache_size = sizeof(stac9766_reg);
+	codec->reg_cache_step = 2;
+
+	codec->name = "STAC9766";
+	codec->owner = THIS_MODULE;
+	codec->dai = stac9766_dai;
+	codec->num_dai = ARRAY_SIZE(stac9766_dai);
+	codec->write = stac9766_ac97_write;
+	codec->read = stac9766_ac97_read;
+	codec->set_bias_level = stac9766_set_bias_level;
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	ret = snd_soc_new_ac97_codec(codec, &soc_ac97_ops, 0);
+	if (ret < 0)
+		goto codec_err;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0)
+		goto pcm_err;
+
+	/* do a cold reset for the controller and then try
+	 * a warm reset followed by an optional cold reset for codec */
+	stac9766_reset(codec, 0);
+	ret = stac9766_reset(codec, 1);
+	if (ret < 0) {
+		printk(KERN_ERR "Failed to reset STAC9766: AC97 link error\n");
+		goto reset_err;
+	}
+
+	stac9766_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	snd_soc_add_controls(codec, stac9766_snd_ac97_controls, ARRAY_SIZE(
+	                                stac9766_snd_ac97_controls));
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0)
+		goto reset_err;
+	return 0;
+
+reset_err:
+	snd_soc_free_pcms(socdev);
+pcm_err:
+	snd_soc_free_ac97_codec(codec);
+codec_err:
+	kfree(codec->private_data);
+cache_err:
+	kfree(socdev->card->codec);
+	socdev->card->codec = NULL;
+	return ret;
+}
+
+static int stac9766_codec_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	if (codec == NULL)
+		return 0;
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_free_ac97_codec(codec);
+	kfree(codec->reg_cache);
+	kfree(codec);
+	return 0;
+}
+
+struct snd_soc_codec_device soc_codec_dev_stac9766 =
+{
+	.probe = stac9766_codec_probe,
+	.remove = stac9766_codec_remove,
+	.suspend = stac9766_codec_suspend,
+	.resume = stac9766_codec_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_stac9766);
+
+static int __init stac9766_modinit(void)
+{
+	return snd_soc_register_dais(stac9766_dai, ARRAY_SIZE(stac9766_dai));
+}
+module_init(stac9766_modinit);
+
+static void __exit stac9766_exit(void)
+{
+	snd_soc_unregister_dais(stac9766_dai, ARRAY_SIZE(stac9766_dai));
+}
+module_exit(stac9766_exit);
+
+MODULE_DESCRIPTION("ASoC stac9766 driver");
+MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/stac9766.h b/sound/soc/codecs/stac9766.h
new file mode 100644
index 0000000..65642eb
--- /dev/null
+++ b/sound/soc/codecs/stac9766.h
@@ -0,0 +1,21 @@
+/*
+ * stac9766.h  --  STAC9766 Soc Audio driver
+ */
+
+#ifndef _STAC9766_H
+#define _STAC9766_H
+
+#define AC97_STAC_PAGE0 0x1000
+#define AC97_STAC_DA_CONTROL (AC97_STAC_PAGE0 | 0x6A)
+#define AC97_STAC_ANALOG_SPECIAL (AC97_STAC_PAGE0 | 0x6E)
+#define AC97_STAC_STEREO_MIC 0x78
+
+/* STAC9766 DAI ID's */
+#define STAC9766_DAI_AC97_ANALOG		0
+#define STAC9766_DAI_AC97_DIGITAL		1
+
+extern struct snd_soc_dai stac9766_dai[];
+extern struct snd_soc_codec_device soc_codec_dev_stac9766;
+
+
+#endif
-- 
cgit v0.10.2


From 05e1efa2deb42b1bd548208e5c43f471e2cf0da1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 24 May 2009 13:32:24 +0100
Subject: ASoC: Fix minor issues in STAC9766 driver

Fairly minor issues:
 - Don't register the DAIs, it's not required for AC97 devices.
 - Make unexported functions static.
 - Wrap some excessively long lines.
 - Undo tab/space breakage.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/stac9766.c b/sound/soc/codecs/stac9766.c
index 7740cd5..8ad4b7b 100644
--- a/sound/soc/codecs/stac9766.c
+++ b/sound/soc/codecs/stac9766.c
@@ -52,12 +52,14 @@ static const u16 stac9766_reg[] = {
 	0x0000, 0x0000, 0x0000, 0x0000, /* 7e */
 };
 
-static const char *stac9766_record_mux[] = {"Mic", "CD", "Video", "AUX", "Line", "Stereo Mix", "Mono Mix", "Phone"};
+static const char *stac9766_record_mux[] = {"Mic", "CD", "Video", "AUX",
+			"Line", "Stereo Mix", "Mono Mix", "Phone"};
 static const char *stac9766_mono_mux[] = {"Mix", "Mic"};
 static const char *stac9766_mic_mux[] = {"Mic1", "Mic2"};
 static const char *stac9766_SPDIF_mux[] = {"PCM", "ADC Record"};
 static const char *stac9766_popbypass_mux[] = {"Normal", "Bypass Mixer"};
-static const char *stac9766_record_all_mux[] = {"All analog", "Analog plus DAC"};
+static const char *stac9766_record_all_mux[] = {"All analog",
+	"Analog plus DAC"};
 static const char *stac9766_boost1[] = {"0dB", "10dB"};
 static const char *stac9766_boost2[] = {"0dB", "20dB"};
 static const char *stac9766_stereo_mic[] = {"Off", "On"};
@@ -73,7 +75,8 @@ static const struct soc_enum stac9766_SPDIF_enum =
 static const struct soc_enum stac9766_popbypass_enum =
 	SOC_ENUM_SINGLE(AC97_GENERAL_PURPOSE, 15, 2, stac9766_popbypass_mux);
 static const struct soc_enum stac9766_record_all_enum =
-	SOC_ENUM_SINGLE(AC97_STAC_ANALOG_SPECIAL, 12, 2, stac9766_record_all_mux);
+	SOC_ENUM_SINGLE(AC97_STAC_ANALOG_SPECIAL, 12, 2,
+			stac9766_record_all_mux);
 static const struct soc_enum stac9766_boost1_enum =
 	SOC_ENUM_SINGLE(AC97_MIC, 6, 2, stac9766_boost1); /* 0/10dB */
 static const struct soc_enum stac9766_boost2_enum =
@@ -89,9 +92,11 @@ static const DECLARE_TLV_DB_LINEAR(mix_tlv, -3450, 1200);
 static const struct snd_kcontrol_new stac9766_snd_ac97_controls[] = {
 	SOC_DOUBLE_TLV("Speaker Volume", AC97_MASTER, 8, 0, 31, 1, master_tlv),
 	SOC_SINGLE("Speaker Switch", AC97_MASTER, 15, 1, 1),
-	SOC_DOUBLE_TLV("Headphone Volume", AC97_HEADPHONE, 8, 0, 31, 1, master_tlv),
+	SOC_DOUBLE_TLV("Headphone Volume", AC97_HEADPHONE, 8, 0, 31, 1,
+		       master_tlv),
 	SOC_SINGLE("Headphone Switch", AC97_HEADPHONE, 15, 1, 1),
-	SOC_SINGLE_TLV("Mono Out Volume", AC97_MASTER_MONO, 0, 31, 1, master_tlv),
+	SOC_SINGLE_TLV("Mono Out Volume", AC97_MASTER_MONO, 0, 31, 1,
+		       master_tlv),
 	SOC_SINGLE("Mono Out Switch", AC97_MASTER_MONO, 15, 1, 1),
 
 	SOC_DOUBLE_TLV("Record Volume", AC97_REC_GAIN, 8, 0, 15, 0, record_tlv),
@@ -133,8 +138,8 @@ static const struct snd_kcontrol_new stac9766_snd_ac97_controls[] = {
 	SOC_ENUM("Pop Bypass Mux", stac9766_popbypass_enum),
 };
 
-int stac9766_ac97_write(struct snd_soc_codec *codec, unsigned int reg,
-                                unsigned int val)
+static int stac9766_ac97_write(struct snd_soc_codec *codec, unsigned int reg,
+			       unsigned int val)
 {
 	u16 *cache = codec->reg_cache;
 
@@ -152,7 +157,8 @@ int stac9766_ac97_write(struct snd_soc_codec *codec, unsigned int reg,
 	return 0;
 }
 
-unsigned int stac9766_ac97_read(struct snd_soc_codec *codec, unsigned int reg)
+static unsigned int stac9766_ac97_read(struct snd_soc_codec *codec,
+				       unsigned int reg)
 {
 	u16 val = 0, *cache = codec->reg_cache;
 
@@ -176,7 +182,7 @@ unsigned int stac9766_ac97_read(struct snd_soc_codec *codec, unsigned int reg)
 }
 
 static int ac97_analog_prepare(struct snd_pcm_substream *substream,
-                                struct snd_soc_dai *dai)
+			       struct snd_soc_dai *dai)
 {
 	struct snd_soc_codec *codec = dai->codec;
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -197,7 +203,7 @@ static int ac97_analog_prepare(struct snd_pcm_substream *substream,
 }
 
 static int ac97_digital_prepare(struct snd_pcm_substream *substream,
-                                struct snd_soc_dai *dai)
+				struct snd_soc_dai *dai)
 {
 	struct snd_soc_codec *codec = dai->codec;
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -216,7 +222,7 @@ static int ac97_digital_prepare(struct snd_pcm_substream *substream,
 }
 
 static int ac97_digital_trigger(struct snd_pcm_substream *substream,
-								int cmd, struct snd_soc_dai *dai)
+				int cmd, struct snd_soc_dai *dai)
 {
 	struct snd_soc_codec *codec = dai->codec;
 	unsigned short vra;
@@ -232,7 +238,7 @@ static int ac97_digital_trigger(struct snd_pcm_substream *substream,
 }
 
 static int stac9766_set_bias_level(struct snd_soc_codec *codec,
-                                enum snd_soc_bias_level level)
+				   enum snd_soc_bias_level level)
 {
 	switch (level) {
 	case SND_SOC_BIAS_ON: /* full On */
@@ -249,7 +255,7 @@ static int stac9766_set_bias_level(struct snd_soc_codec *codec,
 	return 0;
 }
 
-int stac9766_reset(struct snd_soc_codec *codec, int try_warm)
+static int stac9766_reset(struct snd_soc_codec *codec, int try_warm)
 {
 	if (try_warm && soc_ac97_ops.warm_reset) {
 		soc_ac97_ops.warm_reset(codec->ac97);
@@ -266,7 +272,7 @@ int stac9766_reset(struct snd_soc_codec *codec, int try_warm)
 }
 
 static int stac9766_codec_suspend(struct platform_device *pdev,
-                                pm_message_t state)
+				  pm_message_t state)
 {
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
 	struct snd_soc_codec *codec = socdev->card->codec;
@@ -303,13 +309,11 @@ reset:
 	return 0;
 }
 
-static struct snd_soc_dai_ops stac9766_dai_ops_analog =
-{
+static struct snd_soc_dai_ops stac9766_dai_ops_analog = {
 	.prepare = ac97_analog_prepare,
 };
 
-static struct snd_soc_dai_ops stac9766_dai_ops_digital =
-{
+static struct snd_soc_dai_ops stac9766_dai_ops_digital = {
 	.prepare = ac97_digital_prepare,
 	.trigger = ac97_digital_trigger,
 };
@@ -354,7 +358,8 @@ struct snd_soc_dai stac9766_dai[] = {
 	},
 	/* alsa ops */
 	.ops = &stac9766_dai_ops_digital,
-}};
+}
+};
 EXPORT_SYMBOL_GPL(stac9766_dai);
 
 static int stac9766_codec_probe(struct platform_device *pdev)
@@ -371,7 +376,8 @@ static int stac9766_codec_probe(struct platform_device *pdev)
 	codec = socdev->card->codec;
 	mutex_init(&codec->mutex);
 
-	codec->reg_cache = kmemdup(stac9766_reg, sizeof(stac9766_reg), GFP_KERNEL);
+	codec->reg_cache = kmemdup(stac9766_reg, sizeof(stac9766_reg),
+				   GFP_KERNEL);
 	if (codec->reg_cache == NULL) {
 		ret = -ENOMEM;
 		goto cache_err;
@@ -409,8 +415,8 @@ static int stac9766_codec_probe(struct platform_device *pdev)
 
 	stac9766_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
-	snd_soc_add_controls(codec, stac9766_snd_ac97_controls, ARRAY_SIZE(
-	                                stac9766_snd_ac97_controls));
+	snd_soc_add_controls(codec, stac9766_snd_ac97_controls,
+			     ARRAY_SIZE(stac9766_snd_ac97_controls));
 
 	ret = snd_soc_init_card(socdev);
 	if (ret < 0)
@@ -444,8 +450,7 @@ static int stac9766_codec_remove(struct platform_device *pdev)
 	return 0;
 }
 
-struct snd_soc_codec_device soc_codec_dev_stac9766 =
-{
+struct snd_soc_codec_device soc_codec_dev_stac9766 = {
 	.probe = stac9766_codec_probe,
 	.remove = stac9766_codec_remove,
 	.suspend = stac9766_codec_suspend,
@@ -453,18 +458,6 @@ struct snd_soc_codec_device soc_codec_dev_stac9766 =
 };
 EXPORT_SYMBOL_GPL(soc_codec_dev_stac9766);
 
-static int __init stac9766_modinit(void)
-{
-	return snd_soc_register_dais(stac9766_dai, ARRAY_SIZE(stac9766_dai));
-}
-module_init(stac9766_modinit);
-
-static void __exit stac9766_exit(void)
-{
-	snd_soc_unregister_dais(stac9766_dai, ARRAY_SIZE(stac9766_dai));
-}
-module_exit(stac9766_exit);
-
 MODULE_DESCRIPTION("ASoC stac9766 driver");
 MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
 MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 93574844bc3906941b89d6b6f72e01e87413f3c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ozan=20=C3=87a=C4=9Flayan?= <ozan@pardus.org.tr>
Date: Sat, 23 May 2009 15:00:04 +0300
Subject: ALSA: hda - Add forced codec-slots for ASUS W5Fm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ASUS W5Fm needs the fixed codec-slots to probe to override the BIOS
problem like W5F.

Tested-by: Alp Kılıç <kilic.alp@gmail.com>
Signed-off-by: Ozan Çağlayan <ozan@pardus.org.tr>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 21e99cf..3128e1a 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2141,6 +2141,7 @@ static struct snd_pci_quirk probe_mask_list[] __devinitdata = {
 	/* including bogus ALC268 in slot#2 that conflicts with ALC888 */
 	SND_PCI_QUIRK(0x17c0, 0x4085, "Medion MD96630", 0x01),
 	/* forced codec slots */
+	SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103),
 	SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103),
 	{}
 };
-- 
cgit v0.10.2


From 679d92ed1403b6cf9a19aa42ec62b81cae1aa017 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 24 May 2009 19:00:08 +0200
Subject: ALSA: hda - Add 5stack-no-fp model for STAC927x

The recent fix for the headphone volume control on IDT/STAC codecs
resulted in the removal of invalid "Side" volume eventually.  But,
if the front panel doesn't exist, this setup could be regarded as a
sort of regression, as reported in kernel bug #13250.

Now as a workaround, a new model 5stack-no-fp is added so that the user
without the front panel can choose this one explicitly.

Reference: bko#13250
	http://bugzilla.kernel.org/show_bug.cgi?id=13250

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 8eec05b..322869f 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -334,6 +334,7 @@ STAC9227/9228/9229/927x
   ref-no-jd	Reference board without HP/Mic jack detection
   3stack	D965 3stack
   5stack	D965 5stack + SPDIF
+  5stack-no-fp	D965 5stack without front panel
   dell-3stack	Dell Dimension E520
   dell-bios	Fixes with Dell BIOS setup
   auto		BIOS setup (default)
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 03b3646..d2fd8ef 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -150,6 +150,7 @@ enum {
 	STAC_D965_REF,
 	STAC_D965_3ST,
 	STAC_D965_5ST,
+	STAC_D965_5ST_NO_FP,
 	STAC_DELL_3ST,
 	STAC_DELL_BIOS,
 	STAC_927X_MODELS
@@ -2154,6 +2155,13 @@ static unsigned int d965_5st_pin_configs[14] = {
 	0x40000100, 0x40000100
 };
 
+static unsigned int d965_5st_no_fp_pin_configs[14] = {
+	0x40000100, 0x40000100, 0x0181304e, 0x01014010,
+	0x01a19040, 0x01011012, 0x01016011, 0x40000100,
+	0x40000100, 0x40000100, 0x40000100, 0x01442070,
+	0x40000100, 0x40000100
+};
+
 static unsigned int dell_3st_pin_configs[14] = {
 	0x02211230, 0x02a11220, 0x01a19040, 0x01114210,
 	0x01111212, 0x01116211, 0x01813050, 0x01112214,
@@ -2166,6 +2174,7 @@ static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = {
 	[STAC_D965_REF]  = ref927x_pin_configs,
 	[STAC_D965_3ST]  = d965_3st_pin_configs,
 	[STAC_D965_5ST]  = d965_5st_pin_configs,
+	[STAC_D965_5ST_NO_FP]  = d965_5st_no_fp_pin_configs,
 	[STAC_DELL_3ST]  = dell_3st_pin_configs,
 	[STAC_DELL_BIOS] = NULL,
 };
@@ -2176,6 +2185,7 @@ static const char *stac927x_models[STAC_927X_MODELS] = {
 	[STAC_D965_REF]		= "ref",
 	[STAC_D965_3ST]		= "3stack",
 	[STAC_D965_5ST]		= "5stack",
+	[STAC_D965_5ST_NO_FP]	= "5stack-no-fp",
 	[STAC_DELL_3ST]		= "dell-3stack",
 	[STAC_DELL_BIOS]	= "dell-bios",
 };
-- 
cgit v0.10.2


From 657cafa6b0f5296424d6f43f6f6eeb4a3222117e Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Sun, 24 May 2009 15:30:48 +0200
Subject: Use a format for linux_banner

There is no format specifiers left in the linux_banner, and gcc-4.3
complains seeing the printk.

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/init/main.c b/init/main.c
index 3bbf93b..d721dad 100644
--- a/init/main.c
+++ b/init/main.c
@@ -566,8 +566,7 @@ asmlinkage void __init start_kernel(void)
 	tick_init();
 	boot_cpu_init();
 	page_address_init();
-	printk(KERN_NOTICE);
-	printk(linux_banner);
+	printk(KERN_NOTICE "%s", linux_banner);
 	setup_arch(&command_line);
 	mm_init_owner(&init_mm, &init_task);
 	setup_command_line(command_line);
-- 
cgit v0.10.2


From 34e559eabf57d07c55a57e9455c1df2e6a208d57 Mon Sep 17 00:00:00 2001
From: Sergey Lapin <slapin@ossfans.org>
Date: Fri, 8 May 2009 15:58:57 +0100
Subject: [ARM] 5514/1: AFEB9260 sound support

ASoC driver for AT91SAM9260-based AFEB9260 board

Signed-off-by: Sergey Lapin <slapin@ossfans.org>
Signed-off-by: Andrew Victor <linux@maxim.org.za>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c
index e263fda..e3f1c56 100644
--- a/arch/arm/mach-at91/board-afeb-9260v1.c
+++ b/arch/arm/mach-at91/board-afeb-9260v1.c
@@ -164,6 +164,8 @@ static struct at91_mmc_data __initdata afeb9260_mmc_data = {
 
 static struct i2c_board_info __initdata afeb9260_i2c_devices[] = {
 	{
+		I2C_BOARD_INFO("tlv320aic23", 0x1a),
+	}, {
 		I2C_BOARD_INFO("fm3130", 0x68),
 	}, {
 		I2C_BOARD_INFO("24c64", 0x50),
@@ -196,6 +198,8 @@ static void __init afeb9260_board_init(void)
 	/* I2C */
 	at91_add_device_i2c(afeb9260_i2c_devices,
 			ARRAY_SIZE(afeb9260_i2c_devices));
+	/* Audio */
+	at91_add_device_ssc(AT91SAM9260_ID_SSC, ATMEL_SSC_TX);
 }
 
 MACHINE_START(AFEB9260, "Custom afeb9260 board")
-- 
cgit v0.10.2


From 6cb38c5f5d7dda38466389082d0d965188c41b81 Mon Sep 17 00:00:00 2001
From: Ryan Mallon <ryan@bluewatersys.com>
Date: Thu, 21 May 2009 22:11:52 +0100
Subject: [ARM] 5523/1: ep93xx phys offset selection

This patch adds a Kconfig option to select between ep93xx boards which
boot from the SDRAM bank at 0x00000000 (SDCE3/SyncBoot) and those which
boot from 0xc0000000 (SDCE0). This corrects a problem which causes
invalid images to be built for boards which boot from 0xc0000000.

Signed-off-by: Ryan Mallon <ryan@bluewatersys.com>
Acked-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/Kconfig b/arch/arm/mach-ep93xx/Kconfig
index 9414eb6..d7291c6 100644
--- a/arch/arm/mach-ep93xx/Kconfig
+++ b/arch/arm/mach-ep93xx/Kconfig
@@ -9,18 +9,37 @@ config CRUNCH
 
 comment "EP93xx Platforms"
 
+choice
+	prompt "EP93xx first SDRAM bank selection"
+	default EP93XX_SDCE3_SYNC_PHYS_OFFSET
+
+config EP93XX_SDCE3_SYNC_PHYS_OFFSET
+	bool "0x00000000 - SDCE3/SyncBoot"
+	help
+	  Select this option if you want support for EP93xx boards with the
+	  first SDRAM bank at 0x00000000
+
+config EP93XX_SDCE0_PHYS_OFFSET
+	bool "0xc0000000 - SDCEO"
+	help
+	  Select this option if you want support for EP93xx boards with the
+	  first SDRAM bank at 0xc0000000
+
+endchoice
+
 config MACH_ADSSPHERE
 	bool "Support ADS Sphere"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	help
 	  Say 'Y' here if you want your kernel to support the ADS
 	  Sphere board.
 
 config MACH_EDB93XX
 	bool
-	default n
 
 config MACH_EDB9301
 	bool "Support Cirrus Logic EDB9301"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
@@ -28,6 +47,7 @@ config MACH_EDB9301
 
 config MACH_EDB9302
 	bool "Support Cirrus Logic EDB9302"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
@@ -35,6 +55,7 @@ config MACH_EDB9302
 
 config MACH_EDB9302A
 	bool "Support Cirrus Logic EDB9302A"
+	depends on EP93XX_SDCE0_PHYS_OFFSET
 	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
@@ -42,6 +63,7 @@ config MACH_EDB9302A
 
 config MACH_EDB9307
 	bool "Support Cirrus Logic EDB9307"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
@@ -49,6 +71,7 @@ config MACH_EDB9307
 
 config MACH_EDB9307A
 	bool "Support Cirrus Logic EDB9307A"
+	depends on EP93XX_SDCE0_PHYS_OFFSET
 	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
@@ -56,6 +79,7 @@ config MACH_EDB9307A
 
 config MACH_EDB9312
 	bool "Support Cirrus Logic EDB9312"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
@@ -63,6 +87,7 @@ config MACH_EDB9312
 
 config MACH_EDB9315
 	bool "Support Cirrus Logic EDB9315"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
@@ -70,12 +95,14 @@ config MACH_EDB9315
 
 config MACH_EDB9315A
 	bool "Support Cirrus Logic EDB9315A"
+	depends on EP93XX_SDCE0_PHYS_OFFSET
 	select MACH_EDB93XX
 	help
 	  Say 'Y' here if you want your kernel to support the Cirrus
 	  Logic EDB9315A Evaluation Board.
 
 config MACH_GESBC9312
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	bool "Support Glomation GESBC-9312-sx"
 	help
 	  Say 'Y' here if you want your kernel to support the Glomation
@@ -83,10 +110,10 @@ config MACH_GESBC9312
 
 config MACH_MICRO9
 	bool
-	default n
 
 config MACH_MICRO9H
 	bool "Support Contec Hypercontrol Micro9-H"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	select MACH_MICRO9
 	help
 	  Say 'Y' here if you want your kernel to support the
@@ -94,6 +121,7 @@ config MACH_MICRO9H
 
 config MACH_MICRO9M
 	bool "Support Contec Hypercontrol Micro9-M"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	select MACH_MICRO9
 	help
 	  Say 'Y' here if you want your kernel to support the
@@ -101,6 +129,7 @@ config MACH_MICRO9M
 
 config MACH_MICRO9L
 	bool "Support Contec Hypercontrol Micro9-L"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	select MACH_MICRO9
 	help
 	  Say 'Y' here if you want your kernel to support the
@@ -108,6 +137,7 @@ config MACH_MICRO9L
 
 config MACH_TS72XX
 	bool "Support Technologic Systems TS-72xx SBC"
+	depends on EP93XX_SDCE3_SYNC_PHYS_OFFSET
 	help
 	  Say 'Y' here if you want your kernel to support the
 	  Technologic Systems TS-72xx board.
diff --git a/arch/arm/mach-ep93xx/Makefile.boot b/arch/arm/mach-ep93xx/Makefile.boot
index d5561ad..27a085a 100644
--- a/arch/arm/mach-ep93xx/Makefile.boot
+++ b/arch/arm/mach-ep93xx/Makefile.boot
@@ -1,2 +1,5 @@
-   zreladdr-y	:= 0x00008000
-params_phys-y	:= 0x00000100
+   zreladdr-$(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)	:= 0x00008000
+params_phys-$(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)	:= 0x00000100
+
+   zreladdr-$(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)		:= 0xc0008000
+params_phys-$(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)		:= 0xc0000100
diff --git a/arch/arm/mach-ep93xx/include/mach/memory.h b/arch/arm/mach-ep93xx/include/mach/memory.h
index 5c80c3c..925b12e 100644
--- a/arch/arm/mach-ep93xx/include/mach/memory.h
+++ b/arch/arm/mach-ep93xx/include/mach/memory.h
@@ -5,6 +5,12 @@
 #ifndef __ASM_ARCH_MEMORY_H
 #define __ASM_ARCH_MEMORY_H
 
+#if defined(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)
 #define PHYS_OFFSET		UL(0x00000000)
+#elif defined(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)
+#define PHYS_OFFSET		UL(0xc0000000)
+#else
+#error "Kconfig bug: No EP93xx PHYS_OFFSET set"
+#endif
 
 #endif
-- 
cgit v0.10.2


From 6b5ca05d9590adc94620a83c5eafed28435e065f Mon Sep 17 00:00:00 2001
From: Ryan Mallon <ryan@bluewatersys.com>
Date: Thu, 21 May 2009 22:16:18 +0100
Subject: [ARM] 5523/2: Updated ep93xx defconfig

Updated defconfig for ep93xx. By default, support for all boards which
boot from 0x00000000 (SDCE3/SyncBoot) is included. Also updated the
defconfig to use EABI for building the kernel.

Signed-off-by: Ryan Mallon <ryan@bluewatersys.com>
Acked-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig
index 3f89d5f..3fb083b 100644
--- a/arch/arm/configs/ep93xx_defconfig
+++ b/arch/arm/configs/ep93xx_defconfig
@@ -1,12 +1,19 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.20-rc1
-# Sat Dec 16 06:05:24 2006
+# Linux kernel version: 2.6.30-rc3
+# Tue May 19 12:26:49 2009
 #
 CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
 # CONFIG_GENERIC_TIME is not set
+# CONFIG_GENERIC_CLOCKEVENTS is not set
 CONFIG_MMU=y
+# CONFIG_NO_IOPORT is not set
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
 CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 CONFIG_HARDIRQS_SW_RESEND=y
 CONFIG_GENERIC_IRQ_PROBE=y
@@ -15,42 +22,54 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_VECTORS_BASE=0xffff0000
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
-# Code maturity level options
+# General setup
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
-# CONFIG_IPC_NS is not set
+CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_POSIX_MQUEUE is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
-# CONFIG_UTS_NS is not set
 # CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
 CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
 # CONFIG_RELAY is not set
-CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -58,31 +77,38 @@ CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
-CONFIG_SLAB=y
+CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_CLK=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-
-#
-# Block layer
-#
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
 
 #
 # IO Schedulers
@@ -96,6 +122,7 @@ CONFIG_DEFAULT_DEADLINE=y
 # CONFIG_DEFAULT_CFQ is not set
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="deadline"
+# CONFIG_FREEZER is not set
 
 #
 # System Type
@@ -105,29 +132,40 @@ CONFIG_DEFAULT_IOSCHED="deadline"
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
 # CONFIG_ARCH_AT91 is not set
-# CONFIG_ARCH_CLPS7500 is not set
 # CONFIG_ARCH_CLPS711X is not set
-# CONFIG_ARCH_CO285 is not set
+# CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 CONFIG_ARCH_EP93XX=y
 # CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_MXC is not set
 # CONFIG_ARCH_NETX is not set
 # CONFIG_ARCH_H720X is not set
 # CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_IOP13XX is not set
 # CONFIG_ARCH_IOP32X is not set
 # CONFIG_ARCH_IOP33X is not set
-# CONFIG_ARCH_IOP13XX is not set
-# CONFIG_ARCH_IXP4XX is not set
-# CONFIG_ARCH_IXP2000 is not set
 # CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
 # CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_MMP is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_W90X900 is not set
 # CONFIG_ARCH_PNX4008 is not set
 # CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_MSM is not set
 # CONFIG_ARCH_RPC is not set
 # CONFIG_ARCH_SA1100 is not set
 # CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
 
 #
@@ -138,14 +176,24 @@ CONFIG_CRUNCH=y
 #
 # EP93xx Platforms
 #
+# CONFIG_EP93XX_SDCE0_PHYS_OFFSET is not set
+CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET=y
 CONFIG_MACH_ADSSPHERE=y
+CONFIG_MACH_EDB93XX=y
+CONFIG_MACH_EDB9301=y
 CONFIG_MACH_EDB9302=y
-CONFIG_MACH_EDB9302A=y
+CONFIG_MACH_EDB9307=y
 CONFIG_MACH_EDB9312=y
 CONFIG_MACH_EDB9315=y
-CONFIG_MACH_EDB9315A=y
 CONFIG_MACH_GESBC9312=y
+CONFIG_MACH_MICRO9=y
+CONFIG_MACH_MICRO9H=y
+CONFIG_MACH_MICRO9M=y
+CONFIG_MACH_MICRO9L=y
 CONFIG_MACH_TS72XX=y
+CONFIG_EP93XX_EARLY_UART1=y
+# CONFIG_EP93XX_EARLY_UART2 is not set
+# CONFIG_EP93XX_EARLY_UART3 is not set
 
 #
 # Processor Type
@@ -154,6 +202,7 @@ CONFIG_CPU_32=y
 CONFIG_CPU_ARM920T=y
 CONFIG_CPU_32v4T=y
 CONFIG_CPU_ABRT_EV4T=y
+CONFIG_CPU_PABRT_NOIFAR=y
 CONFIG_CPU_CACHE_V4WT=y
 CONFIG_CPU_CACHE_VIVT=y
 CONFIG_CPU_COPY_V4WB=y
@@ -168,34 +217,47 @@ CONFIG_ARM_THUMB=y
 # CONFIG_CPU_ICACHE_DISABLE is not set
 # CONFIG_CPU_DCACHE_DISABLE is not set
 # CONFIG_CPU_DCACHE_WRITETHROUGH is not set
+# CONFIG_OUTER_CACHE is not set
 CONFIG_ARM_VIC=y
+CONFIG_COMMON_CLKDEV=y
 
 #
 # Bus support
 #
 CONFIG_ARM_AMBA=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
 # CONFIG_PCCARD is not set
 
 #
 # Kernel Features
 #
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
 # CONFIG_PREEMPT is not set
 CONFIG_HZ=100
-# CONFIG_AEABI is not set
-# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+# CONFIG_HIGHMEM is not set
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
 # CONFIG_DISCONTIGMEM_MANUAL is not set
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4096
-# CONFIG_RESOURCES_64BIT is not set
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_ALIGNMENT_TRAP=y
 
 #
@@ -205,6 +267,12 @@ CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_CMDLINE="console=ttyAM0,115200 root=/dev/nfs ip=bootp"
 # CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+# CONFIG_CPU_IDLE is not set
 
 #
 # Floating point emulation
@@ -221,32 +289,31 @@ CONFIG_FPE_NWFPE_XP=y
 # Userspace binary formats
 #
 CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
 # CONFIG_BINFMT_AOUT is not set
 # CONFIG_BINFMT_MISC is not set
-# CONFIG_ARTHUR is not set
 
 #
 # Power management options
 #
 # CONFIG_PM is not set
-# CONFIG_APM is not set
-
-#
-# Networking
-#
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
 CONFIG_NET=y
 
 #
 # Networking options
 #
-# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
 CONFIG_XFRM=y
 # CONFIG_XFRM_USER is not set
 # CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
 CONFIG_NET_KEY=y
+# CONFIG_NET_KEY_MIGRATE is not set
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
@@ -267,6 +334,7 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_XFRM_MODE_TRANSPORT=y
 CONFIG_INET_XFRM_MODE_TUNNEL=y
 CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
@@ -276,6 +344,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 CONFIG_IPV6=y
 # CONFIG_IPV6_PRIVACY is not set
 # CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
 # CONFIG_INET6_AH is not set
 # CONFIG_INET6_ESP is not set
 # CONFIG_INET6_IPCOMP is not set
@@ -289,25 +358,15 @@ CONFIG_IPV6=y
 # CONFIG_IPV6_SIT is not set
 # CONFIG_IPV6_TUNNEL is not set
 # CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_IPV6_MROUTE is not set
 # CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
 # CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
 # CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
 # CONFIG_VLAN_8021Q is not set
 # CONFIG_DECNET is not set
 # CONFIG_LLC2 is not set
@@ -317,20 +376,28 @@ CONFIG_IPV6=y
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
+# CONFIG_PHONET is not set
 # CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
 
 #
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
+# CONFIG_AF_RXRPC is not set
+CONFIG_WIRELESS=y
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_OLD_REGULATORY is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_LIB80211 is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
 
 #
 # Device Drivers
@@ -339,41 +406,39 @@ CONFIG_IPV6=y
 #
 # Generic Driver Options
 #
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 # CONFIG_FW_LOADER is not set
 # CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
 # CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
 # CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
 CONFIG_MTD=y
 # CONFIG_MTD_DEBUG is not set
 CONFIG_MTD_CONCAT=y
 CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
 CONFIG_MTD_REDBOOT_PARTS=y
 CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1
 # CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set
 # CONFIG_MTD_REDBOOT_PARTS_READONLY is not set
 CONFIG_MTD_CMDLINE_PARTS=y
 # CONFIG_MTD_AFS_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
 
 #
 # User Modules And Translation Layers
 #
 CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
 CONFIG_MTD_BLOCK=y
 # CONFIG_FTL is not set
 # CONFIG_NFTL is not set
 # CONFIG_INFTL is not set
 # CONFIG_RFD_FTL is not set
 # CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
 
 #
 # RAM/ROM/Flash chip drivers
@@ -404,16 +469,13 @@ CONFIG_MTD_CFI_UTIL=y
 # CONFIG_MTD_RAM is not set
 CONFIG_MTD_ROM=y
 # CONFIG_MTD_ABSENT is not set
-# CONFIG_MTD_OBSOLETE_CHIPS is not set
 
 #
 # Mapping drivers for chip access
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_PHYSMAP_START=0x0
-CONFIG_MTD_PHYSMAP_LEN=0x0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=1
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_ARM_INTEGRATOR is not set
 # CONFIG_MTD_PLATRAM is not set
 
@@ -431,49 +493,58 @@ CONFIG_MTD_PHYSMAP_BANKWIDTH=1
 # CONFIG_MTD_DOC2000 is not set
 # CONFIG_MTD_DOC2001 is not set
 # CONFIG_MTD_DOC2001PLUS is not set
-
-#
-# NAND Flash Device Drivers
-#
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_VERIFY_WRITE=y
 # CONFIG_MTD_NAND_ECC_SMC is not set
-CONFIG_MTD_NAND_TS7250=y
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+# CONFIG_MTD_NAND_GPIO is not set
+# CONFIG_MTD_NAND_TS7250 is not set
 CONFIG_MTD_NAND_IDS=y
 # CONFIG_MTD_NAND_DISKONCHIP is not set
 # CONFIG_MTD_NAND_NANDSIM is not set
-
-#
-# OneNAND Flash Device Drivers
-#
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_ALAUDA is not set
 # CONFIG_MTD_ONENAND is not set
 
 #
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
+# LPDDR flash memory drivers
 #
+# CONFIG_MTD_LPDDR is not set
 
 #
-# Block devices
+# UBI - Unsorted block images
 #
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 # CONFIG_BLK_DEV_LOOP is not set
 CONFIG_BLK_DEV_NBD=y
 # CONFIG_BLK_DEV_UB is not set
 # CONFIG_BLK_DEV_RAM is not set
-# CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ICS932S401 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+CONFIG_EEPROM_LEGACY=y
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
 
 #
 # SCSI device support
 #
 # CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
 # CONFIG_SCSI_TGT is not set
 # CONFIG_SCSI_NETLINK is not set
 # CONFIG_SCSI_PROC_FS is not set
@@ -495,6 +566,7 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_CONSTANTS is not set
 # CONFIG_SCSI_LOGGING is not set
 # CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
 
 #
 # SCSI Transports
@@ -502,92 +574,71 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_SPI_ATTRS is not set
 # CONFIG_SCSI_FC_ATTRS is not set
 # CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
 # CONFIG_SCSI_SAS_LIBSAS is not set
-
-#
-# SCSI low-level drivers
-#
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
+# CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
-
-#
-# Serial ATA (prod) and Parallel ATA (experimental) drivers
-#
+# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 # CONFIG_ATA is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
 # CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
-
-#
-# PHY device support
-#
+# CONFIG_VETH is not set
 # CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 CONFIG_EP93XX_ETH=y
+# CONFIG_AX88796 is not set
 # CONFIG_SMC91X is not set
 # CONFIG_DM9000 is not set
+# CONFIG_ETHOC is not set
+# CONFIG_SMC911X is not set
+# CONFIG_SMSC911X is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+CONFIG_NETDEV_1000=y
+CONFIG_NETDEV_10000=y
 
 #
-# Ethernet (1000 Mbit)
-#
-
-#
-# Ethernet (10000 Mbit)
-#
-
-#
-# Token Ring devices
+# Wireless LAN
 #
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
 
 #
-# Wireless LAN (non-hamradio)
+# Enable WiMAX (Networking options) to see the WiMAX drivers
 #
-# CONFIG_NET_RADIO is not set
 
 #
-# Wan interfaces
+# USB Network Adapters
 #
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+CONFIG_USB_RTL8150=y
+# CONFIG_USB_USBNET is not set
 # CONFIG_WAN is not set
 # CONFIG_PPP is not set
 # CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
 # CONFIG_NETCONSOLE is not set
 # CONFIG_NETPOLL is not set
 # CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
 # CONFIG_ISDN is not set
 
 #
@@ -605,6 +656,7 @@ CONFIG_EP93XX_ETH=y
 # Character devices
 #
 # CONFIG_VT is not set
+CONFIG_DEVKMEM=y
 # CONFIG_SERIAL_NONSTANDARD is not set
 
 #
@@ -621,104 +673,101 @@ CONFIG_SERIAL_AMBA_PL010_CONSOLE=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 # CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
 # CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-CONFIG_WATCHDOG=y
-# CONFIG_WATCHDOG_NOWAYOUT is not set
-
-#
-# Watchdog Device Drivers
-#
-# CONFIG_SOFT_WATCHDOG is not set
-CONFIG_EP93XX_WATCHDOG=y
-
-#
-# USB-based Watchdog Cards
-#
-# CONFIG_USBPCWATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
-# CONFIG_NVRAM is not set
-# CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
 
 #
-# TPM devices
+# I2C Hardware Bus support
 #
-# CONFIG_TCG_TPM is not set
 
 #
-# I2C support
+# I2C system bus drivers (mostly embedded / system-on-chip)
 #
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_SIMTEC is not set
 
 #
-# I2C Algorithms
+# External I2C/SMBus adapter drivers
 #
-CONFIG_I2C_ALGOBIT=y
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_TINY_USB is not set
 
 #
-# I2C Hardware Bus support
+# Other I2C/SMBus bus drivers
 #
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PCA_PLATFORM is not set
 # CONFIG_I2C_STUB is not set
-# CONFIG_I2C_PCA_ISA is not set
 
 #
 # Miscellaneous I2C Chip support
 #
-# CONFIG_SENSORS_DS1337 is not set
-# CONFIG_SENSORS_DS1374 is not set
-CONFIG_EEPROM_LEGACY=y
+# CONFIG_DS1682 is not set
 # CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
 CONFIG_I2C_DEBUG_CORE=y
 CONFIG_I2C_DEBUG_ALGO=y
 CONFIG_I2C_DEBUG_BUS=y
 CONFIG_I2C_DEBUG_CHIP=y
+# CONFIG_SPI is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+# CONFIG_GPIO_SYSFS is not set
 
 #
-# SPI support
+# Memory mapped GPIO expanders:
 #
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
 
 #
-# Dallas's 1-wire bus
+# I2C GPIO expanders:
 #
-# CONFIG_W1 is not set
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
 
 #
-# Hardware Monitoring support
+# PCI GPIO expanders:
 #
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_AD7414 is not set
+# CONFIG_SENSORS_AD7418 is not set
 # CONFIG_SENSORS_ADM1021 is not set
 # CONFIG_SENSORS_ADM1025 is not set
 # CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1029 is not set
 # CONFIG_SENSORS_ADM1031 is not set
 # CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ADT7462 is not set
+# CONFIG_SENSORS_ADT7470 is not set
+# CONFIG_SENSORS_ADT7473 is not set
+# CONFIG_SENSORS_ADT7475 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
@@ -732,158 +781,188 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_LM87 is not set
 # CONFIG_SENSORS_LM90 is not set
 # CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
+# CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
 # CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_MAX6650 is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_SHT15 is not set
+# CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
 # CONFIG_SENSORS_SMSC47M192 is not set
 # CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_THMC50 is not set
 # CONFIG_SENSORS_VT1211 is not set
 # CONFIG_SENSORS_W83781D is not set
 # CONFIG_SENSORS_W83791D is not set
 # CONFIG_SENSORS_W83792D is not set
 # CONFIG_SENSORS_W83793 is not set
 # CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83L786NG is not set
 # CONFIG_SENSORS_W83627HF is not set
 # CONFIG_SENSORS_W83627EHF is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
 
 #
-# Misc devices
+# Watchdog Device Drivers
 #
-# CONFIG_TIFM_CORE is not set
+# CONFIG_SOFT_WATCHDOG is not set
+CONFIG_EP93XX_WATCHDOG=y
 
 #
-# LED devices
+# USB-based Watchdog Cards
 #
-# CONFIG_NEW_LEDS is not set
+# CONFIG_USBPCWATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
 
 #
-# LED drivers
+# Sonics Silicon Backplane
 #
+# CONFIG_SSB is not set
 
 #
-# LED Triggers
+# Multifunction device drivers
 #
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
 
 #
 # Multimedia devices
 #
+
+#
+# Multimedia core support
+#
 # CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
 
 #
-# Digital Video Broadcasting Devices
+# Multimedia drivers
 #
-# CONFIG_DVB is not set
-# CONFIG_USB_DABUSB is not set
+# CONFIG_DAB is not set
 
 #
 # Graphics support
 #
-# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
 # CONFIG_FB is not set
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
-# Sound
+# Display device support
 #
+# CONFIG_DISPLAY_SUPPORT is not set
 # CONFIG_SOUND is not set
-
-#
-# USB support
-#
+CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
 # CONFIG_USB_ARCH_HAS_EHCI is not set
 CONFIG_USB=y
 CONFIG_USB_DEBUG=y
+# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set
 
 #
 # Miscellaneous USB options
 #
 CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_BANDWIDTH is not set
+CONFIG_USB_DEVICE_CLASS=y
 CONFIG_USB_DYNAMIC_MINORS=y
-# CONFIG_USB_MULTITHREAD_PROBE is not set
 # CONFIG_USB_OTG is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
+# CONFIG_USB_MON is not set
+# CONFIG_USB_WUSB is not set
+# CONFIG_USB_WUSB_CBAF is not set
 
 #
 # USB Host Controller Drivers
 #
+# CONFIG_USB_C67X00_HCD is not set
+# CONFIG_USB_OXU210HP_HCD is not set
 # CONFIG_USB_ISP116X_HCD is not set
+# CONFIG_USB_ISP1760_HCD is not set
 CONFIG_USB_OHCI_HCD=y
-# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
+# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
 CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 # CONFIG_USB_SL811_HCD is not set
+# CONFIG_USB_R8A66597_HCD is not set
+# CONFIG_USB_HWA_HCD is not set
+# CONFIG_USB_MUSB_HDRC is not set
 
 #
 # USB Device Class drivers
 #
 # CONFIG_USB_ACM is not set
 # CONFIG_USB_PRINTER is not set
+# CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# may also be needed; see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
-# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
 # CONFIG_USB_STORAGE_USBAT is not set
 # CONFIG_USB_STORAGE_SDDR09 is not set
 # CONFIG_USB_STORAGE_SDDR55 is not set
 # CONFIG_USB_STORAGE_JUMPSHOT is not set
 # CONFIG_USB_STORAGE_ALAUDA is not set
 # CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
 # CONFIG_USB_LIBUSUAL is not set
 
 #
-# USB Input Devices
-#
-
-#
-# USB HID Boot Protocol drivers
-#
-
-#
 # USB Imaging devices
 #
 # CONFIG_USB_MDC800 is not set
 # CONFIG_USB_MICROTEK is not set
 
 #
-# USB Network Adapters
-#
-# CONFIG_USB_CATC is not set
-# CONFIG_USB_KAWETH is not set
-# CONFIG_USB_PEGASUS is not set
-CONFIG_USB_RTL8150=y
-# CONFIG_USB_USBNET_MII is not set
-# CONFIG_USB_USBNET is not set
-# CONFIG_USB_MON is not set
-
-#
 # USB port drivers
 #
-
-#
-# USB Serial Converter support
-#
 CONFIG_USB_SERIAL=y
 CONFIG_USB_SERIAL_CONSOLE=y
+# CONFIG_USB_EZUSB is not set
 # CONFIG_USB_SERIAL_GENERIC is not set
 # CONFIG_USB_SERIAL_AIRCABLE is not set
-# CONFIG_USB_SERIAL_AIRPRIME is not set
 # CONFIG_USB_SERIAL_ARK3116 is not set
 # CONFIG_USB_SERIAL_BELKIN is not set
+# CONFIG_USB_SERIAL_CH341 is not set
 # CONFIG_USB_SERIAL_WHITEHEAT is not set
 # CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set
-# CONFIG_USB_SERIAL_CP2101 is not set
+# CONFIG_USB_SERIAL_CP210X is not set
 # CONFIG_USB_SERIAL_CYPRESS_M8 is not set
 # CONFIG_USB_SERIAL_EMPEG is not set
 # CONFIG_USB_SERIAL_FTDI_SIO is not set
@@ -895,6 +974,7 @@ CONFIG_USB_SERIAL_CONSOLE=y
 # CONFIG_USB_SERIAL_EDGEPORT_TI is not set
 # CONFIG_USB_SERIAL_GARMIN is not set
 # CONFIG_USB_SERIAL_IPW is not set
+# CONFIG_USB_SERIAL_IUU is not set
 # CONFIG_USB_SERIAL_KEYSPAN_PDA is not set
 # CONFIG_USB_SERIAL_KEYSPAN is not set
 # CONFIG_USB_SERIAL_KLSI is not set
@@ -902,16 +982,23 @@ CONFIG_USB_SERIAL_CONSOLE=y
 # CONFIG_USB_SERIAL_MCT_U232 is not set
 # CONFIG_USB_SERIAL_MOS7720 is not set
 # CONFIG_USB_SERIAL_MOS7840 is not set
+# CONFIG_USB_SERIAL_MOTOROLA is not set
 # CONFIG_USB_SERIAL_NAVMAN is not set
 CONFIG_USB_SERIAL_PL2303=y
+# CONFIG_USB_SERIAL_OTI6858 is not set
+# CONFIG_USB_SERIAL_QUALCOMM is not set
+# CONFIG_USB_SERIAL_SPCP8X5 is not set
 # CONFIG_USB_SERIAL_HP4X is not set
 # CONFIG_USB_SERIAL_SAFE is not set
+# CONFIG_USB_SERIAL_SIEMENS_MPI is not set
 # CONFIG_USB_SERIAL_SIERRAWIRELESS is not set
+# CONFIG_USB_SERIAL_SYMBOL is not set
 # CONFIG_USB_SERIAL_TI is not set
 # CONFIG_USB_SERIAL_CYBERJACK is not set
 # CONFIG_USB_SERIAL_XIRCOM is not set
 # CONFIG_USB_SERIAL_OPTION is not set
 # CONFIG_USB_SERIAL_OMNINET is not set
+# CONFIG_USB_SERIAL_OPTICON is not set
 # CONFIG_USB_SERIAL_DEBUG is not set
 
 #
@@ -920,38 +1007,34 @@ CONFIG_USB_SERIAL_PL2303=y
 # CONFIG_USB_EMI62 is not set
 # CONFIG_USB_EMI26 is not set
 # CONFIG_USB_ADUTUX is not set
-# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_SEVSEG is not set
 # CONFIG_USB_RIO500 is not set
 # CONFIG_USB_LEGOTOWER is not set
 # CONFIG_USB_LCD is not set
+# CONFIG_USB_BERRY_CHARGE is not set
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TRANCEVIBRATOR is not set
+# CONFIG_USB_IOWARRIOR is not set
 # CONFIG_USB_TEST is not set
-
-#
-# USB DSL modem support
-#
-
-#
-# USB Gadget Support
-#
+# CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
 
 #
-# MMC/SD Card support
+# OTG and related infrastructure
 #
+# CONFIG_USB_GPIO_VBUS is not set
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_MMC is not set
-
-#
-# Real Time Clock
-#
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_NEW_LEDS is not set
 CONFIG_RTC_LIB=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_HCTOSYS=y
@@ -965,24 +1048,55 @@ CONFIG_RTC_INTF_SYSFS=y
 CONFIG_RTC_INTF_PROC=y
 CONFIG_RTC_INTF_DEV=y
 # CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
 
 #
-# RTC drivers
+# I2C RTC drivers
 #
-# CONFIG_RTC_DRV_X1205 is not set
 CONFIG_RTC_DRV_DS1307=y
-# CONFIG_RTC_DRV_DS1553 is not set
-# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
 # CONFIG_RTC_DRV_DS1672 is not set
-# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
 # CONFIG_RTC_DRV_PCF8563 is not set
 # CONFIG_RTC_DRV_PCF8583 is not set
-# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_CMOS is not set
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
 CONFIG_RTC_DRV_M48T86=y
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
 CONFIG_RTC_DRV_EP93XX=y
+# CONFIG_RTC_DRV_PL030 is not set
 # CONFIG_RTC_DRV_PL031 is not set
-# CONFIG_RTC_DRV_TEST is not set
-# CONFIG_RTC_DRV_V3020 is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_REGULATOR is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
 
 #
 # File systems
@@ -991,27 +1105,31 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 # CONFIG_EXT3_FS_XATTR is not set
-# CONFIG_EXT4DEV_FS is not set
+# CONFIG_EXT4_FS is not set
 CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
 # CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
 # CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
 # CONFIG_AUTOFS_FS is not set
 # CONFIG_AUTOFS4_FS is not set
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1032,16 +1150,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 #
 CONFIG_PROC_FS=y
 CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 # CONFIG_TMPFS_POSIX_ACL is not set
 # CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
 # CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
+CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ADFS_FS is not set
 # CONFIG_AFFS_FS is not set
 # CONFIG_HFS_FS is not set
@@ -1049,33 +1164,35 @@ CONFIG_RAMFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_JFFS_FS is not set
 CONFIG_JFFS2_FS=y
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
 # CONFIG_JFFS2_SUMMARY is not set
 # CONFIG_JFFS2_FS_XATTR is not set
 # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
 CONFIG_JFFS2_ZLIB=y
+# CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
 # CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
 # CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 # CONFIG_NFS_V3_ACL is not set
 # CONFIG_NFS_V4 is not set
-# CONFIG_NFS_DIRECTIO is not set
-# CONFIG_NFSD is not set
 CONFIG_ROOT_NFS=y
+# CONFIG_NFSD is not set
 CONFIG_LOCKD=y
 CONFIG_LOCKD_V4=y
 CONFIG_NFS_COMMON=y
@@ -1087,7 +1204,6 @@ CONFIG_SUNRPC=y
 # CONFIG_NCP_FS is not set
 # CONFIG_CODA_FS is not set
 # CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
 
 #
 # Partition Types
@@ -1109,10 +1225,7 @@ CONFIG_MSDOS_PARTITION=y
 # CONFIG_SUN_PARTITION is not set
 # CONFIG_KARMA_PARTITION is not set
 # CONFIG_EFI_PARTITION is not set
-
-#
-# Native Language Support
-#
+# CONFIG_SYSV68_PARTITION is not set
 CONFIG_NLS=y
 CONFIG_NLS_DEFAULT="iso8859-1"
 CONFIG_NLS_CODEPAGE_437=y
@@ -1153,49 +1266,83 @@ CONFIG_NLS_ISO8859_1=y
 # CONFIG_NLS_KOI8_R is not set
 # CONFIG_NLS_KOI8_U is not set
 # CONFIG_NLS_UTF8 is not set
-
-#
-# Distributed Lock Manager
-#
 # CONFIG_DLM is not set
 
 #
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
-#
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
 CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
 # CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_DEBUG_SHIRQ is not set
 CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
 # CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
 CONFIG_DEBUG_SLAB=y
 # CONFIG_DEBUG_SLAB_LEAK is not set
 # CONFIG_DEBUG_RT_MUTEXES is not set
 # CONFIG_RT_MUTEX_TESTER is not set
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_RWSEMS is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 # CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
 # CONFIG_DEBUG_LIST is not set
-CONFIG_FRAME_POINTER=y
-CONFIG_FORCED_INLINING=y
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+# CONFIG_PAGE_POISONING is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_ARM_UNWIND=y
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_ERRORS=y
+# CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_LL=y
 # CONFIG_DEBUG_ICEDCC is not set
 
@@ -1204,21 +1351,115 @@ CONFIG_DEBUG_LL=y
 #
 # CONFIG_KEYS is not set
 # CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_MANAGER2 is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+CONFIG_CRYPTO_CRC32C=y
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
 
 #
-# Cryptographic options
+# Random Number Generation
 #
-# CONFIG_CRYPTO is not set
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
 #
 CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
 # CONFIG_CRC_CCITT is not set
 # CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
 CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
 CONFIG_LIBCRC32C=y
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=y
-CONFIG_PLIST=y
-CONFIG_IOMAP_COPY=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From 89dd08425273773fd33fc85d48d152c5679b2fb4 Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Sat, 23 May 2009 19:12:59 -0400
Subject: ASoC: Basic split of mpc5200 DMA code out of mpc5200_psc_i2s

Basic split of mpc5200 DMA code out from i2s into a standalone file.

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 9fc9082..dc79bdf 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -1,5 +1,8 @@
 config SND_SOC_OF_SIMPLE
 	tristate
+	
+config SND_MPC52xx_DMA
+	tristate
 
 # ASoC platform support for the Freescale MPC8610 SOC.  This compiles drivers
 # for the SSI and the Elo DMA controller.  You will still need to select
@@ -23,6 +26,7 @@ config SND_SOC_MPC5200_I2S
 	tristate "Freescale MPC5200 PSC in I2S mode driver"
 	depends on PPC_MPC52xx && PPC_BESTCOMM
 	select SND_SOC_OF_SIMPLE
+	select SND_MPC52xx_DMA
 	select PPC_BESTCOMM_GEN_BD
 	help
 	  Say Y here to support the MPC5200 PSCs in I2S mode.
diff --git a/sound/soc/fsl/Makefile b/sound/soc/fsl/Makefile
index f85134c..7731ef2 100644
--- a/sound/soc/fsl/Makefile
+++ b/sound/soc/fsl/Makefile
@@ -10,5 +10,7 @@ snd-soc-fsl-ssi-objs := fsl_ssi.o
 snd-soc-fsl-dma-objs := fsl_dma.o
 obj-$(CONFIG_SND_SOC_MPC8610) += snd-soc-fsl-ssi.o snd-soc-fsl-dma.o
 
+# MPC5200 Platform Support
+obj-$(CONFIG_SND_MPC52xx_DMA) += mpc5200_dma.o
 obj-$(CONFIG_SND_SOC_MPC5200_I2S) += mpc5200_psc_i2s.o
 
diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c
new file mode 100644
index 0000000..4bae8d6
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_dma.c
@@ -0,0 +1,458 @@
+/*
+ * Freescale MPC5200 PSC DMA
+ * ALSA SoC Platform driver
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+#include <sound/soc-of-simple.h>
+
+#include <sysdev/bestcomm/bestcomm.h>
+#include <sysdev/bestcomm/gen_bd.h>
+#include <asm/mpc52xx_psc.h>
+
+#include "mpc5200_dma.h"
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("Freescale MPC5200 PSC in DMA mode ASoC Driver");
+MODULE_LICENSE("GPL");
+
+/*
+ * Interrupt handlers
+ */
+static irqreturn_t psc_i2s_status_irq(int irq, void *_psc_i2s)
+{
+	struct psc_i2s *psc_i2s = _psc_i2s;
+	struct mpc52xx_psc __iomem *regs = psc_i2s->psc_regs;
+	u16 isr;
+
+	isr = in_be16(&regs->mpc52xx_psc_isr);
+
+	/* Playback underrun error */
+	if (psc_i2s->playback.active && (isr & MPC52xx_PSC_IMR_TXEMP))
+		psc_i2s->stats.underrun_count++;
+
+	/* Capture overrun error */
+	if (psc_i2s->capture.active && (isr & MPC52xx_PSC_IMR_ORERR))
+		psc_i2s->stats.overrun_count++;
+
+	out_8(&regs->command, 4 << 4);	/* reset the error status */
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * psc_i2s_bcom_enqueue_next_buffer - Enqueue another audio buffer
+ * @s: pointer to stream private data structure
+ *
+ * Enqueues another audio period buffer into the bestcomm queue.
+ *
+ * Note: The routine must only be called when there is space available in
+ * the queue.  Otherwise the enqueue will fail and the audio ring buffer
+ * will get out of sync
+ */
+static void psc_i2s_bcom_enqueue_next_buffer(struct psc_i2s_stream *s)
+{
+	struct bcom_bd *bd;
+
+	/* Prepare and enqueue the next buffer descriptor */
+	bd = bcom_prepare_next_buffer(s->bcom_task);
+	bd->status = s->period_bytes;
+	bd->data[0] = s->period_next_pt;
+	bcom_submit_next_buffer(s->bcom_task, NULL);
+
+	/* Update for next period */
+	s->period_next_pt += s->period_bytes;
+	if (s->period_next_pt >= s->period_end)
+		s->period_next_pt = s->period_start;
+}
+
+/* Bestcomm DMA irq handler */
+static irqreturn_t psc_i2s_bcom_irq(int irq, void *_psc_i2s_stream)
+{
+	struct psc_i2s_stream *s = _psc_i2s_stream;
+
+	/* For each finished period, dequeue the completed period buffer
+	 * and enqueue a new one in it's place. */
+	while (bcom_buffer_done(s->bcom_task)) {
+		bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
+		s->period_current_pt += s->period_bytes;
+		if (s->period_current_pt >= s->period_end)
+			s->period_current_pt = s->period_start;
+		psc_i2s_bcom_enqueue_next_buffer(s);
+		bcom_enable(s->bcom_task);
+	}
+
+	/* If the stream is active, then also inform the PCM middle layer
+	 * of the period finished event. */
+	if (s->active)
+		snd_pcm_period_elapsed(s->stream);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * psc_i2s_startup: create a new substream
+ *
+ * This is the first function called when a stream is opened.
+ *
+ * If this is the first stream open, then grab the IRQ and program most of
+ * the PSC registers.
+ */
+int psc_i2s_startup(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	int rc;
+
+	dev_dbg(psc_i2s->dev, "psc_i2s_startup(substream=%p)\n", substream);
+
+	if (!psc_i2s->playback.active &&
+	    !psc_i2s->capture.active) {
+		/* Setup the IRQs */
+		rc = request_irq(psc_i2s->irq, &psc_i2s_status_irq, IRQF_SHARED,
+				 "psc-i2s-status", psc_i2s);
+		rc |= request_irq(psc_i2s->capture.irq,
+				  &psc_i2s_bcom_irq, IRQF_SHARED,
+				  "psc-i2s-capture", &psc_i2s->capture);
+		rc |= request_irq(psc_i2s->playback.irq,
+				  &psc_i2s_bcom_irq, IRQF_SHARED,
+				  "psc-i2s-playback", &psc_i2s->playback);
+		if (rc) {
+			free_irq(psc_i2s->irq, psc_i2s);
+			free_irq(psc_i2s->capture.irq,
+				 &psc_i2s->capture);
+			free_irq(psc_i2s->playback.irq,
+				 &psc_i2s->playback);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+int psc_i2s_hw_free(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai)
+{
+	snd_pcm_set_runtime_buffer(substream, NULL);
+	return 0;
+}
+
+/**
+ * psc_i2s_trigger: start and stop the DMA transfer.
+ *
+ * This function is called by ALSA to start, stop, pause, and resume the DMA
+ * transfer of data.
+ */
+int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			   struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct psc_i2s_stream *s;
+	struct mpc52xx_psc __iomem *regs = psc_i2s->psc_regs;
+	u16 imr;
+	u8 psc_cmd;
+	unsigned long flags;
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+		s = &psc_i2s->capture;
+	else
+		s = &psc_i2s->playback;
+
+	dev_dbg(psc_i2s->dev, "psc_i2s_trigger(substream=%p, cmd=%i)"
+		" stream_id=%i\n",
+		substream, cmd, substream->pstr->stream);
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		s->period_bytes = frames_to_bytes(runtime,
+						  runtime->period_size);
+		s->period_start = virt_to_phys(runtime->dma_area);
+		s->period_end = s->period_start +
+				(s->period_bytes * runtime->periods);
+		s->period_next_pt = s->period_start;
+		s->period_current_pt = s->period_start;
+		s->active = 1;
+
+		/* First; reset everything */
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
+			out_8(&regs->command, MPC52xx_PSC_RST_RX);
+			out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
+		} else {
+			out_8(&regs->command, MPC52xx_PSC_RST_TX);
+			out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
+		}
+
+		/* Next, fill up the bestcomm bd queue and enable DMA.
+		 * This will begin filling the PSC's fifo. */
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+			bcom_gen_bd_rx_reset(s->bcom_task);
+		else
+			bcom_gen_bd_tx_reset(s->bcom_task);
+		while (!bcom_queue_full(s->bcom_task))
+			psc_i2s_bcom_enqueue_next_buffer(s);
+		bcom_enable(s->bcom_task);
+
+		/* Due to errata in the i2s mode; need to line up enabling
+		 * the transmitter with a transition on the frame sync
+		 * line */
+
+		spin_lock_irqsave(&psc_i2s->lock, flags);
+		/* first make sure it is low */
+		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) != 0)
+			;
+		/* then wait for the transition to high */
+		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) == 0)
+			;
+		/* Finally, enable the PSC.
+		 * Receiver must always be enabled; even when we only want
+		 * transmit.  (see 15.3.2.3 of MPC5200B User's Guide) */
+		psc_cmd = MPC52xx_PSC_RX_ENABLE;
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_PLAYBACK)
+			psc_cmd |= MPC52xx_PSC_TX_ENABLE;
+		out_8(&regs->command, psc_cmd);
+		spin_unlock_irqrestore(&psc_i2s->lock, flags);
+
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+		/* Turn off the PSC */
+		s->active = 0;
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
+			if (!psc_i2s->playback.active) {
+				out_8(&regs->command, 2 << 4);	/* reset rx */
+				out_8(&regs->command, 3 << 4);	/* reset tx */
+				out_8(&regs->command, 4 << 4);	/* reset err */
+			}
+		} else {
+			out_8(&regs->command, 3 << 4);	/* reset tx */
+			out_8(&regs->command, 4 << 4);	/* reset err */
+			if (!psc_i2s->capture.active)
+				out_8(&regs->command, 2 << 4);	/* reset rx */
+		}
+
+		bcom_disable(s->bcom_task);
+		while (!bcom_queue_empty(s->bcom_task))
+			bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
+
+		break;
+
+	default:
+		dev_dbg(psc_i2s->dev, "invalid command\n");
+		return -EINVAL;
+	}
+
+	/* Update interrupt enable settings */
+	imr = 0;
+	if (psc_i2s->playback.active)
+		imr |= MPC52xx_PSC_IMR_TXEMP;
+	if (psc_i2s->capture.active)
+		imr |= MPC52xx_PSC_IMR_ORERR;
+	out_be16(&regs->isr_imr.imr, imr);
+
+	return 0;
+}
+
+/**
+ * psc_i2s_shutdown: shutdown the data transfer on a stream
+ *
+ * Shutdown the PSC if there are no other substreams open.
+ */
+void psc_i2s_shutdown(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+
+	dev_dbg(psc_i2s->dev, "psc_i2s_shutdown(substream=%p)\n", substream);
+
+	/*
+	 * If this is the last active substream, disable the PSC and release
+	 * the IRQ.
+	 */
+	if (!psc_i2s->playback.active &&
+	    !psc_i2s->capture.active) {
+
+		/* Disable all interrupts and reset the PSC */
+		out_be16(&psc_i2s->psc_regs->isr_imr.imr, 0);
+		out_8(&psc_i2s->psc_regs->command, 3 << 4); /* reset tx */
+		out_8(&psc_i2s->psc_regs->command, 2 << 4); /* reset rx */
+		out_8(&psc_i2s->psc_regs->command, 1 << 4); /* reset mode */
+		out_8(&psc_i2s->psc_regs->command, 4 << 4); /* reset error */
+
+		/* Release irqs */
+		free_irq(psc_i2s->irq, psc_i2s);
+		free_irq(psc_i2s->capture.irq, &psc_i2s->capture);
+		free_irq(psc_i2s->playback.irq, &psc_i2s->playback);
+	}
+}
+
+/* ---------------------------------------------------------------------
+ * The PSC DMA 'ASoC platform' driver
+ *
+ * Can be referenced by an 'ASoC machine' driver
+ * This driver only deals with the audio bus; it doesn't have any
+ * interaction with the attached codec
+ */
+
+static const struct snd_pcm_hardware psc_i2s_pcm_hardware = {
+	.info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
+		SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
+		SNDRV_PCM_INFO_BATCH,
+	.formats = SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_BE |
+		   SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S32_BE,
+	.rate_min = 8000,
+	.rate_max = 48000,
+	.channels_min = 2,
+	.channels_max = 2,
+	.period_bytes_max	= 1024 * 1024,
+	.period_bytes_min	= 32,
+	.periods_min		= 2,
+	.periods_max		= 256,
+	.buffer_bytes_max	= 2 * 1024 * 1024,
+	.fifo_size		= 0,
+};
+
+static int psc_i2s_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct psc_i2s_stream *s;
+
+	dev_dbg(psc_i2s->dev, "psc_i2s_pcm_open(substream=%p)\n", substream);
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+		s = &psc_i2s->capture;
+	else
+		s = &psc_i2s->playback;
+
+	snd_soc_set_runtime_hwparams(substream, &psc_i2s_pcm_hardware);
+
+	s->stream = substream;
+	return 0;
+}
+
+static int psc_i2s_pcm_close(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct psc_i2s_stream *s;
+
+	dev_dbg(psc_i2s->dev, "psc_i2s_pcm_close(substream=%p)\n", substream);
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+		s = &psc_i2s->capture;
+	else
+		s = &psc_i2s->playback;
+
+	s->stream = NULL;
+	return 0;
+}
+
+static snd_pcm_uframes_t
+psc_i2s_pcm_pointer(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct psc_i2s_stream *s;
+	dma_addr_t count;
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+		s = &psc_i2s->capture;
+	else
+		s = &psc_i2s->playback;
+
+	count = s->period_current_pt - s->period_start;
+
+	return bytes_to_frames(substream->runtime, count);
+}
+
+static struct snd_pcm_ops psc_i2s_pcm_ops = {
+	.open		= psc_i2s_pcm_open,
+	.close		= psc_i2s_pcm_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.pointer	= psc_i2s_pcm_pointer,
+};
+
+static u64 psc_i2s_pcm_dmamask = 0xffffffff;
+static int psc_i2s_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
+			   struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
+	size_t size = psc_i2s_pcm_hardware.buffer_bytes_max;
+	int rc = 0;
+
+	dev_dbg(rtd->socdev->dev, "psc_i2s_pcm_new(card=%p, dai=%p, pcm=%p)\n",
+		card, dai, pcm);
+
+	if (!card->dev->dma_mask)
+		card->dev->dma_mask = &psc_i2s_pcm_dmamask;
+	if (!card->dev->coherent_dma_mask)
+		card->dev->coherent_dma_mask = 0xffffffff;
+
+	if (pcm->streams[0].substream) {
+		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->dev, size,
+					&pcm->streams[0].substream->dma_buffer);
+		if (rc)
+			goto playback_alloc_err;
+	}
+
+	if (pcm->streams[1].substream) {
+		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->dev, size,
+					&pcm->streams[1].substream->dma_buffer);
+		if (rc)
+			goto capture_alloc_err;
+	}
+
+	return 0;
+
+ capture_alloc_err:
+	if (pcm->streams[0].substream)
+		snd_dma_free_pages(&pcm->streams[0].substream->dma_buffer);
+ playback_alloc_err:
+	dev_err(card->dev, "Cannot allocate buffer(s)\n");
+	return -ENOMEM;
+}
+
+static void psc_i2s_pcm_free(struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
+	struct snd_pcm_substream *substream;
+	int stream;
+
+	dev_dbg(rtd->socdev->dev, "psc_i2s_pcm_free(pcm=%p)\n", pcm);
+
+	for (stream = 0; stream < 2; stream++) {
+		substream = pcm->streams[stream].substream;
+		if (substream) {
+			snd_dma_free_pages(&substream->dma_buffer);
+			substream->dma_buffer.area = NULL;
+			substream->dma_buffer.addr = 0;
+		}
+	}
+}
+
+struct snd_soc_platform psc_i2s_pcm_soc_platform = {
+	.name		= "mpc5200-psc-audio",
+	.pcm_ops	= &psc_i2s_pcm_ops,
+	.pcm_new	= &psc_i2s_pcm_new,
+	.pcm_free	= &psc_i2s_pcm_free,
+};
+
diff --git a/sound/soc/fsl/mpc5200_dma.h b/sound/soc/fsl/mpc5200_dma.h
new file mode 100644
index 0000000..9a19e8a
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_dma.h
@@ -0,0 +1,81 @@
+/*
+ * Freescale MPC5200 Audio DMA driver
+ */
+
+#ifndef __SOUND_SOC_FSL_MPC5200_DMA_H__
+#define __SOUND_SOC_FSL_MPC5200_DMA_H__
+
+/**
+ * psc_i2s_stream - Data specific to a single stream (playback or capture)
+ * @active:		flag indicating if the stream is active
+ * @psc_i2s:		pointer back to parent psc_i2s data structure
+ * @bcom_task:		bestcomm task structure
+ * @irq:		irq number for bestcomm task
+ * @period_start:	physical address of start of DMA region
+ * @period_end:		physical address of end of DMA region
+ * @period_next_pt:	physical address of next DMA buffer to enqueue
+ * @period_bytes:	size of DMA period in bytes
+ */
+struct psc_i2s_stream {
+	int active;
+	struct psc_i2s *psc_i2s;
+	struct bcom_task *bcom_task;
+	int irq;
+	struct snd_pcm_substream *stream;
+	dma_addr_t period_start;
+	dma_addr_t period_end;
+	dma_addr_t period_next_pt;
+	dma_addr_t period_current_pt;
+	int period_bytes;
+};
+
+/**
+ * psc_i2s - Private driver data
+ * @name: short name for this device ("PSC0", "PSC1", etc)
+ * @psc_regs: pointer to the PSC's registers
+ * @fifo_regs: pointer to the PSC's FIFO registers
+ * @irq: IRQ of this PSC
+ * @dev: struct device pointer
+ * @dai: the CPU DAI for this device
+ * @sicr: Base value used in serial interface control register; mode is ORed
+ *        with this value.
+ * @playback: Playback stream context data
+ * @capture: Capture stream context data
+ */
+struct psc_i2s {
+	char name[32];
+	struct mpc52xx_psc __iomem *psc_regs;
+	struct mpc52xx_psc_fifo __iomem *fifo_regs;
+	unsigned int irq;
+	struct device *dev;
+	struct snd_soc_dai dai;
+	spinlock_t lock;
+	u32 sicr;
+
+	/* per-stream data */
+	struct psc_i2s_stream playback;
+	struct psc_i2s_stream capture;
+
+	/* Statistics */
+	struct {
+		int overrun_count;
+		int underrun_count;
+	} stats;
+};
+
+
+int psc_i2s_startup(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai);
+
+int psc_i2s_hw_free(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai);
+
+void psc_i2s_shutdown(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai);
+
+int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			   struct snd_soc_dai *dai);
+
+extern struct snd_soc_platform psc_i2s_pcm_soc_platform;
+
+#endif /* __SOUND_SOC_FSL_MPC5200_DMA_H__ */
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c
index 1111c71..8974b53 100644
--- a/sound/soc/fsl/mpc5200_psc_i2s.c
+++ b/sound/soc/fsl/mpc5200_psc_i2s.c
@@ -25,6 +25,8 @@
 #include <sysdev/bestcomm/gen_bd.h>
 #include <asm/mpc52xx_psc.h>
 
+#include "mpc5200_dma.h"
+
 MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
 MODULE_DESCRIPTION("Freescale MPC5200 PSC in I2S mode ASoC Driver");
 MODULE_LICENSE("GPL");
@@ -47,179 +49,6 @@ MODULE_LICENSE("GPL");
 			 SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S24_BE | \
 			 SNDRV_PCM_FMTBIT_S32_BE)
 
-/**
- * psc_i2s_stream - Data specific to a single stream (playback or capture)
- * @active:		flag indicating if the stream is active
- * @psc_i2s:		pointer back to parent psc_i2s data structure
- * @bcom_task:		bestcomm task structure
- * @irq:		irq number for bestcomm task
- * @period_start:	physical address of start of DMA region
- * @period_end:		physical address of end of DMA region
- * @period_next_pt:	physical address of next DMA buffer to enqueue
- * @period_bytes:	size of DMA period in bytes
- */
-struct psc_i2s_stream {
-	int active;
-	struct psc_i2s *psc_i2s;
-	struct bcom_task *bcom_task;
-	int irq;
-	struct snd_pcm_substream *stream;
-	dma_addr_t period_start;
-	dma_addr_t period_end;
-	dma_addr_t period_next_pt;
-	dma_addr_t period_current_pt;
-	int period_bytes;
-};
-
-/**
- * psc_i2s - Private driver data
- * @name: short name for this device ("PSC0", "PSC1", etc)
- * @psc_regs: pointer to the PSC's registers
- * @fifo_regs: pointer to the PSC's FIFO registers
- * @irq: IRQ of this PSC
- * @dev: struct device pointer
- * @dai: the CPU DAI for this device
- * @sicr: Base value used in serial interface control register; mode is ORed
- *        with this value.
- * @playback: Playback stream context data
- * @capture: Capture stream context data
- */
-struct psc_i2s {
-	char name[32];
-	struct mpc52xx_psc __iomem *psc_regs;
-	struct mpc52xx_psc_fifo __iomem *fifo_regs;
-	unsigned int irq;
-	struct device *dev;
-	struct snd_soc_dai dai;
-	spinlock_t lock;
-	u32 sicr;
-
-	/* per-stream data */
-	struct psc_i2s_stream playback;
-	struct psc_i2s_stream capture;
-
-	/* Statistics */
-	struct {
-		int overrun_count;
-		int underrun_count;
-	} stats;
-};
-
-/*
- * Interrupt handlers
- */
-static irqreturn_t psc_i2s_status_irq(int irq, void *_psc_i2s)
-{
-	struct psc_i2s *psc_i2s = _psc_i2s;
-	struct mpc52xx_psc __iomem *regs = psc_i2s->psc_regs;
-	u16 isr;
-
-	isr = in_be16(&regs->mpc52xx_psc_isr);
-
-	/* Playback underrun error */
-	if (psc_i2s->playback.active && (isr & MPC52xx_PSC_IMR_TXEMP))
-		psc_i2s->stats.underrun_count++;
-
-	/* Capture overrun error */
-	if (psc_i2s->capture.active && (isr & MPC52xx_PSC_IMR_ORERR))
-		psc_i2s->stats.overrun_count++;
-
-	out_8(&regs->command, 4 << 4);	/* reset the error status */
-
-	return IRQ_HANDLED;
-}
-
-/**
- * psc_i2s_bcom_enqueue_next_buffer - Enqueue another audio buffer
- * @s: pointer to stream private data structure
- *
- * Enqueues another audio period buffer into the bestcomm queue.
- *
- * Note: The routine must only be called when there is space available in
- * the queue.  Otherwise the enqueue will fail and the audio ring buffer
- * will get out of sync
- */
-static void psc_i2s_bcom_enqueue_next_buffer(struct psc_i2s_stream *s)
-{
-	struct bcom_bd *bd;
-
-	/* Prepare and enqueue the next buffer descriptor */
-	bd = bcom_prepare_next_buffer(s->bcom_task);
-	bd->status = s->period_bytes;
-	bd->data[0] = s->period_next_pt;
-	bcom_submit_next_buffer(s->bcom_task, NULL);
-
-	/* Update for next period */
-	s->period_next_pt += s->period_bytes;
-	if (s->period_next_pt >= s->period_end)
-		s->period_next_pt = s->period_start;
-}
-
-/* Bestcomm DMA irq handler */
-static irqreturn_t psc_i2s_bcom_irq(int irq, void *_psc_i2s_stream)
-{
-	struct psc_i2s_stream *s = _psc_i2s_stream;
-
-	/* For each finished period, dequeue the completed period buffer
-	 * and enqueue a new one in it's place. */
-	while (bcom_buffer_done(s->bcom_task)) {
-		bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
-		s->period_current_pt += s->period_bytes;
-		if (s->period_current_pt >= s->period_end)
-			s->period_current_pt = s->period_start;
-		psc_i2s_bcom_enqueue_next_buffer(s);
-		bcom_enable(s->bcom_task);
-	}
-
-	/* If the stream is active, then also inform the PCM middle layer
-	 * of the period finished event. */
-	if (s->active)
-		snd_pcm_period_elapsed(s->stream);
-
-	return IRQ_HANDLED;
-}
-
-/**
- * psc_i2s_startup: create a new substream
- *
- * This is the first function called when a stream is opened.
- *
- * If this is the first stream open, then grab the IRQ and program most of
- * the PSC registers.
- */
-static int psc_i2s_startup(struct snd_pcm_substream *substream,
-			   struct snd_soc_dai *dai)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	int rc;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_startup(substream=%p)\n", substream);
-
-	if (!psc_i2s->playback.active &&
-	    !psc_i2s->capture.active) {
-		/* Setup the IRQs */
-		rc = request_irq(psc_i2s->irq, &psc_i2s_status_irq, IRQF_SHARED,
-				 "psc-i2s-status", psc_i2s);
-		rc |= request_irq(psc_i2s->capture.irq,
-				  &psc_i2s_bcom_irq, IRQF_SHARED,
-				  "psc-i2s-capture", &psc_i2s->capture);
-		rc |= request_irq(psc_i2s->playback.irq,
-				  &psc_i2s_bcom_irq, IRQF_SHARED,
-				  "psc-i2s-playback", &psc_i2s->playback);
-		if (rc) {
-			free_irq(psc_i2s->irq, psc_i2s);
-			free_irq(psc_i2s->capture.irq,
-				 &psc_i2s->capture);
-			free_irq(psc_i2s->playback.irq,
-				 &psc_i2s->playback);
-			return -ENODEV;
-		}
-	}
-
-	return 0;
-}
-
 static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 				 struct snd_pcm_hw_params *params,
 				 struct snd_soc_dai *dai)
@@ -258,164 +87,6 @@ static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int psc_i2s_hw_free(struct snd_pcm_substream *substream,
-			   struct snd_soc_dai *dai)
-{
-	snd_pcm_set_runtime_buffer(substream, NULL);
-	return 0;
-}
-
-/**
- * psc_i2s_trigger: start and stop the DMA transfer.
- *
- * This function is called by ALSA to start, stop, pause, and resume the DMA
- * transfer of data.
- */
-static int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
-			   struct snd_soc_dai *dai)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct psc_i2s_stream *s;
-	struct mpc52xx_psc __iomem *regs = psc_i2s->psc_regs;
-	u16 imr;
-	u8 psc_cmd;
-	unsigned long flags;
-
-	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
-	else
-		s = &psc_i2s->playback;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_trigger(substream=%p, cmd=%i)"
-		" stream_id=%i\n",
-		substream, cmd, substream->pstr->stream);
-
-	switch (cmd) {
-	case SNDRV_PCM_TRIGGER_START:
-		s->period_bytes = frames_to_bytes(runtime,
-						  runtime->period_size);
-		s->period_start = virt_to_phys(runtime->dma_area);
-		s->period_end = s->period_start +
-				(s->period_bytes * runtime->periods);
-		s->period_next_pt = s->period_start;
-		s->period_current_pt = s->period_start;
-		s->active = 1;
-
-		/* First; reset everything */
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
-			out_8(&regs->command, MPC52xx_PSC_RST_RX);
-			out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
-		} else {
-			out_8(&regs->command, MPC52xx_PSC_RST_TX);
-			out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
-		}
-
-		/* Next, fill up the bestcomm bd queue and enable DMA.
-		 * This will begin filling the PSC's fifo. */
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-			bcom_gen_bd_rx_reset(s->bcom_task);
-		else
-			bcom_gen_bd_tx_reset(s->bcom_task);
-		while (!bcom_queue_full(s->bcom_task))
-			psc_i2s_bcom_enqueue_next_buffer(s);
-		bcom_enable(s->bcom_task);
-
-		/* Due to errata in the i2s mode; need to line up enabling
-		 * the transmitter with a transition on the frame sync
-		 * line */
-
-		spin_lock_irqsave(&psc_i2s->lock, flags);
-		/* first make sure it is low */
-		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) != 0)
-			;
-		/* then wait for the transition to high */
-		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) == 0)
-			;
-		/* Finally, enable the PSC.
-		 * Receiver must always be enabled; even when we only want
-		 * transmit.  (see 15.3.2.3 of MPC5200B User's Guide) */
-		psc_cmd = MPC52xx_PSC_RX_ENABLE;
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			psc_cmd |= MPC52xx_PSC_TX_ENABLE;
-		out_8(&regs->command, psc_cmd);
-		spin_unlock_irqrestore(&psc_i2s->lock, flags);
-
-		break;
-
-	case SNDRV_PCM_TRIGGER_STOP:
-		/* Turn off the PSC */
-		s->active = 0;
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
-			if (!psc_i2s->playback.active) {
-				out_8(&regs->command, 2 << 4);	/* reset rx */
-				out_8(&regs->command, 3 << 4);	/* reset tx */
-				out_8(&regs->command, 4 << 4);	/* reset err */
-			}
-		} else {
-			out_8(&regs->command, 3 << 4);	/* reset tx */
-			out_8(&regs->command, 4 << 4);	/* reset err */
-			if (!psc_i2s->capture.active)
-				out_8(&regs->command, 2 << 4);	/* reset rx */
-		}
-
-		bcom_disable(s->bcom_task);
-		while (!bcom_queue_empty(s->bcom_task))
-			bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
-
-		break;
-
-	default:
-		dev_dbg(psc_i2s->dev, "invalid command\n");
-		return -EINVAL;
-	}
-
-	/* Update interrupt enable settings */
-	imr = 0;
-	if (psc_i2s->playback.active)
-		imr |= MPC52xx_PSC_IMR_TXEMP;
-	if (psc_i2s->capture.active)
-		imr |= MPC52xx_PSC_IMR_ORERR;
-	out_be16(&regs->isr_imr.imr, imr);
-
-	return 0;
-}
-
-/**
- * psc_i2s_shutdown: shutdown the data transfer on a stream
- *
- * Shutdown the PSC if there are no other substreams open.
- */
-static void psc_i2s_shutdown(struct snd_pcm_substream *substream,
-			     struct snd_soc_dai *dai)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_shutdown(substream=%p)\n", substream);
-
-	/*
-	 * If this is the last active substream, disable the PSC and release
-	 * the IRQ.
-	 */
-	if (!psc_i2s->playback.active &&
-	    !psc_i2s->capture.active) {
-
-		/* Disable all interrupts and reset the PSC */
-		out_be16(&psc_i2s->psc_regs->isr_imr.imr, 0);
-		out_8(&psc_i2s->psc_regs->command, 3 << 4); /* reset tx */
-		out_8(&psc_i2s->psc_regs->command, 2 << 4); /* reset rx */
-		out_8(&psc_i2s->psc_regs->command, 1 << 4); /* reset mode */
-		out_8(&psc_i2s->psc_regs->command, 4 << 4); /* reset error */
-
-		/* Release irqs */
-		free_irq(psc_i2s->irq, psc_i2s);
-		free_irq(psc_i2s->capture.irq, &psc_i2s->capture);
-		free_irq(psc_i2s->playback.irq, &psc_i2s->playback);
-	}
-}
-
 /**
  * psc_i2s_set_sysclk: set the clock frequency and direction
  *
@@ -495,158 +166,6 @@ static struct snd_soc_dai psc_i2s_dai_template = {
 };
 
 /* ---------------------------------------------------------------------
- * The PSC I2S 'ASoC platform' driver
- *
- * Can be referenced by an 'ASoC machine' driver
- * This driver only deals with the audio bus; it doesn't have any
- * interaction with the attached codec
- */
-
-static const struct snd_pcm_hardware psc_i2s_pcm_hardware = {
-	.info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
-		SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
-		SNDRV_PCM_INFO_BATCH,
-	.formats = SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_BE |
-		   SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S32_BE,
-	.rate_min = 8000,
-	.rate_max = 48000,
-	.channels_min = 2,
-	.channels_max = 2,
-	.period_bytes_max	= 1024 * 1024,
-	.period_bytes_min	= 32,
-	.periods_min		= 2,
-	.periods_max		= 256,
-	.buffer_bytes_max	= 2 * 1024 * 1024,
-	.fifo_size		= 0,
-};
-
-static int psc_i2s_pcm_open(struct snd_pcm_substream *substream)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_pcm_open(substream=%p)\n", substream);
-
-	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
-	else
-		s = &psc_i2s->playback;
-
-	snd_soc_set_runtime_hwparams(substream, &psc_i2s_pcm_hardware);
-
-	s->stream = substream;
-	return 0;
-}
-
-static int psc_i2s_pcm_close(struct snd_pcm_substream *substream)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
-
-	dev_dbg(psc_i2s->dev, "psc_i2s_pcm_close(substream=%p)\n", substream);
-
-	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
-	else
-		s = &psc_i2s->playback;
-
-	s->stream = NULL;
-	return 0;
-}
-
-static snd_pcm_uframes_t
-psc_i2s_pcm_pointer(struct snd_pcm_substream *substream)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
-	dma_addr_t count;
-
-	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
-	else
-		s = &psc_i2s->playback;
-
-	count = s->period_current_pt - s->period_start;
-
-	return bytes_to_frames(substream->runtime, count);
-}
-
-static struct snd_pcm_ops psc_i2s_pcm_ops = {
-	.open		= psc_i2s_pcm_open,
-	.close		= psc_i2s_pcm_close,
-	.ioctl		= snd_pcm_lib_ioctl,
-	.pointer	= psc_i2s_pcm_pointer,
-};
-
-static u64 psc_i2s_pcm_dmamask = 0xffffffff;
-static int psc_i2s_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
-			   struct snd_pcm *pcm)
-{
-	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	size_t size = psc_i2s_pcm_hardware.buffer_bytes_max;
-	int rc = 0;
-
-	dev_dbg(rtd->socdev->dev, "psc_i2s_pcm_new(card=%p, dai=%p, pcm=%p)\n",
-		card, dai, pcm);
-
-	if (!card->dev->dma_mask)
-		card->dev->dma_mask = &psc_i2s_pcm_dmamask;
-	if (!card->dev->coherent_dma_mask)
-		card->dev->coherent_dma_mask = 0xffffffff;
-
-	if (pcm->streams[0].substream) {
-		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->dev, size,
-					&pcm->streams[0].substream->dma_buffer);
-		if (rc)
-			goto playback_alloc_err;
-	}
-
-	if (pcm->streams[1].substream) {
-		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->dev, size,
-					&pcm->streams[1].substream->dma_buffer);
-		if (rc)
-			goto capture_alloc_err;
-	}
-
-	return 0;
-
- capture_alloc_err:
-	if (pcm->streams[0].substream)
-		snd_dma_free_pages(&pcm->streams[0].substream->dma_buffer);
- playback_alloc_err:
-	dev_err(card->dev, "Cannot allocate buffer(s)\n");
-	return -ENOMEM;
-}
-
-static void psc_i2s_pcm_free(struct snd_pcm *pcm)
-{
-	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	struct snd_pcm_substream *substream;
-	int stream;
-
-	dev_dbg(rtd->socdev->dev, "psc_i2s_pcm_free(pcm=%p)\n", pcm);
-
-	for (stream = 0; stream < 2; stream++) {
-		substream = pcm->streams[stream].substream;
-		if (substream) {
-			snd_dma_free_pages(&substream->dma_buffer);
-			substream->dma_buffer.area = NULL;
-			substream->dma_buffer.addr = 0;
-		}
-	}
-}
-
-struct snd_soc_platform psc_i2s_pcm_soc_platform = {
-	.name		= "mpc5200-psc-audio",
-	.pcm_ops	= &psc_i2s_pcm_ops,
-	.pcm_new	= &psc_i2s_pcm_new,
-	.pcm_free	= &psc_i2s_pcm_free,
-};
-
-/* ---------------------------------------------------------------------
  * Sysfs attributes for debugging
  */
 
-- 
cgit v0.10.2


From cebe77674cab51a9ff1deaa077ab74aff3996764 Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Sat, 23 May 2009 19:13:01 -0400
Subject: ASoC: Rename the PSC functions to DMA

Rename the functions in the mpc5200 DMA file from i2s based names to dma
ones to reflect the file they are in.

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c
index 4bae8d6..6850392 100644
--- a/sound/soc/fsl/mpc5200_dma.c
+++ b/sound/soc/fsl/mpc5200_dma.c
@@ -34,21 +34,21 @@ MODULE_LICENSE("GPL");
 /*
  * Interrupt handlers
  */
-static irqreturn_t psc_i2s_status_irq(int irq, void *_psc_i2s)
+static irqreturn_t psc_dma_status_irq(int irq, void *_psc_dma)
 {
-	struct psc_i2s *psc_i2s = _psc_i2s;
-	struct mpc52xx_psc __iomem *regs = psc_i2s->psc_regs;
+	struct psc_dma *psc_dma = _psc_dma;
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
 	u16 isr;
 
 	isr = in_be16(&regs->mpc52xx_psc_isr);
 
 	/* Playback underrun error */
-	if (psc_i2s->playback.active && (isr & MPC52xx_PSC_IMR_TXEMP))
-		psc_i2s->stats.underrun_count++;
+	if (psc_dma->playback.active && (isr & MPC52xx_PSC_IMR_TXEMP))
+		psc_dma->stats.underrun_count++;
 
 	/* Capture overrun error */
-	if (psc_i2s->capture.active && (isr & MPC52xx_PSC_IMR_ORERR))
-		psc_i2s->stats.overrun_count++;
+	if (psc_dma->capture.active && (isr & MPC52xx_PSC_IMR_ORERR))
+		psc_dma->stats.overrun_count++;
 
 	out_8(&regs->command, 4 << 4);	/* reset the error status */
 
@@ -56,7 +56,7 @@ static irqreturn_t psc_i2s_status_irq(int irq, void *_psc_i2s)
 }
 
 /**
- * psc_i2s_bcom_enqueue_next_buffer - Enqueue another audio buffer
+ * psc_dma_bcom_enqueue_next_buffer - Enqueue another audio buffer
  * @s: pointer to stream private data structure
  *
  * Enqueues another audio period buffer into the bestcomm queue.
@@ -65,7 +65,7 @@ static irqreturn_t psc_i2s_status_irq(int irq, void *_psc_i2s)
  * the queue.  Otherwise the enqueue will fail and the audio ring buffer
  * will get out of sync
  */
-static void psc_i2s_bcom_enqueue_next_buffer(struct psc_i2s_stream *s)
+static void psc_dma_bcom_enqueue_next_buffer(struct psc_dma_stream *s)
 {
 	struct bcom_bd *bd;
 
@@ -82,9 +82,9 @@ static void psc_i2s_bcom_enqueue_next_buffer(struct psc_i2s_stream *s)
 }
 
 /* Bestcomm DMA irq handler */
-static irqreturn_t psc_i2s_bcom_irq(int irq, void *_psc_i2s_stream)
+static irqreturn_t psc_dma_bcom_irq(int irq, void *_psc_dma_stream)
 {
-	struct psc_i2s_stream *s = _psc_i2s_stream;
+	struct psc_dma_stream *s = _psc_dma_stream;
 
 	/* For each finished period, dequeue the completed period buffer
 	 * and enqueue a new one in it's place. */
@@ -93,7 +93,7 @@ static irqreturn_t psc_i2s_bcom_irq(int irq, void *_psc_i2s_stream)
 		s->period_current_pt += s->period_bytes;
 		if (s->period_current_pt >= s->period_end)
 			s->period_current_pt = s->period_start;
-		psc_i2s_bcom_enqueue_next_buffer(s);
+		psc_dma_bcom_enqueue_next_buffer(s);
 		bcom_enable(s->bcom_task);
 	}
 
@@ -106,39 +106,39 @@ static irqreturn_t psc_i2s_bcom_irq(int irq, void *_psc_i2s_stream)
 }
 
 /**
- * psc_i2s_startup: create a new substream
+ * psc_dma_startup: create a new substream
  *
  * This is the first function called when a stream is opened.
  *
  * If this is the first stream open, then grab the IRQ and program most of
  * the PSC registers.
  */
-int psc_i2s_startup(struct snd_pcm_substream *substream,
+int psc_dma_startup(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
 	int rc;
 
-	dev_dbg(psc_i2s->dev, "psc_i2s_startup(substream=%p)\n", substream);
+	dev_dbg(psc_dma->dev, "psc_dma_startup(substream=%p)\n", substream);
 
-	if (!psc_i2s->playback.active &&
-	    !psc_i2s->capture.active) {
+	if (!psc_dma->playback.active &&
+	    !psc_dma->capture.active) {
 		/* Setup the IRQs */
-		rc = request_irq(psc_i2s->irq, &psc_i2s_status_irq, IRQF_SHARED,
-				 "psc-i2s-status", psc_i2s);
-		rc |= request_irq(psc_i2s->capture.irq,
-				  &psc_i2s_bcom_irq, IRQF_SHARED,
-				  "psc-i2s-capture", &psc_i2s->capture);
-		rc |= request_irq(psc_i2s->playback.irq,
-				  &psc_i2s_bcom_irq, IRQF_SHARED,
-				  "psc-i2s-playback", &psc_i2s->playback);
+		rc = request_irq(psc_dma->irq, &psc_dma_status_irq, IRQF_SHARED,
+				 "psc-dma-status", psc_dma);
+		rc |= request_irq(psc_dma->capture.irq,
+				  &psc_dma_bcom_irq, IRQF_SHARED,
+				  "psc-dma-capture", &psc_dma->capture);
+		rc |= request_irq(psc_dma->playback.irq,
+				  &psc_dma_bcom_irq, IRQF_SHARED,
+				  "psc-dma-playback", &psc_dma->playback);
 		if (rc) {
-			free_irq(psc_i2s->irq, psc_i2s);
-			free_irq(psc_i2s->capture.irq,
-				 &psc_i2s->capture);
-			free_irq(psc_i2s->playback.irq,
-				 &psc_i2s->playback);
+			free_irq(psc_dma->irq, psc_dma);
+			free_irq(psc_dma->capture.irq,
+				 &psc_dma->capture);
+			free_irq(psc_dma->playback.irq,
+				 &psc_dma->playback);
 			return -ENODEV;
 		}
 	}
@@ -146,7 +146,7 @@ int psc_i2s_startup(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-int psc_i2s_hw_free(struct snd_pcm_substream *substream,
+int psc_dma_hw_free(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai)
 {
 	snd_pcm_set_runtime_buffer(substream, NULL);
@@ -154,29 +154,29 @@ int psc_i2s_hw_free(struct snd_pcm_substream *substream,
 }
 
 /**
- * psc_i2s_trigger: start and stop the DMA transfer.
+ * psc_dma_trigger: start and stop the DMA transfer.
  *
  * This function is called by ALSA to start, stop, pause, and resume the DMA
  * transfer of data.
  */
-int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd,
 			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
 	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct psc_i2s_stream *s;
-	struct mpc52xx_psc __iomem *regs = psc_i2s->psc_regs;
+	struct psc_dma_stream *s;
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
 	u16 imr;
 	u8 psc_cmd;
 	unsigned long flags;
 
 	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
+		s = &psc_dma->capture;
 	else
-		s = &psc_i2s->playback;
+		s = &psc_dma->playback;
 
-	dev_dbg(psc_i2s->dev, "psc_i2s_trigger(substream=%p, cmd=%i)"
+	dev_dbg(psc_dma->dev, "psc_dma_trigger(substream=%p, cmd=%i)"
 		" stream_id=%i\n",
 		substream, cmd, substream->pstr->stream);
 
@@ -207,14 +207,14 @@ int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
 		else
 			bcom_gen_bd_tx_reset(s->bcom_task);
 		while (!bcom_queue_full(s->bcom_task))
-			psc_i2s_bcom_enqueue_next_buffer(s);
+			psc_dma_bcom_enqueue_next_buffer(s);
 		bcom_enable(s->bcom_task);
 
-		/* Due to errata in the i2s mode; need to line up enabling
+		/* Due to errata in the dma mode; need to line up enabling
 		 * the transmitter with a transition on the frame sync
 		 * line */
 
-		spin_lock_irqsave(&psc_i2s->lock, flags);
+		spin_lock_irqsave(&psc_dma->lock, flags);
 		/* first make sure it is low */
 		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) != 0)
 			;
@@ -228,7 +228,7 @@ int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
 		if (substream->pstr->stream == SNDRV_PCM_STREAM_PLAYBACK)
 			psc_cmd |= MPC52xx_PSC_TX_ENABLE;
 		out_8(&regs->command, psc_cmd);
-		spin_unlock_irqrestore(&psc_i2s->lock, flags);
+		spin_unlock_irqrestore(&psc_dma->lock, flags);
 
 		break;
 
@@ -236,7 +236,7 @@ int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
 		/* Turn off the PSC */
 		s->active = 0;
 		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
-			if (!psc_i2s->playback.active) {
+			if (!psc_dma->playback.active) {
 				out_8(&regs->command, 2 << 4);	/* reset rx */
 				out_8(&regs->command, 3 << 4);	/* reset tx */
 				out_8(&regs->command, 4 << 4);	/* reset err */
@@ -244,7 +244,7 @@ int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
 		} else {
 			out_8(&regs->command, 3 << 4);	/* reset tx */
 			out_8(&regs->command, 4 << 4);	/* reset err */
-			if (!psc_i2s->capture.active)
+			if (!psc_dma->capture.active)
 				out_8(&regs->command, 2 << 4);	/* reset rx */
 		}
 
@@ -255,15 +255,15 @@ int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
 		break;
 
 	default:
-		dev_dbg(psc_i2s->dev, "invalid command\n");
+		dev_dbg(psc_dma->dev, "invalid command\n");
 		return -EINVAL;
 	}
 
 	/* Update interrupt enable settings */
 	imr = 0;
-	if (psc_i2s->playback.active)
+	if (psc_dma->playback.active)
 		imr |= MPC52xx_PSC_IMR_TXEMP;
-	if (psc_i2s->capture.active)
+	if (psc_dma->capture.active)
 		imr |= MPC52xx_PSC_IMR_ORERR;
 	out_be16(&regs->isr_imr.imr, imr);
 
@@ -271,36 +271,36 @@ int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
 }
 
 /**
- * psc_i2s_shutdown: shutdown the data transfer on a stream
+ * psc_dma_shutdown: shutdown the data transfer on a stream
  *
  * Shutdown the PSC if there are no other substreams open.
  */
-void psc_i2s_shutdown(struct snd_pcm_substream *substream,
+void psc_dma_shutdown(struct snd_pcm_substream *substream,
 			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
 
-	dev_dbg(psc_i2s->dev, "psc_i2s_shutdown(substream=%p)\n", substream);
+	dev_dbg(psc_dma->dev, "psc_dma_shutdown(substream=%p)\n", substream);
 
 	/*
 	 * If this is the last active substream, disable the PSC and release
 	 * the IRQ.
 	 */
-	if (!psc_i2s->playback.active &&
-	    !psc_i2s->capture.active) {
+	if (!psc_dma->playback.active &&
+	    !psc_dma->capture.active) {
 
 		/* Disable all interrupts and reset the PSC */
-		out_be16(&psc_i2s->psc_regs->isr_imr.imr, 0);
-		out_8(&psc_i2s->psc_regs->command, 3 << 4); /* reset tx */
-		out_8(&psc_i2s->psc_regs->command, 2 << 4); /* reset rx */
-		out_8(&psc_i2s->psc_regs->command, 1 << 4); /* reset mode */
-		out_8(&psc_i2s->psc_regs->command, 4 << 4); /* reset error */
+		out_be16(&psc_dma->psc_regs->isr_imr.imr, 0);
+		out_8(&psc_dma->psc_regs->command, 3 << 4); /* reset tx */
+		out_8(&psc_dma->psc_regs->command, 2 << 4); /* reset rx */
+		out_8(&psc_dma->psc_regs->command, 1 << 4); /* reset mode */
+		out_8(&psc_dma->psc_regs->command, 4 << 4); /* reset error */
 
 		/* Release irqs */
-		free_irq(psc_i2s->irq, psc_i2s);
-		free_irq(psc_i2s->capture.irq, &psc_i2s->capture);
-		free_irq(psc_i2s->playback.irq, &psc_i2s->playback);
+		free_irq(psc_dma->irq, psc_dma);
+		free_irq(psc_dma->capture.irq, &psc_dma->capture);
+		free_irq(psc_dma->playback.irq, &psc_dma->playback);
 	}
 }
 
@@ -312,7 +312,7 @@ void psc_i2s_shutdown(struct snd_pcm_substream *substream,
  * interaction with the attached codec
  */
 
-static const struct snd_pcm_hardware psc_i2s_pcm_hardware = {
+static const struct snd_pcm_hardware psc_dma_pcm_hardware = {
 	.info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
 		SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
 		SNDRV_PCM_INFO_BATCH,
@@ -330,80 +330,80 @@ static const struct snd_pcm_hardware psc_i2s_pcm_hardware = {
 	.fifo_size		= 0,
 };
 
-static int psc_i2s_pcm_open(struct snd_pcm_substream *substream)
+static int psc_dma_pcm_open(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	struct psc_dma_stream *s;
 
-	dev_dbg(psc_i2s->dev, "psc_i2s_pcm_open(substream=%p)\n", substream);
+	dev_dbg(psc_dma->dev, "psc_dma_pcm_open(substream=%p)\n", substream);
 
 	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
+		s = &psc_dma->capture;
 	else
-		s = &psc_i2s->playback;
+		s = &psc_dma->playback;
 
-	snd_soc_set_runtime_hwparams(substream, &psc_i2s_pcm_hardware);
+	snd_soc_set_runtime_hwparams(substream, &psc_dma_pcm_hardware);
 
 	s->stream = substream;
 	return 0;
 }
 
-static int psc_i2s_pcm_close(struct snd_pcm_substream *substream)
+static int psc_dma_pcm_close(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	struct psc_dma_stream *s;
 
-	dev_dbg(psc_i2s->dev, "psc_i2s_pcm_close(substream=%p)\n", substream);
+	dev_dbg(psc_dma->dev, "psc_dma_pcm_close(substream=%p)\n", substream);
 
 	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
+		s = &psc_dma->capture;
 	else
-		s = &psc_i2s->playback;
+		s = &psc_dma->playback;
 
 	s->stream = NULL;
 	return 0;
 }
 
 static snd_pcm_uframes_t
-psc_i2s_pcm_pointer(struct snd_pcm_substream *substream)
+psc_dma_pcm_pointer(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
-	struct psc_i2s_stream *s;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	struct psc_dma_stream *s;
 	dma_addr_t count;
 
 	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-		s = &psc_i2s->capture;
+		s = &psc_dma->capture;
 	else
-		s = &psc_i2s->playback;
+		s = &psc_dma->playback;
 
 	count = s->period_current_pt - s->period_start;
 
 	return bytes_to_frames(substream->runtime, count);
 }
 
-static struct snd_pcm_ops psc_i2s_pcm_ops = {
-	.open		= psc_i2s_pcm_open,
-	.close		= psc_i2s_pcm_close,
+static struct snd_pcm_ops psc_dma_pcm_ops = {
+	.open		= psc_dma_pcm_open,
+	.close		= psc_dma_pcm_close,
 	.ioctl		= snd_pcm_lib_ioctl,
-	.pointer	= psc_i2s_pcm_pointer,
+	.pointer	= psc_dma_pcm_pointer,
 };
 
-static u64 psc_i2s_pcm_dmamask = 0xffffffff;
-static int psc_i2s_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
+static u64 psc_dma_pcm_dmamask = 0xffffffff;
+static int psc_dma_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
 			   struct snd_pcm *pcm)
 {
 	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	size_t size = psc_i2s_pcm_hardware.buffer_bytes_max;
+	size_t size = psc_dma_pcm_hardware.buffer_bytes_max;
 	int rc = 0;
 
-	dev_dbg(rtd->socdev->dev, "psc_i2s_pcm_new(card=%p, dai=%p, pcm=%p)\n",
+	dev_dbg(rtd->socdev->dev, "psc_dma_pcm_new(card=%p, dai=%p, pcm=%p)\n",
 		card, dai, pcm);
 
 	if (!card->dev->dma_mask)
-		card->dev->dma_mask = &psc_i2s_pcm_dmamask;
+		card->dev->dma_mask = &psc_dma_pcm_dmamask;
 	if (!card->dev->coherent_dma_mask)
 		card->dev->coherent_dma_mask = 0xffffffff;
 
@@ -431,13 +431,13 @@ static int psc_i2s_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
 	return -ENOMEM;
 }
 
-static void psc_i2s_pcm_free(struct snd_pcm *pcm)
+static void psc_dma_pcm_free(struct snd_pcm *pcm)
 {
 	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
 	struct snd_pcm_substream *substream;
 	int stream;
 
-	dev_dbg(rtd->socdev->dev, "psc_i2s_pcm_free(pcm=%p)\n", pcm);
+	dev_dbg(rtd->socdev->dev, "psc_dma_pcm_free(pcm=%p)\n", pcm);
 
 	for (stream = 0; stream < 2; stream++) {
 		substream = pcm->streams[stream].substream;
@@ -449,10 +449,10 @@ static void psc_i2s_pcm_free(struct snd_pcm *pcm)
 	}
 }
 
-struct snd_soc_platform psc_i2s_pcm_soc_platform = {
+struct snd_soc_platform psc_dma_pcm_soc_platform = {
 	.name		= "mpc5200-psc-audio",
-	.pcm_ops	= &psc_i2s_pcm_ops,
-	.pcm_new	= &psc_i2s_pcm_new,
-	.pcm_free	= &psc_i2s_pcm_free,
+	.pcm_ops	= &psc_dma_pcm_ops,
+	.pcm_new	= &psc_dma_pcm_new,
+	.pcm_free	= &psc_dma_pcm_free,
 };
 
diff --git a/sound/soc/fsl/mpc5200_dma.h b/sound/soc/fsl/mpc5200_dma.h
index 9a19e8a..a33232c 100644
--- a/sound/soc/fsl/mpc5200_dma.h
+++ b/sound/soc/fsl/mpc5200_dma.h
@@ -6,9 +6,9 @@
 #define __SOUND_SOC_FSL_MPC5200_DMA_H__
 
 /**
- * psc_i2s_stream - Data specific to a single stream (playback or capture)
+ * psc_dma_stream - Data specific to a single stream (playback or capture)
  * @active:		flag indicating if the stream is active
- * @psc_i2s:		pointer back to parent psc_i2s data structure
+ * @psc_dma:		pointer back to parent psc_dma data structure
  * @bcom_task:		bestcomm task structure
  * @irq:		irq number for bestcomm task
  * @period_start:	physical address of start of DMA region
@@ -16,9 +16,9 @@
  * @period_next_pt:	physical address of next DMA buffer to enqueue
  * @period_bytes:	size of DMA period in bytes
  */
-struct psc_i2s_stream {
+struct psc_dma_stream {
 	int active;
-	struct psc_i2s *psc_i2s;
+	struct psc_dma *psc_dma;
 	struct bcom_task *bcom_task;
 	int irq;
 	struct snd_pcm_substream *stream;
@@ -30,7 +30,7 @@ struct psc_i2s_stream {
 };
 
 /**
- * psc_i2s - Private driver data
+ * psc_dma - Private driver data
  * @name: short name for this device ("PSC0", "PSC1", etc)
  * @psc_regs: pointer to the PSC's registers
  * @fifo_regs: pointer to the PSC's FIFO registers
@@ -42,7 +42,7 @@ struct psc_i2s_stream {
  * @playback: Playback stream context data
  * @capture: Capture stream context data
  */
-struct psc_i2s {
+struct psc_dma {
 	char name[32];
 	struct mpc52xx_psc __iomem *psc_regs;
 	struct mpc52xx_psc_fifo __iomem *fifo_regs;
@@ -53,8 +53,8 @@ struct psc_i2s {
 	u32 sicr;
 
 	/* per-stream data */
-	struct psc_i2s_stream playback;
-	struct psc_i2s_stream capture;
+	struct psc_dma_stream playback;
+	struct psc_dma_stream capture;
 
 	/* Statistics */
 	struct {
@@ -64,18 +64,18 @@ struct psc_i2s {
 };
 
 
-int psc_i2s_startup(struct snd_pcm_substream *substream,
+int psc_dma_startup(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai);
 
-int psc_i2s_hw_free(struct snd_pcm_substream *substream,
+int psc_dma_hw_free(struct snd_pcm_substream *substream,
 			   struct snd_soc_dai *dai);
 
-void psc_i2s_shutdown(struct snd_pcm_substream *substream,
+void psc_dma_shutdown(struct snd_pcm_substream *substream,
 			     struct snd_soc_dai *dai);
 
-int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd,
 			   struct snd_soc_dai *dai);
 
-extern struct snd_soc_platform psc_i2s_pcm_soc_platform;
+extern struct snd_soc_platform psc_dma_pcm_soc_platform;
 
 #endif /* __SOUND_SOC_FSL_MPC5200_DMA_H__ */
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c
index 8974b53..12a7917 100644
--- a/sound/soc/fsl/mpc5200_psc_i2s.c
+++ b/sound/soc/fsl/mpc5200_psc_i2s.c
@@ -54,10 +54,10 @@ static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
 	u32 mode;
 
-	dev_dbg(psc_i2s->dev, "%s(substream=%p) p_size=%i p_bytes=%i"
+	dev_dbg(psc_dma->dev, "%s(substream=%p) p_size=%i p_bytes=%i"
 		" periods=%i buffer_size=%i  buffer_bytes=%i\n",
 		__func__, substream, params_period_size(params),
 		params_period_bytes(params), params_periods(params),
@@ -77,10 +77,10 @@ static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 		mode = MPC52xx_PSC_SICR_SIM_CODEC_32;
 		break;
 	default:
-		dev_dbg(psc_i2s->dev, "invalid format\n");
+		dev_dbg(psc_dma->dev, "invalid format\n");
 		return -EINVAL;
 	}
-	out_be32(&psc_i2s->psc_regs->sicr, psc_i2s->sicr | mode);
+	out_be32(&psc_dma->psc_regs->sicr, psc_dma->sicr | mode);
 
 	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
 
@@ -104,8 +104,8 @@ static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 static int psc_i2s_set_sysclk(struct snd_soc_dai *cpu_dai,
 			      int clk_id, unsigned int freq, int dir)
 {
-	struct psc_i2s *psc_i2s = cpu_dai->private_data;
-	dev_dbg(psc_i2s->dev, "psc_i2s_set_sysclk(cpu_dai=%p, dir=%i)\n",
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+	dev_dbg(psc_dma->dev, "psc_i2s_set_sysclk(cpu_dai=%p, dir=%i)\n",
 				cpu_dai, dir);
 	return (dir == SND_SOC_CLOCK_IN) ? 0 : -EINVAL;
 }
@@ -123,8 +123,8 @@ static int psc_i2s_set_sysclk(struct snd_soc_dai *cpu_dai,
  */
 static int psc_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int format)
 {
-	struct psc_i2s *psc_i2s = cpu_dai->private_data;
-	dev_dbg(psc_i2s->dev, "psc_i2s_set_fmt(cpu_dai=%p, format=%i)\n",
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+	dev_dbg(psc_dma->dev, "psc_i2s_set_fmt(cpu_dai=%p, format=%i)\n",
 				cpu_dai, format);
 	return (format == SND_SOC_DAIFMT_I2S) ? 0 : -EINVAL;
 }
@@ -140,11 +140,11 @@ static int psc_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int format)
  * psc_i2s_dai_template: template CPU Digital Audio Interface
  */
 static struct snd_soc_dai_ops psc_i2s_dai_ops = {
-	.startup	= psc_i2s_startup,
+	.startup	= psc_dma_startup,
 	.hw_params	= psc_i2s_hw_params,
-	.hw_free	= psc_i2s_hw_free,
-	.shutdown	= psc_i2s_shutdown,
-	.trigger	= psc_i2s_trigger,
+	.hw_free	= psc_dma_hw_free,
+	.shutdown	= psc_dma_shutdown,
+	.trigger	= psc_dma_trigger,
 	.set_sysclk	= psc_i2s_set_sysclk,
 	.set_fmt	= psc_i2s_set_fmt,
 };
@@ -172,24 +172,24 @@ static struct snd_soc_dai psc_i2s_dai_template = {
 static ssize_t psc_i2s_status_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	struct psc_i2s *psc_i2s = dev_get_drvdata(dev);
+	struct psc_dma *psc_dma = dev_get_drvdata(dev);
 
 	return sprintf(buf, "status=%.4x sicr=%.8x rfnum=%i rfstat=0x%.4x "
 			"tfnum=%i tfstat=0x%.4x\n",
-			in_be16(&psc_i2s->psc_regs->sr_csr.status),
-			in_be32(&psc_i2s->psc_regs->sicr),
-			in_be16(&psc_i2s->fifo_regs->rfnum) & 0x1ff,
-			in_be16(&psc_i2s->fifo_regs->rfstat),
-			in_be16(&psc_i2s->fifo_regs->tfnum) & 0x1ff,
-			in_be16(&psc_i2s->fifo_regs->tfstat));
+			in_be16(&psc_dma->psc_regs->sr_csr.status),
+			in_be32(&psc_dma->psc_regs->sicr),
+			in_be16(&psc_dma->fifo_regs->rfnum) & 0x1ff,
+			in_be16(&psc_dma->fifo_regs->rfstat),
+			in_be16(&psc_dma->fifo_regs->tfnum) & 0x1ff,
+			in_be16(&psc_dma->fifo_regs->tfstat));
 }
 
-static int *psc_i2s_get_stat_attr(struct psc_i2s *psc_i2s, const char *name)
+static int *psc_i2s_get_stat_attr(struct psc_dma *psc_dma, const char *name)
 {
 	if (strcmp(name, "playback_underrun") == 0)
-		return &psc_i2s->stats.underrun_count;
+		return &psc_dma->stats.underrun_count;
 	if (strcmp(name, "capture_overrun") == 0)
-		return &psc_i2s->stats.overrun_count;
+		return &psc_dma->stats.overrun_count;
 
 	return NULL;
 }
@@ -197,10 +197,10 @@ static int *psc_i2s_get_stat_attr(struct psc_i2s *psc_i2s, const char *name)
 static ssize_t psc_i2s_stat_show(struct device *dev,
 				 struct device_attribute *attr, char *buf)
 {
-	struct psc_i2s *psc_i2s = dev_get_drvdata(dev);
+	struct psc_dma *psc_dma = dev_get_drvdata(dev);
 	int *attrib;
 
-	attrib = psc_i2s_get_stat_attr(psc_i2s, attr->attr.name);
+	attrib = psc_i2s_get_stat_attr(psc_dma, attr->attr.name);
 	if (!attrib)
 		return 0;
 
@@ -212,10 +212,10 @@ static ssize_t psc_i2s_stat_store(struct device *dev,
 				  const char *buf,
 				  size_t count)
 {
-	struct psc_i2s *psc_i2s = dev_get_drvdata(dev);
+	struct psc_dma *psc_dma = dev_get_drvdata(dev);
 	int *attrib;
 
-	attrib = psc_i2s_get_stat_attr(psc_i2s, attr->attr.name);
+	attrib = psc_i2s_get_stat_attr(psc_dma, attr->attr.name);
 	if (!attrib)
 		return 0;
 
@@ -238,7 +238,7 @@ static int __devinit psc_i2s_of_probe(struct of_device *op,
 				      const struct of_device_id *match)
 {
 	phys_addr_t fifo;
-	struct psc_i2s *psc_i2s;
+	struct psc_dma *psc_dma;
 	struct resource res;
 	int size, psc_id, irq, rc;
 	const __be32 *prop;
@@ -265,56 +265,56 @@ static int __devinit psc_i2s_of_probe(struct of_device *op,
 	}
 
 	/* Allocate and initialize the driver private data */
-	psc_i2s = kzalloc(sizeof *psc_i2s, GFP_KERNEL);
-	if (!psc_i2s) {
+	psc_dma = kzalloc(sizeof *psc_dma, GFP_KERNEL);
+	if (!psc_dma) {
 		iounmap(regs);
 		return -ENOMEM;
 	}
-	spin_lock_init(&psc_i2s->lock);
-	psc_i2s->irq = irq;
-	psc_i2s->psc_regs = regs;
-	psc_i2s->fifo_regs = regs + sizeof *psc_i2s->psc_regs;
-	psc_i2s->dev = &op->dev;
-	psc_i2s->playback.psc_i2s = psc_i2s;
-	psc_i2s->capture.psc_i2s = psc_i2s;
-	snprintf(psc_i2s->name, sizeof psc_i2s->name, "PSC%u", psc_id+1);
+	spin_lock_init(&psc_dma->lock);
+	psc_dma->irq = irq;
+	psc_dma->psc_regs = regs;
+	psc_dma->fifo_regs = regs + sizeof *psc_dma->psc_regs;
+	psc_dma->dev = &op->dev;
+	psc_dma->playback.psc_dma = psc_dma;
+	psc_dma->capture.psc_dma = psc_dma;
+	snprintf(psc_dma->name, sizeof psc_dma->name, "PSC%u", psc_id+1);
 
 	/* Fill out the CPU DAI structure */
-	memcpy(&psc_i2s->dai, &psc_i2s_dai_template, sizeof psc_i2s->dai);
-	psc_i2s->dai.private_data = psc_i2s;
-	psc_i2s->dai.name = psc_i2s->name;
-	psc_i2s->dai.id = psc_id;
+	memcpy(&psc_dma->dai, &psc_i2s_dai_template, sizeof psc_dma->dai);
+	psc_dma->dai.private_data = psc_dma;
+	psc_dma->dai.name = psc_dma->name;
+	psc_dma->dai.id = psc_id;
 
 	/* Find the address of the fifo data registers and setup the
 	 * DMA tasks */
 	fifo = res.start + offsetof(struct mpc52xx_psc, buffer.buffer_32);
-	psc_i2s->capture.bcom_task =
+	psc_dma->capture.bcom_task =
 		bcom_psc_gen_bd_rx_init(psc_id, 10, fifo, 512);
-	psc_i2s->playback.bcom_task =
+	psc_dma->playback.bcom_task =
 		bcom_psc_gen_bd_tx_init(psc_id, 10, fifo);
-	if (!psc_i2s->capture.bcom_task ||
-	    !psc_i2s->playback.bcom_task) {
+	if (!psc_dma->capture.bcom_task ||
+	    !psc_dma->playback.bcom_task) {
 		dev_err(&op->dev, "Could not allocate bestcomm tasks\n");
 		iounmap(regs);
-		kfree(psc_i2s);
+		kfree(psc_dma);
 		return -ENODEV;
 	}
 
 	/* Disable all interrupts and reset the PSC */
-	out_be16(&psc_i2s->psc_regs->isr_imr.imr, 0);
-	out_8(&psc_i2s->psc_regs->command, 3 << 4); /* reset transmitter */
-	out_8(&psc_i2s->psc_regs->command, 2 << 4); /* reset receiver */
-	out_8(&psc_i2s->psc_regs->command, 1 << 4); /* reset mode */
-	out_8(&psc_i2s->psc_regs->command, 4 << 4); /* reset error */
+	out_be16(&psc_dma->psc_regs->isr_imr.imr, 0);
+	out_8(&psc_dma->psc_regs->command, 3 << 4); /* reset transmitter */
+	out_8(&psc_dma->psc_regs->command, 2 << 4); /* reset receiver */
+	out_8(&psc_dma->psc_regs->command, 1 << 4); /* reset mode */
+	out_8(&psc_dma->psc_regs->command, 4 << 4); /* reset error */
 
 	/* Configure the serial interface mode; defaulting to CODEC8 mode */
-	psc_i2s->sicr = MPC52xx_PSC_SICR_DTS1 | MPC52xx_PSC_SICR_I2S |
+	psc_dma->sicr = MPC52xx_PSC_SICR_DTS1 | MPC52xx_PSC_SICR_I2S |
 			MPC52xx_PSC_SICR_CLKPOL;
 	if (of_get_property(op->node, "fsl,cellslave", NULL))
-		psc_i2s->sicr |= MPC52xx_PSC_SICR_CELLSLAVE |
+		psc_dma->sicr |= MPC52xx_PSC_SICR_CELLSLAVE |
 				 MPC52xx_PSC_SICR_GENCLK;
-	out_be32(&psc_i2s->psc_regs->sicr,
-		 psc_i2s->sicr | MPC52xx_PSC_SICR_SIM_CODEC_8);
+	out_be32(&psc_dma->psc_regs->sicr,
+		 psc_dma->sicr | MPC52xx_PSC_SICR_SIM_CODEC_8);
 
 	/* Check for the codec handle.  If it is not present then we
 	 * are done */
@@ -325,54 +325,54 @@ static int __devinit psc_i2s_of_probe(struct of_device *op,
 	 * First write: RxRdy (FIFO Alarm) generates rx FIFO irq
 	 * Second write: register Normal mode for non loopback
 	 */
-	out_8(&psc_i2s->psc_regs->mode, 0);
-	out_8(&psc_i2s->psc_regs->mode, 0);
+	out_8(&psc_dma->psc_regs->mode, 0);
+	out_8(&psc_dma->psc_regs->mode, 0);
 
 	/* Set the TX and RX fifo alarm thresholds */
-	out_be16(&psc_i2s->fifo_regs->rfalarm, 0x100);
-	out_8(&psc_i2s->fifo_regs->rfcntl, 0x4);
-	out_be16(&psc_i2s->fifo_regs->tfalarm, 0x100);
-	out_8(&psc_i2s->fifo_regs->tfcntl, 0x7);
+	out_be16(&psc_dma->fifo_regs->rfalarm, 0x100);
+	out_8(&psc_dma->fifo_regs->rfcntl, 0x4);
+	out_be16(&psc_dma->fifo_regs->tfalarm, 0x100);
+	out_8(&psc_dma->fifo_regs->tfcntl, 0x7);
 
 	/* Lookup the IRQ numbers */
-	psc_i2s->playback.irq =
-		bcom_get_task_irq(psc_i2s->playback.bcom_task);
-	psc_i2s->capture.irq =
-		bcom_get_task_irq(psc_i2s->capture.bcom_task);
+	psc_dma->playback.irq =
+		bcom_get_task_irq(psc_dma->playback.bcom_task);
+	psc_dma->capture.irq =
+		bcom_get_task_irq(psc_dma->capture.bcom_task);
 
 	/* Save what we've done so it can be found again later */
-	dev_set_drvdata(&op->dev, psc_i2s);
+	dev_set_drvdata(&op->dev, psc_dma);
 
 	/* Register the SYSFS files */
-	rc = device_create_file(psc_i2s->dev, &dev_attr_status);
-	rc |= device_create_file(psc_i2s->dev, &dev_attr_capture_overrun);
-	rc |= device_create_file(psc_i2s->dev, &dev_attr_playback_underrun);
+	rc = device_create_file(psc_dma->dev, &dev_attr_status);
+	rc |= device_create_file(psc_dma->dev, &dev_attr_capture_overrun);
+	rc |= device_create_file(psc_dma->dev, &dev_attr_playback_underrun);
 	if (rc)
-		dev_info(psc_i2s->dev, "error creating sysfs files\n");
+		dev_info(psc_dma->dev, "error creating sysfs files\n");
 
-	snd_soc_register_platform(&psc_i2s_pcm_soc_platform);
+	snd_soc_register_platform(&psc_dma_pcm_soc_platform);
 
 	/* Tell the ASoC OF helpers about it */
-	of_snd_soc_register_platform(&psc_i2s_pcm_soc_platform, op->node,
-				     &psc_i2s->dai);
+	of_snd_soc_register_platform(&psc_dma_pcm_soc_platform, op->node,
+				     &psc_dma->dai);
 
 	return 0;
 }
 
 static int __devexit psc_i2s_of_remove(struct of_device *op)
 {
-	struct psc_i2s *psc_i2s = dev_get_drvdata(&op->dev);
+	struct psc_dma *psc_dma = dev_get_drvdata(&op->dev);
 
 	dev_dbg(&op->dev, "psc_i2s_remove()\n");
 
-	snd_soc_unregister_platform(&psc_i2s_pcm_soc_platform);
+	snd_soc_unregister_platform(&psc_dma_pcm_soc_platform);
 
-	bcom_gen_bd_rx_release(psc_i2s->capture.bcom_task);
-	bcom_gen_bd_tx_release(psc_i2s->playback.bcom_task);
+	bcom_gen_bd_rx_release(psc_dma->capture.bcom_task);
+	bcom_gen_bd_tx_release(psc_dma->playback.bcom_task);
 
-	iounmap(psc_i2s->psc_regs);
-	iounmap(psc_i2s->fifo_regs);
-	kfree(psc_i2s);
+	iounmap(psc_dma->psc_regs);
+	iounmap(psc_dma->fifo_regs);
+	kfree(psc_dma);
 	dev_set_drvdata(&op->dev, NULL);
 
 	return 0;
-- 
cgit v0.10.2


From 0bc53a67ac831ec84f730a657dbcadd80a589ef5 Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Sat, 23 May 2009 19:13:03 -0400
Subject: ASoC: Add a few more mpc5200 PSC defines

Add a few more mpc5200 PSC defines. More bit fields defines for mpc5200
PSC registers.

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h
index a218da6..fb84120 100644
--- a/arch/powerpc/include/asm/mpc52xx_psc.h
+++ b/arch/powerpc/include/asm/mpc52xx_psc.h
@@ -28,6 +28,10 @@
 #define MPC52xx_PSC_MAXNUM	6
 
 /* Programmable Serial Controller (PSC) status register bits */
+#define MPC52xx_PSC_SR_UNEX_RX	0x0001
+#define MPC52xx_PSC_SR_DATA_VAL	0x0002
+#define MPC52xx_PSC_SR_DATA_OVR	0x0004
+#define MPC52xx_PSC_SR_CMDSEND	0x0008
 #define MPC52xx_PSC_SR_CDE	0x0080
 #define MPC52xx_PSC_SR_RXRDY	0x0100
 #define MPC52xx_PSC_SR_RXFULL	0x0200
@@ -61,6 +65,12 @@
 #define MPC52xx_PSC_RXTX_FIFO_EMPTY	0x0001
 
 /* PSC interrupt status/mask bits */
+#define MPC52xx_PSC_IMR_UNEX_RX_SLOT 0x0001
+#define MPC52xx_PSC_IMR_DATA_VALID	0x0002
+#define MPC52xx_PSC_IMR_DATA_OVR	0x0004
+#define MPC52xx_PSC_IMR_CMD_SEND	0x0008
+#define MPC52xx_PSC_IMR_ERROR		0x0040
+#define MPC52xx_PSC_IMR_DEOF		0x0080
 #define MPC52xx_PSC_IMR_TXRDY		0x0100
 #define MPC52xx_PSC_IMR_RXRDY		0x0200
 #define MPC52xx_PSC_IMR_DB		0x0400
@@ -117,6 +127,7 @@
 #define MPC52xx_PSC_SICR_SIM_FIR		(0x6 << 24)
 #define MPC52xx_PSC_SICR_SIM_CODEC_24		(0x7 << 24)
 #define MPC52xx_PSC_SICR_SIM_CODEC_32		(0xf << 24)
+#define MPC52xx_PSC_SICR_AWR			(1 << 30)
 #define MPC52xx_PSC_SICR_GENCLK			(1 << 23)
 #define MPC52xx_PSC_SICR_I2S			(1 << 22)
 #define MPC52xx_PSC_SICR_CLKPOL			(1 << 21)
-- 
cgit v0.10.2


From 32bdfac5462d777f35b00838893c4f87baf23efe Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 24 May 2009 21:15:07 +0200
Subject: PM: Do not hold dpm_list_mtx while disabling/enabling nonboot CPUs

We shouldn't hold dpm_list_mtx while executing
[disable|enable]_nonboot_cpus(), because theoretically this may lead
to a deadlock as shown by the following example (provided by Johannes
Berg):

CPU 3       CPU 2                     CPU 1
                                      suspend/hibernate
            something:
            rtnl_lock()               device_pm_lock()
                                       -> mutex_lock(&dpm_list_mtx)

            mutex_lock(&dpm_list_mtx)

linkwatch_work
 -> rtnl_lock()
                                      disable_nonboot_cpus()
                                       -> flush CPU 3 workqueue

Fortunately, device drivers are supposed to stop any activities that
might lead to the registration of new device objects way before
disable_nonboot_cpus() is called, so it shouldn't be necessary to
hold dpm_list_mtx over the entire late part of device suspend and
early part of device resume.

Thus, during the late suspend and the early resume of devices acquire
dpm_list_mtx only when dpm_list is going to be traversed and release
it right after that.

This patch is reported to fix the regressions tracked as
http://bugzilla.kernel.org/show_bug.cgi?id=13245.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Miles Lane <miles.lane@gmail.com>
Tested-by: Ming Lei <tom.leiming@gmail.com>

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 69b4ddb..3e4bc69 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -357,6 +357,7 @@ static void dpm_power_up(pm_message_t state)
 {
 	struct device *dev;
 
+	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry(dev, &dpm_list, power.entry)
 		if (dev->power.status > DPM_OFF) {
 			int error;
@@ -366,6 +367,7 @@ static void dpm_power_up(pm_message_t state)
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
+	mutex_unlock(&dpm_list_mtx);
 }
 
 /**
@@ -614,6 +616,7 @@ int device_power_down(pm_message_t state)
 	int error = 0;
 
 	suspend_device_irqs();
+	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
 		error = suspend_device_noirq(dev, state);
 		if (error) {
@@ -622,6 +625,7 @@ int device_power_down(pm_message_t state)
 		}
 		dev->power.status = DPM_OFF_IRQ;
 	}
+	mutex_unlock(&dpm_list_mtx);
 	if (error)
 		device_power_up(resume_event(state));
 	return error;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 5a758c6..e498377 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1451,7 +1451,6 @@ int kernel_kexec(void)
 		error = device_suspend(PMSG_FREEZE);
 		if (error)
 			goto Resume_console;
-		device_pm_lock();
 		/* At this point, device_suspend() has been called,
 		 * but *not* device_power_down(). We *must*
 		 * device_power_down() now.  Otherwise, drivers for
@@ -1489,7 +1488,6 @@ int kernel_kexec(void)
 		enable_nonboot_cpus();
 		device_power_up(PMSG_RESTORE);
  Resume_devices:
-		device_pm_unlock();
 		device_resume(PMSG_RESTORE);
  Resume_console:
 		resume_console();
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b0dc9e7..5cb080e 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -215,8 +215,6 @@ static int create_image(int platform_mode)
 	if (error)
 		return error;
 
-	device_pm_lock();
-
 	/* At this point, device_suspend() has been called, but *not*
 	 * device_power_down(). We *must* call device_power_down() now.
 	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -227,7 +225,7 @@ static int create_image(int platform_mode)
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting hibernation\n");
-		goto Unlock;
+		return error;
 	}
 
 	error = platform_pre_snapshot(platform_mode);
@@ -280,9 +278,6 @@ static int create_image(int platform_mode)
 	device_power_up(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 
- Unlock:
-	device_pm_unlock();
-
 	return error;
 }
 
@@ -344,13 +339,11 @@ static int resume_target_kernel(bool platform_mode)
 {
 	int error;
 
-	device_pm_lock();
-
 	error = device_power_down(PMSG_QUIESCE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting resume\n");
-		goto Unlock;
+		return error;
 	}
 
 	error = platform_pre_restore(platform_mode);
@@ -403,9 +396,6 @@ static int resume_target_kernel(bool platform_mode)
 
 	device_power_up(PMSG_RECOVER);
 
- Unlock:
-	device_pm_unlock();
-
 	return error;
 }
 
@@ -464,11 +454,9 @@ int hibernation_platform_enter(void)
 		goto Resume_devices;
 	}
 
-	device_pm_lock();
-
 	error = device_power_down(PMSG_HIBERNATE);
 	if (error)
-		goto Unlock;
+		goto Resume_devices;
 
 	error = hibernation_ops->prepare();
 	if (error)
@@ -493,9 +481,6 @@ int hibernation_platform_enter(void)
 
 	device_power_up(PMSG_RESTORE);
 
- Unlock:
-	device_pm_unlock();
-
  Resume_devices:
 	entering_platform_hibernation = false;
 	device_resume(PMSG_RESTORE);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f99ed6a..8680282 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -289,12 +289,10 @@ static int suspend_enter(suspend_state_t state)
 {
 	int error;
 
-	device_pm_lock();
-
 	if (suspend_ops->prepare) {
 		error = suspend_ops->prepare();
 		if (error)
-			goto Done;
+			return error;
 	}
 
 	error = device_power_down(PMSG_SUSPEND);
@@ -343,9 +341,6 @@ static int suspend_enter(suspend_state_t state)
 	if (suspend_ops->finish)
 		suspend_ops->finish();
 
- Done:
-	device_pm_unlock();
-
 	return error;
 }
 
-- 
cgit v0.10.2


From d5a877e8dd409d8c702986d06485c374b705d340 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 24 May 2009 13:03:43 -0700
Subject: async: make sure independent async domains can't accidentally
 entangle

The problem occurs when async_synchronize_full_domain() is called when
the async_pending list is not empty.  This will cause lowest_running()
to return the cookie of the first entry on the async_pending list, which
might be nothing at all to do with the domain being asked for and thus
cause the domain synchronization to wait for an unrelated domain.   This
can cause a deadlock if domain synchronization is used from one domain
to wait for another.

Fix by running over the async_pending list to see if any pending items
actually belong to our domain (and return their cookies if they do).

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/async.c b/kernel/async.c
index 968ef94..5054030 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -92,19 +92,23 @@ extern int initcall_debug;
 static async_cookie_t  __lowest_in_progress(struct list_head *running)
 {
 	struct async_entry *entry;
+	async_cookie_t ret = next_cookie; /* begin with "infinity" value */
+
 	if (!list_empty(running)) {
 		entry = list_first_entry(running,
 			struct async_entry, list);
-		return entry->cookie;
-	} else if (!list_empty(&async_pending)) {
-		entry = list_first_entry(&async_pending,
-			struct async_entry, list);
-		return entry->cookie;
-	} else {
-		/* nothing in progress... next_cookie is "infinity" */
-		return next_cookie;
+		ret = entry->cookie;
 	}
 
+	if (!list_empty(&async_pending)) {
+		list_for_each_entry(entry, &async_pending, list)
+			if (entry->running == running) {
+				ret = entry->cookie;
+				break;
+			}
+	}
+
+	return ret;
 }
 
 static async_cookie_t  lowest_in_progress(struct list_head *running)
-- 
cgit v0.10.2


From 71c9d8b68b299bef614afc7907393564a9f1476f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 25 May 2009 12:01:59 +0900
Subject: x86: Remove remap percpu allocator for the time being

Remap percpu allocator has subtle bug when combined with page
attribute changing.  Remap percpu allocator aliases PMD pages for the
first chunk and as pageattr doesn't know about the alias it ends up
updating page attributes of the original mapping thus leaving the
alises in inconsistent state which might lead to subtle data
corruption.  Please read the following threads for more information:

  http://thread.gmane.org/gmane.linux.kernel/835783

The following is the proposed fix which teaches pageattr about percpu
aliases.

  http://thread.gmane.org/gmane.linux.kernel/837157

However, the above changes are deemed too pervasive for upstream
inclusion for 2.6.30 release, so this patch essentially disables
the remap allocator for the time being.

Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <4A1A0A27.4050301@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 3a97a4c..8f0e13b 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,8 +160,10 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
 	/*
 	 * If large page isn't supported, there's no benefit in doing
 	 * this.  Also, on non-NUMA, embedding is better.
+	 *
+	 * NOTE: disabled for now.
 	 */
-	if (!cpu_has_pse || !pcpu_need_numa())
+	if (true || !cpu_has_pse || !pcpu_need_numa())
 		return -EINVAL;
 
 	/*
-- 
cgit v0.10.2


From 461c6c3a0a23a8fac1a4b636e93ff5dfe599a241 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 25 May 2009 08:06:02 +0200
Subject: ALSA: hda - Add missing check of pin vref 50 and others in Realtek
 codecs

Some Realtek codecs like ALC861 seem to support only VREF50 while the
current driver assumes it's only VREF80.  Check other VREF bits to set
the correct value.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index bcbb736..0fd258e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -776,6 +776,12 @@ static void alc_set_input_pin(struct hda_codec *codec, hda_nid_t nid,
 		pincap = (pincap & AC_PINCAP_VREF) >> AC_PINCAP_VREF_SHIFT;
 		if (pincap & AC_PINCAP_VREF_80)
 			val = PIN_VREF80;
+		else if (pincap & AC_PINCAP_VREF_50)
+			val = PIN_VREF50;
+		else if (pincap & AC_PINCAP_VREF_100)
+			val = PIN_VREF100;
+		else if (pincap & AC_PINCAP_VREF_GRD)
+			val = PIN_VREFGRD;
 	}
 	snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, val);
 }
-- 
cgit v0.10.2


From 85a9f9200226ddffc2ea50dae6a8df04c033ecd4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 25 May 2009 09:59:50 +0200
Subject: perf_counter tools: increase limits, fix

NR_CPUS and NR_COUNTERS goes up quadratic ... 1024x4096 was far
too ambitious upper limit - go for 256x256 which is still plenty.

[ Impact: reduce perf tool memory consumption ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
index a517683..5a2520b 100644
--- a/Documentation/perf_counter/perf.h
+++ b/Documentation/perf_counter/perf.h
@@ -61,8 +61,8 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
 		       group_fd, flags);
 }
 
-#define MAX_COUNTERS			1024
-#define MAX_NR_CPUS			4096
+#define MAX_COUNTERS			256
+#define MAX_NR_CPUS			256
 
 #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
 
-- 
cgit v0.10.2


From 345c03ef0f2bd0c933a1eca27732af7edf5e8d4d Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 25 May 2009 10:05:00 +0200
Subject: sound: oxygen: reset DMA when stream is closed

When a PCM stream is closed, flush the corresponding DMA channel.
Otherwise, the DMA controller would continue to output the last sample
which would result in a DC offset on the output.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/oxygen/oxygen_pcm.c b/sound/pci/oxygen/oxygen_pcm.c
index c262049..3b5ca70 100644
--- a/sound/pci/oxygen/oxygen_pcm.c
+++ b/sound/pci/oxygen/oxygen_pcm.c
@@ -487,10 +487,14 @@ static int oxygen_hw_free(struct snd_pcm_substream *substream)
 {
 	struct oxygen *chip = snd_pcm_substream_chip(substream);
 	unsigned int channel = oxygen_substream_channel(substream);
+	unsigned int channel_mask = 1 << channel;
 
 	spin_lock_irq(&chip->reg_lock);
-	chip->interrupt_mask &= ~(1 << channel);
+	chip->interrupt_mask &= ~channel_mask;
 	oxygen_write16(chip, OXYGEN_INTERRUPT_MASK, chip->interrupt_mask);
+
+	oxygen_set_bits8(chip, OXYGEN_DMA_FLUSH, channel_mask);
+	oxygen_clear_bits8(chip, OXYGEN_DMA_FLUSH, channel_mask);
 	spin_unlock_irq(&chip->reg_lock);
 
 	return snd_pcm_lib_free_pages(substream);
-- 
cgit v0.10.2


From 53bb705d12e0f642c131cdcab2c8e3be7364e505 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 25 May 2009 10:05:43 +0200
Subject: sound: virtuoso: add another DX PCI ID

Add another PCI ID for a second revision of the Xonar DX.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c
index bc5ce11..ebc4e85 100644
--- a/sound/pci/oxygen/virtuoso.c
+++ b/sound/pci/oxygen/virtuoso.c
@@ -188,6 +188,7 @@ static struct pci_device_id xonar_ids[] __devinitdata = {
 	{ OXYGEN_PCI_SUBID(0x1043, 0x8275), .driver_data = MODEL_DX },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x82b7), .driver_data = MODEL_D2X },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x8314), .driver_data = MODEL_HDAV },
+	{ OXYGEN_PCI_SUBID(0x1043, 0x8327), .driver_data = MODEL_DX },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x834f), .driver_data = MODEL_D1 },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x835c), .driver_data = MODEL_STX },
 	{ OXYGEN_PCI_SUBID_BROKEN_EEPROM },
-- 
cgit v0.10.2


From b990ae963a3f80a659b30562c1e3214b386ecce3 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 25 May 2009 10:06:22 +0200
Subject: sound: virtuoso: enable HDAV S/PDIF input

The Xonar HDAV1.3 has a digital input jack, so enable the corresponding
device.

This is not related to the HDMI stuff, which stays unsupported.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c
index ebc4e85..71e38f4 100644
--- a/sound/pci/oxygen/virtuoso.c
+++ b/sound/pci/oxygen/virtuoso.c
@@ -1022,7 +1022,8 @@ static const struct oxygen_model model_xonar_hdav = {
 	.model_data_size = sizeof(struct xonar_data),
 	.device_config = PLAYBACK_0_TO_I2S |
 			 PLAYBACK_1_TO_SPDIF |
-			 CAPTURE_0_FROM_I2S_2,
+			 CAPTURE_0_FROM_I2S_2 |
+			 CAPTURE_1_FROM_SPDIF,
 	.dac_channels = 8,
 	.dac_volume_min = 255 - 2*60,
 	.dac_volume_max = 255,
-- 
cgit v0.10.2


From 04f9890df1bad2115665b7027e664aaffa44088d Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 25 May 2009 10:11:29 +0200
Subject: sound: virtuoso: add Xonar Essence ST support

Add support for the Asus Xonar Essence ST and its daughterboard.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 012858d..68ef84f 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -1859,7 +1859,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
   -------------------
 
     Module for sound cards based on the Asus AV100/AV200 chips,
-    i.e., Xonar D1, DX, D2, D2X, HDAV1.3 (Deluxe), and Essence STX.
+    i.e., Xonar D1, DX, D2, D2X, HDAV1.3 (Deluxe), Essence ST
+    (Deluxe) and Essence STX.
 
     This module supports autoprobe and multiple cards.
 
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 93422e3..699c280 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -764,8 +764,8 @@ config SND_VIRTUOSO
 	select SND_OXYGEN_LIB
 	help
 	  Say Y here to include support for sound cards based on the
-	  Asus AV100/AV200 chips, i.e., Xonar D1, DX, D2, D2X, and
-	  Essence STX.
+	  Asus AV100/AV200 chips, i.e., Xonar D1, DX, D2, D2X,
+	  Essence ST (Deluxe), and Essence STX.
 	  Support for the HDAV1.3 (Deluxe) is very experimental.
 
 	  To compile this driver as a module, choose M here: the module
diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c
index 71e38f4..bf971f7 100644
--- a/sound/pci/oxygen/virtuoso.c
+++ b/sound/pci/oxygen/virtuoso.c
@@ -113,8 +113,8 @@
  */
 
 /*
- * Xonar Essence STX
- * -----------------
+ * Xonar Essence ST (Deluxe)/STX
+ * -----------------------------
  *
  * CMI8788:
  *
@@ -180,6 +180,8 @@ enum {
 	MODEL_DX,
 	MODEL_HDAV,	/* without daughterboard */
 	MODEL_HDAV_H6,	/* with H6 daughterboard */
+	MODEL_ST,
+	MODEL_ST_H6,
 	MODEL_STX,
 };
 
@@ -191,6 +193,7 @@ static struct pci_device_id xonar_ids[] __devinitdata = {
 	{ OXYGEN_PCI_SUBID(0x1043, 0x8327), .driver_data = MODEL_DX },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x834f), .driver_data = MODEL_D1 },
 	{ OXYGEN_PCI_SUBID(0x1043, 0x835c), .driver_data = MODEL_STX },
+	{ OXYGEN_PCI_SUBID(0x1043, 0x835d), .driver_data = MODEL_ST },
 	{ OXYGEN_PCI_SUBID_BROKEN_EEPROM },
 	{ }
 };
@@ -211,9 +214,9 @@ MODULE_DEVICE_TABLE(pci, xonar_ids);
 #define GPIO_DX_FRONT_PANEL	0x0002
 #define GPIO_DX_INPUT_ROUTE	0x0100
 
-#define GPIO_HDAV_DB_MASK	0x0030
-#define GPIO_HDAV_DB_H6		0x0000
-#define GPIO_HDAV_DB_XX		0x0020
+#define GPIO_DB_MASK		0x0030
+#define GPIO_DB_H6		0x0000
+#define GPIO_DB_XX		0x0020
 
 #define GPIO_ST_HP_REAR		0x0002
 #define GPIO_ST_HP		0x0080
@@ -531,7 +534,7 @@ static void xonar_hdav_init(struct oxygen *chip)
 	snd_component_add(chip->card, "CS5381");
 }
 
-static void xonar_stx_init(struct oxygen *chip)
+static void xonar_st_init(struct oxygen *chip)
 {
 	struct xonar_data *data = chip->model_data;
 
@@ -540,12 +543,11 @@ static void xonar_stx_init(struct oxygen *chip)
 		       OXYGEN_2WIRE_INTERRUPT_MASK |
 		       OXYGEN_2WIRE_SPEED_FAST);
 
+	if (chip->model.private_data == MODEL_ST_H6)
+		chip->model.dac_channels = 8;
 	data->anti_pop_delay = 100;
-	data->dacs = 1;
+	data->dacs = chip->model.private_data == MODEL_ST_H6 ? 4 : 1;
 	data->output_enable_bit = GPIO_DX_OUTPUT_ENABLE;
-	data->ext_power_reg = OXYGEN_GPI_DATA;
-	data->ext_power_int_reg = OXYGEN_GPI_INTERRUPT_MASK;
-	data->ext_power_bit = GPI_DX_EXT_POWER;
 	data->pcm1796_oversampling = PCM1796_OS_64;
 
 	pcm1796_init(chip);
@@ -561,6 +563,17 @@ static void xonar_stx_init(struct oxygen *chip)
 	snd_component_add(chip->card, "CS5381");
 }
 
+static void xonar_stx_init(struct oxygen *chip)
+{
+	struct xonar_data *data = chip->model_data;
+
+	data->ext_power_reg = OXYGEN_GPI_DATA;
+	data->ext_power_int_reg = OXYGEN_GPI_INTERRUPT_MASK;
+	data->ext_power_bit = GPI_DX_EXT_POWER;
+
+	xonar_st_init(chip);
+}
+
 static void xonar_disable_output(struct oxygen *chip)
 {
 	struct xonar_data *data = chip->model_data;
@@ -1036,7 +1049,7 @@ static const struct oxygen_model model_xonar_hdav = {
 static const struct oxygen_model model_xonar_st = {
 	.longname = "Asus Virtuoso 100",
 	.chip = "AV200",
-	.init = xonar_stx_init,
+	.init = xonar_st_init,
 	.control_filter = xonar_st_control_filter,
 	.mixer_init = xonar_st_mixer_init,
 	.cleanup = xonar_st_cleanup,
@@ -1069,6 +1082,7 @@ static int __devinit get_xonar_model(struct oxygen *chip,
 		[MODEL_D2]	= &model_xonar_d2,
 		[MODEL_D2X]	= &model_xonar_d2,
 		[MODEL_HDAV]	= &model_xonar_hdav,
+		[MODEL_ST]	= &model_xonar_st,
 		[MODEL_STX]	= &model_xonar_st,
 	};
 	static const char *const names[] = {
@@ -1078,6 +1092,8 @@ static int __devinit get_xonar_model(struct oxygen *chip,
 		[MODEL_D2X]	= "Xonar D2X",
 		[MODEL_HDAV]	= "Xonar HDAV1.3",
 		[MODEL_HDAV_H6]	= "Xonar HDAV1.3+H6",
+		[MODEL_ST]	= "Xonar Essence ST",
+		[MODEL_ST_H6]	= "Xonar Essence ST+H6",
 		[MODEL_STX]	= "Xonar Essence STX",
 	};
 	unsigned int model = id->driver_data;
@@ -1094,21 +1110,27 @@ static int __devinit get_xonar_model(struct oxygen *chip,
 		chip->model.init = xonar_dx_init;
 		break;
 	case MODEL_HDAV:
-		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL,
-				    GPIO_HDAV_DB_MASK);
-		switch (oxygen_read16(chip, OXYGEN_GPIO_DATA) &
-			GPIO_HDAV_DB_MASK) {
-		case GPIO_HDAV_DB_H6:
+		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_DB_MASK);
+		switch (oxygen_read16(chip, OXYGEN_GPIO_DATA) & GPIO_DB_MASK) {
+		case GPIO_DB_H6:
 			model = MODEL_HDAV_H6;
 			break;
-		case GPIO_HDAV_DB_XX:
+		case GPIO_DB_XX:
 			snd_printk(KERN_ERR "unknown daughterboard\n");
 			return -ENODEV;
 		}
 		break;
+	case MODEL_ST:
+		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_DB_MASK);
+		switch (oxygen_read16(chip, OXYGEN_GPIO_DATA) & GPIO_DB_MASK) {
+		case GPIO_DB_H6:
+			model = MODEL_ST_H6;
+			break;
+		}
+		break;
 	case MODEL_STX:
-		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL,
-				    GPIO_HDAV_DB_MASK);
+		chip->model.init = xonar_stx_init;
+		oxygen_clear_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_DB_MASK);
 		break;
 	}
 
-- 
cgit v0.10.2


From 5a2e9a48b1d6de35ae5efea35d117133c3eb30f2 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Mon, 25 May 2009 11:12:11 +0300
Subject: ASoC: TWL4030: Handsfree pop removal redesign

Move the HandsfreeL/R (IHFL/R) pop removal code from the DAPM_MUX_E
to a more appropriate DAPM_PGA_E widget.
Also fix the power-up sequence to match with the TRM.
The power-down sequence is not described in the TRM, so do it
in a way, which seams like the correct sequence.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 9197fdd..17ddcb2 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -546,27 +546,61 @@ static int micpath_event(struct snd_soc_dapm_widget *w,
 	return 0;
 }
 
-static int handsfree_event(struct snd_soc_dapm_widget *w,
-		struct snd_kcontrol *kcontrol, int event)
+static void handsfree_ramp(struct snd_soc_codec *codec, int reg, int ramp)
 {
-	struct soc_enum *e = (struct soc_enum *)w->kcontrols->private_value;
 	unsigned char hs_ctl;
 
-	hs_ctl = twl4030_read_reg_cache(w->codec, e->reg);
+	hs_ctl = twl4030_read_reg_cache(codec, reg);
 
-	if (hs_ctl & TWL4030_HF_CTL_REF_EN) {
+	if (ramp) {
+		/* HF ramp-up */
+		hs_ctl |= TWL4030_HF_CTL_REF_EN;
+		twl4030_write(codec, reg, hs_ctl);
+		udelay(10);
 		hs_ctl |= TWL4030_HF_CTL_RAMP_EN;
-		twl4030_write(w->codec, e->reg, hs_ctl);
+		twl4030_write(codec, reg, hs_ctl);
+		udelay(40);
 		hs_ctl |= TWL4030_HF_CTL_LOOP_EN;
-		twl4030_write(w->codec, e->reg, hs_ctl);
 		hs_ctl |= TWL4030_HF_CTL_HB_EN;
-		twl4030_write(w->codec, e->reg, hs_ctl);
+		twl4030_write(codec, reg, hs_ctl);
 	} else {
-		hs_ctl &= ~(TWL4030_HF_CTL_RAMP_EN | TWL4030_HF_CTL_LOOP_EN
-				| TWL4030_HF_CTL_HB_EN);
-		twl4030_write(w->codec, e->reg, hs_ctl);
+		/* HF ramp-down */
+		hs_ctl &= ~TWL4030_HF_CTL_LOOP_EN;
+		hs_ctl &= ~TWL4030_HF_CTL_HB_EN;
+		twl4030_write(codec, reg, hs_ctl);
+		hs_ctl &= ~TWL4030_HF_CTL_RAMP_EN;
+		twl4030_write(codec, reg, hs_ctl);
+		udelay(40);
+		hs_ctl &= ~TWL4030_HF_CTL_REF_EN;
+		twl4030_write(codec, reg, hs_ctl);
 	}
+}
 
+static int handsfreelpga_event(struct snd_soc_dapm_widget *w,
+		struct snd_kcontrol *kcontrol, int event)
+{
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		handsfree_ramp(w->codec, TWL4030_REG_HFL_CTL, 1);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		handsfree_ramp(w->codec, TWL4030_REG_HFL_CTL, 0);
+		break;
+	}
+	return 0;
+}
+
+static int handsfreerpga_event(struct snd_soc_dapm_widget *w,
+		struct snd_kcontrol *kcontrol, int event)
+{
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		handsfree_ramp(w->codec, TWL4030_REG_HFR_CTL, 1);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		handsfree_ramp(w->codec, TWL4030_REG_HFR_CTL, 0);
+		break;
+	}
 	return 0;
 }
 
@@ -1190,12 +1224,16 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 
 	/* Output MUX controls */
 	/* HandsfreeL/R */
-	SND_SOC_DAPM_MUX_E("HandsfreeL Mux", TWL4030_REG_HFL_CTL, 5, 0,
-		&twl4030_dapm_handsfreel_control, handsfree_event,
-		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
-	SND_SOC_DAPM_MUX_E("HandsfreeR Mux", TWL4030_REG_HFR_CTL, 5, 0,
-		&twl4030_dapm_handsfreer_control, handsfree_event,
-		SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_MUX("HandsfreeL Mux", SND_SOC_NOPM, 0, 0,
+		&twl4030_dapm_handsfreel_control),
+	SND_SOC_DAPM_PGA_E("HandsfreeL PGA", SND_SOC_NOPM,
+			0, 0, NULL, 0, handsfreelpga_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_MUX("HandsfreeR Mux", SND_SOC_NOPM, 5, 0,
+		&twl4030_dapm_handsfreer_control),
+	SND_SOC_DAPM_PGA_E("HandsfreeR PGA", SND_SOC_NOPM,
+			0, 0, NULL, 0, handsfreerpga_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
 	/* Vibra */
 	SND_SOC_DAPM_MUX("Vibra Mux", TWL4030_REG_VIBRA_CTL, 0, 0,
 		&twl4030_dapm_vibra_control),
@@ -1303,11 +1341,13 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"HandsfreeL Mux", "AudioL1", "Analog L1 Playback Mixer"},
 	{"HandsfreeL Mux", "AudioL2", "Analog L2 Playback Mixer"},
 	{"HandsfreeL Mux", "AudioR2", "Analog R2 Playback Mixer"},
+	{"HandsfreeL PGA", NULL, "HandsfreeL Mux"},
 	/* HandsfreeR */
 	{"HandsfreeR Mux", "Voice", "Analog Voice Playback Mixer"},
 	{"HandsfreeR Mux", "AudioR1", "Analog R1 Playback Mixer"},
 	{"HandsfreeR Mux", "AudioR2", "Analog R2 Playback Mixer"},
 	{"HandsfreeR Mux", "AudioL2", "Analog L2 Playback Mixer"},
+	{"HandsfreeR PGA", NULL, "HandsfreeR Mux"},
 	/* Vibra */
 	{"Vibra Mux", "AudioL1", "DAC Left1"},
 	{"Vibra Mux", "AudioR1", "DAC Right1"},
@@ -1324,8 +1364,8 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"HSOR", NULL, "HeadsetR PGA"},
 	{"CARKITL", NULL, "CarkitL Mixer"},
 	{"CARKITR", NULL, "CarkitR Mixer"},
-	{"HFL", NULL, "HandsfreeL Mux"},
-	{"HFR", NULL, "HandsfreeR Mux"},
+	{"HFL", NULL, "HandsfreeL PGA"},
+	{"HFR", NULL, "HandsfreeR PGA"},
 	{"Vibra Route", "Audio", "Vibra Mux"},
 	{"VIBRA", NULL, "Vibra Route"},
 
-- 
cgit v0.10.2


From f3b5d3002d5b43d277dedc1e044d02f2a40a43c5 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Mon, 25 May 2009 11:12:12 +0300
Subject: ASoC: TWL4030: Add shadow register

Shadow, non HW register for dealing with the HandsfreeL/R
muting.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 17ddcb2..989446d 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -115,6 +115,7 @@ static const u8 twl4030_reg[TWL4030_CACHEREGNUM] = {
 	0x00, /* REG_VIBRA_PWM_SET	(0x47)	*/
 	0x00, /* REG_ANAMIC_GAIN	(0x48)	*/
 	0x00, /* REG_MISC_SET_2		(0x49)	*/
+	0x00, /* REG_SW_SHADOW		(0x4A)	- Shadow, non HW register */
 };
 
 /* codec private data */
@@ -172,7 +173,11 @@ static int twl4030_write(struct snd_soc_codec *codec,
 			unsigned int reg, unsigned int value)
 {
 	twl4030_write_reg_cache(codec, reg, value);
-	return twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, value, reg);
+	if (likely(reg < TWL4030_REG_SW_SHADOW))
+		return twl4030_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE, value,
+					    reg);
+	else
+		return 0;
 }
 
 static void twl4030_codec_enable(struct snd_soc_codec *codec, int enable)
diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h
index 48326e2..fe5f395 100644
--- a/sound/soc/codecs/twl4030.h
+++ b/sound/soc/codecs/twl4030.h
@@ -92,8 +92,9 @@
 #define TWL4030_REG_VIBRA_PWM_SET	0x47
 #define TWL4030_REG_ANAMIC_GAIN		0x48
 #define TWL4030_REG_MISC_SET_2		0x49
+#define TWL4030_REG_SW_SHADOW		0x4A
 
-#define TWL4030_CACHEREGNUM	(TWL4030_REG_MISC_SET_2 + 1)
+#define TWL4030_CACHEREGNUM	(TWL4030_REG_SW_SHADOW + 1)
 
 /* Bitfield Definitions */
 
@@ -260,6 +261,10 @@
 #define TWL4030_SMOOTH_ANAVOL_EN	0x02
 #define TWL4030_DIGMIC_LR_SWAP_EN	0x01
 
+/* TWL4030_REG_SW_SHADOW (0x4A) Fields */
+#define TWL4030_HFL_EN			0x01
+#define TWL4030_HFR_EN			0x02
+
 #define TWL4030_DAI_HIFI		0
 #define TWL4030_DAI_VOICE		1
 
-- 
cgit v0.10.2


From 0f89bdcac61536c5cb2a095a514657019573afb4 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Mon, 25 May 2009 11:12:13 +0300
Subject: ASoC: TWL4030: HandsfreeL/R mute DAPM switch

Add DAPM switch for HeadsetL/R mute. Since all bits are are needed
for the HFL/R pop removal to work the switch is using the SW_SHADOW
no HW register for the HandsfreeL/R mute.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 989446d..63ebd17 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -395,6 +395,10 @@ static const struct soc_enum twl4030_handsfreel_enum =
 static const struct snd_kcontrol_new twl4030_dapm_handsfreel_control =
 SOC_DAPM_ENUM("Route", twl4030_handsfreel_enum);
 
+/* Handsfree Left virtual mute */
+static const struct snd_kcontrol_new twl4030_dapm_handsfreelmute_control =
+	SOC_DAPM_SINGLE("Switch", TWL4030_REG_SW_SHADOW, 0, 1, 0);
+
 /* Handsfree Right */
 static const char *twl4030_handsfreer_texts[] =
 		{"Voice", "AudioR1", "AudioR2", "AudioL2"};
@@ -407,6 +411,10 @@ static const struct soc_enum twl4030_handsfreer_enum =
 static const struct snd_kcontrol_new twl4030_dapm_handsfreer_control =
 SOC_DAPM_ENUM("Route", twl4030_handsfreer_enum);
 
+/* Handsfree Right virtual mute */
+static const struct snd_kcontrol_new twl4030_dapm_handsfreermute_control =
+	SOC_DAPM_SINGLE("Switch", TWL4030_REG_SW_SHADOW, 1, 1, 0);
+
 /* Vibra */
 /* Vibra audio path selection */
 static const char *twl4030_vibra_texts[] =
@@ -1231,11 +1239,15 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = {
 	/* HandsfreeL/R */
 	SND_SOC_DAPM_MUX("HandsfreeL Mux", SND_SOC_NOPM, 0, 0,
 		&twl4030_dapm_handsfreel_control),
+	SND_SOC_DAPM_SWITCH("HandsfreeL Switch", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_handsfreelmute_control),
 	SND_SOC_DAPM_PGA_E("HandsfreeL PGA", SND_SOC_NOPM,
 			0, 0, NULL, 0, handsfreelpga_event,
 			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
 	SND_SOC_DAPM_MUX("HandsfreeR Mux", SND_SOC_NOPM, 5, 0,
 		&twl4030_dapm_handsfreer_control),
+	SND_SOC_DAPM_SWITCH("HandsfreeR Switch", SND_SOC_NOPM, 0, 0,
+			&twl4030_dapm_handsfreermute_control),
 	SND_SOC_DAPM_PGA_E("HandsfreeR PGA", SND_SOC_NOPM,
 			0, 0, NULL, 0, handsfreerpga_event,
 			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD),
@@ -1346,13 +1358,15 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"HandsfreeL Mux", "AudioL1", "Analog L1 Playback Mixer"},
 	{"HandsfreeL Mux", "AudioL2", "Analog L2 Playback Mixer"},
 	{"HandsfreeL Mux", "AudioR2", "Analog R2 Playback Mixer"},
-	{"HandsfreeL PGA", NULL, "HandsfreeL Mux"},
+	{"HandsfreeL Switch", "Switch", "HandsfreeL Mux"},
+	{"HandsfreeL PGA", NULL, "HandsfreeL Switch"},
 	/* HandsfreeR */
 	{"HandsfreeR Mux", "Voice", "Analog Voice Playback Mixer"},
 	{"HandsfreeR Mux", "AudioR1", "Analog R1 Playback Mixer"},
 	{"HandsfreeR Mux", "AudioR2", "Analog R2 Playback Mixer"},
 	{"HandsfreeR Mux", "AudioL2", "Analog L2 Playback Mixer"},
-	{"HandsfreeR PGA", NULL, "HandsfreeR Mux"},
+	{"HandsfreeR Switch", "Switch", "HandsfreeR Mux"},
+	{"HandsfreeR PGA", NULL, "HandsfreeR Switch"},
 	/* Vibra */
 	{"Vibra Mux", "AudioL1", "DAC Left1"},
 	{"Vibra Mux", "AudioR1", "DAC Right1"},
-- 
cgit v0.10.2


From d94b943054721c346b0881865d645f000cd19880 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Mon, 25 May 2009 09:57:56 +0200
Subject: perf top: Reduce display overhead

Iterate over the symbol table once per display interval, and
copy/sort/tally/decay only those symbols which are active.

Before:

 top - 10:14:53 up  4:08, 17 users,  load average: 1.17, 1.53, 1.49
 Tasks: 273 total,   5 running, 268 sleeping,   0 stopped,   0 zombie
 Cpu(s):  6.9%us, 38.2%sy,  0.0%ni, 19.9%id,  0.0%wa,  0.0%hi, 35.0%si,  0.0%st

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  P COMMAND
 28504 root      20   0  1044  260  164 S   58  0.0   0:04.19 2 netserver
 28499 root      20   0  1040  412  316 R   51  0.0   0:04.15 0 netperf
 28500 root      20   0  1040  408  316 R   50  0.0   0:04.14 1 netperf
 28503 root      20   0  1044  260  164 S   50  0.0   0:04.01 1 netserver
 28501 root      20   0  1044  260  164 S   49  0.0   0:03.99 0 netserver
 28502 root      20   0  1040  412  316 S   43  0.0   0:03.96 2 netperf
 28468 root      20   0 1892m 325m  972 S   16 10.8   0:10.50 3 perf
 28467 root      20   0 1892m 325m  972 R    2 10.8   0:00.72 3 perf

After:

 top - 10:16:30 up  4:10, 17 users,  load average: 2.27, 1.88, 1.62
 Tasks: 273 total,   6 running, 267 sleeping,   0 stopped,   0 zombie
 Cpu(s):  2.5%us, 39.7%sy,  0.0%ni, 24.6%id,  0.0%wa,  0.0%hi, 33.3%si,  0.0%st

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  P COMMAND
 28590 root      20   0  1040  412  316 S   54  0.0   0:07.85 2 netperf
 28589 root      20   0  1044  260  164 R   54  0.0   0:07.84 0 netserver
 28588 root      20   0  1040  412  316 R   50  0.0   0:07.89 1 netperf
 28591 root      20   0  1044  256  164 S   50  0.0   0:07.82 1 netserver
 28587 root      20   0  1040  408  316 R   47  0.0   0:07.61 0 netperf
 28592 root      20   0  1044  260  164 R   47  0.0   0:07.85 2 netserver
 28378 root      20   0  8732 1300  860 R    2  0.0   0:01.81 3 top
 28577 root      20   0 1892m 165m  972 R    2  5.5   0:00.48 3 perf
 28578 root      20   0 1892m 165m  972 S    2  5.5   0:00.04 3 perf

[ Impact: optimization ]

Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 74021ac..4bed265 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -374,18 +374,26 @@ static struct sym_entry		tmp[MAX_SYMS];
 
 static void print_sym_table(void)
 {
-	int i, printed;
+	int i, j, active_count, printed;
 	int counter;
 	float events_per_sec = events/delay_secs;
 	float kevents_per_sec = (events-userspace_events)/delay_secs;
 	float sum_kevents = 0.0;
 
 	events = userspace_events = 0;
-	memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
-	qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
 
-	for (i = 0; i < sym_table_count && tmp[i].count[0]; i++)
-		sum_kevents += tmp[i].count[0];
+	/* Iterate over symbol table and copy/tally/decay active symbols. */
+	for (i = 0, active_count = 0; i < sym_table_count; i++) {
+		if (sym_table[i].count[0]) {
+			tmp[active_count++] = sym_table[i];
+			sum_kevents += sym_table[i].count[0];
+
+			for (j = 0; j < nr_counters; j++)
+				sym_table[i].count[j] = zero ? 0 : sym_table[i].count[j] * 7 / 8;
+		}
+	}
+
+	qsort(tmp, active_count + 1, sizeof(tmp[0]), compare);
 
 	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
 
@@ -433,29 +441,23 @@ static void print_sym_table(void)
 	       	       "  ______     ______   _____   ________________   _______________\n\n"
 	);
 
-	for (i = 0, printed = 0; i < sym_table_count; i++) {
+	for (i = 0, printed = 0; i < active_count; i++) {
 		float pcnt;
-		int count;
 
-		if (printed <= 18 && tmp[i].count[0] >= count_filter) {
-			pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
-
-			if (nr_counters == 1)
-				printf("%19.2f - %4.1f%% - %016llx : %s\n",
-					sym_weight(tmp + i),
-					pcnt, tmp[i].addr, tmp[i].sym);
-			else
-				printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
-					sym_weight(tmp + i),
-					tmp[i].count[0],
-					pcnt, tmp[i].addr, tmp[i].sym);
-			printed++;
-		}
-		/*
-		 * Add decay to the counts:
-		 */
-		for (count = 0; count < nr_counters; count++)
-			sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
+		if (++printed > 18 || tmp[i].count[0] < count_filter)
+			break;
+
+		pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
+
+		if (nr_counters == 1)
+			printf("%19.2f - %4.1f%% - %016llx : %s\n",
+				sym_weight(tmp + i),
+				pcnt, tmp[i].addr, tmp[i].sym);
+		else
+			printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
+				sym_weight(tmp + i),
+				tmp[i].count[0],
+				pcnt, tmp[i].addr, tmp[i].sym);
 	}
 
 	if (sym_filter_entry)
-- 
cgit v0.10.2


From e4cbb4e3ac8b09fdb11e39e5a5611bfab0a7cd1a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 May 2009 15:50:30 +0200
Subject: perf_counter: Move child perfcounter init to after scheduler init

Initialize a task's perfcounters (inherit from parent, etc.) after
the child task's scheduler fields have been initialized already.

[ Impact: cleanup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/fork.c b/kernel/fork.c
index e72a09f..675e01e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -984,7 +984,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	rt_mutex_init_task(p);
-	perf_counter_init_task(p);
 
 #ifdef CONFIG_PROVE_LOCKING
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
@@ -1096,6 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
+	perf_counter_init_task(p);
 
 	if ((retval = audit_alloc(p)))
 		goto bad_fork_cleanup_policy;
-- 
cgit v0.10.2


From 9733bb8e9ce0078f55405ce470a62ec0a551fe99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl>
Date: Mon, 25 May 2009 13:25:34 +0200
Subject: IXP4xx: Change QMgr function names to qmgr_stat_*_watermark and clean
 the comments.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>

diff --git a/arch/arm/mach-ixp4xx/include/mach/qmgr.h b/arch/arm/mach-ixp4xx/include/mach/qmgr.h
index 32077e1..9e7cad2 100644
--- a/arch/arm/mach-ixp4xx/include/mach/qmgr.h
+++ b/arch/arm/mach-ixp4xx/include/mach/qmgr.h
@@ -138,12 +138,12 @@ static inline int qmgr_stat_empty(unsigned int queue)
 }
 
 /**
- * qmgr_stat_empty() - checks if a hardware queue is nearly empty
+ * qmgr_stat_below_low_watermark() - checks if a queue is below low watermark
  * @queue:	queue number
  *
- * Returns non-zero value if the queue is nearly or completely empty.
+ * Returns non-zero value if the queue is below low watermark.
  */
-static inline int qmgr_stat_nearly_empty(unsigned int queue)
+static inline int qmgr_stat_below_low_watermark(unsigned int queue)
 {
 	extern struct qmgr_regs __iomem *qmgr_regs;
 	if (queue >= HALF_QUEUES)
@@ -153,19 +153,19 @@ static inline int qmgr_stat_nearly_empty(unsigned int queue)
 }
 
 /**
- * qmgr_stat_empty() - checks if a hardware queue is nearly full
+ * qmgr_stat_above_high_watermark() - checks if a queue is above high watermark
  * @queue:	queue number
  *
- * Returns non-zero value if the queue is nearly or completely full.
+ * Returns non-zero value if the queue is above high watermark
  */
-static inline int qmgr_stat_nearly_full(unsigned int queue)
+static inline int qmgr_stat_above_high_watermark(unsigned int queue)
 {
 	BUG_ON(queue >= HALF_QUEUES);
 	return __qmgr_get_stat1(queue) & QUEUE_STAT1_NEARLY_FULL;
 }
 
 /**
- * qmgr_stat_empty() - checks if a hardware queue is full
+ * qmgr_stat_full() - checks if a hardware queue is full
  * @queue:	queue number
  *
  * Returns non-zero value if the queue is full.
@@ -180,10 +180,10 @@ static inline int qmgr_stat_full(unsigned int queue)
 }
 
 /**
- * qmgr_stat_empty() - checks if a hardware queue experienced underflow
+ * qmgr_stat_underflow() - checks if a hardware queue experienced underflow
  * @queue:	queue number
  *
- * Returns non-zero value if empty.
+ * Returns non-zero value if the queue experienced underflow.
  */
 static inline int qmgr_stat_underflow(unsigned int queue)
 {
@@ -191,10 +191,10 @@ static inline int qmgr_stat_underflow(unsigned int queue)
 }
 
 /**
- * qmgr_stat_empty() - checks if a hardware queue experienced overflow
+ * qmgr_stat_overflow() - checks if a hardware queue experienced overflow
  * @queue:	queue number
  *
- * Returns non-zero value if empty.
+ * Returns non-zero value if the queue experienced overflow.
  */
 static inline int qmgr_stat_overflow(unsigned int queue)
 {
diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index 672c962..b6d1881 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -513,8 +513,8 @@ static int eth_poll(struct napi_struct *napi, int budget)
 #endif
 			napi_complete(napi);
 			qmgr_enable_irq(rxq);
-			if (!qmgr_stat_nearly_empty(rxq) &&
-			    napi_reschedule(napi)) { /* really empty in fact */
+			if (!qmgr_stat_below_low_watermark(rxq) &&
+			    napi_reschedule(napi)) { /* not empty again */
 #if DEBUG_RX
 				printk(KERN_DEBUG "%s: eth_poll"
 				       " napi_reschedule successed\n",
@@ -631,10 +631,9 @@ static void eth_txdone_irq(void *unused)
 			port->tx_buff_tab[n_desc] = NULL;
 		}
 
-		/* really empty in fact */
-		start = qmgr_stat_nearly_empty(port->plat->txreadyq);
+		start = qmgr_stat_below_low_watermark(port->plat->txreadyq);
 		queue_put_desc(port->plat->txreadyq, phys, desc);
-		if (start) {
+		if (start) { /* TX-ready queue was empty */
 #if DEBUG_TX
 			printk(KERN_DEBUG "%s: eth_txdone_irq xmit ready\n",
 			       port->netdev->name);
@@ -710,14 +709,14 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	queue_put_desc(TX_QUEUE(port->id), tx_desc_phys(port, n), desc);
 	dev->trans_start = jiffies;
 
-	if (qmgr_stat_nearly_empty(txreadyq)) { /* really empty in fact */
+	if (qmgr_stat_below_low_watermark(txreadyq)) { /* empty */
 #if DEBUG_TX
 		printk(KERN_DEBUG "%s: eth_xmit queue full\n", dev->name);
 #endif
 		netif_stop_queue(dev);
 		/* we could miss TX ready interrupt */
 		/* really empty in fact */
-		if (!qmgr_stat_nearly_empty(txreadyq)) {
+		if (!qmgr_stat_below_low_watermark(txreadyq)) {
 #if DEBUG_TX
 			printk(KERN_DEBUG "%s: eth_xmit ready again\n",
 			       dev->name);
diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 1e56e58..a6dc317 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -790,11 +790,10 @@ static void hss_hdlc_txdone_irq(void *pdev)
 		free_buffer_irq(port->tx_buff_tab[n_desc]);
 		port->tx_buff_tab[n_desc] = NULL;
 
-		/* really empty in fact */
-		start = qmgr_stat_nearly_empty(port->plat->txreadyq);
+		start = qmgr_stat_below_low_watermark(port->plat->txreadyq);
 		queue_put_desc(port->plat->txreadyq,
 			       tx_desc_phys(port, n_desc), desc);
-		if (start) {
+		if (start) { /* TX-ready queue was empty */
 #if DEBUG_TX
 			printk(KERN_DEBUG "%s: hss_hdlc_txdone_irq xmit"
 			       " ready\n", dev->name);
@@ -869,13 +868,13 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev)
 	queue_put_desc(queue_ids[port->id].tx, tx_desc_phys(port, n), desc);
 	dev->trans_start = jiffies;
 
-	if (qmgr_stat_nearly_empty(txreadyq)) { /* really empty in fact */
+	if (qmgr_stat_below_low_watermark(txreadyq)) { /* empty */
 #if DEBUG_TX
 		printk(KERN_DEBUG "%s: hss_hdlc_xmit queue full\n", dev->name);
 #endif
 		netif_stop_queue(dev);
 		/* we could miss TX ready interrupt */
-		if (!qmgr_stat_nearly_empty(txreadyq)) {
+		if (!qmgr_stat_below_low_watermark(txreadyq)) {
 #if DEBUG_TX
 			printk(KERN_DEBUG "%s: hss_hdlc_xmit ready again\n",
 			       dev->name);
-- 
cgit v0.10.2


From d3f4b3855ba87caff8f35e738c7e7e3bad0a6ab1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 25 May 2009 14:40:01 +0200
Subject: perf stat: flip around ':k' and ':u' flags

This output:

 $ perf stat -e 0:1:k -e 0:1:u ./hello
  Performance counter stats for './hello':
          140131  instructions         (events)
         1906968  instructions         (events)

Is quite confusing - as :k means "user instructions", :u means
"kernel instructions".

Flip them around - as the 'exclude' property is not intuitive in
the flag naming.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 8ae01d5..88c70be 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -266,9 +266,9 @@ static __u64 match_event_symbols(char *str)
 
 	switch (sscanf(str, "%d:%llu:%2s", &type, &id, mask_str)) {
 		case 3:
-			if (strchr(mask_str, 'u'))
-				event_mask[nr_counters] |= EVENT_MASK_USER;
 			if (strchr(mask_str, 'k'))
+				event_mask[nr_counters] |= EVENT_MASK_USER;
+			if (strchr(mask_str, 'u'))
 				event_mask[nr_counters] |= EVENT_MASK_KERNEL;
 		case 2:
 			return EID(type, id);
-- 
cgit v0.10.2


From 266dfb0b58bc4181b6158ee63a0069abaa9f3a98 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 14:45:24 +0200
Subject: perf_counter: Fix perf-$cmd invokation

Fix:

  $ perf-top
  fatal: cannot handle -top internally

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525124559.995591577@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index 594d270..1d6d7aa 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -357,7 +357,7 @@ int main(int argc, const char **argv)
 	 * die if that one cannot handle it.
 	 */
 	if (!prefixcmp(cmd, "perf-")) {
-		cmd += 4;
+		cmd += 5;
 		argv[0] = cmd;
 		handle_internal_command(argc, argv);
 		die("cannot handle %s internally", cmd);
-- 
cgit v0.10.2


From e527ea312f31e88a7fa5472b71db71c565b0d44f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 14:45:25 +0200
Subject: perf_counter: Remove unused ABI bits

extra_config_len isn't used for anything, remove it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525124600.116035832@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2ddf5e3..b1f2bac 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -154,11 +154,11 @@ struct perf_counter_hw_event {
 
 				__reserved_1   : 51;
 
-	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
+	__u32			__reserved_2;
 
-	__u64			__reserved_2;
 	__u64			__reserved_3;
+	__u64			__reserved_4;
 };
 
 /*
-- 
cgit v0.10.2


From 771d7cde144d87f2d1fbee4da3c6234d61f7e42a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 14:45:26 +0200
Subject: perf_counter: Make pctrl() affect inherited counters too

Paul noted that the new ptcrl() didn't work on child counters.

Reported-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525124600.203151469@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 6cdf824..217dbcc 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1067,30 +1067,6 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 }
 
-int perf_counter_task_enable(void)
-{
-	struct perf_counter *counter;
-
-	mutex_lock(&current->perf_counter_mutex);
-	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
-		perf_counter_enable(counter);
-	mutex_unlock(&current->perf_counter_mutex);
-
-	return 0;
-}
-
-int perf_counter_task_disable(void)
-{
-	struct perf_counter *counter;
-
-	mutex_lock(&current->perf_counter_mutex);
-	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
-		perf_counter_disable(counter);
-	mutex_unlock(&current->perf_counter_mutex);
-
-	return 0;
-}
-
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
 static void perf_adjust_freq(struct perf_counter_context *ctx)
@@ -1505,6 +1481,30 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return 0;
 }
 
+int perf_counter_task_enable(void)
+{
+	struct perf_counter *counter;
+
+	mutex_lock(&current->perf_counter_mutex);
+	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
+		perf_counter_for_each_child(counter, perf_counter_enable);
+	mutex_unlock(&current->perf_counter_mutex);
+
+	return 0;
+}
+
+int perf_counter_task_disable(void)
+{
+	struct perf_counter *counter;
+
+	mutex_lock(&current->perf_counter_mutex);
+	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
+		perf_counter_for_each_child(counter, perf_counter_disable);
+	mutex_unlock(&current->perf_counter_mutex);
+
+	return 0;
+}
+
 /*
  * Callers need to ensure there can be no nesting of this function, otherwise
  * the seqlock logic goes bad. We can not serialize this because the arch
-- 
cgit v0.10.2


From 6ab423e0eaca827fbd201ca4ae7d4f8573a366b2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 14:45:27 +0200
Subject: perf_counter: Propagate inheritance failures down the fork() path

Fail fork() when we fail inheritance for some reason (-ENOMEM most likely).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525124600.324656474@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index b1f2bac..d3e85de 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -566,7 +566,7 @@ extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
 extern void perf_counter_task_sched_out(struct task_struct *task,
 					struct task_struct *next, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
-extern void perf_counter_init_task(struct task_struct *child);
+extern int perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
@@ -631,7 +631,7 @@ perf_counter_task_sched_out(struct task_struct *task,
 			    struct task_struct *next, int cpu)		{ }
 static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
-static inline void perf_counter_init_task(struct task_struct *child)	{ }
+static inline int perf_counter_init_task(struct task_struct *child)	{ }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
diff --git a/kernel/fork.c b/kernel/fork.c
index 675e01e..c07c333 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1095,7 +1095,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
-	perf_counter_init_task(p);
+
+	retval = perf_counter_init_task(p);
+	if (retval)
+		goto bad_fork_cleanup_policy;
 
 	if ((retval = audit_alloc(p)))
 		goto bad_fork_cleanup_policy;
@@ -1295,6 +1298,7 @@ bad_fork_cleanup_semundo:
 bad_fork_cleanup_audit:
 	audit_free(p);
 bad_fork_cleanup_policy:
+	perf_counter_exit_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 217dbcc..7a7a144 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3434,18 +3434,23 @@ again:
 /*
  * Initialize the perf_counter context in task_struct
  */
-void perf_counter_init_task(struct task_struct *child)
+int perf_counter_init_task(struct task_struct *child)
 {
 	struct perf_counter_context *child_ctx, *parent_ctx;
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
 	int inherited_all = 1;
+	int ret = 0;
 
 	child->perf_counter_ctxp = NULL;
 
 	mutex_init(&child->perf_counter_mutex);
 	INIT_LIST_HEAD(&child->perf_counter_list);
 
+	parent_ctx = parent->perf_counter_ctxp;
+	if (likely(!parent_ctx || !parent_ctx->nr_counters))
+		return 0;
+
 	/*
 	 * This is executed from the parent task context, so inherit
 	 * counters that have been marked for cloning.
@@ -3454,11 +3459,7 @@ void perf_counter_init_task(struct task_struct *child)
 
 	child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
 	if (!child_ctx)
-		return;
-
-	parent_ctx = parent->perf_counter_ctxp;
-	if (likely(!parent_ctx || !parent_ctx->nr_counters))
-		return;
+		return -ENOMEM;
 
 	__perf_counter_init_context(child_ctx, child);
 	child->perf_counter_ctxp = child_ctx;
@@ -3482,8 +3483,9 @@ void perf_counter_init_task(struct task_struct *child)
 			continue;
 		}
 
-		if (inherit_group(counter, parent,
-				  parent_ctx, child, child_ctx)) {
+		ret = inherit_group(counter, parent, parent_ctx,
+					     child, child_ctx);
+		if (ret) {
 			inherited_all = 0;
 			break;
 		}
@@ -3505,6 +3507,8 @@ void perf_counter_init_task(struct task_struct *child)
 	}
 
 	mutex_unlock(&parent_ctx->mutex);
+
+	return ret;
 }
 
 static void __cpuinit perf_counter_init_cpu(int cpu)
-- 
cgit v0.10.2


From 10989fb2451763fae6f42d85fa6106c8fd010cf5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 14:45:28 +0200
Subject: perf_counter: Fix PERF_COUNTER_CONTEXT_SWITCHES for cpu counters

Ingo noticed that cpu counters had 0 context switches, even though
there was plenty scheduling on the cpu.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525124600.419025548@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7a7a144..14b1fe9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -924,14 +924,13 @@ void perf_counter_task_sched_out(struct task_struct *task,
 	struct perf_counter_context *next_ctx;
 	struct pt_regs *regs;
 
+	regs = task_pt_regs(task);
+	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
 	if (likely(!ctx || !cpuctx->task_ctx))
 		return;
 
 	update_context_time(ctx);
-
-	regs = task_pt_regs(task);
-	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
-
 	next_ctx = next->perf_counter_ctxp;
 	if (next_ctx && context_equiv(ctx, next_ctx)) {
 		task->perf_counter_ctxp = next_ctx;
-- 
cgit v0.10.2


From 759d427aa5a9d88a81afd11817cdeb40aea85234 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 25 May 2009 11:51:00 -0400
Subject: ext4: remove unused function __ext4_write_dirty_metadata

The __ext4_write_dirty_metadata() function was introduced by commit
0390131b, "ext4: Allow ext4 to run without a journal", but nothing
ever used the function, either then or since.  So let's remove it and
save a bit of space.

Cc: Frank Mayhar <fmayhar@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index dadd3f9..14c00ff 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4790,25 +4790,6 @@ int ext4_write_inode(struct inode *inode, int wait)
 	return ext4_force_commit(inode->i_sb);
 }
 
-int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
-{
-	int err = 0;
-
-	mark_buffer_dirty(bh);
-	if (inode && inode_needs_sync(inode)) {
-		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh)) {
-			ext4_error(inode->i_sb, __func__,
-				   "IO error syncing inode, "
-				   "inode=%lu, block=%llu",
-				   inode->i_ino,
-				   (unsigned long long)bh->b_blocknr);
-			err = -EIO;
-		}
-	}
-	return err;
-}
-
 /*
  * ext4_setattr()
  *
-- 
cgit v0.10.2


From 88b6edd17c62b7d346d21f4087893ce7d4ef828a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 25 May 2009 11:50:39 -0400
Subject: ext4: Clean up calls to ext4_get_group_desc()

If the caller isn't planning on modifying the block group descriptors,
there's no need to pass in a pointer to a struct buffer_head.  Nuking
this saves a tiny amount of CPU time and stack space usage.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 82f7d1d..3743bd8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -347,7 +347,6 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *desc;
-	struct buffer_head *bh;
 	struct flex_groups *flex_group = sbi->s_flex_groups;
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
@@ -402,7 +401,7 @@ find_close_to_parent:
 found_flexbg:
 	for (i = best_flex * flex_size; i < ngroups &&
 		     i < (best_flex + 1) * flex_size; i++) {
-		desc = ext4_get_group_desc(sb, i, &bh);
+		desc = ext4_get_group_desc(sb, i, NULL);
 		if (ext4_free_inodes_count(sb, desc)) {
 			*best_group = i;
 			goto out;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eca6c05..91b98b5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1660,7 +1660,6 @@ static int ext4_fill_flex_info(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *gdp = NULL;
-	struct buffer_head *bh;
 	ext4_group_t flex_group_count;
 	ext4_group_t flex_group;
 	int groups_per_flex = 0;
@@ -1693,7 +1692,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
 	}
 
 	for (i = 0; i < sbi->s_groups_count; i++) {
-		gdp = ext4_get_group_desc(sb, i, &bh);
+		gdp = ext4_get_group_desc(sb, i, NULL);
 
 		flex_group = ext4_flex_group(sbi, i);
 		atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
-- 
cgit v0.10.2


From 4fcd39207f4c91185cc89e3e6a28cbb643034ff1 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 25 May 2009 18:34:52 +0200
Subject: ALSA: hda - Reset CORB/RIRB at retrying the verb communication

When a codec communication error occurs, the CORB/RIRB counters should
be reset first before re-issuing the verb.  Simply call azx_free_cmd_io()
and azx_init_cmd_io() to achieve that.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 49fd973..3fc75e2 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -522,6 +522,7 @@ static void azx_init_cmd_io(struct azx *chip)
 	/* RIRB set up */
 	chip->rirb.addr = chip->rb.addr + 2048;
 	chip->rirb.buf = (u32 *)(chip->rb.area + 2048);
+	chip->rirb.wp = chip->rirb.rp = chip->rirb.cmds = 0;
 	azx_writel(chip, RIRBLBASE, (u32)chip->rirb.addr);
 	azx_writel(chip, RIRBUBASE, upper_32_bits(chip->rirb.addr));
 
@@ -533,7 +534,6 @@ static void azx_init_cmd_io(struct azx *chip)
 	azx_writew(chip, RINTCNT, 1);
 	/* enable rirb dma and response irq */
 	azx_writeb(chip, RIRBCTL, ICH6_RBCTL_DMA_EN | ICH6_RBCTL_IRQ_EN);
-	chip->rirb.rp = chip->rirb.cmds = 0;
 }
 
 static void azx_free_cmd_io(struct azx *chip)
@@ -654,9 +654,11 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 
 	snd_printk(KERN_ERR SFX "azx_get_response timeout (ERROR): "
 		   "last cmd=0x%08x\n", chip->last_cmd);
+	/* re-initialize CORB/RIRB */
 	spin_lock_irq(&chip->reg_lock);
-	chip->rirb.cmds = 0; /* reset the index */
 	bus->rirb_error = 1;
+	azx_free_cmd_io(chip);
+	azx_init_cmd_io(chip);
 	spin_unlock_irq(&chip->reg_lock);
 	return -1;
 }
-- 
cgit v0.10.2


From a8cd0244e9cebcf9b358d24c7e7410062f3665cb Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 24 May 2009 22:15:25 +0300
Subject: KVM: Make paravirt tlb flush also reload the PAE PDPTRs

The paravirt tlb flush may be used not only to flush TLBs, but also
to reload the four page-directory-pointer-table entries, as it is used
as a replacement for reloading CR3.  Change the code to do the entire
CR3 reloading dance instead of simply flushing the TLB.

Cc: stable@kernel.org
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b6caf13..32cf11e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2897,8 +2897,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
 
 static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
-	kvm_x86_ops->tlb_flush(vcpu);
-	set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+	kvm_set_cr3(vcpu, vcpu->arch.cr3);
 	return 1;
 }
 
-- 
cgit v0.10.2


From a2edf57f510cce6a389cc14e58c6ad0a4296d6f9 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 24 May 2009 22:19:00 +0300
Subject: KVM: Fix PDPTR reloading on CR4 writes

The processor is documented to reload the PDPTRs while in PAE mode if any
of the CR4 bits PSE, PGE, or PAE change.  Linux relies on this
behaviour when zapping the low mappings of PAE kernels during boot.

The code already handled changes to CR4.PAE; augment it to also notice changes
to PSE and PGE.

This triggered while booting an F11 PAE kernel; the futex initialization code
runs before any CR3 reloads and writes to a NULL pointer; the futex subsystem
ended up uninitialized, killing PI futexes and pulseaudio which uses them.

Cc: stable@kernel.org
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 49079a4..3944e91 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -338,6 +338,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
+	unsigned long old_cr4 = vcpu->arch.cr4;
+	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+
 	if (cr4 & CR4_RESERVED_BITS) {
 		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
 		kvm_inject_gp(vcpu, 0);
@@ -351,7 +354,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 			kvm_inject_gp(vcpu, 0);
 			return;
 		}
-	} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
+	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
+		   && ((cr4 ^ old_cr4) & pdptr_bits)
 		   && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
 		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
 		kvm_inject_gp(vcpu, 0);
-- 
cgit v0.10.2


From bed8b97d88b56fdad5677585262e20c5f0a1a8e2 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:08:33 -0700
Subject: ARM: OMAP2/3: Remove OMAP2_32KSYNCT_BASE

Use processor specific defines instead.

As an extra bonus, this patch fixes the problem of CONFIG_DEBUG_SPINLOCK
calling sched_clock before we have things initialized:

http://patchwork.kernel.org/patch/15810/

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S
index af4bd34..cbb8930 100644
--- a/arch/arm/mach-omap2/sram242x.S
+++ b/arch/arm/mach-omap2/sram242x.S
@@ -128,7 +128,7 @@ omap242x_sdi_prcm_voltctrl:
 prcm_mask_val:
 	.word 0xFFFF3FFC
 omap242x_sdi_timer_32ksynct_cr:
-	.word IO_ADDRESS(OMAP2_32KSYNCT_BASE + 0x010)
+	.word IO_ADDRESS(OMAP2420_32KSYNCT_BASE + 0x010)
 ENTRY(omap242x_sram_ddr_init_sz)
 	.word	. - omap242x_sram_ddr_init
 
@@ -224,7 +224,7 @@ omap242x_srs_prcm_voltctrl:
 ddr_prcm_mask_val:
 	.word 0xFFFF3FFC
 omap242x_srs_timer_32ksynct:
-	.word IO_ADDRESS(OMAP2_32KSYNCT_BASE + 0x010)
+	.word IO_ADDRESS(OMAP2420_32KSYNCT_BASE + 0x010)
 
 ENTRY(omap242x_sram_reprogram_sdrc_sz)
 	.word	. - omap242x_sram_reprogram_sdrc
diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S
index 84363e2..054b0f3 100644
--- a/arch/arm/mach-omap2/sram243x.S
+++ b/arch/arm/mach-omap2/sram243x.S
@@ -128,7 +128,7 @@ omap243x_sdi_prcm_voltctrl:
 prcm_mask_val:
 	.word 0xFFFF3FFC
 omap243x_sdi_timer_32ksynct_cr:
-	.word IO_ADDRESS(OMAP2_32KSYNCT_BASE + 0x010)
+	.word IO_ADDRESS(OMAP2430_32KSYNCT_BASE + 0x010)
 ENTRY(omap243x_sram_ddr_init_sz)
 	.word	. - omap243x_sram_ddr_init
 
@@ -224,7 +224,7 @@ omap243x_srs_prcm_voltctrl:
 ddr_prcm_mask_val:
 	.word 0xFFFF3FFC
 omap243x_srs_timer_32ksynct:
-	.word IO_ADDRESS(OMAP2_32KSYNCT_BASE + 0x010)
+	.word IO_ADDRESS(OMAP2430_32KSYNCT_BASE + 0x010)
 
 ENTRY(omap243x_sram_reprogram_sdrc_sz)
 	.word	. - omap243x_sram_reprogram_sdrc
diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c
index 433021f..e1add78 100644
--- a/arch/arm/plat-omap/common.c
+++ b/arch/arm/plat-omap/common.c
@@ -175,25 +175,61 @@ console_initcall(omap_add_serial_console);
  * but systems won't necessarily want to spend resources that way.
  */
 
-#if defined(CONFIG_ARCH_OMAP16XX)
-#define TIMER_32K_SYNCHRONIZED		0xfffbc410
-#elif defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
-#define TIMER_32K_SYNCHRONIZED		(OMAP2_32KSYNCT_BASE + 0x10)
-#endif
+#define OMAP16XX_TIMER_32K_SYNCHRONIZED		0xfffbc410
 
-#ifdef	TIMER_32K_SYNCHRONIZED
+#if !(defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP15XX))
 
 #include <linux/clocksource.h>
 
-static cycle_t omap_32k_read(struct clocksource *cs)
+#ifdef CONFIG_ARCH_OMAP16XX
+static cycle_t omap16xx_32k_read(struct clocksource *cs)
+{
+	return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED);
+}
+#else
+#define omap16xx_32k_read	NULL
+#endif
+
+#ifdef CONFIG_ARCH_OMAP2420
+static cycle_t omap2420_32k_read(struct clocksource *cs)
+{
+	return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10);
+}
+#else
+#define omap2420_32k_read	NULL
+#endif
+
+#ifdef CONFIG_ARCH_OMAP2430
+static cycle_t omap2430_32k_read(struct clocksource *cs)
+{
+	return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10);
+}
+#else
+#define omap2430_32k_read	NULL
+#endif
+
+#ifdef CONFIG_ARCH_OMAP34XX
+static cycle_t omap34xx_32k_read(struct clocksource *cs)
 {
-	return omap_readl(TIMER_32K_SYNCHRONIZED);
+	return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10);
+}
+#else
+#define omap34xx_32k_read	NULL
+#endif
+
+/*
+ * Kernel assumes that sched_clock can be called early but may not have
+ * things ready yet.
+ */
+static cycle_t omap_32k_read_dummy(struct clocksource *cs)
+{
+	return 0;
 }
 
 static struct clocksource clocksource_32k = {
 	.name		= "32k_counter",
 	.rating		= 250,
-	.read		= omap_32k_read,
+	.read		= omap_32k_read_dummy,
 	.mask		= CLOCKSOURCE_MASK(32),
 	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
@@ -207,7 +243,7 @@ unsigned long long sched_clock(void)
 {
 	unsigned long long ret;
 
-	ret = (unsigned long long)omap_32k_read(&clocksource_32k);
+	ret = (unsigned long long)clocksource_32k.read(&clocksource_32k);
 	ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift;
 	return ret;
 }
@@ -220,6 +256,17 @@ static int __init omap_init_clocksource_32k(void)
 	if (cpu_is_omap16xx() || cpu_class_is_omap2()) {
 		struct clk *sync_32k_ick;
 
+		if (cpu_is_omap16xx())
+			clocksource_32k.read = omap16xx_32k_read;
+		else if (cpu_is_omap2420())
+			clocksource_32k.read = omap2420_32k_read;
+		else if (cpu_is_omap2430())
+			clocksource_32k.read = omap2430_32k_read;
+		else if (cpu_is_omap34xx())
+			clocksource_32k.read = omap34xx_32k_read;
+		else
+			return -ENODEV;
+
 		sync_32k_ick = clk_get(NULL, "omap_32ksync_ick");
 		if (sync_32k_ick)
 			clk_enable(sync_32k_ick);
@@ -234,7 +281,7 @@ static int __init omap_init_clocksource_32k(void)
 }
 arch_initcall(omap_init_clocksource_32k);
 
-#endif	/* TIMER_32K_SYNCHRONIZED */
+#endif	/* !(defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP15XX)) */
 
 /* Global address base setup code */
 
diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 24335d4..4202cf4 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -87,7 +87,6 @@
 
 #if defined(CONFIG_ARCH_OMAP2420)
 
-#define OMAP2_32KSYNCT_BASE	OMAP2420_32KSYNCT_BASE
 #define OMAP2_PRCM_BASE		OMAP2420_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2420_CM_BASE
 #define OMAP2_PRM_BASE		OMAP2420_PRM_BASE
@@ -95,7 +94,6 @@
 
 #elif defined(CONFIG_ARCH_OMAP2430)
 
-#define OMAP2_32KSYNCT_BASE	OMAP2430_32KSYNCT_BASE
 #define OMAP2_PRCM_BASE		OMAP2430_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2430_CM_BASE
 #define OMAP2_PRM_BASE		OMAP2430_PRM_BASE
diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index ab64015..581d910 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -85,7 +85,6 @@
 
 #if defined(CONFIG_ARCH_OMAP3430)
 
-#define OMAP2_32KSYNCT_BASE		OMAP3430_32KSYNCT_BASE
 #define OMAP2_CM_BASE			OMAP3430_CM_BASE
 #define OMAP2_PRM_BASE			OMAP3430_PRM_BASE
 #define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
-- 
cgit v0.10.2


From 23b7dd3166fcd88d82ada7e13478fbe4c2231ddf Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:08:34 -0700
Subject: ARM: OMAP2/3: Remove OMAP_PRM_REGADDR and OMAP2_PRM_BASE

Remove OMAP_PRM_REGADDR and use processor specific defines instead.

Also fold in a patch from Kevin Hilman to add _OFFSET #defines
for the PRCM registers to be used with the prm_[read|write]_* macros.
These are used extensively in the forthcoming OMAP PM support.

Also remove now unused OMAP2_PRM_BASE.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 4247a15..dd37483 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -91,9 +91,9 @@ static void _omap2xxx_clk_commit(struct clk *clk)
 		return;
 
 	prm_write_mod_reg(OMAP24XX_VALID_CONFIG, OMAP24XX_GR_MOD,
-		OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET);
+		OMAP2_PRCM_CLKCFG_CTRL_OFFSET);
 	/* OCP barrier */
-	prm_read_mod_reg(OMAP24XX_GR_MOD, OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET);
+	prm_read_mod_reg(OMAP24XX_GR_MOD, OMAP2_PRCM_CLKCFG_CTRL_OFFSET);
 }
 
 /*
diff --git a/arch/arm/mach-omap2/clock24xx.c b/arch/arm/mach-omap2/clock24xx.c
index e4cef33..c442fe9 100644
--- a/arch/arm/mach-omap2/clock24xx.c
+++ b/arch/arm/mach-omap2/clock24xx.c
@@ -233,6 +233,8 @@ static struct prcm_config *curr_prcm_set;
 static struct clk *vclk;
 static struct clk *sclk;
 
+static void __iomem *prcm_clksrc_ctrl;
+
 /*-------------------------------------------------------------------------
  * Omap24xx specific clock functions
  *-------------------------------------------------------------------------*/
@@ -269,10 +271,9 @@ static int omap2_enable_osc_ck(struct clk *clk)
 {
 	u32 pcc;
 
-	pcc = __raw_readl(OMAP24XX_PRCM_CLKSRC_CTRL);
+	pcc = __raw_readl(prcm_clksrc_ctrl);
 
-	__raw_writel(pcc & ~OMAP_AUTOEXTCLKMODE_MASK,
-		      OMAP24XX_PRCM_CLKSRC_CTRL);
+	__raw_writel(pcc & ~OMAP_AUTOEXTCLKMODE_MASK, prcm_clksrc_ctrl);
 
 	return 0;
 }
@@ -281,10 +282,9 @@ static void omap2_disable_osc_ck(struct clk *clk)
 {
 	u32 pcc;
 
-	pcc = __raw_readl(OMAP24XX_PRCM_CLKSRC_CTRL);
+	pcc = __raw_readl(prcm_clksrc_ctrl);
 
-	__raw_writel(pcc | OMAP_AUTOEXTCLKMODE_MASK,
-		      OMAP24XX_PRCM_CLKSRC_CTRL);
+	__raw_writel(pcc | OMAP_AUTOEXTCLKMODE_MASK, prcm_clksrc_ctrl);
 }
 
 static const struct clkops clkops_oscck = {
@@ -654,7 +654,7 @@ static u32 omap2_get_sysclkdiv(void)
 {
 	u32 div;
 
-	div = __raw_readl(OMAP24XX_PRCM_CLKSRC_CTRL);
+	div = __raw_readl(prcm_clksrc_ctrl);
 	div &= OMAP_SYSCLKDIV_MASK;
 	div >>= OMAP_SYSCLKDIV_SHIFT;
 
@@ -714,10 +714,13 @@ int __init omap2_clk_init(void)
 	struct omap_clk *c;
 	u32 clkrate;
 
-	if (cpu_is_omap242x())
+	if (cpu_is_omap242x()) {
+		prcm_clksrc_ctrl = OMAP2420_PRCM_CLKSRC_CTRL;
 		cpu_mask = RATE_IN_242X;
-	else if (cpu_is_omap2430())
+	} else if (cpu_is_omap2430()) {
+		prcm_clksrc_ctrl = OMAP2430_PRCM_CLKSRC_CTRL;
 		cpu_mask = RATE_IN_243X;
+	}
 
 	clk_init(&omap2_clk_functions);
 
diff --git a/arch/arm/mach-omap2/clock24xx.h b/arch/arm/mach-omap2/clock24xx.h
index 88c5acb..f0e4480 100644
--- a/arch/arm/mach-omap2/clock24xx.h
+++ b/arch/arm/mach-omap2/clock24xx.h
@@ -24,6 +24,15 @@
 #include "cm-regbits-24xx.h"
 #include "sdrc.h"
 
+/* REVISIT: These should be set dynamically for CONFIG_MULTI_OMAP2 */
+#ifdef CONFIG_ARCH_OMAP2420
+#define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP2420_PRCM_CLKOUT_CTRL
+#define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP2420_PRCM_CLKEMUL_CTRL
+#else
+#define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP2430_PRCM_CLKOUT_CTRL
+#define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP2430_PRCM_CLKEMUL_CTRL
+#endif
+
 static unsigned long omap2_table_mpu_recalc(struct clk *clk);
 static int omap2_select_table_rate(struct clk *clk, unsigned long rate);
 static long omap2_round_to_table_rate(struct clk *clk, unsigned long rate);
diff --git a/arch/arm/mach-omap2/cm.h b/arch/arm/mach-omap2/cm.h
index 65fdf78..efc082a 100644
--- a/arch/arm/mach-omap2/cm.h
+++ b/arch/arm/mach-omap2/cm.h
@@ -38,6 +38,7 @@
 #define OMAP3430_CM_SYSCONFIG		OMAP_CM_REGADDR(OCP_MOD, 0x0010)
 #define OMAP3430_CM_POLCTRL		OMAP_CM_REGADDR(OCP_MOD, 0x009c)
 
+#define OMAP3_CM_CLKOUT_CTRL_OFFSET	0x0070
 #define OMAP3430_CM_CLKOUT_CTRL		OMAP_CM_REGADDR(OMAP3430_CCR_MOD, 0x0070)
 
 /*
diff --git a/arch/arm/mach-omap2/prm.h b/arch/arm/mach-omap2/prm.h
index 826d326b8..7c8e0c4 100644
--- a/arch/arm/mach-omap2/prm.h
+++ b/arch/arm/mach-omap2/prm.h
@@ -16,17 +16,12 @@
 
 #include "prcm-common.h"
 
-#ifndef __ASSEMBLER__
-#define OMAP_PRM_REGADDR(module, reg)					\
-			IO_ADDRESS(OMAP2_PRM_BASE + (module) + (reg))
-#else
 #define OMAP2420_PRM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP2420_PRM_BASE + (module) + (reg))
 #define OMAP2430_PRM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP2430_PRM_BASE + (module) + (reg))
 #define OMAP34XX_PRM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP3430_PRM_BASE + (module) + (reg))
-#endif
 
 /*
  * Architecture-specific global PRM registers
@@ -38,80 +33,132 @@
  *
  */
 
-/* Global 24xx registers in GR_MOD (Same as OCP_MOD for 24xx) */
-#define OMAP24XX_PRCM_VOLTCTRL_OFFSET		0x0050
-#define OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET	0x0080
-
-/* 242x GR_MOD registers, use these only for assembly code */
-#define OMAP242X_PRCM_VOLTCTRL		OMAP2420_PRM_REGADDR(OMAP24XX_GR_MOD,	\
-						OMAP24XX_PRCM_VOLTCTRL_OFFSET)
-#define OMAP242X_PRCM_CLKCFG_CTRL	OMAP2420_PRM_REGADDR(OMAP24XX_GR_MOD,	\
-						OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET)
-
-/* 243x GR_MOD registers, use these only for assembly code */
-#define OMAP243X_PRCM_VOLTCTRL		OMAP2430_PRM_REGADDR(OMAP24XX_GR_MOD,	\
-						OMAP24XX_PRCM_VOLTCTRL_OFFSET)
-#define OMAP243X_PRCM_CLKCFG_CTRL	OMAP2430_PRM_REGADDR(OMAP24XX_GR_MOD,	\
-						OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET)
-
-/* These will disappear */
-#define OMAP24XX_PRCM_REVISION		OMAP_PRM_REGADDR(OCP_MOD, 0x0000)
-#define OMAP24XX_PRCM_SYSCONFIG		OMAP_PRM_REGADDR(OCP_MOD, 0x0010)
-
-#define OMAP24XX_PRCM_IRQSTATUS_MPU	OMAP_PRM_REGADDR(OCP_MOD, 0x0018)
-#define OMAP24XX_PRCM_IRQENABLE_MPU	OMAP_PRM_REGADDR(OCP_MOD, 0x001c)
-
-#define OMAP24XX_PRCM_VOLTST		OMAP_PRM_REGADDR(OCP_MOD, 0x0054)
-#define OMAP24XX_PRCM_CLKSRC_CTRL	OMAP_PRM_REGADDR(OCP_MOD, 0x0060)
-#define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP_PRM_REGADDR(OCP_MOD, 0x0070)
-#define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP_PRM_REGADDR(OCP_MOD, 0x0078)
-#define OMAP24XX_PRCM_CLKCFG_CTRL	OMAP_PRM_REGADDR(OCP_MOD, 0x0080)
-#define OMAP24XX_PRCM_CLKCFG_STATUS	OMAP_PRM_REGADDR(OCP_MOD, 0x0084)
-#define OMAP24XX_PRCM_VOLTSETUP		OMAP_PRM_REGADDR(OCP_MOD, 0x0090)
-#define OMAP24XX_PRCM_CLKSSETUP		OMAP_PRM_REGADDR(OCP_MOD, 0x0094)
-#define OMAP24XX_PRCM_POLCTRL		OMAP_PRM_REGADDR(OCP_MOD, 0x0098)
-
-#define OMAP3430_PRM_REVISION		OMAP_PRM_REGADDR(OCP_MOD, 0x0004)
-#define OMAP3430_PRM_SYSCONFIG		OMAP_PRM_REGADDR(OCP_MOD, 0x0014)
-
-#define OMAP3430_PRM_IRQSTATUS_MPU	OMAP_PRM_REGADDR(OCP_MOD, 0x0018)
-#define OMAP3430_PRM_IRQENABLE_MPU	OMAP_PRM_REGADDR(OCP_MOD, 0x001c)
-
-
-#define OMAP3430_PRM_VC_SMPS_SA		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0020)
-#define OMAP3430_PRM_VC_SMPS_VOL_RA	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0024)
-#define OMAP3430_PRM_VC_SMPS_CMD_RA	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0028)
-#define OMAP3430_PRM_VC_CMD_VAL_0	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x002c)
-#define OMAP3430_PRM_VC_CMD_VAL_1	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0030)
-#define OMAP3430_PRM_VC_CH_CONF		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0034)
-#define OMAP3430_PRM_VC_I2C_CFG		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0038)
-#define OMAP3430_PRM_VC_BYPASS_VAL	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x003c)
-#define OMAP3430_PRM_RSTCTRL		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0050)
-#define OMAP3430_PRM_RSTTIME		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0054)
-#define OMAP3430_PRM_RSTST		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0058)
-#define OMAP3430_PRM_VOLTCTRL		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0060)
-#define OMAP3430_PRM_SRAM_PCHARGE	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0064)
-#define OMAP3430_PRM_CLKSRC_CTRL	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0070)
-#define OMAP3430_PRM_VOLTSETUP1		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0090)
-#define OMAP3430_PRM_VOLTOFFSET		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0094)
-#define OMAP3430_PRM_CLKSETUP		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0098)
-#define OMAP3430_PRM_POLCTRL		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x009c)
-#define OMAP3430_PRM_VOLTSETUP2		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00a0)
-#define OMAP3430_PRM_VP1_CONFIG		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b0)
-#define OMAP3430_PRM_VP1_VSTEPMIN	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b4)
-#define OMAP3430_PRM_VP1_VSTEPMAX	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b8)
-#define OMAP3430_PRM_VP1_VLIMITTO	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00bc)
-#define OMAP3430_PRM_VP1_VOLTAGE	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00c0)
-#define OMAP3430_PRM_VP1_STATUS		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00c4)
-#define OMAP3430_PRM_VP2_CONFIG		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d0)
-#define OMAP3430_PRM_VP2_VSTEPMIN	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d4)
-#define OMAP3430_PRM_VP2_VSTEPMAX	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d8)
-#define OMAP3430_PRM_VP2_VLIMITTO	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00dc)
-#define OMAP3430_PRM_VP2_VOLTAGE	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00e0)
-#define OMAP3430_PRM_VP2_STATUS		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00e4)
-
-#define OMAP3430_PRM_CLKSEL		OMAP_PRM_REGADDR(OMAP3430_CCR_MOD, 0x0040)
-#define OMAP3430_PRM_CLKOUT_CTRL	OMAP_PRM_REGADDR(OMAP3430_CCR_MOD, 0x0070)
+#define OMAP2_PRCM_REVISION_OFFSET	0x0000
+#define OMAP2420_PRCM_REVISION		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0000)
+#define OMAP2_PRCM_SYSCONFIG_OFFSET	0x0010
+#define OMAP2420_PRCM_SYSCONFIG		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0010)
+
+#define OMAP2_PRCM_IRQSTATUS_MPU_OFFSET	0x0018
+#define OMAP2420_PRCM_IRQSTATUS_MPU	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0018)
+#define OMAP2_PRCM_IRQENABLE_MPU_OFFSET	0x001c
+#define OMAP2420_PRCM_IRQENABLE_MPU	OMAP2420_PRM_REGADDR(OCP_MOD, 0x001c)
+
+#define OMAP2_PRCM_VOLTCTRL_OFFSET	0x0050
+#define OMAP2420_PRCM_VOLTCTRL		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0050)
+#define OMAP2_PRCM_VOLTST_OFFSET	0x0054
+#define OMAP2420_PRCM_VOLTST		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0054)
+#define OMAP2_PRCM_CLKSRC_CTRL_OFFSET	0x0060
+#define OMAP2420_PRCM_CLKSRC_CTRL	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0060)
+#define OMAP2_PRCM_CLKOUT_CTRL_OFFSET	0x0070
+#define OMAP2420_PRCM_CLKOUT_CTRL	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0070)
+#define OMAP2_PRCM_CLKEMUL_CTRL_OFFSET	0x0078
+#define OMAP2420_PRCM_CLKEMUL_CTRL	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0078)
+#define OMAP2_PRCM_CLKCFG_CTRL_OFFSET	0x0080
+#define OMAP2420_PRCM_CLKCFG_CTRL	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0080)
+#define OMAP2_PRCM_CLKCFG_STATUS_OFFSET	0x0084
+#define OMAP2420_PRCM_CLKCFG_STATUS	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0084)
+#define OMAP2_PRCM_VOLTSETUP_OFFSET	0x0090
+#define OMAP2420_PRCM_VOLTSETUP		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0090)
+#define OMAP2_PRCM_CLKSSETUP_OFFSET	0x0094
+#define OMAP2420_PRCM_CLKSSETUP		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0094)
+#define OMAP2_PRCM_POLCTRL_OFFSET	0x0098
+#define OMAP2420_PRCM_POLCTRL		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0098)
+
+#define OMAP2430_PRCM_REVISION		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0000)
+#define OMAP2430_PRCM_SYSCONFIG		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0010)
+
+#define OMAP2430_PRCM_IRQSTATUS_MPU	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0018)
+#define OMAP2430_PRCM_IRQENABLE_MPU	OMAP2430_PRM_REGADDR(OCP_MOD, 0x001c)
+
+#define OMAP2430_PRCM_VOLTCTRL		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0050)
+#define OMAP2430_PRCM_VOLTST		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0054)
+#define OMAP2430_PRCM_CLKSRC_CTRL	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0060)
+#define OMAP2430_PRCM_CLKOUT_CTRL	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0070)
+#define OMAP2430_PRCM_CLKEMUL_CTRL	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0078)
+#define OMAP2430_PRCM_CLKCFG_CTRL	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0080)
+#define OMAP2430_PRCM_CLKCFG_STATUS	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0084)
+#define OMAP2430_PRCM_VOLTSETUP		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0090)
+#define OMAP2430_PRCM_CLKSSETUP		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0094)
+#define OMAP2430_PRCM_POLCTRL		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0098)
+
+#define OMAP3_PRM_REVISION_OFFSET	0x0004
+#define OMAP3430_PRM_REVISION		OMAP34XX_PRM_REGADDR(OCP_MOD, 0x0004)
+#define OMAP3_PRM_SYSCONFIG_OFFSET	0x0014
+#define OMAP3430_PRM_SYSCONFIG		OMAP34XX_PRM_REGADDR(OCP_MOD, 0x0014)
+
+#define OMAP3_PRM_IRQSTATUS_MPU_OFFSET	0x0018
+#define OMAP3430_PRM_IRQSTATUS_MPU	OMAP34XX_PRM_REGADDR(OCP_MOD, 0x0018)
+#define OMAP3_PRM_IRQENABLE_MPU_OFFSET	0x001c
+#define OMAP3430_PRM_IRQENABLE_MPU	OMAP34XX_PRM_REGADDR(OCP_MOD, 0x001c)
+
+
+#define OMAP3_PRM_VC_SMPS_SA_OFFSET	0x0020
+#define OMAP3430_PRM_VC_SMPS_SA		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0020)
+#define OMAP3_PRM_VC_SMPS_VOL_RA_OFFSET	0x0024
+#define OMAP3430_PRM_VC_SMPS_VOL_RA	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0024)
+#define OMAP3_PRM_VC_SMPS_CMD_RA_OFFSET	0x0028
+#define OMAP3430_PRM_VC_SMPS_CMD_RA	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0028)
+#define OMAP3_PRM_VC_CMD_VAL_0_OFFSET	0x002c
+#define OMAP3430_PRM_VC_CMD_VAL_0	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x002c)
+#define OMAP3_PRM_VC_CMD_VAL_1_OFFSET	0x0030
+#define OMAP3430_PRM_VC_CMD_VAL_1	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0030)
+#define OMAP3_PRM_VC_CH_CONF_OFFSET	0x0034
+#define OMAP3430_PRM_VC_CH_CONF		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0034)
+#define OMAP3_PRM_VC_I2C_CFG_OFFSET	0x0038
+#define OMAP3430_PRM_VC_I2C_CFG		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0038)
+#define OMAP3_PRM_VC_BYPASS_VAL_OFFSET	0x003c
+#define OMAP3430_PRM_VC_BYPASS_VAL	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x003c)
+#define OMAP3_PRM_RSTCTRL_OFFSET	0x0050
+#define OMAP3430_PRM_RSTCTRL		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0050)
+#define OMAP3_PRM_RSTTIME_OFFSET	0x0054
+#define OMAP3430_PRM_RSTTIME		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0054)
+#define OMAP3_PRM_RSTST_OFFSET	0x0058
+#define OMAP3430_PRM_RSTST		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0058)
+#define OMAP3_PRM_VOLTCTRL_OFFSET	0x0060
+#define OMAP3430_PRM_VOLTCTRL		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0060)
+#define OMAP3_PRM_SRAM_PCHARGE_OFFSET	0x0064
+#define OMAP3430_PRM_SRAM_PCHARGE	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0064)
+#define OMAP3_PRM_CLKSRC_CTRL_OFFSET	0x0070
+#define OMAP3430_PRM_CLKSRC_CTRL	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0070)
+#define OMAP3_PRM_VOLTSETUP1_OFFSET	0x0090
+#define OMAP3430_PRM_VOLTSETUP1		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0090)
+#define OMAP3_PRM_VOLTOFFSET_OFFSET	0x0094
+#define OMAP3430_PRM_VOLTOFFSET		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0094)
+#define OMAP3_PRM_CLKSETUP_OFFSET	0x0098
+#define OMAP3430_PRM_CLKSETUP		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0098)
+#define OMAP3_PRM_POLCTRL_OFFSET	0x009c
+#define OMAP3430_PRM_POLCTRL		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x009c)
+#define OMAP3_PRM_VOLTSETUP2_OFFSET	0x00a0
+#define OMAP3430_PRM_VOLTSETUP2		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00a0)
+#define OMAP3_PRM_VP1_CONFIG_OFFSET	0x00b0
+#define OMAP3430_PRM_VP1_CONFIG		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b0)
+#define OMAP3_PRM_VP1_VSTEPMIN_OFFSET	0x00b4
+#define OMAP3430_PRM_VP1_VSTEPMIN	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b4)
+#define OMAP3_PRM_VP1_VSTEPMAX_OFFSET	0x00b8
+#define OMAP3430_PRM_VP1_VSTEPMAX	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b8)
+#define OMAP3_PRM_VP1_VLIMITTO_OFFSET	0x00bc
+#define OMAP3430_PRM_VP1_VLIMITTO	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00bc)
+#define OMAP3_PRM_VP1_VOLTAGE_OFFSET	0x00c0
+#define OMAP3430_PRM_VP1_VOLTAGE	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00c0)
+#define OMAP3_PRM_VP1_STATUS_OFFSET	0x00c4
+#define OMAP3430_PRM_VP1_STATUS		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00c4)
+#define OMAP3_PRM_VP2_CONFIG_OFFSET	0x00d0
+#define OMAP3430_PRM_VP2_CONFIG		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d0)
+#define OMAP3_PRM_VP2_VSTEPMIN_OFFSET	0x00d4
+#define OMAP3430_PRM_VP2_VSTEPMIN	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d4)
+#define OMAP3_PRM_VP2_VSTEPMAX_OFFSET	0x00d8
+#define OMAP3430_PRM_VP2_VSTEPMAX	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d8)
+#define OMAP3_PRM_VP2_VLIMITTO_OFFSET	0x00dc
+#define OMAP3430_PRM_VP2_VLIMITTO	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00dc)
+#define OMAP3_PRM_VP2_VOLTAGE_OFFSET	0x00e0
+#define OMAP3430_PRM_VP2_VOLTAGE	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00e0)
+#define OMAP3_PRM_VP2_STATUS_OFFSET	0x00e4
+#define OMAP3430_PRM_VP2_STATUS		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00e4)
+
+#define OMAP3_PRM_CLKSEL_OFFSET	0x0040
+#define OMAP3430_PRM_CLKSEL		OMAP34XX_PRM_REGADDR(OMAP3430_CCR_MOD, 0x0040)
+#define OMAP3_PRM_CLKOUT_CTRL_OFFSET	0x0070
+#define OMAP3430_PRM_CLKOUT_CTRL	OMAP34XX_PRM_REGADDR(OMAP3430_CCR_MOD, 0x0070)
 
 /*
  * Module specific PRM registers from PRM_BASE + domain offset
diff --git a/arch/arm/mach-omap2/sdrc2xxx.c b/arch/arm/mach-omap2/sdrc2xxx.c
index 0afdad5..feaec7e 100644
--- a/arch/arm/mach-omap2/sdrc2xxx.c
+++ b/arch/arm/mach-omap2/sdrc2xxx.c
@@ -99,7 +99,10 @@ u32 omap2xxx_sdrc_reprogram(u32 level, u32 force)
 	m_type = omap2xxx_sdrc_get_type();
 
 	local_irq_save(flags);
-	__raw_writel(0xffff, OMAP24XX_PRCM_VOLTSETUP);
+	if (cpu_is_omap2420())
+		__raw_writel(0xffff, OMAP2420_PRCM_VOLTSETUP);
+	else
+		__raw_writel(0xffff, OMAP2430_PRCM_VOLTSETUP);
 	omap2_sram_reprogram_sdrc(level, dll_ctrl, m_type);
 	curr_perf_level = level;
 	local_irq_restore(flags);
diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S
index cbb8930..bb29985 100644
--- a/arch/arm/mach-omap2/sram242x.S
+++ b/arch/arm/mach-omap2/sram242x.S
@@ -124,7 +124,7 @@ omap242x_sdi_cm_clksel2_pll:
 omap242x_sdi_sdrc_dlla_ctrl:
 	.word OMAP242X_SDRC_REGADDR(SDRC_DLLA_CTRL)
 omap242x_sdi_prcm_voltctrl:
-	.word OMAP242X_PRCM_VOLTCTRL
+	.word OMAP2420_PRCM_VOLTCTRL
 prcm_mask_val:
 	.word 0xFFFF3FFC
 omap242x_sdi_timer_32ksynct_cr:
@@ -220,7 +220,7 @@ omap242x_srs_sdrc_dlla_ctrl:
 omap242x_srs_sdrc_rfr_ctrl:
 	.word OMAP242X_SDRC_REGADDR(SDRC_RFR_CTRL_0)
 omap242x_srs_prcm_voltctrl:
-	.word OMAP242X_PRCM_VOLTCTRL
+	.word OMAP2420_PRCM_VOLTCTRL
 ddr_prcm_mask_val:
 	.word 0xFFFF3FFC
 omap242x_srs_timer_32ksynct:
@@ -305,7 +305,7 @@ wait_dll_lock:
 	ldmfd	sp!, {r0-r12, pc}	@ restore regs and return
 
 omap242x_ssp_set_config:
-	.word OMAP242X_PRCM_CLKCFG_CTRL
+	.word OMAP2420_PRCM_CLKCFG_CTRL
 omap242x_ssp_pll_ctl:
 	.word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKEN)
 omap242x_ssp_pll_stat:
diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S
index 054b0f3..9955abc 100644
--- a/arch/arm/mach-omap2/sram243x.S
+++ b/arch/arm/mach-omap2/sram243x.S
@@ -124,7 +124,7 @@ omap243x_sdi_cm_clksel2_pll:
 omap243x_sdi_sdrc_dlla_ctrl:
 	.word OMAP243X_SDRC_REGADDR(SDRC_DLLA_CTRL)
 omap243x_sdi_prcm_voltctrl:
-	.word OMAP243X_PRCM_VOLTCTRL
+	.word OMAP2430_PRCM_VOLTCTRL
 prcm_mask_val:
 	.word 0xFFFF3FFC
 omap243x_sdi_timer_32ksynct_cr:
@@ -220,7 +220,7 @@ omap243x_srs_sdrc_dlla_ctrl:
 omap243x_srs_sdrc_rfr_ctrl:
 	.word OMAP243X_SDRC_REGADDR(SDRC_RFR_CTRL_0)
 omap243x_srs_prcm_voltctrl:
-	.word OMAP243X_PRCM_VOLTCTRL
+	.word OMAP2430_PRCM_VOLTCTRL
 ddr_prcm_mask_val:
 	.word 0xFFFF3FFC
 omap243x_srs_timer_32ksynct:
@@ -305,7 +305,7 @@ wait_dll_lock:
 	ldmfd	sp!, {r0-r12, pc}	@ restore regs and return
 
 omap243x_ssp_set_config:
-	.word OMAP243X_PRCM_CLKCFG_CTRL
+	.word OMAP2430_PRCM_CLKCFG_CTRL
 omap243x_ssp_pll_ctl:
 	.word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKEN)
 omap243x_ssp_pll_stat:
diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 4202cf4..10a6413 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -89,14 +89,12 @@
 
 #define OMAP2_PRCM_BASE		OMAP2420_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2420_CM_BASE
-#define OMAP2_PRM_BASE		OMAP2420_PRM_BASE
 #define OMAP2_VA_IC_BASE	IO_ADDRESS(OMAP24XX_IC_BASE)
 
 #elif defined(CONFIG_ARCH_OMAP2430)
 
 #define OMAP2_PRCM_BASE		OMAP2430_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2430_CM_BASE
-#define OMAP2_PRM_BASE		OMAP2430_PRM_BASE
 #define OMAP2_VA_IC_BASE	IO_ADDRESS(OMAP24XX_IC_BASE)
 
 #endif
diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index 581d910..4233064 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -86,7 +86,6 @@
 #if defined(CONFIG_ARCH_OMAP3430)
 
 #define OMAP2_CM_BASE			OMAP3430_CM_BASE
-#define OMAP2_PRM_BASE			OMAP3430_PRM_BASE
 #define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
 
 #endif
-- 
cgit v0.10.2


From 8a424bb3c92b2df17008dfa17ac55b602290267b Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:08:35 -0700
Subject: ARM: OMAP2/3: Move define of OMAP2_VA_IC_BASE to be local to
 entry-macro.S

Move define of OMAP2_VA_IC_BASE to be local to entry-macro.S

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/include/mach/entry-macro.S b/arch/arm/plat-omap/include/mach/entry-macro.S
index 2276f89..33256a0 100644
--- a/arch/arm/plat-omap/include/mach/entry-macro.S
+++ b/arch/arm/plat-omap/include/mach/entry-macro.S
@@ -58,11 +58,14 @@
 #endif
 #if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
 
-#if defined(CONFIG_ARCH_OMAP24XX)
 #include <mach/omap24xx.h>
-#endif
-#if defined(CONFIG_ARCH_OMAP34XX)
 #include <mach/omap34xx.h>
+
+/* REVISIT: This should be set dynamically if CONFIG_MULTI_OMAP2 is selected */
+#if defined(CONFIG_ARCH_OMAP2420) || defined(CONFIG_ARCH_OMAP2430)
+#define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP24XX_IC_BASE)
+#elif defined(CONFIG_ARCH_OMAP34XX)
+#define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
 #endif
 
 #define INTCPS_SIR_IRQ_OFFSET	0x0040		/* Active interrupt offset */
diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 10a6413..4ce9b6a 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -89,13 +89,11 @@
 
 #define OMAP2_PRCM_BASE		OMAP2420_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2420_CM_BASE
-#define OMAP2_VA_IC_BASE	IO_ADDRESS(OMAP24XX_IC_BASE)
 
 #elif defined(CONFIG_ARCH_OMAP2430)
 
 #define OMAP2_PRCM_BASE		OMAP2430_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2430_CM_BASE
-#define OMAP2_VA_IC_BASE	IO_ADDRESS(OMAP24XX_IC_BASE)
 
 #endif
 
diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index 4233064..36c99ca 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -86,7 +86,6 @@
 #if defined(CONFIG_ARCH_OMAP3430)
 
 #define OMAP2_CM_BASE			OMAP3430_CM_BASE
-#define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
 
 #endif
 
-- 
cgit v0.10.2


From c28150ee688df25861bb8eeff445ed95baf29321 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:08:35 -0700
Subject: ARM: OMAP2/3: Remove OMAP2_PRCM_BASE

It's currently unused, and processor specific defines should
be used instead.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 4ce9b6a..60f8324 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -87,12 +87,10 @@
 
 #if defined(CONFIG_ARCH_OMAP2420)
 
-#define OMAP2_PRCM_BASE		OMAP2420_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2420_CM_BASE
 
 #elif defined(CONFIG_ARCH_OMAP2430)
 
-#define OMAP2_PRCM_BASE		OMAP2430_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2430_CM_BASE
 
 #endif
-- 
cgit v0.10.2


From eb0d0ee1c256492edd56e55c2704bbb1fe1d8bc0 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:08:36 -0700
Subject: ARM: OMAP2/3: Remove OMAP_CM_REGADDR

Processor specific macros should be used instead.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/clock24xx.h b/arch/arm/mach-omap2/clock24xx.h
index f0e4480..458f00c 100644
--- a/arch/arm/mach-omap2/clock24xx.h
+++ b/arch/arm/mach-omap2/clock24xx.h
@@ -26,9 +26,11 @@
 
 /* REVISIT: These should be set dynamically for CONFIG_MULTI_OMAP2 */
 #ifdef CONFIG_ARCH_OMAP2420
+#define OMAP_CM_REGADDR			OMAP2420_CM_REGADDR
 #define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP2420_PRCM_CLKOUT_CTRL
 #define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP2420_PRCM_CLKEMUL_CTRL
 #else
+#define OMAP_CM_REGADDR			OMAP2430_CM_REGADDR
 #define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP2430_PRCM_CLKOUT_CTRL
 #define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP2430_PRCM_CLKEMUL_CTRL
 #endif
diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h
index 017a30e..496f0e9 100644
--- a/arch/arm/mach-omap2/clock34xx.h
+++ b/arch/arm/mach-omap2/clock34xx.h
@@ -27,6 +27,8 @@
 #include "prm.h"
 #include "prm-regbits-34xx.h"
 
+#define OMAP_CM_REGADDR		OMAP34XX_CM_REGADDR
+
 static unsigned long omap3_dpll_recalc(struct clk *clk);
 static unsigned long omap3_clkoutx2_recalc(struct clk *clk);
 static void omap3_dpll_allow_idle(struct clk *clk);
diff --git a/arch/arm/mach-omap2/cm.h b/arch/arm/mach-omap2/cm.h
index efc082a..1d3c93b 100644
--- a/arch/arm/mach-omap2/cm.h
+++ b/arch/arm/mach-omap2/cm.h
@@ -16,17 +16,12 @@
 
 #include "prcm-common.h"
 
-#ifndef __ASSEMBLER__
-#define OMAP_CM_REGADDR(module, reg)					\
-			IO_ADDRESS(OMAP2_CM_BASE + (module) + (reg))
-#else
 #define OMAP2420_CM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP2420_CM_BASE + (module) + (reg))
 #define OMAP2430_CM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP2430_CM_BASE + (module) + (reg))
 #define OMAP34XX_CM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP3430_CM_BASE + (module) + (reg))
-#endif
 
 /*
  * Architecture-specific global CM registers
diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 60f8324..696edfc 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -85,15 +85,5 @@
 #define OMAP24XX_SEC_AES_BASE	(OMAP24XX_SEC_BASE + 0x6000)
 #define OMAP24XX_SEC_PKA_BASE	(OMAP24XX_SEC_BASE + 0x8000)
 
-#if defined(CONFIG_ARCH_OMAP2420)
-
-#define OMAP2_CM_BASE		OMAP2420_CM_BASE
-
-#elif defined(CONFIG_ARCH_OMAP2430)
-
-#define OMAP2_CM_BASE		OMAP2430_CM_BASE
-
-#endif
-
 #endif /* __ASM_ARCH_OMAP24XX_H */
 
diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index 36c99ca..cc25733 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -83,12 +83,6 @@
 
 #define OMAP34XX_MAILBOX_BASE		(L4_34XX_BASE + 0x94000)
 
-#if defined(CONFIG_ARCH_OMAP3430)
-
-#define OMAP2_CM_BASE			OMAP3430_CM_BASE
-
-#endif
-
 #define OMAP34XX_DSP_BASE	0x58000000
 #define OMAP34XX_DSP_MEM_BASE	(OMAP34XX_DSP_BASE + 0x0)
 #define OMAP34XX_DSP_IPI_BASE	(OMAP34XX_DSP_BASE + 0x1000000)
-- 
cgit v0.10.2


From a9a418d4553290b824f95df65bd20097d7691233 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:08:36 -0700
Subject: ARM: OMAP2/3: Reorganize Makefile to add omap4 support

We don't necessarily want to compile in irq.o and sdrc.o for omap4.
Also, clock and prcm may not be implemented initially.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index c49d9bf..bf3827a 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -3,9 +3,14 @@
 #
 
 # Common support
-obj-y := irq.o id.o io.o sdrc.o control.o prcm.o clock.o mux.o \
-		devices.o serial.o gpmc.o timer-gp.o powerdomain.o \
-		clockdomain.o
+obj-y := id.o io.o control.o mux.o devices.o serial.o gpmc.o timer-gp.o
+
+omap-2-3-common				= irq.o sdrc.o
+prcm-common				= prcm.o powerdomain.o
+clock-common				= clock.o clockdomain.o
+
+obj-$(CONFIG_ARCH_OMAP2) += $(omap-2-3-common) $(prcm-common) $(clock-common)
+obj-$(CONFIG_ARCH_OMAP3) += $(omap-2-3-common) $(prcm-common) $(clock-common)
 
 obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o
 
-- 
cgit v0.10.2


From aea2a5b03c6476175c6f498a72cfbe73f7f47528 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Mon, 25 May 2009 11:08:36 -0700
Subject: ARM: OMAP: Remove unwanted type casts and fix the compiler warning.

This patch fixes the compiler warning "assignment from incompatible
pointer type"  in dmtimer.c and removes the tye casts. These warnings
were suppressed by type catsing.

The proposed fix was suggested by Russell King <rmk+kernel@arm.linux.org.uk>

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 55bb996..ee20612 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -214,7 +214,7 @@ static const char *omap2_dm_source_names[] __initdata = {
 	NULL
 };
 
-static struct clk **omap2_dm_source_clocks[3];
+static struct clk *omap2_dm_source_clocks[3];
 static const int dm_timer_count = ARRAY_SIZE(omap2_dm_timers);
 
 #elif defined(CONFIG_ARCH_OMAP3)
@@ -247,7 +247,7 @@ static const char *omap3_dm_source_names[] __initdata = {
 	NULL
 };
 
-static struct clk **omap3_dm_source_clocks[2];
+static struct clk *omap3_dm_source_clocks[2];
 static const int dm_timer_count = ARRAY_SIZE(omap3_dm_timers);
 
 #else
@@ -257,7 +257,7 @@ static const int dm_timer_count = ARRAY_SIZE(omap3_dm_timers);
 #endif
 
 static struct omap_dm_timer *dm_timers;
-static char **dm_source_names;
+static const char **dm_source_names;
 static struct clk **dm_source_clocks;
 
 static spinlock_t dm_timer_lock;
@@ -705,12 +705,12 @@ int __init omap_dm_timer_init(void)
 		dm_timers = omap1_dm_timers;
 	else if (cpu_is_omap24xx()) {
 		dm_timers = omap2_dm_timers;
-		dm_source_names = (char **)omap2_dm_source_names;
-		dm_source_clocks = (struct clk **)omap2_dm_source_clocks;
+		dm_source_names = omap2_dm_source_names;
+		dm_source_clocks = omap2_dm_source_clocks;
 	} else if (cpu_is_omap34xx()) {
 		dm_timers = omap3_dm_timers;
-		dm_source_names = (char **)omap3_dm_source_names;
-		dm_source_clocks = (struct clk **)omap3_dm_source_clocks;
+		dm_source_names = omap3_dm_source_names;
+		dm_source_clocks = omap3_dm_source_clocks;
 	}
 
 	if (cpu_class_is_omap2())
-- 
cgit v0.10.2


From af5703f2beb490fc700a30d0b87e84da0cf42086 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Mon, 25 May 2009 11:08:37 -0700
Subject: ARM: OMAP: Remove useless omap_sram_error function.

This patch removes fixes omap_sram_error() function and replace the
error paths with BUG_ON.

The proposed fix was suggested by Russell King <rmk+kernel@arm.linux.org.uk>

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index fa5297d..e1493d8 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -242,20 +242,13 @@ void * omap_sram_push(void * start, unsigned long size)
 	return (void *)omap_sram_ceil;
 }
 
-static void omap_sram_error(void)
-{
-	panic("Uninitialized SRAM function\n");
-}
-
 #ifdef CONFIG_ARCH_OMAP1
 
 static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl);
 
 void omap_sram_reprogram_clock(u32 dpllctl, u32 ckctl)
 {
-	if (!_omap_sram_reprogram_clock)
-		omap_sram_error();
-
+	BUG_ON(!_omap_sram_reprogram_clock);
 	_omap_sram_reprogram_clock(dpllctl, ckctl);
 }
 
@@ -280,9 +273,7 @@ static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
 void omap2_sram_ddr_init(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
 		   u32 base_cs, u32 force_unlock)
 {
-	if (!_omap2_sram_ddr_init)
-		omap_sram_error();
-
+	BUG_ON(!_omap2_sram_ddr_init);
 	_omap2_sram_ddr_init(slow_dll_ctrl, fast_dll_ctrl,
 			     base_cs, force_unlock);
 }
@@ -292,9 +283,7 @@ static void (*_omap2_sram_reprogram_sdrc)(u32 perf_level, u32 dll_val,
 
 void omap2_sram_reprogram_sdrc(u32 perf_level, u32 dll_val, u32 mem_type)
 {
-	if (!_omap2_sram_reprogram_sdrc)
-		omap_sram_error();
-
+	BUG_ON(!_omap2_sram_reprogram_sdrc);
 	_omap2_sram_reprogram_sdrc(perf_level, dll_val, mem_type);
 }
 
@@ -302,9 +291,7 @@ static u32 (*_omap2_set_prcm)(u32 dpll_ctrl_val, u32 sdrc_rfr_val, int bypass);
 
 u32 omap2_set_prcm(u32 dpll_ctrl_val, u32 sdrc_rfr_val, int bypass)
 {
-	if (!_omap2_set_prcm)
-		omap_sram_error();
-
+	BUG_ON(!_omap2_set_prcm);
 	return _omap2_set_prcm(dpll_ctrl_val, sdrc_rfr_val, bypass);
 }
 #endif
@@ -360,9 +347,7 @@ static u32 (*_omap3_sram_configure_core_dpll)(u32 sdrc_rfr_ctrl,
 u32 omap3_configure_core_dpll(u32 sdrc_rfr_ctrl, u32 sdrc_actim_ctrla,
 			      u32 sdrc_actim_ctrlb, u32 m2)
 {
-	if (!_omap3_sram_configure_core_dpll)
-		omap_sram_error();
-
+	BUG_ON(!_omap3_sram_configure_core_dpll);
 	return _omap3_sram_configure_core_dpll(sdrc_rfr_ctrl,
 					       sdrc_actim_ctrla,
 					       sdrc_actim_ctrlb, m2);
-- 
cgit v0.10.2


From 00aeeffffadab39dcbd8d2360a1f2dc4469f4f1a Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Mon, 25 May 2009 11:08:37 -0700
Subject: ARM: OMAP: Remove unnecessary omap2_globals.

This patch removes unnecessary omap2_globals and pass the global structures
directly as function argument.

The proposed cleanup was suggested by Russell King <rmk+kernel@arm.linux.org.uk>

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c
index e1add78..70b68ef 100644
--- a/arch/arm/plat-omap/common.c
+++ b/arch/arm/plat-omap/common.c
@@ -287,9 +287,7 @@ arch_initcall(omap_init_clocksource_32k);
 
 #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 
-static struct omap_globals *omap2_globals;
-
-static void __init __omap2_set_globals(void)
+static void __init __omap2_set_globals(struct omap_globals *omap2_globals)
 {
 	omap2_set_globals_tap(omap2_globals);
 	omap2_set_globals_sdrc(omap2_globals);
@@ -313,8 +311,7 @@ static struct omap_globals omap242x_globals = {
 
 void __init omap2_set_globals_242x(void)
 {
-	omap2_globals = &omap242x_globals;
-	__omap2_set_globals();
+	__omap2_set_globals(&omap242x_globals);
 }
 #endif
 
@@ -332,8 +329,7 @@ static struct omap_globals omap243x_globals = {
 
 void __init omap2_set_globals_243x(void)
 {
-	omap2_globals = &omap243x_globals;
-	__omap2_set_globals();
+	__omap2_set_globals(&omap243x_globals);
 }
 #endif
 
@@ -351,8 +347,7 @@ static struct omap_globals omap343x_globals = {
 
 void __init omap2_set_globals_343x(void)
 {
-	omap2_globals = &omap343x_globals;
-	__omap2_set_globals();
+	__omap2_set_globals(&omap343x_globals);
 }
 #endif
 
-- 
cgit v0.10.2


From e85c205ac1427f2405021a36f083280ff0d0a35e Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 25 May 2009 11:08:41 -0700
Subject: ARM: OMAP: Increase VMALLOC_END to allow 256MB RAM

This increases VMALLOC_END to 0x18000000, making room for 256MB
RAM with the default 128MB vmalloc region.

Note that after this patch there's no longer a hole between vmalloc
space and the beginning of IO space on omap2 as the first virtual
mapping starts at 0xd8000000.

Also fold in a related change from Paul Walmsley <paul@pwsan.com>
to change the OMAP2_SRAM addresses accordingly.

Signed-off-by: Mans Rullgard <mans@mansr.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/include/mach/vmalloc.h b/arch/arm/plat-omap/include/mach/vmalloc.h
index dc104cd..b97dfaf 100644
--- a/arch/arm/plat-omap/include/mach/vmalloc.h
+++ b/arch/arm/plat-omap/include/mach/vmalloc.h
@@ -17,5 +17,5 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
-#define VMALLOC_END	  (PAGE_OFFSET + 0x10000000)
+#define VMALLOC_END	  (PAGE_OFFSET + 0x18000000)
 
diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index e1493d8..102c9f7 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -38,8 +38,8 @@
 #define OMAP1_SRAM_VA		VMALLOC_END
 #define OMAP2_SRAM_PA		0x40200000
 #define OMAP2_SRAM_PUB_PA	0x4020f800
-#define OMAP2_SRAM_VA		VMALLOC_END
-#define OMAP2_SRAM_PUB_VA	(VMALLOC_END + 0x800)
+#define OMAP2_SRAM_VA		0xe3000000
+#define OMAP2_SRAM_PUB_VA	(OMAP2_SRAM_VA + 0x800)
 #define OMAP3_SRAM_PA           0x40200000
 #define OMAP3_SRAM_VA           0xd7000000
 #define OMAP3_SRAM_PUB_PA       0x40208000
-- 
cgit v0.10.2


From d6d834b010908380c9054d8ad339a902833a3bab Mon Sep 17 00:00:00 2001
From: Eero Nurkkala <ext-eero.nurkkala@nokia.com>
Date: Mon, 25 May 2009 11:08:42 -0700
Subject: ARM: OMAP: McBSP: Fix legacy interrupts to clear their status

If XSYNCERR or RSYNCERR interrupts are enabled, they are never
cleared causing the IRQ handler to be continuously called.
This patch clears the IRQs in question in the event they are
enabled and taken.

Signed-off-by: Eero Nurkkala <ext-eero.nurkkala@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c
index 28b0a82..efa0e01 100644
--- a/arch/arm/plat-omap/mcbsp.c
+++ b/arch/arm/plat-omap/mcbsp.c
@@ -91,11 +91,20 @@ static void omap_mcbsp_dump_reg(u8 id)
 static irqreturn_t omap_mcbsp_tx_irq_handler(int irq, void *dev_id)
 {
 	struct omap_mcbsp *mcbsp_tx = dev_id;
+	u16 irqst_spcr2;
 
-	dev_dbg(mcbsp_tx->dev, "TX IRQ callback : 0x%x\n",
-		OMAP_MCBSP_READ(mcbsp_tx->io_base, SPCR2));
+	irqst_spcr2 = OMAP_MCBSP_READ(mcbsp_tx->io_base, SPCR2);
+	dev_dbg(mcbsp_tx->dev, "TX IRQ callback : 0x%x\n", irqst_spcr2);
 
-	complete(&mcbsp_tx->tx_irq_completion);
+	if (irqst_spcr2 & XSYNC_ERR) {
+		dev_err(mcbsp_tx->dev, "TX Frame Sync Error! : 0x%x\n",
+			irqst_spcr2);
+		/* Writing zero to XSYNC_ERR clears the IRQ */
+		OMAP_MCBSP_WRITE(mcbsp_tx->io_base, SPCR2,
+			irqst_spcr2 & ~(XSYNC_ERR));
+	} else {
+		complete(&mcbsp_tx->tx_irq_completion);
+	}
 
 	return IRQ_HANDLED;
 }
@@ -103,11 +112,20 @@ static irqreturn_t omap_mcbsp_tx_irq_handler(int irq, void *dev_id)
 static irqreturn_t omap_mcbsp_rx_irq_handler(int irq, void *dev_id)
 {
 	struct omap_mcbsp *mcbsp_rx = dev_id;
+	u16 irqst_spcr1;
 
-	dev_dbg(mcbsp_rx->dev, "RX IRQ callback : 0x%x\n",
-		OMAP_MCBSP_READ(mcbsp_rx->io_base, SPCR2));
+	irqst_spcr1 = OMAP_MCBSP_READ(mcbsp_rx->io_base, SPCR1);
+	dev_dbg(mcbsp_rx->dev, "RX IRQ callback : 0x%x\n", irqst_spcr1);
 
-	complete(&mcbsp_rx->rx_irq_completion);
+	if (irqst_spcr1 & RSYNC_ERR) {
+		dev_err(mcbsp_rx->dev, "RX Frame Sync Error! : 0x%x\n",
+			irqst_spcr1);
+		/* Writing zero to RSYNC_ERR clears the IRQ */
+		OMAP_MCBSP_WRITE(mcbsp_rx->io_base, SPCR1,
+			irqst_spcr1 & ~(RSYNC_ERR));
+	} else {
+		complete(&mcbsp_rx->tx_irq_completion);
+	}
 
 	return IRQ_HANDLED;
 }
-- 
cgit v0.10.2


From ddf25dfe3a744d4e11a73e1508121f74267dda68 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@nokia.com>
Date: Mon, 25 May 2009 11:08:43 -0700
Subject: ARM: OMAP: Update contact address of I2C registration helper

This email address is going to expire soon so update it.

Signed-off-by: Jarkko Nikula <jarkko.nikula@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c
index a303071..8b84839 100644
--- a/arch/arm/plat-omap/i2c.c
+++ b/arch/arm/plat-omap/i2c.c
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2007 Nokia Corporation.
  *
- * Contact: Jarkko Nikula <jarkko.nikula@nokia.com>
+ * Contact: Jarkko Nikula <jhnikula@gmail.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
-- 
cgit v0.10.2


From ebe3b0e787a538638410088227e0a87e3b4b6c00 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:08:44 -0700
Subject: ARM: OMAP1: Misc clean-up

Remove unnecessary Kconfig line and allow compile of MBOX_FWK.
Also allow building USB on Nokia 770.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig
index cd8de89..55ecc01 100644
--- a/arch/arm/mach-omap1/Kconfig
+++ b/arch/arm/mach-omap1/Kconfig
@@ -46,7 +46,6 @@ config MACH_OMAP_H2
 config MACH_OMAP_H3
 	bool "TI H3 Support"
 	depends on ARCH_OMAP1 && ARCH_OMAP16XX
-#	select GPIOEXPANDER_OMAP
     	help
 	  TI OMAP 1710 H3 board support. Say Y here if you have such
 	  a board.
diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile
index 1bda8f5..6867cd3 100644
--- a/arch/arm/mach-omap1/Makefile
+++ b/arch/arm/mach-omap1/Makefile
@@ -13,6 +13,10 @@ obj-$(CONFIG_OMAP_32K_TIMER)	+= timer32k.o
 # Power Management
 obj-$(CONFIG_PM) += pm.o sleep.o
 
+# DSP
+obj-$(CONFIG_OMAP_MBOX_FWK)	+= mailbox_mach.o
+mailbox_mach-objs		:= mailbox.o
+
 led-y := leds.o
 
 # Specific board support
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index d1ed136..8780ca6 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -35,6 +35,7 @@
 #include <mach/omapfb.h>
 #include <mach/lcd_mipid.h>
 #include <mach/mmc.h>
+#include <mach/usb.h>
 
 #define ADS7846_PENDOWN_GPIO	15
 
-- 
cgit v0.10.2


From a4ab0d836bbdbbfdb892135a92b339107530b710 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:26:41 -0700
Subject: ARM: OMAP2/3: Remove OMAP2_32KSYNCT_BASE

Use processor specific defines instead.

As an extra bonus, this patch fixes the problem of CONFIG_DEBUG_SPINLOCK
calling sched_clock before we have things initialized:

http://patchwork.kernel.org/patch/15810/

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S
index af4bd34..cbb8930 100644
--- a/arch/arm/mach-omap2/sram242x.S
+++ b/arch/arm/mach-omap2/sram242x.S
@@ -128,7 +128,7 @@ omap242x_sdi_prcm_voltctrl:
 prcm_mask_val:
 	.word 0xFFFF3FFC
 omap242x_sdi_timer_32ksynct_cr:
-	.word IO_ADDRESS(OMAP2_32KSYNCT_BASE + 0x010)
+	.word IO_ADDRESS(OMAP2420_32KSYNCT_BASE + 0x010)
 ENTRY(omap242x_sram_ddr_init_sz)
 	.word	. - omap242x_sram_ddr_init
 
@@ -224,7 +224,7 @@ omap242x_srs_prcm_voltctrl:
 ddr_prcm_mask_val:
 	.word 0xFFFF3FFC
 omap242x_srs_timer_32ksynct:
-	.word IO_ADDRESS(OMAP2_32KSYNCT_BASE + 0x010)
+	.word IO_ADDRESS(OMAP2420_32KSYNCT_BASE + 0x010)
 
 ENTRY(omap242x_sram_reprogram_sdrc_sz)
 	.word	. - omap242x_sram_reprogram_sdrc
diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S
index 84363e2..054b0f3 100644
--- a/arch/arm/mach-omap2/sram243x.S
+++ b/arch/arm/mach-omap2/sram243x.S
@@ -128,7 +128,7 @@ omap243x_sdi_prcm_voltctrl:
 prcm_mask_val:
 	.word 0xFFFF3FFC
 omap243x_sdi_timer_32ksynct_cr:
-	.word IO_ADDRESS(OMAP2_32KSYNCT_BASE + 0x010)
+	.word IO_ADDRESS(OMAP2430_32KSYNCT_BASE + 0x010)
 ENTRY(omap243x_sram_ddr_init_sz)
 	.word	. - omap243x_sram_ddr_init
 
@@ -224,7 +224,7 @@ omap243x_srs_prcm_voltctrl:
 ddr_prcm_mask_val:
 	.word 0xFFFF3FFC
 omap243x_srs_timer_32ksynct:
-	.word IO_ADDRESS(OMAP2_32KSYNCT_BASE + 0x010)
+	.word IO_ADDRESS(OMAP2430_32KSYNCT_BASE + 0x010)
 
 ENTRY(omap243x_sram_reprogram_sdrc_sz)
 	.word	. - omap243x_sram_reprogram_sdrc
diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c
index 433021f..e1add78 100644
--- a/arch/arm/plat-omap/common.c
+++ b/arch/arm/plat-omap/common.c
@@ -175,25 +175,61 @@ console_initcall(omap_add_serial_console);
  * but systems won't necessarily want to spend resources that way.
  */
 
-#if defined(CONFIG_ARCH_OMAP16XX)
-#define TIMER_32K_SYNCHRONIZED		0xfffbc410
-#elif defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
-#define TIMER_32K_SYNCHRONIZED		(OMAP2_32KSYNCT_BASE + 0x10)
-#endif
+#define OMAP16XX_TIMER_32K_SYNCHRONIZED		0xfffbc410
 
-#ifdef	TIMER_32K_SYNCHRONIZED
+#if !(defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP15XX))
 
 #include <linux/clocksource.h>
 
-static cycle_t omap_32k_read(struct clocksource *cs)
+#ifdef CONFIG_ARCH_OMAP16XX
+static cycle_t omap16xx_32k_read(struct clocksource *cs)
+{
+	return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED);
+}
+#else
+#define omap16xx_32k_read	NULL
+#endif
+
+#ifdef CONFIG_ARCH_OMAP2420
+static cycle_t omap2420_32k_read(struct clocksource *cs)
+{
+	return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10);
+}
+#else
+#define omap2420_32k_read	NULL
+#endif
+
+#ifdef CONFIG_ARCH_OMAP2430
+static cycle_t omap2430_32k_read(struct clocksource *cs)
+{
+	return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10);
+}
+#else
+#define omap2430_32k_read	NULL
+#endif
+
+#ifdef CONFIG_ARCH_OMAP34XX
+static cycle_t omap34xx_32k_read(struct clocksource *cs)
 {
-	return omap_readl(TIMER_32K_SYNCHRONIZED);
+	return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10);
+}
+#else
+#define omap34xx_32k_read	NULL
+#endif
+
+/*
+ * Kernel assumes that sched_clock can be called early but may not have
+ * things ready yet.
+ */
+static cycle_t omap_32k_read_dummy(struct clocksource *cs)
+{
+	return 0;
 }
 
 static struct clocksource clocksource_32k = {
 	.name		= "32k_counter",
 	.rating		= 250,
-	.read		= omap_32k_read,
+	.read		= omap_32k_read_dummy,
 	.mask		= CLOCKSOURCE_MASK(32),
 	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
@@ -207,7 +243,7 @@ unsigned long long sched_clock(void)
 {
 	unsigned long long ret;
 
-	ret = (unsigned long long)omap_32k_read(&clocksource_32k);
+	ret = (unsigned long long)clocksource_32k.read(&clocksource_32k);
 	ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift;
 	return ret;
 }
@@ -220,6 +256,17 @@ static int __init omap_init_clocksource_32k(void)
 	if (cpu_is_omap16xx() || cpu_class_is_omap2()) {
 		struct clk *sync_32k_ick;
 
+		if (cpu_is_omap16xx())
+			clocksource_32k.read = omap16xx_32k_read;
+		else if (cpu_is_omap2420())
+			clocksource_32k.read = omap2420_32k_read;
+		else if (cpu_is_omap2430())
+			clocksource_32k.read = omap2430_32k_read;
+		else if (cpu_is_omap34xx())
+			clocksource_32k.read = omap34xx_32k_read;
+		else
+			return -ENODEV;
+
 		sync_32k_ick = clk_get(NULL, "omap_32ksync_ick");
 		if (sync_32k_ick)
 			clk_enable(sync_32k_ick);
@@ -234,7 +281,7 @@ static int __init omap_init_clocksource_32k(void)
 }
 arch_initcall(omap_init_clocksource_32k);
 
-#endif	/* TIMER_32K_SYNCHRONIZED */
+#endif	/* !(defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP15XX)) */
 
 /* Global address base setup code */
 
diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 24335d4..4202cf4 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -87,7 +87,6 @@
 
 #if defined(CONFIG_ARCH_OMAP2420)
 
-#define OMAP2_32KSYNCT_BASE	OMAP2420_32KSYNCT_BASE
 #define OMAP2_PRCM_BASE		OMAP2420_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2420_CM_BASE
 #define OMAP2_PRM_BASE		OMAP2420_PRM_BASE
@@ -95,7 +94,6 @@
 
 #elif defined(CONFIG_ARCH_OMAP2430)
 
-#define OMAP2_32KSYNCT_BASE	OMAP2430_32KSYNCT_BASE
 #define OMAP2_PRCM_BASE		OMAP2430_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2430_CM_BASE
 #define OMAP2_PRM_BASE		OMAP2430_PRM_BASE
diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index ab64015..581d910 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -85,7 +85,6 @@
 
 #if defined(CONFIG_ARCH_OMAP3430)
 
-#define OMAP2_32KSYNCT_BASE		OMAP3430_32KSYNCT_BASE
 #define OMAP2_CM_BASE			OMAP3430_CM_BASE
 #define OMAP2_PRM_BASE			OMAP3430_PRM_BASE
 #define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
-- 
cgit v0.10.2


From 8e3bd351d1d2505e17d0b10c17bf8d7655eb9faf Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:26:42 -0700
Subject: ARM: OMAP2/3: Remove OMAP_PRM_REGADDR and OMAP2_PRM_BASE

Remove OMAP_PRM_REGADDR and use processor specific defines instead.

Also fold in a patch from Kevin Hilman to add _OFFSET #defines
for the PRCM registers to be used with the prm_[read|write]_* macros.
These are used extensively in the forthcoming OMAP PM support.

Also remove now unused OMAP2_PRM_BASE.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 4247a15..dd37483 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -91,9 +91,9 @@ static void _omap2xxx_clk_commit(struct clk *clk)
 		return;
 
 	prm_write_mod_reg(OMAP24XX_VALID_CONFIG, OMAP24XX_GR_MOD,
-		OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET);
+		OMAP2_PRCM_CLKCFG_CTRL_OFFSET);
 	/* OCP barrier */
-	prm_read_mod_reg(OMAP24XX_GR_MOD, OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET);
+	prm_read_mod_reg(OMAP24XX_GR_MOD, OMAP2_PRCM_CLKCFG_CTRL_OFFSET);
 }
 
 /*
diff --git a/arch/arm/mach-omap2/clock24xx.c b/arch/arm/mach-omap2/clock24xx.c
index e4cef33..c442fe9 100644
--- a/arch/arm/mach-omap2/clock24xx.c
+++ b/arch/arm/mach-omap2/clock24xx.c
@@ -233,6 +233,8 @@ static struct prcm_config *curr_prcm_set;
 static struct clk *vclk;
 static struct clk *sclk;
 
+static void __iomem *prcm_clksrc_ctrl;
+
 /*-------------------------------------------------------------------------
  * Omap24xx specific clock functions
  *-------------------------------------------------------------------------*/
@@ -269,10 +271,9 @@ static int omap2_enable_osc_ck(struct clk *clk)
 {
 	u32 pcc;
 
-	pcc = __raw_readl(OMAP24XX_PRCM_CLKSRC_CTRL);
+	pcc = __raw_readl(prcm_clksrc_ctrl);
 
-	__raw_writel(pcc & ~OMAP_AUTOEXTCLKMODE_MASK,
-		      OMAP24XX_PRCM_CLKSRC_CTRL);
+	__raw_writel(pcc & ~OMAP_AUTOEXTCLKMODE_MASK, prcm_clksrc_ctrl);
 
 	return 0;
 }
@@ -281,10 +282,9 @@ static void omap2_disable_osc_ck(struct clk *clk)
 {
 	u32 pcc;
 
-	pcc = __raw_readl(OMAP24XX_PRCM_CLKSRC_CTRL);
+	pcc = __raw_readl(prcm_clksrc_ctrl);
 
-	__raw_writel(pcc | OMAP_AUTOEXTCLKMODE_MASK,
-		      OMAP24XX_PRCM_CLKSRC_CTRL);
+	__raw_writel(pcc | OMAP_AUTOEXTCLKMODE_MASK, prcm_clksrc_ctrl);
 }
 
 static const struct clkops clkops_oscck = {
@@ -654,7 +654,7 @@ static u32 omap2_get_sysclkdiv(void)
 {
 	u32 div;
 
-	div = __raw_readl(OMAP24XX_PRCM_CLKSRC_CTRL);
+	div = __raw_readl(prcm_clksrc_ctrl);
 	div &= OMAP_SYSCLKDIV_MASK;
 	div >>= OMAP_SYSCLKDIV_SHIFT;
 
@@ -714,10 +714,13 @@ int __init omap2_clk_init(void)
 	struct omap_clk *c;
 	u32 clkrate;
 
-	if (cpu_is_omap242x())
+	if (cpu_is_omap242x()) {
+		prcm_clksrc_ctrl = OMAP2420_PRCM_CLKSRC_CTRL;
 		cpu_mask = RATE_IN_242X;
-	else if (cpu_is_omap2430())
+	} else if (cpu_is_omap2430()) {
+		prcm_clksrc_ctrl = OMAP2430_PRCM_CLKSRC_CTRL;
 		cpu_mask = RATE_IN_243X;
+	}
 
 	clk_init(&omap2_clk_functions);
 
diff --git a/arch/arm/mach-omap2/clock24xx.h b/arch/arm/mach-omap2/clock24xx.h
index 88c5acb..f0e4480 100644
--- a/arch/arm/mach-omap2/clock24xx.h
+++ b/arch/arm/mach-omap2/clock24xx.h
@@ -24,6 +24,15 @@
 #include "cm-regbits-24xx.h"
 #include "sdrc.h"
 
+/* REVISIT: These should be set dynamically for CONFIG_MULTI_OMAP2 */
+#ifdef CONFIG_ARCH_OMAP2420
+#define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP2420_PRCM_CLKOUT_CTRL
+#define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP2420_PRCM_CLKEMUL_CTRL
+#else
+#define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP2430_PRCM_CLKOUT_CTRL
+#define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP2430_PRCM_CLKEMUL_CTRL
+#endif
+
 static unsigned long omap2_table_mpu_recalc(struct clk *clk);
 static int omap2_select_table_rate(struct clk *clk, unsigned long rate);
 static long omap2_round_to_table_rate(struct clk *clk, unsigned long rate);
diff --git a/arch/arm/mach-omap2/cm.h b/arch/arm/mach-omap2/cm.h
index 65fdf78..efc082a 100644
--- a/arch/arm/mach-omap2/cm.h
+++ b/arch/arm/mach-omap2/cm.h
@@ -38,6 +38,7 @@
 #define OMAP3430_CM_SYSCONFIG		OMAP_CM_REGADDR(OCP_MOD, 0x0010)
 #define OMAP3430_CM_POLCTRL		OMAP_CM_REGADDR(OCP_MOD, 0x009c)
 
+#define OMAP3_CM_CLKOUT_CTRL_OFFSET	0x0070
 #define OMAP3430_CM_CLKOUT_CTRL		OMAP_CM_REGADDR(OMAP3430_CCR_MOD, 0x0070)
 
 /*
diff --git a/arch/arm/mach-omap2/prm.h b/arch/arm/mach-omap2/prm.h
index 826d326b8..7c8e0c4 100644
--- a/arch/arm/mach-omap2/prm.h
+++ b/arch/arm/mach-omap2/prm.h
@@ -16,17 +16,12 @@
 
 #include "prcm-common.h"
 
-#ifndef __ASSEMBLER__
-#define OMAP_PRM_REGADDR(module, reg)					\
-			IO_ADDRESS(OMAP2_PRM_BASE + (module) + (reg))
-#else
 #define OMAP2420_PRM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP2420_PRM_BASE + (module) + (reg))
 #define OMAP2430_PRM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP2430_PRM_BASE + (module) + (reg))
 #define OMAP34XX_PRM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP3430_PRM_BASE + (module) + (reg))
-#endif
 
 /*
  * Architecture-specific global PRM registers
@@ -38,80 +33,132 @@
  *
  */
 
-/* Global 24xx registers in GR_MOD (Same as OCP_MOD for 24xx) */
-#define OMAP24XX_PRCM_VOLTCTRL_OFFSET		0x0050
-#define OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET	0x0080
-
-/* 242x GR_MOD registers, use these only for assembly code */
-#define OMAP242X_PRCM_VOLTCTRL		OMAP2420_PRM_REGADDR(OMAP24XX_GR_MOD,	\
-						OMAP24XX_PRCM_VOLTCTRL_OFFSET)
-#define OMAP242X_PRCM_CLKCFG_CTRL	OMAP2420_PRM_REGADDR(OMAP24XX_GR_MOD,	\
-						OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET)
-
-/* 243x GR_MOD registers, use these only for assembly code */
-#define OMAP243X_PRCM_VOLTCTRL		OMAP2430_PRM_REGADDR(OMAP24XX_GR_MOD,	\
-						OMAP24XX_PRCM_VOLTCTRL_OFFSET)
-#define OMAP243X_PRCM_CLKCFG_CTRL	OMAP2430_PRM_REGADDR(OMAP24XX_GR_MOD,	\
-						OMAP24XX_PRCM_CLKCFG_CTRL_OFFSET)
-
-/* These will disappear */
-#define OMAP24XX_PRCM_REVISION		OMAP_PRM_REGADDR(OCP_MOD, 0x0000)
-#define OMAP24XX_PRCM_SYSCONFIG		OMAP_PRM_REGADDR(OCP_MOD, 0x0010)
-
-#define OMAP24XX_PRCM_IRQSTATUS_MPU	OMAP_PRM_REGADDR(OCP_MOD, 0x0018)
-#define OMAP24XX_PRCM_IRQENABLE_MPU	OMAP_PRM_REGADDR(OCP_MOD, 0x001c)
-
-#define OMAP24XX_PRCM_VOLTST		OMAP_PRM_REGADDR(OCP_MOD, 0x0054)
-#define OMAP24XX_PRCM_CLKSRC_CTRL	OMAP_PRM_REGADDR(OCP_MOD, 0x0060)
-#define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP_PRM_REGADDR(OCP_MOD, 0x0070)
-#define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP_PRM_REGADDR(OCP_MOD, 0x0078)
-#define OMAP24XX_PRCM_CLKCFG_CTRL	OMAP_PRM_REGADDR(OCP_MOD, 0x0080)
-#define OMAP24XX_PRCM_CLKCFG_STATUS	OMAP_PRM_REGADDR(OCP_MOD, 0x0084)
-#define OMAP24XX_PRCM_VOLTSETUP		OMAP_PRM_REGADDR(OCP_MOD, 0x0090)
-#define OMAP24XX_PRCM_CLKSSETUP		OMAP_PRM_REGADDR(OCP_MOD, 0x0094)
-#define OMAP24XX_PRCM_POLCTRL		OMAP_PRM_REGADDR(OCP_MOD, 0x0098)
-
-#define OMAP3430_PRM_REVISION		OMAP_PRM_REGADDR(OCP_MOD, 0x0004)
-#define OMAP3430_PRM_SYSCONFIG		OMAP_PRM_REGADDR(OCP_MOD, 0x0014)
-
-#define OMAP3430_PRM_IRQSTATUS_MPU	OMAP_PRM_REGADDR(OCP_MOD, 0x0018)
-#define OMAP3430_PRM_IRQENABLE_MPU	OMAP_PRM_REGADDR(OCP_MOD, 0x001c)
-
-
-#define OMAP3430_PRM_VC_SMPS_SA		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0020)
-#define OMAP3430_PRM_VC_SMPS_VOL_RA	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0024)
-#define OMAP3430_PRM_VC_SMPS_CMD_RA	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0028)
-#define OMAP3430_PRM_VC_CMD_VAL_0	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x002c)
-#define OMAP3430_PRM_VC_CMD_VAL_1	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0030)
-#define OMAP3430_PRM_VC_CH_CONF		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0034)
-#define OMAP3430_PRM_VC_I2C_CFG		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0038)
-#define OMAP3430_PRM_VC_BYPASS_VAL	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x003c)
-#define OMAP3430_PRM_RSTCTRL		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0050)
-#define OMAP3430_PRM_RSTTIME		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0054)
-#define OMAP3430_PRM_RSTST		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0058)
-#define OMAP3430_PRM_VOLTCTRL		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0060)
-#define OMAP3430_PRM_SRAM_PCHARGE	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0064)
-#define OMAP3430_PRM_CLKSRC_CTRL	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0070)
-#define OMAP3430_PRM_VOLTSETUP1		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0090)
-#define OMAP3430_PRM_VOLTOFFSET		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0094)
-#define OMAP3430_PRM_CLKSETUP		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x0098)
-#define OMAP3430_PRM_POLCTRL		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x009c)
-#define OMAP3430_PRM_VOLTSETUP2		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00a0)
-#define OMAP3430_PRM_VP1_CONFIG		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b0)
-#define OMAP3430_PRM_VP1_VSTEPMIN	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b4)
-#define OMAP3430_PRM_VP1_VSTEPMAX	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b8)
-#define OMAP3430_PRM_VP1_VLIMITTO	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00bc)
-#define OMAP3430_PRM_VP1_VOLTAGE	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00c0)
-#define OMAP3430_PRM_VP1_STATUS		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00c4)
-#define OMAP3430_PRM_VP2_CONFIG		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d0)
-#define OMAP3430_PRM_VP2_VSTEPMIN	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d4)
-#define OMAP3430_PRM_VP2_VSTEPMAX	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d8)
-#define OMAP3430_PRM_VP2_VLIMITTO	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00dc)
-#define OMAP3430_PRM_VP2_VOLTAGE	OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00e0)
-#define OMAP3430_PRM_VP2_STATUS		OMAP_PRM_REGADDR(OMAP3430_GR_MOD, 0x00e4)
-
-#define OMAP3430_PRM_CLKSEL		OMAP_PRM_REGADDR(OMAP3430_CCR_MOD, 0x0040)
-#define OMAP3430_PRM_CLKOUT_CTRL	OMAP_PRM_REGADDR(OMAP3430_CCR_MOD, 0x0070)
+#define OMAP2_PRCM_REVISION_OFFSET	0x0000
+#define OMAP2420_PRCM_REVISION		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0000)
+#define OMAP2_PRCM_SYSCONFIG_OFFSET	0x0010
+#define OMAP2420_PRCM_SYSCONFIG		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0010)
+
+#define OMAP2_PRCM_IRQSTATUS_MPU_OFFSET	0x0018
+#define OMAP2420_PRCM_IRQSTATUS_MPU	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0018)
+#define OMAP2_PRCM_IRQENABLE_MPU_OFFSET	0x001c
+#define OMAP2420_PRCM_IRQENABLE_MPU	OMAP2420_PRM_REGADDR(OCP_MOD, 0x001c)
+
+#define OMAP2_PRCM_VOLTCTRL_OFFSET	0x0050
+#define OMAP2420_PRCM_VOLTCTRL		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0050)
+#define OMAP2_PRCM_VOLTST_OFFSET	0x0054
+#define OMAP2420_PRCM_VOLTST		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0054)
+#define OMAP2_PRCM_CLKSRC_CTRL_OFFSET	0x0060
+#define OMAP2420_PRCM_CLKSRC_CTRL	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0060)
+#define OMAP2_PRCM_CLKOUT_CTRL_OFFSET	0x0070
+#define OMAP2420_PRCM_CLKOUT_CTRL	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0070)
+#define OMAP2_PRCM_CLKEMUL_CTRL_OFFSET	0x0078
+#define OMAP2420_PRCM_CLKEMUL_CTRL	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0078)
+#define OMAP2_PRCM_CLKCFG_CTRL_OFFSET	0x0080
+#define OMAP2420_PRCM_CLKCFG_CTRL	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0080)
+#define OMAP2_PRCM_CLKCFG_STATUS_OFFSET	0x0084
+#define OMAP2420_PRCM_CLKCFG_STATUS	OMAP2420_PRM_REGADDR(OCP_MOD, 0x0084)
+#define OMAP2_PRCM_VOLTSETUP_OFFSET	0x0090
+#define OMAP2420_PRCM_VOLTSETUP		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0090)
+#define OMAP2_PRCM_CLKSSETUP_OFFSET	0x0094
+#define OMAP2420_PRCM_CLKSSETUP		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0094)
+#define OMAP2_PRCM_POLCTRL_OFFSET	0x0098
+#define OMAP2420_PRCM_POLCTRL		OMAP2420_PRM_REGADDR(OCP_MOD, 0x0098)
+
+#define OMAP2430_PRCM_REVISION		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0000)
+#define OMAP2430_PRCM_SYSCONFIG		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0010)
+
+#define OMAP2430_PRCM_IRQSTATUS_MPU	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0018)
+#define OMAP2430_PRCM_IRQENABLE_MPU	OMAP2430_PRM_REGADDR(OCP_MOD, 0x001c)
+
+#define OMAP2430_PRCM_VOLTCTRL		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0050)
+#define OMAP2430_PRCM_VOLTST		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0054)
+#define OMAP2430_PRCM_CLKSRC_CTRL	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0060)
+#define OMAP2430_PRCM_CLKOUT_CTRL	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0070)
+#define OMAP2430_PRCM_CLKEMUL_CTRL	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0078)
+#define OMAP2430_PRCM_CLKCFG_CTRL	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0080)
+#define OMAP2430_PRCM_CLKCFG_STATUS	OMAP2430_PRM_REGADDR(OCP_MOD, 0x0084)
+#define OMAP2430_PRCM_VOLTSETUP		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0090)
+#define OMAP2430_PRCM_CLKSSETUP		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0094)
+#define OMAP2430_PRCM_POLCTRL		OMAP2430_PRM_REGADDR(OCP_MOD, 0x0098)
+
+#define OMAP3_PRM_REVISION_OFFSET	0x0004
+#define OMAP3430_PRM_REVISION		OMAP34XX_PRM_REGADDR(OCP_MOD, 0x0004)
+#define OMAP3_PRM_SYSCONFIG_OFFSET	0x0014
+#define OMAP3430_PRM_SYSCONFIG		OMAP34XX_PRM_REGADDR(OCP_MOD, 0x0014)
+
+#define OMAP3_PRM_IRQSTATUS_MPU_OFFSET	0x0018
+#define OMAP3430_PRM_IRQSTATUS_MPU	OMAP34XX_PRM_REGADDR(OCP_MOD, 0x0018)
+#define OMAP3_PRM_IRQENABLE_MPU_OFFSET	0x001c
+#define OMAP3430_PRM_IRQENABLE_MPU	OMAP34XX_PRM_REGADDR(OCP_MOD, 0x001c)
+
+
+#define OMAP3_PRM_VC_SMPS_SA_OFFSET	0x0020
+#define OMAP3430_PRM_VC_SMPS_SA		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0020)
+#define OMAP3_PRM_VC_SMPS_VOL_RA_OFFSET	0x0024
+#define OMAP3430_PRM_VC_SMPS_VOL_RA	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0024)
+#define OMAP3_PRM_VC_SMPS_CMD_RA_OFFSET	0x0028
+#define OMAP3430_PRM_VC_SMPS_CMD_RA	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0028)
+#define OMAP3_PRM_VC_CMD_VAL_0_OFFSET	0x002c
+#define OMAP3430_PRM_VC_CMD_VAL_0	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x002c)
+#define OMAP3_PRM_VC_CMD_VAL_1_OFFSET	0x0030
+#define OMAP3430_PRM_VC_CMD_VAL_1	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0030)
+#define OMAP3_PRM_VC_CH_CONF_OFFSET	0x0034
+#define OMAP3430_PRM_VC_CH_CONF		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0034)
+#define OMAP3_PRM_VC_I2C_CFG_OFFSET	0x0038
+#define OMAP3430_PRM_VC_I2C_CFG		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0038)
+#define OMAP3_PRM_VC_BYPASS_VAL_OFFSET	0x003c
+#define OMAP3430_PRM_VC_BYPASS_VAL	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x003c)
+#define OMAP3_PRM_RSTCTRL_OFFSET	0x0050
+#define OMAP3430_PRM_RSTCTRL		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0050)
+#define OMAP3_PRM_RSTTIME_OFFSET	0x0054
+#define OMAP3430_PRM_RSTTIME		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0054)
+#define OMAP3_PRM_RSTST_OFFSET	0x0058
+#define OMAP3430_PRM_RSTST		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0058)
+#define OMAP3_PRM_VOLTCTRL_OFFSET	0x0060
+#define OMAP3430_PRM_VOLTCTRL		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0060)
+#define OMAP3_PRM_SRAM_PCHARGE_OFFSET	0x0064
+#define OMAP3430_PRM_SRAM_PCHARGE	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0064)
+#define OMAP3_PRM_CLKSRC_CTRL_OFFSET	0x0070
+#define OMAP3430_PRM_CLKSRC_CTRL	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0070)
+#define OMAP3_PRM_VOLTSETUP1_OFFSET	0x0090
+#define OMAP3430_PRM_VOLTSETUP1		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0090)
+#define OMAP3_PRM_VOLTOFFSET_OFFSET	0x0094
+#define OMAP3430_PRM_VOLTOFFSET		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0094)
+#define OMAP3_PRM_CLKSETUP_OFFSET	0x0098
+#define OMAP3430_PRM_CLKSETUP		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x0098)
+#define OMAP3_PRM_POLCTRL_OFFSET	0x009c
+#define OMAP3430_PRM_POLCTRL		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x009c)
+#define OMAP3_PRM_VOLTSETUP2_OFFSET	0x00a0
+#define OMAP3430_PRM_VOLTSETUP2		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00a0)
+#define OMAP3_PRM_VP1_CONFIG_OFFSET	0x00b0
+#define OMAP3430_PRM_VP1_CONFIG		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b0)
+#define OMAP3_PRM_VP1_VSTEPMIN_OFFSET	0x00b4
+#define OMAP3430_PRM_VP1_VSTEPMIN	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b4)
+#define OMAP3_PRM_VP1_VSTEPMAX_OFFSET	0x00b8
+#define OMAP3430_PRM_VP1_VSTEPMAX	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00b8)
+#define OMAP3_PRM_VP1_VLIMITTO_OFFSET	0x00bc
+#define OMAP3430_PRM_VP1_VLIMITTO	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00bc)
+#define OMAP3_PRM_VP1_VOLTAGE_OFFSET	0x00c0
+#define OMAP3430_PRM_VP1_VOLTAGE	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00c0)
+#define OMAP3_PRM_VP1_STATUS_OFFSET	0x00c4
+#define OMAP3430_PRM_VP1_STATUS		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00c4)
+#define OMAP3_PRM_VP2_CONFIG_OFFSET	0x00d0
+#define OMAP3430_PRM_VP2_CONFIG		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d0)
+#define OMAP3_PRM_VP2_VSTEPMIN_OFFSET	0x00d4
+#define OMAP3430_PRM_VP2_VSTEPMIN	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d4)
+#define OMAP3_PRM_VP2_VSTEPMAX_OFFSET	0x00d8
+#define OMAP3430_PRM_VP2_VSTEPMAX	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00d8)
+#define OMAP3_PRM_VP2_VLIMITTO_OFFSET	0x00dc
+#define OMAP3430_PRM_VP2_VLIMITTO	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00dc)
+#define OMAP3_PRM_VP2_VOLTAGE_OFFSET	0x00e0
+#define OMAP3430_PRM_VP2_VOLTAGE	OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00e0)
+#define OMAP3_PRM_VP2_STATUS_OFFSET	0x00e4
+#define OMAP3430_PRM_VP2_STATUS		OMAP34XX_PRM_REGADDR(OMAP3430_GR_MOD, 0x00e4)
+
+#define OMAP3_PRM_CLKSEL_OFFSET	0x0040
+#define OMAP3430_PRM_CLKSEL		OMAP34XX_PRM_REGADDR(OMAP3430_CCR_MOD, 0x0040)
+#define OMAP3_PRM_CLKOUT_CTRL_OFFSET	0x0070
+#define OMAP3430_PRM_CLKOUT_CTRL	OMAP34XX_PRM_REGADDR(OMAP3430_CCR_MOD, 0x0070)
 
 /*
  * Module specific PRM registers from PRM_BASE + domain offset
diff --git a/arch/arm/mach-omap2/sdrc2xxx.c b/arch/arm/mach-omap2/sdrc2xxx.c
index 0afdad5..feaec7e 100644
--- a/arch/arm/mach-omap2/sdrc2xxx.c
+++ b/arch/arm/mach-omap2/sdrc2xxx.c
@@ -99,7 +99,10 @@ u32 omap2xxx_sdrc_reprogram(u32 level, u32 force)
 	m_type = omap2xxx_sdrc_get_type();
 
 	local_irq_save(flags);
-	__raw_writel(0xffff, OMAP24XX_PRCM_VOLTSETUP);
+	if (cpu_is_omap2420())
+		__raw_writel(0xffff, OMAP2420_PRCM_VOLTSETUP);
+	else
+		__raw_writel(0xffff, OMAP2430_PRCM_VOLTSETUP);
 	omap2_sram_reprogram_sdrc(level, dll_ctrl, m_type);
 	curr_perf_level = level;
 	local_irq_restore(flags);
diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S
index cbb8930..bb29985 100644
--- a/arch/arm/mach-omap2/sram242x.S
+++ b/arch/arm/mach-omap2/sram242x.S
@@ -124,7 +124,7 @@ omap242x_sdi_cm_clksel2_pll:
 omap242x_sdi_sdrc_dlla_ctrl:
 	.word OMAP242X_SDRC_REGADDR(SDRC_DLLA_CTRL)
 omap242x_sdi_prcm_voltctrl:
-	.word OMAP242X_PRCM_VOLTCTRL
+	.word OMAP2420_PRCM_VOLTCTRL
 prcm_mask_val:
 	.word 0xFFFF3FFC
 omap242x_sdi_timer_32ksynct_cr:
@@ -220,7 +220,7 @@ omap242x_srs_sdrc_dlla_ctrl:
 omap242x_srs_sdrc_rfr_ctrl:
 	.word OMAP242X_SDRC_REGADDR(SDRC_RFR_CTRL_0)
 omap242x_srs_prcm_voltctrl:
-	.word OMAP242X_PRCM_VOLTCTRL
+	.word OMAP2420_PRCM_VOLTCTRL
 ddr_prcm_mask_val:
 	.word 0xFFFF3FFC
 omap242x_srs_timer_32ksynct:
@@ -305,7 +305,7 @@ wait_dll_lock:
 	ldmfd	sp!, {r0-r12, pc}	@ restore regs and return
 
 omap242x_ssp_set_config:
-	.word OMAP242X_PRCM_CLKCFG_CTRL
+	.word OMAP2420_PRCM_CLKCFG_CTRL
 omap242x_ssp_pll_ctl:
 	.word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKEN)
 omap242x_ssp_pll_stat:
diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S
index 054b0f3..9955abc 100644
--- a/arch/arm/mach-omap2/sram243x.S
+++ b/arch/arm/mach-omap2/sram243x.S
@@ -124,7 +124,7 @@ omap243x_sdi_cm_clksel2_pll:
 omap243x_sdi_sdrc_dlla_ctrl:
 	.word OMAP243X_SDRC_REGADDR(SDRC_DLLA_CTRL)
 omap243x_sdi_prcm_voltctrl:
-	.word OMAP243X_PRCM_VOLTCTRL
+	.word OMAP2430_PRCM_VOLTCTRL
 prcm_mask_val:
 	.word 0xFFFF3FFC
 omap243x_sdi_timer_32ksynct_cr:
@@ -220,7 +220,7 @@ omap243x_srs_sdrc_dlla_ctrl:
 omap243x_srs_sdrc_rfr_ctrl:
 	.word OMAP243X_SDRC_REGADDR(SDRC_RFR_CTRL_0)
 omap243x_srs_prcm_voltctrl:
-	.word OMAP243X_PRCM_VOLTCTRL
+	.word OMAP2430_PRCM_VOLTCTRL
 ddr_prcm_mask_val:
 	.word 0xFFFF3FFC
 omap243x_srs_timer_32ksynct:
@@ -305,7 +305,7 @@ wait_dll_lock:
 	ldmfd	sp!, {r0-r12, pc}	@ restore regs and return
 
 omap243x_ssp_set_config:
-	.word OMAP243X_PRCM_CLKCFG_CTRL
+	.word OMAP2430_PRCM_CLKCFG_CTRL
 omap243x_ssp_pll_ctl:
 	.word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKEN)
 omap243x_ssp_pll_stat:
diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 4202cf4..10a6413 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -89,14 +89,12 @@
 
 #define OMAP2_PRCM_BASE		OMAP2420_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2420_CM_BASE
-#define OMAP2_PRM_BASE		OMAP2420_PRM_BASE
 #define OMAP2_VA_IC_BASE	IO_ADDRESS(OMAP24XX_IC_BASE)
 
 #elif defined(CONFIG_ARCH_OMAP2430)
 
 #define OMAP2_PRCM_BASE		OMAP2430_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2430_CM_BASE
-#define OMAP2_PRM_BASE		OMAP2430_PRM_BASE
 #define OMAP2_VA_IC_BASE	IO_ADDRESS(OMAP24XX_IC_BASE)
 
 #endif
diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index 581d910..4233064 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -86,7 +86,6 @@
 #if defined(CONFIG_ARCH_OMAP3430)
 
 #define OMAP2_CM_BASE			OMAP3430_CM_BASE
-#define OMAP2_PRM_BASE			OMAP3430_PRM_BASE
 #define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
 
 #endif
-- 
cgit v0.10.2


From 2e693f841f70508a082ded0276a33b2e503d98b7 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:26:45 -0700
Subject: ARM: OMAP2/3: Move define of OMAP2_VA_IC_BASE to be local to
 entry-macro.S

Move define of OMAP2_VA_IC_BASE to be local to entry-macro.S

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/include/mach/entry-macro.S b/arch/arm/plat-omap/include/mach/entry-macro.S
index 2276f89..33256a0 100644
--- a/arch/arm/plat-omap/include/mach/entry-macro.S
+++ b/arch/arm/plat-omap/include/mach/entry-macro.S
@@ -58,11 +58,14 @@
 #endif
 #if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
 
-#if defined(CONFIG_ARCH_OMAP24XX)
 #include <mach/omap24xx.h>
-#endif
-#if defined(CONFIG_ARCH_OMAP34XX)
 #include <mach/omap34xx.h>
+
+/* REVISIT: This should be set dynamically if CONFIG_MULTI_OMAP2 is selected */
+#if defined(CONFIG_ARCH_OMAP2420) || defined(CONFIG_ARCH_OMAP2430)
+#define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP24XX_IC_BASE)
+#elif defined(CONFIG_ARCH_OMAP34XX)
+#define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
 #endif
 
 #define INTCPS_SIR_IRQ_OFFSET	0x0040		/* Active interrupt offset */
diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 10a6413..4ce9b6a 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -89,13 +89,11 @@
 
 #define OMAP2_PRCM_BASE		OMAP2420_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2420_CM_BASE
-#define OMAP2_VA_IC_BASE	IO_ADDRESS(OMAP24XX_IC_BASE)
 
 #elif defined(CONFIG_ARCH_OMAP2430)
 
 #define OMAP2_PRCM_BASE		OMAP2430_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2430_CM_BASE
-#define OMAP2_VA_IC_BASE	IO_ADDRESS(OMAP24XX_IC_BASE)
 
 #endif
 
diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index 4233064..36c99ca 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -86,7 +86,6 @@
 #if defined(CONFIG_ARCH_OMAP3430)
 
 #define OMAP2_CM_BASE			OMAP3430_CM_BASE
-#define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
 
 #endif
 
-- 
cgit v0.10.2


From 07b95d000d48bd44e6f573566c1eba985ef25ed4 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:26:46 -0700
Subject: ARM: OMAP2/3: Remove OMAP2_PRCM_BASE

It's currently unused, and processor specific defines should
be used instead.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 4ce9b6a..60f8324 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -87,12 +87,10 @@
 
 #if defined(CONFIG_ARCH_OMAP2420)
 
-#define OMAP2_PRCM_BASE		OMAP2420_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2420_CM_BASE
 
 #elif defined(CONFIG_ARCH_OMAP2430)
 
-#define OMAP2_PRCM_BASE		OMAP2430_PRCM_BASE
 #define OMAP2_CM_BASE		OMAP2430_CM_BASE
 
 #endif
-- 
cgit v0.10.2


From ef6685a6ded6a20ff33a868cc2c0dba0505b7e4c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:26:46 -0700
Subject: ARM: OMAP2/3: Remove OMAP_CM_REGADDR

Processor specific macros should be used instead.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/clock24xx.h b/arch/arm/mach-omap2/clock24xx.h
index f0e4480..458f00c 100644
--- a/arch/arm/mach-omap2/clock24xx.h
+++ b/arch/arm/mach-omap2/clock24xx.h
@@ -26,9 +26,11 @@
 
 /* REVISIT: These should be set dynamically for CONFIG_MULTI_OMAP2 */
 #ifdef CONFIG_ARCH_OMAP2420
+#define OMAP_CM_REGADDR			OMAP2420_CM_REGADDR
 #define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP2420_PRCM_CLKOUT_CTRL
 #define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP2420_PRCM_CLKEMUL_CTRL
 #else
+#define OMAP_CM_REGADDR			OMAP2430_CM_REGADDR
 #define OMAP24XX_PRCM_CLKOUT_CTRL	OMAP2430_PRCM_CLKOUT_CTRL
 #define OMAP24XX_PRCM_CLKEMUL_CTRL	OMAP2430_PRCM_CLKEMUL_CTRL
 #endif
diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h
index 017a30e..496f0e9 100644
--- a/arch/arm/mach-omap2/clock34xx.h
+++ b/arch/arm/mach-omap2/clock34xx.h
@@ -27,6 +27,8 @@
 #include "prm.h"
 #include "prm-regbits-34xx.h"
 
+#define OMAP_CM_REGADDR		OMAP34XX_CM_REGADDR
+
 static unsigned long omap3_dpll_recalc(struct clk *clk);
 static unsigned long omap3_clkoutx2_recalc(struct clk *clk);
 static void omap3_dpll_allow_idle(struct clk *clk);
diff --git a/arch/arm/mach-omap2/cm.h b/arch/arm/mach-omap2/cm.h
index efc082a..1d3c93b 100644
--- a/arch/arm/mach-omap2/cm.h
+++ b/arch/arm/mach-omap2/cm.h
@@ -16,17 +16,12 @@
 
 #include "prcm-common.h"
 
-#ifndef __ASSEMBLER__
-#define OMAP_CM_REGADDR(module, reg)					\
-			IO_ADDRESS(OMAP2_CM_BASE + (module) + (reg))
-#else
 #define OMAP2420_CM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP2420_CM_BASE + (module) + (reg))
 #define OMAP2430_CM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP2430_CM_BASE + (module) + (reg))
 #define OMAP34XX_CM_REGADDR(module, reg)				\
 			IO_ADDRESS(OMAP3430_CM_BASE + (module) + (reg))
-#endif
 
 /*
  * Architecture-specific global CM registers
diff --git a/arch/arm/plat-omap/include/mach/omap24xx.h b/arch/arm/plat-omap/include/mach/omap24xx.h
index 60f8324..696edfc 100644
--- a/arch/arm/plat-omap/include/mach/omap24xx.h
+++ b/arch/arm/plat-omap/include/mach/omap24xx.h
@@ -85,15 +85,5 @@
 #define OMAP24XX_SEC_AES_BASE	(OMAP24XX_SEC_BASE + 0x6000)
 #define OMAP24XX_SEC_PKA_BASE	(OMAP24XX_SEC_BASE + 0x8000)
 
-#if defined(CONFIG_ARCH_OMAP2420)
-
-#define OMAP2_CM_BASE		OMAP2420_CM_BASE
-
-#elif defined(CONFIG_ARCH_OMAP2430)
-
-#define OMAP2_CM_BASE		OMAP2430_CM_BASE
-
-#endif
-
 #endif /* __ASM_ARCH_OMAP24XX_H */
 
diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index 36c99ca..cc25733 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -83,12 +83,6 @@
 
 #define OMAP34XX_MAILBOX_BASE		(L4_34XX_BASE + 0x94000)
 
-#if defined(CONFIG_ARCH_OMAP3430)
-
-#define OMAP2_CM_BASE			OMAP3430_CM_BASE
-
-#endif
-
 #define OMAP34XX_DSP_BASE	0x58000000
 #define OMAP34XX_DSP_MEM_BASE	(OMAP34XX_DSP_BASE + 0x0)
 #define OMAP34XX_DSP_IPI_BASE	(OMAP34XX_DSP_BASE + 0x1000000)
-- 
cgit v0.10.2


From d76076636bc69fc28269076b867fc72031634357 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 25 May 2009 11:26:47 -0700
Subject: ARM: OMAP2/3: Reorganize Makefile to add omap4 support

We don't necessarily want to compile in irq.o and sdrc.o for omap4.
Also, clock and prcm may not be implemented initially.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index c49d9bf..bf3827a 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -3,9 +3,14 @@
 #
 
 # Common support
-obj-y := irq.o id.o io.o sdrc.o control.o prcm.o clock.o mux.o \
-		devices.o serial.o gpmc.o timer-gp.o powerdomain.o \
-		clockdomain.o
+obj-y := id.o io.o control.o mux.o devices.o serial.o gpmc.o timer-gp.o
+
+omap-2-3-common				= irq.o sdrc.o
+prcm-common				= prcm.o powerdomain.o
+clock-common				= clock.o clockdomain.o
+
+obj-$(CONFIG_ARCH_OMAP2) += $(omap-2-3-common) $(prcm-common) $(clock-common)
+obj-$(CONFIG_ARCH_OMAP3) += $(omap-2-3-common) $(prcm-common) $(clock-common)
 
 obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o
 
-- 
cgit v0.10.2


From ce8a17fd3f19baac0171611a73d5ac2072f45c7a Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Mon, 25 May 2009 11:26:47 -0700
Subject: ARM: OMAP: Remove unwanted type casts and fix the compiler warning.

This patch fixes the compiler warning "assignment from incompatible
pointer type"  in dmtimer.c and removes the tye casts. These warnings
were suppressed by type catsing.

The proposed fix was suggested by Russell King <rmk+kernel@arm.linux.org.uk>

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 55bb996..ee20612 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -214,7 +214,7 @@ static const char *omap2_dm_source_names[] __initdata = {
 	NULL
 };
 
-static struct clk **omap2_dm_source_clocks[3];
+static struct clk *omap2_dm_source_clocks[3];
 static const int dm_timer_count = ARRAY_SIZE(omap2_dm_timers);
 
 #elif defined(CONFIG_ARCH_OMAP3)
@@ -247,7 +247,7 @@ static const char *omap3_dm_source_names[] __initdata = {
 	NULL
 };
 
-static struct clk **omap3_dm_source_clocks[2];
+static struct clk *omap3_dm_source_clocks[2];
 static const int dm_timer_count = ARRAY_SIZE(omap3_dm_timers);
 
 #else
@@ -257,7 +257,7 @@ static const int dm_timer_count = ARRAY_SIZE(omap3_dm_timers);
 #endif
 
 static struct omap_dm_timer *dm_timers;
-static char **dm_source_names;
+static const char **dm_source_names;
 static struct clk **dm_source_clocks;
 
 static spinlock_t dm_timer_lock;
@@ -705,12 +705,12 @@ int __init omap_dm_timer_init(void)
 		dm_timers = omap1_dm_timers;
 	else if (cpu_is_omap24xx()) {
 		dm_timers = omap2_dm_timers;
-		dm_source_names = (char **)omap2_dm_source_names;
-		dm_source_clocks = (struct clk **)omap2_dm_source_clocks;
+		dm_source_names = omap2_dm_source_names;
+		dm_source_clocks = omap2_dm_source_clocks;
 	} else if (cpu_is_omap34xx()) {
 		dm_timers = omap3_dm_timers;
-		dm_source_names = (char **)omap3_dm_source_names;
-		dm_source_clocks = (struct clk **)omap3_dm_source_clocks;
+		dm_source_names = omap3_dm_source_names;
+		dm_source_clocks = omap3_dm_source_clocks;
 	}
 
 	if (cpu_class_is_omap2())
-- 
cgit v0.10.2


From da7a0649bc8f3f65a01cb14e8b5c97e2ff40098a Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Mon, 25 May 2009 11:26:48 -0700
Subject: ARM: OMAP: Remove useless omap_sram_error function.

This patch removes fixes omap_sram_error() function and replace the
error paths with BUG_ON.

The proposed fix was suggested by Russell King <rmk+kernel@arm.linux.org.uk>

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index fa5297d..e1493d8 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -242,20 +242,13 @@ void * omap_sram_push(void * start, unsigned long size)
 	return (void *)omap_sram_ceil;
 }
 
-static void omap_sram_error(void)
-{
-	panic("Uninitialized SRAM function\n");
-}
-
 #ifdef CONFIG_ARCH_OMAP1
 
 static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl);
 
 void omap_sram_reprogram_clock(u32 dpllctl, u32 ckctl)
 {
-	if (!_omap_sram_reprogram_clock)
-		omap_sram_error();
-
+	BUG_ON(!_omap_sram_reprogram_clock);
 	_omap_sram_reprogram_clock(dpllctl, ckctl);
 }
 
@@ -280,9 +273,7 @@ static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
 void omap2_sram_ddr_init(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
 		   u32 base_cs, u32 force_unlock)
 {
-	if (!_omap2_sram_ddr_init)
-		omap_sram_error();
-
+	BUG_ON(!_omap2_sram_ddr_init);
 	_omap2_sram_ddr_init(slow_dll_ctrl, fast_dll_ctrl,
 			     base_cs, force_unlock);
 }
@@ -292,9 +283,7 @@ static void (*_omap2_sram_reprogram_sdrc)(u32 perf_level, u32 dll_val,
 
 void omap2_sram_reprogram_sdrc(u32 perf_level, u32 dll_val, u32 mem_type)
 {
-	if (!_omap2_sram_reprogram_sdrc)
-		omap_sram_error();
-
+	BUG_ON(!_omap2_sram_reprogram_sdrc);
 	_omap2_sram_reprogram_sdrc(perf_level, dll_val, mem_type);
 }
 
@@ -302,9 +291,7 @@ static u32 (*_omap2_set_prcm)(u32 dpll_ctrl_val, u32 sdrc_rfr_val, int bypass);
 
 u32 omap2_set_prcm(u32 dpll_ctrl_val, u32 sdrc_rfr_val, int bypass)
 {
-	if (!_omap2_set_prcm)
-		omap_sram_error();
-
+	BUG_ON(!_omap2_set_prcm);
 	return _omap2_set_prcm(dpll_ctrl_val, sdrc_rfr_val, bypass);
 }
 #endif
@@ -360,9 +347,7 @@ static u32 (*_omap3_sram_configure_core_dpll)(u32 sdrc_rfr_ctrl,
 u32 omap3_configure_core_dpll(u32 sdrc_rfr_ctrl, u32 sdrc_actim_ctrla,
 			      u32 sdrc_actim_ctrlb, u32 m2)
 {
-	if (!_omap3_sram_configure_core_dpll)
-		omap_sram_error();
-
+	BUG_ON(!_omap3_sram_configure_core_dpll);
 	return _omap3_sram_configure_core_dpll(sdrc_rfr_ctrl,
 					       sdrc_actim_ctrla,
 					       sdrc_actim_ctrlb, m2);
-- 
cgit v0.10.2


From 8f9ccfeeb2cecb54dd093119291ab271ab0fd94a Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Mon, 25 May 2009 11:26:48 -0700
Subject: ARM: OMAP: Remove unnecessary omap2_globals.

This patch removes unnecessary omap2_globals and pass the global structures
directly as function argument.

The proposed cleanup was suggested by Russell King <rmk+kernel@arm.linux.org.uk>

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c
index e1add78..70b68ef 100644
--- a/arch/arm/plat-omap/common.c
+++ b/arch/arm/plat-omap/common.c
@@ -287,9 +287,7 @@ arch_initcall(omap_init_clocksource_32k);
 
 #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 
-static struct omap_globals *omap2_globals;
-
-static void __init __omap2_set_globals(void)
+static void __init __omap2_set_globals(struct omap_globals *omap2_globals)
 {
 	omap2_set_globals_tap(omap2_globals);
 	omap2_set_globals_sdrc(omap2_globals);
@@ -313,8 +311,7 @@ static struct omap_globals omap242x_globals = {
 
 void __init omap2_set_globals_242x(void)
 {
-	omap2_globals = &omap242x_globals;
-	__omap2_set_globals();
+	__omap2_set_globals(&omap242x_globals);
 }
 #endif
 
@@ -332,8 +329,7 @@ static struct omap_globals omap243x_globals = {
 
 void __init omap2_set_globals_243x(void)
 {
-	omap2_globals = &omap243x_globals;
-	__omap2_set_globals();
+	__omap2_set_globals(&omap243x_globals);
 }
 #endif
 
@@ -351,8 +347,7 @@ static struct omap_globals omap343x_globals = {
 
 void __init omap2_set_globals_343x(void)
 {
-	omap2_globals = &omap343x_globals;
-	__omap2_set_globals();
+	__omap2_set_globals(&omap343x_globals);
 }
 #endif
 
-- 
cgit v0.10.2


From ff99be573e02e9f7edc23b472c7f9a5ddba12795 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 17:39:03 +0200
Subject: perf_counter: x86: Expose INV and EDGE bits

Expose the INV and EDGE bits of the PMU to raw configs.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.494709027@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6cc1660..c14437f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -87,11 +87,15 @@ static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
 #define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
 #define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK 		\
 	(CORE_EVNTSEL_EVENT_MASK |	\
 	 CORE_EVNTSEL_UNIT_MASK  |	\
+	 CORE_EVNTSEL_EDGE_MASK  |	\
+	 CORE_EVNTSEL_INV_MASK  |	\
 	 CORE_EVNTSEL_COUNTER_MASK)
 
 	return event & CORE_EVNTSEL_MASK;
@@ -119,11 +123,15 @@ static u64 amd_pmu_raw_event(u64 event)
 {
 #define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
 #define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
+#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
+#define K7_EVNTSEL_INV_MASK	0x000800000ULL
 #define K7_EVNTSEL_COUNTER_MASK	0x0FF000000ULL
 
 #define K7_EVNTSEL_MASK			\
 	(K7_EVNTSEL_EVENT_MASK |	\
 	 K7_EVNTSEL_UNIT_MASK  |	\
+	 K7_EVNTSEL_EDGE_MASK  |	\
+	 K7_EVNTSEL_INV_MASK   |	\
 	 K7_EVNTSEL_COUNTER_MASK)
 
 	return event & K7_EVNTSEL_MASK;
-- 
cgit v0.10.2


From 48e22d56ecdeddd1ffb42a02fccba5c6ef42b133 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 17:39:04 +0200
Subject: perf_counter: x86: Remove interrupt throttle

remove the x86 specific interrupt throttle

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.616671838@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b4f6440..89b63b5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -763,8 +763,6 @@ static void local_apic_timer_interrupt(void)
 	inc_irq_stat(apic_timer_irqs);
 
 	evt->event_handler(evt);
-
-	perf_counter_unthrottle();
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index c14437f..8c8177f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -719,11 +719,6 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
 }
 
 /*
- * Maximum interrupt frequency of 100KHz per CPU
- */
-#define PERFMON_MAX_INTERRUPTS (100000/HZ)
-
-/*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
  */
@@ -775,15 +770,14 @@ again:
 	if (status)
 		goto again;
 
-	if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS)
-		perf_enable();
+	perf_enable();
 
 	return 1;
 }
 
 static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 {
-	int cpu, idx, throttle = 0, handled = 0;
+	int cpu, idx, handled = 0;
 	struct cpu_hw_counters *cpuc;
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
@@ -792,16 +786,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
-	if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) {
-		throttle = 1;
-		__perf_disable();
-		cpuc->enabled = 0;
-		barrier();
-	}
-
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		int disable = 0;
-
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -809,45 +794,23 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 		hwc = &counter->hw;
 
 		if (counter->hw_event.nmi != nmi)
-			goto next;
+			continue;
 
 		val = x86_perf_counter_update(counter, hwc, idx);
 		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
-			goto next;
+			continue;
 
 		/* counter overflow */
 		x86_perf_counter_set_period(counter, hwc, idx);
 		handled = 1;
 		inc_irq_stat(apic_perf_irqs);
-		disable = perf_counter_overflow(counter, nmi, regs, 0);
-
-next:
-		if (disable || throttle)
+		if (perf_counter_overflow(counter, nmi, regs, 0))
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
 	return handled;
 }
 
-void perf_counter_unthrottle(void)
-{
-	struct cpu_hw_counters *cpuc;
-
-	if (!x86_pmu_initialized())
-		return;
-
-	cpuc = &__get_cpu_var(cpu_hw_counters);
-	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
-		/*
-		 * Clear them before re-enabling irqs/NMIs again:
-		 */
-		cpuc->interrupts = 0;
-		perf_enable();
-	} else {
-		cpuc->interrupts = 0;
-	}
-}
-
 void smp_perf_counter_interrupt(struct pt_regs *regs)
 {
 	irq_enter();
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d3e85de..0c160be 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -570,7 +570,6 @@ extern int perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
-extern void perf_counter_unthrottle(void);
 extern void __perf_disable(void);
 extern bool __perf_enable(void);
 extern void perf_disable(void);
@@ -635,7 +634,6 @@ static inline int perf_counter_init_task(struct task_struct *child)	{ }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
-static inline void perf_counter_unthrottle(void)			{ }
 static inline void perf_disable(void)					{ }
 static inline void perf_enable(void)					{ }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
-- 
cgit v0.10.2


From a78ac3258782f3e64cb40beb5990808e1febcc0c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 17:39:05 +0200
Subject: perf_counter: Generic per counter interrupt throttle

Introduce a generic per counter interrupt throttle.

This uses the perf_counter_overflow() quick disable to throttle a specific
counter when its going too fast when a pmu->unthrottle() method is provided
which can undo the quick disable.

Power needs to implement both the quick disable and the unthrottle method.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.703093461@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8c8177f..c4b543d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -623,6 +623,18 @@ try_generic:
 	return 0;
 }
 
+static void x86_pmu_unthrottle(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
+				cpuc->counters[hwc->idx] != counter))
+		return;
+
+	x86_pmu.enable(hwc, hwc->idx);
+}
+
 void perf_counter_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
@@ -1038,6 +1050,7 @@ static const struct pmu pmu = {
 	.enable		= x86_pmu_enable,
 	.disable	= x86_pmu_disable,
 	.read		= x86_pmu_read,
+	.unthrottle	= x86_pmu_unthrottle,
 };
 
 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0c160be..e3a7585 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -267,6 +267,15 @@ enum perf_event_type {
 	PERF_EVENT_PERIOD		= 4,
 
 	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u64				time;
+	 * };
+	 */
+	PERF_EVENT_THROTTLE		= 5,
+	PERF_EVENT_UNTHROTTLE		= 6,
+
+	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
 	 * will be PERF_RECORD_*
 	 *
@@ -367,6 +376,7 @@ struct pmu {
 	int (*enable)			(struct perf_counter *counter);
 	void (*disable)			(struct perf_counter *counter);
 	void (*read)			(struct perf_counter *counter);
+	void (*unthrottle)		(struct perf_counter *counter);
 };
 
 /**
@@ -613,6 +623,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_priv;
 extern int sysctl_perf_counter_mlock;
+extern int sysctl_perf_counter_limit;
 
 extern void perf_counter_init(void);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 14b1fe9..ec9c4007 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,6 +46,7 @@ static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
+int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1066,12 +1067,15 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 }
 
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_counter *counter, int enable);
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
 static void perf_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
-	u64 irq_period;
+	u64 interrupts, irq_period;
 	u64 events, period;
 	s64 delta;
 
@@ -1080,10 +1084,19 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 			continue;
 
+		interrupts = counter->hw.interrupts;
+		counter->hw.interrupts = 0;
+
+		if (interrupts == MAX_INTERRUPTS) {
+			perf_log_throttle(counter, 1);
+			counter->pmu->unthrottle(counter);
+			interrupts = 2*sysctl_perf_counter_limit/HZ;
+		}
+
 		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
 			continue;
 
-		events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+		events = HZ * interrupts * counter->hw.irq_period;
 		period = div64_u64(events, counter->hw_event.irq_freq);
 
 		delta = (s64)(1 + period - counter->hw.irq_period);
@@ -1097,7 +1110,6 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		perf_log_period(counter, irq_period);
 
 		counter->hw.irq_period = irq_period;
-		counter->hw.interrupts = 0;
 	}
 	spin_unlock(&ctx->lock);
 }
@@ -2544,6 +2556,35 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
 }
 
 /*
+ * IRQ throttle logging
+ */
+
+static void perf_log_throttle(struct perf_counter *counter, int enable)
+{
+	struct perf_output_handle handle;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				time;
+	} throttle_event = {
+		.header = {
+			.type = PERF_EVENT_THROTTLE + 1,
+			.misc = 0,
+			.size = sizeof(throttle_event),
+		},
+		.time = sched_clock(),
+	};
+
+	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, throttle_event);
+	perf_output_end(&handle);
+}
+
+/*
  * Generic counter overflow handling.
  */
 
@@ -2551,9 +2592,19 @@ int perf_counter_overflow(struct perf_counter *counter,
 			  int nmi, struct pt_regs *regs, u64 addr)
 {
 	int events = atomic_read(&counter->event_limit);
+	int throttle = counter->pmu->unthrottle != NULL;
 	int ret = 0;
 
-	counter->hw.interrupts++;
+	if (!throttle) {
+		counter->hw.interrupts++;
+	} else if (counter->hw.interrupts != MAX_INTERRUPTS) {
+		counter->hw.interrupts++;
+		if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
+			counter->hw.interrupts = MAX_INTERRUPTS;
+			perf_log_throttle(counter, 0);
+			ret = 1;
+		}
+	}
 
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3cb1849..0c4bf86 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -930,6 +930,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_int_limit",
+		.data		= &sysctl_perf_counter_limit,
+		.maxlen		= sizeof(sysctl_perf_counter_limit),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 /*
  * NOTE: do not add new entries to this table unless you have read
-- 
cgit v0.10.2


From 53b441a565bf4036ab49c8ea04c5ad06ace7dd6b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 25 May 2009 21:41:28 +0200
Subject: Revert "perf_counter, x86: speed up the scheduling fast-path"

This reverts commit b68f1d2e7aa21029d73c7d453a8046e95d351740.

It is causing problems (stuck/stuttering profiling) - when mixed
NMI and non-NMI counters are used.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.703093461@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index c4b543d..189bf9d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -293,7 +293,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 			return -EACCES;
 		hwc->nmi = 1;
 	}
-	perf_counters_lapic_init(hwc->nmi);
 
 	if (!hwc->irq_period)
 		hwc->irq_period = x86_pmu.max_period;
@@ -612,6 +611,8 @@ try_generic:
 		hwc->counter_base = x86_pmu.perfctr;
 	}
 
+	perf_counters_lapic_init(hwc->nmi);
+
 	x86_pmu.disable(hwc, idx);
 
 	cpuc->counters[idx] = counter;
@@ -1037,7 +1038,7 @@ void __init init_hw_perf_counters(void)
 
 	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
 
-	perf_counters_lapic_init(1);
+	perf_counters_lapic_init(0);
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
 
-- 
cgit v0.10.2


From 0127c3ea082ee9f1034789b978dfc7fd83254617 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 25 May 2009 22:03:26 +0200
Subject: perf_counter: fix warning & lockup

 - remove bogus warning
 - fix wakeup from NMI path lockup
 - also fix up whitespace noise in perf_counter.h

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.703093461@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e3a7585..2b16ed3 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -73,7 +73,7 @@ enum sw_event_ids {
 	PERF_SW_EVENTS_MAX		= 7,
 };
 
-#define __PERF_COUNTER_MASK(name) 			\
+#define __PERF_COUNTER_MASK(name)			\
 	(((1ULL << PERF_COUNTER_##name##_BITS) - 1) <<	\
 	 PERF_COUNTER_##name##_SHIFT)
 
@@ -98,14 +98,14 @@ enum sw_event_ids {
  * in the overflow packets.
  */
 enum perf_counter_record_format {
-	PERF_RECORD_IP		= 1U << 0,
-	PERF_RECORD_TID		= 1U << 1,
-	PERF_RECORD_TIME	= 1U << 2,
-	PERF_RECORD_ADDR	= 1U << 3,
-	PERF_RECORD_GROUP	= 1U << 4,
-	PERF_RECORD_CALLCHAIN	= 1U << 5,
-	PERF_RECORD_CONFIG	= 1U << 6,
-	PERF_RECORD_CPU		= 1U << 7,
+	PERF_RECORD_IP			= 1U << 0,
+	PERF_RECORD_TID			= 1U << 1,
+	PERF_RECORD_TIME		= 1U << 2,
+	PERF_RECORD_ADDR		= 1U << 3,
+	PERF_RECORD_GROUP		= 1U << 4,
+	PERF_RECORD_CALLCHAIN		= 1U << 5,
+	PERF_RECORD_CONFIG		= 1U << 6,
+	PERF_RECORD_CPU			= 1U << 7,
 };
 
 /*
@@ -235,13 +235,13 @@ enum perf_event_type {
 	 * correlate userspace IPs to code. They have the following structure:
 	 *
 	 * struct {
-	 * 	struct perf_event_header	header;
+	 *	struct perf_event_header	header;
 	 *
-	 * 	u32				pid, tid;
-	 * 	u64				addr;
-	 * 	u64				len;
-	 * 	u64				pgoff;
-	 * 	char				filename[];
+	 *	u32				pid, tid;
+	 *	u64				addr;
+	 *	u64				len;
+	 *	u64				pgoff;
+	 *	char				filename[];
 	 * };
 	 */
 	PERF_EVENT_MMAP			= 1,
@@ -249,27 +249,27 @@ enum perf_event_type {
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
+	 *	struct perf_event_header	header;
 	 *
-	 * 	u32				pid, tid;
-	 * 	char				comm[];
+	 *	u32				pid, tid;
+	 *	char				comm[];
 	 * };
 	 */
 	PERF_EVENT_COMM			= 3,
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u64				time;
-	 * 	u64				irq_period;
+	 *	struct perf_event_header	header;
+	 *	u64				time;
+	 *	u64				irq_period;
 	 * };
 	 */
 	PERF_EVENT_PERIOD		= 4,
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u64				time;
+	 *	struct perf_event_header	header;
+	 *	u64				time;
 	 * };
 	 */
 	PERF_EVENT_THROTTLE		= 5,
@@ -280,23 +280,23 @@ enum perf_event_type {
 	 * will be PERF_RECORD_*
 	 *
 	 * struct {
-	 * 	struct perf_event_header	header;
+	 *	struct perf_event_header	header;
 	 *
-	 * 	{ u64			ip;	  } && PERF_RECORD_IP
-	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
-	 * 	{ u64			time;     } && PERF_RECORD_TIME
-	 * 	{ u64			addr;     } && PERF_RECORD_ADDR
-	 * 	{ u64			config;   } && PERF_RECORD_CONFIG
-	 * 	{ u32			cpu, res; } && PERF_RECORD_CPU
+	 *	{ u64			ip;	  } && PERF_RECORD_IP
+	 *	{ u32			pid, tid; } && PERF_RECORD_TID
+	 *	{ u64			time;     } && PERF_RECORD_TIME
+	 *	{ u64			addr;     } && PERF_RECORD_ADDR
+	 *	{ u64			config;   } && PERF_RECORD_CONFIG
+	 *	{ u32			cpu, res; } && PERF_RECORD_CPU
 	 *
-	 * 	{ u64			nr;
-	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
+	 *	{ u64			nr;
+	 *	  { u64 event, val; }	cnt[nr];  } && PERF_RECORD_GROUP
 	 *
-	 * 	{ u16			nr,
-	 * 				hv,
-	 * 				kernel,
-	 * 				user;
-	 * 	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
+	 *	{ u16			nr,
+	 *				hv,
+	 *				kernel,
+	 *				user;
+	 *	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
 	 * };
 	 */
 };
@@ -406,7 +406,7 @@ struct perf_mmap_data {
 	atomic_t			wakeup;		/* needs a wakeup    */
 
 	struct perf_counter_mmap_page   *user_page;
-	void 				*data_pages[0];
+	void				*data_pages[0];
 };
 
 struct perf_pending_entry {
@@ -422,7 +422,7 @@ struct perf_counter {
 	struct list_head		list_entry;
 	struct list_head		event_entry;
 	struct list_head		sibling_list;
-	int 				nr_siblings;
+	int				nr_siblings;
 	struct perf_counter		*group_leader;
 	const struct pmu		*pmu;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ec9c4007..070f92d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2576,7 +2576,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
 		.time = sched_clock(),
 	};
 
-	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0);
+	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
 	if (ret)
 		return;
 
@@ -3449,8 +3449,6 @@ void perf_counter_exit_task(struct task_struct *child)
 	struct perf_counter_context *child_ctx;
 	unsigned long flags;
 
-	WARN_ON_ONCE(child != current);
-
 	child_ctx = child->perf_counter_ctxp;
 
 	if (likely(!child_ctx))
-- 
cgit v0.10.2


From 4f5359685af6de7dca101393dc606620adbe963f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 18 May 2009 19:35:34 +0800
Subject: tracing: add trace_event_read_lock()

I found that there is nothing to protect event_hash in
ftrace_find_event(). Rcu protects the event hashlist
but not the event itself while we use it after its extraction
through ftrace_find_event().

This lack of a proper locking in this spot opens a race
window between any event dereferencing and module removal.

Eg:

--Task A--

print_trace_line(trace) {
  event = find_ftrace_event(trace)

--Task B--

trace_module_remove_events(mod) {
  list_trace_events_module(ev, mod) {
    unregister_ftrace_event(ev->event) {
      hlist_del(ev->event->node)
        list_del(....)
    }
  }
}
|--> module removed, the event has been dropped

--Task A--

  event->print(trace); // Dereferencing freed memory

If the event retrieved belongs to a module and this module
is concurrently removed, we may end up dereferencing a data
from a freed module.

RCU could solve this, but it would add latency to the kernel and
forbid tracers output callbacks to call any sleepable code.
So this fix converts 'trace_event_mutex' to a read/write semaphore,
and adds trace_event_read_lock() to protect ftrace_find_event().

[ Impact: fix possible freed memory dereference in ftrace ]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <4A114806.7090302@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dd40d23..02d32ba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1569,12 +1569,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		p = s_next(m, p, &l);
 	}
 
+	trace_event_read_lock();
 	return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
 	atomic_dec(&trace_record_cmdline_disabled);
+	trace_event_read_unlock();
 }
 
 static void print_lat_help_header(struct seq_file *m)
@@ -1817,6 +1819,7 @@ static int trace_empty(struct trace_iterator *iter)
 	return 1;
 }
 
+/*  Called with trace_event_read_lock() held. */
 static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
 	enum print_line_t ret;
@@ -3008,6 +3011,7 @@ waitagain:
 	       offsetof(struct trace_iterator, seq));
 	iter->pos = -1;
 
+	trace_event_read_lock();
 	while (find_next_entry_inc(iter) != NULL) {
 		enum print_line_t ret;
 		int len = iter->seq.len;
@@ -3024,6 +3028,7 @@ waitagain:
 		if (iter->seq.len >= cnt)
 			break;
 	}
+	trace_event_read_unlock();
 
 	/* Now copy what we have to the user */
 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -3146,6 +3151,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 		goto out_err;
 	}
 
+	trace_event_read_lock();
+
 	/* Fill as many pages as possible. */
 	for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
 		pages[i] = alloc_page(GFP_KERNEL);
@@ -3168,6 +3175,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 		trace_seq_init(&iter->seq);
 	}
 
+	trace_event_read_unlock();
 	mutex_unlock(&iter->mutex);
 
 	spd.nr_pages = i;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 489c0e8..7136420 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -14,7 +14,7 @@
 /* must be a power of 2 */
 #define EVENT_HASHSIZE	128
 
-static DEFINE_MUTEX(trace_event_mutex);
+static DECLARE_RWSEM(trace_event_mutex);
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
@@ -466,6 +466,7 @@ static int task_state_char(unsigned long state)
  * @type: the type of event to look for
  *
  * Returns an event of type @type otherwise NULL
+ * Called with trace_event_read_lock() held.
  */
 struct trace_event *ftrace_find_event(int type)
 {
@@ -475,7 +476,7 @@ struct trace_event *ftrace_find_event(int type)
 
 	key = type & (EVENT_HASHSIZE - 1);
 
-	hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
+	hlist_for_each_entry(event, n, &event_hash[key], node) {
 		if (event->type == type)
 			return event;
 	}
@@ -513,6 +514,16 @@ static int trace_search_list(struct list_head **list)
 	return last + 1;
 }
 
+void trace_event_read_lock(void)
+{
+	down_read(&trace_event_mutex);
+}
+
+void trace_event_read_unlock(void)
+{
+	up_read(&trace_event_mutex);
+}
+
 /**
  * register_ftrace_event - register output for an event type
  * @event: the event type to register
@@ -533,7 +544,7 @@ int register_ftrace_event(struct trace_event *event)
 	unsigned key;
 	int ret = 0;
 
-	mutex_lock(&trace_event_mutex);
+	down_write(&trace_event_mutex);
 
 	if (WARN_ON(!event))
 		goto out;
@@ -581,11 +592,11 @@ int register_ftrace_event(struct trace_event *event)
 
 	key = event->type & (EVENT_HASHSIZE - 1);
 
-	hlist_add_head_rcu(&event->node, &event_hash[key]);
+	hlist_add_head(&event->node, &event_hash[key]);
 
 	ret = event->type;
  out:
-	mutex_unlock(&trace_event_mutex);
+	up_write(&trace_event_mutex);
 
 	return ret;
 }
@@ -597,10 +608,10 @@ EXPORT_SYMBOL_GPL(register_ftrace_event);
  */
 int unregister_ftrace_event(struct trace_event *event)
 {
-	mutex_lock(&trace_event_mutex);
+	down_write(&trace_event_mutex);
 	hlist_del(&event->node);
 	list_del(&event->list);
-	mutex_unlock(&trace_event_mutex);
+	up_write(&trace_event_mutex);
 
 	return 0;
 }
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 6e220a8..ac240e7 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -20,6 +20,8 @@ extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
 extern int trace_print_context(struct trace_iterator *iter);
 extern int trace_print_lat_context(struct trace_iterator *iter);
 
+extern void trace_event_read_lock(void);
+extern void trace_event_read_unlock(void);
 extern struct trace_event *ftrace_find_event(int type);
 
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
-- 
cgit v0.10.2


From b0aae68cc5508f3c2fbf728988c954db4c8b8a53 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 21 May 2009 13:59:18 +0800
Subject: tracing/events: change the type of __str_loc_item to unsigned short

When defining a dynamic size string, we add __str_loc_##item to the
trace entry, and it stores the location of the actual string in
entry->_str_data[]

'unsigned short' should be sufficient to store this information, thus
we save 2 bytes per dyn-size string in the ring buffer.

[ Impact: reduce memory occupied by dyn-size strings in ring buffer ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <4A14EDB6.2050507@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index edb02bc..b5ff2e8 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -25,7 +25,7 @@
 #define __field(type, item)		type	item;
 
 #undef __string
-#define __string(item, src)		int	__str_loc_##item;
+#define __string(item, src)		unsigned short	__str_loc_##item;
 
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
-- 
cgit v0.10.2


From 62e1e389f87a8839ad83b08c44691d1df8320846 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 26 May 2009 09:40:59 +1000
Subject: md: always update level / chunk_size / layout when writing v1.x
 metadata.

We previously didn't update these fields when writing the metadata
because they could never change.  They can now, so we better write
them.
v0.90 metadata always updated these fields.

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index fccc834..aa79d55 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1375,6 +1375,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
 	sb->size = cpu_to_le64(mddev->dev_sectors);
+	sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9);
+	sb->level = cpu_to_le32(mddev->level);
+	sb->layout = cpu_to_le32(mddev->layout);
 
 	if (mddev->bitmap && mddev->bitmap_file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
-- 
cgit v0.10.2


From 2b69c83924396ad1eda36fdd267c9d2f360f5555 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 26 May 2009 09:41:17 +1000
Subject: md: improve errno return when setting array_size

Instead of always returns EINVAL if anything goes wrong
when setting the array size, add the option of
  E2BIG
if the size requested is too large.  This makes it easier
for user-space to be sure what went wrong.

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index aa79d55..58e0b02 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3683,7 +3683,7 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
 		if (strict_blocks_to_sectors(buf, &sectors) < 0)
 			return -EINVAL;
 		if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
-			return -EINVAL;
+			return -E2BIG;
 
 		mddev->external_size = 1;
 	}
-- 
cgit v0.10.2


From be512691036cc989c11d0f418187efbbf14468e6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 26 May 2009 09:41:17 +1000
Subject: md: bitmap: improve bitmap maintenance code.

The code for checking which bits in the bitmap can be cleared
has 2 problems:
 1/ it repeatedly takes and drops a spinlock, where it would make
    more sense to just hold on to it most of the time.
 2/ it doesn't make use of some opportunities to skip large sections
    of the bitmap

This patch fixes those.  It will only affect CPU consumption, not
correctness.

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 47c68bc..56df1ce 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1097,14 +1097,12 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 	}
 	bitmap->allclean = 1;
 
+	spin_lock_irqsave(&bitmap->lock, flags);
 	for (j = 0; j < bitmap->chunks; j++) {
 		bitmap_counter_t *bmc;
-		spin_lock_irqsave(&bitmap->lock, flags);
-		if (!bitmap->filemap) {
+		if (!bitmap->filemap)
 			/* error or shutdown */
-			spin_unlock_irqrestore(&bitmap->lock, flags);
 			break;
-		}
 
 		page = filemap_get_page(bitmap, j);
 
@@ -1121,6 +1119,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 					write_page(bitmap, page, 0);
 					bitmap->allclean = 0;
 				}
+				spin_lock_irqsave(&bitmap->lock, flags);
+				j |= (PAGE_BITS - 1);
 				continue;
 			}
 
@@ -1181,9 +1181,10 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 					ext2_clear_bit(file_page_offset(j), paddr);
 				kunmap_atomic(paddr, KM_USER0);
 			}
-		}
-		spin_unlock_irqrestore(&bitmap->lock, flags);
+		} else
+			j |= PAGE_COUNTER_MASK;
 	}
+	spin_unlock_irqrestore(&bitmap->lock, flags);
 
 	/* now sync the final page */
 	if (lastpage != NULL) {
-- 
cgit v0.10.2


From b6a9ce688f613e2ee5f15e6720e0bb8520efc36e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 26 May 2009 09:41:17 +1000
Subject: md: export 'frozen' resync state through sysfs

The md resync engine has a 'frozen' state which ensures that
no resync/recovery.  This is used to avoid races.

Export this state through the 'sync_action' sysfs attribute
so that user-space can benefit and also avoid some races.

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 58e0b02..384e4f0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3306,7 +3306,9 @@ static ssize_t
 action_show(mddev_t *mddev, char *page)
 {
 	char *type = "idle";
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+	if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+		type = "frozen";
+	else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 	    (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
 		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 			type = "reshape";
@@ -3329,7 +3331,12 @@ action_store(mddev_t *mddev, const char *page, size_t len)
 	if (!mddev->pers || !mddev->pers->sync_request)
 		return -EINVAL;
 
-	if (cmd_match(page, "idle")) {
+	if (cmd_match(page, "frozen"))
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	else
+		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+	if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
 		if (mddev->sync_thread) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			md_unregister_thread(mddev->sync_thread);
-- 
cgit v0.10.2


From 29fcefba8a2f0fea11e2b721fe174a1832801284 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Sun, 24 May 2009 11:13:17 +0300
Subject: kmemtrace: fix kernel parameter documentation

The kmemtrace.enable kernel parameter no longer works. To enable
kmemtrace at boot-time, you must pass "ftrace=kmemtrace" instead.

[ Impact: remove obsolete kernel parameter documentation ]

Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
LKML-Reference: <alpine.DEB.2.00.0905241112190.10296@rocky>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbf..9243dd8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -56,7 +56,6 @@ parameter is applicable:
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
-	KMEMTRACE kmemtrace is enabled.
 	LIBATA  Libata driver is enabled
 	LP	Printer support is enabled.
 	LOOP	Loopback device support is enabled.
@@ -1054,15 +1053,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			use the HighMem zone if it exists, and the Normal
 			zone if it does not.
 
-	kmemtrace.enable=	[KNL,KMEMTRACE] Format: { yes | no }
-				Controls whether kmemtrace is enabled
-				at boot-time.
-
-	kmemtrace.subbufs=n	[KNL,KMEMTRACE] Overrides the number of
-			subbufs kmemtrace's relay channel has. Set this
-			higher than default (KMEMTRACE_N_SUBBUFS in code) if
-			you experience buffer overruns.
-
 	kgdboc=		[HW] kgdb over consoles.
 			Requires a tty driver that supports console polling.
 			(only serial suported for now)
-- 
cgit v0.10.2


From b11c53e12f94a46b50bccc7a1a953d7ca1d54a31 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Mon, 25 May 2009 18:11:59 +0800
Subject: ftrace: Add task_comm support for trace_event

If we enable a trace event alone without any tracer running (such as
function tracer, sched switch tracer, etc...) it can't output enough
task command information.

We need to use the tracing_{start/stop}_cmdline_record() helpers
which are designed to keep track of cmdlines for any tasks that
were scheduled during the tracing.

Before this patch:
 # echo 1 > debugfs/tracing/events/sched/sched_switch/enable
 # cat debugfs/tracing/trace
 # tracer: nop
 #
 #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
 #              | |       |          |         |
            <...>-2289  [000] 526276.724790: sched_switch: task bash:2289 [120] ==> sshd:2287 [120]
            <...>-2287  [000] 526276.725231: sched_switch: task sshd:2287 [120] ==> bash:2289 [120]
            <...>-2289  [000] 526276.725452: sched_switch: task bash:2289 [120] ==> sshd:2287 [120]
            <...>-2287  [000] 526276.727181: sched_switch: task sshd:2287 [120] ==> swapper:0 [140]
           <idle>-0     [000] 526277.032734: sched_switch: task swapper:0 [140] ==> events/0:5 [115]
            <...>-5     [000] 526277.032782: sched_switch: task events/0:5 [115] ==> swapper:0 [140]
 ...

After this patch:
 # tracer: nop
 #
 #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
 #              | |       |          |         |
             bash-2269  [000] 527347.989229: sched_switch: task bash:2269 [120] ==> sshd:2267 [120]
             sshd-2267  [000] 527347.990960: sched_switch: task sshd:2267 [120] ==> bash:2269 [120]
             bash-2269  [000] 527347.991143: sched_switch: task bash:2269 [120] ==> sshd:2267 [120]
             sshd-2267  [000] 527347.992959: sched_switch: task sshd:2267 [120] ==> swapper:0 [140]
           <idle>-0     [000] 527348.531989: sched_switch: task swapper:0 [140] ==> events/0:5 [115]
         events/0-5     [000] 527348.532115: sched_switch: task events/0:5 [115] ==> swapper:0 [140]
 ...

Changelog:
v1->v2: Update Kconfig to select CONTEXT_SWITCH_TRACER in
        ENABLE_EVENT_TRACING
v2->v3: v2 can solve problem that was caused by config EVENT_TRACING
        alone, but when CONFIG_FTRACE is off and CONFIG_TRACING is
        selected by other config, compile fail happened again.
        This version solves it.

[ Impact: fix incomplete output of event tracing ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <4A14FDFE.2080402@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f61be30..a508b9d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config FTRACE_NMI_ENTER
        default y
 
 config EVENT_TRACING
+	select CONTEXT_SWITCH_TRACER
+	bool
+
+config CONTEXT_SWITCH_TRACER
+	select MARKERS
 	bool
 
 config TRACING
@@ -176,10 +181,10 @@ config SCHED_TRACER
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
-config CONTEXT_SWITCH_TRACER
+config ENABLE_CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	select TRACING
-	select MARKERS
+	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9e91c4a..9b246eb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -85,6 +85,7 @@ static void ftrace_clear_events(void)
 
 		if (call->enabled) {
 			call->enabled = 0;
+			tracing_stop_cmdline_record();
 			call->unregfunc();
 		}
 	}
@@ -99,12 +100,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 	case 0:
 		if (call->enabled) {
 			call->enabled = 0;
+			tracing_stop_cmdline_record();
 			call->unregfunc();
 		}
 		break;
 	case 1:
 		if (!call->enabled) {
 			call->enabled = 1;
+			tracing_start_cmdline_record();
 			call->regfunc();
 		}
 		break;
@@ -1058,6 +1061,7 @@ static void trace_module_remove_events(struct module *mod)
 			found = true;
 			if (call->enabled) {
 				call->enabled = 0;
+				tracing_stop_cmdline_record();
 				call->unregfunc();
 			}
 			if (call->event)
@@ -1262,11 +1266,13 @@ static __init void event_trace_self_tests(void)
 		}
 
 		call->enabled = 1;
+		tracing_start_cmdline_record();
 		call->regfunc();
 
 		event_test_stuff();
 
 		call->unregfunc();
+		tracing_stop_cmdline_record();
 		call->enabled = 0;
 
 		pr_cont("OK\n");
-- 
cgit v0.10.2


From 0e907c99391362385c8e3af2c43b904dd1fd5d73 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Mon, 25 May 2009 18:13:59 +0800
Subject: ftrace: clean up of using ftrace_event_enable_disable()

Always use ftrace_event_enable_disable() to enable/disable an event
so that we can factorize out the event toggling code.

[ Impact: factorize and cleanup event tracing code ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4A14FDFE.2080402@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9b246eb..6c81f9c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -76,26 +76,9 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
 
 #endif /* CONFIG_MODULES */
 
-static void ftrace_clear_events(void)
-{
-	struct ftrace_event_call *call;
-
-	mutex_lock(&event_mutex);
-	list_for_each_entry(call, &ftrace_events, list) {
-
-		if (call->enabled) {
-			call->enabled = 0;
-			tracing_stop_cmdline_record();
-			call->unregfunc();
-		}
-	}
-	mutex_unlock(&event_mutex);
-}
-
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
-
 	switch (enable) {
 	case 0:
 		if (call->enabled) {
@@ -114,6 +97,17 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 	}
 }
 
+static void ftrace_clear_events(void)
+{
+	struct ftrace_event_call *call;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(call, &ftrace_events, list) {
+		ftrace_event_enable_disable(call, 0);
+	}
+	mutex_unlock(&event_mutex);
+}
+
 /*
  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
  */
@@ -1059,11 +1053,7 @@ static void trace_module_remove_events(struct module *mod)
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		if (call->mod == mod) {
 			found = true;
-			if (call->enabled) {
-				call->enabled = 0;
-				tracing_stop_cmdline_record();
-				call->unregfunc();
-			}
+			ftrace_event_enable_disable(call, 0);
 			if (call->event)
 				unregister_ftrace_event(call->event);
 			debugfs_remove_recursive(call->dir);
@@ -1265,15 +1255,9 @@ static __init void event_trace_self_tests(void)
 			continue;
 		}
 
-		call->enabled = 1;
-		tracing_start_cmdline_record();
-		call->regfunc();
-
+		ftrace_event_enable_disable(call, 1);
 		event_test_stuff();
-
-		call->unregfunc();
-		tracing_stop_cmdline_record();
-		call->enabled = 0;
+		ftrace_event_enable_disable(call, 0);
 
 		pr_cont("OK\n");
 	}
-- 
cgit v0.10.2


From 848b3182365fdf5a05bcd5ed949071cac2c894b3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 26 May 2009 12:41:08 +1000
Subject: md: raid5: avoid sector values going negative when testing reshape
 progress.

As sector_t in unsigned, we cannot afford to let 'safepos' etc go
negative.
So replace
   a -= b;
by
   a -= min(b,a);

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4616bc3..3c3626d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	safepos = conf->reshape_safe;
 	sector_div(safepos, data_disks);
 	if (mddev->delta_disks < 0) {
-		writepos -= reshape_sectors;
+		writepos -= min(reshape_sectors, writepos);
 		readpos += reshape_sectors;
 		safepos += reshape_sectors;
 	} else {
 		writepos += reshape_sectors;
-		readpos -= reshape_sectors;
-		safepos -= reshape_sectors;
+		readpos -= min(reshape_sectors, readpos);
+		safepos -= min(reshape_sectors, safepos);
 	}
 
 	/* 'writepos' is the most advanced device address we might write.
-- 
cgit v0.10.2


From 7a91ee1f628ef6bfe3f13067c0ddf9db520cb86b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 26 May 2009 12:57:21 +1000
Subject: md: don't update curr_resync_completed without also updating
 reshape_position.

In order for the metadata to always be consistent, we mustn't updated
curr_resync_completed without also updating reshape_position.

The reshape code updates both at the same time.  However since
commit 97e4f42d62badb0f9fbc27c013e89bc1336a03bc
the common md_do_sync will sometimes update curr_resync_completed
but is not in a position to update reshape_position.
So if MD_RECOVERY_RESHAPE is set (indicating that a reshape is
happening, so reshape_position might change), don't update
curr_resync_completed in md_do_sync, leave it to the per-personality
reshape code.

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 384e4f0..9544565 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6362,12 +6362,13 @@ void md_do_sync(mddev_t *mddev)
 
 		skipped = 0;
 
-		if ((mddev->curr_resync > mddev->curr_resync_completed &&
-		     (mddev->curr_resync - mddev->curr_resync_completed)
-		    > (max_sectors >> 4)) ||
-		    (j - mddev->curr_resync_completed)*2
-		    >= mddev->resync_max - mddev->curr_resync_completed
-			) {
+		if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+		    ((mddev->curr_resync > mddev->curr_resync_completed &&
+		      (mddev->curr_resync - mddev->curr_resync_completed)
+		      > (max_sectors >> 4)) ||
+		     (j - mddev->curr_resync_completed)*2
+		     >= mddev->resync_max - mddev->curr_resync_completed
+			    )) {
 			/* time to update curr_resync_completed */
 			blk_unplug(mddev->queue);
 			wait_event(mddev->recovery_wait,
-- 
cgit v0.10.2


From b492b852cd8c99505708152c29a5e09a787af9de Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 26 May 2009 12:57:36 +1000
Subject: md: don't use locked_ioctl.

md has no need for the BKL - it does its own locking.
So md_ioctl doesn't need to be a locked_ioctl.

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9544565..641b211 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5567,7 +5567,7 @@ static struct block_device_operations md_fops =
 	.owner		= THIS_MODULE,
 	.open		= md_open,
 	.release	= md_release,
-	.locked_ioctl	= md_ioctl,
+	.ioctl		= md_ioctl,
 	.getgeo		= md_getgeo,
 	.media_changed	= md_media_changed,
 	.revalidate_disk= md_revalidate,
-- 
cgit v0.10.2


From 8e35961b57da14cb64cb0e4e1b7e3aabda6396fe Mon Sep 17 00:00:00 2001
From: Hideo Saito <hsaito.ppc@gmail.com>
Date: Sun, 24 May 2009 15:33:34 +0000
Subject: powerpc/mm: Fix broken MMU PID stealing on !SMP

The recent rework of the MMU PID handling for non-hash CPUs has a
subtle bug in the !SMP "optimized" variant of the PID stealing
function.  It clears the PID in the mm context before it calls
local_flush_tlb_mm(). However, the later will not flush anything
if the PID in the context is clear...

Signed-off-by: Hideo Saito <hsaito.ppc@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index a70e311..030d000 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -127,12 +127,12 @@ static unsigned int steal_context_up(unsigned int id)
 
 	pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm);
 
-	/* Mark this mm has having no context anymore */
-	mm->context.id = MMU_NO_CONTEXT;
-
 	/* Flush the TLB for that context */
 	local_flush_tlb_mm(mm);
 
+	/* Mark this mm has having no context anymore */
+	mm->context.id = MMU_NO_CONTEXT;
+
 	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
 	__clear_bit(id, stale_map[cpu]);
 
-- 
cgit v0.10.2


From 3709ab8dfa23cab553ea9ffea3372c8e0a28f332 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 26 May 2009 13:52:28 +0900
Subject: sh: irq: Fix up imask build warnings.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/irq/imask.c b/arch/sh/kernel/cpu/irq/imask.c
index 3bef3c2..6b5d191 100644
--- a/arch/sh/kernel/cpu/irq/imask.c
+++ b/arch/sh/kernel/cpu/irq/imask.c
@@ -53,7 +53,7 @@ static inline void set_interrupt_registers(int ip)
 
 static void mask_imask_irq(unsigned int irq)
 {
-	clear_bit(irq, &imask_mask);
+	clear_bit(irq, imask_mask);
 	if (interrupt_priority < IMASK_PRIORITY - irq)
 		interrupt_priority = IMASK_PRIORITY - irq;
 	set_interrupt_registers(interrupt_priority);
@@ -61,7 +61,7 @@ static void mask_imask_irq(unsigned int irq)
 
 static void unmask_imask_irq(unsigned int irq)
 {
-	set_bit(irq, &imask_mask);
+	set_bit(irq, imask_mask);
 	interrupt_priority = IMASK_PRIORITY -
 		find_first_zero_bit(imask_mask, IMASK_PRIORITY);
 	set_interrupt_registers(interrupt_priority);
-- 
cgit v0.10.2


From 46176b4f6bac19454b7b5c35f68594b85850a600 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 26 May 2009 14:42:40 +0900
Subject: x86, relocs: ignore R_386_NONE in kernel relocation entries

For relocatable 32bit kernels, boot/compressed/relocs.c processes
relocation entries in the kernel image and appends it to the kernel
image such that boot/compressed/head_32.S can relocate the kernel.
The kernel image is one statically linked object and only uses two
relocation types - R_386_PC32 and R_386_32, of the two only the latter
needs massaging during kernel relocation and thus handled by relocs.
R_386_PC32 is ignored and all other relocation types are considered
error.

When the target of a relocation resides in a discarded section,
binutils doesn't throw away the relocation record but nullifies it by
changing it to R_386_NONE, which unfortunately makes relocs fail.

The problem was triggered by yet out-of-tree x86 stack unwind patches
but given the binutils behavior, ignoring R_386_NONE is the right
thing to do.

The problem has been tracked down to binutils behavior by Jan Beulich.

[ Impact: fix build with certain binutils by ignoring R_386_NONE ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Beulich <JBeulich@novell.com>
Cc: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <4A1B8150.40702@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 857e492..bbeb0c3 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -504,8 +504,11 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
 			if (sym->st_shndx == SHN_ABS) {
 				continue;
 			}
-			if (r_type == R_386_PC32) {
-				/* PC relative relocations don't need to be adjusted */
+			if (r_type == R_386_NONE || r_type == R_386_PC32) {
+				/*
+				 * NONE can be ignored and and PC relative
+				 * relocations don't need to be adjusted.
+				 */
 			}
 			else if (r_type == R_386_32) {
 				/* Visit relocations that need to be adjusted */
-- 
cgit v0.10.2


From 8a7b8cb91f26a671f22cedc7fd54508667f2d9b9 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 26 May 2009 16:27:59 +1000
Subject: perf_counter: powerpc: Implement interrupt throttling

This implements interrupt throttling on powerpc.  Since we don't have
individual count enable/disable or interrupt enable/disable controls
per counter, this simply sets the hardware counter to 0, meaning that
it will not interrupt again until it has counted 2^31 counts, which
will take at least 2^30 cycles assuming a maximum of 2 counts per
cycle.  Also, we set counter->hw.period_left to the maximum possible
value (2^63 - 1), so we won't report overflows for this counter for
the forseeable future.

The unthrottle operation restores counter->hw.period_left and the
hardware counter so that we will once again report a counter overflow
after counter->hw.irq_period counts.

[ Impact: new perfcounters robustness feature on PowerPC ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <18971.35823.643362.446774@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index fe21b24..f96d55f 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -740,10 +740,37 @@ static void power_pmu_disable(struct perf_counter *counter)
 	local_irq_restore(flags);
 }
 
+/*
+ * Re-enable interrupts on a counter after they were throttled
+ * because they were coming too fast.
+ */
+static void power_pmu_unthrottle(struct perf_counter *counter)
+{
+	s64 val, left;
+	unsigned long flags;
+
+	if (!counter->hw.idx || !counter->hw.irq_period)
+		return;
+	local_irq_save(flags);
+	perf_disable();
+	power_pmu_read(counter);
+	left = counter->hw.irq_period;
+	val = 0;
+	if (left < 0x80000000L)
+		val = 0x80000000L - left;
+	write_pmc(counter->hw.idx, val);
+	atomic64_set(&counter->hw.prev_count, val);
+	atomic64_set(&counter->hw.period_left, left);
+	perf_counter_update_userpage(counter);
+	perf_enable();
+	local_irq_restore(flags);
+}
+
 struct pmu power_pmu = {
 	.enable		= power_pmu_enable,
 	.disable	= power_pmu_disable,
 	.read		= power_pmu_read,
+	.unthrottle	= power_pmu_unthrottle,
 };
 
 /*
@@ -957,10 +984,6 @@ static void record_and_restart(struct perf_counter *counter, long val,
 		if (left < 0x80000000L)
 			val = 0x80000000L - left;
 	}
-	write_pmc(counter->hw.idx, val);
-	atomic64_set(&counter->hw.prev_count, val);
-	atomic64_set(&counter->hw.period_left, left);
-	perf_counter_update_userpage(counter);
 
 	/*
 	 * Finally record data if requested.
@@ -983,8 +1006,23 @@ static void record_and_restart(struct perf_counter *counter, long val,
 			if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
 				addr = mfspr(SPRN_SDAR);
 		}
-		perf_counter_overflow(counter, nmi, regs, addr);
+		if (perf_counter_overflow(counter, nmi, regs, addr)) {
+			/*
+			 * Interrupts are coming too fast - throttle them
+			 * by setting the counter to 0, so it will be
+			 * at least 2^30 cycles until the next interrupt
+			 * (assuming each counter counts at most 2 counts
+			 * per cycle).
+			 */
+			val = 0;
+			left = ~0ULL >> 1;
+		}
 	}
+
+	write_pmc(counter->hw.idx, val);
+	atomic64_set(&counter->hw.prev_count, val);
+	atomic64_set(&counter->hw.period_left, left);
+	perf_counter_update_userpage(counter);
 }
 
 /*
-- 
cgit v0.10.2


From 79202ba9ff8cf570a75596f42e011167734d1c4b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 08:10:00 +0200
Subject: perf_counter, x86: Fix APIC NMI programming

My Nehalem box locks up in certain situations (with an
always-asserted NMI causing a lockup) if the PMU LVT
entry is programmed between NMI and IRQ mode with a
high frequency.

Standardize exlusively on NMIs instead.

[ Impact: fix lockup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 189bf9d..ece3813 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -285,14 +285,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
 	/*
-	 * If privileged enough, allow NMI events:
+	 * Use NMI events all the time:
 	 */
-	hwc->nmi = 0;
-	if (hw_event->nmi) {
-		if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
-		hwc->nmi = 1;
-	}
+	hwc->nmi	= 1;
+	hw_event->nmi	= 1;
 
 	if (!hwc->irq_period)
 		hwc->irq_period = x86_pmu.max_period;
@@ -553,9 +549,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 	if (!x86_pmu.num_counters_fixed)
 		return -1;
 
-	if (unlikely(hwc->nmi))
-		return -1;
-
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
 	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
@@ -806,9 +799,6 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 		counter = cpuc->counters[idx];
 		hwc = &counter->hw;
 
-		if (counter->hw_event.nmi != nmi)
-			continue;
-
 		val = x86_perf_counter_update(counter, hwc, idx);
 		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
 			continue;
-- 
cgit v0.10.2


From aaba98018b8295dfa2119345d17f833d74448cd0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 08:10:00 +0200
Subject: perf_counter, x86: Make NMI lockups more robust

We have a debug check that detects stuck NMIs and returns with
the PMU disabled in the global ctrl MSR - but i managed to trigger
a situation where this was not enough to deassert the NMI.

So clear/reset the full PMU and keep the disable count balanced when
exiting from here. This way the box produces a debug warning but
stays up and is more debuggable.

[ Impact: in case of PMU related bugs, recover more gracefully ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ece3813..2eeaa99 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -724,6 +724,30 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
 		intel_pmu_enable_counter(hwc, idx);
 }
 
+static void intel_pmu_reset(void)
+{
+	unsigned long flags;
+	int idx;
+
+	if (!x86_pmu.num_counters)
+		return;
+
+	local_irq_save(flags);
+
+	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
+		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
+	}
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+	}
+
+	local_irq_restore(flags);
+}
+
+
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
@@ -750,6 +774,8 @@ again:
 	if (++loops > 100) {
 		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
 		perf_counter_print_debug();
+		intel_pmu_reset();
+		perf_enable();
 		return 1;
 	}
 
-- 
cgit v0.10.2


From 329d876d6fd326109f191ae0fb2798b8834fb70b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 08:10:00 +0200
Subject: perf_counter: Initialize ->oncpu properly

This shouldnt matter normally (and i have not seen any
misbehavior), because active counters always have a
proper ->oncpu value - but nevertheless initialize the
field properly to -1.

[ Impact: cleanup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 070f92d..367299f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3122,6 +3122,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader		= group_leader;
 	counter->pmu			= NULL;
 	counter->ctx			= ctx;
+	counter->oncpu			= -1;
+
 	get_ctx(ctx);
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-- 
cgit v0.10.2


From 287c129716c2d4b75f3d8dd6e68732a3cd326ee6 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Tue, 26 May 2009 07:04:52 +0000
Subject: sh: Add ms7724se (SH7724) board support

This adds preliminary support for the ms7724se solution engine board.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index 99e2d47..1c91b1f 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -46,6 +46,15 @@ config SH_7722_SOLUTION_ENGINE
 	  Select 7722 SolutionEngine if configuring for a Hitachi SH772
 	  evaluation board.
 
+config SH_7724_SOLUTION_ENGINE
+	bool "SolutionEngine7724"
+	select SOLUTION_ENGINE
+	depends on CPU_SUBTYPE_SH7724
+	select ARCH_REQUIRE_GPIOLIB
+	help
+	  Select 7724 SolutionEngine if configuring for a Hitachi SH7724
+	  evaluation board.
+
 config SH_7751_SOLUTION_ENGINE
 	bool "SolutionEngine7751"
 	select SOLUTION_ENGINE
diff --git a/arch/sh/boards/mach-se/7724/Makefile b/arch/sh/boards/mach-se/7724/Makefile
new file mode 100644
index 0000000..349cbd6
--- /dev/null
+++ b/arch/sh/boards/mach-se/7724/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the HITACHI UL SolutionEngine 7724 specific parts of the kernel
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+#
+
+obj-y	 := setup.o irq.o
\ No newline at end of file
diff --git a/arch/sh/boards/mach-se/7724/irq.c b/arch/sh/boards/mach-se/7724/irq.c
new file mode 100644
index 0000000..f76cf3b
--- /dev/null
+++ b/arch/sh/boards/mach-se/7724/irq.c
@@ -0,0 +1,139 @@
+/*
+ * linux/arch/sh/boards/se/7724/irq.c
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on  linux/arch/sh/boards/se/7722/irq.c
+ * Copyright (C) 2007  Nobuhiro Iwamatsu
+ *
+ * Hitachi UL SolutionEngine 7724 Support.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <mach-se/mach/se7724.h>
+
+struct fpga_irq {
+	unsigned long  sraddr;
+	unsigned long  mraddr;
+	unsigned short mask;
+	unsigned int   base;
+};
+
+static unsigned int fpga2irq(unsigned int irq)
+{
+	if (irq >= IRQ0_BASE &&
+	    irq <= IRQ0_END)
+		return IRQ0_IRQ;
+	else if (irq >= IRQ1_BASE &&
+		 irq <= IRQ1_END)
+		return IRQ1_IRQ;
+	else
+		return IRQ2_IRQ;
+}
+
+static struct fpga_irq get_fpga_irq(unsigned int irq)
+{
+	struct fpga_irq set;
+
+	switch (irq) {
+	case IRQ0_IRQ:
+		set.sraddr = IRQ0_SR;
+		set.mraddr = IRQ0_MR;
+		set.mask   = IRQ0_MASK;
+		set.base   = IRQ0_BASE;
+		break;
+	case IRQ1_IRQ:
+		set.sraddr = IRQ1_SR;
+		set.mraddr = IRQ1_MR;
+		set.mask   = IRQ1_MASK;
+		set.base   = IRQ1_BASE;
+		break;
+	default:
+		set.sraddr = IRQ2_SR;
+		set.mraddr = IRQ2_MR;
+		set.mask   = IRQ2_MASK;
+		set.base   = IRQ2_BASE;
+		break;
+	}
+
+	return set;
+}
+
+static void disable_se7724_irq(unsigned int irq)
+{
+	struct fpga_irq set = get_fpga_irq(fpga2irq(irq));
+	unsigned int bit = irq - set.base;
+	ctrl_outw(ctrl_inw(set.mraddr) | 0x0001 << bit, set.mraddr);
+}
+
+static void enable_se7724_irq(unsigned int irq)
+{
+	struct fpga_irq set = get_fpga_irq(fpga2irq(irq));
+	unsigned int bit = irq - set.base;
+	ctrl_outw(ctrl_inw(set.mraddr) & ~(0x0001 << bit), set.mraddr);
+}
+
+static struct irq_chip se7724_irq_chip __read_mostly = {
+	.name           = "SE7724-FPGA",
+	.mask           = disable_se7724_irq,
+	.unmask         = enable_se7724_irq,
+	.mask_ack       = disable_se7724_irq,
+};
+
+static void se7724_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	struct fpga_irq set = get_fpga_irq(irq);
+	unsigned short intv = ctrl_inw(set.sraddr);
+	struct irq_desc *ext_desc;
+	unsigned int ext_irq = set.base;
+
+	intv &= set.mask;
+
+	while (intv) {
+		if (intv & 0x0001) {
+			ext_desc = irq_desc + ext_irq;
+			handle_level_irq(ext_irq, ext_desc);
+		}
+		intv >>= 1;
+		ext_irq++;
+	}
+}
+
+/*
+ * Initialize IRQ setting
+ */
+void __init init_se7724_IRQ(void)
+{
+	int i;
+
+	ctrl_outw(0xffff, IRQ0_MR);  /* mask all */
+	ctrl_outw(0xffff, IRQ1_MR);  /* mask all */
+	ctrl_outw(0xffff, IRQ2_MR);  /* mask all */
+	ctrl_outw(0x0000, IRQ0_SR);  /* clear irq */
+	ctrl_outw(0x0000, IRQ1_SR);  /* clear irq */
+	ctrl_outw(0x0000, IRQ2_SR);  /* clear irq */
+	ctrl_outw(0x002a, IRQ_MODE); /* set irq type */
+
+	for (i = 0; i < SE7724_FPGA_IRQ_NR; i++)
+		set_irq_chip_and_handler_name(SE7724_FPGA_IRQ_BASE + i,
+					      &se7724_irq_chip,
+					      handle_level_irq, "level");
+
+	set_irq_chained_handler(IRQ0_IRQ, se7724_irq_demux);
+	set_irq_type(IRQ0_IRQ, IRQ_TYPE_LEVEL_LOW);
+
+	set_irq_chained_handler(IRQ1_IRQ, se7724_irq_demux);
+	set_irq_type(IRQ1_IRQ, IRQ_TYPE_LEVEL_LOW);
+
+	set_irq_chained_handler(IRQ2_IRQ, se7724_irq_demux);
+	set_irq_type(IRQ2_IRQ, IRQ_TYPE_LEVEL_LOW);
+}
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
new file mode 100644
index 0000000..9cd04bd
--- /dev/null
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -0,0 +1,448 @@
+/*
+ * linux/arch/sh/boards/se/7724/setup.c
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/physmap.h>
+#include <linux/delay.h>
+#include <linux/smc91x.h>
+#include <linux/gpio.h>
+#include <linux/input.h>
+#include <video/sh_mobile_lcdc.h>
+#include <media/sh_mobile_ceu.h>
+#include <asm/io.h>
+#include <asm/heartbeat.h>
+#include <asm/sh_keysc.h>
+#include <cpu/sh7724.h>
+#include <mach-se/mach/se7724.h>
+
+/*
+ * SWx    1234 5678
+ * ------------------------------------
+ * SW31 : 1001 1100    : default
+ * SW32 : 0111 1111    : use on board flash
+ *
+ * SW41 : abxx xxxx  -> a = 0 : Analog  monitor
+ *                          1 : Digital monitor
+ *                      b = 0 : VGA
+ *                          1 : SVGA
+ */
+
+/* Heartbeat */
+static struct heartbeat_data heartbeat_data = {
+	.regsize = 16,
+};
+
+static struct resource heartbeat_resources[] = {
+	[0] = {
+		.start  = PA_LED,
+		.end    = PA_LED,
+		.flags  = IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device heartbeat_device = {
+	.name           = "heartbeat",
+	.id             = -1,
+	.dev = {
+		.platform_data = &heartbeat_data,
+	},
+	.num_resources  = ARRAY_SIZE(heartbeat_resources),
+	.resource       = heartbeat_resources,
+};
+
+/* LAN91C111 */
+static struct smc91x_platdata smc91x_info = {
+	.flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+};
+
+static struct resource smc91x_eth_resources[] = {
+	[0] = {
+		.name   = "SMC91C111" ,
+		.start  = 0x1a300300,
+		.end    = 0x1a30030f,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = IRQ0_SMC,
+		.flags  = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
+	},
+};
+
+static struct platform_device smc91x_eth_device = {
+	.name	= "smc91x",
+	.num_resources  = ARRAY_SIZE(smc91x_eth_resources),
+	.resource       = smc91x_eth_resources,
+	.dev	= {
+		.platform_data	= &smc91x_info,
+	},
+};
+
+/* MTD */
+static struct mtd_partition nor_flash_partitions[] = {
+	{
+		.name = "uboot",
+		.offset = 0,
+		.size = (1 * 1024 * 1024),
+		.mask_flags = MTD_WRITEABLE,	/* Read-only */
+	}, {
+		.name = "kernel",
+		.offset = MTDPART_OFS_APPEND,
+		.size = (2 * 1024 * 1024),
+	}, {
+		.name = "free-area",
+		.offset = MTDPART_OFS_APPEND,
+		.size = MTDPART_SIZ_FULL,
+	},
+};
+
+static struct physmap_flash_data nor_flash_data = {
+	.width		= 2,
+	.parts		= nor_flash_partitions,
+	.nr_parts	= ARRAY_SIZE(nor_flash_partitions),
+};
+
+static struct resource nor_flash_resources[] = {
+	[0] = {
+		.name	= "NOR Flash",
+		.start	= 0x00000000,
+		.end	= 0x01ffffff,
+		.flags	= IORESOURCE_MEM,
+	}
+};
+
+static struct platform_device nor_flash_device = {
+	.name		= "physmap-flash",
+	.resource	= nor_flash_resources,
+	.num_resources	= ARRAY_SIZE(nor_flash_resources),
+	.dev		= {
+		.platform_data = &nor_flash_data,
+	},
+};
+
+/* LCDC */
+static struct sh_mobile_lcdc_info lcdc_info = {
+	.clock_source = LCDC_CLK_EXTERNAL,
+	.ch[0] = {
+		.chan = LCDC_CHAN_MAINLCD,
+		.bpp = 16,
+		.clock_divider = 1,
+		.lcd_cfg = {
+			.name = "LB070WV1",
+			.sync = 0, /* hsync and vsync are active low */
+		},
+		.lcd_size_cfg = { /* 7.0 inch */
+			.width = 152,
+			.height = 91,
+		},
+		.board_cfg = {
+		},
+	}
+};
+
+static struct resource lcdc_resources[] = {
+	[0] = {
+		.name	= "LCDC",
+		.start	= 0xfe940000,
+		.end	= 0xfe941fff,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 106,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device lcdc_device = {
+	.name		= "sh_mobile_lcdc_fb",
+	.num_resources	= ARRAY_SIZE(lcdc_resources),
+	.resource	= lcdc_resources,
+	.dev		= {
+		.platform_data	= &lcdc_info,
+	},
+};
+
+/* CEU0 */
+static struct sh_mobile_ceu_info sh_mobile_ceu0_info = {
+	.flags = SH_CEU_FLAG_USE_8BIT_BUS,
+};
+
+static struct resource ceu0_resources[] = {
+	[0] = {
+		.name	= "CEU0",
+		.start	= 0xfe910000,
+		.end	= 0xfe91009f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 52,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device ceu0_device = {
+	.name		= "sh_mobile_ceu",
+	.id             = 0, /* "ceu0" clock */
+	.num_resources	= ARRAY_SIZE(ceu0_resources),
+	.resource	= ceu0_resources,
+	.dev	= {
+		.platform_data	= &sh_mobile_ceu0_info,
+	},
+};
+
+/* CEU1 */
+static struct sh_mobile_ceu_info sh_mobile_ceu1_info = {
+	.flags = SH_CEU_FLAG_USE_8BIT_BUS,
+};
+
+static struct resource ceu1_resources[] = {
+	[0] = {
+		.name	= "CEU1",
+		.start	= 0xfe914000,
+		.end	= 0xfe91409f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 63,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device ceu1_device = {
+	.name		= "sh_mobile_ceu",
+	.id             = 1, /* "ceu1" clock */
+	.num_resources	= ARRAY_SIZE(ceu1_resources),
+	.resource	= ceu1_resources,
+	.dev	= {
+		.platform_data	= &sh_mobile_ceu1_info,
+	},
+};
+
+/* KEYSC */
+static struct sh_keysc_info keysc_info = {
+	.mode = SH_KEYSC_MODE_1,
+	.scan_timing = 10,
+	.delay = 50,
+	.keycodes = {
+		KEY_1, KEY_2, KEY_3, KEY_4, KEY_5,
+		KEY_6, KEY_7, KEY_8, KEY_9, KEY_A,
+		KEY_B, KEY_C, KEY_D, KEY_E, KEY_F,
+		KEY_G, KEY_H, KEY_I, KEY_K, KEY_L,
+		KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q,
+		KEY_R, KEY_S, KEY_T, KEY_U, KEY_V,
+	},
+};
+
+static struct resource keysc_resources[] = {
+	[0] = {
+		.start  = 0x1a204000,
+		.end    = 0x1a20400f,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = IRQ0_KEY,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device keysc_device = {
+	.name           = "sh_keysc",
+	.id             = 0, /* "keysc0" clock */
+	.num_resources  = ARRAY_SIZE(keysc_resources),
+	.resource       = keysc_resources,
+	.dev	= {
+		.platform_data	= &keysc_info,
+	},
+};
+
+static struct platform_device *ms7724se_devices[] __initdata = {
+	&heartbeat_device,
+	&smc91x_eth_device,
+	&lcdc_device,
+	&nor_flash_device,
+	&ceu0_device,
+	&ceu1_device,
+	&keysc_device,
+};
+
+#define SW4140    0xBA201000
+#define FPGA_OUT  0xBA200400
+#define PORT_HIZA 0xA4050158
+
+#define SW41_A    0x0100
+#define SW41_B    0x0200
+#define SW41_C    0x0400
+#define SW41_D    0x0800
+#define SW41_E    0x1000
+#define SW41_F    0x2000
+#define SW41_G    0x4000
+#define SW41_H    0x8000
+static int __init devices_setup(void)
+{
+	u16 sw = ctrl_inw(SW4140); /* select camera, monitor */
+
+	/* Reset Release */
+	ctrl_outw(ctrl_inw(FPGA_OUT) &
+		  ~((1 << 1)  | /* LAN */
+		    (1 << 6)  | /* VIDEO DAC */
+		    (1 << 12)), /* USB0 */
+		  FPGA_OUT);
+
+	/* enable IRQ 0,1,2 */
+	gpio_request(GPIO_FN_INTC_IRQ0, NULL);
+	gpio_request(GPIO_FN_INTC_IRQ1, NULL);
+	gpio_request(GPIO_FN_INTC_IRQ2, NULL);
+
+	/* enable SCIFA3 */
+	gpio_request(GPIO_FN_SCIF3_I_SCK, NULL);
+	gpio_request(GPIO_FN_SCIF3_I_RXD, NULL);
+	gpio_request(GPIO_FN_SCIF3_I_TXD, NULL);
+	gpio_request(GPIO_FN_SCIF3_I_CTS, NULL);
+	gpio_request(GPIO_FN_SCIF3_I_RTS, NULL);
+
+	/* enable LCDC */
+	gpio_request(GPIO_FN_LCDD23,   NULL);
+	gpio_request(GPIO_FN_LCDD22,   NULL);
+	gpio_request(GPIO_FN_LCDD21,   NULL);
+	gpio_request(GPIO_FN_LCDD20,   NULL);
+	gpio_request(GPIO_FN_LCDD19,   NULL);
+	gpio_request(GPIO_FN_LCDD18,   NULL);
+	gpio_request(GPIO_FN_LCDD17,   NULL);
+	gpio_request(GPIO_FN_LCDD16,   NULL);
+	gpio_request(GPIO_FN_LCDD15,   NULL);
+	gpio_request(GPIO_FN_LCDD14,   NULL);
+	gpio_request(GPIO_FN_LCDD13,   NULL);
+	gpio_request(GPIO_FN_LCDD12,   NULL);
+	gpio_request(GPIO_FN_LCDD11,   NULL);
+	gpio_request(GPIO_FN_LCDD10,   NULL);
+	gpio_request(GPIO_FN_LCDD9,    NULL);
+	gpio_request(GPIO_FN_LCDD8,    NULL);
+	gpio_request(GPIO_FN_LCDD7,    NULL);
+	gpio_request(GPIO_FN_LCDD6,    NULL);
+	gpio_request(GPIO_FN_LCDD5,    NULL);
+	gpio_request(GPIO_FN_LCDD4,    NULL);
+	gpio_request(GPIO_FN_LCDD3,    NULL);
+	gpio_request(GPIO_FN_LCDD2,    NULL);
+	gpio_request(GPIO_FN_LCDD1,    NULL);
+	gpio_request(GPIO_FN_LCDD0,    NULL);
+	gpio_request(GPIO_FN_LCDDISP,  NULL);
+	gpio_request(GPIO_FN_LCDHSYN,  NULL);
+	gpio_request(GPIO_FN_LCDDCK,   NULL);
+	gpio_request(GPIO_FN_LCDVSYN,  NULL);
+	gpio_request(GPIO_FN_LCDDON,   NULL);
+	gpio_request(GPIO_FN_LCDVEPWC, NULL);
+	gpio_request(GPIO_FN_LCDVCPWC, NULL);
+	gpio_request(GPIO_FN_LCDRD,    NULL);
+	gpio_request(GPIO_FN_LCDLCLK,  NULL);
+	ctrl_outw((ctrl_inw(PORT_HIZA) & ~0x0001), PORT_HIZA);
+
+	/* enable CEU0 */
+	gpio_request(GPIO_FN_VIO0_D15, NULL);
+	gpio_request(GPIO_FN_VIO0_D14, NULL);
+	gpio_request(GPIO_FN_VIO0_D13, NULL);
+	gpio_request(GPIO_FN_VIO0_D12, NULL);
+	gpio_request(GPIO_FN_VIO0_D11, NULL);
+	gpio_request(GPIO_FN_VIO0_D10, NULL);
+	gpio_request(GPIO_FN_VIO0_D9,  NULL);
+	gpio_request(GPIO_FN_VIO0_D8,  NULL);
+	gpio_request(GPIO_FN_VIO0_D7,  NULL);
+	gpio_request(GPIO_FN_VIO0_D6,  NULL);
+	gpio_request(GPIO_FN_VIO0_D5,  NULL);
+	gpio_request(GPIO_FN_VIO0_D4,  NULL);
+	gpio_request(GPIO_FN_VIO0_D3,  NULL);
+	gpio_request(GPIO_FN_VIO0_D2,  NULL);
+	gpio_request(GPIO_FN_VIO0_D1,  NULL);
+	gpio_request(GPIO_FN_VIO0_D0,  NULL);
+	gpio_request(GPIO_FN_VIO0_VD,  NULL);
+	gpio_request(GPIO_FN_VIO0_CLK, NULL);
+	gpio_request(GPIO_FN_VIO0_FLD, NULL);
+	gpio_request(GPIO_FN_VIO0_HD,  NULL);
+	platform_resource_setup_memory(&ceu0_device, "ceu", 4 << 20);
+
+	/* enable CEU1 */
+	gpio_request(GPIO_FN_VIO1_D7,  NULL);
+	gpio_request(GPIO_FN_VIO1_D6,  NULL);
+	gpio_request(GPIO_FN_VIO1_D5,  NULL);
+	gpio_request(GPIO_FN_VIO1_D4,  NULL);
+	gpio_request(GPIO_FN_VIO1_D3,  NULL);
+	gpio_request(GPIO_FN_VIO1_D2,  NULL);
+	gpio_request(GPIO_FN_VIO1_D1,  NULL);
+	gpio_request(GPIO_FN_VIO1_D0,  NULL);
+	gpio_request(GPIO_FN_VIO1_FLD, NULL);
+	gpio_request(GPIO_FN_VIO1_HD,  NULL);
+	gpio_request(GPIO_FN_VIO1_VD,  NULL);
+	gpio_request(GPIO_FN_VIO1_CLK, NULL);
+	platform_resource_setup_memory(&ceu1_device, "ceu", 4 << 20);
+
+	/* KEYSC */
+	gpio_request(GPIO_FN_KEYOUT5_IN5, NULL);
+	gpio_request(GPIO_FN_KEYOUT4_IN6, NULL);
+	gpio_request(GPIO_FN_KEYIN4,      NULL);
+	gpio_request(GPIO_FN_KEYIN3,      NULL);
+	gpio_request(GPIO_FN_KEYIN2,      NULL);
+	gpio_request(GPIO_FN_KEYIN1,      NULL);
+	gpio_request(GPIO_FN_KEYIN0,      NULL);
+	gpio_request(GPIO_FN_KEYOUT3,     NULL);
+	gpio_request(GPIO_FN_KEYOUT2,     NULL);
+	gpio_request(GPIO_FN_KEYOUT1,     NULL);
+	gpio_request(GPIO_FN_KEYOUT0,     NULL);
+
+	if (sw & SW41_B) {
+		/* SVGA */
+		lcdc_info.ch[0].lcd_cfg.xres         = 800;
+		lcdc_info.ch[0].lcd_cfg.yres         = 600;
+		lcdc_info.ch[0].lcd_cfg.left_margin  = 142;
+		lcdc_info.ch[0].lcd_cfg.right_margin = 52;
+		lcdc_info.ch[0].lcd_cfg.hsync_len    = 96;
+		lcdc_info.ch[0].lcd_cfg.upper_margin = 24;
+		lcdc_info.ch[0].lcd_cfg.lower_margin = 2;
+		lcdc_info.ch[0].lcd_cfg.vsync_len    = 2;
+	} else {
+		/* VGA */
+		lcdc_info.ch[0].lcd_cfg.xres         = 640;
+		lcdc_info.ch[0].lcd_cfg.yres         = 480;
+		lcdc_info.ch[0].lcd_cfg.left_margin  = 105;
+		lcdc_info.ch[0].lcd_cfg.right_margin = 50;
+		lcdc_info.ch[0].lcd_cfg.hsync_len    = 96;
+		lcdc_info.ch[0].lcd_cfg.upper_margin = 33;
+		lcdc_info.ch[0].lcd_cfg.lower_margin = 10;
+		lcdc_info.ch[0].lcd_cfg.vsync_len    = 2;
+	}
+
+	if (sw & SW41_A) {
+		/* Digital monitor */
+		lcdc_info.ch[0].interface_type = RGB18;
+		lcdc_info.ch[0].flags          = 0;
+	} else {
+		/* Analog monitor */
+		lcdc_info.ch[0].interface_type = RGB24;
+		lcdc_info.ch[0].flags          = LCDC_FLAGS_DWPOL;
+	}
+
+	return platform_add_devices(ms7724se_devices,
+				ARRAY_SIZE(ms7724se_devices));
+}
+device_initcall(devices_setup);
+
+static struct sh_machine_vector mv_ms7724se __initmv = {
+	.mv_name	= "ms7724se",
+	.mv_init_irq	= init_se7724_IRQ,
+	.mv_nr_irqs	= SE7724_FPGA_IRQ_BASE + SE7724_FPGA_IRQ_NR,
+};
diff --git a/arch/sh/boards/mach-se/Makefile b/arch/sh/boards/mach-se/Makefile
index 2de42ba..b537e23 100644
--- a/arch/sh/boards/mach-se/Makefile
+++ b/arch/sh/boards/mach-se/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_SH_7751_SOLUTION_ENGINE)	+= 7751/
 obj-$(CONFIG_SH_7780_SOLUTION_ENGINE)	+= 7780/
 obj-$(CONFIG_SH_7343_SOLUTION_ENGINE)	+= 7343/
 obj-$(CONFIG_SH_7721_SOLUTION_ENGINE)	+= 7721/
+obj-$(CONFIG_SH_7724_SOLUTION_ENGINE)	+= 7724/
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
new file mode 100644
index 0000000..a029d0fb
--- /dev/null
+++ b/arch/sh/configs/se7724_defconfig
@@ -0,0 +1,1552 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc6
+# Tue May 26 13:18:09 2009
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH32=y
+# CONFIG_SUPERH64 is not set
+CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig"
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_ARCH_HIBERNATION_POSSIBLE=y
+CONFIG_SYS_SUPPORTS_CMT=y
+CONFIG_SYS_SUPPORTS_TMU=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_NO_VIRT_TO_BUS=y
+CONFIG_ARCH_HAS_DEFAULT_IDLE=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_FREEZER is not set
+
+#
+# System type
+#
+CONFIG_CPU_SH4=y
+CONFIG_CPU_SH4A=y
+CONFIG_CPU_SHX2=y
+CONFIG_ARCH_SHMOBILE=y
+# CONFIG_CPU_SUBTYPE_SH7619 is not set
+# CONFIG_CPU_SUBTYPE_SH7201 is not set
+# CONFIG_CPU_SUBTYPE_SH7203 is not set
+# CONFIG_CPU_SUBTYPE_SH7206 is not set
+# CONFIG_CPU_SUBTYPE_SH7263 is not set
+# CONFIG_CPU_SUBTYPE_MXG is not set
+# CONFIG_CPU_SUBTYPE_SH7705 is not set
+# CONFIG_CPU_SUBTYPE_SH7706 is not set
+# CONFIG_CPU_SUBTYPE_SH7707 is not set
+# CONFIG_CPU_SUBTYPE_SH7708 is not set
+# CONFIG_CPU_SUBTYPE_SH7709 is not set
+# CONFIG_CPU_SUBTYPE_SH7710 is not set
+# CONFIG_CPU_SUBTYPE_SH7712 is not set
+# CONFIG_CPU_SUBTYPE_SH7720 is not set
+# CONFIG_CPU_SUBTYPE_SH7721 is not set
+# CONFIG_CPU_SUBTYPE_SH7750 is not set
+# CONFIG_CPU_SUBTYPE_SH7091 is not set
+# CONFIG_CPU_SUBTYPE_SH7750R is not set
+# CONFIG_CPU_SUBTYPE_SH7750S is not set
+# CONFIG_CPU_SUBTYPE_SH7751 is not set
+# CONFIG_CPU_SUBTYPE_SH7751R is not set
+# CONFIG_CPU_SUBTYPE_SH7760 is not set
+# CONFIG_CPU_SUBTYPE_SH4_202 is not set
+# CONFIG_CPU_SUBTYPE_SH7723 is not set
+CONFIG_CPU_SUBTYPE_SH7724=y
+# CONFIG_CPU_SUBTYPE_SH7763 is not set
+# CONFIG_CPU_SUBTYPE_SH7770 is not set
+# CONFIG_CPU_SUBTYPE_SH7780 is not set
+# CONFIG_CPU_SUBTYPE_SH7785 is not set
+# CONFIG_CPU_SUBTYPE_SH7786 is not set
+# CONFIG_CPU_SUBTYPE_SHX3 is not set
+# CONFIG_CPU_SUBTYPE_SH7343 is not set
+# CONFIG_CPU_SUBTYPE_SH7722 is not set
+# CONFIG_CPU_SUBTYPE_SH7366 is not set
+
+#
+# Memory management options
+#
+CONFIG_QUICKLIST=y
+CONFIG_MMU=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_FORCE_MAX_ZONEORDER=11
+CONFIG_MEMORY_START=0x08000000
+CONFIG_MEMORY_SIZE=0x08000000
+CONFIG_29BIT=y
+# CONFIG_X2TLB is not set
+CONFIG_VSYSCALL=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_MAX_ACTIVE_REGIONS=1
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_PAGE_SIZE_4KB=y
+# CONFIG_PAGE_SIZE_8KB is not set
+# CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_SPARSEMEM_STATIC=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+
+#
+# Cache configuration
+#
+CONFIG_CACHE_WRITEBACK=y
+# CONFIG_CACHE_WRITETHROUGH is not set
+# CONFIG_CACHE_OFF is not set
+
+#
+# Processor features
+#
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH_STORE_QUEUES is not set
+CONFIG_CPU_HAS_INTEVT=y
+CONFIG_CPU_HAS_SR_RB=y
+CONFIG_CPU_HAS_PTEA=y
+CONFIG_CPU_HAS_FPU=y
+
+#
+# Board support
+#
+CONFIG_SOLUTION_ENGINE=y
+CONFIG_SH_7724_SOLUTION_ENGINE=y
+
+#
+# Timer and clock configuration
+#
+CONFIG_SH_TIMER_TMU=y
+# CONFIG_SH_TIMER_CMT is not set
+CONFIG_SH_PCLK_FREQ=41666666
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# DMA support
+#
+# CONFIG_SH_DMA is not set
+
+#
+# Companion Chips
+#
+
+#
+# Additional SuperH Device Drivers
+#
+CONFIG_HEARTBEAT=y
+# CONFIG_PUSH_SWITCH is not set
+
+#
+# Kernel features
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+# CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
+CONFIG_SECCOMP=y
+# CONFIG_PREEMPT_NONE is not set
+# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_PREEMPT=y
+CONFIG_GUSA=y
+
+#
+# Boot options
+#
+CONFIG_ZERO_PAGE_OFFSET=0x00001000
+CONFIG_BOOT_LINK_OFFSET=0x00800000
+CONFIG_ENTRY_OFFSET=0x00001000
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=tty1 console=ttySC3,115200 root=/dev/nfs ip=dhcp memchunk.vpu=4m"
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options (EXPERIMENTAL)
+#
+# CONFIG_PM is not set
+# CONFIG_CPU_IDLE is not set
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_ASK_IP_FIB_HASH=y
+# CONFIG_IP_FIB_TRIE is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_MULTIPLE_TABLES is not set
+# CONFIG_IP_ROUTE_MULTIPATH is not set
+# CONFIG_IP_ROUTE_VERBOSE is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+CONFIG_WIRELESS=y
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_OLD_REGULATORY is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_LIB80211 is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+CONFIG_MTD_NAND=y
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+# CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_NANDSIM is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_ALAUDA is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_WL_THRESHOLD=4096
+CONFIG_MTD_UBI_BEB_RESERVE=1
+# CONFIG_MTD_UBI_GLUEBI is not set
+
+#
+# UBI debugging options
+#
+# CONFIG_MTD_UBI_DEBUG is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=4
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ICS932S401 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_AT25 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+# CONFIG_ISCSI_TCP is not set
+# CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+CONFIG_SMSC_PHY=y
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_FIXED_PHY is not set
+CONFIG_MDIO_BITBANG=y
+# CONFIG_MDIO_GPIO is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+# CONFIG_STNIC is not set
+CONFIG_SMC91X=y
+# CONFIG_ENC28J60 is not set
+# CONFIG_ETHOC is not set
+# CONFIG_SMC911X is not set
+# CONFIG_SMSC911X is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_GPIO is not set
+CONFIG_KEYBOARD_SH_KEYSC=y
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_MAX3100 is not set
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_NR_UARTS=6
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_SH_MOBILE=y
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_TINY_USB is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+CONFIG_SPI=y
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_BITBANG=y
+# CONFIG_SPI_GPIO is not set
+# CONFIG_SPI_SH_SCI is not set
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+CONFIG_VIDEO_DEV=y
+CONFIG_VIDEO_V4L2_COMMON=y
+# CONFIG_VIDEO_ALLOW_V4L1 is not set
+CONFIG_VIDEO_V4L1_COMPAT=y
+# CONFIG_DVB_CORE is not set
+CONFIG_VIDEO_MEDIA=y
+
+#
+# Multimedia drivers
+#
+# CONFIG_MEDIA_ATTACH is not set
+CONFIG_MEDIA_TUNER=y
+# CONFIG_MEDIA_TUNER_CUSTOMISE is not set
+CONFIG_MEDIA_TUNER_SIMPLE=y
+CONFIG_MEDIA_TUNER_TDA8290=y
+CONFIG_MEDIA_TUNER_TDA9887=y
+CONFIG_MEDIA_TUNER_TEA5761=y
+CONFIG_MEDIA_TUNER_TEA5767=y
+CONFIG_MEDIA_TUNER_MT20XX=y
+CONFIG_MEDIA_TUNER_XC2028=y
+CONFIG_MEDIA_TUNER_XC5000=y
+CONFIG_MEDIA_TUNER_MC44S803=y
+CONFIG_VIDEO_V4L2=y
+CONFIG_VIDEOBUF_GEN=y
+CONFIG_VIDEOBUF_DMA_CONTIG=y
+CONFIG_VIDEO_CAPTURE_DRIVERS=y
+# CONFIG_VIDEO_ADV_DEBUG is not set
+# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set
+CONFIG_VIDEO_HELPER_CHIPS_AUTO=y
+# CONFIG_VIDEO_VIVI is not set
+# CONFIG_VIDEO_SAA5246A is not set
+# CONFIG_VIDEO_SAA5249 is not set
+CONFIG_SOC_CAMERA=y
+# CONFIG_SOC_CAMERA_MT9M001 is not set
+# CONFIG_SOC_CAMERA_MT9M111 is not set
+# CONFIG_SOC_CAMERA_MT9T031 is not set
+# CONFIG_SOC_CAMERA_MT9V022 is not set
+# CONFIG_SOC_CAMERA_TW9910 is not set
+# CONFIG_SOC_CAMERA_PLATFORM is not set
+CONFIG_SOC_CAMERA_OV772X=y
+CONFIG_VIDEO_SH_MOBILE_CEU=y
+CONFIG_V4L_USB_DRIVERS=y
+# CONFIG_USB_VIDEO_CLASS is not set
+CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y
+CONFIG_USB_GSPCA=m
+# CONFIG_USB_M5602 is not set
+# CONFIG_USB_STV06XX is not set
+# CONFIG_USB_GSPCA_CONEX is not set
+# CONFIG_USB_GSPCA_ETOMS is not set
+# CONFIG_USB_GSPCA_FINEPIX is not set
+# CONFIG_USB_GSPCA_MARS is not set
+# CONFIG_USB_GSPCA_MR97310A is not set
+# CONFIG_USB_GSPCA_OV519 is not set
+# CONFIG_USB_GSPCA_OV534 is not set
+# CONFIG_USB_GSPCA_PAC207 is not set
+# CONFIG_USB_GSPCA_PAC7311 is not set
+# CONFIG_USB_GSPCA_SONIXB is not set
+# CONFIG_USB_GSPCA_SONIXJ is not set
+# CONFIG_USB_GSPCA_SPCA500 is not set
+# CONFIG_USB_GSPCA_SPCA501 is not set
+# CONFIG_USB_GSPCA_SPCA505 is not set
+# CONFIG_USB_GSPCA_SPCA506 is not set
+# CONFIG_USB_GSPCA_SPCA508 is not set
+# CONFIG_USB_GSPCA_SPCA561 is not set
+# CONFIG_USB_GSPCA_SQ905 is not set
+# CONFIG_USB_GSPCA_SQ905C is not set
+# CONFIG_USB_GSPCA_STK014 is not set
+# CONFIG_USB_GSPCA_SUNPLUS is not set
+# CONFIG_USB_GSPCA_T613 is not set
+# CONFIG_USB_GSPCA_TV8532 is not set
+# CONFIG_USB_GSPCA_VC032X is not set
+# CONFIG_USB_GSPCA_ZC3XX is not set
+# CONFIG_VIDEO_PVRUSB2 is not set
+# CONFIG_VIDEO_HDPVR is not set
+# CONFIG_VIDEO_EM28XX is not set
+# CONFIG_VIDEO_CX231XX is not set
+# CONFIG_VIDEO_USBVISION is not set
+# CONFIG_USB_ET61X251 is not set
+# CONFIG_USB_SN9C102 is not set
+# CONFIG_USB_ZC0301 is not set
+CONFIG_USB_PWC_INPUT_EVDEV=y
+# CONFIG_USB_ZR364XX is not set
+# CONFIG_USB_STKWEBCAM is not set
+# CONFIG_USB_S2255 is not set
+# CONFIG_RADIO_ADAPTERS is not set
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+# CONFIG_FB_CFB_FILLRECT is not set
+# CONFIG_FB_CFB_COPYAREA is not set
+# CONFIG_FB_CFB_IMAGEBLIT is not set
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+CONFIG_FB_SYS_FILLRECT=y
+CONFIG_FB_SYS_COPYAREA=y
+CONFIG_FB_SYS_IMAGEBLIT=y
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+CONFIG_FB_SYS_FOPS=y
+CONFIG_FB_DEFERRED_IO=y
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+CONFIG_FB_SH_MOBILE_LCDC=y
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+# CONFIG_LOGO_SUPERH_MONO is not set
+# CONFIG_LOGO_SUPERH_VGA16 is not set
+CONFIG_LOGO_SUPERH_CLUT224=y
+# CONFIG_SOUND is not set
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
+# CONFIG_HIDRAW is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+# CONFIG_HID_PID is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
+# Special HID drivers
+#
+# CONFIG_HID_A4TECH is not set
+# CONFIG_HID_APPLE is not set
+# CONFIG_HID_BELKIN is not set
+# CONFIG_HID_CHERRY is not set
+# CONFIG_HID_CHICONY is not set
+# CONFIG_HID_CYPRESS is not set
+# CONFIG_DRAGONRISE_FF is not set
+# CONFIG_HID_EZKEY is not set
+# CONFIG_HID_KYE is not set
+# CONFIG_HID_GYRATION is not set
+# CONFIG_HID_KENSINGTON is not set
+# CONFIG_HID_LOGITECH is not set
+# CONFIG_HID_MICROSOFT is not set
+# CONFIG_HID_MONTEREY is not set
+# CONFIG_HID_NTRIG is not set
+# CONFIG_HID_PANTHERLORD is not set
+# CONFIG_HID_PETALYNX is not set
+# CONFIG_HID_SAMSUNG is not set
+# CONFIG_HID_SONY is not set
+# CONFIG_HID_SUNPLUS is not set
+# CONFIG_GREENASIA_FF is not set
+# CONFIG_HID_TOPSEED is not set
+# CONFIG_THRUSTMASTER_FF is not set
+# CONFIG_ZEROPLUS_FF is not set
+CONFIG_USB_SUPPORT=y
+CONFIG_USB_ARCH_HAS_HCD=y
+# CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+CONFIG_USB_DEVICE_CLASS=y
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
+CONFIG_USB_MON=y
+# CONFIG_USB_WUSB is not set
+# CONFIG_USB_WUSB_CBAF is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_C67X00_HCD is not set
+# CONFIG_USB_OXU210HP_HCD is not set
+# CONFIG_USB_ISP116X_HCD is not set
+# CONFIG_USB_ISP1760_HCD is not set
+# CONFIG_USB_SL811_HCD is not set
+CONFIG_USB_R8A66597_HCD=y
+# CONFIG_USB_HWA_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+# CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
+
+#
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
+#
+
+#
+# also be needed; see USB_STORAGE Help for more info
+#
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_STORAGE_ONETOUCH is not set
+# CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_SEVSEG is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_BERRY_CHARGE is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYPRESS_CY7C63 is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_FTDI_ELAN is not set
+# CONFIG_USB_APPLEDISPLAY is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TRANCEVIBRATOR is not set
+# CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_VST is not set
+# CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_USB_GPIO_VBUS is not set
+# CONFIG_NOP_USB_XCEIV is not set
+CONFIG_MMC=y
+# CONFIG_MMC_DEBUG is not set
+# CONFIG_MMC_UNSAFE_RESUME is not set
+
+#
+# MMC/SD/SDIO Card Drivers
+#
+CONFIG_MMC_BLOCK=y
+CONFIG_MMC_BLOCK_BOUNCE=y
+# CONFIG_SDIO_UART is not set
+# CONFIG_MMC_TEST is not set
+
+#
+# MMC/SD/SDIO Host Controller Drivers
+#
+# CONFIG_MMC_SDHCI is not set
+CONFIG_MMC_SPI=y
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+CONFIG_RTC_DRV_PCF8563=y
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+# CONFIG_RTC_DRV_M41T94 is not set
+# CONFIG_RTC_DRV_DS1305 is not set
+# CONFIG_RTC_DRV_DS1390 is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+# CONFIG_RTC_DRV_R9701 is not set
+# CONFIG_RTC_DRV_RS5C348 is not set
+# CONFIG_RTC_DRV_DS3234 is not set
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+# CONFIG_RTC_DRV_SH is not set
+# CONFIG_RTC_DRV_GENERIC is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_UIO=y
+# CONFIG_UIO_PDRV is not set
+CONFIG_UIO_PDRV_GENIRQ=y
+# CONFIG_UIO_SMX is not set
+# CONFIG_UIO_SERCOS3 is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+# CONFIG_EXT4_FS is not set
+CONFIG_JBD=y
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+# CONFIG_MSDOS_FS is not set
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_UBIFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+CONFIG_NLS_CODEPAGE_932=y
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_KERNEL is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_SH_STANDARD_BIOS is not set
+# CONFIG_EARLY_SCIF_CONSOLE is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+CONFIG_CRC_T10DIF=y
+CONFIG_CRC_ITU_T=y
+CONFIG_CRC32=y
+CONFIG_CRC7=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/sh/include/mach-se/mach/se7724.h b/arch/sh/include/mach-se/mach/se7724.h
new file mode 100644
index 0000000..74164b6
--- /dev/null
+++ b/arch/sh/include/mach-se/mach/se7724.h
@@ -0,0 +1,67 @@
+#ifndef __ASM_SH_SE7724_H
+#define __ASM_SH_SE7724_H
+
+/*
+ * linux/include/asm-sh/se7724.h
+ *
+ * Copyright (C) 2009 Renesas Solutions Corp.
+ *
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Hitachi UL SolutionEngine 7724 Support.
+ *
+ * Based on se7722.h
+ * Copyright (C) 2007  Nobuhiro Iwamatsu
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ */
+#include <asm/addrspace.h>
+
+#define PA_LED		(0xba203000)	/* 8bit LED */
+#define IRQ_MODE	(0xba200010)
+#define IRQ0_SR		(0xba200014)
+#define IRQ1_SR		(0xba200018)
+#define IRQ2_SR		(0xba20001c)
+#define IRQ0_MR		(0xba200020)
+#define IRQ1_MR		(0xba200024)
+#define IRQ2_MR		(0xba200028)
+
+/* IRQ */
+#define IRQ0_IRQ        32
+#define IRQ1_IRQ        33
+#define IRQ2_IRQ        34
+
+/* Bits in IRQ012 registers */
+#define SE7724_FPGA_IRQ_BASE	220
+
+/* IRQ0 */
+#define IRQ0_BASE	SE7724_FPGA_IRQ_BASE
+#define IRQ0_KEY	(IRQ0_BASE + 12)
+#define IRQ0_RMII	(IRQ0_BASE + 13)
+#define IRQ0_SMC	(IRQ0_BASE + 14)
+#define IRQ0_MASK	0x7fff
+#define IRQ0_END	IRQ0_SMC
+/* IRQ1 */
+#define IRQ1_BASE	(IRQ0_END + 1)
+#define IRQ1_TS		(IRQ1_BASE + 0)
+#define IRQ1_MASK	0x0001
+#define IRQ1_END	IRQ1_TS
+/* IRQ2 */
+#define IRQ2_BASE	(IRQ1_END + 1)
+#define IRQ2_USB0	(IRQ1_BASE + 0)
+#define IRQ2_USB1	(IRQ1_BASE + 1)
+#define IRQ2_MASK	0x0003
+#define IRQ2_END	IRQ2_USB1
+
+#define SE7724_FPGA_IRQ_NR	(IRQ2_END - IRQ0_BASE)
+
+/* arch/sh/boards/se/7724/irq.c */
+void init_se7724_IRQ(void);
+
+#define __IO_PREFIX		se7724
+#include <asm/io_generic.h>
+
+#endif  /* __ASM_SH_SE7724_H */
-- 
cgit v0.10.2


From 5b25ab29bad3114f798b136b4147f255a5d5742f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 26 May 2009 17:02:18 +0900
Subject: sh: Record ms7724se in mach-types.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/tools/mach-types b/arch/sh/tools/mach-types
index 8477b5d..fec3a53 100644
--- a/arch/sh/tools/mach-types
+++ b/arch/sh/tools/mach-types
@@ -23,6 +23,7 @@ HD64461			HD64461
 7619SE			SH_7619_SOLUTION_ENGINE
 7721SE			SH_7721_SOLUTION_ENGINE
 7722SE			SH_7722_SOLUTION_ENGINE
+7724SE			SH_7724_SOLUTION_ENGINE
 7751SE			SH_7751_SOLUTION_ENGINE
 7780SE			SH_7780_SOLUTION_ENGINE
 7751SYSTEMH		SH_7751_SYSTEMH
-- 
cgit v0.10.2


From 69aa48ab82e17299efe2be6c21795945731a6c17 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 09:02:27 +0200
Subject: perf record: Straighten out argv types

[ Impact: cleanup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 1b19f18..f225efa 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -191,7 +191,7 @@ static void display_help(void)
 	exit(0);
 }
 
-static void process_options(int argc, const char *argv[])
+static void process_options(int argc, char * const argv[])
 {
 	int error = 0, counter;
 
@@ -538,7 +538,7 @@ static void open_counters(int cpu, pid_t pid)
 	nr_cpu++;
 }
 
-int cmd_record(int argc, const char **argv)
+int cmd_record(int argc, char * const argv[])
 {
 	int i, counter;
 	pid_t pid;
-- 
cgit v0.10.2


From 4e97ddf09ee3ce715fc334399bae4cc0c0a13057 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 10:07:44 +0200
Subject: perf stat: Remove unused variable

[ Impact: cleanup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 88c70be..c1053d8 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -541,8 +541,6 @@ static void skip_signal(int signo)
 
 int cmd_stat(int argc, char **argv, const char *prefix)
 {
-	sigset_t blocked;
-
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	process_options(argc, argv);
-- 
cgit v0.10.2


From c9904dd15922f349b5f06839e34b1723d4a75940 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 25 May 2009 08:10:19 +0000
Subject: sh: add pll_clk to sh7785

This patch converts the sh7785 pll implementation from the
all-in-one code in frqmr_recalc() and frqmr_build_rate_table()
to a separate struct clk. This allows us to remove the processor
specific multiplier and use generic rate table functions.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index cf042b5..7021ab0 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -56,12 +56,7 @@ static unsigned long frqmr_recalc(struct clk *clk)
 
 	idx = (__raw_readl(FRQMR1) >> data->shift) & 0x000f;
 
-	/*
-	 * XXX: PLL1 multiplier is locked for the default clock mode,
-	 * when mode pin detection and configuration support is added,
-	 * select the multiplier dynamically.
-	 */
-	return clk->parent->rate * 36 / div2[idx];
+	return clk->parent->rate / div2[idx];
 }
 
 static void frqmr_build_rate_table(struct clk *clk)
@@ -75,7 +70,7 @@ static void frqmr_build_rate_table(struct clk *clk)
 
 		data->freq_table[entry].index = entry;
 		data->freq_table[entry].frequency =
-			clk->parent->rate * 36 / div2[i];
+			clk->parent->rate / div2[i];
 
 		entry++;
 	}
@@ -136,6 +131,20 @@ static struct clk_ops frqmr_clk_ops = {
 	.round_rate		= frqmr_round_rate,
 };
 
+static unsigned long pll_recalc(struct clk *clk)
+{
+	/*
+	 * XXX: PLL1 multiplier is locked for the default clock mode,
+	 * when mode pin detection and configuration support is added,
+	 * select the multiplier dynamically.
+	 */
+	return clk->parent->rate * 36;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
 /*
  * Default rate for the root input clock, reset this with clk_set_rate()
  * from the platform code.
@@ -146,11 +155,19 @@ static struct clk extal_clk = {
 	.rate		= 33333333,
 };
 
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.parent		= &extal_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
 static struct clk cpu_clk = {
 	.name		= "cpu_clk",		/* Ick */
 	.id		= -1,
 	.ops		= &frqmr_clk_ops,
-	.parent		= &extal_clk,
+	.parent		= &pll_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
 	.priv		= &ifc_data,
 };
@@ -159,7 +176,7 @@ static struct clk shyway_clk = {
 	.name		= "shyway_clk",		/* SHck */
 	.id		= -1,
 	.ops		= &frqmr_clk_ops,
-	.parent		= &extal_clk,
+	.parent		= &pll_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
 	.priv		= &sfc_data,
 };
@@ -168,7 +185,7 @@ static struct clk peripheral_clk = {
 	.name		= "peripheral_clk",	/* Pck */
 	.id		= -1,
 	.ops		= &frqmr_clk_ops,
-	.parent		= &extal_clk,
+	.parent		= &pll_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
 	.priv		= &pfc_data,
 };
@@ -177,7 +194,7 @@ static struct clk ddr_clk = {
 	.name		= "ddr_clk",		/* DDRck */
 	.id		= -1,
 	.ops		= &frqmr_clk_ops,
-	.parent		= &extal_clk,
+	.parent		= &pll_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
 	.priv		= &mfc_data,
 };
@@ -186,7 +203,7 @@ static struct clk bus_clk = {
 	.name		= "bus_clk",		/* Bck */
 	.id		= -1,
 	.ops		= &frqmr_clk_ops,
-	.parent		= &extal_clk,
+	.parent		= &pll_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
 	.priv		= &bfc_data,
 };
@@ -195,7 +212,7 @@ static struct clk ga_clk = {
 	.name		= "ga_clk",		/* GAck */
 	.id		= -1,
 	.ops		= &frqmr_clk_ops,
-	.parent		= &extal_clk,
+	.parent		= &pll_clk,
 	.priv		= &s2fc_data,
 };
 
@@ -203,7 +220,7 @@ static struct clk du_clk = {
 	.name		= "du_clk",		/* DUck */
 	.id		= -1,
 	.ops		= &frqmr_clk_ops,
-	.parent		= &extal_clk,
+	.parent		= &pll_clk,
 	.priv		= &s3fc_data,
 };
 
@@ -211,13 +228,14 @@ static struct clk umem_clk = {
 	.name		= "umem_clk",		/* uck */
 	.id		= -1,
 	.ops		= &frqmr_clk_ops,
-	.parent		= &extal_clk,
+	.parent		= &pll_clk,
 	.flags		= CLK_ENABLE_ON_INIT,
 	.priv		= &ufc_data,
 };
 
 static struct clk *clks[] = {
 	&extal_clk,
+	&pll_clk,
 	&cpu_clk,
 	&shyway_clk,
 	&peripheral_clk,
-- 
cgit v0.10.2


From c94a85746f7bdc13035acdf88c130d7b6fa41bde Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 25 May 2009 08:10:28 +0000
Subject: sh: add shared clock framework frequency table code

Add SuperH-specific clock framework helper functions:
- clk_rate_table_build() - build cpufreq table from divisors/multipliers
- clk_rate_table_round() - use cpufreq table to find matching frequency

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 64c93cb..60d5d2b 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -100,4 +100,22 @@ enum clk_sh_algo_id {
 	IP_N1,
 };
 
+struct clk_div_mult_table {
+	unsigned int *divisors;
+	unsigned int nr_divisors;
+	unsigned int *multipliers;
+	unsigned int nr_multipliers;
+};
+
+struct cpufreq_frequency_table;
+void clk_rate_table_build(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  int nr_freqs,
+			  struct clk_div_mult_table *src_table,
+			  unsigned long *bitmap);
+
+long clk_rate_table_round(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  unsigned long rate);
+
 #endif /* __ASM_SH_CLOCK_H */
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 012d234..59764d6 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -29,6 +29,7 @@
 #include <linux/err.h>
 #include <linux/platform_device.h>
 #include <linux/debugfs.h>
+#include <linux/cpufreq.h>
 #include <asm/clock.h>
 #include <asm/machvec.h>
 
@@ -36,6 +37,80 @@ static LIST_HEAD(clock_list);
 static DEFINE_SPINLOCK(clock_lock);
 static DEFINE_MUTEX(clock_list_sem);
 
+void clk_rate_table_build(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  int nr_freqs,
+			  struct clk_div_mult_table *src_table,
+			  unsigned long *bitmap)
+{
+	unsigned long mult, div;
+	unsigned long freq;
+	int i;
+
+	for (i = 0; i < nr_freqs; i++) {
+		div = 1;
+		mult = 1;
+
+		if (src_table->divisors && i < src_table->nr_divisors)
+			div = src_table->divisors[i];
+
+		if (src_table->multipliers && i < src_table->nr_multipliers)
+			mult = src_table->multipliers[i];
+
+		if (!div || !mult || (bitmap && !test_bit(i, bitmap)))
+			freq = CPUFREQ_ENTRY_INVALID;
+		else
+			freq = clk->parent->rate * mult / div;
+
+		freq_table[i].index = i;
+		freq_table[i].frequency = freq;
+	}
+
+	/* Termination entry */
+	freq_table[i].index = i;
+	freq_table[i].frequency = CPUFREQ_TABLE_END;
+}
+
+long clk_rate_table_round(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  unsigned long rate)
+{
+	unsigned long rate_error, rate_error_prev = ~0UL;
+	unsigned long rate_best_fit = rate;
+	unsigned long highest, lowest;
+	int i;
+
+	highest = lowest = 0;
+
+	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		unsigned long freq = freq_table[i].frequency;
+
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (freq > highest)
+			highest = freq;
+		if (freq < lowest)
+			lowest = freq;
+
+		rate_error = abs(freq - rate);
+		if (rate_error < rate_error_prev) {
+			rate_best_fit = freq;
+			rate_error_prev = rate_error;
+		}
+
+		if (rate_error == 0)
+			break;
+	}
+
+	if (rate >= highest)
+		rate_best_fit = highest;
+	if (rate <= lowest)
+		rate_best_fit = lowest;
+
+	return rate_best_fit;
+}
+
 /* Used for clocks that always have same value as the parent clock */
 unsigned long followparent_recalc(struct clk *clk)
 {
-- 
cgit v0.10.2


From df109e630f82de63ec82eebbfec2a57852517f28 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 25 May 2009 08:10:36 +0000
Subject: sh: use shared frequency tables on sh7785

This patch converts the sh7785 clock code to make use
of clk_rate_table_build() and clk_rate_table_round().
The ->build_rate_table() callback is removed, the
table building is instead handled in ->recalc().

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index 7021ab0..a4a9bcb 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -19,6 +19,12 @@
 
 static unsigned int div2[] = { 1, 2, 4, 6, 8, 12, 16, 18,
 			       24, 32, 36, 48 };
+
+static struct clk_div_mult_table cpg_div = {
+	.divisors = div2,
+	.nr_divisors = ARRAY_SIZE(div2),
+};
+
 struct clk_priv {
 	unsigned int			shift;
 
@@ -52,82 +58,23 @@ FRQMR_CLK_DATA(ifc, 28, 0x000e);
 static unsigned long frqmr_recalc(struct clk *clk)
 {
 	struct clk_priv *data = clk->priv;
-	unsigned int idx;
-
-	idx = (__raw_readl(FRQMR1) >> data->shift) & 0x000f;
-
-	return clk->parent->rate / div2[idx];
-}
-
-static void frqmr_build_rate_table(struct clk *clk)
-{
-	struct clk_priv *data = clk->priv;
-	int i, entry;
-
-	for (i = entry = 0; i < ARRAY_SIZE(div2); i++) {
-		if ((data->div_bitmap & (1 << i)) == 0)
-			continue;
+	unsigned int idx = (__raw_readl(FRQMR1) >> data->shift) & 0x000f;
 
-		data->freq_table[entry].index = entry;
-		data->freq_table[entry].frequency =
-			clk->parent->rate / div2[i];
-
-		entry++;
-	}
-
-	if (entry == 0) {
-		pr_warning("clkfwk: failed to build frequency table "
-			   "for \"%s\" clk!\n", clk->name);
-		return;
-	}
-
-	/* Termination entry */
-	data->freq_table[entry].index = entry;
-	data->freq_table[entry].frequency = CPUFREQ_TABLE_END;
+	clk_rate_table_build(clk, data->freq_table, ARRAY_SIZE(div2),
+			     &cpg_div, &data->div_bitmap);
+	
+	return data->freq_table[idx].frequency;
 }
 
 static long frqmr_round_rate(struct clk *clk, unsigned long rate)
 {
 	struct clk_priv *data = clk->priv;
-	unsigned long rate_error, rate_error_prev = ~0UL;
-	unsigned long rate_best_fit = rate;
-	unsigned long highest, lowest;
-	int i;
-
-	highest = lowest = 0;
-
-	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
-		unsigned long freq = data->freq_table[i].frequency;
-
-		if (freq == CPUFREQ_ENTRY_INVALID)
-			continue;
-
-		if (freq > highest)
-			highest = freq;
-		if (freq < lowest)
-			lowest = freq;
-
-		rate_error = abs(freq - rate);
-		if (rate_error < rate_error_prev) {
-			rate_best_fit = freq;
-			rate_error_prev = rate_error;
-		}
-
-		if (rate_error == 0)
-			break;
-	}
-
-	if (rate >= highest)
-		rate_best_fit = highest;
-	if (rate <= lowest)
-		rate_best_fit = lowest;
 
-	return rate_best_fit;
+	return clk_rate_table_round(clk, data->freq_table, rate);
 }
 
 static struct clk_ops frqmr_clk_ops = {
 	.recalc			= frqmr_recalc,
-	.build_rate_table	= frqmr_build_rate_table,
 	.round_rate		= frqmr_round_rate,
 };
 
-- 
cgit v0.10.2


From 61ce5393e4c8914c46ec99cbda76823515109709 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 25 May 2009 08:10:45 +0000
Subject: sh: remove clk_ops->build_rate_table()

This patch removes the ->build_rate_table() callback,
->recalc() may instead be used for this purpose.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 60d5d2b..aa9480d 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -16,7 +16,6 @@ struct clk_ops {
 	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
 	int (*set_parent)(struct clk *clk, struct clk *parent);
 	long (*round_rate)(struct clk *clk, unsigned long rate);
-	void (*build_rate_table)(struct clk *clk);
 };
 
 struct clk {
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 59764d6..aa0fd08 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -138,8 +138,6 @@ void propagate_rate(struct clk *tclk)
 	list_for_each_entry(clkp, &tclk->children, sibling) {
 		if (clkp->ops && clkp->ops->recalc)
 			clkp->rate = clkp->ops->recalc(clkp);
-		if (clkp->ops && clkp->ops->build_rate_table)
-			clkp->ops->build_rate_table(clkp);
 
 		propagate_rate(clkp);
 	}
-- 
cgit v0.10.2


From f9e2b97dc2cf832ccceea2a2e6eccc4bc9af72cf Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 26 May 2009 17:12:20 +0900
Subject: sh: Add a KBUILD_DEFCONFIG for sh64.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index a47fbd2..0fe35cf 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -82,7 +82,6 @@ defaultimage-$(CONFIG_SH_7206_SOLUTION_ENGINE)	:= vmlinux
 defaultimage-$(CONFIG_SH_7619_SOLUTION_ENGINE)	:= vmlinux
 
 # Set some sensible Kbuild defaults
-KBUILD_DEFCONFIG	:= shx3_defconfig
 KBUILD_IMAGE		:= $(defaultimage-y)
 
 #
@@ -90,15 +89,17 @@ KBUILD_IMAGE		:= $(defaultimage-y)
 # error messages during linking.
 #
 ifdef CONFIG_SUPERH32
-UTS_MACHINE	:= sh
-BITS		:= 32
-LDFLAGS_vmlinux	+= -e _stext
+UTS_MACHINE		:= sh
+BITS			:= 32
+LDFLAGS_vmlinux		+= -e _stext
+KBUILD_DEFCONFIG	:= shx3_defconfig
 else
-UTS_MACHINE	:= sh64
-BITS		:= 64
-LDFLAGS_vmlinux	+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
-		   --defsym phys_stext_shmedia=phys_stext+1 \
-		   -e phys_stext_shmedia
+UTS_MACHINE		:= sh64
+BITS			:= 64
+LDFLAGS_vmlinux		+= --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \
+			   --defsym phys_stext_shmedia=phys_stext+1 \
+			   -e phys_stext_shmedia
+KBUILD_DEFCONFIG	:= cayman_defconfig
 endif
 
 ifneq ($(SUBARCH),$(ARCH))
-- 
cgit v0.10.2


From 0e9b20b8a1cab6c6ab4f98f917a2d98783103969 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 09:17:18 +0200
Subject: perf record: Convert to Git option parsing

Remove getopt usage and use Git's much more advanced and more compact
command option library.

Git's library (util/parse-options.[ch]) constructs help texts and
error messages automatically, and has a number of other convenience
features as well.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index f225efa..f12a782 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -2,6 +2,8 @@
 
 #include "perf.h"
 #include "util/util.h"
+#include "util/parse-options.h"
+#include "util/exec_cmd.h"
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -11,7 +13,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
-#include <getopt.h>
 #include <assert.h>
 #include <fcntl.h>
 #include <stdio.h>
@@ -33,8 +34,8 @@
 
 
-#define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
-#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
+#define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
 
 static int			nr_counters			=  0;
 static __u64			event_id[MAX_COUNTERS]		= { };
@@ -45,7 +46,7 @@ static int			nr_cpus				=  0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			= 16;
 static int			output;
-static char 			*output_name			= "output.perf";
+static const char		*output_name			= "output.perf";
 static int			group				= 0;
 static unsigned int		realtime_prio			= 0;
 static int			system_wide			= 0;
@@ -62,192 +63,6 @@ const unsigned int default_count[] = {
 	  10000,
 };
 
-struct event_symbol {
-	__u64 event;
-	char *symbol;
-};
-
-static struct event_symbol event_symbols[] = {
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
-
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
-};
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static __u64 match_event_symbols(char *str)
-{
-	__u64 config, id;
-	int type;
-	unsigned int i;
-
-	if (sscanf(str, "r%llx", &config) == 1)
-		return config | PERF_COUNTER_RAW_MASK;
-
-	if (sscanf(str, "%d:%llu", &type, &id) == 2)
-		return EID(type, id);
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
-	}
-
-	return ~0ULL;
-}
-
-static int parse_events(char *str)
-{
-	__u64 config;
-
-again:
-	if (nr_counters == MAX_COUNTERS)
-		return -1;
-
-	config = match_event_symbols(str);
-	if (config == ~0ULL)
-		return -1;
-
-	event_id[nr_counters] = config;
-	nr_counters++;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-
-	return 0;
-}
-
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
-
-static void display_events_help(void)
-{
-	unsigned int i;
-	__u64 e;
-
-	printf(
-	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		int type, id;
-
-		e = event_symbols[i].event;
-		type = PERF_COUNTER_TYPE(e);
-		id = PERF_COUNTER_ID(e);
-
-		printf("\n                             %d:%d: %-20s",
-				type, id, event_symbols[i].symbol);
-	}
-
-	printf("\n"
-	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
-}
-
-static void display_help(void)
-{
-	printf(
-	"Usage: perf-record [<options>] <cmd>\n"
-	"perf-record Options (up to %d event types can be specified at once):\n\n",
-		 MAX_COUNTERS);
-
-	display_events_help();
-
-	printf(
-	" -c CNT    --count=CNT          # event period to sample\n"
-	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
-	" -o file   --output=<file>      # output file\n"
-	" -p pid    --pid=<pid>		 # record events on existing pid\n"
-	" -r prio   --realtime=<prio>    # use RT prio\n"
-	" -s        --system             # system wide profiling\n"
-	);
-
-	exit(0);
-}
-
-static void process_options(int argc, char * const argv[])
-{
-	int error = 0, counter;
-
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"count",	required_argument,	NULL, 'c'},
-			{"event",	required_argument,	NULL, 'e'},
-			{"mmap_pages",	required_argument,	NULL, 'm'},
-			{"output",	required_argument,	NULL, 'o'},
-			{"pid",		required_argument,	NULL, 'p'},
-			{"realtime",	required_argument,	NULL, 'r'},
-			{"system",	no_argument,		NULL, 's'},
-			{"inherit",	no_argument,		NULL, 'i'},
-			{"nmi",		no_argument,		NULL, 'n'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "+:c:e:m:o:p:r:sin",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'c': default_interval		=   atoi(optarg); break;
-		case 'e': error				= parse_events(optarg); break;
-		case 'm': mmap_pages			=   atoi(optarg); break;
-		case 'o': output_name			= strdup(optarg); break;
-		case 'p': target_pid			=   atoi(optarg); break;
-		case 'r': realtime_prio			=   atoi(optarg); break;
-		case 's': system_wide                   ^=             1; break;
-		case 'i': inherit			^=	       1; break;
-		case 'n': nmi				^=	       1; break;
-		default: error = 1; break;
-		}
-	}
-
-	if (argc - optind == 0 && target_pid == -1)
-		error = 1;
-
-	if (error)
-		display_help();
-
-	if (!nr_counters) {
-		nr_counters = 1;
-		event_id[0] = 0;
-	}
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
-			continue;
-
-		event_count[counter] = default_interval;
-	}
-}
-
 struct mmap_data {
 	int counter;
 	void *base;
@@ -538,16 +353,13 @@ static void open_counters(int cpu, pid_t pid)
 	nr_cpu++;
 }
 
-int cmd_record(int argc, char * const argv[])
+static int __cmd_record(int argc, const char **argv)
 {
 	int i, counter;
 	pid_t pid;
 	int ret;
 
 	page_size = sysconf(_SC_PAGE_SIZE);
-
-	process_options(argc, argv);
-
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
@@ -558,9 +370,6 @@ int cmd_record(int argc, char * const argv[])
 		exit(-1);
 	}
 
-	argc -= optind;
-	argv += optind;
-
 	if (!system_wide) {
 		open_counters(-1, target_pid != -1 ? target_pid : 0);
 	} else for (i = 0; i < nr_cpus; i++)
@@ -575,7 +384,7 @@ int cmd_record(int argc, char * const argv[])
 			perror("failed to fork");
 
 		if (!pid) {
-			if (execvp(argv[0], argv)) {
+			if (execvp(argv[0], (char **)argv)) {
 				perror(argv[0]);
 				exit(-1);
 			}
@@ -610,3 +419,170 @@ int cmd_record(int argc, char * const argv[])
 
 	return 0;
 }
+
+struct event_symbol {
+	__u64 event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols[] = {
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
+
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
+};
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static __u64 match_event_symbols(const char *str)
+{
+	__u64 config, id;
+	int type;
+	unsigned int i;
+
+	if (sscanf(str, "r%llx", &config) == 1)
+		return config | PERF_COUNTER_RAW_MASK;
+
+	if (sscanf(str, "%d:%llu", &type, &id) == 2)
+		return EID(type, id);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return ~0ULL;
+}
+
+static int parse_events(const struct option *opt, const char *str, int unset)
+{
+	__u64 config;
+
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	config = match_event_symbols(str);
+	if (config == ~0ULL)
+		return -1;
+
+	event_id[nr_counters] = config;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+static char events_help[100000];
+
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+
+
+static void create_events_help(void)
+{
+	unsigned int i;
+	char *str;
+	__u64 e;
+
+	str = events_help;
+
+	str += sprintf(str,
+	"event name: [");
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		int type, id;
+
+		e = event_symbols[i].event;
+		type = PERF_COUNTER_TYPE(e);
+		id = PERF_COUNTER_ID(e);
+
+		if (i)
+			str += sprintf(str, "|");
+
+		str += sprintf(str, "%s",
+				event_symbols[i].symbol);
+	}
+
+	str += sprintf(str, "|rNNN]");
+}
+
+static const char * const record_usage[] = {
+	"perf record [<options>] <command>",
+	NULL
+};
+
+const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     events_help, parse_events),
+	OPT_INTEGER('c', "count", &default_interval,
+		    "event period to sample"),
+	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
+		    "number of mmap data pages"),
+	OPT_STRING('o', "output", &output_name, "file",
+		    "output file name"),
+	OPT_BOOLEAN('i', "inherit", &inherit,
+		    "child tasks inherit counters"),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "record events on existing pid"),
+	OPT_INTEGER('r', "realtime", &realtime_prio,
+		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_END()
+};
+
+int cmd_record(int argc, const char **argv, const char *prefix)
+{
+	int counter;
+
+	create_events_help();
+
+	argc = parse_options(argc, argv, options, record_usage, 0);
+	if (!argc)
+		usage_with_options(record_usage, options);
+
+	if (!nr_counters) {
+		nr_counters = 1;
+		event_id[0] = 0;
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (event_count[counter])
+			continue;
+
+		event_count[counter] = default_interval;
+	}
+
+	return __cmd_record(argc, argv);
+}
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 4bed265..626b3207 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -42,13 +42,16 @@
   * Released under the GPL v2. (and only v2, not any later version)
   */
 
+
 #include "perf.h"
 #include "util/util.h"
 
 #include <getopt.h>
 #include <assert.h>
 #include <fcntl.h>
+
 #include <stdio.h>
+
 #include <errno.h>
 #include <time.h>
 #include <sched.h>
-- 
cgit v0.10.2


From 8ad8db3788fd9a449941fb2392ca85af4ee1cde1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 11:10:09 +0200
Subject: perf_counter tools: Librarize event string parsing

Extract the event string parser from builtin-record.c, and
librarize it - to be reused in other commands.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 481e4c2..45daa72 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -290,6 +290,7 @@ LIB_H += ../../include/linux/perf_counter.h
 LIB_H += perf.h
 LIB_H += util/levenshtein.h
 LIB_H += util/parse-options.h
+LIB_H += util/parse-events.h
 LIB_H += util/quote.h
 LIB_H += util/util.h
 LIB_H += util/help.h
@@ -304,6 +305,7 @@ LIB_OBJS += util/exec_cmd.o
 LIB_OBJS += util/help.o
 LIB_OBJS += util/levenshtein.o
 LIB_OBJS += util/parse-options.o
+LIB_OBJS += util/parse-events.o
 LIB_OBJS += util/path.o
 LIB_OBJS += util/run-command.o
 LIB_OBJS += util/quote.o
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index f12a782..6fa6ed6 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -3,44 +3,17 @@
 #include "perf.h"
 #include "util/util.h"
 #include "util/parse-options.h"
+#include "util/parse-events.h"
 #include "util/exec_cmd.h"
 
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <assert.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <errno.h>
-#include <time.h>
 #include <sched.h>
-#include <pthread.h>
-
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/prctl.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <sys/mman.h>
-
-#include <linux/unistd.h>
-#include <linux/types.h>
-
-
 
 #define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
 #define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
 
-static int			nr_counters			=  0;
-static __u64			event_id[MAX_COUNTERS]		= { };
 static int			default_interval = 100000;
 static int			event_count[MAX_COUNTERS];
+
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 static int			nr_cpus				=  0;
 static unsigned int		page_size;
@@ -420,131 +393,16 @@ static int __cmd_record(int argc, const char **argv)
 	return 0;
 }
 
-struct event_symbol {
-	__u64 event;
-	char *symbol;
-};
-
-static struct event_symbol event_symbols[] = {
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
-
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
-};
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static __u64 match_event_symbols(const char *str)
-{
-	__u64 config, id;
-	int type;
-	unsigned int i;
-
-	if (sscanf(str, "r%llx", &config) == 1)
-		return config | PERF_COUNTER_RAW_MASK;
-
-	if (sscanf(str, "%d:%llu", &type, &id) == 2)
-		return EID(type, id);
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
-	}
-
-	return ~0ULL;
-}
-
-static int parse_events(const struct option *opt, const char *str, int unset)
-{
-	__u64 config;
-
-again:
-	if (nr_counters == MAX_COUNTERS)
-		return -1;
-
-	config = match_event_symbols(str);
-	if (config == ~0ULL)
-		return -1;
-
-	event_id[nr_counters] = config;
-	nr_counters++;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-
-	return 0;
-}
-
-static char events_help[100000];
-
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
-
-
-
-static void create_events_help(void)
-{
-	unsigned int i;
-	char *str;
-	__u64 e;
-
-	str = events_help;
-
-	str += sprintf(str,
-	"event name: [");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		int type, id;
-
-		e = event_symbols[i].event;
-		type = PERF_COUNTER_TYPE(e);
-		id = PERF_COUNTER_ID(e);
-
-		if (i)
-			str += sprintf(str, "|");
-
-		str += sprintf(str, "%s",
-				event_symbols[i].symbol);
-	}
-
-	str += sprintf(str, "|rNNN]");
-}
-
 static const char * const record_usage[] = {
 	"perf record [<options>] <command>",
 	NULL
 };
 
+static char events_help_msg[EVENTS_HELP_MAX];
+
 const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
-		     events_help, parse_events),
+		     events_help_msg, parse_events),
 	OPT_INTEGER('c', "count", &default_interval,
 		    "event period to sample"),
 	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
@@ -566,7 +424,7 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 {
 	int counter;
 
-	create_events_help();
+	create_events_help(events_help_msg);
 
 	argc = parse_options(argc, argv, options, record_usage, 0);
 	if (!argc)
diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
new file mode 100644
index 0000000..77d0917
--- /dev/null
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -0,0 +1,127 @@
+
+#include "../perf.h"
+#include "util.h"
+#include "parse-options.h"
+#include "parse-events.h"
+#include "exec_cmd.h"
+
+int nr_counters;
+
+__u64			event_id[MAX_COUNTERS]		= { };
+
+struct event_symbol {
+	__u64 event;
+	char *symbol;
+};
+
+static struct event_symbol event_symbols[] = {
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
+	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
+
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
+	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
+};
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static __u64 match_event_symbols(const char *str)
+{
+	__u64 config, id;
+	int type;
+	unsigned int i;
+
+	if (sscanf(str, "r%llx", &config) == 1)
+		return config | PERF_COUNTER_RAW_MASK;
+
+	if (sscanf(str, "%d:%llu", &type, &id) == 2)
+		return EID(type, id);
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol)))
+			return event_symbols[i].event;
+	}
+
+	return ~0ULL;
+}
+
+int parse_events(const struct option *opt, const char *str, int unset)
+{
+	__u64 config;
+
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	config = match_event_symbols(str);
+	if (config == ~0ULL)
+		return -1;
+
+	event_id[nr_counters] = config;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+/*
+ * Create the help text for the event symbols:
+ */
+void create_events_help(char *events_help_msg)
+{
+	unsigned int i;
+	char *str;
+	__u64 e;
+
+	str = events_help_msg;
+
+	str += sprintf(str,
+	"event name: [");
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		int type, id;
+
+		e = event_symbols[i].event;
+		type = PERF_COUNTER_TYPE(e);
+		id = PERF_COUNTER_ID(e);
+
+		if (i)
+			str += sprintf(str, "|");
+
+		str += sprintf(str, "%s",
+				event_symbols[i].symbol);
+	}
+
+	str += sprintf(str, "|rNNN]");
+}
+
diff --git a/Documentation/perf_counter/util/parse-events.h b/Documentation/perf_counter/util/parse-events.h
new file mode 100644
index 0000000..6e2ebe5
--- /dev/null
+++ b/Documentation/perf_counter/util/parse-events.h
@@ -0,0 +1,10 @@
+
+extern int nr_counters;
+extern __u64			event_id[MAX_COUNTERS];
+
+extern int parse_events(const struct option *opt, const char *str, int unset);
+
+#define EVENTS_HELP_MAX (128*1024)
+
+extern void create_events_help(char *help_msg);
+
-- 
cgit v0.10.2


From 5242519b0296d128425368fc6ab17f541d5fa775 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 09:17:18 +0200
Subject: perf stat: Convert to Git option parsing

Remove getopt usage and use Git's much more advanced and more compact
command option library.

Extend the event parser library with the extensions that were in
perf-stat before.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 6fa6ed6..ec2b787 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -4,7 +4,6 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
-#include "util/exec_cmd.h"
 
 #include <sched.h>
 
@@ -400,7 +399,7 @@ static const char * const record_usage[] = {
 
 static char events_help_msg[EVENTS_HELP_MAX];
 
-const struct option options[] = {
+static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
 		     events_help_msg, parse_events),
 	OPT_INTEGER('c', "count", &default_interval,
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index c1053d8..e7cb941 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -1,35 +1,5 @@
 /*
- * kerneltop.c: show top kernel functions - performance counters showcase
-
-   Build with:
-
-     cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
-
-   Sample output:
-
-------------------------------------------------------------------------------
- KernelTop:    2669 irqs/sec  [NMI, cache-misses/cache-refs],  (all, cpu: 2)
-------------------------------------------------------------------------------
-
-             weight         RIP          kernel function
-             ______   ________________   _______________
-
-              35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
-              33.00 - ffffffff804cb740 : sock_alloc_send_skb
-              31.26 - ffffffff804ce808 : skb_push
-              22.43 - ffffffff80510004 : tcp_established_options
-              19.00 - ffffffff8027d250 : find_get_page
-              15.76 - ffffffff804e4fc9 : eth_type_trans
-              15.20 - ffffffff804d8baa : dst_release
-              14.86 - ffffffff804cf5d8 : skb_release_head_state
-              14.00 - ffffffff802217d5 : read_hpet
-              12.00 - ffffffff804ffb7f : __ip_local_out
-              11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
-               8.54 - ffffffff805001a3 : ip_queue_xmit
- */
-
-/*
- * perfstat:  /usr/bin/time -alike performance counter statistics utility
+ * perf stat:  /usr/bin/time -alike performance counter statistics utility
 
           It summarizes the counter events of all tasks (and child tasks),
           covering all CPUs that the command (or workload) executes on.
@@ -38,59 +8,38 @@
 
    Sample output:
 
-   $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
+   $ perf stat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
 
    Performance counter stats for 'ls':
 
            163516953 instructions
                 2295 cache-misses
              2855182 branch-misses
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Yanmin Zhang <yanmin.zhang@intel.com>
+ *   Wu Fengguang <fengguang.wu@intel.com>
+ *   Mike Galbraith <efault@gmx.de>
+ *   Paul Mackerras <paulus@samba.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
  */
 
- /*
-  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
-  *
-  * Improvements and fixes by:
-  *
-  *   Arjan van de Ven <arjan@linux.intel.com>
-  *   Yanmin Zhang <yanmin.zhang@intel.com>
-  *   Wu Fengguang <fengguang.wu@intel.com>
-  *   Mike Galbraith <efault@gmx.de>
-  *   Paul Mackerras <paulus@samba.org>
-  *
-  * Released under the GPL v2. (and only v2, not any later version)
-  */
-
 #include "perf.h"
 #include "util/util.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"
 
-#include <getopt.h>
-#include <assert.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <errno.h>
-#include <time.h>
-#include <sched.h>
-#include <pthread.h>
-
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
 #include <sys/prctl.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <sys/mman.h>
-
-#include <linux/unistd.h>
-#include <linux/types.h>
-
-#define EVENT_MASK_KERNEL		1
-#define EVENT_MASK_USER			2
 
 static int			system_wide			=  0;
+static int			inherit				=  1;
 
-static int			nr_counters			=  0;
-static __u64			event_id[MAX_COUNTERS]		= {
+static __u64			default_event_id[MAX_COUNTERS]	= {
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
@@ -101,20 +50,15 @@ static __u64			event_id[MAX_COUNTERS]		= {
 	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
 	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
 };
+
 static int			default_interval = 100000;
 static int			event_count[MAX_COUNTERS];
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
-static int			event_mask[MAX_COUNTERS];
 
-static int			tid				= -1;
-static int			profile_cpu			= -1;
+static int			target_pid			= -1;
 static int			nr_cpus				=  0;
-static int			nmi				=  1;
-static int			group				=  0;
 static unsigned int		page_size;
 
-static int			zero;
-
 static int			scale				=  1;
 
 static const unsigned int default_count[] = {
@@ -126,197 +70,6 @@ static const unsigned int default_count[] = {
 	  10000,
 };
 
-static char *hw_event_names[] = {
-	"CPU cycles",
-	"instructions",
-	"cache references",
-	"cache misses",
-	"branches",
-	"branch misses",
-	"bus cycles",
-};
-
-static char *sw_event_names[] = {
-	"cpu clock ticks",
-	"task clock ticks",
-	"pagefaults",
-	"context switches",
-	"CPU migrations",
-	"minor faults",
-	"major faults",
-};
-
-struct event_symbol {
-	__u64 event;
-	char *symbol;
-};
-
-static struct event_symbol event_symbols[] = {
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
-
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
-};
-
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
-
-static void display_events_help(void)
-{
-	unsigned int i;
-	__u64 e;
-
-	printf(
-	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		int type, id;
-
-		e = event_symbols[i].event;
-		type = PERF_COUNTER_TYPE(e);
-		id = PERF_COUNTER_ID(e);
-
-		printf("\n                             %d:%d: %-20s",
-				type, id, event_symbols[i].symbol);
-	}
-
-	printf("\n"
-	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
-}
-
-static void display_help(void)
-{
-	printf(
-	"Usage: perfstat [<events...>] <cmd...>\n\n"
-	"PerfStat Options (up to %d event types can be specified):\n\n",
-		 MAX_COUNTERS);
-
-	display_events_help();
-
-	printf(
-	" -l                           # scale counter values\n"
-	" -a                           # system-wide collection\n");
-	exit(0);
-}
-
-static char *event_name(int ctr)
-{
-	__u64 config = event_id[ctr];
-	int type = PERF_COUNTER_TYPE(config);
-	int id = PERF_COUNTER_ID(config);
-	static char buf[32];
-
-	if (PERF_COUNTER_RAW(config)) {
-		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
-		return buf;
-	}
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		if (id < PERF_HW_EVENTS_MAX)
-			return hw_event_names[id];
-		return "unknown-hardware";
-
-	case PERF_TYPE_SOFTWARE:
-		if (id < PERF_SW_EVENTS_MAX)
-			return sw_event_names[id];
-		return "unknown-software";
-
-	default:
-		break;
-	}
-
-	return "unknown";
-}
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static __u64 match_event_symbols(char *str)
-{
-	__u64 config, id;
-	int type;
-	unsigned int i;
-	char mask_str[4];
-
-	if (sscanf(str, "r%llx", &config) == 1)
-		return config | PERF_COUNTER_RAW_MASK;
-
-	switch (sscanf(str, "%d:%llu:%2s", &type, &id, mask_str)) {
-		case 3:
-			if (strchr(mask_str, 'k'))
-				event_mask[nr_counters] |= EVENT_MASK_USER;
-			if (strchr(mask_str, 'u'))
-				event_mask[nr_counters] |= EVENT_MASK_KERNEL;
-		case 2:
-			return EID(type, id);
-
-		default:
-			break;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
-	}
-
-	return ~0ULL;
-}
-
-static int parse_events(char *str)
-{
-	__u64 config;
-
-again:
-	if (nr_counters == MAX_COUNTERS)
-		return -1;
-
-	config = match_event_symbols(str);
-	if (config == ~0ULL)
-		return -1;
-
-	event_id[nr_counters] = config;
-	nr_counters++;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-
-	return 0;
-}
-
-
-/*
- * perfstat
- */
-
-char fault_here[1000000];
-
 static void create_perfstat_counter(int counter)
 {
 	struct perf_counter_hw_event hw_event;
@@ -324,7 +77,7 @@ static void create_perfstat_counter(int counter)
 	memset(&hw_event, 0, sizeof(hw_event));
 	hw_event.config		= event_id[counter];
 	hw_event.record_type	= 0;
-	hw_event.nmi		= 0;
+	hw_event.nmi		= 1;
 	hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
 	hw_event.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
 
@@ -343,7 +96,7 @@ static void create_perfstat_counter(int counter)
 			}
 		}
 	} else {
-		hw_event.inherit	= 1;
+		hw_event.inherit	= inherit;
 		hw_event.disabled	= 1;
 
 		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
@@ -355,7 +108,7 @@ static void create_perfstat_counter(int counter)
 	}
 }
 
-int do_perfstat(int argc, char *argv[])
+int do_perfstat(int argc, const char **argv)
 {
 	unsigned long long t0, t1;
 	int counter;
@@ -369,12 +122,6 @@ int do_perfstat(int argc, char *argv[])
 	for (counter = 0; counter < nr_counters; counter++)
 		create_perfstat_counter(counter);
 
-	argc -= optind;
-	argv += optind;
-
-	if (!argc)
-		display_help();
-
 	/*
 	 * Enable counters and exec the command:
 	 */
@@ -384,7 +131,7 @@ int do_perfstat(int argc, char *argv[])
 	if ((pid = fork()) < 0)
 		perror("failed to fork");
 	if (!pid) {
-		if (execvp(argv[0], argv)) {
+		if (execvp(argv[0], (char **)argv)) {
 			perror(argv[0]);
 			exit(-1);
 		}
@@ -458,70 +205,45 @@ int do_perfstat(int argc, char *argv[])
 	return 0;
 }
 
-static void process_options(int argc, char **argv)
+static void skip_signal(int signo)
 {
-	int error = 0, counter;
-
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"count",	required_argument,	NULL, 'c'},
-			{"cpu",		required_argument,	NULL, 'C'},
-			{"delay",	required_argument,	NULL, 'd'},
-			{"dump_symtab",	no_argument,		NULL, 'D'},
-			{"event",	required_argument,	NULL, 'e'},
-			{"filter",	required_argument,	NULL, 'f'},
-			{"group",	required_argument,	NULL, 'g'},
-			{"help",	no_argument,		NULL, 'h'},
-			{"nmi",		required_argument,	NULL, 'n'},
-			{"munmap_info",	no_argument,		NULL, 'U'},
-			{"pid",		required_argument,	NULL, 'p'},
-			{"realtime",	required_argument,	NULL, 'r'},
-			{"scale",	no_argument,		NULL, 'l'},
-			{"symbol",	required_argument,	NULL, 's'},
-			{"stat",	no_argument,		NULL, 'S'},
-			{"vmlinux",	required_argument,	NULL, 'x'},
-			{"zero",	no_argument,		NULL, 'z'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'a': system_wide			=	       1; break;
-		case 'c': default_interval		=   atoi(optarg); break;
-		case 'C':
-			/* CPU and PID are mutually exclusive */
-			if (tid != -1) {
-				printf("WARNING: CPU switch overriding PID\n");
-				sleep(1);
-				tid = -1;
-			}
-			profile_cpu			=   atoi(optarg); break;
-
-		case 'e': error				= parse_events(optarg); break;
-
-		case 'g': group				=   atoi(optarg); break;
-		case 'h':      				  display_help(); break;
-		case 'l': scale				=	       1; break;
-		case 'n': nmi				=   atoi(optarg); break;
-		case 'p':
-			/* CPU and PID are mutually exclusive */
-			if (profile_cpu != -1) {
-				printf("WARNING: PID switch overriding CPU\n");
-				sleep(1);
-				profile_cpu = -1;
-			}
-			tid				=   atoi(optarg); break;
-		case 'z': zero				=              1; break;
-		default: error = 1; break;
-		}
-	}
-	if (error)
-		display_help();
+}
+
+static const char * const stat_usage[] = {
+	"perf stat [<options>] <command>",
+	NULL
+};
+
+static char events_help_msg[EVENTS_HELP_MAX];
+
+static const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     events_help_msg, parse_events),
+	OPT_INTEGER('c', "count", &default_interval,
+		    "event period to sample"),
+	OPT_BOOLEAN('i', "inherit", &inherit,
+		    "child tasks inherit counters"),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "stat events on existing pid"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('l', "scale", &scale,
+			    "scale/normalize counters"),
+	OPT_END()
+};
+
+int cmd_stat(int argc, const char **argv, const char *prefix)
+{
+	int counter;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	create_events_help(events_help_msg);
+	memcpy(event_id, default_event_id, sizeof(default_event_id));
+
+	argc = parse_options(argc, argv, options, stat_usage, 0);
+	if (!argc)
+		usage_with_options(stat_usage, options);
 
 	if (!nr_counters) {
 		nr_counters = 8;
@@ -533,18 +255,6 @@ static void process_options(int argc, char **argv)
 
 		event_count[counter] = default_interval;
 	}
-}
-
-static void skip_signal(int signo)
-{
-}
-
-int cmd_stat(int argc, char **argv, const char *prefix)
-{
-	page_size = sysconf(_SC_PAGE_SIZE);
-
-	process_options(argc, argv);
-
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
index 77d0917..88c903e 100644
--- a/Documentation/perf_counter/util/parse-events.c
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -8,6 +8,7 @@
 int nr_counters;
 
 __u64			event_id[MAX_COUNTERS]		= { };
+int			event_mask[MAX_COUNTERS];
 
 struct event_symbol {
 	__u64 event;
@@ -37,6 +38,64 @@ static struct event_symbol event_symbols[] = {
 	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
 };
 
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+static char *hw_event_names[] = {
+	"CPU cycles",
+	"instructions",
+	"cache references",
+	"cache misses",
+	"branches",
+	"branch misses",
+	"bus cycles",
+};
+
+static char *sw_event_names[] = {
+	"cpu clock ticks",
+	"task clock ticks",
+	"pagefaults",
+	"context switches",
+	"CPU migrations",
+	"minor faults",
+	"major faults",
+};
+
+char *event_name(int ctr)
+{
+	__u64 config = event_id[ctr];
+	int type = PERF_COUNTER_TYPE(config);
+	int id = PERF_COUNTER_ID(config);
+	static char buf[32];
+
+	if (PERF_COUNTER_RAW(config)) {
+		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
+		return buf;
+	}
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		if (id < PERF_HW_EVENTS_MAX)
+			return hw_event_names[id];
+		return "unknown-hardware";
+
+	case PERF_TYPE_SOFTWARE:
+		if (id < PERF_SW_EVENTS_MAX)
+			return sw_event_names[id];
+		return "unknown-software";
+
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
 /*
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
@@ -46,12 +105,23 @@ static __u64 match_event_symbols(const char *str)
 	__u64 config, id;
 	int type;
 	unsigned int i;
+	char mask_str[4];
 
 	if (sscanf(str, "r%llx", &config) == 1)
 		return config | PERF_COUNTER_RAW_MASK;
 
-	if (sscanf(str, "%d:%llu", &type, &id) == 2)
-		return EID(type, id);
+	switch (sscanf(str, "%d:%llu:%2s", &type, &id, mask_str)) {
+		case 3:
+			if (strchr(mask_str, 'k'))
+				event_mask[nr_counters] |= EVENT_MASK_USER;
+			if (strchr(mask_str, 'u'))
+				event_mask[nr_counters] |= EVENT_MASK_KERNEL;
+		case 2:
+			return EID(type, id);
+
+		default:
+			break;
+	}
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
 		if (!strncmp(str, event_symbols[i].symbol,
@@ -86,14 +156,6 @@ again:
 	return 0;
 }
 
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
-
 /*
  * Create the help text for the event symbols:
  */
diff --git a/Documentation/perf_counter/util/parse-events.h b/Documentation/perf_counter/util/parse-events.h
index 6e2ebe5..0da306b 100644
--- a/Documentation/perf_counter/util/parse-events.h
+++ b/Documentation/perf_counter/util/parse-events.h
@@ -1,6 +1,16 @@
 
+/*
+ * Parse symbolic events/counts passed in as options:
+ */
+
 extern int nr_counters;
 extern __u64			event_id[MAX_COUNTERS];
+extern int			event_mask[MAX_COUNTERS];
+
+#define EVENT_MASK_KERNEL	1
+#define EVENT_MASK_USER		2
+
+extern char *event_name(int ctr);
 
 extern int parse_events(const struct option *opt, const char *str, int unset);
 
-- 
cgit v0.10.2


From 65b1aa13f2a14e113d1139ecf7cb2507a3cfa8b8 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 20 May 2009 19:54:33 +0200
Subject: ARM: MX3: basic support for lilly-1131 board

This patch adds basic support for INCO startec's LILLY-1131 iMX31 based
modules. The module needs a baseboard support to work which will be
added in the next patch.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/Kconfig b/arch/arm/mach-mx3/Kconfig
index ebab18a..6f17ceb 100644
--- a/arch/arm/mach-mx3/Kconfig
+++ b/arch/arm/mach-mx3/Kconfig
@@ -60,6 +60,13 @@ config MACH_MX31MOBOARD
 	  Include support for mx31moboard platform. This includes specific
 	  configurations for the board and its peripherals.
 
+config MACH_MX31LILLY
+	bool "Support MX31 LILLY-1131 platforms (INCO startec)"
+	select ARCH_MX31
+	help
+	  Include support for mx31 based LILLY1131 modules. This includes
+	  specific configurations for the board and its peripherals.
+
 config MACH_QONG
 	bool "Support Dave/DENX QongEVB-LITE platform"
 	select ARCH_MX31
diff --git a/arch/arm/mach-mx3/Makefile b/arch/arm/mach-mx3/Makefile
index 611812b..d3e3f3b 100644
--- a/arch/arm/mach-mx3/Makefile
+++ b/arch/arm/mach-mx3/Makefile
@@ -8,6 +8,7 @@ obj-y				:= mm.o devices.o
 obj-$(CONFIG_ARCH_MX31)		+= clock.o iomux.o
 obj-$(CONFIG_ARCH_MX35)		+= clock-imx35.o
 obj-$(CONFIG_MACH_MX31ADS)	+= mx31ads.o
+obj-$(CONFIG_MACH_MX31LILLY)	+= mx31lilly.o
 obj-$(CONFIG_MACH_MX31LITE)	+= mx31lite.o
 obj-$(CONFIG_MACH_PCM037)	+= pcm037.o
 obj-$(CONFIG_MACH_MX31_3DS)	+= mx31pdk.o
diff --git a/arch/arm/mach-mx3/mx31lilly.c b/arch/arm/mach-mx3/mx31lilly.c
new file mode 100644
index 0000000..3f64778
--- /dev/null
+++ b/arch/arm/mach-mx3/mx31lilly.c
@@ -0,0 +1,81 @@
+/*
+ *  LILLY-1131 module support
+ *
+ *    Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ *  based on code for other MX31 boards,
+ *
+ *    Copyright 2005-2007 Freescale Semiconductor
+ *    Copyright (c) 2009 Alberto Panizzo <maramaopercheseimorto@gmail.com>
+ *    Copyright (C) 2009 Valentin Longchamp, EPFL Mobots group
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+#include <asm/mach/map.h>
+
+#include <mach/hardware.h>
+#include <mach/common.h>
+#include <mach/iomux-mx3.h>
+#include <mach/board-mx31lilly.h>
+
+#include "devices.h"
+
+/*
+ * This file contains module-specific initialization routines for LILLY-1131.
+ * Initialization of peripherals found on the baseboard is implemented in the
+ * appropriate baseboard support code.
+ */
+
+static int mx31lilly_baseboard;
+core_param(mx31lilly_baseboard, mx31lilly_baseboard, int, 0444);
+
+static void __init mx31lilly_board_init(void)
+{
+	switch (mx31lilly_baseboard) {
+	case MX31LILLY_NOBOARD:
+		break;
+	default:
+		printk(KERN_ERR "Illegal mx31lilly_baseboard type %d\n",
+			mx31lilly_baseboard);
+	}
+}
+
+static void __init mx31lilly_timer_init(void)
+{
+	mx31_clocks_init(26000000);
+}
+
+static struct sys_timer mx31lilly_timer = {
+	.init	= mx31lilly_timer_init,
+};
+
+MACHINE_START(LILLY1131, "INCO startec LILLY-1131")
+	.phys_io	= AIPS1_BASE_ADDR,
+	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
+	.boot_params	= PHYS_OFFSET + 0x100,
+	.map_io		= mx31_map_io,
+	.init_irq	= mxc_init_irq,
+	.init_machine	= mx31lilly_board_init,
+	.timer		= &mx31lilly_timer,
+MACHINE_END
+
diff --git a/arch/arm/plat-mxc/include/mach/board-mx31lilly.h b/arch/arm/plat-mxc/include/mach/board-mx31lilly.h
new file mode 100644
index 0000000..823fb88
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/board-mx31lilly.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ * Based on code for mobots boards,
+ *   Copyright (C) 2009 Valentin Longchamp, EPFL Mobots group
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __ASM_ARCH_MXC_BOARD_MX31LILLY_H__
+#define __ASM_ARCH_MXC_BOARD_MX31LILLY_H__
+
+/* mandatory for CONFIG_LL_DEBUG */
+
+#define MXC_LL_UART_PADDR	UART1_BASE_ADDR
+#define MXC_LL_UART_VADDR	(AIPI_BASE_ADDR_VIRT + 0x0A000)
+
+#ifndef __ASSEMBLY__
+
+enum mx31lilly_boards {
+	MX31LILLY_NOBOARD	= 0,
+};
+
+/*
+ * This CPU module needs a baseboard to work. After basic initializing
+ * its own devices, it calls baseboard's init function.
+ */
+
+#endif
+
+#endif /* __ASM_ARCH_MXC_BOARD_MX31LILLY_H__ */
-- 
cgit v0.10.2


From 1bc34f7964b1e8dee8d49d83cba8dad0a010df92 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 20 May 2009 19:54:34 +0200
Subject: ARM: MX3: add skeleton for lilly-1131 development board

Support code for lilly-1131 is implemented in a module/baseboard
fashion. All code specific to peripherals found on the development board
will go to this file.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/Makefile b/arch/arm/mach-mx3/Makefile
index d3e3f3b..6c22da2 100644
--- a/arch/arm/mach-mx3/Makefile
+++ b/arch/arm/mach-mx3/Makefile
@@ -8,7 +8,7 @@ obj-y				:= mm.o devices.o
 obj-$(CONFIG_ARCH_MX31)		+= clock.o iomux.o
 obj-$(CONFIG_ARCH_MX35)		+= clock-imx35.o
 obj-$(CONFIG_MACH_MX31ADS)	+= mx31ads.o
-obj-$(CONFIG_MACH_MX31LILLY)	+= mx31lilly.o
+obj-$(CONFIG_MACH_MX31LILLY)	+= mx31lilly.o mx31lilly-db.o
 obj-$(CONFIG_MACH_MX31LITE)	+= mx31lite.o
 obj-$(CONFIG_MACH_PCM037)	+= pcm037.o
 obj-$(CONFIG_MACH_MX31_3DS)	+= mx31pdk.o
diff --git a/arch/arm/mach-mx3/mx31lilly-db.c b/arch/arm/mach-mx3/mx31lilly-db.c
new file mode 100644
index 0000000..73e1e5e
--- /dev/null
+++ b/arch/arm/mach-mx3/mx31lilly-db.c
@@ -0,0 +1,68 @@
+/*
+ *  LILLY-1131 development board support
+ *
+ *    Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ *  based on code for other MX31 boards,
+ *
+ *    Copyright 2005-2007 Freescale Semiconductor
+ *    Copyright (c) 2009 Alberto Panizzo <maramaopercheseimorto@gmail.com>
+ *    Copyright (C) 2009 Valentin Longchamp, EPFL Mobots group
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <mach/hardware.h>
+#include <mach/common.h>
+#include <mach/imx-uart.h>
+#include <mach/iomux-mx3.h>
+#include <mach/board-mx31lilly.h>
+
+#include "devices.h"
+
+/*
+ * This file contains board-specific initialization routines for the
+ * LILLY-1131 development board. If you design an own baseboard for the
+ * module, use this file as base for support code.
+ */
+
+static unsigned int lilly_db_board_pins[] __initdata = {
+	MX31_PIN_CTS1__CTS1,
+	MX31_PIN_RTS1__RTS1,
+	MX31_PIN_TXD1__TXD1,
+	MX31_PIN_RXD1__RXD1,
+};
+
+/* UART */
+static struct imxuart_platform_data uart_pdata __initdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
+void __init mx31lilly_db_init(void)
+{
+	mxc_iomux_setup_multiple_pins(lilly_db_board_pins,
+					ARRAY_SIZE(lilly_db_board_pins),
+					"development board pins");
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+}
+
diff --git a/arch/arm/mach-mx3/mx31lilly.c b/arch/arm/mach-mx3/mx31lilly.c
index 3f64778..a7d66cb 100644
--- a/arch/arm/mach-mx3/mx31lilly.c
+++ b/arch/arm/mach-mx3/mx31lilly.c
@@ -54,6 +54,9 @@ static void __init mx31lilly_board_init(void)
 	switch (mx31lilly_baseboard) {
 	case MX31LILLY_NOBOARD:
 		break;
+	case MX31LILLY_DB:
+		mx31lilly_db_init();
+		break;
 	default:
 		printk(KERN_ERR "Illegal mx31lilly_baseboard type %d\n",
 			mx31lilly_baseboard);
diff --git a/arch/arm/plat-mxc/include/mach/board-mx31lilly.h b/arch/arm/plat-mxc/include/mach/board-mx31lilly.h
index 823fb88..78cf31e 100644
--- a/arch/arm/plat-mxc/include/mach/board-mx31lilly.h
+++ b/arch/arm/plat-mxc/include/mach/board-mx31lilly.h
@@ -31,6 +31,7 @@
 
 enum mx31lilly_boards {
 	MX31LILLY_NOBOARD	= 0,
+	MX31LILLY_DB		= 1,
 };
 
 /*
@@ -38,6 +39,8 @@ enum mx31lilly_boards {
  * its own devices, it calls baseboard's init function.
  */
 
+extern void mx31lilly_db_init(void);
+
 #endif
 
 #endif /* __ASM_ARCH_MXC_BOARD_MX31LILLY_H__ */
-- 
cgit v0.10.2


From cbaa6ca1f5af59ec8d0d104c1f02b65a6cf0a8aa Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 20 May 2009 19:54:35 +0200
Subject: ARM: MX3: add lilly-1131 ethernet support

This adds support for the SMSC911x chip found on the lilly-1131 module.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lilly.c b/arch/arm/mach-mx3/mx31lilly.c
index a7d66cb..e533934 100644
--- a/arch/arm/mach-mx3/mx31lilly.c
+++ b/arch/arm/mach-mx3/mx31lilly.c
@@ -27,6 +27,9 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/smsc911x.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -46,6 +49,44 @@
  * appropriate baseboard support code.
  */
 
+/* SMSC ethernet support */
+
+static struct resource smsc91x_resources[] = {
+	{
+		.start	= CS4_BASE_ADDR,
+		.end	= CS4_BASE_ADDR + 0xffff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= IOMUX_TO_IRQ(MX31_PIN_GPIO1_0),
+		.end	= IOMUX_TO_IRQ(MX31_PIN_GPIO1_0),
+		.flags	= IORESOURCE_IRQ | IRQF_TRIGGER_FALLING,
+	}
+};
+
+static struct smsc911x_platform_config smsc911x_config = {
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_OPEN_DRAIN,
+	.flags		= SMSC911X_USE_32BIT |
+			  SMSC911X_SAVE_MAC_ADDRESS |
+			  SMSC911X_FORCE_INTERNAL_PHY,
+};
+
+static struct platform_device smsc91x_device = {
+	.name		= "smsc911x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(smsc91x_resources),
+	.resource	= smsc91x_resources,
+	.dev		= {
+		.platform_data = &smsc911x_config,
+	}
+};
+
+static struct platform_device *devices[] __initdata = {
+	&smsc91x_device,
+};
+
 static int mx31lilly_baseboard;
 core_param(mx31lilly_baseboard, mx31lilly_baseboard, int, 0444);
 
@@ -61,6 +102,10 @@ static void __init mx31lilly_board_init(void)
 		printk(KERN_ERR "Illegal mx31lilly_baseboard type %d\n",
 			mx31lilly_baseboard);
 	}
+
+	mxc_iomux_alloc_pin(MX31_PIN_CS4__CS4, "Ethernet CS");
+
+	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
 static void __init mx31lilly_timer_init(void)
-- 
cgit v0.10.2


From d0b1eabc7b255daa978849229703b4d70a4c0555 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 20 May 2009 19:54:36 +0200
Subject: ARM: MX3: add MMC suuport for lilly1131-db

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lilly-db.c b/arch/arm/mach-mx3/mx31lilly-db.c
index 73e1e5e..b782116 100644
--- a/arch/arm/mach-mx3/mx31lilly-db.c
+++ b/arch/arm/mach-mx3/mx31lilly-db.c
@@ -27,6 +27,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -37,6 +38,7 @@
 #include <mach/imx-uart.h>
 #include <mach/iomux-mx3.h>
 #include <mach/board-mx31lilly.h>
+#include <mach/mmc.h>
 
 #include "devices.h"
 
@@ -51,6 +53,12 @@ static unsigned int lilly_db_board_pins[] __initdata = {
 	MX31_PIN_RTS1__RTS1,
 	MX31_PIN_TXD1__TXD1,
 	MX31_PIN_RXD1__RXD1,
+	MX31_PIN_SD1_DATA3__SD1_DATA3,
+	MX31_PIN_SD1_DATA2__SD1_DATA2,
+	MX31_PIN_SD1_DATA1__SD1_DATA1,
+	MX31_PIN_SD1_DATA0__SD1_DATA0,
+	MX31_PIN_SD1_CLK__SD1_CLK,
+	MX31_PIN_SD1_CMD__SD1_CMD,
 };
 
 /* UART */
@@ -58,11 +66,70 @@ static struct imxuart_platform_data uart_pdata __initdata = {
 	.flags = IMXUART_HAVE_RTSCTS,
 };
 
+/* MMC support */
+
+static int mxc_mmc1_get_ro(struct device *dev)
+{
+	return gpio_get_value(IOMUX_TO_GPIO(MX31_PIN_LCS0));
+}
+
+static int gpio_det, gpio_wp;
+
+static int mxc_mmc1_init(struct device *dev,
+			 irq_handler_t detect_irq, void *data)
+{
+	int ret;
+
+	gpio_det = IOMUX_TO_GPIO(MX31_PIN_GPIO1_1);
+	gpio_wp = IOMUX_TO_GPIO(MX31_PIN_LCS0);
+
+	ret = gpio_request(gpio_det, "MMC detect");
+	if (ret)
+		return ret;
+
+	ret = gpio_request(gpio_wp, "MMC w/p");
+	if (ret)
+		goto exit_free_det;
+
+	gpio_direction_input(gpio_det);
+	gpio_direction_input(gpio_wp);
+
+	ret = request_irq(IOMUX_TO_IRQ(MX31_PIN_GPIO1_1), detect_irq,
+			  IRQF_DISABLED | IRQF_TRIGGER_FALLING,
+			  "MMC detect", data);
+	if (ret)
+		goto exit_free_wp;
+
+	return 0;
+
+exit_free_wp:
+	gpio_free(gpio_wp);
+
+exit_free_det:
+	gpio_free(gpio_det);
+
+	return ret;
+}
+
+static void mxc_mmc1_exit(struct device *dev, void *data)
+{
+	gpio_free(gpio_det);
+	gpio_free(gpio_wp);
+	free_irq(IOMUX_TO_IRQ(MX31_PIN_GPIO1_1), data);
+}
+
+static struct imxmmc_platform_data mmc_pdata = {
+	.get_ro	= mxc_mmc1_get_ro,
+	.init	= mxc_mmc1_init,
+	.exit	= mxc_mmc1_exit,
+};
+
 void __init mx31lilly_db_init(void)
 {
 	mxc_iomux_setup_multiple_pins(lilly_db_board_pins,
 					ARRAY_SIZE(lilly_db_board_pins),
 					"development board pins");
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+	mxc_register_device(&mxcsdhc_device0, &mmc_pdata);
 }
 
-- 
cgit v0.10.2


From b99238721bf62c216d556b4ba63bdf410e068284 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 20 May 2009 19:54:37 +0200
Subject: ARM: MX3: add framebuffer support for lilly1131-db

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lilly-db.c b/arch/arm/mach-mx3/mx31lilly-db.c
index b782116..99b0a6e 100644
--- a/arch/arm/mach-mx3/mx31lilly-db.c
+++ b/arch/arm/mach-mx3/mx31lilly-db.c
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -39,6 +40,8 @@
 #include <mach/iomux-mx3.h>
 #include <mach/board-mx31lilly.h>
 #include <mach/mmc.h>
+#include <mach/mx3fb.h>
+#include <mach/ipu.h>
 
 #include "devices.h"
 
@@ -59,6 +62,29 @@ static unsigned int lilly_db_board_pins[] __initdata = {
 	MX31_PIN_SD1_DATA0__SD1_DATA0,
 	MX31_PIN_SD1_CLK__SD1_CLK,
 	MX31_PIN_SD1_CMD__SD1_CMD,
+	MX31_PIN_LD0__LD0,
+	MX31_PIN_LD1__LD1,
+	MX31_PIN_LD2__LD2,
+	MX31_PIN_LD3__LD3,
+	MX31_PIN_LD4__LD4,
+	MX31_PIN_LD5__LD5,
+	MX31_PIN_LD6__LD6,
+	MX31_PIN_LD7__LD7,
+	MX31_PIN_LD8__LD8,
+	MX31_PIN_LD9__LD9,
+	MX31_PIN_LD10__LD10,
+	MX31_PIN_LD11__LD11,
+	MX31_PIN_LD12__LD12,
+	MX31_PIN_LD13__LD13,
+	MX31_PIN_LD14__LD14,
+	MX31_PIN_LD15__LD15,
+	MX31_PIN_LD16__LD16,
+	MX31_PIN_LD17__LD17,
+	MX31_PIN_VSYNC3__VSYNC3,
+	MX31_PIN_HSYNC__HSYNC,
+	MX31_PIN_FPSHIFT__FPSHIFT,
+	MX31_PIN_DRDY0__DRDY0,
+	MX31_PIN_CONTRAST__CONTRAST,
 };
 
 /* UART */
@@ -124,6 +150,50 @@ static struct imxmmc_platform_data mmc_pdata = {
 	.exit	= mxc_mmc1_exit,
 };
 
+/* Framebuffer support */
+static struct ipu_platform_data ipu_data __initdata = {
+	.irq_base = MXC_IPU_IRQ_START,
+};
+
+static const struct fb_videomode fb_modedb = {
+	/* 640x480 TFT panel (IPS-056T) */
+	.name	   	= "CRT-VGA",
+	.refresh	= 64,
+	.xres		= 640,
+	.yres		= 480,
+	.pixclock	= 30000,
+	.left_margin	= 200,
+	.right_margin	= 2,
+	.upper_margin	= 2,
+	.lower_margin	= 2,
+	.hsync_len	= 3,
+	.vsync_len	= 1,
+	.sync		= FB_SYNC_VERT_HIGH_ACT | FB_SYNC_OE_ACT_HIGH,
+	.vmode		= FB_VMODE_NONINTERLACED,
+	.flag		= 0,
+};
+
+static struct mx3fb_platform_data fb_pdata __initdata = {
+	.dma_dev	= &mx3_ipu.dev,
+	.name		= "CRT-VGA",
+	.mode		= &fb_modedb,
+	.num_modes	= 1,
+};
+
+#define LCD_VCC_EN_GPIO	 (7)
+
+static void __init mx31lilly_init_fb(void)
+{
+	if (gpio_request(LCD_VCC_EN_GPIO, "LCD enable") != 0) {
+		printk(KERN_WARNING "unable to request LCD_VCC_EN pin.\n");
+		return;
+	}
+
+	mxc_register_device(&mx3_ipu, &ipu_data);
+	mxc_register_device(&mx3_fb, &fb_pdata);
+	gpio_direction_output(LCD_VCC_EN_GPIO, 1);
+}
+
 void __init mx31lilly_db_init(void)
 {
 	mxc_iomux_setup_multiple_pins(lilly_db_board_pins,
@@ -131,5 +201,6 @@ void __init mx31lilly_db_init(void)
 					"development board pins");
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
 	mxc_register_device(&mxcsdhc_device0, &mmc_pdata);
+	mx31lilly_init_fb();
 }
 
-- 
cgit v0.10.2


From 38160e0b0126565947e986701ae1bdbff01f121d Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 20 May 2009 19:54:38 +0200
Subject: ARM: MX3: add NOR flash support for lilly-1131

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lilly.c b/arch/arm/mach-mx3/mx31lilly.c
index e533934..f15a2eb 100644
--- a/arch/arm/mach-mx3/mx31lilly.c
+++ b/arch/arm/mach-mx3/mx31lilly.c
@@ -30,6 +30,7 @@
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/smsc911x.h>
+#include <linux/mtd/physmap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -83,8 +84,30 @@ static struct platform_device smsc91x_device = {
 	}
 };
 
+/* NOR flash */
+static struct physmap_flash_data nor_flash_data = {
+	.width  = 2,
+};
+
+static struct resource nor_flash_resource = {
+	.start	= 0xa0000000,
+	.end	= 0xa1ffffff,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct platform_device physmap_flash_device = {
+	.name	= "physmap-flash",
+	.id	= 0,
+	.dev	= {
+		.platform_data  = &nor_flash_data,
+	},
+	.resource = &nor_flash_resource,
+	.num_resources = 1,
+};
+
 static struct platform_device *devices[] __initdata = {
 	&smsc91x_device,
+	&physmap_flash_device,
 };
 
 static int mx31lilly_baseboard;
-- 
cgit v0.10.2


From 4193d2d2d7c8307803d035932d5e33d848a96951 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 20 May 2009 19:54:39 +0200
Subject: ARM: MX3: add I2C support for lilly-1131

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lilly.c b/arch/arm/mach-mx3/mx31lilly.c
index f15a2eb..6ab2f16 100644
--- a/arch/arm/mach-mx3/mx31lilly.c
+++ b/arch/arm/mach-mx3/mx31lilly.c
@@ -108,6 +108,7 @@ static struct platform_device physmap_flash_device = {
 static struct platform_device *devices[] __initdata = {
 	&smsc91x_device,
 	&physmap_flash_device,
+	&mxc_i2c_device1,
 };
 
 static int mx31lilly_baseboard;
@@ -127,6 +128,8 @@ static void __init mx31lilly_board_init(void)
 	}
 
 	mxc_iomux_alloc_pin(MX31_PIN_CS4__CS4, "Ethernet CS");
+	mxc_iomux_alloc_pin(MX31_PIN_CSPI2_MOSI__SCL, "I2C SCL");
+	mxc_iomux_alloc_pin(MX31_PIN_CSPI2_MISO__SDA, "I2C SDA");
 
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
-- 
cgit v0.10.2


From b456bae0ff4f3cf91639dd32b2bfc49b1c30b4b0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 09:17:18 +0200
Subject: perf top: Convert to Git option parsing

Remove getopt usage and use Git's much more advanced and more compact
command option library.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 626b3207..87b925c 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -45,8 +45,10 @@
 
 #include "perf.h"
 #include "util/util.h"
+#include "util/util.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"
 
-#include <getopt.h>
 #include <assert.h>
 #include <fcntl.h>
 
@@ -70,8 +72,7 @@
 
 static int			system_wide			=  0;
 
-static int			nr_counters			=  0;
-static __u64			event_id[MAX_COUNTERS]		= {
+static __u64			default_event_id[MAX_COUNTERS]		= {
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
 	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
@@ -88,7 +89,7 @@ static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static __u64			count_filter		       = 100;
 
-static int			tid				= -1;
+static int			target_pid				= -1;
 static int			profile_cpu			= -1;
 static int			nr_cpus				=  0;
 static int			nmi				=  1;
@@ -100,8 +101,6 @@ static int			use_mmap			= 0;
 static int			use_munmap			= 0;
 static int			freq				= 0;
 
-static char			*vmlinux;
-
 static char			*sym_filter;
 static unsigned long		filter_start;
 static unsigned long		filter_end;
@@ -110,18 +109,6 @@ static int			delay_secs			=  2;
 static int			zero;
 static int			dump_symtab;
 
-static int			scale;
-
-struct source_line {
-	uint64_t		EIP;
-	unsigned long		count;
-	char			*line;
-	struct source_line	*next;
-};
-
-static struct source_line	*lines;
-static struct source_line	**lines_tail;
-
 static const unsigned int default_count[] = {
 	1000000,
 	1000000,
@@ -131,194 +118,6 @@ static const unsigned int default_count[] = {
 	  10000,
 };
 
-static char *hw_event_names[] = {
-	"CPU cycles",
-	"instructions",
-	"cache references",
-	"cache misses",
-	"branches",
-	"branch misses",
-	"bus cycles",
-};
-
-static char *sw_event_names[] = {
-	"cpu clock ticks",
-	"task clock ticks",
-	"pagefaults",
-	"context switches",
-	"CPU migrations",
-	"minor faults",
-	"major faults",
-};
-
-struct event_symbol {
-	__u64 event;
-	char *symbol;
-};
-
-static struct event_symbol event_symbols[] = {
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
-
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
-};
-
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
-
-static void display_events_help(void)
-{
-	unsigned int i;
-	__u64 e;
-
-	printf(
-	" -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		int type, id;
-
-		e = event_symbols[i].event;
-		type = PERF_COUNTER_TYPE(e);
-		id = PERF_COUNTER_ID(e);
-
-		printf("\n                             %d:%d: %-20s",
-				type, id, event_symbols[i].symbol);
-	}
-
-	printf("\n"
-	"                           rNNN: raw PMU events (eventsel+umask)\n\n");
-}
-
-static void display_help(void)
-{
-	printf(
-	"Usage: kerneltop [<options>]\n"
-	"   Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
-	"KernelTop Options (up to %d event types can be specified at once):\n\n",
-		 MAX_COUNTERS);
-
-	display_events_help();
-
-	printf(
-	" -c CNT    --count=CNT        # event period to sample\n\n"
-	" -C CPU    --cpu=CPU          # CPU (-1 for all)                 [default: -1]\n"
-	" -p PID    --pid=PID          # PID of sampled task (-1 for all) [default: -1]\n\n"
-	" -l                           # show scale factor for RR events\n"
-	" -d delay  --delay=<seconds>  # sampling/display delay           [default:  2]\n"
-	" -f CNT    --filter=CNT       # min-event-count filter          [default: 100]\n\n"
-	" -r prio   --realtime=<prio>  # event acquisition runs with SCHED_FIFO policy\n"
-	" -s symbol --symbol=<symbol>  # function to be showed annotated one-shot\n"
-	" -x path   --vmlinux=<path>   # the vmlinux binary, required for -s use\n"
-	" -z        --zero             # zero counts after display\n"
-	" -D        --dump_symtab      # dump symbol table to stderr on startup\n"
-	" -m pages  --mmap_pages=<pages> # number of mmap data pages\n"
-	" -M        --mmap_info        # print mmap info stream\n"
-	" -U        --munmap_info      # print munmap info stream\n"
-	);
-
-	exit(0);
-}
-
-static char *event_name(int ctr)
-{
-	__u64 config = event_id[ctr];
-	int type = PERF_COUNTER_TYPE(config);
-	int id = PERF_COUNTER_ID(config);
-	static char buf[32];
-
-	if (PERF_COUNTER_RAW(config)) {
-		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
-		return buf;
-	}
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		if (id < PERF_HW_EVENTS_MAX)
-			return hw_event_names[id];
-		return "unknown-hardware";
-
-	case PERF_TYPE_SOFTWARE:
-		if (id < PERF_SW_EVENTS_MAX)
-			return sw_event_names[id];
-		return "unknown-software";
-
-	default:
-		break;
-	}
-
-	return "unknown";
-}
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static __u64 match_event_symbols(char *str)
-{
-	__u64 config, id;
-	int type;
-	unsigned int i;
-
-	if (sscanf(str, "r%llx", &config) == 1)
-		return config | PERF_COUNTER_RAW_MASK;
-
-	if (sscanf(str, "%d:%llu", &type, &id) == 2)
-		return EID(type, id);
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
-	}
-
-	return ~0ULL;
-}
-
-static int parse_events(char *str)
-{
-	__u64 config;
-
-again:
-	if (nr_counters == MAX_COUNTERS)
-		return -1;
-
-	config = match_event_symbols(str);
-	if (config == ~0ULL)
-		return -1;
-
-	event_id[nr_counters] = config;
-	nr_counters++;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-
-	return 0;
-}
-
 /*
  * Symbols
  */
@@ -331,7 +130,6 @@ struct sym_entry {
 	char			*sym;
 	unsigned long		count[MAX_COUNTERS];
 	int			skip;
-	struct source_line	*source;
 };
 
 #define MAX_SYMS		100000
@@ -342,8 +140,6 @@ struct sym_entry		*sym_filter_entry;
 
 static struct sym_entry		sym_table[MAX_SYMS];
 
-static void show_details(struct sym_entry *sym);
-
 /*
  * Ordering weight: count-1 * count-2 * ... / count-n
  */
@@ -419,15 +215,15 @@ static void print_sym_table(void)
 
 	printf( "], ");
 
-	if (tid != -1)
-		printf(" (tid: %d", tid);
+	if (target_pid != -1)
+		printf(" (target_pid: %d", target_pid);
 	else
 		printf(" (all");
 
 	if (profile_cpu != -1)
 		printf(", cpu: %d)\n", profile_cpu);
 	else {
-		if (tid != -1)
+		if (target_pid != -1)
 			printf(")\n");
 		else
 			printf(", %d CPUs)\n", nr_cpus);
@@ -463,9 +259,6 @@ static void print_sym_table(void)
 				pcnt, tmp[i].addr, tmp[i].sym);
 	}
 
-	if (sym_filter_entry)
-		show_details(sym_filter_entry);
-
 	{
 		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
 
@@ -628,134 +421,8 @@ static void parse_symbols(void)
 	}
 }
 
-/*
- * Source lines
- */
-
-static void parse_vmlinux(char *filename)
-{
-	FILE *file;
-	char command[PATH_MAX*2];
-	if (!filename)
-		return;
-
-	sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
-
-	file = popen(command, "r");
-	if (!file)
-		return;
-
-	lines_tail = &lines;
-	while (!feof(file)) {
-		struct source_line *src;
-		size_t dummy = 0;
-		char *c;
-
-		src = malloc(sizeof(struct source_line));
-		assert(src != NULL);
-		memset(src, 0, sizeof(struct source_line));
-
-		if (getline(&src->line, &dummy, file) < 0)
-			break;
-		if (!src->line)
-			break;
-
-		c = strchr(src->line, '\n');
-		if (c)
-			*c = 0;
-
-		src->next = NULL;
-		*lines_tail = src;
-		lines_tail = &src->next;
-
-		if (strlen(src->line)>8 && src->line[8] == ':')
-			src->EIP = strtoull(src->line, NULL, 16);
-		if (strlen(src->line)>8 && src->line[16] == ':')
-			src->EIP = strtoull(src->line, NULL, 16);
-	}
-	pclose(file);
-}
-
-static void record_precise_ip(uint64_t ip)
-{
-	struct source_line *line;
-
-	for (line = lines; line; line = line->next) {
-		if (line->EIP == ip)
-			line->count++;
-		if (line->EIP > ip)
-			break;
-	}
-}
-
-static void lookup_sym_in_vmlinux(struct sym_entry *sym)
-{
-	struct source_line *line;
-	char pattern[PATH_MAX];
-	sprintf(pattern, "<%s>:", sym->sym);
-
-	for (line = lines; line; line = line->next) {
-		if (strstr(line->line, pattern)) {
-			sym->source = line;
-			break;
-		}
-	}
-}
-
-static void show_lines(struct source_line *line_queue, int line_queue_count)
-{
-	int i;
-	struct source_line *line;
-
-	line = line_queue;
-	for (i = 0; i < line_queue_count; i++) {
-		printf("%8li\t%s\n", line->count, line->line);
-		line = line->next;
-	}
-}
-
 #define TRACE_COUNT     3
 
-static void show_details(struct sym_entry *sym)
-{
-	struct source_line *line;
-	struct source_line *line_queue = NULL;
-	int displayed = 0;
-	int line_queue_count = 0;
-
-	if (!sym->source)
-		lookup_sym_in_vmlinux(sym);
-	if (!sym->source)
-		return;
-
-	printf("Showing details for %s\n", sym->sym);
-
-	line = sym->source;
-	while (line) {
-		if (displayed && strstr(line->line, ">:"))
-			break;
-
-		if (!line_queue_count)
-			line_queue = line;
-		line_queue_count ++;
-
-		if (line->count >= count_filter) {
-			show_lines(line_queue, line_queue_count);
-			line_queue_count = 0;
-			line_queue = NULL;
-		} else if (line_queue_count > TRACE_COUNT) {
-			line_queue = line_queue->next;
-			line_queue_count --;
-		}
-
-		line->count = 0;
-		displayed++;
-		if (displayed > 300)
-			break;
-		line = line->next;
-	}
-}
-
 /*
  * Binary search in the histogram table and record the hit:
  */
@@ -764,8 +431,6 @@ static void record_ip(uint64_t ip, int counter)
 	int left_idx, middle_idx, right_idx, idx;
 	unsigned long left, middle, right;
 
-	record_precise_ip(ip);
-
 	left_idx = 0;
 	right_idx = sym_table_count-1;
 	assert(ip <= max_ip && ip >= min_ip);
@@ -822,97 +487,6 @@ static void process_event(uint64_t ip, int counter)
 	record_ip(ip, counter);
 }
 
-static void process_options(int argc, char **argv)
-{
-	int error = 0, counter;
-
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"count",	required_argument,	NULL, 'c'},
-			{"cpu",		required_argument,	NULL, 'C'},
-			{"delay",	required_argument,	NULL, 'd'},
-			{"dump_symtab",	no_argument,		NULL, 'D'},
-			{"event",	required_argument,	NULL, 'e'},
-			{"filter",	required_argument,	NULL, 'f'},
-			{"group",	required_argument,	NULL, 'g'},
-			{"help",	no_argument,		NULL, 'h'},
-			{"nmi",		required_argument,	NULL, 'n'},
-			{"mmap_info",	no_argument,		NULL, 'M'},
-			{"mmap_pages",	required_argument,	NULL, 'm'},
-			{"munmap_info",	no_argument,		NULL, 'U'},
-			{"pid",		required_argument,	NULL, 'p'},
-			{"realtime",	required_argument,	NULL, 'r'},
-			{"scale",	no_argument,		NULL, 'l'},
-			{"symbol",	required_argument,	NULL, 's'},
-			{"stat",	no_argument,		NULL, 'S'},
-			{"vmlinux",	required_argument,	NULL, 'x'},
-			{"zero",	no_argument,		NULL, 'z'},
-			{"freq",	required_argument,	NULL, 'F'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'a': system_wide			=	       1; break;
-		case 'c': default_interval		=   atoi(optarg); break;
-		case 'C':
-			/* CPU and PID are mutually exclusive */
-			if (tid != -1) {
-				printf("WARNING: CPU switch overriding PID\n");
-				sleep(1);
-				tid = -1;
-			}
-			profile_cpu			=   atoi(optarg); break;
-		case 'd': delay_secs			=   atoi(optarg); break;
-		case 'D': dump_symtab			=              1; break;
-
-		case 'e': error				= parse_events(optarg); break;
-
-		case 'f': count_filter			=   atoi(optarg); break;
-		case 'g': group				=   atoi(optarg); break;
-		case 'h':      				  display_help(); break;
-		case 'l': scale				=	       1; break;
-		case 'n': nmi				=   atoi(optarg); break;
-		case 'p':
-			/* CPU and PID are mutually exclusive */
-			if (profile_cpu != -1) {
-				printf("WARNING: PID switch overriding CPU\n");
-				sleep(1);
-				profile_cpu = -1;
-			}
-			tid				=   atoi(optarg); break;
-		case 'r': realtime_prio			=   atoi(optarg); break;
-		case 's': sym_filter			= strdup(optarg); break;
-		case 'x': vmlinux			= strdup(optarg); break;
-		case 'z': zero				=              1; break;
-		case 'm': mmap_pages			=   atoi(optarg); break;
-		case 'M': use_mmap			=              1; break;
-		case 'U': use_munmap			=              1; break;
-		case 'F': freq = 1; default_interval	=   atoi(optarg); break;
-		default: error = 1; break;
-		}
-	}
-	if (error)
-		display_help();
-
-	if (!nr_counters) {
-		nr_counters = 1;
-		event_id[0] = 0;
-	}
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
-			continue;
-
-		event_count[counter] = default_interval;
-	}
-}
-
 struct mmap_data {
 	int counter;
 	void *base;
@@ -973,11 +547,11 @@ static void mmap_read(struct mmap_data *md)
 		struct ip_event {
 			struct perf_event_header header;
 			__u64 ip;
-			__u32 pid, tid;
+			__u32 pid, target_pid;
 		};
 		struct mmap_event {
 			struct perf_event_header header;
-			__u32 pid, tid;
+			__u32 pid, target_pid;
 			__u64 start;
 			__u64 len;
 			__u64 pgoff;
@@ -1043,7 +617,7 @@ static void mmap_read(struct mmap_data *md)
 static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
 static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 
-int cmd_top(int argc, char **argv, const char *prefix)
+static int __cmd_top(void)
 {
 	struct perf_counter_hw_event hw_event;
 	pthread_t thread;
@@ -1051,27 +625,12 @@ int cmd_top(int argc, char **argv, const char *prefix)
 	unsigned int cpu;
 	int ret;
 
-	page_size = sysconf(_SC_PAGE_SIZE);
-
-	process_options(argc, argv);
-
-	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-	assert(nr_cpus <= MAX_NR_CPUS);
-	assert(nr_cpus >= 0);
-
-	if (tid != -1 || profile_cpu != -1)
-		nr_cpus = 1;
-
-	parse_symbols();
-	if (vmlinux && sym_filter_entry)
-		parse_vmlinux(vmlinux);
-
 	for (i = 0; i < nr_cpus; i++) {
 		group_fd = -1;
 		for (counter = 0; counter < nr_counters; counter++) {
 
 			cpu	= profile_cpu;
-			if (tid == -1 && profile_cpu == -1)
+			if (target_pid == -1 && profile_cpu == -1)
 				cpu = i;
 
 			memset(&hw_event, 0, sizeof(hw_event));
@@ -1083,7 +642,7 @@ int cmd_top(int argc, char **argv, const char *prefix)
 			hw_event.munmap		= use_munmap;
 			hw_event.freq		= freq;
 
-			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
+			fd[i][counter] = sys_perf_counter_open(&hw_event, target_pid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {
 				int err = errno;
 				printf("kerneltop error: syscall returned with %d (%s)\n",
@@ -1147,3 +706,95 @@ int cmd_top(int argc, char **argv, const char *prefix)
 
 	return 0;
 }
+
+static const char * const top_usage[] = {
+	"perf top [<options>]",
+	NULL
+};
+
+static char events_help_msg[EVENTS_HELP_MAX];
+
+static const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     events_help_msg, parse_events),
+	OPT_INTEGER('c', "count", &default_interval,
+		    "event period to sample"),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "profile events on existing pid"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_INTEGER('C', "CPU", &profile_cpu,
+		    "CPU to profile on"),
+	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
+		    "number of mmap data pages"),
+	OPT_INTEGER('r', "realtime", &realtime_prio,
+		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_INTEGER('d', "delay", &realtime_prio,
+		    "number of seconds to delay between refreshes"),
+	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
+			    "dump the symbol table used for profiling"),
+	OPT_INTEGER('f', "--count-filter", &count_filter,
+		    "only display functions with more events than this"),
+	OPT_BOOLEAN('g', "group", &group,
+			    "put the counters into a counter group"),
+	OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
+		    "only display symbols matchig this pattern"),
+	OPT_BOOLEAN('z', "zero", &group,
+		    "zero history across updates"),
+	OPT_BOOLEAN('M', "use-mmap", &use_mmap,
+		    "track mmap events"),
+	OPT_BOOLEAN('U', "use-munmap", &use_munmap,
+		    "track munmap events"),
+	OPT_INTEGER('F', "--freq", &freq,
+		    "profile at this frequency"),
+	OPT_END()
+};
+
+int cmd_top(int argc, const char **argv, const char *prefix)
+{
+	int counter;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	create_events_help(events_help_msg);
+	memcpy(event_id, default_event_id, sizeof(default_event_id));
+
+	argc = parse_options(argc, argv, options, top_usage, 0);
+	if (argc)
+		usage_with_options(top_usage, options);
+
+	if (freq) {
+		default_interval = freq;
+		freq = 1;
+	}
+
+	/* CPU and PID are mutually exclusive */
+	if (target_pid != -1 && profile_cpu != -1) {
+		printf("WARNING: PID switch overriding CPU\n");
+		sleep(1);
+		profile_cpu = -1;
+	}
+
+	if (!nr_counters) {
+		nr_counters = 1;
+		event_id[0] = 0;
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (event_count[counter])
+			continue;
+
+		event_count[counter] = default_interval;
+	}
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	if (target_pid != -1 || profile_cpu != -1)
+		nr_cpus = 1;
+
+	parse_symbols();
+
+	return __cmd_top();
+}
-- 
cgit v0.10.2


From 4e0f088106c28d0ed8e827338b42404d236deda0 Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Date: Mon, 25 May 2009 22:35:38 +0200
Subject: ARM MXC: Atmark Armadillo 500 board support.

On Mon, 25 May 2009 18:33:28 +0200
Valentin Longchamp <valentin.longchamp@epfl.ch> wrote:

> Hello,
>
> I have updated my mxc git tree and given a compilation spin to your
> board support and I have comments. See below.
>

>
> What's the point of declaring this new function, if all it does is
> calling the already existing one ? Furthermore, in the current
> mxc-master tree, mxc_map_io() does not exist anymore, it should be
> mx31_map_io (this breaks compilation for your board).
>

This patch solve those two issues. Until now i based my patches to
vanilla tree. This evening I cloned the mxc-master tree so i could
update the armadillo 500 support within recent platform changes.

The patch:

From e986a8dc262e5292350d95bf65ead75baf3272d7 Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Date: Mon, 25 May 2009 22:24:03 +0200
Subject: [PATCH] Armadillo 500 removing useless function armadillo5x0_map_io
and use machine specific io mapping

Signed-off-by: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/armadillo5x0.c b/arch/arm/mach-mx3/armadillo5x0.c
index cac4538..54118109 100644
--- a/arch/arm/mach-mx3/armadillo5x0.c
+++ b/arch/arm/mach-mx3/armadillo5x0.c
@@ -274,20 +274,6 @@ static void __init armadillo5x0_init(void)
 	mxc_register_device(&mx3_fb, &mx3fb_pdata);
 }
 
-/*
- * Set up static virtual mappings.
- */
-static void __init armadillo5x0_map_io(void)
-{
-	/*
-	 * Maps:
-	 * - NAND, SDRAM, WEIM, M3IF, EMI controllers
-	 * - AVIC (Vectored Interrupt controller)
-	 * - AIPS1 and AIPS2
-	 **/
-	mxc_map_io();
-}
-
 static void __init armadillo5x0_timer_init(void)
 {
 	mx31_clocks_init(26000000);
@@ -302,7 +288,7 @@ MACHINE_START(ARMADILLO5X0, "Armadillo-500")
 	.phys_io	= AIPS1_BASE_ADDR,
 	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params	= PHYS_OFFSET + 0x00000100,
-	.map_io		= armadillo5x0_map_io,
+	.map_io		= mx31_map_io,
 	.init_irq	= mxc_init_irq,
 	.timer		= &armadillo5x0_timer,
 	.init_machine	= armadillo5x0_init,
-- 
cgit v0.10.2


From a693a26fe0807c3c406d6b5295d222b646677288 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 26 May 2009 12:58:58 +0200
Subject: ALSA: riptide - Code clean up

A code clean up, coding style fixes.
The firmware loading routine is split to an own function to improve
the readability.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index e51a5ef..460be03 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -507,41 +507,19 @@ static int riptide_reset(struct cmdif *cif, struct snd_riptide *chip);
  */
 
 static struct pci_device_id snd_riptide_ids[] = {
-	{
-	 .vendor = 0x127a,.device = 0x4310,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .vendor = 0x127a,.device = 0x4320,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .vendor = 0x127a,.device = 0x4330,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .vendor = 0x127a,.device = 0x4340,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
+	{ PCI_DEVICE(0x127a, 0x4310) },
+	{ PCI_DEVICE(0x127a, 0x4320) },
+	{ PCI_DEVICE(0x127a, 0x4330) },
+	{ PCI_DEVICE(0x127a, 0x4340) },
 	{0,},
 };
 
 #ifdef SUPPORT_JOYSTICK
 static struct pci_device_id snd_riptide_joystick_ids[] __devinitdata = {
-	{
-	 .vendor = 0x127a,.device = 0x4312,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .vendor = 0x127a,.device = 0x4322,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{.vendor = 0x127a,.device = 0x4332,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
-	{.vendor = 0x127a,.device = 0x4342,
-	 .subvendor = PCI_ANY_ID,.subdevice = PCI_ANY_ID,
-	 },
+	{ PCI_DEVICE(0x127a, 0x4312) },
+	{ PCI_DEVICE(0x127a, 0x4322) },
+	{ PCI_DEVICE(0x127a, 0x4332) },
+	{ PCI_DEVICE(0x127a, 0x4342) },
 	{0,},
 };
 #endif
@@ -1209,12 +1187,79 @@ static int riptide_resume(struct pci_dev *pci)
 }
 #endif
 
+static int try_to_load_firmware(struct cmdif *cif, struct snd_riptide *chip)
+{
+	union firmware_version firmware = { .ret = CMDRET_ZERO };
+	int i, timeout, err;
+
+	for (i = 0; i < 2; i++) {
+		WRITE_PORT_ULONG(cif->hwport->port[i].data1, 0);
+		WRITE_PORT_ULONG(cif->hwport->port[i].data2, 0);
+	}
+	SET_GRESET(cif->hwport);
+	udelay(100);
+	UNSET_GRESET(cif->hwport);
+	udelay(100);
+
+	for (timeout = 100000; --timeout; udelay(10)) {
+		if (IS_READY(cif->hwport) && !IS_GERR(cif->hwport))
+			break;
+	}
+	if (!timeout) {
+		snd_printk(KERN_ERR
+			   "Riptide: device not ready, audio status: 0x%x "
+			   "ready: %d gerr: %d\n",
+			   READ_AUDIO_STATUS(cif->hwport),
+			   IS_READY(cif->hwport), IS_GERR(cif->hwport));
+		return -EIO;
+	} else {
+		snd_printdd
+			("Riptide: audio status: 0x%x ready: %d gerr: %d\n",
+			 READ_AUDIO_STATUS(cif->hwport),
+			 IS_READY(cif->hwport), IS_GERR(cif->hwport));
+	}
+
+	SEND_GETV(cif, &firmware.ret);
+	snd_printdd("Firmware version: ASIC: %d CODEC %d AUXDSP %d PROG %d\n",
+		    firmware.firmware.ASIC, firmware.firmware.CODEC,
+		    firmware.firmware.AUXDSP, firmware.firmware.PROG);
+
+	for (i = 0; i < FIRMWARE_VERSIONS; i++) {
+		if (!memcmp(&firmware_versions[i], &firmware, sizeof(firmware)))
+			break;
+	}
+	if (i >= FIRMWARE_VERSIONS)
+		return 0; /* no match */
+
+	if (!chip)
+		return 1; /* OK */
+
+	snd_printdd("Writing Firmware\n");
+	if (!chip->fw_entry) {
+		err = request_firmware(&chip->fw_entry, "riptide.hex",
+				       &chip->pci->dev);
+		if (err) {
+			snd_printk(KERN_ERR
+				   "Riptide: Firmware not available %d\n", err);
+			return -EIO;
+		}
+	}
+	err = loadfirmware(cif, chip->fw_entry->data, chip->fw_entry->size);
+	if (err) {
+		snd_printk(KERN_ERR
+			   "Riptide: Could not load firmware %d\n", err);
+		return err;
+	}
+
+	chip->firmware = firmware;
+
+	return 1; /* OK */
+}
+
 static int riptide_reset(struct cmdif *cif, struct snd_riptide *chip)
 {
-	int timeout, tries;
 	union cmdret rptr = CMDRET_ZERO;
-	union firmware_version firmware;
-	int i, j, err, has_firmware;
+	int err, tries;
 
 	if (!cif)
 		return -EINVAL;
@@ -1227,75 +1272,11 @@ static int riptide_reset(struct cmdif *cif, struct snd_riptide *chip)
 	cif->is_reset = 0;
 
 	tries = RESET_TRIES;
-	has_firmware = 0;
-	while (has_firmware == 0 && tries-- > 0) {
-		for (i = 0; i < 2; i++) {
-			WRITE_PORT_ULONG(cif->hwport->port[i].data1, 0);
-			WRITE_PORT_ULONG(cif->hwport->port[i].data2, 0);
-		}
-		SET_GRESET(cif->hwport);
-		udelay(100);
-		UNSET_GRESET(cif->hwport);
-		udelay(100);
-
-		for (timeout = 100000; --timeout; udelay(10)) {
-			if (IS_READY(cif->hwport) && !IS_GERR(cif->hwport))
-				break;
-		}
-		if (timeout == 0) {
-			snd_printk(KERN_ERR
-				   "Riptide: device not ready, audio status: 0x%x ready: %d gerr: %d\n",
-				   READ_AUDIO_STATUS(cif->hwport),
-				   IS_READY(cif->hwport), IS_GERR(cif->hwport));
-			return -EIO;
-		} else {
-			snd_printdd
-			    ("Riptide: audio status: 0x%x ready: %d gerr: %d\n",
-			     READ_AUDIO_STATUS(cif->hwport),
-			     IS_READY(cif->hwport), IS_GERR(cif->hwport));
-		}
-
-		SEND_GETV(cif, &rptr);
-		for (i = 0; i < 4; i++)
-			firmware.ret.retwords[i] = rptr.retwords[i];
-
-		snd_printdd
-		    ("Firmware version: ASIC: %d CODEC %d AUXDSP %d PROG %d\n",
-		     firmware.firmware.ASIC, firmware.firmware.CODEC,
-		     firmware.firmware.AUXDSP, firmware.firmware.PROG);
-
-		for (j = 0; j < FIRMWARE_VERSIONS; j++) {
-			has_firmware = 1;
-			for (i = 0; i < 4; i++) {
-				if (firmware_versions[j].ret.retwords[i] !=
-				    firmware.ret.retwords[i])
-					has_firmware = 0;
-			}
-			if (has_firmware)
-				break;
-		}
-
-		if (chip != NULL && has_firmware == 0) {
-			snd_printdd("Writing Firmware\n");
-			if (!chip->fw_entry) {
-				if ((err =
-				     request_firmware(&chip->fw_entry,
-						      "riptide.hex",
-						      &chip->pci->dev)) != 0) {
-					snd_printk(KERN_ERR
-						   "Riptide: Firmware not available %d\n",
-						   err);
-					return -EIO;
-				}
-			}
-			err = loadfirmware(cif, chip->fw_entry->data,
-					   chip->fw_entry->size);
-			if (err)
-				snd_printk(KERN_ERR
-					   "Riptide: Could not load firmware %d\n",
-					   err);
-		}
-	}
+	do {
+		err = try_to_load_firmware(cif, chip);
+		if (err < 0)
+			return err;
+	} while (!err && --tries);
 
 	SEND_SACR(cif, 0, AC97_RESET);
 	SEND_RACR(cif, AC97_RESET, &rptr);
@@ -1337,11 +1318,6 @@ static int riptide_reset(struct cmdif *cif, struct snd_riptide *chip)
 	SET_AIE(cif->hwport);
 	SET_AIACK(cif->hwport);
 	cif->is_reset = 1;
-	if (chip) {
-		for (i = 0; i < 4; i++)
-			chip->firmware.ret.retwords[i] =
-			    firmware.ret.retwords[i];
-	}
 
 	return 0;
 }
@@ -2094,8 +2070,8 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 	static int dev;
 	struct snd_card *card;
 	struct snd_riptide *chip;
-	unsigned short addr;
-	int err = 0;
+	unsigned short val;
+	int err;
 
 	if (dev >= SNDRV_CARDS)
 		return -ENODEV;
@@ -2107,60 +2083,63 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 	err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
 	if (err < 0)
 		return err;
-	if ((err = snd_riptide_create(card, pci, &chip)) < 0) {
-		snd_card_free(card);
-		return err;
-	}
+	err = snd_riptide_create(card, pci, &chip);
+	if (err < 0)
+		goto error;
 	card->private_data = chip;
-	if ((err = snd_riptide_pcm(chip, 0, NULL)) < 0) {
-		snd_card_free(card);
-		return err;
-	}
-	if ((err = snd_riptide_mixer(chip)) < 0) {
-		snd_card_free(card);
-		return err;
-	}
-	pci_write_config_word(chip->pci, PCI_EXT_Legacy_Mask, LEGACY_ENABLE_ALL
-			      | (opl3_port[dev] ? LEGACY_ENABLE_FM : 0)
+	err = snd_riptide_pcm(chip, 0, NULL);
+	if (err < 0)
+		goto error;
+	err = snd_riptide_mixer(chip);
+	if (err < 0)
+		goto error;
+
+	val = LEGACY_ENABLE_ALL;
+	if (opl3_port[dev])
+		val |= LEGACY_ENABLE_FM;
 #ifdef SUPPORT_JOYSTICK
-			      | (joystick_port[dev] ? LEGACY_ENABLE_GAMEPORT :
-				 0)
+	if (joystick_port[dev])
+		val |= LEGACY_ENABLE_GAMEPORT;
 #endif
-			      | (mpu_port[dev]
-				 ? (LEGACY_ENABLE_MPU_INT | LEGACY_ENABLE_MPU) :
-				 0)
-			      | ((chip->irq << 4) & 0xF0));
-	if ((addr = mpu_port[dev]) != 0) {
-		pci_write_config_word(chip->pci, PCI_EXT_MPU_Base, addr);
-		if ((err = snd_mpu401_uart_new(card, 0, MPU401_HW_RIPTIDE,
-					       addr, 0, chip->irq, 0,
-					       &chip->rmidi)) < 0)
+	if (mpu_port[dev])
+		val |= LEGACY_ENABLE_MPU_INT | LEGACY_ENABLE_MPU;
+	val |= (chip->irq << 4) & 0xf0;
+	pci_write_config_word(chip->pci, PCI_EXT_Legacy_Mask, val);
+	if (mpu_port[dev]) {
+		val = mpu_port[dev];
+		pci_write_config_word(chip->pci, PCI_EXT_MPU_Base, val);
+		err = snd_mpu401_uart_new(card, 0, MPU401_HW_RIPTIDE,
+					  val, 0, chip->irq, 0,
+					  &chip->rmidi);
+		if (err < 0)
 			snd_printk(KERN_WARNING
 				   "Riptide: Can't Allocate MPU at 0x%x\n",
-				   addr);
+				   val);
 		else
-			chip->mpuaddr = addr;
+			chip->mpuaddr = val;
 	}
-	if ((addr = opl3_port[dev]) != 0) {
-		pci_write_config_word(chip->pci, PCI_EXT_FM_Base, addr);
-		if ((err = snd_opl3_create(card, addr, addr + 2,
-					   OPL3_HW_RIPTIDE, 0,
-					   &chip->opl3)) < 0)
+	if (opl3_port[dev]) {
+		val = opl3_port[dev];
+		pci_write_config_word(chip->pci, PCI_EXT_FM_Base, val);
+		err = snd_opl3_create(card, val, val + 2,
+				      OPL3_HW_RIPTIDE, 0, &chip->opl3);
+		if (err < 0)
 			snd_printk(KERN_WARNING
 				   "Riptide: Can't Allocate OPL3 at 0x%x\n",
-				   addr);
+				   val);
 		else {
-			chip->opladdr = addr;
-			if ((err =
-			     snd_opl3_hwdep_new(chip->opl3, 0, 1, NULL)) < 0)
+			chip->opladdr = val;
+			err = snd_opl3_hwdep_new(chip->opl3, 0, 1, NULL);
+			if (err < 0)
 				snd_printk(KERN_WARNING
 					   "Riptide: Can't Allocate OPL3-HWDEP\n");
 		}
 	}
 #ifdef SUPPORT_JOYSTICK
-	if ((addr = joystick_port[dev]) != 0) {
-		pci_write_config_word(chip->pci, PCI_EXT_Game_Base, addr);
-		chip->gameaddr = addr;
+	if (joystick_port[dev]) {
+		val = joystick_port[dev];
+		pci_write_config_word(chip->pci, PCI_EXT_Game_Base, val);
+		chip->gameaddr = val;
 	}
 #endif
 
@@ -2178,13 +2157,16 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 		 chip->opladdr);
 #endif
 	snd_riptide_proc_init(chip);
-	if ((err = snd_card_register(card)) < 0) {
-		snd_card_free(card);
-		return err;
-	}
+	err = snd_card_register(card);
+	if (err < 0)
+		goto error;
 	pci_set_drvdata(pci, card);
 	dev++;
 	return 0;
+
+ error:
+	snd_card_free(card);
+	return err;
 }
 
 static void __devexit snd_card_riptide_remove(struct pci_dev *pci)
-- 
cgit v0.10.2


From 8fa66bdcc81dd4fc2c91228074d0a4698120c5a2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 18 May 2009 12:45:42 -0300
Subject: perf_counter: First part of 'perf report' conversion to C + elfutils

Integrate perf-report into 'perf', as builtin-report.c.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 45daa72..49c601e 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -228,7 +228,6 @@ COMPAT_CFLAGS =
 COMPAT_OBJS =
 LIB_H =
 LIB_OBJS =
-PROGRAMS = perf-report
 SCRIPT_PERL =
 SCRIPT_SH =
 TEST_PROGRAMS =
@@ -315,6 +314,7 @@ LIB_OBJS += util/wrapper.o
 
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-record.o
+BUILTIN_OBJS += builtin-report.o
 BUILTIN_OBJS += builtin-stat.o
 BUILTIN_OBJS += builtin-top.o
 
@@ -811,10 +811,6 @@ clean:
 	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
 	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS
 
-# temporary hack:
-perf-report: perf-report.cc ../../include/linux/perf_counter.h Makefile
-	g++ -g -O2 -Wall -lrt -o $@ $<
-
 .PHONY: all install clean strip
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
new file mode 100644
index 0000000..864f68f
--- /dev/null
+++ b/Documentation/perf_counter/builtin-report.c
@@ -0,0 +1,751 @@
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <time.h>
+#include <getopt.h>
+#include <assert.h>
+#include <search.h>
+
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+#include "../../include/linux/perf_counter.h"
+#include "list.h"
+
+#define SHOW_KERNEL	1
+#define SHOW_USER	2
+#define SHOW_HV		4
+
+static char 		const *input_name = "output.perf";
+static int		input;
+static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
+
+static unsigned long	page_size;
+static unsigned long	mmap_window = 32;
+
+static const char *perf_event_names[] = {
+	[PERF_EVENT_MMAP]   = " PERF_EVENT_MMAP",
+	[PERF_EVENT_MUNMAP] = " PERF_EVENT_MUNMAP",
+	[PERF_EVENT_COMM]   = " PERF_EVENT_COMM",
+};
+
+struct ip_event {
+	struct perf_event_header header;
+	__u64 ip;
+	__u32 pid, tid;
+};
+struct mmap_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	__u64 start;
+	__u64 len;
+	__u64 pgoff;
+	char filename[PATH_MAX];
+};
+struct comm_event {
+	struct perf_event_header header;
+	__u32 pid,tid;
+	char comm[16];
+};
+
+typedef union event_union {
+	struct perf_event_header header;
+	struct ip_event ip;
+	struct mmap_event mmap;
+	struct comm_event comm;
+} event_t;
+
+struct section {
+	struct list_head node;
+	uint64_t	 start;
+	uint64_t	 end;
+	uint64_t	 offset;
+	char		 name[0];
+};
+
+static struct section *section__new(uint64_t start, uint64_t size,
+				    uint64_t offset, char *name)
+{
+	struct section *self = malloc(sizeof(*self) + strlen(name) + 1);
+
+	if (self != NULL) {
+		self->start  = start;
+		self->end    = start + size;
+		self->offset = offset;
+		strcpy(self->name, name);
+	}
+
+	return self;
+}
+
+static void section__delete(struct section *self)
+{
+	free(self);
+}
+
+struct symbol {
+	struct list_head node;
+	uint64_t	 start;
+	uint64_t	 end;
+	char		 name[0];
+};
+
+static struct symbol *symbol__new(uint64_t start, uint64_t len, const char *name)
+{
+	struct symbol *self = malloc(sizeof(*self) + strlen(name) + 1);
+
+	if (self != NULL) {
+		self->start = start;
+		self->end   = start + len;
+		strcpy(self->name, name);
+	}
+
+	return self;
+}
+
+static void symbol__delete(struct symbol *self)
+{
+	free(self);
+}
+
+static size_t symbol__fprintf(struct symbol *self, FILE *fp)
+{
+	return fprintf(fp, " %lx-%lx %s\n",
+		       self->start, self->end, self->name);
+}
+
+struct dso {
+	struct list_head node;
+	struct list_head sections;
+	struct list_head syms;
+	char		 name[0];
+};
+
+static struct dso *dso__new(const char *name)
+{
+	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
+
+	if (self != NULL) {
+		strcpy(self->name, name);
+		INIT_LIST_HEAD(&self->sections);
+		INIT_LIST_HEAD(&self->syms);
+	}
+
+	return self;
+}
+
+static void dso__delete_sections(struct dso *self)
+{
+	struct section *pos, *n;
+
+	list_for_each_entry_safe(pos, n, &self->sections, node)
+		section__delete(pos);
+}
+
+static void dso__delete_symbols(struct dso *self)
+{
+	struct symbol *pos, *n;
+
+	list_for_each_entry_safe(pos, n, &self->syms, node)
+		symbol__delete(pos);
+}
+
+static void dso__delete(struct dso *self)
+{
+	dso__delete_sections(self);
+	dso__delete_symbols(self);
+	free(self);
+}
+
+static void dso__insert_symbol(struct dso *self, struct symbol *sym)
+{
+	list_add_tail(&sym->node, &self->syms);
+}
+
+static struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
+{
+	if (self == NULL)
+		return NULL;
+
+	struct symbol *pos;
+
+	list_for_each_entry(pos, &self->syms, node)
+		if (ip >= pos->start && ip <= pos->end)
+			return pos;
+
+	return NULL;
+}
+
+static int dso__load(struct dso *self)
+{
+	/* FIXME */
+	return 0;
+}
+
+static size_t dso__fprintf(struct dso *self, FILE *fp)
+{
+	struct symbol *pos;
+	size_t ret = fprintf(fp, "dso: %s\n", self->name);
+
+	list_for_each_entry(pos, &self->syms, node)
+		ret += symbol__fprintf(pos, fp);
+
+	return ret;
+}
+
+static LIST_HEAD(dsos);
+static struct dso *kernel_dso;
+
+static void dsos__add(struct dso *dso)
+{
+	list_add_tail(&dso->node, &dsos);
+}
+
+static struct dso *dsos__find(const char *name)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		if (strcmp(pos->name, name) == 0)
+			return pos;
+	return NULL;
+}
+
+static struct dso *dsos__findnew(const char *name)
+{
+	struct dso *dso = dsos__find(name);
+
+	if (dso == NULL) {
+		dso = dso__new(name);
+		if (dso != NULL && dso__load(dso) < 0)
+			goto out_delete_dso;
+
+		dsos__add(dso);
+	}
+
+	return dso;
+
+out_delete_dso:
+	dso__delete(dso);
+	return NULL;
+}
+
+static void dsos__fprintf(FILE *fp)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		dso__fprintf(pos, fp);
+}
+
+static int load_kallsyms(void)
+{
+	kernel_dso = dso__new("[kernel]");
+	if (kernel_dso == NULL)
+		return -1;
+
+	FILE *file = fopen("/proc/kallsyms", "r");
+
+	if (file == NULL)
+		goto out_delete_dso;
+
+	char *line = NULL;
+	size_t n;
+
+	while (!feof(file)) {
+		unsigned long long start;
+		char c, symbf[4096];
+
+		if (getline(&line, &n, file) < 0)
+			break;
+
+		if (!line)
+			goto out_delete_dso;
+
+		if (sscanf(line, "%llx %c %s", &start, &c, symbf) == 3) {
+			struct symbol *sym = symbol__new(start, 0x1000000, symbf);
+
+			if (sym == NULL)
+				goto out_delete_dso;
+
+			dso__insert_symbol(kernel_dso, sym);
+		}
+	}
+
+	dsos__add(kernel_dso);
+	free(line);
+	fclose(file);
+	return 0;
+
+out_delete_dso:
+	dso__delete(kernel_dso);
+	return -1;
+}
+
+struct map {
+	struct list_head node;
+	uint64_t	 start;
+	uint64_t	 end;
+	uint64_t	 pgoff;
+	struct dso	 *dso;
+};
+
+static struct map *map__new(struct mmap_event *event)
+{
+	struct map *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->start = event->start;
+		self->end   = event->start + event->len;
+		self->pgoff = event->pgoff;
+
+		self->dso = dsos__findnew(event->filename);
+		if (self->dso == NULL)
+			goto out_delete;
+	}
+	return self;
+out_delete:
+	free(self);
+	return NULL;
+}
+
+static size_t map__fprintf(struct map *self, FILE *fp)
+{
+	return fprintf(fp, " %lx-%lx %lx %s\n",
+		       self->start, self->end, self->pgoff, self->dso->name);
+}
+
+struct symhist {
+	struct list_head node;
+	struct dso	 *dso;
+	struct symbol	 *sym;
+	uint32_t	 count;
+	char		 level;
+};
+
+static struct symhist *symhist__new(struct symbol *sym, struct dso *dso,
+				    char level)
+{
+	struct symhist *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->sym   = sym;
+		self->dso   = dso;
+		self->level = level;
+		self->count = 0;
+	}
+
+	return self;
+}
+
+static void symhist__delete(struct symhist *self)
+{
+	free(self);
+}
+
+static bool symhist__equal(struct symhist *self, struct symbol *sym,
+			   struct dso *dso, char level)
+{
+	return self->level == level && self->sym == sym && self->dso == dso;
+}
+
+static void symhist__inc(struct symhist *self)
+{
+	++self->count;
+}
+
+static size_t symhist__fprintf(struct symhist *self, FILE *fp)
+{
+	size_t ret = fprintf(fp, "[%c] ", self->level);
+
+	if (self->level != '.')
+		ret += fprintf(fp, "%s", self->sym->name);
+	else
+		ret += fprintf(fp, "%s: %s",
+			       self->dso ? self->dso->name : "<unknown",
+			       self->sym ? self->sym->name : "<unknown>");
+	return ret + fprintf(fp, ": %u\n", self->count);
+}
+
+struct thread {
+	struct list_head node;
+	struct list_head maps;
+	struct list_head symhists;
+	pid_t		 pid;
+	char		 *comm;
+};
+
+static struct thread *thread__new(pid_t pid)
+{
+	struct thread *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->pid = pid;
+		self->comm = NULL;
+		INIT_LIST_HEAD(&self->maps);
+		INIT_LIST_HEAD(&self->symhists);
+	}
+
+	return self;
+}
+
+static void thread__insert_symhist(struct thread *self,
+				   struct symhist *symhist)
+{
+	list_add_tail(&symhist->node, &self->symhists);
+}
+
+static struct symhist *thread__symhists_find(struct thread *self,
+					     struct symbol *sym,
+					     struct dso *dso, char level)
+{
+	struct symhist *pos;
+
+	list_for_each_entry(pos, &self->symhists, node)
+		if (symhist__equal(pos, sym, dso, level))
+			return pos;
+
+	return NULL;
+}
+
+static int thread__symbol_incnew(struct thread *self, struct symbol *sym,
+				 struct dso *dso, char level)
+{
+	struct symhist *symhist = thread__symhists_find(self, sym, dso, level);
+
+	if (symhist == NULL) {
+		symhist = symhist__new(sym, dso, level);
+		if (symhist == NULL)
+			goto out_error;
+		thread__insert_symhist(self, symhist);
+	}
+
+	symhist__inc(symhist);
+	return 0;
+out_error:
+	return -ENOMEM;
+}
+
+static int thread__set_comm(struct thread *self, const char *comm)
+{
+	self->comm = strdup(comm);
+	return self->comm ? 0 : -ENOMEM;
+}
+
+static size_t thread__maps_fprintf(struct thread *self, FILE *fp)
+{
+	struct map *pos;
+	size_t ret = 0;
+
+	list_for_each_entry(pos, &self->maps, node)
+		ret += map__fprintf(pos, fp);
+
+	return ret;
+}
+
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	struct symhist *pos;
+	int ret = fprintf(fp, "thread: %d %s\n", self->pid, self->comm);
+
+	list_for_each_entry(pos, &self->symhists, node)
+		ret += symhist__fprintf(pos, fp);
+
+	return ret;
+}
+
+static LIST_HEAD(threads);
+
+static void threads__add(struct thread *thread)
+{
+	list_add_tail(&thread->node, &threads);
+}
+
+static struct thread *threads__find(pid_t pid)
+{
+	struct thread *pos;
+
+	list_for_each_entry(pos, &threads, node)
+		if (pos->pid == pid)
+			return pos;
+	return NULL;
+}
+
+static struct thread *threads__findnew(pid_t pid)
+{
+	struct thread *thread = threads__find(pid);
+
+	if (thread == NULL) {
+		thread = thread__new(pid);
+		if (thread != NULL)
+			threads__add(thread);
+	}
+
+	return thread;
+}
+
+static void thread__insert_map(struct thread *self, struct map *map)
+{
+	list_add_tail(&map->node, &self->maps);
+}
+
+static struct map *thread__find_map(struct thread *self, uint64_t ip)
+{
+	if (self == NULL)
+		return NULL;
+
+	struct map *pos;
+
+	list_for_each_entry(pos, &self->maps, node)
+		if (ip >= pos->start && ip <= pos->end)
+			return pos;
+
+	return NULL;
+}
+
+static void threads__fprintf(FILE *fp)
+{
+	struct thread *pos;
+
+	list_for_each_entry(pos, &threads, node)
+		thread__fprintf(pos, fp);
+}
+
+#if 0
+static std::string resolve_user_symbol(int pid, uint64_t ip)
+{
+	std::string sym = "<unknown>";
+
+	maps_t &m = maps[pid];
+	maps_t::const_iterator mi = m.upper_bound(map(ip));
+	if (mi == m.end())
+		return sym;
+
+	ip -= mi->start + mi->pgoff;
+
+	symbols_t &s = dsos[mi->dso].syms;
+	symbols_t::const_iterator si = s.upper_bound(symbol(ip));
+
+	sym = mi->dso + ": <unknown>";
+
+	if (si == s.begin())
+		return sym;
+	si--;
+
+	if (si->start <= ip && ip < si->end)
+		sym = mi->dso + ": " + si->name;
+#if 0
+	else if (si->start <= ip)
+		sym = mi->dso + ": ?" + si->name;
+#endif
+
+	return sym;
+}
+#endif
+
+static void display_help(void)
+{
+	printf(
+	"Usage: perf-report [<options>]\n"
+	" -i file   --input=<file>      # input file\n"
+	);
+
+	exit(0);
+}
+
+static void process_options(int argc, char *argv[])
+{
+	int error = 0;
+
+	for (;;) {
+		int option_index = 0;
+		/** Options for getopt */
+		static struct option long_options[] = {
+			{"input",	required_argument,	NULL, 'i'},
+			{"no-user",	no_argument,		NULL, 'u'},
+			{"no-kernel",	no_argument,		NULL, 'k'},
+			{"no-hv",	no_argument,		NULL, 'h'},
+			{NULL,		0,			NULL,  0 }
+		};
+		int c = getopt_long(argc, argv, "+:i:kuh",
+				    long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'i': input_name			= strdup(optarg); break;
+		case 'k': show_mask &= ~SHOW_KERNEL; break;
+		case 'u': show_mask &= ~SHOW_USER; break;
+		case 'h': show_mask &= ~SHOW_HV; break;
+		default: error = 1; break;
+		}
+	}
+
+	if (error)
+		display_help();
+}
+
+int cmd_report(int argc, char **argv)
+{
+	unsigned long offset = 0;
+	unsigned long head = 0;
+	struct stat stat;
+	char *buf;
+	event_t *event;
+	int ret, rc = EXIT_FAILURE;
+	unsigned long total = 0;
+
+	page_size = getpagesize();
+
+	process_options(argc, argv);
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		perror("failed to open file");
+		exit(-1);
+	}
+
+	ret = fstat(input, &stat);
+	if (ret < 0) {
+		perror("failed to stat file");
+		exit(-1);
+	}
+
+	if (!stat.st_size) {
+		fprintf(stderr, "zero-sized file, nothing to do!\n");
+		exit(0);
+	}
+
+	if (load_kallsyms() < 0) {
+		perror("failed to open kallsyms");
+		return EXIT_FAILURE;
+	}
+
+remap:
+	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
+			   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		perror("failed to mmap file");
+		exit(-1);
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		unsigned long shift = page_size * (head / page_size);
+		int ret;
+
+		ret = munmap(buf, page_size * mmap_window);
+		assert(ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+
+	if (!event->header.size) {
+		fprintf(stderr, "zero-sized event at file offset %ld\n", offset + head);
+		fprintf(stderr, "skipping %ld bytes of events.\n", stat.st_size - offset - head);
+		goto done;
+	}
+
+	head += event->header.size;
+
+	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
+		char level;
+		int show = 0;
+		struct dso *dso = NULL;
+		struct thread *thread = threads__findnew(event->ip.pid);
+
+		if (thread == NULL)
+			goto done;
+
+		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+			show = SHOW_KERNEL;
+			level = 'k';
+			dso = kernel_dso;
+		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+			show = SHOW_USER;
+			level = '.';
+			struct map *map = thread__find_map(thread, event->ip.ip);
+			if (map != NULL)
+				dso = map->dso;
+		} else {
+			show = SHOW_HV;
+			level = 'H';
+		}
+
+		if (show & show_mask) {
+			struct symbol *sym = dso__find_symbol(dso, event->ip.ip);
+
+			if (thread__symbol_incnew(thread, sym, dso, level))
+				goto done;
+		}
+		total++;
+	} else switch (event->header.type) {
+	case PERF_EVENT_MMAP: {
+		struct thread *thread = threads__findnew(event->mmap.pid);
+		struct map *map = map__new(&event->mmap);
+
+		if (thread == NULL || map == NULL )
+			goto done;
+		thread__insert_map(thread, map);
+		break;
+	}
+	case PERF_EVENT_COMM: {
+		struct thread *thread = threads__findnew(event->comm.pid);
+
+		if (thread == NULL ||
+		    thread__set_comm(thread, event->comm.comm))
+			goto done;
+		break;
+	}
+	}
+
+	if (offset + head < stat.st_size)
+		goto more;
+
+	rc = EXIT_SUCCESS;
+done:
+	close(input);
+	//dsos__fprintf(stdout);
+	threads__fprintf(stdout);
+#if 0
+	std::map<std::string, int>::iterator hi = hist.begin();
+
+	while (hi != hist.end()) {
+		rev_hist.insert(std::pair<int, std::string>(hi->second, hi->first));
+		hist.erase(hi++);
+	}
+
+	std::multimap<int, std::string>::const_iterator ri = rev_hist.begin();
+
+	while (ri != rev_hist.end()) {
+		printf(" %5.2f %s\n", (100.0 * ri->first)/total, ri->second.c_str());
+		ri++;
+	}
+#endif
+	return rc;
+}
+
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index d32318a..5bfea57 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -16,6 +16,7 @@ extern int check_pager_config(const char *cmd);
 
 extern int cmd_help(int argc, const char **argv, const char *prefix);
 extern int cmd_record(int argc, const char **argv, const char *prefix);
+extern int cmd_report(int argc, const char **argv, const char *prefix);
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
 extern int cmd_top(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt
index d15210a..4390292 100644
--- a/Documentation/perf_counter/command-list.txt
+++ b/Documentation/perf_counter/command-list.txt
@@ -1,6 +1,7 @@
 # List of known perf commands.
 # command name				category [deprecated] [common]
 perf-record                             mainporcelain common
+perf-report                             mainporcelain common
 perf-stat                               mainporcelain common
 perf-top                                mainporcelain common
 
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index 1d6d7aa..e8a8584 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -250,6 +250,7 @@ static void handle_internal_command(int argc, const char **argv)
 	static struct cmd_struct commands[] = {
 		{ "help", cmd_help, 0 },
 		{ "record", cmd_record, 0 },
+		{ "report", cmd_report, 0 },
 		{ "stat", cmd_stat, 0 },
 		{ "top", cmd_top, 0 },
 		{ "version", cmd_version, 0 },
-- 
cgit v0.10.2


From fd4242bb35b70557eee8d0c79f82dacc3f3b89e0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 20 May 2009 12:45:34 +0200
Subject: perf_counter tools: remove the standalone perf-report utility

With a built-in 'perf report' command now available, remove the
standalone implementation for good.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc
deleted file mode 100644
index 8855107..0000000
--- a/Documentation/perf_counter/perf-report.cc
+++ /dev/null
@@ -1,515 +0,0 @@
-#define _GNU_SOURCE
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <errno.h>
-#include <ctype.h>
-#include <time.h>
-#include <getopt.h>
-#include <assert.h>
-
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/prctl.h>
-#include <sys/wait.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <linux/unistd.h>
-#include <linux/types.h>
-
-#include "../../include/linux/perf_counter.h"
-
-#include <set>
-#include <map>
-#include <string>
-
-
-#define SHOW_KERNEL	1
-#define SHOW_USER	2
-#define SHOW_HV		4
-
-static char 		const *input_name = "output.perf";
-static int		input;
-static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
-
-static unsigned long	page_size;
-static unsigned long	mmap_window = 32;
-
-struct ip_event {
-	struct perf_event_header header;
-	__u64 ip;
-	__u32 pid, tid;
-};
-struct mmap_event {
-	struct perf_event_header header;
-	__u32 pid, tid;
-	__u64 start;
-	__u64 len;
-	__u64 pgoff;
-	char filename[PATH_MAX];
-};
-struct comm_event {
-	struct perf_event_header header;
-	__u32 pid,tid;
-	char comm[16];
-};
-
-typedef union event_union {
-	struct perf_event_header header;
-	struct ip_event ip;
-	struct mmap_event mmap;
-	struct comm_event comm;
-} event_t;
-
-struct section {
-	uint64_t start;
-	uint64_t end;
-
-	uint64_t offset;
-
-	std::string name;
-
-	section() { };
-
-	section(uint64_t stab) : end(stab) { };
-
-	section(uint64_t start, uint64_t size, uint64_t offset, std::string name) :
-		start(start), end(start + size), offset(offset), name(name)
-	{ };
-
-	bool operator < (const struct section &s) const {
-		return end < s.end;
-	};
-};
-
-typedef std::set<struct section> sections_t;
-
-struct symbol {
-	uint64_t start;
-	uint64_t end;
-
-	std::string name;
-
-	symbol() { };
-
-	symbol(uint64_t ip) : start(ip) { }
-
-	symbol(uint64_t start, uint64_t len, std::string name) :
-		start(start), end(start + len), name(name)
-	{ };
-
-	bool operator < (const struct symbol &s) const {
-		return start < s.start;
-	};
-};
-
-typedef std::set<struct symbol> symbols_t;
-
-struct dso {
-	sections_t sections;
-	symbols_t syms;
-};
-
-static std::map<std::string, struct dso> dsos;
-
-static void load_dso_sections(std::string dso_name)
-{
-	struct dso &dso = dsos[dso_name];
-
-	std::string cmd = "readelf -DSW " + dso_name;
-
-	FILE *file = popen(cmd.c_str(), "r");
-	if (!file) {
-		perror("failed to open pipe");
-		exit(-1);
-	}
-
-	char *line = NULL;
-	size_t n = 0;
-
-	while (!feof(file)) {
-		uint64_t addr, off, size;
-		char name[32];
-
-		if (getline(&line, &n, file) < 0)
-			break;
-		if (!line)
-			break;
-
-		if (sscanf(line, "  [%*2d] %16s %*14s %Lx %Lx %Lx",
-					name, &addr, &off, &size) == 4) {
-
-			dso.sections.insert(section(addr, size, addr - off, name));
-		}
-#if 0
-		/*
-		 * for reading readelf symbols (-s), however these don't seem
-		 * to include nearly everything, so use nm for that.
-		 */
-		if (sscanf(line, " %*4d %*3d: %Lx %5Lu %*7s %*6s %*7s %3d %s",
-			   &start, &size, &section, sym) == 4) {
-
-			start -= dso.section_offsets[section];
-
-			dso.syms.insert(symbol(start, size, std::string(sym)));
-		}
-#endif
-	}
-	pclose(file);
-}
-
-static void load_dso_symbols(std::string dso_name, std::string args)
-{
-	struct dso &dso = dsos[dso_name];
-
-	std::string cmd = "nm -nSC " + args + " " + dso_name;
-
-	FILE *file = popen(cmd.c_str(), "r");
-	if (!file) {
-		perror("failed to open pipe");
-		exit(-1);
-	}
-
-	char *line = NULL;
-	size_t n = 0;
-
-	while (!feof(file)) {
-		uint64_t start, size;
-		char c;
-		char sym[1024];
-
-		if (getline(&line, &n, file) < 0)
-			break;
-		if (!line)
-			break;
-
-
-		if (sscanf(line, "%Lx %Lx %c %s", &start, &size, &c, sym) == 4) {
-			sections_t::const_iterator si =
-				dso.sections.upper_bound(section(start));
-			if (si == dso.sections.end()) {
-				printf("symbol in unknown section: %s\n", sym);
-				continue;
-			}
-
-			start -= si->offset;
-
-			dso.syms.insert(symbol(start, size, sym));
-		}
-	}
-	pclose(file);
-}
-
-static void load_dso(std::string dso_name)
-{
-	load_dso_sections(dso_name);
-	load_dso_symbols(dso_name, "-D"); /* dynamic symbols */
-	load_dso_symbols(dso_name, "");   /* regular ones */
-}
-
-void load_kallsyms(void)
-{
-	struct dso &dso = dsos["[kernel]"];
-
-	FILE *file = fopen("/proc/kallsyms", "r");
-	if (!file) {
-		perror("failed to open kallsyms");
-		exit(-1);
-	}
-
-	char *line;
-	size_t n;
-
-	while (!feof(file)) {
-		uint64_t start;
-		char c;
-		char sym[1024000];
-
-		if (getline(&line, &n, file) < 0)
-			break;
-		if (!line)
-			break;
-
-		if (sscanf(line, "%Lx %c %s", &start, &c, sym) == 3)
-			dso.syms.insert(symbol(start, 0x1000000, std::string(sym)));
-	}
-	fclose(file);
-}
-
-struct map {
-	uint64_t start;
-	uint64_t end;
-	uint64_t pgoff;
-
-	std::string dso;
-
-	map() { };
-
-	map(uint64_t ip) : end(ip) { }
-
-	map(mmap_event *mmap) {
-		start = mmap->start;
-		end = mmap->start + mmap->len;
-		pgoff = mmap->pgoff;
-
-		dso = std::string(mmap->filename);
-
-		if (dsos.find(dso) == dsos.end())
-			load_dso(dso);
-	};
-
-	bool operator < (const struct map &m) const {
-		return end < m.end;
-	};
-};
-
-typedef std::set<struct map> maps_t;
-
-static std::map<int, maps_t> maps;
-
-static std::map<int, std::string> comms;
-
-static std::map<std::string, int> hist;
-static std::multimap<int, std::string> rev_hist;
-
-static std::string resolve_comm(int pid)
-{
-	std::string comm;
-
-	std::map<int, std::string>::const_iterator ci = comms.find(pid);
-	if (ci != comms.end()) {
-		comm = ci->second;
-	} else {
-		char pid_str[30];
-
-		sprintf(pid_str, ":%d", pid);
-		comm = pid_str;
-	}
-
-	return comm;
-}
-
-static std::string resolve_user_symbol(int pid, uint64_t ip)
-{
-	std::string sym = "<unknown>";
-
-	maps_t &m = maps[pid];
-	maps_t::const_iterator mi = m.upper_bound(map(ip));
-	if (mi == m.end())
-		return sym;
-
-	ip -= mi->start + mi->pgoff;
-
-	symbols_t &s = dsos[mi->dso].syms;
-	symbols_t::const_iterator si = s.upper_bound(symbol(ip));
-
-	sym = mi->dso + ": <unknown>";
-
-	if (si == s.begin())
-		return sym;
-	si--;
-
-	if (si->start <= ip && ip < si->end)
-		sym = mi->dso + ": " + si->name;
-#if 0
-	else if (si->start <= ip)
-		sym = mi->dso + ": ?" + si->name;
-#endif
-
-	return sym;
-}
-
-static std::string resolve_kernel_symbol(uint64_t ip)
-{
-	std::string sym = "<unknown>";
-
-	symbols_t &s = dsos["[kernel]"].syms;
-	symbols_t::const_iterator si = s.upper_bound(symbol(ip));
-
-	if (si == s.begin())
-		return sym;
-	si--;
-
-	if (si->start <= ip && ip < si->end)
-		sym = si->name;
-
-	return sym;
-}
-
-static void display_help(void)
-{
-	printf(
-	"Usage: perf-report [<options>]\n"
-	" -i file   --input=<file>      # input file\n"
-	);
-
-	exit(0);
-}
-
-static void process_options(int argc, char *argv[])
-{
-	int error = 0;
-
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"input",	required_argument,	NULL, 'i'},
-			{"no-user",	no_argument,		NULL, 'u'},
-			{"no-kernel",	no_argument,		NULL, 'k'},
-			{"no-hv",	no_argument,		NULL, 'h'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "+:i:kuh",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'i': input_name			= strdup(optarg); break;
-		case 'k': show_mask &= ~SHOW_KERNEL; break;
-		case 'u': show_mask &= ~SHOW_USER; break;
-		case 'h': show_mask &= ~SHOW_HV; break;
-		default: error = 1; break;
-		}
-	}
-
-	if (error)
-		display_help();
-}
-
-int main(int argc, char *argv[])
-{
-	unsigned long offset = 0;
-	unsigned long head = 0;
-	struct stat stat;
-	char *buf;
-	event_t *event;
-	int ret;
-	unsigned long total = 0;
-
-	page_size = getpagesize();
-
-	process_options(argc, argv);
-
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		perror("failed to open file");
-		exit(-1);
-	}
-
-	ret = fstat(input, &stat);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
-	}
-
-	if (!stat.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-
-	load_kallsyms();
-
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
-
-more:
-	event = (event_t *)(buf + head);
-
-	if (head + event->header.size >= page_size * mmap_window) {
-		unsigned long shift = page_size * (head / page_size);
-		int ret;
-
-		ret = munmap(buf, page_size * mmap_window);
-		assert(ret == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-
-	if (!event->header.size) {
-		fprintf(stderr, "zero-sized event at file offset %ld\n", offset + head);
-		fprintf(stderr, "skipping %ld bytes of events.\n", stat.st_size - offset - head);
-		goto done;
-	}
-
-	head += event->header.size;
-
-	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
-		std::string comm, sym, level;
-		int show = 0;
-		char output[1024];
-
-		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
-			show |= SHOW_KERNEL;
-			level = " [k] ";
-			sym = resolve_kernel_symbol(event->ip.ip);
-		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
-			show |= SHOW_USER;
-			level = " [.] ";
-			sym = resolve_user_symbol(event->ip.pid, event->ip.ip);
-		} else {
-			show |= SHOW_HV;
-			level = " [H] ";
-		}
-
-		if (show & show_mask) {
-			comm = resolve_comm(event->ip.pid);
-			snprintf(output, sizeof(output), "%16s %s %s",
-					comm.c_str(), level.c_str(), sym.c_str());
-			hist[output]++;
-		}
-
-		total++;
-
-	} else switch (event->header.type) {
-	case PERF_EVENT_MMAP:
-		maps[event->mmap.pid].insert(map(&event->mmap));
-		break;
-
-	case PERF_EVENT_COMM:
-		comms[event->comm.pid] = std::string(event->comm.comm);
-		break;
-	}
-
-	if (offset + head < stat.st_size)
-		goto more;
-
-done:
-
-	close(input);
-
-	std::map<std::string, int>::iterator hi = hist.begin();
-
-	while (hi != hist.end()) {
-		rev_hist.insert(std::pair<int, std::string>(hi->second, hi->first));
-		hist.erase(hi++);
-	}
-
-	std::multimap<int, std::string>::const_iterator ri = rev_hist.begin();
-
-	while (ri != rev_hist.end()) {
-		printf(" %5.2f %s\n", (100.0 * ri->first)/total, ri->second.c_str());
-		ri++;
-	}
-
-	return 0;
-}
-
-- 
cgit v0.10.2


From 62eb93905b3b43cea407cfbc061cc7b40ae1c6e9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 18 May 2009 14:28:47 -0300
Subject: perf_counter: Implement dso__load using libelf

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 49c601e..6bffa86 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -160,7 +160,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
 CFLAGS = -g -O2 -Wall
-LDFLAGS = -lpthread -lrt
+LDFLAGS = -lpthread -lrt -lelf
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
 STRIP ?= strip
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 864f68f..ad2f327 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -8,6 +8,9 @@
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
+#include <gelf.h>
+#include <elf.h>
+#include <libelf.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <errno.h>
@@ -195,10 +198,123 @@ static struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
 	return NULL;
 }
 
+/**
+ * elf_symtab__for_each_symbol - iterate thru all the symbols
+ *
+ * @self: struct elf_symtab instance to iterate
+ * @index: uint32_t index
+ * @sym: GElf_Sym iterator
+ */
+#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \
+	for (index = 0, gelf_getsym(syms, index, &sym);\
+	     index < nr_syms; \
+	     index++, gelf_getsym(syms, index, &sym))
+
+static inline uint8_t elf_sym__type(const GElf_Sym *sym)
+{
+	return GELF_ST_TYPE(sym->st_info);
+}
+
+static inline bool elf_sym__is_function(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_FUNC &&
+	       sym->st_name != 0 &&
+	       sym->st_shndx != SHN_UNDEF;
+}
+
+static inline const char *elf_sym__name(const GElf_Sym *sym,
+					const Elf_Data *symstrs)
+{
+	return symstrs->d_buf + sym->st_name;
+}
+
+static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+				    GElf_Shdr *shp, const char *name,
+				    size_t *index)
+{
+	Elf_Scn *sec = NULL;
+	size_t cnt = 1;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		char *str;
+
+		gelf_getshdr(sec, shp);
+		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
+		if (!strcmp(name, str)) {
+			if (index)
+				*index = cnt;
+			break;
+		}
+		++cnt;
+	}
+
+	return sec;
+}
+
 static int dso__load(struct dso *self)
 {
-	/* FIXME */
-	return 0;
+	int fd = open(self->name, O_RDONLY), err = -1;
+
+	if (fd == -1)
+		return -1;
+
+	Elf *elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		fprintf(stderr, "%s: cannot read %s ELF file.\n",
+			__func__, self->name);
+		goto out_close;
+	}
+
+	GElf_Ehdr ehdr;
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		fprintf(stderr, "%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	GElf_Shdr shdr;
+	Elf_Scn *sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
+	if (sec == NULL)
+		sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL);
+
+	if (sec == NULL)
+		goto out_elf_end;
+
+	if (gelf_getshdr(sec, &shdr) == NULL)
+		goto out_elf_end;
+
+	Elf_Data *syms = elf_getdata(sec, NULL);
+	if (syms == NULL)
+		goto out_elf_end;
+
+	sec = elf_getscn(elf, shdr.sh_link);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	Elf_Data *symstrs = elf_getdata(sec, NULL);
+	if (symstrs == NULL)
+		goto out_elf_end;
+
+	const uint32_t nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+	GElf_Sym sym;
+	uint32_t index;
+	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
+		if (!elf_sym__is_function(&sym))
+			continue;
+		struct symbol *f = symbol__new(sym.st_value, sym.st_size,
+					       elf_sym__name(&sym, symstrs));
+		if (f == NULL)
+			goto out_elf_end;
+
+		dso__insert_symbol(self, f);
+	}
+
+	err = 0;
+out_elf_end:
+	elf_end(elf);
+out_close:
+	close(fd);
+	return err;
 }
 
 static size_t dso__fprintf(struct dso *self, FILE *fp)
@@ -614,6 +730,8 @@ int cmd_report(int argc, char **argv)
 	int ret, rc = EXIT_FAILURE;
 	unsigned long total = 0;
 
+	elf_version(EV_CURRENT);
+
 	page_size = getpagesize();
 
 	process_options(argc, argv);
-- 
cgit v0.10.2


From 35a50c8a20eea22c141e05c5667ac21c48b8b65d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 18 May 2009 16:24:49 -0300
Subject: perf_counter: Use rb_trees in perf report

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 6bffa86..412dea1 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -287,6 +287,8 @@ LIB_FILE=libperf.a
 
 LIB_H += ../../include/linux/perf_counter.h
 LIB_H += perf.h
+LIB_H += util/list.h
+LIB_H += util/rbtree.h
 LIB_H += util/levenshtein.h
 LIB_H += util/parse-options.h
 LIB_H += util/parse-events.h
@@ -306,6 +308,7 @@ LIB_OBJS += util/levenshtein.o
 LIB_OBJS += util/parse-options.o
 LIB_OBJS += util/parse-events.o
 LIB_OBJS += util/path.o
+LIB_OBJS += util/rbtree.o
 LIB_OBJS += util/run-command.o
 LIB_OBJS += util/quote.o
 LIB_OBJS += util/strbuf.o
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index ad2f327..f63057f 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -32,7 +32,8 @@
 #include <linux/types.h>
 
 #include "../../include/linux/perf_counter.h"
-#include "list.h"
+#include "util/list.h"
+#include "util/rbtree.h"
 
 #define SHOW_KERNEL	1
 #define SHOW_USER	2
@@ -106,10 +107,10 @@ static void section__delete(struct section *self)
 }
 
 struct symbol {
-	struct list_head node;
-	uint64_t	 start;
-	uint64_t	 end;
-	char		 name[0];
+	struct rb_node rb_node;
+	uint64_t       start;
+	uint64_t       end;
+	char	       name[0];
 };
 
 static struct symbol *symbol__new(uint64_t start, uint64_t len, const char *name)
@@ -139,7 +140,7 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp)
 struct dso {
 	struct list_head node;
 	struct list_head sections;
-	struct list_head syms;
+	struct rb_root	 syms;
 	char		 name[0];
 };
 
@@ -150,7 +151,7 @@ static struct dso *dso__new(const char *name)
 	if (self != NULL) {
 		strcpy(self->name, name);
 		INIT_LIST_HEAD(&self->sections);
-		INIT_LIST_HEAD(&self->syms);
+		self->syms = RB_ROOT;
 	}
 
 	return self;
@@ -166,10 +167,14 @@ static void dso__delete_sections(struct dso *self)
 
 static void dso__delete_symbols(struct dso *self)
 {
-	struct symbol *pos, *n;
+	struct symbol *pos;
+	struct rb_node *next = rb_first(&self->syms);
 
-	list_for_each_entry_safe(pos, n, &self->syms, node)
+	while (next) {
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
 		symbol__delete(pos);
+	}
 }
 
 static void dso__delete(struct dso *self)
@@ -181,7 +186,21 @@ static void dso__delete(struct dso *self)
 
 static void dso__insert_symbol(struct dso *self, struct symbol *sym)
 {
-	list_add_tail(&sym->node, &self->syms);
+	struct rb_node **p = &self->syms.rb_node;
+	struct rb_node *parent = NULL;
+	const uint64_t ip = sym->start;
+	struct symbol *s;
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol, rb_node);
+		if (ip < s->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&sym->rb_node, parent, p);
+	rb_insert_color(&sym->rb_node, &self->syms);
 }
 
 static struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
@@ -189,11 +208,18 @@ static struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
 	if (self == NULL)
 		return NULL;
 
-	struct symbol *pos;
+	struct rb_node *n = self->syms.rb_node;
 
-	list_for_each_entry(pos, &self->syms, node)
-		if (ip >= pos->start && ip <= pos->end)
-			return pos;
+	while (n) {
+		struct symbol *s = rb_entry(n, struct symbol, rb_node);
+
+		if (ip < s->start)
+			n = n->rb_left;
+		else if (ip > s->end)
+			n = n->rb_right;
+		else
+			return s;
+	}
 
 	return NULL;
 }
@@ -319,11 +345,13 @@ out_close:
 
 static size_t dso__fprintf(struct dso *self, FILE *fp)
 {
-	struct symbol *pos;
 	size_t ret = fprintf(fp, "dso: %s\n", self->name);
 
-	list_for_each_entry(pos, &self->syms, node)
+	struct rb_node *nd;
+	for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
 		ret += symbol__fprintf(pos, fp);
+	}
 
 	return ret;
 }
diff --git a/Documentation/perf_counter/util/rbtree.c b/Documentation/perf_counter/util/rbtree.c
new file mode 100644
index 0000000..b15ba9c
--- /dev/null
+++ b/Documentation/perf_counter/util/rbtree.c
@@ -0,0 +1,383 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *right = node->rb_right;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_right = right->rb_left))
+		rb_set_parent(right->rb_left, node);
+	right->rb_left = node;
+
+	rb_set_parent(right, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_left)
+			parent->rb_left = right;
+		else
+			parent->rb_right = right;
+	}
+	else
+		root->rb_node = right;
+	rb_set_parent(node, right);
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *left = node->rb_left;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_left = left->rb_right))
+		rb_set_parent(left->rb_right, node);
+	left->rb_right = node;
+
+	rb_set_parent(left, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_right)
+			parent->rb_right = left;
+		else
+			parent->rb_left = left;
+	}
+	else
+		root->rb_node = left;
+	rb_set_parent(node, left);
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *parent, *gparent;
+
+	while ((parent = rb_parent(node)) && rb_is_red(parent))
+	{
+		gparent = rb_parent(parent);
+
+		if (parent == gparent->rb_left)
+		{
+			{
+				register struct rb_node *uncle = gparent->rb_right;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_right == node)
+			{
+				register struct rb_node *tmp;
+				__rb_rotate_left(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_right(gparent, root);
+		} else {
+			{
+				register struct rb_node *uncle = gparent->rb_left;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_left == node)
+			{
+				register struct rb_node *tmp;
+				__rb_rotate_right(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_left(gparent, root);
+		}
+	}
+
+	rb_set_black(root->rb_node);
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+			     struct rb_root *root)
+{
+	struct rb_node *other;
+
+	while ((!node || rb_is_black(node)) && node != root->rb_node)
+	{
+		if (parent->rb_left == node)
+		{
+			other = parent->rb_right;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_left(parent, root);
+				other = parent->rb_right;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_right || rb_is_black(other->rb_right))
+				{
+					rb_set_black(other->rb_left);
+					rb_set_red(other);
+					__rb_rotate_right(other, root);
+					other = parent->rb_right;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_right);
+				__rb_rotate_left(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+		else
+		{
+			other = parent->rb_left;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_right(parent, root);
+				other = parent->rb_left;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_left || rb_is_black(other->rb_left))
+				{
+					rb_set_black(other->rb_right);
+					rb_set_red(other);
+					__rb_rotate_left(other, root);
+					other = parent->rb_left;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_left);
+				__rb_rotate_right(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+	}
+	if (node)
+		rb_set_black(node);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *child, *parent;
+	int color;
+
+	if (!node->rb_left)
+		child = node->rb_right;
+	else if (!node->rb_right)
+		child = node->rb_left;
+	else
+	{
+		struct rb_node *old = node, *left;
+
+		node = node->rb_right;
+		while ((left = node->rb_left) != NULL)
+			node = left;
+		child = node->rb_right;
+		parent = rb_parent(node);
+		color = rb_color(node);
+
+		if (child)
+			rb_set_parent(child, parent);
+		if (parent == old) {
+			parent->rb_right = child;
+			parent = node;
+		} else
+			parent->rb_left = child;
+
+		node->rb_parent_color = old->rb_parent_color;
+		node->rb_right = old->rb_right;
+		node->rb_left = old->rb_left;
+
+		if (rb_parent(old))
+		{
+			if (rb_parent(old)->rb_left == old)
+				rb_parent(old)->rb_left = node;
+			else
+				rb_parent(old)->rb_right = node;
+		} else
+			root->rb_node = node;
+
+		rb_set_parent(old->rb_left, node);
+		if (old->rb_right)
+			rb_set_parent(old->rb_right, node);
+		goto color;
+	}
+
+	parent = rb_parent(node);
+	color = rb_color(node);
+
+	if (child)
+		rb_set_parent(child, parent);
+	if (parent)
+	{
+		if (parent->rb_left == node)
+			parent->rb_left = child;
+		else
+			parent->rb_right = child;
+	}
+	else
+		root->rb_node = child;
+
+ color:
+	if (color == RB_BLACK)
+		__rb_erase_color(child, parent, root);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a right-hand child, go down and then left as far
+	   as we can. */
+	if (node->rb_right) {
+		node = node->rb_right; 
+		while (node->rb_left)
+			node=node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/* No right-hand children.  Everything down and left is
+	   smaller than us, so any 'next' node must be in the general
+	   direction of our parent. Go up the tree; any time the
+	   ancestor is a right-hand child of its parent, keep going
+	   up. First time it's a left-hand child of its parent, said
+	   parent is our 'next' node. */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a left-hand child, go down and then right as far
+	   as we can. */
+	if (node->rb_left) {
+		node = node->rb_left; 
+		while (node->rb_right)
+			node=node->rb_right;
+		return (struct rb_node *)node;
+	}
+
+	/* No left-hand children. Go up till we find an ancestor which
+	   is a right-hand child of its parent */
+	while ((parent = rb_parent(node)) && node == parent->rb_left)
+		node = parent;
+
+	return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (parent) {
+		if (victim == parent->rb_left)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else {
+		root->rb_node = new;
+	}
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
diff --git a/Documentation/perf_counter/util/rbtree.h b/Documentation/perf_counter/util/rbtree.h
new file mode 100644
index 0000000..6bdc488
--- /dev/null
+++ b/Documentation/perf_counter/util/rbtree.h
@@ -0,0 +1,171 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This will avoid us to use callbacks and to drop drammatically performances.
+  I know it's not the cleaner way,  but in C (not in C++) to get
+  performances and genericity...
+
+  Some example of insert and search follows here. The search is a plain
+  normal search over an ordered tree. The insert instead must be implemented
+  int two steps: as first thing the code must insert the element in
+  order as a red leaf in the tree, then the support library function
+  rb_insert_color() must be called. Such function will do the
+  not trivial work to rebalance the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+						 unsigned long offset)
+{
+	struct rb_node * n = inode->i_rb_page_cache.rb_node;
+	struct page * page;
+
+	while (n)
+	{
+		page = rb_entry(n, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			n = n->rb_left;
+		else if (offset > page->offset)
+			n = n->rb_right;
+		else
+			return page;
+	}
+	return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+						   unsigned long offset,
+						   struct rb_node * node)
+{
+	struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+	struct rb_node * parent = NULL;
+	struct page * page;
+
+	while (*p)
+	{
+		parent = *p;
+		page = rb_entry(parent, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			p = &(*p)->rb_left;
+		else if (offset > page->offset)
+			p = &(*p)->rb_right;
+		else
+			return page;
+	}
+
+	rb_link_node(node, parent, p);
+
+	return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+						 unsigned long offset,
+						 struct rb_node * node)
+{
+	struct page * ret;
+	if ((ret = __rb_insert_page_cache(inode, offset, node)))
+		goto out;
+	rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+	return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef	_LINUX_RBTREE_H
+#define	_LINUX_RBTREE_H
+
+#include <stddef.h>
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:	the pointer to the member.
+ * @type:	the type of the container struct this is embedded in.
+ * @member:	the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+struct rb_node
+{
+	unsigned long  rb_parent_color;
+#define	RB_RED		0
+#define	RB_BLACK	1
+	struct rb_node *rb_right;
+	struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+    /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root
+{
+	struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
+#define rb_color(r)   ((r)->rb_parent_color & 1)
+#define rb_is_red(r)   (!rb_color(r))
+#define rb_is_black(r) rb_color(r)
+#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
+#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
+}
+static inline void rb_set_color(struct rb_node *rb, int color)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
+}
+
+#define RB_ROOT	(struct rb_root) { NULL, }
+#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
+#define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
+#define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 
+			    struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+				struct rb_node ** rb_link)
+{
+	node->rb_parent_color = (unsigned long )parent;
+	node->rb_left = node->rb_right = NULL;
+
+	*rb_link = node;
+}
+
+#endif	/* _LINUX_RBTREE_H */
-- 
cgit v0.10.2


From 040e6034124c504d536736ce08e4643e640cd7c2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 18 May 2009 16:25:31 -0300
Subject: perf_counter: Add our private copy of list.h

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/list.h b/Documentation/perf_counter/util/list.h
new file mode 100644
index 0000000..e2548e8
--- /dev/null
+++ b/Documentation/perf_counter/util/list.h
@@ -0,0 +1,603 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+/*
+  Copyright (C) Cast of dozens, comes from the Linux kernel
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+*/
+
+#include <stddef.h>
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *)0x00100100)
+#define LIST_POISON2 ((void *)0x00200200)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:	the pointer to the member.
+ * @type:	the type of the container struct this is embedded in.
+ * @member:	the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({			\
+        const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+        (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->next = LIST_POISON1;
+	entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_range - deletes range of entries from list.
+ * @beging: first element in the range to delete from the list.
+ * @beging: first element in the range to delete from the list.
+ * Note: list_empty on the range of entries does not return true after this,
+ * the entries is in an undefined state.
+ */
+static inline void list_del_range(struct list_head *begin,
+				  struct list_head *end)
+{
+	begin->prev->next = end->next;
+	end->next->prev = begin->prev;
+}
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ * Note: if 'old' was empty, it will be overwritten.
+ */
+static inline void list_replace(struct list_head *old,
+				struct list_head *new)
+{
+	new->next = old->next;
+	new->next->prev = new;
+	new->prev = old->prev;
+	new->prev->next = new;
+}
+
+static inline void list_replace_init(struct list_head *old,
+					struct list_head *new)
+{
+	list_replace(old, new);
+	INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+        __list_del(list->prev, list->next);
+        list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+        __list_del(list->prev, list->next);
+        list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_last(const struct list_head *list,
+				const struct list_head *head)
+{
+	return list->next == head;
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * tests whether a list is empty _and_ checks that no other CPU might be
+ * in the process of modifying either member (next or prev)
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+	struct list_head *next = head->next;
+	return (next == head) && (next == head->prev);
+}
+
+static inline void __list_splice(struct list_head *list,
+				 struct list_head *head)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+	struct list_head *at = head->next;
+
+	first->prev = head;
+	head->next = first;
+
+	last->next = at;
+	at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:	the &struct list_head pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr:       the list head to take the element from.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/**
+ * list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); \
+        	pos = pos->next)
+
+/**
+ * __list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev	-	iterate over a list backwards
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+	for (pos = (head)->prev; pos != (head); \
+        	pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry	-	iterate over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	     &pos->member != (head); 	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member)			\
+	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	     &pos->member != (head); 	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue
+ * @pos:	the type * to use as a start point
+ * @head:	the head of the list
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue.
+ */
+#define list_prepare_entry(pos, head, member) \
+	((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue(pos, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position.
+ */
+#define list_for_each_entry_from(pos, head, member) 			\
+	for (; &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+		n = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
+		n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) 			\
+	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
+	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+	return !h->pprev;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+		next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+static inline void hlist_del_init(struct hlist_node *n)
+{
+	if (!hlist_unhashed(n)) {
+		__hlist_del(n);
+		INIT_HLIST_NODE(n);
+	}
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+/* next must be != NULL */
+static inline void hlist_add_before(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	next->pprev = &n->next;
+	*(n->pprev) = n;
+}
+
+static inline void hlist_add_after(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	if(next->next)
+		next->next->pprev  = &next->next;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_for_each(pos, head) \
+	for (pos = (head)->first; pos; \
+	     pos = pos->next)
+
+#define hlist_for_each_safe(pos, n, head) \
+	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+	     pos = n)
+
+/**
+ * hlist_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member)			 \
+	for (pos = (head)->first;					 \
+	     pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member)		 \
+	for (pos = (pos)->next;						 \
+	     pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member)			 \
+	for (; pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @n:		another &struct hlist_node to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
+	for (pos = (head)->first;					 \
+	     pos && ({ n = pos->next; 1; }) && 				 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+#endif
-- 
cgit v0.10.2


From ce7e43653b08db094326f378958bc293a68e8e5b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 19 May 2009 09:30:23 -0300
Subject: perf_counter: Use rb_tree for symhists and threads in report

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index f63057f..e857201 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -479,23 +479,25 @@ static size_t map__fprintf(struct map *self, FILE *fp)
 }
 
 struct symhist {
-	struct list_head node;
+	struct rb_node	 rb_node;
 	struct dso	 *dso;
 	struct symbol	 *sym;
+	uint64_t	 ip;
 	uint32_t	 count;
 	char		 level;
 };
 
-static struct symhist *symhist__new(struct symbol *sym, struct dso *dso,
-				    char level)
+static struct symhist *symhist__new(struct symbol *sym, uint64_t ip,
+				    struct dso *dso, char level)
 {
 	struct symhist *self = malloc(sizeof(*self));
 
 	if (self != NULL) {
 		self->sym   = sym;
+		self->ip    = ip;
 		self->dso   = dso;
 		self->level = level;
-		self->count = 0;
+		self->count = 1;
 	}
 
 	return self;
@@ -506,12 +508,6 @@ static void symhist__delete(struct symhist *self)
 	free(self);
 }
 
-static bool symhist__equal(struct symhist *self, struct symbol *sym,
-			   struct dso *dso, char level)
-{
-	return self->level == level && self->sym == sym && self->dso == dso;
-}
-
 static void symhist__inc(struct symhist *self)
 {
 	++self->count;
@@ -519,7 +515,7 @@ static void symhist__inc(struct symhist *self)
 
 static size_t symhist__fprintf(struct symhist *self, FILE *fp)
 {
-	size_t ret = fprintf(fp, "[%c] ", self->level);
+	size_t ret = fprintf(fp, "%#llx [%c] ", (unsigned long long)self->ip, self->level);
 
 	if (self->level != '.')
 		ret += fprintf(fp, "%s", self->sym->name);
@@ -531,9 +527,9 @@ static size_t symhist__fprintf(struct symhist *self, FILE *fp)
 }
 
 struct thread {
-	struct list_head node;
+	struct rb_node	 rb_node;
 	struct list_head maps;
-	struct list_head symhists;
+	struct rb_root	 symhists;
 	pid_t		 pid;
 	char		 *comm;
 };
@@ -546,47 +542,43 @@ static struct thread *thread__new(pid_t pid)
 		self->pid = pid;
 		self->comm = NULL;
 		INIT_LIST_HEAD(&self->maps);
-		INIT_LIST_HEAD(&self->symhists);
+		self->symhists = RB_ROOT;
 	}
 
 	return self;
 }
 
-static void thread__insert_symhist(struct thread *self,
-				   struct symhist *symhist)
-{
-	list_add_tail(&symhist->node, &self->symhists);
-}
-
-static struct symhist *thread__symhists_find(struct thread *self,
-					     struct symbol *sym,
-					     struct dso *dso, char level)
+static int thread__symbol_incnew(struct thread *self, struct symbol *sym,
+				 uint64_t ip, struct dso *dso, char level)
 {
-	struct symhist *pos;
+	struct rb_node **p = &self->symhists.rb_node;
+	struct rb_node *parent = NULL;
+	struct symhist *sh;
 
-	list_for_each_entry(pos, &self->symhists, node)
-		if (symhist__equal(pos, sym, dso, level))
-			return pos;
+	while (*p != NULL) {
+		parent = *p;
+		sh = rb_entry(parent, struct symhist, rb_node);
 
-	return NULL;
-}
+		if (sh->sym == sym || ip == sh->ip) {
+			symhist__inc(sh);
+			return 0;
+		}
 
-static int thread__symbol_incnew(struct thread *self, struct symbol *sym,
-				 struct dso *dso, char level)
-{
-	struct symhist *symhist = thread__symhists_find(self, sym, dso, level);
+		/* Handle unresolved symbols too */
+		const uint64_t start = !sh->sym ? sh->ip : sh->sym->start;
 
-	if (symhist == NULL) {
-		symhist = symhist__new(sym, dso, level);
-		if (symhist == NULL)
-			goto out_error;
-		thread__insert_symhist(self, symhist);
+		if (ip < start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
 	}
 
-	symhist__inc(symhist);
+	sh = symhist__new(sym, ip, dso, level);
+	if (sh == NULL)
+		return -ENOMEM;
+	rb_link_node(&sh->rb_node, parent, p);
+	rb_insert_color(&sh->rb_node, &self->symhists);
 	return 0;
-out_error:
-	return -ENOMEM;
 }
 
 static int thread__set_comm(struct thread *self, const char *comm)
@@ -608,43 +600,44 @@ static size_t thread__maps_fprintf(struct thread *self, FILE *fp)
 
 static size_t thread__fprintf(struct thread *self, FILE *fp)
 {
-	struct symhist *pos;
 	int ret = fprintf(fp, "thread: %d %s\n", self->pid, self->comm);
+	struct rb_node *nd;
 
-	list_for_each_entry(pos, &self->symhists, node)
+	for (nd = rb_first(&self->symhists); nd; nd = rb_next(nd)) {
+		struct symhist *pos = rb_entry(nd, struct symhist, rb_node);
 		ret += symhist__fprintf(pos, fp);
+	}
 
 	return ret;
 }
 
-static LIST_HEAD(threads);
+static struct rb_root threads = RB_ROOT;
 
-static void threads__add(struct thread *thread)
-{
-	list_add_tail(&thread->node, &threads);
-}
-
-static struct thread *threads__find(pid_t pid)
+static struct thread *threads__findnew(pid_t pid)
 {
-	struct thread *pos;
+	struct rb_node **p = &threads.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
 
-	list_for_each_entry(pos, &threads, node)
-		if (pos->pid == pid)
-			return pos;
-	return NULL;
-}
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
 
-static struct thread *threads__findnew(pid_t pid)
-{
-	struct thread *thread = threads__find(pid);
+		if (th->pid == pid)
+			return th;
 
-	if (thread == NULL) {
-		thread = thread__new(pid);
-		if (thread != NULL)
-			threads__add(thread);
+		if (pid < th->pid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
 	}
 
-	return thread;
+	th = thread__new(pid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color(&th->rb_node, &threads);
+	}
+	return th;
 }
 
 static void thread__insert_map(struct thread *self, struct map *map)
@@ -668,44 +661,13 @@ static struct map *thread__find_map(struct thread *self, uint64_t ip)
 
 static void threads__fprintf(FILE *fp)
 {
-	struct thread *pos;
-
-	list_for_each_entry(pos, &threads, node)
+	struct rb_node *nd;
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
+		struct thread *pos = rb_entry(nd, struct thread, rb_node);
 		thread__fprintf(pos, fp);
+	}
 }
 
-#if 0
-static std::string resolve_user_symbol(int pid, uint64_t ip)
-{
-	std::string sym = "<unknown>";
-
-	maps_t &m = maps[pid];
-	maps_t::const_iterator mi = m.upper_bound(map(ip));
-	if (mi == m.end())
-		return sym;
-
-	ip -= mi->start + mi->pgoff;
-
-	symbols_t &s = dsos[mi->dso].syms;
-	symbols_t::const_iterator si = s.upper_bound(symbol(ip));
-
-	sym = mi->dso + ": <unknown>";
-
-	if (si == s.begin())
-		return sym;
-	si--;
-
-	if (si->start <= ip && ip < si->end)
-		sym = mi->dso + ": " + si->name;
-#if 0
-	else if (si->start <= ip)
-		sym = mi->dso + ": ?" + si->name;
-#endif
-
-	return sym;
-}
-#endif
-
 static void display_help(void)
 {
 	printf(
@@ -824,8 +786,11 @@ more:
 		struct dso *dso = NULL;
 		struct thread *thread = threads__findnew(event->ip.pid);
 
-		if (thread == NULL)
+		if (thread == NULL) {
+			fprintf(stderr, "problem processing %d event, bailing out\n",
+				event->header.type);
 			goto done;
+		}
 
 		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
 			show = SHOW_KERNEL;
@@ -845,8 +810,11 @@ more:
 		if (show & show_mask) {
 			struct symbol *sym = dso__find_symbol(dso, event->ip.ip);
 
-			if (thread__symbol_incnew(thread, sym, dso, level))
+			if (thread__symbol_incnew(thread, sym, event->ip.ip,
+						  dso, level)) {
+				fprintf(stderr, "problem incrementing symbol count, bailing out\n");
 				goto done;
+			}
 		}
 		total++;
 	} else switch (event->header.type) {
@@ -854,8 +822,10 @@ more:
 		struct thread *thread = threads__findnew(event->mmap.pid);
 		struct map *map = map__new(&event->mmap);
 
-		if (thread == NULL || map == NULL )
+		if (thread == NULL || map == NULL) {
+			fprintf(stderr, "problem processing PERF_EVENT_MMAP, bailing out\n");
 			goto done;
+		}
 		thread__insert_map(thread, map);
 		break;
 	}
@@ -863,8 +833,10 @@ more:
 		struct thread *thread = threads__findnew(event->comm.pid);
 
 		if (thread == NULL ||
-		    thread__set_comm(thread, event->comm.comm))
+		    thread__set_comm(thread, event->comm.comm)) {
+			fprintf(stderr, "problem processing PERF_EVENT_COMM, bailing out\n");
 			goto done;
+		}
 		break;
 	}
 	}
-- 
cgit v0.10.2


From f3e08c5341c528284460530b546608f27232f737 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 May 2009 15:34:54 +0200
Subject: perf report: Fix segfault on unknown symbols

Ingo reported:

> Program received signal SIGSEGV, Segmentation fault.
> 0x0000003e25080f80 in strlen () from /lib64/libc.so.6
> Missing separate debuginfos, use: debuginfo-install elfutils.x86_64
> glibc.x86_64 zlib.x86_64
> (gdb) bt
> #0  0x0000003e25080f80 in strlen () from /lib64/libc.so.6
> #1  0x0000003e2506954e in fputs () from /lib64/libc.so.6
> #2  0x00000000004059e8 in cmd_report (argc=<value optimized out>,
>     argv=<value optimized out>) at builtin-report.c:521
> #3  0x0000000000402dad in handle_internal_command (argc=1, argv=0x7fffe1218e30)
>     at perf.c:226
> #4  0x0000000000402f6d in main (argc=1, argv=0x7fffe1218e30) at perf.c:324
> (gdb)

Signed-off-by Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index e857201..21386a8 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -518,7 +518,7 @@ static size_t symhist__fprintf(struct symhist *self, FILE *fp)
 	size_t ret = fprintf(fp, "%#llx [%c] ", (unsigned long long)self->ip, self->level);
 
 	if (self->level != '.')
-		ret += fprintf(fp, "%s", self->sym->name);
+		ret += fprintf(fp, "%s", self->sym ? self->sym->name: "<unknown>");
 	else
 		ret += fprintf(fp, "%s: %s",
 			       self->dso ? self->dso->name : "<unknown",
-- 
cgit v0.10.2


From 53cb8bc2a3d976efd1a800c3de4640a7220afbb3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 09:17:18 +0200
Subject: perf record: Convert to Git option parsing

Remove getopt usage and use Git's much more advanced and more compact
command option library.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 21386a8..9e59d60 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -1,52 +1,29 @@
-#define _GNU_SOURCE
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
+#include "util/util.h"
+
+#include <libelf.h>
 #include <gelf.h>
 #include <elf.h>
-#include <libelf.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <errno.h>
-#include <ctype.h>
-#include <time.h>
-#include <getopt.h>
-#include <assert.h>
-#include <search.h>
-
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/prctl.h>
-#include <sys/wait.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <linux/unistd.h>
-#include <linux/types.h>
-
-#include "../../include/linux/perf_counter.h"
+
 #include "util/list.h"
 #include "util/rbtree.h"
 
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
 #define SHOW_KERNEL	1
 #define SHOW_USER	2
 #define SHOW_HV		4
 
-static char 		const *input_name = "output.perf";
+static char		const *input_name = "output.perf";
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
 
-static const char *perf_event_names[] = {
+const char *perf_event_names[] = {
 	[PERF_EVENT_MMAP]   = " PERF_EVENT_MMAP",
 	[PERF_EVENT_MUNMAP] = " PERF_EVENT_MUNMAP",
 	[PERF_EVENT_COMM]   = " PERF_EVENT_COMM",
@@ -86,7 +63,7 @@ struct section {
 	char		 name[0];
 };
 
-static struct section *section__new(uint64_t start, uint64_t size,
+struct section *section__new(uint64_t start, uint64_t size,
 				    uint64_t offset, char *name)
 {
 	struct section *self = malloc(sizeof(*self) + strlen(name) + 1);
@@ -241,7 +218,7 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym)
 	return GELF_ST_TYPE(sym->st_info);
 }
 
-static inline bool elf_sym__is_function(const GElf_Sym *sym)
+static inline int elf_sym__is_function(const GElf_Sym *sym)
 {
 	return elf_sym__type(sym) == STT_FUNC &&
 	       sym->st_name != 0 &&
@@ -393,7 +370,7 @@ out_delete_dso:
 	return NULL;
 }
 
-static void dsos__fprintf(FILE *fp)
+void dsos__fprintf(FILE *fp)
 {
 	struct dso *pos;
 
@@ -503,7 +480,7 @@ static struct symhist *symhist__new(struct symbol *sym, uint64_t ip,
 	return self;
 }
 
-static void symhist__delete(struct symhist *self)
+void symhist__delete(struct symhist *self)
 {
 	free(self);
 }
@@ -587,7 +564,7 @@ static int thread__set_comm(struct thread *self, const char *comm)
 	return self->comm ? 0 : -ENOMEM;
 }
 
-static size_t thread__maps_fprintf(struct thread *self, FILE *fp)
+size_t thread__maps_fprintf(struct thread *self, FILE *fp)
 {
 	struct map *pos;
 	size_t ret = 0;
@@ -668,49 +645,7 @@ static void threads__fprintf(FILE *fp)
 	}
 }
 
-static void display_help(void)
-{
-	printf(
-	"Usage: perf-report [<options>]\n"
-	" -i file   --input=<file>      # input file\n"
-	);
-
-	exit(0);
-}
-
-static void process_options(int argc, char *argv[])
-{
-	int error = 0;
-
-	for (;;) {
-		int option_index = 0;
-		/** Options for getopt */
-		static struct option long_options[] = {
-			{"input",	required_argument,	NULL, 'i'},
-			{"no-user",	no_argument,		NULL, 'u'},
-			{"no-kernel",	no_argument,		NULL, 'k'},
-			{"no-hv",	no_argument,		NULL, 'h'},
-			{NULL,		0,			NULL,  0 }
-		};
-		int c = getopt_long(argc, argv, "+:i:kuh",
-				    long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'i': input_name			= strdup(optarg); break;
-		case 'k': show_mask &= ~SHOW_KERNEL; break;
-		case 'u': show_mask &= ~SHOW_USER; break;
-		case 'h': show_mask &= ~SHOW_HV; break;
-		default: error = 1; break;
-		}
-	}
-
-	if (error)
-		display_help();
-}
-
-int cmd_report(int argc, char **argv)
+static int __cmd_report(void)
 {
 	unsigned long offset = 0;
 	unsigned long head = 0;
@@ -720,12 +655,6 @@ int cmd_report(int argc, char **argv)
 	int ret, rc = EXIT_FAILURE;
 	unsigned long total = 0;
 
-	elf_version(EV_CURRENT);
-
-	page_size = getpagesize();
-
-	process_options(argc, argv);
-
 	input = open(input_name, O_RDONLY);
 	if (input < 0) {
 		perror("failed to open file");
@@ -867,3 +796,24 @@ done:
 	return rc;
 }
 
+static const char * const report_usage[] = {
+	"perf report [<options>] <command>",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_END()
+};
+
+int cmd_report(int argc, const char **argv, const char *prefix)
+{
+	elf_version(EV_CURRENT);
+
+	page_size = getpagesize();
+
+	parse_options(argc, argv, options, report_usage, 0);
+
+	return __cmd_report();
+}
-- 
cgit v0.10.2


From 0bec253c813fbb067db4dfd9f5b6cec1bd2ef026 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 09:17:18 +0200
Subject: perf report: Add help/manpage

Add a (minimal) manpage for perf report.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-report.txt b/Documentation/perf_counter/Documentation/perf-report.txt
new file mode 100644
index 0000000..64696a2
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/perf-report.txt
@@ -0,0 +1,32 @@
+perf-report(1)
+==========
+
+NAME
+----
+perf-report - Read output.perf (created by perf record) and display the profile
+
+SYNOPSIS
+--------
+[verse]
+'perf report' [-i <file> | --input=file]
+
+DESCRIPTION
+-----------
+This command displays the performance counter profile information recorded
+via perf report.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: output.perf)
+
+Configuration
+-------------
+
+EXAMPLES
+--------
+
+SEE ALSO
+--------
+linkperf:perf-stat[1]
-- 
cgit v0.10.2


From db1005ec6ff8f250bccbc87387a955078891bfe4 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 26 May 2009 13:55:42 +0200
Subject: ALSA: riptide - Fix joystick resource handling

The current code doesn't handle the multiple gameports properly,
and uses unnecessary global static variables to store the data.
This patch changes the probe / remove routines to use the driver
data assigned to the dedicated pci device, and adds the support of
multiple devices.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index 460be03..235a71e 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -2014,14 +2014,12 @@ static int __devinit snd_riptide_mixer(struct snd_riptide *chip)
 }
 
 #ifdef SUPPORT_JOYSTICK
-static int have_joystick;
-static struct pci_dev *riptide_gameport_pci;
-static struct gameport *riptide_gameport;
 
 static int __devinit
 snd_riptide_joystick_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
 	static int dev;
+	struct gameport *gameport;
 
 	if (dev >= SNDRV_CARDS)
 		return -ENODEV;
@@ -2030,36 +2028,33 @@ snd_riptide_joystick_probe(struct pci_dev *pci, const struct pci_device_id *id)
 		return -ENOENT;
 	}
 
-	if (joystick_port[dev]) {
-		riptide_gameport = gameport_allocate_port();
-		if (riptide_gameport) {
-			if (!request_region
-			    (joystick_port[dev], 8, "Riptide gameport")) {
-				snd_printk(KERN_WARNING
-					   "Riptide: cannot grab gameport 0x%x\n",
-					   joystick_port[dev]);
-				gameport_free_port(riptide_gameport);
-				riptide_gameport = NULL;
-			} else {
-				riptide_gameport_pci = pci;
-				riptide_gameport->io = joystick_port[dev];
-				gameport_register_port(riptide_gameport);
-			}
-		}
+	if (!joystick_port[dev++])
+		return 0;
+
+	gameport = gameport_allocate_port();
+	if (!gameport)
+		return -ENOMEM;
+	if (!request_region(joystick_port[dev], 8, "Riptide gameport")) {
+		snd_printk(KERN_WARNING
+			   "Riptide: cannot grab gameport 0x%x\n",
+			   joystick_port[dev]);
+		gameport_free_port(gameport);
+		return -EBUSY;
 	}
-	dev++;
+
+	gameport->io = joystick_port[dev];
+	gameport_register_port(gameport);
+	pci_set_drvdata(pci, gameport);
 	return 0;
 }
 
 static void __devexit snd_riptide_joystick_remove(struct pci_dev *pci)
 {
-	if (riptide_gameport) {
-		if (riptide_gameport_pci == pci) {
-			release_region(riptide_gameport->io, 8);
-			riptide_gameport_pci = NULL;
-			gameport_unregister_port(riptide_gameport);
-			riptide_gameport = NULL;
-		}
+	struct gameport *gameport = pci_get_drvdata(pci);
+	if (gameport) {
+		release_region(gameport->io, 8);
+		gameport_unregister_port(gameport);
+		pci_set_drvdata(pci, NULL);
 	}
 }
 #endif
@@ -2198,14 +2193,11 @@ static struct pci_driver joystick_driver = {
 static int __init alsa_card_riptide_init(void)
 {
 	int err;
-	if ((err = pci_register_driver(&driver)) < 0)
+	err = pci_register_driver(&driver);
+	if (err < 0)
 		return err;
 #if defined(SUPPORT_JOYSTICK)
-	if (pci_register_driver(&joystick_driver) < 0) {
-		have_joystick = 0;
-		snd_printk(KERN_INFO "no joystick found\n");
-	} else
-		have_joystick = 1;
+	pci_register_driver(&joystick_driver);
 #endif
 	return 0;
 }
@@ -2214,8 +2206,7 @@ static void __exit alsa_card_riptide_exit(void)
 {
 	pci_unregister_driver(&driver);
 #if defined(SUPPORT_JOYSTICK)
-	if (have_joystick)
-		pci_unregister_driver(&joystick_driver);
+	pci_unregister_driver(&joystick_driver);
 #endif
 }
 
-- 
cgit v0.10.2


From 86d190e77c44cb057742dcc871b12ebd4633c387 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 26 May 2009 15:18:58 +0200
Subject: ALSA: hda - Minor clean up of patch_sigmatel.c

- Remove unneeded semicolons
- Introduce spec->gpio_led to specify the GPIO bit for LED control

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 1731081..26d8707 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -193,6 +193,7 @@ struct sigmatel_spec {
 	unsigned int gpio_dir;
 	unsigned int gpio_data;
 	unsigned int gpio_mute;
+	unsigned int gpio_led;
 
 	/* stream */
 	unsigned int stream_delay;
@@ -4651,22 +4652,13 @@ static int stac92xx_hp_check_power_status(struct hda_codec *codec,
 					      hda_nid_t nid)
 {
 	struct sigmatel_spec *spec = codec->spec;
-	unsigned int gpio_bit = 0; /* gets rid of compiler warning */
-
-	switch (spec->board_config) {
-	case STAC_HP_DV4_1222NR:
-		gpio_bit = 0x01;
-		break;
-	case STAC_HP_HDX:
-		gpio_bit = 0x08;
-	}
 
 	if (nid == 0x10) {
 		if (snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) &
 		    HDA_AMP_MUTE)
-			spec->gpio_data &= ~gpio_bit;  /* orange */
+			spec->gpio_data |= spec->gpio_led; /* white */
 		else
-			spec->gpio_data |= gpio_bit;   /* white */
+			spec->gpio_data &= ~spec->gpio_led; /* orange */
 
 		stac_gpio_set(codec, spec->gpio_mask,
 			      spec->gpio_dir,
@@ -5352,14 +5344,7 @@ again:
 		 */
 		spec->num_smuxes = 1;
 		spec->num_dmuxes = 1;
-#ifdef CONFIG_SND_HDA_POWER_SAVE
-		/* This controls MUTE LED */
-		spec->gpio_mask |= 0x01;
-		spec->gpio_dir  |= 0x01;
-		spec->gpio_data |= 0x01;
-		codec->patch_ops.check_power_status =
-		    stac92xx_hp_check_power_status;
-#endif
+		spec->gpio_led = 0x01;
 		/* fallthrough */
 	case STAC_HP_DV5:
 		snd_hda_codec_set_pincfg(codec, 0x0d, 0x90170010);
@@ -5369,22 +5354,21 @@ again:
 		spec->num_dmics = 1;
 		spec->num_dmuxes = 1;
 		spec->num_smuxes = 1;
-		/*
-		 * For controlling MUTE LED on HP HDX16/HDX18 notebooks,
-		 * the CONFIG_SND_HDA_POWER_SAVE is needed to be set.
-		 */
-#ifdef CONFIG_SND_HDA_POWER_SAVE
 		/* orange/white mute led on GPIO3, orange=0, white=1 */
-		spec->gpio_mask |= 0x08;
-		spec->gpio_dir  |= 0x08;
-		spec->gpio_data |= 0x08;  /* set to white */
+		spec->gpio_led = 0x08;
+		break;
+	}
 
+#ifdef CONFIG_SND_HDA_POWER_SAVE
+	if (spec->gpio_led) {
+		spec->gpio_mask |= spec->gpio_led;
+		spec->gpio_dir |= spec->gpio_led;
+		spec->gpio_data |= spec->gpio_led;
 		/* register check_power_status callback. */
 		codec->patch_ops.check_power_status =
-		    stac92xx_hp_check_power_status;
+			stac92xx_hp_check_power_status;
+	}
 #endif	
-		break;
-	};
 
 	spec->multiout.dac_nids = spec->dac_nids;
 	if (spec->dinput_mux)
@@ -5409,7 +5393,7 @@ again:
 	codec->proc_widget_hook = stac92hd7x_proc_hook;
 
 	return 0;
-};
+}
 
 static int patch_stac922x(struct hda_codec *codec)
 {
@@ -5564,7 +5548,7 @@ static int patch_stac927x(struct hda_codec *codec)
 			/* correct the device field to SPDIF out */
 			snd_hda_codec_set_pincfg(codec, 0x21, 0x01442070);
 			break;
-		};
+		}
 		/* configure the analog microphone on some laptops */
 		snd_hda_codec_set_pincfg(codec, 0x0c, 0x90a79130);
 		/* correct the front output jack as a hp out */
-- 
cgit v0.10.2


From 8174086167d43d0fd7b21928074145ae1d15bbab Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 26 May 2009 15:22:00 +0200
Subject: ALSA: hda - Allow concurrent RIRB access in single_cmd mode

In the single_cmd mode, the current driver code doesn't do any update
for RIRB just for any safety reason.  But, actually the RIRB and
single_cmd mode don't conflict.  Unsolicited events can be delivered
even while using the single_cmd mode.

This patch allows the handling of unsolicited events with single_cmd
mode, just always checking RIRB independent from single_cmd flag.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 82d5218..01d8d97 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -901,8 +901,7 @@ static void azx_init_chip(struct azx *chip)
 	azx_int_enable(chip);
 
 	/* initialize the codec command I/O */
-	if (!chip->single_cmd)
-		azx_init_cmd_io(chip);
+	azx_init_cmd_io(chip);
 
 	/* program the position buffer */
 	azx_writel(chip, DPLBASE, (u32)chip->posbuf.addr);
@@ -1018,7 +1017,7 @@ static irqreturn_t azx_interrupt(int irq, void *dev_id)
 	/* clear rirb int */
 	status = azx_readb(chip, RIRBSTS);
 	if (status & RIRB_INT_MASK) {
-		if (!chip->single_cmd && (status & RIRB_INT_RESPONSE))
+		if (status & RIRB_INT_RESPONSE)
 			azx_update_rirb(chip);
 		azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
 	}
@@ -2338,11 +2337,9 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 		goto errout;
 	}
 	/* allocate CORB/RIRB */
-	if (!chip->single_cmd) {
-		err = azx_alloc_cmd_io(chip);
-		if (err < 0)
-			goto errout;
-	}
+	err = azx_alloc_cmd_io(chip);
+	if (err < 0)
+		goto errout;
 
 	/* initialize streams */
 	azx_init_stream(chip);
-- 
cgit v0.10.2


From f91183fe3780d44849110a1653dfe8af7bc67aa4 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 26 May 2009 15:25:34 +0200
Subject: perf top: Remove leftover NMI/IRQ bits

79202b removed IRQ/NMI mode selection, so remove it from
perf top as well.

[ Impact: cleanup ]

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 87b925c..cacaa3c 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -8,7 +8,7 @@
    Sample output:
 
 ------------------------------------------------------------------------------
- KernelTop:    2669 irqs/sec  [NMI, cache-misses/cache-refs],  (all, cpu: 2)
+ KernelTop:    2669 irqs/sec  [cache-misses/cache-refs],  (all, cpu: 2)
 ------------------------------------------------------------------------------
 
              weight         RIP          kernel function
@@ -92,7 +92,6 @@ static __u64			count_filter		       = 100;
 static int			target_pid				= -1;
 static int			profile_cpu			= -1;
 static int			nr_cpus				=  0;
-static int			nmi				=  1;
 static unsigned int		realtime_prio			=  0;
 static int			group				=  0;
 static unsigned int		page_size;
@@ -198,10 +197,9 @@ static void print_sym_table(void)
 
 	printf(
 "------------------------------------------------------------------------------\n");
-	printf( " KernelTop:%8.0f irqs/sec  kernel:%4.1f%% [%s, ",
+	printf( " KernelTop:%8.0f irqs/sec  kernel:%4.1f%% [",
 		events_per_sec,
-		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
-		nmi ? "NMI" : "IRQ");
+		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)));
 
 	if (nr_counters == 1)
 		printf("%d ", event_count[0]);
@@ -637,7 +635,7 @@ static int __cmd_top(void)
 			hw_event.config		= event_id[counter];
 			hw_event.irq_period	= event_count[counter];
 			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
-			hw_event.nmi		= nmi;
+			hw_event.nmi		= 1;
 			hw_event.mmap		= use_mmap;
 			hw_event.munmap		= use_munmap;
 			hw_event.freq		= freq;
-- 
cgit v0.10.2


From db20c0031288ff524d82b1f240f35f85d4a052eb Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 26 May 2009 15:25:34 +0200
Subject: perf top: fix typo in -d option

Clean up copy/paste options parsing conversion error.

[ Impact: reactivate -d option ]

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index cacaa3c..6b1c66f 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -727,7 +727,7 @@ static const struct option options[] = {
 		    "number of mmap data pages"),
 	OPT_INTEGER('r', "realtime", &realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
-	OPT_INTEGER('d', "delay", &realtime_prio,
+	OPT_INTEGER('d', "delay", &delay_secs,
 		    "number of seconds to delay between refreshes"),
 	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
 			    "dump the symbol table used for profiling"),
-- 
cgit v0.10.2


From d076d2bd0d9379314df5f2ab8b9c83f617c70923 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 26 May 2009 23:10:15 +0900
Subject: sh: pci-sh7751: Initialize io_map_base in controller definition.

As there is only a single controller and remapping has no impact for the
address range in question, just initialize it directly in the controller
definition. This fixes up boot time warnings about not having the field
initialized.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c
index c4fa0bb1..70c1999 100644
--- a/arch/sh/drivers/pci/pci-sh7751.c
+++ b/arch/sh/drivers/pci/pci-sh7751.c
@@ -52,7 +52,7 @@ static struct resource sh7751_io_resource = {
 };
 
 static struct resource sh7751_mem_resource = {
-	.name	= "SH7785_mem",
+	.name	= "SH7751_mem",
 	.start	= SH7751_PCI_MEMORY_BASE,
 	.end	= SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1,
 	.flags	= IORESOURCE_MEM
@@ -64,6 +64,7 @@ static struct pci_channel sh7751_pci_controller = {
 	.mem_offset	= 0x00000000,
 	.io_resource	= &sh7751_io_resource,
 	.io_offset	= 0x00000000,
+	.io_map_base	= SH7751_PCI_IO_BASE,
 };
 
 static struct sh4_pci_address_map sh7751_pci_map = {
@@ -179,8 +180,6 @@ static int __init sh7751_pci_init(void)
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_ARBM;
 	pci_write_reg(chan, word, SH4_PCICR);
 
-	__set_io_port_base(SH7751_PCI_IO_BASE);
-
 	register_pci_controller(chan);
 
 	return 0;
-- 
cgit v0.10.2


From b7e2ac619465f1774b827d9997109ceef4a61851 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 26 May 2009 23:13:13 +0900
Subject: sh: pci: Disable MWI and make pci_dma_burst_advice() a bit more
 accurate.

None of the SH PCI controllers support MWI, it is always treated as a
direct memory write, so simply disable it outright. In the case of the
PCI cache line size, consult that for the pci_dma_burst_advice()
strategy, and switch over to PCI_DMA_BURST_MULTIPLE, as PPC64.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 5b2e0fc..ae0da6f 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -89,12 +89,28 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
 #endif
 
 #ifdef CONFIG_PCI
+/*
+ * None of the SH PCI controllers support MWI, it is always treated as a
+ * direct memory write.
+ */
+#define PCI_DISABLE_MWI
+
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
 {
-	*strat = PCI_DMA_BURST_INFINITY;
-	*strategy_parameter = ~0UL;
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+
+	if (byte == 0)
+		cacheline_size = L1_CACHE_BYTES;
+	else
+		cacheline_size = byte << 2;
+
+	*strat = PCI_DMA_BURST_MULTIPLE;
+	*strategy_parameter = cacheline_size;
 }
 #endif
 
-- 
cgit v0.10.2


From df4aab46a8256ac0f0c2701b3fe23b7dd05e6b48 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 4 May 2009 13:14:27 -0700
Subject: davinci: gpio irq enable tweaks

Fix two IRQ triggering bugs affecting GPIO IRQs:

 - Make sure enabling with IRQ_TYPE_NONE ("default, unspecified")
   isn't a NOP ... default to both edges, at least one must work.

 - As noted by Kevin Hilman, setting the irq trigger type for a
   banked gpio interrupt shouldn't enable irqs that are disabled.

Since GPIO IRQs haven't been used much yet, it's not clear these
bugs could have affected anything.  The few current users don't
seem to have been obviously suffering from these issues.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/gpio.c b/arch/arm/mach-davinci/gpio.c
index 1aba41c..34ba4ce 100644
--- a/arch/arm/mach-davinci/gpio.c
+++ b/arch/arm/mach-davinci/gpio.c
@@ -187,10 +187,15 @@ static void gpio_irq_enable(unsigned irq)
 {
 	struct gpio_controller *__iomem g = get_irq_chip_data(irq);
 	u32 mask = __gpio_mask(irq_to_gpio(irq));
+	unsigned status = irq_desc[irq].status;
 
-	if (irq_desc[irq].status & IRQ_TYPE_EDGE_FALLING)
+	status &= IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING;
+	if (!status)
+		status = IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING;
+
+	if (status & IRQ_TYPE_EDGE_FALLING)
 		__raw_writel(mask, &g->set_falling);
-	if (irq_desc[irq].status & IRQ_TYPE_EDGE_RISING)
+	if (status & IRQ_TYPE_EDGE_RISING)
 		__raw_writel(mask, &g->set_rising);
 }
 
@@ -205,10 +210,13 @@ static int gpio_irq_type(unsigned irq, unsigned trigger)
 	irq_desc[irq].status &= ~IRQ_TYPE_SENSE_MASK;
 	irq_desc[irq].status |= trigger;
 
-	__raw_writel(mask, (trigger & IRQ_TYPE_EDGE_FALLING)
-		     ? &g->set_falling : &g->clr_falling);
-	__raw_writel(mask, (trigger & IRQ_TYPE_EDGE_RISING)
-		     ? &g->set_rising : &g->clr_rising);
+	/* don't enable the IRQ if it's currently disabled */
+	if (irq_desc[irq].depth == 0) {
+		__raw_writel(mask, (trigger & IRQ_TYPE_EDGE_FALLING)
+			     ? &g->set_falling : &g->clr_falling);
+		__raw_writel(mask, (trigger & IRQ_TYPE_EDGE_RISING)
+			     ? &g->set_rising : &g->clr_rising);
+	}
 	return 0;
 }
 
-- 
cgit v0.10.2


From dc75602628472d7fbc4cb43f010bf0257e95ab71 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Mon, 11 May 2009 11:04:53 -0700
Subject: davinci: fixups for banked GPIO interrupt handling

This patch seems to get me much more reliable performance using the
GPIO banked interrupts on dm355 for the dm9000 driver.

Changes include:

- init GPIO handling along with normal GPIO init
- mask the level-sensitive bank IRQ during handling

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/gpio.c b/arch/arm/mach-davinci/gpio.c
index 34ba4ce..40327b5 100644
--- a/arch/arm/mach-davinci/gpio.c
+++ b/arch/arm/mach-davinci/gpio.c
@@ -45,6 +45,7 @@ static struct gpio_controller __iomem * __init gpio2controller(unsigned gpio)
 	return __gpio_to_controller(gpio);
 }
 
+static int __init davinci_gpio_irq_setup(void);
 
 /*--------------------------------------------------------------------------*/
 
@@ -157,6 +158,7 @@ static int __init davinci_gpio_setup(void)
 		gpiochip_add(&chips[i].chip);
 	}
 
+	davinci_gpio_irq_setup();
 	return 0;
 }
 pure_initcall(davinci_gpio_setup);
@@ -238,6 +240,7 @@ gpio_irq_handler(unsigned irq, struct irq_desc *desc)
 		mask <<= 16;
 
 	/* temporarily mask (level sensitive) parent IRQ */
+	desc->chip->mask(irq);
 	desc->chip->ack(irq);
 	while (1) {
 		u32		status;
@@ -333,4 +336,3 @@ static int __init davinci_gpio_irq_setup(void)
 
 	return 0;
 }
-arch_initcall(davinci_gpio_irq_setup);
-- 
cgit v0.10.2


From 17eb15704142dc1b302a3f0cd632c426ee326b6d Mon Sep 17 00:00:00 2001
From: Chaithrika U S <chaithrika@ti.com>
Date: Mon, 19 Jan 2009 14:13:05 +0530
Subject: davinci: use 32-bit accesses for low-level debug macros

This patch defines debug macros for low-level debugging for Davinci
based platforms

Tested on :
        - DM644x DaVinci EVM
        - DM646X DaVinciHD EVM
	- DM355 EVM

This patch attempts to solve the low-level debug issue in DM646x. The
UART on DM646x SoC allows only 32-bit access. The existing
debug-macro.S uses the macros from debug-8250.S file. This led to
garbage serial out in the case of DM646x.

The inclusion of debug-8250.S does not allow for run time fix for this
issue.  There are compile time errors due to multiple definitions of
the macros.  Also when building a single image for multiple DaVinci
Platforms, the ifdefs cannot be relied upon.

The solution below does not include the debug-8250.S file and defines
the necessary macros. This solution was arrived at after observing
that word access does not affect the low-level debug messages on
DM644x/DM355.

The other approach to this issue is to use the UART module information
available in the peripheral registers to decide the access
mechanism. But this will have to be done for every access of UART
specifically for DM646x. Also this calls for a modification of the
debug-8250.S file.

Signed-off-by: Chaithrika U S <chaithrika@ti.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/include/mach/debug-macro.S b/arch/arm/mach-davinci/include/mach/debug-macro.S
index e6c0f0d..de3fc21 100644
--- a/arch/arm/mach-davinci/include/mach/debug-macro.S
+++ b/arch/arm/mach-davinci/include/mach/debug-macro.S
@@ -9,6 +9,16 @@
  * or implied.
  */
 
+/* Modifications
+ * Jan 2009	Chaithrika U S	Added senduart, busyuart, waituart
+ *				macros, based on debug-8250.S file
+ *				but using 32-bit accesses required for
+ *                              some davinci devices.
+ */
+
+#include <linux/serial_reg.h>
+#define UART_SHIFT	2
+
 		.macro addruart, rx
 		mrc	p15, 0, \rx, c1, c0
 		tst	\rx, #1			@ MMU enabled?
@@ -17,5 +27,22 @@
 		orr	\rx, \rx, #0x00c20000   @ UART 0
 		.endm
 
-#define UART_SHIFT	2
-#include <asm/hardware/debug-8250.S>
+		.macro	senduart,rd,rx
+		str	\rd, [\rx, #UART_TX << UART_SHIFT]
+		.endm
+
+		.macro	busyuart,rd,rx
+1002:		ldr	\rd, [\rx, #UART_LSR << UART_SHIFT]
+		and	\rd, \rd, #UART_LSR_TEMT | UART_LSR_THRE
+		teq	\rd, #UART_LSR_TEMT | UART_LSR_THRE
+		bne	1002b
+		.endm
+
+		.macro	waituart,rd,rx
+#ifdef FLOW_CONTROL
+1001:		ldr	\rd, [\rx, #UART_MSR << UART_SHIFT]
+		tst	\rd, #UART_MSR_CTS
+		beq	1001b
+#endif
+		.endm
+
-- 
cgit v0.10.2


From ce8ccaf0efeb43cd23393a9590365587ad879b2a Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Mon, 11 Feb 2008 14:44:07 -0700
Subject: davinci: interrupts: get_irqnr_and_base: save an instruction

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/include/mach/entry-macro.S b/arch/arm/mach-davinci/include/mach/entry-macro.S
index 039b84f..0ebb445 100644
--- a/arch/arm/mach-davinci/include/mach/entry-macro.S
+++ b/arch/arm/mach-davinci/include/mach/entry-macro.S
@@ -23,9 +23,8 @@
 
 		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
 		ldr \tmp, [\base, #0x14]
-		mov \tmp, \tmp, lsr #2
+		movs \tmp, \tmp, lsr #2
 		sub \irqnr, \tmp, #1
-		cmp \tmp, #0
 		.endm
 
 		.macro	irq_prio_table
-- 
cgit v0.10.2


From 27428e39da8e45f861c79e7dcec32d38965afeb3 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 18 Feb 2009 14:00:36 -0700
Subject: davinci: support different UART bases for zImage uncompress

The davinci pre-kernel boot code assumes that all platforms use the
same UART base address for the console.  That assumption is not longer
valid with some newer SoCs so determine the console UART base address
from the machine number passed in from bootloader.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/include/mach/uncompress.h b/arch/arm/mach-davinci/include/mach/uncompress.h
index 8c165de..1e27475 100644
--- a/arch/arm/mach-davinci/include/mach/uncompress.h
+++ b/arch/arm/mach-davinci/include/mach/uncompress.h
@@ -13,11 +13,24 @@
 #include <linux/serial_reg.h>
 #include <mach/serial.h>
 
+#include <asm/mach-types.h>
+
+extern unsigned int __machine_arch_type;
+
+static u32 *uart;
+
+static u32 *get_uart_base(void)
+{
+	/* Add logic here for new platforms, using __macine_arch_type */
+	return (u32 *)DAVINCI_UART0_BASE;
+}
+
 /* PORT_16C550A, in polled non-fifo mode */
 
 static void putc(char c)
 {
-	volatile u32 *uart = (volatile void *) DAVINCI_UART0_BASE;
+	if (!uart)
+		uart = get_uart_base();
 
 	while (!(uart[UART_LSR] & UART_LSR_THRE))
 		barrier();
@@ -26,7 +39,9 @@ static void putc(char c)
 
 static inline void flush(void)
 {
-	volatile u32 *uart = (volatile void *) DAVINCI_UART0_BASE;
+	if (!uart)
+		uart = get_uart_base();
+
 	while (!(uart[UART_LSR] & UART_LSR_THRE))
 		barrier();
 }
-- 
cgit v0.10.2


From 0521444d497ee1f8a31314d2ce3c6b9edab25b51 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Wed, 11 Mar 2009 19:49:05 +0400
Subject: davinci: INTC: add support for TI cp_intc

Add support for Texas Instuments Common Platform Interrupt Controller
(cp_intc) used on DA830/OMAP-L137.

Signed-off-by: Steve Chen <schen@mvista.com>
Signed-off-by: Mark Greer <mgreer@mvista.com>
Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index a9c78bc..56cd03f 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -1,5 +1,8 @@
 if ARCH_DAVINCI
 
+config CP_INTC
+	bool
+
 menu "TI DaVinci Implementations"
 
 comment "DaVinci Core Type"
diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 1674661..9a696ae 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -8,6 +8,7 @@ obj-y 			:= time.o irq.o clock.o serial.o io.o id.o psc.o \
 			   gpio.o devices.o dma.o usb.o
 
 obj-$(CONFIG_DAVINCI_MUX)		+= mux.o
+obj-$(CONFIG_CP_INTC)			+= cp_intc.o
 
 # Chip specific
 obj-$(CONFIG_ARCH_DAVINCI_DM644x)       += dm644x.o
diff --git a/arch/arm/mach-davinci/cp_intc.c b/arch/arm/mach-davinci/cp_intc.c
new file mode 100644
index 0000000..96c8e97
--- /dev/null
+++ b/arch/arm/mach-davinci/cp_intc.c
@@ -0,0 +1,161 @@
+/*
+ * TI Common Platform Interrupt Controller (cp_intc) driver
+ *
+ * Author: Steve Chen <schen@mvista.com>
+ * Copyright (C) 2008-2009, MontaVista Software, Inc. <source@mvista.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+
+#include <mach/cp_intc.h>
+
+static void __iomem *cp_intc_base;
+
+static inline unsigned int cp_intc_read(unsigned offset)
+{
+	return __raw_readl(cp_intc_base + offset);
+}
+
+static inline void cp_intc_write(unsigned long value, unsigned offset)
+{
+	__raw_writel(value, cp_intc_base + offset);
+}
+
+static void cp_intc_ack_irq(unsigned int irq)
+{
+	cp_intc_write(irq, CP_INTC_SYS_STAT_IDX_CLR);
+}
+
+/* Disable interrupt */
+static void cp_intc_mask_irq(unsigned int irq)
+{
+	/* XXX don't know why we need to disable nIRQ here... */
+	cp_intc_write(1, CP_INTC_HOST_ENABLE_IDX_CLR);
+	cp_intc_write(irq, CP_INTC_SYS_ENABLE_IDX_CLR);
+	cp_intc_write(1, CP_INTC_HOST_ENABLE_IDX_SET);
+}
+
+/* Enable interrupt */
+static void cp_intc_unmask_irq(unsigned int irq)
+{
+	cp_intc_write(irq, CP_INTC_SYS_ENABLE_IDX_SET);
+}
+
+static int cp_intc_set_irq_type(unsigned int irq, unsigned int flow_type)
+{
+	unsigned reg		= BIT_WORD(irq);
+	unsigned mask		= BIT_MASK(irq);
+	unsigned polarity	= cp_intc_read(CP_INTC_SYS_POLARITY(reg));
+	unsigned type		= cp_intc_read(CP_INTC_SYS_TYPE(reg));
+
+	switch (flow_type) {
+	case IRQ_TYPE_EDGE_RISING:
+		polarity |= mask;
+		type |= mask;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		polarity &= ~mask;
+		type |= mask;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		polarity |= mask;
+		type &= ~mask;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		polarity &= ~mask;
+		type &= ~mask;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	cp_intc_write(polarity, CP_INTC_SYS_POLARITY(reg));
+	cp_intc_write(type, CP_INTC_SYS_TYPE(reg));
+
+	return 0;
+}
+
+static struct irq_chip cp_intc_irq_chip = {
+	.name		= "cp_intc",
+	.ack		= cp_intc_ack_irq,
+	.mask		= cp_intc_mask_irq,
+	.unmask		= cp_intc_unmask_irq,
+	.set_type	= cp_intc_set_irq_type,
+};
+
+void __init cp_intc_init(void __iomem *base, unsigned short num_irq,
+			 u8 *irq_prio)
+{
+	unsigned num_reg	= BITS_TO_LONGS(num_irq);
+	int i;
+
+	cp_intc_base = base;
+
+	cp_intc_write(0, CP_INTC_GLOBAL_ENABLE);
+
+	/* Disable all host interrupts */
+	cp_intc_write(0, CP_INTC_HOST_ENABLE(0));
+
+	/* Disable system interrupts */
+	for (i = 0; i < num_reg; i++)
+		cp_intc_write(~0, CP_INTC_SYS_ENABLE_CLR(i));
+
+	/* Set to normal mode, no nesting, no priority hold */
+	cp_intc_write(0, CP_INTC_CTRL);
+	cp_intc_write(0, CP_INTC_HOST_CTRL);
+
+	/* Clear system interrupt status */
+	for (i = 0; i < num_reg; i++)
+		cp_intc_write(~0, CP_INTC_SYS_STAT_CLR(i));
+
+	/* Enable nIRQ (what about nFIQ?) */
+	cp_intc_write(1, CP_INTC_HOST_ENABLE_IDX_SET);
+
+	/*
+	 * Priority is determined by host channel: lower channel number has
+	 * higher priority i.e. channel 0 has highest priority and channel 31
+	 * had the lowest priority.
+	 */
+	num_reg = (num_irq + 3) >> 2;	/* 4 channels per register */
+	if (irq_prio) {
+		unsigned j, k;
+		u32 val;
+
+		for (k = i = 0; i < num_reg; i++) {
+			for (val = j = 0; j < 4; j++, k++) {
+				val >>= 8;
+				if (k < num_irq)
+					val |= irq_prio[k] << 24;
+			}
+
+			cp_intc_write(val, CP_INTC_CHAN_MAP(i));
+		}
+	} else	{
+		/*
+		 * Default everything to channel 15 if priority not specified.
+		 * Note that channel 0-1 are mapped to nFIQ and channels 2-31
+		 * are mapped to nIRQ.
+		 */
+		for (i = 0; i < num_reg; i++)
+			cp_intc_write(0x0f0f0f0f, CP_INTC_CHAN_MAP(i));
+	}
+
+	/* Set up genirq dispatching for cp_intc */
+	for (i = 0; i < num_irq; i++) {
+		set_irq_chip(i, &cp_intc_irq_chip);
+		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
+		set_irq_handler(i, handle_edge_irq);
+	}
+
+	/* Enable global interrupt */
+	cp_intc_write(1, CP_INTC_GLOBAL_ENABLE);
+}
diff --git a/arch/arm/mach-davinci/include/mach/cp_intc.h b/arch/arm/mach-davinci/include/mach/cp_intc.h
new file mode 100644
index 0000000..c4d27ee
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/cp_intc.h
@@ -0,0 +1,57 @@
+/*
+ * TI Common Platform Interrupt Controller (cp_intc) definitions
+ *
+ * Author: Steve Chen <schen@mvista.com>
+ * Copyright (C) 2008-2009, MontaVista Software, Inc. <source@mvista.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+#ifndef __ASM_HARDWARE_CP_INTC_H
+#define __ASM_HARDWARE_CP_INTC_H
+
+#define CP_INTC_REV			0x00
+#define CP_INTC_CTRL			0x04
+#define CP_INTC_HOST_CTRL		0x0C
+#define CP_INTC_GLOBAL_ENABLE		0x10
+#define CP_INTC_GLOBAL_NESTING_LEVEL	0x1C
+#define CP_INTC_SYS_STAT_IDX_SET	0x20
+#define CP_INTC_SYS_STAT_IDX_CLR	0x24
+#define CP_INTC_SYS_ENABLE_IDX_SET	0x28
+#define CP_INTC_SYS_ENABLE_IDX_CLR	0x2C
+#define CP_INTC_GLOBAL_WAKEUP_ENABLE	0x30
+#define CP_INTC_HOST_ENABLE_IDX_SET	0x34
+#define CP_INTC_HOST_ENABLE_IDX_CLR	0x38
+#define CP_INTC_PACING_PRESCALE 	0x40
+#define CP_INTC_VECTOR_BASE		0x50
+#define CP_INTC_VECTOR_SIZE		0x54
+#define CP_INTC_VECTOR_NULL		0x58
+#define CP_INTC_PRIO_IDX		0x80
+#define CP_INTC_PRIO_VECTOR		0x84
+#define CP_INTC_SECURE_ENABLE		0x90
+#define CP_INTC_SECURE_PRIO_IDX 	0x94
+#define CP_INTC_PACING_PARAM(n) 	(0x0100 + (n << 4))
+#define CP_INTC_PACING_DEC(n)		(0x0104 + (n << 4))
+#define CP_INTC_PACING_MAP(n)		(0x0108 + (n << 4))
+#define CP_INTC_SYS_RAW_STAT(n) 	(0x0200 + (n << 2))
+#define CP_INTC_SYS_STAT_CLR(n) 	(0x0280 + (n << 2))
+#define CP_INTC_SYS_ENABLE_SET(n)	(0x0300 + (n << 2))
+#define CP_INTC_SYS_ENABLE_CLR(n)	(0x0380 + (n << 2))
+#define CP_INTC_CHAN_MAP(n)		(0x0400 + (n << 2))
+#define CP_INTC_HOST_MAP(n)		(0x0800 + (n << 2))
+#define CP_INTC_HOST_PRIO_IDX(n)	(0x0900 + (n << 2))
+#define CP_INTC_SYS_POLARITY(n) 	(0x0D00 + (n << 2))
+#define CP_INTC_SYS_TYPE(n)		(0x0D80 + (n << 2))
+#define CP_INTC_WAKEUP_ENABLE(n)	(0x0E00 + (n << 2))
+#define CP_INTC_DEBUG_SELECT(n) 	(0x0F00 + (n << 2))
+#define CP_INTC_SYS_SECURE_ENABLE(n)	(0x1000 + (n << 2))
+#define CP_INTC_HOST_NESTING_LEVEL(n)	(0x1100 + (n << 2))
+#define CP_INTC_HOST_ENABLE(n)		(0x1500 + (n << 2))
+#define CP_INTC_HOST_PRIO_VECTOR(n)	(0x1600 + (n << 2))
+#define CP_INTC_VECTOR_ADDR(n)		(0x2000 + (n << 2))
+
+void __init cp_intc_init(void __iomem *base, unsigned short num_irq,
+			 u8 *irq_prio);
+
+#endif	/* __ASM_HARDWARE_CP_INTC_H */
-- 
cgit v0.10.2


From fb6313879caa46831d71a316b97b51d37d100269 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Wed, 29 Apr 2009 16:23:59 -0700
Subject: davinci: add platform support for watchdog timer

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/clock.h b/arch/arm/mach-davinci/clock.h
index 35736ec..584494a 100644
--- a/arch/arm/mach-davinci/clock.h
+++ b/arch/arm/mach-davinci/clock.h
@@ -93,4 +93,7 @@ struct davinci_clk {
 	}
 
 int davinci_clk_init(struct davinci_clk *clocks);
+
+extern struct platform_device davinci_wdt_device;
+
 #endif
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index a31370b..7fdc408 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -54,3 +54,38 @@ void __init davinci_init_i2c(struct davinci_i2c_platform_data *pdata)
 	(void) platform_device_register(&davinci_i2c_device);
 }
 
+/*-------------------------------------------------------------------------*/
+
+static struct resource wdt_resources[] = {
+	{
+		.start	= 0x01c21c00,
+		.end	= 0x01c21fff,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+struct platform_device davinci_wdt_device = {
+	.name		= "watchdog",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(wdt_resources),
+	.resource	= wdt_resources,
+};
+
+static void davinci_init_wdt(void)
+{
+	platform_device_register(&davinci_wdt_device);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static int __init davinci_init_devices(void)
+{
+	/* please keep these calls, and their implementations above,
+	 * in alphabetical order so they're easier to sort through.
+	 */
+	davinci_init_wdt();
+
+	return 0;
+}
+arch_initcall(davinci_init_devices);
+
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 494e01b..efbbc2a 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -19,6 +19,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/device.h>
+#include <linux/platform_device.h>
 
 #include <mach/hardware.h>
 #include <asm/system.h>
@@ -349,15 +350,13 @@ struct sys_timer davinci_timer = {
 
 
 /* reset board using watchdog timer */
-void davinci_watchdog_reset(void) {
+void davinci_watchdog_reset(void)
+{
 	u32 tgcr, wdtcr;
 	void __iomem *base = IO_ADDRESS(DAVINCI_WDOG_BASE);
-	struct device dev;
 	struct clk *wd_clk;
-	char *name = "watchdog";
 
-	dev_set_name(&dev, name);
-	wd_clk = clk_get(&dev, NULL);
+	wd_clk = clk_get(&davinci_wdt_device.dev, NULL);
 	if (WARN_ON(IS_ERR(wd_clk)))
 		return;
 	clk_enable(wd_clk);
-- 
cgit v0.10.2


From f5ce6a67a78357da5e88782b1cf1cc10b36f102c Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hugo.villeneuve@lyrtech.com>
Date: Wed, 29 Apr 2009 16:46:57 -0700
Subject: davinci: DM644x: add support for SFFSDR board

Signed-off-by: Hugo Villeneuve <hugo.villeneuve@lyrtech.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index 56cd03f..0d8ad59 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -20,6 +20,13 @@ config MACH_DAVINCI_EVM
 	  Configure this option to specify the whether the board used
 	  for development is a DM644x EVM
 
+config MACH_SFFSDR
+	bool "Lyrtech SFFSDR"
+	default n
+	depends on ARCH_DAVINCI_DM644x
+	help
+	  Say Y here to select the Lyrtech Small Form Factor
+	  Software Defined Radio (SFFSDR) board.
 
 config DAVINCI_MUX
 	bool "DAVINCI multiplexing support"
diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 9a696ae..2bb6ffa 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_ARCH_DAVINCI_DM644x)       += dm644x.o
 
 # Board specific
 obj-$(CONFIG_MACH_DAVINCI_EVM)  	+= board-dm644x-evm.o
+obj-$(CONFIG_MACH_SFFSDR)		+= board-sffsdr.o
diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
new file mode 100644
index 0000000..c6525e4
--- /dev/null
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -0,0 +1,180 @@
+/*
+ * Lyrtech SFFSDR board support.
+ *
+ * Copyright (C) 2008 Philip Balister, OpenSDR <philip@opensdr.com>
+ * Copyright (C) 2008 Lyrtech <www.lyrtech.com>
+ *
+ * Based on DV-EVM platform, original copyright follows:
+ *
+ * Copyright (C) 2007 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/at24.h>
+#include <linux/etherdevice.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <linux/io.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/flash.h>
+
+#include <mach/dm644x.h>
+#include <mach/common.h>
+#include <mach/i2c.h>
+#include <mach/serial.h>
+#include <mach/psc.h>
+#include <mach/mux.h>
+
+#define DAVINCI_ASYNC_EMIF_CONTROL_BASE   0x01e00000
+#define DAVINCI_ASYNC_EMIF_DATA_CE0_BASE  0x02000000
+
+struct mtd_partition davinci_sffsdr_nandflash_partition[] = {
+	/* U-Boot Environment: Block 0
+	 * UBL:                Block 1
+	 * U-Boot:             Blocks 6-7 (256 kb)
+	 * Integrity Kernel:   Blocks 8-31 (3 Mb)
+	 * Integrity Data:     Blocks 100-END
+	 */
+	{
+		.name		= "Linux Kernel",
+		.offset		= 32 * SZ_128K,
+		.size		= 16 * SZ_128K, /* 2 Mb */
+		.mask_flags	= MTD_WRITEABLE, /* Force read-only */
+	},
+	{
+		.name		= "Linux ROOT",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= 256 * SZ_128K, /* 32 Mb */
+		.mask_flags	= 0, /* R/W */
+	},
+};
+
+static struct flash_platform_data davinci_sffsdr_nandflash_data = {
+	.parts		= davinci_sffsdr_nandflash_partition,
+	.nr_parts	= ARRAY_SIZE(davinci_sffsdr_nandflash_partition),
+};
+
+static struct resource davinci_sffsdr_nandflash_resource[] = {
+	{
+		.start		= DAVINCI_ASYNC_EMIF_DATA_CE0_BASE,
+		.end		= DAVINCI_ASYNC_EMIF_DATA_CE0_BASE + SZ_16M - 1,
+		.flags		= IORESOURCE_MEM,
+	}, {
+		.start		= DAVINCI_ASYNC_EMIF_CONTROL_BASE,
+		.end		= DAVINCI_ASYNC_EMIF_CONTROL_BASE + SZ_4K - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device davinci_sffsdr_nandflash_device = {
+	.name		= "davinci_nand", /* Name of driver */
+	.id		= 0,
+	.dev		= {
+		.platform_data	= &davinci_sffsdr_nandflash_data,
+	},
+	.num_resources	= ARRAY_SIZE(davinci_sffsdr_nandflash_resource),
+	.resource	= davinci_sffsdr_nandflash_resource,
+};
+
+/* Get Ethernet address from kernel boot params */
+static u8 mac_addr[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
+
+static struct at24_platform_data eeprom_info = {
+	.byte_len	= (64*1024) / 8,
+	.page_size	= 32,
+	.flags		= AT24_FLAG_ADDR16,
+};
+
+static struct i2c_board_info __initdata i2c_info[] =  {
+	{
+		I2C_BOARD_INFO("24lc64", 0x50),
+		.platform_data	= &eeprom_info,
+	},
+	/* Other I2C devices:
+	 * MSP430,  addr 0x23 (not used)
+	 * PCA9543, addr 0x70 (setup done by U-Boot)
+	 * ADS7828, addr 0x48 (ADC for voltage monitoring.)
+	 */
+};
+
+static struct davinci_i2c_platform_data i2c_pdata = {
+	.bus_freq	= 20 /* kHz */,
+	.bus_delay	= 100 /* usec */,
+};
+
+static void __init sffsdr_init_i2c(void)
+{
+	davinci_init_i2c(&i2c_pdata);
+	i2c_register_board_info(1, i2c_info, ARRAY_SIZE(i2c_info));
+}
+
+static struct platform_device *davinci_sffsdr_devices[] __initdata = {
+	&davinci_sffsdr_nandflash_device,
+};
+
+static struct davinci_uart_config uart_config __initdata = {
+	.enabled_uarts = (1 << 0),
+};
+
+static void __init davinci_sffsdr_map_io(void)
+{
+	davinci_map_common_io();
+	dm644x_init();
+}
+
+static __init void davinci_sffsdr_init(void)
+{
+	platform_add_devices(davinci_sffsdr_devices,
+			     ARRAY_SIZE(davinci_sffsdr_devices));
+	sffsdr_init_i2c();
+	davinci_serial_init(&uart_config);
+	setup_usb(0, 0); /* We support only peripheral mode. */
+
+	/* mux VLYNQ pins */
+	davinci_cfg_reg(DM644X_VLYNQEN);
+	davinci_cfg_reg(DM644X_VLYNQWD);
+}
+
+static __init void davinci_sffsdr_irq_init(void)
+{
+	davinci_irq_init();
+}
+
+MACHINE_START(SFFSDR, "Lyrtech SFFSDR")
+	/* Maintainer: Hugo Villeneuve hugo.villeneuve@lyrtech.com */
+	.phys_io      = IO_PHYS,
+	.io_pg_offst  = (__IO_ADDRESS(IO_PHYS) >> 18) & 0xfffc,
+	.boot_params  = (DAVINCI_DDR_BASE + 0x100),
+	.map_io	      = davinci_sffsdr_map_io,
+	.init_irq     = davinci_sffsdr_irq_init,
+	.timer	      = &davinci_timer,
+	.init_machine = davinci_sffsdr_init,
+MACHINE_END
-- 
cgit v0.10.2


From 95a3477fe57e0669dcb531516f2930fe1cf27e6b Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Wed, 29 Apr 2009 12:10:55 -0700
Subject: davinci: DM355: add base SoC and board support

In addition, add board support for the DM355 Evaluation Module (EVM)
and the DM355 Leopard board.

Original DM355 EVM support done by Sandeep Paulraj, with significant
updates and improvements by David Brownell.  DM355 Leopord support
done by Koen Kooi.

Signed-off-by: Sandeep Paulraj <s-paulraj@ti.com>
Signed-off-by: Koen Kooi <koen@beagleboard.org>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index 0d8ad59..1c3ab40 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -10,6 +10,9 @@ comment "DaVinci Core Type"
 config ARCH_DAVINCI_DM644x
 	bool "DaVinci 644x based system"
 
+config ARCH_DAVINCI_DM355
+        bool "DaVinci 355 based system"
+
 comment "DaVinci Board Type"
 
 config MACH_DAVINCI_EVM
@@ -28,6 +31,22 @@ config MACH_SFFSDR
 	  Say Y here to select the Lyrtech Small Form Factor
 	  Software Defined Radio (SFFSDR) board.
 
+config MACH_DAVINCI_DM355_EVM
+	bool "TI DM355 EVM"
+	default n
+	depends on ARCH_DAVINCI_DM355
+	help
+	  Configure this option to specify the whether the board used
+	  for development is a DM355 EVM
+
+config MACH_DM355_LEOPARD
+	bool "DM355 Leopard board"
+	default n
+	depends on ARCH_DAVINCI_DM355
+	help
+	  Configure this option to specify the whether the board used
+	  for development is a DM355 Leopard board.
+
 config DAVINCI_MUX
 	bool "DAVINCI multiplexing support"
 	depends on ARCH_DAVINCI
diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 2bb6ffa..381c363 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -12,7 +12,10 @@ obj-$(CONFIG_CP_INTC)			+= cp_intc.o
 
 # Chip specific
 obj-$(CONFIG_ARCH_DAVINCI_DM644x)       += dm644x.o
+obj-$(CONFIG_ARCH_DAVINCI_DM355)        += dm355.o
 
 # Board specific
 obj-$(CONFIG_MACH_DAVINCI_EVM)  	+= board-dm644x-evm.o
 obj-$(CONFIG_MACH_SFFSDR)		+= board-sffsdr.o
+obj-$(CONFIG_MACH_DAVINCI_DM355_EVM)	+= board-dm355-evm.o
+obj-$(CONFIG_MACH_DM355_LEOPARD)	+= board-dm355-leopard.o
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
new file mode 100644
index 0000000..6af3c6c
--- /dev/null
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -0,0 +1,269 @@
+/*
+ * TI DaVinci EVM board support
+ *
+ * Author: Kevin Hilman, Deep Root Systems, LLC
+ *
+ * 2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/nand.h>
+#include <linux/i2c.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/clk.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/eeprom.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/flash.h>
+
+#include <mach/hardware.h>
+#include <mach/dm355.h>
+#include <mach/psc.h>
+#include <mach/common.h>
+#include <mach/i2c.h>
+#include <mach/serial.h>
+#include <mach/nand.h>
+
+#define DAVINCI_ASYNC_EMIF_CONTROL_BASE		0x01e10000
+#define DAVINCI_ASYNC_EMIF_DATA_CE0_BASE	0x02000000
+
+/* NOTE:  this is geared for the standard config, with a socketed
+ * 2 GByte Micron NAND (MT29F16G08FAA) using 128KB sectors.  If you
+ * swap chips, maybe with a different block size, partitioning may
+ * need to be changed.
+ */
+#define NAND_BLOCK_SIZE		SZ_128K
+
+static struct mtd_partition davinci_nand_partitions[] = {
+	{
+		/* UBL (a few copies) plus U-Boot */
+		.name		= "bootloader",
+		.offset		= 0,
+		.size		= 15 * NAND_BLOCK_SIZE,
+		.mask_flags	= MTD_WRITEABLE, /* force read-only */
+	}, {
+		/* U-Boot environment */
+		.name		= "params",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= 1 * NAND_BLOCK_SIZE,
+		.mask_flags	= 0,
+	}, {
+		.name		= "kernel",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= SZ_4M,
+		.mask_flags	= 0,
+	}, {
+		.name		= "filesystem1",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= SZ_512M,
+		.mask_flags	= 0,
+	}, {
+		.name		= "filesystem2",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= MTDPART_SIZ_FULL,
+		.mask_flags	= 0,
+	}
+	/* two blocks with bad block table (and mirror) at the end */
+};
+
+static struct davinci_nand_pdata davinci_nand_data = {
+	.mask_chipsel		= BIT(14),
+	.parts			= davinci_nand_partitions,
+	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
+	.ecc_mode		= NAND_ECC_HW_SYNDROME,
+	.options		= NAND_USE_FLASH_BBT,
+};
+
+static struct resource davinci_nand_resources[] = {
+	{
+		.start		= DAVINCI_ASYNC_EMIF_DATA_CE0_BASE,
+		.end		= DAVINCI_ASYNC_EMIF_DATA_CE0_BASE + SZ_32M - 1,
+		.flags		= IORESOURCE_MEM,
+	}, {
+		.start		= DAVINCI_ASYNC_EMIF_CONTROL_BASE,
+		.end		= DAVINCI_ASYNC_EMIF_CONTROL_BASE + SZ_4K - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device davinci_nand_device = {
+	.name			= "davinci_nand",
+	.id			= 0,
+
+	.num_resources		= ARRAY_SIZE(davinci_nand_resources),
+	.resource		= davinci_nand_resources,
+
+	.dev			= {
+		.platform_data	= &davinci_nand_data,
+	},
+};
+
+static struct davinci_i2c_platform_data i2c_pdata = {
+	.bus_freq	= 400	/* kHz */,
+	.bus_delay	= 0	/* usec */,
+};
+
+static int dm355evm_mmc_gpios = -EINVAL;
+
+static void dm355evm_mmcsd_gpios(unsigned gpio)
+{
+	gpio_request(gpio + 0, "mmc0_ro");
+	gpio_request(gpio + 1, "mmc0_cd");
+	gpio_request(gpio + 2, "mmc1_ro");
+	gpio_request(gpio + 3, "mmc1_cd");
+
+	/* we "know" these are input-only so we don't
+	 * need to call gpio_direction_input()
+	 */
+
+	dm355evm_mmc_gpios = gpio;
+}
+
+static struct i2c_board_info dm355evm_i2c_info[] = {
+	{ I2C_BOARD_INFO("dm355evm_msp", 0x25),
+		.platform_data = dm355evm_mmcsd_gpios,
+		/* plus irq */ },
+	/* { I2C_BOARD_INFO("tlv320aic3x", 0x1b), }, */
+	/* { I2C_BOARD_INFO("tvp5146", 0x5d), }, */
+};
+
+static void __init evm_init_i2c(void)
+{
+	davinci_init_i2c(&i2c_pdata);
+
+	gpio_request(5, "dm355evm_msp");
+	gpio_direction_input(5);
+	dm355evm_i2c_info[0].irq = gpio_to_irq(5);
+
+	i2c_register_board_info(1, dm355evm_i2c_info,
+			ARRAY_SIZE(dm355evm_i2c_info));
+}
+
+static struct resource dm355evm_dm9000_rsrc[] = {
+	{
+		/* addr */
+		.start	= 0x04014000,
+		.end	= 0x04014001,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		/* data */
+		.start	= 0x04014002,
+		.end	= 0x04014003,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.flags	= IORESOURCE_IRQ
+			| IORESOURCE_IRQ_HIGHEDGE /* rising (active high) */,
+	},
+};
+
+static struct platform_device dm355evm_dm9000 = {
+	.name		= "dm9000",
+	.id		= -1,
+	.resource	= dm355evm_dm9000_rsrc,
+	.num_resources	= ARRAY_SIZE(dm355evm_dm9000_rsrc),
+};
+
+static struct platform_device *davinci_evm_devices[] __initdata = {
+	&dm355evm_dm9000,
+	&davinci_nand_device,
+};
+
+static struct davinci_uart_config uart_config __initdata = {
+	.enabled_uarts = (1 << 0),
+};
+
+static void __init dm355_evm_map_io(void)
+{
+	davinci_map_common_io();
+	dm355_init();
+}
+
+/* Don't connect anything to J10 unless you're only using USB host
+ * mode *and* have to do so with some kind of gender-bender.  If
+ * you have proper Mini-B or Mini-A cables (or Mini-A adapters)
+ * the ID pin won't need any help.
+ */
+#ifdef CONFIG_USB_MUSB_PERIPHERAL
+#define USB_ID_VALUE	0	/* ID pulled high; *should* float */
+#else
+#define USB_ID_VALUE	1	/* ID pulled low */
+#endif
+
+static struct spi_eeprom at25640a = {
+	.byte_len	= SZ_64K / 8,
+	.name		= "at25640a",
+	.page_size	= 32,
+	.flags		= EE_ADDR2,
+};
+
+static struct spi_board_info dm355_evm_spi_info[] __initconst = {
+	{
+		.modalias	= "at25",
+		.platform_data	= &at25640a,
+		.max_speed_hz	= 10 * 1000 * 1000,	/* at 3v3 */
+		.bus_num	= 0,
+		.chip_select	= 0,
+		.mode		= SPI_MODE_0,
+	},
+};
+
+static __init void dm355_evm_init(void)
+{
+	struct clk *aemif;
+
+	gpio_request(1, "dm9000");
+	gpio_direction_input(1);
+	dm355evm_dm9000_rsrc[2].start = gpio_to_irq(1);
+
+	aemif = clk_get(&dm355evm_dm9000.dev, "aemif");
+	if (IS_ERR(aemif))
+		WARN("%s: unable to get AEMIF clock\n", __func__);
+	else
+		clk_enable(aemif);
+
+	platform_add_devices(davinci_evm_devices,
+			     ARRAY_SIZE(davinci_evm_devices));
+	evm_init_i2c();
+	davinci_serial_init(&uart_config);
+
+	/* NOTE:  NAND flash timings set by the UBL are slower than
+	 * needed by MT29F16G08FAA chips ... EMIF.A1CR is 0x40400204
+	 * but could be 0x0400008c for about 25% faster page reads.
+	 */
+
+	gpio_request(2, "usb_id_toggle");
+	gpio_direction_output(2, USB_ID_VALUE);
+	/* irlml6401 switches over 1A in under 8 msec */
+	setup_usb(500, 8);
+
+	dm355_init_spi0(BIT(0), dm355_evm_spi_info,
+			ARRAY_SIZE(dm355_evm_spi_info));
+}
+
+static __init void dm355_evm_irq_init(void)
+{
+	davinci_irq_init();
+}
+
+MACHINE_START(DAVINCI_DM355_EVM, "DaVinci DM355 EVM")
+	.phys_io      = IO_PHYS,
+	.io_pg_offst  = (__IO_ADDRESS(IO_PHYS) >> 18) & 0xfffc,
+	.boot_params  = (0x80000100),
+	.map_io	      = dm355_evm_map_io,
+	.init_irq     = dm355_evm_irq_init,
+	.timer	      = &davinci_timer,
+	.init_machine = dm355_evm_init,
+MACHINE_END
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
new file mode 100644
index 0000000..22f16f3
--- /dev/null
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -0,0 +1,268 @@
+/*
+ * DM355 leopard board support
+ *
+ * Based on board-dm355-evm.c
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/nand.h>
+#include <linux/i2c.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/clk.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/eeprom.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/flash.h>
+
+#include <mach/hardware.h>
+#include <mach/dm355.h>
+#include <mach/psc.h>
+#include <mach/common.h>
+#include <mach/i2c.h>
+#include <mach/serial.h>
+#include <mach/nand.h>
+
+#define DAVINCI_ASYNC_EMIF_CONTROL_BASE		0x01e10000
+#define DAVINCI_ASYNC_EMIF_DATA_CE0_BASE	0x02000000
+
+/* NOTE:  this is geared for the standard config, with a socketed
+ * 2 GByte Micron NAND (MT29F16G08FAA) using 128KB sectors.  If you
+ * swap chips, maybe with a different block size, partitioning may
+ * need to be changed.
+ */
+#define NAND_BLOCK_SIZE		SZ_128K
+
+static struct mtd_partition davinci_nand_partitions[] = {
+	{
+		/* UBL (a few copies) plus U-Boot */
+		.name		= "bootloader",
+		.offset		= 0,
+		.size		= 15 * NAND_BLOCK_SIZE,
+		.mask_flags	= MTD_WRITEABLE, /* force read-only */
+	}, {
+		/* U-Boot environment */
+		.name		= "params",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= 1 * NAND_BLOCK_SIZE,
+		.mask_flags	= 0,
+	}, {
+		.name		= "kernel",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= SZ_4M,
+		.mask_flags	= 0,
+	}, {
+		.name		= "filesystem1",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= SZ_512M,
+		.mask_flags	= 0,
+	}, {
+		.name		= "filesystem2",
+		.offset		= MTDPART_OFS_APPEND,
+		.size		= MTDPART_SIZ_FULL,
+		.mask_flags	= 0,
+	}
+	/* two blocks with bad block table (and mirror) at the end */
+};
+
+static struct davinci_nand_pdata davinci_nand_data = {
+	.mask_chipsel		= BIT(14),
+	.parts			= davinci_nand_partitions,
+	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
+	.ecc_mode		= NAND_ECC_HW_SYNDROME,
+	.options		= NAND_USE_FLASH_BBT,
+};
+
+static struct resource davinci_nand_resources[] = {
+	{
+		.start		= DAVINCI_ASYNC_EMIF_DATA_CE0_BASE,
+		.end		= DAVINCI_ASYNC_EMIF_DATA_CE0_BASE + SZ_32M - 1,
+		.flags		= IORESOURCE_MEM,
+	}, {
+		.start		= DAVINCI_ASYNC_EMIF_CONTROL_BASE,
+		.end		= DAVINCI_ASYNC_EMIF_CONTROL_BASE + SZ_4K - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device davinci_nand_device = {
+	.name			= "davinci_nand",
+	.id			= 0,
+
+	.num_resources		= ARRAY_SIZE(davinci_nand_resources),
+	.resource		= davinci_nand_resources,
+
+	.dev			= {
+		.platform_data	= &davinci_nand_data,
+	},
+};
+
+static struct davinci_i2c_platform_data i2c_pdata = {
+	.bus_freq	= 400	/* kHz */,
+	.bus_delay	= 0	/* usec */,
+};
+
+static int leopard_mmc_gpio = -EINVAL;
+
+static void dm355leopard_mmcsd_gpios(unsigned gpio)
+{
+	gpio_request(gpio + 0, "mmc0_ro");
+	gpio_request(gpio + 1, "mmc0_cd");
+	gpio_request(gpio + 2, "mmc1_ro");
+	gpio_request(gpio + 3, "mmc1_cd");
+
+	/* we "know" these are input-only so we don't
+	 * need to call gpio_direction_input()
+	 */
+
+	leopard_mmc_gpio = gpio;
+}
+
+static struct i2c_board_info dm355leopard_i2c_info[] = {
+	{ I2C_BOARD_INFO("dm355leopard_msp", 0x25),
+		.platform_data = dm355leopard_mmcsd_gpios,
+		/* plus irq */ },
+	/* { I2C_BOARD_INFO("tlv320aic3x", 0x1b), }, */
+	/* { I2C_BOARD_INFO("tvp5146", 0x5d), }, */
+};
+
+static void __init leopard_init_i2c(void)
+{
+	davinci_init_i2c(&i2c_pdata);
+
+	gpio_request(5, "dm355leopard_msp");
+	gpio_direction_input(5);
+	dm355leopard_i2c_info[0].irq = gpio_to_irq(5);
+
+	i2c_register_board_info(1, dm355leopard_i2c_info,
+			ARRAY_SIZE(dm355leopard_i2c_info));
+}
+
+static struct resource dm355leopard_dm9000_rsrc[] = {
+	{
+		/* addr */
+		.start	= 0x04000000,
+		.end	= 0x04000001,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		/* data */
+		.start	= 0x04000016,
+		.end	= 0x04000017,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.flags	= IORESOURCE_IRQ
+			| IORESOURCE_IRQ_HIGHEDGE /* rising (active high) */,
+	},
+};
+
+static struct platform_device dm355leopard_dm9000 = {
+	.name		= "dm9000",
+	.id		= -1,
+	.resource	= dm355leopard_dm9000_rsrc,
+	.num_resources	= ARRAY_SIZE(dm355leopard_dm9000_rsrc),
+};
+
+static struct platform_device *davinci_leopard_devices[] __initdata = {
+	&dm355leopard_dm9000,
+	&davinci_nand_device,
+};
+
+static struct davinci_uart_config uart_config __initdata = {
+	.enabled_uarts = (1 << 0),
+};
+
+static void __init dm355_leopard_map_io(void)
+{
+	davinci_map_common_io();
+	dm355_init();
+}
+
+/* Don't connect anything to J10 unless you're only using USB host
+ * mode *and* have to do so with some kind of gender-bender.  If
+ * you have proper Mini-B or Mini-A cables (or Mini-A adapters)
+ * the ID pin won't need any help.
+ */
+#ifdef CONFIG_USB_MUSB_PERIPHERAL
+#define USB_ID_VALUE	0	/* ID pulled high; *should* float */
+#else
+#define USB_ID_VALUE	1	/* ID pulled low */
+#endif
+
+static struct spi_eeprom at25640a = {
+	.byte_len	= SZ_64K / 8,
+	.name		= "at25640a",
+	.page_size	= 32,
+	.flags		= EE_ADDR2,
+};
+
+static struct spi_board_info dm355_leopard_spi_info[] __initconst = {
+	{
+		.modalias	= "at25",
+		.platform_data	= &at25640a,
+		.max_speed_hz	= 10 * 1000 * 1000,	/* at 3v3 */
+		.bus_num	= 0,
+		.chip_select	= 0,
+		.mode		= SPI_MODE_0,
+	},
+};
+
+static __init void dm355_leopard_init(void)
+{
+	struct clk *aemif;
+
+	gpio_request(9, "dm9000");
+	gpio_direction_input(9);
+	dm355leopard_dm9000_rsrc[2].start = gpio_to_irq(9);
+
+	aemif = clk_get(&dm355leopard_dm9000.dev, "aemif");
+	if (IS_ERR(aemif))
+		WARN("%s: unable to get AEMIF clock\n", __func__);
+	else
+		clk_enable(aemif);
+
+	platform_add_devices(davinci_leopard_devices,
+			     ARRAY_SIZE(davinci_leopard_devices));
+	leopard_init_i2c();
+	davinci_serial_init(&uart_config);
+
+	/* NOTE:  NAND flash timings set by the UBL are slower than
+	 * needed by MT29F16G08FAA chips ... EMIF.A1CR is 0x40400204
+	 * but could be 0x0400008c for about 25% faster page reads.
+	 */
+
+	gpio_request(2, "usb_id_toggle");
+	gpio_direction_output(2, USB_ID_VALUE);
+	/* irlml6401 switches over 1A in under 8 msec */
+	setup_usb(500, 8);
+
+	dm355_init_spi0(BIT(0), dm355_leopard_spi_info,
+			ARRAY_SIZE(dm355_leopard_spi_info));
+}
+
+static __init void dm355_leopard_irq_init(void)
+{
+	davinci_irq_init();
+}
+
+MACHINE_START(DM355_LEOPARD, "DaVinci DM355 leopard")
+	.phys_io      = IO_PHYS,
+	.io_pg_offst  = (__IO_ADDRESS(IO_PHYS) >> 18) & 0xfffc,
+	.boot_params  = (0x80000100),
+	.map_io	      = dm355_leopard_map_io,
+	.init_irq     = dm355_leopard_irq_init,
+	.timer	      = &davinci_timer,
+	.init_machine = dm355_leopard_init,
+MACHINE_END
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
new file mode 100644
index 0000000..c02115f
--- /dev/null
+++ b/arch/arm/mach-davinci/dm355.c
@@ -0,0 +1,540 @@
+/*
+ * TI DaVinci DM355 chip specific setup
+ *
+ * Author: Kevin Hilman, Deep Root Systems, LLC
+ *
+ * 2007 (c) Deep Root Systems, LLC. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/spi/spi.h>
+
+#include <mach/dm355.h>
+#include <mach/clock.h>
+#include <mach/cputype.h>
+#include <mach/edma.h>
+#include <mach/psc.h>
+#include <mach/mux.h>
+#include <mach/irqs.h>
+
+#include "clock.h"
+#include "mux.h"
+
+/*
+ * Device specific clocks
+ */
+#define DM355_REF_FREQ		24000000	/* 24 or 36 MHz */
+
+static struct pll_data pll1_data = {
+	.num       = 1,
+	.phys_base = DAVINCI_PLL1_BASE,
+	.flags     = PLL_HAS_PREDIV | PLL_HAS_POSTDIV,
+};
+
+static struct pll_data pll2_data = {
+	.num       = 2,
+	.phys_base = DAVINCI_PLL2_BASE,
+	.flags     = PLL_HAS_PREDIV,
+};
+
+static struct clk ref_clk = {
+	.name = "ref_clk",
+	/* FIXME -- crystal rate is board-specific */
+	.rate = DM355_REF_FREQ,
+};
+
+static struct clk pll1_clk = {
+	.name = "pll1",
+	.parent = &ref_clk,
+	.flags = CLK_PLL,
+	.pll_data = &pll1_data,
+};
+
+static struct clk pll1_aux_clk = {
+	.name = "pll1_aux_clk",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL | PRE_PLL,
+};
+
+static struct clk pll1_sysclk1 = {
+	.name = "pll1_sysclk1",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV1,
+};
+
+static struct clk pll1_sysclk2 = {
+	.name = "pll1_sysclk2",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV2,
+};
+
+static struct clk pll1_sysclk3 = {
+	.name = "pll1_sysclk3",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV3,
+};
+
+static struct clk pll1_sysclk4 = {
+	.name = "pll1_sysclk4",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV4,
+};
+
+static struct clk pll1_sysclkbp = {
+	.name = "pll1_sysclkbp",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL | PRE_PLL,
+	.div_reg = BPDIV
+};
+
+static struct clk vpss_dac_clk = {
+	.name = "vpss_dac",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM355_LPSC_VPSS_DAC,
+};
+
+static struct clk vpss_master_clk = {
+	.name = "vpss_master",
+	.parent = &pll1_sysclk4,
+	.lpsc = DAVINCI_LPSC_VPSSMSTR,
+	.flags = CLK_PSC,
+};
+
+static struct clk vpss_slave_clk = {
+	.name = "vpss_slave",
+	.parent = &pll1_sysclk4,
+	.lpsc = DAVINCI_LPSC_VPSSSLV,
+};
+
+
+static struct clk clkout1_clk = {
+	.name = "clkout1",
+	.parent = &pll1_aux_clk,
+	/* NOTE:  clkout1 can be externally gated by muxing GPIO-18 */
+};
+
+static struct clk clkout2_clk = {
+	.name = "clkout2",
+	.parent = &pll1_sysclkbp,
+};
+
+static struct clk pll2_clk = {
+	.name = "pll2",
+	.parent = &ref_clk,
+	.flags = CLK_PLL,
+	.pll_data = &pll2_data,
+};
+
+static struct clk pll2_sysclk1 = {
+	.name = "pll2_sysclk1",
+	.parent = &pll2_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV1,
+};
+
+static struct clk pll2_sysclkbp = {
+	.name = "pll2_sysclkbp",
+	.parent = &pll2_clk,
+	.flags = CLK_PLL | PRE_PLL,
+	.div_reg = BPDIV
+};
+
+static struct clk clkout3_clk = {
+	.name = "clkout3",
+	.parent = &pll2_sysclkbp,
+	/* NOTE:  clkout3 can be externally gated by muxing GPIO-16 */
+};
+
+static struct clk arm_clk = {
+	.name = "arm_clk",
+	.parent = &pll1_sysclk1,
+	.lpsc = DAVINCI_LPSC_ARM,
+	.flags = ALWAYS_ENABLED,
+};
+
+/*
+ * NOT LISTED below, and not touched by Linux
+ *   - in SyncReset state by default
+ *	.lpsc = DAVINCI_LPSC_TPCC,
+ *	.lpsc = DAVINCI_LPSC_TPTC0,
+ *	.lpsc = DAVINCI_LPSC_TPTC1,
+ *	.lpsc = DAVINCI_LPSC_DDR_EMIF, .parent = &sysclk2_clk,
+ *	.lpsc = DAVINCI_LPSC_MEMSTICK,
+ *   - in Enabled state by default
+ *	.lpsc = DAVINCI_LPSC_SYSTEM_SUBSYS,
+ *	.lpsc = DAVINCI_LPSC_SCR2,	// "bus"
+ *	.lpsc = DAVINCI_LPSC_SCR3,	// "bus"
+ *	.lpsc = DAVINCI_LPSC_SCR4,	// "bus"
+ *	.lpsc = DAVINCI_LPSC_CROSSBAR,	// "emulation"
+ *	.lpsc = DAVINCI_LPSC_CFG27,	// "test"
+ *	.lpsc = DAVINCI_LPSC_CFG3,	// "test"
+ *	.lpsc = DAVINCI_LPSC_CFG5,	// "test"
+ */
+
+static struct clk mjcp_clk = {
+	.name = "mjcp",
+	.parent = &pll1_sysclk1,
+	.lpsc = DAVINCI_LPSC_IMCOP,
+};
+
+static struct clk uart0_clk = {
+	.name = "uart0",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_UART0,
+};
+
+static struct clk uart1_clk = {
+	.name = "uart1",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_UART1,
+};
+
+static struct clk uart2_clk = {
+	.name = "uart2",
+	.parent = &pll1_sysclk2,
+	.lpsc = DAVINCI_LPSC_UART2,
+};
+
+static struct clk i2c_clk = {
+	.name = "i2c",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_I2C,
+};
+
+static struct clk asp0_clk = {
+	.name = "asp0",
+	.parent = &pll1_sysclk2,
+	.lpsc = DAVINCI_LPSC_McBSP,
+};
+
+static struct clk asp1_clk = {
+	.name = "asp1",
+	.parent = &pll1_sysclk2,
+	.lpsc = DM355_LPSC_McBSP1,
+};
+
+static struct clk mmcsd0_clk = {
+	.name = "mmcsd0",
+	.parent = &pll1_sysclk2,
+	.lpsc = DAVINCI_LPSC_MMC_SD,
+};
+
+static struct clk mmcsd1_clk = {
+	.name = "mmcsd1",
+	.parent = &pll1_sysclk2,
+	.lpsc = DM355_LPSC_MMC_SD1,
+};
+
+static struct clk spi0_clk = {
+	.name = "spi0",
+	.parent = &pll1_sysclk2,
+	.lpsc = DAVINCI_LPSC_SPI,
+};
+
+static struct clk spi1_clk = {
+	.name = "spi1",
+	.parent = &pll1_sysclk2,
+	.lpsc = DM355_LPSC_SPI1,
+};
+
+static struct clk spi2_clk = {
+	.name = "spi2",
+	.parent = &pll1_sysclk2,
+	.lpsc = DM355_LPSC_SPI2,
+};
+
+static struct clk gpio_clk = {
+	.name = "gpio",
+	.parent = &pll1_sysclk2,
+	.lpsc = DAVINCI_LPSC_GPIO,
+};
+
+static struct clk aemif_clk = {
+	.name = "aemif",
+	.parent = &pll1_sysclk2,
+	.lpsc = DAVINCI_LPSC_AEMIF,
+};
+
+static struct clk pwm0_clk = {
+	.name = "pwm0",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_PWM0,
+};
+
+static struct clk pwm1_clk = {
+	.name = "pwm1",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_PWM1,
+};
+
+static struct clk pwm2_clk = {
+	.name = "pwm2",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_PWM2,
+};
+
+static struct clk pwm3_clk = {
+	.name = "pwm3",
+	.parent = &pll1_aux_clk,
+	.lpsc = DM355_LPSC_PWM3,
+};
+
+static struct clk timer0_clk = {
+	.name = "timer0",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_TIMER0,
+};
+
+static struct clk timer1_clk = {
+	.name = "timer1",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_TIMER1,
+};
+
+static struct clk timer2_clk = {
+	.name = "timer2",
+	.parent = &pll1_aux_clk,
+	.lpsc = DAVINCI_LPSC_TIMER2,
+	.usecount = 1,              /* REVISIT: why cant' this be disabled? */
+};
+
+static struct clk timer3_clk = {
+	.name = "timer3",
+	.parent = &pll1_aux_clk,
+	.lpsc = DM355_LPSC_TIMER3,
+};
+
+static struct clk rto_clk = {
+	.name = "rto",
+	.parent = &pll1_aux_clk,
+	.lpsc = DM355_LPSC_RTO,
+};
+
+static struct clk usb_clk = {
+	.name = "usb",
+	.parent = &pll1_sysclk2,
+	.lpsc = DAVINCI_LPSC_USB,
+};
+
+static struct davinci_clk dm355_clks[] = {
+	CLK(NULL, "ref", &ref_clk),
+	CLK(NULL, "pll1", &pll1_clk),
+	CLK(NULL, "pll1_sysclk1", &pll1_sysclk1),
+	CLK(NULL, "pll1_sysclk2", &pll1_sysclk2),
+	CLK(NULL, "pll1_sysclk3", &pll1_sysclk3),
+	CLK(NULL, "pll1_sysclk4", &pll1_sysclk4),
+	CLK(NULL, "pll1_aux", &pll1_aux_clk),
+	CLK(NULL, "pll1_sysclkbp", &pll1_sysclkbp),
+	CLK(NULL, "vpss_dac", &vpss_dac_clk),
+	CLK(NULL, "vpss_master", &vpss_master_clk),
+	CLK(NULL, "vpss_slave", &vpss_slave_clk),
+	CLK(NULL, "clkout1", &clkout1_clk),
+	CLK(NULL, "clkout2", &clkout2_clk),
+	CLK(NULL, "pll2", &pll2_clk),
+	CLK(NULL, "pll2_sysclk1", &pll2_sysclk1),
+	CLK(NULL, "pll2_sysclkbp", &pll2_sysclkbp),
+	CLK(NULL, "clkout3", &clkout3_clk),
+	CLK(NULL, "arm", &arm_clk),
+	CLK(NULL, "mjcp", &mjcp_clk),
+	CLK(NULL, "uart0", &uart0_clk),
+	CLK(NULL, "uart1", &uart1_clk),
+	CLK(NULL, "uart2", &uart2_clk),
+	CLK("i2c_davinci.1", NULL, &i2c_clk),
+	CLK("soc-audio.0", NULL, &asp0_clk),
+	CLK("soc-audio.1", NULL, &asp1_clk),
+	CLK("davinci_mmc.0", NULL, &mmcsd0_clk),
+	CLK("davinci_mmc.1", NULL, &mmcsd1_clk),
+	CLK(NULL, "spi0", &spi0_clk),
+	CLK(NULL, "spi1", &spi1_clk),
+	CLK(NULL, "spi2", &spi2_clk),
+	CLK(NULL, "gpio", &gpio_clk),
+	CLK(NULL, "aemif", &aemif_clk),
+	CLK(NULL, "pwm0", &pwm0_clk),
+	CLK(NULL, "pwm1", &pwm1_clk),
+	CLK(NULL, "pwm2", &pwm2_clk),
+	CLK(NULL, "pwm3", &pwm3_clk),
+	CLK(NULL, "timer0", &timer0_clk),
+	CLK(NULL, "timer1", &timer1_clk),
+	CLK("watchdog", NULL, &timer2_clk),
+	CLK(NULL, "timer3", &timer3_clk),
+	CLK(NULL, "rto", &rto_clk),
+	CLK(NULL, "usb", &usb_clk),
+	CLK(NULL, NULL, NULL),
+};
+
+/*----------------------------------------------------------------------*/
+
+static u64 dm355_spi0_dma_mask = DMA_BIT_MASK(32);
+
+static struct resource dm355_spi0_resources[] = {
+	{
+		.start = 0x01c66000,
+		.end   = 0x01c667ff,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = IRQ_DM355_SPINT0_1,
+		.flags = IORESOURCE_IRQ,
+	},
+	/* Not yet used, so not included:
+	 * IORESOURCE_IRQ:
+	 *  - IRQ_DM355_SPINT0_0
+	 * IORESOURCE_DMA:
+	 *  - DAVINCI_DMA_SPI_SPIX
+	 *  - DAVINCI_DMA_SPI_SPIR
+	 */
+};
+
+static struct platform_device dm355_spi0_device = {
+	.name = "spi_davinci",
+	.id = 0,
+	.dev = {
+		.dma_mask = &dm355_spi0_dma_mask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+	.num_resources = ARRAY_SIZE(dm355_spi0_resources),
+	.resource = dm355_spi0_resources,
+};
+
+void __init dm355_init_spi0(unsigned chipselect_mask,
+		struct spi_board_info *info, unsigned len)
+{
+	/* for now, assume we need MISO */
+	davinci_cfg_reg(DM355_SPI0_SDI);
+
+	/* not all slaves will be wired up */
+	if (chipselect_mask & BIT(0))
+		davinci_cfg_reg(DM355_SPI0_SDENA0);
+	if (chipselect_mask & BIT(1))
+		davinci_cfg_reg(DM355_SPI0_SDENA1);
+
+	spi_register_board_info(info, len);
+
+	platform_device_register(&dm355_spi0_device);
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Device specific mux setup
+ *
+ *	soc	description	mux  mode   mode  mux	 dbg
+ *				reg  offset mask  mode
+ */
+static const struct mux_config dm355_pins[] = {
+MUX_CFG(DM355,	MMCSD0,		4,   2,     1,	  0,	 false)
+
+MUX_CFG(DM355,	SD1_CLK,	3,   6,     1,	  1,	 false)
+MUX_CFG(DM355,	SD1_CMD,	3,   7,     1,	  1,	 false)
+MUX_CFG(DM355,	SD1_DATA3,	3,   8,     3,	  1,	 false)
+MUX_CFG(DM355,	SD1_DATA2,	3,   10,    3,	  1,	 false)
+MUX_CFG(DM355,	SD1_DATA1,	3,   12,    3,	  1,	 false)
+MUX_CFG(DM355,	SD1_DATA0,	3,   14,    3,	  1,	 false)
+
+MUX_CFG(DM355,	I2C_SDA,	3,   19,    1,	  1,	 false)
+MUX_CFG(DM355,	I2C_SCL,	3,   20,    1,	  1,	 false)
+
+MUX_CFG(DM355,	MCBSP0_BDX,	3,   0,     1,	  1,	 false)
+MUX_CFG(DM355,	MCBSP0_X,	3,   1,     1,	  1,	 false)
+MUX_CFG(DM355,	MCBSP0_BFSX,	3,   2,     1,	  1,	 false)
+MUX_CFG(DM355,	MCBSP0_BDR,	3,   3,     1,	  1,	 false)
+MUX_CFG(DM355,	MCBSP0_R,	3,   4,     1,	  1,	 false)
+MUX_CFG(DM355,	MCBSP0_BFSR,	3,   5,     1,	  1,	 false)
+
+MUX_CFG(DM355,	SPI0_SDI,	4,   1,     1,    0,	 false)
+MUX_CFG(DM355,	SPI0_SDENA0,	4,   0,     1,    0,	 false)
+MUX_CFG(DM355,	SPI0_SDENA1,	3,   28,    1,    1,	 false)
+
+INT_CFG(DM355,  INT_EDMA_CC,	      2,    1,    1,     false)
+INT_CFG(DM355,  INT_EDMA_TC0_ERR,     3,    1,    1,     false)
+INT_CFG(DM355,  INT_EDMA_TC1_ERR,     4,    1,    1,     false)
+
+EVT_CFG(DM355,  EVT8_ASP1_TX,	      0,    1,    0,     false)
+EVT_CFG(DM355,  EVT9_ASP1_RX,	      1,    1,    0,     false)
+EVT_CFG(DM355,  EVT26_MMC0_RX,	      2,    1,    0,     false)
+};
+
+/*----------------------------------------------------------------------*/
+
+static const s8 dma_chan_dm355_no_event[] = {
+	12, 13, 24, 56, 57,
+	58, 59, 60, 61, 62,
+	63,
+	-1
+};
+
+static struct edma_soc_info dm355_edma_info = {
+	.n_channel	= 64,
+	.n_region	= 4,
+	.n_slot		= 128,
+	.n_tc		= 2,
+	.noevent	= dma_chan_dm355_no_event,
+};
+
+static struct resource edma_resources[] = {
+	{
+		.name	= "edma_cc",
+		.start	= 0x01c00000,
+		.end	= 0x01c00000 + SZ_64K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.name	= "edma_tc0",
+		.start	= 0x01c10000,
+		.end	= 0x01c10000 + SZ_1K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.name	= "edma_tc1",
+		.start	= 0x01c10400,
+		.end	= 0x01c10400 + SZ_1K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= IRQ_CCINT0,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_CCERRINT,
+		.flags	= IORESOURCE_IRQ,
+	},
+	/* not using (or muxing) TC*_ERR */
+};
+
+static struct platform_device dm355_edma_device = {
+	.name			= "edma",
+	.id			= -1,
+	.dev.platform_data	= &dm355_edma_info,
+	.num_resources		= ARRAY_SIZE(edma_resources),
+	.resource		= edma_resources,
+};
+
+/*----------------------------------------------------------------------*/
+
+void __init dm355_init(void)
+{
+	davinci_clk_init(dm355_clks);
+	davinci_mux_register(dm355_pins, ARRAY_SIZE(dm355_pins));;
+}
+
+static int __init dm355_init_devices(void)
+{
+	if (!cpu_is_davinci_dm355())
+		return 0;
+
+	davinci_cfg_reg(DM355_INT_EDMA_CC);
+	platform_device_register(&dm355_edma_device);
+	return 0;
+}
+postcore_initcall(dm355_init_devices);
diff --git a/arch/arm/mach-davinci/include/mach/dm355.h b/arch/arm/mach-davinci/include/mach/dm355.h
new file mode 100644
index 0000000..f7100b6
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/dm355.h
@@ -0,0 +1,23 @@
+/*
+ * Chip specific defines for DM355 SoC
+ *
+ * Author: Kevin Hilman, Deep Root Systems, LLC
+ *
+ * 2007 (c) Deep Root Systems, LLC. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef __ASM_ARCH_DM355_H
+#define __ASM_ARCH_DM355_H
+
+#include <mach/hardware.h>
+
+void __init dm355_init(void);
+
+struct spi_board_info;
+
+void dm355_init_spi0(unsigned chipselect_mask,
+		struct spi_board_info *info, unsigned len);
+
+#endif /* __ASM_ARCH_DM355_H */
diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h
index 632847d..761ab2b 100644
--- a/arch/arm/mach-davinci/include/mach/serial.h
+++ b/arch/arm/mach-davinci/include/mach/serial.h
@@ -18,7 +18,7 @@
 #define DAVINCI_UART1_BASE	(IO_PHYS + 0x20400)
 #define DAVINCI_UART2_BASE	(IO_PHYS + 0x20800)
 
-#define DM355_UART2_BASE	(IO_PHYS + 0x206000)
+#define DM355_UART2_BASE        (IO_PHYS + 0x206000)
 
 /* DaVinci UART register offsets */
 #define UART_DAVINCI_PWREMU		0x0c
-- 
cgit v0.10.2


From e38d92fdcd04c79d28679682f63a83487c4c4c05 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Wed, 29 Apr 2009 17:44:58 -0700
Subject: davinci: DM646x: add base SoC and board support

Add support for DM646x SoC (a.k.a DaVinci HD) and its Evalution
Module (EVM.)

Original support done by Sudhakar Rajashekhara.

Signed-off-by: Sudhakar Rajashekhara <sudhakar.raj@ti.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index 1c3ab40..d7614a0 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -13,6 +13,9 @@ config ARCH_DAVINCI_DM644x
 config ARCH_DAVINCI_DM355
         bool "DaVinci 355 based system"
 
+config ARCH_DAVINCI_DM646x
+        bool "DaVinci 646x based system"
+
 comment "DaVinci Board Type"
 
 config MACH_DAVINCI_EVM
@@ -47,6 +50,15 @@ config MACH_DM355_LEOPARD
 	  Configure this option to specify the whether the board used
 	  for development is a DM355 Leopard board.
 
+config MACH_DAVINCI_DM6467_EVM
+	bool "TI DM6467 EVM"
+	default n
+	depends on ARCH_DAVINCI_DM646x
+	help
+	  Configure this option to specify the whether the board used
+	  for development is a DM6467 EVM
+
+
 config DAVINCI_MUX
 	bool "DAVINCI multiplexing support"
 	depends on ARCH_DAVINCI
diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 381c363..2873149 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -13,9 +13,11 @@ obj-$(CONFIG_CP_INTC)			+= cp_intc.o
 # Chip specific
 obj-$(CONFIG_ARCH_DAVINCI_DM644x)       += dm644x.o
 obj-$(CONFIG_ARCH_DAVINCI_DM355)        += dm355.o
+obj-$(CONFIG_ARCH_DAVINCI_DM646x)       += dm646x.o
 
 # Board specific
 obj-$(CONFIG_MACH_DAVINCI_EVM)  	+= board-dm644x-evm.o
 obj-$(CONFIG_MACH_SFFSDR)		+= board-sffsdr.o
 obj-$(CONFIG_MACH_DAVINCI_DM355_EVM)	+= board-dm355-evm.o
 obj-$(CONFIG_MACH_DM355_LEOPARD)	+= board-dm355-leopard.o
+obj-$(CONFIG_MACH_DAVINCI_DM6467_EVM)	+= board-dm646x-evm.o
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
new file mode 100644
index 0000000..94cb623
--- /dev/null
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -0,0 +1,280 @@
+/*
+ * TI DaVinci DM646X EVM board
+ *
+ * Derived from: arch/arm/mach-davinci/board-evm.c
+ * Copyright (C) 2006 Texas Instruments.
+ *
+ * (C) 2007-2008, MontaVista Software, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ */
+
+/**************************************************************************
+ * Included Files
+ **************************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/major.h>
+#include <linux/root_dev.h>
+#include <linux/dma-mapping.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/leds.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/i2c/at24.h>
+#include <linux/i2c/pcf857x.h>
+#include <linux/etherdevice.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/flash.h>
+
+#include <mach/dm646x.h>
+#include <mach/common.h>
+#include <mach/psc.h>
+#include <mach/serial.h>
+#include <mach/i2c.h>
+
+static struct davinci_uart_config uart_config __initdata = {
+	.enabled_uarts = (1 << 0),
+};
+
+/* LEDS */
+
+static struct gpio_led evm_leds[] = {
+	{ .name = "DS1", .active_low = 1, },
+	{ .name = "DS2", .active_low = 1, },
+	{ .name = "DS3", .active_low = 1, },
+	{ .name = "DS4", .active_low = 1, },
+};
+
+static __initconst struct gpio_led_platform_data evm_led_data = {
+	.num_leds = ARRAY_SIZE(evm_leds),
+	.leds     = evm_leds,
+};
+
+static struct platform_device *evm_led_dev;
+
+static int evm_led_setup(struct i2c_client *client, int gpio,
+			unsigned int ngpio, void *c)
+{
+	struct gpio_led *leds = evm_leds;
+	int status;
+
+	while (ngpio--) {
+		leds->gpio = gpio++;
+		leds++;
+	};
+
+	evm_led_dev = platform_device_alloc("leds-gpio", 0);
+	platform_device_add_data(evm_led_dev, &evm_led_data,
+				sizeof(evm_led_data));
+
+	evm_led_dev->dev.parent = &client->dev;
+	status = platform_device_add(evm_led_dev);
+	if (status < 0) {
+		platform_device_put(evm_led_dev);
+		evm_led_dev = NULL;
+	}
+	return status;
+}
+
+static int evm_led_teardown(struct i2c_client *client, int gpio,
+				unsigned ngpio, void *c)
+{
+	if (evm_led_dev) {
+		platform_device_unregister(evm_led_dev);
+		evm_led_dev = NULL;
+	}
+	return 0;
+}
+
+static int evm_sw_gpio[4] = { -EINVAL, -EINVAL, -EINVAL, -EINVAL };
+
+static int evm_sw_setup(struct i2c_client *client, int gpio,
+			unsigned ngpio, void *c)
+{
+	int status;
+	int i;
+	char label[10];
+
+	for (i = 0; i < 4; ++i) {
+		snprintf(label, 10, "user_sw%d", i);
+		status = gpio_request(gpio, label);
+		if (status)
+			goto out_free;
+		evm_sw_gpio[i] = gpio++;
+
+		status = gpio_direction_input(evm_sw_gpio[i]);
+		if (status) {
+			gpio_free(evm_sw_gpio[i]);
+			evm_sw_gpio[i] = -EINVAL;
+			goto out_free;
+		}
+
+		status = gpio_export(evm_sw_gpio[i], 0);
+		if (status) {
+			gpio_free(evm_sw_gpio[i]);
+			evm_sw_gpio[i] = -EINVAL;
+			goto out_free;
+		}
+	}
+	return status;
+out_free:
+	for (i = 0; i < 4; ++i) {
+		if (evm_sw_gpio[i] != -EINVAL) {
+			gpio_free(evm_sw_gpio[i]);
+			evm_sw_gpio[i] = -EINVAL;
+		}
+	}
+	return status;
+}
+
+static int evm_sw_teardown(struct i2c_client *client, int gpio,
+			unsigned ngpio, void *c)
+{
+	int i;
+
+	for (i = 0; i < 4; ++i) {
+		if (evm_sw_gpio[i] != -EINVAL) {
+			gpio_unexport(evm_sw_gpio[i]);
+			gpio_free(evm_sw_gpio[i]);
+			evm_sw_gpio[i] = -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int evm_pcf_setup(struct i2c_client *client, int gpio,
+			unsigned int ngpio, void *c)
+{
+	int status;
+
+	if (ngpio < 8)
+		return -EINVAL;
+
+	status = evm_sw_setup(client, gpio, 4, c);
+	if (status)
+		return status;
+
+	return evm_led_setup(client, gpio+4, 4, c);
+}
+
+static int evm_pcf_teardown(struct i2c_client *client, int gpio,
+			unsigned int ngpio, void *c)
+{
+	BUG_ON(ngpio < 8);
+
+	evm_sw_teardown(client, gpio, 4, c);
+	evm_led_teardown(client, gpio+4, 4, c);
+
+	return 0;
+}
+
+static struct pcf857x_platform_data pcf_data = {
+	.gpio_base	= DAVINCI_N_GPIO+1,
+	.setup		= evm_pcf_setup,
+	.teardown	= evm_pcf_teardown,
+};
+
+/* Most of this EEPROM is unused, but U-Boot uses some data:
+ *  - 0x7f00, 6 bytes Ethernet Address
+ *  - ... newer boards may have more
+ */
+static struct memory_accessor *at24_mem_acc;
+
+static void at24_setup(struct memory_accessor *mem_acc, void *context)
+{
+	char mac_addr[6];
+
+	at24_mem_acc = mem_acc;
+
+	/* Read MAC addr from EEPROM */
+	if (at24_mem_acc->read(at24_mem_acc, mac_addr, 0x7f00, ETH_ALEN) ==
+	    ETH_ALEN)
+		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
+}
+
+static struct at24_platform_data eeprom_info = {
+	.byte_len       = (256*1024) / 8,
+	.page_size      = 64,
+	.flags          = AT24_FLAG_ADDR16,
+	.setup          = at24_setup,
+};
+
+int dm646xevm_eeprom_read(void *buf, off_t off, size_t count)
+{
+	if (at24_mem_acc)
+		return at24_mem_acc->read(at24_mem_acc, buf, off, count);
+	return -ENODEV;
+}
+EXPORT_SYMBOL(dm646xevm_eeprom_read);
+
+int dm646xevm_eeprom_write(void *buf, off_t off, size_t count)
+{
+	if (at24_mem_acc)
+		return at24_mem_acc->write(at24_mem_acc, buf, off, count);
+	return -ENODEV;
+}
+EXPORT_SYMBOL(dm646xevm_eeprom_write);
+
+static struct i2c_board_info __initdata i2c_info[] =  {
+	{
+		I2C_BOARD_INFO("24c256", 0x50),
+		.platform_data  = &eeprom_info,
+	},
+	{
+		I2C_BOARD_INFO("pcf8574a", 0x38),
+		.platform_data	= &pcf_data,
+	},
+};
+
+static struct davinci_i2c_platform_data i2c_pdata = {
+	.bus_freq       = 100 /* kHz */,
+	.bus_delay      = 0 /* usec */,
+};
+
+static void __init evm_init_i2c(void)
+{
+	davinci_init_i2c(&i2c_pdata);
+	i2c_register_board_info(1, i2c_info, ARRAY_SIZE(i2c_info));
+}
+
+static void __init davinci_map_io(void)
+{
+	davinci_map_common_io();
+	dm646x_init();
+}
+
+static __init void evm_init(void)
+{
+	evm_init_i2c();
+	davinci_serial_init(&uart_config);
+}
+
+static __init void davinci_dm646x_evm_irq_init(void)
+{
+	davinci_irq_init();
+}
+
+MACHINE_START(DAVINCI_DM6467_EVM, "DaVinci DM646x EVM")
+	.phys_io      = IO_PHYS,
+	.io_pg_offst  = (__IO_ADDRESS(IO_PHYS) >> 18) & 0xfffc,
+	.boot_params  = (0x80000100),
+	.map_io       = davinci_map_io,
+	.init_irq     = davinci_dm646x_evm_irq_init,
+	.timer        = &davinci_timer,
+	.init_machine = evm_init,
+MACHINE_END
+
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
new file mode 100644
index 0000000..93443a6
--- /dev/null
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -0,0 +1,402 @@
+/*
+ * TI DaVinci DM644x chip specific setup
+ *
+ * Author: Kevin Hilman, Deep Root Systems, LLC
+ *
+ * 2007 (c) Deep Root Systems, LLC. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+#include <mach/dm646x.h>
+#include <mach/clock.h>
+#include <mach/cputype.h>
+#include <mach/edma.h>
+#include <mach/irqs.h>
+#include <mach/psc.h>
+#include <mach/mux.h>
+
+#include "clock.h"
+#include "mux.h"
+
+/*
+ * Device specific clocks
+ */
+#define DM646X_REF_FREQ		27000000
+#define DM646X_AUX_FREQ		24000000
+
+static struct pll_data pll1_data = {
+	.num       = 1,
+	.phys_base = DAVINCI_PLL1_BASE,
+};
+
+static struct pll_data pll2_data = {
+	.num       = 2,
+	.phys_base = DAVINCI_PLL2_BASE,
+};
+
+static struct clk ref_clk = {
+	.name = "ref_clk",
+	.rate = DM646X_REF_FREQ,
+};
+
+static struct clk aux_clkin = {
+	.name = "aux_clkin",
+	.rate = DM646X_AUX_FREQ,
+};
+
+static struct clk pll1_clk = {
+	.name = "pll1",
+	.parent = &ref_clk,
+	.pll_data = &pll1_data,
+	.flags = CLK_PLL,
+};
+
+static struct clk pll1_sysclk1 = {
+	.name = "pll1_sysclk1",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV1,
+};
+
+static struct clk pll1_sysclk2 = {
+	.name = "pll1_sysclk2",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV2,
+};
+
+static struct clk pll1_sysclk3 = {
+	.name = "pll1_sysclk3",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV3,
+};
+
+static struct clk pll1_sysclk4 = {
+	.name = "pll1_sysclk4",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV4,
+};
+
+static struct clk pll1_sysclk5 = {
+	.name = "pll1_sysclk5",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV5,
+};
+
+static struct clk pll1_sysclk6 = {
+	.name = "pll1_sysclk6",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV6,
+};
+
+static struct clk pll1_sysclk8 = {
+	.name = "pll1_sysclk8",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV8,
+};
+
+static struct clk pll1_sysclk9 = {
+	.name = "pll1_sysclk9",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV9,
+};
+
+static struct clk pll1_sysclkbp = {
+	.name = "pll1_sysclkbp",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL | PRE_PLL,
+	.div_reg = BPDIV,
+};
+
+static struct clk pll1_aux_clk = {
+	.name = "pll1_aux_clk",
+	.parent = &pll1_clk,
+	.flags = CLK_PLL | PRE_PLL,
+};
+
+static struct clk pll2_clk = {
+	.name = "pll2_clk",
+	.parent = &ref_clk,
+	.pll_data = &pll2_data,
+	.flags = CLK_PLL,
+};
+
+static struct clk pll2_sysclk1 = {
+	.name = "pll2_sysclk1",
+	.parent = &pll2_clk,
+	.flags = CLK_PLL,
+	.div_reg = PLLDIV1,
+};
+
+static struct clk dsp_clk = {
+	.name = "dsp",
+	.parent = &pll1_sysclk1,
+	.lpsc = DM646X_LPSC_C64X_CPU,
+	.flags = PSC_DSP,
+	.usecount = 1,			/* REVISIT how to disable? */
+};
+
+static struct clk arm_clk = {
+	.name = "arm",
+	.parent = &pll1_sysclk2,
+	.lpsc = DM646X_LPSC_ARM,
+	.flags = ALWAYS_ENABLED,
+};
+
+static struct clk uart0_clk = {
+	.name = "uart0",
+	.parent = &aux_clkin,
+	.lpsc = DM646X_LPSC_UART0,
+};
+
+static struct clk uart1_clk = {
+	.name = "uart1",
+	.parent = &aux_clkin,
+	.lpsc = DM646X_LPSC_UART1,
+};
+
+static struct clk uart2_clk = {
+	.name = "uart2",
+	.parent = &aux_clkin,
+	.lpsc = DM646X_LPSC_UART2,
+};
+
+static struct clk i2c_clk = {
+	.name = "I2CCLK",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM646X_LPSC_I2C,
+};
+
+static struct clk gpio_clk = {
+	.name = "gpio",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM646X_LPSC_GPIO,
+};
+
+static struct clk aemif_clk = {
+	.name = "aemif",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM646X_LPSC_AEMIF,
+	.flags = ALWAYS_ENABLED,
+};
+
+static struct clk emac_clk = {
+	.name = "emac",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM646X_LPSC_EMAC,
+};
+
+static struct clk pwm0_clk = {
+	.name = "pwm0",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM646X_LPSC_PWM0,
+	.usecount = 1,            /* REVIST: disabling hangs system */
+};
+
+static struct clk pwm1_clk = {
+	.name = "pwm1",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM646X_LPSC_PWM1,
+	.usecount = 1,            /* REVIST: disabling hangs system */
+};
+
+static struct clk timer0_clk = {
+	.name = "timer0",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM646X_LPSC_TIMER0,
+};
+
+static struct clk timer1_clk = {
+	.name = "timer1",
+	.parent = &pll1_sysclk3,
+	.lpsc = DM646X_LPSC_TIMER1,
+};
+
+static struct clk timer2_clk = {
+	.name = "timer2",
+	.parent = &pll1_sysclk3,
+	.flags = ALWAYS_ENABLED, /* no LPSC, always enabled; c.f. spruep9a */
+};
+
+static struct clk vpif0_clk = {
+	.name = "vpif0",
+	.parent = &ref_clk,
+	.lpsc = DM646X_LPSC_VPSSMSTR,
+	.flags = ALWAYS_ENABLED,
+};
+
+static struct clk vpif1_clk = {
+	.name = "vpif1",
+	.parent = &ref_clk,
+	.lpsc = DM646X_LPSC_VPSSSLV,
+	.flags = ALWAYS_ENABLED,
+};
+
+struct davinci_clk dm646x_clks[] = {
+	CLK(NULL, "ref", &ref_clk),
+	CLK(NULL, "aux", &aux_clkin),
+	CLK(NULL, "pll1", &pll1_clk),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclk1),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclk2),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclk3),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclk4),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclk5),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclk6),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclk8),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclk9),
+	CLK(NULL, "pll1_sysclk", &pll1_sysclkbp),
+	CLK(NULL, "pll1_aux", &pll1_aux_clk),
+	CLK(NULL, "pll2", &pll2_clk),
+	CLK(NULL, "pll2_sysclk1", &pll2_sysclk1),
+	CLK(NULL, "dsp", &dsp_clk),
+	CLK(NULL, "arm", &arm_clk),
+	CLK(NULL, "uart0", &uart0_clk),
+	CLK(NULL, "uart1", &uart1_clk),
+	CLK(NULL, "uart2", &uart2_clk),
+	CLK("i2c_davinci.1", NULL, &i2c_clk),
+	CLK(NULL, "gpio", &gpio_clk),
+	CLK(NULL, "aemif", &aemif_clk),
+	CLK("davinci_emac.1", NULL, &emac_clk),
+	CLK(NULL, "pwm0", &pwm0_clk),
+	CLK(NULL, "pwm1", &pwm1_clk),
+	CLK(NULL, "timer0", &timer0_clk),
+	CLK(NULL, "timer1", &timer1_clk),
+	CLK("watchdog", NULL, &timer2_clk),
+	CLK(NULL, "vpif0", &vpif0_clk),
+	CLK(NULL, "vpif1", &vpif1_clk),
+	CLK(NULL, NULL, NULL),
+};
+
+/*
+ * Device specific mux setup
+ *
+ *	soc	description	mux  mode   mode  mux	 dbg
+ *				reg  offset mask  mode
+ */
+static const struct mux_config dm646x_pins[] = {
+MUX_CFG(DM646X, ATAEN,		0,   0,     1,	  1,	 true)
+
+MUX_CFG(DM646X, AUDCK1,		0,   29,    1,	  0,	 false)
+
+MUX_CFG(DM646X, AUDCK0,		0,   28,    1,	  0,	 false)
+
+MUX_CFG(DM646X, CRGMUX,			0,   24,    7,    5,	 true)
+
+MUX_CFG(DM646X, STSOMUX_DISABLE,	0,   22,    3,    0,	 true)
+
+MUX_CFG(DM646X, STSIMUX_DISABLE,	0,   20,    3,    0,	 true)
+
+MUX_CFG(DM646X, PTSOMUX_DISABLE,	0,   18,    3,    0,	 true)
+
+MUX_CFG(DM646X, PTSIMUX_DISABLE,	0,   16,    3,    0,	 true)
+
+MUX_CFG(DM646X, STSOMUX,		0,   22,    3,    2,	 true)
+
+MUX_CFG(DM646X, STSIMUX,		0,   20,    3,    2,	 true)
+
+MUX_CFG(DM646X, PTSOMUX_PARALLEL,	0,   18,    3,    2,	 true)
+
+MUX_CFG(DM646X, PTSIMUX_PARALLEL,	0,   16,    3,    2,	 true)
+
+MUX_CFG(DM646X, PTSOMUX_SERIAL,		0,   18,    3,    3,	 true)
+
+MUX_CFG(DM646X, PTSIMUX_SERIAL,		0,   16,    3,    3,	 true)
+};
+
+/*----------------------------------------------------------------------*/
+
+static const s8 dma_chan_dm646x_no_event[] = {
+	 0,  1,  2,  3, 13,
+	14, 15, 24, 25, 26,
+	27, 30, 31, 54, 55,
+	56,
+	-1
+};
+
+static struct edma_soc_info dm646x_edma_info = {
+	.n_channel	= 64,
+	.n_region	= 6,	/* 0-1, 4-7 */
+	.n_slot		= 512,
+	.n_tc		= 4,
+	.noevent	= dma_chan_dm646x_no_event,
+};
+
+static struct resource edma_resources[] = {
+	{
+		.name	= "edma_cc",
+		.start	= 0x01c00000,
+		.end	= 0x01c00000 + SZ_64K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.name	= "edma_tc0",
+		.start	= 0x01c10000,
+		.end	= 0x01c10000 + SZ_1K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.name	= "edma_tc1",
+		.start	= 0x01c10400,
+		.end	= 0x01c10400 + SZ_1K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.name	= "edma_tc2",
+		.start	= 0x01c10800,
+		.end	= 0x01c10800 + SZ_1K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.name	= "edma_tc3",
+		.start	= 0x01c10c00,
+		.end	= 0x01c10c00 + SZ_1K - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= IRQ_CCINT0,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_CCERRINT,
+		.flags	= IORESOURCE_IRQ,
+	},
+	/* not using TC*_ERR */
+};
+
+static struct platform_device dm646x_edma_device = {
+	.name			= "edma",
+	.id			= -1,
+	.dev.platform_data	= &dm646x_edma_info,
+	.num_resources		= ARRAY_SIZE(edma_resources),
+	.resource		= edma_resources,
+};
+
+/*----------------------------------------------------------------------*/
+
+void __init dm646x_init(void)
+{
+	davinci_clk_init(dm646x_clks);
+	davinci_mux_register(dm646x_pins, ARRAY_SIZE(dm646x_pins));
+}
+
+static int __init dm646x_init_devices(void)
+{
+	if (!cpu_is_davinci_dm646x())
+		return 0;
+
+	platform_device_register(&dm646x_edma_device);
+	return 0;
+}
+postcore_initcall(dm646x_init_devices);
diff --git a/arch/arm/mach-davinci/include/mach/dm646x.h b/arch/arm/mach-davinci/include/mach/dm646x.h
new file mode 100644
index 0000000..d917939
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/dm646x.h
@@ -0,0 +1,18 @@
+/*
+ * Chip specific defines for DM646x SoC
+ *
+ * Author: Kevin Hilman, Deep Root Systems, LLC
+ *
+ * 2007 (c) Deep Root Systems, LLC. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef __ASM_ARCH_DM646X_H
+#define __ASM_ARCH_DM646X_H
+
+#include <mach/hardware.h>
+
+void __init dm646x_init(void);
+
+#endif /* __ASM_ARCH_DM646X_H */
-- 
cgit v0.10.2


From 35652fe1858e664707cfa32e80547b210cc41f78 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Wed, 20 May 2009 16:44:44 -0700
Subject: davinci: update davinci_all_defconfig for dm355, dm6467

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index eb2738b..c371c65 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30-rc2
-# Wed Apr 15 08:16:53 2009
+# Linux kernel version: 2.6.30-rc6
+# Wed May 20 17:45:58 2009
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
@@ -188,11 +188,17 @@ CONFIG_ARCH_DAVINCI=y
 # DaVinci Core Type
 #
 CONFIG_ARCH_DAVINCI_DM644x=y
+CONFIG_ARCH_DAVINCI_DM355=y
+CONFIG_ARCH_DAVINCI_DM646x=y
 
 #
 # DaVinci Board Type
 #
 CONFIG_MACH_DAVINCI_EVM=y
+CONFIG_MACH_SFFSDR=y
+CONFIG_MACH_DAVINCI_DM355_EVM=y
+CONFIG_MACH_DM355_LEOPARD=y
+CONFIG_MACH_DAVINCI_DM6467_EVM=y
 CONFIG_DAVINCI_MUX=y
 CONFIG_DAVINCI_MUX_DEBUG=y
 CONFIG_DAVINCI_MUX_WARNINGS=y
@@ -661,7 +667,9 @@ CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_AX88796 is not set
 # CONFIG_SMC91X is not set
-# CONFIG_DM9000 is not set
+CONFIG_DM9000=y
+CONFIG_DM9000_DEBUGLEVEL=4
+# CONFIG_DM9000_FORCE_SIMPLE_PHY_POLL is not set
 # CONFIG_ETHOC is not set
 # CONFIG_SMC911X is not set
 # CONFIG_SMSC911X is not set
@@ -963,6 +971,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
 # CONFIG_MFD_ASIC3 is not set
+# CONFIG_MFD_DM355EVM_MSP is not set
 # CONFIG_HTC_EGPIO is not set
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_TPS65010 is not set
-- 
cgit v0.10.2


From 2dbf56aeb7986b54651c93ed171877e8179289bc Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Mon, 11 May 2009 15:55:03 -0700
Subject: davinci: MMC platform support

Add SoC and platform-specific data and init for MMC driver.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index 6af3c6c..0874413 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -36,6 +36,7 @@
 #include <mach/i2c.h>
 #include <mach/serial.h>
 #include <mach/nand.h>
+#include <mach/mmc.h>
 
 #define DAVINCI_ASYNC_EMIF_CONTROL_BASE		0x01e10000
 #define DAVINCI_ASYNC_EMIF_DATA_CE0_BASE	0x02000000
@@ -191,6 +192,31 @@ static void __init dm355_evm_map_io(void)
 	dm355_init();
 }
 
+static int dm355evm_mmc_get_cd(int module)
+{
+	if (!gpio_is_valid(dm355evm_mmc_gpios))
+		return -ENXIO;
+	/* low == card present */
+	return !gpio_get_value_cansleep(dm355evm_mmc_gpios + 2 * module + 1);
+}
+
+static int dm355evm_mmc_get_ro(int module)
+{
+	if (!gpio_is_valid(dm355evm_mmc_gpios))
+		return -ENXIO;
+	/* high == card's write protect switch active */
+	return gpio_get_value_cansleep(dm355evm_mmc_gpios + 2 * module + 0);
+}
+
+static struct davinci_mmc_config dm355evm_mmc_config = {
+	.get_cd		= dm355evm_mmc_get_cd,
+	.get_ro		= dm355evm_mmc_get_ro,
+	.wires		= 4,
+	.max_freq       = 50000000,
+	.caps           = MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED,
+	.version	= MMC_CTLR_VERSION_1,
+};
+
 /* Don't connect anything to J10 unless you're only using USB host
  * mode *and* have to do so with some kind of gender-bender.  If
  * you have proper Mini-B or Mini-A cables (or Mini-A adapters)
@@ -249,6 +275,9 @@ static __init void dm355_evm_init(void)
 	/* irlml6401 switches over 1A in under 8 msec */
 	setup_usb(500, 8);
 
+	davinci_setup_mmc(0, &dm355evm_mmc_config);
+	davinci_setup_mmc(1, &dm355evm_mmc_config);
+
 	dm355_init_spi0(BIT(0), dm355_evm_spi_info,
 			ARRAY_SIZE(dm355_evm_spi_info));
 }
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index 22f16f3..d4562f5 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -35,6 +35,7 @@
 #include <mach/i2c.h>
 #include <mach/serial.h>
 #include <mach/nand.h>
+#include <mach/mmc.h>
 
 #define DAVINCI_ASYNC_EMIF_CONTROL_BASE		0x01e10000
 #define DAVINCI_ASYNC_EMIF_DATA_CE0_BASE	0x02000000
@@ -190,6 +191,30 @@ static void __init dm355_leopard_map_io(void)
 	dm355_init();
 }
 
+static int dm355leopard_mmc_get_cd(int module)
+{
+	if (!gpio_is_valid(leopard_mmc_gpio))
+		return -ENXIO;
+	/* low == card present */
+	return !gpio_get_value_cansleep(leopard_mmc_gpio + 2 * module + 1);
+}
+
+static int dm355leopard_mmc_get_ro(int module)
+{
+	if (!gpio_is_valid(leopard_mmc_gpio))
+		return -ENXIO;
+	/* high == card's write protect switch active */
+	return gpio_get_value_cansleep(leopard_mmc_gpio + 2 * module + 0);
+}
+
+static struct davinci_mmc_config dm355leopard_mmc_config = {
+	.get_cd		= dm355leopard_mmc_get_cd,
+	.get_ro		= dm355leopard_mmc_get_ro,
+	.wires		= 4,
+	.max_freq       = 50000000,
+	.caps           = MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED,
+};
+
 /* Don't connect anything to J10 unless you're only using USB host
  * mode *and* have to do so with some kind of gender-bender.  If
  * you have proper Mini-B or Mini-A cables (or Mini-A adapters)
@@ -248,6 +273,9 @@ static __init void dm355_leopard_init(void)
 	/* irlml6401 switches over 1A in under 8 msec */
 	setup_usb(500, 8);
 
+	davinci_setup_mmc(0, &dm355leopard_mmc_config);
+	davinci_setup_mmc(1, &dm355leopard_mmc_config);
+
 	dm355_init_spi0(BIT(0), dm355_leopard_spi_info,
 			ARRAY_SIZE(dm355_leopard_spi_info));
 }
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index b2e7f9c..9c3ce31 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -44,6 +44,7 @@
 #include <mach/mux.h>
 #include <mach/psc.h>
 #include <mach/nand.h>
+#include <mach/mmc.h>
 
 #define DM644X_EVM_PHY_MASK		(0x2)
 #define DM644X_EVM_MDIO_FREQUENCY	(2200000) /* PHY bus frequency */
@@ -545,6 +546,27 @@ static int dm6444evm_msp430_get_pins(void)
 	return (buf[3] << 8) | buf[2];
 }
 
+static int dm6444evm_mmc_get_cd(int module)
+{
+	int status = dm6444evm_msp430_get_pins();
+
+	return (status < 0) ? status : !(status & BIT(1));
+}
+
+static int dm6444evm_mmc_get_ro(int module)
+{
+	int status = dm6444evm_msp430_get_pins();
+
+	return (status < 0) ? status : status & BIT(6 + 8);
+}
+
+static struct davinci_mmc_config dm6446evm_mmc_config = {
+	.get_cd		= dm6444evm_mmc_get_cd,
+	.get_ro		= dm6444evm_mmc_get_ro,
+	.wires		= 4,
+	.version	= MMC_CTLR_VERSION_1
+};
+
 static struct i2c_board_info __initdata i2c_info[] =  {
 	{
 		I2C_BOARD_INFO("dm6446evm_msp", 0x23),
@@ -671,6 +693,8 @@ static __init void davinci_evm_init(void)
 			     ARRAY_SIZE(davinci_evm_devices));
 	evm_init_i2c();
 
+	davinci_setup_mmc(0, &dm6446evm_mmc_config);
+
 	davinci_serial_init(&uart_config);
 
 	/* Register the fixup for PHY on DaVinci */
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 7fdc408..56c1931 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -23,8 +23,13 @@
 #include <mach/irqs.h>
 #include <mach/cputype.h>
 #include <mach/mux.h>
+#include <mach/edma.h>
+#include <mach/mmc.h>
 
 #define DAVINCI_I2C_BASE	     0x01C21000
+#define DAVINCI_MMCSD0_BASE	     0x01E10000
+#define DM355_MMCSD0_BASE	     0x01E11000
+#define DM355_MMCSD1_BASE	     0x01E00000
 
 static struct resource i2c_resources[] = {
 	{
@@ -54,6 +59,158 @@ void __init davinci_init_i2c(struct davinci_i2c_platform_data *pdata)
 	(void) platform_device_register(&davinci_i2c_device);
 }
 
+#if	defined(CONFIG_MMC_DAVINCI) || defined(CONFIG_MMC_DAVINCI_MODULE)
+
+static u64 mmcsd0_dma_mask = DMA_32BIT_MASK;
+
+static struct resource mmcsd0_resources[] = {
+	{
+		/* different on dm355 */
+		.start = DAVINCI_MMCSD0_BASE,
+		.end   = DAVINCI_MMCSD0_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	/* IRQs:  MMC/SD, then SDIO */
+	{
+		.start = IRQ_MMCINT,
+		.flags = IORESOURCE_IRQ,
+	}, {
+		/* different on dm355 */
+		.start = IRQ_SDIOINT,
+		.flags = IORESOURCE_IRQ,
+	},
+	/* DMA channels: RX, then TX */
+	{
+		.start = DAVINCI_DMA_MMCRXEVT,
+		.flags = IORESOURCE_DMA,
+	}, {
+		.start = DAVINCI_DMA_MMCTXEVT,
+		.flags = IORESOURCE_DMA,
+	},
+};
+
+static struct platform_device davinci_mmcsd0_device = {
+	.name = "davinci_mmc",
+	.id = 0,
+	.dev = {
+		.dma_mask = &mmcsd0_dma_mask,
+		.coherent_dma_mask = DMA_32BIT_MASK,
+	},
+	.num_resources = ARRAY_SIZE(mmcsd0_resources),
+	.resource = mmcsd0_resources,
+};
+
+static u64 mmcsd1_dma_mask = DMA_32BIT_MASK;
+
+static struct resource mmcsd1_resources[] = {
+	{
+		.start = DM355_MMCSD1_BASE,
+		.end   = DM355_MMCSD1_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	/* IRQs:  MMC/SD, then SDIO */
+	{
+		.start = IRQ_DM355_MMCINT1,
+		.flags = IORESOURCE_IRQ,
+	}, {
+		.start = IRQ_DM355_SDIOINT1,
+		.flags = IORESOURCE_IRQ,
+	},
+	/* DMA channels: RX, then TX */
+	{
+		.start = 30,	/* rx */
+		.flags = IORESOURCE_DMA,
+	}, {
+		.start = 31,	/* tx */
+		.flags = IORESOURCE_DMA,
+	},
+};
+
+static struct platform_device davinci_mmcsd1_device = {
+	.name = "davinci_mmc",
+	.id = 1,
+	.dev = {
+		.dma_mask = &mmcsd1_dma_mask,
+		.coherent_dma_mask = DMA_32BIT_MASK,
+	},
+	.num_resources = ARRAY_SIZE(mmcsd1_resources),
+	.resource = mmcsd1_resources,
+};
+
+
+void __init davinci_setup_mmc(int module, struct davinci_mmc_config *config)
+{
+	struct platform_device	*pdev = NULL;
+
+	if (WARN_ON(cpu_is_davinci_dm646x()))
+		return;
+
+	/* REVISIT: update PINMUX, ARM_IRQMUX, and EDMA_EVTMUX here too;
+	 * for example if MMCSD1 is used for SDIO, maybe DAT2 is unused.
+	 *
+	 * FIXME dm6441 (no MMC/SD), dm357 (one), and dm335 (two) are
+	 * not handled right here ...
+	 */
+	switch (module) {
+	case 1:
+		if (!cpu_is_davinci_dm355())
+			break;
+
+		/* REVISIT we may not need all these pins if e.g. this
+		 * is a hard-wired SDIO device...
+		 */
+		davinci_cfg_reg(DM355_SD1_CMD);
+		davinci_cfg_reg(DM355_SD1_CLK);
+		davinci_cfg_reg(DM355_SD1_DATA0);
+		davinci_cfg_reg(DM355_SD1_DATA1);
+		davinci_cfg_reg(DM355_SD1_DATA2);
+		davinci_cfg_reg(DM355_SD1_DATA3);
+
+		pdev = &davinci_mmcsd1_device;
+		break;
+	case 0:
+		if (cpu_is_davinci_dm355()) {
+			mmcsd0_resources[0].start = DM355_MMCSD0_BASE;
+			mmcsd0_resources[0].end = DM355_MMCSD0_BASE + SZ_4K - 1;
+			mmcsd0_resources[2].start = IRQ_DM355_SDIOINT0;
+
+			/* expose all 6 MMC0 signals:  CLK, CMD, DATA[0..3] */
+			davinci_cfg_reg(DM355_MMCSD0);
+
+			/* enable RX EDMA */
+			davinci_cfg_reg(DM355_EVT26_MMC0_RX);
+		}
+
+		else if (cpu_is_davinci_dm644x()) {
+			/* REVISIT: should this be in board-init code? */
+			void __iomem *base =
+				IO_ADDRESS(DAVINCI_SYSTEM_MODULE_BASE);
+
+			/* Power-on 3.3V IO cells */
+			__raw_writel(0, base + DM64XX_VDD3P3V_PWDN);
+			/*Set up the pull regiter for MMC */
+			davinci_cfg_reg(DM644X_MSTK);
+		}
+
+		pdev = &davinci_mmcsd0_device;
+		break;
+	}
+
+	if (WARN_ON(!pdev))
+		return;
+
+	pdev->dev.platform_data = config;
+	platform_device_register(pdev);
+}
+
+#else
+
+void __init davinci_setup_mmc(int module, struct davinci_mmc_config *config)
+{
+}
+
+#endif
+
 /*-------------------------------------------------------------------------*/
 
 static struct resource wdt_resources[] = {
diff --git a/arch/arm/mach-davinci/include/mach/mmc.h b/arch/arm/mach-davinci/include/mach/mmc.h
new file mode 100644
index 0000000..5a85e24
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/mmc.h
@@ -0,0 +1,33 @@
+/*
+ *  Board-specific MMC configuration
+ */
+
+#ifndef _DAVINCI_MMC_H
+#define _DAVINCI_MMC_H
+
+#include <linux/types.h>
+#include <linux/mmc/host.h>
+
+struct davinci_mmc_config {
+	/* get_cd()/get_wp() may sleep */
+	int	(*get_cd)(int module);
+	int	(*get_ro)(int module);
+	/* wires == 0 is equivalent to wires == 4 (4-bit parallel) */
+	u8	wires;
+
+	u32     max_freq;
+
+	/* any additional host capabilities: OR'd in to mmc->f_caps */
+	u32     caps;
+
+	/* Version of the MMC/SD controller */
+	u8	version;
+};
+void davinci_setup_mmc(int module, struct davinci_mmc_config *config);
+
+enum {
+	MMC_CTLR_VERSION_1 = 0,	/* DM644x and DM355 */
+	MMC_CTLR_VERSION_2,	/* DA830 */
+};
+
+#endif
-- 
cgit v0.10.2


From ac7b75b5bbbfd60b752869a22daa3be99b5b4f99 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Thu, 7 May 2009 06:19:40 -0700
Subject: davinci: EMAC platform support

Add SoC and platform-specific data and init for DaVinci EMAC network
driver.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 9c3ce31..02e7cda 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -16,12 +16,11 @@
 #include <linux/gpio.h>
 #include <linux/leds.h>
 #include <linux/memory.h>
-#include <linux/etherdevice.h>
 
 #include <linux/i2c.h>
 #include <linux/i2c/pcf857x.h>
 #include <linux/i2c/at24.h>
-
+#include <linux/etherdevice.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
@@ -39,6 +38,7 @@
 
 #include <mach/dm644x.h>
 #include <mach/common.h>
+#include <mach/emac.h>
 #include <mach/i2c.h>
 #include <mach/serial.h>
 #include <mach/mux.h>
@@ -60,6 +60,11 @@
 #define LXT971_PHY_ID	(0x001378e2)
 #define LXT971_PHY_MASK	(0xfffffff0)
 
+static struct emac_platform_data dm644x_evm_emac_pdata = {
+	.phy_mask	= DM644X_EVM_PHY_MASK,
+	.mdio_max_freq	= DM644X_EVM_MDIO_FREQUENCY,
+};
+
 static struct mtd_partition davinci_evm_norflash_partitions[] = {
 	/* bootloader (UBL, U-Boot, etc) in first 5 sectors */
 	{
@@ -441,15 +446,15 @@ static struct memory_accessor *at24_mem_acc;
 
 static void at24_setup(struct memory_accessor *mem_acc, void *context)
 {
-	DECLARE_MAC_BUF(mac_str);
-	char mac_addr[6];
+	char mac_addr[ETH_ALEN];
 
 	at24_mem_acc = mem_acc;
 
 	/* Read MAC addr from EEPROM */
-	if (at24_mem_acc->read(at24_mem_acc, mac_addr, 0x7f00, 6) == 6) {
-		printk(KERN_INFO "Read MAC addr from EEPROM: %s\n",
-		       print_mac(mac_str, mac_addr));
+	if (at24_mem_acc->read(at24_mem_acc, mac_addr, 0x7f00, ETH_ALEN) ==
+	    ETH_ALEN) {
+		printk(KERN_INFO "Read MAC addr from EEPROM: %pM\n", mac_addr);
+		memcpy(dm644x_evm_emac_pdata.mac_addr, mac_addr, ETH_ALEN);
 	}
 }
 
@@ -460,22 +465,6 @@ static struct at24_platform_data eeprom_info = {
 	.setup          = at24_setup,
 };
 
-int dm6446evm_eeprom_read(void *buf, off_t off, size_t count)
-{
-	if (at24_mem_acc)
-		return at24_mem_acc->read(at24_mem_acc, buf, off, count);
-	return -ENODEV;
-}
-EXPORT_SYMBOL(dm6446evm_eeprom_read);
-
-int dm6446evm_eeprom_write(void *buf, off_t off, size_t count)
-{
-	if (at24_mem_acc)
-		return at24_mem_acc->write(at24_mem_acc, buf, off, count);
-	return -ENODEV;
-}
-EXPORT_SYMBOL(dm6446evm_eeprom_write);
-
 /*
  * MSP430 supports RTC, card detection, input from IR remote, and
  * a bit more.  It triggers interrupts on GPIO(7) from pressing
@@ -697,6 +686,8 @@ static __init void davinci_evm_init(void)
 
 	davinci_serial_init(&uart_config);
 
+	dm644x_init_emac(&dm644x_evm_emac_pdata);
+
 	/* Register the fixup for PHY on DaVinci */
 	phy_register_fixup_for_uid(LXT971_PHY_ID, LXT971_PHY_MASK,
 					davinci_phy_fixup);
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 94cb623..aedde3c 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -45,6 +45,16 @@
 #include <mach/psc.h>
 #include <mach/serial.h>
 #include <mach/i2c.h>
+#include <mach/mmc.h>
+#include <mach/emac.h>
+
+#define DM646X_EVM_PHY_MASK		(0x2)
+#define DM646X_EVM_MDIO_FREQUENCY	(2200000) /* PHY bus frequency */
+
+static struct emac_platform_data dm646x_evm_emac_pdata = {
+	.phy_mask	= DM646X_EVM_PHY_MASK,
+	.mdio_max_freq	= DM646X_EVM_MDIO_FREQUENCY,
+};
 
 static struct davinci_uart_config uart_config __initdata = {
 	.enabled_uarts = (1 << 0),
@@ -196,14 +206,16 @@ static struct memory_accessor *at24_mem_acc;
 
 static void at24_setup(struct memory_accessor *mem_acc, void *context)
 {
-	char mac_addr[6];
+	char mac_addr[ETH_ALEN];
 
 	at24_mem_acc = mem_acc;
 
 	/* Read MAC addr from EEPROM */
 	if (at24_mem_acc->read(at24_mem_acc, mac_addr, 0x7f00, ETH_ALEN) ==
-	    ETH_ALEN)
+	    ETH_ALEN) {
 		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
+		memcpy(dm646x_evm_emac_pdata.mac_addr, mac_addr, ETH_ALEN);
+	}
 }
 
 static struct at24_platform_data eeprom_info = {
@@ -261,6 +273,7 @@ static __init void evm_init(void)
 {
 	evm_init_i2c();
 	davinci_serial_init(&uart_config);
+	dm646x_init_emac(&dm646x_evm_emac_pdata);
 }
 
 static __init void davinci_dm646x_evm_irq_init(void)
diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
index c6525e4..913dad9 100644
--- a/arch/arm/mach-davinci/board-sffsdr.c
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -48,11 +48,15 @@
 
 #include <mach/dm644x.h>
 #include <mach/common.h>
+#include <mach/emac.h>
 #include <mach/i2c.h>
 #include <mach/serial.h>
 #include <mach/psc.h>
 #include <mach/mux.h>
 
+#define SFFSDR_PHY_MASK		(0x2)
+#define SFFSDR_MDIO_FREQUENCY	(2200000) /* PHY bus frequency */
+
 #define DAVINCI_ASYNC_EMIF_CONTROL_BASE   0x01e00000
 #define DAVINCI_ASYNC_EMIF_DATA_CE0_BASE  0x02000000
 
@@ -104,8 +108,10 @@ static struct platform_device davinci_sffsdr_nandflash_device = {
 	.resource	= davinci_sffsdr_nandflash_resource,
 };
 
-/* Get Ethernet address from kernel boot params */
-static u8 mac_addr[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
+static struct emac_platform_data sffsdr_emac_pdata = {
+	.phy_mask	= SFFSDR_PHY_MASK,
+	.mdio_max_freq	= SFFSDR_MDIO_FREQUENCY,
+};
 
 static struct at24_platform_data eeprom_info = {
 	.byte_len	= (64*1024) / 8,
@@ -156,6 +162,7 @@ static __init void davinci_sffsdr_init(void)
 			     ARRAY_SIZE(davinci_sffsdr_devices));
 	sffsdr_init_i2c();
 	davinci_serial_init(&uart_config);
+	dm644x_init_emac(&sffsdr_emac_pdata);
 	setup_usb(0, 0); /* We support only peripheral mode. */
 
 	/* mux VLYNQ pins */
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index d428ef1..0419d57 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -444,6 +444,25 @@ static struct platform_device dm644x_edma_device = {
 };
 
 /*----------------------------------------------------------------------*/
+#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
+
+void dm644x_init_emac(struct emac_platform_data *pdata)
+{
+	pdata->ctrl_reg_offset		= DM644X_EMAC_CNTRL_OFFSET;
+	pdata->ctrl_mod_reg_offset	= DM644X_EMAC_CNTRL_MOD_OFFSET;
+	pdata->ctrl_ram_offset		= DM644X_EMAC_CNTRL_RAM_OFFSET;
+	pdata->mdio_reg_offset		= DM644X_EMAC_MDIO_OFFSET;
+	pdata->ctrl_ram_size		= DM644X_EMAC_CNTRL_RAM_SIZE;
+	pdata->version			= EMAC_VERSION_1;
+	dm644x_emac_device.dev.platform_data = pdata;
+	platform_device_register(&dm644x_emac_device);
+}
+#else
+
+void dm644x_init_emac(struct emac_platform_data *unused) {}
+
+#endif
+
 void __init dm644x_init(void)
 {
 	davinci_clk_init(dm644x_clks);
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 93443a6..975ed06 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -279,6 +279,44 @@ struct davinci_clk dm646x_clks[] = {
 	CLK(NULL, NULL, NULL),
 };
 
+#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
+static struct resource dm646x_emac_resources[] = {
+	{
+		.start	= DM646X_EMAC_BASE,
+		.end	= DM646X_EMAC_BASE + 0x47ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= IRQ_DM646X_EMACRXTHINT,
+		.end	= IRQ_DM646X_EMACRXTHINT,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM646X_EMACRXINT,
+		.end	= IRQ_DM646X_EMACRXINT,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM646X_EMACTXINT,
+		.end	= IRQ_DM646X_EMACTXINT,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM646X_EMACMISCINT,
+		.end	= IRQ_DM646X_EMACMISCINT,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device dm646x_emac_device = {
+	.name		= "davinci_emac",
+	.id		= 1,
+	.num_resources	= ARRAY_SIZE(dm646x_emac_resources),
+	.resource	= dm646x_emac_resources,
+};
+
+#endif
+
 /*
  * Device specific mux setup
  *
@@ -385,6 +423,25 @@ static struct platform_device dm646x_edma_device = {
 
 /*----------------------------------------------------------------------*/
 
+#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
+
+void dm646x_init_emac(struct emac_platform_data *pdata)
+{
+	pdata->ctrl_reg_offset		= DM646X_EMAC_CNTRL_OFFSET;
+	pdata->ctrl_mod_reg_offset	= DM646X_EMAC_CNTRL_MOD_OFFSET;
+	pdata->ctrl_ram_offset		= DM646X_EMAC_CNTRL_RAM_OFFSET;
+	pdata->mdio_reg_offset		= DM646X_EMAC_MDIO_OFFSET;
+	pdata->ctrl_ram_size		= DM646X_EMAC_CNTRL_RAM_SIZE;
+	pdata->version			= EMAC_VERSION_2;
+	dm646x_emac_device.dev.platform_data = pdata;
+	platform_device_register(&dm646x_emac_device);
+}
+#else
+
+void dm646x_init_emac(struct emac_platform_data *unused) {}
+
+#endif
+
 void __init dm646x_init(void)
 {
 	davinci_clk_init(dm646x_clks);
diff --git a/arch/arm/mach-davinci/include/mach/dm644x.h b/arch/arm/mach-davinci/include/mach/dm644x.h
index 3dcb9f4..ace167a 100644
--- a/arch/arm/mach-davinci/include/mach/dm644x.h
+++ b/arch/arm/mach-davinci/include/mach/dm644x.h
@@ -24,6 +24,7 @@
 
 #include <linux/platform_device.h>
 #include <mach/hardware.h>
+#include <mach/emac.h>
 
 #define DM644X_EMAC_BASE		(0x01C80000)
 #define DM644X_EMAC_CNTRL_OFFSET	(0x0000)
@@ -33,5 +34,6 @@
 #define DM644X_EMAC_CNTRL_RAM_SIZE	(0x2000)
 
 void __init dm644x_init(void);
+void dm644x_init_emac(struct emac_platform_data *pdata);
 
 #endif /* __ASM_ARCH_DM644X_H */
diff --git a/arch/arm/mach-davinci/include/mach/dm646x.h b/arch/arm/mach-davinci/include/mach/dm646x.h
index d917939..ea7b28e 100644
--- a/arch/arm/mach-davinci/include/mach/dm646x.h
+++ b/arch/arm/mach-davinci/include/mach/dm646x.h
@@ -12,7 +12,16 @@
 #define __ASM_ARCH_DM646X_H
 
 #include <mach/hardware.h>
+#include <mach/emac.h>
+
+#define DM646X_EMAC_BASE		(0x01C80000)
+#define DM646X_EMAC_CNTRL_OFFSET	(0x0000)
+#define DM646X_EMAC_CNTRL_MOD_OFFSET	(0x1000)
+#define DM646X_EMAC_CNTRL_RAM_OFFSET	(0x2000)
+#define DM646X_EMAC_MDIO_OFFSET		(0x4000)
+#define DM646X_EMAC_CNTRL_RAM_SIZE	(0x2000)
 
 void __init dm646x_init(void);
+void dm646x_init_emac(struct emac_platform_data *pdata);
 
 #endif /* __ASM_ARCH_DM646X_H */
diff --git a/arch/arm/mach-davinci/include/mach/emac.h b/arch/arm/mach-davinci/include/mach/emac.h
new file mode 100644
index 0000000..549fcce
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/emac.h
@@ -0,0 +1,36 @@
+/*
+ * TI DaVinci EMAC platform support
+ *
+ * Author: Kevin Hilman, Deep Root Systems, LLC
+ *
+ * 2007 (c) Deep Root Systems, LLC. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef _MACH_DAVINCI_EMAC_H
+#define _MACH_DAVINCI_EMAC_H
+
+#include <linux/if_ether.h>
+
+struct emac_platform_data {
+	char mac_addr[ETH_ALEN];
+	u32 ctrl_reg_offset;
+	u32 ctrl_mod_reg_offset;
+	u32 ctrl_ram_offset;
+	u32 mdio_reg_offset;
+	u32 ctrl_ram_size;
+	u32 phy_mask;
+	u32 mdio_max_freq;
+	u8 rmii_en;
+	u8 version;
+};
+
+enum {
+	EMAC_VERSION_1,	/* DM644x */
+	EMAC_VERSION_2,	/* DM646x */
+};
+void davinci_init_emac(struct emac_platform_data *pdata);
+#endif
+
+
-- 
cgit v0.10.2


From f17e04afaff84b5cfd317da29ac4d764908ff833 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 26 May 2009 15:30:22 +0200
Subject: perf report: Fix ELF symbol parsing

[ Impact: fix DSO symbol output in perf report ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 412dea1..10c13a6 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -159,7 +159,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
-CFLAGS = -g -O2 -Wall
+CFLAGS = -ggdb3 -Wall
 LDFLAGS = -lpthread -lrt -lelf
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 9e59d60..697f960 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -55,34 +55,6 @@ typedef union event_union {
 	struct comm_event comm;
 } event_t;
 
-struct section {
-	struct list_head node;
-	uint64_t	 start;
-	uint64_t	 end;
-	uint64_t	 offset;
-	char		 name[0];
-};
-
-struct section *section__new(uint64_t start, uint64_t size,
-				    uint64_t offset, char *name)
-{
-	struct section *self = malloc(sizeof(*self) + strlen(name) + 1);
-
-	if (self != NULL) {
-		self->start  = start;
-		self->end    = start + size;
-		self->offset = offset;
-		strcpy(self->name, name);
-	}
-
-	return self;
-}
-
-static void section__delete(struct section *self)
-{
-	free(self);
-}
-
 struct symbol {
 	struct rb_node rb_node;
 	uint64_t       start;
@@ -116,7 +88,6 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp)
 
 struct dso {
 	struct list_head node;
-	struct list_head sections;
 	struct rb_root	 syms;
 	char		 name[0];
 };
@@ -127,21 +98,12 @@ static struct dso *dso__new(const char *name)
 
 	if (self != NULL) {
 		strcpy(self->name, name);
-		INIT_LIST_HEAD(&self->sections);
 		self->syms = RB_ROOT;
 	}
 
 	return self;
 }
 
-static void dso__delete_sections(struct dso *self)
-{
-	struct section *pos, *n;
-
-	list_for_each_entry_safe(pos, n, &self->sections, node)
-		section__delete(pos);
-}
-
 static void dso__delete_symbols(struct dso *self)
 {
 	struct symbol *pos;
@@ -156,7 +118,6 @@ static void dso__delete_symbols(struct dso *self)
 
 static void dso__delete(struct dso *self)
 {
-	dso__delete_sections(self);
 	dso__delete_symbols(self);
 	free(self);
 }
@@ -282,9 +243,6 @@ static int dso__load(struct dso *self)
 	if (sec == NULL)
 		goto out_elf_end;
 
-	if (gelf_getshdr(sec, &shdr) == NULL)
-		goto out_elf_end;
-
 	Elf_Data *syms = elf_getdata(sec, NULL);
 	if (syms == NULL)
 		goto out_elf_end;
@@ -302,11 +260,21 @@ static int dso__load(struct dso *self)
 	GElf_Sym sym;
 	uint32_t index;
 	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
+		struct symbol *f;
+
 		if (!elf_sym__is_function(&sym))
 			continue;
-		struct symbol *f = symbol__new(sym.st_value, sym.st_size,
-					       elf_sym__name(&sym, symstrs));
-		if (f == NULL)
+
+		sec = elf_getscn(elf, sym.st_shndx);
+		if (!sec)
+			goto out_elf_end;
+
+		gelf_getshdr(sec, &shdr);
+		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+
+		f = symbol__new(sym.st_value, sym.st_size,
+				elf_sym__name(&sym, symstrs));
+		if (!f)
 			goto out_elf_end;
 
 		dso__insert_symbol(self, f);
@@ -498,7 +466,7 @@ static size_t symhist__fprintf(struct symhist *self, FILE *fp)
 		ret += fprintf(fp, "%s", self->sym ? self->sym->name: "<unknown>");
 	else
 		ret += fprintf(fp, "%s: %s",
-			       self->dso ? self->dso->name : "<unknown",
+			       self->dso ? self->dso->name : "<unknown>",
 			       self->sym ? self->sym->name : "<unknown>");
 	return ret + fprintf(fp, ": %u\n", self->count);
 }
@@ -714,6 +682,7 @@ more:
 		int show = 0;
 		struct dso *dso = NULL;
 		struct thread *thread = threads__findnew(event->ip.pid);
+		uint64_t ip = event->ip.ip;
 
 		if (thread == NULL) {
 			fprintf(stderr, "problem processing %d event, bailing out\n",
@@ -728,19 +697,20 @@ more:
 		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
 			show = SHOW_USER;
 			level = '.';
-			struct map *map = thread__find_map(thread, event->ip.ip);
-			if (map != NULL)
+			struct map *map = thread__find_map(thread, ip);
+			if (map != NULL) {
 				dso = map->dso;
+				ip -= map->start + map->pgoff;
+			}
 		} else {
 			show = SHOW_HV;
 			level = 'H';
 		}
 
 		if (show & show_mask) {
-			struct symbol *sym = dso__find_symbol(dso, event->ip.ip);
+			struct symbol *sym = dso__find_symbol(dso, ip);
 
-			if (thread__symbol_incnew(thread, sym, event->ip.ip,
-						  dso, level)) {
+			if (thread__symbol_incnew(thread, sym, ip, dso, level)) {
 				fprintf(stderr, "problem incrementing symbol count, bailing out\n");
 				goto done;
 			}
-- 
cgit v0.10.2


From 59d81029b6804c3d5895d07cad77d7dfddc6b5b2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 26 May 2009 11:14:27 -0300
Subject: perf report: Fix kernel symbol resolution

kallsyms have just the symbol start, so we need to read two lines
to get the len.

[ Impact: fix incorrect kernel symbol display in perf report ]

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 697f960..b19b893 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -360,9 +360,17 @@ static int load_kallsyms(void)
 	char *line = NULL;
 	size_t n;
 
+	if (getline(&line, &n, file) < 0 || !line)
+		goto out_delete_dso;
+
+	unsigned long long previous_start;
+	char c, previous_symbf[4096];
+	if (sscanf(line, "%llx %c %s", &previous_start, &c, previous_symbf) != 3)
+		goto out_delete_line;
+
 	while (!feof(file)) {
 		unsigned long long start;
-		char c, symbf[4096];
+		char symbf[4096];
 
 		if (getline(&line, &n, file) < 0)
 			break;
@@ -371,12 +379,18 @@ static int load_kallsyms(void)
 			goto out_delete_dso;
 
 		if (sscanf(line, "%llx %c %s", &start, &c, symbf) == 3) {
-			struct symbol *sym = symbol__new(start, 0x1000000, symbf);
+			if (start > previous_start) {
+				struct symbol *sym = symbol__new(previous_start,
+								 start - previous_start,
+								 previous_symbf);
 
-			if (sym == NULL)
-				goto out_delete_dso;
+				if (sym == NULL)
+					goto out_delete_dso;
 
-			dso__insert_symbol(kernel_dso, sym);
+				dso__insert_symbol(kernel_dso, sym);
+				previous_start = start;
+				strcpy(previous_symbf, symbf);
+			}
 		}
 	}
 
@@ -385,6 +399,8 @@ static int load_kallsyms(void)
 	fclose(file);
 	return 0;
 
+out_delete_line:
+	free(line);
 out_delete_dso:
 	dso__delete(kernel_dso);
 	return -1;
-- 
cgit v0.10.2


From e1b28aab5804aa477c33d19855d6747607a885fd Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 26 May 2009 15:41:27 +0100
Subject: GFS2: Remove lockstruct subdir from gfs2 sysfs files

The lockstruct sub directory contained two entries, both of
which are duplicated elsewhere in the gfs2 sysfs files as
well as being available via /proc/mounts. There is no userland program
using either of them, so this patch removes them.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9f6d48b..94bd59e 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -276,25 +276,6 @@ static struct kobj_type gfs2_ktype = {
 	.sysfs_ops     = &gfs2_attr_ops,
 };
 
-/*
- * display struct lm_lockstruct fields
- */
-
-#define LOCKSTRUCT_ATTR(name, fmt)                                          \
-static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
-{                                                                           \
-	return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \
-}                                                                           \
-static struct gfs2_attr lockstruct_attr_##name = __ATTR_RO(name)
-
-LOCKSTRUCT_ATTR(jid,      "%u\n");
-LOCKSTRUCT_ATTR(first,    "%u\n");
-
-static struct attribute *lockstruct_attrs[] = {
-	&lockstruct_attr_jid.attr,
-	&lockstruct_attr_first.attr,
-	NULL,
-};
 
 /*
  * lock_module. Originally from lock_dlm
@@ -397,6 +378,11 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
 	return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
 }
 
+static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
+{
+	return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid);
+}
+
 #define GDLM_ATTR(_name,_mode,_show,_store) \
 static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
@@ -404,6 +390,7 @@ GDLM_ATTR(proto_name,     0444, proto_name_show,	NULL);
 GDLM_ATTR(block,          0644, block_show,		block_store);
 GDLM_ATTR(withdraw,       0644, withdraw_show,		withdraw_store);
 GDLM_ATTR(id,             0444, lkid_show,		NULL);
+GDLM_ATTR(jid,		  0444, jid_show,		NULL);
 GDLM_ATTR(first,          0444, lkfirst_show,		NULL);
 GDLM_ATTR(first_done,     0444, first_done_show,	NULL);
 GDLM_ATTR(recover,        0200, NULL,			recover_store);
@@ -415,7 +402,7 @@ static struct attribute *lock_module_attrs[] = {
 	&gdlm_attr_block.attr,
 	&gdlm_attr_withdraw.attr,
 	&gdlm_attr_id.attr,
-	&lockstruct_attr_jid.attr,
+	&gdlm_attr_jid.attr,
 	&gdlm_attr_first.attr,
 	&gdlm_attr_first_done.attr,
 	&gdlm_attr_recover.attr,
@@ -558,11 +545,6 @@ static struct attribute *tune_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group lockstruct_group = {
-	.name = "lockstruct",
-	.attrs = lockstruct_attrs,
-};
-
 static struct attribute_group args_group = {
 	.name = "args",
 	.attrs = args_attrs,
@@ -588,13 +570,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 	if (error)
 		goto fail;
 
-	error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
-	if (error)
-		goto fail_reg;
-
 	error = sysfs_create_group(&sdp->sd_kobj, &args_group);
 	if (error)
-		goto fail_lockstruct;
+		goto fail_reg;
 
 	error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
 	if (error)
@@ -611,8 +589,6 @@ fail_tune:
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
 fail_args:
 	sysfs_remove_group(&sdp->sd_kobj, &args_group);
-fail_lockstruct:
-	sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
 fail_reg:
 	kobject_put(&sdp->sd_kobj);
 fail:
@@ -624,7 +600,6 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
 {
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
 	sysfs_remove_group(&sdp->sd_kobj, &args_group);
-	sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
 	sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
 	kobject_put(&sdp->sd_kobj);
 }
-- 
cgit v0.10.2


From 0945f98b4a844b488d4e42a43f90a3c3aef281af Mon Sep 17 00:00:00 2001
From: "Edgar E. Iglesias" <edgar.iglesias@gmail.com>
Date: Sun, 10 May 2009 14:39:05 +0200
Subject: microblaze: Add audit and seccomp thread flags.

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@gmail.com>

diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index a0401d2..7fac444 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -122,6 +122,8 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SINGLESTEP		4
 #define TIF_IRET		5 /* return with iret */
 #define TIF_MEMDIE		6
+#define TIF_SYSCALL_AUDIT	9       /* syscall auditing active */
+#define TIF_SECCOMP		10      /* secure computing */
 #define TIF_FREEZE		14	/* Freezing for suspend */
 
 /* FIXME change in entry.S */
@@ -138,10 +140,17 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_IRET		(1<<TIF_IRET)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
+#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_KERNEL_TRACE	(1 << TIF_KERNEL_TRACE)
 
+/* work to do in syscall trace */
+#define _TIF_WORK_SYSCALL_MASK  (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \
+				 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)
+
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK		0x0000FFFE
+
 /* work to do on any return to u-space */
 #define _TIF_ALLWORK_MASK	0x0000FFFF
 
-- 
cgit v0.10.2


From 2f3a499e6b803802880aea1fb8d3b46f1959494f Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 10:14:49 +0200
Subject: microblaze: Fix size of __kernel_mode_t to short

This patches solve problem with inconsistency between
kernel and glibc

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/posix_types.h b/arch/microblaze/include/asm/posix_types.h
index b4df41c..8c758b23 100644
--- a/arch/microblaze/include/asm/posix_types.h
+++ b/arch/microblaze/include/asm/posix_types.h
@@ -16,7 +16,7 @@
  */
 
 typedef unsigned long	__kernel_ino_t;
-typedef unsigned int	__kernel_mode_t;
+typedef unsigned short	__kernel_mode_t;
 typedef unsigned int	__kernel_nlink_t;
 typedef long		__kernel_off_t;
 typedef int		__kernel_pid_t;
-- 
cgit v0.10.2


From 2c65b4665f3f1651a7ef652d86eeb23be95dcdb9 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:07 +0200
Subject: microblaze_mmu_v2: Add mmu_defconfig

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
new file mode 100644
index 0000000..bd0b85e
--- /dev/null
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -0,0 +1,798 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc6
+# Fri May 22 10:02:33 2009
+#
+CONFIG_MICROBLAZE=y
+# CONFIG_SWAP is not set
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_TIME=y
+# CONFIG_GENERIC_TIME_VSYSCALL is not set
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=17
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="rootfs.cpio"
+CONFIG_INITRAMFS_ROOT_UID=0
+CONFIG_INITRAMFS_ROOT_GID=0
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+CONFIG_INITRAMFS_COMPRESSION_NONE=y
+# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set
+# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set
+# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+# CONFIG_SHMEM is not set
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+# CONFIG_SLOW_WORK is not set
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
+CONFIG_BASE_SMALL=1
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_FREEZER is not set
+
+#
+# Platform options
+#
+CONFIG_PLATFORM_GENERIC=y
+CONFIG_OPT_LIB_FUNCTION=y
+CONFIG_OPT_LIB_ASM=y
+CONFIG_ALLOW_EDIT_AUTO=y
+
+#
+# Automatic platform settings from Kconfig.auto
+#
+
+#
+# Definitions for MICROBLAZE0
+#
+CONFIG_KERNEL_BASE_ADDR=0x90000000
+CONFIG_XILINX_MICROBLAZE0_FAMILY="virtex5"
+CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
+CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
+CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
+CONFIG_XILINX_MICROBLAZE0_USE_DIV=1
+CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL=2
+CONFIG_XILINX_MICROBLAZE0_USE_FPU=2
+CONFIG_XILINX_MICROBLAZE0_HW_VER="7.10.d"
+
+#
+# Processor type and features
+#
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=100
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_MMU=y
+
+#
+# Boot options
+#
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyUL0,115200"
+CONFIG_CMDLINE_FORCE=y
+CONFIG_OF=y
+CONFIG_PROC_DEVICETREE=y
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_HIGHMEM_START=0xfe000000
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_TASK_SIZE=0x80000000
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+
+#
+# Exectuable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+# CONFIG_MTD is not set
+CONFIG_OF_DEVICE=y
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=8192
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_XILINX_SYSACE is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_93CX6 is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+# CONFIG_MII is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+CONFIG_NETDEV_1000=y
+CONFIG_NETDEV_10000=y
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_UARTLITE=y
+CONFIG_SERIAL_UARTLITE_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_RTC is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_XILINX_HWICAP is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+# CONFIG_GPIOLIB is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+# CONFIG_PROC_KCORE is not set
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+CONFIG_CIFS=y
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+# CONFIG_CIFS_WEAK_PW_HASH is not set
+# CONFIG_CIFS_XATTR is not set
+# CONFIG_CIFS_DEBUG2 is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SYSV68_PARTITION is not set
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+# CONFIG_NLS_ISO8859_1 is not set
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+CONFIG_DEBUG_SLAB=y
+# CONFIG_DEBUG_SLAB_LEAK is not set
+CONFIG_DEBUG_SPINLOCK=y
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+# CONFIG_PAGE_POISONING is not set
+# CONFIG_SAMPLES is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_HEART_BEAT=y
+CONFIG_DEBUG_BOOTMEM=y
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_MANAGER2 is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_HAVE_LMB=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From 5846cc608fd42cd3645ff9f841888832c6ef9b6e Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:09 +0200
Subject: microblaze_mmu_v2: MMU update for startup code

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index 319dc35..e568d6e 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -3,6 +3,26 @@
  * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
+ * MMU code derived from arch/ppc/kernel/head_4xx.S:
+ *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *      Initial PowerPC version.
+ *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *      Rewritten for PReP
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Low-level exception handers, MMU support, and rewrite.
+ *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *      PowerPC 8xx modifications.
+ *    Copyright (c) 1998-1999 TiVo, Inc.
+ *      PowerPC 403GCX modifications.
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *      PowerPC 403GCX/405GP modifications.
+ *    Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *      PowerPC 403GCX/405GP modifications.
+ * 	Author: MontaVista Software, Inc.
+ *         	frank_rowand@mvista.com or source@mvista.com
+ * 	   	debbie_chu@mvista.com
+ *
  * This file is subject to the terms and conditions of the GNU General Public
  * License. See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -12,6 +32,22 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_MMU
+#include <asm/setup.h> /* COMMAND_LINE_SIZE */
+#include <asm/mmu.h>
+#include <asm/processor.h>
+
+.data
+.global empty_zero_page
+.align 12
+empty_zero_page:
+	.space	4096
+.global swapper_pg_dir
+swapper_pg_dir:
+	.space	4096
+
+#endif /* CONFIG_MMU */
+
 	.text
 ENTRY(_start)
 	mfs	r1, rmsr
@@ -32,6 +68,123 @@ _copy_fdt:
 	addik	r3, r3, -4 /* descrement loop */
 no_fdt_arg:
 
+#ifdef CONFIG_MMU
+
+#ifndef CONFIG_CMDLINE_BOOL
+/*
+ * handling command line
+ * copy command line to __init_end. There is space for storing command line.
+ */
+	or	r6, r0, r0		/* incremment */
+	ori	r4, r0, __init_end	/* load address of command line */
+	tophys(r4,r4)			/* convert to phys address */
+	ori	r3, r0, COMMAND_LINE_SIZE - 1 /* number of loops */
+_copy_command_line:
+	lbu	r7, r5, r6 /* r7=r5+r6 - r5 contain pointer to command line */
+	sb	r7, r4, r6		/* addr[r4+r6]= r7*/
+	addik	r6, r6, 1		/* increment counting */
+	bgtid	r3, _copy_command_line	/* loop for all entries       */
+	addik	r3, r3, -1		/* descrement loop */
+	addik	r5, r4, 0		/* add new space for command line */
+	tovirt(r5,r5)
+#endif /* CONFIG_CMDLINE_BOOL */
+
+#ifdef NOT_COMPILE
+/* save bram context */
+	or	r6, r0, r0				/* incremment */
+	ori	r4, r0, TOPHYS(_bram_load_start)	/* save bram context */
+	ori	r3, r0, (LMB_SIZE - 4)
+_copy_bram:
+	lw	r7, r0, r6		/* r7 = r0 + r6 */
+	sw	r7, r4, r6		/* addr[r4 + r6] = r7*/
+	addik	r6, r6, 4		/* increment counting */
+	bgtid	r3, _copy_bram		/* loop for all entries */
+	addik	r3, r3, -4		/* descrement loop */
+#endif
+	/* We have to turn on the MMU right away. */
+
+	/*
+	 * Set up the initial MMU state so we can do the first level of
+	 * kernel initialization.  This maps the first 16 MBytes of memory 1:1
+	 * virtual to physical.
+	 */
+	nop
+	addik	r3, r0, 63		/* Invalidate all TLB entries */
+_invalidate:
+	mts	rtlbx, r3
+	mts	rtlbhi, r0			/* flush: ensure V is clear   */
+	bgtid	r3, _invalidate		/* loop for all entries       */
+	addik	r3, r3, -1
+	/* sync */
+
+	/*
+	 * We should still be executing code at physical address area
+	 * RAM_BASEADDR at this point. However, kernel code is at
+	 * a virtual address. So, set up a TLB mapping to cover this once
+	 * translation is enabled.
+	 */
+
+	addik	r3,r0, CONFIG_KERNEL_START /* Load the kernel virtual address */
+	tophys(r4,r3)			/* Load the kernel physical address */
+
+	mts	rpid,r0			/* Load the kernel PID */
+	nop
+	bri	4
+
+	/*
+	 * Configure and load two entries into TLB slots 0 and 1.
+	 * In case we are pinning TLBs, these are reserved in by the
+	 * other TLB functions.  If not reserving, then it doesn't
+	 * matter where they are loaded.
+	 */
+	andi	r4,r4,0xfffffc00	/* Mask off the real page number */
+	ori	r4,r4,(TLB_WR | TLB_EX)	/* Set the write and execute bits */
+
+	andi	r3,r3,0xfffffc00	/* Mask off the effective page number */
+	ori	r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M))
+
+	mts     rtlbx,r0		/* TLB slow 0 */
+
+	mts	rtlblo,r4		/* Load the data portion of the entry */
+	mts	rtlbhi,r3		/* Load the tag portion of the entry */
+
+	addik	r4, r4, 0x01000000	/* Map next 16 M entries */
+	addik	r3, r3, 0x01000000
+
+	ori	r6,r0,1			/* TLB slot 1 */
+	mts     rtlbx,r6
+
+	mts	rtlblo,r4		/* Load the data portion of the entry */
+	mts	rtlbhi,r3		/* Load the tag portion of the entry */
+
+	/*
+	 * Load a TLB entry for LMB, since we need access to
+	 * the exception vectors, using a 4k real==virtual mapping.
+	 */
+	ori	r6,r0,3			/* TLB slot 3 */
+	mts     rtlbx,r6
+
+	ori	r4,r0,(TLB_WR | TLB_EX)
+	ori	r3,r0,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
+
+	mts	rtlblo,r4		/* Load the data portion of the entry */
+	mts	rtlbhi,r3		/* Load the tag portion of the entry */
+
+	/*
+	 * We now have the lower 16 Meg of RAM mapped into TLB entries, and the
+	 * caches ready to work.
+	 */
+turn_on_mmu:
+	ori	r15,r0,start_here
+	ori	r4,r0,MSR_KERNEL_VMS
+	mts	rmsr,r4
+	nop
+	rted	r15,0			/* enables MMU */
+	nop
+
+start_here:
+#endif /* CONFIG_MMU */
+
 	/* Initialize small data anchors */
 	la	r13, r0, _KERNEL_SDA_BASE_
 	la	r2, r0, _KERNEL_SDA2_BASE_
@@ -51,6 +204,43 @@ no_fdt_arg:
 	brald	r15, r8
 	nop
 
+#ifndef CONFIG_MMU
 	la	r15, r0, machine_halt
 	braid	start_kernel
 	nop
+#else
+	/*
+	 * Initialize the MMU.
+	 */
+	bralid	r15, mmu_init
+	nop
+
+	/* Go back to running unmapped so we can load up new values
+	 * and change to using our exception vectors.
+	 * On the MicroBlaze, all we invalidate the used TLB entries to clear
+	 * the old 16M byte TLB mappings.
+	 */
+	ori	r15,r0,TOPHYS(kernel_load_context)
+	ori	r4,r0,MSR_KERNEL
+	mts	rmsr,r4
+	nop
+	bri	4
+	rted	r15,0
+	nop
+
+	/* Load up the kernel context */
+kernel_load_context:
+	# Keep entry 0 and 1 valid. Entry 3 mapped to LMB can go away.
+	ori	r5,r0,3
+	mts     rtlbx,r5
+	nop
+	mts	rtlbhi,r0
+	nop
+	addi	r15, r0, machine_halt
+	ori	r17, r0, start_kernel
+	ori	r4, r0, MSR_KERNEL_VMS
+	mts	rmsr, r4
+	nop
+	rted	r17, 0		/* enable MMU and jump to start_kernel */
+	nop
+#endif /* CONFIG_MMU */
-- 
cgit v0.10.2


From a43acfbbc8653f70b8da4c64ec534fb45065a2ee Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:10 +0200
Subject: microblaze_mmu_v2: Alocate TLB for early console

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 9b98e8e..27f8daf 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -1,5 +1,6 @@
 /*
- * Copyright (C) 2007-2008 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -18,7 +19,6 @@
 extern unsigned int boot_cpuid; /* move to smp.h */
 
 extern char cmd_line[COMMAND_LINE_SIZE];
-#  endif/* __KERNEL__ */
 
 void early_printk(const char *fmt, ...);
 
@@ -30,6 +30,11 @@ void setup_heartbeat(void);
 
 unsigned long long sched_clock(void);
 
+#   ifdef CONFIG_MMU
+extern void mmu_reset(void);
+extern void early_console_reg_tlb_alloc(unsigned int addr);
+#   endif /* CONFIG_MMU */
+
 void time_init(void);
 void init_IRQ(void);
 void machine_early_init(const char *cmdline, unsigned int ram,
@@ -40,5 +45,6 @@ void machine_shutdown(void);
 void machine_halt(void);
 void machine_power_off(void);
 
+#  endif/* __KERNEL__ */
 # endif /* __ASSEMBLY__ */
 #endif /* _ASM_MICROBLAZE_SETUP_H */
diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c
index 4b0f0fd..7de8492 100644
--- a/arch/microblaze/kernel/early_printk.c
+++ b/arch/microblaze/kernel/early_printk.c
@@ -87,6 +87,9 @@ int __init setup_early_printk(char *opt)
 	base_addr = early_uartlite_console();
 	if (base_addr) {
 		early_console_initialized = 1;
+#ifdef CONFIG_MMU
+		early_console_reg_tlb_alloc(base_addr);
+#endif
 		early_printk("early_printk_console is enabled at 0x%08x\n",
 							base_addr);
 
-- 
cgit v0.10.2


From 3f50425c0c9815411b760e36b48e18958819a304 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:10 +0200
Subject: microblaze_mmu_v2: TLB low level code

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/misc.S b/arch/microblaze/kernel/misc.S
new file mode 100644
index 0000000..d623efc
--- /dev/null
+++ b/arch/microblaze/kernel/misc.S
@@ -0,0 +1,120 @@
+/*
+ * Miscellaneous low-level MMU functions.
+ *
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ * Derived from arch/ppc/kernel/misc.S
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file COPYING in the main directory of this
+ * archive for more details.
+ */
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <asm/unistd.h>
+#include <linux/errno.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+
+	.text
+/*
+ * Flush MMU TLB
+ *
+ * We avoid flushing the pinned 0, 1 and possibly 2 entries.
+ */
+.globl _tlbia;
+.align 4;
+_tlbia:
+	addik	r12, r0, 63 /* flush all entries (63 - 3) */
+	/* isync */
+_tlbia_1:
+	mts	rtlbx, r12
+	nop
+	mts	rtlbhi, r0 /* flush: ensure V is clear */
+	nop
+	addik	r11, r12, -2
+	bneid	r11, _tlbia_1 /* loop for all entries */
+	addik	r12, r12, -1
+	/* sync */
+	rtsd	r15, 8
+	nop
+
+/*
+ * Flush MMU TLB for a particular address (in r5)
+ */
+.globl _tlbie;
+.align 4;
+_tlbie:
+	mts	rtlbsx, r5 /* look up the address in TLB */
+	nop
+	mfs	r12, rtlbx /* Retrieve index */
+	nop
+	blti	r12, _tlbie_1 /* Check if found */
+	mts	rtlbhi, r0 /* flush: ensure V is clear */
+	nop
+_tlbie_1:
+	rtsd	r15, 8
+	nop
+
+/*
+ * Allocate TLB entry for early console
+ */
+.globl early_console_reg_tlb_alloc;
+.align 4;
+early_console_reg_tlb_alloc:
+	/*
+	 * Load a TLB entry for the UART, so that microblaze_progress() can use
+	 * the UARTs nice and early.  We use a 4k real==virtual mapping.
+	 */
+	ori	r4, r0, 2
+	mts	rtlbx, r4 /* TLB slot 2 */
+
+	or	r4,r5,r0
+	andi	r4,r4,0xfffff000
+	ori	r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
+
+	andi	r5,r5,0xfffff000
+	ori	r5,r5,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
+
+	mts	rtlblo,r4 /* Load the data portion of the entry */
+	nop
+	mts	rtlbhi,r5 /* Load the tag portion of the entry */
+	nop
+	rtsd	r15, 8
+	nop
+
+/*
+ * Copy a whole page (4096 bytes).
+ */
+#define COPY_16_BYTES		\
+	lwi	r7, r6, 0;	\
+	lwi	r8, r6, 4;	\
+	lwi	r9, r6, 8;	\
+	lwi	r10, r6, 12;	\
+	swi	r7, r5, 0;	\
+	swi	r8, r5, 4;	\
+	swi	r9, r5, 8;	\
+	swi	r10, r5, 12
+
+
+/* FIXME DCACHE_LINE_BYTES (CONFIG_XILINX_MICROBLAZE0_DCACHE_LINE_LEN * 4)*/
+#define DCACHE_LINE_BYTES (4 * 4)
+
+.globl copy_page;
+.align 4;
+copy_page:
+	ori	r11, r0, (PAGE_SIZE/DCACHE_LINE_BYTES) - 1
+_copy_page_loop:
+	COPY_16_BYTES
+#if DCACHE_LINE_BYTES >= 32
+	COPY_16_BYTES
+#endif
+	addik	r6, r6, DCACHE_LINE_BYTES
+	addik	r5, r5, DCACHE_LINE_BYTES
+	bneid	r11, _copy_page_loop
+	addik	r11, r11, -1
+	rtsd	r15, 8
+	nop
-- 
cgit v0.10.2


From 4dc60832f516c4ccfd1e6aa07d92cc0f6d21bacb Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:12 +0200
Subject: microblaze_mmu_v2: MMU initialization

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index af789e2..b5a701c 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -23,18 +23,26 @@
 #include <asm/sections.h>
 #include <asm/tlb.h>
 
+#ifndef CONFIG_MMU
 unsigned int __page_offset;
 EXPORT_SYMBOL(__page_offset);
 
+#else
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+int mem_init_done;
+static int init_bootmem_done;
+#endif /* CONFIG_MMU */
+
 char *klimit = _end;
 
 /*
  * Initialize the bootmem system and give it all the memory we
  * have available.
  */
-unsigned int memory_start;
-unsigned int memory_end; /* due to mm/nommu.c */
-unsigned int memory_size;
+unsigned long memory_start;
+unsigned long memory_end; /* due to mm/nommu.c */
+unsigned long memory_size;
 
 /*
  * paging_init() sets up the page tables - in fact we've already done this.
@@ -59,6 +67,7 @@ void __init setup_memory(void)
 {
 	int i;
 	unsigned long map_size;
+#ifndef CONFIG_MMU
 	u32 kernel_align_start, kernel_align_size;
 
 	/* Find main memory where is the kernel */
@@ -91,6 +100,7 @@ void __init setup_memory(void)
 		__func__, kernel_align_start, kernel_align_start
 			+ kernel_align_size, kernel_align_size);
 
+#endif
 	/*
 	 * Kernel:
 	 * start: base phys address of kernel - page align
@@ -119,9 +129,13 @@ void __init setup_memory(void)
 	 * for 4GB of memory, using 4kB pages), plus 1 page
 	 * (in case the address isn't page-aligned).
 	 */
+#ifndef CONFIG_MMU
 	map_size = init_bootmem_node(NODE_DATA(0), PFN_UP(TOPHYS((u32)_end)),
 					min_low_pfn, max_low_pfn);
-
+#else
+	map_size = init_bootmem_node(&contig_page_data,
+		PFN_UP(TOPHYS((u32)_end)), min_low_pfn, max_low_pfn);
+#endif
 	lmb_reserve(PFN_UP(TOPHYS((u32)_end)) << PAGE_SHIFT, map_size);
 
 	/* free bootmem is whole main memory */
@@ -135,6 +149,9 @@ void __init setup_memory(void)
 		reserve_bootmem(lmb.reserved.region[i].base,
 			lmb_size_bytes(&lmb.reserved, i) - 1, BOOTMEM_DEFAULT);
 	}
+#ifdef CONFIG_MMU
+	init_bootmem_done = 1;
+#endif
 	paging_init();
 }
 
@@ -189,8 +206,12 @@ void __init mem_init(void)
 	printk(KERN_INFO "Memory: %luk/%luk available\n",
 	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
 	       num_physpages << (PAGE_SHIFT-10));
+#ifdef CONFIG_MMU
+	mem_init_done = 1;
+#endif
 }
 
+#ifndef CONFIG_MMU
 /* Check against bounds of physical memory */
 int ___range_ok(unsigned long addr, unsigned long size)
 {
@@ -198,3 +219,132 @@ int ___range_ok(unsigned long addr, unsigned long size)
 		((addr + size) > memory_end));
 }
 EXPORT_SYMBOL(___range_ok);
+
+#else
+int page_is_ram(unsigned long pfn)
+{
+	return pfn < max_low_pfn;
+}
+
+/*
+ * Check for command-line options that affect what MMU_init will do.
+ */
+static void mm_cmdline_setup(void)
+{
+	unsigned long maxmem = 0;
+	char *p = cmd_line;
+
+	/* Look for mem= option on command line */
+	p = strstr(cmd_line, "mem=");
+	if (p) {
+		p += 4;
+		maxmem = memparse(p, &p);
+		if (maxmem && memory_size > maxmem) {
+			memory_size = maxmem;
+			memory_end = memory_start + memory_size;
+			lmb.memory.region[0].size = memory_size;
+		}
+	}
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+static void __init mmu_init_hw(void)
+{
+	/*
+	 * The Zone Protection Register (ZPR) defines how protection will
+	 * be applied to every page which is a member of a given zone. At
+	 * present, we utilize only two of the zones.
+	 * The zone index bits (of ZSEL) in the PTE are used for software
+	 * indicators, except the LSB.  For user access, zone 1 is used,
+	 * for kernel access, zone 0 is used.  We set all but zone 1
+	 * to zero, allowing only kernel access as indicated in the PTE.
+	 * For zone 1, we set a 01 binary (a value of 10 will not work)
+	 * to allow user access as indicated in the PTE.  This also allows
+	 * kernel access as indicated in the PTE.
+	 */
+	__asm__ __volatile__ ("ori r11, r0, 0x10000000;" \
+			"mts rzpr, r11;"
+			: : : "r11");
+}
+
+/*
+ * MMU_init sets up the basic memory mappings for the kernel,
+ * including both RAM and possibly some I/O regions,
+ * and sets up the page tables and the MMU hardware ready to go.
+ */
+
+/* called from head.S */
+asmlinkage void __init mmu_init(void)
+{
+	unsigned int kstart, ksize;
+
+	if (!lmb.reserved.cnt) {
+		printk(KERN_EMERG "Error memory count\n");
+		machine_restart(NULL);
+	}
+
+	if ((u32) lmb.memory.region[0].size < 0x1000000) {
+		printk(KERN_EMERG "Memory must be greater than 16MB\n");
+		machine_restart(NULL);
+	}
+	/* Find main memory where the kernel is */
+	memory_start = (u32) lmb.memory.region[0].base;
+	memory_end = (u32) lmb.memory.region[0].base +
+				(u32) lmb.memory.region[0].size;
+	memory_size = memory_end - memory_start;
+
+	mm_cmdline_setup(); /* FIXME parse args from command line - not used */
+
+	/*
+	 * Map out the kernel text/data/bss from the available physical
+	 * memory.
+	 */
+	kstart = __pa(CONFIG_KERNEL_START); /* kernel start */
+	/* kernel size */
+	ksize = PAGE_ALIGN(((u32)_end - (u32)CONFIG_KERNEL_START));
+	lmb_reserve(kstart, ksize);
+
+#if defined(CONFIG_BLK_DEV_INITRD)
+	/* Remove the init RAM disk from the available memory. */
+/*	if (initrd_start) {
+		mem_pieces_remove(&phys_avail, __pa(initrd_start),
+				  initrd_end - initrd_start, 1);
+	}*/
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+	/* Initialize the MMU hardware */
+	mmu_init_hw();
+
+	/* Map in all of RAM starting at CONFIG_KERNEL_START */
+	mapin_ram();
+
+#ifdef HIGHMEM_START_BOOL
+	ioremap_base = HIGHMEM_START;
+#else
+	ioremap_base = 0xfe000000UL;	/* for now, could be 0xfffff000 */
+#endif /* CONFIG_HIGHMEM */
+	ioremap_bot = ioremap_base;
+
+	/* Initialize the context management stuff */
+	mmu_context_init();
+}
+
+/* This is only called until mem_init is done. */
+void __init *early_get_page(void)
+{
+	void *p;
+	if (init_bootmem_done) {
+		p = alloc_bootmem_pages(PAGE_SIZE);
+	} else {
+		/*
+		 * Mem start + 32MB -> here is limit
+		 * because of mem mapping from head.S
+		 */
+		p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE,
+					memory_start + 0x2000000));
+	}
+	return p;
+}
+#endif /* CONFIG_MMU */
-- 
cgit v0.10.2


From 23098649e0f8861ea69fac62cf6ba721b83065dc Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:12 +0200
Subject: microblaze_mmu_v2: mmu.h update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/mmu.h b/arch/microblaze/include/asm/mmu.h
index 0e0431d..66cad6a 100644
--- a/arch/microblaze/include/asm/mmu.h
+++ b/arch/microblaze/include/asm/mmu.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -9,11 +11,109 @@
 #ifndef _ASM_MICROBLAZE_MMU_H
 #define _ASM_MICROBLAZE_MMU_H
 
-#ifndef __ASSEMBLY__
+# ifndef CONFIG_MMU
+#  ifndef __ASSEMBLY__
 typedef struct {
 	struct vm_list_struct	*vmlist;
 	unsigned long		end_brk;
 } mm_context_t;
-#endif /* __ASSEMBLY__ */
+#  endif /* __ASSEMBLY__ */
+# else /* CONFIG_MMU */
+#  ifdef __KERNEL__
+#   ifndef __ASSEMBLY__
 
+/* Default "unsigned long" context */
+typedef unsigned long mm_context_t;
+
+/* Hardware Page Table Entry */
+typedef struct _PTE {
+	unsigned long    v:1;	/* Entry is valid */
+	unsigned long vsid:24;	/* Virtual segment identifier */
+	unsigned long    h:1;	/* Hash algorithm indicator */
+	unsigned long  api:6;	/* Abbreviated page index */
+	unsigned long  rpn:20;	/* Real (physical) page number */
+	unsigned long     :3;	/* Unused */
+	unsigned long    r:1;	/* Referenced */
+	unsigned long    c:1;	/* Changed */
+	unsigned long    w:1;	/* Write-thru cache mode */
+	unsigned long    i:1;	/* Cache inhibited */
+	unsigned long    m:1;	/* Memory coherence */
+	unsigned long    g:1;	/* Guarded */
+	unsigned long     :1;	/* Unused */
+	unsigned long   pp:2;	/* Page protection */
+} PTE;
+
+/* Values for PP (assumes Ks=0, Kp=1) */
+#  define PP_RWXX	0 /* Supervisor read/write, User none */
+#  define PP_RWRX	1 /* Supervisor read/write, User read */
+#  define PP_RWRW	2 /* Supervisor read/write, User read/write */
+#  define PP_RXRX	3 /* Supervisor read,       User read */
+
+/* Segment Register */
+typedef struct _SEGREG {
+	unsigned long    t:1;	/* Normal or I/O  type */
+	unsigned long   ks:1;	/* Supervisor 'key' (normally 0) */
+	unsigned long   kp:1;	/* User 'key' (normally 1) */
+	unsigned long    n:1;	/* No-execute */
+	unsigned long     :4;	/* Unused */
+	unsigned long vsid:24;	/* Virtual Segment Identifier */
+} SEGREG;
+
+extern void _tlbie(unsigned long va);	/* invalidate a TLB entry */
+extern void _tlbia(void);		/* invalidate all TLB entries */
+#   endif /* __ASSEMBLY__ */
+
+/*
+ * The MicroBlaze processor has a TLB architecture identical to PPC-40x. The
+ * instruction and data sides share a unified, 64-entry, semi-associative
+ * TLB which is maintained totally under software control. In addition, the
+ * instruction side has a hardware-managed, 2,4, or 8-entry, fully-associative
+ * TLB which serves as a first level to the shared TLB. These two TLBs are
+ * known as the UTLB and ITLB, respectively.
+ */
+
+#  define MICROBLAZE_TLB_SIZE 64
+
+/*
+ * TLB entries are defined by a "high" tag portion and a "low" data
+ * portion. The data portion is 32-bits.
+ *
+ * TLB entries are managed entirely under software control by reading,
+ * writing, and searching using the MTS and MFS instructions.
+ */
+
+#  define TLB_LO		1
+#  define TLB_HI		0
+#  define TLB_DATA		TLB_LO
+#  define TLB_TAG		TLB_HI
+
+/* Tag portion */
+#  define TLB_EPN_MASK		0xFFFFFC00 /* Effective Page Number */
+#  define TLB_PAGESZ_MASK	0x00000380
+#  define TLB_PAGESZ(x)		(((x) & 0x7) << 7)
+#  define PAGESZ_1K		0
+#  define PAGESZ_4K		1
+#  define PAGESZ_16K		2
+#  define PAGESZ_64K		3
+#  define PAGESZ_256K		4
+#  define PAGESZ_1M		5
+#  define PAGESZ_4M		6
+#  define PAGESZ_16M		7
+#  define TLB_VALID		0x00000040 /* Entry is valid */
+
+/* Data portion */
+#  define TLB_RPN_MASK		0xFFFFFC00 /* Real Page Number */
+#  define TLB_PERM_MASK		0x00000300
+#  define TLB_EX		0x00000200 /* Instruction execution allowed */
+#  define TLB_WR		0x00000100 /* Writes permitted */
+#  define TLB_ZSEL_MASK		0x000000F0
+#  define TLB_ZSEL(x)		(((x) & 0xF) << 4)
+#  define TLB_ATTR_MASK		0x0000000F
+#  define TLB_W			0x00000008 /* Caching is write-through */
+#  define TLB_I			0x00000004 /* Caching is inhibited */
+#  define TLB_M			0x00000002 /* Memory is coherent */
+#  define TLB_G			0x00000001 /* Memory is guarded from prefetch */
+
+#  endif /* __KERNEL__ */
+# endif /* CONFIG_MMU */
 #endif /* _ASM_MICROBLAZE_MMU_H */
-- 
cgit v0.10.2


From 5de96121009f4de43ffeb7160109e23132278c07 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:13 +0200
Subject: microblaze_mmu_v2: Page fault handling high level - fault.c

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
new file mode 100644
index 0000000..5e67cd1
--- /dev/null
+++ b/arch/microblaze/mm/fault.c
@@ -0,0 +1,304 @@
+/*
+ *  arch/microblaze/mm/fault.c
+ *
+ *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ *  Derived from "arch/ppc/mm/fault.c"
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Modified by Cort Dougan and Paul Mackerras.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <linux/uaccess.h>
+#include <asm/exceptions.h>
+
+#if defined(CONFIG_KGDB)
+int debugger_kernel_faults = 1;
+#endif
+
+static unsigned long pte_misses;	/* updated by do_page_fault() */
+static unsigned long pte_errors;	/* updated by do_page_fault() */
+
+/*
+ * Check whether the instruction at regs->pc is a store using
+ * an update addressing form which will update r1.
+ */
+static int store_updates_sp(struct pt_regs *regs)
+{
+	unsigned int inst;
+
+	if (get_user(inst, (unsigned int *)regs->pc))
+		return 0;
+	/* check for 1 in the rD field */
+	if (((inst >> 21) & 0x1f) != 1)
+		return 0;
+	/* check for store opcodes */
+	if ((inst & 0xd0000000) == 0xd0000000)
+		return 1;
+	return 0;
+}
+
+
+/*
+ * bad_page_fault is called when we have a bad access from the kernel.
+ * It is called from do_page_fault above and from some of the procedures
+ * in traps.c.
+ */
+static void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+{
+	const struct exception_table_entry *fixup;
+/* MS: no context */
+	/* Are we prepared to handle this fault?  */
+	fixup = search_exception_tables(regs->pc);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		return;
+	}
+
+	/* kernel has accessed a bad area */
+#if defined(CONFIG_KGDB)
+	if (debugger_kernel_faults)
+		debugger(regs);
+#endif
+	die("kernel access of bad area", regs, sig);
+}
+
+/*
+ * The error_code parameter is ESR for a data fault,
+ * 0 for an instruction fault.
+ */
+void do_page_fault(struct pt_regs *regs, unsigned long address,
+		   unsigned long error_code)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	siginfo_t info;
+	int code = SEGV_MAPERR;
+	int is_write = error_code & ESR_S;
+	int fault;
+
+	regs->ear = address;
+	regs->esr = error_code;
+
+	/* On a kernel SLB miss we can only check for a valid exception entry */
+	if (kernel_mode(regs) && (address >= TASK_SIZE)) {
+		printk(KERN_WARNING "kernel task_size exceed");
+		_exception(SIGSEGV, regs, code, address);
+	}
+
+	/* for instr TLB miss and instr storage exception ESR_S is undefined */
+	if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
+		is_write = 0;
+
+#if defined(CONFIG_KGDB)
+	if (debugger_fault_handler && regs->trap == 0x300) {
+		debugger_fault_handler(regs);
+		return;
+	}
+#endif /* CONFIG_KGDB */
+
+	if (in_atomic() || mm == NULL) {
+		/* FIXME */
+		if (kernel_mode(regs)) {
+			printk(KERN_EMERG
+				"Page fault in kernel mode - Oooou!!! pid %d\n",
+				current->pid);
+			_exception(SIGSEGV, regs, code, address);
+			return;
+		}
+		/* in_atomic() in user mode is really bad,
+		   as is current->mm == NULL. */
+		printk(KERN_EMERG "Page fault in user mode with "
+		       "in_atomic(), mm = %p\n", mm);
+		printk(KERN_EMERG "r15 = %lx  MSR = %lx\n",
+		       regs->r15, regs->msr);
+		die("Weird page fault", regs, SIGSEGV);
+	}
+
+	/* When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
+	 * erroneous fault occurring in a code path which already holds mmap_sem
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table.
+	 *
+	 * As the vast majority of faults will be valid we will only perform
+	 * the source reference check when there is a possibility of a deadlock.
+	 * Attempt to lock the address space, if we cannot we then validate the
+	 * source.  If this is invalid we can skip the address space check,
+	 * thus avoiding the deadlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (kernel_mode(regs) && !search_exception_tables(regs->pc))
+			goto bad_area_nosemaphore;
+
+		down_read(&mm->mmap_sem);
+	}
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+
+	if (vma->vm_start <= address)
+		goto good_area;
+
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+
+	if (!is_write)
+		goto bad_area;
+
+	/*
+	 * N.B. The ABI allows programs to access up to
+	 * a few hundred bytes below the stack pointer (TBD).
+	 * The kernel signal delivery code writes up to about 1.5kB
+	 * below the stack pointer (r1) before decrementing it.
+	 * The exec code can write slightly over 640kB to the stack
+	 * before setting the user r1.  Thus we allow the stack to
+	 * expand to 1MB without further checks.
+	 */
+	if (address + 0x100000 < vma->vm_end) {
+
+		/* get user regs even if this fault is in kernel mode */
+		struct pt_regs *uregs = current->thread.regs;
+		if (uregs == NULL)
+			goto bad_area;
+
+		/*
+		 * A user-mode access to an address a long way below
+		 * the stack pointer is only valid if the instruction
+		 * is one which would update the stack pointer to the
+		 * address accessed if the instruction completed,
+		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
+		 * (or the byte, halfword, float or double forms).
+		 *
+		 * If we don't check this then any write to the area
+		 * between the last mapped region and the stack will
+		 * expand the stack rather than segfaulting.
+		 */
+		if (address + 2048 < uregs->r1
+			&& (kernel_mode(regs) || !store_updates_sp(regs)))
+				goto bad_area;
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+good_area:
+	code = SEGV_ACCERR;
+
+	/* a write */
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	/* a read */
+	} else {
+		/* protection fault */
+		if (error_code & 0x08000000)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+survive:
+	fault = handle_mm_fault(mm, vma, address, is_write);
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+	if (fault & VM_FAULT_MAJOR)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+	up_read(&mm->mmap_sem);
+	/*
+	 * keep track of tlb+htab misses that are good addrs but
+	 * just need pte's created via handle_mm_fault()
+	 * -- Cort
+	 */
+	pte_misses++;
+	return;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	pte_errors++;
+
+	/* User mode accesses cause a SIGSEGV */
+	if (user_mode(regs)) {
+		_exception(SIGSEGV, regs, code, address);
+/*		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code = code;
+		info.si_addr = (void *) address;
+		force_sig_info(SIGSEGV, &info, current);*/
+		return;
+	}
+
+	bad_page_fault(regs, address, SIGSEGV);
+	return;
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	up_read(&mm->mmap_sem);
+	printk(KERN_WARNING "VM: killing process %s\n", current->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	bad_page_fault(regs, address, SIGKILL);
+	return;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+	if (user_mode(regs)) {
+		info.si_signo = SIGBUS;
+		info.si_errno = 0;
+		info.si_code = BUS_ADRERR;
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGBUS, &info, current);
+		return;
+	}
+	bad_page_fault(regs, address, SIGBUS);
+}
-- 
cgit v0.10.2


From fc34d1eb1ca09d3450508e2cf9cf511364c2c460 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:14 +0200
Subject: microblaze_mmu_v2: Context handling - mmu_context.c/h

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/mmu_context.h b/arch/microblaze/include/asm/mmu_context.h
index 150ca01..385fed1 100644
--- a/arch/microblaze/include/asm/mmu_context.h
+++ b/arch/microblaze/include/asm/mmu_context.h
@@ -1,21 +1,5 @@
-/*
- * Copyright (C) 2006 Atmark Techno, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H
-#define _ASM_MICROBLAZE_MMU_CONTEXT_H
-
-# define init_new_context(tsk, mm)		({ 0; })
-
-# define enter_lazy_tlb(mm, tsk)		do {} while (0)
-# define change_mm_context(old, ctx, _pml4)	do {} while (0)
-# define destroy_context(mm)			do {} while (0)
-# define deactivate_mm(tsk, mm)			do {} while (0)
-# define switch_mm(prev, next, tsk)		do {} while (0)
-# define activate_mm(prev, next)		do {} while (0)
-
-#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */
+#ifdef CONFIG_MMU
+# include "mmu_context_mm.h"
+#else
+# include "mmu_context_no.h"
+#endif
diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h
new file mode 100644
index 0000000..3e5c254
--- /dev/null
+++ b/arch/microblaze/include/asm/mmu_context_mm.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2006 Atmark Techno, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H
+#define _ASM_MICROBLAZE_MMU_CONTEXT_H
+
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <asm/mmu.h>
+#include <asm-generic/mm_hooks.h>
+
+# ifdef __KERNEL__
+/*
+ * This function defines the mapping from contexts to VSIDs (virtual
+ * segment IDs).  We use a skew on both the context and the high 4 bits
+ * of the 32-bit virtual address (the "effective segment ID") in order
+ * to spread out the entries in the MMU hash table.
+ */
+# define CTX_TO_VSID(ctx, va)	(((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \
+				 & 0xffffff)
+
+/*
+   MicroBlaze has 256 contexts, so we can just rotate through these
+   as a way of "switching" contexts.  If the TID of the TLB is zero,
+   the PID/TID comparison is disabled, so we can use a TID of zero
+   to represent all kernel pages as shared among all contexts.
+ */
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+# define NO_CONTEXT	256
+# define LAST_CONTEXT	255
+# define FIRST_CONTEXT	1
+
+/*
+ * Set the current MMU context.
+ * This is done byloading up the segment registers for the user part of the
+ * address space.
+ *
+ * Since the PGD is immediately available, it is much faster to simply
+ * pass this along as a second parameter, which is required for 8xx and
+ * can be used for debugging on all processors (if you happen to have
+ * an Abatron).
+ */
+extern void set_context(mm_context_t context, pgd_t *pgd);
+
+/*
+ * Bitmap of contexts in use.
+ * The size of this bitmap is LAST_CONTEXT + 1 bits.
+ */
+extern unsigned long context_map[];
+
+/*
+ * This caches the next context number that we expect to be free.
+ * Its use is an optimization only, we can't rely on this context
+ * number to be free, but it usually will be.
+ */
+extern mm_context_t next_mmu_context;
+
+/*
+ * Since we don't have sufficient contexts to give one to every task
+ * that could be in the system, we need to be able to steal contexts.
+ * These variables support that.
+ */
+extern atomic_t nr_free_contexts;
+extern struct mm_struct *context_mm[LAST_CONTEXT+1];
+extern void steal_context(void);
+
+/*
+ * Get a new mmu context for the address space described by `mm'.
+ */
+static inline void get_mmu_context(struct mm_struct *mm)
+{
+	mm_context_t ctx;
+
+	if (mm->context != NO_CONTEXT)
+		return;
+	while (atomic_dec_if_positive(&nr_free_contexts) < 0)
+		steal_context();
+	ctx = next_mmu_context;
+	while (test_and_set_bit(ctx, context_map)) {
+		ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx);
+		if (ctx > LAST_CONTEXT)
+			ctx = 0;
+	}
+	next_mmu_context = (ctx + 1) & LAST_CONTEXT;
+	mm->context = ctx;
+	context_mm[ctx] = mm;
+}
+
+/*
+ * Set up the context for a new address space.
+ */
+# define init_new_context(tsk, mm)	(((mm)->context = NO_CONTEXT), 0)
+
+/*
+ * We're finished using the context for an address space.
+ */
+static inline void destroy_context(struct mm_struct *mm)
+{
+	if (mm->context != NO_CONTEXT) {
+		clear_bit(mm->context, context_map);
+		mm->context = NO_CONTEXT;
+		atomic_inc(&nr_free_contexts);
+	}
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	tsk->thread.pgdir = next->pgd;
+	get_mmu_context(next);
+	set_context(next->context, next->pgd);
+}
+
+/*
+ * After we have set current->mm to a new value, this activates
+ * the context for the new mm so we see the new mappings.
+ */
+static inline void activate_mm(struct mm_struct *active_mm,
+			struct mm_struct *mm)
+{
+	current->thread.pgdir = mm->pgd;
+	get_mmu_context(mm);
+	set_context(mm->context, mm->pgd);
+}
+
+extern void mmu_context_init(void);
+
+# endif /* __KERNEL__ */
+#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */
diff --git a/arch/microblaze/include/asm/mmu_context_no.h b/arch/microblaze/include/asm/mmu_context_no.h
new file mode 100644
index 0000000..ba55671
--- /dev/null
+++ b/arch/microblaze/include/asm/mmu_context_no.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2006 Atmark Techno, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H
+#define _ASM_MICROBLAZE_MMU_CONTEXT_H
+
+# define init_new_context(tsk, mm)		({ 0; })
+
+# define enter_lazy_tlb(mm, tsk)		do {} while (0)
+# define change_mm_context(old, ctx, _pml4)	do {} while (0)
+# define destroy_context(mm)			do {} while (0)
+# define deactivate_mm(tsk, mm)			do {} while (0)
+# define switch_mm(prev, next, tsk)		do {} while (0)
+# define activate_mm(prev, next)		do {} while (0)
+
+#endif /* _ASM_MICROBLAZE_MMU_CONTEXT_H */
diff --git a/arch/microblaze/mm/mmu_context.c b/arch/microblaze/mm/mmu_context.c
new file mode 100644
index 0000000..26ff82f
--- /dev/null
+++ b/arch/microblaze/mm/mmu_context.c
@@ -0,0 +1,70 @@
+/*
+ * This file contains the routines for handling the MMU.
+ *
+ *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ *  Derived from arch/ppc/mm/4xx_mmu.c:
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+
+mm_context_t next_mmu_context;
+unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
+atomic_t nr_free_contexts;
+struct mm_struct *context_mm[LAST_CONTEXT+1];
+
+/*
+ * Initialize the context management stuff.
+ */
+void __init mmu_context_init(void)
+{
+	/*
+	 * The use of context zero is reserved for the kernel.
+	 * This code assumes FIRST_CONTEXT < 32.
+	 */
+	context_map[0] = (1 << FIRST_CONTEXT) - 1;
+	next_mmu_context = FIRST_CONTEXT;
+	atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1);
+}
+
+/*
+ * Steal a context from a task that has one at the moment.
+ *
+ * This isn't an LRU system, it just frees up each context in
+ * turn (sort-of pseudo-random replacement :).  This would be the
+ * place to implement an LRU scheme if anyone were motivated to do it.
+ */
+void steal_context(void)
+{
+	struct mm_struct *mm;
+
+	/* free up context `next_mmu_context' */
+	/* if we shouldn't free context 0, don't... */
+	if (next_mmu_context < FIRST_CONTEXT)
+		next_mmu_context = FIRST_CONTEXT;
+	mm = context_mm[next_mmu_context];
+	flush_tlb_mm(mm);
+	destroy_context(mm);
+}
-- 
cgit v0.10.2


From 15902bf63c8332946e5a1f48a72e3ae22874b11b Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:15 +0200
Subject: microblaze_mmu_v2: Page table - ioremap - pgtable.c/h, section update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 254fd4b..4c57a58 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -14,6 +16,8 @@
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
 		remap_pfn_range(vma, vaddr, pfn, size, prot)
 
+#ifndef CONFIG_MMU
+
 #define pgd_present(pgd)	(1) /* pages are always present on non MMU */
 #define pgd_none(pgd)		(0)
 #define pgd_bad(pgd)		(0)
@@ -47,6 +51,538 @@ static inline int pte_file(pte_t pte) { return 0; }
 
 #define arch_enter_lazy_cpu_mode()	do {} while (0)
 
+#else /* CONFIG_MMU */
+
+#include <asm-generic/4level-fixup.h>
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/processor.h>		/* For TASK_SIZE */
+#include <asm/mmu.h>
+#include <asm/page.h>
+
+#define FIRST_USER_ADDRESS	0
+
+extern unsigned long va_to_phys(unsigned long address);
+extern pte_t *va_to_pte(unsigned long address);
+extern unsigned long ioremap_bot, ioremap_base;
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+
+static inline int pte_special(pte_t pte)	{ return 0; }
+
+static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
+
+/* Start and end of the vmalloc area. */
+/* Make sure to map the vmalloc area above the pinned kernel memory area
+   of 32Mb.  */
+#define VMALLOC_START	(CONFIG_KERNEL_START + \
+				max(32 * 1024 * 1024UL, memory_size))
+#define VMALLOC_END	ioremap_bot
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * The MicroBlaze MMU is identical to the PPC-40x MMU, and uses a hash
+ * table containing PTEs, together with a set of 16 segment registers, to
+ * define the virtual to physical address mapping.
+ *
+ * We use the hash table as an extended TLB, i.e. a cache of currently
+ * active mappings.  We maintain a two-level page table tree, much
+ * like that used by the i386, for the sake of the Linux memory
+ * management code.  Low-level assembler code in hashtable.S
+ * (procedure hash_page) is responsible for extracting ptes from the
+ * tree and putting them into the hash table when necessary, and
+ * updating the accessed and modified bits in the page table tree.
+ */
+
+/*
+ * The MicroBlaze processor has a TLB architecture identical to PPC-40x. The
+ * instruction and data sides share a unified, 64-entry, semi-associative
+ * TLB which is maintained totally under software control. In addition, the
+ * instruction side has a hardware-managed, 2,4, or 8-entry, fully-associative
+ * TLB which serves as a first level to the shared TLB. These two TLBs are
+ * known as the UTLB and ITLB, respectively (see "mmu.h" for definitions).
+ */
+
+/*
+ * The normal case is that PTEs are 32-bits and we have a 1-page
+ * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages.  -- paulus
+ *
+ */
+
+/* PMD_SHIFT determines the size of the area mapped by the PTE pages */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a top-level page table entry can map */
+#define PGDIR_SHIFT	PMD_SHIFT
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/*
+ * entries per page directory level: our page-table tree is two-level, so
+ * we don't really have any PMD directory.
+ */
+#define PTRS_PER_PTE	(1 << PTE_SHIFT)
+#define PTRS_PER_PMD	1
+#define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+#define FIRST_USER_PGD_NR	0
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+
+#define pte_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pte "PTE_FMT".\n", \
+		__FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pmd %08lx.\n", \
+		__FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", \
+		__FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Bits in a linux-style PTE.  These match the bits in the
+ * (hardware-defined) PTE as closely as possible.
+ */
+
+/* There are several potential gotchas here.  The hardware TLBLO
+ * field looks like this:
+ *
+ * 0  1  2  3  4  ... 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ * RPN.....................  0  0 EX WR ZSEL.......  W  I  M  G
+ *
+ * Where possible we make the Linux PTE bits match up with this
+ *
+ * - bits 20 and 21 must be cleared, because we use 4k pages (4xx can
+ * support down to 1k pages), this is done in the TLBMiss exception
+ * handler.
+ * - We use only zones 0 (for kernel pages) and 1 (for user pages)
+ * of the 16 available.  Bit 24-26 of the TLB are cleared in the TLB
+ * miss handler.  Bit 27 is PAGE_USER, thus selecting the correct
+ * zone.
+ * - PRESENT *must* be in the bottom two bits because swap cache
+ * entries use the top 30 bits.  Because 4xx doesn't support SMP
+ * anyway, M is irrelevant so we borrow it for PAGE_PRESENT.  Bit 30
+ * is cleared in the TLB miss handler before the TLB entry is loaded.
+ * - All other bits of the PTE are loaded into TLBLO without
+ *  * modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
+ * software PTE bits.  We actually use use bits 21, 24, 25, and
+ * 30 respectively for the software bits: ACCESSED, DIRTY, RW, and
+ * PRESENT.
+ */
+
+/* Definitions for MicroBlaze. */
+#define	_PAGE_GUARDED	0x001	/* G: page is guarded from prefetch */
+#define _PAGE_PRESENT	0x002	/* software: PTE contains a translation */
+#define	_PAGE_NO_CACHE	0x004	/* I: caching is inhibited */
+#define	_PAGE_WRITETHRU	0x008	/* W: caching is write-through */
+#define	_PAGE_USER	0x010	/* matches one of the zone permission bits */
+#define	_PAGE_RW	0x040	/* software: Writes permitted */
+#define	_PAGE_DIRTY	0x080	/* software: dirty page */
+#define _PAGE_HWWRITE	0x100	/* hardware: Dirty & RW, set in exception */
+#define _PAGE_HWEXEC	0x200	/* hardware: EX permission */
+#define _PAGE_ACCESSED	0x400	/* software: R: page referenced */
+#define _PMD_PRESENT	PAGE_MASK
+
+/*
+ * Some bits are unused...
+ */
+#ifndef _PAGE_HASHPTE
+#define _PAGE_HASHPTE	0
+#endif
+#ifndef _PTE_NONE_MASK
+#define _PTE_NONE_MASK	0
+#endif
+#ifndef _PAGE_SHARED
+#define _PAGE_SHARED	0
+#endif
+#ifndef _PAGE_HWWRITE
+#define _PAGE_HWWRITE	0
+#endif
+#ifndef _PAGE_HWEXEC
+#define _PAGE_HWEXEC	0
+#endif
+#ifndef _PAGE_EXEC
+#define _PAGE_EXEC	0
+#endif
+
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+/*
+ * Note: the _PAGE_COHERENT bit automatically gets set in the hardware
+ * PTE if CONFIG_SMP is defined (hash_page does this); there is no need
+ * to have it in the Linux PTE, and in fact the bit could be reused for
+ * another purpose.  -- paulus.
+ */
+#define _PAGE_BASE	(_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_WRENABLE	(_PAGE_RW | _PAGE_DIRTY | _PAGE_HWWRITE)
+
+#define _PAGE_KERNEL \
+	(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_SHARED | _PAGE_HWEXEC)
+
+#define _PAGE_IO	(_PAGE_KERNEL | _PAGE_NO_CACHE | _PAGE_GUARDED)
+
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X \
+		__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#define PAGE_KERNEL	__pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_SHARED)
+#define PAGE_KERNEL_CI	__pgprot(_PAGE_IO)
+
+/*
+ * We consider execute permission the same as read.
+ * Also, write permissions imply read permissions.
+ */
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY_X
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY_X
+#define __P100	PAGE_READONLY
+#define __P101	PAGE_READONLY_X
+#define __P110	PAGE_COPY
+#define __P111	PAGE_COPY_X
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY_X
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED_X
+#define __S100	PAGE_READONLY
+#define __S101	PAGE_READONLY_X
+#define __S110	PAGE_SHARED
+#define __S111	PAGE_SHARED_X
+
+#ifndef __ASSEMBLY__
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[1024];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#endif /* __ASSEMBLY__ */
+
+#define pte_none(pte)		((pte_val(pte) & ~_PTE_NONE_MASK) == 0)
+#define pte_present(pte)	(pte_val(pte) & _PAGE_PRESENT)
+#define pte_clear(mm, addr, ptep) \
+	do { set_pte_at((mm), (addr), (ptep), __pte(0)); } while (0)
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		((pmd_val(pmd) & _PMD_PRESENT) == 0)
+#define	pmd_present(pmd)	((pmd_val(pmd) & _PMD_PRESENT) != 0)
+#define	pmd_clear(pmdp)		do { pmd_val(*(pmdp)) = 0; } while (0)
+
+#define pte_page(x)		(mem_map + (unsigned long) \
+				((pte_val(x) - memory_start) >> PAGE_SHIFT))
+#define PFN_SHIFT_OFFSET	(PAGE_SHIFT)
+
+#define pte_pfn(x)		(pte_val(x) >> PFN_SHIFT_OFFSET)
+
+#define pfn_pte(pfn, prot) \
+	__pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) | pgprot_val(prot))
+
+#ifndef __ASSEMBLY__
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)		{ return 0; }
+static inline int pgd_bad(pgd_t pgd)		{ return 0; }
+static inline int pgd_present(pgd_t pgd)	{ return 1; }
+#define pgd_clear(xp)				do { } while (0)
+#define pgd_page(pgd) \
+	((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_read(pte_t pte)  { return pte_val(pte) & _PAGE_USER; }
+static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
+static inline int pte_exec(pte_t pte)  { return pte_val(pte) & _PAGE_EXEC; }
+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
+/* FIXME */
+static inline int pte_file(pte_t pte)		{ return 0; }
+
+static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
+static inline void pte_cache(pte_t pte)   { pte_val(pte) &= ~_PAGE_NO_CACHE; }
+
+static inline pte_t pte_rdprotect(pte_t pte) \
+		{ pte_val(pte) &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte) \
+	{ pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; }
+static inline pte_t pte_exprotect(pte_t pte) \
+	{ pte_val(pte) &= ~_PAGE_EXEC; return pte; }
+static inline pte_t pte_mkclean(pte_t pte) \
+	{ pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
+static inline pte_t pte_mkold(pte_t pte) \
+	{ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+
+static inline pte_t pte_mkread(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkexec(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_RW; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte) \
+	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+static inline pte_t mk_pte_phys(phys_addr_t physpage, pgprot_t pgprot)
+{
+	pte_t pte;
+	pte_val(pte) = physpage | pgprot_val(pgprot);
+	return pte;
+}
+
+#define mk_pte(page, pgprot) \
+({									   \
+	pte_t pte;							   \
+	pte_val(pte) = (((page - mem_map) << PAGE_SHIFT) + memory_start) |  \
+			pgprot_val(pgprot);				   \
+	pte;								   \
+})
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	return pte;
+}
+
+/*
+ * Atomic PTE updates.
+ *
+ * pte_update clears and sets bit atomically, and returns
+ * the old pte value.
+ * The ((unsigned long)(p+1) - 4) hack is to get to the least-significant
+ * 32 bits of the PTE regardless of whether PTEs are 32 or 64 bits.
+ */
+static inline unsigned long pte_update(pte_t *p, unsigned long clr,
+				unsigned long set)
+{
+	unsigned long old, tmp, msr;
+
+	__asm__ __volatile__("\
+	msrclr	%2, 0x2\n\
+	nop\n\
+	lw	%0, %4, r0\n\
+	andn	%1, %0, %5\n\
+	or	%1, %1, %6\n\
+	sw	%1, %4, r0\n\
+	mts     rmsr, %2\n\
+	nop"
+	: "=&r" (old), "=&r" (tmp), "=&r" (msr), "=m" (*p)
+	: "r" ((unsigned long)(p+1) - 4), "r" (clr), "r" (set), "m" (*p)
+	: "cc");
+
+	return old;
+}
+
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+static inline void set_pte(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte)
+{
+	*ptep = pte;
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte)
+{
+	*ptep = pte;
+}
+
+static inline int ptep_test_and_clear_young(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
+}
+
+static inline int ptep_test_and_clear_dirty(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	return (pte_update(ptep, \
+		(_PAGE_DIRTY | _PAGE_HWWRITE), 0) & _PAGE_DIRTY) != 0;
+}
+
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
+}
+
+/*static inline void ptep_set_wrprotect(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), 0);
+}*/
+
+static inline void ptep_mkdirty(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	pte_update(ptep, 0, _PAGE_DIRTY);
+}
+
+/*#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)*/
+
+/* Convert pmd entry to page */
+/* our pmd entry is an effective address of pte table*/
+/* returns effective address of the pmd entry*/
+#define pmd_page_kernel(pmd)	((unsigned long) (pmd_val(pmd) & PAGE_MASK))
+
+/* returns struct *page of the pmd entry*/
+#define pmd_page(pmd)	(pfn_to_page(__pa(pmd_val(pmd)) >> PAGE_SHIFT))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* to find an entry in a page-table-directory */
+#define pgd_index(address)	 ((address) >> PGDIR_SHIFT)
+#define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
+
+/* Find an entry in the second-level page table.. */
+static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
+{
+	return (pmd_t *) dir;
+}
+
+/* Find an entry in the third-level page table.. */
+#define pte_index(address)		\
+	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir, addr)	\
+	((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr))
+#define pte_offset_map(dir, addr)		\
+	((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE0) + pte_index(addr))
+#define pte_offset_map_nested(dir, addr)	\
+	((pte_t *) kmap_atomic(pmd_page(*(dir)), KM_PTE1) + pte_index(addr))
+
+#define pte_unmap(pte)		kunmap_atomic(pte, KM_PTE0)
+#define pte_unmap_nested(pte)	kunmap_atomic(pte, KM_PTE1)
+
+/* Encode and decode a nonlinear file mapping entry */
+#define PTE_FILE_MAX_BITS	29
+#define pte_to_pgoff(pte)	(pte_val(pte) >> 3)
+#define pgoff_to_pte(off)	((pte_t) { ((off) << 3) })
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+/*
+ * When flushing the tlb entry for a page, we also need to flush the hash
+ * table entry.  flush_hash_page is assembler (for speed) in hashtable.S.
+ */
+extern int flush_hash_page(unsigned context, unsigned long va, pte_t *ptep);
+
+/* Add an HPTE to the hash table */
+extern void add_hash_page(unsigned context, unsigned long va, pte_t *ptep);
+
+/*
+ * Encode and decode a swap entry.
+ * Note that the bits we use in a PTE for representing a swap entry
+ * must not include the _PAGE_PRESENT bit, or the _PAGE_HASHPTE bit
+ * (if used).  -- paulus
+ */
+#define __swp_type(entry)		((entry).val & 0x3f)
+#define __swp_offset(entry)	((entry).val >> 6)
+#define __swp_entry(type, offset) \
+		((swp_entry_t) { (type) | ((offset) << 6) })
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) >> 2 })
+#define __swp_entry_to_pte(x)	((pte_t) { (x).val << 2 })
+
+
+/* CONFIG_APUS */
+/* For virtual address to physical address conversion */
+extern void cache_clear(__u32 addr, int length);
+extern void cache_push(__u32 addr, int length);
+extern int mm_end_of_chunk(unsigned long addr, int len);
+extern unsigned long iopa(unsigned long addr);
+/* extern unsigned long mm_ptov(unsigned long addr) \
+	__attribute__ ((const)); TBD */
+
+/* Values for nocacheflag and cmode */
+/* These are not used by the APUS kernel_map, but prevents
+ * compilation errors.
+ */
+#define	IOMAP_FULL_CACHING	0
+#define	IOMAP_NOCACHE_SER	1
+#define	IOMAP_NOCACHE_NONSER	2
+#define	IOMAP_NO_COPYBACK	3
+
+/*
+ * Map some physical address range into the kernel address space.
+ */
+extern unsigned long kernel_map(unsigned long paddr, unsigned long size,
+				int nocacheflag, unsigned long *memavailp);
+
+/*
+ * Set cache mode of (kernel space) address range.
+ */
+extern void kernel_set_cachemode(unsigned long address, unsigned long size,
+				unsigned int cmode);
+
+/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+#define kern_addr_valid(addr)	(1)
+
+#define io_remap_page_range remap_page_range
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+
+void do_page_fault(struct pt_regs *regs, unsigned long address,
+		   unsigned long error_code);
+
+void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
+			     unsigned int size, int flags);
+
+void __init adjust_total_lowmem(void);
+void mapin_ram(void);
+int map_page(unsigned long va, phys_addr_t pa, int flags);
+
+extern int mem_init_done;
+extern unsigned long ioremap_base;
+extern unsigned long ioremap_bot;
+
+asmlinkage void __init mmu_init(void);
+
+void __init *early_get_page(void);
+
+void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle);
+void consistent_free(void *vaddr);
+void consistent_sync(void *vaddr, size_t size, int direction);
+void consistent_sync_page(struct page *page, unsigned long offset,
+	size_t size, int direction);
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
+#endif /* CONFIG_MMU */
+
 #ifndef __ASSEMBLY__
 #include <asm-generic/pgtable.h>
 
diff --git a/arch/microblaze/include/asm/sections.h b/arch/microblaze/include/asm/sections.h
index 8434a43..4487e15 100644
--- a/arch/microblaze/include/asm/sections.h
+++ b/arch/microblaze/include/asm/sections.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -14,6 +16,7 @@
 # ifndef __ASSEMBLY__
 extern char _ssbss[], _esbss[];
 extern unsigned long __ivt_start[], __ivt_end[];
+extern char _etext[], _stext[];
 
 #  ifdef CONFIG_MTD_UCLINUX
 extern char *_ebss;
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
new file mode 100644
index 0000000..46c4ca5
--- /dev/null
+++ b/arch/microblaze/mm/pgtable.c
@@ -0,0 +1,286 @@
+/*
+ *  This file contains the routines setting up the linux page tables.
+ *
+ * Copyright (C) 2008 Michal Simek
+ * Copyright (C) 2008 PetaLogix
+ *
+ *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ *  Derived from arch/ppc/mm/pgtable.c:
+ *    -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This file is subject to the terms and conditions of the GNU General
+ *  Public License.  See the file COPYING in the main directory of this
+ *  archive for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <linux/io.h>
+#include <asm/mmu.h>
+#include <asm/sections.h>
+
+#define flush_HPTE(X, va, pg)	_tlbie(va)
+
+unsigned long ioremap_base;
+unsigned long ioremap_bot;
+
+/* The maximum lowmem defaults to 768Mb, but this can be configured to
+ * another value.
+ */
+#define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
+
+#ifndef CONFIG_SMP
+struct pgtable_cache_struct quicklists;
+#endif
+
+static void __iomem *__ioremap(phys_addr_t addr, unsigned long size,
+		unsigned long flags)
+{
+	unsigned long v, i;
+	phys_addr_t p;
+	int err;
+
+	/*
+	 * Choose an address to map it to.
+	 * Once the vmalloc system is running, we use it.
+	 * Before then, we use space going down from ioremap_base
+	 * (ioremap_bot records where we're up to).
+	 */
+	p = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - p;
+
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using.
+	 * mem_init() sets high_memory so only do the check after that.
+	 *
+	 * However, allow remap of rootfs: TBD
+	 */
+	if (mem_init_done &&
+		p >= memory_start && p < virt_to_phys(high_memory) &&
+		!(p >= virt_to_phys((unsigned long)&__bss_stop) &&
+		p < virt_to_phys((unsigned long)__bss_stop))) {
+		printk(KERN_WARNING "__ioremap(): phys addr "PTE_FMT
+			" is RAM lr %p\n", (unsigned long)p,
+			__builtin_return_address(0));
+		return NULL;
+	}
+
+	if (size == 0)
+		return NULL;
+
+	/*
+	 * Is it already mapped? If the whole area is mapped then we're
+	 * done, otherwise remap it since we want to keep the virt addrs for
+	 * each request contiguous.
+	 *
+	 * We make the assumption here that if the bottom and top
+	 * of the range we want are mapped then it's mapped to the
+	 * same virt address (and this is contiguous).
+	 *  -- Cort
+	 */
+
+	if (mem_init_done) {
+		struct vm_struct *area;
+		area = get_vm_area(size, VM_IOREMAP);
+		if (area == NULL)
+			return NULL;
+		v = VMALLOC_VMADDR(area->addr);
+	} else {
+		v = (ioremap_bot -= size);
+	}
+
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= _PAGE_KERNEL;
+	if (flags & _PAGE_NO_CACHE)
+		flags |= _PAGE_GUARDED;
+
+	err = 0;
+	for (i = 0; i < size && err == 0; i += PAGE_SIZE)
+		err = map_page(v + i, p + i, flags);
+	if (err) {
+		if (mem_init_done)
+			vfree((void *)v);
+		return NULL;
+	}
+
+	return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
+}
+
+void __iomem *ioremap(phys_addr_t addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE);
+}
+EXPORT_SYMBOL(ioremap);
+
+void iounmap(void *addr)
+{
+	if (addr > high_memory && (unsigned long) addr < ioremap_bot)
+		vfree((void *) (PAGE_MASK & (unsigned long) addr));
+}
+EXPORT_SYMBOL(iounmap);
+
+
+int map_page(unsigned long va, phys_addr_t pa, int flags)
+{
+	pmd_t *pd;
+	pte_t *pg;
+	int err = -ENOMEM;
+	/* spin_lock(&init_mm.page_table_lock); */
+	/* Use upper 10 bits of VA to index the first level map */
+	pd = pmd_offset(pgd_offset_k(va), va);
+	/* Use middle 10 bits of VA to index the second-level map */
+	pg = pte_alloc_kernel(pd, va); /* from powerpc - pgtable.c */
+	/* pg = pte_alloc_kernel(&init_mm, pd, va); */
+
+	if (pg != NULL) {
+		err = 0;
+		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
+				__pgprot(flags)));
+		if (mem_init_done)
+			flush_HPTE(0, va, pmd_val(*pd));
+			/* flush_HPTE(0, va, pg); */
+
+	}
+	/* spin_unlock(&init_mm.page_table_lock); */
+	return err;
+}
+
+void __init adjust_total_lowmem(void)
+{
+/* TBD */
+#if 0
+	unsigned long max_low_mem = MAX_LOW_MEM;
+
+	if (total_lowmem > max_low_mem) {
+		total_lowmem = max_low_mem;
+#ifndef CONFIG_HIGHMEM
+		printk(KERN_INFO "Warning, memory limited to %ld Mb, use "
+				"CONFIG_HIGHMEM to reach %ld Mb\n",
+				max_low_mem >> 20, total_memory >> 20);
+		total_memory = total_lowmem;
+#endif /* CONFIG_HIGHMEM */
+	}
+#endif
+}
+
+static void show_tmem(unsigned long tmem)
+{
+	volatile unsigned long a;
+	a = a + tmem;
+}
+
+/*
+ * Map in all of physical memory starting at CONFIG_KERNEL_START.
+ */
+void __init mapin_ram(void)
+{
+	unsigned long v, p, s, f;
+
+	v = CONFIG_KERNEL_START;
+	p = memory_start;
+	show_tmem(memory_size);
+	for (s = 0; s < memory_size; s += PAGE_SIZE) {
+		f = _PAGE_PRESENT | _PAGE_ACCESSED |
+				_PAGE_SHARED | _PAGE_HWEXEC;
+		if ((char *) v < _stext || (char *) v >= _etext)
+			f |= _PAGE_WRENABLE;
+		else
+			/* On the MicroBlaze, no user access
+			   forces R/W kernel access */
+			f |= _PAGE_USER;
+		map_page(v, p, f);
+		v += PAGE_SIZE;
+		p += PAGE_SIZE;
+	}
+}
+
+/* is x a power of 2? */
+#define is_power_of_2(x)	((x) != 0 && (((x) & ((x) - 1)) == 0))
+
+/*
+ * Set up a mapping for a block of I/O.
+ * virt, phys, size must all be page-aligned.
+ * This should only be called before ioremap is called.
+ */
+void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
+			     unsigned int size, int flags)
+{
+	int i;
+
+	if (virt > CONFIG_KERNEL_START && virt < ioremap_bot)
+		ioremap_bot = ioremap_base = virt;
+
+	/* Put it in the page tables. */
+	for (i = 0; i < size; i += PAGE_SIZE)
+		map_page(virt + i, phys + i, flags);
+}
+
+/* Scan the real Linux page tables and return a PTE pointer for
+ * a virtual address in a context.
+ * Returns true (1) if PTE was found, zero otherwise.  The pointer to
+ * the PTE pointer is unmodified if PTE is not found.
+ */
+static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
+{
+	pgd_t	*pgd;
+	pmd_t	*pmd;
+	pte_t	*pte;
+	int     retval = 0;
+
+	pgd = pgd_offset(mm, addr & PAGE_MASK);
+	if (pgd) {
+		pmd = pmd_offset(pgd, addr & PAGE_MASK);
+		if (pmd_present(*pmd)) {
+			pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
+			if (pte) {
+				retval = 1;
+				*ptep = pte;
+			}
+		}
+	}
+	return retval;
+}
+
+/* Find physical address for this virtual address.  Normally used by
+ * I/O functions, but anyone can call it.
+ */
+unsigned long iopa(unsigned long addr)
+{
+	unsigned long pa;
+
+	pte_t *pte;
+	struct mm_struct *mm;
+
+	/* Allow mapping of user addresses (within the thread)
+	 * for DMA if necessary.
+	 */
+	if (addr < TASK_SIZE)
+		mm = current->mm;
+	else
+		mm = &init_mm;
+
+	pa = 0;
+	if (get_pteptr(mm, addr, &pte))
+		pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
+
+	return pa;
+}
-- 
cgit v0.10.2


From dc95be1f7188f0718ac922b6b6b72406c294d250 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:16 +0200
Subject: microblaze_mmu_v2: io.h MMU update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index 8b5853e..5c17342 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -12,6 +14,9 @@
 #include <asm/byteorder.h>
 #include <asm/page.h>
 #include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/mm.h>          /* Get struct page {...} */
+
 
 #define IO_SPACE_LIMIT (0xFFFFFFFF)
 
@@ -112,6 +117,30 @@ static inline void writel(unsigned int v, volatile void __iomem *addr)
 #define memcpy_fromio(a, b, c)	memcpy((a), (void *)(b), (c))
 #define memcpy_toio(a, b, c)	memcpy((void *)(a), (b), (c))
 
+#ifdef CONFIG_MMU
+
+#define mm_ptov(addr)		((void *)__phys_to_virt(addr))
+#define mm_vtop(addr)		((unsigned long)__virt_to_phys(addr))
+#define phys_to_virt(addr)	((void *)__phys_to_virt(addr))
+#define virt_to_phys(addr)	((unsigned long)__virt_to_phys(addr))
+#define virt_to_bus(addr)	((unsigned long)__virt_to_phys(addr))
+
+#define __page_address(page) \
+		(PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT))
+#define page_to_phys(page)	virt_to_phys((void *)__page_address(page))
+#define page_to_bus(page)	(page_to_phys(page))
+#define bus_to_virt(addr)	(phys_to_virt(addr))
+
+extern void iounmap(void *addr);
+/*extern void *__ioremap(phys_addr_t address, unsigned long size,
+		unsigned long flags);*/
+extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
+#define ioremap_writethrough(addr, size) ioremap((addr), (size))
+#define ioremap_nocache(addr, size)      ioremap((addr), (size))
+#define ioremap_fullcache(addr, size)    ioremap((addr), (size))
+
+#else /* CONFIG_MMU */
+
 /**
  *	virt_to_phys - map virtual addresses to physical
  *	@address: address to remap
@@ -160,6 +189,8 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size,
 #define iounmap(addr)		((void)0)
 #define ioremap_nocache(physaddr, size)	ioremap(physaddr, size)
 
+#endif /* CONFIG_MMU */
+
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
  * access
-- 
cgit v0.10.2


From 1f84e1ea0e87ad659cd6f6a6285d50c73a8d1a24 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:17 +0200
Subject: microblaze_mmu_v2: pgalloc.h and page.h

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 7238dcf..210e584 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -1,6 +1,8 @@
 /*
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * VM ops
+ *
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  * Changes for MMU support:
  *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
@@ -15,14 +17,15 @@
 
 #include <linux/pfn.h>
 #include <asm/setup.h>
+#include <linux/const.h>
+
+#ifdef __KERNEL__
 
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT	(12)
-#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-#ifdef __KERNEL__
-
 #ifndef __ASSEMBLY__
 
 #define PAGE_UP(addr)	(((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
@@ -35,6 +38,7 @@
 /* align addr on a size boundary - adjust address up if needed */
 #define _ALIGN(addr, size)	_ALIGN_UP(addr, size)
 
+#ifndef CONFIG_MMU
 /*
  * PAGE_OFFSET -- the first address of the first page of memory. When not
  * using MMU this corresponds to the first free page in physical memory (aligned
@@ -43,15 +47,44 @@
 extern unsigned int __page_offset;
 #define PAGE_OFFSET __page_offset
 
-#define copy_page(to, from)			memcpy((to), (from), PAGE_SIZE)
-#define get_user_page(vaddr)			__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)		free_page(addr)
+#else /* CONFIG_MMU */
 
-#define clear_page(pgaddr)			memset((pgaddr), 0, PAGE_SIZE)
+/*
+ * PAGE_OFFSET -- the first address of the first page of memory. With MMU
+ * it is set to the kernel start address (aligned on a page boundary).
+ *
+ * CONFIG_KERNEL_START is defined in arch/microblaze/config.in and used
+ * in arch/microblaze/Makefile.
+ */
+#define PAGE_OFFSET	CONFIG_KERNEL_START
 
+/*
+ * MAP_NR -- given an address, calculate the index of the page struct which
+ * points to the address's page.
+ */
+#define MAP_NR(addr) (((unsigned long)(addr) - PAGE_OFFSET) >> PAGE_SHIFT)
 
-#define clear_user_page(pgaddr, vaddr, page)	memset((pgaddr), 0, PAGE_SIZE)
-#define copy_user_page(vto, vfrom, vaddr, topg) \
+/*
+ * The basic type of a PTE - 32 bit physical addressing.
+ */
+typedef unsigned long pte_basic_t;
+#define PTE_SHIFT	(PAGE_SHIFT - 2)	/* 1024 ptes per page */
+#define PTE_FMT		"%.8lx"
+
+#endif /* CONFIG_MMU */
+
+#  ifndef CONFIG_MMU
+#  define copy_page(to, from)			memcpy((to), (from), PAGE_SIZE)
+#  define get_user_page(vaddr)			__get_free_page(GFP_KERNEL)
+#  define free_user_page(page, addr)		free_page(addr)
+#  else /* CONFIG_MMU */
+extern void copy_page(void *to, void *from);
+#  endif /* CONFIG_MMU */
+
+# define clear_page(pgaddr)			memset((pgaddr), 0, PAGE_SIZE)
+
+# define clear_user_page(pgaddr, vaddr, page)	memset((pgaddr), 0, PAGE_SIZE)
+# define copy_user_page(vto, vfrom, vaddr, topg) \
 			memcpy((vto), (vfrom), PAGE_SIZE)
 
 /*
@@ -60,21 +93,32 @@ extern unsigned int __page_offset;
 typedef struct page *pgtable_t;
 typedef struct { unsigned long	pte; }		pte_t;
 typedef struct { unsigned long	pgprot; }	pgprot_t;
+/* FIXME this can depend on linux kernel version */
+#   ifdef CONFIG_MMU
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pgd; } pgd_t;
+#   else /* CONFIG_MMU */
 typedef struct { unsigned long	ste[64]; }	pmd_t;
 typedef struct { pmd_t		pue[1]; }	pud_t;
 typedef struct { pud_t		pge[1]; }	pgd_t;
+#   endif /* CONFIG_MMU */
 
+# define pte_val(x)	((x).pte)
+# define pgprot_val(x)	((x).pgprot)
 
-#define pte_val(x)	((x).pte)
-#define pgprot_val(x)	((x).pgprot)
-#define pmd_val(x)	((x).ste[0])
-#define pud_val(x)	((x).pue[0])
-#define pgd_val(x)	((x).pge[0])
+#   ifdef CONFIG_MMU
+#   define pmd_val(x)      ((x).pmd)
+#   define pgd_val(x)      ((x).pgd)
+#   else  /* CONFIG_MMU */
+#   define pmd_val(x)	((x).ste[0])
+#   define pud_val(x)	((x).pue[0])
+#   define pgd_val(x)	((x).pge[0])
+#   endif  /* CONFIG_MMU */
 
-#define __pte(x)	((pte_t) { (x) })
-#define __pmd(x)	((pmd_t) { (x) })
-#define __pgd(x)	((pgd_t) { (x) })
-#define __pgprot(x)	((pgprot_t) { (x) })
+# define __pte(x)	((pte_t) { (x) })
+# define __pmd(x)	((pmd_t) { (x) })
+# define __pgd(x)	((pgd_t) { (x) })
+# define __pgprot(x)	((pgprot_t) { (x) })
 
 /**
  * Conversions for virtual address, physical address, pfn, and struct
@@ -94,44 +138,80 @@ extern unsigned long max_low_pfn;
 extern unsigned long min_low_pfn;
 extern unsigned long max_pfn;
 
-#define __pa(vaddr)		((unsigned long) (vaddr))
-#define __va(paddr)		((void *) (paddr))
+extern unsigned long memory_start;
+extern unsigned long memory_end;
+extern unsigned long memory_size;
 
-#define phys_to_pfn(phys)	(PFN_DOWN(phys))
-#define pfn_to_phys(pfn)	(PFN_PHYS(pfn))
+extern int page_is_ram(unsigned long pfn);
 
-#define virt_to_pfn(vaddr)	(phys_to_pfn((__pa(vaddr))))
-#define pfn_to_virt(pfn)	__va(pfn_to_phys((pfn)))
+# define phys_to_pfn(phys)	(PFN_DOWN(phys))
+# define pfn_to_phys(pfn)	(PFN_PHYS(pfn))
 
-#define virt_to_page(vaddr)	(pfn_to_page(virt_to_pfn(vaddr)))
-#define page_to_virt(page)	(pfn_to_virt(page_to_pfn(page)))
+# define virt_to_pfn(vaddr)	(phys_to_pfn((__pa(vaddr))))
+# define pfn_to_virt(pfn)	__va(pfn_to_phys((pfn)))
 
-#define page_to_phys(page)	(pfn_to_phys(page_to_pfn(page)))
-#define page_to_bus(page)	(page_to_phys(page))
-#define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
+#  ifdef CONFIG_MMU
+#  define virt_to_page(kaddr) 	(mem_map +  MAP_NR(kaddr))
+#  else /* CONFIG_MMU */
+#  define virt_to_page(vaddr)	(pfn_to_page(virt_to_pfn(vaddr)))
+#  define page_to_virt(page)	(pfn_to_virt(page_to_pfn(page)))
+#  define page_to_phys(page)	(pfn_to_phys(page_to_pfn(page)))
+#  define page_to_bus(page)	(page_to_phys(page))
+#  define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
+#  endif /* CONFIG_MMU */
 
-extern unsigned int memory_start;
-extern unsigned int memory_end;
-extern unsigned int memory_size;
+#  ifndef CONFIG_MMU
+#  define pfn_valid(pfn)	((pfn) >= min_low_pfn && (pfn) <= max_mapnr)
+#  define ARCH_PFN_OFFSET	(PAGE_OFFSET >> PAGE_SHIFT)
+#  else /* CONFIG_MMU */
+#  define ARCH_PFN_OFFSET	(memory_start >> PAGE_SHIFT)
+#  define pfn_valid(pfn)	((pfn) < (max_mapnr + ARCH_PFN_OFFSET))
+#  define VALID_PAGE(page) 	((page - mem_map) < max_mapnr)
+#  endif /* CONFIG_MMU */
 
-#define pfn_valid(pfn)		((pfn) >= min_low_pfn && (pfn) < max_mapnr)
+# endif /* __ASSEMBLY__ */
 
-#define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
+#define	virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
 
-#else
-#define tophys(rd, rs)	(addik rd, rs, 0)
-#define tovirt(rd, rs)	(addik rd, rs, 0)
-#endif /* __ASSEMBLY__ */
 
-#define virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))
+#  ifndef CONFIG_MMU
+#  define __pa(vaddr)	((unsigned long) (vaddr))
+#  define __va(paddr)	((void *) (paddr))
+#  else /* CONFIG_MMU */
+#  define __pa(x)	__virt_to_phys((unsigned long)(x))
+#  define __va(x)	((void *)__phys_to_virt((unsigned long)(x)))
+#  endif /* CONFIG_MMU */
+
 
-/* Convert between virtual and physical address for MMU.  */
-/* Handle MicroBlaze processor with virtual memory.  */
+/* Convert between virtual and physical address for MMU. */
+/* Handle MicroBlaze processor with virtual memory. */
+#ifndef CONFIG_MMU
 #define __virt_to_phys(addr)	addr
 #define __phys_to_virt(addr)	addr
+#define tophys(rd, rs)	addik rd, rs, 0
+#define tovirt(rd, rs)	addik rd, rs, 0
+#else
+#define __virt_to_phys(addr) \
+	((addr) + CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START)
+#define __phys_to_virt(addr) \
+	((addr) + CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR)
+#define tophys(rd, rs) \
+	addik rd, rs, (CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START)
+#define tovirt(rd, rs) \
+	addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR)
+#endif /* CONFIG_MMU */
 
 #define TOPHYS(addr)  __virt_to_phys(addr)
 
+#ifdef CONFIG_MMU
+#ifdef CONFIG_CONTIGUOUS_PAGE_ALLOC
+#define WANT_PAGE_VIRTUAL 1 /* page alloc 2 relies on this */
+#endif
+
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#endif /* CONFIG_MMU */
+
 #endif /* __KERNEL__ */
 
 #include <asm-generic/memory_model.h>
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 2a4b354..59a757e 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -9,6 +11,195 @@
 #ifndef _ASM_MICROBLAZE_PGALLOC_H
 #define _ASM_MICROBLAZE_PGALLOC_H
 
+#ifdef CONFIG_MMU
+
+#include <linux/kernel.h>	/* For min/max macros */
+#include <linux/highmem.h>
+#include <asm/setup.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#define PGDIR_ORDER	0
+
+/*
+ * This is handled very differently on MicroBlaze since out page tables
+ * are all 0's and I want to be able to use these zero'd pages elsewhere
+ * as well - it gives us quite a speedup.
+ * -- Cort
+ */
+extern struct pgtable_cache_struct {
+	unsigned long *pgd_cache;
+	unsigned long *pte_cache;
+	unsigned long pgtable_cache_sz;
+} quicklists;
+
+#define pgd_quicklist		(quicklists.pgd_cache)
+#define pmd_quicklist		((unsigned long *)0)
+#define pte_quicklist		(quicklists.pte_cache)
+#define pgtable_cache_size	(quicklists.pgtable_cache_sz)
+
+extern unsigned long *zero_cache; /* head linked list of pre-zero'd pages */
+extern atomic_t zero_sz; /* # currently pre-zero'd pages */
+extern atomic_t zeropage_hits; /* # zero'd pages request that we've done */
+extern atomic_t zeropage_calls; /* # zero'd pages request that've been made */
+extern atomic_t zerototal; /* # pages zero'd over time */
+
+#define zero_quicklist		(zero_cache)
+#define zero_cache_sz	 	(zero_sz)
+#define zero_cache_calls	(zeropage_calls)
+#define zero_cache_hits		(zeropage_hits)
+#define zero_cache_total	(zerototal)
+
+/*
+ * return a pre-zero'd page from the list,
+ * return NULL if none available -- Cort
+ */
+extern unsigned long get_zero_page_fast(void);
+
+extern void __bad_pte(pmd_t *pmd);
+
+extern inline pgd_t *get_pgd_slow(void)
+{
+	pgd_t *ret;
+
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGDIR_ORDER);
+	if (ret != NULL)
+		clear_page(ret);
+	return ret;
+}
+
+extern inline pgd_t *get_pgd_fast(void)
+{
+	unsigned long *ret;
+
+	ret = pgd_quicklist;
+	if (ret != NULL) {
+		pgd_quicklist = (unsigned long *)(*ret);
+		ret[0] = 0;
+		pgtable_cache_size--;
+	} else
+		ret = (unsigned long *)get_pgd_slow();
+	return (pgd_t *)ret;
+}
+
+extern inline void free_pgd_fast(pgd_t *pgd)
+{
+	*(unsigned long **)pgd = pgd_quicklist;
+	pgd_quicklist = (unsigned long *) pgd;
+	pgtable_cache_size++;
+}
+
+extern inline void free_pgd_slow(pgd_t *pgd)
+{
+	free_page((unsigned long)pgd);
+}
+
+#define pgd_free(mm, pgd)        free_pgd_fast(pgd)
+#define pgd_alloc(mm)		get_pgd_fast()
+
+#define pmd_pgtable(pmd)	pmd_page(pmd)
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+#define pmd_alloc_one_fast(mm, address)	({ BUG(); ((pmd_t *)1); })
+#define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
+/* FIXME two definition - look below */
+#define pmd_free(mm, x)			do { } while (0)
+#define pgd_populate(mm, pmd, pte)	BUG()
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+		unsigned long address)
+{
+	pte_t *pte;
+	extern int mem_init_done;
+	extern void *early_get_page(void);
+	if (mem_init_done) {
+		pte = (pte_t *)__get_free_page(GFP_KERNEL |
+					__GFP_REPEAT | __GFP_ZERO);
+	} else {
+		pte = (pte_t *)early_get_page();
+		if (pte)
+			clear_page(pte);
+	}
+	return pte;
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+		unsigned long address)
+{
+	struct page *ptepage;
+
+#ifdef CONFIG_HIGHPTE
+	int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+#else
+	int flags = GFP_KERNEL | __GFP_REPEAT;
+#endif
+
+	ptepage = alloc_pages(flags, 0);
+	if (ptepage)
+		clear_highpage(ptepage);
+	return ptepage;
+}
+
+static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
+		unsigned long address)
+{
+	unsigned long *ret;
+
+	ret = pte_quicklist;
+	if (ret != NULL) {
+		pte_quicklist = (unsigned long *)(*ret);
+		ret[0] = 0;
+		pgtable_cache_size--;
+	}
+	return (pte_t *)ret;
+}
+
+extern inline void pte_free_fast(pte_t *pte)
+{
+	*(unsigned long **)pte = pte_quicklist;
+	pte_quicklist = (unsigned long *) pte;
+	pgtable_cache_size++;
+}
+
+extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+extern inline void pte_free_slow(struct page *ptepage)
+{
+	__free_page(ptepage);
+}
+
+extern inline void pte_free(struct mm_struct *mm, struct page *ptepage)
+{
+	__free_page(ptepage);
+}
+
+#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
+
+#define pmd_populate(mm, pmd, pte)	(pmd_val(*(pmd)) = page_address(pte))
+
+#define pmd_populate_kernel(mm, pmd, pte) \
+		(pmd_val(*(pmd)) = (unsigned long) (pte))
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+#define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
+/*#define pmd_free(mm, x)			do { } while (0)*/
+#define __pmd_free_tlb(tlb, x)		do { } while (0)
+#define pgd_populate(mm, pmd, pte)	BUG()
+
+extern int do_check_pgt_cache(int, int);
+
+#endif /* CONFIG_MMU */
+
 #define check_pgt_cache()	do {} while (0)
 
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
-- 
cgit v0.10.2


From 5233806dfe6f88fb1a01db3729eeda78f65bcbd1 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:18 +0200
Subject: microblaze_mmu_v2: Update process creation for MMU

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 9329029..563c6b9 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -26,14 +26,15 @@ extern const struct seq_operations cpuinfo_op;
 # define cpu_sleep()		do {} while (0)
 # define prepare_to_copy(tsk)	do {} while (0)
 
-# endif /* __ASSEMBLY__ */
-
 #define task_pt_regs(tsk) \
 		(((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1)
 
 /* Do necessary setup to start up a newly executed thread. */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp);
 
+# endif /* __ASSEMBLY__ */
+
+# ifndef CONFIG_MMU
 /*
  * User space process size: memory size
  *
@@ -85,4 +86,90 @@ extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 # define KSTK_EIP(tsk)	(0)
 # define KSTK_ESP(tsk)	(0)
 
+# else /* CONFIG_MMU */
+
+/*
+ * This is used to define STACK_TOP, and with MMU it must be below
+ * kernel base to select the correct PGD when handling MMU exceptions.
+ */
+# define TASK_SIZE	(CONFIG_KERNEL_START)
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+# define TASK_UNMAPPED_BASE	(TASK_SIZE / 8 * 3)
+
+# define THREAD_KSP	0
+
+#  ifndef __ASSEMBLY__
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#  define current_text_addr()	({ __label__ _l; _l: &&_l; })
+
+/* If you change this, you must change the associated assembly-languages
+ * constants defined below, THREAD_*.
+ */
+struct thread_struct {
+	/* kernel stack pointer (must be first field in structure) */
+	unsigned long	ksp;
+	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
+	void		*pgdir;		/* root of page-table tree */
+	struct pt_regs	*regs;		/* Pointer to saved register state */
+};
+
+#  define INIT_THREAD { \
+	.ksp   = sizeof init_stack + (unsigned long)init_stack, \
+	.pgdir = swapper_pg_dir, \
+}
+
+/* Do necessary setup to start up a newly executed thread.  */
+void start_thread(struct pt_regs *regs,
+		unsigned long pc, unsigned long usp);
+
+/* Free all resources held by a thread. */
+extern inline void release_thread(struct task_struct *dead_task)
+{
+}
+
+extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Free current thread data structures etc.  */
+static inline void exit_thread(void)
+{
+}
+
+/* Return saved (kernel) PC of a blocked thread.  */
+#  define thread_saved_pc(tsk)	\
+	((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
+
+unsigned long get_wchan(struct task_struct *p);
+
+/* The size allocated for kernel stacks. This _must_ be a power of two! */
+# define KERNEL_STACK_SIZE	0x2000
+
+/* Return some info about the user process TASK.  */
+#  define task_tos(task)	((unsigned long)(task) + KERNEL_STACK_SIZE)
+#  define task_regs(task) ((struct pt_regs *)task_tos(task) - 1)
+
+#  define task_pt_regs_plus_args(tsk) \
+	(((void *)task_pt_regs(tsk)) - STATE_SAVE_ARG_SPACE)
+
+#  define task_sp(task)	(task_regs(task)->r1)
+#  define task_pc(task)	(task_regs(task)->pc)
+/* Grotty old names for some.  */
+#  define KSTK_EIP(task)	(task_pc(task))
+#  define KSTK_ESP(task)	(task_sp(task))
+
+/* FIXME */
+#  define deactivate_mm(tsk, mm)	do { } while (0)
+
+#  define STACK_TOP	TASK_SIZE
+#  define STACK_TOP_MAX	STACK_TOP
+
+#  endif /* __ASSEMBLY__ */
+# endif /* CONFIG_MMU */
 #endif /* _ASM_MICROBLAZE_PROCESSOR_H */
diff --git a/arch/microblaze/include/asm/registers.h b/arch/microblaze/include/asm/registers.h
index 834142d..68c3afb 100644
--- a/arch/microblaze/include/asm/registers.h
+++ b/arch/microblaze/include/asm/registers.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -30,4 +30,21 @@
 #define FSR_UF		(1<<1) /* Underflow */
 #define FSR_DO		(1<<0) /* Denormalized operand error */
 
+# ifdef CONFIG_MMU
+/* Machine State Register (MSR) Fields */
+# define MSR_UM		(1<<11) /* User Mode */
+# define MSR_UMS	(1<<12) /* User Mode Save */
+# define MSR_VM		(1<<13) /* Virtual Mode */
+# define MSR_VMS	(1<<14) /* Virtual Mode Save */
+
+# define MSR_KERNEL	(MSR_EE | MSR_VM)
+/* # define MSR_USER	(MSR_KERNEL | MSR_UM | MSR_IE) */
+# define MSR_KERNEL_VMS	(MSR_EE | MSR_VMS)
+/* # define MSR_USER_VMS	(MSR_KERNEL_VMS | MSR_UMS | MSR_IE) */
+
+/* Exception State Register (ESR) Fields */
+# define	  ESR_DIZ	(1<<11) /* Zone Protection */
+# define	  ESR_S		(1<<10) /* Store instruction */
+
+# endif /* CONFIG_MMU */
 #endif /* _ASM_MICROBLAZE_REGISTERS_H */
diff --git a/arch/microblaze/include/asm/segment.h b/arch/microblaze/include/asm/segment.h
index 7f5dcc5..0e7102c 100644
--- a/arch/microblaze/include/asm/segment.h
+++ b/arch/microblaze/include/asm/segment.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -11,7 +11,7 @@
 #ifndef _ASM_MICROBLAZE_SEGMENT_H
 #define _ASM_MICROBLAZE_SEGMENT_H
 
-#ifndef __ASSEMBLY__
+# ifndef __ASSEMBLY__
 
 typedef struct {
 	unsigned long seg;
@@ -29,15 +29,21 @@ typedef struct {
  *
  * For non-MMU arch like Microblaze, KERNEL_DS and USER_DS is equal.
  */
-#  define KERNEL_DS	((mm_segment_t){0})
+# define MAKE_MM_SEG(s)       ((mm_segment_t) { (s) })
+
+#  ifndef CONFIG_MMU
+#  define KERNEL_DS	MAKE_MM_SEG(0)
 #  define USER_DS	KERNEL_DS
+#  else
+#  define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFF)
+#  define USER_DS	MAKE_MM_SEG(TASK_SIZE - 1)
+#  endif
 
 # define get_ds()	(KERNEL_DS)
 # define get_fs()	(current_thread_info()->addr_limit)
-# define set_fs(x) \
-		do { current_thread_info()->addr_limit = (x); } while (0)
+# define set_fs(val)	(current_thread_info()->addr_limit = (val))
 
-# define segment_eq(a, b)		((a).seg == (b).seg)
+# define segment_eq(a, b)	((a).seg == (b).seg)
 
 # endif /* __ASSEMBLY__ */
 #endif /* _ASM_MICROBLAZE_SEGMENT_H */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 685ad71..00b12c6 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -126,9 +126,54 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	else
 		childregs->r1 = ((unsigned long) ti) + THREAD_SIZE;
 
+#ifndef CONFIG_MMU
 	memset(&ti->cpu_context, 0, sizeof(struct cpu_context));
 	ti->cpu_context.r1 = (unsigned long)childregs;
 	ti->cpu_context.msr = (unsigned long)childregs->msr;
+#else
+
+	/* if creating a kernel thread then update the current reg (we don't
+	 * want to use the parent's value when restoring by POP_STATE) */
+	if (kernel_mode(regs))
+		/* save new current on stack to use POP_STATE */
+		childregs->CURRENT_TASK = (unsigned long)p;
+	/* if returning to user then use the parent's value of this register */
+
+	/* if we're creating a new kernel thread then just zeroing all
+	 * the registers. That's OK for a brand new thread.*/
+	/* Pls. note that some of them will be restored in POP_STATE */
+	if (kernel_mode(regs))
+		memset(&ti->cpu_context, 0, sizeof(struct cpu_context));
+	/* if this thread is created for fork/vfork/clone, then we want to
+	 * restore all the parent's context */
+	/* in addition to the registers which will be restored by POP_STATE */
+	else {
+		ti->cpu_context = *(struct cpu_context *)regs;
+		childregs->msr |= MSR_UMS;
+	}
+
+	/* FIXME STATE_SAVE_PT_OFFSET; */
+	ti->cpu_context.r1  = (unsigned long)childregs - STATE_SAVE_ARG_SPACE;
+	/* we should consider the fact that childregs is a copy of the parent
+	 * regs which were saved immediately after entering the kernel state
+	 * before enabling VM. This MSR will be restored in switch_to and
+	 * RETURN() and we want to have the right machine state there
+	 * specifically this state must have INTs disabled before and enabled
+	 * after performing rtbd
+	 * compose the right MSR for RETURN(). It will work for switch_to also
+	 * excepting for VM and UMS
+	 * don't touch UMS , CARRY and cache bits
+	 * right now MSR is a copy of parent one */
+	childregs->msr |= MSR_BIP;
+	childregs->msr &= ~MSR_EIP;
+	childregs->msr |= MSR_IE;
+	childregs->msr &= ~MSR_VM;
+	childregs->msr |= MSR_VMS;
+	childregs->msr |= MSR_EE; /* exceptions will be enabled*/
+
+	ti->cpu_context.msr = (childregs->msr|MSR_VM);
+	ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */
+#endif
 	ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8;
 
 	if (clone_flags & CLONE_SETTLS)
@@ -137,6 +182,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
+#ifndef CONFIG_MMU
 /*
  * Return saved PC of a blocked thread.
  */
@@ -151,6 +197,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	else
 		return ctx->r14;
 }
+#endif
 
 static void kernel_thread_helper(int (*fn)(void *), void *arg)
 {
@@ -189,3 +236,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
 	regs->r1 = usp;
 	regs->pt_mode = 0;
 }
+
+#ifdef CONFIG_MMU
+#include <linux/elfcore.h>
+/*
+ * Set up a thread for executing a new program
+ */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
+{
+	return 0; /* MicroBlaze has no separate FPU registers */
+}
+#endif /* CONFIG_MMU */
-- 
cgit v0.10.2


From 45be7d46a9928c6b8ed747e020748500da7e66f1 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:18 +0200
Subject: microblaze_mmu_v2: Update tlb.h and tlbflush.h

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h
index d1dfe37..c472d28 100644
--- a/arch/microblaze/include/asm/tlb.h
+++ b/arch/microblaze/include/asm/tlb.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -13,4 +15,10 @@
 
 #include <asm-generic/tlb.h>
 
+#ifdef CONFIG_MMU
+#define tlb_start_vma(tlb, vma)		do { } while (0)
+#define tlb_end_vma(tlb, vma)		do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
+#endif
+
 #endif /* _ASM_MICROBLAZE_TLB_H */
diff --git a/arch/microblaze/include/asm/tlbflush.h b/arch/microblaze/include/asm/tlbflush.h
index d7fe762..eb31a0e 100644
--- a/arch/microblaze/include/asm/tlbflush.h
+++ b/arch/microblaze/include/asm/tlbflush.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -9,6 +11,50 @@
 #ifndef _ASM_MICROBLAZE_TLBFLUSH_H
 #define _ASM_MICROBLAZE_TLBFLUSH_H
 
+#ifdef CONFIG_MMU
+
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/processor.h>	/* For TASK_SIZE */
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+
+extern void _tlbie(unsigned long address);
+extern void _tlbia(void);
+
+#define __tlbia()	_tlbia()
+
+static inline void local_flush_tlb_all(void)
+	{ __tlbia(); }
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+	{ __tlbia(); }
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+				unsigned long vmaddr)
+	{ _tlbie(vmaddr); }
+static inline void local_flush_tlb_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end)
+	{ __tlbia(); }
+
+#define flush_tlb_kernel_range(start, end)	do { } while (0)
+
+#define update_mmu_cache(vma, addr, pte)	do { } while (0)
+
+#define flush_tlb_all local_flush_tlb_all
+#define flush_tlb_mm local_flush_tlb_mm
+#define flush_tlb_page local_flush_tlb_page
+#define flush_tlb_range local_flush_tlb_range
+
+/*
+ * This is called in munmap when we have freed up some page-table
+ * pages.  We don't need to do anything here, there's nothing special
+ * about our page-table pages.  -- paulus
+ */
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+	unsigned long start, unsigned long end) { }
+
+#else /* CONFIG_MMU */
+
 #define flush_tlb()				BUG()
 #define flush_tlb_all()				BUG()
 #define flush_tlb_mm(mm)			BUG()
@@ -17,4 +63,6 @@
 #define flush_tlb_pgtables(mm, start, end)	BUG()
 #define flush_tlb_kernel_range(start, end)	BUG()
 
+#endif /* CONFIG_MMU */
+
 #endif /* _ASM_MICROBLAZE_TLBFLUSH_H */
-- 
cgit v0.10.2


From 627cef44f4f4dfc22bebf3a68378bf3e3bedd21e Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:19 +0200
Subject: microblaze_mmu_v2: MMU asm offset update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c
index aabd9e9..7bc7b68 100644
--- a/arch/microblaze/kernel/asm-offsets.c
+++ b/arch/microblaze/kernel/asm-offsets.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
  * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
@@ -68,16 +69,26 @@ int main(int argc, char *argv[])
 
 	/* struct task_struct */
 	DEFINE(TS_THREAD_INFO, offsetof(struct task_struct, stack));
+#ifdef CONFIG_MMU
+	DEFINE(TASK_STATE, offsetof(struct task_struct, state));
+	DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags));
+	DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
+	DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
+	DEFINE(TASK_MM, offsetof(struct task_struct, mm));
+	DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
+	DEFINE(TASK_PID, offsetof(struct task_struct, pid));
+	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
+	DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
+	BLANK();
+
+	DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
+	BLANK();
+#endif
 
 	/* struct thread_info */
 	DEFINE(TI_TASK, offsetof(struct thread_info, task));
-	DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain));
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
-	DEFINE(TI_STATUS, offsetof(struct thread_info, status));
-	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
-	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
 	DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
-	DEFINE(TI_RESTART_BLOCK, offsetof(struct thread_info, restart_block));
 	DEFINE(TI_CPU_CONTEXT, offsetof(struct thread_info, cpu_context));
 	BLANK();
 
-- 
cgit v0.10.2


From 23cfc369337fa106d08cbed0dc86527c67966ff2 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:20 +0200
Subject: microblaze_mmu_v2: Add CURRENT_TASK for entry.S

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/current.h b/arch/microblaze/include/asm/current.h
index 8375ea9..29303ed 100644
--- a/arch/microblaze/include/asm/current.h
+++ b/arch/microblaze/include/asm/current.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -9,6 +11,12 @@
 #ifndef _ASM_MICROBLAZE_CURRENT_H
 #define _ASM_MICROBLAZE_CURRENT_H
 
+/*
+ * Register used to hold the current task pointer while in the kernel.
+ * Any `call clobbered' register without a special meaning should be OK,
+ * but check asm/microblaze/kernel/entry.S to be sure.
+ */
+#define CURRENT_TASK	r31
 # ifndef __ASSEMBLY__
 /*
  * Dedicate r31 to keeping the current task pointer
-- 
cgit v0.10.2


From ca54502bd52a5d483f7ba076b613ad2ee43941da Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:21 +0200
Subject: microblaze_mmu_v2: entry.S, entry.h

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/entry.h b/arch/microblaze/include/asm/entry.h
index e4c3aef..61abbd2 100644
--- a/arch/microblaze/include/asm/entry.h
+++ b/arch/microblaze/include/asm/entry.h
@@ -1,8 +1,8 @@
 /*
  * Definitions used by low-level trap handlers
  *
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2007 - 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2007 John Williams <john.williams@petalogix.com>
  *
  * This file is subject to the terms and conditions of the GNU General
@@ -31,7 +31,40 @@ DECLARE_PER_CPU(unsigned int, R11_SAVE); /* Temp variable for entry */
 DECLARE_PER_CPU(unsigned int, CURRENT_SAVE); /* Saved current pointer */
 # endif /* __ASSEMBLY__ */
 
+#ifndef CONFIG_MMU
+
 /* noMMU hasn't any space for args */
 # define STATE_SAVE_ARG_SPACE	(0)
 
+#else /* CONFIG_MMU */
+
+/* If true, system calls save and restore all registers (except result
+ * registers, of course).  If false, then `call clobbered' registers
+ * will not be preserved, on the theory that system calls are basically
+ * function calls anyway, and the caller should be able to deal with it.
+ * This is a security risk, of course, as `internal' values may leak out
+ * after a system call, but that certainly doesn't matter very much for
+ * a processor with no MMU protection!  For a protected-mode kernel, it
+ * would be faster to just zero those registers before returning.
+ *
+ * I can not rely on the glibc implementation. If you turn it off make
+ * sure that r11/r12 is saved in user-space. --KAA
+ *
+ * These are special variables using by the kernel trap/interrupt code
+ * to save registers in, at a time when there are no spare registers we
+ * can use to do so, and we can't depend on the value of the stack
+ * pointer.  This means that they must be within a signed 16-bit
+ * displacement of 0x00000000.
+ */
+
+/* A `state save frame' is a struct pt_regs preceded by some extra space
+ * suitable for a function call stack frame. */
+
+/* Amount of room on the stack reserved for arguments and to satisfy the
+ * C calling conventions, in addition to the space used by the struct
+ * pt_regs that actually holds saved values. */
+#define STATE_SAVE_ARG_SPACE	(6*4) /* Up to six arguments */
+
+#endif /* CONFIG_MMU */
+
 #endif /* _ASM_MICROBLAZE_ENTRY_H */
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
new file mode 100644
index 0000000..91a0e7b
--- /dev/null
+++ b/arch/microblaze/kernel/entry.S
@@ -0,0 +1,1116 @@
+/*
+ * Low-level system-call handling, trap handlers and context-switching
+ *
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2003		John Williams <jwilliams@itee.uq.edu.au>
+ * Copyright (C) 2001,2002	NEC Corporation
+ * Copyright (C) 2001,2002	Miles Bader <miles@gnu.org>
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ * Written by Miles Bader <miles@gnu.org>
+ * Heavily modified by John Williams for Microblaze
+ */
+
+#include <linux/sys.h>
+#include <linux/linkage.h>
+
+#include <asm/entry.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/exceptions.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+
+#include <asm/page.h>
+#include <asm/unistd.h>
+
+#include <linux/errno.h>
+#include <asm/signal.h>
+
+/* The size of a state save frame. */
+#define STATE_SAVE_SIZE		(PT_SIZE + STATE_SAVE_ARG_SPACE)
+
+/* The offset of the struct pt_regs in a `state save frame' on the stack. */
+#define PTO	STATE_SAVE_ARG_SPACE /* 24 the space for args */
+
+#define C_ENTRY(name)	.globl name; .align 4; name
+
+/*
+ * Various ways of setting and clearing BIP in flags reg.
+ * This is mucky, but necessary using microblaze version that
+ * allows msr ops to write to BIP
+ */
+#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
+	.macro	clear_bip
+	msrclr	r11, MSR_BIP
+	nop
+	.endm
+
+	.macro	set_bip
+	msrset	r11, MSR_BIP
+	nop
+	.endm
+
+	.macro	clear_eip
+	msrclr	r11, MSR_EIP
+	nop
+	.endm
+
+	.macro	set_ee
+	msrset	r11, MSR_EE
+	nop
+	.endm
+
+	.macro	disable_irq
+	msrclr	r11, MSR_IE
+	nop
+	.endm
+
+	.macro	enable_irq
+	msrset	r11, MSR_IE
+	nop
+	.endm
+
+	.macro	set_ums
+	msrset	r11, MSR_UMS
+	nop
+	msrclr	r11, MSR_VMS
+	nop
+	.endm
+
+	.macro	set_vms
+	msrclr	r11, MSR_UMS
+	nop
+	msrset	r11, MSR_VMS
+	nop
+	.endm
+
+	.macro	clear_vms_ums
+	msrclr	r11, MSR_VMS
+	nop
+	msrclr	r11, MSR_UMS
+	nop
+	.endm
+#else
+	.macro	clear_bip
+	mfs	r11, rmsr
+	nop
+	andi	r11, r11, ~MSR_BIP
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	set_bip
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_BIP
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	clear_eip
+	mfs	r11, rmsr
+	nop
+	andi	r11, r11, ~MSR_EIP
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	set_ee
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_EE
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	disable_irq
+	mfs	r11, rmsr
+	nop
+	andi	r11, r11, ~MSR_IE
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	enable_irq
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_IE
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro set_ums
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_VMS
+	andni	r11, r11, MSR_UMS
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	set_vms
+	mfs	r11, rmsr
+	nop
+	ori	r11, r11, MSR_VMS
+	andni	r11, r11, MSR_UMS
+	mts	rmsr, r11
+	nop
+	.endm
+
+	.macro	clear_vms_ums
+	mfs	r11, rmsr
+	nop
+	andni	r11, r11, (MSR_VMS|MSR_UMS)
+	mts	rmsr,r11
+	nop
+	.endm
+#endif
+
+/* Define how to call high-level functions. With MMU, virtual mode must be
+ * enabled when calling the high-level function. Clobbers R11.
+ * VM_ON, VM_OFF, DO_JUMP_BIPCLR, DO_CALL
+ */
+
+/* turn on virtual protected mode save */
+#define VM_ON		\
+	set_ums;		\
+	rted	r0, 2f;	\
+2: nop;
+
+/* turn off virtual protected mode save and user mode save*/
+#define VM_OFF			\
+	clear_vms_ums;			\
+	rted	r0, TOPHYS(1f);	\
+1: nop;
+
+#define SAVE_REGS \
+	swi	r2, r1, PTO+PT_R2;	/* Save SDA */			\
+	swi	r5, r1, PTO+PT_R5;					\
+	swi	r6, r1, PTO+PT_R6;					\
+	swi	r7, r1, PTO+PT_R7;					\
+	swi	r8, r1, PTO+PT_R8;					\
+	swi	r9, r1, PTO+PT_R9;					\
+	swi	r10, r1, PTO+PT_R10;					\
+	swi	r11, r1, PTO+PT_R11;	/* save clobbered regs after rval */\
+	swi	r12, r1, PTO+PT_R12;					\
+	swi	r13, r1, PTO+PT_R13;	/* Save SDA2 */			\
+	swi	r14, r1, PTO+PT_PC;	/* PC, before IRQ/trap */	\
+	swi	r15, r1, PTO+PT_R15;	/* Save LP */			\
+	swi	r18, r1, PTO+PT_R18;	/* Save asm scratch reg */	\
+	swi	r19, r1, PTO+PT_R19;					\
+	swi	r20, r1, PTO+PT_R20;					\
+	swi	r21, r1, PTO+PT_R21;					\
+	swi	r22, r1, PTO+PT_R22;					\
+	swi	r23, r1, PTO+PT_R23;					\
+	swi	r24, r1, PTO+PT_R24;					\
+	swi	r25, r1, PTO+PT_R25;					\
+	swi	r26, r1, PTO+PT_R26;					\
+	swi	r27, r1, PTO+PT_R27;					\
+	swi	r28, r1, PTO+PT_R28;					\
+	swi	r29, r1, PTO+PT_R29;					\
+	swi	r30, r1, PTO+PT_R30;					\
+	swi	r31, r1, PTO+PT_R31;	/* Save current task reg */	\
+	mfs	r11, rmsr;		/* save MSR */			\
+	nop;								\
+	swi	r11, r1, PTO+PT_MSR;
+
+#define RESTORE_REGS \
+	lwi	r11, r1, PTO+PT_MSR;					\
+	mts	rmsr , r11;						\
+	nop;								\
+	lwi	r2, r1, PTO+PT_R2;	/* restore SDA */		\
+	lwi	r5, r1, PTO+PT_R5;					\
+	lwi	r6, r1, PTO+PT_R6;					\
+	lwi	r7, r1, PTO+PT_R7;					\
+	lwi	r8, r1, PTO+PT_R8;					\
+	lwi	r9, r1, PTO+PT_R9;					\
+	lwi	r10, r1, PTO+PT_R10;					\
+	lwi	r11, r1, PTO+PT_R11;	/* restore clobbered regs after rval */\
+	lwi	r12, r1, PTO+PT_R12;					\
+	lwi	r13, r1, PTO+PT_R13;	/* restore SDA2 */		\
+	lwi	r14, r1, PTO+PT_PC;	/* RESTORE_LINK PC, before IRQ/trap */\
+	lwi	r15, r1, PTO+PT_R15;	/* restore LP */		\
+	lwi	r18, r1, PTO+PT_R18;	/* restore asm scratch reg */	\
+	lwi	r19, r1, PTO+PT_R19;					\
+	lwi	r20, r1, PTO+PT_R20;					\
+	lwi	r21, r1, PTO+PT_R21;					\
+	lwi	r22, r1, PTO+PT_R22;					\
+	lwi	r23, r1, PTO+PT_R23;					\
+	lwi	r24, r1, PTO+PT_R24;					\
+	lwi	r25, r1, PTO+PT_R25;					\
+	lwi	r26, r1, PTO+PT_R26;					\
+	lwi	r27, r1, PTO+PT_R27;					\
+	lwi	r28, r1, PTO+PT_R28;					\
+	lwi	r29, r1, PTO+PT_R29;					\
+	lwi	r30, r1, PTO+PT_R30;					\
+	lwi	r31, r1, PTO+PT_R31;	/* Restore cur task reg */
+
+.text
+
+/*
+ * User trap.
+ *
+ * System calls are handled here.
+ *
+ * Syscall protocol:
+ * Syscall number in r12, args in r5-r10
+ * Return value in r3
+ *
+ * Trap entered via brki instruction, so BIP bit is set, and interrupts
+ * are masked. This is nice, means we don't have to CLI before state save
+ */
+C_ENTRY(_user_exception):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	addi	r14, r14, 4	/* return address is 4 byte after call */
+	swi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11);	/* Save r11 */
+
+	lwi	r11, r0, TOPHYS(PER_CPU(KM));/* See if already in kernel mode.*/
+	beqi	r11, 1f;		/* Jump ahead if coming from user */
+/* Kernel-mode state save. */
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/
+	tophys(r1,r11);
+	swi	r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */
+	SAVE_REGS
+
+	addi	r11, r0, 1; 		/* Was in kernel-mode. */
+	swi	r11, r1, PTO+PT_MODE; /* pt_regs -> kernel mode */
+	brid	2f;
+	nop;				/* Fill delay slot */
+
+/* User-mode state save.  */
+1:
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11);	/* restore r11 */
+	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
+	tophys(r1,r1);
+	lwi	r1, r1, TS_THREAD_INFO;	/* get stack from task_struct */
+/* calculate kernel stack pointer from task struct 8k */
+	addik	r1, r1, THREAD_SIZE;
+	tophys(r1,r1);
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */
+	SAVE_REGS
+
+	swi	r0, r1, PTO+PT_MODE;			/* Was in user-mode. */
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));
+	swi	r11, r1, PTO+PT_R1;		/* Store user SP.  */
+	addi	r11, r0, 1;
+	swi	r11, r0, TOPHYS(PER_CPU(KM));	/* Now we're in kernel-mode.  */
+2:	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE));	/* get saved current */
+	/* Save away the syscall number.  */
+	swi	r12, r1, PTO+PT_R0;
+	tovirt(r1,r1)
+
+	la	r15, r0, ret_from_trap-8
+/* where the trap should return need -8 to adjust for rtsd r15, 8*/
+/* Jump to the appropriate function for the system call number in r12
+ * (r12 is not preserved), or return an error if r12 is not valid. The LP
+ * register should point to the location where
+ * the called function should return.  [note that MAKE_SYS_CALL uses label 1] */
+	/* See if the system call number is valid.  */
+	addi	r11, r12, -__NR_syscalls;
+	bgei	r11,1f;
+	/* Figure out which function to use for this system call.  */
+	/* Note Microblaze barrel shift is optional, so don't rely on it */
+	add	r12, r12, r12;			/* convert num -> ptr */
+	add	r12, r12, r12;
+
+	/* Trac syscalls and stored them to r0_ram */
+	lwi	r3, r12, 0x400 + TOPHYS(r0_ram)
+	addi	r3, r3, 1
+	swi	r3, r12, 0x400 + TOPHYS(r0_ram)
+
+	lwi	r12, r12, TOPHYS(sys_call_table); /* Function ptr */
+	/* Make the system call.  to r12*/
+	set_vms;
+	rtid	r12, 0;
+	nop;
+	/* The syscall number is invalid, return an error.  */
+1:	VM_ON;	/* RETURN() expects virtual mode*/
+	addi	r3, r0, -ENOSYS;
+	rtsd	r15,8;		/* looks like a normal subroutine return */
+	or 	r0, r0, r0
+
+
+/* Entry point used to return from a syscall/trap.  */
+/* We re-enable BIP bit before state restore */
+C_ENTRY(ret_from_trap):
+	set_bip;			/*  Ints masked for state restore*/
+	lwi	r11, r1, PTO+PT_MODE;
+/* See if returning to kernel mode, if so, skip resched &c.  */
+	bnei	r11, 2f;
+
+	/* We're returning to user mode, so check for various conditions that
+	 * trigger rescheduling. */
+	/* Get current task ptr into r11 */
+	add	r11, r0, CURRENT_TASK;	/* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;		/* get flags in thread info */
+	andi	r11, r11, _TIF_NEED_RESCHED;
+	beqi	r11, 5f;
+
+	swi	r3, r1, PTO + PT_R3; /* store syscall result */
+	swi	r4, r1, PTO + PT_R4;
+	bralid	r15, schedule;	/* Call scheduler */
+	nop;				/* delay slot */
+	lwi	r3, r1, PTO + PT_R3; /* restore syscall result */
+	lwi	r4, r1, PTO + PT_R4;
+
+	/* Maybe handle a signal */
+5:	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_SIGPENDING;
+	beqi	r11, 1f;		/* Signals to handle, handle them */
+
+	swi	r3, r1, PTO + PT_R3; /* store syscall result */
+	swi	r4, r1, PTO + PT_R4;
+	la	r5, r1, PTO;		/* Arg 1: struct pt_regs *regs */
+	add	r6, r0, r0;		/* Arg 2: sigset_t *oldset */
+	addi	r7, r0, 1;		/* Arg 3: int in_syscall */
+	bralid	r15, do_signal;	/* Handle any signals */
+	nop;
+	lwi	r3, r1, PTO + PT_R3; /* restore syscall result */
+	lwi	r4, r1, PTO + PT_R4;
+
+/* Finally, return to user state.  */
+1:	swi	r0, r0, PER_CPU(KM);	/* Now officially in user state. */
+	add	r11, r0, CURRENT_TASK;	/* Get current task ptr into r11 */
+	swi	r11, r0, PER_CPU(CURRENT_SAVE); /* save current */
+	VM_OFF;
+	tophys(r1,r1);
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+	lwi	r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */
+	bri	6f;
+
+/* Return to kernel state.  */
+2:	VM_OFF;
+	tophys(r1,r1);
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+	tovirt(r1,r1);
+6:
+TRAP_return:		/* Make global symbol for debugging */
+	rtbd	r14, 0;	/* Instructions to return from an IRQ */
+	nop;
+
+
+/* These syscalls need access to the struct pt_regs on the stack, so we
+   implement them in assembly (they're basically all wrappers anyway).  */
+
+C_ENTRY(sys_fork_wrapper):
+	addi	r5, r0, SIGCHLD			/* Arg 0: flags */
+	lwi	r6, r1, PTO+PT_R1	/* Arg 1: child SP (use parent's) */
+	la	r7, r1, PTO			/* Arg 2: parent context */
+	add	r8. r0, r0			/* Arg 3: (unused) */
+	add	r9, r0, r0;			/* Arg 4: (unused) */
+	add	r10, r0, r0;			/* Arg 5: (unused) */
+	brid	do_fork		/* Do real work (tail-call) */
+	nop;
+
+/* This the initial entry point for a new child thread, with an appropriate
+   stack in place that makes it look the the child is in the middle of an
+   syscall.  This function is actually `returned to' from switch_thread
+   (copy_thread makes ret_from_fork the return address in each new thread's
+   saved context).  */
+C_ENTRY(ret_from_fork):
+	bralid	r15, schedule_tail; /* ...which is schedule_tail's arg */
+	add	r3, r5, r0;	/* switch_thread returns the prev task */
+				/* ( in the delay slot ) */
+	add	r3, r0, r0;	/* Child's fork call should return 0. */
+	brid	ret_from_trap;	/* Do normal trap return */
+	nop;
+
+C_ENTRY(sys_vfork_wrapper):
+	la	r5, r1, PTO
+	brid	sys_vfork	/* Do real work (tail-call) */
+	nop
+
+C_ENTRY(sys_clone_wrapper):
+	bnei	r6, 1f;			/* See if child SP arg (arg 1) is 0. */
+	lwi	r6, r1, PTO+PT_R1;	/* If so, use paret's stack ptr */
+1:	la	r7, r1, PTO;			/* Arg 2: parent context */
+	add	r8, r0, r0;			/* Arg 3: (unused) */
+	add	r9, r0, r0;			/* Arg 4: (unused) */
+	add	r10, r0, r0;			/* Arg 5: (unused) */
+	brid	do_fork		/* Do real work (tail-call) */
+	nop;
+
+C_ENTRY(sys_execve_wrapper):
+	la	r8, r1, PTO;		/* add user context as 4th arg */
+	brid	sys_execve;	/* Do real work (tail-call).*/
+	nop;
+
+C_ENTRY(sys_sigsuspend_wrapper):
+	swi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	swi	r4, r1, PTO+PT_R4;
+	la	r6, r1, PTO;		/* add user context as 2nd arg */
+	bralid	r15, sys_sigsuspend; /* Do real work.*/
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	bri ret_from_trap /* fall through will not work here due to align */
+	nop;
+
+C_ENTRY(sys_rt_sigsuspend_wrapper):
+	swi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	swi	r4, r1, PTO+PT_R4;
+	la	r7, r1, PTO;		/* add user context as 3rd arg */
+	brlid	r15, sys_rt_sigsuspend;	/* Do real work.*/
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	bri ret_from_trap /* fall through will not work here due to align */
+	nop;
+
+
+C_ENTRY(sys_sigreturn_wrapper):
+	swi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	swi	r4, r1, PTO+PT_R4;
+	la	r5, r1, PTO;		/* add user context as 1st arg */
+	brlid	r15, sys_sigreturn;	/* Do real work.*/
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	bri ret_from_trap /* fall through will not work here due to align */
+	nop;
+
+C_ENTRY(sys_rt_sigreturn_wrapper):
+	swi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	swi	r4, r1, PTO+PT_R4;
+	la	r5, r1, PTO;		/* add user context as 1st arg */
+	brlid	r15, sys_rt_sigreturn	/* Do real work */
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	bri ret_from_trap /* fall through will not work here due to align */
+	nop;
+
+/*
+ * HW EXCEPTION rutine start
+ */
+
+#define SAVE_STATE	\
+	swi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* Save r11 */	\
+	set_bip;	/*equalize initial state for all possible entries*/\
+	clear_eip;							\
+	enable_irq;							\
+	set_ee;								\
+	/* See if already in kernel mode.*/				\
+	lwi	r11, r0, TOPHYS(PER_CPU(KM));				\
+	beqi	r11, 1f;		/* Jump ahead if coming from user */\
+	/* Kernel-mode state save.  */					\
+	/* Reload kernel stack-ptr. */					\
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));			\
+	tophys(r1,r11);							\
+	swi	r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */	\
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */\
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */\
+	/* store return registers separately because			\
+	 * this macros is use for others exceptions */			\
+	swi	r3, r1, PTO + PT_R3;					\
+	swi	r4, r1, PTO + PT_R4;					\
+	SAVE_REGS							\
+	/* PC, before IRQ/trap - this is one instruction above */	\
+	swi	r17, r1, PTO+PT_PC;					\
+									\
+	addi	r11, r0, 1; 		/* Was in kernel-mode.  */	\
+	swi	r11, r1, PTO+PT_MODE; 	 				\
+	brid	2f;							\
+	nop;				/* Fill delay slot */		\
+1:	/* User-mode state save.  */					\
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */\
+	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */\
+	tophys(r1,r1);							\
+	lwi	r1, r1, TS_THREAD_INFO;	/* get the thread info */	\
+	addik	r1, r1, THREAD_SIZE;	/* calculate kernel stack pointer */\
+	tophys(r1,r1);							\
+									\
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */\
+	/* store return registers separately because this macros	\
+	 * is use for others exceptions */				\
+	swi	r3, r1, PTO + PT_R3; 					\
+	swi	r4, r1, PTO + PT_R4;					\
+	SAVE_REGS							\
+	/* PC, before IRQ/trap - this is one instruction above FIXME*/	\
+	swi	r17, r1, PTO+PT_PC;					\
+									\
+	swi	r0, r1, PTO+PT_MODE; /* Was in user-mode.  */		\
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));			\
+	swi	r11, r1, PTO+PT_R1; /* Store user SP.  */		\
+	addi	r11, r0, 1;						\
+	swi	r11, r0, TOPHYS(PER_CPU(KM)); /* Now we're in kernel-mode.*/\
+2:	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */\
+	/* Save away the syscall number.  */				\
+	swi	r0, r1, PTO+PT_R0;					\
+	tovirt(r1,r1)
+
+C_ENTRY(full_exception_trap):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	/* adjust exception address for privileged instruction
+	 * for finding where is it */
+	addik	r17, r17, -4
+	SAVE_STATE /* Save registers */
+	/* FIXME this can be store directly in PT_ESR reg.
+	 * I tested it but there is a fault */
+	/* where the trap should return need -8 to adjust for rtsd r15, 8 */
+	la	r15, r0, ret_from_exc - 8
+	la	r5, r1, PTO		 /* parameter struct pt_regs * regs */
+	mfs	r6, resr
+	nop
+	mfs	r7, rfsr;		/* save FSR */
+	nop
+	la	r12, r0, full_exception
+	set_vms;
+	rtbd	r12, 0;
+	nop;
+
+/*
+ * Unaligned data trap.
+ *
+ * Unaligned data trap last on 4k page is handled here.
+ *
+ * Trap entered via exception, so EE bit is set, and interrupts
+ * are masked.  This is nice, means we don't have to CLI before state save
+ *
+ * The assembler routine is in "arch/microblaze/kernel/hw_exception_handler.S"
+ */
+C_ENTRY(unaligned_data_trap):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	SAVE_STATE		/* Save registers.*/
+	/* where the trap should return need -8 to adjust for rtsd r15, 8 */
+	la	r15, r0, ret_from_exc-8
+	mfs	r3, resr		/* ESR */
+	nop
+	mfs	r4, rear		/* EAR */
+	nop
+	la	r7, r1, PTO		/* parameter struct pt_regs * regs */
+	la	r12, r0, _unaligned_data_exception
+	set_vms;
+	rtbd	r12, 0;	/* interrupts enabled */
+	nop;
+
+/*
+ * Page fault traps.
+ *
+ * If the real exception handler (from hw_exception_handler.S) didn't find
+ * the mapping for the process, then we're thrown here to handle such situation.
+ *
+ * Trap entered via exceptions, so EE bit is set, and interrupts
+ * are masked.  This is nice, means we don't have to CLI before state save
+ *
+ * Build a standard exception frame for TLB Access errors.  All TLB exceptions
+ * will bail out to this point if they can't resolve the lightweight TLB fault.
+ *
+ * The C function called is in "arch/microblaze/mm/fault.c", declared as:
+ * void do_page_fault(struct pt_regs *regs,
+ *				unsigned long address,
+ *				unsigned long error_code)
+ */
+/* data and intruction trap - which is choose is resolved int fault.c */
+C_ENTRY(page_fault_data_trap):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	SAVE_STATE		/* Save registers.*/
+	/* where the trap should return need -8 to adjust for rtsd r15, 8 */
+	la	r15, r0, ret_from_exc-8
+	la	r5, r1, PTO		/* parameter struct pt_regs * regs */
+	mfs	r6, rear		/* parameter unsigned long address */
+	nop
+	mfs	r7, resr		/* parameter unsigned long error_code */
+	nop
+	la	r12, r0, do_page_fault
+	set_vms;
+	rtbd	r12, 0;	/* interrupts enabled */
+	nop;
+
+C_ENTRY(page_fault_instr_trap):
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	SAVE_STATE		/* Save registers.*/
+	/* where the trap should return need -8 to adjust for rtsd r15, 8 */
+	la	r15, r0, ret_from_exc-8
+	la	r5, r1, PTO		/* parameter struct pt_regs * regs */
+	mfs	r6, rear		/* parameter unsigned long address */
+	nop
+	ori	r7, r0, 0		/* parameter unsigned long error_code */
+	la	r12, r0, do_page_fault
+	set_vms;
+	rtbd	r12, 0;	/* interrupts enabled */
+	nop;
+
+/* Entry point used to return from an exception.  */
+C_ENTRY(ret_from_exc):
+	set_bip;			/*  Ints masked for state restore*/
+	lwi	r11, r1, PTO+PT_MODE;
+	bnei	r11, 2f;		/* See if returning to kernel mode, */
+					/* ... if so, skip resched &c.  */
+
+	/* We're returning to user mode, so check for various conditions that
+	   trigger rescheduling. */
+	/* Get current task ptr into r11 */
+	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_NEED_RESCHED;
+	beqi	r11, 5f;
+
+/* Call the scheduler before returning from a syscall/trap. */
+	bralid	r15, schedule;	/* Call scheduler */
+	nop;				/* delay slot */
+
+	/* Maybe handle a signal */
+5:	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_SIGPENDING;
+	beqi	r11, 1f;		/* Signals to handle, handle them */
+
+	/*
+	 * Handle a signal return; Pending signals should be in r18.
+	 *
+	 * Not all registers are saved by the normal trap/interrupt entry
+	 * points (for instance, call-saved registers (because the normal
+	 * C-compiler calling sequence in the kernel makes sure they're
+	 * preserved), and call-clobbered registers in the case of
+	 * traps), but signal handlers may want to examine or change the
+	 * complete register state.  Here we save anything not saved by
+	 * the normal entry sequence, so that it may be safely restored
+	 * (in a possibly modified form) after do_signal returns.
+	 * store return registers separately because this macros is use
+	 * for others exceptions */
+	swi	r3, r1, PTO + PT_R3;
+	swi	r4, r1, PTO + PT_R4;
+	la	r5, r1, PTO;		/* Arg 1: struct pt_regs *regs */
+	add	r6, r0, r0;		/* Arg 2: sigset_t *oldset */
+	addi	r7, r0, 0;		/* Arg 3: int in_syscall */
+	bralid	r15, do_signal;	/* Handle any signals */
+	nop;
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+
+/* Finally, return to user state.  */
+1:	swi	r0, r0, PER_CPU(KM);	/* Now officially in user state. */
+	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	swi	r11, r0, PER_CPU(CURRENT_SAVE); /* save current */
+	VM_OFF;
+	tophys(r1,r1);
+
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+
+	lwi	r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer. */
+	bri	6f;
+/* Return to kernel state.  */
+2:	VM_OFF;
+	tophys(r1,r1);
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+
+	tovirt(r1,r1);
+6:
+EXC_return:		/* Make global symbol for debugging */
+	rtbd	r14, 0;	/* Instructions to return from an IRQ */
+	nop;
+
+/*
+ * HW EXCEPTION rutine end
+ */
+
+/*
+ * Hardware maskable interrupts.
+ *
+ * The stack-pointer (r1) should have already been saved to the memory
+ * location PER_CPU(ENTRY_SP).
+ */
+C_ENTRY(_interrupt):
+/* MS: we are in physical address */
+/* Save registers, switch to proper stack, convert SP to virtual.*/
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP))
+	swi	r11, r0, TOPHYS(PER_CPU(R11_SAVE));
+	/* MS: See if already in kernel mode. */
+	lwi	r11, r0, TOPHYS(PER_CPU(KM));
+	beqi	r11, 1f; /* MS: Jump ahead if coming from user */
+
+/* Kernel-mode state save. */
+	or	r11, r1, r0
+	tophys(r1,r11); /* MS: I have in r1 physical address where stack is */
+/* MS: Save original SP - position PT_R1 to next stack frame 4 *1 - 152*/
+	swi	r11, r1, (PT_R1 - PT_SIZE);
+/* MS: restore r11 because of saving in SAVE_REGS */
+	lwi	r11, r0, TOPHYS(PER_CPU(R11_SAVE));
+	/* save registers */
+/* MS: Make room on the stack -> activation record */
+	addik	r1, r1, -STATE_SAVE_SIZE;
+/* MS: store return registers separately because
+ * this macros is use for others exceptions */
+	swi	r3, r1, PTO + PT_R3;
+	swi	r4, r1, PTO + PT_R4;
+	SAVE_REGS
+	/* MS: store mode */
+	addi	r11, r0, 1; /* MS: Was in kernel-mode. */
+	swi	r11, r1, PTO + PT_MODE; /* MS: and save it */
+	brid	2f;
+	nop; /* MS: Fill delay slot */
+
+1:
+/* User-mode state save. */
+/* MS: restore r11 -> FIXME move before SAVE_REG */
+	lwi	r11, r0, TOPHYS(PER_CPU(R11_SAVE));
+ /* MS: get the saved current */
+	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE));
+	tophys(r1,r1);
+	lwi	r1, r1, TS_THREAD_INFO;
+	addik	r1, r1, THREAD_SIZE;
+	tophys(r1,r1);
+	/* save registers */
+	addik	r1, r1, -STATE_SAVE_SIZE;
+	swi	r3, r1, PTO+PT_R3;
+	swi	r4, r1, PTO+PT_R4;
+	SAVE_REGS
+	/* calculate mode */
+	swi	r0, r1, PTO + PT_MODE;
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));
+	swi	r11, r1, PTO+PT_R1;
+	/* setup kernel mode to KM */
+	addi	r11, r0, 1;
+	swi	r11, r0, TOPHYS(PER_CPU(KM));
+
+2:
+	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE));
+	swi	r0, r1, PTO + PT_R0;
+	tovirt(r1,r1)
+	la	r5, r1, PTO;
+	set_vms;
+	la	r11, r0, do_IRQ;
+	la	r15, r0, irq_call;
+irq_call:rtbd	r11, 0;
+	nop;
+
+/* MS: we are in virtual mode */
+ret_from_irq:
+	lwi	r11, r1, PTO + PT_MODE;
+	bnei	r11, 2f;
+
+	add	r11, r0, CURRENT_TASK;
+	lwi	r11, r11, TS_THREAD_INFO;
+	lwi	r11, r11, TI_FLAGS; /* MS: get flags from thread info */
+	andi	r11, r11, _TIF_NEED_RESCHED;
+	beqi	r11, 5f
+	bralid	r15, schedule;
+	nop; /* delay slot */
+
+    /* Maybe handle a signal */
+5:	add	r11, r0, CURRENT_TASK;
+	lwi	r11, r11, TS_THREAD_INFO; /* MS: get thread info */
+	lwi	r11, r11, TI_FLAGS; /* get flags in thread info */
+	andi	r11, r11, _TIF_SIGPENDING;
+	beqid	r11, no_intr_resched
+/* Handle a signal return; Pending signals should be in r18. */
+	addi	r7, r0, 0; /* Arg 3: int in_syscall */
+	la	r5, r1, PTO; /* Arg 1: struct pt_regs *regs */
+	bralid	r15, do_signal;	/* Handle any signals */
+	add	r6, r0, r0; /* Arg 2: sigset_t *oldset */
+
+/* Finally, return to user state. */
+no_intr_resched:
+    /* Disable interrupts, we are now committed to the state restore */
+	disable_irq
+	swi	r0, r0, PER_CPU(KM); /* MS: Now officially in user state. */
+	add	r11, r0, CURRENT_TASK;
+	swi	r11, r0, PER_CPU(CURRENT_SAVE);
+	VM_OFF;
+	tophys(r1,r1);
+	lwi	r3, r1, PTO + PT_R3; /* MS: restore saved r3, r4 registers */
+	lwi	r4, r1, PTO + PT_R4;
+	RESTORE_REGS
+	addik	r1, r1, STATE_SAVE_SIZE /* MS: Clean up stack space. */
+	lwi	r1, r1, PT_R1 - PT_SIZE;
+	bri	6f;
+/* MS: Return to kernel state. */
+2:	VM_OFF /* MS: turn off MMU */
+	tophys(r1,r1)
+	lwi	r3, r1, PTO + PT_R3; /* MS: restore saved r3, r4 registers */
+	lwi	r4, r1, PTO + PT_R4;
+	RESTORE_REGS
+	addik	r1, r1, STATE_SAVE_SIZE	/* MS: Clean up stack space. */
+	tovirt(r1,r1);
+6:
+IRQ_return: /* MS: Make global symbol for debugging */
+	rtid	r14, 0
+	nop
+
+/*
+ * `Debug' trap
+ *  We enter dbtrap in "BIP" (breakpoint) mode.
+ *  So we exit the breakpoint mode with an 'rtbd' and proceed with the
+ *  original dbtrap.
+ *  however, wait to save state first
+ */
+C_ENTRY(_debug_exception):
+	/* BIP bit is set on entry, no interrupts can occur */
+	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP))
+
+	swi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* Save r11 */
+	set_bip;	/*equalize initial state for all possible entries*/
+	clear_eip;
+	enable_irq;
+	lwi	r11, r0, TOPHYS(PER_CPU(KM));/* See if already in kernel mode.*/
+	beqi	r11, 1f;		/* Jump ahead if coming from user */
+	/* Kernel-mode state save.  */
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/
+	tophys(r1,r11);
+	swi	r11, r1, (PT_R1-PT_SIZE); /* Save original SP. */
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */
+	swi	r3, r1, PTO + PT_R3;
+	swi	r4, r1, PTO + PT_R4;
+	SAVE_REGS;
+
+	addi	r11, r0, 1; 		/* Was in kernel-mode.  */
+	swi	r11, r1, PTO + PT_MODE;
+	brid	2f;
+	nop;				/* Fill delay slot */
+1:      /* User-mode state save.  */
+	lwi	r11, r0, TOPHYS(r0_ram + PTO + PT_R11); /* restore r11 */
+	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
+	tophys(r1,r1);
+	lwi	r1, r1, TS_THREAD_INFO;	/* get the thread info */
+	addik	r1, r1, THREAD_SIZE;	/* calculate kernel stack pointer */
+	tophys(r1,r1);
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */
+	swi	r3, r1, PTO + PT_R3;
+	swi	r4, r1, PTO + PT_R4;
+	SAVE_REGS;
+
+	swi	r0, r1, PTO+PT_MODE; /* Was in user-mode.  */
+	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));
+	swi	r11, r1, PTO+PT_R1; /* Store user SP.  */
+	addi	r11, r0, 1;
+	swi	r11, r0, TOPHYS(PER_CPU(KM));	/* Now we're in kernel-mode.  */
+2:	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
+	/* Save away the syscall number.  */
+	swi	r0, r1, PTO+PT_R0;
+	tovirt(r1,r1)
+
+	addi	r5, r0, SIGTRAP		     /* send the trap signal */
+	add	r6, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	addk	r7, r0, r0		     /* 3rd param zero */
+
+	set_vms;
+	la	r11, r0, send_sig;
+	la	r15, r0, dbtrap_call;
+dbtrap_call:	rtbd	r11, 0;
+	nop;
+
+	set_bip;			/*  Ints masked for state restore*/
+	lwi	r11, r1, PTO+PT_MODE;
+	bnei	r11, 2f;
+
+	/* Get current task ptr into r11 */
+	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_NEED_RESCHED;
+	beqi	r11, 5f;
+
+/* Call the scheduler before returning from a syscall/trap. */
+
+	bralid	r15, schedule;	/* Call scheduler */
+	nop;				/* delay slot */
+	/* XXX Is PT_DTRACE handling needed here? */
+	/* XXX m68knommu also checks TASK_STATE & TASK_COUNTER here.  */
+
+	/* Maybe handle a signal */
+5:	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	lwi	r11, r11, TS_THREAD_INFO;	/* get thread info */
+	lwi	r11, r11, TI_FLAGS;	/* get flags in thread info */
+	andi	r11, r11, _TIF_SIGPENDING;
+	beqi	r11, 1f;		/* Signals to handle, handle them */
+
+/* Handle a signal return; Pending signals should be in r18.  */
+	/* Not all registers are saved by the normal trap/interrupt entry
+	   points (for instance, call-saved registers (because the normal
+	   C-compiler calling sequence in the kernel makes sure they're
+	   preserved), and call-clobbered registers in the case of
+	   traps), but signal handlers may want to examine or change the
+	   complete register state.  Here we save anything not saved by
+	   the normal entry sequence, so that it may be safely restored
+	   (in a possibly modified form) after do_signal returns.  */
+
+	la	r5, r1, PTO;		/* Arg 1: struct pt_regs *regs */
+	add	r6, r0, r0;		/* Arg 2: sigset_t *oldset */
+	addi  r7, r0, 0;	/* Arg 3: int in_syscall */
+	bralid	r15, do_signal;	/* Handle any signals */
+	nop;
+
+
+/* Finally, return to user state.  */
+1:	swi	r0, r0, PER_CPU(KM);	/* Now officially in user state. */
+	add	r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */
+	swi	r11, r0, PER_CPU(CURRENT_SAVE); /* save current */
+	VM_OFF;
+	tophys(r1,r1);
+
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	RESTORE_REGS
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+
+
+	lwi	r1, r1, PT_R1 - PT_SIZE;
+					/* Restore user stack pointer. */
+	bri	6f;
+
+/* Return to kernel state.  */
+2:	VM_OFF;
+	tophys(r1,r1);
+	lwi	r3, r1, PTO+PT_R3; /* restore saved r3, r4 registers */
+	lwi	r4, r1, PTO+PT_R4;
+	RESTORE_REGS
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+
+	tovirt(r1,r1);
+6:
+DBTRAP_return:		/* Make global symbol for debugging */
+	rtbd	r14, 0;	/* Instructions to return from an IRQ */
+	nop;
+
+
+
+ENTRY(_switch_to)
+	/* prepare return value */
+	addk	r3, r0, r31
+
+	/* save registers in cpu_context */
+	/* use r11 and r12, volatile registers, as temp register */
+	/* give start of cpu_context for previous process */
+	addik	r11, r5, TI_CPU_CONTEXT
+	swi	r1, r11, CC_R1
+	swi	r2, r11, CC_R2
+	/* skip volatile registers.
+	 * they are saved on stack when we jumped to _switch_to() */
+	/* dedicated registers */
+	swi	r13, r11, CC_R13
+	swi	r14, r11, CC_R14
+	swi	r15, r11, CC_R15
+	swi	r16, r11, CC_R16
+	swi	r17, r11, CC_R17
+	swi	r18, r11, CC_R18
+	/* save non-volatile registers */
+	swi	r19, r11, CC_R19
+	swi	r20, r11, CC_R20
+	swi	r21, r11, CC_R21
+	swi	r22, r11, CC_R22
+	swi	r23, r11, CC_R23
+	swi	r24, r11, CC_R24
+	swi	r25, r11, CC_R25
+	swi	r26, r11, CC_R26
+	swi	r27, r11, CC_R27
+	swi	r28, r11, CC_R28
+	swi	r29, r11, CC_R29
+	swi	r30, r11, CC_R30
+	/* special purpose registers */
+	mfs	r12, rmsr
+	nop
+	swi	r12, r11, CC_MSR
+	mfs	r12, rear
+	nop
+	swi	r12, r11, CC_EAR
+	mfs	r12, resr
+	nop
+	swi	r12, r11, CC_ESR
+	mfs	r12, rfsr
+	nop
+	swi	r12, r11, CC_FSR
+
+	/* update r31, the current */
+	lwi	r31, r6, TI_TASK/* give me pointer to task which will be next */
+	/* stored it to current_save too */
+	swi	r31, r0, PER_CPU(CURRENT_SAVE)
+
+	/* get new process' cpu context and restore */
+	/* give me start where start context of next task */
+	addik	r11, r6, TI_CPU_CONTEXT
+
+	/* non-volatile registers */
+	lwi	r30, r11, CC_R30
+	lwi	r29, r11, CC_R29
+	lwi	r28, r11, CC_R28
+	lwi	r27, r11, CC_R27
+	lwi	r26, r11, CC_R26
+	lwi	r25, r11, CC_R25
+	lwi	r24, r11, CC_R24
+	lwi	r23, r11, CC_R23
+	lwi	r22, r11, CC_R22
+	lwi	r21, r11, CC_R21
+	lwi	r20, r11, CC_R20
+	lwi	r19, r11, CC_R19
+	/* dedicated registers */
+	lwi	r18, r11, CC_R18
+	lwi	r17, r11, CC_R17
+	lwi	r16, r11, CC_R16
+	lwi	r15, r11, CC_R15
+	lwi	r14, r11, CC_R14
+	lwi	r13, r11, CC_R13
+	/* skip volatile registers */
+	lwi	r2, r11, CC_R2
+	lwi	r1, r11, CC_R1
+
+	/* special purpose registers */
+	lwi	r12, r11, CC_FSR
+	mts	rfsr, r12
+	nop
+	lwi	r12, r11, CC_MSR
+	mts	rmsr, r12
+	nop
+
+	rtsd	r15, 8
+	nop
+
+ENTRY(_reset)
+	brai	0x70; /* Jump back to FS-boot */
+
+ENTRY(_break)
+	mfs	r5, rmsr
+	nop
+	swi	r5, r0, 0x250 + TOPHYS(r0_ram)
+	mfs	r5, resr
+	nop
+	swi	r5, r0, 0x254 + TOPHYS(r0_ram)
+	bri	0
+
+	/* These are compiled and loaded into high memory, then
+	 * copied into place in mach_early_setup */
+	.section	.init.ivt, "ax"
+	.org	0x0
+	/* this is very important - here is the reset vector */
+	/* in current MMU branch you don't care what is here - it is
+	 * used from bootloader site - but this is correct for FS-BOOT */
+	brai	0x70
+	nop
+	brai	TOPHYS(_user_exception); /* syscall handler */
+	brai	TOPHYS(_interrupt);	/* Interrupt handler */
+	brai	TOPHYS(_break);		/* nmi trap handler */
+	brai	TOPHYS(_hw_exception_handler);	/* HW exception handler */
+
+	.org	0x60
+	brai	TOPHYS(_debug_exception);	/* debug trap handler*/
+
+.section .rodata,"a"
+#include "syscall_table.S"
+
+syscall_table_size=(.-sys_call_table)
+
-- 
cgit v0.10.2


From 7db29dde731db02143418cfa008b7b77ccb2fa57 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:22 +0200
Subject: microblaze_mmu_v2: Update exception handling - MMU exception

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S
index cf9486d..9d591cd 100644
--- a/arch/microblaze/kernel/hw_exception_handler.S
+++ b/arch/microblaze/kernel/hw_exception_handler.S
@@ -53,6 +53,12 @@
  *   - Illegal instruction opcode
  *   - Divide-by-zero
  *
+ *   - Privileged instruction exception (MMU)
+ *   - Data storage exception (MMU)
+ *   - Instruction storage exception (MMU)
+ *   - Data TLB miss exception (MMU)
+ *   - Instruction TLB miss exception (MMU)
+ *
  * Note we disable interrupts during exception handling, otherwise we will
  * possibly get multiple re-entrancy if interrupt handles themselves cause
  * exceptions. JW
@@ -71,9 +77,24 @@
 #include <asm/asm-offsets.h>
 
 /* Helpful Macros */
+#ifndef CONFIG_MMU
 #define EX_HANDLER_STACK_SIZ	(4*19)
+#endif
 #define NUM_TO_REG(num)		r ## num
 
+#ifdef CONFIG_MMU
+/* FIXME you can't change first load of MSR because there is
+ * hardcoded jump bri 4 */
+	#define RESTORE_STATE			\
+		lwi	r3, r1, PT_R3;		\
+		lwi	r4, r1, PT_R4;		\
+		lwi	r5, r1, PT_R5;		\
+		lwi	r6, r1, PT_R6;		\
+		lwi	r11, r1, PT_R11;	\
+		lwi	r31, r1, PT_R31;	\
+		lwi	r1, r0, TOPHYS(r0_ram + 0);
+#endif /* CONFIG_MMU */
+
 #define LWREG_NOP			\
 	bri	ex_handler_unhandled;	\
 	nop;
@@ -106,6 +127,54 @@
 	or	r3, r0, NUM_TO_REG (regnum);		\
 	bri	ex_sw_tail;
 
+#ifdef CONFIG_MMU
+	#define R3_TO_LWREG_VM_V(regnum)		\
+		brid	ex_lw_end_vm;			\
+		swi	r3, r7, 4 * regnum;
+
+	#define R3_TO_LWREG_VM(regnum)			\
+		brid	ex_lw_end_vm;			\
+		or	NUM_TO_REG (regnum), r0, r3;
+
+	#define SWREG_TO_R3_VM_V(regnum)		\
+		brid	ex_sw_tail_vm;			\
+		lwi	r3, r7, 4 * regnum;
+
+	#define SWREG_TO_R3_VM(regnum)			\
+		brid	ex_sw_tail_vm;			\
+		or	r3, r0, NUM_TO_REG (regnum);
+
+	/* Shift right instruction depending on available configuration */
+	#if CONFIG_XILINX_MICROBLAZE0_USE_BARREL > 0
+	#define BSRLI(rD, rA, imm)	\
+		bsrli rD, rA, imm
+	#elif CONFIG_XILINX_MICROBLAZE0_USE_DIV > 0
+	#define BSRLI(rD, rA, imm)	\
+		ori rD, r0, (1 << imm);	\
+		idivu rD, rD, rA
+	#else
+	#define BSRLI(rD, rA, imm) BSRLI ## imm (rD, rA)
+	/* Only the used shift constants defined here - add more if needed */
+	#define BSRLI2(rD, rA)				\
+		srl rD, rA;		/* << 1 */	\
+		srl rD, rD;		/* << 2 */
+	#define BSRLI10(rD, rA)				\
+		srl rD, rA;		/* << 1 */	\
+		srl rD, rD;		/* << 2 */	\
+		srl rD, rD;		/* << 3 */	\
+		srl rD, rD;		/* << 4 */	\
+		srl rD, rD;		/* << 5 */	\
+		srl rD, rD;		/* << 6 */	\
+		srl rD, rD;		/* << 7 */	\
+		srl rD, rD;		/* << 8 */	\
+		srl rD, rD;		/* << 9 */	\
+		srl rD, rD		/* << 10 */
+	#define BSRLI20(rD, rA)		\
+		BSRLI10(rD, rA);	\
+		BSRLI10(rD, rD)
+	#endif
+#endif /* CONFIG_MMU */
+
 .extern other_exception_handler /* Defined in exception.c */
 
 /*
@@ -163,34 +232,119 @@
 
 /* wrappers to restore state before coming to entry.S */
 
+#ifdef CONFIG_MMU
+.section .rodata
+.align 4
+_MB_HW_ExceptionVectorTable:
+/*  0 - Undefined */
+	.long	TOPHYS(ex_handler_unhandled)
+/*  1 - Unaligned data access exception */
+	.long	TOPHYS(handle_unaligned_ex)
+/*  2 - Illegal op-code exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  3 - Instruction bus error exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  4 - Data bus error exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  5 - Divide by zero exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  6 - Floating point unit exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  7 - Privileged instruction exception */
+	.long	TOPHYS(full_exception_trapw)
+/*  8 - 15 - Undefined */
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+/* 16 - Data storage exception */
+	.long	TOPHYS(handle_data_storage_exception)
+/* 17 - Instruction storage exception */
+	.long	TOPHYS(handle_instruction_storage_exception)
+/* 18 - Data TLB miss exception */
+	.long	TOPHYS(handle_data_tlb_miss_exception)
+/* 19 - Instruction TLB miss exception */
+	.long	TOPHYS(handle_instruction_tlb_miss_exception)
+/* 20 - 31 - Undefined */
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+	.long	TOPHYS(ex_handler_unhandled)
+#endif
+
 .global _hw_exception_handler
 .section .text
 .align 4
 .ent _hw_exception_handler
 _hw_exception_handler:
+#ifndef CONFIG_MMU
 	addik	r1, r1, -(EX_HANDLER_STACK_SIZ); /* Create stack frame */
+#else
+	swi	r1, r0, TOPHYS(r0_ram + 0); /* GET_SP */
+	/* Save date to kernel memory. Here is the problem
+	 * when you came from user space */
+	ori	r1, r0, TOPHYS(r0_ram + 28);
+#endif
 	swi	r3, r1, PT_R3
 	swi	r4, r1, PT_R4
 	swi	r5, r1, PT_R5
 	swi	r6, r1, PT_R6
 
-	mfs	r5, rmsr;
-	nop
-	swi	r5, r1, 0;
-	mfs	r4, rbtr	/* Save BTR before jumping to handler */
-	nop
+#ifdef CONFIG_MMU
+	swi	r11, r1, PT_R11
+	swi	r31, r1, PT_R31
+	lwi	r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */
+#endif
+
 	mfs	r3, resr
 	nop
+	mfs	r4, rear;
+	nop
 
+#ifndef CONFIG_MMU
 	andi	r5, r3, 0x1000;		/* Check ESR[DS] */
 	beqi	r5, not_in_delay_slot;	/* Branch if ESR[DS] not set */
 	mfs	r17, rbtr;	/* ESR[DS] set - return address in BTR */
 	nop
 not_in_delay_slot:
 	swi	r17, r1, PT_R17
+#endif
 
 	andi	r5, r3, 0x1F;		/* Extract ESR[EXC] */
 
+#ifdef CONFIG_MMU
+	/* Calculate exception vector offset = r5 << 2 */
+	addk	r6, r5, r5; /* << 1 */
+	addk	r6, r6, r6; /* << 2 */
+
+/* counting which exception happen */
+	lwi	r5, r0, 0x200 + TOPHYS(r0_ram)
+	addi	r5, r5, 1
+	swi	r5, r0, 0x200 + TOPHYS(r0_ram)
+	lwi	r5, r6, 0x200 + TOPHYS(r0_ram)
+	addi	r5, r5, 1
+	swi	r5, r6, 0x200 + TOPHYS(r0_ram)
+/* end */
+	/* Load the HW Exception vector */
+	lwi	r6, r6, TOPHYS(_MB_HW_ExceptionVectorTable)
+	bra	r6
+
+full_exception_trapw:
+	RESTORE_STATE
+	bri	full_exception_trap
+#else
 	/* Exceptions enabled here. This will allow nested exceptions */
 	mfs	r6, rmsr;
 	nop
@@ -254,6 +408,7 @@ handle_other_ex: /* Handle Other exceptions here */
 	lwi	r18, r1, PT_R18
 
 	bri	ex_handler_done; /* Complete exception handling */
+#endif
 
 /* 0x01 - Unaligned data access exception
  * This occurs when a word access is not aligned on a word boundary,
@@ -265,11 +420,28 @@ handle_other_ex: /* Handle Other exceptions here */
 handle_unaligned_ex:
 	/* Working registers already saved: R3, R4, R5, R6
 	 *  R3 = ESR
-	 *  R4 = BTR
+	 *  R4 = EAR
 	 */
-	mfs	r4, rear;
+#ifdef CONFIG_MMU
+	andi	r6, r3, 0x1000			/* Check ESR[DS] */
+	beqi	r6, _no_delayslot		/* Branch if ESR[DS] not set */
+	mfs	r17, rbtr;	/* ESR[DS] set - return address in BTR */
 	nop
+_no_delayslot:
+#endif
+
+#ifdef CONFIG_MMU
+	/* Check if unaligned address is last on a 4k page */
+		andi	r5, r4, 0xffc
+		xori	r5, r5, 0xffc
+		bnei	r5, _unaligned_ex2
+	_unaligned_ex1:
+		RESTORE_STATE;
+/* Another page must be accessed or physical address not in page table */
+		bri	unaligned_data_trap
 
+	_unaligned_ex2:
+#endif
 	andi	r6, r3, 0x3E0; /* Mask and extract the register operand */
 	srl	r6, r6; /* r6 >> 5 */
 	srl	r6, r6;
@@ -278,6 +450,45 @@ handle_unaligned_ex:
 	srl	r6, r6;
 	/* Store the register operand in a temporary location */
 	sbi	r6, r0, TOPHYS(ex_reg_op);
+#ifdef CONFIG_MMU
+	/* Get physical address */
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	ori	r5, r0, CONFIG_KERNEL_START
+	cmpu	r5, r4, r5
+	bgti	r5, _unaligned_ex3
+	ori	r5, r0, swapper_pg_dir
+	bri	_unaligned_ex4
+
+	/* Get the PGD for the current thread. */
+_unaligned_ex3: /* user thread */
+	addi	r5 ,CURRENT_TASK, TOPHYS(0); /* get current task address */
+	lwi	r5, r5, TASK_THREAD + PGDIR
+_unaligned_ex4:
+	tophys(r5,r5)
+	BSRLI(r6,r4,20)			/* Create L1 (pgdir/pmd) address */
+	andi	r6, r6, 0xffc
+/* Assume pgdir aligned on 4K boundary, no need for "andi r5,r5,0xfffff003" */
+	or	r5, r5, r6
+	lwi	r6, r5, 0		/* Get L1 entry */
+	andi	r5, r6, 0xfffff000	/* Extract L2 (pte) base address. */
+	beqi	r5, _unaligned_ex1	/* Bail if no table */
+
+	tophys(r5,r5)
+	BSRLI(r6,r4,10)			/* Compute PTE address */
+	andi	r6, r6, 0xffc
+	andi	r5, r5, 0xfffff003
+	or	r5, r5, r6
+	lwi	r5, r5, 0		/* Get Linux PTE */
+
+	andi	r6, r5, _PAGE_PRESENT
+	beqi	r6, _unaligned_ex1	/* Bail if no page */
+
+	andi	r5, r5, 0xfffff000	/* Extract RPN */
+	andi	r4, r4, 0x00000fff	/* Extract offset */
+	or	r4, r4, r5		/* Create physical address */
+#endif /* CONFIG_MMU */
 
 	andi	r6, r3, 0x400; /* Extract ESR[S] */
 	bnei	r6, ex_sw;
@@ -355,6 +566,7 @@ ex_shw:
 ex_sw_end: /* Exception handling of store word, ends. */
 
 ex_handler_done:
+#ifndef CONFIG_MMU
 	lwi	r5, r1, 0 /* RMSR */
 	mts	rmsr, r5
 	nop
@@ -366,13 +578,455 @@ ex_handler_done:
 
 	rted	r17, 0
 	addik	r1, r1, (EX_HANDLER_STACK_SIZ); /* Restore stack frame */
+#else
+	RESTORE_STATE;
+	rted	r17, 0
+	nop
+#endif
+
+#ifdef CONFIG_MMU
+	/* Exception vector entry code. This code runs with address translation
+	 * turned off (i.e. using physical addresses). */
+
+	/* Exception vectors. */
+
+	/* 0x10 - Data Storage Exception
+	 * This happens for just a few reasons. U0 set (but we don't do that),
+	 * or zone protection fault (user violation, write to protected page).
+	 * If this is just an update of modified status, we do that quickly
+	 * and exit. Otherwise, we call heavyweight functions to do the work.
+	 */
+	handle_data_storage_exception:
+		/* Working registers already saved: R3, R4, R5, R6
+		 * R3 = ESR
+		 */
+		mfs	r11, rpid
+		nop
+		bri	4
+		mfs	r3, rear		/* Get faulting address */
+		nop
+		/* If we are faulting a kernel address, we have to use the
+		 * kernel page tables.
+		 */
+		ori	r4, r0, CONFIG_KERNEL_START
+		cmpu	r4, r3, r4
+		bgti	r4, ex3
+		/* First, check if it was a zone fault (which means a user
+		 * tried to access a kernel or read-protected page - always
+		 * a SEGV). All other faults here must be stores, so no
+		 * need to check ESR_S as well. */
+		mfs	r4, resr
+		nop
+		andi	r4, r4, 0x800		/* ESR_Z - zone protection */
+		bnei	r4, ex2
+
+		ori	r4, r0, swapper_pg_dir
+		mts	rpid, r0		/* TLB will have 0 TID */
+		nop
+		bri	ex4
+
+		/* Get the PGD for the current thread. */
+	ex3:
+		/* First, check if it was a zone fault (which means a user
+		 * tried to access a kernel or read-protected page - always
+		 * a SEGV). All other faults here must be stores, so no
+		 * need to check ESR_S as well. */
+		mfs	r4, resr
+		nop
+		andi	r4, r4, 0x800		/* ESR_Z */
+		bnei	r4, ex2
+		/* get current task address */
+		addi	r4 ,CURRENT_TASK, TOPHYS(0);
+		lwi	r4, r4, TASK_THREAD+PGDIR
+	ex4:
+		tophys(r4,r4)
+		BSRLI(r5,r3,20)		/* Create L1 (pgdir/pmd) address */
+		andi	r5, r5, 0xffc
+/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */
+		or	r4, r4, r5
+		lwi	r4, r4, 0		/* Get L1 entry */
+		andi	r5, r4, 0xfffff000 /* Extract L2 (pte) base address */
+		beqi	r5, ex2			/* Bail if no table */
+
+		tophys(r5,r5)
+		BSRLI(r6,r3,10)			/* Compute PTE address */
+		andi	r6, r6, 0xffc
+		andi	r5, r5, 0xfffff003
+		or	r5, r5, r6
+		lwi	r4, r5, 0		/* Get Linux PTE */
+
+		andi	r6, r4, _PAGE_RW	/* Is it writeable? */
+		beqi	r6, ex2			/* Bail if not */
+
+		/* Update 'changed' */
+		ori	r4, r4, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
+		swi	r4, r5, 0		/* Update Linux page table */
+
+		/* Most of the Linux PTE is ready to load into the TLB LO.
+		 * We set ZSEL, where only the LS-bit determines user access.
+		 * We set execute, because we don't have the granularity to
+		 * properly set this at the page level (Linux problem).
+		 * If shared is set, we cause a zero PID->TID load.
+		 * Many of these bits are software only. Bits we don't set
+		 * here we (properly should) assume have the appropriate value.
+		 */
+		andni	r4, r4, 0x0ce2		/* Make sure 20, 21 are zero */
+		ori	r4, r4, _PAGE_HWEXEC	/* make it executable */
+
+		/* find the TLB index that caused the fault. It has to be here*/
+		mts	rtlbsx, r3
+		nop
+		mfs	r5, rtlbx		/* DEBUG: TBD */
+		nop
+		mts	rtlblo, r4		/* Load TLB LO */
+		nop
+						/* Will sync shadow TLBs */
+
+		/* Done...restore registers and get out of here. */
+		mts	rpid, r11
+		nop
+		bri 4
+
+		RESTORE_STATE;
+		rted	r17, 0
+		nop
+	ex2:
+		/* The bailout. Restore registers to pre-exception conditions
+		 * and call the heavyweights to help us out. */
+		mts	rpid, r11
+		nop
+		bri 4
+		RESTORE_STATE;
+		bri	page_fault_data_trap
+
+
+	/* 0x11 - Instruction Storage Exception
+	 * This is caused by a fetch from non-execute or guarded pages. */
+	handle_instruction_storage_exception:
+		/* Working registers already saved: R3, R4, R5, R6
+		 * R3 = ESR
+		 */
+
+		mfs	r3, rear		/* Get faulting address */
+		nop
+		RESTORE_STATE;
+		bri	page_fault_instr_trap
+
+	/* 0x12 - Data TLB Miss Exception
+	 * As the name implies, translation is not in the MMU, so search the
+	 * page tables and fix it. The only purpose of this function is to
+	 * load TLB entries from the page table if they exist.
+	 */
+	handle_data_tlb_miss_exception:
+		/* Working registers already saved: R3, R4, R5, R6
+		 * R3 = ESR
+		 */
+		mfs	r11, rpid
+		nop
+		bri	4
+		mfs	r3, rear		/* Get faulting address */
+		nop
+
+		/* If we are faulting a kernel address, we have to use the
+		 * kernel page tables. */
+		ori	r4, r0, CONFIG_KERNEL_START
+		cmpu	r4, r3, r4
+		bgti	r4, ex5
+		ori	r4, r0, swapper_pg_dir
+		mts	rpid, r0		/* TLB will have 0 TID */
+		nop
+		bri	ex6
 
+		/* Get the PGD for the current thread. */
+	ex5:
+		/* get current task address */
+		addi	r4 ,CURRENT_TASK, TOPHYS(0);
+		lwi	r4, r4, TASK_THREAD+PGDIR
+	ex6:
+		tophys(r4,r4)
+		BSRLI(r5,r3,20)		/* Create L1 (pgdir/pmd) address */
+		andi	r5, r5, 0xffc
+/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */
+		or	r4, r4, r5
+		lwi	r4, r4, 0		/* Get L1 entry */
+		andi	r5, r4, 0xfffff000 /* Extract L2 (pte) base address */
+		beqi	r5, ex7			/* Bail if no table */
+
+		tophys(r5,r5)
+		BSRLI(r6,r3,10)			/* Compute PTE address */
+		andi	r6, r6, 0xffc
+		andi	r5, r5, 0xfffff003
+		or	r5, r5, r6
+		lwi	r4, r5, 0		/* Get Linux PTE */
+
+		andi	r6, r4, _PAGE_PRESENT
+		beqi	r6, ex7
+
+		ori	r4, r4, _PAGE_ACCESSED
+		swi	r4, r5, 0
+
+		/* Most of the Linux PTE is ready to load into the TLB LO.
+		 * We set ZSEL, where only the LS-bit determines user access.
+		 * We set execute, because we don't have the granularity to
+		 * properly set this at the page level (Linux problem).
+		 * If shared is set, we cause a zero PID->TID load.
+		 * Many of these bits are software only. Bits we don't set
+		 * here we (properly should) assume have the appropriate value.
+		 */
+		andni	r4, r4, 0x0ce2		/* Make sure 20, 21 are zero */
+
+		bri	finish_tlb_load
+	ex7:
+		/* The bailout. Restore registers to pre-exception conditions
+		 * and call the heavyweights to help us out.
+		 */
+		mts	rpid, r11
+		nop
+		bri	4
+		RESTORE_STATE;
+		bri	page_fault_data_trap
+
+	/* 0x13 - Instruction TLB Miss Exception
+	 * Nearly the same as above, except we get our information from
+	 * different registers and bailout to a different point.
+	 */
+	handle_instruction_tlb_miss_exception:
+		/* Working registers already saved: R3, R4, R5, R6
+		 *  R3 = ESR
+		 */
+		mfs	r11, rpid
+		nop
+		bri	4
+		mfs	r3, rear		/* Get faulting address */
+		nop
+
+		/* If we are faulting a kernel address, we have to use the
+		 * kernel page tables.
+		 */
+		ori	r4, r0, CONFIG_KERNEL_START
+		cmpu	r4, r3, r4
+		bgti	r4, ex8
+		ori	r4, r0, swapper_pg_dir
+		mts	rpid, r0		/* TLB will have 0 TID */
+		nop
+		bri	ex9
+
+		/* Get the PGD for the current thread. */
+	ex8:
+		/* get current task address */
+		addi	r4 ,CURRENT_TASK, TOPHYS(0);
+		lwi	r4, r4, TASK_THREAD+PGDIR
+	ex9:
+		tophys(r4,r4)
+		BSRLI(r5,r3,20)		/* Create L1 (pgdir/pmd) address */
+		andi	r5, r5, 0xffc
+/* Assume pgdir aligned on 4K boundary, no need for "andi r4,r4,0xfffff003" */
+		or	r4, r4, r5
+		lwi	r4, r4, 0		/* Get L1 entry */
+		andi	r5, r4, 0xfffff000 /* Extract L2 (pte) base address */
+		beqi	r5, ex10		/* Bail if no table */
+
+		tophys(r5,r5)
+		BSRLI(r6,r3,10)			/* Compute PTE address */
+		andi	r6, r6, 0xffc
+		andi	r5, r5, 0xfffff003
+		or	r5, r5, r6
+		lwi	r4, r5, 0		/* Get Linux PTE */
+
+		andi	r6, r4, _PAGE_PRESENT
+		beqi	r6, ex7
+
+		ori	r4, r4, _PAGE_ACCESSED
+		swi	r4, r5, 0
+
+		/* Most of the Linux PTE is ready to load into the TLB LO.
+		 * We set ZSEL, where only the LS-bit determines user access.
+		 * We set execute, because we don't have the granularity to
+		 * properly set this at the page level (Linux problem).
+		 * If shared is set, we cause a zero PID->TID load.
+		 * Many of these bits are software only. Bits we don't set
+		 * here we (properly should) assume have the appropriate value.
+		 */
+		andni	r4, r4, 0x0ce2		/* Make sure 20, 21 are zero */
+
+		bri	finish_tlb_load
+	ex10:
+		/* The bailout. Restore registers to pre-exception conditions
+		 * and call the heavyweights to help us out.
+		 */
+		mts	rpid, r11
+		nop
+		bri 4
+		RESTORE_STATE;
+		bri	page_fault_instr_trap
+
+/* Both the instruction and data TLB miss get to this point to load the TLB.
+ *	r3 - EA of fault
+ *	r4 - TLB LO (info from Linux PTE)
+ *	r5, r6 - available to use
+ *	PID - loaded with proper value when we get here
+ *	Upon exit, we reload everything and RFI.
+ * A common place to load the TLB.
+ */
+	tlb_index:
+		.long	1 /* MS: storing last used tlb index */
+	finish_tlb_load:
+		/* MS: load the last used TLB index. */
+		lwi	r5, r0, TOPHYS(tlb_index)
+		addik	r5, r5, 1 /* MS: inc tlb_index -> use next one */
+
+/* MS: FIXME this is potential fault, because this is mask not count */
+		andi	r5, r5, (MICROBLAZE_TLB_SIZE-1)
+		ori	r6, r0, 1
+		cmp	r31, r5, r6
+		blti	r31, sem
+		addik	r5, r6, 1
+	sem:
+		/* MS: save back current TLB index */
+		swi	r5, r0, TOPHYS(tlb_index)
+
+		ori	r4, r4, _PAGE_HWEXEC	/* make it executable */
+		mts	rtlbx, r5		/* MS: save current TLB */
+		nop
+		mts	rtlblo,	r4		/* MS: save to TLB LO */
+		nop
+
+		/* Create EPN. This is the faulting address plus a static
+		 * set of bits. These are size, valid, E, U0, and ensure
+		 * bits 20 and 21 are zero.
+		 */
+		andi	r3, r3, 0xfffff000
+		ori	r3, r3, 0x0c0
+		mts	rtlbhi,	r3		/* Load TLB HI */
+		nop
+
+		/* Done...restore registers and get out of here. */
+	ex12:
+		mts	rpid, r11
+		nop
+		bri 4
+		RESTORE_STATE;
+		rted	r17, 0
+		nop
+
+	/* extern void giveup_fpu(struct task_struct *prev)
+	 *
+	 * The MicroBlaze processor may have an FPU, so this should not just
+	 * return: TBD.
+	 */
+	.globl giveup_fpu;
+	.align 4;
+	giveup_fpu:
+		bralid	r15,0			/* TBD */
+		nop
+
+	/* At present, this routine just hangs. - extern void abort(void) */
+	.globl abort;
+	.align 4;
+	abort:
+		br	r0
+
+	.globl set_context;
+	.align 4;
+	set_context:
+		mts	rpid, r5	/* Shadow TLBs are automatically */
+		nop
+		bri	4		/* flushed by changing PID */
+		rtsd	r15,8
+		nop
+
+#endif
 .end _hw_exception_handler
 
+#ifdef CONFIG_MMU
+/* Unaligned data access exception last on a 4k page for MMU.
+ * When this is called, we are in virtual mode with exceptions enabled
+ * and registers 1-13,15,17,18 saved.
+ *
+ * R3 = ESR
+ * R4 = EAR
+ * R7 = pointer to saved registers (struct pt_regs *regs)
+ *
+ * This handler perform the access, and returns via ret_from_exc.
+ */
+.global _unaligned_data_exception
+.ent _unaligned_data_exception
+_unaligned_data_exception:
+	andi	r8, r3, 0x3E0;	/* Mask and extract the register operand */
+	BSRLI(r8,r8,2);		/* r8 >> 2 = register operand * 8 */
+	andi	r6, r3, 0x400;	/* Extract ESR[S] */
+	bneid	r6, ex_sw_vm;
+	andi	r6, r3, 0x800;	/* Extract ESR[W] - delay slot */
+ex_lw_vm:
+	beqid	r6, ex_lhw_vm;
+	lbui	r5, r4, 0;	/* Exception address in r4 - delay slot */
+/* Load a word, byte-by-byte from destination address and save it in tmp space*/
+	la	r6, r0, ex_tmp_data_loc_0;
+	sbi	r5, r6, 0;
+	lbui	r5, r4, 1;
+	sbi	r5, r6, 1;
+	lbui	r5, r4, 2;
+	sbi	r5, r6, 2;
+	lbui	r5, r4, 3;
+	sbi	r5, r6, 3;
+	brid	ex_lw_tail_vm;
+/* Get the destination register value into r3 - delay slot */
+	lwi	r3, r6, 0;
+ex_lhw_vm:
+	/* Load a half-word, byte-by-byte from destination address and
+	 * save it in tmp space */
+	la	r6, r0, ex_tmp_data_loc_0;
+	sbi	r5, r6, 0;
+	lbui	r5, r4, 1;
+	sbi	r5, r6, 1;
+	lhui	r3, r6, 0;	/* Get the destination register value into r3 */
+ex_lw_tail_vm:
+	/* Form load_word jump table offset (lw_table_vm + (8 * regnum)) */
+	addik	r5, r8, lw_table_vm;
+	bra	r5;
+ex_lw_end_vm:			/* Exception handling of load word, ends */
+	brai	ret_from_exc;
+ex_sw_vm:
+/* Form store_word jump table offset (sw_table_vm + (8 * regnum)) */
+	addik	r5, r8, sw_table_vm;
+	bra	r5;
+ex_sw_tail_vm:
+	la	r5, r0, ex_tmp_data_loc_0;
+	beqid	r6, ex_shw_vm;
+	swi	r3, r5, 0;	/* Get the word - delay slot */
+	/* Store the word, byte-by-byte into destination address */
+	lbui	r3, r5, 0;
+	sbi	r3, r4, 0;
+	lbui	r3, r5, 1;
+	sbi	r3, r4, 1;
+	lbui	r3, r5, 2;
+	sbi	r3, r4, 2;
+	lbui	r3, r5, 3;
+	brid	ret_from_exc;
+	sbi	r3, r4, 3;	/* Delay slot */
+ex_shw_vm:
+	/* Store the lower half-word, byte-by-byte into destination address */
+	lbui	r3, r5, 2;
+	sbi	r3, r4, 0;
+	lbui	r3, r5, 3;
+	brid	ret_from_exc;
+	sbi	r3, r4, 1;	/* Delay slot */
+ex_sw_end_vm:			/* Exception handling of store word, ends. */
+.end _unaligned_data_exception
+#endif /* CONFIG_MMU */
+
 ex_handler_unhandled:
 /* FIXME add handle function for unhandled exception - dump register */
 	bri 0
 
+/*
+ * hw_exception_handler Jump Table
+ * - Contains code snippets for each register that caused the unalign exception
+ * - Hence exception handler is NOT self-modifying
+ * - Separate table for load exceptions and store exceptions.
+ * - Each table is of size: (8 * 32) = 256 bytes
+ */
+
 .section .text
 .align 4
 lw_table:
@@ -407,7 +1061,11 @@ lw_r27:		R3_TO_LWREG	(27);
 lw_r28:		R3_TO_LWREG	(28);
 lw_r29:		R3_TO_LWREG	(29);
 lw_r30:		R3_TO_LWREG	(30);
+#ifdef CONFIG_MMU
+lw_r31: 	R3_TO_LWREG_V	(31);
+#else
 lw_r31:		R3_TO_LWREG	(31);
+#endif
 
 sw_table:
 sw_r0:		SWREG_TO_R3	(0);
@@ -441,7 +1099,81 @@ sw_r27:		SWREG_TO_R3	(27);
 sw_r28:		SWREG_TO_R3	(28);
 sw_r29:		SWREG_TO_R3	(29);
 sw_r30:		SWREG_TO_R3	(30);
+#ifdef CONFIG_MMU
+sw_r31:		SWREG_TO_R3_V	(31);
+#else
 sw_r31:		SWREG_TO_R3	(31);
+#endif
+
+#ifdef CONFIG_MMU
+lw_table_vm:
+lw_r0_vm:	R3_TO_LWREG_VM		(0);
+lw_r1_vm:	R3_TO_LWREG_VM_V	(1);
+lw_r2_vm:	R3_TO_LWREG_VM_V	(2);
+lw_r3_vm:	R3_TO_LWREG_VM_V	(3);
+lw_r4_vm:	R3_TO_LWREG_VM_V	(4);
+lw_r5_vm:	R3_TO_LWREG_VM_V	(5);
+lw_r6_vm:	R3_TO_LWREG_VM_V	(6);
+lw_r7_vm:	R3_TO_LWREG_VM_V	(7);
+lw_r8_vm:	R3_TO_LWREG_VM_V	(8);
+lw_r9_vm:	R3_TO_LWREG_VM_V	(9);
+lw_r10_vm:	R3_TO_LWREG_VM_V	(10);
+lw_r11_vm:	R3_TO_LWREG_VM_V	(11);
+lw_r12_vm:	R3_TO_LWREG_VM_V	(12);
+lw_r13_vm:	R3_TO_LWREG_VM_V	(13);
+lw_r14_vm:	R3_TO_LWREG_VM		(14);
+lw_r15_vm:	R3_TO_LWREG_VM_V	(15);
+lw_r16_vm:	R3_TO_LWREG_VM		(16);
+lw_r17_vm:	R3_TO_LWREG_VM_V	(17);
+lw_r18_vm:	R3_TO_LWREG_VM_V	(18);
+lw_r19_vm:	R3_TO_LWREG_VM		(19);
+lw_r20_vm:	R3_TO_LWREG_VM		(20);
+lw_r21_vm:	R3_TO_LWREG_VM		(21);
+lw_r22_vm:	R3_TO_LWREG_VM		(22);
+lw_r23_vm:	R3_TO_LWREG_VM		(23);
+lw_r24_vm:	R3_TO_LWREG_VM		(24);
+lw_r25_vm:	R3_TO_LWREG_VM		(25);
+lw_r26_vm:	R3_TO_LWREG_VM		(26);
+lw_r27_vm:	R3_TO_LWREG_VM		(27);
+lw_r28_vm:	R3_TO_LWREG_VM		(28);
+lw_r29_vm:	R3_TO_LWREG_VM		(29);
+lw_r30_vm:	R3_TO_LWREG_VM		(30);
+lw_r31_vm:	R3_TO_LWREG_VM_V	(31);
+
+sw_table_vm:
+sw_r0_vm:	SWREG_TO_R3_VM		(0);
+sw_r1_vm:	SWREG_TO_R3_VM_V	(1);
+sw_r2_vm:	SWREG_TO_R3_VM_V	(2);
+sw_r3_vm:	SWREG_TO_R3_VM_V	(3);
+sw_r4_vm:	SWREG_TO_R3_VM_V	(4);
+sw_r5_vm:	SWREG_TO_R3_VM_V	(5);
+sw_r6_vm:	SWREG_TO_R3_VM_V	(6);
+sw_r7_vm:	SWREG_TO_R3_VM_V	(7);
+sw_r8_vm:	SWREG_TO_R3_VM_V	(8);
+sw_r9_vm:	SWREG_TO_R3_VM_V	(9);
+sw_r10_vm:	SWREG_TO_R3_VM_V	(10);
+sw_r11_vm:	SWREG_TO_R3_VM_V	(11);
+sw_r12_vm:	SWREG_TO_R3_VM_V	(12);
+sw_r13_vm:	SWREG_TO_R3_VM_V	(13);
+sw_r14_vm:	SWREG_TO_R3_VM		(14);
+sw_r15_vm:	SWREG_TO_R3_VM_V	(15);
+sw_r16_vm:	SWREG_TO_R3_VM		(16);
+sw_r17_vm:	SWREG_TO_R3_VM_V	(17);
+sw_r18_vm:	SWREG_TO_R3_VM_V	(18);
+sw_r19_vm:	SWREG_TO_R3_VM		(19);
+sw_r20_vm:	SWREG_TO_R3_VM		(20);
+sw_r21_vm:	SWREG_TO_R3_VM		(21);
+sw_r22_vm:	SWREG_TO_R3_VM		(22);
+sw_r23_vm:	SWREG_TO_R3_VM		(23);
+sw_r24_vm:	SWREG_TO_R3_VM		(24);
+sw_r25_vm:	SWREG_TO_R3_VM		(25);
+sw_r26_vm:	SWREG_TO_R3_VM		(26);
+sw_r27_vm:	SWREG_TO_R3_VM		(27);
+sw_r28_vm:	SWREG_TO_R3_VM		(28);
+sw_r29_vm:	SWREG_TO_R3_VM		(29);
+sw_r30_vm:	SWREG_TO_R3_VM		(30);
+sw_r31_vm:	SWREG_TO_R3_VM_V	(31);
+#endif /* CONFIG_MMU */
 
 /* Temporary data structures used in the handler */
 .section .data
-- 
cgit v0.10.2


From 0d6de9532663a4120ce35f507f16b72df382e360 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:23 +0200
Subject: microblaze_mmu_v2: uaccess MMU update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index a4e171d..65adad6 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -26,6 +28,10 @@
 #define VERIFY_READ	0
 #define VERIFY_WRITE	1
 
+#define __clear_user(addr, n)	(memset((void *)(addr), 0, (n)), 0)
+
+#ifndef CONFIG_MMU
+
 extern int ___range_ok(unsigned long addr, unsigned long size);
 
 #define __range_ok(addr, size) \
@@ -38,63 +44,64 @@ extern int ___range_ok(unsigned long addr, unsigned long size);
 extern int bad_user_access_length(void);
 
 /* FIXME this is function for optimalization -> memcpy */
-#define __get_user(var, ptr)					\
-	({							\
-		int __gu_err = 0;				\
-		switch (sizeof(*(ptr))) {			\
-		case 1:						\
-		case 2:						\
-		case 4:						\
-			(var) = *(ptr);				\
-			break;					\
-		case 8:						\
-			memcpy((void *) &(var), (ptr), 8);	\
-			break;					\
-		default:					\
-			(var) = 0;				\
-			__gu_err = __get_user_bad();		\
-			break;					\
-		}						\
-		__gu_err;					\
-	})
+#define __get_user(var, ptr)				\
+({							\
+	int __gu_err = 0;				\
+	switch (sizeof(*(ptr))) {			\
+	case 1:						\
+	case 2:						\
+	case 4:						\
+		(var) = *(ptr);				\
+		break;					\
+	case 8:						\
+		memcpy((void *) &(var), (ptr), 8);	\
+		break;					\
+	default:					\
+		(var) = 0;				\
+		__gu_err = __get_user_bad();		\
+		break;					\
+	}						\
+	__gu_err;					\
+})
 
 #define __get_user_bad()	(bad_user_access_length(), (-EFAULT))
 
+/* FIXME is not there defined __pu_val */
 #define __put_user(var, ptr)					\
-	({							\
-		int __pu_err = 0;				\
-		switch (sizeof(*(ptr))) {			\
-		case 1:						\
-		case 2:						\
-		case 4:						\
-			*(ptr) = (var);				\
-			break;					\
-		case 8: {					\
-			typeof(*(ptr)) __pu_val = var;		\
-			memcpy(ptr, &__pu_val, sizeof(__pu_val));\
-			}					\
-			break;					\
-		default:					\
-			__pu_err = __put_user_bad();		\
-			break;					\
-		}							\
-		__pu_err;						\
-	})
+({								\
+	int __pu_err = 0;					\
+	switch (sizeof(*(ptr))) {				\
+	case 1:							\
+	case 2:							\
+	case 4:							\
+		*(ptr) = (var);					\
+		break;						\
+	case 8: {						\
+		typeof(*(ptr)) __pu_val = (var);		\
+		memcpy(ptr, &__pu_val, sizeof(__pu_val));	\
+		}						\
+		break;						\
+	default:						\
+		__pu_err = __put_user_bad();			\
+		break;						\
+	}							\
+	__pu_err;						\
+})
 
 #define __put_user_bad()	(bad_user_access_length(), (-EFAULT))
 
-#define put_user(x, ptr)	__put_user(x, ptr)
-#define get_user(x, ptr)	__get_user(x, ptr)
-
-#define copy_to_user(to, from, n)		(memcpy(to, from, n), 0)
-#define copy_from_user(to, from, n)		(memcpy(to, from, n), 0)
+#define put_user(x, ptr)	__put_user((x), (ptr))
+#define get_user(x, ptr)	__get_user((x), (ptr))
 
-#define __copy_to_user(to, from, n)		(copy_to_user(to, from, n))
-#define __copy_from_user(to, from, n)		(copy_from_user(to, from, n))
-#define __copy_to_user_inatomic(to, from, n)	(__copy_to_user(to, from, n))
-#define __copy_from_user_inatomic(to, from, n)	(__copy_from_user(to, from, n))
+#define copy_to_user(to, from, n)	(memcpy((to), (from), (n)), 0)
+#define copy_from_user(to, from, n)	(memcpy((to), (from), (n)), 0)
 
-#define __clear_user(addr, n)	(memset((void *)addr, 0, n), 0)
+#define __copy_to_user(to, from, n)	(copy_to_user((to), (from), (n)))
+#define __copy_from_user(to, from, n)	(copy_from_user((to), (from), (n)))
+#define __copy_to_user_inatomic(to, from, n) \
+			(__copy_to_user((to), (from), (n)))
+#define __copy_from_user_inatomic(to, from, n) \
+			(__copy_from_user((to), (from), (n)))
 
 static inline unsigned long clear_user(void *addr, unsigned long size)
 {
@@ -103,13 +110,200 @@ static inline unsigned long clear_user(void *addr, unsigned long size)
 	return size;
 }
 
-/* Returns 0 if exception not found and fixup otherwise. */
+/* Returns 0 if exception not found and fixup otherwise.  */
 extern unsigned long search_exception_table(unsigned long);
 
+extern long strncpy_from_user(char *dst, const char *src, long count);
+extern long strnlen_user(const char *src, long count);
+
+#else /* CONFIG_MMU */
+
+/*
+ * Address is valid if:
+ *  - "addr", "addr + size" and "size" are all below the limit
+ */
+#define access_ok(type, addr, size) \
+	(get_fs().seg > (((unsigned long)(addr)) | \
+		(size) | ((unsigned long)(addr) + (size))))
+
+/* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n",
+ type?"WRITE":"READ",addr,size,get_fs().seg)) */
+
+/*
+ * All the __XXX versions macros/functions below do not perform
+ * access checking. It is assumed that the necessary checks have been
+ * already performed before the finction (macro) is called.
+ */
+
+#define get_user(x, ptr)						\
+({									\
+	access_ok(VERIFY_READ, (ptr), sizeof(*(ptr)))			\
+		? __get_user((x), (ptr)) : -EFAULT;			\
+})
+
+#define put_user(x, ptr)						\
+({									\
+	access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr)))			\
+		? __put_user((x), (ptr)) : -EFAULT;			\
+})
+
+#define __get_user(x, ptr)						\
+({									\
+	unsigned long __gu_val;						\
+	/*unsigned long __gu_ptr = (unsigned long)(ptr);*/		\
+	long __gu_err;							\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		__get_user_asm("lbu", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 2:								\
+		__get_user_asm("lhu", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 4:								\
+		__get_user_asm("lw", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	default:							\
+		__gu_val = 0; __gu_err = -EINVAL;			\
+	}								\
+	x = (__typeof__(*(ptr))) __gu_val;				\
+	__gu_err;							\
+})
+
+#define __get_user_asm(insn, __gu_ptr, __gu_val, __gu_err)		\
+({									\
+	__asm__ __volatile__ (						\
+			"1:"	insn	" %1, %2, r0;			\
+				addk	%0, r0, r0;			\
+			2:						\
+			.section .fixup,\"ax\";				\
+			3:	brid	2b;				\
+				addik	%0, r0, %3;			\
+			.previous;					\
+			.section __ex_table,\"a\";			\
+			.word	1b,3b;					\
+			.previous;"					\
+		: "=r"(__gu_err), "=r"(__gu_val)			\
+		: "r"(__gu_ptr), "i"(-EFAULT)				\
+	);								\
+})
+
+#define __put_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __gu_val = x;				\
+	long __gu_err = 0;						\
+	switch (sizeof(__gu_val)) {					\
+	case 1:								\
+		__put_user_asm("sb", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 2: 							\
+		__put_user_asm("sh", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 4:								\
+		__put_user_asm("sw", (ptr), __gu_val, __gu_err);	\
+		break;							\
+	case 8:								\
+		__put_user_asm_8((ptr), __gu_val, __gu_err);		\
+		break;							\
+	default:							\
+		__gu_err = -EINVAL;					\
+	}								\
+	__gu_err;							\
+})
+
+#define __put_user_asm_8(__gu_ptr, __gu_val, __gu_err)	\
+({							\
+__asm__ __volatile__ ("	lwi	%0, %1, 0;		\
+		1:	swi	%0, %2, 0;		\
+			lwi	%0, %1, 4;		\
+		2:	swi	%0, %2, 4;		\
+			addk	%0,r0,r0;		\
+		3:					\
+		.section .fixup,\"ax\";			\
+		4:	brid	3b;			\
+			addik	%0, r0, %3;		\
+		.previous;				\
+		.section __ex_table,\"a\";		\
+		.word	1b,4b,2b,4b;			\
+		.previous;"				\
+	: "=&r"(__gu_err)				\
+	: "r"(&__gu_val),				\
+	"r"(__gu_ptr), "i"(-EFAULT)			\
+	);						\
+})
+
+#define __put_user_asm(insn, __gu_ptr, __gu_val, __gu_err)	\
+({								\
+	__asm__ __volatile__ (					\
+			"1:"	insn	" %1, %2, r0;		\
+				addk	%0, r0, r0;		\
+			2:					\
+			.section .fixup,\"ax\";			\
+			3:	brid	2b;			\
+				addik	%0, r0, %3;		\
+			.previous;				\
+			.section __ex_table,\"a\";		\
+			.word	1b,3b;				\
+			.previous;"				\
+		: "=r"(__gu_err)				\
+		: "r"(__gu_val), "r"(__gu_ptr), "i"(-EFAULT)	\
+	);							\
+})
+
+/*
+ * Return: number of not copied bytes, i.e. 0 if OK or non-zero if fail.
+ */
+static inline int clear_user(char *to, int size)
+{
+	if (size && access_ok(VERIFY_WRITE, to, size)) {
+		__asm__ __volatile__ ("				\
+				1:				\
+					sb	r0, %2, r0;	\
+					addik	%0, %0, -1;	\
+					bneid	%0, 1b;		\
+					addik	%2, %2, 1;	\
+				2:				\
+				.section __ex_table,\"a\";	\
+				.word	1b,2b;			\
+				.section .text;"		\
+			: "=r"(size)				\
+			: "0"(size), "r"(to)
+		);
+	}
+	return size;
+}
+
+extern unsigned long __copy_tofrom_user(void __user *to,
+		const void __user *from, unsigned long size);
+
+#define copy_to_user(to, from, n)					\
+	(access_ok(VERIFY_WRITE, (to), (n)) ?				\
+		__copy_tofrom_user((void __user *)(to),			\
+			(__force const void __user *)(from), (n))	\
+		: -EFAULT)
+
+#define __copy_to_user(to, from, n)	copy_to_user((to), (from), (n))
+#define __copy_to_user_inatomic(to, from, n)	copy_to_user((to), (from), (n))
+
+#define copy_from_user(to, from, n)					\
+	(access_ok(VERIFY_READ, (from), (n)) ?				\
+		__copy_tofrom_user((__force void __user *)(to),		\
+			(void __user *)(from), (n))			\
+		: -EFAULT)
+
+#define __copy_from_user(to, from, n)	copy_from_user((to), (from), (n))
+#define __copy_from_user_inatomic(to, from, n) \
+		copy_from_user((to), (from), (n))
+
+extern int __strncpy_user(char *to, const char __user *from, int len);
+extern int __strnlen_user(const char __user *sstr, int len);
+
+#define strncpy_from_user(to, from, len)	\
+		(access_ok(VERIFY_READ, from, 1) ?	\
+			__strncpy_user(to, from, len) : -EFAULT)
+#define strnlen_user(str, len)	\
+		(access_ok(VERIFY_READ, str, 1) ? __strnlen_user(str, len) : 0)
 
-extern long strncpy_from_user(char *dst, const char __user *src, long count);
-extern long strnlen_user(const char __user *src, long count);
-extern long __strncpy_from_user(char *dst, const char __user *src, long count);
+#endif /* CONFIG_MMU */
 
 /*
  * The exception table consists of pairs of addresses: the first is the
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
new file mode 100644
index 0000000..67f991c
--- /dev/null
+++ b/arch/microblaze/lib/uaccess_old.S
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2009 PetaLogix
+ * Copyright (C) 2007 LynuxWorks, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/errno.h>
+#include <linux/linkage.h>
+
+/*
+ * int __strncpy_user(char *to, char *from, int len);
+ *
+ * Returns:
+ *  -EFAULT  for an exception
+ *  len      if we hit the buffer limit
+ *  bytes copied
+ */
+
+	.text
+.globl __strncpy_user;
+.align 4;
+__strncpy_user:
+
+	/*
+	 * r5 - to
+	 * r6 - from
+	 * r7 - len
+	 * r3 - temp count
+	 * r4 - temp val
+	 */
+	addik	r3,r7,0		/* temp_count = len */
+	beqi	r3,3f
+1:
+	lbu	r4,r6,r0
+	sb	r4,r5,r0
+
+	addik	r3,r3,-1
+	beqi	r3,2f		/* break on len */
+
+	addik	r5,r5,1
+	bneid	r4,1b
+	addik	r6,r6,1		/* delay slot */
+	addik	r3,r3,1		/* undo "temp_count--" */
+2:
+	rsubk	r3,r3,r7	/* temp_count = len - temp_count */
+3:
+	rtsd	r15,8
+	nop
+
+
+	.section	.fixup, "ax"
+	.align	2
+4:
+	brid	3b
+	addik	r3,r0, -EFAULT
+
+	.section	__ex_table, "a"
+	.word	1b,4b
+
+/*
+ * int __strnlen_user(char __user *str, int maxlen);
+ *
+ * Returns:
+ *  0 on error
+ *  maxlen + 1  if no NUL byte found within maxlen bytes
+ *  size of the string (including NUL byte)
+ */
+
+	.text
+.globl __strnlen_user;
+.align 4;
+__strnlen_user:
+	addik	r3,r6,0
+	beqi	r3,3f
+1:
+	lbu	r4,r5,r0
+	beqid	r4,2f		/* break on NUL */
+	addik	r3,r3,-1	/* delay slot */
+
+	bneid	r3,1b
+	addik	r5,r5,1		/* delay slot */
+
+	addik	r3,r3,-1	/* for break on len */
+2:
+	rsubk	r3,r3,r6
+3:
+	rtsd	r15,8
+	nop
+
+
+	.section	.fixup,"ax"
+4:
+	brid	3b
+	addk	r3,r0,r0
+
+	.section	__ex_table,"a"
+	.word	1b,4b
+
+/*
+ * int __copy_tofrom_user(char *to, char *from, int len)
+ * Return:
+ *   0 on success
+ *   number of not copied bytes on error
+ */
+	.text
+.globl __copy_tofrom_user;
+.align 4;
+__copy_tofrom_user:
+	/*
+	 * r5 - to
+	 * r6 - from
+	 * r7, r3 - count
+	 * r4 - tempval
+	 */
+	addik	r3,r7,0
+	beqi	r3,3f
+1:
+	lbu	r4,r6,r0
+	addik	r6,r6,1
+2:
+	sb	r4,r5,r0
+	addik	r3,r3,-1
+	bneid	r3,1b
+	addik	r5,r5,1		/* delay slot */
+3:
+	rtsd	r15,8
+	nop
+
+
+	.section	__ex_table,"a"
+	.word	1b,3b,2b,3b
-- 
cgit v0.10.2


From 4bb73c3de752dc386958be32dc7e1d9fefdcbbf0 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:24 +0200
Subject: microblaze_mmu_v2: Add MMU related exceptions handling

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 4a8a406..0cb64a3 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -21,9 +21,9 @@
 
 #include <asm/exceptions.h>
 #include <asm/entry.h>		/* For KM CPU var */
-#include <asm/uaccess.h>
-#include <asm/errno.h>
-#include <asm/ptrace.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
 #include <asm/current.h>
 
 #define MICROBLAZE_ILL_OPCODE_EXCEPTION	0x02
@@ -31,7 +31,7 @@
 #define MICROBLAZE_DBUS_EXCEPTION	0x04
 #define MICROBLAZE_DIV_ZERO_EXCEPTION	0x05
 #define MICROBLAZE_FPU_EXCEPTION	0x06
-#define MICROBLAZE_PRIVILEG_EXCEPTION	0x07
+#define MICROBLAZE_PRIVILEGED_EXCEPTION	0x07
 
 static DEFINE_SPINLOCK(die_lock);
 
@@ -66,6 +66,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 							int fsr, int addr)
 {
+#ifdef CONFIG_MMU
+	int code;
+	addr = regs->pc;
+#endif
+
 #if 0
 	printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n",
 			type, user_mode(regs) ? "user" : "kernel", fsr,
@@ -74,7 +79,13 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 
 	switch (type & 0x1F) {
 	case MICROBLAZE_ILL_OPCODE_EXCEPTION:
-		_exception(SIGILL, regs, ILL_ILLOPC, addr);
+		if (user_mode(regs)) {
+			printk(KERN_WARNING "Illegal opcode exception in user mode.\n");
+			_exception(SIGILL, regs, ILL_ILLOPC, addr);
+			return;
+		}
+		printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n");
+		die("opcode exception", regs, SIGBUS);
 		break;
 	case MICROBLAZE_IBUS_EXCEPTION:
 		if (user_mode(regs)) {
@@ -95,11 +106,16 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 		die("bus exception", regs, SIGBUS);
 		break;
 	case MICROBLAZE_DIV_ZERO_EXCEPTION:
-		printk(KERN_WARNING "Divide by zero exception\n");
-		_exception(SIGILL, regs, ILL_ILLOPC, addr);
+		if (user_mode(regs)) {
+			printk(KERN_WARNING "Divide by zero exception in user mode\n");
+			_exception(SIGILL, regs, ILL_ILLOPC, addr);
+			return;
+		}
+		printk(KERN_WARNING "Divide by zero exception in kernel mode.\n");
+		die("Divide by exception", regs, SIGBUS);
 		break;
-
 	case MICROBLAZE_FPU_EXCEPTION:
+		printk(KERN_WARNING "FPU exception\n");
 		/* IEEE FP exception */
 		/* I removed fsr variable and use code var for storing fsr */
 		if (fsr & FSR_IO)
@@ -115,7 +131,20 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 		_exception(SIGFPE, regs, fsr, addr);
 		break;
 
+#ifdef CONFIG_MMU
+	case MICROBLAZE_PRIVILEGED_EXCEPTION:
+		printk(KERN_WARNING "Privileged exception\n");
+		/* "brk r0,r0" - used as debug breakpoint */
+		if (get_user(code, (unsigned long *)regs->pc) == 0
+			&& code == 0x980c0000) {
+			_exception(SIGTRAP, regs, TRAP_BRKPT, addr);
+		} else {
+			_exception(SIGILL, regs, ILL_PRVOPC, addr);
+		}
+		break;
+#endif
 	default:
+	/* FIXME what to do in unexpected exception */
 		printk(KERN_WARNING "Unexpected exception %02x "
 			"PC=%08x in %s mode\n", type, (unsigned int) addr,
 			kernel_mode(regs) ? "kernel" : "user");
-- 
cgit v0.10.2


From d4c1285ef0d6b005f4e7651ee1e7cf304b4e97dc Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:24 +0200
Subject: microblaze_mmu_v2: Update linker script for MMU

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index 910f7c09..8ae807a 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -17,8 +17,7 @@ ENTRY(_start)
 jiffies = jiffies_64 + 4;
 
 SECTIONS {
-	. = CONFIG_KERNEL_BASE_ADDR;
-
+	. = CONFIG_KERNEL_START;
 	.text : {
 		_text = . ;
 		_stext = . ;
-- 
cgit v0.10.2


From 65504a47e02e4e6e58884376f4a700f83cc8234f Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:26 +0200
Subject: microblaze_mmu_v2: Enable fork syscall for MMU and add fork as vfork
 for noMMU

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S
index f24b126..1fce6b8 100644
--- a/arch/microblaze/kernel/entry-nommu.S
+++ b/arch/microblaze/kernel/entry-nommu.S
@@ -10,7 +10,7 @@
 
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
-#include <asm/errno.h>
+#include <linux/errno.h>
 #include <asm/entry.h>
 #include <asm/asm-offsets.h>
 #include <asm/registers.h>
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 3bb42ec..376d178 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -2,7 +2,11 @@ ENTRY(sys_call_table)
 	.long sys_restart_syscall	/* 0 - old "setup()" system call,
 					 * used for restarting */
 	.long sys_exit
-	.long sys_ni_syscall		/* was fork */
+#ifdef CONFIG_MMU
+	.long sys_fork_wrapper
+#else
+	.long sys_ni_syscall
+#endif
 	.long sys_read
 	.long sys_write
 	.long sys_open			/* 5 */
-- 
cgit v0.10.2


From 17f3324c3eb271882b9e6b8fc3b1698290121801 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:27 +0200
Subject: microblaze_mmu_v2: Traps MMU update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/exceptions.h b/arch/microblaze/include/asm/exceptions.h
index 24ca540..90731df 100644
--- a/arch/microblaze/include/asm/exceptions.h
+++ b/arch/microblaze/include/asm/exceptions.h
@@ -1,8 +1,8 @@
 /*
  * Preliminary support for HW exception handing for Microblaze
  *
- * Copyright (C) 2008 Michal Simek
- * Copyright (C) 2008 PetaLogix
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2005 John Williams <jwilliams@itee.uq.edu.au>
  *
  * This file is subject to the terms and conditions of the GNU General
@@ -64,21 +64,13 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 void die(const char *str, struct pt_regs *fp, long err);
 void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr);
 
-#if defined(CONFIG_XMON)
-extern void xmon(struct pt_regs *regs);
-extern int xmon_bpt(struct pt_regs *regs);
-extern int xmon_sstep(struct pt_regs *regs);
-extern int xmon_iabr_match(struct pt_regs *regs);
-extern int xmon_dabr_match(struct pt_regs *regs);
-extern void (*xmon_fault_handler)(struct pt_regs *regs);
+#ifdef CONFIG_MMU
+void __bug(const char *file, int line, void *data);
+int bad_trap(int trap_num, struct pt_regs *regs);
+int debug_trap(struct pt_regs *regs);
+#endif /* CONFIG_MMU */
 
-void (*debugger)(struct pt_regs *regs) = xmon;
-int (*debugger_bpt)(struct pt_regs *regs) = xmon_bpt;
-int (*debugger_sstep)(struct pt_regs *regs) = xmon_sstep;
-int (*debugger_iabr_match)(struct pt_regs *regs) = xmon_iabr_match;
-int (*debugger_dabr_match)(struct pt_regs *regs) = xmon_dabr_match;
-void (*debugger_fault_handler)(struct pt_regs *regs);
-#elif defined(CONFIG_KGDB)
+#if defined(CONFIG_KGDB)
 void (*debugger)(struct pt_regs *regs);
 int (*debugger_bpt)(struct pt_regs *regs);
 int (*debugger_sstep)(struct pt_regs *regs);
diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c
index 6322cc1..eaaaf80 100644
--- a/arch/microblaze/kernel/traps.c
+++ b/arch/microblaze/kernel/traps.c
@@ -97,3 +97,37 @@ void dump_stack(void)
 	show_stack(NULL, NULL);
 }
 EXPORT_SYMBOL(dump_stack);
+
+#ifdef CONFIG_MMU
+void __bug(const char *file, int line, void *data)
+{
+	if (data)
+		printk(KERN_CRIT "kernel BUG at %s:%d (data = %p)!\n",
+			file, line, data);
+	else
+		printk(KERN_CRIT "kernel BUG at %s:%d!\n", file, line);
+
+	machine_halt();
+}
+
+int bad_trap(int trap_num, struct pt_regs *regs)
+{
+	printk(KERN_CRIT
+		"unimplemented trap %d called at 0x%08lx, pid %d!\n",
+		trap_num, regs->pc, current->pid);
+	return -ENOSYS;
+}
+
+int debug_trap(struct pt_regs *regs)
+{
+	int i;
+	printk(KERN_CRIT "debug trap\n");
+	for (i = 0; i < 32; i++) {
+		/* printk("r%i:%08X\t",i,regs->gpr[i]); */
+		if ((i % 4) == 3)
+			printk(KERN_CRIT "\n");
+	}
+	printk(KERN_CRIT "pc:%08lX\tmsr:%08lX\n", regs->pc, regs->msr);
+	return -ENOSYS;
+}
+#endif
-- 
cgit v0.10.2


From 8b28626a6b1522b39f75d0bf80d5dec23c931f5a Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:28 +0200
Subject: microblaze_mmu_v2: Update signal returning address

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 3cdcbfe..4c0e652 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -388,7 +388,15 @@ handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler)
 	case -ERESTARTNOINTR:
 do_restart:
 		/* offset of 4 bytes to re-execute trap (brki) instruction */
+#ifndef CONFIG_MMU
 		regs->pc -= 4;
+#else
+		/* offset of 8 bytes required = 4 for rtbd
+		   offset, plus 4 for size of
+			"brki r14,8"
+		   instruction. */
+		regs->pc -= 8;
+#endif
 		break;
 	}
 }
-- 
cgit v0.10.2


From 46fb9be93b15bd8315622ad2f85f9516c064a785 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:28 +0200
Subject: microblaze_mmu_v2: Update cacheflush.h

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h
index 3300b78..f989d6a 100644
--- a/arch/microblaze/include/asm/cacheflush.h
+++ b/arch/microblaze/include/asm/cacheflush.h
@@ -1,5 +1,6 @@
 /*
- * Copyright (C) 2007 PetaLogix
+ * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2007-2009 PetaLogix
  * Copyright (C) 2007 John Williams <john.williams@petalogix.com>
  * based on v850 version which was
  * Copyright (C) 2001,02,03 NEC Electronics Corporation
@@ -43,6 +44,23 @@
 #define flush_icache_range(start, len)	__invalidate_icache_range(start, len)
 #define flush_icache_page(vma, pg)		do { } while (0)
 
+#ifndef CONFIG_MMU
+# define flush_icache_user_range(start, len)	do { } while (0)
+#else
+# define flush_icache_user_range(vma, pg, adr, len) __invalidate_icache_all()
+
+# define flush_page_to_ram(page)		do { } while (0)
+
+# define flush_icache()			__invalidate_icache_all()
+# define flush_cache_sigtramp(vaddr) \
+			__invalidate_icache_range(vaddr, vaddr + 8)
+
+# define flush_dcache_mmap_lock(mapping)	do { } while (0)
+# define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+# define flush_cache_dup_mm(mm)			do { } while (0)
+#endif
+
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
-- 
cgit v0.10.2


From dcffc1b29115cc26686b8ae62b587e63f0e139f0 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:29 +0200
Subject: microblaze_mmu_v2: Update dma.h for MMU

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/dma.h b/arch/microblaze/include/asm/dma.h
index 0967fa0..08c073b 100644
--- a/arch/microblaze/include/asm/dma.h
+++ b/arch/microblaze/include/asm/dma.h
@@ -9,8 +9,13 @@
 #ifndef _ASM_MICROBLAZE_DMA_H
 #define _ASM_MICROBLAZE_DMA_H
 
+#ifndef CONFIG_MMU
 /* we don't have dma address limit. define it as zero to be
  * unlimited. */
 #define MAX_DMA_ADDRESS		(0)
+#else
+/* Virtual address corresponding to last available physical memory address.  */
+#define MAX_DMA_ADDRESS (CONFIG_KERNEL_START + memory_size - 1)
+#endif
 
 #endif /* _ASM_MICROBLAZE_DMA_H */
-- 
cgit v0.10.2


From e57a221f15fd62921d845ddb37ce048fc7a38fd6 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:30 +0200
Subject: microblaze_mmu_v2: Elf update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/elf.h b/arch/microblaze/include/asm/elf.h
index 81337f2..f92fc0d 100644
--- a/arch/microblaze/include/asm/elf.h
+++ b/arch/microblaze/include/asm/elf.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
  * Copyright (C) 2006 Atmark Techno, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -27,4 +29,95 @@
  */
 #define ELF_CLASS	ELFCLASS32
 
+#ifndef __uClinux__
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/byteorder.h>
+
+#ifndef ELF_GREG_T
+#define ELF_GREG_T
+typedef unsigned long elf_greg_t;
+#endif
+
+#ifndef ELF_NGREG
+#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
+#endif
+
+#ifndef ELF_GREGSET_T
+#define ELF_GREGSET_T
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+#endif
+
+#ifndef ELF_FPREGSET_T
+#define ELF_FPREGSET_T
+
+/* TBD */
+#define ELF_NFPREG	33	/* includes fsr */
+typedef unsigned long elf_fpreg_t;
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
+
+/* typedef struct user_fpu_struct elf_fpregset_t; */
+#endif
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader.  We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ */
+
+#define ELF_ET_DYN_BASE         (0x08000000)
+
+#ifdef __LITTLE_ENDIAN__
+#define ELF_DATA	ELFDATA2LSB
+#else
+#define ELF_DATA	ELFDATA2MSB
+#endif
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE	4096
+
+
+#define ELF_CORE_COPY_REGS(_dest, _regs)			\
+	memcpy((char *) &_dest, (char *) _regs,		\
+	sizeof(struct pt_regs));
+
+/* This yields a mask that user programs can use to figure out what
+ * instruction set this CPU supports.  This could be done in user space,
+ * but it's not easy, and we've already done it here.
+ */
+#define ELF_HWCAP	(0)
+
+/* This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+
+ * For the moment, we have only optimizations for the Intel generations,
+ * but that could change...
+ */
+#define ELF_PLATFORM  (NULL)
+
+/* Added _f parameter. Is this definition correct: TBD */
+#define ELF_PLAT_INIT(_r, _f)				\
+do {							\
+	_r->r1 =  _r->r1 =  _r->r2 =  _r->r3 =		\
+	_r->r4 =  _r->r5 =  _r->r6 =  _r->r7 =		\
+	_r->r8 =  _r->r9 =  _r->r10 = _r->r11 =		\
+	_r->r12 = _r->r13 = _r->r14 = _r->r15 =		\
+	_r->r16 = _r->r17 = _r->r18 = _r->r19 =		\
+	_r->r20 = _r->r21 = _r->r22 = _r->r23 =		\
+	_r->r24 = _r->r25 = _r->r26 = _r->r27 =		\
+	_r->r28 = _r->r29 = _r->r30 = _r->r31 =		\
+	0;						\
+} while (0)
+
+#ifdef __KERNEL__
+#define SET_PERSONALITY(ex) set_personality(PER_LINUX_32BIT)
+#endif
+
+#endif /* __uClinux__ */
+
 #endif /* _ASM_MICROBLAZE_ELF_H */
-- 
cgit v0.10.2


From 8b3f7d5c0ba7600222744588126388b225c61f18 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:31 +0200
Subject: microblaze_mmu_v2: stat.h MMU update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/stat.h b/arch/microblaze/include/asm/stat.h
index 5f18b8a..a15f775 100644
--- a/arch/microblaze/include/asm/stat.h
+++ b/arch/microblaze/include/asm/stat.h
@@ -16,58 +16,53 @@
 
 #include <linux/posix_types.h>
 
+#define STAT_HAVE_NSEC 1
+
 struct stat {
-	unsigned int	st_dev;
+	unsigned long	st_dev;
 	unsigned long	st_ino;
 	unsigned int	st_mode;
 	unsigned int	st_nlink;
 	unsigned int	st_uid;
 	unsigned int	st_gid;
-	unsigned int	st_rdev;
-	unsigned long	st_size;
-	unsigned long	st_blksize;
-	unsigned long	st_blocks;
-	unsigned long	st_atime;
-	unsigned long	__unused1; /* unsigned long  st_atime_nsec */
-	unsigned long	st_mtime;
-	unsigned long	__unused2; /* unsigned long  st_mtime_nsec */
-	unsigned long	st_ctime;
-	unsigned long	__unused3; /* unsigned long  st_ctime_nsec */
+	unsigned long	st_rdev;
+	unsigned long	__pad1;
+	long		st_size;
+	int		st_blksize;
+	int		__pad2;
+	long		st_blocks;
+	int		st_atime;
+	unsigned int	st_atime_nsec;
+	int		st_mtime;
+	unsigned int	st_mtime_nsec;
+	int		st_ctime;
+	unsigned int	st_ctime_nsec;
 	unsigned long	__unused4;
 	unsigned long	__unused5;
 };
 
 struct stat64 {
-	unsigned long long	st_dev;
-	unsigned long	__unused1;
-
-	unsigned long long	st_ino;
-
-	unsigned int	st_mode;
-	unsigned int	st_nlink;
-
-	unsigned int	st_uid;
-	unsigned int	st_gid;
-
-	unsigned long long	st_rdev;
-	unsigned long	__unused3;
-
-	long long	st_size;
-	unsigned long	st_blksize;
-
-	unsigned long	st_blocks; /* No. of 512-byte blocks allocated */
-	unsigned long	__unused4; /* future possible st_blocks high bits */
-
-	unsigned long	st_atime;
-	unsigned long	st_atime_nsec;
-
-	unsigned long	st_mtime;
-	unsigned long	st_mtime_nsec;
-
-	unsigned long	st_ctime;
-	unsigned long	st_ctime_nsec;
-
-	unsigned long	__unused8;
+	unsigned long long	st_dev;		/* Device.  */
+	unsigned long long	st_ino;		/* File serial number.  */
+	unsigned int		st_mode;	/* File mode.  */
+	unsigned int		st_nlink;	/* Link count.  */
+	unsigned int		st_uid;		/* User ID of the file's owner.  */
+	unsigned int		st_gid;		/* Group ID of the file's group. */
+	unsigned long long	st_rdev;	/* Device number, if device.  */
+	unsigned long long	__pad1;
+	long long		st_size;	/* Size of file, in bytes.  */
+	int			st_blksize;	/* Optimal block size for I/O.  */
+	int			__pad2;
+	long long		st_blocks;	/* Number 512-byte blocks allocated. */
+	int			st_atime;	/* Time of last access.  */
+	unsigned int		st_atime_nsec;
+	int			st_mtime;	/* Time of last modification.  */
+	unsigned int		st_mtime_nsec;
+	int			st_ctime;	/* Time of last status change.  */
+	unsigned int		st_ctime_nsec;
+	unsigned int		__unused4;
+	unsigned int		__unused5;
 };
 
 #endif /* _ASM_MICROBLAZE_STAT_H */
+
-- 
cgit v0.10.2


From a116f6d5db4476d0c941b495a6122b0130bbf20b Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:31 +0200
Subject: microblaze_mmu_v2: Kconfig update

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index f8e0722..b50b845 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -54,11 +54,9 @@ config GENERIC_GPIO
 	def_bool y
 
 config PCI
-	depends on !MMU
 	def_bool n
 
 config NO_DMA
-	depends on !MMU
 	def_bool y
 
 source "init/Kconfig"
@@ -76,7 +74,8 @@ source "kernel/Kconfig.preempt"
 source "kernel/Kconfig.hz"
 
 config MMU
-	def_bool n
+	bool "MMU support"
+	default n
 
 config NO_MMU
 	bool
@@ -119,6 +118,113 @@ config PROC_DEVICETREE
 
 endmenu
 
+menu "Advanced setup"
+
+config ADVANCED_OPTIONS
+	bool "Prompt for advanced kernel configuration options"
+	depends on MMU
+	help
+	  This option will enable prompting for a variety of advanced kernel
+	  configuration options.  These options can cause the kernel to not
+	  work if they are set incorrectly, but can be used to optimize certain
+	  aspects of kernel memory management.
+
+	  Unless you know what you are doing, say N here.
+
+comment "Default settings for advanced configuration options are used"
+	depends on !ADVANCED_OPTIONS
+
+config HIGHMEM_START_BOOL
+	bool "Set high memory pool address"
+	depends on ADVANCED_OPTIONS && HIGHMEM
+	help
+	  This option allows you to set the base address of the kernel virtual
+	  area used to map high memory pages.  This can be useful in
+	  optimizing the layout of kernel virtual memory.
+
+	  Say N here unless you know what you are doing.
+
+config HIGHMEM_START
+	hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL
+	depends on MMU
+	default "0xfe000000"
+
+config LOWMEM_SIZE_BOOL
+	bool "Set maximum low memory"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the maximum amount of memory which
+	  will be used as "low memory", that is, memory which the kernel can
+	  access directly, without having to set up a kernel virtual mapping.
+	  This can be useful in optimizing the layout of kernel virtual
+	  memory.
+
+	  Say N here unless you know what you are doing.
+
+config LOWMEM_SIZE
+	hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL
+	depends on MMU
+	default "0x30000000"
+
+config KERNEL_START_BOOL
+	bool "Set custom kernel base address"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the kernel virtual address at which
+	  the kernel will map low memory (the kernel image will be linked at
+	  this address).  This can be useful in optimizing the virtual memory
+	  layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config KERNEL_START
+	hex "Virtual address of kernel base" if KERNEL_START_BOOL
+	default "0xc0000000" if MMU
+	default KERNEL_BASE_ADDR if !MMU
+
+config TASK_SIZE_BOOL
+	bool "Set custom user task size"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the amount of virtual address space
+	  allocated to user tasks.  This can be useful in optimizing the
+	  virtual memory layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config TASK_SIZE
+	hex "Size of user task space" if TASK_SIZE_BOOL
+	depends on MMU
+	default "0x80000000"
+
+config CONSISTENT_START_BOOL
+	bool "Set custom consistent memory pool address"
+	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+	help
+	  This option allows you to set the base virtual address
+	  of the the consistent memory pool.  This pool of virtual
+	  memory is used to make consistent memory allocations.
+
+config CONSISTENT_START
+	hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL
+	depends on MMU
+	default "0xff100000" if NOT_COHERENT_CACHE
+
+config CONSISTENT_SIZE_BOOL
+	bool "Set custom consistent memory pool size"
+	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+	help
+	  This option allows you to set the size of the the
+	  consistent memory pool.  This pool of virtual memory
+	  is used to make consistent memory allocations.
+
+config CONSISTENT_SIZE
+	hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
+	depends on MMU
+	default "0x00200000" if NOT_COHERENT_CACHE
+
+endmenu
+
 source "mm/Kconfig"
 
 menu "Exectuable file formats"
-- 
cgit v0.10.2


From 4b2368ffd6e3563b73b8391c133822a58b1b432a Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 26 May 2009 16:30:32 +0200
Subject: microblaze_mmu_v2: Makefiles

Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index ab731b7..d0bcf80 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -1,4 +1,8 @@
+ifeq ($(CONFIG_MMU),y)
+UTS_SYSNAME = -DUTS_SYSNAME=\"Linux\"
+else
 UTS_SYSNAME = -DUTS_SYSNAME=\"uClinux\"
+endif
 
 # What CPU vesion are we building for, and crack it open
 # as major.minor.rev
diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile
index 844edf4..c2bb043 100644
--- a/arch/microblaze/boot/Makefile
+++ b/arch/microblaze/boot/Makefile
@@ -7,6 +7,8 @@ targets := linux.bin linux.bin.gz
 OBJCOPYFLAGS_linux.bin  := -O binary
 
 $(obj)/linux.bin: vmlinux FORCE
+	[ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \
+	touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image"
 	$(call if_changed,objcopy)
 	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile
index da94bec..f4a5e19 100644
--- a/arch/microblaze/kernel/Makefile
+++ b/arch/microblaze/kernel/Makefile
@@ -15,5 +15,6 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_SELFMOD)		+= selfmod.o
 obj-$(CONFIG_HEART_BEAT)	+= heartbeat.o
 obj-$(CONFIG_MODULES)		+= microblaze_ksyms.o module.o
+obj-$(CONFIG_MMU)		+= misc.o
 
 obj-y	+= entry$(MMUEXT).o
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
index d27126b..71c8cb6 100644
--- a/arch/microblaze/lib/Makefile
+++ b/arch/microblaze/lib/Makefile
@@ -10,4 +10,5 @@ else
 lib-y += memcpy.o memmove.o
 endif
 
-lib-y +=  uaccess.o
+lib-$(CONFIG_NO_MMU) += uaccess.o
+lib-$(CONFIG_MMU) += uaccess_old.o
diff --git a/arch/microblaze/mm/Makefile b/arch/microblaze/mm/Makefile
index bf9e447..6c8a924 100644
--- a/arch/microblaze/mm/Makefile
+++ b/arch/microblaze/mm/Makefile
@@ -3,3 +3,5 @@
 #
 
 obj-y := init.o
+
+obj-$(CONFIG_MMU) += pgtable.o mmu_context.o fault.o
-- 
cgit v0.10.2


From f6eb53498ee8f725832f3a0fffca90566bb118a6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 26 May 2009 15:50:25 +0100
Subject: GFS2: Remove args subdir from gfs2 sysfs files

Since we can cat /proc/mounts there is no need to have this
subdirectory in the gfs2 sysfs files. In fact this does not
reflect the full range of possible mount argumenmts, where
as /proc/mounts does.

There was only one userland user of this set of sysfs files
and it will function perfectly well without these files
being present (in fact that subcommand of gfs2_tool is
obsolete anyway).

The tune/* subdirectory is also considered mostly obsolete,
but there are a few uses of this until mount arguments can
be added for the last few functions for which there are no
equivalents currently. However the tune/* directory is still
in my sights and new code should avoid using it. Only the gfs2_quota
and gfs2_tool programs are know to use tune/* at the moment.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 94bd59e..23419dc 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -411,44 +411,6 @@ static struct attribute *lock_module_attrs[] = {
 	NULL,
 };
 
-#define ARGS_ATTR(name, fmt)                                                \
-static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                 \
-{                                                                           \
-	return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name);       \
-}                                                                           \
-static struct gfs2_attr args_attr_##name = __ATTR_RO(name)
-
-ARGS_ATTR(lockproto,       "%s\n");
-ARGS_ATTR(locktable,       "%s\n");
-ARGS_ATTR(hostdata,        "%s\n");
-ARGS_ATTR(spectator,       "%d\n");
-ARGS_ATTR(ignore_local_fs, "%d\n");
-ARGS_ATTR(localcaching,    "%d\n");
-ARGS_ATTR(localflocks,     "%d\n");
-ARGS_ATTR(debug,           "%d\n");
-ARGS_ATTR(upgrade,         "%d\n");
-ARGS_ATTR(posix_acl,       "%d\n");
-ARGS_ATTR(quota,           "%u\n");
-ARGS_ATTR(suiddir,         "%d\n");
-ARGS_ATTR(data,            "%d\n");
-
-static struct attribute *args_attrs[] = {
-	&args_attr_lockproto.attr,
-	&args_attr_locktable.attr,
-	&args_attr_hostdata.attr,
-	&args_attr_spectator.attr,
-	&args_attr_ignore_local_fs.attr,
-	&args_attr_localcaching.attr,
-	&args_attr_localflocks.attr,
-	&args_attr_debug.attr,
-	&args_attr_upgrade.attr,
-	&args_attr_posix_acl.attr,
-	&args_attr_quota.attr,
-	&args_attr_suiddir.attr,
-	&args_attr_data.attr,
-	NULL,
-};
-
 /*
  * get and set struct gfs2_tune fields
  */
@@ -545,11 +507,6 @@ static struct attribute *tune_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group args_group = {
-	.name = "args",
-	.attrs = args_attrs,
-};
-
 static struct attribute_group tune_group = {
 	.name = "tune",
 	.attrs = tune_attrs,
@@ -570,13 +527,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 	if (error)
 		goto fail;
 
-	error = sysfs_create_group(&sdp->sd_kobj, &args_group);
-	if (error)
-		goto fail_reg;
-
 	error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
 	if (error)
-		goto fail_args;
+		goto fail_reg;
 
 	error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group);
 	if (error)
@@ -587,8 +540,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
 
 fail_tune:
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
-fail_args:
-	sysfs_remove_group(&sdp->sd_kobj, &args_group);
 fail_reg:
 	kobject_put(&sdp->sd_kobj);
 fail:
@@ -599,7 +550,6 @@ fail:
 void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
 {
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
-	sysfs_remove_group(&sdp->sd_kobj, &args_group);
 	sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
 	kobject_put(&sdp->sd_kobj);
 }
-- 
cgit v0.10.2


From 5582b0648de6248c67c0b47fa170e5fb15ab4bf1 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 27 May 2009 00:12:58 +0900
Subject: sh: pci-sh7780: Fix up for PCI_DISABLE_MWI changes.

This fixes a build error where references to pci_cache_line_size are
undefined, as this ceases to be exported when PCI_DISABLE_MWI is enabled,
as is now the default.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index ae13ff9..323b92d 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -15,8 +15,6 @@
 #include <linux/delay.h>
 #include "pci-sh4.h"
 
-extern u8 pci_cache_line_size;
-
 static struct resource sh7785_io_resource = {
 	.name	= "SH7785_IO",
 	.start	= SH7780_PCI_IO_BASE,
@@ -37,6 +35,7 @@ static struct pci_channel sh7780_pci_controller = {
 	.mem_offset	= 0x00000000,
 	.io_resource	= &sh7785_io_resource,
 	.io_offset	= 0x00000000,
+	.io_map_base	= SH7780_PCI_IO_BASE,
 };
 
 static struct sh4_pci_address_map sh7780_pci_map = {
@@ -99,8 +98,6 @@ static int __init sh7780_pci_init(void)
 	__raw_writeb(PCI_CLASS_BRIDGE_HOST & 0xff,
 		     chan->reg_base + SH7780_PCISUB);
 
-	pci_cache_line_size = pci_read_reg(chan, SH7780_PCICLS) / 4;
-
 	/*
 	 * Set IO and Mem windows to local address
 	 * Make PCI and local address the same for easy 1 to 1 mapping
@@ -142,8 +139,6 @@ static int __init sh7780_pci_init(void)
 	word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO;
 	pci_write_reg(chan, word, SH4_PCICR);
 
-	__set_io_port_base(SH7780_PCI_IO_BASE);
-
 	register_pci_controller(chan);
 
 	return 0;
-- 
cgit v0.10.2


From 79c3c0b729647a6246c120408f36e6804dab244e Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:38:58 -0700
Subject: davinci: Encapsulate SoC-specific data in a structure

Create a structure to encapsulate SoC-specific information.
This will assist in generalizing code so it can be used by
different SoCs that have similar hardware but with minor
differences such as having a different base address.

The idea is that the code for each SoC fills out a structure
with the correct information.  The board-specific code then
calls the SoC init routine which in turn will call a common
init routine that makes a copy of the structure, maps in I/O
regions, etc.

After initialization, code can get a pointer to the structure
by calling davinci_get_soc_info().  Eventually, the common
init routine will make a copy of all of the data pointed to
by the structure so the original data can be made __init_data.
That way the data for SoC's that aren't being used won't consume
memory for the entire life of the kernel.

The structure will be extended in subsequent patches but
initially, it holds the map_desc structure for any I/O
regions the SoC/board wants statically mapped.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 2873149..74d25f6 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -5,7 +5,7 @@
 
 # Common objects
 obj-y 			:= time.o irq.o clock.o serial.o io.o id.o psc.o \
-			   gpio.o devices.o dma.o usb.o
+			   gpio.o devices.o dma.o usb.o common.o
 
 obj-$(CONFIG_DAVINCI_MUX)		+= mux.o
 obj-$(CONFIG_CP_INTC)			+= cp_intc.o
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index 0874413..5ac2f56 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -37,6 +37,7 @@
 #include <mach/serial.h>
 #include <mach/nand.h>
 #include <mach/mmc.h>
+#include <mach/common.h>
 
 #define DAVINCI_ASYNC_EMIF_CONTROL_BASE		0x01e10000
 #define DAVINCI_ASYNC_EMIF_DATA_CE0_BASE	0x02000000
@@ -188,7 +189,6 @@ static struct davinci_uart_config uart_config __initdata = {
 
 static void __init dm355_evm_map_io(void)
 {
-	davinci_map_common_io();
 	dm355_init();
 }
 
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index d4562f5..28c9008 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -36,6 +36,7 @@
 #include <mach/serial.h>
 #include <mach/nand.h>
 #include <mach/mmc.h>
+#include <mach/common.h>
 
 #define DAVINCI_ASYNC_EMIF_CONTROL_BASE		0x01e10000
 #define DAVINCI_ASYNC_EMIF_DATA_CE0_BASE	0x02000000
@@ -187,7 +188,6 @@ static struct davinci_uart_config uart_config __initdata = {
 
 static void __init dm355_leopard_map_io(void)
 {
-	davinci_map_common_io();
 	dm355_init();
 }
 
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 02e7cda..cfe89c4 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -45,6 +45,7 @@
 #include <mach/psc.h>
 #include <mach/nand.h>
 #include <mach/mmc.h>
+#include <mach/common.h>
 
 #define DM644X_EVM_PHY_MASK		(0x2)
 #define DM644X_EVM_MDIO_FREQUENCY	(2200000) /* PHY bus frequency */
@@ -609,7 +610,6 @@ static struct davinci_uart_config uart_config __initdata = {
 static void __init
 davinci_evm_map_io(void)
 {
-	davinci_map_common_io();
 	dm644x_init();
 }
 
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index aedde3c..becae51 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -47,6 +47,7 @@
 #include <mach/i2c.h>
 #include <mach/mmc.h>
 #include <mach/emac.h>
+#include <mach/common.h>
 
 #define DM646X_EVM_PHY_MASK		(0x2)
 #define DM646X_EVM_MDIO_FREQUENCY	(2200000) /* PHY bus frequency */
@@ -265,7 +266,6 @@ static void __init evm_init_i2c(void)
 
 static void __init davinci_map_io(void)
 {
-	davinci_map_common_io();
 	dm646x_init();
 }
 
diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
index 913dad9..938b446 100644
--- a/arch/arm/mach-davinci/board-sffsdr.c
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -53,6 +53,7 @@
 #include <mach/serial.h>
 #include <mach/psc.h>
 #include <mach/mux.h>
+#include <mach/common.h>
 
 #define SFFSDR_PHY_MASK		(0x2)
 #define SFFSDR_MDIO_FREQUENCY	(2200000) /* PHY bus frequency */
@@ -152,7 +153,6 @@ static struct davinci_uart_config uart_config __initdata = {
 
 static void __init davinci_sffsdr_map_io(void)
 {
-	davinci_map_common_io();
 	dm644x_init();
 }
 
diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
new file mode 100644
index 0000000..9a5486f
--- /dev/null
+++ b/arch/arm/mach-davinci/common.c
@@ -0,0 +1,55 @@
+/*
+ * Code commons to all DaVinci SoCs.
+ *
+ * Author: Mark A. Greer <mgreer@mvista.com>
+ *
+ * 2009 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/module.h>
+#include <linux/io.h>
+
+#include <asm/tlb.h>
+#include <asm/mach/map.h>
+
+#include <mach/common.h>
+
+struct davinci_soc_info davinci_soc_info;
+EXPORT_SYMBOL(davinci_soc_info);
+
+void __init davinci_common_init(struct davinci_soc_info *soc_info)
+{
+	int ret;
+
+	if (!soc_info) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	memcpy(&davinci_soc_info, soc_info, sizeof(struct davinci_soc_info));
+
+	if (davinci_soc_info.io_desc && (davinci_soc_info.io_desc_num > 0))
+		iotable_init(davinci_soc_info.io_desc,
+				davinci_soc_info.io_desc_num);
+
+	/*
+	 * Normally devicemaps_init() would flush caches and tlb after
+	 * mdesc->map_io(), but we must also do it here because of the CPU
+	 * revision check below.
+	 */
+	local_flush_tlb_all();
+	flush_cache_all();
+
+	/*
+	 * We want to check CPU revision early for cpu_is_xxxx() macros.
+	 * IO space mapping must be initialized before we can do that.
+	 */
+	davinci_check_revision();
+
+	return;
+
+err:
+	pr_err("davinci_common_init: SoC Initialization failed\n");
+}
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index c02115f..6d1abfd 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -16,6 +16,8 @@
 
 #include <linux/spi/spi.h>
 
+#include <asm/mach/map.h>
+
 #include <mach/dm355.h>
 #include <mach/clock.h>
 #include <mach/cputype.h>
@@ -23,6 +25,7 @@
 #include <mach/psc.h>
 #include <mach/mux.h>
 #include <mach/irqs.h>
+#include <mach/common.h>
 
 #include "clock.h"
 #include "mux.h"
@@ -522,8 +525,23 @@ static struct platform_device dm355_edma_device = {
 
 /*----------------------------------------------------------------------*/
 
+static struct map_desc dm355_io_desc[] = {
+	{
+		.virtual	= IO_VIRT,
+		.pfn		= __phys_to_pfn(IO_PHYS),
+		.length		= IO_SIZE,
+		.type		= MT_DEVICE
+	},
+};
+
+static struct davinci_soc_info davinci_soc_info_dm355 = {
+	.io_desc		= dm355_io_desc,
+	.io_desc_num		= ARRAY_SIZE(dm355_io_desc),
+};
+
 void __init dm355_init(void)
 {
+	davinci_common_init(&davinci_soc_info_dm355);
 	davinci_clk_init(dm355_clks);
 	davinci_mux_register(dm355_pins, ARRAY_SIZE(dm355_pins));;
 }
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 0419d57..79f1136 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -13,6 +13,8 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 
+#include <asm/mach/map.h>
+
 #include <mach/dm644x.h>
 #include <mach/clock.h>
 #include <mach/cputype.h>
@@ -20,6 +22,7 @@
 #include <mach/irqs.h>
 #include <mach/psc.h>
 #include <mach/mux.h>
+#include <mach/common.h>
 
 #include "clock.h"
 #include "mux.h"
@@ -463,8 +466,23 @@ void dm644x_init_emac(struct emac_platform_data *unused) {}
 
 #endif
 
+static struct map_desc dm644x_io_desc[] = {
+	{
+		.virtual	= IO_VIRT,
+		.pfn		= __phys_to_pfn(IO_PHYS),
+		.length		= IO_SIZE,
+		.type		= MT_DEVICE
+	},
+};
+
+static struct davinci_soc_info davinci_soc_info_dm644x = {
+	.io_desc		= dm644x_io_desc,
+	.io_desc_num		= ARRAY_SIZE(dm644x_io_desc),
+};
+
 void __init dm644x_init(void)
 {
+	davinci_common_init(&davinci_soc_info_dm644x);
 	davinci_clk_init(dm644x_clks);
 	davinci_mux_register(dm644x_pins, ARRAY_SIZE(dm644x_pins));
 }
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 975ed06..8547ec0 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -13,6 +13,8 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 
+#include <asm/mach/map.h>
+
 #include <mach/dm646x.h>
 #include <mach/clock.h>
 #include <mach/cputype.h>
@@ -20,6 +22,7 @@
 #include <mach/irqs.h>
 #include <mach/psc.h>
 #include <mach/mux.h>
+#include <mach/common.h>
 
 #include "clock.h"
 #include "mux.h"
@@ -442,8 +445,23 @@ void dm646x_init_emac(struct emac_platform_data *unused) {}
 
 #endif
 
+static struct map_desc dm646x_io_desc[] = {
+	{
+		.virtual	= IO_VIRT,
+		.pfn		= __phys_to_pfn(IO_PHYS),
+		.length		= IO_SIZE,
+		.type		= MT_DEVICE
+	},
+};
+
+static struct davinci_soc_info davinci_soc_info_dm646x = {
+	.io_desc		= dm646x_io_desc,
+	.io_desc_num		= ARRAY_SIZE(dm646x_io_desc),
+};
+
 void __init dm646x_init(void)
 {
+	davinci_common_init(&davinci_soc_info_dm646x);
 	davinci_clk_init(dm646x_clks);
 	davinci_mux_register(dm646x_pins, ARRAY_SIZE(dm646x_pins));
 }
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 1917709..770a1ba 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -17,7 +17,6 @@ struct sys_timer;
 extern struct sys_timer davinci_timer;
 
 extern void davinci_irq_init(void);
-extern void davinci_map_common_io(void);
 
 /* parameters describe VBUS sourcing for host mode */
 extern void setup_usb(unsigned mA, unsigned potpgt_msec);
@@ -25,4 +24,15 @@ extern void setup_usb(unsigned mA, unsigned potpgt_msec);
 /* parameters describe VBUS sourcing for host mode */
 extern void setup_usb(unsigned mA, unsigned potpgt_msec);
 
+/* SoC specific init support */
+struct davinci_soc_info {
+	struct map_desc			*io_desc;
+	unsigned long			io_desc_num;
+};
+
+extern struct davinci_soc_info davinci_soc_info;
+
+extern void davinci_common_init(struct davinci_soc_info *soc_info);
+extern void davinci_check_revision(void);
+
 #endif /* __ARCH_ARM_MACH_DAVINCI_COMMON_H */
diff --git a/arch/arm/mach-davinci/include/mach/dm355.h b/arch/arm/mach-davinci/include/mach/dm355.h
index f7100b6..54903b7 100644
--- a/arch/arm/mach-davinci/include/mach/dm355.h
+++ b/arch/arm/mach-davinci/include/mach/dm355.h
@@ -13,10 +13,9 @@
 
 #include <mach/hardware.h>
 
-void __init dm355_init(void);
-
 struct spi_board_info;
 
+void __init dm355_init(void);
 void dm355_init_spi0(unsigned chipselect_mask,
 		struct spi_board_info *info, unsigned len);
 
diff --git a/arch/arm/mach-davinci/io.c b/arch/arm/mach-davinci/io.c
index a548abb..49912b4 100644
--- a/arch/arm/mach-davinci/io.c
+++ b/arch/arm/mach-davinci/io.c
@@ -9,47 +9,9 @@
  */
 
 #include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/io.h>
 
 #include <asm/tlb.h>
-#include <asm/memory.h>
-
-#include <asm/mach/map.h>
-#include <mach/clock.h>
-
-extern void davinci_check_revision(void);
-
-/*
- * The machine specific code may provide the extra mapping besides the
- * default mapping provided here.
- */
-static struct map_desc davinci_io_desc[] __initdata = {
-	{
-		.virtual	= IO_VIRT,
-		.pfn		= __phys_to_pfn(IO_PHYS),
-		.length		= IO_SIZE,
-		.type		= MT_DEVICE
-	},
-};
-
-void __init davinci_map_common_io(void)
-{
-	iotable_init(davinci_io_desc, ARRAY_SIZE(davinci_io_desc));
-
-	/* Normally devicemaps_init() would flush caches and tlb after
-	 * mdesc->map_io(), but we must also do it here because of the CPU
-	 * revision check below.
-	 */
-	local_flush_tlb_all();
-	flush_cache_all();
-
-	/* We want to check CPU revision early for cpu_is_xxxx() macros.
-	 * IO space mapping must be initialized before we can do that.
-	 */
-	davinci_check_revision();
-}
 
 #define BETWEEN(p, st, sz)	((p) >= (st) && (p) < ((st) + (sz)))
 #define XLATE(p, pst, vst)	((void __iomem *)((p) - (pst) + (vst)))
-- 
cgit v0.10.2


From b9ab12797e74d93a3656ea0bf5591f8b3e094fd5 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:39:09 -0700
Subject: davinci: Support JTAG ID register at any address

The Davinci cpu_is_davinci_*() macros use the SoC part number
and variant retrieved from the JTAG ID register to determine the
type of cpu that the kernel is running on.  Currently, the code to
read the JTAG ID register assumes that the register is always at
the same base address.  This isn't true on some newer SoCs.

To solve this, have the SoC-specific code set the JTAG ID register
base address in soc_info structure and add a 'cpu_id' member to it.
'cpu_id' will be used by the cpu_is_davinci_*() macros to match
the cpu id.  Also move the info used to identify the cpu type into
the SoC-specific code to keep all SoC-specific code together.

The common code will read the JTAG ID register, search through
an array of davinci_id structures to identify the cpu type.
Once identified, it will set the 'cpu_id' member of the soc_info
structure to the proper value and the cpu_is_davinci_*() macros
will now work.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 74d25f6..a65d03b 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -4,7 +4,7 @@
 #
 
 # Common objects
-obj-y 			:= time.o irq.o clock.o serial.o io.o id.o psc.o \
+obj-y 			:= time.o irq.o clock.o serial.o io.o psc.o \
 			   gpio.o devices.o dma.o usb.o common.o
 
 obj-$(CONFIG_DAVINCI_MUX)		+= mux.o
diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index 9a5486f..29b3a8d 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -15,13 +15,31 @@
 #include <asm/mach/map.h>
 
 #include <mach/common.h>
+#include <mach/cputype.h>
 
 struct davinci_soc_info davinci_soc_info;
 EXPORT_SYMBOL(davinci_soc_info);
 
+static struct davinci_id * __init davinci_get_id(u32 jtag_id)
+{
+	int i;
+	struct davinci_id *dip;
+	u8 variant = (jtag_id & 0xf0000000) >> 28;
+	u16 part_no = (jtag_id & 0x0ffff000) >> 12;
+
+	for (i = 0, dip = davinci_soc_info.ids; i < davinci_soc_info.ids_num;
+			i++, dip++)
+		/* Don't care about the manufacturer right now */
+		if ((dip->part_no == part_no) && (dip->variant == variant))
+			return dip;
+
+	return NULL;
+}
+
 void __init davinci_common_init(struct davinci_soc_info *soc_info)
 {
 	int ret;
+	struct davinci_id *dip;
 
 	if (!soc_info) {
 		ret = -EINVAL;
@@ -46,7 +64,16 @@ void __init davinci_common_init(struct davinci_soc_info *soc_info)
 	 * We want to check CPU revision early for cpu_is_xxxx() macros.
 	 * IO space mapping must be initialized before we can do that.
 	 */
-	davinci_check_revision();
+	davinci_soc_info.jtag_id = __raw_readl(davinci_soc_info.jtag_id_base);
+
+	dip = davinci_get_id(davinci_soc_info.jtag_id);
+	if (!dip) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	davinci_soc_info.cpu_id = dip->cpu_id;
+	pr_info("DaVinci %s variant 0x%x\n", dip->name, dip->variant);
 
 	return;
 
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 6d1abfd..0f724c0 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -534,9 +534,23 @@ static struct map_desc dm355_io_desc[] = {
 	},
 };
 
+/* Contents of JTAG ID register used to identify exact cpu type */
+static struct davinci_id dm355_ids[] = {
+	{
+		.variant	= 0x0,
+		.part_no	= 0xb73b,
+		.manufacturer	= 0x00f,
+		.cpu_id		= DAVINCI_CPU_ID_DM355,
+		.name		= "dm355",
+	},
+};
+
 static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.io_desc		= dm355_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm355_io_desc),
+	.jtag_id_base		= IO_ADDRESS(0x01c40028),
+	.ids			= dm355_ids,
+	.ids_num		= ARRAY_SIZE(dm355_ids),
 };
 
 void __init dm355_init(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 79f1136..9bcd98c 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -388,7 +388,6 @@ MUX_CFG(DM644X, LOEEN,		0,   24,    1,	  1,	 true)
 MUX_CFG(DM644X, LFLDEN,		0,   25,    1,	  1,	 false)
 };
 
-
 /*----------------------------------------------------------------------*/
 
 static const s8 dma_chan_dm644x_no_event[] = {
@@ -475,9 +474,23 @@ static struct map_desc dm644x_io_desc[] = {
 	},
 };
 
+/* Contents of JTAG ID register used to identify exact cpu type */
+static struct davinci_id dm644x_ids[] = {
+	{
+		.variant	= 0x0,
+		.part_no	= 0xb700,
+		.manufacturer	= 0x017,
+		.cpu_id		= DAVINCI_CPU_ID_DM6446,
+		.name		= "dm6446",
+	},
+};
+
 static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.io_desc		= dm644x_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm644x_io_desc),
+	.jtag_id_base		= IO_ADDRESS(0x01c40028),
+	.ids			= dm644x_ids,
+	.ids_num		= ARRAY_SIZE(dm644x_ids),
 };
 
 void __init dm644x_init(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 8547ec0..cd86bb7 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -454,9 +454,23 @@ static struct map_desc dm646x_io_desc[] = {
 	},
 };
 
+/* Contents of JTAG ID register used to identify exact cpu type */
+static struct davinci_id dm646x_ids[] = {
+	{
+		.variant	= 0x0,
+		.part_no	= 0xb770,
+		.manufacturer	= 0x017,
+		.cpu_id		= DAVINCI_CPU_ID_DM6467,
+		.name		= "dm6467",
+	},
+};
+
 static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.io_desc		= dm646x_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm646x_io_desc),
+	.jtag_id_base		= IO_ADDRESS(0x01c40028),
+	.ids			= dm646x_ids,
+	.ids_num		= ARRAY_SIZE(dm646x_ids),
 };
 
 void __init dm646x_init(void)
diff --git a/arch/arm/mach-davinci/id.c b/arch/arm/mach-davinci/id.c
deleted file mode 100644
index 018b994..0000000
--- a/arch/arm/mach-davinci/id.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Davinci CPU identification code
- *
- * Copyright (C) 2006 Komal Shah <komal_shah802003@yahoo.com>
- *
- * Derived from OMAP1 CPU identification code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/io.h>
-
-#define JTAG_ID_BASE		IO_ADDRESS(0x01c40028)
-
-static unsigned int davinci_revision;
-
-struct davinci_id {
-	u8	variant;	/* JTAG ID bits 31:28 */
-	u16	part_no;	/* JTAG ID bits 27:12 */
-	u32	manufacturer;	/* JTAG ID bits 11:1 */
-	u32	type;		/* Cpu id bits [31:8], cpu class bits [7:0] */
-};
-
-/* Register values to detect the DaVinci version */
-static struct davinci_id davinci_ids[] __initdata = {
-	{
-		/* DM6446 */
-		.part_no      = 0xb700,
-		.variant      = 0x0,
-		.manufacturer = 0x017,
-		.type	      = 0x64460000,
-	},
-	{
-		/* DM646X */
-		.part_no      = 0xb770,
-		.variant      = 0x0,
-		.manufacturer = 0x017,
-		.type         = 0x64670000,
-	},
-	{
-		/* DM355 */
-		.part_no	= 0xb73b,
-		.variant	= 0x0,
-		.manufacturer	= 0x00f,
-		.type		= 0x03550000,
-	},
-};
-
-/*
- * Get Device Part No. from JTAG ID register
- */
-static u16 __init davinci_get_part_no(void)
-{
-	u32 dev_id, part_no;
-
-	dev_id = __raw_readl(JTAG_ID_BASE);
-
-	part_no = ((dev_id >> 12) & 0xffff);
-
-	return part_no;
-}
-
-/*
- * Get Device Revision from JTAG ID register
- */
-static u8 __init davinci_get_variant(void)
-{
-	u32 variant;
-
-	variant = __raw_readl(JTAG_ID_BASE);
-
-	variant = (variant >> 28) & 0xf;
-
-	return variant;
-}
-
-unsigned int davinci_rev(void)
-{
-	return davinci_revision >> 16;
-}
-EXPORT_SYMBOL(davinci_rev);
-
-void __init davinci_check_revision(void)
-{
-	int i;
-	u16 part_no;
-	u8 variant;
-
-	part_no = davinci_get_part_no();
-	variant = davinci_get_variant();
-
-	/* First check only the major version in a safe way */
-	for (i = 0; i < ARRAY_SIZE(davinci_ids); i++) {
-		if (part_no == (davinci_ids[i].part_no)) {
-			davinci_revision = davinci_ids[i].type;
-			break;
-		}
-	}
-
-	/* Check if we can find the dev revision */
-	for (i = 0; i < ARRAY_SIZE(davinci_ids); i++) {
-		if (part_no == davinci_ids[i].part_no &&
-		    variant == davinci_ids[i].variant) {
-			davinci_revision = davinci_ids[i].type;
-			break;
-		}
-	}
-
-	printk(KERN_INFO "DaVinci DM%04x variant 0x%x\n",
-	       davinci_rev(), variant);
-}
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 770a1ba..ece1cd4 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -28,11 +28,15 @@ extern void setup_usb(unsigned mA, unsigned potpgt_msec);
 struct davinci_soc_info {
 	struct map_desc			*io_desc;
 	unsigned long			io_desc_num;
+	u32				cpu_id;
+	u32				jtag_id;
+	void __iomem			*jtag_id_base;
+	struct davinci_id		*ids;
+	unsigned long			ids_num;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
 
 extern void davinci_common_init(struct davinci_soc_info *soc_info);
-extern void davinci_check_revision(void);
 
 #endif /* __ARCH_ARM_MACH_DAVINCI_COMMON_H */
diff --git a/arch/arm/mach-davinci/include/mach/cputype.h b/arch/arm/mach-davinci/include/mach/cputype.h
index 27cfb1b..d12a5ed 100644
--- a/arch/arm/mach-davinci/include/mach/cputype.h
+++ b/arch/arm/mach-davinci/include/mach/cputype.h
@@ -16,17 +16,30 @@
 #ifndef _ASM_ARCH_CPU_H
 #define _ASM_ARCH_CPU_H
 
-extern unsigned int davinci_rev(void);
+#include <mach/common.h>
 
-#define IS_DAVINCI_CPU(type, id)			\
-static inline int is_davinci_dm ##type(void)	        \
-{							\
-	return (davinci_rev() == (id)) ? 1 : 0;	        \
+struct davinci_id {
+	u8	variant;	/* JTAG ID bits 31:28 */
+	u16	part_no;	/* JTAG ID bits 27:12 */
+	u16	manufacturer;	/* JTAG ID bits 11:1 */
+	u32	cpu_id;
+	char	*name;
+};
+
+/* Can use lower 16 bits of cpu id  for a variant when required */
+#define	DAVINCI_CPU_ID_DM6446		0x64460000
+#define	DAVINCI_CPU_ID_DM6467		0x64670000
+#define	DAVINCI_CPU_ID_DM355		0x03550000
+
+#define IS_DAVINCI_CPU(type, id)					\
+static inline int is_davinci_ ##type(void)				\
+{									\
+	return (davinci_soc_info.cpu_id == (id));			\
 }
 
-IS_DAVINCI_CPU(644x, 0x6446)
-IS_DAVINCI_CPU(646x, 0x6467)
-IS_DAVINCI_CPU(355, 0x355)
+IS_DAVINCI_CPU(dm644x, DAVINCI_CPU_ID_DM6446)
+IS_DAVINCI_CPU(dm646x, DAVINCI_CPU_ID_DM6467)
+IS_DAVINCI_CPU(dm355, DAVINCI_CPU_ID_DM355)
 
 #ifdef CONFIG_ARCH_DAVINCI_DM644x
 #define cpu_is_davinci_dm644x() is_davinci_dm644x()
-- 
cgit v0.10.2


From 66e0c3991c5a1735dd8add77ab8aff5005f57681 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:39:23 -0700
Subject: davinci: Add clock init call to common init routine

All of the davinci SoCs need to call davinci_clk_init() so
put the call in the common init routine.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index 29b3a8d..2e5b888 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -17,6 +17,8 @@
 #include <mach/common.h>
 #include <mach/cputype.h>
 
+#include "clock.h"
+
 struct davinci_soc_info davinci_soc_info;
 EXPORT_SYMBOL(davinci_soc_info);
 
@@ -75,6 +77,13 @@ void __init davinci_common_init(struct davinci_soc_info *soc_info)
 	davinci_soc_info.cpu_id = dip->cpu_id;
 	pr_info("DaVinci %s variant 0x%x\n", dip->name, dip->variant);
 
+	if (davinci_soc_info.cpu_clks) {
+		ret = davinci_clk_init(davinci_soc_info.cpu_clks);
+
+		if (ret != 0)
+			goto err;
+	}
+
 	return;
 
 err:
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 0f724c0..e93840a 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -551,12 +551,12 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.jtag_id_base		= IO_ADDRESS(0x01c40028),
 	.ids			= dm355_ids,
 	.ids_num		= ARRAY_SIZE(dm355_ids),
+	.cpu_clks		= dm355_clks,
 };
 
 void __init dm355_init(void)
 {
 	davinci_common_init(&davinci_soc_info_dm355);
-	davinci_clk_init(dm355_clks);
 	davinci_mux_register(dm355_pins, ARRAY_SIZE(dm355_pins));;
 }
 
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 9bcd98c..648160c 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -491,12 +491,12 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.jtag_id_base		= IO_ADDRESS(0x01c40028),
 	.ids			= dm644x_ids,
 	.ids_num		= ARRAY_SIZE(dm644x_ids),
+	.cpu_clks		= dm644x_clks,
 };
 
 void __init dm644x_init(void)
 {
 	davinci_common_init(&davinci_soc_info_dm644x);
-	davinci_clk_init(dm644x_clks);
 	davinci_mux_register(dm644x_pins, ARRAY_SIZE(dm644x_pins));
 }
 
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index cd86bb7..7cf9705 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -471,12 +471,12 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.jtag_id_base		= IO_ADDRESS(0x01c40028),
 	.ids			= dm646x_ids,
 	.ids_num		= ARRAY_SIZE(dm646x_ids),
+	.cpu_clks		= dm646x_clks,
 };
 
 void __init dm646x_init(void)
 {
 	davinci_common_init(&davinci_soc_info_dm646x);
-	davinci_clk_init(dm646x_clks);
 	davinci_mux_register(dm646x_pins, ARRAY_SIZE(dm646x_pins));
 }
 
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index ece1cd4..97782a7 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -33,6 +33,7 @@ struct davinci_soc_info {
 	void __iomem			*jtag_id_base;
 	struct davinci_id		*ids;
 	unsigned long			ids_num;
+	struct davinci_clk		*cpu_clks;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
-- 
cgit v0.10.2


From d81d188cafecbc9e01df51527ac4c84a5b19e033 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:39:33 -0700
Subject: davinci: Add support for multiple PSCs

The current code to support the DaVinci Power and Sleep Controller (PSC)
assumes that there is only one controller.  This assumption is no longer
valid so expand the support to allow greater than one PSC.

To accomplish this, put the base addresses for the PSCs in the SoC
infrastructure so it can be referenced by the PSC code.  This also
requires adding an extra parameter to davinci_psc_config() to specify
the PSC that is to be enabled/disabled.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c
index f0baaa1..39bf321 100644
--- a/arch/arm/mach-davinci/clock.c
+++ b/arch/arm/mach-davinci/clock.c
@@ -42,7 +42,8 @@ static void __clk_enable(struct clk *clk)
 	if (clk->parent)
 		__clk_enable(clk->parent);
 	if (clk->usecount++ == 0 && (clk->flags & CLK_PSC))
-		davinci_psc_config(psc_domain(clk), clk->lpsc, 1);
+		davinci_psc_config(psc_domain(clk), clk->psc_ctlr,
+				clk->lpsc, 1);
 }
 
 static void __clk_disable(struct clk *clk)
@@ -50,7 +51,8 @@ static void __clk_disable(struct clk *clk)
 	if (WARN_ON(clk->usecount == 0))
 		return;
 	if (--clk->usecount == 0 && !(clk->flags & CLK_PLL))
-		davinci_psc_config(psc_domain(clk), clk->lpsc, 0);
+		davinci_psc_config(psc_domain(clk), clk->psc_ctlr,
+				clk->lpsc, 0);
 	if (clk->parent)
 		__clk_disable(clk->parent);
 }
@@ -164,11 +166,11 @@ static int __init clk_disable_unused(void)
 			continue;
 
 		/* ignore if in Disabled or SwRstDisable states */
-		if (!davinci_psc_is_clk_active(ck->lpsc))
+		if (!davinci_psc_is_clk_active(ck->psc_ctlr, ck->lpsc))
 			continue;
 
 		pr_info("Clocks: disable unused %s\n", ck->name);
-		davinci_psc_config(psc_domain(ck), ck->lpsc, 0);
+		davinci_psc_config(psc_domain(ck), ck->psc_ctlr, ck->lpsc, 0);
 	}
 	spin_unlock_irq(&clockfw_lock);
 
diff --git a/arch/arm/mach-davinci/clock.h b/arch/arm/mach-davinci/clock.h
index 584494a..27233cb 100644
--- a/arch/arm/mach-davinci/clock.h
+++ b/arch/arm/mach-davinci/clock.h
@@ -67,6 +67,7 @@ struct clk {
 	u8			usecount;
 	u8			flags;
 	u8			lpsc;
+	u8			psc_ctlr;
 	struct clk              *parent;
 	struct pll_data         *pll_data;
 	u32                     div_reg;
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index e93840a..37f20a7 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -545,6 +545,10 @@ static struct davinci_id dm355_ids[] = {
 	},
 };
 
+static void __iomem *dm355_psc_bases[] = {
+	IO_ADDRESS(DAVINCI_PWR_SLEEP_CNTRL_BASE),
+};
+
 static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.io_desc		= dm355_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm355_io_desc),
@@ -552,6 +556,8 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.ids			= dm355_ids,
 	.ids_num		= ARRAY_SIZE(dm355_ids),
 	.cpu_clks		= dm355_clks,
+	.psc_bases		= dm355_psc_bases,
+	.psc_bases_num		= ARRAY_SIZE(dm355_psc_bases),
 };
 
 void __init dm355_init(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 648160c..7b15fab 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -485,6 +485,10 @@ static struct davinci_id dm644x_ids[] = {
 	},
 };
 
+static void __iomem *dm644x_psc_bases[] = {
+	IO_ADDRESS(DAVINCI_PWR_SLEEP_CNTRL_BASE),
+};
+
 static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.io_desc		= dm644x_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm644x_io_desc),
@@ -492,6 +496,8 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.ids			= dm644x_ids,
 	.ids_num		= ARRAY_SIZE(dm644x_ids),
 	.cpu_clks		= dm644x_clks,
+	.psc_bases		= dm644x_psc_bases,
+	.psc_bases_num		= ARRAY_SIZE(dm644x_psc_bases),
 };
 
 void __init dm644x_init(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 7cf9705..3c61543 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -465,6 +465,10 @@ static struct davinci_id dm646x_ids[] = {
 	},
 };
 
+static void __iomem *dm646x_psc_bases[] = {
+	IO_ADDRESS(DAVINCI_PWR_SLEEP_CNTRL_BASE),
+};
+
 static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.io_desc		= dm646x_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm646x_io_desc),
@@ -472,6 +476,8 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.ids			= dm646x_ids,
 	.ids_num		= ARRAY_SIZE(dm646x_ids),
 	.cpu_clks		= dm646x_clks,
+	.psc_bases		= dm646x_psc_bases,
+	.psc_bases_num		= ARRAY_SIZE(dm646x_psc_bases),
 };
 
 void __init dm646x_init(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 97782a7..7851d56 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -34,6 +34,8 @@ struct davinci_soc_info {
 	struct davinci_id		*ids;
 	unsigned long			ids_num;
 	struct davinci_clk		*cpu_clks;
+	void __iomem			**psc_bases;
+	unsigned long			psc_bases_num;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
diff --git a/arch/arm/mach-davinci/include/mach/psc.h b/arch/arm/mach-davinci/include/mach/psc.h
index 55a90d4..ab8a258 100644
--- a/arch/arm/mach-davinci/include/mach/psc.h
+++ b/arch/arm/mach-davinci/include/mach/psc.h
@@ -27,6 +27,8 @@
 #ifndef __ASM_ARCH_PSC_H
 #define __ASM_ARCH_PSC_H
 
+#define	DAVINCI_PWR_SLEEP_CNTRL_BASE	0x01C41000
+
 /* Power and Sleep Controller (PSC) Domains */
 #define DAVINCI_GPSC_ARMDOMAIN      0
 #define DAVINCI_GPSC_DSPDOMAIN      1
@@ -116,8 +118,8 @@
 #define DM646X_LPSC_TIMER1         35
 #define DM646X_LPSC_ARM_INTC       45
 
-extern int davinci_psc_is_clk_active(unsigned int id);
-extern void davinci_psc_config(unsigned int domain, unsigned int id,
-			       char enable);
+extern int davinci_psc_is_clk_active(unsigned int ctlr, unsigned int id);
+extern void davinci_psc_config(unsigned int domain, unsigned int ctlr,
+		unsigned int id, char enable);
 
 #endif /* __ASM_ARCH_PSC_H */
diff --git a/arch/arm/mach-davinci/psc.c b/arch/arm/mach-davinci/psc.c
index 84171ab..a78b657 100644
--- a/arch/arm/mach-davinci/psc.c
+++ b/arch/arm/mach-davinci/psc.c
@@ -28,8 +28,6 @@
 #include <mach/psc.h>
 #include <mach/mux.h>
 
-#define DAVINCI_PWR_SLEEP_CNTRL_BASE 0x01C41000
-
 /* PSC register offsets */
 #define EPCPR		0x070
 #define PTCMD		0x120
@@ -42,22 +40,42 @@
 #define MDSTAT_STATE_MASK 0x1f
 
 /* Return nonzero iff the domain's clock is active */
-int __init davinci_psc_is_clk_active(unsigned int id)
+int __init davinci_psc_is_clk_active(unsigned int ctlr, unsigned int id)
 {
-	void __iomem *psc_base = IO_ADDRESS(DAVINCI_PWR_SLEEP_CNTRL_BASE);
-	u32 mdstat = __raw_readl(psc_base + MDSTAT + 4 * id);
+	void __iomem *psc_base;
+	u32 mdstat;
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+
+	if (!soc_info->psc_bases || (ctlr >= soc_info->psc_bases_num)) {
+		pr_warning("PSC: Bad psc data: 0x%x[%d]\n",
+				(int)soc_info->psc_bases, ctlr);
+		return 0;
+	}
+
+	psc_base = soc_info->psc_bases[ctlr];
+	mdstat = __raw_readl(psc_base + MDSTAT + 4 * id);
 
 	/* if clocked, state can be "Enable" or "SyncReset" */
 	return mdstat & BIT(12);
 }
 
 /* Enable or disable a PSC domain */
-void davinci_psc_config(unsigned int domain, unsigned int id, char enable)
+void davinci_psc_config(unsigned int domain, unsigned int ctlr,
+		unsigned int id, char enable)
 {
 	u32 epcpr, ptcmd, ptstat, pdstat, pdctl1, mdstat, mdctl;
-	void __iomem *psc_base = IO_ADDRESS(DAVINCI_PWR_SLEEP_CNTRL_BASE);
+	void __iomem *psc_base;
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
 	u32 next_state = enable ? 0x3 : 0x2; /* 0x3 enables, 0x2 disables */
 
+	if (!soc_info->psc_bases || (ctlr >= soc_info->psc_bases_num)) {
+		pr_warning("PSC: Bad psc data: 0x%x[%d]\n",
+				(int)soc_info->psc_bases, ctlr);
+		return;
+	}
+
+	psc_base = soc_info->psc_bases[ctlr];
+
 	mdctl = __raw_readl(psc_base + MDCTL + 4 * id);
 	mdctl &= ~MDSTAT_STATE_MASK;
 	mdctl |= next_state;
-- 
cgit v0.10.2


From 0e585952ac6a06b3c77d6b8eadb9c359766a700d Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:39:48 -0700
Subject: davinci: Move pinmux setup info to SoC infrastructure

The pinmux register base and setup can be different for different
SoCs so move the pinmux reg base, pinmux table (and its size) to
the SoC infrastructure.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 37f20a7..f735ed9 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -436,6 +436,7 @@ void __init dm355_init_spi0(unsigned chipselect_mask,
  *				reg  offset mask  mode
  */
 static const struct mux_config dm355_pins[] = {
+#ifdef CONFIG_DAVINCI_MUX
 MUX_CFG(DM355,	MMCSD0,		4,   2,     1,	  0,	 false)
 
 MUX_CFG(DM355,	SD1_CLK,	3,   6,     1,	  1,	 false)
@@ -466,6 +467,7 @@ INT_CFG(DM355,  INT_EDMA_TC1_ERR,     4,    1,    1,     false)
 EVT_CFG(DM355,  EVT8_ASP1_TX,	      0,    1,    0,     false)
 EVT_CFG(DM355,  EVT9_ASP1_RX,	      1,    1,    0,     false)
 EVT_CFG(DM355,  EVT26_MMC0_RX,	      2,    1,    0,     false)
+#endif
 };
 
 /*----------------------------------------------------------------------*/
@@ -558,12 +560,14 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.cpu_clks		= dm355_clks,
 	.psc_bases		= dm355_psc_bases,
 	.psc_bases_num		= ARRAY_SIZE(dm355_psc_bases),
+	.pinmux_base		= IO_ADDRESS(DAVINCI_SYSTEM_MODULE_BASE),
+	.pinmux_pins		= dm355_pins,
+	.pinmux_pins_num	= ARRAY_SIZE(dm355_pins),
 };
 
 void __init dm355_init(void)
 {
 	davinci_common_init(&davinci_soc_info_dm355);
-	davinci_mux_register(dm355_pins, ARRAY_SIZE(dm355_pins));;
 }
 
 static int __init dm355_init_devices(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 7b15fab..b7c17dd 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -346,6 +346,7 @@ static struct platform_device dm644x_emac_device = {
  *				reg  offset mask  mode
  */
 static const struct mux_config dm644x_pins[] = {
+#ifdef CONFIG_DAVINCI_MUX
 MUX_CFG(DM644X, HDIREN,		0,   16,    1,	  1,	 true)
 MUX_CFG(DM644X, ATAEN,		0,   17,    1,	  1,	 true)
 MUX_CFG(DM644X, ATAEN_DISABLE,	0,   17,    1,	  0,	 true)
@@ -386,6 +387,7 @@ MUX_CFG(DM644X, RGB666,		0,   22,    1,	  1,	 true)
 
 MUX_CFG(DM644X, LOEEN,		0,   24,    1,	  1,	 true)
 MUX_CFG(DM644X, LFLDEN,		0,   25,    1,	  1,	 false)
+#endif
 };
 
 /*----------------------------------------------------------------------*/
@@ -498,12 +500,14 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.cpu_clks		= dm644x_clks,
 	.psc_bases		= dm644x_psc_bases,
 	.psc_bases_num		= ARRAY_SIZE(dm644x_psc_bases),
+	.pinmux_base		= IO_ADDRESS(DAVINCI_SYSTEM_MODULE_BASE),
+	.pinmux_pins		= dm644x_pins,
+	.pinmux_pins_num	= ARRAY_SIZE(dm644x_pins),
 };
 
 void __init dm644x_init(void)
 {
 	davinci_common_init(&davinci_soc_info_dm644x);
-	davinci_mux_register(dm644x_pins, ARRAY_SIZE(dm644x_pins));
 }
 
 static int __init dm644x_init_devices(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 3c61543..299d8d9 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -327,6 +327,7 @@ static struct platform_device dm646x_emac_device = {
  *				reg  offset mask  mode
  */
 static const struct mux_config dm646x_pins[] = {
+#ifdef CONFIG_DAVINCI_MUX
 MUX_CFG(DM646X, ATAEN,		0,   0,     1,	  1,	 true)
 
 MUX_CFG(DM646X, AUDCK1,		0,   29,    1,	  0,	 false)
@@ -354,6 +355,7 @@ MUX_CFG(DM646X, PTSIMUX_PARALLEL,	0,   16,    3,    2,	 true)
 MUX_CFG(DM646X, PTSOMUX_SERIAL,		0,   18,    3,    3,	 true)
 
 MUX_CFG(DM646X, PTSIMUX_SERIAL,		0,   16,    3,    3,	 true)
+#endif
 };
 
 /*----------------------------------------------------------------------*/
@@ -478,12 +480,14 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.cpu_clks		= dm646x_clks,
 	.psc_bases		= dm646x_psc_bases,
 	.psc_bases_num		= ARRAY_SIZE(dm646x_psc_bases),
+	.pinmux_base		= IO_ADDRESS(DAVINCI_SYSTEM_MODULE_BASE),
+	.pinmux_pins		= dm646x_pins,
+	.pinmux_pins_num	= ARRAY_SIZE(dm646x_pins),
 };
 
 void __init dm646x_init(void)
 {
 	davinci_common_init(&davinci_soc_info_dm646x);
-	davinci_mux_register(dm646x_pins, ARRAY_SIZE(dm646x_pins));
 }
 
 static int __init dm646x_init_devices(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 7851d56..c00d375 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -36,6 +36,9 @@ struct davinci_soc_info {
 	struct davinci_clk		*cpu_clks;
 	void __iomem			**psc_bases;
 	unsigned long			psc_bases_num;
+	void __iomem			*pinmux_base;
+	const struct mux_config		*pinmux_pins;
+	unsigned long			pinmux_pins_num;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
diff --git a/arch/arm/mach-davinci/include/mach/mux.h b/arch/arm/mach-davinci/include/mach/mux.h
index bae22cb..5d6efa8 100644
--- a/arch/arm/mach-davinci/include/mach/mux.h
+++ b/arch/arm/mach-davinci/include/mach/mux.h
@@ -168,15 +168,9 @@ enum davinci_dm355_index {
 
 #ifdef CONFIG_DAVINCI_MUX
 /* setup pin muxing */
-extern void davinci_mux_init(void);
-extern int davinci_mux_register(const struct mux_config *pins,
-				unsigned long size);
 extern int davinci_cfg_reg(unsigned long reg_cfg);
 #else
 /* boot loader does it all (no warnings from CONFIG_DAVINCI_MUX_WARNINGS) */
-static inline void davinci_mux_init(void) {}
-static inline int davinci_mux_register(const struct mux_config *pins,
-				       unsigned long size) { return 0; }
 static inline int davinci_cfg_reg(unsigned long reg_cfg) { return 0; }
 #endif
 
diff --git a/arch/arm/mach-davinci/mux.c b/arch/arm/mach-davinci/mux.c
index bbba0b2..d310f57 100644
--- a/arch/arm/mach-davinci/mux.c
+++ b/arch/arm/mach-davinci/mux.c
@@ -21,18 +21,7 @@
 
 #include <mach/hardware.h>
 #include <mach/mux.h>
-
-static const struct mux_config *mux_table;
-static unsigned long pin_table_sz;
-
-int __init davinci_mux_register(const struct mux_config *pins,
-				unsigned long size)
-{
-	mux_table = pins;
-	pin_table_sz = size;
-
-	return 0;
-}
+#include <mach/common.h>
 
 /*
  * Sets the DAVINCI MUX register based on the table
@@ -40,23 +29,24 @@ int __init davinci_mux_register(const struct mux_config *pins,
 int __init_or_module davinci_cfg_reg(const unsigned long index)
 {
 	static DEFINE_SPINLOCK(mux_spin_lock);
-	void __iomem *base = IO_ADDRESS(DAVINCI_SYSTEM_MODULE_BASE);
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+	void __iomem *base = soc_info->pinmux_base;
 	unsigned long flags;
 	const struct mux_config *cfg;
 	unsigned int reg_orig = 0, reg = 0;
 	unsigned int mask, warn = 0;
 
-	if (!mux_table)
+	if (!soc_info->pinmux_pins)
 		BUG();
 
-	if (index >= pin_table_sz) {
+	if (index >= soc_info->pinmux_pins_num) {
 		printk(KERN_ERR "Invalid pin mux index: %lu (%lu)\n",
-		       index, pin_table_sz);
+		       index, soc_info->pinmux_pins_num);
 		dump_stack();
 		return -ENODEV;
 	}
 
-	cfg = &mux_table[index];
+	cfg = &soc_info->pinmux_pins[index];
 
 	if (cfg->name == NULL) {
 		printk(KERN_ERR "No entry for the specified index\n");
-- 
cgit v0.10.2


From 673dd36f0d0cf8893d6b46d524ad80e81076b885 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:40:00 -0700
Subject: davinci: Move interrupt ctlr info to SoC infrastructure

Use the SoC infrastructure to hold the interrupt controller
information (i.e., base address, default priorities,
interrupt controller type, and the number of IRQs).

The interrupt controller base, although initially put
in the soc_info structure's intc_base field, is eventually
put in the global 'davinci_intc_base' so the low-level
interrupt code can access it without a dereference.

These changes enable the SoC default irq priorities to be
put in the SoC-specific files, and the interrupt controller
to be at any base address.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index 2e5b888..d72d517 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -22,6 +22,8 @@
 struct davinci_soc_info davinci_soc_info;
 EXPORT_SYMBOL(davinci_soc_info);
 
+void __iomem *davinci_intc_base;
+
 static struct davinci_id * __init davinci_get_id(u32 jtag_id)
 {
 	int i;
@@ -84,6 +86,7 @@ void __init davinci_common_init(struct davinci_soc_info *soc_info)
 			goto err;
 	}
 
+	davinci_intc_base = davinci_soc_info.intc_base;
 	return;
 
 err:
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index f735ed9..e8c01ff 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -470,6 +470,71 @@ EVT_CFG(DM355,  EVT26_MMC0_RX,	      2,    1,    0,     false)
 #endif
 };
 
+static u8 dm355_default_priorities[DAVINCI_N_AINTC_IRQ] = {
+	[IRQ_DM355_CCDC_VDINT0]		= 2,
+	[IRQ_DM355_CCDC_VDINT1]		= 6,
+	[IRQ_DM355_CCDC_VDINT2]		= 6,
+	[IRQ_DM355_IPIPE_HST]		= 6,
+	[IRQ_DM355_H3AINT]		= 6,
+	[IRQ_DM355_IPIPE_SDR]		= 6,
+	[IRQ_DM355_IPIPEIFINT]		= 6,
+	[IRQ_DM355_OSDINT]		= 7,
+	[IRQ_DM355_VENCINT]		= 6,
+	[IRQ_ASQINT]			= 6,
+	[IRQ_IMXINT]			= 6,
+	[IRQ_USBINT]			= 4,
+	[IRQ_DM355_RTOINT]		= 4,
+	[IRQ_DM355_UARTINT2]		= 7,
+	[IRQ_DM355_TINT6]		= 7,
+	[IRQ_CCINT0]			= 5,	/* dma */
+	[IRQ_CCERRINT]			= 5,	/* dma */
+	[IRQ_TCERRINT0]			= 5,	/* dma */
+	[IRQ_TCERRINT]			= 5,	/* dma */
+	[IRQ_DM355_SPINT2_1]		= 7,
+	[IRQ_DM355_TINT7]		= 4,
+	[IRQ_DM355_SDIOINT0]		= 7,
+	[IRQ_MBXINT]			= 7,
+	[IRQ_MBRINT]			= 7,
+	[IRQ_MMCINT]			= 7,
+	[IRQ_DM355_MMCINT1]		= 7,
+	[IRQ_DM355_PWMINT3]		= 7,
+	[IRQ_DDRINT]			= 7,
+	[IRQ_AEMIFINT]			= 7,
+	[IRQ_DM355_SDIOINT1]		= 4,
+	[IRQ_TINT0_TINT12]		= 2,	/* clockevent */
+	[IRQ_TINT0_TINT34]		= 2,	/* clocksource */
+	[IRQ_TINT1_TINT12]		= 7,	/* DSP timer */
+	[IRQ_TINT1_TINT34]		= 7,	/* system tick */
+	[IRQ_PWMINT0]			= 7,
+	[IRQ_PWMINT1]			= 7,
+	[IRQ_PWMINT2]			= 7,
+	[IRQ_I2C]			= 3,
+	[IRQ_UARTINT0]			= 3,
+	[IRQ_UARTINT1]			= 3,
+	[IRQ_DM355_SPINT0_0]		= 3,
+	[IRQ_DM355_SPINT0_1]		= 3,
+	[IRQ_DM355_GPIO0]		= 3,
+	[IRQ_DM355_GPIO1]		= 7,
+	[IRQ_DM355_GPIO2]		= 4,
+	[IRQ_DM355_GPIO3]		= 4,
+	[IRQ_DM355_GPIO4]		= 7,
+	[IRQ_DM355_GPIO5]		= 7,
+	[IRQ_DM355_GPIO6]		= 7,
+	[IRQ_DM355_GPIO7]		= 7,
+	[IRQ_DM355_GPIO8]		= 7,
+	[IRQ_DM355_GPIO9]		= 7,
+	[IRQ_DM355_GPIOBNK0]		= 7,
+	[IRQ_DM355_GPIOBNK1]		= 7,
+	[IRQ_DM355_GPIOBNK2]		= 7,
+	[IRQ_DM355_GPIOBNK3]		= 7,
+	[IRQ_DM355_GPIOBNK4]		= 7,
+	[IRQ_DM355_GPIOBNK5]		= 7,
+	[IRQ_DM355_GPIOBNK6]		= 7,
+	[IRQ_COMMTX]			= 7,
+	[IRQ_COMMRX]			= 7,
+	[IRQ_EMUINT]			= 7,
+};
+
 /*----------------------------------------------------------------------*/
 
 static const s8 dma_chan_dm355_no_event[] = {
@@ -563,6 +628,10 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.pinmux_base		= IO_ADDRESS(DAVINCI_SYSTEM_MODULE_BASE),
 	.pinmux_pins		= dm355_pins,
 	.pinmux_pins_num	= ARRAY_SIZE(dm355_pins),
+	.intc_base		= IO_ADDRESS(DAVINCI_ARM_INTC_BASE),
+	.intc_type		= DAVINCI_INTC_TYPE_AINTC,
+	.intc_irq_prios		= dm355_default_priorities,
+	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 };
 
 void __init dm355_init(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index b7c17dd..5c6a7b1 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -390,6 +390,74 @@ MUX_CFG(DM644X, LFLDEN,		0,   25,    1,	  1,	 false)
 #endif
 };
 
+/* FIQ are pri 0-1; otherwise 2-7, with 7 lowest priority */
+static u8 dm644x_default_priorities[DAVINCI_N_AINTC_IRQ] = {
+	[IRQ_VDINT0]		= 2,
+	[IRQ_VDINT1]		= 6,
+	[IRQ_VDINT2]		= 6,
+	[IRQ_HISTINT]		= 6,
+	[IRQ_H3AINT]		= 6,
+	[IRQ_PRVUINT]		= 6,
+	[IRQ_RSZINT]		= 6,
+	[7]			= 7,
+	[IRQ_VENCINT]		= 6,
+	[IRQ_ASQINT]		= 6,
+	[IRQ_IMXINT]		= 6,
+	[IRQ_VLCDINT]		= 6,
+	[IRQ_USBINT]		= 4,
+	[IRQ_EMACINT]		= 4,
+	[14]			= 7,
+	[15]			= 7,
+	[IRQ_CCINT0]		= 5,	/* dma */
+	[IRQ_CCERRINT]		= 5,	/* dma */
+	[IRQ_TCERRINT0]		= 5,	/* dma */
+	[IRQ_TCERRINT]		= 5,	/* dma */
+	[IRQ_PSCIN]		= 7,
+	[21]			= 7,
+	[IRQ_IDE]		= 4,
+	[23]			= 7,
+	[IRQ_MBXINT]		= 7,
+	[IRQ_MBRINT]		= 7,
+	[IRQ_MMCINT]		= 7,
+	[IRQ_SDIOINT]		= 7,
+	[28]			= 7,
+	[IRQ_DDRINT]		= 7,
+	[IRQ_AEMIFINT]		= 7,
+	[IRQ_VLQINT]		= 4,
+	[IRQ_TINT0_TINT12]	= 2,	/* clockevent */
+	[IRQ_TINT0_TINT34]	= 2,	/* clocksource */
+	[IRQ_TINT1_TINT12]	= 7,	/* DSP timer */
+	[IRQ_TINT1_TINT34]	= 7,	/* system tick */
+	[IRQ_PWMINT0]		= 7,
+	[IRQ_PWMINT1]		= 7,
+	[IRQ_PWMINT2]		= 7,
+	[IRQ_I2C]		= 3,
+	[IRQ_UARTINT0]		= 3,
+	[IRQ_UARTINT1]		= 3,
+	[IRQ_UARTINT2]		= 3,
+	[IRQ_SPINT0]		= 3,
+	[IRQ_SPINT1]		= 3,
+	[45]			= 7,
+	[IRQ_DSP2ARM0]		= 4,
+	[IRQ_DSP2ARM1]		= 4,
+	[IRQ_GPIO0]		= 7,
+	[IRQ_GPIO1]		= 7,
+	[IRQ_GPIO2]		= 7,
+	[IRQ_GPIO3]		= 7,
+	[IRQ_GPIO4]		= 7,
+	[IRQ_GPIO5]		= 7,
+	[IRQ_GPIO6]		= 7,
+	[IRQ_GPIO7]		= 7,
+	[IRQ_GPIOBNK0]		= 7,
+	[IRQ_GPIOBNK1]		= 7,
+	[IRQ_GPIOBNK2]		= 7,
+	[IRQ_GPIOBNK3]		= 7,
+	[IRQ_GPIOBNK4]		= 7,
+	[IRQ_COMMTX]		= 7,
+	[IRQ_COMMRX]		= 7,
+	[IRQ_EMUINT]		= 7,
+};
+
 /*----------------------------------------------------------------------*/
 
 static const s8 dma_chan_dm644x_no_event[] = {
@@ -503,6 +571,10 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.pinmux_base		= IO_ADDRESS(DAVINCI_SYSTEM_MODULE_BASE),
 	.pinmux_pins		= dm644x_pins,
 	.pinmux_pins_num	= ARRAY_SIZE(dm644x_pins),
+	.intc_base		= IO_ADDRESS(DAVINCI_ARM_INTC_BASE),
+	.intc_type		= DAVINCI_INTC_TYPE_AINTC,
+	.intc_irq_prios 	= dm644x_default_priorities,
+	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 };
 
 void __init dm644x_init(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 299d8d9..beb522e 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -358,6 +358,73 @@ MUX_CFG(DM646X, PTSIMUX_SERIAL,		0,   16,    3,    3,	 true)
 #endif
 };
 
+static u8 dm646x_default_priorities[DAVINCI_N_AINTC_IRQ] = {
+	[IRQ_DM646X_VP_VERTINT0]        = 7,
+	[IRQ_DM646X_VP_VERTINT1]        = 7,
+	[IRQ_DM646X_VP_VERTINT2]        = 7,
+	[IRQ_DM646X_VP_VERTINT3]        = 7,
+	[IRQ_DM646X_VP_ERRINT]          = 7,
+	[IRQ_DM646X_RESERVED_1]         = 7,
+	[IRQ_DM646X_RESERVED_2]         = 7,
+	[IRQ_DM646X_WDINT]              = 7,
+	[IRQ_DM646X_CRGENINT0]          = 7,
+	[IRQ_DM646X_CRGENINT1]          = 7,
+	[IRQ_DM646X_TSIFINT0]           = 7,
+	[IRQ_DM646X_TSIFINT1]           = 7,
+	[IRQ_DM646X_VDCEINT]            = 7,
+	[IRQ_DM646X_USBINT]             = 7,
+	[IRQ_DM646X_USBDMAINT]          = 7,
+	[IRQ_DM646X_PCIINT]             = 7,
+	[IRQ_CCINT0]                    = 7,    /* dma */
+	[IRQ_CCERRINT]                  = 7,    /* dma */
+	[IRQ_TCERRINT0]                 = 7,    /* dma */
+	[IRQ_TCERRINT]                  = 7,    /* dma */
+	[IRQ_DM646X_TCERRINT2]          = 7,
+	[IRQ_DM646X_TCERRINT3]          = 7,
+	[IRQ_DM646X_IDE]                = 7,
+	[IRQ_DM646X_HPIINT]             = 7,
+	[IRQ_DM646X_EMACRXTHINT]        = 7,
+	[IRQ_DM646X_EMACRXINT]          = 7,
+	[IRQ_DM646X_EMACTXINT]          = 7,
+	[IRQ_DM646X_EMACMISCINT]        = 7,
+	[IRQ_DM646X_MCASP0TXINT]        = 7,
+	[IRQ_DM646X_MCASP0RXINT]        = 7,
+	[IRQ_AEMIFINT]                  = 7,
+	[IRQ_DM646X_RESERVED_3]         = 7,
+	[IRQ_DM646X_MCASP1TXINT]        = 7,    /* clockevent */
+	[IRQ_TINT0_TINT34]              = 7,    /* clocksource */
+	[IRQ_TINT1_TINT12]              = 7,    /* DSP timer */
+	[IRQ_TINT1_TINT34]              = 7,    /* system tick */
+	[IRQ_PWMINT0]                   = 7,
+	[IRQ_PWMINT1]                   = 7,
+	[IRQ_DM646X_VLQINT]             = 7,
+	[IRQ_I2C]                       = 7,
+	[IRQ_UARTINT0]                  = 7,
+	[IRQ_UARTINT1]                  = 7,
+	[IRQ_DM646X_UARTINT2]           = 7,
+	[IRQ_DM646X_SPINT0]             = 7,
+	[IRQ_DM646X_SPINT1]             = 7,
+	[IRQ_DM646X_DSP2ARMINT]         = 7,
+	[IRQ_DM646X_RESERVED_4]         = 7,
+	[IRQ_DM646X_PSCINT]             = 7,
+	[IRQ_DM646X_GPIO0]              = 7,
+	[IRQ_DM646X_GPIO1]              = 7,
+	[IRQ_DM646X_GPIO2]              = 7,
+	[IRQ_DM646X_GPIO3]              = 7,
+	[IRQ_DM646X_GPIO4]              = 7,
+	[IRQ_DM646X_GPIO5]              = 7,
+	[IRQ_DM646X_GPIO6]              = 7,
+	[IRQ_DM646X_GPIO7]              = 7,
+	[IRQ_DM646X_GPIOBNK0]           = 7,
+	[IRQ_DM646X_GPIOBNK1]           = 7,
+	[IRQ_DM646X_GPIOBNK2]           = 7,
+	[IRQ_DM646X_DDRINT]             = 7,
+	[IRQ_DM646X_AEMIFINT]           = 7,
+	[IRQ_COMMTX]                    = 7,
+	[IRQ_COMMRX]                    = 7,
+	[IRQ_EMUINT]                    = 7,
+};
+
 /*----------------------------------------------------------------------*/
 
 static const s8 dma_chan_dm646x_no_event[] = {
@@ -483,6 +550,10 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.pinmux_base		= IO_ADDRESS(DAVINCI_SYSTEM_MODULE_BASE),
 	.pinmux_pins		= dm646x_pins,
 	.pinmux_pins_num	= ARRAY_SIZE(dm646x_pins),
+	.intc_base		= IO_ADDRESS(DAVINCI_ARM_INTC_BASE),
+	.intc_type		= DAVINCI_INTC_TYPE_AINTC,
+	.intc_irq_prios		= dm646x_default_priorities,
+	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 };
 
 void __init dm646x_init(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index c00d375..838ae13 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -17,6 +17,7 @@ struct sys_timer;
 extern struct sys_timer davinci_timer;
 
 extern void davinci_irq_init(void);
+extern void __iomem *davinci_intc_base;
 
 /* parameters describe VBUS sourcing for host mode */
 extern void setup_usb(unsigned mA, unsigned potpgt_msec);
@@ -39,6 +40,10 @@ struct davinci_soc_info {
 	void __iomem			*pinmux_base;
 	const struct mux_config		*pinmux_pins;
 	unsigned long			pinmux_pins_num;
+	void __iomem			*intc_base;
+	int				intc_type;
+	u8				*intc_irq_prios;
+	unsigned long			intc_irq_num;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
diff --git a/arch/arm/mach-davinci/include/mach/entry-macro.S b/arch/arm/mach-davinci/include/mach/entry-macro.S
index 0ebb445..ed78851 100644
--- a/arch/arm/mach-davinci/include/mach/entry-macro.S
+++ b/arch/arm/mach-davinci/include/mach/entry-macro.S
@@ -15,7 +15,8 @@
 		.endm
 
 		.macro  get_irqnr_preamble, base, tmp
-		ldr \base, =IO_ADDRESS(DAVINCI_ARM_INTC_BASE)
+		ldr \base, =davinci_intc_base
+		ldr \base, [\base]
 		.endm
 
 		.macro  arch_ret_to_user, tmp1, tmp2
diff --git a/arch/arm/mach-davinci/include/mach/irqs.h b/arch/arm/mach-davinci/include/mach/irqs.h
index 1806607..bc5d6aa 100644
--- a/arch/arm/mach-davinci/include/mach/irqs.h
+++ b/arch/arm/mach-davinci/include/mach/irqs.h
@@ -30,6 +30,9 @@
 /* Base address */
 #define DAVINCI_ARM_INTC_BASE 0x01C48000
 
+#define DAVINCI_INTC_TYPE_AINTC		0
+#define DAVINCI_INTC_TYPE_CP_INTC	1
+
 /* Interrupt lines */
 #define IRQ_VDINT0       0
 #define IRQ_VDINT1       1
diff --git a/arch/arm/mach-davinci/irq.c b/arch/arm/mach-davinci/irq.c
index 5a324c9..af92ffe 100644
--- a/arch/arm/mach-davinci/irq.c
+++ b/arch/arm/mach-davinci/irq.c
@@ -26,6 +26,7 @@
 
 #include <mach/hardware.h>
 #include <mach/cputype.h>
+#include <mach/common.h>
 #include <asm/mach/irq.h>
 
 #define IRQ_BIT(irq)		((irq) & 0x1f)
@@ -41,18 +42,14 @@
 #define IRQ_INTPRI0_REG_OFFSET	0x0030
 #define IRQ_INTPRI7_REG_OFFSET	0x004C
 
-const u8 *davinci_def_priorities;
-
-#define INTC_BASE IO_ADDRESS(DAVINCI_ARM_INTC_BASE)
-
 static inline unsigned int davinci_irq_readl(int offset)
 {
-	return __raw_readl(INTC_BASE + offset);
+	return __raw_readl(davinci_intc_base + offset);
 }
 
 static inline void davinci_irq_writel(unsigned long value, int offset)
 {
-	__raw_writel(value, INTC_BASE + offset);
+	__raw_writel(value, davinci_intc_base + offset);
 }
 
 /* Disable interrupt */
@@ -113,217 +110,11 @@ static struct irq_chip davinci_irq_chip_0 = {
 	.unmask = davinci_unmask_irq,
 };
 
-/* FIQ are pri 0-1; otherwise 2-7, with 7 lowest priority */
-static const u8 dm644x_default_priorities[DAVINCI_N_AINTC_IRQ] __initdata = {
-	[IRQ_VDINT0]		= 2,
-	[IRQ_VDINT1]		= 6,
-	[IRQ_VDINT2]		= 6,
-	[IRQ_HISTINT]		= 6,
-	[IRQ_H3AINT]		= 6,
-	[IRQ_PRVUINT]		= 6,
-	[IRQ_RSZINT]		= 6,
-	[7]			= 7,
-	[IRQ_VENCINT]		= 6,
-	[IRQ_ASQINT]		= 6,
-	[IRQ_IMXINT]		= 6,
-	[IRQ_VLCDINT]		= 6,
-	[IRQ_USBINT]		= 4,
-	[IRQ_EMACINT]		= 4,
-	[14]			= 7,
-	[15]			= 7,
-	[IRQ_CCINT0]		= 5,	/* dma */
-	[IRQ_CCERRINT]		= 5,	/* dma */
-	[IRQ_TCERRINT0]		= 5,	/* dma */
-	[IRQ_TCERRINT]		= 5,	/* dma */
-	[IRQ_PSCIN]		= 7,
-	[21]			= 7,
-	[IRQ_IDE]		= 4,
-	[23]			= 7,
-	[IRQ_MBXINT]		= 7,
-	[IRQ_MBRINT]		= 7,
-	[IRQ_MMCINT]		= 7,
-	[IRQ_SDIOINT]		= 7,
-	[28]			= 7,
-	[IRQ_DDRINT]		= 7,
-	[IRQ_AEMIFINT]		= 7,
-	[IRQ_VLQINT]		= 4,
-	[IRQ_TINT0_TINT12]	= 2,	/* clockevent */
-	[IRQ_TINT0_TINT34]	= 2,	/* clocksource */
-	[IRQ_TINT1_TINT12]	= 7,	/* DSP timer */
-	[IRQ_TINT1_TINT34]	= 7,	/* system tick */
-	[IRQ_PWMINT0]		= 7,
-	[IRQ_PWMINT1]		= 7,
-	[IRQ_PWMINT2]		= 7,
-	[IRQ_I2C]		= 3,
-	[IRQ_UARTINT0]		= 3,
-	[IRQ_UARTINT1]		= 3,
-	[IRQ_UARTINT2]		= 3,
-	[IRQ_SPINT0]		= 3,
-	[IRQ_SPINT1]		= 3,
-	[45]			= 7,
-	[IRQ_DSP2ARM0]		= 4,
-	[IRQ_DSP2ARM1]		= 4,
-	[IRQ_GPIO0]		= 7,
-	[IRQ_GPIO1]		= 7,
-	[IRQ_GPIO2]		= 7,
-	[IRQ_GPIO3]		= 7,
-	[IRQ_GPIO4]		= 7,
-	[IRQ_GPIO5]		= 7,
-	[IRQ_GPIO6]		= 7,
-	[IRQ_GPIO7]		= 7,
-	[IRQ_GPIOBNK0]		= 7,
-	[IRQ_GPIOBNK1]		= 7,
-	[IRQ_GPIOBNK2]		= 7,
-	[IRQ_GPIOBNK3]		= 7,
-	[IRQ_GPIOBNK4]		= 7,
-	[IRQ_COMMTX]		= 7,
-	[IRQ_COMMRX]		= 7,
-	[IRQ_EMUINT]		= 7,
-};
-
-static const u8 dm646x_default_priorities[DAVINCI_N_AINTC_IRQ] = {
-	[IRQ_DM646X_VP_VERTINT0]        = 7,
-	[IRQ_DM646X_VP_VERTINT1]        = 7,
-	[IRQ_DM646X_VP_VERTINT2]        = 7,
-	[IRQ_DM646X_VP_VERTINT3]        = 7,
-	[IRQ_DM646X_VP_ERRINT]          = 7,
-	[IRQ_DM646X_RESERVED_1]         = 7,
-	[IRQ_DM646X_RESERVED_2]         = 7,
-	[IRQ_DM646X_WDINT]              = 7,
-	[IRQ_DM646X_CRGENINT0]          = 7,
-	[IRQ_DM646X_CRGENINT1]          = 7,
-	[IRQ_DM646X_TSIFINT0]           = 7,
-	[IRQ_DM646X_TSIFINT1]           = 7,
-	[IRQ_DM646X_VDCEINT]            = 7,
-	[IRQ_DM646X_USBINT]             = 7,
-	[IRQ_DM646X_USBDMAINT]          = 7,
-	[IRQ_DM646X_PCIINT]             = 7,
-	[IRQ_CCINT0]                    = 7,    /* dma */
-	[IRQ_CCERRINT]                  = 7,    /* dma */
-	[IRQ_TCERRINT0]                 = 7,    /* dma */
-	[IRQ_TCERRINT]                  = 7,    /* dma */
-	[IRQ_DM646X_TCERRINT2]          = 7,
-	[IRQ_DM646X_TCERRINT3]          = 7,
-	[IRQ_DM646X_IDE]                = 7,
-	[IRQ_DM646X_HPIINT]             = 7,
-	[IRQ_DM646X_EMACRXTHINT]        = 7,
-	[IRQ_DM646X_EMACRXINT]          = 7,
-	[IRQ_DM646X_EMACTXINT]          = 7,
-	[IRQ_DM646X_EMACMISCINT]        = 7,
-	[IRQ_DM646X_MCASP0TXINT]        = 7,
-	[IRQ_DM646X_MCASP0RXINT]        = 7,
-	[IRQ_AEMIFINT]                  = 7,
-	[IRQ_DM646X_RESERVED_3]         = 7,
-	[IRQ_DM646X_MCASP1TXINT]        = 7,    /* clockevent */
-	[IRQ_TINT0_TINT34]              = 7,    /* clocksource */
-	[IRQ_TINT1_TINT12]              = 7,    /* DSP timer */
-	[IRQ_TINT1_TINT34]              = 7,    /* system tick */
-	[IRQ_PWMINT0]                   = 7,
-	[IRQ_PWMINT1]                   = 7,
-	[IRQ_DM646X_VLQINT]             = 7,
-	[IRQ_I2C]                       = 7,
-	[IRQ_UARTINT0]                  = 7,
-	[IRQ_UARTINT1]                  = 7,
-	[IRQ_DM646X_UARTINT2]           = 7,
-	[IRQ_DM646X_SPINT0]             = 7,
-	[IRQ_DM646X_SPINT1]             = 7,
-	[IRQ_DM646X_DSP2ARMINT]         = 7,
-	[IRQ_DM646X_RESERVED_4]         = 7,
-	[IRQ_DM646X_PSCINT]             = 7,
-	[IRQ_DM646X_GPIO0]              = 7,
-	[IRQ_DM646X_GPIO1]              = 7,
-	[IRQ_DM646X_GPIO2]              = 7,
-	[IRQ_DM646X_GPIO3]              = 7,
-	[IRQ_DM646X_GPIO4]              = 7,
-	[IRQ_DM646X_GPIO5]              = 7,
-	[IRQ_DM646X_GPIO6]              = 7,
-	[IRQ_DM646X_GPIO7]              = 7,
-	[IRQ_DM646X_GPIOBNK0]           = 7,
-	[IRQ_DM646X_GPIOBNK1]           = 7,
-	[IRQ_DM646X_GPIOBNK2]           = 7,
-	[IRQ_DM646X_DDRINT]             = 7,
-	[IRQ_DM646X_AEMIFINT]           = 7,
-	[IRQ_COMMTX]                    = 7,
-	[IRQ_COMMRX]                    = 7,
-	[IRQ_EMUINT]                    = 7,
-};
-
-static const u8 dm355_default_priorities[DAVINCI_N_AINTC_IRQ] = {
-	[IRQ_DM355_CCDC_VDINT0]		= 2,
-	[IRQ_DM355_CCDC_VDINT1]		= 6,
-	[IRQ_DM355_CCDC_VDINT2]		= 6,
-	[IRQ_DM355_IPIPE_HST]		= 6,
-	[IRQ_DM355_H3AINT]		= 6,
-	[IRQ_DM355_IPIPE_SDR]		= 6,
-	[IRQ_DM355_IPIPEIFINT]		= 6,
-	[IRQ_DM355_OSDINT]		= 7,
-	[IRQ_DM355_VENCINT]		= 6,
-	[IRQ_ASQINT]			= 6,
-	[IRQ_IMXINT]			= 6,
-	[IRQ_USBINT]			= 4,
-	[IRQ_DM355_RTOINT]		= 4,
-	[IRQ_DM355_UARTINT2]		= 7,
-	[IRQ_DM355_TINT6]		= 7,
-	[IRQ_CCINT0]			= 5,	/* dma */
-	[IRQ_CCERRINT]			= 5,	/* dma */
-	[IRQ_TCERRINT0]			= 5,	/* dma */
-	[IRQ_TCERRINT]			= 5,	/* dma */
-	[IRQ_DM355_SPINT2_1]		= 7,
-	[IRQ_DM355_TINT7]		= 4,
-	[IRQ_DM355_SDIOINT0]		= 7,
-	[IRQ_MBXINT]			= 7,
-	[IRQ_MBRINT]			= 7,
-	[IRQ_MMCINT]			= 7,
-	[IRQ_DM355_MMCINT1]		= 7,
-	[IRQ_DM355_PWMINT3]		= 7,
-	[IRQ_DDRINT]			= 7,
-	[IRQ_AEMIFINT]			= 7,
-	[IRQ_DM355_SDIOINT1]		= 4,
-	[IRQ_TINT0_TINT12]		= 2,	/* clockevent */
-	[IRQ_TINT0_TINT34]		= 2,	/* clocksource */
-	[IRQ_TINT1_TINT12]		= 7,	/* DSP timer */
-	[IRQ_TINT1_TINT34]		= 7,	/* system tick */
-	[IRQ_PWMINT0]			= 7,
-	[IRQ_PWMINT1]			= 7,
-	[IRQ_PWMINT2]			= 7,
-	[IRQ_I2C]			= 3,
-	[IRQ_UARTINT0]			= 3,
-	[IRQ_UARTINT1]			= 3,
-	[IRQ_DM355_SPINT0_0]		= 3,
-	[IRQ_DM355_SPINT0_1]		= 3,
-	[IRQ_DM355_GPIO0]		= 3,
-	[IRQ_DM355_GPIO1]		= 7,
-	[IRQ_DM355_GPIO2]		= 4,
-	[IRQ_DM355_GPIO3]		= 4,
-	[IRQ_DM355_GPIO4]		= 7,
-	[IRQ_DM355_GPIO5]		= 7,
-	[IRQ_DM355_GPIO6]		= 7,
-	[IRQ_DM355_GPIO7]		= 7,
-	[IRQ_DM355_GPIO8]		= 7,
-	[IRQ_DM355_GPIO9]		= 7,
-	[IRQ_DM355_GPIOBNK0]		= 7,
-	[IRQ_DM355_GPIOBNK1]		= 7,
-	[IRQ_DM355_GPIOBNK2]		= 7,
-	[IRQ_DM355_GPIOBNK3]		= 7,
-	[IRQ_DM355_GPIOBNK4]		= 7,
-	[IRQ_DM355_GPIOBNK5]		= 7,
-	[IRQ_DM355_GPIOBNK6]		= 7,
-	[IRQ_COMMTX]			= 7,
-	[IRQ_COMMRX]			= 7,
-	[IRQ_EMUINT]			= 7,
-};
-
 /* ARM Interrupt Controller Initialization */
 void __init davinci_irq_init(void)
 {
 	unsigned i;
-
-	if (cpu_is_davinci_dm644x())
-		davinci_def_priorities = dm644x_default_priorities;
-	else if (cpu_is_davinci_dm646x())
-		davinci_def_priorities = dm646x_default_priorities;
-	else if (cpu_is_davinci_dm355())
-		davinci_def_priorities = dm355_default_priorities;
+	const u8 *davinci_def_priorities = davinci_soc_info.intc_irq_prios;
 
 	/* Clear all interrupt requests */
 	davinci_irq_writel(~0x0, FIQ_REG0_OFFSET);
-- 
cgit v0.10.2


From f64691b3ab795268072e76ddb89290b6277cdf33 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:40:11 -0700
Subject: davinci: Add base address and timer flexibility

The davinci timer code currently hardcodes the timer register
base addresses, the timer irq numbers, and the timers to use
for clock events and clocksource.  This won't work for some
a new SoC so put those values into the soc_info structure
and set them up in the SoC-specific files.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 56c1931..36c528f 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -25,6 +25,7 @@
 #include <mach/mux.h>
 #include <mach/edma.h>
 #include <mach/mmc.h>
+#include <mach/time.h>
 
 #define DAVINCI_I2C_BASE	     0x01C21000
 #define DAVINCI_MMCSD0_BASE	     0x01E10000
@@ -235,6 +236,52 @@ static void davinci_init_wdt(void)
 
 /*-------------------------------------------------------------------------*/
 
+struct davinci_timer_instance davinci_timer_instance[2] = {
+	{
+		.base		= IO_ADDRESS(DAVINCI_TIMER0_BASE),
+		.bottom_irq	= IRQ_TINT0_TINT12,
+		.top_irq	= IRQ_TINT0_TINT34,
+	},
+	{
+		.base		= IO_ADDRESS(DAVINCI_TIMER1_BASE),
+		.bottom_irq	= IRQ_TINT1_TINT12,
+		.top_irq	= IRQ_TINT1_TINT34,
+	},
+};
+
+/*-------------------------------------------------------------------------*/
+
+#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
+
+void davinci_init_emac(struct emac_platform_data *pdata)
+{
+	DECLARE_MAC_BUF(buf);
+
+	if (cpu_is_davinci_dm644x())
+		dm644x_init_emac(pdata);
+	else if (cpu_is_davinci_dm646x())
+		dm646x_init_emac(pdata);
+
+	/* if valid MAC exists, don't re-register */
+	if (is_valid_ether_addr(pdata->mac_addr))
+		return;
+	else {
+		/* Use random MAC if none passed */
+		random_ether_addr(pdata->mac_addr);
+
+		printk(KERN_WARNING "%s: using random MAC addr: %s\n",
+		       __func__, print_mac(buf, pdata->mac_addr));
+	}
+}
+
+#else
+
+void davinci_init_emac(struct emac_platform_data *unused) {}
+
+#endif
+
+/*-------------------------------------------------------------------------*/
+
 static int __init davinci_init_devices(void)
 {
 	/* please keep these calls, and their implementations above,
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index e8c01ff..293a419 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -25,6 +25,7 @@
 #include <mach/psc.h>
 #include <mach/mux.h>
 #include <mach/irqs.h>
+#include <mach/time.h>
 #include <mach/common.h>
 
 #include "clock.h"
@@ -616,6 +617,18 @@ static void __iomem *dm355_psc_bases[] = {
 	IO_ADDRESS(DAVINCI_PWR_SLEEP_CNTRL_BASE),
 };
 
+/*
+ * T0_BOT: Timer 0, bottom:  clockevent source for hrtimers
+ * T0_TOP: Timer 0, top   :  clocksource for generic timekeeping
+ * T1_BOT: Timer 1, bottom:  (used by DSP in TI DSPLink code)
+ * T1_TOP: Timer 1, top   :  <unused>
+ */
+struct davinci_timer_info dm355_timer_info = {
+	.timers		= davinci_timer_instance,
+	.clockevent_id	= T0_BOT,
+	.clocksource_id	= T0_TOP,
+};
+
 static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.io_desc		= dm355_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm355_io_desc),
@@ -632,6 +645,7 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.intc_type		= DAVINCI_INTC_TYPE_AINTC,
 	.intc_irq_prios		= dm355_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
+	.timer_info		= &dm355_timer_info,
 };
 
 void __init dm355_init(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 5c6a7b1..8e9385c 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -22,6 +22,7 @@
 #include <mach/irqs.h>
 #include <mach/psc.h>
 #include <mach/mux.h>
+#include <mach/time.h>
 #include <mach/common.h>
 
 #include "clock.h"
@@ -559,6 +560,18 @@ static void __iomem *dm644x_psc_bases[] = {
 	IO_ADDRESS(DAVINCI_PWR_SLEEP_CNTRL_BASE),
 };
 
+/*
+ * T0_BOT: Timer 0, bottom:  clockevent source for hrtimers
+ * T0_TOP: Timer 0, top   :  clocksource for generic timekeeping
+ * T1_BOT: Timer 1, bottom:  (used by DSP in TI DSPLink code)
+ * T1_TOP: Timer 1, top   :  <unused>
+ */
+struct davinci_timer_info dm644x_timer_info = {
+	.timers		= davinci_timer_instance,
+	.clockevent_id	= T0_BOT,
+	.clocksource_id	= T0_TOP,
+};
+
 static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.io_desc		= dm644x_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm644x_io_desc),
@@ -575,6 +588,7 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.intc_type		= DAVINCI_INTC_TYPE_AINTC,
 	.intc_irq_prios 	= dm644x_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
+	.timer_info		= &dm644x_timer_info,
 };
 
 void __init dm644x_init(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index beb522e..219063f 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -22,6 +22,7 @@
 #include <mach/irqs.h>
 #include <mach/psc.h>
 #include <mach/mux.h>
+#include <mach/time.h>
 #include <mach/common.h>
 
 #include "clock.h"
@@ -538,6 +539,18 @@ static void __iomem *dm646x_psc_bases[] = {
 	IO_ADDRESS(DAVINCI_PWR_SLEEP_CNTRL_BASE),
 };
 
+/*
+ * T0_BOT: Timer 0, bottom:  clockevent source for hrtimers
+ * T0_TOP: Timer 0, top   :  clocksource for generic timekeeping
+ * T1_BOT: Timer 1, bottom:  (used by DSP in TI DSPLink code)
+ * T1_TOP: Timer 1, top   :  <unused>
+ */
+struct davinci_timer_info dm646x_timer_info = {
+	.timers		= davinci_timer_instance,
+	.clockevent_id	= T0_BOT,
+	.clocksource_id	= T0_TOP,
+};
+
 static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.io_desc		= dm646x_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm646x_io_desc),
@@ -554,6 +567,7 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.intc_type		= DAVINCI_INTC_TYPE_AINTC,
 	.intc_irq_prios		= dm646x_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
+	.timer_info		= &dm646x_timer_info,
 };
 
 void __init dm646x_init(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 838ae13..90b43be 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -25,6 +25,18 @@ extern void setup_usb(unsigned mA, unsigned potpgt_msec);
 /* parameters describe VBUS sourcing for host mode */
 extern void setup_usb(unsigned mA, unsigned potpgt_msec);
 
+struct davinci_timer_instance {
+	void __iomem	*base;
+	u32		bottom_irq;
+	u32		top_irq;
+};
+
+struct davinci_timer_info {
+	struct davinci_timer_instance	*timers;
+	unsigned int			clockevent_id;
+	unsigned int			clocksource_id;
+};
+
 /* SoC specific init support */
 struct davinci_soc_info {
 	struct map_desc			*io_desc;
@@ -44,6 +56,7 @@ struct davinci_soc_info {
 	int				intc_type;
 	u8				*intc_irq_prios;
 	unsigned long			intc_irq_num;
+	struct davinci_timer_info	*timer_info;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
diff --git a/arch/arm/mach-davinci/include/mach/time.h b/arch/arm/mach-davinci/include/mach/time.h
new file mode 100644
index 0000000..1428d77
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/time.h
@@ -0,0 +1,34 @@
+/*
+ * Local header file for DaVinci time code.
+ *
+ * Author: Kevin Hilman, MontaVista Software, Inc. <source@mvista.com>
+ *
+ * 2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef __ARCH_ARM_MACH_DAVINCI_TIME_H
+#define __ARCH_ARM_MACH_DAVINCI_TIME_H
+
+#define DAVINCI_TIMER0_BASE		(IO_PHYS + 0x21400)
+#define DAVINCI_TIMER1_BASE		(IO_PHYS + 0x21800)
+
+enum {
+	T0_BOT,
+	T0_TOP,
+	T1_BOT,
+	T1_TOP,
+	NUM_TIMERS
+};
+
+#define IS_TIMER1(id)		(id & 0x2)
+#define IS_TIMER0(id)		(!IS_TIMER1(id))
+#define IS_TIMER_TOP(id)	((id & 0x1))
+#define IS_TIMER_BOT(id)	(!IS_TIMER_TOP(id))
+
+#define ID_TO_TIMER(id)		(IS_TIMER1(id) != 0)
+
+extern struct davinci_timer_instance davinci_timer_instance[];
+
+#endif /* __ARCH_ARM_MACH_DAVINCI_TIME_H */
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index efbbc2a..faafb89 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -29,42 +29,23 @@
 #include <asm/errno.h>
 #include <mach/io.h>
 #include <mach/cputype.h>
+#include <mach/time.h>
 #include "clock.h"
 
 static struct clock_event_device clockevent_davinci;
 static unsigned int davinci_clock_tick_rate;
 
-#define DAVINCI_TIMER0_BASE (IO_PHYS + 0x21400)
-#define DAVINCI_TIMER1_BASE (IO_PHYS + 0x21800)
 #define DAVINCI_WDOG_BASE   (IO_PHYS + 0x21C00)
 
-enum {
-	T0_BOT = 0, T0_TOP, T1_BOT, T1_TOP, NUM_TIMERS,
-};
-
-#define IS_TIMER1(id)    (id & 0x2)
-#define IS_TIMER0(id)    (!IS_TIMER1(id))
-#define IS_TIMER_TOP(id) ((id & 0x1))
-#define IS_TIMER_BOT(id) (!IS_TIMER_TOP(id))
-
-static int timer_irqs[NUM_TIMERS] = {
-	IRQ_TINT0_TINT12,
-	IRQ_TINT0_TINT34,
-	IRQ_TINT1_TINT12,
-	IRQ_TINT1_TINT34,
-};
-
 /*
  * This driver configures the 2 64-bit count-up timers as 4 independent
  * 32-bit count-up timers used as follows:
- *
- * T0_BOT: Timer 0, bottom:  clockevent source for hrtimers
- * T0_TOP: Timer 0, top   :  clocksource for generic timekeeping
- * T1_BOT: Timer 1, bottom:  (used by DSP in TI DSPLink code)
- * T1_TOP: Timer 1, top   :  <unused>
  */
-#define TID_CLOCKEVENT  T0_BOT
-#define TID_CLOCKSOURCE T0_TOP
+
+enum {
+	TID_CLOCKEVENT,
+	TID_CLOCKSOURCE,
+};
 
 /* Timer register offsets */
 #define PID12                        0x0
@@ -119,6 +100,13 @@ static struct timer_s timers[];
 #define TIMER_OPTS_ONESHOT    0x01
 #define TIMER_OPTS_PERIODIC   0x02
 
+static char *id_to_name[] = {
+	[T0_BOT]	= "timer0_0",
+	[T0_TOP]	= "timer0_1",
+	[T1_BOT]	= "timer1_0",
+	[T1_TOP]	= "timer1_1",
+};
+
 static int timer32_config(struct timer_s *t)
 {
 	u32 tcr = __raw_readl(t->base + TCR);
@@ -183,13 +171,14 @@ static struct timer_s timers[] = {
 
 static void __init timer_init(void)
 {
-	u32 phys_bases[] = {DAVINCI_TIMER0_BASE, DAVINCI_TIMER1_BASE};
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+	struct davinci_timer_instance *dtip = soc_info->timer_info->timers;
 	int i;
 
 	/* Global init of each 64-bit timer as a whole */
 	for(i=0; i<2; i++) {
 		u32 tgcr;
-		void __iomem *base = IO_ADDRESS(phys_bases[i]);
+		void __iomem *base = dtip[i].base;
 
 		/* Disabled, Internal clock source */
 		__raw_writel(0, base + TCR);
@@ -215,33 +204,30 @@ static void __init timer_init(void)
 	/* Init of each timer as a 32-bit timer */
 	for (i=0; i< ARRAY_SIZE(timers); i++) {
 		struct timer_s *t = &timers[i];
-		u32 phys_base;
-
-		if (t->name) {
-			t->id = i;
-			phys_base = (IS_TIMER1(t->id) ?
-			       DAVINCI_TIMER1_BASE : DAVINCI_TIMER0_BASE);
-			t->base = IO_ADDRESS(phys_base);
-
-			if (IS_TIMER_BOT(t->id)) {
-				t->enamode_shift = 6;
-				t->tim_off = TIM12;
-				t->prd_off = PRD12;
-			} else {
-				t->enamode_shift = 22;
-				t->tim_off = TIM34;
-				t->prd_off = PRD34;
-			}
-
-			/* Register interrupt */
-			t->irqaction.name = t->name;
-			t->irqaction.dev_id = (void *)t;
-			if (t->irqaction.handler != NULL) {
-				setup_irq(timer_irqs[t->id], &t->irqaction);
-			}
-
-			timer32_config(&timers[i]);
+		int timer = ID_TO_TIMER(t->id);
+		u32 irq;
+
+		t->base = dtip[timer].base;
+
+		if (IS_TIMER_BOT(t->id)) {
+			t->enamode_shift = 6;
+			t->tim_off = TIM12;
+			t->prd_off = PRD12;
+			irq = dtip[timer].bottom_irq;
+		} else {
+			t->enamode_shift = 22;
+			t->tim_off = TIM34;
+			t->prd_off = PRD34;
+			irq = dtip[timer].top_irq;
 		}
+
+		/* Register interrupt */
+		t->irqaction.name = t->name;
+		t->irqaction.dev_id = (void *)t;
+		if (t->irqaction.handler != NULL)
+			setup_irq(irq, &t->irqaction);
+
+		timer32_config(&timers[i]);
 	}
 }
 
@@ -256,7 +242,6 @@ static cycle_t read_cycles(struct clocksource *cs)
 }
 
 static struct clocksource clocksource_davinci = {
-	.name		= "timer0_1",
 	.rating		= 300,
 	.read		= read_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
@@ -301,7 +286,6 @@ static void davinci_set_mode(enum clock_event_mode mode,
 }
 
 static struct clock_event_device clockevent_davinci = {
-	.name		= "timer0_0",
 	.features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.set_next_event	= davinci_set_next_event,
@@ -312,10 +296,14 @@ static struct clock_event_device clockevent_davinci = {
 static void __init davinci_timer_init(void)
 {
 	struct clk *timer_clk;
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
 	static char err[] __initdata = KERN_ERR
 		"%s: can't register clocksource!\n";
 
+	timers[TID_CLOCKEVENT].id = soc_info->timer_info->clockevent_id;
+	timers[TID_CLOCKSOURCE].id = soc_info->timer_info->clocksource_id;
+
 	/* init timer hw */
 	timer_init();
 
@@ -326,6 +314,7 @@ static void __init davinci_timer_init(void)
 	davinci_clock_tick_rate = clk_get_rate(timer_clk);
 
 	/* setup clocksource */
+	clocksource_davinci.name = id_to_name[timers[TID_CLOCKSOURCE].id];
 	clocksource_davinci.mult =
 		clocksource_khz2mult(davinci_clock_tick_rate/1000,
 				     clocksource_davinci.shift);
@@ -333,6 +322,7 @@ static void __init davinci_timer_init(void)
 		printk(err, clocksource_davinci.name);
 
 	/* setup clockevent */
+	clockevent_davinci.name = id_to_name[timers[TID_CLOCKEVENT].id];
 	clockevent_davinci.mult = div_sc(davinci_clock_tick_rate, NSEC_PER_SEC,
 					 clockevent_davinci.shift);
 	clockevent_davinci.max_delta_ns =
-- 
cgit v0.10.2


From 951d6f6d703110790256abfce03ced117d2dcc6b Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:40:21 -0700
Subject: davinci: Add watchdog base address flexibility

The watchdog code currently hardcodes the base address
of the timer its using.  To support new SoCs, make it
support timers at any address.  Use the soc_info structure
to do this.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 36c528f..7ebf671 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -216,8 +216,6 @@ void __init davinci_setup_mmc(int module, struct davinci_mmc_config *config)
 
 static struct resource wdt_resources[] = {
 	{
-		.start	= 0x01c21c00,
-		.end	= 0x01c21fff,
 		.flags	= IORESOURCE_MEM,
 	},
 };
@@ -231,6 +229,11 @@ struct platform_device davinci_wdt_device = {
 
 static void davinci_init_wdt(void)
 {
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+
+	wdt_resources[0].start = (resource_size_t)soc_info->wdt_base;
+	wdt_resources[0].end = (resource_size_t)soc_info->wdt_base + SZ_1K - 1;
+
 	platform_device_register(&davinci_wdt_device);
 }
 
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 293a419..1b7c16c 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -646,6 +646,7 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.intc_irq_prios		= dm355_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm355_timer_info,
+	.wdt_base		= IO_ADDRESS(DAVINCI_WDOG_BASE),
 };
 
 void __init dm355_init(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 8e9385c..c662692 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -589,6 +589,7 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.intc_irq_prios 	= dm644x_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm644x_timer_info,
+	.wdt_base		= IO_ADDRESS(DAVINCI_WDOG_BASE),
 };
 
 void __init dm644x_init(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 219063f..83d67cf 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -568,6 +568,7 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.intc_irq_prios		= dm646x_default_priorities,
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm646x_timer_info,
+	.wdt_base		= IO_ADDRESS(DAVINCI_WDOG_BASE),
 };
 
 void __init dm646x_init(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 90b43be..d637038 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -57,6 +57,7 @@ struct davinci_soc_info {
 	u8				*intc_irq_prios;
 	unsigned long			intc_irq_num;
 	struct davinci_timer_info	*timer_info;
+	void __iomem			*wdt_base;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
diff --git a/arch/arm/mach-davinci/include/mach/time.h b/arch/arm/mach-davinci/include/mach/time.h
index 1428d77..1c971d8 100644
--- a/arch/arm/mach-davinci/include/mach/time.h
+++ b/arch/arm/mach-davinci/include/mach/time.h
@@ -13,6 +13,7 @@
 
 #define DAVINCI_TIMER0_BASE		(IO_PHYS + 0x21400)
 #define DAVINCI_TIMER1_BASE		(IO_PHYS + 0x21800)
+#define DAVINCI_WDOG_BASE		(IO_PHYS + 0x21C00)
 
 enum {
 	T0_BOT,
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index faafb89..f80ae25 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -35,8 +35,6 @@
 static struct clock_event_device clockevent_davinci;
 static unsigned int davinci_clock_tick_rate;
 
-#define DAVINCI_WDOG_BASE   (IO_PHYS + 0x21C00)
-
 /*
  * This driver configures the 2 64-bit count-up timers as 4 independent
  * 32-bit count-up timers used as follows:
@@ -343,7 +341,8 @@ struct sys_timer davinci_timer = {
 void davinci_watchdog_reset(void)
 {
 	u32 tgcr, wdtcr;
-	void __iomem *base = IO_ADDRESS(DAVINCI_WDOG_BASE);
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+	void __iomem *base = soc_info->wdt_base;
 	struct clk *wd_clk;
 
 	wd_clk = clk_get(&davinci_wdt_device.dev, NULL);
-- 
cgit v0.10.2


From aa2936f5fe060e95ae06685149645b234085a468 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 26 May 2009 16:07:57 +0200
Subject: ALSA: hda - Support sync after writing a verb

This patch adds a debug mode to make the codec communication
synchronous.  Define SND_HDA_SUPPORT_SYNC_WRITE in hda_codec.c,
and the call of snd_hda_codec_write*() will become synchronous,
i.e. wait for the reply from the codec at each time issuing a verb.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 77385de..d1d5fb9 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -158,6 +158,38 @@ make_codec_cmd(struct hda_codec *codec, hda_nid_t nid, int direct,
 	return val;
 }
 
+/*
+ * Send and receive a verb
+ */
+static int codec_exec_verb(struct hda_codec *codec, unsigned int cmd,
+			   unsigned int *res)
+{
+	struct hda_bus *bus = codec->bus;
+	int err, repeated = 0;
+
+	if (res)
+		*res = -1;
+	snd_hda_power_up(codec);
+	mutex_lock(&bus->cmd_mutex);
+ again:
+	err = bus->ops.command(bus, cmd);
+	if (!err) {
+		if (res) {
+			*res = bus->ops.get_response(bus);
+			if (*res == -1 && bus->rirb_error) {
+				if (repeated++ < 1) {
+					snd_printd(KERN_WARNING "hda_codec: "
+					   "Trying verb 0x%08x again\n", cmd);
+					goto again;
+				}
+			}
+		}
+	}
+	mutex_unlock(&bus->cmd_mutex);
+	snd_hda_power_down(codec);
+	return err;
+}
+
 /**
  * snd_hda_codec_read - send a command and get the response
  * @codec: the HDA codec
@@ -174,31 +206,18 @@ unsigned int snd_hda_codec_read(struct hda_codec *codec, hda_nid_t nid,
 				int direct,
 				unsigned int verb, unsigned int parm)
 {
-	struct hda_bus *bus = codec->bus;
-	unsigned int cmd, res;
-	int repeated = 0;
-
-	cmd = make_codec_cmd(codec, nid, direct, verb, parm);
-	snd_hda_power_up(codec);
-	mutex_lock(&bus->cmd_mutex);
- again:
-	if (!bus->ops.command(bus, cmd)) {
-		res = bus->ops.get_response(bus);
-		if (res == -1 && bus->rirb_error) {
-			if (repeated++ < 1) {
-				snd_printd(KERN_WARNING "hda_codec: "
-					   "Trying verb 0x%08x again\n", cmd);
-				goto again;
-			}
-		}
-	} else
-		res = (unsigned int)-1;
-	mutex_unlock(&bus->cmd_mutex);
-	snd_hda_power_down(codec);
+	unsigned cmd = make_codec_cmd(codec, nid, direct, verb, parm);
+	unsigned int res;
+	codec_exec_verb(codec, cmd, &res);
 	return res;
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_read);
 
+/* Define the below to send and receive verbs synchronously.
+ * If you often get any codec communication errors, this is worth to try.
+ */
+/* #define SND_HDA_SUPPORT_SYNC_WRITE */
+
 /**
  * snd_hda_codec_write - send a single command without waiting for response
  * @codec: the HDA codec
@@ -214,17 +233,13 @@ EXPORT_SYMBOL_HDA(snd_hda_codec_read);
 int snd_hda_codec_write(struct hda_codec *codec, hda_nid_t nid, int direct,
 			 unsigned int verb, unsigned int parm)
 {
-	struct hda_bus *bus = codec->bus;
+	unsigned int cmd = make_codec_cmd(codec, nid, direct, verb, parm);
+#ifdef SND_HDA_SUPPORT_SYNC_WRITE
 	unsigned int res;
-	int err;
-
-	res = make_codec_cmd(codec, nid, direct, verb, parm);
-	snd_hda_power_up(codec);
-	mutex_lock(&bus->cmd_mutex);
-	err = bus->ops.command(bus, res);
-	mutex_unlock(&bus->cmd_mutex);
-	snd_hda_power_down(codec);
-	return err;
+	return codec_exec_verb(codec, cmd, &res);
+#else
+	return codec_exec_verb(codec, cmd, NULL);
+#endif
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_write);
 
@@ -2281,28 +2296,22 @@ EXPORT_SYMBOL_HDA(snd_hda_create_spdif_in_ctls);
 int snd_hda_codec_write_cache(struct hda_codec *codec, hda_nid_t nid,
 			      int direct, unsigned int verb, unsigned int parm)
 {
-	struct hda_bus *bus = codec->bus;
-	unsigned int res;
-	int err;
+	int err = snd_hda_codec_write(codec, nid, direct, verb, parm);
+	struct hda_cache_head *c;
+	u32 key;
 
-	res = make_codec_cmd(codec, nid, direct, verb, parm);
-	snd_hda_power_up(codec);
-	mutex_lock(&bus->cmd_mutex);
-	err = bus->ops.command(bus, res);
-	if (!err) {
-		struct hda_cache_head *c;
-		u32 key;
-		/* parm may contain the verb stuff for get/set amp */
-		verb = verb | (parm >> 8);
-		parm &= 0xff;
-		key = build_cmd_cache_key(nid, verb);
-		c = get_alloc_hash(&codec->cmd_cache, key);
-		if (c)
-			c->val = parm;
-	}
-	mutex_unlock(&bus->cmd_mutex);
-	snd_hda_power_down(codec);
-	return err;
+	if (err < 0)
+		return err;
+	/* parm may contain the verb stuff for get/set amp */
+	verb = verb | (parm >> 8);
+	parm &= 0xff;
+	key = build_cmd_cache_key(nid, verb);
+	mutex_lock(&codec->bus->cmd_mutex);
+	c = get_alloc_hash(&codec->cmd_cache, key);
+	if (c)
+		c->val = parm;
+	mutex_unlock(&codec->bus->cmd_mutex);
+	return 0;
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_write_cache);
 
-- 
cgit v0.10.2


From abd54f68629fa73ed4fa040d433196211a9bbed2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 26 May 2009 12:21:34 -0300
Subject: perf: Don't assume /proc/kallsyms is ordered

perf: Don't assume /proc/kallsyms is ordered

Since we _are_ ordering it by the symbol start, just traverse the
freshly built rbtree setting the prev->end members to curr->start - 1.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090526152134.GF4424@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index b19b893..e178190 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -360,17 +360,9 @@ static int load_kallsyms(void)
 	char *line = NULL;
 	size_t n;
 
-	if (getline(&line, &n, file) < 0 || !line)
-		goto out_delete_dso;
-
-	unsigned long long previous_start;
-	char c, previous_symbf[4096];
-	if (sscanf(line, "%llx %c %s", &previous_start, &c, previous_symbf) != 3)
-		goto out_delete_line;
-
 	while (!feof(file)) {
 		unsigned long long start;
-		char symbf[4096];
+		char c, symbf[4096];
 
 		if (getline(&line, &n, file) < 0)
 			break;
@@ -379,21 +371,35 @@ static int load_kallsyms(void)
 			goto out_delete_dso;
 
 		if (sscanf(line, "%llx %c %s", &start, &c, symbf) == 3) {
-			if (start > previous_start) {
-				struct symbol *sym = symbol__new(previous_start,
-								 start - previous_start,
-								 previous_symbf);
+			/*
+			 * Well fix up the end later, when we have all sorted.
+			 */
+			struct symbol *sym = symbol__new(start, 0xdead, symbf);
 
-				if (sym == NULL)
-					goto out_delete_dso;
+			if (sym == NULL)
+				goto out_delete_dso;
 
-				dso__insert_symbol(kernel_dso, sym);
-				previous_start = start;
-				strcpy(previous_symbf, symbf);
-			}
+			dso__insert_symbol(kernel_dso, sym);
 		}
 	}
 
+	/*
+	 * Now that we have all sorted out, just set the ->end of all
+	 * symbols
+	 */
+	struct rb_node *nd, *prevnd = rb_first(&kernel_dso->syms);
+
+	if (prevnd == NULL)
+		goto out_delete_line;
+
+	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+		struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
+			      *curr = rb_entry(nd, struct symbol, rb_node);
+
+		prev->end = curr->start - 1;
+		prevnd = nd;
+	}
+
 	dsos__add(kernel_dso);
 	free(line);
 	fclose(file);
-- 
cgit v0.10.2


From 4319503779060120fa5de9b8fde056603bb6e0fd Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jwilson@fedoraproject.org>
Date: Fri, 6 Mar 2009 20:24:57 +0000
Subject: [CPUFREQ] add atom family to p4-clockmod

Some atom procs don't do freq scaling (such as the atom 330 on my own
littlefalls2 board). By adding the atom family here, we at least get
the benefit of passive cooling in a thermal emergency. Not sure how
to see that its actually helping any, but the driver does bind and
claim its functioning on my atom 330.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 6ac55bd..8696151 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -168,6 +168,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 		case 0x0E: /* Core */
 		case 0x0F: /* Core Duo */
 		case 0x16: /* Celeron Core */
+		case 0x1C: /* Atom */
 			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 			return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
 		case 0x0D: /* Pentium M (Dothan) */
-- 
cgit v0.10.2


From d38e73e8dad454a5916f446b0d3523c1161ae95a Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 23 Apr 2009 13:36:12 -0400
Subject: [CPUFREQ] powernow-k7 build fix when ACPI=n
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

arch/x86/kernel/cpu/cpufreq/powernow-k7.c:172: warning: 'invalidate_entry' defined but not used

Reported-by: Toralf Förster <toralf.foerster@gmx.de>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 3c28ccd..a8363e5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -168,10 +168,12 @@ static int check_powernow(void)
 	return 1;
 }
 
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
 static void invalidate_entry(unsigned int entry)
 {
 	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
 }
+#endif
 
 static int get_ranges(unsigned char *pst)
 {
-- 
cgit v0.10.2


From 42a06f2166f2f6f7bf04f32b4e823eacdceafdc9 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Sun, 17 May 2009 10:23:52 -0400
Subject: [CPUFREQ] remove rwsem lock from CPUFREQ_GOV_STOP call

* Rafael J. Wysocki (rjw@sisk.pl) wrote:
> This message has been generated automatically as a part of a report
> of regressions introduced between 2.6.28 and 2.6.29.
>
> The following bug entry is on the current list of known regressions
> introduced between 2.6.28 and 2.6.29.  Please verify if it still should
> be listed and let me know (either way).
>
>
> Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13186
> Subject		: cpufreq timer teardown problem
> Submitter	: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> Date		: 2009-04-23 14:00 (24 days old)
> References	: http://marc.info/?l=linux-kernel&m=124049523515036&w=4
> Handled-By	: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> Patch		: http://patchwork.kernel.org/patch/19754/
> 		  http://patchwork.kernel.org/patch/19753/

The patches linked above depend on the following patch to remove
circular locking dependency :

cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP call

(the following issue was faced when using cancel_delayed_work_sync() in the
timer teardown (which fixes a race).

* KOSAKI Motohiro (kosaki.motohiro@jp.fujitsu.com) wrote:
> Hi
>
> my box output following warnings.
> it seems regression by commit 7ccc7608b836e58fbacf65ee4f8eefa288e86fac.
>
> A: work -> do_dbs_timer()  -> cpu_policy_rwsem
> B: store() -> cpu_policy_rwsem -> cpufreq_governor_dbs() -> work
>
>

Hrm, I think it must be due to my attempt to fix the timer teardown race
in ondemand governor mixed with new locking behavior in 2.6.30-rc.

The rwlock seems to be taken around the whole call to
cpufreq_governor_dbs(), when it should be only taken around accesses to
the locked data, and especially *not* around the call to
dbs_timer_exit().

Reverting my fix attempt would put the teardown race back in place
(replacing the cancel_delayed_work_sync by cancel_delayed_work).
Instead, a proper fix would imply modifying this critical section :

cpufreq.c: __cpufreq_remove_dev()
...
        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

        unlock_policy_rwsem_write(cpu);

To make sure the __cpufreq_governor() callback is not called with rwsem
held. This would allow execution of cancel_delayed_work_sync() without
being nested within the rwsem.

Applies on top of the 2.6.30-rc5 tree.

Required to remove circular dep in teardown of both conservative and
ondemande governors so they can use cancel_delayed_work_sync().
CPUFREQ_GOV_STOP does not modify the policy, therefore this locking seemed
unneeded.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Greg KH <greg@kroah.com>
CC: Ingo Molnar <mingo@elte.hu>
CC: "Rafael J. Wysocki" <rjw@sisk.pl>
CC: Ben Slusky <sluskyb@paranoiacs.org>
CC: Chris Wright <chrisw@sous-sol.org>
CC: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index d270e8e..47d2ad0 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1070,11 +1070,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 #endif
 
+	unlock_policy_rwsem_write(cpu);
+
 	if (cpufreq_driver->target)
 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
 
-	unlock_policy_rwsem_write(cpu);
-
 	kobject_put(&data->kobj);
 
 	/* we need to make sure that the underlying kobj is actually
-- 
cgit v0.10.2


From b253d2b2d28ead6fed012feb54694b3d0562839a Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Sun, 17 May 2009 10:29:33 -0400
Subject: [CPUFREQ] fix timer teardown in conservative governor

* Rafael J. Wysocki (rjw@sisk.pl) wrote:
> This message has been generated automatically as a part of a report
> of regressions introduced between 2.6.28 and 2.6.29.
>
> The following bug entry is on the current list of known regressions
> introduced between 2.6.28 and 2.6.29.  Please verify if it still should
> be listed and let me know (either way).
>
>
> Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13186
> Subject		: cpufreq timer teardown problem
> Submitter	: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> Date		: 2009-04-23 14:00 (24 days old)
> References	: http://marc.info/?l=linux-kernel&m=124049523515036&w=4
> Handled-By	: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> Patch		: http://patchwork.kernel.org/patch/19754/
> 		  http://patchwork.kernel.org/patch/19753/
>

(re-send with updated changelog)

cpufreq fix timer teardown in conservative governor

The problem is that dbs_timer_exit() uses cancel_delayed_work() when it should
use cancel_delayed_work_sync(). cancel_delayed_work() does not wait for the
workqueue handler to exit.

The ondemand governor does not seem to be affected because the
"if (!dbs_info->enable)" check at the beginning of the workqueue handler returns
immediately without rescheduling the work. The conservative governor in
2.6.30-rc has the same check as the ondemand governor, which makes things
usually run smoothly. However, if the governor is quickly stopped and then
started, this could lead to the following race :

dbs_enable could be reenabled and multiple do_dbs_timer handlers would run.
This is why a synchronized teardown is required.

Depends on patch
cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP call

The following patch applies to 2.6.30-rc2. Stable kernels have a similar
issue which should also be fixed, but the code changed between 2.6.29
and 2.6.30, so this patch only applies to 2.6.30-rc.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: gregkh@suse.de
CC: stable@kernel.org
CC: cpufreq@vger.kernel.org
CC: Ingo Molnar <mingo@elte.hu>
CC: rjw@sisk.pl
CC: Ben Slusky <sluskyb@paranoiacs.org>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 2ecd95e..7a74d17 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -91,6 +91,9 @@ static unsigned int dbs_enable;	/* number of CPUs using this policy */
  * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
  * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
  * is recursive for the same process. -Venki
+ * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it
+ * would deadlock with cancel_delayed_work_sync(), which is needed for proper
+ * raceless workqueue teardown.
  */
 static DEFINE_MUTEX(dbs_mutex);
 
@@ -542,7 +545,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
 {
 	dbs_info->enable = 0;
-	cancel_delayed_work(&dbs_info->work);
+	cancel_delayed_work_sync(&dbs_info->work);
 }
 
 static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
-- 
cgit v0.10.2


From b14893a62c73af0eca414cfed505b8c09efc613c Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Sun, 17 May 2009 10:30:45 -0400
Subject: [CPUFREQ] fix timer teardown in ondemand governor

* Rafael J. Wysocki (rjw@sisk.pl) wrote:
> This message has been generated automatically as a part of a report
> of regressions introduced between 2.6.28 and 2.6.29.
>
> The following bug entry is on the current list of known regressions
> introduced between 2.6.28 and 2.6.29.  Please verify if it still should
> be listed and let me know (either way).
>
>
> Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13186
> Subject		: cpufreq timer teardown problem
> Submitter	: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> Date		: 2009-04-23 14:00 (24 days old)
> References	: http://marc.info/?l=linux-kernel&m=124049523515036&w=4
> Handled-By	: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> Patch		: http://patchwork.kernel.org/patch/19754/
> 		  http://patchwork.kernel.org/patch/19753/
>

(updated changelog)

cpufreq fix timer teardown in ondemand governor

The problem is that dbs_timer_exit() uses cancel_delayed_work() when it should
use cancel_delayed_work_sync(). cancel_delayed_work() does not wait for the
workqueue handler to exit.

The ondemand governor does not seem to be affected because the
"if (!dbs_info->enable)" check at the beginning of the workqueue handler returns
immediately without rescheduling the work. The conservative governor in
2.6.30-rc has the same check as the ondemand governor, which makes things
usually run smoothly. However, if the governor is quickly stopped and then
started, this could lead to the following race :

dbs_enable could be reenabled and multiple do_dbs_timer handlers would run.
This is why a synchronized teardown is required.

The following patch applies to, at least, 2.6.28.x, 2.6.29.1, 2.6.30-rc2.

Depends on patch
cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP call

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: gregkh@suse.de
CC: stable@kernel.org
CC: cpufreq@vger.kernel.org
CC: Ingo Molnar <mingo@elte.hu>
CC: rjw@sisk.pl
CC: Ben Slusky <sluskyb@paranoiacs.org>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 338f428..e741c33 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -98,6 +98,9 @@ static unsigned int dbs_enable;	/* number of CPUs using this policy */
  * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
  * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
  * is recursive for the same process. -Venki
+ * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it
+ * would deadlock with cancel_delayed_work_sync(), which is needed for proper
+ * raceless workqueue teardown.
  */
 static DEFINE_MUTEX(dbs_mutex);
 
@@ -562,7 +565,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
 {
 	dbs_info->enable = 0;
-	cancel_delayed_work(&dbs_info->work);
+	cancel_delayed_work_sync(&dbs_info->work);
 }
 
 static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
-- 
cgit v0.10.2


From df1829770db415dc5a5ed5ada3bd70176c6f0a01 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Wed, 22 Apr 2009 13:48:32 +0200
Subject: [CPUFREQ] powernow-k8 cleanup msg if BIOS does not export ACPI _PSS
 cpufreq data

- Make the message shorter and easier to grep for
- Use printk_once instead of WARN_ONCE (functionality of these was mixed)

Signed-off-by: Thomas Renninger <trenn@suse.de>
Cc: Langsdorf, Mark <mark.langsdorf@amd.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 4709ead..feef10c 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1215,13 +1215,16 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
 	return cpufreq_frequency_table_verify(pol, data->powernow_table);
 }
 
+static const char ACPI_PSS_BIOS_BUG_MSG[] =
+	KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
+	KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
+
 /* per CPU init entry point to the driver */
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
 	cpumask_t oldmask;
 	int rc;
-	static int print_once;
 
 	if (!cpu_online(pol->cpu))
 		return -ENODEV;
@@ -1244,19 +1247,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		 * an UP version, and is deprecated by AMD.
 		 */
 		if (num_online_cpus() != 1) {
-			/*
-			 * Replace this one with print_once as soon as such a
-			 * thing gets introduced
-			 */
-			if (!print_once) {
-				WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
-					"does not provide ACPI _PSS objects "
-					"in a way that Linux understands. "
-					"Please report this to the Linux ACPI"
-					" maintainers and complain to your "
-					"BIOS vendor.\n");
-				print_once++;
-			}
+			printk_once(ACPI_PSS_BIOS_BUG_MSG);
 			goto err_out;
 		}
 		if (pol->cpu != 0) {
-- 
cgit v0.10.2


From ca446d06351992e4f1a7c1e5e99870ab4ec5188f Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Wed, 22 Apr 2009 13:48:33 +0200
Subject: [CPUFREQ] powernow-k8: determine exact CPU frequency for HW Pstates

Slightly modified by trenn@suse.de -> only do this on fam 10h and fam 11h.

Currently powernow-k8 determines CPU frequency from ACPI PSS objects, but
according to AMD family 11h BKDG this frequency is just a rounded value:

  "CoreFreq (MHz) = The CPU COF specified by MSRC001_00[6B:64][CpuFid]
  rounded to the nearest 100 Mhz."

As a consequnce powernow-k8 reports wrong CPU frequency on some systems,
e.g. on Turion X2 Ultra:

  powernow-k8: Found 1 AMD Turion(tm)X2 Ultra DualCore Mobile ZM-82
               processors (2 cpu cores) (version 2.20.00)
  powernow-k8:    0 : pstate 0 (2200 MHz)
  powernow-k8:    1 : pstate 1 (1100 MHz)
  powernow-k8:    2 : pstate 2 (600 MHz)

But this is wrong as frequency for Pstate2 is 550 MHz. x86info reports it
correctly:

  #x86info -a |grep Pstate
  ...
  Pstate-0: fid=e, did=0, vid=24 (2200MHz)
  Pstate-1: fid=e, did=1, vid=30 (1100MHz)
  Pstate-2: fid=e, did=2, vid=3c (550MHz) (current)

Solution is to determine the frequency directly from Pstate MSRs instead
of using rounded values from ACPI table.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index feef10c..f6b32d1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -649,6 +649,20 @@ static void print_basics(struct powernow_k8_data *data)
 				data->batps);
 }
 
+static u32 freq_from_fid_did(u32 fid, u32 did)
+{
+	u32 mhz = 0;
+
+	if (boot_cpu_data.x86 == 0x10)
+		mhz = (100 * (fid + 0x10)) >> did;
+	else if (boot_cpu_data.x86 == 0x11)
+		mhz = (100 * (fid + 8)) >> did;
+	else
+		BUG();
+
+	return mhz * 1000;
+}
+
 static int fill_powernow_table(struct powernow_k8_data *data,
 		struct pst_s *pst, u8 maxvid)
 {
@@ -923,8 +937,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
 
 		powernow_table[i].index = index;
 
-		powernow_table[i].frequency =
-			data->acpi_data.states[i].core_frequency * 1000;
+		/* Frequency may be rounded for these */
+		if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) {
+			powernow_table[i].frequency =
+				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
+		} else
+			powernow_table[i].frequency =
+				data->acpi_data.states[i].core_frequency * 1000;
 	}
 	return 0;
 }
-- 
cgit v0.10.2


From aae80dc24aeddec4e2b6e182a43491942f8667d3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 26 May 2009 18:35:27 +0200
Subject: ALSA: ctxfi - Add missing module parameter definitions

Added missing module_param*() and MODULE_PARM*().

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index 19310ed..e31e29e 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -23,13 +23,22 @@ MODULE_SUPPORTED_DEVICE("{{Creative Labs, Sound Blaster X-Fi}");
 
 static unsigned int reference_rate = 48000;
 static unsigned int multiple = 2;
+MODULE_PARM_DESC(reference_rate, "Reference rate (default=48000)");
 module_param(reference_rate, uint, S_IRUGO);
+MODULE_PARM_DESC(multiple, "Rate multiplier (default=2)");
 module_param(multiple, uint, S_IRUGO);
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
 static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 
+module_param_array(index, int, NULL, 0444);
+MODULE_PARM_DESC(index, "Index value for Creative X-Fi driver");
+module_param_array(id, charp, NULL, 0444);
+MODULE_PARM_DESC(id, "ID string for Creative X-Fi driver");
+module_param_array(enable, bool, NULL, 0444);
+MODULE_PARM_DESC(enable, "Enable Creative X-Fi driver");
+
 static struct pci_device_id ct_pci_dev_ids[] = {
 	/* only X-Fi is supported, so... */
 	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K1) },
-- 
cgit v0.10.2


From 97b07b699b11d4bd1218a841e5dfed16bd53de06 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 18:48:58 +0200
Subject: perf report: add --dump-raw-trace option

To help the inspection of various data files, implement an ASCII dump
method that just dumps the records as they are read in - then we exit.

[ Impact: new feature ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index e178190..8ea8aaa 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -20,6 +20,8 @@ static char		const *input_name = "output.perf";
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
+static int		dump_trace = 0;
+
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
 
@@ -643,7 +645,7 @@ static int __cmd_report(void)
 	char *buf;
 	event_t *event;
 	int ret, rc = EXIT_FAILURE;
-	unsigned long total = 0;
+	unsigned long total = 0, total_mmap = 0, total_comm = 0;
 
 	input = open(input_name, O_RDONLY);
 	if (input < 0) {
@@ -706,6 +708,13 @@ more:
 		struct thread *thread = threads__findnew(event->ip.pid);
 		uint64_t ip = event->ip.ip;
 
+		if (dump_trace) {
+			fprintf(stderr, "PERF_EVENT (IP, %d): %d: %p\n",
+				event->header.misc,
+				event->ip.pid,
+				(void *)event->ip.ip);
+		}
+
 		if (thread == NULL) {
 			fprintf(stderr, "problem processing %d event, bailing out\n",
 				event->header.type);
@@ -743,23 +752,40 @@ more:
 		struct thread *thread = threads__findnew(event->mmap.pid);
 		struct map *map = map__new(&event->mmap);
 
+		if (dump_trace) {
+			fprintf(stderr, "PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
+				(void *)event->mmap.start,
+				(void *)event->mmap.len,
+				(void *)event->mmap.pgoff,
+				event->mmap.filename);
+		}
 		if (thread == NULL || map == NULL) {
 			fprintf(stderr, "problem processing PERF_EVENT_MMAP, bailing out\n");
 			goto done;
 		}
 		thread__insert_map(thread, map);
+		total_mmap++;
 		break;
 	}
 	case PERF_EVENT_COMM: {
 		struct thread *thread = threads__findnew(event->comm.pid);
 
+		if (dump_trace) {
+			fprintf(stderr, "PERF_EVENT_COMM: %s:%d\n",
+				event->comm.comm, event->comm.pid);
+		}
 		if (thread == NULL ||
 		    thread__set_comm(thread, event->comm.comm)) {
 			fprintf(stderr, "problem processing PERF_EVENT_COMM, bailing out\n");
 			goto done;
 		}
+		total_comm++;
 		break;
 	}
+	default: {
+		fprintf(stderr, "skipping unknown header type: %d\n",
+			event->header.type);
+	}
 	}
 
 	if (offset + head < stat.st_size)
@@ -768,6 +794,15 @@ more:
 	rc = EXIT_SUCCESS;
 done:
 	close(input);
+
+	if (dump_trace) {
+		fprintf(stderr, "   IP events: %10ld\n", total);
+		fprintf(stderr, " mmap events: %10ld\n", total_mmap);
+		fprintf(stderr, " comm events: %10ld\n", total_comm);
+
+		return 0;
+	}
+
 	//dsos__fprintf(stdout);
 	threads__fprintf(stdout);
 #if 0
@@ -796,6 +831,8 @@ static const char * const report_usage[] = {
 static const struct option options[] = {
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From 3e70611460fe74ad32534fa9791774f6bbdd4159 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 18:53:17 +0200
Subject: perf report: add counter for unknown events

Add a counter for unknown event records.

[ Impact: improve debugging ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 8ea8aaa..4b5ccc5 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -645,7 +645,7 @@ static int __cmd_report(void)
 	char *buf;
 	event_t *event;
 	int ret, rc = EXIT_FAILURE;
-	unsigned long total = 0, total_mmap = 0, total_comm = 0;
+	unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown;
 
 	input = open(input_name, O_RDONLY);
 	if (input < 0) {
@@ -785,6 +785,7 @@ more:
 	default: {
 		fprintf(stderr, "skipping unknown header type: %d\n",
 			event->header.type);
+		total_unknown++;
 	}
 	}
 
@@ -796,9 +797,10 @@ done:
 	close(input);
 
 	if (dump_trace) {
-		fprintf(stderr, "   IP events: %10ld\n", total);
-		fprintf(stderr, " mmap events: %10ld\n", total_mmap);
-		fprintf(stderr, " comm events: %10ld\n", total_comm);
+		fprintf(stderr, "      IP events: %10ld\n", total);
+		fprintf(stderr, "    mmap events: %10ld\n", total_mmap);
+		fprintf(stderr, "    comm events: %10ld\n", total_comm);
+		fprintf(stderr, " unknown events: %10ld\n", total_unknown);
 
 		return 0;
 	}
-- 
cgit v0.10.2


From e4a5d54f924ea5ce2913d9d0687d034004816465 Mon Sep 17 00:00:00 2001
From: Ma Ling <ling.ma@intel.com>
Date: Tue, 26 May 2009 11:31:00 +0800
Subject: drm/i915: Add support for VGA load detection (pre-945).

Two approaches for VGA detections: hot plug detection for 945G onwards
and load pipe detection for Pre-945G.  Load pipe detection will get one free
pipe, set border color as red and blue, then check CRT status by
swf register.  This is a sync-up with the 2D driver.

Signed-off-by: Ma Ling <ling.ma@intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 19148c3..640f515 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -198,9 +198,142 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector)
 	return intel_ddc_probe(intel_output);
 }
 
+static enum drm_connector_status
+intel_crt_load_detect(struct drm_crtc *crtc, struct intel_output *intel_output)
+{
+	struct drm_encoder *encoder = &intel_output->enc;
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	uint32_t pipe = intel_crtc->pipe;
+	uint32_t save_bclrpat;
+	uint32_t save_vtotal;
+	uint32_t vtotal, vactive;
+	uint32_t vsample;
+	uint32_t vblank, vblank_start, vblank_end;
+	uint32_t dsl;
+	uint32_t bclrpat_reg;
+	uint32_t vtotal_reg;
+	uint32_t vblank_reg;
+	uint32_t vsync_reg;
+	uint32_t pipeconf_reg;
+	uint32_t pipe_dsl_reg;
+	uint8_t	st00;
+	enum drm_connector_status status;
+
+	if (pipe == 0) {
+		bclrpat_reg = BCLRPAT_A;
+		vtotal_reg = VTOTAL_A;
+		vblank_reg = VBLANK_A;
+		vsync_reg = VSYNC_A;
+		pipeconf_reg = PIPEACONF;
+		pipe_dsl_reg = PIPEADSL;
+	} else {
+		bclrpat_reg = BCLRPAT_B;
+		vtotal_reg = VTOTAL_B;
+		vblank_reg = VBLANK_B;
+		vsync_reg = VSYNC_B;
+		pipeconf_reg = PIPEBCONF;
+		pipe_dsl_reg = PIPEBDSL;
+	}
+
+	save_bclrpat = I915_READ(bclrpat_reg);
+	save_vtotal = I915_READ(vtotal_reg);
+	vblank = I915_READ(vblank_reg);
+
+	vtotal = ((save_vtotal >> 16) & 0xfff) + 1;
+	vactive = (save_vtotal & 0x7ff) + 1;
+
+	vblank_start = (vblank & 0xfff) + 1;
+	vblank_end = ((vblank >> 16) & 0xfff) + 1;
+
+	/* Set the border color to purple. */
+	I915_WRITE(bclrpat_reg, 0x500050);
+
+	if (IS_I9XX(dev)) {
+		uint32_t pipeconf = I915_READ(pipeconf_reg);
+		I915_WRITE(pipeconf_reg, pipeconf | PIPECONF_FORCE_BORDER);
+		/* Wait for next Vblank to substitue
+		 * border color for Color info */
+		intel_wait_for_vblank(dev);
+		st00 = I915_READ8(VGA_MSR_WRITE);
+		status = ((st00 & (1 << 4)) != 0) ?
+			connector_status_connected :
+			connector_status_disconnected;
+
+		I915_WRITE(pipeconf_reg, pipeconf);
+	} else {
+		bool restore_vblank = false;
+		int count, detect;
+
+		/*
+		* If there isn't any border, add some.
+		* Yes, this will flicker
+		*/
+		if (vblank_start <= vactive && vblank_end >= vtotal) {
+			uint32_t vsync = I915_READ(vsync_reg);
+			uint32_t vsync_start = (vsync & 0xffff) + 1;
+
+			vblank_start = vsync_start;
+			I915_WRITE(vblank_reg,
+				   (vblank_start - 1) |
+				   ((vblank_end - 1) << 16));
+			restore_vblank = true;
+		}
+		/* sample in the vertical border, selecting the larger one */
+		if (vblank_start - vactive >= vtotal - vblank_end)
+			vsample = (vblank_start + vactive) >> 1;
+		else
+			vsample = (vtotal + vblank_end) >> 1;
+
+		/*
+		 * Wait for the border to be displayed
+		 */
+		while (I915_READ(pipe_dsl_reg) >= vactive)
+			;
+		while ((dsl = I915_READ(pipe_dsl_reg)) <= vsample)
+			;
+		/*
+		 * Watch ST00 for an entire scanline
+		 */
+		detect = 0;
+		count = 0;
+		do {
+			count++;
+			/* Read the ST00 VGA status register */
+			st00 = I915_READ8(VGA_MSR_WRITE);
+			if (st00 & (1 << 4))
+				detect++;
+		} while ((I915_READ(pipe_dsl_reg) == dsl));
+
+		/* restore vblank if necessary */
+		if (restore_vblank)
+			I915_WRITE(vblank_reg, vblank);
+		/*
+		 * If more than 3/4 of the scanline detected a monitor,
+		 * then it is assumed to be present. This works even on i830,
+		 * where there isn't any way to force the border color across
+		 * the screen
+		 */
+		status = detect * 4 > count * 3 ?
+			 connector_status_connected :
+			 connector_status_disconnected;
+	}
+
+	/* Restore previous settings */
+	I915_WRITE(bclrpat_reg, save_bclrpat);
+
+	return status;
+}
+
 static enum drm_connector_status intel_crt_detect(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct drm_encoder *encoder = &intel_output->enc;
+	struct drm_crtc *crtc;
+	int dpms_mode;
+	enum drm_connector_status status;
 
 	if (IS_I9XX(dev) && !IS_I915G(dev) && !IS_I915GM(dev)) {
 		if (intel_crt_detect_hotplug(connector))
@@ -212,8 +345,20 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto
 	if (intel_crt_detect_ddc(connector))
 		return connector_status_connected;
 
-	/* TODO use load detect */
-	return connector_status_unknown;
+	/* for pre-945g platforms use load detect */
+	if (encoder->crtc && encoder->crtc->enabled) {
+		status = intel_crt_load_detect(encoder->crtc, intel_output);
+	} else {
+		crtc = intel_get_load_detect_pipe(intel_output,
+						  NULL, &dpms_mode);
+		if (crtc) {
+			status = intel_crt_load_detect(crtc, intel_output);
+			intel_release_load_detect_pipe(intel_output, dpms_mode);
+		} else
+			status = connector_status_unknown;
+	}
+
+	return status;
 }
 
 static void intel_crt_destroy(struct drm_connector *connector)
-- 
cgit v0.10.2


From f49515b157e2d3ca3633eb0664fc46c42f6cb37e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 26 May 2009 19:03:36 +0200
Subject: perf report: add more debugging

Add the offset of the file we are analyzing, and the size of the record.

In case of problems it's easier to see where the parser lost track.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 4b5ccc5..2d4e4cc 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -645,7 +645,7 @@ static int __cmd_report(void)
 	char *buf;
 	event_t *event;
 	int ret, rc = EXIT_FAILURE;
-	unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown;
+	unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown = 0;
 
 	input = open(input_name, O_RDONLY);
 	if (input < 0) {
@@ -699,8 +699,6 @@ more:
 		goto done;
 	}
 
-	head += event->header.size;
-
 	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
 		char level;
 		int show = 0;
@@ -709,7 +707,9 @@ more:
 		uint64_t ip = event->ip.ip;
 
 		if (dump_trace) {
-			fprintf(stderr, "PERF_EVENT (IP, %d): %d: %p\n",
+			fprintf(stderr, "%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+				(void *)(offset + head),
+				(void *)(long)(event->header.size),
 				event->header.misc,
 				event->ip.pid,
 				(void *)event->ip.ip);
@@ -753,7 +753,9 @@ more:
 		struct map *map = map__new(&event->mmap);
 
 		if (dump_trace) {
-			fprintf(stderr, "PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
+			fprintf(stderr, "%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
+				(void *)(offset + head),
+				(void *)(long)(event->header.size),
 				(void *)event->mmap.start,
 				(void *)event->mmap.len,
 				(void *)event->mmap.pgoff,
@@ -771,7 +773,9 @@ more:
 		struct thread *thread = threads__findnew(event->comm.pid);
 
 		if (dump_trace) {
-			fprintf(stderr, "PERF_EVENT_COMM: %s:%d\n",
+			fprintf(stderr, "%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+				(void *)(offset + head),
+				(void *)(long)(event->header.size),
 				event->comm.comm, event->comm.pid);
 		}
 		if (thread == NULL ||
@@ -783,12 +787,16 @@ more:
 		break;
 	}
 	default: {
-		fprintf(stderr, "skipping unknown header type: %d\n",
+		fprintf(stderr, "%p [%p]: skipping unknown header type: %d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
 			event->header.type);
 		total_unknown++;
 	}
 	}
 
+	head += event->header.size;
+
 	if (offset + head < stat.st_size)
 		goto more;
 
-- 
cgit v0.10.2


From 6142f9ec108a4ddbf0d5904c3daa5fdcaa618792 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 26 May 2009 20:51:47 +0200
Subject: perf report: More robust error handling

Don't let funny events confuse us, stick to what we know and
try to find sensible data again.

If we find an unknown event, check we're still u64 aligned, and
increment by one u64. This ensures we're bound to happen upon a
valid event soon.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 2d4e4cc..a58be7f 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -645,6 +645,7 @@ static int __cmd_report(void)
 	char *buf;
 	event_t *event;
 	int ret, rc = EXIT_FAILURE;
+	uint32_t size;
 	unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown = 0;
 
 	input = open(input_name, O_RDONLY);
@@ -680,6 +681,10 @@ remap:
 more:
 	event = (event_t *)(buf + head);
 
+	size = event->header.size;
+	if (!size)
+		size = 8;
+
 	if (head + event->header.size >= page_size * mmap_window) {
 		unsigned long shift = page_size * (head / page_size);
 		int ret;
@@ -692,12 +697,9 @@ more:
 		goto remap;
 	}
 
-
-	if (!event->header.size) {
-		fprintf(stderr, "zero-sized event at file offset %ld\n", offset + head);
-		fprintf(stderr, "skipping %ld bytes of events.\n", stat.st_size - offset - head);
-		goto done;
-	}
+	size = event->header.size;
+	if (!size)
+		goto broken_event;
 
 	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
 		char level;
@@ -787,15 +789,26 @@ more:
 		break;
 	}
 	default: {
+broken_event:
 		fprintf(stderr, "%p [%p]: skipping unknown header type: %d\n",
 			(void *)(offset + head),
 			(void *)(long)(event->header.size),
 			event->header.type);
 		total_unknown++;
+
+		/*
+		 * assume we lost track of the stream, check alignment, and
+		 * increment a single u64 in the hope to catch on again 'soon'.
+		 */
+
+		if (unlikely(head & 7))
+			head &= ~7ULL;
+
+		size = 8;
 	}
 	}
 
-	head += event->header.size;
+	head += size;
 
 	if (offset + head < stat.st_size)
 		goto more;
-- 
cgit v0.10.2


From be74b73a57645cc253d881ab0c1014eb64b9cf22 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 26 May 2009 20:25:22 +0200
Subject: tracing: add __print_flags for events

Developers have been asking for the ability in the ftrace event tracer
to display names of bits in a flags variable.

Instead of printing out c2, it would be easier to read FOO|BAR|GOO,
assuming that FOO is bit 1, BAR is bit 6 and GOO is bit 7.

Some examples where this would be useful are the state flags in a context
switch, kmalloc flags, and even permision flags in accessing files.

[
  v2 changes include:

  Frederic Weisbecker's idea of using a mask instead of bits,
  thus we can output GFP_KERNEL instead of GPF_WAIT|GFP_IO|GFP_FS.

  Li Zefan's idea of allowing the caller of __print_flags to add their
  own delimiter (or no delimiter) where we can get for file permissions
  rwx instead of r|w|x.
]

[
  v3 changes:

   Christoph Hellwig's idea of using an array instead of va_args.
]

[ Impact: better displaying of flags in trace output ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bae51dd..4b58cf1 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -3,12 +3,23 @@
 
 #include <linux/trace_seq.h>
 #include <linux/ring_buffer.h>
-
+#include <linux/percpu.h>
 
 struct trace_array;
 struct tracer;
 struct dentry;
 
+DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
+
+struct trace_print_flags {
+	unsigned long		mask;
+	const char		*name;
+};
+
+const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+				   unsigned long flags,
+				   const struct trace_print_flags *flag_array);
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index b5ff2e8..22c9471 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -87,6 +87,7 @@
  *	struct trace_seq *s = &iter->seq;
  *	struct ftrace_raw_<call> *field; <-- defined in stage 1
  *	struct trace_entry *entry;
+ *	struct trace_seq *p;
  *	int ret;
  *
  *	entry = iter->ent;
@@ -98,7 +99,9 @@
  *
  *	field = (typeof(field))entry;
  *
+ *	p = get_cpu_var(ftrace_event_seq);
  *	ret = trace_seq_printf(s, <TP_printk> "\n");
+ *	put_cpu();
  *	if (!ret)
  *		return TRACE_TYPE_PARTIAL_LINE;
  *
@@ -119,6 +122,14 @@
 #undef __get_str
 #define __get_str(field)	((char *)__entry + __entry->__str_loc_##field)
 
+#undef __print_flags
+#define __print_flags(flag, delim, flag_array...)			\
+	({								\
+		static const struct trace_print_flags flags[] =		\
+			{ flag_array, { -1, NULL }};			\
+		ftrace_print_flags_seq(p, delim, flag, flags);		\
+	})
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
 enum print_line_t							\
@@ -127,6 +138,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	struct trace_seq *s = &iter->seq;				\
 	struct ftrace_raw_##call *field;				\
 	struct trace_entry *entry;					\
+	struct trace_seq *p;						\
 	int ret;							\
 									\
 	entry = iter->ent;						\
@@ -138,7 +150,9 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 									\
 	field = (typeof(field))entry;					\
 									\
+	p = &get_cpu_var(ftrace_event_seq);				\
 	ret = trace_seq_printf(s, #call ": " print);			\
+	put_cpu();							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7136420..a4840c2 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -15,6 +15,9 @@
 #define EVENT_HASHSIZE	128
 
 static DECLARE_RWSEM(trace_event_mutex);
+
+DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
+
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
@@ -212,6 +215,42 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
 	return 0;
 }
 
+const char *
+ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+		       unsigned long flags,
+		       const struct trace_print_flags *flag_array)
+{
+	unsigned long mask;
+	const char *str;
+	int i;
+
+	trace_seq_init(p);
+
+	for (i = 0;  flag_array[i].name && flags; i++) {
+
+		mask = flag_array[i].mask;
+		if ((flags & mask) != mask)
+			continue;
+
+		str = flag_array[i].name;
+		flags &= ~mask;
+		if (p->len && delim)
+			trace_seq_puts(p, delim);
+		trace_seq_puts(p, str);
+	}
+
+	/* check for left over flags */
+	if (flags) {
+		if (p->len && delim)
+			trace_seq_puts(p, delim);
+		trace_seq_printf(p, "0x%lx", flags);
+	}
+
+	trace_seq_putc(p, 0);
+
+	return p->buffer;
+}
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
-- 
cgit v0.10.2


From 937cdb9db7f59278d0cb1582e6e64e3dfd73b4fc Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 15 May 2009 10:51:13 -0400
Subject: tracing: add previous task state info to sched switch event

It is useful to see the state of a task that is being switched out.
This patch adds the output of the state of the previous task in
the context switch event.

[ Impact: see state of switched out task in context switch ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index dd4033c..24ab5bc 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -156,6 +156,7 @@ TRACE_EVENT(sched_switch,
 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	prev_pid			)
 		__field(	int,	prev_prio			)
+		__field(	long,	prev_state			)
 		__array(	char,	next_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	next_pid			)
 		__field(	int,	next_prio			)
@@ -165,13 +166,19 @@ TRACE_EVENT(sched_switch,
 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 		__entry->prev_pid	= prev->pid;
 		__entry->prev_prio	= prev->prio;
+		__entry->prev_state	= prev->state;
 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 		__entry->next_pid	= next->pid;
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+	TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->prev_state ?
+		  __print_flags(__entry->prev_state, "|",
+				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
+				{ 128, "W" }) : "R",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
 
-- 
cgit v0.10.2


From 62ba180e80f4194a498585ac0e4c07daa8ca08d1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 15 May 2009 16:16:30 -0400
Subject: tracing: add flag output for kmem events

This patch changes the output for gfp_flags from being a simple hex value
to the actual names.

  gfp_flags=GFP_ATOMIC  instead of gfp_flags=00000020

And even

  gfp_flags=GFP_KERNEL instead of gfp_flags=000000d0

(Thanks to Frederic Weisbecker for pointing out that the first version
 had a bad order of GFP masks)

[ Impact: more human readable output from tracer ]

Acked-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index c22c42f..9baba50 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -7,6 +7,43 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kmem
 
+/*
+ * The order of these masks is important. Matching masks will be seen
+ * first and the left over flags will end up showing by themselves.
+ *
+ * For example, if we have GFP_KERNEL before GFP_USER we wil get:
+ *
+ *  GFP_KERNEL|GFP_HARDWALL
+ *
+ * Thus most bits set go first.
+ */
+#define show_gfp_flags(flags)						\
+	(flags) ? __print_flags(flags, "|",				\
+	{(unsigned long)GFP_HIGHUSER_MOVABLE,	"GFP_HIGHUSER_MOVABLE"}, \
+	{(unsigned long)GFP_HIGHUSER,		"GFP_HIGHUSER"},	\
+	{(unsigned long)GFP_USER,		"GFP_USER"},		\
+	{(unsigned long)GFP_TEMPORARY,		"GFP_TEMPORARY"},	\
+	{(unsigned long)GFP_KERNEL,		"GFP_KERNEL"},		\
+	{(unsigned long)GFP_NOFS,		"GFP_NOFS"},		\
+	{(unsigned long)GFP_ATOMIC,		"GFP_ATOMIC"},		\
+	{(unsigned long)GFP_NOIO,		"GFP_NOIO"},		\
+	{(unsigned long)__GFP_HIGH,		"GFP_HIGH"},		\
+	{(unsigned long)__GFP_WAIT,		"GFP_WAIT"},		\
+	{(unsigned long)__GFP_IO,		"GFP_IO"},		\
+	{(unsigned long)__GFP_COLD,		"GFP_COLD"},		\
+	{(unsigned long)__GFP_NOWARN,		"GFP_NOWARN"},		\
+	{(unsigned long)__GFP_REPEAT,		"GFP_REPEAT"},		\
+	{(unsigned long)__GFP_NOFAIL,		"GFP_NOFAIL"},		\
+	{(unsigned long)__GFP_NORETRY,		"GFP_NORETRY"},		\
+	{(unsigned long)__GFP_COMP,		"GFP_COMP"},		\
+	{(unsigned long)__GFP_ZERO,		"GFP_ZERO"},		\
+	{(unsigned long)__GFP_NOMEMALLOC,	"GFP_NOMEMALLOC"},	\
+	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
+	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
+	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
+	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"}		\
+	) : "GFP_NOWAIT"
+
 TRACE_EVENT(kmalloc,
 
 	TP_PROTO(unsigned long call_site,
@@ -33,12 +70,12 @@ TRACE_EVENT(kmalloc,
 		__entry->gfp_flags	= gfp_flags;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
 		__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		__entry->gfp_flags)
+		show_gfp_flags(__entry->gfp_flags))
 );
 
 TRACE_EVENT(kmem_cache_alloc,
@@ -67,12 +104,12 @@ TRACE_EVENT(kmem_cache_alloc,
 		__entry->gfp_flags	= gfp_flags;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
 		__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		__entry->gfp_flags)
+		show_gfp_flags(__entry->gfp_flags))
 );
 
 TRACE_EVENT(kmalloc_node,
@@ -104,12 +141,12 @@ TRACE_EVENT(kmalloc_node,
 		__entry->node		= node;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
 		__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		__entry->gfp_flags,
+		show_gfp_flags(__entry->gfp_flags),
 		__entry->node)
 );
 
@@ -142,12 +179,12 @@ TRACE_EVENT(kmem_cache_alloc_node,
 		__entry->node		= node;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
 		__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		__entry->gfp_flags,
+		show_gfp_flags(__entry->gfp_flags),
 		__entry->node)
 );
 
-- 
cgit v0.10.2


From 0f4fc29dd68dfab9c6ddd5d087d34a5b6818cb00 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 20 May 2009 19:21:47 -0400
Subject: tracing: add __print_symbolic to trace events

This patch adds __print_symbolic which is similar to __print_flags but
works for an enumeration type instead. That is, there is only a one to one
mapping between the values and the symbols. When a match is made, then
it is printed, otherwise the hex value is outputed.

[ Impact: add interface for showing symbol names in events ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4b58cf1..bbf40f6 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -20,6 +20,9 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 				   unsigned long flags,
 				   const struct trace_print_flags *flag_array);
 
+const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+				     const struct trace_print_flags *symbol_array);
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 22c9471..87fc227 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -130,6 +130,14 @@
 		ftrace_print_flags_seq(p, delim, flag, flags);		\
 	})
 
+#undef __print_symbolic
+#define __print_symbolic(value, symbol_array...)			\
+	({								\
+		static const struct trace_print_flags symbols[] =	\
+			{ symbol_array, { -1, NULL }};			\
+		ftrace_print_symbols_seq(p, value, symbols);		\
+	})
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
 enum print_line_t							\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index a4840c2..c12d95d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -251,6 +251,31 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 	return p->buffer;
 }
 
+const char *
+ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+			 const struct trace_print_flags *symbol_array)
+{
+	int i;
+
+	trace_seq_init(p);
+
+	for (i = 0;  symbol_array[i].name; i++) {
+
+		if (val != symbol_array[i].mask)
+			continue;
+
+		trace_seq_puts(p, symbol_array[i].name);
+		break;
+	}
+
+	if (!p->len)
+		trace_seq_printf(p, "0x%lx", val);
+		
+	trace_seq_putc(p, 0);
+
+	return p->buffer;
+}
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
-- 
cgit v0.10.2


From c2adae0970ca1db8adb92fb56ae3bcabd916e8bd Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 20 May 2009 19:56:19 -0400
Subject: tracing: convert irq events to use __print_symbolic

The recording of the names at trace time is inefficient. This patch
implements the softirq event recording to only record the vector
and then use the __print_symbolic interface to print out the names.

[ Impact: faster recording of softirq events ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 32a9f7e..683fb36 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -7,6 +7,19 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM irq
 
+#define softirq_name(sirq) { sirq, #sirq }
+#define show_softirq_name(val)						\
+	__print_symbolic(val,						\
+			 softirq_name(HI_SOFTIRQ),			\
+			 softirq_name(TIMER_SOFTIRQ),			\
+			 softirq_name(NET_TX_SOFTIRQ),			\
+			 softirq_name(NET_RX_SOFTIRQ),			\
+			 softirq_name(BLOCK_SOFTIRQ),			\
+			 softirq_name(TASKLET_SOFTIRQ),			\
+			 softirq_name(SCHED_SOFTIRQ),			\
+			 softirq_name(HRTIMER_SOFTIRQ),			\
+			 softirq_name(RCU_SOFTIRQ))
+
 /**
  * irq_handler_entry - called immediately before the irq action handler
  * @irq: irq number
@@ -87,15 +100,14 @@ TRACE_EVENT(softirq_entry,
 
 	TP_STRUCT__entry(
 		__field(	int,	vec			)
-		__string(	name,	softirq_to_name[h-vec]	)
 	),
 
 	TP_fast_assign(
 		__entry->vec = (int)(h - vec);
-		__assign_str(name, softirq_to_name[h-vec]);
 	),
 
-	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
+	TP_printk("softirq=%d action=%s", __entry->vec,
+		  show_softirq_name(__entry->vec))
 );
 
 /**
@@ -117,15 +129,14 @@ TRACE_EVENT(softirq_exit,
 
 	TP_STRUCT__entry(
 		__field(	int,	vec			)
-		__string(	name,	softirq_to_name[h-vec]	)
 	),
 
 	TP_fast_assign(
 		__entry->vec = (int)(h - vec);
-		__assign_str(name, softirq_to_name[h-vec]);
 	),
 
-	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
+	TP_printk("softirq=%d action=%s", __entry->vec,
+		  show_softirq_name(__entry->vec))
 );
 
 #endif /*  _TRACE_IRQ_H */
-- 
cgit v0.10.2


From 68743082b560067e3e93eab8b2568f238e486865 Mon Sep 17 00:00:00 2001
From: Vu Pham <vu@mellanox.com>
Date: Tue, 26 May 2009 14:51:00 -0400
Subject: XPRTRDMA: fix client rpcrdma FRMR registration on mlx4 devices

mlx4/connectX FRMR requires local write enable together with remote
rdma write enable. This fixes NFS/RDMA operation over the ConnectX
Infiniband HCA in the default memreg mode.

Signed-off-by: Vu Pham <vu@mellanox.com>
Signed-off-by: Tom Talpey <tmtalpey@gmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3b21e0c..465aafc 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1495,7 +1495,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
 	frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
 	frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
 	frmr_wr.wr.fast_reg.access_flags = (writing ?
-				IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
+				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+				IB_ACCESS_REMOTE_READ);
 	frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
 	DECR_CQCOUNT(&r_xprt->rx_ep);
 
-- 
cgit v0.10.2


From d0367a508af9cf97beb202935bb9ad8883d30cd1 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Tue, 26 May 2009 14:51:00 -0400
Subject: nfs: fix build error in nfsroot with initconst

fix build error with latest kbuild adjustments to initconst.

The commit a447c0932445f92ce6f4c1bd020f62c5097a7842 ("vfs: Use
const for kernel parser table") changed:

    static match_table_t __initdata tokens = {
to
    static match_table_t __initconst tokens = {

But the missing const causes popwerpc to fail with latest
updates to __initconst like this:

fs/nfs/nfsroot.c:400: error: __setup_str_nfs_root_setup causes a section type conflict
fs/nfs/nfsroot.c:400: error: __setup_str_nfs_root_setup causes a section type conflict

The bug is only present with kbuild-next.
Following patch has been build tested.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index d9ef602..e3ed590 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -129,7 +129,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t __initconst tokens = {
+static const match_table_t tokens __initconst = {
 	{Opt_port, "port=%u"},
 	{Opt_rsize, "rsize=%u"},
 	{Opt_wsize, "wsize=%u"},
-- 
cgit v0.10.2


From 95baa25c7321eb8613246acbf61b97911cc748d3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 26 May 2009 14:51:00 -0400
Subject: NFSv4: Fix the case where NFSv4 renewal fails

If the asynchronous lease renewal fails (usually due to a soft timeout),
then we _must_ schedule state recovery in order to ensure that we don't
lose the lease unnecessarily or, if the lease is already lost, that we
recover the locking state promptly...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a4d2426..4674f80 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2594,12 +2594,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data)
 	unsigned long timestamp = (unsigned long)data;
 
 	if (task->tk_status < 0) {
-		switch (task->tk_status) {
-			case -NFS4ERR_STALE_CLIENTID:
-			case -NFS4ERR_EXPIRED:
-			case -NFS4ERR_CB_PATH_DOWN:
-				nfs4_schedule_state_recovery(clp);
-		}
+		/* Unless we're shutting down, schedule state recovery! */
+		if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
+			nfs4_schedule_state_recovery(clp);
 		return;
 	}
 	spin_lock(&clp->cl_lock);
-- 
cgit v0.10.2


From ab2b7ebaad16226c9a5e85c5f384d19fa58a7459 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Tue, 26 May 2009 09:11:03 +0100
Subject: kmod: Release sub_info on cred allocation failure.

call_usermodehelper_setup() forgot to kfree(sub_info)
when prepare_usermodehelper_creds() failed.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/kmod.c b/kernel/kmod.c
index b750675..7e95bed 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -370,8 +370,10 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 	sub_info->argv = argv;
 	sub_info->envp = envp;
 	sub_info->cred = prepare_usermodehelper_creds();
-	if (!sub_info->cred)
+	if (!sub_info->cred) {
+		kfree(sub_info);
 		return NULL;
+	}
 
   out:
 	return sub_info;
-- 
cgit v0.10.2


From 564346224daaa8f7222d7a92cdbb7bafde59ae6e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 26 May 2009 20:54:41 +0930
Subject: lguest: fix on Intel when KVM loaded (unhandled trap 13)

When KVM is loaded, and hence VT set up, the vmcall instruction in an
lguest guest causes a #GP, not #UD.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 1a83910..eaf722f 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -358,6 +358,16 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 			if (emulate_insn(cpu))
 				return;
 		}
+		/* If KVM is active, the vmcall instruction triggers a
+		 * General Protection Fault.  Normally it triggers an
+		 * invalid opcode fault (6): */
+	case 6:
+		/* We need to check if ring == GUEST_PL and
+		 * faulting instruction == vmcall. */
+		if (is_hypercall(cpu)) {
+			rewrite_hypercall(cpu);
+			return;
+		}
 		break;
 	case 14: /* We've intercepted a Page Fault. */
 		/* The Guest accessed a virtual address that wasn't mapped.
@@ -403,15 +413,6 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 		 * up the pointer now to indicate a hypercall is pending. */
 		cpu->hcall = (struct hcall_args *)cpu->regs;
 		return;
-	case 6:
-		/* kvm hypercalls trigger an invalid opcode fault (6).
-		 * We need to check if ring == GUEST_PL and
-		 * faulting instruction == vmcall. */
-		if (is_hypercall(cpu)) {
-			rewrite_hypercall(cpu);
-			return;
-		}
-		break;
 	}
 
 	/* We didn't handle the trap, so it needs to go to the Guest. */
-- 
cgit v0.10.2


From 3a4b8cc70b7473a0b9f26f5b4ddc6579b5e214be Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 26 May 2009 16:19:04 -0300
Subject: perf report: Sort output by symbol usage

[acme@emilia ~]$ perf record find / > /dev/null 2>&1
[acme@emilia ~]$ perf stat perf report | head -20
 4.95          find [k] 0xffffffff81393d65 _spin_lock
 3.89          find [.] 0x000000000000af89 /usr/bin/find: <unknown>
 2.19          find [k] 0xffffffffa00518e0 ext3fs_dirhash
 1.87          find [k] 0xffffffff810a6cea __rcu_read_lock
 1.86          find [k] 0xffffffff811c7312 _atomic_dec_and_lock
 1.86          find [.] 0x00000000000782ab /lib64/libc-2.5.so: __GI_strlen
 1.85          find [k] 0xffffffff810fedfb __kmalloc
 1.62          find [.] 0x00000000000430ff /lib64/libc-2.5.so: vfprintf
 1.59          find [k] 0xffffffff810a6d6d __rcu_read_unlock
 1.55          find [k] 0xffffffff81119395 __d_lookup
 1.39          find [.] 0x0000000000071b40 /lib64/libc-2.5.so: _int_malloc
 1.30          find [k] 0xffffffffa031c4fc nfs_do_filldir
 1.21          find [k] 0xffffffff811876a5 avc_has_perm_noaudit
 1.15          find [k] 0xffffffff810fef62 kmem_cache_alloc
 1.07          find [k] 0xffffffff811d03fb copy_user_generic_string
 1.03          find [k] 0xffffffffa0043882 ext3_htree_store_dirent
 0.99          find [k] 0xffffffff81393ebb _spin_lock_bh
 0.98          find [k] 0xffffffffa03319a2 nfs3_decode_dirent
 0.97          find [k] 0xffffffff8100bf20 system_call
 0.92          find [k] 0xffffffff8139437e _spin_unlock

 Performance counter stats for 'perf':

     244.278972  task clock ticks     (msecs)
              8  context switches     (events)
              9  CPU migrations       (events)
           2104  pagefaults           (events)
       35329669  CPU cycles           (events)  (scaled from 75.40%)
       13740366  instructions         (events)  (scaled from 75.49%)
          59073  cache references     (events)  (scaled from 24.60%)
            196  cache misses         (events)  (scaled from 24.51%)

 Wall-clock time elapsed:   246.060717 msecs

[acme@emilia ~]$
[acme@emilia ~]$ grep "model name" /proc/cpuinfo | head -1
model name	: Intel(R) Xeon(R) CPU           E5405  @ 2.00GHz
[acme@emilia ~]$ grep "model name" /proc/cpuinfo | wc -l
8
[acme@emilia ~]$

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090526191904.GH4424@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index a58be7f..a4c6ffa 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -447,26 +447,33 @@ static size_t map__fprintf(struct map *self, FILE *fp)
 		       self->start, self->end, self->pgoff, self->dso->name);
 }
 
+struct thread;
+
+static const char *thread__name(struct thread *self, char *bf, size_t size);
+
 struct symhist {
 	struct rb_node	 rb_node;
 	struct dso	 *dso;
 	struct symbol	 *sym;
+	struct thread	 *thread;
 	uint64_t	 ip;
 	uint32_t	 count;
 	char		 level;
 };
 
 static struct symhist *symhist__new(struct symbol *sym, uint64_t ip,
-				    struct dso *dso, char level)
+				    struct thread *thread, struct dso *dso,
+				    char level)
 {
 	struct symhist *self = malloc(sizeof(*self));
 
 	if (self != NULL) {
-		self->sym   = sym;
-		self->ip    = ip;
-		self->dso   = dso;
-		self->level = level;
-		self->count = 1;
+		self->sym    = sym;
+		self->thread = thread;
+		self->ip     = ip;
+		self->dso    = dso;
+		self->level  = level;
+		self->count  = 1;
 	}
 
 	return self;
@@ -482,17 +489,29 @@ static void symhist__inc(struct symhist *self)
 	++self->count;
 }
 
-static size_t symhist__fprintf(struct symhist *self, FILE *fp)
+static size_t
+symhist__fprintf(struct symhist *self, uint64_t total_samples, FILE *fp)
 {
-	size_t ret = fprintf(fp, "%#llx [%c] ", (unsigned long long)self->ip, self->level);
+	char bf[32];
+	size_t ret;
+
+	if (total_samples)
+		ret = fprintf(fp, "%5.2f", (self->count * 100.0) / total_samples);
+	else
+		ret = fprintf(fp, "%12d", self->count);
+
+	ret += fprintf(fp, "%14s [%c] %#018llx ",
+		       thread__name(self->thread, bf, sizeof(bf)),
+		       self->level, (unsigned long long)self->ip);
 
 	if (self->level != '.')
-		ret += fprintf(fp, "%s", self->sym ? self->sym->name: "<unknown>");
+		ret += fprintf(fp, "%s\n",
+			       self->sym ? self->sym->name : "<unknown>");
 	else
-		ret += fprintf(fp, "%s: %s",
+		ret += fprintf(fp, "%s: %s\n",
 			       self->dso ? self->dso->name : "<unknown>",
 			       self->sym ? self->sym->name : "<unknown>");
-	return ret + fprintf(fp, ": %u\n", self->count);
+	return ret;
 }
 
 struct thread {
@@ -503,6 +522,15 @@ struct thread {
 	char		 *comm;
 };
 
+static const char *thread__name(struct thread *self, char *bf, size_t size)
+{
+	if (self->comm)
+		return self->comm;
+
+	snprintf(bf, sizeof(bf), ":%u", self->pid);
+	return bf;
+}
+
 static struct thread *thread__new(pid_t pid)
 {
 	struct thread *self = malloc(sizeof(*self));
@@ -542,7 +570,7 @@ static int thread__symbol_incnew(struct thread *self, struct symbol *sym,
 			p = &(*p)->rb_right;
 	}
 
-	sh = symhist__new(sym, ip, dso, level);
+	sh = symhist__new(sym, ip, self, dso, level);
 	if (sh == NULL)
 		return -ENOMEM;
 	rb_link_node(&sh->rb_node, parent, p);
@@ -574,7 +602,7 @@ static size_t thread__fprintf(struct thread *self, FILE *fp)
 
 	for (nd = rb_first(&self->symhists); nd; nd = rb_next(nd)) {
 		struct symhist *pos = rb_entry(nd, struct symhist, rb_node);
-		ret += symhist__fprintf(pos, fp);
+		ret += symhist__fprintf(pos, 0, fp);
 	}
 
 	return ret;
@@ -628,7 +656,7 @@ static struct map *thread__find_map(struct thread *self, uint64_t ip)
 	return NULL;
 }
 
-static void threads__fprintf(FILE *fp)
+void threads__fprintf(FILE *fp)
 {
 	struct rb_node *nd;
 	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
@@ -637,6 +665,61 @@ static void threads__fprintf(FILE *fp)
 	}
 }
 
+static struct rb_root global_symhists = RB_ROOT;
+
+static void threads__insert_symhist(struct symhist *sh)
+{
+	struct rb_node **p = &global_symhists.rb_node;
+	struct rb_node *parent = NULL;
+	struct symhist *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct symhist, rb_node);
+
+		/* Reverse order */
+		if (sh->count > iter->count)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&sh->rb_node, parent, p);
+	rb_insert_color(&sh->rb_node, &global_symhists);
+}
+
+static void threads__sort_symhists(void)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
+		struct thread *thread = rb_entry(nd, struct thread, rb_node);
+		struct rb_node *next = rb_first(&thread->symhists);
+
+		while (next) {
+			struct symhist *n = rb_entry(next, struct symhist,
+						     rb_node);
+			next = rb_next(&n->rb_node);
+			rb_erase(&n->rb_node, &thread->symhists);
+			threads__insert_symhist(n);
+		}
+
+	}
+}
+
+static size_t threads__symhists_fprintf(uint64_t total_samples, FILE *fp)
+{
+	struct rb_node *nd;
+	size_t ret = 0;
+
+	for (nd = rb_first(&global_symhists); nd; nd = rb_next(nd)) {
+		struct symhist *pos = rb_entry(nd, struct symhist, rb_node);
+		ret += symhist__fprintf(pos, total_samples, fp);
+	}
+
+	return ret;
+}
+
 static int __cmd_report(void)
 {
 	unsigned long offset = 0;
@@ -826,23 +909,9 @@ done:
 		return 0;
 	}
 
-	//dsos__fprintf(stdout);
-	threads__fprintf(stdout);
-#if 0
-	std::map<std::string, int>::iterator hi = hist.begin();
-
-	while (hi != hist.end()) {
-		rev_hist.insert(std::pair<int, std::string>(hi->second, hi->first));
-		hist.erase(hi++);
-	}
-
-	std::multimap<int, std::string>::const_iterator ri = rev_hist.begin();
+	threads__sort_symhists();
+	threads__symhists_fprintf(total, stdout);
 
-	while (ri != rev_hist.end()) {
-		printf(" %5.2f %s\n", (100.0 * ri->first)/total, ri->second.c_str());
-		ri++;
-	}
-#endif
 	return rc;
 }
 
-- 
cgit v0.10.2


From 2171787be2e71ff71159857bfeb21398b61eb615 Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Tue, 26 May 2009 10:33:35 -0700
Subject: x86: avoid back to back on_each_cpu in cpa_flush_array

Cleanup cpa_flush_array() to avoid back to back on_each_cpu() calls.

[ Impact: optimizes fix 0af48f42df15b97080b450d24219dd95db7b929a ]

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0f9052b..e17efed 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -204,30 +204,19 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	}
 }
 
-static void wbinvd_local(void *unused)
-{
-	wbinvd();
-}
-
 static void cpa_flush_array(unsigned long *start, int numpages, int cache,
 			    int in_flags, struct page **pages)
 {
 	unsigned int i, level;
+	unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
 
 	BUG_ON(irqs_disabled());
 
-	on_each_cpu(__cpa_flush_range, NULL, 1);
+	on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
 
-	if (!cache)
+	if (!cache || do_wbinvd)
 		return;
 
-	/* 4M threshold */
-	if (numpages >= 1024) {
-		if (boot_cpu_data.x86 >= 4)
-			on_each_cpu(wbinvd_local, NULL, 1);
-
-		return;
-	}
 	/*
 	 * We only need to flush on one CPU,
 	 * clflush is a MESI-coherent instruction that
-- 
cgit v0.10.2


From dbcc34756234596993a3b1304636f032e66d401f Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Tue, 26 May 2009 08:34:08 -0400
Subject: ASoC: Main rewite of the mpc5200 audio DMA code

Rewrite the mpc5200 audio DMA code to support both I2S and AC97.

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index dc79bdf..1918c78 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -25,7 +25,6 @@ config SND_SOC_MPC8610_HPCD
 config SND_SOC_MPC5200_I2S
 	tristate "Freescale MPC5200 PSC in I2S mode driver"
 	depends on PPC_MPC52xx && PPC_BESTCOMM
-	select SND_SOC_OF_SIMPLE
 	select SND_MPC52xx_DMA
 	select PPC_BESTCOMM_GEN_BD
 	help
diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c
index 6850392..efec33a 100644
--- a/sound/soc/fsl/mpc5200_dma.c
+++ b/sound/soc/fsl/mpc5200_dma.c
@@ -3,23 +3,13 @@
  * ALSA SoC Platform driver
  *
  * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ * Copyright (C) 2009 Jon Smirl, Digispeaker
  */
 
-#include <linux/init.h>
 #include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/device.h>
-#include <linux/delay.h>
 #include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/dma-mapping.h>
 
-#include <sound/core.h>
-#include <sound/pcm.h>
-#include <sound/pcm_params.h>
-#include <sound/initval.h>
 #include <sound/soc.h>
-#include <sound/soc-of-simple.h>
 
 #include <sysdev/bestcomm/bestcomm.h>
 #include <sysdev/bestcomm/gen_bd.h>
@@ -27,10 +17,6 @@
 
 #include "mpc5200_dma.h"
 
-MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
-MODULE_DESCRIPTION("Freescale MPC5200 PSC in DMA mode ASoC Driver");
-MODULE_LICENSE("GPL");
-
 /*
  * Interrupt handlers
  */
@@ -50,7 +36,7 @@ static irqreturn_t psc_dma_status_irq(int irq, void *_psc_dma)
 	if (psc_dma->capture.active && (isr & MPC52xx_PSC_IMR_ORERR))
 		psc_dma->stats.overrun_count++;
 
-	out_8(&regs->command, 4 << 4);	/* reset the error status */
+	out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
 
 	return IRQ_HANDLED;
 }
@@ -81,21 +67,36 @@ static void psc_dma_bcom_enqueue_next_buffer(struct psc_dma_stream *s)
 		s->period_next_pt = s->period_start;
 }
 
+static void psc_dma_bcom_enqueue_tx(struct psc_dma_stream *s)
+{
+	while (s->appl_ptr < s->runtime->control->appl_ptr) {
+
+		if (bcom_queue_full(s->bcom_task))
+			return;
+
+		s->appl_ptr += s->period_size;
+
+		psc_dma_bcom_enqueue_next_buffer(s);
+	}
+}
+
 /* Bestcomm DMA irq handler */
-static irqreturn_t psc_dma_bcom_irq(int irq, void *_psc_dma_stream)
+static irqreturn_t psc_dma_bcom_irq_tx(int irq, void *_psc_dma_stream)
 {
 	struct psc_dma_stream *s = _psc_dma_stream;
 
+	spin_lock(&s->psc_dma->lock);
 	/* For each finished period, dequeue the completed period buffer
 	 * and enqueue a new one in it's place. */
 	while (bcom_buffer_done(s->bcom_task)) {
 		bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
+
 		s->period_current_pt += s->period_bytes;
 		if (s->period_current_pt >= s->period_end)
 			s->period_current_pt = s->period_start;
-		psc_dma_bcom_enqueue_next_buffer(s);
-		bcom_enable(s->bcom_task);
 	}
+	psc_dma_bcom_enqueue_tx(s);
+	spin_unlock(&s->psc_dma->lock);
 
 	/* If the stream is active, then also inform the PCM middle layer
 	 * of the period finished event. */
@@ -105,49 +106,33 @@ static irqreturn_t psc_dma_bcom_irq(int irq, void *_psc_dma_stream)
 	return IRQ_HANDLED;
 }
 
-/**
- * psc_dma_startup: create a new substream
- *
- * This is the first function called when a stream is opened.
- *
- * If this is the first stream open, then grab the IRQ and program most of
- * the PSC registers.
- */
-int psc_dma_startup(struct snd_pcm_substream *substream,
-			   struct snd_soc_dai *dai)
+static irqreturn_t psc_dma_bcom_irq_rx(int irq, void *_psc_dma_stream)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
-	int rc;
+	struct psc_dma_stream *s = _psc_dma_stream;
 
-	dev_dbg(psc_dma->dev, "psc_dma_startup(substream=%p)\n", substream);
+	spin_lock(&s->psc_dma->lock);
+	/* For each finished period, dequeue the completed period buffer
+	 * and enqueue a new one in it's place. */
+	while (bcom_buffer_done(s->bcom_task)) {
+		bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
 
-	if (!psc_dma->playback.active &&
-	    !psc_dma->capture.active) {
-		/* Setup the IRQs */
-		rc = request_irq(psc_dma->irq, &psc_dma_status_irq, IRQF_SHARED,
-				 "psc-dma-status", psc_dma);
-		rc |= request_irq(psc_dma->capture.irq,
-				  &psc_dma_bcom_irq, IRQF_SHARED,
-				  "psc-dma-capture", &psc_dma->capture);
-		rc |= request_irq(psc_dma->playback.irq,
-				  &psc_dma_bcom_irq, IRQF_SHARED,
-				  "psc-dma-playback", &psc_dma->playback);
-		if (rc) {
-			free_irq(psc_dma->irq, psc_dma);
-			free_irq(psc_dma->capture.irq,
-				 &psc_dma->capture);
-			free_irq(psc_dma->playback.irq,
-				 &psc_dma->playback);
-			return -ENODEV;
-		}
+		s->period_current_pt += s->period_bytes;
+		if (s->period_current_pt >= s->period_end)
+			s->period_current_pt = s->period_start;
+
+		psc_dma_bcom_enqueue_next_buffer(s);
 	}
+	spin_unlock(&s->psc_dma->lock);
 
-	return 0;
+	/* If the stream is active, then also inform the PCM middle layer
+	 * of the period finished event. */
+	if (s->active)
+		snd_pcm_period_elapsed(s->stream);
+
+	return IRQ_HANDLED;
 }
 
-int psc_dma_hw_free(struct snd_pcm_substream *substream,
-			   struct snd_soc_dai *dai)
+static int psc_dma_hw_free(struct snd_pcm_substream *substream)
 {
 	snd_pcm_set_runtime_buffer(substream, NULL);
 	return 0;
@@ -159,8 +144,7 @@ int psc_dma_hw_free(struct snd_pcm_substream *substream,
  * This function is called by ALSA to start, stop, pause, and resume the DMA
  * transfer of data.
  */
-int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd,
-			   struct snd_soc_dai *dai)
+static int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
@@ -168,8 +152,8 @@ int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd,
 	struct psc_dma_stream *s;
 	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
 	u16 imr;
-	u8 psc_cmd;
 	unsigned long flags;
+	int i;
 
 	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
 		s = &psc_dma->capture;
@@ -189,68 +173,48 @@ int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd,
 				(s->period_bytes * runtime->periods);
 		s->period_next_pt = s->period_start;
 		s->period_current_pt = s->period_start;
+		s->period_size = runtime->period_size;
 		s->active = 1;
 
-		/* First; reset everything */
+		/* track appl_ptr so that we have a better chance of detecting
+		 * end of stream and not over running it.
+		 */
+		s->runtime = runtime;
+		s->appl_ptr = s->runtime->control->appl_ptr -
+				(runtime->period_size * runtime->periods);
+
+		/* Fill up the bestcomm bd queue and enable DMA.
+		 * This will begin filling the PSC's fifo.
+		 */
+		spin_lock_irqsave(&psc_dma->lock, flags);
+
 		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
-			out_8(&regs->command, MPC52xx_PSC_RST_RX);
-			out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
+			bcom_gen_bd_rx_reset(s->bcom_task);
+			for (i = 0; i < runtime->periods; i++)
+				if (!bcom_queue_full(s->bcom_task))
+					psc_dma_bcom_enqueue_next_buffer(s);
 		} else {
-			out_8(&regs->command, MPC52xx_PSC_RST_TX);
-			out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
+			bcom_gen_bd_tx_reset(s->bcom_task);
+			psc_dma_bcom_enqueue_tx(s);
 		}
 
-		/* Next, fill up the bestcomm bd queue and enable DMA.
-		 * This will begin filling the PSC's fifo. */
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
-			bcom_gen_bd_rx_reset(s->bcom_task);
-		else
-			bcom_gen_bd_tx_reset(s->bcom_task);
-		while (!bcom_queue_full(s->bcom_task))
-			psc_dma_bcom_enqueue_next_buffer(s);
 		bcom_enable(s->bcom_task);
-
-		/* Due to errata in the dma mode; need to line up enabling
-		 * the transmitter with a transition on the frame sync
-		 * line */
-
-		spin_lock_irqsave(&psc_dma->lock, flags);
-		/* first make sure it is low */
-		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) != 0)
-			;
-		/* then wait for the transition to high */
-		while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) == 0)
-			;
-		/* Finally, enable the PSC.
-		 * Receiver must always be enabled; even when we only want
-		 * transmit.  (see 15.3.2.3 of MPC5200B User's Guide) */
-		psc_cmd = MPC52xx_PSC_RX_ENABLE;
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			psc_cmd |= MPC52xx_PSC_TX_ENABLE;
-		out_8(&regs->command, psc_cmd);
 		spin_unlock_irqrestore(&psc_dma->lock, flags);
 
+		out_8(&regs->command, MPC52xx_PSC_RST_ERR_STAT);
+
 		break;
 
 	case SNDRV_PCM_TRIGGER_STOP:
-		/* Turn off the PSC */
 		s->active = 0;
-		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
-			if (!psc_dma->playback.active) {
-				out_8(&regs->command, 2 << 4);	/* reset rx */
-				out_8(&regs->command, 3 << 4);	/* reset tx */
-				out_8(&regs->command, 4 << 4);	/* reset err */
-			}
-		} else {
-			out_8(&regs->command, 3 << 4);	/* reset tx */
-			out_8(&regs->command, 4 << 4);	/* reset err */
-			if (!psc_dma->capture.active)
-				out_8(&regs->command, 2 << 4);	/* reset rx */
-		}
 
+		spin_lock_irqsave(&psc_dma->lock, flags);
 		bcom_disable(s->bcom_task);
-		while (!bcom_queue_empty(s->bcom_task))
-			bcom_retrieve_buffer(s->bcom_task, NULL, NULL);
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+			bcom_gen_bd_rx_reset(s->bcom_task);
+		else
+			bcom_gen_bd_tx_reset(s->bcom_task);
+		spin_unlock_irqrestore(&psc_dma->lock, flags);
 
 		break;
 
@@ -265,44 +229,11 @@ int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd,
 		imr |= MPC52xx_PSC_IMR_TXEMP;
 	if (psc_dma->capture.active)
 		imr |= MPC52xx_PSC_IMR_ORERR;
-	out_be16(&regs->isr_imr.imr, imr);
+	out_be16(&regs->isr_imr.imr, psc_dma->imr | imr);
 
 	return 0;
 }
 
-/**
- * psc_dma_shutdown: shutdown the data transfer on a stream
- *
- * Shutdown the PSC if there are no other substreams open.
- */
-void psc_dma_shutdown(struct snd_pcm_substream *substream,
-			     struct snd_soc_dai *dai)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
-
-	dev_dbg(psc_dma->dev, "psc_dma_shutdown(substream=%p)\n", substream);
-
-	/*
-	 * If this is the last active substream, disable the PSC and release
-	 * the IRQ.
-	 */
-	if (!psc_dma->playback.active &&
-	    !psc_dma->capture.active) {
-
-		/* Disable all interrupts and reset the PSC */
-		out_be16(&psc_dma->psc_regs->isr_imr.imr, 0);
-		out_8(&psc_dma->psc_regs->command, 3 << 4); /* reset tx */
-		out_8(&psc_dma->psc_regs->command, 2 << 4); /* reset rx */
-		out_8(&psc_dma->psc_regs->command, 1 << 4); /* reset mode */
-		out_8(&psc_dma->psc_regs->command, 4 << 4); /* reset error */
-
-		/* Release irqs */
-		free_irq(psc_dma->irq, psc_dma);
-		free_irq(psc_dma->capture.irq, &psc_dma->capture);
-		free_irq(psc_dma->playback.irq, &psc_dma->playback);
-	}
-}
 
 /* ---------------------------------------------------------------------
  * The PSC DMA 'ASoC platform' driver
@@ -312,62 +243,78 @@ void psc_dma_shutdown(struct snd_pcm_substream *substream,
  * interaction with the attached codec
  */
 
-static const struct snd_pcm_hardware psc_dma_pcm_hardware = {
+static const struct snd_pcm_hardware psc_dma_hardware = {
 	.info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
 		SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
 		SNDRV_PCM_INFO_BATCH,
 	.formats = SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_BE |
-		   SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S32_BE,
+		SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S32_BE,
 	.rate_min = 8000,
 	.rate_max = 48000,
-	.channels_min = 2,
+	.channels_min = 1,
 	.channels_max = 2,
 	.period_bytes_max	= 1024 * 1024,
 	.period_bytes_min	= 32,
 	.periods_min		= 2,
 	.periods_max		= 256,
 	.buffer_bytes_max	= 2 * 1024 * 1024,
-	.fifo_size		= 0,
+	.fifo_size		= 512,
 };
 
-static int psc_dma_pcm_open(struct snd_pcm_substream *substream)
+static int psc_dma_open(struct snd_pcm_substream *substream)
 {
+	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
 	struct psc_dma_stream *s;
+	int rc;
 
-	dev_dbg(psc_dma->dev, "psc_dma_pcm_open(substream=%p)\n", substream);
+	dev_dbg(psc_dma->dev, "psc_dma_open(substream=%p)\n", substream);
 
 	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
 		s = &psc_dma->capture;
 	else
 		s = &psc_dma->playback;
 
-	snd_soc_set_runtime_hwparams(substream, &psc_dma_pcm_hardware);
+	snd_soc_set_runtime_hwparams(substream, &psc_dma_hardware);
+
+	rc = snd_pcm_hw_constraint_integer(runtime,
+		SNDRV_PCM_HW_PARAM_PERIODS);
+	if (rc < 0) {
+		dev_err(substream->pcm->card->dev, "invalid buffer size\n");
+		return rc;
+	}
 
 	s->stream = substream;
 	return 0;
 }
 
-static int psc_dma_pcm_close(struct snd_pcm_substream *substream)
+static int psc_dma_close(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
 	struct psc_dma_stream *s;
 
-	dev_dbg(psc_dma->dev, "psc_dma_pcm_close(substream=%p)\n", substream);
+	dev_dbg(psc_dma->dev, "psc_dma_close(substream=%p)\n", substream);
 
 	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
 		s = &psc_dma->capture;
 	else
 		s = &psc_dma->playback;
 
+	if (!psc_dma->playback.active &&
+	    !psc_dma->capture.active) {
+
+		/* Disable all interrupts and reset the PSC */
+		out_be16(&psc_dma->psc_regs->isr_imr.imr, psc_dma->imr);
+		out_8(&psc_dma->psc_regs->command, 4 << 4); /* reset error */
+	}
 	s->stream = NULL;
 	return 0;
 }
 
 static snd_pcm_uframes_t
-psc_dma_pcm_pointer(struct snd_pcm_substream *substream)
+psc_dma_pointer(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
@@ -384,60 +331,78 @@ psc_dma_pcm_pointer(struct snd_pcm_substream *substream)
 	return bytes_to_frames(substream->runtime, count);
 }
 
-static struct snd_pcm_ops psc_dma_pcm_ops = {
-	.open		= psc_dma_pcm_open,
-	.close		= psc_dma_pcm_close,
+static int
+psc_dma_hw_params(struct snd_pcm_substream *substream,
+			 struct snd_pcm_hw_params *params)
+{
+	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
+
+	return 0;
+}
+
+static struct snd_pcm_ops psc_dma_ops = {
+	.open		= psc_dma_open,
+	.close		= psc_dma_close,
+	.hw_free	= psc_dma_hw_free,
 	.ioctl		= snd_pcm_lib_ioctl,
-	.pointer	= psc_dma_pcm_pointer,
+	.pointer	= psc_dma_pointer,
+	.trigger	= psc_dma_trigger,
+	.hw_params	= psc_dma_hw_params,
 };
 
-static u64 psc_dma_pcm_dmamask = 0xffffffff;
-static int psc_dma_pcm_new(struct snd_card *card, struct snd_soc_dai *dai,
+static u64 psc_dma_dmamask = 0xffffffff;
+static int psc_dma_new(struct snd_card *card, struct snd_soc_dai *dai,
 			   struct snd_pcm *pcm)
 {
 	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	size_t size = psc_dma_pcm_hardware.buffer_bytes_max;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+	size_t size = psc_dma_hardware.buffer_bytes_max;
 	int rc = 0;
 
-	dev_dbg(rtd->socdev->dev, "psc_dma_pcm_new(card=%p, dai=%p, pcm=%p)\n",
+	dev_dbg(rtd->socdev->dev, "psc_dma_new(card=%p, dai=%p, pcm=%p)\n",
 		card, dai, pcm);
 
 	if (!card->dev->dma_mask)
-		card->dev->dma_mask = &psc_dma_pcm_dmamask;
+		card->dev->dma_mask = &psc_dma_dmamask;
 	if (!card->dev->coherent_dma_mask)
 		card->dev->coherent_dma_mask = 0xffffffff;
 
 	if (pcm->streams[0].substream) {
-		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->dev, size,
-					&pcm->streams[0].substream->dma_buffer);
+		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->card->dev,
+				size, &pcm->streams[0].substream->dma_buffer);
 		if (rc)
 			goto playback_alloc_err;
 	}
 
 	if (pcm->streams[1].substream) {
-		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->dev, size,
-					&pcm->streams[1].substream->dma_buffer);
+		rc = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, pcm->card->dev,
+				size, &pcm->streams[1].substream->dma_buffer);
 		if (rc)
 			goto capture_alloc_err;
 	}
 
+	if (rtd->socdev->card->codec->ac97)
+		rtd->socdev->card->codec->ac97->private_data = psc_dma;
+
 	return 0;
 
  capture_alloc_err:
 	if (pcm->streams[0].substream)
 		snd_dma_free_pages(&pcm->streams[0].substream->dma_buffer);
+
  playback_alloc_err:
 	dev_err(card->dev, "Cannot allocate buffer(s)\n");
+
 	return -ENOMEM;
 }
 
-static void psc_dma_pcm_free(struct snd_pcm *pcm)
+static void psc_dma_free(struct snd_pcm *pcm)
 {
 	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
 	struct snd_pcm_substream *substream;
 	int stream;
 
-	dev_dbg(rtd->socdev->dev, "psc_dma_pcm_free(pcm=%p)\n", pcm);
+	dev_dbg(rtd->socdev->dev, "psc_dma_free(pcm=%p)\n", pcm);
 
 	for (stream = 0; stream < 2; stream++) {
 		substream = pcm->streams[stream].substream;
@@ -449,10 +414,151 @@ static void psc_dma_pcm_free(struct snd_pcm *pcm)
 	}
 }
 
-struct snd_soc_platform psc_dma_pcm_soc_platform = {
+struct snd_soc_platform mpc5200_audio_dma_platform = {
 	.name		= "mpc5200-psc-audio",
-	.pcm_ops	= &psc_dma_pcm_ops,
-	.pcm_new	= &psc_dma_pcm_new,
-	.pcm_free	= &psc_dma_pcm_free,
+	.pcm_ops	= &psc_dma_ops,
+	.pcm_new	= &psc_dma_new,
+	.pcm_free	= &psc_dma_free,
 };
+EXPORT_SYMBOL_GPL(mpc5200_audio_dma_platform);
+
+int mpc5200_audio_dma_create(struct of_device *op)
+{
+	phys_addr_t fifo;
+	struct psc_dma *psc_dma;
+	struct resource res;
+	int size, irq, rc;
+	const __be32 *prop;
+	void __iomem *regs;
+
+	/* Fetch the registers and IRQ of the PSC */
+	irq = irq_of_parse_and_map(op->node, 0);
+	if (of_address_to_resource(op->node, 0, &res)) {
+		dev_err(&op->dev, "Missing reg property\n");
+		return -ENODEV;
+	}
+	regs = ioremap(res.start, 1 + res.end - res.start);
+	if (!regs) {
+		dev_err(&op->dev, "Could not map registers\n");
+		return -ENODEV;
+	}
+
+	/* Allocate and initialize the driver private data */
+	psc_dma = kzalloc(sizeof *psc_dma, GFP_KERNEL);
+	if (!psc_dma) {
+		iounmap(regs);
+		return -ENOMEM;
+	}
+
+	/* Get the PSC ID */
+	prop = of_get_property(op->node, "cell-index", &size);
+	if (!prop || size < sizeof *prop)
+		return -ENODEV;
+
+	spin_lock_init(&psc_dma->lock);
+	psc_dma->id = be32_to_cpu(*prop);
+	psc_dma->irq = irq;
+	psc_dma->psc_regs = regs;
+	psc_dma->fifo_regs = regs + sizeof *psc_dma->psc_regs;
+	psc_dma->dev = &op->dev;
+	psc_dma->playback.psc_dma = psc_dma;
+	psc_dma->capture.psc_dma = psc_dma;
+	snprintf(psc_dma->name, sizeof psc_dma->name, "PSC%u", psc_dma->id);
+
+	/* Find the address of the fifo data registers and setup the
+	 * DMA tasks */
+	fifo = res.start + offsetof(struct mpc52xx_psc, buffer.buffer_32);
+	psc_dma->capture.bcom_task =
+		bcom_psc_gen_bd_rx_init(psc_dma->id, 10, fifo, 512);
+	psc_dma->playback.bcom_task =
+		bcom_psc_gen_bd_tx_init(psc_dma->id, 10, fifo);
+	if (!psc_dma->capture.bcom_task ||
+	    !psc_dma->playback.bcom_task) {
+		dev_err(&op->dev, "Could not allocate bestcomm tasks\n");
+		iounmap(regs);
+		kfree(psc_dma);
+		return -ENODEV;
+	}
+
+	/* Disable all interrupts and reset the PSC */
+	out_be16(&psc_dma->psc_regs->isr_imr.imr, psc_dma->imr);
+	 /* reset receiver */
+	out_8(&psc_dma->psc_regs->command, MPC52xx_PSC_RST_RX);
+	 /* reset transmitter */
+	out_8(&psc_dma->psc_regs->command, MPC52xx_PSC_RST_TX);
+	 /* reset error */
+	out_8(&psc_dma->psc_regs->command, MPC52xx_PSC_RST_ERR_STAT);
+	 /* reset mode */
+	out_8(&psc_dma->psc_regs->command, MPC52xx_PSC_SEL_MODE_REG_1);
+
+	/* Set up mode register;
+	 * First write: RxRdy (FIFO Alarm) generates rx FIFO irq
+	 * Second write: register Normal mode for non loopback
+	 */
+	out_8(&psc_dma->psc_regs->mode, 0);
+	out_8(&psc_dma->psc_regs->mode, 0);
+
+	/* Set the TX and RX fifo alarm thresholds */
+	out_be16(&psc_dma->fifo_regs->rfalarm, 0x100);
+	out_8(&psc_dma->fifo_regs->rfcntl, 0x4);
+	out_be16(&psc_dma->fifo_regs->tfalarm, 0x100);
+	out_8(&psc_dma->fifo_regs->tfcntl, 0x7);
+
+	/* Lookup the IRQ numbers */
+	psc_dma->playback.irq =
+		bcom_get_task_irq(psc_dma->playback.bcom_task);
+	psc_dma->capture.irq =
+		bcom_get_task_irq(psc_dma->capture.bcom_task);
+
+	rc = request_irq(psc_dma->irq, &psc_dma_status_irq, IRQF_SHARED,
+			 "psc-dma-status", psc_dma);
+	rc |= request_irq(psc_dma->capture.irq,
+			  &psc_dma_bcom_irq_rx, IRQF_SHARED,
+			  "psc-dma-capture", &psc_dma->capture);
+	rc |= request_irq(psc_dma->playback.irq,
+			  &psc_dma_bcom_irq_tx, IRQF_SHARED,
+			  "psc-dma-playback", &psc_dma->playback);
+	if (rc) {
+		free_irq(psc_dma->irq, psc_dma);
+		free_irq(psc_dma->capture.irq,
+			 &psc_dma->capture);
+		free_irq(psc_dma->playback.irq,
+			 &psc_dma->playback);
+		return -ENODEV;
+	}
 
+	/* Save what we've done so it can be found again later */
+	dev_set_drvdata(&op->dev, psc_dma);
+
+	/* Tell the ASoC OF helpers about it */
+	return snd_soc_register_platform(&mpc5200_audio_dma_platform);
+}
+EXPORT_SYMBOL_GPL(mpc5200_audio_dma_create);
+
+int mpc5200_audio_dma_destroy(struct of_device *op)
+{
+	struct psc_dma *psc_dma = dev_get_drvdata(&op->dev);
+
+	dev_dbg(&op->dev, "mpc5200_audio_dma_destroy()\n");
+
+	snd_soc_unregister_platform(&mpc5200_audio_dma_platform);
+
+	bcom_gen_bd_rx_release(psc_dma->capture.bcom_task);
+	bcom_gen_bd_tx_release(psc_dma->playback.bcom_task);
+
+	/* Release irqs */
+	free_irq(psc_dma->irq, psc_dma);
+	free_irq(psc_dma->capture.irq, &psc_dma->capture);
+	free_irq(psc_dma->playback.irq, &psc_dma->playback);
+
+	iounmap(psc_dma->psc_regs);
+	kfree(psc_dma);
+	dev_set_drvdata(&op->dev, NULL);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mpc5200_audio_dma_destroy);
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("Freescale MPC5200 PSC in DMA mode ASoC Driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/fsl/mpc5200_dma.h b/sound/soc/fsl/mpc5200_dma.h
index a33232c..2000803 100644
--- a/sound/soc/fsl/mpc5200_dma.h
+++ b/sound/soc/fsl/mpc5200_dma.h
@@ -5,8 +5,10 @@
 #ifndef __SOUND_SOC_FSL_MPC5200_DMA_H__
 #define __SOUND_SOC_FSL_MPC5200_DMA_H__
 
+#define PSC_STREAM_NAME_LEN 32
+
 /**
- * psc_dma_stream - Data specific to a single stream (playback or capture)
+ * psc_ac97_stream - Data specific to a single stream (playback or capture)
  * @active:		flag indicating if the stream is active
  * @psc_dma:		pointer back to parent psc_dma data structure
  * @bcom_task:		bestcomm task structure
@@ -17,6 +19,9 @@
  * @period_bytes:	size of DMA period in bytes
  */
 struct psc_dma_stream {
+	struct snd_pcm_runtime *runtime;
+	snd_pcm_uframes_t appl_ptr;
+
 	int active;
 	struct psc_dma *psc_dma;
 	struct bcom_task *bcom_task;
@@ -27,6 +32,7 @@ struct psc_dma_stream {
 	dma_addr_t period_next_pt;
 	dma_addr_t period_current_pt;
 	int period_bytes;
+	int period_size;
 };
 
 /**
@@ -48,9 +54,12 @@ struct psc_dma {
 	struct mpc52xx_psc_fifo __iomem *fifo_regs;
 	unsigned int irq;
 	struct device *dev;
-	struct snd_soc_dai dai;
 	spinlock_t lock;
 	u32 sicr;
+	uint sysclk;
+	int imr;
+	int id;
+	unsigned int slots;
 
 	/* per-stream data */
 	struct psc_dma_stream playback;
@@ -58,24 +67,14 @@ struct psc_dma {
 
 	/* Statistics */
 	struct {
-		int overrun_count;
-		int underrun_count;
+		unsigned long overrun_count;
+		unsigned long underrun_count;
 	} stats;
 };
 
+int mpc5200_audio_dma_create(struct of_device *op);
+int mpc5200_audio_dma_destroy(struct of_device *op);
 
-int psc_dma_startup(struct snd_pcm_substream *substream,
-			   struct snd_soc_dai *dai);
-
-int psc_dma_hw_free(struct snd_pcm_substream *substream,
-			   struct snd_soc_dai *dai);
-
-void psc_dma_shutdown(struct snd_pcm_substream *substream,
-			     struct snd_soc_dai *dai);
-
-int psc_dma_trigger(struct snd_pcm_substream *substream, int cmd,
-			   struct snd_soc_dai *dai);
-
-extern struct snd_soc_platform psc_dma_pcm_soc_platform;
+extern struct snd_soc_platform mpc5200_audio_dma_platform;
 
 #endif /* __SOUND_SOC_FSL_MPC5200_DMA_H__ */
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c
index 12a7917..ce8de90 100644
--- a/sound/soc/fsl/mpc5200_psc_i2s.c
+++ b/sound/soc/fsl/mpc5200_psc_i2s.c
@@ -3,34 +3,22 @@
  * ALSA SoC Digital Audio Interface (DAI) driver
  *
  * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ * Copyright (C) 2009 Jon Smirl, Digispeaker
  */
 
-#include <linux/init.h>
 #include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/device.h>
-#include <linux/delay.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
-#include <linux/dma-mapping.h>
 
-#include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
-#include <sound/initval.h>
 #include <sound/soc.h>
-#include <sound/soc-of-simple.h>
 
-#include <sysdev/bestcomm/bestcomm.h>
-#include <sysdev/bestcomm/gen_bd.h>
 #include <asm/mpc52xx_psc.h>
 
+#include "mpc5200_psc_i2s.h"
 #include "mpc5200_dma.h"
 
-MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
-MODULE_DESCRIPTION("Freescale MPC5200 PSC in I2S mode ASoC Driver");
-MODULE_LICENSE("GPL");
-
 /**
  * PSC_I2S_RATES: sample rates supported by the I2S
  *
@@ -46,8 +34,7 @@ MODULE_LICENSE("GPL");
  * PSC_I2S_FORMATS: audio formats supported by the PSC I2S mode
  */
 #define PSC_I2S_FORMATS (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_BE | \
-			 SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S24_BE | \
-			 SNDRV_PCM_FMTBIT_S32_BE)
+			 SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S32_BE)
 
 static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 				 struct snd_pcm_hw_params *params,
@@ -82,8 +69,6 @@ static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 	}
 	out_be32(&psc_dma->psc_regs->sicr, psc_dma->sicr | mode);
 
-	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
-
 	return 0;
 }
 
@@ -140,16 +125,13 @@ static int psc_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int format)
  * psc_i2s_dai_template: template CPU Digital Audio Interface
  */
 static struct snd_soc_dai_ops psc_i2s_dai_ops = {
-	.startup	= psc_dma_startup,
 	.hw_params	= psc_i2s_hw_params,
-	.hw_free	= psc_dma_hw_free,
-	.shutdown	= psc_dma_shutdown,
-	.trigger	= psc_dma_trigger,
 	.set_sysclk	= psc_i2s_set_sysclk,
 	.set_fmt	= psc_i2s_set_fmt,
 };
 
-static struct snd_soc_dai psc_i2s_dai_template = {
+struct snd_soc_dai psc_i2s_dai[] = {{
+	.name   = "I2S",
 	.playback = {
 		.channels_min = 2,
 		.channels_max = 2,
@@ -163,71 +145,8 @@ static struct snd_soc_dai psc_i2s_dai_template = {
 		.formats = PSC_I2S_FORMATS,
 	},
 	.ops = &psc_i2s_dai_ops,
-};
-
-/* ---------------------------------------------------------------------
- * Sysfs attributes for debugging
- */
-
-static ssize_t psc_i2s_status_show(struct device *dev,
-			   struct device_attribute *attr, char *buf)
-{
-	struct psc_dma *psc_dma = dev_get_drvdata(dev);
-
-	return sprintf(buf, "status=%.4x sicr=%.8x rfnum=%i rfstat=0x%.4x "
-			"tfnum=%i tfstat=0x%.4x\n",
-			in_be16(&psc_dma->psc_regs->sr_csr.status),
-			in_be32(&psc_dma->psc_regs->sicr),
-			in_be16(&psc_dma->fifo_regs->rfnum) & 0x1ff,
-			in_be16(&psc_dma->fifo_regs->rfstat),
-			in_be16(&psc_dma->fifo_regs->tfnum) & 0x1ff,
-			in_be16(&psc_dma->fifo_regs->tfstat));
-}
-
-static int *psc_i2s_get_stat_attr(struct psc_dma *psc_dma, const char *name)
-{
-	if (strcmp(name, "playback_underrun") == 0)
-		return &psc_dma->stats.underrun_count;
-	if (strcmp(name, "capture_overrun") == 0)
-		return &psc_dma->stats.overrun_count;
-
-	return NULL;
-}
-
-static ssize_t psc_i2s_stat_show(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct psc_dma *psc_dma = dev_get_drvdata(dev);
-	int *attrib;
-
-	attrib = psc_i2s_get_stat_attr(psc_dma, attr->attr.name);
-	if (!attrib)
-		return 0;
-
-	return sprintf(buf, "%i\n", *attrib);
-}
-
-static ssize_t psc_i2s_stat_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf,
-				  size_t count)
-{
-	struct psc_dma *psc_dma = dev_get_drvdata(dev);
-	int *attrib;
-
-	attrib = psc_i2s_get_stat_attr(psc_dma, attr->attr.name);
-	if (!attrib)
-		return 0;
-
-	*attrib = simple_strtoul(buf, NULL, 0);
-	return count;
-}
-
-static DEVICE_ATTR(status, 0644, psc_i2s_status_show, NULL);
-static DEVICE_ATTR(playback_underrun, 0644, psc_i2s_stat_show,
-			psc_i2s_stat_store);
-static DEVICE_ATTR(capture_overrun, 0644, psc_i2s_stat_show,
-			psc_i2s_stat_store);
+} };
+EXPORT_SYMBOL_GPL(psc_i2s_dai);
 
 /* ---------------------------------------------------------------------
  * OF platform bus binding code:
@@ -237,82 +156,26 @@ static DEVICE_ATTR(capture_overrun, 0644, psc_i2s_stat_show,
 static int __devinit psc_i2s_of_probe(struct of_device *op,
 				      const struct of_device_id *match)
 {
-	phys_addr_t fifo;
+	int rc;
 	struct psc_dma *psc_dma;
-	struct resource res;
-	int size, psc_id, irq, rc;
-	const __be32 *prop;
-	void __iomem *regs;
-
-	dev_dbg(&op->dev, "probing psc i2s device\n");
-
-	/* Get the PSC ID */
-	prop = of_get_property(op->node, "cell-index", &size);
-	if (!prop || size < sizeof *prop)
-		return -ENODEV;
-	psc_id = be32_to_cpu(*prop);
-
-	/* Fetch the registers and IRQ of the PSC */
-	irq = irq_of_parse_and_map(op->node, 0);
-	if (of_address_to_resource(op->node, 0, &res)) {
-		dev_err(&op->dev, "Missing reg property\n");
-		return -ENODEV;
-	}
-	regs = ioremap(res.start, 1 + res.end - res.start);
-	if (!regs) {
-		dev_err(&op->dev, "Could not map registers\n");
-		return -ENODEV;
-	}
+	struct mpc52xx_psc __iomem *regs;
 
-	/* Allocate and initialize the driver private data */
-	psc_dma = kzalloc(sizeof *psc_dma, GFP_KERNEL);
-	if (!psc_dma) {
-		iounmap(regs);
-		return -ENOMEM;
-	}
-	spin_lock_init(&psc_dma->lock);
-	psc_dma->irq = irq;
-	psc_dma->psc_regs = regs;
-	psc_dma->fifo_regs = regs + sizeof *psc_dma->psc_regs;
-	psc_dma->dev = &op->dev;
-	psc_dma->playback.psc_dma = psc_dma;
-	psc_dma->capture.psc_dma = psc_dma;
-	snprintf(psc_dma->name, sizeof psc_dma->name, "PSC%u", psc_id+1);
-
-	/* Fill out the CPU DAI structure */
-	memcpy(&psc_dma->dai, &psc_i2s_dai_template, sizeof psc_dma->dai);
-	psc_dma->dai.private_data = psc_dma;
-	psc_dma->dai.name = psc_dma->name;
-	psc_dma->dai.id = psc_id;
-
-	/* Find the address of the fifo data registers and setup the
-	 * DMA tasks */
-	fifo = res.start + offsetof(struct mpc52xx_psc, buffer.buffer_32);
-	psc_dma->capture.bcom_task =
-		bcom_psc_gen_bd_rx_init(psc_id, 10, fifo, 512);
-	psc_dma->playback.bcom_task =
-		bcom_psc_gen_bd_tx_init(psc_id, 10, fifo);
-	if (!psc_dma->capture.bcom_task ||
-	    !psc_dma->playback.bcom_task) {
-		dev_err(&op->dev, "Could not allocate bestcomm tasks\n");
-		iounmap(regs);
-		kfree(psc_dma);
-		return -ENODEV;
+	rc = mpc5200_audio_dma_create(op);
+	if (rc != 0)
+		return rc;
+
+	rc = snd_soc_register_dais(psc_i2s_dai, ARRAY_SIZE(psc_i2s_dai));
+	if (rc != 0) {
+		pr_err("Failed to register DAI\n");
+		return 0;
 	}
 
-	/* Disable all interrupts and reset the PSC */
-	out_be16(&psc_dma->psc_regs->isr_imr.imr, 0);
-	out_8(&psc_dma->psc_regs->command, 3 << 4); /* reset transmitter */
-	out_8(&psc_dma->psc_regs->command, 2 << 4); /* reset receiver */
-	out_8(&psc_dma->psc_regs->command, 1 << 4); /* reset mode */
-	out_8(&psc_dma->psc_regs->command, 4 << 4); /* reset error */
+	psc_dma = dev_get_drvdata(&op->dev);
+	regs = psc_dma->psc_regs;
 
 	/* Configure the serial interface mode; defaulting to CODEC8 mode */
 	psc_dma->sicr = MPC52xx_PSC_SICR_DTS1 | MPC52xx_PSC_SICR_I2S |
 			MPC52xx_PSC_SICR_CLKPOL;
-	if (of_get_property(op->node, "fsl,cellslave", NULL))
-		psc_dma->sicr |= MPC52xx_PSC_SICR_CELLSLAVE |
-				 MPC52xx_PSC_SICR_GENCLK;
 	out_be32(&psc_dma->psc_regs->sicr,
 		 psc_dma->sicr | MPC52xx_PSC_SICR_SIM_CODEC_8);
 
@@ -321,66 +184,37 @@ static int __devinit psc_i2s_of_probe(struct of_device *op,
 	if (!of_get_property(op->node, "codec-handle", NULL))
 		return 0;
 
-	/* Set up mode register;
-	 * First write: RxRdy (FIFO Alarm) generates rx FIFO irq
-	 * Second write: register Normal mode for non loopback
-	 */
-	out_8(&psc_dma->psc_regs->mode, 0);
-	out_8(&psc_dma->psc_regs->mode, 0);
-
-	/* Set the TX and RX fifo alarm thresholds */
-	out_be16(&psc_dma->fifo_regs->rfalarm, 0x100);
-	out_8(&psc_dma->fifo_regs->rfcntl, 0x4);
-	out_be16(&psc_dma->fifo_regs->tfalarm, 0x100);
-	out_8(&psc_dma->fifo_regs->tfcntl, 0x7);
-
-	/* Lookup the IRQ numbers */
-	psc_dma->playback.irq =
-		bcom_get_task_irq(psc_dma->playback.bcom_task);
-	psc_dma->capture.irq =
-		bcom_get_task_irq(psc_dma->capture.bcom_task);
-
-	/* Save what we've done so it can be found again later */
-	dev_set_drvdata(&op->dev, psc_dma);
-
-	/* Register the SYSFS files */
-	rc = device_create_file(psc_dma->dev, &dev_attr_status);
-	rc |= device_create_file(psc_dma->dev, &dev_attr_capture_overrun);
-	rc |= device_create_file(psc_dma->dev, &dev_attr_playback_underrun);
-	if (rc)
-		dev_info(psc_dma->dev, "error creating sysfs files\n");
-
-	snd_soc_register_platform(&psc_dma_pcm_soc_platform);
-
-	/* Tell the ASoC OF helpers about it */
-	of_snd_soc_register_platform(&psc_dma_pcm_soc_platform, op->node,
-				     &psc_dma->dai);
+	/* Due to errata in the dma mode; need to line up enabling
+	 * the transmitter with a transition on the frame sync
+	 * line */
+
+	/* first make sure it is low */
+	while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) != 0)
+		;
+	/* then wait for the transition to high */
+	while ((in_8(&regs->ipcr_acr.ipcr) & 0x80) == 0)
+		;
+	/* Finally, enable the PSC.
+	 * Receiver must always be enabled; even when we only want
+	 * transmit.  (see 15.3.2.3 of MPC5200B User's Guide) */
+
+	/* Go */
+	out_8(&psc_dma->psc_regs->command,
+			MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE);
 
 	return 0;
+
 }
 
 static int __devexit psc_i2s_of_remove(struct of_device *op)
 {
-	struct psc_dma *psc_dma = dev_get_drvdata(&op->dev);
-
-	dev_dbg(&op->dev, "psc_i2s_remove()\n");
-
-	snd_soc_unregister_platform(&psc_dma_pcm_soc_platform);
-
-	bcom_gen_bd_rx_release(psc_dma->capture.bcom_task);
-	bcom_gen_bd_tx_release(psc_dma->playback.bcom_task);
-
-	iounmap(psc_dma->psc_regs);
-	iounmap(psc_dma->fifo_regs);
-	kfree(psc_dma);
-	dev_set_drvdata(&op->dev, NULL);
-
-	return 0;
+	return mpc5200_audio_dma_destroy(op);
 }
 
 /* Match table for of_platform binding */
 static struct of_device_id psc_i2s_match[] __devinitdata = {
 	{ .compatible = "fsl,mpc5200-psc-i2s", },
+	{ .compatible = "fsl,mpc5200b-psc-i2s", },
 	{}
 };
 MODULE_DEVICE_TABLE(of, psc_i2s_match);
@@ -411,4 +245,7 @@ static void __exit psc_i2s_exit(void)
 }
 module_exit(psc_i2s_exit);
 
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("Freescale MPC5200 PSC in I2S mode ASoC Driver");
+MODULE_LICENSE("GPL");
 
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.h b/sound/soc/fsl/mpc5200_psc_i2s.h
new file mode 100644
index 0000000..ce55e07
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_psc_i2s.h
@@ -0,0 +1,12 @@
+/*
+ * Freescale MPC5200 PSC in I2S mode
+ * ALSA SoC Digital Audio Interface (DAI) driver
+ *
+ */
+
+#ifndef __SOUND_SOC_FSL_MPC52xx_PSC_I2S_H__
+#define __SOUND_SOC_FSL_MPC52xx_PSC_I2S_H__
+
+extern struct snd_soc_dai psc_i2s_dai[];
+
+#endif /* __SOUND_SOC_FSL_MPC52xx_PSC_I2S_H__ */
-- 
cgit v0.10.2


From 20d0e1520ed1ba8aad05f416245446de0f7ec4bb Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Tue, 26 May 2009 08:34:10 -0400
Subject: ASoC: AC97 driver for mpc5200

I've implemented retries for when the AC97 hardware doesn't reset on
first try. About 10% of the time both the Efika and pcm030 AC97 codecs
don't reset on first try and need to be poked multiple times.  Failure
is indicated by not having the link clock start ticking. Every once in
a while even five pokes won't get the link started and I have to power
cycle.

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 1918c78..3bce952 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -29,3 +29,14 @@ config SND_SOC_MPC5200_I2S
 	select PPC_BESTCOMM_GEN_BD
 	help
 	  Say Y here to support the MPC5200 PSCs in I2S mode.
+
+config SND_SOC_MPC5200_AC97
+	tristate "Freescale MPC5200 PSC in AC97 mode driver"
+	depends on PPC_MPC52xx && PPC_BESTCOMM
+	select AC97_BUS
+	select SND_MPC52xx_DMA
+	select PPC_BESTCOMM_GEN_BD
+	help
+	  Say Y here to support the MPC5200 PSCs in AC97 mode.
+
+
diff --git a/sound/soc/fsl/Makefile b/sound/soc/fsl/Makefile
index 7731ef2..14631a1 100644
--- a/sound/soc/fsl/Makefile
+++ b/sound/soc/fsl/Makefile
@@ -13,4 +13,5 @@ obj-$(CONFIG_SND_SOC_MPC8610) += snd-soc-fsl-ssi.o snd-soc-fsl-dma.o
 # MPC5200 Platform Support
 obj-$(CONFIG_SND_MPC52xx_DMA) += mpc5200_dma.o
 obj-$(CONFIG_SND_SOC_MPC5200_I2S) += mpc5200_psc_i2s.o
+obj-$(CONFIG_SND_SOC_MPC5200_AC97) += mpc5200_psc_ac97.o
 
diff --git a/sound/soc/fsl/mpc5200_psc_ac97.c b/sound/soc/fsl/mpc5200_psc_ac97.c
new file mode 100644
index 0000000..9f2df15
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_psc_ac97.c
@@ -0,0 +1,331 @@
+/*
+ * linux/sound/mpc5200-ac97.c -- AC97 support for the Freescale MPC52xx chip.
+ *
+ * Copyright (C) 2009 Jon Smirl, Digispeaker
+ * Author: Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+
+#include <asm/time.h>
+#include <asm/delay.h>
+#include <asm/mpc52xx_psc.h>
+
+#include "mpc5200_dma.h"
+#include "mpc5200_psc_ac97.h"
+
+#define DRV_NAME "mpc5200-psc-ac97"
+
+/* ALSA only supports a single AC97 device so static is recommend here */
+static struct psc_dma *psc_dma;
+
+static unsigned short psc_ac97_read(struct snd_ac97 *ac97, unsigned short reg)
+{
+	int rc;
+	unsigned int val;
+
+	/* Wait for command send status zero = ready */
+	spin_event_timeout(!(in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_CMDSEND), 100, 0, rc);
+	if (rc == 0) {
+		pr_err("timeout on ac97 bus (rdy)\n");
+		return -ENODEV;
+	}
+	/* Send the read */
+	out_be32(&psc_dma->psc_regs->ac97_cmd, (1<<31) | ((reg & 0x7f) << 24));
+
+	/* Wait for the answer */
+	spin_event_timeout((in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_DATA_VAL), 100, 0, rc);
+	if (rc == 0) {
+		pr_err("timeout on ac97 read (val) %x\n",
+				in_be16(&psc_dma->psc_regs->sr_csr.status));
+		return -ENODEV;
+	}
+	/* Get the data */
+	val = in_be32(&psc_dma->psc_regs->ac97_data);
+	if (((val >> 24) & 0x7f) != reg) {
+		pr_err("reg echo error on ac97 read\n");
+		return -ENODEV;
+	}
+	val = (val >> 8) & 0xffff;
+
+	return (unsigned short) val;
+}
+
+static void psc_ac97_write(struct snd_ac97 *ac97,
+				unsigned short reg, unsigned short val)
+{
+	int rc;
+
+	/* Wait for command status zero = ready */
+	spin_event_timeout(!(in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_CMDSEND), 100, 0, rc);
+	if (rc == 0) {
+		pr_err("timeout on ac97 bus (write)\n");
+		return;
+	}
+	/* Write data */
+	out_be32(&psc_dma->psc_regs->ac97_cmd,
+			((reg & 0x7f) << 24) | (val << 8));
+}
+
+static void psc_ac97_warm_reset(struct snd_ac97 *ac97)
+{
+	int rc;
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
+
+	out_be32(&regs->sicr, psc_dma->sicr | MPC52xx_PSC_SICR_AWR);
+	spin_event_timeout(0, 3, 0, rc);
+	out_be32(&regs->sicr, psc_dma->sicr);
+}
+
+static void psc_ac97_cold_reset(struct snd_ac97 *ac97)
+{
+	int rc;
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
+
+	/* Do a cold reset */
+	out_8(&regs->op1, MPC52xx_PSC_OP_RES);
+	spin_event_timeout(0, 10, 0, rc);
+	out_8(&regs->op0, MPC52xx_PSC_OP_RES);
+	spin_event_timeout(0, 50, 0, rc);
+	psc_ac97_warm_reset(ac97);
+}
+
+struct snd_ac97_bus_ops soc_ac97_ops = {
+	.read		= psc_ac97_read,
+	.write		= psc_ac97_write,
+	.reset		= psc_ac97_cold_reset,
+	.warm_reset	= psc_ac97_warm_reset,
+};
+EXPORT_SYMBOL_GPL(soc_ac97_ops);
+
+static int psc_ac97_hw_analog_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *cpu_dai)
+{
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+
+	dev_dbg(psc_dma->dev, "%s(substream=%p) p_size=%i p_bytes=%i"
+		" periods=%i buffer_size=%i  buffer_bytes=%i channels=%i"
+		" rate=%i format=%i\n",
+		__func__, substream, params_period_size(params),
+		params_period_bytes(params), params_periods(params),
+		params_buffer_size(params), params_buffer_bytes(params),
+		params_channels(params), params_rate(params),
+		params_format(params));
+
+
+	if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE) {
+		if (params_channels(params) == 1)
+			psc_dma->slots |= 0x00000100;
+		else
+			psc_dma->slots |= 0x00000300;
+	} else {
+		if (params_channels(params) == 1)
+			psc_dma->slots |= 0x01000000;
+		else
+			psc_dma->slots |= 0x03000000;
+	}
+	out_be32(&psc_dma->psc_regs->ac97_slots, psc_dma->slots);
+
+	return 0;
+}
+
+static int psc_ac97_hw_digital_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *cpu_dai)
+{
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+
+	if (params_channels(params) == 1)
+		out_be32(&psc_dma->psc_regs->ac97_slots, 0x01000000);
+	else
+		out_be32(&psc_dma->psc_regs->ac97_slots, 0x03000000);
+
+	return 0;
+}
+
+static int psc_ac97_trigger(struct snd_pcm_substream *substream, int cmd,
+							struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct psc_dma *psc_dma = rtd->dai->cpu_dai->private_data;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_STOP:
+		if (substream->pstr->stream == SNDRV_PCM_STREAM_CAPTURE)
+			psc_dma->slots &= 0xFFFF0000;
+		else
+			psc_dma->slots &= 0x0000FFFF;
+
+		out_be32(&psc_dma->psc_regs->ac97_slots, psc_dma->slots);
+		break;
+	}
+	return 0;
+}
+
+static int psc_ac97_probe(struct platform_device *pdev,
+					struct snd_soc_dai *cpu_dai)
+{
+	struct psc_dma *psc_dma = cpu_dai->private_data;
+	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
+
+	/* Go */
+	out_8(&regs->command, MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE);
+	return 0;
+}
+
+/* ---------------------------------------------------------------------
+ * ALSA SoC Bindings
+ *
+ * - Digital Audio Interface (DAI) template
+ * - create/destroy dai hooks
+ */
+
+/**
+ * psc_ac97_dai_template: template CPU Digital Audio Interface
+ */
+static struct snd_soc_dai_ops psc_ac97_analog_ops = {
+	.hw_params	= psc_ac97_hw_analog_params,
+	.trigger	= psc_ac97_trigger,
+};
+
+static struct snd_soc_dai_ops psc_ac97_digital_ops = {
+	.hw_params	= psc_ac97_hw_digital_params,
+};
+
+struct snd_soc_dai psc_ac97_dai[] = {
+{
+	.name   = "AC97",
+	.ac97_control = 1,
+	.probe	= psc_ac97_probe,
+	.playback = {
+		.channels_min   = 1,
+		.channels_max   = 6,
+		.rates          = SNDRV_PCM_RATE_8000_48000,
+		.formats = SNDRV_PCM_FMTBIT_S32_BE,
+	},
+	.capture = {
+		.channels_min   = 1,
+		.channels_max   = 2,
+		.rates          = SNDRV_PCM_RATE_8000_48000,
+		.formats = SNDRV_PCM_FMTBIT_S32_BE,
+	},
+	.ops = &psc_ac97_analog_ops,
+},
+{
+	.name   = "SPDIF",
+	.ac97_control = 1,
+	.playback = {
+		.channels_min   = 1,
+		.channels_max   = 2,
+		.rates          = SNDRV_PCM_RATE_32000 | \
+			SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000,
+		.formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE,
+	},
+	.ops = &psc_ac97_digital_ops,
+} };
+EXPORT_SYMBOL_GPL(psc_ac97_dai);
+
+
+
+/* ---------------------------------------------------------------------
+ * OF platform bus binding code:
+ * - Probe/remove operations
+ * - OF device match table
+ */
+static int __devinit psc_ac97_of_probe(struct of_device *op,
+				      const struct of_device_id *match)
+{
+	int rc, i;
+	struct snd_ac97 ac97;
+	struct mpc52xx_psc __iomem *regs;
+
+	rc = mpc5200_audio_dma_create(op);
+	if (rc != 0)
+		return rc;
+
+	for (i = 0; i < ARRAY_SIZE(psc_ac97_dai); i++)
+		psc_ac97_dai[i].dev = &op->dev;
+
+	rc = snd_soc_register_dais(psc_ac97_dai, ARRAY_SIZE(psc_ac97_dai));
+	if (rc != 0) {
+		dev_err(&op->dev, "Failed to register DAI\n");
+		return rc;
+	}
+
+	psc_dma = dev_get_drvdata(&op->dev);
+	regs = psc_dma->psc_regs;
+	ac97.private_data = psc_dma;
+
+	for (i = 0; i < ARRAY_SIZE(psc_ac97_dai); i++)
+		psc_ac97_dai[i].private_data = psc_dma;
+
+	psc_dma->imr = 0;
+	out_be16(&psc_dma->psc_regs->isr_imr.imr, psc_dma->imr);
+
+	/* Configure the serial interface mode to AC97 */
+	psc_dma->sicr = MPC52xx_PSC_SICR_SIM_AC97 | MPC52xx_PSC_SICR_ENAC97;
+	out_be32(&regs->sicr, psc_dma->sicr);
+
+	/* No slots active */
+	out_be32(&regs->ac97_slots, 0x00000000);
+
+	return 0;
+}
+
+static int __devexit psc_ac97_of_remove(struct of_device *op)
+{
+	return mpc5200_audio_dma_destroy(op);
+}
+
+/* Match table for of_platform binding */
+static struct of_device_id psc_ac97_match[] __devinitdata = {
+	{ .compatible = "fsl,mpc5200-psc-ac97", },
+	{ .compatible = "fsl,mpc5200b-psc-ac97", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, psc_ac97_match);
+
+static struct of_platform_driver psc_ac97_driver = {
+	.match_table = psc_ac97_match,
+	.probe = psc_ac97_of_probe,
+	.remove = __devexit_p(psc_ac97_of_remove),
+	.driver = {
+		.name = "mpc5200-psc-ac97",
+		.owner = THIS_MODULE,
+	},
+};
+
+/* ---------------------------------------------------------------------
+ * Module setup and teardown; simply register the of_platform driver
+ * for the PSC in AC97 mode.
+ */
+static int __init psc_ac97_init(void)
+{
+	return of_register_platform_driver(&psc_ac97_driver);
+}
+module_init(psc_ac97_init);
+
+static void __exit psc_ac97_exit(void)
+{
+	of_unregister_platform_driver(&psc_ac97_driver);
+}
+module_exit(psc_ac97_exit);
+
+MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
+MODULE_DESCRIPTION("mpc5200 AC97 module");
+MODULE_LICENSE("GPL");
+
diff --git a/sound/soc/fsl/mpc5200_psc_ac97.h b/sound/soc/fsl/mpc5200_psc_ac97.h
new file mode 100644
index 0000000..4bc18c3
--- /dev/null
+++ b/sound/soc/fsl/mpc5200_psc_ac97.h
@@ -0,0 +1,15 @@
+/*
+ * Freescale MPC5200 PSC in AC97 mode
+ * ALSA SoC Digital Audio Interface (DAI) driver
+ *
+ */
+
+#ifndef __SOUND_SOC_FSL_MPC52xx_PSC_AC97_H__
+#define __SOUND_SOC_FSL_MPC52xx_PSC_AC97_H__
+
+extern struct snd_soc_dai psc_ac97_dai[];
+
+#define MPC5200_AC97_NORMAL 0
+#define MPC5200_AC97_SPDIF 1
+
+#endif /* __SOUND_SOC_FSL_MPC52xx_PSC_AC97_H__ */
-- 
cgit v0.10.2


From a9262c4fd404654acd3684699047fa63206518c8 Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Tue, 26 May 2009 08:34:12 -0400
Subject: ASoC: Support for AC97 on Phytec pmc030 base board.

A wm9712 AC97 codec is used.

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 3bce952..79579ae 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -39,4 +39,11 @@ config SND_SOC_MPC5200_AC97
 	help
 	  Say Y here to support the MPC5200 PSCs in AC97 mode.
 
+config SND_MPC52xx_SOC_PCM030
+	tristate "SoC AC97 Audio support for Phytec pcm030 and WM9712"
+	depends on PPC_MPC5200_SIMPLE
+	select SND_SOC_MPC5200_AC97
+	select SND_SOC_WM9712
+	help
+	  Say Y if you want to add support for sound on the Phytec pcm030 baseboard.
 
diff --git a/sound/soc/fsl/Makefile b/sound/soc/fsl/Makefile
index 14631a1..66d88c8 100644
--- a/sound/soc/fsl/Makefile
+++ b/sound/soc/fsl/Makefile
@@ -15,3 +15,6 @@ obj-$(CONFIG_SND_MPC52xx_DMA) += mpc5200_dma.o
 obj-$(CONFIG_SND_SOC_MPC5200_I2S) += mpc5200_psc_i2s.o
 obj-$(CONFIG_SND_SOC_MPC5200_AC97) += mpc5200_psc_ac97.o
 
+# MPC5200 Machine Support
+obj-$(CONFIG_SND_MPC52xx_SOC_PCM030) += pcm030-audio-fabric.o
+
diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
new file mode 100644
index 0000000..8766f7a
--- /dev/null
+++ b/sound/soc/fsl/pcm030-audio-fabric.c
@@ -0,0 +1,90 @@
+/*
+ * Phytec pcm030 driver for the PSC of the Freescale MPC52xx
+ * configured as AC97 interface
+ *
+ * Copyright 2008 Jon Smirl, Digispeaker
+ * Author: Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+#include <sound/soc-of-simple.h>
+
+#include "mpc5200_dma.h"
+#include "mpc5200_psc_ac97.h"
+#include "../codecs/wm9712.h"
+
+static struct snd_soc_device device;
+static struct snd_soc_card card;
+
+static struct snd_soc_dai_link pcm030_fabric_dai[] = {
+{
+	.name = "AC97",
+	.stream_name = "AC97 Analog",
+	.codec_dai = &wm9712_dai[WM9712_DAI_AC97_HIFI],
+	.cpu_dai = &psc_ac97_dai[MPC5200_AC97_NORMAL],
+},
+{
+	.name = "AC97",
+	.stream_name = "AC97 IEC958",
+	.codec_dai = &wm9712_dai[WM9712_DAI_AC97_AUX],
+	.cpu_dai = &psc_ac97_dai[MPC5200_AC97_SPDIF],
+},
+};
+
+static __init int pcm030_fabric_init(void)
+{
+	struct platform_device *pdev;
+	int rc;
+
+	if (!machine_is_compatible("phytec,pcm030"))
+		return -ENODEV;
+
+	card.platform = &mpc5200_audio_dma_platform;
+	card.name = "pcm030";
+	card.dai_link = pcm030_fabric_dai;
+	card.num_links = ARRAY_SIZE(pcm030_fabric_dai);
+
+	device.card = &card;
+	device.codec_dev = &soc_codec_dev_wm9712;
+
+	pdev = platform_device_alloc("soc-audio", 1);
+	if (!pdev) {
+		pr_err("pcm030_fabric_init: platform_device_alloc() failed\n");
+		return -ENODEV;
+	}
+
+	platform_set_drvdata(pdev, &device);
+	device.dev = &pdev->dev;
+
+	rc = platform_device_add(pdev);
+	if (rc) {
+		pr_err("pcm030_fabric_init: platform_device_add() failed\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+module_init(pcm030_fabric_init);
+
+
+MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
+MODULE_DESCRIPTION(DRV_NAME ": mpc5200 pcm030 fabric driver");
+MODULE_LICENSE("GPL");
+
-- 
cgit v0.10.2


From 6ffee43ecf8bfbe0bd74c9084c9772a59097d53b Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Tue, 26 May 2009 08:34:14 -0400
Subject: ASoC: Fabric bindings for STAC9766 on the Efika

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 79579ae..f571c6e 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -47,3 +47,11 @@ config SND_MPC52xx_SOC_PCM030
 	help
 	  Say Y if you want to add support for sound on the Phytec pcm030 baseboard.
 
+config SND_MPC52xx_SOC_EFIKA
+	tristate "SoC AC97 Audio support for bbplan Efika and STAC9766"
+	depends on PPC_EFIKA
+	select SND_SOC_MPC5200_AC97
+	select SND_SOC_STAC9766
+	help
+	  Say Y if you want to add support for sound on the Efika.
+
diff --git a/sound/soc/fsl/Makefile b/sound/soc/fsl/Makefile
index 66d88c8..a83a739 100644
--- a/sound/soc/fsl/Makefile
+++ b/sound/soc/fsl/Makefile
@@ -17,4 +17,5 @@ obj-$(CONFIG_SND_SOC_MPC5200_AC97) += mpc5200_psc_ac97.o
 
 # MPC5200 Machine Support
 obj-$(CONFIG_SND_MPC52xx_SOC_PCM030) += pcm030-audio-fabric.o
+obj-$(CONFIG_SND_MPC52xx_SOC_EFIKA) += efika-audio-fabric.o
 
diff --git a/sound/soc/fsl/efika-audio-fabric.c b/sound/soc/fsl/efika-audio-fabric.c
new file mode 100644
index 0000000..85b0e75
--- /dev/null
+++ b/sound/soc/fsl/efika-audio-fabric.c
@@ -0,0 +1,90 @@
+/*
+ * Efika driver for the PSC of the Freescale MPC52xx
+ * configured as AC97 interface
+ *
+ * Copyright 2008 Jon Smirl, Digispeaker
+ * Author: Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+#include <sound/soc-of-simple.h>
+
+#include "mpc5200_dma.h"
+#include "mpc5200_psc_ac97.h"
+#include "../codecs/stac9766.h"
+
+static struct snd_soc_device device;
+static struct snd_soc_card card;
+
+static struct snd_soc_dai_link efika_fabric_dai[] = {
+{
+	.name = "AC97",
+	.stream_name = "AC97 Analog",
+	.codec_dai = &stac9766_dai[STAC9766_DAI_AC97_ANALOG],
+	.cpu_dai = &psc_ac97_dai[MPC5200_AC97_NORMAL],
+},
+{
+	.name = "AC97",
+	.stream_name = "AC97 IEC958",
+	.codec_dai = &stac9766_dai[STAC9766_DAI_AC97_DIGITAL],
+	.cpu_dai = &psc_ac97_dai[MPC5200_AC97_SPDIF],
+},
+};
+
+static __init int efika_fabric_init(void)
+{
+	struct platform_device *pdev;
+	int rc;
+
+	if (!machine_is_compatible("bplan,efika"))
+		return -ENODEV;
+
+	card.platform = &mpc5200_audio_dma_platform;
+	card.name = "Efika";
+	card.dai_link = efika_fabric_dai;
+	card.num_links = ARRAY_SIZE(efika_fabric_dai);
+
+	device.card = &card;
+	device.codec_dev = &soc_codec_dev_stac9766;
+
+	pdev = platform_device_alloc("soc-audio", 1);
+	if (!pdev) {
+		pr_err("efika_fabric_init: platform_device_alloc() failed\n");
+		return -ENODEV;
+	}
+
+	platform_set_drvdata(pdev, &device);
+	device.dev = &pdev->dev;
+
+	rc = platform_device_add(pdev);
+	if (rc) {
+		pr_err("efika_fabric_init: platform_device_add() failed\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+module_init(efika_fabric_init);
+
+
+MODULE_AUTHOR("Jon Smirl <jonsmirl@gmail.com>");
+MODULE_DESCRIPTION(DRV_NAME ": mpc5200 Efika fabric driver");
+MODULE_LICENSE("GPL");
+
-- 
cgit v0.10.2


From 0c0e09e21a9e7bc6ca54e06ef3d497255ca26383 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 26 May 2009 21:14:59 +0100
Subject: ASoC: Mark MPC5200 AC97 as BROKEN until PowerPC merge issues are
 resolved

These drivers use spin_event_timeout() which is only present in the
PowerPC tree at present and which is undergoing some API revisions
so temporarily mark them as BROKEN until these issues are sorted
out.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index f571c6e..5dbebf8 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -41,15 +41,16 @@ config SND_SOC_MPC5200_AC97
 
 config SND_MPC52xx_SOC_PCM030
 	tristate "SoC AC97 Audio support for Phytec pcm030 and WM9712"
-	depends on PPC_MPC5200_SIMPLE
+	depends on PPC_MPC5200_SIMPLE && BROKEN
 	select SND_SOC_MPC5200_AC97
 	select SND_SOC_WM9712
 	help
-	  Say Y if you want to add support for sound on the Phytec pcm030 baseboard.
+	  Say Y if you want to add support for sound on the Phytec pcm030
+	  baseboard.
 
 config SND_MPC52xx_SOC_EFIKA
 	tristate "SoC AC97 Audio support for bbplan Efika and STAC9766"
-	depends on PPC_EFIKA
+	depends on PPC_EFIKA && BROKEN
 	select SND_SOC_MPC5200_AC97
 	select SND_SOC_STAC9766
 	help
-- 
cgit v0.10.2


From 46a7574caf5bc533c24b315800ed323c187614f5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sun, 24 May 2009 18:45:17 -0400
Subject: cifs: fix artificial limit on reading symlinks

There's no reason to limit the size of a symlink that we can read to
4000 bytes. That may be nowhere near PATH_MAX if the server is sending
UCS2 strings. CIFS should be able to read in a symlink up to the size of
the buffer. The size of the header has already been accounted for when
creating the slabcache, so CIFSMaxBufSize should be the correct size to
pass in.

Fixes samba bug #6384.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d062602..aece2a8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2427,8 +2427,7 @@ querySymLinkRetry:
 	params = 2 /* level */  + 4 /* rsrvd */  + name_len /* incl null */ ;
 	pSMB->TotalDataCount = 0;
 	pSMB->MaxParameterCount = cpu_to_le16(2);
-	/* BB find exact max data count below from sess structure BB */
-	pSMB->MaxDataCount = cpu_to_le16(4000);
+	pSMB->MaxDataCount = cpu_to_le16(CIFSMaxBufSize);
 	pSMB->MaxSetupCount = 0;
 	pSMB->Reserved = 0;
 	pSMB->Flags = 0;
-- 
cgit v0.10.2


From f55ed1a83d099f275c9560ad7d4c4700d1e54bdd Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 26 May 2009 16:28:11 -0400
Subject: cifs: tighten up default file_mode/dir_mode

The current default file mode is 02767 and dir mode is 0777. This is
extremely "loose". Given that CIFS is a single-user protocol, these
permissions allow anyone to use the mount -- in effect, giving anyone on
the machine access to the credentials used to mount the share.

Change this by making the default permissions restrict write access to
the default owner of the mount. Give read and execute permissions to
everyone else. These are the same permissions that VFAT mounts get by
default so there is some precedent here.

Note that this patch also removes the mandatory locking flags from the
default file_mode. After having looked at how these flags are used by
the kernel, I don't think that keeping them as the default offers any
real benefit. That flag combination makes it so that the kernel enforces
mandatory locking.

Since the server is going to do that for us anyway, I don't think we
want the client to enforce this by default on applications that just
want advisory locks. Anyone that does want this behavior can always
enable it by setting the file_mode appropriately.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4aa81a5..f32c903 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -827,9 +827,9 @@ cifs_parse_mount_options(char *options, const char *devname,
 	vol->target_rfc1001_name[0] = 0;
 	vol->linux_uid = current_uid();  /* use current_euid() instead? */
 	vol->linux_gid = current_gid();
-	vol->dir_mode = S_IRWXUGO;
-	/* 2767 perms indicate mandatory locking support */
-	vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP);
+
+	/* default to only allowing write access to owner of the mount */
+	vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
 
 	/* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
 	vol->rw = true;
-- 
cgit v0.10.2


From 096324873f9c7172a17aff9db1356f4f01b77afe Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Sat, 23 May 2009 14:30:12 -0500
Subject: xfs: fix overflow in xfs_growfs_data_private

In the case where growing a filesystem would leave the last AG
too small, the fixup code has an overflow in the calculation
of the new size with one fewer ag, because "nagcount" is a 32
bit number.  If the new filesystem has > 2^32 blocks in it
this causes a problem resulting in an EINVAL return from growfs:

 # xfs_io -f -c "truncate 19998630180864" fsfile
 # mkfs.xfs -f -bsize=4096 -dagsize=76288719b,size=3905982455b fsfile
 # mount -o loop fsfile /mnt
 # xfs_growfs /mnt

meta-data=/dev/loop0             isize=256    agcount=52,
agsize=76288719 blks
         =                       sectsz=512   attr=2
data     =                       bsize=4096   blocks=3905982455, imaxpct=5
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0
log      =internal               bsize=4096   blocks=32768, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=0
realtime =none                   extsz=4096   blocks=0, rtextents=0
xfs_growfs: XFS_IOC_FSGROWFSDATA xfsctl failed: Invalid argument

Reported-by: richard.ems@cape-horn-eng.com
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8379e3b..cbd451b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -160,7 +160,7 @@ xfs_growfs_data_private(
 	nagcount = new + (nb_mod != 0);
 	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
 		nagcount--;
-		nb = nagcount * mp->m_sb.sb_agblocks;
+		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
 		if (nb < mp->m_sb.sb_dblocks)
 			return XFS_ERROR(EINVAL);
 	}
-- 
cgit v0.10.2


From a330bd4750bc84aebb28faddd525d0bcbdde262d Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Wed, 6 May 2009 15:53:25 -0700
Subject: Revert "ARM: OMAP: Mask interrupts when disabling interrupts, v2"

This reverts commit 5461af5af5c6a7fee78978aafe720541bf3a2f55.

Adding a disable hook to the irq_chip is not the way to fix the
problem being addressed by this patch.  Instead, we need to fix
support for [enable|disable]_irq_wake().

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index 998c5c4..08a3b99 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -134,7 +134,6 @@ static struct irq_chip omap_irq_chip = {
 	.ack	= omap_mask_ack_irq,
 	.mask	= omap_mask_irq,
 	.unmask	= omap_unmask_irq,
-	.disable = omap_mask_irq,
 };
 
 static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
-- 
cgit v0.10.2


From b1338d199dda6681d9af0297928af0a7eb9cba7b Mon Sep 17 00:00:00 2001
From: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Date: Tue, 26 May 2009 12:15:53 +0900
Subject: tomoyo: add missing call to cap_bprm_set_creds

cap_bprm_set_creds() has to be called from security_bprm_set_creds().
TOMOYO forgot to call cap_bprm_set_creds() from tomoyo_bprm_set_creds()
and suid executables were not being working.

Make sure we call cap_bprm_set_creds() with TOMOYO, to set credentials
properly inside tomoyo_bprm_set_creds().

Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Acked-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 5b48191..e42be5c 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -27,6 +27,12 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
 
 static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
 {
+	int rc;
+
+	rc = cap_bprm_set_creds(bprm);
+	if (rc)
+		return rc;
+
 	/*
 	 * Do only if this function is called for the first time of an execve
 	 * operation.
-- 
cgit v0.10.2


From e76a16deb8785317a23cca7204331af053e0fb4e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 26 May 2009 17:44:56 -0700
Subject: drm/i915: Fix tiling pitch handling on 8xx.

The pitch field is an exponent on pre-965, so we were rejecting buffers
on 8xx that we shouldn't have.  915 got lucky in that the largest legal
value happened to match (8KB / 512 = 0x10), but 8xx has a smaller tile width.
Additionally, we programmed that bad value into the register on 8xx, so the
only pitch that would work correctly was 4096 (512-1023 pixels), while others
would probably give bad rendering or hangs.

Signed-off-by: Eric Anholt <eric@anholt.net>

fd.o bug #20473.

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 717b6a8..e4408da 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2128,8 +2128,10 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 		return;
 	}
 
-	pitch_val = (obj_priv->stride / 128) - 1;
-	WARN_ON(pitch_val & ~0x0000000f);
+	pitch_val = obj_priv->stride / 128;
+	pitch_val = ffs(pitch_val) - 1;
+	WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
+
 	val = obj_priv->gtt_offset;
 	if (obj_priv->tiling_mode == I915_TILING_Y)
 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 52a0593..540dd33 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -213,7 +213,8 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 	if (tiling_mode == I915_TILING_NONE)
 		return true;
 
-	if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+	if (!IS_I9XX(dev) ||
+	    (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
 		tile_width = 128;
 	else
 		tile_width = 512;
@@ -225,11 +226,18 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 		if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
 			return false;
 	} else if (IS_I9XX(dev)) {
-		if (stride / tile_width > I830_FENCE_MAX_PITCH_VAL ||
+		uint32_t pitch_val = ffs(stride / tile_width) - 1;
+
+		/* XXX: For Y tiling, FENCE_MAX_PITCH_VAL is actually 6 (8KB)
+		 * instead of 4 (2KB) on 945s.
+		 */
+		if (pitch_val > I915_FENCE_MAX_PITCH_VAL ||
 		    size > (I830_FENCE_MAX_SIZE_VAL << 20))
 			return false;
 	} else {
-		if (stride / 128 > I830_FENCE_MAX_PITCH_VAL ||
+		uint32_t pitch_val = ffs(stride / tile_width) - 1;
+
+		if (pitch_val > I830_FENCE_MAX_PITCH_VAL ||
 		    size > (I830_FENCE_MAX_SIZE_VAL << 19))
 			return false;
 	}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9668cc0..375569d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -190,7 +190,8 @@
 #define   I830_FENCE_SIZE_BITS(size)	((ffs((size) >> 19) - 1) << 8)
 #define   I830_FENCE_PITCH_SHIFT	4
 #define   I830_FENCE_REG_VALID		(1<<0)
-#define   I830_FENCE_MAX_PITCH_VAL	0x10
+#define   I915_FENCE_MAX_PITCH_VAL	0x10
+#define   I830_FENCE_MAX_PITCH_VAL	6
 #define   I830_FENCE_MAX_SIZE_VAL	(1<<8)
 
 #define   I915_FENCE_START_MASK		0x0ff00000
-- 
cgit v0.10.2


From cfa16a0de5392c54db553ec2233a7110e4b4da7a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 26 May 2009 18:46:16 -0700
Subject: drm/i915: Apply a big hammer to 865 GEM object CPU cache flushing.

On the 865, but not the 855, the clflush we do appears to not actually make
it out to the hardware all the time.  An easy way to safely reproduce was
X -retro, which would show that some of the blits involved in drawing the
lovely root weave didn't make it out to the hardware.  Those blits are 32
bytes each, and 1-2 would be missing at various points around the screen.
Other experimentation (doing more clflush, doing more AGP chipset flush,
poking at some more device registers to maybe trigger more flushing) didn't
help.  krh came up with the wbinvd as a way to successfully get all those
blits to appear.

Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4408da..e242186 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2423,6 +2423,16 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
 	if (obj_priv->pages == NULL)
 		return;
 
+	/* XXX: The 865 in particular appears to be weird in how it handles
+	 * cache flushing.  We haven't figured it out, but the
+	 * clflush+agp_chipset_flush doesn't appear to successfully get the
+	 * data visible to the PGU, while wbinvd + agp_chipset_flush does.
+	 */
+	if (IS_I865G(obj->dev)) {
+		wbinvd();
+		return;
+	}
+
 	drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
 }
 
-- 
cgit v0.10.2


From 76b0187525f024cb391c8043adf2e359b2adb988 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Tue, 26 May 2009 14:16:31 +0900
Subject: rootplug: Remove redundant initialization.

We don't need to explicitly initialize to cap_* because
it will be filled by security_fixup_ops().

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/root_plug.c b/security/root_plug.c
index 40fb4f1..2f7ffa6 100644
--- a/security/root_plug.c
+++ b/security/root_plug.c
@@ -71,18 +71,6 @@ static int rootplug_bprm_check_security (struct linux_binprm *bprm)
 }
 
 static struct security_operations rootplug_security_ops = {
-	/* Use the capability functions for some of the hooks */
-	.ptrace_may_access =		cap_ptrace_may_access,
-	.ptrace_traceme =		cap_ptrace_traceme,
-	.capget =			cap_capget,
-	.capset =			cap_capset,
-	.capable =			cap_capable,
-
-	.bprm_set_creds =		cap_bprm_set_creds,
-
-	.task_fix_setuid =		cap_task_fix_setuid,
-	.task_prctl =			cap_task_prctl,
-
 	.bprm_check_security =		rootplug_bprm_check_security,
 };
 
-- 
cgit v0.10.2


From 84532a0fc3d5811dca8e3726fe4d372ea87bd7c6 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 27 May 2009 13:33:14 +1000
Subject: Revert "powerpc: Rework dma-noncoherent to use generic vmalloc layer"

This reverts commit 33f00dcedb0e22cdb156a23632814fc580fcfcf8.

    While it was a good idea to try to use the mm/vmalloc.c allocator instead
    of our own (in fact, ours is itself a dup on an old variant of the vmalloc
    one), unfortunately, the approach is terminally busted since
    dma_alloc_coherent() can be called at interrupt time or in atomic contexts
    and there's little chances we'll make the code in mm/vmalloc.c cope with\       that :-(

    Until we can get the generic code to forbid that idiocy and fix all
    drivers abusing it, we pretty much have no choice but revert to
    our custom virtual space allocator.

    There's also a problem with SMP safety since freeing such mapping
    would require an IPI which cannot be done at interrupt time.

    However, right now, I don't think we support any platform that is
    both SMP and has non-coherent DMA (don't laugh, I know such things
    do exist !) so we can sort that out later.

    Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a0d1146..3bb43ad 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -868,6 +868,31 @@ config TASK_SIZE
 	default "0x80000000" if PPC_PREP || PPC_8xx
 	default "0xc0000000"
 
+config CONSISTENT_START_BOOL
+	bool "Set custom consistent memory pool address"
+	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+	help
+	  This option allows you to set the base virtual address
+	  of the consistent memory pool.  This pool of virtual
+	  memory is used to make consistent memory allocations.
+
+config CONSISTENT_START
+	hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL
+	default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx)
+	default "0xff100000" if NOT_COHERENT_CACHE
+
+config CONSISTENT_SIZE_BOOL
+	bool "Set custom consistent memory pool size"
+	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+	help
+	  This option allows you to set the size of the
+	  consistent memory pool.  This pool of virtual memory
+	  is used to make consistent memory allocations.
+
+config CONSISTENT_SIZE
+	hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
+	default "0x00200000" if NOT_COHERENT_CACHE
+
 config PIN_TLB
 	bool "Pinned Kernel TLBs (860 ONLY)"
 	depends on ADVANCED_OPTIONS && 8xx
diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c
index 005a28d..b7dc4c1 100644
--- a/arch/powerpc/lib/dma-noncoherent.c
+++ b/arch/powerpc/lib/dma-noncoherent.c
@@ -29,11 +29,121 @@
 #include <linux/types.h>
 #include <linux/highmem.h>
 #include <linux/dma-mapping.h>
-#include <linux/vmalloc.h>
 
 #include <asm/tlbflush.h>
 
 /*
+ * This address range defaults to a value that is safe for all
+ * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
+ * can be further configured for specific applications under
+ * the "Advanced Setup" menu. -Matt
+ */
+#define CONSISTENT_BASE	(CONFIG_CONSISTENT_START)
+#define CONSISTENT_END	(CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE)
+#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
+
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static DEFINE_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct vm_region	region;
+ *    unsigned long	flags;
+ *    struct page	**pages;
+ *    unsigned int	nr_pages;
+ *    unsigned long	phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call vm_region_alloc with an appropriate
+ * struct vm_region head (eg):
+ *
+ *  struct vm_region vmalloc_head = {
+ *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *	.vm_start	= VMALLOC_START,
+ *	.vm_end		= VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling vm_region_alloc().
+ */
+struct ppc_vm_region {
+	struct list_head	vm_list;
+	unsigned long		vm_start;
+	unsigned long		vm_end;
+};
+
+static struct ppc_vm_region consistent_head = {
+	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
+	.vm_start	= CONSISTENT_BASE,
+	.vm_end		= CONSISTENT_END,
+};
+
+static struct ppc_vm_region *
+ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
+{
+	unsigned long addr = head->vm_start, end = head->vm_end - size;
+	unsigned long flags;
+	struct ppc_vm_region *c, *new;
+
+	new = kmalloc(sizeof(struct ppc_vm_region), gfp);
+	if (!new)
+		goto out;
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if ((addr + size) < addr)
+			goto nospc;
+		if ((addr + size) <= c->vm_start)
+			goto found;
+		addr = c->vm_end;
+		if (addr > end)
+			goto nospc;
+	}
+
+ found:
+	/*
+	 * Insert this entry _before_ the one we found.
+	 */
+	list_add_tail(&new->vm_list, &c->vm_list);
+	new->vm_start = addr;
+	new->vm_end = addr + size;
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	return new;
+
+ nospc:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	kfree(new);
+ out:
+	return NULL;
+}
+
+static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
+{
+	struct ppc_vm_region *c;
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if (c->vm_start == addr)
+			goto out;
+	}
+	c = NULL;
+ out:
+	return c;
+}
+
+/*
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
  */
@@ -41,21 +151,21 @@ void *
 __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	struct page *page;
+	struct ppc_vm_region *c;
 	unsigned long order;
-	int i;
-	unsigned int nr_pages = PAGE_ALIGN(size)>>PAGE_SHIFT;
-	unsigned int array_size = nr_pages * sizeof(struct page *);
-	struct page **pages;
-	struct page *end;
 	u64 mask = 0x00ffffff, limit; /* ISA default */
-	struct vm_struct *area;
 
-	BUG_ON(!mem_init_done);
+	if (!consistent_pte) {
+		printk(KERN_ERR "%s: not initialised\n", __func__);
+		dump_stack();
+		return NULL;
+	}
+
 	size = PAGE_ALIGN(size);
 	limit = (mask + 1) & ~mask;
-	if (limit && size >= limit) {
-		printk(KERN_WARNING "coherent allocation too big (requested "
-				"%#x mask %#Lx)\n", size, mask);
+	if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) {
+		printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
+		       size, mask);
 		return NULL;
 	}
 
@@ -68,8 +178,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 	if (!page)
 		goto no_page;
 
-	end = page + (1 << order);
-
 	/*
 	 * Invalidate any data that might be lurking in the
 	 * kernel direct-mapped region for device DMA.
@@ -80,59 +188,48 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 		flush_dcache_range(kaddr, kaddr + size);
 	}
 
-	split_page(page, order);
-
 	/*
-	 * Set the "dma handle"
+	 * Allocate a virtual address in the consistent mapping region.
 	 */
-	*handle = page_to_phys(page);
-
-	area = get_vm_area_caller(size, VM_IOREMAP,
-			__builtin_return_address(1));
-	if (!area)
-		goto out_free_pages;
-
-	if (array_size > PAGE_SIZE) {
-		pages = vmalloc(array_size);
-		area->flags |= VM_VPAGES;
-	} else {
-		pages = kmalloc(array_size, GFP_KERNEL);
-	}
-	if (!pages)
-		goto out_free_area;
+	c = ppc_vm_region_alloc(&consistent_head, size,
+			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+	if (c) {
+		unsigned long vaddr = c->vm_start;
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
+		struct page *end = page + (1 << order);
 
-	area->pages = pages;
-	area->nr_pages = nr_pages;
+		split_page(page, order);
 
-	for (i = 0; i < nr_pages; i++)
-		pages[i] = page + i;
+		/*
+		 * Set the "dma handle"
+		 */
+		*handle = page_to_phys(page);
 
-	if (map_vm_area(area, pgprot_noncached(PAGE_KERNEL), &pages))
-		goto out_unmap;
+		do {
+			BUG_ON(!pte_none(*pte));
 
-	/*
-	 * Free the otherwise unused pages.
-	 */
-	page += nr_pages;
-	while (page < end) {
-		__free_page(page);
-		page++;
+			SetPageReserved(page);
+			set_pte_at(&init_mm, vaddr,
+				   pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
+			page++;
+			pte++;
+			vaddr += PAGE_SIZE;
+		} while (size -= PAGE_SIZE);
+
+		/*
+		 * Free the otherwise unused pages.
+		 */
+		while (page < end) {
+			__free_page(page);
+			page++;
+		}
+
+		return (void *)c->vm_start;
 	}
 
-	return area->addr;
-out_unmap:
-	vunmap(area->addr);
-	if (array_size > PAGE_SIZE)
-		vfree(pages);
-	else
-		kfree(pages);
-	goto out_free_pages;
-out_free_area:
-	free_vm_area(area);
-out_free_pages:
 	if (page)
 		__free_pages(page, order);
-no_page:
+ no_page:
 	return NULL;
 }
 EXPORT_SYMBOL(__dma_alloc_coherent);
@@ -142,12 +239,104 @@ EXPORT_SYMBOL(__dma_alloc_coherent);
  */
 void __dma_free_coherent(size_t size, void *vaddr)
 {
-	vfree(vaddr);
+	struct ppc_vm_region *c;
+	unsigned long flags, addr;
+	pte_t *ptep;
+
+	size = PAGE_ALIGN(size);
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
+	if (!c)
+		goto no_area;
+
+	if ((c->vm_end - c->vm_start) != size) {
+		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+	addr = c->vm_start;
+	do {
+		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+		unsigned long pfn;
+
+		ptep++;
+		addr += PAGE_SIZE;
 
+		if (!pte_none(pte) && pte_present(pte)) {
+			pfn = pte_pfn(pte);
+
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+				ClearPageReserved(page);
+
+				__free_page(page);
+				continue;
+			}
+		}
+
+		printk(KERN_CRIT "%s: bad page in kernel page table\n",
+		       __func__);
+	} while (size -= PAGE_SIZE);
+
+	flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+	list_del(&c->vm_list);
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	kfree(c);
+	return;
+
+ no_area:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
+	       __func__, vaddr);
+	dump_stack();
 }
 EXPORT_SYMBOL(__dma_free_coherent);
 
 /*
+ * Initialise the consistent memory allocation.
+ */
+static int __init dma_alloc_init(void)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	int ret = 0;
+
+	do {
+		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
+		pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE);
+		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE);
+		if (!pmd) {
+			printk(KERN_ERR "%s: no pmd tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+
+		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
+		if (!pte) {
+			printk(KERN_ERR "%s: no pte tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+
+		consistent_pte = pte;
+	} while (0);
+
+	return ret;
+}
+
+core_initcall(dma_alloc_init);
+
+/*
  * make an area consistent.
  */
 void __dma_sync(void *vaddr, size_t size, int direction)
-- 
cgit v0.10.2


From 87ad57bacb25c3f24c54f142ef445f68277705f0 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Tue, 19 May 2009 16:09:42 +0800
Subject: cpuidle: makes AMD C1E work in acpi_idle

When AMD C1E is enabled, local APIC timer will stop even in C1.
This patch uses broadcast IPI to replace local APIC timer in C1.

http://bugzilla.kernel.org/show_bug.cgi?id=13233

[ impact: avoid boot hang in AMD CPU with C1E enabled ]

Tested-by: Dmitry Lyzhyn <thisistempbox@yahoo.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 72069ba..6b7bcc7 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -148,6 +148,9 @@ static void acpi_timer_check_state(int state, struct acpi_processor *pr,
 	if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
 		return;
 
+	if (boot_cpu_has(X86_FEATURE_AMDC1E))
+		type = ACPI_STATE_C1;
+
 	/*
 	 * Check, if one of the previous states already marked the lapic
 	 * unstable
@@ -611,6 +614,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
 		switch (cx->type) {
 		case ACPI_STATE_C1:
 			cx->valid = 1;
+			acpi_timer_check_state(i, pr, cx);
 			break;
 
 		case ACPI_STATE_C2:
@@ -835,6 +839,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 		return 0;
 	}
 
+	acpi_state_timer_broadcast(pr, cx, 1);
 	kt1 = ktime_get_real();
 	acpi_idle_do_entry(cx);
 	kt2 = ktime_get_real();
@@ -842,6 +847,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 
 	local_irq_enable();
 	cx->usage++;
+	acpi_state_timer_broadcast(pr, cx, 0);
 
 	return idle_time;
 }
-- 
cgit v0.10.2


From 7d60e8ab0d5507229dfbdf456501cc378610fa01 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Tue, 19 May 2009 16:09:54 +0800
Subject: cpuidle: fix AMD C1E suspend hang

When AMD C1E is enabled, local APIC timer will stop even in C1. To avoid
suspend/resume hang, this patch removes C1 and replace it with a cpu_relax() in
suspend/resume path. This hasn't any impact in runtime path.

http://bugzilla.kernel.org/show_bug.cgi?id=13233

[ impact: avoid suspend/resume hang in AMD CPU with C1E enabled ]

Tested-by: Dmitry Lyzhyn <thisistempbox@yahoo.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 6b7bcc7..10a2d91 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -834,8 +834,8 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 
 	/* Do not access any ACPI IO ports in suspend path */
 	if (acpi_idle_suspend) {
-		acpi_safe_halt();
 		local_irq_enable();
+		cpu_relax();
 		return 0;
 	}
 
-- 
cgit v0.10.2


From 7a1450fdf4c69961f3926352fd8bc4ea19676756 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 26 May 2009 04:55:38 -0400
Subject: Blackfin: hook up preadv/pwritev syscalls

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index 1e57b63..cf5066d 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -378,8 +378,10 @@
 #define __NR_dup3		363
 #define __NR_pipe2		364
 #define __NR_inotify_init1	365
+#define __NR_preadv		366
+#define __NR_pwritev		367
 
-#define __NR_syscall		366
+#define __NR_syscall		368
 #define NR_syscalls		__NR_syscall
 
 /* Old optional stuff no one actually uses */
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 21e65a3..a063a43 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1581,6 +1581,8 @@ ENTRY(_sys_call_table)
 	.long _sys_dup3
 	.long _sys_pipe2
 	.long _sys_inotify_init1	/* 365 */
+	.long _sys_preadv
+	.long _sys_pwritev
 
 	.rept NR_syscalls-(.-_sys_call_table)/4
 	.long _sys_ni_syscall
-- 
cgit v0.10.2


From 6c83429a1c32c914dfb81939cc2ddece97e48294 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Sun, 24 May 2009 02:13:15 -0400
Subject: MAINTAINERS: update Blackfin items

With Bryan Wu having moved on to another job, push the slack onto some
other ADI lackeys.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 77cbfb1..7dd3475 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1132,16 +1132,16 @@ F:	fs/bfs/
 F:	include/linux/bfs_fs.h
 
 BLACKFIN ARCHITECTURE
-P:	Bryan Wu
-M:	cooloney@kernel.org
+P:	Mike Frysinger
+M:	vapier@gentoo.org
 L:	uclinux-dist-devel@blackfin.uclinux.org
 W:	http://blackfin.uclinux.org
 S:	Supported
 F:	arch/blackfin/
 
 BLACKFIN EMAC DRIVER
-P:	Bryan Wu
-M:	cooloney@kernel.org
+P:	Michael Hennerich
+M:	michael.hennerich@analog.com
 L:	uclinux-dist-devel@blackfin.uclinux.org (subscribers-only)
 W:	http://blackfin.uclinux.org
 S:	Supported
-- 
cgit v0.10.2


From 49afa60948f859e71d68a74c1af6ccd7b5b94d82 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 18 May 2009 04:33:07 -0400
Subject: MAINTAINERS: drop (subscribers-only) markings on Blackfin lists

All of the Blackfin lists are transparently moderated for non-subscribers.
i.e. there are no annoying notices and people get whitelisted after first
their posting.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 7dd3475..5ee166e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1142,7 +1142,7 @@ F:	arch/blackfin/
 BLACKFIN EMAC DRIVER
 P:	Michael Hennerich
 M:	michael.hennerich@analog.com
-L:	uclinux-dist-devel@blackfin.uclinux.org (subscribers-only)
+L:	uclinux-dist-devel@blackfin.uclinux.org
 W:	http://blackfin.uclinux.org
 S:	Supported
 F:	drivers/net/bfin_mac.*
@@ -1150,7 +1150,7 @@ F:	drivers/net/bfin_mac.*
 BLACKFIN RTC DRIVER
 P:	Mike Frysinger
 M:	vapier.adi@gmail.com
-L:	uclinux-dist-devel@blackfin.uclinux.org (subscribers-only)
+L:	uclinux-dist-devel@blackfin.uclinux.org
 W:	http://blackfin.uclinux.org
 S:	Supported
 F:	drivers/rtc/rtc-bfin.c
@@ -1158,7 +1158,7 @@ F:	drivers/rtc/rtc-bfin.c
 BLACKFIN SERIAL DRIVER
 P:	Sonic Zhang
 M:	sonic.zhang@analog.com
-L:	uclinux-dist-devel@blackfin.uclinux.org (subscribers-only)
+L:	uclinux-dist-devel@blackfin.uclinux.org
 W:	http://blackfin.uclinux.org
 S:	Supported
 F:	drivers/serial/bfin_5xx.c
@@ -1166,7 +1166,7 @@ F:	drivers/serial/bfin_5xx.c
 BLACKFIN WATCHDOG DRIVER
 P:	Mike Frysinger
 M:	vapier.adi@gmail.com
-L:	uclinux-dist-devel@blackfin.uclinux.org (subscribers-only)
+L:	uclinux-dist-devel@blackfin.uclinux.org
 W:	http://blackfin.uclinux.org
 S:	Supported
 F:	drivers/watchdog/bfin_wdt.c
@@ -1174,7 +1174,7 @@ F:	drivers/watchdog/bfin_wdt.c
 BLACKFIN I2C TWI DRIVER
 P:	Sonic Zhang
 M:	sonic.zhang@analog.com
-L:	uclinux-dist-devel@blackfin.uclinux.org (subscribers-only)
+L:	uclinux-dist-devel@blackfin.uclinux.org
 W:	http://blackfin.uclinux.org/
 S:	Supported
 F:	drivers/i2c/busses/i2c-bfin-twi.c
-- 
cgit v0.10.2


From 6b50520b2fd9bf521f9c947b5f6999bad273a51d Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 19 May 2009 10:03:22 -0400
Subject: Blackfin: ignore generated vmlinux.lds

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/.gitignore b/arch/blackfin/kernel/.gitignore
new file mode 100644
index 0000000..c5f676c
--- /dev/null
+++ b/arch/blackfin/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
-- 
cgit v0.10.2


From 2ec10ea91bf3688013b00638f29df4f8f6b5c18b Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 20 May 2009 19:45:39 -0400
Subject: Blackfin: drop unneeded asm/.gitignore

We don't create a include/asm/mach/ symlink anymore, so we don't need the
.gitignore for it.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/.gitignore b/arch/blackfin/include/asm/.gitignore
deleted file mode 100644
index 7858564..0000000
--- a/arch/blackfin/include/asm/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-+mach
-- 
cgit v0.10.2


From add8a5050a52f1bd1be6b97be86fdd1cfbea2d1d Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 26 May 2009 05:03:52 -0400
Subject: Blackfin: fix strncmp.o build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix some more fallout of the string changes:

  CC      arch/blackfin/lib/strncmp.o
In file included from include/linux/bitmap.h:9,
                 from include/linux/nodemask.h:90,
                 from include/linux/mmzone.h:17,
                 from include/linux/gfp.h:5,
                 from include/linux/kmod.h:23,
                 from include/linux/module.h:14,
                 from arch/blackfin/lib/strncmp.c:14:
include/linux/string.h: In function ‘strstarts’:
include/linux/string.h:132: error: implicit declaration of function ‘strncmp’
make[1]: *** [arch/blackfin/lib/strncmp.o] Error 1

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
CC: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/blackfin/lib/strncmp.c b/arch/blackfin/lib/strncmp.c
index 2aaae78..46518b1 100644
--- a/arch/blackfin/lib/strncmp.c
+++ b/arch/blackfin/lib/strncmp.c
@@ -8,9 +8,8 @@
 
 #define strncmp __inline_strncmp
 #include <asm/string.h>
-#undef strncmp
-
 #include <linux/module.h>
+#undef strncmp
 
 int strncmp(const char *cs, const char *ct, size_t count)
 {
-- 
cgit v0.10.2


From d8d1656ee15d3085e0085a87e70f9093a0a102c5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 26 May 2009 19:20:57 -0300
Subject: perf report: Use hex2long instead of sscanf

Before:

[acme@emilia ~]$ perf record -o perf_report.perf perf stat perf report > /dev/null

 Performance counter stats for 'perf':

     245.414985  task clock ticks     (msecs)
              6  context switches     (events)
              6  CPU migrations       (events)
           2108  pagefaults           (events)
       37493013  CPU cycles           (events)  (scaled from 67.04%)
       13576789  instructions         (events)  (scaled from 66.76%)
          57931  cache references     (events)  (scaled from 21.96%)
          12263  cache misses         (events)  (scaled from 21.98%)

 Wall-clock time elapsed:   246.575587 msecs

[acme@emilia ~]$ perf report -i perf_report.perf | head
12.15          perf [.] 0x000000000005432a /lib64/libc-2.5.so: _IO_vfscanf_internal
 9.38          perf [k] 0xffffffff8101b1d2 intel_pmu_enable_all
 8.53          perf [.] 0x00000000000056b8 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__insert_symbol
 6.61          perf [.] 0x00000000000057cb /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__find_symbol
 5.33          perf [k] 0xffffffff811ce082 number
 4.69          perf [.] 0x0000000000034829 /lib64/libc-2.5.so: ____strtoull_l_internal
 4.48          perf [.] 0x0000000000006505 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: thread__symbol_incnew
 3.41          perf [.] 0x000000000000fce6 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: rb_insert_color
 3.20          perf [k] 0xffffffff811cfc01 vsnprintf
 2.99          perf [k] 0xffffffff811ce5e8 format_decode

After:

[acme@emilia ~]$ perf record -o perf_report.perf perf stat perf report > /dev/null

 Performance counter stats for 'perf':

     218.186805  task clock ticks     (msecs)
              4  context switches     (events)
              7  CPU migrations       (events)
           2133  pagefaults           (events)
       32735365  CPU cycles           (events)  (scaled from 67.04%)
       11952309  instructions         (events)  (scaled from 66.26%)
          50314  cache references     (events)  (scaled from 21.96%)
          13228  cache misses         (events)  (scaled from 21.98%)

 Wall-clock time elapsed:   218.810451 msecs

[acme@emilia ~]$ perf report -i perf_report.perf | head
10.68          perf [.] 0x000000000000578d /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__find_symbol
 9.62          perf [.] 0x00000000000065f7 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: thread__symbol_incnew
 9.40          perf [.] 0x00000000000056b4 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__insert_symbol
 9.19          perf [k] 0xffffffff8101b1d2 intel_pmu_enable_all
 5.13          perf [.] 0x0000000000005ec7 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: hex2long
 4.49          perf [k] 0xffffffff81083808 kallsyms_expand_symbol
 3.85          perf [k] 0xffffffff811ce2c1 number
 3.63          perf [.] 0x0000000000005e81 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: hex
 2.99          perf [.] 0x000000000000fd5b /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: rb_insert_color
 2.99          perf [k] 0xffffffff811cf251 string
[acme@emilia ~]$

[ Impact: optimization ]

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090526222057.GI4424@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index a4c6ffa..c517483 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -348,6 +348,39 @@ void dsos__fprintf(FILE *fp)
 		dso__fprintf(pos, fp);
 }
 
+static int hex(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	if ((ch >= 'A') && (ch <= 'F'))
+		return ch - 'A' + 10;
+	return -1;
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+int hex2long(char *ptr, unsigned long *long_val)
+{
+	const char *p = ptr;
+	*long_val = 0;
+
+	while (*p) {
+		const int hex_val = hex(*p);
+
+		if (hex_val < 0)
+			break;
+
+		*long_val = (*long_val << 4) | hex_val;
+		p++;
+	}
+
+	return p - ptr;
+}
+
 static int load_kallsyms(void)
 {
 	kernel_dso = dso__new("[kernel]");
@@ -363,26 +396,30 @@ static int load_kallsyms(void)
 	size_t n;
 
 	while (!feof(file)) {
-		unsigned long long start;
-		char c, symbf[4096];
-
-		if (getline(&line, &n, file) < 0)
+		unsigned long start;
+		int line_len = getline(&line, &n, file);
+		if (line_len < 0)
 			break;
 
 		if (!line)
 			goto out_delete_dso;
 
-		if (sscanf(line, "%llx %c %s", &start, &c, symbf) == 3) {
-			/*
-			 * Well fix up the end later, when we have all sorted.
-			 */
-			struct symbol *sym = symbol__new(start, 0xdead, symbf);
+		line[--line_len] = '\0'; /* \n */
+
+		int len = hex2long(line, &start);
+		
+		len += 3; /* ' t ' */
+		if (len >= line_len)
+			continue;
+		/*
+		 * Well fix up the end later, when we have all sorted.
+		 */
+		struct symbol *sym = symbol__new(start, 0xdead, line + len);
 
-			if (sym == NULL)
-				goto out_delete_dso;
+		if (sym == NULL)
+			goto out_delete_dso;
 
-			dso__insert_symbol(kernel_dso, sym);
-		}
+		dso__insert_symbol(kernel_dso, sym);
 	}
 
 	/*
-- 
cgit v0.10.2


From 03f6316d32738ec5eda2e6f628c12d1c01e61a87 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 26 May 2009 19:21:55 -0300
Subject: perf report: Only load text symbols from kallsyms

Just like we do for userspace when reading the symtab, reducing the
number of entries we insert on the symbols rbtree.

Before:

[acme@emilia ~]$ rm -f perf_report.perf ; perf record -o perf_report.perf perf stat perf report > /dev/null

 Performance counter stats for 'perf':

     218.138382  task clock ticks     (msecs)
              4  context switches     (events)
              8  CPU migrations       (events)
           2136  pagefaults           (events)
       32746212  CPU cycles           (events)  (scaled from 67.04%)
       11961102  instructions         (events)  (scaled from 66.19%)
          49841  cache references     (events)  (scaled from 21.96%)
          13777  cache misses         (events)  (scaled from 21.98%)

 Wall-clock time elapsed:   218.702477 msecs

[acme@emilia ~]$ perf report -i perf_report.perf | head
11.06          perf [.] 0x00000000000057cb /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__find_symbol
 9.15          perf [.] 0x00000000000056a0 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__insert_symbol
 8.72          perf [k] 0xffffffff8101b1d2 intel_pmu_enable_all
 8.51          perf [.] 0x0000000000006672 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: thread__symbol_incnew
 3.83          perf [k] 0xffffffff811cfc5a vsnprintf
 3.40          perf [.] 0x0000000000005e33 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: hex
 3.40          perf [.] 0x0000000000005ec7 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: hex2long
 3.19          perf [k] 0xffffffff811ce1c1 number
 2.77          perf [.] 0x0000000000006869 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: threads__findnew
 2.77          perf [.] 0x000000000000fde3 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: rb_insert_color
[acme@emilia ~]$

After:

acme@emilia ~]$ rm -f perf_report.perf ; perf record -o perf_report.perf perf stat perf report > /dev/null

 Performance counter stats for 'perf':

     190.228511  task clock ticks     (msecs)
              4  context switches     (events)
              7  CPU migrations       (events)
           1625  pagefaults           (events)
       29578745  CPU cycles           (events)  (scaled from 66.92%)
       10516914  instructions         (events)  (scaled from 66.47%)
          44015  cache references     (events)  (scaled from 22.04%)
           8248  cache misses         (events)  (scaled from 22.07%)

 Wall-clock time elapsed:   190.816096 msecs

[acme@emilia ~]$ perf report -i perf_report.perf | head
15.99          perf [.] 0x00000000000057a9 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__find_symbol
10.87          perf [.] 0x000000000000674d /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: thread__symbol_incnew
 8.74          perf [k] 0xffffffff8101b1d2 intel_pmu_enable_all
 5.54          perf [.] 0x0000000000005e42 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: hex
 4.48          perf [.] 0x0000000000005ebe /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: hex2long
 4.48          perf [k] 0xffffffff811cfba0 vsnprintf
 3.84          perf [.] 0x00000000000056b4 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__insert_symbol
 3.62          perf [.] 0x00000000000068d0 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: threads__findnew
 3.20          perf [k] 0xffffffff811ce0b3 number
 2.56          perf [.] 0x0000000000006d78 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: __cmd_report
[acme@emilia ~]$

[ Impact: optimization ]

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090526222155.GJ4424@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index c517483..a55f15d 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -3,6 +3,7 @@
 #include <libelf.h>
 #include <gelf.h>
 #include <elf.h>
+#include <ctype.h>
 
 #include "util/list.h"
 #include "util/rbtree.h"
@@ -408,13 +409,20 @@ static int load_kallsyms(void)
 
 		int len = hex2long(line, &start);
 		
-		len += 3; /* ' t ' */
-		if (len >= line_len)
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		char symbol_type = line[len];
+		/*
+		 * We're interested only in code ('T'ext)
+		 */
+		if (toupper(symbol_type) != 'T')
 			continue;
 		/*
 		 * Well fix up the end later, when we have all sorted.
 		 */
-		struct symbol *sym = symbol__new(start, 0xdead, line + len);
+		struct symbol *sym = symbol__new(start, 0xdead, line + len + 2);
 
 		if (sym == NULL)
 			goto out_delete_dso;
-- 
cgit v0.10.2


From af83632f98aefd1ae4d8ca3c7c285ccf6a7d3956 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 May 2009 08:38:48 +0200
Subject: perf report: Only load text symbols from kallsyms, fix

- allow 'W' symbols too
 - Convert initializations to C99 style
 - whitespace cleanups

Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090526222155.GJ4424@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index a55f15d..ed3da9d 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -384,21 +384,26 @@ int hex2long(char *ptr, unsigned long *long_val)
 
 static int load_kallsyms(void)
 {
+	struct rb_node *nd, *prevnd;
+	char *line = NULL;
+	FILE *file;
+	size_t n;
+
 	kernel_dso = dso__new("[kernel]");
 	if (kernel_dso == NULL)
 		return -1;
 
-	FILE *file = fopen("/proc/kallsyms", "r");
-
+	file = fopen("/proc/kallsyms", "r");
 	if (file == NULL)
 		goto out_delete_dso;
 
-	char *line = NULL;
-	size_t n;
-
 	while (!feof(file)) {
 		unsigned long start;
-		int line_len = getline(&line, &n, file);
+		struct symbol *sym;
+		int line_len, len;
+		char symbol_type;
+
+		line_len = getline(&line, &n, file);
 		if (line_len < 0)
 			break;
 
@@ -407,22 +412,22 @@ static int load_kallsyms(void)
 
 		line[--line_len] = '\0'; /* \n */
 
-		int len = hex2long(line, &start);
-		
+		len = hex2long(line, &start);
+
 		len++;
 		if (len + 2 >= line_len)
 			continue;
 
-		char symbol_type = line[len];
+		symbol_type = toupper(line[len]);
 		/*
 		 * We're interested only in code ('T'ext)
 		 */
-		if (toupper(symbol_type) != 'T')
+		if (symbol_type != 'T' && symbol_type != 'W')
 			continue;
 		/*
 		 * Well fix up the end later, when we have all sorted.
 		 */
-		struct symbol *sym = symbol__new(start, 0xdead, line + len + 2);
+		sym = symbol__new(start, 0xdead, line + len + 2);
 
 		if (sym == NULL)
 			goto out_delete_dso;
@@ -434,7 +439,7 @@ static int load_kallsyms(void)
 	 * Now that we have all sorted out, just set the ->end of all
 	 * symbols
 	 */
-	struct rb_node *nd, *prevnd = rb_first(&kernel_dso->syms);
+	prevnd = rb_first(&kernel_dso->syms);
 
 	if (prevnd == NULL)
 		goto out_delete_line;
@@ -450,6 +455,7 @@ static int load_kallsyms(void)
 	dsos__add(kernel_dso);
 	free(line);
 	fclose(file);
+
 	return 0;
 
 out_delete_line:
-- 
cgit v0.10.2


From 815e777f913ed54ddb449d2854015c65b4ecbfe3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 26 May 2009 19:46:14 -0300
Subject: perf report: Show the IP only in --verbose mode

perf: report should show the IP only in --verbose mode

[acme@emilia ~]$ perf report | head
 4.95          find [k] _spin_lock
 2.19          find [k] ext3fs_dirhash	[ext3]
 1.87          find [k] __rcu_read_lock
 1.86          find [k] _atomic_dec_and_lock
 1.86          find [.] /lib64/libc-2.5.so: __GI_strlen
 1.85          find [k] __kmalloc
 1.62          find [.] /lib64/libc-2.5.so: vfprintf
 1.59          find [k] __rcu_read_unlock
 1.55          find [k] __d_lookup

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090526224614.GK4424@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index ed3da9d..2d65d9c 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -22,6 +22,7 @@ static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static int		dump_trace = 0;
+static int 		verbose;
 
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
@@ -551,9 +552,12 @@ symhist__fprintf(struct symhist *self, uint64_t total_samples, FILE *fp)
 	else
 		ret = fprintf(fp, "%12d", self->count);
 
-	ret += fprintf(fp, "%14s [%c] %#018llx ",
+	ret += fprintf(fp, "%14s [%c] ",
 		       thread__name(self->thread, bf, sizeof(bf)),
-		       self->level, (unsigned long long)self->ip);
+		       self->level);
+
+	if (verbose)
+		ret += fprintf(fp, "%#018llx ", (unsigned long long)self->ip);
 
 	if (self->level != '.')
 		ret += fprintf(fp, "%s\n",
@@ -974,6 +978,8 @@ static const char * const report_usage[] = {
 static const struct option options[] = {
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_END()
-- 
cgit v0.10.2


From 16f762a2ac5ecf8a11f6f0332e46cc3459220da5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 May 2009 09:10:38 +0200
Subject: perf_counter tools: Introduce stricter C code checking

Tighten up our C code requirements:

 - disallow warnings
 - disallow declarations-mixed-with-statements
 - require proper prototypes
 - require C99 (with gcc extensions)

Fix up a ton of problems these measures unearth:

 - unused functions
 - needlessly global functions
 - missing prototypes
 - code mixed with declarations

Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090526222155.GJ4424@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 10c13a6..efb0589 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -159,7 +159,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
-CFLAGS = -ggdb3 -Wall
+CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement
 LDFLAGS = -lpthread -lrt -lelf
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c
index 6616de0..d2bd317 100644
--- a/Documentation/perf_counter/builtin-help.c
+++ b/Documentation/perf_counter/builtin-help.c
@@ -399,7 +399,7 @@ static void get_html_page_path(struct strbuf *page_path, const char *page)
  * HTML.
  */
 #ifndef open_html
-void open_html(const char *path)
+static void open_html(const char *path)
 {
 	execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
 }
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index ec2b787..68abfdf 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -1,6 +1,7 @@
 
 
 #include "perf.h"
+#include "builtin.h"
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
@@ -144,26 +145,32 @@ static int nr_poll;
 static int nr_cpu;
 
 struct mmap_event {
-	struct perf_event_header header;
-	__u32 pid, tid;
-	__u64 start;
-	__u64 len;
-	__u64 pgoff;
-	char filename[PATH_MAX];
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	__u64				start;
+	__u64				len;
+	__u64				pgoff;
+	char				filename[PATH_MAX];
 };
+
 struct comm_event {
-	struct perf_event_header header;
-	__u32 pid,tid;
-	char comm[16];
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	char				comm[16];
 };
 
 static pid_t pid_synthesize_comm_event(pid_t pid)
 {
+	struct comm_event comm_ev;
 	char filename[PATH_MAX];
+	pid_t spid, ppid;
 	char bf[BUFSIZ];
-	struct comm_event comm_ev;
+	int fd, nr, ret;
+	char comm[18];
 	size_t size;
-	int fd;
+	char state;
 
 	snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
 
@@ -178,12 +185,8 @@ static pid_t pid_synthesize_comm_event(pid_t pid)
 	}
 	close(fd);
 
-	pid_t spid, ppid;
-	char state;
-	char comm[18];
-
 	memset(&comm_ev, 0, sizeof(comm_ev));
-        int nr = sscanf(bf, "%d %s %c %d %d ",
+        nr = sscanf(bf, "%d %s %c %d %d ",
 			&spid, comm, &state, &ppid, &comm_ev.pid);
 	if (nr != 5) {
 		fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -198,7 +201,8 @@ static pid_t pid_synthesize_comm_event(pid_t pid)
 	memcpy(comm_ev.comm, comm + 1, size);
 	size = ALIGN(size, sizeof(uint64_t));
 	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
-	int ret = write(output, &comm_ev, comm_ev.header.size);
+
+	ret = write(output, &comm_ev, comm_ev.header.size);
 	if (ret < 0) {
 		perror("failed to write");
 		exit(-1);
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 2d65d9c..7f1255d 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -1,4 +1,5 @@
 #include "util/util.h"
+#include "builtin.h"
 
 #include <libelf.h>
 #include <gelf.h>
@@ -22,7 +23,7 @@ static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static int		dump_trace = 0;
-static int 		verbose;
+static int		verbose;
 
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
@@ -60,10 +61,10 @@ typedef union event_union {
 } event_t;
 
 struct symbol {
-	struct rb_node rb_node;
-	uint64_t       start;
-	uint64_t       end;
-	char	       name[0];
+	struct rb_node		rb_node;
+	__u64			start;
+	__u64			end;
+	char			name[0];
 };
 
 static struct symbol *symbol__new(uint64_t start, uint64_t len, const char *name)
@@ -86,7 +87,7 @@ static void symbol__delete(struct symbol *self)
 
 static size_t symbol__fprintf(struct symbol *self, FILE *fp)
 {
-	return fprintf(fp, " %lx-%lx %s\n",
+	return fprintf(fp, " %llx-%llx %s\n",
 		       self->start, self->end, self->name);
 }
 
@@ -147,10 +148,12 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym)
 
 static struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
 {
+	struct rb_node *n;
+
 	if (self == NULL)
 		return NULL;
 
-	struct rb_node *n = self->syms.rb_node;
+	n = self->syms.rb_node;
 
 	while (n) {
 		struct symbol *s = rb_entry(n, struct symbol, rb_node);
@@ -221,33 +224,42 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
 
 static int dso__load(struct dso *self)
 {
-	int fd = open(self->name, O_RDONLY), err = -1;
+	Elf_Data *symstrs;
+	uint32_t nr_syms;
+	int fd, err = -1;
+	uint32_t index;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *syms;
+	GElf_Sym sym;
+	Elf_Scn *sec;
+	Elf *elf;
 
+
+	fd = open(self->name, O_RDONLY);
 	if (fd == -1)
 		return -1;
 
-	Elf *elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
 	if (elf == NULL) {
 		fprintf(stderr, "%s: cannot read %s ELF file.\n",
 			__func__, self->name);
 		goto out_close;
 	}
 
-	GElf_Ehdr ehdr;
 	if (gelf_getehdr(elf, &ehdr) == NULL) {
 		fprintf(stderr, "%s: cannot get elf header.\n", __func__);
 		goto out_elf_end;
 	}
 
-	GElf_Shdr shdr;
-	Elf_Scn *sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
+	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
 	if (sec == NULL)
 		sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL);
 
 	if (sec == NULL)
 		goto out_elf_end;
 
-	Elf_Data *syms = elf_getdata(sec, NULL);
+	syms = elf_getdata(sec, NULL);
 	if (syms == NULL)
 		goto out_elf_end;
 
@@ -255,14 +267,12 @@ static int dso__load(struct dso *self)
 	if (sec == NULL)
 		goto out_elf_end;
 
-	Elf_Data *symstrs = elf_getdata(sec, NULL);
+	symstrs = elf_getdata(sec, NULL);
 	if (symstrs == NULL)
 		goto out_elf_end;
 
-	const uint32_t nr_syms = shdr.sh_size / shdr.sh_entsize;
+	nr_syms = shdr.sh_size / shdr.sh_entsize;
 
-	GElf_Sym sym;
-	uint32_t index;
 	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
 		struct symbol *f;
 
@@ -342,7 +352,7 @@ out_delete_dso:
 	return NULL;
 }
 
-void dsos__fprintf(FILE *fp)
+static void dsos__fprintf(FILE *fp)
 {
 	struct dso *pos;
 
@@ -365,7 +375,7 @@ static int hex(char ch)
  * While we find nice hex chars, build a long_val.
  * Return number of chars processed.
  */
-int hex2long(char *ptr, unsigned long *long_val)
+static int hex2long(char *ptr, unsigned long *long_val)
 {
 	const char *p = ptr;
 	*long_val = 0;
@@ -493,12 +503,6 @@ out_delete:
 	return NULL;
 }
 
-static size_t map__fprintf(struct map *self, FILE *fp)
-{
-	return fprintf(fp, " %lx-%lx %lx %s\n",
-		       self->start, self->end, self->pgoff, self->dso->name);
-}
-
 struct thread;
 
 static const char *thread__name(struct thread *self, char *bf, size_t size);
@@ -531,11 +535,6 @@ static struct symhist *symhist__new(struct symbol *sym, uint64_t ip,
 	return self;
 }
 
-void symhist__delete(struct symhist *self)
-{
-	free(self);
-}
-
 static void symhist__inc(struct symhist *self)
 {
 	++self->count;
@@ -608,6 +607,8 @@ static int thread__symbol_incnew(struct thread *self, struct symbol *sym,
 	struct symhist *sh;
 
 	while (*p != NULL) {
+		uint64_t start;
+
 		parent = *p;
 		sh = rb_entry(parent, struct symhist, rb_node);
 
@@ -617,7 +618,7 @@ static int thread__symbol_incnew(struct thread *self, struct symbol *sym,
 		}
 
 		/* Handle unresolved symbols too */
-		const uint64_t start = !sh->sym ? sh->ip : sh->sym->start;
+		start = !sh->sym ? sh->ip : sh->sym->start;
 
 		if (ip < start)
 			p = &(*p)->rb_left;
@@ -639,17 +640,6 @@ static int thread__set_comm(struct thread *self, const char *comm)
 	return self->comm ? 0 : -ENOMEM;
 }
 
-size_t thread__maps_fprintf(struct thread *self, FILE *fp)
-{
-	struct map *pos;
-	size_t ret = 0;
-
-	list_for_each_entry(pos, &self->maps, node)
-		ret += map__fprintf(pos, fp);
-
-	return ret;
-}
-
 static size_t thread__fprintf(struct thread *self, FILE *fp)
 {
 	int ret = fprintf(fp, "thread: %d %s\n", self->pid, self->comm);
@@ -657,13 +647,14 @@ static size_t thread__fprintf(struct thread *self, FILE *fp)
 
 	for (nd = rb_first(&self->symhists); nd; nd = rb_next(nd)) {
 		struct symhist *pos = rb_entry(nd, struct symhist, rb_node);
+
 		ret += symhist__fprintf(pos, 0, fp);
 	}
 
 	return ret;
 }
 
-static struct rb_root threads = RB_ROOT;
+static struct rb_root threads;
 
 static struct thread *threads__findnew(pid_t pid)
 {
@@ -699,11 +690,11 @@ static void thread__insert_map(struct thread *self, struct map *map)
 
 static struct map *thread__find_map(struct thread *self, uint64_t ip)
 {
+	struct map *pos;
+
 	if (self == NULL)
 		return NULL;
 
-	struct map *pos;
-
 	list_for_each_entry(pos, &self->maps, node)
 		if (ip >= pos->start && ip <= pos->end)
 			return pos;
@@ -711,7 +702,7 @@ static struct map *thread__find_map(struct thread *self, uint64_t ip)
 	return NULL;
 }
 
-void threads__fprintf(FILE *fp)
+static void threads__fprintf(FILE *fp)
 {
 	struct rb_node *nd;
 	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
@@ -720,7 +711,7 @@ void threads__fprintf(FILE *fp)
 	}
 }
 
-static struct rb_root global_symhists = RB_ROOT;
+static struct rb_root global_symhists;
 
 static void threads__insert_symhist(struct symhist *sh)
 {
@@ -852,7 +843,7 @@ more:
 				(void *)(long)(event->header.size),
 				event->header.misc,
 				event->ip.pid,
-				(void *)event->ip.ip);
+				(void *)(long)ip);
 		}
 
 		if (thread == NULL) {
@@ -866,9 +857,12 @@ more:
 			level = 'k';
 			dso = kernel_dso;
 		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+			struct map *map;
+
 			show = SHOW_USER;
 			level = '.';
-			struct map *map = thread__find_map(thread, ip);
+
+			map = thread__find_map(thread, ip);
 			if (map != NULL) {
 				dso = map->dso;
 				ip -= map->start + map->pgoff;
@@ -896,9 +890,9 @@ more:
 			fprintf(stderr, "%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
 				(void *)(offset + head),
 				(void *)(long)(event->header.size),
-				(void *)event->mmap.start,
-				(void *)event->mmap.len,
-				(void *)event->mmap.pgoff,
+				(void *)(long)event->mmap.start,
+				(void *)(long)event->mmap.len,
+				(void *)(long)event->mmap.pgoff,
 				event->mmap.filename);
 		}
 		if (thread == NULL || map == NULL) {
@@ -964,6 +958,11 @@ done:
 		return 0;
 	}
 
+	if (verbose >= 2) {
+		dsos__fprintf(stdout);
+		threads__fprintf(stdout);
+	}
+
 	threads__sort_symhists();
 	threads__symhists_fprintf(total, stdout);
 
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index e7cb941..ce661e2 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -30,6 +30,7 @@
  */
 
 #include "perf.h"
+#include "builtin.h"
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
@@ -108,7 +109,7 @@ static void create_perfstat_counter(int counter)
 	}
 }
 
-int do_perfstat(int argc, const char **argv)
+static int do_perfstat(int argc, const char **argv)
 {
 	unsigned long long t0, t1;
 	int counter;
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 6b1c66f..a890872 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -42,8 +42,8 @@
   * Released under the GPL v2. (and only v2, not any later version)
   */
 
-
 #include "perf.h"
+#include "builtin.h"
 #include "util/util.h"
 #include "util/util.h"
 #include "util/parse-options.h"
diff --git a/Documentation/perf_counter/util/abspath.c b/Documentation/perf_counter/util/abspath.c
index 649f34f..61d33b8 100644
--- a/Documentation/perf_counter/util/abspath.c
+++ b/Documentation/perf_counter/util/abspath.c
@@ -5,7 +5,7 @@
  * symlink to a directory, we do not want to say it is a directory when
  * dealing with tracked content in the working tree.
  */
-int is_directory(const char *path)
+static int is_directory(const char *path)
 {
 	struct stat st;
 	return (!stat(path, &st) && S_ISDIR(st.st_mode));
diff --git a/Documentation/perf_counter/util/cache.h b/Documentation/perf_counter/util/cache.h
index 7108051..393d614 100644
--- a/Documentation/perf_counter/util/cache.h
+++ b/Documentation/perf_counter/util/cache.h
@@ -104,6 +104,8 @@ char *strip_path_suffix(const char *path, const char *suffix);
 
 extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
 extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
+extern int perf_mkstemp(char *path, size_t len, const char *template);
 
 extern char *mksnpath(char *buf, size_t n, const char *fmt, ...)
 	__attribute__((format (printf, 3, 4)));
diff --git a/Documentation/perf_counter/util/util.h b/Documentation/perf_counter/util/util.h
index 36e40c3..76590a1 100644
--- a/Documentation/perf_counter/util/util.h
+++ b/Documentation/perf_counter/util/util.h
@@ -309,6 +309,8 @@ extern ssize_t xread(int fd, void *buf, size_t len);
 extern ssize_t xwrite(int fd, const void *buf, size_t len);
 extern int xdup(int fd);
 extern FILE *xfdopen(int fd, const char *mode);
+extern int xmkstemp(char *template);
+
 static inline size_t xsize_t(off_t len)
 {
 	return (size_t)len;
-- 
cgit v0.10.2


From b16e7766d6436835f473ba823ad04fbdfe5e9cbd Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 27 May 2009 13:36:10 +1000
Subject: powerpc: Move dma-noncoherent.c from arch/powerpc/lib to
 arch/powerpc/mm

(pre-requisite to make the next patches more palatable)

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 8db3527..29b742b 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -18,7 +18,6 @@ obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o
 obj-$(CONFIG_XMON)	+= sstep.o
 obj-$(CONFIG_KPROBES)	+= sstep.o
-obj-$(CONFIG_NOT_COHERENT_CACHE)	+= dma-noncoherent.o
 
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c
deleted file mode 100644
index b7dc4c1..0000000
--- a/arch/powerpc/lib/dma-noncoherent.c
+++ /dev/null
@@ -1,426 +0,0 @@
-/*
- *  PowerPC version derived from arch/arm/mm/consistent.c
- *    Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
- *
- *  Copyright (C) 2000 Russell King
- *
- * Consistent memory allocators.  Used for DMA devices that want to
- * share uncached memory with the processor core.  The function return
- * is the virtual address and 'dma_handle' is the physical address.
- * Mostly stolen from the ARM port, with some changes for PowerPC.
- *						-- Dan
- *
- * Reorganized to get rid of the arch-specific consistent_* functions
- * and provide non-coherent implementations for the DMA API. -Matt
- *
- * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
- * implementation. This is pulled straight from ARM and barely
- * modified. -Matt
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/highmem.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/tlbflush.h>
-
-/*
- * This address range defaults to a value that is safe for all
- * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
- * can be further configured for specific applications under
- * the "Advanced Setup" menu. -Matt
- */
-#define CONSISTENT_BASE	(CONFIG_CONSISTENT_START)
-#define CONSISTENT_END	(CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE)
-#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
-
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static pte_t *consistent_pte;
-static DEFINE_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- *  struct vm_struct {
- *    struct vm_region	region;
- *    unsigned long	flags;
- *    struct page	**pages;
- *    unsigned int	nr_pages;
- *    unsigned long	phys_addr;
- *  };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- *  struct vm_region vmalloc_head = {
- *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
- *	.vm_start	= VMALLOC_START,
- *	.vm_end		= VMALLOC_END,
- *  };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct ppc_vm_region {
-	struct list_head	vm_list;
-	unsigned long		vm_start;
-	unsigned long		vm_end;
-};
-
-static struct ppc_vm_region consistent_head = {
-	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
-	.vm_start	= CONSISTENT_BASE,
-	.vm_end		= CONSISTENT_END,
-};
-
-static struct ppc_vm_region *
-ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
-{
-	unsigned long addr = head->vm_start, end = head->vm_end - size;
-	unsigned long flags;
-	struct ppc_vm_region *c, *new;
-
-	new = kmalloc(sizeof(struct ppc_vm_region), gfp);
-	if (!new)
-		goto out;
-
-	spin_lock_irqsave(&consistent_lock, flags);
-
-	list_for_each_entry(c, &head->vm_list, vm_list) {
-		if ((addr + size) < addr)
-			goto nospc;
-		if ((addr + size) <= c->vm_start)
-			goto found;
-		addr = c->vm_end;
-		if (addr > end)
-			goto nospc;
-	}
-
- found:
-	/*
-	 * Insert this entry _before_ the one we found.
-	 */
-	list_add_tail(&new->vm_list, &c->vm_list);
-	new->vm_start = addr;
-	new->vm_end = addr + size;
-
-	spin_unlock_irqrestore(&consistent_lock, flags);
-	return new;
-
- nospc:
-	spin_unlock_irqrestore(&consistent_lock, flags);
-	kfree(new);
- out:
-	return NULL;
-}
-
-static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
-{
-	struct ppc_vm_region *c;
-
-	list_for_each_entry(c, &head->vm_list, vm_list) {
-		if (c->vm_start == addr)
-			goto out;
-	}
-	c = NULL;
- out:
-	return c;
-}
-
-/*
- * Allocate DMA-coherent memory space and return both the kernel remapped
- * virtual and bus address for that space.
- */
-void *
-__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
-{
-	struct page *page;
-	struct ppc_vm_region *c;
-	unsigned long order;
-	u64 mask = 0x00ffffff, limit; /* ISA default */
-
-	if (!consistent_pte) {
-		printk(KERN_ERR "%s: not initialised\n", __func__);
-		dump_stack();
-		return NULL;
-	}
-
-	size = PAGE_ALIGN(size);
-	limit = (mask + 1) & ~mask;
-	if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) {
-		printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
-		       size, mask);
-		return NULL;
-	}
-
-	order = get_order(size);
-
-	if (mask != 0xffffffff)
-		gfp |= GFP_DMA;
-
-	page = alloc_pages(gfp, order);
-	if (!page)
-		goto no_page;
-
-	/*
-	 * Invalidate any data that might be lurking in the
-	 * kernel direct-mapped region for device DMA.
-	 */
-	{
-		unsigned long kaddr = (unsigned long)page_address(page);
-		memset(page_address(page), 0, size);
-		flush_dcache_range(kaddr, kaddr + size);
-	}
-
-	/*
-	 * Allocate a virtual address in the consistent mapping region.
-	 */
-	c = ppc_vm_region_alloc(&consistent_head, size,
-			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
-	if (c) {
-		unsigned long vaddr = c->vm_start;
-		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
-		struct page *end = page + (1 << order);
-
-		split_page(page, order);
-
-		/*
-		 * Set the "dma handle"
-		 */
-		*handle = page_to_phys(page);
-
-		do {
-			BUG_ON(!pte_none(*pte));
-
-			SetPageReserved(page);
-			set_pte_at(&init_mm, vaddr,
-				   pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
-			page++;
-			pte++;
-			vaddr += PAGE_SIZE;
-		} while (size -= PAGE_SIZE);
-
-		/*
-		 * Free the otherwise unused pages.
-		 */
-		while (page < end) {
-			__free_page(page);
-			page++;
-		}
-
-		return (void *)c->vm_start;
-	}
-
-	if (page)
-		__free_pages(page, order);
- no_page:
-	return NULL;
-}
-EXPORT_SYMBOL(__dma_alloc_coherent);
-
-/*
- * free a page as defined by the above mapping.
- */
-void __dma_free_coherent(size_t size, void *vaddr)
-{
-	struct ppc_vm_region *c;
-	unsigned long flags, addr;
-	pte_t *ptep;
-
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irqsave(&consistent_lock, flags);
-
-	c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
-	if (!c)
-		goto no_area;
-
-	if ((c->vm_end - c->vm_start) != size) {
-		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
-		       __func__, c->vm_end - c->vm_start, size);
-		dump_stack();
-		size = c->vm_end - c->vm_start;
-	}
-
-	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
-	addr = c->vm_start;
-	do {
-		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
-		unsigned long pfn;
-
-		ptep++;
-		addr += PAGE_SIZE;
-
-		if (!pte_none(pte) && pte_present(pte)) {
-			pfn = pte_pfn(pte);
-
-			if (pfn_valid(pfn)) {
-				struct page *page = pfn_to_page(pfn);
-				ClearPageReserved(page);
-
-				__free_page(page);
-				continue;
-			}
-		}
-
-		printk(KERN_CRIT "%s: bad page in kernel page table\n",
-		       __func__);
-	} while (size -= PAGE_SIZE);
-
-	flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
-	list_del(&c->vm_list);
-
-	spin_unlock_irqrestore(&consistent_lock, flags);
-
-	kfree(c);
-	return;
-
- no_area:
-	spin_unlock_irqrestore(&consistent_lock, flags);
-	printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
-	       __func__, vaddr);
-	dump_stack();
-}
-EXPORT_SYMBOL(__dma_free_coherent);
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init dma_alloc_init(void)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	int ret = 0;
-
-	do {
-		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
-		pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE);
-		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE);
-		if (!pmd) {
-			printk(KERN_ERR "%s: no pmd tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-
-		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
-		if (!pte) {
-			printk(KERN_ERR "%s: no pte tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-
-		consistent_pte = pte;
-	} while (0);
-
-	return ret;
-}
-
-core_initcall(dma_alloc_init);
-
-/*
- * make an area consistent.
- */
-void __dma_sync(void *vaddr, size_t size, int direction)
-{
-	unsigned long start = (unsigned long)vaddr;
-	unsigned long end   = start + size;
-
-	switch (direction) {
-	case DMA_NONE:
-		BUG();
-	case DMA_FROM_DEVICE:
-		/*
-		 * invalidate only when cache-line aligned otherwise there is
-		 * the potential for discarding uncommitted data from the cache
-		 */
-		if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1)))
-			flush_dcache_range(start, end);
-		else
-			invalidate_dcache_range(start, end);
-		break;
-	case DMA_TO_DEVICE:		/* writeback only */
-		clean_dcache_range(start, end);
-		break;
-	case DMA_BIDIRECTIONAL:	/* writeback and invalidate */
-		flush_dcache_range(start, end);
-		break;
-	}
-}
-EXPORT_SYMBOL(__dma_sync);
-
-#ifdef CONFIG_HIGHMEM
-/*
- * __dma_sync_page() implementation for systems using highmem.
- * In this case, each page of a buffer must be kmapped/kunmapped
- * in order to have a virtual address for __dma_sync(). This must
- * not sleep so kmap_atomic()/kunmap_atomic() are used.
- *
- * Note: yes, it is possible and correct to have a buffer extend
- * beyond the first page.
- */
-static inline void __dma_sync_page_highmem(struct page *page,
-		unsigned long offset, size_t size, int direction)
-{
-	size_t seg_size = min((size_t)(PAGE_SIZE - offset), size);
-	size_t cur_size = seg_size;
-	unsigned long flags, start, seg_offset = offset;
-	int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE;
-	int seg_nr = 0;
-
-	local_irq_save(flags);
-
-	do {
-		start = (unsigned long)kmap_atomic(page + seg_nr,
-				KM_PPC_SYNC_PAGE) + seg_offset;
-
-		/* Sync this buffer segment */
-		__dma_sync((void *)start, seg_size, direction);
-		kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE);
-		seg_nr++;
-
-		/* Calculate next buffer segment size */
-		seg_size = min((size_t)PAGE_SIZE, size - cur_size);
-
-		/* Add the segment size to our running total */
-		cur_size += seg_size;
-		seg_offset = 0;
-	} while (seg_nr < nr_segs);
-
-	local_irq_restore(flags);
-}
-#endif /* CONFIG_HIGHMEM */
-
-/*
- * __dma_sync_page makes memory consistent. identical to __dma_sync, but
- * takes a struct page instead of a virtual address
- */
-void __dma_sync_page(struct page *page, unsigned long offset,
-	size_t size, int direction)
-{
-#ifdef CONFIG_HIGHMEM
-	__dma_sync_page_highmem(page, offset, size, direction);
-#else
-	unsigned long start = (unsigned long)page_address(page) + offset;
-	__dma_sync((void *)start, size, direction);
-#endif
-}
-EXPORT_SYMBOL(__dma_sync_page);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 17290bc..b746f4c 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
 obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PPC_SUBPAGE_PROT)	+= subpage-prot.o
+obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
new file mode 100644
index 0000000..b7dc4c1
--- /dev/null
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -0,0 +1,426 @@
+/*
+ *  PowerPC version derived from arch/arm/mm/consistent.c
+ *    Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * Consistent memory allocators.  Used for DMA devices that want to
+ * share uncached memory with the processor core.  The function return
+ * is the virtual address and 'dma_handle' is the physical address.
+ * Mostly stolen from the ARM port, with some changes for PowerPC.
+ *						-- Dan
+ *
+ * Reorganized to get rid of the arch-specific consistent_* functions
+ * and provide non-coherent implementations for the DMA API. -Matt
+ *
+ * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
+ * implementation. This is pulled straight from ARM and barely
+ * modified. -Matt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/tlbflush.h>
+
+/*
+ * This address range defaults to a value that is safe for all
+ * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
+ * can be further configured for specific applications under
+ * the "Advanced Setup" menu. -Matt
+ */
+#define CONSISTENT_BASE	(CONFIG_CONSISTENT_START)
+#define CONSISTENT_END	(CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE)
+#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
+
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static DEFINE_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct vm_region	region;
+ *    unsigned long	flags;
+ *    struct page	**pages;
+ *    unsigned int	nr_pages;
+ *    unsigned long	phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call vm_region_alloc with an appropriate
+ * struct vm_region head (eg):
+ *
+ *  struct vm_region vmalloc_head = {
+ *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *	.vm_start	= VMALLOC_START,
+ *	.vm_end		= VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling vm_region_alloc().
+ */
+struct ppc_vm_region {
+	struct list_head	vm_list;
+	unsigned long		vm_start;
+	unsigned long		vm_end;
+};
+
+static struct ppc_vm_region consistent_head = {
+	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
+	.vm_start	= CONSISTENT_BASE,
+	.vm_end		= CONSISTENT_END,
+};
+
+static struct ppc_vm_region *
+ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
+{
+	unsigned long addr = head->vm_start, end = head->vm_end - size;
+	unsigned long flags;
+	struct ppc_vm_region *c, *new;
+
+	new = kmalloc(sizeof(struct ppc_vm_region), gfp);
+	if (!new)
+		goto out;
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if ((addr + size) < addr)
+			goto nospc;
+		if ((addr + size) <= c->vm_start)
+			goto found;
+		addr = c->vm_end;
+		if (addr > end)
+			goto nospc;
+	}
+
+ found:
+	/*
+	 * Insert this entry _before_ the one we found.
+	 */
+	list_add_tail(&new->vm_list, &c->vm_list);
+	new->vm_start = addr;
+	new->vm_end = addr + size;
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	return new;
+
+ nospc:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	kfree(new);
+ out:
+	return NULL;
+}
+
+static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
+{
+	struct ppc_vm_region *c;
+
+	list_for_each_entry(c, &head->vm_list, vm_list) {
+		if (c->vm_start == addr)
+			goto out;
+	}
+	c = NULL;
+ out:
+	return c;
+}
+
+/*
+ * Allocate DMA-coherent memory space and return both the kernel remapped
+ * virtual and bus address for that space.
+ */
+void *
+__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page;
+	struct ppc_vm_region *c;
+	unsigned long order;
+	u64 mask = 0x00ffffff, limit; /* ISA default */
+
+	if (!consistent_pte) {
+		printk(KERN_ERR "%s: not initialised\n", __func__);
+		dump_stack();
+		return NULL;
+	}
+
+	size = PAGE_ALIGN(size);
+	limit = (mask + 1) & ~mask;
+	if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) {
+		printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
+		       size, mask);
+		return NULL;
+	}
+
+	order = get_order(size);
+
+	if (mask != 0xffffffff)
+		gfp |= GFP_DMA;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		goto no_page;
+
+	/*
+	 * Invalidate any data that might be lurking in the
+	 * kernel direct-mapped region for device DMA.
+	 */
+	{
+		unsigned long kaddr = (unsigned long)page_address(page);
+		memset(page_address(page), 0, size);
+		flush_dcache_range(kaddr, kaddr + size);
+	}
+
+	/*
+	 * Allocate a virtual address in the consistent mapping region.
+	 */
+	c = ppc_vm_region_alloc(&consistent_head, size,
+			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+	if (c) {
+		unsigned long vaddr = c->vm_start;
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
+		struct page *end = page + (1 << order);
+
+		split_page(page, order);
+
+		/*
+		 * Set the "dma handle"
+		 */
+		*handle = page_to_phys(page);
+
+		do {
+			BUG_ON(!pte_none(*pte));
+
+			SetPageReserved(page);
+			set_pte_at(&init_mm, vaddr,
+				   pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
+			page++;
+			pte++;
+			vaddr += PAGE_SIZE;
+		} while (size -= PAGE_SIZE);
+
+		/*
+		 * Free the otherwise unused pages.
+		 */
+		while (page < end) {
+			__free_page(page);
+			page++;
+		}
+
+		return (void *)c->vm_start;
+	}
+
+	if (page)
+		__free_pages(page, order);
+ no_page:
+	return NULL;
+}
+EXPORT_SYMBOL(__dma_alloc_coherent);
+
+/*
+ * free a page as defined by the above mapping.
+ */
+void __dma_free_coherent(size_t size, void *vaddr)
+{
+	struct ppc_vm_region *c;
+	unsigned long flags, addr;
+	pte_t *ptep;
+
+	size = PAGE_ALIGN(size);
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
+	if (!c)
+		goto no_area;
+
+	if ((c->vm_end - c->vm_start) != size) {
+		printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+	addr = c->vm_start;
+	do {
+		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+		unsigned long pfn;
+
+		ptep++;
+		addr += PAGE_SIZE;
+
+		if (!pte_none(pte) && pte_present(pte)) {
+			pfn = pte_pfn(pte);
+
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+				ClearPageReserved(page);
+
+				__free_page(page);
+				continue;
+			}
+		}
+
+		printk(KERN_CRIT "%s: bad page in kernel page table\n",
+		       __func__);
+	} while (size -= PAGE_SIZE);
+
+	flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+	list_del(&c->vm_list);
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	kfree(c);
+	return;
+
+ no_area:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
+	       __func__, vaddr);
+	dump_stack();
+}
+EXPORT_SYMBOL(__dma_free_coherent);
+
+/*
+ * Initialise the consistent memory allocation.
+ */
+static int __init dma_alloc_init(void)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	int ret = 0;
+
+	do {
+		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
+		pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE);
+		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE);
+		if (!pmd) {
+			printk(KERN_ERR "%s: no pmd tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+
+		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
+		if (!pte) {
+			printk(KERN_ERR "%s: no pte tables\n", __func__);
+			ret = -ENOMEM;
+			break;
+		}
+
+		consistent_pte = pte;
+	} while (0);
+
+	return ret;
+}
+
+core_initcall(dma_alloc_init);
+
+/*
+ * make an area consistent.
+ */
+void __dma_sync(void *vaddr, size_t size, int direction)
+{
+	unsigned long start = (unsigned long)vaddr;
+	unsigned long end   = start + size;
+
+	switch (direction) {
+	case DMA_NONE:
+		BUG();
+	case DMA_FROM_DEVICE:
+		/*
+		 * invalidate only when cache-line aligned otherwise there is
+		 * the potential for discarding uncommitted data from the cache
+		 */
+		if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1)))
+			flush_dcache_range(start, end);
+		else
+			invalidate_dcache_range(start, end);
+		break;
+	case DMA_TO_DEVICE:		/* writeback only */
+		clean_dcache_range(start, end);
+		break;
+	case DMA_BIDIRECTIONAL:	/* writeback and invalidate */
+		flush_dcache_range(start, end);
+		break;
+	}
+}
+EXPORT_SYMBOL(__dma_sync);
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * __dma_sync_page() implementation for systems using highmem.
+ * In this case, each page of a buffer must be kmapped/kunmapped
+ * in order to have a virtual address for __dma_sync(). This must
+ * not sleep so kmap_atomic()/kunmap_atomic() are used.
+ *
+ * Note: yes, it is possible and correct to have a buffer extend
+ * beyond the first page.
+ */
+static inline void __dma_sync_page_highmem(struct page *page,
+		unsigned long offset, size_t size, int direction)
+{
+	size_t seg_size = min((size_t)(PAGE_SIZE - offset), size);
+	size_t cur_size = seg_size;
+	unsigned long flags, start, seg_offset = offset;
+	int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE;
+	int seg_nr = 0;
+
+	local_irq_save(flags);
+
+	do {
+		start = (unsigned long)kmap_atomic(page + seg_nr,
+				KM_PPC_SYNC_PAGE) + seg_offset;
+
+		/* Sync this buffer segment */
+		__dma_sync((void *)start, seg_size, direction);
+		kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE);
+		seg_nr++;
+
+		/* Calculate next buffer segment size */
+		seg_size = min((size_t)PAGE_SIZE, size - cur_size);
+
+		/* Add the segment size to our running total */
+		cur_size += seg_size;
+		seg_offset = 0;
+	} while (seg_nr < nr_segs);
+
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * __dma_sync_page makes memory consistent. identical to __dma_sync, but
+ * takes a struct page instead of a virtual address
+ */
+void __dma_sync_page(struct page *page, unsigned long offset,
+	size_t size, int direction)
+{
+#ifdef CONFIG_HIGHMEM
+	__dma_sync_page_highmem(page, offset, size, direction);
+#else
+	unsigned long start = (unsigned long)page_address(page) + offset;
+	__dma_sync((void *)start, size, direction);
+#endif
+}
+EXPORT_SYMBOL(__dma_sync_page);
-- 
cgit v0.10.2


From f637a49e507c88354ab32b5d914e06acfb7ee00d Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 27 May 2009 13:44:50 +1000
Subject: powerpc: Minor cleanups of kernel virt address space definitions

Make FIXADDR_TOP a compile time constant and cleanup a
couple of definitions relative to the layout of the kernel
address space on ppc32. We also print out that layout at
boot time for debugging purposes.

This is a pre-requisite for properly fixing non-coherent
DMA allocactions.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index d60fd18..f1f4e23 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -14,8 +14,6 @@
 #ifndef _ASM_FIXMAP_H
 #define _ASM_FIXMAP_H
 
-extern unsigned long FIXADDR_TOP;
-
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
 #include <asm/page.h>
@@ -24,6 +22,8 @@ extern unsigned long FIXADDR_TOP;
 #include <asm/kmap_types.h>
 #endif
 
+#define FIXADDR_TOP	((unsigned long)(-PAGE_SIZE))
+
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index ba45c99..28fe9d4 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -10,7 +10,7 @@
 
 extern unsigned long va_to_phys(unsigned long address);
 extern pte_t *va_to_pte(unsigned long address);
-extern unsigned long ioremap_bot, ioremap_base;
+extern unsigned long ioremap_bot;
 
 #ifdef CONFIG_44x
 extern int icache_44x_need_flush;
@@ -56,8 +56,26 @@ extern int icache_44x_need_flush;
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
 /*
+ * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
+ * value (for now) on others, from where we can start layout kernel
+ * virtual space that goes below PKMAP and FIXMAP
+ */
+#ifdef CONFIG_HIGHMEM
+#define KVIRT_TOP	PKMAP_BASE
+#else
+#define KVIRT_TOP	(0xfe000000UL)	/* for now, could be FIXMAP_BASE ? */
+#endif
+
+/*
+ * ioremap_bot starts at that address. Early ioremaps move down from there,
+ * until mem_init() at which point this becomes the top of the vmalloc
+ * and ioremap space
+ */
+#define IOREMAP_TOP	KVIRT_TOP
+
+/*
  * Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 64MB value just means that there will be a 64MB "hole" after the
+ * current 16MB value just means that there will be a 64MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
  * any out-of-bounds memory accesses will hopefully be caught.
  * The vmalloc() routines leaves a hole of 4kB between each vmalloced
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 666a5e8..3de6a0d9 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -168,12 +168,8 @@ void __init MMU_init(void)
 		ppc_md.progress("MMU:mapin", 0x301);
 	mapin_ram();
 
-#ifdef CONFIG_HIGHMEM
-	ioremap_base = PKMAP_BASE;
-#else
-	ioremap_base = 0xfe000000UL;	/* for now, could be 0xfffff000 */
-#endif /* CONFIG_HIGHMEM */
-	ioremap_bot = ioremap_base;
+	/* Initialize early top-down ioremap allocator */
+	ioremap_bot = IOREMAP_TOP;
 
 	/* Map in I/O resources */
 	if (ppc_md.progress)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d0602a7..d3a4e67 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -380,6 +380,19 @@ void __init mem_init(void)
 		bsssize >> 10,
 		initsize >> 10);
 
+#ifdef CONFIG_PPC32
+	pr_info("Kernel virtual memory layout:\n");
+	pr_info("  * 0x%08lx..0x%08lx  : fixmap\n", FIXADDR_START, FIXADDR_TOP);
+#ifdef CONFIG_HIGHMEM
+	pr_info("  * 0x%08lx..0x%08lx  : highmem PTEs\n",
+		PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
+#endif /* CONFIG_HIGHMEM */
+	pr_info("  * 0x%08lx..0x%08lx  : early ioremap\n",
+		ioremap_bot, IOREMAP_TOP);
+	pr_info("  * 0x%08lx..0x%08lx  : vmalloc & ioremap\n",
+		VMALLOC_START, VMALLOC_END);
+#endif /* CONFIG_PPC32 */
+
 	mem_init_done = 1;
 }
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 430d090..5422169 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -399,8 +399,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
 static int fixmaps;
-unsigned long FIXADDR_TOP = (-PAGE_SIZE);
-EXPORT_SYMBOL(FIXADDR_TOP);
 
 void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
 {
-- 
cgit v0.10.2


From 8b31e49d1d75729c1da9009664ba52abd1adc628 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 27 May 2009 13:50:33 +1000
Subject: powerpc: Fix up dma_alloc_coherent() on platforms without cache
 coherency.

The implementation we just revived has issues, such as using a
Kconfig-defined virtual address area in kernel space that nothing
actually carves out (and thus will overlap whatever is there),
or having some dependencies on being self contained in a single
PTE page which adds unnecessary constraints on the kernel virtual
address space.

This fixes it by using more classic PTE accessors and automatically
locating the area for consistent memory, carving an appropriate hole
in the kernel virtual address space, leaving only the size of that
area as a Kconfig option. It also brings some dma-mask related fixes
from the ARM implementation which was almost identical initially but
grew its own fixes.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3bb43ad..cdc9a6f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -868,19 +868,6 @@ config TASK_SIZE
 	default "0x80000000" if PPC_PREP || PPC_8xx
 	default "0xc0000000"
 
-config CONSISTENT_START_BOOL
-	bool "Set custom consistent memory pool address"
-	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
-	help
-	  This option allows you to set the base virtual address
-	  of the consistent memory pool.  This pool of virtual
-	  memory is used to make consistent memory allocations.
-
-config CONSISTENT_START
-	hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL
-	default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx)
-	default "0xff100000" if NOT_COHERENT_CACHE
-
 config CONSISTENT_SIZE_BOOL
 	bool "Set custom consistent memory pool size"
 	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index c69f2b5..cb448d68 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -26,7 +26,9 @@
  * allocate the space "normally" and use the cache management functions
  * to ensure it is consistent.
  */
-extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp);
+struct device;
+extern void *__dma_alloc_coherent(struct device *dev, size_t size,
+				  dma_addr_t *handle, gfp_t gfp);
 extern void __dma_free_coherent(size_t size, void *vaddr);
 extern void __dma_sync(void *vaddr, size_t size, int direction);
 extern void __dma_sync_page(struct page *page, unsigned long offset,
@@ -37,7 +39,7 @@ extern void __dma_sync_page(struct page *page, unsigned long offset,
  * Cache coherent cores.
  */
 
-#define __dma_alloc_coherent(gfp, size, handle)	NULL
+#define __dma_alloc_coherent(dev, gfp, size, handle)	NULL
 #define __dma_free_coherent(size, addr)		((void)0)
 #define __dma_sync(addr, size, rw)		((void)0)
 #define __dma_sync_page(pg, off, sz, rw)	((void)0)
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index 28fe9d4..c9ff9d7 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -71,7 +71,11 @@ extern int icache_44x_need_flush;
  * until mem_init() at which point this becomes the top of the vmalloc
  * and ioremap space
  */
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define IOREMAP_TOP	((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#else
 #define IOREMAP_TOP	KVIRT_TOP
+#endif
 
 /*
  * Just any arbitrary offset to the start of the vmalloc VM area: the
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 53c7788..6b02793 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -32,7 +32,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 {
 	void *ret;
 #ifdef CONFIG_NOT_COHERENT_CACHE
-	ret = __dma_alloc_coherent(size, dma_handle, flag);
+	ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
 	if (ret == NULL)
 		return NULL;
 	*dma_handle += get_dma_direct_offset(dev);
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index b7dc4c1..36692f5 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -32,20 +32,21 @@
 
 #include <asm/tlbflush.h>
 
+#include "mmu_decl.h"
+
 /*
  * This address range defaults to a value that is safe for all
  * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
  * can be further configured for specific applications under
  * the "Advanced Setup" menu. -Matt
  */
-#define CONSISTENT_BASE	(CONFIG_CONSISTENT_START)
-#define CONSISTENT_END	(CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE)
+#define CONSISTENT_BASE		(IOREMAP_TOP)
+#define CONSISTENT_END 		(CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE)
 #define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
 
 /*
  * This is the page table (2MB) covering uncached, DMA consistent allocations
  */
-static pte_t *consistent_pte;
 static DEFINE_SPINLOCK(consistent_lock);
 
 /*
@@ -148,22 +149,38 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi
  * virtual and bus address for that space.
  */
 void *
-__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
+__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	struct page *page;
 	struct ppc_vm_region *c;
 	unsigned long order;
-	u64 mask = 0x00ffffff, limit; /* ISA default */
+	u64 mask = ISA_DMA_THRESHOLD, limit;
 
-	if (!consistent_pte) {
-		printk(KERN_ERR "%s: not initialised\n", __func__);
-		dump_stack();
-		return NULL;
+	if (dev) {
+		mask = dev->coherent_dma_mask;
+
+		/*
+		 * Sanity check the DMA mask - it must be non-zero, and
+		 * must be able to be satisfied by a DMA allocation.
+		 */
+		if (mask == 0) {
+			dev_warn(dev, "coherent DMA mask is unset\n");
+			goto no_page;
+		}
+
+		if ((~mask) & ISA_DMA_THRESHOLD) {
+			dev_warn(dev, "coherent DMA mask %#llx is smaller "
+				 "than system GFP_DMA mask %#llx\n",
+				 mask, (unsigned long long)ISA_DMA_THRESHOLD);
+			goto no_page;
+		}
 	}
 
+
 	size = PAGE_ALIGN(size);
 	limit = (mask + 1) & ~mask;
-	if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) {
+	if ((limit && size >= limit) ||
+	    size >= (CONSISTENT_END - CONSISTENT_BASE)) {
 		printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
 		       size, mask);
 		return NULL;
@@ -171,6 +188,7 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 
 	order = get_order(size);
 
+	/* Might be useful if we ever have a real legacy DMA zone... */
 	if (mask != 0xffffffff)
 		gfp |= GFP_DMA;
 
@@ -195,7 +213,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
 	if (c) {
 		unsigned long vaddr = c->vm_start;
-		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
 		struct page *end = page + (1 << order);
 
 		split_page(page, order);
@@ -206,13 +223,10 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 		*handle = page_to_phys(page);
 
 		do {
-			BUG_ON(!pte_none(*pte));
-
 			SetPageReserved(page);
-			set_pte_at(&init_mm, vaddr,
-				   pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
+			map_page(vaddr, page_to_phys(page),
+				 pgprot_noncached(PAGE_KERNEL));
 			page++;
-			pte++;
 			vaddr += PAGE_SIZE;
 		} while (size -= PAGE_SIZE);
 
@@ -241,8 +255,7 @@ void __dma_free_coherent(size_t size, void *vaddr)
 {
 	struct ppc_vm_region *c;
 	unsigned long flags, addr;
-	pte_t *ptep;
-
+	
 	size = PAGE_ALIGN(size);
 
 	spin_lock_irqsave(&consistent_lock, flags);
@@ -258,29 +271,26 @@ void __dma_free_coherent(size_t size, void *vaddr)
 		size = c->vm_end - c->vm_start;
 	}
 
-	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
 	addr = c->vm_start;
 	do {
-		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+		pte_t *ptep;
 		unsigned long pfn;
 
-		ptep++;
-		addr += PAGE_SIZE;
-
-		if (!pte_none(pte) && pte_present(pte)) {
-			pfn = pte_pfn(pte);
-
+		ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr),
+							       addr),
+						    addr),
+					 addr);
+		if (!pte_none(*ptep) && pte_present(*ptep)) {
+			pfn = pte_pfn(*ptep);
+			pte_clear(&init_mm, addr, ptep);
 			if (pfn_valid(pfn)) {
 				struct page *page = pfn_to_page(pfn);
-				ClearPageReserved(page);
 
+				ClearPageReserved(page);
 				__free_page(page);
-				continue;
 			}
 		}
-
-		printk(KERN_CRIT "%s: bad page in kernel page table\n",
-		       __func__);
+		addr += PAGE_SIZE;
 	} while (size -= PAGE_SIZE);
 
 	flush_tlb_kernel_range(c->vm_start, c->vm_end);
@@ -301,42 +311,6 @@ void __dma_free_coherent(size_t size, void *vaddr)
 EXPORT_SYMBOL(__dma_free_coherent);
 
 /*
- * Initialise the consistent memory allocation.
- */
-static int __init dma_alloc_init(void)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	int ret = 0;
-
-	do {
-		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
-		pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE);
-		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE);
-		if (!pmd) {
-			printk(KERN_ERR "%s: no pmd tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-
-		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
-		if (!pte) {
-			printk(KERN_ERR "%s: no pte tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-
-		consistent_pte = pte;
-	} while (0);
-
-	return ret;
-}
-
-core_initcall(dma_alloc_init);
-
-/*
  * make an area consistent.
  */
 void __dma_sync(void *vaddr, size_t size, int direction)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d3a4e67..579382c 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -387,6 +387,10 @@ void __init mem_init(void)
 	pr_info("  * 0x%08lx..0x%08lx  : highmem PTEs\n",
 		PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
 #endif /* CONFIG_HIGHMEM */
+#ifdef CONFIG_NOT_COHERENT_CACHE
+	pr_info("  * 0x%08lx..0x%08lx  : consistent mem\n",
+		IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE);
+#endif /* CONFIG_NOT_COHERENT_CACHE */
 	pr_info("  * 0x%08lx..0x%08lx  : early ioremap\n",
 		ioremap_bot, IOREMAP_TOP);
 	pr_info("  * 0x%08lx..0x%08lx  : vmalloc & ioremap\n",
-- 
cgit v0.10.2


From 23ac9cbed82b00ca3520bb81dbe9ea3b7a936a1b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 May 2009 09:33:18 +0200
Subject: perf_counter tools: Rename output.perf to perf.data

output.perf is only output to perf-record - it's input to
perf-report. So change it to a more direction-neutral name.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt
index d07700e..353db1b 100644
--- a/Documentation/perf_counter/Documentation/perf-record.txt
+++ b/Documentation/perf_counter/Documentation/perf-record.txt
@@ -3,7 +3,7 @@ perf-record(1)
 
 NAME
 ----
-perf-record - Run a command and record its profile into output.perf
+perf-record - Run a command and record its profile into perf.data
 
 SYNOPSIS
 --------
@@ -13,7 +13,7 @@ SYNOPSIS
 DESCRIPTION
 -----------
 This command runs a command and gathers a performance counter profile
-from it, into output.perf - without displaying anything.
+from it, into perf.data - without displaying anything.
 
 This file can then be inspected later on, using 'perf report'.
 
diff --git a/Documentation/perf_counter/Documentation/perf-report.txt b/Documentation/perf_counter/Documentation/perf-report.txt
index 64696a2..49efe16 100644
--- a/Documentation/perf_counter/Documentation/perf-report.txt
+++ b/Documentation/perf_counter/Documentation/perf-report.txt
@@ -3,7 +3,7 @@ perf-report(1)
 
 NAME
 ----
-perf-report - Read output.perf (created by perf record) and display the profile
+perf-report - Read perf.data (created by perf record) and display the profile
 
 SYNOPSIS
 --------
@@ -19,7 +19,7 @@ OPTIONS
 -------
 -i::
 --input=::
-        Input file name. (default: output.perf)
+        Input file name. (default: perf.data)
 
 Configuration
 -------------
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 68abfdf..431077a 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -19,7 +19,7 @@ static int			nr_cpus				=  0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			= 16;
 static int			output;
-static const char		*output_name			= "output.perf";
+static const char		*output_name			= "perf.data";
 static int			group				= 0;
 static unsigned int		realtime_prio			= 0;
 static int			system_wide			= 0;
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 7f1255d..e2712cd 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -18,7 +18,7 @@
 #define SHOW_USER	2
 #define SHOW_HV		4
 
-static char		const *input_name = "output.perf";
+static char		const *input_name = "perf.data";
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
-- 
cgit v0.10.2


From a930d2c0d0a685ab955472b08baad041cc5edb4a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 May 2009 09:50:13 +0200
Subject: perf_counter tools: Add built-in pager support

Add Git's pager.c (and sigchain) code. A command only
has to call setup_pager() to get paged interactive
output.

Non-interactive (redirected, command-piped, etc.) uses
are not affected.

Update perf-report to make use of this.

[ Impact: new feature ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index efb0589..51b13f9 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -297,11 +297,13 @@ LIB_H += util/util.h
 LIB_H += util/help.h
 LIB_H += util/strbuf.h
 LIB_H += util/run-command.h
+LIB_H += util/sigchain.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
 LIB_OBJS += util/config.o
 LIB_OBJS += util/ctype.o
+LIB_OBJS += util/environment.o
 LIB_OBJS += util/exec_cmd.o
 LIB_OBJS += util/help.o
 LIB_OBJS += util/levenshtein.o
@@ -314,6 +316,8 @@ LIB_OBJS += util/quote.o
 LIB_OBJS += util/strbuf.o
 LIB_OBJS += util/usage.o
 LIB_OBJS += util/wrapper.o
+LIB_OBJS += util/sigchain.o
+LIB_OBJS += util/pager.o
 
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-record.o
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index e2712cd..9aef7c5 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -7,6 +7,7 @@
 #include <ctype.h>
 
 #include "util/list.h"
+#include "util/cache.h"
 #include "util/rbtree.h"
 
 #include "perf.h"
@@ -992,5 +993,7 @@ int cmd_report(int argc, const char **argv, const char *prefix)
 
 	parse_options(argc, argv, options, report_usage, 0);
 
+	setup_pager();
+
 	return __cmd_report();
 }
diff --git a/Documentation/perf_counter/util/environment.c b/Documentation/perf_counter/util/environment.c
new file mode 100644
index 0000000..9b1c819
--- /dev/null
+++ b/Documentation/perf_counter/util/environment.c
@@ -0,0 +1,8 @@
+/*
+ * We put all the perf config variables in this same object
+ * file, so that programs can link against the config parser
+ * without having to link against all the rest of perf.
+ */
+#include "cache.h"
+
+const char *pager_program;
diff --git a/Documentation/perf_counter/util/pager.c b/Documentation/perf_counter/util/pager.c
new file mode 100644
index 0000000..a28bcca
--- /dev/null
+++ b/Documentation/perf_counter/util/pager.c
@@ -0,0 +1,99 @@
+#include "cache.h"
+#include "run-command.h"
+#include "sigchain.h"
+
+/*
+ * This is split up from the rest of git so that we can do
+ * something different on Windows.
+ */
+
+static int spawned_pager;
+
+#ifndef __MINGW32__
+static void pager_preexec(void)
+{
+	/*
+	 * Work around bug in "less" by not starting it until we
+	 * have real input
+	 */
+	fd_set in;
+
+	FD_ZERO(&in);
+	FD_SET(0, &in);
+	select(1, &in, NULL, &in, NULL);
+
+	setenv("LESS", "FRSX", 0);
+}
+#endif
+
+static const char *pager_argv[] = { "sh", "-c", NULL, NULL };
+static struct child_process pager_process;
+
+static void wait_for_pager(void)
+{
+	fflush(stdout);
+	fflush(stderr);
+	/* signal EOF to pager */
+	close(1);
+	close(2);
+	finish_command(&pager_process);
+}
+
+static void wait_for_pager_signal(int signo)
+{
+	wait_for_pager();
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+void setup_pager(void)
+{
+	const char *pager = getenv("PERF_PAGER");
+
+	if (!isatty(1))
+		return;
+	if (!pager) {
+		if (!pager_program)
+			perf_config(perf_default_config, NULL);
+		pager = pager_program;
+	}
+	if (!pager)
+		pager = getenv("PAGER");
+	if (!pager)
+		pager = "less";
+	else if (!*pager || !strcmp(pager, "cat"))
+		return;
+
+	spawned_pager = 1; /* means we are emitting to terminal */
+
+	/* spawn the pager */
+	pager_argv[2] = pager;
+	pager_process.argv = pager_argv;
+	pager_process.in = -1;
+#ifndef __MINGW32__
+	pager_process.preexec_cb = pager_preexec;
+#endif
+	if (start_command(&pager_process))
+		return;
+
+	/* original process continues, but writes to the pipe */
+	dup2(pager_process.in, 1);
+	if (isatty(2))
+		dup2(pager_process.in, 2);
+	close(pager_process.in);
+
+	/* this makes sure that the parent terminates after the pager */
+	sigchain_push_common(wait_for_pager_signal);
+	atexit(wait_for_pager);
+}
+
+int pager_in_use(void)
+{
+	const char *env;
+
+	if (spawned_pager)
+		return 1;
+
+	env = getenv("PERF_PAGER_IN_USE");
+	return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0;
+}
diff --git a/Documentation/perf_counter/util/sigchain.c b/Documentation/perf_counter/util/sigchain.c
new file mode 100644
index 0000000..1118b99
--- /dev/null
+++ b/Documentation/perf_counter/util/sigchain.c
@@ -0,0 +1,52 @@
+#include "sigchain.h"
+#include "cache.h"
+
+#define SIGCHAIN_MAX_SIGNALS 32
+
+struct sigchain_signal {
+	sigchain_fun *old;
+	int n;
+	int alloc;
+};
+static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS];
+
+static void check_signum(int sig)
+{
+	if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS)
+		die("BUG: signal out of range: %d", sig);
+}
+
+int sigchain_push(int sig, sigchain_fun f)
+{
+	struct sigchain_signal *s = signals + sig;
+	check_signum(sig);
+
+	ALLOC_GROW(s->old, s->n + 1, s->alloc);
+	s->old[s->n] = signal(sig, f);
+	if (s->old[s->n] == SIG_ERR)
+		return -1;
+	s->n++;
+	return 0;
+}
+
+int sigchain_pop(int sig)
+{
+	struct sigchain_signal *s = signals + sig;
+	check_signum(sig);
+	if (s->n < 1)
+		return 0;
+
+	if (signal(sig, s->old[s->n - 1]) == SIG_ERR)
+		return -1;
+	s->n--;
+	return 0;
+}
+
+void sigchain_push_common(sigchain_fun f)
+{
+	sigchain_push(SIGINT, f);
+	sigchain_push(SIGHUP, f);
+	sigchain_push(SIGTERM, f);
+	sigchain_push(SIGQUIT, f);
+	sigchain_push(SIGPIPE, f);
+}
diff --git a/Documentation/perf_counter/util/sigchain.h b/Documentation/perf_counter/util/sigchain.h
new file mode 100644
index 0000000..618083bc
--- /dev/null
+++ b/Documentation/perf_counter/util/sigchain.h
@@ -0,0 +1,11 @@
+#ifndef SIGCHAIN_H
+#define SIGCHAIN_H
+
+typedef void (*sigchain_fun)(int);
+
+int sigchain_push(int sig, sigchain_fun f);
+int sigchain_pop(int sig);
+
+void sigchain_push_common(sigchain_fun f);
+
+#endif /* SIGCHAIN_H */
-- 
cgit v0.10.2


From 6af3fb72d2437239e5eb13a59e95dc43ccab3e8f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 27 May 2009 10:49:26 +0200
Subject: ALSA: Fix invalid jiffies check after pause

The hw_ptr_jiffies has to be reset properly to avoid the invalid
check of jiffies delta in snd_pcm_update_hw_ptr*() functions.
Especailly this patch fixes the bogus jiffies check after the puase
and resume.

This patch is a modified version of the original patch by Jaroslav.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index a2a792c..3eea98a 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1478,7 +1478,6 @@ static int snd_pcm_lib_ioctl_reset(struct snd_pcm_substream *substream,
 		runtime->status->hw_ptr %= runtime->buffer_size;
 	else
 		runtime->status->hw_ptr = 0;
-	runtime->hw_ptr_jiffies = jiffies;
 	snd_pcm_stream_unlock_irqrestore(substream, flags);
 	return 0;
 }
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index fc6f98e..b5da656 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -848,6 +848,7 @@ static void snd_pcm_post_start(struct snd_pcm_substream *substream, int state)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	snd_pcm_trigger_tstamp(substream);
+	runtime->hw_ptr_jiffies = jiffies;
 	runtime->status->state = state;
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
 	    runtime->silence_size > 0)
@@ -961,6 +962,11 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push)
 {
 	if (substream->runtime->trigger_master != substream)
 		return 0;
+	/* The jiffies check in snd_pcm_update_hw_ptr*() is done by
+	 * a delta betwen the current jiffies, this gives a large enough
+	 * delta, effectively to skip the check once.
+	 */
+	substream->runtime->hw_ptr_jiffies = jiffies - HZ * 1000;
 	return substream->ops->trigger(substream,
 				       push ? SNDRV_PCM_TRIGGER_PAUSE_PUSH :
 					      SNDRV_PCM_TRIGGER_PAUSE_RELEASE);
-- 
cgit v0.10.2


From 9d911d7903926a65ef49ec671bacd86bcee5eb51 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Thu, 21 May 2009 16:21:15 -0600
Subject: PCI Hotplug: acpiphp: don't store a pci_dev in acpiphp_func

An oops can occur if a user attempts to use both PCI logical
hotplug and the ACPI physical hotplug driver (acpiphp) in this
sequence, where $slot/address == $device.

In other words, if acpiphp has claimed a PCI device, and that
device is logically removed, then acpiphp may oops when it
attempts to access it again.

	# echo 1 > /sys/bus/pci/devices/$device/remove
	# echo 0 > /sys/bus/pci/slots/$slot/power

Unable to handle kernel NULL pointer dereference (address 0000000000000000)
Call Trace:
 [<a000000100016390>] show_stack+0x50/0xa0
 [<a000000100016c60>] show_regs+0x820/0x860
 [<a00000010003b390>] die+0x190/0x2a0
 [<a000000100066a40>] ia64_do_page_fault+0x8e0/0xa40
 [<a00000010000c7a0>] ia64_native_leave_kernel+0x0/0x270
 [<a0000001003b2660>] pci_remove_bus_device+0x120/0x260
 [<a0000002060549f0>] acpiphp_disable_slot+0x410/0x540 [acpiphp]
 [<a0000002060505c0>] disable_slot+0xc0/0x120 [acpiphp]
 [<a0000002040d21c0>] power_write_file+0x1e0/0x2a0 [pci_hotplug]
 [<a0000001003bb820>] pci_slot_attr_store+0x60/0xa0
 [<a000000100240f70>] sysfs_write_file+0x230/0x2c0
 [<a000000100195750>] vfs_write+0x190/0x2e0
 [<a0000001001961a0>] sys_write+0x80/0x100
 [<a00000010000c600>] ia64_ret_from_syscall+0x0/0x20
 [<a000000000010720>] __kernel_syscall_via_break+0x0/0x20

The root cause of this oops is that the logical remove ("echo 1 >
/sys/bus/pci/devices/$device/remove") destroyed the pci_dev. The
pci_dev struct itself wasn't deallocated because acpiphp kept a
reference, but some of its fields became invalid.

acpiphp doesn't have any real reason to keep a pointer to a
pci_dev around. It can always derive it using pci_get_slot().

If a logical remove destroys the pci_dev, acpiphp won't find it
and is thus prevented from causing mischief.

Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Tested-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Reported-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index 4fc168b..e68d5f2 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -129,7 +129,6 @@ struct acpiphp_func {
 	struct acpiphp_bridge *bridge;	/* Ejectable PCI-to-PCI bridge */
 
 	struct list_head sibling;
-	struct pci_dev *pci_dev;
 	struct notifier_block nb;
 	acpi_handle	handle;
 
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index a33794d..3a6064bc 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -32,9 +32,6 @@
 
 /*
  * Lifetime rules for pci_dev:
- *  - The one in acpiphp_func has its refcount elevated by pci_get_slot()
- *    when the driver is loaded or when an insertion event occurs.  It loses
- *    a refcount when its ejected or the driver unloads.
  *  - The one in acpiphp_bridge has its refcount elevated by pci_get_slot()
  *    when the bridge is scanned and it loses a refcount when the bridge
  *    is removed.
@@ -130,6 +127,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 	unsigned long long adr, sun;
 	int device, function, retval;
 	struct pci_bus *pbus = bridge->pci_bus;
+	struct pci_dev *pdev;
 
 	if (!acpi_pci_check_ejectable(pbus, handle) && !is_dock_device(handle))
 		return AE_OK;
@@ -213,10 +211,10 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 	newfunc->slot = slot;
 	list_add_tail(&newfunc->sibling, &slot->funcs);
 
-	/* associate corresponding pci_dev */
-	newfunc->pci_dev = pci_get_slot(pbus, PCI_DEVFN(device, function));
-	if (newfunc->pci_dev) {
+	pdev = pci_get_slot(pbus, PCI_DEVFN(device, function));
+	if (pdev) {
 		slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON);
+		pci_dev_put(pdev);
 	}
 
 	if (is_dock_device(handle)) {
@@ -617,7 +615,6 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge)
 				if (ACPI_FAILURE(status))
 					err("failed to remove notify handler\n");
 			}
-			pci_dev_put(func->pci_dev);
 			list_del(list);
 			kfree(func);
 		}
@@ -1101,22 +1098,24 @@ static int __ref enable_device(struct acpiphp_slot *slot)
 	pci_enable_bridges(bus);
 	pci_bus_add_devices(bus);
 
-	/* associate pci_dev to our representation */
 	list_for_each (l, &slot->funcs) {
 		func = list_entry(l, struct acpiphp_func, sibling);
-		func->pci_dev = pci_get_slot(bus, PCI_DEVFN(slot->device,
-							func->function));
-		if (!func->pci_dev)
+		dev = pci_get_slot(bus, PCI_DEVFN(slot->device,
+						  func->function));
+		if (!dev)
 			continue;
 
-		if (func->pci_dev->hdr_type != PCI_HEADER_TYPE_BRIDGE &&
-		    func->pci_dev->hdr_type != PCI_HEADER_TYPE_CARDBUS)
+		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE &&
+		    dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) {
+			pci_dev_put(dev);
 			continue;
+		}
 
 		status = find_p2p_bridge(func->handle, (u32)1, bus, NULL);
 		if (ACPI_FAILURE(status))
 			warn("find_p2p_bridge failed (error code = 0x%x)\n",
 				status);
+		pci_dev_put(dev);
 	}
 
 	slot->flags |= SLOT_ENABLED;
@@ -1142,17 +1141,14 @@ static void disable_bridges(struct pci_bus *bus)
  */
 static int disable_device(struct acpiphp_slot *slot)
 {
-	int retval = 0;
 	struct acpiphp_func *func;
-	struct list_head *l;
+	struct pci_dev *pdev;
 
 	/* is this slot already disabled? */
 	if (!(slot->flags & SLOT_ENABLED))
 		goto err_exit;
 
-	list_for_each (l, &slot->funcs) {
-		func = list_entry(l, struct acpiphp_func, sibling);
-
+	list_for_each_entry(func, &slot->funcs, sibling) {
 		if (func->bridge) {
 			/* cleanup p2p bridges under this P2P bridge */
 			cleanup_p2p_bridge(func->bridge->handle,
@@ -1160,35 +1156,28 @@ static int disable_device(struct acpiphp_slot *slot)
 			func->bridge = NULL;
 		}
 
-		if (func->pci_dev) {
-			pci_stop_bus_device(func->pci_dev);
-			if (func->pci_dev->subordinate) {
-				disable_bridges(func->pci_dev->subordinate);
-				pci_disable_device(func->pci_dev);
+		pdev = pci_get_slot(slot->bridge->pci_bus,
+				    PCI_DEVFN(slot->device, func->function));
+		if (pdev) {
+			pci_stop_bus_device(pdev);
+			if (pdev->subordinate) {
+				disable_bridges(pdev->subordinate);
+				pci_disable_device(pdev);
 			}
+			pci_remove_bus_device(pdev);
+			pci_dev_put(pdev);
 		}
 	}
 
-	list_for_each (l, &slot->funcs) {
-		func = list_entry(l, struct acpiphp_func, sibling);
-
+	list_for_each_entry(func, &slot->funcs, sibling) {
 		acpiphp_unconfigure_ioapics(func->handle);
 		acpiphp_bus_trim(func->handle);
-		/* try to remove anyway.
-		 * acpiphp_bus_add might have been failed */
-
-		if (!func->pci_dev)
-			continue;
-
-		pci_remove_bus_device(func->pci_dev);
-		pci_dev_put(func->pci_dev);
-		func->pci_dev = NULL;
 	}
 
 	slot->flags &= (~SLOT_ENABLED);
 
- err_exit:
-	return retval;
+err_exit:
+	return 0;
 }
 
 
-- 
cgit v0.10.2


From c87d9732004b3f8fd82d729f12ccfb96c0df279e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 27 May 2009 10:53:33 +0200
Subject: ALSA: Enable PCM hw_ptr_jiffies check only in xrun_debug mode

The PCM hw_ptr jiffies check results sometimes in problems when a
hardware doesn't give smooth hw_ptr updates.  So far, au88x0 and some
other drivers appear not working due to this strict check.
However, this check is a nice debug tool, and the capability should be
still kept.

Hence, we disable this check now as default unless the user enables it
by setting the xrun_debug mode to the specific stream via a proc file.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt
index bba2dbb..cfac20c 100644
--- a/Documentation/sound/alsa/Procfile.txt
+++ b/Documentation/sound/alsa/Procfile.txt
@@ -104,6 +104,11 @@ card*/pcm*/xrun_debug
 	When this value is greater than 1, the driver will show the
 	stack trace additionally.  This may help the debugging.
 
+	Since 2.6.30, this option also enables the hwptr check using
+	jiffies.  This detects spontaneous invalid pointer callback
+	values, but can be lead to too much corrections for a (mostly
+	buggy) hardware that doesn't give smooth pointer updates.
+
 card*/pcm*/sub*/info
 	The general information of this PCM sub-stream.
 
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 3eea98a..d659995 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -249,6 +249,11 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream)
 			new_hw_ptr = hw_base + pos;
 		}
 	}
+
+	/* Do jiffies check only in xrun_debug mode */
+	if (!xrun_debug(substream))
+		goto no_jiffies_check;
+
 	/* Skip the jiffies check for hardwares with BATCH flag.
 	 * Such hardware usually just increases the position at each IRQ,
 	 * thus it can't give any strange position.
@@ -336,7 +341,9 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream)
 			hw_base = 0;
 		new_hw_ptr = hw_base + pos;
 	}
-	if (((delta * HZ) / runtime->rate) > jdelta + HZ/100) {
+	/* Do jiffies check only in xrun_debug mode */
+	if (xrun_debug(substream) &&
+	    ((delta * HZ) / runtime->rate) > jdelta + HZ/100) {
 		hw_ptr_error(substream,
 			     "hw_ptr skipping! "
 			     "(pos=%ld, delta=%ld, period=%ld, jdelta=%lu/%lu)\n",
-- 
cgit v0.10.2


From 55de5ef970c680d8d75f2a9aa7e4f172140dbd9c Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Wed, 27 May 2009 10:49:30 +0200
Subject: sound: usb-audio: make the MotU Fastlane work again

Kernel 2.6.18 broke the MotU Fastlane, which uses duplicate endpoint
numbers in a manner that is not only illegal but also confuses the
kernel's endpoint descriptor caching mechanism.  To work around this, we
have to add a separate usb_set_interface() call to guide the USB core to
the correct descriptors.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Reported-and-tested-by: David Fries <david@fries.net>
Cc: <stable@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 823296d..a6b8848 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -3347,7 +3347,7 @@ static int snd_usb_create_quirk(struct snd_usb_audio *chip,
 		[QUIRK_MIDI_YAMAHA] = snd_usb_create_midi_interface,
 		[QUIRK_MIDI_MIDIMAN] = snd_usb_create_midi_interface,
 		[QUIRK_MIDI_NOVATION] = snd_usb_create_midi_interface,
-		[QUIRK_MIDI_RAW] = snd_usb_create_midi_interface,
+		[QUIRK_MIDI_FASTLANE] = snd_usb_create_midi_interface,
 		[QUIRK_MIDI_EMAGIC] = snd_usb_create_midi_interface,
 		[QUIRK_MIDI_CME] = snd_usb_create_midi_interface,
 		[QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk,
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 36e4f7a..8e7f789 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -153,7 +153,7 @@ enum quirk_type {
 	QUIRK_MIDI_YAMAHA,
 	QUIRK_MIDI_MIDIMAN,
 	QUIRK_MIDI_NOVATION,
-	QUIRK_MIDI_RAW,
+	QUIRK_MIDI_FASTLANE,
 	QUIRK_MIDI_EMAGIC,
 	QUIRK_MIDI_CME,
 	QUIRK_MIDI_US122L,
diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c
index 26bad37..2fb35cc 100644
--- a/sound/usb/usbmidi.c
+++ b/sound/usb/usbmidi.c
@@ -1778,8 +1778,18 @@ int snd_usb_create_midi_interface(struct snd_usb_audio* chip,
 		umidi->usb_protocol_ops = &snd_usbmidi_novation_ops;
 		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
 		break;
-	case QUIRK_MIDI_RAW:
+	case QUIRK_MIDI_FASTLANE:
 		umidi->usb_protocol_ops = &snd_usbmidi_raw_ops;
+		/*
+		 * Interface 1 contains isochronous endpoints, but with the same
+		 * numbers as in interface 0.  Since it is interface 1 that the
+		 * USB core has most recently seen, these descriptors are now
+		 * associated with the endpoint numbers.  This will foul up our
+		 * attempts to submit bulk/interrupt URBs to the endpoints in
+		 * interface 0, so we have to make sure that the USB core looks
+		 * again at interface 0 by calling usb_set_interface() on it.
+		 */
+		usb_set_interface(umidi->chip->dev, 0, 0);
 		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
 		break;
 	case QUIRK_MIDI_EMAGIC:
diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index 647ef50..5d955aa 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -1868,7 +1868,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		.data = & (const struct snd_usb_audio_quirk[]) {
 			{
 				.ifnum = 0,
-				.type = QUIRK_MIDI_RAW
+				.type = QUIRK_MIDI_FASTLANE
 			},
 			{
 				.ifnum = 1,
-- 
cgit v0.10.2


From 08d15f034e94251606479d7ca9070994c2e2fcf0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 23 May 2009 10:41:05 +0100
Subject: ASoC: Switch FSL SSI DAI over to symmetric_rates

The effect of symmetric_constraints should provide a standard way to
enforce the use of the same sample rate for both directions.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Timur Tabi <timur@freescale.com>

diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 47afaa9..93f0f38 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -375,18 +375,14 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream,
 		struct snd_pcm_runtime *first_runtime =
 			ssi_private->first_stream->runtime;
 
-		if (!first_runtime->rate || !first_runtime->sample_bits) {
+		if (!first_runtime->sample_bits) {
 			dev_err(substream->pcm->card->dev,
-				"set sample rate and size in %s stream first\n",
+				"set sample size in %s stream first\n",
 				substream->stream == SNDRV_PCM_STREAM_PLAYBACK
 				? "capture" : "playback");
 			return -EAGAIN;
 		}
 
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-			SNDRV_PCM_HW_PARAM_RATE,
-			first_runtime->rate, first_runtime->rate);
-
 		/* If we're in synchronous mode, then we need to constrain
 		 * the sample size as well.  We don't support independent sample
 		 * rates in asynchronous mode.
@@ -693,6 +689,7 @@ struct snd_soc_dai *fsl_ssi_create_dai(struct fsl_ssi_info *ssi_info)
 	fsl_ssi_dai->name = ssi_private->name;
 	fsl_ssi_dai->id = ssi_info->id;
 	fsl_ssi_dai->dev = ssi_info->dev;
+	fsl_ssi_dai->symmetric_rates = 1;
 
 	ret = snd_soc_register_dai(fsl_ssi_dai);
 	if (ret != 0) {
-- 
cgit v0.10.2


From ef65b2a0b3a2f82850144df6e6a7796f6d66da6b Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 27 May 2009 10:10:51 +0200
Subject: perf record: Fix the profiling of existing pid or whole box

Perf record bails if no command argument is provided, so you can't use
naked -a or -p to profile a running task or the whole box.

Allow foreground profiling of an existing pid or the entire system.

[ Impact: fix command option handling bug ]

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 431077a..4a06866 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -354,7 +354,7 @@ static int __cmd_record(int argc, const char **argv)
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
 
-	if (target_pid == -1) {
+	if (target_pid == -1 && argc) {
 		pid = fork();
 		if (pid < 0)
 			perror("failed to fork");
@@ -430,7 +430,7 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 	create_events_help(events_help_msg);
 
 	argc = parse_options(argc, argv, options, record_usage, 0);
-	if (!argc)
+	if (!argc && target_pid == -1 && !system_wide)
 		usage_with_options(record_usage, options);
 
 	if (!nr_counters) {
-- 
cgit v0.10.2


From d716fba49c7445ec87c3f045c59624fac03ee3f2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 May 2009 13:19:59 +0200
Subject: perf report: Remove <ctype.h> include

Pekka reported build failure in builtin-report.c:

    CC builtin-report.o
    In file included from builtin-report.c:7:
    /usr/include/ctype.h:102: error: expected expression before token

And observed:

| Removing #include <ctype.h> from builtin-report.c makes the problem
| go away. I am running Ubuntu 9.04 that has gcc 4.3.3 and libc 2.9.

Reported-by: Pekka J Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 9aef7c5..6265bed 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -4,7 +4,6 @@
 #include <libelf.h>
 #include <gelf.h>
 #include <elf.h>
-#include <ctype.h>
 
 #include "util/list.h"
 #include "util/cache.h"
-- 
cgit v0.10.2


From ed37d83e6aa218192fb28bb6b82498d2a8c74070 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 27 May 2009 21:39:05 +1000
Subject: md: raid5: change incorrect usage of 'min' macro to 'min_t'

A recent patch to raid5.c use min on an int and a sector_t.
This isn't allowed.
So change it to min_t(sector_t,x,y).

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3c3626d..5d400ae 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	safepos = conf->reshape_safe;
 	sector_div(safepos, data_disks);
 	if (mddev->delta_disks < 0) {
-		writepos -= min(reshape_sectors, writepos);
+		writepos -= min_t(sector_t, reshape_sectors, writepos);
 		readpos += reshape_sectors;
 		safepos += reshape_sectors;
 	} else {
 		writepos += reshape_sectors;
-		readpos -= min(reshape_sectors, readpos);
-		safepos -= min(reshape_sectors, safepos);
+		readpos -= min_t(sector_t, reshape_sectors, readpos);
+		safepos -= min_t(sector_t, reshape_sectors, safepos);
 	}
 
 	/* 'writepos' is the most advanced device address we might write.
-- 
cgit v0.10.2


From ba396a6c104682dfe5c8b4fbbf5974d5ac9f3687 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Wed, 27 May 2009 14:17:08 +0200
Subject: block: fix oops with block tag queueing

commit e8939a50466fd963eb1ba9118c34b9ffb7ff6aa6
Author: Tejun Heo <tj@kernel.org>
Date:   Fri May 8 11:54:16 2009 +0900

    block: implement and enforce request peek/start/fetch

Added a BUG_ON(blk_queued_rq(req)) to the top of blk_finish_req().
Unfortunately, this checks whether req->queuelist is empty.  This list
is doing double duty both as the queue list and the tag list, so tagged
requests come in here with this not empty and boom (the tag list is
emptied by blk_queue_end_tag() lower down).

Fix this by moving the BUG_ON to below the end tag we also seem
vulnerable to this in blk_requeue_request() as well.  I think all uses
of blk_queued_rq() need auditing because the check is clearly wrong in
the tagged case.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 7a4c401..8b3b74e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -956,8 +956,6 @@ EXPORT_SYMBOL(blk_make_request);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
-	BUG_ON(blk_queued_rq(rq));
-
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
@@ -965,6 +963,8 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
 
+	BUG_ON(blk_queued_rq(rq));
+
 	elv_requeue_request(q, rq);
 }
 EXPORT_SYMBOL(blk_requeue_request);
@@ -2042,11 +2042,11 @@ static bool blk_update_bidi_request(struct request *rq, int error,
  */
 static void blk_finish_request(struct request *req, int error)
 {
-	BUG_ON(blk_queued_rq(req));
-
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
+	BUG_ON(blk_queued_rq(req));
+
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion();
 
-- 
cgit v0.10.2


From 3c4198e874cde694f5ea1463706873e7907bdb18 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Wed, 27 May 2009 14:50:02 +0200
Subject: block: fix no diskstat problem

The commit below in 2.6-block/for-2.6.31 causes no diskstat problem
because the blk_discard_rq() check was added with '&&'.
It should be 'blk_fs_request() || blk_discard_rq()'.
This patch does it and fixes the no diskstat problem.
Please review and apply.

------ /proc/diskstat without this patch -------------------------------------
   8       0 sda 0 0 0 0 0 0 0 0 0 0 0
------------------------------------------------------------------------------

----- /proc/diskstat with this patch applied ---------------------------------
   8       0 sda 4186 303 373621 61600 9578 3859 107468 169479 2 89755 231059
------------------------------------------------------------------------------

--------------------------------------------------------------------------
commit c69d48540c201394d08cb4d48b905e001313d9b8
Author: Jens Axboe <jens.axboe@oracle.com>
Date:   Fri Apr 24 08:12:19 2009 +0200

    block: include discard requests in IO accounting

    We currently don't do merging on discard requests, but we potentially
    could. If we do, then we need to include discard requests in the IO
    accounting, or merging would end up decrementing in_flight IO counters
    for an IO which never incremented them.

    So enable accounting for discard requests.

<snip>

 static inline int blk_do_io_stat(struct request *rq)
 {
-       return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq);
+       return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq) &&
+               blk_discard_rq(rq);
 }
--------------------------------------------------------------------------

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk.h b/block/blk.h
index c863ec2..3fae6ad 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -156,12 +156,12 @@ static inline int blk_cpu_to_group(int cpu)
  *
  *	a) it's attached to a gendisk, and
  *	b) the queue had IO stats enabled when this request was started, and
- *	c) it's a file system request
+ *	c) it's a file system request or a discard request
  */
 static inline int blk_do_io_stat(struct request *rq)
 {
-	return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq) &&
-		blk_discard_rq(rq);
+	return rq->rq_disk && blk_rq_io_stat(rq) &&
+	       (blk_fs_request(rq) || blk_discard_rq(rq));
 }
 
 #endif
-- 
cgit v0.10.2


From b7a16eac5e679fb5f531b9eeff7db7952303e77d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 May 2009 13:35:35 +0200
Subject: perf_counter: tools: /usr/lib/debug%s.debug support

Some distros seem to store debuginfo in weird places.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 6265bed..a9ff49a 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -190,7 +190,8 @@ static inline int elf_sym__is_function(const GElf_Sym *sym)
 {
 	return elf_sym__type(sym) == STT_FUNC &&
 	       sym->st_name != 0 &&
-	       sym->st_shndx != SHN_UNDEF;
+	       sym->st_shndx != SHN_UNDEF &&
+	       sym->st_size != 0;
 }
 
 static inline const char *elf_sym__name(const GElf_Sym *sym,
@@ -222,11 +223,11 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
 	return sec;
 }
 
-static int dso__load(struct dso *self)
+static int dso__load_sym(struct dso *self, int fd, char *name)
 {
 	Elf_Data *symstrs;
 	uint32_t nr_syms;
-	int fd, err = -1;
+	int err = -1;
 	uint32_t index;
 	GElf_Ehdr ehdr;
 	GElf_Shdr shdr;
@@ -234,16 +235,12 @@ static int dso__load(struct dso *self)
 	GElf_Sym sym;
 	Elf_Scn *sec;
 	Elf *elf;
-
-
-	fd = open(self->name, O_RDONLY);
-	if (fd == -1)
-		return -1;
+	int nr = 0;
 
 	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
 	if (elf == NULL) {
 		fprintf(stderr, "%s: cannot read %s ELF file.\n",
-			__func__, self->name);
+			__func__, name);
 		goto out_close;
 	}
 
@@ -292,16 +289,63 @@ static int dso__load(struct dso *self)
 			goto out_elf_end;
 
 		dso__insert_symbol(self, f);
+
+		nr++;
 	}
 
-	err = 0;
+	err = nr;
 out_elf_end:
 	elf_end(elf);
 out_close:
-	close(fd);
 	return err;
 }
 
+static int dso__load(struct dso *self)
+{
+	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
+	char *name = malloc(size);
+	int variant = 0;
+	int ret = -1;
+	int fd;
+
+	if (!name)
+		return -1;
+
+more:
+	do {
+		switch (variant) {
+		case 0: /* Fedora */
+			snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
+			break;
+		case 1: /* Ubuntu */
+			snprintf(name, size, "/usr/lib/debug%s", self->name);
+			break;
+		case 2: /* Sane people */
+			snprintf(name, size, "%s", self->name);
+			break;
+
+		default:
+			goto out;
+		}
+		variant++;
+
+		fd = open(name, O_RDONLY);
+	} while (fd < 0);
+
+	ret = dso__load_sym(self, fd, name);
+	close(fd);
+
+	/*
+	 * Some people seem to have debuginfo files _WITHOUT_ debug info!?!?
+	 */
+	if (!ret)
+		goto more;
+
+out:
+	free(name);
+	return ret;
+}
+
 static size_t dso__fprintf(struct dso *self, FILE *fp)
 {
 	size_t ret = fprintf(fp, "dso: %s\n", self->name);
@@ -336,11 +380,23 @@ static struct dso *dsos__find(const char *name)
 static struct dso *dsos__findnew(const char *name)
 {
 	struct dso *dso = dsos__find(name);
+	int nr;
 
 	if (dso == NULL) {
 		dso = dso__new(name);
-		if (dso != NULL && dso__load(dso) < 0)
+		if (!dso)
+			goto out_delete_dso;
+
+		nr = dso__load(dso);
+		if (nr < 0) {
+			fprintf(stderr, "Failed to open: %s\n", name);
 			goto out_delete_dso;
+		}
+		if (!nr) {
+			fprintf(stderr,
+		"Failed to find debug symbols for: %s, maybe install a debug package?\n",
+					name);
+		}
 
 		dsos__add(dso);
 	}
@@ -547,9 +603,9 @@ symhist__fprintf(struct symhist *self, uint64_t total_samples, FILE *fp)
 	size_t ret;
 
 	if (total_samples)
-		ret = fprintf(fp, "%5.2f", (self->count * 100.0) / total_samples);
+		ret = fprintf(fp, "%5.2f%% ", (self->count * 100.0) / total_samples);
 	else
-		ret = fprintf(fp, "%12d", self->count);
+		ret = fprintf(fp, "%12d ", self->count);
 
 	ret += fprintf(fp, "%14s [%c] ",
 		       thread__name(self->thread, bf, sizeof(bf)),
@@ -922,10 +978,12 @@ more:
 	}
 	default: {
 broken_event:
-		fprintf(stderr, "%p [%p]: skipping unknown header type: %d\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->header.type);
+		if (dump_trace)
+			fprintf(stderr, "%p [%p]: skipping unknown header type: %d\n",
+					(void *)(offset + head),
+					(void *)(long)(event->header.size),
+					event->header.type);
+
 		total_unknown++;
 
 		/*
-- 
cgit v0.10.2


From 346a850e3c3a20159cef2b79235e6d34aa497c65 Mon Sep 17 00:00:00 2001
From: Manuel Traut <manut@linutronix.de>
Date: Wed, 27 May 2009 06:20:05 -0700
Subject: Input: usb1400_ts - fix access to "device data" in resume function

platform_data != driver_data

driver data is actually the "correct" place of the struct however it is
not placed there due to the need of the ac97 struct. This is broken since
d9105c2b01 aka "[ARM] 5184/1: Split ucb1400_ts into core and touchscreen"

Signed-off-by: Manuel Traut <manut@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>

diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index f100c7f..6954f55 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -419,7 +419,7 @@ static int ucb1400_ts_remove(struct platform_device *dev)
 #ifdef CONFIG_PM
 static int ucb1400_ts_resume(struct platform_device *dev)
 {
-	struct ucb1400_ts *ucb = platform_get_drvdata(dev);
+	struct ucb1400_ts *ucb = dev->dev.platform_data;
 
 	if (ucb->ts_task) {
 		/*
-- 
cgit v0.10.2


From 348ca1029e8bae6e0c49097ad25439b17c5326f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 May 2009 15:46:50 +0100
Subject: FS-Cache: Fixup renamed filenames in comments in internal.h

Fix up renamed filenames in comments in fs/fscache/internal.h.

Originally, the files were all called fsc-xxx.c, but they got renamed to
just xxx.c.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index e0cbd16..1c34130 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -28,7 +28,7 @@
 #define FSCACHE_MAX_THREADS	32
 
 /*
- * fsc-cache.c
+ * cache.c
  */
 extern struct list_head fscache_cache_list;
 extern struct rw_semaphore fscache_addremove_sem;
@@ -37,7 +37,7 @@ extern struct fscache_cache *fscache_select_cache_for_object(
 	struct fscache_cookie *);
 
 /*
- * fsc-cookie.c
+ * cookie.c
  */
 extern struct kmem_cache *fscache_cookie_jar;
 
@@ -45,13 +45,13 @@ extern void fscache_cookie_init_once(void *);
 extern void __fscache_cookie_put(struct fscache_cookie *);
 
 /*
- * fsc-fsdef.c
+ * fsdef.c
  */
 extern struct fscache_cookie fscache_fsdef_index;
 extern struct fscache_cookie_def fscache_fsdef_netfs_def;
 
 /*
- * fsc-histogram.c
+ * histogram.c
  */
 #ifdef CONFIG_FSCACHE_HISTOGRAM
 extern atomic_t fscache_obj_instantiate_histogram[HZ];
@@ -75,7 +75,7 @@ extern const struct file_operations fscache_histogram_fops;
 #endif
 
 /*
- * fsc-main.c
+ * main.c
  */
 extern unsigned fscache_defer_lookup;
 extern unsigned fscache_defer_create;
@@ -86,14 +86,14 @@ extern int fscache_wait_bit(void *);
 extern int fscache_wait_bit_interruptible(void *);
 
 /*
- * fsc-object.c
+ * object.c
  */
 extern void fscache_withdrawing_object(struct fscache_cache *,
 				       struct fscache_object *);
 extern void fscache_enqueue_object(struct fscache_object *);
 
 /*
- * fsc-operation.c
+ * operation.c
  */
 extern int fscache_submit_exclusive_op(struct fscache_object *,
 				       struct fscache_operation *);
@@ -104,7 +104,7 @@ extern void fscache_start_operations(struct fscache_object *);
 extern void fscache_operation_gc(struct work_struct *);
 
 /*
- * fsc-proc.c
+ * proc.c
  */
 #ifdef CONFIG_PROC_FS
 extern int __init fscache_proc_init(void);
@@ -115,7 +115,7 @@ extern void fscache_proc_cleanup(void);
 #endif
 
 /*
- * fsc-stats.c
+ * stats.c
  */
 #ifdef CONFIG_FSCACHE_STATS
 extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS];
-- 
cgit v0.10.2


From 911e690e70540f009125bacd16c017eb1a7b1916 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 May 2009 15:46:55 +0100
Subject: CacheFiles: Fixup renamed filenames in comments in internal.h

Fix up renamed filenames in comments in fs/cachefiles/internal.h.

Originally, the files were all called cf-xxx.c, but they got renamed to
just xxx.c.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 19218e1..f7c255f 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -122,13 +122,13 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache)
 }
 
 /*
- * cf-bind.c
+ * bind.c
  */
 extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args);
 extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache);
 
 /*
- * cf-daemon.c
+ * daemon.c
  */
 extern const struct file_operations cachefiles_daemon_fops;
 
@@ -136,17 +136,17 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache,
 				unsigned fnr, unsigned bnr);
 
 /*
- * cf-interface.c
+ * interface.c
  */
 extern const struct fscache_cache_ops cachefiles_cache_ops;
 
 /*
- * cf-key.c
+ * key.c
  */
 extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
 
 /*
- * cf-namei.c
+ * namei.c
  */
 extern int cachefiles_delete_object(struct cachefiles_cache *cache,
 				    struct cachefiles_object *object);
@@ -165,7 +165,7 @@ extern int cachefiles_check_in_use(struct cachefiles_cache *cache,
 				   struct dentry *dir, char *filename);
 
 /*
- * cf-proc.c
+ * proc.c
  */
 #ifdef CONFIG_CACHEFILES_HISTOGRAM
 extern atomic_t cachefiles_lookup_histogram[HZ];
@@ -190,7 +190,7 @@ void cachefiles_hist(atomic_t histogram[], unsigned long start_jif)
 #endif
 
 /*
- * cf-rdwr.c
+ * rdwr.c
  */
 extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *,
 					 struct page *, gfp_t);
@@ -205,7 +205,7 @@ extern int cachefiles_write_page(struct fscache_storage *, struct page *);
 extern void cachefiles_uncache_page(struct fscache_object *, struct page *);
 
 /*
- * cf-security.c
+ * security.c
  */
 extern int cachefiles_get_security_ID(struct cachefiles_cache *cache);
 extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
@@ -225,7 +225,7 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
 }
 
 /*
- * cf-xattr.c
+ * xattr.c
  */
 extern int cachefiles_check_object_type(struct cachefiles_object *object);
 extern int cachefiles_set_object_xattr(struct cachefiles_object *object,
-- 
cgit v0.10.2


From 450aaa2b2a1b006870ba68251fbb40b2387caade Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 May 2009 20:20:23 +0200
Subject: perf_counter: tools: report: Add vmlinux support

Allow to use vmlinux instead of kallsyms.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20090527182100.740018486@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index a9ff49a..3e87cbd 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -19,6 +19,7 @@
 #define SHOW_HV		4
 
 static char		const *input_name = "perf.data";
+static char		*vmlinux = NULL;
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
@@ -532,6 +533,39 @@ out_delete_dso:
 	return -1;
 }
 
+static int load_kernel(void)
+{
+	int fd, nr;
+
+	if (!vmlinux)
+		goto kallsyms;
+
+	fd = open(vmlinux, O_RDONLY);
+	if (fd < 0)
+		goto kallsyms;
+
+	kernel_dso = dso__new("[kernel]");
+	if (!kernel_dso)
+		goto fail_open;
+
+	nr = dso__load_sym(kernel_dso, fd, vmlinux);
+
+	if (nr <= 0)
+		goto fail_load;
+
+	dsos__add(kernel_dso);
+	close(fd);
+
+	return 0;
+
+fail_load:
+	dso__delete(kernel_dso);
+fail_open:
+	close(fd);
+kallsyms:
+	return load_kallsyms();
+}
+
 struct map {
 	struct list_head node;
 	uint64_t	 start;
@@ -850,7 +884,7 @@ static int __cmd_report(void)
 		exit(0);
 	}
 
-	if (load_kallsyms() < 0) {
+	if (load_kernel() < 0) {
 		perror("failed to open kallsyms");
 		return EXIT_FAILURE;
 	}
@@ -1039,6 +1073,7 @@ static const struct option options[] = {
 		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
+	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From e7fb08b1d06a6b37263c765205de5614a2273aeb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 May 2009 20:20:24 +0200
Subject: perf_counter: tools: report: Rework histogram code

In preparation for configurable sorting, rework the histgram code a bit.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20090527182100.796410098@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 3e87cbd..2762564 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -597,71 +597,9 @@ struct thread;
 
 static const char *thread__name(struct thread *self, char *bf, size_t size);
 
-struct symhist {
-	struct rb_node	 rb_node;
-	struct dso	 *dso;
-	struct symbol	 *sym;
-	struct thread	 *thread;
-	uint64_t	 ip;
-	uint32_t	 count;
-	char		 level;
-};
-
-static struct symhist *symhist__new(struct symbol *sym, uint64_t ip,
-				    struct thread *thread, struct dso *dso,
-				    char level)
-{
-	struct symhist *self = malloc(sizeof(*self));
-
-	if (self != NULL) {
-		self->sym    = sym;
-		self->thread = thread;
-		self->ip     = ip;
-		self->dso    = dso;
-		self->level  = level;
-		self->count  = 1;
-	}
-
-	return self;
-}
-
-static void symhist__inc(struct symhist *self)
-{
-	++self->count;
-}
-
-static size_t
-symhist__fprintf(struct symhist *self, uint64_t total_samples, FILE *fp)
-{
-	char bf[32];
-	size_t ret;
-
-	if (total_samples)
-		ret = fprintf(fp, "%5.2f%% ", (self->count * 100.0) / total_samples);
-	else
-		ret = fprintf(fp, "%12d ", self->count);
-
-	ret += fprintf(fp, "%14s [%c] ",
-		       thread__name(self->thread, bf, sizeof(bf)),
-		       self->level);
-
-	if (verbose)
-		ret += fprintf(fp, "%#018llx ", (unsigned long long)self->ip);
-
-	if (self->level != '.')
-		ret += fprintf(fp, "%s\n",
-			       self->sym ? self->sym->name : "<unknown>");
-	else
-		ret += fprintf(fp, "%s: %s\n",
-			       self->dso ? self->dso->name : "<unknown>",
-			       self->sym ? self->sym->name : "<unknown>");
-	return ret;
-}
-
 struct thread {
 	struct rb_node	 rb_node;
 	struct list_head maps;
-	struct rb_root	 symhists;
 	pid_t		 pid;
 	char		 *comm;
 };
@@ -683,67 +621,17 @@ static struct thread *thread__new(pid_t pid)
 		self->pid = pid;
 		self->comm = NULL;
 		INIT_LIST_HEAD(&self->maps);
-		self->symhists = RB_ROOT;
 	}
 
 	return self;
 }
 
-static int thread__symbol_incnew(struct thread *self, struct symbol *sym,
-				 uint64_t ip, struct dso *dso, char level)
-{
-	struct rb_node **p = &self->symhists.rb_node;
-	struct rb_node *parent = NULL;
-	struct symhist *sh;
-
-	while (*p != NULL) {
-		uint64_t start;
-
-		parent = *p;
-		sh = rb_entry(parent, struct symhist, rb_node);
-
-		if (sh->sym == sym || ip == sh->ip) {
-			symhist__inc(sh);
-			return 0;
-		}
-
-		/* Handle unresolved symbols too */
-		start = !sh->sym ? sh->ip : sh->sym->start;
-
-		if (ip < start)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	sh = symhist__new(sym, ip, self, dso, level);
-	if (sh == NULL)
-		return -ENOMEM;
-	rb_link_node(&sh->rb_node, parent, p);
-	rb_insert_color(&sh->rb_node, &self->symhists);
-	return 0;
-}
-
 static int thread__set_comm(struct thread *self, const char *comm)
 {
 	self->comm = strdup(comm);
 	return self->comm ? 0 : -ENOMEM;
 }
 
-static size_t thread__fprintf(struct thread *self, FILE *fp)
-{
-	int ret = fprintf(fp, "thread: %d %s\n", self->pid, self->comm);
-	struct rb_node *nd;
-
-	for (nd = rb_first(&self->symhists); nd; nd = rb_next(nd)) {
-		struct symhist *pos = rb_entry(nd, struct symhist, rb_node);
-
-		ret += symhist__fprintf(pos, 0, fp);
-	}
-
-	return ret;
-}
-
 static struct rb_root threads;
 
 static struct thread *threads__findnew(pid_t pid)
@@ -792,70 +680,172 @@ static struct map *thread__find_map(struct thread *self, uint64_t ip)
 	return NULL;
 }
 
-static void threads__fprintf(FILE *fp)
+/*
+ * histogram, sorted on item, collects counts
+ */
+
+static struct rb_root hist;
+
+struct hist_entry {
+	struct rb_node	 rb_node;
+
+	struct thread	 *thread;
+	struct map	 *map;
+	struct dso	 *dso;
+	struct symbol	 *sym;
+	uint64_t	 ip;
+	char		 level;
+
+	uint32_t	 count;
+};
+
+static int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t ip_l, ip_r;
+	int cmp = right->thread->pid - left->thread->pid;
+
+	if (cmp)
+		return cmp;
+
+	if (left->sym == right->sym)
+		return 0;
+
+	ip_l = left->sym ? left->sym->start : left->ip;
+	ip_r = right->sym ? right->sym->start : right->ip;
+
+	return (int64_t)(ip_r - ip_l);
+}
+
+static int
+hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
+		struct symbol *sym, uint64_t ip, char level)
 {
-	struct rb_node *nd;
-	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
-		struct thread *pos = rb_entry(nd, struct thread, rb_node);
-		thread__fprintf(pos, fp);
+	struct rb_node **p = &hist.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct hist_entry entry = {
+		.thread	= thread,
+		.map	= map,
+		.dso	= dso,
+		.sym	= sym,
+		.ip	= ip,
+		.level	= level,
+		.count	= 1,
+	};
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__cmp(&entry, he);
+
+		if (!cmp) {
+			he->count++;
+			return 0;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
 	}
+
+	he = malloc(sizeof(*he));
+	if (!he)
+		return -ENOMEM;
+	*he = entry;
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &hist);
+
+	return 0;
 }
 
-static struct rb_root global_symhists;
+static size_t
+hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
+{
+	char bf[32];
+	size_t ret;
+
+	if (total_samples) {
+		ret = fprintf(fp, "%5.2f%% ",
+				(self->count * 100.0) / total_samples);
+	} else
+		ret = fprintf(fp, "%12d ", self->count);
 
-static void threads__insert_symhist(struct symhist *sh)
+	ret += fprintf(fp, "%14s [%c] ",
+		       thread__name(self->thread, bf, sizeof(bf)),
+		       self->level);
+
+	if (verbose)
+		ret += fprintf(fp, "%#018llx ", (unsigned long long)self->ip);
+
+	if (self->level != '.')
+		ret += fprintf(fp, "%s\n",
+			       self->sym ? self->sym->name : "<unknown>");
+	else
+		ret += fprintf(fp, "%s: %s\n",
+			       self->dso ? self->dso->name : "<unknown>",
+			       self->sym ? self->sym->name : "<unknown>");
+	return ret;
+}
+
+/*
+ * reverse the map, sort on count.
+ */
+
+static struct rb_root output_hists;
+
+static void output__insert_entry(struct hist_entry *he)
 {
-	struct rb_node **p = &global_symhists.rb_node;
+	struct rb_node **p = &output_hists.rb_node;
 	struct rb_node *parent = NULL;
-	struct symhist *iter;
+	struct hist_entry *iter;
 
 	while (*p != NULL) {
 		parent = *p;
-		iter = rb_entry(parent, struct symhist, rb_node);
+		iter = rb_entry(parent, struct hist_entry, rb_node);
 
-		/* Reverse order */
-		if (sh->count > iter->count)
+		if (he->count > iter->count)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
 	}
 
-	rb_link_node(&sh->rb_node, parent, p);
-	rb_insert_color(&sh->rb_node, &global_symhists);
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &output_hists);
 }
 
-static void threads__sort_symhists(void)
+static void output__resort(void)
 {
-	struct rb_node *nd;
-
-	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
-		struct thread *thread = rb_entry(nd, struct thread, rb_node);
-		struct rb_node *next = rb_first(&thread->symhists);
+	struct rb_node *next = rb_first(&hist);
+	struct hist_entry *n;
 
-		while (next) {
-			struct symhist *n = rb_entry(next, struct symhist,
-						     rb_node);
-			next = rb_next(&n->rb_node);
-			rb_erase(&n->rb_node, &thread->symhists);
-			threads__insert_symhist(n);
-		}
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
 
+		rb_erase(&n->rb_node, &hist);
+		output__insert_entry(n);
 	}
 }
 
-static size_t threads__symhists_fprintf(uint64_t total_samples, FILE *fp)
+static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 {
+	struct hist_entry *pos;
 	struct rb_node *nd;
 	size_t ret = 0;
 
-	for (nd = rb_first(&global_symhists); nd; nd = rb_next(nd)) {
-		struct symhist *pos = rb_entry(nd, struct symhist, rb_node);
-		ret += symhist__fprintf(pos, total_samples, fp);
+	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node);
+		ret += hist_entry__fprintf(fp, pos, total_samples);
 	}
 
 	return ret;
 }
 
+
 static int __cmd_report(void)
 {
 	unsigned long offset = 0;
@@ -926,6 +916,7 @@ more:
 		struct dso *dso = NULL;
 		struct thread *thread = threads__findnew(event->ip.pid);
 		uint64_t ip = event->ip.ip;
+		struct map *map = NULL;
 
 		if (dump_trace) {
 			fprintf(stderr, "%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
@@ -945,9 +936,10 @@ more:
 		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
 			show = SHOW_KERNEL;
 			level = 'k';
+
 			dso = kernel_dso;
+
 		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
-			struct map *map;
 
 			show = SHOW_USER;
 			level = '.';
@@ -957,6 +949,7 @@ more:
 				dso = map->dso;
 				ip -= map->start + map->pgoff;
 			}
+
 		} else {
 			show = SHOW_HV;
 			level = 'H';
@@ -965,8 +958,9 @@ more:
 		if (show & show_mask) {
 			struct symbol *sym = dso__find_symbol(dso, ip);
 
-			if (thread__symbol_incnew(thread, sym, ip, dso, level)) {
-				fprintf(stderr, "problem incrementing symbol count, bailing out\n");
+			if (hist_entry__add(thread, map, dso, sym, ip, level)) {
+				fprintf(stderr,
+		"problem incrementing symbol count, bailing out\n");
 				goto done;
 			}
 		}
@@ -1050,13 +1044,11 @@ done:
 		return 0;
 	}
 
-	if (verbose >= 2) {
+	if (verbose >= 2)
 		dsos__fprintf(stdout);
-		threads__fprintf(stdout);
-	}
 
-	threads__sort_symhists();
-	threads__symhists_fprintf(total, stdout);
+	output__resort();
+	output__fprintf(stdout, total);
 
 	return rc;
 }
-- 
cgit v0.10.2


From 1aa167382323eeeeb38368cab85cf17979793cbe Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 May 2009 20:20:25 +0200
Subject: perf_counter: tools: report: Dynamic sort/print bits

Make the sorting and printing dynamic.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20090527182100.921953817@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 2762564..856186f 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -699,14 +699,41 @@ struct hist_entry {
 	uint32_t	 count;
 };
 
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+	struct list_head list;
+
+	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	size_t	(*print)(FILE *fp, struct hist_entry *);
+};
+
 static int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	uint64_t ip_l, ip_r;
-	int cmp = right->thread->pid - left->thread->pid;
+	return right->thread->pid - left->thread->pid;
+}
+
+static size_t
+sort__thread_print(FILE *fp, struct hist_entry *self)
+{
+	char bf[32];
+
+	return fprintf(fp, "%14s ",
+			thread__name(self->thread, bf, sizeof(bf)));
+}
 
-	if (cmp)
-		return cmp;
+static struct sort_entry sort_thread = {
+	.cmp	= sort__thread_cmp,
+	.print	= sort__thread_print,
+};
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t ip_l, ip_r;
 
 	if (left->sym == right->sym)
 		return 0;
@@ -717,6 +744,79 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 	return (int64_t)(ip_r - ip_l);
 }
 
+static size_t
+sort__sym_print(FILE *fp, struct hist_entry *self)
+{
+	size_t ret = 0;
+
+	ret += fprintf(fp, "[%c] ", self->level);
+
+	if (verbose)
+		ret += fprintf(fp, "%#018llx ", (unsigned long long)self->ip);
+
+	if (self->level != '.')
+		ret += fprintf(fp, "%s ",
+			       self->sym ? self->sym->name : "<unknown>");
+	else
+		ret += fprintf(fp, "%s: %s ",
+			       self->dso ? self->dso->name : "<unknown>",
+			       self->sym ? self->sym->name : "<unknown>");
+
+	return ret;
+}
+
+static struct sort_entry sort_sym = {
+	.cmp	= sort__sym_cmp,
+	.print	= sort__sym_print,
+};
+
+static LIST_HEAD(hist_entry__sort_list);
+
+static void setup_sorting(void)
+{
+	list_add_tail(&sort_thread.list, &hist_entry__sort_list);
+	list_add_tail(&sort_sym.list, &hist_entry__sort_list);
+}
+
+static int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		cmp = se->cmp(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static size_t
+hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
+{
+	struct sort_entry *se;
+	size_t ret;
+
+	if (total_samples) {
+		ret = fprintf(fp, "%5.2f%% ",
+				(self->count * 100.0) / total_samples);
+	} else
+		ret = fprintf(fp, "%12d ", self->count);
+
+	list_for_each_entry(se, &hist_entry__sort_list, list)
+		ret += se->print(fp, self);
+
+	ret += fprintf(fp, "\n");
+
+	return ret;
+}
+
+/*
+ * collect histogram counts
+ */
+
 static int
 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 		struct symbol *sym, uint64_t ip, char level)
@@ -762,35 +862,6 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 	return 0;
 }
 
-static size_t
-hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
-{
-	char bf[32];
-	size_t ret;
-
-	if (total_samples) {
-		ret = fprintf(fp, "%5.2f%% ",
-				(self->count * 100.0) / total_samples);
-	} else
-		ret = fprintf(fp, "%12d ", self->count);
-
-	ret += fprintf(fp, "%14s [%c] ",
-		       thread__name(self->thread, bf, sizeof(bf)),
-		       self->level);
-
-	if (verbose)
-		ret += fprintf(fp, "%#018llx ", (unsigned long long)self->ip);
-
-	if (self->level != '.')
-		ret += fprintf(fp, "%s\n",
-			       self->sym ? self->sym->name : "<unknown>");
-	else
-		ret += fprintf(fp, "%s: %s\n",
-			       self->dso ? self->dso->name : "<unknown>",
-			       self->sym ? self->sym->name : "<unknown>");
-	return ret;
-}
-
 /*
  * reverse the map, sort on count.
  */
@@ -1077,6 +1148,8 @@ int cmd_report(int argc, const char **argv, const char *prefix)
 
 	parse_options(argc, argv, options, report_usage, 0);
 
+	setup_sorting();
+
 	setup_pager();
 
 	return __cmd_report();
-- 
cgit v0.10.2


From 37f440cba299bb479cf45d12eef923f0979dbcaf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 May 2009 20:20:26 +0200
Subject: pref_counter: tools: report: Add --sort option

option parsing for dynamic sorting.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20090527182101.041817692@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 856186f..982abce 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -20,6 +20,7 @@
 
 static char		const *input_name = "perf.data";
 static char		*vmlinux = NULL;
+static char		*sort_order = "pid,symbol";
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
@@ -770,12 +771,49 @@ static struct sort_entry sort_sym = {
 	.print	= sort__sym_print,
 };
 
+struct sort_dimension {
+	char *name;
+	struct sort_entry *entry;
+	int taken;
+};
+
+static struct sort_dimension sort_dimensions[] = {
+	{ .name = "pid",	.entry = &sort_thread,	},
+	{ .name = "symbol",	.entry = &sort_sym,	},
+};
+
 static LIST_HEAD(hist_entry__sort_list);
 
+static int sort_dimension__add(char *tok)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
+		struct sort_dimension *sd = &sort_dimensions[i];
+
+		if (sd->taken)
+			continue;
+
+		if (strcmp(tok, sd->name))
+			continue;
+
+		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
+		sd->taken = 1;
+		return 0;
+	}
+
+	return -ESRCH;
+}
+
 static void setup_sorting(void)
 {
-	list_add_tail(&sort_thread.list, &hist_entry__sort_list);
-	list_add_tail(&sort_sym.list, &hist_entry__sort_list);
+	char *tmp, *tok, *str = strdup(sort_order);
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp))
+		sort_dimension__add(tok);
+
+	free(str);
 }
 
 static int64_t
@@ -1137,6 +1175,7 @@ static const struct option options[] = {
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+	OPT_STRING('s', "sort", &sort_order, "foo", "bar"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From 992444b173f35997f96f5cbb214f0de81d1b97ff Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 May 2009 20:20:27 +0200
Subject: perf_counter: tools: report: Add comm sorting

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20090527182101.129302022@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 982abce..a634022 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -732,6 +732,35 @@ static struct sort_entry sort_thread = {
 };
 
 static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	char *comm_l = left->thread->comm;
+	char *comm_r = right->thread->comm;
+
+	if (!comm_l || !comm_r) {
+		if (!comm_l && !comm_r)
+			return 0;
+		else if (!comm_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(comm_l, comm_r);
+}
+
+static size_t
+sort__comm_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%20s ", self->thread->comm ?: "<unknown>");
+}
+
+static struct sort_entry sort_comm = {
+	.cmp	= sort__comm_cmp,
+	.print	= sort__comm_print,
+};
+
+static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	uint64_t ip_l, ip_r;
@@ -779,6 +808,7 @@ struct sort_dimension {
 
 static struct sort_dimension sort_dimensions[] = {
 	{ .name = "pid",	.entry = &sort_thread,	},
+	{ .name = "comm",	.entry = &sort_comm,	},
 	{ .name = "symbol",	.entry = &sort_sym,	},
 };
 
-- 
cgit v0.10.2


From 55e5ec41a9de46b6ca06031f4fbdfdfc76dc24dc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 May 2009 20:20:28 +0200
Subject: pref_counter: tools: report: Add dso sorting

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20090527182101.229504802@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index a634022..30e12c7 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -761,6 +761,35 @@ static struct sort_entry sort_comm = {
 };
 
 static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct dso *dso_l = left->dso;
+	struct dso *dso_r = right->dso;
+
+	if (!dso_l || !dso_r) {
+		if (!dso_l && !dso_r)
+			return 0;
+		else if (!dso_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(dso_l->name, dso_r->name);
+}
+
+static size_t
+sort__dso_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%64s ", self->dso ? self->dso->name : "<unknown>");
+}
+
+static struct sort_entry sort_dso = {
+	.cmp	= sort__dso_cmp,
+	.print	= sort__dso_print,
+};
+
+static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	uint64_t ip_l, ip_r;
@@ -809,6 +838,7 @@ struct sort_dimension {
 static struct sort_dimension sort_dimensions[] = {
 	{ .name = "pid",	.entry = &sort_thread,	},
 	{ .name = "comm",	.entry = &sort_comm,	},
+	{ .name = "dso",	.entry = &sort_dso,	},
 	{ .name = "symbol",	.entry = &sort_sym,	},
 };
 
-- 
cgit v0.10.2


From 2d65537ee7cd4a0818ea80a97ab7932368fff5cd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 May 2009 21:36:22 +0200
Subject: pref_counter: tools: report: Add header printout & prettify

Old default output:

 3.12%    perf-report [.] ./perf-report:       dsos__find
 2.44%    perf-report [k] kernel:              kallsyms_expand_symbol
 2.28%          :4483 [.] <unknown>:           <unknown>
 2.05%          :4174 [k] kernel:              _spin_lock_irqsave
 2.01%    perf-report [k] kernel:              vsnprintf
 1.92%    perf-report [k] kernel:              format_decode
 1.92%          :4438 [k] kernel:              _spin_lock

New default output:

 #
 # Overhead          Command       File: Symbol
 # ........          .......       ............
 #
      6.54%             perf  [k]  kernel: kallsyms_expand_symbol
      6.26%             perf  [.]  /home/mingo/tip/Documentation/perf_counter/perf: dso__insert_symbol
      4.76%             perf  [.]  /home/mingo/tip/Documentation/perf_counter/perf: hex2long
      4.55%             perf  [k]  kernel: number
      4.48%             perf  [k]  kernel: format_decode
      4.09%             perf  [k]  kernel: vsnprintf

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20090527182101.229504802@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 30e12c7..6df95c2 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -708,6 +708,7 @@ struct sort_entry {
 	struct list_head list;
 
 	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	size_t (*print_header)(FILE *fp);
 	size_t	(*print)(FILE *fp, struct hist_entry *);
 };
 
@@ -722,7 +723,7 @@ sort__thread_print(FILE *fp, struct hist_entry *self)
 {
 	char bf[32];
 
-	return fprintf(fp, "%14s ",
+	return fprintf(fp, " %16s",
 			thread__name(self->thread, bf, sizeof(bf)));
 }
 
@@ -752,7 +753,7 @@ sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__comm_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, "%20s ", self->thread->comm ?: "<unknown>");
+	return fprintf(fp, " %16s", self->thread->comm ?: "<unknown>");
 }
 
 static struct sort_entry sort_comm = {
@@ -781,7 +782,7 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__dso_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, "%64s ", self->dso ? self->dso->name : "<unknown>");
+	return fprintf(fp, " %64s", self->dso ? self->dso->name : "<unknown>");
 }
 
 static struct sort_entry sort_dso = {
@@ -803,21 +804,33 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 	return (int64_t)(ip_r - ip_l);
 }
 
+static size_t sort__sym_print_header(FILE *fp)
+{
+	size_t ret = 0;
+
+	ret += fprintf(fp, "#\n");
+	ret += fprintf(fp, "# Overhead          Command       File: Symbol\n");
+	ret += fprintf(fp, "# ........          .......       ............\n");
+	ret += fprintf(fp, "#\n");
+
+	return ret;
+}
+
 static size_t
 sort__sym_print(FILE *fp, struct hist_entry *self)
 {
 	size_t ret = 0;
 
-	ret += fprintf(fp, "[%c] ", self->level);
+	ret += fprintf(fp, "  [%c] ", self->level);
 
 	if (verbose)
-		ret += fprintf(fp, "%#018llx ", (unsigned long long)self->ip);
+		ret += fprintf(fp, " %#018llx", (unsigned long long)self->ip);
 
 	if (self->level != '.')
-		ret += fprintf(fp, "%s ",
+		ret += fprintf(fp, " kernel: %s",
 			       self->sym ? self->sym->name : "<unknown>");
 	else
-		ret += fprintf(fp, "%s: %s ",
+		ret += fprintf(fp, " %s: %s",
 			       self->dso ? self->dso->name : "<unknown>",
 			       self->sym ? self->sym->name : "<unknown>");
 
@@ -825,8 +838,9 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
 }
 
 static struct sort_entry sort_sym = {
-	.cmp	= sort__sym_cmp,
-	.print	= sort__sym_print,
+	.cmp		= sort__sym_cmp,
+	.print_header	= sort__sym_print_header,
+	.print		= sort__sym_print,
 };
 
 struct sort_dimension {
@@ -898,7 +912,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 	size_t ret;
 
 	if (total_samples) {
-		ret = fprintf(fp, "%5.2f%% ",
+		ret = fprintf(fp, "    %5.2f%%",
 				(self->count * 100.0) / total_samples);
 	} else
 		ret = fprintf(fp, "%12d ", self->count);
@@ -1003,9 +1017,15 @@ static void output__resort(void)
 static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 {
 	struct hist_entry *pos;
+	struct sort_entry *se;
 	struct rb_node *nd;
 	size_t ret = 0;
 
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		if (se->print_header)
+			ret += se->print_header(fp);
+	}
+
 	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
 		pos = rb_entry(nd, struct hist_entry, rb_node);
 		ret += hist_entry__fprintf(fp, pos, total_samples);
-- 
cgit v0.10.2


From 07f4f3e8a24138ca2f3650723d670df25687cd05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= <krh@redhat.com>
Date: Wed, 27 May 2009 14:37:28 -0400
Subject: i915: Set object to gtt domain when faulting it back in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a GEM object is evicted from the GTT we set it to the CPU domain,
as it might get swapped in and out or ever mmapped regularly.  If the
object is mmapped through the GTT it can still get evicted in this way
by other objects requiring GTT space.  When the GTT mapping is touched
again we fault it back into the GTT, but fail to set it back to the
GTT domain.  This means we fail to flush any cached CPU writes to the
pages backing the object which will then happen "eventually", typically
after we write to the page through the uncached GTT mapping.

[anholt: Note that userland does do a set_domain(GTT, GTT) when starting
to access the GTT mapping.  That covers getting the existing mapping of the
object synchronized if it's bound to the GTT.  But set_domain(GTT, GTT)
doesn't do anything if the object is currently unbound.  This fix covers the
transition to being bound for GTT mapping.]

Fixes glyph and other pixmap corruption during swapping.  fd.o bug #21790

Signed-off-by: Kristian Høgsberg <krh@redhat.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e242186..670d128 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1145,6 +1145,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			mutex_unlock(&dev->struct_mutex);
 			return VM_FAULT_SIGBUS;
 		}
+
+		ret = i915_gem_object_set_to_gtt_domain(obj, write);
+		if (ret) {
+			mutex_unlock(&dev->struct_mutex);
+			return VM_FAULT_SIGBUS;
+		}
+
 		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
 	}
 
-- 
cgit v0.10.2


From ea8b27ad0cc2573776c6cd87617a37aaf603b8bd Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Wed, 27 May 2009 01:06:19 -0400
Subject: ASoC: Modify mpc5200 AC97 driver to use V9 of spin_event_timeout()

The function signature for spin_event_timeout() has changed in version V9.
Adjust the mpc5200 AC97 driver to use the new function.

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Acked-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/fsl/mpc5200_psc_ac97.c b/sound/soc/fsl/mpc5200_psc_ac97.c
index 9f2df15..794a247 100644
--- a/sound/soc/fsl/mpc5200_psc_ac97.c
+++ b/sound/soc/fsl/mpc5200_psc_ac97.c
@@ -31,13 +31,13 @@ static struct psc_dma *psc_dma;
 
 static unsigned short psc_ac97_read(struct snd_ac97 *ac97, unsigned short reg)
 {
-	int rc;
+	int status;
 	unsigned int val;
 
 	/* Wait for command send status zero = ready */
-	spin_event_timeout(!(in_be16(&psc_dma->psc_regs->sr_csr.status) &
-				MPC52xx_PSC_SR_CMDSEND), 100, 0, rc);
-	if (rc == 0) {
+	status = spin_event_timeout(!(in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_CMDSEND), 100, 0);
+	if (status == 0) {
 		pr_err("timeout on ac97 bus (rdy)\n");
 		return -ENODEV;
 	}
@@ -45,9 +45,9 @@ static unsigned short psc_ac97_read(struct snd_ac97 *ac97, unsigned short reg)
 	out_be32(&psc_dma->psc_regs->ac97_cmd, (1<<31) | ((reg & 0x7f) << 24));
 
 	/* Wait for the answer */
-	spin_event_timeout((in_be16(&psc_dma->psc_regs->sr_csr.status) &
-				MPC52xx_PSC_SR_DATA_VAL), 100, 0, rc);
-	if (rc == 0) {
+	status = spin_event_timeout((in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_DATA_VAL), 100, 0);
+	if (status == 0) {
 		pr_err("timeout on ac97 read (val) %x\n",
 				in_be16(&psc_dma->psc_regs->sr_csr.status));
 		return -ENODEV;
@@ -66,12 +66,12 @@ static unsigned short psc_ac97_read(struct snd_ac97 *ac97, unsigned short reg)
 static void psc_ac97_write(struct snd_ac97 *ac97,
 				unsigned short reg, unsigned short val)
 {
-	int rc;
+	int status;
 
 	/* Wait for command status zero = ready */
-	spin_event_timeout(!(in_be16(&psc_dma->psc_regs->sr_csr.status) &
-				MPC52xx_PSC_SR_CMDSEND), 100, 0, rc);
-	if (rc == 0) {
+	status = spin_event_timeout(!(in_be16(&psc_dma->psc_regs->sr_csr.status) &
+				MPC52xx_PSC_SR_CMDSEND), 100, 0);
+	if (status == 0) {
 		pr_err("timeout on ac97 bus (write)\n");
 		return;
 	}
@@ -82,24 +82,22 @@ static void psc_ac97_write(struct snd_ac97 *ac97,
 
 static void psc_ac97_warm_reset(struct snd_ac97 *ac97)
 {
-	int rc;
 	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
 
 	out_be32(&regs->sicr, psc_dma->sicr | MPC52xx_PSC_SICR_AWR);
-	spin_event_timeout(0, 3, 0, rc);
+	udelay(3);
 	out_be32(&regs->sicr, psc_dma->sicr);
 }
 
 static void psc_ac97_cold_reset(struct snd_ac97 *ac97)
 {
-	int rc;
 	struct mpc52xx_psc __iomem *regs = psc_dma->psc_regs;
 
 	/* Do a cold reset */
 	out_8(&regs->op1, MPC52xx_PSC_OP_RES);
-	spin_event_timeout(0, 10, 0, rc);
+	udelay(10);
 	out_8(&regs->op0, MPC52xx_PSC_OP_RES);
-	spin_event_timeout(0, 50, 0, rc);
+	udelay(50);
 	psc_ac97_warm_reset(ac97);
 }
 
-- 
cgit v0.10.2


From 55717314c4e3a5180a54228a2f97e50f3496de4c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 May 2009 22:13:17 +0200
Subject: pref_counter: tools: report: Robustify in case of weird events

This error condition:

  aldebaran:~/linux/linux/Documentation/perf_counter> perf report
  dso__load_sym: cannot get elf header.
  failed to open: /etc/ld.so.cache
  problem processing PERF_EVENT_MMAP, bailing out

caused the profile to be very short - as the error was at the beginning
of the file and we bailed out completely.

Be more permissive and consider the event broken instead.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 6df95c2..5993c12 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -1117,9 +1117,9 @@ more:
 		}
 
 		if (thread == NULL) {
-			fprintf(stderr, "problem processing %d event, bailing out\n",
+			fprintf(stderr, "problem processing %d event, skipping it.\n",
 				event->header.type);
-			goto done;
+			goto broken_event;
 		}
 
 		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
@@ -1149,8 +1149,8 @@ more:
 
 			if (hist_entry__add(thread, map, dso, sym, ip, level)) {
 				fprintf(stderr,
-		"problem incrementing symbol count, bailing out\n");
-				goto done;
+		"problem incrementing symbol count, skipping event\n");
+				goto broken_event;
 			}
 		}
 		total++;
@@ -1169,8 +1169,8 @@ more:
 				event->mmap.filename);
 		}
 		if (thread == NULL || map == NULL) {
-			fprintf(stderr, "problem processing PERF_EVENT_MMAP, bailing out\n");
-			goto done;
+			fprintf(stderr, "problem processing PERF_EVENT_MMAP, skipping event.\n");
+			goto broken_event;
 		}
 		thread__insert_map(thread, map);
 		total_mmap++;
@@ -1187,8 +1187,8 @@ more:
 		}
 		if (thread == NULL ||
 		    thread__set_comm(thread, event->comm.comm)) {
-			fprintf(stderr, "problem processing PERF_EVENT_COMM, bailing out\n");
-			goto done;
+			fprintf(stderr, "problem processing PERF_EVENT_COMM, skipping event.\n");
+			goto broken_event;
 		}
 		total_comm++;
 		break;
@@ -1221,7 +1221,6 @@ broken_event:
 		goto more;
 
 	rc = EXIT_SUCCESS;
-done:
 	close(input);
 
 	if (dump_trace) {
-- 
cgit v0.10.2


From b787f2e2a37a373a045f4d9b9bed941ccff01663 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Wed, 13 May 2009 16:25:57 -0500
Subject: fsldma: Fix compile warnings

We we build with dma_addr_t as a 64-bit quantity we get:

drivers/dma/fsldma.c: In function 'fsl_chan_xfer_ld_queue':
drivers/dma/fsldma.c:625: warning: cast to pointer from integer of different size
drivers/dma/fsldma.c: In function 'fsl_dma_chan_do_interrupt':
drivers/dma/fsldma.c:737: warning: cast to pointer from integer of different size
drivers/dma/fsldma.c:737: warning: cast to pointer from integer of different size
drivers/dma/fsldma.c: In function 'of_fsl_dma_probe':
drivers/dma/fsldma.c:927: warning: cast to pointer from integer of different

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 1578310..f18d1bd 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -641,8 +641,8 @@ static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
 	if (ld_node != &fsl_chan->ld_queue) {
 		/* Get the ld start address from ld_queue */
 		next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys;
-		dev_dbg(fsl_chan->dev, "xfer LDs staring from %p\n",
-				(void *)next_dest_addr);
+		dev_dbg(fsl_chan->dev, "xfer LDs staring from 0x%llx\n",
+				(unsigned long long)next_dest_addr);
 		set_cdar(fsl_chan, next_dest_addr);
 		dma_start(fsl_chan);
 	} else {
@@ -756,8 +756,9 @@ static irqreturn_t fsl_dma_chan_do_interrupt(int irq, void *data)
 	 */
 	if (stat & FSL_DMA_SR_EOSI) {
 		dev_dbg(fsl_chan->dev, "event: End-of-segments INT\n");
-		dev_dbg(fsl_chan->dev, "event: clndar %p, nlndar %p\n",
-			(void *)get_cdar(fsl_chan), (void *)get_ndar(fsl_chan));
+		dev_dbg(fsl_chan->dev, "event: clndar 0x%llx, nlndar 0x%llx\n",
+			(unsigned long long)get_cdar(fsl_chan),
+			(unsigned long long)get_ndar(fsl_chan));
 		stat &= ~FSL_DMA_SR_EOSI;
 		update_cookie = 1;
 	}
@@ -947,8 +948,8 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 	}
 
 	dev_info(&dev->dev, "Probe the Freescale DMA driver for %s "
-			"controller at %p...\n",
-			match->compatible, (void *)fdev->reg.start);
+			"controller at 0x%llx...\n",
+			match->compatible, (unsigned long long)fdev->reg.start);
 	fdev->reg_base = ioremap(fdev->reg.start, fdev->reg.end
 						- fdev->reg.start + 1);
 
-- 
cgit v0.10.2


From c1f67a88bf62fac0f4151c007b361199c2cd1988 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Wed, 27 May 2009 14:36:16 -0700
Subject: IB/mthca: Add module parameter for number of MTTs per segment

The current MTT allocator uses kmalloc() to allocate a buffer for its
buddy allocator, and thus is limited in the amount of MTT segments
that it can control.  As a result, the size of memory that can be
registered is limited too.  This patch uses a module parameter to
control the number of MTT entries that each segment represents,
allowing more memory to be registered with the same number of
segments.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 6d55f9d..8c2ed99 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1059,7 +1059,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
 	if (mthca_is_memfree(dev))
 		dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
-					       MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE;
+					       dev->limits.mtt_seg_size) / dev->limits.mtt_seg_size;
 	else
 		dev_lim->reserved_mtts = 1 << (field >> 4);
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 2525901..9ef611f 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -159,6 +159,7 @@ struct mthca_limits {
 	int      reserved_eqs;
 	int      num_mpts;
 	int      num_mtt_segs;
+	int	 mtt_seg_size;
 	int      fmr_reserved_mtts;
 	int      reserved_mtts;
 	int      reserved_mrws;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 1d83cf7..13da9f1 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -125,6 +125,10 @@ module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444);
 MODULE_PARM_DESC(fmr_reserved_mtts,
 		 "number of memory translation table segments reserved for FMR");
 
+static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)");
+
 static char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -162,6 +166,7 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
 	int err;
 	u8 status;
 
+	mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8;
 	err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
 	if (err) {
 		mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
@@ -460,11 +465,11 @@ static int mthca_init_icm(struct mthca_dev *mdev,
 	}
 
 	/* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
-	mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE,
-					   dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE;
+	mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size,
+					   dma_get_cache_alignment()) / mdev->limits.mtt_seg_size;
 
 	mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
-							 MTHCA_MTT_SEG_SIZE,
+							 mdev->limits.mtt_seg_size,
 							 mdev->limits.num_mtt_segs,
 							 mdev->limits.reserved_mtts,
 							 1, 0);
@@ -1315,6 +1320,12 @@ static void __init mthca_validate_profile(void)
 		printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n",
 		       hca_profile.fmr_reserved_mtts);
 	}
+
+	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) {
+		printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n",
+		       log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8));
+		log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
+	}
 }
 
 static int __init mthca_init(void)
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 882e6b7..d606edf 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -220,7 +220,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
 
 	mtt->buddy = buddy;
 	mtt->order = 0;
-	for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+	for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1)
 		++mtt->order;
 
 	mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
@@ -267,7 +267,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
 
 	while (list_len > 0) {
 		mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
-					   mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+					   mtt->first_seg * dev->limits.mtt_seg_size +
 					   start_index * 8);
 		mtt_entry[1] = 0;
 		for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
@@ -326,7 +326,7 @@ static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
 	u64 __iomem *mtts;
 	int i;
 
-	mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+	mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size +
 		start_index * sizeof (u64);
 	for (i = 0; i < list_len; ++i)
 		mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
@@ -345,10 +345,10 @@ static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
 	/* For Arbel, all MTTs must fit in the same page. */
 	BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
 	/* Require full segments */
-	BUG_ON(s % MTHCA_MTT_SEG_SIZE);
+	BUG_ON(s % dev->limits.mtt_seg_size);
 
 	mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
-				s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+				s / dev->limits.mtt_seg_size, &dma_handle);
 
 	BUG_ON(!mtts);
 
@@ -479,7 +479,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
 	if (mr->mtt)
 		mpt_entry->mtt_seg =
 			cpu_to_be64(dev->mr_table.mtt_base +
-				    mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
+				    mr->mtt->first_seg * dev->limits.mtt_seg_size);
 
 	if (0) {
 		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -626,7 +626,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		goto err_out_table;
 	}
 
-	mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+	mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;
 
 	if (mthca_is_memfree(dev)) {
 		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
@@ -908,7 +908,7 @@ int mthca_init_mr_table(struct mthca_dev *dev)
 			 dev->mr_table.mtt_base);
 
 		dev->mr_table.tavor_fmr.mtt_base =
-			ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
+			ioremap(addr, mtts * dev->limits.mtt_seg_size);
 		if (!dev->mr_table.tavor_fmr.mtt_base) {
 			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
 			err = -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index d168c25..8edb28a 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -94,7 +94,7 @@ s64 mthca_make_profile(struct mthca_dev *dev,
 	profile[MTHCA_RES_RDB].size  = MTHCA_RDB_ENTRY_SIZE;
 	profile[MTHCA_RES_MCG].size  = MTHCA_MGM_ENTRY_SIZE;
 	profile[MTHCA_RES_MPT].size  = dev_lim->mpt_entry_sz;
-	profile[MTHCA_RES_MTT].size  = MTHCA_MTT_SEG_SIZE;
+	profile[MTHCA_RES_MTT].size  = dev->limits.mtt_seg_size;
 	profile[MTHCA_RES_UAR].size  = dev_lim->uar_scratch_entry_sz;
 	profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
 	profile[MTHCA_RES_UARC].size = request->uarc_size;
@@ -232,7 +232,7 @@ s64 mthca_make_profile(struct mthca_dev *dev,
 			dev->limits.num_mtt_segs = profile[i].num;
 			dev->mr_table.mtt_base   = profile[i].start;
 			init_hca->mtt_base       = profile[i].start;
-			init_hca->mtt_seg_sz     = ffs(MTHCA_MTT_SEG_SIZE) - 7;
+			init_hca->mtt_seg_sz     = ffs(dev->limits.mtt_seg_size) - 7;
 			break;
 		case MTHCA_RES_UAR:
 			dev->limits.num_uars       = profile[i].num;
-- 
cgit v0.10.2


From ab6bf42e2339580b5d87746d0ff4da4b1578b03e Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Wed, 27 May 2009 14:38:34 -0700
Subject: mlx4_core: Add module parameter for number of MTTs per segment

The current MTT allocator uses kmalloc() to allocate a buffer for its
buddy allocator, and thus is limited in the amount of MTT segments
that it can control.  As a result, the size of memory that can be
registered is limited too.  This patch uses a module parameter to
control the number of MTT entries that each segment represents,
allowing more memory to be registered with the same number of
segments.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 30bea96..018348c 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -100,6 +100,10 @@ module_param_named(use_prio, use_prio, bool, 0444);
 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
 		  "(0/1, default 0)");
 
+static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)");
+
 int mlx4_check_port_params(struct mlx4_dev *dev,
 			   enum mlx4_port_type *port_type)
 {
@@ -203,12 +207,13 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
 	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
+	dev->caps.mtts_per_seg	     = 1 << log_mtts_per_seg;
 	dev->caps.reserved_mtts	     = DIV_ROUND_UP(dev_cap->reserved_mtts,
-						    MLX4_MTT_ENTRY_PER_SEG);
+						    dev->caps.mtts_per_seg);
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
 	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
-	dev->caps.mtt_entry_sz	     = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
+	dev->caps.mtt_entry_sz	     = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
 	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags		     = dev_cap->flags;
@@ -1304,6 +1309,11 @@ static int __init mlx4_verify_params(void)
 		return -1;
 	}
 
+	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) {
+		printk(KERN_WARNING "mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 0caf74c..3b8973d 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -209,7 +209,7 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
 	} else
 		mtt->page_shift = page_shift;
 
-	for (mtt->order = 0, i = MLX4_MTT_ENTRY_PER_SEG; i < npages; i <<= 1)
+	for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1)
 		++mtt->order;
 
 	mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
@@ -350,7 +350,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
 						   MLX4_MPT_PD_FLAG_RAE);
 		mpt_entry->mtt_sz    = cpu_to_be32((1 << mr->mtt.order) *
-						   MLX4_MTT_ENTRY_PER_SEG);
+						   dev->caps.mtts_per_seg);
 	} else {
 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
 	}
@@ -391,7 +391,7 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 	    (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
 		return -EINVAL;
 
-	if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1))
+	if (start_index & (dev->caps.mtts_per_seg - 1))
 		return -EINVAL;
 
 	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
index cebdf32..bd22df9 100644
--- a/drivers/net/mlx4/profile.c
+++ b/drivers/net/mlx4/profile.c
@@ -98,7 +98,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_EQ].size     = dev_cap->eqc_entry_sz;
 	profile[MLX4_RES_DMPT].size   = dev_cap->dmpt_entry_sz;
 	profile[MLX4_RES_CMPT].size   = dev_cap->cmpt_entry_sz;
-	profile[MLX4_RES_MTT].size    = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
+	profile[MLX4_RES_MTT].size    = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
 	profile[MLX4_RES_MCG].size    = MLX4_MGM_ENTRY_SIZE;
 
 	profile[MLX4_RES_QP].num      = request->num_qp;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 3aff8a6..ce7cc6c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -210,6 +210,7 @@ struct mlx4_caps {
 	int			num_comp_vectors;
 	int			num_mpts;
 	int			num_mtt_segs;
+	int			mtts_per_seg;
 	int			fmr_reserved_mtts;
 	int			reserved_mtts;
 	int			reserved_mrws;
-- 
cgit v0.10.2


From a0d24b295aed7a9daf4ca36bd4784e4d40f82303 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 19 May 2009 12:03:15 +0800
Subject: nfsd: fix hung up of nfs client while sync write data to nfs server

Commit 'Short write in nfsd becomes a full write to the client'
(31dec2538e45e9fff2007ea1f4c6bae9f78db724) broken the sync write.
With the following commands to reproduce:

  $ mount -t nfs -o sync 192.168.0.21:/nfsroot /mnt
  $ cd /mnt
  $ echo aaaa > temp.txt

Then nfs client is hung up.

In SYNC mode the server alaways return the write count 0 to the
client. This is because the value of host_err in nfsd_vfs_write()
will be overwrite in SYNC mode by 'host_err=nfsd_sync(file);',
and then we return host_err(which is now 0) as write count.

This patch fixed the problem.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6c68ffd..b660435 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1015,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
 	set_fs(oldfs);
 	if (host_err >= 0) {
+		*cnt = host_err;
 		nfsdstats.io_write += host_err;
 		fsnotify_modify(file->f_path.dentry);
 	}
@@ -1060,10 +1061,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	}
 
 	dprintk("nfsd: write complete host_err=%d\n", host_err);
-	if (host_err >= 0) {
+	if (host_err >= 0)
 		err = 0;
-		*cnt = host_err;
-	} else
+	else
 		err = nfserrno(host_err);
 out:
 	return err;
-- 
cgit v0.10.2


From 7ab1a2b31d4a8b4f519ccff5a84c53a5b87fd1be Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Wed, 27 May 2009 14:42:36 -0700
Subject: RDMA/cxgb3: Report correct port state and MTU

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 160ef482..e2a6321 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -40,6 +40,7 @@
 #include <linux/spinlock.h>
 #include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
+#include <linux/inetdevice.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -1152,12 +1153,39 @@ static int iwch_query_device(struct ib_device *ibdev,
 static int iwch_query_port(struct ib_device *ibdev,
 			   u8 port, struct ib_port_attr *props)
 {
+	struct iwch_dev *dev;
+	struct net_device *netdev;
+	struct in_device *inetdev;
+
 	PDBG("%s ibdev %p\n", __func__, ibdev);
 
+	dev = to_iwch_dev(ibdev);
+	netdev = dev->rdev.port_info.lldevs[port-1];
+
 	memset(props, 0, sizeof(struct ib_port_attr));
 	props->max_mtu = IB_MTU_4096;
-	props->active_mtu = IB_MTU_2048;
-	props->state = IB_PORT_ACTIVE;
+	if (netdev->mtu >= 4096)
+		props->active_mtu = IB_MTU_4096;
+	else if (netdev->mtu >= 2048)
+		props->active_mtu = IB_MTU_2048;
+	else if (netdev->mtu >= 1024)
+		props->active_mtu = IB_MTU_1024;
+	else if (netdev->mtu >= 512)
+		props->active_mtu = IB_MTU_512;
+	else
+		props->active_mtu = IB_MTU_256;
+
+	if (!netif_carrier_ok(netdev))
+		props->state = IB_PORT_DOWN;
+	else {
+		inetdev = in_dev_get(netdev);
+		if (inetdev->ifa_list)
+			props->state = IB_PORT_ACTIVE;
+		else
+			props->state = IB_PORT_INIT;
+		in_dev_put(inetdev);
+	}
+
 	props->port_cap_flags =
 	    IB_PORT_CM_SUP |
 	    IB_PORT_SNMP_TUNNEL_SUP |
-- 
cgit v0.10.2


From 3026c19a14ba71ccd4dc4925abab9395ea12839c Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Wed, 27 May 2009 14:43:39 -0700
Subject: RDMA/cxgb3: Limit fast register size based on T3 limitations

T3 firmware only supports one WRs worth of page list for fast register
work requests.  The driver currently allows 2 WRs worth, which
doesn't work for T3, so reduce the limit in the driver.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index ff9be1a..32e3b14 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -176,7 +176,7 @@ struct t3_send_wr {
 	struct t3_sge sgl[T3_MAX_SGE];	/* 4+ */
 };
 
-#define T3_MAX_FASTREG_DEPTH 24
+#define T3_MAX_FASTREG_DEPTH 10
 #define T3_MAX_FASTREG_FRAG 10
 
 struct t3_fastreg_wr {
-- 
cgit v0.10.2


From dacd2549ca61ddbdd1ed62a76ca95dea3f0e02c6 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Tue, 26 May 2009 09:08:03 +0900
Subject: PCI/ACPI: fix wrong ref count handling in acpi_pci_bind()

The 'dev' field of struct acpi_pci_data is having a pointer to struct
pci_dev without incrementing the reference counter. Because of this, I
got the following kernel oops when I was doing some pci hotplug
operations. This patch fixes this bug by replacing wrong hand-made
pci_find_slot() with pci_get_slot() in acpi_pci_bind().

 BUG: unable to handle kernel NULL pointer dereference at 00000000000000e8
 IP: [<ffffffff803f0e9b>] acpi_pci_unbind+0xb1/0xdd

 Call Trace:
  [<ffffffff803ecee4>] acpi_bus_remove+0x54/0x68
  [<ffffffff803ecf6d>] acpi_bus_trim+0x75/0xe3
  [<ffffffffa0345ddd>] acpiphp_disable_slot+0x16d/0x1e0 [acpiphp]
  [<ffffffffa03441f0>] disable_slot+0x20/0x60 [acpiphp]
  [<ffffffff803cfc18>] power_write_file+0xc8/0x110
  [<ffffffff803c6a54>] pci_slot_attr_store+0x24/0x30
  [<ffffffff803469ce>] sysfs_write_file+0xce/0x140
  [<ffffffff802e94e7>] vfs_write+0xc7/0x170
  [<ffffffff802e9aa0>] sys_write+0x50/0x90
  [<ffffffff8020bd6b>] system_call_fastpath+0x16/0x1b

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Reviewed-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Reviewed-by: Alex Chiang <achiang@hp.com>
Tested-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index 95650f8..bc46de3 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -116,9 +116,6 @@ int acpi_pci_bind(struct acpi_device *device)
 	struct acpi_pci_data *pdata;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	acpi_handle handle;
-	struct pci_dev *dev;
-	struct pci_bus *bus;
-
 
 	if (!device || !device->parent)
 		return -EINVAL;
@@ -176,20 +173,9 @@ int acpi_pci_bind(struct acpi_device *device)
 	 * Locate matching device in PCI namespace.  If it doesn't exist
 	 * this typically means that the device isn't currently inserted
 	 * (e.g. docking station, port replicator, etc.).
-	 * We cannot simply search the global pci device list, since
-	 * PCI devices are added to the global pci list when the root
-	 * bridge start ops are run, which may not have happened yet.
 	 */
-	bus = pci_find_bus(data->id.segment, data->id.bus);
-	if (bus) {
-		list_for_each_entry(dev, &bus->devices, bus_list) {
-			if (dev->devfn == PCI_DEVFN(data->id.device,
-						    data->id.function)) {
-				data->dev = dev;
-				break;
-			}
-		}
-	}
+	data->dev = pci_get_slot(pdata->bus,
+				PCI_DEVFN(data->id.device, data->id.function));
 	if (!data->dev) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "Device %04x:%02x:%02x.%d not present in PCI namespace\n",
@@ -259,9 +245,10 @@ int acpi_pci_bind(struct acpi_device *device)
 
       end:
 	kfree(buffer.pointer);
-	if (result)
+	if (result) {
+		pci_dev_put(data->dev);
 		kfree(data);
-
+	}
 	return result;
 }
 
@@ -303,6 +290,7 @@ static int acpi_pci_unbind(struct acpi_device *device)
 	if (data->dev->subordinate) {
 		acpi_pci_irq_del_prt(data->id.segment, data->bus->number);
 	}
+	pci_dev_put(data->dev);
 	kfree(data);
 
       end:
-- 
cgit v0.10.2


From 7f4218354fe312b327af06c3d8c95ed5f214c8ca Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 27 May 2009 18:51:06 -0400
Subject: nfsd: Revert "svcrpc: take advantage of tcp autotuning"

This reverts commit 47a14ef1af48c696b214ac168f056ddc79793d0e "svcrpc:
take advantage of tcp autotuning", which uncovered some further problems
in the server rpc code, causing significant performance regressions in
common cases.

We will likely reinstate this patch after releasing 2.6.30 and applying
some work on the underlying fixes to the problem (developed by Trond).

Reported-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Olga Kornievskaia <aglo@citi.umich.edu>
Cc: Jim Rees <rees@umich.edu>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index af31988..9d50423 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -345,6 +345,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
 	lock_sock(sock->sk);
 	sock->sk->sk_sndbuf = snd * 2;
 	sock->sk->sk_rcvbuf = rcv * 2;
+	sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
 	release_sock(sock->sk);
 #endif
 }
@@ -796,6 +797,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
 		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
 
+	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
+		/* sndbuf needs to have room for one request
+		 * per thread, otherwise we can stall even when the
+		 * network isn't a bottleneck.
+		 *
+		 * We count all threads rather than threads in a
+		 * particular pool, which provides an upper bound
+		 * on the number of threads which will access the socket.
+		 *
+		 * rcvbuf just needs to be able to hold a few requests.
+		 * Normally they will be removed from the queue
+		 * as soon a a complete request arrives.
+		 */
+		svc_sock_setbufsize(svsk->sk_sock,
+				    (serv->sv_nrthreads+3) * serv->sv_max_mesg,
+				    3 * serv->sv_max_mesg);
+
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
 	/* Receive data. If we haven't got the record length yet, get
@@ -1043,6 +1061,15 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 
 		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
 
+		/* initialise setting must have enough space to
+		 * receive and respond to one request.
+		 * svc_tcp_recvfrom will re-adjust if necessary
+		 */
+		svc_sock_setbufsize(svsk->sk_sock,
+				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
+				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
+
+		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		if (sk->sk_state != TCP_ESTABLISHED)
 			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1112,14 +1139,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)
 		svc_udp_init(svsk, serv);
-	else {
-		/* initialise setting must have enough space to
-		 * receive and respond to one request.
-		 */
-		svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
-					4 * serv->sv_max_mesg);
+	else
 		svc_tcp_init(svsk, serv);
-	}
 
 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
 				svsk, svsk->sk_sk);
-- 
cgit v0.10.2


From 98779be861a05c4cb75bed916df72ec0cba8b53d Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Thu, 14 May 2009 16:34:28 -0500
Subject: svcrdma: dma unmap the correct length for the RPCRDMA header page.

The svcrdma module was incorrectly unmapping the RPCRDMA header page.
On IBM pserver systems this causes a resource leak that results in
running out of bus address space (10 cthon iterations will reproduce it).
The code was mapping the full page but only unmapping the actual header
length.  The fix is to only map the header length.

I also cleaned up the use of ib_dma_map_page() calls since the unmap
logic always uses ib_dma_unmap_single().  I made these symmetrical.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 8b510c5..f11be72 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -128,7 +128,8 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
 		page_bytes -= sge_bytes;
 
 		frmr->page_list->page_list[page_no] =
-			ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
+			ib_dma_map_single(xprt->sc_cm_id->device,
+					  page_address(page),
 					  PAGE_SIZE, DMA_TO_DEVICE);
 		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
 					 frmr->page_list->page_list[page_no]))
@@ -532,18 +533,17 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
 
 	/* Prepare the SGE for the RPCRDMA Header */
+	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
+	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
 	ctxt->sge[0].addr =
-		ib_dma_map_page(rdma->sc_cm_id->device,
-				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
+		ib_dma_map_single(rdma->sc_cm_id->device, page_address(page),
+				  ctxt->sge[0].length, DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
 		goto err;
 	atomic_inc(&rdma->sc_dma_used);
 
 	ctxt->direction = DMA_TO_DEVICE;
 
-	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
-	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
-
 	/* Determine how many of our SGE are to be transmitted */
 	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4b0c2fa..5151f9f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -500,8 +500,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		BUG_ON(sge_no >= xprt->sc_max_sge);
 		page = svc_rdma_get_page();
 		ctxt->pages[sge_no] = page;
-		pa = ib_dma_map_page(xprt->sc_cm_id->device,
-				     page, 0, PAGE_SIZE,
+		pa = ib_dma_map_single(xprt->sc_cm_id->device,
+				     page_address(page), PAGE_SIZE,
 				     DMA_FROM_DEVICE);
 		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
 			goto err_put_ctxt;
@@ -1315,8 +1315,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
 
 	/* Prepare SGE for local address */
-	sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
-				   p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	sge.addr = ib_dma_map_single(xprt->sc_cm_id->device,
+				   page_address(p), PAGE_SIZE, DMA_FROM_DEVICE);
 	if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
 		put_page(p);
 		return;
@@ -1343,7 +1343,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	if (ret) {
 		dprintk("svcrdma: Error %d posting send for protocol error\n",
 			ret);
-		ib_dma_unmap_page(xprt->sc_cm_id->device,
+		ib_dma_unmap_single(xprt->sc_cm_id->device,
 				  sge.addr, PAGE_SIZE,
 				  DMA_FROM_DEVICE);
 		svc_rdma_put_context(ctxt, 1);
-- 
cgit v0.10.2


From 5b6045a906f48d37591365c5dcdd6d1d146bfd4a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 26 May 2009 17:28:02 +0200
Subject: trace: disable preemption before taking raw spinlocks

s390 code uses smp_processor_id() in __raw_spin_lock() code which
reveals that a (raw) spinlock is taken without preemption disabled.
This can potentially deadlock.

To fix this explicitly disable and enable preemption.

BUG: using smp_processor_id() in preemptible [00000000] code: cat/2278
caller is trace_find_cmdline+0x40/0xfc
CPU: 0 Not tainted 2.6.30-rc7-dirty #39
Process cat (pid: 2278, task: 000000003faedb68, ksp: 000000003b33b988)
000000003b33b988 000000003b33bae0 0000000000000002 0000000000000000
       000000003b33bb80 000000003b33baf8 000000003b33baf8 00000000000175d6
       0000000000000001 000000003b33b988 000000003f9b0000 000000000000000b
       000000000000000c 000000003b33bb40 000000003b33bae0 0000000000000000
       0000000000000000 00000000000175d6 000000003b33bae0 000000003b33bb28
Call Trace:
([<00000000000174b2>] show_trace+0x112/0x170)
 [<0000000000017582>] show_stack+0x72/0x100
 [<0000000000441538>] dump_stack+0xc8/0xd8
 [<000000000025c350>] debug_smp_processor_id+0x114/0x130
 [<00000000000bf0e4>] trace_find_cmdline+0x40/0xfc
 [<00000000000c35d4>] trace_print_context+0x58/0xac
 [<00000000000bb676>] print_trace_line+0x416/0x470
 [<00000000000bc8fe>] s_show+0x4e/0x428
 [<000000000013834e>] seq_read+0x36a/0x5d4
 [<0000000000112a78>] vfs_read+0xc8/0x174
 [<0000000000112c58>] SyS_read+0x74/0xc4
 [<000000000002c7ae>] sysc_noemu+0x10/0x16
 [<000002000012436c>] 0x2000012436c
1 lock held by cat/2278:
 #0:  (&p->lock){+.+.+.}, at: [<0000000000138056>] seq_read+0x72/0x5d4

[ Impact: fix preempt-unsafe raw spinlock ]

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 02d32ba..a3a8a87 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -808,6 +808,7 @@ void trace_find_cmdline(int pid, char comm[])
 		return;
 	}
 
+	preempt_disable();
 	__raw_spin_lock(&trace_cmdline_lock);
 	map = map_pid_to_cmdline[pid];
 	if (map != NO_CMDLINE_MAP)
@@ -816,6 +817,7 @@ void trace_find_cmdline(int pid, char comm[])
 		strcpy(comm, "<...>");
 
 	__raw_spin_unlock(&trace_cmdline_lock);
+	preempt_enable();
 }
 
 void tracing_record_cmdline(struct task_struct *tsk)
-- 
cgit v0.10.2


From f2aebaee653a35b01c3665de2cbb1e31456b8ea8 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Wed, 27 May 2009 21:36:02 +0800
Subject: ftrace: don't convert function's local variable name in macro

"call" is an argument of macro, but it is also used as a local
variable name of function in macro.
We should keep this local variable name distinct from any
CPP macro parameter name if both are in the same macro scope,
although it hasn't caused any problem yet.

[ Impact: robustify macro ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 87fc227..b4ec83a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -397,19 +397,19 @@ static void ftrace_profile_##call(proto)				\
 	perf_tpcounter_event(event_##call.id);				\
 }									\
 									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
+static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
 {									\
 	int ret = 0;							\
 									\
-	if (!atomic_inc_return(&call->profile_count))			\
+	if (!atomic_inc_return(&event_call->profile_count))		\
 		ret = register_trace_##call(ftrace_profile_##call);	\
 									\
 	return ret;							\
 }									\
 									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
+static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
 {									\
-	if (atomic_add_negative(-1, &call->profile_count))		\
+	if (atomic_add_negative(-1, &event_call->profile_count))	\
 		unregister_trace_##call(ftrace_profile_##call);		\
 }
 
@@ -433,9 +433,9 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
 #define __array(type, item, len)
 
 #undef __string
-#define __string(item, src)						       \
-	__str_offsets.item = __str_size +				       \
-			     offsetof(typeof(*entry), __str_data);	       \
+#define __string(item, src)						\
+	__str_offsets.item = __str_size +				\
+			     offsetof(typeof(*entry), __str_data);	\
 	__str_size += strlen(src) + 1;
 
 #undef __assign_str
@@ -451,8 +451,8 @@ static struct ftrace_event_call event_##call;				\
 									\
 static void ftrace_raw_event_##call(proto)				\
 {									\
-	struct ftrace_str_offsets_##call __maybe_unused __str_offsets;  \
-	struct ftrace_event_call *call = &event_##call;			\
+	struct ftrace_str_offsets_##call __maybe_unused __str_offsets;	\
+	struct ftrace_event_call *event_call = &event_##call;		\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	unsigned long irq_flags;					\
@@ -473,7 +473,7 @@ static void ftrace_raw_event_##call(proto)				\
 									\
 	assign;								\
 									\
-	if (!filter_current_check_discard(call, entry, event))		\
+	if (!filter_current_check_discard(event_call, entry, event))	\
 		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
 }									\
 									\
-- 
cgit v0.10.2


From 14dba5331b90c20588ae6504fea8049c7283028d Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 27 May 2009 09:31:52 -0400
Subject: integrity: nfsd imbalance bug fix

An nfsd exported file is opened/closed by the kernel causing the
integrity imbalance message.

Before a file is opened, there normally is permission checking, which
is done in inode_permission().  However, as integrity checking requires
a dentry and mount point, which is not available in inode_permission(),
the integrity (permission) checking must be called separately.

In order to detect any missing integrity checking calls, we keep track
of file open/closes.  ima_path_check() increments these counts and
does the integrity (permission) checking. As a result, the number of
calls to ima_path_check()/ima_file_free() should be balanced.  An extra
call to fput(), indicates the file could have been accessed without first
calling ima_path_check().

In nfsv3 permission checking is done once, followed by multiple reads,
which do an open/close for each read.  The integrity (permission) checking
call should be in nfsd_permission() after the inode_permission() call, but
as there is no correlation between the number of permission checking and
open calls, the integrity checking call should not increment the counters,
but defer it to when the file is actually opened.

This patch adds:
- integrity (permission) checking for nfsd exported files in nfsd_permission().
- a call to increment counts for files opened by nfsd.

This patch has been updated to return the nfs error types.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6c68ffd..81ff0f4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -55,6 +55,7 @@
 #include <linux/security.h>
 #endif /* CONFIG_NFSD_V4 */
 #include <linux/jhash.h>
+#include <linux/ima.h>
 
 #include <asm/uaccess.h>
 
@@ -735,6 +736,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, cred);
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
+	else
+		ima_counts_get(*filp);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
@@ -2024,6 +2027,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 					struct dentry *dentry, int acc)
 {
 	struct inode	*inode = dentry->d_inode;
+	struct path	path;
 	int		err;
 
 	if (acc == NFSD_MAY_NOP)
@@ -2096,7 +2100,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 	if (err == -EACCES && S_ISREG(inode->i_mode) &&
 	    acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
 		err = inode_permission(inode, MAY_EXEC);
+	if (err)
+		goto nfsd_out;
 
+	/* Do integrity (permission) checking now, but defer incrementing
+	 * IMA counts to the actual file open.
+	 */
+	path.mnt = exp->ex_path.mnt;
+	path.dentry = dentry;
+	err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
+			     IMA_COUNT_LEAVE);
+nfsd_out:
 	return err? nfserrno(err) : 0;
 }
 
-- 
cgit v0.10.2


From abfe0af9813153bae8c85d9bac966bafcb8ddab1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 20 May 2009 00:37:40 -0700
Subject: x86: enable_update_mptable should be a macro

instead of declaring one variant as an inline function...
because other case is a variable

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4A13B344.7030307@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 3dcbaaa..e2a1bb6 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -61,9 +61,11 @@ extern void get_smp_config(void);
 #ifdef CONFIG_X86_MPPARSE
 extern void find_smp_config(void);
 extern void early_reserve_e820_mpc_new(void);
+extern int enable_update_mptable;
 #else
 static inline void find_smp_config(void) { }
 static inline void early_reserve_e820_mpc_new(void) { }
+#define enable_update_mptable 0
 #endif
 
 void __cpuinit generic_processor_info(int apicid, int version);
@@ -87,15 +89,6 @@ static inline int acpi_probe_gsi(void)
 }
 #endif /* CONFIG_ACPI */
 
-#ifdef CONFIG_X86_MPPARSE
-extern int enable_update_mptable;
-#else
-static inline int enable_update_mptable(void)
-{
-	return 0;
-}
-#endif
-
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_APICS)
 
 struct physid_mask {
-- 
cgit v0.10.2


From 13b297d943828c4594527a2bd9c30ecd04e37886 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Tue, 26 May 2009 14:18:07 +0900
Subject: smack: Remove redundant initialization.

We don't need to explicitly initialize to cap_* because
it will be filled by security_fixup_ops().

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0d030b4..0023182 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -3034,15 +3034,7 @@ struct security_operations smack_ops = {
 
 	.ptrace_may_access =		smack_ptrace_may_access,
 	.ptrace_traceme =		smack_ptrace_traceme,
-	.capget = 			cap_capget,
-	.capset = 			cap_capset,
-	.capable = 			cap_capable,
 	.syslog = 			smack_syslog,
-	.settime = 			cap_settime,
-	.vm_enough_memory = 		cap_vm_enough_memory,
-
-	.bprm_set_creds = 		cap_bprm_set_creds,
-	.bprm_secureexec = 		cap_bprm_secureexec,
 
 	.sb_alloc_security = 		smack_sb_alloc_security,
 	.sb_free_security = 		smack_sb_free_security,
@@ -3066,8 +3058,6 @@ struct security_operations smack_ops = {
 	.inode_post_setxattr = 		smack_inode_post_setxattr,
 	.inode_getxattr = 		smack_inode_getxattr,
 	.inode_removexattr = 		smack_inode_removexattr,
-	.inode_need_killpriv =		cap_inode_need_killpriv,
-	.inode_killpriv =		cap_inode_killpriv,
 	.inode_getsecurity = 		smack_inode_getsecurity,
 	.inode_setsecurity = 		smack_inode_setsecurity,
 	.inode_listsecurity = 		smack_inode_listsecurity,
@@ -3088,7 +3078,6 @@ struct security_operations smack_ops = {
 	.cred_commit =			smack_cred_commit,
 	.kernel_act_as =		smack_kernel_act_as,
 	.kernel_create_files_as =	smack_kernel_create_files_as,
-	.task_fix_setuid =		cap_task_fix_setuid,
 	.task_setpgid = 		smack_task_setpgid,
 	.task_getpgid = 		smack_task_getpgid,
 	.task_getsid = 			smack_task_getsid,
@@ -3102,7 +3091,6 @@ struct security_operations smack_ops = {
 	.task_kill = 			smack_task_kill,
 	.task_wait = 			smack_task_wait,
 	.task_to_inode = 		smack_task_to_inode,
-	.task_prctl =			cap_task_prctl,
 
 	.ipc_permission = 		smack_ipc_permission,
 	.ipc_getsecid =			smack_ipc_getsecid,
@@ -3129,9 +3117,6 @@ struct security_operations smack_ops = {
 	.sem_semctl = 			smack_sem_semctl,
 	.sem_semop = 			smack_sem_semop,
 
-	.netlink_send =			cap_netlink_send,
-	.netlink_recv = 		cap_netlink_recv,
-
 	.d_instantiate = 		smack_d_instantiate,
 
 	.getprocattr = 			smack_getprocattr,
-- 
cgit v0.10.2


From 1812e67c7410c8d0d57f14a3dc81a99bd5b30e3e Mon Sep 17 00:00:00 2001
From: Tony Vroon <tony@linx.net>
Date: Wed, 27 May 2009 21:00:41 +0100
Subject: ALSA: hda - Compaq Presario CQ60 patching for Conexant

A docking mic control is shown by default. The Compaq Presario
CQ60 laptop has no docking connector, so designate it as a
CXT5051_HP model.
This makes the phantom mixer slider disappear.

Signed-off-by: Tony Vroon <tony@linx.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 56ce19e..4fcbe21 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1848,6 +1848,7 @@ static const char *cxt5051_models[CXT5051_MODELS] = {
 
 static struct snd_pci_quirk cxt5051_cfg_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x30cf, "HP DV6736", CXT5051_HP_DV6736),
+	SND_PCI_QUIRK(0x103c, 0x360b, "Compaq Presario CQ60", CXT5051_HP),
 	SND_PCI_QUIRK(0x14f1, 0x0101, "Conexant Reference board",
 		      CXT5051_LAPTOP),
 	SND_PCI_QUIRK(0x14f1, 0x5051, "HP Spartan 1.1", CXT5051_HP),
-- 
cgit v0.10.2


From d3e78ee3d015dac1794433abb6403b6fc8e70e10 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 28 May 2009 11:41:50 +0200
Subject: perf_counter: Fix perf_counter_init_task() on !CONFIG_PERF_COUNTERS

Pointed out by compiler warnings:

   tip/include/linux/perf_counter.h:644: warning: no return statement in function returning non-void

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2b16ed3..a65ddc5 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -641,7 +641,7 @@ perf_counter_task_sched_out(struct task_struct *task,
 			    struct task_struct *next, int cpu)		{ }
 static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
-static inline int perf_counter_init_task(struct task_struct *child)	{ }
+static inline int perf_counter_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
-- 
cgit v0.10.2


From ca8cdeef9ca2ff89ee8a21d6f6ff3dfb60286041 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 28 May 2009 11:08:33 +0200
Subject: perf_counter tools: report: Implement header output for --sort
 variants

Implement this style of header:

 #
 # Overhead          Command       File: Symbol
 # ........          .......       ............
 #

for the various --sort variants as well.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 5993c12..506cde4 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -596,8 +596,6 @@ out_delete:
 
 struct thread;
 
-static const char *thread__name(struct thread *self, char *bf, size_t size);
-
 struct thread {
 	struct rb_node	 rb_node;
 	struct list_head maps;
@@ -605,15 +603,6 @@ struct thread {
 	char		 *comm;
 };
 
-static const char *thread__name(struct thread *self, char *bf, size_t size)
-{
-	if (self->comm)
-		return self->comm;
-
-	snprintf(bf, sizeof(bf), ":%u", self->pid);
-	return bf;
-}
-
 static struct thread *thread__new(pid_t pid)
 {
 	struct thread *self = malloc(sizeof(*self));
@@ -707,8 +696,9 @@ struct hist_entry {
 struct sort_entry {
 	struct list_head list;
 
+	char *header;
+
 	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
-	size_t (*print_header)(FILE *fp);
 	size_t	(*print)(FILE *fp, struct hist_entry *);
 };
 
@@ -721,13 +711,11 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__thread_print(FILE *fp, struct hist_entry *self)
 {
-	char bf[32];
-
-	return fprintf(fp, " %16s",
-			thread__name(self->thread, bf, sizeof(bf)));
+	return fprintf(fp, " %16s:%5d", self->thread->comm ?: "", self->thread->pid);
 }
 
 static struct sort_entry sort_thread = {
+	.header = "         Command: Pid ",
 	.cmp	= sort__thread_cmp,
 	.print	= sort__thread_print,
 };
@@ -757,6 +745,7 @@ sort__comm_print(FILE *fp, struct hist_entry *self)
 }
 
 static struct sort_entry sort_comm = {
+	.header = "         Command",
 	.cmp	= sort__comm_cmp,
 	.print	= sort__comm_print,
 };
@@ -786,6 +775,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self)
 }
 
 static struct sort_entry sort_dso = {
+	.header = "                                                    Shared Object",
 	.cmp	= sort__dso_cmp,
 	.print	= sort__dso_print,
 };
@@ -804,43 +794,25 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 	return (int64_t)(ip_r - ip_l);
 }
 
-static size_t sort__sym_print_header(FILE *fp)
-{
-	size_t ret = 0;
-
-	ret += fprintf(fp, "#\n");
-	ret += fprintf(fp, "# Overhead          Command       File: Symbol\n");
-	ret += fprintf(fp, "# ........          .......       ............\n");
-	ret += fprintf(fp, "#\n");
-
-	return ret;
-}
-
 static size_t
 sort__sym_print(FILE *fp, struct hist_entry *self)
 {
 	size_t ret = 0;
 
-	ret += fprintf(fp, "  [%c] ", self->level);
-
 	if (verbose)
 		ret += fprintf(fp, " %#018llx", (unsigned long long)self->ip);
 
-	if (self->level != '.')
-		ret += fprintf(fp, " kernel: %s",
-			       self->sym ? self->sym->name : "<unknown>");
-	else
-		ret += fprintf(fp, " %s: %s",
-			       self->dso ? self->dso->name : "<unknown>",
-			       self->sym ? self->sym->name : "<unknown>");
+	ret += fprintf(fp, " %s: %s",
+			self->dso ? self->dso->name : "<unknown>",
+			self->sym ? self->sym->name : "<unknown>");
 
 	return ret;
 }
 
 static struct sort_entry sort_sym = {
-	.cmp		= sort__sym_cmp,
-	.print_header	= sort__sym_print_header,
-	.print		= sort__sym_print,
+	.header = "Shared Object: Symbol",
+	.cmp	= sort__sym_cmp,
+	.print	= sort__sym_print,
 };
 
 struct sort_dimension {
@@ -1021,10 +993,24 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 	struct rb_node *nd;
 	size_t ret = 0;
 
+	fprintf(fp, "#\n");
+
+	fprintf(fp, "# Overhead");
+	list_for_each_entry(se, &hist_entry__sort_list, list)
+		fprintf(fp, " %s", se->header);
+	fprintf(fp, "\n");
+
+	fprintf(fp, "# ........");
 	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		if (se->print_header)
-			ret += se->print_header(fp);
+		int i;
+
+		fprintf(fp, " ");
+		for (i = 0; i < strlen(se->header); i++)
+			fprintf(fp, ".");
 	}
+	fprintf(fp, "\n");
+
+	fprintf(fp, "#\n");
 
 	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
 		pos = rb_entry(nd, struct hist_entry, rb_node);
-- 
cgit v0.10.2


From 63299f057fbce47da895e8865cba7e9c3eb01a20 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 28 May 2009 10:52:00 +0200
Subject: perf_counter tools: report: Add help text for --sort

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 506cde4..9fdf822 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -1240,7 +1240,8 @@ static const struct option options[] = {
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
-	OPT_STRING('s', "sort", &sort_order, "foo", "bar"),
+	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+		   "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From 5d85d3247cc3555215d7d11c78576a396c98e4d9 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Thu, 28 May 2009 11:04:53 +0200
Subject: block: export blk_stack_limits()

DM needs to use blk_stack_limits(), so it needs to be exported.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5649f34..8d33934 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -500,6 +500,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
 	return 0;
 }
+EXPORT_SYMBOL(blk_stack_limits);
 
 /**
  * disk_stack_limits - adjust queue limits for stacked drivers
-- 
cgit v0.10.2


From 449bd54dcbd0b60070ce4129fedaf0f4ae044099 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Wed, 27 May 2009 17:08:39 -0700
Subject: ASoC: correct print specifiers for unsigneds

Unsigned variables should use `%u' rather than `%d'.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/atmel/playpaq_wm8510.c b/sound/soc/atmel/playpaq_wm8510.c
index 7065753..9eb610c 100644
--- a/sound/soc/atmel/playpaq_wm8510.c
+++ b/sound/soc/atmel/playpaq_wm8510.c
@@ -117,7 +117,7 @@ static struct ssc_clock_data playpaq_wm8510_calc_ssc_clock(
 	 * Find actual rate, compare to requested rate
 	 */
 	actual_rate = (cd.ssc_rate / (cd.cmr_div * 2)) / (2 * (cd.period + 1));
-	pr_debug("playpaq_wm8510: Request rate = %d, actual rate = %d\n",
+	pr_debug("playpaq_wm8510: Request rate = %u, actual rate = %u\n",
 		 rate, actual_rate);
 
 
diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c
index 21f69df..9fcbb9c 100644
--- a/sound/soc/codecs/tlv320aic23.c
+++ b/sound/soc/codecs/tlv320aic23.c
@@ -86,7 +86,7 @@ static int tlv320aic23_write(struct snd_soc_codec *codec, unsigned int reg,
 	 */
 
 	if ((reg < 0 || reg > 9) && (reg != 15)) {
-		printk(KERN_WARNING "%s Invalid register R%d\n", __func__, reg);
+		printk(KERN_WARNING "%s Invalid register R%u\n", __func__, reg);
 		return -1;
 	}
 
@@ -98,7 +98,7 @@ static int tlv320aic23_write(struct snd_soc_codec *codec, unsigned int reg,
 	if (codec->hw_write(codec->control_data, data, 2) == 2)
 		return 0;
 
-	printk(KERN_ERR "%s cannot write %03x to register R%d\n", __func__,
+	printk(KERN_ERR "%s cannot write %03x to register R%u\n", __func__,
 	       value, reg);
 
 	return -EIO;
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
index ddefb8f..269b108 100644
--- a/sound/soc/codecs/uda134x.c
+++ b/sound/soc/codecs/uda134x.c
@@ -101,7 +101,7 @@ static int uda134x_write(struct snd_soc_codec *codec, unsigned int reg,
 	pr_debug("%s reg: %02X, value:%02X\n", __func__, reg, value);
 
 	if (reg >= UDA134X_REGS_NUM) {
-		printk(KERN_ERR "%s unkown register: reg: %d",
+		printk(KERN_ERR "%s unkown register: reg: %u",
 		       __func__, reg);
 		return -EINVAL;
 	}
@@ -296,7 +296,7 @@ static int uda134x_set_dai_sysclk(struct snd_soc_dai *codec_dai,
 	struct snd_soc_codec *codec = codec_dai->codec;
 	struct uda134x_priv *uda134x = codec->private_data;
 
-	pr_debug("%s clk_id: %d, freq: %d, dir: %d\n", __func__,
+	pr_debug("%s clk_id: %d, freq: %u, dir: %d\n", __func__,
 		 clk_id, freq, dir);
 
 	/* Anything between 256fs*8Khz and 512fs*48Khz should be acceptable
diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
index 0275321..e7348d3 100644
--- a/sound/soc/codecs/wm8350.c
+++ b/sound/soc/codecs/wm8350.c
@@ -1108,7 +1108,7 @@ static int wm8350_set_fll(struct snd_soc_dai *codec_dai,
 	if (ret < 0)
 		return ret;
 	dev_dbg(wm8350->dev,
-		"FLL in %d FLL out %d N 0x%x K 0x%x div %d ratio %d",
+		"FLL in %u FLL out %u N 0x%x K 0x%x div %d ratio %d",
 		freq_in, freq_out, fll_div.n, fll_div.k, fll_div.div,
 		fll_div.ratio);
 
diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c
index e4547de..502eefa 100644
--- a/sound/soc/codecs/wm8400.c
+++ b/sound/soc/codecs/wm8400.c
@@ -954,7 +954,7 @@ static int fll_factors(struct wm8400_priv *wm8400, struct fll_factors *factors,
 		factors->outdiv *= 2;
 		if (factors->outdiv > 32) {
 			dev_err(wm8400->wm8400->dev,
-				"Unsupported FLL output frequency %dHz\n",
+				"Unsupported FLL output frequency %uHz\n",
 				Fout);
 			return -EINVAL;
 		}
@@ -1003,7 +1003,7 @@ static int fll_factors(struct wm8400_priv *wm8400, struct fll_factors *factors,
 	factors->k = K / 10;
 
 	dev_dbg(wm8400->wm8400->dev,
-		"FLL: Fref=%d Fout=%d N=%x K=%x, FRATIO=%x OUTDIV=%x\n",
+		"FLL: Fref=%u Fout=%u N=%x K=%x, FRATIO=%x OUTDIV=%x\n",
 		Fref, Fout,
 		factors->n, factors->k, factors->fratio, factors->outdiv);
 
diff --git a/sound/soc/codecs/wm8510.c b/sound/soc/codecs/wm8510.c
index 6a4cea0..c8b8dba 100644
--- a/sound/soc/codecs/wm8510.c
+++ b/sound/soc/codecs/wm8510.c
@@ -298,7 +298,7 @@ static void pll_factors(unsigned int target, unsigned int source)
 
 	if ((Ndiv < 6) || (Ndiv > 12))
 		printk(KERN_WARNING
-			"WM8510 N value %d outwith recommended range!d\n",
+			"WM8510 N value %u outwith recommended range!d\n",
 			Ndiv);
 
 	pll_div.n = Ndiv;
diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c
index 9f6be3d..86c4b24d 100644
--- a/sound/soc/codecs/wm8580.c
+++ b/sound/soc/codecs/wm8580.c
@@ -415,7 +415,7 @@ static int pll_factors(struct _pll_div *pll_div, unsigned int target,
 	unsigned int K, Ndiv, Nmod;
 	int i;
 
-	pr_debug("wm8580: PLL %dHz->%dHz\n", source, target);
+	pr_debug("wm8580: PLL %uHz->%uHz\n", source, target);
 
 	/* Scale the output frequency up; the PLL should run in the
 	 * region of 90-100MHz.
@@ -447,7 +447,7 @@ static int pll_factors(struct _pll_div *pll_div, unsigned int target,
 
 	if ((Ndiv < 5) || (Ndiv > 13)) {
 		printk(KERN_ERR
-			"WM8580 N=%d outside supported range\n", Ndiv);
+			"WM8580 N=%u outside supported range\n", Ndiv);
 		return -EINVAL;
 	}
 
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index d121e58..d28eeac 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -703,7 +703,7 @@ static void pll_factors(struct _pll_div *pll_div, unsigned int target,
 
 	if ((Ndiv < 6) || (Ndiv > 12))
 		printk(KERN_WARNING
-			"wm8753: unsupported N = %d\n", Ndiv);
+			"wm8753: unsupported N = %u\n", Ndiv);
 
 	pll_div->n = Ndiv;
 	Nmod = target % source;
diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c
index 46c5ea1..3c78945 100644
--- a/sound/soc/codecs/wm8900.c
+++ b/sound/soc/codecs/wm8900.c
@@ -778,11 +778,11 @@ static int fll_factors(struct _fll_div *fll_div, unsigned int Fref,
 	}
 
 	if (target > 100000000)
-		printk(KERN_WARNING "wm8900: FLL rate %d out of range, Fref=%d"
-		       " Fout=%d\n", target, Fref, Fout);
+		printk(KERN_WARNING "wm8900: FLL rate %u out of range, Fref=%u"
+		       " Fout=%u\n", target, Fref, Fout);
 	if (div > 32) {
 		printk(KERN_ERR "wm8900: Invalid FLL division rate %u, "
-		       "Fref=%d, Fout=%d, target=%d\n",
+		       "Fref=%u, Fout=%u, target=%u\n",
 		       div, Fref, Fout, target);
 		return -EINVAL;
 	}
diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c
index 40cd274..d029818 100644
--- a/sound/soc/codecs/wm8990.c
+++ b/sound/soc/codecs/wm8990.c
@@ -998,7 +998,7 @@ static void pll_factors(struct _pll_div *pll_div, unsigned int target,
 
 	if ((Ndiv < 6) || (Ndiv > 12))
 		printk(KERN_WARNING
-		"WM8990 N value outwith recommended range! N = %d\n", Ndiv);
+		"WM8990 N value outwith recommended range! N = %u\n", Ndiv);
 
 	pll_div->n = Ndiv;
 	Nmod = target % source;
diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index d1744e9..abed37a 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -710,7 +710,7 @@ static void pll_factors(struct _pll_div *pll_div, unsigned int source)
 	Ndiv = target / source;
 	if ((Ndiv < 5) || (Ndiv > 12))
 		printk(KERN_WARNING
-			"WM9713 PLL N value %d out of recommended range!\n",
+			"WM9713 PLL N value %u out of recommended range!\n",
 			Ndiv);
 
 	pll_div->n = Ndiv;
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 6fc7876..19c4540 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -208,7 +208,7 @@ static int pxa_ssp_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
 		~(SSCR0_ECS |  SSCR0_NCS | SSCR0_MOD | SSCR0_ACS);
 
 	dev_dbg(&ssp->pdev->dev,
-		"pxa_ssp_set_dai_sysclk id: %d, clk_id %d, freq %d\n",
+		"pxa_ssp_set_dai_sysclk id: %d, clk_id %d, freq %u\n",
 		cpu_dai->id, clk_id, freq);
 
 	switch (clk_id) {
@@ -357,7 +357,7 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai,
 			ssacd |= (0x6 << 4);
 
 			dev_dbg(&ssp->pdev->dev,
-				"Using SSACDD %x to supply %dHz\n",
+				"Using SSACDD %x to supply %uHz\n",
 				val, freq_out);
 			break;
 		}
diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c
index 972c276..1a28317 100644
--- a/sound/soc/s3c24xx/s3c-i2s-v2.c
+++ b/sound/soc/s3c24xx/s3c-i2s-v2.c
@@ -547,7 +547,7 @@ int s3c_i2sv2_iis_calc_rate(struct s3c_i2sv2_rate_calc *info,
 		actual = clkrate / (fsdiv * div);
 		deviation = actual - rate;
 
-		printk(KERN_DEBUG "%dfs: div %d => result %d, deviation %d\n",
+		printk(KERN_DEBUG "%ufs: div %u => result %u, deviation %d\n",
 		       fsdiv, div, actual, deviation);
 
 		deviation = abs(deviation);
@@ -563,7 +563,7 @@ int s3c_i2sv2_iis_calc_rate(struct s3c_i2sv2_rate_calc *info,
 			break;
 	}
 
-	printk(KERN_DEBUG "best: fs=%d, div=%d, rate=%d\n",
+	printk(KERN_DEBUG "best: fs=%u, div=%u, rate=%u\n",
 	       best_fs, best_div, best_rate);
 
 	info->fs_div = best_fs;
diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c
index 56fa087..b378096 100644
--- a/sound/soc/sh/ssi.c
+++ b/sound/soc/sh/ssi.c
@@ -145,7 +145,7 @@ static int ssi_hw_params(struct snd_pcm_substream *substream,
 	recv = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? 0 : 1;
 
 	pr_debug("ssi_hw_params() enter\nssicr was    %08lx\n", ssicr);
-	pr_debug("bits: %d channels: %d\n", bits, channels);
+	pr_debug("bits: %u channels: %u\n", bits, channels);
 
 	ssicr &= ~(CR_TRMD | CR_CHNL_MASK | CR_DWL_MASK | CR_PDTA |
 		   CR_SWL_MASK);
-- 
cgit v0.10.2


From b05a7d4fed7e51dca37d0a31baf1466de30b1f01 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 28 May 2009 11:59:12 +0200
Subject: ALSA: hda - Always sync writes in single_cmd mode

In the single_cmd mode, the hardware cannot store the multiple replies
like on RIRB, thus each verb has to sync and wait for the response no
matter whether the return value is needed or not.  Otherwise it may
result in a wrong return value from the previous verb.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 01d8d97..31a695e 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -673,6 +673,27 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
  *       I left the codes, however, for debugging/testing purposes.
  */
 
+/* receive a response */
+static int azx_single_wait_for_response(struct azx *chip)
+{
+	int timeout = 50;
+
+	while (timeout--) {
+		/* check IRV busy bit */
+		if (azx_readw(chip, IRS) & ICH6_IRS_VALID) {
+			/* reuse rirb.res as the response return value */
+			chip->rirb.res = azx_readl(chip, IR);
+			return 0;
+		}
+		udelay(1);
+	}
+	if (printk_ratelimit())
+		snd_printd(SFX "get_response timeout: IRS=0x%x\n",
+			   azx_readw(chip, IRS));
+	chip->rirb.res = -1;
+	return -EIO;
+}
+
 /* send a command */
 static int azx_single_send_cmd(struct hda_bus *bus, u32 val)
 {
@@ -688,7 +709,7 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 val)
 			azx_writel(chip, IC, val);
 			azx_writew(chip, IRS, azx_readw(chip, IRS) |
 				   ICH6_IRS_BUSY);
-			return 0;
+			return azx_single_wait_for_response(chip);
 		}
 		udelay(1);
 	}
@@ -702,18 +723,7 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 val)
 static unsigned int azx_single_get_response(struct hda_bus *bus)
 {
 	struct azx *chip = bus->private_data;
-	int timeout = 50;
-
-	while (timeout--) {
-		/* check IRV busy bit */
-		if (azx_readw(chip, IRS) & ICH6_IRS_VALID)
-			return azx_readl(chip, IR);
-		udelay(1);
-	}
-	if (printk_ratelimit())
-		snd_printd(SFX "get_response timeout: IRS=0x%x\n",
-			   azx_readw(chip, IRS));
-	return (unsigned int)-1;
+	return chip->rirb.res;
 }
 
 /*
-- 
cgit v0.10.2


From b21fadb9c1852c91622ca1dccfeb144bc535e36e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 28 May 2009 12:26:15 +0200
Subject: ALSA: hda - Add more register bits definitions

Added some missing register bits definitions to reduce magic numbers.
Also renamed some to follow the names on the datasheet.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 31a695e..f63bc65 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -138,14 +138,23 @@ MODULE_DESCRIPTION("Intel HDA driver");
  * registers
  */
 #define ICH6_REG_GCAP			0x00
+#define   ICH6_GCAP_64OK	(1 << 0)   /* 64bit address support */
+#define   ICH6_GCAP_NSDO	(3 << 1)   /* # of serial data out signals */
+#define   ICH6_GCAP_BSS		(31 << 3)  /* # of bidirectional streams */
+#define   ICH6_GCAP_ISS		(15 << 8)  /* # of input streams */
+#define   ICH6_GCAP_OSS		(15 << 12) /* # of output streams */
 #define ICH6_REG_VMIN			0x02
 #define ICH6_REG_VMAJ			0x03
 #define ICH6_REG_OUTPAY			0x04
 #define ICH6_REG_INPAY			0x06
 #define ICH6_REG_GCTL			0x08
+#define   ICH6_GCTL_RESET	(1 << 1)   /* controller reset */
+#define   ICH6_GCTL_FCNTRL	(1 << 1)   /* flush control */
+#define   ICH6_GCTL_UNSOL	(1 << 8)   /* accept unsol. response enable */
 #define ICH6_REG_WAKEEN			0x0c
 #define ICH6_REG_STATESTS		0x0e
 #define ICH6_REG_GSTS			0x10
+#define   ICH6_GSTS_FSTS	(1 << 1)   /* flush status */
 #define ICH6_REG_INTCTL			0x20
 #define ICH6_REG_INTSTS			0x24
 #define ICH6_REG_WALCLK			0x30
@@ -153,17 +162,27 @@ MODULE_DESCRIPTION("Intel HDA driver");
 #define ICH6_REG_CORBLBASE		0x40
 #define ICH6_REG_CORBUBASE		0x44
 #define ICH6_REG_CORBWP			0x48
-#define ICH6_REG_CORBRP			0x4A
+#define ICH6_REG_CORBRP			0x4a
+#define   ICH6_CORBRP_RST	(1 << 15)  /* read pointer reset */
 #define ICH6_REG_CORBCTL		0x4c
+#define   ICH6_CORBCTL_RUN	(1 << 1)   /* enable DMA */
+#define   ICH6_CORBCTL_CMEIE	(1 << 0)   /* enable memory error irq */
 #define ICH6_REG_CORBSTS		0x4d
+#define   ICH6_CORBSTS_CMEI	(1 << 0)   /* memory error indication */
 #define ICH6_REG_CORBSIZE		0x4e
 
 #define ICH6_REG_RIRBLBASE		0x50
 #define ICH6_REG_RIRBUBASE		0x54
 #define ICH6_REG_RIRBWP			0x58
+#define   ICH6_RIRBWP_RST	(1 << 15)  /* write pointer reset */
 #define ICH6_REG_RINTCNT		0x5a
 #define ICH6_REG_RIRBCTL		0x5c
+#define   ICH6_RBCTL_IRQ_EN	(1 << 0)   /* enable IRQ */
+#define   ICH6_RBCTL_DMA_EN	(1 << 1)   /* enable DMA */
+#define   ICH6_RBCTL_OVERRUN_EN	(1 << 2)   /* enable overrun irq */
 #define ICH6_REG_RIRBSTS		0x5d
+#define   ICH6_RBSTS_IRQ	(1 << 0)   /* response irq */
+#define   ICH6_RBSTS_OVERRUN	(1 << 2)   /* overrun irq */
 #define ICH6_REG_RIRBSIZE		0x5e
 
 #define ICH6_REG_IC			0x60
@@ -260,16 +279,6 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 #define ICH6_INT_CTRL_EN	0x40000000 /* controller interrupt enable bit */
 #define ICH6_INT_GLOBAL_EN	0x80000000 /* global interrupt enable bit */
 
-/* GCTL unsolicited response enable bit */
-#define ICH6_GCTL_UREN		(1<<8)
-
-/* GCTL reset bit */
-#define ICH6_GCTL_RESET		(1<<0)
-
-/* CORB/RIRB control, read/write pointer */
-#define ICH6_RBCTL_DMA_EN	0x02	/* enable DMA */
-#define ICH6_RBCTL_IRQ_EN	0x01	/* enable IRQ */
-#define ICH6_RBRWP_CLR		0x8000	/* read/write pointer clear */
 /* below are so far hardcoded - should read registers in future */
 #define ICH6_MAX_CORB_ENTRIES	256
 #define ICH6_MAX_RIRB_ENTRIES	256
@@ -515,9 +524,9 @@ static void azx_init_cmd_io(struct azx *chip)
 	/* set the corb write pointer to 0 */
 	azx_writew(chip, CORBWP, 0);
 	/* reset the corb hw read pointer */
-	azx_writew(chip, CORBRP, ICH6_RBRWP_CLR);
+	azx_writew(chip, CORBRP, ICH6_CORBRP_RST);
 	/* enable corb dma */
-	azx_writeb(chip, CORBCTL, ICH6_RBCTL_DMA_EN);
+	azx_writeb(chip, CORBCTL, ICH6_CORBCTL_RUN);
 
 	/* RIRB set up */
 	chip->rirb.addr = chip->rb.addr + 2048;
@@ -529,7 +538,7 @@ static void azx_init_cmd_io(struct azx *chip)
 	/* set the rirb size to 256 entries (ULI requires explicitly) */
 	azx_writeb(chip, RIRBSIZE, 0x02);
 	/* reset the rirb hw write pointer */
-	azx_writew(chip, RIRBWP, ICH6_RBRWP_CLR);
+	azx_writew(chip, RIRBWP, ICH6_RIRBWP_RST);
 	/* set N=1, get RIRB response interrupt for new entry */
 	azx_writew(chip, RINTCNT, 1);
 	/* enable rirb dma and response irq */
@@ -796,7 +805,7 @@ static int azx_reset(struct azx *chip)
 	}
 
 	/* Accept unsolicited responses */
-	azx_writel(chip, GCTL, azx_readl(chip, GCTL) | ICH6_GCTL_UREN);
+	azx_writel(chip, GCTL, azx_readl(chip, GCTL) | ICH6_GCTL_UNSOL);
 
 	/* detect codecs */
 	if (!chip->codec_mask) {
@@ -2284,10 +2293,10 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 
 	/* ATI chips seems buggy about 64bit DMA addresses */
 	if (chip->driver_type == AZX_DRIVER_ATI)
-		gcap &= ~0x01;
+		gcap &= ~ICH6_GCAP_64OK;
 
 	/* allow 64bit DMA address if supported by H/W */
-	if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64)))
+	if ((gcap & ICH6_GCAP_64OK) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64)))
 		pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64));
 	else {
 		pci_set_dma_mask(pci, DMA_BIT_MASK(32));
-- 
cgit v0.10.2


From 7d96fd41cadc55f4e00231c8c71b8e25c779f122 Mon Sep 17 00:00:00 2001
From: Petr Tesarik <ptesarik@suse.cz>
Date: Mon, 25 May 2009 11:02:02 +0200
Subject: x86: move rdtsc_barrier() into the TSC vread method

The *fence instructions were moved to vsyscall_64.c by commit
cb9e35dce94a1b9c59d46224e8a94377d673e204.  But this breaks the
vDSO, because vread methods are also called from there.

Besides, the synchronization might be unnecessary for other
time sources than TSC.

[ Impact: fix potential time warp in VDSO ]

Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
LKML-Reference: <9d0ea9ea0f866bdc1f4d76831221ae117f11ea67.1243241859.git.ptesarik@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@kernel.org>

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index d57de05..cf8611d 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -710,7 +710,16 @@ static cycle_t read_tsc(struct clocksource *cs)
 #ifdef CONFIG_X86_64
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
-	cycle_t ret = (cycle_t)vget_cycles();
+	cycle_t ret;
+
+	/*
+	 * Surround the RDTSC by barriers, to make sure it's not
+	 * speculated to outside the seqlock critical section and
+	 * does not cause time warps:
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)vget_cycles();
+	rdtsc_barrier();
 
 	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
 		ret : __vsyscall_gtod_data.clock.cycle_last;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 44153af..25ee06a 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -132,15 +132,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
 			return;
 		}
 
-		/*
-		 * Surround the RDTSC by barriers, to make sure it's not
-		 * speculated to outside the seqlock critical section and
-		 * does not cause time warps:
-		 */
-		rdtsc_barrier();
 		now = vread();
-		rdtsc_barrier();
-
 		base = __vsyscall_gtod_data.clock.cycle_last;
 		mask = __vsyscall_gtod_data.clock.mask;
 		mult = __vsyscall_gtod_data.clock.mult;
-- 
cgit v0.10.2


From af73110d23fb54f940197d93a410e9fa0cee66e2 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 18 May 2009 16:26:27 +0100
Subject: [ARM] 5516/1: Flush the D-cache after initialising the SCU

On MP systems, the data loaded by CPU0 before the SCU was initialised
may not be visible to the other CPUs.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

This also includes the following compile fix:

This patch includes 'asm/cacheflush.h' which is needed to use
'flush_cache_all()' function.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index 7f24ee9..d3831f6 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -12,6 +12,7 @@
 #include <linux/io.h>
 
 #include <asm/smp_scu.h>
+#include <asm/cacheflush.h>
 
 #define SCU_CTRL		0x00
 #define SCU_CONFIG		0x04
@@ -38,4 +39,10 @@ void __init scu_enable(void __iomem *scu_base)
 	scu_ctrl = __raw_readl(scu_base + SCU_CTRL);
 	scu_ctrl |= 1;
 	__raw_writel(scu_ctrl, scu_base + SCU_CTRL);
+
+	/*
+	 * Ensure that the data accessed by CPU0 before the SCU was
+	 * initialised is visible to the other CPUs.
+	 */
+	flush_cache_all();
 }
-- 
cgit v0.10.2


From c93f7669098eb97c5376e5396e3dfb734c17df4f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 28 May 2009 22:18:17 +1000
Subject: perf_counter: Fix race in attaching counters to tasks and exiting

Commit 564c2b21 ("perf_counter: Optimize context switch between
identical inherited contexts") introduced a race where it is possible
that a counter being attached to a task could get attached to the
wrong task, if the task is one that has inherited its context from
another task via fork.  This happens because the optimized context
switch could switch the context to another task after find_get_context
has read task->perf_counter_ctxp.  In fact, it's possible that the
context could then get freed, if the other task then exits.

This fixes the problem by protecting both the context switch and the
critical code in find_get_context with spinlocks.  The context switch
locks the cxt->lock of both the outgoing and incoming contexts before
swapping them.  That means that once code such as find_get_context
has obtained the spinlock for the context associated with a task,
the context can't get swapped to another task.  However, the context
may have been swapped in the interval between reading
task->perf_counter_ctxp and getting the lock, so it is necessary to
check and retry.

To make sure that none of the contexts being looked at in
find_get_context can get freed, this changes the context freeing code
to use RCU.  Thus an rcu_read_lock() is sufficient to ensure that no
contexts can get freed.  This part of the patch is lifted from a patch
posted by Peter Zijlstra.

This also adds a check to make sure that we can't add a counter to a
task that is exiting.

There is also a race between perf_counter_exit_task and
find_get_context; this solves the race by moving the get_ctx that
was in perf_counter_alloc into the locked region in find_get_context,
so that once find_get_context has got the context for a task, it
won't get freed even if the task calls perf_counter_exit_task.  It
doesn't matter if new top-level (non-inherited) counters get attached
to the context after perf_counter_exit_task has detached the context
from the task.  They will just stay there and never get scheduled in
until the counters' fds get closed, and then perf_release will remove
them from the context and eventually free the context.

With this, we are now doing the unclone in find_get_context rather
than when a counter was added to or removed from a context (actually,
we were missing the unclone_ctx() call when adding a counter to a
context).  We don't need to unclone when removing a counter from a
context because we have no way to remove a counter from a cloned
context.

This also takes out the smp_wmb() in find_get_context, which Peter
Zijlstra pointed out was unnecessary because the cmpxchg implies a
full barrier anyway.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18974.33033.667187.273886@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a65ddc5..717bf3b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -541,8 +541,9 @@ struct perf_counter_context {
 	 * been cloned (inherited) from a common ancestor.
 	 */
 	struct perf_counter_context *parent_ctx;
-	u32			parent_gen;
-	u32			generation;
+	u64			parent_gen;
+	u64			generation;
+	struct rcu_head		rcu_head;
 };
 
 /**
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 367299f..52e5a15 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -103,12 +103,22 @@ static void get_ctx(struct perf_counter_context *ctx)
 	atomic_inc(&ctx->refcount);
 }
 
+static void free_ctx(struct rcu_head *head)
+{
+	struct perf_counter_context *ctx;
+
+	ctx = container_of(head, struct perf_counter_context, rcu_head);
+	kfree(ctx);
+}
+
 static void put_ctx(struct perf_counter_context *ctx)
 {
 	if (atomic_dec_and_test(&ctx->refcount)) {
 		if (ctx->parent_ctx)
 			put_ctx(ctx->parent_ctx);
-		kfree(ctx);
+		if (ctx->task)
+			put_task_struct(ctx->task);
+		call_rcu(&ctx->rcu_head, free_ctx);
 	}
 }
 
@@ -212,22 +222,6 @@ group_sched_out(struct perf_counter *group_counter,
 }
 
 /*
- * Mark this context as not being a clone of another.
- * Called when counters are added to or removed from this context.
- * We also increment our generation number so that anything that
- * was cloned from this context before this will not match anything
- * cloned from this context after this.
- */
-static void unclone_ctx(struct perf_counter_context *ctx)
-{
-	++ctx->generation;
-	if (!ctx->parent_ctx)
-		return;
-	put_ctx(ctx->parent_ctx);
-	ctx->parent_ctx = NULL;
-}
-
-/*
  * Cross CPU call to remove a performance counter
  *
  * We disable the counter on the hardware level first. After that we
@@ -281,13 +275,19 @@ static void __perf_counter_remove_from_context(void *info)
  *
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This is OK when called from perf_release since
+ * that only calls us on the top-level context, which can't be a clone.
+ * When called from perf_counter_exit_task, it's OK because the
+ * context has been detached from its task.
  */
 static void perf_counter_remove_from_context(struct perf_counter *counter)
 {
 	struct perf_counter_context *ctx = counter->ctx;
 	struct task_struct *task = ctx->task;
 
-	unclone_ctx(ctx);
 	if (!task) {
 		/*
 		 * Per cpu counters are removed via an smp call and
@@ -410,6 +410,16 @@ static void __perf_counter_disable(void *info)
 
 /*
  * Disable a counter.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This condition is satisifed when called through
+ * perf_counter_for_each_child or perf_counter_for_each because they
+ * hold the top-level counter's child_mutex, so any descendant that
+ * goes to exit will block in sync_child_counter.
+ * When called from perf_pending_counter it's OK because counter->ctx
+ * is the current context on this CPU and preemption is disabled,
+ * hence we can't get into perf_counter_task_sched_out for this context.
  */
 static void perf_counter_disable(struct perf_counter *counter)
 {
@@ -794,6 +804,12 @@ static void __perf_counter_enable(void *info)
 
 /*
  * Enable a counter.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This condition is satisfied when called through
+ * perf_counter_for_each_child or perf_counter_for_each as described
+ * for perf_counter_disable.
  */
 static void perf_counter_enable(struct perf_counter *counter)
 {
@@ -923,7 +939,9 @@ void perf_counter_task_sched_out(struct task_struct *task,
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = task->perf_counter_ctxp;
 	struct perf_counter_context *next_ctx;
+	struct perf_counter_context *parent;
 	struct pt_regs *regs;
+	int do_switch = 1;
 
 	regs = task_pt_regs(task);
 	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
@@ -932,18 +950,39 @@ void perf_counter_task_sched_out(struct task_struct *task,
 		return;
 
 	update_context_time(ctx);
+
+	rcu_read_lock();
+	parent = rcu_dereference(ctx->parent_ctx);
 	next_ctx = next->perf_counter_ctxp;
-	if (next_ctx && context_equiv(ctx, next_ctx)) {
-		task->perf_counter_ctxp = next_ctx;
-		next->perf_counter_ctxp = ctx;
-		ctx->task = next;
-		next_ctx->task = task;
-		return;
+	if (parent && next_ctx &&
+	    rcu_dereference(next_ctx->parent_ctx) == parent) {
+		/*
+		 * Looks like the two contexts are clones, so we might be
+		 * able to optimize the context switch.  We lock both
+		 * contexts and check that they are clones under the
+		 * lock (including re-checking that neither has been
+		 * uncloned in the meantime).  It doesn't matter which
+		 * order we take the locks because no other cpu could
+		 * be trying to lock both of these tasks.
+		 */
+		spin_lock(&ctx->lock);
+		spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+		if (context_equiv(ctx, next_ctx)) {
+			task->perf_counter_ctxp = next_ctx;
+			next->perf_counter_ctxp = ctx;
+			ctx->task = next;
+			next_ctx->task = task;
+			do_switch = 0;
+		}
+		spin_unlock(&next_ctx->lock);
+		spin_unlock(&ctx->lock);
 	}
+	rcu_read_unlock();
 
-	__perf_counter_sched_out(ctx, cpuctx);
-
-	cpuctx->task_ctx = NULL;
+	if (do_switch) {
+		__perf_counter_sched_out(ctx, cpuctx);
+		cpuctx->task_ctx = NULL;
+	}
 }
 
 static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
@@ -1215,18 +1254,13 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	ctx->task = task;
 }
 
-static void put_context(struct perf_counter_context *ctx)
-{
-	if (ctx->task)
-		put_task_struct(ctx->task);
-}
-
 static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;
-	struct perf_counter_context *tctx;
+	struct perf_counter_context *parent_ctx;
 	struct task_struct *task;
+	int err;
 
 	/*
 	 * If cpu is not a wildcard then this is a percpu counter:
@@ -1249,6 +1283,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
+		get_ctx(ctx);
 
 		return ctx;
 	}
@@ -1265,37 +1300,79 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
+	/*
+	 * Can't attach counters to a dying task.
+	 */
+	err = -ESRCH;
+	if (task->flags & PF_EXITING)
+		goto errout;
+
 	/* Reuse ptrace permission checks for now. */
-	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
-		put_task_struct(task);
-		return ERR_PTR(-EACCES);
+	err = -EACCES;
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto errout;
+
+ retry_lock:
+	rcu_read_lock();
+ retry:
+	ctx = rcu_dereference(task->perf_counter_ctxp);
+	if (ctx) {
+		/*
+		 * If this context is a clone of another, it might
+		 * get swapped for another underneath us by
+		 * perf_counter_task_sched_out, though the
+		 * rcu_read_lock() protects us from any context
+		 * getting freed.  Lock the context and check if it
+		 * got swapped before we could get the lock, and retry
+		 * if so.  If we locked the right context, then it
+		 * can't get swapped on us any more and we can
+		 * unclone it if necessary.
+		 * Once it's not a clone things will be stable.
+		 */
+		spin_lock_irq(&ctx->lock);
+		if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
+			spin_unlock_irq(&ctx->lock);
+			goto retry;
+		}
+		parent_ctx = ctx->parent_ctx;
+		if (parent_ctx) {
+			put_ctx(parent_ctx);
+			ctx->parent_ctx = NULL;		/* no longer a clone */
+		}
+		++ctx->generation;
+		/*
+		 * Get an extra reference before dropping the lock so that
+		 * this context won't get freed if the task exits.
+		 */
+		get_ctx(ctx);
+		spin_unlock_irq(&ctx->lock);
 	}
+	rcu_read_unlock();
 
-	ctx = task->perf_counter_ctxp;
 	if (!ctx) {
 		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
-		if (!ctx) {
-			put_task_struct(task);
-			return ERR_PTR(-ENOMEM);
-		}
+		err = -ENOMEM;
+		if (!ctx)
+			goto errout;
 		__perf_counter_init_context(ctx, task);
-		/*
-		 * Make sure other cpus see correct values for *ctx
-		 * once task->perf_counter_ctxp is visible to them.
-		 */
-		smp_wmb();
-		tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx);
-		if (tctx) {
+		get_ctx(ctx);
+		if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) {
 			/*
 			 * We raced with some other task; use
 			 * the context they set.
 			 */
 			kfree(ctx);
-			ctx = tctx;
+			goto retry_lock;
 		}
+		get_task_struct(task);
 	}
 
+	put_task_struct(task);
 	return ctx;
+
+ errout:
+	put_task_struct(task);
+	return ERR_PTR(err);
 }
 
 static void free_counter_rcu(struct rcu_head *head)
@@ -1303,7 +1380,6 @@ static void free_counter_rcu(struct rcu_head *head)
 	struct perf_counter *counter;
 
 	counter = container_of(head, struct perf_counter, rcu_head);
-	put_ctx(counter->ctx);
 	kfree(counter);
 }
 
@@ -1324,6 +1400,7 @@ static void free_counter(struct perf_counter *counter)
 	if (counter->destroy)
 		counter->destroy(counter);
 
+	put_ctx(counter->ctx);
 	call_rcu(&counter->rcu_head, free_counter_rcu);
 }
 
@@ -1347,7 +1424,6 @@ static int perf_release(struct inode *inode, struct file *file)
 	put_task_struct(counter->owner);
 
 	free_counter(counter);
-	put_context(ctx);
 
 	return 0;
 }
@@ -1437,6 +1513,12 @@ static void perf_counter_for_each_sibling(struct perf_counter *counter,
 	mutex_unlock(&ctx->mutex);
 }
 
+/*
+ * Holding the top-level counter's child_mutex means that any
+ * descendant process that has inherited this counter will block
+ * in sync_child_counter if it goes to exit, thus satisfying the
+ * task existence requirements of perf_counter_enable/disable.
+ */
 static void perf_counter_for_each_child(struct perf_counter *counter,
 					void (*func)(struct perf_counter *))
 {
@@ -3124,8 +3206,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->ctx			= ctx;
 	counter->oncpu			= -1;
 
-	get_ctx(ctx);
-
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
@@ -3290,7 +3370,7 @@ err_free_put_context:
 	kfree(counter);
 
 err_put_context:
-	put_context(ctx);
+	put_ctx(ctx);
 
 	goto out_fput;
 }
@@ -3322,6 +3402,7 @@ inherit_counter(struct perf_counter *parent_counter,
 					   group_leader, GFP_KERNEL);
 	if (IS_ERR(child_counter))
 		return child_counter;
+	get_ctx(child_ctx);
 
 	/*
 	 * Make the child state follow the state of the parent counter,
@@ -3439,11 +3520,6 @@ __perf_counter_exit_task(struct task_struct *child,
 
 /*
  * When a child task exits, feed back counter values to parent counters.
- *
- * Note: we may be running in child context, but the PID is not hashed
- * anymore so new counters will not be added.
- * (XXX not sure that is true when we get called from flush_old_exec.
- *  -- paulus)
  */
 void perf_counter_exit_task(struct task_struct *child)
 {
@@ -3458,7 +3534,15 @@ void perf_counter_exit_task(struct task_struct *child)
 
 	local_irq_save(flags);
 	__perf_counter_task_sched_out(child_ctx);
+
+	/*
+	 * Take the context lock here so that if find_get_context is
+	 * reading child->perf_counter_ctxp, we wait until it has
+	 * incremented the context's refcount before we do put_ctx below.
+	 */
+	spin_lock(&child_ctx->lock);
 	child->perf_counter_ctxp = NULL;
+	spin_unlock(&child_ctx->lock);
 	local_irq_restore(flags);
 
 	mutex_lock(&child_ctx->mutex);
@@ -3513,6 +3597,7 @@ int perf_counter_init_task(struct task_struct *child)
 
 	__perf_counter_init_context(child_ctx, child);
 	child->perf_counter_ctxp = child_ctx;
+	get_task_struct(child);
 
 	/*
 	 * Lock the parent list. No need to lock the child - not PID
-- 
cgit v0.10.2


From e03cdade0ca945a04e982525e50fef275190b77b Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Thu, 28 May 2009 14:16:52 +0100
Subject: [ARM] smp: use new cpumask functions

Convert cpu_*_mask bit twiddling to the new set_cpu_*() API.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 608f2d5..a06e735 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -41,7 +41,7 @@ extern void show_ipi_list(struct seq_file *p);
 asmlinkage void do_IPI(struct pt_regs *regs);
 
 /*
- * Setup the SMP cpu_possible_map
+ * Setup the set of possible CPUs (via set_cpu_possible)
  */
 extern void smp_init_cpus(void);
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 91130e2..0d8097f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -166,7 +166,7 @@ int __cpuexit __cpu_disable(void)
 	 * Take this CPU offline.  Once we clear this, we can't return,
 	 * and we must not schedule until we're ready to give up the cpu.
 	 */
-	cpu_clear(cpu, cpu_online_map);
+	set_cpu_online(cpu, false);
 
 	/*
 	 * OK - migrate IRQs away from this CPU
@@ -288,7 +288,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	/*
 	 * OK, now it's safe to let the boot CPU continue
 	 */
-	cpu_set(cpu, cpu_online_map);
+	set_cpu_online(cpu, true);
 
 	/*
 	 * OK, it's off to the idle thread for us
@@ -462,7 +462,7 @@ static void ipi_cpu_stop(unsigned int cpu)
 	dump_stack();
 	spin_unlock(&stop_lock);
 
-	cpu_clear(cpu, cpu_online_map);
+	set_cpu_online(cpu, false);
 
 	local_fiq_disable();
 	local_irq_disable();
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index c862ce1..ca74217 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -166,7 +166,7 @@ void __init smp_init_cpus(void)
 	unsigned int i, ncores = get_core_count();
 
 	for (i = 0; i < ncores; i++)
-		cpu_set(i, cpu_possible_map);
+		set_cpu_possible(i, true);
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -204,7 +204,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	 * actually populated at the present time.
 	 */
 	for (i = 0; i < max_cpus; i++)
-		cpu_set(i, cpu_present_map);
+		set_cpu_present(i, true);
 
 	/*
 	 * Initialise the SCU if there are more than one CPU and let
-- 
cgit v0.10.2


From f49afbb572d5e08ae12f1a979dc2e41745040339 Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Date: Thu, 28 May 2009 16:41:36 +0300
Subject: MAINTAINER: Add F: entries for Gemini and FA526

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>

diff --git a/MAINTAINERS b/MAINTAINERS
index 5ee166e..42c53ab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -624,6 +624,7 @@ M:	paulius.zaleckas@teltonika.lt
 L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
 T:	git git://gitorious.org/linux-gemini/mainline.git
 S:	Maintained
+F:	arch/arm/mach-gemini/
 
 ARM/EBSA110 MACHINE SUPPORT
 P:	Russell King
@@ -650,6 +651,7 @@ P:	Paulius Zaleckas
 M:	paulius.zaleckas@teltonika.lt
 L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
 S:	Maintained
+F:	arch/arm/mm/*-fa*
 
 ARM/FOOTBRIDGE ARCHITECTURE
 P:	Russell King
-- 
cgit v0.10.2


From 67a433ce278b98f47272726a22537fab7fd99de9 Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Date: Thu, 28 May 2009 16:42:25 +0300
Subject: Gemini: Fix SRAM/ROM location after memory swap

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>

diff --git a/arch/arm/mach-gemini/include/mach/hardware.h b/arch/arm/mach-gemini/include/mach/hardware.h
index de67526..213a4fc 100644
--- a/arch/arm/mach-gemini/include/mach/hardware.h
+++ b/arch/arm/mach-gemini/include/mach/hardware.h
@@ -15,10 +15,9 @@
 /*
  * Memory Map definitions
  */
-/* FIXME: Does it really swap SRAM like this? */
 #ifdef CONFIG_GEMINI_MEM_SWAP
 # define GEMINI_DRAM_BASE	0x00000000
-# define GEMINI_SRAM_BASE	0x20000000
+# define GEMINI_SRAM_BASE	0x70000000
 #else
 # define GEMINI_SRAM_BASE	0x00000000
 # define GEMINI_DRAM_BASE	0x10000000
-- 
cgit v0.10.2


From be461ba836770263826457624bc4a5173a1f5040 Mon Sep 17 00:00:00 2001
From: Chaithrika U S <chaithrika@ti.com>
Date: Thu, 28 May 2009 05:10:50 -0400
Subject: ASoC: Add dummy S/PDIF codec support

McASP on DM646x can operate in DIT (S/PDIF) where no codec is needed.
This patch provides stub codec that can be used in these configurations.
On DM646x EVM the McASP1 is connected to the S/PDIF out.

Signed-off-by: Steve Chen <schen@mvista.com>
Signed-off-by: Pavel Kiryukhin <pkiryukhin@ru.mvista.com>
Signed-off-by: Naresh Medisetty <naresh@ti.com>
Signed-off-by: Chaithrika U S <chaithrika@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index cb07d9b..bbc97fd 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -18,6 +18,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_AK4535 if I2C
 	select SND_SOC_CS4270 if I2C
 	select SND_SOC_PCM3008
+	select SND_SOC_SPDIF
 	select SND_SOC_SSM2602 if I2C
 	select SND_SOC_STAC9766 if SND_SOC_AC97_BUS
 	select SND_SOC_TLV320AIC23 if I2C
@@ -91,6 +92,9 @@ config SND_SOC_L3
 config SND_SOC_PCM3008
        tristate
 
+config SND_SOC_SPDIF
+	tristate
+
 config SND_SOC_SSM2602
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 46c007c..8b75305 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -6,6 +6,7 @@ snd-soc-ak4535-objs := ak4535.o
 snd-soc-cs4270-objs := cs4270.o
 snd-soc-l3-objs := l3.o
 snd-soc-pcm3008-objs := pcm3008.o
+snd-soc-spdif-objs := spdif_transciever.o
 snd-soc-ssm2602-objs := ssm2602.o
 snd-soc-stac9766-objs := stac9766.o
 snd-soc-tlv320aic23-objs := tlv320aic23.o
@@ -42,6 +43,7 @@ obj-$(CONFIG_SND_SOC_AK4535)	+= snd-soc-ak4535.o
 obj-$(CONFIG_SND_SOC_CS4270)	+= snd-soc-cs4270.o
 obj-$(CONFIG_SND_SOC_L3)	+= snd-soc-l3.o
 obj-$(CONFIG_SND_SOC_PCM3008)	+= snd-soc-pcm3008.o
+obj-$(CONFIG_SND_SOC_SPDIF)	+= snd-soc-spdif.o
 obj-$(CONFIG_SND_SOC_SSM2602)	+= snd-soc-ssm2602.o
 obj-$(CONFIG_SND_SOC_STAC9766)	+= snd-soc-stac9766.o
 obj-$(CONFIG_SND_SOC_TLV320AIC23)	+= snd-soc-tlv320aic23.o
diff --git a/sound/soc/codecs/spdif_transciever.c b/sound/soc/codecs/spdif_transciever.c
new file mode 100644
index 0000000..118e976
--- /dev/null
+++ b/sound/soc/codecs/spdif_transciever.c
@@ -0,0 +1,69 @@
+/*
+ * ALSA SoC SPDIF DIT driver
+ *
+ *  This driver is used by controllers which can operate in DIT (SPDI/F) where
+ *  no codec is needed.  This file provides stub codec that can be used
+ *  in these configurations. TI DaVinci Audio controller uses this driver.
+ *
+ * Author:      Steve Chen,  <schen@mvista.com>
+ * Copyright:   (C) 2009 MontaVista Software, Inc., <source@mvista.com>
+ * Copyright:   (C) 2009  Texas Instruments, India
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <sound/soc.h>
+#include <sound/pcm.h>
+
+#define STUB_RATES	SNDRV_PCM_RATE_8000_96000
+#define STUB_FORMATS	SNDRV_PCM_FMTBIT_S16_LE
+
+
+struct snd_soc_dai dit_stub_dai = {
+	.name		= "DIT",
+	.playback 	= {
+		.stream_name	= "Playback",
+		.channels_min	= 1,
+		.channels_max	= 384,
+		.rates		= STUB_RATES,
+		.formats	= STUB_FORMATS,
+	},
+};
+
+static int spdif_dit_probe(struct platform_device *pdev)
+{
+	return snd_soc_register_dai(&dit_stub_dai);
+}
+
+static int spdif_dit_remove(struct platform_device *pdev)
+{
+	snd_soc_unregister_dai(&dit_stub_dai);
+	return 0;
+}
+
+static struct platform_driver spdif_dit_driver = {
+	.probe		= spdif_dit_probe,
+	.remove		= spdif_dit_remove,
+	.driver		= {
+		.name	= "spdif-dit",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init dit_modinit(void)
+{
+	return platform_driver_register(&spdif_dit_driver);
+}
+
+static void __exit dit_exit(void)
+{
+	platform_driver_unregister(&spdif_dit_driver);
+}
+
+module_init(dit_modinit);
+module_exit(dit_exit);
+
diff --git a/sound/soc/codecs/spdif_transciever.h b/sound/soc/codecs/spdif_transciever.h
new file mode 100644
index 0000000..296f2eb
--- /dev/null
+++ b/sound/soc/codecs/spdif_transciever.h
@@ -0,0 +1,17 @@
+/*
+ * ALSA SoC DIT/DIR driver header
+ *
+ * Author:      Steve Chen,  <schen@mvista.com>
+ * Copyright:   (C) 2008 MontaVista Software, Inc., <source@mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CODEC_STUBS_H
+#define CODEC_STUBS_H
+
+extern struct snd_soc_dai dit_stub_dai;
+
+#endif /* CODEC_STUBS_H */
-- 
cgit v0.10.2


From 07119a4df8c8c77d888f2f46964ea9512ea84ff8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 27 May 2009 09:37:33 -0400
Subject: cifs: have cifs_NTtimeToUnix take a little-endian arg

...and just have the function call le64_to_cpu.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fae0839..8831f64 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -90,7 +90,7 @@ extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16,
 						 struct cifsTconInfo *);
 extern void DeleteOplockQEntry(struct oplock_q_entry *);
 extern void DeleteTconOplockQEntries(struct cifsTconInfo *);
-extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601);
+extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
 extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
 extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 302ea15..0686684 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -241,7 +241,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 	/* BB need same check in cifs_create too? */
 	/* if not oplocked, invalidate inode pages if mtime or file
 	   size changed */
-	temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
+	temp = cifs_NTtimeToUnix(buf->LastWriteTime);
 	if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
 			   (file->f_path.dentry->d_inode->i_size ==
 			    (loff_t)le64_to_cpu(buf->EndOfFile))) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9c869a6..42d6e0f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -85,10 +85,10 @@ static void cifs_unix_info_to_inode(struct inode *inode,
 	__u64 num_of_bytes = le64_to_cpu(info->NumOfBytes);
 	__u64 end_of_file = le64_to_cpu(info->EndOfFile);
 
-	inode->i_atime = cifs_NTtimeToUnix(le64_to_cpu(info->LastAccessTime));
+	inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime);
 	inode->i_mtime =
-		cifs_NTtimeToUnix(le64_to_cpu(info->LastModificationTime));
-	inode->i_ctime = cifs_NTtimeToUnix(le64_to_cpu(info->LastStatusChange));
+		cifs_NTtimeToUnix(info->LastModificationTime);
+	inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
 	inode->i_mode = le64_to_cpu(info->Permissions);
 
 	/*
@@ -554,14 +554,11 @@ int cifs_get_inode_info(struct inode **pinode,
 
 	/* Linux can not store file creation time so ignore it */
 	if (pfindData->LastAccessTime)
-		inode->i_atime = cifs_NTtimeToUnix
-			(le64_to_cpu(pfindData->LastAccessTime));
+		inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime);
 	else /* do not need to use current_fs_time - time not stored */
 		inode->i_atime = CURRENT_TIME;
-	inode->i_mtime =
-		    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
-	inode->i_ctime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
+	inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime);
+	inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime);
 	cFYI(DBG2, ("Attributes came in as 0x%x", attr));
 	if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
 		inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index e2fe998..d3ba75e 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -853,12 +853,12 @@ smbCalcSize_LE(struct smb_hdr *ptr)
 
 #define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
 
-    /*
-     * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
-     * into Unix UTC (based 1970-01-01, in seconds).
-     */
+/*
+ * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
+ * into Unix UTC (based 1970-01-01, in seconds).
+ */
 struct timespec
-cifs_NTtimeToUnix(u64 ntutc)
+cifs_NTtimeToUnix(__le64 ntutc)
 {
 	struct timespec ts;
 	/* BB what about the timezone? BB */
@@ -866,7 +866,7 @@ cifs_NTtimeToUnix(u64 ntutc)
 	/* Subtract the NTFS time offset, then convert to 1s intervals. */
 	u64 t;
 
-	t = ntutc - NTFS_TIME_OFFSET;
+	t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;
 	ts.tv_nsec = do_div(t, 10000000) * 100;
 	ts.tv_sec = t;
 	return ts;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 964e097..79c46c2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -150,11 +150,11 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 		allocation_size = le64_to_cpu(pfindData->AllocationSize);
 		end_of_file = le64_to_cpu(pfindData->EndOfFile);
 		tmp_inode->i_atime =
-		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
+			cifs_NTtimeToUnix(pfindData->LastAccessTime);
 		tmp_inode->i_mtime =
-		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime));
+			cifs_NTtimeToUnix(pfindData->LastWriteTime);
 		tmp_inode->i_ctime =
-		      cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime));
+			cifs_NTtimeToUnix(pfindData->ChangeTime);
 	} else { /* legacy, OS2 and DOS style */
 /*		struct timespec ts;*/
 		FIND_FILE_STANDARD_INFO *pfindData =
@@ -331,11 +331,11 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
 	local_size  = tmp_inode->i_size;
 
 	tmp_inode->i_atime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime));
+	    cifs_NTtimeToUnix(pfindData->LastAccessTime);
 	tmp_inode->i_mtime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastModificationTime));
+	    cifs_NTtimeToUnix(pfindData->LastModificationTime);
 	tmp_inode->i_ctime =
-	    cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastStatusChange));
+	    cifs_NTtimeToUnix(pfindData->LastStatusChange);
 
 	tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions);
 	/* since we set the inode type below we need to mask off type
-- 
cgit v0.10.2


From c4a2c08db7d976c2e23a97da5d69ec7c9701034d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 27 May 2009 09:37:33 -0400
Subject: cifs: make cnvrtDosUnixTm take a little-endian args and an offset

The callers primarily end up converting the args from le anyway. Also,
most of the callers end up needing to add an offset to the result. The
exception to these rules is cnvrtDosCifsTm, but there are no callers of
that function, so we might as well remove it.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8831f64..d542cf1 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -92,8 +92,8 @@ extern void DeleteOplockQEntry(struct oplock_q_entry *);
 extern void DeleteTconOplockQEntries(struct cifsTconInfo *);
 extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
-extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
-extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
+extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
+				      int offset);
 
 extern int cifs_posix_open(char *full_path, struct inode **pinode,
 			   struct super_block *sb, int mode, int oflags,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index aece2a8..b84c61d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -524,8 +524,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			int val, seconds, remain, result;
 			struct timespec ts, utc;
 			utc = CURRENT_TIME;
-			ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date),
-						le16_to_cpu(rsp->SrvTime.Time));
+			ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
+					    rsp->SrvTime.Time, 0);
 			cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d",
 				(int)ts.tv_sec, (int)utc.tv_sec,
 				(int)(utc.tv_sec - ts.tv_sec)));
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index d3ba75e..32d6baa 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -883,16 +883,12 @@ cifs_UnixTimeToNT(struct timespec t)
 static int total_days_of_prev_months[] =
 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
 
-
-__le64 cnvrtDosCifsTm(__u16 date, __u16 time)
-{
-	return cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(date, time)));
-}
-
-struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
+struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
 {
 	struct timespec ts;
 	int sec, min, days, month, year;
+	u16 date = le16_to_cpu(le_date);
+	u16 time = le16_to_cpu(le_time);
 	SMB_TIME *st = (SMB_TIME *)&time;
 	SMB_DATE *sd = (SMB_DATE *)&date;
 
@@ -933,7 +929,7 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
 		days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0);
 	sec += 24 * 60 * 60 * days;
 
-	ts.tv_sec = sec;
+	ts.tv_sec = sec + offset;
 
 	/* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 79c46c2..86d0055 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -115,17 +115,6 @@ construct_dentry(struct qstr *qstring, struct file *file,
 	return rc;
 }
 
-static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode)
-{
-	if ((tcon) && (tcon->ses) && (tcon->ses->server)) {
-		inode->i_ctime.tv_sec += tcon->ses->server->timeAdj;
-		inode->i_mtime.tv_sec += tcon->ses->server->timeAdj;
-		inode->i_atime.tv_sec += tcon->ses->server->timeAdj;
-	}
-	return;
-}
-
-
 static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 			  char *buf, unsigned int *pobject_type, int isNewInode)
 {
@@ -156,20 +145,19 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 		tmp_inode->i_ctime =
 			cifs_NTtimeToUnix(pfindData->ChangeTime);
 	} else { /* legacy, OS2 and DOS style */
-/*		struct timespec ts;*/
+		int offset = cifs_sb->tcon->ses->server->timeAdj;
 		FIND_FILE_STANDARD_INFO *pfindData =
 			(FIND_FILE_STANDARD_INFO *)buf;
 
-		tmp_inode->i_mtime = cnvrtDosUnixTm(
-				le16_to_cpu(pfindData->LastWriteDate),
-				le16_to_cpu(pfindData->LastWriteTime));
-		tmp_inode->i_atime = cnvrtDosUnixTm(
-				le16_to_cpu(pfindData->LastAccessDate),
-				le16_to_cpu(pfindData->LastAccessTime));
-		tmp_inode->i_ctime = cnvrtDosUnixTm(
-				le16_to_cpu(pfindData->LastWriteDate),
-				le16_to_cpu(pfindData->LastWriteTime));
-		AdjustForTZ(cifs_sb->tcon, tmp_inode);
+		tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate,
+						    pfindData->LastWriteTime,
+						    offset);
+		tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate,
+						    pfindData->LastAccessTime,
+						    offset);
+		tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate,
+						    pfindData->LastWriteTime,
+						    offset);
 		attr = le16_to_cpu(pfindData->Attributes);
 		allocation_size = le32_to_cpu(pfindData->AllocationSize);
 		end_of_file = le32_to_cpu(pfindData->DataSize);
-- 
cgit v0.10.2


From bd433d4cf4d8593a5f1764776b91f1794fce5a77 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 27 May 2009 09:37:34 -0400
Subject: cifs: rename cifs_iget to cifs_root_iget

The current cifs_iget isn't suitable for anything but the root inode.
Rename it with a more appropriate name.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5e6d358..0a10a59 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -146,7 +146,7 @@ cifs_read_super(struct super_block *sb, void *data,
 #endif
 	sb->s_blocksize = CIFS_MAX_MSGSIZE;
 	sb->s_blocksize_bits = 14;	/* default 2**14 = CIFS_MAX_MSGSIZE */
-	inode = cifs_iget(sb, ROOT_I);
+	inode = cifs_root_iget(sb, ROOT_I);
 
 	if (IS_ERR(inode)) {
 		rc = PTR_ERR(inode);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 051b71c..3b6a85c 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -36,7 +36,7 @@ extern void cifs_read_inode(struct inode *);
 
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
-extern struct inode *cifs_iget(struct super_block *, unsigned long);
+extern struct inode *cifs_root_iget(struct super_block *, unsigned long);
 extern int cifs_create(struct inode *, struct dentry *, int,
 		       struct nameidata *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 42d6e0f..84b7bea 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -696,7 +696,7 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
 }
 
 /* gets root inode */
-struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
+struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 {
 	int xid;
 	struct cifs_sb_info *cifs_sb;
-- 
cgit v0.10.2


From a0c9217f64ee3cd1e534966da8c5f05768e1ab09 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 27 May 2009 15:40:47 -0400
Subject: cifs: make serverino the default when mounting

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f32c903..8ae563f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -835,6 +835,8 @@ cifs_parse_mount_options(char *options, const char *devname,
 	vol->rw = true;
 	/* default is always to request posix paths. */
 	vol->posix_paths = 1;
+	/* default to using server inode numbers where available */
+	vol->server_ino = 1;
 
 	if (!options)
 		return 1;
-- 
cgit v0.10.2


From c5077ec42303e07c2c685b0f6cb8eee0f2c7751c Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 28 May 2009 15:09:04 +0000
Subject: [CIFS] Update readme to indicate change to default mount (serverino)

Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index f20c406..227c681 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,9 @@
+Version 1.59
+------------
+Client uses server inode numbers (which are persistent) rather than
+client generated ones by default (mount option "serverino" turned
+on by default if server supports it).
+
 Version 1.58
 ------------
 Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
@@ -10,6 +16,8 @@ we converted from).  Fix endianness of the vcnum field used during
 session setup to distinguish multiple mounts to same server from different
 userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
 flag to be set to 2, and mount must enable krb5 to turn on extended security).
+Performance of file create to Samba improved (posix create on lookup
+removes 1 of 2 network requests sent on file create)
  
 Version 1.57
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index db208ddb..6d1608f 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -388,8 +388,13 @@ A partial list of the supported mount options follows:
 		or the CIFS Unix Extensions equivalent and for those
 		this mount option will have no effect.  Exporting cifs mounts
 		under nfsd requires this mount option on the cifs mount.
+		This is now the default if server supports the 
+		required network operation.
   noserverino   Client generates inode numbers (rather than using the actual one
-		from the server) by default.
+		from the server). These inode numbers will vary after
+		unmount or reboot which can confuse some applications,
+		but not all server filesystems support unique inode
+		numbers.
   setuids       If the CIFS Unix extensions are negotiated with the server
 		the client will attempt to set the effective uid and gid of
 		the local process on newly created files, directories, and
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 3b6a85c..9570a0e 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.58"
+#define CIFS_VERSION   "1.59"
 #endif				/* _CIFSFS_H */
-- 
cgit v0.10.2


From ed888aef427365d19f887c271a3a906d16422d24 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 17:16:04 +0200
Subject: dma-debug: re-add dma memory leak detection

This is basically a revert of commit 314eeac9 but now in a
fixed version.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index cdd205d..e47e1a0 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -105,6 +105,11 @@ static const char *type2name[4] = { "single", "page",
 static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
 				   "DMA_FROM_DEVICE", "DMA_NONE" };
 
+/* little merge helper - remove it after the merge window */
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
+
 /*
  * The access to some variables in this macro is racy. We can't use atomic_t
  * here because all these variables are exported to debugfs. Some of them even
@@ -458,9 +463,60 @@ out_err:
 	return -ENOMEM;
 }
 
+static int device_dma_allocations(struct device *dev)
+{
+	struct dma_debug_entry *entry;
+	unsigned long flags;
+	int count = 0, i;
+
+	for (i = 0; i < HASH_SIZE; ++i) {
+		spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
+		list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
+			if (entry->dev == dev)
+				count += 1;
+		}
+		spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
+	}
+
+	return count;
+}
+
+static int dma_debug_device_change(struct notifier_block *nb,
+				    unsigned long action, void *data)
+{
+	struct device *dev = data;
+	int count;
+
+
+	switch (action) {
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		count = device_dma_allocations(dev);
+		if (count == 0)
+			break;
+		err_printk(dev, NULL, "DMA-API: device driver has pending "
+				"DMA allocations while released from device "
+				"[count=%d]\n", count);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 void dma_debug_add_bus(struct bus_type *bus)
 {
-	/* FIXME: register notifier */
+	struct notifier_block *nb;
+
+	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
+	if (nb == NULL) {
+		printk(KERN_ERR "dma_debug_add_bus: out of memory\n");
+		return;
+	}
+
+	nb->notifier_call = dma_debug_device_change;
+
+	bus_register_notifier(bus, nb);
 }
 
 /*
-- 
cgit v0.10.2


From 0b0ef442958d74e5749c460b0ae68606e317fe01 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabioestevam@yahoo.com>
Date: Wed, 27 May 2009 13:51:24 -0700
Subject: MX3: Add missing entry in devices.h

Add missing mxc_rnga_device entry in devices.h (mxc-master tree).

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/devices.h b/arch/arm/mach-mx3/devices.h
index 475410a..ffd494d 100644
--- a/arch/arm/mach-mx3/devices.h
+++ b/arch/arm/mach-mx3/devices.h
@@ -16,3 +16,5 @@ extern struct platform_device mxc_fec_device;
 extern struct platform_device mxcsdhc_device0;
 extern struct platform_device mxcsdhc_device1;
 extern struct platform_device mxc_otg_udc_device;
+extern struct platform_device mxc_rnga_device;
+
-- 
cgit v0.10.2


From 0573cb5f45f1b98b74348c3f1ed4f26e56e774e5 Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Thu, 28 May 2009 16:46:21 +0200
Subject: mx31: correct csi_clk parent (v2)

changes since v1: we now check if the parent configuration bit was
changed since reset and change the parent when needed.

csi_clk parent was defined with ahb_clk. However, according to the
m31 reference manual, it should be serial_pll_clk.

Guennadi always used a 20 MHz clock that was by chance changed to
a 45 MHz that fits in the mt9t031 spec. Now the clocks are computed
and output correctly (measured on oscillo).

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/clock.c b/arch/arm/mach-mx3/clock.c
index 28bd11d..217d114 100644
--- a/arch/arm/mach-mx3/clock.c
+++ b/arch/arm/mach-mx3/clock.c
@@ -483,7 +483,7 @@ DEFINE_CLOCK(i2c3_clk,    2, MXC_CCM_CGR0, 30, NULL, NULL, &perclk_clk);
 DEFINE_CLOCK(mpeg4_clk,   0, MXC_CCM_CGR1,  0, NULL, NULL, &ahb_clk);
 DEFINE_CLOCK(mstick1_clk, 0, MXC_CCM_CGR1,  2, mstick1_get_rate, NULL, &usb_pll_clk);
 DEFINE_CLOCK(mstick2_clk, 1, MXC_CCM_CGR1,  4, mstick2_get_rate, NULL, &usb_pll_clk);
-DEFINE_CLOCK1(csi_clk,    0, MXC_CCM_CGR1,  6, csi, NULL, &ahb_clk);
+DEFINE_CLOCK1(csi_clk,    0, MXC_CCM_CGR1,  6, csi, NULL, &serial_pll_clk);
 DEFINE_CLOCK(rtc_clk,     0, MXC_CCM_CGR1,  8, NULL, NULL, &ipg_clk);
 DEFINE_CLOCK(wdog_clk,    0, MXC_CCM_CGR1, 10, NULL, NULL, &ipg_clk);
 DEFINE_CLOCK(pwm_clk,     0, MXC_CCM_CGR1, 12, NULL, NULL, &perclk_clk);
@@ -571,6 +571,13 @@ int __init mx31_clocks_init(unsigned long fref)
 	for (i = 0; i < ARRAY_SIZE(lookups); i++)
 		clkdev_add(&lookups[i]);
 
+	/* change the csi_clk parent if necessary */
+	reg = __raw_readl(MXC_CCM_CCMR);
+	if (!(reg & MXC_CCM_CCMR_CSCS))
+		if (clk_set_parent(&csi_clk, &usb_pll_clk))
+			pr_err("%s: error changing csi_clk parent\n", __func__);
+
+
 	/* Turn off all possible clocks */
 	__raw_writel((3 << 4), MXC_CCM_CGR0);
 	__raw_writel(0, MXC_CCM_CGR1);
-- 
cgit v0.10.2


From fefda117ddb324b872312f1f061230e627c9f5ee Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 20 May 2009 12:21:42 +0200
Subject: amd-iommu: add amd_iommu_dump parameter

This kernel parameter will be useful to get some AMD IOMMU related
information in dmesg that is not necessary for the default user but may
be helpful in debug situations.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9..89dfb37 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -194,6 +194,12 @@
 #define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
 #define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
 					      domain for an IOMMU */
+extern bool amd_iommu_dump;
+#define DUMP_printk(format, arg...)					\
+	do {								\
+		if (amd_iommu_dump)						\
+			printk(KERN_INFO "AMD IOMMU: " format, ## arg);	\
+	} while(0);
 
 /*
  * This structure contains generic data for  IOMMU protection domains
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be09..57fb7a7 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -115,6 +115,8 @@ struct ivmd_header {
 	u64 range_length;
 } __attribute__((packed));
 
+bool amd_iommu_dump;
+
 static int __initdata amd_iommu_detected;
 
 u16 amd_iommu_last_bdf;			/* largest PCI device id we have
@@ -1211,6 +1213,13 @@ void __init amd_iommu_detect(void)
  *
  ****************************************************************************/
 
+static int __init parse_amd_iommu_dump(char *str)
+{
+	amd_iommu_dump = true;
+
+	return 1;
+}
+
 static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
@@ -1235,5 +1244,6 @@ static int __init parse_amd_iommu_size_options(char *str)
 	return 1;
 }
 
+__setup("amd_iommu_dump", parse_amd_iommu_dump);
 __setup("amd_iommu=", parse_amd_iommu_options);
 __setup("amd_iommu_size=", parse_amd_iommu_size_options);
-- 
cgit v0.10.2


From 9c72041f719e2864d4208a89341c36b316dbf893 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 20 May 2009 13:53:57 +0200
Subject: amd-iommu: add dump for iommus described in ivrs table

Add information about IOMMU devices described in the IVRS ACPI table to
the kernel log if amd_iommu_dump was specified on the kernel command
line.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 57fb7a7..2816590 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -748,6 +748,15 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 		h = (struct ivhd_header *)p;
 		switch (*p) {
 		case ACPI_IVHD_TYPE:
+
+			DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
+				    "seg: %d flags: %01x info %04x\n",
+				    PCI_BUS(h->devid), PCI_SLOT(h->devid),
+				    PCI_FUNC(h->devid), h->cap_ptr,
+				    h->pci_seg, h->flags, h->info);
+			DUMP_printk("       mmio-addr: %016llx\n",
+				    h->mmio_phys);
+
 			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
 			if (iommu == NULL)
 				return -ENOMEM;
-- 
cgit v0.10.2


From 42a698f40a0946f5517308411b9e003ae031414d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 20 May 2009 15:41:28 +0200
Subject: amd-iommu: print ivhd information to dmesg when requested

Add information about devices belonging to an IOMMU as described in the
IVRS ACPI table to the kernel log if amd_iommu_dump was specified on the
kernel command line.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 2816590..fe3e645 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -598,32 +598,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 	p += sizeof(struct ivhd_header);
 	end += h->length;
 
+
 	while (p < end) {
 		e = (struct ivhd_entry *)p;
 		switch (e->type) {
 		case IVHD_DEV_ALL:
+
+			DUMP_printk("  DEV_ALL\t\t\t first devid: %02x:%02x.%x"
+				    " last device %02x:%02x.%x flags: %02x\n",
+				    PCI_BUS(iommu->first_device),
+				    PCI_SLOT(iommu->first_device),
+				    PCI_FUNC(iommu->first_device),
+				    PCI_BUS(iommu->last_device),
+				    PCI_SLOT(iommu->last_device),
+				    PCI_FUNC(iommu->last_device),
+				    e->flags);
+
 			for (dev_i = iommu->first_device;
 					dev_i <= iommu->last_device; ++dev_i)
 				set_dev_entry_from_acpi(iommu, dev_i,
 							e->flags, 0);
 			break;
 		case IVHD_DEV_SELECT:
+
+			DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
+				    "flags: %02x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags);
+
 			devid = e->devid;
 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
 			break;
 		case IVHD_DEV_SELECT_RANGE_START:
+
+			DUMP_printk("  DEV_SELECT_RANGE_START\t "
+				    "devid: %02x:%02x.%x flags: %02x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags);
+
 			devid_start = e->devid;
 			flags = e->flags;
 			ext_flags = 0;
 			alias = false;
 			break;
 		case IVHD_DEV_ALIAS:
+
+			DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
+				    "flags: %02x devid_to: %02x:%02x.%x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags,
+				    PCI_BUS(e->ext >> 8),
+				    PCI_SLOT(e->ext >> 8),
+				    PCI_FUNC(e->ext >> 8));
+
 			devid = e->devid;
 			devid_to = e->ext >> 8;
 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
 			amd_iommu_alias_table[devid] = devid_to;
 			break;
 		case IVHD_DEV_ALIAS_RANGE:
+
+			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
+				    "devid: %02x:%02x.%x flags: %02x "
+				    "devid_to: %02x:%02x.%x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags,
+				    PCI_BUS(e->ext >> 8),
+				    PCI_SLOT(e->ext >> 8),
+				    PCI_FUNC(e->ext >> 8));
+
 			devid_start = e->devid;
 			flags = e->flags;
 			devid_to = e->ext >> 8;
@@ -631,17 +682,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 			alias = true;
 			break;
 		case IVHD_DEV_EXT_SELECT:
+
+			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
+				    "flags: %02x ext: %08x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags, e->ext);
+
 			devid = e->devid;
 			set_dev_entry_from_acpi(iommu, devid, e->flags,
 						e->ext);
 			break;
 		case IVHD_DEV_EXT_SELECT_RANGE:
+
+			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
+				    "%02x:%02x.%x flags: %02x ext: %08x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid),
+				    e->flags, e->ext);
+
 			devid_start = e->devid;
 			flags = e->flags;
 			ext_flags = e->ext;
 			alias = false;
 			break;
 		case IVHD_DEV_RANGE_END:
+
+			DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
+				    PCI_BUS(e->devid),
+				    PCI_SLOT(e->devid),
+				    PCI_FUNC(e->devid));
+
 			devid = e->devid;
 			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
 				if (alias)
-- 
cgit v0.10.2


From 02acc43a294098c2a4cd22cf24e9c988644f9f7f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 20 May 2009 16:24:21 +0200
Subject: amd-iommu: print ivmd information to dmesg when requested

Add information about device memory mapping requirements for the IOMMU
as described in the IVRS ACPI table to the kernel log if amd_iommu_dump
was specified on the kernel command line.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index fe3e645..b90a78c 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -983,6 +983,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
 static int __init init_unity_map_range(struct ivmd_header *m)
 {
 	struct unity_map_entry *e = 0;
+	char *s;
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
 	if (e == NULL)
@@ -991,13 +992,16 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 	switch (m->type) {
 	default:
 	case ACPI_IVMD_TYPE:
+		s = "IVMD_TYPEi\t\t\t";
 		e->devid_start = e->devid_end = m->devid;
 		break;
 	case ACPI_IVMD_TYPE_ALL:
+		s = "IVMD_TYPE_ALL\t\t";
 		e->devid_start = 0;
 		e->devid_end = amd_iommu_last_bdf;
 		break;
 	case ACPI_IVMD_TYPE_RANGE:
+		s = "IVMD_TYPE_RANGE\t\t";
 		e->devid_start = m->devid;
 		e->devid_end = m->aux;
 		break;
@@ -1006,6 +1010,13 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
 	e->prot = m->flags >> 1;
 
+	DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
+		    " range_start: %016llx range_end: %016llx flags: %x\n", s,
+		    PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
+		    PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
+		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
+		    e->address_start, e->address_end, m->flags);
+
 	list_add_tail(&e->list, &amd_iommu_unity_map);
 
 	return 0;
-- 
cgit v0.10.2


From b3b99ef8b4f80f3f093a72110e7697c2281ae45d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 12:02:48 +0200
Subject: amd-iommu: move protection domain printk to dump code

This information is only helpful for debugging. Don't print it anymore
unless explicitly requested.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99..3356599 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1009,8 +1009,9 @@ static int device_change_notifier(struct notifier_block *nb,
 		if (!dma_domain)
 			dma_domain = iommu->default_dom;
 		attach_device(iommu, &dma_domain->domain, devid);
-		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-		       "device %s\n", dma_domain->domain.id, dev_name(dev));
+		DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
+			    "%d for device %s\n",
+			    dma_domain->domain.id, dev_name(dev));
 		break;
 	case BUS_NOTIFY_UNBIND_DRIVER:
 		if (!domain)
@@ -1133,8 +1134,9 @@ static int get_device_resources(struct device *dev,
 			dma_dom = (*iommu)->default_dom;
 		*domain = &dma_dom->domain;
 		attach_device(*iommu, *domain, *bdf);
-		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-				"device %s\n", (*domain)->id, dev_name(dev));
+		DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
+				"%d for device %s\n",
+				(*domain)->id, dev_name(dev));
 	}
 
 	if (domain_for_device(_bdf) == NULL)
-- 
cgit v0.10.2


From 2be69c79e9a46a554fc3ff57d886e65e7a73eb72 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 12:15:49 +0200
Subject: x86/iommu: add IOMMU_STRESS Kconfig entry

This Kconfig option is intended to enable various code paths or
parameters in IOMMU implementations to stress test the code and/or the
hardware. This can also be done by disabling optimizations in the code
when this option is switched on.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 5865712..33fac6b 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -159,6 +159,14 @@ config IOMMU_DEBUG
 	  options. See Documentation/x86_64/boot-options.txt for more
 	  details.
 
+config IOMMU_STRESS
+	bool "Enable IOMMU stress-test mode"
+	---help---
+	  This option disables various optimizations in IOMMU related
+	  code to do real stress testing of the IOMMU code. This option
+	  will cause a performance drop and should only be enabled for
+	  testing.
+
 config IOMMU_LEAK
 	bool "IOMMU leak tracing"
 	depends on IOMMU_DEBUG && DMA_API_DEBUG
-- 
cgit v0.10.2


From 2e8b569614b89c9b1b85cba37db36daeeeff744e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 12:44:03 +0200
Subject: amd-iommu: disable device isolation with CONFIG_IOMMU_STRESS

With device isolation disabled we can test better for race conditions in
dma_ops related code.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index b90a78c..6694112 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -124,8 +124,14 @@ u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+
+#ifdef CONFIG_IOMMU_STRESS
+bool amd_iommu_isolate = false;
+#else
 bool amd_iommu_isolate = true;		/* if true, device isolation is
 					   enabled */
+#endif
+
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
-- 
cgit v0.10.2


From 421f909c803d1c397f6c66b75653f238696c39ee Mon Sep 17 00:00:00 2001
From: Neil Turton <nturton@solarflare.com>
Date: Thu, 14 May 2009 14:00:35 +0100
Subject: amd-iommu: fix an off-by-one error in the AMD IOMMU driver.

The variable amd_iommu_last_bdf holds the maximum bdf of any device
controlled by an IOMMU, so the number of device entries needed is
amd_iommu_last_bdf+1.  The function tbl_size used amd_iommu_last_bdf
instead.  This would be a problem if the last device were a large
enough power of 2.

[ Impact: fix amd_iommu_last_bdf off-by-one error ]

Signed-off-by: Neil Turton <nturton@solarflare.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be09..35fc965 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -175,7 +175,7 @@ static inline void update_last_devid(u16 devid)
 static inline unsigned long tbl_size(int entry_size)
 {
 	unsigned shift = PAGE_SHIFT +
-			 get_order(amd_iommu_last_bdf * entry_size);
+			 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
 
 	return 1UL << shift;
 }
-- 
cgit v0.10.2


From 7455aab1f95f6464c5af3fbdee28744e73f38564 Mon Sep 17 00:00:00 2001
From: Neil Turton <nturton@solarflare.com>
Date: Thu, 14 May 2009 14:08:11 +0100
Subject: amd-iommu: fix the handling of device aliases in the AMD IOMMU
 driver.

The devid parameter to set_dev_entry_from_acpi is the requester ID
rather than the device ID since it is used to index the IOMMU device
table.  The handling of IVHD_DEV_ALIAS used to pass the device ID.
This patch fixes it to pass the requester ID.

[ Impact: fix setting the wrong req-id in acpi-table parsing ]

Signed-off-by: Neil Turton <nturton@solarflare.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 35fc965..53f93db 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -618,7 +618,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 		case IVHD_DEV_ALIAS:
 			devid = e->devid;
 			devid_to = e->ext >> 8;
-			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
+			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
 			amd_iommu_alias_table[devid] = devid_to;
 			break;
 		case IVHD_DEV_ALIAS_RANGE:
-- 
cgit v0.10.2


From 0bc252f430d6a3ac7836d40f00d0ae020593b11b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 12:48:05 +0200
Subject: amd-iommu: make sure only ivmd entries are parsed

The bug never triggered. But it should be fixed to protect against
broken ACPI tables in the future.

[ Impact: protect against broken ivrs acpi table ]

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 53f93db..a3a2b98 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -906,6 +906,8 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 
 	switch (m->type) {
 	default:
+		kfree(e);
+		return 0;
 	case ACPI_IVMD_TYPE:
 		e->devid_start = e->devid_end = m->devid;
 		break;
-- 
cgit v0.10.2


From c1eee67b2d8464781f5868a34168df61e40e85a6 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Thu, 21 May 2009 00:56:58 -0700
Subject: amd iommu: properly detach from protection domain on ->remove

Some drivers may use the dma api during ->remove which will
cause a protection domain to get reattached to a device.  Delay the
detach until after the driver is completely unbound.

[ joro: added a little merge helper ]

[ Impact: fix too early device<->domain removal ]

Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99..d689883 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -57,6 +57,10 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 static struct dma_ops_domain *find_protection_domain(u16 devid);
 
 
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
+
 #ifdef CONFIG_AMD_IOMMU_STATS
 
 /*
@@ -1012,7 +1016,7 @@ static int device_change_notifier(struct notifier_block *nb,
 		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
 		       "device %s\n", dma_domain->domain.id, dev_name(dev));
 		break;
-	case BUS_NOTIFY_UNBIND_DRIVER:
+	case BUS_NOTIFY_UNBOUND_DRIVER:
 		if (!domain)
 			goto out;
 		detach_device(domain, devid);
-- 
cgit v0.10.2


From 3bd221724adb9d642270df0e78b0105fb61e4a1c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 4 May 2009 15:06:20 +0200
Subject: amd-iommu: introduce for_each_iommu* macros

This patch introduces the for_each_iommu and for_each_iommu_safe macros
to simplify the developers life when having to iterate over all AMD
IOMMUs in the system.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9..cf5ef17 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -196,6 +196,14 @@
 					      domain for an IOMMU */
 
 /*
+ * Make iterating over all IOMMUs easier
+ */
+#define for_each_iommu(iommu) \
+	list_for_each_entry((iommu), &amd_iommu_list, list)
+#define for_each_iommu_safe(iommu, next) \
+	list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
+
+/*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
  */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99..d9e9dc1 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -213,7 +213,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 {
 	struct amd_iommu *iommu;
 
-	list_for_each_entry(iommu, &amd_iommu_list, list)
+	for_each_iommu(iommu)
 		iommu_poll_events(iommu);
 
 	return IRQ_HANDLED;
@@ -440,7 +440,7 @@ static void iommu_flush_domain(u16 domid)
 	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 				      domid, 1, 1);
 
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
+	for_each_iommu(iommu) {
 		spin_lock_irqsave(&iommu->lock, flags);
 		__iommu_queue_command(iommu, &cmd);
 		__iommu_completion_wait(iommu);
@@ -1672,7 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
 	 * found in the system. Devices not assigned to any other
 	 * protection domain will be assigned to the default one.
 	 */
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
+	for_each_iommu(iommu) {
 		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
 		if (iommu->default_dom == NULL)
 			return -ENOMEM;
@@ -1710,7 +1710,7 @@ int __init amd_iommu_init_dma_ops(void)
 
 free_domains:
 
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
+	for_each_iommu(iommu) {
 		if (iommu->default_dom)
 			dma_ops_domain_free(iommu->default_dom);
 	}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be09..675a4b6 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -679,7 +679,7 @@ static void __init free_iommu_all(void)
 {
 	struct amd_iommu *iommu, *next;
 
-	list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
+	for_each_iommu_safe(iommu, next) {
 		list_del(&iommu->list);
 		free_iommu_one(iommu);
 		kfree(iommu);
@@ -779,7 +779,7 @@ static int __init iommu_setup_msix(struct amd_iommu *iommu)
 	struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
 	int nvec = 0, i;
 
-	list_for_each_entry(curr, &amd_iommu_list, list) {
+	for_each_iommu(curr) {
 		if (curr->dev == iommu->dev) {
 			entries[nvec].entry = curr->evt_msi_num;
 			entries[nvec].vector = 0;
@@ -818,7 +818,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
 	int r;
 	struct amd_iommu *curr;
 
-	list_for_each_entry(curr, &amd_iommu_list, list) {
+	for_each_iommu(curr) {
 		if (curr->dev == iommu->dev)
 			curr->int_enabled = true;
 	}
@@ -971,7 +971,7 @@ static void __init enable_iommus(void)
 {
 	struct amd_iommu *iommu;
 
-	list_for_each_entry(iommu, &amd_iommu_list, list) {
+	for_each_iommu(iommu) {
 		iommu_set_exclusion_range(iommu);
 		iommu_init_msi(iommu);
 		iommu_enable_event_logging(iommu);
-- 
cgit v0.10.2


From 58492e128892e3b55f1a6ef0cf3c3ab4ce7cc214 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 4 May 2009 18:41:16 +0200
Subject: amd-iommu: consolidate hardware initialization to one function

This patch restructures the AMD IOMMU initialization code to initialize
all hardware registers with one single function call.
This is helpful for suspend/resume support.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 675a4b6..74f4f1f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -252,13 +252,6 @@ static void __init iommu_enable(struct amd_iommu *iommu)
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
-/* Function to enable IOMMU event logging and event interrupts */
-static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
-{
-	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
-	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
-}
-
 /*
  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
  * the system has one.
@@ -413,25 +406,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 {
 	u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 			get_order(CMD_BUFFER_SIZE));
-	u64 entry;
 
 	if (cmd_buf == NULL)
 		return NULL;
 
 	iommu->cmd_buf_size = CMD_BUFFER_SIZE;
 
-	entry = (u64)virt_to_phys(cmd_buf);
+	return cmd_buf;
+}
+
+/*
+ * This function writes the command buffer address to the hardware and
+ * enables it.
+ */
+static void iommu_enable_command_buffer(struct amd_iommu *iommu)
+{
+	u64 entry;
+
+	BUG_ON(iommu->cmd_buf == NULL);
+
+	entry = (u64)virt_to_phys(iommu->cmd_buf);
 	entry |= MMIO_CMD_SIZE_512;
+
 	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
-			&entry, sizeof(entry));
+		    &entry, sizeof(entry));
 
 	/* set head and tail to zero manually */
 	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 
 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
-
-	return cmd_buf;
 }
 
 static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +447,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
 /* allocates the memory where the IOMMU will log its events to */
 static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
 {
-	u64 entry;
 	iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 						get_order(EVT_BUFFER_SIZE));
 
 	if (iommu->evt_buf == NULL)
 		return NULL;
 
+	return iommu->evt_buf;
+}
+
+static void iommu_enable_event_buffer(struct amd_iommu *iommu)
+{
+	u64 entry;
+
+	BUG_ON(iommu->evt_buf == NULL);
+
 	entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+
 	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 		    &entry, sizeof(entry));
 
-	iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
-	return iommu->evt_buf;
+	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
 static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -710,7 +721,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 	if (!iommu->mmio_base)
 		return -ENOMEM;
 
-	iommu_set_device_table(iommu);
 	iommu->cmd_buf = alloc_command_buffer(iommu);
 	if (!iommu->cmd_buf)
 		return -ENOMEM;
@@ -837,6 +847,8 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
 		return 1;
 	}
 
+	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
 	return 0;
 }
 
@@ -972,9 +984,11 @@ static void __init enable_iommus(void)
 	struct amd_iommu *iommu;
 
 	for_each_iommu(iommu) {
+		iommu_set_device_table(iommu);
+		iommu_enable_command_buffer(iommu);
+		iommu_enable_event_buffer(iommu);
 		iommu_set_exclusion_range(iommu);
 		iommu_init_msi(iommu);
-		iommu_enable_event_logging(iommu);
 		iommu_enable(iommu);
 	}
 }
-- 
cgit v0.10.2


From fab6afa30954a0684ef8ac1d9a606e74a6215ab6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 4 May 2009 18:46:34 +0200
Subject: amd-iommu: drop pointless iommu-loop in msi setup code

It is not necessary to loop again over all IOMMUs in this code. So drop
the loop.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 74f4f1f..cc99f60 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -826,13 +826,6 @@ out_free:
 static int __init iommu_setup_msi(struct amd_iommu *iommu)
 {
 	int r;
-	struct amd_iommu *curr;
-
-	for_each_iommu(curr) {
-		if (curr->dev == iommu->dev)
-			curr->int_enabled = true;
-	}
-
 
 	if (pci_enable_msi(iommu->dev))
 		return 1;
@@ -847,6 +840,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
 		return 1;
 	}
 
+	iommu->int_enabled = true;
 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
 
 	return 0;
-- 
cgit v0.10.2


From d91cecdd796c27df46339e80ed436a980c56fcad Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 4 May 2009 18:51:00 +0200
Subject: amd-iommu: remove support for msi-x

Current hardware uses msi instead of msi-x so this code it not necessary
and can not be tested. The best thing is to drop this code.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index cc99f60..feee475 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -783,46 +783,6 @@ static int __init init_iommu_all(struct acpi_table_header *table)
  *
  ****************************************************************************/
 
-static int __init iommu_setup_msix(struct amd_iommu *iommu)
-{
-	struct amd_iommu *curr;
-	struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
-	int nvec = 0, i;
-
-	for_each_iommu(curr) {
-		if (curr->dev == iommu->dev) {
-			entries[nvec].entry = curr->evt_msi_num;
-			entries[nvec].vector = 0;
-			curr->int_enabled = true;
-			nvec++;
-		}
-	}
-
-	if (pci_enable_msix(iommu->dev, entries, nvec)) {
-		pci_disable_msix(iommu->dev);
-		return 1;
-	}
-
-	for (i = 0; i < nvec; ++i) {
-		int r = request_irq(entries->vector, amd_iommu_int_handler,
-				    IRQF_SAMPLE_RANDOM,
-				    "AMD IOMMU",
-				    NULL);
-		if (r)
-			goto out_free;
-	}
-
-	return 0;
-
-out_free:
-	for (i -= 1; i >= 0; --i)
-		free_irq(entries->vector, NULL);
-
-	pci_disable_msix(iommu->dev);
-
-	return 1;
-}
-
 static int __init iommu_setup_msi(struct amd_iommu *iommu)
 {
 	int r;
@@ -851,9 +811,7 @@ static int __init iommu_init_msi(struct amd_iommu *iommu)
 	if (iommu->int_enabled)
 		return 0;
 
-	if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
-		return iommu_setup_msix(iommu);
-	else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+	if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
 		return iommu_setup_msi(iommu);
 
 	return 1;
-- 
cgit v0.10.2


From 92ac4320af6ed4294c2c221dd4ccbfd9026a3aa7 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 19 May 2009 19:06:27 +0200
Subject: amd-iommu: add function to disable all iommus

This function is required for suspend/resume support with AMD IOMMU
enabled.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index feee475..ed10c0f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -252,6 +252,11 @@ static void __init iommu_enable(struct amd_iommu *iommu)
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
+static void iommu_disable(struct amd_iommu *iommu)
+{
+	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
+}
+
 /*
  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
  * the system has one.
@@ -945,6 +950,14 @@ static void __init enable_iommus(void)
 	}
 }
 
+static void disable_iommus(void)
+{
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu)
+		iommu_disable(iommu);
+}
+
 /*
  * Suspend/Resume support
  * disable suspend until real resume implemented
-- 
cgit v0.10.2


From bfd1be1857e5a3385bf146e02e6dc3dd4241bec1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 5 May 2009 15:33:57 +0200
Subject: amd-iommu: add function to flush tlb for all domains

This function is required for suspend/resume support with AMD IOMMU
enabled.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index f712344..1750e1f 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -27,6 +27,7 @@ extern int amd_iommu_init(void);
 extern int amd_iommu_init_dma_ops(void);
 extern void amd_iommu_detect(void);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_flush_all_domains(void);
 #else
 static inline int amd_iommu_init(void) { return -ENODEV; }
 static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d9e9dc1..826ad079 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -449,6 +449,17 @@ static void iommu_flush_domain(u16 domid)
 	}
 }
 
+void amd_iommu_flush_all_domains(void)
+{
+	int i;
+
+	for (i = 1; i < MAX_DOMAIN_ID; ++i) {
+		if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+			continue;
+		iommu_flush_domain(i);
+	}
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
-- 
cgit v0.10.2


From 7d7a110c6127b7fc683dc6d764555f2dbd22b054 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 5 May 2009 15:48:10 +0200
Subject: amd-iommu: add function to flush tlb for all devices

This function is required for suspend/resume support with AMD IOMMU
enabled.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 1750e1f..262e028 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -28,6 +28,7 @@ extern int amd_iommu_init_dma_ops(void);
 extern void amd_iommu_detect(void);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
 extern void amd_iommu_flush_all_domains(void);
+extern void amd_iommu_flush_all_devices(void);
 #else
 static inline int amd_iommu_init(void) { return -ENODEV; }
 static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 826ad079..92b0e18 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -460,6 +460,24 @@ void amd_iommu_flush_all_domains(void)
 	}
 }
 
+void amd_iommu_flush_all_devices(void)
+{
+	struct amd_iommu *iommu;
+	int i;
+
+	for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+		if (amd_iommu_pd_table[i] == NULL)
+			continue;
+
+		iommu = amd_iommu_rlookup_table[i];
+		if (!iommu)
+			continue;
+
+		iommu_queue_inv_dev_entry(iommu, i);
+		iommu_completion_wait(iommu);
+	}
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
-- 
cgit v0.10.2


From 05f92db9f47f852ff48bbed1b063b8ab8ad00285 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 12 May 2009 09:52:46 +0200
Subject: amd_iommu: un __init functions required for suspend/resume

This patch makes sure that no function required for suspend/resume of
AMD IOMMU driver is thrown away after boot.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index ed10c0f..330896b 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -193,7 +193,7 @@ static inline unsigned long tbl_size(int entry_size)
  * This function set the exclusion range in the IOMMU. DMA accesses to the
  * exclusion range are passed through untranslated
  */
-static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
+static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
 	u64 start = iommu->exclusion_start & PAGE_MASK;
 	u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +225,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
 }
 
 /* Generic functions to enable/disable certain features of the IOMMU. */
-static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 {
 	u32 ctrl;
 
@@ -244,7 +244,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 }
 
 /* Function to enable the hardware */
-static void __init iommu_enable(struct amd_iommu *iommu)
+static void iommu_enable(struct amd_iommu *iommu)
 {
 	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
 	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -811,7 +811,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
 	return 0;
 }
 
-static int __init iommu_init_msi(struct amd_iommu *iommu)
+static int iommu_init_msi(struct amd_iommu *iommu)
 {
 	if (iommu->int_enabled)
 		return 0;
@@ -936,7 +936,7 @@ static void init_device_table(void)
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
  */
-static void __init enable_iommus(void)
+static void enable_iommus(void)
 {
 	struct amd_iommu *iommu;
 
-- 
cgit v0.10.2


From 736501ee000757082a4f0832826ae1eda7ea106e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 12 May 2009 09:56:12 +0200
Subject: amd-iommu: implement suspend/resume

This patch puts everything together and enables suspend/resume support
in the AMD IOMMU driver.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 330896b..4ca8fbf 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -965,12 +965,31 @@ static void disable_iommus(void)
 
 static int amd_iommu_resume(struct sys_device *dev)
 {
+	/*
+	 * Disable IOMMUs before reprogramming the hardware registers.
+	 * IOMMU is still enabled from the resume kernel.
+	 */
+	disable_iommus();
+
+	/* re-load the hardware */
+	enable_iommus();
+
+	/*
+	 * we have to flush after the IOMMUs are enabled because a
+	 * disabled IOMMU will never execute the commands we send
+	 */
+	amd_iommu_flush_all_domains();
+	amd_iommu_flush_all_devices();
+
 	return 0;
 }
 
 static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
 {
-	return -EINVAL;
+	/* disable IOMMUs to go out of the way for BIOS */
+	disable_iommus();
+
+	return 0;
 }
 
 static struct sysdev_class amd_iommu_sysdev_class = {
-- 
cgit v0.10.2


From c3239567a20e90e3026ac5453d5267506ef7b030 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 12 May 2009 10:56:44 +0200
Subject: amd-iommu: introduce aperture_range structure

This is a preperation for extended address allocator.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 95c8cd9..4c64c9b 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -195,6 +195,8 @@
 #define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
 					      domain for an IOMMU */
 
+#define APERTURE_RANGE_SIZE	(128 * 1024 * 1024)
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -210,6 +212,24 @@ struct protection_domain {
 };
 
 /*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+	/* address allocation bitmap */
+	unsigned long *bitmap;
+
+	/*
+	 * Array of PTE pages for the aperture. In this array we save all the
+	 * leaf pages of the domain page table used for the aperture. This way
+	 * we don't need to walk the page table to find a specific PTE. We can
+	 * just calculate its address in constant time.
+	 */
+	u64 *pte_pages[64];
+};
+
+/*
  * Data container for a dma_ops specific protection domain
  */
 struct dma_ops_domain {
@@ -224,16 +244,8 @@ struct dma_ops_domain {
 	/* address we start to search for free addresses */
 	unsigned long next_bit;
 
-	/* address allocation bitmap */
-	unsigned long *bitmap;
-
-	/*
-	 * Array of PTE pages for the aperture. In this array we save all the
-	 * leaf pages of the domain page table used for the aperture. This way
-	 * we don't need to walk the page table to find a specific PTE. We can
-	 * just calculate its address in constant time.
-	 */
-	u64 **pte_pages;
+	/* address space relevant data */
+	struct aperture_range aperture;
 
 	/* This will be set to true when TLB needs to be flushed */
 	bool need_flush;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a97db99..62acd09 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -595,7 +595,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 		 * as allocated in the aperture
 		 */
 		if (addr < dma_dom->aperture_size)
-			__set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
+			__set_bit(addr >> PAGE_SHIFT,
+				  dma_dom->aperture.bitmap);
 	}
 
 	return 0;
@@ -656,11 +657,12 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 		dom->need_flush = true;
 	}
 
-	address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
-				   0 , boundary_size, align_mask);
+	address = iommu_area_alloc(dom->aperture.bitmap, limit, dom->next_bit,
+				   pages, 0 , boundary_size, align_mask);
 	if (address == -1) {
-		address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
-				0, boundary_size, align_mask);
+		address = iommu_area_alloc(dom->aperture.bitmap, limit, 0,
+					   pages, 0, boundary_size,
+					   align_mask);
 		dom->need_flush = true;
 	}
 
@@ -685,7 +687,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 				   unsigned int pages)
 {
 	address >>= PAGE_SHIFT;
-	iommu_area_free(dom->bitmap, address, pages);
+	iommu_area_free(dom->aperture.bitmap, address, pages);
 
 	if (address >= dom->next_bit)
 		dom->need_flush = true;
@@ -741,7 +743,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 	if (start_page + pages > last_page)
 		pages = last_page - start_page;
 
-	iommu_area_reserve(dom->bitmap, start_page, pages);
+	iommu_area_reserve(dom->aperture.bitmap, start_page, pages);
 }
 
 static void free_pagetable(struct protection_domain *domain)
@@ -785,9 +787,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
 	free_pagetable(&dom->domain);
 
-	kfree(dom->pte_pages);
-
-	kfree(dom->bitmap);
+	free_page((unsigned long)dom->aperture.bitmap);
 
 	kfree(dom);
 }
@@ -826,16 +826,15 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->domain.priv = dma_dom;
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
-	dma_dom->aperture_size = (1ULL << order);
-	dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
-				  GFP_KERNEL);
-	if (!dma_dom->bitmap)
+	dma_dom->aperture_size = APERTURE_RANGE_SIZE;
+	dma_dom->aperture.bitmap = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!dma_dom->aperture.bitmap)
 		goto free_dma_dom;
 	/*
 	 * mark the first page as allocated so we never return 0 as
 	 * a valid dma-address. So we can use 0 as error value
 	 */
-	dma_dom->bitmap[0] = 1;
+	dma_dom->aperture.bitmap[0] = 1;
 	dma_dom->next_bit = 0;
 
 	dma_dom->need_flush = false;
@@ -854,13 +853,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	/*
 	 * At the last step, build the page tables so we don't need to
 	 * allocate page table pages in the dma_ops mapping/unmapping
-	 * path.
+	 * path for the first 128MB of dma address space.
 	 */
 	num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
-	dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
-			GFP_KERNEL);
-	if (!dma_dom->pte_pages)
-		goto free_dma_dom;
 
 	l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
 	if (l2_pde == NULL)
@@ -869,10 +864,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
 
 	for (i = 0; i < num_pte_pages; ++i) {
-		dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!dma_dom->pte_pages[i])
+		u64 **pte_page = &dma_dom->aperture.pte_pages[i];
+		*pte_page = (u64 *)get_zeroed_page(GFP_KERNEL);
+		if (!*pte_page)
 			goto free_dma_dom;
-		address = virt_to_phys(dma_dom->pte_pages[i]);
+		address = virt_to_phys(*pte_page);
 		l2_pde[i] = IOMMU_L1_PDE(address);
 	}
 
@@ -1159,7 +1155,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 
 	paddr &= PAGE_MASK;
 
-	pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+	pte  = dom->aperture.pte_pages[IOMMU_PTE_L1_INDEX(address)];
 	pte += IOMMU_PTE_L0_INDEX(address);
 
 	__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
@@ -1192,7 +1188,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 
 	WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
 
-	pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+	pte  = dom->aperture.pte_pages[IOMMU_PTE_L1_INDEX(address)];
 	pte += IOMMU_PTE_L0_INDEX(address);
 
 	WARN_ON(!*pte);
-- 
cgit v0.10.2


From 8bda3092bcfa68f786d94549ae026e8db1eff041 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 12 May 2009 12:02:46 +0200
Subject: amd-iommu: move page table allocation code to seperate function

This patch makes page table allocation usable for dma_ops code.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 62acd09..ded79f7 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -55,7 +55,9 @@ struct iommu_cmd {
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
 static struct dma_ops_domain *find_protection_domain(u16 devid);
-
+static u64* alloc_pte(struct protection_domain *dom,
+		      unsigned long address, u64
+		      **pte_page, gfp_t gfp);
 
 #ifdef CONFIG_AMD_IOMMU_STATS
 
@@ -468,7 +470,7 @@ static int iommu_map_page(struct protection_domain *dom,
 			  unsigned long phys_addr,
 			  int prot)
 {
-	u64 __pte, *pte, *page;
+	u64 __pte, *pte;
 
 	bus_addr  = PAGE_ALIGN(bus_addr);
 	phys_addr = PAGE_ALIGN(phys_addr);
@@ -477,27 +479,7 @@ static int iommu_map_page(struct protection_domain *dom,
 	if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
 		return -EINVAL;
 
-	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
-
-	if (!IOMMU_PTE_PRESENT(*pte)) {
-		page = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-		*pte = IOMMU_L2_PDE(virt_to_phys(page));
-	}
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
-	if (!IOMMU_PTE_PRESENT(*pte)) {
-		page = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			return -ENOMEM;
-		*pte = IOMMU_L1_PDE(virt_to_phys(page));
-	}
-
-	pte = IOMMU_PTE_PAGE(*pte);
-	pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
+	pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
 
 	if (IOMMU_PTE_PRESENT(*pte))
 		return -EBUSY;
@@ -1140,6 +1122,61 @@ static int get_device_resources(struct device *dev,
 }
 
 /*
+ * If the pte_page is not yet allocated this function is called
+ */
+static u64* alloc_pte(struct protection_domain *dom,
+		      unsigned long address, u64 **pte_page, gfp_t gfp)
+{
+	u64 *pte, *page;
+
+	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte)) {
+		page = (u64 *)get_zeroed_page(gfp);
+		if (!page)
+			return NULL;
+		*pte = IOMMU_L2_PDE(virt_to_phys(page));
+	}
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte)) {
+		page = (u64 *)get_zeroed_page(gfp);
+		if (!page)
+			return NULL;
+		*pte = IOMMU_L1_PDE(virt_to_phys(page));
+	}
+
+	pte = IOMMU_PTE_PAGE(*pte);
+
+	if (pte_page)
+		*pte_page = pte;
+
+	pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+	return pte;
+}
+
+/*
+ * This function fetches the PTE for a given address in the aperture
+ */
+static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
+			    unsigned long address)
+{
+	struct aperture_range *aperture = &dom->aperture;
+	u64 *pte, *pte_page;
+
+	pte = aperture->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+	if (!pte) {
+		pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
+		aperture->pte_pages[IOMMU_PTE_L1_INDEX(address)] = pte_page;
+	}
+
+	return pte;
+}
+
+/*
  * This is the generic map function. It maps one 4kb page at paddr to
  * the given address in the DMA address space for the domain.
  */
@@ -1155,8 +1192,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 
 	paddr &= PAGE_MASK;
 
-	pte  = dom->aperture.pte_pages[IOMMU_PTE_L1_INDEX(address)];
-	pte += IOMMU_PTE_L0_INDEX(address);
+	pte  = dma_ops_get_pte(dom, address);
 
 	__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
 
-- 
cgit v0.10.2


From 53812c115cda1f660b286c939669154a56976f6b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 12 May 2009 12:17:38 +0200
Subject: amd-iommu: handle page table allocation failures in dma_ops code

The code will be required when the aperture size increases dynamically
in the extended address allocator.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index ded79f7..a467add 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1193,6 +1193,8 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 	paddr &= PAGE_MASK;
 
 	pte  = dma_ops_get_pte(dom, address);
+	if (!pte)
+		return bad_dma_address;
 
 	__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
 
@@ -1248,7 +1250,7 @@ static dma_addr_t __map_single(struct device *dev,
 			       u64 dma_mask)
 {
 	dma_addr_t offset = paddr & ~PAGE_MASK;
-	dma_addr_t address, start;
+	dma_addr_t address, start, ret;
 	unsigned int pages;
 	unsigned long align_mask = 0;
 	int i;
@@ -1271,7 +1273,10 @@ static dma_addr_t __map_single(struct device *dev,
 
 	start = address;
 	for (i = 0; i < pages; ++i) {
-		dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+		ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+		if (ret == bad_dma_address)
+			goto out_unmap;
+
 		paddr += PAGE_SIZE;
 		start += PAGE_SIZE;
 	}
@@ -1287,6 +1292,17 @@ static dma_addr_t __map_single(struct device *dev,
 
 out:
 	return address;
+
+out_unmap:
+
+	for (--i; i >= 0; --i) {
+		start -= PAGE_SIZE;
+		dma_ops_domain_unmap(iommu, dma_dom, start);
+	}
+
+	dma_ops_free_addresses(dma_dom, address, pages);
+
+	return bad_dma_address;
 }
 
 /*
-- 
cgit v0.10.2


From 384de72910a7bf96a02a6d8023fe9e16d872beb2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 15 May 2009 12:30:05 +0200
Subject: amd-iommu: make address allocator aware of multiple aperture ranges

This patch changes the AMD IOMMU address allocator to allow up to 32
aperture ranges per dma_ops domain.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 4c64c9b..eca9129 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -195,7 +195,12 @@
 #define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
 					      domain for an IOMMU */
 
-#define APERTURE_RANGE_SIZE	(128 * 1024 * 1024)
+#define APERTURE_RANGE_SHIFT	27	/* 128 MB */
+#define APERTURE_RANGE_SIZE	(1ULL << APERTURE_RANGE_SHIFT)
+#define APERTURE_RANGE_PAGES	(APERTURE_RANGE_SIZE >> PAGE_SHIFT)
+#define APERTURE_MAX_RANGES	32	/* allows 4GB of DMA address space */
+#define APERTURE_RANGE_INDEX(a)	((a) >> APERTURE_RANGE_SHIFT)
+#define APERTURE_PAGE_INDEX(a)	(((a) >> 21) & 0x3fULL)
 
 /*
  * This structure contains generic data for  IOMMU protection domains
@@ -227,6 +232,8 @@ struct aperture_range {
 	 * just calculate its address in constant time.
 	 */
 	u64 *pte_pages[64];
+
+	unsigned long offset;
 };
 
 /*
@@ -245,7 +252,7 @@ struct dma_ops_domain {
 	unsigned long next_bit;
 
 	/* address space relevant data */
-	struct aperture_range aperture;
+	struct aperture_range *aperture[APERTURE_MAX_RANGES];
 
 	/* This will be set to true when TLB needs to be flushed */
 	bool need_flush;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a467add..794163a 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -578,7 +578,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 		 */
 		if (addr < dma_dom->aperture_size)
 			__set_bit(addr >> PAGE_SHIFT,
-				  dma_dom->aperture.bitmap);
+				  dma_dom->aperture[0]->bitmap);
 	}
 
 	return 0;
@@ -615,43 +615,74 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  ****************************************************************************/
 
 /*
- * The address allocator core function.
+ * The address allocator core functions.
  *
  * called with domain->lock held
  */
+
+static unsigned long dma_ops_area_alloc(struct device *dev,
+					struct dma_ops_domain *dom,
+					unsigned int pages,
+					unsigned long align_mask,
+					u64 dma_mask,
+					unsigned long start)
+{
+	unsigned long next_bit = dom->next_bit % APERTURE_RANGE_PAGES;
+	int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
+	int i = start >> APERTURE_RANGE_SHIFT;
+	unsigned long boundary_size;
+	unsigned long address = -1;
+	unsigned long limit;
+
+	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+			PAGE_SIZE) >> PAGE_SHIFT;
+
+	for (;i < max_index; ++i) {
+		unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+
+		if (dom->aperture[i]->offset >= dma_mask)
+			break;
+
+		limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+					       dma_mask >> PAGE_SHIFT);
+
+		address = iommu_area_alloc(dom->aperture[i]->bitmap,
+					   limit, next_bit, pages, 0,
+					    boundary_size, align_mask);
+		if (address != -1) {
+			address = dom->aperture[i]->offset +
+				  (address << PAGE_SHIFT);
+			dom->next_bit = (address >> PAGE_SHIFT) + pages;
+			break;
+		}
+
+		next_bit = 0;
+	}
+
+	return address;
+}
+
 static unsigned long dma_ops_alloc_addresses(struct device *dev,
 					     struct dma_ops_domain *dom,
 					     unsigned int pages,
 					     unsigned long align_mask,
 					     u64 dma_mask)
 {
-	unsigned long limit;
 	unsigned long address;
-	unsigned long boundary_size;
+	unsigned long start = dom->next_bit << PAGE_SHIFT;
 
-	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-			PAGE_SIZE) >> PAGE_SHIFT;
-	limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
-				       dma_mask >> PAGE_SHIFT);
 
-	if (dom->next_bit >= limit) {
-		dom->next_bit = 0;
-		dom->need_flush = true;
-	}
+	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+				     dma_mask, start);
 
-	address = iommu_area_alloc(dom->aperture.bitmap, limit, dom->next_bit,
-				   pages, 0 , boundary_size, align_mask);
 	if (address == -1) {
-		address = iommu_area_alloc(dom->aperture.bitmap, limit, 0,
-					   pages, 0, boundary_size,
-					   align_mask);
+		dom->next_bit = 0;
+		address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+					     dma_mask, 0);
 		dom->need_flush = true;
 	}
 
-	if (likely(address != -1)) {
-		dom->next_bit = address + pages;
-		address <<= PAGE_SHIFT;
-	} else
+	if (unlikely(address == -1))
 		address = bad_dma_address;
 
 	WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
@@ -668,11 +699,17 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 				   unsigned long address,
 				   unsigned int pages)
 {
-	address >>= PAGE_SHIFT;
-	iommu_area_free(dom->aperture.bitmap, address, pages);
+	unsigned i = address >> APERTURE_RANGE_SHIFT;
+	struct aperture_range *range = dom->aperture[i];
+
+	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
 
-	if (address >= dom->next_bit)
+	if ((address >> PAGE_SHIFT) >= dom->next_bit)
 		dom->need_flush = true;
+
+	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
+	iommu_area_free(range->bitmap, address, pages);
+
 }
 
 /****************************************************************************
@@ -720,12 +757,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 				      unsigned long start_page,
 				      unsigned int pages)
 {
-	unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
+	unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
 
 	if (start_page + pages > last_page)
 		pages = last_page - start_page;
 
-	iommu_area_reserve(dom->aperture.bitmap, start_page, pages);
+	for (i = start_page; i < start_page + pages; ++i) {
+		int index = i / APERTURE_RANGE_PAGES;
+		int page  = i % APERTURE_RANGE_PAGES;
+		__set_bit(page, dom->aperture[index]->bitmap);
+	}
 }
 
 static void free_pagetable(struct protection_domain *domain)
@@ -764,12 +805,19 @@ static void free_pagetable(struct protection_domain *domain)
  */
 static void dma_ops_domain_free(struct dma_ops_domain *dom)
 {
+	int i;
+
 	if (!dom)
 		return;
 
 	free_pagetable(&dom->domain);
 
-	free_page((unsigned long)dom->aperture.bitmap);
+	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+		if (!dom->aperture[i])
+			continue;
+		free_page((unsigned long)dom->aperture[i]->bitmap);
+		kfree(dom->aperture[i]);
+	}
 
 	kfree(dom);
 }
@@ -797,6 +845,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	if (!dma_dom)
 		return NULL;
 
+	dma_dom->aperture[0] = kzalloc(sizeof(struct aperture_range),
+				       GFP_KERNEL);
+	if (!dma_dom->aperture[0])
+		goto free_dma_dom;
+
 	spin_lock_init(&dma_dom->domain.lock);
 
 	dma_dom->domain.id = domain_id_alloc();
@@ -809,14 +862,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
 	dma_dom->aperture_size = APERTURE_RANGE_SIZE;
-	dma_dom->aperture.bitmap = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!dma_dom->aperture.bitmap)
+	dma_dom->aperture[0]->bitmap = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!dma_dom->aperture[0]->bitmap)
 		goto free_dma_dom;
 	/*
 	 * mark the first page as allocated so we never return 0 as
 	 * a valid dma-address. So we can use 0 as error value
 	 */
-	dma_dom->aperture.bitmap[0] = 1;
+	dma_dom->aperture[0]->bitmap[0] = 1;
 	dma_dom->next_bit = 0;
 
 	dma_dom->need_flush = false;
@@ -846,7 +899,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
 
 	for (i = 0; i < num_pte_pages; ++i) {
-		u64 **pte_page = &dma_dom->aperture.pte_pages[i];
+		u64 **pte_page = &dma_dom->aperture[0]->pte_pages[i];
 		*pte_page = (u64 *)get_zeroed_page(GFP_KERNEL);
 		if (!*pte_page)
 			goto free_dma_dom;
@@ -1164,14 +1217,19 @@ static u64* alloc_pte(struct protection_domain *dom,
 static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
 			    unsigned long address)
 {
-	struct aperture_range *aperture = &dom->aperture;
+	struct aperture_range *aperture;
 	u64 *pte, *pte_page;
 
-	pte = aperture->pte_pages[IOMMU_PTE_L1_INDEX(address)];
+	aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+	if (!aperture)
+		return NULL;
+
+	pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
 	if (!pte) {
 		pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
-		aperture->pte_pages[IOMMU_PTE_L1_INDEX(address)] = pte_page;
-	}
+		aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
+	} else
+		pte += IOMMU_PTE_L0_INDEX(address);
 
 	return pte;
 }
@@ -1219,14 +1277,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 				 struct dma_ops_domain *dom,
 				 unsigned long address)
 {
+	struct aperture_range *aperture;
 	u64 *pte;
 
 	if (address >= dom->aperture_size)
 		return;
 
-	WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
+	aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+	if (!aperture)
+		return;
+
+	pte  = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+	if (!pte)
+		return;
 
-	pte  = dom->aperture.pte_pages[IOMMU_PTE_L1_INDEX(address)];
 	pte += IOMMU_PTE_L0_INDEX(address);
 
 	WARN_ON(!*pte);
-- 
cgit v0.10.2


From 803b8cb4d9a93b90c67aba2aab7f2c54d595b5b9 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 18 May 2009 15:32:48 +0200
Subject: amd-iommu: change dma_dom->next_bit to dma_dom->next_address

Simplify the code a little bit by using the same unit for all address
space related state in the dma_ops domain structure.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index eca9129..4ff4cf1 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -249,7 +249,7 @@ struct dma_ops_domain {
 	unsigned long aperture_size;
 
 	/* address we start to search for free addresses */
-	unsigned long next_bit;
+	unsigned long next_address;
 
 	/* address space relevant data */
 	struct aperture_range *aperture[APERTURE_MAX_RANGES];
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 794163a..c1a08b9 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -627,13 +627,15 @@ static unsigned long dma_ops_area_alloc(struct device *dev,
 					u64 dma_mask,
 					unsigned long start)
 {
-	unsigned long next_bit = dom->next_bit % APERTURE_RANGE_PAGES;
+	unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
 	int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
 	int i = start >> APERTURE_RANGE_SHIFT;
 	unsigned long boundary_size;
 	unsigned long address = -1;
 	unsigned long limit;
 
+	next_bit >>= PAGE_SHIFT;
+
 	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 			PAGE_SIZE) >> PAGE_SHIFT;
 
@@ -652,7 +654,7 @@ static unsigned long dma_ops_area_alloc(struct device *dev,
 		if (address != -1) {
 			address = dom->aperture[i]->offset +
 				  (address << PAGE_SHIFT);
-			dom->next_bit = (address >> PAGE_SHIFT) + pages;
+			dom->next_address = address + (pages << PAGE_SHIFT);
 			break;
 		}
 
@@ -669,14 +671,12 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 					     u64 dma_mask)
 {
 	unsigned long address;
-	unsigned long start = dom->next_bit << PAGE_SHIFT;
-
 
 	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
-				     dma_mask, start);
+				     dma_mask, dom->next_address);
 
 	if (address == -1) {
-		dom->next_bit = 0;
+		dom->next_address = 0;
 		address = dma_ops_area_alloc(dev, dom, pages, align_mask,
 					     dma_mask, 0);
 		dom->need_flush = true;
@@ -704,10 +704,11 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 
 	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
 
-	if ((address >> PAGE_SHIFT) >= dom->next_bit)
+	if (address >= dom->next_address)
 		dom->need_flush = true;
 
 	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
+
 	iommu_area_free(range->bitmap, address, pages);
 
 }
@@ -870,7 +871,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	 * a valid dma-address. So we can use 0 as error value
 	 */
 	dma_dom->aperture[0]->bitmap[0] = 1;
-	dma_dom->next_bit = 0;
+	dma_dom->next_address = 0;
 
 	dma_dom->need_flush = false;
 	dma_dom->target_dev = 0xffff;
-- 
cgit v0.10.2


From 9cabe89b99773e682538a8809abc7d4000c77083 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 18 May 2009 16:38:55 +0200
Subject: amd-iommu: move aperture_range allocation code to seperate function

This patch prepares the dynamic increasement of dma_ops domain
apertures.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c1a08b9..8ff02ee 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -620,6 +620,59 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  * called with domain->lock held
  */
 
+/*
+ * This function is used to add a new aperture range to an existing
+ * aperture in case of dma_ops domain allocation or address allocation
+ * failure.
+ */
+static int alloc_new_range(struct dma_ops_domain *dma_dom,
+			   bool populate, gfp_t gfp)
+{
+	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+
+	if (index >= APERTURE_MAX_RANGES)
+		return -ENOMEM;
+
+	dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
+	if (!dma_dom->aperture[index])
+		return -ENOMEM;
+
+	dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
+	if (!dma_dom->aperture[index]->bitmap)
+		goto out_free;
+
+	dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+
+	if (populate) {
+		unsigned long address = dma_dom->aperture_size;
+		int i, num_ptes = APERTURE_RANGE_PAGES / 512;
+		u64 *pte, *pte_page;
+
+		for (i = 0; i < num_ptes; ++i) {
+			pte = alloc_pte(&dma_dom->domain, address,
+					&pte_page, gfp);
+			if (!pte)
+				goto out_free;
+
+			dma_dom->aperture[index]->pte_pages[i] = pte_page;
+
+			address += APERTURE_RANGE_SIZE / 64;
+		}
+	}
+
+	dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+
+	return 0;
+
+out_free:
+	free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+
+	kfree(dma_dom->aperture[index]);
+	dma_dom->aperture[index] = NULL;
+
+	return -ENOMEM;
+}
+
 static unsigned long dma_ops_area_alloc(struct device *dev,
 					struct dma_ops_domain *dom,
 					unsigned int pages,
@@ -832,9 +885,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 						   unsigned order)
 {
 	struct dma_ops_domain *dma_dom;
-	unsigned i, num_pte_pages;
-	u64 *l2_pde;
-	u64 address;
 
 	/*
 	 * Currently the DMA aperture must be between 32 MB and 1GB in size
@@ -846,11 +896,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	if (!dma_dom)
 		return NULL;
 
-	dma_dom->aperture[0] = kzalloc(sizeof(struct aperture_range),
-				       GFP_KERNEL);
-	if (!dma_dom->aperture[0])
-		goto free_dma_dom;
-
 	spin_lock_init(&dma_dom->domain.lock);
 
 	dma_dom->domain.id = domain_id_alloc();
@@ -862,10 +907,13 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->domain.priv = dma_dom;
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
-	dma_dom->aperture_size = APERTURE_RANGE_SIZE;
-	dma_dom->aperture[0]->bitmap = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!dma_dom->aperture[0]->bitmap)
+
+	dma_dom->need_flush = false;
+	dma_dom->target_dev = 0xffff;
+
+	if (alloc_new_range(dma_dom, true, GFP_KERNEL))
 		goto free_dma_dom;
+
 	/*
 	 * mark the first page as allocated so we never return 0 as
 	 * a valid dma-address. So we can use 0 as error value
@@ -873,9 +921,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->aperture[0]->bitmap[0] = 1;
 	dma_dom->next_address = 0;
 
-	dma_dom->need_flush = false;
-	dma_dom->target_dev = 0xffff;
-
 	/* Intialize the exclusion range if necessary */
 	if (iommu->exclusion_start &&
 	    iommu->exclusion_start < dma_dom->aperture_size) {
@@ -886,28 +931,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 		dma_ops_reserve_addresses(dma_dom, startpage, pages);
 	}
 
-	/*
-	 * At the last step, build the page tables so we don't need to
-	 * allocate page table pages in the dma_ops mapping/unmapping
-	 * path for the first 128MB of dma address space.
-	 */
-	num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
-
-	l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
-	if (l2_pde == NULL)
-		goto free_dma_dom;
-
-	dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
-
-	for (i = 0; i < num_pte_pages; ++i) {
-		u64 **pte_page = &dma_dom->aperture[0]->pte_pages[i];
-		*pte_page = (u64 *)get_zeroed_page(GFP_KERNEL);
-		if (!*pte_page)
-			goto free_dma_dom;
-		address = virt_to_phys(*pte_page);
-		l2_pde[i] = IOMMU_L1_PDE(address);
-	}
-
 	return dma_dom;
 
 free_dma_dom:
-- 
cgit v0.10.2


From 00cd122ae5e5e7c60cce2af3c35b190d4c3f2d0d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 19 May 2009 09:52:40 +0200
Subject: amd-iommu: handle exlusion ranges and unity mappings in
 alloc_new_range

This patch makes sure no reserved addresses are allocated in an dma_ops
domain when the aperture is increased dynamically.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 8ff02ee..59ee1b9 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -58,6 +58,9 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
 static u64* alloc_pte(struct protection_domain *dom,
 		      unsigned long address, u64
 		      **pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+				      unsigned long start_page,
+				      unsigned int pages);
 
 #ifdef CONFIG_AMD_IOMMU_STATS
 
@@ -621,14 +624,42 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  */
 
 /*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+		      unsigned long address)
+{
+	u64 *pte;
+
+	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return NULL;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return NULL;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+	return pte;
+}
+
+/*
  * This function is used to add a new aperture range to an existing
  * aperture in case of dma_ops domain allocation or address allocation
  * failure.
  */
-static int alloc_new_range(struct dma_ops_domain *dma_dom,
+static int alloc_new_range(struct amd_iommu *iommu,
+			   struct dma_ops_domain *dma_dom,
 			   bool populate, gfp_t gfp)
 {
 	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+	int i;
 
 	if (index >= APERTURE_MAX_RANGES)
 		return -ENOMEM;
@@ -662,6 +693,33 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 
 	dma_dom->aperture_size += APERTURE_RANGE_SIZE;
 
+	/* Intialize the exclusion range if necessary */
+	if (iommu->exclusion_start &&
+	    iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+	    iommu->exclusion_start < dma_dom->aperture_size) {
+		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+		int pages = iommu_num_pages(iommu->exclusion_start,
+					    iommu->exclusion_length,
+					    PAGE_SIZE);
+		dma_ops_reserve_addresses(dma_dom, startpage, pages);
+	}
+
+	/*
+	 * Check for areas already mapped as present in the new aperture
+	 * range and mark those pages as reserved in the allocator. Such
+	 * mappings may already exist as a result of requested unity
+	 * mappings for devices.
+	 */
+	for (i = dma_dom->aperture[index]->offset;
+	     i < dma_dom->aperture_size;
+	     i += PAGE_SIZE) {
+		u64 *pte = fetch_pte(&dma_dom->domain, i);
+		if (!pte || !IOMMU_PTE_PRESENT(*pte))
+			continue;
+
+		dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
+	}
+
 	return 0;
 
 out_free:
@@ -911,7 +969,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->need_flush = false;
 	dma_dom->target_dev = 0xffff;
 
-	if (alloc_new_range(dma_dom, true, GFP_KERNEL))
+	if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
 		goto free_dma_dom;
 
 	/*
@@ -921,15 +979,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->aperture[0]->bitmap[0] = 1;
 	dma_dom->next_address = 0;
 
-	/* Intialize the exclusion range if necessary */
-	if (iommu->exclusion_start &&
-	    iommu->exclusion_start < dma_dom->aperture_size) {
-		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-		int pages = iommu_num_pages(iommu->exclusion_start,
-					    iommu->exclusion_length,
-					    PAGE_SIZE);
-		dma_ops_reserve_addresses(dma_dom, startpage, pages);
-	}
 
 	return dma_dom;
 
-- 
cgit v0.10.2


From 11b83888ae729457b5cfb936dbd498481f6408df Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 19 May 2009 10:23:15 +0200
Subject: amd-iommu: enlarge the aperture dynamically

By dynamically increasing the aperture the extended allocator is now
ready for use.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 59ee1b9..d129d8f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1403,10 +1403,26 @@ static dma_addr_t __map_single(struct device *dev,
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
+retry:
 	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
 					  dma_mask);
-	if (unlikely(address == bad_dma_address))
-		goto out;
+	if (unlikely(address == bad_dma_address)) {
+		/*
+		 * setting next_address here will let the address
+		 * allocator only scan the new allocated range in the
+		 * first run. This is a small optimization.
+		 */
+		dma_dom->next_address = dma_dom->aperture_size;
+
+		if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+			goto out;
+
+		/*
+		 * aperture was sucessfully enlarged by 128 MB, try
+		 * allocation again
+		 */
+		goto retry;
+	}
 
 	start = address;
 	for (i = 0; i < pages; ++i) {
-- 
cgit v0.10.2


From d9cfed925448f097ec7faab80d903eb7e5f99712 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 19 May 2009 12:16:29 +0200
Subject: amd-iommu: remove amd_iommu_size kernel parameter

This parameter is not longer necessary when aperture increases
dynamically.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbf..5b776c6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -329,11 +329,6 @@ and is between 256 and 4096 characters. It is defined in the file
 				    flushed before they will be reused, which
 				    is a lot of faster
 
-	amd_iommu_size= [HW,X86-64]
-			Define the size of the aperture for the AMD IOMMU
-			driver. Possible values are:
-			'32M', '64M' (default), '128M', '256M', '512M', '1G'
-
 	amijoy.map=	[HW,JOY] Amiga joystick support
 			Map of devices attached to JOY0DAT and JOY1DAT
 			Format: <a>,<b>
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d129d8f..31d56c3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -939,17 +939,10 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
  * It also intializes the page table and the address allocator data
  * structures required for the dma_ops interface
  */
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
-						   unsigned order)
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
 {
 	struct dma_ops_domain *dma_dom;
 
-	/*
-	 * Currently the DMA aperture must be between 32 MB and 1GB in size
-	 */
-	if ((order < 25) || (order > 30))
-		return NULL;
-
 	dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
 	if (!dma_dom)
 		return NULL;
@@ -1087,7 +1080,6 @@ static int device_change_notifier(struct notifier_block *nb,
 	struct protection_domain *domain;
 	struct dma_ops_domain *dma_domain;
 	struct amd_iommu *iommu;
-	int order = amd_iommu_aperture_order;
 	unsigned long flags;
 
 	if (devid > amd_iommu_last_bdf)
@@ -1126,7 +1118,7 @@ static int device_change_notifier(struct notifier_block *nb,
 		dma_domain = find_protection_domain(devid);
 		if (dma_domain)
 			goto out;
-		dma_domain = dma_ops_domain_alloc(iommu, order);
+		dma_domain = dma_ops_domain_alloc(iommu);
 		if (!dma_domain)
 			goto out;
 		dma_domain->target_dev = devid;
@@ -1826,7 +1818,6 @@ static void prealloc_protection_domains(void)
 	struct pci_dev *dev = NULL;
 	struct dma_ops_domain *dma_dom;
 	struct amd_iommu *iommu;
-	int order = amd_iommu_aperture_order;
 	u16 devid;
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1839,7 +1830,7 @@ static void prealloc_protection_domains(void)
 		iommu = amd_iommu_rlookup_table[devid];
 		if (!iommu)
 			continue;
-		dma_dom = dma_ops_domain_alloc(iommu, order);
+		dma_dom = dma_ops_domain_alloc(iommu);
 		if (!dma_dom)
 			continue;
 		init_unity_mappings_for_device(dma_dom, devid);
@@ -1865,7 +1856,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
 int __init amd_iommu_init_dma_ops(void)
 {
 	struct amd_iommu *iommu;
-	int order = amd_iommu_aperture_order;
 	int ret;
 
 	/*
@@ -1874,7 +1864,7 @@ int __init amd_iommu_init_dma_ops(void)
 	 * protection domain will be assigned to the default one.
 	 */
 	list_for_each_entry(iommu, &amd_iommu_list, list) {
-		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+		iommu->default_dom = dma_ops_domain_alloc(iommu);
 		if (iommu->default_dom == NULL)
 			return -ENOMEM;
 		iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8c0be09..762a4ee 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -121,7 +121,6 @@ u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 					   to handle */
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
-unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
 bool amd_iommu_isolate = true;		/* if true, device isolation is
 					   enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
@@ -1137,9 +1136,6 @@ int __init amd_iommu_init(void)
 
 	enable_iommus();
 
-	printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
-			(1 << (amd_iommu_aperture_order-20)));
-
 	printk(KERN_INFO "AMD IOMMU: device isolation ");
 	if (amd_iommu_isolate)
 		printk("enabled\n");
@@ -1225,15 +1221,4 @@ static int __init parse_amd_iommu_options(char *str)
 	return 1;
 }
 
-static int __init parse_amd_iommu_size_options(char *str)
-{
-	unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
-
-	if ((order > 24) && (order < 31))
-		amd_iommu_aperture_order = order;
-
-	return 1;
-}
-
 __setup("amd_iommu=", parse_amd_iommu_options);
-__setup("amd_iommu_size=", parse_amd_iommu_size_options);
-- 
cgit v0.10.2


From fe16f088a88fb73161bba8784375c829f7e87b54 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 12:27:53 +0200
Subject: amd-iommu: disable round-robin allocator for CONFIG_IOMMU_STRESS

Disabling the round-robin allocator results in reusing the same
dma-addresses again very fast. This is a good test if the iotlb flushing
is working correctly.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 31d56c3..543822b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -783,6 +783,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 {
 	unsigned long address;
 
+#ifdef CONFIG_IOMMU_STRESS
+	dom->next_address = 0;
+	dom->need_flush = true;
+#endif
+
 	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
 				     dma_mask, dom->next_address);
 
-- 
cgit v0.10.2


From f5e9705c6429d24dee832b2edd7f4848d432ea03 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 12:31:53 +0200
Subject: amd-iommu: don't preallocate page tables with CONFIG_IOMMU_STRESS

This forces testing of on-demand page table allocation code.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 543822b..33434c4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -661,6 +661,10 @@ static int alloc_new_range(struct amd_iommu *iommu,
 	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
 	int i;
 
+#ifdef CONFIG_IOMMU_STRESS
+	populate = false;
+#endif
+
 	if (index >= APERTURE_MAX_RANGES)
 		return -ENOMEM;
 
-- 
cgit v0.10.2


From 47bccd6bb2b866449d3ecf2ba350ac1c7473b2b8 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 12:40:54 +0200
Subject: amd-iommu: don't free dma adresses below 512MB with
 CONFIG_IOMMU_STRESS

This will test the automatic aperture enlargement code. This is
important because only very few devices will ever trigger this code
path. So force it under CONFIG_IOMMU_STRESS.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 33434c4..04ff5ec 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -824,6 +824,11 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 
 	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
 
+#ifdef CONFIG_IOMMU_STRESS
+	if (i < 4)
+		return;
+#endif
+
 	if (address >= dom->next_address)
 		dom->need_flush = true;
 
-- 
cgit v0.10.2


From 13503fa9137d9708d52214e9506c671dbf2fbdce Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 26 Mar 2009 17:39:20 +0900
Subject: x86, mce: Cleanup param parser

- Fix the comment formatting.

- The error path does not return 0, and printk lacks level and "\n".

- Move __setup("nomce") next to mcheck_disable().

- Improve readability etc.

[ Impact: cleanup ]

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <49CB3F38.7090703@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 6fb0b35..77effb5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -839,25 +839,29 @@ static int __init mcheck_disable(char *str)
 	mce_dont_init = 1;
 	return 1;
 }
+__setup("nomce", mcheck_disable);
 
-/* mce=off disables machine check.
-   mce=TOLERANCELEVEL (number, see above)
-   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
-   mce=nobootlog Don't log MCEs from before booting. */
+/*
+ * mce=off disables machine check
+ * mce=TOLERANCELEVEL (number, see above)
+ * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+ * mce=nobootlog Don't log MCEs from before booting.
+ */
 static int __init mcheck_enable(char *str)
 {
 	if (!strcmp(str, "off"))
 		mce_dont_init = 1;
-	else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
-		mce_bootlog = str[0] == 'b';
+	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
+		mce_bootlog = (str[0] == 'b');
 	else if (isdigit(str[0]))
 		get_option(&str, &tolerant);
-	else
-		printk("mce= argument %s ignored. Please use /sys", str);
+	else {
+		printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n",
+		       str);
+		return 0;
+	}
 	return 1;
 }
-
-__setup("nomce", mcheck_disable);
 __setup("mce=", mcheck_enable);
 
 /*
-- 
cgit v0.10.2


From e9eee03e99d519599eb615c3e251d5f6cc4be57d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:17 +0200
Subject: x86, mce: clean up mce_64.c

This file has been modified many times along the years, by multiple
authors, so the general style and structure has diverged in a number
of areas making this file hard to read.

So fix the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 77effb5..1491246 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -1,46 +1,47 @@
 /*
  * Machine check handler.
+ *
  * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
  * Rest from unknown author(s).
  * 2004 Andi Kleen. Rewrote most of it.
  * Copyright 2008 Intel Corporation
  * Author: Andi Kleen
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/thread_info.h>
+#include <linux/capability.h>
+#include <linux/miscdevice.h>
+#include <linux/ratelimit.h>
+#include <linux/kallsyms.h>
+#include <linux/rcupdate.h>
 #include <linux/smp_lock.h>
+#include <linux/kobject.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
 #include <linux/string.h>
-#include <linux/rcupdate.h>
-#include <linux/kallsyms.h>
 #include <linux/sysdev.h>
-#include <linux/miscdevice.h>
-#include <linux/fs.h>
-#include <linux/capability.h>
-#include <linux/cpu.h>
-#include <linux/percpu.h>
-#include <linux/poll.h>
-#include <linux/thread_info.h>
 #include <linux/ctype.h>
-#include <linux/kmod.h>
-#include <linux/kdebug.h>
-#include <linux/kobject.h>
+#include <linux/sched.h>
 #include <linux/sysfs.h>
-#include <linux/ratelimit.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/poll.h>
+#include <linux/cpu.h>
+#include <linux/fs.h>
+
 #include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/mce.h>
 #include <asm/uaccess.h>
-#include <asm/smp.h>
 #include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
+#include <asm/smp.h>
 
-#define MISC_MCELOG_MINOR 227
+#define MISC_MCELOG_MINOR	227
 
 atomic_t mce_entry;
 
-static int mce_dont_init;
+static int			mce_dont_init;
 
 /*
  * Tolerant levels:
@@ -49,16 +50,16 @@ static int mce_dont_init;
  *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
  *   3: never panic or SIGBUS, log all errors (for testing only)
  */
-static int tolerant = 1;
-static int banks;
-static u64 *bank;
-static unsigned long notify_user;
-static int rip_msr;
-static int mce_bootlog = -1;
-static atomic_t mce_events;
+static int			tolerant = 1;
+static int			banks;
+static u64			*bank;
+static unsigned long		notify_user;
+static int			rip_msr;
+static int			mce_bootlog = -1;
+static atomic_t			mce_events;
 
-static char trigger[128];
-static char *trigger_argv[2] = { trigger, NULL };
+static char			trigger[128];
+static char			*trigger_argv[2] = { trigger, NULL };
 
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 
@@ -89,19 +90,23 @@ static struct mce_log mcelog = {
 void mce_log(struct mce *mce)
 {
 	unsigned next, entry;
+
 	atomic_inc(&mce_events);
 	mce->finished = 0;
 	wmb();
 	for (;;) {
 		entry = rcu_dereference(mcelog.next);
 		for (;;) {
-			/* When the buffer fills up discard new entries. Assume
-			   that the earlier errors are the more interesting. */
+			/*
+			 * When the buffer fills up discard new entries.
+			 * Assume that the earlier errors are the more
+			 * interesting ones:
+			 */
 			if (entry >= MCE_LOG_LEN) {
 				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
 				return;
 			}
-			/* Old left over entry. Skip. */
+			/* Old left over entry. Skip: */
 			if (mcelog.entry[entry].finished) {
 				entry++;
 				continue;
@@ -264,12 +269,12 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
  * implies that most kernel services cannot be safely used. Don't even
  * think about putting a printk in there!
  */
-void do_machine_check(struct pt_regs * regs, long error_code)
+void do_machine_check(struct pt_regs *regs, long error_code)
 {
 	struct mce m, panicm;
+	int panicm_found = 0;
 	u64 mcestart = 0;
 	int i;
-	int panicm_found = 0;
 	/*
 	 * If no_way_out gets set, there is no safe way to recover from this
 	 * MCE.  If tolerant is cranked up, we'll try anyway.
@@ -293,6 +298,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 	mce_setup(&m);
 
 	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+
 	/* if the restart IP is not valid, we're done for */
 	if (!(m.mcgstatus & MCG_STATUS_RIPV))
 		no_way_out = 1;
@@ -356,23 +362,29 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 		mce_get_rip(&m, regs);
 		mce_log(&m);
 
-		/* Did this bank cause the exception? */
-		/* Assume that the bank with uncorrectable errors did it,
-		   and that there is only a single one. */
-		if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
+		/*
+		 * Did this bank cause the exception?
+		 *
+		 * Assume that the bank with uncorrectable errors did it,
+		 * and that there is only a single one:
+		 */
+		if ((m.status & MCI_STATUS_UC) &&
+					(m.status & MCI_STATUS_EN)) {
 			panicm = m;
 			panicm_found = 1;
 		}
 	}
 
-	/* If we didn't find an uncorrectable error, pick
-	   the last one (shouldn't happen, just being safe). */
+	/*
+	 * If we didn't find an uncorrectable error, pick
+	 * the last one (shouldn't happen, just being safe).
+	 */
 	if (!panicm_found)
 		panicm = m;
 
 	/*
 	 * If we have decided that we just CAN'T continue, and the user
-	 *  has not set tolerant to an insane level, give up and die.
+	 * has not set tolerant to an insane level, give up and die.
 	 */
 	if (no_way_out && tolerant < 3)
 		mce_panic("Machine check", &panicm, mcestart);
@@ -451,10 +463,9 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster.  When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-
 static int check_interval = 5 * 60; /* 5 minutes */
+
 static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
-static void mcheck_timer(unsigned long);
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
 static void mcheck_timer(unsigned long data)
@@ -464,9 +475,10 @@ static void mcheck_timer(unsigned long data)
 
 	WARN_ON(smp_processor_id() != data);
 
-	if (mce_available(&current_cpu_data))
+	if (mce_available(&current_cpu_data)) {
 		machine_check_poll(MCP_TIMESTAMP,
 				&__get_cpu_var(mce_poll_banks));
+	}
 
 	/*
 	 * Alert userspace if needed.  If we logged an MCE, reduce the
@@ -501,6 +513,7 @@ int mce_notify_user(void)
 	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
 
 	clear_thread_flag(TIF_MCE_NOTIFY);
+
 	if (test_and_clear_bit(0, &notify_user)) {
 		wake_up_interruptible(&mce_wait);
 
@@ -520,9 +533,10 @@ int mce_notify_user(void)
 	return 0;
 }
 
-/* see if the idle task needs to notify userspace */
+/* see if the idle task needs to notify userspace: */
 static int
-mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
+mce_idle_callback(struct notifier_block *nfb, unsigned long action,
+		  void *unused)
 {
 	/* IDLE_END should be safe - interrupts are back on */
 	if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
@@ -532,7 +546,7 @@ mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
 }
 
 static struct notifier_block mce_idle_notifier = {
-	.notifier_call = mce_idle_callback,
+	.notifier_call		= mce_idle_callback,
 };
 
 static __init int periodic_mcheck_init(void)
@@ -547,8 +561,8 @@ __initcall(periodic_mcheck_init);
  */
 static int mce_cap_init(void)
 {
-	u64 cap;
 	unsigned b;
+	u64 cap;
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	b = cap & 0xff;
@@ -578,9 +592,9 @@ static int mce_cap_init(void)
 
 static void mce_init(void *dummy)
 {
+	mce_banks_t all_banks;
 	u64 cap;
 	int i;
-	mce_banks_t all_banks;
 
 	/*
 	 * Log the machine checks left over from the previous reset.
@@ -605,14 +619,21 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 {
 	/* This should be disabled by the BIOS, but isn't always */
 	if (c->x86_vendor == X86_VENDOR_AMD) {
-		if (c->x86 == 15 && banks > 4)
-			/* disable GART TBL walk error reporting, which trips off
-			   incorrectly with the IOMMU & 3ware & Cerberus. */
+		if (c->x86 == 15 && banks > 4) {
+			/*
+			 * disable GART TBL walk error reporting, which
+			 * trips off incorrectly with the IOMMU & 3ware
+			 * & Cerberus:
+			 */
 			clear_bit(10, (unsigned long *)&bank[4]);
-		if(c->x86 <= 17 && mce_bootlog < 0)
-			/* Lots of broken BIOS around that don't clear them
-			   by default and leave crap in there. Don't log. */
+		}
+		if (c->x86 <= 17 && mce_bootlog < 0) {
+			/*
+			 * Lots of broken BIOS around that don't clear them
+			 * by default and leave crap in there. Don't log:
+			 */
 			mce_bootlog = 0;
+		}
 	}
 
 }
@@ -646,7 +667,7 @@ static void mce_init_timer(void)
 
 /*
  * Called for each booted CPU to set up machine checks.
- * Must be called with preempt off.
+ * Must be called with preempt off:
  */
 void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 {
@@ -669,8 +690,8 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
  */
 
 static DEFINE_SPINLOCK(mce_state_lock);
-static int open_count;	/* #times opened */
-static int open_exclu;	/* already open exclusive? */
+static int		open_count;		/* #times opened */
+static int		open_exclu;		/* already open exclusive? */
 
 static int mce_open(struct inode *inode, struct file *file)
 {
@@ -680,6 +701,7 @@ static int mce_open(struct inode *inode, struct file *file)
 	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
 		spin_unlock(&mce_state_lock);
 		unlock_kernel();
+
 		return -EBUSY;
 	}
 
@@ -712,13 +734,14 @@ static void collect_tscs(void *data)
 	rdtscll(cpu_tsc[smp_processor_id()]);
 }
 
+static DEFINE_MUTEX(mce_read_mutex);
+
 static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 			loff_t *off)
 {
+	char __user *buf = ubuf;
 	unsigned long *cpu_tsc;
-	static DEFINE_MUTEX(mce_read_mutex);
 	unsigned prev, next;
-	char __user *buf = ubuf;
 	int i, err;
 
 	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
@@ -732,6 +755,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
 		mutex_unlock(&mce_read_mutex);
 		kfree(cpu_tsc);
+
 		return -EINVAL;
 	}
 
@@ -770,6 +794,7 @@ timeout:
 	 * synchronize.
 	 */
 	on_each_cpu(collect_tscs, cpu_tsc, 1);
+
 	for (i = next; i < MCE_LOG_LEN; i++) {
 		if (mcelog.entry[i].finished &&
 		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -782,6 +807,7 @@ timeout:
 	}
 	mutex_unlock(&mce_read_mutex);
 	kfree(cpu_tsc);
+
 	return err ? -EFAULT : buf - ubuf;
 }
 
@@ -799,6 +825,7 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+
 	switch (cmd) {
 	case MCE_GET_RECORD_LEN:
 		return put_user(sizeof(struct mce), p);
@@ -810,6 +837,7 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		do {
 			flags = mcelog.flags;
 		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+
 		return put_user(flags, p);
 	}
 	default:
@@ -818,11 +846,11 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 }
 
 static const struct file_operations mce_chrdev_ops = {
-	.open = mce_open,
-	.release = mce_release,
-	.read = mce_read,
-	.poll = mce_poll,
-	.unlocked_ioctl = mce_ioctl,
+	.open			= mce_open,
+	.release		= mce_release,
+	.read			= mce_read,
+	.poll			= mce_poll,
+	.unlocked_ioctl		= mce_ioctl,
 };
 
 static struct miscdevice mce_log_device = {
@@ -891,13 +919,16 @@ static int mce_shutdown(struct sys_device *dev)
 	return mce_disable();
 }
 
-/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
-   Only one CPU is active at this time, the others get readded later using
-   CPU hotplug. */
+/*
+ * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+ * Only one CPU is active at this time, the others get re-added later using
+ * CPU hotplug:
+ */
 static int mce_resume(struct sys_device *dev)
 {
 	mce_init(NULL);
 	mce_cpu_features(&current_cpu_data);
+
 	return 0;
 }
 
@@ -916,14 +947,16 @@ static void mce_restart(void)
 }
 
 static struct sysdev_class mce_sysclass = {
-	.suspend = mce_suspend,
-	.shutdown = mce_shutdown,
-	.resume = mce_resume,
-	.name = "machinecheck",
+	.suspend	= mce_suspend,
+	.shutdown	= mce_shutdown,
+	.resume		= mce_resume,
+	.name		= "machinecheck",
 };
 
 DEFINE_PER_CPU(struct sys_device, device_mce);
-void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
+
+__cpuinitdata
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 /* Why are there no generic functions for this? */
 #define ACCESSOR(name, var, start) \
@@ -937,9 +970,12 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit
 				    const char *buf, size_t siz) {	\
 		char *end;						\
 		unsigned long new = simple_strtoul(buf, &end, 0);	\
-		if (end == buf) return -EINVAL;				\
+									\
+		if (end == buf)						\
+			return -EINVAL;					\
 		var = new;						\
 		start;							\
+									\
 		return end-buf;						\
 	}								\
 	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
@@ -950,6 +986,7 @@ static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
 			 char *buf)
 {
 	u64 b = bank[attr - bank_attrs];
+
 	return sprintf(buf, "%llx\n", b);
 }
 
@@ -958,15 +995,18 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
 {
 	char *end;
 	u64 new = simple_strtoull(buf, &end, 0);
+
 	if (end == buf)
 		return -EINVAL;
+
 	bank[attr - bank_attrs] = new;
 	mce_restart();
+
 	return end-buf;
 }
 
-static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
-				char *buf)
+static ssize_t
+show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
 {
 	strcpy(buf, trigger);
 	strcat(buf, "\n");
@@ -974,21 +1014,27 @@ static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 }
 
 static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
-				const char *buf,size_t siz)
+				const char *buf, size_t siz)
 {
 	char *p;
 	int len;
+
 	strncpy(trigger, buf, sizeof(trigger));
 	trigger[sizeof(trigger)-1] = 0;
 	len = strlen(trigger);
 	p = strchr(trigger, '\n');
-	if (*p) *p = 0;
+
+	if (*p)
+		*p = 0;
+
 	return len;
 }
 
 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
-ACCESSOR(check_interval,check_interval,mce_restart())
+
+ACCESSOR(check_interval, check_interval, mce_restart())
+
 static struct sysdev_attribute *mce_attributes[] = {
 	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
 	NULL
@@ -996,7 +1042,7 @@ static struct sysdev_attribute *mce_attributes[] = {
 
 static cpumask_var_t mce_device_initialized;
 
-/* Per cpu sysdev init.  All of the cpus still share the same ctl bank */
+/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
 static __cpuinit int mce_create_device(unsigned int cpu)
 {
 	int err;
@@ -1006,15 +1052,15 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 		return -EIO;
 
 	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
-	per_cpu(device_mce,cpu).id = cpu;
-	per_cpu(device_mce,cpu).cls = &mce_sysclass;
+	per_cpu(device_mce, cpu).id	= cpu;
+	per_cpu(device_mce, cpu).cls	= &mce_sysclass;
 
-	err = sysdev_register(&per_cpu(device_mce,cpu));
+	err = sysdev_register(&per_cpu(device_mce, cpu));
 	if (err)
 		return err;
 
 	for (i = 0; mce_attributes[i]; i++) {
-		err = sysdev_create_file(&per_cpu(device_mce,cpu),
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
 					 mce_attributes[i]);
 		if (err)
 			goto error;
@@ -1035,10 +1081,10 @@ error2:
 	}
 error:
 	while (--i >= 0) {
-		sysdev_remove_file(&per_cpu(device_mce,cpu),
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
 				   mce_attributes[i]);
 	}
-	sysdev_unregister(&per_cpu(device_mce,cpu));
+	sysdev_unregister(&per_cpu(device_mce, cpu));
 
 	return err;
 }
@@ -1051,12 +1097,12 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
 		return;
 
 	for (i = 0; mce_attributes[i]; i++)
-		sysdev_remove_file(&per_cpu(device_mce,cpu),
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
 			mce_attributes[i]);
 	for (i = 0; i < banks; i++)
 		sysdev_remove_file(&per_cpu(device_mce, cpu),
 			&bank_attrs[i]);
-	sysdev_unregister(&per_cpu(device_mce,cpu));
+	sysdev_unregister(&per_cpu(device_mce, cpu));
 	cpumask_clear_cpu(cpu, mce_device_initialized);
 }
 
@@ -1076,11 +1122,12 @@ static void mce_disable_cpu(void *h)
 
 static void mce_reenable_cpu(void *h)
 {
-	int i;
 	unsigned long action = *(unsigned long *)h;
+	int i;
 
 	if (!mce_available(&current_cpu_data))
 		return;
+
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_reenable();
 	for (i = 0; i < banks; i++)
@@ -1088,8 +1135,8 @@ static void mce_reenable_cpu(void *h)
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
-				      unsigned long action, void *hcpu)
+static int __cpuinit
+mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
 	struct timer_list *t = &per_cpu(mce_timer, cpu);
@@ -1142,12 +1189,14 @@ static __init int mce_init_banks(void)
 
 	for (i = 0; i < banks; i++) {
 		struct sysdev_attribute *a = &bank_attrs[i];
-		a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
+
+		a->attr.name	= kasprintf(GFP_KERNEL, "bank%d", i);
 		if (!a->attr.name)
 			goto nomem;
-		a->attr.mode = 0644;
-		a->show = show_bank;
-		a->store = set_bank;
+
+		a->attr.mode	= 0644;
+		a->show		= show_bank;
+		a->store	= set_bank;
 	}
 	return 0;
 
@@ -1156,6 +1205,7 @@ nomem:
 		kfree(bank_attrs[i].attr.name);
 	kfree(bank_attrs);
 	bank_attrs = NULL;
+
 	return -ENOMEM;
 }
 
@@ -1185,6 +1235,7 @@ static __init int mce_init_device(void)
 
 	register_hotcpu_notifier(&mce_cpu_notifier);
 	misc_register(&mce_log_device);
+
 	return err;
 }
 
-- 
cgit v0.10.2


From 3b58dfd04bdfa52e717ead8f3c7622610eb7f950 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:21 +0200
Subject: x86, mce: clean up mce_32.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 3552119..05979e7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -2,13 +2,12 @@
  * mce.c - x86 Machine Check Exception Reporting
  * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
+#include <linux/thread_info.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>
-#include <linux/thread_info.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -17,18 +16,20 @@
 #include "mce.h"
 
 int mce_disabled;
-int nr_mce_banks;
 
+int nr_mce_banks;
 EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
 
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
+	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+	       smp_processor_id());
 }
 
 /* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+						unexpected_machine_check;
 
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
-- 
cgit v0.10.2


From c5aaf0e0702513637278ca4e27a156caa9392817 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:18 +0200
Subject: x86, mce: clean up p4.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index f53bdcb..cb344ab 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -2,18 +2,17 @@
  * P4 specific Machine Check Exception Reporting
  */
 
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>
 
+#include <asm/therm_throt.h>
 #include <asm/processor.h>
 #include <asm/system.h>
-#include <asm/msr.h>
 #include <asm/apic.h>
-
-#include <asm/therm_throt.h>
+#include <asm/msr.h>
 
 #include "mce.h"
 
@@ -36,6 +35,7 @@ static int mce_num_extended_msrs;
 
 
 #ifdef CONFIG_X86_MCE_P4THERMAL
+
 static void unexpected_thermal_interrupt(struct pt_regs *regs)
 {
 	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
@@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
 	add_taint(TAINT_MACHINE_CHECK);
 }
 
-/* P4/Xeon Thermal transition interrupt handler */
+/* P4/Xeon Thermal transition interrupt handler: */
 static void intel_thermal_interrupt(struct pt_regs *regs)
 {
 	__u64 msr_val;
@@ -54,8 +54,9 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
 	therm_throt_process(msr_val & 0x1);
 }
 
-/* Thermal interrupt handler for this CPU setup */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+/* Thermal interrupt handler for this CPU setup: */
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
+						unexpected_thermal_interrupt;
 
 void smp_thermal_interrupt(struct pt_regs *regs)
 {
@@ -65,67 +66,76 @@ void smp_thermal_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
-/* P4/Xeon Thermal regulation detect and init */
+/* P4/Xeon Thermal regulation detect and init: */
 static void intel_init_thermal(struct cpuinfo_x86 *c)
 {
-	u32 l, h;
 	unsigned int cpu = smp_processor_id();
+	u32 l, h;
 
-	/* Thermal monitoring */
+	/* Thermal monitoring: */
 	if (!cpu_has(c, X86_FEATURE_ACPI))
 		return;	/* -ENODEV */
 
-	/* Clock modulation */
+	/* Clock modulation: */
 	if (!cpu_has(c, X86_FEATURE_ACC))
 		return;	/* -ENODEV */
 
-	/* first check if its enabled already, in which case there might
+	/*
+	 * First check if its enabled already, in which case there might
 	 * be some SMM goo which handles it, so we can't even put a handler
-	 * since it might be delivered via SMI already -zwanem.
+	 * since it might be delivered via SMI already:
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	h = apic_read(APIC_LVTTHMR);
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
-				cpu);
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+
 		return; /* -EBUSY */
 	}
 
-	/* check whether a vector already exists, temporarily masked? */
+	/* Check whether a vector already exists, temporarily masked? */
 	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
-				"installed\n",
-			cpu, (h & APIC_VECTOR_MASK));
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
+		       cpu, (h & APIC_VECTOR_MASK));
+
 		return; /* -EBUSY */
 	}
 
-	/* The temperature transition interrupt handler setup */
-	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
-	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
+	/*
+	 * The temperature transition interrupt handler setup:
+	 */
+
+	/* Our delivery vector: */
+	h = THERMAL_APIC_VECTOR;
+
+	/* We'll mask the thermal vector in the lapic till we're ready: */
+	h |= APIC_DM_FIXED | APIC_LVT_MASKED;
 	apic_write(APIC_LVTTHMR, h);
 
 	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
 	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
 
-	/* ok we're good to go... */
+	/* Ok, we're good to go... */
 	vendor_thermal_interrupt = intel_thermal_interrupt;
 
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
 
+	/* Unmask the thermal vector: */
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
 	printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
 
 	/* enable thermal throttle processing */
 	atomic_set(&therm_throt_en, 1);
-	return;
 }
 #endif /* CONFIG_X86_MCE_P4THERMAL */
 
-
 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 {
 	u32 h;
 
@@ -143,9 +153,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
+	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -157,7 +167,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 
 	if (mce_num_extended_msrs > 0) {
 		struct intel_mce_extended_msrs dbg;
+
 		intel_get_extended_msrs(&dbg);
+
 		printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
 			"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
 			"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
@@ -171,6 +183,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
+
 			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
@@ -196,6 +209,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		panic("Unable to continue");
 
 	printk(KERN_EMERG "Attempting to continue.\n");
+
 	/*
 	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
@@ -217,7 +231,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
-
 void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
-- 
cgit v0.10.2


From ed8bc7ed9a2ad875617b24d2ba09e49ee886638c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:21 +0200
Subject: x86, mce: clean up p5.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index c9f77ea..8812f54 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -2,11 +2,10 @@
  * P5 specific Machine Check Exception Reporting
  * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/processor.h>
@@ -15,39 +14,53 @@
 
 #include "mce.h"
 
-/* Machine check handler for Pentium class Intel */
+/* Machine check handler for Pentium class Intel CPUs: */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)
 {
 	u32 loaddr, hi, lotype;
+
 	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
 	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
-	printk(KERN_EMERG "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
-	if (lotype&(1<<5))
-		printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
+
+	printk(KERN_EMERG
+		"CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
+		smp_processor_id(), loaddr, lotype);
+
+	if (lotype & (1<<5)) {
+		printk(KERN_EMERG
+			"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+			smp_processor_id());
+	}
+
 	add_taint(TAINT_MACHINE_CHECK);
 }
 
-/* Set up machine check reporting for processors with Intel style MCE */
+/* Set up machine check reporting for processors with Intel style MCE: */
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 
-	/*Check for MCE support */
+	/* Check for MCE support: */
 	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;
 
-	/* Default P5 to off as its often misconnected */
+	/* Default P5 to off as its often misconnected: */
 	if (mce_disabled != -1)
 		return;
+
 	machine_check_vector = pentium_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
-	/* Read registers before enabling */
+	/* Read registers before enabling: */
 	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
 	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
-	printk(KERN_INFO "Intel old style machine check architecture supported.\n");
+	printk(KERN_INFO
+	       "Intel old style machine check architecture supported.\n");
 
-	/* Enable MCE */
+	/* Enable MCE: */
 	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+	printk(KERN_INFO
+	       "Intel old style machine check reporting enabled on CPU#%d.\n",
+	       smp_processor_id());
 }
-- 
cgit v0.10.2


From ea2566ff80e096eeecc0918fb5d5a4612d8f62ef Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:19 +0200
Subject: x86, mce: clean up p6.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 2ac52d7..43c24e6 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -2,11 +2,10 @@
  * P6 specific Machine Check Exception Reporting
  * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/processor.h>
@@ -18,9 +17,9 @@
 /* Machine Check Handler For PII/PIII */
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
+	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -35,12 +34,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-			misc[0] = addr[0] = '\0';
+
+			misc[0] = '\0';
+			addr[0] = '\0';
+
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
 			high &= ~(1<<31);
+
 			if (high & (1<<27)) {
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -49,6 +52,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
+
 			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
 		}
@@ -63,16 +67,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	/*
 	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
-	 * for errors if the OS could not log the error.
+	 * for errors if the OS could not log the error:
 	 */
 	for (i = 0; i < nr_mce_banks; i++) {
 		unsigned int msr;
+
 		msr = MSR_IA32_MC0_STATUS+i*4;
 		rdmsr(msr, low, high);
 		if (high & (1<<31)) {
-			/* Clear it */
+			/* Clear it: */
 			wrmsr(msr, 0UL, 0UL);
-			/* Serialize */
+			/* Serialize: */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
@@ -81,7 +86,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
-/* Set up machine check reporting for processors with Intel style MCE */
+/* Set up machine check reporting for processors with Intel style MCE: */
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -97,6 +102,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 
 	/* Ok machine check is available */
 	machine_check_vector = intel_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
 	printk(KERN_INFO "Intel machine check architecture supported.\n");
-- 
cgit v0.10.2


From efee4ca80980f97b60e91e3322c3342f19623eff Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:20 +0200
Subject: x86, mce: clean up k7.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index dd3af6e..89e5104 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -2,11 +2,10 @@
  * Athlon specific Machine Check Exception Reporting
  * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/processor.h>
@@ -15,12 +14,12 @@
 
 #include "mce.h"
 
-/* Machine Check Handler For AMD Athlon/Duron */
+/* Machine Check Handler For AMD Athlon/Duron: */
 static void k7_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
+	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -32,15 +31,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
 
 	for (i = 1; i < nr_mce_banks; i++) {
 		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-		if (high&(1<<31)) {
+		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-			misc[0] = addr[0] = '\0';
+
+			misc[0] = '\0';
+			addr[0] = '\0';
+
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
 			high &= ~(1<<31);
+
 			if (high & (1<<27)) {
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -49,27 +52,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
+
 			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
-			/* Clear it */
+
+			/* Clear it: */
 			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-			/* Serialize */
+			/* Serialize: */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
 	}
 
-	if (recover&2)
+	if (recover & 2)
 		panic("CPU context corrupt");
-	if (recover&1)
+	if (recover & 1)
 		panic("Unable to continue");
+
 	printk(KERN_EMERG "Attempting to continue.\n");
+
 	mcgstl &= ~(1<<2);
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
 
-/* AMD K7 machine check is Intel like */
+/* AMD K7 machine check is Intel like: */
 void amd_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -79,21 +86,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
 		return;
 
 	machine_check_vector = k7_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
 	printk(KERN_INFO "Intel machine check architecture supported.\n");
+
 	rdmsr(MSR_IA32_MCG_CAP, l, h);
 	if (l & (1<<8))	/* Control register present ? */
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 	nr_mce_banks = l & 0xff;
 
-	/* Clear status for MC index 0 separately, we don't touch CTL,
-	 * as some K7 Athlons cause spurious MCEs when its enabled. */
+	/*
+	 * Clear status for MC index 0 separately, we don't touch CTL,
+	 * as some K7 Athlons cause spurious MCEs when its enabled:
+	 */
 	if (boot_cpu_data.x86 == 6) {
 		wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
 		i = 1;
 	} else
 		i = 0;
+
 	for (; i < nr_mce_banks; i++) {
 		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
 		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-- 
cgit v0.10.2


From 91425084f74c0ad087b3fb6bdad79a825f952720 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:22 +0200
Subject: x86, mce: clean up winchip.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 2a043d8..81b0248 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -2,11 +2,10 @@
  * IDT Winchip specific Machine Check Exception Reporting
  * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -14,7 +13,7 @@
 
 #include "mce.h"
 
-/* Machine check handler for WinChip C6 */
+/* Machine check handler for WinChip C6: */
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
 	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
@@ -25,12 +24,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
 void winchip_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 lo, hi;
+
 	machine_check_vector = winchip_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
+
 	rdmsr(MSR_IDT_FCR1, lo, hi);
 	lo |= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
 	lo &= ~(1<<4);	/* Enable MCE */
 	wrmsr(MSR_IDT_FCR1, lo, hi);
+
 	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+
+	printk(KERN_INFO
+	       "Winchip machine check reporting enabled on CPU#0.\n");
 }
-- 
cgit v0.10.2


From bdbfbdd5e8f0efb9bfef2e597f8ac673c36317ab Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:20 +0200
Subject: x86, mce: clean up non-fatal.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index a74af12..70b7104 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -6,15 +6,14 @@
  * This file contains routines to check for non-fatal MCEs every 15s
  *
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/workqueue.h>
 #include <linux/interrupt.h>
-#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -22,9 +21,9 @@
 
 #include "mce.h"
 
-static int firstbank;
+static int		firstbank;
 
-#define MCE_RATE	15*HZ	/* timer rate is 15s */
+#define MCE_RATE	(15*HZ)	/* timer rate is 15s */
 
 static void mce_checkregs(void *info)
 {
@@ -34,23 +33,24 @@ static void mce_checkregs(void *info)
 	for (i = firstbank; i < nr_mce_banks; i++) {
 		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
 
-		if (high & (1<<31)) {
-			printk(KERN_INFO "MCE: The hardware reports a non "
-				"fatal, correctable incident occurred on "
-				"CPU %d.\n",
+		if (!(high & (1<<31)))
+			continue;
+
+		printk(KERN_INFO "MCE: The hardware reports a non fatal, "
+			"correctable incident occurred on CPU %d.\n",
 				smp_processor_id());
-			printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
-
-			/*
-			 * Scrub the error so we don't pick it up in MCE_RATE
-			 * seconds time.
-			 */
-			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-
-			/* Serialize */
-			wmb();
-			add_taint(TAINT_MACHINE_CHECK);
-		}
+
+		printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+
+		/*
+		 * Scrub the error so we don't pick it up in MCE_RATE
+		 * seconds time:
+		 */
+		wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+		/* Serialize: */
+		wmb();
+		add_taint(TAINT_MACHINE_CHECK);
 	}
 }
 
@@ -77,16 +77,17 @@ static int __init init_nonfatal_mce_checker(void)
 
 	/* Some Athlons misbehave when we frob bank 0 */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		boot_cpu_data.x86 == 6)
-			firstbank = 1;
+						boot_cpu_data.x86 == 6)
+		firstbank = 1;
 	else
-			firstbank = 0;
+		firstbank = 0;
 
 	/*
 	 * Check for non-fatal errors every MCE_RATE s
 	 */
 	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
 	printk(KERN_INFO "Machine check exception polling timer started.\n");
+
 	return 0;
 }
 module_init(init_nonfatal_mce_checker);
-- 
cgit v0.10.2


From cb6f3c155b0afabc48667efb9e7b1ce92ccfcab4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:19 +0200
Subject: x86, mce: clean up therm_throt.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d5ae224..a2b5d7d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -1,7 +1,7 @@
 /*
- *
  * Thermal throttle event support code (such as syslog messaging and rate
  * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
+ *
  * This allows consistent reporting of CPU thermal throttle events.
  *
  * Maintains a counter in /sys that keeps track of the number of thermal
@@ -13,43 +13,44 @@
  * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
  *          Inspired by Ross Biro's and Al Borchers' counter code.
  */
-
+#include <linux/notifier.h>
+#include <linux/jiffies.h>
 #include <linux/percpu.h>
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
-#include <asm/cpu.h>
-#include <linux/notifier.h>
-#include <linux/jiffies.h>
+
 #include <asm/therm_throt.h>
+#include <asm/cpu.h>
 
 /* How long to wait between reporting thermal events */
-#define CHECK_INTERVAL              (300 * HZ)
+#define CHECK_INTERVAL		(300 * HZ)
 
 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-atomic_t therm_throt_en = ATOMIC_INIT(0);
+
+atomic_t therm_throt_en		= ATOMIC_INIT(0);
 
 #ifdef CONFIG_SYSFS
-#define define_therm_throt_sysdev_one_ro(_name)                              \
-        static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
-
-#define define_therm_throt_sysdev_show_func(name)                            \
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,        \
-					struct sysdev_attribute *attr,	     \
-                                              char *buf)                     \
-{                                                                            \
-	unsigned int cpu = dev->id;                                          \
-	ssize_t ret;                                                         \
-                                                                             \
-	preempt_disable();              /* CPU hotplug */                    \
-	if (cpu_online(cpu))                                                 \
-		ret = sprintf(buf, "%lu\n",                                  \
-			      per_cpu(thermal_throttle_##name, cpu));        \
-	else                                                                 \
-		ret = 0;                                                     \
-	preempt_enable();                                                    \
-                                                                             \
-	return ret;                                                          \
+#define define_therm_throt_sysdev_one_ro(_name)				\
+	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
+
+#define define_therm_throt_sysdev_show_func(name)			\
+static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
+					struct sysdev_attribute *attr,	\
+					      char *buf)		\
+{									\
+	unsigned int cpu = dev->id;					\
+	ssize_t ret;							\
+									\
+	preempt_disable();	/* CPU hotplug */			\
+	if (cpu_online(cpu))						\
+		ret = sprintf(buf, "%lu\n",				\
+			      per_cpu(thermal_throttle_##name, cpu));	\
+	else								\
+		ret = 0;						\
+	preempt_enable();						\
+									\
+	return ret;							\
 }
 
 define_therm_throt_sysdev_show_func(count);
@@ -61,8 +62,8 @@ static struct attribute *thermal_throttle_attrs[] = {
 };
 
 static struct attribute_group thermal_throttle_attr_group = {
-	.attrs = thermal_throttle_attrs,
-	.name = "thermal_throttle"
+	.attrs	= thermal_throttle_attrs,
+	.name	= "thermal_throttle"
 };
 #endif /* CONFIG_SYSFS */
 
@@ -110,10 +111,11 @@ int therm_throt_process(int curr)
 }
 
 #ifdef CONFIG_SYSFS
-/* Add/Remove thermal_throttle interface for CPU device */
+/* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
 {
-	return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	return sysfs_create_group(&sys_dev->kobj,
+				  &thermal_throttle_attr_group);
 }
 
 static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
@@ -121,19 +123,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
 	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
 }
 
-/* Mutex protecting device creation against CPU hotplug */
+/* Mutex protecting device creation against CPU hotplug: */
 static DEFINE_MUTEX(therm_cpu_lock);
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
-						   unsigned long action,
-						   void *hcpu)
+static __cpuinit int
+thermal_throttle_cpu_callback(struct notifier_block *nfb,
+			      unsigned long action,
+			      void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
 	struct sys_device *sys_dev;
 	int err = 0;
 
 	sys_dev = get_cpu_sysdev(cpu);
+
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-- 
cgit v0.10.2


From 1cb2a8e1767ab60370ecce90654c0f281c602d95 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:18 +0200
Subject: x86, mce: clean up mce_amd_64.c

Make the coding style match that of the rest of the x86 arch code.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 56dde9c..4d90ec3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -13,22 +13,22 @@
  *
  *  All MC4_MISCi registers are shared between multi-cores
  */
-
-#include <linux/cpu.h>
-#include <linux/errno.h>
-#include <linux/init.h>
 #include <linux/interrupt.h>
-#include <linux/kobject.h>
 #include <linux/notifier.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
+#include <linux/kobject.h>
 #include <linux/sysdev.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
 #include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+
+#include <asm/percpu.h>
 #include <asm/apic.h>
+#include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
-#include <asm/percpu.h>
-#include <asm/idle.h>
 
 #define PFX               "mce_threshold: "
 #define VERSION           "version 1.1.1"
@@ -48,26 +48,26 @@
 #define MCG_XBLK_ADDR     0xC0000400
 
 struct threshold_block {
-	unsigned int block;
-	unsigned int bank;
-	unsigned int cpu;
-	u32 address;
-	u16 interrupt_enable;
-	u16 threshold_limit;
-	struct kobject kobj;
-	struct list_head miscj;
+	unsigned int		block;
+	unsigned int		bank;
+	unsigned int		cpu;
+	u32			address;
+	u16			interrupt_enable;
+	u16			threshold_limit;
+	struct kobject		kobj;
+	struct list_head	miscj;
 };
 
 /* defaults used early on boot */
 static struct threshold_block threshold_defaults = {
-	.interrupt_enable = 0,
-	.threshold_limit = THRESHOLD_MAX,
+	.interrupt_enable	= 0,
+	.threshold_limit	= THRESHOLD_MAX,
 };
 
 struct threshold_bank {
-	struct kobject *kobj;
-	struct threshold_block *blocks;
-	cpumask_var_t cpus;
+	struct kobject		*kobj;
+	struct threshold_block	*blocks;
+	cpumask_var_t		cpus;
 };
 static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
 
@@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void);
  */
 
 struct thresh_restart {
-	struct threshold_block *b;
-	int reset;
-	u16 old_limit;
+	struct threshold_block	*b;
+	int			reset;
+	u16			old_limit;
 };
 
 /* must be called with correct cpu affinity */
@@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr)
 	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
 		    (tr->old_limit - tr->b->threshold_limit);
+
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
@@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr)
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
-	unsigned int bank, block;
 	unsigned int cpu = smp_processor_id();
-	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
+	unsigned int bank, block;
 	struct thresh_restart tr;
+	u8 lvt_off;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 				if (!address)
 					break;
 				address += MCG_XBLK_ADDR;
-			}
-			else
+			} else
 				++address;
 
 			if (rdmsr_safe(address, &low, &high))
@@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
  */
 static void amd_threshold_interrupt(void)
 {
+	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
 	struct mce m;
-	u32 low = 0, high = 0, address = 0;
 
 	mce_setup(&m);
 
@@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void)
 		if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			if (block == 0)
+			if (block == 0) {
 				address = MSR_IA32_MC0_MISC + bank * 4;
-			else if (block == 1) {
+			} else if (block == 1) {
 				address = (low & MASK_BLKPTR_LO) >> 21;
 				if (!address)
 					break;
 				address += MCG_XBLK_ADDR;
-			}
-			else
+			} else {
 				++address;
+			}
 
 			if (rdmsr_safe(address, &low, &high))
 				break;
@@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void)
 			     (high & MASK_LOCKED_HI))
 				continue;
 
-			/* Log the machine check that caused the threshold
-			   event. */
+			/*
+			 * Log the machine check that caused the threshold
+			 * event.
+			 */
 			machine_check_poll(MCP_TIMESTAMP,
 					&__get_cpu_var(mce_poll_banks));
 
@@ -254,48 +256,56 @@ static void amd_threshold_interrupt(void)
 
 struct threshold_attr {
 	struct attribute attr;
-	ssize_t(*show) (struct threshold_block *, char *);
-	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
+	ssize_t (*show) (struct threshold_block *, char *);
+	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
 };
 
-#define SHOW_FIELDS(name)                                           \
-static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
-{                                                                   \
-        return sprintf(buf, "%lx\n", (unsigned long) b->name);      \
+#define SHOW_FIELDS(name)						\
+static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
+{									\
+	return sprintf(buf, "%lx\n", (unsigned long) b->name);		\
 }
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)
 
-static ssize_t store_interrupt_enable(struct threshold_block *b,
-				      const char *buf, size_t count)
+static ssize_t
+store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count)
 {
-	char *end;
 	struct thresh_restart tr;
-	unsigned long new = simple_strtoul(buf, &end, 0);
+	unsigned long new;
+	char *end;
+
+	new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
+
 	b->interrupt_enable = !!new;
 
-	tr.b = b;
-	tr.reset = 0;
-	tr.old_limit = 0;
+	tr.b		= b;
+	tr.reset	= 0;
+	tr.old_limit	= 0;
+
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
 	return end - buf;
 }
 
-static ssize_t store_threshold_limit(struct threshold_block *b,
-				     const char *buf, size_t count)
+static ssize_t
+store_threshold_limit(struct threshold_block *b, const char *buf, size_t count)
 {
-	char *end;
 	struct thresh_restart tr;
-	unsigned long new = simple_strtoul(buf, &end, 0);
+	unsigned long new;
+	char *end;
+
+	new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
+
 	if (new > THRESHOLD_MAX)
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
+
 	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
 	tr.b = b;
@@ -307,8 +317,8 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 }
 
 struct threshold_block_cross_cpu {
-	struct threshold_block *tb;
-	long retval;
+	struct threshold_block	*tb;
+	long			retval;
 };
 
 static void local_error_count_handler(void *_tbcc)
@@ -338,15 +348,16 @@ static ssize_t store_error_count(struct threshold_block *b,
 	return 1;
 }
 
-#define THRESHOLD_ATTR(_name,_mode,_show,_store) {            \
-        .attr = {.name = __stringify(_name), .mode = _mode }, \
-        .show = _show,                                        \
-        .store = _store,                                      \
+#define THRESHOLD_ATTR(_name, _mode, _show, _store)			\
+{									\
+	.attr	= {.name = __stringify(_name), .mode = _mode },		\
+	.show	= _show,						\
+	.store	= _store,						\
 };
 
-#define RW_ATTR(name)                                           \
-static struct threshold_attr name =                             \
-        THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
+#define RW_ATTR(name)							\
+static struct threshold_attr name =					\
+	THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
 
 RW_ATTR(interrupt_enable);
 RW_ATTR(threshold_limit);
@@ -359,15 +370,17 @@ static struct attribute *default_attrs[] = {
 	NULL
 };
 
-#define to_block(k) container_of(k, struct threshold_block, kobj)
-#define to_attr(a) container_of(a, struct threshold_attr, attr)
+#define to_block(k)	container_of(k, struct threshold_block, kobj)
+#define to_attr(a)	container_of(a, struct threshold_attr, attr)
 
 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
+
 	ret = a->show ? a->show(b, buf) : -EIO;
+
 	return ret;
 }
 
@@ -377,18 +390,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
+
 	ret = a->store ? a->store(b, buf, count) : -EIO;
+
 	return ret;
 }
 
 static struct sysfs_ops threshold_ops = {
-	.show = show,
-	.store = store,
+	.show			= show,
+	.store			= store,
 };
 
 static struct kobj_type threshold_ktype = {
-	.sysfs_ops = &threshold_ops,
-	.default_attrs = default_attrs,
+	.sysfs_ops		= &threshold_ops,
+	.default_attrs		= default_attrs,
 };
 
 static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
@@ -396,9 +411,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 					       unsigned int block,
 					       u32 address)
 {
-	int err;
-	u32 low, high;
 	struct threshold_block *b = NULL;
+	u32 low, high;
+	int err;
 
 	if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
 		return 0;
@@ -421,20 +436,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 	if (!b)
 		return -ENOMEM;
 
-	b->block = block;
-	b->bank = bank;
-	b->cpu = cpu;
-	b->address = address;
-	b->interrupt_enable = 0;
-	b->threshold_limit = THRESHOLD_MAX;
+	b->block		= block;
+	b->bank			= bank;
+	b->cpu			= cpu;
+	b->address		= address;
+	b->interrupt_enable	= 0;
+	b->threshold_limit	= THRESHOLD_MAX;
 
 	INIT_LIST_HEAD(&b->miscj);
 
-	if (per_cpu(threshold_banks, cpu)[bank]->blocks)
+	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
 		list_add(&b->miscj,
 			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
-	else
+	} else {
 		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
+	}
 
 	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
 				   per_cpu(threshold_banks, cpu)[bank]->kobj,
@@ -447,8 +463,9 @@ recurse:
 		if (!address)
 			return 0;
 		address += MCG_XBLK_ADDR;
-	} else
+	} else {
 		++address;
+	}
 
 	err = allocate_threshold_blocks(cpu, bank, ++block, address);
 	if (err)
@@ -507,6 +524,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 		cpumask_copy(b->cpus, cpu_core_mask(cpu));
 		per_cpu(threshold_banks, cpu)[bank] = b;
+
 		goto out;
 	}
 #endif
@@ -605,15 +623,13 @@ static void deallocate_threshold_block(unsigned int cpu,
 
 static void threshold_remove_bank(unsigned int cpu, int bank)
 {
-	int i = 0;
 	struct threshold_bank *b;
 	char name[32];
+	int i = 0;
 
 	b = per_cpu(threshold_banks, cpu)[bank];
-
 	if (!b)
 		return;
-
 	if (!b->blocks)
 		goto free_out;
 
@@ -624,6 +640,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
 		sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
+
 		return;
 	}
 #endif
@@ -659,8 +676,8 @@ static void threshold_remove_device(unsigned int cpu)
 }
 
 /* get notified when a cpu comes on/off */
-static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
-						     unsigned int cpu)
+static void __cpuinit
+amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
 {
 	if (cpu >= NR_CPUS)
 		return;
@@ -686,11 +703,12 @@ static __init int threshold_init_device(void)
 	/* to hit CPUs online before the notifier is up */
 	for_each_online_cpu(lcpu) {
 		int err = threshold_create_device(lcpu);
+
 		if (err)
 			return err;
 	}
 	threshold_cpu_callback = amd_64_threshold_cpu_callback;
+
 	return 0;
 }
-
 device_initcall(threshold_init_device);
-- 
cgit v0.10.2


From 6cc6f3ebd19fea722c19630af5ad68af7f51d493 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 8 Apr 2009 12:31:23 +0200
Subject: x86, mce: unify Intel thermal init, prepare

Prepare for unification, make two intel_init_thermal equal.

[ Impact: cleanup ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index cef3ee3..b85d0c1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -34,21 +34,22 @@ asmlinkage void smp_thermal_interrupt(void)
 	irq_exit();
 }
 
+static inline void intel_set_thermal_handler(void) { }
+
 static void intel_init_thermal(struct cpuinfo_x86 *c)
 {
-	u32 l, h;
-	int tm2 = 0;
 	unsigned int cpu = smp_processor_id();
+	int tm2 = 0;
+	u32 l, h;
 
-	if (!cpu_has(c, X86_FEATURE_ACPI))
-		return;
-
-	if (!cpu_has(c, X86_FEATURE_ACC))
+	/* Thermal monitoring depends on ACPI and clock modulation*/
+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
 		return;
 
-	/* first check if TM1 is already enabled by the BIOS, in which
-	 * case there might be some SMM goo which handles it, so we can't even
-	 * put a handler since it might be delivered via SMI already.
+	/*
+	 * First check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already:
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	h = apic_read(APIC_LVTTHMR);
@@ -61,31 +62,35 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
 		tm2 = 1;
 
+	/* Check whether a vector already exists */
 	if (h & APIC_VECTOR_MASK) {
 		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already "
-		       "installed\n", cpu, (h & APIC_VECTOR_MASK));
+		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
+		       cpu, (h & APIC_VECTOR_MASK));
 		return;
 	}
 
-	h = THERMAL_APIC_VECTOR;
-	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);
+	/* We'll mask the thermal vector in the lapic till we're ready: */
+	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
 	apic_write(APIC_LVTTHMR, h);
 
 	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
 	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
 
+	intel_set_thermal_handler();
+
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
 
+	/* Unmask the thermal vector: */
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
 	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
-		cpu, tm2 ? "TM2" : "TM1");
+	       cpu, tm2 ? "TM2" : "TM1");
 
 	/* enable thermal throttle processing */
 	atomic_set(&therm_throt_en, 1);
-	return;
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index cb344ab..f70753a 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -66,19 +66,21 @@ void smp_thermal_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
+static void intel_set_thermal_handler(void)
+{
+	vendor_thermal_interrupt = intel_thermal_interrupt;
+}
+
 /* P4/Xeon Thermal regulation detect and init: */
 static void intel_init_thermal(struct cpuinfo_x86 *c)
 {
 	unsigned int cpu = smp_processor_id();
+	int tm2 = 0;
 	u32 l, h;
 
-	/* Thermal monitoring: */
-	if (!cpu_has(c, X86_FEATURE_ACPI))
-		return;	/* -ENODEV */
-
-	/* Clock modulation: */
-	if (!cpu_has(c, X86_FEATURE_ACC))
-		return;	/* -ENODEV */
+	/* Thermal monitoring depends on ACPI and clock modulation*/
+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+		return;
 
 	/*
 	 * First check if its enabled already, in which case there might
@@ -90,35 +92,28 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
 		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
-
-		return; /* -EBUSY */
+		return;
 	}
 
-	/* Check whether a vector already exists, temporarily masked? */
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
+		tm2 = 1;
+
+	/* Check whether a vector already exists */
 	if (h & APIC_VECTOR_MASK) {
 		printk(KERN_DEBUG
 		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
 		       cpu, (h & APIC_VECTOR_MASK));
-
-		return; /* -EBUSY */
+		return;
 	}
 
-	/*
-	 * The temperature transition interrupt handler setup:
-	 */
-
-	/* Our delivery vector: */
-	h = THERMAL_APIC_VECTOR;
-
 	/* We'll mask the thermal vector in the lapic till we're ready: */
-	h |= APIC_DM_FIXED | APIC_LVT_MASKED;
+	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
 	apic_write(APIC_LVTTHMR, h);
 
 	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
 
-	/* Ok, we're good to go... */
-	vendor_thermal_interrupt = intel_thermal_interrupt;
+	intel_set_thermal_handler();
 
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
@@ -127,7 +122,8 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+	       cpu, tm2 ? "TM2" : "TM1");
 
 	/* enable thermal throttle processing */
 	atomic_set(&therm_throt_en, 1);
-- 
cgit v0.10.2


From a65d086235208a3b3546e209d2210048549099b2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 8 Apr 2009 12:31:23 +0200
Subject: x86, mce: unify Intel thermal init

Mechanic unification. No change in code.

[ Impact: cleanup, 32-bit / 64-bit unification ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index b2f8982..6def769 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,7 +1,8 @@
 obj-y				=  mce_$(BITS).o therm_throt.o
 
 obj-$(CONFIG_X86_32)		+= k7.o p4.o p5.o p6.o winchip.o
-obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o
+obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
index ae9f628..2d1a54b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -1,6 +1,8 @@
 #include <linux/init.h>
 #include <asm/mce.h>
 
+#ifdef CONFIG_X86_32
+
 void amd_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
@@ -12,3 +14,12 @@ extern void (*machine_check_vector)(struct pt_regs *, long error_code);
 
 extern int nr_mce_banks;
 
+void intel_set_thermal_handler(void);
+
+#else
+
+static inline void intel_set_thermal_handler(void) { }
+
+#endif
+
+void intel_init_thermal(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
new file mode 100644
index 0000000..bad3cbb
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -0,0 +1,73 @@
+/*
+ * Common code for Intel machine checks
+ */
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/therm_throt.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/apic.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	unsigned int cpu = smp_processor_id();
+	int tm2 = 0;
+	u32 l, h;
+
+	/* Thermal monitoring depends on ACPI and clock modulation*/
+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+		return;
+
+	/*
+	 * First check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already:
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & (1 << 3)) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+		return;
+	}
+
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+		tm2 = 1;
+
+	/* Check whether a vector already exists */
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
+		       cpu, (h & APIC_VECTOR_MASK));
+		return;
+	}
+
+	/* We'll mask the thermal vector in the lapic till we're ready: */
+	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+	apic_write(APIC_LVTTHMR, h);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
+
+	intel_set_thermal_handler();
+
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
+
+	/* Unmask the thermal vector: */
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+	       cpu, tm2 ? "TM2" : "TM1");
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index b85d0c1..38f9632 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -17,6 +17,8 @@
 #include <asm/therm_throt.h>
 #include <asm/apic.h>
 
+#include "mce.h"
+
 asmlinkage void smp_thermal_interrupt(void)
 {
 	__u64 msr_val;
@@ -34,65 +36,6 @@ asmlinkage void smp_thermal_interrupt(void)
 	irq_exit();
 }
 
-static inline void intel_set_thermal_handler(void) { }
-
-static void intel_init_thermal(struct cpuinfo_x86 *c)
-{
-	unsigned int cpu = smp_processor_id();
-	int tm2 = 0;
-	u32 l, h;
-
-	/* Thermal monitoring depends on ACPI and clock modulation*/
-	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
-		return;
-
-	/*
-	 * First check if its enabled already, in which case there might
-	 * be some SMM goo which handles it, so we can't even put a handler
-	 * since it might be delivered via SMI already:
-	 */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
-		return;
-	}
-
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
-		tm2 = 1;
-
-	/* Check whether a vector already exists */
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
-		       cpu, (h & APIC_VECTOR_MASK));
-		return;
-	}
-
-	/* We'll mask the thermal vector in the lapic till we're ready: */
-	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
-	apic_write(APIC_LVTTHMR, h);
-
-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
-
-	intel_set_thermal_handler();
-
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
-	/* Unmask the thermal vector: */
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
-	       cpu, tm2 ? "TM2" : "TM1");
-
-	/* enable thermal throttle processing */
-	atomic_set(&therm_throt_en, 1);
-}
-
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index f70753a..f979ffe 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -66,68 +66,11 @@ void smp_thermal_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
-static void intel_set_thermal_handler(void)
+void intel_set_thermal_handler(void)
 {
 	vendor_thermal_interrupt = intel_thermal_interrupt;
 }
 
-/* P4/Xeon Thermal regulation detect and init: */
-static void intel_init_thermal(struct cpuinfo_x86 *c)
-{
-	unsigned int cpu = smp_processor_id();
-	int tm2 = 0;
-	u32 l, h;
-
-	/* Thermal monitoring depends on ACPI and clock modulation*/
-	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
-		return;
-
-	/*
-	 * First check if its enabled already, in which case there might
-	 * be some SMM goo which handles it, so we can't even put a handler
-	 * since it might be delivered via SMI already:
-	 */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
-		return;
-	}
-
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
-		tm2 = 1;
-
-	/* Check whether a vector already exists */
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
-		       cpu, (h & APIC_VECTOR_MASK));
-		return;
-	}
-
-	/* We'll mask the thermal vector in the lapic till we're ready: */
-	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
-	apic_write(APIC_LVTTHMR, h);
-
-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
-
-	intel_set_thermal_handler();
-
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
-	/* Unmask the thermal vector: */
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
-	       cpu, tm2 ? "TM2" : "TM1");
-
-	/* enable thermal throttle processing */
-	atomic_set(&therm_throt_en, 1);
-}
 #endif /* CONFIG_X86_MCE_P4THERMAL */
 
 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-- 
cgit v0.10.2


From 06b851d98266b812b2fa23d007cdf53f41194bbb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:25 +0200
Subject: x86, mce: unify, prepare 64bit in mce.h

Prepare mce.h for unification, so that it will build on 32-bit x86
kernels too.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4f8c199..8488210 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -1,8 +1,6 @@
 #ifndef _ASM_X86_MCE_H
 #define _ASM_X86_MCE_H
 
-#ifdef __x86_64__
-
 #include <linux/types.h>
 #include <asm/ioctls.h>
 
@@ -10,21 +8,21 @@
  * Machine Check support for x86
  */
 
-#define MCG_CTL_P	 (1UL<<8)   /* MCG_CAP register available */
+#define MCG_CTL_P	 (1ULL<<8)   /* MCG_CAP register available */
 #define MCG_EXT_P	 (1ULL<<9)   /* Extended registers available */
 #define MCG_CMCI_P	 (1ULL<<10)  /* CMCI supported */
 
-#define MCG_STATUS_RIPV  (1UL<<0)   /* restart ip valid */
-#define MCG_STATUS_EIPV  (1UL<<1)   /* ip points to correct instruction */
-#define MCG_STATUS_MCIP  (1UL<<2)   /* machine check in progress */
+#define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
+#define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
+#define MCG_STATUS_MCIP  (1ULL<<2)   /* machine check in progress */
 
-#define MCI_STATUS_VAL   (1UL<<63)  /* valid error */
-#define MCI_STATUS_OVER  (1UL<<62)  /* previous errors lost */
-#define MCI_STATUS_UC    (1UL<<61)  /* uncorrected error */
-#define MCI_STATUS_EN    (1UL<<60)  /* error enabled */
-#define MCI_STATUS_MISCV (1UL<<59)  /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1UL<<58)  /* addr reg. valid */
-#define MCI_STATUS_PCC   (1UL<<57)  /* processor context corrupt */
+#define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
+#define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
+#define MCI_STATUS_UC    (1ULL<<61)  /* uncorrected error */
+#define MCI_STATUS_EN    (1ULL<<60)  /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59)  /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58)  /* addr reg. valid */
+#define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
 
 /* Fields are zero when not available */
 struct mce {
@@ -82,13 +80,11 @@ struct mce_log {
 #define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
 #define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
 
-#endif /* __x86_64__ */
-
 #ifdef __KERNEL__
 
 #ifdef CONFIG_X86_32
 extern int mce_disabled;
-#else /* CONFIG_X86_32 */
+#endif
 
 #include <asm/atomic.h>
 
@@ -143,8 +139,6 @@ extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 extern int mce_notify_user(void);
 
-#endif /* !CONFIG_X86_32 */
-
 #ifdef CONFIG_X86_MCE
 extern void mcheck_init(struct cpuinfo_x86 *c);
 #else
-- 
cgit v0.10.2


From a988d334ae8213c0e0e62327222f6e5e6e52bcf1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:25 +0200
Subject: x86, mce: unify, prepare codes

Move current 32-bit mce_32.c code into mce_64.c.

[ Remove unused artifact stop/restart_mce pointed by Andi Kleen ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@firstfloor.org>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 1491246..ce48ae7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -1240,3 +1240,68 @@ static __init int mce_init_device(void)
 }
 
 device_initcall(mce_init_device);
+
+#ifdef CONFIG_X86_32
+
+int mce_disabled;
+
+int nr_mce_banks;
+EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
+
+/* Handle unconfigured int18 (should never happen) */
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
+	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+	       smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+						unexpected_machine_check;
+
+/* This has to be run for each processor */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+	if (mce_disabled == 1)
+		return;
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_mcheck_init(c);
+		break;
+
+	case X86_VENDOR_INTEL:
+		if (c->x86 == 5)
+			intel_p5_mcheck_init(c);
+		if (c->x86 == 6)
+			intel_p6_mcheck_init(c);
+		if (c->x86 == 15)
+			intel_p4_mcheck_init(c);
+		break;
+
+	case X86_VENDOR_CENTAUR:
+		if (c->x86 == 5)
+			winchip_mcheck_init(c);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static int __init mcheck_disable(char *str)
+{
+	mce_disabled = 1;
+	return 1;
+}
+
+static int __init mcheck_enable(char *str)
+{
+	mce_disabled = -1;
+	return 1;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce", mcheck_enable);
+
+#endif /* CONFIG_X86_32 */
-- 
cgit v0.10.2


From 711c2e481c9d1113650d09de10f61ee88ab56fda Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:26 +0200
Subject: x86, mce: unify, prepare for 32-bit v2

Prepare the 64-bit mce_64.c code side to be built on 32-bit.

[ includes ifdef relocation by Andi Kleen ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@firstfloor.org>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
index 2d1a54b..cd6cffc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -1,14 +1,14 @@
 #include <linux/init.h>
 #include <asm/mce.h>
 
-#ifdef CONFIG_X86_32
-
 void amd_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
 
+#ifdef CONFIG_X86_32
+
 /* Call the installed machine check handler for this CPU setup. */
 extern void (*machine_check_vector)(struct pt_regs *, long error_code);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index ce48ae7..2e2c3d2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -37,6 +37,10 @@
 #include <asm/msr.h>
 #include <asm/smp.h>
 
+#include "mce.h"
+
+#ifdef CONFIG_X86_64
+
 #define MISC_MCELOG_MINOR	227
 
 atomic_t mce_entry;
@@ -1241,7 +1245,7 @@ static __init int mce_init_device(void)
 
 device_initcall(mce_init_device);
 
-#ifdef CONFIG_X86_32
+#else /* CONFIG_X86_32: */
 
 int mce_disabled;
 
-- 
cgit v0.10.2


From dba3725d44f5dfb5711fd509fca10b5b828c43b7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:26 +0200
Subject: x86, mce: unify

move mce_64.c => mce.c and glue it up in the Makefile.
Remove mce_32.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 6def769..55f01b3 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,4 +1,4 @@
-obj-y				=  mce_$(BITS).o therm_throt.o
+obj-y				=  mce.o therm_throt.o
 
 obj-$(CONFIG_X86_32)		+= k7.o p4.o p5.o p6.o winchip.o
 obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
new file mode 100644
index 0000000..2e2c3d2
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -0,0 +1,1311 @@
+/*
+ * Machine check handler.
+ *
+ * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ * Rest from unknown author(s).
+ * 2004 Andi Kleen. Rewrote most of it.
+ * Copyright 2008 Intel Corporation
+ * Author: Andi Kleen
+ */
+#include <linux/thread_info.h>
+#include <linux/capability.h>
+#include <linux/miscdevice.h>
+#include <linux/ratelimit.h>
+#include <linux/kallsyms.h>
+#include <linux/rcupdate.h>
+#include <linux/smp_lock.h>
+#include <linux/kobject.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/string.h>
+#include <linux/sysdev.h>
+#include <linux/ctype.h>
+#include <linux/sched.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/poll.h>
+#include <linux/cpu.h>
+#include <linux/fs.h>
+
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
+#include <asm/smp.h>
+
+#include "mce.h"
+
+#ifdef CONFIG_X86_64
+
+#define MISC_MCELOG_MINOR	227
+
+atomic_t mce_entry;
+
+static int			mce_dont_init;
+
+/*
+ * Tolerant levels:
+ *   0: always panic on uncorrected errors, log corrected errors
+ *   1: panic or SIGBUS on uncorrected errors, log corrected errors
+ *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
+ *   3: never panic or SIGBUS, log all errors (for testing only)
+ */
+static int			tolerant = 1;
+static int			banks;
+static u64			*bank;
+static unsigned long		notify_user;
+static int			rip_msr;
+static int			mce_bootlog = -1;
+static atomic_t			mce_events;
+
+static char			trigger[128];
+static char			*trigger_argv[2] = { trigger, NULL };
+
+static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
+
+/* MCA banks polled by the period polling timer for corrected events */
+DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
+	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
+};
+
+/* Do initial initialization of a struct mce */
+void mce_setup(struct mce *m)
+{
+	memset(m, 0, sizeof(struct mce));
+	m->cpu = smp_processor_id();
+	rdtscll(m->tsc);
+}
+
+/*
+ * Lockless MCE logging infrastructure.
+ * This avoids deadlocks on printk locks without having to break locks. Also
+ * separate MCEs from kernel messages to avoid bogus bug reports.
+ */
+
+static struct mce_log mcelog = {
+	MCE_LOG_SIGNATURE,
+	MCE_LOG_LEN,
+};
+
+void mce_log(struct mce *mce)
+{
+	unsigned next, entry;
+
+	atomic_inc(&mce_events);
+	mce->finished = 0;
+	wmb();
+	for (;;) {
+		entry = rcu_dereference(mcelog.next);
+		for (;;) {
+			/*
+			 * When the buffer fills up discard new entries.
+			 * Assume that the earlier errors are the more
+			 * interesting ones:
+			 */
+			if (entry >= MCE_LOG_LEN) {
+				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+				return;
+			}
+			/* Old left over entry. Skip: */
+			if (mcelog.entry[entry].finished) {
+				entry++;
+				continue;
+			}
+			break;
+		}
+		smp_rmb();
+		next = entry + 1;
+		if (cmpxchg(&mcelog.next, entry, next) == entry)
+			break;
+	}
+	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
+	wmb();
+	mcelog.entry[entry].finished = 1;
+	wmb();
+
+	set_bit(0, &notify_user);
+}
+
+static void print_mce(struct mce *m)
+{
+	printk(KERN_EMERG "\n"
+	       KERN_EMERG "HARDWARE ERROR\n"
+	       KERN_EMERG
+	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+	       m->cpu, m->mcgstatus, m->bank, m->status);
+	if (m->ip) {
+		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
+		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
+		       m->cs, m->ip);
+		if (m->cs == __KERNEL_CS)
+			print_symbol("{%s}", m->ip);
+		printk("\n");
+	}
+	printk(KERN_EMERG "TSC %llx ", m->tsc);
+	if (m->addr)
+		printk("ADDR %llx ", m->addr);
+	if (m->misc)
+		printk("MISC %llx ", m->misc);
+	printk("\n");
+	printk(KERN_EMERG "This is not a software problem!\n");
+	printk(KERN_EMERG "Run through mcelog --ascii to decode "
+	       "and contact your hardware vendor\n");
+}
+
+static void mce_panic(char *msg, struct mce *backup, unsigned long start)
+{
+	int i;
+
+	oops_begin();
+	for (i = 0; i < MCE_LOG_LEN; i++) {
+		unsigned long tsc = mcelog.entry[i].tsc;
+
+		if (time_before(tsc, start))
+			continue;
+		print_mce(&mcelog.entry[i]);
+		if (backup && mcelog.entry[i].tsc == backup->tsc)
+			backup = NULL;
+	}
+	if (backup)
+		print_mce(backup);
+	panic(msg);
+}
+
+int mce_available(struct cpuinfo_x86 *c)
+{
+	if (mce_dont_init)
+		return 0;
+	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+}
+
+static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
+{
+	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
+		m->ip = regs->ip;
+		m->cs = regs->cs;
+	} else {
+		m->ip = 0;
+		m->cs = 0;
+	}
+	if (rip_msr) {
+		/* Assume the RIP in the MSR is exact. Is this true? */
+		m->mcgstatus |= MCG_STATUS_EIPV;
+		rdmsrl(rip_msr, m->ip);
+		m->cs = 0;
+	}
+}
+
+/*
+ * Poll for corrected events or events that happened before reset.
+ * Those are just logged through /dev/mcelog.
+ *
+ * This is executed in standard interrupt context.
+ */
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+{
+	struct mce m;
+	int i;
+
+	mce_setup(&m);
+
+	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+	for (i = 0; i < banks; i++) {
+		if (!bank[i] || !test_bit(i, *b))
+			continue;
+
+		m.misc = 0;
+		m.addr = 0;
+		m.bank = i;
+		m.tsc = 0;
+
+		barrier();
+		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
+		if (!(m.status & MCI_STATUS_VAL))
+			continue;
+
+		/*
+		 * Uncorrected events are handled by the exception handler
+		 * when it is enabled. But when the exception is disabled log
+		 * everything.
+		 *
+		 * TBD do the same check for MCI_STATUS_EN here?
+		 */
+		if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
+			continue;
+
+		if (m.status & MCI_STATUS_MISCV)
+			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
+		if (m.status & MCI_STATUS_ADDRV)
+			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
+
+		if (!(flags & MCP_TIMESTAMP))
+			m.tsc = 0;
+		/*
+		 * Don't get the IP here because it's unlikely to
+		 * have anything to do with the actual error location.
+		 */
+		if (!(flags & MCP_DONTLOG)) {
+			mce_log(&m);
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+
+		/*
+		 * Clear state for this bank.
+		 */
+		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	}
+
+	/*
+	 * Don't clear MCG_STATUS here because it's only defined for
+	 * exceptions.
+	 */
+}
+
+/*
+ * The actual machine check handler. This only handles real
+ * exceptions when something got corrupted coming in through int 18.
+ *
+ * This is executed in NMI context not subject to normal locking rules. This
+ * implies that most kernel services cannot be safely used. Don't even
+ * think about putting a printk in there!
+ */
+void do_machine_check(struct pt_regs *regs, long error_code)
+{
+	struct mce m, panicm;
+	int panicm_found = 0;
+	u64 mcestart = 0;
+	int i;
+	/*
+	 * If no_way_out gets set, there is no safe way to recover from this
+	 * MCE.  If tolerant is cranked up, we'll try anyway.
+	 */
+	int no_way_out = 0;
+	/*
+	 * If kill_it gets set, there might be a way to recover from this
+	 * error.
+	 */
+	int kill_it = 0;
+	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
+
+	atomic_inc(&mce_entry);
+
+	if (notify_die(DIE_NMI, "machine check", regs, error_code,
+			   18, SIGKILL) == NOTIFY_STOP)
+		goto out2;
+	if (!banks)
+		goto out2;
+
+	mce_setup(&m);
+
+	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+
+	/* if the restart IP is not valid, we're done for */
+	if (!(m.mcgstatus & MCG_STATUS_RIPV))
+		no_way_out = 1;
+
+	rdtscll(mcestart);
+	barrier();
+
+	for (i = 0; i < banks; i++) {
+		__clear_bit(i, toclear);
+		if (!bank[i])
+			continue;
+
+		m.misc = 0;
+		m.addr = 0;
+		m.bank = i;
+
+		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
+		if ((m.status & MCI_STATUS_VAL) == 0)
+			continue;
+
+		/*
+		 * Non uncorrected errors are handled by machine_check_poll
+		 * Leave them alone.
+		 */
+		if ((m.status & MCI_STATUS_UC) == 0)
+			continue;
+
+		/*
+		 * Set taint even when machine check was not enabled.
+		 */
+		add_taint(TAINT_MACHINE_CHECK);
+
+		__set_bit(i, toclear);
+
+		if (m.status & MCI_STATUS_EN) {
+			/* if PCC was set, there's no way out */
+			no_way_out |= !!(m.status & MCI_STATUS_PCC);
+			/*
+			 * If this error was uncorrectable and there was
+			 * an overflow, we're in trouble.  If no overflow,
+			 * we might get away with just killing a task.
+			 */
+			if (m.status & MCI_STATUS_UC) {
+				if (tolerant < 1 || m.status & MCI_STATUS_OVER)
+					no_way_out = 1;
+				kill_it = 1;
+			}
+		} else {
+			/*
+			 * Machine check event was not enabled. Clear, but
+			 * ignore.
+			 */
+			continue;
+		}
+
+		if (m.status & MCI_STATUS_MISCV)
+			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
+		if (m.status & MCI_STATUS_ADDRV)
+			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
+
+		mce_get_rip(&m, regs);
+		mce_log(&m);
+
+		/*
+		 * Did this bank cause the exception?
+		 *
+		 * Assume that the bank with uncorrectable errors did it,
+		 * and that there is only a single one:
+		 */
+		if ((m.status & MCI_STATUS_UC) &&
+					(m.status & MCI_STATUS_EN)) {
+			panicm = m;
+			panicm_found = 1;
+		}
+	}
+
+	/*
+	 * If we didn't find an uncorrectable error, pick
+	 * the last one (shouldn't happen, just being safe).
+	 */
+	if (!panicm_found)
+		panicm = m;
+
+	/*
+	 * If we have decided that we just CAN'T continue, and the user
+	 * has not set tolerant to an insane level, give up and die.
+	 */
+	if (no_way_out && tolerant < 3)
+		mce_panic("Machine check", &panicm, mcestart);
+
+	/*
+	 * If the error seems to be unrecoverable, something should be
+	 * done.  Try to kill as little as possible.  If we can kill just
+	 * one task, do that.  If the user has set the tolerance very
+	 * high, don't try to do anything at all.
+	 */
+	if (kill_it && tolerant < 3) {
+		int user_space = 0;
+
+		/*
+		 * If the EIPV bit is set, it means the saved IP is the
+		 * instruction which caused the MCE.
+		 */
+		if (m.mcgstatus & MCG_STATUS_EIPV)
+			user_space = panicm.ip && (panicm.cs & 3);
+
+		/*
+		 * If we know that the error was in user space, send a
+		 * SIGBUS.  Otherwise, panic if tolerance is low.
+		 *
+		 * force_sig() takes an awful lot of locks and has a slight
+		 * risk of deadlocking.
+		 */
+		if (user_space) {
+			force_sig(SIGBUS, current);
+		} else if (panic_on_oops || tolerant < 2) {
+			mce_panic("Uncorrected machine check",
+				&panicm, mcestart);
+		}
+	}
+
+	/* notify userspace ASAP */
+	set_thread_flag(TIF_MCE_NOTIFY);
+
+	/* the last thing we do is clear state */
+	for (i = 0; i < banks; i++) {
+		if (test_bit(i, toclear))
+			wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	}
+	wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ out2:
+	atomic_dec(&mce_entry);
+}
+
+#ifdef CONFIG_X86_MCE_INTEL
+/***
+ * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
+ * @cpu: The CPU on which the event occurred.
+ * @status: Event status information
+ *
+ * This function should be called by the thermal interrupt after the
+ * event has been processed and the decision was made to log the event
+ * further.
+ *
+ * The status parameter will be saved to the 'status' field of 'struct mce'
+ * and historically has been the register value of the
+ * MSR_IA32_THERMAL_STATUS (Intel) msr.
+ */
+void mce_log_therm_throt_event(__u64 status)
+{
+	struct mce m;
+
+	mce_setup(&m);
+	m.bank = MCE_THERMAL_BANK;
+	m.status = status;
+	mce_log(&m);
+}
+#endif /* CONFIG_X86_MCE_INTEL */
+
+/*
+ * Periodic polling timer for "silent" machine check errors.  If the
+ * poller finds an MCE, poll 2x faster.  When the poller finds no more
+ * errors, poll 2x slower (up to check_interval seconds).
+ */
+static int check_interval = 5 * 60; /* 5 minutes */
+
+static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
+static DEFINE_PER_CPU(struct timer_list, mce_timer);
+
+static void mcheck_timer(unsigned long data)
+{
+	struct timer_list *t = &per_cpu(mce_timer, data);
+	int *n;
+
+	WARN_ON(smp_processor_id() != data);
+
+	if (mce_available(&current_cpu_data)) {
+		machine_check_poll(MCP_TIMESTAMP,
+				&__get_cpu_var(mce_poll_banks));
+	}
+
+	/*
+	 * Alert userspace if needed.  If we logged an MCE, reduce the
+	 * polling interval, otherwise increase the polling interval.
+	 */
+	n = &__get_cpu_var(next_interval);
+	if (mce_notify_user()) {
+		*n = max(*n/2, HZ/100);
+	} else {
+		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+	}
+
+	t->expires = jiffies + *n;
+	add_timer(t);
+}
+
+static void mce_do_trigger(struct work_struct *work)
+{
+	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+}
+
+static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
+
+/*
+ * Notify the user(s) about new machine check events.
+ * Can be called from interrupt context, but not from machine check/NMI
+ * context.
+ */
+int mce_notify_user(void)
+{
+	/* Not more than two messages every minute */
+	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
+
+	clear_thread_flag(TIF_MCE_NOTIFY);
+
+	if (test_and_clear_bit(0, &notify_user)) {
+		wake_up_interruptible(&mce_wait);
+
+		/*
+		 * There is no risk of missing notifications because
+		 * work_pending is always cleared before the function is
+		 * executed.
+		 */
+		if (trigger[0] && !work_pending(&mce_trigger_work))
+			schedule_work(&mce_trigger_work);
+
+		if (__ratelimit(&ratelimit))
+			printk(KERN_INFO "Machine check events logged\n");
+
+		return 1;
+	}
+	return 0;
+}
+
+/* see if the idle task needs to notify userspace: */
+static int
+mce_idle_callback(struct notifier_block *nfb, unsigned long action,
+		  void *unused)
+{
+	/* IDLE_END should be safe - interrupts are back on */
+	if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
+		mce_notify_user();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_idle_notifier = {
+	.notifier_call		= mce_idle_callback,
+};
+
+static __init int periodic_mcheck_init(void)
+{
+       idle_notifier_register(&mce_idle_notifier);
+       return 0;
+}
+__initcall(periodic_mcheck_init);
+
+/*
+ * Initialize Machine Checks for a CPU.
+ */
+static int mce_cap_init(void)
+{
+	unsigned b;
+	u64 cap;
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	b = cap & 0xff;
+	if (b > MAX_NR_BANKS) {
+		printk(KERN_WARNING
+		       "MCE: Using only %u machine check banks out of %u\n",
+			MAX_NR_BANKS, b);
+		b = MAX_NR_BANKS;
+	}
+
+	/* Don't support asymmetric configurations today */
+	WARN_ON(banks != 0 && b != banks);
+	banks = b;
+	if (!bank) {
+		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
+		if (!bank)
+			return -ENOMEM;
+		memset(bank, 0xff, banks * sizeof(u64));
+	}
+
+	/* Use accurate RIP reporting if available. */
+	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
+		rip_msr = MSR_IA32_MCG_EIP;
+
+	return 0;
+}
+
+static void mce_init(void *dummy)
+{
+	mce_banks_t all_banks;
+	u64 cap;
+	int i;
+
+	/*
+	 * Log the machine checks left over from the previous reset.
+	 */
+	bitmap_fill(all_banks, MAX_NR_BANKS);
+	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
+
+	set_in_cr4(X86_CR4_MCE);
+
+	rdmsrl(MSR_IA32_MCG_CAP, cap);
+	if (cap & MCG_CTL_P)
+		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+	for (i = 0; i < banks; i++) {
+		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	}
+}
+
+/* Add per CPU specific workarounds here */
+static void mce_cpu_quirks(struct cpuinfo_x86 *c)
+{
+	/* This should be disabled by the BIOS, but isn't always */
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		if (c->x86 == 15 && banks > 4) {
+			/*
+			 * disable GART TBL walk error reporting, which
+			 * trips off incorrectly with the IOMMU & 3ware
+			 * & Cerberus:
+			 */
+			clear_bit(10, (unsigned long *)&bank[4]);
+		}
+		if (c->x86 <= 17 && mce_bootlog < 0) {
+			/*
+			 * Lots of broken BIOS around that don't clear them
+			 * by default and leave crap in there. Don't log:
+			 */
+			mce_bootlog = 0;
+		}
+	}
+
+}
+
+static void mce_cpu_features(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_init(c);
+		break;
+	case X86_VENDOR_AMD:
+		mce_amd_feature_init(c);
+		break;
+	default:
+		break;
+	}
+}
+
+static void mce_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	int *n = &__get_cpu_var(next_interval);
+
+	*n = check_interval * HZ;
+	if (!*n)
+		return;
+	setup_timer(t, mcheck_timer, smp_processor_id());
+	t->expires = round_jiffies(jiffies + *n);
+	add_timer(t);
+}
+
+/*
+ * Called for each booted CPU to set up machine checks.
+ * Must be called with preempt off:
+ */
+void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+{
+	if (!mce_available(c))
+		return;
+
+	if (mce_cap_init() < 0) {
+		mce_dont_init = 1;
+		return;
+	}
+	mce_cpu_quirks(c);
+
+	mce_init(NULL);
+	mce_cpu_features(c);
+	mce_init_timer();
+}
+
+/*
+ * Character device to read and clear the MCE log.
+ */
+
+static DEFINE_SPINLOCK(mce_state_lock);
+static int		open_count;		/* #times opened */
+static int		open_exclu;		/* already open exclusive? */
+
+static int mce_open(struct inode *inode, struct file *file)
+{
+	lock_kernel();
+	spin_lock(&mce_state_lock);
+
+	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
+		spin_unlock(&mce_state_lock);
+		unlock_kernel();
+
+		return -EBUSY;
+	}
+
+	if (file->f_flags & O_EXCL)
+		open_exclu = 1;
+	open_count++;
+
+	spin_unlock(&mce_state_lock);
+	unlock_kernel();
+
+	return nonseekable_open(inode, file);
+}
+
+static int mce_release(struct inode *inode, struct file *file)
+{
+	spin_lock(&mce_state_lock);
+
+	open_count--;
+	open_exclu = 0;
+
+	spin_unlock(&mce_state_lock);
+
+	return 0;
+}
+
+static void collect_tscs(void *data)
+{
+	unsigned long *cpu_tsc = (unsigned long *)data;
+
+	rdtscll(cpu_tsc[smp_processor_id()]);
+}
+
+static DEFINE_MUTEX(mce_read_mutex);
+
+static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
+			loff_t *off)
+{
+	char __user *buf = ubuf;
+	unsigned long *cpu_tsc;
+	unsigned prev, next;
+	int i, err;
+
+	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
+	if (!cpu_tsc)
+		return -ENOMEM;
+
+	mutex_lock(&mce_read_mutex);
+	next = rcu_dereference(mcelog.next);
+
+	/* Only supports full reads right now */
+	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
+		mutex_unlock(&mce_read_mutex);
+		kfree(cpu_tsc);
+
+		return -EINVAL;
+	}
+
+	err = 0;
+	prev = 0;
+	do {
+		for (i = prev; i < next; i++) {
+			unsigned long start = jiffies;
+
+			while (!mcelog.entry[i].finished) {
+				if (time_after_eq(jiffies, start + 2)) {
+					memset(mcelog.entry + i, 0,
+					       sizeof(struct mce));
+					goto timeout;
+				}
+				cpu_relax();
+			}
+			smp_rmb();
+			err |= copy_to_user(buf, mcelog.entry + i,
+					    sizeof(struct mce));
+			buf += sizeof(struct mce);
+timeout:
+			;
+		}
+
+		memset(mcelog.entry + prev, 0,
+		       (next - prev) * sizeof(struct mce));
+		prev = next;
+		next = cmpxchg(&mcelog.next, prev, 0);
+	} while (next != prev);
+
+	synchronize_sched();
+
+	/*
+	 * Collect entries that were still getting written before the
+	 * synchronize.
+	 */
+	on_each_cpu(collect_tscs, cpu_tsc, 1);
+
+	for (i = next; i < MCE_LOG_LEN; i++) {
+		if (mcelog.entry[i].finished &&
+		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
+			err |= copy_to_user(buf, mcelog.entry+i,
+					    sizeof(struct mce));
+			smp_rmb();
+			buf += sizeof(struct mce);
+			memset(&mcelog.entry[i], 0, sizeof(struct mce));
+		}
+	}
+	mutex_unlock(&mce_read_mutex);
+	kfree(cpu_tsc);
+
+	return err ? -EFAULT : buf - ubuf;
+}
+
+static unsigned int mce_poll(struct file *file, poll_table *wait)
+{
+	poll_wait(file, &mce_wait, wait);
+	if (rcu_dereference(mcelog.next))
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
+
+static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	int __user *p = (int __user *)arg;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case MCE_GET_RECORD_LEN:
+		return put_user(sizeof(struct mce), p);
+	case MCE_GET_LOG_LEN:
+		return put_user(MCE_LOG_LEN, p);
+	case MCE_GETCLEAR_FLAGS: {
+		unsigned flags;
+
+		do {
+			flags = mcelog.flags;
+		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+
+		return put_user(flags, p);
+	}
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations mce_chrdev_ops = {
+	.open			= mce_open,
+	.release		= mce_release,
+	.read			= mce_read,
+	.poll			= mce_poll,
+	.unlocked_ioctl		= mce_ioctl,
+};
+
+static struct miscdevice mce_log_device = {
+	MISC_MCELOG_MINOR,
+	"mcelog",
+	&mce_chrdev_ops,
+};
+
+/*
+ * Old style boot options parsing. Only for compatibility.
+ */
+static int __init mcheck_disable(char *str)
+{
+	mce_dont_init = 1;
+	return 1;
+}
+__setup("nomce", mcheck_disable);
+
+/*
+ * mce=off disables machine check
+ * mce=TOLERANCELEVEL (number, see above)
+ * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+ * mce=nobootlog Don't log MCEs from before booting.
+ */
+static int __init mcheck_enable(char *str)
+{
+	if (!strcmp(str, "off"))
+		mce_dont_init = 1;
+	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
+		mce_bootlog = (str[0] == 'b');
+	else if (isdigit(str[0]))
+		get_option(&str, &tolerant);
+	else {
+		printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n",
+		       str);
+		return 0;
+	}
+	return 1;
+}
+__setup("mce=", mcheck_enable);
+
+/*
+ * Sysfs support
+ */
+
+/*
+ * Disable machine checks on suspend and shutdown. We can't really handle
+ * them later.
+ */
+static int mce_disable(void)
+{
+	int i;
+
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+	return 0;
+}
+
+static int mce_suspend(struct sys_device *dev, pm_message_t state)
+{
+	return mce_disable();
+}
+
+static int mce_shutdown(struct sys_device *dev)
+{
+	return mce_disable();
+}
+
+/*
+ * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+ * Only one CPU is active at this time, the others get re-added later using
+ * CPU hotplug:
+ */
+static int mce_resume(struct sys_device *dev)
+{
+	mce_init(NULL);
+	mce_cpu_features(&current_cpu_data);
+
+	return 0;
+}
+
+static void mce_cpu_restart(void *data)
+{
+	del_timer_sync(&__get_cpu_var(mce_timer));
+	if (mce_available(&current_cpu_data))
+		mce_init(NULL);
+	mce_init_timer();
+}
+
+/* Reinit MCEs after user configuration changes */
+static void mce_restart(void)
+{
+	on_each_cpu(mce_cpu_restart, NULL, 1);
+}
+
+static struct sysdev_class mce_sysclass = {
+	.suspend	= mce_suspend,
+	.shutdown	= mce_shutdown,
+	.resume		= mce_resume,
+	.name		= "machinecheck",
+};
+
+DEFINE_PER_CPU(struct sys_device, device_mce);
+
+__cpuinitdata
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
+
+/* Why are there no generic functions for this? */
+#define ACCESSOR(name, var, start) \
+	static ssize_t show_ ## name(struct sys_device *s,		\
+				     struct sysdev_attribute *attr,	\
+				     char *buf) {			\
+		return sprintf(buf, "%lx\n", (unsigned long)var);	\
+	}								\
+	static ssize_t set_ ## name(struct sys_device *s,		\
+				    struct sysdev_attribute *attr,	\
+				    const char *buf, size_t siz) {	\
+		char *end;						\
+		unsigned long new = simple_strtoul(buf, &end, 0);	\
+									\
+		if (end == buf)						\
+			return -EINVAL;					\
+		var = new;						\
+		start;							\
+									\
+		return end-buf;						\
+	}								\
+	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
+
+static struct sysdev_attribute *bank_attrs;
+
+static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			 char *buf)
+{
+	u64 b = bank[attr - bank_attrs];
+
+	return sprintf(buf, "%llx\n", b);
+}
+
+static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
+			const char *buf, size_t siz)
+{
+	char *end;
+	u64 new = simple_strtoull(buf, &end, 0);
+
+	if (end == buf)
+		return -EINVAL;
+
+	bank[attr - bank_attrs] = new;
+	mce_restart();
+
+	return end-buf;
+}
+
+static ssize_t
+show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
+{
+	strcpy(buf, trigger);
+	strcat(buf, "\n");
+	return strlen(trigger) + 1;
+}
+
+static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+				const char *buf, size_t siz)
+{
+	char *p;
+	int len;
+
+	strncpy(trigger, buf, sizeof(trigger));
+	trigger[sizeof(trigger)-1] = 0;
+	len = strlen(trigger);
+	p = strchr(trigger, '\n');
+
+	if (*p)
+		*p = 0;
+
+	return len;
+}
+
+static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
+static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
+
+ACCESSOR(check_interval, check_interval, mce_restart())
+
+static struct sysdev_attribute *mce_attributes[] = {
+	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
+	NULL
+};
+
+static cpumask_var_t mce_device_initialized;
+
+/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
+static __cpuinit int mce_create_device(unsigned int cpu)
+{
+	int err;
+	int i;
+
+	if (!mce_available(&boot_cpu_data))
+		return -EIO;
+
+	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
+	per_cpu(device_mce, cpu).id	= cpu;
+	per_cpu(device_mce, cpu).cls	= &mce_sysclass;
+
+	err = sysdev_register(&per_cpu(device_mce, cpu));
+	if (err)
+		return err;
+
+	for (i = 0; mce_attributes[i]; i++) {
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+					 mce_attributes[i]);
+		if (err)
+			goto error;
+	}
+	for (i = 0; i < banks; i++) {
+		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+					&bank_attrs[i]);
+		if (err)
+			goto error2;
+	}
+	cpumask_set_cpu(cpu, mce_device_initialized);
+
+	return 0;
+error2:
+	while (--i >= 0) {
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+					&bank_attrs[i]);
+	}
+error:
+	while (--i >= 0) {
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+				   mce_attributes[i]);
+	}
+	sysdev_unregister(&per_cpu(device_mce, cpu));
+
+	return err;
+}
+
+static __cpuinit void mce_remove_device(unsigned int cpu)
+{
+	int i;
+
+	if (!cpumask_test_cpu(cpu, mce_device_initialized))
+		return;
+
+	for (i = 0; mce_attributes[i]; i++)
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+			mce_attributes[i]);
+	for (i = 0; i < banks; i++)
+		sysdev_remove_file(&per_cpu(device_mce, cpu),
+			&bank_attrs[i]);
+	sysdev_unregister(&per_cpu(device_mce, cpu));
+	cpumask_clear_cpu(cpu, mce_device_initialized);
+}
+
+/* Make sure there are no machine checks on offlined CPUs. */
+static void mce_disable_cpu(void *h)
+{
+	int i;
+	unsigned long action = *(unsigned long *)h;
+
+	if (!mce_available(&current_cpu_data))
+		return;
+	if (!(action & CPU_TASKS_FROZEN))
+		cmci_clear();
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+}
+
+static void mce_reenable_cpu(void *h)
+{
+	unsigned long action = *(unsigned long *)h;
+	int i;
+
+	if (!mce_available(&current_cpu_data))
+		return;
+
+	if (!(action & CPU_TASKS_FROZEN))
+		cmci_reenable();
+	for (i = 0; i < banks; i++)
+		wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+}
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static int __cpuinit
+mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct timer_list *t = &per_cpu(mce_timer, cpu);
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		mce_create_device(cpu);
+		if (threshold_cpu_callback)
+			threshold_cpu_callback(action, cpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		if (threshold_cpu_callback)
+			threshold_cpu_callback(action, cpu);
+		mce_remove_device(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		del_timer_sync(t);
+		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		break;
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+		t->expires = round_jiffies(jiffies +
+						__get_cpu_var(next_interval));
+		add_timer_on(t, cpu);
+		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		break;
+	case CPU_POST_DEAD:
+		/* intentionally ignoring frozen here */
+		cmci_rediscover(cpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_cpu_notifier __cpuinitdata = {
+	.notifier_call = mce_cpu_callback,
+};
+
+static __init int mce_init_banks(void)
+{
+	int i;
+
+	bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
+				GFP_KERNEL);
+	if (!bank_attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < banks; i++) {
+		struct sysdev_attribute *a = &bank_attrs[i];
+
+		a->attr.name	= kasprintf(GFP_KERNEL, "bank%d", i);
+		if (!a->attr.name)
+			goto nomem;
+
+		a->attr.mode	= 0644;
+		a->show		= show_bank;
+		a->store	= set_bank;
+	}
+	return 0;
+
+nomem:
+	while (--i >= 0)
+		kfree(bank_attrs[i].attr.name);
+	kfree(bank_attrs);
+	bank_attrs = NULL;
+
+	return -ENOMEM;
+}
+
+static __init int mce_init_device(void)
+{
+	int err;
+	int i = 0;
+
+	if (!mce_available(&boot_cpu_data))
+		return -EIO;
+
+	alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+
+	err = mce_init_banks();
+	if (err)
+		return err;
+
+	err = sysdev_class_register(&mce_sysclass);
+	if (err)
+		return err;
+
+	for_each_online_cpu(i) {
+		err = mce_create_device(i);
+		if (err)
+			return err;
+	}
+
+	register_hotcpu_notifier(&mce_cpu_notifier);
+	misc_register(&mce_log_device);
+
+	return err;
+}
+
+device_initcall(mce_init_device);
+
+#else /* CONFIG_X86_32: */
+
+int mce_disabled;
+
+int nr_mce_banks;
+EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
+
+/* Handle unconfigured int18 (should never happen) */
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
+	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+	       smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+						unexpected_machine_check;
+
+/* This has to be run for each processor */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+	if (mce_disabled == 1)
+		return;
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_mcheck_init(c);
+		break;
+
+	case X86_VENDOR_INTEL:
+		if (c->x86 == 5)
+			intel_p5_mcheck_init(c);
+		if (c->x86 == 6)
+			intel_p6_mcheck_init(c);
+		if (c->x86 == 15)
+			intel_p4_mcheck_init(c);
+		break;
+
+	case X86_VENDOR_CENTAUR:
+		if (c->x86 == 5)
+			winchip_mcheck_init(c);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static int __init mcheck_disable(char *str)
+{
+	mce_disabled = 1;
+	return 1;
+}
+
+static int __init mcheck_enable(char *str)
+{
+	mce_disabled = -1;
+	return 1;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce", mcheck_enable);
+
+#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
deleted file mode 100644
index 05979e7..0000000
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com>
- */
-#include <linux/thread_info.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-
-#include "mce.h"
-
-int mce_disabled;
-
-int nr_mce_banks;
-EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
-
-/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs *regs, long error_code)
-{
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
-	       smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) =
-						unexpected_machine_check;
-
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
-	if (mce_disabled == 1)
-		return;
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_mcheck_init(c);
-		break;
-
-	case X86_VENDOR_INTEL:
-		if (c->x86 == 5)
-			intel_p5_mcheck_init(c);
-		if (c->x86 == 6)
-			intel_p6_mcheck_init(c);
-		if (c->x86 == 15)
-			intel_p4_mcheck_init(c);
-		break;
-
-	case X86_VENDOR_CENTAUR:
-		if (c->x86 == 5)
-			winchip_mcheck_init(c);
-		break;
-
-	default:
-		break;
-	}
-}
-
-static int __init mcheck_disable(char *str)
-{
-	mce_disabled = 1;
-	return 1;
-}
-
-static int __init mcheck_enable(char *str)
-{
-	mce_disabled = -1;
-	return 1;
-}
-
-__setup("nomce", mcheck_disable);
-__setup("mce", mcheck_enable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
deleted file mode 100644
index 2e2c3d2..0000000
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ /dev/null
@@ -1,1311 +0,0 @@
-/*
- * Machine check handler.
- *
- * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
- * Rest from unknown author(s).
- * 2004 Andi Kleen. Rewrote most of it.
- * Copyright 2008 Intel Corporation
- * Author: Andi Kleen
- */
-#include <linux/thread_info.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
-#include <linux/ratelimit.h>
-#include <linux/kallsyms.h>
-#include <linux/rcupdate.h>
-#include <linux/smp_lock.h>
-#include <linux/kobject.h>
-#include <linux/kdebug.h>
-#include <linux/kernel.h>
-#include <linux/percpu.h>
-#include <linux/string.h>
-#include <linux/sysdev.h>
-#include <linux/ctype.h>
-#include <linux/sched.h>
-#include <linux/sysfs.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/kmod.h>
-#include <linux/poll.h>
-#include <linux/cpu.h>
-#include <linux/fs.h>
-
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/idle.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-#include <asm/smp.h>
-
-#include "mce.h"
-
-#ifdef CONFIG_X86_64
-
-#define MISC_MCELOG_MINOR	227
-
-atomic_t mce_entry;
-
-static int			mce_dont_init;
-
-/*
- * Tolerant levels:
- *   0: always panic on uncorrected errors, log corrected errors
- *   1: panic or SIGBUS on uncorrected errors, log corrected errors
- *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
- *   3: never panic or SIGBUS, log all errors (for testing only)
- */
-static int			tolerant = 1;
-static int			banks;
-static u64			*bank;
-static unsigned long		notify_user;
-static int			rip_msr;
-static int			mce_bootlog = -1;
-static atomic_t			mce_events;
-
-static char			trigger[128];
-static char			*trigger_argv[2] = { trigger, NULL };
-
-static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
-
-/* MCA banks polled by the period polling timer for corrected events */
-DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
-	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
-};
-
-/* Do initial initialization of a struct mce */
-void mce_setup(struct mce *m)
-{
-	memset(m, 0, sizeof(struct mce));
-	m->cpu = smp_processor_id();
-	rdtscll(m->tsc);
-}
-
-/*
- * Lockless MCE logging infrastructure.
- * This avoids deadlocks on printk locks without having to break locks. Also
- * separate MCEs from kernel messages to avoid bogus bug reports.
- */
-
-static struct mce_log mcelog = {
-	MCE_LOG_SIGNATURE,
-	MCE_LOG_LEN,
-};
-
-void mce_log(struct mce *mce)
-{
-	unsigned next, entry;
-
-	atomic_inc(&mce_events);
-	mce->finished = 0;
-	wmb();
-	for (;;) {
-		entry = rcu_dereference(mcelog.next);
-		for (;;) {
-			/*
-			 * When the buffer fills up discard new entries.
-			 * Assume that the earlier errors are the more
-			 * interesting ones:
-			 */
-			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
-				return;
-			}
-			/* Old left over entry. Skip: */
-			if (mcelog.entry[entry].finished) {
-				entry++;
-				continue;
-			}
-			break;
-		}
-		smp_rmb();
-		next = entry + 1;
-		if (cmpxchg(&mcelog.next, entry, next) == entry)
-			break;
-	}
-	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
-	wmb();
-	mcelog.entry[entry].finished = 1;
-	wmb();
-
-	set_bit(0, &notify_user);
-}
-
-static void print_mce(struct mce *m)
-{
-	printk(KERN_EMERG "\n"
-	       KERN_EMERG "HARDWARE ERROR\n"
-	       KERN_EMERG
-	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
-	       m->cpu, m->mcgstatus, m->bank, m->status);
-	if (m->ip) {
-		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
-		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
-		       m->cs, m->ip);
-		if (m->cs == __KERNEL_CS)
-			print_symbol("{%s}", m->ip);
-		printk("\n");
-	}
-	printk(KERN_EMERG "TSC %llx ", m->tsc);
-	if (m->addr)
-		printk("ADDR %llx ", m->addr);
-	if (m->misc)
-		printk("MISC %llx ", m->misc);
-	printk("\n");
-	printk(KERN_EMERG "This is not a software problem!\n");
-	printk(KERN_EMERG "Run through mcelog --ascii to decode "
-	       "and contact your hardware vendor\n");
-}
-
-static void mce_panic(char *msg, struct mce *backup, unsigned long start)
-{
-	int i;
-
-	oops_begin();
-	for (i = 0; i < MCE_LOG_LEN; i++) {
-		unsigned long tsc = mcelog.entry[i].tsc;
-
-		if (time_before(tsc, start))
-			continue;
-		print_mce(&mcelog.entry[i]);
-		if (backup && mcelog.entry[i].tsc == backup->tsc)
-			backup = NULL;
-	}
-	if (backup)
-		print_mce(backup);
-	panic(msg);
-}
-
-int mce_available(struct cpuinfo_x86 *c)
-{
-	if (mce_dont_init)
-		return 0;
-	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
-}
-
-static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
-{
-	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
-		m->ip = regs->ip;
-		m->cs = regs->cs;
-	} else {
-		m->ip = 0;
-		m->cs = 0;
-	}
-	if (rip_msr) {
-		/* Assume the RIP in the MSR is exact. Is this true? */
-		m->mcgstatus |= MCG_STATUS_EIPV;
-		rdmsrl(rip_msr, m->ip);
-		m->cs = 0;
-	}
-}
-
-/*
- * Poll for corrected events or events that happened before reset.
- * Those are just logged through /dev/mcelog.
- *
- * This is executed in standard interrupt context.
- */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
-{
-	struct mce m;
-	int i;
-
-	mce_setup(&m);
-
-	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
-	for (i = 0; i < banks; i++) {
-		if (!bank[i] || !test_bit(i, *b))
-			continue;
-
-		m.misc = 0;
-		m.addr = 0;
-		m.bank = i;
-		m.tsc = 0;
-
-		barrier();
-		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
-		if (!(m.status & MCI_STATUS_VAL))
-			continue;
-
-		/*
-		 * Uncorrected events are handled by the exception handler
-		 * when it is enabled. But when the exception is disabled log
-		 * everything.
-		 *
-		 * TBD do the same check for MCI_STATUS_EN here?
-		 */
-		if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
-			continue;
-
-		if (m.status & MCI_STATUS_MISCV)
-			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
-		if (m.status & MCI_STATUS_ADDRV)
-			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
-
-		if (!(flags & MCP_TIMESTAMP))
-			m.tsc = 0;
-		/*
-		 * Don't get the IP here because it's unlikely to
-		 * have anything to do with the actual error location.
-		 */
-		if (!(flags & MCP_DONTLOG)) {
-			mce_log(&m);
-			add_taint(TAINT_MACHINE_CHECK);
-		}
-
-		/*
-		 * Clear state for this bank.
-		 */
-		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
-	}
-
-	/*
-	 * Don't clear MCG_STATUS here because it's only defined for
-	 * exceptions.
-	 */
-}
-
-/*
- * The actual machine check handler. This only handles real
- * exceptions when something got corrupted coming in through int 18.
- *
- * This is executed in NMI context not subject to normal locking rules. This
- * implies that most kernel services cannot be safely used. Don't even
- * think about putting a printk in there!
- */
-void do_machine_check(struct pt_regs *regs, long error_code)
-{
-	struct mce m, panicm;
-	int panicm_found = 0;
-	u64 mcestart = 0;
-	int i;
-	/*
-	 * If no_way_out gets set, there is no safe way to recover from this
-	 * MCE.  If tolerant is cranked up, we'll try anyway.
-	 */
-	int no_way_out = 0;
-	/*
-	 * If kill_it gets set, there might be a way to recover from this
-	 * error.
-	 */
-	int kill_it = 0;
-	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
-
-	atomic_inc(&mce_entry);
-
-	if (notify_die(DIE_NMI, "machine check", regs, error_code,
-			   18, SIGKILL) == NOTIFY_STOP)
-		goto out2;
-	if (!banks)
-		goto out2;
-
-	mce_setup(&m);
-
-	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
-
-	/* if the restart IP is not valid, we're done for */
-	if (!(m.mcgstatus & MCG_STATUS_RIPV))
-		no_way_out = 1;
-
-	rdtscll(mcestart);
-	barrier();
-
-	for (i = 0; i < banks; i++) {
-		__clear_bit(i, toclear);
-		if (!bank[i])
-			continue;
-
-		m.misc = 0;
-		m.addr = 0;
-		m.bank = i;
-
-		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
-		if ((m.status & MCI_STATUS_VAL) == 0)
-			continue;
-
-		/*
-		 * Non uncorrected errors are handled by machine_check_poll
-		 * Leave them alone.
-		 */
-		if ((m.status & MCI_STATUS_UC) == 0)
-			continue;
-
-		/*
-		 * Set taint even when machine check was not enabled.
-		 */
-		add_taint(TAINT_MACHINE_CHECK);
-
-		__set_bit(i, toclear);
-
-		if (m.status & MCI_STATUS_EN) {
-			/* if PCC was set, there's no way out */
-			no_way_out |= !!(m.status & MCI_STATUS_PCC);
-			/*
-			 * If this error was uncorrectable and there was
-			 * an overflow, we're in trouble.  If no overflow,
-			 * we might get away with just killing a task.
-			 */
-			if (m.status & MCI_STATUS_UC) {
-				if (tolerant < 1 || m.status & MCI_STATUS_OVER)
-					no_way_out = 1;
-				kill_it = 1;
-			}
-		} else {
-			/*
-			 * Machine check event was not enabled. Clear, but
-			 * ignore.
-			 */
-			continue;
-		}
-
-		if (m.status & MCI_STATUS_MISCV)
-			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
-		if (m.status & MCI_STATUS_ADDRV)
-			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
-
-		mce_get_rip(&m, regs);
-		mce_log(&m);
-
-		/*
-		 * Did this bank cause the exception?
-		 *
-		 * Assume that the bank with uncorrectable errors did it,
-		 * and that there is only a single one:
-		 */
-		if ((m.status & MCI_STATUS_UC) &&
-					(m.status & MCI_STATUS_EN)) {
-			panicm = m;
-			panicm_found = 1;
-		}
-	}
-
-	/*
-	 * If we didn't find an uncorrectable error, pick
-	 * the last one (shouldn't happen, just being safe).
-	 */
-	if (!panicm_found)
-		panicm = m;
-
-	/*
-	 * If we have decided that we just CAN'T continue, and the user
-	 * has not set tolerant to an insane level, give up and die.
-	 */
-	if (no_way_out && tolerant < 3)
-		mce_panic("Machine check", &panicm, mcestart);
-
-	/*
-	 * If the error seems to be unrecoverable, something should be
-	 * done.  Try to kill as little as possible.  If we can kill just
-	 * one task, do that.  If the user has set the tolerance very
-	 * high, don't try to do anything at all.
-	 */
-	if (kill_it && tolerant < 3) {
-		int user_space = 0;
-
-		/*
-		 * If the EIPV bit is set, it means the saved IP is the
-		 * instruction which caused the MCE.
-		 */
-		if (m.mcgstatus & MCG_STATUS_EIPV)
-			user_space = panicm.ip && (panicm.cs & 3);
-
-		/*
-		 * If we know that the error was in user space, send a
-		 * SIGBUS.  Otherwise, panic if tolerance is low.
-		 *
-		 * force_sig() takes an awful lot of locks and has a slight
-		 * risk of deadlocking.
-		 */
-		if (user_space) {
-			force_sig(SIGBUS, current);
-		} else if (panic_on_oops || tolerant < 2) {
-			mce_panic("Uncorrected machine check",
-				&panicm, mcestart);
-		}
-	}
-
-	/* notify userspace ASAP */
-	set_thread_flag(TIF_MCE_NOTIFY);
-
-	/* the last thing we do is clear state */
-	for (i = 0; i < banks; i++) {
-		if (test_bit(i, toclear))
-			wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
-	}
-	wrmsrl(MSR_IA32_MCG_STATUS, 0);
- out2:
-	atomic_dec(&mce_entry);
-}
-
-#ifdef CONFIG_X86_MCE_INTEL
-/***
- * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
- * @cpu: The CPU on which the event occurred.
- * @status: Event status information
- *
- * This function should be called by the thermal interrupt after the
- * event has been processed and the decision was made to log the event
- * further.
- *
- * The status parameter will be saved to the 'status' field of 'struct mce'
- * and historically has been the register value of the
- * MSR_IA32_THERMAL_STATUS (Intel) msr.
- */
-void mce_log_therm_throt_event(__u64 status)
-{
-	struct mce m;
-
-	mce_setup(&m);
-	m.bank = MCE_THERMAL_BANK;
-	m.status = status;
-	mce_log(&m);
-}
-#endif /* CONFIG_X86_MCE_INTEL */
-
-/*
- * Periodic polling timer for "silent" machine check errors.  If the
- * poller finds an MCE, poll 2x faster.  When the poller finds no more
- * errors, poll 2x slower (up to check_interval seconds).
- */
-static int check_interval = 5 * 60; /* 5 minutes */
-
-static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
-static DEFINE_PER_CPU(struct timer_list, mce_timer);
-
-static void mcheck_timer(unsigned long data)
-{
-	struct timer_list *t = &per_cpu(mce_timer, data);
-	int *n;
-
-	WARN_ON(smp_processor_id() != data);
-
-	if (mce_available(&current_cpu_data)) {
-		machine_check_poll(MCP_TIMESTAMP,
-				&__get_cpu_var(mce_poll_banks));
-	}
-
-	/*
-	 * Alert userspace if needed.  If we logged an MCE, reduce the
-	 * polling interval, otherwise increase the polling interval.
-	 */
-	n = &__get_cpu_var(next_interval);
-	if (mce_notify_user()) {
-		*n = max(*n/2, HZ/100);
-	} else {
-		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
-	}
-
-	t->expires = jiffies + *n;
-	add_timer(t);
-}
-
-static void mce_do_trigger(struct work_struct *work)
-{
-	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
-}
-
-static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
-
-/*
- * Notify the user(s) about new machine check events.
- * Can be called from interrupt context, but not from machine check/NMI
- * context.
- */
-int mce_notify_user(void)
-{
-	/* Not more than two messages every minute */
-	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
-
-	clear_thread_flag(TIF_MCE_NOTIFY);
-
-	if (test_and_clear_bit(0, &notify_user)) {
-		wake_up_interruptible(&mce_wait);
-
-		/*
-		 * There is no risk of missing notifications because
-		 * work_pending is always cleared before the function is
-		 * executed.
-		 */
-		if (trigger[0] && !work_pending(&mce_trigger_work))
-			schedule_work(&mce_trigger_work);
-
-		if (__ratelimit(&ratelimit))
-			printk(KERN_INFO "Machine check events logged\n");
-
-		return 1;
-	}
-	return 0;
-}
-
-/* see if the idle task needs to notify userspace: */
-static int
-mce_idle_callback(struct notifier_block *nfb, unsigned long action,
-		  void *unused)
-{
-	/* IDLE_END should be safe - interrupts are back on */
-	if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
-		mce_notify_user();
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block mce_idle_notifier = {
-	.notifier_call		= mce_idle_callback,
-};
-
-static __init int periodic_mcheck_init(void)
-{
-       idle_notifier_register(&mce_idle_notifier);
-       return 0;
-}
-__initcall(periodic_mcheck_init);
-
-/*
- * Initialize Machine Checks for a CPU.
- */
-static int mce_cap_init(void)
-{
-	unsigned b;
-	u64 cap;
-
-	rdmsrl(MSR_IA32_MCG_CAP, cap);
-	b = cap & 0xff;
-	if (b > MAX_NR_BANKS) {
-		printk(KERN_WARNING
-		       "MCE: Using only %u machine check banks out of %u\n",
-			MAX_NR_BANKS, b);
-		b = MAX_NR_BANKS;
-	}
-
-	/* Don't support asymmetric configurations today */
-	WARN_ON(banks != 0 && b != banks);
-	banks = b;
-	if (!bank) {
-		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
-		if (!bank)
-			return -ENOMEM;
-		memset(bank, 0xff, banks * sizeof(u64));
-	}
-
-	/* Use accurate RIP reporting if available. */
-	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
-		rip_msr = MSR_IA32_MCG_EIP;
-
-	return 0;
-}
-
-static void mce_init(void *dummy)
-{
-	mce_banks_t all_banks;
-	u64 cap;
-	int i;
-
-	/*
-	 * Log the machine checks left over from the previous reset.
-	 */
-	bitmap_fill(all_banks, MAX_NR_BANKS);
-	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
-
-	set_in_cr4(X86_CR4_MCE);
-
-	rdmsrl(MSR_IA32_MCG_CAP, cap);
-	if (cap & MCG_CTL_P)
-		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-
-	for (i = 0; i < banks; i++) {
-		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
-		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
-	}
-}
-
-/* Add per CPU specific workarounds here */
-static void mce_cpu_quirks(struct cpuinfo_x86 *c)
-{
-	/* This should be disabled by the BIOS, but isn't always */
-	if (c->x86_vendor == X86_VENDOR_AMD) {
-		if (c->x86 == 15 && banks > 4) {
-			/*
-			 * disable GART TBL walk error reporting, which
-			 * trips off incorrectly with the IOMMU & 3ware
-			 * & Cerberus:
-			 */
-			clear_bit(10, (unsigned long *)&bank[4]);
-		}
-		if (c->x86 <= 17 && mce_bootlog < 0) {
-			/*
-			 * Lots of broken BIOS around that don't clear them
-			 * by default and leave crap in there. Don't log:
-			 */
-			mce_bootlog = 0;
-		}
-	}
-
-}
-
-static void mce_cpu_features(struct cpuinfo_x86 *c)
-{
-	switch (c->x86_vendor) {
-	case X86_VENDOR_INTEL:
-		mce_intel_feature_init(c);
-		break;
-	case X86_VENDOR_AMD:
-		mce_amd_feature_init(c);
-		break;
-	default:
-		break;
-	}
-}
-
-static void mce_init_timer(void)
-{
-	struct timer_list *t = &__get_cpu_var(mce_timer);
-	int *n = &__get_cpu_var(next_interval);
-
-	*n = check_interval * HZ;
-	if (!*n)
-		return;
-	setup_timer(t, mcheck_timer, smp_processor_id());
-	t->expires = round_jiffies(jiffies + *n);
-	add_timer(t);
-}
-
-/*
- * Called for each booted CPU to set up machine checks.
- * Must be called with preempt off:
- */
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
-{
-	if (!mce_available(c))
-		return;
-
-	if (mce_cap_init() < 0) {
-		mce_dont_init = 1;
-		return;
-	}
-	mce_cpu_quirks(c);
-
-	mce_init(NULL);
-	mce_cpu_features(c);
-	mce_init_timer();
-}
-
-/*
- * Character device to read and clear the MCE log.
- */
-
-static DEFINE_SPINLOCK(mce_state_lock);
-static int		open_count;		/* #times opened */
-static int		open_exclu;		/* already open exclusive? */
-
-static int mce_open(struct inode *inode, struct file *file)
-{
-	lock_kernel();
-	spin_lock(&mce_state_lock);
-
-	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
-		spin_unlock(&mce_state_lock);
-		unlock_kernel();
-
-		return -EBUSY;
-	}
-
-	if (file->f_flags & O_EXCL)
-		open_exclu = 1;
-	open_count++;
-
-	spin_unlock(&mce_state_lock);
-	unlock_kernel();
-
-	return nonseekable_open(inode, file);
-}
-
-static int mce_release(struct inode *inode, struct file *file)
-{
-	spin_lock(&mce_state_lock);
-
-	open_count--;
-	open_exclu = 0;
-
-	spin_unlock(&mce_state_lock);
-
-	return 0;
-}
-
-static void collect_tscs(void *data)
-{
-	unsigned long *cpu_tsc = (unsigned long *)data;
-
-	rdtscll(cpu_tsc[smp_processor_id()]);
-}
-
-static DEFINE_MUTEX(mce_read_mutex);
-
-static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
-			loff_t *off)
-{
-	char __user *buf = ubuf;
-	unsigned long *cpu_tsc;
-	unsigned prev, next;
-	int i, err;
-
-	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
-	if (!cpu_tsc)
-		return -ENOMEM;
-
-	mutex_lock(&mce_read_mutex);
-	next = rcu_dereference(mcelog.next);
-
-	/* Only supports full reads right now */
-	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
-		mutex_unlock(&mce_read_mutex);
-		kfree(cpu_tsc);
-
-		return -EINVAL;
-	}
-
-	err = 0;
-	prev = 0;
-	do {
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-
-			while (!mcelog.entry[i].finished) {
-				if (time_after_eq(jiffies, start + 2)) {
-					memset(mcelog.entry + i, 0,
-					       sizeof(struct mce));
-					goto timeout;
-				}
-				cpu_relax();
-			}
-			smp_rmb();
-			err |= copy_to_user(buf, mcelog.entry + i,
-					    sizeof(struct mce));
-			buf += sizeof(struct mce);
-timeout:
-			;
-		}
-
-		memset(mcelog.entry + prev, 0,
-		       (next - prev) * sizeof(struct mce));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
-
-	synchronize_sched();
-
-	/*
-	 * Collect entries that were still getting written before the
-	 * synchronize.
-	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1);
-
-	for (i = next; i < MCE_LOG_LEN; i++) {
-		if (mcelog.entry[i].finished &&
-		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
-			err |= copy_to_user(buf, mcelog.entry+i,
-					    sizeof(struct mce));
-			smp_rmb();
-			buf += sizeof(struct mce);
-			memset(&mcelog.entry[i], 0, sizeof(struct mce));
-		}
-	}
-	mutex_unlock(&mce_read_mutex);
-	kfree(cpu_tsc);
-
-	return err ? -EFAULT : buf - ubuf;
-}
-
-static unsigned int mce_poll(struct file *file, poll_table *wait)
-{
-	poll_wait(file, &mce_wait, wait);
-	if (rcu_dereference(mcelog.next))
-		return POLLIN | POLLRDNORM;
-	return 0;
-}
-
-static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
-	int __user *p = (int __user *)arg;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	switch (cmd) {
-	case MCE_GET_RECORD_LEN:
-		return put_user(sizeof(struct mce), p);
-	case MCE_GET_LOG_LEN:
-		return put_user(MCE_LOG_LEN, p);
-	case MCE_GETCLEAR_FLAGS: {
-		unsigned flags;
-
-		do {
-			flags = mcelog.flags;
-		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
-
-		return put_user(flags, p);
-	}
-	default:
-		return -ENOTTY;
-	}
-}
-
-static const struct file_operations mce_chrdev_ops = {
-	.open			= mce_open,
-	.release		= mce_release,
-	.read			= mce_read,
-	.poll			= mce_poll,
-	.unlocked_ioctl		= mce_ioctl,
-};
-
-static struct miscdevice mce_log_device = {
-	MISC_MCELOG_MINOR,
-	"mcelog",
-	&mce_chrdev_ops,
-};
-
-/*
- * Old style boot options parsing. Only for compatibility.
- */
-static int __init mcheck_disable(char *str)
-{
-	mce_dont_init = 1;
-	return 1;
-}
-__setup("nomce", mcheck_disable);
-
-/*
- * mce=off disables machine check
- * mce=TOLERANCELEVEL (number, see above)
- * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
- * mce=nobootlog Don't log MCEs from before booting.
- */
-static int __init mcheck_enable(char *str)
-{
-	if (!strcmp(str, "off"))
-		mce_dont_init = 1;
-	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
-		mce_bootlog = (str[0] == 'b');
-	else if (isdigit(str[0]))
-		get_option(&str, &tolerant);
-	else {
-		printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n",
-		       str);
-		return 0;
-	}
-	return 1;
-}
-__setup("mce=", mcheck_enable);
-
-/*
- * Sysfs support
- */
-
-/*
- * Disable machine checks on suspend and shutdown. We can't really handle
- * them later.
- */
-static int mce_disable(void)
-{
-	int i;
-
-	for (i = 0; i < banks; i++)
-		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
-	return 0;
-}
-
-static int mce_suspend(struct sys_device *dev, pm_message_t state)
-{
-	return mce_disable();
-}
-
-static int mce_shutdown(struct sys_device *dev)
-{
-	return mce_disable();
-}
-
-/*
- * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
- * Only one CPU is active at this time, the others get re-added later using
- * CPU hotplug:
- */
-static int mce_resume(struct sys_device *dev)
-{
-	mce_init(NULL);
-	mce_cpu_features(&current_cpu_data);
-
-	return 0;
-}
-
-static void mce_cpu_restart(void *data)
-{
-	del_timer_sync(&__get_cpu_var(mce_timer));
-	if (mce_available(&current_cpu_data))
-		mce_init(NULL);
-	mce_init_timer();
-}
-
-/* Reinit MCEs after user configuration changes */
-static void mce_restart(void)
-{
-	on_each_cpu(mce_cpu_restart, NULL, 1);
-}
-
-static struct sysdev_class mce_sysclass = {
-	.suspend	= mce_suspend,
-	.shutdown	= mce_shutdown,
-	.resume		= mce_resume,
-	.name		= "machinecheck",
-};
-
-DEFINE_PER_CPU(struct sys_device, device_mce);
-
-__cpuinitdata
-void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
-
-/* Why are there no generic functions for this? */
-#define ACCESSOR(name, var, start) \
-	static ssize_t show_ ## name(struct sys_device *s,		\
-				     struct sysdev_attribute *attr,	\
-				     char *buf) {			\
-		return sprintf(buf, "%lx\n", (unsigned long)var);	\
-	}								\
-	static ssize_t set_ ## name(struct sys_device *s,		\
-				    struct sysdev_attribute *attr,	\
-				    const char *buf, size_t siz) {	\
-		char *end;						\
-		unsigned long new = simple_strtoul(buf, &end, 0);	\
-									\
-		if (end == buf)						\
-			return -EINVAL;					\
-		var = new;						\
-		start;							\
-									\
-		return end-buf;						\
-	}								\
-	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
-
-static struct sysdev_attribute *bank_attrs;
-
-static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
-			 char *buf)
-{
-	u64 b = bank[attr - bank_attrs];
-
-	return sprintf(buf, "%llx\n", b);
-}
-
-static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
-			const char *buf, size_t siz)
-{
-	char *end;
-	u64 new = simple_strtoull(buf, &end, 0);
-
-	if (end == buf)
-		return -EINVAL;
-
-	bank[attr - bank_attrs] = new;
-	mce_restart();
-
-	return end-buf;
-}
-
-static ssize_t
-show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
-{
-	strcpy(buf, trigger);
-	strcat(buf, "\n");
-	return strlen(trigger) + 1;
-}
-
-static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
-				const char *buf, size_t siz)
-{
-	char *p;
-	int len;
-
-	strncpy(trigger, buf, sizeof(trigger));
-	trigger[sizeof(trigger)-1] = 0;
-	len = strlen(trigger);
-	p = strchr(trigger, '\n');
-
-	if (*p)
-		*p = 0;
-
-	return len;
-}
-
-static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
-static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
-
-ACCESSOR(check_interval, check_interval, mce_restart())
-
-static struct sysdev_attribute *mce_attributes[] = {
-	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
-	NULL
-};
-
-static cpumask_var_t mce_device_initialized;
-
-/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
-static __cpuinit int mce_create_device(unsigned int cpu)
-{
-	int err;
-	int i;
-
-	if (!mce_available(&boot_cpu_data))
-		return -EIO;
-
-	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
-	per_cpu(device_mce, cpu).id	= cpu;
-	per_cpu(device_mce, cpu).cls	= &mce_sysclass;
-
-	err = sysdev_register(&per_cpu(device_mce, cpu));
-	if (err)
-		return err;
-
-	for (i = 0; mce_attributes[i]; i++) {
-		err = sysdev_create_file(&per_cpu(device_mce, cpu),
-					 mce_attributes[i]);
-		if (err)
-			goto error;
-	}
-	for (i = 0; i < banks; i++) {
-		err = sysdev_create_file(&per_cpu(device_mce, cpu),
-					&bank_attrs[i]);
-		if (err)
-			goto error2;
-	}
-	cpumask_set_cpu(cpu, mce_device_initialized);
-
-	return 0;
-error2:
-	while (--i >= 0) {
-		sysdev_remove_file(&per_cpu(device_mce, cpu),
-					&bank_attrs[i]);
-	}
-error:
-	while (--i >= 0) {
-		sysdev_remove_file(&per_cpu(device_mce, cpu),
-				   mce_attributes[i]);
-	}
-	sysdev_unregister(&per_cpu(device_mce, cpu));
-
-	return err;
-}
-
-static __cpuinit void mce_remove_device(unsigned int cpu)
-{
-	int i;
-
-	if (!cpumask_test_cpu(cpu, mce_device_initialized))
-		return;
-
-	for (i = 0; mce_attributes[i]; i++)
-		sysdev_remove_file(&per_cpu(device_mce, cpu),
-			mce_attributes[i]);
-	for (i = 0; i < banks; i++)
-		sysdev_remove_file(&per_cpu(device_mce, cpu),
-			&bank_attrs[i]);
-	sysdev_unregister(&per_cpu(device_mce, cpu));
-	cpumask_clear_cpu(cpu, mce_device_initialized);
-}
-
-/* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
-{
-	int i;
-	unsigned long action = *(unsigned long *)h;
-
-	if (!mce_available(&current_cpu_data))
-		return;
-	if (!(action & CPU_TASKS_FROZEN))
-		cmci_clear();
-	for (i = 0; i < banks; i++)
-		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
-}
-
-static void mce_reenable_cpu(void *h)
-{
-	unsigned long action = *(unsigned long *)h;
-	int i;
-
-	if (!mce_available(&current_cpu_data))
-		return;
-
-	if (!(action & CPU_TASKS_FROZEN))
-		cmci_reenable();
-	for (i = 0; i < banks; i++)
-		wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
-}
-
-/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int __cpuinit
-mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-	struct timer_list *t = &per_cpu(mce_timer, cpu);
-
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		mce_create_device(cpu);
-		if (threshold_cpu_callback)
-			threshold_cpu_callback(action, cpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		if (threshold_cpu_callback)
-			threshold_cpu_callback(action, cpu);
-		mce_remove_device(cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		del_timer_sync(t);
-		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
-		break;
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		t->expires = round_jiffies(jiffies +
-						__get_cpu_var(next_interval));
-		add_timer_on(t, cpu);
-		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
-		break;
-	case CPU_POST_DEAD:
-		/* intentionally ignoring frozen here */
-		cmci_rediscover(cpu);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block mce_cpu_notifier __cpuinitdata = {
-	.notifier_call = mce_cpu_callback,
-};
-
-static __init int mce_init_banks(void)
-{
-	int i;
-
-	bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
-				GFP_KERNEL);
-	if (!bank_attrs)
-		return -ENOMEM;
-
-	for (i = 0; i < banks; i++) {
-		struct sysdev_attribute *a = &bank_attrs[i];
-
-		a->attr.name	= kasprintf(GFP_KERNEL, "bank%d", i);
-		if (!a->attr.name)
-			goto nomem;
-
-		a->attr.mode	= 0644;
-		a->show		= show_bank;
-		a->store	= set_bank;
-	}
-	return 0;
-
-nomem:
-	while (--i >= 0)
-		kfree(bank_attrs[i].attr.name);
-	kfree(bank_attrs);
-	bank_attrs = NULL;
-
-	return -ENOMEM;
-}
-
-static __init int mce_init_device(void)
-{
-	int err;
-	int i = 0;
-
-	if (!mce_available(&boot_cpu_data))
-		return -EIO;
-
-	alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
-
-	err = mce_init_banks();
-	if (err)
-		return err;
-
-	err = sysdev_class_register(&mce_sysclass);
-	if (err)
-		return err;
-
-	for_each_online_cpu(i) {
-		err = mce_create_device(i);
-		if (err)
-			return err;
-	}
-
-	register_hotcpu_notifier(&mce_cpu_notifier);
-	misc_register(&mce_log_device);
-
-	return err;
-}
-
-device_initcall(mce_init_device);
-
-#else /* CONFIG_X86_32: */
-
-int mce_disabled;
-
-int nr_mce_banks;
-EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
-
-/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs *regs, long error_code)
-{
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
-	       smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) =
-						unexpected_machine_check;
-
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
-	if (mce_disabled == 1)
-		return;
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_mcheck_init(c);
-		break;
-
-	case X86_VENDOR_INTEL:
-		if (c->x86 == 5)
-			intel_p5_mcheck_init(c);
-		if (c->x86 == 6)
-			intel_p6_mcheck_init(c);
-		if (c->x86 == 15)
-			intel_p4_mcheck_init(c);
-		break;
-
-	case X86_VENDOR_CENTAUR:
-		if (c->x86 == 5)
-			winchip_mcheck_init(c);
-		break;
-
-	default:
-		break;
-	}
-}
-
-static int __init mcheck_disable(char *str)
-{
-	mce_disabled = 1;
-	return 1;
-}
-
-static int __init mcheck_enable(char *str)
-{
-	mce_disabled = -1;
-	return 1;
-}
-
-__setup("nomce", mcheck_disable);
-__setup("mce", mcheck_enable);
-
-#endif /* CONFIG_X86_32 */
-- 
cgit v0.10.2


From cb491fca55e5282f0a95ef39c55352e00d6ca75e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:17 +0200
Subject: x86, mce: Rename sysfs variables

Shorten variable names. This also compacts the code a bit.

	device_mce		=> mce_dev
	mce_device_initialized	=> mce_dev_initialized
	mce_attribute		=> mce_attrs

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8488210..b9972a6 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -90,7 +90,7 @@ extern int mce_disabled;
 
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, device_mce);
+DECLARE_PER_CPU(struct sys_device, mce_dev);
 extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2e2c3d2..ba8dd41 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -957,7 +957,7 @@ static struct sysdev_class mce_sysclass = {
 	.name		= "machinecheck",
 };
 
-DEFINE_PER_CPU(struct sys_device, device_mce);
+DEFINE_PER_CPU(struct sys_device, mce_dev);
 
 __cpuinitdata
 void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -1039,12 +1039,12 @@ static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
 
 ACCESSOR(check_interval, check_interval, mce_restart())
 
-static struct sysdev_attribute *mce_attributes[] = {
+static struct sysdev_attribute *mce_attrs[] = {
 	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
 	NULL
 };
 
-static cpumask_var_t mce_device_initialized;
+static cpumask_var_t mce_dev_initialized;
 
 /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
 static __cpuinit int mce_create_device(unsigned int cpu)
@@ -1055,40 +1055,36 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
-	per_cpu(device_mce, cpu).id	= cpu;
-	per_cpu(device_mce, cpu).cls	= &mce_sysclass;
+	memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
+	per_cpu(mce_dev, cpu).id	= cpu;
+	per_cpu(mce_dev, cpu).cls	= &mce_sysclass;
 
-	err = sysdev_register(&per_cpu(device_mce, cpu));
+	err = sysdev_register(&per_cpu(mce_dev, cpu));
 	if (err)
 		return err;
 
-	for (i = 0; mce_attributes[i]; i++) {
-		err = sysdev_create_file(&per_cpu(device_mce, cpu),
-					 mce_attributes[i]);
+	for (i = 0; mce_attrs[i]; i++) {
+		err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
 		if (err)
 			goto error;
 	}
 	for (i = 0; i < banks; i++) {
-		err = sysdev_create_file(&per_cpu(device_mce, cpu),
+		err = sysdev_create_file(&per_cpu(mce_dev, cpu),
 					&bank_attrs[i]);
 		if (err)
 			goto error2;
 	}
-	cpumask_set_cpu(cpu, mce_device_initialized);
+	cpumask_set_cpu(cpu, mce_dev_initialized);
 
 	return 0;
 error2:
-	while (--i >= 0) {
-		sysdev_remove_file(&per_cpu(device_mce, cpu),
-					&bank_attrs[i]);
-	}
+	while (--i >= 0)
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
 error:
-	while (--i >= 0) {
-		sysdev_remove_file(&per_cpu(device_mce, cpu),
-				   mce_attributes[i]);
-	}
-	sysdev_unregister(&per_cpu(device_mce, cpu));
+	while (--i >= 0)
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+
+	sysdev_unregister(&per_cpu(mce_dev, cpu));
 
 	return err;
 }
@@ -1097,24 +1093,24 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
 {
 	int i;
 
-	if (!cpumask_test_cpu(cpu, mce_device_initialized))
+	if (!cpumask_test_cpu(cpu, mce_dev_initialized))
 		return;
 
-	for (i = 0; mce_attributes[i]; i++)
-		sysdev_remove_file(&per_cpu(device_mce, cpu),
-			mce_attributes[i]);
+	for (i = 0; mce_attrs[i]; i++)
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+
 	for (i = 0; i < banks; i++)
-		sysdev_remove_file(&per_cpu(device_mce, cpu),
-			&bank_attrs[i]);
-	sysdev_unregister(&per_cpu(device_mce, cpu));
-	cpumask_clear_cpu(cpu, mce_device_initialized);
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+
+	sysdev_unregister(&per_cpu(mce_dev, cpu));
+	cpumask_clear_cpu(cpu, mce_dev_initialized);
 }
 
 /* Make sure there are no machine checks on offlined CPUs. */
 static void mce_disable_cpu(void *h)
 {
-	int i;
 	unsigned long action = *(unsigned long *)h;
+	int i;
 
 	if (!mce_available(&current_cpu_data))
 		return;
@@ -1221,7 +1217,7 @@ static __init int mce_init_device(void)
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+	alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
 
 	err = mce_init_banks();
 	if (err)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 4d90ec3..083f270 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -517,7 +517,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (!b)
 			goto out;
 
-		err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
+		err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@@ -540,7 +540,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		goto out;
 	}
 
-	b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
+	b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
 	if (!b->kobj)
 		goto out_free;
 
@@ -560,7 +560,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (i == cpu)
 			continue;
 
-		err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
+		err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@@ -638,7 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 #ifdef CONFIG_SMP
 	/* sibling symlink */
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
-		sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
+		sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
 
 		return;
@@ -650,7 +650,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 		if (i == cpu)
 			continue;
 
-		sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
+		sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
 		per_cpu(threshold_banks, i)[bank] = NULL;
 	}
 
-- 
cgit v0.10.2


From b659294b779565c60f5e12ef505328e2b974eb62 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 12:31:27 +0200
Subject: x86, mce: print number of MCE banks

The number of MCE banks supported by a CPU is a useful number to know,
so print it out during CPU initialization.

[ Impact: add printout ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ba8dd41..49c7422 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -570,6 +570,8 @@ static int mce_cap_init(void)
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	b = cap & 0xff;
+	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+
 	if (b > MAX_NR_BANKS) {
 		printk(KERN_WARNING
 		       "MCE: Using only %u machine check banks out of %u\n",
@@ -1287,6 +1289,7 @@ void mcheck_init(struct cpuinfo_x86 *c)
 	default:
 		break;
 	}
+	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
 }
 
 static int __init mcheck_disable(char *str)
-- 
cgit v0.10.2


From ba2d0f2b0c56d7174a0208f7c463271f39040728 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 8 Apr 2009 12:31:24 +0200
Subject: x86, mce: Cleanup symbols in intel thermal codes

Decode magic constants and turn them into symbols.

[ Cleanup to use symbols already exists - HS ]

[ Impact: cleanup ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec41fc1..c864046 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -208,7 +208,14 @@
 
 #define MSR_IA32_THERM_CONTROL		0x0000019a
 #define MSR_IA32_THERM_INTERRUPT	0x0000019b
+
+#define THERM_INT_LOW_ENABLE		(1 << 0)
+#define THERM_INT_HIGH_ENABLE		(1 << 1)
+
 #define MSR_IA32_THERM_STATUS		0x0000019c
+
+#define THERM_STATUS_PROCHOT		(1 << 0)
+
 #define MSR_IA32_MISC_ENABLE		0x000001a0
 
 /* MISC_ENABLE bits: architectural */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index bad3cbb..2b011d2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -32,13 +32,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 	h = apic_read(APIC_LVTTHMR);
-	if ((l & (1 << 3)) && (h & APIC_DM_SMI)) {
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
 		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
 		return;
 	}
 
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
 		tm2 = 1;
 
 	/* Check whether a vector already exists */
@@ -54,12 +54,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	apic_write(APIC_LVTTHMR, h);
 
 	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT,
+		l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
 
 	intel_set_thermal_handler();
 
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
 
 	/* Unmask the thermal vector: */
 	l = apic_read(APIC_LVTTHMR);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 38f9632..13abafc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -29,7 +29,7 @@ asmlinkage void smp_thermal_interrupt(void)
 	irq_enter();
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	if (therm_throt_process(msr_val & 1))
+	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
 		mce_log_therm_throt_event(msr_val);
 
 	inc_irq_stat(irq_thermal_count);
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index f979ffe..82cee10 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -51,7 +51,7 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	therm_throt_process(msr_val & 0x1);
+	therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
 }
 
 /* Thermal interrupt handler for this CPU setup: */
-- 
cgit v0.10.2


From 01c6680a547a3ee8dd170c269ea8e037b3191b71 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 8 Apr 2009 12:31:24 +0200
Subject: x86, mce: Cleanup MCG definitions

Decode more magic constants and turn them into symbols.

[ Sort definitions bitwise, introduce MCG_EXT_CNT - HS ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b9972a6..94aedaf 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -8,9 +8,13 @@
  * Machine Check support for x86
  */
 
-#define MCG_CTL_P	 (1ULL<<8)   /* MCG_CAP register available */
-#define MCG_EXT_P	 (1ULL<<9)   /* Extended registers available */
-#define MCG_CMCI_P	 (1ULL<<10)  /* CMCI supported */
+#define MCG_BANKCNT_MASK	0xff         /* Number of Banks */
+#define MCG_CTL_P		(1ULL<<8)    /* MCG_CAP register available */
+#define MCG_EXT_P		(1ULL<<9)    /* Extended registers available */
+#define MCG_CMCI_P		(1ULL<<10)   /* CMCI supported */
+#define MCG_EXT_CNT_MASK	0xff0000     /* Number of Extended registers */
+#define MCG_EXT_CNT_SHIFT	16
+#define MCG_EXT_CNT(c)		(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
 
 #define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
 #define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 49c7422..1473336 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -569,7 +569,8 @@ static int mce_cap_init(void)
 	u64 cap;
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
-	b = cap & 0xff;
+
+	b = cap & MCG_BANKCNT_MASK;
 	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
 
 	if (b > MAX_NR_BANKS) {
@@ -590,7 +591,7 @@ static int mce_cap_init(void)
 	}
 
 	/* Use accurate RIP reporting if available. */
-	if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
+	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
 		rip_msr = MSR_IA32_MCG_EIP;
 
 	return 0;
-- 
cgit v0.10.2


From 3cde5c8c839bf46a7be799ed0e1d0b4780aaf794 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 27 Apr 2009 18:01:31 +0200
Subject: x86, mce: initial steps to make 64bit mce code 32bit clean

Replace unsigned long with u64s if they need to contain 64bit values.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1473336..cd1313b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -156,15 +156,15 @@ static void print_mce(struct mce *m)
 	       "and contact your hardware vendor\n");
 }
 
-static void mce_panic(char *msg, struct mce *backup, unsigned long start)
+static void mce_panic(char *msg, struct mce *backup, u64 start)
 {
 	int i;
 
 	oops_begin();
 	for (i = 0; i < MCE_LOG_LEN; i++) {
-		unsigned long tsc = mcelog.entry[i].tsc;
+		u64 tsc = mcelog.entry[i].tsc;
 
-		if (time_before(tsc, start))
+		if ((s64)(tsc - start) < 0)
 			continue;
 		print_mce(&mcelog.entry[i]);
 		if (backup && mcelog.entry[i].tsc == backup->tsc)
@@ -970,13 +970,13 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 	static ssize_t show_ ## name(struct sys_device *s,		\
 				     struct sysdev_attribute *attr,	\
 				     char *buf) {			\
-		return sprintf(buf, "%lx\n", (unsigned long)var);	\
+		return sprintf(buf, "%Lx\n", (u64)var);			\
 	}								\
 	static ssize_t set_ ## name(struct sys_device *s,		\
 				    struct sysdev_attribute *attr,	\
 				    const char *buf, size_t siz) {	\
 		char *end;						\
-		unsigned long new = simple_strtoul(buf, &end, 0);	\
+		u64 new = simple_strtoull(buf, &end, 0);		\
 									\
 		if (end == buf)						\
 			return -EINVAL;					\
-- 
cgit v0.10.2


From 06b7a7a5ec917761969444fee967c43868a76468 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 27 Apr 2009 18:37:43 +0200
Subject: x86, mce: implement the PPro bank 0 quirk in the 64bit machine check
 code

Quoting the comment:

* SDM documents that on family 6 bank 0 should not be written
* because it aliases to another special BIOS controlled
* register.
* But it's not aliased anymore on model 0x1a+
* Don't ignore bank 0 completely because there could be a valid
* event later, merely don't write CTL0.

This is mostly a port on the 32bit code, except that 32bit
always didn't write it and didn't have the 0x1a heuristic. I checked
with the CPU designers that the quirk is not required starting with
this model.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index cd1313b..1dcd3be 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -65,6 +65,8 @@ static atomic_t			mce_events;
 static char			trigger[128];
 static char			*trigger_argv[2] = { trigger, NULL };
 
+static unsigned long		dont_init_banks;
+
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 
 /* MCA banks polled by the period polling timer for corrected events */
@@ -72,6 +74,11 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
 };
 
+static inline int skip_bank_init(int i)
+{
+	return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
+}
+
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
 {
@@ -616,6 +623,8 @@ static void mce_init(void *dummy)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
 	for (i = 0; i < banks; i++) {
+		if (skip_bank_init(i))
+			continue;
 		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
 		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
 	}
@@ -643,6 +652,19 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 		}
 	}
 
+	if (c->x86_vendor == X86_VENDOR_INTEL) {
+		/*
+		 * SDM documents that on family 6 bank 0 should not be written
+		 * because it aliases to another special BIOS controlled
+		 * register.
+		 * But it's not aliased anymore on model 0x1a+
+		 * Don't ignore bank 0 completely because there could be a
+		 * valid event later, merely don't write CTL0.
+		 */
+
+		if (c->x86 == 6 && c->x86_model < 0x1A)
+			__set_bit(0, &dont_init_banks);
+	}
 }
 
 static void mce_cpu_features(struct cpuinfo_x86 *c)
@@ -911,8 +933,10 @@ static int mce_disable(void)
 {
 	int i;
 
-	for (i = 0; i < banks; i++)
-		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+	for (i = 0; i < banks; i++) {
+		if (!skip_bank_init(i))
+			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+	}
 	return 0;
 }
 
@@ -1119,8 +1143,10 @@ static void mce_disable_cpu(void *h)
 		return;
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_clear();
-	for (i = 0; i < banks; i++)
-		wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+	for (i = 0; i < banks; i++) {
+		if (!skip_bank_init(i))
+			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+	}
 }
 
 static void mce_reenable_cpu(void *h)
@@ -1133,8 +1159,10 @@ static void mce_reenable_cpu(void *h)
 
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_reenable();
-	for (i = 0; i < banks; i++)
-		wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+	for (i = 0; i < banks; i++) {
+		if (!skip_bank_init(i))
+			wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+	}
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-- 
cgit v0.10.2


From 2e6f694fde0a7158590e121962ca2e3c06633528 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 27 Apr 2009 18:42:48 +0200
Subject: x86, mce: port K7 bank 0 quirk to 64bit mce code

Various K7 have broken bank 0s. Don't enable it by default

Port from the 32bit code.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1dcd3be..1336280 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -650,6 +650,12 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 			 */
 			mce_bootlog = 0;
 		}
+		/*
+		 * Various K7s with broken bank 0 around. Always disable
+		 * by default.
+		 */
+		 if (c->x86 == 6)
+			bank[0] = 0;
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
-- 
cgit v0.10.2


From 5d7279268b654d1f8ac43b0eb6cd9598d9cf55fd Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 27 Apr 2009 19:25:48 +0200
Subject: x86, mce: use a call vector to call the 64bit mce handler

Allows to call different machine check handlers from the low
level machine check entry vector.

This is needed for later when it will be used for 32bit too.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1336280..d99318b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -39,6 +39,16 @@
 
 #include "mce.h"
 
+/* Handle unconfigured int18 (should never happen) */
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
+	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+	       smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+						unexpected_machine_check;
 #ifdef CONFIG_X86_64
 
 #define MISC_MCELOG_MINOR	227
@@ -715,6 +725,8 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 	}
 	mce_cpu_quirks(c);
 
+	machine_check_vector = do_machine_check;
+
 	mce_init(NULL);
 	mce_cpu_features(c);
 	mce_init_timer();
@@ -1285,17 +1297,6 @@ int mce_disabled;
 int nr_mce_banks;
 EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
 
-/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs *regs, long error_code)
-{
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
-	       smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) =
-						unexpected_machine_check;
-
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
 {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
index cd6cffc..966ae3c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -7,11 +7,12 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
 
-#ifdef CONFIG_X86_32
 
 /* Call the installed machine check handler for this CPU setup. */
 extern void (*machine_check_vector)(struct pt_regs *, long error_code);
 
+#ifdef CONFIG_X86_32
+
 extern int nr_mce_banks;
 
 void intel_set_thermal_handler(void);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38946c6..63276c4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1382,7 +1382,7 @@ paranoiderrorentry stack_segment do_stack_segment
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check do_machine_check
+paranoidzeroentry machine_check *machine_check_vector(%rip)
 #endif
 
 	/*
-- 
cgit v0.10.2


From 04b2b1a4df6cd0fdaa598f3c623a19c2d93cb48a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 22:50:19 +0200
Subject: x86, mce: rename 64bit mce_dont_init to mce_disabled

Give it the same name as on 32bit. This makes further merging easier.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 94aedaf..c3c7ee7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -86,9 +86,7 @@ struct mce_log {
 
 #ifdef __KERNEL__
 
-#ifdef CONFIG_X86_32
 extern int mce_disabled;
-#endif
 
 #include <asm/atomic.h>
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d99318b..6ab4770 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -49,14 +49,15 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 /* Call the installed machine check handler for this CPU setup. */
 void (*machine_check_vector)(struct pt_regs *, long error_code) =
 						unexpected_machine_check;
+
+int				mce_disabled;
+
 #ifdef CONFIG_X86_64
 
 #define MISC_MCELOG_MINOR	227
 
 atomic_t mce_entry;
 
-static int			mce_dont_init;
-
 /*
  * Tolerant levels:
  *   0: always panic on uncorrected errors, log corrected errors
@@ -194,7 +195,7 @@ static void mce_panic(char *msg, struct mce *backup, u64 start)
 
 int mce_available(struct cpuinfo_x86 *c)
 {
-	if (mce_dont_init)
+	if (mce_disabled)
 		return 0;
 	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }
@@ -720,7 +721,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 		return;
 
 	if (mce_cap_init() < 0) {
-		mce_dont_init = 1;
+		mce_disabled = 1;
 		return;
 	}
 	mce_cpu_quirks(c);
@@ -911,7 +912,7 @@ static struct miscdevice mce_log_device = {
  */
 static int __init mcheck_disable(char *str)
 {
-	mce_dont_init = 1;
+	mce_disabled = 1;
 	return 1;
 }
 __setup("nomce", mcheck_disable);
@@ -925,7 +926,7 @@ __setup("nomce", mcheck_disable);
 static int __init mcheck_enable(char *str)
 {
 	if (!strcmp(str, "off"))
-		mce_dont_init = 1;
+		mce_disabled = 1;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		mce_bootlog = (str[0] == 'b');
 	else if (isdigit(str[0]))
@@ -1292,8 +1293,6 @@ device_initcall(mce_init_device);
 
 #else /* CONFIG_X86_32: */
 
-int mce_disabled;
-
 int nr_mce_banks;
 EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
 
-- 
cgit v0.10.2


From d7c3c9a609563868d8a70e220399d06a25aba095 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 23:07:25 +0200
Subject: x86, mce: move mce_disabled option into common 32bit/64bit code

It's the same function, so let's share it.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6ab4770..5395200 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -908,16 +908,6 @@ static struct miscdevice mce_log_device = {
 };
 
 /*
- * Old style boot options parsing. Only for compatibility.
- */
-static int __init mcheck_disable(char *str)
-{
-	mce_disabled = 1;
-	return 1;
-}
-__setup("nomce", mcheck_disable);
-
-/*
  * mce=off disables machine check
  * mce=TOLERANCELEVEL (number, see above)
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
@@ -1327,19 +1317,22 @@ void mcheck_init(struct cpuinfo_x86 *c)
 	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
 }
 
-static int __init mcheck_disable(char *str)
-{
-	mce_disabled = 1;
-	return 1;
-}
-
 static int __init mcheck_enable(char *str)
 {
 	mce_disabled = -1;
 	return 1;
 }
 
-__setup("nomce", mcheck_disable);
 __setup("mce", mcheck_enable);
 
-#endif /* CONFIG_X86_32 */
+#endif /* CONFIG_X86_OLD_MCE */
+
+/*
+ * Old style boot options parsing. Only for compatibility.
+ */
+static int __init mcheck_disable(char *str)
+{
+	mce_disabled = 1;
+	return 1;
+}
+__setup("nomce", mcheck_disable);
-- 
cgit v0.10.2


From 8e97aef5f43ec715f394bc15015ff263b80c3ad6 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 14:23:18 +0200
Subject: x86, mce: remove machine check handler idle notify on 64bit

i386 has no idle notifiers, but the 64bit machine check
code uses them to wake up mcelog from a fatal machine check
exception.

For corrected machine checks found by the poller or
threshold interrupts going through an idle notifier is not needed
because the wake_up can is just done directly and doesn't
need the idle notifier. It is only needed for logging
exceptions.

To be honest I never liked the idle notifier even though I signed
off on it. On closer investigation the code actually turned out
to be nearly. Right now machine check exceptions on x86 are always
unrecoverable (lead to panic due to PCC), which means we never execute
the idle notifier path.

The only exception is the somewhat weird tolerant==3 case, which
ignores PCC. I'll fix this in a future patch in a much cleaner way.

So remove the "mcelog wakeup through idle notifier" code
from 64bit.

This allows to compile the 64bit machine check handler on 32bit
which doesn't have idle notifiers.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5395200..7562c1f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -555,29 +555,6 @@ int mce_notify_user(void)
 	return 0;
 }
 
-/* see if the idle task needs to notify userspace: */
-static int
-mce_idle_callback(struct notifier_block *nfb, unsigned long action,
-		  void *unused)
-{
-	/* IDLE_END should be safe - interrupts are back on */
-	if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
-		mce_notify_user();
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block mce_idle_notifier = {
-	.notifier_call		= mce_idle_callback,
-};
-
-static __init int periodic_mcheck_init(void)
-{
-       idle_notifier_register(&mce_idle_notifier);
-       return 0;
-}
-__initcall(periodic_mcheck_init);
-
 /*
  * Initialize Machine Checks for a CPU.
  */
-- 
cgit v0.10.2


From d896a940ef4f12a0a6bc432853b249dcfbacabf0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 14:25:18 +0200
Subject: x86, mce: remove oops_begin() use in 64bit machine check

First 32bit doesn't have oops_begin, so it's a barrier of using
this code on 32bit.

On closer examination it turns out oops_begin is not
a good idea in a machine check panic anyways. All oops_begin
does it so check for recursive/parallel oopses and implement the
"wait on oops" heuristic. But there's actually no good reason
to lock machine checks against oopses or prevent them
from recursion. Also "wait on oops" does not really make
sense for a machine check too.

Replace it with a manual bust_spinlocks/console_verbose.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7562c1f..f4d6841 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -178,7 +178,8 @@ static void mce_panic(char *msg, struct mce *backup, u64 start)
 {
 	int i;
 
-	oops_begin();
+	bust_spinlocks(1);
+	console_verbose();
 	for (i = 0; i < MCE_LOG_LEN; i++) {
 		u64 tsc = mcelog.entry[i].tsc;
 
-- 
cgit v0.10.2


From 4efc0670baf4b14bc95502e54a83ccf639146125 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 19:07:31 +0200
Subject: x86, mce: use 64bit machine check code on 32bit

The 64bit machine check code is in many ways much better than
the 32bit machine check code: it is more specification compliant,
is cleaner, only has a single code base versus one per CPU,
has better infrastructure for recovery, has a cleaner way to communicate
with user space etc. etc.

Use the 64bit code for 32bit too.

This is the second attempt to do this. There was one a couple of years
ago to unify this code for 32bit and 64bit.  Back then this ran into some
trouble with K7s and was reverted.

I believe this time the K7 problems (and some others) are addressed.
I went over the old handlers and was very careful to retain
all quirks.

But of course this needs a lot of testing on old systems. On newer
64bit capable systems I don't expect much problems because they have been
already tested with the 64bit kernel.

I made this a CONFIG for now that still allows to select the old
machine check code. This is mostly to make testing easier,
if someone runs into a problem we can ask them to try
with the CONFIG switched.

The new code is default y for more coverage.

Once there is confidence the 64bit code works well on older hardware
too the CONFIG_X86_OLD_MCE and the associated code can be easily
removed.

This causes a behaviour change for 32bit installations. They now
have to install the mcelog package to be able to log
corrected machine checks.

The 64bit machine check code only handles CPUs which support the
standard Intel machine check architecture described in the IA32 SDM.
The 32bit code has special support for some older CPUs which
have non standard machine check architectures, in particular
WinChip C3 and Intel P5.  I made those a separate CONFIG option
and kept them for now. The WinChip variant could be probably
removed without too much pain, it doesn't really do anything
interesting. P5 is also disabled by default (like it
was before) because many motherboards have it miswired, but
according to Alan Cox a few embedded setups use that one.

Forward ported/heavily changed version of old patch, original patch
included review/fixes from Thomas Gleixner, Bert Wesarg.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a6efe0a..c1c5ccd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -789,6 +789,22 @@ config X86_MCE
 	  to disable it.  MCE support simply ignores non-MCE processors like
 	  the 386 and 486, so nearly everyone can say Y here.
 
+config X86_OLD_MCE
+	depends on X86_32 && X86_MCE
+	bool "Use legacy machine check code (will go away)"
+	default n
+	select X86_ANCIENT_MCE
+	---help---
+	  Use the old i386 machine check code. This is merely intended for
+	  testing in a transition period. Try this if you run into any machine
+	  check related software problems, but report the problem to
+	  linux-kernel.  When in doubt say no.
+
+config X86_NEW_MCE
+	depends on X86_MCE
+	bool
+	default y if (!X86_OLD_MCE && X86_32) || X86_64
+
 config X86_MCE_INTEL
 	def_bool y
 	prompt "Intel MCE features"
@@ -805,6 +821,15 @@ config X86_MCE_AMD
 	   Additional support for AMD specific MCE features such as
 	   the DRAM Error Threshold.
 
+config X86_ANCIENT_MCE
+       def_bool n
+       depends on X86_32
+       prompt "Support for old Pentium 5 / WinChip machine checks"
+       ---help---
+	 Include support for machine check handling on old Pentium 5 or WinChip
+	 systems. These typically need to be enabled explicitely on the command
+	 line.
+
 config X86_MCE_THRESHOLD
 	depends on X86_MCE_AMD || X86_MCE_INTEL
 	bool
@@ -812,7 +837,7 @@ config X86_MCE_THRESHOLD
 
 config X86_MCE_NONFATAL
 	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
-	depends on X86_32 && X86_MCE
+	depends on X86_OLD_MCE
 	---help---
 	  Enabling this feature starts a timer that triggers every 5 seconds which
 	  will look at the machine check registers to see if anything happened.
@@ -825,11 +850,15 @@ config X86_MCE_NONFATAL
 
 config X86_MCE_P4THERMAL
 	bool "check for P4 thermal throttling interrupt."
-	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
+	depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
 	---help---
 	  Enabling this feature will cause a message to be printed when the P4
 	  enters thermal throttling.
 
+config X86_THERMAL_VECTOR
+	def_bool y
+	depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+
 config VM86
 	bool "Enable VM86 support" if EMBEDDED
 	default y
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index c2e6bed..486c9e9 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -52,7 +52,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
 #endif
 
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_THERMAL_VECTOR
 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 #endif
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f287092..ad53228 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -843,7 +843,7 @@ void clear_local_APIC(void)
 	}
 
 	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	if (maxlvt >= 5) {
 		v = apic_read(APIC_LVTTHMR);
 		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -1962,7 +1962,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	if (maxlvt >= 5)
 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
 #endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index ce4fbfa..c476227 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
 
 static inline int mce_in_progress(void)
 {
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#if defined(CONFIG_X86_NEW_MCE)
 	return atomic_read(&mce_entry) > 0;
 #endif
 	return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 55f01b3..5f8b094 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,6 +1,7 @@
 obj-y				=  mce.o therm_throt.o
 
-obj-$(CONFIG_X86_32)		+= k7.o p4.o p5.o p6.o winchip.o
+obj-$(CONFIG_X86_OLD_MCE)	+= k7.o p4.o p6.o
+obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
 obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f4d6841..e193de4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -52,7 +52,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
 
 int				mce_disabled;
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_NEW_MCE
 
 #define MISC_MCELOG_MINOR	227
 
@@ -662,6 +662,21 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 	}
 }
 
+static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+{
+	if (c->x86 != 5)
+		return;
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		if (mce_p5_enabled())
+			intel_p5_mcheck_init(c);
+		break;
+	case X86_VENDOR_CENTAUR:
+		winchip_mcheck_init(c);
+		break;
+	}
+}
+
 static void mce_cpu_features(struct cpuinfo_x86 *c)
 {
 	switch (c->x86_vendor) {
@@ -695,6 +710,11 @@ static void mce_init_timer(void)
  */
 void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 {
+	if (mce_disabled)
+		return;
+
+	mce_ancient_init(c);
+
 	if (!mce_available(c))
 		return;
 
@@ -893,6 +913,10 @@ static struct miscdevice mce_log_device = {
  */
 static int __init mcheck_enable(char *str)
 {
+	if (*str == 0)
+		enable_p5_mce();
+	if (*str == '=')
+		str++;
 	if (!strcmp(str, "off"))
 		mce_disabled = 1;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
@@ -900,13 +924,13 @@ static int __init mcheck_enable(char *str)
 	else if (isdigit(str[0]))
 		get_option(&str, &tolerant);
 	else {
-		printk(KERN_INFO "mce= argument %s ignored. Please use /sys\n",
+		printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
 		       str);
 		return 0;
 	}
 	return 1;
 }
-__setup("mce=", mcheck_enable);
+__setup("mce", mcheck_enable);
 
 /*
  * Sysfs support
@@ -1259,7 +1283,7 @@ static __init int mce_init_device(void)
 
 device_initcall(mce_init_device);
 
-#else /* CONFIG_X86_32: */
+#else /* CONFIG_X86_OLD_MCE: */
 
 int nr_mce_banks;
 EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
index 966ae3c..84a552b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -1,17 +1,29 @@
 #include <linux/init.h>
 #include <asm/mce.h>
 
+#ifdef CONFIG_X86_OLD_MCE
 void amd_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
+#endif
 
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+extern int mce_p5_enable;
+static inline int mce_p5_enabled(void) { return mce_p5_enable; }
+static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline int mce_p5_enabled(void) { return 0; }
+static inline void enable_p5_mce(void) { }
+#endif
 
 /* Call the installed machine check handler for this CPU setup. */
 extern void (*machine_check_vector)(struct pt_regs *, long error_code);
 
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_OLD_MCE
 
 extern int nr_mce_banks;
 
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 8812f54..015f481 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -14,6 +14,9 @@
 
 #include "mce.h"
 
+/* By default disabled */
+int		mce_p5_enable;
+
 /* Machine check handler for Pentium class Intel CPUs: */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -44,9 +47,11 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;
 
+#ifdef CONFIG_X86_OLD_MCE
 	/* Default P5 to off as its often misconnected: */
 	if (mce_disabled != -1)
 		return;
+#endif
 
 	machine_check_vector = pentium_machine_check;
 	/* Make sure the vector pointer is visible before we enable MCEs: */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c3fe010..35eddc9 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -89,7 +89,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
 	seq_printf(p, "  Thermal event interrupts\n");
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
 	seq_printf(p, "%*s: ", prec, "THR");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
@@ -176,7 +176,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #endif
 #ifdef CONFIG_X86_MCE
 	sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
 	sum += irq_stats(cpu)->irq_threshold_count;
 #endif
 #endif
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 368b0a8..98846e0 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -181,7 +181,7 @@ void __init native_init_IRQ(void)
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 #endif
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	/* thermal monitor LVT interrupt */
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 1442516..d0851e3 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -25,11 +25,11 @@
 #include <asm/ucontext.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+#include <asm/mce.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
-#include <asm/mce.h>
 #endif /* CONFIG_X86_64 */
 
 #include <asm/syscall.h>
@@ -857,7 +857,7 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#ifdef CONFIG_X86_NEW_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
 		mce_notify_user();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d2883..ad771f1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -798,7 +798,8 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
 
 	return new_kesp;
 }
-#else
+#endif
+
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }
@@ -806,7 +807,6 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
 {
 }
-#endif
 
 /*
  * 'math_state_restore()' saves the current math information in the
-- 
cgit v0.10.2


From 45f458e9a8a216b02b76fe61d9e8bc40d659fbe8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 23:18:26 +0200
Subject: x86, mce: deprecate old 32bit machine check code

Schedule for removal in 2.6.32

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index de491a3..ec9ef5d 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -437,3 +437,13 @@ Why:	Superseded by tdfxfb. I2C/DDC support used to live in a separate
 	driver but this caused driver conflicts.
 Who:	Jean Delvare <khali@linux-fr.org>
 	Krzysztof Helt <krzysztof.h1@wp.pl>
+
+----------------------------
+
+What:	CONFIG_X86_OLD_MCE
+When:	2.6.32
+Why:	Remove the old legacy 32bit machine check code. This has been
+	superseded by the newer machine check code from the 64bit port,
+	but the old version has been kept around for easier testing. Note this
+	doesn't impact the old P5 and WinChip machine check handlers.
+Who:	Andi Kleen <andi@firstfloor.org>
-- 
cgit v0.10.2


From 7856f6cce4a8cda8c1f94b99605c07d16b8d8dec Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 23:32:56 +0200
Subject: x86, mce: enable MCE_INTEL for 32bit new MCE

Enable the 64bit MCE_INTEL code (CMCI, thermal interrupts) for 32bit NEW_MCE.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1c5ccd..e1c9f77 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -808,7 +808,7 @@ config X86_NEW_MCE
 config X86_MCE_INTEL
 	def_bool y
 	prompt "Intel MCE features"
-	depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+	depends on X86_NEW_MCE && X86_LOCAL_APIC
 	---help---
 	   Additional support for intel specific MCE features such as
 	   the thermal monitor.
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 486c9e9..b2eb9c0 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -56,4 +56,8 @@ BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 #endif
 
+#ifdef CONFIG_X86_MCE_THRESHOLD
+BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
+#endif
+
 #endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 37555e5..922ee7c 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -20,7 +20,7 @@ typedef struct {
 #endif
 #ifdef CONFIG_X86_MCE
 	unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
 	unsigned int irq_threshold_count;
 # endif
 #endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79b..451e24d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -87,10 +87,11 @@
 #define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR		0xfa
 
+#define THRESHOLD_APIC_VECTOR		0xf9
+
 #ifdef CONFIG_X86_32
-/* 0xf8 - 0xf9 : free */
+/* 0xf9 : free */
 #else
-# define THRESHOLD_APIC_VECTOR		0xf9
 # define UV_BAU_MESSAGE			0xf8
 #endif
 
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 23ee9e7..d746df2 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -17,7 +17,7 @@ static void default_threshold_interrupt(void)
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
 
-asmlinkage void mce_threshold_interrupt(void)
+asmlinkage void smp_threshold_interrupt(void)
 {
 	exit_idle();
 	irq_enter();
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 63276c4..a31a7f2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1007,7 +1007,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
 #endif
 
 apicinterrupt THRESHOLD_APIC_VECTOR \
-	threshold_interrupt mce_threshold_interrupt
+	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
 	thermal_interrupt smp_thermal_interrupt
 
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 98846e0..2512ad9 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -186,6 +186,10 @@ void __init native_init_IRQ(void)
 	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
 
+#ifdef CONFIG_X86_MCE_THRESHOLD
+	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+#endif
+
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ad771f1..0d358c8 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -804,7 +804,7 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }
 
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
 
-- 
cgit v0.10.2


From de5619dfef76ddb403eb7c6de39c0130166c5dc3 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 23:34:40 +0200
Subject: x86, mce: enable MCE_AMD for 32bit NEW_MCE

That's very easy using the infrastructure enabled earlier for MCE_INTEL

Untested.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e1c9f77..a148e7a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -816,7 +816,7 @@ config X86_MCE_INTEL
 config X86_MCE_AMD
 	def_bool y
 	prompt "AMD MCE features"
-	depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+	depends on X86_NEW_MCE && X86_LOCAL_APIC
 	---help---
 	   Additional support for AMD specific MCE features such as
 	   the DRAM Error Threshold.
-- 
cgit v0.10.2


From 172d899db4bf0beb7766d583379e5ed552130e4a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 23:37:02 +0200
Subject: x86, mce: document new 32bit mcelog requirement in
 Documentation/Changes

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/Documentation/Changes b/Documentation/Changes
index b95082b..d21b3b5 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -48,6 +48,7 @@ o  procps                 3.2.0                   # ps --version
 o  oprofile               0.9                     # oprofiled --version
 o  udev                   081                     # udevinfo -V
 o  grub                   0.93                    # grub --version
+o  mcelog		  0.6
 
 Kernel compilation
 ==================
@@ -276,6 +277,16 @@ before running exportfs or mountd.  It is recommended that all NFS
 services be protected from the internet-at-large by a firewall where
 that is possible.
 
+mcelog
+------
+
+In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
+as a regular cronjob similar to the x86-64 kernel to process and log
+machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
+events are errors reported by the CPU. Processing them is strongly encouraged.
+All x86-64 kernels since 2.6.4 require the mcelog utility to
+process machine checks.
+
 Getting updated software
 ========================
 
@@ -365,6 +376,10 @@ FUSE
 ----
 o <http://sourceforge.net/projects/fuse>
 
+mcelog
+------
+o <ftp://ftp.kernel.org/pub/linux/utils/cpu/mce/mcelog/>
+
 Networking
 **********
 
-- 
cgit v0.10.2


From a9862e0560866eadbc59b84867492004da436516 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 19 May 2009 22:49:07 +0200
Subject: Export add_timer_on for modules

Needed in followon patch.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/kernel/timer.c b/kernel/timer.c
index cffffad..e2c47b8 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -756,6 +756,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	wake_up_idle_cpu(cpu);
 	spin_unlock_irqrestore(&base->lock, flags);
 }
+EXPORT_SYMBOL_GPL(add_timer_on);
 
 /**
  * del_timer - deactive a timer.
-- 
cgit v0.10.2


From 5f8c1a54cab6f449fe04d42d0661bc796fa4e73e Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 29 Apr 2009 19:29:12 +0200
Subject: x86, mce: add MSR read wrappers for easier error injection

This will be used by future patches to allow machine check error injection.
Right now it's a nop, except for adding some wrappers around the MSR reads.

This is early in the sequence to avoid too many conflicts.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e193de4..e755c95 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -194,6 +194,19 @@ static void mce_panic(char *msg, struct mce *backup, u64 start)
 	panic(msg);
 }
 
+/* MSR access wrappers used for error injection */
+static u64 mce_rdmsrl(u32 msr)
+{
+	u64 v;
+	rdmsrl(msr, v);
+	return v;
+}
+
+static void mce_wrmsrl(u32 msr, u64 v)
+{
+	wrmsrl(msr, v);
+}
+
 int mce_available(struct cpuinfo_x86 *c)
 {
 	if (mce_disabled)
@@ -213,7 +226,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 	if (rip_msr) {
 		/* Assume the RIP in the MSR is exact. Is this true? */
 		m->mcgstatus |= MCG_STATUS_EIPV;
-		rdmsrl(rip_msr, m->ip);
+		m->ip = mce_rdmsrl(rip_msr);
 		m->cs = 0;
 	}
 }
@@ -231,7 +244,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
 	mce_setup(&m);
 
-	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 	for (i = 0; i < banks; i++) {
 		if (!bank[i] || !test_bit(i, *b))
 			continue;
@@ -242,7 +255,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		m.tsc = 0;
 
 		barrier();
-		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
+		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
@@ -257,9 +270,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 			continue;
 
 		if (m.status & MCI_STATUS_MISCV)
-			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
+			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
 		if (m.status & MCI_STATUS_ADDRV)
-			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
+			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
 
 		if (!(flags & MCP_TIMESTAMP))
 			m.tsc = 0;
@@ -275,7 +288,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		/*
 		 * Clear state for this bank.
 		 */
-		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+		mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
 	}
 
 	/*
@@ -320,7 +333,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	mce_setup(&m);
 
-	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 
 	/* if the restart IP is not valid, we're done for */
 	if (!(m.mcgstatus & MCG_STATUS_RIPV))
@@ -338,7 +351,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		m.addr = 0;
 		m.bank = i;
 
-		rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
+		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
 		if ((m.status & MCI_STATUS_VAL) == 0)
 			continue;
 
@@ -378,9 +391,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		}
 
 		if (m.status & MCI_STATUS_MISCV)
-			rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
+			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
 		if (m.status & MCI_STATUS_ADDRV)
-			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
+			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
 
 		mce_get_rip(&m, regs);
 		mce_log(&m);
@@ -449,9 +462,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	/* the last thing we do is clear state */
 	for (i = 0; i < banks; i++) {
 		if (test_bit(i, toclear))
-			wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+			mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
 	}
-	wrmsrl(MSR_IA32_MCG_STATUS, 0);
+	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
  out2:
 	atomic_dec(&mce_entry);
 }
-- 
cgit v0.10.2


From ea149b36c7f511d17dd89fee734cb09778a91fa0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 29 Apr 2009 19:31:00 +0200
Subject: x86, mce: add basic error injection infrastructure

Allow user programs to write mce records into /dev/mcelog. When they do
that a fake machine check is triggered to test the machine check code.

This uses the MCE MSR wrappers added earlier.

The implementation is straight forward. There is a struct mce record
per CPU and the MCE MSR accesses get data from there if there is valid
data injected there. This allows to test the machine check code
relatively realistically because only the lowest layer of hardware
access is intercepted.

The test suite and injector are available at
git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git
git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a148e7a..e25b635 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -835,6 +835,14 @@ config X86_MCE_THRESHOLD
 	bool
 	default y
 
+config X86_MCE_INJECT
+	depends on X86_NEW_MCE
+	tristate "Machine check injector support"
+	---help---
+	  Provide support for injecting machine checks for testing purposes.
+	  If you don't know what a machine check is and you don't do kernel
+	  QA it is safe to say n.
+
 config X86_MCE_NONFATAL
 	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
 	depends on X86_OLD_MCE
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c3c7ee7..e7d2372 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -141,6 +141,9 @@ extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 extern int mce_notify_user(void);
 
+DECLARE_PER_CPU(struct mce, injectm);
+extern struct file_operations mce_chrdev_ops;
+
 #ifdef CONFIG_X86_MCE
 extern void mcheck_init(struct cpuinfo_x86 *c);
 #else
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 5f8b094..60ee182 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
+obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
new file mode 100644
index 0000000..58afac4
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -0,0 +1,126 @@
+/*
+ * Machine check injection support.
+ * Copyright 2008 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Authors:
+ * Andi Kleen
+ * Ying Huang
+ */
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <asm/uaccess.h>
+#include <asm/mce.h>
+
+/* Update fake mce registers on current CPU. */
+static void inject_mce(struct mce *m)
+{
+	struct mce *i = &per_cpu(injectm, m->cpu);
+
+	/* Make sure noone reads partially written injectm */
+	i->finished = 0;
+	mb();
+	m->finished = 0;
+	/* First set the fields after finished */
+	i->cpu = m->cpu;
+	mb();
+	/* Now write record in order, finished last (except above) */
+	memcpy(i, m, sizeof(struct mce));
+	/* Finally activate it */
+	mb();
+	i->finished = 1;
+}
+
+struct delayed_mce {
+	struct timer_list timer;
+	struct mce m;
+};
+
+/* Inject mce on current CPU */
+static void raise_mce(unsigned long data)
+{
+	struct delayed_mce *dm = (struct delayed_mce *)data;
+	struct mce *m = &dm->m;
+	int cpu = m->cpu;
+
+	inject_mce(m);
+	if (m->status & MCI_STATUS_UC) {
+		struct pt_regs regs;
+		memset(&regs, 0, sizeof(struct pt_regs));
+		regs.ip = m->ip;
+		regs.cs = m->cs;
+		printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+		do_machine_check(&regs, 0);
+		printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+	} else {
+		mce_banks_t b;
+		memset(&b, 0xff, sizeof(mce_banks_t));
+		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+		machine_check_poll(0, &b);
+		mce_notify_user();
+		printk(KERN_INFO "Finished machine check poll on CPU %d\n",
+		       cpu);
+	}
+	kfree(dm);
+}
+
+/* Error injection interface */
+static ssize_t mce_write(struct file *filp, const char __user *ubuf,
+			 size_t usize, loff_t *off)
+{
+	struct delayed_mce *dm;
+	struct mce m;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	/*
+	 * There are some cases where real MSR reads could slip
+	 * through.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
+		return -EIO;
+
+	if ((unsigned long)usize > sizeof(struct mce))
+		usize = sizeof(struct mce);
+	if (copy_from_user(&m, ubuf, usize))
+		return -EFAULT;
+
+	if (m.cpu >= NR_CPUS || !cpu_online(m.cpu))
+		return -EINVAL;
+
+	dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
+	if (!dm)
+		return -ENOMEM;
+
+	/*
+	 * Need to give user space some time to set everything up,
+	 * so do it a jiffie or two later everywhere.
+	 * Should we use a hrtimer here for better synchronization?
+	 */
+	memcpy(&dm->m, &m, sizeof(struct mce));
+	setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
+	dm->timer.expires = jiffies + 2;
+	add_timer_on(&dm->timer, m.cpu);
+	return usize;
+}
+
+static int inject_init(void)
+{
+	printk(KERN_INFO "Machine check injector initialized\n");
+	mce_chrdev_ops.write = mce_write;
+	return 0;
+}
+
+module_init(inject_init);
+/* Cannot tolerate unloading currently because we cannot
+ * guarantee all openers of mce_chrdev will get a reference to us.
+ */
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e755c95..fe216bd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -98,6 +98,9 @@ void mce_setup(struct mce *m)
 	rdtscll(m->tsc);
 }
 
+DEFINE_PER_CPU(struct mce, injectm);
+EXPORT_PER_CPU_SYMBOL_GPL(injectm);
+
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
@@ -194,16 +197,46 @@ static void mce_panic(char *msg, struct mce *backup, u64 start)
 	panic(msg);
 }
 
+/* Support code for software error injection */
+
+static int msr_to_offset(u32 msr)
+{
+	unsigned bank = __get_cpu_var(injectm.bank);
+	if (msr == rip_msr)
+		return offsetof(struct mce, ip);
+	if (msr == MSR_IA32_MC0_STATUS + bank*4)
+		return offsetof(struct mce, status);
+	if (msr == MSR_IA32_MC0_ADDR + bank*4)
+		return offsetof(struct mce, addr);
+	if (msr == MSR_IA32_MC0_MISC + bank*4)
+		return offsetof(struct mce, misc);
+	if (msr == MSR_IA32_MCG_STATUS)
+		return offsetof(struct mce, mcgstatus);
+	return -1;
+}
+
 /* MSR access wrappers used for error injection */
 static u64 mce_rdmsrl(u32 msr)
 {
 	u64 v;
+	if (__get_cpu_var(injectm).finished) {
+		int offset = msr_to_offset(msr);
+		if (offset < 0)
+			return 0;
+		return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
+	}
 	rdmsrl(msr, v);
 	return v;
 }
 
 static void mce_wrmsrl(u32 msr, u64 v)
 {
+	if (__get_cpu_var(injectm).finished) {
+		int offset = msr_to_offset(msr);
+		if (offset >= 0)
+			*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
+		return;
+	}
 	wrmsrl(msr, v);
 }
 
@@ -296,6 +329,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 * exceptions.
 	 */
 }
+EXPORT_SYMBOL_GPL(machine_check_poll);
 
 /*
  * The actual machine check handler. This only handles real
@@ -468,6 +502,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
  out2:
 	atomic_dec(&mce_entry);
 }
+EXPORT_SYMBOL_GPL(do_machine_check);
 
 #ifdef CONFIG_X86_MCE_INTEL
 /***
@@ -568,6 +603,7 @@ int mce_notify_user(void)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mce_notify_user);
 
 /*
  * Initialize Machine Checks for a CPU.
@@ -904,13 +940,14 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 	}
 }
 
-static const struct file_operations mce_chrdev_ops = {
+struct file_operations mce_chrdev_ops = {
 	.open			= mce_open,
 	.release		= mce_release,
 	.read			= mce_read,
 	.poll			= mce_poll,
 	.unlocked_ioctl		= mce_ioctl,
 };
+EXPORT_SYMBOL_GPL(mce_chrdev_ops);
 
 static struct miscdevice mce_log_device = {
 	MISC_MCELOG_MINOR,
-- 
cgit v0.10.2


From a1ff41bfc1bb7a6d19cf958f89a9b539678781e5 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 25 May 2009 22:16:14 -0700
Subject: x86, mce: add comment about mce_chrdev_ops being writable

Add a comment explaining that mce_chrdev_ops is intentionally
writable.

[ Impact: comment only ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fe216bd..156cdf6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -940,6 +940,7 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 	}
 }
 
+/* Modified in mce-inject.c, so not static or const */
 struct file_operations mce_chrdev_ops = {
 	.open			= mce_open,
 	.release		= mce_release,
-- 
cgit v0.10.2


From 5706001aacba5d3db5f224ca135e5e91a30be39c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 25 May 2009 22:18:17 -0700
Subject: x86, mce: fix comment style in mce-inject.c

Fix style of winged comment in mce-inject.c.

[ Impact: comment only ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 58afac4..673c728 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -120,7 +120,8 @@ static int inject_init(void)
 }
 
 module_init(inject_init);
-/* Cannot tolerate unloading currently because we cannot
+/*
+ * Cannot tolerate unloading currently because we cannot
  * guarantee all openers of mce_chrdev will get a reference to us.
  */
 MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 88921be30296e126896ee4d30758f989d1c4ddfb Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:51 +0200
Subject: x86, mce: synchronize core after machine check handling

The example code in the IA32 SDM recommends to synchronize the CPU
after machine check handling. So do that here.

[ Impact: Spec compliance ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 156cdf6..495c968 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -328,6 +328,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 * Don't clear MCG_STATUS here because it's only defined for
 	 * exceptions.
 	 */
+
+	sync_core();
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
@@ -501,6 +503,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
  out2:
 	atomic_dec(&mce_entry);
+	sync_core();
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 
-- 
cgit v0.10.2


From b56f642d2bf8c1f7c6499c1e55b23311a33cc796 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:52 +0200
Subject: x86, mce: use extended sysattrs for the check_interval attribute.

Instead of using own callbacks use the generic ones provided by
the sysdev later.

This finally allows to get rid of the ugly ACCESSOR macros. Should
also save some text size.

[ Impact: cleanup ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 495c968..3cac9da 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1054,28 +1054,6 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
 __cpuinitdata
 void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
-/* Why are there no generic functions for this? */
-#define ACCESSOR(name, var, start) \
-	static ssize_t show_ ## name(struct sys_device *s,		\
-				     struct sysdev_attribute *attr,	\
-				     char *buf) {			\
-		return sprintf(buf, "%Lx\n", (u64)var);			\
-	}								\
-	static ssize_t set_ ## name(struct sys_device *s,		\
-				    struct sysdev_attribute *attr,	\
-				    const char *buf, size_t siz) {	\
-		char *end;						\
-		u64 new = simple_strtoull(buf, &end, 0);		\
-									\
-		if (end == buf)						\
-			return -EINVAL;					\
-		var = new;						\
-		start;							\
-									\
-		return end-buf;						\
-	}								\
-	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
-
 static struct sysdev_attribute *bank_attrs;
 
 static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1126,13 +1104,26 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 	return len;
 }
 
+static ssize_t store_int_with_restart(struct sys_device *s,
+				      struct sysdev_attribute *attr,
+				      const char *buf, size_t size)
+{
+	ssize_t ret = sysdev_store_int(s, attr, buf, size);
+	mce_restart();
+	return ret;
+}
+
 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
 
-ACCESSOR(check_interval, check_interval, mce_restart())
+static struct sysdev_ext_attribute attr_check_interval = {
+	_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
+		     store_int_with_restart),
+	&check_interval
+};
 
 static struct sysdev_attribute *mce_attrs[] = {
-	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
+	&attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
 	NULL
 };
 
-- 
cgit v0.10.2


From fc016a49c2d92f2efbe22c1fb66eb7a5d2a06ed1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:53 +0200
Subject: x86, mce: remove unused mce_events variable

Remove unused mce_events static variable.

[ Impact: cleanup ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3cac9da..69aad7e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -71,7 +71,6 @@ static u64			*bank;
 static unsigned long		notify_user;
 static int			rip_msr;
 static int			mce_bootlog = -1;
-static atomic_t			mce_events;
 
 static char			trigger[128];
 static char			*trigger_argv[2] = { trigger, NULL };
@@ -116,7 +115,6 @@ void mce_log(struct mce *mce)
 {
 	unsigned next, entry;
 
-	atomic_inc(&mce_events);
 	mce->finished = 0;
 	wmb();
 	for (;;) {
-- 
cgit v0.10.2


From 8be9110569aec1f65d86b08aef7ec49659137bf9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 27 May 2009 21:56:53 +0200
Subject: x86, mce: remove mce_init unused argument

Remove unused mce_init argument.

[ Impact: cleanup ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 69aad7e..20c7e7c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -643,7 +643,7 @@ static int mce_cap_init(void)
 	return 0;
 }
 
-static void mce_init(void *dummy)
+static void mce_init(void)
 {
 	mce_banks_t all_banks;
 	u64 cap;
@@ -776,7 +776,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 
 	machine_check_vector = do_machine_check;
 
-	mce_init(NULL);
+	mce_init();
 	mce_cpu_features(c);
 	mce_init_timer();
 }
@@ -1020,7 +1020,7 @@ static int mce_shutdown(struct sys_device *dev)
  */
 static int mce_resume(struct sys_device *dev)
 {
-	mce_init(NULL);
+	mce_init();
 	mce_cpu_features(&current_cpu_data);
 
 	return 0;
@@ -1030,7 +1030,7 @@ static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
 	if (mce_available(&current_cpu_data))
-		mce_init(NULL);
+		mce_init();
 	mce_init_timer();
 }
 
-- 
cgit v0.10.2


From 32561696c23028596f24b353d98f2e23b58f91f7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:53 +0200
Subject: x86, mce: rename and align out2 label

There's only a single out path in do_machine_check now, so rename the
label from out2 to out.  Also align it at the first column.

[ Impact: minor cleanup, no functional changes ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 20c7e7c..18d505d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -361,9 +361,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	if (notify_die(DIE_NMI, "machine check", regs, error_code,
 			   18, SIGKILL) == NOTIFY_STOP)
-		goto out2;
+		goto out;
 	if (!banks)
-		goto out2;
+		goto out;
 
 	mce_setup(&m);
 
@@ -499,7 +499,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
 	}
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
- out2:
+out:
 	atomic_dec(&mce_entry);
 	sync_core();
 }
-- 
cgit v0.10.2


From b170204ddb7844ffff62d2d537b20c0eeb97725e Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:54 +0200
Subject: x86, mce: drop BKL in mce_open

BKL is not needed for anything in mce_open because it has
an own spinlock. Remove it.

[ Impact: cleanup ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 18d505d..8ab2836 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -13,7 +13,6 @@
 #include <linux/ratelimit.h>
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
-#include <linux/smp_lock.h>
 #include <linux/kobject.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
@@ -791,12 +790,10 @@ static int		open_exclu;		/* already open exclusive? */
 
 static int mce_open(struct inode *inode, struct file *file)
 {
-	lock_kernel();
 	spin_lock(&mce_state_lock);
 
 	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
 		spin_unlock(&mce_state_lock);
-		unlock_kernel();
 
 		return -EBUSY;
 	}
@@ -806,7 +803,6 @@ static int mce_open(struct inode *inode, struct file *file)
 	open_count++;
 
 	spin_unlock(&mce_state_lock);
-	unlock_kernel();
 
 	return nonseekable_open(inode, file);
 }
-- 
cgit v0.10.2


From 8780e8e0f6b34862cdf2c62d4d2674d6bc3207db Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:56 +0200
Subject: x86, mce: improve documentation

Document that check_interval set to 0 means no polling.
Noticed by Hidetoshi Seto

Also add a reference from boot options to the sysfs tunables

Acked-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c1304..63fca71 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -5,6 +5,8 @@ only the AMD64 specific ones are listed here.
 
 Machine check
 
+   Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
+
    mce=off disable machine check
    mce=bootlog Enable logging of machine checks left over from booting.
                Disabled by default on AMD because some BIOS leave bogus ones.
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
index a05e58e..a4fdb25 100644
--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
@@ -41,7 +41,9 @@ check_interval
 	the polling interval.  When the poller stops finding MCEs, it
 	triggers an exponential backoff (poll less often) on the polling
 	interval. The check_interval variable is both the initial and
-	maximum polling interval.
+	maximum polling interval. 0 means no polling for corrected machine
+	check errors (but some corrected errors might be still reported
+	in other ways)
 
 tolerant
 	Tolerance level. When a machine check exception occurs for a non
-- 
cgit v0.10.2


From 9319cec8c185e84fc5281afb6ac5d4c47a234841 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Tue, 14 Apr 2009 17:26:30 +0900
Subject: x86, mce: use strict_strtoull

Use strict_strtoull instead of simple_strtoull.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ab2836..4375ffb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1059,18 +1059,17 @@ static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
 }
 
 static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
-			const char *buf, size_t siz)
+			const char *buf, size_t size)
 {
-	char *end;
-	u64 new = simple_strtoull(buf, &end, 0);
+	u64 new;
 
-	if (end == buf)
+	if (strict_strtoull(buf, 0, &new) < 0)
 		return -EINVAL;
 
 	bank[attr - bank_attrs] = new;
 	mce_restart();
 
-	return end-buf;
+	return size;
 }
 
 static ssize_t
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 083f270..0c56343 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -269,14 +269,12 @@ SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)
 
 static ssize_t
-store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count)
+store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
 {
 	struct thresh_restart tr;
 	unsigned long new;
-	char *end;
 
-	new = simple_strtoul(buf, &end, 0);
-	if (end == buf)
+	if (strict_strtoul(buf, 0, &new) < 0)
 		return -EINVAL;
 
 	b->interrupt_enable = !!new;
@@ -287,18 +285,16 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count)
 
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
-	return end - buf;
+	return size;
 }
 
 static ssize_t
-store_threshold_limit(struct threshold_block *b, const char *buf, size_t count)
+store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
 {
 	struct thresh_restart tr;
 	unsigned long new;
-	char *end;
 
-	new = simple_strtoul(buf, &end, 0);
-	if (end == buf)
+	if (strict_strtoul(buf, 0, &new) < 0)
 		return -EINVAL;
 
 	if (new > THRESHOLD_MAX)
@@ -313,7 +309,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t count)
 
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
-	return end - buf;
+	return size;
 }
 
 struct threshold_block_cross_cpu {
-- 
cgit v0.10.2


From cc3aec52ab8e013984270a79d1aa51f691d239b0 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 30 Apr 2009 15:58:22 +0900
Subject: x86, mce: trivial clean up for therm_throt.c

This patch removes following checkpatch warning:

WARNING: Use #include <linux/cpu.h> instead of <asm/cpu.h>
+#include <asm/cpu.h>

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index a2b5d7d..7b1ae2e 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -20,7 +20,6 @@
 #include <linux/cpu.h>
 
 #include <asm/therm_throt.h>
-#include <asm/cpu.h>
 
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
-- 
cgit v0.10.2


From 14a02530e2239f753a0f3f089847e723adbdaa47 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 30 Apr 2009 16:04:51 +0900
Subject: x86, mce: trivial clean up for mce.c

This fixs following checkpatch warnings:

WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
+#include <asm/uaccess.h>

WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
+#include <asm/smp.h>

WARNING: line over 80 characters
+                               set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);

WARNING: braces {} are not necessary for any arm of this statement
+       if (mce_notify_user()) {
[...]
+       } else {
[...]

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4375ffb..1d0aa9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -14,6 +14,7 @@
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
 #include <linux/kobject.h>
+#include <linux/uaccess.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
 #include <linux/percpu.h>
@@ -27,14 +28,13 @@
 #include <linux/kmod.h>
 #include <linux/poll.h>
 #include <linux/cpu.h>
+#include <linux/smp.h>
 #include <linux/fs.h>
 
 #include <asm/processor.h>
-#include <asm/uaccess.h>
 #include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
-#include <asm/smp.h>
 
 #include "mce.h"
 
@@ -125,7 +125,8 @@ void mce_log(struct mce *mce)
 			 * interesting ones:
 			 */
 			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+				set_bit(MCE_OVERFLOW,
+					(unsigned long *)&mcelog.flags);
 				return;
 			}
 			/* Old left over entry. Skip: */
@@ -556,11 +557,10 @@ static void mcheck_timer(unsigned long data)
 	 * polling interval, otherwise increase the polling interval.
 	 */
 	n = &__get_cpu_var(next_interval);
-	if (mce_notify_user()) {
+	if (mce_notify_user())
 		*n = max(*n/2, HZ/100);
-	} else {
+	else
 		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
-	}
 
 	t->expires = jiffies + *n;
 	add_timer(t);
-- 
cgit v0.10.2


From 34fa1967aa0827776e37feb5666df0327575a0f2 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 8 Apr 2009 12:31:18 +0200
Subject: x86, mce: trivial clean up for mce_amd_64.c

Fix for followings:

WARNING: Use #include <linux/percpu.h> instead of <asm/percpu.h>
+#include <asm/percpu.h>

ERROR: Macros with multiple statements should be enclosed in a do - while
loop
+#define THRESHOLD_ATTR(_name, _mode, _show, _store)                    \
+{                                                                      \
+       .attr   = {.name = __stringify(_name), .mode = _mode },         \
+       .show   = _show,                                                \
+       .store  = _store,                                               \
+};

WARNING: usage of NR_CPUS is often wrong - consider using cpu_possible(),
num_possible_cpus(), for_each_possible_cpu(), etc
+       if (cpu >= NR_CPUS)

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 0c56343..ddae216 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
+#include <linux/percpu.h>
 #include <linux/sysdev.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
@@ -24,7 +25,6 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
-#include <asm/percpu.h>
 #include <asm/apic.h>
 #include <asm/idle.h>
 #include <asm/mce.h>
@@ -344,17 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b,
 	return 1;
 }
 
-#define THRESHOLD_ATTR(_name, _mode, _show, _store)			\
-{									\
-	.attr	= {.name = __stringify(_name), .mode = _mode },		\
-	.show	= _show,						\
-	.store	= _store,						\
+#define RW_ATTR(val)							\
+static struct threshold_attr val = {					\
+	.attr	= {.name = __stringify(val), .mode = 0644 },		\
+	.show	= show_## val,						\
+	.store	= store_## val,						\
 };
 
-#define RW_ATTR(name)							\
-static struct threshold_attr name =					\
-	THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
-
 RW_ATTR(interrupt_enable);
 RW_ATTR(threshold_limit);
 RW_ATTR(error_count);
@@ -675,9 +671,6 @@ static void threshold_remove_device(unsigned int cpu)
 static void __cpuinit
 amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
 {
-	if (cpu >= NR_CPUS)
-		return;
-
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-- 
cgit v0.10.2


From 61a021a0700c22ee527d73d92f9acb109ff478f8 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Tue, 14 Apr 2009 17:09:04 +0900
Subject: x86, mce: trivial clean up for mce_intel_64.c

Fix for:

WARNING: space prohibited between function name and open parenthesis '('
+       for_each_online_cpu (cpu) {

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 13abafc..eff3740 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -196,7 +196,7 @@ void cmci_rediscover(int dying)
 		return;
 	cpumask_copy(old, &current->cpus_allowed);
 
-	for_each_online_cpu (cpu) {
+	for_each_online_cpu(cpu) {
 		if (cpu == dying)
 			continue;
 		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
-- 
cgit v0.10.2


From 98a9c8c3ba13dfc3df8e6d2a126d2fa4e4621e9c Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 28 May 2009 11:41:01 +0900
Subject: x86, mce: trivial clean up for mce-inject.c

Fix for:

WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
+#include <asm/uaccess.h>

WARNING: usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc
+       if (m.cpu >= NR_CPUS || !cpu_online(m.cpu))

ERROR: trailing whitespace
+/* $

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 673c728..7b3a542 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -11,13 +11,13 @@
  * Andi Kleen
  * Ying Huang
  */
+#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/timer.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/smp.h>
-#include <asm/uaccess.h>
 #include <asm/mce.h>
 
 /* Update fake mce registers on current CPU. */
@@ -93,7 +93,7 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 	if (copy_from_user(&m, ubuf, usize))
 		return -EFAULT;
 
-	if (m.cpu >= NR_CPUS || !cpu_online(m.cpu))
+	if (m.cpu >= num_possible_cpus() || !cpu_online(m.cpu))
 		return -EINVAL;
 
 	dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
-- 
cgit v0.10.2


From eb2a6ab729ac40a553797703a5a5dba3a74de004 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 28 Apr 2009 23:32:56 +0200
Subject: x86: trivial clean up for irq_vectors.h

Fix a wrong comment.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 451e24d..233006c 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -86,11 +86,10 @@
 #define CALL_FUNCTION_VECTOR		0xfc
 #define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR		0xfa
-
 #define THRESHOLD_APIC_VECTOR		0xf9
 
 #ifdef CONFIG_X86_32
-/* 0xf9 : free */
+/* 0xf8 : free */
 #else
 # define UV_BAU_MESSAGE			0xf8
 #endif
-- 
cgit v0.10.2


From cd13adcc823aa421efa4efd995fa7004a58cf38d Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Wed, 27 May 2009 16:57:31 +0900
Subject: x86: trivial clean up for arch/x86/Kconfig

Use tab.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e25b635..8c0fff0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -822,13 +822,13 @@ config X86_MCE_AMD
 	   the DRAM Error Threshold.
 
 config X86_ANCIENT_MCE
-       def_bool n
-       depends on X86_32
-       prompt "Support for old Pentium 5 / WinChip machine checks"
-       ---help---
-	 Include support for machine check handling on old Pentium 5 or WinChip
-	 systems. These typically need to be enabled explicitely on the command
-	 line.
+	def_bool n
+	depends on X86_32
+	prompt "Support for old Pentium 5 / WinChip machine checks"
+	---help---
+	  Include support for machine check handling on old Pentium 5 or WinChip
+	  systems. These typically need to be enabled explicitely on the command
+	  line.
 
 config X86_MCE_THRESHOLD
 	depends on X86_MCE_AMD || X86_MCE_INTEL
-- 
cgit v0.10.2


From 38736072d45488fd45f076388b6570419bbbc682 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 28 May 2009 10:05:33 -0700
Subject: x86, mce: drop "extern" from function prototypes in asm/mce.h

Function prototypes don't need to be prefixed by "extern".

[ Impact: cleanup ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index e7d2372..ac6e030 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -121,13 +121,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 #endif
 
-extern int mce_available(struct cpuinfo_x86 *c);
+int mce_available(struct cpuinfo_x86 *c);
 
 void mce_log_therm_throt_event(__u64 status);
 
 extern atomic_t mce_entry;
 
-extern void do_machine_check(struct pt_regs *, long);
+void do_machine_check(struct pt_regs *, long);
 
 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
@@ -137,15 +137,15 @@ enum mcp_flags {
 	MCP_UC = (1 << 1),		/* log uncorrected errors */
 	MCP_DONTLOG = (1 << 2),		/* only clear, don't log */
 };
-extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
-extern int mce_notify_user(void);
+int mce_notify_user(void);
 
 DECLARE_PER_CPU(struct mce, injectm);
 extern struct file_operations mce_chrdev_ops;
 
 #ifdef CONFIG_X86_MCE
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
 #else
 #define mcheck_init(c) do { } while (0)
 #endif
-- 
cgit v0.10.2


From 1bf4072da67c14d6b02cfeef02212aa5a6211df2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 27 May 2009 09:37:33 -0400
Subject: cifs: reorganize get_cifs_acl

Thus spake Christoph:

"But this whole set_cifs_acl function is a real mess anyway and needs
some splitting up."

With this change too, it's possible to call acl_to_uid_mode() with a
NULL inode pointer. That (or something close to it) will eventually be
necessary when cifs_get_inode_info is reorganized.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Shirish Pargaonkar <shirishp@us.ibm.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 57ecdc8..7f8e6c4 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -552,67 +552,66 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 	return rc;
 }
 
-
-/* Retrieve an ACL from the server */
-static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
-				       const char *path, const __u16 *pfid)
+static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
+		__u16 fid, u32 *pacllen)
 {
-	struct cifsFileInfo *open_file = NULL;
-	bool unlock_file = false;
-	int xid;
-	int rc = -EIO;
-	__u16 fid;
-	struct super_block *sb;
-	struct cifs_sb_info *cifs_sb;
 	struct cifs_ntsd *pntsd = NULL;
+	int xid, rc;
 
-	cFYI(1, ("get mode from ACL for %s", path));
+	xid = GetXid();
+	rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
+	FreeXid(xid);
 
-	if (inode == NULL)
-		return NULL;
 
-	xid = GetXid();
-	if (pfid == NULL)
-		open_file = find_readable_file(CIFS_I(inode));
-	else
-		fid = *pfid;
+	cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
+	return pntsd;
+}
 
-	sb = inode->i_sb;
-	if (sb == NULL) {
-		FreeXid(xid);
-		return NULL;
-	}
-	cifs_sb = CIFS_SB(sb);
+static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
+		const char *path, u32 *pacllen)
+{
+	struct cifs_ntsd *pntsd = NULL;
+	int oplock = 0;
+	int xid, rc;
+	__u16 fid;
 
-	if (open_file) {
-		unlock_file = true;
-		fid = open_file->netfid;
-	} else if (pfid == NULL) {
-		int oplock = 0;
-		/* open file */
-		rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
-				READ_CONTROL, 0, &fid, &oplock, NULL,
-				cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
-		if (rc != 0) {
-			cERROR(1, ("Unable to open file to get ACL"));
-			FreeXid(xid);
-			return NULL;
-		}
+	xid = GetXid();
+
+	rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0,
+			 &fid, &oplock, NULL, cifs_sb->local_nls,
+			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	if (rc) {
+		cERROR(1, ("Unable to open file to get ACL"));
+		goto out;
 	}
 
 	rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
 	cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
-	if (unlock_file == true) /* find_readable_file increments ref count */
-		atomic_dec(&open_file->wrtPending);
-	else if (pfid == NULL) /* if opened above we have to close the handle */
-		CIFSSMBClose(xid, cifs_sb->tcon, fid);
-	/* else handle was passed in by caller */
 
+	CIFSSMBClose(xid, cifs_sb->tcon, fid);
+ out:
 	FreeXid(xid);
 	return pntsd;
 }
 
+/* Retrieve an ACL from the server */
+static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
+				      struct inode *inode, const char *path,
+				      u32 *pacllen)
+{
+	struct cifs_ntsd *pntsd = NULL;
+	struct cifsFileInfo *open_file = NULL;
+
+	if (inode)
+		open_file = find_readable_file(CIFS_I(inode));
+	if (!open_file)
+		return get_cifs_acl_by_path(cifs_sb, path, pacllen);
+
+	pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen);
+	atomic_dec(&open_file->wrtPending);
+	return pntsd;
+}
+
 /* Set an ACL on the server */
 static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 				struct inode *inode, const char *path)
@@ -668,14 +667,19 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 }
 
 /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
-void acl_to_uid_mode(struct inode *inode, const char *path, const __u16 *pfid)
+void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
+		     const char *path, const __u16 *pfid)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	u32 acllen = 0;
 	int rc = 0;
 
 	cFYI(DBG2, ("converting ACL to mode for %s", path));
-	pntsd = get_cifs_acl(&acllen, inode, path, pfid);
+
+	if (pfid)
+		pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen);
+	else
+		pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen);
 
 	/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
 	if (pntsd)
@@ -698,7 +702,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
 	cFYI(DBG2, ("set ACL from mode for %s", path));
 
 	/* Get the security descriptor */
-	pntsd = get_cifs_acl(&secdesclen, inode, path, NULL);
+	pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
 
 	/* Add three ACEs for owner, group, everyone getting rid of
 	   other ACEs as chmod disables ACEs and set the security descriptor */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index d542cf1..f945232 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -108,8 +108,8 @@ extern int cifs_get_inode_info(struct inode **pinode,
 extern int cifs_get_inode_info_unix(struct inode **pinode,
 			const unsigned char *search_path,
 			struct super_block *sb, int xid);
-extern void acl_to_uid_mode(struct inode *inode, const char *path,
-			    const __u16 *pfid);
+extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
+			    const char *path, const __u16 *pfid);
 extern int mode_to_acl(struct inode *inode, const char *path, __u64);
 
 extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 84b7bea..fad882b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -626,7 +626,7 @@ int cifs_get_inode_info(struct inode **pinode,
 	/* fill in 0777 bits from ACL */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 		cFYI(1, ("Getting mode bits from ACL"));
-		acl_to_uid_mode(inode, full_path, pfid);
+		acl_to_uid_mode(cifs_sb, inode, full_path, pfid);
 	}
 #endif
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
-- 
cgit v0.10.2


From a3ce6ea46cc0d6397d1b92b1a5983bb2935306ed Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 28 May 2009 09:51:31 -0700
Subject: Input: libps2 - better handle bad scheduler decisions

Sometimes devices send us their responses in time but due to
unfortunate scheduling decisions the receiving thread does not
get scheduled till much later and we erroneously decide that
device timed out. Work around this problem by checking whether we
received the data we needed instead of checking timeout
condition.

Tested-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>

diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index 67248c3..be5bbbb 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -210,7 +210,7 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
 	timeout = wait_event_timeout(ps2dev->wait,
 				     !(ps2dev->flags & PS2_FLAG_CMD1), timeout);
 
-	if (ps2dev->cmdcnt && timeout > 0) {
+	if (ps2dev->cmdcnt && !(ps2dev->flags & PS2_FLAG_CMD1)) {
 
 		timeout = ps2_adjust_timeout(ps2dev, command, timeout);
 		wait_event_timeout(ps2dev->wait,
-- 
cgit v0.10.2


From 203350c1a8e23adf17fd9a96d8bfc7adf63c1ff6 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 28 May 2009 14:51:00 +0100
Subject: ASoC: Initialise dev for the dummy S/PDIF DAI

Also include the header to make sure the DAI is prototyped.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/spdif_transciever.c b/sound/soc/codecs/spdif_transciever.c
index 118e976..218b33a 100644
--- a/sound/soc/codecs/spdif_transciever.c
+++ b/sound/soc/codecs/spdif_transciever.c
@@ -19,10 +19,11 @@
 #include <sound/soc.h>
 #include <sound/pcm.h>
 
+#include "spdif_transciever.h"
+
 #define STUB_RATES	SNDRV_PCM_RATE_8000_96000
 #define STUB_FORMATS	SNDRV_PCM_FMTBIT_S16_LE
 
-
 struct snd_soc_dai dit_stub_dai = {
 	.name		= "DIT",
 	.playback 	= {
@@ -36,6 +37,7 @@ struct snd_soc_dai dit_stub_dai = {
 
 static int spdif_dit_probe(struct platform_device *pdev)
 {
+	dit_stub_dai.dev = &pdev->dev;
 	return snd_soc_register_dai(&dit_stub_dai);
 }
 
-- 
cgit v0.10.2


From 8bd229492209c0c7d050e2f9a600c12f035d72f7 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Thu, 28 May 2009 10:56:16 -0700
Subject: OMAP2/3: PM: push core PM code from linux-omap

This patch is to sync the core linux-omap PM code with mainline.  This
code has evolved and been used for a while the linux-omap tree, but
the attempt here is to finally get this into mainline.

Following this will be a series of patches from the 'PM branch' of the
linux-omap tree to add full PM hardware support from the linux-omap
tree.

Much of this PM core code was written by Jouni Hogander with
significant contributions from Paul Walmsley as well as many others
from Nokia, Texas Instruments and linux-omap community.

Signed-off-by: Jouni Hogander <jouni.hogander@nokia.com>
Cc: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap1/pm.h b/arch/arm/mach-omap1/pm.h
new file mode 100644
index 0000000..ce6ee79
--- /dev/null
+++ b/arch/arm/mach-omap1/pm.h
@@ -0,0 +1,345 @@
+/*
+ * arch/arm/plat-omap/include/mach/pm.h
+ *
+ * Header file for OMAP Power Management Routines
+ *
+ * Author: MontaVista Software, Inc.
+ *	   support@mvista.com
+ *
+ * Copyright 2002 MontaVista Software Inc.
+ *
+ * Cleanup 2004 for Linux 2.6 by Dirk Behme <dirk.behme@de.bosch.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ASM_ARCH_OMAP_PM_H
+#define __ASM_ARCH_OMAP_PM_H
+
+/*
+ * ----------------------------------------------------------------------------
+ * Register and offset definitions to be used in PM assembler code
+ * ----------------------------------------------------------------------------
+ */
+#define CLKGEN_REG_ASM_BASE		IO_ADDRESS(0xfffece00)
+#define ARM_IDLECT1_ASM_OFFSET		0x04
+#define ARM_IDLECT2_ASM_OFFSET		0x08
+
+#define TCMIF_ASM_BASE			IO_ADDRESS(0xfffecc00)
+#define EMIFS_CONFIG_ASM_OFFSET		0x0c
+#define EMIFF_SDRAM_CONFIG_ASM_OFFSET	0x20
+
+/*
+ * ----------------------------------------------------------------------------
+ * Power management bitmasks
+ * ----------------------------------------------------------------------------
+ */
+#define IDLE_WAIT_CYCLES		0x00000fff
+#define PERIPHERAL_ENABLE		0x2
+
+#define SELF_REFRESH_MODE		0x0c000001
+#define IDLE_EMIFS_REQUEST		0xc
+#define MODEM_32K_EN			0x1
+#define PER_EN				0x1
+
+#define CPU_SUSPEND_SIZE		200
+#define ULPD_LOW_PWR_EN			0x0001
+#define ULPD_DEEP_SLEEP_TRANSITION_EN	0x0010
+#define ULPD_SETUP_ANALOG_CELL_3_VAL	0
+#define ULPD_POWER_CTRL_REG_VAL		0x0219
+
+#define DSP_IDLE_DELAY			10
+#define DSP_IDLE			0x0040
+#define DSP_RST				0x0004
+#define DSP_ENABLE			0x0002
+#define SUFFICIENT_DSP_RESET_TIME	1000
+#define DEFAULT_MPUI_CONFIG		0x05cf
+#define ENABLE_XORCLK			0x2
+#define DSP_CLOCK_ENABLE		0x2000
+#define DSP_IDLE_MODE			0x2
+#define TC_IDLE_REQUEST			(0x0000000c)
+
+#define IRQ_LEVEL2			(1<<0)
+#define IRQ_KEYBOARD			(1<<1)
+#define IRQ_UART2			(1<<15)
+
+#define PDE_BIT				0x08
+#define PWD_EN_BIT			0x04
+#define EN_PERCK_BIT			0x04
+
+#define OMAP1510_DEEP_SLEEP_REQUEST	0x0ec7
+#define OMAP1510_BIG_SLEEP_REQUEST	0x0cc5
+#define OMAP1510_IDLE_LOOP_REQUEST	0x0c00
+#define OMAP1510_IDLE_CLOCK_DOMAINS	0x2
+
+/* Both big sleep and deep sleep use same values. Difference is in ULPD. */
+#define OMAP1610_IDLECT1_SLEEP_VAL	0x13c7
+#define OMAP1610_IDLECT2_SLEEP_VAL	0x09c7
+#define OMAP1610_IDLECT3_VAL		0x3f
+#define OMAP1610_IDLECT3_SLEEP_ORMASK	0x2c
+#define OMAP1610_IDLECT3		0xfffece24
+#define OMAP1610_IDLE_LOOP_REQUEST	0x0400
+
+#define OMAP730_IDLECT1_SLEEP_VAL	0x16c7
+#define OMAP730_IDLECT2_SLEEP_VAL	0x09c7
+#define OMAP730_IDLECT3_VAL		0x3f
+#define OMAP730_IDLECT3		0xfffece24
+#define OMAP730_IDLE_LOOP_REQUEST	0x0C00
+
+#if     !defined(CONFIG_ARCH_OMAP730) && \
+	!defined(CONFIG_ARCH_OMAP15XX) && \
+	!defined(CONFIG_ARCH_OMAP16XX) && \
+	!defined(CONFIG_ARCH_OMAP24XX)
+#warning "Power management for this processor not implemented yet"
+#endif
+
+#ifndef __ASSEMBLER__
+
+#include <linux/clk.h>
+
+extern void prevent_idle_sleep(void);
+extern void allow_idle_sleep(void);
+
+extern void omap_pm_idle(void);
+extern void omap_pm_suspend(void);
+extern void omap730_cpu_suspend(unsigned short, unsigned short);
+extern void omap1510_cpu_suspend(unsigned short, unsigned short);
+extern void omap1610_cpu_suspend(unsigned short, unsigned short);
+extern void omap24xx_cpu_suspend(u32 dll_ctrl, void __iomem *sdrc_dlla_ctrl,
+					void __iomem *sdrc_power);
+extern void omap730_idle_loop_suspend(void);
+extern void omap1510_idle_loop_suspend(void);
+extern void omap1610_idle_loop_suspend(void);
+extern void omap24xx_idle_loop_suspend(void);
+
+extern unsigned int omap730_cpu_suspend_sz;
+extern unsigned int omap1510_cpu_suspend_sz;
+extern unsigned int omap1610_cpu_suspend_sz;
+extern unsigned int omap24xx_cpu_suspend_sz;
+extern unsigned int omap730_idle_loop_suspend_sz;
+extern unsigned int omap1510_idle_loop_suspend_sz;
+extern unsigned int omap1610_idle_loop_suspend_sz;
+extern unsigned int omap24xx_idle_loop_suspend_sz;
+
+#ifdef CONFIG_OMAP_SERIAL_WAKE
+extern void omap_serial_wake_trigger(int enable);
+#else
+#define omap_serial_wakeup_init()	{}
+#define omap_serial_wake_trigger(x)	{}
+#endif	/* CONFIG_OMAP_SERIAL_WAKE */
+
+#define ARM_SAVE(x) arm_sleep_save[ARM_SLEEP_SAVE_##x] = omap_readl(x)
+#define ARM_RESTORE(x) omap_writel((arm_sleep_save[ARM_SLEEP_SAVE_##x]), (x))
+#define ARM_SHOW(x) arm_sleep_save[ARM_SLEEP_SAVE_##x]
+
+#define DSP_SAVE(x) dsp_sleep_save[DSP_SLEEP_SAVE_##x] = __raw_readw(x)
+#define DSP_RESTORE(x) __raw_writew((dsp_sleep_save[DSP_SLEEP_SAVE_##x]), (x))
+#define DSP_SHOW(x) dsp_sleep_save[DSP_SLEEP_SAVE_##x]
+
+#define ULPD_SAVE(x) ulpd_sleep_save[ULPD_SLEEP_SAVE_##x] = omap_readw(x)
+#define ULPD_RESTORE(x) omap_writew((ulpd_sleep_save[ULPD_SLEEP_SAVE_##x]), (x))
+#define ULPD_SHOW(x) ulpd_sleep_save[ULPD_SLEEP_SAVE_##x]
+
+#define MPUI730_SAVE(x) mpui730_sleep_save[MPUI730_SLEEP_SAVE_##x] = omap_readl(x)
+#define MPUI730_RESTORE(x) omap_writel((mpui730_sleep_save[MPUI730_SLEEP_SAVE_##x]), (x))
+#define MPUI730_SHOW(x) mpui730_sleep_save[MPUI730_SLEEP_SAVE_##x]
+
+#define MPUI1510_SAVE(x) mpui1510_sleep_save[MPUI1510_SLEEP_SAVE_##x] = omap_readl(x)
+#define MPUI1510_RESTORE(x) omap_writel((mpui1510_sleep_save[MPUI1510_SLEEP_SAVE_##x]), (x))
+#define MPUI1510_SHOW(x) mpui1510_sleep_save[MPUI1510_SLEEP_SAVE_##x]
+
+#define MPUI1610_SAVE(x) mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_##x] = omap_readl(x)
+#define MPUI1610_RESTORE(x) omap_writel((mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_##x]), (x))
+#define MPUI1610_SHOW(x) mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_##x]
+
+#define OMAP24XX_SAVE(x) omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x] = x
+#define OMAP24XX_RESTORE(x) x = omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x]
+#define OMAP24XX_SHOW(x) omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x]
+
+/*
+ * List of global OMAP registers to preserve.
+ * More ones like CP and general purpose register values are preserved
+ * with the stack pointer in sleep.S.
+ */
+
+enum arm_save_state {
+	ARM_SLEEP_SAVE_START = 0,
+	/*
+	 * MPU control registers 32 bits
+	 */
+	ARM_SLEEP_SAVE_ARM_CKCTL,
+	ARM_SLEEP_SAVE_ARM_IDLECT1,
+	ARM_SLEEP_SAVE_ARM_IDLECT2,
+	ARM_SLEEP_SAVE_ARM_IDLECT3,
+	ARM_SLEEP_SAVE_ARM_EWUPCT,
+	ARM_SLEEP_SAVE_ARM_RSTCT1,
+	ARM_SLEEP_SAVE_ARM_RSTCT2,
+	ARM_SLEEP_SAVE_ARM_SYSST,
+	ARM_SLEEP_SAVE_SIZE
+};
+
+enum dsp_save_state {
+	DSP_SLEEP_SAVE_START = 0,
+	/*
+	 * DSP registers 16 bits
+	 */
+	DSP_SLEEP_SAVE_DSP_IDLECT2,
+	DSP_SLEEP_SAVE_SIZE
+};
+
+enum ulpd_save_state {
+	ULPD_SLEEP_SAVE_START = 0,
+	/*
+	 * ULPD registers 16 bits
+	 */
+	ULPD_SLEEP_SAVE_ULPD_IT_STATUS,
+	ULPD_SLEEP_SAVE_ULPD_CLOCK_CTRL,
+	ULPD_SLEEP_SAVE_ULPD_SOFT_REQ,
+	ULPD_SLEEP_SAVE_ULPD_STATUS_REQ,
+	ULPD_SLEEP_SAVE_ULPD_DPLL_CTRL,
+	ULPD_SLEEP_SAVE_ULPD_POWER_CTRL,
+	ULPD_SLEEP_SAVE_SIZE
+};
+
+enum mpui1510_save_state {
+	MPUI1510_SLEEP_SAVE_START = 0,
+	/*
+	 * MPUI registers 32 bits
+	 */
+	MPUI1510_SLEEP_SAVE_MPUI_CTRL,
+	MPUI1510_SLEEP_SAVE_MPUI_DSP_BOOT_CONFIG,
+	MPUI1510_SLEEP_SAVE_MPUI_DSP_API_CONFIG,
+	MPUI1510_SLEEP_SAVE_MPUI_DSP_STATUS,
+	MPUI1510_SLEEP_SAVE_EMIFF_SDRAM_CONFIG,
+	MPUI1510_SLEEP_SAVE_EMIFS_CONFIG,
+	MPUI1510_SLEEP_SAVE_OMAP_IH1_MIR,
+	MPUI1510_SLEEP_SAVE_OMAP_IH2_MIR,
+#if defined(CONFIG_ARCH_OMAP15XX)
+	MPUI1510_SLEEP_SAVE_SIZE
+#else
+	MPUI1510_SLEEP_SAVE_SIZE = 0
+#endif
+};
+
+enum mpui730_save_state {
+	MPUI730_SLEEP_SAVE_START = 0,
+	/*
+	 * MPUI registers 32 bits
+	 */
+	MPUI730_SLEEP_SAVE_MPUI_CTRL,
+	MPUI730_SLEEP_SAVE_MPUI_DSP_BOOT_CONFIG,
+	MPUI730_SLEEP_SAVE_MPUI_DSP_API_CONFIG,
+	MPUI730_SLEEP_SAVE_MPUI_DSP_STATUS,
+	MPUI730_SLEEP_SAVE_EMIFF_SDRAM_CONFIG,
+	MPUI730_SLEEP_SAVE_EMIFS_CONFIG,
+	MPUI730_SLEEP_SAVE_OMAP_IH1_MIR,
+	MPUI730_SLEEP_SAVE_OMAP_IH2_0_MIR,
+	MPUI730_SLEEP_SAVE_OMAP_IH2_1_MIR,
+#if defined(CONFIG_ARCH_OMAP730)
+	MPUI730_SLEEP_SAVE_SIZE
+#else
+	MPUI730_SLEEP_SAVE_SIZE = 0
+#endif
+};
+
+enum mpui1610_save_state {
+	MPUI1610_SLEEP_SAVE_START = 0,
+	/*
+	 * MPUI registers 32 bits
+	 */
+	MPUI1610_SLEEP_SAVE_MPUI_CTRL,
+	MPUI1610_SLEEP_SAVE_MPUI_DSP_BOOT_CONFIG,
+	MPUI1610_SLEEP_SAVE_MPUI_DSP_API_CONFIG,
+	MPUI1610_SLEEP_SAVE_MPUI_DSP_STATUS,
+	MPUI1610_SLEEP_SAVE_EMIFF_SDRAM_CONFIG,
+	MPUI1610_SLEEP_SAVE_EMIFS_CONFIG,
+	MPUI1610_SLEEP_SAVE_OMAP_IH1_MIR,
+	MPUI1610_SLEEP_SAVE_OMAP_IH2_0_MIR,
+	MPUI1610_SLEEP_SAVE_OMAP_IH2_1_MIR,
+	MPUI1610_SLEEP_SAVE_OMAP_IH2_2_MIR,
+	MPUI1610_SLEEP_SAVE_OMAP_IH2_3_MIR,
+#if defined(CONFIG_ARCH_OMAP16XX)
+	MPUI1610_SLEEP_SAVE_SIZE
+#else
+	MPUI1610_SLEEP_SAVE_SIZE = 0
+#endif
+};
+
+enum omap24xx_save_state {
+	OMAP24XX_SLEEP_SAVE_START = 0,
+	OMAP24XX_SLEEP_SAVE_INTC_MIR0,
+	OMAP24XX_SLEEP_SAVE_INTC_MIR1,
+	OMAP24XX_SLEEP_SAVE_INTC_MIR2,
+
+	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_MPU,
+	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_GFX,
+	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_DSP,
+	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_MDM,
+
+	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_MPU,
+	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_CORE,
+	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_GFX,
+	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_DSP,
+	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_MDM,
+
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST1_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST2_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST3_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST4_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST_GFX,
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST_WKUP,
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST_CKGEN,
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST_DSP,
+	OMAP24XX_SLEEP_SAVE_CM_IDLEST_MDM,
+
+	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE1_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE2_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE3_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE4_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_WKUP,
+	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_PLL,
+	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_DSP,
+	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_MDM,
+
+	OMAP24XX_SLEEP_SAVE_CM_FCLKEN1_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_FCLKEN2_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_ICLKEN1_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_ICLKEN2_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_ICLKEN3_CORE,
+	OMAP24XX_SLEEP_SAVE_CM_ICLKEN4_CORE,
+	OMAP24XX_SLEEP_SAVE_GPIO1_IRQENABLE1,
+	OMAP24XX_SLEEP_SAVE_GPIO2_IRQENABLE1,
+	OMAP24XX_SLEEP_SAVE_GPIO3_IRQENABLE1,
+	OMAP24XX_SLEEP_SAVE_GPIO4_IRQENABLE1,
+	OMAP24XX_SLEEP_SAVE_GPIO3_OE,
+	OMAP24XX_SLEEP_SAVE_GPIO4_OE,
+	OMAP24XX_SLEEP_SAVE_GPIO3_RISINGDETECT,
+	OMAP24XX_SLEEP_SAVE_GPIO3_FALLINGDETECT,
+	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SPI1_NCS2,
+	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_MCBSP1_DX,
+	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SSI1_FLAG_TX,
+	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SYS_NIRQW0,
+	OMAP24XX_SLEEP_SAVE_SIZE
+};
+
+#endif /* ASSEMBLER */
+#endif /* __ASM_ARCH_OMAP_PM_H */
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index bf3827a..6fd1c1f 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -25,8 +25,10 @@ obj-$(CONFIG_ARCH_OMAP2)		+= sdrc2xxx.o
 
 # Power Management
 ifeq ($(CONFIG_PM),y)
-obj-y					+= pm.o
+obj-$(CONFIG_ARCH_OMAP2)		+= pm24xx.o
 obj-$(CONFIG_ARCH_OMAP24XX)		+= sleep24xx.o
+obj-$(CONFIG_ARCH_OMAP3)		+= pm34xx.o sleep34xx.o
+obj-$(CONFIG_PM_DEBUG)			+= pm-debug.o
 endif
 
 # Clock framework
diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c
new file mode 100644
index 0000000..6cc375a
--- /dev/null
+++ b/arch/arm/mach-omap2/pm-debug.c
@@ -0,0 +1,152 @@
+/*
+ * OMAP Power Management debug routines
+ *
+ * Copyright (C) 2005 Texas Instruments, Inc.
+ * Copyright (C) 2006-2008 Nokia Corporation
+ *
+ * Written by:
+ * Richard Woodruff <r-woodruff2@ti.com>
+ * Tony Lindgren
+ * Juha Yrjola
+ * Amit Kucheria <amit.kucheria@nokia.com>
+ * Igor Stoppa <igor.stoppa@nokia.com>
+ * Jouni Hogander
+ *
+ * Based on pm.c for omap2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#include <mach/clock.h>
+#include <mach/board.h>
+
+#include "prm.h"
+#include "cm.h"
+#include "pm.h"
+
+int omap2_pm_debug;
+
+#define DUMP_PRM_MOD_REG(mod, reg)    \
+	regs[reg_count].name = #mod "." #reg; \
+	regs[reg_count++].val = prm_read_mod_reg(mod, reg)
+#define DUMP_CM_MOD_REG(mod, reg)     \
+	regs[reg_count].name = #mod "." #reg; \
+	regs[reg_count++].val = cm_read_mod_reg(mod, reg)
+#define DUMP_PRM_REG(reg) \
+	regs[reg_count].name = #reg; \
+	regs[reg_count++].val = __raw_readl(reg)
+#define DUMP_CM_REG(reg) \
+	regs[reg_count].name = #reg; \
+	regs[reg_count++].val = __raw_readl(reg)
+#define DUMP_INTC_REG(reg, off) \
+	regs[reg_count].name = #reg; \
+	regs[reg_count++].val = __raw_readl(IO_ADDRESS(0x480fe000 + (off)))
+
+void omap2_pm_dump(int mode, int resume, unsigned int us)
+{
+	struct reg {
+		const char *name;
+		u32 val;
+	} regs[32];
+	int reg_count = 0, i;
+	const char *s1 = NULL, *s2 = NULL;
+
+	if (!resume) {
+#if 0
+		/* MPU */
+		DUMP_PRM_MOD_REG(OCP_MOD, OMAP2_PRM_IRQENABLE_MPU_OFFSET);
+		DUMP_CM_MOD_REG(MPU_MOD, CM_CLKSTCTRL);
+		DUMP_PRM_MOD_REG(MPU_MOD, PM_PWSTCTRL);
+		DUMP_PRM_MOD_REG(MPU_MOD, PM_PWSTST);
+		DUMP_PRM_MOD_REG(MPU_MOD, PM_WKDEP);
+#endif
+#if 0
+		/* INTC */
+		DUMP_INTC_REG(INTC_MIR0, 0x0084);
+		DUMP_INTC_REG(INTC_MIR1, 0x00a4);
+		DUMP_INTC_REG(INTC_MIR2, 0x00c4);
+#endif
+#if 0
+		DUMP_CM_MOD_REG(CORE_MOD, CM_FCLKEN1);
+		if (cpu_is_omap24xx()) {
+			DUMP_CM_MOD_REG(CORE_MOD, OMAP24XX_CM_FCLKEN2);
+			DUMP_PRM_MOD_REG(OMAP24XX_GR_MOD,
+					OMAP2_PRCM_CLKEMUL_CTRL_OFFSET);
+			DUMP_PRM_MOD_REG(OMAP24XX_GR_MOD,
+					OMAP2_PRCM_CLKSRC_CTRL_OFFSET);
+		}
+		DUMP_CM_MOD_REG(WKUP_MOD, CM_FCLKEN);
+		DUMP_CM_MOD_REG(CORE_MOD, CM_ICLKEN1);
+		DUMP_CM_MOD_REG(CORE_MOD, CM_ICLKEN2);
+		DUMP_CM_MOD_REG(WKUP_MOD, CM_ICLKEN);
+		DUMP_CM_MOD_REG(PLL_MOD, CM_CLKEN);
+		DUMP_CM_MOD_REG(PLL_MOD, CM_AUTOIDLE);
+		DUMP_PRM_MOD_REG(CORE_MOD, PM_PWSTST);
+#endif
+#if 0
+		/* DSP */
+		if (cpu_is_omap24xx()) {
+			DUMP_CM_MOD_REG(OMAP24XX_DSP_MOD, CM_FCLKEN);
+			DUMP_CM_MOD_REG(OMAP24XX_DSP_MOD, CM_ICLKEN);
+			DUMP_CM_MOD_REG(OMAP24XX_DSP_MOD, CM_IDLEST);
+			DUMP_CM_MOD_REG(OMAP24XX_DSP_MOD, CM_AUTOIDLE);
+			DUMP_CM_MOD_REG(OMAP24XX_DSP_MOD, CM_CLKSEL);
+			DUMP_CM_MOD_REG(OMAP24XX_DSP_MOD, CM_CLKSTCTRL);
+			DUMP_PRM_MOD_REG(OMAP24XX_DSP_MOD, RM_RSTCTRL);
+			DUMP_PRM_MOD_REG(OMAP24XX_DSP_MOD, RM_RSTST);
+			DUMP_PRM_MOD_REG(OMAP24XX_DSP_MOD, PM_PWSTCTRL);
+			DUMP_PRM_MOD_REG(OMAP24XX_DSP_MOD, PM_PWSTST);
+		}
+#endif
+	} else {
+		DUMP_PRM_MOD_REG(CORE_MOD, PM_WKST1);
+		if (cpu_is_omap24xx())
+			DUMP_PRM_MOD_REG(CORE_MOD, OMAP24XX_PM_WKST2);
+		DUMP_PRM_MOD_REG(WKUP_MOD, PM_WKST);
+		DUMP_PRM_MOD_REG(OCP_MOD, OMAP2_PRCM_IRQSTATUS_MPU_OFFSET);
+#if 1
+		DUMP_INTC_REG(INTC_PENDING_IRQ0, 0x0098);
+		DUMP_INTC_REG(INTC_PENDING_IRQ1, 0x00b8);
+		DUMP_INTC_REG(INTC_PENDING_IRQ2, 0x00d8);
+#endif
+	}
+
+	switch (mode) {
+	case 0:
+		s1 = "full";
+		s2 = "retention";
+		break;
+	case 1:
+		s1 = "MPU";
+		s2 = "retention";
+		break;
+	case 2:
+		s1 = "MPU";
+		s2 = "idle";
+		break;
+	}
+
+	if (!resume)
+#ifdef CONFIG_NO_HZ
+		printk(KERN_INFO
+		       "--- Going to %s %s (next timer after %u ms)\n", s1, s2,
+		       jiffies_to_msecs(get_next_timer_interrupt(jiffies) -
+					jiffies));
+#else
+		printk(KERN_INFO "--- Going to %s %s\n", s1, s2);
+#endif
+	else
+		printk(KERN_INFO "--- Woke up (slept for %u.%03u ms)\n",
+			us / 1000, us % 1000);
+
+	for (i = 0; i < reg_count; i++)
+		printk(KERN_INFO "%-20s: 0x%08x\n", regs[i].name, regs[i].val);
+}
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
deleted file mode 100644
index ea8ceae..0000000
--- a/arch/arm/mach-omap2/pm.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * linux/arch/arm/mach-omap2/pm.c
- *
- * OMAP2 Power Management Routines
- *
- * Copyright (C) 2006 Nokia Corporation
- * Tony Lindgren <tony@atomide.com>
- *
- * Copyright (C) 2005 Texas Instruments, Inc.
- * Richard Woodruff <r-woodruff2@ti.com>
- *
- * Based on pm.c for omap1
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/suspend.h>
-#include <linux/sched.h>
-#include <linux/proc_fs.h>
-#include <linux/interrupt.h>
-#include <linux/sysfs.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-
-#include <asm/irq.h>
-#include <asm/atomic.h>
-#include <asm/mach/time.h>
-#include <asm/mach/irq.h>
-
-#include <mach/irqs.h>
-#include <mach/clock.h>
-#include <mach/sram.h>
-#include <mach/pm.h>
-
-static struct clk *vclk;
-static void (*omap2_sram_idle)(void);
-static void (*omap2_sram_suspend)(int dllctrl, int cpu_rev);
-static void (*saved_idle)(void);
-
-extern void __init pmdomain_init(void);
-extern void pmdomain_set_autoidle(void);
-
-static unsigned int omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_SIZE];
-
-void omap2_pm_idle(void)
-{
-	local_irq_disable();
-	local_fiq_disable();
-	if (need_resched()) {
-		local_fiq_enable();
-		local_irq_enable();
-		return;
-	}
-
-	omap2_sram_idle();
-	local_fiq_enable();
-	local_irq_enable();
-}
-
-static int omap2_pm_prepare(void)
-{
-	/* We cannot sleep in idle until we have resumed */
-	saved_idle = pm_idle;
-	pm_idle = NULL;
-	return 0;
-}
-
-static int omap2_pm_suspend(void)
-{
-	return 0;
-}
-
-static int omap2_pm_enter(suspend_state_t state)
-{
-	int ret = 0;
-
-	switch (state)
-	{
-	case PM_SUSPEND_STANDBY:
-	case PM_SUSPEND_MEM:
-		ret = omap2_pm_suspend();
-		break;
-	default:
-		ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static void omap2_pm_finish(void)
-{
-	pm_idle = saved_idle;
-}
-
-static struct platform_suspend_ops omap_pm_ops = {
-	.prepare	= omap2_pm_prepare,
-	.enter		= omap2_pm_enter,
-	.finish		= omap2_pm_finish,
-	.valid		= suspend_valid_only_mem,
-};
-
-static int __init omap2_pm_init(void)
-{
-	return 0;
-}
-
-__initcall(omap2_pm_init);
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
new file mode 100644
index 0000000..f7b3baf
--- /dev/null
+++ b/arch/arm/mach-omap2/pm.h
@@ -0,0 +1,38 @@
+/*
+ * OMAP2/3 Power Management Routines
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Jouni Hogander
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ARCH_ARM_MACH_OMAP2_PM_H
+#define __ARCH_ARM_MACH_OMAP2_PM_H
+
+extern int omap2_pm_init(void);
+extern int omap3_pm_init(void);
+
+#ifdef CONFIG_PM_DEBUG
+extern void omap2_pm_dump(int mode, int resume, unsigned int us);
+extern int omap2_pm_debug;
+#else
+#define omap2_pm_dump(mode, resume, us)		do {} while (0);
+#define omap2_pm_debug				0
+#endif /* CONFIG_PM_DEBUG */
+
+extern void omap24xx_idle_loop_suspend(void);
+
+extern void omap24xx_cpu_suspend(u32 dll_ctrl, void __iomem *sdrc_dlla_ctrl,
+					void __iomem *sdrc_power);
+extern void omap34xx_cpu_suspend(u32 *addr, int save_state);
+extern void save_secure_ram_context(u32 *addr);
+
+extern unsigned int omap24xx_idle_loop_suspend_sz;
+extern unsigned int omap34xx_suspend_sz;
+extern unsigned int save_secure_ram_context_sz;
+extern unsigned int omap24xx_cpu_suspend_sz;
+extern unsigned int omap34xx_cpu_suspend_sz;
+
+#endif
diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c
new file mode 100644
index 0000000..232b9f6
--- /dev/null
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -0,0 +1,548 @@
+/*
+ * OMAP2 Power Management Routines
+ *
+ * Copyright (C) 2005 Texas Instruments, Inc.
+ * Copyright (C) 2006-2008 Nokia Corporation
+ *
+ * Written by:
+ * Richard Woodruff <r-woodruff2@ti.com>
+ * Tony Lindgren
+ * Juha Yrjola
+ * Amit Kucheria <amit.kucheria@nokia.com>
+ * Igor Stoppa <igor.stoppa@nokia.com>
+ *
+ * Based on pm.c for omap1
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/suspend.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/interrupt.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/time.h>
+#include <linux/gpio.h>
+
+#include <asm/mach/time.h>
+#include <asm/mach/irq.h>
+#include <asm/mach-types.h>
+
+#include <mach/irqs.h>
+#include <mach/clock.h>
+#include <mach/sram.h>
+#include <mach/control.h>
+#include <mach/mux.h>
+#include <mach/dma.h>
+#include <mach/board.h>
+
+#include "prm.h"
+#include "prm-regbits-24xx.h"
+#include "cm.h"
+#include "cm-regbits-24xx.h"
+#include "sdrc.h"
+#include "pm.h"
+
+#include <mach/powerdomain.h>
+#include <mach/clockdomain.h>
+
+static void (*omap2_sram_idle)(void);
+static void (*omap2_sram_suspend)(u32 dllctrl, void __iomem *sdrc_dlla_ctrl,
+				  void __iomem *sdrc_power);
+
+static struct powerdomain *mpu_pwrdm;
+static struct powerdomain *core_pwrdm;
+
+static struct clockdomain *dsp_clkdm;
+static struct clockdomain *gfx_clkdm;
+
+static struct clk *osc_ck, *emul_ck;
+
+static int omap2_fclks_active(void)
+{
+	u32 f1, f2;
+
+	f1 = cm_read_mod_reg(CORE_MOD, CM_FCLKEN1);
+	f2 = cm_read_mod_reg(CORE_MOD, OMAP24XX_CM_FCLKEN2);
+	if (f1 | f2)
+		return 1;
+	return 0;
+}
+
+static int omap2_irq_pending(void)
+{
+	u32 pending_reg = 0x480fe098;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		if (omap_readl(pending_reg))
+			return 1;
+		pending_reg += 0x20;
+	}
+	return 0;
+}
+
+static void omap2_enter_full_retention(void)
+{
+	u32 l;
+	struct timespec ts_preidle, ts_postidle, ts_idle;
+
+	/* There is 1 reference hold for all children of the oscillator
+	 * clock, the following will remove it. If no one else uses the
+	 * oscillator itself it will be disabled if/when we enter retention
+	 * mode.
+	 */
+	clk_disable(osc_ck);
+
+	/* Clear old wake-up events */
+	/* REVISIT: These write to reserved bits? */
+	prm_write_mod_reg(0xffffffff, CORE_MOD, PM_WKST1);
+	prm_write_mod_reg(0xffffffff, CORE_MOD, OMAP24XX_PM_WKST2);
+	prm_write_mod_reg(0xffffffff, WKUP_MOD, PM_WKST);
+
+	/*
+	 * Set MPU powerdomain's next power state to RETENTION;
+	 * preserve logic state during retention
+	 */
+	pwrdm_set_logic_retst(mpu_pwrdm, PWRDM_POWER_RET);
+	pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_RET);
+
+	/* Workaround to kill USB */
+	l = omap_ctrl_readl(OMAP2_CONTROL_DEVCONF0) | OMAP24XX_USBSTANDBYCTRL;
+	omap_ctrl_writel(l, OMAP2_CONTROL_DEVCONF0);
+
+	omap2_gpio_prepare_for_retention();
+
+	if (omap2_pm_debug) {
+		omap2_pm_dump(0, 0, 0);
+		getnstimeofday(&ts_preidle);
+	}
+
+	/* One last check for pending IRQs to avoid extra latency due
+	 * to sleeping unnecessarily. */
+	if (omap2_irq_pending())
+		goto no_sleep;
+
+	/* Jump to SRAM suspend code */
+	omap2_sram_suspend(sdrc_read_reg(SDRC_DLLA_CTRL),
+			   OMAP_SDRC_REGADDR(SDRC_DLLA_CTRL),
+			   OMAP_SDRC_REGADDR(SDRC_POWER));
+no_sleep:
+
+	if (omap2_pm_debug) {
+		unsigned long long tmp;
+
+		getnstimeofday(&ts_postidle);
+		ts_idle = timespec_sub(ts_postidle, ts_preidle);
+		tmp = timespec_to_ns(&ts_idle) * NSEC_PER_USEC;
+		omap2_pm_dump(0, 1, tmp);
+	}
+	omap2_gpio_resume_after_retention();
+
+	clk_enable(osc_ck);
+
+	/* clear CORE wake-up events */
+	prm_write_mod_reg(0xffffffff, CORE_MOD, PM_WKST1);
+	prm_write_mod_reg(0xffffffff, CORE_MOD, OMAP24XX_PM_WKST2);
+
+	/* wakeup domain events - bit 1: GPT1, bit5 GPIO */
+	prm_clear_mod_reg_bits(0x4 | 0x1, WKUP_MOD, PM_WKST);
+
+	/* MPU domain wake events */
+	l = prm_read_mod_reg(OCP_MOD, OMAP2_PRCM_IRQSTATUS_MPU_OFFSET);
+	if (l & 0x01)
+		prm_write_mod_reg(0x01, OCP_MOD,
+				  OMAP2_PRCM_IRQSTATUS_MPU_OFFSET);
+	if (l & 0x20)
+		prm_write_mod_reg(0x20, OCP_MOD,
+				  OMAP2_PRCM_IRQSTATUS_MPU_OFFSET);
+
+	/* Mask future PRCM-to-MPU interrupts */
+	prm_write_mod_reg(0x0, OCP_MOD, OMAP2_PRCM_IRQSTATUS_MPU_OFFSET);
+}
+
+static int omap2_i2c_active(void)
+{
+	u32 l;
+
+	l = cm_read_mod_reg(CORE_MOD, CM_FCLKEN1);
+	return l & (OMAP2420_EN_I2C2 | OMAP2420_EN_I2C1);
+}
+
+static int sti_console_enabled;
+
+static int omap2_allow_mpu_retention(void)
+{
+	u32 l;
+
+	/* Check for MMC, UART2, UART1, McSPI2, McSPI1 and DSS1. */
+	l = cm_read_mod_reg(CORE_MOD, CM_FCLKEN1);
+	if (l & (OMAP2420_EN_MMC | OMAP24XX_EN_UART2 |
+		 OMAP24XX_EN_UART1 | OMAP24XX_EN_MCSPI2 |
+		 OMAP24XX_EN_MCSPI1 | OMAP24XX_EN_DSS1))
+		return 0;
+	/* Check for UART3. */
+	l = cm_read_mod_reg(CORE_MOD, OMAP24XX_CM_FCLKEN2);
+	if (l & OMAP24XX_EN_UART3)
+		return 0;
+	if (sti_console_enabled)
+		return 0;
+
+	return 1;
+}
+
+static void omap2_enter_mpu_retention(void)
+{
+	int only_idle = 0;
+	struct timespec ts_preidle, ts_postidle, ts_idle;
+
+	/* Putting MPU into the WFI state while a transfer is active
+	 * seems to cause the I2C block to timeout. Why? Good question. */
+	if (omap2_i2c_active())
+		return;
+
+	/* The peripherals seem not to be able to wake up the MPU when
+	 * it is in retention mode. */
+	if (omap2_allow_mpu_retention()) {
+		/* REVISIT: These write to reserved bits? */
+		prm_write_mod_reg(0xffffffff, CORE_MOD, PM_WKST1);
+		prm_write_mod_reg(0xffffffff, CORE_MOD, OMAP24XX_PM_WKST2);
+		prm_write_mod_reg(0xffffffff, WKUP_MOD, PM_WKST);
+
+		/* Try to enter MPU retention */
+		prm_write_mod_reg((0x01 << OMAP_POWERSTATE_SHIFT) |
+				  OMAP_LOGICRETSTATE,
+				  MPU_MOD, PM_PWSTCTRL);
+	} else {
+		/* Block MPU retention */
+
+		prm_write_mod_reg(OMAP_LOGICRETSTATE, MPU_MOD, PM_PWSTCTRL);
+		only_idle = 1;
+	}
+
+	if (omap2_pm_debug) {
+		omap2_pm_dump(only_idle ? 2 : 1, 0, 0);
+		getnstimeofday(&ts_preidle);
+	}
+
+	omap2_sram_idle();
+
+	if (omap2_pm_debug) {
+		unsigned long long tmp;
+
+		getnstimeofday(&ts_postidle);
+		ts_idle = timespec_sub(ts_postidle, ts_preidle);
+		tmp = timespec_to_ns(&ts_idle) * NSEC_PER_USEC;
+		omap2_pm_dump(only_idle ? 2 : 1, 1, tmp);
+	}
+}
+
+static int omap2_can_sleep(void)
+{
+	if (omap2_fclks_active())
+		return 0;
+	if (osc_ck->usecount > 1)
+		return 0;
+	if (omap_dma_running())
+		return 0;
+
+	return 1;
+}
+
+static void omap2_pm_idle(void)
+{
+	local_irq_disable();
+	local_fiq_disable();
+
+	if (!omap2_can_sleep()) {
+		if (omap2_irq_pending())
+			goto out;
+		omap2_enter_mpu_retention();
+		goto out;
+	}
+
+	if (omap2_irq_pending())
+		goto out;
+
+	omap2_enter_full_retention();
+
+out:
+	local_fiq_enable();
+	local_irq_enable();
+}
+
+static int omap2_pm_prepare(void)
+{
+	/* We cannot sleep in idle until we have resumed */
+	disable_hlt();
+	return 0;
+}
+
+static int omap2_pm_suspend(void)
+{
+	u32 wken_wkup, mir1;
+
+	wken_wkup = prm_read_mod_reg(WKUP_MOD, PM_WKEN);
+	prm_write_mod_reg(wken_wkup & ~OMAP24XX_EN_GPT1, WKUP_MOD, PM_WKEN);
+
+	/* Mask GPT1 */
+	mir1 = omap_readl(0x480fe0a4);
+	omap_writel(1 << 5, 0x480fe0ac);
+
+	omap2_enter_full_retention();
+
+	omap_writel(mir1, 0x480fe0a4);
+	prm_write_mod_reg(wken_wkup, WKUP_MOD, PM_WKEN);
+
+	return 0;
+}
+
+static int omap2_pm_enter(suspend_state_t state)
+{
+	int ret = 0;
+
+	switch (state) {
+	case PM_SUSPEND_STANDBY:
+	case PM_SUSPEND_MEM:
+		ret = omap2_pm_suspend();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void omap2_pm_finish(void)
+{
+	enable_hlt();
+}
+
+static struct platform_suspend_ops omap_pm_ops = {
+	.prepare	= omap2_pm_prepare,
+	.enter		= omap2_pm_enter,
+	.finish		= omap2_pm_finish,
+	.valid		= suspend_valid_only_mem,
+};
+
+static int _pm_clkdm_enable_hwsup(struct clockdomain *clkdm)
+{
+	omap2_clkdm_allow_idle(clkdm);
+	return 0;
+}
+
+static void __init prcm_setup_regs(void)
+{
+	int i, num_mem_banks;
+	struct powerdomain *pwrdm;
+
+	/* Enable autoidle */
+	prm_write_mod_reg(OMAP24XX_AUTOIDLE, OCP_MOD,
+			  OMAP2_PRCM_SYSCONFIG_OFFSET);
+
+	/* Set all domain wakeup dependencies */
+	prm_write_mod_reg(OMAP_EN_WKUP_MASK, MPU_MOD, PM_WKDEP);
+	prm_write_mod_reg(0, OMAP24XX_DSP_MOD, PM_WKDEP);
+	prm_write_mod_reg(0, GFX_MOD, PM_WKDEP);
+	prm_write_mod_reg(0, CORE_MOD, PM_WKDEP);
+	if (cpu_is_omap2430())
+		prm_write_mod_reg(0, OMAP2430_MDM_MOD, PM_WKDEP);
+
+	/*
+	 * Set CORE powerdomain memory banks to retain their contents
+	 * during RETENTION
+	 */
+	num_mem_banks = pwrdm_get_mem_bank_count(core_pwrdm);
+	for (i = 0; i < num_mem_banks; i++)
+		pwrdm_set_mem_retst(core_pwrdm, i, PWRDM_POWER_RET);
+
+	/* Set CORE powerdomain's next power state to RETENTION */
+	pwrdm_set_next_pwrst(core_pwrdm, PWRDM_POWER_RET);
+
+	/*
+	 * Set MPU powerdomain's next power state to RETENTION;
+	 * preserve logic state during retention
+	 */
+	pwrdm_set_logic_retst(mpu_pwrdm, PWRDM_POWER_RET);
+	pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_RET);
+
+	/* Force-power down DSP, GFX powerdomains */
+
+	pwrdm = clkdm_get_pwrdm(dsp_clkdm);
+	pwrdm_set_next_pwrst(pwrdm, PWRDM_POWER_OFF);
+	omap2_clkdm_sleep(dsp_clkdm);
+
+	pwrdm = clkdm_get_pwrdm(gfx_clkdm);
+	pwrdm_set_next_pwrst(pwrdm, PWRDM_POWER_OFF);
+	omap2_clkdm_sleep(gfx_clkdm);
+
+	/* Enable clockdomain hardware-supervised control for all clkdms */
+	clkdm_for_each(_pm_clkdm_enable_hwsup);
+
+	/* Enable clock autoidle for all domains */
+	cm_write_mod_reg(OMAP24XX_AUTO_CAM |
+			 OMAP24XX_AUTO_MAILBOXES |
+			 OMAP24XX_AUTO_WDT4 |
+			 OMAP2420_AUTO_WDT3 |
+			 OMAP24XX_AUTO_MSPRO |
+			 OMAP2420_AUTO_MMC |
+			 OMAP24XX_AUTO_FAC |
+			 OMAP2420_AUTO_EAC |
+			 OMAP24XX_AUTO_HDQ |
+			 OMAP24XX_AUTO_UART2 |
+			 OMAP24XX_AUTO_UART1 |
+			 OMAP24XX_AUTO_I2C2 |
+			 OMAP24XX_AUTO_I2C1 |
+			 OMAP24XX_AUTO_MCSPI2 |
+			 OMAP24XX_AUTO_MCSPI1 |
+			 OMAP24XX_AUTO_MCBSP2 |
+			 OMAP24XX_AUTO_MCBSP1 |
+			 OMAP24XX_AUTO_GPT12 |
+			 OMAP24XX_AUTO_GPT11 |
+			 OMAP24XX_AUTO_GPT10 |
+			 OMAP24XX_AUTO_GPT9 |
+			 OMAP24XX_AUTO_GPT8 |
+			 OMAP24XX_AUTO_GPT7 |
+			 OMAP24XX_AUTO_GPT6 |
+			 OMAP24XX_AUTO_GPT5 |
+			 OMAP24XX_AUTO_GPT4 |
+			 OMAP24XX_AUTO_GPT3 |
+			 OMAP24XX_AUTO_GPT2 |
+			 OMAP2420_AUTO_VLYNQ |
+			 OMAP24XX_AUTO_DSS,
+			 CORE_MOD, CM_AUTOIDLE1);
+	cm_write_mod_reg(OMAP24XX_AUTO_UART3 |
+			 OMAP24XX_AUTO_SSI |
+			 OMAP24XX_AUTO_USB,
+			 CORE_MOD, CM_AUTOIDLE2);
+	cm_write_mod_reg(OMAP24XX_AUTO_SDRC |
+			 OMAP24XX_AUTO_GPMC |
+			 OMAP24XX_AUTO_SDMA,
+			 CORE_MOD, CM_AUTOIDLE3);
+	cm_write_mod_reg(OMAP24XX_AUTO_PKA |
+			 OMAP24XX_AUTO_AES |
+			 OMAP24XX_AUTO_RNG |
+			 OMAP24XX_AUTO_SHA |
+			 OMAP24XX_AUTO_DES,
+			 CORE_MOD, OMAP24XX_CM_AUTOIDLE4);
+
+	cm_write_mod_reg(OMAP2420_AUTO_DSP_IPI, OMAP24XX_DSP_MOD, CM_AUTOIDLE);
+
+	/* Put DPLL and both APLLs into autoidle mode */
+	cm_write_mod_reg((0x03 << OMAP24XX_AUTO_DPLL_SHIFT) |
+			 (0x03 << OMAP24XX_AUTO_96M_SHIFT) |
+			 (0x03 << OMAP24XX_AUTO_54M_SHIFT),
+			 PLL_MOD, CM_AUTOIDLE);
+
+	cm_write_mod_reg(OMAP24XX_AUTO_OMAPCTRL |
+			 OMAP24XX_AUTO_WDT1 |
+			 OMAP24XX_AUTO_MPU_WDT |
+			 OMAP24XX_AUTO_GPIOS |
+			 OMAP24XX_AUTO_32KSYNC |
+			 OMAP24XX_AUTO_GPT1,
+			 WKUP_MOD, CM_AUTOIDLE);
+
+	/* REVISIT: Configure number of 32 kHz clock cycles for sys_clk
+	 * stabilisation */
+	prm_write_mod_reg(15 << OMAP_SETUP_TIME_SHIFT, OMAP24XX_GR_MOD,
+			  OMAP2_PRCM_CLKSSETUP_OFFSET);
+
+	/* Configure automatic voltage transition */
+	prm_write_mod_reg(2 << OMAP_SETUP_TIME_SHIFT, OMAP24XX_GR_MOD,
+			  OMAP2_PRCM_VOLTSETUP_OFFSET);
+	prm_write_mod_reg(OMAP24XX_AUTO_EXTVOLT |
+			  (0x1 << OMAP24XX_SETOFF_LEVEL_SHIFT) |
+			  OMAP24XX_MEMRETCTRL |
+			  (0x1 << OMAP24XX_SETRET_LEVEL_SHIFT) |
+			  (0x0 << OMAP24XX_VOLT_LEVEL_SHIFT),
+			  OMAP24XX_GR_MOD, OMAP2_PRCM_VOLTCTRL_OFFSET);
+
+	/* Enable wake-up events */
+	prm_write_mod_reg(OMAP24XX_EN_GPIOS | OMAP24XX_EN_GPT1,
+			  WKUP_MOD, PM_WKEN);
+}
+
+int __init omap2_pm_init(void)
+{
+	u32 l;
+
+	if (!cpu_is_omap24xx())
+		return -ENODEV;
+
+	printk(KERN_INFO "Power Management for OMAP2 initializing\n");
+	l = prm_read_mod_reg(OCP_MOD, OMAP2_PRCM_REVISION_OFFSET);
+	printk(KERN_INFO "PRCM revision %d.%d\n", (l >> 4) & 0x0f, l & 0x0f);
+
+	/* Look up important powerdomains, clockdomains */
+
+	mpu_pwrdm = pwrdm_lookup("mpu_pwrdm");
+	if (!mpu_pwrdm)
+		pr_err("PM: mpu_pwrdm not found\n");
+
+	core_pwrdm = pwrdm_lookup("core_pwrdm");
+	if (!core_pwrdm)
+		pr_err("PM: core_pwrdm not found\n");
+
+	dsp_clkdm = clkdm_lookup("dsp_clkdm");
+	if (!dsp_clkdm)
+		pr_err("PM: mpu_clkdm not found\n");
+
+	gfx_clkdm = clkdm_lookup("gfx_clkdm");
+	if (!gfx_clkdm)
+		pr_err("PM: gfx_clkdm not found\n");
+
+
+	osc_ck = clk_get(NULL, "osc_ck");
+	if (IS_ERR(osc_ck)) {
+		printk(KERN_ERR "could not get osc_ck\n");
+		return -ENODEV;
+	}
+
+	if (cpu_is_omap242x()) {
+		emul_ck = clk_get(NULL, "emul_ck");
+		if (IS_ERR(emul_ck)) {
+			printk(KERN_ERR "could not get emul_ck\n");
+			clk_put(osc_ck);
+			return -ENODEV;
+		}
+	}
+
+	prcm_setup_regs();
+
+	/* Hack to prevent MPU retention when STI console is enabled. */
+	{
+		const struct omap_sti_console_config *sti;
+
+		sti = omap_get_config(OMAP_TAG_STI_CONSOLE,
+				      struct omap_sti_console_config);
+		if (sti != NULL && sti->enable)
+			sti_console_enabled = 1;
+	}
+
+	/*
+	 * We copy the assembler sleep/wakeup routines to SRAM.
+	 * These routines need to be in SRAM as that's the only
+	 * memory the MPU can see when it wakes up.
+	 */
+	if (cpu_is_omap24xx()) {
+		omap2_sram_idle = omap_sram_push(omap24xx_idle_loop_suspend,
+						 omap24xx_idle_loop_suspend_sz);
+
+		omap2_sram_suspend = omap_sram_push(omap24xx_cpu_suspend,
+						    omap24xx_cpu_suspend_sz);
+	}
+
+	suspend_set_ops(&omap_pm_ops);
+	pm_idle = omap2_pm_idle;
+
+	return 0;
+}
+
+late_initcall(omap2_pm_init);
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
new file mode 100644
index 0000000..4474947
--- /dev/null
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -0,0 +1,606 @@
+/*
+ * OMAP3 Power Management Routines
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation
+ * Tony Lindgren <tony@atomide.com>
+ * Jouni Hogander
+ *
+ * Copyright (C) 2005 Texas Instruments, Inc.
+ * Richard Woodruff <r-woodruff2@ti.com>
+ *
+ * Based on pm.c for omap1
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/pm.h>
+#include <linux/suspend.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+
+#include <mach/sram.h>
+#include <mach/clockdomain.h>
+#include <mach/powerdomain.h>
+#include <mach/control.h>
+
+#include "cm.h"
+#include "cm-regbits-34xx.h"
+#include "prm-regbits-34xx.h"
+
+#include "prm.h"
+#include "pm.h"
+
+struct power_state {
+	struct powerdomain *pwrdm;
+	u32 next_state;
+	u32 saved_state;
+	struct list_head node;
+};
+
+static LIST_HEAD(pwrst_list);
+
+static void (*_omap_sram_idle)(u32 *addr, int save_state);
+
+static struct powerdomain *mpu_pwrdm;
+
+/* PRCM Interrupt Handler for wakeups */
+static irqreturn_t prcm_interrupt_handler (int irq, void *dev_id)
+{
+	u32 wkst, irqstatus_mpu;
+	u32 fclk, iclk;
+
+	/* WKUP */
+	wkst = prm_read_mod_reg(WKUP_MOD, PM_WKST);
+	if (wkst) {
+		iclk = cm_read_mod_reg(WKUP_MOD, CM_ICLKEN);
+		fclk = cm_read_mod_reg(WKUP_MOD, CM_FCLKEN);
+		cm_set_mod_reg_bits(wkst, WKUP_MOD, CM_ICLKEN);
+		cm_set_mod_reg_bits(wkst, WKUP_MOD, CM_FCLKEN);
+		prm_write_mod_reg(wkst, WKUP_MOD, PM_WKST);
+		while (prm_read_mod_reg(WKUP_MOD, PM_WKST))
+			cpu_relax();
+		cm_write_mod_reg(iclk, WKUP_MOD, CM_ICLKEN);
+		cm_write_mod_reg(fclk, WKUP_MOD, CM_FCLKEN);
+	}
+
+	/* CORE */
+	wkst = prm_read_mod_reg(CORE_MOD, PM_WKST1);
+	if (wkst) {
+		iclk = cm_read_mod_reg(CORE_MOD, CM_ICLKEN1);
+		fclk = cm_read_mod_reg(CORE_MOD, CM_FCLKEN1);
+		cm_set_mod_reg_bits(wkst, CORE_MOD, CM_ICLKEN1);
+		cm_set_mod_reg_bits(wkst, CORE_MOD, CM_FCLKEN1);
+		prm_write_mod_reg(wkst, CORE_MOD, PM_WKST1);
+		while (prm_read_mod_reg(CORE_MOD, PM_WKST1))
+			cpu_relax();
+		cm_write_mod_reg(iclk, CORE_MOD, CM_ICLKEN1);
+		cm_write_mod_reg(fclk, CORE_MOD, CM_FCLKEN1);
+	}
+	wkst = prm_read_mod_reg(CORE_MOD, OMAP3430ES2_PM_WKST3);
+	if (wkst) {
+		iclk = cm_read_mod_reg(CORE_MOD, CM_ICLKEN3);
+		fclk = cm_read_mod_reg(CORE_MOD, OMAP3430ES2_CM_FCLKEN3);
+		cm_set_mod_reg_bits(wkst, CORE_MOD, CM_ICLKEN3);
+		cm_set_mod_reg_bits(wkst, CORE_MOD, OMAP3430ES2_CM_FCLKEN3);
+		prm_write_mod_reg(wkst, CORE_MOD, OMAP3430ES2_PM_WKST3);
+		while (prm_read_mod_reg(CORE_MOD, OMAP3430ES2_PM_WKST3))
+			cpu_relax();
+		cm_write_mod_reg(iclk, CORE_MOD, CM_ICLKEN3);
+		cm_write_mod_reg(fclk, CORE_MOD, OMAP3430ES2_CM_FCLKEN3);
+	}
+
+	/* PER */
+	wkst = prm_read_mod_reg(OMAP3430_PER_MOD, PM_WKST);
+	if (wkst) {
+		iclk = cm_read_mod_reg(OMAP3430_PER_MOD, CM_ICLKEN);
+		fclk = cm_read_mod_reg(OMAP3430_PER_MOD, CM_FCLKEN);
+		cm_set_mod_reg_bits(wkst, OMAP3430_PER_MOD, CM_ICLKEN);
+		cm_set_mod_reg_bits(wkst, OMAP3430_PER_MOD, CM_FCLKEN);
+		prm_write_mod_reg(wkst, OMAP3430_PER_MOD, PM_WKST);
+		while (prm_read_mod_reg(OMAP3430_PER_MOD, PM_WKST))
+			cpu_relax();
+		cm_write_mod_reg(iclk, OMAP3430_PER_MOD, CM_ICLKEN);
+		cm_write_mod_reg(fclk, OMAP3430_PER_MOD, CM_FCLKEN);
+	}
+
+	if (omap_rev() > OMAP3430_REV_ES1_0) {
+		/* USBHOST */
+		wkst = prm_read_mod_reg(OMAP3430ES2_USBHOST_MOD, PM_WKST);
+		if (wkst) {
+			iclk = cm_read_mod_reg(OMAP3430ES2_USBHOST_MOD,
+					       CM_ICLKEN);
+			fclk = cm_read_mod_reg(OMAP3430ES2_USBHOST_MOD,
+					       CM_FCLKEN);
+			cm_set_mod_reg_bits(wkst, OMAP3430ES2_USBHOST_MOD,
+					    CM_ICLKEN);
+			cm_set_mod_reg_bits(wkst, OMAP3430ES2_USBHOST_MOD,
+					    CM_FCLKEN);
+			prm_write_mod_reg(wkst, OMAP3430ES2_USBHOST_MOD,
+					  PM_WKST);
+			while (prm_read_mod_reg(OMAP3430ES2_USBHOST_MOD,
+						PM_WKST))
+				cpu_relax();
+			cm_write_mod_reg(iclk, OMAP3430ES2_USBHOST_MOD,
+					 CM_ICLKEN);
+			cm_write_mod_reg(fclk, OMAP3430ES2_USBHOST_MOD,
+					 CM_FCLKEN);
+		}
+	}
+
+	irqstatus_mpu = prm_read_mod_reg(OCP_MOD,
+					 OMAP3_PRM_IRQSTATUS_MPU_OFFSET);
+	prm_write_mod_reg(irqstatus_mpu, OCP_MOD,
+			  OMAP3_PRM_IRQSTATUS_MPU_OFFSET);
+
+	while (prm_read_mod_reg(OCP_MOD, OMAP3_PRM_IRQSTATUS_MPU_OFFSET))
+		cpu_relax();
+
+	return IRQ_HANDLED;
+}
+
+static void omap_sram_idle(void)
+{
+	/* Variable to tell what needs to be saved and restored
+	 * in omap_sram_idle*/
+	/* save_state = 0 => Nothing to save and restored */
+	/* save_state = 1 => Only L1 and logic lost */
+	/* save_state = 2 => Only L2 lost */
+	/* save_state = 3 => L1, L2 and logic lost */
+	int save_state = 0, mpu_next_state;
+
+	if (!_omap_sram_idle)
+		return;
+
+	mpu_next_state = pwrdm_read_next_pwrst(mpu_pwrdm);
+	switch (mpu_next_state) {
+	case PWRDM_POWER_RET:
+		/* No need to save context */
+		save_state = 0;
+		break;
+	default:
+		/* Invalid state */
+		printk(KERN_ERR "Invalid mpu state in sram_idle\n");
+		return;
+	}
+	omap2_gpio_prepare_for_retention();
+
+	_omap_sram_idle(NULL, save_state);
+	cpu_init();
+
+	omap2_gpio_resume_after_retention();
+}
+
+/*
+ * Check if functional clocks are enabled before entering
+ * sleep. This function could be behind CONFIG_PM_DEBUG
+ * when all drivers are configuring their sysconfig registers
+ * properly and using their clocks properly.
+ */
+static int omap3_fclks_active(void)
+{
+	u32 fck_core1 = 0, fck_core3 = 0, fck_sgx = 0, fck_dss = 0,
+		fck_cam = 0, fck_per = 0, fck_usbhost = 0;
+
+	fck_core1 = cm_read_mod_reg(CORE_MOD,
+				    CM_FCLKEN1);
+	if (omap_rev() > OMAP3430_REV_ES1_0) {
+		fck_core3 = cm_read_mod_reg(CORE_MOD,
+					    OMAP3430ES2_CM_FCLKEN3);
+		fck_sgx = cm_read_mod_reg(OMAP3430ES2_SGX_MOD,
+					  CM_FCLKEN);
+		fck_usbhost = cm_read_mod_reg(OMAP3430ES2_USBHOST_MOD,
+					      CM_FCLKEN);
+	} else
+		fck_sgx = cm_read_mod_reg(GFX_MOD,
+					  OMAP3430ES2_CM_FCLKEN3);
+	fck_dss = cm_read_mod_reg(OMAP3430_DSS_MOD,
+				  CM_FCLKEN);
+	fck_cam = cm_read_mod_reg(OMAP3430_CAM_MOD,
+				  CM_FCLKEN);
+	fck_per = cm_read_mod_reg(OMAP3430_PER_MOD,
+				  CM_FCLKEN);
+	if (fck_core1 | fck_core3 | fck_sgx | fck_dss |
+	    fck_cam | fck_per | fck_usbhost)
+		return 1;
+	return 0;
+}
+
+static int omap3_can_sleep(void)
+{
+	if (omap3_fclks_active())
+		return 0;
+	return 1;
+}
+
+/* This sets pwrdm state (other than mpu & core. Currently only ON &
+ * RET are supported. Function is assuming that clkdm doesn't have
+ * hw_sup mode enabled. */
+static int set_pwrdm_state(struct powerdomain *pwrdm, u32 state)
+{
+	u32 cur_state;
+	int sleep_switch = 0;
+	int ret = 0;
+
+	if (pwrdm == NULL || IS_ERR(pwrdm))
+		return -EINVAL;
+
+	while (!(pwrdm->pwrsts & (1 << state))) {
+		if (state == PWRDM_POWER_OFF)
+			return ret;
+		state--;
+	}
+
+	cur_state = pwrdm_read_next_pwrst(pwrdm);
+	if (cur_state == state)
+		return ret;
+
+	if (pwrdm_read_pwrst(pwrdm) < PWRDM_POWER_ON) {
+		omap2_clkdm_wakeup(pwrdm->pwrdm_clkdms[0]);
+		sleep_switch = 1;
+		pwrdm_wait_transition(pwrdm);
+	}
+
+	ret = pwrdm_set_next_pwrst(pwrdm, state);
+	if (ret) {
+		printk(KERN_ERR "Unable to set state of powerdomain: %s\n",
+		       pwrdm->name);
+		goto err;
+	}
+
+	if (sleep_switch) {
+		omap2_clkdm_allow_idle(pwrdm->pwrdm_clkdms[0]);
+		pwrdm_wait_transition(pwrdm);
+	}
+
+err:
+	return ret;
+}
+
+static void omap3_pm_idle(void)
+{
+	local_irq_disable();
+	local_fiq_disable();
+
+	if (!omap3_can_sleep())
+		goto out;
+
+	if (omap_irq_pending())
+		goto out;
+
+	omap_sram_idle();
+
+out:
+	local_fiq_enable();
+	local_irq_enable();
+}
+
+static int omap3_pm_prepare(void)
+{
+	disable_hlt();
+	return 0;
+}
+
+static int omap3_pm_suspend(void)
+{
+	struct power_state *pwrst;
+	int state, ret = 0;
+
+	/* Read current next_pwrsts */
+	list_for_each_entry(pwrst, &pwrst_list, node)
+		pwrst->saved_state = pwrdm_read_next_pwrst(pwrst->pwrdm);
+	/* Set ones wanted by suspend */
+	list_for_each_entry(pwrst, &pwrst_list, node) {
+		if (set_pwrdm_state(pwrst->pwrdm, pwrst->next_state))
+			goto restore;
+		if (pwrdm_clear_all_prev_pwrst(pwrst->pwrdm))
+			goto restore;
+	}
+
+	omap_sram_idle();
+
+restore:
+	/* Restore next_pwrsts */
+	list_for_each_entry(pwrst, &pwrst_list, node) {
+		set_pwrdm_state(pwrst->pwrdm, pwrst->saved_state);
+		state = pwrdm_read_prev_pwrst(pwrst->pwrdm);
+		if (state > pwrst->next_state) {
+			printk(KERN_INFO "Powerdomain (%s) didn't enter "
+			       "target state %d\n",
+			       pwrst->pwrdm->name, pwrst->next_state);
+			ret = -1;
+		}
+	}
+	if (ret)
+		printk(KERN_ERR "Could not enter target state in pm_suspend\n");
+	else
+		printk(KERN_INFO "Successfully put all powerdomains "
+		       "to target state\n");
+
+	return ret;
+}
+
+static int omap3_pm_enter(suspend_state_t state)
+{
+	int ret = 0;
+
+	switch (state) {
+	case PM_SUSPEND_STANDBY:
+	case PM_SUSPEND_MEM:
+		ret = omap3_pm_suspend();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void omap3_pm_finish(void)
+{
+	enable_hlt();
+}
+
+static struct platform_suspend_ops omap_pm_ops = {
+	.prepare	= omap3_pm_prepare,
+	.enter		= omap3_pm_enter,
+	.finish		= omap3_pm_finish,
+	.valid		= suspend_valid_only_mem,
+};
+
+static void __init prcm_setup_regs(void)
+{
+	/* reset modem */
+	prm_write_mod_reg(OMAP3430_RM_RSTCTRL_CORE_MODEM_SW_RSTPWRON |
+			  OMAP3430_RM_RSTCTRL_CORE_MODEM_SW_RST,
+			  CORE_MOD, RM_RSTCTRL);
+	prm_write_mod_reg(0, CORE_MOD, RM_RSTCTRL);
+
+	/* XXX Reset all wkdeps. This should be done when initializing
+	 * powerdomains */
+	prm_write_mod_reg(0, OMAP3430_IVA2_MOD, PM_WKDEP);
+	prm_write_mod_reg(0, MPU_MOD, PM_WKDEP);
+	prm_write_mod_reg(0, OMAP3430_DSS_MOD, PM_WKDEP);
+	prm_write_mod_reg(0, OMAP3430_NEON_MOD, PM_WKDEP);
+	prm_write_mod_reg(0, OMAP3430_CAM_MOD, PM_WKDEP);
+	prm_write_mod_reg(0, OMAP3430_PER_MOD, PM_WKDEP);
+	if (omap_rev() > OMAP3430_REV_ES1_0) {
+		prm_write_mod_reg(0, OMAP3430ES2_SGX_MOD, PM_WKDEP);
+		prm_write_mod_reg(0, OMAP3430ES2_USBHOST_MOD, PM_WKDEP);
+	} else
+		prm_write_mod_reg(0, GFX_MOD, PM_WKDEP);
+
+	/*
+	 * Enable interface clock autoidle for all modules.
+	 * Note that in the long run this should be done by clockfw
+	 */
+	cm_write_mod_reg(
+		OMAP3430ES2_AUTO_MMC3 |
+		OMAP3430ES2_AUTO_ICR |
+		OMAP3430_AUTO_AES2 |
+		OMAP3430_AUTO_SHA12 |
+		OMAP3430_AUTO_DES2 |
+		OMAP3430_AUTO_MMC2 |
+		OMAP3430_AUTO_MMC1 |
+		OMAP3430_AUTO_MSPRO |
+		OMAP3430_AUTO_HDQ |
+		OMAP3430_AUTO_MCSPI4 |
+		OMAP3430_AUTO_MCSPI3 |
+		OMAP3430_AUTO_MCSPI2 |
+		OMAP3430_AUTO_MCSPI1 |
+		OMAP3430_AUTO_I2C3 |
+		OMAP3430_AUTO_I2C2 |
+		OMAP3430_AUTO_I2C1 |
+		OMAP3430_AUTO_UART2 |
+		OMAP3430_AUTO_UART1 |
+		OMAP3430_AUTO_GPT11 |
+		OMAP3430_AUTO_GPT10 |
+		OMAP3430_AUTO_MCBSP5 |
+		OMAP3430_AUTO_MCBSP1 |
+		OMAP3430ES1_AUTO_FAC | /* This is es1 only */
+		OMAP3430_AUTO_MAILBOXES |
+		OMAP3430_AUTO_OMAPCTRL |
+		OMAP3430ES1_AUTO_FSHOSTUSB |
+		OMAP3430_AUTO_HSOTGUSB |
+		OMAP3430ES1_AUTO_D2D | /* This is es1 only */
+		OMAP3430_AUTO_SSI,
+		CORE_MOD, CM_AUTOIDLE1);
+
+	cm_write_mod_reg(
+		OMAP3430_AUTO_PKA |
+		OMAP3430_AUTO_AES1 |
+		OMAP3430_AUTO_RNG |
+		OMAP3430_AUTO_SHA11 |
+		OMAP3430_AUTO_DES1,
+		CORE_MOD, CM_AUTOIDLE2);
+
+	if (omap_rev() > OMAP3430_REV_ES1_0) {
+		cm_write_mod_reg(
+			OMAP3430ES2_AUTO_USBTLL,
+			CORE_MOD, CM_AUTOIDLE3);
+	}
+
+	cm_write_mod_reg(
+		OMAP3430_AUTO_WDT2 |
+		OMAP3430_AUTO_WDT1 |
+		OMAP3430_AUTO_GPIO1 |
+		OMAP3430_AUTO_32KSYNC |
+		OMAP3430_AUTO_GPT12 |
+		OMAP3430_AUTO_GPT1 ,
+		WKUP_MOD, CM_AUTOIDLE);
+
+	cm_write_mod_reg(
+		OMAP3430_AUTO_DSS,
+		OMAP3430_DSS_MOD,
+		CM_AUTOIDLE);
+
+	cm_write_mod_reg(
+		OMAP3430_AUTO_CAM,
+		OMAP3430_CAM_MOD,
+		CM_AUTOIDLE);
+
+	cm_write_mod_reg(
+		OMAP3430_AUTO_GPIO6 |
+		OMAP3430_AUTO_GPIO5 |
+		OMAP3430_AUTO_GPIO4 |
+		OMAP3430_AUTO_GPIO3 |
+		OMAP3430_AUTO_GPIO2 |
+		OMAP3430_AUTO_WDT3 |
+		OMAP3430_AUTO_UART3 |
+		OMAP3430_AUTO_GPT9 |
+		OMAP3430_AUTO_GPT8 |
+		OMAP3430_AUTO_GPT7 |
+		OMAP3430_AUTO_GPT6 |
+		OMAP3430_AUTO_GPT5 |
+		OMAP3430_AUTO_GPT4 |
+		OMAP3430_AUTO_GPT3 |
+		OMAP3430_AUTO_GPT2 |
+		OMAP3430_AUTO_MCBSP4 |
+		OMAP3430_AUTO_MCBSP3 |
+		OMAP3430_AUTO_MCBSP2,
+		OMAP3430_PER_MOD,
+		CM_AUTOIDLE);
+
+	if (omap_rev() > OMAP3430_REV_ES1_0) {
+		cm_write_mod_reg(
+			OMAP3430ES2_AUTO_USBHOST,
+			OMAP3430ES2_USBHOST_MOD,
+			CM_AUTOIDLE);
+	}
+
+	/*
+	 * Set all plls to autoidle. This is needed until autoidle is
+	 * enabled by clockfw
+	 */
+	cm_write_mod_reg(1 << OMAP3430_AUTO_IVA2_DPLL_SHIFT,
+			 OMAP3430_IVA2_MOD, CM_AUTOIDLE2);
+	cm_write_mod_reg(1 << OMAP3430_AUTO_MPU_DPLL_SHIFT,
+			 MPU_MOD,
+			 CM_AUTOIDLE2);
+	cm_write_mod_reg((1 << OMAP3430_AUTO_PERIPH_DPLL_SHIFT) |
+			 (1 << OMAP3430_AUTO_CORE_DPLL_SHIFT),
+			 PLL_MOD,
+			 CM_AUTOIDLE);
+	cm_write_mod_reg(1 << OMAP3430ES2_AUTO_PERIPH2_DPLL_SHIFT,
+			 PLL_MOD,
+			 CM_AUTOIDLE2);
+
+	/*
+	 * Enable control of expternal oscillator through
+	 * sys_clkreq. In the long run clock framework should
+	 * take care of this.
+	 */
+	prm_rmw_mod_reg_bits(OMAP_AUTOEXTCLKMODE_MASK,
+			     1 << OMAP_AUTOEXTCLKMODE_SHIFT,
+			     OMAP3430_GR_MOD,
+			     OMAP3_PRM_CLKSRC_CTRL_OFFSET);
+
+	/* setup wakup source */
+	prm_write_mod_reg(OMAP3430_EN_IO | OMAP3430_EN_GPIO1 |
+			  OMAP3430_EN_GPT1 | OMAP3430_EN_GPT12,
+			  WKUP_MOD, PM_WKEN);
+	/* No need to write EN_IO, that is always enabled */
+	prm_write_mod_reg(OMAP3430_EN_GPIO1 | OMAP3430_EN_GPT1 |
+			  OMAP3430_EN_GPT12,
+			  WKUP_MOD, OMAP3430_PM_MPUGRPSEL);
+	/* For some reason IO doesn't generate wakeup event even if
+	 * it is selected to mpu wakeup goup */
+	prm_write_mod_reg(OMAP3430_IO_EN | OMAP3430_WKUP_EN,
+			  OCP_MOD, OMAP3_PRM_IRQENABLE_MPU_OFFSET);
+}
+
+static int __init pwrdms_setup(struct powerdomain *pwrdm)
+{
+	struct power_state *pwrst;
+
+	if (!pwrdm->pwrsts)
+		return 0;
+
+	pwrst = kmalloc(sizeof(struct power_state), GFP_KERNEL);
+	if (!pwrst)
+		return -ENOMEM;
+	pwrst->pwrdm = pwrdm;
+	pwrst->next_state = PWRDM_POWER_RET;
+	list_add(&pwrst->node, &pwrst_list);
+
+	if (pwrdm_has_hdwr_sar(pwrdm))
+		pwrdm_enable_hdwr_sar(pwrdm);
+
+	return set_pwrdm_state(pwrst->pwrdm, pwrst->next_state);
+}
+
+/*
+ * Enable hw supervised mode for all clockdomains if it's
+ * supported. Initiate sleep transition for other clockdomains, if
+ * they are not used
+ */
+static int __init clkdms_setup(struct clockdomain *clkdm)
+{
+	if (clkdm->flags & CLKDM_CAN_ENABLE_AUTO)
+		omap2_clkdm_allow_idle(clkdm);
+	else if (clkdm->flags & CLKDM_CAN_FORCE_SLEEP &&
+		 atomic_read(&clkdm->usecount) == 0)
+		omap2_clkdm_sleep(clkdm);
+	return 0;
+}
+
+int __init omap3_pm_init(void)
+{
+	struct power_state *pwrst, *tmp;
+	int ret;
+
+	if (!cpu_is_omap34xx())
+		return -ENODEV;
+
+	printk(KERN_ERR "Power Management for TI OMAP3.\n");
+
+	/* XXX prcm_setup_regs needs to be before enabling hw
+	 * supervised mode for powerdomains */
+	prcm_setup_regs();
+
+	ret = request_irq(INT_34XX_PRCM_MPU_IRQ,
+			  (irq_handler_t)prcm_interrupt_handler,
+			  IRQF_DISABLED, "prcm", NULL);
+	if (ret) {
+		printk(KERN_ERR "request_irq failed to register for 0x%x\n",
+		       INT_34XX_PRCM_MPU_IRQ);
+		goto err1;
+	}
+
+	ret = pwrdm_for_each(pwrdms_setup);
+	if (ret) {
+		printk(KERN_ERR "Failed to setup powerdomains\n");
+		goto err2;
+	}
+
+	(void) clkdm_for_each(clkdms_setup);
+
+	mpu_pwrdm = pwrdm_lookup("mpu_pwrdm");
+	if (mpu_pwrdm == NULL) {
+		printk(KERN_ERR "Failed to get mpu_pwrdm\n");
+		goto err2;
+	}
+
+	_omap_sram_idle = omap_sram_push(omap34xx_cpu_suspend,
+					 omap34xx_cpu_suspend_sz);
+
+	suspend_set_ops(&omap_pm_ops);
+
+	pm_idle = omap3_pm_idle;
+
+err1:
+	return ret;
+err2:
+	free_irq(INT_34XX_PRCM_MPU_IRQ, NULL);
+	list_for_each_entry_safe(pwrst, tmp, &pwrst_list, node) {
+		list_del(&pwrst->node);
+		kfree(pwrst);
+	}
+	return ret;
+}
+
+late_initcall(omap3_pm_init);
diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h
index 812d50e..cb1ae84 100644
--- a/arch/arm/mach-omap2/prcm-common.h
+++ b/arch/arm/mach-omap2/prcm-common.h
@@ -276,6 +276,8 @@
 /* CM_FCLKEN_WKUP, CM_ICLKEN_WKUP, PM_WKEN_WKUP shared bits */
 #define OMAP3430_EN_GPIO1				(1 << 3)
 #define OMAP3430_EN_GPIO1_SHIFT				3
+#define OMAP3430_EN_GPT12				(1 << 1)
+#define OMAP3430_EN_GPT12_SHIFT				1
 #define OMAP3430_EN_GPT1				(1 << 0)
 #define OMAP3430_EN_GPT1_SHIFT				0
 
diff --git a/arch/arm/mach-omap2/sdrc.c b/arch/arm/mach-omap2/sdrc.c
index d62e4e1..2045441 100644
--- a/arch/arm/mach-omap2/sdrc.c
+++ b/arch/arm/mach-omap2/sdrc.c
@@ -60,9 +60,12 @@ struct omap_sdrc_params *omap2_sdrc_get_params(unsigned long r)
 {
 	struct omap_sdrc_params *sp;
 
+	if (!sdrc_init_params)
+		return NULL;
+
 	sp = sdrc_init_params;
 
-	while (sp->rate != r)
+	while (sp->rate && sp->rate != r)
 		sp++;
 
 	if (!sp->rate)
diff --git a/arch/arm/mach-omap2/sleep24xx.S b/arch/arm/mach-omap2/sleep24xx.S
index bf9e961..130aadb 100644
--- a/arch/arm/mach-omap2/sleep24xx.S
+++ b/arch/arm/mach-omap2/sleep24xx.S
@@ -28,7 +28,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <mach/io.h>
-#include <mach/pm.h>
 
 #include <mach/omap24xx.h>
 
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
new file mode 100644
index 0000000..e5e2553
--- /dev/null
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -0,0 +1,436 @@
+/*
+ * linux/arch/arm/mach-omap2/sleep.S
+ *
+ * (C) Copyright 2007
+ * Texas Instruments
+ * Karthik Dasu <karthik-dp@ti.com>
+ *
+ * (C) Copyright 2004
+ * Texas Instruments, <www.ti.com>
+ * Richard Woodruff <r-woodruff2@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR /PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <mach/io.h>
+#include <mach/control.h>
+
+#include "prm.h"
+#include "sdrc.h"
+
+#define PM_PREPWSTST_CORE_V	OMAP34XX_PRM_REGADDR(CORE_MOD, \
+				OMAP3430_PM_PREPWSTST)
+#define PM_PREPWSTST_MPU_V	OMAP34XX_PRM_REGADDR(MPU_MOD, \
+				OMAP3430_PM_PREPWSTST)
+#define PM_PWSTCTRL_MPU_P	OMAP34XX_PRM_REGADDR(MPU_MOD, PM_PWSTCTRL)
+#define SCRATCHPAD_MEM_OFFS	0x310 /* Move this as correct place is
+				       * available */
+#define SCRATCHPAD_BASE_P	OMAP343X_CTRL_REGADDR(\
+				OMAP343X_CONTROL_MEM_WKUP +\
+				SCRATCHPAD_MEM_OFFS)
+#define SDRC_POWER_V		OMAP34XX_SDRC_REGADDR(SDRC_POWER)
+
+	.text
+/* Function call to get the restore pointer for resume from OFF */
+ENTRY(get_restore_pointer)
+        stmfd   sp!, {lr}     @ save registers on stack
+	adr	r0, restore
+        ldmfd   sp!, {pc}     @ restore regs and return
+ENTRY(get_restore_pointer_sz)
+        .word   . - get_restore_pointer_sz
+/*
+ * Forces OMAP into idle state
+ *
+ * omap34xx_suspend() - This bit of code just executes the WFI
+ * for normal idles.
+ *
+ * Note: This code get's copied to internal SRAM at boot. When the OMAP
+ *	 wakes up it continues execution at the point it went to sleep.
+ */
+ENTRY(omap34xx_cpu_suspend)
+	stmfd	sp!, {r0-r12, lr}		@ save registers on stack
+loop:
+	/*b	loop*/	@Enable to debug by stepping through code
+	/* r0 contains restore pointer in sdram */
+	/* r1 contains information about saving context */
+	ldr     r4, sdrc_power          @ read the SDRC_POWER register
+	ldr     r5, [r4]                @ read the contents of SDRC_POWER
+	orr     r5, r5, #0x40           @ enable self refresh on idle req
+	str     r5, [r4]                @ write back to SDRC_POWER register
+
+	cmp	r1, #0x0
+	/* If context save is required, do that and execute wfi */
+	bne	save_context_wfi
+	/* Data memory barrier and Data sync barrier */
+	mov	r1, #0
+	mcr	p15, 0, r1, c7, c10, 4
+	mcr	p15, 0, r1, c7, c10, 5
+
+	wfi				@ wait for interrupt
+
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	bl i_dll_wait
+
+	ldmfd	sp!, {r0-r12, pc}		@ restore regs and return
+restore:
+	/* b restore*/ 	@ Enable to debug restore code
+        /* Check what was the reason for mpu reset and store the reason in r9*/
+        /* 1 - Only L1 and logic lost */
+        /* 2 - Only L2 lost - In this case, we wont be here */
+        /* 3 - Both L1 and L2 lost */
+	ldr     r1, pm_pwstctrl_mpu
+	ldr	r2, [r1]
+	and     r2, r2, #0x3
+	cmp     r2, #0x0	@ Check if target power state was OFF or RET
+        moveq   r9, #0x3        @ MPU OFF => L1 and L2 lost
+	movne	r9, #0x1	@ Only L1 and L2 lost => avoid L2 invalidation
+	bne	logic_l1_restore
+	/* Execute smi to invalidate L2 cache */
+	mov r12, #0x1                         @ set up to invalide L2
+smi:    .word 0xE1600070                @ Call SMI monitor (smieq)
+logic_l1_restore:
+	mov	r1, #0
+	/* Invalidate all instruction caches to PoU
+	 * and flush branch target cache */
+	mcr	p15, 0, r1, c7, c5, 0
+
+	ldr	r4, scratchpad_base
+	ldr	r3, [r4,#0xBC]
+	ldmia	r3!, {r4-r6}
+	mov	sp, r4
+	msr	spsr_cxsf, r5
+	mov	lr, r6
+
+	ldmia	r3!, {r4-r9}
+	/* Coprocessor access Control Register */
+	mcr p15, 0, r4, c1, c0, 2
+
+	/* TTBR0 */
+	MCR p15, 0, r5, c2, c0, 0
+	/* TTBR1 */
+	MCR p15, 0, r6, c2, c0, 1
+	/* Translation table base control register */
+	MCR p15, 0, r7, c2, c0, 2
+	/*domain access Control Register */
+	MCR p15, 0, r8, c3, c0, 0
+	/* data fault status Register */
+	MCR p15, 0, r9, c5, c0, 0
+
+	ldmia  r3!,{r4-r8}
+	/* instruction fault status Register */
+	MCR p15, 0, r4, c5, c0, 1
+	/*Data Auxiliary Fault Status Register */
+	MCR p15, 0, r5, c5, c1, 0
+	/*Instruction Auxiliary Fault Status Register*/
+	MCR p15, 0, r6, c5, c1, 1
+	/*Data Fault Address Register */
+	MCR p15, 0, r7, c6, c0, 0
+	/*Instruction Fault Address Register*/
+	MCR p15, 0, r8, c6, c0, 2
+	ldmia  r3!,{r4-r7}
+
+	/* user r/w thread and process ID */
+	MCR p15, 0, r4, c13, c0, 2
+	/* user ro thread and process ID */
+	MCR p15, 0, r5, c13, c0, 3
+	/*Privileged only thread and process ID */
+	MCR p15, 0, r6, c13, c0, 4
+	/* cache size selection */
+	MCR p15, 2, r7, c0, c0, 0
+	ldmia  r3!,{r4-r8}
+	/* Data TLB lockdown registers */
+	MCR p15, 0, r4, c10, c0, 0
+	/* Instruction TLB lockdown registers */
+	MCR p15, 0, r5, c10, c0, 1
+	/* Secure or Nonsecure Vector Base Address */
+	MCR p15, 0, r6, c12, c0, 0
+	/* FCSE PID */
+	MCR p15, 0, r7, c13, c0, 0
+	/* Context PID */
+	MCR p15, 0, r8, c13, c0, 1
+
+	ldmia  r3!,{r4-r5}
+	/* primary memory remap register */
+	MCR p15, 0, r4, c10, c2, 0
+	/*normal memory remap register */
+	MCR p15, 0, r5, c10, c2, 1
+
+	/* Restore cpsr */
+	ldmia	r3!,{r4}	/*load CPSR from SDRAM*/
+	msr	cpsr, r4	/*store cpsr */
+
+	/* Enabling MMU here */
+	mrc	p15, 0, r7, c2, c0, 2 /* Read TTBRControl */
+	/* Extract N (0:2) bits and decide whether to use TTBR0 or TTBR1*/
+	and	r7, #0x7
+	cmp	r7, #0x0
+	beq	usettbr0
+ttbr_error:
+	/* More work needs to be done to support N[0:2] value other than 0
+	* So looping here so that the error can be detected
+	*/
+	b	ttbr_error
+usettbr0:
+	mrc	p15, 0, r2, c2, c0, 0
+	ldr	r5, ttbrbit_mask
+	and	r2, r5
+	mov	r4, pc
+	ldr	r5, table_index_mask
+	and	r4, r5 /* r4 = 31 to 20 bits of pc */
+	/* Extract the value to be written to table entry */
+	ldr	r1, table_entry
+	add	r1, r1, r4 /* r1 has value to be written to table entry*/
+	/* Getting the address of table entry to modify */
+	lsr	r4, #18
+	add	r2, r4 /* r2 has the location which needs to be modified */
+	/* Storing previous entry of location being modified */
+	ldr	r5, scratchpad_base
+	ldr	r4, [r2]
+	str	r4, [r5, #0xC0]
+	/* Modify the table entry */
+	str	r1, [r2]
+	/* Storing address of entry being modified
+	 * - will be restored after enabling MMU */
+	ldr	r5, scratchpad_base
+	str	r2, [r5, #0xC4]
+
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 4	@ Flush prefetch buffer
+	mcr	p15, 0, r0, c7, c5, 6	@ Invalidate branch predictor array
+	mcr	p15, 0, r0, c8, c5, 0	@ Invalidate instruction TLB
+	mcr	p15, 0, r0, c8, c6, 0	@ Invalidate data TLB
+	/* Restore control register  but dont enable caches here*/
+	/* Caches will be enabled after restoring MMU table entry */
+	ldmia	r3!, {r4}
+	/* Store previous value of control register in scratchpad */
+	str	r4, [r5, #0xC8]
+	ldr	r2, cache_pred_disable_mask
+	and	r4, r2
+	mcr	p15, 0, r4, c1, c0, 0
+
+	ldmfd	sp!, {r0-r12, pc}		@ restore regs and return
+save_context_wfi:
+	/*b	save_context_wfi*/	@ enable to debug save code
+	mov	r8, r0 /* Store SDRAM address in r8 */
+        /* Check what that target sleep state is:stored in r1*/
+        /* 1 - Only L1 and logic lost */
+        /* 2 - Only L2 lost */
+        /* 3 - Both L1 and L2 lost */
+	cmp	r1, #0x2 /* Only L2 lost */
+	beq	clean_l2
+	cmp	r1, #0x1 /* L2 retained */
+	/* r9 stores whether to clean L2 or not*/
+	moveq	r9, #0x0 /* Dont Clean L2 */
+	movne	r9, #0x1 /* Clean L2 */
+l1_logic_lost:
+	/* Store sp and spsr to SDRAM */
+	mov	r4, sp
+	mrs	r5, spsr
+	mov	r6, lr
+	stmia	r8!, {r4-r6}
+	/* Save all ARM registers */
+	/* Coprocessor access control register */
+	mrc	p15, 0, r6, c1, c0, 2
+	stmia	r8!, {r6}
+	/* TTBR0, TTBR1 and Translation table base control */
+	mrc	p15, 0, r4, c2, c0, 0
+	mrc	p15, 0, r5, c2, c0, 1
+	mrc	p15, 0, r6, c2, c0, 2
+	stmia	r8!, {r4-r6}
+	/* Domain access control register, data fault status register,
+	and instruction fault status register */
+	mrc	p15, 0, r4, c3, c0, 0
+	mrc	p15, 0, r5, c5, c0, 0
+	mrc	p15, 0, r6, c5, c0, 1
+	stmia	r8!, {r4-r6}
+	/* Data aux fault status register, instruction aux fault status,
+	datat fault address register and instruction fault address register*/
+	mrc	p15, 0, r4, c5, c1, 0
+	mrc	p15, 0, r5, c5, c1, 1
+	mrc	p15, 0, r6, c6, c0, 0
+	mrc	p15, 0, r7, c6, c0, 2
+	stmia	r8!, {r4-r7}
+	/* user r/w thread and process ID, user r/o thread and process ID,
+	priv only thread and process ID, cache size selection */
+	mrc	p15, 0, r4, c13, c0, 2
+	mrc	p15, 0, r5, c13, c0, 3
+	mrc	p15, 0, r6, c13, c0, 4
+	mrc	p15, 2, r7, c0, c0, 0
+	stmia	r8!, {r4-r7}
+	/* Data TLB lockdown, instruction TLB lockdown registers */
+	mrc	p15, 0, r5, c10, c0, 0
+	mrc	p15, 0, r6, c10, c0, 1
+	stmia	r8!, {r5-r6}
+	/* Secure or non secure vector base address, FCSE PID, Context PID*/
+	mrc	p15, 0, r4, c12, c0, 0
+	mrc	p15, 0, r5, c13, c0, 0
+	mrc	p15, 0, r6, c13, c0, 1
+	stmia	r8!, {r4-r6}
+	/* Primary remap, normal remap registers */
+	mrc	p15, 0, r4, c10, c2, 0
+	mrc	p15, 0, r5, c10, c2, 1
+	stmia	r8!,{r4-r5}
+
+	/* Store current cpsr*/
+	mrs	r2, cpsr
+	stmia	r8!, {r2}
+
+	mrc	p15, 0, r4, c1, c0, 0
+	/* save control register */
+	stmia	r8!, {r4}
+clean_caches:
+	/* Clean Data or unified cache to POU*/
+	/* How to invalidate only L1 cache???? - #FIX_ME# */
+	/* mcr	p15, 0, r11, c7, c11, 1 */
+	cmp	r9, #1 /* Check whether L2 inval is required or not*/
+	bne	skip_l2_inval
+clean_l2:
+	/* read clidr */
+	mrc     p15, 1, r0, c0, c0, 1
+	/* extract loc from clidr */
+	ands    r3, r0, #0x7000000
+	/* left align loc bit field */
+	mov     r3, r3, lsr #23
+	/* if loc is 0, then no need to clean */
+	beq     finished
+	/* start clean at cache level 0 */
+	mov     r10, #0
+loop1:
+	/* work out 3x current cache level */
+	add     r2, r10, r10, lsr #1
+	/* extract cache type bits from clidr*/
+	mov     r1, r0, lsr r2
+	/* mask of the bits for current cache only */
+	and     r1, r1, #7
+	/* see what cache we have at this level */
+	cmp     r1, #2
+	/* skip if no cache, or just i-cache */
+	blt     skip
+	/* select current cache level in cssr */
+	mcr     p15, 2, r10, c0, c0, 0
+	/* isb to sych the new cssr&csidr */
+	isb
+	/* read the new csidr */
+	mrc     p15, 1, r1, c0, c0, 0
+	/* extract the length of the cache lines */
+	and     r2, r1, #7
+	/* add 4 (line length offset) */
+	add     r2, r2, #4
+	ldr     r4, assoc_mask
+	/* find maximum number on the way size */
+	ands    r4, r4, r1, lsr #3
+	/* find bit position of way size increment */
+	clz     r5, r4
+	ldr     r7, numset_mask
+	/* extract max number of the index size*/
+	ands    r7, r7, r1, lsr #13
+loop2:
+	mov     r9, r4
+	/* create working copy of max way size*/
+loop3:
+	/* factor way and cache number into r11 */
+	orr     r11, r10, r9, lsl r5
+	/* factor index number into r11 */
+	orr     r11, r11, r7, lsl r2
+	/*clean & invalidate by set/way */
+	mcr     p15, 0, r11, c7, c10, 2
+	/* decrement the way*/
+	subs    r9, r9, #1
+	bge     loop3
+	/*decrement the index */
+	subs    r7, r7, #1
+	bge     loop2
+skip:
+	add     r10, r10, #2
+	/* increment cache number */
+	cmp     r3, r10
+	bgt     loop1
+finished:
+	/*swith back to cache level 0 */
+	mov     r10, #0
+	/* select current cache level in cssr */
+	mcr     p15, 2, r10, c0, c0, 0
+	isb
+skip_l2_inval:
+	/* Data memory barrier and Data sync barrier */
+	mov     r1, #0
+	mcr     p15, 0, r1, c7, c10, 4
+	mcr     p15, 0, r1, c7, c10, 5
+
+	wfi                             @ wait for interrupt
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	bl i_dll_wait
+	/* restore regs and return */
+	ldmfd   sp!, {r0-r12, pc}
+
+i_dll_wait:
+	ldr     r4, clk_stabilize_delay
+
+i_dll_delay:
+	subs    r4, r4, #0x1
+	bne     i_dll_delay
+	ldr     r4, sdrc_power
+	ldr     r5, [r4]
+	bic     r5, r5, #0x40
+	str     r5, [r4]
+	bx	lr
+pm_prepwstst_core:
+	.word	PM_PREPWSTST_CORE_V
+pm_prepwstst_mpu:
+	.word	PM_PREPWSTST_MPU_V
+pm_pwstctrl_mpu:
+	.word	PM_PWSTCTRL_MPU_P
+scratchpad_base:
+	.word	SCRATCHPAD_BASE_P
+sdrc_power:
+	.word SDRC_POWER_V
+context_mem:
+	.word	0x803E3E14
+clk_stabilize_delay:
+	.word 0x000001FF
+assoc_mask:
+	.word	0x3ff
+numset_mask:
+	.word	0x7fff
+ttbrbit_mask:
+	.word	0xFFFFC000
+table_index_mask:
+	.word	0xFFF00000
+table_entry:
+	.word	0x00000C02
+cache_pred_disable_mask:
+	.word	0xFFFFE7FB
+ENTRY(omap34xx_cpu_suspend_sz)
+	.word	. - omap34xx_cpu_suspend
diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c
index 34a56a1..215d463 100644
--- a/arch/arm/mach-omap2/usb-musb.c
+++ b/arch/arm/mach-omap2/usb-musb.c
@@ -28,7 +28,6 @@
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
-#include <mach/pm.h>
 #include <mach/mux.h>
 #include <mach/usb.h>
 
diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index 9dd68fa..4bc9ed7 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -171,7 +171,7 @@ endchoice
 
 config OMAP_SERIAL_WAKE
 	bool "Enable wake-up events for serial ports"
-	depends on OMAP_MUX
+	depends on ARCH_OMAP1 && OMAP_MUX
 	default y
 	help
 	  Select this option if you want to have your system wake up
diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c
index 70b68ef..86ee23d 100644
--- a/arch/arm/plat-omap/common.c
+++ b/arch/arm/plat-omap/common.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/pm.h>
 #include <linux/console.h>
 #include <linux/serial.h>
 #include <linux/tty.h>
diff --git a/arch/arm/plat-omap/include/mach/pm.h b/arch/arm/plat-omap/include/mach/pm.h
deleted file mode 100644
index ce6ee79..0000000
--- a/arch/arm/plat-omap/include/mach/pm.h
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * arch/arm/plat-omap/include/mach/pm.h
- *
- * Header file for OMAP Power Management Routines
- *
- * Author: MontaVista Software, Inc.
- *	   support@mvista.com
- *
- * Copyright 2002 MontaVista Software Inc.
- *
- * Cleanup 2004 for Linux 2.6 by Dirk Behme <dirk.behme@de.bosch.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __ASM_ARCH_OMAP_PM_H
-#define __ASM_ARCH_OMAP_PM_H
-
-/*
- * ----------------------------------------------------------------------------
- * Register and offset definitions to be used in PM assembler code
- * ----------------------------------------------------------------------------
- */
-#define CLKGEN_REG_ASM_BASE		IO_ADDRESS(0xfffece00)
-#define ARM_IDLECT1_ASM_OFFSET		0x04
-#define ARM_IDLECT2_ASM_OFFSET		0x08
-
-#define TCMIF_ASM_BASE			IO_ADDRESS(0xfffecc00)
-#define EMIFS_CONFIG_ASM_OFFSET		0x0c
-#define EMIFF_SDRAM_CONFIG_ASM_OFFSET	0x20
-
-/*
- * ----------------------------------------------------------------------------
- * Power management bitmasks
- * ----------------------------------------------------------------------------
- */
-#define IDLE_WAIT_CYCLES		0x00000fff
-#define PERIPHERAL_ENABLE		0x2
-
-#define SELF_REFRESH_MODE		0x0c000001
-#define IDLE_EMIFS_REQUEST		0xc
-#define MODEM_32K_EN			0x1
-#define PER_EN				0x1
-
-#define CPU_SUSPEND_SIZE		200
-#define ULPD_LOW_PWR_EN			0x0001
-#define ULPD_DEEP_SLEEP_TRANSITION_EN	0x0010
-#define ULPD_SETUP_ANALOG_CELL_3_VAL	0
-#define ULPD_POWER_CTRL_REG_VAL		0x0219
-
-#define DSP_IDLE_DELAY			10
-#define DSP_IDLE			0x0040
-#define DSP_RST				0x0004
-#define DSP_ENABLE			0x0002
-#define SUFFICIENT_DSP_RESET_TIME	1000
-#define DEFAULT_MPUI_CONFIG		0x05cf
-#define ENABLE_XORCLK			0x2
-#define DSP_CLOCK_ENABLE		0x2000
-#define DSP_IDLE_MODE			0x2
-#define TC_IDLE_REQUEST			(0x0000000c)
-
-#define IRQ_LEVEL2			(1<<0)
-#define IRQ_KEYBOARD			(1<<1)
-#define IRQ_UART2			(1<<15)
-
-#define PDE_BIT				0x08
-#define PWD_EN_BIT			0x04
-#define EN_PERCK_BIT			0x04
-
-#define OMAP1510_DEEP_SLEEP_REQUEST	0x0ec7
-#define OMAP1510_BIG_SLEEP_REQUEST	0x0cc5
-#define OMAP1510_IDLE_LOOP_REQUEST	0x0c00
-#define OMAP1510_IDLE_CLOCK_DOMAINS	0x2
-
-/* Both big sleep and deep sleep use same values. Difference is in ULPD. */
-#define OMAP1610_IDLECT1_SLEEP_VAL	0x13c7
-#define OMAP1610_IDLECT2_SLEEP_VAL	0x09c7
-#define OMAP1610_IDLECT3_VAL		0x3f
-#define OMAP1610_IDLECT3_SLEEP_ORMASK	0x2c
-#define OMAP1610_IDLECT3		0xfffece24
-#define OMAP1610_IDLE_LOOP_REQUEST	0x0400
-
-#define OMAP730_IDLECT1_SLEEP_VAL	0x16c7
-#define OMAP730_IDLECT2_SLEEP_VAL	0x09c7
-#define OMAP730_IDLECT3_VAL		0x3f
-#define OMAP730_IDLECT3		0xfffece24
-#define OMAP730_IDLE_LOOP_REQUEST	0x0C00
-
-#if     !defined(CONFIG_ARCH_OMAP730) && \
-	!defined(CONFIG_ARCH_OMAP15XX) && \
-	!defined(CONFIG_ARCH_OMAP16XX) && \
-	!defined(CONFIG_ARCH_OMAP24XX)
-#warning "Power management for this processor not implemented yet"
-#endif
-
-#ifndef __ASSEMBLER__
-
-#include <linux/clk.h>
-
-extern void prevent_idle_sleep(void);
-extern void allow_idle_sleep(void);
-
-extern void omap_pm_idle(void);
-extern void omap_pm_suspend(void);
-extern void omap730_cpu_suspend(unsigned short, unsigned short);
-extern void omap1510_cpu_suspend(unsigned short, unsigned short);
-extern void omap1610_cpu_suspend(unsigned short, unsigned short);
-extern void omap24xx_cpu_suspend(u32 dll_ctrl, void __iomem *sdrc_dlla_ctrl,
-					void __iomem *sdrc_power);
-extern void omap730_idle_loop_suspend(void);
-extern void omap1510_idle_loop_suspend(void);
-extern void omap1610_idle_loop_suspend(void);
-extern void omap24xx_idle_loop_suspend(void);
-
-extern unsigned int omap730_cpu_suspend_sz;
-extern unsigned int omap1510_cpu_suspend_sz;
-extern unsigned int omap1610_cpu_suspend_sz;
-extern unsigned int omap24xx_cpu_suspend_sz;
-extern unsigned int omap730_idle_loop_suspend_sz;
-extern unsigned int omap1510_idle_loop_suspend_sz;
-extern unsigned int omap1610_idle_loop_suspend_sz;
-extern unsigned int omap24xx_idle_loop_suspend_sz;
-
-#ifdef CONFIG_OMAP_SERIAL_WAKE
-extern void omap_serial_wake_trigger(int enable);
-#else
-#define omap_serial_wakeup_init()	{}
-#define omap_serial_wake_trigger(x)	{}
-#endif	/* CONFIG_OMAP_SERIAL_WAKE */
-
-#define ARM_SAVE(x) arm_sleep_save[ARM_SLEEP_SAVE_##x] = omap_readl(x)
-#define ARM_RESTORE(x) omap_writel((arm_sleep_save[ARM_SLEEP_SAVE_##x]), (x))
-#define ARM_SHOW(x) arm_sleep_save[ARM_SLEEP_SAVE_##x]
-
-#define DSP_SAVE(x) dsp_sleep_save[DSP_SLEEP_SAVE_##x] = __raw_readw(x)
-#define DSP_RESTORE(x) __raw_writew((dsp_sleep_save[DSP_SLEEP_SAVE_##x]), (x))
-#define DSP_SHOW(x) dsp_sleep_save[DSP_SLEEP_SAVE_##x]
-
-#define ULPD_SAVE(x) ulpd_sleep_save[ULPD_SLEEP_SAVE_##x] = omap_readw(x)
-#define ULPD_RESTORE(x) omap_writew((ulpd_sleep_save[ULPD_SLEEP_SAVE_##x]), (x))
-#define ULPD_SHOW(x) ulpd_sleep_save[ULPD_SLEEP_SAVE_##x]
-
-#define MPUI730_SAVE(x) mpui730_sleep_save[MPUI730_SLEEP_SAVE_##x] = omap_readl(x)
-#define MPUI730_RESTORE(x) omap_writel((mpui730_sleep_save[MPUI730_SLEEP_SAVE_##x]), (x))
-#define MPUI730_SHOW(x) mpui730_sleep_save[MPUI730_SLEEP_SAVE_##x]
-
-#define MPUI1510_SAVE(x) mpui1510_sleep_save[MPUI1510_SLEEP_SAVE_##x] = omap_readl(x)
-#define MPUI1510_RESTORE(x) omap_writel((mpui1510_sleep_save[MPUI1510_SLEEP_SAVE_##x]), (x))
-#define MPUI1510_SHOW(x) mpui1510_sleep_save[MPUI1510_SLEEP_SAVE_##x]
-
-#define MPUI1610_SAVE(x) mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_##x] = omap_readl(x)
-#define MPUI1610_RESTORE(x) omap_writel((mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_##x]), (x))
-#define MPUI1610_SHOW(x) mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_##x]
-
-#define OMAP24XX_SAVE(x) omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x] = x
-#define OMAP24XX_RESTORE(x) x = omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x]
-#define OMAP24XX_SHOW(x) omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x]
-
-/*
- * List of global OMAP registers to preserve.
- * More ones like CP and general purpose register values are preserved
- * with the stack pointer in sleep.S.
- */
-
-enum arm_save_state {
-	ARM_SLEEP_SAVE_START = 0,
-	/*
-	 * MPU control registers 32 bits
-	 */
-	ARM_SLEEP_SAVE_ARM_CKCTL,
-	ARM_SLEEP_SAVE_ARM_IDLECT1,
-	ARM_SLEEP_SAVE_ARM_IDLECT2,
-	ARM_SLEEP_SAVE_ARM_IDLECT3,
-	ARM_SLEEP_SAVE_ARM_EWUPCT,
-	ARM_SLEEP_SAVE_ARM_RSTCT1,
-	ARM_SLEEP_SAVE_ARM_RSTCT2,
-	ARM_SLEEP_SAVE_ARM_SYSST,
-	ARM_SLEEP_SAVE_SIZE
-};
-
-enum dsp_save_state {
-	DSP_SLEEP_SAVE_START = 0,
-	/*
-	 * DSP registers 16 bits
-	 */
-	DSP_SLEEP_SAVE_DSP_IDLECT2,
-	DSP_SLEEP_SAVE_SIZE
-};
-
-enum ulpd_save_state {
-	ULPD_SLEEP_SAVE_START = 0,
-	/*
-	 * ULPD registers 16 bits
-	 */
-	ULPD_SLEEP_SAVE_ULPD_IT_STATUS,
-	ULPD_SLEEP_SAVE_ULPD_CLOCK_CTRL,
-	ULPD_SLEEP_SAVE_ULPD_SOFT_REQ,
-	ULPD_SLEEP_SAVE_ULPD_STATUS_REQ,
-	ULPD_SLEEP_SAVE_ULPD_DPLL_CTRL,
-	ULPD_SLEEP_SAVE_ULPD_POWER_CTRL,
-	ULPD_SLEEP_SAVE_SIZE
-};
-
-enum mpui1510_save_state {
-	MPUI1510_SLEEP_SAVE_START = 0,
-	/*
-	 * MPUI registers 32 bits
-	 */
-	MPUI1510_SLEEP_SAVE_MPUI_CTRL,
-	MPUI1510_SLEEP_SAVE_MPUI_DSP_BOOT_CONFIG,
-	MPUI1510_SLEEP_SAVE_MPUI_DSP_API_CONFIG,
-	MPUI1510_SLEEP_SAVE_MPUI_DSP_STATUS,
-	MPUI1510_SLEEP_SAVE_EMIFF_SDRAM_CONFIG,
-	MPUI1510_SLEEP_SAVE_EMIFS_CONFIG,
-	MPUI1510_SLEEP_SAVE_OMAP_IH1_MIR,
-	MPUI1510_SLEEP_SAVE_OMAP_IH2_MIR,
-#if defined(CONFIG_ARCH_OMAP15XX)
-	MPUI1510_SLEEP_SAVE_SIZE
-#else
-	MPUI1510_SLEEP_SAVE_SIZE = 0
-#endif
-};
-
-enum mpui730_save_state {
-	MPUI730_SLEEP_SAVE_START = 0,
-	/*
-	 * MPUI registers 32 bits
-	 */
-	MPUI730_SLEEP_SAVE_MPUI_CTRL,
-	MPUI730_SLEEP_SAVE_MPUI_DSP_BOOT_CONFIG,
-	MPUI730_SLEEP_SAVE_MPUI_DSP_API_CONFIG,
-	MPUI730_SLEEP_SAVE_MPUI_DSP_STATUS,
-	MPUI730_SLEEP_SAVE_EMIFF_SDRAM_CONFIG,
-	MPUI730_SLEEP_SAVE_EMIFS_CONFIG,
-	MPUI730_SLEEP_SAVE_OMAP_IH1_MIR,
-	MPUI730_SLEEP_SAVE_OMAP_IH2_0_MIR,
-	MPUI730_SLEEP_SAVE_OMAP_IH2_1_MIR,
-#if defined(CONFIG_ARCH_OMAP730)
-	MPUI730_SLEEP_SAVE_SIZE
-#else
-	MPUI730_SLEEP_SAVE_SIZE = 0
-#endif
-};
-
-enum mpui1610_save_state {
-	MPUI1610_SLEEP_SAVE_START = 0,
-	/*
-	 * MPUI registers 32 bits
-	 */
-	MPUI1610_SLEEP_SAVE_MPUI_CTRL,
-	MPUI1610_SLEEP_SAVE_MPUI_DSP_BOOT_CONFIG,
-	MPUI1610_SLEEP_SAVE_MPUI_DSP_API_CONFIG,
-	MPUI1610_SLEEP_SAVE_MPUI_DSP_STATUS,
-	MPUI1610_SLEEP_SAVE_EMIFF_SDRAM_CONFIG,
-	MPUI1610_SLEEP_SAVE_EMIFS_CONFIG,
-	MPUI1610_SLEEP_SAVE_OMAP_IH1_MIR,
-	MPUI1610_SLEEP_SAVE_OMAP_IH2_0_MIR,
-	MPUI1610_SLEEP_SAVE_OMAP_IH2_1_MIR,
-	MPUI1610_SLEEP_SAVE_OMAP_IH2_2_MIR,
-	MPUI1610_SLEEP_SAVE_OMAP_IH2_3_MIR,
-#if defined(CONFIG_ARCH_OMAP16XX)
-	MPUI1610_SLEEP_SAVE_SIZE
-#else
-	MPUI1610_SLEEP_SAVE_SIZE = 0
-#endif
-};
-
-enum omap24xx_save_state {
-	OMAP24XX_SLEEP_SAVE_START = 0,
-	OMAP24XX_SLEEP_SAVE_INTC_MIR0,
-	OMAP24XX_SLEEP_SAVE_INTC_MIR1,
-	OMAP24XX_SLEEP_SAVE_INTC_MIR2,
-
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_MPU,
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_GFX,
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_DSP,
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_MDM,
-
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_MPU,
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_CORE,
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_GFX,
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_DSP,
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_MDM,
-
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST1_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST2_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST3_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST4_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_GFX,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_WKUP,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_CKGEN,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_DSP,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_MDM,
-
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE1_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE2_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE3_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE4_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_WKUP,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_PLL,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_DSP,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_MDM,
-
-	OMAP24XX_SLEEP_SAVE_CM_FCLKEN1_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_FCLKEN2_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_ICLKEN1_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_ICLKEN2_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_ICLKEN3_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_ICLKEN4_CORE,
-	OMAP24XX_SLEEP_SAVE_GPIO1_IRQENABLE1,
-	OMAP24XX_SLEEP_SAVE_GPIO2_IRQENABLE1,
-	OMAP24XX_SLEEP_SAVE_GPIO3_IRQENABLE1,
-	OMAP24XX_SLEEP_SAVE_GPIO4_IRQENABLE1,
-	OMAP24XX_SLEEP_SAVE_GPIO3_OE,
-	OMAP24XX_SLEEP_SAVE_GPIO4_OE,
-	OMAP24XX_SLEEP_SAVE_GPIO3_RISINGDETECT,
-	OMAP24XX_SLEEP_SAVE_GPIO3_FALLINGDETECT,
-	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SPI1_NCS2,
-	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_MCBSP1_DX,
-	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SSI1_FLAG_TX,
-	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SYS_NIRQW0,
-	OMAP24XX_SLEEP_SAVE_SIZE
-};
-
-#endif /* ASSEMBLER */
-#endif /* __ASM_ARCH_OMAP_PM_H */
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index f2e9de1..6391e3d 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -39,7 +39,6 @@
 #include <mach/gpmc.h>
 #include <mach/onenand.h>
 #include <mach/gpio.h>
-#include <mach/pm.h>
 
 #include <mach/dma.h>
 
-- 
cgit v0.10.2


From 94434535bd36ca010a81e1199f954beef2c4de64 Mon Sep 17 00:00:00 2001
From: Jouni Hogander <jouni.hogander@nokia.com>
Date: Tue, 3 Feb 2009 15:49:04 -0800
Subject: OMAP: Add new function to check wether there is irq pending

Add common omap2/3 function to check wether there is irq pending.
Switch to use it in omap2 pm code instead of its own.

Signed-off-by: Jouni Hogander <jouni.hogander@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index 08a3b99..b828638 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -28,7 +28,6 @@
 #define INTC_MIR_CLEAR0		0x0088
 #define INTC_MIR_SET0		0x008c
 #define INTC_PENDING_IRQ0	0x0098
-
 /* Number of IRQ state bits in each MIR register */
 #define IRQ_BITS_PER_REG	32
 
@@ -156,6 +155,22 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
 	intc_bank_write_reg(1 << 0, bank, INTC_SYSCONFIG);
 }
 
+int omap_irq_pending(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(irq_banks); i++) {
+		struct omap_irq_bank *bank = irq_banks + i;
+		int irq;
+
+		for (irq = 0; irq < bank->nr_irqs; irq += 32)
+			if (intc_bank_read_reg(bank, INTC_PENDING_IRQ0 +
+					       ((irq >> 5) << 5)))
+				return 1;
+	}
+	return 0;
+}
+
 void __init omap_init_irq(void)
 {
 	unsigned long nr_of_irqs = 0;
diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c
index 232b9f6..d38f312 100644
--- a/arch/arm/mach-omap2/pm24xx.c
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -76,19 +76,6 @@ static int omap2_fclks_active(void)
 	return 0;
 }
 
-static int omap2_irq_pending(void)
-{
-	u32 pending_reg = 0x480fe098;
-	int i;
-
-	for (i = 0; i < 4; i++) {
-		if (omap_readl(pending_reg))
-			return 1;
-		pending_reg += 0x20;
-	}
-	return 0;
-}
-
 static void omap2_enter_full_retention(void)
 {
 	u32 l;
@@ -127,7 +114,7 @@ static void omap2_enter_full_retention(void)
 
 	/* One last check for pending IRQs to avoid extra latency due
 	 * to sleeping unnecessarily. */
-	if (omap2_irq_pending())
+	if (omap_irq_pending())
 		goto no_sleep;
 
 	/* Jump to SRAM suspend code */
@@ -262,13 +249,13 @@ static void omap2_pm_idle(void)
 	local_fiq_disable();
 
 	if (!omap2_can_sleep()) {
-		if (omap2_irq_pending())
+		if (omap_irq_pending())
 			goto out;
 		omap2_enter_mpu_retention();
 		goto out;
 	}
 
-	if (omap2_irq_pending())
+	if (omap_irq_pending())
 		goto out;
 
 	omap2_enter_full_retention();
diff --git a/arch/arm/plat-omap/include/mach/irqs.h b/arch/arm/plat-omap/include/mach/irqs.h
index 7f57ee6..f5f7c92 100644
--- a/arch/arm/plat-omap/include/mach/irqs.h
+++ b/arch/arm/plat-omap/include/mach/irqs.h
@@ -467,6 +467,7 @@
 
 #ifndef __ASSEMBLY__
 extern void omap_init_irq(void);
+extern int omap_irq_pending(void);
 #endif
 
 #include <mach/hardware.h>
-- 
cgit v0.10.2


From 1155e426b7365f7909f5a32612feff7361aa0f4c Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Tue, 25 Nov 2008 11:48:24 -0800
Subject: OMAP3: PM: Force IVA2 into idle during bootup

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 4474947..b2a8730 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -352,6 +352,54 @@ static struct platform_suspend_ops omap_pm_ops = {
 	.valid		= suspend_valid_only_mem,
 };
 
+
+/**
+ * omap3_iva_idle(): ensure IVA is in idle so it can be put into
+ *                   retention
+ *
+ * In cases where IVA2 is activated by bootcode, it may prevent
+ * full-chip retention or off-mode because it is not idle.  This
+ * function forces the IVA2 into idle state so it can go
+ * into retention/off and thus allow full-chip retention/off.
+ *
+ **/
+static void __init omap3_iva_idle(void)
+{
+	/* ensure IVA2 clock is disabled */
+	cm_write_mod_reg(0, OMAP3430_IVA2_MOD, CM_FCLKEN);
+
+	/* if no clock activity, nothing else to do */
+	if (!(cm_read_mod_reg(OMAP3430_IVA2_MOD, OMAP3430_CM_CLKSTST) &
+	      OMAP3430_CLKACTIVITY_IVA2_MASK))
+		return;
+
+	/* Reset IVA2 */
+	prm_write_mod_reg(OMAP3430_RST1_IVA2 |
+			  OMAP3430_RST2_IVA2 |
+			  OMAP3430_RST3_IVA2,
+			  OMAP3430_IVA2_MOD, RM_RSTCTRL);
+
+	/* Enable IVA2 clock */
+	cm_write_mod_reg(OMAP3430_CM_FCLKEN_IVA2_EN_IVA2,
+			 OMAP3430_IVA2_MOD, CM_FCLKEN);
+
+	/* Set IVA2 boot mode to 'idle' */
+	omap_ctrl_writel(OMAP3_IVA2_BOOTMOD_IDLE,
+			 OMAP343X_CONTROL_IVA2_BOOTMOD);
+
+	/* Un-reset IVA2 */
+	prm_write_mod_reg(0, OMAP3430_IVA2_MOD, RM_RSTCTRL);
+
+	/* Disable IVA2 clock */
+	cm_write_mod_reg(0, OMAP3430_IVA2_MOD, CM_FCLKEN);
+
+	/* Reset IVA2 */
+	prm_write_mod_reg(OMAP3430_RST1_IVA2 |
+			  OMAP3430_RST2_IVA2 |
+			  OMAP3430_RST3_IVA2,
+			  OMAP3430_IVA2_MOD, RM_RSTCTRL);
+}
+
 static void __init prcm_setup_regs(void)
 {
 	/* reset modem */
@@ -511,6 +559,8 @@ static void __init prcm_setup_regs(void)
 	 * it is selected to mpu wakeup goup */
 	prm_write_mod_reg(OMAP3430_IO_EN | OMAP3430_WKUP_EN,
 			  OCP_MOD, OMAP3_PRM_IRQENABLE_MPU_OFFSET);
+
+	omap3_iva_idle();
 }
 
 static int __init pwrdms_setup(struct powerdomain *pwrdm)
diff --git a/arch/arm/plat-omap/include/mach/control.h b/arch/arm/plat-omap/include/mach/control.h
index 269147f..d38697b 100644
--- a/arch/arm/plat-omap/include/mach/control.h
+++ b/arch/arm/plat-omap/include/mach/control.h
@@ -189,6 +189,11 @@
 #define OMAP2_PBIASLITEPWRDNZ0		(1 << 1)
 #define OMAP2_PBIASLITEVMODE0		(1 << 0)
 
+/* CONTROL_IVA2_BOOTMOD bits */
+#define OMAP3_IVA2_BOOTMOD_SHIFT	0
+#define OMAP3_IVA2_BOOTMOD_MASK		(0xf << 0)
+#define OMAP3_IVA2_BOOTMOD_IDLE		(0x1 << 0)
+
 #ifndef __ASSEMBLY__
 #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 extern void __iomem *omap_ctrl_base_get(void);
-- 
cgit v0.10.2


From 5a1a5abdb2e9a301f1ac62feb37228f3f4d3117c Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Fri, 31 Oct 2008 11:08:42 -0700
Subject: OMAP3: PM: Add wake-up bit defintiions for CONTROL_PADCONF_X

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/plat-omap/include/mach/control.h b/arch/arm/plat-omap/include/mach/control.h
index d38697b..a9f05e5 100644
--- a/arch/arm/plat-omap/include/mach/control.h
+++ b/arch/arm/plat-omap/include/mach/control.h
@@ -194,6 +194,10 @@
 #define OMAP3_IVA2_BOOTMOD_MASK		(0xf << 0)
 #define OMAP3_IVA2_BOOTMOD_IDLE		(0x1 << 0)
 
+/* CONTROL_PADCONF_X bits */
+#define OMAP3_PADCONF_WAKEUPEVENT0	(1 << 15)
+#define OMAP3_PADCONF_WAKEUPENABLE0	(1 << 14)
+
 #ifndef __ASSEMBLY__
 #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
 extern void __iomem *omap_ctrl_base_get(void);
-- 
cgit v0.10.2


From 4af4016c53f52b26461b8030211f8427a58fa5ed Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Wed, 4 Feb 2009 10:51:40 -0800
Subject: OMAP3: PM: UART: disable clocks when idle and off-mode support

This patch allows the UART clocks to be disabled when the OMAP UARTs
are inactive, thus permitting the chip to hit retention in idle.
After the expiration of an activity timer, each UART is allowed to
disable its clocks so the system can enter retention.  The activity
timer is (re)activated on any UART interrupt, UART wake event or any
IO pad wakeup.  The actual disable of the UART clocks is done in the
'prepare_idle' hook called from the OMAP idle loop.

While the activity timer is active, the smart-idle mode of the UART is
also disabled.  This is due to a "feature" of the UART module that
after a UART wakeup, the smart-idle mode may be entered before the
UART has communicated the interrupt, or upon TX, an idle mode may be
entered before the TX FIFOs are emptied.

Upon suspend, the 'prepare_suspend' hook cancels any pending activity
timers and allows the clocks to be disabled immediately.

In addition, upon disabling clocks the UART state is saved in case
of an off-mode transition while clocks are off.

Special thanks to Tero Kristo for the initial ideas and first versions
of UART idle support, and to Jouni Hogander for extra testing and
bugfixes.

Tested on OMAP3 (Beagle, RX51, SDP, EVM) and OMAP2 (n810)

Cc: Tero Kristo <tero.kristo@nokia.com>
Cc: Jouni Hogander <jouni.hogander@nokia.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c
index d38f312..db10255 100644
--- a/arch/arm/mach-omap2/pm24xx.c
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -71,6 +71,11 @@ static int omap2_fclks_active(void)
 
 	f1 = cm_read_mod_reg(CORE_MOD, CM_FCLKEN1);
 	f2 = cm_read_mod_reg(CORE_MOD, OMAP24XX_CM_FCLKEN2);
+
+	/* Ignore UART clocks.  These are handled by UART core (serial.c) */
+	f1 &= ~(OMAP24XX_EN_UART1 | OMAP24XX_EN_UART2);
+	f2 &= ~OMAP24XX_EN_UART3;
+
 	if (f1 | f2)
 		return 1;
 	return 0;
@@ -117,12 +122,20 @@ static void omap2_enter_full_retention(void)
 	if (omap_irq_pending())
 		goto no_sleep;
 
+	omap_uart_prepare_idle(0);
+	omap_uart_prepare_idle(1);
+	omap_uart_prepare_idle(2);
+
 	/* Jump to SRAM suspend code */
 	omap2_sram_suspend(sdrc_read_reg(SDRC_DLLA_CTRL),
 			   OMAP_SDRC_REGADDR(SDRC_DLLA_CTRL),
 			   OMAP_SDRC_REGADDR(SDRC_POWER));
-no_sleep:
 
+	omap_uart_resume_idle(2);
+	omap_uart_resume_idle(1);
+	omap_uart_resume_idle(0);
+
+no_sleep:
 	if (omap2_pm_debug) {
 		unsigned long long tmp;
 
@@ -283,6 +296,7 @@ static int omap2_pm_suspend(void)
 	mir1 = omap_readl(0x480fe0a4);
 	omap_writel(1 << 5, 0x480fe0ac);
 
+	omap_uart_prepare_suspend();
 	omap2_enter_full_retention();
 
 	omap_writel(mir1, 0x480fe0a4);
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index b2a8730..54876ac 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -27,6 +27,7 @@
 #include <mach/clockdomain.h>
 #include <mach/powerdomain.h>
 #include <mach/control.h>
+#include <mach/serial.h>
 
 #include "cm.h"
 #include "cm-regbits-34xx.h"
@@ -168,10 +169,16 @@ static void omap_sram_idle(void)
 		return;
 	}
 	omap2_gpio_prepare_for_retention();
+	omap_uart_prepare_idle(0);
+	omap_uart_prepare_idle(1);
+	omap_uart_prepare_idle(2);
 
 	_omap_sram_idle(NULL, save_state);
 	cpu_init();
 
+	omap_uart_resume_idle(2);
+	omap_uart_resume_idle(1);
+	omap_uart_resume_idle(0);
 	omap2_gpio_resume_after_retention();
 }
 
@@ -204,6 +211,11 @@ static int omap3_fclks_active(void)
 				  CM_FCLKEN);
 	fck_per = cm_read_mod_reg(OMAP3430_PER_MOD,
 				  CM_FCLKEN);
+
+	/* Ignore UART clocks.  These are handled by UART core (serial.c) */
+	fck_core1 &= ~(OMAP3430_EN_UART1 | OMAP3430_EN_UART2);
+	fck_per &= ~OMAP3430_EN_UART3;
+
 	if (fck_core1 | fck_core3 | fck_sgx | fck_dss |
 	    fck_cam | fck_per | fck_usbhost)
 		return 1;
@@ -212,6 +224,8 @@ static int omap3_fclks_active(void)
 
 static int omap3_can_sleep(void)
 {
+	if (!omap_uart_can_sleep())
+		return 0;
 	if (omap3_fclks_active())
 		return 0;
 	return 1;
@@ -301,6 +315,7 @@ static int omap3_pm_suspend(void)
 			goto restore;
 	}
 
+	omap_uart_prepare_suspend();
 	omap_sram_idle();
 
 restore:
diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 4dcf39c2..53cbe06 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -6,6 +6,8 @@
  * Copyright (C) 2005-2008 Nokia Corporation
  * Author: Paul Mundt <paul.mundt@nokia.com>
  *
+ * Major rework for PM support by Kevin Hilman
+ *
  * Based off of arch/arm/mach-omap/omap1/serial.c
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -21,9 +23,50 @@
 
 #include <mach/common.h>
 #include <mach/board.h>
+#include <mach/clock.h>
+#include <mach/control.h>
+
+#include "prm.h"
+#include "pm.h"
+#include "prm-regbits-34xx.h"
+
+#define UART_OMAP_WER		0x17	/* Wake-up enable register */
+
+#define DEFAULT_TIMEOUT (2 * HZ)
+
+struct omap_uart_state {
+	int num;
+	int can_sleep;
+	struct timer_list timer;
+	u32 timeout;
+
+	void __iomem *wk_st;
+	void __iomem *wk_en;
+	u32 wk_mask;
+	u32 padconf;
+
+	struct clk *ick;
+	struct clk *fck;
+	int clocked;
+
+	struct plat_serial8250_port *p;
+	struct list_head node;
 
-static struct clk *uart_ick[OMAP_MAX_NR_PORTS];
-static struct clk *uart_fck[OMAP_MAX_NR_PORTS];
+#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM)
+	int context_valid;
+
+	/* Registers to be saved/restored for OFF-mode */
+	u16 dll;
+	u16 dlh;
+	u16 ier;
+	u16 sysc;
+	u16 scr;
+	u16 wer;
+#endif
+};
+
+static struct omap_uart_state omap_uart[OMAP_MAX_NR_PORTS];
+static LIST_HEAD(uart_list);
 
 static struct plat_serial8250_port serial_platform_data[] = {
 	{
@@ -74,30 +117,320 @@ static inline void serial_write_reg(struct plat_serial8250_port *p, int offset,
  * properly. Note that the TX watermark initialization may not be needed
  * once the 8250.c watermark handling code is merged.
  */
-static inline void __init omap_serial_reset(struct plat_serial8250_port *p)
+static inline void __init omap_uart_reset(struct omap_uart_state *uart)
 {
+	struct plat_serial8250_port *p = uart->p;
+
 	serial_write_reg(p, UART_OMAP_MDR1, 0x07);
 	serial_write_reg(p, UART_OMAP_SCR, 0x08);
 	serial_write_reg(p, UART_OMAP_MDR1, 0x00);
 	serial_write_reg(p, UART_OMAP_SYSC, (0x02 << 3) | (1 << 2) | (1 << 0));
 }
 
-void omap_serial_enable_clocks(int enable)
+#if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP3)
+
+static int enable_off_mode; /* to be removed by full off-mode patches */
+
+static void omap_uart_save_context(struct omap_uart_state *uart)
 {
-	int i;
-	for (i = 0; i < OMAP_MAX_NR_PORTS; i++) {
-		if (uart_ick[i] && uart_fck[i]) {
-			if (enable) {
-				clk_enable(uart_ick[i]);
-				clk_enable(uart_fck[i]);
-			} else {
-				clk_disable(uart_ick[i]);
-				clk_disable(uart_fck[i]);
+	u16 lcr = 0;
+	struct plat_serial8250_port *p = uart->p;
+
+	if (!enable_off_mode)
+		return;
+
+	lcr = serial_read_reg(p, UART_LCR);
+	serial_write_reg(p, UART_LCR, 0xBF);
+	uart->dll = serial_read_reg(p, UART_DLL);
+	uart->dlh = serial_read_reg(p, UART_DLM);
+	serial_write_reg(p, UART_LCR, lcr);
+	uart->ier = serial_read_reg(p, UART_IER);
+	uart->sysc = serial_read_reg(p, UART_OMAP_SYSC);
+	uart->scr = serial_read_reg(p, UART_OMAP_SCR);
+	uart->wer = serial_read_reg(p, UART_OMAP_WER);
+
+	uart->context_valid = 1;
+}
+
+static void omap_uart_restore_context(struct omap_uart_state *uart)
+{
+	u16 efr = 0;
+	struct plat_serial8250_port *p = uart->p;
+
+	if (!enable_off_mode)
+		return;
+
+	if (!uart->context_valid)
+		return;
+
+	uart->context_valid = 0;
+
+	serial_write_reg(p, UART_OMAP_MDR1, 0x7);
+	serial_write_reg(p, UART_LCR, 0xBF); /* Config B mode */
+	efr = serial_read_reg(p, UART_EFR);
+	serial_write_reg(p, UART_EFR, UART_EFR_ECB);
+	serial_write_reg(p, UART_LCR, 0x0); /* Operational mode */
+	serial_write_reg(p, UART_IER, 0x0);
+	serial_write_reg(p, UART_LCR, 0xBF); /* Config B mode */
+	serial_write_reg(p, UART_DLL, uart->dll);
+	serial_write_reg(p, UART_DLM, uart->dlh);
+	serial_write_reg(p, UART_LCR, 0x0); /* Operational mode */
+	serial_write_reg(p, UART_IER, uart->ier);
+	serial_write_reg(p, UART_FCR, 0xA1);
+	serial_write_reg(p, UART_LCR, 0xBF); /* Config B mode */
+	serial_write_reg(p, UART_EFR, efr);
+	serial_write_reg(p, UART_LCR, UART_LCR_WLEN8);
+	serial_write_reg(p, UART_OMAP_SCR, uart->scr);
+	serial_write_reg(p, UART_OMAP_WER, uart->wer);
+	serial_write_reg(p, UART_OMAP_SYSC, uart->sysc);
+	serial_write_reg(p, UART_OMAP_MDR1, 0x00); /* UART 16x mode */
+}
+#else
+static inline void omap_uart_save_context(struct omap_uart_state *uart) {}
+static inline void omap_uart_restore_context(struct omap_uart_state *uart) {}
+#endif /* CONFIG_PM && CONFIG_ARCH_OMAP3 */
+
+static inline void omap_uart_enable_clocks(struct omap_uart_state *uart)
+{
+	if (uart->clocked)
+		return;
+
+	clk_enable(uart->ick);
+	clk_enable(uart->fck);
+	uart->clocked = 1;
+	omap_uart_restore_context(uart);
+}
+
+#ifdef CONFIG_PM
+
+static inline void omap_uart_disable_clocks(struct omap_uart_state *uart)
+{
+	if (!uart->clocked)
+		return;
+
+	omap_uart_save_context(uart);
+	uart->clocked = 0;
+	clk_disable(uart->ick);
+	clk_disable(uart->fck);
+}
+
+static void omap_uart_smart_idle_enable(struct omap_uart_state *uart,
+					  int enable)
+{
+	struct plat_serial8250_port *p = uart->p;
+	u16 sysc;
+
+	sysc = serial_read_reg(p, UART_OMAP_SYSC) & 0x7;
+	if (enable)
+		sysc |= 0x2 << 3;
+	else
+		sysc |= 0x1 << 3;
+
+	serial_write_reg(p, UART_OMAP_SYSC, sysc);
+}
+
+static void omap_uart_block_sleep(struct omap_uart_state *uart)
+{
+	omap_uart_enable_clocks(uart);
+
+	omap_uart_smart_idle_enable(uart, 0);
+	uart->can_sleep = 0;
+	mod_timer(&uart->timer, jiffies + uart->timeout);
+}
+
+static void omap_uart_allow_sleep(struct omap_uart_state *uart)
+{
+	if (!uart->clocked)
+		return;
+
+	omap_uart_smart_idle_enable(uart, 1);
+	uart->can_sleep = 1;
+	del_timer(&uart->timer);
+}
+
+static void omap_uart_idle_timer(unsigned long data)
+{
+	struct omap_uart_state *uart = (struct omap_uart_state *)data;
+
+	omap_uart_allow_sleep(uart);
+}
+
+void omap_uart_prepare_idle(int num)
+{
+	struct omap_uart_state *uart;
+
+	list_for_each_entry(uart, &uart_list, node) {
+		if (num == uart->num && uart->can_sleep) {
+			omap_uart_disable_clocks(uart);
+			return;
+		}
+	}
+}
+
+void omap_uart_resume_idle(int num)
+{
+	struct omap_uart_state *uart;
+
+	list_for_each_entry(uart, &uart_list, node) {
+		if (num == uart->num) {
+			omap_uart_enable_clocks(uart);
+
+			/* Check for IO pad wakeup */
+			if (cpu_is_omap34xx() && uart->padconf) {
+				u16 p = omap_ctrl_readw(uart->padconf);
+
+				if (p & OMAP3_PADCONF_WAKEUPEVENT0)
+					omap_uart_block_sleep(uart);
 			}
+
+			/* Check for normal UART wakeup */
+			if (__raw_readl(uart->wk_st) & uart->wk_mask)
+				omap_uart_block_sleep(uart);
+
+			return;
+		}
+	}
+}
+
+void omap_uart_prepare_suspend(void)
+{
+	struct omap_uart_state *uart;
+
+	list_for_each_entry(uart, &uart_list, node) {
+		omap_uart_allow_sleep(uart);
+	}
+}
+
+int omap_uart_can_sleep(void)
+{
+	struct omap_uart_state *uart;
+	int can_sleep = 1;
+
+	list_for_each_entry(uart, &uart_list, node) {
+		if (!uart->clocked)
+			continue;
+
+		if (!uart->can_sleep) {
+			can_sleep = 0;
+			continue;
 		}
+
+		/* This UART can now safely sleep. */
+		omap_uart_allow_sleep(uart);
 	}
+
+	return can_sleep;
 }
 
+/**
+ * omap_uart_interrupt()
+ *
+ * This handler is used only to detect that *any* UART interrupt has
+ * occurred.  It does _nothing_ to handle the interrupt.  Rather,
+ * any UART interrupt will trigger the inactivity timer so the
+ * UART will not idle or sleep for its timeout period.
+ *
+ **/
+static irqreturn_t omap_uart_interrupt(int irq, void *dev_id)
+{
+	struct omap_uart_state *uart = dev_id;
+
+	omap_uart_block_sleep(uart);
+
+	return IRQ_NONE;
+}
+
+static void omap_uart_idle_init(struct omap_uart_state *uart)
+{
+	u32 v;
+	struct plat_serial8250_port *p = uart->p;
+	int ret;
+
+	uart->can_sleep = 0;
+	uart->timeout = DEFAULT_TIMEOUT;
+	setup_timer(&uart->timer, omap_uart_idle_timer,
+		    (unsigned long) uart);
+	mod_timer(&uart->timer, jiffies + uart->timeout);
+	omap_uart_smart_idle_enable(uart, 0);
+
+	if (cpu_is_omap34xx()) {
+		u32 mod = (uart->num == 2) ? OMAP3430_PER_MOD : CORE_MOD;
+		u32 wk_mask = 0;
+		u32 padconf = 0;
+
+		uart->wk_en = OMAP34XX_PRM_REGADDR(mod, PM_WKEN1);
+		uart->wk_st = OMAP34XX_PRM_REGADDR(mod, PM_WKST1);
+		switch (uart->num) {
+		case 0:
+			wk_mask = OMAP3430_ST_UART1_MASK;
+			padconf = 0x182;
+			break;
+		case 1:
+			wk_mask = OMAP3430_ST_UART2_MASK;
+			padconf = 0x17a;
+			break;
+		case 2:
+			wk_mask = OMAP3430_ST_UART3_MASK;
+			padconf = 0x19e;
+			break;
+		}
+		uart->wk_mask = wk_mask;
+		uart->padconf = padconf;
+	} else if (cpu_is_omap24xx()) {
+		u32 wk_mask = 0;
+
+		if (cpu_is_omap2430()) {
+			uart->wk_en = OMAP2430_PRM_REGADDR(CORE_MOD, PM_WKEN1);
+			uart->wk_st = OMAP2430_PRM_REGADDR(CORE_MOD, PM_WKST1);
+		} else if (cpu_is_omap2420()) {
+			uart->wk_en = OMAP2420_PRM_REGADDR(CORE_MOD, PM_WKEN1);
+			uart->wk_st = OMAP2420_PRM_REGADDR(CORE_MOD, PM_WKST1);
+		}
+		switch (uart->num) {
+		case 0:
+			wk_mask = OMAP24XX_ST_UART1_MASK;
+			break;
+		case 1:
+			wk_mask = OMAP24XX_ST_UART2_MASK;
+			break;
+		case 2:
+			wk_mask = OMAP24XX_ST_UART3_MASK;
+			break;
+		}
+		uart->wk_mask = wk_mask;
+	} else {
+		uart->wk_en = 0;
+		uart->wk_st = 0;
+		uart->wk_mask = 0;
+		uart->padconf = 0;
+	}
+
+	/* Set wake-enable bit */
+	if (uart->wk_en && uart->wk_mask) {
+		v = __raw_readl(uart->wk_en);
+		v |= uart->wk_mask;
+		__raw_writel(v, uart->wk_en);
+	}
+
+	/* Ensure IOPAD wake-enables are set */
+	if (cpu_is_omap34xx() && uart->padconf) {
+		u16 v;
+
+		v = omap_ctrl_readw(uart->padconf);
+		v |= OMAP3_PADCONF_WAKEUPENABLE0;
+		omap_ctrl_writew(v, uart->padconf);
+	}
+
+	p->flags |= UPF_SHARE_IRQ;
+	ret = request_irq(p->irq, omap_uart_interrupt, IRQF_SHARED,
+			  "serial idle", (void *)uart);
+	WARN_ON(ret);
+}
+
+#else
+static inline void omap_uart_idle_init(struct omap_uart_state *uart) {}
+#endif /* CONFIG_PM */
+
 void __init omap_serial_init(void)
 {
 	int i;
@@ -117,6 +450,7 @@ void __init omap_serial_init(void)
 
 	for (i = 0; i < OMAP_MAX_NR_PORTS; i++) {
 		struct plat_serial8250_port *p = serial_platform_data + i;
+		struct omap_uart_state *uart = &omap_uart[i];
 
 		if (!(info->enabled_uarts & (1 << i))) {
 			p->membase = NULL;
@@ -125,22 +459,30 @@ void __init omap_serial_init(void)
 		}
 
 		sprintf(name, "uart%d_ick", i+1);
-		uart_ick[i] = clk_get(NULL, name);
-		if (IS_ERR(uart_ick[i])) {
+		uart->ick = clk_get(NULL, name);
+		if (IS_ERR(uart->ick)) {
 			printk(KERN_ERR "Could not get uart%d_ick\n", i+1);
-			uart_ick[i] = NULL;
-		} else
-			clk_enable(uart_ick[i]);
+			uart->ick = NULL;
+		}
 
 		sprintf(name, "uart%d_fck", i+1);
-		uart_fck[i] = clk_get(NULL, name);
-		if (IS_ERR(uart_fck[i])) {
+		uart->fck = clk_get(NULL, name);
+		if (IS_ERR(uart->fck)) {
 			printk(KERN_ERR "Could not get uart%d_fck\n", i+1);
-			uart_fck[i] = NULL;
-		} else
-			clk_enable(uart_fck[i]);
+			uart->fck = NULL;
+		}
+
+		if (!uart->ick || !uart->fck)
+			continue;
+
+		uart->num = i;
+		p->private_data = uart;
+		uart->p = p;
+		list_add(&uart->node, &uart_list);
 
-		omap_serial_reset(p);
+		omap_uart_enable_clocks(uart);
+		omap_uart_reset(uart);
+		omap_uart_idle_init(uart);
 	}
 }
 
diff --git a/arch/arm/plat-omap/include/mach/common.h b/arch/arm/plat-omap/include/mach/common.h
index 0ecf36d..834f0b3 100644
--- a/arch/arm/plat-omap/include/mach/common.h
+++ b/arch/arm/plat-omap/include/mach/common.h
@@ -33,8 +33,6 @@ struct sys_timer;
 
 extern void omap_map_common_io(void);
 extern struct sys_timer omap_timer;
-extern void omap_serial_init(void);
-extern void omap_serial_enable_clocks(int enable);
 #if defined(CONFIG_I2C_OMAP) || defined(CONFIG_I2C_OMAP_MODULE)
 extern int omap_register_i2c_bus(int bus_id, u32 clkrate,
 				 struct i2c_board_info const *info,
diff --git a/arch/arm/plat-omap/include/mach/serial.h b/arch/arm/plat-omap/include/mach/serial.h
index 8a676a0..8e89585 100644
--- a/arch/arm/plat-omap/include/mach/serial.h
+++ b/arch/arm/plat-omap/include/mach/serial.h
@@ -40,4 +40,13 @@
 			__ret;						\
 			})
 
+#ifndef __ASSEMBLER__
+extern void omap_serial_init(void);
+extern int omap_uart_can_sleep(void);
+extern void omap_uart_check_wakeup(void);
+extern void omap_uart_prepare_suspend(void);
+extern void omap_uart_prepare_idle(int num);
+extern void omap_uart_resume_idle(int num);
+#endif
+
 #endif
-- 
cgit v0.10.2


From ba87a9beae8b39894f55761f4aff5ae2ca624f81 Mon Sep 17 00:00:00 2001
From: Jouni Hogander <jouni.hogander@nokia.com>
Date: Tue, 9 Dec 2008 13:36:50 +0200
Subject: OMAP: UART: Add sysfs interface for adjusting UART sleep timeout

This patch makes it possible to change uart sleep timeout. New sysfs
entry is added (/sys/devices/platform/serial8250.<uart>/sleep_timeout)
Writing zero will disable the timeout feature and prevent UART clocks
from being disabled.

Also default timeout is increased to 5 second to make serial console
more usable.

Original patch was written by Tero Kristo.

Cc: Tero Kristo <Tero.Kristo@nokia.com>
Signed-off-by: Jouni Hogander <jouni.hogander@nokia.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 53cbe06..a0a2803 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -32,7 +32,7 @@
 
 #define UART_OMAP_WER		0x17	/* Wake-up enable register */
 
-#define DEFAULT_TIMEOUT (2 * HZ)
+#define DEFAULT_TIMEOUT (5 * HZ)
 
 struct omap_uart_state {
 	int num;
@@ -235,7 +235,10 @@ static void omap_uart_block_sleep(struct omap_uart_state *uart)
 
 	omap_uart_smart_idle_enable(uart, 0);
 	uart->can_sleep = 0;
-	mod_timer(&uart->timer, jiffies + uart->timeout);
+	if (uart->timeout)
+		mod_timer(&uart->timer, jiffies + uart->timeout);
+	else
+		del_timer(&uart->timer);
 }
 
 static void omap_uart_allow_sleep(struct omap_uart_state *uart)
@@ -340,6 +343,8 @@ static irqreturn_t omap_uart_interrupt(int irq, void *dev_id)
 	return IRQ_NONE;
 }
 
+static u32 sleep_timeout = DEFAULT_TIMEOUT;
+
 static void omap_uart_idle_init(struct omap_uart_state *uart)
 {
 	u32 v;
@@ -347,7 +352,7 @@ static void omap_uart_idle_init(struct omap_uart_state *uart)
 	int ret;
 
 	uart->can_sleep = 0;
-	uart->timeout = DEFAULT_TIMEOUT;
+	uart->timeout = sleep_timeout;
 	setup_timer(&uart->timer, omap_uart_idle_timer,
 		    (unsigned long) uart);
 	mod_timer(&uart->timer, jiffies + uart->timeout);
@@ -427,6 +432,39 @@ static void omap_uart_idle_init(struct omap_uart_state *uart)
 	WARN_ON(ret);
 }
 
+static ssize_t sleep_timeout_show(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  char *buf)
+{
+	return sprintf(buf, "%u\n", sleep_timeout / HZ);
+}
+
+static ssize_t sleep_timeout_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t n)
+{
+	struct omap_uart_state *uart;
+	unsigned int value;
+
+	if (sscanf(buf, "%u", &value) != 1) {
+		printk(KERN_ERR "sleep_timeout_store: Invalid value\n");
+		return -EINVAL;
+	}
+	sleep_timeout = value * HZ;
+	list_for_each_entry(uart, &uart_list, node) {
+		uart->timeout = sleep_timeout;
+		if (uart->timeout)
+			mod_timer(&uart->timer, jiffies + uart->timeout);
+		else
+			/* A zero value means disable timeout feature */
+			omap_uart_block_sleep(uart);
+	}
+	return n;
+}
+
+static struct kobj_attribute sleep_timeout_attr =
+	__ATTR(sleep_timeout, 0644, sleep_timeout_show, sleep_timeout_store);
+
 #else
 static inline void omap_uart_idle_init(struct omap_uart_state *uart) {}
 #endif /* CONFIG_PM */
@@ -496,6 +534,15 @@ static struct platform_device serial_device = {
 
 static int __init omap_init(void)
 {
-	return platform_device_register(&serial_device);
+	int ret;
+
+	ret = platform_device_register(&serial_device);
+
+#ifdef CONFIG_PM
+	if (!ret)
+		ret = sysfs_create_file(&serial_device.dev.kobj,
+					&sleep_timeout_attr.attr);
+#endif
+	return ret;
 }
 arch_initcall(omap_init);
-- 
cgit v0.10.2


From 8111b221a275cbc974eba26059dc764680ded9a9 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Tue, 28 Apr 2009 15:27:44 -0700
Subject: OMAP3: PM: Add D2D clocks and auto-idle setup to PRCM init

Add D2D clocks (modem_fck, sad2d_ick, mad2d_ick) to clock framework
and ensure that auto-idle bits are set for these clocks during PRCM
init.

Also add omap3_d2d_idle() function called durint PRCM setup which
ensures D2D pins are MUX'd correctly to enable retention for
standalone (no-modem) devices.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c
index 6202139..9e43fe5 100644
--- a/arch/arm/mach-omap2/clock34xx.c
+++ b/arch/arm/mach-omap2/clock34xx.c
@@ -129,6 +129,9 @@ static struct omap_clk omap34xx_clks[] = {
 	CLK(NULL,	"sgx_fck",	&sgx_fck,	CK_3430ES2),
 	CLK(NULL,	"sgx_ick",	&sgx_ick,	CK_3430ES2),
 	CLK(NULL,	"d2d_26m_fck",	&d2d_26m_fck,	CK_3430ES1),
+	CLK(NULL,	"modem_fck",	&modem_fck,	CK_343X),
+	CLK(NULL,	"sad2d_ick",	&sad2d_ick,	CK_343X),
+	CLK(NULL,	"mad2d_ick",	&mad2d_ick,	CK_343X),
 	CLK(NULL,	"gpt10_fck",	&gpt10_fck,	CK_343X),
 	CLK(NULL,	"gpt11_fck",	&gpt11_fck,	CK_343X),
 	CLK(NULL,	"cpefuse_fck",	&cpefuse_fck,	CK_3430ES2),
diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h
index 496f0e9..e433aec 100644
--- a/arch/arm/mach-omap2/clock34xx.h
+++ b/arch/arm/mach-omap2/clock34xx.h
@@ -1230,6 +1230,37 @@ static struct clk d2d_26m_fck = {
 	.recalc		= &followparent_recalc,
 };
 
+static struct clk modem_fck = {
+	.name		= "modem_fck",
+	.ops		= &clkops_omap2_dflt_wait,
+	.parent		= &sys_ck,
+	.init		= &omap2_init_clk_clkdm,
+	.enable_reg	= OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1),
+	.enable_bit	= OMAP3430_EN_MODEM_SHIFT,
+	.clkdm_name	= "d2d_clkdm",
+	.recalc		= &followparent_recalc,
+};
+
+static struct clk sad2d_ick = {
+	.name		= "sad2d_ick",
+	.ops		= &clkops_omap2_dflt_wait,
+	.parent		= &l3_ick,
+	.enable_reg	= OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1),
+	.enable_bit	= OMAP3430_EN_SAD2D_SHIFT,
+	.clkdm_name	= "d2d_clkdm",
+	.recalc		= &followparent_recalc,
+};
+
+static struct clk mad2d_ick = {
+	.name		= "mad2d_ick",
+	.ops		= &clkops_omap2_dflt_wait,
+	.parent		= &l3_ick,
+	.enable_reg	= OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN3),
+	.enable_bit	= OMAP3430_EN_MAD2D_SHIFT,
+	.clkdm_name	= "d2d_clkdm",
+	.recalc		= &followparent_recalc,
+};
+
 static const struct clksel omap343x_gpt_clksel[] = {
 	{ .parent = &omap_32k_fck, .rates = gpt_32k_rates },
 	{ .parent = &sys_ck,	   .rates = gpt_sys_rates },
@@ -1947,8 +1978,6 @@ static struct clk usb_l4_ick = {
 	.recalc		= &omap2_clksel_recalc,
 };
 
-/* XXX MDM_INTC_ICK, SAD2D_ICK ?? */
-
 /* SECURITY_L4_ICK2 based clocks */
 
 static struct clk security_l4_ick2 = {
diff --git a/arch/arm/mach-omap2/cm-regbits-34xx.h b/arch/arm/mach-omap2/cm-regbits-34xx.h
index 6f3f5a3..6923deb 100644
--- a/arch/arm/mach-omap2/cm-regbits-34xx.h
+++ b/arch/arm/mach-omap2/cm-regbits-34xx.h
@@ -145,6 +145,8 @@
 #define OMAP3430_CLKACTIVITY_MPU_MASK			(1 << 0)
 
 /* CM_FCLKEN1_CORE specific bits */
+#define OMAP3430_EN_MODEM				(1 << 31)
+#define OMAP3430_EN_MODEM_SHIFT				31
 
 /* CM_ICLKEN1_CORE specific bits */
 #define OMAP3430_EN_ICR					(1 << 29)
@@ -161,6 +163,8 @@
 #define OMAP3430_EN_MAILBOXES_SHIFT			7
 #define OMAP3430_EN_OMAPCTRL				(1 << 6)
 #define OMAP3430_EN_OMAPCTRL_SHIFT			6
+#define OMAP3430_EN_SAD2D				(1 << 3)
+#define OMAP3430_EN_SAD2D_SHIFT				3
 #define OMAP3430_EN_SDRC				(1 << 1)
 #define OMAP3430_EN_SDRC_SHIFT				1
 
@@ -176,6 +180,10 @@
 #define OMAP3430_EN_DES1				(1 << 0)
 #define OMAP3430_EN_DES1_SHIFT				0
 
+/* CM_ICLKEN3_CORE */
+#define OMAP3430_EN_MAD2D_SHIFT				3
+#define OMAP3430_EN_MAD2D				(1 << 3)
+
 /* CM_FCLKEN3_CORE specific bits */
 #define OMAP3430ES2_EN_TS_SHIFT				1
 #define OMAP3430ES2_EN_TS_MASK				(1 << 1)
@@ -231,6 +239,8 @@
 #define OMAP3430ES2_ST_CPEFUSE_MASK			(1 << 0)
 
 /* CM_AUTOIDLE1_CORE */
+#define OMAP3430_AUTO_MODEM				(1 << 31)
+#define OMAP3430_AUTO_MODEM_SHIFT			31
 #define OMAP3430ES2_AUTO_MMC3				(1 << 30)
 #define OMAP3430ES2_AUTO_MMC3_SHIFT			30
 #define OMAP3430ES2_AUTO_ICR				(1 << 29)
@@ -287,6 +297,8 @@
 #define OMAP3430_AUTO_HSOTGUSB_SHIFT			4
 #define OMAP3430ES1_AUTO_D2D				(1 << 3)
 #define OMAP3430ES1_AUTO_D2D_SHIFT			3
+#define OMAP3430_AUTO_SAD2D				(1 << 3)
+#define OMAP3430_AUTO_SAD2D_SHIFT			3
 #define OMAP3430_AUTO_SSI				(1 << 0)
 #define OMAP3430_AUTO_SSI_SHIFT				0
 
@@ -308,6 +320,8 @@
 #define	OMAP3430ES2_AUTO_USBTLL				(1 << 2)
 #define OMAP3430ES2_AUTO_USBTLL_SHIFT			2
 #define OMAP3430ES2_AUTO_USBTLL_MASK			(1 << 2)
+#define OMAP3430_AUTO_MAD2D_SHIFT			3
+#define OMAP3430_AUTO_MAD2D				(1 << 3)
 
 /* CM_CLKSEL_CORE */
 #define OMAP3430_CLKSEL_SSI_SHIFT			8
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 54876ac..f72e254 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -415,14 +415,32 @@ static void __init omap3_iva_idle(void)
 			  OMAP3430_IVA2_MOD, RM_RSTCTRL);
 }
 
-static void __init prcm_setup_regs(void)
+static void __init omap3_d2d_idle(void)
 {
+	u16 mask, padconf;
+
+	/* In a stand alone OMAP3430 where there is not a stacked
+	 * modem for the D2D Idle Ack and D2D MStandby must be pulled
+	 * high. S CONTROL_PADCONF_SAD2D_IDLEACK and
+	 * CONTROL_PADCONF_SAD2D_MSTDBY to have a pull up. */
+	mask = (1 << 4) | (1 << 3); /* pull-up, enabled */
+	padconf = omap_ctrl_readw(OMAP3_PADCONF_SAD2D_MSTANDBY);
+	padconf |= mask;
+	omap_ctrl_writew(padconf, OMAP3_PADCONF_SAD2D_MSTANDBY);
+
+	padconf = omap_ctrl_readw(OMAP3_PADCONF_SAD2D_IDLEACK);
+	padconf |= mask;
+	omap_ctrl_writew(padconf, OMAP3_PADCONF_SAD2D_IDLEACK);
+
 	/* reset modem */
 	prm_write_mod_reg(OMAP3430_RM_RSTCTRL_CORE_MODEM_SW_RSTPWRON |
 			  OMAP3430_RM_RSTCTRL_CORE_MODEM_SW_RST,
 			  CORE_MOD, RM_RSTCTRL);
 	prm_write_mod_reg(0, CORE_MOD, RM_RSTCTRL);
+}
 
+static void __init prcm_setup_regs(void)
+{
 	/* XXX Reset all wkdeps. This should be done when initializing
 	 * powerdomains */
 	prm_write_mod_reg(0, OMAP3430_IVA2_MOD, PM_WKDEP);
@@ -442,6 +460,7 @@ static void __init prcm_setup_regs(void)
 	 * Note that in the long run this should be done by clockfw
 	 */
 	cm_write_mod_reg(
+		OMAP3430_AUTO_MODEM |
 		OMAP3430ES2_AUTO_MMC3 |
 		OMAP3430ES2_AUTO_ICR |
 		OMAP3430_AUTO_AES2 |
@@ -469,7 +488,7 @@ static void __init prcm_setup_regs(void)
 		OMAP3430_AUTO_OMAPCTRL |
 		OMAP3430ES1_AUTO_FSHOSTUSB |
 		OMAP3430_AUTO_HSOTGUSB |
-		OMAP3430ES1_AUTO_D2D | /* This is es1 only */
+		OMAP3430_AUTO_SAD2D |
 		OMAP3430_AUTO_SSI,
 		CORE_MOD, CM_AUTOIDLE1);
 
@@ -483,6 +502,7 @@ static void __init prcm_setup_regs(void)
 
 	if (omap_rev() > OMAP3430_REV_ES1_0) {
 		cm_write_mod_reg(
+			OMAP3430_AUTO_MAD2D |
 			OMAP3430ES2_AUTO_USBTLL,
 			CORE_MOD, CM_AUTOIDLE3);
 	}
@@ -576,6 +596,7 @@ static void __init prcm_setup_regs(void)
 			  OCP_MOD, OMAP3_PRM_IRQENABLE_MPU_OFFSET);
 
 	omap3_iva_idle();
+	omap3_d2d_idle();
 }
 
 static int __init pwrdms_setup(struct powerdomain *pwrdm)
diff --git a/arch/arm/plat-omap/include/mach/control.h b/arch/arm/plat-omap/include/mach/control.h
index a9f05e5..fcc5a9b 100644
--- a/arch/arm/plat-omap/include/mach/control.h
+++ b/arch/arm/plat-omap/include/mach/control.h
@@ -144,6 +144,10 @@
 #define OMAP343X_CONTROL_PBIAS_LITE	(OMAP2_CONTROL_GENERAL + 0x02b0)
 #define OMAP343X_CONTROL_TEMP_SENSOR	(OMAP2_CONTROL_GENERAL + 0x02b4)
 
+/* 34xx D2D idle-related pins, handled by PM core */
+#define OMAP3_PADCONF_SAD2D_MSTANDBY   0x250
+#define OMAP3_PADCONF_SAD2D_IDLEACK    0x254
+
 /*
  * REVISIT: This list of registers is not comprehensive - there are more
  * that should be added.
-- 
cgit v0.10.2


From 01cbd4d11558e2e4c10f14bb8aecd799b22bca35 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Tue, 25 Nov 2008 21:48:28 -0800
Subject: OMAP3: PM: D2D clockdomain supports SW supervised transitions

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/clockdomains.h b/arch/arm/mach-omap2/clockdomains.h
index 281d5da..fe319ae 100644
--- a/arch/arm/mach-omap2/clockdomains.h
+++ b/arch/arm/mach-omap2/clockdomains.h
@@ -195,7 +195,7 @@ static struct clockdomain sgx_clkdm = {
 static struct clockdomain d2d_clkdm = {
 	.name		= "d2d_clkdm",
 	.pwrdm		= { .name = "core_pwrdm" },
-	.flags		= CLKDM_CAN_HWSUP,
+	.flags		= CLKDM_CAN_HWSUP_SWSUP,
 	.clktrctrl_mask = OMAP3430ES1_CLKTRCTRL_D2D_MASK,
 	.omap_chip	= OMAP_CHIP_INIT(CHIP_IS_OMAP3430),
 };
-- 
cgit v0.10.2


From 94a3ef6f2888ae995a8d112e277ed8465a9457fb Mon Sep 17 00:00:00 2001
From: Peter 'p2' De Schrijver <peter.de-schrijver@nokia.com>
Date: Mon, 19 Jan 2009 19:09:22 +0200
Subject: OMAP3: PM: Ensure MUSB block can idle when driver not loaded

Otherwise, bootloaders may leave MUSB in a state which prevents
retention.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 6fd1c1f..3935a35 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -57,6 +57,4 @@ obj-$(CONFIG_MACH_NOKIA_RX51)		+= board-rx51.o \
 					   mmc-twl4030.o
 
 # Platform specific device init code
-ifeq ($(CONFIG_USB_MUSB_SOC),y)
 obj-y					+= usb-musb.o
-endif
diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c
index 215d463..d85296d 100644
--- a/arch/arm/mach-omap2/usb-musb.c
+++ b/arch/arm/mach-omap2/usb-musb.c
@@ -31,6 +31,17 @@
 #include <mach/mux.h>
 #include <mach/usb.h>
 
+#define OTG_SYSCONFIG	(OMAP34XX_HSUSB_OTG_BASE + 0x404)
+
+static void __init usb_musb_pm_init(void)
+{
+	/* Ensure force-idle mode for OTG controller */
+	if (cpu_is_omap34xx())
+		omap_writel(0, OTG_SYSCONFIG);
+}
+
+#ifdef CONFIG_USB_MUSB_SOC
+
 static struct resource musb_resources[] = {
 	[0] = { /* start and end set dynamically */
 		.flags	= IORESOURCE_MEM,
@@ -183,4 +194,13 @@ void __init usb_musb_init(void)
 		printk(KERN_ERR "Unable to register HS-USB (MUSB) device\n");
 		return;
 	}
+
+	usb_musb_pm_init();
+}
+
+#else
+void __init usb_musb_init(void)
+{
+	usb_musb_pm_init();
 }
+#endif /* CONFIG_USB_MUSB_SOC */
diff --git a/arch/arm/plat-omap/include/mach/usb.h b/arch/arm/plat-omap/include/mach/usb.h
index 69f0cee..f337e17 100644
--- a/arch/arm/plat-omap/include/mach/usb.h
+++ b/arch/arm/plat-omap/include/mach/usb.h
@@ -27,13 +27,7 @@
 #define UDC_BASE			OMAP2_UDC_BASE
 #define OMAP_OHCI_BASE			OMAP2_OHCI_BASE
 
-#ifdef CONFIG_USB_MUSB_SOC
 extern void usb_musb_init(void);
-#else
-static inline void usb_musb_init(void)
-{
-}
-#endif
 
 #endif
 
-- 
cgit v0.10.2


From 014c46db1c596299fc2c1d231cee30bb211035dc Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Mon, 27 Apr 2009 07:50:23 -0700
Subject: OMAP3: PM: Ensure PRCM interrupts are cleared at boot

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index f72e254..6bbb047 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -595,6 +595,9 @@ static void __init prcm_setup_regs(void)
 	prm_write_mod_reg(OMAP3430_IO_EN | OMAP3430_WKUP_EN,
 			  OCP_MOD, OMAP3_PRM_IRQENABLE_MPU_OFFSET);
 
+	/* Clear any pending PRCM interrupts */
+	prm_write_mod_reg(0, OCP_MOD, OMAP3_PRM_IRQSTATUS_MPU_OFFSET);
+
 	omap3_iva_idle();
 	omap3_d2d_idle();
 }
-- 
cgit v0.10.2


From b1340d17d25f9a51acf003ba4742e77aefb32071 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Mon, 27 Apr 2009 16:14:54 -0700
Subject: OMAP3: PM: Clear pending PRCM reset flags on init

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 6bbb047..8b8a2dd 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -595,6 +595,15 @@ static void __init prcm_setup_regs(void)
 	prm_write_mod_reg(OMAP3430_IO_EN | OMAP3430_WKUP_EN,
 			  OCP_MOD, OMAP3_PRM_IRQENABLE_MPU_OFFSET);
 
+	/* Clear any pending 'reset' flags */
+	prm_write_mod_reg(0xffffffff, MPU_MOD, RM_RSTST);
+	prm_write_mod_reg(0xffffffff, CORE_MOD, RM_RSTST);
+	prm_write_mod_reg(0xffffffff, OMAP3430_PER_MOD, RM_RSTST);
+	prm_write_mod_reg(0xffffffff, OMAP3430_EMU_MOD, RM_RSTST);
+	prm_write_mod_reg(0xffffffff, OMAP3430_NEON_MOD, RM_RSTST);
+	prm_write_mod_reg(0xffffffff, OMAP3430_DSS_MOD, RM_RSTST);
+	prm_write_mod_reg(0xffffffff, OMAP3430ES2_USBHOST_MOD, RM_RSTST);
+
 	/* Clear any pending PRCM interrupts */
 	prm_write_mod_reg(0, OCP_MOD, OMAP3_PRM_IRQSTATUS_MPU_OFFSET);
 
-- 
cgit v0.10.2


From d3fd3290c4d9f0e40d06fa3a1a8cf164d8cde801 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Tue, 5 May 2009 16:34:25 -0700
Subject: OMAP3: PM: prevent module wakeups from waking IVA2

By default, prevent functional wakeups from inside a module from
waking up the IVA2.  Let DSP Bridge code handle this when loaded.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 8b8a2dd..841d4c5 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -595,6 +595,12 @@ static void __init prcm_setup_regs(void)
 	prm_write_mod_reg(OMAP3430_IO_EN | OMAP3430_WKUP_EN,
 			  OCP_MOD, OMAP3_PRM_IRQENABLE_MPU_OFFSET);
 
+	/* Don't attach IVA interrupts */
+	prm_write_mod_reg(0, WKUP_MOD, OMAP3430_PM_IVAGRPSEL);
+	prm_write_mod_reg(0, CORE_MOD, OMAP3430_PM_IVAGRPSEL1);
+	prm_write_mod_reg(0, CORE_MOD, OMAP3430ES2_PM_IVAGRPSEL3);
+	prm_write_mod_reg(0, OMAP3430_PER_MOD, OMAP3430_PM_IVAGRPSEL);
+
 	/* Clear any pending 'reset' flags */
 	prm_write_mod_reg(0xffffffff, MPU_MOD, RM_RSTST);
 	prm_write_mod_reg(0xffffffff, CORE_MOD, RM_RSTST);
diff --git a/arch/arm/mach-omap2/prm.h b/arch/arm/mach-omap2/prm.h
index 7c8e0c4..9937e28 100644
--- a/arch/arm/mach-omap2/prm.h
+++ b/arch/arm/mach-omap2/prm.h
@@ -203,9 +203,11 @@
 
 #define OMAP3430_PM_MPUGRPSEL				0x00a4
 #define OMAP3430_PM_MPUGRPSEL1				OMAP3430_PM_MPUGRPSEL
+#define OMAP3430ES2_PM_MPUGRPSEL3			0x00f8
 
 #define OMAP3430_PM_IVAGRPSEL				0x00a8
 #define OMAP3430_PM_IVAGRPSEL1				OMAP3430_PM_IVAGRPSEL
+#define OMAP3430ES2_PM_IVAGRPSEL3			0x00f4
 
 #define OMAP3430_PM_PREPWSTST				0x00e8
 
-- 
cgit v0.10.2


From c912f7e1eae169aaca333b4c5da3f36c98f2ccb0 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Fri, 15 May 2009 11:29:28 -0700
Subject: OMAP1: PM: update and decouple from OMAP2/3 PM core

Update OMAP1-specific PM infrastructure.  This is a sync of what is in
linux-omap for OMAP1.

This mostly de-couples OMAP1 PM from OMAP2/3 PM and renames things
accordingly, and removes omap2/3 specific code from OMAP1 specific
headers.

Original OMAP1 decoupling patch for OMAP PM branch by Paul Walmsley.

Cc: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 9774c1f..5218943 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -53,11 +53,12 @@
 #include <mach/clock.h>
 #include <mach/sram.h>
 #include <mach/tc.h>
-#include <mach/pm.h>
 #include <mach/mux.h>
 #include <mach/dma.h>
 #include <mach/dmtimer.h>
 
+#include "pm.h"
+
 static unsigned int arm_sleep_save[ARM_SLEEP_SAVE_SIZE];
 static unsigned short dsp_sleep_save[DSP_SLEEP_SAVE_SIZE];
 static unsigned short ulpd_sleep_save[ULPD_SLEEP_SAVE_SIZE];
@@ -101,7 +102,7 @@ static void (*omap_sram_suspend)(unsigned long r0, unsigned long r1) = NULL;
  * going idle we continue to do idle even if we get
  * a clock tick interrupt . .
  */
-void omap_pm_idle(void)
+void omap1_pm_idle(void)
 {
 	extern __u32 arm_idlect1_mask;
 	__u32 use_idlect1 = arm_idlect1_mask;
@@ -222,7 +223,7 @@ static void omap_pm_wakeup_setup(void)
 #define EN_APICK	6	/* ARM_IDLECT2 */
 #define DSP_EN		1	/* ARM_RSTCT1 */
 
-void omap_pm_suspend(void)
+void omap1_pm_suspend(void)
 {
 	unsigned long arg0 = 0, arg1 = 0;
 
@@ -610,7 +611,7 @@ static int omap_pm_enter(suspend_state_t state)
 	{
 	case PM_SUSPEND_STANDBY:
 	case PM_SUSPEND_MEM:
-		omap_pm_suspend();
+		omap1_pm_suspend();
 		break;
 	default:
 		return -EINVAL;
@@ -683,7 +684,7 @@ static int __init omap_pm_init(void)
 		return -ENODEV;
 	}
 
-	pm_idle = omap_pm_idle;
+	pm_idle = omap1_pm_idle;
 
 	if (cpu_is_omap730())
 		setup_irq(INT_730_WAKE_UP_REQ, &omap_wakeup_irq);
diff --git a/arch/arm/mach-omap1/pm.h b/arch/arm/mach-omap1/pm.h
index ce6ee79..9ed5e2c 100644
--- a/arch/arm/mach-omap1/pm.h
+++ b/arch/arm/mach-omap1/pm.h
@@ -1,7 +1,7 @@
 /*
- * arch/arm/plat-omap/include/mach/pm.h
+ * arch/arm/mach-omap1/pm.h
  *
- * Header file for OMAP Power Management Routines
+ * Header file for OMAP1 Power Management Routines
  *
  * Author: MontaVista Software, Inc.
  *	   support@mvista.com
@@ -31,8 +31,8 @@
  * 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#ifndef __ASM_ARCH_OMAP_PM_H
-#define __ASM_ARCH_OMAP_PM_H
+#ifndef __ARCH_ARM_MACH_OMAP1_PM_H
+#define __ARCH_ARM_MACH_OMAP1_PM_H
 
 /*
  * ----------------------------------------------------------------------------
@@ -106,8 +106,7 @@
 
 #if     !defined(CONFIG_ARCH_OMAP730) && \
 	!defined(CONFIG_ARCH_OMAP15XX) && \
-	!defined(CONFIG_ARCH_OMAP16XX) && \
-	!defined(CONFIG_ARCH_OMAP24XX)
+	!defined(CONFIG_ARCH_OMAP16XX)
 #warning "Power management for this processor not implemented yet"
 #endif
 
@@ -115,29 +114,27 @@
 
 #include <linux/clk.h>
 
+extern struct kset power_subsys;
+
 extern void prevent_idle_sleep(void);
 extern void allow_idle_sleep(void);
 
-extern void omap_pm_idle(void);
-extern void omap_pm_suspend(void);
+extern void omap1_pm_idle(void);
+extern void omap1_pm_suspend(void);
+
 extern void omap730_cpu_suspend(unsigned short, unsigned short);
 extern void omap1510_cpu_suspend(unsigned short, unsigned short);
 extern void omap1610_cpu_suspend(unsigned short, unsigned short);
-extern void omap24xx_cpu_suspend(u32 dll_ctrl, void __iomem *sdrc_dlla_ctrl,
-					void __iomem *sdrc_power);
 extern void omap730_idle_loop_suspend(void);
 extern void omap1510_idle_loop_suspend(void);
 extern void omap1610_idle_loop_suspend(void);
-extern void omap24xx_idle_loop_suspend(void);
 
 extern unsigned int omap730_cpu_suspend_sz;
 extern unsigned int omap1510_cpu_suspend_sz;
 extern unsigned int omap1610_cpu_suspend_sz;
-extern unsigned int omap24xx_cpu_suspend_sz;
 extern unsigned int omap730_idle_loop_suspend_sz;
 extern unsigned int omap1510_idle_loop_suspend_sz;
 extern unsigned int omap1610_idle_loop_suspend_sz;
-extern unsigned int omap24xx_idle_loop_suspend_sz;
 
 #ifdef CONFIG_OMAP_SERIAL_WAKE
 extern void omap_serial_wake_trigger(int enable);
@@ -170,10 +167,6 @@ extern void omap_serial_wake_trigger(int enable);
 #define MPUI1610_RESTORE(x) omap_writel((mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_##x]), (x))
 #define MPUI1610_SHOW(x) mpui1610_sleep_save[MPUI1610_SLEEP_SAVE_##x]
 
-#define OMAP24XX_SAVE(x) omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x] = x
-#define OMAP24XX_RESTORE(x) x = omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x]
-#define OMAP24XX_SHOW(x) omap24xx_sleep_save[OMAP24XX_SLEEP_SAVE_##x]
-
 /*
  * List of global OMAP registers to preserve.
  * More ones like CP and general purpose register values are preserved
@@ -283,63 +276,5 @@ enum mpui1610_save_state {
 #endif
 };
 
-enum omap24xx_save_state {
-	OMAP24XX_SLEEP_SAVE_START = 0,
-	OMAP24XX_SLEEP_SAVE_INTC_MIR0,
-	OMAP24XX_SLEEP_SAVE_INTC_MIR1,
-	OMAP24XX_SLEEP_SAVE_INTC_MIR2,
-
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_MPU,
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_GFX,
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_DSP,
-	OMAP24XX_SLEEP_SAVE_CM_CLKSTCTRL_MDM,
-
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_MPU,
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_CORE,
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_GFX,
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_DSP,
-	OMAP24XX_SLEEP_SAVE_PM_PWSTCTRL_MDM,
-
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST1_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST2_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST3_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST4_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_GFX,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_WKUP,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_CKGEN,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_DSP,
-	OMAP24XX_SLEEP_SAVE_CM_IDLEST_MDM,
-
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE1_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE2_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE3_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE4_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_WKUP,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_PLL,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_DSP,
-	OMAP24XX_SLEEP_SAVE_CM_AUTOIDLE_MDM,
-
-	OMAP24XX_SLEEP_SAVE_CM_FCLKEN1_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_FCLKEN2_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_ICLKEN1_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_ICLKEN2_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_ICLKEN3_CORE,
-	OMAP24XX_SLEEP_SAVE_CM_ICLKEN4_CORE,
-	OMAP24XX_SLEEP_SAVE_GPIO1_IRQENABLE1,
-	OMAP24XX_SLEEP_SAVE_GPIO2_IRQENABLE1,
-	OMAP24XX_SLEEP_SAVE_GPIO3_IRQENABLE1,
-	OMAP24XX_SLEEP_SAVE_GPIO4_IRQENABLE1,
-	OMAP24XX_SLEEP_SAVE_GPIO3_OE,
-	OMAP24XX_SLEEP_SAVE_GPIO4_OE,
-	OMAP24XX_SLEEP_SAVE_GPIO3_RISINGDETECT,
-	OMAP24XX_SLEEP_SAVE_GPIO3_FALLINGDETECT,
-	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SPI1_NCS2,
-	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_MCBSP1_DX,
-	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SSI1_FLAG_TX,
-	OMAP24XX_SLEEP_SAVE_CONTROL_PADCONF_SYS_NIRQW0,
-	OMAP24XX_SLEEP_SAVE_SIZE
-};
-
 #endif /* ASSEMBLER */
 #endif /* __ASM_ARCH_OMAP_PM_H */
diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c
index 842090b..f754cee 100644
--- a/arch/arm/mach-omap1/serial.c
+++ b/arch/arm/mach-omap1/serial.c
@@ -26,9 +26,6 @@
 #include <mach/mux.h>
 #include <mach/gpio.h>
 #include <mach/fpga.h>
-#ifdef CONFIG_PM
-#include <mach/pm.h>
-#endif
 
 static struct clk * uart1_ck;
 static struct clk * uart2_ck;
diff --git a/arch/arm/mach-omap1/sleep.S b/arch/arm/mach-omap1/sleep.S
index f3eac93..22e8568 100644
--- a/arch/arm/mach-omap1/sleep.S
+++ b/arch/arm/mach-omap1/sleep.S
@@ -35,7 +35,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <mach/io.h>
-#include <mach/pm.h>
+#include "pm.h"
 
 		.text
 
-- 
cgit v0.10.2


From bac4e960b5ce2453d862beaf20e59aa68af3b43a Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 25 May 2009 20:58:00 +0100
Subject: [ARM] barriers: improve xchg, bitops and atomic SMP barriers

Mathieu Desnoyers pointed out that the ARM barriers were lacking:

- cmpxchg, xchg and atomic add return need memory barriers on
  architectures which can reorder the relative order in which memory
  read/writes can be seen between CPUs, which seems to include recent
  ARM architectures. Those barriers are currently missing on ARM.

- test_and_xxx_bit were missing SMP barriers.

So put these barriers in.  Provide separate atomic_add/atomic_sub
operations which do not require barriers.

Reported-Reviewed-and-Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 6116e48..15f8a09 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -114,3 +114,16 @@
 	.align	3;				\
 	.long	9999b,9001f;			\
 	.previous
+
+/*
+ * SMP data memory barrier
+ */
+	.macro	smp_dmb
+#ifdef CONFIG_SMP
+#if __LINUX_ARM_ARCH__ >= 7
+	dmb
+#elif __LINUX_ARM_ARCH__ == 6
+	mcr	p15, 0, r0, c7, c10, 5	@ dmb
+#endif
+#endif
+	.endm
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index ee99723..16b52f3 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -44,11 +44,29 @@ static inline void atomic_set(atomic_t *v, int i)
 	: "cc");
 }
 
+static inline void atomic_add(int i, atomic_t *v)
+{
+	unsigned long tmp;
+	int result;
+
+	__asm__ __volatile__("@ atomic_add\n"
+"1:	ldrex	%0, [%2]\n"
+"	add	%0, %0, %3\n"
+"	strex	%1, %0, [%2]\n"
+"	teq	%1, #0\n"
+"	bne	1b"
+	: "=&r" (result), "=&r" (tmp)
+	: "r" (&v->counter), "Ir" (i)
+	: "cc");
+}
+
 static inline int atomic_add_return(int i, atomic_t *v)
 {
 	unsigned long tmp;
 	int result;
 
+	smp_mb();
+
 	__asm__ __volatile__("@ atomic_add_return\n"
 "1:	ldrex	%0, [%2]\n"
 "	add	%0, %0, %3\n"
@@ -59,14 +77,34 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 
+	smp_mb();
+
 	return result;
 }
 
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	unsigned long tmp;
+	int result;
+
+	__asm__ __volatile__("@ atomic_sub\n"
+"1:	ldrex	%0, [%2]\n"
+"	sub	%0, %0, %3\n"
+"	strex	%1, %0, [%2]\n"
+"	teq	%1, #0\n"
+"	bne	1b"
+	: "=&r" (result), "=&r" (tmp)
+	: "r" (&v->counter), "Ir" (i)
+	: "cc");
+}
+
 static inline int atomic_sub_return(int i, atomic_t *v)
 {
 	unsigned long tmp;
 	int result;
 
+	smp_mb();
+
 	__asm__ __volatile__("@ atomic_sub_return\n"
 "1:	ldrex	%0, [%2]\n"
 "	sub	%0, %0, %3\n"
@@ -77,6 +115,8 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 
+	smp_mb();
+
 	return result;
 }
 
@@ -84,6 +124,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 {
 	unsigned long oldval, res;
 
+	smp_mb();
+
 	do {
 		__asm__ __volatile__("@ atomic_cmpxchg\n"
 		"ldrex	%1, [%2]\n"
@@ -95,6 +137,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 		    : "cc");
 	} while (res);
 
+	smp_mb();
+
 	return oldval;
 }
 
@@ -135,6 +179,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
 
 	return val;
 }
+#define atomic_add(i, v)	(void) atomic_add_return(i, v)
 
 static inline int atomic_sub_return(int i, atomic_t *v)
 {
@@ -148,6 +193,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 
 	return val;
 }
+#define atomic_sub(i, v)	(void) atomic_sub_return(i, v)
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
@@ -187,10 +233,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 }
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
-#define atomic_add(i, v)	(void) atomic_add_return(i, v)
-#define atomic_inc(v)		(void) atomic_add_return(1, v)
-#define atomic_sub(i, v)	(void) atomic_sub_return(i, v)
-#define atomic_dec(v)		(void) atomic_sub_return(1, v)
+#define atomic_inc(v)		atomic_add(1, v)
+#define atomic_dec(v)		atomic_sub(1, v)
 
 #define atomic_inc_and_test(v)	(atomic_add_return(1, v) == 0)
 #define atomic_dec_and_test(v)	(atomic_sub_return(1, v) == 0)
@@ -200,11 +244,10 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 
 #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
 
-/* Atomic operations are already serializing on ARM */
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
+#define smp_mb__before_atomic_dec()	smp_mb()
+#define smp_mb__after_atomic_dec()	smp_mb()
+#define smp_mb__before_atomic_inc()	smp_mb()
+#define smp_mb__after_atomic_inc()	smp_mb()
 
 #include <asm-generic/atomic.h>
 #endif
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index bd4dc8e..7fce8f3 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -248,6 +248,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	unsigned int tmp;
 #endif
 
+	smp_mb();
+
 	switch (size) {
 #if __LINUX_ARM_ARCH__ >= 6
 	case 1:
@@ -307,6 +309,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		__bad_xchg(ptr, size), ret = 0;
 		break;
 	}
+	smp_mb();
 
 	return ret;
 }
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index d662a2f..83b1da6 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -815,10 +815,7 @@ __kuser_helper_start:
  */
 
 __kuser_memory_barrier:				@ 0xffff0fa0
-
-#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_SMP)
-	mcr	p15, 0, r0, c7, c10, 5	@ dmb
-#endif
+	smp_dmb
 	usr_ret	lr
 
 	.align	5
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 2e787d4..c7f2627 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -18,12 +18,14 @@
 	mov	r2, #1
 	add	r1, r1, r0, lsr #3	@ Get byte offset
 	mov	r3, r2, lsl r3		@ create mask
+	smp_dmb
 1:	ldrexb	r2, [r1]
 	ands	r0, r2, r3		@ save old value of bit
 	\instr	r2, r2, r3			@ toggle bit
 	strexb	ip, r2, [r1]
 	cmp	ip, #0
 	bne	1b
+	smp_dmb
 	cmp	r0, #0
 	movne	r0, #1
 2:	mov	pc, lr
-- 
cgit v0.10.2


From b96d31a62f714566fa6420851b3bb3615c796322 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 27 May 2009 09:37:33 -0400
Subject: cifs: clean up set_cifs_acl interfaces

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Shirish Pargaonkar <shirishp@us.ibm.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 7f8e6c4..1403b5d 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -612,57 +612,61 @@ static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
 	return pntsd;
 }
 
-/* Set an ACL on the server */
-static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
-				struct inode *inode, const char *path)
+static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid,
+		struct cifs_ntsd *pnntsd, u32 acllen)
 {
-	struct cifsFileInfo *open_file;
-	bool unlock_file = false;
-	int xid;
-	int rc = -EIO;
-	__u16 fid;
-	struct super_block *sb;
-	struct cifs_sb_info *cifs_sb;
+	int xid, rc;
 
-	cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
+	xid = GetXid();
+	rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
+	FreeXid(xid);
 
-	if (!inode)
-		return rc;
+	cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
+	return rc;
+}
 
-	sb = inode->i_sb;
-	if (sb == NULL)
-		return rc;
+static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
+		struct cifs_ntsd *pnntsd, u32 acllen)
+{
+	int oplock = 0;
+	int xid, rc;
+	__u16 fid;
 
-	cifs_sb = CIFS_SB(sb);
 	xid = GetXid();
 
-	open_file = find_readable_file(CIFS_I(inode));
-	if (open_file) {
-		unlock_file = true;
-		fid = open_file->netfid;
-	} else {
-		int oplock = 0;
-		/* open file */
-		rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
-				WRITE_DAC, 0, &fid, &oplock, NULL,
-				cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
-		if (rc != 0) {
-			cERROR(1, ("Unable to open file to set ACL"));
-			FreeXid(xid);
-			return rc;
-		}
+	rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, WRITE_DAC, 0,
+			 &fid, &oplock, NULL, cifs_sb->local_nls,
+			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	if (rc) {
+		cERROR(1, ("Unable to open file to set ACL"));
+		goto out;
 	}
 
 	rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
 	cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
-	if (unlock_file)
-		atomic_dec(&open_file->wrtPending);
-	else
-		CIFSSMBClose(xid, cifs_sb->tcon, fid);
 
+	CIFSSMBClose(xid, cifs_sb->tcon, fid);
+ out:
 	FreeXid(xid);
+	return rc;
+}
 
+/* Set an ACL on the server */
+static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
+				struct inode *inode, const char *path)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsFileInfo *open_file;
+	int rc;
+
+	cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
+
+	open_file = find_readable_file(CIFS_I(inode));
+	if (!open_file)
+		return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
+
+	rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
+	atomic_dec(&open_file->wrtPending);
 	return rc;
 }
 
-- 
cgit v0.10.2


From ecd322c9b3e4ac70f9f108badde3eb6b99c7993d Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Thu, 28 May 2009 16:07:39 -0400
Subject: [ARM] Add cmpxchg support for ARMv6+ systems (v5)

Add cmpxchg/cmpxchg64 support for ARMv6K and ARMv7 systems
(original patch from Catalin Marinas <catalin.marinas@arm.com>)

The cmpxchg and cmpxchg64 functions can be implemented using the
LDREX*/STREX* instructions. Since operand lengths other than 32bit are
required, the full implementations are only available if the ARMv6K
extensions are present (for the LDREXB, LDREXH and LDREXD instructions).

For ARMv6, only 32-bits cmpxchg is available.

Mathieu :

Make cmpxchg_local always available with best implementation for all type sizes (1, 2, 4 bytes).
Make cmpxchg64_local always available.

Use "Ir" constraint for "old" operand, like atomic.h atomic_cmpxchg does.

Change since v3 :
- Add "memory" clobbers (thanks to Nicolas Pitre)
- removed __asmeq(), only needed for old compilers, very unlikely on ARMv6+.

Note : ARMv7-M should eventually be ifdefed-out of cmpxchg64. But it's not
supported by the Linux kernel currently.

Put back arm < v6 cmpxchg support.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Catalin Marinas <catalin.marinas@arm.com>
CC: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 7fce8f3..d65b2f5 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -319,6 +319,12 @@ extern void enable_hlt(void);
 
 #include <asm-generic/cmpxchg-local.h>
 
+#if __LINUX_ARM_ARCH__ < 6
+
+#ifdef CONFIG_SMP
+#error "SMP is not supported on this platform"
+#endif
+
 /*
  * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
  * them available.
@@ -332,6 +338,173 @@ extern void enable_hlt(void);
 #include <asm-generic/cmpxchg.h>
 #endif
 
+#else	/* __LINUX_ARM_ARCH__ >= 6 */
+
+extern void __bad_cmpxchg(volatile void *ptr, int size);
+
+/*
+ * cmpxchg only support 32-bits operands on ARMv6.
+ */
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	unsigned long oldval, res;
+
+	switch (size) {
+#ifdef CONFIG_CPU_32v6K
+	case 1:
+		do {
+			asm volatile("@ __cmpxchg1\n"
+			"	ldrexb	%1, [%2]\n"
+			"	mov	%0, #0\n"
+			"	teq	%1, %3\n"
+			"	strexbeq %0, %4, [%2]\n"
+				: "=&r" (res), "=&r" (oldval)
+				: "r" (ptr), "Ir" (old), "r" (new)
+				: "memory", "cc");
+		} while (res);
+		break;
+	case 2:
+		do {
+			asm volatile("@ __cmpxchg1\n"
+			"	ldrexh	%1, [%2]\n"
+			"	mov	%0, #0\n"
+			"	teq	%1, %3\n"
+			"	strexheq %0, %4, [%2]\n"
+				: "=&r" (res), "=&r" (oldval)
+				: "r" (ptr), "Ir" (old), "r" (new)
+				: "memory", "cc");
+		} while (res);
+		break;
+#endif /* CONFIG_CPU_32v6K */
+	case 4:
+		do {
+			asm volatile("@ __cmpxchg4\n"
+			"	ldrex	%1, [%2]\n"
+			"	mov	%0, #0\n"
+			"	teq	%1, %3\n"
+			"	strexeq %0, %4, [%2]\n"
+				: "=&r" (res), "=&r" (oldval)
+				: "r" (ptr), "Ir" (old), "r" (new)
+				: "memory", "cc");
+		} while (res);
+		break;
+	default:
+		__bad_cmpxchg(ptr, size);
+		oldval = 0;
+	}
+
+	return oldval;
+}
+
+static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
+					 unsigned long new, int size)
+{
+	unsigned long ret;
+
+	smp_mb();
+	ret = __cmpxchg(ptr, old, new, size);
+	smp_mb();
+
+	return ret;
+}
+
+#define cmpxchg(ptr,o,n)						\
+	((__typeof__(*(ptr)))__cmpxchg_mb((ptr),			\
+					  (unsigned long)(o),		\
+					  (unsigned long)(n),		\
+					  sizeof(*(ptr))))
+
+static inline unsigned long __cmpxchg_local(volatile void *ptr,
+					    unsigned long old,
+					    unsigned long new, int size)
+{
+	unsigned long ret;
+
+	switch (size) {
+#ifndef CONFIG_CPU_32v6K
+	case 1:
+	case 2:
+		ret = __cmpxchg_local_generic(ptr, old, new, size);
+		break;
+#endif	/* !CONFIG_CPU_32v6K */
+	default:
+		ret = __cmpxchg(ptr, old, new, size);
+	}
+
+	return ret;
+}
+
+#define cmpxchg_local(ptr,o,n)						\
+	((__typeof__(*(ptr)))__cmpxchg_local((ptr),			\
+				       (unsigned long)(o),		\
+				       (unsigned long)(n),		\
+				       sizeof(*(ptr))))
+
+#ifdef CONFIG_CPU_32v6K
+
+/*
+ * Note : ARMv7-M (currently unsupported by Linux) does not support
+ * ldrexd/strexd. If ARMv7-M is ever supported by the Linux kernel, it should
+ * not be allowed to use __cmpxchg64.
+ */
+static inline unsigned long long __cmpxchg64(volatile void *ptr,
+					     unsigned long long old,
+					     unsigned long long new)
+{
+	register unsigned long long oldval asm("r0");
+	register unsigned long long __old asm("r2") = old;
+	register unsigned long long __new asm("r4") = new;
+	unsigned long res;
+
+	do {
+		asm volatile(
+		"	@ __cmpxchg8\n"
+		"	ldrexd	%1, %H1, [%2]\n"
+		"	mov	%0, #0\n"
+		"	teq	%1, %3\n"
+		"	teqeq	%H1, %H3\n"
+		"	strexdeq %0, %4, %H4, [%2]\n"
+			: "=&r" (res), "=&r" (oldval)
+			: "r" (ptr), "Ir" (__old), "r" (__new)
+			: "memory", "cc");
+	} while (res);
+
+	return oldval;
+}
+
+static inline unsigned long long __cmpxchg64_mb(volatile void *ptr,
+						unsigned long long old,
+						unsigned long long new)
+{
+	unsigned long long ret;
+
+	smp_mb();
+	ret = __cmpxchg64(ptr, old, new);
+	smp_mb();
+
+	return ret;
+}
+
+#define cmpxchg64(ptr,o,n)						\
+	((__typeof__(*(ptr)))__cmpxchg64_mb((ptr),			\
+					    (unsigned long long)(o),	\
+					    (unsigned long long)(n)))
+
+#define cmpxchg64_local(ptr,o,n)					\
+	((__typeof__(*(ptr)))__cmpxchg64((ptr),				\
+					 (unsigned long long)(o),	\
+					 (unsigned long long)(n)))
+
+#else	/* !CONFIG_CPU_32v6K */
+
+#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+
+#endif	/* CONFIG_CPU_32v6K */
+
+#endif	/* __LINUX_ARM_ARCH__ >= 6 */
+
 #endif /* __ASSEMBLY__ */
 
 #define arch_align_stack(x) (x)
-- 
cgit v0.10.2


From 0815f8eaae6d11b9dae6c2ff0202d7945f8d7cd2 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@nokia.com>
Date: Thu, 28 May 2009 13:23:51 -0700
Subject: ARM: OMAP2/3: DMA: implement trans copy and const fill

Implement transparent copy and constant fill features for OMAP2/3.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index 7fc8c04..58d98ad 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -310,41 +310,62 @@ EXPORT_SYMBOL(omap_set_dma_transfer_params);
 
 void omap_set_dma_color_mode(int lch, enum omap_dma_color_mode mode, u32 color)
 {
-	u16 w;
-
 	BUG_ON(omap_dma_in_1510_mode());
 
-	if (cpu_class_is_omap2()) {
-		REVISIT_24XX();
-		return;
-	}
+	if (cpu_class_is_omap1()) {
+		u16 w;
 
-	w = dma_read(CCR2(lch));
-	w &= ~0x03;
+		w = dma_read(CCR2(lch));
+		w &= ~0x03;
 
-	switch (mode) {
-	case OMAP_DMA_CONSTANT_FILL:
-		w |= 0x01;
-		break;
-	case OMAP_DMA_TRANSPARENT_COPY:
-		w |= 0x02;
-		break;
-	case OMAP_DMA_COLOR_DIS:
-		break;
-	default:
-		BUG();
+		switch (mode) {
+		case OMAP_DMA_CONSTANT_FILL:
+			w |= 0x01;
+			break;
+		case OMAP_DMA_TRANSPARENT_COPY:
+			w |= 0x02;
+			break;
+		case OMAP_DMA_COLOR_DIS:
+			break;
+		default:
+			BUG();
+		}
+		dma_write(w, CCR2(lch));
+
+		w = dma_read(LCH_CTRL(lch));
+		w &= ~0x0f;
+		/* Default is channel type 2D */
+		if (mode) {
+			dma_write((u16)color, COLOR_L(lch));
+			dma_write((u16)(color >> 16), COLOR_U(lch));
+			w |= 1;		/* Channel type G */
+		}
+		dma_write(w, LCH_CTRL(lch));
 	}
-	dma_write(w, CCR2(lch));
 
-	w = dma_read(LCH_CTRL(lch));
-	w &= ~0x0f;
-	/* Default is channel type 2D */
-	if (mode) {
-		dma_write((u16)color, COLOR_L(lch));
-		dma_write((u16)(color >> 16), COLOR_U(lch));
-		w |= 1;		/* Channel type G */
+	if (cpu_class_is_omap2()) {
+		u32 val;
+
+		val = dma_read(CCR(lch));
+		val &= ~((1 << 17) | (1 << 16));
+
+		switch (mode) {
+		case OMAP_DMA_CONSTANT_FILL:
+			val |= 1 << 16;
+			break;
+		case OMAP_DMA_TRANSPARENT_COPY:
+			val |= 1 << 17;
+			break;
+		case OMAP_DMA_COLOR_DIS:
+			break;
+		default:
+			BUG();
+		}
+		dma_write(val, CCR(lch));
+
+		color &= 0xffffff;
+		dma_write(color, COLOR(lch));
 	}
-	dma_write(w, LCH_CTRL(lch));
 }
 EXPORT_SYMBOL(omap_set_dma_color_mode);
 
diff --git a/arch/arm/plat-omap/include/mach/dma.h b/arch/arm/plat-omap/include/mach/dma.h
index 54fe966..35fefdb 100644
--- a/arch/arm/plat-omap/include/mach/dma.h
+++ b/arch/arm/plat-omap/include/mach/dma.h
@@ -144,6 +144,7 @@
 #define OMAP_DMA4_CSSA_U(n)		0
 #define OMAP_DMA4_CDSA_L(n)		0
 #define OMAP_DMA4_CDSA_U(n)		0
+#define OMAP1_DMA_COLOR(n)		0
 
 /*----------------------------------------------------------------------------*/
 
-- 
cgit v0.10.2


From 279b918d726a66c61c9dc7aec8b1fb035d40fdfc Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Thu, 28 May 2009 13:23:52 -0700
Subject: ARM: OMAP2/3: sDMA: Correct omap_request_dma_chain(), v2

Original OMAP DMA chaining design had chain_id as one of the callback
parameters. Patch 538528de0cb256f65716ab2e9613d9e920f97fe2 changed it
to use logical channel instead.

Correct the naming for callback to also use logical channel number
instead of the chain_id.

More details are on this email thread:

http://marc.info/?l=linux-omap&m=122961071931459&w=2

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index 58d98ad..06e9cbe 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -1220,7 +1220,7 @@ static void create_dma_lch_chain(int lch_head, int lch_queue)
  * 	     Failure: -EINVAL/-ENOMEM
  */
 int omap_request_dma_chain(int dev_id, const char *dev_name,
-			   void (*callback) (int chain_id, u16 ch_status,
+			   void (*callback) (int lch, u16 ch_status,
 					     void *data),
 			   int *chain_id, int no_of_chans, int chain_mode,
 			   struct omap_dma_channel_params params)
diff --git a/arch/arm/plat-omap/include/mach/dma.h b/arch/arm/plat-omap/include/mach/dma.h
index 35fefdb..19df76f 100644
--- a/arch/arm/plat-omap/include/mach/dma.h
+++ b/arch/arm/plat-omap/include/mach/dma.h
@@ -532,7 +532,7 @@ extern int omap_get_dma_index(int lch, int *ei, int *fi);
 /* Chaining APIs */
 #ifndef CONFIG_ARCH_OMAP1
 extern int omap_request_dma_chain(int dev_id, const char *dev_name,
-				  void (*callback) (int chain_id, u16 ch_status,
+				  void (*callback) (int lch, u16 ch_status,
 						    void *data),
 				  int *chain_id, int no_of_chans,
 				  int chain_mode,
-- 
cgit v0.10.2


From aa62e90fe0700c037675926fff9f75b0b1c00d78 Mon Sep 17 00:00:00 2001
From: Juha Yrjola <juha.yrjola@solidboot.com>
Date: Thu, 28 May 2009 13:23:52 -0700
Subject: ARM: OMAP2/3: Add generic onenand support when connected to GPMC

Add generic onenand support when connected to GPMC and make the
boards to use it.

The patch has been modified to make it more generic to support all
the boards with GPMC. The patch also remove unused prototype for
omap2_onenand_rephase(void).

Note that board-apollon.c is currently using the MTD_ONENAND_GENERIC
and setting the GPMC timings in the bootloader. Setting the GPMC
timings in the bootloader will not allow supporting frequency
scaling for the onenand source clock.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/configs/rx51_defconfig b/arch/arm/configs/rx51_defconfig
index 593102da..eb2cb31 100644
--- a/arch/arm/configs/rx51_defconfig
+++ b/arch/arm/configs/rx51_defconfig
@@ -282,7 +282,7 @@ CONFIG_ALIGNMENT_TRAP=y
 #
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="init=/sbin/preinit ubi.mtd=4 root=ubi0:rootfs rootfstype=ubifs rw console=ttyMTD5"
+CONFIG_CMDLINE="init=/sbin/preinit ubi.mtd=rootfs root=ubi0:rootfs rootfstype=ubifs rootflags=bulk_read,no_chk_data_crc rw console=ttyMTD,log console=tty0"
 # CONFIG_XIP_KERNEL is not set
 # CONFIG_KEXEC is not set
 
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index bf3827a..11f7f66 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -58,3 +58,6 @@ obj-$(CONFIG_MACH_NOKIA_RX51)		+= board-rx51.o \
 ifeq ($(CONFIG_USB_MUSB_SOC),y)
 obj-y					+= usb-musb.o
 endif
+
+onenand-$(CONFIG_MTD_ONENAND_OMAP2)	:= gpmc-onenand.o
+obj-y					+= $(onenand-m) $(onenand-y)
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index a738172..ca18ae9 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -27,6 +27,7 @@
 #include <mach/dma.h>
 #include <mach/gpmc.h>
 #include <mach/keypad.h>
+#include <mach/onenand.h>
 
 #include "mmc-twl4030.h"
 
@@ -408,6 +409,62 @@ static int __init rx51_i2c_init(void)
 	return 0;
 }
 
+#if defined(CONFIG_MTD_ONENAND_OMAP2) || \
+	defined(CONFIG_MTD_ONENAND_OMAP2_MODULE)
+
+static struct mtd_partition onenand_partitions[] = {
+	{
+		.name           = "bootloader",
+		.offset         = 0,
+		.size           = 0x20000,
+		.mask_flags     = MTD_WRITEABLE,	/* Force read-only */
+	},
+	{
+		.name           = "config",
+		.offset         = MTDPART_OFS_APPEND,
+		.size           = 0x60000,
+	},
+	{
+		.name           = "log",
+		.offset         = MTDPART_OFS_APPEND,
+		.size           = 0x40000,
+	},
+	{
+		.name           = "kernel",
+		.offset         = MTDPART_OFS_APPEND,
+		.size           = 0x200000,
+	},
+	{
+		.name           = "initfs",
+		.offset         = MTDPART_OFS_APPEND,
+		.size           = 0x200000,
+	},
+	{
+		.name           = "rootfs",
+		.offset         = MTDPART_OFS_APPEND,
+		.size           = MTDPART_SIZ_FULL,
+	},
+};
+
+static struct omap_onenand_platform_data board_onenand_data = {
+	.cs		= 0,
+	.gpio_irq	= 65,
+	.parts		= onenand_partitions,
+	.nr_parts	= ARRAY_SIZE(onenand_partitions),
+};
+
+static void __init board_onenand_init(void)
+{
+	gpmc_onenand_init(&board_onenand_data);
+}
+
+#else
+
+static inline void board_onenand_init(void)
+{
+}
+
+#endif
 
 void __init rx51_peripherals_init(void)
 {
@@ -415,5 +472,6 @@ void __init rx51_peripherals_init(void)
 				ARRAY_SIZE(rx51_peripherals_devices));
 	rx51_i2c_init();
 	rx51_init_smc91x();
+	board_onenand_init();
 }
 
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
new file mode 100644
index 0000000..2fd22f9
--- /dev/null
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -0,0 +1,330 @@
+/*
+ * linux/arch/arm/mach-omap2/gpmc-onenand.c
+ *
+ * Copyright (C) 2006 - 2009 Nokia Corporation
+ * Contacts:	Juha Yrjola
+ *		Tony Lindgren
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/onenand_regs.h>
+#include <linux/io.h>
+
+#include <asm/mach/flash.h>
+
+#include <mach/onenand.h>
+#include <mach/board.h>
+#include <mach/gpmc.h>
+
+static struct omap_onenand_platform_data *gpmc_onenand_data;
+
+static struct platform_device gpmc_onenand_device = {
+	.name		= "omap2-onenand",
+	.id		= -1,
+};
+
+static int omap2_onenand_set_async_mode(int cs, void __iomem *onenand_base)
+{
+	struct gpmc_timings t;
+
+	const int t_cer = 15;
+	const int t_avdp = 12;
+	const int t_aavdh = 7;
+	const int t_ce = 76;
+	const int t_aa = 76;
+	const int t_oe = 20;
+	const int t_cez = 20; /* max of t_cez, t_oez */
+	const int t_ds = 30;
+	const int t_wpl = 40;
+	const int t_wph = 30;
+
+	memset(&t, 0, sizeof(t));
+	t.sync_clk = 0;
+	t.cs_on = 0;
+	t.adv_on = 0;
+
+	/* Read */
+	t.adv_rd_off = gpmc_round_ns_to_ticks(max_t(int, t_avdp, t_cer));
+	t.oe_on  = t.adv_rd_off + gpmc_round_ns_to_ticks(t_aavdh);
+	t.access = t.adv_on + gpmc_round_ns_to_ticks(t_aa);
+	t.access = max_t(int, t.access, t.cs_on + gpmc_round_ns_to_ticks(t_ce));
+	t.access = max_t(int, t.access, t.oe_on + gpmc_round_ns_to_ticks(t_oe));
+	t.oe_off = t.access + gpmc_round_ns_to_ticks(1);
+	t.cs_rd_off = t.oe_off;
+	t.rd_cycle  = t.cs_rd_off + gpmc_round_ns_to_ticks(t_cez);
+
+	/* Write */
+	t.adv_wr_off = t.adv_rd_off;
+	t.we_on  = t.oe_on;
+	if (cpu_is_omap34xx()) {
+		t.wr_data_mux_bus = t.we_on;
+		t.wr_access = t.we_on + gpmc_round_ns_to_ticks(t_ds);
+	}
+	t.we_off = t.we_on + gpmc_round_ns_to_ticks(t_wpl);
+	t.cs_wr_off = t.we_off + gpmc_round_ns_to_ticks(t_wph);
+	t.wr_cycle  = t.cs_wr_off + gpmc_round_ns_to_ticks(t_cez);
+
+	/* Configure GPMC for asynchronous read */
+	gpmc_cs_write_reg(cs, GPMC_CS_CONFIG1,
+			  GPMC_CONFIG1_DEVICESIZE_16 |
+			  GPMC_CONFIG1_MUXADDDATA);
+
+	return gpmc_cs_set_timings(cs, &t);
+}
+
+static void set_onenand_cfg(void __iomem *onenand_base, int latency,
+				int sync_read, int sync_write, int hf)
+{
+	u32 reg;
+
+	reg = readw(onenand_base + ONENAND_REG_SYS_CFG1);
+	reg &= ~((0x7 << ONENAND_SYS_CFG1_BRL_SHIFT) | (0x7 << 9));
+	reg |=	(latency << ONENAND_SYS_CFG1_BRL_SHIFT) |
+		ONENAND_SYS_CFG1_BL_16;
+	if (sync_read)
+		reg |= ONENAND_SYS_CFG1_SYNC_READ;
+	else
+		reg &= ~ONENAND_SYS_CFG1_SYNC_READ;
+	if (sync_write)
+		reg |= ONENAND_SYS_CFG1_SYNC_WRITE;
+	else
+		reg &= ~ONENAND_SYS_CFG1_SYNC_WRITE;
+	if (hf)
+		reg |= ONENAND_SYS_CFG1_HF;
+	else
+		reg &= ~ONENAND_SYS_CFG1_HF;
+	writew(reg, onenand_base + ONENAND_REG_SYS_CFG1);
+}
+
+static int omap2_onenand_set_sync_mode(struct omap_onenand_platform_data *cfg,
+					void __iomem *onenand_base,
+					int freq)
+{
+	struct gpmc_timings t;
+	const int t_cer  = 15;
+	const int t_avdp = 12;
+	const int t_cez  = 20; /* max of t_cez, t_oez */
+	const int t_ds   = 30;
+	const int t_wpl  = 40;
+	const int t_wph  = 30;
+	int min_gpmc_clk_period, t_ces, t_avds, t_avdh, t_ach, t_aavdh, t_rdyo;
+	int tick_ns, div, fclk_offset_ns, fclk_offset, gpmc_clk_ns, latency;
+	int first_time = 0, hf = 0, sync_read = 0, sync_write = 0;
+	int err, ticks_cez;
+	int cs = cfg->cs;
+	u32 reg;
+
+	if (cfg->flags & ONENAND_SYNC_READ) {
+		sync_read = 1;
+	} else if (cfg->flags & ONENAND_SYNC_READWRITE) {
+		sync_read = 1;
+		sync_write = 1;
+	}
+
+	if (!freq) {
+		/* Very first call freq is not known */
+		err = omap2_onenand_set_async_mode(cs, onenand_base);
+		if (err)
+			return err;
+		reg = readw(onenand_base + ONENAND_REG_VERSION_ID);
+		switch ((reg >> 4) & 0xf) {
+		case 0:
+			freq = 40;
+			break;
+		case 1:
+			freq = 54;
+			break;
+		case 2:
+			freq = 66;
+			break;
+		case 3:
+			freq = 83;
+			break;
+		case 4:
+			freq = 104;
+			break;
+		default:
+			freq = 54;
+			break;
+		}
+		first_time = 1;
+	}
+
+	switch (freq) {
+	case 83:
+		min_gpmc_clk_period = 12; /* 83 MHz */
+		t_ces   = 5;
+		t_avds  = 4;
+		t_avdh  = 2;
+		t_ach   = 6;
+		t_aavdh = 6;
+		t_rdyo  = 9;
+		break;
+	case 66:
+		min_gpmc_clk_period = 15; /* 66 MHz */
+		t_ces   = 6;
+		t_avds  = 5;
+		t_avdh  = 2;
+		t_ach   = 6;
+		t_aavdh = 6;
+		t_rdyo  = 11;
+		break;
+	default:
+		min_gpmc_clk_period = 18; /* 54 MHz */
+		t_ces   = 7;
+		t_avds  = 7;
+		t_avdh  = 7;
+		t_ach   = 9;
+		t_aavdh = 7;
+		t_rdyo  = 15;
+		sync_write = 0;
+		break;
+	}
+
+	tick_ns = gpmc_ticks_to_ns(1);
+	div = gpmc_cs_calc_divider(cs, min_gpmc_clk_period);
+	gpmc_clk_ns = gpmc_ticks_to_ns(div);
+	if (gpmc_clk_ns < 15) /* >66Mhz */
+		hf = 1;
+	if (hf)
+		latency = 6;
+	else if (gpmc_clk_ns >= 25) /* 40 MHz*/
+		latency = 3;
+	else
+		latency = 4;
+
+	if (first_time)
+		set_onenand_cfg(onenand_base, latency,
+					sync_read, sync_write, hf);
+
+	if (div == 1) {
+		reg = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG2);
+		reg |= (1 << 7);
+		gpmc_cs_write_reg(cs, GPMC_CS_CONFIG2, reg);
+		reg = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG3);
+		reg |= (1 << 7);
+		gpmc_cs_write_reg(cs, GPMC_CS_CONFIG3, reg);
+		reg = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG4);
+		reg |= (1 << 7);
+		reg |= (1 << 23);
+		gpmc_cs_write_reg(cs, GPMC_CS_CONFIG4, reg);
+	} else {
+		reg = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG2);
+		reg &= ~(1 << 7);
+		gpmc_cs_write_reg(cs, GPMC_CS_CONFIG2, reg);
+		reg = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG3);
+		reg &= ~(1 << 7);
+		gpmc_cs_write_reg(cs, GPMC_CS_CONFIG3, reg);
+		reg = gpmc_cs_read_reg(cs, GPMC_CS_CONFIG4);
+		reg &= ~(1 << 7);
+		reg &= ~(1 << 23);
+		gpmc_cs_write_reg(cs, GPMC_CS_CONFIG4, reg);
+	}
+
+	/* Set synchronous read timings */
+	memset(&t, 0, sizeof(t));
+	t.sync_clk = min_gpmc_clk_period;
+	t.cs_on = 0;
+	t.adv_on = 0;
+	fclk_offset_ns = gpmc_round_ns_to_ticks(max_t(int, t_ces, t_avds));
+	fclk_offset = gpmc_ns_to_ticks(fclk_offset_ns);
+	t.page_burst_access = gpmc_clk_ns;
+
+	/* Read */
+	t.adv_rd_off = gpmc_ticks_to_ns(fclk_offset + gpmc_ns_to_ticks(t_avdh));
+	t.oe_on = gpmc_ticks_to_ns(fclk_offset + gpmc_ns_to_ticks(t_ach));
+	t.access = gpmc_ticks_to_ns(fclk_offset + (latency + 1) * div);
+	t.oe_off = t.access + gpmc_round_ns_to_ticks(1);
+	t.cs_rd_off = t.oe_off;
+	ticks_cez = ((gpmc_ns_to_ticks(t_cez) + div - 1) / div) * div;
+	t.rd_cycle = gpmc_ticks_to_ns(fclk_offset + (latency + 1) * div +
+		     ticks_cez);
+
+	/* Write */
+	if (sync_write) {
+		t.adv_wr_off = t.adv_rd_off;
+		t.we_on  = 0;
+		t.we_off = t.cs_rd_off;
+		t.cs_wr_off = t.cs_rd_off;
+		t.wr_cycle  = t.rd_cycle;
+		if (cpu_is_omap34xx()) {
+			t.wr_data_mux_bus = gpmc_ticks_to_ns(fclk_offset +
+					gpmc_ns_to_ticks(min_gpmc_clk_period +
+					t_rdyo));
+			t.wr_access = t.access;
+		}
+	} else {
+		t.adv_wr_off = gpmc_round_ns_to_ticks(max_t(int,
+							t_avdp, t_cer));
+		t.we_on  = t.adv_wr_off + gpmc_round_ns_to_ticks(t_aavdh);
+		t.we_off = t.we_on + gpmc_round_ns_to_ticks(t_wpl);
+		t.cs_wr_off = t.we_off + gpmc_round_ns_to_ticks(t_wph);
+		t.wr_cycle  = t.cs_wr_off + gpmc_round_ns_to_ticks(t_cez);
+		if (cpu_is_omap34xx()) {
+			t.wr_data_mux_bus = t.we_on;
+			t.wr_access = t.we_on + gpmc_round_ns_to_ticks(t_ds);
+		}
+	}
+
+	/* Configure GPMC for synchronous read */
+	gpmc_cs_write_reg(cs, GPMC_CS_CONFIG1,
+			  GPMC_CONFIG1_WRAPBURST_SUPP |
+			  GPMC_CONFIG1_READMULTIPLE_SUPP |
+			  (sync_read ? GPMC_CONFIG1_READTYPE_SYNC : 0) |
+			  (sync_write ? GPMC_CONFIG1_WRITEMULTIPLE_SUPP : 0) |
+			  (sync_write ? GPMC_CONFIG1_WRITETYPE_SYNC : 0) |
+			  GPMC_CONFIG1_CLKACTIVATIONTIME(fclk_offset) |
+			  GPMC_CONFIG1_PAGE_LEN(2) |
+			  (cpu_is_omap34xx() ? 0 :
+				(GPMC_CONFIG1_WAIT_READ_MON |
+				 GPMC_CONFIG1_WAIT_PIN_SEL(0))) |
+			  GPMC_CONFIG1_DEVICESIZE_16 |
+			  GPMC_CONFIG1_DEVICETYPE_NOR |
+			  GPMC_CONFIG1_MUXADDDATA);
+
+	err = gpmc_cs_set_timings(cs, &t);
+	if (err)
+		return err;
+
+	set_onenand_cfg(onenand_base, latency, sync_read, sync_write, hf);
+
+	return 0;
+}
+
+static int gpmc_onenand_setup(void __iomem *onenand_base, int freq)
+{
+	struct device *dev = &gpmc_onenand_device.dev;
+
+	/* Set sync timings in GPMC */
+	if (omap2_onenand_set_sync_mode(gpmc_onenand_data, onenand_base,
+			freq) < 0) {
+		dev_err(dev, "Unable to set synchronous mode\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void __init gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
+{
+	gpmc_onenand_data = _onenand_data;
+	gpmc_onenand_data->onenand_setup = gpmc_onenand_setup;
+	gpmc_onenand_device.dev.platform_data = gpmc_onenand_data;
+
+	if (cpu_is_omap24xx() &&
+			(gpmc_onenand_data->flags & ONENAND_SYNC_READWRITE)) {
+		printk(KERN_ERR "Onenand using only SYNC_READ on 24xx\n");
+		gpmc_onenand_data->flags &= ~ONENAND_SYNC_READWRITE;
+		gpmc_onenand_data->flags |= ONENAND_SYNC_READ;
+	}
+
+	if (platform_device_register(&gpmc_onenand_device) < 0) {
+		printk(KERN_ERR "Unable to register OneNAND device\n");
+		return;
+	}
+}
diff --git a/arch/arm/plat-omap/include/mach/onenand.h b/arch/arm/plat-omap/include/mach/onenand.h
index 4649d30..72f433d 100644
--- a/arch/arm/plat-omap/include/mach/onenand.h
+++ b/arch/arm/plat-omap/include/mach/onenand.h
@@ -9,8 +9,12 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
+#define ONENAND_SYNC_READ	(1 << 0)
+#define ONENAND_SYNC_READWRITE	(1 << 1)
+
 struct omap_onenand_platform_data {
 	int			cs;
 	int			gpio_irq;
@@ -18,8 +22,22 @@ struct omap_onenand_platform_data {
 	int			nr_parts;
 	int                     (*onenand_setup)(void __iomem *, int freq);
 	int			dma_channel;
+	u8			flags;
 };
 
-int omap2_onenand_rephase(void);
-
 #define ONENAND_MAX_PARTITIONS 8
+
+#if defined(CONFIG_MTD_ONENAND_OMAP2) || \
+	defined(CONFIG_MTD_ONENAND_OMAP2_MODULE)
+
+extern void gpmc_onenand_init(struct omap_onenand_platform_data *d);
+
+#else
+
+#define board_onenand_data	NULL
+
+static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d)
+{
+}
+
+#endif
-- 
cgit v0.10.2


From 1a48e1575188d4023b3428b623caeefe8c599e79 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 28 May 2009 13:23:52 -0700
Subject: ARM: OMAP2/3: Add generic smc91x support when connected to GPMC

Convert the board-rx51 smc91x code to be generic and make
the boards to use it. This allows future recalculation of the
timings when the source clock gets scaled.

Also correct the rx51 interrupt to be IORESOURCE_IRQ_HIGHLEVEL.

Thanks to Paul Walmsley <paul@pwsan.com> for better GPMC timing
calculations.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 11f7f66..e4f6f61 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -61,3 +61,6 @@ endif
 
 onenand-$(CONFIG_MTD_ONENAND_OMAP2)	:= gpmc-onenand.o
 obj-y					+= $(onenand-m) $(onenand-y)
+
+smc91x-$(CONFIG_SMC91X)			:= gpmc-smc91x.o
+obj-y					+= $(smc91x-m) $(smc91x-y)
diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c
index 2214365..ef70b79 100644
--- a/arch/arm/mach-omap2/board-2430sdp.c
+++ b/arch/arm/mach-omap2/board-2430sdp.c
@@ -36,14 +36,11 @@
 #include <mach/common.h>
 #include <mach/gpmc.h>
 #include <mach/usb.h>
+#include <mach/gpmc-smc91x.h>
 
 #include "mmc-twl4030.h"
 
 #define SDP2430_CS0_BASE	0x04000000
-#define	SDP2430_FLASH_CS	0
-#define	SDP2430_SMC91X_CS	5
-
-#define SDP2430_ETHR_GPIO_IRQ		149
 
 static struct mtd_partition sdp2430_partitions[] = {
 	/* bootloader (U-Boot, etc) in first sector */
@@ -99,100 +96,43 @@ static struct platform_device sdp2430_flash_device = {
 	.resource	= &sdp2430_flash_resource,
 };
 
-static struct resource sdp2430_smc91x_resources[] = {
-	[0] = {
-		.start	= SDP2430_CS0_BASE,
-		.end	= SDP2430_CS0_BASE + SZ_64M - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= OMAP_GPIO_IRQ(SDP2430_ETHR_GPIO_IRQ),
-		.end	= OMAP_GPIO_IRQ(SDP2430_ETHR_GPIO_IRQ),
-		.flags	= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
-	},
-};
-
-static struct platform_device sdp2430_smc91x_device = {
-	.name		= "smc91x",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(sdp2430_smc91x_resources),
-	.resource	= sdp2430_smc91x_resources,
-};
-
 static struct platform_device *sdp2430_devices[] __initdata = {
-	&sdp2430_smc91x_device,
 	&sdp2430_flash_device,
 };
 
-static inline void __init sdp2430_init_smc91x(void)
-{
-	int eth_cs;
-	unsigned long cs_mem_base;
-	unsigned int rate;
-	struct clk *gpmc_fck;
-
-	eth_cs = SDP2430_SMC91X_CS;
+#if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91x_MODULE)
 
-	gpmc_fck = clk_get(NULL, "gpmc_fck");	/* Always on ENABLE_ON_INIT */
-	if (IS_ERR(gpmc_fck)) {
-		WARN_ON(1);
-		return;
-	}
+static struct omap_smc91x_platform_data board_smc91x_data = {
+	.cs		= 5,
+	.gpio_irq	= 149,
+	.flags		= GPMC_MUX_ADD_DATA | GPMC_TIMINGS_SMC91C96 |
+				IORESOURCE_IRQ_LOWLEVEL,
 
-	clk_enable(gpmc_fck);
-	rate = clk_get_rate(gpmc_fck);
-
-	/* Make sure CS1 timings are correct, for 2430 always muxed */
-	gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG1, 0x00011200);
-
-	if (rate >= 160000000) {
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f01);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080803);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1c0b1c0a);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x041f1F1F);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000004C4);
-	} else if (rate >= 130000000) {
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f00);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080802);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1C091C09);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x041f1F1F);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000004C4);
-	} else { /* rate = 100000000 */
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f00);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080802);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1C091C09);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x031A1F1F);
-		gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000003C2);
-	}
+};
 
-	if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) {
-		printk(KERN_ERR "Failed to request GPMC mem for smc91x\n");
-		goto out;
-	}
+static void __init board_smc91x_init(void)
+{
+	if (omap_rev() > OMAP3430_REV_ES1_0)
+		board_smc91x_data.gpio_irq = 6;
+	else
+		board_smc91x_data.gpio_irq = 29;
 
-	sdp2430_smc91x_resources[0].start = cs_mem_base + 0x300;
-	sdp2430_smc91x_resources[0].end = cs_mem_base + 0x30f;
-	udelay(100);
+	gpmc_smc91x_init(&board_smc91x_data);
+}
 
-	if (gpio_request(SDP2430_ETHR_GPIO_IRQ, "SMC91x irq") < 0) {
-		printk(KERN_ERR "Failed to request GPIO%d for smc91x IRQ\n",
-			SDP2430_ETHR_GPIO_IRQ);
-		gpmc_cs_free(eth_cs);
-		goto out;
-	}
-	gpio_direction_input(SDP2430_ETHR_GPIO_IRQ);
+#else
 
-out:
-	clk_disable(gpmc_fck);
-	clk_put(gpmc_fck);
+static inline void board_smc91x_init(void)
+{
 }
 
+#endif
+
 static void __init omap_2430sdp_init_irq(void)
 {
 	omap2_init_common_hw(NULL);
 	omap_init_irq();
 	omap_gpio_init();
-	sdp2430_init_smc91x();
 }
 
 static struct omap_uart_config sdp2430_uart_config __initdata = {
@@ -256,6 +196,7 @@ static void __init omap_2430sdp_init(void)
 	omap_serial_init();
 	twl4030_mmc_init(mmc);
 	usb_musb_init();
+	board_smc91x_init();
 }
 
 static void __init omap_2430sdp_map_io(void)
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index ed92749..0e63695 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -39,15 +39,12 @@
 
 #include <mach/control.h>
 #include <mach/keypad.h>
+#include <mach/gpmc-smc91x.h>
 
 #include "mmc-twl4030.h"
 
 #define CONFIG_DISABLE_HFCLK 1
 
-#define SDP3430_ETHR_GPIO_IRQ_SDPV1	29
-#define SDP3430_ETHR_GPIO_IRQ_SDPV2	6
-#define SDP3430_SMC91X_CS		3
-
 #define SDP3430_TS_GPIO_IRQ_SDPV1	3
 #define SDP3430_TS_GPIO_IRQ_SDPV2	2
 
@@ -56,24 +53,6 @@
 
 #define TWL4030_MSECURE_GPIO 22
 
-static struct resource sdp3430_smc91x_resources[] = {
-	[0] = {
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= 0,
-		.end	= 0,
-		.flags	= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
-	},
-};
-
-static struct platform_device sdp3430_smc91x_device = {
-	.name		= "smc91x",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(sdp3430_smc91x_resources),
-	.resource	= sdp3430_smc91x_resources,
-};
-
 static int sdp3430_keymap[] = {
 	KEY(0, 0, KEY_LEFT),
 	KEY(0, 1, KEY_RIGHT),
@@ -184,48 +163,14 @@ static struct regulator_consumer_supply sdp3430_vdvi_supply = {
 };
 
 static struct platform_device *sdp3430_devices[] __initdata = {
-	&sdp3430_smc91x_device,
 	&sdp3430_lcd_device,
 };
 
-static inline void __init sdp3430_init_smc91x(void)
-{
-	int eth_cs;
-	unsigned long cs_mem_base;
-	int eth_gpio = 0;
-
-	eth_cs = SDP3430_SMC91X_CS;
-
-	if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) {
-		printk(KERN_ERR "Failed to request GPMC mem for smc91x\n");
-		return;
-	}
-
-	sdp3430_smc91x_resources[0].start = cs_mem_base + 0x300;
-	sdp3430_smc91x_resources[0].end = cs_mem_base + 0x30f;
-	udelay(100);
-
-	if (omap_rev() > OMAP3430_REV_ES1_0)
-		eth_gpio = SDP3430_ETHR_GPIO_IRQ_SDPV2;
-	else
-		eth_gpio = SDP3430_ETHR_GPIO_IRQ_SDPV1;
-
-	sdp3430_smc91x_resources[1].start = gpio_to_irq(eth_gpio);
-
-	if (gpio_request(eth_gpio, "SMC91x irq") < 0) {
-		printk(KERN_ERR "Failed to request GPIO%d for smc91x IRQ\n",
-			eth_gpio);
-		return;
-	}
-	gpio_direction_input(eth_gpio);
-}
-
 static void __init omap_3430sdp_init_irq(void)
 {
 	omap2_init_common_hw(NULL);
 	omap_init_irq();
 	omap_gpio_init();
-	sdp3430_init_smc91x();
 }
 
 static struct omap_uart_config sdp3430_uart_config __initdata = {
@@ -506,6 +451,32 @@ static int __init omap3430_i2c_init(void)
 	return 0;
 }
 
+#if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91X_MODULE)
+
+static struct omap_smc91x_platform_data board_smc91x_data = {
+	.cs		= 3,
+	.flags		= GPMC_MUX_ADD_DATA | GPMC_TIMINGS_SMC91C96 |
+				IORESOURCE_IRQ_LOWLEVEL,
+};
+
+static void __init board_smc91x_init(void)
+{
+	if (omap_rev() > OMAP3430_REV_ES1_0)
+		board_smc91x_data.gpio_irq = 6;
+	else
+		board_smc91x_data.gpio_irq = 29;
+
+	gpmc_smc91x_init(&board_smc91x_data);
+}
+
+#else
+
+static inline void board_smc91x_init(void)
+{
+}
+
+#endif
+
 static void __init omap_3430sdp_init(void)
 {
 	omap3430_i2c_init();
@@ -522,6 +493,7 @@ static void __init omap_3430sdp_init(void)
 	ads7846_dev_init();
 	omap_serial_init();
 	usb_musb_init();
+	board_smc91x_init();
 }
 
 static void __init omap_3430sdp_map_io(void)
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index ca18ae9..233c745 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -28,31 +28,10 @@
 #include <mach/gpmc.h>
 #include <mach/keypad.h>
 #include <mach/onenand.h>
+#include <mach/gpmc-smc91x.h>
 
 #include "mmc-twl4030.h"
 
-
-#define SMC91X_CS			1
-#define SMC91X_GPIO_IRQ			54
-#define SMC91X_GPIO_RESET		164
-#define SMC91X_GPIO_PWRDWN		86
-
-static struct resource rx51_smc91x_resources[] = {
-	[0] = {
-		.flags		= IORESOURCE_MEM,
-	},
-	[1] = {
-		.flags		= IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE,
-	},
-};
-
-static struct platform_device rx51_smc91x_device = {
-	.name		= "smc91x",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(rx51_smc91x_resources),
-	.resource	= rx51_smc91x_resources,
-};
-
 static int rx51_keymap[] = {
 	KEY(0, 0, KEY_Q),
 	KEY(0, 1, KEY_W),
@@ -108,98 +87,6 @@ static struct twl4030_keypad_data rx51_kp_data = {
 	.rep		= 1,
 };
 
-static struct platform_device *rx51_peripherals_devices[] = {
-	&rx51_smc91x_device,
-};
-
-/*
- * Timings are taken from smsc-lan91c96-ms.pdf
- */
-static int smc91x_init_gpmc(int cs)
-{
-	struct gpmc_timings t;
-	const int t2_r = 45;		/* t2 in Figure 12.10 */
-	const int t2_w = 30;		/* t2 in Figure 12.11 */
-	const int t3 = 15;		/* t3 in Figure 12.10 */
-	const int t5_r = 0;		/* t5 in Figure 12.10 */
-	const int t6_r = 45;		/* t6 in Figure 12.10 */
-	const int t6_w = 0;		/* t6 in Figure 12.11 */
-	const int t7_w = 15;		/* t7 in Figure 12.11 */
-	const int t15 = 12;		/* t15 in Figure 12.2 */
-	const int t20 = 185;		/* t20 in Figure 12.2 */
-
-	memset(&t, 0, sizeof(t));
-
-	t.cs_on = t15;
-	t.cs_rd_off = t3 + t2_r + t5_r;	/* Figure 12.10 */
-	t.cs_wr_off = t3 + t2_w + t6_w;	/* Figure 12.11 */
-	t.adv_on = t3;			/* Figure 12.10 */
-	t.adv_rd_off = t3 + t2_r;	/* Figure 12.10 */
-	t.adv_wr_off = t3 + t2_w;	/* Figure 12.11 */
-	t.oe_off = t3 + t2_r + t5_r;	/* Figure 12.10 */
-	t.oe_on = t.oe_off - t6_r;	/* Figure 12.10 */
-	t.we_off = t3 + t2_w + t6_w;	/* Figure 12.11 */
-	t.we_on = t.we_off - t7_w;	/* Figure 12.11 */
-	t.rd_cycle = t20;		/* Figure 12.2 */
-	t.wr_cycle = t20;		/* Figure 12.4 */
-	t.access = t3 + t2_r + t5_r;	/* Figure 12.10 */
-	t.wr_access = t3 + t2_w + t6_w;	/* Figure 12.11 */
-
-	gpmc_cs_write_reg(cs, GPMC_CS_CONFIG1, GPMC_CONFIG1_DEVICESIZE_16);
-
-	return gpmc_cs_set_timings(cs, &t);
-}
-
-static void __init rx51_init_smc91x(void)
-{
-	unsigned long cs_mem_base;
-	int ret;
-
-	omap_cfg_reg(U8_34XX_GPIO54_DOWN);
-	omap_cfg_reg(G25_34XX_GPIO86_OUT);
-	omap_cfg_reg(H19_34XX_GPIO164_OUT);
-
-	if (gpmc_cs_request(SMC91X_CS, SZ_16M, &cs_mem_base) < 0) {
-		printk(KERN_ERR "Failed to request GPMC mem for smc91x\n");
-		return;
-	}
-
-	rx51_smc91x_resources[0].start = cs_mem_base + 0x300;
-	rx51_smc91x_resources[0].end = cs_mem_base + 0x30f;
-
-	smc91x_init_gpmc(SMC91X_CS);
-
-	if (gpio_request(SMC91X_GPIO_IRQ, "SMC91X irq") < 0)
-		goto free1;
-
-	gpio_direction_input(SMC91X_GPIO_IRQ);
-	rx51_smc91x_resources[1].start = gpio_to_irq(SMC91X_GPIO_IRQ);
-
-	ret = gpio_request(SMC91X_GPIO_PWRDWN, "SMC91X powerdown");
-	if (ret)
-		goto free2;
-	gpio_direction_output(SMC91X_GPIO_PWRDWN, 0);
-
-	ret = gpio_request(SMC91X_GPIO_RESET, "SMC91X reset");
-	if (ret)
-		goto free3;
-	gpio_direction_output(SMC91X_GPIO_RESET, 0);
-	gpio_set_value(SMC91X_GPIO_RESET, 1);
-	msleep(100);
-	gpio_set_value(SMC91X_GPIO_RESET, 0);
-
-	return;
-
-free3:
-	gpio_free(SMC91X_GPIO_PWRDWN);
-free2:
-	gpio_free(SMC91X_GPIO_IRQ);
-free1:
-	gpmc_cs_free(SMC91X_CS);
-
-	printk(KERN_ERR "Could not initialize smc91x\n");
-}
-
 static struct twl4030_madc_platform_data rx51_madc_data = {
 	.irq_line		= 1,
 };
@@ -466,12 +353,37 @@ static inline void board_onenand_init(void)
 
 #endif
 
+#if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91X_MODULE)
+
+static struct omap_smc91x_platform_data board_smc91x_data = {
+	.cs		= 1,
+	.gpio_irq	= 54,
+	.gpio_pwrdwn	= 86,
+	.gpio_reset	= 164,
+	.flags		= GPMC_TIMINGS_SMC91C96 | IORESOURCE_IRQ_HIGHLEVEL,
+};
+
+static void __init board_smc91x_init(void)
+{
+	omap_cfg_reg(U8_34XX_GPIO54_DOWN);
+	omap_cfg_reg(G25_34XX_GPIO86_OUT);
+	omap_cfg_reg(H19_34XX_GPIO164_OUT);
+
+	gpmc_smc91x_init(&board_smc91x_data);
+}
+
+#else
+
+static inline void board_smc91x_init(void)
+{
+}
+
+#endif
+
 void __init rx51_peripherals_init(void)
 {
-	platform_add_devices(rx51_peripherals_devices,
-				ARRAY_SIZE(rx51_peripherals_devices));
 	rx51_i2c_init();
-	rx51_init_smc91x();
 	board_onenand_init();
+	board_smc91x_init();
 }
 
diff --git a/arch/arm/mach-omap2/gpmc-smc91x.c b/arch/arm/mach-omap2/gpmc-smc91x.c
new file mode 100644
index 0000000..df99d31
--- /dev/null
+++ b/arch/arm/mach-omap2/gpmc-smc91x.c
@@ -0,0 +1,189 @@
+/*
+ * linux/arch/arm/mach-omap2/gpmc-smc91x.c
+ *
+ * Copyright (C) 2009 Nokia Corporation
+ * Contact:	Tony Lindgren
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/smc91x.h>
+
+#include <mach/board.h>
+#include <mach/gpmc.h>
+#include <mach/gpmc-smc91x.h>
+
+static struct omap_smc91x_platform_data *gpmc_cfg;
+
+static struct resource gpmc_smc91x_resources[] = {
+	[0] = {
+		.flags		= IORESOURCE_MEM,
+	},
+	[1] = {
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct smc91x_platdata gpmc_smc91x_info = {
+	.flags  = SMC91X_USE_16BIT | SMC91X_NOWAIT | SMC91X_IO_SHIFT_0,
+};
+
+static struct platform_device gpmc_smc91x_device = {
+	.name		= "smc91x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(gpmc_smc91x_resources),
+	.resource	= gpmc_smc91x_resources,
+	.dev		= {
+		.platform_data = &gpmc_smc91x_info,
+	},
+};
+
+/*
+ * Set the gpmc timings for smc91c96. The timings are taken
+ * from the data sheet available at:
+ * http://www.smsc.com/main/catalog/lan91c96.html
+ * REVISIT: Level shifters can add at least to the access latency.
+ */
+static int smc91c96_gpmc_retime(void)
+{
+	struct gpmc_timings t;
+	const int t3 = 10;	/* Figure 12.2 read and 12.4 write */
+	const int t4_r = 20;	/* Figure 12.2 read */
+	const int t4_w = 5;	/* Figure 12.4 write */
+	const int t5 = 25;	/* Figure 12.2 read */
+	const int t6 = 15;	/* Figure 12.2 read */
+	const int t7 = 5;	/* Figure 12.4 write */
+	const int t8 = 5;	/* Figure 12.4 write */
+	const int t20 = 185;	/* Figure 12.2 read and 12.4 write */
+	u32 l;
+
+	memset(&t, 0, sizeof(t));
+
+	/* Read timings */
+	t.cs_on = 0;
+	t.adv_on = t.cs_on;
+	t.oe_on = t.adv_on + t3;
+	t.access = t.oe_on + t5;
+	t.oe_off = t.access;
+	t.adv_rd_off = t.oe_off + max(t4_r, t6);
+	t.cs_rd_off = t.oe_off;
+	t.rd_cycle = t20 - t.oe_on;
+
+	/* Write timings */
+	t.we_on = t.adv_on + t3;
+
+	if (cpu_is_omap34xx() && (gpmc_cfg->flags & GPMC_MUX_ADD_DATA)) {
+		t.wr_data_mux_bus = t.we_on;
+		t.we_off = t.wr_data_mux_bus + t7;
+	} else
+		t.we_off = t.we_on + t7;
+	if (cpu_is_omap34xx())
+		t.wr_access = t.we_off;
+	t.adv_wr_off = t.we_off + max(t4_w, t8);
+	t.cs_wr_off = t.we_off + t4_w;
+	t.wr_cycle = t20 - t.we_on;
+
+	l = GPMC_CONFIG1_DEVICESIZE_16;
+	if (gpmc_cfg->flags & GPMC_MUX_ADD_DATA)
+		l |= GPMC_CONFIG1_MUXADDDATA;
+	if (gpmc_cfg->flags & GPMC_READ_MON)
+		l |= GPMC_CONFIG1_WAIT_READ_MON;
+	if (gpmc_cfg->flags & GPMC_WRITE_MON)
+		l |= GPMC_CONFIG1_WAIT_WRITE_MON;
+	if (gpmc_cfg->wait_pin)
+		l |= GPMC_CONFIG1_WAIT_PIN_SEL(gpmc_cfg->wait_pin);
+	gpmc_cs_write_reg(gpmc_cfg->cs, GPMC_CS_CONFIG1, l);
+
+	/*
+	 * FIXME: Calculate the address and data bus muxed timings.
+	 * Note that at least adv_rd_off needs to be changed according
+	 * to omap3430 TRM Figure 11-11. Are the sdp boards using the
+	 * FPGA in between smc91x and omap as the timings are different
+	 * from above?
+	 */
+	if (gpmc_cfg->flags & GPMC_MUX_ADD_DATA)
+		return 0;
+
+	return gpmc_cs_set_timings(gpmc_cfg->cs, &t);
+}
+
+/*
+ * Initialize smc91x device connected to the GPMC. Note that we
+ * assume that pin multiplexing is done in the board-*.c file,
+ * or in the bootloader.
+ */
+void __init gpmc_smc91x_init(struct omap_smc91x_platform_data *board_data)
+{
+	unsigned long cs_mem_base;
+	int ret;
+
+	gpmc_cfg = board_data;
+
+	if (gpmc_cfg->flags & GPMC_TIMINGS_SMC91C96)
+		gpmc_cfg->retime = smc91c96_gpmc_retime;
+
+	if (gpmc_cs_request(gpmc_cfg->cs, SZ_16M, &cs_mem_base) < 0) {
+		printk(KERN_ERR "Failed to request GPMC mem for smc91x\n");
+		return;
+	}
+
+	gpmc_smc91x_resources[0].start = cs_mem_base + 0x300;
+	gpmc_smc91x_resources[0].end = cs_mem_base + 0x30f;
+	gpmc_smc91x_resources[1].flags |= (gpmc_cfg->flags & IRQF_TRIGGER_MASK);
+
+	if (gpmc_cfg->retime) {
+		ret = gpmc_cfg->retime();
+		if (ret != 0)
+			goto free1;
+	}
+
+	if (gpio_request(gpmc_cfg->gpio_irq, "SMC91X irq") < 0)
+		goto free1;
+
+	gpio_direction_input(gpmc_cfg->gpio_irq);
+	gpmc_smc91x_resources[1].start = gpio_to_irq(gpmc_cfg->gpio_irq);
+
+	if (gpmc_cfg->gpio_pwrdwn) {
+		ret = gpio_request(gpmc_cfg->gpio_pwrdwn, "SMC91X powerdown");
+		if (ret)
+			goto free2;
+		gpio_direction_output(gpmc_cfg->gpio_pwrdwn, 0);
+	}
+
+	if (gpmc_cfg->gpio_reset) {
+		ret = gpio_request(gpmc_cfg->gpio_reset, "SMC91X reset");
+		if (ret)
+			goto free3;
+
+		gpio_direction_output(gpmc_cfg->gpio_reset, 0);
+		gpio_set_value(gpmc_cfg->gpio_reset, 1);
+		msleep(100);
+		gpio_set_value(gpmc_cfg->gpio_reset, 0);
+	}
+
+	if (platform_device_register(&gpmc_smc91x_device) < 0) {
+		printk(KERN_ERR "Unable to register smc91x device\n");
+		gpio_free(gpmc_cfg->gpio_reset);
+		goto free3;
+	}
+
+	return;
+
+free3:
+	if (gpmc_cfg->gpio_pwrdwn)
+		gpio_free(gpmc_cfg->gpio_pwrdwn);
+free2:
+	gpio_free(gpmc_cfg->gpio_irq);
+free1:
+	gpmc_cs_free(gpmc_cfg->cs);
+
+	printk(KERN_ERR "Could not initialize smc91x\n");
+}
diff --git a/arch/arm/plat-omap/include/mach/gpmc-smc91x.h b/arch/arm/plat-omap/include/mach/gpmc-smc91x.h
new file mode 100644
index 0000000..b64fbee
--- /dev/null
+++ b/arch/arm/plat-omap/include/mach/gpmc-smc91x.h
@@ -0,0 +1,42 @@
+/*
+ * arch/arm/plat-omap/include/mach/gpmc-smc91x.h
+ *
+ * Copyright (C) 2009 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_OMAP_GPMC_SMC91X_H__
+
+#define GPMC_TIMINGS_SMC91C96	(1 << 4)
+#define GPMC_MUX_ADD_DATA	(1 << 5) /* GPMC_CONFIG1_MUXADDDATA */
+#define GPMC_READ_MON		(1 << 6) /* GPMC_CONFIG1_WAIT_READ_MON */
+#define GPMC_WRITE_MON		(1 << 7) /* GPMC_CONFIG1_WAIT_WRITE_MON */
+
+struct omap_smc91x_platform_data {
+	int	cs;
+	int	gpio_irq;
+	int	gpio_pwrdwn;
+	int	gpio_reset;
+	int	wait_pin;	/* Optional GPMC_CONFIG1_WAITPINSELECT */
+	u32	flags;
+	int	(*retime)(void);
+};
+
+#if defined(CONFIG_SMC91X) || \
+	defined(CONFIG_SMC91X_MODULE)
+
+extern void gpmc_smc91x_init(struct omap_smc91x_platform_data *d);
+
+#else
+
+#define board_smc91x_data	NULL
+
+static inline void gpmc_smc91x_init(struct omap_smc91x_platform_data *d)
+{
+}
+
+#endif
+#endif
-- 
cgit v0.10.2


From 7d8e967f88006ba6bea7f998e9e6745c27b0b664 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Thu, 28 May 2009 13:23:53 -0700
Subject: ARM: OMAP2: 2430SDP: Add FB support to board file

Based on an earlier patch by Hunyue Yau <hyau@mvista.com> with
board-*.c changes split to avoid conflicts with other device updates.

Cc: linux-fbdev-devel@lists.sourceforge.net
Signed-off-by: Hunyue Yau <hyau@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c
index ef70b79..9c3fdcd 100644
--- a/arch/arm/mach-omap2/board-2430sdp.c
+++ b/arch/arm/mach-omap2/board-2430sdp.c
@@ -41,6 +41,7 @@
 #include "mmc-twl4030.h"
 
 #define SDP2430_CS0_BASE	0x04000000
+#define SECONDARY_LCD_GPIO		147
 
 static struct mtd_partition sdp2430_partitions[] = {
 	/* bootloader (U-Boot, etc) in first sector */
@@ -96,8 +97,18 @@ static struct platform_device sdp2430_flash_device = {
 	.resource	= &sdp2430_flash_resource,
 };
 
+static struct platform_device sdp2430_lcd_device = {
+	.name		= "sdp2430_lcd",
+	.id		= -1,
+};
+
 static struct platform_device *sdp2430_devices[] __initdata = {
 	&sdp2430_flash_device,
+	&sdp2430_lcd_device,
+};
+
+static struct omap_lcd_config sdp2430_lcd_config __initdata = {
+	.ctrl_name	= "internal",
 };
 
 #if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91x_MODULE)
@@ -141,6 +152,7 @@ static struct omap_uart_config sdp2430_uart_config __initdata = {
 
 static struct omap_board_config_kernel sdp2430_config[] = {
 	{OMAP_TAG_UART, &sdp2430_uart_config},
+	{OMAP_TAG_LCD, &sdp2430_lcd_config},
 };
 
 
@@ -188,6 +200,8 @@ static struct twl4030_hsmmc_info mmc[] __initdata = {
 
 static void __init omap_2430sdp_init(void)
 {
+	int ret;
+
 	omap2430_i2c_init();
 
 	platform_add_devices(sdp2430_devices, ARRAY_SIZE(sdp2430_devices));
@@ -197,6 +211,11 @@ static void __init omap_2430sdp_init(void)
 	twl4030_mmc_init(mmc);
 	usb_musb_init();
 	board_smc91x_init();
+
+	/* Turn off secondary LCD backlight */
+	ret = gpio_request(SECONDARY_LCD_GPIO, "Secondary LCD backlight");
+	if (ret == 0)
+		gpio_direction_output(SECONDARY_LCD_GPIO, 0);
 }
 
 static void __init omap_2430sdp_map_io(void)
-- 
cgit v0.10.2


From f5525786b0c481a0000989f2603257ba3aaef8ba Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 28 May 2009 13:23:53 -0700
Subject: ARM: OMAP: Add some entries to MAINTAINERS

Add some entries to MAINTAINERS.

Also regroup all omap entries together, and remove an inactive
MMC maintainers entry, and Jarkko Lavinen instead.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 77cbfb1..ee86be0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4127,6 +4127,69 @@ S:	Maintained
 F:	drivers/video/riva/
 F:	drivers/video/nvidia/
 
+OMAP SUPPORT
+P:	Tony Lindgren <tony@atomide.com>
+M:	tony@atomide.com
+L:	linux-omap@vger.kernel.org
+W:	http://www.muru.com/linux/omap/
+W:	http://linux.omap.com/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap-2.6.git
+S:	Maintained
+F:	arch/arm/*omap*
+
+OMAP CLOCK FRAMEWORK SUPPORT
+P:	Paul Walmsley
+M:	paul@pwsan.com
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	arch/arm/*omap*/*clock*
+
+OMAP POWER MANAGEMENT SUPPORT
+P:	Kevin Hilman
+M:	khilman@deeprootsystems.com
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	arch/arm/*omap*/*pm*
+
+OMAP AUDIO SUPPORT
+P:	Jarkko Nikula
+M:	jhnikula@gmail.com
+L:	alsa-devel@alsa-project.org (subscribers-only)
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	sound/soc/omap/
+
+OMAP FRAMEBUFFER SUPPORT
+P:	Imre Deak
+M:	imre.deak@nokia.com
+L:	linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	drivers/video/omap/
+
+OMAP MMC SUPPORT
+P:	Jarkko Lavinen
+M:	jarkko.lavinen@nokia.com
+L:	linux-kernel@vger.kernel.org
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+F:	drivers/mmc/host/*omap*
+
+OMAP RANDOM NUMBER GENERATOR SUPPORT
+P:	Deepak Saxena
+M:	dsaxena@plexity.net
+S:	Maintained
+F:	drivers/char/hw_random/omap-rng.c
+
+OMAP USB SUPPORT
+P:	Felipe Balbi
+M:	felipe.balbi@nokia.com
+P:	David Brownell
+M:	dbrownell@users.sourceforge.net
+L:	linux-usb@vger.kernel.org
+L:	linux-omap@vger.kernel.org
+S:	Maintained
+
 OMFS FILESYSTEM
 P:	Bob Copeland
 M:	me@bobcopeland.com
@@ -5515,20 +5578,6 @@ F:	drivers/misc/tifm*
 F:	drivers/mmc/host/tifm_sd.c
 F:	include/linux/tifm.h
 
-TI OMAP MMC INTERFACE DRIVER
-P:	Carlos Aguiar, Anderson Briglia and Syed Khasim
-M:	linux-omap@vger.kernel.org
-W:	http://linux.omap.com
-W:	http://www.muru.com/linux/omap/
-S:	Maintained
-F:	drivers/mmc/host/omap.c
-
-TI OMAP RANDOM NUMBER GENERATOR SUPPORT
-P:	Deepak Saxena
-M:	dsaxena@plexity.net
-S:	Maintained
-F:	drivers/char/hw_random/omap-rng.c
-
 TIPC NETWORK LAYER
 P:	Per Liden
 M:	per.liden@ericsson.com
-- 
cgit v0.10.2


From f42706c90470851fd2e97eda7a4109e8949bde8a Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <mfuzzey@gmail.com>
Date: Fri, 1 May 2009 17:21:11 +0200
Subject: USB: atmel-usba-udc : fix control out requests.

usbtest #14 was failing with "udc: ep0: TXCOMP: Invalid endpoint state 2, halting endpoint..."
This occured since ep0 is bidirectional and ep->is_in is not valid (must always use ep->state)

Signed-off-by: Martin Fuzzey <mfuzzey@gmail.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c
index 563d572..5644897 100644
--- a/drivers/usb/gadget/atmel_usba_udc.c
+++ b/drivers/usb/gadget/atmel_usba_udc.c
@@ -794,7 +794,8 @@ usba_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
 	if (ep->desc) {
 		list_add_tail(&req->queue, &ep->queue);
 
-		if (ep->is_in || (ep_is_control(ep)
+		if ((!ep_is_control(ep) && ep->is_in) ||
+			(ep_is_control(ep)
 				&& (ep->state == DATA_STAGE_IN
 					|| ep->state == STATUS_STAGE_IN)))
 			usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY);
-- 
cgit v0.10.2


From fe92c9e481a147a9e1e14f55870f32903b967777 Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Date: Mon, 4 May 2009 17:22:43 -0700
Subject: USB: atmel_usb_udc: Use kzalloc() to allocate ep structures

This ensures that all fields are properly initialized.

Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c
index 5644897..05c913c 100644
--- a/drivers/usb/gadget/atmel_usba_udc.c
+++ b/drivers/usb/gadget/atmel_usba_udc.c
@@ -1941,7 +1941,7 @@ static int __init usba_udc_probe(struct platform_device *pdev)
 	usba_writel(udc, CTRL, USBA_DISABLE_MASK);
 	clk_disable(pclk);
 
-	usba_ep = kmalloc(sizeof(struct usba_ep) * pdata->num_ep,
+	usba_ep = kzalloc(sizeof(struct usba_ep) * pdata->num_ep,
 			  GFP_KERNEL);
 	if (!usba_ep)
 		goto err_alloc_ep;
-- 
cgit v0.10.2


From cab98a0a349829b145d924c0649a2d30cd6a9e3d Mon Sep 17 00:00:00 2001
From: Xiao Kaijian <xiaokj@gmail.com>
Date: Fri, 8 May 2009 00:48:23 +0800
Subject: USB: Yet another Conexant Clone to add to cdc-acm.c

This patch adds another quirky Conexant USB Modem Clone to usb cdc-acm.c

Signed-off-by: Xiao Kaijian <xiaokj@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 0a69c09..7a1164d 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1375,6 +1375,9 @@ static struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x0572, 0x1324), /* Conexant USB MODEM RD02-D400 */
 	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
 	},
+	{ USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */
+	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
+	},
 	{ USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */
 	},
 	{ USB_DEVICE(0x0572, 0x1329), /* Hummingbird huc56s (Conexant) */
-- 
cgit v0.10.2


From 0afb20e00b5053170c85298fed842b32d20b4ea9 Mon Sep 17 00:00:00 2001
From: Warren Free <wfree@ipmn.com>
Date: Fri, 8 May 2009 10:27:08 +0200
Subject: USB: isp1760: urb_dequeue doesn't always find the urbs

The option driver (and presumably others) allocates several URBs when it
opens and tries to free them when it closes. The isp1760_urb_dequeue
function gets called, but the packet being dequeued is not necessarily at
the
front of one of the 32 queues. If not, the isp1760_urb_done function doesn't
get called for the URB and the process trying to free it hangs forever on a
wait_queue. This patch does two things. If the URB being dequeued has others
queued behind it, it re-queues them. And it searches the queues looking for
the URB being dequeued rather than just looking at the one at the front of
the queue.

[bigeasy@linutronix] whitespace fixes, reformating

Cc: stable <stable@kernel.org>
Signed-off-by: Warren Free <wfree@ipmn.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c
index cd07ea3..1543846 100644
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c
@@ -1658,6 +1658,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
 	u32 reg_base, or_reg, skip_reg;
 	unsigned long flags;
 	struct ptd ptd;
+	packet_enqueue *pe;
 
 	switch (usb_pipetype(urb->pipe)) {
 	case PIPE_ISOCHRONOUS:
@@ -1669,6 +1670,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
 		reg_base = INT_REGS_OFFSET;
 		or_reg = HC_INT_IRQ_MASK_OR_REG;
 		skip_reg = HC_INT_PTD_SKIPMAP_REG;
+		pe = enqueue_an_INT_packet;
 		break;
 
 	default:
@@ -1676,6 +1678,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
 		reg_base = ATL_REGS_OFFSET;
 		or_reg = HC_ATL_IRQ_MASK_OR_REG;
 		skip_reg = HC_ATL_PTD_SKIPMAP_REG;
+		pe =  enqueue_an_ATL_packet;
 		break;
 	}
 
@@ -1687,6 +1690,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
 			u32 skip_map;
 			u32 or_map;
 			struct isp1760_qtd *qtd;
+			struct isp1760_qh *qh = ints->qh;
 
 			skip_map = isp1760_readl(hcd->regs + skip_reg);
 			skip_map |= 1 << i;
@@ -1699,8 +1703,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
 			priv_write_copy(priv, (u32 *)&ptd, hcd->regs + reg_base
 					+ i * sizeof(ptd), sizeof(ptd));
 			qtd = ints->qtd;
-
-			clean_up_qtdlist(qtd);
+			qtd = clean_up_qtdlist(qtd);
 
 			free_mem(priv, ints->payload);
 
@@ -1711,7 +1714,24 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
 			ints->payload = 0;
 
 			isp1760_urb_done(priv, urb, status);
+			if (qtd)
+				pe(hcd, qh, qtd);
 			break;
+
+		} else if (ints->qtd) {
+			struct isp1760_qtd *qtd, *prev_qtd = ints->qtd;
+
+			for (qtd = ints->qtd->hw_next; qtd; qtd = qtd->hw_next) {
+				if (qtd->urb == urb) {
+					prev_qtd->hw_next = clean_up_qtdlist(qtd);
+					isp1760_urb_done(priv, urb, status);
+					break;
+				}
+				prev_qtd = qtd;
+			}
+			/* we found the urb before the end of the list */
+			if (qtd)
+				break;
 		}
 		ints++;
 	}
-- 
cgit v0.10.2


From 0a3c8549ea7e94d74a41096d42bc6cdf43d183bf Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 27 May 2009 11:25:52 -0400
Subject: usb-serial: fix crash when sub-driver updates firmware

This patch (as1244) fixes a crash in usb-serial that occurs when a
sub-driver returns a positive value from its attach method, indicating
that new firmware was loaded and the device will disconnect and
reconnect.  The usb-serial core then skips the step of registering the
port devices; when the disconnect occurs, the attempt to unregister
the ports fails dramatically.

This problem shows up with Keyspan devices and it might affect others
as well.

When the attach method returns a positive value, the patch sets
num_ports to 0.  This tells usb_serial_disconnect() not to try
unregistering any of the ports; instead they are cleaned up by
destroy_serial().

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 0a566ee..f331e2b 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -974,6 +974,7 @@ int usb_serial_probe(struct usb_interface *interface,
 		if (retval > 0) {
 			/* quietly accept this device, but don't bind to a
 			   serial port as it's about to disappear */
+			serial->num_ports = 0;
 			goto exit;
 		}
 	}
-- 
cgit v0.10.2


From 29868b281f6d057b4cbe348f4483f1717c021c5c Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 20 May 2009 08:49:48 -0400
Subject: Revert "USB: Correct Makefile to make isp1760 buildable"

This reverts commit 26e1287594864169577327fef233befc9739be3b.

A larger patch (f7e7aa585) a few days after this one added the same line
to the Makefile, but in a different place.  While it'd be more correct to
revert that one, it's easier to revert this one because this is a
one-liner.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
CC: Greg Kroah-Hartman <gregkh@suse.de>
CC: linux-usb@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index 0716cdb..0a3dc5e 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_USB_MON)		+= mon/
 obj-$(CONFIG_PCI)		+= host/
 obj-$(CONFIG_USB_EHCI_HCD)	+= host/
 obj-$(CONFIG_USB_ISP116X_HCD)	+= host/
-obj-$(CONFIG_USB_ISP1760_HCD)	+= host/
 obj-$(CONFIG_USB_OHCI_HCD)	+= host/
 obj-$(CONFIG_USB_UHCI_HCD)	+= host/
 obj-$(CONFIG_USB_FHCI_HCD)	+= host/
-- 
cgit v0.10.2


From 088962c243db42b9c608f30be3e3a05a5b696895 Mon Sep 17 00:00:00 2001
From: Andrew de Quincey <adq_dvb@lidskialf.net>
Date: Thu, 28 May 2009 14:03:31 -0700
Subject: ARM: OMAP1: Make 770 LCD work

Make 770 LCD work by adding clk_add_alias().
Also remove the old unused functions.

Note that the clk_add_alias() could probably be moved
to arch/arm/clkdev.c later on.

Cc: linux-fbdev-devel@lists.sourceforge.net
Signed-off-by: Andrew de Quincey <adq_dvb@lidskialf.net>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com

diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index 8780ca6..e70fc7c 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -33,9 +33,11 @@
 #include <mach/common.h>
 #include <mach/dsp_common.h>
 #include <mach/omapfb.h>
+#include <mach/hwa742.h>
 #include <mach/lcd_mipid.h>
 #include <mach/mmc.h>
 #include <mach/usb.h>
+#include <mach/clock.h>
 
 #define ADS7846_PENDOWN_GPIO	15
 
@@ -163,6 +165,15 @@ static struct spi_board_info nokia770_spi_board_info[] __initdata = {
 	},
 };
 
+static struct hwa742_platform_data nokia770_hwa742_platform_data = {
+	.te_connected		= 1,
+};
+
+static void hwa742_dev_init(void)
+{
+	clk_add_alias("hwa_sys_ck", NULL, "bclk", NULL);
+	omapfb_set_ctrl_platform_data(&nokia770_hwa742_platform_data);
+}
 
 /* assume no Mini-AB port */
 
@@ -371,6 +382,7 @@ static void __init omap_nokia770_init(void)
 	omap_serial_init();
 	omap_register_i2c_bus(1, 100, NULL, 0);
 	omap_dsp_init();
+	hwa742_dev_init();
 	ads7846_dev_init();
 	mipid_dev_init();
 	omap_usb_init(&nokia770_usb_config);
diff --git a/arch/arm/plat-omap/include/mach/hwa742.h b/arch/arm/plat-omap/include/mach/hwa742.h
index 577f492..886248d 100644
--- a/arch/arm/plat-omap/include/mach/hwa742.h
+++ b/arch/arm/plat-omap/include/mach/hwa742.h
@@ -2,10 +2,6 @@
 #define _HWA742_H
 
 struct hwa742_platform_data {
-	void		(*power_up)(struct device *dev);
-	void		(*power_down)(struct device *dev);
-	unsigned long	(*get_clock_rate)(struct device *dev);
-
 	unsigned	te_connected:1;
 };
 
diff --git a/drivers/video/omap/hwa742.c b/drivers/video/omap/hwa742.c
index 8aa6e47..5d4f348 100644
--- a/drivers/video/omap/hwa742.c
+++ b/drivers/video/omap/hwa742.c
@@ -133,8 +133,7 @@ struct {
 	struct lcd_ctrl_extif	*extif;
 	struct lcd_ctrl		*int_ctrl;
 
-	void			(*power_up)(struct device *dev);
-	void			(*power_down)(struct device *dev);
+	struct clk		*sys_ck;
 } hwa742;
 
 struct lcd_ctrl hwa742_ctrl;
@@ -915,14 +914,13 @@ static void hwa742_suspend(void)
 	hwa742_set_update_mode(OMAPFB_UPDATE_DISABLED);
 	/* Enable sleep mode */
 	hwa742_write_reg(HWA742_POWER_SAVE, 1 << 1);
-	if (hwa742.power_down != NULL)
-		hwa742.power_down(hwa742.fbdev->dev);
+	clk_disable(hwa742.sys_ck);
 }
 
 static void hwa742_resume(void)
 {
-	if (hwa742.power_up != NULL)
-		hwa742.power_up(hwa742.fbdev->dev);
+	clk_enable(hwa742.sys_ck);
+
 	/* Disable sleep mode */
 	hwa742_write_reg(HWA742_POWER_SAVE, 0);
 	while (1) {
@@ -955,14 +953,13 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode,
 	omapfb_conf = fbdev->dev->platform_data;
 	ctrl_conf = omapfb_conf->ctrl_platform_data;
 
-	if (ctrl_conf == NULL || ctrl_conf->get_clock_rate == NULL) {
+	if (ctrl_conf == NULL) {
 		dev_err(fbdev->dev, "HWA742: missing platform data\n");
 		r = -ENOENT;
 		goto err1;
 	}
 
-	hwa742.power_down = ctrl_conf->power_down;
-	hwa742.power_up = ctrl_conf->power_up;
+	hwa742.sys_ck = clk_get(NULL, "hwa_sys_ck");
 
 	spin_lock_init(&hwa742.req_lock);
 
@@ -972,12 +969,11 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode,
 	if ((r = hwa742.extif->init(fbdev)) < 0)
 		goto err2;
 
-	ext_clk = ctrl_conf->get_clock_rate(fbdev->dev);
+	ext_clk = clk_get_rate(hwa742.sys_ck);
 	if ((r = calc_extif_timings(ext_clk, &extif_mem_div)) < 0)
 		goto err3;
 	hwa742.extif->set_timings(&hwa742.reg_timings);
-	if (hwa742.power_up != NULL)
-		hwa742.power_up(fbdev->dev);
+	clk_enable(hwa742.sys_ck);
 
 	calc_hwa742_clk_rates(ext_clk, &sys_clk, &pix_clk);
 	if ((r = calc_extif_timings(sys_clk, &extif_mem_div)) < 0)
@@ -1040,8 +1036,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode,
 
 	return 0;
 err4:
-	if (hwa742.power_down != NULL)
-		hwa742.power_down(fbdev->dev);
+	clk_disable(hwa742.sys_ck);
 err3:
 	hwa742.extif->cleanup();
 err2:
@@ -1055,8 +1050,7 @@ static void hwa742_cleanup(void)
 	hwa742_set_update_mode(OMAPFB_UPDATE_DISABLED);
 	hwa742.extif->cleanup();
 	hwa742.int_ctrl->cleanup();
-	if (hwa742.power_down != NULL)
-		hwa742.power_down(hwa742.fbdev->dev);
+	clk_disable(hwa742.sys_ck);
 }
 
 struct lcd_ctrl hwa742_ctrl = {
-- 
cgit v0.10.2


From f247de346f67db1e252e4ed6c29d029a6e592398 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 28 May 2009 14:03:58 -0700
Subject: ARM: OMAP2/3: Remove L4_WK_OMAP_BASE, L4_PER_OMAP_BASE, L4_EMU_BASE,
 L3_OMAP_BASE

These are not being used right now, and the processor specific
defines should be used instead by any code accessing these registers.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/include/mach/omap34xx.h b/arch/arm/plat-omap/include/mach/omap34xx.h
index cc25733..f8d186a 100644
--- a/arch/arm/plat-omap/include/mach/omap34xx.h
+++ b/arch/arm/plat-omap/include/mach/omap34xx.h
@@ -31,13 +31,9 @@
 
 #define L4_34XX_BASE		0x48000000
 #define L4_WK_34XX_BASE		0x48300000
-#define L4_WK_OMAP_BASE		L4_WK_34XX_BASE
 #define L4_PER_34XX_BASE	0x49000000
-#define L4_PER_OMAP_BASE	L4_PER_34XX_BASE
 #define L4_EMU_34XX_BASE	0x54000000
-#define L4_EMU_BASE		L4_EMU_34XX_BASE
 #define L3_34XX_BASE		0x68000000
-#define L3_OMAP_BASE		L3_34XX_BASE
 
 #define OMAP3430_32KSYNCT_BASE	0x48320000
 #define OMAP3430_CM_BASE	0x48004800
-- 
cgit v0.10.2


From 2aa57be2d9e400f498cf0f0636069a81035e06b9 Mon Sep 17 00:00:00 2001
From: Vikram Pandita <vikram.pandita@ti.com>
Date: Thu, 28 May 2009 14:03:59 -0700
Subject: ARM: OMAP2/3: Serial: Remove arch_initcall dependency

Move platform_device_register() for serial device to
omap_serial_init()

There is no need to have arch_initcall() dependency in serial
as already board files call the function omap_serial_init()

Signed-off-by: Vikram Pandita <vikram.pandita@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 4dcf39c2..3c2d325 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -98,6 +98,14 @@ void omap_serial_enable_clocks(int enable)
 	}
 }
 
+static struct platform_device serial_device = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_PLATFORM,
+	.dev			= {
+		.platform_data	= serial_platform_data,
+	},
+};
+
 void __init omap_serial_init(void)
 {
 	int i;
@@ -142,18 +150,6 @@ void __init omap_serial_init(void)
 
 		omap_serial_reset(p);
 	}
-}
 
-static struct platform_device serial_device = {
-	.name			= "serial8250",
-	.id			= PLAT8250_DEV_PLATFORM,
-	.dev			= {
-		.platform_data	= serial_platform_data,
-	},
-};
-
-static int __init omap_init(void)
-{
-	return platform_device_register(&serial_device);
+	platform_device_register(&serial_device);
 }
-arch_initcall(omap_init);
-- 
cgit v0.10.2


From 2e12bd7ef175c9dc55dc215823b62a2247865012 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Thu, 28 May 2009 14:03:59 -0700
Subject: ARM: OMAP3: SDRC: add timing data for Micron MT46H32M32LF-6, v2

Add timing data for the Micron MT46H32M32LF-6 SDRAM chip, used on the
OMAP3 Beagle and EVM boards.  Original timing data is from the Micron
datasheet PDF downloaded from:

http://download.micron.com/pdf/datasheets/dram/mobile/1gb_ddr_mobile_sdram_t48m.pdf

Thanks to Rajendra Nayak <rnayak@ti.com> for his help identifying
the chips used on Beagle & OMAP3EVM.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index 3a7a29d..f25082c 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -105,6 +105,8 @@ static struct platform_device omap3beagle_nand_device = {
 	.resource	= &omap3beagle_nand_resource,
 };
 
+#include "sdram-micron-mt46h32m32lf-6.h"
+
 static struct omap_uart_config omap3_beagle_uart_config __initdata = {
 	.enabled_uarts	= ((1 << 0) | (1 << 1) | (1 << 2)),
 };
@@ -185,7 +187,7 @@ static int __init omap3_beagle_i2c_init(void)
 
 static void __init omap3_beagle_init_irq(void)
 {
-	omap2_init_common_hw(NULL);
+	omap2_init_common_hw(mt46h32m32lf6_sdrc_params);
 	omap_init_irq();
 #ifdef CONFIG_OMAP_32K_TIMER
 	omap2_gp_clockevent_set_gptimer(12);
diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index 402f09c..3571bb4 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -36,6 +36,7 @@
 #include <mach/mcspi.h>
 #include <mach/usb.h>
 
+#include "sdram-micron-mt46h32m32lf-6.h"
 #include "mmc-twl4030.h"
 
 #define OMAP3_PANDORA_TS_GPIO		94
@@ -118,7 +119,7 @@ static int __init omap3pandora_i2c_init(void)
 
 static void __init omap3pandora_init_irq(void)
 {
-	omap2_init_common_hw(NULL);
+	omap2_init_common_hw(mt46h32m32lf6_sdrc_params);
 	omap_init_irq();
 	omap_gpio_init();
 }
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index b1f23be..c7443ff 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -45,6 +45,7 @@
 #include <mach/nand.h>
 #include <mach/usb.h>
 
+#include "sdram-micron-mt46h32m32lf-6.h"
 #include "mmc-twl4030.h"
 
 #define OVERO_GPIO_BT_XGATE	15
@@ -303,7 +304,7 @@ static int __init overo_i2c_init(void)
 
 static void __init overo_init_irq(void)
 {
-	omap2_init_common_hw(NULL);
+	omap2_init_common_hw(mt46h32m32lf6_sdrc_params);
 	omap_init_irq();
 	omap_gpio_init();
 }
diff --git a/arch/arm/mach-omap2/sdram-micron-mt46h32m32lf-6.h b/arch/arm/mach-omap2/sdram-micron-mt46h32m32lf-6.h
new file mode 100644
index 0000000..02e1c2d
--- /dev/null
+++ b/arch/arm/mach-omap2/sdram-micron-mt46h32m32lf-6.h
@@ -0,0 +1,55 @@
+/*
+ * SDRC register values for the Micron MT46H32M32LF-6
+ *
+ * Copyright (C) 2008 Texas Instruments, Inc.
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Paul Walmsley
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ARCH_ARM_MACH_OMAP2_SDRAM_MICRON_MT46H32M32LF
+#define ARCH_ARM_MACH_OMAP2_SDRAM_MICRON_MT46H32M32LF
+
+#include <mach/sdrc.h>
+
+/* Micron MT46H32M32LF-6 */
+/* XXX Using ARE = 0x1 (no autorefresh burst) -- can this be changed? */
+static struct omap_sdrc_params mt46h32m32lf6_sdrc_params[] = {
+	[0] = {
+		.rate	     = 166000000,
+		.actim_ctrla = 0x9a9db4c6,
+		.actim_ctrlb = 0x00011217,
+		.rfr_ctrl    = 0x0004dc01,
+		.mr	     = 0x00000032,
+	},
+	[1] = {
+		.rate	     = 165941176,
+		.actim_ctrla = 0x9a9db4c6,
+		.actim_ctrlb = 0x00011217,
+		.rfr_ctrl    = 0x0004dc01,
+		.mr	     = 0x00000032,
+	},
+	[2] = {
+		.rate	     = 83000000,
+		.actim_ctrla = 0x51512283,
+		.actim_ctrlb = 0x0001120c,
+		.rfr_ctrl    = 0x00025501,
+		.mr	     = 0x00000032,
+	},
+	[3] = {
+		.rate	     = 82970588,
+		.actim_ctrla = 0x51512283,
+		.actim_ctrlb = 0x0001120c,
+		.rfr_ctrl    = 0x00025501,
+		.mr	     = 0x00000032,
+	},
+	[4] = {
+		.rate	     = 0
+	},
+};
+
+#endif
-- 
cgit v0.10.2


From 17a722caaef16835ab83f39046da1760cda8a578 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Thu, 28 May 2009 14:03:59 -0700
Subject: ARM: OMAP3: SDRC: add timing data for Qimonda HYB18M512160AF-6

Add timing data for the Qimonda HYB18M512160AF-6 SDRAM chip, used on
the OMAP3430SDP boards.

Thanks to Rajendra Nayak <rnayak@ti.com> for his help identifying
the chip used on 3430SDP.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 0e63695..496a90e 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -41,6 +41,7 @@
 #include <mach/keypad.h>
 #include <mach/gpmc-smc91x.h>
 
+#include "sdram-qimonda-hyb18m512160af-6.h"
 #include "mmc-twl4030.h"
 
 #define CONFIG_DISABLE_HFCLK 1
@@ -168,7 +169,7 @@ static struct platform_device *sdp3430_devices[] __initdata = {
 
 static void __init omap_3430sdp_init_irq(void)
 {
-	omap2_init_common_hw(NULL);
+	omap2_init_common_hw(hyb18m512160af6_sdrc_params);
 	omap_init_irq();
 	omap_gpio_init();
 }
diff --git a/arch/arm/mach-omap2/sdram-qimonda-hyb18m512160af-6.h b/arch/arm/mach-omap2/sdram-qimonda-hyb18m512160af-6.h
new file mode 100644
index 0000000..3751d29
--- /dev/null
+++ b/arch/arm/mach-omap2/sdram-qimonda-hyb18m512160af-6.h
@@ -0,0 +1,54 @@
+/*
+ * SDRC register values for the Qimonda HYB18M512160AF-6
+ *
+ * Copyright (C) 2008-2009 Texas Instruments, Inc.
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Paul Walmsley
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ARCH_ARM_MACH_OMAP2_SDRAM_QIMONDA_HYB18M512160AF6
+#define ARCH_ARM_MACH_OMAP2_SDRAM_QIMONDA_HYB18M512160AF6
+
+#include <mach/sdrc.h>
+
+/* Qimonda HYB18M512160AF-6 */
+static struct omap_sdrc_params hyb18m512160af6_sdrc_params[] = {
+	[0] = {
+		.rate	     = 166000000,
+		.actim_ctrla = 0x629db4c6,
+		.actim_ctrlb = 0x00012214,
+		.rfr_ctrl    = 0x0004dc01,
+		.mr	     = 0x00000032,
+	},
+	[1] = {
+		.rate	     = 165941176,
+		.actim_ctrla = 0x629db4c6,
+		.actim_ctrlb = 0x00012214,
+		.rfr_ctrl    = 0x0004dc01,
+		.mr	     = 0x00000032,
+	},
+	[2] = {
+		.rate	     = 83000000,
+		.actim_ctrla = 0x31512283,
+		.actim_ctrlb = 0x0001220a,
+		.rfr_ctrl    = 0x00025501,
+		.mr	     = 0x00000022,
+	},
+	[3] = {
+		.rate	     = 82970588,
+		.actim_ctrla = 0x31512283,
+		.actim_ctrlb = 0x0001220a,
+		.rfr_ctrl    = 0x00025501,
+		.mr	     = 0x00000022,
+	},
+	[4] = {
+		.rate	     = 0
+	},
+};
+
+#endif
-- 
cgit v0.10.2


From 49adf465d2448dc15866b2267df46ea2e1ccacf1 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Thu, 28 May 2009 14:04:00 -0700
Subject: ARM: OMAP3: ZOOM MDK: Add FB support to board file

Based on an earlier patch by Stanley.Miao <stanley.miao@windriver.com>
with board-*.c changes split to avoid conflicts with other device updates.

Cc: linux-fbdev-devel@lists.sourceforge.net
Signed-off-by: Stanley.Miao <stanley.miao@windriver.com>
Signed-off-by: Imre Deak <imre.deak@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index da57b0f..59ac8ed 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -77,10 +77,6 @@ static struct platform_device ldp_smsc911x_device = {
 	},
 };
 
-static struct platform_device *ldp_devices[] __initdata = {
-	&ldp_smsc911x_device,
-};
-
 static inline void __init ldp_init_smsc911x(void)
 {
 	int eth_cs;
@@ -122,8 +118,18 @@ static struct omap_uart_config ldp_uart_config __initdata = {
 	.enabled_uarts	= ((1 << 0) | (1 << 1) | (1 << 2)),
 };
 
+static struct platform_device ldp_lcd_device = {
+	.name		= "ldp_lcd",
+	.id		= -1,
+};
+
+static struct omap_lcd_config ldp_lcd_config __initdata = {
+	.ctrl_name	= "internal",
+};
+
 static struct omap_board_config_kernel ldp_config[] __initdata = {
 	{ OMAP_TAG_UART,	&ldp_uart_config },
+	{ OMAP_TAG_LCD,		&ldp_lcd_config },
 };
 
 static struct twl4030_gpio_platform_data ldp_gpio_data = {
@@ -168,6 +174,11 @@ static struct twl4030_hsmmc_info mmc[] __initdata = {
 	{}	/* Terminator */
 };
 
+static struct platform_device *ldp_devices[] __initdata = {
+	&ldp_smsc911x_device,
+	&ldp_lcd_device,
+};
+
 static void __init omap_ldp_init(void)
 {
 	omap_i2c_init();
-- 
cgit v0.10.2


From 4a899d5e93fd974952492cd4a09e98b209d1ad58 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 28 May 2009 14:04:00 -0700
Subject: ARM: OMAP3: Initialize more devices for LDP

Based on an earlier patches by Stanley.Miao <stanley.miao@windriver.com>
and Nishant Kamat <nskamat@ti.com>.

Note that at the ads7846 support still needs support for vaux_control
for the touchscreen to work.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index 59ac8ed..d8bc0a7 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -16,11 +16,13 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/input.h>
+#include <linux/gpio_keys.h>
 #include <linux/workqueue.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
+#include <linux/regulator/machine.h>
 #include <linux/i2c/twl4030.h>
 #include <linux/io.h>
 #include <linux/smsc911x.h>
@@ -39,6 +41,7 @@
 #include <asm/delay.h>
 #include <mach/control.h>
 #include <mach/usb.h>
+#include <mach/keypad.h>
 
 #include "mmc-twl4030.h"
 
@@ -77,6 +80,165 @@ static struct platform_device ldp_smsc911x_device = {
 	},
 };
 
+static int ldp_twl4030_keymap[] = {
+	KEY(0, 0, KEY_1),
+	KEY(1, 0, KEY_2),
+	KEY(2, 0, KEY_3),
+	KEY(0, 1, KEY_4),
+	KEY(1, 1, KEY_5),
+	KEY(2, 1, KEY_6),
+	KEY(3, 1, KEY_F5),
+	KEY(0, 2, KEY_7),
+	KEY(1, 2, KEY_8),
+	KEY(2, 2, KEY_9),
+	KEY(3, 2, KEY_F6),
+	KEY(0, 3, KEY_F7),
+	KEY(1, 3, KEY_0),
+	KEY(2, 3, KEY_F8),
+	PERSISTENT_KEY(4, 5),
+	KEY(4, 4, KEY_VOLUMEUP),
+	KEY(5, 5, KEY_VOLUMEDOWN),
+	0
+};
+
+static struct twl4030_keypad_data ldp_kp_twl4030_data = {
+	.rows		= 6,
+	.cols		= 6,
+	.keymap		= ldp_twl4030_keymap,
+	.keymapsize	= ARRAY_SIZE(ldp_twl4030_keymap),
+	.rep		= 1,
+};
+
+static struct gpio_keys_button ldp_gpio_keys_buttons[] = {
+	[0] = {
+		.code			= KEY_ENTER,
+		.gpio			= 101,
+		.desc			= "enter sw",
+		.active_low		= 1,
+		.debounce_interval	= 30,
+	},
+	[1] = {
+		.code			= KEY_F1,
+		.gpio			= 102,
+		.desc			= "func 1",
+		.active_low		= 1,
+		.debounce_interval	= 30,
+	},
+	[2] = {
+		.code			= KEY_F2,
+		.gpio			= 103,
+		.desc			= "func 2",
+		.active_low		= 1,
+		.debounce_interval	= 30,
+	},
+	[3] = {
+		.code			= KEY_F3,
+		.gpio			= 104,
+		.desc			= "func 3",
+		.active_low		= 1,
+		.debounce_interval 	= 30,
+	},
+	[4] = {
+		.code			= KEY_F4,
+		.gpio			= 105,
+		.desc			= "func 4",
+		.active_low		= 1,
+		.debounce_interval	= 30,
+	},
+	[5] = {
+		.code			= KEY_LEFT,
+		.gpio			= 106,
+		.desc			= "left sw",
+		.active_low		= 1,
+		.debounce_interval	= 30,
+	},
+	[6] = {
+		.code			= KEY_RIGHT,
+		.gpio			= 107,
+		.desc			= "right sw",
+		.active_low		= 1,
+		.debounce_interval	= 30,
+	},
+	[7] = {
+		.code			= KEY_UP,
+		.gpio			= 108,
+		.desc			= "up sw",
+		.active_low		= 1,
+		.debounce_interval	= 30,
+	},
+	[8] = {
+		.code			= KEY_DOWN,
+		.gpio			= 109,
+		.desc			= "down sw",
+		.active_low		= 1,
+		.debounce_interval	= 30,
+	},
+};
+
+static struct gpio_keys_platform_data ldp_gpio_keys = {
+	.buttons		= ldp_gpio_keys_buttons,
+	.nbuttons		= ARRAY_SIZE(ldp_gpio_keys_buttons),
+	.rep			= 1,
+};
+
+static struct platform_device ldp_gpio_keys_device = {
+	.name		= "gpio-keys",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &ldp_gpio_keys,
+	},
+};
+
+static int ts_gpio;
+
+/**
+ * @brief ads7846_dev_init : Requests & sets GPIO line for pen-irq
+ *
+ * @return - void. If request gpio fails then Flag KERN_ERR.
+ */
+static void ads7846_dev_init(void)
+{
+	if (gpio_request(ts_gpio, "ads7846 irq") < 0) {
+		printk(KERN_ERR "can't get ads746 pen down GPIO\n");
+		return;
+	}
+
+	gpio_direction_input(ts_gpio);
+	omap_set_gpio_debounce(ts_gpio, 1);
+	omap_set_gpio_debounce_time(ts_gpio, 0xa);
+}
+
+static int ads7846_get_pendown_state(void)
+{
+	return !gpio_get_value(ts_gpio);
+}
+
+static struct ads7846_platform_data tsc2046_config __initdata = {
+	.get_pendown_state	= ads7846_get_pendown_state,
+	.keep_vref_on		= 1,
+};
+
+static struct omap2_mcspi_device_config tsc2046_mcspi_config = {
+	.turbo_mode	= 0,
+	.single_channel	= 1,	/* 0: slave, 1: master */
+};
+
+static struct spi_board_info ldp_spi_board_info[] __initdata = {
+	[0] = {
+		/*
+		 * TSC2046 operates at a max freqency of 2MHz, so
+		 * operate slightly below at 1.5MHz
+		 */
+		.modalias		= "ads7846",
+		.bus_num		= 1,
+		.chip_select		= 0,
+		.max_speed_hz		= 1500000,
+		.controller_data	= &tsc2046_mcspi_config,
+		.irq			= 0,
+		.platform_data		= &tsc2046_config,
+	},
+};
+
 static inline void __init ldp_init_smsc911x(void)
 {
 	int eth_cs;
@@ -132,18 +294,49 @@ static struct omap_board_config_kernel ldp_config[] __initdata = {
 	{ OMAP_TAG_LCD,		&ldp_lcd_config },
 };
 
+static struct twl4030_usb_data ldp_usb_data = {
+	.usb_mode	= T2_USB_MODE_ULPI,
+};
+
 static struct twl4030_gpio_platform_data ldp_gpio_data = {
 	.gpio_base	= OMAP_MAX_GPIO_LINES,
 	.irq_base	= TWL4030_GPIO_IRQ_BASE,
 	.irq_end	= TWL4030_GPIO_IRQ_END,
 };
 
+static struct twl4030_madc_platform_data ldp_madc_data = {
+	.irq_line	= 1,
+};
+
+static struct regulator_consumer_supply ldp_vmmc1_supply = {
+	.supply			= "vmmc",
+};
+
+/* VMMC1 for MMC1 pins CMD, CLK, DAT0..DAT3 (20 mA, plus card == max 220 mA) */
+static struct regulator_init_data ldp_vmmc1 = {
+	.constraints = {
+		.min_uV			= 1850000,
+		.max_uV			= 3150000,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_VOLTAGE
+					| REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &ldp_vmmc1_supply,
+};
+
 static struct twl4030_platform_data ldp_twldata = {
 	.irq_base	= TWL4030_IRQ_BASE,
 	.irq_end	= TWL4030_IRQ_END,
 
 	/* platform_data for children goes here */
+	.madc		= &ldp_madc_data,
+	.usb		= &ldp_usb_data,
+	.vmmc1		= &ldp_vmmc1,
 	.gpio		= &ldp_gpio_data,
+	.keypad		= &ldp_kp_twl4030_data,
 };
 
 static struct i2c_board_info __initdata ldp_i2c_boardinfo[] = {
@@ -177,6 +370,7 @@ static struct twl4030_hsmmc_info mmc[] __initdata = {
 static struct platform_device *ldp_devices[] __initdata = {
 	&ldp_smsc911x_device,
 	&ldp_lcd_device,
+	&ldp_gpio_keys_device,
 };
 
 static void __init omap_ldp_init(void)
@@ -185,9 +379,17 @@ static void __init omap_ldp_init(void)
 	platform_add_devices(ldp_devices, ARRAY_SIZE(ldp_devices));
 	omap_board_config = ldp_config;
 	omap_board_config_size = ARRAY_SIZE(ldp_config);
+	ts_gpio = 54;
+	ldp_spi_board_info[0].irq = gpio_to_irq(ts_gpio);
+	spi_register_board_info(ldp_spi_board_info,
+				ARRAY_SIZE(ldp_spi_board_info));
+	ads7846_dev_init();
 	omap_serial_init();
-	twl4030_mmc_init(mmc);
 	usb_musb_init();
+
+	twl4030_mmc_init(mmc);
+	/* link regulators to MMC adapters */
+	ldp_vmmc1_supply.dev = mmc[0].dev;
 }
 
 static void __init omap_ldp_map_io(void)
diff --git a/arch/arm/plat-omap/include/mach/keypad.h b/arch/arm/plat-omap/include/mach/keypad.h
index 232923a..45ea3ae 100644
--- a/arch/arm/plat-omap/include/mach/keypad.h
+++ b/arch/arm/plat-omap/include/mach/keypad.h
@@ -33,7 +33,11 @@ struct omap_kp_platform_data {
 #define GROUP_3		(3 << 16)
 #define GROUP_MASK	GROUP_3
 
+#define KEY_PERSISTENT		0x00800000
+#define KEYNUM_MASK		0x00EFFFFF
 #define KEY(col, row, val) (((col) << 28) | ((row) << 24) | (val))
+#define PERSISTENT_KEY(col, row) (((col) << 28) | ((row) << 24) | \
+						KEY_PERSISTENT)
 
 #endif
 
-- 
cgit v0.10.2


From b583f26d510ee1aef7348f38f1d959212f66b5e5 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 28 May 2009 14:04:03 -0700
Subject: ARM: OMAP3: mmc-twl4030 uses regulator framework

Decouple the HSMMC glue from the twl4030 as the only
regulator provider, using the regulator framework instead.
This makes the glue's "mmc-twl4030" name become a complete
misnomer ... this code could probably all migrate into the
HSMMC driver now.

Tested on 3430SDP (SD and low-voltage MMC) and Beagle (SD),
plus some other boards (including Overo) after they were
converted to set up MMC regulators properly.

Eventually all boards should just associate a regulator with
each MMC controller they use.  In some cases (Overo MMC2 and
Pandora MMC3, at least) that would be a fixed-voltage regulator
with no real software control.  As a temporary hack (pending
regulator-next updates to make the "fixed.c" regulator become
usable) there's a new ocr_mask field for those boards.

Patch updated with a fix for disabling vcc_aux by
Adrian Hunter <adrian.hunter@nokia.com>

Cc: Pierre Ossman <drzeus-list@drzeus.cx>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/mmc-twl4030.c b/arch/arm/mach-omap2/mmc-twl4030.c
index dc40b3e..9756a87 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.c
+++ b/arch/arm/mach-omap2/mmc-twl4030.c
@@ -16,8 +16,8 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/gpio.h>
-#include <linux/i2c/twl4030.h>
-#include <linux/regulator/machine.h>
+#include <linux/mmc/host.h>
+#include <linux/regulator/consumer.h>
 
 #include <mach/hardware.h>
 #include <mach/control.h>
@@ -26,31 +26,9 @@
 
 #include "mmc-twl4030.h"
 
-#if defined(CONFIG_TWL4030_CORE) && \
-	(defined(CONFIG_MMC_OMAP_HS) || defined(CONFIG_MMC_OMAP_HS_MODULE))
 
-#define LDO_CLR			0x00
-#define VSEL_S2_CLR		0x40
-
-#define VMMC1_DEV_GRP		0x27
-#define VMMC1_CLR		0x00
-#define VMMC1_315V		0x03
-#define VMMC1_300V		0x02
-#define VMMC1_285V		0x01
-#define VMMC1_185V		0x00
-#define VMMC1_DEDICATED		0x2A
-
-#define VMMC2_DEV_GRP		0x2B
-#define VMMC2_CLR		0x40
-#define VMMC2_315V		0x0c
-#define VMMC2_300V		0x0b
-#define VMMC2_285V		0x0a
-#define VMMC2_280V		0x09
-#define VMMC2_260V		0x08
-#define VMMC2_185V		0x06
-#define VMMC2_DEDICATED		0x2E
-
-#define VMMC_DEV_GRP_P1		0x20
+#if defined(CONFIG_REGULATOR) && \
+	(defined(CONFIG_MMC_OMAP_HS) || defined(CONFIG_MMC_OMAP_HS_MODULE))
 
 static u16 control_pbias_offset;
 static u16 control_devconf1_offset;
@@ -59,19 +37,16 @@ static u16 control_devconf1_offset;
 
 static struct twl_mmc_controller {
 	struct omap_mmc_platform_data	*mmc;
-	u8		twl_vmmc_dev_grp;
-	u8		twl_mmc_dedicated;
-	char		name[HSMMC_NAME_LEN + 1];
-} hsmmc[OMAP34XX_NR_MMC] = {
-	{
-		.twl_vmmc_dev_grp		= VMMC1_DEV_GRP,
-		.twl_mmc_dedicated		= VMMC1_DEDICATED,
-	},
-	{
-		.twl_vmmc_dev_grp		= VMMC2_DEV_GRP,
-		.twl_mmc_dedicated		= VMMC2_DEDICATED,
-	},
-};
+	/* Vcc == configured supply
+	 * Vcc_alt == optional
+	 *   -	MMC1, supply for DAT4..DAT7
+	 *   -	MMC2/MMC2, external level shifter voltage supply, for
+	 *	chip (SDIO, eMMC, etc) or transceiver (MMC2 only)
+	 */
+	struct regulator		*vcc;
+	struct regulator		*vcc_aux;
+	char				name[HSMMC_NAME_LEN + 1];
+} hsmmc[OMAP34XX_NR_MMC];
 
 static int twl_mmc_card_detect(int irq)
 {
@@ -117,16 +92,60 @@ static int twl_mmc_late_init(struct device *dev)
 	int ret = 0;
 	int i;
 
-	ret = gpio_request(mmc->slots[0].switch_pin, "mmc_cd");
-	if (ret)
-		goto done;
-	ret = gpio_direction_input(mmc->slots[0].switch_pin);
-	if (ret)
-		goto err;
+	/* MMC/SD/SDIO doesn't require a card detect switch */
+	if (gpio_is_valid(mmc->slots[0].switch_pin)) {
+		ret = gpio_request(mmc->slots[0].switch_pin, "mmc_cd");
+		if (ret)
+			goto done;
+		ret = gpio_direction_input(mmc->slots[0].switch_pin);
+		if (ret)
+			goto err;
+	}
 
+	/* require at least main regulator */
 	for (i = 0; i < ARRAY_SIZE(hsmmc); i++) {
 		if (hsmmc[i].name == mmc->slots[0].name) {
+			struct regulator *reg;
+
 			hsmmc[i].mmc = mmc;
+
+			reg = regulator_get(dev, "vmmc");
+			if (IS_ERR(reg)) {
+				dev_dbg(dev, "vmmc regulator missing\n");
+				/* HACK: until fixed.c regulator is usable,
+				 * we don't require a main regulator
+				 * for MMC2 or MMC3
+				 */
+				if (i != 0)
+					break;
+				ret = PTR_ERR(reg);
+				goto err;
+			}
+			hsmmc[i].vcc = reg;
+			mmc->slots[0].ocr_mask = mmc_regulator_get_ocrmask(reg);
+
+			/* allow an aux regulator */
+			reg = regulator_get(dev, "vmmc_aux");
+			hsmmc[i].vcc_aux = IS_ERR(reg) ? NULL : reg;
+
+			/* UGLY HACK:  workaround regulator framework bugs.
+			 * When the bootloader leaves a supply active, it's
+			 * initialized with zero usecount ... and we can't
+			 * disable it without first enabling it.  Until the
+			 * framework is fixed, we need a workaround like this
+			 * (which is safe for MMC, but not in general).
+			 */
+			if (regulator_is_enabled(hsmmc[i].vcc) > 0) {
+				regulator_enable(hsmmc[i].vcc);
+				regulator_disable(hsmmc[i].vcc);
+			}
+			if (hsmmc[i].vcc_aux) {
+				if (regulator_is_enabled(reg) > 0) {
+					regulator_enable(reg);
+					regulator_disable(reg);
+				}
+			}
+
 			break;
 		}
 	}
@@ -173,96 +192,6 @@ static int twl_mmc_resume(struct device *dev, int slot)
 #define twl_mmc_resume	NULL
 #endif
 
-/*
- * Sets the MMC voltage in twl4030
- */
-
-#define MMC1_OCR	(MMC_VDD_165_195 \
-		|MMC_VDD_28_29|MMC_VDD_29_30|MMC_VDD_30_31|MMC_VDD_31_32)
-#define MMC2_OCR	(MMC_VDD_165_195 \
-		|MMC_VDD_25_26|MMC_VDD_26_27|MMC_VDD_27_28 \
-		|MMC_VDD_28_29|MMC_VDD_29_30|MMC_VDD_30_31|MMC_VDD_31_32)
-
-static int twl_mmc_set_voltage(struct twl_mmc_controller *c, int vdd)
-{
-	int ret;
-	u8 vmmc = 0, dev_grp_val;
-
-	if (!vdd)
-		goto doit;
-
-	if (c->twl_vmmc_dev_grp == VMMC1_DEV_GRP) {
-		/* VMMC1:  max 220 mA.  And for 8-bit mode,
-		 * VSIM:  max 50 mA
-		 */
-		switch (1 << vdd) {
-		case MMC_VDD_165_195:
-			vmmc = VMMC1_185V;
-			/* and VSIM_180V */
-			break;
-		case MMC_VDD_28_29:
-			vmmc = VMMC1_285V;
-			/* and VSIM_280V */
-			break;
-		case MMC_VDD_29_30:
-		case MMC_VDD_30_31:
-			vmmc = VMMC1_300V;
-			/* and VSIM_300V */
-			break;
-		case MMC_VDD_31_32:
-			vmmc = VMMC1_315V;
-			/* error if VSIM needed */
-			break;
-		default:
-			return -EINVAL;
-		}
-	} else if (c->twl_vmmc_dev_grp == VMMC2_DEV_GRP) {
-		/* VMMC2:  max 100 mA */
-		switch (1 << vdd) {
-		case MMC_VDD_165_195:
-			vmmc = VMMC2_185V;
-			break;
-		case MMC_VDD_25_26:
-		case MMC_VDD_26_27:
-			vmmc = VMMC2_260V;
-			break;
-		case MMC_VDD_27_28:
-			vmmc = VMMC2_280V;
-			break;
-		case MMC_VDD_28_29:
-			vmmc = VMMC2_285V;
-			break;
-		case MMC_VDD_29_30:
-		case MMC_VDD_30_31:
-			vmmc = VMMC2_300V;
-			break;
-		case MMC_VDD_31_32:
-			vmmc = VMMC2_315V;
-			break;
-		default:
-			return -EINVAL;
-		}
-	} else {
-		return -EINVAL;
-	}
-
-doit:
-	if (vdd)
-		dev_grp_val = VMMC_DEV_GRP_P1;	/* Power up */
-	else
-		dev_grp_val = LDO_CLR;		/* Power down */
-
-	ret = twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
-					dev_grp_val, c->twl_vmmc_dev_grp);
-	if (ret || !vdd)
-		return ret;
-
-	ret = twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
-					vmmc, c->twl_mmc_dedicated);
-
-	return ret;
-}
-
 static int twl_mmc1_set_power(struct device *dev, int slot, int power_on,
 				int vdd)
 {
@@ -273,11 +202,13 @@ static int twl_mmc1_set_power(struct device *dev, int slot, int power_on,
 
 	/*
 	 * Assume we power both OMAP VMMC1 (for CMD, CLK, DAT0..3) and the
-	 * card using the same TWL VMMC1 supply (hsmmc[0]); OMAP has both
+	 * card with Vcc regulator (from twl4030 or whatever).  OMAP has both
 	 * 1.8V and 3.0V modes, controlled by the PBIAS register.
 	 *
 	 * In 8-bit modes, OMAP VMMC1A (for DAT4..7) needs a supply, which
 	 * is most naturally TWL VSIM; those pins also use PBIAS.
+	 *
+	 * FIXME handle VMMC1A as needed ...
 	 */
 	if (power_on) {
 		if (cpu_is_omap2430()) {
@@ -300,7 +231,7 @@ static int twl_mmc1_set_power(struct device *dev, int slot, int power_on,
 		reg &= ~OMAP2_PBIASLITEPWRDNZ0;
 		omap_ctrl_writel(reg, control_pbias_offset);
 
-		ret = twl_mmc_set_voltage(c, vdd);
+		ret = mmc_regulator_set_ocr(c->vcc, vdd);
 
 		/* 100ms delay required for PBIAS configuration */
 		msleep(100);
@@ -316,7 +247,7 @@ static int twl_mmc1_set_power(struct device *dev, int slot, int power_on,
 		reg &= ~OMAP2_PBIASLITEPWRDNZ0;
 		omap_ctrl_writel(reg, control_pbias_offset);
 
-		ret = twl_mmc_set_voltage(c, 0);
+		ret = mmc_regulator_set_ocr(c->vcc, 0);
 
 		/* 100ms delay required for PBIAS configuration */
 		msleep(100);
@@ -329,19 +260,33 @@ static int twl_mmc1_set_power(struct device *dev, int slot, int power_on,
 	return ret;
 }
 
-static int twl_mmc2_set_power(struct device *dev, int slot, int power_on, int vdd)
+static int twl_mmc23_set_power(struct device *dev, int slot, int power_on, int vdd)
 {
-	int ret;
+	int ret = 0;
 	struct twl_mmc_controller *c = &hsmmc[1];
 	struct omap_mmc_platform_data *mmc = dev->platform_data;
 
+	/* If we don't see a Vcc regulator, assume it's a fixed
+	 * voltage always-on regulator.
+	 */
+	if (!c->vcc)
+		return 0;
+
 	/*
-	 * Assume TWL VMMC2 (hsmmc[1]) is used only to power the card ... OMAP
+	 * Assume Vcc regulator is used only to power the card ... OMAP
 	 * VDDS is used to power the pins, optionally with a transceiver to
 	 * support cards using voltages other than VDDS (1.8V nominal).  When a
 	 * transceiver is used, DAT3..7 are muxed as transceiver control pins.
+	 *
+	 * In some cases this regulator won't support enable/disable;
+	 * e.g. it's a fixed rail for a WLAN chip.
+	 *
+	 * In other cases vcc_aux switches interface power.  Example, for
+	 * eMMC cards it represents VccQ.  Sometimes transceivers or SDIO
+	 * chips/cards need an interface voltage rail too.
 	 */
 	if (power_on) {
+		/* only MMC2 supports a CLKIN */
 		if (mmc->slots[0].internal_clock) {
 			u32 reg;
 
@@ -349,24 +294,23 @@ static int twl_mmc2_set_power(struct device *dev, int slot, int power_on, int vd
 			reg |= OMAP2_MMCSDIO2ADPCLKISEL;
 			omap_ctrl_writel(reg, control_devconf1_offset);
 		}
-		ret = twl_mmc_set_voltage(c, vdd);
+		ret = mmc_regulator_set_ocr(c->vcc, vdd);
+		/* enable interface voltage rail, if needed */
+		if (ret == 0 && c->vcc_aux) {
+			ret = regulator_enable(c->vcc_aux);
+			if (ret < 0)
+				ret = mmc_regulator_set_ocr(c->vcc, 0);
+		}
 	} else {
-		ret = twl_mmc_set_voltage(c, 0);
+		if (c->vcc_aux && (ret = regulator_is_enabled(c->vcc_aux)) > 0)
+			ret = regulator_disable(c->vcc_aux);
+		if (ret == 0)
+			ret = mmc_regulator_set_ocr(c->vcc, 0);
 	}
 
 	return ret;
 }
 
-static int twl_mmc3_set_power(struct device *dev, int slot, int power_on,
-		int vdd)
-{
-	/*
-	 * Assume MMC3 has self-powered device connected, for example on-board
-	 * chip with external power source.
-	 */
-	return 0;
-}
-
 static struct omap_mmc_platform_data *hsmmc_data[OMAP34XX_NR_MMC] __initdata;
 
 void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
@@ -412,10 +356,10 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 		mmc->slots[0].wires = c->wires;
 		mmc->slots[0].internal_clock = !c->ext_clock;
 		mmc->dma_mask = 0xffffffff;
+		mmc->init = twl_mmc_late_init;
 
-		/* note: twl4030 card detect GPIOs normally switch VMMCx ... */
+		/* note: twl4030 card detect GPIOs can disable VMMCx ... */
 		if (gpio_is_valid(c->gpio_cd)) {
-			mmc->init = twl_mmc_late_init;
 			mmc->cleanup = twl_mmc_cleanup;
 			mmc->suspend = twl_mmc_suspend;
 			mmc->resume = twl_mmc_resume;
@@ -439,26 +383,28 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 		} else
 			mmc->slots[0].gpio_wp = -EINVAL;
 
-		/* NOTE:  we assume OMAP's MMC1 and MMC2 use
-		 * the TWL4030's VMMC1 and VMMC2, respectively;
-		 * and that MMC3 device has it's own power source.
+		/* NOTE:  MMC slots should have a Vcc regulator set up.
+		 * This may be from a TWL4030-family chip, another
+		 * controllable regulator, or a fixed supply.
+		 *
+		 * temporary HACK: ocr_mask instead of fixed supply
 		 */
+		mmc->slots[0].ocr_mask = c->ocr_mask;
 
 		switch (c->mmc) {
 		case 1:
+			/* on-chip level shifting via PBIAS0/PBIAS1 */
 			mmc->slots[0].set_power = twl_mmc1_set_power;
-			mmc->slots[0].ocr_mask = MMC1_OCR;
 			break;
 		case 2:
-			mmc->slots[0].set_power = twl_mmc2_set_power;
-			if (c->transceiver)
-				mmc->slots[0].ocr_mask = MMC2_OCR;
-			else
-				mmc->slots[0].ocr_mask = MMC_VDD_165_195;
-			break;
+			if (c->ext_clock)
+				c->transceiver = 1;
+			if (c->transceiver && c->wires > 4)
+				c->wires = 4;
+			/* FALLTHROUGH */
 		case 3:
-			mmc->slots[0].set_power = twl_mmc3_set_power;
-			mmc->slots[0].ocr_mask = MMC_VDD_165_195;
+			/* off-chip level shifting, or none */
+			mmc->slots[0].set_power = twl_mmc23_set_power;
 			break;
 		default:
 			pr_err("MMC%d configuration not supported!\n", c->mmc);
diff --git a/arch/arm/mach-omap2/mmc-twl4030.h b/arch/arm/mach-omap2/mmc-twl4030.h
index ea59e86..3807c45 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.h
+++ b/arch/arm/mach-omap2/mmc-twl4030.h
@@ -16,9 +16,10 @@ struct twl4030_hsmmc_info {
 	int	gpio_wp;	/* or -EINVAL */
 	char	*name;		/* or NULL for default */
 	struct device *dev;	/* returned: pointer to mmc adapter */
+	int	ocr_mask;	/* temporary HACK */
 };
 
-#if	defined(CONFIG_TWL4030_CORE) && \
+#if defined(CONFIG_REGULATOR) && \
 	(defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE) || \
 	 defined(CONFIG_MMC_OMAP_HS) || defined(CONFIG_MMC_OMAP_HS_MODULE))
 
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index e62a22a..2f19c63 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1073,7 +1073,6 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 	mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
 	mmc->max_seg_size = mmc->max_req_size;
 
-	mmc->ocr_avail = mmc_slot(host).ocr_mask;
 	mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED;
 
 	if (pdata->slots[host->slot_id].wires >= 8)
@@ -1110,13 +1109,14 @@ static int __init omap_mmc_probe(struct platform_device *pdev)
 		goto err_irq;
 	}
 
+	/* initialize power supplies, gpios, etc */
 	if (pdata->init != NULL) {
 		if (pdata->init(&pdev->dev) != 0) {
-			dev_dbg(mmc_dev(host->mmc),
-				"Unable to configure MMC IRQs\n");
+			dev_dbg(mmc_dev(host->mmc), "late init error\n");
 			goto err_irq_cd_init;
 		}
 	}
+	mmc->ocr_avail = mmc_slot(host).ocr_mask;
 
 	/* Request IRQ for card detect */
 	if ((mmc_slot(host).card_detect_irq)) {
-- 
cgit v0.10.2


From bb3b9d8eb97624956e7e3a9eba2c64576808b1dc Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 28 May 2009 14:04:03 -0700
Subject: ARM: OMAP3: Initialize regulators for Beagle and Overo

Initialize regulators for Beagle and Overo.

Patch is based on earlier patches posted to linux-omap mailing
list.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index f25082c..991ac9c 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -28,6 +28,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/nand.h>
 
+#include <linux/regulator/machine.h>
 #include <linux/i2c/twl4030.h>
 
 #include <mach/hardware.h>
@@ -120,6 +121,23 @@ static struct twl4030_hsmmc_info mmc[] = {
 	{}	/* Terminator */
 };
 
+static struct platform_device omap3_beagle_lcd_device = {
+	.name		= "omap3beagle_lcd",
+	.id		= -1,
+};
+
+static struct omap_lcd_config omap3_beagle_lcd_config __initdata = {
+	.ctrl_name	= "internal",
+};
+
+static struct regulator_consumer_supply beagle_vmmc1_supply = {
+	.supply			= "vmmc",
+};
+
+static struct regulator_consumer_supply beagle_vsim_supply = {
+	.supply			= "vmmc_aux",
+};
+
 static struct gpio_led gpio_leds[];
 
 static int beagle_twl_gpio_setup(struct device *dev,
@@ -130,6 +148,10 @@ static int beagle_twl_gpio_setup(struct device *dev,
 	mmc[0].gpio_cd = gpio + 0;
 	twl4030_mmc_init(mmc);
 
+	/* link regulators to MMC adapters */
+	beagle_vmmc1_supply.dev = mmc[0].dev;
+	beagle_vsim_supply.dev = mmc[0].dev;
+
 	/* REVISIT: need ehci-omap hooks for external VBUS
 	 * power switch and overcurrent detect
 	 */
@@ -158,12 +180,85 @@ static struct twl4030_gpio_platform_data beagle_gpio_data = {
 	.setup		= beagle_twl_gpio_setup,
 };
 
+static struct regulator_consumer_supply beagle_vdac_supply = {
+	.supply		= "vdac",
+	.dev		= &omap3_beagle_lcd_device.dev,
+};
+
+static struct regulator_consumer_supply beagle_vdvi_supply = {
+	.supply		= "vdvi",
+	.dev		= &omap3_beagle_lcd_device.dev,
+};
+
+/* VMMC1 for MMC1 pins CMD, CLK, DAT0..DAT3 (20 mA, plus card == max 220 mA) */
+static struct regulator_init_data beagle_vmmc1 = {
+	.constraints = {
+		.min_uV			= 1850000,
+		.max_uV			= 3150000,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_VOLTAGE
+					| REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &beagle_vmmc1_supply,
+};
+
+/* VSIM for MMC1 pins DAT4..DAT7 (2 mA, plus card == max 50 mA) */
+static struct regulator_init_data beagle_vsim = {
+	.constraints = {
+		.min_uV			= 1800000,
+		.max_uV			= 3000000,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_VOLTAGE
+					| REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &beagle_vsim_supply,
+};
+
+/* VDAC for DSS driving S-Video (8 mA unloaded, max 65 mA) */
+static struct regulator_init_data beagle_vdac = {
+	.constraints = {
+		.min_uV			= 1800000,
+		.max_uV			= 1800000,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &beagle_vdac_supply,
+};
+
+/* VPLL2 for digital video outputs */
+static struct regulator_init_data beagle_vpll2 = {
+	.constraints = {
+		.name			= "VDVI",
+		.min_uV			= 1800000,
+		.max_uV			= 1800000,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &beagle_vdvi_supply,
+};
+
 static struct twl4030_platform_data beagle_twldata = {
 	.irq_base	= TWL4030_IRQ_BASE,
 	.irq_end	= TWL4030_IRQ_END,
 
 	/* platform_data for children goes here */
 	.gpio		= &beagle_gpio_data,
+	.vmmc1		= &beagle_vmmc1,
+	.vsim		= &beagle_vsim,
+	.vdac		= &beagle_vdac,
+	.vpll2		= &beagle_vpll2,
 };
 
 static struct i2c_board_info __initdata beagle_i2c_boardinfo[] = {
@@ -195,15 +290,6 @@ static void __init omap3_beagle_init_irq(void)
 	omap_gpio_init();
 }
 
-static struct platform_device omap3_beagle_lcd_device = {
-	.name		= "omap3beagle_lcd",
-	.id		= -1,
-};
-
-static struct omap_lcd_config omap3_beagle_lcd_config __initdata = {
-	.ctrl_name	= "internal",
-};
-
 static struct gpio_led gpio_leds[] = {
 	{
 		.name			= "beagleboard::usr0",
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index c7443ff..dff5528 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -27,6 +27,7 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/i2c/twl4030.h>
+#include <linux/regulator/machine.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
@@ -272,21 +273,76 @@ static struct omap_uart_config overo_uart_config __initdata = {
 	.enabled_uarts	= ((1 << 0) | (1 << 1) | (1 << 2)),
 };
 
+static struct twl4030_hsmmc_info mmc[] = {
+	{
+		.mmc		= 1,
+		.wires		= 4,
+		.gpio_cd	= -EINVAL,
+		.gpio_wp	= -EINVAL,
+	},
+	{
+		.mmc		= 2,
+		.wires		= 4,
+		.gpio_cd	= -EINVAL,
+		.gpio_wp	= -EINVAL,
+		.transceiver	= true,
+		.ocr_mask	= 0x00100000,	/* 3.3V */
+	},
+	{}	/* Terminator */
+};
+
+static struct regulator_consumer_supply overo_vmmc1_supply = {
+	.supply			= "vmmc",
+};
+
+static int overo_twl_gpio_setup(struct device *dev,
+		unsigned gpio, unsigned ngpio)
+{
+	twl4030_mmc_init(mmc);
+
+	overo_vmmc1_supply.dev = mmc[0].dev;
+
+	return 0;
+}
+
 static struct twl4030_gpio_platform_data overo_gpio_data = {
 	.gpio_base	= OMAP_MAX_GPIO_LINES,
 	.irq_base	= TWL4030_GPIO_IRQ_BASE,
 	.irq_end	= TWL4030_GPIO_IRQ_END,
+	.setup		= overo_twl_gpio_setup,
+};
+
+static struct twl4030_usb_data overo_usb_data = {
+	.usb_mode	= T2_USB_MODE_ULPI,
+};
+
+static struct regulator_init_data overo_vmmc1 = {
+	.constraints = {
+		.min_uV			= 1850000,
+		.max_uV			= 3150000,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_VOLTAGE
+					| REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &overo_vmmc1_supply,
 };
 
+/* mmc2 (WLAN) and Bluetooth don't use twl4030 regulators */
+
 static struct twl4030_platform_data overo_twldata = {
 	.irq_base	= TWL4030_IRQ_BASE,
 	.irq_end	= TWL4030_IRQ_END,
 	.gpio		= &overo_gpio_data,
+	.usb		= &overo_usb_data,
+	.vmmc1		= &overo_vmmc1,
 };
 
 static struct i2c_board_info __initdata overo_i2c_boardinfo[] = {
 	{
-		I2C_BOARD_INFO("twl4030", 0x48),
+		I2C_BOARD_INFO("tps65950", 0x48),
 		.flags = I2C_CLIENT_WAKE,
 		.irq = INT_34XX_SYS_NIRQ,
 		.platform_data = &overo_twldata,
@@ -327,23 +383,6 @@ static struct platform_device *overo_devices[] __initdata = {
 	&overo_lcd_device,
 };
 
-static struct twl4030_hsmmc_info mmc[] __initdata = {
-	{
-		.mmc		= 1,
-		.wires		= 4,
-		.gpio_cd	= -EINVAL,
-		.gpio_wp	= -EINVAL,
-	},
-	{
-		.mmc		= 2,
-		.wires		= 4,
-		.gpio_cd	= -EINVAL,
-		.gpio_wp	= -EINVAL,
-		.transceiver	= true,
-	},
-	{}	/* Terminator */
-};
-
 static void __init overo_init(void)
 {
 	overo_i2c_init();
@@ -351,7 +390,6 @@ static void __init overo_init(void)
 	omap_board_config = overo_config;
 	omap_board_config_size = ARRAY_SIZE(overo_config);
 	omap_serial_init();
-	twl4030_mmc_init(mmc);
 	overo_flash_init();
 	usb_musb_init();
 	overo_ads7846_init();
-- 
cgit v0.10.2


From 64f535a87ce103fae592f2cce0866b4a45903ed6 Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Thu, 28 May 2009 14:04:04 -0700
Subject: ARM: OMAP3: pandora: setup regulator framework for MMC

Setup regulators for MMC1 and MMC2 to get those SD slots
working again.

Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
CC: David Brownell <david-b@pacbell.net>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index 3571bb4..2ab2497 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -23,6 +23,7 @@
 
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
+#include <linux/regulator/machine.h>
 #include <linux/i2c/twl4030.h>
 
 #include <asm/mach-types.h>
@@ -70,6 +71,14 @@ static struct omap_uart_config omap3pandora_uart_config __initdata = {
 	.enabled_uarts	= (1 << 2), /* UART3 */
 };
 
+static struct regulator_consumer_supply pandora_vmmc1_supply = {
+	.supply			= "vmmc",
+};
+
+static struct regulator_consumer_supply pandora_vmmc2_supply = {
+	.supply			= "vmmc",
+};
+
 static int omap3pandora_twl_gpio_setup(struct device *dev,
 		unsigned gpio, unsigned ngpio)
 {
@@ -78,6 +87,10 @@ static int omap3pandora_twl_gpio_setup(struct device *dev,
 	omap3pandora_mmc[1].gpio_cd = gpio + 1;
 	twl4030_mmc_init(omap3pandora_mmc);
 
+	/* link regulators to MMC adapters */
+	pandora_vmmc1_supply.dev = omap3pandora_mmc[0].dev;
+	pandora_vmmc2_supply.dev = omap3pandora_mmc[1].dev;
+
 	return 0;
 }
 
@@ -88,6 +101,36 @@ static struct twl4030_gpio_platform_data omap3pandora_gpio_data = {
 	.setup		= omap3pandora_twl_gpio_setup,
 };
 
+/* VMMC1 for MMC1 pins CMD, CLK, DAT0..DAT3 (20 mA, plus card == max 220 mA) */
+static struct regulator_init_data pandora_vmmc1 = {
+	.constraints = {
+		.min_uV			= 1850000,
+		.max_uV			= 3150000,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_VOLTAGE
+					| REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &pandora_vmmc1_supply,
+};
+
+/* VMMC2 for MMC2 pins CMD, CLK, DAT0..DAT3 (max 100 mA) */
+static struct regulator_init_data pandora_vmmc2 = {
+	.constraints = {
+		.min_uV			= 1850000,
+		.max_uV			= 3150000,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_VOLTAGE
+					| REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &pandora_vmmc2_supply,
+};
+
 static struct twl4030_usb_data omap3pandora_usb_data = {
 	.usb_mode	= T2_USB_MODE_ULPI,
 };
@@ -97,6 +140,8 @@ static struct twl4030_platform_data omap3pandora_twldata = {
 	.irq_end	= TWL4030_IRQ_END,
 	.gpio		= &omap3pandora_gpio_data,
 	.usb		= &omap3pandora_usb_data,
+	.vmmc1		= &pandora_vmmc1,
+	.vmmc2		= &pandora_vmmc2,
 };
 
 static struct i2c_board_info __initdata omap3pandora_i2c_boardinfo[] = {
-- 
cgit v0.10.2


From f52eeee83d360d536cab1c0296eae0ec5f05e4dd Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Thu, 28 May 2009 14:04:04 -0700
Subject: ARM: OMAP3: RX51: Connect VAUX3 to MMC2

Connect VAUX3 to MMC2

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index 233c745..da93b86 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -32,6 +32,9 @@
 
 #include "mmc-twl4030.h"
 
+#define SYSTEM_REV_B_USES_VAUX3	0x1699
+#define SYSTEM_REV_S_USES_VAUX3 0x8
+
 static int rx51_keymap[] = {
 	KEY(0, 0, KEY_Q),
 	KEY(0, 1, KEY_W),
@@ -147,7 +150,7 @@ static struct regulator_init_data rx51_vaux2 = {
 };
 
 /* VAUX3 - adds more power to VIO_18 rail */
-static struct regulator_init_data rx51_vaux3 = {
+static struct regulator_init_data rx51_vaux3_cam = {
 	.constraints = {
 		.name			= "VCAM_DIG_18",
 		.min_uV			= 1800000,
@@ -160,6 +163,22 @@ static struct regulator_init_data rx51_vaux3 = {
 	},
 };
 
+static struct regulator_init_data rx51_vaux3_mmc = {
+	.constraints = {
+		.name			= "VMMC2_30",
+		.min_uV			= 2800000,
+		.max_uV			= 3000000,
+		.apply_uV		= true,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_VOLTAGE
+					| REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &rx51_vmmc2_supply,
+};
+
 static struct regulator_init_data rx51_vaux4 = {
 	.constraints = {
 		.name			= "VCAM_ANA_28",
@@ -270,10 +289,8 @@ static struct twl4030_platform_data rx51_twldata = {
 
 	.vaux1			= &rx51_vaux1,
 	.vaux2			= &rx51_vaux2,
-	.vaux3			= &rx51_vaux3,
 	.vaux4			= &rx51_vaux4,
 	.vmmc1			= &rx51_vmmc1,
-	.vmmc2			= &rx51_vmmc2,
 	.vsim			= &rx51_vsim,
 	.vdac			= &rx51_vdac,
 };
@@ -289,6 +306,13 @@ static struct i2c_board_info __initdata rx51_peripherals_i2c_board_info_1[] = {
 
 static int __init rx51_i2c_init(void)
 {
+	if ((system_rev >= SYSTEM_REV_S_USES_VAUX3 && system_rev < 0x100) ||
+	    system_rev >= SYSTEM_REV_B_USES_VAUX3)
+		rx51_twldata.vaux3 = &rx51_vaux3_mmc;
+	else {
+		rx51_twldata.vaux3 = &rx51_vaux3_cam;
+		rx51_twldata.vmmc2 = &rx51_vmmc2;
+	}
 	omap_register_i2c_bus(1, 2600, rx51_peripherals_i2c_board_info_1,
 			ARRAY_SIZE(rx51_peripherals_i2c_board_info_1));
 	omap_register_i2c_bus(2, 100, NULL, 0);
-- 
cgit v0.10.2


From 577145f45487b41b4d47eeaedb9d37e494f80715 Mon Sep 17 00:00:00 2001
From: Vikram Pandita <vikram.pandita@ti.com>
Date: Thu, 28 May 2009 14:04:04 -0700
Subject: ARM: OMAP3: Add support for OMAP3 Zoom2 board

This patch creates the minimal OMAP3 Zoom2 board support.

Signed-off-by: Mikkel Christensen <mlc@ti.com>
Signed-off-by: Vikram Pandita <vikram.pandita@ti.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 64ab386..f6cf903 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -67,3 +67,7 @@ config MACH_OMAP_3430SDP
 config MACH_NOKIA_RX51
 	bool "Nokia RX-51 board"
 	depends on ARCH_OMAP3 && ARCH_OMAP34XX
+
+config MACH_OMAP_ZOOM2
+	bool "OMAP3 Zoom2 board"
+	depends on ARCH_OMAP3 && ARCH_OMAP34XX
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index e4f6f61..4cdb318 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -53,7 +53,9 @@ obj-$(CONFIG_MACH_OMAP_3430SDP)		+= board-3430sdp.o \
 obj-$(CONFIG_MACH_NOKIA_RX51)		+= board-rx51.o \
 					   board-rx51-peripherals.o \
 					   mmc-twl4030.o
-
+obj-$(CONFIG_MACH_OMAP_ZOOM2)		+= board-zoom2.o \
+					   mmc-twl4030.o \
+					   board-zoom-debugboard.o
 # Platform specific device init code
 ifeq ($(CONFIG_USB_MUSB_SOC),y)
 obj-y					+= usb-musb.o
diff --git a/arch/arm/mach-omap2/board-zoom-debugboard.c b/arch/arm/mach-omap2/board-zoom-debugboard.c
new file mode 100644
index 0000000..bac5c43
--- /dev/null
+++ b/arch/arm/mach-omap2/board-zoom-debugboard.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2009 Texas Instruments Inc.
+ * Mikkel Christensen <mlc@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/serial_8250.h>
+#include <linux/smsc911x.h>
+
+#include <mach/gpmc.h>
+
+#define ZOOM2_SMSC911X_CS	7
+#define ZOOM2_SMSC911X_GPIO	158
+#define ZOOM2_QUADUART_CS	3
+#define ZOOM2_QUADUART_GPIO	102
+#define QUART_CLK		1843200
+#define DEBUG_BASE		0x08000000
+#define ZOOM2_ETHR_START	DEBUG_BASE
+
+static struct resource zoom2_smsc911x_resources[] = {
+	[0] = {
+		.start	= ZOOM2_ETHR_START,
+		.end	= ZOOM2_ETHR_START + SZ_4K,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.flags	= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
+	},
+};
+
+static struct smsc911x_platform_config zoom2_smsc911x_config = {
+	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_OPEN_DRAIN,
+	.flags		= SMSC911X_USE_32BIT,
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
+};
+
+static struct platform_device zoom2_smsc911x_device = {
+	.name		= "smsc911x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(zoom2_smsc911x_resources),
+	.resource	= zoom2_smsc911x_resources,
+	.dev		= {
+		.platform_data = &zoom2_smsc911x_config,
+	},
+};
+
+static inline void __init zoom2_init_smsc911x(void)
+{
+	int eth_cs;
+	unsigned long cs_mem_base;
+	int eth_gpio = 0;
+
+	eth_cs = ZOOM2_SMSC911X_CS;
+
+	if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) {
+		printk(KERN_ERR "Failed to request GPMC mem for smsc911x\n");
+		return;
+	}
+
+	zoom2_smsc911x_resources[0].start = cs_mem_base + 0x0;
+	zoom2_smsc911x_resources[0].end   = cs_mem_base + 0xff;
+
+	eth_gpio = ZOOM2_SMSC911X_GPIO;
+
+	zoom2_smsc911x_resources[1].start = OMAP_GPIO_IRQ(eth_gpio);
+
+	if (gpio_request(eth_gpio, "smsc911x irq") < 0) {
+		printk(KERN_ERR "Failed to request GPIO%d for smsc911x IRQ\n",
+				eth_gpio);
+		return;
+	}
+	gpio_direction_input(eth_gpio);
+}
+
+static struct plat_serial8250_port serial_platform_data[] = {
+	{
+		.mapbase	= 0x10000000,
+		.irq		= OMAP_GPIO_IRQ(102),
+		.flags		= UPF_BOOT_AUTOCONF|UPF_IOREMAP|UPF_SHARE_IRQ,
+		.iotype		= UPIO_MEM,
+		.regshift	= 1,
+		.uartclk	= QUART_CLK,
+	}, {
+		.flags		= 0
+	}
+};
+
+static struct platform_device zoom2_debugboard_serial_device = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_PLATFORM1,
+	.dev			= {
+		.platform_data	= serial_platform_data,
+	},
+};
+
+static inline void __init zoom2_init_quaduart(void)
+{
+	int quart_cs;
+	unsigned long cs_mem_base;
+	int quart_gpio = 0;
+
+	quart_cs = ZOOM2_QUADUART_CS;
+
+	if (gpmc_cs_request(quart_cs, SZ_1M, &cs_mem_base) < 0) {
+		printk(KERN_ERR "Failed to request GPMC mem"
+				"for Quad UART(TL16CP754C)\n");
+		return;
+	}
+
+	quart_gpio = ZOOM2_QUADUART_GPIO;
+
+	if (gpio_request(quart_gpio, "TL16CP754C GPIO") < 0) {
+		printk(KERN_ERR "Failed to request GPIO%d for TL16CP754C\n",
+								quart_gpio);
+		return;
+	}
+	gpio_direction_input(quart_gpio);
+}
+
+static inline int omap_zoom2_debugboard_detect(void)
+{
+	int debug_board_detect = 0;
+
+	debug_board_detect = ZOOM2_SMSC911X_GPIO;
+
+	if (gpio_request(debug_board_detect, "Zoom2 debug board detect") < 0) {
+		printk(KERN_ERR "Failed to request GPIO%d for Zoom2 debug"
+		"board detect\n", debug_board_detect);
+		return 0;
+	}
+	gpio_direction_input(debug_board_detect);
+
+	if (!gpio_get_value(debug_board_detect)) {
+		gpio_free(debug_board_detect);
+		return 0;
+	}
+	return 1;
+}
+
+static struct platform_device *zoom2_devices[] __initdata = {
+	&zoom2_smsc911x_device,
+	&zoom2_debugboard_serial_device,
+};
+
+int __init omap_zoom2_debugboard_init(void)
+{
+	if (!omap_zoom2_debugboard_detect())
+		return 0;
+
+	zoom2_init_smsc911x();
+	zoom2_init_quaduart();
+	return platform_add_devices(zoom2_devices, ARRAY_SIZE(zoom2_devices));
+}
diff --git a/arch/arm/mach-omap2/board-zoom2.c b/arch/arm/mach-omap2/board-zoom2.c
new file mode 100644
index 0000000..bcc0f76
--- /dev/null
+++ b/arch/arm/mach-omap2/board-zoom2.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2009 Texas Instruments Inc.
+ * Mikkel Christensen <mlc@ti.com>
+ *
+ * Modified from mach-omap2/board-ldp.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/i2c/twl4030.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+
+#include <mach/common.h>
+#include <mach/usb.h>
+
+#include "mmc-twl4030.h"
+
+static void __init omap_zoom2_init_irq(void)
+{
+	omap2_init_common_hw(NULL);
+	omap_init_irq();
+	omap_gpio_init();
+}
+
+static struct omap_uart_config zoom2_uart_config __initdata = {
+	.enabled_uarts	= ((1 << 0) | (1 << 1) | (1 << 2)),
+};
+
+static struct omap_board_config_kernel zoom2_config[] __initdata = {
+	{ OMAP_TAG_UART,	&zoom2_uart_config },
+};
+
+static struct twl4030_gpio_platform_data zoom2_gpio_data = {
+	.gpio_base	= OMAP_MAX_GPIO_LINES,
+	.irq_base	= TWL4030_GPIO_IRQ_BASE,
+	.irq_end	= TWL4030_GPIO_IRQ_END,
+};
+
+static struct twl4030_platform_data zoom2_twldata = {
+	.irq_base	= TWL4030_IRQ_BASE,
+	.irq_end	= TWL4030_IRQ_END,
+
+	/* platform_data for children goes here */
+	.gpio		= &zoom2_gpio_data,
+};
+
+static struct i2c_board_info __initdata zoom2_i2c_boardinfo[] = {
+	{
+		I2C_BOARD_INFO("twl4030", 0x48),
+		.flags		= I2C_CLIENT_WAKE,
+		.irq		= INT_34XX_SYS_NIRQ,
+		.platform_data	= &zoom2_twldata,
+	},
+};
+
+static int __init omap_i2c_init(void)
+{
+	omap_register_i2c_bus(1, 2600, zoom2_i2c_boardinfo,
+			ARRAY_SIZE(zoom2_i2c_boardinfo));
+	omap_register_i2c_bus(2, 400, NULL, 0);
+	omap_register_i2c_bus(3, 400, NULL, 0);
+	return 0;
+}
+
+static struct twl4030_hsmmc_info mmc[] __initdata = {
+	{
+		.mmc		= 1,
+		.wires		= 4,
+		.gpio_cd	= -EINVAL,
+		.gpio_wp	= -EINVAL,
+	},
+	{}	/* Terminator */
+};
+
+extern int __init omap_zoom2_debugboard_init(void);
+
+static void __init omap_zoom2_init(void)
+{
+	omap_i2c_init();
+	omap_board_config = zoom2_config;
+	omap_board_config_size = ARRAY_SIZE(zoom2_config);
+	omap_serial_init();
+	omap_zoom2_debugboard_init();
+	twl4030_mmc_init(mmc);
+	usb_musb_init();
+}
+
+static void __init omap_zoom2_map_io(void)
+{
+	omap2_set_globals_343x();
+	omap2_map_common_io();
+}
+
+MACHINE_START(OMAP_ZOOM2, "OMAP Zoom2 board")
+	.phys_io	= 0x48000000,
+	.io_pg_offst	= ((0xd8000000) >> 18) & 0xfffc,
+	.boot_params	= 0x80000100,
+	.map_io		= omap_zoom2_map_io,
+	.init_irq	= omap_zoom2_init_irq,
+	.init_machine	= omap_zoom2_init,
+	.timer		= &omap_timer,
+MACHINE_END
-- 
cgit v0.10.2


From b969a2873d84b832a00e575c5197d02305399fe2 Mon Sep 17 00:00:00 2001
From: Vikram Pandita <vikram.pandita@ti.com>
Date: Thu, 28 May 2009 14:08:58 -0700
Subject: ARM: OMAP3: Defconfig for Zoom2 board

This patch adds OMAP3 Zoom2 board defconfig.

Signed-off-by: Mikkel Christensen <mlc@ti.com>
Signed-off-by: Vikram Pandita <vikram.pandita@ti.com>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/configs/omap_zoom2_defconfig b/arch/arm/configs/omap_zoom2_defconfig
new file mode 100644
index 0000000..213fe9c
--- /dev/null
+++ b/arch/arm/configs/omap_zoom2_defconfig
@@ -0,0 +1,1211 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.27-rc5
+# Fri Oct 10 11:49:41 2008
+#
+CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_ARCH_SUPPORTS_AOUT=y
+CONFIG_ZONE_DMA=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_CGROUPS is not set
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_COMPAT_BRK=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+# CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is not set
+# CONFIG_HAVE_IOREMAP_PROT is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+# CONFIG_HAVE_ARCH_TRACEHOOK is not set
+# CONFIG_HAVE_DMA_ATTRS is not set
+# CONFIG_USE_GENERIC_SMP_HELPERS is not set
+CONFIG_HAVE_CLK=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_KMOD=y
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_CLASSIC_RCU=y
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS7500 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_PNX4008 is not set
+# CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_DAVINCI is not set
+CONFIG_ARCH_OMAP=y
+# CONFIG_ARCH_MSM7X00A is not set
+
+#
+# TI OMAP Implementations
+#
+CONFIG_ARCH_OMAP_OTG=y
+# CONFIG_ARCH_OMAP1 is not set
+# CONFIG_ARCH_OMAP2 is not set
+CONFIG_ARCH_OMAP3=y
+
+#
+# OMAP Feature Selections
+#
+# CONFIG_OMAP_DEBUG_POWERDOMAIN is not set
+# CONFIG_OMAP_DEBUG_CLOCKDOMAIN is not set
+# CONFIG_OMAP_RESET_CLOCKS is not set
+CONFIG_OMAP_MUX=y
+CONFIG_OMAP_MUX_DEBUG=y
+CONFIG_OMAP_MUX_WARNINGS=y
+CONFIG_OMAP_MCBSP=y
+# CONFIG_OMAP_MPU_TIMER is not set
+CONFIG_OMAP_32K_TIMER=y
+CONFIG_OMAP_32K_TIMER_HZ=128
+CONFIG_OMAP_DM_TIMER=y
+# CONFIG_OMAP_LL_DEBUG_UART1 is not set
+# CONFIG_OMAP_LL_DEBUG_UART2 is not set
+CONFIG_OMAP_LL_DEBUG_UART3=y
+CONFIG_OMAP_SERIAL_WAKE=y
+CONFIG_ARCH_OMAP34XX=y
+CONFIG_ARCH_OMAP3430=y
+
+#
+# OMAP Board Type
+#
+# CONFIG_MACH_OMAP3_BEAGLE is not set
+# CONFIG_MACH_OMAP_LDP is not set
+CONFIG_MACH_OMAP_ZOOM2=y
+# CONFIG_MACH_OVERO is not set
+
+#
+# Boot options
+#
+
+#
+# Power management
+#
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_32v6K=y
+CONFIG_CPU_V7=y
+CONFIG_CPU_32v7=y
+CONFIG_CPU_ABRT_EV7=y
+CONFIG_CPU_PABRT_IFAR=y
+CONFIG_CPU_CACHE_V7=y
+CONFIG_CPU_CACHE_VIPT=y
+CONFIG_CPU_COPY_V6=y
+CONFIG_CPU_TLB_V7=y
+CONFIG_CPU_HAS_ASID=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+
+#
+# Processor Features
+#
+CONFIG_ARM_THUMB=y
+# CONFIG_ARM_THUMBEE is not set
+# CONFIG_CPU_ICACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_DISABLE is not set
+# CONFIG_CPU_BPREDICT_DISABLE is not set
+CONFIG_HAS_TLS_REG=y
+# CONFIG_OUTER_CACHE is not set
+
+#
+# Bus support
+#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+# CONFIG_PREEMPT is not set
+CONFIG_HZ=128
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
+# CONFIG_LEDS is not set
+CONFIG_ALIGNMENT_TRAP=y
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="root=/dev/nfs nfsroot=192.168.0.1:/home/user/buildroot ip=192.168.0.2:192.168.0.1:192.168.0.1:255.255.255.0:tgt:eth0:off rw console=ttyS2,115200n8"
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+CONFIG_FPE_NWFPE=y
+# CONFIG_FPE_NWFPE_XP is not set
+# CONFIG_FPE_FASTFPE is not set
+CONFIG_VFP=y
+CONFIG_VFPv3=y
+# CONFIG_NEON is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_AOUT is not set
+CONFIG_BINFMT_MISC=y
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+CONFIG_XFRM_USER=y
+# CONFIG_XFRM_SUB_POLICY is not set
+CONFIG_XFRM_MIGRATE=y
+# CONFIG_XFRM_STATISTICS is not set
+CONFIG_NET_KEY=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_PHONET is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+CONFIG_CONNECTOR=y
+CONFIG_PROC_EVENTS=y
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_DH is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+CONFIG_SMSC_PHY=y
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_MDIO_BITBANG is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+# CONFIG_SMC91X is not set
+# CONFIG_DM9000 is not set
+# CONFIG_ENC28J60 is not set
+# CONFIG_SMC911X is not set
+CONFIG_SMSC911X=y
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+CONFIG_NETDEV_1000=y
+CONFIG_NETDEV_10000=y
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_ADS7846=y
+# CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_GUNZE is not set
+# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
+# CONFIG_TOUCHSCREEN_MK712 is not set
+# CONFIG_TOUCHSCREEN_PENMOUNT is not set
+# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
+# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
+# CONFIG_TOUCHSCREEN_UCB1400 is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_NVRAM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_OMAP=y
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_ISP1301_OMAP is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+# CONFIG_SPI_BITBANG is not set
+CONFIG_SPI_OMAP24XX=y
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_EEPROM_AT25 is not set
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
+CONFIG_W1=y
+
+#
+# 1-wire Bus Masters
+#
+# CONFIG_W1_MASTER_DS2482 is not set
+# CONFIG_W1_MASTER_DS1WM is not set
+# CONFIG_W1_MASTER_GPIO is not set
+
+#
+# 1-wire Slaves
+#
+# CONFIG_W1_SLAVE_THERM is not set
+# CONFIG_W1_SLAVE_SMEM is not set
+# CONFIG_W1_SLAVE_DS2433 is not set
+# CONFIG_W1_SLAVE_DS2760 is not set
+CONFIG_POWER_SUPPLY=y
+# CONFIG_POWER_SUPPLY_DEBUG is not set
+# CONFIG_PDA_POWER is not set
+# CONFIG_BATTERY_DS2760 is not set
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+CONFIG_DAB=y
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+CONFIG_VIDEO_OUTPUT_CONTROL=m
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+# CONFIG_SND_SEQUENCER is not set
+# CONFIG_SND_MIXER_OSS is not set
+# CONFIG_SND_PCM_OSS is not set
+# CONFIG_SND_DYNAMIC_MINORS is not set
+CONFIG_SND_SUPPORT_OLD_API=y
+CONFIG_SND_VERBOSE_PROCFS=y
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+CONFIG_SND_DRIVERS=y
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+CONFIG_SND_ARM=y
+CONFIG_SND_SPI=y
+# CONFIG_SND_SOC is not set
+# CONFIG_SOUND_PRIME is not set
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
+# CONFIG_HIDRAW is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_MMC_DEBUG is not set
+# CONFIG_MMC_UNSAFE_RESUME is not set
+
+#
+# MMC/SD Card Drivers
+#
+CONFIG_MMC_BLOCK=y
+CONFIG_MMC_BLOCK_BOUNCE=y
+# CONFIG_SDIO_UART is not set
+# CONFIG_MMC_TEST is not set
+
+#
+# MMC/SD Host Controller Drivers
+#
+# CONFIG_MMC_SDHCI is not set
+# CONFIG_MMC_OMAP is not set
+# CONFIG_MMC_SPI is not set
+# CONFIG_NEW_LEDS is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+
+#
+# SPI RTC drivers
+#
+# CONFIG_RTC_DRV_M41T94 is not set
+# CONFIG_RTC_DRV_DS1305 is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+# CONFIG_RTC_DRV_R9701 is not set
+# CONFIG_RTC_DRV_RS5C348 is not set
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_CMOS is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+# CONFIG_DMADEVICES is not set
+
+#
+# Voltage and Current regulators
+#
+# CONFIG_REGULATOR is not set
+# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
+# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
+# CONFIG_REGULATOR_BQ24022 is not set
+# CONFIG_UIO is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_FS_XATTR is not set
+# CONFIG_EXT4DEV_FS is not set
+CONFIG_JBD=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+CONFIG_QUOTA=y
+CONFIG_PRINT_QUOTA_WARNING=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_QUOTACTL=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_ACL_SUPPORT=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+# CONFIG_SUNRPC_REGISTER_V4 is not set
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SYSV68_PARTITION is not set
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+CONFIG_FRAME_POINTER=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_HAVE_FTRACE=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+# CONFIG_FTRACE is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+# CONFIG_DEBUG_USER is not set
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_DEBUG_LL=y
+# CONFIG_DEBUG_ICEDCC is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_MANAGER=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=m
+# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_PCBC=m
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+CONFIG_CRYPTO_HW=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_GENERIC_FIND_FIRST_BIT is not set
+# CONFIG_GENERIC_FIND_NEXT_BIT is not set
+CONFIG_CRC_CCITT=y
+# CONFIG_CRC16 is not set
+CONFIG_CRC_T10DIF=y
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+CONFIG_LIBCRC32C=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
-- 
cgit v0.10.2


From 53c5ec31e775d40e428a577de561211e11b08d74 Mon Sep 17 00:00:00 2001
From: Syed Mohammed Khasim <khasim@ti.com>
Date: Thu, 28 May 2009 14:13:28 -0700
Subject: ARM: OMAP3: Add omap3 EVM support

Add omap3 EVM support

Signed-off-by: Syed Mohammed Khasim <khasim@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index f6cf903..b5c9088 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -56,6 +56,10 @@ config MACH_OVERO
 	bool "Gumstix Overo board"
 	depends on ARCH_OMAP3 && ARCH_OMAP34XX
 
+config MACH_OMAP3EVM
+	bool "OMAP 3530 EVM board"
+	depends on ARCH_OMAP3 && ARCH_OMAP34XX
+
 config MACH_OMAP3_PANDORA
 	bool "OMAP3 Pandora"
 	depends on ARCH_OMAP3 && ARCH_OMAP34XX
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 4cdb318..10e7c29 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -45,6 +45,8 @@ obj-$(CONFIG_MACH_OMAP_LDP)		+= board-ldp.o \
 					   mmc-twl4030.o
 obj-$(CONFIG_MACH_OVERO)		+= board-overo.o \
 					   mmc-twl4030.o
+obj-$(CONFIG_MACH_OMAP3EVM)		+= board-omap3evm.o \
+					   mmc-twl4030.o
 obj-$(CONFIG_MACH_OMAP3_PANDORA)	+= board-omap3pandora.o \
 					   mmc-twl4030.o
 obj-$(CONFIG_MACH_OMAP_3430SDP)		+= board-3430sdp.o \
diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
new file mode 100644
index 0000000..d3cc145
--- /dev/null
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -0,0 +1,329 @@
+/*
+ * linux/arch/arm/mach-omap2/board-omap3evm.c
+ *
+ * Copyright (C) 2008 Texas Instruments
+ *
+ * Modified from mach-omap2/board-3430sdp.c
+ *
+ * Initial code: Syed Mohammed Khasim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/gpio.h>
+#include <linux/input.h>
+#include <linux/leds.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/ads7846.h>
+#include <linux/i2c/twl4030.h>
+
+#include <mach/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <mach/board.h>
+#include <mach/mux.h>
+#include <mach/usb.h>
+#include <mach/common.h>
+#include <mach/mcspi.h>
+#include <mach/keypad.h>
+
+#include "sdram-micron-mt46h32m32lf-6.h"
+#include "mmc-twl4030.h"
+
+#define OMAP3_EVM_TS_GPIO	175
+
+#define OMAP3EVM_ETHR_START	0x2c000000
+#define OMAP3EVM_ETHR_SIZE	1024
+#define OMAP3EVM_ETHR_GPIO_IRQ	176
+#define OMAP3EVM_SMC911X_CS	5
+
+static struct resource omap3evm_smc911x_resources[] = {
+	[0] =	{
+		.start	= OMAP3EVM_ETHR_START,
+		.end	= (OMAP3EVM_ETHR_START + OMAP3EVM_ETHR_SIZE - 1),
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] =	{
+		.start	= OMAP_GPIO_IRQ(OMAP3EVM_ETHR_GPIO_IRQ),
+		.end	= OMAP_GPIO_IRQ(OMAP3EVM_ETHR_GPIO_IRQ),
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device omap3evm_smc911x_device = {
+	.name		= "smc911x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(omap3evm_smc911x_resources),
+	.resource	= &omap3evm_smc911x_resources[0],
+};
+
+static inline void __init omap3evm_init_smc911x(void)
+{
+	int eth_cs;
+	struct clk *l3ck;
+	unsigned int rate;
+
+	eth_cs = OMAP3EVM_SMC911X_CS;
+
+	l3ck = clk_get(NULL, "l3_ck");
+	if (IS_ERR(l3ck))
+		rate = 100000000;
+	else
+		rate = clk_get_rate(l3ck);
+
+	if (gpio_request(OMAP3EVM_ETHR_GPIO_IRQ, "SMC911x irq") < 0) {
+		printk(KERN_ERR "Failed to request GPIO%d for smc911x IRQ\n",
+			OMAP3EVM_ETHR_GPIO_IRQ);
+		return;
+	}
+
+	gpio_direction_input(OMAP3EVM_ETHR_GPIO_IRQ);
+}
+
+static struct omap_uart_config omap3_evm_uart_config __initdata = {
+	.enabled_uarts	= ((1 << 0) | (1 << 1) | (1 << 2)),
+};
+
+static struct twl4030_hsmmc_info mmc[] = {
+	{
+		.mmc		= 1,
+		.wires		= 4,
+		.gpio_cd	= -EINVAL,
+		.gpio_wp	= 63,
+	},
+	{}	/* Terminator */
+};
+
+static struct gpio_led gpio_leds[] = {
+	{
+		.name			= "omap3evm::ledb",
+		/* normally not visible (board underside) */
+		.default_trigger	= "default-on",
+		.gpio			= -EINVAL,	/* gets replaced */
+		.active_low		= true,
+	},
+};
+
+static struct gpio_led_platform_data gpio_led_info = {
+	.leds		= gpio_leds,
+	.num_leds	= ARRAY_SIZE(gpio_leds),
+};
+
+static struct platform_device leds_gpio = {
+	.name	= "leds-gpio",
+	.id	= -1,
+	.dev	= {
+		.platform_data	= &gpio_led_info,
+	},
+};
+
+
+static int omap3evm_twl_gpio_setup(struct device *dev,
+		unsigned gpio, unsigned ngpio)
+{
+	/* gpio + 0 is "mmc0_cd" (input/IRQ) */
+	omap_cfg_reg(L8_34XX_GPIO63);
+	mmc[0].gpio_cd = gpio + 0;
+	twl4030_mmc_init(mmc);
+
+	/*
+	 * Most GPIOs are for USB OTG.  Some are mostly sent to
+	 * the P2 connector; notably LEDA for the LCD backlight.
+	 */
+
+	/* TWL4030_GPIO_MAX + 1 == ledB (out, active low LED) */
+	gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1;
+
+	platform_device_register(&leds_gpio);
+
+	return 0;
+}
+
+static struct twl4030_gpio_platform_data omap3evm_gpio_data = {
+	.gpio_base	= OMAP_MAX_GPIO_LINES,
+	.irq_base	= TWL4030_GPIO_IRQ_BASE,
+	.irq_end	= TWL4030_GPIO_IRQ_END,
+	.use_leds	= true,
+	.setup		= omap3evm_twl_gpio_setup,
+};
+
+static struct twl4030_usb_data omap3evm_usb_data = {
+	.usb_mode	= T2_USB_MODE_ULPI,
+};
+
+static int omap3evm_keymap[] = {
+	KEY(0, 0, KEY_LEFT),
+	KEY(0, 1, KEY_RIGHT),
+	KEY(0, 2, KEY_A),
+	KEY(0, 3, KEY_B),
+	KEY(1, 0, KEY_DOWN),
+	KEY(1, 1, KEY_UP),
+	KEY(1, 2, KEY_E),
+	KEY(1, 3, KEY_F),
+	KEY(2, 0, KEY_ENTER),
+	KEY(2, 1, KEY_I),
+	KEY(2, 2, KEY_J),
+	KEY(2, 3, KEY_K),
+	KEY(3, 0, KEY_M),
+	KEY(3, 1, KEY_N),
+	KEY(3, 2, KEY_O),
+	KEY(3, 3, KEY_P)
+};
+
+static struct twl4030_keypad_data omap3evm_kp_data = {
+	.rows		= 4,
+	.cols		= 4,
+	.keymap		= omap3evm_keymap,
+	.keymapsize	= ARRAY_SIZE(omap3evm_keymap),
+	.rep		= 1,
+};
+
+static struct twl4030_madc_platform_data omap3evm_madc_data = {
+	.irq_line	= 1,
+};
+
+static struct twl4030_platform_data omap3evm_twldata = {
+	.irq_base	= TWL4030_IRQ_BASE,
+	.irq_end	= TWL4030_IRQ_END,
+
+	/* platform_data for children goes here */
+	.keypad		= &omap3evm_kp_data,
+	.madc		= &omap3evm_madc_data,
+	.usb		= &omap3evm_usb_data,
+	.gpio		= &omap3evm_gpio_data,
+};
+
+static struct i2c_board_info __initdata omap3evm_i2c_boardinfo[] = {
+	{
+		I2C_BOARD_INFO("twl4030", 0x48),
+		.flags = I2C_CLIENT_WAKE,
+		.irq = INT_34XX_SYS_NIRQ,
+		.platform_data = &omap3evm_twldata,
+	},
+};
+
+static int __init omap3_evm_i2c_init(void)
+{
+	omap_register_i2c_bus(1, 2600, omap3evm_i2c_boardinfo,
+			ARRAY_SIZE(omap3evm_i2c_boardinfo));
+	omap_register_i2c_bus(2, 400, NULL, 0);
+	omap_register_i2c_bus(3, 400, NULL, 0);
+	return 0;
+}
+
+static struct platform_device omap3_evm_lcd_device = {
+	.name		= "omap3evm_lcd",
+	.id		= -1,
+};
+
+static struct omap_lcd_config omap3_evm_lcd_config __initdata = {
+	.ctrl_name	= "internal",
+};
+
+static void ads7846_dev_init(void)
+{
+	if (gpio_request(OMAP3_EVM_TS_GPIO, "ADS7846 pendown") < 0)
+		printk(KERN_ERR "can't get ads7846 pen down GPIO\n");
+
+	gpio_direction_input(OMAP3_EVM_TS_GPIO);
+
+	omap_set_gpio_debounce(OMAP3_EVM_TS_GPIO, 1);
+	omap_set_gpio_debounce_time(OMAP3_EVM_TS_GPIO, 0xa);
+}
+
+static int ads7846_get_pendown_state(void)
+{
+	return !gpio_get_value(OMAP3_EVM_TS_GPIO);
+}
+
+struct ads7846_platform_data ads7846_config = {
+	.x_max			= 0x0fff,
+	.y_max			= 0x0fff,
+	.x_plate_ohms		= 180,
+	.pressure_max		= 255,
+	.debounce_max		= 10,
+	.debounce_tol		= 3,
+	.debounce_rep		= 1,
+	.get_pendown_state	= ads7846_get_pendown_state,
+	.keep_vref_on		= 1,
+	.settle_delay_usecs	= 150,
+};
+
+static struct omap2_mcspi_device_config ads7846_mcspi_config = {
+	.turbo_mode	= 0,
+	.single_channel	= 1,	/* 0: slave, 1: master */
+};
+
+struct spi_board_info omap3evm_spi_board_info[] = {
+	[0] = {
+		.modalias		= "ads7846",
+		.bus_num		= 1,
+		.chip_select		= 0,
+		.max_speed_hz		= 1500000,
+		.controller_data	= &ads7846_mcspi_config,
+		.irq			= OMAP_GPIO_IRQ(OMAP3_EVM_TS_GPIO),
+		.platform_data		= &ads7846_config,
+	},
+};
+
+static void __init omap3_evm_init_irq(void)
+{
+	omap2_init_common_hw(mt46h32m32lf6_sdrc_params);
+	omap_init_irq();
+	omap_gpio_init();
+	omap3evm_init_smc911x();
+}
+
+static struct omap_board_config_kernel omap3_evm_config[] __initdata = {
+	{ OMAP_TAG_UART,	&omap3_evm_uart_config },
+	{ OMAP_TAG_LCD,		&omap3_evm_lcd_config },
+};
+
+static struct platform_device *omap3_evm_devices[] __initdata = {
+	&omap3_evm_lcd_device,
+	&omap3evm_smc911x_device,
+};
+
+static void __init omap3_evm_init(void)
+{
+	omap3_evm_i2c_init();
+
+	platform_add_devices(omap3_evm_devices, ARRAY_SIZE(omap3_evm_devices));
+	omap_board_config = omap3_evm_config;
+	omap_board_config_size = ARRAY_SIZE(omap3_evm_config);
+
+	spi_register_board_info(omap3evm_spi_board_info,
+				ARRAY_SIZE(omap3evm_spi_board_info));
+
+	omap_serial_init();
+	usb_musb_init();
+	ads7846_dev_init();
+}
+
+static void __init omap3_evm_map_io(void)
+{
+	omap2_set_globals_343x();
+	omap2_map_common_io();
+}
+
+MACHINE_START(OMAP3EVM, "OMAP3 EVM")
+	/* Maintainer: Syed Mohammed Khasim - Texas Instruments */
+	.phys_io	= 0x48000000,
+	.io_pg_offst	= ((0xd8000000) >> 18) & 0xfffc,
+	.boot_params	= 0x80000100,
+	.map_io		= omap3_evm_map_io,
+	.init_irq	= omap3_evm_init_irq,
+	.init_machine	= omap3_evm_init,
+	.timer		= &omap_timer,
+MACHINE_END
-- 
cgit v0.10.2


From 1bfc3738c414a34a6cb81a2f2fddea8e131afa89 Mon Sep 17 00:00:00 2001
From: Syed Mohammed Khasim <khasim@ti.com>
Date: Thu, 28 May 2009 14:15:44 -0700
Subject: ARM: OMAP3: Add omap3 EVM defconfig

Add omap3 EVM defconfig

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/configs/omap3_evm_defconfig b/arch/arm/configs/omap3_evm_defconfig
new file mode 100644
index 0000000..28be17f
--- /dev/null
+++ b/arch/arm/configs/omap3_evm_defconfig
@@ -0,0 +1,1528 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc5
+# Mon May 18 14:01:52 2009
+#
+CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_CLK=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_FREEZER=y
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_GEMINI is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_PNX4008 is not set
+# CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_MMP is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_DAVINCI is not set
+CONFIG_ARCH_OMAP=y
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_W90X900 is not set
+
+#
+# TI OMAP Implementations
+#
+CONFIG_ARCH_OMAP_OTG=y
+# CONFIG_ARCH_OMAP1 is not set
+# CONFIG_ARCH_OMAP2 is not set
+CONFIG_ARCH_OMAP3=y
+
+#
+# OMAP Feature Selections
+#
+# CONFIG_OMAP_DEBUG_POWERDOMAIN is not set
+# CONFIG_OMAP_DEBUG_CLOCKDOMAIN is not set
+CONFIG_OMAP_RESET_CLOCKS=y
+CONFIG_OMAP_MUX=y
+# CONFIG_OMAP_MUX_DEBUG is not set
+CONFIG_OMAP_MUX_WARNINGS=y
+# CONFIG_OMAP_MCBSP is not set
+# CONFIG_OMAP_MBOX_FWK is not set
+# CONFIG_OMAP_MPU_TIMER is not set
+CONFIG_OMAP_32K_TIMER=y
+CONFIG_OMAP_32K_TIMER_HZ=128
+CONFIG_OMAP_DM_TIMER=y
+CONFIG_OMAP_LL_DEBUG_UART1=y
+# CONFIG_OMAP_LL_DEBUG_UART2 is not set
+# CONFIG_OMAP_LL_DEBUG_UART3 is not set
+CONFIG_OMAP_SERIAL_WAKE=y
+CONFIG_ARCH_OMAP34XX=y
+CONFIG_ARCH_OMAP3430=y
+
+#
+# OMAP Board Type
+#
+# CONFIG_MACH_OMAP3_BEAGLE is not set
+# CONFIG_MACH_OMAP_LDP is not set
+# CONFIG_MACH_OVERO is not set
+CONFIG_MACH_OMAP3EVM=y
+# CONFIG_MACH_OMAP3_PANDORA is not set
+# CONFIG_MACH_OMAP_3430SDP is not set
+# CONFIG_MACH_NOKIA_RX51 is not set
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_32v6K=y
+CONFIG_CPU_V7=y
+CONFIG_CPU_32v7=y
+CONFIG_CPU_ABRT_EV7=y
+CONFIG_CPU_PABRT_IFAR=y
+CONFIG_CPU_CACHE_V7=y
+CONFIG_CPU_CACHE_VIPT=y
+CONFIG_CPU_COPY_V6=y
+CONFIG_CPU_TLB_V7=y
+CONFIG_CPU_HAS_ASID=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+
+#
+# Processor Features
+#
+CONFIG_ARM_THUMB=y
+# CONFIG_ARM_THUMBEE is not set
+# CONFIG_CPU_ICACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_DISABLE is not set
+# CONFIG_CPU_BPREDICT_DISABLE is not set
+CONFIG_HAS_TLS_REG=y
+# CONFIG_OUTER_CACHE is not set
+# CONFIG_ARM_ERRATA_430973 is not set
+# CONFIG_ARM_ERRATA_458693 is not set
+# CONFIG_ARM_ERRATA_460075 is not set
+CONFIG_COMMON_CLKDEV=y
+
+#
+# Bus support
+#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+# CONFIG_PREEMPT is not set
+CONFIG_HZ=128
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+# CONFIG_HIGHMEM is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+# CONFIG_LEDS is not set
+CONFIG_ALIGNMENT_TRAP=y
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="root=/dev/nfs nfsroot=192.168.0.1:/home/user/buildroot ip=192.168.0.2:192.168.0.1:192.168.0.1:255.255.255.0:tgt:eth0:off rw console=ttyS2,115200n8"
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+# CONFIG_CPU_FREQ is not set
+# CONFIG_CPU_IDLE is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+CONFIG_FPE_NWFPE=y
+# CONFIG_FPE_NWFPE_XP is not set
+# CONFIG_FPE_FASTFPE is not set
+CONFIG_VFP=y
+CONFIG_VFPv3=y
+CONFIG_NEON=y
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
+# CONFIG_BINFMT_AOUT is not set
+CONFIG_BINFMT_MISC=y
+
+#
+# Power management options
+#
+CONFIG_PM=y
+# CONFIG_PM_DEBUG is not set
+CONFIG_PM_SLEEP=y
+CONFIG_SUSPEND=y
+CONFIG_SUSPEND_FREEZER=y
+# CONFIG_APM_EMULATION is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
+CONFIG_NET_KEY=y
+# CONFIG_NET_KEY_MIGRATE is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+CONFIG_WIRELESS=y
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_OLD_REGULATORY is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_LIB80211 is not set
+# CONFIG_MAC80211 is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AFS_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_CFI_INTELEXT=y
+# CONFIG_MTD_CFI_AMDSTD is not set
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+# CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_ARM_INTEGRATOR is not set
+# CONFIG_MTD_OMAP_NOR is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+CONFIG_MTD_NAND=y
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+# CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+# CONFIG_MTD_NAND_GPIO is not set
+CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_NANDSIM is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_ALAUDA is not set
+CONFIG_MTD_ONENAND=y
+CONFIG_MTD_ONENAND_VERIFY_WRITE=y
+# CONFIG_MTD_ONENAND_GENERIC is not set
+CONFIG_MTD_ONENAND_OMAP2=y
+# CONFIG_MTD_ONENAND_OTP is not set
+# CONFIG_MTD_ONENAND_2X_PROGRAM is not set
+# CONFIG_MTD_ONENAND_SIM is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+# CONFIG_ISCSI_TCP is not set
+# CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+# CONFIG_SMC91X is not set
+# CONFIG_DM9000 is not set
+# CONFIG_ENC28J60 is not set
+# CONFIG_ETHOC is not set
+CONFIG_SMC911X=y
+# CONFIG_SMSC911X is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_ADS7846=y
+# CONFIG_TOUCHSCREEN_AD7877 is not set
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879_SPI is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
+# CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_GUNZE is not set
+# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
+# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
+# CONFIG_TOUCHSCREEN_MK712 is not set
+# CONFIG_TOUCHSCREEN_PENMOUNT is not set
+# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
+# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
+# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+# CONFIG_TOUCHSCREEN_TSC2007 is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_MAX3100 is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_OMAP=y
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_TINY_USB is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+# CONFIG_SPI_BITBANG is not set
+# CONFIG_SPI_GPIO is not set
+CONFIG_SPI_OMAP24XX=y
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+CONFIG_GPIO_TWL4030=y
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+CONFIG_OMAP_WATCHDOG=y
+
+#
+# USB-based Watchdog Cards
+#
+# CONFIG_USBPCWATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TPS65010 is not set
+CONFIG_TWL4030_CORE=y
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+CONFIG_DAB=y
+# CONFIG_USB_DABUSB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+CONFIG_VIDEO_OUTPUT_CONTROL=m
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_SOUND is not set
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
+# CONFIG_HIDRAW is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+# CONFIG_HID_PID is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
+# Special HID drivers
+#
+# CONFIG_HID_A4TECH is not set
+# CONFIG_HID_APPLE is not set
+# CONFIG_HID_BELKIN is not set
+# CONFIG_HID_CHERRY is not set
+# CONFIG_HID_CHICONY is not set
+# CONFIG_HID_CYPRESS is not set
+# CONFIG_DRAGONRISE_FF is not set
+# CONFIG_HID_EZKEY is not set
+# CONFIG_HID_KYE is not set
+# CONFIG_HID_GYRATION is not set
+# CONFIG_HID_KENSINGTON is not set
+# CONFIG_HID_LOGITECH is not set
+# CONFIG_HID_MICROSOFT is not set
+# CONFIG_HID_MONTEREY is not set
+# CONFIG_HID_NTRIG is not set
+# CONFIG_HID_PANTHERLORD is not set
+# CONFIG_HID_PETALYNX is not set
+# CONFIG_HID_SAMSUNG is not set
+# CONFIG_HID_SONY is not set
+# CONFIG_HID_SUNPLUS is not set
+# CONFIG_GREENASIA_FF is not set
+# CONFIG_HID_TOPSEED is not set
+# CONFIG_THRUSTMASTER_FF is not set
+# CONFIG_ZEROPLUS_FF is not set
+CONFIG_USB_SUPPORT=y
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+# CONFIG_USB_ARCH_HAS_EHCI is not set
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+CONFIG_USB_SUSPEND=y
+CONFIG_USB_OTG=y
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
+CONFIG_USB_MON=y
+# CONFIG_USB_WUSB is not set
+# CONFIG_USB_WUSB_CBAF is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_C67X00_HCD is not set
+# CONFIG_USB_OXU210HP_HCD is not set
+# CONFIG_USB_ISP116X_HCD is not set
+# CONFIG_USB_ISP1760_HCD is not set
+# CONFIG_USB_OHCI_HCD is not set
+# CONFIG_USB_SL811_HCD is not set
+# CONFIG_USB_R8A66597_HCD is not set
+# CONFIG_USB_HWA_HCD is not set
+CONFIG_USB_MUSB_HDRC=y
+CONFIG_USB_MUSB_SOC=y
+
+#
+# OMAP 343x high speed USB support
+#
+# CONFIG_USB_MUSB_HOST is not set
+# CONFIG_USB_MUSB_PERIPHERAL is not set
+CONFIG_USB_MUSB_OTG=y
+CONFIG_USB_GADGET_MUSB_HDRC=y
+CONFIG_USB_MUSB_HDRC_HCD=y
+# CONFIG_MUSB_PIO_ONLY is not set
+CONFIG_USB_INVENTRA_DMA=y
+# CONFIG_USB_TI_CPPI_DMA is not set
+# CONFIG_USB_MUSB_DEBUG is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+# CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
+
+#
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
+#
+
+#
+# also be needed; see USB_STORAGE Help for more info
+#
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_STORAGE_ONETOUCH is not set
+# CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_SEVSEG is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_BERRY_CHARGE is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYPRESS_CY7C63 is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_FTDI_ELAN is not set
+# CONFIG_USB_APPLEDISPLAY is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TRANCEVIBRATOR is not set
+# CONFIG_USB_IOWARRIOR is not set
+CONFIG_USB_TEST=y
+# CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_VST is not set
+CONFIG_USB_GADGET=y
+# CONFIG_USB_GADGET_DEBUG is not set
+# CONFIG_USB_GADGET_DEBUG_FILES is not set
+CONFIG_USB_GADGET_VBUS_DRAW=2
+CONFIG_USB_GADGET_SELECTED=y
+# CONFIG_USB_GADGET_AT91 is not set
+# CONFIG_USB_GADGET_ATMEL_USBA is not set
+# CONFIG_USB_GADGET_FSL_USB2 is not set
+# CONFIG_USB_GADGET_LH7A40X is not set
+# CONFIG_USB_GADGET_OMAP is not set
+# CONFIG_USB_GADGET_PXA25X is not set
+# CONFIG_USB_GADGET_PXA27X is not set
+# CONFIG_USB_GADGET_S3C2410 is not set
+# CONFIG_USB_GADGET_IMX is not set
+# CONFIG_USB_GADGET_M66592 is not set
+# CONFIG_USB_GADGET_AMD5536UDC is not set
+# CONFIG_USB_GADGET_FSL_QE is not set
+# CONFIG_USB_GADGET_CI13XXX is not set
+# CONFIG_USB_GADGET_NET2280 is not set
+# CONFIG_USB_GADGET_GOKU is not set
+# CONFIG_USB_GADGET_DUMMY_HCD is not set
+CONFIG_USB_GADGET_DUALSPEED=y
+CONFIG_USB_ZERO=m
+# CONFIG_USB_ZERO_HNPTEST is not set
+# CONFIG_USB_ETH is not set
+# CONFIG_USB_GADGETFS is not set
+# CONFIG_USB_FILE_STORAGE is not set
+# CONFIG_USB_G_SERIAL is not set
+# CONFIG_USB_MIDI_GADGET is not set
+# CONFIG_USB_G_PRINTER is not set
+# CONFIG_USB_CDC_COMPOSITE is not set
+
+#
+# OTG and related infrastructure
+#
+CONFIG_USB_OTG_UTILS=y
+# CONFIG_USB_GPIO_VBUS is not set
+# CONFIG_ISP1301_OMAP is not set
+CONFIG_TWL4030_USB=y
+# CONFIG_NOP_USB_XCEIV is not set
+CONFIG_MMC=y
+# CONFIG_MMC_DEBUG is not set
+# CONFIG_MMC_UNSAFE_RESUME is not set
+
+#
+# MMC/SD/SDIO Card Drivers
+#
+CONFIG_MMC_BLOCK=y
+CONFIG_MMC_BLOCK_BOUNCE=y
+# CONFIG_SDIO_UART is not set
+# CONFIG_MMC_TEST is not set
+
+#
+# MMC/SD/SDIO Host Controller Drivers
+#
+# CONFIG_MMC_SDHCI is not set
+# CONFIG_MMC_OMAP is not set
+CONFIG_MMC_OMAP_HS=m
+# CONFIG_MMC_SPI is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_NEW_LEDS is not set
+CONFIG_RTC_LIB=y
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_REGULATOR=y
+# CONFIG_REGULATOR_DEBUG is not set
+# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
+# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
+# CONFIG_REGULATOR_BQ24022 is not set
+CONFIG_REGULATOR_TWL4030=y
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+# CONFIG_EXT3_FS_XATTR is not set
+# CONFIG_EXT4_FS is not set
+CONFIG_JBD=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+CONFIG_QUOTA=y
+# CONFIG_QUOTA_NETLINK_INTERFACE is not set
+CONFIG_PRINT_QUOTA_WARNING=y
+CONFIG_QUOTA_TREE=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_QUOTACTL=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
+# CONFIG_JFFS2_SUMMARY is not set
+# CONFIG_JFFS2_FS_XATTR is not set
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+CONFIG_JFFS2_ZLIB=y
+# CONFIG_JFFS2_LZO is not set
+CONFIG_JFFS2_RTIME=y
+# CONFIG_JFFS2_RUBIN is not set
+# CONFIG_JFFS2_CMODE_NONE is not set
+CONFIG_JFFS2_CMODE_PRIORITY=y
+# CONFIG_JFFS2_CMODE_SIZE is not set
+# CONFIG_JFFS2_CMODE_FAVOURLZO is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SYSV68_PARTITION is not set
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_PAGE_POISONING is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_ARM_UNWIND=y
+# CONFIG_DEBUG_USER is not set
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_DEBUG_LL=y
+# CONFIG_DEBUG_ICEDCC is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=m
+# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_PCBC=m
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+CONFIG_CRYPTO_CRC32C=y
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+CONFIG_CRC_CCITT=y
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+CONFIG_LIBCRC32C=y
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From 7419045016e5002b3ccee72b28e41bf53dca68f2 Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Thu, 28 May 2009 14:15:44 -0700
Subject: ARM: OMAP3: pandora: add support for mode devices

Add support for keypad, GPIO keys and LEDs. Also enable hardware
debounce feature for GPIO keys.

Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index 2ab2497..e32aa23 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -25,6 +25,9 @@
 #include <linux/spi/ads7846.h>
 #include <linux/regulator/machine.h>
 #include <linux/i2c/twl4030.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -36,12 +39,154 @@
 #include <mach/hardware.h>
 #include <mach/mcspi.h>
 #include <mach/usb.h>
+#include <mach/keypad.h>
 
 #include "sdram-micron-mt46h32m32lf-6.h"
 #include "mmc-twl4030.h"
 
 #define OMAP3_PANDORA_TS_GPIO		94
 
+/* hardware debounce: (value + 1) * 31us */
+#define GPIO_DEBOUNCE_TIME		127
+
+static struct gpio_led pandora_gpio_leds[] = {
+	{
+		.name			= "pandora::sd1",
+		.default_trigger	= "mmc0",
+		.gpio			= 128,
+	}, {
+		.name			= "pandora::sd2",
+		.default_trigger	= "mmc1",
+		.gpio			= 129,
+	}, {
+		.name			= "pandora::bluetooth",
+		.gpio			= 158,
+	}, {
+		.name			= "pandora::wifi",
+		.gpio			= 159,
+	},
+};
+
+static struct gpio_led_platform_data pandora_gpio_led_data = {
+	.leds		= pandora_gpio_leds,
+	.num_leds	= ARRAY_SIZE(pandora_gpio_leds),
+};
+
+static struct platform_device pandora_leds_gpio = {
+	.name	= "leds-gpio",
+	.id	= -1,
+	.dev	= {
+		.platform_data	= &pandora_gpio_led_data,
+	},
+};
+
+#define GPIO_BUTTON(gpio_num, ev_type, ev_code, act_low, descr)	\
+{								\
+	.gpio		= gpio_num,				\
+	.type		= ev_type,				\
+	.code		= ev_code,				\
+	.active_low	= act_low,				\
+	.desc		= "btn " descr,				\
+}
+
+#define GPIO_BUTTON_LOW(gpio_num, event_code, description)	\
+	GPIO_BUTTON(gpio_num, EV_KEY, event_code, 1, description)
+
+static struct gpio_keys_button pandora_gpio_keys[] = {
+	GPIO_BUTTON_LOW(110,	KEY_UP,		"up"),
+	GPIO_BUTTON_LOW(103,	KEY_DOWN,	"down"),
+	GPIO_BUTTON_LOW(96,	KEY_LEFT,	"left"),
+	GPIO_BUTTON_LOW(98,	KEY_RIGHT,	"right"),
+	GPIO_BUTTON_LOW(111,	BTN_A,		"a"),
+	GPIO_BUTTON_LOW(106,	BTN_B,		"b"),
+	GPIO_BUTTON_LOW(109,	BTN_X,		"x"),
+	GPIO_BUTTON_LOW(101,	BTN_Y,		"y"),
+	GPIO_BUTTON_LOW(102,	BTN_TL,		"l"),
+	GPIO_BUTTON_LOW(97,	BTN_TL2,	"l2"),
+	GPIO_BUTTON_LOW(105,	BTN_TR,		"r"),
+	GPIO_BUTTON_LOW(107,	BTN_TR2,	"r2"),
+	GPIO_BUTTON_LOW(104,	KEY_LEFTCTRL,	"ctrl"),
+	GPIO_BUTTON_LOW(99,	KEY_MENU,	"menu"),
+	GPIO_BUTTON_LOW(176,	KEY_COFFEE,	"hold"),
+	GPIO_BUTTON(100, EV_KEY, KEY_LEFTALT, 0, "alt"),
+	GPIO_BUTTON(108, EV_SW, SW_LID, 1, "lid"),
+};
+
+static struct gpio_keys_platform_data pandora_gpio_key_info = {
+	.buttons	= pandora_gpio_keys,
+	.nbuttons	= ARRAY_SIZE(pandora_gpio_keys),
+};
+
+static struct platform_device pandora_keys_gpio = {
+	.name	= "gpio-keys",
+	.id	= -1,
+	.dev	= {
+		.platform_data	= &pandora_gpio_key_info,
+	},
+};
+
+static void __init pandora_keys_gpio_init(void)
+{
+	/* set debounce time for GPIO banks 4 and 6 */
+	omap_set_gpio_debounce_time(32 * 3, GPIO_DEBOUNCE_TIME);
+	omap_set_gpio_debounce_time(32 * 5, GPIO_DEBOUNCE_TIME);
+}
+
+static int pandora_keypad_map[] = {
+	/* col, row, code */
+	KEY(0, 0, KEY_9),
+	KEY(0, 1, KEY_0),
+	KEY(0, 2, KEY_BACKSPACE),
+	KEY(0, 3, KEY_O),
+	KEY(0, 4, KEY_P),
+	KEY(0, 5, KEY_K),
+	KEY(0, 6, KEY_L),
+	KEY(0, 7, KEY_ENTER),
+	KEY(1, 0, KEY_8),
+	KEY(1, 1, KEY_7),
+	KEY(1, 2, KEY_6),
+	KEY(1, 3, KEY_5),
+	KEY(1, 4, KEY_4),
+	KEY(1, 5, KEY_3),
+	KEY(1, 6, KEY_2),
+	KEY(1, 7, KEY_1),
+	KEY(2, 0, KEY_I),
+	KEY(2, 1, KEY_U),
+	KEY(2, 2, KEY_Y),
+	KEY(2, 3, KEY_T),
+	KEY(2, 4, KEY_R),
+	KEY(2, 5, KEY_E),
+	KEY(2, 6, KEY_W),
+	KEY(2, 7, KEY_Q),
+	KEY(3, 0, KEY_J),
+	KEY(3, 1, KEY_H),
+	KEY(3, 2, KEY_G),
+	KEY(3, 3, KEY_F),
+	KEY(3, 4, KEY_D),
+	KEY(3, 5, KEY_S),
+	KEY(3, 6, KEY_A),
+	KEY(3, 7, KEY_LEFTSHIFT),
+	KEY(4, 0, KEY_N),
+	KEY(4, 1, KEY_B),
+	KEY(4, 2, KEY_V),
+	KEY(4, 3, KEY_C),
+	KEY(4, 4, KEY_X),
+	KEY(4, 5, KEY_Z),
+	KEY(4, 6, KEY_DOT),
+	KEY(4, 7, KEY_COMMA),
+	KEY(5, 0, KEY_M),
+	KEY(5, 1, KEY_SPACE),
+	KEY(5, 2, KEY_FN),
+};
+
+static struct twl4030_keypad_data pandora_kp_data = {
+	.rows		= 8,
+	.cols		= 6,
+	.keymap		= pandora_keypad_map,
+	.keymapsize	= ARRAY_SIZE(pandora_keypad_map),
+	.rep		= 1,
+};
+
 static struct twl4030_hsmmc_info omap3pandora_mmc[] = {
 	{
 		.mmc		= 1,
@@ -142,6 +287,7 @@ static struct twl4030_platform_data omap3pandora_twldata = {
 	.usb		= &omap3pandora_usb_data,
 	.vmmc1		= &pandora_vmmc1,
 	.vmmc2		= &pandora_vmmc2,
+	.keypad		= &pandora_kp_data,
 };
 
 static struct i2c_board_info __initdata omap3pandora_i2c_boardinfo[] = {
@@ -234,6 +380,8 @@ static struct omap_board_config_kernel omap3pandora_config[] __initdata = {
 
 static struct platform_device *omap3pandora_devices[] __initdata = {
 	&omap3pandora_lcd_device,
+	&pandora_leds_gpio,
+	&pandora_keys_gpio,
 };
 
 static void __init omap3pandora_init(void)
@@ -247,6 +395,7 @@ static void __init omap3pandora_init(void)
 	spi_register_board_info(omap3pandora_spi_board_info,
 			ARRAY_SIZE(omap3pandora_spi_board_info));
 	omap3pandora_ads7846_init();
+	pandora_keys_gpio_init();
 	usb_musb_init();
 }
 
-- 
cgit v0.10.2


From 44169075e6eaa87bab6a296209d8d0610879b394 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Thu, 28 May 2009 14:16:04 -0700
Subject: ARM: OMAP4: Add minimal support for omap4

This patch adds the support for OMAP4. The platform and machine specific
headers and sources updated for OMAP4430 SDP platform.

OMAP4430 is Texas Instrument's SOC based on ARM Cortex-A9 SMP architecture.
It's a dual core SOC with GIC used for interrupt handling and SCU for cache
coherency.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 2249049..f91934b 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -5,6 +5,9 @@
  *
  * Author: Juha Yrjola
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -424,6 +427,9 @@ void __init gpmc_init(void)
 	} else if (cpu_is_omap34xx()) {
 		ck = "gpmc_fck";
 		l = OMAP34XX_GPMC_BASE;
+	} else if (cpu_is_omap44xx()) {
+		ck = "gpmc_fck";
+		l = OMAP44XX_GPMC_BASE;
 	}
 
 	gpmc_l3_clk = clk_get(NULL, ck);
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 34b5914..458990e 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -6,6 +6,9 @@
  * Copyright (C) 2005 Nokia Corporation
  * Written by Tony Lindgren <tony@atomide.com>
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -200,7 +203,10 @@ void __init omap2_check_revision(void)
 		omap24xx_check_revision();
 	else if (cpu_is_omap34xx())
 		omap34xx_check_revision();
-	else
+	else if (cpu_is_omap44xx()) {
+		printk(KERN_INFO "FIXME: CPU revision = OMAP4430\n");
+		return;
+	} else
 		pr_err("OMAP revision unknown, please fix!\n");
 
 	/*
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 916fcd3..32afd94 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -4,12 +4,14 @@
  * OMAP2 I/O mapping code
  *
  * Copyright (C) 2005 Nokia Corporation
- * Copyright (C) 2007 Texas Instruments
+ * Copyright (C) 2007-2009 Texas Instruments
  *
  * Author:
  *	Juha Yrjola <juha.yrjola@nokia.com>
  *	Syed Khasim <x0khasim@ti.com>
  *
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -30,6 +32,7 @@
 #include <mach/sdrc.h>
 #include <mach/gpmc.h>
 
+#ifndef CONFIG_ARCH_OMAP4	/* FIXME: Remove this once clkdev is ready */
 #include "clock.h"
 
 #include <mach/powerdomain.h>
@@ -38,7 +41,7 @@
 
 #include <mach/clockdomain.h>
 #include "clockdomains.h"
-
+#endif
 /*
  * The machine specific code may provide the extra mapping besides the
  * default mapping provided here.
@@ -166,6 +169,46 @@ static struct map_desc omap34xx_io_desc[] __initdata = {
 	},
 };
 #endif
+#ifdef	CONFIG_ARCH_OMAP4
+static struct map_desc omap44xx_io_desc[] __initdata = {
+	{
+		.virtual	= L3_44XX_VIRT,
+		.pfn		= __phys_to_pfn(L3_44XX_PHYS),
+		.length		= L3_44XX_SIZE,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= L4_44XX_VIRT,
+		.pfn		= __phys_to_pfn(L4_44XX_PHYS),
+		.length		= L4_44XX_SIZE,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= L4_WK_44XX_VIRT,
+		.pfn		= __phys_to_pfn(L4_WK_44XX_PHYS),
+		.length		= L4_WK_44XX_SIZE,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= OMAP44XX_GPMC_VIRT,
+		.pfn		= __phys_to_pfn(OMAP44XX_GPMC_PHYS),
+		.length		= OMAP44XX_GPMC_SIZE,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= L4_PER_44XX_VIRT,
+		.pfn		= __phys_to_pfn(L4_PER_44XX_PHYS),
+		.length		= L4_PER_44XX_SIZE,
+		.type		= MT_DEVICE,
+	},
+	{
+		.virtual	= L4_EMU_44XX_VIRT,
+		.pfn		= __phys_to_pfn(L4_EMU_44XX_PHYS),
+		.length		= L4_EMU_44XX_SIZE,
+		.type		= MT_DEVICE,
+	},
+};
+#endif
 
 void __init omap2_map_common_io(void)
 {
@@ -183,6 +226,9 @@ void __init omap2_map_common_io(void)
 	iotable_init(omap34xx_io_desc, ARRAY_SIZE(omap34xx_io_desc));
 #endif
 
+#if defined(CONFIG_ARCH_OMAP4)
+	iotable_init(omap44xx_io_desc, ARRAY_SIZE(omap44xx_io_desc));
+#endif
 	/* Normally devicemaps_init() would flush caches and tlb after
 	 * mdesc->map_io(), but we must also do it here because of the CPU
 	 * revision check below.
@@ -198,9 +244,11 @@ void __init omap2_map_common_io(void)
 void __init omap2_init_common_hw(struct omap_sdrc_params *sp)
 {
 	omap2_mux_init();
+#ifndef CONFIG_ARCH_OMAP4 /* FIXME: Remove this once the clkdev is ready */
 	pwrdm_init(powerdomains_omap);
 	clkdm_init(clockdomains_omap, clkdm_pwrdm_autodeps);
 	omap2_clk_init();
 	omap2_sdrc_init(sp);
+#endif
 	gpmc_init();
 }
diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 3c2d325..29dc6f5 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -8,6 +8,9 @@
  *
  * Based off of arch/arm/mach-omap/omap1/serial.c
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com
+ *
  * This file is subject to the terms and conditions of the GNU General Public
  * License. See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -122,6 +125,10 @@ void __init omap_serial_init(void)
 
 	if (info == NULL)
 		return;
+	if (cpu_is_omap44xx()) {
+		for (i = 0; i < OMAP_MAX_NR_PORTS; i++)
+			serial_platform_data[i].irq += 32;
+	}
 
 	for (i = 0; i < OMAP_MAX_NR_PORTS; i++) {
 		struct plat_serial8250_port *p = serial_platform_data + i;
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index f36aba1..2ce474a 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -17,9 +17,10 @@
  *
  * Some parts based off of TI's 24xx code:
  *
- *   Copyright (C) 2004 Texas Instruments, Inc.
+ * Copyright (C) 2004-2009 Texas Instruments, Inc.
  *
  * Roughly modelled after the OMAP1 MPU timer code.
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License. See the file "COPYING" in the main directory of this archive
@@ -82,7 +83,8 @@ static void omap2_gp_timer_set_mode(enum clock_event_mode mode,
 	case CLOCK_EVT_MODE_PERIODIC:
 		period = clk_get_rate(omap_dm_timer_get_fclk(gptimer)) / HZ;
 		period -= 1;
-
+		if (cpu_is_omap44xx())
+			period = 0xff;	/* FIXME: */
 		omap_dm_timer_set_load_start(gptimer, 1, 0xffffffff - period);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
@@ -145,6 +147,9 @@ static void __init omap2_gp_clockevent_init(void)
 		     "timer-gp: omap_dm_timer_set_source() failed\n");
 
 	tick_rate = clk_get_rate(omap_dm_timer_get_fclk(gptimer));
+	if (cpu_is_omap44xx())
+		/* Assuming 32kHz clk is driving GPT1 */
+		tick_rate = 32768;	/* FIXME: */
 
 	pr_info("OMAP clockevent source: GPTIMER%d at %u Hz\n",
 		gptimer_id, tick_rate);
diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c
index 70b68ef..66738c3 100644
--- a/arch/arm/plat-omap/common.c
+++ b/arch/arm/plat-omap/common.c
@@ -2,6 +2,10 @@
  * linux/arch/arm/plat-omap/common.c
  *
  * Code common to all OMAP machines.
+ * The file is created by Tony Lindgren <tony@atomide.com>
+ *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -217,6 +221,15 @@ static cycle_t omap34xx_32k_read(struct clocksource *cs)
 #define omap34xx_32k_read	NULL
 #endif
 
+#ifdef CONFIG_ARCH_OMAP4
+static cycle_t omap44xx_32k_read(struct clocksource *cs)
+{
+	return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10);
+}
+#else
+#define omap44xx_32k_read	NULL
+#endif
+
 /*
  * Kernel assumes that sched_clock can be called early but may not have
  * things ready yet.
@@ -264,6 +277,8 @@ static int __init omap_init_clocksource_32k(void)
 			clocksource_32k.read = omap2430_32k_read;
 		else if (cpu_is_omap34xx())
 			clocksource_32k.read = omap34xx_32k_read;
+		else if (cpu_is_omap44xx())
+			clocksource_32k.read = omap44xx_32k_read;
 		else
 			return -ENODEV;
 
@@ -351,3 +366,19 @@ void __init omap2_set_globals_343x(void)
 }
 #endif
 
+#if defined(CONFIG_ARCH_OMAP4)
+static struct omap_globals omap4_globals = {
+	.class	= OMAP443X_CLASS,
+	.tap	= OMAP2_IO_ADDRESS(0x4830a000),
+	.ctrl	= OMAP2_IO_ADDRESS(OMAP443X_CTRL_BASE),
+	.prm	= OMAP2_IO_ADDRESS(OMAP4430_PRM_BASE),
+	.cm	= OMAP2_IO_ADDRESS(OMAP4430_CM_BASE),
+};
+
+void __init omap2_set_globals_443x(void)
+{
+	omap2_set_globals_tap(&omap4_globals);
+	omap2_set_globals_control(&omap4_globals);
+}
+#endif
+
diff --git a/arch/arm/plat-omap/devices.c b/arch/arm/plat-omap/devices.c
index 87fb7ff..a64b692 100644
--- a/arch/arm/plat-omap/devices.c
+++ b/arch/arm/plat-omap/devices.c
@@ -311,6 +311,8 @@ static void omap_init_wdt(void)
 		wdt_resources[0].start = 0x49016000; /* WDT2 */
 	else if (cpu_is_omap343x())
 		wdt_resources[0].start = 0x48314000; /* WDT2 */
+	else if (cpu_is_omap44xx())
+		wdt_resources[0].start = 0x4a314000;
 	else
 		return;
 
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index 06e9cbe..def14ec 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -10,6 +10,9 @@
  * Merged to support both OMAP1 and OMAP2 by Tony Lindgren <tony@atomide.com>
  * Some functions based on earlier dma-omap.c Copyright (C) 2001 RidgeRun, Inc.
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * Support functions for the OMAP internal DMA channels.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -872,7 +875,7 @@ omap_dma_set_prio_lch(int lch, unsigned char read_prio,
 	}
 	l = dma_read(CCR(lch));
 	l &= ~((1 << 6) | (1 << 26));
-	if (cpu_is_omap2430() || cpu_is_omap34xx())
+	if (cpu_is_omap2430() || cpu_is_omap34xx() ||  cpu_is_omap44xx())
 		l |= ((read_prio & 0x1) << 6) | ((write_prio & 0x1) << 26);
 	else
 		l |= ((read_prio & 0x1) << 6);
@@ -1844,7 +1847,8 @@ static irqreturn_t omap1_dma_irq_handler(int irq, void *dev_id)
 #define omap1_dma_irq_handler	NULL
 #endif
 
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+			defined(CONFIG_ARCH_OMAP4)
 
 static int omap2_dma_handle_ch(int ch)
 {
@@ -2339,6 +2343,9 @@ static int __init omap_init_dma(void)
 	} else if (cpu_is_omap34xx()) {
 		omap_dma_base = IO_ADDRESS(OMAP34XX_DMA4_BASE);
 		dma_lch_count = OMAP_DMA4_LOGICAL_DMA_CH_COUNT;
+	} else if (cpu_is_omap44xx()) {
+		omap_dma_base = IO_ADDRESS(OMAP44XX_DMA4_BASE);
+		dma_lch_count = OMAP_DMA4_LOGICAL_DMA_CH_COUNT;
 	} else {
 		pr_err("DMA init failed for unsupported omap\n");
 		return -ENODEV;
@@ -2437,12 +2444,18 @@ static int __init omap_init_dma(void)
 		}
 	}
 
-	if (cpu_is_omap2430() || cpu_is_omap34xx())
+	if (cpu_is_omap2430() || cpu_is_omap34xx() || cpu_is_omap44xx())
 		omap_dma_set_global_params(DMA_DEFAULT_ARB_RATE,
 				DMA_DEFAULT_FIFO_DEPTH, 0);
 
-	if (cpu_class_is_omap2())
-		setup_irq(INT_24XX_SDMA_IRQ0, &omap24xx_dma_irq);
+	if (cpu_class_is_omap2()) {
+		int irq;
+		if (cpu_is_omap44xx())
+			irq = INT_44XX_SDMA_IRQ0;
+		else
+			irq = INT_24XX_SDMA_IRQ0;
+		setup_irq(irq, &omap24xx_dma_irq);
+	}
 
 	/* FIXME: Update LCD DMA to work on 24xx */
 	if (cpu_class_is_omap1()) {
diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index ee20612..7f50b61 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -7,6 +7,9 @@
  * OMAP2 support by Juha Yrjola
  * API improvements and OMAP2 clock framework support by Timo Teras
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2 of the License, or (at your
@@ -150,7 +153,8 @@
 struct omap_dm_timer {
 	unsigned long phys_base;
 	int irq;
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+			defined(CONFIG_ARCH_OMAP4)
 	struct clk *iclk, *fclk;
 #endif
 	void __iomem *io_base;
@@ -169,6 +173,9 @@ struct omap_dm_timer {
 #define omap3_dm_timers			NULL
 #define omap3_dm_source_names		NULL
 #define omap3_dm_source_clocks		NULL
+#define omap4_dm_timers			NULL
+#define omap4_dm_source_names		NULL
+#define omap4_dm_source_clocks		NULL
 
 static struct omap_dm_timer omap1_dm_timers[] = {
 	{ .phys_base = 0xfffb1400, .irq = INT_1610_GPTIMER1 },
@@ -191,6 +198,9 @@ static const int dm_timer_count = ARRAY_SIZE(omap1_dm_timers);
 #define omap3_dm_timers			NULL
 #define omap3_dm_source_names		NULL
 #define omap3_dm_source_clocks		NULL
+#define omap4_dm_timers			NULL
+#define omap4_dm_source_names		NULL
+#define omap4_dm_source_clocks		NULL
 
 static struct omap_dm_timer omap2_dm_timers[] = {
 	{ .phys_base = 0x48028000, .irq = INT_24XX_GPTIMER1 },
@@ -225,6 +235,9 @@ static const int dm_timer_count = ARRAY_SIZE(omap2_dm_timers);
 #define omap2_dm_timers			NULL
 #define omap2_dm_source_names		NULL
 #define omap2_dm_source_clocks		NULL
+#define omap4_dm_timers			NULL
+#define omap4_dm_source_names		NULL
+#define omap4_dm_source_clocks		NULL
 
 static struct omap_dm_timer omap3_dm_timers[] = {
 	{ .phys_base = 0x48318000, .irq = INT_24XX_GPTIMER1 },
@@ -250,6 +263,40 @@ static const char *omap3_dm_source_names[] __initdata = {
 static struct clk *omap3_dm_source_clocks[2];
 static const int dm_timer_count = ARRAY_SIZE(omap3_dm_timers);
 
+#elif defined(CONFIG_ARCH_OMAP4)
+
+#define omap_dm_clk_enable(x)		clk_enable(x)
+#define omap_dm_clk_disable(x)		clk_disable(x)
+#define omap1_dm_timers			NULL
+#define omap2_dm_timers			NULL
+#define omap2_dm_source_names		NULL
+#define omap2_dm_source_clocks		NULL
+#define omap3_dm_timers			NULL
+#define omap3_dm_source_names		NULL
+#define omap3_dm_source_clocks		NULL
+
+static struct omap_dm_timer omap4_dm_timers[] = {
+	{ .phys_base = 0x4a318000, .irq = INT_44XX_GPTIMER1 },
+	{ .phys_base = 0x48032000, .irq = INT_44XX_GPTIMER2 },
+	{ .phys_base = 0x48034000, .irq = INT_44XX_GPTIMER3 },
+	{ .phys_base = 0x48036000, .irq = INT_44XX_GPTIMER4 },
+	{ .phys_base = 0x40138000, .irq = INT_44XX_GPTIMER5 },
+	{ .phys_base = 0x4013a000, .irq = INT_44XX_GPTIMER6 },
+	{ .phys_base = 0x4013a000, .irq = INT_44XX_GPTIMER7 },
+	{ .phys_base = 0x4013e000, .irq = INT_44XX_GPTIMER8 },
+	{ .phys_base = 0x4803e000, .irq = INT_44XX_GPTIMER9 },
+	{ .phys_base = 0x48086000, .irq = INT_44XX_GPTIMER10 },
+	{ .phys_base = 0x48088000, .irq = INT_44XX_GPTIMER11 },
+	{ .phys_base = 0x4a320000, .irq = INT_44XX_GPTIMER12 },
+};
+static const char *omap4_dm_source_names[] __initdata = {
+	"sys_ck",
+	"omap_32k_fck",
+	NULL
+};
+static struct clk *omap4_dm_source_clocks[2];
+static const int dm_timer_count = ARRAY_SIZE(omap4_dm_timers);
+
 #else
 
 #error OMAP architecture not supported!
@@ -459,7 +506,8 @@ __u32 omap_dm_timer_modify_idlect_mask(__u32 inputmask)
 }
 EXPORT_SYMBOL_GPL(omap_dm_timer_modify_idlect_mask);
 
-#elif defined(CONFIG_ARCH_OMAP2) || defined (CONFIG_ARCH_OMAP3)
+#elif defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+				defined(CONFIG_ARCH_OMAP4)
 
 struct clk *omap_dm_timer_get_fclk(struct omap_dm_timer *timer)
 {
@@ -711,6 +759,10 @@ int __init omap_dm_timer_init(void)
 		dm_timers = omap3_dm_timers;
 		dm_source_names = omap3_dm_source_names;
 		dm_source_clocks = omap3_dm_source_clocks;
+	} else if (cpu_is_omap44xx()) {
+		dm_timers = omap4_dm_timers;
+		dm_source_names = omap4_dm_source_names;
+		dm_source_clocks = omap4_dm_source_clocks;
 	}
 
 	if (cpu_class_is_omap2())
@@ -723,7 +775,8 @@ int __init omap_dm_timer_init(void)
 	for (i = 0; i < dm_timer_count; i++) {
 		timer = &dm_timers[i];
 		timer->io_base = IO_ADDRESS(timer->phys_base);
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+					defined(CONFIG_ARCH_OMAP4)
 		if (cpu_class_is_omap2()) {
 			char clk_name[16];
 			sprintf(clk_name, "gpt%d_ick", i + 1);
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index ee0b21f..7fd89ba 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -6,6 +6,9 @@
  * Copyright (C) 2003-2005 Nokia Corporation
  * Written by Juha Yrjölä <juha.yrjola@nokia.com>
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -146,6 +149,16 @@
 #define OMAP34XX_GPIO5_BASE		IO_ADDRESS(0x49056000)
 #define OMAP34XX_GPIO6_BASE		IO_ADDRESS(0x49058000)
 
+/*
+ * OMAP44XX  specific GPIO registers
+ */
+#define OMAP44XX_GPIO1_BASE             IO_ADDRESS(0x4a310000)
+#define OMAP44XX_GPIO2_BASE             IO_ADDRESS(0x48055000)
+#define OMAP44XX_GPIO3_BASE             IO_ADDRESS(0x48057000)
+#define OMAP44XX_GPIO4_BASE             IO_ADDRESS(0x48059000)
+#define OMAP44XX_GPIO5_BASE             IO_ADDRESS(0x4805B000)
+#define OMAP44XX_GPIO6_BASE             IO_ADDRESS(0x4805D000)
+
 #define OMAP_MPUIO_VBASE		IO_ADDRESS(OMAP_MPUIO_BASE)
 
 struct gpio_bank {
@@ -153,11 +166,13 @@ struct gpio_bank {
 	u16 irq;
 	u16 virtual_irq_start;
 	int method;
-#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP24XX) ||  \
+		defined(CONFIG_ARCH_OMAP34XX) || defined(CONFIG_ARCH_OMAP4)
 	u32 suspend_wakeup;
 	u32 saved_wakeup;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 	u32 non_wakeup_gpios;
 	u32 enabled_non_wakeup_gpios;
 
@@ -251,6 +266,24 @@ static struct gpio_bank gpio_bank_34xx[6] = {
 
 #endif
 
+#ifdef CONFIG_ARCH_OMAP4
+static struct gpio_bank gpio_bank_44xx[6] = {
+	{ OMAP44XX_GPIO1_BASE, INT_44XX_GPIO_BANK1, IH_GPIO_BASE,	\
+		METHOD_GPIO_24XX },
+	{ OMAP44XX_GPIO2_BASE, INT_44XX_GPIO_BANK2, IH_GPIO_BASE + 32,	\
+		METHOD_GPIO_24XX },
+	{ OMAP44XX_GPIO3_BASE, INT_44XX_GPIO_BANK3, IH_GPIO_BASE + 64,	\
+		METHOD_GPIO_24XX },
+	{ OMAP44XX_GPIO4_BASE, INT_44XX_GPIO_BANK4, IH_GPIO_BASE + 96,	\
+		METHOD_GPIO_24XX },
+	{ OMAP44XX_GPIO5_BASE, INT_44XX_GPIO_BANK5, IH_GPIO_BASE + 128, \
+		METHOD_GPIO_24XX },
+	{ OMAP44XX_GPIO6_BASE, INT_44XX_GPIO_BANK6, IH_GPIO_BASE + 160, \
+		METHOD_GPIO_24XX },
+};
+
+#endif
+
 static struct gpio_bank *gpio_bank;
 static int gpio_bank_count;
 
@@ -273,7 +306,7 @@ static inline struct gpio_bank *get_gpio_bank(int gpio)
 	}
 	if (cpu_is_omap24xx())
 		return &gpio_bank[gpio >> 5];
-	if (cpu_is_omap34xx())
+	if (cpu_is_omap34xx() || cpu_is_omap44xx())
 		return &gpio_bank[gpio >> 5];
 	BUG();
 	return NULL;
@@ -285,7 +318,7 @@ static inline int get_gpio_index(int gpio)
 		return gpio & 0x1f;
 	if (cpu_is_omap24xx())
 		return gpio & 0x1f;
-	if (cpu_is_omap34xx())
+	if (cpu_is_omap34xx() || cpu_is_omap44xx())
 		return gpio & 0x1f;
 	return gpio & 0x0f;
 }
@@ -307,7 +340,7 @@ static inline int gpio_valid(int gpio)
 		return 0;
 	if (cpu_is_omap24xx() && gpio < 128)
 		return 0;
-	if (cpu_is_omap34xx() && gpio < 192)
+	if ((cpu_is_omap34xx() || cpu_is_omap44xx()) && gpio < 192)
 		return 0;
 	return -1;
 }
@@ -353,7 +386,8 @@ static void _set_gpio_direction(struct gpio_bank *bank, int gpio, int is_input)
 		reg += OMAP850_GPIO_DIR_CONTROL;
 		break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 	case METHOD_GPIO_24XX:
 		reg += OMAP24XX_GPIO_OE;
 		break;
@@ -425,7 +459,8 @@ static void _set_gpio_dataout(struct gpio_bank *bank, int gpio, int enable)
 			l &= ~(1 << gpio);
 		break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 	case METHOD_GPIO_24XX:
 		if (enable)
 			reg += OMAP24XX_GPIO_SETDATAOUT;
@@ -476,7 +511,8 @@ static int __omap_get_gpio_datain(int gpio)
 		reg += OMAP850_GPIO_DATA_INPUT;
 		break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 	case METHOD_GPIO_24XX:
 		reg += OMAP24XX_GPIO_DATAIN;
 		break;
@@ -520,7 +556,7 @@ void omap_set_gpio_debounce(int gpio, int enable)
 	else
 		goto done;
 
-	if (cpu_is_omap34xx()) {
+	if (cpu_is_omap34xx() || cpu_is_omap44xx()) {
 		if (enable)
 			clk_enable(bank->dbck);
 		else
@@ -550,7 +586,8 @@ void omap_set_gpio_debounce_time(int gpio, int enc_time)
 }
 EXPORT_SYMBOL(omap_set_gpio_debounce_time);
 
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 static inline void set_24xx_gpio_triggering(struct gpio_bank *bank, int gpio,
 						int trigger)
 {
@@ -660,7 +697,8 @@ static int _set_gpio_triggering(struct gpio_bank *bank, int gpio, int trigger)
 			goto bad;
 		break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 	case METHOD_GPIO_24XX:
 		set_24xx_gpio_triggering(bank, gpio, trigger);
 		break;
@@ -745,7 +783,8 @@ static void _clear_gpio_irqbank(struct gpio_bank *bank, int gpio_mask)
 		reg += OMAP850_GPIO_INT_STATUS;
 		break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 	case METHOD_GPIO_24XX:
 		reg += OMAP24XX_GPIO_IRQSTATUS1;
 		break;
@@ -814,7 +853,8 @@ static u32 _get_gpio_irqbank_mask(struct gpio_bank *bank)
 		inv = 1;
 		break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 	case METHOD_GPIO_24XX:
 		reg += OMAP24XX_GPIO_IRQENABLE1;
 		mask = 0xffffffff;
@@ -887,7 +927,8 @@ static void _enable_gpio_irqbank(struct gpio_bank *bank, int gpio_mask, int enab
 			l |= gpio_mask;
 		break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+		defined(CONFIG_ARCH_OMAP4)
 	case METHOD_GPIO_24XX:
 		if (enable)
 			reg += OMAP24XX_GPIO_SETIRQENABLE1;
@@ -932,7 +973,8 @@ static int _set_gpio_wakeup(struct gpio_bank *bank, int gpio, int enable)
 		spin_unlock_irqrestore(&bank->lock, flags);
 		return 0;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 	case METHOD_GPIO_24XX:
 		if (bank->non_wakeup_gpios & (1 << gpio)) {
 			printk(KERN_ERR "Unable to modify wakeup on "
@@ -1017,7 +1059,8 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
 		__raw_writel(1 << offset, reg);
 	}
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 	if (bank->method == METHOD_GPIO_24XX) {
 		/* Disable wake-up during idle for dynamic tick */
 		void __iomem *reg = bank->base + OMAP24XX_GPIO_CLEARWKUENA;
@@ -1069,7 +1112,8 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
 	if (bank->method == METHOD_GPIO_850)
 		isr_reg = bank->base + OMAP850_GPIO_INT_STATUS;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 	if (bank->method == METHOD_GPIO_24XX)
 		isr_reg = bank->base + OMAP24XX_GPIO_IRQSTATUS1;
 #endif
@@ -1346,7 +1390,7 @@ static int gpio_2irq(struct gpio_chip *chip, unsigned offset)
 /*---------------------------------------------------------------------*/
 
 static int initialized;
-#if !defined(CONFIG_ARCH_OMAP3)
+#if !(defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4))
 static struct clk * gpio_ick;
 #endif
 
@@ -1359,7 +1403,7 @@ static struct clk * gpio5_ick;
 static struct clk * gpio5_fck;
 #endif
 
-#if defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
 static struct clk *gpio_iclks[OMAP34XX_NR_GPIOS];
 #endif
 
@@ -1419,8 +1463,8 @@ static int __init _omap_gpio_init(void)
 	}
 #endif
 
-#if defined(CONFIG_ARCH_OMAP3)
-	if (cpu_is_omap34xx()) {
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
+	if (cpu_is_omap34xx() || cpu_is_omap44xx()) {
 		for (i = 0; i < OMAP34XX_NR_GPIOS; i++) {
 			sprintf(clk_name, "gpio%d_ick", i + 1);
 			gpio_iclks[i] = clk_get(NULL, clk_name);
@@ -1497,6 +1541,17 @@ static int __init _omap_gpio_init(void)
 			(rev >> 4) & 0x0f, rev & 0x0f);
 	}
 #endif
+#ifdef CONFIG_ARCH_OMAP4
+	if (cpu_is_omap44xx()) {
+		int rev;
+
+		gpio_bank_count = OMAP34XX_NR_GPIOS;
+		gpio_bank = gpio_bank_44xx;
+		rev = __raw_readl(gpio_bank[0].base + OMAP24XX_GPIO_REVISION);
+		printk(KERN_INFO "OMAP44xx GPIO hardware version %d.%d\n",
+			(rev >> 4) & 0x0f, rev & 0x0f);
+	}
+#endif
 	for (i = 0; i < gpio_bank_count; i++) {
 		int j, gpio_count = 16;
 
@@ -1520,7 +1575,8 @@ static int __init _omap_gpio_init(void)
 			gpio_count = 32; /* 730 has 32-bit GPIOs */
 		}
 
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 		if (bank->method == METHOD_GPIO_24XX) {
 			static const u32 non_wakeup_gpios[] = {
 				0xe203ffc0, 0x08700040
@@ -1577,7 +1633,7 @@ static int __init _omap_gpio_init(void)
 		set_irq_chained_handler(bank->irq, gpio_irq_handler);
 		set_irq_data(bank->irq, bank);
 
-		if (cpu_is_omap34xx()) {
+		if (cpu_is_omap34xx() || cpu_is_omap44xx()) {
 			sprintf(clk_name, "gpio%d_dbck", i + 1);
 			bank->dbck = clk_get(NULL, clk_name);
 			if (IS_ERR(bank->dbck))
@@ -1599,7 +1655,8 @@ static int __init _omap_gpio_init(void)
 	return 0;
 }
 
-#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP24XX) || \
+		defined(CONFIG_ARCH_OMAP34XX) || defined(CONFIG_ARCH_OMAP4)
 static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg)
 {
 	int i;
@@ -1622,7 +1679,8 @@ static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg)
 			wake_set = bank->base + OMAP1610_GPIO_SET_WAKEUPENA;
 			break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 		case METHOD_GPIO_24XX:
 			wake_status = bank->base + OMAP24XX_GPIO_WAKE_EN;
 			wake_clear = bank->base + OMAP24XX_GPIO_CLEARWKUENA;
@@ -1663,7 +1721,8 @@ static int omap_gpio_resume(struct sys_device *dev)
 			wake_set = bank->base + OMAP1610_GPIO_SET_WAKEUPENA;
 			break;
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 		case METHOD_GPIO_24XX:
 			wake_clear = bank->base + OMAP24XX_GPIO_CLEARWKUENA;
 			wake_set = bank->base + OMAP24XX_GPIO_SETWKUENA;
@@ -1695,7 +1754,8 @@ static struct sys_device omap_gpio_device = {
 
 #endif
 
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 
 static int workaround_enabled;
 
@@ -1711,7 +1771,8 @@ void omap2_gpio_prepare_for_retention(void)
 
 		if (!(bank->enabled_non_wakeup_gpios))
 			continue;
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+				defined(CONFIG_ARCH_OMAP4)
 		bank->saved_datain = __raw_readl(bank->base + OMAP24XX_GPIO_DATAIN);
 		l1 = __raw_readl(bank->base + OMAP24XX_GPIO_FALLINGDETECT);
 		l2 = __raw_readl(bank->base + OMAP24XX_GPIO_RISINGDETECT);
@@ -1720,7 +1781,8 @@ void omap2_gpio_prepare_for_retention(void)
 		bank->saved_risingdetect = l2;
 		l1 &= ~bank->enabled_non_wakeup_gpios;
 		l2 &= ~bank->enabled_non_wakeup_gpios;
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 		__raw_writel(l1, bank->base + OMAP24XX_GPIO_FALLINGDETECT);
 		__raw_writel(l2, bank->base + OMAP24XX_GPIO_RISINGDETECT);
 #endif
@@ -1745,7 +1807,8 @@ void omap2_gpio_resume_after_retention(void)
 
 		if (!(bank->enabled_non_wakeup_gpios))
 			continue;
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 		__raw_writel(bank->saved_fallingdetect,
 				 bank->base + OMAP24XX_GPIO_FALLINGDETECT);
 		__raw_writel(bank->saved_risingdetect,
@@ -1755,14 +1818,16 @@ void omap2_gpio_resume_after_retention(void)
 		 * state.  If so, generate an IRQ by software.  This is
 		 * horribly racy, but it's the best we can do to work around
 		 * this silicon bug. */
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 		l = __raw_readl(bank->base + OMAP24XX_GPIO_DATAIN);
 #endif
 		l ^= bank->saved_datain;
 		l &= bank->non_wakeup_gpios;
 		if (l) {
 			u32 old0, old1;
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 			old0 = __raw_readl(bank->base + OMAP24XX_GPIO_LEVELDETECT0);
 			old1 = __raw_readl(bank->base + OMAP24XX_GPIO_LEVELDETECT1);
 			__raw_writel(old0 | l, bank->base + OMAP24XX_GPIO_LEVELDETECT0);
@@ -1798,7 +1863,8 @@ static int __init omap_gpio_sysinit(void)
 
 	mpuio_init();
 
-#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP24XX) || \
+		defined(CONFIG_ARCH_OMAP34XX) || defined(CONFIG_ARCH_OMAP4)
 	if (cpu_is_omap16xx() || cpu_class_is_omap2()) {
 		if (ret == 0) {
 			ret = sysdev_class_register(&omap_gpio_sysclass);
@@ -1887,7 +1953,7 @@ static int dbg_gpio_show(struct seq_file *s, void *unused)
 
 			irqstat = irq_desc[irq].status;
 #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP24XX) ||	\
-		defined(CONFIG_ARCH_OMAP34XX)
+		defined(CONFIG_ARCH_OMAP34XX) || defined(CONFIG_ARCH_OMAP4)
 			if (is_in && ((bank->suspend_wakeup & mask)
 					|| irqstat & IRQ_TYPE_SENSE_MASK)) {
 				char	*trigger = NULL;
diff --git a/arch/arm/plat-omap/include/mach/clock.h b/arch/arm/plat-omap/include/mach/clock.h
index 073a2c5..c20e02e 100644
--- a/arch/arm/plat-omap/include/mach/clock.h
+++ b/arch/arm/plat-omap/include/mach/clock.h
@@ -22,7 +22,8 @@ struct clkops {
 	void			(*disable)(struct clk *);
 };
 
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+		defined(CONFIG_ARCH_OMAP4)
 
 struct clksel_rate {
 	u32			val;
@@ -51,7 +52,7 @@ struct dpll_data {
 	u8			max_divider;
 	u32			max_tolerance;
 	u16			max_multiplier;
-#  if defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
 	u8			modes;
 	void __iomem		*autoidle_reg;
 	void __iomem		*idlest_reg;
@@ -83,7 +84,8 @@ struct clk {
 	void			(*init)(struct clk *);
 	__u8			enable_bit;
 	__s8			usecount;
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+		defined(CONFIG_ARCH_OMAP4)
 	u8			fixed_div;
 	void __iomem		*clksel_reg;
 	u32			clksel_mask;
diff --git a/arch/arm/plat-omap/include/mach/common.h b/arch/arm/plat-omap/include/mach/common.h
index 0ecf36d..4b18833 100644
--- a/arch/arm/plat-omap/include/mach/common.h
+++ b/arch/arm/plat-omap/include/mach/common.h
@@ -62,6 +62,7 @@ struct omap_globals {
 void omap2_set_globals_242x(void);
 void omap2_set_globals_243x(void);
 void omap2_set_globals_343x(void);
+void omap2_set_globals_443x(void);
 
 /* These get called from omap2_set_globals_xxxx(), do not call these */
 void omap2_set_globals_tap(struct omap_globals *);
diff --git a/arch/arm/plat-omap/include/mach/control.h b/arch/arm/plat-omap/include/mach/control.h
index 269147f..f45ec62 100644
--- a/arch/arm/plat-omap/include/mach/control.h
+++ b/arch/arm/plat-omap/include/mach/control.h
@@ -1,9 +1,9 @@
 /*
  * arch/arm/plat-omap/include/mach/control.h
  *
- * OMAP2/3 System Control Module definitions
+ * OMAP2/3/4 System Control Module definitions
  *
- * Copyright (C) 2007-2008 Texas Instruments, Inc.
+ * Copyright (C) 2007-2009 Texas Instruments, Inc.
  * Copyright (C) 2007-2008 Nokia Corporation
  *
  * Written by Paul Walmsley
@@ -190,7 +190,8 @@
 #define OMAP2_PBIASLITEVMODE0		(1 << 0)
 
 #ifndef __ASSEMBLY__
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+		defined(CONFIG_ARCH_OMAP4)
 extern void __iomem *omap_ctrl_base_get(void);
 extern u8 omap_ctrl_readb(u16 offset);
 extern u16 omap_ctrl_readw(u16 offset);
diff --git a/arch/arm/plat-omap/include/mach/cpu.h b/arch/arm/plat-omap/include/mach/cpu.h
index 98b1442..fc60c4e 100644
--- a/arch/arm/plat-omap/include/mach/cpu.h
+++ b/arch/arm/plat-omap/include/mach/cpu.h
@@ -5,8 +5,12 @@
  *
  * Copyright (C) 2004, 2008 Nokia Corporation
  *
+ * Copyright (C) 2009 Texas Instruments.
+ *
  * Written by Tony Lindgren <tony.lindgren@nokia.com>
  *
+ * Added OMAP4 specific defines - Santosh Shilimkar<santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -155,6 +159,8 @@ IS_OMAP_SUBCLASS(343x, 0x343)
 #define cpu_is_omap243x()		0
 #define cpu_is_omap34xx()		0
 #define cpu_is_omap343x()		0
+#define cpu_is_omap44xx()		0
+#define cpu_is_omap443x()		0
 
 #if defined(MULTI_OMAP1)
 # if defined(CONFIG_ARCH_OMAP730)
@@ -348,12 +354,21 @@ IS_OMAP_TYPE(3430, 0x3430)
 # define cpu_is_omap3430()		is_omap3430()
 #endif
 
+# if defined(CONFIG_ARCH_OMAP4)
+# undef cpu_is_omap44xx
+# undef cpu_is_omap443x
+# define cpu_is_omap44xx()		1
+# define cpu_is_omap443x()		1
+# endif
+
 /* Macros to detect if we have OMAP1 or OMAP2 */
 #define cpu_class_is_omap1()	(cpu_is_omap7xx() || cpu_is_omap15xx() || \
 				cpu_is_omap16xx())
-#define cpu_class_is_omap2()	(cpu_is_omap24xx() || cpu_is_omap34xx())
+#define cpu_class_is_omap2()	(cpu_is_omap24xx() || cpu_is_omap34xx() || \
+				cpu_is_omap44xx())
 
-#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+			defined(CONFIG_ARCH_OMAP4)
 
 /* Various silicon revisions for omap2 */
 #define OMAP242X_CLASS		0x24200024
@@ -370,6 +385,8 @@ IS_OMAP_TYPE(3430, 0x3430)
 #define OMAP3430_REV_ES3_0	0x34303034
 #define OMAP3430_REV_ES3_1	0x34304034
 
+#define OMAP443X_CLASS		0x44300034
+
 /*
  * omap_chip bits
  *
diff --git a/arch/arm/plat-omap/include/mach/debug-macro.S b/arch/arm/plat-omap/include/mach/debug-macro.S
index 1b11f5c..ac24050 100644
--- a/arch/arm/plat-omap/include/mach/debug-macro.S
+++ b/arch/arm/plat-omap/include/mach/debug-macro.S
@@ -36,7 +36,7 @@
 		add	\rx, \rx, #0x00004000	@ UART 3
 #endif
 
-#elif	CONFIG_ARCH_OMAP3
+#elif defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4)
 		moveq	\rx, #0x48000000	@ physical base address
 		movne	\rx, #0xd8000000	@ virtual base
 		orr	\rx, \rx, #0x0006a000
diff --git a/arch/arm/plat-omap/include/mach/dma.h b/arch/arm/plat-omap/include/mach/dma.h
index 19df76f..8c1eae8 100644
--- a/arch/arm/plat-omap/include/mach/dma.h
+++ b/arch/arm/plat-omap/include/mach/dma.h
@@ -48,6 +48,7 @@
 /* Hardware registers for omap2 and omap3 */
 #define OMAP24XX_DMA4_BASE		(L4_24XX_BASE + 0x56000)
 #define OMAP34XX_DMA4_BASE		(L4_34XX_BASE + 0x56000)
+#define OMAP44XX_DMA4_BASE		(L4_44XX_BASE + 0x56000)
 
 #define OMAP_DMA4_REVISION		0x00
 #define OMAP_DMA4_GCR			0x78
diff --git a/arch/arm/plat-omap/include/mach/entry-macro.S b/arch/arm/plat-omap/include/mach/entry-macro.S
index 33256a0..00f45c0 100644
--- a/arch/arm/plat-omap/include/mach/entry-macro.S
+++ b/arch/arm/plat-omap/include/mach/entry-macro.S
@@ -3,6 +3,9 @@
  *
  * Low-level IRQ helper macros for OMAP-based platforms
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This file is licensed under  the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
  * warranty of any kind, whether express or implied.
@@ -10,6 +13,7 @@
 #include <mach/hardware.h>
 #include <mach/io.h>
 #include <mach/irqs.h>
+#include <asm/hardware/gic.h>
 
 #if defined(CONFIG_ARCH_OMAP1)
 
@@ -56,7 +60,8 @@
 		.endm
 
 #endif
-#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
+#if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX) || \
+			defined(CONFIG_ARCH_OMAP4)
 
 #include <mach/omap24xx.h>
 #include <mach/omap34xx.h>
@@ -67,7 +72,9 @@
 #elif defined(CONFIG_ARCH_OMAP34XX)
 #define OMAP2_VA_IC_BASE		IO_ADDRESS(OMAP34XX_IC_BASE)
 #endif
-
+#if defined(CONFIG_ARCH_OMAP4)
+#include <mach/omap44xx.h>
+#endif
 #define INTCPS_SIR_IRQ_OFFSET	0x0040		/* Active interrupt offset */
 #define	ACTIVEIRQ_MASK		0x7f		/* Active interrupt bits */
 
@@ -80,6 +87,7 @@
 		.macro  arch_ret_to_user, tmp1, tmp2
 		.endm
 
+#ifndef CONFIG_ARCH_OMAP4
 		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
 		ldr	\base, =OMAP2_VA_IC_BASE
 		ldr	\irqnr, [\base, #0x98] /* IRQ pending reg 1 */
@@ -95,6 +103,40 @@
 		and	\irqnr, \irqnr, #ACTIVEIRQ_MASK /* Clear spurious bits */
 
 		.endm
+#else
+		/*
+		 * The interrupt numbering scheme is defined in the
+		 * interrupt controller spec.  To wit:
+		 *
+		 * Interrupts 0-15 are IPI
+		 * 16-28 are reserved
+		 * 29-31 are local.  We allow 30 to be used for the watchdog.
+		 * 32-1020 are global
+		 * 1021-1022 are reserved
+		 * 1023 is "spurious" (no interrupt)
+		 *
+		 * For now, we ignore all local interrupts so only return an
+		 * interrupt if it's between 30 and 1020.  The test_for_ipi
+		 * routine below will pick up on IPIs.
+		 * A simple read from the controller will tell us the number
+		 * of the highest priority enabled interrupt.
+		 * We then just need to check whether it is in the
+		 * valid range for an IRQ (30-1020 inclusive).
+		 */
+		.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
+		ldr     \base, =OMAP44XX_VA_GIC_CPU_BASE
+		ldr     \irqstat, [\base, #GIC_CPU_INTACK]
+
+		ldr     \tmp, =1021
+
+		bic     \irqnr, \irqstat, #0x1c00
+
+		cmp     \irqnr, #29
+		cmpcc   \irqnr, \irqnr
+		cmpne   \irqnr, \tmp
+		cmpcs   \irqnr, \irqnr
+		.endm
+#endif
 
 		.macro	irq_prio_table
 		.endm
diff --git a/arch/arm/plat-omap/include/mach/hardware.h b/arch/arm/plat-omap/include/mach/hardware.h
index 3dc423e..26c1fbf 100644
--- a/arch/arm/plat-omap/include/mach/hardware.h
+++ b/arch/arm/plat-omap/include/mach/hardware.h
@@ -285,5 +285,6 @@
 #include "omap16xx.h"
 #include "omap24xx.h"
 #include "omap34xx.h"
+#include "omap44xx.h"
 
 #endif	/* __ASM_ARCH_OMAP_HARDWARE_H */
diff --git a/arch/arm/plat-omap/include/mach/io.h b/arch/arm/plat-omap/include/mach/io.h
index 0610d7e..3b28147 100644
--- a/arch/arm/plat-omap/include/mach/io.h
+++ b/arch/arm/plat-omap/include/mach/io.h
@@ -6,6 +6,9 @@
  * Copied from arch/arm/mach-sa1100/include/mach/io.h
  * Copyright (C) 1997-1999 Russell King
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2 of the License, or (at your
@@ -157,6 +160,40 @@
 #define DSP_MMU_34XX_VIRT	0xe2000000
 #define DSP_MMU_34XX_SIZE	SZ_4K
 
+
+#elif defined(CONFIG_ARCH_OMAP4)
+/* We map both L3 and L4 on OMAP4 */
+#define L3_44XX_PHYS		L3_44XX_BASE
+#define L3_44XX_VIRT		0xd4000000
+#define L3_44XX_SIZE		SZ_1M
+
+#define L4_44XX_PHYS		L4_44XX_BASE
+#define L4_44XX_VIRT		0xda000000
+#define L4_44XX_SIZE		SZ_4M
+
+
+#define L4_WK_44XX_PHYS		L4_WK_44XX_BASE
+#define L4_WK_44XX_VIRT		0xda300000
+#define L4_WK_44XX_SIZE		SZ_1M
+
+#define L4_PER_44XX_PHYS	L4_PER_44XX_BASE
+#define L4_PER_44XX_VIRT	0xd8000000
+#define L4_PER_44XX_SIZE	SZ_4M
+
+#define L4_EMU_44XX_PHYS	L4_EMU_44XX_BASE
+#define L4_EMU_44XX_VIRT	0xe4000000
+#define L4_EMU_44XX_SIZE	SZ_64M
+
+#define OMAP44XX_GPMC_PHYS	OMAP44XX_GPMC_BASE
+#define OMAP44XX_GPMC_VIRT	0xe0000000
+#define OMAP44XX_GPMC_SIZE	SZ_1M
+
+
+#define IO_OFFSET		0x90000000
+#define __IO_ADDRESS(pa)	((pa) + IO_OFFSET)/* Works for L3 and L4 */
+#define __OMAP2_IO_ADDRESS(pa)	((pa) + IO_OFFSET)/* Works for L3 and L4 */
+#define io_v2p(va)		((va) - IO_OFFSET)/* Works for L3 and L4 */
+
 #endif
 
 #define IO_ADDRESS(pa)		IOMEM(__IO_ADDRESS(pa))
diff --git a/arch/arm/plat-omap/include/mach/irqs.h b/arch/arm/plat-omap/include/mach/irqs.h
index 7f57ee6..5bc331e 100644
--- a/arch/arm/plat-omap/include/mach/irqs.h
+++ b/arch/arm/plat-omap/include/mach/irqs.h
@@ -4,6 +4,9 @@
  *  Copyright (C) Greg Lonnon 2001
  *  Updated for OMAP-1610 by Tony Lindgren <tony@atomide.com>
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -422,6 +425,92 @@
 
 #define	INT_34XX_BENCH_MPU_EMUL	3
 
+
+#define IRQ_GIC_START		32
+
+#define INT_44XX_BENCH_MPU_EMUL	(3 + IRQ_GIC_START)
+#define INT_44XX_SSM_ABORT_IRQ	(6 + IRQ_GIC_START)
+#define INT_44XX_SYS_NIRQ	(7 + IRQ_GIC_START)
+#define INT_44XX_D2D_FW_IRQ	(8 + IRQ_GIC_START)
+#define INT_44XX_PRCM_MPU_IRQ	(11 + IRQ_GIC_START)
+#define INT_44XX_SDMA_IRQ0	(12 + IRQ_GIC_START)
+#define INT_44XX_SDMA_IRQ1	(13 + IRQ_GIC_START)
+#define INT_44XX_SDMA_IRQ2	(14 + IRQ_GIC_START)
+#define INT_44XX_SDMA_IRQ3	(15 + IRQ_GIC_START)
+#define INT_44XX_ISS_IRQ	(24 + IRQ_GIC_START)
+#define INT_44XX_DSS_IRQ	(25 + IRQ_GIC_START)
+#define INT_44XX_MAIL_U0_MPU	(26 + IRQ_GIC_START)
+#define INT_44XX_DSP_MMU	(28 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER1	(37 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER2	(38 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER3	(39 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER4	(40 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER5	(41 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER6	(42 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER7	(43 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER8	(44 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER9	(45 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER10	(46 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER11	(47 + IRQ_GIC_START)
+#define INT_44XX_GPTIMER12	(95 + IRQ_GIC_START)
+#define INT_44XX_SHA1MD5	(51 + IRQ_GIC_START)
+#define INT_44XX_I2C1_IRQ	(56 + IRQ_GIC_START)
+#define INT_44XX_I2C2_IRQ	(57 + IRQ_GIC_START)
+#define INT_44XX_HDQ_IRQ	(58 + IRQ_GIC_START)
+#define INT_44XX_SPI1_IRQ	(65 + IRQ_GIC_START)
+#define INT_44XX_SPI2_IRQ	(66 + IRQ_GIC_START)
+#define INT_44XX_HSI_1_IRQ0	(67 + IRQ_GIC_START)
+#define INT_44XX_HSI_2_IRQ1	(68 + IRQ_GIC_START)
+#define INT_44XX_HSI_1_DMAIRQ	(71 + IRQ_GIC_START)
+#define INT_44XX_UART1_IRQ	(72 + IRQ_GIC_START)
+#define INT_44XX_UART2_IRQ	(73 + IRQ_GIC_START)
+#define INT_44XX_UART3_IRQ	(74 + IRQ_GIC_START)
+#define INT_44XX_UART4_IRQ	(70 + IRQ_GIC_START)
+#define INT_44XX_USB_IRQ_NISO	(76 + IRQ_GIC_START)
+#define INT_44XX_USB_IRQ_ISO	(77 + IRQ_GIC_START)
+#define INT_44XX_USB_IRQ_HGEN	(78 + IRQ_GIC_START)
+#define INT_44XX_USB_IRQ_HSOF	(79 + IRQ_GIC_START)
+#define INT_44XX_USB_IRQ_OTG	(80 + IRQ_GIC_START)
+#define INT_44XX_MCBSP4_IRQ_TX	(81 + IRQ_GIC_START)
+#define INT_44XX_MCBSP4_IRQ_RX	(82 + IRQ_GIC_START)
+#define INT_44XX_MMC_IRQ	(83 + IRQ_GIC_START)
+#define INT_44XX_MMC2_IRQ	(86 + IRQ_GIC_START)
+#define INT_44XX_MCBSP2_IRQ_TX	(89 + IRQ_GIC_START)
+#define INT_44XX_MCBSP2_IRQ_RX	(90 + IRQ_GIC_START)
+#define INT_44XX_SPI3_IRQ	(91 + IRQ_GIC_START)
+#define INT_44XX_SPI5_IRQ	(69 + IRQ_GIC_START)
+
+#define INT_44XX_MCBSP5_IRQ	(16 + IRQ_GIC_START)
+#define INT_44xX_MCBSP1_IRQ	(17 + IRQ_GIC_START)
+#define INT_44XX_MCBSP2_IRQ	(22 + IRQ_GIC_START)
+#define INT_44XX_MCBSP3_IRQ	(23 + IRQ_GIC_START)
+#define INT_44XX_MCBSP4_IRQ	(27 + IRQ_GIC_START)
+#define INT_44XX_HS_USB_MC	(92 + IRQ_GIC_START)
+#define INT_44XX_HS_USB_DMA	(93 + IRQ_GIC_START)
+
+#define INT_44XX_GPIO_BANK1	(29 + IRQ_GIC_START)
+#define INT_44XX_GPIO_BANK2	(30 + IRQ_GIC_START)
+#define INT_44XX_GPIO_BANK3	(31 + IRQ_GIC_START)
+#define INT_44XX_GPIO_BANK4	(32 + IRQ_GIC_START)
+#define INT_44XX_GPIO_BANK5	(33 + IRQ_GIC_START)
+#define INT_44XX_GPIO_BANK6	(34 + IRQ_GIC_START)
+#define INT_44XX_USIM_IRQ	(35 + IRQ_GIC_START)
+#define INT_44XX_WDT3_IRQ	(36 + IRQ_GIC_START)
+#define INT_44XX_SPI4_IRQ	(48 + IRQ_GIC_START)
+#define INT_44XX_SHA1MD52_IRQ	(49 + IRQ_GIC_START)
+#define INT_44XX_FPKA_READY_IRQ	(50 + IRQ_GIC_START)
+#define INT_44XX_SHA1MD51_IRQ	(51 + IRQ_GIC_START)
+#define INT_44XX_RNG_IRQ	(52 + IRQ_GIC_START)
+#define INT_44XX_I2C3_IRQ	(61 + IRQ_GIC_START)
+#define INT_44XX_FPKA_ERROR_IRQ	(64 + IRQ_GIC_START)
+#define INT_44XX_PBIAS_IRQ	(75 + IRQ_GIC_START)
+#define INT_44XX_OHCI_IRQ	(76 + IRQ_GIC_START)
+#define INT_44XX_EHCI_IRQ	(77 + IRQ_GIC_START)
+#define INT_44XX_TLL_IRQ	(78 + IRQ_GIC_START)
+#define INT_44XX_PARTHASH_IRQ	(79 + IRQ_GIC_START)
+#define INT_44XX_MMC3_IRQ	(94 + IRQ_GIC_START)
+
+
 /* Max. 128 level 2 IRQs (OMAP1610), 192 GPIOs (OMAP730/850) and
  * 16 MPUIO lines */
 #define OMAP_MAX_GPIO_LINES	192
diff --git a/arch/arm/plat-omap/include/mach/memory.h b/arch/arm/plat-omap/include/mach/memory.h
index 99ed564..9ad41dc 100644
--- a/arch/arm/plat-omap/include/mach/memory.h
+++ b/arch/arm/plat-omap/include/mach/memory.h
@@ -38,7 +38,8 @@
  */
 #if defined(CONFIG_ARCH_OMAP1)
 #define PHYS_OFFSET		UL(0x10000000)
-#elif defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
+#elif defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) || \
+			defined(CONFIG_ARCH_OMAP4)
 #define PHYS_OFFSET		UL(0x80000000)
 #endif
 
diff --git a/arch/arm/plat-omap/include/mach/omap44xx.h b/arch/arm/plat-omap/include/mach/omap44xx.h
new file mode 100644
index 0000000..15dec7f
--- /dev/null
+++ b/arch/arm/plat-omap/include/mach/omap44xx.h
@@ -0,0 +1,46 @@
+/*:
+ * Address mappings and base address for OMAP4 interconnects
+ * and peripherals.
+ *
+ * Copyright (C) 2009 Texas Instruments
+ *
+ * Author: Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_ARCH_OMAP44XX_H
+#define __ASM_ARCH_OMAP44XX_H
+
+/*
+ * Please place only base defines here and put the rest in device
+ * specific headers.
+ */
+#define L4_44XX_BASE			0x4a000000
+#define L4_WK_44XX_BASE			0x4a300000
+#define L4_PER_44XX_BASE		0x48000000
+#define L4_EMU_44XX_BASE		0x54000000
+#define L3_44XX_BASE			0x44000000
+#define OMAP4430_32KSYNCT_BASE		0x4a304000
+#define OMAP4430_CM_BASE		0x4a004000
+#define OMAP4430_PRM_BASE		0x48306000
+#define OMAP44XX_GPMC_BASE		0x50000000
+#define OMAP443X_SCM_BASE		0x4a002000
+#define OMAP443X_CTRL_BASE		OMAP443X_SCM_BASE
+#define OMAP44XX_IC_BASE		0x48200000
+#define OMAP44XX_IVA_INTC_BASE		0x40000000
+#define IRQ_SIR_IRQ			0x0040
+#define OMAP44XX_GIC_DIST_BASE		0x48241000
+#define OMAP44XX_GIC_CPU_BASE		0x48240100
+#define OMAP44XX_VA_GIC_CPU_BASE	IO_ADDRESS(OMAP44XX_GIC_CPU_BASE)
+#define OMAP44XX_SCU_BASE		0x48240000
+#define OMAP44XX_VA_SCU_BASE		IO_ADDRESS(OMAP44XX_SCU_BASE)
+#define OMAP44XX_LOCAL_TWD_BASE		0x48240600
+#define OMAP44XX_VA_LOCAL_TWD_BASE	IO_ADDRESS(OMAP44XX_LOCAL_TWD_BASE)
+#define OMAP44XX_LOCAL_TWD_SIZE		0x00000100
+#define OMAP44XX_WKUPGEN_BASE		0x48281000
+#define OMAP44XX_VA_WKUPGEN_BASE	IO_ADDRESS(OMAP44XX_WKUPGEN_BASE)
+
+#endif /* __ASM_ARCH_OMAP44XX_H */
+
diff --git a/arch/arm/plat-omap/include/mach/serial.h b/arch/arm/plat-omap/include/mach/serial.h
index 8a676a0..e37894e 100644
--- a/arch/arm/plat-omap/include/mach/serial.h
+++ b/arch/arm/plat-omap/include/mach/serial.h
@@ -1,5 +1,8 @@
 /*
- *  arch/arm/plat-omap/include/mach/serial.h
+ * arch/arm/plat-omap/include/mach/serial.h
+ *
+ * Copyright (C) 2009 Texas Instruments
+ * Addded OMAP4 support- Santosh Shilimkar <santosh.shilimkar@ti.com>
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -15,19 +18,28 @@
 #define OMAP_UART1_BASE		0xfffb0000
 #define OMAP_UART2_BASE		0xfffb0800
 #define OMAP_UART3_BASE		0xfffb9800
+#define OMAP_MAX_NR_PORTS	3
 #elif defined(CONFIG_ARCH_OMAP2)
 /* OMAP2 serial ports */
 #define OMAP_UART1_BASE		0x4806a000
 #define OMAP_UART2_BASE		0x4806c000
 #define OMAP_UART3_BASE		0x4806e000
+#define OMAP_MAX_NR_PORTS	3
 #elif defined(CONFIG_ARCH_OMAP3)
 /* OMAP3 serial ports */
 #define OMAP_UART1_BASE		0x4806a000
 #define OMAP_UART2_BASE		0x4806c000
 #define OMAP_UART3_BASE		0x49020000
+#define OMAP_MAX_NR_PORTS	3
+#elif defined(CONFIG_ARCH_OMAP4)
+/* OMAP4 serial ports */
+#define OMAP_UART1_BASE		0x4806a000
+#define OMAP_UART2_BASE		0x4806c000
+#define OMAP_UART3_BASE		0x48020000
+#define OMAP_UART4_BASE		0x4806e000
+#define OMAP_MAX_NR_PORTS	4
 #endif
 
-#define OMAP_MAX_NR_PORTS	3
 #define OMAP1510_BASE_BAUD	(12000000/16)
 #define OMAP16XX_BASE_BAUD	(48000000/16)
 #define OMAP24XX_BASE_BAUD	(48000000/16)
diff --git a/arch/arm/plat-omap/io.c b/arch/arm/plat-omap/io.c
index af326ef..9b42d72 100644
--- a/arch/arm/plat-omap/io.c
+++ b/arch/arm/plat-omap/io.c
@@ -1,3 +1,14 @@
+/*
+ * Common io.c file
+ * This file is created by Russell King <rmk+kernel@arm.linux.org.uk>
+ *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/mm.h>
@@ -7,6 +18,7 @@
 #include <mach/omap16xx.h>
 #include <mach/omap24xx.h>
 #include <mach/omap34xx.h>
+#include <mach/omap44xx.h>
 
 #define BETWEEN(p,st,sz)	((p) >= (st) && (p) < ((st) + (sz)))
 #define XLATE(p,pst,vst)	((void __iomem *)((p) - (pst) + (vst)))
@@ -92,7 +104,22 @@ void __iomem *omap_ioremap(unsigned long p, size_t size, unsigned int type)
 			return XLATE(p, L4_EMU_34XX_PHYS, L4_EMU_34XX_VIRT);
 	}
 #endif
-
+#ifdef CONFIG_ARCH_OMAP4
+	if (cpu_is_omap44xx()) {
+		if (BETWEEN(p, L3_44XX_PHYS, L3_44XX_SIZE))
+			return XLATE(p, L3_44XX_PHYS, L3_44XX_VIRT);
+		if (BETWEEN(p, L4_44XX_PHYS, L4_44XX_SIZE))
+			return XLATE(p, L4_44XX_PHYS, L4_44XX_VIRT);
+		if (BETWEEN(p, L4_WK_44XX_PHYS, L4_WK_44XX_SIZE))
+			return XLATE(p, L4_WK_44XX_PHYS, L4_WK_44XX_VIRT);
+		if (BETWEEN(p, OMAP44XX_GPMC_PHYS, OMAP44XX_GPMC_SIZE))
+			return XLATE(p, OMAP44XX_GPMC_PHYS, OMAP44XX_GPMC_VIRT);
+		if (BETWEEN(p, L4_PER_44XX_PHYS, L4_PER_44XX_SIZE))
+			return XLATE(p, L4_PER_44XX_PHYS, L4_PER_44XX_VIRT);
+		if (BETWEEN(p, L4_EMU_44XX_PHYS, L4_EMU_44XX_SIZE))
+			return XLATE(p, L4_EMU_44XX_PHYS, L4_EMU_44XX_VIRT);
+	}
+#endif
 	return __arm_ioremap(p, size, type);
 }
 EXPORT_SYMBOL(omap_ioremap);
diff --git a/arch/arm/plat-omap/mux.c b/arch/arm/plat-omap/mux.c
index 80b040f..8d329fb 100644
--- a/arch/arm/plat-omap/mux.c
+++ b/arch/arm/plat-omap/mux.c
@@ -54,6 +54,9 @@ int __init_or_module omap_cfg_reg(const unsigned long index)
 {
 	struct pin_config *reg;
 
+	if (cpu_is_omap44xx())
+		return 0;
+
 	if (mux_cfg == NULL) {
 		printk(KERN_ERR "Pin mux table not initialized\n");
 		return -ENODEV;
diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c
index 102c9f7..67a9070 100644
--- a/arch/arm/plat-omap/sram.c
+++ b/arch/arm/plat-omap/sram.c
@@ -6,6 +6,9 @@
  * Copyright (C) 2005 Nokia Corporation
  * Written by Tony Lindgren <tony@atomide.com>
  *
+ * Copyright (C) 2009 Texas Instruments
+ * Added OMAP4 support - Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -44,6 +47,8 @@
 #define OMAP3_SRAM_VA           0xd7000000
 #define OMAP3_SRAM_PUB_PA       0x40208000
 #define OMAP3_SRAM_PUB_VA       0xd7008000
+#define OMAP4_SRAM_PA		0x40200000		/*0x402f0000*/
+#define OMAP4_SRAM_VA		0xd7000000		/*0xd70f0000*/
 
 #if defined(CONFIG_ARCH_OMAP24XX) || defined(CONFIG_ARCH_OMAP34XX)
 #define SRAM_BOOTLOADER_SZ	0x00
@@ -87,6 +92,10 @@ static int is_sram_locked(void)
 {
 	int type = 0;
 
+	if (cpu_is_omap44xx())
+		/* Not yet supported */
+		return 0;
+
 	if (cpu_is_omap242x())
 		type = omap_rev() & OMAP2_DEVICETYPE_MASK;
 
@@ -135,6 +144,10 @@ void __init omap_detect_sram(void)
 				omap_sram_base = OMAP3_SRAM_VA;
 				omap_sram_start = OMAP3_SRAM_PA;
 				omap_sram_size = 0x10000; /* 64K */
+			} else if (cpu_is_omap44xx()) {
+				omap_sram_base = OMAP4_SRAM_VA;
+				omap_sram_start = OMAP4_SRAM_PA;
+				omap_sram_size = 0x8000; /* 32K */
 			} else {
 				omap_sram_base = OMAP2_SRAM_VA;
 				omap_sram_start = OMAP2_SRAM_PA;
@@ -203,6 +216,12 @@ void __init omap_map_sram(void)
 		omap_sram_io_desc[0].pfn = __phys_to_pfn(base);
 	}
 
+	if (cpu_is_omap44xx()) {
+		omap_sram_io_desc[0].virtual = OMAP4_SRAM_VA;
+		base = OMAP4_SRAM_PA;
+		base = ROUND_DOWN(base, PAGE_SIZE);
+		omap_sram_io_desc[0].pfn = __phys_to_pfn(base);
+	}
 	omap_sram_io_desc[0].length = 1024 * 1024;	/* Use section desc */
 	iotable_init(omap_sram_io_desc, ARRAY_SIZE(omap_sram_io_desc));
 
@@ -391,6 +410,8 @@ int __init omap_sram_init(void)
 		omap243x_sram_init();
 	else if (cpu_is_omap34xx())
 		omap34xx_sram_init();
+	else if (cpu_is_omap44xx())
+		omap34xx_sram_init(); /* FIXME: */
 
 	return 0;
 }
-- 
cgit v0.10.2


From 748303850d589038f8ff9f5c7014afb006210b1f Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Thu, 28 May 2009 14:16:04 -0700
Subject: ARM: OMAP4: Clock stubs since CLKDEV not in yet.

This patch update the common clock.c file for OMAP4. The clk_get() and
clk_put() functions are moved to  common place in arch/arm/common/clkdev.c
Since on current OMAP4 platform clk management is still not supported, the
platform file is stubbed with those functions.

Once the framework is ready, this WILL be replaced with a full
clkdev implementation.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c
index 29efc27..0fcfcd6 100644
--- a/arch/arm/plat-omap/clock.c
+++ b/arch/arm/plat-omap/clock.c
@@ -36,10 +36,40 @@ static struct clk_functions *arch_clock;
  * Standard clock functions defined in include/linux/clk.h
  *-------------------------------------------------------------------------*/
 
+/* This functions is moved to arch/arm/common/clkdev.c. For OMAP4 since
+ * clock framework is not up , it is defined here to avoid rework in
+ * every driver. Also dummy prcm reset function is added */
+
+/* Dummy hooks only for OMAP4.For rest OMAPs, common clkdev is used */
+#if defined(CONFIG_ARCH_OMAP4)
+struct clk *clk_get(struct device *dev, const char *id)
+{
+	return NULL;
+}
+EXPORT_SYMBOL(clk_get);
+
+void clk_put(struct clk *clk)
+{
+}
+EXPORT_SYMBOL(clk_put);
+
+void omap2_clk_prepare_for_reboot(void)
+{
+}
+EXPORT_SYMBOL(omap2_clk_prepare_for_reboot);
+
+void omap_prcm_arch_reset(char mode)
+{
+}
+EXPORT_SYMBOL(omap_prcm_arch_reset);
+#endif
 int clk_enable(struct clk *clk)
 {
 	unsigned long flags;
 	int ret = 0;
+	if (cpu_is_omap44xx())
+		/* OMAP4 clk framework not supported yet */
+		return 0;
 
 	if (clk == NULL || IS_ERR(clk))
 		return -EINVAL;
@@ -140,6 +170,9 @@ int clk_set_parent(struct clk *clk, struct clk *parent)
 	unsigned long flags;
 	int ret = -EINVAL;
 
+	if (cpu_is_omap44xx())
+	/* OMAP4 clk framework not supported yet */
+		return 0;
 	if (clk == NULL || IS_ERR(clk) || parent == NULL || IS_ERR(parent))
 		return ret;
 
-- 
cgit v0.10.2


From 46ba0abfe1ac2bd9608d0fc9e914379be695aa5b Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Thu, 28 May 2009 14:16:05 -0700
Subject: ARM: OMAP4: Add support for 4430 SDP

This patch updates the Makefile and Kconfig entries for OMAP4. The OMAP4430 SDP
board file supports only minimal set of drivers.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e84729b..676d10d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -127,6 +127,7 @@ endif
  machine-$(CONFIG_ARCH_OMAP1)	   := omap1
  machine-$(CONFIG_ARCH_OMAP2)	   := omap2
  machine-$(CONFIG_ARCH_OMAP3)	   := omap2
+ machine-$(CONFIG_ARCH_OMAP4)	   := omap2
     plat-$(CONFIG_ARCH_OMAP)	   := omap
  machine-$(CONFIG_ARCH_S3C2410)	   := s3c2410 s3c2400 s3c2412 s3c2440 s3c2442 s3c2443
  machine-$(CONFIG_ARCH_S3C24A0)	   := s3c24a0
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index b5c9088..a755eb5 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -25,7 +25,7 @@ config ARCH_OMAP3430
 	select ARCH_OMAP_OTG
 
 comment "OMAP Board Type"
-	depends on ARCH_OMAP2 || ARCH_OMAP3
+	depends on ARCH_OMAP2 || ARCH_OMAP3 || ARCH_OMAP4
 
 config MACH_OMAP_GENERIC
 	bool "Generic OMAP board"
@@ -75,3 +75,7 @@ config MACH_NOKIA_RX51
 config MACH_OMAP_ZOOM2
 	bool "OMAP3 Zoom2 board"
 	depends on ARCH_OMAP3 && ARCH_OMAP34XX
+
+config MACH_OMAP_4430SDP
+	bool "OMAP 4430 SDP board"
+	depends on ARCH_OMAP4
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 10e7c29..09fb3ad 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -58,6 +58,9 @@ obj-$(CONFIG_MACH_NOKIA_RX51)		+= board-rx51.o \
 obj-$(CONFIG_MACH_OMAP_ZOOM2)		+= board-zoom2.o \
 					   mmc-twl4030.o \
 					   board-zoom-debugboard.o
+
+obj-$(CONFIG_MACH_OMAP_4430SDP)		+= board-4430sdp.o
+
 # Platform specific device init code
 ifeq ($(CONFIG_USB_MUSB_SOC),y)
 obj-y					+= usb-musb.o
diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
new file mode 100644
index 0000000..57e477b
--- /dev/null
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -0,0 +1,94 @@
+/*
+ * Board support file for OMAP4430 SDP.
+ *
+ * Copyright (C) 2009 Texas Instruments
+ *
+ * Author: Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * Based on mach-omap2/board-3430sdp.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+
+#include <mach/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <mach/board.h>
+#include <mach/common.h>
+#include <mach/control.h>
+#include <mach/timer-gp.h>
+#include <asm/hardware/gic.h>
+
+static struct platform_device sdp4430_lcd_device = {
+	.name		= "sdp4430_lcd",
+	.id		= -1,
+};
+
+static struct platform_device *sdp4430_devices[] __initdata = {
+	&sdp4430_lcd_device,
+};
+
+static struct omap_uart_config sdp4430_uart_config __initdata = {
+	.enabled_uarts	= (1 << 0) | (1 << 1) | (1 << 2),
+};
+
+static struct omap_lcd_config sdp4430_lcd_config __initdata = {
+	.ctrl_name	= "internal",
+};
+
+static struct omap_board_config_kernel sdp4430_config[] __initdata = {
+	{ OMAP_TAG_UART,	&sdp4430_uart_config },
+	{ OMAP_TAG_LCD,		&sdp4430_lcd_config },
+};
+
+static void __init gic_init_irq(void)
+{
+	gic_dist_init(0, IO_ADDRESS(OMAP44XX_GIC_DIST_BASE), 29);
+	gic_cpu_init(0, IO_ADDRESS(OMAP44XX_GIC_CPU_BASE));
+}
+
+static void __init omap_4430sdp_init_irq(void)
+{
+	omap2_init_common_hw(NULL);
+#ifdef CONFIG_OMAP_32K_TIMER
+	omap2_gp_clockevent_set_gptimer(1);
+#endif
+	gic_init_irq();
+	omap_gpio_init();
+}
+
+
+static void __init omap_4430sdp_init(void)
+{
+	platform_add_devices(sdp4430_devices, ARRAY_SIZE(sdp4430_devices));
+	omap_board_config = sdp4430_config;
+	omap_board_config_size = ARRAY_SIZE(sdp4430_config);
+	omap_serial_init();
+}
+
+static void __init omap_4430sdp_map_io(void)
+{
+	omap2_set_globals_443x();
+	omap2_map_common_io();
+}
+
+MACHINE_START(OMAP_4430SDP, "OMAP4430 4430SDP board")
+	/* Maintainer: Santosh Shilimkar - Texas Instruments Inc */
+	.phys_io	= 0x48000000,
+	.io_pg_offst	= ((0xd8000000) >> 18) & 0xfffc,
+	.boot_params	= 0x80000100,
+	.map_io		= omap_4430sdp_map_io,
+	.init_irq	= omap_4430sdp_init_irq,
+	.init_machine	= omap_4430sdp_init,
+	.timer		= &omap_timer,
+MACHINE_END
diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index 9dd68fa..89499e6 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -23,6 +23,11 @@ config ARCH_OMAP3
 	select CPU_V7
 	select COMMON_CLKDEV
 
+config ARCH_OMAP4
+	bool "TI OMAP4"
+	select CPU_V7
+	select ARM_GIC
+
 endchoice
 
 comment "OMAP Feature Selections"
@@ -128,13 +133,13 @@ config OMAP_MPU_TIMER
 
 config OMAP_32K_TIMER
 	bool "Use 32KHz timer"
-	depends on ARCH_OMAP16XX || ARCH_OMAP24XX || ARCH_OMAP34XX
+	depends on ARCH_OMAP16XX || ARCH_OMAP24XX || ARCH_OMAP34XX || ARCH_OMAP4
 	help
 	  Select this option if you want to enable the OMAP 32KHz timer.
 	  This timer saves power compared to the OMAP_MPU_TIMER, and has
 	  support for no tick during idle. The 32KHz timer provides less
 	  intra-tick resolution than OMAP_MPU_TIMER. The 32KHz timer is
-	  currently only available for OMAP16XX, 24XX and 34XX.
+	  currently only available for OMAP16XX, 24XX, 34XX and OMAP4.
 
 endchoice
 
@@ -149,7 +154,7 @@ config OMAP_32K_TIMER_HZ
 
 config OMAP_DM_TIMER
 	bool "Use dual-mode timer"
-	depends on ARCH_OMAP16XX || ARCH_OMAP24XX || ARCH_OMAP34XX
+	depends on ARCH_OMAP16XX || ARCH_OMAP24XX || ARCH_OMAP34XX || ARCH_OMAP4
 	help
 	 Select this option if you want to use OMAP Dual-Mode timers.
 
-- 
cgit v0.10.2


From b04e8975e70d9b2bc27c017f92b356b312692544 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Thu, 28 May 2009 14:16:05 -0700
Subject: ARM: OMAP4: Add defconfig for 4430 SDP

This patch adds the defconfig for OMAP4430 SDP platform.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/configs/omap_4430sdp_defconfig b/arch/arm/configs/omap_4430sdp_defconfig
new file mode 100644
index 0000000..67a3a77
--- /dev/null
+++ b/arch/arm/configs/omap_4430sdp_defconfig
@@ -0,0 +1,806 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.29
+# Fri April 19 19:58:24 20089
+#
+CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+# CONFIG_ELF_CORE is not set
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
+# CONFIG_SLAB is not set
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_CLK=y
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+# CONFIG_FREEZER is not set
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_PNX4008 is not set
+# CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_DAVINCI is not set
+CONFIG_ARCH_OMAP=y
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_W90X900 is not set
+
+#
+# TI OMAP Implementations
+#
+# CONFIG_ARCH_OMAP1 is not set
+# CONFIG_ARCH_OMAP2 is not set
+# CONFIG_ARCH_OMAP3 is not set
+CONFIG_ARCH_OMAP4=y
+
+#
+# OMAP Feature Selections
+#
+# CONFIG_OMAP_RESET_CLOCKS is not set
+# CONFIG_OMAP_COMPONENT_VERSION is not set
+# CONFIG_OMAP_GPIO_SWITCH is not set
+# CONFIG_OMAP_MUX is not set
+# CONFIG_OMAP_MCBSP is not set
+# CONFIG_OMAP_MBOX_FWK is not set
+# CONFIG_OMAP_MPU_TIMER is not set
+CONFIG_OMAP_32K_TIMER=y
+CONFIG_OMAP_32K_TIMER_HZ=128
+CONFIG_OMAP_DM_TIMER=y
+CONFIG_OMAP_LL_DEBUG_UART1=y
+# CONFIG_OMAP_LL_DEBUG_UART2 is not set
+# CONFIG_OMAP_LL_DEBUG_UART3 is not set
+
+
+
+#
+# OMAP Board Type
+#
+CONFIG_MACH_OMAP_4430SDP=y
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_32v6K=y
+CONFIG_CPU_V7=y
+CONFIG_CPU_32v7=y
+CONFIG_CPU_ABRT_EV7=y
+CONFIG_CPU_PABRT_IFAR=y
+CONFIG_CPU_CACHE_V7=y
+CONFIG_CPU_CACHE_VIPT=y
+CONFIG_CPU_COPY_V6=y
+CONFIG_CPU_TLB_V7=y
+CONFIG_CPU_HAS_ASID=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+
+#
+# Processor Features
+#
+# CONFIG_ARM_THUMB is not set
+# CONFIG_ARM_THUMBEE is not set
+# CONFIG_CPU_ICACHE_DISABLE is not set
+CONFIG_CPU_DCACHE_DISABLE=y
+# CONFIG_CPU_BPREDICT_DISABLE is not set
+CONFIG_HAS_TLS_REG=y
+# CONFIG_OUTER_CACHE is not set
+CONFIG_ARM_GIC=y
+
+#
+# Bus support
+#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+# CONFIG_PREEMPT is not set
+CONFIG_HZ=128
+CONFIG_AEABI=y
+# CONFIG_OABI_COMPAT is not set
+CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+# CONFIG_UNEVICTABLE_LRU is not set
+# CONFIG_LEDS is not set
+CONFIG_ALIGNMENT_TRAP=y
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="root=/dev/ram0 rw mem=128M console=ttyS0,115200n8 initrd=0x81600000,20M ramdisk_size=20480"
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+# CONFIG_CPU_FREQ is not set
+# CONFIG_CPU_IDLE is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+CONFIG_FPE_NWFPE=y
+# CONFIG_FPE_NWFPE_XP is not set
+# CONFIG_FPE_FASTFPE is not set
+CONFIG_VFP=y
+CONFIG_VFPv3=y
+# CONFIG_NEON is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
+# CONFIG_BINFMT_AOUT is not set
+CONFIG_BINFMT_MISC=y
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+# CONFIG_NET is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_MTD is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+CONFIG_DAB=y
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_NEW_LEDS is not set
+CONFIG_RTC_LIB=y
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_REGULATOR is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# CBUS support
+#
+# CONFIG_CBUS is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_FS_XATTR is not set
+# CONFIG_EXT4_FS is not set
+CONFIG_JBD=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+CONFIG_QUOTA=y
+CONFIG_PRINT_QUOTA_WARNING=y
+CONFIG_QUOTA_TREE=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_QUOTACTL=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SYSV68_PARTITION is not set
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+CONFIG_FRAME_POINTER=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+# CONFIG_DEBUG_USER is not set
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_ICEDCC is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_MANAGER=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=m
+# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_PCBC=m
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+CONFIG_CRYPTO_CRC32C=y
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+CONFIG_CRYPTO_HW=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+CONFIG_CRC_CCITT=y
+# CONFIG_CRC16 is not set
+CONFIG_CRC_T10DIF=y
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+CONFIG_LIBCRC32C=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
-- 
cgit v0.10.2


From 086a377edc969aea6c761176a7e4ff68f264d6fe Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 7 May 2009 12:36:53 -0700
Subject: sysfs: file.c: use create_singlethread_workqueue()

We don't need a kernel thread per CPU for this application.

Acked-by: Alex Chiang <achiang@hp.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b1606e0..561a9c0 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -723,7 +723,7 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
 	mutex_unlock(&sysfs_workq_mutex);
 
 	if (sysfs_workqueue == NULL) {
-		sysfs_workqueue = create_workqueue("sysfsd");
+		sysfs_workqueue = create_singlethread_workqueue("sysfsd");
 		if (sysfs_workqueue == NULL) {
 			module_put(owner);
 			return -ENOMEM;
-- 
cgit v0.10.2


From 5c8563d773c0e9f0ac2a552e84806decd98ce732 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 28 May 2009 14:24:07 -0700
Subject: Driver Core: do not oops when driver_unregister() is called for
 unregistered drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We also fix a problem with cleaning up properly when initializing
drivers and devices, so checks like this will work successfully.

Portions of the patch by Linus and Greg and Ingo.

Reported-by: Ozan Çağlayan <ozan@pardus.org.tr>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index dc030f1..c659961 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -700,8 +700,10 @@ int bus_add_driver(struct device_driver *drv)
 	}
 
 	kobject_uevent(&priv->kobj, KOBJ_ADD);
-	return error;
+	return 0;
 out_unregister:
+	kfree(drv->p);
+	drv->p = NULL;
 	kobject_put(&priv->kobj);
 out_put_bus:
 	bus_put(bus);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4aa527b..1977d4b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -879,7 +879,7 @@ int device_add(struct device *dev)
 	}
 
 	if (!dev_name(dev))
-		goto done;
+		goto name_error;
 
 	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
 
@@ -978,6 +978,9 @@ done:
 	cleanup_device_parent(dev);
 	if (parent)
 		put_device(parent);
+name_error:
+	kfree(dev->p);
+	dev->p = NULL;
 	goto done;
 }
 
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index c51f11b..8ae0f63 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -257,6 +257,10 @@ EXPORT_SYMBOL_GPL(driver_register);
  */
 void driver_unregister(struct device_driver *drv)
 {
+	if (!drv || !drv->p) {
+		WARN(1, "Unexpected driver unregister!\n");
+		return;
+	}
 	driver_remove_groups(drv, drv->groups);
 	bus_remove_driver(drv);
 }
-- 
cgit v0.10.2


From a2928c42a5d69328c3578b41bd4d72f6658cf7dc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 28 May 2009 14:55:04 -0300
Subject: perf_counter tools: Move symbol resolution classes from report to
 libperf

Will be used by perf top as well.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090528175504.GC4747@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 51b13f9..bd29a5c 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -298,6 +298,7 @@ LIB_H += util/help.h
 LIB_H += util/strbuf.h
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
+LIB_H += util/symbol.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
@@ -317,6 +318,7 @@ LIB_OBJS += util/strbuf.o
 LIB_OBJS += util/usage.o
 LIB_OBJS += util/wrapper.o
 LIB_OBJS += util/sigchain.o
+LIB_OBJS += util/symbol.o
 LIB_OBJS += util/pager.o
 
 BUILTIN_OBJS += builtin-help.o
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 9fdf822..04fc7ec 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -1,13 +1,10 @@
 #include "util/util.h"
 #include "builtin.h"
 
-#include <libelf.h>
-#include <gelf.h>
-#include <elf.h>
-
 #include "util/list.h"
 #include "util/cache.h"
 #include "util/rbtree.h"
+#include "util/symbol.h"
 
 #include "perf.h"
 
@@ -62,305 +59,6 @@ typedef union event_union {
 	struct comm_event comm;
 } event_t;
 
-struct symbol {
-	struct rb_node		rb_node;
-	__u64			start;
-	__u64			end;
-	char			name[0];
-};
-
-static struct symbol *symbol__new(uint64_t start, uint64_t len, const char *name)
-{
-	struct symbol *self = malloc(sizeof(*self) + strlen(name) + 1);
-
-	if (self != NULL) {
-		self->start = start;
-		self->end   = start + len;
-		strcpy(self->name, name);
-	}
-
-	return self;
-}
-
-static void symbol__delete(struct symbol *self)
-{
-	free(self);
-}
-
-static size_t symbol__fprintf(struct symbol *self, FILE *fp)
-{
-	return fprintf(fp, " %llx-%llx %s\n",
-		       self->start, self->end, self->name);
-}
-
-struct dso {
-	struct list_head node;
-	struct rb_root	 syms;
-	char		 name[0];
-};
-
-static struct dso *dso__new(const char *name)
-{
-	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
-
-	if (self != NULL) {
-		strcpy(self->name, name);
-		self->syms = RB_ROOT;
-	}
-
-	return self;
-}
-
-static void dso__delete_symbols(struct dso *self)
-{
-	struct symbol *pos;
-	struct rb_node *next = rb_first(&self->syms);
-
-	while (next) {
-		pos = rb_entry(next, struct symbol, rb_node);
-		next = rb_next(&pos->rb_node);
-		symbol__delete(pos);
-	}
-}
-
-static void dso__delete(struct dso *self)
-{
-	dso__delete_symbols(self);
-	free(self);
-}
-
-static void dso__insert_symbol(struct dso *self, struct symbol *sym)
-{
-	struct rb_node **p = &self->syms.rb_node;
-	struct rb_node *parent = NULL;
-	const uint64_t ip = sym->start;
-	struct symbol *s;
-
-	while (*p != NULL) {
-		parent = *p;
-		s = rb_entry(parent, struct symbol, rb_node);
-		if (ip < s->start)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&sym->rb_node, parent, p);
-	rb_insert_color(&sym->rb_node, &self->syms);
-}
-
-static struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
-{
-	struct rb_node *n;
-
-	if (self == NULL)
-		return NULL;
-
-	n = self->syms.rb_node;
-
-	while (n) {
-		struct symbol *s = rb_entry(n, struct symbol, rb_node);
-
-		if (ip < s->start)
-			n = n->rb_left;
-		else if (ip > s->end)
-			n = n->rb_right;
-		else
-			return s;
-	}
-
-	return NULL;
-}
-
-/**
- * elf_symtab__for_each_symbol - iterate thru all the symbols
- *
- * @self: struct elf_symtab instance to iterate
- * @index: uint32_t index
- * @sym: GElf_Sym iterator
- */
-#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \
-	for (index = 0, gelf_getsym(syms, index, &sym);\
-	     index < nr_syms; \
-	     index++, gelf_getsym(syms, index, &sym))
-
-static inline uint8_t elf_sym__type(const GElf_Sym *sym)
-{
-	return GELF_ST_TYPE(sym->st_info);
-}
-
-static inline int elf_sym__is_function(const GElf_Sym *sym)
-{
-	return elf_sym__type(sym) == STT_FUNC &&
-	       sym->st_name != 0 &&
-	       sym->st_shndx != SHN_UNDEF &&
-	       sym->st_size != 0;
-}
-
-static inline const char *elf_sym__name(const GElf_Sym *sym,
-					const Elf_Data *symstrs)
-{
-	return symstrs->d_buf + sym->st_name;
-}
-
-static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
-				    GElf_Shdr *shp, const char *name,
-				    size_t *index)
-{
-	Elf_Scn *sec = NULL;
-	size_t cnt = 1;
-
-	while ((sec = elf_nextscn(elf, sec)) != NULL) {
-		char *str;
-
-		gelf_getshdr(sec, shp);
-		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
-		if (!strcmp(name, str)) {
-			if (index)
-				*index = cnt;
-			break;
-		}
-		++cnt;
-	}
-
-	return sec;
-}
-
-static int dso__load_sym(struct dso *self, int fd, char *name)
-{
-	Elf_Data *symstrs;
-	uint32_t nr_syms;
-	int err = -1;
-	uint32_t index;
-	GElf_Ehdr ehdr;
-	GElf_Shdr shdr;
-	Elf_Data *syms;
-	GElf_Sym sym;
-	Elf_Scn *sec;
-	Elf *elf;
-	int nr = 0;
-
-	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
-	if (elf == NULL) {
-		fprintf(stderr, "%s: cannot read %s ELF file.\n",
-			__func__, name);
-		goto out_close;
-	}
-
-	if (gelf_getehdr(elf, &ehdr) == NULL) {
-		fprintf(stderr, "%s: cannot get elf header.\n", __func__);
-		goto out_elf_end;
-	}
-
-	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
-	if (sec == NULL)
-		sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL);
-
-	if (sec == NULL)
-		goto out_elf_end;
-
-	syms = elf_getdata(sec, NULL);
-	if (syms == NULL)
-		goto out_elf_end;
-
-	sec = elf_getscn(elf, shdr.sh_link);
-	if (sec == NULL)
-		goto out_elf_end;
-
-	symstrs = elf_getdata(sec, NULL);
-	if (symstrs == NULL)
-		goto out_elf_end;
-
-	nr_syms = shdr.sh_size / shdr.sh_entsize;
-
-	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
-		struct symbol *f;
-
-		if (!elf_sym__is_function(&sym))
-			continue;
-
-		sec = elf_getscn(elf, sym.st_shndx);
-		if (!sec)
-			goto out_elf_end;
-
-		gelf_getshdr(sec, &shdr);
-		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
-
-		f = symbol__new(sym.st_value, sym.st_size,
-				elf_sym__name(&sym, symstrs));
-		if (!f)
-			goto out_elf_end;
-
-		dso__insert_symbol(self, f);
-
-		nr++;
-	}
-
-	err = nr;
-out_elf_end:
-	elf_end(elf);
-out_close:
-	return err;
-}
-
-static int dso__load(struct dso *self)
-{
-	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
-	char *name = malloc(size);
-	int variant = 0;
-	int ret = -1;
-	int fd;
-
-	if (!name)
-		return -1;
-
-more:
-	do {
-		switch (variant) {
-		case 0: /* Fedora */
-			snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
-			break;
-		case 1: /* Ubuntu */
-			snprintf(name, size, "/usr/lib/debug%s", self->name);
-			break;
-		case 2: /* Sane people */
-			snprintf(name, size, "%s", self->name);
-			break;
-
-		default:
-			goto out;
-		}
-		variant++;
-
-		fd = open(name, O_RDONLY);
-	} while (fd < 0);
-
-	ret = dso__load_sym(self, fd, name);
-	close(fd);
-
-	/*
-	 * Some people seem to have debuginfo files _WITHOUT_ debug info!?!?
-	 */
-	if (!ret)
-		goto more;
-
-out:
-	free(name);
-	return ret;
-}
-
-static size_t dso__fprintf(struct dso *self, FILE *fp)
-{
-	size_t ret = fprintf(fp, "dso: %s\n", self->name);
-
-	struct rb_node *nd;
-	for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
-		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
-		ret += symbol__fprintf(pos, fp);
-	}
-
-	return ret;
-}
-
 static LIST_HEAD(dsos);
 static struct dso *kernel_dso;
 
@@ -418,153 +116,27 @@ static void dsos__fprintf(FILE *fp)
 		dso__fprintf(pos, fp);
 }
 
-static int hex(char ch)
-{
-	if ((ch >= '0') && (ch <= '9'))
-		return ch - '0';
-	if ((ch >= 'a') && (ch <= 'f'))
-		return ch - 'a' + 10;
-	if ((ch >= 'A') && (ch <= 'F'))
-		return ch - 'A' + 10;
-	return -1;
-}
-
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-static int hex2long(char *ptr, unsigned long *long_val)
-{
-	const char *p = ptr;
-	*long_val = 0;
-
-	while (*p) {
-		const int hex_val = hex(*p);
-
-		if (hex_val < 0)
-			break;
-
-		*long_val = (*long_val << 4) | hex_val;
-		p++;
-	}
-
-	return p - ptr;
-}
-
-static int load_kallsyms(void)
-{
-	struct rb_node *nd, *prevnd;
-	char *line = NULL;
-	FILE *file;
-	size_t n;
-
-	kernel_dso = dso__new("[kernel]");
-	if (kernel_dso == NULL)
-		return -1;
-
-	file = fopen("/proc/kallsyms", "r");
-	if (file == NULL)
-		goto out_delete_dso;
-
-	while (!feof(file)) {
-		unsigned long start;
-		struct symbol *sym;
-		int line_len, len;
-		char symbol_type;
-
-		line_len = getline(&line, &n, file);
-		if (line_len < 0)
-			break;
-
-		if (!line)
-			goto out_delete_dso;
-
-		line[--line_len] = '\0'; /* \n */
-
-		len = hex2long(line, &start);
-
-		len++;
-		if (len + 2 >= line_len)
-			continue;
-
-		symbol_type = toupper(line[len]);
-		/*
-		 * We're interested only in code ('T'ext)
-		 */
-		if (symbol_type != 'T' && symbol_type != 'W')
-			continue;
-		/*
-		 * Well fix up the end later, when we have all sorted.
-		 */
-		sym = symbol__new(start, 0xdead, line + len + 2);
-
-		if (sym == NULL)
-			goto out_delete_dso;
-
-		dso__insert_symbol(kernel_dso, sym);
-	}
-
-	/*
-	 * Now that we have all sorted out, just set the ->end of all
-	 * symbols
-	 */
-	prevnd = rb_first(&kernel_dso->syms);
-
-	if (prevnd == NULL)
-		goto out_delete_line;
-
-	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
-		struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
-			      *curr = rb_entry(nd, struct symbol, rb_node);
-
-		prev->end = curr->start - 1;
-		prevnd = nd;
-	}
-
-	dsos__add(kernel_dso);
-	free(line);
-	fclose(file);
-
-	return 0;
-
-out_delete_line:
-	free(line);
-out_delete_dso:
-	dso__delete(kernel_dso);
-	return -1;
-}
-
 static int load_kernel(void)
 {
-	int fd, nr;
-
-	if (!vmlinux)
-		goto kallsyms;
-
-	fd = open(vmlinux, O_RDONLY);
-	if (fd < 0)
-		goto kallsyms;
+	int err = -1;
 
 	kernel_dso = dso__new("[kernel]");
 	if (!kernel_dso)
-		goto fail_open;
-
-	nr = dso__load_sym(kernel_dso, fd, vmlinux);
+		return -1;
 
-	if (nr <= 0)
-		goto fail_load;
+	if (vmlinux)
+		err = dso__load_vmlinux(kernel_dso, vmlinux);
 
-	dsos__add(kernel_dso);
-	close(fd);
+	if (err)
+		err = dso__load_kallsyms(kernel_dso);
 
-	return 0;
+	if (err) {
+		dso__delete(kernel_dso);
+		kernel_dso = NULL;
+	} else
+		dsos__add(kernel_dso);
 
-fail_load:
-	dso__delete(kernel_dso);
-fail_open:
-	close(fd);
-kallsyms:
-	return load_kallsyms();
+	return err;
 }
 
 struct map {
@@ -1050,7 +622,7 @@ static int __cmd_report(void)
 	}
 
 	if (load_kernel() < 0) {
-		perror("failed to open kallsyms");
+		perror("failed to load kernel symbols");
 		return EXIT_FAILURE;
 	}
 
@@ -1247,7 +819,7 @@ static const struct option options[] = {
 
 int cmd_report(int argc, const char **argv, const char *prefix)
 {
-	elf_version(EV_CURRENT);
+	symbol__init();
 
 	page_size = getpagesize();
 
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
new file mode 100644
index 0000000..d06de2c
--- /dev/null
+++ b/Documentation/perf_counter/util/symbol.c
@@ -0,0 +1,421 @@
+#include "util.h"
+#include "../perf.h"
+#include "symbol.h"
+
+#include <libelf.h>
+#include <gelf.h>
+#include <elf.h>
+
+static struct symbol *symbol__new(uint64_t start, uint64_t len,
+				  const char *name)
+{
+	struct symbol *self = malloc(sizeof(*self) + strlen(name) + 1);
+
+	if (self != NULL) {
+		self->start = start;
+		self->end   = start + len;
+		strcpy(self->name, name);
+	}
+
+	return self;
+}
+
+static void symbol__delete(struct symbol *self)
+{
+	free(self);
+}
+
+static size_t symbol__fprintf(struct symbol *self, FILE *fp)
+{
+	return fprintf(fp, " %llx-%llx %s\n",
+		       self->start, self->end, self->name);
+}
+
+struct dso *dso__new(const char *name)
+{
+	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
+
+	if (self != NULL) {
+		strcpy(self->name, name);
+		self->syms = RB_ROOT;
+	}
+
+	return self;
+}
+
+static void dso__delete_symbols(struct dso *self)
+{
+	struct symbol *pos;
+	struct rb_node *next = rb_first(&self->syms);
+
+	while (next) {
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+		symbol__delete(pos);
+	}
+}
+
+void dso__delete(struct dso *self)
+{
+	dso__delete_symbols(self);
+	free(self);
+}
+
+static void dso__insert_symbol(struct dso *self, struct symbol *sym)
+{
+	struct rb_node **p = &self->syms.rb_node;
+	struct rb_node *parent = NULL;
+	const uint64_t ip = sym->start;
+	struct symbol *s;
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol, rb_node);
+		if (ip < s->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&sym->rb_node, parent, p);
+	rb_insert_color(&sym->rb_node, &self->syms);
+}
+
+struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
+{
+	struct rb_node *n;
+
+	if (self == NULL)
+		return NULL;
+
+	n = self->syms.rb_node;
+
+	while (n) {
+		struct symbol *s = rb_entry(n, struct symbol, rb_node);
+
+		if (ip < s->start)
+			n = n->rb_left;
+		else if (ip > s->end)
+			n = n->rb_right;
+		else
+			return s;
+	}
+
+	return NULL;
+}
+
+size_t dso__fprintf(struct dso *self, FILE *fp)
+{
+	size_t ret = fprintf(fp, "dso: %s\n", self->name);
+
+	struct rb_node *nd;
+	for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+		ret += symbol__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+static int hex(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	if ((ch >= 'A') && (ch <= 'F'))
+		return ch - 'A' + 10;
+	return -1;
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+static int hex2long(char *ptr, unsigned long *long_val)
+{
+	const char *p = ptr;
+	*long_val = 0;
+
+	while (*p) {
+		const int hex_val = hex(*p);
+
+		if (hex_val < 0)
+			break;
+
+		*long_val = (*long_val << 4) | hex_val;
+		p++;
+	}
+
+	return p - ptr;
+}
+
+int dso__load_kallsyms(struct dso *self)
+{
+	struct rb_node *nd, *prevnd;
+	char *line = NULL;
+	size_t n;
+	FILE *file = fopen("/proc/kallsyms", "r");
+
+	if (file == NULL)
+		goto out_failure;
+
+	while (!feof(file)) {
+		unsigned long start;
+		struct symbol *sym;
+		int line_len, len;
+		char symbol_type;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		line[--line_len] = '\0'; /* \n */
+
+		len = hex2long(line, &start);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		symbol_type = toupper(line[len]);
+		/*
+		 * We're interested only in code ('T'ext)
+		 */
+		if (symbol_type != 'T' && symbol_type != 'W')
+			continue;
+		/*
+		 * Well fix up the end later, when we have all sorted.
+		 */
+		sym = symbol__new(start, 0xdead, line + len + 2);
+
+		if (sym == NULL)
+			goto out_delete_line;
+
+		dso__insert_symbol(self, sym);
+	}
+
+	/*
+	 * Now that we have all sorted out, just set the ->end of all
+	 * symbols
+	 */
+	prevnd = rb_first(&self->syms);
+
+	if (prevnd == NULL)
+		goto out_delete_line;
+
+	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+		struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
+			      *curr = rb_entry(nd, struct symbol, rb_node);
+
+		prev->end = curr->start - 1;
+		prevnd = nd;
+	}
+
+	free(line);
+	fclose(file);
+
+	return 0;
+
+out_delete_line:
+	free(line);
+out_failure:
+	return -1;
+}
+
+/**
+ * elf_symtab__for_each_symbol - iterate thru all the symbols
+ *
+ * @self: struct elf_symtab instance to iterate
+ * @index: uint32_t index
+ * @sym: GElf_Sym iterator
+ */
+#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \
+	for (index = 0, gelf_getsym(syms, index, &sym);\
+	     index < nr_syms; \
+	     index++, gelf_getsym(syms, index, &sym))
+
+static inline uint8_t elf_sym__type(const GElf_Sym *sym)
+{
+	return GELF_ST_TYPE(sym->st_info);
+}
+
+static inline int elf_sym__is_function(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_FUNC &&
+	       sym->st_name != 0 &&
+	       sym->st_shndx != SHN_UNDEF &&
+	       sym->st_size != 0;
+}
+
+static inline const char *elf_sym__name(const GElf_Sym *sym,
+					const Elf_Data *symstrs)
+{
+	return symstrs->d_buf + sym->st_name;
+}
+
+static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+				    GElf_Shdr *shp, const char *name,
+				    size_t *index)
+{
+	Elf_Scn *sec = NULL;
+	size_t cnt = 1;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		char *str;
+
+		gelf_getshdr(sec, shp);
+		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
+		if (!strcmp(name, str)) {
+			if (index)
+				*index = cnt;
+			break;
+		}
+		++cnt;
+	}
+
+	return sec;
+}
+
+static int dso__load_sym(struct dso *self, int fd, const char *name)
+{
+	Elf_Data *symstrs;
+	uint32_t nr_syms;
+	int err = -1;
+	uint32_t index;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *syms;
+	GElf_Sym sym;
+	Elf_Scn *sec;
+	Elf *elf;
+	int nr = 0;
+
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		fprintf(stderr, "%s: cannot read %s ELF file.\n",
+			__func__, name);
+		goto out_close;
+	}
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		fprintf(stderr, "%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
+	if (sec == NULL)
+		sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL);
+
+	if (sec == NULL)
+		goto out_elf_end;
+
+	syms = elf_getdata(sec, NULL);
+	if (syms == NULL)
+		goto out_elf_end;
+
+	sec = elf_getscn(elf, shdr.sh_link);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	symstrs = elf_getdata(sec, NULL);
+	if (symstrs == NULL)
+		goto out_elf_end;
+
+	nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
+		struct symbol *f;
+
+		if (!elf_sym__is_function(&sym))
+			continue;
+
+		sec = elf_getscn(elf, sym.st_shndx);
+		if (!sec)
+			goto out_elf_end;
+
+		gelf_getshdr(sec, &shdr);
+		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+
+		f = symbol__new(sym.st_value, sym.st_size,
+				elf_sym__name(&sym, symstrs));
+		if (!f)
+			goto out_elf_end;
+
+		dso__insert_symbol(self, f);
+
+		nr++;
+	}
+
+	err = nr;
+out_elf_end:
+	elf_end(elf);
+out_close:
+	return err;
+}
+
+int dso__load(struct dso *self)
+{
+	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
+	char *name = malloc(size);
+	int variant = 0;
+	int ret = -1;
+	int fd;
+
+	if (!name)
+		return -1;
+
+more:
+	do {
+		switch (variant) {
+		case 0: /* Fedora */
+			snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
+			break;
+		case 1: /* Ubuntu */
+			snprintf(name, size, "/usr/lib/debug%s", self->name);
+			break;
+		case 2: /* Sane people */
+			snprintf(name, size, "%s", self->name);
+			break;
+
+		default:
+			goto out;
+		}
+		variant++;
+
+		fd = open(name, O_RDONLY);
+	} while (fd < 0);
+
+	ret = dso__load_sym(self, fd, name);
+	close(fd);
+
+	/*
+	 * Some people seem to have debuginfo files _WITHOUT_ debug info!?!?
+	 */
+	if (!ret)
+		goto more;
+
+out:
+	free(name);
+	return ret;
+}
+
+int dso__load_vmlinux(struct dso *self, const char *vmlinux)
+{
+	int err, fd = open(vmlinux, O_RDONLY);
+
+	if (fd < 0)
+		return -1;
+
+	err = dso__load_sym(self, fd, vmlinux);
+	close(fd);
+
+	return err;
+}
+
+void symbol__init(void)
+{
+	elf_version(EV_CURRENT);
+}
diff --git a/Documentation/perf_counter/util/symbol.h b/Documentation/perf_counter/util/symbol.h
new file mode 100644
index 0000000..6dffe76
--- /dev/null
+++ b/Documentation/perf_counter/util/symbol.h
@@ -0,0 +1,33 @@
+#ifndef _PERF_SYMBOL_
+#define _PERF_SYMBOL_ 1
+
+#include <linux/types.h>
+#include "list.h"
+#include "rbtree.h"
+
+struct symbol {
+	struct rb_node	rb_node;
+	__u64		start;
+	__u64		end;
+	char		name[0];
+};
+
+struct dso {
+	struct list_head node;
+	struct rb_root	 syms;
+	char		 name[0];
+};
+
+struct dso *dso__new(const char *name);
+void dso__delete(struct dso *self);
+
+struct symbol *dso__find_symbol(struct dso *self, uint64_t ip);
+
+int dso__load_kallsyms(struct dso *self);
+int dso__load_vmlinux(struct dso *self, const char *vmlinux);
+int dso__load(struct dso *self);
+
+size_t dso__fprintf(struct dso *self, FILE *fp);
+
+void symbol__init(void);
+#endif /* _PERF_SYMBOL_ */
-- 
cgit v0.10.2


From 0085c954140d27937ada29d139c16341075d45e4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 28 May 2009 14:55:13 -0300
Subject: perf_counter tools: struct symbol priv area

When creating a dso instance allow asking that all symbols in this dso
have a private area just before the symbol.

perf top will use this for its counters, etc.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090528175513.GD4747@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 04fc7ec..889067e 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -83,7 +83,7 @@ static struct dso *dsos__findnew(const char *name)
 	int nr;
 
 	if (dso == NULL) {
-		dso = dso__new(name);
+		dso = dso__new(name, 0);
 		if (!dso)
 			goto out_delete_dso;
 
@@ -120,7 +120,7 @@ static int load_kernel(void)
 {
 	int err = -1;
 
-	kernel_dso = dso__new("[kernel]");
+	kernel_dso = dso__new("[kernel]", 0);
 	if (!kernel_dso)
 		return -1;
 
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index d06de2c..7088206 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -7,22 +7,27 @@
 #include <elf.h>
 
 static struct symbol *symbol__new(uint64_t start, uint64_t len,
-				  const char *name)
+				  const char *name, unsigned int priv_size)
 {
-	struct symbol *self = malloc(sizeof(*self) + strlen(name) + 1);
+	size_t namelen = strlen(name) + 1;
+	struct symbol *self = malloc(priv_size + sizeof(*self) + namelen);
 
 	if (self != NULL) {
+		if (priv_size) {
+			memset(self, 0, priv_size);
+			self = ((void *)self) + priv_size;
+		}
 		self->start = start;
 		self->end   = start + len;
-		strcpy(self->name, name);
+		memcpy(self->name, name, namelen);
 	}
 
 	return self;
 }
 
-static void symbol__delete(struct symbol *self)
+static void symbol__delete(struct symbol *self, unsigned int priv_size)
 {
-	free(self);
+	free(((void *)self) - priv_size);
 }
 
 static size_t symbol__fprintf(struct symbol *self, FILE *fp)
@@ -31,13 +36,14 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp)
 		       self->start, self->end, self->name);
 }
 
-struct dso *dso__new(const char *name)
+struct dso *dso__new(const char *name, unsigned int sym_priv_size)
 {
 	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
 
 	if (self != NULL) {
 		strcpy(self->name, name);
 		self->syms = RB_ROOT;
+		self->sym_priv_size = sym_priv_size;
 	}
 
 	return self;
@@ -51,7 +57,7 @@ static void dso__delete_symbols(struct dso *self)
 	while (next) {
 		pos = rb_entry(next, struct symbol, rb_node);
 		next = rb_next(&pos->rb_node);
-		symbol__delete(pos);
+		symbol__delete(pos, self->sym_priv_size);
 	}
 }
 
@@ -189,7 +195,8 @@ int dso__load_kallsyms(struct dso *self)
 		/*
 		 * Well fix up the end later, when we have all sorted.
 		 */
-		sym = symbol__new(start, 0xdead, line + len + 2);
+		sym = symbol__new(start, 0xdead, line + len + 2,
+				  self->sym_priv_size);
 
 		if (sym == NULL)
 			goto out_delete_line;
@@ -340,7 +347,8 @@ static int dso__load_sym(struct dso *self, int fd, const char *name)
 		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 
 		f = symbol__new(sym.st_value, sym.st_size,
-				elf_sym__name(&sym, symstrs));
+				elf_sym__name(&sym, symstrs),
+				self->sym_priv_size);
 		if (!f)
 			goto out_elf_end;
 
diff --git a/Documentation/perf_counter/util/symbol.h b/Documentation/perf_counter/util/symbol.h
index 6dffe76..9e120af 100644
--- a/Documentation/perf_counter/util/symbol.h
+++ b/Documentation/perf_counter/util/symbol.h
@@ -15,12 +15,18 @@ struct symbol {
 struct dso {
 	struct list_head node;
 	struct rb_root	 syms;
+	unsigned int	 sym_priv_size;
 	char		 name[0];
 };
 
-struct dso *dso__new(const char *name);
+struct dso *dso__new(const char *name, unsigned int sym_priv_size);
 void dso__delete(struct dso *self);
 
+static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
+{
+	return ((void *)sym) - self->sym_priv_size;
+}
+
 struct symbol *dso__find_symbol(struct dso *self, uint64_t ip);
 
 int dso__load_kallsyms(struct dso *self);
-- 
cgit v0.10.2


From a827c875f2ebe69c6e6db5e7f112d4beb4d80f01 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 28 May 2009 14:55:19 -0300
Subject: perf_counter tools: Consolidate dso methods to load kernel symbols

Now one has just to use dso__load_kernel() optionally passing a vmlinux
filename.

Will make things easier for perf top that will want to pass a callback
to filter some symbols.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 889067e..4e9f2bc 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -118,18 +118,13 @@ static void dsos__fprintf(FILE *fp)
 
 static int load_kernel(void)
 {
-	int err = -1;
+	int err;
 
 	kernel_dso = dso__new("[kernel]", 0);
 	if (!kernel_dso)
 		return -1;
 
-	if (vmlinux)
-		err = dso__load_vmlinux(kernel_dso, vmlinux);
-
-	if (err)
-		err = dso__load_kallsyms(kernel_dso);
-
+	err = dso__load_kernel(kernel_dso, vmlinux);
 	if (err) {
 		dso__delete(kernel_dso);
 		kernel_dso = NULL;
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index 7088206..504ac31 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -155,7 +155,7 @@ static int hex2long(char *ptr, unsigned long *long_val)
 	return p - ptr;
 }
 
-int dso__load_kallsyms(struct dso *self)
+static int dso__load_kallsyms(struct dso *self)
 {
 	struct rb_node *nd, *prevnd;
 	char *line = NULL;
@@ -410,7 +410,7 @@ out:
 	return ret;
 }
 
-int dso__load_vmlinux(struct dso *self, const char *vmlinux)
+static int dso__load_vmlinux(struct dso *self, const char *vmlinux)
 {
 	int err, fd = open(vmlinux, O_RDONLY);
 
@@ -423,6 +423,19 @@ int dso__load_vmlinux(struct dso *self, const char *vmlinux)
 	return err;
 }
 
+int dso__load_kernel(struct dso *self, const char *vmlinux)
+{
+	int err = -1;
+
+	if (vmlinux)
+		err = dso__load_vmlinux(self, vmlinux);
+
+	if (err)
+		err = dso__load_kallsyms(self);
+
+	return err;
+}
+
 void symbol__init(void)
 {
 	elf_version(EV_CURRENT);
diff --git a/Documentation/perf_counter/util/symbol.h b/Documentation/perf_counter/util/symbol.h
index 9e120af..db2fdf9 100644
--- a/Documentation/perf_counter/util/symbol.h
+++ b/Documentation/perf_counter/util/symbol.h
@@ -29,8 +29,7 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
 
 struct symbol *dso__find_symbol(struct dso *self, uint64_t ip);
 
-int dso__load_kallsyms(struct dso *self);
-int dso__load_vmlinux(struct dso *self, const char *vmlinux);
+int dso__load_kernel(struct dso *self, const char *vmlinux);
 int dso__load(struct dso *self);
 
 size_t dso__fprintf(struct dso *self, FILE *fp);
-- 
cgit v0.10.2


From 69ee69f63c82e63d9c6c6081d12673af4933c51e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 28 May 2009 14:55:26 -0300
Subject: perf_counter tools: Optionally pass a symbol filter to the dso load
 routines

Will be used by perf top.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090528175526.GF4747@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 4e9f2bc..412d524 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -87,7 +87,7 @@ static struct dso *dsos__findnew(const char *name)
 		if (!dso)
 			goto out_delete_dso;
 
-		nr = dso__load(dso);
+		nr = dso__load(dso, NULL);
 		if (nr < 0) {
 			fprintf(stderr, "Failed to open: %s\n", name);
 			goto out_delete_dso;
@@ -124,7 +124,7 @@ static int load_kernel(void)
 	if (!kernel_dso)
 		return -1;
 
-	err = dso__load_kernel(kernel_dso, vmlinux);
+	err = dso__load_kernel(kernel_dso, vmlinux, NULL);
 	if (err) {
 		dso__delete(kernel_dso);
 		kernel_dso = NULL;
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index 504ac31..4728121 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -155,7 +155,7 @@ static int hex2long(char *ptr, unsigned long *long_val)
 	return p - ptr;
 }
 
-static int dso__load_kallsyms(struct dso *self)
+static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter)
 {
 	struct rb_node *nd, *prevnd;
 	char *line = NULL;
@@ -201,7 +201,10 @@ static int dso__load_kallsyms(struct dso *self)
 		if (sym == NULL)
 			goto out_delete_line;
 
-		dso__insert_symbol(self, sym);
+		if (filter && filter(self, sym))
+			symbol__delete(sym, self->sym_priv_size);
+		else
+			dso__insert_symbol(self, sym);
 	}
 
 	/*
@@ -286,7 +289,8 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
 	return sec;
 }
 
-static int dso__load_sym(struct dso *self, int fd, const char *name)
+static int dso__load_sym(struct dso *self, int fd, const char *name,
+			 symbol_filter_t filter)
 {
 	Elf_Data *symstrs;
 	uint32_t nr_syms;
@@ -352,9 +356,12 @@ static int dso__load_sym(struct dso *self, int fd, const char *name)
 		if (!f)
 			goto out_elf_end;
 
-		dso__insert_symbol(self, f);
-
-		nr++;
+		if (filter && filter(self, f))
+			symbol__delete(f, self->sym_priv_size);
+		else {
+			dso__insert_symbol(self, f);
+			nr++;
+		}
 	}
 
 	err = nr;
@@ -364,7 +371,7 @@ out_close:
 	return err;
 }
 
-int dso__load(struct dso *self)
+int dso__load(struct dso *self, symbol_filter_t filter)
 {
 	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
 	char *name = malloc(size);
@@ -396,7 +403,7 @@ more:
 		fd = open(name, O_RDONLY);
 	} while (fd < 0);
 
-	ret = dso__load_sym(self, fd, name);
+	ret = dso__load_sym(self, fd, name, filter);
 	close(fd);
 
 	/*
@@ -410,28 +417,29 @@ out:
 	return ret;
 }
 
-static int dso__load_vmlinux(struct dso *self, const char *vmlinux)
+static int dso__load_vmlinux(struct dso *self, const char *vmlinux,
+			     symbol_filter_t filter)
 {
 	int err, fd = open(vmlinux, O_RDONLY);
 
 	if (fd < 0)
 		return -1;
 
-	err = dso__load_sym(self, fd, vmlinux);
+	err = dso__load_sym(self, fd, vmlinux, filter);
 	close(fd);
 
 	return err;
 }
 
-int dso__load_kernel(struct dso *self, const char *vmlinux)
+int dso__load_kernel(struct dso *self, const char *vmlinux, symbol_filter_t filter)
 {
 	int err = -1;
 
 	if (vmlinux)
-		err = dso__load_vmlinux(self, vmlinux);
+		err = dso__load_vmlinux(self, vmlinux, filter);
 
 	if (err)
-		err = dso__load_kallsyms(self);
+		err = dso__load_kallsyms(self, filter);
 
 	return err;
 }
diff --git a/Documentation/perf_counter/util/symbol.h b/Documentation/perf_counter/util/symbol.h
index db2fdf9..b0299bc 100644
--- a/Documentation/perf_counter/util/symbol.h
+++ b/Documentation/perf_counter/util/symbol.h
@@ -19,6 +19,8 @@ struct dso {
 	char		 name[0];
 };
 
+typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym);
+
 struct dso *dso__new(const char *name, unsigned int sym_priv_size);
 void dso__delete(struct dso *self);
 
@@ -29,8 +31,9 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
 
 struct symbol *dso__find_symbol(struct dso *self, uint64_t ip);
 
-int dso__load_kernel(struct dso *self, const char *vmlinux);
-int dso__load(struct dso *self);
+int dso__load_kernel(struct dso *self, const char *vmlinux,
+		     symbol_filter_t filter);
+int dso__load(struct dso *self, symbol_filter_t filter);
 
 size_t dso__fprintf(struct dso *self, FILE *fp);
 
-- 
cgit v0.10.2


From de04687f868bf98e4ef644af91ed85a3bc212ce8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 28 May 2009 14:55:41 -0300
Subject: perf_counter tools: Convert builtin-top to use libperf symbol
 routines

Now both perf top and report use the same routines.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090528175541.GG4747@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index a890872..52ba9f4 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -44,8 +44,9 @@
 
 #include "perf.h"
 #include "builtin.h"
+#include "util/symbol.h"
 #include "util/util.h"
-#include "util/util.h"
+#include "util/rbtree.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 
@@ -125,19 +126,21 @@ static uint64_t			min_ip;
 static uint64_t			max_ip = -1ll;
 
 struct sym_entry {
-	unsigned long long	addr;
-	char			*sym;
+	struct rb_node		rb_node;
+	struct list_head	node;
 	unsigned long		count[MAX_COUNTERS];
 	int			skip;
 };
 
-#define MAX_SYMS		100000
-
-static int sym_table_count;
-
 struct sym_entry		*sym_filter_entry;
 
-static struct sym_entry		sym_table[MAX_SYMS];
+struct dso *kernel_dso;
+
+/*
+ * Symbols will be added here in record_ip and will get out
+ * after decayed.
+ */
+static LIST_HEAD(active_symbols);
 
 /*
  * Ordering weight: count-1 * count-2 * ... / count-n
@@ -157,42 +160,60 @@ static double sym_weight(const struct sym_entry *sym)
 	return weight;
 }
 
-static int compare(const void *__sym1, const void *__sym2)
-{
-	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
-
-	return sym_weight(sym1) < sym_weight(sym2);
-}
-
 static long			events;
 static long			userspace_events;
 static const char		CONSOLE_CLEAR[] = "[H[2J";
 
-static struct sym_entry		tmp[MAX_SYMS];
+static void list_insert_active_sym(struct sym_entry *syme)
+{
+	list_add(&syme->node, &active_symbols);
+}
+
+static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
+{
+	struct rb_node **p = &tree->rb_node;
+	struct rb_node *parent = NULL;
+	struct sym_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct sym_entry, rb_node);
+
+		if (sym_weight(se) > sym_weight(iter))
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&se->rb_node, parent, p);
+	rb_insert_color(&se->rb_node, tree);
+}
 
 static void print_sym_table(void)
 {
-	int i, j, active_count, printed;
+	int printed, j;
 	int counter;
 	float events_per_sec = events/delay_secs;
 	float kevents_per_sec = (events-userspace_events)/delay_secs;
 	float sum_kevents = 0.0;
+	struct sym_entry *syme, *n;
+	struct rb_root tmp = RB_ROOT;
+	struct rb_node *nd;
 
 	events = userspace_events = 0;
 
-	/* Iterate over symbol table and copy/tally/decay active symbols. */
-	for (i = 0, active_count = 0; i < sym_table_count; i++) {
-		if (sym_table[i].count[0]) {
-			tmp[active_count++] = sym_table[i];
-			sum_kevents += sym_table[i].count[0];
+	/* Sort the active symbols */
+	list_for_each_entry_safe(syme, n, &active_symbols, node) {
+		if (syme->count[0] != 0) {
+			rb_insert_active_sym(&tmp, syme);
+			sum_kevents += syme->count[0];
 
 			for (j = 0; j < nr_counters; j++)
-				sym_table[i].count[j] = zero ? 0 : sym_table[i].count[j] * 7 / 8;
-		}
+				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
+		} else
+			list_del_init(&syme->node);
 	}
 
-	qsort(tmp, active_count + 1, sizeof(tmp[0]), compare);
-
 	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
 
 	printf(
@@ -238,23 +259,25 @@ static void print_sym_table(void)
 	       	       "  ______     ______   _____   ________________   _______________\n\n"
 	);
 
-	for (i = 0, printed = 0; i < active_count; i++) {
+	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
+		struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
+		struct symbol *sym = (struct symbol *)(syme + 1);
 		float pcnt;
 
-		if (++printed > 18 || tmp[i].count[0] < count_filter)
+		if (++printed > 18 || syme->count[0] < count_filter)
 			break;
 
-		pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
+		pcnt = 100.0 - (100.0 * ((sum_kevents - syme->count[0]) /
+					 sum_kevents));
 
 		if (nr_counters == 1)
 			printf("%19.2f - %4.1f%% - %016llx : %s\n",
-				sym_weight(tmp + i),
-				pcnt, tmp[i].addr, tmp[i].sym);
+				sym_weight(syme),
+				pcnt, sym->start, sym->name);
 		else
 			printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
-				sym_weight(tmp + i),
-				tmp[i].count[0],
-				pcnt, tmp[i].addr, tmp[i].sym);
+				sym_weight(syme), syme->count[0],
+				pcnt, sym->start, sym->name);
 	}
 
 	{
@@ -277,146 +300,85 @@ static void *display_thread(void *arg)
 	return NULL;
 }
 
-static int read_symbol(FILE *in, struct sym_entry *s)
+static int symbol_filter(struct dso *self, struct symbol *sym)
 {
-	static int filter_match = 0;
-	char *sym, stype;
-	char str[500];
-	int rc, pos;
-
-	rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
-	if (rc == EOF)
-		return -1;
-
-	assert(rc == 3);
-
-	/* skip until end of line: */
-	pos = strlen(str);
-	do {
-		rc = fgetc(in);
-		if (rc == '\n' || rc == EOF || pos >= 499)
-			break;
-		str[pos] = rc;
-		pos++;
-	} while (1);
-	str[pos] = 0;
-
-	sym = str;
-
-	/* Filter out known duplicates and non-text symbols. */
-	if (!strcmp(sym, "_text"))
-		return 1;
-	if (!min_ip && !strcmp(sym, "_stext"))
-		return 1;
-	if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
-		return 1;
-	if (stype != 'T' && stype != 't')
-		return 1;
-	if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
-		return 1;
-	if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
+	static int filter_match;
+	struct sym_entry *syme;
+	const char *name = sym->name;
+
+	if (!strcmp(name, "_text") ||
+	    !strcmp(name, "_etext") ||
+	    !strcmp(name, "_sinittext") ||
+	    !strncmp("init_module", name, 11) ||
+	    !strncmp("cleanup_module", name, 14) ||
+	    strstr(name, "_text_start") ||
+	    strstr(name, "_text_end"))
 		return 1;
 
-	s->sym = malloc(strlen(str)+1);
-	assert(s->sym);
-
-	strcpy((char *)s->sym, str);
-	s->skip = 0;
-
+	syme = dso__sym_priv(self, sym);
 	/* Tag events to be skipped. */
-	if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
-		s->skip = 1;
-	else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
-		s->skip = 1;
-	else if (!strcmp("mwait_idle", s->sym))
-		s->skip = 1;
+	if (!strcmp("default_idle", name) ||
+	    !strcmp("cpu_idle", name) ||
+	    !strcmp("enter_idle", name) ||
+	    !strcmp("exit_idle", name) ||
+	    !strcmp("mwait_idle", name))
+		syme->skip = 1;
 
 	if (filter_match == 1) {
-		filter_end = s->addr;
+		filter_end = sym->start;
 		filter_match = -1;
 		if (filter_end - filter_start > 10000) {
-			printf("hm, too large filter symbol <%s> - skipping.\n",
+			fprintf(stderr,
+				"hm, too large filter symbol <%s> - skipping.\n",
 				sym_filter);
-			printf("symbol filter start: %016lx\n", filter_start);
-			printf("                end: %016lx\n", filter_end);
+			fprintf(stderr, "symbol filter start: %016lx\n",
+				filter_start);
+			fprintf(stderr, "                end: %016lx\n",
+				filter_end);
 			filter_end = filter_start = 0;
 			sym_filter = NULL;
 			sleep(1);
 		}
 	}
-	if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
+
+	if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
 		filter_match = 1;
-		filter_start = s->addr;
+		filter_start = sym->start;
 	}
 
+
 	return 0;
 }
 
-static int compare_addr(const void *__sym1, const void *__sym2)
+static int parse_symbols(void)
 {
-	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
+	struct rb_node *node;
+	struct symbol  *sym;
 
-	return sym1->addr > sym2->addr;
-}
-
-static void sort_symbol_table(void)
-{
-	int i, dups;
-
-	do {
-		qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
-		for (i = 0, dups = 0; i < sym_table_count; i++) {
-			if (sym_table[i].addr == sym_table[i+1].addr) {
-				sym_table[i+1].addr = -1ll;
-				dups++;
-			}
-		}
-		sym_table_count -= dups;
-	} while(dups);
-}
-
-static void parse_symbols(void)
-{
-	struct sym_entry *last;
+	kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry));
+	if (kernel_dso == NULL)
+		return -1;
 
-	FILE *kallsyms = fopen("/proc/kallsyms", "r");
+	if (dso__load_kernel(kernel_dso, NULL, symbol_filter) != 0)
+		goto out_delete_dso;
 
-	if (!kallsyms) {
-		printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
-		exit(-1);
-	}
+	node = rb_first(&kernel_dso->syms);
+	sym = rb_entry(node, struct symbol, rb_node);
+	min_ip = sym->start;
 
-	while (!feof(kallsyms)) {
-		if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
-			sym_table_count++;
-			assert(sym_table_count <= MAX_SYMS);
-		}
-	}
+	node = rb_last(&kernel_dso->syms);
+	sym = rb_entry(node, struct symbol, rb_node);
+	max_ip = sym->start;
 
-	sort_symbol_table();
-	min_ip = sym_table[0].addr;
-	max_ip = sym_table[sym_table_count-1].addr;
-	last = sym_table + sym_table_count++;
+	if (dump_symtab)
+		dso__fprintf(kernel_dso, stdout);
 
-	last->addr = -1ll;
-	last->sym = "<end>";
-
-	if (filter_end) {
-		int count;
-		for (count=0; count < sym_table_count; count ++) {
-			if (!strcmp(sym_table[count].sym, sym_filter)) {
-				sym_filter_entry = &sym_table[count];
-				break;
-			}
-		}
-	}
-	if (dump_symtab) {
-		int i;
+	return 0;
 
-		for (i = 0; i < sym_table_count; i++)
-			fprintf(stderr, "%llx %s\n",
-				sym_table[i].addr, sym_table[i].sym);
-	}
+out_delete_dso:
+	dso__delete(kernel_dso);
+	kernel_dso = NULL;
+	return -1;
 }
 
 #define TRACE_COUNT     3
@@ -426,51 +388,20 @@ static void parse_symbols(void)
  */
 static void record_ip(uint64_t ip, int counter)
 {
-	int left_idx, middle_idx, right_idx, idx;
-	unsigned long left, middle, right;
-
-	left_idx = 0;
-	right_idx = sym_table_count-1;
-	assert(ip <= max_ip && ip >= min_ip);
-
-	while (left_idx + 1 < right_idx) {
-		middle_idx = (left_idx + right_idx) / 2;
+	struct symbol *sym = dso__find_symbol(kernel_dso, ip);
 
-		left   = sym_table[  left_idx].addr;
-		middle = sym_table[middle_idx].addr;
-		right  = sym_table[ right_idx].addr;
+	if (sym != NULL) {
+		struct sym_entry *syme = dso__sym_priv(kernel_dso, sym);
 
-		if (!(left <= middle && middle <= right)) {
-			printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
-			printf("%d %d %d\n", left_idx, middle_idx, right_idx);
+		if (!syme->skip) {
+			syme->count[counter]++;
+			if (list_empty(&syme->node) || !syme->node.next)
+				list_insert_active_sym(syme);
+			return;
 		}
-		assert(left <= middle && middle <= right);
-		if (!(left <= ip && ip <= right)) {
-			printf(" left: %016lx\n", left);
-			printf("   ip: %016lx\n", (unsigned long)ip);
-			printf("right: %016lx\n", right);
-		}
-		assert(left <= ip && ip <= right);
-		/*
-		 * [ left .... target .... middle .... right ]
-		 *   => right := middle
-		 */
-		if (ip < middle) {
-			right_idx = middle_idx;
-			continue;
-		}
-		/*
-		 * [ left .... middle ... target ... right ]
-		 *   => left := middle
-		 */
-		left_idx = middle_idx;
 	}
 
-	idx = left_idx;
-
-	if (!sym_table[idx].skip)
-		sym_table[idx].count[counter]++;
-	else events--;
+	events--;
 }
 
 static void process_event(uint64_t ip, int counter)
-- 
cgit v0.10.2


From 9e09675366695405412b709e91709c1ce2925c90 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Thu, 28 May 2009 16:25:34 +0200
Subject: perf_counter tools: Document '--' option parsing terminator

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt
index 353db1b..a93d2ec 100644
--- a/Documentation/perf_counter/Documentation/perf-record.txt
+++ b/Documentation/perf_counter/Documentation/perf-record.txt
@@ -9,6 +9,7 @@ SYNOPSIS
 --------
 [verse]
 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
 
 DESCRIPTION
 -----------
diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt
index 7fcab27..828c59ff 100644
--- a/Documentation/perf_counter/Documentation/perf-stat.txt
+++ b/Documentation/perf_counter/Documentation/perf-stat.txt
@@ -9,6 +9,7 @@ SYNOPSIS
 --------
 [verse]
 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
 
 DESCRIPTION
 -----------
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 4a06866..23d1224 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -397,7 +397,8 @@ static int __cmd_record(int argc, const char **argv)
 }
 
 static const char * const record_usage[] = {
-	"perf record [<options>] <command>",
+	"perf record [<options>] [<command>]",
+	"perf record [<options>] -- <command> [<options>]",
 	NULL
 };
 
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index ce661e2..ac14086 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -212,6 +212,7 @@ static void skip_signal(int signo)
 
 static const char * const stat_usage[] = {
 	"perf stat [<options>] <command>",
+	"perf stat [<options>] -- <command> [<options>]",
 	NULL
 };
 
-- 
cgit v0.10.2


From a994955cc091a8a51b7d7412174d9cf6de04d26b Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:40:35 -0700
Subject: davinci: Make GPIO code more generic

The current gpio code needs to know the number of
gpio irqs there are and what the bank irq number is.
To determine those values, it checks the SoC type.

It also assumes that the base address and the number
of irqs the interrupt controller uses is fixed.

To clean up the SoC checks and make it support
different base addresses and interrupt controllers,
have the SoC-specific code set those values in
the soc_info structure and have the gpio code
reference them there.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 1b7c16c..757def7 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -13,6 +13,7 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/gpio.h>
 
 #include <linux/spi/spi.h>
 
@@ -647,6 +648,9 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm355_timer_info,
 	.wdt_base		= IO_ADDRESS(DAVINCI_WDOG_BASE),
+	.gpio_base		= IO_ADDRESS(DAVINCI_GPIO_BASE),
+	.gpio_num		= 104,
+	.gpio_irq		= IRQ_DM355_GPIOBNK0,
 };
 
 void __init dm355_init(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index c662692..cfd918e 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/clk.h>
 #include <linux/platform_device.h>
+#include <linux/gpio.h>
 
 #include <asm/mach/map.h>
 
@@ -590,6 +591,9 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm644x_timer_info,
 	.wdt_base		= IO_ADDRESS(DAVINCI_WDOG_BASE),
+	.gpio_base		= IO_ADDRESS(DAVINCI_GPIO_BASE),
+	.gpio_num		= 71,
+	.gpio_irq		= IRQ_GPIOBNK0,
 };
 
 void __init dm644x_init(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 83d67cf..f980c2f 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/clk.h>
 #include <linux/platform_device.h>
+#include <linux/gpio.h>
 
 #include <asm/mach/map.h>
 
@@ -569,6 +570,9 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.intc_irq_num		= DAVINCI_N_AINTC_IRQ,
 	.timer_info		= &dm646x_timer_info,
 	.wdt_base		= IO_ADDRESS(DAVINCI_WDOG_BASE),
+	.gpio_base		= IO_ADDRESS(DAVINCI_GPIO_BASE),
+	.gpio_num		= 43, /* Only 33 usable */
+	.gpio_irq		= IRQ_DM646X_GPIOBNK0,
 };
 
 void __init dm646x_init(void)
diff --git a/arch/arm/mach-davinci/gpio.c b/arch/arm/mach-davinci/gpio.c
index 40327b5..1b65321 100644
--- a/arch/arm/mach-davinci/gpio.c
+++ b/arch/arm/mach-davinci/gpio.c
@@ -23,6 +23,7 @@
 #include <mach/cputype.h>
 #include <mach/irqs.h>
 #include <mach/hardware.h>
+#include <mach/common.h>
 #include <mach/gpio.h>
 
 #include <asm/mach/irq.h>
@@ -37,8 +38,6 @@ struct davinci_gpio {
 
 static struct davinci_gpio chips[DIV_ROUND_UP(DAVINCI_N_GPIO, 32)];
 
-static unsigned __initdata ngpio;
-
 /* create a non-inlined version */
 static struct gpio_controller __iomem * __init gpio2controller(unsigned gpio)
 {
@@ -116,23 +115,16 @@ davinci_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 static int __init davinci_gpio_setup(void)
 {
 	int i, base;
+	unsigned ngpio;
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
-	/* The gpio banks conceptually expose a segmented bitmap,
+	/*
+	 * The gpio banks conceptually expose a segmented bitmap,
 	 * and "ngpio" is one more than the largest zero-based
 	 * bit index that's valid.
 	 */
-	if (cpu_is_davinci_dm355()) {		/* or dm335() */
-		ngpio = 104;
-	} else if (cpu_is_davinci_dm644x()) {	/* or dm337() */
-		ngpio = 71;
-	} else if (cpu_is_davinci_dm646x()) {
-		/* NOTE:  each bank has several "reserved" bits,
-		 * unusable as GPIOs.  Only 33 of the GPIO numbers
-		 * are usable, and we're not rejecting the others.
-		 */
-		ngpio = 43;
-	} else {
-		/* if cpu_is_davinci_dm643x() ngpio = 111 */
+	ngpio = soc_info->gpio_num;
+	if (ngpio == 0) {
 		pr_err("GPIO setup:  how many GPIOs?\n");
 		return -EINVAL;
 	}
@@ -279,17 +271,15 @@ gpio_irq_handler(unsigned irq, struct irq_desc *desc)
 static int __init davinci_gpio_irq_setup(void)
 {
 	unsigned	gpio, irq, bank;
-	unsigned	bank_irq;
 	struct clk	*clk;
 	u32		binten = 0;
+	unsigned	ngpio, bank_irq;
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+
+	ngpio = soc_info->gpio_num;
 
-	if (cpu_is_davinci_dm355()) {		/* or dm335() */
-		bank_irq = IRQ_DM355_GPIOBNK0;
-	} else if (cpu_is_davinci_dm644x()) {
-		bank_irq = IRQ_GPIOBNK0;
-	} else if (cpu_is_davinci_dm646x()) {
-		bank_irq = IRQ_DM646X_GPIOBNK0;
-	} else {
+	bank_irq = soc_info->gpio_irq;
+	if (bank_irq == 0) {
 		printk(KERN_ERR "Don't know first GPIO bank IRQ.\n");
 		return -EINVAL;
 	}
@@ -329,8 +319,7 @@ static int __init davinci_gpio_irq_setup(void)
 	/* BINTEN -- per-bank interrupt enable. genirq would also let these
 	 * bits be set/cleared dynamically.
 	 */
-	__raw_writel(binten, (void *__iomem)
-		     IO_ADDRESS(DAVINCI_GPIO_BASE + 0x08));
+	__raw_writel(binten, soc_info->gpio_base + 0x08);
 
 	printk(KERN_INFO "DaVinci: %d gpio irqs\n", irq - gpio_to_irq(0));
 
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index d637038..06ff6d6 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -58,6 +58,9 @@ struct davinci_soc_info {
 	unsigned long			intc_irq_num;
 	struct davinci_timer_info	*timer_info;
 	void __iomem			*wdt_base;
+	void __iomem			*gpio_base;
+	unsigned			gpio_num;
+	unsigned			gpio_irq;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
diff --git a/arch/arm/mach-davinci/include/mach/gpio.h b/arch/arm/mach-davinci/include/mach/gpio.h
index efe3281..ae07455 100644
--- a/arch/arm/mach-davinci/include/mach/gpio.h
+++ b/arch/arm/mach-davinci/include/mach/gpio.h
@@ -17,6 +17,7 @@
 #include <asm-generic/gpio.h>
 
 #include <mach/irqs.h>
+#include <mach/common.h>
 
 #define DAVINCI_GPIO_BASE 0x01C67000
 
@@ -67,15 +68,16 @@ static inline struct gpio_controller *__iomem
 __gpio_to_controller(unsigned gpio)
 {
 	void *__iomem ptr;
+	void __iomem *base = davinci_soc_info.gpio_base;
 
 	if (gpio < 32 * 1)
-		ptr = IO_ADDRESS(DAVINCI_GPIO_BASE + 0x10);
+		ptr = base + 0x10;
 	else if (gpio < 32 * 2)
-		ptr = IO_ADDRESS(DAVINCI_GPIO_BASE + 0x38);
+		ptr = base + 0x38;
 	else if (gpio < 32 * 3)
-		ptr = IO_ADDRESS(DAVINCI_GPIO_BASE + 0x60);
+		ptr = base + 0x60;
 	else if (gpio < 32 * 4)
-		ptr = IO_ADDRESS(DAVINCI_GPIO_BASE + 0x88);
+		ptr = base + 0x88;
 	else
 		ptr = NULL;
 	return ptr;
@@ -142,13 +144,13 @@ static inline int gpio_to_irq(unsigned gpio)
 {
 	if (gpio >= DAVINCI_N_GPIO)
 		return -EINVAL;
-	return DAVINCI_N_AINTC_IRQ + gpio;
+	return davinci_soc_info.intc_irq_num + gpio;
 }
 
 static inline int irq_to_gpio(unsigned irq)
 {
 	/* caller guarantees gpio_to_irq() succeeded */
-	return irq - DAVINCI_N_AINTC_IRQ;
+	return irq - davinci_soc_info.intc_irq_num;
 }
 
 #endif				/* __DAVINCI_GPIO_H */
-- 
cgit v0.10.2


From 65e866a9741126c678e6dcd5d4fa8c9eca18e945 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 18 Mar 2009 12:36:08 -0500
Subject: davinci: Move serial platform_device into SoC-specific files

Currently, there is one set of platform_device and platform_data
structures for all DaVinci SoCs.  The differences in the data
between the various SoCs is handled by davinci_serial_init()
by checking the SoC type.  However, as new SoCs appear, this
routine will become more & more cluttered.

To clean up the routine and make it easier to add support for new
SoCs, move the platform_device and platform_data structures into the
SoC-specific code and use the SoC infrastructure to provide access
to the data.

In the process, fix a bug where the wrong irq is used for uart2
of the dm646x.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 757def7..4c3257e 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/clk.h>
+#include <linux/serial_8250.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/gpio.h>
@@ -27,6 +28,7 @@
 #include <mach/mux.h>
 #include <mach/irqs.h>
 #include <mach/time.h>
+#include <mach/serial.h>
 #include <mach/common.h>
 
 #include "clock.h"
@@ -630,6 +632,44 @@ struct davinci_timer_info dm355_timer_info = {
 	.clocksource_id	= T0_TOP,
 };
 
+static struct plat_serial8250_port dm355_serial_platform_data[] = {
+	{
+		.mapbase	= DAVINCI_UART0_BASE,
+		.irq		= IRQ_UARTINT0,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+	},
+	{
+		.mapbase	= DAVINCI_UART1_BASE,
+		.irq		= IRQ_UARTINT1,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+	},
+	{
+		.mapbase	= DM355_UART2_BASE,
+		.irq		= IRQ_DM355_UARTINT2,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+	},
+	{
+		.flags		= 0
+	},
+};
+
+static struct platform_device dm355_serial_device = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_PLATFORM,
+	.dev			= {
+		.platform_data	= dm355_serial_platform_data,
+	},
+};
+
 static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.io_desc		= dm355_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm355_io_desc),
@@ -651,6 +691,7 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.gpio_base		= IO_ADDRESS(DAVINCI_GPIO_BASE),
 	.gpio_num		= 104,
 	.gpio_irq		= IRQ_DM355_GPIOBNK0,
+	.serial_dev		= &dm355_serial_device,
 };
 
 void __init dm355_init(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index cfd918e..a562986 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/clk.h>
+#include <linux/serial_8250.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 
@@ -24,6 +25,7 @@
 #include <mach/psc.h>
 #include <mach/mux.h>
 #include <mach/time.h>
+#include <mach/serial.h>
 #include <mach/common.h>
 
 #include "clock.h"
@@ -573,6 +575,44 @@ struct davinci_timer_info dm644x_timer_info = {
 	.clocksource_id	= T0_TOP,
 };
 
+static struct plat_serial8250_port dm644x_serial_platform_data[] = {
+	{
+		.mapbase	= DAVINCI_UART0_BASE,
+		.irq		= IRQ_UARTINT0,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+	},
+	{
+		.mapbase	= DAVINCI_UART1_BASE,
+		.irq		= IRQ_UARTINT1,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+	},
+	{
+		.mapbase	= DAVINCI_UART2_BASE,
+		.irq		= IRQ_UARTINT2,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM,
+		.regshift	= 2,
+	},
+	{
+		.flags		= 0
+	},
+};
+
+static struct platform_device dm644x_serial_device = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_PLATFORM,
+	.dev			= {
+		.platform_data	= dm644x_serial_platform_data,
+	},
+};
+
 static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.io_desc		= dm644x_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm644x_io_desc),
@@ -594,6 +634,7 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.gpio_base		= IO_ADDRESS(DAVINCI_GPIO_BASE),
 	.gpio_num		= 71,
 	.gpio_irq		= IRQ_GPIOBNK0,
+	.serial_dev		= &dm644x_serial_device,
 };
 
 void __init dm644x_init(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index f980c2f..544658e 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/clk.h>
+#include <linux/serial_8250.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 
@@ -24,6 +25,7 @@
 #include <mach/psc.h>
 #include <mach/mux.h>
 #include <mach/time.h>
+#include <mach/serial.h>
 #include <mach/common.h>
 
 #include "clock.h"
@@ -552,6 +554,44 @@ struct davinci_timer_info dm646x_timer_info = {
 	.clocksource_id	= T0_TOP,
 };
 
+static struct plat_serial8250_port dm646x_serial_platform_data[] = {
+	{
+		.mapbase	= DAVINCI_UART0_BASE,
+		.irq		= IRQ_UARTINT0,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM32,
+		.regshift	= 2,
+	},
+	{
+		.mapbase	= DAVINCI_UART1_BASE,
+		.irq		= IRQ_UARTINT1,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM32,
+		.regshift	= 2,
+	},
+	{
+		.mapbase	= DAVINCI_UART2_BASE,
+		.irq		= IRQ_DM646X_UARTINT2,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+				  UPF_IOREMAP,
+		.iotype		= UPIO_MEM32,
+		.regshift	= 2,
+	},
+	{
+		.flags		= 0
+	},
+};
+
+static struct platform_device dm646x_serial_device = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_PLATFORM,
+	.dev			= {
+		.platform_data	= dm646x_serial_platform_data,
+	},
+};
+
 static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.io_desc		= dm646x_io_desc,
 	.io_desc_num		= ARRAY_SIZE(dm646x_io_desc),
@@ -573,6 +613,7 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.gpio_base		= IO_ADDRESS(DAVINCI_GPIO_BASE),
 	.gpio_num		= 43, /* Only 33 usable */
 	.gpio_irq		= IRQ_DM646X_GPIOBNK0,
+	.serial_dev		= &dm646x_serial_device,
 };
 
 void __init dm646x_init(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 06ff6d6..9624e03 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -61,6 +61,7 @@ struct davinci_soc_info {
 	void __iomem			*gpio_base;
 	unsigned			gpio_num;
 	unsigned			gpio_irq;
+	struct platform_device		*serial_dev;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h
index 761ab2b..b0c57fa 100644
--- a/arch/arm/mach-davinci/include/mach/serial.h
+++ b/arch/arm/mach-davinci/include/mach/serial.h
@@ -30,6 +30,6 @@ struct davinci_uart_config {
 	unsigned int enabled_uarts;
 };
 
-extern void davinci_serial_init(struct davinci_uart_config *);
+extern int davinci_serial_init(struct davinci_uart_config *);
 
 #endif /* __ASM_ARCH_SERIAL_H */
diff --git a/arch/arm/mach-davinci/serial.c b/arch/arm/mach-davinci/serial.c
index 6950757..c530c73 100644
--- a/arch/arm/mach-davinci/serial.c
+++ b/arch/arm/mach-davinci/serial.c
@@ -33,6 +33,8 @@
 #include <mach/serial.h>
 #include <mach/irqs.h>
 #include <mach/cputype.h>
+#include <mach/common.h>
+
 #include "clock.h"
 
 static inline unsigned int serial_read_reg(struct plat_serial8250_port *up,
@@ -49,44 +51,6 @@ static inline void serial_write_reg(struct plat_serial8250_port *p, int offset,
 	__raw_writel(value, IO_ADDRESS(p->mapbase) + offset);
 }
 
-static struct plat_serial8250_port serial_platform_data[] = {
-	{
-		.mapbase	= DAVINCI_UART0_BASE,
-		.irq		= IRQ_UARTINT0,
-		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
-				  UPF_IOREMAP,
-		.iotype		= UPIO_MEM,
-		.regshift	= 2,
-	},
-	{
-		.mapbase	= DAVINCI_UART1_BASE,
-		.irq		= IRQ_UARTINT1,
-		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
-				  UPF_IOREMAP,
-		.iotype		= UPIO_MEM,
-		.regshift	= 2,
-	},
-	{
-		.mapbase	= DAVINCI_UART2_BASE,
-		.irq		= IRQ_UARTINT2,
-		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
-				  UPF_IOREMAP,
-		.iotype		= UPIO_MEM,
-		.regshift	= 2,
-	},
-	{
-		.flags		= 0
-	},
-};
-
-static struct platform_device serial_device = {
-	.name			= "serial8250",
-	.id			= PLAT8250_DEV_PLATFORM,
-	.dev			= {
-		.platform_data	= serial_platform_data,
-	},
-};
-
 static void __init davinci_serial_reset(struct plat_serial8250_port *p)
 {
 	unsigned int pwremu = 0;
@@ -106,35 +70,22 @@ static void __init davinci_serial_reset(struct plat_serial8250_port *p)
 				 UART_DM646X_SCR_TX_WATERMARK);
 }
 
-void __init davinci_serial_init(struct davinci_uart_config *info)
+int __init davinci_serial_init(struct davinci_uart_config *info)
 {
 	int i;
 	char name[16];
 	struct clk *uart_clk;
-	struct device *dev = &serial_device.dev;
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+	struct device *dev = &soc_info->serial_dev->dev;
+	struct plat_serial8250_port *p = dev->platform_data;
 
 	/*
 	 * Make sure the serial ports are muxed on at this point.
-	 * You have to mux them off in device drivers later on
-	 * if not needed.
+	 * You have to mux them off in device drivers later on if not needed.
 	 */
-	for (i = 0; i < DAVINCI_MAX_NR_UARTS; i++) {
-		struct plat_serial8250_port *p = serial_platform_data + i;
-
-		if (!(info->enabled_uarts & (1 << i))) {
-			p->flags = 0;
+	for (i = 0; i < DAVINCI_MAX_NR_UARTS; i++, p++) {
+		if (!(info->enabled_uarts & (1 << i)))
 			continue;
-		}
-
-		if (cpu_is_davinci_dm646x())
-			p->iotype = UPIO_MEM32;
-
-		if (cpu_is_davinci_dm355()) {
-			if (i == 2) {
-				p->mapbase = (unsigned long)DM355_UART2_BASE;
-				p->irq = IRQ_DM355_UARTINT2;
-			}
-		}
 
 		sprintf(name, "uart%d", i);
 		uart_clk = clk_get(dev, name);
@@ -147,11 +98,6 @@ void __init davinci_serial_init(struct davinci_uart_config *info)
 			davinci_serial_reset(p);
 		}
 	}
-}
 
-static int __init davinci_init(void)
-{
-	return platform_device_register(&serial_device);
+	return platform_device_register(soc_info->serial_dev);
 }
-
-arch_initcall(davinci_init);
-- 
cgit v0.10.2


From 972412b648dcf0c4303dca7e515d5c24ce3cd1d5 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:40:56 -0700
Subject: davinci: Move emac platform_data to SoC-specific files

Since most of the emac platform_data is really SoC specific
and not board specific, move it to the SoC-specific files.
Put a pointer to the platform_data in the soc_info structure
so the board-specific code can set some of the platform_data
if it needs to.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index cfe89c4..987d27f 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -38,7 +38,6 @@
 
 #include <mach/dm644x.h>
 #include <mach/common.h>
-#include <mach/emac.h>
 #include <mach/i2c.h>
 #include <mach/serial.h>
 #include <mach/mux.h>
@@ -61,11 +60,6 @@
 #define LXT971_PHY_ID	(0x001378e2)
 #define LXT971_PHY_MASK	(0xfffffff0)
 
-static struct emac_platform_data dm644x_evm_emac_pdata = {
-	.phy_mask	= DM644X_EVM_PHY_MASK,
-	.mdio_max_freq	= DM644X_EVM_MDIO_FREQUENCY,
-};
-
 static struct mtd_partition davinci_evm_norflash_partitions[] = {
 	/* bootloader (UBL, U-Boot, etc) in first 5 sectors */
 	{
@@ -448,6 +442,7 @@ static struct memory_accessor *at24_mem_acc;
 static void at24_setup(struct memory_accessor *mem_acc, void *context)
 {
 	char mac_addr[ETH_ALEN];
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
 	at24_mem_acc = mem_acc;
 
@@ -455,7 +450,7 @@ static void at24_setup(struct memory_accessor *mem_acc, void *context)
 	if (at24_mem_acc->read(at24_mem_acc, mac_addr, 0x7f00, ETH_ALEN) ==
 	    ETH_ALEN) {
 		printk(KERN_INFO "Read MAC addr from EEPROM: %pM\n", mac_addr);
-		memcpy(dm644x_evm_emac_pdata.mac_addr, mac_addr, ETH_ALEN);
+		memcpy(soc_info->emac_pdata->mac_addr, mac_addr, ETH_ALEN);
 	}
 }
 
@@ -650,6 +645,7 @@ static int davinci_phy_fixup(struct phy_device *phydev)
 static __init void davinci_evm_init(void)
 {
 	struct clk *aemif_clk;
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
 	aemif_clk = clk_get(NULL, "aemif");
 	clk_enable(aemif_clk);
@@ -686,7 +682,9 @@ static __init void davinci_evm_init(void)
 
 	davinci_serial_init(&uart_config);
 
-	dm644x_init_emac(&dm644x_evm_emac_pdata);
+	soc_info->emac_pdata->phy_mask = DM644X_EVM_PHY_MASK;
+	soc_info->emac_pdata->mdio_max_freq = DM644X_EVM_MDIO_FREQUENCY;
+	dm644x_init_emac(soc_info->emac_pdata);
 
 	/* Register the fixup for PHY on DaVinci */
 	phy_register_fixup_for_uid(LXT971_PHY_ID, LXT971_PHY_MASK,
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index becae51..0de0637 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -52,11 +52,6 @@
 #define DM646X_EVM_PHY_MASK		(0x2)
 #define DM646X_EVM_MDIO_FREQUENCY	(2200000) /* PHY bus frequency */
 
-static struct emac_platform_data dm646x_evm_emac_pdata = {
-	.phy_mask	= DM646X_EVM_PHY_MASK,
-	.mdio_max_freq	= DM646X_EVM_MDIO_FREQUENCY,
-};
-
 static struct davinci_uart_config uart_config __initdata = {
 	.enabled_uarts = (1 << 0),
 };
@@ -208,6 +203,7 @@ static struct memory_accessor *at24_mem_acc;
 static void at24_setup(struct memory_accessor *mem_acc, void *context)
 {
 	char mac_addr[ETH_ALEN];
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
 	at24_mem_acc = mem_acc;
 
@@ -215,7 +211,7 @@ static void at24_setup(struct memory_accessor *mem_acc, void *context)
 	if (at24_mem_acc->read(at24_mem_acc, mac_addr, 0x7f00, ETH_ALEN) ==
 	    ETH_ALEN) {
 		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
-		memcpy(dm646x_evm_emac_pdata.mac_addr, mac_addr, ETH_ALEN);
+		memcpy(soc_info->emac_pdata->mac_addr, mac_addr, ETH_ALEN);
 	}
 }
 
@@ -271,9 +267,14 @@ static void __init davinci_map_io(void)
 
 static __init void evm_init(void)
 {
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+
 	evm_init_i2c();
 	davinci_serial_init(&uart_config);
-	dm646x_init_emac(&dm646x_evm_emac_pdata);
+
+	soc_info->emac_pdata->phy_mask = DM646X_EVM_PHY_MASK;
+	soc_info->emac_pdata->mdio_max_freq = DM646X_EVM_MDIO_FREQUENCY;
+	dm646x_init_emac(soc_info->emac_pdata);
 }
 
 static __init void davinci_dm646x_evm_irq_init(void)
diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
index 938b446..748a8e4 100644
--- a/arch/arm/mach-davinci/board-sffsdr.c
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -48,7 +48,6 @@
 
 #include <mach/dm644x.h>
 #include <mach/common.h>
-#include <mach/emac.h>
 #include <mach/i2c.h>
 #include <mach/serial.h>
 #include <mach/psc.h>
@@ -158,11 +157,14 @@ static void __init davinci_sffsdr_map_io(void)
 
 static __init void davinci_sffsdr_init(void)
 {
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
+
 	platform_add_devices(davinci_sffsdr_devices,
 			     ARRAY_SIZE(davinci_sffsdr_devices));
 	sffsdr_init_i2c();
 	davinci_serial_init(&uart_config);
-	dm644x_init_emac(&sffsdr_emac_pdata);
+	soc_info->emac_pdata->phy_mask = SFFSDR_PHY_MASK;
+	soc_info->emac_pdata->mdio_max_freq = SFFSDR_MDIO_FREQUENCY;
 	setup_usb(0, 0); /* We support only peripheral mode. */
 
 	/* mux VLYNQ pins */
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 7ebf671..c0195cd 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -260,11 +260,6 @@ void davinci_init_emac(struct emac_platform_data *pdata)
 {
 	DECLARE_MAC_BUF(buf);
 
-	if (cpu_is_davinci_dm644x())
-		dm644x_init_emac(pdata);
-	else if (cpu_is_davinci_dm646x())
-		dm646x_init_emac(pdata);
-
 	/* if valid MAC exists, don't re-register */
 	if (is_valid_ether_addr(pdata->mac_addr))
 		return;
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index a562986..3844fc3 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -319,7 +319,14 @@ struct davinci_clk dm644x_clks[] = {
 	CLK(NULL, NULL, NULL),
 };
 
-#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
+static struct emac_platform_data dm644x_emac_pdata = {
+	.ctrl_reg_offset	= DM644X_EMAC_CNTRL_OFFSET,
+	.ctrl_mod_reg_offset	= DM644X_EMAC_CNTRL_MOD_OFFSET,
+	.ctrl_ram_offset	= DM644X_EMAC_CNTRL_RAM_OFFSET,
+	.mdio_reg_offset	= DM644X_EMAC_MDIO_OFFSET,
+	.ctrl_ram_size		= DM644X_EMAC_CNTRL_RAM_SIZE,
+	.version		= EMAC_VERSION_1,
+};
 
 static struct resource dm644x_emac_resources[] = {
 	{
@@ -337,12 +344,13 @@ static struct resource dm644x_emac_resources[] = {
 static struct platform_device dm644x_emac_device = {
        .name		= "davinci_emac",
        .id		= 1,
+       .dev = {
+	       .platform_data	= &dm644x_emac_pdata,
+       },
        .num_resources	= ARRAY_SIZE(dm644x_emac_resources),
        .resource	= dm644x_emac_resources,
 };
 
-#endif
-
 /*
  * Device specific mux setup
  *
@@ -520,24 +528,6 @@ static struct platform_device dm644x_edma_device = {
 };
 
 /*----------------------------------------------------------------------*/
-#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
-
-void dm644x_init_emac(struct emac_platform_data *pdata)
-{
-	pdata->ctrl_reg_offset		= DM644X_EMAC_CNTRL_OFFSET;
-	pdata->ctrl_mod_reg_offset	= DM644X_EMAC_CNTRL_MOD_OFFSET;
-	pdata->ctrl_ram_offset		= DM644X_EMAC_CNTRL_RAM_OFFSET;
-	pdata->mdio_reg_offset		= DM644X_EMAC_MDIO_OFFSET;
-	pdata->ctrl_ram_size		= DM644X_EMAC_CNTRL_RAM_SIZE;
-	pdata->version			= EMAC_VERSION_1;
-	dm644x_emac_device.dev.platform_data = pdata;
-	platform_device_register(&dm644x_emac_device);
-}
-#else
-
-void dm644x_init_emac(struct emac_platform_data *unused) {}
-
-#endif
 
 static struct map_desc dm644x_io_desc[] = {
 	{
@@ -635,6 +625,7 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.gpio_num		= 71,
 	.gpio_irq		= IRQ_GPIOBNK0,
 	.serial_dev		= &dm644x_serial_device,
+	.emac_pdata		= &dm644x_emac_pdata,
 };
 
 void __init dm644x_init(void)
@@ -648,6 +639,7 @@ static int __init dm644x_init_devices(void)
 		return 0;
 
 	platform_device_register(&dm644x_edma_device);
+	platform_device_register(&dm644x_emac_device);
 	return 0;
 }
 postcore_initcall(dm644x_init_devices);
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 544658e..5185ad5 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -286,7 +286,15 @@ struct davinci_clk dm646x_clks[] = {
 	CLK(NULL, NULL, NULL),
 };
 
-#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
+static struct emac_platform_data dm646x_emac_pdata = {
+	.ctrl_reg_offset	= DM646X_EMAC_CNTRL_OFFSET,
+	.ctrl_mod_reg_offset	= DM646X_EMAC_CNTRL_MOD_OFFSET,
+	.ctrl_ram_offset	= DM646X_EMAC_CNTRL_RAM_OFFSET,
+	.mdio_reg_offset	= DM646X_EMAC_MDIO_OFFSET,
+	.ctrl_ram_size		= DM646X_EMAC_CNTRL_RAM_SIZE,
+	.version		= EMAC_VERSION_2,
+};
+
 static struct resource dm646x_emac_resources[] = {
 	{
 		.start	= DM646X_EMAC_BASE,
@@ -318,12 +326,13 @@ static struct resource dm646x_emac_resources[] = {
 static struct platform_device dm646x_emac_device = {
 	.name		= "davinci_emac",
 	.id		= 1,
+	.dev = {
+		.platform_data	= &dm646x_emac_pdata,
+	},
 	.num_resources	= ARRAY_SIZE(dm646x_emac_resources),
 	.resource	= dm646x_emac_resources,
 };
 
-#endif
-
 /*
  * Device specific mux setup
  *
@@ -499,25 +508,6 @@ static struct platform_device dm646x_edma_device = {
 
 /*----------------------------------------------------------------------*/
 
-#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
-
-void dm646x_init_emac(struct emac_platform_data *pdata)
-{
-	pdata->ctrl_reg_offset		= DM646X_EMAC_CNTRL_OFFSET;
-	pdata->ctrl_mod_reg_offset	= DM646X_EMAC_CNTRL_MOD_OFFSET;
-	pdata->ctrl_ram_offset		= DM646X_EMAC_CNTRL_RAM_OFFSET;
-	pdata->mdio_reg_offset		= DM646X_EMAC_MDIO_OFFSET;
-	pdata->ctrl_ram_size		= DM646X_EMAC_CNTRL_RAM_SIZE;
-	pdata->version			= EMAC_VERSION_2;
-	dm646x_emac_device.dev.platform_data = pdata;
-	platform_device_register(&dm646x_emac_device);
-}
-#else
-
-void dm646x_init_emac(struct emac_platform_data *unused) {}
-
-#endif
-
 static struct map_desc dm646x_io_desc[] = {
 	{
 		.virtual	= IO_VIRT,
@@ -614,6 +604,7 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.gpio_num		= 43, /* Only 33 usable */
 	.gpio_irq		= IRQ_DM646X_GPIOBNK0,
 	.serial_dev		= &dm646x_serial_device,
+	.emac_pdata		= &dm646x_emac_pdata,
 };
 
 void __init dm646x_init(void)
@@ -627,6 +618,7 @@ static int __init dm646x_init_devices(void)
 		return 0;
 
 	platform_device_register(&dm646x_edma_device);
+	platform_device_register(&dm646x_emac_device);
 	return 0;
 }
 postcore_initcall(dm646x_init_devices);
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 9624e03..b773d92 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -62,6 +62,7 @@ struct davinci_soc_info {
 	unsigned			gpio_num;
 	unsigned			gpio_irq;
 	struct platform_device		*serial_dev;
+	struct emac_platform_data	*emac_pdata;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
diff --git a/arch/arm/mach-davinci/include/mach/dm644x.h b/arch/arm/mach-davinci/include/mach/dm644x.h
index ace167a..15d42b9 100644
--- a/arch/arm/mach-davinci/include/mach/dm644x.h
+++ b/arch/arm/mach-davinci/include/mach/dm644x.h
@@ -34,6 +34,5 @@
 #define DM644X_EMAC_CNTRL_RAM_SIZE	(0x2000)
 
 void __init dm644x_init(void);
-void dm644x_init_emac(struct emac_platform_data *pdata);
 
 #endif /* __ASM_ARCH_DM644X_H */
diff --git a/arch/arm/mach-davinci/include/mach/dm646x.h b/arch/arm/mach-davinci/include/mach/dm646x.h
index ea7b28e..1fc764c 100644
--- a/arch/arm/mach-davinci/include/mach/dm646x.h
+++ b/arch/arm/mach-davinci/include/mach/dm646x.h
@@ -22,6 +22,5 @@
 #define DM646X_EMAC_CNTRL_RAM_SIZE	(0x2000)
 
 void __init dm646x_init(void);
-void dm646x_init_emac(struct emac_platform_data *pdata);
 
 #endif /* __ASM_ARCH_DM646X_H */
-- 
cgit v0.10.2


From c97909fcf1611645f0fe235b332e39623588d84c Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:41:15 -0700
Subject: davinci: Remove unused i2c eeprom_read/write routines

The dm644x and dm646x board files have i2c eeprom read and
write routines but they are not used so remove them.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 0de0637..44e2ab6 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -222,22 +222,6 @@ static struct at24_platform_data eeprom_info = {
 	.setup          = at24_setup,
 };
 
-int dm646xevm_eeprom_read(void *buf, off_t off, size_t count)
-{
-	if (at24_mem_acc)
-		return at24_mem_acc->read(at24_mem_acc, buf, off, count);
-	return -ENODEV;
-}
-EXPORT_SYMBOL(dm646xevm_eeprom_read);
-
-int dm646xevm_eeprom_write(void *buf, off_t off, size_t count)
-{
-	if (at24_mem_acc)
-		return at24_mem_acc->write(at24_mem_acc, buf, off, count);
-	return -ENODEV;
-}
-EXPORT_SYMBOL(dm646xevm_eeprom_write);
-
 static struct i2c_board_info __initdata i2c_info[] =  {
 	{
 		I2C_BOARD_INFO("24c256", 0x50),
diff --git a/arch/arm/mach-davinci/include/mach/board-dm6446evm.h b/arch/arm/mach-davinci/include/mach/board-dm6446evm.h
deleted file mode 100644
index 3216f21..0000000
--- a/arch/arm/mach-davinci/include/mach/board-dm6446evm.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * DaVinci DM6446 EVM board specific headers
- *
- * Author: Kevin Hilman, Deep Root Systems, LLC
- *
- * 2007 (c) Deep Root Systems, LLC. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or ifndef.
- */
-
-#ifndef _MACH_DAVINCI_DM6446EVM_H
-#define _MACH_DAVINCI_DM6446EVM_H
-
-#include <linux/types.h>
-
-int dm6446evm_eeprom_read(char *buf, off_t off, size_t count);
-int dm6446evm_eeprom_write(char *buf, off_t off, size_t count);
-
-#endif
-- 
cgit v0.10.2


From b14dc0f9942a9c318c6c49f29511d88b3642e2d0 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:41:27 -0700
Subject: davinci: Factor out emac mac address handling

Factor out the code to extract that mac address from
i2c eeprom.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 987d27f..d9d4045 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -44,6 +44,7 @@
 #include <mach/psc.h>
 #include <mach/nand.h>
 #include <mach/mmc.h>
+#include <mach/emac.h>
 #include <mach/common.h>
 
 #define DM644X_EVM_PHY_MASK		(0x2)
@@ -437,28 +438,13 @@ static struct pcf857x_platform_data pcf_data_u35 = {
  *  - 0x0039, 1 byte NTSC vs PAL (bit 0x80 == PAL)
  *  - ... newer boards may have more
  */
-static struct memory_accessor *at24_mem_acc;
-
-static void at24_setup(struct memory_accessor *mem_acc, void *context)
-{
-	char mac_addr[ETH_ALEN];
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
-
-	at24_mem_acc = mem_acc;
-
-	/* Read MAC addr from EEPROM */
-	if (at24_mem_acc->read(at24_mem_acc, mac_addr, 0x7f00, ETH_ALEN) ==
-	    ETH_ALEN) {
-		printk(KERN_INFO "Read MAC addr from EEPROM: %pM\n", mac_addr);
-		memcpy(soc_info->emac_pdata->mac_addr, mac_addr, ETH_ALEN);
-	}
-}
 
 static struct at24_platform_data eeprom_info = {
 	.byte_len	= (256*1024) / 8,
 	.page_size	= 64,
 	.flags		= AT24_FLAG_ADDR16,
-	.setup          = at24_setup,
+	.setup          = davinci_get_mac_addr,
+	.context	= (void *)0x7f00,
 };
 
 /*
@@ -684,7 +670,6 @@ static __init void davinci_evm_init(void)
 
 	soc_info->emac_pdata->phy_mask = DM644X_EVM_PHY_MASK;
 	soc_info->emac_pdata->mdio_max_freq = DM644X_EVM_MDIO_FREQUENCY;
-	dm644x_init_emac(soc_info->emac_pdata);
 
 	/* Register the fixup for PHY on DaVinci */
 	phy_register_fixup_for_uid(LXT971_PHY_ID, LXT971_PHY_MASK,
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 44e2ab6..e17de63 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -198,28 +198,13 @@ static struct pcf857x_platform_data pcf_data = {
  *  - 0x7f00, 6 bytes Ethernet Address
  *  - ... newer boards may have more
  */
-static struct memory_accessor *at24_mem_acc;
-
-static void at24_setup(struct memory_accessor *mem_acc, void *context)
-{
-	char mac_addr[ETH_ALEN];
-	struct davinci_soc_info *soc_info = &davinci_soc_info;
-
-	at24_mem_acc = mem_acc;
-
-	/* Read MAC addr from EEPROM */
-	if (at24_mem_acc->read(at24_mem_acc, mac_addr, 0x7f00, ETH_ALEN) ==
-	    ETH_ALEN) {
-		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
-		memcpy(soc_info->emac_pdata->mac_addr, mac_addr, ETH_ALEN);
-	}
-}
 
 static struct at24_platform_data eeprom_info = {
 	.byte_len       = (256*1024) / 8,
 	.page_size      = 64,
 	.flags          = AT24_FLAG_ADDR16,
-	.setup          = at24_setup,
+	.setup          = davinci_get_mac_addr,
+	.context	= (void *)0x7f00,
 };
 
 static struct i2c_board_info __initdata i2c_info[] =  {
@@ -258,7 +243,6 @@ static __init void evm_init(void)
 
 	soc_info->emac_pdata->phy_mask = DM646X_EVM_PHY_MASK;
 	soc_info->emac_pdata->mdio_max_freq = DM646X_EVM_MDIO_FREQUENCY;
-	dm646x_init_emac(soc_info->emac_pdata);
 }
 
 static __init void davinci_dm646x_evm_irq_init(void)
diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index d72d517..169ec73 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -10,12 +10,14 @@
  */
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/etherdevice.h>
 
 #include <asm/tlb.h>
 #include <asm/mach/map.h>
 
 #include <mach/common.h>
 #include <mach/cputype.h>
+#include <mach/emac.h>
 
 #include "clock.h"
 
@@ -24,6 +26,16 @@ EXPORT_SYMBOL(davinci_soc_info);
 
 void __iomem *davinci_intc_base;
 
+void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context)
+{
+	char *mac_addr = davinci_soc_info.emac_pdata->mac_addr;
+	off_t offset = (off_t)context;
+
+	/* Read MAC addr from EEPROM */
+	if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN)
+		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
+}
+
 static struct davinci_id * __init davinci_get_id(u32 jtag_id)
 {
 	int i;
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index c0195cd..c85091c 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -254,32 +254,6 @@ struct davinci_timer_instance davinci_timer_instance[2] = {
 
 /*-------------------------------------------------------------------------*/
 
-#if defined(CONFIG_TI_DAVINCI_EMAC) || defined(CONFIG_TI_DAVINCI_EMAC_MODULE)
-
-void davinci_init_emac(struct emac_platform_data *pdata)
-{
-	DECLARE_MAC_BUF(buf);
-
-	/* if valid MAC exists, don't re-register */
-	if (is_valid_ether_addr(pdata->mac_addr))
-		return;
-	else {
-		/* Use random MAC if none passed */
-		random_ether_addr(pdata->mac_addr);
-
-		printk(KERN_WARNING "%s: using random MAC addr: %s\n",
-		       __func__, print_mac(buf, pdata->mac_addr));
-	}
-}
-
-#else
-
-void davinci_init_emac(struct emac_platform_data *unused) {}
-
-#endif
-
-/*-------------------------------------------------------------------------*/
-
 static int __init davinci_init_devices(void)
 {
 	/* please keep these calls, and their implementations above,
diff --git a/arch/arm/mach-davinci/include/mach/emac.h b/arch/arm/mach-davinci/include/mach/emac.h
index 549fcce..beff4fb 100644
--- a/arch/arm/mach-davinci/include/mach/emac.h
+++ b/arch/arm/mach-davinci/include/mach/emac.h
@@ -12,6 +12,7 @@
 #define _MACH_DAVINCI_EMAC_H
 
 #include <linux/if_ether.h>
+#include <linux/memory.h>
 
 struct emac_platform_data {
 	char mac_addr[ETH_ALEN];
@@ -30,7 +31,6 @@ enum {
 	EMAC_VERSION_1,	/* DM644x */
 	EMAC_VERSION_2,	/* DM646x */
 };
-void davinci_init_emac(struct emac_platform_data *pdata);
-#endif
-
 
+void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context);
+#endif
-- 
cgit v0.10.2


From 0b0c4c2a6974eae7b96066cb0da35b526fe58468 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:41:40 -0700
Subject: davinci: Integrate cp_intc support into low-level irq code

Integrate the Common Platform Interrupt Controller (cp_intc)
support into the low-level irq handling for davinci and similar
platforms.  Do it such that support for cp_intc and the original
aintc can coexist in the same kernel binary.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index d7614a0..7640867 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -1,5 +1,8 @@
 if ARCH_DAVINCI
 
+config AINTC
+	bool
+
 config CP_INTC
 	bool
 
@@ -9,12 +12,15 @@ comment "DaVinci Core Type"
 
 config ARCH_DAVINCI_DM644x
 	bool "DaVinci 644x based system"
+	select AINTC
 
 config ARCH_DAVINCI_DM355
         bool "DaVinci 355 based system"
+	select AINTC
 
 config ARCH_DAVINCI_DM646x
         bool "DaVinci 646x based system"
+	select AINTC
 
 comment "DaVinci Board Type"
 
diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index a65d03b..5b62d8a 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -4,17 +4,19 @@
 #
 
 # Common objects
-obj-y 			:= time.o irq.o clock.o serial.o io.o psc.o \
+obj-y 			:= time.o clock.o serial.o io.o psc.o \
 			   gpio.o devices.o dma.o usb.o common.o
 
 obj-$(CONFIG_DAVINCI_MUX)		+= mux.o
-obj-$(CONFIG_CP_INTC)			+= cp_intc.o
 
 # Chip specific
 obj-$(CONFIG_ARCH_DAVINCI_DM644x)       += dm644x.o
 obj-$(CONFIG_ARCH_DAVINCI_DM355)        += dm355.o
 obj-$(CONFIG_ARCH_DAVINCI_DM646x)       += dm646x.o
 
+obj-$(CONFIG_AINTC)			+= irq.o
+obj-$(CONFIG_CP_INTC)			+= cp_intc.o
+
 # Board specific
 obj-$(CONFIG_MACH_DAVINCI_EVM)  	+= board-dm644x-evm.o
 obj-$(CONFIG_MACH_SFFSDR)		+= board-sffsdr.o
diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index 169ec73..61ede19 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -25,6 +25,7 @@ struct davinci_soc_info davinci_soc_info;
 EXPORT_SYMBOL(davinci_soc_info);
 
 void __iomem *davinci_intc_base;
+int davinci_intc_type;
 
 void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context)
 {
@@ -99,6 +100,7 @@ void __init davinci_common_init(struct davinci_soc_info *soc_info)
 	}
 
 	davinci_intc_base = davinci_soc_info.intc_base;
+	davinci_intc_type = davinci_soc_info.intc_type;
 	return;
 
 err:
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index b773d92..22db677 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -18,6 +18,7 @@ extern struct sys_timer davinci_timer;
 
 extern void davinci_irq_init(void);
 extern void __iomem *davinci_intc_base;
+extern int davinci_intc_type;
 
 /* parameters describe VBUS sourcing for host mode */
 extern void setup_usb(unsigned mA, unsigned potpgt_msec);
diff --git a/arch/arm/mach-davinci/include/mach/entry-macro.S b/arch/arm/mach-davinci/include/mach/entry-macro.S
index ed78851..fbdebc7 100644
--- a/arch/arm/mach-davinci/include/mach/entry-macro.S
+++ b/arch/arm/mach-davinci/include/mach/entry-macro.S
@@ -23,9 +23,28 @@
 		.endm
 
 		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
+#if defined(CONFIG_AINTC) && defined(CONFIG_CP_INTC)
+		ldr \tmp, =davinci_intc_type
+		ldr \tmp, [\tmp]
+		cmp \tmp, #DAVINCI_INTC_TYPE_CP_INTC
+		beq 1001f
+#endif
+#if defined(CONFIG_AINTC)
 		ldr \tmp, [\base, #0x14]
 		movs \tmp, \tmp, lsr #2
 		sub \irqnr, \tmp, #1
+		b 1002f
+#endif
+#if defined(CONFIG_CP_INTC)
+1001:		ldr \irqnr, [\base, #0x80] /* get irq number */
+		and \irqnr, \irqnr, #0xff  /* irq is in bits 0-9 */
+		mov \tmp, \irqnr, lsr #3
+		and \tmp, \tmp, #0xfc
+		add \tmp, \tmp, #0x280 /* get the register offset */
+		ldr \irqstat, [\base, \tmp] /* get the intc status */
+		cmp \irqstat, #0x0
+#endif
+1002:
 		.endm
 
 		.macro	irq_prio_table
-- 
cgit v0.10.2


From 3abd5acfff0111809463bcfd7236a1bdf09e4e2d Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:41:54 -0700
Subject: davinci: Add compare register support to timer code

The Timer64p timer has 8 compare registers that can
be used to generate interrupts when the timer value
matches the compare reg's value.  They do not disturb
the timer itself.  This can be useful when there is
only one timer available for both clock events and
clocksource.

When enabled, the clocksource remains a continuous
32-bit counter but the clock event will no longer
support periodic interrupts.  Instead only oneshot
timers will be supported and implemented by setting
the compare register to the current timer value plus
the period that the clock event subsystem is requesting.

Compare registers support is enabled automatically
when the following conditions are met:
1) The same timer is being used for clock events
   and clocksource.
2) The timer is the bottom half (32 bits) of the
   64-bit timer (hardware limitation).
3) The the compare register offset and irq are
   not zero.

Since the timer is always running, there is a hardware
race in timer32_config() between reading the current
timer value, and adding the period to the current
timer value and writing the compare register.
Testing on a da830 evm board with the timer clocked
at 24 MHz and the processor clocked at 300 MHz,
showed the number of counter ticks to do this ranged
from 20-53 (~1-2.2 usecs) but usually around 41 ticks.
This includes some artifacts from collecting the
information.  So, the minimum period should be
at least 5 usecs to be safe.

There is also an non-critical lower limit that
the period should be since there is no point in
setting an event that is much shorter than the
time it takes to set the event, and get & handle
the timer interrupt for that event.  There can
also be all sorts of delays from activities
occuring elsewhere in the system (including
hardware activitis like cache & TLB management).
These are virtually impossible to quantify so a
minimum period of 50 usecs was chosen.  That will
certianly be enough to avoid the actual hardware
race but hopefully not large enough to cause
unreasonably course-grained timers.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 22db677..715528f 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -30,6 +30,8 @@ struct davinci_timer_instance {
 	void __iomem	*base;
 	u32		bottom_irq;
 	u32		top_irq;
+	unsigned long	cmp_off;
+	unsigned int	cmp_irq;
 };
 
 struct davinci_timer_info {
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index f80ae25..fc90d3e 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -46,14 +46,24 @@ enum {
 };
 
 /* Timer register offsets */
-#define PID12                        0x0
-#define TIM12                        0x10
-#define TIM34                        0x14
-#define PRD12                        0x18
-#define PRD34                        0x1c
-#define TCR                          0x20
-#define TGCR                         0x24
-#define WDTCR                        0x28
+#define PID12			0x0
+#define TIM12			0x10
+#define TIM34			0x14
+#define PRD12			0x18
+#define PRD34			0x1c
+#define TCR			0x20
+#define TGCR			0x24
+#define WDTCR			0x28
+
+/* Offsets of the 8 compare registers */
+#define	CMP12_0			0x60
+#define	CMP12_1			0x64
+#define	CMP12_2			0x68
+#define	CMP12_3			0x6c
+#define	CMP12_4			0x70
+#define	CMP12_5			0x74
+#define	CMP12_6			0x78
+#define	CMP12_7			0x7c
 
 /* Timer register bitfields */
 #define TCR_ENAMODE_DISABLE          0x0
@@ -85,6 +95,7 @@ struct timer_s {
 	unsigned int id;
 	unsigned long period;
 	unsigned long opts;
+	unsigned long flags;
 	void __iomem *base;
 	unsigned long tim_off;
 	unsigned long prd_off;
@@ -94,9 +105,13 @@ struct timer_s {
 static struct timer_s timers[];
 
 /* values for 'opts' field of struct timer_s */
-#define TIMER_OPTS_DISABLED   0x00
-#define TIMER_OPTS_ONESHOT    0x01
-#define TIMER_OPTS_PERIODIC   0x02
+#define TIMER_OPTS_DISABLED		0x01
+#define TIMER_OPTS_ONESHOT		0x02
+#define TIMER_OPTS_PERIODIC		0x04
+#define TIMER_OPTS_STATE_MASK		0x07
+
+#define TIMER_OPTS_USE_COMPARE		0x80000000
+#define USING_COMPARE(t)		((t)->opts & TIMER_OPTS_USE_COMPARE)
 
 static char *id_to_name[] = {
 	[T0_BOT]	= "timer0_0",
@@ -107,24 +122,41 @@ static char *id_to_name[] = {
 
 static int timer32_config(struct timer_s *t)
 {
-	u32 tcr = __raw_readl(t->base + TCR);
-
-	/* disable timer */
-	tcr &= ~(TCR_ENAMODE_MASK << t->enamode_shift);
-	__raw_writel(tcr, t->base + TCR);
-
-	/* reset counter to zero, set new period */
-	__raw_writel(0, t->base + t->tim_off);
-	__raw_writel(t->period, t->base + t->prd_off);
-
-	/* Set enable mode */
-	if (t->opts & TIMER_OPTS_ONESHOT) {
-		tcr |= TCR_ENAMODE_ONESHOT << t->enamode_shift;
-	} else if (t->opts & TIMER_OPTS_PERIODIC) {
-		tcr |= TCR_ENAMODE_PERIODIC << t->enamode_shift;
+	u32 tcr;
+	struct davinci_soc_info *soc_info = davinci_get_soc_info();
+
+	if (USING_COMPARE(t)) {
+		struct davinci_timer_instance *dtip =
+				soc_info->timer_info->timers;
+		int event_timer = ID_TO_TIMER(timers[TID_CLOCKEVENT].id);
+
+		/*
+		 * Next interrupt should be the current time reg value plus
+		 * the new period (using 32-bit unsigned addition/wrapping
+		 * to 0 on overflow).  This assumes that the clocksource
+		 * is setup to count to 2^32-1 before wrapping around to 0.
+		 */
+		__raw_writel(__raw_readl(t->base + t->tim_off) + t->period,
+			t->base + dtip[event_timer].cmp_off);
+	} else {
+		tcr = __raw_readl(t->base + TCR);
+
+		/* disable timer */
+		tcr &= ~(TCR_ENAMODE_MASK << t->enamode_shift);
+		__raw_writel(tcr, t->base + TCR);
+
+		/* reset counter to zero, set new period */
+		__raw_writel(0, t->base + t->tim_off);
+		__raw_writel(t->period, t->base + t->prd_off);
+
+		/* Set enable mode */
+		if (t->opts & TIMER_OPTS_ONESHOT)
+			tcr |= TCR_ENAMODE_ONESHOT << t->enamode_shift;
+		else if (t->opts & TIMER_OPTS_PERIODIC)
+			tcr |= TCR_ENAMODE_PERIODIC << t->enamode_shift;
+
+		__raw_writel(tcr, t->base + TCR);
 	}
-
-	__raw_writel(tcr, t->base + TCR);
 	return 0;
 }
 
@@ -222,8 +254,11 @@ static void __init timer_init(void)
 		/* Register interrupt */
 		t->irqaction.name = t->name;
 		t->irqaction.dev_id = (void *)t;
-		if (t->irqaction.handler != NULL)
+
+		if (t->irqaction.handler != NULL) {
+			irq = USING_COMPARE(t) ? dtip[i].cmp_irq : irq;
 			setup_irq(irq, &t->irqaction);
+		}
 
 		timer32_config(&timers[i]);
 	}
@@ -268,15 +303,18 @@ static void davinci_set_mode(enum clock_event_mode mode,
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 		t->period = davinci_clock_tick_rate / (HZ);
-		t->opts = TIMER_OPTS_PERIODIC;
+		t->opts &= ~TIMER_OPTS_STATE_MASK;
+		t->opts |= TIMER_OPTS_PERIODIC;
 		timer32_config(t);
 		break;
 	case CLOCK_EVT_MODE_ONESHOT:
-		t->opts = TIMER_OPTS_ONESHOT;
+		t->opts &= ~TIMER_OPTS_STATE_MASK;
+		t->opts |= TIMER_OPTS_ONESHOT;
 		break;
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
-		t->opts = TIMER_OPTS_DISABLED;
+		t->opts &= ~TIMER_OPTS_STATE_MASK;
+		t->opts |= TIMER_OPTS_DISABLED;
 		break;
 	case CLOCK_EVT_MODE_RESUME:
 		break;
@@ -295,12 +333,40 @@ static void __init davinci_timer_init(void)
 {
 	struct clk *timer_clk;
 	struct davinci_soc_info *soc_info = &davinci_soc_info;
-
+	unsigned int clockevent_id;
+	unsigned int clocksource_id;
 	static char err[] __initdata = KERN_ERR
 		"%s: can't register clocksource!\n";
 
-	timers[TID_CLOCKEVENT].id = soc_info->timer_info->clockevent_id;
-	timers[TID_CLOCKSOURCE].id = soc_info->timer_info->clocksource_id;
+	clockevent_id = soc_info->timer_info->clockevent_id;
+	clocksource_id = soc_info->timer_info->clocksource_id;
+
+	timers[TID_CLOCKEVENT].id = clockevent_id;
+	timers[TID_CLOCKSOURCE].id = clocksource_id;
+
+	/*
+	 * If using same timer for both clock events & clocksource,
+	 * a compare register must be used to generate an event interrupt.
+	 * This is equivalent to a oneshot timer only (not periodic).
+	 */
+	if (clockevent_id == clocksource_id) {
+		struct davinci_timer_instance *dtip =
+				soc_info->timer_info->timers;
+		int event_timer = ID_TO_TIMER(clockevent_id);
+
+		/* Only bottom timers can use compare regs */
+		if (IS_TIMER_TOP(clockevent_id))
+			pr_warning("davinci_timer_init: Invalid use"
+				" of system timers.  Results unpredictable.\n");
+		else if ((dtip[event_timer].cmp_off == 0)
+				|| (dtip[event_timer].cmp_irq == 0))
+			pr_warning("davinci_timer_init:  Invalid timer instance"
+				" setup.  Results unpredictable.\n");
+		else {
+			timers[TID_CLOCKEVENT].opts |= TIMER_OPTS_USE_COMPARE;
+			clockevent_davinci.features = CLOCK_EVT_FEAT_ONESHOT;
+		}
+	}
 
 	/* init timer hw */
 	timer_init();
@@ -312,7 +378,7 @@ static void __init davinci_timer_init(void)
 	davinci_clock_tick_rate = clk_get_rate(timer_clk);
 
 	/* setup clocksource */
-	clocksource_davinci.name = id_to_name[timers[TID_CLOCKSOURCE].id];
+	clocksource_davinci.name = id_to_name[clocksource_id];
 	clocksource_davinci.mult =
 		clocksource_khz2mult(davinci_clock_tick_rate/1000,
 				     clocksource_davinci.shift);
@@ -325,8 +391,7 @@ static void __init davinci_timer_init(void)
 					 clockevent_davinci.shift);
 	clockevent_davinci.max_delta_ns =
 		clockevent_delta2ns(0xfffffffe, &clockevent_davinci);
-	clockevent_davinci.min_delta_ns =
-		clockevent_delta2ns(1, &clockevent_davinci);
+	clockevent_davinci.min_delta_ns = 50000; /* 50 usec */
 
 	clockevent_davinci.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_davinci);
-- 
cgit v0.10.2


From 5570078c0ec5ecc5df0bbd7d06f43549b7127ae7 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@mvista.com>
Date: Wed, 15 Apr 2009 12:42:06 -0700
Subject: davinci: Move PINMUX defines to SoC files

Different SoC have different numbers of pinmux registers and other
resources that overlap with each other.  To clean up the code and
eliminate defines that overlap with each other, move the PINMUX
defines to the SoC specific files.

Signed-off-by: Mark A. Greer <mgreer@mvista.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 4c3257e..78fc598 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -433,6 +433,14 @@ void __init dm355_init_spi0(unsigned chipselect_mask,
 
 /*----------------------------------------------------------------------*/
 
+#define PINMUX0		0x00
+#define PINMUX1		0x04
+#define PINMUX2		0x08
+#define PINMUX3		0x0c
+#define PINMUX4		0x10
+#define INTMUX		0x18
+#define EVTMUX		0x1c
+
 /*
  * Device specific mux setup
  *
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 3844fc3..4ab5f07 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -351,6 +351,9 @@ static struct platform_device dm644x_emac_device = {
        .resource	= dm644x_emac_resources,
 };
 
+#define PINMUX0		0x00
+#define PINMUX1		0x04
+
 /*
  * Device specific mux setup
  *
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 5185ad5..12189c7 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -333,6 +333,9 @@ static struct platform_device dm646x_emac_device = {
 	.resource	= dm646x_emac_resources,
 };
 
+#define PINMUX0		0x00
+#define PINMUX1		0x04
+
 /*
  * Device specific mux setup
  *
diff --git a/arch/arm/mach-davinci/include/mach/mux.h b/arch/arm/mach-davinci/include/mach/mux.h
index 5d6efa8..2737845 100644
--- a/arch/arm/mach-davinci/include/mach/mux.h
+++ b/arch/arm/mach-davinci/include/mach/mux.h
@@ -19,16 +19,6 @@
 #ifndef __INC_MACH_MUX_H
 #define __INC_MACH_MUX_H
 
-/* System module registers */
-#define PINMUX0			0x00
-#define PINMUX1			0x04
-/* dm355 only */
-#define PINMUX2			0x08
-#define PINMUX3			0x0c
-#define PINMUX4			0x10
-#define INTMUX			0x18
-#define EVTMUX			0x1c
-
 struct mux_config {
 	const char *name;
 	const char *mux_reg_name;
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index fc90d3e..0884ca5 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -123,7 +123,7 @@ static char *id_to_name[] = {
 static int timer32_config(struct timer_s *t)
 {
 	u32 tcr;
-	struct davinci_soc_info *soc_info = davinci_get_soc_info();
+	struct davinci_soc_info *soc_info = &davinci_soc_info;
 
 	if (USING_COMPARE(t)) {
 		struct davinci_timer_instance *dtip =
-- 
cgit v0.10.2


From 96ed299fdb572fd694d361dc49285dddc0c87da4 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Thu, 30 Apr 2009 11:20:24 -0700
Subject: davinci: cleanup: move dm355 UART2 define to dm355.c

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 78fc598..893d5ba 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -34,6 +34,8 @@
 #include "clock.h"
 #include "mux.h"
 
+#define DM355_UART2_BASE	(IO_PHYS + 0x206000)
+
 /*
  * Device specific clocks
  */
diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h
index b0c57fa..794fa5c 100644
--- a/arch/arm/mach-davinci/include/mach/serial.h
+++ b/arch/arm/mach-davinci/include/mach/serial.h
@@ -18,8 +18,6 @@
 #define DAVINCI_UART1_BASE	(IO_PHYS + 0x20400)
 #define DAVINCI_UART2_BASE	(IO_PHYS + 0x20800)
 
-#define DM355_UART2_BASE        (IO_PHYS + 0x206000)
-
 /* DaVinci UART register offsets */
 #define UART_DAVINCI_PWREMU		0x0c
 #define UART_DM646X_SCR			0x10
-- 
cgit v0.10.2


From b79dbdefd2be0980c58b5e36fdc23eb14a7caab4 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 30 Apr 2009 17:33:27 -0700
Subject: davinci: remove remnants of IRAM allocator

Remove remnants of dm6446-specific SRAM allocator, as preparation for
a more generic replacement.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/include/mach/edma.h b/arch/arm/mach-davinci/include/mach/edma.h
index f6fc539..24a3792 100644
--- a/arch/arm/mach-davinci/include/mach/edma.h
+++ b/arch/arm/mach-davinci/include/mach/edma.h
@@ -208,10 +208,6 @@ void edma_clear_event(unsigned channel);
 void edma_pause(unsigned channel);
 void edma_resume(unsigned channel);
 
-/* UNRELATED TO DMA */
-int davinci_alloc_iram(unsigned size);
-void davinci_free_iram(unsigned addr, unsigned size);
-
 /* platform_data for EDMA driver */
 struct edma_soc_info {
 
diff --git a/arch/arm/mach-davinci/include/mach/memory.h b/arch/arm/mach-davinci/include/mach/memory.h
index 86c25c7..c712c7c 100644
--- a/arch/arm/mach-davinci/include/mach/memory.h
+++ b/arch/arm/mach-davinci/include/mach/memory.h
@@ -21,7 +21,6 @@
  * Definitions
  **************************************************************************/
 #define DAVINCI_DDR_BASE    0x80000000
-#define DAVINCI_IRAM_BASE   0x00008000 /* ARM Internal RAM */
 
 #define PHYS_OFFSET DAVINCI_DDR_BASE
 
-- 
cgit v0.10.2


From 0d04eb47054f685b23033ed6ceadfb20db77c5b3 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 30 Apr 2009 17:35:48 -0700
Subject: davinci: soc-specific SRAM setup

Package on-chip SRAM.  It's always accessible from the ARM, so
set up a standardized virtual address mapping into a 128 KiB
area that's reserved for platform use.

In some cases (dm6467) the physical addresses used for EDMA are
not the same as the ones used by the ARM ... so record that info
separately in the SOC data, for chips (unlike the OMAP-L137)
where SRAM may be used with EDMA.

Other blocks of SRAM, such as the ETB buffer or DSP L1/L2 RAM,
may be unused/available on some system.  They are ignored here.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 893d5ba..baaaf32 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -613,6 +613,13 @@ static struct map_desc dm355_io_desc[] = {
 		.length		= IO_SIZE,
 		.type		= MT_DEVICE
 	},
+	{
+		.virtual	= SRAM_VIRT,
+		.pfn		= __phys_to_pfn(0x00010000),
+		.length		= SZ_32K,
+		/* MT_MEMORY_NONCACHED requires supersection alignment */
+		.type		= MT_DEVICE,
+	},
 };
 
 /* Contents of JTAG ID register used to identify exact cpu type */
@@ -702,6 +709,8 @@ static struct davinci_soc_info davinci_soc_info_dm355 = {
 	.gpio_num		= 104,
 	.gpio_irq		= IRQ_DM355_GPIOBNK0,
 	.serial_dev		= &dm355_serial_device,
+	.sram_dma		= 0x00010000,
+	.sram_len		= SZ_32K,
 };
 
 void __init dm355_init(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 4ab5f07..fb5449b3 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -539,6 +539,13 @@ static struct map_desc dm644x_io_desc[] = {
 		.length		= IO_SIZE,
 		.type		= MT_DEVICE
 	},
+	{
+		.virtual	= SRAM_VIRT,
+		.pfn		= __phys_to_pfn(0x00008000),
+		.length		= SZ_16K,
+		/* MT_MEMORY_NONCACHED requires supersection alignment */
+		.type		= MT_DEVICE,
+	},
 };
 
 /* Contents of JTAG ID register used to identify exact cpu type */
@@ -629,6 +636,8 @@ static struct davinci_soc_info davinci_soc_info_dm644x = {
 	.gpio_irq		= IRQ_GPIOBNK0,
 	.serial_dev		= &dm644x_serial_device,
 	.emac_pdata		= &dm644x_emac_pdata,
+	.sram_dma		= 0x00008000,
+	.sram_len		= SZ_16K,
 };
 
 void __init dm644x_init(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 12189c7..334f071 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -518,6 +518,13 @@ static struct map_desc dm646x_io_desc[] = {
 		.length		= IO_SIZE,
 		.type		= MT_DEVICE
 	},
+	{
+		.virtual	= SRAM_VIRT,
+		.pfn		= __phys_to_pfn(0x00010000),
+		.length		= SZ_32K,
+		/* MT_MEMORY_NONCACHED requires supersection alignment */
+		.type		= MT_DEVICE,
+	},
 };
 
 /* Contents of JTAG ID register used to identify exact cpu type */
@@ -608,6 +615,8 @@ static struct davinci_soc_info davinci_soc_info_dm646x = {
 	.gpio_irq		= IRQ_DM646X_GPIOBNK0,
 	.serial_dev		= &dm646x_serial_device,
 	.emac_pdata		= &dm646x_emac_pdata,
+	.sram_dma		= 0x10010000,
+	.sram_len		= SZ_32K,
 };
 
 void __init dm646x_init(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 715528f..a1f03b6 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -66,10 +66,16 @@ struct davinci_soc_info {
 	unsigned			gpio_irq;
 	struct platform_device		*serial_dev;
 	struct emac_platform_data	*emac_pdata;
+	dma_addr_t			sram_dma;
+	unsigned			sram_len;
 };
 
 extern struct davinci_soc_info davinci_soc_info;
 
 extern void davinci_common_init(struct davinci_soc_info *soc_info);
 
+/* standard place to map on-chip SRAMs; they *may* support DMA */
+#define SRAM_VIRT	0xfffe0000
+#define SRAM_SIZE	SZ_128K
+
 #endif /* __ARCH_ARM_MACH_DAVINCI_COMMON_H */
-- 
cgit v0.10.2


From 20e9969b3aa5166d50c8df474967c9d80bf6d481 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Thu, 7 May 2009 09:31:42 -0700
Subject: davinci: add SRAM allocator

Provide a generic SRAM allocator using genalloc, and vaguely
modeled after what AVR32 uses.  This builds on top of the
static CPU mapping set up in the previous patch, and returns
DMA mappings as requested (if possible).

Compared to its OMAP cousin, there's no current support for
(currently non-existent) DaVinci power management code running
in SRAM; and this has ways to deallocate, instead of being
allocate-only.

The initial user of this should probably be the audio code,
because EDMA from DDR is subject to various dropouts on at
least DM355 and DM6446 chips.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9d02cdb..915b393 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -587,6 +587,7 @@ config ARCH_DAVINCI
 	select ZONE_DMA
 	select HAVE_IDE
 	select COMMON_CLKDEV
+	select GENERIC_ALLOCATOR
 	help
 	  Support for TI's DaVinci platform.
 
diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 5b62d8a..059ab78 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -5,7 +5,7 @@
 
 # Common objects
 obj-y 			:= time.o clock.o serial.o io.o psc.o \
-			   gpio.o devices.o dma.o usb.o common.o
+			   gpio.o devices.o dma.o usb.o common.o sram.o
 
 obj-$(CONFIG_DAVINCI_MUX)		+= mux.o
 
diff --git a/arch/arm/mach-davinci/include/mach/sram.h b/arch/arm/mach-davinci/include/mach/sram.h
new file mode 100644
index 0000000..111f7cc
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/sram.h
@@ -0,0 +1,27 @@
+/*
+ * mach/sram.h - DaVinci simple SRAM allocator
+ *
+ * Copyright (C) 2009 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __MACH_SRAM_H
+#define __MACH_SRAM_H
+
+/* ARBITRARY:  SRAM allocations are multiples of this 2^N size */
+#define SRAM_GRANULARITY	512
+
+/*
+ * SRAM allocations return a CPU virtual address, or NULL on error.
+ * If a DMA address is requested and the SRAM supports DMA, its
+ * mapped address is also returned.
+ *
+ * Errors include SRAM memory not being available, and requesting
+ * DMA mapped SRAM on systems which don't allow that.
+ */
+extern void *sram_alloc(size_t len, dma_addr_t *dma);
+extern void sram_free(void *addr, size_t len);
+
+#endif /* __MACH_SRAM_H */
diff --git a/arch/arm/mach-davinci/sram.c b/arch/arm/mach-davinci/sram.c
new file mode 100644
index 0000000..db54b2a
--- /dev/null
+++ b/arch/arm/mach-davinci/sram.c
@@ -0,0 +1,74 @@
+/*
+ * mach-davinci/sram.c - DaVinci simple SRAM allocator
+ *
+ * Copyright (C) 2009 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/genalloc.h>
+
+#include <mach/common.h>
+#include <mach/memory.h>
+#include <mach/sram.h>
+
+
+static struct gen_pool *sram_pool;
+
+void *sram_alloc(size_t len, dma_addr_t *dma)
+{
+	unsigned long vaddr;
+	dma_addr_t dma_base = davinci_soc_info.sram_dma;
+
+	if (dma)
+		*dma = 0;
+	if (!sram_pool || (dma && !dma_base))
+		return NULL;
+
+	vaddr = gen_pool_alloc(sram_pool, len);
+	if (!vaddr)
+		return NULL;
+
+	if (dma)
+		*dma = dma_base + (vaddr - SRAM_VIRT);
+	return (void *)vaddr;
+
+}
+EXPORT_SYMBOL(sram_alloc);
+
+void sram_free(void *addr, size_t len)
+{
+	gen_pool_free(sram_pool, (unsigned long) addr, len);
+}
+EXPORT_SYMBOL(sram_free);
+
+
+/*
+ * REVISIT This supports CPU and DMA access to/from SRAM, but it
+ * doesn't (yet?) support some other notable uses of SRAM:  as TCM
+ * for data and/or instructions; and holding code needed to enter
+ * and exit suspend states (while DRAM can't be used).
+ */
+static int __init sram_init(void)
+{
+	unsigned len = davinci_soc_info.sram_len;
+	int status = 0;
+
+	if (len) {
+		len = min(len, SRAM_SIZE);
+		sram_pool = gen_pool_create(ilog2(SRAM_GRANULARITY), -1);
+		if (!sram_pool)
+			status = -ENOMEM;
+	}
+	if (sram_pool)
+		status = gen_pool_add(sram_pool, SRAM_VIRT, len, -1);
+	WARN_ON(status < 0);
+	return status;
+}
+core_initcall(sram_init);
+
-- 
cgit v0.10.2


From 77bbca138c64cb80259732db6f70e1668123f2a7 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Tue, 26 May 2009 07:26:13 -0700
Subject: davinci: defconfig update: add EMAC

DaVinci EMAC driver is now upstream.  Enable it in default defconfig.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>

diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index c371c65..ac18662 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30-rc6
-# Wed May 20 17:45:58 2009
+# Linux kernel version: 2.6.30-rc7
+# Tue May 26 07:24:28 2009
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
@@ -179,6 +179,7 @@ CONFIG_ARCH_DAVINCI=y
 # CONFIG_ARCH_OMAP is not set
 # CONFIG_ARCH_MSM is not set
 # CONFIG_ARCH_W90X900 is not set
+CONFIG_AINTC=y
 
 #
 # TI DaVinci Implementations
@@ -251,7 +252,7 @@ CONFIG_PREEMPT=y
 CONFIG_HZ=100
 CONFIG_AEABI=y
 # CONFIG_OABI_COMPAT is not set
-CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_HAS_HOLES_MEMORYMODEL is not set
 # CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
 # CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
 # CONFIG_HIGHMEM is not set
@@ -667,6 +668,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_AX88796 is not set
 # CONFIG_SMC91X is not set
+CONFIG_TI_DAVINCI_EMAC=y
 CONFIG_DM9000=y
 CONFIG_DM9000_DEBUGLEVEL=4
 # CONFIG_DM9000_FORCE_SIMPLE_PHY_POLL is not set
@@ -1326,6 +1328,7 @@ CONFIG_MMC_BLOCK=m
 # MMC/SD/SDIO Host Controller Drivers
 #
 # CONFIG_MMC_SDHCI is not set
+# CONFIG_MMC_DAVINCI is not set
 # CONFIG_MEMSTICK is not set
 # CONFIG_ACCESSIBILITY is not set
 CONFIG_NEW_LEDS=y
@@ -1787,6 +1790,7 @@ CONFIG_CRC32=y
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
 CONFIG_DECOMPRESS_GZIP=y
+CONFIG_GENERIC_ALLOCATOR=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
-- 
cgit v0.10.2


From 2f102607ac77354b02a76cf2748598ce9f270f08 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 27 May 2009 23:59:58 -0400
Subject: i7300_idle: allow testing on i5000-series hardware w/o re-compile

Testing the i7300_idle driver on i5000-series hardware required
an edit to i7300_idle.h to "#define SUPPORT_I5000 1" and a re-build
of both i7300_idle and ioat_dma.

Replace that build-time scheme with a load-time module parameter:
"7300_idle.forceload=1" to make it easier to test the driver
on hardware that while not officially validated, works fine
and is much more commonly available.

By default (no modparam) the driver will continue to load
only on the i7300.

Note that ioat_dma runs a copy of i7300_idle's probe routine
to know to reserve an IOAT channel for i7300_idle.
This change makes ioat_dma do that always on the i5000,
just like it does on the i7300.

Signed-off-by: Len Brown <len.brown@intel.com>
Acked-by: Andrew Henroid <andrew.d.henroid@intel.com>

diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 1955ee8..a600fc0 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -173,7 +173,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
 	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
 
 #ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
-	if (i7300_idle_platform_probe(NULL, NULL) == 0) {
+	if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
 		device->common.chancnt--;
 	}
 #endif
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index bf74039..949c97f 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -41,6 +41,10 @@ static int debug;
 module_param_named(debug, debug, uint, 0644);
 MODULE_PARM_DESC(debug, "Enable debug printks in this driver");
 
+static int forceload;
+module_param_named(forceload, forceload, uint, 0644);
+MODULE_PARM_DESC(debug, "Enable driver testing on unvalidated i5000");
+
 #define dprintk(fmt, arg...) \
 	do { if (debug) printk(KERN_INFO I7300_PRINT fmt, ##arg); } while (0)
 
@@ -552,7 +556,7 @@ static int __init i7300_idle_init(void)
 	cpus_clear(idle_cpumask);
 	total_us = 0;
 
-	if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev))
+	if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
 		return -ENODEV;
 
 	if (i7300_idle_thrt_save())
diff --git a/include/linux/i7300_idle.h b/include/linux/i7300_idle.h
index 05a80c4..1587b7d 100644
--- a/include/linux/i7300_idle.h
+++ b/include/linux/i7300_idle.h
@@ -16,35 +16,33 @@
 struct fbd_ioat {
 	unsigned int vendor;
 	unsigned int ioat_dev;
+	unsigned int enabled;
 };
 
 /*
  * The i5000 chip-set has the same hooks as the i7300
- * but support is disabled by default because this driver
- * has not been validated on that platform.
+ * but it is not enabled by default and must be manually
+ * manually enabled with "forceload=1" because it is
+ * only lightly validated.
  */
-#define SUPPORT_I5000 0
 
 static const struct fbd_ioat fbd_ioat_list[] = {
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB},
-#if SUPPORT_I5000
-	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT},
-#endif
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB, 1},
+	{PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT, 0},
 	{0, 0}
 };
 
 /* table of devices that work with this driver */
 static const struct pci_device_id pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FBD_CNB) },
-#if SUPPORT_I5000
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) },
-#endif
 	{ } /* Terminating entry */
 };
 
 /* Check for known platforms with I/O-AT */
 static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev,
-						struct pci_dev **ioat_dev)
+						struct pci_dev **ioat_dev,
+						int enable_all)
 {
 	int i;
 	struct pci_dev *memdev, *dmadev;
@@ -69,6 +67,8 @@ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev,
 	for (i = 0; fbd_ioat_list[i].vendor != 0; i++) {
 		if (dmadev->vendor == fbd_ioat_list[i].vendor &&
 		    dmadev->device == fbd_ioat_list[i].ioat_dev) {
+			if (!(fbd_ioat_list[i].enabled || enable_all))
+				continue;
 			if (fbd_dev)
 				*fbd_dev = memdev;
 			if (ioat_dev)
-- 
cgit v0.10.2


From 3fd43858c7937801134bd70ef1d411e44f9c0c1c Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Thu, 28 May 2009 14:12:27 +0200
Subject: ALSA: au88x0: fix .pointer callback

Appearently, the used mask in the .pointer callback is invalid. It should
be in period_bytes range. The period_bytes is pow(2), so simple bitwise
operation is used.

Idea was taken from ALSA bug#4455.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
index 3906f5a..f84bb19 100644
--- a/sound/pci/au88x0/au88x0_core.c
+++ b/sound/pci/au88x0/au88x0_core.c
@@ -1255,8 +1255,8 @@ static int inline vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma)
 	int temp;
 
 	temp = hwread(vortex->mmio, VORTEX_ADBDMA_STAT + (adbdma << 2));
-	temp = (dma->period_virt * dma->period_bytes) + (temp & POS_MASK);
-	return (temp);
+	temp = (dma->period_virt * dma->period_bytes) + (temp & (dma->period_bytes - 1));
+	return temp;
 }
 
 static void vortex_adbdma_startfifo(vortex_t * vortex, int adbdma)
@@ -1504,8 +1504,7 @@ static int inline vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma)
 	int temp;
 
 	temp = hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2));
-	//temp = (temp & POS_MASK) + (((temp>>WT_SUBBUF_SHIFT) & WT_SUBBUF_MASK)*(dma->cfg0&POS_MASK));
-	temp = (temp & POS_MASK) + ((dma->period_virt) * (dma->period_bytes));
+	temp = (dma->period_virt * dma->period_bytes) + (temp & (dma->period_bytes - 1));
 	return temp;
 }
 
-- 
cgit v0.10.2


From e9ab33d03eb721a632214c0bbaa18652de88aa2d Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Thu, 28 May 2009 14:20:00 +0200
Subject: ALSA: au88x0: fix wrong period_elapsed() call

The period_elapsed() call should be called when position moves.

The idea was taken from ALSA bug#4455.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
index f84bb19..23f49f3 100644
--- a/sound/pci/au88x0/au88x0_core.c
+++ b/sound/pci/au88x0/au88x0_core.c
@@ -2440,7 +2440,8 @@ static irqreturn_t vortex_interrupt(int irq, void *dev_id)
 		spin_lock(&vortex->lock);
 		for (i = 0; i < NR_ADB; i++) {
 			if (vortex->dma_adb[i].fifo_status == FIFO_START) {
-				if (vortex_adbdma_bufshift(vortex, i)) ;
+				if (!vortex_adbdma_bufshift(vortex, i))
+					continue;
 				spin_unlock(&vortex->lock);
 				snd_pcm_period_elapsed(vortex->dma_adb[i].
 						       substream);
-- 
cgit v0.10.2


From a3ec8d70f1a55acccc4874fe9b4dadbbb9454a0f Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 29 May 2009 06:46:46 +0200
Subject: perf_counter tools: Fix top symbol table dump typo

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 52ba9f4..0d100f5 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -371,7 +371,7 @@ static int parse_symbols(void)
 	max_ip = sym->start;
 
 	if (dump_symtab)
-		dso__fprintf(kernel_dso, stdout);
+		dso__fprintf(kernel_dso, stderr);
 
 	return 0;
 
-- 
cgit v0.10.2


From da417a7537cbf4beb28a08a49adf915f2358040c Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 29 May 2009 08:23:16 +0200
Subject: perf_counter tools: Fix top symbol table max_ip typo

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 0d100f5..ebe8bec 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -368,7 +368,7 @@ static int parse_symbols(void)
 
 	node = rb_last(&kernel_dso->syms);
 	sym = rb_entry(node, struct symbol, rb_node);
-	max_ip = sym->start;
+	max_ip = sym->end;
 
 	if (dump_symtab)
 		dso__fprintf(kernel_dso, stderr);
-- 
cgit v0.10.2


From c323d95fa4dbe0b6bf6d59e24a0b7db067dd08a7 Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Fri, 29 May 2009 13:28:35 +0800
Subject: perf_counter/x86: Always use NMI for performance-monitoring interrupt

Always use NMI for performance-monitoring interrupt as there could be
racy situations if we switch between irq and nmi mode frequently.

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
LKML-Reference: <20090529052835.GA13657@ywang-moblin2.bj.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index d08dd52..876ed97 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -91,10 +91,10 @@ extern void set_perf_counter_pending(void);
 
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
-extern void perf_counters_lapic_init(int nmi);
+extern void perf_counters_lapic_init(void);
 #else
 static inline void init_hw_perf_counters(void)		{ }
-static inline void perf_counters_lapic_init(int nmi)	{ }
+static inline void perf_counters_lapic_init(void)	{ }
 #endif
 
 #endif /* _ASM_X86_PERF_COUNTER_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 89b63b5..60df2ef 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1135,7 +1135,7 @@ void __cpuinit setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 	}
 #endif
-	perf_counters_lapic_init(0);
+	perf_counters_lapic_init();
 
 	preempt_disable();
 
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2eeaa99..316b0c9 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -604,7 +604,7 @@ try_generic:
 		hwc->counter_base = x86_pmu.perfctr;
 	}
 
-	perf_counters_lapic_init(hwc->nmi);
+	perf_counters_lapic_init();
 
 	x86_pmu.disable(hwc, idx);
 
@@ -863,24 +863,15 @@ void set_perf_counter_pending(void)
 	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
 }
 
-void perf_counters_lapic_init(int nmi)
+void perf_counters_lapic_init(void)
 {
-	u32 apic_val;
-
 	if (!x86_pmu_initialized())
 		return;
 
 	/*
-	 * Enable the performance counter vector in the APIC LVT:
+	 * Always use NMI for PMU
 	 */
-	apic_val = apic_read(APIC_LVTERR);
-
-	apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
-	if (nmi)
-		apic_write(APIC_LVTPC, APIC_DM_NMI);
-	else
-		apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
-	apic_write(APIC_LVTERR, apic_val);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
 static int __kprobes
@@ -1054,7 +1045,7 @@ void __init init_hw_perf_counters(void)
 
 	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
 
-	perf_counters_lapic_init(0);
+	perf_counters_lapic_init();
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
 
-- 
cgit v0.10.2


From c04f5e5d7b523f90ee3cdd70a68c4002aaecd3fa Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 May 2009 09:10:54 +0200
Subject: perf_counter tools: Clean up builtin-stat.c's do_perfstat()

[ Impact: cleanup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index ac14086..6a29361 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -109,11 +109,75 @@ static void create_perfstat_counter(int counter)
 	}
 }
 
+/*
+ * Does the counter have nsecs as a unit?
+ */
+static inline int nsec_counter(int counter)
+{
+	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK))
+		return 1;
+	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Print out the results of a single counter:
+ */
+static void print_counter(int counter)
+{
+	__u64 count[3], single_count[3];
+	ssize_t res;
+	int cpu, nv;
+	int scaled;
+
+	count[0] = count[1] = count[2] = 0;
+	nv = scale ? 3 : 1;
+	for (cpu = 0; cpu < nr_cpus; cpu ++) {
+		res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
+		assert(res == nv * sizeof(__u64));
+
+		count[0] += single_count[0];
+		if (scale) {
+			count[1] += single_count[1];
+			count[2] += single_count[2];
+		}
+	}
+
+	scaled = 0;
+	if (scale) {
+		if (count[2] == 0) {
+			fprintf(stderr, " %14s  %-20s\n",
+				"<not counted>", event_name(counter));
+			return;
+		}
+		if (count[2] < count[1]) {
+			scaled = 1;
+			count[0] = (unsigned long long)
+				((double)count[0] * count[1] / count[2] + 0.5);
+		}
+	}
+
+	if (nsec_counter(counter)) {
+		double msecs = (double)count[0] / 1000000;
+
+		fprintf(stderr, " %14.6f  %-20s (msecs)",
+			msecs, event_name(counter));
+	} else {
+		fprintf(stderr, " %14Ld  %-20s (events)",
+			count[0], event_name(counter));
+	}
+	if (scaled)
+		fprintf(stderr, "  (scaled from %.2f%%)",
+			(double) count[2] / count[1] * 100);
+	fprintf(stderr, "\n");
+}
+
 static int do_perfstat(int argc, const char **argv)
 {
 	unsigned long long t0, t1;
 	int counter;
-	ssize_t res;
 	int status;
 	int pid;
 
@@ -149,55 +213,10 @@ static int do_perfstat(int argc, const char **argv)
 		argv[0]);
 	fprintf(stderr, "\n");
 
-	for (counter = 0; counter < nr_counters; counter++) {
-		int cpu, nv;
-		__u64 count[3], single_count[3];
-		int scaled;
-
-		count[0] = count[1] = count[2] = 0;
-		nv = scale ? 3 : 1;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			res = read(fd[cpu][counter],
-				   single_count, nv * sizeof(__u64));
-			assert(res == nv * sizeof(__u64));
-
-			count[0] += single_count[0];
-			if (scale) {
-				count[1] += single_count[1];
-				count[2] += single_count[2];
-			}
-		}
-
-		scaled = 0;
-		if (scale) {
-			if (count[2] == 0) {
-				fprintf(stderr, " %14s  %-20s\n",
-					"<not counted>", event_name(counter));
-				continue;
-			}
-			if (count[2] < count[1]) {
-				scaled = 1;
-				count[0] = (unsigned long long)
-					((double)count[0] * count[1] / count[2] + 0.5);
-			}
-		}
-
-		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
-		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
+	for (counter = 0; counter < nr_counters; counter++)
+		print_counter(counter);
 
-			double msecs = (double)count[0] / 1000000;
 
-			fprintf(stderr, " %14.6f  %-20s (msecs)",
-				msecs, event_name(counter));
-		} else {
-			fprintf(stderr, " %14Ld  %-20s (events)",
-				count[0], event_name(counter));
-		}
-		if (scaled)
-			fprintf(stderr, "  (scaled from %.2f%%)",
-				(double) count[2] / count[1] * 100);
-		fprintf(stderr, "\n");
-	}
 	fprintf(stderr, "\n");
 	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
 			(double)(t1-t0)/1e6);
@@ -212,7 +231,6 @@ static void skip_signal(int signo)
 
 static const char * const stat_usage[] = {
 	"perf stat [<options>] <command>",
-	"perf stat [<options>] -- <command> [<options>]",
 	NULL
 };
 
-- 
cgit v0.10.2


From 2996f5ddb7ba8889caeeac65edafe48845106eaa Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 May 2009 09:10:54 +0200
Subject: perf_counter tools: Split display into reading and printing

We introduce the extra pass to allow the print-out to possibly
rely on already read counters.

[ Impact: cleanup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 6a29361..0c92eb7 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -71,6 +71,9 @@ static const unsigned int default_count[] = {
 	  10000,
 };
 
+static __u64			event_res[MAX_COUNTERS][3];
+static __u64			event_scaled[MAX_COUNTERS];
+
 static void create_perfstat_counter(int counter)
 {
 	struct perf_counter_hw_event hw_event;
@@ -123,16 +126,19 @@ static inline int nsec_counter(int counter)
 }
 
 /*
- * Print out the results of a single counter:
+ * Read out the results of a single counter:
  */
-static void print_counter(int counter)
+static void read_counter(int counter)
 {
-	__u64 count[3], single_count[3];
+	__u64 *count, single_count[3];
 	ssize_t res;
 	int cpu, nv;
 	int scaled;
 
+	count = event_res[counter];
+
 	count[0] = count[1] = count[2] = 0;
+
 	nv = scale ? 3 : 1;
 	for (cpu = 0; cpu < nr_cpus; cpu ++) {
 		res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
@@ -148,16 +154,35 @@ static void print_counter(int counter)
 	scaled = 0;
 	if (scale) {
 		if (count[2] == 0) {
-			fprintf(stderr, " %14s  %-20s\n",
-				"<not counted>", event_name(counter));
+			event_scaled[counter] = -1;
+			count[0] = 0;
 			return;
 		}
+
 		if (count[2] < count[1]) {
-			scaled = 1;
+			event_scaled[counter] = 1;
 			count[0] = (unsigned long long)
 				((double)count[0] * count[1] / count[2] + 0.5);
 		}
 	}
+}
+
+/*
+ * Print out the results of a single counter:
+ */
+static void print_counter(int counter)
+{
+	__u64 *count;
+	int scaled;
+
+	count = event_res[counter];
+	scaled = event_scaled[counter];
+
+	if (scaled == -1) {
+		fprintf(stderr, " %14s  %-20s\n",
+			"<not counted>", event_name(counter));
+		return;
+	}
 
 	if (nsec_counter(counter)) {
 		double msecs = (double)count[0] / 1000000;
@@ -214,6 +239,9 @@ static int do_perfstat(int argc, const char **argv)
 	fprintf(stderr, "\n");
 
 	for (counter = 0; counter < nr_counters; counter++)
+		read_counter(counter);
+
+	for (counter = 0; counter < nr_counters; counter++)
 		print_counter(counter);
 
 
-- 
cgit v0.10.2


From be1ac0d81d0e3ab655f8c8ade31fb860ef6aa186 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 May 2009 09:10:54 +0200
Subject: perf_counter tools: Also display time-normalized stat results

Add new column that normalizes counter results by
'nanoseconds spent running' unit.

Before:

 Performance counter stats for '/home/mingo/hackbench':

   10469.403605  task clock ticks     (msecs)
          75502  context switches     (events)
           9501  CPU migrations       (events)
          36158  pagefaults           (events)
    31975676185  CPU cycles           (events)
    26257738659  instructions         (events)
      108740581  cache references     (events)
       54606088  cache misses         (events)

 Wall-clock time elapsed:   810.514504 msecs

After:

 Performance counter stats for '/home/mingo/hackbench':

   10469.403605  task clock ticks     (msecs)
          75502  context switches     #        0.007 M/sec
           9501  CPU migrations       #        0.001 M/sec
          36158  pagefaults           #        0.003 M/sec
    31975676185  CPU cycles           #     3054.202 M/sec
    26257738659  instructions         #     2508.045 M/sec
      108740581  cache references     #       10.387 M/sec
       54606088  cache misses         #        5.216 M/sec

 Wall-clock time elapsed:   810.514504 msecs

The advantage of that column is that it is characteristic of the
execution workflow, regardless of runtime. Hence 'hackbench 10'
will look similar to 'hackbench 15' - while the absolute counter
values are very different.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 0c92eb7..ef7e0e1 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -74,6 +74,8 @@ static const unsigned int default_count[] = {
 static __u64			event_res[MAX_COUNTERS][3];
 static __u64			event_scaled[MAX_COUNTERS];
 
+static __u64			runtime_nsecs;
+
 static void create_perfstat_counter(int counter)
 {
 	struct perf_counter_hw_event hw_event;
@@ -165,6 +167,11 @@ static void read_counter(int counter)
 				((double)count[0] * count[1] / count[2] + 0.5);
 		}
 	}
+	/*
+	 * Save the full runtime - to allow normalization during printout:
+	 */
+	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
+		runtime_nsecs = count[0];
 }
 
 /*
@@ -190,8 +197,11 @@ static void print_counter(int counter)
 		fprintf(stderr, " %14.6f  %-20s (msecs)",
 			msecs, event_name(counter));
 	} else {
-		fprintf(stderr, " %14Ld  %-20s (events)",
+		fprintf(stderr, " %14Ld  %-20s",
 			count[0], event_name(counter));
+		if (runtime_nsecs)
+			fprintf(stderr, " # %12.3f M/sec",
+				(double)count[0]/runtime_nsecs*1000.0);
 	}
 	if (scaled)
 		fprintf(stderr, "  (scaled from %.2f%%)",
-- 
cgit v0.10.2


From ad3a37de81c45f6c20d410ece86004b98f7b6d84 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 29 May 2009 16:06:20 +1000
Subject: perf_counter: Don't swap contexts containing locked mutex

Peter Zijlstra pointed out that under some circumstances, we can take
the mutex in a context or a counter and then swap that context or
counter to another task, potentially leading to lock order inversions
or the mutexes not protecting what they are supposed to protect.

This fixes the problem by making sure that we never take a mutex in a
context or counter which could get swapped to another task.  Most of
the cases where we take a mutex is on a top-level counter or context,
i.e. a counter which has an fd associated with it or a context that
contains such a counter.  This adds WARN_ON_ONCE statements to verify
that.

The two cases where we need to take the mutex on a context that is a
clone of another are in perf_counter_exit_task and
perf_counter_init_task.  The perf_counter_exit_task case is solved by
uncloning the context before starting to remove the counters from it.
The perf_counter_init_task is a little trickier; we temporarily
disable context swapping for the parent (forking) task by setting its
ctx->parent_gen to the all-1s value after locking the context, if it
is a cloned context, and restore the ctx->parent_gen value at the end
if the context didn't get uncloned in the meantime.

This also moves the increment of the context generation count to be
within the same critical section, protected by the context mutex, that
adds the new counter to the context.  That way, taking the mutex is
sufficient to ensure that both the counter list and the generation
count are stable.

[ Impact: fix hangs, races with inherited and PID counters ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18975.31580.520676.619896@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 52e5a15..db843f8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -919,7 +919,8 @@ static int context_equiv(struct perf_counter_context *ctx1,
 			 struct perf_counter_context *ctx2)
 {
 	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
-		&& ctx1->parent_gen == ctx2->parent_gen;
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& ctx1->parent_gen != ~0ull;
 }
 
 /*
@@ -1339,7 +1340,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 			put_ctx(parent_ctx);
 			ctx->parent_ctx = NULL;		/* no longer a clone */
 		}
-		++ctx->generation;
 		/*
 		 * Get an extra reference before dropping the lock so that
 		 * this context won't get freed if the task exits.
@@ -1414,6 +1414,7 @@ static int perf_release(struct inode *inode, struct file *file)
 
 	file->private_data = NULL;
 
+	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_counter_remove_from_context(counter);
 	mutex_unlock(&ctx->mutex);
@@ -1445,6 +1446,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (counter->state == PERF_COUNTER_STATE_ERROR)
 		return 0;
 
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->child_mutex);
 	values[0] = perf_counter_read(counter);
 	n = 1;
@@ -1504,6 +1506,7 @@ static void perf_counter_for_each_sibling(struct perf_counter *counter,
 	struct perf_counter_context *ctx = counter->ctx;
 	struct perf_counter *sibling;
 
+	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	counter = counter->group_leader;
 
@@ -1524,6 +1527,7 @@ static void perf_counter_for_each_child(struct perf_counter *counter,
 {
 	struct perf_counter *child;
 
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->child_mutex);
 	func(counter);
 	list_for_each_entry(child, &counter->child_list, child_list)
@@ -1536,6 +1540,7 @@ static void perf_counter_for_each(struct perf_counter *counter,
 {
 	struct perf_counter *child;
 
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->child_mutex);
 	perf_counter_for_each_sibling(counter, func);
 	list_for_each_entry(child, &counter->child_list, child_list)
@@ -1741,6 +1746,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = vma->vm_file->private_data;
 
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
 		struct user_struct *user = current_user();
@@ -1788,6 +1794,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	if (vma->vm_pgoff != 0)
 		return -EINVAL;
 
+	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->mmap_mutex);
 	if (atomic_inc_not_zero(&counter->mmap_count)) {
 		if (nr_pages != counter->data->nr_pages)
@@ -3349,8 +3356,10 @@ SYSCALL_DEFINE5(perf_counter_open,
 		goto err_free_put_context;
 
 	counter->filp = counter_file;
+	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, counter, cpu);
+	++ctx->generation;
 	mutex_unlock(&ctx->mutex);
 
 	counter->owner = current;
@@ -3436,6 +3445,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * Link this into the parent counter's child list
 	 */
+	WARN_ON_ONCE(parent_counter->ctx->parent_ctx);
 	mutex_lock(&parent_counter->child_mutex);
 	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
 	mutex_unlock(&parent_counter->child_mutex);
@@ -3485,6 +3495,7 @@ static void sync_child_counter(struct perf_counter *child_counter,
 	/*
 	 * Remove this counter from the parent's list
 	 */
+	WARN_ON_ONCE(parent_counter->ctx->parent_ctx);
 	mutex_lock(&parent_counter->child_mutex);
 	list_del_init(&child_counter->child_list);
 	mutex_unlock(&parent_counter->child_mutex);
@@ -3527,12 +3538,17 @@ void perf_counter_exit_task(struct task_struct *child)
 	struct perf_counter_context *child_ctx;
 	unsigned long flags;
 
-	child_ctx = child->perf_counter_ctxp;
-
-	if (likely(!child_ctx))
+	if (likely(!child->perf_counter_ctxp))
 		return;
 
 	local_irq_save(flags);
+	/*
+	 * We can't reschedule here because interrupts are disabled,
+	 * and either child is current or it is a task that can't be
+	 * scheduled, so we are now safe from rescheduling changing
+	 * our context.
+	 */
+	child_ctx = child->perf_counter_ctxp;
 	__perf_counter_task_sched_out(child_ctx);
 
 	/*
@@ -3542,6 +3558,15 @@ void perf_counter_exit_task(struct task_struct *child)
 	 */
 	spin_lock(&child_ctx->lock);
 	child->perf_counter_ctxp = NULL;
+	if (child_ctx->parent_ctx) {
+		/*
+		 * This context is a clone; unclone it so it can't get
+		 * swapped to another process while we're removing all
+		 * the counters from it.
+		 */
+		put_ctx(child_ctx->parent_ctx);
+		child_ctx->parent_ctx = NULL;
+	}
 	spin_unlock(&child_ctx->lock);
 	local_irq_restore(flags);
 
@@ -3571,9 +3596,11 @@ again:
 int perf_counter_init_task(struct task_struct *child)
 {
 	struct perf_counter_context *child_ctx, *parent_ctx;
+	struct perf_counter_context *cloned_ctx;
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
 	int inherited_all = 1;
+	u64 cloned_gen;
 	int ret = 0;
 
 	child->perf_counter_ctxp = NULL;
@@ -3581,8 +3608,7 @@ int perf_counter_init_task(struct task_struct *child)
 	mutex_init(&child->perf_counter_mutex);
 	INIT_LIST_HEAD(&child->perf_counter_list);
 
-	parent_ctx = parent->perf_counter_ctxp;
-	if (likely(!parent_ctx || !parent_ctx->nr_counters))
+	if (likely(!parent->perf_counter_ctxp))
 		return 0;
 
 	/*
@@ -3600,6 +3626,34 @@ int perf_counter_init_task(struct task_struct *child)
 	get_task_struct(child);
 
 	/*
+	 * If the parent's context is a clone, temporarily set its
+	 * parent_gen to an impossible value (all 1s) so it won't get
+	 * swapped under us.  The rcu_read_lock makes sure that
+	 * parent_ctx continues to exist even if it gets swapped to
+	 * another process and then freed while we are trying to get
+	 * its lock.
+	 */
+	rcu_read_lock();
+ retry:
+	parent_ctx = rcu_dereference(parent->perf_counter_ctxp);
+	/*
+	 * No need to check if parent_ctx != NULL here; since we saw
+	 * it non-NULL earlier, the only reason for it to become NULL
+	 * is if we exit, and since we're currently in the middle of
+	 * a fork we can't be exiting at the same time.
+	 */
+	spin_lock_irq(&parent_ctx->lock);
+	if (parent_ctx != rcu_dereference(parent->perf_counter_ctxp)) {
+		spin_unlock_irq(&parent_ctx->lock);
+		goto retry;
+	}
+	cloned_gen = parent_ctx->parent_gen;
+	if (parent_ctx->parent_ctx)
+		parent_ctx->parent_gen = ~0ull;
+	spin_unlock_irq(&parent_ctx->lock);
+	rcu_read_unlock();
+
+	/*
 	 * Lock the parent list. No need to lock the child - not PID
 	 * hashed yet and not running, so nobody can access it.
 	 */
@@ -3630,10 +3684,15 @@ int perf_counter_init_task(struct task_struct *child)
 		/*
 		 * Mark the child context as a clone of the parent
 		 * context, or of whatever the parent is a clone of.
+		 * Note that if the parent is a clone, it could get
+		 * uncloned at any point, but that doesn't matter
+		 * because the list of counters and the generation
+		 * count can't have changed since we took the mutex.
 		 */
-		if (parent_ctx->parent_ctx) {
-			child_ctx->parent_ctx = parent_ctx->parent_ctx;
-			child_ctx->parent_gen = parent_ctx->parent_gen;
+		cloned_ctx = rcu_dereference(parent_ctx->parent_ctx);
+		if (cloned_ctx) {
+			child_ctx->parent_ctx = cloned_ctx;
+			child_ctx->parent_gen = cloned_gen;
 		} else {
 			child_ctx->parent_ctx = parent_ctx;
 			child_ctx->parent_gen = parent_ctx->generation;
@@ -3643,6 +3702,16 @@ int perf_counter_init_task(struct task_struct *child)
 
 	mutex_unlock(&parent_ctx->mutex);
 
+	/*
+	 * Restore the clone status of the parent.
+	 */
+	if (parent_ctx->parent_ctx) {
+		spin_lock_irq(&parent_ctx->lock);
+		if (parent_ctx->parent_ctx)
+			parent_ctx->parent_gen = cloned_gen;
+		spin_unlock_irq(&parent_ctx->lock);
+	}
+
 	return ret;
 }
 
-- 
cgit v0.10.2


From 6daad5c6c586bf07528ae5b39e801b204468f907 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Fri, 29 May 2009 10:15:08 +0100
Subject: [ARM] update mach-types

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 945e0d2..fec6467 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -12,7 +12,7 @@
 #
 #   http://www.arm.linux.org.uk/developer/machines/?action=new
 #
-# Last update: Mon Mar 23 20:09:01 2009
+# Last update: Fri May 29 10:14:20 2009
 #
 # machine_is_xxx	CONFIG_xxxx		MACH_TYPE_xxx		number
 #
@@ -916,7 +916,7 @@ nxdb500			MACH_NXDB500		NXDB500			905
 apf9328			MACH_APF9328		APF9328			906
 omap_wipoq		MACH_OMAP_WIPOQ		OMAP_WIPOQ		907
 omap_twip		MACH_OMAP_TWIP		OMAP_TWIP		908
-palmt650		MACH_PALMT650		PALMT650		909
+treo650			MACH_TREO650		TREO650			909
 acumen			MACH_ACUMEN		ACUMEN			910
 xp100			MACH_XP100		XP100			911
 fs2410			MACH_FS2410		FS2410			912
@@ -1232,7 +1232,7 @@ ql202b			MACH_QL202B		QL202B			1226
 vpac270			MACH_VPAC270		VPAC270			1227
 rd129			MACH_RD129		RD129			1228
 htcwizard		MACH_HTCWIZARD		HTCWIZARD		1229
-xscale_treo680		MACH_XSCALE_TREO680	XSCALE_TREO680		1230
+treo680			MACH_TREO680		TREO680			1230
 tecon_tmezon		MACH_TECON_TMEZON	TECON_TMEZON		1231
 zylonite		MACH_ZYLONITE		ZYLONITE		1233
 gene1270		MACH_GENE1270		GENE1270		1234
@@ -1418,10 +1418,10 @@ looxc550		MACH_LOOXC550		LOOXC550		1417
 cnty_titan		MACH_CNTY_TITAN		CNTY_TITAN		1418
 app3xx			MACH_APP3XX		APP3XX			1419
 sideoatsgrama		MACH_SIDEOATSGRAMA	SIDEOATSGRAMA		1420
-palmtreo700p		MACH_PALMTREO700P	PALMTREO700P		1421
-palmtreo700w		MACH_PALMTREO700W	PALMTREO700W		1422
-palmtreo750		MACH_PALMTREO750	PALMTREO750		1423
-palmtreo755p		MACH_PALMTREO755P	PALMTREO755P		1424
+treo700p		MACH_TREO700P		TREO700P		1421
+treo700w		MACH_TREO700W		TREO700W		1422
+treo750			MACH_TREO750		TREO750			1423
+treo755p		MACH_TREO755P		TREO755P		1424
 ezreganut9200		MACH_EZREGANUT9200	EZREGANUT9200		1425
 sarge			MACH_SARGE		SARGE			1426
 a696			MACH_A696		A696			1427
@@ -1721,7 +1721,7 @@ sapphire		MACH_SAPPHIRE		SAPPHIRE		1729
 csb637xo		MACH_CSB637XO		CSB637XO		1730
 evisiong		MACH_EVISIONG		EVISIONG		1731
 stmp37xx		MACH_STMP37XX		STMP37XX		1732
-stmp378x		MACH_STMP38XX		STMP38XX		1733
+stmp378x		MACH_STMP378X		STMP378X		1733
 tnt			MACH_TNT		TNT			1734
 tbxt			MACH_TBXT		TBXT			1735
 playmate		MACH_PLAYMATE		PLAYMATE		1736
@@ -1817,7 +1817,7 @@ smdkc100		MACH_SMDKC100		SMDKC100		1826
 tavorevb		MACH_TAVOREVB		TAVOREVB		1827
 saar			MACH_SAAR		SAAR			1828
 deister_eyecam		MACH_DEISTER_EYECAM	DEISTER_EYECAM		1829
-at91sam9m10ek		MACH_AT91SAM9M10EK	AT91SAM9M10EK		1830
+at91sam9m10g45ek	MACH_AT91SAM9M10G45EK	AT91SAM9M10G45EK	1830
 linkstation_produo	MACH_LINKSTATION_PRODUO	LINKSTATION_PRODUO	1831
 hit_b0			MACH_HIT_B0		HIT_B0			1832
 adx_rmu			MACH_ADX_RMU		ADX_RMU			1833
@@ -2132,3 +2132,116 @@ apollo			MACH_APOLLO		APOLLO			2141
 at91cap9stk		MACH_AT91CAP9STK	AT91CAP9STK		2142
 spc300			MACH_SPC300		SPC300			2143
 eko			MACH_EKO		EKO			2144
+ccw9m2443		MACH_CCW9M2443		CCW9M2443		2145
+ccw9m2443js		MACH_CCW9M2443JS	CCW9M2443JS		2146
+m2m_router_device	MACH_M2M_ROUTER_DEVICE	M2M_ROUTER_DEVICE	2147
+str9104nas		MACH_STAR9104NAS	STAR9104NAS		2148
+pca100			MACH_PCA100		PCA100			2149
+z3_dm365_mod_01		MACH_Z3_DM365_MOD_01	Z3_DM365_MOD_01		2150
+hipox			MACH_HIPOX		HIPOX			2151
+omap3_piteds		MACH_OMAP3_PITEDS	OMAP3_PITEDS		2152
+bm150r			MACH_BM150R		BM150R			2153
+tbone			MACH_TBONE		TBONE			2154
+merlin			MACH_MERLIN		MERLIN			2155
+falcon			MACH_FALCON		FALCON			2156
+davinci_da850_evm	MACH_DAVINCI_DA850_EVM	DAVINCI_DA850_EVM	2157
+s5p6440			MACH_S5P6440		S5P6440			2158
+at91sam9g10ek		MACH_AT91SAM9G10EK	AT91SAM9G10EK		2159
+omap_4430sdp		MACH_OMAP_4430SDP	OMAP_4430SDP		2160
+lpc313x			MACH_LPC313X		LPC313X			2161
+magx_zn5		MACH_MAGX_ZN5		MAGX_ZN5		2162
+magx_em30		MACH_MAGX_EM30		MAGX_EM30		2163
+magx_ve66		MACH_MAGX_VE66		MAGX_VE66		2164
+meesc			MACH_MEESC		MEESC			2165
+otc570			MACH_OTC570		OTC570			2166
+bcu2412			MACH_BCU2412		BCU2412			2167
+beacon			MACH_BEACON		BEACON			2168
+actia_tgw		MACH_ACTIA_TGW		ACTIA_TGW		2169
+e4430			MACH_E4430		E4430			2170
+ql300			MACH_QL300		QL300			2171
+btmavb101		MACH_BTMAVB101		BTMAVB101		2172
+btmawb101		MACH_BTMAWB101		BTMAWB101		2173
+sq201			MACH_SQ201		SQ201			2174
+quatro45xx		MACH_QUATRO45XX		QUATRO45XX		2175
+openpad			MACH_OPENPAD		OPENPAD			2176
+tx25			MACH_TX25		TX25			2177
+omap3_torpedo		MACH_OMAP3_TORPEDO	OMAP3_TORPEDO		2178
+htcraphael_k		MACH_HTCRAPHAEL_K	HTCRAPHAEL_K		2179
+lal43			MACH_LAL43		LAL43			2181
+htcraphael_cdma500	MACH_HTCRAPHAEL_CDMA500	HTCRAPHAEL_CDMA500	2182
+anw6410			MACH_ANW6410		ANW6410			2183
+htcprophet		MACH_HTCPROPHET		HTCPROPHET		2185
+cfa_10022		MACH_CFA_10022		CFA_10022		2186
+imx27_visstrim_m10	MACH_IMX27_VISSTRIM_M10	IMX27_VISSTRIM_M10	2187
+px2imx27		MACH_PX2IMX27		PX2IMX27		2188
+stm3210e_eval		MACH_STM3210E_EVAL	STM3210E_EVAL		2189
+dvs10			MACH_DVS10		DVS10			2190
+portuxg20		MACH_PORTUXG20		PORTUXG20		2191
+arm_spv			MACH_ARM_SPV		ARM_SPV			2192
+smdkc110		MACH_SMDKC110		SMDKC110		2193
+cabespresso		MACH_CABESPRESSO	CABESPRESSO		2194
+hmc800			MACH_HMC800		HMC800			2195
+sholes			MACH_SHOLES		SHOLES			2196
+btmxc31			MACH_BTMXC31		BTMXC31			2197
+dt501			MACH_DT501		DT501			2198
+ktx			MACH_KTX		KTX			2199
+omap3517evm		MACH_OMAP3517EVM	OMAP3517EVM		2200
+netspace_v2		MACH_NETSPACE_V2	NETSPACE_V2		2201
+netspace_max_v2		MACH_NETSPACE_MAX_V2	NETSPACE_MAX_V2		2202
+d2net_v2		MACH_D2NET_V2		D2NET_V2		2203
+net2big_v2		MACH_NET2BIG_V2		NET2BIG_V2		2204
+net4big_v2		MACH_NET4BIG_V2		NET4BIG_V2		2205
+net5big_v2		MACH_NET5BIG_V2		NET5BIG_V2		2206
+endb2443		MACH_ENDB2443		ENDB2443		2207
+inetspace_v2		MACH_INETSPACE_V2	INETSPACE_V2		2208
+tros			MACH_TROS		TROS			2209
+pelco_homer		MACH_PELCO_HOMER	PELCO_HOMER		2210
+ofsp8			MACH_OFSP8		OFSP8			2211
+at91sam9g45ekes		MACH_AT91SAM9G45EKES	AT91SAM9G45EKES		2212
+guf_cupid		MACH_GUF_CUPID		GUF_CUPID		2213
+eab1r			MACH_EAB1R		EAB1R			2214
+desirec			MACH_DESIREC		DESIREC			2215
+cordoba			MACH_CORDOBA		CORDOBA			2216
+irvine			MACH_IRVINE		IRVINE			2217
+sff772			MACH_SFF772		SFF772			2218
+pelco_milano		MACH_PELCO_MILANO	PELCO_MILANO		2219
+pc7302			MACH_PC7302		PC7302			2220
+bip6000			MACH_BIP6000		BIP6000			2221
+silvermoon		MACH_SILVERMOON		SILVERMOON		2222
+vc0830			MACH_VC0830		VC0830			2223
+dt430			MACH_DT430		DT430			2224
+ji42pf			MACH_JI42PF		JI42PF			2225
+gnet_ksm		MACH_GNET_KSM		GNET_KSM		2226
+gnet_sgm		MACH_GNET_SGM		GNET_SGM		2227
+gnet_sgr		MACH_GNET_SGR		GNET_SGR		2228
+omap3_icetekevm		MACH_OMAP3_ICETEKEVM	OMAP3_ICETEKEVM		2229
+pnp			MACH_PNP		PNP			2230
+ctera_2bay_k		MACH_CTERA_2BAY_K	CTERA_2BAY_K		2231
+ctera_2bay_u		MACH_CTERA_2BAY_U	CTERA_2BAY_U		2232
+sas_c			MACH_SAS_C		SAS_C			2233
+vma2315			MACH_VMA2315		VMA2315			2234
+vcs			MACH_VCS		VCS			2235
+spear600		MACH_SPEAR600		SPEAR600		2236
+spear300		MACH_SPEAR300		SPEAR300		2237
+spear1300		MACH_SPEAR1300		SPEAR1300		2238
+lilly1131		MACH_LILLY1131		LILLY1131		2239
+arvoo_ax301		MACH_ARVOO_AX301	ARVOO_AX301		2240
+mapphone		MACH_MAPPHONE		MAPPHONE		2241
+legend			MACH_LEGEND		LEGEND			2242
+salsa			MACH_SALSA		SALSA			2243
+lounge			MACH_LOUNGE		LOUNGE			2244
+vision			MACH_VISION		VISION			2245
+vmb20			MACH_VMB20		VMB20			2246
+hy2410			MACH_HY2410		HY2410			2247
+hy9315			MACH_HY9315		HY9315			2248
+bullwinkle		MACH_BULLWINKLE		BULLWINKLE		2249
+arm_ultimator2		MACH_ARM_ULTIMATOR2	ARM_ULTIMATOR2		2250
+vs_v210			MACH_VS_V210		VS_V210			2252
+vs_v212			MACH_VS_V212		VS_V212			2253
+hmt			MACH_HMT		HMT			2254
+suen3			MACH_SUEN3		SUEN3			2255
+vesper			MACH_VESPER		VESPER			2256
+str9			MACH_STR9		STR9			2257
+omap3_wl_ff		MACH_OMAP3_WL_FF	OMAP3_WL_FF		2258
+simcom			MACH_SIMCOM		SIMCOM			2259
+mcwebio			MACH_MCWEBIO		MCWEBIO			2260
-- 
cgit v0.10.2


From 8541c1180a355c4da283fc6b03a92c0233823c1b Mon Sep 17 00:00:00 2001
From: Vladimir Barinov <vova.barinov@gmail.com>
Date: Thu, 23 Apr 2009 15:47:22 +0400
Subject: mtd: MXC NAND driver fixes (v5)

The following patch fixes:
 - re-initialization of host->col_addr which is used as byte index
   between the successive READID flash commands.
 - compile error when CONFIG_PM is enabled
 - pass on the error code from clk_get()
 - return -ENOMEM in case of failed ioremap()
 - pass on the return value of platform_driver_probe() directly
 - remove excessive printk
 - let command line partition table parsing with mxc_nand name.
   The cmd_line parsing is done via <mtd-id> name that differs
   from mxc_nand by default and looks like "NAND 256MiB 1,8V 8-bit"

Signed-off-by: Vladimir Barinov <vbarinov@embeddedalley.com>
Signed-off-by: Lothar Wassmann <LW@KARO-electronics.de>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index f3548d0..40c2608 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -831,6 +831,7 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
 		break;
 
 	case NAND_CMD_READID:
+		host->col_addr = 0;
 		send_read_id(host);
 		break;
 
@@ -867,6 +868,7 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 	mtd->priv = this;
 	mtd->owner = THIS_MODULE;
 	mtd->dev.parent = &pdev->dev;
+	mtd->name = "mxc_nand";
 
 	/* 50 us command delay time */
 	this->chip_delay = 5;
@@ -882,8 +884,10 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 	this->verify_buf = mxc_nand_verify_buf;
 
 	host->clk = clk_get(&pdev->dev, "nfc");
-	if (IS_ERR(host->clk))
+	if (IS_ERR(host->clk)) {
+		err = PTR_ERR(host->clk);
 		goto eclk;
+	}
 
 	clk_enable(host->clk);
 	host->clk_act = 1;
@@ -896,7 +900,7 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 
 	host->regs = ioremap(res->start, res->end - res->start + 1);
 	if (!host->regs) {
-		err = -EIO;
+		err = -ENOMEM;
 		goto eres;
 	}
 
@@ -1011,30 +1015,35 @@ static int __devexit mxcnd_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM
 static int mxcnd_suspend(struct platform_device *pdev, pm_message_t state)
 {
-	struct mtd_info *info = platform_get_drvdata(pdev);
+	struct mtd_info *mtd = platform_get_drvdata(pdev);
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
 	int ret = 0;
 
 	DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND suspend\n");
-	if (info)
-		ret = info->suspend(info);
-
-	/* Disable the NFC clock */
-	clk_disable(nfc_clk);	/* FIXME */
+	if (mtd) {
+		ret = mtd->suspend(mtd);
+		/* Disable the NFC clock */
+		clk_disable(host->clk);
+	}
 
 	return ret;
 }
 
 static int mxcnd_resume(struct platform_device *pdev)
 {
-	struct mtd_info *info = platform_get_drvdata(pdev);
+	struct mtd_info *mtd = platform_get_drvdata(pdev);
+	struct nand_chip *nand_chip = mtd->priv;
+	struct mxc_nand_host *host = nand_chip->priv;
 	int ret = 0;
 
 	DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND resume\n");
-	/* Enable the NFC clock */
-	clk_enable(nfc_clk);	/* FIXME */
 
-	if (info)
-		info->resume(info);
+	if (mtd) {
+		/* Enable the NFC clock */
+		clk_enable(host->clk);
+		mtd->resume(mtd);
+	}
 
 	return ret;
 }
@@ -1055,13 +1064,7 @@ static struct platform_driver mxcnd_driver = {
 
 static int __init mxc_nd_init(void)
 {
-	/* Register the device driver structure. */
-	pr_info("MXC MTD nand Driver\n");
-	if (platform_driver_probe(&mxcnd_driver, mxcnd_probe) != 0) {
-		printk(KERN_ERR "Driver register failed for mxcnd_driver\n");
-		return -ENODEV;
-	}
-	return 0;
+	return platform_driver_probe(&mxcnd_driver, mxcnd_probe);
 }
 
 static void __exit mxc_nd_cleanup(void)
-- 
cgit v0.10.2


From 81e2962801bbb4e740c501ca687d5cb857929c04 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Thu, 28 May 2009 17:43:59 +0200
Subject: jffs2: Fix corruption when flash erase/write failure

Erase errors such as:
"Newly-erased block contained word 0xa4ef223e at offset 0x0296a014"
and failure to write the clean marker,
moves the offending erase block to erasing list before calling
jffs2_erase_failed(). This is bad as jffs2_erase_failed() will
also move the block to the bad_list, but is now moving the
wrong block, causing FS corruption.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index c32b4a1..a024474 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -480,13 +480,6 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
 	return;
 
 filebad:
-	mutex_lock(&c->erase_free_sem);
-	spin_lock(&c->erase_completion_lock);
-	/* Stick it on a list (any list) so erase_failed can take it
-	   right off again.  Silly, but shouldn't happen often. */
-	list_move(&jeb->list, &c->erasing_list);
-	spin_unlock(&c->erase_completion_lock);
-	mutex_unlock(&c->erase_free_sem);
 	jffs2_erase_failed(c, jeb, bad_offset);
 	return;
 
-- 
cgit v0.10.2


From 8bea869c5e56234990e6bad92a543437115bfc18 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Mon, 27 Apr 2009 09:44:40 +0200
Subject: ALSA: PCM midlevel: improve fifo_size handling

Move the fifo_size assignment to hw->ioctl callback to allow lowlevel
drivers overwrite the default behaviour.

fifo_size is in frames not bytes as specified in asound.h and alsa-lib's
documentation, but most hardware have fixed byte based FIFOs. Introduce
internal SNDRV_PCM_INFO_FIFO_IN_FRAMES.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/sound/asound.h b/include/sound/asound.h
index 6add80f..82aed3f 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -255,6 +255,7 @@ typedef int __bitwise snd_pcm_subformat_t;
 #define SNDRV_PCM_INFO_HALF_DUPLEX	0x00100000	/* only half duplex */
 #define SNDRV_PCM_INFO_JOINT_DUPLEX	0x00200000	/* playback and capture stream are somewhat correlated */
 #define SNDRV_PCM_INFO_SYNC_START	0x00400000	/* pcm support some kind of sync go */
+#define SNDRV_PCM_INFO_FIFO_IN_FRAMES	0x80000000	/* internal kernel flag - FIFO size is in frames */
 
 typedef int __bitwise snd_pcm_state_t;
 #define	SNDRV_PCM_STATE_OPEN		((__force snd_pcm_state_t) 0) /* stream is open */
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 267effd..8a69b5c 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -98,6 +98,7 @@ struct snd_pcm_ops {
 #define SNDRV_PCM_IOCTL1_INFO		1
 #define SNDRV_PCM_IOCTL1_CHANNEL_INFO	2
 #define SNDRV_PCM_IOCTL1_GSTATE		3
+#define SNDRV_PCM_IOCTL1_FIFO_SIZE	4
 
 #define SNDRV_PCM_TRIGGER_STOP		0
 #define SNDRV_PCM_TRIGGER_START		1
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index d659995..adc2b0b 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1524,6 +1524,23 @@ static int snd_pcm_lib_ioctl_channel_info(struct snd_pcm_substream *substream,
 	return 0;
 }
 
+static int snd_pcm_lib_ioctl_fifo_size(struct snd_pcm_substream *substream,
+				       void *arg)
+{
+	struct snd_pcm_hw_params *params = arg;
+	snd_pcm_format_t format;
+	int channels, width;
+
+	params->fifo_size = substream->runtime->hw.fifo_size;
+	if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_FIFO_IN_FRAMES)) {
+		format = params_format(params);
+		channels = params_channels(params);
+		width = snd_pcm_format_physical_width(format);
+		params->fifo_size /= width * channels;
+	}
+	return 0;
+}
+
 /**
  * snd_pcm_lib_ioctl - a generic PCM ioctl callback
  * @substream: the pcm substream instance
@@ -1545,6 +1562,8 @@ int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream,
 		return snd_pcm_lib_ioctl_reset(substream, arg);
 	case SNDRV_PCM_IOCTL1_CHANNEL_INFO:
 		return snd_pcm_lib_ioctl_channel_info(substream, arg);
+	case SNDRV_PCM_IOCTL1_FIFO_SIZE:
+		return snd_pcm_lib_ioctl_fifo_size(substream, arg);
 	}
 	return -ENXIO;
 }
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 45dc53f..84da3ba 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -312,9 +312,18 @@ int snd_pcm_hw_refine(struct snd_pcm_substream *substream,
 
 	hw = &substream->runtime->hw;
 	if (!params->info)
-		params->info = hw->info;
-	if (!params->fifo_size)
-		params->fifo_size = hw->fifo_size;
+		params->info = hw->info & ~SNDRV_PCM_INFO_FIFO_IN_FRAMES;
+	if (!params->fifo_size) {
+		if (snd_mask_min(&params->masks[SNDRV_PCM_HW_PARAM_FORMAT]) ==
+		    snd_mask_max(&params->masks[SNDRV_PCM_HW_PARAM_FORMAT]) &&
+                    snd_mask_min(&params->masks[SNDRV_PCM_HW_PARAM_CHANNELS]) ==
+                    snd_mask_max(&params->masks[SNDRV_PCM_HW_PARAM_CHANNELS])) {
+			changed = substream->ops->ioctl(substream,
+					SNDRV_PCM_IOCTL1_FIFO_SIZE, params);
+			if (params < 0)
+				return changed;
+		}
+	}
 	params->rmask = 0;
 	return 0;
 }
-- 
cgit v0.10.2


From c62a01ad6e746fae9c93f51ea67e0abfd8d94b58 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Thu, 28 May 2009 11:21:52 +0200
Subject: ALSA: PCM midlevel: introduce mask for xrun_debug() macro

For debugging purposes, it is better to separate actions.

Bit-values:

	1: show bad PCM ring buffer pointer
	2: show also stack (to debug kernel latency issues)
	4: check pointer against system jiffies

Example:

	5: show bad PCM ring buffer pointer and do jiffies check

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index adc2b0b..25cb367 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -126,20 +126,20 @@ void snd_pcm_playback_silence(struct snd_pcm_substream *substream, snd_pcm_ufram
 }
 
 #ifdef CONFIG_SND_PCM_XRUN_DEBUG
-#define xrun_debug(substream)	((substream)->pstr->xrun_debug)
+#define xrun_debug(substream, mask)	((substream)->pstr->xrun_debug & (mask))
 #else
-#define xrun_debug(substream)	0
+#define xrun_debug(substream, mask)	0
 #endif
 
-#define dump_stack_on_xrun(substream) do {	\
-		if (xrun_debug(substream) > 1)	\
-			dump_stack();		\
+#define dump_stack_on_xrun(substream) do {		\
+		if (xrun_debug(substream, 2))		\
+			dump_stack();			\
 	} while (0)
 
 static void xrun(struct snd_pcm_substream *substream)
 {
 	snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
-	if (xrun_debug(substream)) {
+	if (xrun_debug(substream, 1)) {
 		snd_printd(KERN_DEBUG "XRUN: pcmC%dD%d%c\n",
 			   substream->pcm->card->number,
 			   substream->pcm->device,
@@ -197,7 +197,7 @@ static int snd_pcm_update_hw_ptr_post(struct snd_pcm_substream *substream,
 
 #define hw_ptr_error(substream, fmt, args...)				\
 	do {								\
-		if (xrun_debug(substream)) {				\
+		if (xrun_debug(substream, 1)) {				\
 			if (printk_ratelimit()) {			\
 				snd_printd("PCM: " fmt, ##args);	\
 			}						\
@@ -251,7 +251,7 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream)
 	}
 
 	/* Do jiffies check only in xrun_debug mode */
-	if (!xrun_debug(substream))
+	if (!xrun_debug(substream, 4))
 		goto no_jiffies_check;
 
 	/* Skip the jiffies check for hardwares with BATCH flag.
@@ -342,7 +342,7 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream)
 		new_hw_ptr = hw_base + pos;
 	}
 	/* Do jiffies check only in xrun_debug mode */
-	if (xrun_debug(substream) &&
+	if (xrun_debug(substream, 4) &&
 	    ((delta * HZ) / runtime->rate) > jdelta + HZ/100) {
 		hw_ptr_error(substream,
 			     "hw_ptr skipping! "
-- 
cgit v0.10.2


From 13f040f9e55d41e92e485389123654971e03b819 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Thu, 28 May 2009 11:31:20 +0200
Subject: ALSA: PCM midlevel: Do not update hw_ptr_jiffies when hw_ptr is not
 changed

Some hardware might have bigger FIFOs and DMA pointer value will be updated
in large chunks. Do not update hw_ptr_jiffies and position timestamp when
hw_ptr value was not changed.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 25cb367..0f299a5 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -138,6 +138,10 @@ void snd_pcm_playback_silence(struct snd_pcm_substream *substream, snd_pcm_ufram
 
 static void xrun(struct snd_pcm_substream *substream)
 {
+	struct snd_pcm_runtime *runtime = substream->runtime;
+
+	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
+		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 	snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
 	if (xrun_debug(substream, 1)) {
 		snd_printd(KERN_DEBUG "XRUN: pcmC%dD%d%c\n",
@@ -154,8 +158,6 @@ snd_pcm_update_hw_ptr_pos(struct snd_pcm_substream *substream,
 {
 	snd_pcm_uframes_t pos;
 
-	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
-		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 	pos = substream->ops->pointer(substream);
 	if (pos == SNDRV_PCM_POS_XRUN)
 		return pos; /* XRUN */
@@ -298,10 +300,15 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream)
 	    runtime->silence_size > 0)
 		snd_pcm_playback_silence(substream, new_hw_ptr);
 
+	if (runtime->status->hw_ptr == new_hw_ptr)
+		return 0;
+
 	runtime->hw_ptr_base = hw_base;
 	runtime->status->hw_ptr = new_hw_ptr;
 	runtime->hw_ptr_jiffies = jiffies;
 	runtime->hw_ptr_interrupt = hw_ptr_interrupt;
+	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
+		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 
 	return snd_pcm_update_hw_ptr_post(substream, runtime);
 }
@@ -356,9 +363,14 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream)
 	    runtime->silence_size > 0)
 		snd_pcm_playback_silence(substream, new_hw_ptr);
 
+	if (runtime->status->hw_ptr != new_hw_ptr)
+		return 0;
+
 	runtime->hw_ptr_base = hw_base;
 	runtime->status->hw_ptr = new_hw_ptr;
 	runtime->hw_ptr_jiffies = jiffies;
+	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
+		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 
 	return snd_pcm_update_hw_ptr_post(substream, runtime);
 }
-- 
cgit v0.10.2


From a4444da31ec92f89cd6923579c20a9c240439cfc Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Thu, 28 May 2009 12:31:56 +0200
Subject: ALSA: PCM midlevel: lower jiffies check margin using runtime->delay
 value

When hardware has large FIFO, it is necessary to lower jiffies margin
by count of queued samples.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 0f299a5..dd9126b 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -263,6 +263,9 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream)
 	if (runtime->hw.info & SNDRV_PCM_INFO_BATCH)
 		goto no_jiffies_check;
 	hdelta = new_hw_ptr - old_hw_ptr;
+	if (hdelta < runtime->delay)
+		goto no_jiffies_check;
+	hdelta -= runtime->delay;
 	jdelta = jiffies - runtime->hw_ptr_jiffies;
 	if (((hdelta * HZ) / runtime->rate) > jdelta + HZ/100) {
 		delta = jdelta /
@@ -349,8 +352,12 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream)
 		new_hw_ptr = hw_base + pos;
 	}
 	/* Do jiffies check only in xrun_debug mode */
-	if (xrun_debug(substream, 4) &&
-	    ((delta * HZ) / runtime->rate) > jdelta + HZ/100) {
+	if (!xrun_debug(substream, 4))
+		goto no_jiffies_check;
+	if (delta < runtime->delay)
+		goto no_jiffies_check;
+	delta -= runtime->delay;
+	if (((delta * HZ) / runtime->rate) > jdelta + HZ/100) {
 		hw_ptr_error(substream,
 			     "hw_ptr skipping! "
 			     "(pos=%ld, delta=%ld, period=%ld, jdelta=%lu/%lu)\n",
@@ -359,6 +366,7 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream)
 			     ((delta * HZ) / runtime->rate));
 		return 0;
 	}
+ no_jiffies_check:
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
 	    runtime->silence_size > 0)
 		snd_pcm_playback_silence(substream, new_hw_ptr);
-- 
cgit v0.10.2


From 0528c7494e67c30329d086df141d2dd691f01556 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Tue, 26 May 2009 17:07:52 +0200
Subject: ALSA: clean up the logic for building sequencer modules

Instead of mangling the CONFIG_* variables in the makefiles over and
over, set a few helper variables in Kconfig.

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index 7bbdda0..6061fb5 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -205,3 +205,5 @@ config SND_PCM_XRUN_DEBUG
 
 config SND_VMASTER
 	bool
+
+source "sound/core/seq/Kconfig"
diff --git a/sound/core/seq/Kconfig b/sound/core/seq/Kconfig
new file mode 100644
index 0000000..b851fd8
--- /dev/null
+++ b/sound/core/seq/Kconfig
@@ -0,0 +1,16 @@
+# define SND_XXX_SEQ to min(SND_SEQUENCER,SND_XXX)
+
+config SND_RAWMIDI_SEQ
+	def_tristate SND_SEQUENCER && SND_RAWMIDI
+
+config SND_OPL3_LIB_SEQ
+	def_tristate SND_SEQUENCER && SND_OPL3_LIB
+
+config SND_OPL4_LIB_SEQ
+	def_tristate SND_SEQUENCER && SND_OPL4_LIB
+
+config SND_SBAWE_SEQ
+	def_tristate SND_SEQUENCER && SND_SBAWE
+
+config SND_EMU10K1_SEQ
+	def_tristate SND_SEQUENCER && SND_EMU10K1
diff --git a/sound/core/seq/Makefile b/sound/core/seq/Makefile
index 0695937..1bcb360 100644
--- a/sound/core/seq/Makefile
+++ b/sound/core/seq/Makefile
@@ -17,14 +17,6 @@ snd-seq-midi-event-objs := seq_midi_event.o
 snd-seq-dummy-objs := seq_dummy.o
 snd-seq-virmidi-objs := seq_virmidi.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 obj-$(CONFIG_SND_SEQUENCER) += snd-seq.o snd-seq-device.o
 ifeq ($(CONFIG_SND_SEQUENCER_OSS),y)
 obj-$(CONFIG_SND_SEQUENCER) += snd-seq-midi-event.o
@@ -33,8 +25,8 @@ obj-$(CONFIG_SND_SEQ_DUMMY) += snd-seq-dummy.o
 
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_VIRMIDI) += snd-seq-virmidi.o snd-seq-midi-event.o
-obj-$(call sequencer,$(CONFIG_SND_RAWMIDI)) += snd-seq-midi.o snd-seq-midi-event.o
-obj-$(call sequencer,$(CONFIG_SND_OPL3_LIB)) += snd-seq-midi-event.o snd-seq-midi-emul.o
-obj-$(call sequencer,$(CONFIG_SND_OPL4_LIB)) += snd-seq-midi-event.o snd-seq-midi-emul.o
-obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-seq-midi-emul.o snd-seq-virmidi.o
-obj-$(call sequencer,$(CONFIG_SND_EMU10K1)) += snd-seq-midi-emul.o snd-seq-virmidi.o
+obj-$(CONFIG_SND_RAWMIDI_SEQ) += snd-seq-midi.o snd-seq-midi-event.o
+obj-$(CONFIG_SND_OPL3_LIB_SEQ) += snd-seq-midi-event.o snd-seq-midi-emul.o
+obj-$(CONFIG_SND_OPL4_LIB_SEQ) += snd-seq-midi-event.o snd-seq-midi-emul.o
+obj-$(CONFIG_SND_SBAWE_SEQ) += snd-seq-midi-emul.o snd-seq-virmidi.o
+obj-$(CONFIG_SND_EMU10K1_SEQ) += snd-seq-midi-emul.o snd-seq-virmidi.o
diff --git a/sound/drivers/opl3/Makefile b/sound/drivers/opl3/Makefile
index 19767a6..7f2c2a1 100644
--- a/sound/drivers/opl3/Makefile
+++ b/sound/drivers/opl3/Makefile
@@ -7,14 +7,6 @@ snd-opl3-lib-objs := opl3_lib.o opl3_synth.o
 snd-opl3-synth-y := opl3_seq.o opl3_midi.o opl3_drums.o
 snd-opl3-synth-$(CONFIG_SND_SEQUENCER_OSS) += opl3_oss.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 obj-$(CONFIG_SND_OPL3_LIB) += snd-opl3-lib.o
 obj-$(CONFIG_SND_OPL4_LIB) += snd-opl3-lib.o
-obj-$(call sequencer,$(CONFIG_SND_OPL3_LIB)) += snd-opl3-synth.o
+obj-$(CONFIG_SND_OPL3_LIB_SEQ) += snd-opl3-synth.o
diff --git a/sound/drivers/opl4/Makefile b/sound/drivers/opl4/Makefile
index d178b39..b94009b 100644
--- a/sound/drivers/opl4/Makefile
+++ b/sound/drivers/opl4/Makefile
@@ -6,13 +6,5 @@
 snd-opl4-lib-objs := opl4_lib.o opl4_mixer.o opl4_proc.o
 snd-opl4-synth-objs := opl4_seq.o opl4_synth.o yrw801.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 obj-$(CONFIG_SND_OPL4_LIB) += snd-opl4-lib.o
-obj-$(call sequencer,$(CONFIG_SND_OPL4_LIB)) += snd-opl4-synth.o
+obj-$(CONFIG_SND_OPL4_LIB_SEQ) += snd-opl4-synth.o
diff --git a/sound/isa/sb/Makefile b/sound/isa/sb/Makefile
index 1098a56..faeffceb 100644
--- a/sound/isa/sb/Makefile
+++ b/sound/isa/sb/Makefile
@@ -13,14 +13,6 @@ snd-sbawe-objs := sbawe.o emu8000.o
 snd-emu8000-synth-objs := emu8000_synth.o emu8000_callback.o emu8000_patch.o emu8000_pcm.o
 snd-es968-objs := es968.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_SB_COMMON) += snd-sb-common.o
 obj-$(CONFIG_SND_SB16_DSP) += snd-sb16-dsp.o
@@ -33,4 +25,4 @@ ifeq ($(CONFIG_SND_SB16_CSP),y)
   obj-$(CONFIG_SND_SB16) += snd-sb16-csp.o
   obj-$(CONFIG_SND_SBAWE) += snd-sb16-csp.o
 endif
-obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-emu8000-synth.o
+obj-$(CONFIG_SND_SBAWE_SEQ) += snd-emu8000-synth.o
diff --git a/sound/pci/emu10k1/Makefile b/sound/pci/emu10k1/Makefile
index cf2d563..fc5591e 100644
--- a/sound/pci/emu10k1/Makefile
+++ b/sound/pci/emu10k1/Makefile
@@ -9,15 +9,7 @@ snd-emu10k1-objs := emu10k1.o emu10k1_main.o \
 snd-emu10k1-synth-objs := emu10k1_synth.o emu10k1_callback.o emu10k1_patch.o
 snd-emu10k1x-objs := emu10k1x.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_EMU10K1) += snd-emu10k1.o
-obj-$(call sequencer,$(CONFIG_SND_EMU10K1)) += snd-emu10k1-synth.o
+obj-$(CONFIG_SND_EMU10K1_SEQ) += snd-emu10k1-synth.o
 obj-$(CONFIG_SND_EMU10K1X) += snd-emu10k1x.o
diff --git a/sound/synth/Makefile b/sound/synth/Makefile
index e99fd76..11eb06a 100644
--- a/sound/synth/Makefile
+++ b/sound/synth/Makefile
@@ -5,16 +5,8 @@
 
 snd-util-mem-objs := util_mem.o
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_EMU10K1) += snd-util-mem.o
 obj-$(CONFIG_SND_TRIDENT) += snd-util-mem.o
-obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-util-mem.o
-obj-$(call sequencer,$(CONFIG_SND)) += emux/
+obj-$(CONFIG_SND_SBAWE_SEQ) += snd-util-mem.o
+obj-$(CONFIG_SND_SEQUENCER) += emux/
diff --git a/sound/synth/emux/Makefile b/sound/synth/emux/Makefile
index b690352..328594e 100644
--- a/sound/synth/emux/Makefile
+++ b/sound/synth/emux/Makefile
@@ -7,14 +7,6 @@ snd-emux-synth-objs := emux.o emux_synth.o emux_seq.o emux_nrpn.o \
 		       emux_effect.o emux_proc.o emux_hwdep.o soundfont.o \
 		       $(if $(CONFIG_SND_SEQUENCER_OSS),emux_oss.o)
 
-#
-# this function returns:
-#   "m" - CONFIG_SND_SEQUENCER is m
-#   <empty string> - CONFIG_SND_SEQUENCER is undefined
-#   otherwise parameter #1 value
-#
-sequencer = $(if $(subst y,,$(CONFIG_SND_SEQUENCER)),$(if $(1),m),$(if $(CONFIG_SND_SEQUENCER),$(1)))
-
 # Toplevel Module Dependencies
-obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-emux-synth.o
-obj-$(call sequencer,$(CONFIG_SND_EMU10K1)) += snd-emux-synth.o
+obj-$(CONFIG_SND_SBAWE_SEQ) += snd-emux-synth.o
+obj-$(CONFIG_SND_EMU10K1_SEQ) += snd-emux-synth.o
-- 
cgit v0.10.2


From 16a30fbb0d3aa4ee829a2dd3d0e314e2b5ae96a9 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Fri, 29 May 2009 09:22:37 +0300
Subject: ASoC: TWL4030: Use reg_cache in twl4030_init_chip

Use the codec->reg_cache instead of the array directly
in twl4030_init_chip for setting the default values.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 63ebd17..df474a5 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -145,7 +145,6 @@ struct twl4030_priv {
 static inline unsigned int twl4030_read_reg_cache(struct snd_soc_codec *codec,
 	unsigned int reg)
 {
-	u8 *cache = codec->reg_cache;
 
 	if (reg >= TWL4030_CACHEREGNUM)
 		return -EIO;
@@ -204,6 +203,7 @@ static void twl4030_codec_enable(struct snd_soc_codec *codec, int enable)
 
 static void twl4030_init_chip(struct snd_soc_codec *codec)
 {
+	u8 *cache = codec->reg_cache;
 	int i;
 
 	/* clear CODECPDZ prior to setting register defaults */
@@ -211,7 +211,7 @@ static void twl4030_init_chip(struct snd_soc_codec *codec)
 
 	/* set all audio section registers to reasonable defaults */
 	for (i = TWL4030_REG_OPTION; i <= TWL4030_REG_MISC_SET_2; i++)
-		twl4030_write(codec, i,	twl4030_reg[i]);
+		twl4030_write(codec, i,	cache[i]);
 
 }
 
-- 
cgit v0.10.2


From 15aedea439c4d7dbec17c99b5e1594c01b979833 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 27 May 2009 09:43:01 +0900
Subject: dma-debug: use sg_dma_address accessor instead of using dma_address
 directly

Architectures might not have dma_address in struct scatterlist (PARISC
doesn't). Directly accessing to dma_address in struct scatterlist is
wrong; we need to use sg_dma_address() accesssor instead.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index e47e1a0..1b5bb82 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -840,7 +840,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		entry->dev            = dev;
 		entry->paddr          = sg_phys(s);
 		entry->size           = s->length;
-		entry->dev_addr       = s->dma_address;
+		entry->dev_addr       = sg_dma_address(s);
 		entry->direction      = direction;
 		entry->sg_call_ents   = nents;
 		entry->sg_mapped_ents = mapped_ents;
@@ -872,7 +872,7 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			.type           = dma_debug_sg,
 			.dev            = dev,
 			.paddr          = sg_phys(s),
-			.dev_addr       = s->dma_address,
+			.dev_addr       = sg_dma_address(s),
 			.size           = s->length,
 			.direction      = dir,
 			.sg_call_ents   = 0,
@@ -996,8 +996,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
-		check_sync(dev, s->dma_address, s->dma_length, 0,
-				direction, true);
+		check_sync(dev, sg_dma_address(s), s->dma_length, 0,
+			   direction, true);
 	}
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
@@ -1012,8 +1012,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
-		check_sync(dev, s->dma_address, s->dma_length, 0,
-				direction, false);
+		check_sync(dev, sg_dma_address(s), s->dma_length, 0,
+			   direction, false);
 	}
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
-- 
cgit v0.10.2


From 884d05970bfbc3db368f23460dc4ce63257f240d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 27 May 2009 09:43:02 +0900
Subject: dma-debug: use sg_dma_len accessor

debug_dma_map_sg() and debug_dma_unmap_sg() use length in struct
scatterlist while debug_dma_sync_sg_for_cpu() and
debug_dma_sync_sg_for_device() use dma_length. This causes bugs
warnings on some IOMMU implementations since these values are not
same; the length doesn't represent the dma length.

We always need to use sg_dma_len() accessor to get the dma length of a
scatterlist entry.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 1b5bb82..51f95e5 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -839,7 +839,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		entry->type           = dma_debug_sg;
 		entry->dev            = dev;
 		entry->paddr          = sg_phys(s);
-		entry->size           = s->length;
+		entry->size           = sg_dma_len(s);
 		entry->dev_addr       = sg_dma_address(s);
 		entry->direction      = direction;
 		entry->sg_call_ents   = nents;
@@ -847,7 +847,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
 		if (!PageHighMem(sg_page(s))) {
 			check_for_stack(dev, sg_virt(s));
-			check_for_illegal_area(dev, sg_virt(s), s->length);
+			check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
 		}
 
 		add_dma_entry(entry);
@@ -873,7 +873,7 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			.dev            = dev,
 			.paddr          = sg_phys(s),
 			.dev_addr       = sg_dma_address(s),
-			.size           = s->length,
+			.size           = sg_dma_len(s),
 			.direction      = dir,
 			.sg_call_ents   = 0,
 		};
@@ -996,7 +996,7 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
-		check_sync(dev, sg_dma_address(s), s->dma_length, 0,
+		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
 			   direction, true);
 	}
 }
@@ -1012,7 +1012,7 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
-		check_sync(dev, sg_dma_address(s), s->dma_length, 0,
+		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
 			   direction, false);
 	}
 }
-- 
cgit v0.10.2


From 88f3907f6f447899544beadf491dccb32015dacb Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 27 May 2009 09:43:03 +0900
Subject: dma-debug: fix debug_dma_sync_sg_for_cpu and
 debug_dma_sync_sg_for_device

DMA-mapping.txt says that debug_dma_sync_sg family must be called with
the _same_ one you passed into the dma_map_sg call, it should _NOT_ be
the 'count' value _returned_ from the dma_map_sg call.

debug_dma_sync_sg_for_cpu and debug_dma_sync_sg_for_device can't
handle this properly; they need to use the sg_mapped_ents in struct
dma_debug_entry as debug_dma_unmap_sg() does.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 51f95e5..1abed17 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -855,13 +855,32 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 }
 EXPORT_SYMBOL(debug_dma_map_sg);
 
+static int get_nr_mapped_entries(struct device *dev, struct scatterlist *s)
+{
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+	int mapped_ents = 0;
+	struct dma_debug_entry ref;
+
+	ref.dev = dev;
+	ref.dev_addr = sg_dma_address(s);
+	ref.size = sg_dma_len(s),
+
+	bucket = get_hash_bucket(&ref, &flags);
+	entry = hash_bucket_find(bucket, &ref);
+	if (entry)
+		mapped_ents = entry->sg_mapped_ents;
+	put_hash_bucket(bucket, &flags);
+
+	return mapped_ents;
+}
+
 void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			int nelems, int dir)
 {
-	struct dma_debug_entry *entry;
 	struct scatterlist *s;
 	int mapped_ents = 0, i;
-	unsigned long flags;
 
 	if (unlikely(global_disable))
 		return;
@@ -881,14 +900,9 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		if (mapped_ents && i >= mapped_ents)
 			break;
 
-		if (mapped_ents == 0) {
-			struct hash_bucket *bucket;
+		if (!i) {
 			ref.sg_call_ents = nelems;
-			bucket = get_hash_bucket(&ref, &flags);
-			entry = hash_bucket_find(bucket, &ref);
-			if (entry)
-				mapped_ents = entry->sg_mapped_ents;
-			put_hash_bucket(bucket, &flags);
+			mapped_ents = get_nr_mapped_entries(dev, s);
 		}
 
 		check_unmap(&ref);
@@ -990,12 +1004,18 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 			       int nelems, int direction)
 {
 	struct scatterlist *s;
-	int i;
+	int mapped_ents = 0, i;
 
 	if (unlikely(global_disable))
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
+		if (!i)
+			mapped_ents = get_nr_mapped_entries(dev, s);
+
+		if (i >= mapped_ents)
+			break;
+
 		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
 			   direction, true);
 	}
@@ -1006,12 +1026,18 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 				  int nelems, int direction)
 {
 	struct scatterlist *s;
-	int i;
+	int mapped_ents = 0, i;
 
 	if (unlikely(global_disable))
 		return;
 
 	for_each_sg(sg, s, nelems, i) {
+		if (!i)
+			mapped_ents = get_nr_mapped_entries(dev, s);
+
+		if (i >= mapped_ents)
+			break;
+
 		check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
 			   direction, false);
 	}
-- 
cgit v0.10.2


From 3f4dee227348daac32f36daad9a91059efd0723e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 May 2009 11:25:09 +0200
Subject: perf_counter: Fix cpuctx->task_ctx races

Peter noticed that we are sometimes reading cpuctx->task_ctx with
interrupts enabled.

Noticed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index db843f8..eb34604 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -234,15 +234,18 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
 
+	local_irq_save(flags);
 	/*
 	 * If this is a task context, we need to check whether it is
 	 * the current task context of this cpu. If not it has been
 	 * scheduled out before the smp call arrived.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		local_irq_restore(flags);
 		return;
+	}
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	spin_lock(&ctx->lock);
 	/*
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level.
@@ -382,14 +385,17 @@ static void __perf_counter_disable(void *info)
 	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
 
+	local_irq_save(flags);
 	/*
 	 * If this is a per-task counter, need to check whether this
 	 * counter's task is the current task on this cpu.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		local_irq_restore(flags);
 		return;
+	}
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	spin_lock(&ctx->lock);
 
 	/*
 	 * If the counter is on, turn it off.
@@ -615,6 +621,7 @@ static void __perf_install_in_context(void *info)
 	unsigned long flags;
 	int err;
 
+	local_irq_save(flags);
 	/*
 	 * If this is a task context, we need to check whether it is
 	 * the current task context of this cpu. If not it has been
@@ -623,12 +630,14 @@ static void __perf_install_in_context(void *info)
 	 * on this cpu because it had no counters.
 	 */
 	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current)
+		if (cpuctx->task_ctx || ctx->task != current) {
+			local_irq_restore(flags);
 			return;
+		}
 		cpuctx->task_ctx = ctx;
 	}
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	spin_lock(&ctx->lock);
 	ctx->is_active = 1;
 	update_context_time(ctx);
 
@@ -745,17 +754,20 @@ static void __perf_counter_enable(void *info)
 	unsigned long flags;
 	int err;
 
+	local_irq_save(flags);
 	/*
 	 * If this is a per-task counter, need to check whether this
 	 * counter's task is the current task on this cpu.
 	 */
 	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current)
+		if (cpuctx->task_ctx || ctx->task != current) {
+			local_irq_restore(flags);
 			return;
+		}
 		cpuctx->task_ctx = ctx;
 	}
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	spin_lock(&ctx->lock);
 	ctx->is_active = 1;
 	update_context_time(ctx);
 
-- 
cgit v0.10.2


From 012b84dae17126d8b5d159173091eb3db5a2bc43 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 17 May 2009 11:08:41 +0200
Subject: perf_counter: Robustify counter-free logic

This fixes a nasty crash and highlights a bug that we were
freeing failed-fork() counters incorrectly.

(the fix for that will come separately)

[ Impact: fix crashes/lockups with inherited counters ]

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index eb34604..616c524 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1004,6 +1004,10 @@ static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
 
 	if (!cpuctx->task_ctx)
 		return;
+
+	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+		return;
+
 	__perf_counter_sched_out(ctx, cpuctx);
 	cpuctx->task_ctx = NULL;
 }
-- 
cgit v0.10.2


From 294ae4011530d008c59c4fb9847738e39228821e Mon Sep 17 00:00:00 2001
From: GeunSik Lim <leemgs1@gmail.com>
Date: Thu, 28 May 2009 10:36:11 +0900
Subject: ftrace: fix typo about map of kernel priority in ftrace.txt file.

Fix typo about chart to map the kernel priority to user land priorities.

   * About sched_setscheduler(2)
      Processes scheduled under SCHED_FIFO or SCHED_RR
      can have a (user-space) static priority in the range 1 to 99.
      (reference: http://www.kernel.org/doc/man-pages/online/pages/
                  man2/sched_setscheduler.2.html)

   * From: Steven Rostedt
      0 to 98 - maps to RT tasks 99 to 1 (SCHED_RR or SCHED_FIFO)

      99 - maps to internal kernel threads that want to be lower than RT tasks
      but higher than SCHED_OTHER tasks. Although I'm not sure if any
      kernel thread actually uses this. I'm not even sure how this can be
      set, because the internal sched_setscheduler function does not allow
      for it.

      100 to 139 - maps nice levels -20 to 19. These are not set via
      sched_setscheduler, but are set via the nice system call.

      140 - reserved for idle tasks.

Signed-off-by: GeunSik Lim <geunsik.lim@samsung.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index fd9a3e6..e362f50 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -518,9 +518,18 @@ priority with zero (0) being the highest priority and the nice
 values starting at 100 (nice -20). Below is a quick chart to map
 the kernel priority to user land priorities.
 
-  Kernel priority: 0 to 99    ==> user RT priority 99 to 0
-  Kernel priority: 100 to 139 ==> user nice -20 to 19
-  Kernel priority: 140        ==> idle task priority
+   Kernel Space                     User Space
+ ===============================================================
+   0(high) to  98(low)     user RT priority 99(high) to 1(low)
+                           with SCHED_RR or SCHED_FIFO
+ ---------------------------------------------------------------
+  99                       sched_priority is not used in scheduling
+                           decisions(it must be specified as 0)
+ ---------------------------------------------------------------
+ 100(high) to 139(low)     user nice -20(high) to 19(low)
+ ---------------------------------------------------------------
+ 140                       idle task priority
+ ---------------------------------------------------------------
 
 The task states are:
 
-- 
cgit v0.10.2


From f04d82b7e0c63d0251f9952a537a4bc4d73aa1a9 Mon Sep 17 00:00:00 2001
From: GeunSik Lim <leemgs1@gmail.com>
Date: Thu, 28 May 2009 10:36:14 +0900
Subject: sched: fix typo in sched-rt-group.txt file

Fix typo about static priority's range.

    Kernel Space                     User Space
    ===============================================================
      0(high) to  98(low)     user RT priority 99(high) to 1(low)
                              with SCHED_RR or SCHED_FIFO
    ---------------------------------------------------------------
     99                       sched_priority is not used in scheduling
                              decisions(it must be specified as 0)
    ---------------------------------------------------------------
    100(high) to 139(low)     user nice -20(high) to 19(low)
    ---------------------------------------------------------------
    140                       idle task priority
    ---------------------------------------------------------------
    * ref) http://www.kernel.org/doc/man-pages/online/pages/man2/sched_setscheduler.2.html

Signed-off-by: GeunSik Lim <geunsik.lim@samsung.com>
CC: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index eb74b01..1df7f9c 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -187,7 +187,7 @@ get their allocated time.
 
 Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
 the biggest challenge as the current linux PI infrastructure is geared towards
-the limited static priority levels 0-139. With deadline scheduling you need to
+the limited static priority levels 0-99. With deadline scheduling you need to
 do deadline inheritance (since priority is inversely proportional to the
 deadline delta (deadline - now).
 
-- 
cgit v0.10.2


From efb3d17240d80e27508d238809168120fe4b93a4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 29 May 2009 14:25:58 +0200
Subject: perf_counter: Fix COMM and MMAP events for cpu wide counters

Commit a63eaf34ae6 ("perf_counter: Dynamically allocate tasks'
perf_counter_context struct") broke COMM and MMAP notification for
cpu wide counters by dropping out early if there was no task context,
thereby also not iterating the cpu context.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 616c524..58d6d19 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2443,9 +2443,9 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
 
 	cpuctx = &get_cpu_var(perf_cpu_context);
 	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
+	if (cpuctx->task_ctx)
+		perf_counter_comm_ctx(cpuctx->task_ctx, comm_event);
 	put_cpu_var(perf_cpu_context);
-
-	perf_counter_comm_ctx(current->perf_counter_ctxp, comm_event);
 }
 
 void perf_counter_comm(struct task_struct *task)
@@ -2454,8 +2454,6 @@ void perf_counter_comm(struct task_struct *task)
 
 	if (!atomic_read(&nr_comm_tracking))
 		return;
-	if (!current->perf_counter_ctxp)
-		return;
 
 	comm_event = (struct perf_comm_event){
 		.task	= task,
@@ -2570,10 +2568,10 @@ got_name:
 
 	cpuctx = &get_cpu_var(perf_cpu_context);
 	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
+	if (cpuctx->task_ctx)
+		perf_counter_mmap_ctx(cpuctx->task_ctx, mmap_event);
 	put_cpu_var(perf_cpu_context);
 
-	perf_counter_mmap_ctx(current->perf_counter_ctxp, mmap_event);
-
 	kfree(buf);
 }
 
@@ -2584,8 +2582,6 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 
 	if (!atomic_read(&nr_mmap_tracking))
 		return;
-	if (!current->perf_counter_ctxp)
-		return;
 
 	mmap_event = (struct perf_mmap_event){
 		.file   = file,
-- 
cgit v0.10.2


From 665c2142a94202881a3c11cbaee6506cb10ada2d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 29 May 2009 14:51:57 +0200
Subject: perf_counter: Clean up task_ctx vs interrupts

Remove the local_irq_save() etc.. in routines that are smp function
calls, or have IRQs disabled by other means.

Then change the COMM, MMAP, and swcounter context iteration to
current->perf_counter_ctxp and RCU, since it really doesn't matter
which context they iterate, they're all folded.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 58d6d19..0c000d3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -232,18 +232,14 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
-	unsigned long flags;
 
-	local_irq_save(flags);
 	/*
 	 * If this is a task context, we need to check whether it is
 	 * the current task context of this cpu. If not it has been
 	 * scheduled out before the smp call arrived.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx) {
-		local_irq_restore(flags);
+	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
-	}
 
 	spin_lock(&ctx->lock);
 	/*
@@ -267,7 +263,7 @@ static void __perf_counter_remove_from_context(void *info)
 	}
 
 	perf_enable();
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
 }
 
 
@@ -383,17 +379,13 @@ static void __perf_counter_disable(void *info)
 	struct perf_counter *counter = info;
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter_context *ctx = counter->ctx;
-	unsigned long flags;
 
-	local_irq_save(flags);
 	/*
 	 * If this is a per-task counter, need to check whether this
 	 * counter's task is the current task on this cpu.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx) {
-		local_irq_restore(flags);
+	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
-	}
 
 	spin_lock(&ctx->lock);
 
@@ -411,7 +403,7 @@ static void __perf_counter_disable(void *info)
 		counter->state = PERF_COUNTER_STATE_OFF;
 	}
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
 }
 
 /*
@@ -618,10 +610,8 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter_context *ctx = counter->ctx;
 	struct perf_counter *leader = counter->group_leader;
 	int cpu = smp_processor_id();
-	unsigned long flags;
 	int err;
 
-	local_irq_save(flags);
 	/*
 	 * If this is a task context, we need to check whether it is
 	 * the current task context of this cpu. If not it has been
@@ -630,10 +620,8 @@ static void __perf_install_in_context(void *info)
 	 * on this cpu because it had no counters.
 	 */
 	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current) {
-			local_irq_restore(flags);
+		if (cpuctx->task_ctx || ctx->task != current)
 			return;
-		}
 		cpuctx->task_ctx = ctx;
 	}
 
@@ -687,7 +675,7 @@ static void __perf_install_in_context(void *info)
  unlock:
 	perf_enable();
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
 }
 
 /*
@@ -751,19 +739,15 @@ static void __perf_counter_enable(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter_context *ctx = counter->ctx;
 	struct perf_counter *leader = counter->group_leader;
-	unsigned long flags;
 	int err;
 
-	local_irq_save(flags);
 	/*
 	 * If this is a per-task counter, need to check whether this
 	 * counter's task is the current task on this cpu.
 	 */
 	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current) {
-			local_irq_restore(flags);
+		if (cpuctx->task_ctx || ctx->task != current)
 			return;
-		}
 		cpuctx->task_ctx = ctx;
 	}
 
@@ -811,7 +795,7 @@ static void __perf_counter_enable(void *info)
 	}
 
  unlock:
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
 }
 
 /*
@@ -981,6 +965,10 @@ void perf_counter_task_sched_out(struct task_struct *task,
 		spin_lock(&ctx->lock);
 		spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
 		if (context_equiv(ctx, next_ctx)) {
+			/*
+			 * XXX do we need a memory barrier of sorts
+			 * wrt to rcu_dereference() of perf_counter_ctxp
+			 */
 			task->perf_counter_ctxp = next_ctx;
 			next->perf_counter_ctxp = ctx;
 			ctx->task = next;
@@ -998,6 +986,9 @@ void perf_counter_task_sched_out(struct task_struct *task,
 	}
 }
 
+/*
+ * Called with IRQs disabled
+ */
 static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
@@ -1012,6 +1003,9 @@ static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
 	cpuctx->task_ctx = NULL;
 }
 
+/*
+ * Called with IRQs disabled
+ */
 static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 {
 	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
@@ -2431,6 +2425,7 @@ static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
 static void perf_counter_comm_event(struct perf_comm_event *comm_event)
 {
 	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
 	unsigned int size;
 	char *comm = comm_event->task->comm;
 
@@ -2443,9 +2438,17 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
 
 	cpuctx = &get_cpu_var(perf_cpu_context);
 	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
-	if (cpuctx->task_ctx)
-		perf_counter_comm_ctx(cpuctx->task_ctx, comm_event);
 	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_counter_comm_ctx(ctx, comm_event);
+	rcu_read_unlock();
 }
 
 void perf_counter_comm(struct task_struct *task)
@@ -2536,6 +2539,7 @@ static void perf_counter_mmap_ctx(struct perf_counter_context *ctx,
 static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
 {
 	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
 	struct file *file = mmap_event->file;
 	unsigned int size;
 	char tmp[16];
@@ -2568,10 +2572,18 @@ got_name:
 
 	cpuctx = &get_cpu_var(perf_cpu_context);
 	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
-	if (cpuctx->task_ctx)
-		perf_counter_mmap_ctx(cpuctx->task_ctx, mmap_event);
 	put_cpu_var(perf_cpu_context);
 
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_counter_mmap_ctx(ctx, mmap_event);
+	rcu_read_unlock();
+
 	kfree(buf);
 }
 
@@ -2882,6 +2894,7 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
 	int *recursion = perf_swcounter_recursion_context(cpuctx);
+	struct perf_counter_context *ctx;
 
 	if (*recursion)
 		goto out;
@@ -2891,10 +2904,15 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 
 	perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
 				 nr, nmi, regs, addr);
-	if (cpuctx->task_ctx) {
-		perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
-					 nr, nmi, regs, addr);
-	}
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr);
+	rcu_read_unlock();
 
 	barrier();
 	(*recursion)--;
-- 
cgit v0.10.2


From bbbee90829304d156c12b171c0ac7e6e1aba8b90 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 29 May 2009 14:25:58 +0200
Subject: perf_counter: Ammend cleanup in fork() fail

When fork() fails we cannot use perf_counter_exit_task() since that
assumes to operate on current. Write a new helper that cleans up
unused/clean contexts.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 717bf3b..519a41b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -579,6 +579,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task,
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern int perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
+extern void perf_counter_free_task(struct task_struct *task);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
 extern void __perf_disable(void);
@@ -644,6 +645,7 @@ static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline int perf_counter_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
+static inline void perf_counter_free_task(struct task_struct *task)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_disable(void)					{ }
diff --git a/kernel/fork.c b/kernel/fork.c
index c07c333..23bf757 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1298,7 +1298,7 @@ bad_fork_cleanup_semundo:
 bad_fork_cleanup_audit:
 	audit_free(p);
 bad_fork_cleanup_policy:
-	perf_counter_exit_task(p);
+	perf_counter_free_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0c000d3..79c3f26 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3538,8 +3538,7 @@ static void sync_child_counter(struct perf_counter *child_counter,
 }
 
 static void
-__perf_counter_exit_task(struct task_struct *child,
-			 struct perf_counter *child_counter,
+__perf_counter_exit_task(struct perf_counter *child_counter,
 			 struct perf_counter_context *child_ctx)
 {
 	struct perf_counter *parent_counter;
@@ -3605,7 +3604,7 @@ void perf_counter_exit_task(struct task_struct *child)
 again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
 				 list_entry)
-		__perf_counter_exit_task(child, child_counter, child_ctx);
+		__perf_counter_exit_task(child_counter, child_ctx);
 
 	/*
 	 * If the last counter was a group counter, it will have appended all
@@ -3621,6 +3620,44 @@ again:
 }
 
 /*
+ * free an unexposed, unused context as created by inheritance by
+ * init_task below, used by fork() in case of fail.
+ */
+void perf_counter_free_task(struct task_struct *task)
+{
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+	struct perf_counter *counter, *tmp;
+
+	if (!ctx)
+		return;
+
+	mutex_lock(&ctx->mutex);
+again:
+	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) {
+		struct perf_counter *parent = counter->parent;
+
+		if (WARN_ON_ONCE(!parent))
+			continue;
+
+		mutex_lock(&parent->child_mutex);
+		list_del_init(&counter->child_list);
+		mutex_unlock(&parent->child_mutex);
+
+		fput(parent->filp);
+
+		list_del_counter(counter, ctx);
+		free_counter(counter);
+	}
+
+	if (!list_empty(&ctx->counter_list))
+		goto again;
+
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
+/*
  * Initialize the perf_counter context in task_struct
  */
 int perf_counter_init_task(struct task_struct *child)
-- 
cgit v0.10.2


From 681a1b4032d72f4ad6d4beed751bc65574572746 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 28 May 2009 14:34:18 -0700
Subject: MAINTAINERS: pair EDAC-E752X P: and M: entries

Entries should be P: name then M: email address.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 5ee166e..ea4eced 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1971,8 +1971,8 @@ F:	include/linux/edac.h
 
 EDAC-E752X
 P:	Mark Gross
-P:	Doug Thompson
 M:	mark.gross@intel.com
+P:	Doug Thompson
 M:	dougthompson@xmission.com
 L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
-- 
cgit v0.10.2


From 6d2661ede5f20f968422e790af3334908c3bc857 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 28 May 2009 14:34:19 -0700
Subject: oom: fix possible oom_dump_tasks NULL pointer

When /proc/sys/vm/oom_dump_tasks is enabled, it is possible to get a NULL
pointer for tasks that have detached mm's since task_lock() is not held
during the tasklist scan.  Add the task_lock().

Acked-by: Nick Piggin <npiggin@suse.de>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 92bcf1d..a7b2460 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -284,22 +284,28 @@ static void dump_tasks(const struct mem_cgroup *mem)
 	printk(KERN_INFO "[ pid ]   uid  tgid total_vm      rss cpu oom_adj "
 	       "name\n");
 	do_each_thread(g, p) {
-		/*
-		 * total_vm and rss sizes do not exist for tasks with a
-		 * detached mm so there's no need to report them.
-		 */
-		if (!p->mm)
-			continue;
+		struct mm_struct *mm;
+
 		if (mem && !task_in_mem_cgroup(p, mem))
 			continue;
 		if (!thread_group_leader(p))
 			continue;
 
 		task_lock(p);
+		mm = p->mm;
+		if (!mm) {
+			/*
+			 * total_vm and rss sizes do not exist for tasks with no
+			 * mm so there's no need to report them; they can't be
+			 * oom killed anyway.
+			 */
+			task_unlock(p);
+			continue;
+		}
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d     %3d %s\n",
-		       p->pid, __task_cred(p)->uid, p->tgid,
-		       p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p),
-		       p->oomkilladj, p->comm);
+		       p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
+		       get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
+		       p->comm);
 		task_unlock(p);
 	} while_each_thread(g, p);
 }
-- 
cgit v0.10.2


From b2e1feaf0af6b8a826b86748a19ddc2013ab7dbd Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 28 May 2009 14:34:20 -0700
Subject: cred: #include init.h in cred.h

linux/cred.h can't be included as first header (alphabetical order)
because it uses __init which is enough to break compilation on some archs.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 3282ee4..4fa9996 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -13,6 +13,7 @@
 #define _LINUX_CRED_H
 
 #include <linux/capability.h>
+#include <linux/init.h>
 #include <linux/key.h>
 #include <asm/atomic.h>
 
-- 
cgit v0.10.2


From bd6daba909d8484bd2ccf6017db4028d7a420927 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Thu, 28 May 2009 14:34:21 -0700
Subject: procfs: make errno values consistent when open pident vs exit(2) race
 occurs

proc_pident_instantiate() has following call flow.

proc_pident_lookup()
  proc_pident_instantiate()
    proc_pid_make_inode()

And, proc_pident_lookup() has following error handling.

	const struct pid_entry *p, *last;
	error = ERR_PTR(-ENOENT);
	if (!task)
		goto out_no_task;

Then, proc_pident_instantiate should return ENOENT too when racing against
exit(2) occur.

EINAL has two bad reason.
  - it implies caller is wrong. bad the race isn't caller's mistake.
  - man 2 open don't explain EINVAL. user often don't handle it.

Note: Other proc_pid_make_inode() caller already use ENOENT properly.

Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/proc/base.c b/fs/proc/base.c
index fb45615..3326bbf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1956,7 +1956,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
 	const struct pid_entry *p = ptr;
 	struct inode *inode;
 	struct proc_inode *ei;
-	struct dentry *error = ERR_PTR(-EINVAL);
+	struct dentry *error = ERR_PTR(-ENOENT);
 
 	inode = proc_pid_make_inode(dir->i_sb, task);
 	if (!inode)
-- 
cgit v0.10.2


From e767e0561d7fd2333df1921f1ab4176211f9036b Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Thu, 28 May 2009 14:34:28 -0700
Subject: memcg: fix deadlock between lock_page_cgroup and mapping tree_lock

mapping->tree_lock can be acquired from interrupt context.  Then,
following dead lock can occur.

Assume "A" as a page.

 CPU0:
       lock_page_cgroup(A)
		interrupted
			-> take mapping->tree_lock.
 CPU1:
       take mapping->tree_lock
		-> lock_page_cgroup(A)

This patch tries to fix above deadlock by moving memcg's hook to out of
mapping->tree_lock.  charge/uncharge of pagecache/swapcache is protected
by page lock, not tree_lock.

After this patch, lock_page_cgroup() is not called under mapping->tree_lock.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 62d8143..d476aad 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page,
 	return 0;
 }
 
+static inline void
+mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
+{
+}
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index 379ff0b..1b60f30 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page)
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	BUG_ON(page_mapped(page));
-	mem_cgroup_uncharge_cache_page(page);
 
 	/*
 	 * Some filesystems seem to re-dirty the page even after
@@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page)
 	spin_lock_irq(&mapping->tree_lock);
 	__remove_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
+	mem_cgroup_uncharge_cache_page(page);
 }
 
 static int sync_page(void *word)
@@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		if (likely(!error)) {
 			mapping->nrpages++;
 			__inc_zone_page_state(page, NR_FILE_PAGES);
+			spin_unlock_irq(&mapping->tree_lock);
 		} else {
 			page->mapping = NULL;
+			spin_unlock_irq(&mapping->tree_lock);
 			mem_cgroup_uncharge_cache_page(page);
 			page_cache_release(page);
 		}
-
-		spin_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	} else
 		mem_cgroup_uncharge_cache_page(page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 01c2d8f..4a747a2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1488,8 +1488,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
+#ifdef CONFIG_SWAP
 /*
- * called from __delete_from_swap_cache() and drop "page" account.
+ * called after __delete_from_swap_cache() and drop "page" account.
  * memcg information is recorded to swap_cgroup of "ent"
  */
 void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
@@ -1506,6 +1507,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 	if (memcg)
 		css_put(&memcg->css);
 }
+#endif
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /*
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3ecea98..1416e7e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  */
 void __delete_from_swap_cache(struct page *page)
 {
-	swp_entry_t ent = {.val = page_private(page)};
-
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(!PageSwapCache(page));
 	VM_BUG_ON(PageWriteback(page));
@@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page)
 	total_swapcache_pages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	INC_CACHE_INFO(del_total);
-	mem_cgroup_uncharge_swapcache(page, ent);
 }
 
 /**
@@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page)
 	__delete_from_swap_cache(page);
 	spin_unlock_irq(&swapper_space.tree_lock);
 
+	mem_cgroup_uncharge_swapcache(page, entry);
 	swap_free(entry);
 	page_cache_release(page);
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index 55206fa..12e1579 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	BUG_ON(page_has_private(page));
 	__remove_from_page_cache(page);
 	spin_unlock_irq(&mapping->tree_lock);
+	mem_cgroup_uncharge_cache_page(page);
 	page_cache_release(page);	/* pagecache ref */
 	return 1;
 failed:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5fa3eda..d254306 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
+		mem_cgroup_uncharge_swapcache(page, swap);
 		swap_free(swap);
 	} else {
 		__remove_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
+		mem_cgroup_uncharge_cache_page(page);
 	}
 
 	return 1;
-- 
cgit v0.10.2


From b898f4f869da5b9d41f297fff87aca4cd42d80b3 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Thu, 28 May 2009 14:34:29 -0700
Subject: drivers/serial/mpc52xx_uart.c: fix array overindexing check

The check for an overindexing of mpc52xx_uart_{ports,nodes} has an
off-by-one.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
index 7f72f8c..b3feb61 100644
--- a/drivers/serial/mpc52xx_uart.c
+++ b/drivers/serial/mpc52xx_uart.c
@@ -988,7 +988,7 @@ mpc52xx_console_setup(struct console *co, char *options)
 	pr_debug("mpc52xx_console_setup co=%p, co->index=%i, options=%s\n",
 		 co, co->index, options);
 
-	if ((co->index < 0) || (co->index > MPC52xx_PSC_MAXNUM)) {
+	if ((co->index < 0) || (co->index >= MPC52xx_PSC_MAXNUM)) {
 		pr_debug("PSC%x out of range\n", co->index);
 		return -EINVAL;
 	}
-- 
cgit v0.10.2


From ba9447198bdd945666a9bac5e556632a7acb235d Mon Sep 17 00:00:00 2001
From: Thomas Dahlmann <dahlmann.thomas@arcor.de>
Date: Thu, 28 May 2009 14:34:30 -0700
Subject: MAINTAINERS: change email address for Thomas Dahlmann

Signed-off-by: Thomas <dahlmann.thomas@arcor.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index ea4eced..41c6605 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -434,7 +434,7 @@ F:	arch/alpha/
 
 AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER
 P:	Thomas Dahlmann
-M:	thomas.dahlmann@amd.com
+M:	dahlmann.thomas@arcor.de
 L:	linux-geode@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	drivers/usb/gadget/amd5536udc.*
-- 
cgit v0.10.2


From c3dc5bec05a2ae03a72ef82e321d77fb549d951c Mon Sep 17 00:00:00 2001
From: Oskar Schirmer <os@emlix.com>
Date: Thu, 28 May 2009 14:34:31 -0700
Subject: flat: fix data sections alignment

The flat loader uses an architecture's flat_stack_align() to align the
stack but assumes word-alignment is enough for the data sections.

However, on the Xtensa S6000 we have registers up to 128bit width
which can be used from userspace and therefor need userspace stack and
data-section alignment of at least this size.

This patch drops flat_stack_align() and uses the same alignment that
is required for slab caches, ARCH_SLAB_MINALIGN, or wordsize if it's
not defined by the architecture.

It also fixes m32r which was obviously kaput, aligning an
uninitialized stack entry instead of the stack pointer.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Oskar Schirmer <os@emlix.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Bryan Wu <cooloney@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Johannes Weiner <jw@emlix.com>
Acked-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h
index 1d77e51..59426a4 100644
--- a/arch/arm/include/asm/flat.h
+++ b/arch/arm/include/asm/flat.h
@@ -5,9 +5,6 @@
 #ifndef __ARM_FLAT_H__
 #define __ARM_FLAT_H__
 
-/* An odd number of words will be pushed after this alignment, so
-   deliberately misalign the value.  */
-#define	flat_stack_align(sp)	sp = (void *)(((unsigned long)(sp) - 4) | 4)
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
diff --git a/arch/blackfin/include/asm/flat.h b/arch/blackfin/include/asm/flat.h
index e70074e..733a178 100644
--- a/arch/blackfin/include/asm/flat.h
+++ b/arch/blackfin/include/asm/flat.h
@@ -10,7 +10,6 @@
 
 #include <asm/unaligned.h>
 
-#define	flat_stack_align(sp)	/* nothing needed */
 #define	flat_argvp_envp_on_stack()		0
 #define	flat_old_ram_flag(flags)		(flags)
 
diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h
index 2a87350..bd12b31 100644
--- a/arch/h8300/include/asm/flat.h
+++ b/arch/h8300/include/asm/flat.h
@@ -5,7 +5,6 @@
 #ifndef __H8300_FLAT_H__
 #define __H8300_FLAT_H__
 
-#define	flat_stack_align(sp)			/* nothing needed */
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		1
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h
index d851cf0..5d711c4 100644
--- a/arch/m32r/include/asm/flat.h
+++ b/arch/m32r/include/asm/flat.h
@@ -12,7 +12,6 @@
 #ifndef __ASM_M32R_FLAT_H
 #define __ASM_M32R_FLAT_H
 
-#define	flat_stack_align(sp)		(*sp += (*sp & 3 ? (4 - (*sp & 3)): 0))
 #define	flat_argvp_envp_on_stack()		0
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_set_persistent(relval, p)		0
diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h
index 814b517..a0e2907 100644
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -5,7 +5,6 @@
 #ifndef __M68KNOMMU_FLAT_H__
 #define __M68KNOMMU_FLAT_H__
 
-#define	flat_stack_align(sp)			/* nothing needed */
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h
index d3b2b4f..5d84df5 100644
--- a/arch/sh/include/asm/flat.h
+++ b/arch/sh/include/asm/flat.h
@@ -12,7 +12,6 @@
 #ifndef __ASM_SH_FLAT_H
 #define __ASM_SH_FLAT_H
 
-#define	flat_stack_align(sp)			/* nothing needed */
 #define	flat_argvp_envp_on_stack()		0
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5cebf0b..697f6b5 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -41,6 +41,7 @@
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 #include <asm/cacheflush.h>
+#include <asm/page.h>
 
 /****************************************************************************/
 
@@ -54,6 +55,18 @@
 #define	DBG_FLT(a...)
 #endif
 
+/*
+ * User data (stack, data section and bss) needs to be aligned
+ * for the same reasons as SLAB memory is, and to the same amount.
+ * Avoid duplicating architecture specific code by using the same
+ * macro as with SLAB allocation:
+ */
+#ifdef ARCH_SLAB_MINALIGN
+#define FLAT_DATA_ALIGN	(ARCH_SLAB_MINALIGN)
+#else
+#define FLAT_DATA_ALIGN	(sizeof(void *))
+#endif
+
 #define RELOC_FAILED 0xff00ff01		/* Relocation incorrect somewhere */
 #define UNLOADED_LIB 0x7ff000ff		/* Placeholder for unused library */
 
@@ -114,20 +127,18 @@ static unsigned long create_flat_tables(
 	int envc = bprm->envc;
 	char uninitialized_var(dummy);
 
-	sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p);
+	sp = (unsigned long *)p;
+	sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
+	sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
+	argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
+	envp = argv + (argc + 1);
 
-	sp -= envc+1;
-	envp = sp;
-	sp -= argc+1;
-	argv = sp;
-
-	flat_stack_align(sp);
 	if (flat_argvp_envp_on_stack()) {
-		--sp; put_user((unsigned long) envp, sp);
-		--sp; put_user((unsigned long) argv, sp);
+		put_user((unsigned long) envp, sp + 2);
+		put_user((unsigned long) argv, sp + 1);
 	}
 
-	put_user(argc,--sp);
+	put_user(argc, sp);
 	current->mm->arg_start = (unsigned long) p;
 	while (argc-->0) {
 		put_user((unsigned long) p, argv++);
@@ -558,7 +569,9 @@ static int load_flat_file(struct linux_binprm * bprm,
 			ret = realdatastart;
 			goto err;
 		}
-		datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
+		datapos = ALIGN(realdatastart +
+				MAX_SHARED_LIBS * sizeof(unsigned long),
+				FLAT_DATA_ALIGN);
 
 		DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n",
 				(int)(data_len + bss_len + stack_len), (int)datapos);
@@ -604,9 +617,12 @@ static int load_flat_file(struct linux_binprm * bprm,
 		}
 
 		realdatastart = textpos + ntohl(hdr->data_start);
-		datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long);
-		reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) +
-				MAX_SHARED_LIBS * sizeof(unsigned long));
+		datapos = ALIGN(realdatastart +
+				MAX_SHARED_LIBS * sizeof(unsigned long),
+				FLAT_DATA_ALIGN);
+
+		reloc = (unsigned long *)
+			(datapos + (ntohl(hdr->reloc_start) - text_len));
 		memp = textpos;
 		memp_size = len;
 #ifdef CONFIG_BINFMT_ZFLAT
@@ -854,7 +870,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	stack_len = TOP_OF_ARGS - bprm->p;             /* the strings */
 	stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
 	stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
-
+	stack_len += FLAT_DATA_ALIGN - 1;  /* reserve for upcoming alignment */
 	
 	res = load_flat_file(bprm, &libinfo, 0, &stack_len);
 	if (res > (unsigned long)-4096)
-- 
cgit v0.10.2


From b5d598b41aebee67bf95802b68b888e98a449687 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Thu, 28 May 2009 14:34:33 -0700
Subject: parport_gsc: fix printk format error

drivers/parport/parport_gsc.c:356: warning: format '%lx' expects type
'long unsigned int', but argument 2 has type 'resource_size_t'

[akpm@linux-foundation.org: fix it to handle u64's]
Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c
index e6a7e84..ea31a45 100644
--- a/drivers/parport/parport_gsc.c
+++ b/drivers/parport/parport_gsc.c
@@ -352,8 +352,8 @@ static int __devinit parport_init_chip(struct parisc_device *dev)
 	unsigned long port;
 
 	if (!dev->irq) {
-		printk(KERN_WARNING "IRQ not found for parallel device at 0x%lx\n",
-			dev->hpa.start);
+		printk(KERN_WARNING "IRQ not found for parallel device at 0x%llx\n",
+			(unsigned long long)dev->hpa.start);
 		return -ENODEV;
 	}
 
-- 
cgit v0.10.2


From 8e8e8267f0a08c2415d5f51bc9a9fde6d5400619 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Thu, 28 May 2009 14:34:34 -0700
Subject: serial: 8250_gsc: fix printk format error

drivers/serial/8250_gsc.c:44: warning: format '%lx' expects type
'long unsigned int', but argument 2 has type 'resource_size_t'

[akpm@linux-foundation.org: fix it to handle u64's]
Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250_gsc.c b/drivers/serial/8250_gsc.c
index 418b4fe..33149d9 100644
--- a/drivers/serial/8250_gsc.c
+++ b/drivers/serial/8250_gsc.c
@@ -39,9 +39,9 @@ static int __init serial_init_chip(struct parisc_device *dev)
 		 */
 		if (parisc_parent(dev)->id.hw_type != HPHW_IOA)
 			printk(KERN_INFO
-				"Serial: device 0x%lx not configured.\n"
+				"Serial: device 0x%llx not configured.\n"
 				"Enable support for Wax, Lasi, Asp or Dino.\n",
-				dev->hpa.start);
+				(unsigned long long)dev->hpa.start);
 		return -ENODEV;
 	}
 
-- 
cgit v0.10.2


From 17663e59704bea838a9236f299104e30909a43b1 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 28 May 2009 14:34:35 -0700
Subject: S3C-fb: PM fix

Correctly restore the FrameBuffer register state in the resume function.

Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c
index 5e9c630..d3a568e 100644
--- a/drivers/video/s3c-fb.c
+++ b/drivers/video/s3c-fb.c
@@ -947,7 +947,8 @@ static int __devexit s3c_fb_remove(struct platform_device *pdev)
 	int win;
 
 	for (win = 0; win <= S3C_FB_MAX_WIN; win++)
-		s3c_fb_release_win(sfb, sfb->windows[win]);
+		if (sfb->windows[win])
+			s3c_fb_release_win(sfb, sfb->windows[win]);
 
 	iounmap(sfb->regs);
 
@@ -985,11 +986,20 @@ static int s3c_fb_suspend(struct platform_device *pdev, pm_message_t state)
 static int s3c_fb_resume(struct platform_device *pdev)
 {
 	struct s3c_fb *sfb = platform_get_drvdata(pdev);
+	struct s3c_fb_platdata *pd = sfb->pdata;
 	struct s3c_fb_win *win;
 	int win_no;
 
 	clk_enable(sfb->bus_clk);
 
+	/* setup registers */
+	writel(pd->vidcon1, sfb->regs + VIDCON1);
+
+	/* zero all windows before we do anything */
+	for (win_no = 0; win_no < S3C_FB_MAX_WIN; win_no++)
+		s3c_fb_clear_win(sfb, win_no);
+
+	/* restore framebuffers */
 	for (win_no = 0; win_no < S3C_FB_MAX_WIN; win_no++) {
 		win = sfb->windows[win_no];
 		if (!win)
-- 
cgit v0.10.2


From 53b7479bbdaedcc7846c66fd608fe66f1b5aa35b Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Thu, 28 May 2009 14:34:36 -0700
Subject: atmel_lcdfb: correct fifo size for some products

Remove wrong fifo size definition for some AT91 products.

Due to a misunderstanding of some AT91 datasheets, a fifo size of 2048
(words) has been introduced by mistake.  In fact, all products (AT91/AT32)
are sharing the same fifo size of 512 words.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Andrew Victor <avictor.za@gmail.com>
Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c
index 9a577a8..2fb63f6 100644
--- a/drivers/video/atmel_lcdfb.c
+++ b/drivers/video/atmel_lcdfb.c
@@ -29,14 +29,8 @@
 
 /* configurable parameters */
 #define ATMEL_LCDC_CVAL_DEFAULT		0xc8
-#define ATMEL_LCDC_DMA_BURST_LEN	8
-
-#if defined(CONFIG_ARCH_AT91SAM9263) || defined(CONFIG_ARCH_AT91CAP9) || \
-	defined(CONFIG_ARCH_AT91SAM9RL)
-#define ATMEL_LCDC_FIFO_SIZE		2048
-#else
-#define ATMEL_LCDC_FIFO_SIZE		512
-#endif
+#define ATMEL_LCDC_DMA_BURST_LEN	8	/* words */
+#define ATMEL_LCDC_FIFO_SIZE		512	/* words */
 
 #if defined(CONFIG_ARCH_AT91)
 #define	ATMEL_LCDFB_FBINFO_DEFAULT	(FBINFO_DEFAULT \
-- 
cgit v0.10.2


From 32b154c0b0bae2879bf4e549d861caf1759a3546 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 28 May 2009 14:34:37 -0700
Subject: x86: ignore VM_LOCKED when determining if hugetlb-backed page tables
 can be shared or not

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13302

On x86 and x86-64, it is possible that page tables are shared beween
shared mappings backed by hugetlbfs.  As part of this,
page_table_shareable() checks a pair of vma->vm_flags and they must match
if they are to be shared.  All VMA flags are taken into account, including
VM_LOCKED.

The problem is that VM_LOCKED is cleared on fork().  When a process with a
shared memory segment forks() to exec() a helper, there will be shared
VMAs with different flags.  The impact is that the shared segment is
sometimes considered shareable and other times not, depending on what
process is checking.

What happens is that the segment page tables are being shared but the
count is inaccurate depending on the ordering of events.  As the page
tables are freed with put_page(), bad pmd's are found when some of the
children exit.  The hugepage counters also get corrupted and the Total and
Free count will no longer match even when all the hugepage-backed regions
are freed.  This requires a reboot of the machine to "fix".

This patch addresses the problem by comparing all flags except VM_LOCKED
when deciding if pagetables should be shared or not for hugetlbfs-backed
mapping.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <starlight@binnacle.cx>
Cc: Eric B Munson <ebmunson@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8f307d9..f46c340 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -26,12 +26,16 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
 	unsigned long sbase = saddr & PUD_MASK;
 	unsigned long s_end = sbase + PUD_SIZE;
 
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
 	/*
 	 * match the virtual addresses, permission and the alignment of the
 	 * page table page.
 	 */
 	if (pmd_index(addr) != pmd_index(saddr) ||
-	    vma->vm_flags != svma->vm_flags ||
+	    vm_flags != svm_flags ||
 	    sbase < svma->vm_start || svma->vm_end < s_end)
 		return 0;
 
-- 
cgit v0.10.2


From f83a275dbc5ca1721143698e844243fcadfabf6a Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 28 May 2009 14:34:40 -0700
Subject: mm: account for MAP_SHARED mappings using VM_MAYSHARE and not
 VM_SHARED in hugetlbfs

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13302

hugetlbfs reserves huge pages but does not fault them at mmap() time to
ensure that future faults succeed.  The reservation behaviour differs
depending on whether the mapping was mapped MAP_SHARED or MAP_PRIVATE.
For MAP_SHARED mappings, hugepages are reserved when mmap() is first
called and are tracked based on information associated with the inode.
Other processes mapping MAP_SHARED use the same reservation.  MAP_PRIVATE
track the reservations based on the VMA created as part of the mmap()
operation.  Each process mapping MAP_PRIVATE must make its own
reservation.

hugetlbfs currently checks if a VMA is MAP_SHARED with the VM_SHARED flag
and not VM_MAYSHARE.  For file-backed mappings, such as hugetlbfs,
VM_SHARED is set only if the mapping is MAP_SHARED and the file was opened
read-write.  If a shared memory mapping was mapped shared-read-write for
populating of data and mapped shared-read-only by other processes, then
hugetlbfs would account for the mapping as if it was MAP_PRIVATE.  This
causes processes to fail to map the file MAP_SHARED even though it should
succeed as the reservation is there.

This patch alters mm/hugetlb.c and replaces VM_SHARED with VM_MAYSHARE
when the intent of the code was to check whether the VMA was mapped
MAP_SHARED or MAP_PRIVATE.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <starlight@binnacle.cx>
Cc: Eric B Munson <ebmunson@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 28c655b..e83ad2c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -316,7 +316,7 @@ static void resv_map_release(struct kref *ref)
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	if (!(vma->vm_flags & VM_SHARED))
+	if (!(vma->vm_flags & VM_MAYSHARE))
 		return (struct resv_map *)(get_vma_private_data(vma) &
 							~HPAGE_RESV_MASK);
 	return NULL;
@@ -325,7 +325,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	VM_BUG_ON(vma->vm_flags & VM_SHARED);
+	VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
 
 	set_vma_private_data(vma, (get_vma_private_data(vma) &
 				HPAGE_RESV_MASK) | (unsigned long)map);
@@ -334,7 +334,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
 static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	VM_BUG_ON(vma->vm_flags & VM_SHARED);
+	VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
 
 	set_vma_private_data(vma, get_vma_private_data(vma) | flags);
 }
@@ -353,7 +353,7 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
 	if (vma->vm_flags & VM_NORESERVE)
 		return;
 
-	if (vma->vm_flags & VM_SHARED) {
+	if (vma->vm_flags & VM_MAYSHARE) {
 		/* Shared mappings always use reserves */
 		h->resv_huge_pages--;
 	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
@@ -369,14 +369,14 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 	VM_BUG_ON(!is_vm_hugetlb_page(vma));
-	if (!(vma->vm_flags & VM_SHARED))
+	if (!(vma->vm_flags & VM_MAYSHARE))
 		vma->vm_private_data = (void *)0;
 }
 
 /* Returns true if the VMA has associated reserve pages */
 static int vma_has_reserves(struct vm_area_struct *vma)
 {
-	if (vma->vm_flags & VM_SHARED)
+	if (vma->vm_flags & VM_MAYSHARE)
 		return 1;
 	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
 		return 1;
@@ -924,7 +924,7 @@ static long vma_needs_reservation(struct hstate *h,
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
 
-	if (vma->vm_flags & VM_SHARED) {
+	if (vma->vm_flags & VM_MAYSHARE) {
 		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		return region_chg(&inode->i_mapping->private_list,
 							idx, idx + 1);
@@ -949,7 +949,7 @@ static void vma_commit_reservation(struct hstate *h,
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
 
-	if (vma->vm_flags & VM_SHARED) {
+	if (vma->vm_flags & VM_MAYSHARE) {
 		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
 		region_add(&inode->i_mapping->private_list, idx, idx + 1);
 
@@ -1893,7 +1893,7 @@ retry_avoidcopy:
 	 * at the time of fork() could consume its reserves on COW instead
 	 * of the full address range.
 	 */
-	if (!(vma->vm_flags & VM_SHARED) &&
+	if (!(vma->vm_flags & VM_MAYSHARE) &&
 			is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
 			old_page != pagecache_page)
 		outside_reserve = 1;
@@ -2000,7 +2000,7 @@ retry:
 		clear_huge_page(page, address, huge_page_size(h));
 		__SetPageUptodate(page);
 
-		if (vma->vm_flags & VM_SHARED) {
+		if (vma->vm_flags & VM_MAYSHARE) {
 			int err;
 			struct inode *inode = mapping->host;
 
@@ -2104,7 +2104,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			goto out_mutex;
 		}
 
-		if (!(vma->vm_flags & VM_SHARED))
+		if (!(vma->vm_flags & VM_MAYSHARE))
 			pagecache_page = hugetlbfs_pagecache_page(h,
 								vma, address);
 	}
@@ -2289,7 +2289,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 * to reserve the full area even if read-only as mprotect() may be
 	 * called to make the mapping read-write. Assume !vma is a shm mapping
 	 */
-	if (!vma || vma->vm_flags & VM_SHARED)
+	if (!vma || vma->vm_flags & VM_MAYSHARE)
 		chg = region_chg(&inode->i_mapping->private_list, from, to);
 	else {
 		struct resv_map *resv_map = resv_map_alloc();
@@ -2330,7 +2330,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 * consumed reservations are stored in the map. Hence, nothing
 	 * else has to be done for private mappings here
 	 */
-	if (!vma || vma->vm_flags & VM_SHARED)
+	if (!vma || vma->vm_flags & VM_MAYSHARE)
 		region_add(&inode->i_mapping->private_list, from, to);
 	return 0;
 }
-- 
cgit v0.10.2


From 46f7e602fb32e02145ef14f8c0ca6d399f0a96b9 Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Thu, 28 May 2009 14:34:41 -0700
Subject: memcg: fix build warning and avoid checking for mem != null again and
 again

Fix build warning, "mem_cgroup_is_obsolete defined but not used" when
CONFIG_DEBUG_VM is not set.  Also avoid checking for !mem again and again.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4a747a2..78eb855 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -314,14 +314,6 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return mem;
 }
 
-static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
-{
-	if (!mem)
-		return true;
-	return css_is_removed(&mem->css);
-}
-
-
 /*
  * Call callback function against all cgroup under hierarchy tree.
  */
@@ -932,7 +924,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	if (unlikely(!mem))
 		return 0;
 
-	VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem));
+	VM_BUG_ON(css_is_removed(&mem->css));
 
 	while (1) {
 		int ret;
-- 
cgit v0.10.2


From 56ec0c7b88c6eb17733e5015f31302f6312511ed Mon Sep 17 00:00:00 2001
From: Harry Ciao <qingtao.cao@windriver.com>
Date: Thu, 28 May 2009 14:34:42 -0700
Subject: edac: AMD8111 & AMD8131 use dev_name()

The "bus_id" member in the device structure has been obsolete, use
dev_name() instead.

Signed-off-by: Harry Ciao <qingtao.cao@windriver.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c
index 6146921..2cb58ef 100644
--- a/drivers/edac/amd8111_edac.c
+++ b/drivers/edac/amd8111_edac.c
@@ -389,7 +389,7 @@ static int amd8111_dev_probe(struct pci_dev *dev,
 	dev_info->edac_dev->dev = &dev_info->dev->dev;
 	dev_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR;
 	dev_info->edac_dev->ctl_name = dev_info->ctl_name;
-	dev_info->edac_dev->dev_name = dev_info->dev->dev.bus_id;
+	dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev);
 
 	if (edac_op_state == EDAC_OPSTATE_POLL)
 		dev_info->edac_dev->edac_check = dev_info->check;
@@ -473,7 +473,7 @@ static int amd8111_pci_probe(struct pci_dev *dev,
 	pci_info->edac_dev->dev = &pci_info->dev->dev;
 	pci_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR;
 	pci_info->edac_dev->ctl_name = pci_info->ctl_name;
-	pci_info->edac_dev->dev_name = pci_info->dev->dev.bus_id;
+	pci_info->edac_dev->dev_name = dev_name(&pci_info->dev->dev);
 
 	if (edac_op_state == EDAC_OPSTATE_POLL)
 		pci_info->edac_dev->edac_check = pci_info->check;
diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c
index c083b31..b432d60 100644
--- a/drivers/edac/amd8131_edac.c
+++ b/drivers/edac/amd8131_edac.c
@@ -287,7 +287,7 @@ static int amd8131_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	dev_info->edac_dev->dev = &dev_info->dev->dev;
 	dev_info->edac_dev->mod_name = AMD8131_EDAC_MOD_STR;
 	dev_info->edac_dev->ctl_name = dev_info->ctl_name;
-	dev_info->edac_dev->dev_name = dev_info->dev->dev.bus_id;
+	dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev);
 
 	if (edac_op_state == EDAC_OPSTATE_POLL)
 		dev_info->edac_dev->edac_check = amd8131_chipset.check;
-- 
cgit v0.10.2


From 715fe7af9fd7328af661742bfadc195e665a837f Mon Sep 17 00:00:00 2001
From: Harry Ciao <qingtao.cao@windriver.com>
Date: Thu, 28 May 2009 14:34:43 -0700
Subject: edac: AMD8111 & AMD8131 Kconfig fixup

The amd8111_edac.c driver will fail allmodconfig on architectures other
than PPC, introduce Kconfig dependency to avoid this, since both AMD8111
and AMD8131 chips are only adopted on Maple so far.

Signed-off-by: Harry Ciao <qingtao.cao@windriver.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index e5f5c5a..956982f 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -192,16 +192,20 @@ config EDAC_PPC4XX
 
 config EDAC_AMD8131
 	tristate "AMD8131 HyperTransport PCI-X Tunnel"
-	depends on EDAC_MM_EDAC && PCI
+	depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
 	help
 	  Support for error detection and correction on the
 	  AMD8131 HyperTransport PCI-X Tunnel chip.
+	  Note, add more Kconfig dependency if it's adopted
+	  on some machine other than Maple.
 
 config EDAC_AMD8111
 	tristate "AMD8111 HyperTransport I/O Hub"
-	depends on EDAC_MM_EDAC && PCI
+	depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
 	help
 	  Support for error detection and correction on the
 	  AMD8111 HyperTransport I/O Hub chip.
+	  Note, add more Kconfig dependency if it's adopted
+	  on some machine other than Maple.
 
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index a5fdcf0..5907681 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -35,3 +35,5 @@ obj-$(CONFIG_EDAC_MPC85XX)		+= mpc85xx_edac.o
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
 obj-$(CONFIG_EDAC_CELL)			+= cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)		+= ppc4xx_edac.o
+obj-$(CONFIG_EDAC_AMD8111)		+= amd8111_edac.o
+obj-$(CONFIG_EDAC_AMD8131)		+= amd8131_edac.o
-- 
cgit v0.10.2


From b8e7e40abeac49644fec4a4f52ffe74c7b05eca0 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 28 May 2009 14:01:35 +0100
Subject: 8250: Fix oops from setserial

If you setserial a port which has never been initialised we change the type
but don't update the I/O method pointers. The same problem is true if you
change the io type of a port - but nobody ever does that so nobody noticed!

Remember the old type and when attaching if the type has changed reload the
port accessor pointers. We can't do it blindly as some 8250 drivers load custom
accessors and we must not stomp those.

Tested-by: Victor Seryodkin <vvscore@gmail.com>
Closes-bug: #13367
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index b4b3981..a0127e9 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -137,6 +137,7 @@ struct uart_8250_port {
 	unsigned char		mcr;
 	unsigned char		mcr_mask;	/* mask of user bits */
 	unsigned char		mcr_force;	/* mask of forced bits */
+	unsigned char		cur_iotype;	/* Running I/O type */
 
 	/*
 	 * Some bits in registers are cleared on a read, so they must
@@ -471,6 +472,7 @@ static void io_serial_out(struct uart_port *p, int offset, int value)
 
 static void set_io_from_upio(struct uart_port *p)
 {
+	struct uart_8250_port *up = (struct uart_8250_port *)p;
 	switch (p->iotype) {
 	case UPIO_HUB6:
 		p->serial_in = hub6_serial_in;
@@ -509,6 +511,8 @@ static void set_io_from_upio(struct uart_port *p)
 		p->serial_out = io_serial_out;
 		break;
 	}
+	/* Remember loaded iotype */
+	up->cur_iotype = p->iotype;
 }
 
 static void
@@ -1937,6 +1941,9 @@ static int serial8250_startup(struct uart_port *port)
 	up->capabilities = uart_config[up->port.type].flags;
 	up->mcr = 0;
 
+	if (up->port.iotype != up->cur_iotype)
+		set_io_from_upio(port);
+
 	if (up->port.type == PORT_16C950) {
 		/* Wake up and initialize UART */
 		up->acr = 0;
@@ -2563,6 +2570,9 @@ static void serial8250_config_port(struct uart_port *port, int flags)
 	if (ret < 0)
 		probeflags &= ~PROBE_RSA;
 
+	if (up->port.iotype != up->cur_iotype)
+		set_io_from_upio(port);
+
 	if (flags & UART_CONFIG_TYPE)
 		autoconfig(up, probeflags);
 	if (up->port.type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ)
@@ -2671,6 +2681,11 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
 {
 	int i;
 
+	for (i = 0; i < nr_uarts; i++) {
+		struct uart_8250_port *up = &serial8250_ports[i];
+		up->cur_iotype = 0xFF;
+	}
+
 	serial8250_isa_init_ports();
 
 	for (i = 0; i < nr_uarts; i++) {
-- 
cgit v0.10.2


From b78c07d45a7e71be7b5c5d7486f922355ccf23a8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 29 May 2009 13:48:59 -0300
Subject: perf_counter tools: Shorten the DSO names using cwd

[acme@emilia linux-2.6-tip]$ pwd
/home/acme/git/linux-2.6-tip

Before (still available using -P/--full-paths)

[acme@emilia linux-2.6-tip]$ perf report -P | head -10
    11.48%             perf: 7454 [kernel]: clear_page_c
     4.89%             perf: 7454 [kernel]: vsnprintf
     4.61%             perf: 7454 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__find_symbol
     4.09%             perf: 7454 [kernel]: number
     4.06%             perf: 7454 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: dso__fprintf
     4.00%             perf: 7454 /home/acme/git/linux-2.6-tip/Documentation/perf_counter/perf: symbol_filter

New default:

[acme@emilia linux-2.6-tip]$ perf report | head -10
    11.48%             perf: 7454 [kernel]: clear_page_c
     4.89%             perf: 7454 [kernel]: vsnprintf
     4.61%             perf: 7454 ./Documentation/perf_counter/perf: dso__find_symbol
     4.09%             perf: 7454 [kernel]: number
     4.06%             perf: 7454 ./Documentation/perf_counter/perf: dso__fprintf
     4.00%             perf: 7454 ./Documentation/perf_counter/perf: symbol_filter

Suggested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090529164859.GN4747@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 412d524..4705679 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -23,6 +23,7 @@ static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static int		dump_trace = 0;
 static int		verbose;
+static int		full_paths;
 
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
@@ -134,6 +135,16 @@ static int load_kernel(void)
 	return err;
 }
 
+static int strcommon(const char *pathname, const char *cwd, int cwdlen)
+{
+	int n = 0;
+
+	while (pathname[n] == cwd[n] && n < cwdlen)
+		++n;
+
+	return n;
+}
+
 struct map {
 	struct list_head node;
 	uint64_t	 start;
@@ -142,16 +153,28 @@ struct map {
 	struct dso	 *dso;
 };
 
-static struct map *map__new(struct mmap_event *event)
+static struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen)
 {
 	struct map *self = malloc(sizeof(*self));
 
 	if (self != NULL) {
+		const char *filename = event->filename;
+		char newfilename[PATH_MAX];
+
+		if (cwd) {
+			int n = strcommon(filename, cwd, cwdlen);
+			if (n == cwdlen) {
+				snprintf(newfilename, sizeof(newfilename),
+					 ".%s", filename + n);
+				filename = newfilename;
+			}
+		}
+
 		self->start = event->start;
 		self->end   = event->start + event->len;
 		self->pgoff = event->pgoff;
 
-		self->dso = dsos__findnew(event->filename);
+		self->dso = dsos__findnew(filename);
 		if (self->dso == NULL)
 			goto out_delete;
 	}
@@ -598,6 +621,8 @@ static int __cmd_report(void)
 	int ret, rc = EXIT_FAILURE;
 	uint32_t size;
 	unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown = 0;
+	char cwd[PATH_MAX], *cwdp = cwd;
+	int cwdlen;
 
 	input = open(input_name, O_RDONLY);
 	if (input < 0) {
@@ -621,6 +646,14 @@ static int __cmd_report(void)
 		return EXIT_FAILURE;
 	}
 
+	if (!full_paths) {
+		if (getcwd(cwd, sizeof(cwd)) == NULL) {
+			perror("failed to get the current directory");
+			return EXIT_FAILURE;
+		}
+		cwdlen = strlen(cwd);
+	} else
+		cwdp = NULL;
 remap:
 	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
 			   MAP_SHARED, input, offset);
@@ -710,7 +743,7 @@ more:
 	} else switch (event->header.type) {
 	case PERF_EVENT_MMAP: {
 		struct thread *thread = threads__findnew(event->mmap.pid);
-		struct map *map = map__new(&event->mmap);
+		struct map *map = map__new(&event->mmap, cwdp, cwdlen);
 
 		if (dump_trace) {
 			fprintf(stderr, "%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
@@ -809,6 +842,8 @@ static const struct option options[] = {
 	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
 		   "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"),
+	OPT_BOOLEAN('P', "full-paths", &full_paths,
+		    "Don't shorten the pathnames taking into account the cwd"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From b0958aed1ea39825439a7848544bfb2e267273b4 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Fri, 29 May 2009 18:54:14 +0100
Subject: [ARM] 5529/1: davinci: MMC platform support: DMA_32BIT_MASK -->
 DMA_BIT_MASK(32)

Some DMA_32BIT_MASK usage snuck in with the MMC platform support.
Convert these to the new preferred DMA_BIT_MASK(32).

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index c85091c..de16f34 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -62,7 +62,7 @@ void __init davinci_init_i2c(struct davinci_i2c_platform_data *pdata)
 
 #if	defined(CONFIG_MMC_DAVINCI) || defined(CONFIG_MMC_DAVINCI_MODULE)
 
-static u64 mmcsd0_dma_mask = DMA_32BIT_MASK;
+static u64 mmcsd0_dma_mask = DMA_BIT_MASK(32);
 
 static struct resource mmcsd0_resources[] = {
 	{
@@ -95,13 +95,13 @@ static struct platform_device davinci_mmcsd0_device = {
 	.id = 0,
 	.dev = {
 		.dma_mask = &mmcsd0_dma_mask,
-		.coherent_dma_mask = DMA_32BIT_MASK,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 	.num_resources = ARRAY_SIZE(mmcsd0_resources),
 	.resource = mmcsd0_resources,
 };
 
-static u64 mmcsd1_dma_mask = DMA_32BIT_MASK;
+static u64 mmcsd1_dma_mask = DMA_BIT_MASK(32);
 
 static struct resource mmcsd1_resources[] = {
 	{
@@ -132,7 +132,7 @@ static struct platform_device davinci_mmcsd1_device = {
 	.id = 1,
 	.dev = {
 		.dma_mask = &mmcsd1_dma_mask,
-		.coherent_dma_mask = DMA_32BIT_MASK,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 	.num_resources = ARRAY_SIZE(mmcsd1_resources),
 	.resource = mmcsd1_resources,
-- 
cgit v0.10.2


From e3a6d01932f343c1cc0218909262f0f68b6f7db4 Mon Sep 17 00:00:00 2001
From: Hartley Sweeten <hartleys@visionengravers.com>
Date: Thu, 28 May 2009 19:56:11 +0100
Subject: [ARM] 5526/1: ep93xx: usb driver cleanup

Cleanup the ohci-ep93xx driver.

1) Use the usb.h dbg() macro instead of pr_debug() so that
   the source filename is prefixed to the message and it is
   terminated with a linefeed.

2) Add error handling for the clk_get() call.

3) Update clkdev support so that the usb clock is matched by
   the dev_id instead of the con_id.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c
index e8ebeae..29f36b4 100644
--- a/arch/arm/mach-ep93xx/clock.c
+++ b/arch/arm/mach-ep93xx/clock.c
@@ -103,7 +103,7 @@ static struct clk_lookup clocks[] = {
 	INIT_CK(NULL, "hclk", &clk_h),
 	INIT_CK(NULL, "pclk", &clk_p),
 	INIT_CK(NULL, "pll2", &clk_pll2),
-	INIT_CK(NULL, "usb_host", &clk_usb_host),
+	INIT_CK("ep93xx-ohci", NULL, &clk_usb_host),
 	INIT_CK(NULL, "m2p0", &clk_m2p0),
 	INIT_CK(NULL, "m2p1", &clk_m2p1),
 	INIT_CK(NULL, "m2p2", &clk_m2p2),
diff --git a/drivers/usb/host/ohci-ep93xx.c b/drivers/usb/host/ohci-ep93xx.c
index 7cf74f8..b0dbf41 100644
--- a/drivers/usb/host/ohci-ep93xx.c
+++ b/drivers/usb/host/ohci-ep93xx.c
@@ -47,7 +47,7 @@ static int usb_hcd_ep93xx_probe(const struct hc_driver *driver,
 	struct usb_hcd *hcd;
 
 	if (pdev->resource[1].flags != IORESOURCE_IRQ) {
-		pr_debug("resource[1] is not IORESOURCE_IRQ");
+		dbg("resource[1] is not IORESOURCE_IRQ");
 		return -ENOMEM;
 	}
 
@@ -65,12 +65,18 @@ static int usb_hcd_ep93xx_probe(const struct hc_driver *driver,
 
 	hcd->regs = ioremap(hcd->rsrc_start, hcd->rsrc_len);
 	if (hcd->regs == NULL) {
-		pr_debug("ioremap failed");
+		dbg("ioremap failed");
 		retval = -ENOMEM;
 		goto err2;
 	}
 
-	usb_host_clock = clk_get(&pdev->dev, "usb_host");
+	usb_host_clock = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(usb_host_clock)) {
+		dbg("clk_get failed");
+		retval = PTR_ERR(usb_host_clock);
+		goto err3;
+	}
+
 	ep93xx_start_hc(&pdev->dev);
 
 	ohci_hcd_init(hcd_to_ohci(hcd));
@@ -80,6 +86,7 @@ static int usb_hcd_ep93xx_probe(const struct hc_driver *driver,
 		return retval;
 
 	ep93xx_stop_hc(&pdev->dev);
+err3:
 	iounmap(hcd->regs);
 err2:
 	release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
-- 
cgit v0.10.2


From 68ee3d83b238fa97e12775963f2b526c5dc74823 Mon Sep 17 00:00:00 2001
From: Hartley Sweeten <hartleys@visionengravers.com>
Date: Thu, 28 May 2009 19:58:24 +0100
Subject: [ARM] 5527/1: ep93xx: core.c: trivial spelling error

Fix trivial spelling error in arch/arm/mach-ep93xx/core.c

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index c535e88..f477df0 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -155,7 +155,7 @@ static unsigned char gpio_int_unmasked[3];
 static unsigned char gpio_int_enabled[3];
 static unsigned char gpio_int_type1[3];
 static unsigned char gpio_int_type2[3];
-static unsigned char gpio_int_debouce[3];
+static unsigned char gpio_int_debounce[3];
 
 /* Port ordering is: A B F */
 static const u8 int_type1_register_offset[3]	= { 0x90, 0xac, 0x4c };
@@ -192,11 +192,11 @@ void ep93xx_gpio_int_debounce(unsigned int irq, int enable)
 	int port_mask = 1 << (line & 7);
 
 	if (enable)
-		gpio_int_debouce[port] |= port_mask;
+		gpio_int_debounce[port] |= port_mask;
 	else
-		gpio_int_debouce[port] &= ~port_mask;
+		gpio_int_debounce[port] &= ~port_mask;
 
-	__raw_writeb(gpio_int_debouce[port],
+	__raw_writeb(gpio_int_debounce[port],
 		EP93XX_GPIO_REG(int_debounce_register_offset[port]));
 }
 EXPORT_SYMBOL(ep93xx_gpio_int_debounce);
-- 
cgit v0.10.2


From 4070243250dd06a96d874b0be3cb3a39ef23597c Mon Sep 17 00:00:00 2001
From: Hartley Sweeten <hartleys@visionengravers.com>
Date: Thu, 28 May 2009 20:07:03 +0100
Subject: [ARM] 5528/1: ep93xx: add defines for dma clock magic numbers

Update the dma clocks so that the magic numbers are named.

All the dma clocks have an enable bit to turn them on/off as
needed.  Currently these bits are in the code as "magic"
numbers.  This changes all of them to named defines to
improve code readability.

Also, the EP93XX_SYSCON_CLOCK_CONTROL register is improperly
named.  In the EP93xx User's Guide this register is called
PwrCnt (Power Control).  All of the uses of this register
are associated with the clock support so this patch also
modifies the names to match the User's Guide.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c
index 29f36b4..209a465 100644
--- a/arch/arm/mach-ep93xx/clock.c
+++ b/arch/arm/mach-ep93xx/clock.c
@@ -37,58 +37,58 @@ static struct clk clk_h;
 static struct clk clk_p;
 static struct clk clk_pll2;
 static struct clk clk_usb_host = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= EP93XX_SYSCON_CLOCK_USH_EN,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_USH_EN,
 };
 
 /* DMA Clocks */
 static struct clk clk_m2p0 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x00020000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P0,
 };
 static struct clk clk_m2p1 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x00010000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P1,
 };
 static struct clk clk_m2p2 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x00080000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P2,
 };
 static struct clk clk_m2p3 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x00040000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P3,
 };
 static struct clk clk_m2p4 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x00200000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P4,
 };
 static struct clk clk_m2p5 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x00100000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P5,
 };
 static struct clk clk_m2p6 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x00800000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P6,
 };
 static struct clk clk_m2p7 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x00400000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P7,
 };
 static struct clk clk_m2p8 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x02000000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P8,
 };
 static struct clk clk_m2p9 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x01000000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2P9,
 };
 static struct clk clk_m2m0 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x04000000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2M0,
 };
 static struct clk clk_m2m1 = {
-	.enable_reg	= EP93XX_SYSCON_CLOCK_CONTROL,
-	.enable_mask	= 0x08000000,
+	.enable_reg	= EP93XX_SYSCON_PWRCNT,
+	.enable_mask	= EP93XX_SYSCON_PWRCNT_DMA_M2M1,
 };
 
 #define INIT_CK(dev,con,ck)					\
diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
index 78ac1bd..ab73889 100644
--- a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
+++ b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h
@@ -152,9 +152,22 @@
 #define EP93XX_SYSCON_BASE		(EP93XX_APB_VIRT_BASE + 0x00130000)
 #define EP93XX_SYSCON_REG(x)		(EP93XX_SYSCON_BASE + (x))
 #define EP93XX_SYSCON_POWER_STATE	EP93XX_SYSCON_REG(0x00)
-#define EP93XX_SYSCON_CLOCK_CONTROL	EP93XX_SYSCON_REG(0x04)
-#define EP93XX_SYSCON_CLOCK_UARTBAUD	0x20000000
-#define EP93XX_SYSCON_CLOCK_USH_EN	0x10000000
+#define EP93XX_SYSCON_PWRCNT		EP93XX_SYSCON_REG(0x04)
+#define EP93XX_SYSCON_PWRCNT_FIR_EN	(1<<31)
+#define EP93XX_SYSCON_PWRCNT_UARTBAUD	(1<<29)
+#define EP93XX_SYSCON_PWRCNT_USH_EN	(1<<28)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2M1	(1<<27)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2M0	(1<<26)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P8	(1<<25)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P9	(1<<24)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P6	(1<<23)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P7	(1<<22)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P4	(1<<21)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P5	(1<<20)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P2	(1<<19)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P3	(1<<18)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P0	(1<<17)
+#define EP93XX_SYSCON_PWRCNT_DMA_M2P1	(1<<16)
 #define EP93XX_SYSCON_HALT		EP93XX_SYSCON_REG(0x08)
 #define EP93XX_SYSCON_STANDBY		EP93XX_SYSCON_REG(0x0c)
 #define EP93XX_SYSCON_CLOCK_SET1	EP93XX_SYSCON_REG(0x20)
-- 
cgit v0.10.2


From 6953e47e11ea85c8c91ac49a5c779e0a4a143e47 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Mon, 25 May 2009 08:02:35 +0100
Subject: [ARM] 5524/1: at91sam9g20ek: add i2c eeprom info

Add board information about on-board I2C eeprom AT24C512N at 0x50.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Acked-by: Andrew Victor <linux@maxim.org.za>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c
index 438efbb..cc270be 100644
--- a/arch/arm/mach-at91/board-sam9g20ek.c
+++ b/arch/arm/mach-at91/board-sam9g20ek.c
@@ -218,6 +218,13 @@ static struct gpio_led ek_leds[] = {
 	}
 };
 
+static struct i2c_board_info __initdata ek_i2c_devices[] = {
+	{
+		I2C_BOARD_INFO("24c512", 0x50),
+	},
+};
+
+
 static void __init ek_board_init(void)
 {
 	/* Serial */
@@ -235,7 +242,7 @@ static void __init ek_board_init(void)
 	/* MMC */
 	at91_add_device_mmc(0, &ek_mmc_data);
 	/* I2C */
-	at91_add_device_i2c(NULL, 0);
+	at91_add_device_i2c(ek_i2c_devices, ARRAY_SIZE(ek_i2c_devices));
 	/* LEDs */
 	at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds));
 	/* PCK0 provides MCLK to the WM8731 */
-- 
cgit v0.10.2


From 6d36110e1ce5663705d765ea38dd0f705e9ae6bc Mon Sep 17 00:00:00 2001
From: Sergey Lapin <slapin@ossfans.org>
Date: Mon, 18 May 2009 22:30:10 +0100
Subject: [ARM] 5525/1: AFEB9260: fix for MMC support

Proper pin configuration for MMC.

Signed-off-by: Sergey Lapin <slapin@ossfans.org>
Acked-by: Andrew Victor <linux@maxim.org.za>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c
index e3f1c56..970fd6b 100644
--- a/arch/arm/mach-at91/board-afeb-9260v1.c
+++ b/arch/arm/mach-at91/board-afeb-9260v1.c
@@ -156,6 +156,8 @@ static struct atmel_nand_data __initdata afeb9260_nand_data = {
  * MCI (SD/MMC)
  */
 static struct at91_mmc_data __initdata afeb9260_mmc_data = {
+	.det_pin 	= AT91_PIN_PC9,
+	.wp_pin 	= AT91_PIN_PC4,
 	.slot_b		= 1,
 	.wire4		= 1,
 };
-- 
cgit v0.10.2


From 6373fffc5d555caf3acf7c5796cec9820aaf7479 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 29 May 2009 16:12:02 -0700
Subject: sparc64: Fix section attribute warnings.

CSUM copy to/from user assembler was missing allocatable and
executable attributes for .fixup

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/lib/csum_copy_from_user.S b/arch/sparc/lib/csum_copy_from_user.S
index a22eddb..e0304e6 100644
--- a/arch/sparc/lib/csum_copy_from_user.S
+++ b/arch/sparc/lib/csum_copy_from_user.S
@@ -5,7 +5,7 @@
 
 #define EX_LD(x)		\
 98:	x;			\
-	.section .fixup;	\
+	.section .fixup, "ax";	\
 	.align 4;		\
 99:	retl;			\
 	 mov	-1, %o0;	\
diff --git a/arch/sparc/lib/csum_copy_to_user.S b/arch/sparc/lib/csum_copy_to_user.S
index d5b12f4..afd01ac 100644
--- a/arch/sparc/lib/csum_copy_to_user.S
+++ b/arch/sparc/lib/csum_copy_to_user.S
@@ -5,7 +5,7 @@
 
 #define EX_ST(x)		\
 98:	x;			\
-	.section .fixup;	\
+	.section .fixup,"ax";	\
 	.align 4;		\
 99:	retl;			\
 	 mov	-1, %o0;	\
-- 
cgit v0.10.2


From 34d531e640cb805973cf656b15c716b961565cea Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 26 May 2009 15:11:06 -0400
Subject: ACPI: sanity check _PSS frequency to prevent cpufreq crash

When BIOS SETUP is changed to disable EIST, some BIOS
hand the OS an un-initialized _PSS:

        Name (_PSS, Package (0x06)
        {
            Package (0x06)
            {
                0x80000000,	// frequency [MHz]
                0x80000000,	// power [mW]
                0x80000000,	// latency [us]
                0x80000000,	// BM latency [us]
                0x80000000,	// control
                0x80000000	// status
            },
	    ...

These are outrageous values for frequency,
power and latency, raising the question where to draw
the line between legal and illegal.  We tend to survive
garbage in the power and latency fields, but we can BUG_ON
when garbage is in the frequency field.

Cpufreq multiplies the frequency by 1000 and stores it in a u32 KHz.
So disregard a _PSS with a frequency so large
that it can't be represented by cpufreq.

https://bugzilla.redhat.com/show_bug.cgi?id=500311

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index cafb410..60e543d 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -309,9 +309,15 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 				  (u32) px->bus_master_latency,
 				  (u32) px->control, (u32) px->status));
 
-		if (!px->core_frequency) {
-			printk(KERN_ERR PREFIX
-				    "Invalid _PSS data: freq is zero\n");
+		/*
+ 		 * Check that ACPI's u64 MHz will be valid as u32 KHz in cpufreq
+		 */
+		if (!px->core_frequency ||
+		    ((u32)(px->core_frequency * 1000) !=
+		     (px->core_frequency * 1000))) {
+			printk(KERN_ERR FW_BUG PREFIX
+			       "Invalid BIOS _PSS frequency: 0x%llx MHz\n",
+			       px->core_frequency);
 			result = -EFAULT;
 			kfree(pr->performance->states);
 			goto end;
-- 
cgit v0.10.2


From 34ac272b3aaef11a91e19a72f3ac5772a96ffbc5 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 26 May 2009 23:35:34 -0400
Subject: ACPI: video: DMI workaround broken eMachines E510 BIOS enabling
 display brightness

http://bugzilla.kernel.org/show_bug.cgi?id=13376

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 810cca9..ee45e76 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -570,6 +570,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5710Z"),
 		},
 	},
+	{
+	 .callback = video_set_bqc_offset,
+	 .ident = "eMachines E510",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_VENDOR, "EMACHINES"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "eMachines E510"),
+		},
+	},
 	{}
 };
 
-- 
cgit v0.10.2


From 93bcece20ef87c29548ec7e66532f1018572cea0 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 19 May 2009 15:08:41 -0400
Subject: ACPI: video: DMI workaround broken Acer 5315 BIOS enabling display
 brightness

http://bugzilla.kernel.org/show_bug.cgi?id=13121

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index ee45e76..6d897bb 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -578,6 +578,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "eMachines E510"),
 		},
 	},
+	{
+	 .callback = video_set_bqc_offset,
+	 .ident = "Acer Aspire 5315",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_VENDOR, "Acer"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5315"),
+		},
+	},
 	{}
 };
 
-- 
cgit v0.10.2


From 1fc8d33acafe68bdcc21b327d22ef3820b819727 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Wed, 20 May 2009 11:56:08 +0530
Subject: drm/i915: acpi/video.c fix section mismatch warning

Currently acpi_video_exit() is exported as well as using __exit which causes:

  WARNING: drivers/acpi/video.o(__ksymtab+0x0): Section mismatch in reference from the variable __ksymtab_acpi_video_exit to the function .exit.text:acpi_video_exit()
  The symbol acpi_video_exit is exported and annotated __exit
  Fix this by removing the __exit annotation of acpi_video_exit or drop the export.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 810cca9..a79b885 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -2334,7 +2334,7 @@ static int __init acpi_video_init(void)
 	return acpi_video_register();
 }
 
-void __exit acpi_video_exit(void)
+void acpi_video_exit(void)
 {
 
 	acpi_bus_unregister_driver(&acpi_video_bus);
-- 
cgit v0.10.2


From 21671b88be331fb9c95891d5ee7d2e940e6b024c Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Fri, 22 May 2009 10:23:40 +0200
Subject: ACPI processor: remove spurious newline from warning message

Commit 4973b22a ("ACPI processor: reset the throttling state once it's
invalid") introduced a new warning which prints a spurious newline.

The ACPI_WARNING macro that is used already takes care of adding a
newline, after adding ACPI_CA_VERSION to the message. Remove the newline
to avoid the message getting split into two lines.

Signed-off-by: Frans Pop <elendil@planet.nl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index 7f16f5f..2275437 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -840,7 +840,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
 		state = acpi_get_throttling_state(pr, value);
 		if (state == -1) {
 			ACPI_WARNING((AE_INFO,
-				"Invalid throttling state, reset\n"));
+				"Invalid throttling state, reset"));
 			state = 0;
 			ret = acpi_processor_set_throttling(pr, state);
 			if (ret)
-- 
cgit v0.10.2


From 61c8c67e3ad67ea1d1360f2e88688bd942834756 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 26 May 2009 14:58:39 -0700
Subject: acpi-cpufreq: fix printk typo and indentation

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 208ecf6..54b6de2 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -693,8 +693,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
 	    policy->cpuinfo.transition_latency > 20 * 1000) {
 		policy->cpuinfo.transition_latency = 20 * 1000;
-			printk_once(KERN_INFO "Capping off P-state tranision"
-				    " latency at 20 uS\n");
+		printk_once(KERN_INFO
+			    "P-state transition latency capped at 20 uS\n");
 	}
 
 	/* table init */
-- 
cgit v0.10.2


From 31db5645bda24682dadbc97d5e8a7918ade2a298 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 29 May 2009 21:11:27 -0400
Subject: ACPI, i915: build fix (v2)

drivers/built-in.o: In function `intel_opregion_init':
(.text+0x9d540): undefined reference to `acpi_video_register'

v2: move under DRM_I915 from DRM_I915_KMS

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 4cd35d8..f5d46e7 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -67,12 +67,18 @@ config DRM_I830
 	  will load the correct one.
 
 config DRM_I915
+	tristate "i915 driver"
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
 	select FB
 	select FRAMEBUFFER_CONSOLE if !EMBEDDED
-	tristate "i915 driver"
+	# i915 depends on ACPI_VIDEO when ACPI is enabled
+	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
+	select VIDEO_OUTPUT_CONTROL if ACPI
+	select BACKLIGHT_CLASS_DEVICE if ACPI
+	select INPUT if ACPI
+	select ACPI_VIDEO if ACPI
 	help
 	  Choose this option if you have a system that has Intel 830M, 845G,
 	  852GM, 855GM 865G or 915G integrated graphics.  If M is selected, the
@@ -84,12 +90,6 @@ config DRM_I915
 config DRM_I915_KMS
 	bool "Enable modesetting on intel by default"
 	depends on DRM_I915
-	# i915 KMS depends on ACPI_VIDEO when ACPI is enabled
-	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
-	select VIDEO_OUTPUT_CONTROL if ACPI
-	select BACKLIGHT_CLASS_DEVICE if ACPI
-	select INPUT if ACPI
-	select ACPI_VIDEO if ACPI
 	help
 	  Choose this option if you want kernel modesetting enabled by default,
 	  and you have a new enough userspace to support this. Running old
-- 
cgit v0.10.2


From a1f98849fdf2f2fef3ef1c260178cd5fc662b773 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Sun, 8 Mar 2009 22:34:45 -0400
Subject: [ARM] allow for alternative __copy_to_user/__clear_user
 implementations

This allows for optional alternative implementations of __copy_to_user
and __clear_user, with a possible runtime fallback to the standard
version when the alternative provides no gain over that standard
version. This is done by making the standard __copy_to_user into a weak
alias for the symbol __copy_to_user_std.  Same thing for __clear_user.

Those two functions are particularly good candidates to have alternative
implementations for, since they rely on the STRT instruction which has
lower performances than STM instructions on some CPU cores such as
the ARM1176 and Marvell Feroceon.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 7897464..0da9bc9 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -386,7 +386,9 @@ do {									\
 #ifdef CONFIG_MMU
 extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n);
 extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n);
+extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n);
 extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
+extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned long n);
 #else
 #define __copy_from_user(to,from,n)	(memcpy(to, (void __force *)from, n), 0)
 #define __copy_to_user(to,from,n)	(memcpy((void __force *)to, from, n), 0)
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 4d6bc71..844f567 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -18,7 +18,8 @@
  *          : sz   - number of bytes to clear
  * Returns  : number of bytes NOT cleared
  */
-ENTRY(__clear_user)
+ENTRY(__clear_user_std)
+WEAK(__clear_user)
 		stmfd	sp!, {r1, lr}
 		mov	r2, #0
 		cmp	r1, #4
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 22f968b..878820f 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -86,7 +86,8 @@
 
 	.text
 
-ENTRY(__copy_to_user)
+ENTRY(__copy_to_user_std)
+WEAK(__copy_to_user)
 
 #include "copy_template.S"
 
-- 
cgit v0.10.2


From 39ec58f3fea47c242724109cc1da999f74810bbc Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@marvell.com>
Date: Mon, 9 Mar 2009 14:30:09 -0400
Subject: [ARM] alternative copy_to_user/clear_user implementation

This implements {copy_to,clear}_user() by faulting in the userland
pages and then using the regular kernel mem{cpy,set}() to copy the
data (while holding the page table lock).  This is a win if the regular
mem{cpy,set}() implementations are faster than the user copy functions,
which is the case e.g. on Feroceon, where 8-word STMs (which memcpy()
uses under the right conditions) give significantly higher memory write
throughput than a sequence of individual 32bit stores.

Here are numbers for page sized buffers on some Feroceon cores:

 - copy_to_user on Orion5x goes from 51 MB/s to 83 MB/s
 - clear_user on Orion5x goes from 89MB/s to 314MB/s
 - copy_to_user on Kirkwood goes from 240 MB/s to 356 MB/s
 - clear_user on Kirkwood goes from 367 MB/s to 1108 MB/s
 - copy_to_user on Disco-Duo goes from 248 MB/s to 398 MB/s
 - clear_user on Disco-Duo goes from 328 MB/s to 1741 MB/s

Because the setup cost is non negligible, this is worthwhile only if
the amount of data to copy is large enough.  The operation falls back
to the standard implementation when the amount of data is below a certain
threshold. This threshold was determined empirically, however some targets
could benefit from a lower runtime determined value for optimal results
eventually.

In the copy_from_user() case, this technique does not provide any
worthwhile performance gain due to the fact that any kind of read access
allocates the cache and subsequent 32bit loads are just as fast as the
equivalent 8-word LDM.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Tested-by: Martin Michlmayr <tbm@cyrius.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9d02cdb..c63e65d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1085,6 +1085,22 @@ config ALIGNMENT_TRAP
 	  correct operation of some network protocols. With an IP-only
 	  configuration it is safe to say N, otherwise say Y.
 
+config UACCESS_WITH_MEMCPY
+	bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user() (EXPERIMENTAL)"
+	depends on MMU && EXPERIMENTAL
+	default y if CPU_FEROCEON
+	help
+	  Implement faster copy_to_user and clear_user methods for CPU
+	  cores where a 8-word STM instruction give significantly higher
+	  memory write throughput than a sequence of individual 32bit stores.
+
+	  A possible side effect is a slight increase in scheduling latency
+	  between threads sharing the same address space if they invoke
+	  such copy operations with large buffers.
+
+	  However, if the CPU data cache is using a write-allocate mode,
+	  this option is unlikely to provide any performance gain.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 866f84a..030ba72 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -29,6 +29,9 @@ else
 endif
 endif
 
+# using lib_ here won't override already available weak symbols
+obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
+
 lib-$(CONFIG_MMU) += $(mmu-y)
 
 ifeq ($(CONFIG_CPU_32v3),y)
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
new file mode 100644
index 0000000..bf987b4
--- /dev/null
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -0,0 +1,139 @@
+/*
+ *  linux/arch/arm/lib/uaccess_with_memcpy.c
+ *
+ *  Written by: Lennert Buytenhek and Nicolas Pitre
+ *  Copyright (C) 2009 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/rwsem.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h> /* for in_atomic() */
+#include <asm/current.h>
+#include <asm/page.h>
+
+static int
+pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
+{
+	unsigned long addr = (unsigned long)_addr;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pgd = pgd_offset(current->mm, addr);
+	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
+		return 0;
+
+	pmd = pmd_offset(pgd, addr);
+	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
+		return 0;
+
+	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
+	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
+	    !pte_write(*pte) || !pte_dirty(*pte))) {
+		pte_unmap_unlock(pte, ptl);
+		return 0;
+	}
+
+	*ptep = pte;
+	*ptlp = ptl;
+
+	return 1;
+}
+
+unsigned long
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	int atomic;
+
+	if (n < 1024)
+		return __copy_to_user_std(to, from, n);
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		memcpy((void *)to, from, n);
+		return 0;
+	}
+
+	/* the mmap semaphore is taken only if not in an atomic context */
+	atomic = in_atomic();
+
+	if (!atomic)
+		down_read(&current->mm->mmap_sem);
+	while (n) {
+		pte_t *pte;
+		spinlock_t *ptl;
+		int tocopy;
+
+		while (!pin_page_for_write(to, &pte, &ptl)) {
+			if (!atomic)
+				up_read(&current->mm->mmap_sem);
+			if (__put_user(0, (char __user *)to))
+				goto out;
+			if (!atomic)
+				down_read(&current->mm->mmap_sem);
+		}
+
+		tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1;
+		if (tocopy > n)
+			tocopy = n;
+
+		memcpy((void *)to, from, tocopy);
+		to += tocopy;
+		from += tocopy;
+		n -= tocopy;
+
+		pte_unmap_unlock(pte, ptl);
+	}
+	if (!atomic)
+		up_read(&current->mm->mmap_sem);
+
+out:
+	return n;
+}
+
+unsigned long __clear_user(void __user *addr, unsigned long n)
+{
+	if (n < 256)
+		return __clear_user_std(addr, n);
+
+	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+		memset((void *)addr, 0, n);
+		return 0;
+	}
+
+	down_read(&current->mm->mmap_sem);
+	while (n) {
+		pte_t *pte;
+		spinlock_t *ptl;
+		int tocopy;
+
+		while (!pin_page_for_write(addr, &pte, &ptl)) {
+			up_read(&current->mm->mmap_sem);
+			if (__put_user(0, (char __user *)addr))
+				goto out;
+			down_read(&current->mm->mmap_sem);
+		}
+
+		tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1;
+		if (tocopy > n)
+			tocopy = n;
+
+		memset((void *)addr, 0, tocopy);
+		addr += tocopy;
+		n -= tocopy;
+
+		pte_unmap_unlock(pte, ptl);
+	}
+	up_read(&current->mm->mmap_sem);
+
+out:
+	return n;
+}
-- 
cgit v0.10.2


From cb9dc92c0a1b76165c8c334402e27191084b2047 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 21 May 2009 22:17:17 -0400
Subject: [ARM] lower overhead with alternative copy_to_user for small copies

Because the alternate copy_to_user implementation has a higher setup cost
than the standard implementation, the size of the memory area to copy
is tested and the standard implementation invoked instead when that size
is too small.  Still, that test is made after the processor has preserved
a bunch of registers on the stack which have to be reloaded right away
needlessly in that case, causing a measurable performance regression
compared to plain usage of the standard implementation only.

To make the size test overhead negligible, let's factorize it out of
the alternate copy_to_user function where it is clear to the compiler
that no stack frame is needed.  Thanks to CONFIG_ARM_UNWIND allowing
for frame pointers to be disabled and tail call optimization to kick in,
the overhead in the small copy case becomes only 3 assembly instructions.

A similar trick is applied to clear_user as well.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index bf987b4..92838e7 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -49,14 +49,11 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
 	return 1;
 }
 
-unsigned long
-__copy_to_user(void __user *to, const void *from, unsigned long n)
+static unsigned long noinline
+__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
 {
 	int atomic;
 
-	if (n < 1024)
-		return __copy_to_user_std(to, from, n);
-
 	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
 		memcpy((void *)to, from, n);
 		return 0;
@@ -99,11 +96,24 @@ out:
 	return n;
 }
 
-unsigned long __clear_user(void __user *addr, unsigned long n)
+unsigned long
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	/*
+	 * This test is stubbed out of the main function above to keep
+	 * the overhead for small copies low by avoiding a large
+	 * register dump on the stack just to reload them right away.
+	 * With frame pointer disabled, tail call optimization kicks in
+	 * as well making this test almost invisible.
+	 */
+	if (n < 1024)
+		return __copy_to_user_std(to, from, n);
+	return __copy_to_user_memcpy(to, from, n);
+}
+	
+static unsigned long noinline
+__clear_user_memset(void __user *addr, unsigned long n)
 {
-	if (n < 256)
-		return __clear_user_std(addr, n);
-
 	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
 		memset((void *)addr, 0, n);
 		return 0;
@@ -137,3 +147,11 @@ unsigned long __clear_user(void __user *addr, unsigned long n)
 out:
 	return n;
 }
+
+unsigned long __clear_user(void __user *addr, unsigned long n)
+{
+	/* See rational for this in __copy_to_user() above. */
+	if (n < 256)
+		return __clear_user_std(addr, n);
+	return __clear_user_memset(addr, n);
+}
-- 
cgit v0.10.2


From c143dc903d7c0b15f5052e00b2c7de33a8b4299c Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sat, 30 May 2009 06:43:49 +0200
Subject: block: fix an oops on BLKPREP_KILL

Doing a bit of torture testing, I ran across a BUG in the block
subsystem (at blk-core.c:2048): the test for if the request is queued.

It turns out the trigger was a BLKPREP_KILL coming out of the SCSI prep
function.  Currently for BLKPREP_KILL requests, we send them straight
into __blk_end_request_all() with an error, but they've never been
dequeued, so they trip the bug.  Fix this by starting requests before
killing them.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 8b3b74e..7ae83a1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1789,6 +1789,11 @@ struct request *blk_peek_request(struct request_queue *q)
 			break;
 		} else if (ret == BLKPREP_KILL) {
 			rq->cmd_flags |= REQ_QUIET;
+			/*
+			 * Mark this request as started so we don't trigger
+			 * any debug logic in the end I/O path.
+			 */
+			blk_start_request(rq);
 			__blk_end_request_all(rq, -EIO);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
-- 
cgit v0.10.2


From c626e3f5ca1d95ad2204d3128c26e7678714eb55 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 29 May 2009 21:55:50 -0400
Subject: [ARM] alternative copy_to_user: more precise fallback threshold

Previous size thresholds were guessed from various user space benchmarks
using a kernel with and without the alternative uaccess option.  This
is however not as precise as a kernel based test to measure the real
speed of each method.

This adds a simple test bench to show the time needed for each method.
With this, the optimal size treshold for the alternative implementation
can be determined with more confidence.  It appears that the optimal
threshold for both copy_to_user and clear_user is around 64 bytes. This
is not a surprise knowing that the memcpy and memset implementations
need at least 64 bytes to achieve maximum throughput.

One might suggest that such test be used to determine the optimal
threshold at run time instead, but results are near enough to 64 on
tested targets concerned by this alternative copy_to_user implementation,
so adding some overhead associated with a variable threshold is probably
not worth it for now.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 92838e7..6b967ff 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -106,7 +106,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
 	 * With frame pointer disabled, tail call optimization kicks in
 	 * as well making this test almost invisible.
 	 */
-	if (n < 1024)
+	if (n < 64)
 		return __copy_to_user_std(to, from, n);
 	return __copy_to_user_memcpy(to, from, n);
 }
@@ -151,7 +151,78 @@ out:
 unsigned long __clear_user(void __user *addr, unsigned long n)
 {
 	/* See rational for this in __copy_to_user() above. */
-	if (n < 256)
+	if (n < 64)
 		return __clear_user_std(addr, n);
 	return __clear_user_memset(addr, n);
 }
+
+#if 0
+
+/*
+ * This code is disabled by default, but kept around in case the chosen
+ * thresholds need to be revalidated.  Some overhead (small but still)
+ * would be implied by a runtime determined variable threshold, and
+ * so far the measurement on concerned targets didn't show a worthwhile
+ * variation.
+ *
+ * Note that a fairly precise sched_clock() implementation is needed
+ * for results to make some sense.
+ */
+
+#include <linux/vmalloc.h>
+
+static int __init test_size_treshold(void)
+{
+	struct page *src_page, *dst_page;
+	void *user_ptr, *kernel_ptr;
+	unsigned long long t0, t1, t2;
+	int size, ret;
+
+	ret = -ENOMEM;
+	src_page = alloc_page(GFP_KERNEL);
+	if (!src_page)
+		goto no_src;
+	dst_page = alloc_page(GFP_KERNEL);
+	if (!dst_page)
+		goto no_dst;
+	kernel_ptr = page_address(src_page);
+	user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010));
+	if (!user_ptr)
+		goto no_vmap;
+
+	/* warm up the src page dcache */
+	ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE);
+
+	for (size = PAGE_SIZE; size >= 4; size /= 2) {
+		t0 = sched_clock();
+		ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size);
+		t1 = sched_clock();
+		ret |= __copy_to_user_std(user_ptr, kernel_ptr, size);
+		t2 = sched_clock();
+		printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
+	}
+
+	for (size = PAGE_SIZE; size >= 4; size /= 2) {
+		t0 = sched_clock();
+		ret |= __clear_user_memset(user_ptr, size);
+		t1 = sched_clock();
+		ret |= __clear_user_std(user_ptr, size);
+		t2 = sched_clock();
+		printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
+	}
+
+	if (ret)
+		ret = -EFAULT;
+
+	vunmap(user_ptr);
+no_vmap:
+	put_page(dst_page);
+no_dst:
+	put_page(src_page);
+no_src:
+	return ret;
+}
+
+subsys_initcall(test_size_treshold);
+
+#endif
-- 
cgit v0.10.2


From ba84bfcd2b6fdc5a9ac53a4ab103088c99f84a12 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sat, 30 May 2009 08:59:03 +0200
Subject: ALSA: hda - Fix reverted LED setup for HP

The commit 86d190e77c44cb057742dcc871b12ebd4633c387 reverted the bit
flip of LED GPIO for HP DX and DV4-1222nr.  Fixed now.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index a1b4c94..a915f40 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -4666,9 +4666,9 @@ static int stac92xx_hp_check_power_status(struct hda_codec *codec,
 	if (nid == 0x10) {
 		if (snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) &
 		    HDA_AMP_MUTE)
-			spec->gpio_data |= spec->gpio_led; /* white */
-		else
 			spec->gpio_data &= ~spec->gpio_led; /* orange */
+		else
+			spec->gpio_data |= spec->gpio_led; /* white */
 
 		stac_gpio_set(codec, spec->gpio_mask,
 			      spec->gpio_dir,
-- 
cgit v0.10.2


From c44613a4c1092e85841b78b7ab52a06654fcd321 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 29 May 2009 17:03:07 -0300
Subject: perf_counter tools: Add locking to perf top

perf_counter tools: Add locking to perf top

We need to protect the active_symbols list as two threads change it:
the main thread adding entries to the head and the display thread
decaying entries from any place in the list.

Also related: take a snapshot of syme->count[0] before using it to
calculate the weight and to show the same number used in this calc when
displaying the symbol usage.

Reported-by: Mike Galbraith <efault@gmx.de>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090529200307.GR4747@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index ebe8bec..24a8879 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -129,6 +129,8 @@ struct sym_entry {
 	struct rb_node		rb_node;
 	struct list_head	node;
 	unsigned long		count[MAX_COUNTERS];
+	unsigned long		snap_count;
+	double			weight;
 	int			skip;
 };
 
@@ -141,17 +143,16 @@ struct dso *kernel_dso;
  * after decayed.
  */
 static LIST_HEAD(active_symbols);
+static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
 
 /*
  * Ordering weight: count-1 * count-2 * ... / count-n
  */
 static double sym_weight(const struct sym_entry *sym)
 {
-	double weight;
+	double weight = sym->snap_count;
 	int counter;
 
-	weight = sym->count[0];
-
 	for (counter = 1; counter < nr_counters-1; counter++)
 		weight *= sym->count[counter];
 
@@ -164,11 +165,18 @@ static long			events;
 static long			userspace_events;
 static const char		CONSOLE_CLEAR[] = "[H[2J";
 
-static void list_insert_active_sym(struct sym_entry *syme)
+static void __list_insert_active_sym(struct sym_entry *syme)
 {
 	list_add(&syme->node, &active_symbols);
 }
 
+static void list_remove_active_sym(struct sym_entry *syme)
+{
+	pthread_mutex_lock(&active_symbols_lock);
+	list_del_init(&syme->node);
+	pthread_mutex_unlock(&active_symbols_lock);
+}
+
 static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
 {
 	struct rb_node **p = &tree->rb_node;
@@ -179,7 +187,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
 		parent = *p;
 		iter = rb_entry(parent, struct sym_entry, rb_node);
 
-		if (sym_weight(se) > sym_weight(iter))
+		if (se->weight > iter->weight)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -203,15 +211,21 @@ static void print_sym_table(void)
 	events = userspace_events = 0;
 
 	/* Sort the active symbols */
-	list_for_each_entry_safe(syme, n, &active_symbols, node) {
-		if (syme->count[0] != 0) {
+	pthread_mutex_lock(&active_symbols_lock);
+	syme = list_entry(active_symbols.next, struct sym_entry, node);
+	pthread_mutex_unlock(&active_symbols_lock);
+
+	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+		syme->snap_count = syme->count[0];
+		if (syme->snap_count != 0) {
+			syme->weight = sym_weight(syme);
 			rb_insert_active_sym(&tmp, syme);
-			sum_kevents += syme->count[0];
+			sum_kevents += syme->snap_count;
 
 			for (j = 0; j < nr_counters; j++)
 				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
 		} else
-			list_del_init(&syme->node);
+			list_remove_active_sym(syme);
 	}
 
 	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
@@ -264,19 +278,18 @@ static void print_sym_table(void)
 		struct symbol *sym = (struct symbol *)(syme + 1);
 		float pcnt;
 
-		if (++printed > 18 || syme->count[0] < count_filter)
-			break;
+		if (++printed > 18 || syme->snap_count < count_filter)
+			continue;
 
-		pcnt = 100.0 - (100.0 * ((sum_kevents - syme->count[0]) /
+		pcnt = 100.0 - (100.0 * ((sum_kevents - syme->snap_count) /
 					 sum_kevents));
 
 		if (nr_counters == 1)
 			printf("%19.2f - %4.1f%% - %016llx : %s\n",
-				sym_weight(syme),
-				pcnt, sym->start, sym->name);
+				syme->weight, pcnt, sym->start, sym->name);
 		else
 			printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
-				sym_weight(syme), syme->count[0],
+				syme->weight, syme->snap_count,
 				pcnt, sym->start, sym->name);
 	}
 
@@ -395,8 +408,10 @@ static void record_ip(uint64_t ip, int counter)
 
 		if (!syme->skip) {
 			syme->count[counter]++;
+			pthread_mutex_lock(&active_symbols_lock);
 			if (list_empty(&syme->node) || !syme->node.next)
-				list_insert_active_sym(syme);
+				__list_insert_active_sym(syme);
+			pthread_mutex_unlock(&active_symbols_lock);
 			return;
 		}
 	}
-- 
cgit v0.10.2


From d7c29318c2daa96d64b7312afd8283488c1cb29f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 30 May 2009 12:38:51 +0200
Subject: perf_counter tools: Print 'CPU utilization factor' in builtin-stat

Before:

 Performance counter stats for '/home/mingo/hackbench':

    5728.862689  task clock ticks     (msecs)
          34426  context switches     #        0.006 M/sec
           3835  CPU migrations       #        0.001 M/sec
          18158  pagefaults           #        0.003 M/sec
    16218109156  CPU cycles           #     2830.947 M/sec
    13519616840  instructions         #     2359.913 M/sec
       55941661  cache references     #        9.765 M/sec
       23554938  cache misses         #        4.112 M/sec

 Wall-clock time elapsed:   528.886980 msecs

After:

 Performance counter stats for '/home/mingo/hackbench':

    5845.443541  task clock ticks     #      11.886 CPU utilization factor
          38289  context switches     #       0.007 M/sec
           4208  CPU migrations       #       0.001 M/sec
          17755  pagefaults           #       0.003 M/sec
    16664668576  CPU cycles           #    2850.882 M/sec
    13468113991  instructions         #    2304.036 M/sec
       57445468  cache references     #       9.827 M/sec
       26896502  cache misses         #       4.601 M/sec

 Wall-clock time elapsed:   491.802357 msecs

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index ef7e0e1..5886791 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -75,6 +75,7 @@ static __u64			event_res[MAX_COUNTERS][3];
 static __u64			event_scaled[MAX_COUNTERS];
 
 static __u64			runtime_nsecs;
+static __u64			walltime_nsecs;
 
 static void create_perfstat_counter(int counter)
 {
@@ -194,13 +195,19 @@ static void print_counter(int counter)
 	if (nsec_counter(counter)) {
 		double msecs = (double)count[0] / 1000000;
 
-		fprintf(stderr, " %14.6f  %-20s (msecs)",
+		fprintf(stderr, " %14.6f  %-20s",
 			msecs, event_name(counter));
+		if (event_id[counter] ==
+				EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
+
+			fprintf(stderr, " # %11.3f CPU utilization factor",
+				(double)count[0] / (double)walltime_nsecs);
+		}
 	} else {
 		fprintf(stderr, " %14Ld  %-20s",
 			count[0], event_name(counter));
 		if (runtime_nsecs)
-			fprintf(stderr, " # %12.3f M/sec",
+			fprintf(stderr, " # %11.3f M/sec",
 				(double)count[0]/runtime_nsecs*1000.0);
 	}
 	if (scaled)
@@ -241,6 +248,8 @@ static int do_perfstat(int argc, const char **argv)
 	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
 	t1 = rdclock();
 
+	walltime_nsecs = t1 - t0;
+
 	fflush(stdout);
 
 	fprintf(stderr, "\n");
-- 
cgit v0.10.2


From 7fbd55449aafb86d3237b5d1a26fb4dab2aa2c76 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 30 May 2009 12:38:51 +0200
Subject: perf_counter tools: Fix 'make install'

'make install' didnt install perf itself - which needs a special
rule to be copied to bindir.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index bd29a5c..8f72584 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -776,6 +776,7 @@ install: all
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
 ifneq (,$X)
 	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
 endif
-- 
cgit v0.10.2


From c1c2365acf8c044f749c0fe1ea236497e8d1718e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 30 May 2009 12:38:51 +0200
Subject: perf_counter tools: Generate per command manpages (and pdf/html,
 etc.)

Import Git's nice .txt => {man/html/pdf} generation machinery.

Fix various errors in the Documentation/perf*.txt description as well.

Also fix a bug in builtin-help: we'd map 'perf help top' to 'perftop'
if only the 'perf' binary is in the default PATH - confusing the manpage
logic. I dont fully understand why Git did it this way - but i suppose
it's a migration artifact from their migration from standalone git-xyz
commands to 'git xyz' commands. The perf tools were always using the
modern form so it's not an issue there.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/Makefile b/Documentation/perf_counter/Documentation/Makefile
new file mode 100644
index 0000000..5457192
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/Makefile
@@ -0,0 +1,300 @@
+MAN1_TXT= \
+	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
+		$(wildcard perf-*.txt)) \
+	perf.txt
+MAN5_TXT=
+MAN7_TXT=
+
+MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
+MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+
+DOC_HTML=$(MAN_HTML)
+
+ARTICLES =
+# with their own formatting rules.
+SP_ARTICLES =
+API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
+SP_ARTICLES += $(API_DOCS)
+SP_ARTICLES += technical/api-index
+
+DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+
+DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
+DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+prefix?=$(HOME)
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/perf-doc
+pdfdir?=$(prefix)/share/doc/perf-doc
+mandir?=$(prefix)/share/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+man7dir=$(mandir)/man7
+# DESTDIR=
+
+ASCIIDOC=asciidoc
+ASCIIDOC_EXTRA =
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO_EXTRA =
+INSTALL?=install
+RM ?= rm -f
+DOC_REF = origin/man
+HTML_REF = origin/html
+
+infodir?=$(prefix)/share/info
+MAKEINFO=makeinfo
+INSTALL_INFO=install-info
+DOCBOOK2X_TEXI=docbook2x-texi
+DBLATEX=dblatex
+ifndef PERL_PATH
+	PERL_PATH = /usr/bin/perl
+endif
+
+-include ../config.mak.autogen
+-include ../config.mak
+
+#
+# For asciidoc ...
+#	-7.1.2,	no extra settings are needed.
+#	8.0-,	set ASCIIDOC8.
+#
+
+#
+# For docbook-xsl ...
+#	-1.68.1,	set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+#	1.69.0,		no extra settings are needed?
+#	1.69.1-1.71.0,	set DOCBOOK_SUPPRESS_SP?
+#	1.71.1,		no extra settings are needed?
+#	1.72.0,		set DOCBOOK_XSL_172.
+#	1.73.0-,	set ASCIIDOC_NO_ROFF
+#
+
+#
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+#
+
+ifdef ASCIIDOC8
+ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+MANPAGE_XSL = manpage-1.72.xsl
+else
+	ifdef ASCIIDOC_NO_ROFF
+	# docbook-xsl after 1.72 needs the regular XSL, but will not
+	# pass-thru raw roff codes from asciidoc.conf, so turn them off.
+	ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+	endif
+endif
+ifdef MAN_BOLD_LITERAL
+XMLTO_EXTRA += -m manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+XMLTO_EXTRA += -m manpage-suppress-sp.xsl
+endif
+
+SHELL_PATH ?= $(SHELL)
+# Shell quote;
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+
+#
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+#   http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+#
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+	QUIET_ASCIIDOC	= @echo '   ' ASCIIDOC $@;
+	QUIET_XMLTO	= @echo '   ' XMLTO $@;
+	QUIET_DB2TEXI	= @echo '   ' DB2TEXI $@;
+	QUIET_MAKEINFO	= @echo '   ' MAKEINFO $@;
+	QUIET_DBLATEX	= @echo '   ' DBLATEX $@;
+	QUIET_XSLTPROC	= @echo '   ' XSLTPROC $@;
+	QUIET_GEN	= @echo '   ' GEN $@;
+	QUIET_STDERR	= 2> /dev/null
+	QUIET_SUBDIR0	= +@subdir=
+	QUIET_SUBDIR1	= ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+			  $(MAKE) $(PRINT_DIR) -C $$subdir
+	export V
+endif
+endif
+
+all: html man
+
+html: $(DOC_HTML)
+
+$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
+
+man: man1 man5 man7
+man1: $(DOC_MAN1)
+man5: $(DOC_MAN5)
+man7: $(DOC_MAN7)
+
+info: perf.info perfman.info
+
+pdf: user-manual.pdf
+
+install: install-man
+
+install-man: man
+	$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
+#	$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
+#	$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+	$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
+#	$(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
+#	$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+install-info: info
+	$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
+	$(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir)
+	if test -r $(DESTDIR)$(infodir)/dir; then \
+	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+	else \
+	  echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
+	fi
+
+install-pdf: pdf
+	$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
+	$(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
+
+install-html: html
+	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
+
+../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
+	$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE
+
+-include ../PERF-VERSION-FILE
+
+#
+# Determine "include::" file references in asciidoc files.
+#
+doc.dep : $(wildcard *.txt) build-docdep.perl
+	$(QUIET_GEN)$(RM) $@+ $@ && \
+	$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
+	mv $@+ $@
+
+-include doc.dep
+
+cmds_txt = cmds-ancillaryinterrogators.txt \
+	cmds-ancillarymanipulators.txt \
+	cmds-mainporcelain.txt \
+	cmds-plumbinginterrogators.txt \
+	cmds-plumbingmanipulators.txt \
+	cmds-synchingrepositories.txt \
+	cmds-synchelpers.txt \
+	cmds-purehelpers.txt \
+	cmds-foreignscminterface.txt
+
+$(cmds_txt): cmd-list.made
+
+cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
+	$(QUIET_GEN)$(RM) $@ && \
+	$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
+	date >$@
+
+clean:
+	$(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7
+	$(RM) *.texi *.texi+ *.texi++ perf.info perfman.info
+	$(RM) howto-index.txt howto/*.html doc.dep
+	$(RM) technical/api-*.html technical/api-index.txt
+	$(RM) $(cmds_txt) *.made
+
+$(MAN_HTML): %.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+%.1 %.5 %.7 : %.xml
+	$(QUIET_XMLTO)$(RM) $@ && \
+	xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+%.xml : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+XSLT = docbook.xsl
+XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
+
+user-manual.html: user-manual.xml
+	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
+
+perf.info: user-manual.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
+
+user-manual.texi: user-manual.xml
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+	$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
+	rm $@++ && \
+	mv $@+ $@
+
+user-manual.pdf: user-manual.xml
+	$(QUIET_DBLATEX)$(RM) $@+ $@ && \
+	$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
+	mv $@+ $@
+
+perfman.texi: $(MAN_XML) cat-texi.perl
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
+		--to-stdout $(xml) &&) true) > $@++ && \
+	$(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
+	rm $@++ && \
+	mv $@+ $@
+
+perfman.info: perfman.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
+
+$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
+	mv $@+ $@
+
+howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
+	$(QUIET_GEN)$(RM) $@+ $@ && \
+	'$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
+	mv $@+ $@
+
+$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
+	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
+
+WEBDOC_DEST = /pub/software/tools/perf/docs
+
+$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
+	mv $@+ $@
+
+install-webdoc : html
+	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
+
+quick-install: quick-install-man
+
+quick-install-man:
+	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
+
+quick-install-html:
+	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
+
+.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/Documentation/perf_counter/Documentation/asciidoc.conf b/Documentation/perf_counter/Documentation/asciidoc.conf
new file mode 100644
index 0000000..356b23a
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/asciidoc.conf
@@ -0,0 +1,91 @@
+## linkperf: macro
+#
+# Usage: linkperf:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show PERF link as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linkperf-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::perf-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::perf-asciidoc-no-roff[]
+
+ifdef::perf-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::perf-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">perf</refmiscinfo>
+<refmiscinfo class="version">{perf_version}</refmiscinfo>
+<refmiscinfo class="manual">perf Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+  <refname>{manname}</refname>
+  <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linkperf-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/Documentation/perf_counter/Documentation/manpage-1.72.xsl b/Documentation/perf_counter/Documentation/manpage-1.72.xsl
new file mode 100644
index 0000000..b4d315c
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+     needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot"      >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/manpage-base.xsl b/Documentation/perf_counter/Documentation/manpage-base.xsl
new file mode 100644
index 0000000..a264fa61
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+     git.docbook.backslash and git.docbook.dot params
+     must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB(',
+			      substring-after(@id,'-'),')',
+			      $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>sp&#10;</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB',
+			      substring-after(@arearefs,'-'),
+			      '. ',$git.docbook.backslash,'fR')"/>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/manpage-bold-literal.xsl b/Documentation/perf_counter/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 0000000..608eb5d
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+     this makes literal text easier to distinguish in manpages
+     viewed on a tty -->
+<xsl:template match="literal">
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fB</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/manpage-normal.xsl b/Documentation/perf_counter/Documentation/manpage-normal.xsl
new file mode 100644
index 0000000..a48f5b1
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot"	>.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/manpage-suppress-sp.xsl b/Documentation/perf_counter/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 0000000..a63c763
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles erroneous, inline .sp in manpage output of some
+     versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+     that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+  <xsl:variable name="content">
+    <xsl:apply-templates/>
+  </xsl:variable>
+  <xsl:value-of select="normalize-space($content)"/>
+  <xsl:if test="not(ancestor::authorblurb) and
+                not(ancestor::personblurb)">
+    <xsl:text>&#10;&#10;</xsl:text>
+  </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt
index a93d2ec..4d3416f 100644
--- a/Documentation/perf_counter/Documentation/perf-record.txt
+++ b/Documentation/perf_counter/Documentation/perf-record.txt
@@ -1,5 +1,5 @@
 perf-record(1)
-==========
+==============
 
 NAME
 ----
@@ -53,12 +53,6 @@ OPTIONS
 -l::
         scale counter values
 
-Configuration
--------------
-
-EXAMPLES
---------
-
 SEE ALSO
 --------
-linkperf:git-stat[1]
+linkperf:perf-stat[1]
diff --git a/Documentation/perf_counter/Documentation/perf-report.txt b/Documentation/perf_counter/Documentation/perf-report.txt
index 49efe16..52d3fc6 100644
--- a/Documentation/perf_counter/Documentation/perf-report.txt
+++ b/Documentation/perf_counter/Documentation/perf-report.txt
@@ -1,5 +1,5 @@
 perf-report(1)
-==========
+==============
 
 NAME
 ----
@@ -21,12 +21,6 @@ OPTIONS
 --input=::
         Input file name. (default: perf.data)
 
-Configuration
--------------
-
-EXAMPLES
---------
-
 SEE ALSO
 --------
 linkperf:perf-stat[1]
diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt
index 828c59ff..a67d0e3 100644
--- a/Documentation/perf_counter/Documentation/perf-stat.txt
+++ b/Documentation/perf_counter/Documentation/perf-stat.txt
@@ -51,9 +51,6 @@ OPTIONS
 -l::
         scale counter values
 
-Configuration
--------------
-
 EXAMPLES
 --------
 
@@ -74,4 +71,4 @@ $ perf stat sleep 1
 
 SEE ALSO
 --------
-linkperf:git-tops[1]
+linkperf:perf-tops[1]
diff --git a/Documentation/perf_counter/Documentation/perf-top.txt b/Documentation/perf_counter/Documentation/perf-top.txt
index 057333b..15251e4 100644
--- a/Documentation/perf_counter/Documentation/perf-top.txt
+++ b/Documentation/perf_counter/Documentation/perf-top.txt
@@ -50,12 +50,6 @@ OPTIONS
 -l::
         scale counter values
 
-Configuration
--------------
-
-EXAMPLES
---------
-
 SEE ALSO
 --------
-linkperf:git-stat[1]
+linkperf:perf-stat[1]
diff --git a/Documentation/perf_counter/Documentation/perf.txt b/Documentation/perf_counter/Documentation/perf.txt
new file mode 100644
index 0000000..e3d8b38
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/perf.txt
@@ -0,0 +1,23 @@
+perf(1)
+=======
+
+NAME
+----
+perf - Performance analysis tools for Linux
+
+SYNOPSIS
+--------
+[verse]
+'perf' [--version] [--help] COMMAND [ARGS]
+
+DESCRIPTION
+-----------
+Performance counters for Linux are are a new kernel-based subsystem
+that provide a framework for all things performance analysis. It
+covers hardware level (CPU/PMU, Performance Monitoring Unit) features
+and software features (software counters, tracepoints) as well.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 8f72584..416ab11 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -693,6 +693,21 @@ builtin-revert.o wt-status.o: wt-status.h
 $(LIB_FILE): $(LIB_OBJS)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
 
+doc:
+	$(MAKE) -C Documentation all
+
+man:
+	$(MAKE) -C Documentation man
+
+html:
+	$(MAKE) -C Documentation html
+
+info:
+	$(MAKE) -C Documentation info
+
+pdf:
+	$(MAKE) -C Documentation pdf
+
 TAGS:
 	$(RM) TAGS
 	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
@@ -781,6 +796,31 @@ ifneq (,$X)
 	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
 endif
 
+install-doc:
+	$(MAKE) -C Documentation install
+
+install-man:
+	$(MAKE) -C Documentation install-man
+
+install-html:
+	$(MAKE) -C Documentation install-html
+
+install-info:
+	$(MAKE) -C Documentation install-info
+
+install-pdf:
+	$(MAKE) -C Documentation install-pdf
+
+quick-install-doc:
+	$(MAKE) -C Documentation quick-install
+
+quick-install-man:
+	$(MAKE) -C Documentation quick-install-man
+
+quick-install-html:
+	$(MAKE) -C Documentation quick-install-html
+
+
 ### Maintainer's dist rules
 
 perf.spec: perf.spec.in
@@ -801,6 +841,26 @@ dist: perf.spec perf-archive$(X) configure
 	@$(RM) -r $(PERF_TARNAME)
 	gzip -f -9 $(PERF_TARNAME).tar
 
+htmldocs = perf-htmldocs-$(PERF_VERSION)
+manpages = perf-manpages-$(PERF_VERSION)
+dist-doc:
+	$(RM) -r .doc-tmp-dir
+	mkdir .doc-tmp-dir
+	$(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
+	cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
+	gzip -n -9 -f $(htmldocs).tar
+	:
+	$(RM) -r .doc-tmp-dir
+	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
+	$(MAKE) -C Documentation DESTDIR=./ \
+		man1dir=../.doc-tmp-dir/man1 \
+		man5dir=../.doc-tmp-dir/man5 \
+		man7dir=../.doc-tmp-dir/man7 \
+		install
+	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
+	gzip -n -9 -f $(manpages).tar
+	$(RM) -r .doc-tmp-dir
+
 rpm: dist
 	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
 
@@ -819,6 +879,7 @@ clean:
 	$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
 	$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
 	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
+	$(MAKE) -C Documentation/ clean
 	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS
 
 .PHONY: all install clean strip
diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c
index d2bd317..a3894bf 100644
--- a/Documentation/perf_counter/builtin-help.c
+++ b/Documentation/perf_counter/builtin-help.c
@@ -317,7 +317,7 @@ static const char *cmd_to_page(const char *perf_cmd)
 	else if (is_perf_command(perf_cmd))
 		return prepend("perf-", perf_cmd);
 	else
-		return prepend("perf", perf_cmd);
+		return prepend("perf-", perf_cmd);
 }
 
 static void setup_man_path(void)
-- 
cgit v0.10.2


From 412b400278ed0f71712387d0b9b31cbdd8a18efc Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 13:56:12 +0100
Subject: RealView: Allow SMP when the Cortex-A9 tile is enabled for EB

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9d02cdb..a8d88af 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -859,7 +859,7 @@ source "kernel/time/Kconfig"
 
 config SMP
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP)
+	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || MACH_REALVIEW_PB11MP)
 	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
-- 
cgit v0.10.2


From 1b504bbe7a4a6c251cdc9dcba1fab72234827945 Mon Sep 17 00:00:00 2001
From: Colin Tuckley <colin.tuckley@arm.com>
Date: Sat, 30 May 2009 13:56:12 +0100
Subject: RealView: Add support for the RealView/PBX platform

This is a RealView platform supporting core tiles with ARM11MPCore,
Cortex-A8 or Cortex-A9 (multicore) processors. It has support for MMC,
CompactFlash, PCI-E.

Signed-off-by: Colin Tuckley <colin.tuckley@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a8d88af..cb7b388 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -859,7 +859,7 @@ source "kernel/time/Kconfig"
 
 config SMP
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || MACH_REALVIEW_PB11MP)
+	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX)
 	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
@@ -916,7 +916,7 @@ config HOTPLUG_CPU
 
 config LOCAL_TIMERS
 	bool "Use local timer interrupts"
-	depends on SMP && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || REALVIEW_EB_A9MP)
+	depends on SMP && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || REALVIEW_EB_A9MP || MACH_REALVIEW_PBX)
 	default y
 	help
 	  Enable support for local timers on SMP platforms, rather then the
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 64f2252..cdb9022 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -24,6 +24,8 @@
 #define L2X0_CACHE_TYPE			0x004
 #define L2X0_CTRL			0x100
 #define L2X0_AUX_CTRL			0x104
+#define L2X0_TAG_LATENCY_CTRL		0x108
+#define L2X0_DATA_LATENCY_CTRL		0x10C
 #define L2X0_EVENT_CNT_CTRL		0x200
 #define L2X0_EVENT_CNT1_CFG		0x204
 #define L2X0_EVENT_CNT0_CFG		0x208
diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig
index b6ec106..368b58a 100644
--- a/arch/arm/mach-realview/Kconfig
+++ b/arch/arm/mach-realview/Kconfig
@@ -58,6 +58,13 @@ config MACH_REALVIEW_PBA8
 	  PB-A8 is a platform with an on-board Cortex-A8 and has support for
 	  PCI-E and Compact Flash.
 
+config MACH_REALVIEW_PBX
+	bool "Support RealView/PBX platform"
+	select ARM_GIC
+	select HAVE_PATA_PLATFORM
+	help
+	  Include support for the ARM(R) RealView PBX platform.
+
 config REALVIEW_HIGH_PHYS_OFFSET
 	bool "High physical base address for the RealView platform"
 	depends on !MACH_REALVIEW_PB1176
diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile
index 7bea8ff..7df1931 100644
--- a/arch/arm/mach-realview/Makefile
+++ b/arch/arm/mach-realview/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_MACH_REALVIEW_EB)		+= realview_eb.o
 obj-$(CONFIG_MACH_REALVIEW_PB11MP)	+= realview_pb11mp.o
 obj-$(CONFIG_MACH_REALVIEW_PB1176)	+= realview_pb1176.o
 obj-$(CONFIG_MACH_REALVIEW_PBA8)	+= realview_pba8.o
+obj-$(CONFIG_MACH_REALVIEW_PBX)		+= realview_pbx.o
 obj-$(CONFIG_SMP)			+= platsmp.o headsmp.o localtimer.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-realview/include/mach/board-pbx.h b/arch/arm/mach-realview/include/mach/board-pbx.h
new file mode 100644
index 0000000..c265093
--- /dev/null
+++ b/arch/arm/mach-realview/include/mach/board-pbx.h
@@ -0,0 +1,203 @@
+/*
+ * arch/arm/mach-realview/include/mach/board-pbx.h
+ *
+ * Copyright (C) 2009 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARCH_BOARD_PBX_H
+#define __ASM_ARCH_BOARD_PBX_H
+
+#include <mach/platform.h>
+
+/*
+ * Peripheral addresses
+ */
+#define REALVIEW_PBX_UART0_BASE			0x10009000	/* UART 0 */
+#define REALVIEW_PBX_UART1_BASE			0x1000A000	/* UART 1 */
+#define REALVIEW_PBX_UART2_BASE			0x1000B000	/* UART 2 */
+#define REALVIEW_PBX_UART3_BASE			0x1000C000	/* UART 3 */
+#define REALVIEW_PBX_SSP_BASE			0x1000D000	/* Synchronous Serial Port */
+#define REALVIEW_PBX_WATCHDOG0_BASE		0x1000F000	/* Watchdog 0 */
+#define REALVIEW_PBX_WATCHDOG_BASE		0x10010000	/* watchdog interface */
+#define REALVIEW_PBX_TIMER0_1_BASE		0x10011000	/* Timer 0 and 1 */
+#define REALVIEW_PBX_TIMER2_3_BASE		0x10012000	/* Timer 2 and 3 */
+#define REALVIEW_PBX_GPIO0_BASE			0x10013000	/* GPIO port 0 */
+#define REALVIEW_PBX_RTC_BASE			0x10017000	/* Real Time Clock */
+#define REALVIEW_PBX_TIMER4_5_BASE		0x10018000	/* Timer 4/5 */
+#define REALVIEW_PBX_TIMER6_7_BASE		0x10019000	/* Timer 6/7 */
+#define REALVIEW_PBX_SCTL_BASE			0x1001A000	/* System Controller */
+#define REALVIEW_PBX_CLCD_BASE			0x10020000	/* CLCD */
+#define REALVIEW_PBX_ONB_SRAM_BASE		0x10060000	/* On-board SRAM */
+#define REALVIEW_PBX_DMC_BASE			0x100E0000	/* DMC configuration */
+#define REALVIEW_PBX_SMC_BASE			0x100E1000	/* SMC configuration */
+#define REALVIEW_PBX_CAN_BASE			0x100E2000	/* CAN bus */
+#define REALVIEW_PBX_GIC_CPU_BASE		0x1E000000	/* Generic interrupt controller CPU interface */
+#define REALVIEW_PBX_FLASH0_BASE		0x40000000
+#define REALVIEW_PBX_FLASH0_SIZE		SZ_64M
+#define REALVIEW_PBX_FLASH1_BASE		0x44000000
+#define REALVIEW_PBX_FLASH1_SIZE		SZ_64M
+#define REALVIEW_PBX_ETH_BASE			0x4E000000	/* Ethernet */
+#define REALVIEW_PBX_USB_BASE			0x4F000000	/* USB */
+#define REALVIEW_PBX_GIC_DIST_BASE		0x1E001000	/* Generic interrupt controller distributor */
+#define REALVIEW_PBX_LT_BASE			0xC0000000	/* Logic Tile expansion */
+#define REALVIEW_PBX_SDRAM6_BASE		0x70000000	/* SDRAM bank 6 256MB */
+#define REALVIEW_PBX_SDRAM7_BASE		0x80000000	/* SDRAM bank 7 256MB */
+
+/*
+ * Tile-specific addresses
+ */
+#define REALVIEW_PBX_TILE_SCU_BASE		0x1F000000      /* SCU registers */
+#define REALVIEW_PBX_TILE_GIC_CPU_BASE		0x1F000100      /* Private Generic interrupt controller CPU interface */
+#define REALVIEW_PBX_TILE_TWD_BASE		0x1F000600
+#define REALVIEW_PBX_TILE_TWD_PERCPU_BASE	0x1F000700
+#define REALVIEW_PBX_TILE_TWD_SIZE		0x00000100
+#define REALVIEW_PBX_TILE_GIC_DIST_BASE		0x1F001000      /* Private Generic interrupt controller distributor */
+#define REALVIEW_PBX_TILE_L220_BASE		0x1F002000      /* L220 registers */
+
+#define REALVIEW_PBX_SYS_PLD_CTRL1		0x74
+
+/*
+ * PBX PCI regions
+ */
+#define REALVIEW_PBX_PCI_BASE			0x90040000	/* PCI-X Unit base */
+#define REALVIEW_PBX_PCI_IO_BASE		0x90050000	/* IO Region on AHB */
+#define REALVIEW_PBX_PCI_MEM_BASE		0xA0000000	/* MEM Region on AHB */
+
+#define REALVIEW_PBX_PCI_BASE_SIZE		0x10000		/* 16 Kb */
+#define REALVIEW_PBX_PCI_IO_SIZE		0x1000		/* 4 Kb */
+#define REALVIEW_PBX_PCI_MEM_SIZE		0x20000000	/* 512 MB */
+
+/*
+ * Irqs
+ */
+#define IRQ_PBX_GIC_START			32
+
+/* L220
+#define IRQ_PBX_L220_EVENT	(IRQ_PBX_GIC_START + 29)
+#define IRQ_PBX_L220_SLAVE	(IRQ_PBX_GIC_START + 30)
+#define IRQ_PBX_L220_DECODE	(IRQ_PBX_GIC_START + 31)
+*/
+
+/*
+ * PB-X on-board gic irq sources
+ */
+#define IRQ_PBX_WATCHDOG	(IRQ_PBX_GIC_START + 0)	/* Watchdog timer */
+#define IRQ_PBX_SOFT		(IRQ_PBX_GIC_START + 1)	/* Software interrupt */
+#define IRQ_PBX_COMMRx		(IRQ_PBX_GIC_START + 2)	/* Debug Comm Rx interrupt */
+#define IRQ_PBX_COMMTx		(IRQ_PBX_GIC_START + 3)	/* Debug Comm Tx interrupt */
+#define IRQ_PBX_TIMER0_1	(IRQ_PBX_GIC_START + 4)	/* Timer 0/1 (default timer) */
+#define IRQ_PBX_TIMER2_3	(IRQ_PBX_GIC_START + 5)	/* Timer 2/3 */
+#define IRQ_PBX_GPIO0		(IRQ_PBX_GIC_START + 6)	/* GPIO 0 */
+#define IRQ_PBX_GPIO1		(IRQ_PBX_GIC_START + 7)	/* GPIO 1 */
+#define IRQ_PBX_GPIO2		(IRQ_PBX_GIC_START + 8)	/* GPIO 2 */
+								/* 9 reserved */
+#define IRQ_PBX_RTC		(IRQ_PBX_GIC_START + 10)	/* Real Time Clock */
+#define IRQ_PBX_SSP		(IRQ_PBX_GIC_START + 11)	/* Synchronous Serial Port */
+#define IRQ_PBX_UART0		(IRQ_PBX_GIC_START + 12)	/* UART 0 on development chip */
+#define IRQ_PBX_UART1		(IRQ_PBX_GIC_START + 13)	/* UART 1 on development chip */
+#define IRQ_PBX_UART2		(IRQ_PBX_GIC_START + 14)	/* UART 2 on development chip */
+#define IRQ_PBX_UART3		(IRQ_PBX_GIC_START + 15)	/* UART 3 on development chip */
+#define IRQ_PBX_SCI		(IRQ_PBX_GIC_START + 16)	/* Smart Card Interface */
+#define IRQ_PBX_MMCI0A		(IRQ_PBX_GIC_START + 17)	/* Multimedia Card 0A */
+#define IRQ_PBX_MMCI0B		(IRQ_PBX_GIC_START + 18)	/* Multimedia Card 0B */
+#define IRQ_PBX_AACI		(IRQ_PBX_GIC_START + 19)	/* Audio Codec */
+#define IRQ_PBX_KMI0		(IRQ_PBX_GIC_START + 20)	/* Keyboard/Mouse port 0 */
+#define IRQ_PBX_KMI1		(IRQ_PBX_GIC_START + 21)	/* Keyboard/Mouse port 1 */
+#define IRQ_PBX_CHARLCD		(IRQ_PBX_GIC_START + 22)	/* Character LCD */
+#define IRQ_PBX_CLCD		(IRQ_PBX_GIC_START + 23)	/* CLCD controller */
+#define IRQ_PBX_DMAC		(IRQ_PBX_GIC_START + 24)	/* DMA controller */
+#define IRQ_PBX_PWRFAIL		(IRQ_PBX_GIC_START + 25)	/* Power failure */
+#define IRQ_PBX_PISMO		(IRQ_PBX_GIC_START + 26)	/* PISMO interface */
+#define IRQ_PBX_DoC		(IRQ_PBX_GIC_START + 27)	/* Disk on Chip memory controller */
+#define IRQ_PBX_ETH		(IRQ_PBX_GIC_START + 28)	/* Ethernet controller */
+#define IRQ_PBX_USB		(IRQ_PBX_GIC_START + 29)	/* USB controller */
+#define IRQ_PBX_TSPEN		(IRQ_PBX_GIC_START + 30)	/* Touchscreen pen */
+#define IRQ_PBX_TSKPAD		(IRQ_PBX_GIC_START + 31)	/* Touchscreen keypad */
+
+#define IRQ_PBX_PMU_SCU0        (IRQ_PBX_GIC_START + 32)        /* SCU PMU Interrupts (11mp) */
+#define IRQ_PBX_PMU_SCU1        (IRQ_PBX_GIC_START + 33)
+#define IRQ_PBX_PMU_SCU2        (IRQ_PBX_GIC_START + 34)
+#define IRQ_PBX_PMU_SCU3        (IRQ_PBX_GIC_START + 35)
+#define IRQ_PBX_PMU_SCU4        (IRQ_PBX_GIC_START + 36)
+#define IRQ_PBX_PMU_SCU5        (IRQ_PBX_GIC_START + 37)
+#define IRQ_PBX_PMU_SCU6        (IRQ_PBX_GIC_START + 38)
+#define IRQ_PBX_PMU_SCU7        (IRQ_PBX_GIC_START + 39)
+
+#define IRQ_PBX_WATCHDOG1       (IRQ_PBX_GIC_START + 40)        /* Watchdog1 timer */
+#define IRQ_PBX_TIMER4_5        (IRQ_PBX_GIC_START + 41)        /* Timer 0/1 (default timer) */
+#define IRQ_PBX_TIMER6_7        (IRQ_PBX_GIC_START + 42)        /* Timer 2/3 */
+/* ... */
+#define IRQ_PBX_PMU_CPU3        (IRQ_PBX_GIC_START + 44)        /* CPU PMU Interrupts */
+#define IRQ_PBX_PMU_CPU2        (IRQ_PBX_GIC_START + 45)
+#define IRQ_PBX_PMU_CPU1        (IRQ_PBX_GIC_START + 46)
+#define IRQ_PBX_PMU_CPU0        (IRQ_PBX_GIC_START + 47)
+
+/* ... */
+#define IRQ_PBX_PCI0		(IRQ_PBX_GIC_START + 50)
+#define IRQ_PBX_PCI1		(IRQ_PBX_GIC_START + 51)
+#define IRQ_PBX_PCI2		(IRQ_PBX_GIC_START + 52)
+#define IRQ_PBX_PCI3		(IRQ_PBX_GIC_START + 53)
+
+#define IRQ_PBX_SMC		-1
+#define IRQ_PBX_SCTL		-1
+
+#define NR_GIC_PBX		1
+
+/*
+ * Only define NR_IRQS if less than NR_IRQS_PBX
+ */
+#define NR_IRQS_PBX		(IRQ_PBX_GIC_START + 96)
+
+#if defined(CONFIG_MACH_REALVIEW_PBX)
+
+#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PBX)
+#undef NR_IRQS
+#define NR_IRQS			NR_IRQS_PBX
+#endif
+
+#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PBX)
+#undef MAX_GIC_NR
+#define MAX_GIC_NR		NR_GIC_PBX
+#endif
+
+#endif	/* CONFIG_MACH_REALVIEW_PBX */
+
+/*
+ * Core tile identification (REALVIEW_SYS_PROCID)
+ */
+#define REALVIEW_PBX_PROC_MASK          0xFF000000
+#define REALVIEW_PBX_PROC_ARM7TDMI      0x00000000
+#define REALVIEW_PBX_PROC_ARM9          0x02000000
+#define REALVIEW_PBX_PROC_ARM11         0x04000000
+#define REALVIEW_PBX_PROC_ARM11MP       0x06000000
+#define REALVIEW_PBX_PROC_A9MP          0x0C000000
+#define REALVIEW_PBX_PROC_A8            0x0E000000
+
+#define check_pbx_proc(proc_type)                                            \
+	((readl(__io_address(REALVIEW_SYS_PROCID)) & REALVIEW_PBX_PROC_MASK) \
+	== proc_type)
+
+#ifdef CONFIG_MACH_REALVIEW_PBX
+#define core_tile_pbx11mp()     check_pbx_proc(REALVIEW_PBX_PROC_ARM11MP)
+#define core_tile_pbxa9mp()     check_pbx_proc(REALVIEW_PBX_PROC_A9MP)
+#define core_tile_pbxa8()       check_pbx_proc(REALVIEW_PBX_PROC_A8)
+#else
+#define core_tile_pbx11mp()     0
+#define core_tile_pbxa9mp()     0
+#define core_tile_pbxa8()       0
+#endif
+
+#endif	/* __ASM_ARCH_BOARD_PBX_H */
diff --git a/arch/arm/mach-realview/include/mach/debug-macro.S b/arch/arm/mach-realview/include/mach/debug-macro.S
index 92dbcb9..932d8af 100644
--- a/arch/arm/mach-realview/include/mach/debug-macro.S
+++ b/arch/arm/mach-realview/include/mach/debug-macro.S
@@ -12,7 +12,8 @@
 
 #if defined(CONFIG_MACH_REALVIEW_EB) || \
     defined(CONFIG_MACH_REALVIEW_PB11MP) || \
-    defined(CONFIG_MACH_REALVIEW_PBA8)
+    defined(CONFIG_MACH_REALVIEW_PBA8) || \
+    defined(CONFIG_MACH_REALVIEW_PBX)
 #ifndef DEBUG_LL_UART_OFFSET
 #define DEBUG_LL_UART_OFFSET	0x00009000
 #elif DEBUG_LL_UART_OFFSET != 0x00009000
diff --git a/arch/arm/mach-realview/include/mach/irqs.h b/arch/arm/mach-realview/include/mach/irqs.h
index fe5cb98..e9e3826 100644
--- a/arch/arm/mach-realview/include/mach/irqs.h
+++ b/arch/arm/mach-realview/include/mach/irqs.h
@@ -26,6 +26,7 @@
 #include <mach/board-pb11mp.h>
 #include <mach/board-pb1176.h>
 #include <mach/board-pba8.h>
+#include <mach/board-pbx.h>
 
 #define IRQ_LOCALTIMER		29
 #define IRQ_LOCALWDOG		30
diff --git a/arch/arm/mach-realview/include/mach/uncompress.h b/arch/arm/mach-realview/include/mach/uncompress.h
index 415d634..8305037 100644
--- a/arch/arm/mach-realview/include/mach/uncompress.h
+++ b/arch/arm/mach-realview/include/mach/uncompress.h
@@ -24,6 +24,7 @@
 #include <mach/board-pb11mp.h>
 #include <mach/board-pb1176.h>
 #include <mach/board-pba8.h>
+#include <mach/board-pbx.h>
 
 #define AMBA_UART_DR(base)	(*(volatile unsigned char *)((base) + 0x00))
 #define AMBA_UART_LCRH(base)	(*(volatile unsigned char *)((base) + 0x2c))
@@ -43,6 +44,8 @@ static inline unsigned long get_uart_base(void)
 		return REALVIEW_PB1176_UART0_BASE;
 	else if (machine_is_realview_pba8())
 		return REALVIEW_PBA8_UART0_BASE;
+	else if (machine_is_realview_pbx())
+		return REALVIEW_PBX_UART0_BASE;
 	else
 		return 0;
 }
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index 30a9c68..e3bd934 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -22,6 +22,7 @@
 
 #include <mach/board-eb.h>
 #include <mach/board-pb11mp.h>
+#include <mach/board-pbx.h>
 #include <mach/scu.h>
 
 #include "core.h"
@@ -40,6 +41,9 @@ static void __iomem *scu_base_addr(void)
 		return __io_address(REALVIEW_EB11MP_SCU_BASE);
 	else if (machine_is_realview_pb11mp())
 		return __io_address(REALVIEW_TC11MP_SCU_BASE);
+	else if (machine_is_realview_pbx() &&
+		 (core_tile_pbx11mp() || core_tile_pbxa9mp()))
+		return __io_address(REALVIEW_PBX_TILE_SCU_BASE);
 	else
 		return (void __iomem *)0;
 }
diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
new file mode 100644
index 0000000..1fe294d
--- /dev/null
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -0,0 +1,335 @@
+/*
+ *  arch/arm/mach-realview/realview_pbx.c
+ *
+ *  Copyright (C) 2009 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/sysdev.h>
+#include <linux/amba/bus.h>
+#include <linux/io.h>
+
+#include <asm/irq.h>
+#include <asm/leds.h>
+#include <asm/mach-types.h>
+#include <asm/hardware/gic.h>
+#include <asm/hardware/cache-l2x0.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/mmc.h>
+#include <asm/mach/time.h>
+
+#include <mach/hardware.h>
+#include <mach/board-pbx.h>
+#include <mach/irqs.h>
+
+#include "core.h"
+
+static struct map_desc realview_pbx_io_desc[] __initdata = {
+	{
+		.virtual	= IO_ADDRESS(REALVIEW_SYS_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_SYS_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	}, {
+		.virtual	= IO_ADDRESS(REALVIEW_PBX_GIC_CPU_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_PBX_GIC_CPU_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	}, {
+		.virtual	= IO_ADDRESS(REALVIEW_PBX_GIC_DIST_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_PBX_GIC_DIST_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	}, {
+		.virtual	= IO_ADDRESS(REALVIEW_SCTL_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_SCTL_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	}, {
+		.virtual	= IO_ADDRESS(REALVIEW_PBX_TIMER0_1_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_PBX_TIMER0_1_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	}, {
+		.virtual	= IO_ADDRESS(REALVIEW_PBX_TIMER2_3_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_PBX_TIMER2_3_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	},
+#ifdef CONFIG_PCI
+	{
+		.virtual	= PCIX_UNIT_BASE,
+		.pfn		= __phys_to_pfn(REALVIEW_PBX_PCI_BASE),
+		.length		= REALVIEW_PBX_PCI_BASE_SIZE,
+		.type		= MT_DEVICE,
+	},
+#endif
+#ifdef CONFIG_DEBUG_LL
+	{
+		.virtual	= IO_ADDRESS(REALVIEW_PBX_UART0_BASE),
+		.pfn		= __phys_to_pfn(REALVIEW_PBX_UART0_BASE),
+		.length		= SZ_4K,
+		.type		= MT_DEVICE,
+	},
+#endif
+};
+
+static struct map_desc realview_local_io_desc[] __initdata = {
+	{
+		.virtual        = IO_ADDRESS(REALVIEW_PBX_TILE_GIC_CPU_BASE),
+		.pfn            = __phys_to_pfn(REALVIEW_PBX_TILE_GIC_CPU_BASE),
+		.length         = SZ_4K,
+		.type           = MT_DEVICE,
+	}, {
+		.virtual        = IO_ADDRESS(REALVIEW_PBX_TILE_GIC_DIST_BASE),
+		.pfn            = __phys_to_pfn(REALVIEW_PBX_TILE_GIC_DIST_BASE),
+		.length         = SZ_4K,
+		.type           = MT_DEVICE,
+	}, {
+		.virtual        = IO_ADDRESS(REALVIEW_PBX_TILE_L220_BASE),
+		.pfn            = __phys_to_pfn(REALVIEW_PBX_TILE_L220_BASE),
+		.length         = SZ_8K,
+		.type           = MT_DEVICE,
+	}
+};
+
+static void __init realview_pbx_map_io(void)
+{
+	iotable_init(realview_pbx_io_desc, ARRAY_SIZE(realview_pbx_io_desc));
+	if (core_tile_pbx11mp() || core_tile_pbxa9mp())
+		iotable_init(realview_local_io_desc, ARRAY_SIZE(realview_local_io_desc));
+}
+
+/*
+ * RealView PBXCore AMBA devices
+ */
+
+#define GPIO2_IRQ		{ IRQ_PBX_GPIO2, NO_IRQ }
+#define GPIO2_DMA		{ 0, 0 }
+#define GPIO3_IRQ		{ IRQ_PBX_GPIO3, NO_IRQ }
+#define GPIO3_DMA		{ 0, 0 }
+#define AACI_IRQ		{ IRQ_PBX_AACI, NO_IRQ }
+#define AACI_DMA		{ 0x80, 0x81 }
+#define MMCI0_IRQ		{ IRQ_PBX_MMCI0A, IRQ_PBX_MMCI0B }
+#define MMCI0_DMA		{ 0x84, 0 }
+#define KMI0_IRQ		{ IRQ_PBX_KMI0, NO_IRQ }
+#define KMI0_DMA		{ 0, 0 }
+#define KMI1_IRQ		{ IRQ_PBX_KMI1, NO_IRQ }
+#define KMI1_DMA		{ 0, 0 }
+#define PBX_SMC_IRQ		{ NO_IRQ, NO_IRQ }
+#define PBX_SMC_DMA		{ 0, 0 }
+#define MPMC_IRQ		{ NO_IRQ, NO_IRQ }
+#define MPMC_DMA		{ 0, 0 }
+#define PBX_CLCD_IRQ		{ IRQ_PBX_CLCD, NO_IRQ }
+#define PBX_CLCD_DMA		{ 0, 0 }
+#define DMAC_IRQ		{ IRQ_PBX_DMAC, NO_IRQ }
+#define DMAC_DMA		{ 0, 0 }
+#define SCTL_IRQ		{ NO_IRQ, NO_IRQ }
+#define SCTL_DMA		{ 0, 0 }
+#define PBX_WATCHDOG_IRQ	{ IRQ_PBX_WATCHDOG, NO_IRQ }
+#define PBX_WATCHDOG_DMA	{ 0, 0 }
+#define PBX_GPIO0_IRQ		{ IRQ_PBX_GPIO0, NO_IRQ }
+#define PBX_GPIO0_DMA		{ 0, 0 }
+#define GPIO1_IRQ		{ IRQ_PBX_GPIO1, NO_IRQ }
+#define GPIO1_DMA		{ 0, 0 }
+#define PBX_RTC_IRQ		{ IRQ_PBX_RTC, NO_IRQ }
+#define PBX_RTC_DMA		{ 0, 0 }
+#define SCI_IRQ			{ IRQ_PBX_SCI, NO_IRQ }
+#define SCI_DMA			{ 7, 6 }
+#define PBX_UART0_IRQ		{ IRQ_PBX_UART0, NO_IRQ }
+#define PBX_UART0_DMA		{ 15, 14 }
+#define PBX_UART1_IRQ		{ IRQ_PBX_UART1, NO_IRQ }
+#define PBX_UART1_DMA		{ 13, 12 }
+#define PBX_UART2_IRQ		{ IRQ_PBX_UART2, NO_IRQ }
+#define PBX_UART2_DMA		{ 11, 10 }
+#define PBX_UART3_IRQ		{ IRQ_PBX_UART3, NO_IRQ }
+#define PBX_UART3_DMA		{ 0x86, 0x87 }
+#define PBX_SSP_IRQ		{ IRQ_PBX_SSP, NO_IRQ }
+#define PBX_SSP_DMA		{ 9, 8 }
+
+/* FPGA Primecells */
+AMBA_DEVICE(aaci,	"fpga:04",	AACI,		NULL);
+AMBA_DEVICE(mmc0,	"fpga:05",	MMCI0,		&realview_mmc0_plat_data);
+AMBA_DEVICE(kmi0,	"fpga:06",	KMI0,		NULL);
+AMBA_DEVICE(kmi1,	"fpga:07",	KMI1,		NULL);
+AMBA_DEVICE(uart3,	"fpga:09",	PBX_UART3,	NULL);
+
+/* DevChip Primecells */
+AMBA_DEVICE(smc,	"dev:00",	PBX_SMC,	NULL);
+AMBA_DEVICE(sctl,	"dev:e0",	SCTL,		NULL);
+AMBA_DEVICE(wdog,	"dev:e1",	PBX_WATCHDOG, 	NULL);
+AMBA_DEVICE(gpio0,	"dev:e4",	PBX_GPIO0,	NULL);
+AMBA_DEVICE(gpio1,	"dev:e5",	GPIO1,		NULL);
+AMBA_DEVICE(gpio2,	"dev:e6",	GPIO2,		NULL);
+AMBA_DEVICE(rtc,	"dev:e8",	PBX_RTC,	NULL);
+AMBA_DEVICE(sci0,	"dev:f0",	SCI,		NULL);
+AMBA_DEVICE(uart0,	"dev:f1",	PBX_UART0,	NULL);
+AMBA_DEVICE(uart1,	"dev:f2",	PBX_UART1,	NULL);
+AMBA_DEVICE(uart2,	"dev:f3",	PBX_UART2,	NULL);
+AMBA_DEVICE(ssp0,	"dev:f4",	PBX_SSP,	NULL);
+
+/* Primecells on the NEC ISSP chip */
+AMBA_DEVICE(clcd,	"issp:20",	PBX_CLCD,	&clcd_plat_data);
+AMBA_DEVICE(dmac,	"issp:30",	DMAC,		NULL);
+
+static struct amba_device *amba_devs[] __initdata = {
+	&dmac_device,
+	&uart0_device,
+	&uart1_device,
+	&uart2_device,
+	&uart3_device,
+	&smc_device,
+	&clcd_device,
+	&sctl_device,
+	&wdog_device,
+	&gpio0_device,
+	&gpio1_device,
+	&gpio2_device,
+	&rtc_device,
+	&sci0_device,
+	&ssp0_device,
+	&aaci_device,
+	&mmc0_device,
+	&kmi0_device,
+	&kmi1_device,
+};
+
+/*
+ * RealView PB-X platform devices
+ */
+static struct resource realview_pbx_flash_resources[] = {
+	[0] = {
+		.start          = REALVIEW_PBX_FLASH0_BASE,
+		.end            = REALVIEW_PBX_FLASH0_BASE + REALVIEW_PBX_FLASH0_SIZE - 1,
+		.flags          = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start          = REALVIEW_PBX_FLASH1_BASE,
+		.end            = REALVIEW_PBX_FLASH1_BASE + REALVIEW_PBX_FLASH1_SIZE - 1,
+		.flags          = IORESOURCE_MEM,
+	},
+};
+
+static struct resource realview_pbx_smsc911x_resources[] = {
+	[0] = {
+		.start		= REALVIEW_PBX_ETH_BASE,
+		.end		= REALVIEW_PBX_ETH_BASE + SZ_64K - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start		= IRQ_PBX_ETH,
+		.end		= IRQ_PBX_ETH,
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct resource realview_pbx_isp1761_resources[] = {
+	[0] = {
+		.start		= REALVIEW_PBX_USB_BASE,
+		.end		= REALVIEW_PBX_USB_BASE + SZ_128K - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start		= IRQ_PBX_USB,
+		.end		= IRQ_PBX_USB,
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static void __init gic_init_irq(void)
+{
+	/* ARM PBX on-board GIC */
+	if (core_tile_pbx11mp() || core_tile_pbxa9mp()) {
+		gic_cpu_base_addr = __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE);
+		gic_dist_init(0, __io_address(REALVIEW_PBX_TILE_GIC_DIST_BASE),
+			      29);
+		gic_cpu_init(0, __io_address(REALVIEW_PBX_TILE_GIC_CPU_BASE));
+	} else {
+		gic_cpu_base_addr = __io_address(REALVIEW_PBX_GIC_CPU_BASE);
+		gic_dist_init(0, __io_address(REALVIEW_PBX_GIC_DIST_BASE),
+			      IRQ_PBX_GIC_START);
+		gic_cpu_init(0, __io_address(REALVIEW_PBX_GIC_CPU_BASE));
+	}
+}
+
+static void __init realview_pbx_timer_init(void)
+{
+	timer0_va_base = __io_address(REALVIEW_PBX_TIMER0_1_BASE);
+	timer1_va_base = __io_address(REALVIEW_PBX_TIMER0_1_BASE) + 0x20;
+	timer2_va_base = __io_address(REALVIEW_PBX_TIMER2_3_BASE);
+	timer3_va_base = __io_address(REALVIEW_PBX_TIMER2_3_BASE) + 0x20;
+
+#ifdef CONFIG_LOCAL_TIMERS
+	if (core_tile_pbx11mp() || core_tile_pbxa9mp())
+		twd_base = __io_address(REALVIEW_PBX_TILE_TWD_BASE);
+#endif
+	realview_timer_init(IRQ_PBX_TIMER0_1);
+}
+
+static struct sys_timer realview_pbx_timer = {
+	.init		= realview_pbx_timer_init,
+};
+
+static void __init realview_pbx_init(void)
+{
+	int i;
+
+#ifdef CONFIG_CACHE_L2X0
+	if (core_tile_pbxa9mp()) {
+		void __iomem *l2x0_base =
+			__io_address(REALVIEW_PBX_TILE_L220_BASE);
+
+		/* set RAM latencies to 1 cycle for eASIC */
+		writel(0, l2x0_base + L2X0_TAG_LATENCY_CTRL);
+		writel(0, l2x0_base + L2X0_DATA_LATENCY_CTRL);
+
+		/* 16KB way size, 8-way associativity, parity disabled
+		 * Bits:  .. 0 0 0 0 1 00 1 0 1 001 0 000 0 .... .... .... */
+		l2x0_init(l2x0_base, 0x02520000, 0xc0000fff);
+	}
+#endif
+
+	realview_flash_register(realview_pbx_flash_resources,
+				ARRAY_SIZE(realview_pbx_flash_resources));
+	realview_eth_register(NULL, realview_pbx_smsc911x_resources);
+	platform_device_register(&realview_i2c_device);
+	platform_device_register(&realview_cf_device);
+	realview_usb_register(realview_pbx_isp1761_resources);
+
+	for (i = 0; i < ARRAY_SIZE(amba_devs); i++) {
+		struct amba_device *d = amba_devs[i];
+		amba_device_register(d, &iomem_resource);
+	}
+
+#ifdef CONFIG_LEDS
+	leds_event = realview_leds_event;
+#endif
+}
+
+MACHINE_START(REALVIEW_PBX, "ARM-RealView PBX")
+	/* Maintainer: ARM Ltd/Deep Blue Solutions Ltd */
+	.phys_io	= REALVIEW_PBX_UART0_BASE,
+	.io_pg_offst	= (IO_ADDRESS(REALVIEW_PBX_UART0_BASE) >> 18) & 0xfffc,
+	.boot_params	= PHYS_OFFSET + 0x00000100,
+	.map_io		= realview_pbx_map_io,
+	.init_irq	= gic_init_irq,
+	.timer		= &realview_pbx_timer,
+	.init_machine	= realview_pbx_init,
+MACHINE_END
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 2097956..76bbcde 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -391,7 +391,7 @@ config CPU_FEROCEON_OLD_ID
 
 # ARMv6
 config CPU_V6
-	bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB
+	bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX
 	select CPU_32v6
 	select CPU_ABRT_EV6
 	select CPU_PABRT_NOIFAR
@@ -416,7 +416,7 @@ config CPU_32v6K
 
 # ARMv7
 config CPU_V7
-	bool "Support ARM V7 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB
+	bool "Support ARM V7 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX
 	select CPU_32v6K
 	select CPU_32v7
 	select CPU_ABRT_EV7
@@ -747,7 +747,7 @@ config CACHE_FEROCEON_L2_WRITETHROUGH
 config CACHE_L2X0
 	bool "Enable the L2x0 outer cache controller"
 	depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || \
-		   REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31
+		   REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31 || MACH_REALVIEW_PBX
 	default y
 	select OUTER_CACHE
 	help
-- 
cgit v0.10.2


From af607747def54d8f76d2509c1394587da5d9bdfd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 13:56:13 +0100
Subject: RealView: Allow access to the secure flash memory block on PB1176

This patch adds a Kconfig option for specifying whether Linux will only
be run in secure mode on the RealView PB1176 platform. Enabling it will
make the secure flash memory block (64MB @ 0x3c000000) available to
Linux.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig
index 368b58a..e09f540 100644
--- a/arch/arm/mach-realview/Kconfig
+++ b/arch/arm/mach-realview/Kconfig
@@ -48,6 +48,15 @@ config MACH_REALVIEW_PB1176
 	help
 	  Include support for the ARM(R) RealView ARM1176 Platform Baseboard.
 
+config REALVIEW_PB1176_SECURE_FLASH
+	bool "Allow access to the secure flash memory block"
+	depends on MACH_REALVIEW_PB1176
+	default n
+	help
+	  Select this option if Linux will only run in secure mode on the
+	  RealView PB1176 platform and access to the secure flash memory
+	  block (64MB @ 0x3c000000) is required.
+
 config MACH_REALVIEW_PBA8
 	bool "Support RealView/PB-A8 platform"
 	select CPU_V7
diff --git a/arch/arm/mach-realview/include/mach/board-pb1176.h b/arch/arm/mach-realview/include/mach/board-pb1176.h
index 858eea7..e4d0e80 100644
--- a/arch/arm/mach-realview/include/mach/board-pb1176.h
+++ b/arch/arm/mach-realview/include/mach/board-pb1176.h
@@ -32,6 +32,8 @@
 #define REALVIEW_PB1176_SDRAM67_BASE		0x70000000 /* SDRAM banks 6 and 7 */
 #define REALVIEW_PB1176_FLASH_BASE		0x30000000
 #define REALVIEW_PB1176_FLASH_SIZE		SZ_64M
+#define REALVIEW_PB1176_SEC_FLASH_BASE		0x3C000000 /* Secure flash */
+#define REALVIEW_PB1176_SEC_FLASH_SIZE		SZ_64M
 
 #define REALVIEW_PB1176_TIMER0_1_BASE		0x10104000 /* Timer 0 and 1 */
 #define REALVIEW_PB1176_TIMER2_3_BASE		0x10105000 /* Timer 2 and 3 */
diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c
index a64b84a..25efe71 100644
--- a/arch/arm/mach-realview/realview_pb1176.c
+++ b/arch/arm/mach-realview/realview_pb1176.c
@@ -203,11 +203,23 @@ static struct amba_device *amba_devs[] __initdata = {
 /*
  * RealView PB1176 platform devices
  */
-static struct resource realview_pb1176_flash_resource = {
-	.start			= REALVIEW_PB1176_FLASH_BASE,
-	.end			= REALVIEW_PB1176_FLASH_BASE + REALVIEW_PB1176_FLASH_SIZE - 1,
-	.flags			= IORESOURCE_MEM,
+static struct resource realview_pb1176_flash_resources[] = {
+	[0] = {
+		.start		= REALVIEW_PB1176_FLASH_BASE,
+		.end		= REALVIEW_PB1176_FLASH_BASE + REALVIEW_PB1176_FLASH_SIZE - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start		= REALVIEW_PB1176_SEC_FLASH_BASE,
+		.end		= REALVIEW_PB1176_SEC_FLASH_BASE + REALVIEW_PB1176_SEC_FLASH_SIZE - 1,
+		.flags		= IORESOURCE_MEM,
+	},
 };
+#ifdef CONFIG_REALVIEW_PB1176_SECURE_FLASH
+#define PB1176_FLASH_BLOCKS	2
+#else
+#define PB1176_FLASH_BLOCKS	1
+#endif
 
 static struct resource realview_pb1176_smsc911x_resources[] = {
 	[0] = {
@@ -271,7 +283,8 @@ static void __init realview_pb1176_init(void)
 	l2x0_init(__io_address(REALVIEW_PB1176_L220_BASE), 0x00730000, 0xfe000fff);
 #endif
 
-	realview_flash_register(&realview_pb1176_flash_resource, 1);
+	realview_flash_register(realview_pb1176_flash_resources,
+				PB1176_FLASH_BLOCKS);
 	realview_eth_register(NULL, realview_pb1176_smsc911x_resources);
 	platform_device_register(&realview_i2c_device);
 	realview_usb_register(realview_pb1176_isp1761_resources);
-- 
cgit v0.10.2


From ff3042fb6f1a0f6faf527390a8a943a3e4296741 Mon Sep 17 00:00:00 2001
From: Colin Tuckley <Colin.Tuckley@arm.com>
Date: Sat, 30 May 2009 13:56:13 +0100
Subject: RealView: Allow CONFIG_LEDS on this platform

Signed-off-by: Colin Tuckley <Colin.Tuckley@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index cb7b388..ceb549c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1027,7 +1027,7 @@ config LEDS
 		   ARCH_OMAP || ARCH_P720T || ARCH_PXA_IDP || \
 		   ARCH_SA1100 || ARCH_SHARK || ARCH_VERSATILE || \
 		   ARCH_AT91 || ARCH_DAVINCI || \
-		   ARCH_KS8695 || MACH_RD88F5182
+		   ARCH_KS8695 || MACH_RD88F5182 || ARCH_REALVIEW
 	help
 	  If you say Y here, the LEDs on your machine will be used
 	  to provide useful information about your current system status.
-- 
cgit v0.10.2


From da055eb52ec067d51dc08c7e86baf92dd5c01599 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 13:56:16 +0100
Subject: RealView: Toggle one LED per CPU

If CONFIG_LEDS is enabled, it makes more sense to toggle one LED per CPU
in SMP systems rather than a single LED for all the CPUs.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 076acbc..346cbf0 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -578,21 +578,22 @@ void realview_leds_event(led_event_t ledevt)
 {
 	unsigned long flags;
 	u32 val;
+	u32 led = 1 << smp_processor_id();
 
 	local_irq_save(flags);
 	val = readl(VA_LEDS_BASE);
 
 	switch (ledevt) {
 	case led_idle_start:
-		val = val & ~REALVIEW_SYS_LED0;
+		val = val & ~led;
 		break;
 
 	case led_idle_end:
-		val = val | REALVIEW_SYS_LED0;
+		val = val | led;
 		break;
 
 	case led_timer:
-		val = val ^ REALVIEW_SYS_LED1;
+		val = val ^ REALVIEW_SYS_LED7;
 		break;
 
 	case led_halted:
-- 
cgit v0.10.2


From faa7bc51c11d5bbe440ac04710fd7a3208782000 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 14:00:14 +0100
Subject: Check whether the TLB operations need broadcasting on SMP systems

ARMv7 SMP hardware can handle the TLB maintenance operations
broadcasting in hardware so that the software can avoid the costly IPIs.
This patch adds the necessary checks (the MMFR3 CPUID register) to avoid
the broadcasting if already supported by the hardware.

(this patch is based on the work done by Tony Thompson @ ARM)

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 7b9d27e..b3e656c 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -8,6 +8,21 @@
 #define CPUID_TCM	2
 #define CPUID_TLBTYPE	3
 
+#define CPUID_EXT_PFR0	"c1, 0"
+#define CPUID_EXT_PFR1	"c1, 1"
+#define CPUID_EXT_DFR0	"c1, 2"
+#define CPUID_EXT_AFR0	"c1, 3"
+#define CPUID_EXT_MMFR0	"c1, 4"
+#define CPUID_EXT_MMFR1	"c1, 5"
+#define CPUID_EXT_MMFR2	"c1, 6"
+#define CPUID_EXT_MMFR3	"c1, 7"
+#define CPUID_EXT_ISAR0	"c2, 0"
+#define CPUID_EXT_ISAR1	"c2, 1"
+#define CPUID_EXT_ISAR2	"c2, 2"
+#define CPUID_EXT_ISAR3	"c2, 3"
+#define CPUID_EXT_ISAR4	"c2, 4"
+#define CPUID_EXT_ISAR5	"c2, 5"
+
 #ifdef CONFIG_CPU_CP15
 #define read_cpuid(reg)							\
 	({								\
@@ -18,9 +33,19 @@
 		    : "cc");						\
 		__val;							\
 	})
+#define read_cpuid_ext(ext_reg)						\
+	({								\
+		unsigned int __val;					\
+		asm("mrc	p15, 0, %0, c0, " ext_reg		\
+		    : "=r" (__val)					\
+		    :							\
+		    : "cc");						\
+		__val;							\
+	})
 #else
 extern unsigned int processor_id;
 #define read_cpuid(reg) (processor_id)
+#define read_cpuid_ext(reg) 0
 #endif
 
 /*
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index a622180..c964f3f 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -40,6 +40,12 @@
 #define TLB_V6_I_ASID	(1 << 18)
 
 #define TLB_BTB		(1 << 28)
+
+/* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
+#define TLB_V7_UIS_PAGE	(1 << 19)
+#define TLB_V7_UIS_FULL (1 << 20)
+#define TLB_V7_UIS_ASID (1 << 21)
+
 #define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_WB		(1 << 31)
@@ -176,9 +182,17 @@
 # define v6wbi_always_flags	(-1UL)
 #endif
 
+#ifdef CONFIG_SMP
+#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
+#else
+#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+			 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
+#endif
+
 #ifdef CONFIG_CPU_TLB_V7
-# define v7wbi_possible_flags	v6wbi_tlb_flags
-# define v7wbi_always_flags	v6wbi_tlb_flags
+# define v7wbi_possible_flags	v7wbi_tlb_flags
+# define v7wbi_always_flags	v7wbi_tlb_flags
 # ifdef _TLB
 #  define MULTI_TLB 1
 # else
@@ -316,6 +330,8 @@ static inline void local_flush_tlb_all(void)
 		asm("mcr p15, 0, %0, c8, c6, 0" : : "r" (zero) : "cc");
 	if (tlb_flag(TLB_V4_I_FULL | TLB_V6_I_FULL))
 		asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
+	if (tlb_flag(TLB_V7_UIS_FULL))
+		asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc");
 
 	if (tlb_flag(TLB_BTB)) {
 		/* flush the branch target cache */
@@ -351,6 +367,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 		asm("mcr p15, 0, %0, c8, c6, 2" : : "r" (asid) : "cc");
 	if (tlb_flag(TLB_V6_I_ASID))
 		asm("mcr p15, 0, %0, c8, c5, 2" : : "r" (asid) : "cc");
+	if (tlb_flag(TLB_V7_UIS_ASID))
+		asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc");
 
 	if (tlb_flag(TLB_BTB)) {
 		/* flush the branch target cache */
@@ -389,6 +407,8 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (uaddr) : "cc");
 	if (tlb_flag(TLB_V6_I_PAGE))
 		asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc");
+	if (tlb_flag(TLB_V7_UIS_PAGE))
+		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc");
 
 	if (tlb_flag(TLB_BTB)) {
 		/* flush the branch target cache */
@@ -424,6 +444,8 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 		asm("mcr p15, 0, %0, c8, c6, 1" : : "r" (kaddr) : "cc");
 	if (tlb_flag(TLB_V6_I_PAGE))
 		asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (kaddr) : "cc");
+	if (tlb_flag(TLB_V7_UIS_PAGE))
+		asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (kaddr) : "cc");
 
 	if (tlb_flag(TLB_BTB)) {
 		/* flush the branch target cache */
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 6014dfd..ece658e 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -32,6 +32,7 @@
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/ptrace.h>
+#include <asm/cputype.h>
 
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
@@ -545,6 +546,12 @@ struct tlb_args {
 	unsigned long ta_end;
 };
 
+/* all SMP configurations have the extended CPUID registers */
+static inline int tlb_ops_need_broadcast(void)
+{
+	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2;
+}
+
 static inline void ipi_flush_tlb_all(void *ignored)
 {
 	local_flush_tlb_all();
@@ -587,51 +594,61 @@ static inline void ipi_flush_tlb_kernel_range(void *arg)
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+	if (tlb_ops_need_broadcast())
+		on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+	else
+		local_flush_tlb_all();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask);
+	if (tlb_ops_need_broadcast())
+		on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask);
+	else
+		local_flush_tlb_mm(mm);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 {
-	struct tlb_args ta;
-
-	ta.ta_vma = vma;
-	ta.ta_start = uaddr;
-
-	on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_vma = vma;
+		ta.ta_start = uaddr;
+		on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+	} else
+		local_flush_tlb_page(vma, uaddr);
 }
 
 void flush_tlb_kernel_page(unsigned long kaddr)
 {
-	struct tlb_args ta;
-
-	ta.ta_start = kaddr;
-
-	on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_start = kaddr;
+		on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
+	} else
+		local_flush_tlb_kernel_page(kaddr);
 }
 
 void flush_tlb_range(struct vm_area_struct *vma,
                      unsigned long start, unsigned long end)
 {
-	struct tlb_args ta;
-
-	ta.ta_vma = vma;
-	ta.ta_start = start;
-	ta.ta_end = end;
-
-	on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_vma = vma;
+		ta.ta_start = start;
+		ta.ta_end = end;
+		on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask);
+	} else
+		local_flush_tlb_range(vma, start, end);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	struct tlb_args ta;
-
-	ta.ta_start = start;
-	ta.ta_end = end;
-
-	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+	if (tlb_ops_need_broadcast()) {
+		struct tlb_args ta;
+		ta.ta_start = start;
+		ta.ta_end = end;
+		on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+	} else
+		local_flush_tlb_kernel_range(start, end);
 }
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3397f1e..c3f7375 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -176,8 +176,8 @@ cpu_v7_name:
  */
 __v7_setup:
 #ifdef CONFIG_SMP
-	mrc	p15, 0, r0, c1, c0, 1		@ Enable SMP/nAMP mode
-	orr	r0, r0, #(0x1 << 6)
+	mrc	p15, 0, r0, c1, c0, 1		@ Enable SMP/nAMP mode and
+	orr	r0, r0, #(1 << 6) | (1 << 0)	@ TLB ops broadcasting
 	mcr	p15, 0, r0, c1, c0, 1
 #endif
 	adr	r12, __v7_setup_stack		@ the local stack
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index b637e73..a26a605 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -42,9 +42,11 @@ ENTRY(v7wbi_flush_user_tlb_range)
 	mov	r1, r1, lsl #PAGE_SHIFT
 	vma_vm_flags r2, r2			@ get vma->vm_flags
 1:
-	mcr	p15, 0, r0, c8, c6, 1		@ TLB invalidate D MVA (was 1)
-	tst	r2, #VM_EXEC			@ Executable area ?
-	mcrne	p15, 0, r0, c8, c5, 1		@ TLB invalidate I MVA (was 1)
+#ifdef CONFIG_SMP
+	mcr	p15, 0, r0, c8, c3, 1		@ TLB invalidate U MVA (shareable) 
+#else
+	mcr	p15, 0, r0, c8, c7, 1		@ TLB invalidate U MVA
+#endif
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
@@ -69,8 +71,11 @@ ENTRY(v7wbi_flush_kern_tlb_range)
 	mov	r0, r0, lsl #PAGE_SHIFT
 	mov	r1, r1, lsl #PAGE_SHIFT
 1:
-	mcr	p15, 0, r0, c8, c6, 1		@ TLB invalidate D MVA
-	mcr	p15, 0, r0, c8, c5, 1		@ TLB invalidate I MVA
+#ifdef CONFIG_SMP
+	mcr	p15, 0, r0, c8, c3, 1		@ TLB invalidate U MVA (shareable)
+#else
+	mcr	p15, 0, r0, c8, c7, 1		@ TLB invalidate U MVA
+#endif
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
@@ -87,5 +92,5 @@ ENDPROC(v7wbi_flush_kern_tlb_range)
 ENTRY(v7wbi_tlb_fns)
 	.long	v7wbi_flush_user_tlb_range
 	.long	v7wbi_flush_kern_tlb_range
-	.long	v6wbi_tlb_flags
+	.long	v7wbi_tlb_flags
 	.size	v7wbi_tlb_fns, . - v7wbi_tlb_fns
-- 
cgit v0.10.2


From ba3c02636a0743a62cb50b920d36e1c046ab88cf Mon Sep 17 00:00:00 2001
From: Tony Thompson <Anthony.Thompson@arm.com>
Date: Sat, 30 May 2009 14:00:15 +0100
Subject: ARMv7: Mark the PTWs inner WBWA on SMP and WB on UP

There are additional bits to set for the ARMv7 SMP extensions in the
TTBR registers. The IRGN bits order is counter-intuitive but it allows
software built for the ARMv7 base architecture to run on an
implementation with the MP extensions.

Signed-off-by: Tony Thompson <Anthony.Thompson@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index c3f7375..a18aace 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -19,17 +19,23 @@
 
 #include "proc-macros.S"
 
-#define TTB_C		(1 << 0)
 #define TTB_S		(1 << 1)
 #define TTB_RGN_NC	(0 << 3)
 #define TTB_RGN_OC_WBWA	(1 << 3)
 #define TTB_RGN_OC_WT	(2 << 3)
 #define TTB_RGN_OC_WB	(3 << 3)
+#define TTB_NOS		(1 << 5)
+#define TTB_IRGN_NC	((0 << 0) | (0 << 6))
+#define TTB_IRGN_WBWA	((0 << 0) | (1 << 6))
+#define TTB_IRGN_WT	((1 << 0) | (0 << 6))
+#define TTB_IRGN_WB	((1 << 0) | (1 << 6))
 
 #ifndef CONFIG_SMP
-#define TTB_FLAGS	TTB_C|TTB_RGN_OC_WB		@ mark PTWs cacheable, outer WB
+/* PTWs cacheable, inner WB not shareable, outer WB not shareable */
+#define TTB_FLAGS	TTB_IRGN_WB|TTB_RGN_OC_WB
 #else
-#define TTB_FLAGS	TTB_C|TTB_S|TTB_RGN_OC_WBWA	@ mark PTWs cacheable and shared, outer WBWA
+/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */
+#define TTB_FLAGS	TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA
 #endif
 
 ENTRY(cpu_v7_proc_init)
-- 
cgit v0.10.2


From d71e1352e240dea32d481ad8d662e8de4406ac7e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 14:00:15 +0100
Subject: Clear the IT state when invoking a Thumb-2 signal handler

If a process is interrupted during an If-Then block and a signal is
invoked, the ITSTATE bits must be cleared otherwise the handler would
not run correctly.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Joseph S. Myers <joseph@codesourcery.com>

diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 236a06b..4a4290f 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -65,6 +65,13 @@
 #define PSR_x		0x0000ff00	/* Extension		*/
 #define PSR_c		0x000000ff	/* Control		*/
 
+/*
+ * ARMv7 groups of APSR bits
+ */
+#define PSR_ISET_MASK	0x01000010	/* ISA state (J, T) mask */
+#define PSR_IT_MASK	0x0600fc00	/* If-Then execution state mask */
+#define PSR_ENDIAN_MASK	0x00000200	/* Endianness state mask */
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 80b8b5c..442b874 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -426,9 +426,13 @@ setup_return(struct pt_regs *regs, struct k_sigaction *ka,
 		 */
 		thumb = handler & 1;
 
-		if (thumb)
+		if (thumb) {
 			cpsr |= PSR_T_BIT;
-		else
+#if __LINUX_ARM_ARCH__ >= 7
+			/* clear the If-Then Thumb-2 execution state */
+			cpsr &= ~PSR_IT_MASK;
+#endif
+		} else
 			cpsr &= ~PSR_T_BIT;
 	}
 #endif
-- 
cgit v0.10.2


From 213fb2a8ee81ec106b9b370a07ccad575e9d3748 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 14:00:16 +0100
Subject: ARMv7: Enable the SWP instruction

The SWP instruction has been deprecated starting with the ARMv6
architecture. On ARMv7 processors with the multiprocessor extensions
(like Cortex-A9), this instruction is disabled by default but it can be
enabled by setting bit 10 in the System Control register. Note that
setting this bit is safe even if the ARMv7 processor has the SWP
instruction enabled by default.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index a18aace..095b69f 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -232,14 +232,14 @@ __v7_setup:
 ENDPROC(__v7_setup)
 
 	/*   AT
-	 *  TFR   EV X F   I D LR
-	 * .EEE ..EE PUI. .T.T 4RVI ZFRS BLDP WCAM
+	 *  TFR   EV X F   I D LR    S
+	 * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM
 	 * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced
-	 *    1    0 110       0011 1.00 .111 1101 < we want
+	 *    1    0 110       0011 1100 .111 1101 < we want
 	 */
 	.type	v7_crval, #object
 v7_crval:
-	crval	clear=0x0120c302, mmuset=0x10c0387d, ucset=0x00c0187c
+	crval	clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
 
 __v7_setup_stack:
 	.space	4 * 11				@ 11 registers
-- 
cgit v0.10.2


From 23d1c515d8fc6d74bea442a4b687c3b5b8627ec4 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 14:00:16 +0100
Subject: ARMv7: Document the PRRR and NMRR registers setting

This patch adds a comment to the proc-v7.S file for the setting of the
PRRR and NMRR registers. It also sets the PRRR[13:12] bits to 0
(corresponding to the reserved TEX[0]CB encoding 110) to be consistent
with the documentation.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 095b69f..0a8ffd3 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -219,8 +219,36 @@ __v7_setup:
 	mov	r10, #0x1f			@ domains 0, 1 = manager
 	mcr	p15, 0, r10, c3, c0, 0		@ load domain access register
 #endif
-	ldr	r5, =0xff0aa1a8
-	ldr	r6, =0x40e040e0
+	/*
+	 * Memory region attributes with SCTLR.TRE=1
+	 *
+	 *   n = TEX[0],C,B
+	 *   TR = PRRR[2n+1:2n]		- memory type
+	 *   IR = NMRR[2n+1:2n]		- inner cacheable property
+	 *   OR = NMRR[2n+17:2n+16]	- outer cacheable property
+	 *
+	 *			n	TR	IR	OR
+	 *   UNCACHED		000	00
+	 *   BUFFERABLE		001	10	00	00
+	 *   WRITETHROUGH	010	10	10	10
+	 *   WRITEBACK		011	10	11	11
+	 *   reserved		110
+	 *   WRITEALLOC		111	10	01	01
+	 *   DEV_SHARED		100	01
+	 *   DEV_NONSHARED	100	01
+	 *   DEV_WC		001	10
+	 *   DEV_CACHED		011	10
+	 *
+	 * Other attributes:
+	 *
+	 *   DS0 = PRRR[16] = 0		- device shareable property
+	 *   DS1 = PRRR[17] = 1		- device shareable property
+	 *   NS0 = PRRR[18] = 0		- normal shareable property
+	 *   NS1 = PRRR[19] = 1		- normal shareable property
+	 *   NOS = PRRR[24+n] = 1	- not outer shareable
+	 */
+	ldr	r5, =0xff0a81a8			@ PRRR
+	ldr	r6, =0x40e040e0			@ NMRR
 	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
 	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
 	adr	r5, v7_crval
-- 
cgit v0.10.2


From 8c7e65742fd05f27071282d260376adb98e0e9ac Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 14:00:17 +0100
Subject: arm: Provide _sdata and __bss_stop in the vmlinux.lds.S file

_sdata and __bss_stop are common symbols defined by many architectures
and made available to the kernel via asm-generic/sections.h. Kmemleak
uses these symbols when scanning the data sections.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index c90f272..6c07797 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -141,6 +141,7 @@ SECTIONS
 
 	.data : AT(__data_loc) {
 		_data = .;		/* address in memory */
+		_sdata = .;
 
 		/*
 		 * first, the init task union, aligned
@@ -192,6 +193,7 @@ SECTIONS
 		__bss_start = .;	/* BSS				*/
 		*(.bss)
 		*(COMMON)
+		__bss_stop = .;
 		_end = .;
 	}
 					/* Stabs debugging sections.	*/
-- 
cgit v0.10.2


From ee8c9571191e588ede9a220ded807e33c4897d91 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 14:00:17 +0100
Subject: RealView: Move the IRQ_* definitions out of the board-*.h files

The IRQ_* macros need to be made visible via the mach/irqs.h file but
without the additional macros defined in the board-*.h files.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 346cbf0..9ea9c05 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -48,6 +48,9 @@
 
 #include <asm/hardware/gic.h>
 
+#include <mach/platform.h>
+#include <mach/irqs.h>
+
 #include "core.h"
 #include "clock.h"
 
diff --git a/arch/arm/mach-realview/include/mach/board-eb.h b/arch/arm/mach-realview/include/mach/board-eb.h
index 268d770..794a8d9 100644
--- a/arch/arm/mach-realview/include/mach/board-eb.h
+++ b/arch/arm/mach-realview/include/mach/board-eb.h
@@ -62,111 +62,6 @@
 #define REALVIEW_EB11MP_SYS_PLD_CTRL1	0x74		/* Register offset for MPCore sysctl */
 #endif
 
-#define IRQ_EB_GIC_START	32
-
-/*
- * RealView EB interrupt sources
- */
-#define IRQ_EB_WDOG		(IRQ_EB_GIC_START + 0)		/* Watchdog timer */
-#define IRQ_EB_SOFT		(IRQ_EB_GIC_START + 1)		/* Software interrupt */
-#define IRQ_EB_COMMRx		(IRQ_EB_GIC_START + 2)		/* Debug Comm Rx interrupt */
-#define IRQ_EB_COMMTx		(IRQ_EB_GIC_START + 3)		/* Debug Comm Tx interrupt */
-#define IRQ_EB_TIMER0_1		(IRQ_EB_GIC_START + 4)		/* Timer 0 and 1 */
-#define IRQ_EB_TIMER2_3		(IRQ_EB_GIC_START + 5)		/* Timer 2 and 3 */
-#define IRQ_EB_GPIO0		(IRQ_EB_GIC_START + 6)		/* GPIO 0 */
-#define IRQ_EB_GPIO1		(IRQ_EB_GIC_START + 7)		/* GPIO 1 */
-#define IRQ_EB_GPIO2		(IRQ_EB_GIC_START + 8)		/* GPIO 2 */
-								/* 9 reserved */
-#define IRQ_EB_RTC		(IRQ_EB_GIC_START + 10)		/* Real Time Clock */
-#define IRQ_EB_SSP		(IRQ_EB_GIC_START + 11)		/* Synchronous Serial Port */
-#define IRQ_EB_UART0		(IRQ_EB_GIC_START + 12)		/* UART 0 on development chip */
-#define IRQ_EB_UART1		(IRQ_EB_GIC_START + 13)		/* UART 1 on development chip */
-#define IRQ_EB_UART2		(IRQ_EB_GIC_START + 14)		/* UART 2 on development chip */
-#define IRQ_EB_UART3		(IRQ_EB_GIC_START + 15)		/* UART 3 on development chip */
-#define IRQ_EB_SCI		(IRQ_EB_GIC_START + 16)		/* Smart Card Interface */
-#define IRQ_EB_MMCI0A		(IRQ_EB_GIC_START + 17)		/* Multimedia Card 0A */
-#define IRQ_EB_MMCI0B		(IRQ_EB_GIC_START + 18)		/* Multimedia Card 0B */
-#define IRQ_EB_AACI		(IRQ_EB_GIC_START + 19)		/* Audio Codec */
-#define IRQ_EB_KMI0		(IRQ_EB_GIC_START + 20)		/* Keyboard/Mouse port 0 */
-#define IRQ_EB_KMI1		(IRQ_EB_GIC_START + 21)		/* Keyboard/Mouse port 1 */
-#define IRQ_EB_CHARLCD		(IRQ_EB_GIC_START + 22)		/* Character LCD */
-#define IRQ_EB_CLCD		(IRQ_EB_GIC_START + 23)		/* CLCD controller */
-#define IRQ_EB_DMA		(IRQ_EB_GIC_START + 24)		/* DMA controller */
-#define IRQ_EB_PWRFAIL		(IRQ_EB_GIC_START + 25)		/* Power failure */
-#define IRQ_EB_PISMO		(IRQ_EB_GIC_START + 26)		/* PISMO interface */
-#define IRQ_EB_DoC		(IRQ_EB_GIC_START + 27)		/* Disk on Chip memory controller */
-#define IRQ_EB_ETH		(IRQ_EB_GIC_START + 28)		/* Ethernet controller */
-#define IRQ_EB_USB		(IRQ_EB_GIC_START + 29)		/* USB controller */
-#define IRQ_EB_TSPEN		(IRQ_EB_GIC_START + 30)		/* Touchscreen pen */
-#define IRQ_EB_TSKPAD		(IRQ_EB_GIC_START + 31)		/* Touchscreen keypad */
-
-/*
- * RealView EB + ARM11MPCore interrupt sources (primary GIC on the core tile)
- */
-#define IRQ_EB11MP_AACI		(IRQ_EB_GIC_START + 0)
-#define IRQ_EB11MP_TIMER0_1	(IRQ_EB_GIC_START + 1)
-#define IRQ_EB11MP_TIMER2_3	(IRQ_EB_GIC_START + 2)
-#define IRQ_EB11MP_USB		(IRQ_EB_GIC_START + 3)
-#define IRQ_EB11MP_UART0	(IRQ_EB_GIC_START + 4)
-#define IRQ_EB11MP_UART1	(IRQ_EB_GIC_START + 5)
-#define IRQ_EB11MP_RTC		(IRQ_EB_GIC_START + 6)
-#define IRQ_EB11MP_KMI0		(IRQ_EB_GIC_START + 7)
-#define IRQ_EB11MP_KMI1		(IRQ_EB_GIC_START + 8)
-#define IRQ_EB11MP_ETH		(IRQ_EB_GIC_START + 9)
-#define IRQ_EB11MP_EB_IRQ1	(IRQ_EB_GIC_START + 10)		/* main GIC */
-#define IRQ_EB11MP_EB_IRQ2	(IRQ_EB_GIC_START + 11)		/* tile GIC */
-#define IRQ_EB11MP_EB_FIQ1	(IRQ_EB_GIC_START + 12)		/* main GIC */
-#define IRQ_EB11MP_EB_FIQ2	(IRQ_EB_GIC_START + 13)		/* tile GIC */
-#define IRQ_EB11MP_MMCI0A	(IRQ_EB_GIC_START + 14)
-#define IRQ_EB11MP_MMCI0B	(IRQ_EB_GIC_START + 15)
-
-#define IRQ_EB11MP_PMU_CPU0	(IRQ_EB_GIC_START + 17)
-#define IRQ_EB11MP_PMU_CPU1	(IRQ_EB_GIC_START + 18)
-#define IRQ_EB11MP_PMU_CPU2	(IRQ_EB_GIC_START + 19)
-#define IRQ_EB11MP_PMU_CPU3	(IRQ_EB_GIC_START + 20)
-#define IRQ_EB11MP_PMU_SCU0	(IRQ_EB_GIC_START + 21)
-#define IRQ_EB11MP_PMU_SCU1	(IRQ_EB_GIC_START + 22)
-#define IRQ_EB11MP_PMU_SCU2	(IRQ_EB_GIC_START + 23)
-#define IRQ_EB11MP_PMU_SCU3	(IRQ_EB_GIC_START + 24)
-#define IRQ_EB11MP_PMU_SCU4	(IRQ_EB_GIC_START + 25)
-#define IRQ_EB11MP_PMU_SCU5	(IRQ_EB_GIC_START + 26)
-#define IRQ_EB11MP_PMU_SCU6	(IRQ_EB_GIC_START + 27)
-#define IRQ_EB11MP_PMU_SCU7	(IRQ_EB_GIC_START + 28)
-
-#define IRQ_EB11MP_L220_EVENT	(IRQ_EB_GIC_START + 29)
-#define IRQ_EB11MP_L220_SLAVE	(IRQ_EB_GIC_START + 30)
-#define IRQ_EB11MP_L220_DECODE	(IRQ_EB_GIC_START + 31)
-
-#define IRQ_EB11MP_UART2	-1
-#define IRQ_EB11MP_UART3	-1
-#define IRQ_EB11MP_CLCD		-1
-#define IRQ_EB11MP_DMA		-1
-#define IRQ_EB11MP_WDOG		-1
-#define IRQ_EB11MP_GPIO0	-1
-#define IRQ_EB11MP_GPIO1	-1
-#define IRQ_EB11MP_GPIO2	-1
-#define IRQ_EB11MP_SCI		-1
-#define IRQ_EB11MP_SSP		-1
-
-#define NR_GIC_EB11MP		2
-
-/*
- * Only define NR_IRQS if less than NR_IRQS_EB
- */
-#define NR_IRQS_EB		(IRQ_EB_GIC_START + 96)
-
-#if defined(CONFIG_MACH_REALVIEW_EB) \
-	&& (!defined(NR_IRQS) || (NR_IRQS < NR_IRQS_EB))
-#undef NR_IRQS
-#define NR_IRQS			NR_IRQS_EB
-#endif
-
-#if defined(CONFIG_REALVIEW_EB_ARM11MP) || defined(CONFIG_REALVIEW_EB_A9MP) \
-	&& (!defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_EB11MP))
-#undef MAX_GIC_NR
-#define MAX_GIC_NR		NR_GIC_EB11MP
-#endif
-
 /*
  * Core tile identification (REALVIEW_SYS_PROCID)
  */
diff --git a/arch/arm/mach-realview/include/mach/board-pb1176.h b/arch/arm/mach-realview/include/mach/board-pb1176.h
index e4d0e80..98f8e7e 100644
--- a/arch/arm/mach-realview/include/mach/board-pb1176.h
+++ b/arch/arm/mach-realview/include/mach/board-pb1176.h
@@ -73,82 +73,4 @@
 #define REALVIEW_PB1176_GIC_DIST_BASE		0x10041000 /* GIC distributor, on FPGA */
 #define REALVIEW_PB1176_L220_BASE		0x10110000 /* L220 registers */
 
-/*
- * Irqs
- */
-#define IRQ_DC1176_GIC_START			32
-#define IRQ_PB1176_GIC_START			64
-
-/*
- * ARM1176 DevChip interrupt sources (primary GIC)
- */
-#define IRQ_DC1176_WATCHDOG	(IRQ_DC1176_GIC_START + 0)	/* Watchdog timer */
-#define IRQ_DC1176_SOFTINT	(IRQ_DC1176_GIC_START + 1)	/* Software interrupt */
-#define IRQ_DC1176_COMMRx	(IRQ_DC1176_GIC_START + 2)	/* Debug Comm Rx interrupt */
-#define IRQ_DC1176_COMMTx	(IRQ_DC1176_GIC_START + 3)	/* Debug Comm Tx interrupt */
-#define IRQ_DC1176_TIMER0	(IRQ_DC1176_GIC_START + 8)	/* Timer 0 */
-#define IRQ_DC1176_TIMER1	(IRQ_DC1176_GIC_START + 9)	/* Timer 1 */
-#define IRQ_DC1176_TIMER2	(IRQ_DC1176_GIC_START + 10)	/* Timer 2 */
-#define IRQ_DC1176_APC		(IRQ_DC1176_GIC_START + 11)
-#define IRQ_DC1176_IEC		(IRQ_DC1176_GIC_START + 12)
-#define IRQ_DC1176_L2CC		(IRQ_DC1176_GIC_START + 13)
-#define IRQ_DC1176_RTC		(IRQ_DC1176_GIC_START + 14)
-#define IRQ_DC1176_CLCD		(IRQ_DC1176_GIC_START + 15)	/* CLCD controller */
-#define IRQ_DC1176_UART0	(IRQ_DC1176_GIC_START + 18)	/* UART 0 on development chip */
-#define IRQ_DC1176_UART1	(IRQ_DC1176_GIC_START + 19)	/* UART 1 on development chip */
-#define IRQ_DC1176_UART2	(IRQ_DC1176_GIC_START + 20)	/* UART 2 on development chip */
-#define IRQ_DC1176_UART3	(IRQ_DC1176_GIC_START + 21)	/* UART 3 on development chip */
-
-#define IRQ_DC1176_PB_IRQ2	(IRQ_DC1176_GIC_START + 30)	/* tile GIC */
-#define IRQ_DC1176_PB_IRQ1	(IRQ_DC1176_GIC_START + 31)	/* main GIC */
-
-/*
- * RealView PB1176 interrupt sources (secondary GIC)
- */
-#define IRQ_PB1176_MMCI0A	(IRQ_PB1176_GIC_START + 1)	/* Multimedia Card 0A */
-#define IRQ_PB1176_MMCI0B	(IRQ_PB1176_GIC_START + 2)	/* Multimedia Card 0A */
-#define IRQ_PB1176_KMI0		(IRQ_PB1176_GIC_START + 3)	/* Keyboard/Mouse port 0 */
-#define IRQ_PB1176_KMI1		(IRQ_PB1176_GIC_START + 4)	/* Keyboard/Mouse port 1 */
-#define IRQ_PB1176_SCI		(IRQ_PB1176_GIC_START + 5)
-#define IRQ_PB1176_UART4	(IRQ_PB1176_GIC_START + 6)	/* UART 4 on baseboard */
-#define IRQ_PB1176_CHARLCD	(IRQ_PB1176_GIC_START + 7)	/* Character LCD */
-#define IRQ_PB1176_GPIO1	(IRQ_PB1176_GIC_START + 8)
-#define IRQ_PB1176_GPIO2	(IRQ_PB1176_GIC_START + 9)
-#define IRQ_PB1176_ETH		(IRQ_PB1176_GIC_START + 10)	/* Ethernet controller */
-#define IRQ_PB1176_USB		(IRQ_PB1176_GIC_START + 11)	/* USB controller */
-
-#define IRQ_PB1176_PISMO	(IRQ_PB1176_GIC_START + 16)
-
-#define IRQ_PB1176_AACI		(IRQ_PB1176_GIC_START + 19)	/* Audio Codec */
-
-#define IRQ_PB1176_TIMER0_1	(IRQ_PB1176_GIC_START + 22)
-#define IRQ_PB1176_TIMER2_3	(IRQ_PB1176_GIC_START + 23)
-#define IRQ_PB1176_DMAC		(IRQ_PB1176_GIC_START + 24)	/* DMA controller */
-#define IRQ_PB1176_RTC		(IRQ_PB1176_GIC_START + 25)	/* Real Time Clock */
-
-#define IRQ_PB1176_GPIO0	-1
-#define IRQ_PB1176_SSP		-1
-#define IRQ_PB1176_SCTL		-1
-
-#define NR_GIC_PB1176		2
-
-/*
- * Only define NR_IRQS if less than NR_IRQS_PB1176
- */
-#define NR_IRQS_PB1176		(IRQ_DC1176_GIC_START + 96)
-
-#if defined(CONFIG_MACH_REALVIEW_PB1176)
-
-#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PB1176)
-#undef NR_IRQS
-#define NR_IRQS			NR_IRQS_PB1176
-#endif
-
-#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PB1176)
-#undef MAX_GIC_NR
-#define MAX_GIC_NR		NR_GIC_PB1176
-#endif
-
-#endif	/* CONFIG_MACH_REALVIEW_PB1176 */
-
 #endif	/* __ASM_ARCH_BOARD_PB1176_H */
diff --git a/arch/arm/mach-realview/include/mach/board-pb11mp.h b/arch/arm/mach-realview/include/mach/board-pb11mp.h
index 53ea0e7..f0d68e0 100644
--- a/arch/arm/mach-realview/include/mach/board-pb11mp.h
+++ b/arch/arm/mach-realview/include/mach/board-pb11mp.h
@@ -81,105 +81,4 @@
 #define REALVIEW_TC11MP_GIC_DIST_BASE		0x1F001000	/* Test chip interrupt controller distributor */
 #define REALVIEW_TC11MP_L220_BASE		0x1F002000	/* L220 registers */
 
-/*
- * Irqs
- */
-#define IRQ_TC11MP_GIC_START			32
-#define IRQ_PB11MP_GIC_START			64
-
-/*
- * ARM11MPCore test chip interrupt sources (primary GIC on the test chip)
- */
-#define IRQ_TC11MP_AACI		(IRQ_TC11MP_GIC_START + 0)
-#define IRQ_TC11MP_TIMER0_1	(IRQ_TC11MP_GIC_START + 1)
-#define IRQ_TC11MP_TIMER2_3	(IRQ_TC11MP_GIC_START + 2)
-#define IRQ_TC11MP_USB		(IRQ_TC11MP_GIC_START + 3)
-#define IRQ_TC11MP_UART0	(IRQ_TC11MP_GIC_START + 4)
-#define IRQ_TC11MP_UART1	(IRQ_TC11MP_GIC_START + 5)
-#define IRQ_TC11MP_RTC		(IRQ_TC11MP_GIC_START + 6)
-#define IRQ_TC11MP_KMI0		(IRQ_TC11MP_GIC_START + 7)
-#define IRQ_TC11MP_KMI1		(IRQ_TC11MP_GIC_START + 8)
-#define IRQ_TC11MP_ETH		(IRQ_TC11MP_GIC_START + 9)
-#define IRQ_TC11MP_PB_IRQ1	(IRQ_TC11MP_GIC_START + 10)		/* main GIC */
-#define IRQ_TC11MP_PB_IRQ2	(IRQ_TC11MP_GIC_START + 11)		/* tile GIC */
-#define IRQ_TC11MP_PB_FIQ1	(IRQ_TC11MP_GIC_START + 12)		/* main GIC */
-#define IRQ_TC11MP_PB_FIQ2	(IRQ_TC11MP_GIC_START + 13)		/* tile GIC */
-#define IRQ_TC11MP_MMCI0A	(IRQ_TC11MP_GIC_START + 14)
-#define IRQ_TC11MP_MMCI0B	(IRQ_TC11MP_GIC_START + 15)
-
-#define IRQ_TC11MP_PMU_CPU0	(IRQ_TC11MP_GIC_START + 17)
-#define IRQ_TC11MP_PMU_CPU1	(IRQ_TC11MP_GIC_START + 18)
-#define IRQ_TC11MP_PMU_CPU2	(IRQ_TC11MP_GIC_START + 19)
-#define IRQ_TC11MP_PMU_CPU3	(IRQ_TC11MP_GIC_START + 20)
-#define IRQ_TC11MP_PMU_SCU0	(IRQ_TC11MP_GIC_START + 21)
-#define IRQ_TC11MP_PMU_SCU1	(IRQ_TC11MP_GIC_START + 22)
-#define IRQ_TC11MP_PMU_SCU2	(IRQ_TC11MP_GIC_START + 23)
-#define IRQ_TC11MP_PMU_SCU3	(IRQ_TC11MP_GIC_START + 24)
-#define IRQ_TC11MP_PMU_SCU4	(IRQ_TC11MP_GIC_START + 25)
-#define IRQ_TC11MP_PMU_SCU5	(IRQ_TC11MP_GIC_START + 26)
-#define IRQ_TC11MP_PMU_SCU6	(IRQ_TC11MP_GIC_START + 27)
-#define IRQ_TC11MP_PMU_SCU7	(IRQ_TC11MP_GIC_START + 28)
-
-#define IRQ_TC11MP_L220_EVENT	(IRQ_TC11MP_GIC_START + 29)
-#define IRQ_TC11MP_L220_SLAVE	(IRQ_TC11MP_GIC_START + 30)
-#define IRQ_TC11MP_L220_DECODE	(IRQ_TC11MP_GIC_START + 31)
-
-/*
- * RealView PB11MPCore GIC interrupt sources (secondary GIC on the board)
- */
-#define IRQ_PB11MP_WATCHDOG	(IRQ_PB11MP_GIC_START + 0)	/* Watchdog timer */
-#define IRQ_PB11MP_SOFT		(IRQ_PB11MP_GIC_START + 1)	/* Software interrupt */
-#define IRQ_PB11MP_COMMRx	(IRQ_PB11MP_GIC_START + 2)	/* Debug Comm Rx interrupt */
-#define IRQ_PB11MP_COMMTx	(IRQ_PB11MP_GIC_START + 3)	/* Debug Comm Tx interrupt */
-#define IRQ_PB11MP_GPIO0	(IRQ_PB11MP_GIC_START + 6)	/* GPIO 0 */
-#define IRQ_PB11MP_GPIO1	(IRQ_PB11MP_GIC_START + 7)	/* GPIO 1 */
-#define IRQ_PB11MP_GPIO2	(IRQ_PB11MP_GIC_START + 8)	/* GPIO 2 */
-								/* 9 reserved */
-#define IRQ_PB11MP_RTC_GIC1	(IRQ_PB11MP_GIC_START + 10)	/* Real Time Clock */
-#define IRQ_PB11MP_SSP		(IRQ_PB11MP_GIC_START + 11)	/* Synchronous Serial Port */
-#define IRQ_PB11MP_UART0_GIC1	(IRQ_PB11MP_GIC_START + 12)	/* UART 0 on development chip */
-#define IRQ_PB11MP_UART1_GIC1	(IRQ_PB11MP_GIC_START + 13)	/* UART 1 on development chip */
-#define IRQ_PB11MP_UART2	(IRQ_PB11MP_GIC_START + 14)	/* UART 2 on development chip */
-#define IRQ_PB11MP_UART3	(IRQ_PB11MP_GIC_START + 15)	/* UART 3 on development chip */
-#define IRQ_PB11MP_SCI		(IRQ_PB11MP_GIC_START + 16)	/* Smart Card Interface */
-#define IRQ_PB11MP_MMCI0A_GIC1	(IRQ_PB11MP_GIC_START + 17)	/* Multimedia Card 0A */
-#define IRQ_PB11MP_MMCI0B_GIC1	(IRQ_PB11MP_GIC_START + 18)	/* Multimedia Card 0B */
-#define IRQ_PB11MP_AACI_GIC1	(IRQ_PB11MP_GIC_START + 19)	/* Audio Codec */
-#define IRQ_PB11MP_KMI0_GIC1	(IRQ_PB11MP_GIC_START + 20)	/* Keyboard/Mouse port 0 */
-#define IRQ_PB11MP_KMI1_GIC1	(IRQ_PB11MP_GIC_START + 21)	/* Keyboard/Mouse port 1 */
-#define IRQ_PB11MP_CHARLCD	(IRQ_PB11MP_GIC_START + 22)	/* Character LCD */
-#define IRQ_PB11MP_CLCD		(IRQ_PB11MP_GIC_START + 23)	/* CLCD controller */
-#define IRQ_PB11MP_DMAC		(IRQ_PB11MP_GIC_START + 24)	/* DMA controller */
-#define IRQ_PB11MP_PWRFAIL	(IRQ_PB11MP_GIC_START + 25)	/* Power failure */
-#define IRQ_PB11MP_PISMO	(IRQ_PB11MP_GIC_START + 26)	/* PISMO interface */
-#define IRQ_PB11MP_DoC		(IRQ_PB11MP_GIC_START + 27)	/* Disk on Chip memory controller */
-#define IRQ_PB11MP_ETH_GIC1	(IRQ_PB11MP_GIC_START + 28)	/* Ethernet controller */
-#define IRQ_PB11MP_USB_GIC1	(IRQ_PB11MP_GIC_START + 29)	/* USB controller */
-#define IRQ_PB11MP_TSPEN	(IRQ_PB11MP_GIC_START + 30)	/* Touchscreen pen */
-#define IRQ_PB11MP_TSKPAD	(IRQ_PB11MP_GIC_START + 31)	/* Touchscreen keypad */
-
-#define IRQ_PB11MP_SMC		-1
-#define IRQ_PB11MP_SCTL		-1
-
-#define NR_GIC_PB11MP		2
-
-/*
- * Only define NR_IRQS if less than NR_IRQS_PB11MP
- */
-#define NR_IRQS_PB11MP		(IRQ_TC11MP_GIC_START + 96)
-
-#if defined(CONFIG_MACH_REALVIEW_PB11MP)
-
-#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PB11MP)
-#undef NR_IRQS
-#define NR_IRQS			NR_IRQS_PB11MP
-#endif
-
-#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PB11MP)
-#undef MAX_GIC_NR
-#define MAX_GIC_NR		NR_GIC_PB11MP
-#endif
-
-#endif	/* CONFIG_MACH_REALVIEW_PB11MP */
-
 #endif	/* __ASM_ARCH_BOARD_PB11MP_H */
diff --git a/arch/arm/mach-realview/include/mach/board-pba8.h b/arch/arm/mach-realview/include/mach/board-pba8.h
index 307f97b..4dfc67a 100644
--- a/arch/arm/mach-realview/include/mach/board-pba8.h
+++ b/arch/arm/mach-realview/include/mach/board-pba8.h
@@ -70,81 +70,4 @@
 #define REALVIEW_PBA8_PCI_IO_SIZE		0x1000		/* 4 Kb */
 #define REALVIEW_PBA8_PCI_MEM_SIZE		0x20000000	/* 512 MB */
 
-/*
- * Irqs
- */
-#define IRQ_PBA8_GIC_START			32
-
-/* L220
-#define IRQ_PBA8_L220_EVENT	(IRQ_PBA8_GIC_START + 29)
-#define IRQ_PBA8_L220_SLAVE	(IRQ_PBA8_GIC_START + 30)
-#define IRQ_PBA8_L220_DECODE	(IRQ_PBA8_GIC_START + 31)
-*/
-
-/*
- * PB-A8 on-board gic irq sources
- */
-#define IRQ_PBA8_WATCHDOG	(IRQ_PBA8_GIC_START + 0)	/* Watchdog timer */
-#define IRQ_PBA8_SOFT		(IRQ_PBA8_GIC_START + 1)	/* Software interrupt */
-#define IRQ_PBA8_COMMRx		(IRQ_PBA8_GIC_START + 2)	/* Debug Comm Rx interrupt */
-#define IRQ_PBA8_COMMTx		(IRQ_PBA8_GIC_START + 3)	/* Debug Comm Tx interrupt */
-#define IRQ_PBA8_TIMER0_1	(IRQ_PBA8_GIC_START + 4)	/* Timer 0/1 (default timer) */
-#define IRQ_PBA8_TIMER2_3	(IRQ_PBA8_GIC_START + 5)	/* Timer 2/3 */
-#define IRQ_PBA8_GPIO0		(IRQ_PBA8_GIC_START + 6)	/* GPIO 0 */
-#define IRQ_PBA8_GPIO1		(IRQ_PBA8_GIC_START + 7)	/* GPIO 1 */
-#define IRQ_PBA8_GPIO2		(IRQ_PBA8_GIC_START + 8)	/* GPIO 2 */
-								/* 9 reserved */
-#define IRQ_PBA8_RTC		(IRQ_PBA8_GIC_START + 10)	/* Real Time Clock */
-#define IRQ_PBA8_SSP		(IRQ_PBA8_GIC_START + 11)	/* Synchronous Serial Port */
-#define IRQ_PBA8_UART0		(IRQ_PBA8_GIC_START + 12)	/* UART 0 on development chip */
-#define IRQ_PBA8_UART1		(IRQ_PBA8_GIC_START + 13)	/* UART 1 on development chip */
-#define IRQ_PBA8_UART2		(IRQ_PBA8_GIC_START + 14)	/* UART 2 on development chip */
-#define IRQ_PBA8_UART3		(IRQ_PBA8_GIC_START + 15)	/* UART 3 on development chip */
-#define IRQ_PBA8_SCI		(IRQ_PBA8_GIC_START + 16)	/* Smart Card Interface */
-#define IRQ_PBA8_MMCI0A		(IRQ_PBA8_GIC_START + 17)	/* Multimedia Card 0A */
-#define IRQ_PBA8_MMCI0B		(IRQ_PBA8_GIC_START + 18)	/* Multimedia Card 0B */
-#define IRQ_PBA8_AACI		(IRQ_PBA8_GIC_START + 19)	/* Audio Codec */
-#define IRQ_PBA8_KMI0		(IRQ_PBA8_GIC_START + 20)	/* Keyboard/Mouse port 0 */
-#define IRQ_PBA8_KMI1		(IRQ_PBA8_GIC_START + 21)	/* Keyboard/Mouse port 1 */
-#define IRQ_PBA8_CHARLCD	(IRQ_PBA8_GIC_START + 22)	/* Character LCD */
-#define IRQ_PBA8_CLCD		(IRQ_PBA8_GIC_START + 23)	/* CLCD controller */
-#define IRQ_PBA8_DMAC		(IRQ_PBA8_GIC_START + 24)	/* DMA controller */
-#define IRQ_PBA8_PWRFAIL	(IRQ_PBA8_GIC_START + 25)	/* Power failure */
-#define IRQ_PBA8_PISMO		(IRQ_PBA8_GIC_START + 26)	/* PISMO interface */
-#define IRQ_PBA8_DoC		(IRQ_PBA8_GIC_START + 27)	/* Disk on Chip memory controller */
-#define IRQ_PBA8_ETH		(IRQ_PBA8_GIC_START + 28)	/* Ethernet controller */
-#define IRQ_PBA8_USB		(IRQ_PBA8_GIC_START + 29)	/* USB controller */
-#define IRQ_PBA8_TSPEN		(IRQ_PBA8_GIC_START + 30)	/* Touchscreen pen */
-#define IRQ_PBA8_TSKPAD		(IRQ_PBA8_GIC_START + 31)	/* Touchscreen keypad */
-
-/* ... */
-#define IRQ_PBA8_PCI0		(IRQ_PBA8_GIC_START + 50)
-#define IRQ_PBA8_PCI1		(IRQ_PBA8_GIC_START + 51)
-#define IRQ_PBA8_PCI2		(IRQ_PBA8_GIC_START + 52)
-#define IRQ_PBA8_PCI3		(IRQ_PBA8_GIC_START + 53)
-
-#define IRQ_PBA8_SMC		-1
-#define IRQ_PBA8_SCTL		-1
-
-#define NR_GIC_PBA8		1
-
-/*
- * Only define NR_IRQS if less than NR_IRQS_PBA8
- */
-#define NR_IRQS_PBA8		(IRQ_PBA8_GIC_START + 64)
-
-#if defined(CONFIG_MACH_REALVIEW_PBA8)
-
-#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PBA8)
-#undef NR_IRQS
-#define NR_IRQS			NR_IRQS_PBA8
-#endif
-
-#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PBA8)
-#undef MAX_GIC_NR
-#define MAX_GIC_NR		NR_GIC_PBA8
-#endif
-
-#endif	/* CONFIG_MACH_REALVIEW_PBA8 */
-
 #endif	/* __ASM_ARCH_BOARD_PBA8_H */
diff --git a/arch/arm/mach-realview/include/mach/board-pbx.h b/arch/arm/mach-realview/include/mach/board-pbx.h
index c265093..848bfff 100644
--- a/arch/arm/mach-realview/include/mach/board-pbx.h
+++ b/arch/arm/mach-realview/include/mach/board-pbx.h
@@ -81,101 +81,6 @@
 #define REALVIEW_PBX_PCI_MEM_SIZE		0x20000000	/* 512 MB */
 
 /*
- * Irqs
- */
-#define IRQ_PBX_GIC_START			32
-
-/* L220
-#define IRQ_PBX_L220_EVENT	(IRQ_PBX_GIC_START + 29)
-#define IRQ_PBX_L220_SLAVE	(IRQ_PBX_GIC_START + 30)
-#define IRQ_PBX_L220_DECODE	(IRQ_PBX_GIC_START + 31)
-*/
-
-/*
- * PB-X on-board gic irq sources
- */
-#define IRQ_PBX_WATCHDOG	(IRQ_PBX_GIC_START + 0)	/* Watchdog timer */
-#define IRQ_PBX_SOFT		(IRQ_PBX_GIC_START + 1)	/* Software interrupt */
-#define IRQ_PBX_COMMRx		(IRQ_PBX_GIC_START + 2)	/* Debug Comm Rx interrupt */
-#define IRQ_PBX_COMMTx		(IRQ_PBX_GIC_START + 3)	/* Debug Comm Tx interrupt */
-#define IRQ_PBX_TIMER0_1	(IRQ_PBX_GIC_START + 4)	/* Timer 0/1 (default timer) */
-#define IRQ_PBX_TIMER2_3	(IRQ_PBX_GIC_START + 5)	/* Timer 2/3 */
-#define IRQ_PBX_GPIO0		(IRQ_PBX_GIC_START + 6)	/* GPIO 0 */
-#define IRQ_PBX_GPIO1		(IRQ_PBX_GIC_START + 7)	/* GPIO 1 */
-#define IRQ_PBX_GPIO2		(IRQ_PBX_GIC_START + 8)	/* GPIO 2 */
-								/* 9 reserved */
-#define IRQ_PBX_RTC		(IRQ_PBX_GIC_START + 10)	/* Real Time Clock */
-#define IRQ_PBX_SSP		(IRQ_PBX_GIC_START + 11)	/* Synchronous Serial Port */
-#define IRQ_PBX_UART0		(IRQ_PBX_GIC_START + 12)	/* UART 0 on development chip */
-#define IRQ_PBX_UART1		(IRQ_PBX_GIC_START + 13)	/* UART 1 on development chip */
-#define IRQ_PBX_UART2		(IRQ_PBX_GIC_START + 14)	/* UART 2 on development chip */
-#define IRQ_PBX_UART3		(IRQ_PBX_GIC_START + 15)	/* UART 3 on development chip */
-#define IRQ_PBX_SCI		(IRQ_PBX_GIC_START + 16)	/* Smart Card Interface */
-#define IRQ_PBX_MMCI0A		(IRQ_PBX_GIC_START + 17)	/* Multimedia Card 0A */
-#define IRQ_PBX_MMCI0B		(IRQ_PBX_GIC_START + 18)	/* Multimedia Card 0B */
-#define IRQ_PBX_AACI		(IRQ_PBX_GIC_START + 19)	/* Audio Codec */
-#define IRQ_PBX_KMI0		(IRQ_PBX_GIC_START + 20)	/* Keyboard/Mouse port 0 */
-#define IRQ_PBX_KMI1		(IRQ_PBX_GIC_START + 21)	/* Keyboard/Mouse port 1 */
-#define IRQ_PBX_CHARLCD		(IRQ_PBX_GIC_START + 22)	/* Character LCD */
-#define IRQ_PBX_CLCD		(IRQ_PBX_GIC_START + 23)	/* CLCD controller */
-#define IRQ_PBX_DMAC		(IRQ_PBX_GIC_START + 24)	/* DMA controller */
-#define IRQ_PBX_PWRFAIL		(IRQ_PBX_GIC_START + 25)	/* Power failure */
-#define IRQ_PBX_PISMO		(IRQ_PBX_GIC_START + 26)	/* PISMO interface */
-#define IRQ_PBX_DoC		(IRQ_PBX_GIC_START + 27)	/* Disk on Chip memory controller */
-#define IRQ_PBX_ETH		(IRQ_PBX_GIC_START + 28)	/* Ethernet controller */
-#define IRQ_PBX_USB		(IRQ_PBX_GIC_START + 29)	/* USB controller */
-#define IRQ_PBX_TSPEN		(IRQ_PBX_GIC_START + 30)	/* Touchscreen pen */
-#define IRQ_PBX_TSKPAD		(IRQ_PBX_GIC_START + 31)	/* Touchscreen keypad */
-
-#define IRQ_PBX_PMU_SCU0        (IRQ_PBX_GIC_START + 32)        /* SCU PMU Interrupts (11mp) */
-#define IRQ_PBX_PMU_SCU1        (IRQ_PBX_GIC_START + 33)
-#define IRQ_PBX_PMU_SCU2        (IRQ_PBX_GIC_START + 34)
-#define IRQ_PBX_PMU_SCU3        (IRQ_PBX_GIC_START + 35)
-#define IRQ_PBX_PMU_SCU4        (IRQ_PBX_GIC_START + 36)
-#define IRQ_PBX_PMU_SCU5        (IRQ_PBX_GIC_START + 37)
-#define IRQ_PBX_PMU_SCU6        (IRQ_PBX_GIC_START + 38)
-#define IRQ_PBX_PMU_SCU7        (IRQ_PBX_GIC_START + 39)
-
-#define IRQ_PBX_WATCHDOG1       (IRQ_PBX_GIC_START + 40)        /* Watchdog1 timer */
-#define IRQ_PBX_TIMER4_5        (IRQ_PBX_GIC_START + 41)        /* Timer 0/1 (default timer) */
-#define IRQ_PBX_TIMER6_7        (IRQ_PBX_GIC_START + 42)        /* Timer 2/3 */
-/* ... */
-#define IRQ_PBX_PMU_CPU3        (IRQ_PBX_GIC_START + 44)        /* CPU PMU Interrupts */
-#define IRQ_PBX_PMU_CPU2        (IRQ_PBX_GIC_START + 45)
-#define IRQ_PBX_PMU_CPU1        (IRQ_PBX_GIC_START + 46)
-#define IRQ_PBX_PMU_CPU0        (IRQ_PBX_GIC_START + 47)
-
-/* ... */
-#define IRQ_PBX_PCI0		(IRQ_PBX_GIC_START + 50)
-#define IRQ_PBX_PCI1		(IRQ_PBX_GIC_START + 51)
-#define IRQ_PBX_PCI2		(IRQ_PBX_GIC_START + 52)
-#define IRQ_PBX_PCI3		(IRQ_PBX_GIC_START + 53)
-
-#define IRQ_PBX_SMC		-1
-#define IRQ_PBX_SCTL		-1
-
-#define NR_GIC_PBX		1
-
-/*
- * Only define NR_IRQS if less than NR_IRQS_PBX
- */
-#define NR_IRQS_PBX		(IRQ_PBX_GIC_START + 96)
-
-#if defined(CONFIG_MACH_REALVIEW_PBX)
-
-#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PBX)
-#undef NR_IRQS
-#define NR_IRQS			NR_IRQS_PBX
-#endif
-
-#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PBX)
-#undef MAX_GIC_NR
-#define MAX_GIC_NR		NR_GIC_PBX
-#endif
-
-#endif	/* CONFIG_MACH_REALVIEW_PBX */
-
-/*
  * Core tile identification (REALVIEW_SYS_PROCID)
  */
 #define REALVIEW_PBX_PROC_MASK          0xFF000000
diff --git a/arch/arm/mach-realview/include/mach/irqs-eb.h b/arch/arm/mach-realview/include/mach/irqs-eb.h
new file mode 100644
index 0000000..204d537
--- /dev/null
+++ b/arch/arm/mach-realview/include/mach/irqs-eb.h
@@ -0,0 +1,129 @@
+/*
+ * arch/arm/mach-realview/include/mach/irqs-eb.h
+ *
+ * Copyright (C) 2007 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __MACH_IRQS_EB_H
+#define __MACH_IRQS_EB_H
+
+#define IRQ_EB_GIC_START	32
+
+/*
+ * RealView EB interrupt sources
+ */
+#define IRQ_EB_WDOG		(IRQ_EB_GIC_START + 0)		/* Watchdog timer */
+#define IRQ_EB_SOFT		(IRQ_EB_GIC_START + 1)		/* Software interrupt */
+#define IRQ_EB_COMMRx		(IRQ_EB_GIC_START + 2)		/* Debug Comm Rx interrupt */
+#define IRQ_EB_COMMTx		(IRQ_EB_GIC_START + 3)		/* Debug Comm Tx interrupt */
+#define IRQ_EB_TIMER0_1		(IRQ_EB_GIC_START + 4)		/* Timer 0 and 1 */
+#define IRQ_EB_TIMER2_3		(IRQ_EB_GIC_START + 5)		/* Timer 2 and 3 */
+#define IRQ_EB_GPIO0		(IRQ_EB_GIC_START + 6)		/* GPIO 0 */
+#define IRQ_EB_GPIO1		(IRQ_EB_GIC_START + 7)		/* GPIO 1 */
+#define IRQ_EB_GPIO2		(IRQ_EB_GIC_START + 8)		/* GPIO 2 */
+								/* 9 reserved */
+#define IRQ_EB_RTC		(IRQ_EB_GIC_START + 10)		/* Real Time Clock */
+#define IRQ_EB_SSP		(IRQ_EB_GIC_START + 11)		/* Synchronous Serial Port */
+#define IRQ_EB_UART0		(IRQ_EB_GIC_START + 12)		/* UART 0 on development chip */
+#define IRQ_EB_UART1		(IRQ_EB_GIC_START + 13)		/* UART 1 on development chip */
+#define IRQ_EB_UART2		(IRQ_EB_GIC_START + 14)		/* UART 2 on development chip */
+#define IRQ_EB_UART3		(IRQ_EB_GIC_START + 15)		/* UART 3 on development chip */
+#define IRQ_EB_SCI		(IRQ_EB_GIC_START + 16)		/* Smart Card Interface */
+#define IRQ_EB_MMCI0A		(IRQ_EB_GIC_START + 17)		/* Multimedia Card 0A */
+#define IRQ_EB_MMCI0B		(IRQ_EB_GIC_START + 18)		/* Multimedia Card 0B */
+#define IRQ_EB_AACI		(IRQ_EB_GIC_START + 19)		/* Audio Codec */
+#define IRQ_EB_KMI0		(IRQ_EB_GIC_START + 20)		/* Keyboard/Mouse port 0 */
+#define IRQ_EB_KMI1		(IRQ_EB_GIC_START + 21)		/* Keyboard/Mouse port 1 */
+#define IRQ_EB_CHARLCD		(IRQ_EB_GIC_START + 22)		/* Character LCD */
+#define IRQ_EB_CLCD		(IRQ_EB_GIC_START + 23)		/* CLCD controller */
+#define IRQ_EB_DMA		(IRQ_EB_GIC_START + 24)		/* DMA controller */
+#define IRQ_EB_PWRFAIL		(IRQ_EB_GIC_START + 25)		/* Power failure */
+#define IRQ_EB_PISMO		(IRQ_EB_GIC_START + 26)		/* PISMO interface */
+#define IRQ_EB_DoC		(IRQ_EB_GIC_START + 27)		/* Disk on Chip memory controller */
+#define IRQ_EB_ETH		(IRQ_EB_GIC_START + 28)		/* Ethernet controller */
+#define IRQ_EB_USB		(IRQ_EB_GIC_START + 29)		/* USB controller */
+#define IRQ_EB_TSPEN		(IRQ_EB_GIC_START + 30)		/* Touchscreen pen */
+#define IRQ_EB_TSKPAD		(IRQ_EB_GIC_START + 31)		/* Touchscreen keypad */
+
+/*
+ * RealView EB + ARM11MPCore interrupt sources (primary GIC on the core tile)
+ */
+#define IRQ_EB11MP_AACI		(IRQ_EB_GIC_START + 0)
+#define IRQ_EB11MP_TIMER0_1	(IRQ_EB_GIC_START + 1)
+#define IRQ_EB11MP_TIMER2_3	(IRQ_EB_GIC_START + 2)
+#define IRQ_EB11MP_USB		(IRQ_EB_GIC_START + 3)
+#define IRQ_EB11MP_UART0	(IRQ_EB_GIC_START + 4)
+#define IRQ_EB11MP_UART1	(IRQ_EB_GIC_START + 5)
+#define IRQ_EB11MP_RTC		(IRQ_EB_GIC_START + 6)
+#define IRQ_EB11MP_KMI0		(IRQ_EB_GIC_START + 7)
+#define IRQ_EB11MP_KMI1		(IRQ_EB_GIC_START + 8)
+#define IRQ_EB11MP_ETH		(IRQ_EB_GIC_START + 9)
+#define IRQ_EB11MP_EB_IRQ1	(IRQ_EB_GIC_START + 10)		/* main GIC */
+#define IRQ_EB11MP_EB_IRQ2	(IRQ_EB_GIC_START + 11)		/* tile GIC */
+#define IRQ_EB11MP_EB_FIQ1	(IRQ_EB_GIC_START + 12)		/* main GIC */
+#define IRQ_EB11MP_EB_FIQ2	(IRQ_EB_GIC_START + 13)		/* tile GIC */
+#define IRQ_EB11MP_MMCI0A	(IRQ_EB_GIC_START + 14)
+#define IRQ_EB11MP_MMCI0B	(IRQ_EB_GIC_START + 15)
+
+#define IRQ_EB11MP_PMU_CPU0	(IRQ_EB_GIC_START + 17)
+#define IRQ_EB11MP_PMU_CPU1	(IRQ_EB_GIC_START + 18)
+#define IRQ_EB11MP_PMU_CPU2	(IRQ_EB_GIC_START + 19)
+#define IRQ_EB11MP_PMU_CPU3	(IRQ_EB_GIC_START + 20)
+#define IRQ_EB11MP_PMU_SCU0	(IRQ_EB_GIC_START + 21)
+#define IRQ_EB11MP_PMU_SCU1	(IRQ_EB_GIC_START + 22)
+#define IRQ_EB11MP_PMU_SCU2	(IRQ_EB_GIC_START + 23)
+#define IRQ_EB11MP_PMU_SCU3	(IRQ_EB_GIC_START + 24)
+#define IRQ_EB11MP_PMU_SCU4	(IRQ_EB_GIC_START + 25)
+#define IRQ_EB11MP_PMU_SCU5	(IRQ_EB_GIC_START + 26)
+#define IRQ_EB11MP_PMU_SCU6	(IRQ_EB_GIC_START + 27)
+#define IRQ_EB11MP_PMU_SCU7	(IRQ_EB_GIC_START + 28)
+
+#define IRQ_EB11MP_L220_EVENT	(IRQ_EB_GIC_START + 29)
+#define IRQ_EB11MP_L220_SLAVE	(IRQ_EB_GIC_START + 30)
+#define IRQ_EB11MP_L220_DECODE	(IRQ_EB_GIC_START + 31)
+
+#define IRQ_EB11MP_UART2	-1
+#define IRQ_EB11MP_UART3	-1
+#define IRQ_EB11MP_CLCD		-1
+#define IRQ_EB11MP_DMA		-1
+#define IRQ_EB11MP_WDOG		-1
+#define IRQ_EB11MP_GPIO0	-1
+#define IRQ_EB11MP_GPIO1	-1
+#define IRQ_EB11MP_GPIO2	-1
+#define IRQ_EB11MP_SCI		-1
+#define IRQ_EB11MP_SSP		-1
+
+#define NR_GIC_EB11MP		2
+
+/*
+ * Only define NR_IRQS if less than NR_IRQS_EB
+ */
+#define NR_IRQS_EB		(IRQ_EB_GIC_START + 96)
+
+#if defined(CONFIG_MACH_REALVIEW_EB) \
+	&& (!defined(NR_IRQS) || (NR_IRQS < NR_IRQS_EB))
+#undef NR_IRQS
+#define NR_IRQS			NR_IRQS_EB
+#endif
+
+#if defined(CONFIG_REALVIEW_EB_ARM11MP) || defined(CONFIG_REALVIEW_EB_A9MP) \
+	&& (!defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_EB11MP))
+#undef MAX_GIC_NR
+#define MAX_GIC_NR		NR_GIC_EB11MP
+#endif
+
+#endif	/* __MACH_IRQS_EB_H */
diff --git a/arch/arm/mach-realview/include/mach/irqs-pb1176.h b/arch/arm/mach-realview/include/mach/irqs-pb1176.h
new file mode 100644
index 0000000..2410d4f
--- /dev/null
+++ b/arch/arm/mach-realview/include/mach/irqs-pb1176.h
@@ -0,0 +1,99 @@
+/*
+ * arch/arm/mach-realview/include/mach/irqs-pb1176.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __MACH_IRQS_PB1176_H
+#define __MACH_IRQS_PB1176_H
+
+#define IRQ_DC1176_GIC_START			32
+#define IRQ_PB1176_GIC_START			64
+
+/*
+ * ARM1176 DevChip interrupt sources (primary GIC)
+ */
+#define IRQ_DC1176_WATCHDOG	(IRQ_DC1176_GIC_START + 0)	/* Watchdog timer */
+#define IRQ_DC1176_SOFTINT	(IRQ_DC1176_GIC_START + 1)	/* Software interrupt */
+#define IRQ_DC1176_COMMRx	(IRQ_DC1176_GIC_START + 2)	/* Debug Comm Rx interrupt */
+#define IRQ_DC1176_COMMTx	(IRQ_DC1176_GIC_START + 3)	/* Debug Comm Tx interrupt */
+#define IRQ_DC1176_TIMER0	(IRQ_DC1176_GIC_START + 8)	/* Timer 0 */
+#define IRQ_DC1176_TIMER1	(IRQ_DC1176_GIC_START + 9)	/* Timer 1 */
+#define IRQ_DC1176_TIMER2	(IRQ_DC1176_GIC_START + 10)	/* Timer 2 */
+#define IRQ_DC1176_APC		(IRQ_DC1176_GIC_START + 11)
+#define IRQ_DC1176_IEC		(IRQ_DC1176_GIC_START + 12)
+#define IRQ_DC1176_L2CC		(IRQ_DC1176_GIC_START + 13)
+#define IRQ_DC1176_RTC		(IRQ_DC1176_GIC_START + 14)
+#define IRQ_DC1176_CLCD		(IRQ_DC1176_GIC_START + 15)	/* CLCD controller */
+#define IRQ_DC1176_UART0	(IRQ_DC1176_GIC_START + 18)	/* UART 0 on development chip */
+#define IRQ_DC1176_UART1	(IRQ_DC1176_GIC_START + 19)	/* UART 1 on development chip */
+#define IRQ_DC1176_UART2	(IRQ_DC1176_GIC_START + 20)	/* UART 2 on development chip */
+#define IRQ_DC1176_UART3	(IRQ_DC1176_GIC_START + 21)	/* UART 3 on development chip */
+
+#define IRQ_DC1176_PB_IRQ2	(IRQ_DC1176_GIC_START + 30)	/* tile GIC */
+#define IRQ_DC1176_PB_IRQ1	(IRQ_DC1176_GIC_START + 31)	/* main GIC */
+
+/*
+ * RealView PB1176 interrupt sources (secondary GIC)
+ */
+#define IRQ_PB1176_MMCI0A	(IRQ_PB1176_GIC_START + 1)	/* Multimedia Card 0A */
+#define IRQ_PB1176_MMCI0B	(IRQ_PB1176_GIC_START + 2)	/* Multimedia Card 0A */
+#define IRQ_PB1176_KMI0		(IRQ_PB1176_GIC_START + 3)	/* Keyboard/Mouse port 0 */
+#define IRQ_PB1176_KMI1		(IRQ_PB1176_GIC_START + 4)	/* Keyboard/Mouse port 1 */
+#define IRQ_PB1176_SCI		(IRQ_PB1176_GIC_START + 5)
+#define IRQ_PB1176_UART4	(IRQ_PB1176_GIC_START + 6)	/* UART 4 on baseboard */
+#define IRQ_PB1176_CHARLCD	(IRQ_PB1176_GIC_START + 7)	/* Character LCD */
+#define IRQ_PB1176_GPIO1	(IRQ_PB1176_GIC_START + 8)
+#define IRQ_PB1176_GPIO2	(IRQ_PB1176_GIC_START + 9)
+#define IRQ_PB1176_ETH		(IRQ_PB1176_GIC_START + 10)	/* Ethernet controller */
+#define IRQ_PB1176_USB		(IRQ_PB1176_GIC_START + 11)	/* USB controller */
+
+#define IRQ_PB1176_PISMO	(IRQ_PB1176_GIC_START + 16)
+
+#define IRQ_PB1176_AACI		(IRQ_PB1176_GIC_START + 19)	/* Audio Codec */
+
+#define IRQ_PB1176_TIMER0_1	(IRQ_PB1176_GIC_START + 22)
+#define IRQ_PB1176_TIMER2_3	(IRQ_PB1176_GIC_START + 23)
+#define IRQ_PB1176_DMAC		(IRQ_PB1176_GIC_START + 24)	/* DMA controller */
+#define IRQ_PB1176_RTC		(IRQ_PB1176_GIC_START + 25)	/* Real Time Clock */
+
+#define IRQ_PB1176_GPIO0	-1
+#define IRQ_PB1176_SSP		-1
+#define IRQ_PB1176_SCTL		-1
+
+#define NR_GIC_PB1176		2
+
+/*
+ * Only define NR_IRQS if less than NR_IRQS_PB1176
+ */
+#define NR_IRQS_PB1176		(IRQ_DC1176_GIC_START + 96)
+
+#if defined(CONFIG_MACH_REALVIEW_PB1176)
+
+#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PB1176)
+#undef NR_IRQS
+#define NR_IRQS			NR_IRQS_PB1176
+#endif
+
+#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PB1176)
+#undef MAX_GIC_NR
+#define MAX_GIC_NR		NR_GIC_PB1176
+#endif
+
+#endif	/* CONFIG_MACH_REALVIEW_PB1176 */
+
+#endif	/* __MACH_IRQS_PB1176_H */
diff --git a/arch/arm/mach-realview/include/mach/irqs-pb11mp.h b/arch/arm/mach-realview/include/mach/irqs-pb11mp.h
new file mode 100644
index 0000000..34e255a
--- /dev/null
+++ b/arch/arm/mach-realview/include/mach/irqs-pb11mp.h
@@ -0,0 +1,122 @@
+/*
+ * arch/arm/mach-realview/include/mach/irqs-pb11mp.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __MACH_IRQS_PB11MP_H
+#define __MACH_IRQS_PB11MP_H
+
+#define IRQ_TC11MP_GIC_START			32
+#define IRQ_PB11MP_GIC_START			64
+
+/*
+ * ARM11MPCore test chip interrupt sources (primary GIC on the test chip)
+ */
+#define IRQ_TC11MP_AACI		(IRQ_TC11MP_GIC_START + 0)
+#define IRQ_TC11MP_TIMER0_1	(IRQ_TC11MP_GIC_START + 1)
+#define IRQ_TC11MP_TIMER2_3	(IRQ_TC11MP_GIC_START + 2)
+#define IRQ_TC11MP_USB		(IRQ_TC11MP_GIC_START + 3)
+#define IRQ_TC11MP_UART0	(IRQ_TC11MP_GIC_START + 4)
+#define IRQ_TC11MP_UART1	(IRQ_TC11MP_GIC_START + 5)
+#define IRQ_TC11MP_RTC		(IRQ_TC11MP_GIC_START + 6)
+#define IRQ_TC11MP_KMI0		(IRQ_TC11MP_GIC_START + 7)
+#define IRQ_TC11MP_KMI1		(IRQ_TC11MP_GIC_START + 8)
+#define IRQ_TC11MP_ETH		(IRQ_TC11MP_GIC_START + 9)
+#define IRQ_TC11MP_PB_IRQ1	(IRQ_TC11MP_GIC_START + 10)		/* main GIC */
+#define IRQ_TC11MP_PB_IRQ2	(IRQ_TC11MP_GIC_START + 11)		/* tile GIC */
+#define IRQ_TC11MP_PB_FIQ1	(IRQ_TC11MP_GIC_START + 12)		/* main GIC */
+#define IRQ_TC11MP_PB_FIQ2	(IRQ_TC11MP_GIC_START + 13)		/* tile GIC */
+#define IRQ_TC11MP_MMCI0A	(IRQ_TC11MP_GIC_START + 14)
+#define IRQ_TC11MP_MMCI0B	(IRQ_TC11MP_GIC_START + 15)
+
+#define IRQ_TC11MP_PMU_CPU0	(IRQ_TC11MP_GIC_START + 17)
+#define IRQ_TC11MP_PMU_CPU1	(IRQ_TC11MP_GIC_START + 18)
+#define IRQ_TC11MP_PMU_CPU2	(IRQ_TC11MP_GIC_START + 19)
+#define IRQ_TC11MP_PMU_CPU3	(IRQ_TC11MP_GIC_START + 20)
+#define IRQ_TC11MP_PMU_SCU0	(IRQ_TC11MP_GIC_START + 21)
+#define IRQ_TC11MP_PMU_SCU1	(IRQ_TC11MP_GIC_START + 22)
+#define IRQ_TC11MP_PMU_SCU2	(IRQ_TC11MP_GIC_START + 23)
+#define IRQ_TC11MP_PMU_SCU3	(IRQ_TC11MP_GIC_START + 24)
+#define IRQ_TC11MP_PMU_SCU4	(IRQ_TC11MP_GIC_START + 25)
+#define IRQ_TC11MP_PMU_SCU5	(IRQ_TC11MP_GIC_START + 26)
+#define IRQ_TC11MP_PMU_SCU6	(IRQ_TC11MP_GIC_START + 27)
+#define IRQ_TC11MP_PMU_SCU7	(IRQ_TC11MP_GIC_START + 28)
+
+#define IRQ_TC11MP_L220_EVENT	(IRQ_TC11MP_GIC_START + 29)
+#define IRQ_TC11MP_L220_SLAVE	(IRQ_TC11MP_GIC_START + 30)
+#define IRQ_TC11MP_L220_DECODE	(IRQ_TC11MP_GIC_START + 31)
+
+/*
+ * RealView PB11MPCore GIC interrupt sources (secondary GIC on the board)
+ */
+#define IRQ_PB11MP_WATCHDOG	(IRQ_PB11MP_GIC_START + 0)	/* Watchdog timer */
+#define IRQ_PB11MP_SOFT		(IRQ_PB11MP_GIC_START + 1)	/* Software interrupt */
+#define IRQ_PB11MP_COMMRx	(IRQ_PB11MP_GIC_START + 2)	/* Debug Comm Rx interrupt */
+#define IRQ_PB11MP_COMMTx	(IRQ_PB11MP_GIC_START + 3)	/* Debug Comm Tx interrupt */
+#define IRQ_PB11MP_GPIO0	(IRQ_PB11MP_GIC_START + 6)	/* GPIO 0 */
+#define IRQ_PB11MP_GPIO1	(IRQ_PB11MP_GIC_START + 7)	/* GPIO 1 */
+#define IRQ_PB11MP_GPIO2	(IRQ_PB11MP_GIC_START + 8)	/* GPIO 2 */
+								/* 9 reserved */
+#define IRQ_PB11MP_RTC_GIC1	(IRQ_PB11MP_GIC_START + 10)	/* Real Time Clock */
+#define IRQ_PB11MP_SSP		(IRQ_PB11MP_GIC_START + 11)	/* Synchronous Serial Port */
+#define IRQ_PB11MP_UART0_GIC1	(IRQ_PB11MP_GIC_START + 12)	/* UART 0 on development chip */
+#define IRQ_PB11MP_UART1_GIC1	(IRQ_PB11MP_GIC_START + 13)	/* UART 1 on development chip */
+#define IRQ_PB11MP_UART2	(IRQ_PB11MP_GIC_START + 14)	/* UART 2 on development chip */
+#define IRQ_PB11MP_UART3	(IRQ_PB11MP_GIC_START + 15)	/* UART 3 on development chip */
+#define IRQ_PB11MP_SCI		(IRQ_PB11MP_GIC_START + 16)	/* Smart Card Interface */
+#define IRQ_PB11MP_MMCI0A_GIC1	(IRQ_PB11MP_GIC_START + 17)	/* Multimedia Card 0A */
+#define IRQ_PB11MP_MMCI0B_GIC1	(IRQ_PB11MP_GIC_START + 18)	/* Multimedia Card 0B */
+#define IRQ_PB11MP_AACI_GIC1	(IRQ_PB11MP_GIC_START + 19)	/* Audio Codec */
+#define IRQ_PB11MP_KMI0_GIC1	(IRQ_PB11MP_GIC_START + 20)	/* Keyboard/Mouse port 0 */
+#define IRQ_PB11MP_KMI1_GIC1	(IRQ_PB11MP_GIC_START + 21)	/* Keyboard/Mouse port 1 */
+#define IRQ_PB11MP_CHARLCD	(IRQ_PB11MP_GIC_START + 22)	/* Character LCD */
+#define IRQ_PB11MP_CLCD		(IRQ_PB11MP_GIC_START + 23)	/* CLCD controller */
+#define IRQ_PB11MP_DMAC		(IRQ_PB11MP_GIC_START + 24)	/* DMA controller */
+#define IRQ_PB11MP_PWRFAIL	(IRQ_PB11MP_GIC_START + 25)	/* Power failure */
+#define IRQ_PB11MP_PISMO	(IRQ_PB11MP_GIC_START + 26)	/* PISMO interface */
+#define IRQ_PB11MP_DoC		(IRQ_PB11MP_GIC_START + 27)	/* Disk on Chip memory controller */
+#define IRQ_PB11MP_ETH_GIC1	(IRQ_PB11MP_GIC_START + 28)	/* Ethernet controller */
+#define IRQ_PB11MP_USB_GIC1	(IRQ_PB11MP_GIC_START + 29)	/* USB controller */
+#define IRQ_PB11MP_TSPEN	(IRQ_PB11MP_GIC_START + 30)	/* Touchscreen pen */
+#define IRQ_PB11MP_TSKPAD	(IRQ_PB11MP_GIC_START + 31)	/* Touchscreen keypad */
+
+#define IRQ_PB11MP_SMC		-1
+#define IRQ_PB11MP_SCTL		-1
+
+#define NR_GIC_PB11MP		2
+
+/*
+ * Only define NR_IRQS if less than NR_IRQS_PB11MP
+ */
+#define NR_IRQS_PB11MP		(IRQ_TC11MP_GIC_START + 96)
+
+#if defined(CONFIG_MACH_REALVIEW_PB11MP)
+
+#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PB11MP)
+#undef NR_IRQS
+#define NR_IRQS			NR_IRQS_PB11MP
+#endif
+
+#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PB11MP)
+#undef MAX_GIC_NR
+#define MAX_GIC_NR		NR_GIC_PB11MP
+#endif
+
+#endif	/* CONFIG_MACH_REALVIEW_PB11MP */
+
+#endif	/* __MACH_IRQS_PB11MP_H */
diff --git a/arch/arm/mach-realview/include/mach/irqs-pba8.h b/arch/arm/mach-realview/include/mach/irqs-pba8.h
new file mode 100644
index 0000000..86792a9
--- /dev/null
+++ b/arch/arm/mach-realview/include/mach/irqs-pba8.h
@@ -0,0 +1,98 @@
+/*
+ * arch/arm/mach-realview/include/mach/irqs-pba8.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __MACH_IRQS_PBA8_H
+#define __MACH_IRQS_PBA8_H
+
+#define IRQ_PBA8_GIC_START			32
+
+/* L220
+#define IRQ_PBA8_L220_EVENT	(IRQ_PBA8_GIC_START + 29)
+#define IRQ_PBA8_L220_SLAVE	(IRQ_PBA8_GIC_START + 30)
+#define IRQ_PBA8_L220_DECODE	(IRQ_PBA8_GIC_START + 31)
+*/
+
+/*
+ * PB-A8 on-board gic irq sources
+ */
+#define IRQ_PBA8_WATCHDOG	(IRQ_PBA8_GIC_START + 0)	/* Watchdog timer */
+#define IRQ_PBA8_SOFT		(IRQ_PBA8_GIC_START + 1)	/* Software interrupt */
+#define IRQ_PBA8_COMMRx		(IRQ_PBA8_GIC_START + 2)	/* Debug Comm Rx interrupt */
+#define IRQ_PBA8_COMMTx		(IRQ_PBA8_GIC_START + 3)	/* Debug Comm Tx interrupt */
+#define IRQ_PBA8_TIMER0_1	(IRQ_PBA8_GIC_START + 4)	/* Timer 0/1 (default timer) */
+#define IRQ_PBA8_TIMER2_3	(IRQ_PBA8_GIC_START + 5)	/* Timer 2/3 */
+#define IRQ_PBA8_GPIO0		(IRQ_PBA8_GIC_START + 6)	/* GPIO 0 */
+#define IRQ_PBA8_GPIO1		(IRQ_PBA8_GIC_START + 7)	/* GPIO 1 */
+#define IRQ_PBA8_GPIO2		(IRQ_PBA8_GIC_START + 8)	/* GPIO 2 */
+								/* 9 reserved */
+#define IRQ_PBA8_RTC		(IRQ_PBA8_GIC_START + 10)	/* Real Time Clock */
+#define IRQ_PBA8_SSP		(IRQ_PBA8_GIC_START + 11)	/* Synchronous Serial Port */
+#define IRQ_PBA8_UART0		(IRQ_PBA8_GIC_START + 12)	/* UART 0 on development chip */
+#define IRQ_PBA8_UART1		(IRQ_PBA8_GIC_START + 13)	/* UART 1 on development chip */
+#define IRQ_PBA8_UART2		(IRQ_PBA8_GIC_START + 14)	/* UART 2 on development chip */
+#define IRQ_PBA8_UART3		(IRQ_PBA8_GIC_START + 15)	/* UART 3 on development chip */
+#define IRQ_PBA8_SCI		(IRQ_PBA8_GIC_START + 16)	/* Smart Card Interface */
+#define IRQ_PBA8_MMCI0A		(IRQ_PBA8_GIC_START + 17)	/* Multimedia Card 0A */
+#define IRQ_PBA8_MMCI0B		(IRQ_PBA8_GIC_START + 18)	/* Multimedia Card 0B */
+#define IRQ_PBA8_AACI		(IRQ_PBA8_GIC_START + 19)	/* Audio Codec */
+#define IRQ_PBA8_KMI0		(IRQ_PBA8_GIC_START + 20)	/* Keyboard/Mouse port 0 */
+#define IRQ_PBA8_KMI1		(IRQ_PBA8_GIC_START + 21)	/* Keyboard/Mouse port 1 */
+#define IRQ_PBA8_CHARLCD	(IRQ_PBA8_GIC_START + 22)	/* Character LCD */
+#define IRQ_PBA8_CLCD		(IRQ_PBA8_GIC_START + 23)	/* CLCD controller */
+#define IRQ_PBA8_DMAC		(IRQ_PBA8_GIC_START + 24)	/* DMA controller */
+#define IRQ_PBA8_PWRFAIL	(IRQ_PBA8_GIC_START + 25)	/* Power failure */
+#define IRQ_PBA8_PISMO		(IRQ_PBA8_GIC_START + 26)	/* PISMO interface */
+#define IRQ_PBA8_DoC		(IRQ_PBA8_GIC_START + 27)	/* Disk on Chip memory controller */
+#define IRQ_PBA8_ETH		(IRQ_PBA8_GIC_START + 28)	/* Ethernet controller */
+#define IRQ_PBA8_USB		(IRQ_PBA8_GIC_START + 29)	/* USB controller */
+#define IRQ_PBA8_TSPEN		(IRQ_PBA8_GIC_START + 30)	/* Touchscreen pen */
+#define IRQ_PBA8_TSKPAD		(IRQ_PBA8_GIC_START + 31)	/* Touchscreen keypad */
+
+/* ... */
+#define IRQ_PBA8_PCI0		(IRQ_PBA8_GIC_START + 50)
+#define IRQ_PBA8_PCI1		(IRQ_PBA8_GIC_START + 51)
+#define IRQ_PBA8_PCI2		(IRQ_PBA8_GIC_START + 52)
+#define IRQ_PBA8_PCI3		(IRQ_PBA8_GIC_START + 53)
+
+#define IRQ_PBA8_SMC		-1
+#define IRQ_PBA8_SCTL		-1
+
+#define NR_GIC_PBA8		1
+
+/*
+ * Only define NR_IRQS if less than NR_IRQS_PBA8
+ */
+#define NR_IRQS_PBA8		(IRQ_PBA8_GIC_START + 64)
+
+#if defined(CONFIG_MACH_REALVIEW_PBA8)
+
+#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PBA8)
+#undef NR_IRQS
+#define NR_IRQS			NR_IRQS_PBA8
+#endif
+
+#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PBA8)
+#undef MAX_GIC_NR
+#define MAX_GIC_NR		NR_GIC_PBA8
+#endif
+
+#endif	/* CONFIG_MACH_REALVIEW_PBA8 */
+
+#endif	/* __MACH_IRQS_PBA8_H */
diff --git a/arch/arm/mach-realview/include/mach/irqs-pbx.h b/arch/arm/mach-realview/include/mach/irqs-pbx.h
new file mode 100644
index 0000000..deaad43
--- /dev/null
+++ b/arch/arm/mach-realview/include/mach/irqs-pbx.h
@@ -0,0 +1,115 @@
+/*
+ * arch/arm/mach-realview/include/mach/irqs-pbx.h
+ *
+ * Copyright (C) 2009 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __MACH_IRQS_PBX_H
+#define __MACH_IRQS_PBX_H
+
+#define IRQ_PBX_GIC_START			32
+
+/* L220
+#define IRQ_PBX_L220_EVENT	(IRQ_PBX_GIC_START + 29)
+#define IRQ_PBX_L220_SLAVE	(IRQ_PBX_GIC_START + 30)
+#define IRQ_PBX_L220_DECODE	(IRQ_PBX_GIC_START + 31)
+*/
+
+/*
+ * PBX on-board gic irq sources
+ */
+#define IRQ_PBX_WATCHDOG	(IRQ_PBX_GIC_START + 0)	/* Watchdog timer */
+#define IRQ_PBX_SOFT		(IRQ_PBX_GIC_START + 1)	/* Software interrupt */
+#define IRQ_PBX_COMMRx		(IRQ_PBX_GIC_START + 2)	/* Debug Comm Rx interrupt */
+#define IRQ_PBX_COMMTx		(IRQ_PBX_GIC_START + 3)	/* Debug Comm Tx interrupt */
+#define IRQ_PBX_TIMER0_1	(IRQ_PBX_GIC_START + 4)	/* Timer 0/1 (default timer) */
+#define IRQ_PBX_TIMER2_3	(IRQ_PBX_GIC_START + 5)	/* Timer 2/3 */
+#define IRQ_PBX_GPIO0		(IRQ_PBX_GIC_START + 6)	/* GPIO 0 */
+#define IRQ_PBX_GPIO1		(IRQ_PBX_GIC_START + 7)	/* GPIO 1 */
+#define IRQ_PBX_GPIO2		(IRQ_PBX_GIC_START + 8)	/* GPIO 2 */
+								/* 9 reserved */
+#define IRQ_PBX_RTC		(IRQ_PBX_GIC_START + 10)	/* Real Time Clock */
+#define IRQ_PBX_SSP		(IRQ_PBX_GIC_START + 11)	/* Synchronous Serial Port */
+#define IRQ_PBX_UART0		(IRQ_PBX_GIC_START + 12)	/* UART 0 on development chip */
+#define IRQ_PBX_UART1		(IRQ_PBX_GIC_START + 13)	/* UART 1 on development chip */
+#define IRQ_PBX_UART2		(IRQ_PBX_GIC_START + 14)	/* UART 2 on development chip */
+#define IRQ_PBX_UART3		(IRQ_PBX_GIC_START + 15)	/* UART 3 on development chip */
+#define IRQ_PBX_SCI		(IRQ_PBX_GIC_START + 16)	/* Smart Card Interface */
+#define IRQ_PBX_MMCI0A		(IRQ_PBX_GIC_START + 17)	/* Multimedia Card 0A */
+#define IRQ_PBX_MMCI0B		(IRQ_PBX_GIC_START + 18)	/* Multimedia Card 0B */
+#define IRQ_PBX_AACI		(IRQ_PBX_GIC_START + 19)	/* Audio Codec */
+#define IRQ_PBX_KMI0		(IRQ_PBX_GIC_START + 20)	/* Keyboard/Mouse port 0 */
+#define IRQ_PBX_KMI1		(IRQ_PBX_GIC_START + 21)	/* Keyboard/Mouse port 1 */
+#define IRQ_PBX_CHARLCD		(IRQ_PBX_GIC_START + 22)	/* Character LCD */
+#define IRQ_PBX_CLCD		(IRQ_PBX_GIC_START + 23)	/* CLCD controller */
+#define IRQ_PBX_DMAC		(IRQ_PBX_GIC_START + 24)	/* DMA controller */
+#define IRQ_PBX_PWRFAIL		(IRQ_PBX_GIC_START + 25)	/* Power failure */
+#define IRQ_PBX_PISMO		(IRQ_PBX_GIC_START + 26)	/* PISMO interface */
+#define IRQ_PBX_DoC		(IRQ_PBX_GIC_START + 27)	/* Disk on Chip memory controller */
+#define IRQ_PBX_ETH		(IRQ_PBX_GIC_START + 28)	/* Ethernet controller */
+#define IRQ_PBX_USB		(IRQ_PBX_GIC_START + 29)	/* USB controller */
+#define IRQ_PBX_TSPEN		(IRQ_PBX_GIC_START + 30)	/* Touchscreen pen */
+#define IRQ_PBX_TSKPAD		(IRQ_PBX_GIC_START + 31)	/* Touchscreen keypad */
+
+#define IRQ_PBX_PMU_SCU0        (IRQ_PBX_GIC_START + 32)        /* SCU PMU Interrupts (11mp) */
+#define IRQ_PBX_PMU_SCU1        (IRQ_PBX_GIC_START + 33)
+#define IRQ_PBX_PMU_SCU2        (IRQ_PBX_GIC_START + 34)
+#define IRQ_PBX_PMU_SCU3        (IRQ_PBX_GIC_START + 35)
+#define IRQ_PBX_PMU_SCU4        (IRQ_PBX_GIC_START + 36)
+#define IRQ_PBX_PMU_SCU5        (IRQ_PBX_GIC_START + 37)
+#define IRQ_PBX_PMU_SCU6        (IRQ_PBX_GIC_START + 38)
+#define IRQ_PBX_PMU_SCU7        (IRQ_PBX_GIC_START + 39)
+
+#define IRQ_PBX_WATCHDOG1       (IRQ_PBX_GIC_START + 40)        /* Watchdog1 timer */
+#define IRQ_PBX_TIMER4_5        (IRQ_PBX_GIC_START + 41)        /* Timer 0/1 (default timer) */
+#define IRQ_PBX_TIMER6_7        (IRQ_PBX_GIC_START + 42)        /* Timer 2/3 */
+/* ... */
+#define IRQ_PBX_PMU_CPU3        (IRQ_PBX_GIC_START + 44)        /* CPU PMU Interrupts */
+#define IRQ_PBX_PMU_CPU2        (IRQ_PBX_GIC_START + 45)
+#define IRQ_PBX_PMU_CPU1        (IRQ_PBX_GIC_START + 46)
+#define IRQ_PBX_PMU_CPU0        (IRQ_PBX_GIC_START + 47)
+
+/* ... */
+#define IRQ_PBX_PCI0		(IRQ_PBX_GIC_START + 50)
+#define IRQ_PBX_PCI1		(IRQ_PBX_GIC_START + 51)
+#define IRQ_PBX_PCI2		(IRQ_PBX_GIC_START + 52)
+#define IRQ_PBX_PCI3		(IRQ_PBX_GIC_START + 53)
+
+#define IRQ_PBX_SMC		-1
+#define IRQ_PBX_SCTL		-1
+
+#define NR_GIC_PBX		1
+
+/*
+ * Only define NR_IRQS if less than NR_IRQS_PBX
+ */
+#define NR_IRQS_PBX		(IRQ_PBX_GIC_START + 96)
+
+#if defined(CONFIG_MACH_REALVIEW_PBX)
+
+#if !defined(NR_IRQS) || (NR_IRQS < NR_IRQS_PBX)
+#undef NR_IRQS
+#define NR_IRQS			NR_IRQS_PBX
+#endif
+
+#if !defined(MAX_GIC_NR) || (MAX_GIC_NR < NR_GIC_PBX)
+#undef MAX_GIC_NR
+#define MAX_GIC_NR		NR_GIC_PBX
+#endif
+
+#endif	/* CONFIG_MACH_REALVIEW_PBX */
+
+#endif	/* __MACH_IRQS_PBX_H */
diff --git a/arch/arm/mach-realview/include/mach/irqs.h b/arch/arm/mach-realview/include/mach/irqs.h
index e9e3826..78854f2 100644
--- a/arch/arm/mach-realview/include/mach/irqs.h
+++ b/arch/arm/mach-realview/include/mach/irqs.h
@@ -22,11 +22,11 @@
 #ifndef __ASM_ARCH_IRQS_H
 #define __ASM_ARCH_IRQS_H
 
-#include <mach/board-eb.h>
-#include <mach/board-pb11mp.h>
-#include <mach/board-pb1176.h>
-#include <mach/board-pba8.h>
-#include <mach/board-pbx.h>
+#include <mach/irqs-eb.h>
+#include <mach/irqs-pb11mp.h>
+#include <mach/irqs-pb1176.h>
+#include <mach/irqs-pba8.h>
+#include <mach/irqs-pbx.h>
 
 #define IRQ_LOCALTIMER		29
 #define IRQ_LOCALWDOG		30
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 853d42b..4ce0f98 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -41,6 +41,7 @@
 #include <asm/irq.h>
 #include <asm/mach/irq.h>
 #include <mach/hardware.h>
+#include <mach/board-eb.h>
 #include <asm/system.h>
 
 #include "op_counter.h"
-- 
cgit v0.10.2


From 26584853a44c58f3d6ac7360d697a2ddcd1a3efa Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 14:00:18 +0100
Subject: Add core support for ARMv6/v7 big-endian

Starting with ARMv6, the CPUs support the BE-8 variant of big-endian
(byte-invariant). This patch adds the core support:

- setting of the BE-8 mode via the CPSR.E register for both kernel and
  user threads
- big-endian page table walking
- REV used to rotate instructions read from memory during fault
  processing as they are still little-endian format
- Kconfig and Makefile support for BE-8. The --be8 option must be passed
  to the final linking stage to convert the instructions to
  little-endian

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e84729b..1167a4e 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -11,6 +11,9 @@
 # Copyright (C) 1995-2001 by Russell King
 
 LDFLAGS_vmlinux	:=-p --no-undefined -X
+ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
+LDFLAGS_vmlinux	+= --be8
+endif
 CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 OBJCOPYFLAGS	:=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 GZFLAGS		:=-9
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index fbe5eef..ce39dc5 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -40,7 +40,7 @@ ifeq ($(CONFIG_PXA_SHARPSL),y)
 OBJS		+= head-sharpsl.o
 endif
 
-ifeq ($(CONFIG_CPU_BIG_ENDIAN),y)
+ifeq ($(CONFIG_CPU_ENDIAN_BE32),y)
 ifeq ($(CONFIG_CPU_CP15),y)
 OBJS		+= big-endian.o
 else
@@ -78,6 +78,9 @@ EXTRA_AFLAGS  := -Wa,-march=all
 # linker symbols.  We only define initrd_phys and params_phys if the
 # machine class defined the corresponding makefile variable.
 LDFLAGS_vmlinux := --defsym zreladdr=$(ZRELADDR)
+ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
+LDFLAGS_vmlinux += --be8
+endif
 ifneq ($(INITRD_PHYS),)
 LDFLAGS_vmlinux += --defsym initrd_phys=$(INITRD_PHYS)
 endif
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index b371fba..01d49be 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -438,6 +438,9 @@ __armv4_mmu_cache_on:
 		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 		orr	r0, r0, #0x0030
+#ifdef CONFIG_CPU_ENDIAN_BE8
+		orr	r0, r0, #1 << 25	@ big-endian page tables
+#endif
 		bl	__common_mmu_cache_on
 		mov	r0, #0
 		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
@@ -455,6 +458,9 @@ __armv7_mmu_cache_on:
 		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 		orr	r0, r0, #0x003c		@ write buffer
+#ifdef CONFIG_CPU_ENDIAN_BE8
+		orr	r0, r0, #1 << 25	@ big-endian page tables
+#endif
 		orrne	r0, r0, #1		@ MMU enabled
 		movne	r1, #-1
 		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 1845892..6a89567 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -71,6 +71,7 @@ struct thread_struct {
 		regs->ARM_cpsr = USR26_MODE;				\
 	if (elf_hwcap & HWCAP_THUMB && pc & 1)				\
 		regs->ARM_cpsr |= PSR_T_BIT;				\
+	regs->ARM_cpsr |= PSR_ENDSTATE;					\
 	regs->ARM_pc = pc & ~1;		/* pc */			\
 	regs->ARM_sp = sp;		/* sp */			\
 	regs->ARM_r2 = stack[2];	/* r2 (envp) */			\
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 4a4290f..67b833c 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -50,6 +50,7 @@
 #define PSR_F_BIT	0x00000040
 #define PSR_I_BIT	0x00000080
 #define PSR_A_BIT	0x00000100
+#define PSR_E_BIT	0x00000200
 #define PSR_J_BIT	0x01000000
 #define PSR_Q_BIT	0x08000000
 #define PSR_V_BIT	0x10000000
@@ -72,6 +73,15 @@
 #define PSR_IT_MASK	0x0600fc00	/* If-Then execution state mask */
 #define PSR_ENDIAN_MASK	0x00000200	/* Endianness state mask */
 
+/*
+ * Default endianness state
+ */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define PSR_ENDSTATE	PSR_E_BIT
+#else
+#define PSR_ENDSTATE	0
+#endif
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index d662a2f..9d7ce43 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -482,6 +482,9 @@ __und_usr:
 	subeq	r4, r2, #4			@ ARM instr at LR - 4
 	subne	r4, r2, #2			@ Thumb instr at LR - 2
 1:	ldreqt	r0, [r4]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	reveq	r0, r0				@ little endian instruction
+#endif
 	beq	call_fpe
 	@ Thumb instruction
 #if __LINUX_ARM_ARCH__ >= 7
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index b55cb03..366e509 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -210,6 +210,9 @@ ENTRY(vector_swi)
   A710(	teq	ip, #0x0f000000						)
   A710(	bne	.Larm710bug						)
 #endif
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	rev	r10, r10			@ little endian instruction
+#endif
 
 #elif defined(CONFIG_AEABI)
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index c3265a2..1585423 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -365,7 +365,7 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	regs.ARM_r2 = (unsigned long)fn;
 	regs.ARM_r3 = (unsigned long)do_exit;
 	regs.ARM_pc = (unsigned long)kernel_thread_helper;
-	regs.ARM_cpsr = SVC_MODE;
+	regs.ARM_cpsr = SVC_MODE | PSR_ENDSTATE;
 
 	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
 }
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 76bbcde..a25f34b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -639,6 +639,20 @@ config CPU_BIG_ENDIAN
 	  port must properly enable any big-endian related features
 	  of your chipset/board/processor.
 
+config CPU_ENDIAN_BE8
+	bool
+	depends on CPU_BIG_ENDIAN
+	default CPU_V6 || CPU_V7
+	help
+	  Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors.
+
+config CPU_ENDIAN_BE32
+	bool
+	depends on CPU_BIG_ENDIAN
+	default !CPU_ENDIAN_BE8
+	help
+	  Support for the BE-32 (big-endian) mode on pre-ARMv6 processors.
+
 config CPU_HIGH_VECTOR
 	depends on !MMU && CPU_CP15 && !CPU_ARM740T
 	bool "Select the High exception vector"
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 6f7e709..f332df7 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -37,6 +37,9 @@ ENTRY(v6_early_abort)
 	movne	pc, lr
 	do_thumb_abort
 	ldreq	r3, [r2]			@ read aborted ARM instruction
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	reveq	r3, r3
+#endif
 	do_ldrd_abort
 	tst	r3, #1 << 20			@ L = 0 -> write
 	orreq	r1, r1, #1 << 11		@ yes.
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 087e239..524ddae 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -170,6 +170,9 @@ __v6_setup:
 #endif /* CONFIG_MMU */
 	adr	r5, v6_crval
 	ldmia	r5, {r5, r6}
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	orr	r6, r6, #1 << 25		@ big-endian page tables
+#endif
 	mrc	p15, 0, r0, c1, c0, 0		@ read control register
 	bic	r0, r0, r5			@ clear bits them
 	orr	r0, r0, r6			@ set them
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 0a8ffd3..4f84864 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -253,6 +253,9 @@ __v7_setup:
 	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
 	adr	r5, v7_crval
 	ldmia	r5, {r5, r6}
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	orr	r6, r6, #1 << 25		@ big-endian page tables
+#endif
    	mrc	p15, 0, r0, c1, c0, 0		@ read control register
 	bic	r0, r0, r5			@ clear bits them
 	orr	r0, r0, r6			@ set them
-- 
cgit v0.10.2


From 85d6943af50537d3aec58b967ffbd3fec88453e9 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sat, 30 May 2009 14:00:18 +0100
Subject: Fix the VFP handling on the Feroceon CPU

This CPU generates synchronous VFP exceptions in a non-standard way -
the FPEXC.EX bit set but without the FPSCR.IXE bit being set like in the
VFP subarchitecture 1 or just the FPEXC.DEX bit like in VFP
subarchitecture 2. The main problem is that the faulty instruction
(which needs to be emulated in software) will be restarted several times
(normally until a context switch disables the VFP). This patch ensures
that the VFP exception is treated as synchronous.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Nicolas Pitre <nico@cam.org>

diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 83c4e38..1aeae38 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -100,6 +100,7 @@ ENTRY(vfp_support_entry)
 	beq	no_old_VFP_process
 	VFPFSTMIA r4, r5		@ save the working registers
 	VFPFMRX	r5, FPSCR		@ current status
+#ifndef CONFIG_CPU_FEROCEON
 	tst	r1, #FPEXC_EX		@ is there additional state to save?
 	beq	1f
 	VFPFMRX	r6, FPINST		@ FPINST (only if FPEXC.EX is set)
@@ -107,6 +108,7 @@ ENTRY(vfp_support_entry)
 	beq	1f
 	VFPFMRX	r8, FPINST2		@ FPINST2 if needed (and present)
 1:
+#endif
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
 					@ and point r4 at the word at the
 					@ start of the register dump
@@ -119,6 +121,7 @@ no_old_VFP_process:
 	VFPFLDMIA r10, r5		@ reload the working registers while
 					@ FPEXC is in a safe state
 	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
+#ifndef CONFIG_CPU_FEROCEON
 	tst	r1, #FPEXC_EX		@ is there additional state to restore?
 	beq	1f
 	VFPFMXR	FPINST, r6		@ restore FPINST (only if FPEXC.EX is set)
@@ -126,6 +129,7 @@ no_old_VFP_process:
 	beq	1f
 	VFPFMXR	FPINST2, r8		@ FPINST2 if needed (and present)
 1:
+#endif
 	VFPFMXR	FPSCR, r5		@ restore status
 
 check_for_exception:
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 01599c4..2d7423a 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -253,12 +253,14 @@ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	}
 
 	if (fpexc & FPEXC_EX) {
+#ifndef CONFIG_CPU_FEROCEON
 		/*
 		 * Asynchronous exception. The instruction is read from FPINST
 		 * and the interrupted instruction has to be restarted.
 		 */
 		trigger = fmrx(FPINST);
 		regs->ARM_pc -= 4;
+#endif
 	} else if (!(fpexc & FPEXC_DEX)) {
 		/*
 		 * Illegal combination of bits. It can be caused by an
-- 
cgit v0.10.2


From 62013ab5d5df297a01ae5863b5c26d758ec0af7f Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Sat, 30 May 2009 21:50:58 +0900
Subject: nilfs2: fix bh leak in nilfs_cpfile_delete_checkpoints function

The nilfs_cpfile_delete_checkpoints() wrongly skips brelse() for the
header block of checkpoint file in case of errors.  This fixes the
leak bug.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>

diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index e90b60d..300f1cd 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -311,7 +311,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 		ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
 		if (ret < 0) {
 			if (ret != -ENOENT)
-				goto out_sem;
+				goto out_header;
 			/* skip hole */
 			ret = 0;
 			continue;
@@ -344,7 +344,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 					continue;
 				printk(KERN_ERR "%s: cannot delete block\n",
 				       __func__);
-				goto out_sem;
+				goto out_header;
 			}
 		}
 
@@ -361,6 +361,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 		nilfs_mdt_mark_dirty(cpfile);
 		kunmap_atomic(kaddr, KM_USER0);
 	}
+
+ out_header:
 	brelse(header_bh);
 
  out_sem:
-- 
cgit v0.10.2


From fee3c55d7fb9486f02d3285678d58e433ffe3c2a Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Fri, 29 May 2009 14:15:33 +0200
Subject: microblaze: Fix problem with early_printk in startup

Problem arise when is incopatibility between kernel/dts/pvr
and kernel tries to announce it. Early printk device
(uartlite in our case) was in TLB 2 and when kernel
extract DTB it necessary to allocate at least one
TLB at the end of memory. First free TLB was number two
where was early printk. But checking mechanism (kernel/dts/pvr)
was after extrahing but TLB 2 was different. This caused
that kernel hung up.
Moving early printk device to TLB 63 solve it and we don't
protect it which means that we can use early_printk messages
only for initial parts of kernel then we rewrite TLB 63.

Reported-by: Edgar E. Iglesias <edgar.iglesias@gmail.com>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/kernel/misc.S b/arch/microblaze/kernel/misc.S
index d623efc..df16c62 100644
--- a/arch/microblaze/kernel/misc.S
+++ b/arch/microblaze/kernel/misc.S
@@ -69,7 +69,7 @@ early_console_reg_tlb_alloc:
 	 * Load a TLB entry for the UART, so that microblaze_progress() can use
 	 * the UARTs nice and early.  We use a 4k real==virtual mapping.
 	 */
-	ori	r4, r0, 2
+	ori	r4, r0, 63
 	mts	rtlbx, r4 /* TLB slot 2 */
 
 	or	r4,r5,r0
-- 
cgit v0.10.2


From c339dfdd65b52bfd947ab29d1210314a2f6d622d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sat, 30 May 2009 20:06:54 +0200
Subject: ide_pci_generic: add quirk for Netcell ATA RAID

We need to explicitly mark words 85-87 as valid ones since
firmware doesn't do it.

This should fix support for LBA48 and FLUSH CACHE [EXT] command
which stopped working after we applied more strict checking of
identify words in:

	commit 942dcd85bf8edf38cdc3745306ca250684d99a61
	("ide: idedisk_supports_lba48() -> ata_id_lba48_enabled()")

and

	commit 4b58f17d7c45a8e5f4acda641bec388398b9c0fa
	("ide: ide_id_has_flush_cache() -> ata_id_flush_enabled()")

Reported-and-tested-by: "Trevor Hemsley" <trevor.hemsley@ntlworld.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c
index 61111fd..39d4e01 100644
--- a/drivers/ide/ide-pci-generic.c
+++ b/drivers/ide/ide-pci-generic.c
@@ -33,6 +33,16 @@ static int ide_generic_all;		/* Set to claim all devices */
 module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
 MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers.");
 
+static void netcell_quirkproc(ide_drive_t *drive)
+{
+	/* mark words 85-87 as valid */
+	drive->id[ATA_ID_CSF_DEFAULT] |= 0x4000;
+}
+
+static const struct ide_port_ops netcell_port_ops = {
+	.quirkproc		= netcell_quirkproc,
+};
+
 #define DECLARE_GENERIC_PCI_DEV(extra_flags) \
 	{ \
 		.name		= DRV_NAME, \
@@ -74,6 +84,7 @@ static const struct ide_port_info generic_chipsets[] __devinitdata = {
 
 	{	/* 6: Revolution */
 		.name		= DRV_NAME,
+		.port_ops	= &netcell_port_ops,
 		.host_flags	= IDE_HFLAG_CLEAR_SIMPLEX |
 				  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_OFF_BOARD,
-- 
cgit v0.10.2


From 8a933ece41a59ce077eeffe5b9bf08b14d173c58 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 31 May 2009 09:28:12 +0200
Subject: ALSA: hda - Fix a typo in the previous patch

ICH6_GCTL_RESET was wrongly set to another bit by the commit
b21fadb9c1852c91622ca1dccfeb144bc535e36e.  This caused a problem when
the codec needs really a reset (e.g. recovering from the communication
error at probe).

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index f63bc65..b063d0e 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -148,7 +148,7 @@ MODULE_DESCRIPTION("Intel HDA driver");
 #define ICH6_REG_OUTPAY			0x04
 #define ICH6_REG_INPAY			0x06
 #define ICH6_REG_GCTL			0x08
-#define   ICH6_GCTL_RESET	(1 << 1)   /* controller reset */
+#define   ICH6_GCTL_RESET	(1 << 0)   /* controller reset */
 #define   ICH6_GCTL_FCNTRL	(1 << 1)   /* flush control */
 #define   ICH6_GCTL_UNSOL	(1 << 8)   /* accept unsol. response enable */
 #define ICH6_REG_WAKEEN			0x0c
-- 
cgit v0.10.2


From e0421bbe6479816ea16c6553b8f376c592e36a85 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Sun, 31 May 2009 13:31:14 +0100
Subject: [ARM] 5530/1: Freescale STMP: get rid of HW_zzz macros [1/3]

Replace HW_zzz register access macros by regular __raw_readl/__raw_writel calls

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp378x/include/mach/regs-apbh.h b/arch/arm/mach-stmp378x/include/mach/regs-apbh.h
index db63b04..dbcf85b 100644
--- a/arch/arm/mach-stmp378x/include/mach/regs-apbh.h
+++ b/arch/arm/mach-stmp378x/include/mach/regs-apbh.h
@@ -1,11 +1,9 @@
 /*
- * STMP APBH Register Definitions
+ * stmp378x: APBH register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
  *
- *
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -20,69 +18,84 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
+#ifndef _MACH_REGS_APBH
+#define _MACH_REGS_APBH
+
+#define REGS_APBH_BASE	(STMP3XXX_REGS_BASE + 0x4000)
+#define REGS_APBH_PHYS	0x80004000
+#define REGS_APBH_SIZE	0x2000
+
+#define HW_APBH_CTRL0		0x0
+#define BM_APBH_CTRL0_RESET_CHANNEL	0x00FF0000
+#define BP_APBH_CTRL0_RESET_CHANNEL	16
+#define BM_APBH_CTRL0_CLKGATE	0x40000000
+#define BM_APBH_CTRL0_SFTRST	0x80000000
+
+#define HW_APBH_CTRL1		0x10
+#define BM_APBH_CTRL1_CH0_CMDCMPLT_IRQ	0x00000001
+#define BP_APBH_CTRL1_CH0_CMDCMPLT_IRQ	0
+
+#define HW_APBH_CTRL2		0x20
+
+#define HW_APBH_DEVSEL		0x30
+
+#define HW_APBH_CH0_NXTCMDAR	(0x50 + 0 * 0x70)
+#define HW_APBH_CH1_NXTCMDAR	(0x50 + 1 * 0x70)
+#define HW_APBH_CH2_NXTCMDAR	(0x50 + 2 * 0x70)
+#define HW_APBH_CH3_NXTCMDAR	(0x50 + 3 * 0x70)
+#define HW_APBH_CH4_NXTCMDAR	(0x50 + 4 * 0x70)
+#define HW_APBH_CH5_NXTCMDAR	(0x50 + 5 * 0x70)
+#define HW_APBH_CH6_NXTCMDAR	(0x50 + 6 * 0x70)
+#define HW_APBH_CH7_NXTCMDAR	(0x50 + 7 * 0x70)
+#define HW_APBH_CH8_NXTCMDAR	(0x50 + 8 * 0x70)
+#define HW_APBH_CH9_NXTCMDAR	(0x50 + 9 * 0x70)
+#define HW_APBH_CH10_NXTCMDAR	(0x50 + 10 * 0x70)
+#define HW_APBH_CH11_NXTCMDAR	(0x50 + 11 * 0x70)
+#define HW_APBH_CH12_NXTCMDAR	(0x50 + 12 * 0x70)
+#define HW_APBH_CH13_NXTCMDAR	(0x50 + 13 * 0x70)
+#define HW_APBH_CH14_NXTCMDAR	(0x50 + 14 * 0x70)
+#define HW_APBH_CH15_NXTCMDAR	(0x50 + 15 * 0x70)
+
+#define HW_APBH_CHn_NXTCMDAR	0x50
+
+#define BV_APBH_CHn_CMD_COMMAND__NO_DMA_XFER	 0
+#define BV_APBH_CHn_CMD_COMMAND__DMA_WRITE	 1
+#define BV_APBH_CHn_CMD_COMMAND__DMA_READ	 2
+#define BV_APBH_CHn_CMD_COMMAND__DMA_SENSE	 3
+#define BM_APBH_CHn_CMD_COMMAND	0x00000003
+#define BP_APBH_CHn_CMD_COMMAND	0
+#define BM_APBH_CHn_CMD_CHAIN	0x00000004
+#define BM_APBH_CHn_CMD_IRQONCMPLT	0x00000008
+#define BM_APBH_CHn_CMD_NANDLOCK	0x00000010
+#define BM_APBH_CHn_CMD_NANDWAIT4READY	0x00000020
+#define BM_APBH_CHn_CMD_SEMAPHORE	0x00000040
+#define BM_APBH_CHn_CMD_WAIT4ENDCMD	0x00000080
+#define BM_APBH_CHn_CMD_CMDWORDS	0x0000F000
+#define BP_APBH_CHn_CMD_CMDWORDS	12
+#define BM_APBH_CHn_CMD_XFER_COUNT	0xFFFF0000
+#define BP_APBH_CHn_CMD_XFER_COUNT	16
 
-#ifndef __ARCH_ARM___APBH_H
-#define __ARCH_ARM___APBH_H  1
+#define HW_APBH_CH0_SEMA	(0x80 + 0 * 0x70)
+#define HW_APBH_CH1_SEMA	(0x80 + 1 * 0x70)
+#define HW_APBH_CH2_SEMA	(0x80 + 2 * 0x70)
+#define HW_APBH_CH3_SEMA	(0x80 + 3 * 0x70)
+#define HW_APBH_CH4_SEMA	(0x80 + 4 * 0x70)
+#define HW_APBH_CH5_SEMA	(0x80 + 5 * 0x70)
+#define HW_APBH_CH6_SEMA	(0x80 + 6 * 0x70)
+#define HW_APBH_CH7_SEMA	(0x80 + 7 * 0x70)
+#define HW_APBH_CH8_SEMA	(0x80 + 8 * 0x70)
+#define HW_APBH_CH9_SEMA	(0x80 + 9 * 0x70)
+#define HW_APBH_CH10_SEMA	(0x80 + 10 * 0x70)
+#define HW_APBH_CH11_SEMA	(0x80 + 11 * 0x70)
+#define HW_APBH_CH12_SEMA	(0x80 + 12 * 0x70)
+#define HW_APBH_CH13_SEMA	(0x80 + 13 * 0x70)
+#define HW_APBH_CH14_SEMA	(0x80 + 14 * 0x70)
+#define HW_APBH_CH15_SEMA	(0x80 + 15 * 0x70)
 
-#include <mach/stmp3xxx_regs.h>
+#define HW_APBH_CHn_SEMA	0x80
+#define BM_APBH_CHn_SEMA_INCREMENT_SEMA	0x000000FF
+#define BP_APBH_CHn_SEMA_INCREMENT_SEMA	0
+#define BM_APBH_CHn_SEMA_PHORE	0x00FF0000
+#define BP_APBH_CHn_SEMA_PHORE	16
 
-#define REGS_APBH_BASE (REGS_BASE + 0x4000)
-#define REGS_APBH_BASE_PHYS (0x80004000)
-#define REGS_APBH_SIZE 0x00002000
-HW_REGISTER(HW_APBH_CTRL0, REGS_APBH_BASE, 0x00000000)
-#define HW_APBH_CTRL0_ADDR (REGS_APBH_BASE + 0x00000000)
-#define BM_APBH_CTRL0_SFTRST 0x80000000
-#define BM_APBH_CTRL0_CLKGATE 0x40000000
-#define BM_APBH_CTRL0_AHB_BURST8_EN 0x20000000
-#define BM_APBH_CTRL0_APB_BURST4_EN 0x10000000
-#define BP_APBH_CTRL0_RESET_CHANNEL      16
-#define BM_APBH_CTRL0_RESET_CHANNEL 0x00FF0000
-#define BF_APBH_CTRL0_RESET_CHANNEL(v)  \
-	(((v) << 16) & BM_APBH_CTRL0_RESET_CHANNEL)
-HW_REGISTER(HW_APBH_CTRL1, REGS_APBH_BASE, 0x00000010)
-#define HW_APBH_CTRL1_ADDR (REGS_APBH_BASE + 0x00000010)
-HW_REGISTER(HW_APBH_CTRL2, REGS_APBH_BASE, 0x00000020)
-HW_REGISTER_0(HW_APBH_DEVSEL, REGS_APBH_BASE, 0x00000030)
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_CURCMDAR, REGS_APBH_BASE, 0x00000040, 0x70)
-#define BP_APBH_CHn_CURCMDAR_CMD_ADDR      0
-#define BM_APBH_CHn_CURCMDAR_CMD_ADDR 0xFFFFFFFF
-#define BF_APBH_CHn_CURCMDAR_CMD_ADDR(v)   (v)
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_NXTCMDAR, REGS_APBH_BASE, 0x00000050, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_CMD, REGS_APBH_BASE, 0x00000060, 0x70)
-#define BP_APBH_CHn_CMD_XFER_COUNT      16
-#define BM_APBH_CHn_CMD_XFER_COUNT 0xFFFF0000
-#define BF_APBH_CHn_CMD_XFER_COUNT(v) \
-	(((v) << 16) & BM_APBH_CHn_CMD_XFER_COUNT)
-#define BP_APBH_CHn_CMD_CMDWORDS      12
-#define BM_APBH_CHn_CMD_CMDWORDS 0x0000F000
-#define BF_APBH_CHn_CMD_CMDWORDS(v)  \
-	(((v) << 12) & BM_APBH_CHn_CMD_CMDWORDS)
-#define BM_APBH_CHn_CMD_HALTONTERMINATE 0x00000100
-#define BM_APBH_CHn_CMD_WAIT4ENDCMD 0x00000080
-#define BM_APBH_CHn_CMD_SEMAPHORE 0x00000040
-#define BM_APBH_CHn_CMD_NANDWAIT4READY 0x00000020
-#define BM_APBH_CHn_CMD_NANDLOCK 0x00000010
-#define BM_APBH_CHn_CMD_IRQONCMPLT 0x00000008
-#define BM_APBH_CHn_CMD_CHAIN 0x00000004
-#define BP_APBH_CHn_CMD_COMMAND      0
-#define BM_APBH_CHn_CMD_COMMAND 0x00000003
-#define BF_APBH_CHn_CMD_COMMAND(v)  \
-	(((v) << 0) & BM_APBH_CHn_CMD_COMMAND)
-#define BV_APBH_CHn_CMD_COMMAND__NO_DMA_XFER 0x0
-#define BV_APBH_CHn_CMD_COMMAND__DMA_WRITE   0x1
-#define BV_APBH_CHn_CMD_COMMAND__DMA_READ    0x2
-#define BV_APBH_CHn_CMD_COMMAND__DMA_SENSE   0x3
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_BAR, REGS_APBH_BASE, 0x00000070, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_SEMA, REGS_APBH_BASE, 0x00000080, 0x70)
-#define BP_APBH_CHn_SEMA_PHORE      16
-#define BM_APBH_CHn_SEMA_PHORE 0x00FF0000
-#define BF_APBH_CHn_SEMA_PHORE(v)  \
-	(((v) << 16) & BM_APBH_CHn_SEMA_PHORE)
-#define BP_APBH_CHn_SEMA_INCREMENT_SEMA      0
-#define BM_APBH_CHn_SEMA_INCREMENT_SEMA 0x000000FF
-#define BF_APBH_CHn_SEMA_INCREMENT_SEMA(v)  \
-	(((v) << 0) & BM_APBH_CHn_SEMA_INCREMENT_SEMA)
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_DEBUG1, REGS_APBH_BASE, 0x00000090, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_DEBUG2, REGS_APBH_BASE, 0x000000a0, 0x70)
-HW_REGISTER_0(HW_APBH_VERSION, REGS_APBH_BASE, 0x000003f0)
-#endif /* __ARCH_ARM___APBH_H */
+#endif
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-apbx.h b/arch/arm/mach-stmp378x/include/mach/regs-apbx.h
index d0e8e9f..3b934a4 100644
--- a/arch/arm/mach-stmp378x/include/mach/regs-apbx.h
+++ b/arch/arm/mach-stmp378x/include/mach/regs-apbx.h
@@ -1,10 +1,9 @@
 /*
- * STMP APBX Register Definitions
+ * stmp378x: APBX register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
  *
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -19,61 +18,102 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef __ARCH_ARM___APBX_H
-#define __ARCH_ARM___APBX_H  1
+#ifndef _MACH_REGS_APBX
+#define _MACH_REGS_APBX
+
+#define REGS_APBX_BASE	(STMP3XXX_REGS_BASE + 0x24000)
+#define REGS_APBX_PHYS	0x80024000
+#define REGS_APBX_SIZE	0x2000
+
+#define HW_APBX_CTRL0		0x0
+#define BM_APBX_CTRL0_CLKGATE	0x40000000
+#define BM_APBX_CTRL0_SFTRST	0x80000000
+
+#define HW_APBX_CTRL1		0x10
+
+#define HW_APBX_CTRL2		0x20
+
+#define HW_APBX_CHANNEL_CTRL	0x30
+#define BM_APBX_CHANNEL_CTRL_RESET_CHANNEL	0xFFFF0000
+#define BP_APBX_CHANNEL_CTRL_RESET_CHANNEL	16
+
+#define HW_APBX_DEVSEL		0x40
+
+#define HW_APBX_CH0_NXTCMDAR	(0x110 + 0 * 0x70)
+#define HW_APBX_CH1_NXTCMDAR	(0x110 + 1 * 0x70)
+#define HW_APBX_CH2_NXTCMDAR	(0x110 + 2 * 0x70)
+#define HW_APBX_CH3_NXTCMDAR	(0x110 + 3 * 0x70)
+#define HW_APBX_CH4_NXTCMDAR	(0x110 + 4 * 0x70)
+#define HW_APBX_CH5_NXTCMDAR	(0x110 + 5 * 0x70)
+#define HW_APBX_CH6_NXTCMDAR	(0x110 + 6 * 0x70)
+#define HW_APBX_CH7_NXTCMDAR	(0x110 + 7 * 0x70)
+#define HW_APBX_CH8_NXTCMDAR	(0x110 + 8 * 0x70)
+#define HW_APBX_CH9_NXTCMDAR	(0x110 + 9 * 0x70)
+#define HW_APBX_CH10_NXTCMDAR	(0x110 + 10 * 0x70)
+#define HW_APBX_CH11_NXTCMDAR	(0x110 + 11 * 0x70)
+#define HW_APBX_CH12_NXTCMDAR	(0x110 + 12 * 0x70)
+#define HW_APBX_CH13_NXTCMDAR	(0x110 + 13 * 0x70)
+#define HW_APBX_CH14_NXTCMDAR	(0x110 + 14 * 0x70)
+#define HW_APBX_CH15_NXTCMDAR	(0x110 + 15 * 0x70)
+
+#define HW_APBX_CHn_NXTCMDAR	0x110
+#define BM_APBX_CHn_CMD_COMMAND	0x00000003
+#define BP_APBX_CHn_CMD_COMMAND	0
+#define BV_APBX_CHn_CMD_COMMAND__NO_DMA_XFER	 0
+#define BV_APBX_CHn_CMD_COMMAND__DMA_WRITE	 1
+#define BV_APBX_CHn_CMD_COMMAND__DMA_READ	 2
+#define BV_APBX_CHn_CMD_COMMAND__DMA_SENSE	 3
+#define BM_APBX_CHn_CMD_CHAIN	0x00000004
+#define BM_APBX_CHn_CMD_IRQONCMPLT	0x00000008
+#define BM_APBX_CHn_CMD_SEMAPHORE	0x00000040
+#define BM_APBX_CHn_CMD_WAIT4ENDCMD	0x00000080
+#define BM_APBX_CHn_CMD_HALTONTERMINATE	0x00000100
+#define BM_APBX_CHn_CMD_CMDWORDS	0x0000F000
+#define BP_APBX_CHn_CMD_CMDWORDS	12
+#define BM_APBX_CHn_CMD_XFER_COUNT	0xFFFF0000
+#define BP_APBX_CHn_CMD_XFER_COUNT	16
+
+#define HW_APBX_CH0_BAR		(0x130 + 0 * 0x70)
+#define HW_APBX_CH1_BAR		(0x130 + 1 * 0x70)
+#define HW_APBX_CH2_BAR		(0x130 + 2 * 0x70)
+#define HW_APBX_CH3_BAR		(0x130 + 3 * 0x70)
+#define HW_APBX_CH4_BAR		(0x130 + 4 * 0x70)
+#define HW_APBX_CH5_BAR		(0x130 + 5 * 0x70)
+#define HW_APBX_CH6_BAR		(0x130 + 6 * 0x70)
+#define HW_APBX_CH7_BAR		(0x130 + 7 * 0x70)
+#define HW_APBX_CH8_BAR		(0x130 + 8 * 0x70)
+#define HW_APBX_CH9_BAR		(0x130 + 9 * 0x70)
+#define HW_APBX_CH10_BAR		(0x130 + 10 * 0x70)
+#define HW_APBX_CH11_BAR		(0x130 + 11 * 0x70)
+#define HW_APBX_CH12_BAR		(0x130 + 12 * 0x70)
+#define HW_APBX_CH13_BAR		(0x130 + 13 * 0x70)
+#define HW_APBX_CH14_BAR		(0x130 + 14 * 0x70)
+#define HW_APBX_CH15_BAR		(0x130 + 15 * 0x70)
+
+#define HW_APBX_CHn_BAR		0x130
+
+#define HW_APBX_CH0_SEMA	(0x140 + 0 * 0x70)
+#define HW_APBX_CH1_SEMA	(0x140 + 1 * 0x70)
+#define HW_APBX_CH2_SEMA	(0x140 + 2 * 0x70)
+#define HW_APBX_CH3_SEMA	(0x140 + 3 * 0x70)
+#define HW_APBX_CH4_SEMA	(0x140 + 4 * 0x70)
+#define HW_APBX_CH5_SEMA	(0x140 + 5 * 0x70)
+#define HW_APBX_CH6_SEMA	(0x140 + 6 * 0x70)
+#define HW_APBX_CH7_SEMA	(0x140 + 7 * 0x70)
+#define HW_APBX_CH8_SEMA	(0x140 + 8 * 0x70)
+#define HW_APBX_CH9_SEMA	(0x140 + 9 * 0x70)
+#define HW_APBX_CH10_SEMA	(0x140 + 10 * 0x70)
+#define HW_APBX_CH11_SEMA	(0x140 + 11 * 0x70)
+#define HW_APBX_CH12_SEMA	(0x140 + 12 * 0x70)
+#define HW_APBX_CH13_SEMA	(0x140 + 13 * 0x70)
+#define HW_APBX_CH14_SEMA	(0x140 + 14 * 0x70)
+#define HW_APBX_CH15_SEMA	(0x140 + 15 * 0x70)
+
+#define HW_APBX_CHn_SEMA	0x140
+#define BM_APBX_CHn_SEMA_INCREMENT_SEMA	0x000000FF
+#define BP_APBX_CHn_SEMA_INCREMENT_SEMA	0
+#define BM_APBX_CHn_SEMA_PHORE	0x00FF0000
+#define BP_APBX_CHn_SEMA_PHORE	16
 
-#include <mach/stmp3xxx_regs.h>
+#endif
 
-#define REGS_APBX_BASE (REGS_BASE + 0x24000)
-#define REGS_APBX_BASE_PHYS (0x80024000)
-#define REGS_APBX_SIZE 0x00002000
-HW_REGISTER(HW_APBX_CTRL0, REGS_APBX_BASE, 0x00000000)
-#define HW_APBX_CTRL0_ADDR (REGS_APBX_BASE + 0x00000000)
-#define BM_APBX_CTRL0_SFTRST 0x80000000
-#define BM_APBX_CTRL0_CLKGATE 0x40000000
-HW_REGISTER(HW_APBX_CTRL1, REGS_APBX_BASE, 0x00000010)
-HW_REGISTER(HW_APBX_CTRL2, REGS_APBX_BASE, 0x00000020)
-HW_REGISTER(HW_APBX_CHANNEL_CTRL, REGS_APBX_BASE, 0x00000030)
-#define BP_APBX_CHANNEL_CTRL_RESET_CHANNEL      16
-#define BM_APBX_CHANNEL_CTRL_RESET_CHANNEL      0xFFFF0000
-#define BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(v)   \
-	(((v) << BP_APBX_CHANNEL_CTRL_RESET_CHANNEL) & \
-	 BM_APBX_CHANNEL_CTRL_RESET_CHANNEL)
-HW_REGISTER_0(HW_APBX_DEVSEL, REGS_APBX_BASE, 0x00000040)
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_CURCMDAR, REGS_APBX_BASE, 0x00000100, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_NXTCMDAR, REGS_APBX_BASE, 0x00000110, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_CMD, REGS_APBX_BASE, 0x00000120, 0x70)
-#define BP_APBX_CHn_CMD_XFER_COUNT      16
-#define BM_APBX_CHn_CMD_XFER_COUNT 0xFFFF0000
-#define BF_APBX_CHn_CMD_XFER_COUNT(v) \
-	(((v) << 16) & BM_APBX_CHn_CMD_XFER_COUNT)
-#define BP_APBX_CHn_CMD_CMDWORDS      12
-#define BM_APBX_CHn_CMD_CMDWORDS 0x0000F000
-#define BF_APBX_CHn_CMD_CMDWORDS(v)  \
-	(((v) << 12) & BM_APBX_CHn_CMD_CMDWORDS)
-#define BM_APBX_CHn_CMD_HALTONTERMINATE 0x00000100
-#define BM_APBX_CHn_CMD_WAIT4ENDCMD 0x00000080
-#define BM_APBX_CHn_CMD_SEMAPHORE 0x00000040
-#define BM_APBX_CHn_CMD_IRQONCMPLT 0x00000008
-#define BM_APBX_CHn_CMD_CHAIN 0x00000004
-#define BP_APBX_CHn_CMD_COMMAND      0
-#define BM_APBX_CHn_CMD_COMMAND 0x00000003
-#define BF_APBX_CHn_CMD_COMMAND(v)  \
-	(((v) << 0) & BM_APBX_CHn_CMD_COMMAND)
-#define BV_APBX_CHn_CMD_COMMAND__NO_DMA_XFER 0x0
-#define BV_APBX_CHn_CMD_COMMAND__DMA_WRITE   0x1
-#define BV_APBX_CHn_CMD_COMMAND__DMA_READ    0x2
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_BAR, REGS_APBX_BASE, 0x00000130, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_SEMA, REGS_APBX_BASE, 0x00000140, 0x70)
-#define BP_APBX_CHn_SEMA_PHORE      16
-#define BM_APBX_CHn_SEMA_PHORE 0x00FF0000
-#define BF_APBX_CHn_SEMA_PHORE(v)  \
-	(((v) << 16) & BM_APBX_CHn_SEMA_PHORE)
-#define BP_APBX_CHn_SEMA_INCREMENT_SEMA      0
-#define BM_APBX_CHn_SEMA_INCREMENT_SEMA 0x000000FF
-#define BF_APBX_CHn_SEMA_INCREMENT_SEMA(v)  \
-	(((v) << 0) & BM_APBX_CHn_SEMA_INCREMENT_SEMA)
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_DEBUG1, REGS_APBX_BASE, 0x00000150, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_DEBUG2, REGS_APBX_BASE, 0x00000160, 0x70)
-HW_REGISTER_0(HW_APBX_VERSION, REGS_APBX_BASE, 0x00000800)
-#endif /* __ARCH_ARM___APBX_H */
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h b/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h
index a421d9e..7c546af 100644
--- a/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h
+++ b/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h
@@ -1,5 +1,5 @@
 /*
- * STMP CLKCTRL Register Definitions
+ * stmp378x: CLKCTRL register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
@@ -18,259 +18,71 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
+#ifndef _MACH_REGS_CLKCTRL
+#define _MACH_REGS_CLKCTRL
 
-#ifndef __ARCH_ARM___CLKCTRL_H
-#define __ARCH_ARM___CLKCTRL_H  1
+#define REGS_CLKCTRL_BASE	(STMP3XXX_REGS_BASE + 0x40000)
+#define REGS_CLKCTRL_PHYS	0x80040000
+#define REGS_CLKCTRL_SIZE	0x2000
 
-#include <mach/stmp3xxx_regs.h>
+#define HW_CLKCTRL_PLLCTRL0	0x0
+#define BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS	0x00040000
 
-#define REGS_CLKCTRL_BASE (REGS_BASE + 0x40000)
-#define REGS_CLKCTRL_BASE_PHYS (0x80040000)
-#define REGS_CLKCTRL_SIZE 0x00002000
-HW_REGISTER(HW_CLKCTRL_PLLCTRL0, REGS_CLKCTRL_BASE, 0x00000000)
-#define HW_CLKCTRL_PLLCTRL0_ADDR (REGS_CLKCTRL_BASE + 0x00000000)
-#define BP_CLKCTRL_PLLCTRL0_LFR_SEL      28
-#define BM_CLKCTRL_PLLCTRL0_LFR_SEL 0x30000000
-#define BF_CLKCTRL_PLLCTRL0_LFR_SEL(v)  \
-	(((v) << 28) & BM_CLKCTRL_PLLCTRL0_LFR_SEL)
-#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__DEFAULT   0x0
-#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__TIMES_2   0x1
-#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__TIMES_05  0x2
-#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__UNDEFINED 0x3
-#define BP_CLKCTRL_PLLCTRL0_CP_SEL      24
-#define BM_CLKCTRL_PLLCTRL0_CP_SEL 0x03000000
-#define BF_CLKCTRL_PLLCTRL0_CP_SEL(v)  \
-	(((v) << 24) & BM_CLKCTRL_PLLCTRL0_CP_SEL)
-#define BV_CLKCTRL_PLLCTRL0_CP_SEL__DEFAULT   0x0
-#define BV_CLKCTRL_PLLCTRL0_CP_SEL__TIMES_2   0x1
-#define BV_CLKCTRL_PLLCTRL0_CP_SEL__TIMES_05  0x2
-#define BV_CLKCTRL_PLLCTRL0_CP_SEL__UNDEFINED 0x3
-#define BP_CLKCTRL_PLLCTRL0_DIV_SEL      20
-#define BM_CLKCTRL_PLLCTRL0_DIV_SEL 0x00300000
-#define BF_CLKCTRL_PLLCTRL0_DIV_SEL(v)  \
-	(((v) << 20) & BM_CLKCTRL_PLLCTRL0_DIV_SEL)
-#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__DEFAULT   0x0
-#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__LOWER     0x1
-#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__LOWEST    0x2
-#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__UNDEFINED 0x3
-#define BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS 0x00040000
-#define BM_CLKCTRL_PLLCTRL0_POWER 0x00010000
-HW_REGISTER_0(HW_CLKCTRL_PLLCTRL1, REGS_CLKCTRL_BASE, 0x00000010)
-#define HW_CLKCTRL_PLLCTRL1_ADDR (REGS_CLKCTRL_BASE + 0x00000010)
-#define BM_CLKCTRL_PLLCTRL1_LOCK 0x80000000
-#define BM_CLKCTRL_PLLCTRL1_FORCE_LOCK 0x40000000
-#define BP_CLKCTRL_PLLCTRL1_LOCK_COUNT      0
-#define BM_CLKCTRL_PLLCTRL1_LOCK_COUNT 0x0000FFFF
-#define BF_CLKCTRL_PLLCTRL1_LOCK_COUNT(v)  \
-	(((v) << 0) & BM_CLKCTRL_PLLCTRL1_LOCK_COUNT)
-HW_REGISTER(HW_CLKCTRL_CPU, REGS_CLKCTRL_BASE, 0x00000020)
-#define HW_CLKCTRL_CPU_ADDR (REGS_CLKCTRL_BASE + 0x00000020)
-#define BM_CLKCTRL_CPU_BUSY_REF_XTAL 0x20000000
-#define BM_CLKCTRL_CPU_BUSY_REF_CPU 0x10000000
-#define BM_CLKCTRL_CPU_DIV_XTAL_FRAC_EN 0x04000000
-#define BP_CLKCTRL_CPU_DIV_XTAL      16
-#define BM_CLKCTRL_CPU_DIV_XTAL 0x03FF0000
-#define BF_CLKCTRL_CPU_DIV_XTAL(v)  \
-	(((v) << 16) & BM_CLKCTRL_CPU_DIV_XTAL)
-#define BM_CLKCTRL_CPU_INTERRUPT_WAIT 0x00001000
-#define BM_CLKCTRL_CPU_DIV_CPU_FRAC_EN 0x00000400
-#define BP_CLKCTRL_CPU_DIV_CPU      0
-#define BM_CLKCTRL_CPU_DIV_CPU 0x0000003F
-#define BF_CLKCTRL_CPU_DIV_CPU(v)  \
-	(((v) << 0) & BM_CLKCTRL_CPU_DIV_CPU)
-HW_REGISTER(HW_CLKCTRL_HBUS, REGS_CLKCTRL_BASE, 0x00000030)
-#define HW_CLKCTRL_HBUS_ADDR (REGS_CLKCTRL_BASE + 0x00000030)
-#define BM_CLKCTRL_HBUS_BUSY 0x20000000
-#define BM_CLKCTRL_HBUS_DCP_AS_ENABLE 0x10000000
-#define BM_CLKCTRL_HBUS_PXP_AS_ENABLE 0x08000000
-#define BM_CLKCTRL_HBUS_APBHDMA_AS_ENABLE 0x04000000
-#define BM_CLKCTRL_HBUS_APBXDMA_AS_ENABLE 0x02000000
-#define BM_CLKCTRL_HBUS_TRAFFIC_JAM_AS_ENABLE 0x01000000
-#define BM_CLKCTRL_HBUS_TRAFFIC_AS_ENABLE 0x00800000
-#define BM_CLKCTRL_HBUS_CPU_DATA_AS_ENABLE 0x00400000
-#define BM_CLKCTRL_HBUS_CPU_INSTR_AS_ENABLE 0x00200000
-#define BM_CLKCTRL_HBUS_AUTO_SLOW_MODE 0x00100000
-#define BP_CLKCTRL_HBUS_SLOW_DIV      16
-#define BM_CLKCTRL_HBUS_SLOW_DIV 0x00070000
-#define BF_CLKCTRL_HBUS_SLOW_DIV(v)  \
-	(((v) << 16) & BM_CLKCTRL_HBUS_SLOW_DIV)
-#define BV_CLKCTRL_HBUS_SLOW_DIV__BY1  0x0
-#define BV_CLKCTRL_HBUS_SLOW_DIV__BY2  0x1
-#define BV_CLKCTRL_HBUS_SLOW_DIV__BY4  0x2
-#define BV_CLKCTRL_HBUS_SLOW_DIV__BY8  0x3
-#define BV_CLKCTRL_HBUS_SLOW_DIV__BY16 0x4
-#define BV_CLKCTRL_HBUS_SLOW_DIV__BY32 0x5
-#define BM_CLKCTRL_HBUS_DIV_FRAC_EN 0x00000020
-#define BP_CLKCTRL_HBUS_DIV      0
-#define BM_CLKCTRL_HBUS_DIV 0x0000001F
-#define BF_CLKCTRL_HBUS_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_HBUS_DIV)
-HW_REGISTER_0(HW_CLKCTRL_XBUS, REGS_CLKCTRL_BASE, 0x00000040)
-#define HW_CLKCTRL_XBUS_ADDR (REGS_CLKCTRL_BASE + 0x00000040)
-#define BM_CLKCTRL_XBUS_BUSY 0x80000000
-#define BM_CLKCTRL_XBUS_DIV_FRAC_EN 0x00000400
-#define BP_CLKCTRL_XBUS_DIV      0
-#define BM_CLKCTRL_XBUS_DIV 0x000003FF
-#define BF_CLKCTRL_XBUS_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_XBUS_DIV)
-HW_REGISTER(HW_CLKCTRL_XTAL, REGS_CLKCTRL_BASE, 0x00000050)
-#define HW_CLKCTRL_XTAL_ADDR (REGS_CLKCTRL_BASE + 0x00000050)
-#define BM_CLKCTRL_XTAL_UART_CLK_GATE 0x80000000
-#define BM_CLKCTRL_XTAL_FILT_CLK24M_GATE 0x40000000
-#define BM_CLKCTRL_XTAL_PWM_CLK24M_GATE 0x20000000
-#define BM_CLKCTRL_XTAL_DRI_CLK24M_GATE 0x10000000
-#define BM_CLKCTRL_XTAL_DIGCTRL_CLK1M_GATE 0x08000000
-#define BM_CLKCTRL_XTAL_TIMROT_CLK32K_GATE 0x04000000
-#define BP_CLKCTRL_XTAL_DIV_UART      0
-#define BM_CLKCTRL_XTAL_DIV_UART 0x00000003
-#define BF_CLKCTRL_XTAL_DIV_UART(v)  \
-	(((v) << 0) & BM_CLKCTRL_XTAL_DIV_UART)
-HW_REGISTER_0(HW_CLKCTRL_PIX, REGS_CLKCTRL_BASE, 0x00000060)
-#define HW_CLKCTRL_PIX_ADDR (REGS_CLKCTRL_BASE + 0x00000060)
-#define BM_CLKCTRL_PIX_CLKGATE 0x80000000
-#define BM_CLKCTRL_PIX_BUSY 0x20000000
-#define BM_CLKCTRL_PIX_DIV_FRAC_EN 0x00001000
-#define BP_CLKCTRL_PIX_DIV      0
-#define BM_CLKCTRL_PIX_DIV 0x00000FFF
-#define BF_CLKCTRL_PIX_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_PIX_DIV)
-HW_REGISTER_0(HW_CLKCTRL_SSP, REGS_CLKCTRL_BASE, 0x00000070)
-#define HW_CLKCTRL_SSP_ADDR (REGS_CLKCTRL_BASE + 0x00000070)
-#define BM_CLKCTRL_SSP_CLKGATE 0x80000000
-#define BM_CLKCTRL_SSP_BUSY 0x20000000
-#define BM_CLKCTRL_SSP_DIV_FRAC_EN 0x00000200
-#define BP_CLKCTRL_SSP_DIV      0
-#define BM_CLKCTRL_SSP_DIV 0x000001FF
-#define BF_CLKCTRL_SSP_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_SSP_DIV)
-HW_REGISTER_0(HW_CLKCTRL_GPMI, REGS_CLKCTRL_BASE, 0x00000080)
-#define HW_CLKCTRL_GPMI_ADDR (REGS_CLKCTRL_BASE + 0x00000080)
-#define BM_CLKCTRL_GPMI_CLKGATE 0x80000000
-#define BM_CLKCTRL_GPMI_BUSY 0x20000000
-#define BM_CLKCTRL_GPMI_DIV_FRAC_EN 0x00000400
-#define BP_CLKCTRL_GPMI_DIV      0
-#define BM_CLKCTRL_GPMI_DIV 0x000003FF
-#define BF_CLKCTRL_GPMI_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_GPMI_DIV)
-HW_REGISTER_0(HW_CLKCTRL_SPDIF, REGS_CLKCTRL_BASE, 0x00000090)
-#define HW_CLKCTRL_SPDIF_ADDR (REGS_CLKCTRL_BASE + 0x00000090)
-#define BM_CLKCTRL_SPDIF_CLKGATE 0x80000000
-HW_REGISTER_0(HW_CLKCTRL_EMI, REGS_CLKCTRL_BASE, 0x000000a0)
-#define HW_CLKCTRL_EMI_ADDR (REGS_CLKCTRL_BASE + 0x000000a0)
-#define BM_CLKCTRL_EMI_CLKGATE 0x80000000
-#define BM_CLKCTRL_EMI_SYNC_MODE_EN 0x40000000
-#define BM_CLKCTRL_EMI_BUSY_REF_XTAL 0x20000000
-#define BM_CLKCTRL_EMI_BUSY_REF_EMI 0x10000000
-#define BM_CLKCTRL_EMI_BUSY_REF_CPU 0x08000000
-#define BM_CLKCTRL_EMI_BUSY_SYNC_MODE 0x04000000
-#define BM_CLKCTRL_EMI_BUSY_DCC_RESYNC 0x00020000
-#define BM_CLKCTRL_EMI_DCC_RESYNC_ENABLE 0x00010000
-#define BP_CLKCTRL_EMI_DIV_XTAL      8
-#define BM_CLKCTRL_EMI_DIV_XTAL 0x00000F00
-#define BF_CLKCTRL_EMI_DIV_XTAL(v)  \
-	(((v) << 8) & BM_CLKCTRL_EMI_DIV_XTAL)
-#define BP_CLKCTRL_EMI_DIV_EMI      0
-#define BM_CLKCTRL_EMI_DIV_EMI 0x0000003F
-#define BF_CLKCTRL_EMI_DIV_EMI(v)  \
-	(((v) << 0) & BM_CLKCTRL_EMI_DIV_EMI)
-HW_REGISTER_0(HW_CLKCTRL_IR, REGS_CLKCTRL_BASE, 0x000000b0)
-#define HW_CLKCTRL_IR_ADDR (REGS_CLKCTRL_BASE + 0x000000b0)
-#define BM_CLKCTRL_IR_CLKGATE 0x80000000
-#define BM_CLKCTRL_IR_AUTO_DIV 0x20000000
-#define BM_CLKCTRL_IR_IR_BUSY 0x10000000
-#define BM_CLKCTRL_IR_IROV_BUSY 0x08000000
-#define BP_CLKCTRL_IR_IROV_DIV      16
-#define BM_CLKCTRL_IR_IROV_DIV 0x01FF0000
-#define BF_CLKCTRL_IR_IROV_DIV(v)  \
-	(((v) << 16) & BM_CLKCTRL_IR_IROV_DIV)
-#define BP_CLKCTRL_IR_IR_DIV      0
-#define BM_CLKCTRL_IR_IR_DIV 0x000003FF
-#define BF_CLKCTRL_IR_IR_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_IR_IR_DIV)
-HW_REGISTER_0(HW_CLKCTRL_SAIF, REGS_CLKCTRL_BASE, 0x000000c0)
-#define HW_CLKCTRL_SAIF_ADDR (REGS_CLKCTRL_BASE + 0x000000c0)
-#define BM_CLKCTRL_SAIF_CLKGATE 0x80000000
-#define BM_CLKCTRL_SAIF_BUSY 0x20000000
-#define BM_CLKCTRL_SAIF_DIV_FRAC_EN 0x00010000
-#define BP_CLKCTRL_SAIF_DIV      0
-#define BM_CLKCTRL_SAIF_DIV 0x0000FFFF
-#define BF_CLKCTRL_SAIF_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_SAIF_DIV)
-HW_REGISTER_0(HW_CLKCTRL_TV, REGS_CLKCTRL_BASE, 0x000000d0)
-#define HW_CLKCTRL_TV_ADDR (REGS_CLKCTRL_BASE + 0x000000d0)
-#define BM_CLKCTRL_TV_CLK_TV108M_GATE 0x80000000
-#define BM_CLKCTRL_TV_CLK_TV_GATE 0x40000000
-HW_REGISTER_0(HW_CLKCTRL_ETM, REGS_CLKCTRL_BASE, 0x000000e0)
-#define HW_CLKCTRL_ETM_ADDR (REGS_CLKCTRL_BASE + 0x000000e0)
-#define BM_CLKCTRL_ETM_CLKGATE 0x80000000
-#define BM_CLKCTRL_ETM_BUSY 0x20000000
-#define BM_CLKCTRL_ETM_DIV_FRAC_EN 0x00000040
-#define BP_CLKCTRL_ETM_DIV      0
-#define BM_CLKCTRL_ETM_DIV 0x0000003F
-#define BF_CLKCTRL_ETM_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_ETM_DIV)
-HW_REGISTER(HW_CLKCTRL_FRAC, REGS_CLKCTRL_BASE, 0x000000f0)
-#define HW_CLKCTRL_FRAC_ADDR (REGS_CLKCTRL_BASE + 0x000000f0)
-#define BM_CLKCTRL_FRAC_CLKGATEIO 0x80000000
-#define BM_CLKCTRL_FRAC_IO_STABLE 0x40000000
-#define BP_CLKCTRL_FRAC_IOFRAC      24
-#define BM_CLKCTRL_FRAC_IOFRAC 0x3F000000
-#define BF_CLKCTRL_FRAC_IOFRAC(v)  \
-	(((v) << 24) & BM_CLKCTRL_FRAC_IOFRAC)
-#define BM_CLKCTRL_FRAC_CLKGATEPIX 0x00800000
-#define BM_CLKCTRL_FRAC_PIX_STABLE 0x00400000
-#define BP_CLKCTRL_FRAC_PIXFRAC      16
-#define BM_CLKCTRL_FRAC_PIXFRAC 0x003F0000
-#define BF_CLKCTRL_FRAC_PIXFRAC(v)  \
-	(((v) << 16) & BM_CLKCTRL_FRAC_PIXFRAC)
-#define BM_CLKCTRL_FRAC_CLKGATEEMI 0x00008000
-#define BM_CLKCTRL_FRAC_EMI_STABLE 0x00004000
-#define BP_CLKCTRL_FRAC_EMIFRAC      8
-#define BM_CLKCTRL_FRAC_EMIFRAC 0x00003F00
-#define BF_CLKCTRL_FRAC_EMIFRAC(v)  \
-	(((v) << 8) & BM_CLKCTRL_FRAC_EMIFRAC)
-#define BM_CLKCTRL_FRAC_CLKGATECPU 0x00000080
-#define BM_CLKCTRL_FRAC_CPU_STABLE 0x00000040
-#define BP_CLKCTRL_FRAC_CPUFRAC      0
-#define BM_CLKCTRL_FRAC_CPUFRAC 0x0000003F
-#define BF_CLKCTRL_FRAC_CPUFRAC(v)  \
-	(((v) << 0) & BM_CLKCTRL_FRAC_CPUFRAC)
-HW_REGISTER(HW_CLKCTRL_FRAC1, REGS_CLKCTRL_BASE, 0x00000100)
-#define HW_CLKCTRL_FRAC1_ADDR (REGS_CLKCTRL_BASE + 0x00000100)
-#define BM_CLKCTRL_FRAC1_CLKGATEVID 0x80000000
-#define BM_CLKCTRL_FRAC1_VID_STABLE 0x40000000
-HW_REGISTER(HW_CLKCTRL_CLKSEQ, REGS_CLKCTRL_BASE, 0x00000110)
-#define HW_CLKCTRL_CLKSEQ_ADDR (REGS_CLKCTRL_BASE + 0x00000110)
-#define BM_CLKCTRL_CLKSEQ_BYPASS_ETM 0x00000100
-#define BM_CLKCTRL_CLKSEQ_BYPASS_CPU 0x00000080
-#define BM_CLKCTRL_CLKSEQ_BYPASS_EMI 0x00000040
-#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP 0x00000020
-#define BM_CLKCTRL_CLKSEQ_BYPASS_GPMI 0x00000010
-#define BM_CLKCTRL_CLKSEQ_BYPASS_IR 0x00000008
-#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX 0x00000002
-#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF 0x00000001
-HW_REGISTER_0(HW_CLKCTRL_RESET, REGS_CLKCTRL_BASE, 0x00000120)
-#define HW_CLKCTRL_RESET_ADDR (REGS_CLKCTRL_BASE + 0x00000120)
-#define BM_CLKCTRL_RESET_CHIP 0x00000002
-#define BM_CLKCTRL_RESET_DIG 0x00000001
-HW_REGISTER_0(HW_CLKCTRL_STATUS, REGS_CLKCTRL_BASE, 0x00000130)
-#define HW_CLKCTRL_STATUS_ADDR (REGS_CLKCTRL_BASE + 0x00000130)
-#define BP_CLKCTRL_STATUS_CPU_LIMIT      30
-#define BM_CLKCTRL_STATUS_CPU_LIMIT 0xC0000000
-#define BF_CLKCTRL_STATUS_CPU_LIMIT(v) \
-	(((v) << 30) & BM_CLKCTRL_STATUS_CPU_LIMIT)
-HW_REGISTER_0(HW_CLKCTRL_VERSION, REGS_CLKCTRL_BASE, 0x00000140)
-#define HW_CLKCTRL_VERSION_ADDR (REGS_CLKCTRL_BASE + 0x00000140)
-#define BP_CLKCTRL_VERSION_MAJOR      24
-#define BM_CLKCTRL_VERSION_MAJOR 0xFF000000
-#define BF_CLKCTRL_VERSION_MAJOR(v) \
-	(((v) << 24) & BM_CLKCTRL_VERSION_MAJOR)
-#define BP_CLKCTRL_VERSION_MINOR      16
-#define BM_CLKCTRL_VERSION_MINOR 0x00FF0000
-#define BF_CLKCTRL_VERSION_MINOR(v)  \
-	(((v) << 16) & BM_CLKCTRL_VERSION_MINOR)
-#define BP_CLKCTRL_VERSION_STEP      0
-#define BM_CLKCTRL_VERSION_STEP 0x0000FFFF
-#define BF_CLKCTRL_VERSION_STEP(v)  \
-	(((v) << 0) & BM_CLKCTRL_VERSION_STEP)
-#endif /* __ARCH_ARM___CLKCTRL_H */
+#define HW_CLKCTRL_CPU		0x20
+#define BM_CLKCTRL_CPU_DIV_CPU	0x0000003F
+#define BP_CLKCTRL_CPU_DIV_CPU	0
+
+#define HW_CLKCTRL_HBUS		0x30
+#define BM_CLKCTRL_HBUS_DIV	0x0000001F
+#define BP_CLKCTRL_HBUS_DIV	0
+#define BM_CLKCTRL_HBUS_DIV_FRAC_EN	0x00000020
+
+#define HW_CLKCTRL_XBUS		0x40
+
+#define HW_CLKCTRL_XTAL		0x50
+#define BM_CLKCTRL_XTAL_DRI_CLK24M_GATE	0x10000000
+
+#define HW_CLKCTRL_PIX		0x60
+#define BM_CLKCTRL_PIX_DIV	0x00000FFF
+#define BP_CLKCTRL_PIX_DIV	0
+#define BM_CLKCTRL_PIX_CLKGATE	0x80000000
+
+#define HW_CLKCTRL_SSP		0x70
+
+#define HW_CLKCTRL_GPMI		0x80
+
+#define HW_CLKCTRL_SPDIF	0x90
+
+#define HW_CLKCTRL_EMI		0xA0
+#define BM_CLKCTRL_EMI_DIV_EMI	0x0000003F
+#define BP_CLKCTRL_EMI_DIV_EMI	0
+#define BM_CLKCTRL_EMI_DCC_RESYNC_ENABLE	0x00010000
+#define BM_CLKCTRL_EMI_BUSY_DCC_RESYNC	0x00020000
+#define BM_CLKCTRL_EMI_BUSY_REF_EMI	0x10000000
+#define BM_CLKCTRL_EMI_BUSY_REF_XTAL	0x20000000
+
+#define HW_CLKCTRL_IR		0xB0
+
+#define HW_CLKCTRL_SAIF		0xC0
+
+#define HW_CLKCTRL_TV		0xD0
+
+#define HW_CLKCTRL_ETM		0xE0
+
+#define HW_CLKCTRL_FRAC		0xF0
+#define BM_CLKCTRL_FRAC_EMIFRAC	0x00003F00
+#define BP_CLKCTRL_FRAC_EMIFRAC	8
+#define BM_CLKCTRL_FRAC_PIXFRAC	0x003F0000
+#define BP_CLKCTRL_FRAC_PIXFRAC	16
+#define BM_CLKCTRL_FRAC_CLKGATEPIX	0x00800000
+
+#define HW_CLKCTRL_FRAC1	0x100
+
+#define HW_CLKCTRL_CLKSEQ	0x110
+#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX	0x00000002
+
+#define HW_CLKCTRL_RESET	0x120
+#define BM_CLKCTRL_RESET_DIG	0x00000001
+#define BP_CLKCTRL_RESET_DIG	0
+
+#endif
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-icoll.h b/arch/arm/mach-stmp378x/include/mach/regs-icoll.h
index a5a530c..f996e80 100644
--- a/arch/arm/mach-stmp378x/include/mach/regs-icoll.h
+++ b/arch/arm/mach-stmp378x/include/mach/regs-icoll.h
@@ -1,5 +1,5 @@
 /*
- * STMP ICOLL Register Definitions
+ * stmp378x: ICOLL register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
@@ -18,196 +18,28 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
+#ifndef _MACH_REGS_ICOLL
+#define _MACH_REGS_ICOLL
 
-#ifndef __ARCH_ARM___ICOLL_H
-#define __ARCH_ARM___ICOLL_H  1
+#define REGS_ICOLL_BASE	(STMP3XXX_REGS_BASE + 0x0)
+#define REGS_ICOLL_PHYS	0x80000000
+#define REGS_ICOLL_SIZE	0x2000
 
-#include <mach/stmp3xxx_regs.h>
+#define HW_ICOLL_VECTOR		0x0
 
-#define REGS_ICOLL_BASE (REGS_BASE + 0x0)
-#define REGS_ICOLL_BASE_PHYS (0x80000000)
-#define REGS_ICOLL_SIZE 0x00002000
-HW_REGISTER(HW_ICOLL_VECTOR, REGS_ICOLL_BASE, 0x00000000)
-#define HW_ICOLL_VECTOR_ADDR (REGS_ICOLL_BASE + 0x00000000)
-#define BP_ICOLL_VECTOR_IRQVECTOR      2
-#define BM_ICOLL_VECTOR_IRQVECTOR 0xFFFFFFFC
-#define BF_ICOLL_VECTOR_IRQVECTOR(v) \
-	(((v) << 2) & BM_ICOLL_VECTOR_IRQVECTOR)
-HW_REGISTER_0(HW_ICOLL_LEVELACK, REGS_ICOLL_BASE, 0x00000010)
-#define HW_ICOLL_LEVELACK_ADDR (REGS_ICOLL_BASE + 0x00000010)
-#define BP_ICOLL_LEVELACK_IRQLEVELACK      0
-#define BM_ICOLL_LEVELACK_IRQLEVELACK 0x0000000F
-#define BF_ICOLL_LEVELACK_IRQLEVELACK(v)  \
-	(((v) << 0) & BM_ICOLL_LEVELACK_IRQLEVELACK)
-#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0 0x1
-#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL1 0x2
-#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL2 0x4
-#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL3 0x8
-HW_REGISTER(HW_ICOLL_CTRL, REGS_ICOLL_BASE, 0x00000020)
-#define HW_ICOLL_CTRL_ADDR (REGS_ICOLL_BASE + 0x00000020)
-#define BM_ICOLL_CTRL_SFTRST 0x80000000
-#define BV_ICOLL_CTRL_SFTRST__RUN      0x0
-#define BV_ICOLL_CTRL_SFTRST__IN_RESET 0x1
-#define BM_ICOLL_CTRL_CLKGATE 0x40000000
-#define BV_ICOLL_CTRL_CLKGATE__RUN       0x0
-#define BV_ICOLL_CTRL_CLKGATE__NO_CLOCKS 0x1
-#define BP_ICOLL_CTRL_VECTOR_PITCH      21
-#define BM_ICOLL_CTRL_VECTOR_PITCH 0x00E00000
-#define BF_ICOLL_CTRL_VECTOR_PITCH(v)  \
-	(((v) << 21) & BM_ICOLL_CTRL_VECTOR_PITCH)
-#define BV_ICOLL_CTRL_VECTOR_PITCH__DEFAULT_BY4 0x0
-#define BV_ICOLL_CTRL_VECTOR_PITCH__BY4	 0x1
-#define BV_ICOLL_CTRL_VECTOR_PITCH__BY8	 0x2
-#define BV_ICOLL_CTRL_VECTOR_PITCH__BY12	0x3
-#define BV_ICOLL_CTRL_VECTOR_PITCH__BY16	0x4
-#define BV_ICOLL_CTRL_VECTOR_PITCH__BY20	0x5
-#define BV_ICOLL_CTRL_VECTOR_PITCH__BY24	0x6
-#define BV_ICOLL_CTRL_VECTOR_PITCH__BY28	0x7
-#define BM_ICOLL_CTRL_BYPASS_FSM 0x00100000
-#define BV_ICOLL_CTRL_BYPASS_FSM__NORMAL 0x0
-#define BV_ICOLL_CTRL_BYPASS_FSM__BYPASS 0x1
-#define BM_ICOLL_CTRL_NO_NESTING 0x00080000
-#define BV_ICOLL_CTRL_NO_NESTING__NORMAL  0x0
-#define BV_ICOLL_CTRL_NO_NESTING__NO_NEST 0x1
-#define BM_ICOLL_CTRL_ARM_RSE_MODE 0x00040000
-#define BM_ICOLL_CTRL_FIQ_FINAL_ENABLE 0x00020000
-#define BV_ICOLL_CTRL_FIQ_FINAL_ENABLE__DISABLE 0x0
-#define BV_ICOLL_CTRL_FIQ_FINAL_ENABLE__ENABLE  0x1
-#define BM_ICOLL_CTRL_IRQ_FINAL_ENABLE 0x00010000
-#define BV_ICOLL_CTRL_IRQ_FINAL_ENABLE__DISABLE 0x0
-#define BV_ICOLL_CTRL_IRQ_FINAL_ENABLE__ENABLE  0x1
-HW_REGISTER(HW_ICOLL_VBASE, REGS_ICOLL_BASE, 0x00000040)
-#define HW_ICOLL_VBASE_ADDR (REGS_ICOLL_BASE + 0x00000040)
-#define BP_ICOLL_VBASE_TABLE_ADDRESS      2
-#define BM_ICOLL_VBASE_TABLE_ADDRESS 0xFFFFFFFC
-#define BF_ICOLL_VBASE_TABLE_ADDRESS(v) \
-	(((v) << 2) & BM_ICOLL_VBASE_TABLE_ADDRESS)
-HW_REGISTER_0(HW_ICOLL_STAT, REGS_ICOLL_BASE, 0x00000070)
-#define HW_ICOLL_STAT_ADDR (REGS_ICOLL_BASE + 0x00000070)
-#define BP_ICOLL_STAT_VECTOR_NUMBER      0
-#define BM_ICOLL_STAT_VECTOR_NUMBER 0x0000007F
-#define BF_ICOLL_STAT_VECTOR_NUMBER(v)  \
-	(((v) << 0) & BM_ICOLL_STAT_VECTOR_NUMBER)
-/*
- *  multi-register-define name HW_ICOLL_RAWn
- *	      base 0x000000A0
- *	      count 4
- *	      offset 0x10
- */
-HW_REGISTER_0_INDEXED(HW_ICOLL_RAWn, REGS_ICOLL_BASE, 0x000000a0, 0x10)
-#define BP_ICOLL_RAWn_RAW_IRQS      0
-#define BM_ICOLL_RAWn_RAW_IRQS 0xFFFFFFFF
-#define BF_ICOLL_RAWn_RAW_IRQS(v)   (v)
-/*
- *  multi-register-define name HW_ICOLL_INTERRUPTn
- *	      base 0x00000120
- *	      count 128
- *	      offset 0x10
- */
-HW_REGISTER_INDEXED(HW_ICOLL_INTERRUPTn, REGS_ICOLL_BASE, 0x00000120, 0x10)
-#define BM_ICOLL_INTERRUPTn_ENFIQ 0x00000010
-#define BV_ICOLL_INTERRUPTn_ENFIQ__DISABLE 0x0
-#define BV_ICOLL_INTERRUPTn_ENFIQ__ENABLE  0x1
-#define BM_ICOLL_INTERRUPTn_SOFTIRQ 0x00000008
-#define BV_ICOLL_INTERRUPTn_SOFTIRQ__NO_INTERRUPT    0x0
-#define BV_ICOLL_INTERRUPTn_SOFTIRQ__FORCE_INTERRUPT 0x1
-#define BM_ICOLL_INTERRUPTn_ENABLE 0x00000004
-#define BV_ICOLL_INTERRUPTn_ENABLE__DISABLE 0x0
-#define BV_ICOLL_INTERRUPTn_ENABLE__ENABLE  0x1
-#define BP_ICOLL_INTERRUPTn_PRIORITY      0
-#define BM_ICOLL_INTERRUPTn_PRIORITY 0x00000003
-#define BF_ICOLL_INTERRUPTn_PRIORITY(v)  \
-	(((v) << 0) & BM_ICOLL_INTERRUPTn_PRIORITY)
-#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL0 0x0
-#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL1 0x1
-#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL2 0x2
-#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL3 0x3
-HW_REGISTER(HW_ICOLL_DEBUG, REGS_ICOLL_BASE, 0x00001120)
-#define HW_ICOLL_DEBUG_ADDR (REGS_ICOLL_BASE + 0x00001120)
-#define BP_ICOLL_DEBUG_INSERVICE      28
-#define BM_ICOLL_DEBUG_INSERVICE 0xF0000000
-#define BF_ICOLL_DEBUG_INSERVICE(v) \
-	(((v) << 28) & BM_ICOLL_DEBUG_INSERVICE)
-#define BV_ICOLL_DEBUG_INSERVICE__LEVEL0 0x1
-#define BV_ICOLL_DEBUG_INSERVICE__LEVEL1 0x2
-#define BV_ICOLL_DEBUG_INSERVICE__LEVEL2 0x4
-#define BV_ICOLL_DEBUG_INSERVICE__LEVEL3 0x8
-#define BP_ICOLL_DEBUG_LEVEL_REQUESTS      24
-#define BM_ICOLL_DEBUG_LEVEL_REQUESTS 0x0F000000
-#define BF_ICOLL_DEBUG_LEVEL_REQUESTS(v)  \
-	(((v) << 24) & BM_ICOLL_DEBUG_LEVEL_REQUESTS)
-#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL0 0x1
-#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL1 0x2
-#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL2 0x4
-#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL3 0x8
-#define BP_ICOLL_DEBUG_REQUESTS_BY_LEVEL      20
-#define BM_ICOLL_DEBUG_REQUESTS_BY_LEVEL 0x00F00000
-#define BF_ICOLL_DEBUG_REQUESTS_BY_LEVEL(v)  \
-	(((v) << 20) & BM_ICOLL_DEBUG_REQUESTS_BY_LEVEL)
-#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL0 0x1
-#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL1 0x2
-#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL2 0x4
-#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL3 0x8
-#define BM_ICOLL_DEBUG_FIQ 0x00020000
-#define BV_ICOLL_DEBUG_FIQ__NO_FIQ_REQUESTED 0x0
-#define BV_ICOLL_DEBUG_FIQ__FIQ_REQUESTED    0x1
-#define BM_ICOLL_DEBUG_IRQ 0x00010000
-#define BV_ICOLL_DEBUG_IRQ__NO_IRQ_REQUESTED 0x0
-#define BV_ICOLL_DEBUG_IRQ__IRQ_REQUESTED    0x1
-#define BP_ICOLL_DEBUG_VECTOR_FSM      0
-#define BM_ICOLL_DEBUG_VECTOR_FSM 0x000003FF
-#define BF_ICOLL_DEBUG_VECTOR_FSM(v)  \
-	(((v) << 0) & BM_ICOLL_DEBUG_VECTOR_FSM)
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_IDLE	 0x000
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE1  0x001
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE2  0x002
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_PENDING      0x004
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE3  0x008
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE4  0x010
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING1 0x020
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING2 0x040
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING3 0x080
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE5  0x100
-#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE6  0x200
-HW_REGISTER(HW_ICOLL_DBGREAD0, REGS_ICOLL_BASE, 0x00001130)
-#define HW_ICOLL_DBGREAD0_ADDR (REGS_ICOLL_BASE + 0x00001130)
-#define BP_ICOLL_DBGREAD0_VALUE      0
-#define BM_ICOLL_DBGREAD0_VALUE 0xFFFFFFFF
-#define BF_ICOLL_DBGREAD0_VALUE(v)   (v)
-HW_REGISTER(HW_ICOLL_DBGREAD1, REGS_ICOLL_BASE, 0x00001140)
-#define HW_ICOLL_DBGREAD1_ADDR (REGS_ICOLL_BASE + 0x00001140)
-#define BP_ICOLL_DBGREAD1_VALUE      0
-#define BM_ICOLL_DBGREAD1_VALUE 0xFFFFFFFF
-#define BF_ICOLL_DBGREAD1_VALUE(v)   (v)
-HW_REGISTER(HW_ICOLL_DBGFLAG, REGS_ICOLL_BASE, 0x00001150)
-#define HW_ICOLL_DBGFLAG_ADDR (REGS_ICOLL_BASE + 0x00001150)
-#define BP_ICOLL_DBGFLAG_FLAG      0
-#define BM_ICOLL_DBGFLAG_FLAG 0x0000FFFF
-#define BF_ICOLL_DBGFLAG_FLAG(v)  \
-	(((v) << 0) & BM_ICOLL_DBGFLAG_FLAG)
-/*
- *  multi-register-define name HW_ICOLL_DBGREQUESTn
- *	      base 0x00001160
- *	      count 4
- *	      offset 0x10
- */
-HW_REGISTER_0_INDEXED(HW_ICOLL_DBGREQUESTn, REGS_ICOLL_BASE, 0x00001160,
-		       0x10)
-#define BP_ICOLL_DBGREQUESTn_BITS      0
-#define BM_ICOLL_DBGREQUESTn_BITS 0xFFFFFFFF
-#define BF_ICOLL_DBGREQUESTn_BITS(v)   (v)
-HW_REGISTER_0(HW_ICOLL_VERSION, REGS_ICOLL_BASE, 0x000011e0)
-#define HW_ICOLL_VERSION_ADDR (REGS_ICOLL_BASE + 0x000011e0)
-#define BP_ICOLL_VERSION_MAJOR      24
-#define BM_ICOLL_VERSION_MAJOR 0xFF000000
-#define BF_ICOLL_VERSION_MAJOR(v) \
-	(((v) << 24) & BM_ICOLL_VERSION_MAJOR)
-#define BP_ICOLL_VERSION_MINOR      16
-#define BM_ICOLL_VERSION_MINOR 0x00FF0000
-#define BF_ICOLL_VERSION_MINOR(v)  \
-	(((v) << 16) & BM_ICOLL_VERSION_MINOR)
-#define BP_ICOLL_VERSION_STEP      0
-#define BM_ICOLL_VERSION_STEP 0x0000FFFF
-#define BF_ICOLL_VERSION_STEP(v)  \
-	(((v) << 0) & BM_ICOLL_VERSION_STEP)
-#endif /* __ARCH_ARM___ICOLL_H */
+#define HW_ICOLL_LEVELACK	0x10
+#define BM_ICOLL_LEVELACK_IRQLEVELACK	0x0000000F
+#define BP_ICOLL_LEVELACK_IRQLEVELACK	0
+
+#define HW_ICOLL_CTRL		0x20
+#define BM_ICOLL_CTRL_CLKGATE	0x40000000
+#define BM_ICOLL_CTRL_SFTRST	0x80000000
+
+#define HW_ICOLL_STAT		0x70
+
+#define HW_ICOLL_INTERRUPTn	0x120
+
+#define HW_ICOLL_INTERRUPTn	0x120
+#define BM_ICOLL_INTERRUPTn_ENABLE	0x00000004
+
+#endif
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h b/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h
index 6c42d2a..50d90ea 100644
--- a/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h
+++ b/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h
@@ -1,5 +1,5 @@
 /*
- * STMP PINCTRL Register Definitions
+ * stmp378x: PINCTRL register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
@@ -18,126 +18,73 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
+#ifndef _MACH_REGS_PINCTRL
+#define _MACH_REGS_PINCTRL
 
-#ifndef __ARCH_ARM___PINCTRL_H
-#define __ARCH_ARM___PINCTRL_H  1
+#define REGS_PINCTRL_BASE	(STMP3XXX_REGS_BASE + 0x18000)
+#define REGS_PINCTRL_PHYS	0x80018000
+#define REGS_PINCTRL_SIZE	0x2000
 
-#include <mach/stmp3xxx_regs.h>
+#define HW_PINCTRL_MUXSEL0	0x100
+#define HW_PINCTRL_MUXSEL1	0x110
+#define HW_PINCTRL_MUXSEL2	0x120
+#define HW_PINCTRL_MUXSEL3	0x130
+#define HW_PINCTRL_MUXSEL4	0x140
+#define HW_PINCTRL_MUXSEL5	0x150
+#define HW_PINCTRL_MUXSEL6	0x160
+#define HW_PINCTRL_MUXSEL7	0x170
 
-#define REGS_PINCTRL_BASE (REGS_BASE + 0x18000)
-#define REGS_PINCTRL_BASE_PHYS (0x80018000)
-#define REGS_PINCTRL_SIZE 0x00002000
-HW_REGISTER(HW_PINCTRL_CTRL, REGS_PINCTRL_BASE, 0x00000000)
-#define HW_PINCTRL_CTRL_ADDR (REGS_PINCTRL_BASE + 0x00000000)
-#define BM_PINCTRL_CTRL_SFTRST 0x80000000
-#define BM_PINCTRL_CTRL_CLKGATE 0x40000000
-#define BM_PINCTRL_CTRL_PRESENT3 0x08000000
-#define BM_PINCTRL_CTRL_PRESENT2 0x04000000
-#define BM_PINCTRL_CTRL_PRESENT1 0x02000000
-#define BM_PINCTRL_CTRL_PRESENT0 0x01000000
-#define BM_PINCTRL_CTRL_IRQOUT2 0x00000004
-#define BM_PINCTRL_CTRL_IRQOUT1 0x00000002
-#define BM_PINCTRL_CTRL_IRQOUT0 0x00000001
-HW_REGISTER(HW_PINCTRL_MUXSEL0, REGS_PINCTRL_BASE, 0x00000100)
-#define HW_PINCTRL_MUXSEL0_ADDR (REGS_PINCTRL_BASE + 0x00000100)
-HW_REGISTER(HW_PINCTRL_MUXSEL1, REGS_PINCTRL_BASE, 0x00000110)
-#define HW_PINCTRL_MUXSEL1_ADDR (REGS_PINCTRL_BASE + 0x00000110)
-HW_REGISTER(HW_PINCTRL_MUXSEL2, REGS_PINCTRL_BASE, 0x00000120)
-#define HW_PINCTRL_MUXSEL2_ADDR (REGS_PINCTRL_BASE + 0x00000120)
-HW_REGISTER(HW_PINCTRL_MUXSEL3, REGS_PINCTRL_BASE, 0x00000130)
-#define HW_PINCTRL_MUXSEL3_ADDR (REGS_PINCTRL_BASE + 0x00000130)
-HW_REGISTER(HW_PINCTRL_MUXSEL4, REGS_PINCTRL_BASE, 0x00000140)
-#define HW_PINCTRL_MUXSEL4_ADDR (REGS_PINCTRL_BASE + 0x00000140)
-HW_REGISTER(HW_PINCTRL_MUXSEL5, REGS_PINCTRL_BASE, 0x00000150)
-#define HW_PINCTRL_MUXSEL5_ADDR (REGS_PINCTRL_BASE + 0x00000150)
-HW_REGISTER(HW_PINCTRL_MUXSEL6, REGS_PINCTRL_BASE, 0x00000160)
-#define HW_PINCTRL_MUXSEL6_ADDR (REGS_PINCTRL_BASE + 0x00000160)
-HW_REGISTER(HW_PINCTRL_MUXSEL7, REGS_PINCTRL_BASE, 0x00000170)
-#define HW_PINCTRL_MUXSEL7_ADDR (REGS_PINCTRL_BASE + 0x00000170)
-HW_REGISTER(HW_PINCTRL_DRIVE0, REGS_PINCTRL_BASE, 0x00000200)
-#define HW_PINCTRL_DRIVE0_ADDR (REGS_PINCTRL_BASE + 0x00000200)
-HW_REGISTER(HW_PINCTRL_DRIVE1, REGS_PINCTRL_BASE, 0x00000210)
-#define HW_PINCTRL_DRIVE1_ADDR (REGS_PINCTRL_BASE + 0x00000210)
-HW_REGISTER(HW_PINCTRL_DRIVE2, REGS_PINCTRL_BASE, 0x00000220)
-#define HW_PINCTRL_DRIVE2_ADDR (REGS_PINCTRL_BASE + 0x00000220)
-HW_REGISTER(HW_PINCTRL_DRIVE3, REGS_PINCTRL_BASE, 0x00000230)
-#define HW_PINCTRL_DRIVE3_ADDR (REGS_PINCTRL_BASE + 0x00000230)
-HW_REGISTER(HW_PINCTRL_DRIVE4, REGS_PINCTRL_BASE, 0x00000240)
-#define HW_PINCTRL_DRIVE4_ADDR (REGS_PINCTRL_BASE + 0x00000240)
-HW_REGISTER(HW_PINCTRL_DRIVE5, REGS_PINCTRL_BASE, 0x00000250)
-#define HW_PINCTRL_DRIVE5_ADDR (REGS_PINCTRL_BASE + 0x00000250)
-HW_REGISTER(HW_PINCTRL_DRIVE6, REGS_PINCTRL_BASE, 0x00000260)
-#define HW_PINCTRL_DRIVE6_ADDR (REGS_PINCTRL_BASE + 0x00000260)
-HW_REGISTER(HW_PINCTRL_DRIVE7, REGS_PINCTRL_BASE, 0x00000270)
-#define HW_PINCTRL_DRIVE7_ADDR (REGS_PINCTRL_BASE + 0x00000270)
-HW_REGISTER(HW_PINCTRL_DRIVE8, REGS_PINCTRL_BASE, 0x00000280)
-#define HW_PINCTRL_DRIVE8_ADDR (REGS_PINCTRL_BASE + 0x00000280)
-HW_REGISTER(HW_PINCTRL_DRIVE9, REGS_PINCTRL_BASE, 0x00000290)
-#define HW_PINCTRL_DRIVE9_ADDR (REGS_PINCTRL_BASE + 0x00000290)
-HW_REGISTER(HW_PINCTRL_DRIVE10, REGS_PINCTRL_BASE, 0x000002a0)
-#define HW_PINCTRL_DRIVE10_ADDR (REGS_PINCTRL_BASE + 0x000002a0)
-HW_REGISTER(HW_PINCTRL_DRIVE11, REGS_PINCTRL_BASE, 0x000002b0)
-#define HW_PINCTRL_DRIVE11_ADDR (REGS_PINCTRL_BASE + 0x000002b0)
-HW_REGISTER(HW_PINCTRL_DRIVE12, REGS_PINCTRL_BASE, 0x000002c0)
-#define HW_PINCTRL_DRIVE12_ADDR (REGS_PINCTRL_BASE + 0x000002c0)
-HW_REGISTER(HW_PINCTRL_DRIVE13, REGS_PINCTRL_BASE, 0x000002d0)
-#define HW_PINCTRL_DRIVE13_ADDR (REGS_PINCTRL_BASE + 0x000002d0)
-HW_REGISTER(HW_PINCTRL_DRIVE14, REGS_PINCTRL_BASE, 0x000002e0)
-#define HW_PINCTRL_DRIVE14_ADDR (REGS_PINCTRL_BASE + 0x000002e0)
-HW_REGISTER(HW_PINCTRL_PULL0, REGS_PINCTRL_BASE, 0x00000400)
-#define HW_PINCTRL_PULL0_ADDR (REGS_PINCTRL_BASE + 0x00000400)
-HW_REGISTER(HW_PINCTRL_PULL1, REGS_PINCTRL_BASE, 0x00000410)
-#define HW_PINCTRL_PULL1_ADDR (REGS_PINCTRL_BASE + 0x00000410)
-HW_REGISTER(HW_PINCTRL_PULL2, REGS_PINCTRL_BASE, 0x00000420)
-#define HW_PINCTRL_PULL2_ADDR (REGS_PINCTRL_BASE + 0x00000420)
-HW_REGISTER(HW_PINCTRL_PULL3, REGS_PINCTRL_BASE, 0x00000430)
-#define HW_PINCTRL_PULL3_ADDR (REGS_PINCTRL_BASE + 0x00000430)
-HW_REGISTER(HW_PINCTRL_DOUT0, REGS_PINCTRL_BASE, 0x00000500)
-#define HW_PINCTRL_DOUT0_ADDR (REGS_PINCTRL_BASE + 0x00000500)
-HW_REGISTER(HW_PINCTRL_DOUT1, REGS_PINCTRL_BASE, 0x00000510)
-#define HW_PINCTRL_DOUT1_ADDR (REGS_PINCTRL_BASE + 0x00000510)
-HW_REGISTER(HW_PINCTRL_DOUT2, REGS_PINCTRL_BASE, 0x00000520)
-#define HW_PINCTRL_DOUT2_ADDR (REGS_PINCTRL_BASE + 0x00000520)
-HW_REGISTER(HW_PINCTRL_DIN0, REGS_PINCTRL_BASE, 0x00000600)
-#define HW_PINCTRL_DIN0_ADDR (REGS_PINCTRL_BASE + 0x00000600)
-HW_REGISTER(HW_PINCTRL_DIN1, REGS_PINCTRL_BASE, 0x00000610)
-#define HW_PINCTRL_DIN1_ADDR (REGS_PINCTRL_BASE + 0x00000610)
-HW_REGISTER(HW_PINCTRL_DIN2, REGS_PINCTRL_BASE, 0x00000620)
-#define HW_PINCTRL_DIN2_ADDR (REGS_PINCTRL_BASE + 0x00000620)
-HW_REGISTER(HW_PINCTRL_DOE0, REGS_PINCTRL_BASE, 0x00000700)
-#define HW_PINCTRL_DOE0_ADDR (REGS_PINCTRL_BASE + 0x00000700)
-HW_REGISTER(HW_PINCTRL_DOE1, REGS_PINCTRL_BASE, 0x00000710)
-#define HW_PINCTRL_DOE1_ADDR (REGS_PINCTRL_BASE + 0x00000710)
-HW_REGISTER(HW_PINCTRL_DOE2, REGS_PINCTRL_BASE, 0x00000720)
-#define HW_PINCTRL_DOE2_ADDR (REGS_PINCTRL_BASE + 0x00000720)
-HW_REGISTER(HW_PINCTRL_PIN2IRQ0, REGS_PINCTRL_BASE, 0x00000800)
-#define HW_PINCTRL_PIN2IRQ0_ADDR (REGS_PINCTRL_BASE + 0x00000800)
-HW_REGISTER(HW_PINCTRL_PIN2IRQ1, REGS_PINCTRL_BASE, 0x00000810)
-#define HW_PINCTRL_PIN2IRQ1_ADDR (REGS_PINCTRL_BASE + 0x00000810)
-HW_REGISTER(HW_PINCTRL_PIN2IRQ2, REGS_PINCTRL_BASE, 0x00000820)
-#define HW_PINCTRL_PIN2IRQ2_ADDR (REGS_PINCTRL_BASE + 0x00000820)
-HW_REGISTER(HW_PINCTRL_IRQEN0, REGS_PINCTRL_BASE, 0x00000900)
-#define HW_PINCTRL_IRQEN0_ADDR (REGS_PINCTRL_BASE + 0x00000900)
-HW_REGISTER(HW_PINCTRL_IRQEN1, REGS_PINCTRL_BASE, 0x00000910)
-#define HW_PINCTRL_IRQEN1_ADDR (REGS_PINCTRL_BASE + 0x00000910)
-HW_REGISTER(HW_PINCTRL_IRQEN2, REGS_PINCTRL_BASE, 0x00000920)
-#define HW_PINCTRL_IRQEN2_ADDR (REGS_PINCTRL_BASE + 0x00000920)
-HW_REGISTER(HW_PINCTRL_IRQLEVEL0, REGS_PINCTRL_BASE, 0x00000a00)
-#define HW_PINCTRL_IRQLEVEL0_ADDR (REGS_PINCTRL_BASE + 0x00000a00)
-HW_REGISTER(HW_PINCTRL_IRQLEVEL1, REGS_PINCTRL_BASE, 0x00000a10)
-#define HW_PINCTRL_IRQLEVEL1_ADDR (REGS_PINCTRL_BASE + 0x00000a10)
-HW_REGISTER(HW_PINCTRL_IRQLEVEL2, REGS_PINCTRL_BASE, 0x00000a20)
-#define HW_PINCTRL_IRQLEVEL2_ADDR (REGS_PINCTRL_BASE + 0x00000a20)
-HW_REGISTER(HW_PINCTRL_IRQPOL0, REGS_PINCTRL_BASE, 0x00000b00)
-#define HW_PINCTRL_IRQPOL0_ADDR (REGS_PINCTRL_BASE + 0x00000b00)
-HW_REGISTER(HW_PINCTRL_IRQPOL1, REGS_PINCTRL_BASE, 0x00000b10)
-#define HW_PINCTRL_IRQPOL1_ADDR (REGS_PINCTRL_BASE + 0x00000b10)
-HW_REGISTER(HW_PINCTRL_IRQPOL2, REGS_PINCTRL_BASE, 0x00000b20)
-#define HW_PINCTRL_IRQPOL2_ADDR (REGS_PINCTRL_BASE + 0x00000b20)
-HW_REGISTER(HW_PINCTRL_IRQSTAT0, REGS_PINCTRL_BASE, 0x00000c00)
-#define HW_PINCTRL_IRQSTAT0_ADDR (REGS_PINCTRL_BASE + 0x00000c00)
-HW_REGISTER(HW_PINCTRL_IRQSTAT1, REGS_PINCTRL_BASE, 0x00000c10)
-#define HW_PINCTRL_IRQSTAT1_ADDR (REGS_PINCTRL_BASE + 0x00000c10)
-HW_REGISTER(HW_PINCTRL_IRQSTAT2, REGS_PINCTRL_BASE, 0x00000c20)
-#define HW_PINCTRL_IRQSTAT2_ADDR (REGS_PINCTRL_BASE + 0x00000c20)
-#endif /* __ARCH_ARM___PINCTRL_H */
+#define HW_PINCTRL_DRIVE0	0x200
+#define HW_PINCTRL_DRIVE1	0x210
+#define HW_PINCTRL_DRIVE2	0x220
+#define HW_PINCTRL_DRIVE3	0x230
+#define HW_PINCTRL_DRIVE4	0x240
+#define HW_PINCTRL_DRIVE5	0x250
+#define HW_PINCTRL_DRIVE6	0x260
+#define HW_PINCTRL_DRIVE7	0x270
+#define HW_PINCTRL_DRIVE8	0x280
+#define HW_PINCTRL_DRIVE9	0x290
+#define HW_PINCTRL_DRIVE10	0x2A0
+#define HW_PINCTRL_DRIVE11	0x2B0
+#define HW_PINCTRL_DRIVE12	0x2C0
+#define HW_PINCTRL_DRIVE13	0x2D0
+#define HW_PINCTRL_DRIVE14	0x2E0
+
+#define HW_PINCTRL_PULL0	0x400
+#define HW_PINCTRL_PULL1	0x410
+#define HW_PINCTRL_PULL2	0x420
+#define HW_PINCTRL_PULL3	0x430
+
+#define HW_PINCTRL_DOUT0	0x500
+#define HW_PINCTRL_DOUT1	0x510
+#define HW_PINCTRL_DOUT2	0x520
+
+#define HW_PINCTRL_DIN0		0x600
+#define HW_PINCTRL_DIN1		0x610
+#define HW_PINCTRL_DIN2		0x620
+
+#define HW_PINCTRL_DOE0		0x700
+#define HW_PINCTRL_DOE1		0x710
+#define HW_PINCTRL_DOE2		0x720
+
+#define HW_PINCTRL_PIN2IRQ0	0x800
+#define HW_PINCTRL_PIN2IRQ1	0x810
+#define HW_PINCTRL_PIN2IRQ2	0x820
+
+#define HW_PINCTRL_IRQEN0	0x900
+#define HW_PINCTRL_IRQEN1	0x910
+#define HW_PINCTRL_IRQEN2	0x920
+
+#define HW_PINCTRL_IRQLEVEL0	0xA00
+#define HW_PINCTRL_IRQLEVEL1	0xA10
+#define HW_PINCTRL_IRQLEVEL2	0xA20
+
+#define HW_PINCTRL_IRQPOL0	0xB00
+#define HW_PINCTRL_IRQPOL1	0xB10
+#define HW_PINCTRL_IRQPOL2	0xB20
+
+#define HW_PINCTRL_IRQSTAT0	0xC00
+#define HW_PINCTRL_IRQSTAT1	0xC10
+#define HW_PINCTRL_IRQSTAT2	0xC20
+
+#endif
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-power.h b/arch/arm/mach-stmp378x/include/mach/regs-power.h
index 1c81afe..e454c83 100644
--- a/arch/arm/mach-stmp378x/include/mach/regs-power.h
+++ b/arch/arm/mach-stmp378x/include/mach/regs-power.h
@@ -1,5 +1,5 @@
 /*
- * STMP POWER Register Definitions
+ * stmp378x: POWER register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
@@ -18,15 +18,46 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
+#ifndef _MACH_REGS_POWER
+#define _MACH_REGS_POWER
 
-#ifndef __ARCH_ARM___POWER_H
-#define __ARCH_ARM___POWER_H  1
+#define REGS_POWER_BASE	(STMP3XXX_REGS_BASE + 0x44000)
+#define REGS_POWER_PHYS	0x80044000
+#define REGS_POWER_SIZE	0x2000
 
-#include <mach/stmp3xxx_regs.h>
+#define HW_POWER_CTRL		0x0
+#define BM_POWER_CTRL_ENIRQ_VDD5V_GT_VDDIO	0x00000001
+#define BP_POWER_CTRL_ENIRQ_VDD5V_GT_VDDIO	0
+#define BM_POWER_CTRL_ENIRQ_PSWITCH	0x00020000
+#define BM_POWER_CTRL_PSWITCH_IRQ	0x00100000
+#define BM_POWER_CTRL_CLKGATE	0x40000000
 
-#define REGS_POWER_BASE (void __iomem *)(REGS_BASE + 0x44000)
-#define REGS_POWER_BASE_PHYS (0x80044000)
-#define REGS_POWER_SIZE 0x00002000
-HW_REGISTER(HW_POWER_MINPWR, REGS_POWER_BASE, 0x00000020)
-HW_REGISTER(HW_POWER_CHARGE, REGS_POWER_BASE, 0x00000030)
-#endif /* __ARCH_ARM___POWER_H */
+#define HW_POWER_5VCTRL		0x10
+#define BM_POWER_5VCTRL_ENABLE_LINREG_ILIMIT	0x00000040
+
+#define HW_POWER_MINPWR		0x20
+
+#define HW_POWER_CHARGE		0x30
+
+#define HW_POWER_VDDDCTRL	0x40
+
+#define HW_POWER_VDDACTRL	0x50
+
+#define HW_POWER_VDDIOCTRL	0x60
+#define BM_POWER_VDDIOCTRL_TRG	0x0000001F
+#define BP_POWER_VDDIOCTRL_TRG	0
+
+#define HW_POWER_STS		0xC0
+#define BM_POWER_STS_VBUSVALID	0x00000002
+#define BM_POWER_STS_BVALID	0x00000004
+#define BM_POWER_STS_AVALID	0x00000008
+#define BM_POWER_STS_DC_OK	0x00000200
+
+#define HW_POWER_RESET		0x100
+
+#define HW_POWER_DEBUG		0x110
+#define BM_POWER_DEBUG_BVALIDPIOLOCK	0x00000002
+#define BM_POWER_DEBUG_AVALIDPIOLOCK	0x00000004
+#define BM_POWER_DEBUG_VBUSVALIDPIOLOCK	0x00000008
+
+#endif
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-timrot.h b/arch/arm/mach-stmp378x/include/mach/regs-timrot.h
index bb6355a..b552795 100644
--- a/arch/arm/mach-stmp378x/include/mach/regs-timrot.h
+++ b/arch/arm/mach-stmp378x/include/mach/regs-timrot.h
@@ -1,5 +1,5 @@
 /*
- * STMP TIMROT Register Definitions
+ * stmp378x: TIMROT register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
@@ -18,199 +18,51 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
+#ifndef _MACH_REGS_TIMROT
+#define _MACH_REGS_TIMROT
 
-#ifndef __ARCH_ARM___TIMROT_H
-#define __ARCH_ARM___TIMROT_H  1
+#define REGS_TIMROT_BASE	(STMP3XXX_REGS_BASE + 0x68000)
+#define REGS_TIMROT_PHYS	0x80068000
+#define REGS_TIMROT_SIZE	0x2000
 
-#include <mach/stmp3xxx_regs.h>
+#define HW_TIMROT_ROTCTRL	0x0
+#define BM_TIMROT_ROTCTRL_SELECT_A	0x00000007
+#define BP_TIMROT_ROTCTRL_SELECT_A	0
+#define BM_TIMROT_ROTCTRL_SELECT_B	0x00000070
+#define BP_TIMROT_ROTCTRL_SELECT_B	4
+#define BM_TIMROT_ROTCTRL_POLARITY_A	0x00000100
+#define BM_TIMROT_ROTCTRL_POLARITY_B	0x00000200
+#define BM_TIMROT_ROTCTRL_OVERSAMPLE	0x00000C00
+#define BP_TIMROT_ROTCTRL_OVERSAMPLE	10
+#define BM_TIMROT_ROTCTRL_RELATIVE	0x00001000
+#define BM_TIMROT_ROTCTRL_DIVIDER	0x003F0000
+#define BP_TIMROT_ROTCTRL_DIVIDER	16
+#define BM_TIMROT_ROTCTRL_ROTARY_PRESENT	0x20000000
+#define BM_TIMROT_ROTCTRL_CLKGATE	0x40000000
+#define BM_TIMROT_ROTCTRL_SFTRST	0x80000000
 
-#define REGS_TIMROT_BASE (REGS_BASE + 0x68000)
-#define REGS_TIMROT_BASE_PHYS (0x80068000)
-#define REGS_TIMROT_SIZE 0x00002000
-HW_REGISTER(HW_TIMROT_ROTCTRL, REGS_TIMROT_BASE, 0x00000000)
-#define HW_TIMROT_ROTCTRL_ADDR (REGS_TIMROT_BASE + 0x00000000)
-#define BM_TIMROT_ROTCTRL_SFTRST 0x80000000
-#define BM_TIMROT_ROTCTRL_CLKGATE 0x40000000
-#define BM_TIMROT_ROTCTRL_ROTARY_PRESENT 0x20000000
-#define BM_TIMROT_ROTCTRL_TIM3_PRESENT 0x10000000
-#define BM_TIMROT_ROTCTRL_TIM2_PRESENT 0x08000000
-#define BM_TIMROT_ROTCTRL_TIM1_PRESENT 0x04000000
-#define BM_TIMROT_ROTCTRL_TIM0_PRESENT 0x02000000
-#define BP_TIMROT_ROTCTRL_STATE      22
-#define BM_TIMROT_ROTCTRL_STATE 0x01C00000
-#define BF_TIMROT_ROTCTRL_STATE(v)  \
-	(((v) << 22) & BM_TIMROT_ROTCTRL_STATE)
-#define BP_TIMROT_ROTCTRL_DIVIDER      16
-#define BM_TIMROT_ROTCTRL_DIVIDER 0x003F0000
-#define BF_TIMROT_ROTCTRL_DIVIDER(v)  \
-	(((v) << 16) & BM_TIMROT_ROTCTRL_DIVIDER)
-#define BM_TIMROT_ROTCTRL_RELATIVE 0x00001000
-#define BP_TIMROT_ROTCTRL_OVERSAMPLE      10
-#define BM_TIMROT_ROTCTRL_OVERSAMPLE 0x00000C00
-#define BF_TIMROT_ROTCTRL_OVERSAMPLE(v)  \
-	(((v) << 10) & BM_TIMROT_ROTCTRL_OVERSAMPLE)
-#define BV_TIMROT_ROTCTRL_OVERSAMPLE__8X 0x0
-#define BV_TIMROT_ROTCTRL_OVERSAMPLE__4X 0x1
-#define BV_TIMROT_ROTCTRL_OVERSAMPLE__2X 0x2
-#define BV_TIMROT_ROTCTRL_OVERSAMPLE__1X 0x3
-#define BM_TIMROT_ROTCTRL_POLARITY_B 0x00000200
-#define BM_TIMROT_ROTCTRL_POLARITY_A 0x00000100
-#define BP_TIMROT_ROTCTRL_SELECT_B      4
-#define BM_TIMROT_ROTCTRL_SELECT_B 0x00000070
-#define BF_TIMROT_ROTCTRL_SELECT_B(v)  \
-	(((v) << 4) & BM_TIMROT_ROTCTRL_SELECT_B)
-#define BV_TIMROT_ROTCTRL_SELECT_B__NEVER_TICK 0x0
-#define BV_TIMROT_ROTCTRL_SELECT_B__PWM0       0x1
-#define BV_TIMROT_ROTCTRL_SELECT_B__PWM1       0x2
-#define BV_TIMROT_ROTCTRL_SELECT_B__PWM2       0x3
-#define BV_TIMROT_ROTCTRL_SELECT_B__PWM3       0x4
-#define BV_TIMROT_ROTCTRL_SELECT_B__PWM4       0x5
-#define BV_TIMROT_ROTCTRL_SELECT_B__ROTARYA    0x6
-#define BV_TIMROT_ROTCTRL_SELECT_B__ROTARYB    0x7
-#define BP_TIMROT_ROTCTRL_SELECT_A      0
-#define BM_TIMROT_ROTCTRL_SELECT_A 0x00000007
-#define BF_TIMROT_ROTCTRL_SELECT_A(v)  \
-	(((v) << 0) & BM_TIMROT_ROTCTRL_SELECT_A)
-#define BV_TIMROT_ROTCTRL_SELECT_A__NEVER_TICK 0x0
-#define BV_TIMROT_ROTCTRL_SELECT_A__PWM0       0x1
-#define BV_TIMROT_ROTCTRL_SELECT_A__PWM1       0x2
-#define BV_TIMROT_ROTCTRL_SELECT_A__PWM2       0x3
-#define BV_TIMROT_ROTCTRL_SELECT_A__PWM3       0x4
-#define BV_TIMROT_ROTCTRL_SELECT_A__PWM4       0x5
-#define BV_TIMROT_ROTCTRL_SELECT_A__ROTARYA    0x6
-#define BV_TIMROT_ROTCTRL_SELECT_A__ROTARYB    0x7
-HW_REGISTER_0(HW_TIMROT_ROTCOUNT, REGS_TIMROT_BASE, 0x00000010)
-#define HW_TIMROT_ROTCOUNT_ADDR (REGS_TIMROT_BASE + 0x00000010)
-#define BP_TIMROT_ROTCOUNT_UPDOWN      0
-#define BM_TIMROT_ROTCOUNT_UPDOWN 0x0000FFFF
-#define BF_TIMROT_ROTCOUNT_UPDOWN(v)  \
-	(((v) << 0) & BM_TIMROT_ROTCOUNT_UPDOWN)
-/*
- *  multi-register-define name HW_TIMROT_TIMCTRLn
- *	      base 0x00000020
- *	      count 3
- *	      offset 0x20
- */
-HW_REGISTER_INDEXED(HW_TIMROT_TIMCTRLn, REGS_TIMROT_BASE, 0x00000020, 0x20)
-#define BM_TIMROT_TIMCTRLn_IRQ 0x00008000
-#define BM_TIMROT_TIMCTRLn_IRQ_EN 0x00004000
-#define BM_TIMROT_TIMCTRLn_POLARITY 0x00000100
-#define BM_TIMROT_TIMCTRLn_UPDATE 0x00000080
-#define BM_TIMROT_TIMCTRLn_RELOAD 0x00000040
-#define BP_TIMROT_TIMCTRLn_PRESCALE      4
-#define BM_TIMROT_TIMCTRLn_PRESCALE 0x00000030
-#define BF_TIMROT_TIMCTRLn_PRESCALE(v)  \
-	(((v) << 4) & BM_TIMROT_TIMCTRLn_PRESCALE)
-#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_1 0x0
-#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_2 0x1
-#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_4 0x2
-#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_8 0x3
-#define BP_TIMROT_TIMCTRLn_SELECT      0
-#define BM_TIMROT_TIMCTRLn_SELECT 0x0000000F
-#define BF_TIMROT_TIMCTRLn_SELECT(v)  \
-	(((v) << 0) & BM_TIMROT_TIMCTRLn_SELECT)
-#define BV_TIMROT_TIMCTRLn_SELECT__NEVER_TICK  0x0
-#define BV_TIMROT_TIMCTRLn_SELECT__PWM0	0x1
-#define BV_TIMROT_TIMCTRLn_SELECT__PWM1	0x2
-#define BV_TIMROT_TIMCTRLn_SELECT__PWM2	0x3
-#define BV_TIMROT_TIMCTRLn_SELECT__PWM3	0x4
-#define BV_TIMROT_TIMCTRLn_SELECT__PWM4	0x5
-#define BV_TIMROT_TIMCTRLn_SELECT__ROTARYA     0x6
-#define BV_TIMROT_TIMCTRLn_SELECT__ROTARYB     0x7
-#define BV_TIMROT_TIMCTRLn_SELECT__32KHZ_XTAL  0x8
-#define BV_TIMROT_TIMCTRLn_SELECT__8KHZ_XTAL   0x9
-#define BV_TIMROT_TIMCTRLn_SELECT__4KHZ_XTAL   0xA
-#define BV_TIMROT_TIMCTRLn_SELECT__1KHZ_XTAL   0xB
-#define BV_TIMROT_TIMCTRLn_SELECT__TICK_ALWAYS 0xC
-/*
- *  multi-register-define name HW_TIMROT_TIMCOUNTn
- *	      base 0x00000030
- *	      count 3
- *	      offset 0x20
- */
-HW_REGISTER_0_INDEXED(HW_TIMROT_TIMCOUNTn, REGS_TIMROT_BASE, 0x00000030,
-		       0x20)
-#define BP_TIMROT_TIMCOUNTn_RUNNING_COUNT      16
-#define BM_TIMROT_TIMCOUNTn_RUNNING_COUNT 0xFFFF0000
-#define BF_TIMROT_TIMCOUNTn_RUNNING_COUNT(v) \
-	(((v) << 16) & BM_TIMROT_TIMCOUNTn_RUNNING_COUNT)
-#define BP_TIMROT_TIMCOUNTn_FIXED_COUNT      0
-#define BM_TIMROT_TIMCOUNTn_FIXED_COUNT 0x0000FFFF
-#define BF_TIMROT_TIMCOUNTn_FIXED_COUNT(v)  \
-	(((v) << 0) & BM_TIMROT_TIMCOUNTn_FIXED_COUNT)
-HW_REGISTER(HW_TIMROT_TIMCTRL3, REGS_TIMROT_BASE, 0x00000080)
-#define HW_TIMROT_TIMCTRL3_ADDR (REGS_TIMROT_BASE + 0x00000080)
-#define BP_TIMROT_TIMCTRL3_TEST_SIGNAL      16
-#define BM_TIMROT_TIMCTRL3_TEST_SIGNAL 0x000F0000
-#define BF_TIMROT_TIMCTRL3_TEST_SIGNAL(v)  \
-	(((v) << 16) & BM_TIMROT_TIMCTRL3_TEST_SIGNAL)
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__NEVER_TICK  0x0
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM0	0x1
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM1	0x2
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM2	0x3
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM3	0x4
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM4	0x5
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__ROTARYA     0x6
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__ROTARYB     0x7
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__32KHZ_XTAL  0x8
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__8KHZ_XTAL   0x9
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__4KHZ_XTAL   0xA
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__1KHZ_XTAL   0xB
-#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__TICK_ALWAYS 0xC
-#define BM_TIMROT_TIMCTRL3_IRQ 0x00008000
-#define BM_TIMROT_TIMCTRL3_IRQ_EN 0x00004000
-#define BM_TIMROT_TIMCTRL3_DUTY_VALID 0x00000400
-#define BM_TIMROT_TIMCTRL3_DUTY_CYCLE 0x00000200
-#define BM_TIMROT_TIMCTRL3_POLARITY 0x00000100
-#define BM_TIMROT_TIMCTRL3_UPDATE 0x00000080
-#define BM_TIMROT_TIMCTRL3_RELOAD 0x00000040
-#define BP_TIMROT_TIMCTRL3_PRESCALE      4
-#define BM_TIMROT_TIMCTRL3_PRESCALE 0x00000030
-#define BF_TIMROT_TIMCTRL3_PRESCALE(v)  \
-	(((v) << 4) & BM_TIMROT_TIMCTRL3_PRESCALE)
-#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_1 0x0
-#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_2 0x1
-#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_4 0x2
-#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_8 0x3
-#define BP_TIMROT_TIMCTRL3_SELECT      0
-#define BM_TIMROT_TIMCTRL3_SELECT 0x0000000F
-#define BF_TIMROT_TIMCTRL3_SELECT(v)  \
-	(((v) << 0) & BM_TIMROT_TIMCTRL3_SELECT)
-#define BV_TIMROT_TIMCTRL3_SELECT__NEVER_TICK  0x0
-#define BV_TIMROT_TIMCTRL3_SELECT__PWM0	0x1
-#define BV_TIMROT_TIMCTRL3_SELECT__PWM1	0x2
-#define BV_TIMROT_TIMCTRL3_SELECT__PWM2	0x3
-#define BV_TIMROT_TIMCTRL3_SELECT__PWM3	0x4
-#define BV_TIMROT_TIMCTRL3_SELECT__PWM4	0x5
-#define BV_TIMROT_TIMCTRL3_SELECT__ROTARYA     0x6
-#define BV_TIMROT_TIMCTRL3_SELECT__ROTARYB     0x7
-#define BV_TIMROT_TIMCTRL3_SELECT__32KHZ_XTAL  0x8
-#define BV_TIMROT_TIMCTRL3_SELECT__8KHZ_XTAL   0x9
-#define BV_TIMROT_TIMCTRL3_SELECT__4KHZ_XTAL   0xA
-#define BV_TIMROT_TIMCTRL3_SELECT__1KHZ_XTAL   0xB
-#define BV_TIMROT_TIMCTRL3_SELECT__TICK_ALWAYS 0xC
-HW_REGISTER_0(HW_TIMROT_TIMCOUNT3, REGS_TIMROT_BASE, 0x00000090)
-#define HW_TIMROT_TIMCOUNT3_ADDR (REGS_TIMROT_BASE + 0x00000090)
-#define BP_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT      16
-#define BM_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT 0xFFFF0000
-#define BF_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT(v) \
-	(((v) << 16) & BM_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT)
-#define BP_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT      0
-#define BM_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT 0x0000FFFF
-#define BF_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT(v)  \
-	(((v) << 0) & BM_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT)
-HW_REGISTER_0(HW_TIMROT_VERSION, REGS_TIMROT_BASE, 0x000000a0)
-#define HW_TIMROT_VERSION_ADDR (REGS_TIMROT_BASE + 0x000000a0)
-#define BP_TIMROT_VERSION_MAJOR      24
-#define BM_TIMROT_VERSION_MAJOR 0xFF000000
-#define BF_TIMROT_VERSION_MAJOR(v) \
-	(((v) << 24) & BM_TIMROT_VERSION_MAJOR)
-#define BP_TIMROT_VERSION_MINOR      16
-#define BM_TIMROT_VERSION_MINOR 0x00FF0000
-#define BF_TIMROT_VERSION_MINOR(v)  \
-	(((v) << 16) & BM_TIMROT_VERSION_MINOR)
-#define BP_TIMROT_VERSION_STEP      0
-#define BM_TIMROT_VERSION_STEP 0x0000FFFF
-#define BF_TIMROT_VERSION_STEP(v)  \
-	(((v) << 0) & BM_TIMROT_VERSION_STEP)
-#endif /* __ARCH_ARM___TIMROT_H */
+#define HW_TIMROT_ROTCOUNT	0x10
+#define BM_TIMROT_ROTCOUNT_UPDOWN	0x0000FFFF
+#define BP_TIMROT_ROTCOUNT_UPDOWN	0
+
+#define HW_TIMROT_TIMCTRL0	(0x20 + 0 * 0x20)
+#define HW_TIMROT_TIMCTRL1	(0x20 + 1 * 0x20)
+#define HW_TIMROT_TIMCTRL2	(0x20 + 2 * 0x20)
+
+#define HW_TIMROT_TIMCTRLn	0x20
+#define BM_TIMROT_TIMCTRLn_SELECT	0x0000000F
+#define BP_TIMROT_TIMCTRLn_SELECT	0
+#define BM_TIMROT_TIMCTRLn_PRESCALE	0x00000030
+#define BP_TIMROT_TIMCTRLn_PRESCALE	4
+#define BM_TIMROT_TIMCTRLn_RELOAD	0x00000040
+#define BM_TIMROT_TIMCTRLn_UPDATE	0x00000080
+#define BM_TIMROT_TIMCTRLn_IRQ_EN	0x00004000
+#define BM_TIMROT_TIMCTRLn_IRQ	0x00008000
+
+#define HW_TIMROT_TIMCOUNT0	(0x30 + 0 * 0x20)
+#define HW_TIMROT_TIMCOUNT1	(0x30 + 1 * 0x20)
+#define HW_TIMROT_TIMCOUNT2	(0x30 + 2 * 0x20)
+
+#define HW_TIMROT_TIMCOUNTn	0x30
+
+#endif
-- 
cgit v0.10.2


From 3f52326a85666c1cb0210eb5556ef3d483933cfc Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Sun, 31 May 2009 13:31:55 +0100
Subject: [ARM] 5531/1: Freescale STMP: get rid of HW_zzz macros [2/3]

Replace HW_zzz register access macros by regular __raw_readl/__raw_writel calls

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h b/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h
index 3044c20..a323aa9 100644
--- a/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h
@@ -1,5 +1,5 @@
 /*
- * STMP APBH Register Definitions
+ * stmp37xx: APBH register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
@@ -18,85 +18,80 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef _INCLUDE_ASM_ARCH_REGS_APBH_H
-#define _INCLUDE_ASM_ARCH_REGS_APBH_H
+#ifndef _MACH_REGS_APBH
+#define _MACH_REGS_APBH
 
-#include <mach/stmp3xxx_regs.h>
+#define REGS_APBH_BASE	(STMP3XXX_REGS_BASE + 0x4000)
 
-#ifndef REGS_APBH_BASE
-#define REGS_APBH_BASE (REGS_BASE + 0x00004000)
-#endif
+#define HW_APBH_CTRL0		0x0
+#define BM_APBH_CTRL0_RESET_CHANNEL	0x00FF0000
+#define BP_APBH_CTRL0_RESET_CHANNEL	16
+#define BM_APBH_CTRL0_CLKGATE	0x40000000
+#define BM_APBH_CTRL0_SFTRST	0x80000000
+
+#define HW_APBH_CTRL1		0x10
+#define BM_APBH_CTRL1_CH0_CMDCMPLT_IRQ	0x00000001
+#define BP_APBH_CTRL1_CH0_CMDCMPLT_IRQ	0
+
+#define HW_APBH_DEVSEL		0x20
+
+#define HW_APBH_CH0_NXTCMDAR	(0x50 + 0 * 0x70)
+#define HW_APBH_CH1_NXTCMDAR	(0x50 + 1 * 0x70)
+#define HW_APBH_CH2_NXTCMDAR	(0x50 + 2 * 0x70)
+#define HW_APBH_CH3_NXTCMDAR	(0x50 + 3 * 0x70)
+#define HW_APBH_CH4_NXTCMDAR	(0x50 + 4 * 0x70)
+#define HW_APBH_CH5_NXTCMDAR	(0x50 + 5 * 0x70)
+#define HW_APBH_CH6_NXTCMDAR	(0x50 + 6 * 0x70)
+#define HW_APBH_CH7_NXTCMDAR	(0x50 + 7 * 0x70)
+#define HW_APBH_CH8_NXTCMDAR	(0x50 + 8 * 0x70)
+#define HW_APBH_CH9_NXTCMDAR	(0x50 + 9 * 0x70)
+#define HW_APBH_CH10_NXTCMDAR	(0x50 + 10 * 0x70)
+#define HW_APBH_CH11_NXTCMDAR	(0x50 + 11 * 0x70)
+#define HW_APBH_CH12_NXTCMDAR	(0x50 + 12 * 0x70)
+#define HW_APBH_CH13_NXTCMDAR	(0x50 + 13 * 0x70)
+#define HW_APBH_CH14_NXTCMDAR	(0x50 + 14 * 0x70)
+#define HW_APBH_CH15_NXTCMDAR	(0x50 + 15 * 0x70)
 
-HW_REGISTER(HW_APBH_CTRL0, REGS_APBH_BASE, 0x00)
-#define BP_APBH_CTRL0_SFTRST      31
-#define BM_APBH_CTRL0_SFTRST      0x80000000
-#define BP_APBH_CTRL0_CLKGATE      30
-#define BM_APBH_CTRL0_CLKGATE      0x40000000
-#define BP_APBH_CTRL0_RESET_CHANNEL      16
-#define BM_APBH_CTRL0_RESET_CHANNEL      0x00FF0000
-#define BF_APBH_CTRL0_RESET_CHANNEL(v)   \
-	(((v) << BP_APBH_CTRL0_RESET_CHANNEL) & BM_APBH_CTRL0_RESET_CHANNEL)
-HW_REGISTER(HW_APBH_CTRL1, REGS_APBH_BASE, 0x10)
-#define BP_APBH_CTRL1_CH1_CMDCMPLT_IRQ_EN      9
-#define BM_APBH_CTRL1_CH1_CMDCMPLT_IRQ_EN      0x00000200
-#define BP_APBH_CTRL1_CH0_CMDCMPLT_IRQ_EN      8
-#define BM_APBH_CTRL1_CH0_CMDCMPLT_IRQ_EN      0x00000100
-#define BP_APBH_CTRL1_CH7_CMDCMPLT_IRQ      7
-#define BM_APBH_CTRL1_CH7_CMDCMPLT_IRQ      0x00000080
-#define BP_APBH_CTRL1_CH1_CMDCMPLT_IRQ      1
-#define BM_APBH_CTRL1_CH1_CMDCMPLT_IRQ      0x00000002
-#define BP_APBH_CTRL1_CH0_CMDCMPLT_IRQ      0
-#define BM_APBH_CTRL1_CH0_CMDCMPLT_IRQ      0x00000001
-#define BP_APBH_CTRL1_CH1_ERR_IRQ	    17
-#define BM_APBH_CTRL1_CH1_ERR_IRQ	   0x00020000
-HW_REGISTER_0(HW_APBH_DEVSEL, REGS_APBH_BASE, 0x20)
-HW_REGISTER_RO_INDEXED(HW_APBH_CHn_CURCMDAR, REGS_APBH_BASE, 0x40, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_NXTCMDAR, REGS_APBH_BASE, 0x50, 0x70)
-#define BP_APBH_CHn_NXTCMDAR_CMD_ADDR      0
-#define BM_APBH_CHn_NXTCMDAR_CMD_ADDR      0xFFFFFFFF
-#define BF_APBH_CHn_NXTCMDAR_CMD_ADDR(v)   ((u32) v)
-HW_REGISTER_RO_INDEXED(HW_APBH_CHn_CMD, REGS_APBH_BASE, 0x60, 0x70)
-#define BM_APBH_CHn_CMD_XFER_COUNT		0xFFFF0000
-#define BP_APBH_CHn_CMD_XFER_COUNT		16
-#define BF_APBH_CHn_CMD_XFER_COUNT(v)		\
-	(((v) << BP_APBH_CHn_CMD_XFER_COUNT) & BM_APBH_CHn_CMD_XFER_COUNT)
-#define BM_APBH_CHn_CMD_CMDWORDS		0x0000F000
-#define BP_APBH_CHn_CMD_CMDWORDS		12
-#define BF_APBH_CHn_CMD_CMDWORDS(v)		\
-	(((v) << BP_APBH_CHn_CMD_CMDWORDS) & BM_APBH_CHn_CMD_CMDWORDS)
-#define BM_APBH_CHn_CMD_WAIT4ENDCMD		0x00000080
-#define BM_APBH_CHn_CMD_SEMAPHORE		0x00000040
-#define BP_APBH_CHn_CMD_SEMAPHORE		6
-#define BF_APBH_CHn_CMD_SEMAPHORE(v)		\
-	(((v) << BP_APBH_CHn_CMD_SEMAPHORE) & BM_APBH_CHn_CMD_SEMAPHORE)
-#define BM_APBH_CHn_CMD_NANDWAIT4READY	 0x00000020
-#define BP_APBH_CHn_CMD_NANDLOCK		4
-#define BM_APBH_CHn_CMD_NANDLOCK		0x00000010
-#define BF_APBH_CHn_CMD_NANDLOCK(v)		\
-	(((v) << BP_APBH_CHn_CMD_NANDLOCK) & BM_APBH_CHn_CMD_NANDLOCK)
-#define BM_APBH_CHn_CMD_IRQONCMPLT		0x00000008
+#define HW_APBH_CHn_NXTCMDAR	0x50
+
+#define BM_APBH_CHn_CMD_MODE		0x00000003
+#define BP_APBH_CHn_CMD_MODE		0x00000001
+#define BV_APBH_CHn_CMD_MODE_NOOP		 0
+#define BV_APBH_CHn_CMD_MODE_WRITE		 1
+#define BV_APBH_CHn_CMD_MODE_READ		 2
+#define BV_APBH_CHn_CMD_MODE_SENSE		 3
 #define BM_APBH_CHn_CMD_CHAIN		0x00000004
-#define BM_APBH_CHn_CMD_DMA_READ		0x00000003
-#define BP_APBH_CHn_CMD_DMA_READ		0
-#define BF_APBH_CHn_CMD_DMA_READ(v)		\
-	(((v) << BP_APBH_CHn_CMD_DMA_READ) & BM_APBH_CHn_CMD_DMA_READ)
-#define BF_APBH_CHn_CMD_COMMAND(v)		\
-	(((v) << BP_APBH_CHn_CMD_DMA_READ) & BM_APBH_CHn_CMD_DMA_READ)
-#define BV_APBH_CHn_CMD_COMMAND__NO_DMA_XFER  0x0
-#define BV_APBH_CHn_CMD_COMMAND__DMA_WRITE    0x1
-#define BV_APBH_CHn_CMD_COMMAND__DMA_READ     0x2
-#define BV_APBH_CHn_CMD_COMMAND__DMA_SENSE    0x3
-HW_REGISTER_INDEXED(HW_APBH_CHn_BAR, REGS_APBH_BASE, 0x70, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBH_CHn_SEMA, REGS_APBH_BASE, 0x80, 0x70)
-#define BP_APBH_CHn_SEMA_INCREMENT_SEMA      0
-#define BM_APBH_CHn_SEMA_INCREMENT_SEMA      0x000000FF
-#define BF_APBH_CHn_SEMA_INCREMENT_SEMA(v)   \
-	(((v) << BP_APBH_CHn_SEMA_INCREMENT_SEMA) & \
-	BM_APBH_CHn_SEMA_INCREMENT_SEMA)
-#define BP_APBH_CHn_SEMA_PHORE      16
-#define BM_APBH_CHn_SEMA_PHORE      0x00FF0000
-HW_REGISTER_RO_INDEXED(HW_APBH_CHn_DEBUG1, REGS_APBH_BASE, 0x90, 0x70)
-HW_REGISTER_RO_INDEXED(HW_APBH_CHn_DEBUG2, REGS_APBH_BASE, 0xA0, 0x70)
-HW_REGISTER_RO(HW_APBH_VERSION, REGS_APBH_BASE, 0x3F0)
+#define BM_APBH_CHn_CMD_IRQONCMPLT	0x00000008
+#define BM_APBH_CHn_CMD_NANDLOCK	0x00000010
+#define BM_APBH_CHn_CMD_NANDWAIT4READY	0x00000020
+#define BM_APBH_CHn_CMD_SEMAPHORE	0x00000040
+#define BM_APBH_CHn_CMD_WAIT4ENDCMD	0x00000080
+#define BM_APBH_CHn_CMD_CMDWORDS	0x0000F000
+#define BP_APBH_CHn_CMD_CMDWORDS	12
+#define BM_APBH_CHn_CMD_XFER_COUNT	0xFFFF0000
+#define BP_APBH_CHn_CMD_XFER_COUNT	16
+
+#define HW_APBH_CH0_SEMA	(0x80 + 0 * 0x70)
+#define HW_APBH_CH1_SEMA	(0x80 + 1 * 0x70)
+#define HW_APBH_CH2_SEMA	(0x80 + 2 * 0x70)
+#define HW_APBH_CH3_SEMA	(0x80 + 3 * 0x70)
+#define HW_APBH_CH4_SEMA	(0x80 + 4 * 0x70)
+#define HW_APBH_CH5_SEMA	(0x80 + 5 * 0x70)
+#define HW_APBH_CH6_SEMA	(0x80 + 6 * 0x70)
+#define HW_APBH_CH7_SEMA	(0x80 + 7 * 0x70)
+#define HW_APBH_CH8_SEMA	(0x80 + 8 * 0x70)
+#define HW_APBH_CH9_SEMA	(0x80 + 9 * 0x70)
+#define HW_APBH_CH10_SEMA	(0x80 + 10 * 0x70)
+#define HW_APBH_CH11_SEMA	(0x80 + 11 * 0x70)
+#define HW_APBH_CH12_SEMA	(0x80 + 12 * 0x70)
+#define HW_APBH_CH13_SEMA	(0x80 + 13 * 0x70)
+#define HW_APBH_CH14_SEMA	(0x80 + 14 * 0x70)
+#define HW_APBH_CH15_SEMA	(0x80 + 15 * 0x70)
 
-#endif /* _INCLUDE_ASM_ARCH_REGS_APBH_H */
+#define HW_APBH_CHn_SEMA	0x80
+#define BM_APBH_CHn_SEMA_INCREMENT_SEMA	0x000000FF
+#define BP_APBH_CHn_SEMA_INCREMENT_SEMA	0
+#define BM_APBH_CHn_SEMA_PHORE	0x00FF0000
+#define BP_APBH_CHn_SEMA_PHORE	16
+
+#endif
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h b/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h
index a14ddb9..6d080cd 100644
--- a/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h
@@ -1,5 +1,5 @@
 /*
- * STMP APBX Register Definitions
+ * stmp37xx: APBX register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
@@ -18,92 +18,96 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef _INCLUDE_ASM_ARCH_REGS_APBX_H
-#define _INCLUDE_ASM_ARCH_REGS_APBX_H
+#ifndef _MACH_REGS_APBX
+#define _MACH_REGS_APBX
 
-#include <mach/stmp3xxx_regs.h>
+#define REGS_APBX_BASE	(STMP3XXX_REGS_BASE + 0x24000)
 
-#ifndef REGS_APBX_BASE
-#define REGS_APBX_BASE (REGS_BASE + 0x00024000)
-#endif
+#define HW_APBX_CTRL0		0x0
+#define BM_APBX_CTRL0_RESET_CHANNEL	0x00FF0000
+#define BP_APBX_CTRL0_RESET_CHANNEL	16
+#define BM_APBX_CTRL0_CLKGATE	0x40000000
+#define BM_APBX_CTRL0_SFTRST	0x80000000
+
+#define HW_APBX_CTRL1		0x10
+
+#define HW_APBX_DEVSEL		0x20
+
+#define HW_APBX_CH0_NXTCMDAR	(0x50 + 0 * 0x70)
+#define HW_APBX_CH1_NXTCMDAR	(0x50 + 1 * 0x70)
+#define HW_APBX_CH2_NXTCMDAR	(0x50 + 2 * 0x70)
+#define HW_APBX_CH3_NXTCMDAR	(0x50 + 3 * 0x70)
+#define HW_APBX_CH4_NXTCMDAR	(0x50 + 4 * 0x70)
+#define HW_APBX_CH5_NXTCMDAR	(0x50 + 5 * 0x70)
+#define HW_APBX_CH6_NXTCMDAR	(0x50 + 6 * 0x70)
+#define HW_APBX_CH7_NXTCMDAR	(0x50 + 7 * 0x70)
+#define HW_APBX_CH8_NXTCMDAR	(0x50 + 8 * 0x70)
+#define HW_APBX_CH9_NXTCMDAR	(0x50 + 9 * 0x70)
+#define HW_APBX_CH10_NXTCMDAR	(0x50 + 10 * 0x70)
+#define HW_APBX_CH11_NXTCMDAR	(0x50 + 11 * 0x70)
+#define HW_APBX_CH12_NXTCMDAR	(0x50 + 12 * 0x70)
+#define HW_APBX_CH13_NXTCMDAR	(0x50 + 13 * 0x70)
+#define HW_APBX_CH14_NXTCMDAR	(0x50 + 14 * 0x70)
+#define HW_APBX_CH15_NXTCMDAR	(0x50 + 15 * 0x70)
 
-HW_REGISTER(HW_APBX_CTRL0, REGS_APBX_BASE, 0x00)
-#define BP_APBX_CTRL0_SFTRST      31
-#define BM_APBX_CTRL0_SFTRST      0x80000000
-#define BP_APBX_CTRL0_CLKGATE      30
-#define BM_APBX_CTRL0_CLKGATE      0x40000000
-#define BP_APBX_CTRL0_RESET_CHANNEL      16
-#define BM_APBX_CTRL0_RESET_CHANNEL      0x00FF0000
-#define BF_APBX_CTRL0_RESET_CHANNEL(v)   \
-	(((v) << BP_APBX_CTRL0_RESET_CHANNEL) & BM_APBX_CTRL0_RESET_CHANNEL)
-HW_REGISTER(HW_APBX_CTRL1, REGS_APBX_BASE, 0x10)
-HW_REGISTER_0(HW_APBX_DEVSEL, REGS_APBX_BASE, 0x20)
-#define BP_APBX_DEVSEL_CH7      28
-#define BM_APBX_DEVSEL_CH7      0xF0000000
-#define BF_APBX_DEVSEL_CH7(v)   \
-	(((v) << BP_APBX_DEVSEL_CH7) & BM_APBX_DEVSEL_CH7)
-#define BV_APBX_DEVSEL_CH7__USE_UART  0x0
-#define BV_APBX_DEVSEL_CH7__USE_IRDA  0x1
-#define BP_APBX_DEVSEL_CH6	24
-#define BM_APBX_DEVSEL_CH6      0x0F000000
-#define BF_APBX_DEVSEL_CH6(v)   \
-	(((v) << BP_APBX_DEVSEL_CH6) & BM_APBX_DEVSEL_CH6)
-#define BV_APBX_DEVSEL_CH6__USE_UART  0x0
-#define BV_APBX_DEVSEL_CH6__USE_IRDA  0x1
-#define BP_APBX_CTRL1_CH7_AHB_ERROR_IRQ      23
-#define BM_APBX_CTRL1_CH7_AHB_ERROR_IRQ      0x00800000
-#define BP_APBX_CTRL1_CH6_AHB_ERROR_IRQ      22
-#define BM_APBX_CTRL1_CH6_AHB_ERROR_IRQ      0x00400000
-#define BP_APBX_CTRL1_CH7_CMDCMPLT_IRQ_EN      15
-#define BM_APBX_CTRL1_CH7_CMDCMPLT_IRQ_EN      0x00008000
-#define BP_APBX_CTRL1_CH6_CMDCMPLT_IRQ_EN      14
-#define BM_APBX_CTRL1_CH6_CMDCMPLT_IRQ_EN      0x00004000
+#define HW_APBX_CHn_NXTCMDAR	0x50
+#define BM_APBX_CHn_CMD_MODE		0x00000003
+#define BP_APBX_CHn_CMD_MODE		0x00000001
+#define BV_APBX_CHn_CMD_MODE_NOOP		 0
+#define BV_APBX_CHn_CMD_MODE_WRITE		 1
+#define BV_APBX_CHn_CMD_MODE_READ		 2
+#define BV_APBX_CHn_CMD_MODE_SENSE		 3
+#define BM_APBX_CHn_CMD_COMMAND	0x00000003
+#define BP_APBX_CHn_CMD_COMMAND	0
+#define BM_APBX_CHn_CMD_CHAIN	0x00000004
+#define BM_APBX_CHn_CMD_IRQONCMPLT	0x00000008
+#define BM_APBX_CHn_CMD_SEMAPHORE	0x00000040
+#define BM_APBX_CHn_CMD_WAIT4ENDCMD	0x00000080
+#define BM_APBX_CHn_CMD_CMDWORDS	0x0000F000
+#define BP_APBX_CHn_CMD_CMDWORDS	12
+#define BM_APBX_CHn_CMD_XFER_COUNT	0xFFFF0000
+#define BP_APBX_CHn_CMD_XFER_COUNT	16
 
-HW_REGISTER_RO_INDEXED(HW_APBX_CHn_CURCMDAR, REGS_APBX_BASE, 0x40, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_NXTCMDAR, REGS_APBX_BASE, 0x50, 0x70)
-#define BP_APBX_CHn_NXTCMDAR_CMD_ADDR      0
-#define BM_APBX_CHn_NXTCMDAR_CMD_ADDR      0xFFFFFFFF
-#define BF_APBX_CHn_NXTCMDAR_CMD_ADDR(v)   ((u32) v)
-HW_REGISTER_RO_INDEXED(HW_APBX_CHn_CMD, REGS_APBX_BASE, 0x60, 0x70)
-#define BP_APBX_CHn_CMD_XFER_COUNT      16
-#define BM_APBX_CHn_CMD_XFER_COUNT      0xFFFF0000
-#define BF_APBX_CHn_CMD_XFER_COUNT(v)   \
-	(((v) << BP_APBX_CHn_CMD_XFER_COUNT) & BM_APBX_CHn_CMD_XFER_COUNT)
-#define BP_APBX_CHn_CMD_CMDWORDS      12
-#define BM_APBX_CHn_CMD_CMDWORDS      0x0000F000
-#define BF_APBX_CHn_CMD_CMDWORDS(v)   \
-	(((v) << BP_APBX_CHn_CMD_CMDWORDS) & BM_APBX_CHn_CMD_CMDWORDS)
-#define BP_APBX_CHn_CMD_WAIT4ENDCMD      7
-#define BM_APBX_CHn_CMD_WAIT4ENDCMD      0x00000080
-#define BP_APBX_CHn_CMD_SEMAPHORE      6
-#define BM_APBX_CHn_CMD_SEMAPHORE      0x00000040
-#define BP_APBX_CHn_CMD_IRQONCMPLT      3
-#define BM_APBX_CHn_CMD_IRQONCMPLT      0x00000008
-#define BP_APBX_CHn_CMD_CHAIN      2
-#define BM_APBX_CHn_CMD_CHAIN      0x00000004
-#define BM_APBX_CHn_CMD_DMA_READ		0x00000003
-#define BP_APBX_CHn_CMD_DMA_READ		0
-#define BF_APBX_CHn_CMD_DMA_READ(v)		\
-	(((v) << BP_APBX_CHn_CMD_DMA_READ) & BM_APBX_CHn_CMD_DMA_READ)
-#define BP_APBX_CHn_CMD_COMMAND      0
-#define BM_APBX_CHn_CMD_COMMAND      0x00000003
-#define BF_APBX_CHn_CMD_COMMAND(v)   \
-	(((v) << BP_APBX_CHn_CMD_COMMAND) & BM_APBX_CHn_CMD_COMMAND)
-#define BV_APBX_CHn_CMD_COMMAND__NO_DMA_XFER  0x0
-#define BV_APBX_CHn_CMD_COMMAND__DMA_WRITE    0x1
-#define BV_APBX_CHn_CMD_COMMAND__DMA_READ     0x2
+#define HW_APBX_CH0_BAR		(0x70 + 0 * 0x70)
+#define HW_APBX_CH1_BAR		(0x70 + 1 * 0x70)
+#define HW_APBX_CH2_BAR		(0x70 + 2 * 0x70)
+#define HW_APBX_CH3_BAR		(0x70 + 3 * 0x70)
+#define HW_APBX_CH4_BAR		(0x70 + 4 * 0x70)
+#define HW_APBX_CH5_BAR		(0x70 + 5 * 0x70)
+#define HW_APBX_CH6_BAR		(0x70 + 6 * 0x70)
+#define HW_APBX_CH7_BAR		(0x70 + 7 * 0x70)
+#define HW_APBX_CH8_BAR		(0x70 + 8 * 0x70)
+#define HW_APBX_CH9_BAR		(0x70 + 9 * 0x70)
+#define HW_APBX_CH10_BAR		(0x70 + 10 * 0x70)
+#define HW_APBX_CH11_BAR		(0x70 + 11 * 0x70)
+#define HW_APBX_CH12_BAR		(0x70 + 12 * 0x70)
+#define HW_APBX_CH13_BAR		(0x70 + 13 * 0x70)
+#define HW_APBX_CH14_BAR		(0x70 + 14 * 0x70)
+#define HW_APBX_CH15_BAR		(0x70 + 15 * 0x70)
 
-HW_REGISTER_RO_INDEXED(HW_APBX_CHn_BAR, REGS_APBX_BASE, 0x70, 0x70)
-HW_REGISTER_0_INDEXED(HW_APBX_CHn_SEMA, REGS_APBX_BASE, 0x80, 0x70)
-#define BP_APBX_CHn_SEMA_INCREMENT_SEMA      0
-#define BM_APBX_CHn_SEMA_INCREMENT_SEMA      0x000000FF
-#define BF_APBX_CHn_SEMA_INCREMENT_SEMA(v)   \
-	(((v) << BP_APBX_CHn_SEMA_INCREMENT_SEMA) & \
-	BM_APBX_CHn_SEMA_INCREMENT_SEMA)
-#define BP_APBX_CHn_SEMA_PHORE      16
-#define BM_APBX_CHn_SEMA_PHORE      0x00FF0000
-HW_REGISTER_RO_INDEXED(HW_APBX_CHn_DEBUG1, REGS_APBX_BASE, 0x90, 0x70)
-HW_REGISTER_RO_INDEXED(HW_APBX_CHn_DEBUG2, REGS_APBX_BASE, 0xA0, 0x70)
-HW_REGISTER_RO(HW_APBX_VERSION, REGS_APBX_BASE, 0x3F0)
+#define HW_APBX_CHn_BAR		0x70
 
-#endif /* _INCLUDE_ASM_ARCH_REGS_APBX_H */
+#define HW_APBX_CH0_SEMA	(0x80 + 0 * 0x70)
+#define HW_APBX_CH1_SEMA	(0x80 + 1 * 0x70)
+#define HW_APBX_CH2_SEMA	(0x80 + 2 * 0x70)
+#define HW_APBX_CH3_SEMA	(0x80 + 3 * 0x70)
+#define HW_APBX_CH4_SEMA	(0x80 + 4 * 0x70)
+#define HW_APBX_CH5_SEMA	(0x80 + 5 * 0x70)
+#define HW_APBX_CH6_SEMA	(0x80 + 6 * 0x70)
+#define HW_APBX_CH7_SEMA	(0x80 + 7 * 0x70)
+#define HW_APBX_CH8_SEMA	(0x80 + 8 * 0x70)
+#define HW_APBX_CH9_SEMA	(0x80 + 9 * 0x70)
+#define HW_APBX_CH10_SEMA	(0x80 + 10 * 0x70)
+#define HW_APBX_CH11_SEMA	(0x80 + 11 * 0x70)
+#define HW_APBX_CH12_SEMA	(0x80 + 12 * 0x70)
+#define HW_APBX_CH13_SEMA	(0x80 + 13 * 0x70)
+#define HW_APBX_CH14_SEMA	(0x80 + 14 * 0x70)
+#define HW_APBX_CH15_SEMA	(0x80 + 15 * 0x70)
+
+#define HW_APBX_CHn_SEMA	0x80
+#define BM_APBX_CHn_SEMA_INCREMENT_SEMA	0x000000FF
+#define BP_APBX_CHn_SEMA_INCREMENT_SEMA	0
+#define BM_APBX_CHn_SEMA_PHORE	0x00FF0000
+#define BP_APBX_CHn_SEMA_PHORE	16
+
+#endif
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h b/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h
index 229ee75..47f5c92 100644
--- a/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h
@@ -1,85 +1,72 @@
-#ifndef _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H
-#define _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H
+/*
+ * stmp37xx: CLKCTRL register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#ifndef _MACH_REGS_CLKCTRL
+#define _MACH_REGS_CLKCTRL
 
-#include <mach/stmp3xxx_regs.h>
+#define REGS_CLKCTRL_BASE	(STMP3XXX_REGS_BASE + 0x40000)
 
-#define REGS_CLKCTRL_BASE (REGS_BASE + 0x00040000)
-
-#define HW_CLKCTRL_PLLCTRL0_ADDR	(REGS_CLKCTRL_BASE + 0x00)
-HW_REGISTER(HW_CLKCTRL_PLLCTRL0, REGS_CLKCTRL_BASE, 0x00)
+#define HW_CLKCTRL_PLLCTRL0	0x0
 #define BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS	0x00040000
-#define HW_CLKCTRL_PLLCTRL1_ADDR	(REGS_CLKCTRL_BASE + 0x10)
-HW_REGISTER(HW_CLKCTRL_PLLCTRL1, REGS_CLKCTRL_BASE, 0x10)
-
-#define HW_CLKCTRL_CPU_ADDR		(REGS_CLKCTRL_BASE + 0x20)
-HW_REGISTER(HW_CLKCTRL_CPU, REGS_CLKCTRL_BASE, 0x20)
-#define BM_CLKCTRL_CPU_DIV_CPU 0x0000003F
-#define BF_CLKCTRL_CPU_DIV_CPU(v)  \
-	(((v) << 0) & BM_CLKCTRL_CPU_DIV_CPU)
-
-#define HW_CLKCTRL_HBUS_ADDR		(REGS_CLKCTRL_BASE + 0x30)
-HW_REGISTER(HW_CLKCTRL_HBUS, REGS_CLKCTRL_BASE, 0x30)
-#define BM_CLKCTRL_HBUS_DIV_FRAC_EN 	0	/* for compatitibility */
-#define BM_CLKCTRL_HBUS_DIV 0x0000001F
-#define BF_CLKCTRL_HBUS_DIV(v)  \
-	(((v) << 0) & BM_CLKCTRL_HBUS_DIV)
-#define HW_CLKCTRL_XBUS_ADDR		(REGS_CLKCTRL_BASE + 0x40)
-HW_REGISTER(HW_CLKCTRL_XBUS, REGS_CLKCTRL_BASE, 0x40)
-#define HW_CLKCTRL_XTAL_ADDR		(REGS_CLKCTRL_BASE + 0x50)
-HW_REGISTER(HW_CLKCTRL_XTAL, REGS_CLKCTRL_BASE, 0x50)
-#define HW_CLKCTRL_PIX_ADDR		(REGS_CLKCTRL_BASE + 0x60)
-HW_REGISTER(HW_CLKCTRL_PIX, REGS_CLKCTRL_BASE, 0x60)
-#define BM_CLKCTRL_PIX_CLKGATE		0x80000000
-#define BM_CLKCTRL_PIX_BUSY		0x20000000
-#define BM_CLKCTRL_PIX_DIV		0x00007FFF
-#define BP_CLKCTRL_PIX_DIV		0
-#define BF_CLKCTRL_PIX_DIV(v)	\
-	(((v) << BP_CLKCTRL_PIX_DIV) & BM_CLKCTRL_PIX_DIV)
-#define HW_CLKCTRL_SSP_ADDR		(REGS_CLKCTRL_BASE + 0x70)
-HW_REGISTER(HW_CLKCTRL_SSP, REGS_CLKCTRL_BASE, 0x70)
-#define HW_CLKCTRL_GPMI_ADDR		(REGS_CLKCTRL_BASE + 0x80)
-HW_REGISTER(HW_CLKCTRL_GPMI, REGS_CLKCTRL_BASE, 0x80)
-#define HW_CLKCTRL_SPDIF_ADDR		(REGS_CLKCTRL_BASE + 0x90)
-HW_REGISTER(HW_CLKCTRL_SPDIF, REGS_CLKCTRL_BASE, 0x90)
-#define HW_CLKCTRL_EMI_ADDR		(REGS_CLKCTRL_BASE + 0xA0)
-HW_REGISTER(HW_CLKCTRL_EMI, REGS_CLKCTRL_BASE, 0xA0)
-#define HW_CLKCTRL_IR_ADDR		(REGS_CLKCTRL_BASE + 0xB0)
-HW_REGISTER(HW_CLKCTRL_IR, REGS_CLKCTRL_BASE, 0xB0)
-#define HW_CLKCTRL_SAIF_ADDR		(REGS_CLKCTRL_BASE + 0xC0)
-HW_REGISTER(HW_CLKCTRL_SAIF, REGS_CLKCTRL_BASE, 0xC0)
-#define HW_CLKCTRL_FRAC_ADDR		(REGS_CLKCTRL_BASE + 0xD0)
-HW_REGISTER(HW_CLKCTRL_FRAC, REGS_CLKCTRL_BASE, 0xD0)
-#define BM_CLKCTRL_FRAC_CLKGATEIO	0x80000000
-#define BM_CLKCTRL_FRAC_IO_STABLE	0x40000000
-#define BM_CLKCTRL_FRAC_IOFRAC		0x3F000000
-#define BP_CLKCTRL_FRAC_IOFRAC		24
-#define BF_CLKCTRL_FRAC_IOFRAC(v)	\
-	(((v) << BP_CLKCTRL_FRAC_IOFRAC) & BM_CLKCTRL_FRAC_IOFRAC)
+
+#define HW_CLKCTRL_CPU		0x20
+#define BM_CLKCTRL_CPU_DIV_CPU	0x0000003F
+#define BP_CLKCTRL_CPU_DIV_CPU	0
+
+#define HW_CLKCTRL_HBUS		0x30
+#define BM_CLKCTRL_HBUS_DIV	0x0000001F
+#define BP_CLKCTRL_HBUS_DIV	0
+
+#define HW_CLKCTRL_XBUS		0x40
+
+#define HW_CLKCTRL_XTAL		0x50
+
+#define HW_CLKCTRL_PIX		0x60
+#define BM_CLKCTRL_PIX_DIV	0x00007FFF
+#define BP_CLKCTRL_PIX_DIV	0
+#define BM_CLKCTRL_PIX_CLKGATE	0x80000000
+
+#define HW_CLKCTRL_SSP		0x70
+
+#define HW_CLKCTRL_GPMI		0x80
+
+#define HW_CLKCTRL_SPDIF	0x90
+
+#define HW_CLKCTRL_EMI		0xA0
+
+#define HW_CLKCTRL_IR		0xB0
+
+#define HW_CLKCTRL_SAIF		0xC0
+
+#define HW_CLKCTRL_FRAC		0xD0
+#define BM_CLKCTRL_FRAC_EMIFRAC	0x00003F00
+#define BP_CLKCTRL_FRAC_EMIFRAC	8
+#define BM_CLKCTRL_FRAC_PIXFRAC	0x003F0000
+#define BP_CLKCTRL_FRAC_PIXFRAC	16
 #define BM_CLKCTRL_FRAC_CLKGATEPIX	0x00800000
-#define BM_CLKCTRL_FRAC_PIXFRAC		0x003F0000
-#define BP_CLKCTRL_FRAC_PIXFRAC		16
-#define BF_CLKCTRL_FRAC_PIXFRAC(v)	\
-	(((v) << BP_CLKCTRL_FRAC_PIXFRAC) & BM_CLKCTRL_FRAC_PIXFRAC)
-#define BM_CLKCTRL_FRAC_CLKGATEEMI	0x00008000
-#define BM_CLKCTRL_FRAC_EMIFRAC		0x00003F00
-#define BP_CLKCTRL_FRAC_EMIFRAC		8
-#define BF_CLKCTRL_FRAC_EMIFRAC(v)	\
-	(((v) << BP_CLKCTRL_FRAC_EMIFRAC) & BM_CLKCTRL_FRAC_EMIFRAC)
-#define BM_CLKCTRL_FRAC_CLKGATECPU	0x00000080
-#define BM_CLKCTRL_FRAC_CPUFRAC		0x0000003F
-#define BP_CLKCTRL_FRAC_CPUFRAC		0
-#define BF_CLKCTRL_FRAC_CPUFRAC(v)	\
-	(((v) << BP_CLKCTRL_FRAC_CPUFRAC) & BM_CLKCTRL_FRAC_CPUFRAC)
-#define HW_CLKCTRL_CLKSEQ_ADDR		(REGS_CLKCTRL_BASE + 0xE0)
-HW_REGISTER(HW_CLKCTRL_CLKSEQ, REGS_CLKCTRL_BASE, 0xE0)
-#define BM_CLKCTRL_CLKSEQ_BYPASS_CPU		0x00000080
-#define BM_CLKCTRL_CLKSEQ_BYPASS_EMI		0x00000040
-#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP		0x00000020
-#define BM_CLKCTRL_CLKSEQ_BYPASS_GPMI		0x00000010
-#define BM_CLKCTRL_CLKSEQ_BYPASS_IR		0x00000008
-#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX		0x00000002
-#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF		0x00000001
-HW_REGISTER_WO(HW_CLKCTRL_RESET, REGS_CLKCTRL_BASE, 0xF0)
-#define BM_CLKCTRL_RESET_CHIP      0x00000002
-#define BM_CLKCTRL_RESET_DIG	   0x00000001
-#endif /* _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H */
+
+#define HW_CLKCTRL_CLKSEQ	0xE0
+#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX	0x00000002
+
+#define HW_CLKCTRL_RESET	0xF0
+#define BM_CLKCTRL_RESET_DIG	0x00000001
+#define BP_CLKCTRL_RESET_DIG	0
+
+#endif
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h b/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h
index 8a92f92..3b7c922 100644
--- a/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h
@@ -1,36 +1,43 @@
 /*
- * Freescale STMP378X: clock registers definitions
+ * stmp37xx: ICOLL register definitions
  *
- * Embedded Alley Solutions, Inc <source@embeddedalley.com>
- *
- * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright (c) 2008 Freescale Semiconductor
  * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
- */
-
-/*
- * The code contained herein is licensed under the GNU General Public
- * License. You may obtain a copy of the GNU General Public License
- * Version 2 or later at the following locations:
  *
- * http://www.opensource.org/licenses/gpl-license.html
- * http://www.gnu.org/copyleft/gpl.html
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
+#ifndef _MACH_REGS_ICOLL
+#define _MACH_REGS_ICOLL
+
+#define REGS_ICOLL_BASE	(STMP3XXX_REGS_BASE + 0x0)
 
-#ifndef _INCLUDE_ASM_ARCH_REGS_ICOLL_H
-#define _INCLUDE_ASM_ARCH_REGS_ICOLL_H
+#define HW_ICOLL_VECTOR		0x0
 
+#define HW_ICOLL_LEVELACK	0x10
 
-#include <mach/stmp3xxx_regs.h>
+#define HW_ICOLL_CTRL		0x20
+#define BM_ICOLL_CTRL_CLKGATE	0x40000000
+#define BM_ICOLL_CTRL_SFTRST	0x80000000
 
-#define REGS_ICOLL_BASE (REGS_BASE + 0x00000000)
+#define HW_ICOLL_STAT		0x30
 
-HW_REGISTER(HW_ICOLL_VECTOR, REGS_ICOLL_BASE, 0x00)
-HW_REGISTER_WO(HW_ICOLL_LEVELACK, REGS_ICOLL_BASE, 0x10)
-HW_REGISTER(HW_ICOLL_CTRL, REGS_ICOLL_BASE, 0x20)
-#define BM_ICOLL_CTRL_CLKGATE	  0x40000000
-#define BM_ICOLL_CTRL_SFTRST	   0x80000000
-HW_REGISTER_RO(HW_ICOLL_STAT, REGS_ICOLL_BASE, 0x30)
+#define HW_ICOLL_PRIORITY0	(0x60 + 0 * 0x10)
+#define HW_ICOLL_PRIORITY1	(0x60 + 1 * 0x10)
+#define HW_ICOLL_PRIORITY2	(0x60 + 2 * 0x10)
+#define HW_ICOLL_PRIORITY3	(0x60 + 3 * 0x10)
 
-HW_REGISTER_INDEXED(HW_ICOLL_PRIORITYn, REGS_ICOLL_BASE, 0x60, 0x10)
+#define HW_ICOLL_PRIORITYn	0x60
 
-#endif /* _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H */
+#endif
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h b/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h
index b114ecd..d5efce2 100644
--- a/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h
@@ -1,7 +1,8 @@
 /*
- * STMP pinmux register definitions
+ * stmp37xx: PINCTRL register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -17,143 +18,71 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef _INCLUDE_ASM_ARCH_REGS_PINCTRL_H
-#define _INCLUDE_ASM_ARCH_REGS_PINCTRL_H
-
-#include <mach/stmp3xxx_regs.h>
-
-#ifndef REGS_PINCTRL_BASE
-#define REGS_PINCTRL_BASE (REGS_BASE + 0x00018000)
-#endif /* REGS_PINCTRL_BASE */
-
-HW_REGISTER(HW_PINCTRL_CTRL, REGS_PINCTRL_BASE, 0)
-
-#define HW_PINCTRL_MUXSEL0_ADDR (REGS_PINCTRL_BASE + 0x100)
-HW_REGISTER(HW_PINCTRL_MUXSEL0, REGS_PINCTRL_BASE, 0x100)
-#define HW_PINCTRL_MUXSEL1_ADDR (REGS_PINCTRL_BASE + 0x110)
-HW_REGISTER(HW_PINCTRL_MUXSEL1, REGS_PINCTRL_BASE, 0x110)
-#define HW_PINCTRL_MUXSEL2_ADDR (REGS_PINCTRL_BASE + 0x120)
-HW_REGISTER(HW_PINCTRL_MUXSEL2, REGS_PINCTRL_BASE, 0x120)
-#define HW_PINCTRL_MUXSEL3_ADDR (REGS_PINCTRL_BASE + 0x130)
-HW_REGISTER(HW_PINCTRL_MUXSEL3, REGS_PINCTRL_BASE, 0x130)
-#define BM_PINCTRL_MUXSEL3_BANK1_PIN28      0x03000000
-#define HW_PINCTRL_MUXSEL4_ADDR (REGS_PINCTRL_BASE + 0x140)
-HW_REGISTER(HW_PINCTRL_MUXSEL4, REGS_PINCTRL_BASE, 0x140)
-#define BM_PINCTRL_MUXSEL4_BANK2_PIN03      0x000000C0
-#define BM_PINCTRL_MUXSEL4_BANK2_PIN04      0x00000300
-#define HW_PINCTRL_MUXSEL5_ADDR (REGS_PINCTRL_BASE + 0x150)
-HW_REGISTER(HW_PINCTRL_MUXSEL5, REGS_PINCTRL_BASE, 0x150)
-#define HW_PINCTRL_MUXSEL6_ADDR (REGS_PINCTRL_BASE + 0x160)
-HW_REGISTER(HW_PINCTRL_MUXSEL6, REGS_PINCTRL_BASE, 0x160)
-#define HW_PINCTRL_MUXSEL7_ADDR (REGS_PINCTRL_BASE + 0x170)
-HW_REGISTER(HW_PINCTRL_MUXSEL7, REGS_PINCTRL_BASE, 0x170)
-
-HW_REGISTER(HW_PINCTRL_DRIVE0, REGS_PINCTRL_BASE, 0x200)
-#define HW_PINCTRL_DRIVE0_ADDR (REGS_PINCTRL_BASE + 0x200)
-HW_REGISTER(HW_PINCTRL_DRIVE1, REGS_PINCTRL_BASE, 0x210)
-#define HW_PINCTRL_DRIVE1_ADDR (REGS_PINCTRL_BASE + 0x210)
-HW_REGISTER(HW_PINCTRL_DRIVE2, REGS_PINCTRL_BASE, 0x220)
-#define HW_PINCTRL_DRIVE2_ADDR (REGS_PINCTRL_BASE + 0x220)
-HW_REGISTER(HW_PINCTRL_DRIVE3, REGS_PINCTRL_BASE, 0x230)
-#define HW_PINCTRL_DRIVE3_ADDR (REGS_PINCTRL_BASE + 0x230)
-HW_REGISTER(HW_PINCTRL_DRIVE4, REGS_PINCTRL_BASE, 0x240)
-#define HW_PINCTRL_DRIVE4_ADDR (REGS_PINCTRL_BASE + 0x240)
-HW_REGISTER(HW_PINCTRL_DRIVE5, REGS_PINCTRL_BASE, 0x250)
-#define HW_PINCTRL_DRIVE5_ADDR (REGS_PINCTRL_BASE + 0x250)
-HW_REGISTER(HW_PINCTRL_DRIVE6, REGS_PINCTRL_BASE, 0x260)
-#define HW_PINCTRL_DRIVE6_ADDR (REGS_PINCTRL_BASE + 0x260)
-HW_REGISTER(HW_PINCTRL_DRIVE7, REGS_PINCTRL_BASE, 0x270)
-#define HW_PINCTRL_DRIVE7_ADDR (REGS_PINCTRL_BASE + 0x270)
-HW_REGISTER(HW_PINCTRL_DRIVE8, REGS_PINCTRL_BASE, 0x280)
-#define HW_PINCTRL_DRIVE8_ADDR (REGS_PINCTRL_BASE + 0x280)
-HW_REGISTER(HW_PINCTRL_DRIVE9, REGS_PINCTRL_BASE, 0x290)
-#define HW_PINCTRL_DRIVE9_ADDR (REGS_PINCTRL_BASE + 0x290)
-HW_REGISTER(HW_PINCTRL_DRIVE10, REGS_PINCTRL_BASE, 0x2a0)
-#define HW_PINCTRL_DRIVE10_ADDR (REGS_PINCTRL_BASE + 0x2a0)
-HW_REGISTER(HW_PINCTRL_DRIVE11, REGS_PINCTRL_BASE, 0x2b0)
-#define HW_PINCTRL_DRIVE11_ADDR (REGS_PINCTRL_BASE + 0x2b0)
-HW_REGISTER(HW_PINCTRL_DRIVE12, REGS_PINCTRL_BASE, 0x2c0)
-#define HW_PINCTRL_DRIVE12_ADDR (REGS_PINCTRL_BASE + 0x2c0)
-HW_REGISTER(HW_PINCTRL_DRIVE13, REGS_PINCTRL_BASE, 0x2d0)
-#define HW_PINCTRL_DRIVE13_ADDR (REGS_PINCTRL_BASE + 0x2d0)
-HW_REGISTER(HW_PINCTRL_DRIVE14, REGS_PINCTRL_BASE, 0x2e0)
-#define HW_PINCTRL_DRIVE14_ADDR (REGS_PINCTRL_BASE + 0x2e0)
-
-
-HW_REGISTER(HW_PINCTRL_PULL0, REGS_PINCTRL_BASE, 0x300)
-#define HW_PINCTRL_PULL0_ADDR (REGS_PINCTRL_BASE + 0x300)
-#define BM_PINCTRL_PULL0_BANK0_PIN01      0x00000002
-#define BM_PINCTRL_PULL0_BANK0_PIN02      0x00000004
-#define BM_PINCTRL_PULL0_BANK0_PIN03      0x00000008
-#define BM_PINCTRL_PULL0_BANK0_PIN04      0x00000010
-#define BM_PINCTRL_PULL0_BANK0_PIN20      0x00100000
-HW_REGISTER(HW_PINCTRL_PULL1, REGS_PINCTRL_BASE, 0x310)
-#define HW_PINCTRL_PULL1_ADDR (REGS_PINCTRL_BASE + 0x310)
-#define BM_PINCTRL_PULL1_BANK1_PIN22      0x00400000
-#define BM_PINCTRL_PULL1_BANK1_PIN24      0x01000000
-#define BM_PINCTRL_PULL1_BANK1_PIN25      0x02000000
-#define BM_PINCTRL_PULL1_BANK1_PIN26      0x04000000
-#define BM_PINCTRL_PULL1_BANK1_PIN27      0x08000000
-HW_REGISTER(HW_PINCTRL_PULL2, REGS_PINCTRL_BASE, 0x320)
-#define HW_PINCTRL_PULL2_ADDR (REGS_PINCTRL_BASE + 0x320)
-HW_REGISTER(HW_PINCTRL_PULL3, REGS_PINCTRL_BASE, 0x330)
-#define HW_PINCTRL_PULL3_ADDR (REGS_PINCTRL_BASE + 0x330)
-
-#define HW_PINCTRL_DOUT0_ADDR (REGS_PINCTRL_BASE + 0x400)
-HW_REGISTER(HW_PINCTRL_DOUT0, REGS_PINCTRL_BASE, 0x400)
-#define HW_PINCTRL_DOUT1_ADDR (REGS_PINCTRL_BASE + 0x410)
-HW_REGISTER(HW_PINCTRL_DOUT1, REGS_PINCTRL_BASE, 0x410)
-#define HW_PINCTRL_DOUT2_ADDR (REGS_PINCTRL_BASE + 0x420)
-HW_REGISTER(HW_PINCTRL_DOUT2, REGS_PINCTRL_BASE, 0x420)
-
-#define HW_PINCTRL_DIN0_ADDR (REGS_PINCTRL_BASE + 0x500)
-HW_REGISTER_RO(HW_PINCTRL_DIN0, REGS_PINCTRL_BASE, 0x500)
-#define HW_PINCTRL_DIN1_ADDR (REGS_PINCTRL_BASE + 0x510)
-HW_REGISTER_RO(HW_PINCTRL_DIN1, REGS_PINCTRL_BASE, 0x510)
-#define HW_PINCTRL_DIN2_ADDR (REGS_PINCTRL_BASE + 0x520)
-HW_REGISTER_RO(HW_PINCTRL_DIN2, REGS_PINCTRL_BASE, 0x520)
-
-#define HW_PINCTRL_DOE0_ADDR (REGS_PINCTRL_BASE + 0x600)
-HW_REGISTER(HW_PINCTRL_DOE0, REGS_PINCTRL_BASE, 0x600)
-#define HW_PINCTRL_DOE1_ADDR (REGS_PINCTRL_BASE + 0x610)
-HW_REGISTER(HW_PINCTRL_DOE1, REGS_PINCTRL_BASE, 0x610)
-#define HW_PINCTRL_DOE2_ADDR (REGS_PINCTRL_BASE + 0x620)
-HW_REGISTER(HW_PINCTRL_DOE2, REGS_PINCTRL_BASE, 0x620)
-
-HW_REGISTER(HW_PINCTRL_PIN2IRQ0, REGS_PINCTRL_BASE, 0x700)
-#define HW_PINCTRL_PIN2IRQ0_ADDR (REGS_PINCTRL_BASE + 0x700)
-HW_REGISTER(HW_PINCTRL_PIN2IRQ1, REGS_PINCTRL_BASE, 0x710)
-#define HW_PINCTRL_PIN2IRQ1_ADDR (REGS_PINCTRL_BASE + 0x710)
-HW_REGISTER(HW_PINCTRL_PIN2IRQ2, REGS_PINCTRL_BASE, 0x720)
-#define HW_PINCTRL_PIN2IRQ2_ADDR (REGS_PINCTRL_BASE + 0x720)
-
-HW_REGISTER(HW_PINCTRL_IRQEN0, REGS_PINCTRL_BASE, 0x800)
-#define HW_PINCTRL_IRQEN0_ADDR (REGS_PINCTRL_BASE + 0x800)
-HW_REGISTER(HW_PINCTRL_IRQEN1, REGS_PINCTRL_BASE, 0x810)
-#define HW_PINCTRL_IRQEN1_ADDR (REGS_PINCTRL_BASE + 0x810)
-HW_REGISTER(HW_PINCTRL_IRQEN2, REGS_PINCTRL_BASE, 0x820)
-#define HW_PINCTRL_IRQEN2_ADDR (REGS_PINCTRL_BASE + 0x820)
-
-HW_REGISTER(HW_PINCTRL_IRQLEVEL0, REGS_PINCTRL_BASE, 0x900)
-#define HW_PINCTRL_IRQLEVEL0_ADDR (REGS_PINCTRL_BASE + 0x900)
-HW_REGISTER(HW_PINCTRL_IRQLEVEL1, REGS_PINCTRL_BASE, 0x910)
-#define HW_PINCTRL_IRQLEVEL1_ADDR (REGS_PINCTRL_BASE + 0x910)
-HW_REGISTER(HW_PINCTRL_IRQLEVEL2, REGS_PINCTRL_BASE, 0x920)
-#define HW_PINCTRL_IRQLEVEL2_ADDR (REGS_PINCTRL_BASE + 0x920)
-
-HW_REGISTER(HW_PINCTRL_IRQPOL0, REGS_PINCTRL_BASE, 0xA00)
-#define HW_PINCTRL_IRQPOL0_ADDR (REGS_PINCTRL_BASE + 0xa00)
-HW_REGISTER(HW_PINCTRL_IRQPOL1, REGS_PINCTRL_BASE, 0xA10)
-#define HW_PINCTRL_IRQPOL1_ADDR (REGS_PINCTRL_BASE + 0xa10)
-HW_REGISTER(HW_PINCTRL_IRQPOL2, REGS_PINCTRL_BASE, 0xA20)
-#define HW_PINCTRL_IRQPOL2_ADDR (REGS_PINCTRL_BASE + 0xa20)
-
-HW_REGISTER(HW_PINCTRL_IRQSTAT0, REGS_PINCTRL_BASE, 0xB00)
-#define HW_PINCTRL_IRQSTAT0_ADDR (REGS_PINCTRL_BASE + 0xb00)
-HW_REGISTER(HW_PINCTRL_IRQSTAT1, REGS_PINCTRL_BASE, 0xB10)
-#define HW_PINCTRL_IRQSTAT1_ADDR (REGS_PINCTRL_BASE + 0xb10)
-HW_REGISTER(HW_PINCTRL_IRQSTAT2, REGS_PINCTRL_BASE, 0xB20)
-#define HW_PINCTRL_IRQSTAT2_ADDR (REGS_PINCTRL_BASE + 0xb20)
-
-#endif /* _INCLUDE_ASM_ARCH_REGS_PINCTRL_H */
-
+#ifndef _MACH_REGS_PINCTRL
+#define _MACH_REGS_PINCTRL
+
+#define REGS_PINCTRL_BASE	(STMP3XXX_REGS_BASE + 0x18000)
+
+#define HW_PINCTRL_MUXSEL0	0x100
+#define HW_PINCTRL_MUXSEL1	0x110
+#define HW_PINCTRL_MUXSEL2	0x120
+#define HW_PINCTRL_MUXSEL3	0x130
+#define HW_PINCTRL_MUXSEL4	0x140
+#define HW_PINCTRL_MUXSEL5	0x150
+#define HW_PINCTRL_MUXSEL6	0x160
+#define HW_PINCTRL_MUXSEL7	0x170
+
+#define HW_PINCTRL_DRIVE0	0x200
+#define HW_PINCTRL_DRIVE1	0x210
+#define HW_PINCTRL_DRIVE2	0x220
+#define HW_PINCTRL_DRIVE3	0x230
+#define HW_PINCTRL_DRIVE4	0x240
+#define HW_PINCTRL_DRIVE5	0x250
+#define HW_PINCTRL_DRIVE6	0x260
+#define HW_PINCTRL_DRIVE7	0x270
+#define HW_PINCTRL_DRIVE8	0x280
+#define HW_PINCTRL_DRIVE9	0x290
+#define HW_PINCTRL_DRIVE10	0x2A0
+#define HW_PINCTRL_DRIVE11	0x2B0
+#define HW_PINCTRL_DRIVE12	0x2C0
+#define HW_PINCTRL_DRIVE13	0x2D0
+#define HW_PINCTRL_DRIVE14	0x2E0
+
+#define HW_PINCTRL_PULL0	0x300
+#define HW_PINCTRL_PULL1	0x310
+#define HW_PINCTRL_PULL2	0x320
+#define HW_PINCTRL_PULL3	0x330
+
+#define HW_PINCTRL_DOUT0	0x400
+#define HW_PINCTRL_DOUT1	0x410
+#define HW_PINCTRL_DOUT2	0x420
+
+#define HW_PINCTRL_DIN0		0x500
+#define HW_PINCTRL_DIN1		0x510
+#define HW_PINCTRL_DIN2		0x520
+
+#define HW_PINCTRL_DOE0		0x600
+#define HW_PINCTRL_DOE1		0x610
+#define HW_PINCTRL_DOE2		0x620
+
+#define HW_PINCTRL_PIN2IRQ0	0x700
+#define HW_PINCTRL_PIN2IRQ1	0x710
+#define HW_PINCTRL_PIN2IRQ2	0x720
+
+#define HW_PINCTRL_IRQEN0	0x800
+#define HW_PINCTRL_IRQEN1	0x810
+#define HW_PINCTRL_IRQEN2	0x820
+
+#define HW_PINCTRL_IRQLEVEL0	0x900
+#define HW_PINCTRL_IRQLEVEL1	0x910
+#define HW_PINCTRL_IRQLEVEL2	0x920
+
+#define HW_PINCTRL_IRQPOL0	0xA00
+#define HW_PINCTRL_IRQPOL1	0xA10
+#define HW_PINCTRL_IRQPOL2	0xA20
+
+#define HW_PINCTRL_IRQSTAT0	0xB00
+#define HW_PINCTRL_IRQSTAT1	0xB10
+#define HW_PINCTRL_IRQSTAT2	0xB20
+
+#endif
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-power.h b/arch/arm/mach-stmp37xx/include/mach/regs-power.h
index d15cd66..0e733d7 100644
--- a/arch/arm/mach-stmp37xx/include/mach/regs-power.h
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-power.h
@@ -1,7 +1,8 @@
 /*
- * STMP POWER Register Definitions
+ * stmp37xx: POWER register definitions
  *
  * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -17,15 +18,39 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
+#ifndef _MACH_REGS_POWER
+#define _MACH_REGS_POWER
 
-#ifndef __ARCH_ARM___POWER_H
-#define __ARCH_ARM___POWER_H  1
+#define REGS_POWER_BASE	(STMP3XXX_REGS_BASE + 0x44000)
 
-#include <mach/stmp3xxx_regs.h>
+#define HW_POWER_CTRL		0x0
+#define BM_POWER_CTRL_CLKGATE	0x40000000
 
-#define REGS_POWER_BASE (void __iomem *)(REGS_BASE + 0x44000)
-#define REGS_POWER_BASE_PHYS (0x80044000)
-#define REGS_POWER_SIZE 0x00002000
-HW_REGISTER(HW_POWER_MINPWR, REGS_POWER_BASE, 0x00000020)
-HW_REGISTER(HW_POWER_CHARGE, REGS_POWER_BASE, 0x00000030)
-#endif /* __ARCH_ARM___POWER_H */
+#define HW_POWER_5VCTRL		0x10
+
+#define HW_POWER_MINPWR		0x20
+
+#define HW_POWER_CHARGE		0x30
+
+#define HW_POWER_VDDDCTRL	0x40
+
+#define HW_POWER_VDDACTRL	0x50
+
+#define HW_POWER_VDDIOCTRL	0x60
+#define BM_POWER_VDDIOCTRL_TRG	0x0000001F
+#define BP_POWER_VDDIOCTRL_TRG	0
+
+#define HW_POWER_STS		0xB0
+#define BM_POWER_STS_VBUSVALID	0x00000002
+#define BM_POWER_STS_BVALID	0x00000004
+#define BM_POWER_STS_AVALID	0x00000008
+#define BM_POWER_STS_DC_OK	0x00000100
+
+#define HW_POWER_RESET		0xE0
+
+#define HW_POWER_DEBUG		0xF0
+#define BM_POWER_DEBUG_BVALIDPIOLOCK	0x00000002
+#define BM_POWER_DEBUG_AVALIDPIOLOCK	0x00000004
+#define BM_POWER_DEBUG_VBUSVALIDPIOLOCK	0x00000008
+
+#endif
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h b/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h
index 7f00030..4af0f6e 100644
--- a/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h
@@ -1,8 +1,8 @@
 /*
- * include/asm-arm/arch-stmp3xxx/regstimer.h
+ * stmp37xx: TIMROT register definitions
  *
- * Copyright (c) 2008 SigmaTel Inc
- * Copyright (c) 2008 Embedded Alley Solutions, Inc
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -16,37 +16,34 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef __ARCH_ARM_REGS_TIMROT_H
-#define __ARCH_ARM_REGS_TIMROT_H
+#ifndef _MACH_REGS_TIMROT
+#define _MACH_REGS_TIMROT
 
-#include <mach/stmp3xxx_regs.h>
+#define REGS_TIMROT_BASE	(STMP3XXX_REGS_BASE + 0x68000)
 
-#define REGS_TIMROT_BASE (REGS_BASE + 0x00068000)
+#define HW_TIMROT_ROTCTRL	0x0
+#define BM_TIMROT_ROTCTRL_CLKGATE	0x40000000
+#define BM_TIMROT_ROTCTRL_SFTRST	0x80000000
 
-HW_REGISTER(HW_TIMROT_ROTCTRL, REGS_TIMROT_BASE, 0)
-#define BM_TIMROT_ROTCTRL_SFTRST      0x80000000
-#define BM_TIMROT_ROTCTRL_CLKGATE      0x40000000
+#define HW_TIMROT_TIMCTRL0	(0x20 + 0 * 0x20)
+#define HW_TIMROT_TIMCTRL1	(0x20 + 1 * 0x20)
+#define HW_TIMROT_TIMCTRL2	(0x20 + 2 * 0x20)
 
-HW_REGISTER_INDEXED(HW_TIMROT_TIMCTRLn, REGS_TIMROT_BASE, 0x20, 0x20)
-#define BM_TIMROT_TIMCTRLn_SELECT      0x0000000F
-#define BF_TIMROT_TIMCTRLn_SELECT(v)   (((v) << 0) & BM_TIMROT_TIMCTRLn_SELECT)
-#define BM_TIMROT_TIMCTRLn_PRESCALE      0x00000030
-#define BF_TIMROT_TIMCTRLn_PRESCALE(v)   \
-	(((v) << 4) & BM_TIMROT_TIMCTRLn_PRESCALE)
-#define BM_TIMROT_TIMCTRLn_RELOAD      0x00000040
-#define BF_TIMROT_TIMCTRLn_RELOAD(v)   (((v) << 6) & BM_TIMROT_TIMCTRLn_RELOAD)
-#define BM_TIMROT_TIMCTRLn_UPDATE      0x00000080
-#define BF_TIMROT_TIMCTRLn_UPDATE(v)   (((v) << 7) & BM_TIMROT_TIMCTRLn_UPDATE)
-#define BM_TIMROT_TIMCTRLn_POLARITY      0x00000100
-#define BF_TIMROT_TIMCTRLn_POLARITY(v)   \
-	(((v) << 8) & BM_TIMROT_TIMCTRLn_POLARITY)
-#define BM_TIMROT_TIMCTRLn_IRQ_EN      0x00004000
-#define BF_TIMROT_TIMCTRLn_IRQ_EN(v)   \
-	(((v) << 14) & BM_TIMROT_TIMCTRLn_IRQ_EN)
-#define BM_TIMROT_TIMCTRLn_IRQ      0x00008000
-#define BF_TIMROT_TIMCTRLn_IRQ(v)   (((v) << 15) & BM_TIMROT_TIMCTRLn_IRQ)
-HW_REGISTER_0_INDEXED(HW_TIMROT_TIMCOUNTn, REGS_TIMROT_BASE, 0x30, 0x20)
+#define HW_TIMROT_TIMCTRLn	0x20
+#define BM_TIMROT_TIMCTRLn_SELECT	0x0000000F
+#define BP_TIMROT_TIMCTRLn_SELECT	0
+#define BM_TIMROT_TIMCTRLn_PRESCALE	0x00000030
+#define BP_TIMROT_TIMCTRLn_PRESCALE	4
+#define BM_TIMROT_TIMCTRLn_RELOAD	0x00000040
+#define BM_TIMROT_TIMCTRLn_UPDATE	0x00000080
+#define BM_TIMROT_TIMCTRLn_IRQ_EN	0x00004000
+#define BM_TIMROT_TIMCTRLn_IRQ	0x00008000
 
-#endif /* __ARCH_ARM_REGSTIMER_H */
+#define HW_TIMROT_TIMCOUNT0	(0x30 + 0 * 0x20)
+#define HW_TIMROT_TIMCOUNT1	(0x30 + 1 * 0x20)
+#define HW_TIMROT_TIMCOUNT2	(0x30 + 2 * 0x20)
+
+#define HW_TIMROT_TIMCOUNTn	0x30
+#endif
-- 
cgit v0.10.2


From 98f420b23a62e0c9df78c5851860d47bf1bc87dd Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Sun, 31 May 2009 13:32:11 +0100
Subject: [ARM] 5532/1: Freescale STMP: register definitions [3/3]

Replace HW_zzz register access macros by regular __raw_readl/__raw_writel calls

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp378x/stmp378x.c b/arch/arm/mach-stmp378x/stmp378x.c
index f156ec7..9a363fb 100644
--- a/arch/arm/mach-stmp378x/stmp378x.c
+++ b/arch/arm/mach-stmp378x/stmp378x.c
@@ -47,25 +47,28 @@
 static void stmp378x_ack_irq(unsigned int irq)
 {
 	/* Tell ICOLL to release IRQ line */
-	HW_ICOLL_VECTOR_WR(0x0);
+	__raw_writel(0, REGS_ICOLL_BASE + HW_ICOLL_VECTOR);
 
 	/* ACK current interrupt */
-	HW_ICOLL_LEVELACK_WR(BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0);
+	__raw_writel(0x01 /* BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0 */,
+			REGS_ICOLL_BASE + HW_ICOLL_LEVELACK);
 
 	/* Barrier */
-	(void) HW_ICOLL_STAT_RD();
+	(void)__raw_readl(REGS_ICOLL_BASE + HW_ICOLL_STAT);
 }
 
 static void stmp378x_mask_irq(unsigned int irq)
 {
 	/* IRQ disable */
-	HW_ICOLL_INTERRUPTn_CLR(irq, BM_ICOLL_INTERRUPTn_ENABLE);
+	stmp3xxx_clearl(BM_ICOLL_INTERRUPTn_ENABLE,
+			REGS_ICOLL_BASE + HW_ICOLL_INTERRUPTn + irq * 0x10);
 }
 
 static void stmp378x_unmask_irq(unsigned int irq)
 {
 	/* IRQ enable */
-	HW_ICOLL_INTERRUPTn_SET(irq, BM_ICOLL_INTERRUPTn_ENABLE);
+	stmp3xxx_setl(BM_ICOLL_INTERRUPTn_ENABLE,
+		      REGS_ICOLL_BASE + HW_ICOLL_INTERRUPTn + irq * 0x10);
 }
 
 static struct irq_chip stmp378x_chip = {
@@ -84,52 +87,63 @@ void __init stmp378x_init_irq(void)
  */
 void stmp3xxx_arch_dma_enable_interrupt(int channel)
 {
-	int dmabus = channel / 16;
+	void __iomem *c1, *c2;
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_CTRL1_SET(1 << (16 + (channel % 16)));
-		HW_APBH_CTRL2_SET(1 << (16 + (channel % 16)));
+		c1 = REGS_APBH_BASE + HW_APBH_CTRL1;
+		c2 = REGS_APBH_BASE + HW_APBH_CTRL2;
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_CTRL1_SET(1 << (16 + (channel % 16)));
-		HW_APBX_CTRL2_SET(1 << (16 + (channel % 16)));
+		c1 = REGS_APBX_BASE + HW_APBX_CTRL1;
+		c2 = REGS_APBX_BASE + HW_APBX_CTRL2;
 		break;
+
+	default:
+		return;
 	}
+	stmp3xxx_setl(1 << (16 + STMP3XXX_DMA_CHANNEL(channel)), c1);
+	stmp3xxx_setl(1 << (16 + STMP3XXX_DMA_CHANNEL(channel)), c2);
 }
 EXPORT_SYMBOL(stmp3xxx_arch_dma_enable_interrupt);
 
 void stmp3xxx_arch_dma_clear_interrupt(int channel)
 {
-	int dmabus = channel / 16;
+	void __iomem *c1, *c2;
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_CTRL1_CLR(1 << (channel % 16));
-		HW_APBH_CTRL2_CLR(1 << (channel % 16));
+		c1 = REGS_APBH_BASE + HW_APBH_CTRL1;
+		c2 = REGS_APBH_BASE + HW_APBH_CTRL2;
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_CTRL1_CLR(1 << (channel % 16));
-		HW_APBX_CTRL2_CLR(1 << (channel % 16));
+		c1 = REGS_APBX_BASE + HW_APBX_CTRL1;
+		c2 = REGS_APBX_BASE + HW_APBX_CTRL2;
 		break;
+
+	default:
+		return;
 	}
+	stmp3xxx_clearl(1 << STMP3XXX_DMA_CHANNEL(channel), c1);
+	stmp3xxx_clearl(1 << STMP3XXX_DMA_CHANNEL(channel), c2);
 }
 EXPORT_SYMBOL(stmp3xxx_arch_dma_clear_interrupt);
 
 int stmp3xxx_arch_dma_is_interrupt(int channel)
 {
-	int dmabus = channel / 16;
 	int r = 0;
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		r = HW_APBH_CTRL1_RD() & (1 << (channel % 16));
+		r = __raw_readl(REGS_APBH_BASE + HW_APBH_CTRL1) &
+			(1 << STMP3XXX_DMA_CHANNEL(channel));
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		r = HW_APBX_CTRL1_RD() & (1 << (channel % 16));
+		r = __raw_readl(REGS_APBX_BASE + HW_APBX_CTRL1) &
+			(1 << STMP3XXX_DMA_CHANNEL(channel));
 		break;
 	}
 	return r;
@@ -138,42 +152,41 @@ EXPORT_SYMBOL(stmp3xxx_arch_dma_is_interrupt);
 
 void stmp3xxx_arch_dma_reset_channel(int channel)
 {
-	int dmabus = channel / 16;
-	unsigned chbit = 1 << (channel % 16);
+	unsigned chbit = 1 << STMP3XXX_DMA_CHANNEL(channel);
+	void __iomem *c0;
+	u32 mask;
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		/* Reset channel and wait for it to complete */
-		HW_APBH_CTRL0_SET(chbit <<
-				 BP_APBH_CTRL0_RESET_CHANNEL);
-		while (HW_APBH_CTRL0_RD() &
-		       (chbit << BP_APBH_CTRL0_RESET_CHANNEL))
-			continue;
+		c0 = REGS_APBH_BASE + HW_APBH_CTRL0;
+		mask = chbit << BP_APBH_CTRL0_RESET_CHANNEL;
 		break;
-
 	case STMP3XXX_BUS_APBX:
-		/* Reset channel and wait for it to complete */
-		HW_APBX_CHANNEL_CTRL_SET(
-			BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(chbit));
-		while (HW_APBX_CHANNEL_CTRL_RD() &
-			BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(chbit))
-			continue;
+		c0 = REGS_APBX_BASE + HW_APBX_CHANNEL_CTRL;
+		mask = chbit << BP_APBX_CHANNEL_CTRL_RESET_CHANNEL;
 		break;
+	default:
+		return;
 	}
+
+	/* Reset channel and wait for it to complete */
+	stmp3xxx_setl(mask, c0);
+	while (__raw_readl(c0) & mask)
+		cpu_relax();
 }
 EXPORT_SYMBOL(stmp3xxx_arch_dma_reset_channel);
 
 void stmp3xxx_arch_dma_freeze(int channel)
 {
-	int dmabus = channel / 16;
-	unsigned chbit = 1 << (channel % 16);
+	unsigned chbit = 1 << STMP3XXX_DMA_CHANNEL(channel);
+	u32 mask = 1 << chbit;
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_CTRL0_SET(1<<chbit);
+		stmp3xxx_setl(mask, REGS_APBH_BASE + HW_APBH_CTRL0);
 		break;
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_CHANNEL_CTRL_SET(1<<chbit);
+		stmp3xxx_setl(mask, REGS_APBX_BASE + HW_APBX_CHANNEL_CTRL);
 		break;
 	}
 }
@@ -181,15 +194,15 @@ EXPORT_SYMBOL(stmp3xxx_arch_dma_freeze);
 
 void stmp3xxx_arch_dma_unfreeze(int channel)
 {
-	int dmabus = channel / 16;
-	unsigned chbit = 1 << (channel % 16);
+	unsigned chbit = 1 << STMP3XXX_DMA_CHANNEL(channel);
+	u32 mask = 1 << chbit;
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_CTRL0_CLR(1<<chbit);
+		stmp3xxx_clearl(mask, REGS_APBH_BASE + HW_APBH_CTRL0);
 		break;
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_CHANNEL_CTRL_CLR(1<<chbit);
+		stmp3xxx_clearl(mask, REGS_APBX_BASE + HW_APBX_CHANNEL_CTRL);
 		break;
 	}
 }
@@ -201,7 +214,7 @@ EXPORT_SYMBOL(stmp3xxx_arch_dma_unfreeze);
  *
  * Logical      Physical
  * f0000000	80000000	On-chip registers
- * f1000000	00000000	256k on-chip SRAM
+ * f1000000	00000000	32k on-chip SRAM
  */
 
 static struct map_desc stmp378x_io_desc[] __initdata = {
diff --git a/arch/arm/mach-stmp37xx/stmp37xx.c b/arch/arm/mach-stmp37xx/stmp37xx.c
index 83a41c9..8c7d6fb 100644
--- a/arch/arm/mach-stmp37xx/stmp37xx.c
+++ b/arch/arm/mach-stmp37xx/stmp37xx.c
@@ -34,6 +34,7 @@
 #include <mach/stmp3xxx.h>
 #include <mach/dma.h>
 
+#include <mach/platform.h>
 #include <mach/regs-icoll.h>
 #include <mach/regs-apbh.h>
 #include <mach/regs-apbx.h>
@@ -45,25 +46,28 @@
 static void stmp37xx_ack_irq(unsigned int irq)
 {
 	/* Disable IRQ */
-	HW_ICOLL_PRIORITYn_CLR(irq / 4, 0x04 << ((irq % 4) * 8));
+	stmp3xxx_clearl(0x04 << ((irq % 4) * 8),
+		REGS_ICOLL_BASE + HW_ICOLL_PRIORITYn + irq / 4 * 0x10);
 
 	/* ACK current interrupt */
-	HW_ICOLL_LEVELACK_WR(1);
+	__raw_writel(1, REGS_ICOLL_BASE + HW_ICOLL_LEVELACK);
 
 	/* Barrier */
-	(void) HW_ICOLL_STAT_RD();
+	(void)__raw_readl(REGS_ICOLL_BASE + HW_ICOLL_STAT);
 }
 
 static void stmp37xx_mask_irq(unsigned int irq)
 {
 	/* IRQ disable */
-	HW_ICOLL_PRIORITYn_CLR(irq / 4, 0x04 << ((irq % 4) * 8));
+	stmp3xxx_clearl(0x04 << ((irq % 4) * 8),
+		REGS_ICOLL_BASE + HW_ICOLL_PRIORITYn + irq / 4 * 0x10);
 }
 
 static void stmp37xx_unmask_irq(unsigned int irq)
 {
 	/* IRQ enable */
-	HW_ICOLL_PRIORITYn_SET(irq / 4, 0x04 << ((irq % 4) * 8));
+	stmp3xxx_setl(0x04 << ((irq % 4) * 8),
+		REGS_ICOLL_BASE + HW_ICOLL_PRIORITYn + irq / 4 * 0x10);
 }
 
 static struct irq_chip stmp37xx_chip = {
@@ -82,15 +86,15 @@ void __init stmp37xx_init_irq(void)
  */
 void stmp3xxx_arch_dma_enable_interrupt(int channel)
 {
-	int dmabus = channel / 16;
-
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_CTRL1_SET(1 << (8 + (channel % 16)));
+		stmp3xxx_setl(1 << (8 + STMP3XXX_DMA_CHANNEL(channel)),
+			REGS_APBH_BASE + HW_APBH_CTRL1);
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_CTRL1_SET(1 << (8 + (channel % 16)));
+		stmp3xxx_setl(1 << (8 + STMP3XXX_DMA_CHANNEL(channel)),
+			REGS_APBX_BASE + HW_APBX_CTRL1);
 		break;
 	}
 }
@@ -98,15 +102,15 @@ EXPORT_SYMBOL(stmp3xxx_arch_dma_enable_interrupt);
 
 void stmp3xxx_arch_dma_clear_interrupt(int channel)
 {
-	int dmabus = channel / 16;
-
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_CTRL1_CLR(1 << (channel % 16));
+		stmp3xxx_clearl(1 << STMP3XXX_DMA_CHANNEL(channel),
+				REGS_APBH_BASE + HW_APBH_CTRL1);
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_CTRL1_CLR(1 << (channel % 16));
+		stmp3xxx_clearl(1 << STMP3XXX_DMA_CHANNEL(channel),
+				REGS_APBX_BASE + HW_APBX_CTRL1);
 		break;
 	}
 }
@@ -116,15 +120,15 @@ int stmp3xxx_arch_dma_is_interrupt(int channel)
 {
 	int r = 0;
 
-	int dmabus = channel / 16;
-
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		r = HW_APBH_CTRL1_RD() & (1 << (channel % 16));
+		r = __raw_readl(REGS_APBH_BASE + HW_APBH_CTRL1) &
+			(1 << STMP3XXX_DMA_CHANNEL(channel));
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		r = HW_APBX_CTRL1_RD() & (1 << (channel % 16));
+		r = __raw_readl(REGS_APBH_BASE + HW_APBH_CTRL1) &
+			(1 << STMP3XXX_DMA_CHANNEL(channel));
 		break;
 	}
 	return r;
@@ -133,24 +137,24 @@ EXPORT_SYMBOL(stmp3xxx_arch_dma_is_interrupt);
 
 void stmp3xxx_arch_dma_reset_channel(int channel)
 {
-	int dmabus = channel / 16;
-	unsigned chbit = 1 << (channel % 16);
+	unsigned chbit = 1 << STMP3XXX_DMA_CHANNEL(channel);
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
 		/* Reset channel and wait for it to complete */
-		HW_APBH_CTRL0_SET(chbit << BP_APBH_CTRL0_RESET_CHANNEL);
-		while (HW_APBH_CTRL0_RD() &
+		stmp3xxx_setl(chbit << BP_APBH_CTRL0_RESET_CHANNEL,
+			REGS_APBH_BASE + HW_APBH_CTRL0);
+		while (__raw_readl(REGS_APBH_BASE + HW_APBH_CTRL0) &
 		       (chbit << BP_APBH_CTRL0_RESET_CHANNEL))
-			continue;
+				cpu_relax();
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		/* Reset channel and wait for it to complete */
-		HW_APBX_CTRL0_SET(chbit << BP_APBX_CTRL0_RESET_CHANNEL);
-		while (HW_APBX_CTRL0_RD() &
+		stmp3xxx_setl(chbit << BP_APBX_CTRL0_RESET_CHANNEL,
+			REGS_APBX_BASE + HW_APBX_CTRL0);
+		while (__raw_readl(REGS_APBX_BASE + HW_APBX_CTRL0) &
 		       (chbit << BP_APBX_CTRL0_RESET_CHANNEL))
-			continue;
+				cpu_relax();
 		break;
 	}
 }
@@ -158,15 +162,14 @@ EXPORT_SYMBOL(stmp3xxx_arch_dma_reset_channel);
 
 void stmp3xxx_arch_dma_freeze(int channel)
 {
-	int dmabus = channel / 16;
-	unsigned chbit = 1 << (channel % 16);
+	unsigned chbit = 1 << STMP3XXX_DMA_CHANNEL(channel);
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_CTRL0_SET(1<<chbit);
+		stmp3xxx_setl(1 << chbit, REGS_APBH_BASE + HW_APBH_CTRL0);
 		break;
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_CTRL0_SET(1<<chbit);
+		stmp3xxx_setl(1 << chbit, REGS_APBH_BASE + HW_APBH_CTRL0);
 		break;
 	}
 }
@@ -174,15 +177,14 @@ EXPORT_SYMBOL(stmp3xxx_arch_dma_freeze);
 
 void stmp3xxx_arch_dma_unfreeze(int channel)
 {
-	int dmabus = channel / 16;
-	unsigned chbit = 1 << (channel % 16);
+	unsigned chbit = 1 << STMP3XXX_DMA_CHANNEL(channel);
 
-	switch (dmabus) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_CTRL0_CLR(1<<chbit);
+		stmp3xxx_clearl(1 << chbit, REGS_APBH_BASE + HW_APBH_CTRL0);
 		break;
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_CTRL0_CLR(1<<chbit);
+		stmp3xxx_clearl(1 << chbit, REGS_APBH_BASE + HW_APBH_CTRL0);
 		break;
 	}
 }
@@ -194,7 +196,7 @@ EXPORT_SYMBOL(stmp3xxx_arch_dma_unfreeze);
  *
  * Logical      Physical
  * f0000000	80000000	On-chip registers
- * f1000000	00000000	256k on-chip SRAM
+ * f1000000	00000000	32k on-chip SRAM
  */
 static struct map_desc stmp37xx_io_desc[] __initdata = {
 	{
diff --git a/arch/arm/plat-stmp3xxx/clock.c b/arch/arm/plat-stmp3xxx/clock.c
index 9a1d46b..5d2f19a 100644
--- a/arch/arm/plat-stmp3xxx/clock.c
+++ b/arch/arm/plat-stmp3xxx/clock.c
@@ -15,6 +15,7 @@
  * http://www.opensource.org/licenses/gpl-license.html
  * http://www.gnu.org/copyleft/gpl.html
  */
+#define DEBUG
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -27,6 +28,7 @@
 
 #include <asm/mach-types.h>
 #include <asm/clkdev.h>
+#include <mach/platform.h>
 #include <mach/regs-clkctrl.h>
 
 #include "clock.h"
@@ -187,8 +189,8 @@ static long lcdif_get_rate(struct clk *clk)
 	div = (__raw_readl(clk->scale_reg) >> clk->scale_shift) & mask;
 	if (div) {
 		rate /= div;
-		div = (HW_CLKCTRL_FRAC_RD() & BM_CLKCTRL_FRAC_PIXFRAC) >>
-				BP_CLKCTRL_FRAC_PIXFRAC;
+		div = (__raw_readl(REGS_CLKCTRL_BASE + HW_CLKCTRL_FRAC) &
+			BM_CLKCTRL_FRAC_PIXFRAC) >> BP_CLKCTRL_FRAC_PIXFRAC;
 		rate /= div;
 	}
 	clk->rate = rate;
@@ -263,15 +265,19 @@ static int lcdif_set_rate(struct clk *clk, u32 rate)
 			lowest_result / 1000, lowest_result % 1000);
 
 	/* Program ref_pix phase fractional divider */
-	HW_CLKCTRL_FRAC_WR((HW_CLKCTRL_FRAC_RD() & ~BM_CLKCTRL_FRAC_PIXFRAC) |
-			   BF_CLKCTRL_FRAC_PIXFRAC(lowest_fracdiv));
+	reg_val = __raw_readl(REGS_CLKCTRL_BASE + HW_CLKCTRL_FRAC);
+	reg_val &= ~BM_CLKCTRL_FRAC_PIXFRAC;
+	reg_val |= BF(lowest_fracdiv, CLKCTRL_FRAC_PIXFRAC);
+	__raw_writel(reg_val, REGS_CLKCTRL_BASE + HW_CLKCTRL_FRAC);
+
 	/* Ungate PFD */
-	HW_CLKCTRL_FRAC_CLR(BM_CLKCTRL_FRAC_CLKGATEPIX);
+	stmp3xxx_clearl(BM_CLKCTRL_FRAC_CLKGATEPIX,
+			REGS_CLKCTRL_BASE + HW_CLKCTRL_FRAC);
 
 	/* Program pix divider */
 	reg_val = __raw_readl(clk->scale_reg);
 	reg_val &= ~(BM_CLKCTRL_PIX_DIV | BM_CLKCTRL_PIX_CLKGATE);
-	reg_val |= BF_CLKCTRL_PIX_DIV(lowest_div);
+	reg_val |= BF(lowest_div, CLKCTRL_PIX_DIV);
 	__raw_writel(reg_val, clk->scale_reg);
 
 	/* Wait for divider update */
@@ -287,7 +293,9 @@ static int lcdif_set_rate(struct clk *clk, u32 rate)
 	}
 
 	/* Switch to ref_pix source */
-	HW_CLKCTRL_CLKSEQ_CLR(BM_CLKCTRL_CLKSEQ_BYPASS_PIX);
+	reg_val = __raw_readl(REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ);
+	reg_val &= ~BM_CLKCTRL_CLKSEQ_BYPASS_PIX;
+	__raw_writel(reg_val, REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ);
 
 out:
 	return ret;
@@ -296,6 +304,8 @@ out:
 
 static int cpu_set_rate(struct clk *clk, u32 rate)
 {
+	u32 reg_val;
+
 	if (rate < 24000)
 		return -EINVAL;
 	else if (rate == 24000) {
@@ -344,7 +354,12 @@ static int cpu_set_rate(struct clk *clk, u32 rate)
 		__raw_writel(1<<7, clk->scale_reg + 8);
 		/* write clkctrl_cpu */
 		clk->saved_div = clkctrl_cpu;
-		HW_CLKCTRL_CPU_WR((HW_CLKCTRL_CPU_RD() & ~0x3f) | clkctrl_cpu);
+
+		reg_val = __raw_readl(REGS_CLKCTRL_BASE + HW_CLKCTRL_CPU);
+		reg_val &= ~0x3F;
+		reg_val |= clkctrl_cpu;
+		__raw_writel(reg_val, REGS_CLKCTRL_BASE + HW_CLKCTRL_CPU);
+
 		for (i = 10000; i; i--)
 			if (!clk_is_busy(clk))
 				break;
@@ -364,7 +379,7 @@ static long cpu_get_rate(struct clk *clk)
 	long rate = clk->parent->rate * 18;
 
 	rate /= (__raw_readl(clk->scale_reg) >> clk->scale_shift) & 0x3f;
-	rate /= HW_CLKCTRL_CPU_RD() & 0x3f;
+	rate /= __raw_readl(REGS_CLKCTRL_BASE + HW_CLKCTRL_CPU) & 0x3f;
 	rate = ((rate + 9) / 10) * 10;
 	clk->rate = rate;
 
@@ -411,7 +426,7 @@ static long emi_get_rate(struct clk *clk)
 	long rate = clk->parent->rate * 18;
 
 	rate /= (__raw_readl(clk->scale_reg) >> clk->scale_shift) & 0x3f;
-	rate /= HW_CLKCTRL_EMI_RD() & 0x3f;
+	rate /= __raw_readl(REGS_CLKCTRL_BASE + HW_CLKCTRL_EMI) & 0x3f;
 	clk->rate = rate;
 
 	return rate;
@@ -427,44 +442,52 @@ static int clkseq_set_parent(struct clk *clk, struct clk *parent)
 		shift = 4;
 
 	if (clk->bypass_reg) {
-		u32 hbus_mask = BM_CLKCTRL_HBUS_DIV_FRAC_EN |
-				BM_CLKCTRL_HBUS_DIV;
+#ifdef CONFIG_ARCH_STMP378X
+		u32 hbus_val, cpu_val;
 
 		if (clk == &cpu_clk && shift == 4) {
-			u32 hbus_val = HW_CLKCTRL_HBUS_RD();
-			u32 cpu_val = HW_CLKCTRL_CPU_RD();
-			hbus_val &= ~hbus_mask;
-			hbus_val |= 1;
+			hbus_val = __raw_readl(REGS_CLKCTRL_BASE +
+					HW_CLKCTRL_HBUS);
+			cpu_val = __raw_readl(REGS_CLKCTRL_BASE +
+					HW_CLKCTRL_CPU);
+
+			hbus_val &= ~(BM_CLKCTRL_HBUS_DIV_FRAC_EN |
+				      BM_CLKCTRL_HBUS_DIV);
 			clk->saved_div = cpu_val & BM_CLKCTRL_CPU_DIV_CPU;
 			cpu_val &= ~BM_CLKCTRL_CPU_DIV_CPU;
 			cpu_val |= 1;
-			__raw_writel(1 << clk->bypass_shift,
-					clk->bypass_reg + shift);
+
 			if (machine_is_stmp378x()) {
-				HW_CLKCTRL_HBUS_WR(hbus_val);
-				HW_CLKCTRL_CPU_WR(cpu_val);
+				__raw_writel(hbus_val,
+					REGS_CLKCTRL_BASE + HW_CLKCTRL_HBUS);
+				__raw_writel(cpu_val,
+					REGS_CLKCTRL_BASE + HW_CLKCTRL_CPU);
 				hclk.rate = 0;
 			}
 		} else if (clk == &cpu_clk && shift == 8) {
-			u32 hbus_val = HW_CLKCTRL_HBUS_RD();
-			u32 cpu_val = HW_CLKCTRL_CPU_RD();
-			hbus_val &= ~hbus_mask;
+			hbus_val = __raw_readl(REGS_CLKCTRL_BASE +
+							HW_CLKCTRL_HBUS);
+			cpu_val = __raw_readl(REGS_CLKCTRL_BASE +
+							HW_CLKCTRL_CPU);
+			hbus_val &= ~(BM_CLKCTRL_HBUS_DIV_FRAC_EN |
+				      BM_CLKCTRL_HBUS_DIV);
 			hbus_val |= 2;
 			cpu_val &= ~BM_CLKCTRL_CPU_DIV_CPU;
 			if (clk->saved_div)
 				cpu_val |= clk->saved_div;
 			else
 				cpu_val |= 2;
+
 			if (machine_is_stmp378x()) {
-				HW_CLKCTRL_HBUS_WR(hbus_val);
-				HW_CLKCTRL_CPU_WR(cpu_val);
+				__raw_writel(hbus_val,
+					REGS_CLKCTRL_BASE + HW_CLKCTRL_HBUS);
+				__raw_writel(cpu_val,
+					REGS_CLKCTRL_BASE + HW_CLKCTRL_CPU);
 				hclk.rate = 0;
 			}
-			__raw_writel(1 << clk->bypass_shift,
-					clk->bypass_reg + shift);
-		} else
-			__raw_writel(1 << clk->bypass_shift,
-					clk->bypass_reg + shift);
+		}
+#endif
+		__raw_writel(1 << clk->bypass_shift, clk->bypass_reg + shift);
 
 		ret = 0;
 	}
@@ -640,7 +663,7 @@ static struct clk osc_24M = {
 
 static struct clk pll_clk = {
 	.parent		= &osc_24M,
-	.enable_reg	= HW_CLKCTRL_PLLCTRL0_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_PLLCTRL0,
 	.enable_shift	= 16,
 	.enable_wait	= 10,
 	.flags		= FIXED_RATE | ENABLED,
@@ -650,11 +673,11 @@ static struct clk pll_clk = {
 
 static struct clk cpu_clk = {
 	.parent		= &pll_clk,
-	.scale_reg	= HW_CLKCTRL_FRAC_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_FRAC,
 	.scale_shift	= 0,
-	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ,
 	.bypass_shift	= 7,
-	.busy_reg	= HW_CLKCTRL_CPU_ADDR,
+	.busy_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CPU,
 	.busy_bit	= 28,
 	.flags		= RATE_PROPAGATES | ENABLED,
 	.ops		= &cpu_ops,
@@ -662,10 +685,10 @@ static struct clk cpu_clk = {
 
 static struct clk io_clk = {
 	.parent		= &pll_clk,
-	.enable_reg	= HW_CLKCTRL_FRAC_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_FRAC,
 	.enable_shift	= 31,
 	.enable_negate	= 1,
-	.scale_reg	= HW_CLKCTRL_FRAC_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_FRAC,
 	.scale_shift	= 24,
 	.flags		= RATE_PROPAGATES | ENABLED,
 	.ops		= &io_ops,
@@ -673,10 +696,10 @@ static struct clk io_clk = {
 
 static struct clk hclk = {
 	.parent		= &cpu_clk,
-	.scale_reg	= HW_CLKCTRL_HBUS_ADDR,
-	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_HBUS,
+	.bypass_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ,
 	.bypass_shift	= 7,
-	.busy_reg	= HW_CLKCTRL_HBUS_ADDR,
+	.busy_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_HBUS,
 	.busy_bit	= 29,
 	.flags		= RATE_PROPAGATES | ENABLED,
 	.ops		= &hbus_ops,
@@ -684,8 +707,8 @@ static struct clk hclk = {
 
 static struct clk xclk = {
 	.parent		= &osc_24M,
-	.scale_reg	= HW_CLKCTRL_XBUS_ADDR,
-	.busy_reg	= HW_CLKCTRL_XBUS_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_XBUS,
+	.busy_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_XBUS,
 	.busy_bit	= 31,
 	.flags		= RATE_PROPAGATES | ENABLED,
 	.ops		= &xbus_ops,
@@ -693,7 +716,7 @@ static struct clk xclk = {
 
 static struct clk uart_clk = {
 	.parent		= &xclk,
-	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_XTAL,
 	.enable_shift	= 31,
 	.enable_negate	= 1,
 	.flags		= ENABLED,
@@ -702,7 +725,7 @@ static struct clk uart_clk = {
 
 static struct clk audio_clk = {
 	.parent		= &xclk,
-	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_XTAL,
 	.enable_shift	= 30,
 	.enable_negate	= 1,
 	.ops		= &min_ops,
@@ -710,7 +733,7 @@ static struct clk audio_clk = {
 
 static struct clk pwm_clk = {
 	.parent		= &xclk,
-	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_XTAL,
 	.enable_shift	= 29,
 	.enable_negate	= 1,
 	.ops		= &min_ops,
@@ -718,7 +741,7 @@ static struct clk pwm_clk = {
 
 static struct clk dri_clk = {
 	.parent		= &xclk,
-	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_XTAL,
 	.enable_shift	= 28,
 	.enable_negate	= 1,
 	.ops		= &min_ops,
@@ -726,7 +749,7 @@ static struct clk dri_clk = {
 
 static struct clk digctl_clk = {
 	.parent		= &xclk,
-	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_XTAL,
 	.enable_shift	= 27,
 	.enable_negate	= 1,
 	.ops		= &min_ops,
@@ -734,7 +757,7 @@ static struct clk digctl_clk = {
 
 static struct clk timer_clk = {
 	.parent		= &xclk,
-	.enable_reg	= HW_CLKCTRL_XTAL_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_XTAL,
 	.enable_shift	= 26,
 	.enable_negate	= 1,
 	.flags		= ENABLED,
@@ -743,13 +766,13 @@ static struct clk timer_clk = {
 
 static struct clk lcdif_clk = {
 	.parent		= &pll_clk,
-	.scale_reg	= HW_CLKCTRL_PIX_ADDR,
-	.busy_reg	= HW_CLKCTRL_PIX_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_PIX,
+	.busy_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_PIX,
 	.busy_bit	= 29,
-	.enable_reg	= HW_CLKCTRL_PIX_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_PIX,
 	.enable_shift	= 31,
 	.enable_negate	= 1,
-	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ,
 	.bypass_shift	= 1,
 	.flags		= NEEDS_SET_PARENT,
 	.ops		= &lcdif_ops,
@@ -757,12 +780,12 @@ static struct clk lcdif_clk = {
 
 static struct clk ssp_clk = {
 	.parent		= &io_clk,
-	.scale_reg	= HW_CLKCTRL_SSP_ADDR,
-	.busy_reg	= HW_CLKCTRL_SSP_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_SSP,
+	.busy_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_SSP,
 	.busy_bit	= 29,
-	.enable_reg	= HW_CLKCTRL_SSP_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_SSP,
 	.enable_shift	= 31,
-	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ,
 	.bypass_shift	= 5,
 	.enable_negate	= 1,
 	.flags		= NEEDS_SET_PARENT,
@@ -771,13 +794,13 @@ static struct clk ssp_clk = {
 
 static struct clk gpmi_clk = {
 	.parent		= &io_clk,
-	.scale_reg	= HW_CLKCTRL_GPMI_ADDR,
-	.busy_reg	= HW_CLKCTRL_GPMI_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_GPMI,
+	.busy_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_GPMI,
 	.busy_bit	= 29,
-	.enable_reg	= HW_CLKCTRL_GPMI_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_GPMI,
 	.enable_shift	= 31,
 	.enable_negate	= 1,
-	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ,
 	.bypass_shift	= 4,
 	.flags		= NEEDS_SET_PARENT,
 	.ops		= &std_ops,
@@ -785,7 +808,7 @@ static struct clk gpmi_clk = {
 
 static struct clk spdif_clk = {
 	.parent		= &pll_clk,
-	.enable_reg	= HW_CLKCTRL_SPDIF_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_SPDIF,
 	.enable_shift	= 31,
 	.enable_negate	= 1,
 	.ops		= &min_ops,
@@ -793,14 +816,14 @@ static struct clk spdif_clk = {
 
 static struct clk emi_clk = {
 	.parent		= &pll_clk,
-	.enable_reg	= HW_CLKCTRL_EMI_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_EMI,
 	.enable_shift	= 31,
 	.enable_negate	= 1,
-	.scale_reg	= HW_CLKCTRL_FRAC_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_FRAC,
 	.scale_shift	= 8,
-	.busy_reg	= HW_CLKCTRL_EMI_ADDR,
+	.busy_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_EMI,
 	.busy_bit	= 28,
-	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ,
 	.bypass_shift	= 6,
 	.flags		= ENABLED,
 	.ops		= &emi_ops,
@@ -808,37 +831,37 @@ static struct clk emi_clk = {
 
 static struct clk ir_clk = {
 	.parent		= &io_clk,
-	.enable_reg	= HW_CLKCTRL_IR_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_IR,
 	.enable_shift	= 31,
 	.enable_negate	= 1,
-	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ,
 	.bypass_shift	= 3,
 	.ops		= &min_ops,
 };
 
 static struct clk saif_clk = {
 	.parent		= &pll_clk,
-	.scale_reg	= HW_CLKCTRL_SAIF_ADDR,
-	.busy_reg	= HW_CLKCTRL_SAIF_ADDR,
+	.scale_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_SAIF,
+	.busy_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_SAIF,
 	.busy_bit	= 29,
-	.enable_reg	= HW_CLKCTRL_SAIF_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_SAIF,
 	.enable_shift	= 31,
 	.enable_negate	= 1,
-	.bypass_reg	= HW_CLKCTRL_CLKSEQ_ADDR,
+	.bypass_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_CLKSEQ,
 	.bypass_shift	= 0,
 	.ops		= &std_ops,
 };
 
 static struct clk usb_clk = {
 	.parent		= &pll_clk,
-	.enable_reg	= HW_CLKCTRL_PLLCTRL0_ADDR,
+	.enable_reg	= REGS_CLKCTRL_BASE + HW_CLKCTRL_PLLCTRL0,
 	.enable_shift	= 18,
 	.enable_negate	= 1,
 	.ops		= &min_ops,
 };
 
 /* list of all the clocks */
-static __initdata struct clk_lookup onchip_clks[] = {
+static struct clk_lookup onchip_clks[] = {
 	{
 		.con_id = "osc_24M",
 		.clk = &osc_24M,
diff --git a/arch/arm/plat-stmp3xxx/core.c b/arch/arm/plat-stmp3xxx/core.c
index 6e2fef16..37b8a09 100644
--- a/arch/arm/plat-stmp3xxx/core.c
+++ b/arch/arm/plat-stmp3xxx/core.c
@@ -20,6 +20,7 @@
 #include <linux/io.h>
 
 #include <mach/stmp3xxx.h>
+#include <mach/platform.h>
 #include <mach/dma.h>
 #include <mach/regs-clkctrl.h>
 
@@ -121,7 +122,7 @@ struct platform_device stmp3xxx_dbguart = {
 void __init stmp3xxx_init(void)
 {
 	/* Turn off auto-slow and other tricks */
-	HW_CLKCTRL_HBUS_CLR(0x07f00000U);
+	stmp3xxx_clearl(0x7f00000, REGS_CLKCTRL_BASE + HW_CLKCTRL_HBUS);
 
 	stmp3xxx_dma_init();
 }
diff --git a/arch/arm/plat-stmp3xxx/dma.c b/arch/arm/plat-stmp3xxx/dma.c
index cf42de0..d2f4977 100644
--- a/arch/arm/plat-stmp3xxx/dma.c
+++ b/arch/arm/plat-stmp3xxx/dma.c
@@ -23,6 +23,7 @@
 
 #include <asm/page.h>
 
+#include <mach/platform.h>
 #include <mach/dma.h>
 #include <mach/regs-apbx.h>
 #include <mach/regs-apbh.h>
@@ -35,16 +36,6 @@ static struct stmp3xxx_dma_user {
 	const char *name;
 } channels[MAX_DMA_CHANNELS];
 
-static inline int dmach(int ch)
-{
-	return ch % 16;
-}
-
-static inline int dmabus(int ch)
-{
-	return ch / 16;
-}
-
 #define IS_VALID_CHANNEL(ch) ((ch) >= 0 && (ch) < MAX_DMA_CHANNELS)
 #define IS_USED(ch) (channels[ch].inuse)
 
@@ -101,17 +92,19 @@ int stmp3xxx_dma_read_semaphore(int channel)
 {
 	int sem = -1;
 
-	switch (dmabus(channel)) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		sem =
-		    (HW_APBH_CHn_SEMA_RD(dmach(channel)) &
-		     BM_APBH_CHn_SEMA_PHORE) >> BP_APBH_CHn_SEMA_PHORE;
+		sem = __raw_readl(REGS_APBH_BASE + HW_APBH_CHn_SEMA +
+				STMP3XXX_DMA_CHANNEL(channel) * 0x70);
+		sem &= BM_APBH_CHn_SEMA_PHORE;
+		sem >>= BP_APBH_CHn_SEMA_PHORE;
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		sem =
-		    (HW_APBX_CHn_SEMA_RD(dmach(channel)) &
-		     BM_APBX_CHn_SEMA_PHORE) >> BP_APBX_CHn_SEMA_PHORE;
+		sem = __raw_readl(REGS_APBX_BASE + HW_APBX_CHn_SEMA +
+				STMP3XXX_DMA_CHANNEL(channel) * 0x70);
+		sem &= BM_APBX_CHn_SEMA_PHORE;
+		sem >>= BP_APBX_CHn_SEMA_PHORE;
 		break;
 	default:
 		BUG();
@@ -189,39 +182,44 @@ EXPORT_SYMBOL(stmp3xxx_dma_free_command);
 void stmp3xxx_dma_go(int channel,
 		     struct stmp3xxx_dma_descriptor *head, u32 semaphore)
 {
-	int ch = dmach(channel);
+	int ch = STMP3XXX_DMA_CHANNEL(channel);
+	void __iomem *c, *s;
 
-	switch (dmabus(channel)) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		/* Set next command */
-		HW_APBH_CHn_NXTCMDAR_WR(ch, head->handle);
-		/* Set counting semaphore (kicks off transfer). Assumes
-		   peripheral has been set up correctly */
-		HW_APBH_CHn_SEMA_WR(ch, semaphore);
+		c = REGS_APBH_BASE + HW_APBH_CHn_NXTCMDAR + 0x70 * ch;
+		s = REGS_APBH_BASE + HW_APBH_CHn_SEMA + 0x70 * ch;
 		break;
 
 	case STMP3XXX_BUS_APBX:
-		/* Set next command */
-		HW_APBX_CHn_NXTCMDAR_WR(ch, head->handle);
-		/* Set counting semaphore (kicks off transfer). Assumes
-		   peripheral has been set up correctly */
-		HW_APBX_CHn_SEMA_WR(ch, semaphore);
+		c = REGS_APBX_BASE + HW_APBX_CHn_NXTCMDAR + 0x70 * ch;
+		s = REGS_APBX_BASE + HW_APBX_CHn_SEMA + 0x70 * ch;
 		break;
+
+	default:
+		return;
 	}
+
+	/* Set next command */
+	__raw_writel(head->handle, c);
+	/* Set counting semaphore (kicks off transfer). Assumes
+	   peripheral has been set up correctly */
+	__raw_writel(semaphore, s);
 }
 EXPORT_SYMBOL(stmp3xxx_dma_go);
 
 int stmp3xxx_dma_running(int channel)
 {
-	switch (dmabus(channel)) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		return HW_APBH_CHn_SEMA_RD(dmach(channel)) &
-		    BM_APBH_CHn_SEMA_PHORE;
+		return (__raw_readl(REGS_APBH_BASE + HW_APBH_CHn_SEMA +
+			0x70 * STMP3XXX_DMA_CHANNEL(channel))) &
+			    BM_APBH_CHn_SEMA_PHORE;
 
 	case STMP3XXX_BUS_APBX:
-		return HW_APBX_CHn_SEMA_RD(dmach(channel)) &
-		    BM_APBX_CHn_SEMA_PHORE;
-
+		return (__raw_readl(REGS_APBX_BASE + HW_APBX_CHn_SEMA +
+			0x70 * STMP3XXX_DMA_CHANNEL(channel))) &
+			    BM_APBX_CHn_SEMA_PHORE;
 	default:
 		BUG();
 		return 0;
@@ -238,7 +236,7 @@ void stmp3xxx_dma_free_chain(struct stmp37xx_circ_dma_chain *chain)
 
 	for (i = 0; i < chain->total_count; i++)
 		stmp3xxx_dma_free_command(
-			STMP3xxx_DMA(chain->channel, chain->bus),
+			STMP3XXX_DMA(chain->channel, chain->bus),
 			&chain->chain[i]);
 }
 EXPORT_SYMBOL(stmp3xxx_dma_free_chain);
@@ -291,16 +289,15 @@ int stmp3xxx_dma_make_chain(int ch, struct stmp37xx_circ_dma_chain *chain,
 	chain->free_count = items;
 	chain->active_count = 0;
 	chain->cooked_count = 0;
-	chain->bus = dmabus(ch);
-	chain->channel = dmach(ch);
+	chain->bus = STMP3XXX_DMA_BUS(ch);
+	chain->channel = STMP3XXX_DMA_CHANNEL(ch);
 	return err;
 }
 EXPORT_SYMBOL(stmp3xxx_dma_make_chain);
 
 void stmp37xx_circ_clear_chain(struct stmp37xx_circ_dma_chain *chain)
 {
-	BUG_ON(stmp3xxx_dma_running(STMP3xxx_DMA(chain->channel, chain->bus)) >
-	       0);
+	BUG_ON(stmp3xxx_dma_running(STMP3XXX_DMA(chain->channel, chain->bus)));
 	chain->free_index = 0;
 	chain->active_index = 0;
 	chain->cooked_index = 0;
@@ -325,6 +322,8 @@ EXPORT_SYMBOL(stmp37xx_circ_advance_free);
 void stmp37xx_circ_advance_active(struct stmp37xx_circ_dma_chain *chain,
 		unsigned count)
 {
+	void __iomem *c;
+	u32 mask_clr, mask;
 	BUG_ON(chain->free_count < count);
 
 	chain->free_count -= count;
@@ -334,26 +333,24 @@ void stmp37xx_circ_advance_active(struct stmp37xx_circ_dma_chain *chain,
 
 	switch (chain->bus) {
 	case STMP3XXX_BUS_APBH:
-		/* Set counting semaphore (kicks off transfer). Assumes
-		   peripheral has been set up correctly */
-		HW_APBH_CHn_SEMA_CLR(chain->channel,
-				     BM_APBH_CHn_SEMA_INCREMENT_SEMA);
-		HW_APBH_CHn_SEMA_SET(chain->channel,
-				     BF_APBH_CHn_SEMA_INCREMENT_SEMA(count));
+		c = REGS_APBH_BASE + HW_APBH_CHn_SEMA + 0x70 * chain->channel;
+		mask_clr = BM_APBH_CHn_SEMA_INCREMENT_SEMA;
+		mask = BF(count, APBH_CHn_SEMA_INCREMENT_SEMA);
 		break;
-
 	case STMP3XXX_BUS_APBX:
-		/* Set counting semaphore (kicks off transfer). Assumes
-		   peripheral has been set up correctly */
-		HW_APBX_CHn_SEMA_CLR(chain->channel,
-				     BM_APBX_CHn_SEMA_INCREMENT_SEMA);
-		HW_APBX_CHn_SEMA_SET(chain->channel,
-				     BF_APBX_CHn_SEMA_INCREMENT_SEMA(count));
+		c = REGS_APBX_BASE + HW_APBX_CHn_SEMA + 0x70 * chain->channel;
+		mask_clr = BM_APBX_CHn_SEMA_INCREMENT_SEMA;
+		mask = BF(count, APBX_CHn_SEMA_INCREMENT_SEMA);
 		break;
-
 	default:
 		BUG();
+		return;
 	}
+
+	/* Set counting semaphore (kicks off transfer). Assumes
+	   peripheral has been set up correctly */
+	stmp3xxx_clearl(mask_clr, c);
+	stmp3xxx_setl(mask, c);
 }
 EXPORT_SYMBOL(stmp37xx_circ_advance_active);
 
@@ -362,7 +359,7 @@ unsigned stmp37xx_circ_advance_cooked(struct stmp37xx_circ_dma_chain *chain)
 	unsigned cooked;
 
 	cooked = chain->active_count -
-	  stmp3xxx_dma_read_semaphore(STMP3xxx_DMA(chain->channel, chain->bus));
+	  stmp3xxx_dma_read_semaphore(STMP3XXX_DMA(chain->channel, chain->bus));
 
 	chain->active_count -= cooked;
 	chain->active_index += cooked;
@@ -383,38 +380,41 @@ void stmp3xxx_dma_set_alt_target(int channel, int function)
 #else
 #error wrong arch
 #endif
-	int shift = dmach(channel) * bits;
+	int shift = STMP3XXX_DMA_CHANNEL(channel) * bits;
 	unsigned mask = (1<<bits) - 1;
+	void __iomem *c;
 
 	BUG_ON(function < 0 || function >= (1<<bits));
 	pr_debug("%s: channel = %d, using mask %x, "
 		 "shift = %d\n", __func__, channel, mask, shift);
 
-	switch (dmabus(channel)) {
+	switch (STMP3XXX_DMA_BUS(channel)) {
 	case STMP3XXX_BUS_APBH:
-		HW_APBH_DEVSEL_CLR(mask<<shift);
-		HW_APBH_DEVSEL_SET(function<<shift);
+		c = REGS_APBH_BASE + HW_APBH_DEVSEL;
 		break;
 	case STMP3XXX_BUS_APBX:
-		HW_APBX_DEVSEL_CLR(mask<<shift);
-		HW_APBX_DEVSEL_SET(function<<shift);
+		c = REGS_APBX_BASE + HW_APBX_DEVSEL;
 		break;
 	default:
 		BUG();
 	}
+	stmp3xxx_clearl(mask << shift, c);
+	stmp3xxx_setl(mask << shift, c);
 }
 EXPORT_SYMBOL(stmp3xxx_dma_set_alt_target);
 
 void stmp3xxx_dma_suspend(void)
 {
-	HW_APBH_CTRL0_SET(BM_APBH_CTRL0_CLKGATE);
-	HW_APBX_CTRL0_SET(BM_APBX_CTRL0_CLKGATE);
+	stmp3xxx_setl(BM_APBH_CTRL0_CLKGATE, REGS_APBH_BASE + HW_APBH_CTRL0);
+	stmp3xxx_setl(BM_APBX_CTRL0_CLKGATE, REGS_APBX_BASE + HW_APBX_CTRL0);
 }
 
 void stmp3xxx_dma_resume(void)
 {
-	HW_APBH_CTRL0_CLR(BM_APBH_CTRL0_CLKGATE | BM_APBH_CTRL0_SFTRST);
-	HW_APBX_CTRL0_CLR(BM_APBX_CTRL0_CLKGATE | BM_APBX_CTRL0_SFTRST);
+	stmp3xxx_clearl(BM_APBH_CTRL0_CLKGATE | BM_APBH_CTRL0_SFTRST,
+			REGS_APBH_BASE + HW_APBH_CTRL0);
+	stmp3xxx_clearl(BM_APBX_CTRL0_CLKGATE | BM_APBX_CTRL0_SFTRST,
+			REGS_APBX_BASE + HW_APBX_CTRL0);
 }
 
 #ifdef CONFIG_CPU_FREQ
@@ -452,11 +452,12 @@ static struct dma_notifier_block dma_cpufreq_nb = {
 
 void __init stmp3xxx_dma_init(void)
 {
-	HW_APBH_CTRL0_CLR(BM_APBH_CTRL0_CLKGATE | BM_APBH_CTRL0_SFTRST);
-	HW_APBX_CTRL0_CLR(BM_APBX_CTRL0_CLKGATE | BM_APBX_CTRL0_SFTRST);
+	stmp3xxx_clearl(BM_APBH_CTRL0_CLKGATE | BM_APBH_CTRL0_SFTRST,
+			REGS_APBH_BASE + HW_APBH_CTRL0);
+	stmp3xxx_clearl(BM_APBX_CTRL0_CLKGATE | BM_APBX_CTRL0_SFTRST,
+			REGS_APBX_BASE + HW_APBX_CTRL0);
 #ifdef CONFIG_CPU_FREQ
 	cpufreq_register_notifier(&dma_cpufreq_nb.nb,
 				CPUFREQ_TRANSITION_NOTIFIER);
 #endif /* CONFIG_CPU_FREQ */
-
 }
diff --git a/arch/arm/plat-stmp3xxx/include/mach/dma.h b/arch/arm/plat-stmp3xxx/include/mach/dma.h
index 1e305b2..7c58557 100644
--- a/arch/arm/plat-stmp3xxx/include/mach/dma.h
+++ b/arch/arm/plat-stmp3xxx/include/mach/dma.h
@@ -25,16 +25,14 @@
 #define MAX_PIO_WORDS   (15)
 #endif
 
-#define STMP3XXX_BUS_APBH	0
-#define STMP3XXX_BUS_APBX	1
+#define STMP3XXX_BUS_APBH		0
+#define STMP3XXX_BUS_APBX		1
 #define STMP3XXX_DMA_MAX_CHANNEL	16
-
-
-#define STMP3xxx_DMA(channel, bus)  ((bus) * 16 + (channel))
-
-#define MAX_DMA_ADDRESS		0xffffffff
-
-#define MAX_DMA_CHANNELS	32
+#define STMP3XXX_DMA_BUS(dma)		((dma) / 16)
+#define STMP3XXX_DMA_CHANNEL(dma)	((dma) % 16)
+#define STMP3XXX_DMA(channel, bus)	((bus) * 16 + (channel))
+#define MAX_DMA_ADDRESS			0xffffffff
+#define MAX_DMA_CHANNELS		32
 
 struct stmp3xxx_dma_command {
 	u32 next;
diff --git a/arch/arm/plat-stmp3xxx/include/mach/pinmux.h b/arch/arm/plat-stmp3xxx/include/mach/pinmux.h
index 526c068..cc5af82 100644
--- a/arch/arm/plat-stmp3xxx/include/mach/pinmux.h
+++ b/arch/arm/plat-stmp3xxx/include/mach/pinmux.h
@@ -146,10 +146,9 @@ struct stmp3xxx_pinmux_bank {
 	u8 strengths[HW_DRIVE_PINDRV_NUM];
 
 	/* GPIO things */
-	void __iomem *hw_gpio_read,
-	     *hw_gpio_set,
-	     *hw_gpio_clr,
-	     *hw_gpio_doe;
+	void __iomem *hw_gpio_in,
+		     *hw_gpio_out,
+		     *hw_gpio_doe;
 	int irq, virq;
 };
 
diff --git a/arch/arm/plat-stmp3xxx/include/mach/platform.h b/arch/arm/plat-stmp3xxx/include/mach/platform.h
index 525c413..7007dda 100644
--- a/arch/arm/plat-stmp3xxx/include/mach/platform.h
+++ b/arch/arm/plat-stmp3xxx/include/mach/platform.h
@@ -14,6 +14,9 @@
 #ifndef __ASM_PLAT_PLATFORM_H
 #define __ASM_PLAT_PLATFORM_H
 
+#ifndef __ASSEMBLER__
+#include <linux/io.h>
+#endif
 #include <asm/sizes.h>
 
 /* Virtual address where registers are mapped */
@@ -44,4 +47,22 @@
 #define IRQ_PRIORITY_REG_WR	HW_ICOLL_INTERRUPTn_WR
 #endif
 
+#define HW_STMP3XXX_SET		0x04
+#define HW_STMP3XXX_CLR		0x08
+#define HW_STMP3XXX_TOG		0x0c
+
+#ifndef __ASSEMBLER__
+static inline void stmp3xxx_clearl(u32 v, void __iomem *r)
+{
+	__raw_writel(v, r + HW_STMP3XXX_CLR);
+}
+
+static inline void stmp3xxx_setl(u32 v, void __iomem *r)
+{
+	__raw_writel(v, r + HW_STMP3XXX_SET);
+}
+#endif
+
+#define BF(value, field) (((value) << BP_##field) & BM_##field)
+
 #endif /* __ASM_ARCH_PLATFORM_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h
deleted file mode 100644
index 47797b2..0000000
--- a/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Freescale STMP37XX/STMP378X SoC register access interfaces
- *
- * The SoC registers may be accessed via:
- *
- * - single 32 bit address, or
- * - four 32 bit addresses - general purpose, set, clear and toggle bits
- *
- * Multiple IP blocks (e.g. SSP, UART) provide identical register sets per
- * each module
- *
- * Embedded Alley Solutions, Inc <source@embeddedalley.com>
- *
- * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
- * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
- */
-
-/*
- * The code contained herein is licensed under the GNU General Public
- * License. You may obtain a copy of the GNU General Public License
- * Version 2 or later at the following locations:
- *
- * http://www.opensource.org/licenses/gpl-license.html
- * http://www.gnu.org/copyleft/gpl.html
- */
-#ifndef __ASM_PLAT_STMP3XXX_REGS_H
-#define __ASM_PLAT_STMP3XXX_REGS_H
-
-#ifndef __ASSEMBLER__
-#include <linux/io.h>
-#endif
-
-#include "platform.h"
-
-#define REGS_BASE STMP3XXX_REGS_BASE
-
-#define HW_STMP3xxx_SET		0x04
-#define HW_STMP3xxx_CLR		0x08
-#define HW_STMP3xxx_TOG		0x0c
-
-#ifndef __ASSEMBLER__
-#define HW_REGISTER_FUNCS(id, base, offset, regset, rd, wr)		\
-	static const u32 id##_OFFSET = offset;				\
-	static inline u32 id##_RD_NB(const void __iomem *regbase) {	\
-		if (!rd)						\
-			printk(KERN_ERR"%s: cannot READ at %p+%x\n",	\
-				#id, regbase, offset);			\
-		return __raw_readl(regbase + offset);			\
-	}								\
-	static inline void id##_WR_NB(void __iomem *regbase, u32 v) {	\
-		if (!wr)						\
-			printk(KERN_ERR"%s: cannot WRITE at %p+%x\n",	\
-				#id, regbase, offset);			\
-		__raw_writel(v, regbase + offset);			\
-	}								\
-	static inline void id##_SET_NB(void __iomem *regbase, u32 v) {	\
-		if (!wr)						\
-			printk(KERN_ERR"%s: cannot SET at %p+%x\n",	\
-				#id, regbase, offset);			\
-		if (regset)						\
-			__raw_writel(v, regbase + 			\
-					offset + HW_STMP3xxx_SET);	\
-		else							\
-			__raw_writel(v | __raw_readl(regbase + offset),	\
-				regbase + offset);			\
-	}								\
-	static inline void id##_CLR_NB(void __iomem *regbase, u32 v) {	\
-		if (!wr) 						\
-			printk(KERN_ERR"%s: cannot CLR at %p+%x\n",	\
-				#id, regbase, offset);			\
-		if (regset)						\
-			__raw_writel(v, regbase + 			\
-					offset + HW_STMP3xxx_CLR);	\
-		else							\
-			__raw_writel(					\
-				~v & __raw_readl(regbase + offset),	\
-				regbase + offset);			\
-	}								\
-	static inline void id##_TOG_NB(void __iomem *regbase, u32 v) {	\
-		if (!wr) 						\
-			printk(KERN_ERR"%s: cannot TOG at %p+%x\n",	\
-				#id, regbase, offset);			\
-		if (regset)						\
-			__raw_writel(v, regbase + 			\
-					offset + HW_STMP3xxx_TOG);	\
-		else							\
-			__raw_writel(v ^ __raw_readl(regbase + offset),	\
-				regbase + offset);			\
-	}								\
-	static inline u32 id##_RD(void) { return id##_RD_NB(base); }	\
-	static inline void id##_WR(u32 v) { id##_WR_NB(base, v); }	\
-	static inline void id##_SET(u32 v) { id##_SET_NB(base, v); }	\
-	static inline void id##_CLR(u32 v) { id##_CLR_NB(base, v); }	\
-	static inline void id##_TOG(u32 v) { id##_TOG_NB(base, v); }
-
-#define HW_REGISTER_FUNCS_INDEXED(id, base, offset, regset, rd, wr, step)\
-	static inline u32 id##_OFFSET(int i) {				\
-		return offset + i * step;				\
-	}								\
-	static inline u32 id##_RD_NB(const void __iomem *regbase, int i) {\
-		if (!rd) 						\
-			printk(KERN_ERR"%s(%d): can't READ at %p+%x\n",	\
-				#id, i, regbase, offset + i * step);	\
-		return __raw_readl(regbase + offset + i * step);	\
-	}								\
-	static inline void id##_WR_NB(void __iomem *regbase, int i, u32 v) {\
-		if (!wr) 						\
-			printk(KERN_ERR"%s(%d): can't WRITE at %p+%x\n",\
-				#id, i, regbase, offset + i * step);	\
-		__raw_writel(v, regbase + offset + i * step);		\
-	}								\
-	static inline void id##_SET_NB(void __iomem *regbase, int i, u32 v) {\
-		if (!wr)						\
-			printk(KERN_ERR"%s(%d): can't SET at %p+%x\n",	\
-				#id, i, regbase, offset + i * step);	\
-		if (regset)						\
-			__raw_writel(v, regbase + offset + 		\
-					i * step + HW_STMP3xxx_SET);	\
-		else							\
-			__raw_writel(v | __raw_readl(regbase + 		\
-						offset + i * step),	\
-				regbase + offset + i * step);		\
-	}								\
-	static inline void id##_CLR_NB(void __iomem *regbase, int i, u32 v) {\
-		if (!wr) 						\
-			printk(KERN_ERR"%s(%d): cannot CLR at %p+%x\n",	\
-				#id, i, regbase, offset + i * step);	\
-		if (regset)						\
-			__raw_writel(v, regbase + offset + 		\
-					i * step + HW_STMP3xxx_CLR);	\
-		else							\
-			__raw_writel(~v & __raw_readl(regbase + 	\
-						offset + i * step),	\
-				regbase + offset + i * step);		\
-	}								\
-	static inline void id##_TOG_NB(void __iomem *regbase, int i, u32 v) {\
-		if (!wr) 						\
-			printk(KERN_ERR"%s(%d): cannot TOG at %p+%x\n",	\
-				#id, i, regbase, offset + i * step);	\
-		if (regset)						\
-			__raw_writel(v, regbase + offset + 		\
-					i * step + HW_STMP3xxx_TOG);	\
-		else							\
-			__raw_writel(v ^ __raw_readl(regbase + offset 	\
-						+ i * step),		\
-				regbase + offset + i * step);		\
-	}								\
-	static inline u32 id##_RD(int i) 				\
-	{ 								\
-		return id##_RD_NB(base, i); 				\
-	}								\
-	static inline void id##_WR(int i, u32 v) 			\
-	{ 								\
-		id##_WR_NB(base, i, v); 				\
-	}								\
-	static inline void id##_SET(int i, u32 v) 			\
-	{								\
-		id##_SET_NB(base, i, v); 				\
-	}								\
-	static inline void id##_CLR(int i, u32 v) 			\
-	{ 								\
-		id##_CLR_NB(base, i, v);				\
-	}								\
-	static inline void id##_TOG(int i, u32 v) 			\
-	{ 								\
-		id##_TOG_NB(base, i, v); 				\
-	}
-
-#define HW_REGISTER_WO(id, base, offset)\
-	HW_REGISTER_FUNCS(id, base, offset, 1,  0, 1)
-#define HW_REGISTER_RO(id, base, offset)\
-	HW_REGISTER_FUNCS(id, base, offset, 1,  1, 0)
-#define HW_REGISTER(id, base, offset)	\
-	HW_REGISTER_FUNCS(id, base, offset, 1,	1, 1)
-#define HW_REGISTER_0(id, base, offset)	\
-	HW_REGISTER_FUNCS(id, base, offset, 0,  1, 1)
-#define HW_REGISTER_INDEXED(id, base, offset, step)	\
-	HW_REGISTER_FUNCS_INDEXED(id, base, offset, 1,  1, 1, step)
-#define HW_REGISTER_RO_INDEXED(id, base, offset, step)	\
-	HW_REGISTER_FUNCS_INDEXED(id, base, offset, 1,  1, 0, step)
-#define HW_REGISTER_0_INDEXED(id, base, offset, step)	\
-	HW_REGISTER_FUNCS_INDEXED(id, base, offset, 0,  1, 1, step)
-#else /* __ASSEMBLER__ */
-#define HW_REGISTER_FUNCS(id, base, offset, regset, rd, wr)
-#define HW_REGISTER_FUNCS_INDEXED(id, base, offset, regset, rd, wr, step)
-#define HW_REGISTER_WO(id, base, offset)
-#define HW_REGISTER_RO(id, base, offset)
-#define HW_REGISTER(id, base, offset)
-#define HW_REGISTER_0(id, base, offset)
-#define HW_REGISTER_INDEXED(id, base, offset, step)
-#define HW_REGISTER_RO_INDEXED(id, base, offset, step)
-#define HW_REGISTER_0_INDEXED(id, base, offset, step)
-#endif /* __ASSEMBLER__ */
-
-#endif /* __ASM_PLAT_STMP3XXX_REGS_H */
diff --git a/arch/arm/plat-stmp3xxx/include/mach/system.h b/arch/arm/plat-stmp3xxx/include/mach/system.h
index dac48d2..28a9888 100644
--- a/arch/arm/plat-stmp3xxx/include/mach/system.h
+++ b/arch/arm/plat-stmp3xxx/include/mach/system.h
@@ -17,6 +17,7 @@
 #define __ASM_ARCH_SYSTEM_H
 
 #include <asm/proc-fns.h>
+#include <mach/platform.h>
 #include <mach/regs-clkctrl.h>
 #include <mach/regs-power.h>
 
@@ -33,13 +34,14 @@ static inline void arch_idle(void)
 static inline void arch_reset(char mode, const char *cmd)
 {
 	/* Set BATTCHRG to default value */
-	HW_POWER_CHARGE_WR(0x00010000);
+	__raw_writel(0x00010000, REGS_POWER_BASE + HW_POWER_CHARGE);
 
 	/* Set MINPWR to default value   */
-	HW_POWER_MINPWR_WR(0);
+	__raw_writel(0, REGS_POWER_BASE + HW_POWER_MINPWR);
 
 	/* Reset digital side of chip (but not power or RTC) */
-	HW_CLKCTRL_RESET_WR(BM_CLKCTRL_RESET_DIG);
+	__raw_writel(BM_CLKCTRL_RESET_DIG,
+			REGS_CLKCTRL_BASE + HW_CLKCTRL_RESET);
 
 	/* Should not return */
 }
diff --git a/arch/arm/plat-stmp3xxx/irq.c b/arch/arm/plat-stmp3xxx/irq.c
index cb36590..20de4e0 100644
--- a/arch/arm/plat-stmp3xxx/irq.c
+++ b/arch/arm/plat-stmp3xxx/irq.c
@@ -22,21 +22,15 @@
 #include <linux/sysdev.h>
 
 #include <mach/stmp3xxx.h>
+#include <mach/platform.h>
 #include <mach/regs-icoll.h>
 
 void __init stmp3xxx_init_irq(struct irq_chip *chip)
 {
-	unsigned int i;
+	unsigned int i, lv;
 
 	/* Reset the interrupt controller */
-	HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_CLKGATE);
-	udelay(10);
-	HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_SFTRST);
-	udelay(10);
-	HW_ICOLL_CTRL_SET(BM_ICOLL_CTRL_SFTRST);
-	while (!(HW_ICOLL_CTRL_RD() & BM_ICOLL_CTRL_CLKGATE))
-		continue;
-	HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_SFTRST | BM_ICOLL_CTRL_CLKGATE);
+	stmp3xxx_reset_block(REGS_ICOLL_BASE + HW_ICOLL_CTRL, true);
 
 	/* Disable all interrupts initially */
 	for (i = 0; i < NR_REAL_IRQS; i++) {
@@ -47,13 +41,11 @@ void __init stmp3xxx_init_irq(struct irq_chip *chip)
 	}
 
 	/* Ensure vector is cleared */
-	HW_ICOLL_LEVELACK_WR(1);
-	HW_ICOLL_LEVELACK_WR(2);
-	HW_ICOLL_LEVELACK_WR(4);
-	HW_ICOLL_LEVELACK_WR(8);
+	for (lv = 0; lv < 4; lv++)
+		__raw_writel(1 << lv, REGS_ICOLL_BASE + HW_ICOLL_LEVELACK);
+	__raw_writel(0, REGS_ICOLL_BASE + HW_ICOLL_VECTOR);
 
-	HW_ICOLL_VECTOR_WR(0);
 	/* Barrier */
-	(void) HW_ICOLL_STAT_RD();
+	(void)__raw_readl(REGS_ICOLL_BASE + HW_ICOLL_STAT);
 }
 
diff --git a/arch/arm/plat-stmp3xxx/pinmux.c b/arch/arm/plat-stmp3xxx/pinmux.c
index 9b28cc8..d412003 100644
--- a/arch/arm/plat-stmp3xxx/pinmux.c
+++ b/arch/arm/plat-stmp3xxx/pinmux.c
@@ -15,6 +15,7 @@
  * http://www.opensource.org/licenses/gpl-license.html
  * http://www.gnu.org/copyleft/gpl.html
  */
+#define DEBUG
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -25,6 +26,7 @@
 #include <linux/irq.h>
 
 #include <mach/hardware.h>
+#include <mach/platform.h>
 #include <mach/regs-pinctrl.h>
 #include <mach/pins.h>
 #include <mach/pinmux.h>
@@ -33,97 +35,94 @@
 static struct stmp3xxx_pinmux_bank pinmux_banks[] = {
 	[0] = {
 		.hw_muxsel = {
-			HW_PINCTRL_MUXSEL0_ADDR,
-			HW_PINCTRL_MUXSEL1_ADDR
+			REGS_PINCTRL_BASE + HW_PINCTRL_MUXSEL0,
+			REGS_PINCTRL_BASE + HW_PINCTRL_MUXSEL1,
 		},
 		.hw_drive = {
-			HW_PINCTRL_DRIVE0_ADDR,
-			HW_PINCTRL_DRIVE1_ADDR,
-			HW_PINCTRL_DRIVE2_ADDR,
-			HW_PINCTRL_DRIVE3_ADDR
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE0,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE1,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE2,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE3,
 		},
-		.hw_pull = HW_PINCTRL_PULL0_ADDR,
+		.hw_pull = REGS_PINCTRL_BASE + HW_PINCTRL_PULL0,
 		.functions = { 0x0, 0x1, 0x2, 0x3 },
 		.strengths = { 0x0, 0x1, 0x2, 0x3, 0xff },
 
-		.hw_gpio_read = HW_PINCTRL_DIN0_ADDR,
-		.hw_gpio_set = HW_PINCTRL_DOUT0_ADDR + HW_STMP3xxx_SET,
-		.hw_gpio_clr = HW_PINCTRL_DOUT0_ADDR + HW_STMP3xxx_CLR,
-		.hw_gpio_doe = HW_PINCTRL_DOE0_ADDR,
+		.hw_gpio_in = REGS_PINCTRL_BASE + HW_PINCTRL_DIN0,
+		.hw_gpio_out = REGS_PINCTRL_BASE + HW_PINCTRL_DOUT0,
+		.hw_gpio_doe = REGS_PINCTRL_BASE + HW_PINCTRL_DOE0,
 		.irq = IRQ_GPIO0,
 
-		.pin2irq = HW_PINCTRL_PIN2IRQ0_ADDR,
-		.irqstat = HW_PINCTRL_IRQSTAT0_ADDR,
-		.irqlevel = HW_PINCTRL_IRQLEVEL0_ADDR,
-		.irqpolarity = HW_PINCTRL_IRQPOL0_ADDR,
-		.irqen = HW_PINCTRL_IRQEN0_ADDR,
+		.pin2irq = REGS_PINCTRL_BASE + HW_PINCTRL_PIN2IRQ0,
+		.irqstat = REGS_PINCTRL_BASE + HW_PINCTRL_IRQSTAT0,
+		.irqlevel = REGS_PINCTRL_BASE + HW_PINCTRL_IRQLEVEL0,
+		.irqpolarity = REGS_PINCTRL_BASE + HW_PINCTRL_IRQPOL0,
+		.irqen = REGS_PINCTRL_BASE + HW_PINCTRL_IRQEN0,
 	},
 	[1] = {
 		.hw_muxsel = {
-			HW_PINCTRL_MUXSEL2_ADDR,
-			HW_PINCTRL_MUXSEL3_ADDR
+			REGS_PINCTRL_BASE + HW_PINCTRL_MUXSEL2,
+			REGS_PINCTRL_BASE + HW_PINCTRL_MUXSEL3,
 		},
 		.hw_drive = {
-			HW_PINCTRL_DRIVE4_ADDR,
-			HW_PINCTRL_DRIVE5_ADDR,
-			HW_PINCTRL_DRIVE6_ADDR,
-			HW_PINCTRL_DRIVE7_ADDR
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE4,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE5,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE6,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE7,
 		},
-		.hw_pull = HW_PINCTRL_PULL1_ADDR,
+		.hw_pull = REGS_PINCTRL_BASE + HW_PINCTRL_PULL1,
 		.functions = { 0x0, 0x1, 0x2, 0x3 },
 		.strengths = { 0x0, 0x1, 0x2, 0x3, 0xff },
 
-		.hw_gpio_read = HW_PINCTRL_DIN1_ADDR,
-		.hw_gpio_set = HW_PINCTRL_DOUT1_ADDR + HW_STMP3xxx_SET,
-		.hw_gpio_clr = HW_PINCTRL_DOUT1_ADDR + HW_STMP3xxx_CLR,
-		.hw_gpio_doe = HW_PINCTRL_DOE1_ADDR,
+		.hw_gpio_in = REGS_PINCTRL_BASE + HW_PINCTRL_DIN1,
+		.hw_gpio_out = REGS_PINCTRL_BASE + HW_PINCTRL_DOUT1,
+		.hw_gpio_doe = REGS_PINCTRL_BASE + HW_PINCTRL_DOE1,
 		.irq = IRQ_GPIO1,
 
-		.pin2irq = HW_PINCTRL_PIN2IRQ1_ADDR,
-		.irqstat = HW_PINCTRL_IRQSTAT1_ADDR,
-		.irqlevel = HW_PINCTRL_IRQLEVEL1_ADDR,
-		.irqpolarity = HW_PINCTRL_IRQPOL1_ADDR,
-		.irqen = HW_PINCTRL_IRQEN1_ADDR,
+		.pin2irq = REGS_PINCTRL_BASE + HW_PINCTRL_PIN2IRQ1,
+		.irqstat = REGS_PINCTRL_BASE + HW_PINCTRL_IRQSTAT1,
+		.irqlevel = REGS_PINCTRL_BASE + HW_PINCTRL_IRQLEVEL1,
+		.irqpolarity = REGS_PINCTRL_BASE + HW_PINCTRL_IRQPOL1,
+		.irqen = REGS_PINCTRL_BASE + HW_PINCTRL_IRQEN1,
 	},
 	[2] = {
 	       .hw_muxsel = {
-			HW_PINCTRL_MUXSEL4_ADDR,
-			HW_PINCTRL_MUXSEL5_ADDR,
+			REGS_PINCTRL_BASE + HW_PINCTRL_MUXSEL4,
+			REGS_PINCTRL_BASE + HW_PINCTRL_MUXSEL5,
 		},
 		.hw_drive = {
-			HW_PINCTRL_DRIVE8_ADDR,
-			HW_PINCTRL_DRIVE9_ADDR,
-			HW_PINCTRL_DRIVE10_ADDR,
-			HW_PINCTRL_DRIVE11_ADDR,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE8,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE9,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE10,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE11,
 		},
-		.hw_pull = HW_PINCTRL_PULL2_ADDR,
+		.hw_pull = REGS_PINCTRL_BASE + HW_PINCTRL_PULL2,
 		.functions = { 0x0, 0x1, 0x2, 0x3 },
 		.strengths = { 0x0, 0x1, 0x2, 0x1, 0x2 },
 
-		.hw_gpio_read = HW_PINCTRL_DIN2_ADDR,
-		.hw_gpio_set = HW_PINCTRL_DOUT2_ADDR + HW_STMP3xxx_SET,
-		.hw_gpio_clr = HW_PINCTRL_DOUT2_ADDR + HW_STMP3xxx_CLR,
-		.hw_gpio_doe = HW_PINCTRL_DOE2_ADDR,
+		.hw_gpio_in = REGS_PINCTRL_BASE + HW_PINCTRL_DIN2,
+		.hw_gpio_out = REGS_PINCTRL_BASE + HW_PINCTRL_DOUT2,
+		.hw_gpio_doe = REGS_PINCTRL_BASE + HW_PINCTRL_DOE2,
 		.irq = IRQ_GPIO2,
 
-		.pin2irq = HW_PINCTRL_PIN2IRQ2_ADDR,
-		.irqstat = HW_PINCTRL_IRQSTAT2_ADDR,
-		.irqlevel = HW_PINCTRL_IRQLEVEL2_ADDR,
-		.irqpolarity = HW_PINCTRL_IRQPOL2_ADDR,
-		.irqen = HW_PINCTRL_IRQEN2_ADDR,
+		.pin2irq = REGS_PINCTRL_BASE + HW_PINCTRL_PIN2IRQ2,
+		.irqstat = REGS_PINCTRL_BASE + HW_PINCTRL_IRQSTAT2,
+		.irqlevel = REGS_PINCTRL_BASE + HW_PINCTRL_IRQLEVEL2,
+		.irqpolarity = REGS_PINCTRL_BASE + HW_PINCTRL_IRQPOL2,
+		.irqen = REGS_PINCTRL_BASE + HW_PINCTRL_IRQEN2,
 	},
 	[3] = {
 	       .hw_muxsel = {
-		       HW_PINCTRL_MUXSEL6_ADDR,
-		       HW_PINCTRL_MUXSEL7_ADDR,
+		       REGS_PINCTRL_BASE + HW_PINCTRL_MUXSEL6,
+		       REGS_PINCTRL_BASE + HW_PINCTRL_MUXSEL7,
 	       },
 	       .hw_drive = {
-			HW_PINCTRL_DRIVE12_ADDR,
-			HW_PINCTRL_DRIVE13_ADDR,
-			HW_PINCTRL_DRIVE14_ADDR,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE12,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE13,
+			REGS_PINCTRL_BASE + HW_PINCTRL_DRIVE14,
 			NULL,
 	       },
-	       .hw_pull = HW_PINCTRL_PULL3_ADDR,
+	       .hw_pull = REGS_PINCTRL_BASE + HW_PINCTRL_PULL3,
 	       .functions = {0x0, 0x1, 0x2, 0x3},
 	       .strengths = {0x0, 0x1, 0x2, 0x3, 0xff},
 	},
@@ -196,8 +195,8 @@ void stmp3xxx_pin_strength(unsigned id, enum pin_strength strength,
 
 	pr_debug("%s: writing 0x%x to 0x%p register\n", __func__,
 			val << shift, hwdrive);
-	__raw_writel(HW_DRIVE_PINDRV_MASK << shift, hwdrive + HW_STMP3xxx_CLR);
-	__raw_writel(val << shift, hwdrive + HW_STMP3xxx_SET);
+	stmp3xxx_clearl(HW_DRIVE_PINDRV_MASK << shift, hwdrive);
+	stmp3xxx_setl(val << shift, hwdrive);
 }
 
 void stmp3xxx_pin_voltage(unsigned id, enum pin_voltage voltage,
@@ -221,11 +220,9 @@ void stmp3xxx_pin_voltage(unsigned id, enum pin_voltage voltage,
 	pr_debug("%s: changing 0x%x bit in 0x%p register\n",
 			__func__, HW_DRIVE_PINV_MASK << shift, hwdrive);
 	if (voltage == PIN_1_8V)
-		__raw_writel(HW_DRIVE_PINV_MASK << shift,
-			     hwdrive + HW_STMP3xxx_CLR);
+		stmp3xxx_clearl(HW_DRIVE_PINV_MASK << shift, hwdrive);
 	else
-		__raw_writel(HW_DRIVE_PINV_MASK << shift,
-			     hwdrive + HW_STMP3xxx_SET);
+		stmp3xxx_setl(HW_DRIVE_PINV_MASK << shift, hwdrive);
 }
 
 void stmp3xxx_pin_pullup(unsigned id, int enable, const char *label)
@@ -245,8 +242,10 @@ void stmp3xxx_pin_pullup(unsigned id, int enable, const char *label)
 
 	pr_debug("%s: changing 0x%x bit in 0x%p register\n",
 			__func__, 1 << pin, hwpull);
-	__raw_writel(1 << pin,
-		     hwpull + (enable ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR));
+	if (enable)
+		stmp3xxx_setl(1 << pin, hwpull);
+	else
+		stmp3xxx_clearl(1 << pin, hwpull);
 }
 
 int stmp3xxx_request_pin(unsigned id, enum pin_fun fun, const char *label)
@@ -290,8 +289,8 @@ void stmp3xxx_set_pin_type(unsigned id, enum pin_fun fun)
 	shift = (pin % HW_MUXSEL_PIN_NUM) * HW_MUXSEL_PIN_LEN;
 	pr_debug("%s: writing 0x%x to 0x%p register\n",
 			__func__, val << shift, hwmux);
-	__raw_writel(HW_MUXSEL_PINFUN_MASK << shift, hwmux + HW_STMP3xxx_CLR);
-	__raw_writel(val << shift, hwmux + HW_STMP3xxx_SET);
+	stmp3xxx_clearl(HW_MUXSEL_PINFUN_MASK << shift, hwmux);
+	stmp3xxx_setl(val << shift, hwmux);
 }
 
 void stmp3xxx_release_pin(unsigned id, const char *label)
@@ -388,10 +387,15 @@ static int stmp3xxx_set_irqtype(unsigned irq, unsigned type)
 				__func__, type);
 		return -ENXIO;
 	}
-	__raw_writel(1 << gpio,
-		pm->irqlevel + (l ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR));
-	__raw_writel(1 << gpio,
-		pm->irqpolarity + (p ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR));
+
+	if (l)
+		stmp3xxx_setl(1 << gpio, pm->irqlevel);
+	else
+		stmp3xxx_clearl(1 << gpio, pm->irqlevel);
+	if (p)
+		stmp3xxx_setl(1 << gpio, pm->irqpolarity);
+	else
+		stmp3xxx_clearl(1 << gpio, pm->irqpolarity);
 	return 0;
 }
 
@@ -402,8 +406,8 @@ static void stmp3xxx_pin_ack_irq(unsigned irq)
 	unsigned gpio;
 
 	stmp3xxx_irq_to_gpio(irq, &pm, &gpio);
-	stat = __raw_readl(pm->irqstat) & (1<<gpio);
-	__raw_writel(stat, pm->irqstat + HW_STMP3xxx_CLR);
+	stat = __raw_readl(pm->irqstat) & (1 << gpio);
+	stmp3xxx_clearl(stat, pm->irqstat);
 }
 
 static void stmp3xxx_pin_mask_irq(unsigned irq)
@@ -412,8 +416,8 @@ static void stmp3xxx_pin_mask_irq(unsigned irq)
 	unsigned gpio;
 
 	stmp3xxx_irq_to_gpio(irq, &pm, &gpio);
-	__raw_writel(1 << gpio, pm->irqen + HW_STMP3xxx_CLR);
-	__raw_writel(1 << gpio, pm->pin2irq + HW_STMP3xxx_CLR);
+	stmp3xxx_clearl(1 << gpio, pm->irqen);
+	stmp3xxx_clearl(1 << gpio, pm->pin2irq);
 }
 
 static void stmp3xxx_pin_unmask_irq(unsigned irq)
@@ -422,8 +426,8 @@ static void stmp3xxx_pin_unmask_irq(unsigned irq)
 	unsigned gpio;
 
 	stmp3xxx_irq_to_gpio(irq, &pm, &gpio);
-	__raw_writel(1 << gpio, pm->irqen + HW_STMP3xxx_SET);
-	__raw_writel(1 << gpio, pm->pin2irq + HW_STMP3xxx_SET);
+	stmp3xxx_setl(1 << gpio, pm->irqen);
+	stmp3xxx_setl(1 << gpio, pm->pin2irq);
 }
 
 static inline
@@ -443,7 +447,7 @@ static int stmp3xxx_gpio_get(struct gpio_chip *chip, unsigned offset)
 	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
 	unsigned v;
 
-	v = __raw_readl(pm->hw_gpio_read) & (1 << offset);
+	v = __raw_readl(pm->hw_gpio_in) & (1 << offset);
 	return v ? 1 : 0;
 }
 
@@ -451,14 +455,17 @@ static void stmp3xxx_gpio_set(struct gpio_chip *chip, unsigned offset, int v)
 {
 	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
 
-	__raw_writel(1 << offset, v ? pm->hw_gpio_set : pm->hw_gpio_clr);
+	if (v)
+		stmp3xxx_setl(1 << offset, pm->hw_gpio_out);
+	else
+		stmp3xxx_clearl(1 << offset, pm->hw_gpio_out);
 }
 
 static int stmp3xxx_gpio_output(struct gpio_chip *chip, unsigned offset, int v)
 {
 	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
 
-	__raw_writel(1 << offset, pm->hw_gpio_doe + HW_STMP3xxx_SET);
+	stmp3xxx_setl(1 << offset, pm->hw_gpio_doe);
 	stmp3xxx_gpio_set(chip, offset, v);
 	return 0;
 }
@@ -467,7 +474,7 @@ static int stmp3xxx_gpio_input(struct gpio_chip *chip, unsigned offset)
 {
 	struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip);
 
-	__raw_writel(1 << offset, pm->hw_gpio_doe + HW_STMP3xxx_CLR);
+	stmp3xxx_clearl(1 << offset, pm->hw_gpio_doe);
 	return 0;
 }
 
diff --git a/arch/arm/plat-stmp3xxx/timer.c b/arch/arm/plat-stmp3xxx/timer.c
index 7d872f0..063c7bc 100644
--- a/arch/arm/plat-stmp3xxx/timer.c
+++ b/arch/arm/plat-stmp3xxx/timer.c
@@ -26,6 +26,7 @@
 
 #include <asm/mach/time.h>
 #include <mach/stmp3xxx.h>
+#include <mach/platform.h>
 #include <mach/regs-timrot.h>
 
 static irqreturn_t
@@ -33,13 +34,22 @@ stmp3xxx_timer_interrupt(int irq, void *dev_id)
 {
 	struct clock_event_device *c = dev_id;
 
-	if (HW_TIMROT_TIMCTRLn_RD(0) & (1<<15)) {
-		HW_TIMROT_TIMCTRLn_CLR(0, (1<<15));
+	/* timer 0 */
+	if (__raw_readl(REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL0) &
+			BM_TIMROT_TIMCTRLn_IRQ) {
+		stmp3xxx_clearl(BM_TIMROT_TIMCTRLn_IRQ,
+				REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL0);
 		c->event_handler(c);
-	} else if (HW_TIMROT_TIMCTRLn_RD(1) & (1<<15)) {
-		HW_TIMROT_TIMCTRLn_CLR(1, (1<<15));
-		HW_TIMROT_TIMCTRLn_CLR(1, BM_TIMROT_TIMCTRLn_IRQ_EN);
-		HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF);
+	}
+
+	/* timer 1 */
+	else if (__raw_readl(REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL1)
+			& BM_TIMROT_TIMCTRLn_IRQ) {
+		stmp3xxx_clearl(BM_TIMROT_TIMCTRLn_IRQ,
+				REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL1);
+		stmp3xxx_clearl(BM_TIMROT_TIMCTRLn_IRQ_EN,
+				REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL1);
+		__raw_writel(0xFFFF, REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT1);
 	}
 
 	return IRQ_HANDLED;
@@ -47,14 +57,16 @@ stmp3xxx_timer_interrupt(int irq, void *dev_id)
 
 static cycle_t stmp3xxx_clock_read(struct clocksource *cs)
 {
-	return ~((HW_TIMROT_TIMCOUNTn_RD(1) & 0xFFFF0000) >> 16);
+	return ~((__raw_readl(REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT1)
+				& 0xFFFF0000) >> 16);
 }
 
 static int
 stmp3xxx_timrot_set_next_event(unsigned long delta,
 		struct clock_event_device *dev)
 {
-	HW_TIMROT_TIMCOUNTn_WR(0, delta); /* reload */
+	/* reload the timer */
+	__raw_writel(delta, REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT0);
 	return 0;
 }
 
@@ -102,25 +114,29 @@ static void __init stmp3xxx_init_timer(void)
 	ckevt_timrot.max_delta_ns = clockevent_delta2ns(0xFFF, &ckevt_timrot);
 	ckevt_timrot.cpumask = cpumask_of(0);
 
-	HW_TIMROT_ROTCTRL_CLR(BM_TIMROT_ROTCTRL_SFTRST |
-				BM_TIMROT_ROTCTRL_CLKGATE);
-	HW_TIMROT_TIMCOUNTn_WR(0, 0);
-	HW_TIMROT_TIMCOUNTn_WR(1, 0);
-
-	HW_TIMROT_TIMCTRLn_WR(0,
-			      (BF_TIMROT_TIMCTRLn_SELECT(8) |  /* 32 kHz */
-			       BF_TIMROT_TIMCTRLn_PRESCALE(0) |
-			       BM_TIMROT_TIMCTRLn_RELOAD |
-			       BM_TIMROT_TIMCTRLn_UPDATE |
-			       BM_TIMROT_TIMCTRLn_IRQ_EN));
-	HW_TIMROT_TIMCTRLn_WR(1,
-			      (BF_TIMROT_TIMCTRLn_SELECT(8) |  /* 32 kHz */
-			       BF_TIMROT_TIMCTRLn_PRESCALE(0) |
-			       BM_TIMROT_TIMCTRLn_RELOAD |
-			       BM_TIMROT_TIMCTRLn_UPDATE));
-
-	HW_TIMROT_TIMCOUNTn_WR(0, CLOCK_TICK_RATE / HZ - 1);
-	HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF); /* reload */
+	stmp3xxx_reset_block(REGS_TIMROT_BASE, false);
+
+	/* clear two timers */
+	__raw_writel(0, REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT0);
+	__raw_writel(0, REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT1);
+
+	/* configure them */
+	__raw_writel(
+		(8 << BP_TIMROT_TIMCTRLn_SELECT) |  /* 32 kHz */
+		BM_TIMROT_TIMCTRLn_RELOAD |
+		BM_TIMROT_TIMCTRLn_UPDATE |
+		BM_TIMROT_TIMCTRLn_IRQ_EN,
+			REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL0);
+	__raw_writel(
+		(8 << BP_TIMROT_TIMCTRLn_SELECT) |  /* 32 kHz */
+		BM_TIMROT_TIMCTRLn_RELOAD |
+		BM_TIMROT_TIMCTRLn_UPDATE |
+		BM_TIMROT_TIMCTRLn_IRQ_EN,
+			REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL1);
+
+	__raw_writel(CLOCK_TICK_RATE / HZ - 1,
+			REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT0);
+	__raw_writel(0xFFFF, REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT1);
 
 	setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq);
 
@@ -132,30 +148,31 @@ static void __init stmp3xxx_init_timer(void)
 
 void stmp3xxx_suspend_timer(void)
 {
-	HW_TIMROT_TIMCTRLn_CLR(0, BM_TIMROT_TIMCTRLn_IRQ_EN);
-	HW_TIMROT_TIMCTRLn_CLR(0, (1<<15));
-	HW_TIMROT_ROTCTRL_SET(BM_TIMROT_ROTCTRL_CLKGATE);
+	stmp3xxx_clearl(BM_TIMROT_TIMCTRLn_IRQ_EN | BM_TIMROT_TIMCTRLn_IRQ,
+			REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL0);
+	stmp3xxx_setl(BM_TIMROT_ROTCTRL_CLKGATE,
+			REGS_TIMROT_BASE + HW_TIMROT_ROTCTRL);
 }
 
 void stmp3xxx_resume_timer(void)
 {
-	HW_TIMROT_ROTCTRL_CLR(BM_TIMROT_ROTCTRL_SFTRST |
-				BM_TIMROT_ROTCTRL_CLKGATE);
-
-
-	HW_TIMROT_TIMCTRLn_WR(0,
-			      (BF_TIMROT_TIMCTRLn_SELECT(8) |  /* 32 kHz */
-			       BF_TIMROT_TIMCTRLn_PRESCALE(0) |
-			       BM_TIMROT_TIMCTRLn_UPDATE |
-			       BM_TIMROT_TIMCTRLn_IRQ_EN));
-	HW_TIMROT_TIMCTRLn_WR(1,
-			      (BF_TIMROT_TIMCTRLn_SELECT(8) |  /* 32 kHz */
-			       BF_TIMROT_TIMCTRLn_PRESCALE(0) |
-			       BM_TIMROT_TIMCTRLn_RELOAD |
-			       BM_TIMROT_TIMCTRLn_UPDATE));
-
-	HW_TIMROT_TIMCOUNTn_WR(0, CLOCK_TICK_RATE / HZ - 1);
-	HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF); /* reload */
+	stmp3xxx_clearl(BM_TIMROT_ROTCTRL_SFTRST | BM_TIMROT_ROTCTRL_CLKGATE,
+			REGS_TIMROT_BASE + HW_TIMROT_ROTCTRL);
+	__raw_writel(
+		8 << BP_TIMROT_TIMCTRLn_SELECT |  /* 32 kHz */
+		BM_TIMROT_TIMCTRLn_RELOAD |
+		BM_TIMROT_TIMCTRLn_UPDATE |
+		BM_TIMROT_TIMCTRLn_IRQ_EN,
+			REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL0);
+	__raw_writel(
+		8 << BP_TIMROT_TIMCTRLn_SELECT |  /* 32 kHz */
+		BM_TIMROT_TIMCTRLn_RELOAD |
+		BM_TIMROT_TIMCTRLn_UPDATE |
+		BM_TIMROT_TIMCTRLn_IRQ_EN,
+			REGS_TIMROT_BASE + HW_TIMROT_TIMCTRL1);
+	__raw_writel(CLOCK_TICK_RATE / HZ - 1,
+			REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT0);
+	__raw_writel(0xFFFF, REGS_TIMROT_BASE + HW_TIMROT_TIMCOUNT1);
 }
 
 #else
-- 
cgit v0.10.2


From d315a0e09f1c8b833cacd5e72f3edea419978138 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 31 May 2009 23:09:22 +1000
Subject: crypto: hash - Fix handling of sg entry that crosses page boundary

A quirk that we've always supported is having an sg entry that's
bigger than a page, or more generally an sg entry that crosses
page boundaries.  Even though it would be better to explicitly have
to sg entries for this, we need to support it for the existing users,
in particular, IPsec.

The new ahash sg walking code did try to handle this, but there was
a bug where we didn't increment the page so kept on walking on the
first page over an dover again.

This patch fixes it.

Tested-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/ahash.c b/crypto/ahash.c
index b2d1ee3..f347637 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -82,10 +82,11 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err)
 	if (err)
 		return err;
 
-	walk->offset = 0;
-
-	if (nbytes)
+	if (nbytes) {
+		walk->offset = 0;
+		walk->pg++;
 		return hash_walk_next(walk);
+	}
 
 	if (!walk->total)
 		return 0;
-- 
cgit v0.10.2


From 5a9d25150c01bd140ca647b5e7ee75ae18a369a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Va=C5=A1ut?= <marek.vasut@gmail.com>
Date: Thu, 21 May 2009 13:11:05 +0100
Subject: [ARM] 5522/1: PalmLD: IDE support

Support for Palm LifeDrive's internal harddrive.

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-pxa/include/mach/palmld.h b/arch/arm/mach-pxa/include/mach/palmld.h
index fb13c82..8721b80 100644
--- a/arch/arm/mach-pxa/include/mach/palmld.h
+++ b/arch/arm/mach-pxa/include/mach/palmld.h
@@ -56,7 +56,6 @@
 #define GPIO_NR_PALMLD_LED_AMBER	94
 
 /* IDE */
-#define GPIO_NR_PALMLD_IDE_IRQ		95
 #define GPIO_NR_PALMLD_IDE_RESET	98
 #define GPIO_NR_PALMLD_IDE_PWEN		115
 
diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index 1cec180..24a967e 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -127,7 +127,7 @@ static unsigned long palmld_pin_config[] __initdata = {
 	GPIO81_GPIO,	/* wifi reset */
 
 	/* HDD */
-	GPIO95_GPIO,	/* HDD irq */
+	GPIO98_GPIO,	/* HDD reset */
 	GPIO115_GPIO,	/* HDD power */
 
 	/* MISC */
@@ -494,6 +494,14 @@ static struct platform_device palmld_asoc = {
 };
 
 /******************************************************************************
+ * HDD
+ ******************************************************************************/
+static struct platform_device palmld_hdd = {
+	.name	= "pata_palmld",
+	.id	= -1,
+};
+
+/******************************************************************************
  * Framebuffer
  ******************************************************************************/
 static struct pxafb_mode_info palmld_lcd_modes[] = {
@@ -557,6 +565,7 @@ static struct platform_device *devices[] __initdata = {
 	&palmld_leds,
 	&power_supply,
 	&palmld_asoc,
+	&palmld_hdd,
 };
 
 static struct map_desc palmld_io_desc[] __initdata = {
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 9120717..2aa1908 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -535,6 +535,15 @@ config PATA_OPTIDMA
 
 	  If unsure, say N.
 
+config PATA_PALMLD
+	tristate "Palm LifeDrive PATA support"
+	depends on MACH_PALMLD
+	help
+	  This option enables support for Palm LifeDrive's internal ATA
+	  port via the new ATA layer.
+
+	  If unsure, say N.
+
 config PATA_PCMCIA
 	tristate "PCMCIA PATA support"
 	depends on PCMCIA
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 7f1ecf9..1558059 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_PATA_MPC52xx)	+= pata_mpc52xx.o
 obj-$(CONFIG_PATA_MARVELL)	+= pata_marvell.o
 obj-$(CONFIG_PATA_MPIIX)	+= pata_mpiix.o
 obj-$(CONFIG_PATA_OLDPIIX)	+= pata_oldpiix.o
+obj-$(CONFIG_PATA_PALMLD)	+= pata_palmld.o
 obj-$(CONFIG_PATA_PCMCIA)	+= pata_pcmcia.o
 obj-$(CONFIG_PATA_PDC2027X)	+= pata_pdc2027x.o
 obj-$(CONFIG_PATA_PDC_OLD)	+= pata_pdc202xx_old.o
diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c
new file mode 100644
index 0000000..11fb4cc
--- /dev/null
+++ b/drivers/ata/pata_palmld.c
@@ -0,0 +1,150 @@
+/*
+ * drivers/ata/pata_palmld.c
+ *
+ * Driver for IDE channel in Palm LifeDrive
+ *
+ * Based on research of:
+ *		Alex Osborne <ato@meshy.org>
+ *
+ * Rewrite for mainline:
+ *		Marek Vasut <marek.vasut@gmail.com>
+ *
+ * Rewritten version based on pata_ixp4xx_cf.c:
+ * ixp4xx PATA/Compact Flash driver
+ * Copyright (C) 2006-07 Tower Technologies
+ * Author: Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/libata.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+
+#include <scsi/scsi_host.h>
+#include <mach/palmld.h>
+
+#define DRV_NAME "pata_palmld"
+
+static struct scsi_host_template palmld_sht = {
+	ATA_PIO_SHT(DRV_NAME),
+};
+
+static struct ata_port_operations palmld_port_ops = {
+	.inherits		= &ata_sff_port_ops,
+	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.cable_detect		= ata_cable_40wire,
+};
+
+static __devinit int palmld_pata_probe(struct platform_device *pdev)
+{
+	struct ata_host *host;
+	struct ata_port *ap;
+	void __iomem *mem;
+	int ret;
+
+	/* allocate host */
+	host = ata_host_alloc(&pdev->dev, 1);
+	if (!host)
+		return -ENOMEM;
+
+	/* remap drive's physical memory address */
+	mem = devm_ioremap(&pdev->dev, PALMLD_IDE_PHYS, 0x1000);
+	if (!mem)
+		return -ENOMEM;
+
+	/* request and activate power GPIO, IRQ GPIO */
+	ret = gpio_request(GPIO_NR_PALMLD_IDE_PWEN, "HDD PWR");
+	if (ret)
+		goto err1;
+	ret = gpio_direction_output(GPIO_NR_PALMLD_IDE_PWEN, 1);
+	if (ret)
+		goto err2;
+
+	ret = gpio_request(GPIO_NR_PALMLD_IDE_RESET, "HDD RST");
+	if (ret)
+		goto err2;
+	ret = gpio_direction_output(GPIO_NR_PALMLD_IDE_RESET, 0);
+	if (ret)
+		goto err3;
+
+	/* reset the drive */
+	gpio_set_value(GPIO_NR_PALMLD_IDE_RESET, 0);
+	msleep(30);
+	gpio_set_value(GPIO_NR_PALMLD_IDE_RESET, 1);
+	msleep(30);
+
+	/* setup the ata port */
+	ap = host->ports[0];
+	ap->ops	= &palmld_port_ops;
+	ap->pio_mask = ATA_PIO4;
+	ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY | ATA_FLAG_PIO_POLLING;
+
+	/* memory mapping voodoo */
+	ap->ioaddr.cmd_addr = mem + 0x10;
+	ap->ioaddr.altstatus_addr = mem + 0xe;
+	ap->ioaddr.ctl_addr = mem + 0xe;
+
+	/* start the port */
+	ata_sff_std_ports(&ap->ioaddr);
+
+	/* activate host */
+	return ata_host_activate(host, 0, NULL, IRQF_TRIGGER_RISING,
+					&palmld_sht);
+
+err3:
+	gpio_free(GPIO_NR_PALMLD_IDE_RESET);
+err2:
+	gpio_free(GPIO_NR_PALMLD_IDE_PWEN);
+err1:
+	return ret;
+}
+
+static __devexit int palmld_pata_remove(struct platform_device *dev)
+{
+	struct ata_host *host = platform_get_drvdata(dev);
+
+	ata_host_detach(host);
+
+	/* power down the HDD */
+	gpio_set_value(GPIO_NR_PALMLD_IDE_PWEN, 0);
+
+	gpio_free(GPIO_NR_PALMLD_IDE_RESET);
+	gpio_free(GPIO_NR_PALMLD_IDE_PWEN);
+
+	return 0;
+}
+
+static struct platform_driver palmld_pata_platform_driver = {
+	.driver	 = {
+		.name   = DRV_NAME,
+		.owner  = THIS_MODULE,
+	},
+	.probe		= palmld_pata_probe,
+	.remove		= __devexit_p(palmld_pata_remove),
+};
+
+static int __init palmld_pata_init(void)
+{
+	return platform_driver_register(&palmld_pata_platform_driver);
+}
+
+static void __exit palmld_pata_exit(void)
+{
+	platform_driver_unregister(&palmld_pata_platform_driver);
+}
+
+MODULE_AUTHOR("Marek Vasut <marek.vasut@gmail.com>");
+MODULE_DESCRIPTION("PalmLD PATA driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
+
+module_init(palmld_pata_init);
+module_exit(palmld_pata_exit);
-- 
cgit v0.10.2


From 4dd9e742df98f8f600b4302d3adbb087a68237f7 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Tue, 5 May 2009 05:54:13 +0100
Subject: [ARM] 5505/1: serial amba-pl011: move to arch_initcall for earlier
 console

Signed-off-by: Alessandro Rubini <rubini@unipv.it>"
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 88fdac5..4cfa1eb 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -845,7 +845,11 @@ static void __exit pl011_exit(void)
 	uart_unregister_driver(&amba_reg);
 }
 
-module_init(pl011_init);
+/*
+ * While this can be a module, if builtin it's most likely the console
+ * So let's leave module_exit but move module_init to an earlier place
+ */
+arch_initcall(pl011_init);
 module_exit(pl011_exit);
 
 MODULE_AUTHOR("ARM Ltd/Deep Blue Solutions Ltd");
-- 
cgit v0.10.2


From ed31b2dfad01d23446860c983ff94460d7971c48 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 31 May 2009 15:08:11 +0100
Subject: [ARM] make U300 clk_set_rate() return a _real_ errno

Another stupid instance of "return -1"-ism.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-u300/clock.c b/arch/arm/mach-u300/clock.c
index d6f8232..5cd04d6 100644
--- a/arch/arm/mach-u300/clock.c
+++ b/arch/arm/mach-u300/clock.c
@@ -682,7 +682,7 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
 	} else {
 		printk(KERN_ERR "clock: Failed to set %s to %ld hz\n",
 		       clk->name, rate);
-		return -1;
+		return -EINVAL;
 	}
 }
 EXPORT_SYMBOL(clk_set_rate);
-- 
cgit v0.10.2


From a22f277bba321474a01691ae66d5952926459f44 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 31 May 2009 15:02:58 +0100
Subject: [ARM] Kconfig: remove 'default n'

Kconfig entries default to n, so there's no need for this to be
explicitly specified.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 65bf774..6577883 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -34,15 +34,12 @@ config SYS_SUPPORTS_APM_EMULATION
 
 config GENERIC_GPIO
 	bool
-	default n
 
 config GENERIC_TIME
 	bool
-	default n
 
 config GENERIC_CLOCKEVENTS
 	bool
-	default n
 
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
@@ -55,7 +52,6 @@ config MMU
 
 config NO_IOPORT
 	bool
-	default n
 
 config EISA
 	bool
@@ -126,11 +122,9 @@ config RWSEM_XCHGADD_ALGORITHM
 
 config ARCH_HAS_ILOG2_U32
 	bool
-	default n
 
 config ARCH_HAS_ILOG2_U64
 	bool
-	default n
 
 config GENERIC_HWEIGHT
 	bool
@@ -969,7 +963,6 @@ config OABI_COMPAT
 
 config ARCH_HAS_HOLES_MEMORYMODEL
 	bool
-	default n
 
 # Discontigmem is deprecated
 config ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index 7640867..be747f5 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -34,7 +34,6 @@ config MACH_DAVINCI_EVM
 
 config MACH_SFFSDR
 	bool "Lyrtech SFFSDR"
-	default n
 	depends on ARCH_DAVINCI_DM644x
 	help
 	  Say Y here to select the Lyrtech Small Form Factor
@@ -42,7 +41,6 @@ config MACH_SFFSDR
 
 config MACH_DAVINCI_DM355_EVM
 	bool "TI DM355 EVM"
-	default n
 	depends on ARCH_DAVINCI_DM355
 	help
 	  Configure this option to specify the whether the board used
@@ -50,7 +48,6 @@ config MACH_DAVINCI_DM355_EVM
 
 config MACH_DM355_LEOPARD
 	bool "DM355 Leopard board"
-	default n
 	depends on ARCH_DAVINCI_DM355
 	help
 	  Configure this option to specify the whether the board used
@@ -58,7 +55,6 @@ config MACH_DM355_LEOPARD
 
 config MACH_DAVINCI_DM6467_EVM
 	bool "TI DM6467 EVM"
-	default n
 	depends on ARCH_DAVINCI_DM646x
 	help
 	  Configure this option to specify the whether the board used
diff --git a/arch/arm/mach-mx3/Kconfig b/arch/arm/mach-mx3/Kconfig
index 32e4515..229fb3e 100644
--- a/arch/arm/mach-mx3/Kconfig
+++ b/arch/arm/mach-mx3/Kconfig
@@ -39,7 +39,6 @@ config MACH_PCM037
 config MACH_MX31LITE
 	bool "Support MX31 LITEKIT (LogicPD)"
 	select ARCH_MX31
-	default n
 	help
 	  Include support for MX31 LITEKIT platform. This includes specific
 	  configurations for the board and its peripherals.
@@ -47,7 +46,6 @@ config MACH_MX31LITE
 config MACH_MX31_3DS
 	bool "Support MX31PDK (3DS)"
 	select ARCH_MX31
-	default n
 	help
 	  Include support for MX31PDK (3DS) platform. This includes specific
 	  configurations for the board and its peripherals.
@@ -55,7 +53,6 @@ config MACH_MX31_3DS
 config MACH_MX31MOBOARD
 	bool "Support mx31moboard platforms (EPFL Mobots group)"
 	select ARCH_MX31
-	default n
 	help
 	  Include support for mx31moboard platform. This includes specific
 	  configurations for the board and its peripherals.
@@ -63,7 +60,6 @@ config MACH_MX31MOBOARD
 config MACH_QONG
 	bool "Support Dave/DENX QongEVB-LITE platform"
 	select ARCH_MX31
-	default n
 	help
 	  Include support for Dave/DENX QongEVB-LITE platform. This includes
 	  specific configurations for the board and its peripherals.
diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig
index b6ec106..bf35cfd 100644
--- a/arch/arm/mach-realview/Kconfig
+++ b/arch/arm/mach-realview/Kconfig
@@ -24,7 +24,6 @@ config REALVIEW_EB_ARM11MP
 config REALVIEW_EB_ARM11MP_REVB
 	bool "Support ARM11MPCore RevB tile"
 	depends on REALVIEW_EB_ARM11MP
-	default n
 	help
 	  Enable support for the ARM11MPCore RevB tile on the Realview
 	  platform. Since there are device address differences, a
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 2097956..b9bd481 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -642,7 +642,6 @@ config CPU_BIG_ENDIAN
 config CPU_HIGH_VECTOR
 	depends on !MMU && CPU_CP15 && !CPU_ARM740T
 	bool "Select the High exception vector"
-	default n
 	help
 	  Say Y here to select high exception vector(0xFFFF0000~).
 	  The exception vector can be vary depending on the platform
@@ -726,7 +725,6 @@ config NEEDS_SYSCALL_FOR_CMPXCHG
 
 config OUTER_CACHE
 	bool
-	default n
 
 config CACHE_FEROCEON_L2
 	bool "Enable the Feroceon L2 cache controller"
@@ -739,7 +737,6 @@ config CACHE_FEROCEON_L2
 config CACHE_FEROCEON_L2_WRITETHROUGH
 	bool "Force Feroceon L2 cache write through"
 	depends on CACHE_FEROCEON_L2
-	default n
 	help
 	  Say Y here to use the Feroceon L2 cache in writethrough mode.
 	  Unless you specifically require this, say N for writeback mode.
diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index 9a6ecc7..efe85d0 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -45,7 +45,6 @@ config OMAP_DEBUG_LEDS
 config OMAP_DEBUG_POWERDOMAIN
 	bool "Emit debug messages from powerdomain layer"
 	depends on ARCH_OMAP2 || ARCH_OMAP3
-	default n
 	help
 	  Say Y here if you want to compile in powerdomain layer
 	  debugging messages for OMAP2/3.   These messages can
@@ -57,7 +56,6 @@ config OMAP_DEBUG_POWERDOMAIN
 config OMAP_DEBUG_CLOCKDOMAIN
 	bool "Emit debug messages from clockdomain layer"
 	depends on ARCH_OMAP2 || ARCH_OMAP3
-	default n
 	help
 	  Say Y here if you want to compile in clockdomain layer
 	  debugging messages for OMAP2/3.   These messages can
@@ -115,7 +113,6 @@ config OMAP_MCBSP
 config OMAP_MBOX_FWK
 	tristate "Mailbox framework support"
 	depends on ARCH_OMAP
-	default n
 	help
 	  Say Y here if you want to use OMAP Mailbox framework support for
 	  DSP, IVA1.0 and IVA2 in OMAP1/2/3.
-- 
cgit v0.10.2


From 27b9613b7be39412775d0ab80f57229aa73bb07d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 31 May 2009 22:09:49 +0200
Subject: perf_counter tools: Fix unknown command help text

Arjan reported this error when entering an unknown command to perf:

  $ perf start
  fatal: Uh oh. Your system reports no Git commands at all.

The Git code expects there to be perf-* commands - but since Perf
is a 'pure' utility with no dash commands anymore, this old assumption
of Git does not hold anymore. Remove that error check.

Reported-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/help.c b/Documentation/perf_counter/util/help.c
index edde541..397487f 100644
--- a/Documentation/perf_counter/util/help.c
+++ b/Documentation/perf_counter/util/help.c
@@ -323,9 +323,6 @@ const char *help_unknown_cmd(const char *cmd)
 	qsort(main_cmds.names, main_cmds.cnt,
 	      sizeof(*main_cmds.names), levenshtein_compare);
 
-	if (!main_cmds.cnt)
-		die ("Uh oh. Your system reports no Git commands at all.");
-
 	best_similarity = main_cmds.names[0]->len;
 	n = 1;
 	while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
-- 
cgit v0.10.2


From 52bb25a620e1925bb53d41d0ed28571b3de98a31 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 1 Jun 2009 06:21:13 +0000
Subject: headers_check fix: linux/auto_fs.h

fix the following 'make headers_check' warnings:

  usr/include/linux/auto_fs.h:17: include of <linux/types.h> is preferred over <asm/types.h>

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>

diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h
index 6326585..7b09c83 100644
--- a/include/linux/auto_fs.h
+++ b/include/linux/auto_fs.h
@@ -14,13 +14,12 @@
 #ifndef _LINUX_AUTO_FS_H
 #define _LINUX_AUTO_FS_H
 
+#include <linux/types.h>
 #ifdef __KERNEL__
 #include <linux/fs.h>
 #include <linux/limits.h>
-#include <linux/types.h>
 #include <linux/ioctl.h>
 #else
-#include <asm/types.h>
 #include <sys/ioctl.h>
 #endif /* __KERNEL__ */
 
-- 
cgit v0.10.2


From d280cc989ad591607e812cd5c5dfde702b5f191a Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 1 Jun 2009 06:23:25 +0000
Subject: headers_check fix: linux/net_dropmon.h

fix the following 'make headers_check' warnings:

  usr/include/linux/net_dropmon.h:7: found __[us]{8,16,32,64} type without #include <linux/types.h>

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>

diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h
index 0217fb8..0e2e100 100644
--- a/include/linux/net_dropmon.h
+++ b/include/linux/net_dropmon.h
@@ -1,6 +1,7 @@
 #ifndef __NET_DROPMON_H
 #define __NET_DROPMON_H
 
+#include <linux/types.h>
 #include <linux/netlink.h>
 
 struct net_dm_drop_point {
-- 
cgit v0.10.2


From eb9b9b56eed280e65a9e194aaeb50a5a75111859 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 28 May 2009 11:51:51 +0000
Subject: sh: boot word / mode pin support V2

Add mode pin support for the SuperH architecture V2.

With this patch applied the board code can add their
own function to export the cpu mode pin configuration.
In most cases this will be a constant bitmap, but
boards that allow reading this from a register can
instead read out the pin state from hardware.

The code warns if a pin is tested but no board specific
mode pin function has been provided.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h
index 73d6d16..84dd377 100644
--- a/arch/sh/include/asm/machvec.h
+++ b/arch/sh/include/asm/machvec.h
@@ -48,6 +48,7 @@ struct sh_machine_vector {
 	void (*mv_ioport_unmap)(void __iomem *);
 
 	int (*mv_clk_init)(void);
+	int (*mv_mode_pins)(void);
 };
 
 extern struct sh_machine_vector sh_mv;
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 005c962..fb67482 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -94,6 +94,10 @@ extern struct pt_regs fake_swapper_regs;
 const char *get_cpu_subtype(struct sh_cpuinfo *c);
 extern const struct seq_operations cpuinfo_op;
 
+/* processor boot mode configuration */
+int generic_mode_pins(void);
+int test_mode_pin(int pin);
+
 #ifdef CONFIG_VSYSCALL
 int vsyscall_init(void);
 #else
diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c
index c1ea41e..548f660 100644
--- a/arch/sh/kernel/machvec.c
+++ b/arch/sh/kernel/machvec.c
@@ -129,6 +129,7 @@ void __init sh_mv_setup(void)
 	mv_set(ioport_map);
 	mv_set(ioport_unmap);
 	mv_set(irq_demux);
+	mv_set(mode_pins);
 
 	if (!sh_mv.mv_nr_irqs)
 		sh_mv.mv_nr_irqs = NR_IRQS;
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 00086b2..050131e 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -420,6 +420,18 @@ void __init setup_arch(char **cmdline_p)
 #endif
 }
 
+/* processor boot mode configuration */
+int generic_mode_pins(void)
+{
+	pr_warning("generic_mode_pins(): missing mode pin configuration\n");
+	return 0;
+}
+
+int test_mode_pin(int pin)
+{
+	return sh_mv.mv_mode_pins() & (1 << pin);
+}
+
 static const char *cpu_name[] = {
 	[CPU_SH7201]	= "SH7201",
 	[CPU_SH7203]	= "SH7203",	[CPU_SH7263]	= "SH7263",
-- 
cgit v0.10.2


From 4a44b32969bfc29140b9f21a1ec924c796d6ac43 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 1 Jun 2009 15:56:00 +0900
Subject: sh: sh7785 mode pin definitions

This patch adds sh7785 mode pin definitions. Mode pins and
pin function controller comments are added as well.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/cpu-sh4/cpu/sh7785.h b/arch/sh/include/cpu-sh4/cpu/sh7785.h
index e4006af..89afaa6 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7785.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7785.h
@@ -1,6 +1,30 @@
 #ifndef __ASM_SH7785_H__
 #define __ASM_SH7785_H__
 
+/* Boot Mode Pins, more information in sh7785 manual Rev.1.00, page 1628 */
+enum {
+	MODE_PIN_MODE0,  /* CPG - Initial Pck/Bck Frequency [FRQMR1] */
+	MODE_PIN_MODE1,  /* CPG - Initial Uck/SHck/DDRck Frequency [FRQMR1] */
+	MODE_PIN_MODE2,  /* CPG - Reserved (L: Normal operation) */
+	MODE_PIN_MODE3,  /* CPG - Reserved (L: Normal operation) */
+	MODE_PIN_MODE4,  /* CPG - Initial PLL setting (72x/36x) */
+	MODE_PIN_MODE5,  /* LBSC - Area0 Memory Type / Bus Width [CS0BCR.8] */
+	MODE_PIN_MODE6,  /* LBSC - Area0 Memory Type / Bus Width [CS0BCR.9] */
+	MODE_PIN_MODE7,  /* LBSC - Area0 Memory Type / Bus Width [CS0BCR.3] */
+	MODE_PIN_MODE8,  /* LBSC - Endian Mode (L: Big, H: Little) [BCR.31] */
+	MODE_PIN_MODE9,  /* LBSC - Master/Slave Mode (L: Slave) [BCR.30] */
+	MODE_PIN_MODE10, /* CPG - Clock Input (L: Ext Clk, H: Crystal) */
+	MODE_PIN_MODE11, /* PCI - Pin Mode (LL: PCI host, LH: PCI slave) */
+	MODE_PIN_MODE12, /* PCI - Pin Mode (HL: Local bus, HH: DU) */
+	MODE_PIN_MODE13, /* Boot Address Mode (L: 29-bit, H: 32-bit) */
+	MODE_PIN_MODE14, /* Reserved (H: Normal operation) */
+	MODE_PIN_MPMD,   /* Emulation Mode (L: Emulation mode, H: LSI mode) */
+};
+
+/* Pin Function Controller:
+ * GPIO_FN_xx - GPIO used to select pin function
+ * GPIO_Pxx - GPIO mapped to real I/O pin on CPU
+ */
 enum {
 	/* PA */
 	GPIO_PA7, GPIO_PA6, GPIO_PA5, GPIO_PA4,
-- 
cgit v0.10.2


From 63d12e23235d982d8f55696e09b2ff91e3ba0042 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 28 May 2009 12:00:25 +0000
Subject: sh: sh7785lcr mode pin configuration

This patch adds mode pin support to the sh7785lcr board.

The harware allows the user to control the mode pins using
dip switches S1 and S2, but from the software the pins are
fixed to the factory default since we have no way to reading
out this configuration from software.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index 33b194b..c2894c5 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -24,6 +24,7 @@
 #include <mach/sh7785lcr.h>
 #include <asm/heartbeat.h>
 #include <asm/clock.h>
+#include <cpu/sh7785.h>
 
 /*
  * NOTE: This board has 2 physical memory maps.
@@ -320,6 +321,26 @@ static void __init sh7785lcr_setup(char **cmdline_p)
 	writel(0x000307c2, sm501_reg);
 }
 
+/* Return the board specific boot mode pin configuration */
+static int sh7785lcr_mode_pins(void)
+{
+	int value = 0;
+
+	/* These are the factory default settings of S1 and S2.
+	 * If you change these dip switches then you will need to
+	 * adjust the values below as well.
+	 */
+	value |= 1 << MODE_PIN_MODE4; /* Clock Mode 16 */
+	value |= 1 << MODE_PIN_MODE5; /* 32-bit Area0 bus width */
+	value |= 1 << MODE_PIN_MODE6; /* 32-bit Area0 bus width */
+	value |= 1 << MODE_PIN_MODE7; /* Area 0 SRAM interface [fixed] */
+	value |= 1 << MODE_PIN_MODE8; /* Little Endian */
+	value |= 1 << MODE_PIN_MODE9; /* Master Mode */
+	value |= 1 << MODE_PIN_MODE14; /* No PLL step-up */
+
+	return value;
+}
+
 /*
  * The Machine Vector
  */
@@ -328,5 +349,6 @@ static struct sh_machine_vector mv_sh7785lcr __initmv = {
 	.mv_setup		= sh7785lcr_setup,
 	.mv_clk_init		= sh7785lcr_clk_init,
 	.mv_init_irq		= init_sh7785lcr_IRQ,
+	.mv_mode_pins		= sh7785lcr_mode_pins,
 };
 
-- 
cgit v0.10.2


From 1823f6d5e6b81cca6542ed2e5f30d2556aad0f67 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 28 May 2009 12:06:17 +0000
Subject: sh: sh7785 pll configuration from mode pin

This patch modifies the sh7785 clock code to use the MODE4
value to switch between 72x and 36x PLL multiplication.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index a4a9bcb..705b023 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -16,6 +16,7 @@
 #include <linux/cpufreq.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
+#include <cpu/sh7785.h>
 
 static unsigned int div2[] = { 1, 2, 4, 6, 8, 12, 16, 18,
 			       24, 32, 36, 48 };
@@ -80,12 +81,11 @@ static struct clk_ops frqmr_clk_ops = {
 
 static unsigned long pll_recalc(struct clk *clk)
 {
-	/*
-	 * XXX: PLL1 multiplier is locked for the default clock mode,
-	 * when mode pin detection and configuration support is added,
-	 * select the multiplier dynamically.
-	 */
-	return clk->parent->rate * 36;
+	int multiplier;
+
+	multiplier = test_mode_pin(MODE_PIN_MODE4) ? 36 : 72;
+
+	return clk->parent->rate * multiplier;
 }
 
 static struct clk_ops pll_clk_ops = {
-- 
cgit v0.10.2


From 98fbe45bea77c1804eae0e71f27673db1824a2a8 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Fri, 29 May 2009 07:41:23 +0000
Subject: sh: SH7724 has an L2 cache.

Add the CPU_HAS_L2_CACHE flag to SH7724.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index 973ff83..7d821ce 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -160,7 +160,7 @@ int __init detect_cpu_and_cache_system(void)
 		boot_cpu_data.type = CPU_SH7724;
 		boot_cpu_data.icache.ways = 4;
 		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_FPU;
+		boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_FPU | CPU_HAS_L2_CACHE;
 		break;
 	case 0x4000:	/* 1st cut */
 	case 0x4001:	/* 2nd cut */
-- 
cgit v0.10.2


From 25346b93ca079080c9cb23331db5c4f6404e8530 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 1 Jun 2009 17:48:12 +1000
Subject: perf_counter: Provide functions for locking and pinning the context
 for a task

This abstracts out the code for locking the context associated
with a task.  Because the context might get transferred from
one task to another concurrently, we have to check after
locking the context that it is still the right context for the
task and retry if not.  This was open-coded in
find_get_context() and perf_counter_init_task().

This adds a further function for pinning the context for a
task, i.e. marking it so it can't be transferred to another
task.  This adds a 'pin_count' field to struct
perf_counter_context to indicate that a context is pinned,
instead of the previous method of setting the parent_gen count
to all 1s.  Pinning the context with a pin_count is easier to
undo and doesn't require saving the parent_gen value.  This
also adds a perf_unpin_context() to undo the effect of
perf_pin_task_context() and changes perf_counter_init_task to
use it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18979.34748.755674.596386@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 519a41b..81ec79c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -543,6 +543,7 @@ struct perf_counter_context {
 	struct perf_counter_context *parent_ctx;
 	u64			parent_gen;
 	u64			generation;
+	int			pin_count;
 	struct rcu_head		rcu_head;
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 79c3f26..da8dfef 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -123,6 +123,69 @@ static void put_ctx(struct perf_counter_context *ctx)
 }
 
 /*
+ * Get the perf_counter_context for a task and lock it.
+ * This has to cope with with the fact that until it is locked,
+ * the context could get moved to another task.
+ */
+static struct perf_counter_context *perf_lock_task_context(
+				struct task_struct *task, unsigned long *flags)
+{
+	struct perf_counter_context *ctx;
+
+	rcu_read_lock();
+ retry:
+	ctx = rcu_dereference(task->perf_counter_ctxp);
+	if (ctx) {
+		/*
+		 * If this context is a clone of another, it might
+		 * get swapped for another underneath us by
+		 * perf_counter_task_sched_out, though the
+		 * rcu_read_lock() protects us from any context
+		 * getting freed.  Lock the context and check if it
+		 * got swapped before we could get the lock, and retry
+		 * if so.  If we locked the right context, then it
+		 * can't get swapped on us any more.
+		 */
+		spin_lock_irqsave(&ctx->lock, *flags);
+		if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
+			spin_unlock_irqrestore(&ctx->lock, *flags);
+			goto retry;
+		}
+	}
+	rcu_read_unlock();
+	return ctx;
+}
+
+/*
+ * Get the context for a task and increment its pin_count so it
+ * can't get swapped to another task.  This also increments its
+ * reference count so that the context can't get freed.
+ */
+static struct perf_counter_context *perf_pin_task_context(struct task_struct *task)
+{
+	struct perf_counter_context *ctx;
+	unsigned long flags;
+
+	ctx = perf_lock_task_context(task, &flags);
+	if (ctx) {
+		++ctx->pin_count;
+		get_ctx(ctx);
+		spin_unlock_irqrestore(&ctx->lock, flags);
+	}
+	return ctx;
+}
+
+static void perf_unpin_context(struct perf_counter_context *ctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	--ctx->pin_count;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+	put_ctx(ctx);
+}
+
+/*
  * Add a counter from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
  */
@@ -916,7 +979,7 @@ static int context_equiv(struct perf_counter_context *ctx1,
 {
 	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
 		&& ctx1->parent_gen == ctx2->parent_gen
-		&& ctx1->parent_gen != ~0ull;
+		&& !ctx1->pin_count && !ctx2->pin_count;
 }
 
 /*
@@ -1271,6 +1334,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	struct perf_counter_context *ctx;
 	struct perf_counter_context *parent_ctx;
 	struct task_struct *task;
+	unsigned long flags;
 	int err;
 
 	/*
@@ -1323,28 +1387,9 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	if (!ptrace_may_access(task, PTRACE_MODE_READ))
 		goto errout;
 
- retry_lock:
-	rcu_read_lock();
  retry:
-	ctx = rcu_dereference(task->perf_counter_ctxp);
+	ctx = perf_lock_task_context(task, &flags);
 	if (ctx) {
-		/*
-		 * If this context is a clone of another, it might
-		 * get swapped for another underneath us by
-		 * perf_counter_task_sched_out, though the
-		 * rcu_read_lock() protects us from any context
-		 * getting freed.  Lock the context and check if it
-		 * got swapped before we could get the lock, and retry
-		 * if so.  If we locked the right context, then it
-		 * can't get swapped on us any more and we can
-		 * unclone it if necessary.
-		 * Once it's not a clone things will be stable.
-		 */
-		spin_lock_irq(&ctx->lock);
-		if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
-			spin_unlock_irq(&ctx->lock);
-			goto retry;
-		}
 		parent_ctx = ctx->parent_ctx;
 		if (parent_ctx) {
 			put_ctx(parent_ctx);
@@ -1355,9 +1400,8 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 		 * this context won't get freed if the task exits.
 		 */
 		get_ctx(ctx);
-		spin_unlock_irq(&ctx->lock);
+		spin_unlock_irqrestore(&ctx->lock, flags);
 	}
-	rcu_read_unlock();
 
 	if (!ctx) {
 		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
@@ -1372,7 +1416,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 			 * the context they set.
 			 */
 			kfree(ctx);
-			goto retry_lock;
+			goto retry;
 		}
 		get_task_struct(task);
 	}
@@ -3667,7 +3711,6 @@ int perf_counter_init_task(struct task_struct *child)
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
 	int inherited_all = 1;
-	u64 cloned_gen;
 	int ret = 0;
 
 	child->perf_counter_ctxp = NULL;
@@ -3693,32 +3736,17 @@ int perf_counter_init_task(struct task_struct *child)
 	get_task_struct(child);
 
 	/*
-	 * If the parent's context is a clone, temporarily set its
-	 * parent_gen to an impossible value (all 1s) so it won't get
-	 * swapped under us.  The rcu_read_lock makes sure that
-	 * parent_ctx continues to exist even if it gets swapped to
-	 * another process and then freed while we are trying to get
-	 * its lock.
+	 * If the parent's context is a clone, pin it so it won't get
+	 * swapped under us.
 	 */
-	rcu_read_lock();
- retry:
-	parent_ctx = rcu_dereference(parent->perf_counter_ctxp);
+	parent_ctx = perf_pin_task_context(parent);
+
 	/*
 	 * No need to check if parent_ctx != NULL here; since we saw
 	 * it non-NULL earlier, the only reason for it to become NULL
 	 * is if we exit, and since we're currently in the middle of
 	 * a fork we can't be exiting at the same time.
 	 */
-	spin_lock_irq(&parent_ctx->lock);
-	if (parent_ctx != rcu_dereference(parent->perf_counter_ctxp)) {
-		spin_unlock_irq(&parent_ctx->lock);
-		goto retry;
-	}
-	cloned_gen = parent_ctx->parent_gen;
-	if (parent_ctx->parent_ctx)
-		parent_ctx->parent_gen = ~0ull;
-	spin_unlock_irq(&parent_ctx->lock);
-	rcu_read_unlock();
 
 	/*
 	 * Lock the parent list. No need to lock the child - not PID
@@ -3759,7 +3787,7 @@ int perf_counter_init_task(struct task_struct *child)
 		cloned_ctx = rcu_dereference(parent_ctx->parent_ctx);
 		if (cloned_ctx) {
 			child_ctx->parent_ctx = cloned_ctx;
-			child_ctx->parent_gen = cloned_gen;
+			child_ctx->parent_gen = parent_ctx->parent_gen;
 		} else {
 			child_ctx->parent_ctx = parent_ctx;
 			child_ctx->parent_gen = parent_ctx->generation;
@@ -3769,15 +3797,7 @@ int perf_counter_init_task(struct task_struct *child)
 
 	mutex_unlock(&parent_ctx->mutex);
 
-	/*
-	 * Restore the clone status of the parent.
-	 */
-	if (parent_ctx->parent_ctx) {
-		spin_lock_irq(&parent_ctx->lock);
-		if (parent_ctx->parent_ctx)
-			parent_ctx->parent_gen = cloned_gen;
-		spin_unlock_irq(&parent_ctx->lock);
-	}
+	perf_unpin_context(parent_ctx);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 880ca15adf2392770a68047e7a98e076ff4d21da Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 1 Jun 2009 17:49:14 +1000
Subject: perf_counter: Allow software counters to count while task is not
 running

This changes perf_swcounter_match() so that per-task software
counters can count events that occur while their associated
task is not running.  This will allow us to use the generic
software counter code for counting task migrations, which can
occur while the task is not scheduled in.

To do this, we have to distinguish between the situations where
the counter is inactive because its task has been scheduled
out, and those where the counter is inactive because it is part
of a group that was not able to go on the PMU.  In the former
case we want the counter to count, but not in the latter case.
If the context is active, we have the latter case.  If the
context is inactive then we need to know whether the counter
was counting when the context was last active, which we can
determine by comparing its ->tstamp_stopped timestamp with the
context's timestamp.

This also folds three checks in perf_swcounter_match, checking
perf_event_raw(), perf_event_type() and perf_event_id()
individually, into a single 64-bit comparison on
counter->hw_event.config, as an optimization.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18979.34810.259718.955621@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index da8dfef..ff8b463 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2867,20 +2867,56 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 
 }
 
+static int perf_swcounter_is_counting(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx;
+	unsigned long flags;
+	int count;
+
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+		return 1;
+
+	if (counter->state != PERF_COUNTER_STATE_INACTIVE)
+		return 0;
+
+	/*
+	 * If the counter is inactive, it could be just because
+	 * its task is scheduled out, or because it's in a group
+	 * which could not go on the PMU.  We want to count in
+	 * the first case but not the second.  If the context is
+	 * currently active then an inactive software counter must
+	 * be the second case.  If it's not currently active then
+	 * we need to know whether the counter was active when the
+	 * context was last active, which we can determine by
+	 * comparing counter->tstamp_stopped with ctx->time.
+	 *
+	 * We are within an RCU read-side critical section,
+	 * which protects the existence of *ctx.
+	 */
+	ctx = counter->ctx;
+	spin_lock_irqsave(&ctx->lock, flags);
+	count = 1;
+	/* Re-check state now we have the lock */
+	if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
+	    counter->ctx->is_active ||
+	    counter->tstamp_stopped < ctx->time)
+		count = 0;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+	return count;
+}
+
 static int perf_swcounter_match(struct perf_counter *counter,
 				enum perf_event_types type,
 				u32 event, struct pt_regs *regs)
 {
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return 0;
+	u64 event_config;
 
-	if (perf_event_raw(&counter->hw_event))
-		return 0;
+	event_config = ((u64) type << PERF_COUNTER_TYPE_SHIFT) | event;
 
-	if (perf_event_type(&counter->hw_event) != type)
+	if (!perf_swcounter_is_counting(counter))
 		return 0;
 
-	if (perf_event_id(&counter->hw_event) != event)
+	if (counter->hw_event.config != event_config)
 		return 0;
 
 	if (counter->hw_event.exclude_user && user_mode(regs))
-- 
cgit v0.10.2


From 93bfd01227408a62006a4e4f640a6056abc6af7a Mon Sep 17 00:00:00 2001
From: Andrea Borgia <andrea@borgia.bo.it>
Date: Mon, 1 Jun 2009 10:48:54 +0200
Subject: ALSA: usb-audio - quirk for USB Aureon cards

Add quirk to provide proper naming of the Terratec Aureon 5.1 MkII
USB card.

Signed-off-by: Andrea Borgia <andrea@borgia.bo.it>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index 5d955aa..d84d6f3 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -1951,6 +1951,14 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 	}
 },
 {
+	USB_DEVICE_VENDOR_SPEC(0x0ccd, 0x0028),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		.vendor_name = "TerraTec",
+		.product_name = "Aureon 5.1 MkII",
+		.ifnum = QUIRK_NO_INTERFACE
+	}
+},
+{
 	USB_DEVICE(0x0ccd, 0x0035),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
 		.vendor_name = "Miditech",
-- 
cgit v0.10.2


From 6efd2cd5e8c566b9c2b4c19830e5e120b442d040 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 1 Jun 2009 10:59:51 +0200
Subject: ALSA: usb-audio - Add quirk for Roland/Edirol M-16DX

Added a half-working quirk for Roland/Edirol M-16DX.
This enables the capture on the device but the playback on it seems still
problematic becuase of lack of sync with the capture clock.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index d84d6f3..58f24f5 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -1470,6 +1470,41 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 	}
 },
 {
+	/* Edirol M-16DX */
+	/* FIXME: This quirk gives a good-working capture stream but the
+	 *        playback seems problematic because of lacking of sync
+	 *        with capture stream.  It needs to sync with the capture
+	 *        clock.  As now, you'll get frequent sound distortions
+	 *        via the playback.
+	 */
+	USB_DEVICE(0x0582, 0x00c4),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_COMPOSITE,
+		.data = (const struct snd_usb_audio_quirk[]) {
+			{
+				.ifnum = 0,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 1,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 2,
+				.type = QUIRK_MIDI_FIXED_ENDPOINT,
+				.data = & (const struct snd_usb_midi_endpoint_info) {
+					.out_cables = 0x0001,
+					.in_cables  = 0x0001
+				}
+			},
+			{
+				.ifnum = -1
+			}
+		}
+	}
+},
+{
 	/* BOSS GT-10 */
 	USB_DEVICE(0x0582, 0x00da),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
-- 
cgit v0.10.2


From eb366d4c95f546bb46a536387a4aca00ed2eda6a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 1 Jun 2009 08:25:17 +0100
Subject: [ARM] 5533/1: Add U300 series defconfig

This adds a defconfig for the U300 series ST-Ericsson mobile
platforms. It will be maintained to enable the maximum set of
drivers so as to provide a good regression testing target for
these platforms.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/configs/u300_defconfig b/arch/arm/configs/u300_defconfig
new file mode 100644
index 0000000..2d827e1
--- /dev/null
+++ b/arch/arm/configs/u300_defconfig
@@ -0,0 +1,1115 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc6
+# Mon Jun  1 09:18:22 2009
+#
+CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+# CONFIG_NO_IOPORT is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+# CONFIG_BLK_DEV_INITRD is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+# CONFIG_AIO is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_SLUB_DEBUG=y
+CONFIG_COMPAT_BRK=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_CLK=y
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+CONFIG_IOSCHED_DEADLINE=y
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_DEFAULT_AS is not set
+CONFIG_DEFAULT_DEADLINE=y
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="deadline"
+# CONFIG_FREEZER is not set
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_GEMINI is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IMX is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_MMP is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_W90X900 is not set
+# CONFIG_ARCH_PNX4008 is not set
+# CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+CONFIG_ARCH_U300=y
+# CONFIG_ARCH_DAVINCI is not set
+# CONFIG_ARCH_OMAP is not set
+
+#
+# ST-Ericsson AB U300/U330/U335/U365 Platform
+#
+
+#
+# ST-Ericsson Mobile Platform Products
+#
+CONFIG_MACH_U300=y
+
+#
+# ST-Ericsson U300/U330/U335/U365 Feature Selections
+#
+# CONFIG_MACH_U300_BS2X is not set
+# CONFIG_MACH_U300_BS330 is not set
+CONFIG_MACH_U300_BS335=y
+# CONFIG_MACH_U300_BS365 is not set
+# CONFIG_MACH_U300_SINGLE_RAM is not set
+CONFIG_MACH_U300_DUAL_RAM=y
+CONFIG_U300_DEBUG=y
+# CONFIG_MACH_U300_SEMI_IS_SHARED is not set
+
+#
+# All the settings below must match the bootloader's settings
+#
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_ARM926T=y
+CONFIG_CPU_32v5=y
+CONFIG_CPU_ABRT_EV5TJ=y
+CONFIG_CPU_PABRT_NOIFAR=y
+CONFIG_CPU_CACHE_VIVT=y
+CONFIG_CPU_COPY_V4WB=y
+CONFIG_CPU_TLB_V4WBI=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+
+#
+# Processor Features
+#
+CONFIG_ARM_THUMB=y
+# CONFIG_CPU_ICACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_DISABLE is not set
+# CONFIG_CPU_DCACHE_WRITETHROUGH is not set
+# CONFIG_CPU_CACHE_ROUND_ROBIN is not set
+# CONFIG_OUTER_CACHE is not set
+CONFIG_ARM_VIC=y
+CONFIG_COMMON_CLKDEV=y
+
+#
+# Bus support
+#
+CONFIG_ARM_AMBA=y
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_PREEMPT=y
+CONFIG_HZ=100
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+CONFIG_ARCH_FLATMEM_HAS_HOLES=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+# CONFIG_HIGHMEM is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+CONFIG_ALIGNMENT_TRAP=y
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="root=/dev/mtdblock2 rw rootfstype=yaffs2 console=ttyAMA0,115200n8 ab3100.force=0,0x48 mtdparts=u300nand:128k@0x0(bootrecords)ro,8064k@128k(free)ro,253952k@8192k(platform) lpj=515072"
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+CONFIG_CPU_IDLE=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPU_IDLE_GOV_MENU=y
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+CONFIG_FPE_NWFPE=y
+# CONFIG_FPE_NWFPE_XP is not set
+# CONFIG_FPE_FASTFPE is not set
+# CONFIG_VFP is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+CONFIG_PM=y
+# CONFIG_PM_DEBUG is not set
+# CONFIG_SUSPEND is not set
+# CONFIG_APM_EMULATION is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AFS_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+# CONFIG_MTD_CFI is not set
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+CONFIG_MTD_NAND=y
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+CONFIG_MTD_NAND_ECC_SMC=y
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+# CONFIG_MTD_NAND_GPIO is not set
+CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_NANDSIM is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ICS932S401 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_AT25 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_NETDEVICES is not set
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_AMBA_PL010 is not set
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+# CONFIG_SERIAL_MAX3100 is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=16
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+# CONFIG_I2C_CHARDEV is not set
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+# CONFIG_SPI_BITBANG is not set
+# CONFIG_SPI_GPIO is not set
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+# CONFIG_W1 is not set
+CONFIG_POWER_SUPPLY=y
+# CONFIG_POWER_SUPPLY_DEBUG is not set
+# CONFIG_PDA_POWER is not set
+# CONFIG_BATTERY_DS2760 is not set
+# CONFIG_BATTERY_BQ27x00 is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+# CONFIG_FB_CFB_FILLRECT is not set
+# CONFIG_FB_CFB_COPYAREA is not set
+# CONFIG_FB_CFB_IMAGEBLIT is not set
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_ARMCLCD is not set
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BACKLIGHT_GENERIC=y
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE is not set
+# CONFIG_LOGO is not set
+CONFIG_SOUND=y
+# CONFIG_SOUND_OSS_CORE is not set
+CONFIG_SND=y
+CONFIG_SND_TIMER=y
+CONFIG_SND_PCM=y
+CONFIG_SND_JACK=y
+# CONFIG_SND_SEQUENCER is not set
+# CONFIG_SND_MIXER_OSS is not set
+# CONFIG_SND_PCM_OSS is not set
+# CONFIG_SND_HRTIMER is not set
+# CONFIG_SND_DYNAMIC_MINORS is not set
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_VERBOSE_PROCFS is not set
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+# CONFIG_SND_DRIVERS is not set
+# CONFIG_SND_ARM is not set
+# CONFIG_SND_SPI is not set
+CONFIG_SND_SOC=y
+CONFIG_SND_SOC_I2C_AND_SPI=y
+# CONFIG_SND_SOC_ALL_CODECS is not set
+# CONFIG_SOUND_PRIME is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_MMC_DEBUG is not set
+# CONFIG_MMC_UNSAFE_RESUME is not set
+
+#
+# MMC/SD/SDIO Card Drivers
+#
+CONFIG_MMC_BLOCK=y
+CONFIG_MMC_BLOCK_BOUNCE=y
+# CONFIG_SDIO_UART is not set
+# CONFIG_MMC_TEST is not set
+
+#
+# MMC/SD/SDIO Host Controller Drivers
+#
+CONFIG_MMC_ARMMMCI=y
+# CONFIG_MMC_SDHCI is not set
+# CONFIG_MMC_SPI is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+
+#
+# LED drivers
+#
+# CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_GPIO is not set
+# CONFIG_LEDS_LP5521 is not set
+# CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_DAC124S085 is not set
+# CONFIG_LEDS_BD2802 is not set
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+# CONFIG_LEDS_TRIGGER_TIMER is not set
+# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
+CONFIG_LEDS_TRIGGER_BACKLIGHT=y
+# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+# CONFIG_RTC_DRV_M41T94 is not set
+# CONFIG_RTC_DRV_DS1305 is not set
+# CONFIG_RTC_DRV_DS1390 is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+# CONFIG_RTC_DRV_R9701 is not set
+# CONFIG_RTC_DRV_RS5C348 is not set
+# CONFIG_RTC_DRV_DS3234 is not set
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_CMOS is not set
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+# CONFIG_RTC_DRV_PL030 is not set
+# CONFIG_RTC_DRV_PL031 is not set
+CONFIG_DMADEVICES=y
+
+#
+# DMA Devices
+#
+# CONFIG_AUXDISPLAY is not set
+CONFIG_REGULATOR=y
+# CONFIG_REGULATOR_DEBUG is not set
+# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
+# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
+# CONFIG_REGULATOR_BQ24022 is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+CONFIG_FUSE_FS=y
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+# CONFIG_MSDOS_FS is not set
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+# CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+CONFIG_TIMER_STATS=y
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+# CONFIG_PAGE_POISONING is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_ARM_UNWIND=y
+# CONFIG_DEBUG_USER is not set
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_LL is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From a3d6ab9723060e8d67e28a8d9142e6d08953d07b Mon Sep 17 00:00:00 2001
From: Wei Ni <wni@nvidia.com>
Date: Mon, 1 Jun 2009 16:37:28 +0800
Subject: ALSA: hda - Support NVIDIA 8 channel HDMI audio

Support 8 channel HDMI audio for MCP78/7A

Signed-off-by: Wei Ni <wni@nvidia.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_nvhdmi.c b/sound/pci/hda/patch_nvhdmi.c
index d57d813..f5792e2 100644
--- a/sound/pci/hda/patch_nvhdmi.c
+++ b/sound/pci/hda/patch_nvhdmi.c
@@ -35,9 +35,28 @@ struct nvhdmi_spec {
 	struct hda_pcm pcm_rec;
 };
 
+#define Nv_VERB_SET_Channel_Allocation          0xF79
+#define Nv_VERB_SET_Info_Frame_Checksum         0xF7A
+#define Nv_VERB_SET_Audio_Protection_On         0xF98
+#define Nv_VERB_SET_Audio_Protection_Off        0xF99
+
+#define Nv_Master_Convert_nid   0x04
+#define Nv_Master_Pin_nid       0x05
+
+static hda_nid_t nvhdmi_convert_nids[4] = {
+	/*front, rear, clfe, rear_surr */
+	0x6, 0x8, 0xa, 0xc,
+};
+
 static struct hda_verb nvhdmi_basic_init[] = {
+	/* set audio protect on */
+	{ 0x1, Nv_VERB_SET_Audio_Protection_On, 0x1},
 	/* enable digital output on pin widget */
-	{ 0x05, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x5, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
+	{ 0x7, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
+	{ 0x9, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
+	{ 0xb, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
+	{ 0xd, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x5 },
 	{} /* terminator */
 };
 
@@ -66,48 +85,205 @@ static int nvhdmi_init(struct hda_codec *codec)
  * Digital out
  */
 static int nvhdmi_dig_playback_pcm_open(struct hda_pcm_stream *hinfo,
-				     struct hda_codec *codec,
-				     struct snd_pcm_substream *substream)
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
 {
 	struct nvhdmi_spec *spec = codec->spec;
 	return snd_hda_multi_out_dig_open(codec, &spec->multiout);
 }
 
-static int nvhdmi_dig_playback_pcm_close(struct hda_pcm_stream *hinfo,
-				      struct hda_codec *codec,
-				      struct snd_pcm_substream *substream)
+static int nvhdmi_dig_playback_pcm_close_8ch(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
 {
 	struct nvhdmi_spec *spec = codec->spec;
+	int i;
+
+	snd_hda_codec_write(codec, Nv_Master_Convert_nid,
+			0, AC_VERB_SET_CHANNEL_STREAMID, 0);
+	for (i = 0; i < 4; i++) {
+		/* set the stream id */
+		snd_hda_codec_write(codec, nvhdmi_convert_nids[i], 0,
+				AC_VERB_SET_CHANNEL_STREAMID, 0);
+		/* set the stream format */
+		snd_hda_codec_write(codec, nvhdmi_convert_nids[i], 0,
+				AC_VERB_SET_STREAM_FORMAT, 0);
+	}
+
 	return snd_hda_multi_out_dig_close(codec, &spec->multiout);
 }
 
-static int nvhdmi_dig_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
-					    struct hda_codec *codec,
-					    unsigned int stream_tag,
-					    unsigned int format,
-					    struct snd_pcm_substream *substream)
+static int nvhdmi_dig_playback_pcm_close_2ch(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					struct snd_pcm_substream *substream)
+{
+	struct nvhdmi_spec *spec = codec->spec;
+	return snd_hda_multi_out_dig_close(codec, &spec->multiout);
+}
+
+static int nvhdmi_dig_playback_pcm_prepare_8ch(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					unsigned int stream_tag,
+					unsigned int format,
+					struct snd_pcm_substream *substream)
+{
+	int chs;
+	unsigned int dataDCC1, dataDCC2, chan, chanmask, channel_id;
+	int i;
+
+	mutex_lock(&codec->spdif_mutex);
+
+	chs = substream->runtime->channels;
+	chan = chs ? (chs - 1) : 1;
+
+	switch (chs) {
+	default:
+	case 0:
+	case 2:
+		chanmask = 0x00;
+		break;
+	case 4:
+		chanmask = 0x08;
+		break;
+	case 6:
+		chanmask = 0x0b;
+		break;
+	case 8:
+		chanmask = 0x13;
+		break;
+	}
+	dataDCC1 = AC_DIG1_ENABLE | AC_DIG1_COPYRIGHT;
+	dataDCC2 = 0x2;
+
+	/* set the Audio InforFrame Channel Allocation */
+	snd_hda_codec_write(codec, 0x1, 0,
+			Nv_VERB_SET_Channel_Allocation, chanmask);
+
+	/* turn off SPDIF once; otherwise the IEC958 bits won't be updated */
+	if (codec->spdif_status_reset && (codec->spdif_ctls & AC_DIG1_ENABLE))
+		snd_hda_codec_write(codec,
+				Nv_Master_Convert_nid,
+				0,
+				AC_VERB_SET_DIGI_CONVERT_1,
+				codec->spdif_ctls & ~AC_DIG1_ENABLE & 0xff);
+
+	/* set the stream id */
+	snd_hda_codec_write(codec, Nv_Master_Convert_nid, 0,
+			AC_VERB_SET_CHANNEL_STREAMID, (stream_tag << 4) | 0x0);
+
+	/* set the stream format */
+	snd_hda_codec_write(codec, Nv_Master_Convert_nid, 0,
+			AC_VERB_SET_STREAM_FORMAT, format);
+
+	/* turn on again (if needed) */
+	/* enable and set the channel status audio/data flag */
+	if (codec->spdif_status_reset && (codec->spdif_ctls & AC_DIG1_ENABLE)) {
+		snd_hda_codec_write(codec,
+				Nv_Master_Convert_nid,
+				0,
+				AC_VERB_SET_DIGI_CONVERT_1,
+				codec->spdif_ctls & 0xff);
+		snd_hda_codec_write(codec,
+				Nv_Master_Convert_nid,
+				0,
+				AC_VERB_SET_DIGI_CONVERT_2, dataDCC2);
+	}
+
+	for (i = 0; i < 4; i++) {
+		if (chs == 2)
+			channel_id = 0;
+		else
+			channel_id = i * 2;
+
+		/* turn off SPDIF once;
+		 *otherwise the IEC958 bits won't be updated
+		 */
+		if (codec->spdif_status_reset &&
+		(codec->spdif_ctls & AC_DIG1_ENABLE))
+			snd_hda_codec_write(codec,
+				nvhdmi_convert_nids[i],
+				0,
+				AC_VERB_SET_DIGI_CONVERT_1,
+				codec->spdif_ctls & ~AC_DIG1_ENABLE & 0xff);
+		/* set the stream id */
+		snd_hda_codec_write(codec,
+				nvhdmi_convert_nids[i],
+				0,
+				AC_VERB_SET_CHANNEL_STREAMID,
+				(stream_tag << 4) | channel_id);
+		/* set the stream format */
+		snd_hda_codec_write(codec,
+				nvhdmi_convert_nids[i],
+				0,
+				AC_VERB_SET_STREAM_FORMAT,
+				format);
+		/* turn on again (if needed) */
+		/* enable and set the channel status audio/data flag */
+		if (codec->spdif_status_reset &&
+		(codec->spdif_ctls & AC_DIG1_ENABLE)) {
+			snd_hda_codec_write(codec,
+					nvhdmi_convert_nids[i],
+					0,
+					AC_VERB_SET_DIGI_CONVERT_1,
+					codec->spdif_ctls & 0xff);
+			snd_hda_codec_write(codec,
+					nvhdmi_convert_nids[i],
+					0,
+					AC_VERB_SET_DIGI_CONVERT_2, dataDCC2);
+		}
+	}
+
+	/* set the Audio Info Frame Checksum */
+	snd_hda_codec_write(codec, 0x1, 0,
+			Nv_VERB_SET_Info_Frame_Checksum,
+			(0x71 - chan - chanmask));
+
+	mutex_unlock(&codec->spdif_mutex);
+	return 0;
+}
+
+static int nvhdmi_dig_playback_pcm_prepare_2ch(struct hda_pcm_stream *hinfo,
+					struct hda_codec *codec,
+					unsigned int stream_tag,
+					unsigned int format,
+					struct snd_pcm_substream *substream)
 {
 	struct nvhdmi_spec *spec = codec->spec;
 	return snd_hda_multi_out_dig_prepare(codec, &spec->multiout, stream_tag,
-					     format, substream);
+					format, substream);
 }
 
-static struct hda_pcm_stream nvhdmi_pcm_digital_playback = {
+static struct hda_pcm_stream nvhdmi_pcm_digital_playback_8ch = {
+	.substreams = 1,
+	.channels_min = 2,
+	.channels_max = 8,
+	.nid = Nv_Master_Convert_nid,
+	.rates = SNDRV_PCM_RATE_48000,
+	.maxbps = 16,
+	.formats = SNDRV_PCM_FMTBIT_S16_LE,
+	.ops = {
+		.open = nvhdmi_dig_playback_pcm_open,
+		.close = nvhdmi_dig_playback_pcm_close_8ch,
+		.prepare = nvhdmi_dig_playback_pcm_prepare_8ch
+	},
+};
+
+static struct hda_pcm_stream nvhdmi_pcm_digital_playback_2ch = {
 	.substreams = 1,
 	.channels_min = 2,
 	.channels_max = 2,
-	.nid = 0x4, /* NID to query formats and rates and setup streams */
+	.nid = Nv_Master_Convert_nid,
 	.rates = SNDRV_PCM_RATE_48000,
 	.maxbps = 16,
 	.formats = SNDRV_PCM_FMTBIT_S16_LE,
 	.ops = {
 		.open = nvhdmi_dig_playback_pcm_open,
-		.close = nvhdmi_dig_playback_pcm_close,
-		.prepare = nvhdmi_dig_playback_pcm_prepare
+		.close = nvhdmi_dig_playback_pcm_close_2ch,
+		.prepare = nvhdmi_dig_playback_pcm_prepare_2ch
 	},
 };
 
-static int nvhdmi_build_pcms(struct hda_codec *codec)
+static int nvhdmi_build_pcms_8ch(struct hda_codec *codec)
 {
 	struct nvhdmi_spec *spec = codec->spec;
 	struct hda_pcm *info = &spec->pcm_rec;
@@ -117,7 +293,24 @@ static int nvhdmi_build_pcms(struct hda_codec *codec)
 
 	info->name = "NVIDIA HDMI";
 	info->pcm_type = HDA_PCM_TYPE_HDMI;
-	info->stream[SNDRV_PCM_STREAM_PLAYBACK] = nvhdmi_pcm_digital_playback;
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK]
+					= nvhdmi_pcm_digital_playback_8ch;
+
+	return 0;
+}
+
+static int nvhdmi_build_pcms_2ch(struct hda_codec *codec)
+{
+	struct nvhdmi_spec *spec = codec->spec;
+	struct hda_pcm *info = &spec->pcm_rec;
+
+	codec->num_pcms = 1;
+	codec->pcm_info = info;
+
+	info->name = "NVIDIA HDMI";
+	info->pcm_type = HDA_PCM_TYPE_HDMI;
+	info->stream[SNDRV_PCM_STREAM_PLAYBACK]
+					= nvhdmi_pcm_digital_playback_2ch;
 
 	return 0;
 }
@@ -127,14 +320,40 @@ static void nvhdmi_free(struct hda_codec *codec)
 	kfree(codec->spec);
 }
 
-static struct hda_codec_ops nvhdmi_patch_ops = {
+static struct hda_codec_ops nvhdmi_patch_ops_8ch = {
+	.build_controls = nvhdmi_build_controls,
+	.build_pcms = nvhdmi_build_pcms_8ch,
+	.init = nvhdmi_init,
+	.free = nvhdmi_free,
+};
+
+static struct hda_codec_ops nvhdmi_patch_ops_2ch = {
 	.build_controls = nvhdmi_build_controls,
-	.build_pcms = nvhdmi_build_pcms,
+	.build_pcms = nvhdmi_build_pcms_2ch,
 	.init = nvhdmi_init,
 	.free = nvhdmi_free,
 };
 
-static int patch_nvhdmi(struct hda_codec *codec)
+static int patch_nvhdmi_8ch(struct hda_codec *codec)
+{
+	struct nvhdmi_spec *spec;
+
+	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	if (spec == NULL)
+		return -ENOMEM;
+
+	codec->spec = spec;
+
+	spec->multiout.num_dacs = 0;  /* no analog */
+	spec->multiout.max_channels = 8;
+	spec->multiout.dig_out_nid = Nv_Master_Convert_nid;
+
+	codec->patch_ops = nvhdmi_patch_ops_8ch;
+
+	return 0;
+}
+
+static int patch_nvhdmi_2ch(struct hda_codec *codec)
 {
 	struct nvhdmi_spec *spec;
 
@@ -144,13 +363,11 @@ static int patch_nvhdmi(struct hda_codec *codec)
 
 	codec->spec = spec;
 
-	spec->multiout.num_dacs = 0;	  /* no analog */
+	spec->multiout.num_dacs = 0;  /* no analog */
 	spec->multiout.max_channels = 2;
-	spec->multiout.dig_out_nid = 0x4; /* NID for copying analog to digital,
-					   * seems to be unused in pure-digital
-					   * case. */
+	spec->multiout.dig_out_nid = Nv_Master_Convert_nid;
 
-	codec->patch_ops = nvhdmi_patch_ops;
+	codec->patch_ops = nvhdmi_patch_ops_2ch;
 
 	return 0;
 }
@@ -159,11 +376,11 @@ static int patch_nvhdmi(struct hda_codec *codec)
  * patch entries
  */
 static struct hda_codec_preset snd_hda_preset_nvhdmi[] = {
-	{ .id = 0x10de0002, .name = "MCP78 HDMI", .patch = patch_nvhdmi },
-	{ .id = 0x10de0006, .name = "MCP78 HDMI", .patch = patch_nvhdmi },
-	{ .id = 0x10de0007, .name = "MCP7A HDMI", .patch = patch_nvhdmi },
-	{ .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi },
-	{ .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi },
+	{ .id = 0x10de0002, .name = "MCP78 HDMI", .patch = patch_nvhdmi_8ch },
+	{ .id = 0x10de0006, .name = "MCP78 HDMI", .patch = patch_nvhdmi_8ch },
+	{ .id = 0x10de0007, .name = "MCP7A HDMI", .patch = patch_nvhdmi_8ch },
+	{ .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi_2ch },
+	{ .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi_2ch },
 	{} /* terminator */
 };
 
-- 
cgit v0.10.2


From 6881e8bf3d86b23dd124134fae113ebd05fae08a Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 28 May 2009 12:52:29 +0000
Subject: sh: shared mstp32 clock code

Add shared 32-bit module stop bit clock support.

Processor specific code can use SH_CLK_MSTP32()
to initialize module stop bit clocks, and then
use sh_clk_mstp32() for registration.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index aa9480d..f43d3e7 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -117,4 +117,17 @@ long clk_rate_table_round(struct clk *clk,
 			  struct cpufreq_frequency_table *freq_table,
 			  unsigned long rate);
 
+#define SH_CLK_MSTP32(_name, _id, _parent, _enable_reg,	\
+	    _enable_bit, _flags)			\
+{							\
+	.name		= _name,			\
+	.id		= _id,				\
+	.parent		= _parent,			\
+	.enable_reg	= (void __iomem *)_enable_reg,	\
+	.enable_bit	= _enable_bit,			\
+	.flags		= _flags,			\
+}
+
+int sh_clk_mstp32_register(struct clk *clks, int nr);
+
 #endif /* __ASM_SH_CLOCK_H */
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
index b78c237a..72228d2 100644
--- a/arch/sh/kernel/cpu/clock-cpg.c
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -1,7 +1,42 @@
 #include <linux/clk.h>
 #include <linux/compiler.h>
+#include <linux/io.h>
 #include <asm/clock.h>
 
+static int sh_clk_mstp32_enable(struct clk *clk)
+{
+	__raw_writel(__raw_readl(clk->enable_reg) & ~(1 << clk->enable_bit),
+		     clk->enable_reg);
+	return 0;
+}
+
+static void sh_clk_mstp32_disable(struct clk *clk)
+{
+	__raw_writel(__raw_readl(clk->enable_reg) | (1 << clk->enable_bit),
+		     clk->enable_reg);
+}
+
+static struct clk_ops sh_clk_mstp32_clk_ops = {
+	.enable		= sh_clk_mstp32_enable,
+	.disable	= sh_clk_mstp32_disable,
+	.recalc		= followparent_recalc,
+};
+
+int __init sh_clk_mstp32_register(struct clk *clks, int nr)
+{
+	struct clk *clkp;
+	int ret = 0;
+	int k;
+
+	for (k = 0; !ret && (k < nr); k++) {
+		clkp = clks + k;
+		clkp->ops = &sh_clk_mstp32_clk_ops;
+		ret |= clk_register(clkp);
+	}
+
+	return ret;
+}
+
 #ifdef CONFIG_SH_CLK_CPG_LEGACY
 static struct clk master_clk = {
 	.name		= "master_clk",
-- 
cgit v0.10.2


From e89d53e60593ee7066e1d36ab5c1ccf2648f5f53 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 28 May 2009 13:01:53 +0000
Subject: sh: hook up shared mstp32 clock code to sh7785

Hook up the shared 32-bit module stop bit code to sh7785.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index 705b023..7d55706 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -193,65 +193,34 @@ static struct clk *clks[] = {
 	&umem_clk,
 };
 
-static int mstpcr_clk_enable(struct clk *clk)
-{
-	__raw_writel(__raw_readl(clk->enable_reg) & ~(1 << clk->enable_bit),
-		     clk->enable_reg);
-	return 0;
-}
-
-static void mstpcr_clk_disable(struct clk *clk)
-{
-	__raw_writel(__raw_readl(clk->enable_reg) | (1 << clk->enable_bit),
-		     clk->enable_reg);
-}
-
-static struct clk_ops mstpcr_clk_ops = {
-	.enable		= mstpcr_clk_enable,
-	.disable	= mstpcr_clk_disable,
-	.recalc		= followparent_recalc,
-};
-
 #define MSTPCR0		0xffc80030
 #define MSTPCR1		0xffc80034
 
-#define CLK(_name, _id, _parent, _enable_reg,		\
-	    _enable_bit, _flags)			\
-{							\
-	.name		= _name,			\
-	.id		= _id,				\
-	.parent		= _parent,			\
-	.enable_reg	= (void __iomem *)_enable_reg,	\
-	.enable_bit	= _enable_bit,			\
-	.flags		= _flags,			\
-	.ops		= &mstpcr_clk_ops,		\
-}
-
-static struct clk mstpcr_clks[] = {
+static struct clk mstp_clks[] = {
 	/* MSTPCR0 */
-	CLK("scif_fck", 5, &peripheral_clk, MSTPCR0, 29, 0),
-	CLK("scif_fck", 4, &peripheral_clk, MSTPCR0, 28, 0),
-	CLK("scif_fck", 3, &peripheral_clk, MSTPCR0, 27, 0),
-	CLK("scif_fck", 2, &peripheral_clk, MSTPCR0, 26, 0),
-	CLK("scif_fck", 1, &peripheral_clk, MSTPCR0, 25, 0),
-	CLK("scif_fck", 0, &peripheral_clk, MSTPCR0, 24, 0),
-	CLK("ssi_fck", 1, &peripheral_clk, MSTPCR0, 21, 0),
-	CLK("ssi_fck", 0, &peripheral_clk, MSTPCR0, 20, 0),
-	CLK("hac_fck", 1, &peripheral_clk, MSTPCR0, 17, 0),
-	CLK("hac_fck", 0, &peripheral_clk, MSTPCR0, 16, 0),
-	CLK("mmcif_fck", -1, &peripheral_clk, MSTPCR0, 13, 0),
-	CLK("flctl_fck", -1, &peripheral_clk, MSTPCR0, 12, 0),
-	CLK("tmu345_fck", -1, &peripheral_clk, MSTPCR0, 9, 0),
-	CLK("tmu012_fck", -1, &peripheral_clk, MSTPCR0, 8, 0),
-	CLK("siof_fck", -1, &peripheral_clk, MSTPCR0, 3, 0),
-	CLK("hspi_fck", -1, &peripheral_clk, MSTPCR0, 2, 0),
+	SH_CLK_MSTP32("scif_fck", 5, &peripheral_clk, MSTPCR0, 29, 0),
+	SH_CLK_MSTP32("scif_fck", 4, &peripheral_clk, MSTPCR0, 28, 0),
+	SH_CLK_MSTP32("scif_fck", 3, &peripheral_clk, MSTPCR0, 27, 0),
+	SH_CLK_MSTP32("scif_fck", 2, &peripheral_clk, MSTPCR0, 26, 0),
+	SH_CLK_MSTP32("scif_fck", 1, &peripheral_clk, MSTPCR0, 25, 0),
+	SH_CLK_MSTP32("scif_fck", 0, &peripheral_clk, MSTPCR0, 24, 0),
+	SH_CLK_MSTP32("ssi_fck", 1, &peripheral_clk, MSTPCR0, 21, 0),
+	SH_CLK_MSTP32("ssi_fck", 0, &peripheral_clk, MSTPCR0, 20, 0),
+	SH_CLK_MSTP32("hac_fck", 1, &peripheral_clk, MSTPCR0, 17, 0),
+	SH_CLK_MSTP32("hac_fck", 0, &peripheral_clk, MSTPCR0, 16, 0),
+	SH_CLK_MSTP32("mmcif_fck", -1, &peripheral_clk, MSTPCR0, 13, 0),
+	SH_CLK_MSTP32("flctl_fck", -1, &peripheral_clk, MSTPCR0, 12, 0),
+	SH_CLK_MSTP32("tmu345_fck", -1, &peripheral_clk, MSTPCR0, 9, 0),
+	SH_CLK_MSTP32("tmu012_fck", -1, &peripheral_clk, MSTPCR0, 8, 0),
+	SH_CLK_MSTP32("siof_fck", -1, &peripheral_clk, MSTPCR0, 3, 0),
+	SH_CLK_MSTP32("hspi_fck", -1, &peripheral_clk, MSTPCR0, 2, 0),
 
 	/* MSTPCR1 */
-	CLK("hudi_fck", -1, NULL, MSTPCR1, 19, 0),
-	CLK("ubc_fck", -1, NULL, MSTPCR1, 17, 0),
-	CLK("dmac_11_6_fck", -1, NULL, MSTPCR1, 5, 0),
-	CLK("dmac_5_0_fck", -1, NULL, MSTPCR1, 4, 0),
-	CLK("gdta_fck", -1, NULL, MSTPCR1, 0, 0),
+	SH_CLK_MSTP32("hudi_fck", -1, NULL, MSTPCR1, 19, 0),
+	SH_CLK_MSTP32("ubc_fck", -1, NULL, MSTPCR1, 17, 0),
+	SH_CLK_MSTP32("dmac_11_6_fck", -1, NULL, MSTPCR1, 5, 0),
+	SH_CLK_MSTP32("dmac_5_0_fck", -1, NULL, MSTPCR1, 4, 0),
+	SH_CLK_MSTP32("gdta_fck", -1, NULL, MSTPCR1, 0, 0),
 };
 
 int __init arch_clk_init(void)
@@ -260,8 +229,9 @@ int __init arch_clk_init(void)
 
 	for (i = 0; i < ARRAY_SIZE(clks); i++)
 		ret |= clk_register(clks[i]);
-	for (i = 0; i < ARRAY_SIZE(mstpcr_clks); i++)
-		ret |= clk_register(&mstpcr_clks[i]);
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
 
 	return ret;
 }
-- 
cgit v0.10.2


From a1153e27eec25e9963f5843ba8932952bd9847ac Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 28 May 2009 13:11:31 +0000
Subject: sh: shared div4 clock code

Add shared code for 4-bit divisor clocks.

Processor specific code can use SH_CLK_DIV4()
to initialize div4 clocks, and then use
sh_clk_div4_register() for registration.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index f43d3e7..7435e40 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -3,6 +3,7 @@
 
 #include <linux/list.h>
 #include <linux/seq_file.h>
+#include <linux/cpufreq.h>
 #include <linux/clk.h>
 #include <linux/err.h>
 
@@ -41,6 +42,7 @@ struct clk {
 	unsigned long		arch_flags;
 	void			*priv;
 	struct dentry		*dentry;
+	struct cpufreq_frequency_table *freq_table;
 };
 
 struct clk_lookup {
@@ -130,4 +132,17 @@ long clk_rate_table_round(struct clk *clk,
 
 int sh_clk_mstp32_register(struct clk *clks, int nr);
 
+#define SH_CLK_DIV4(_name, _parent, _reg, _shift, _div_bitmap, _flags)	\
+{									\
+	.name = _name,							\
+	.parent = _parent,						\
+	.enable_reg = (void __iomem *)_reg,				\
+	.enable_bit = _shift,						\
+	.arch_flags = _div_bitmap,					\
+	.flags = _flags,						\
+}
+
+int sh_clk_div4_register(struct clk *clks, int nr,
+			 struct clk_div_mult_table *table);
+
 #endif /* __ASM_SH_CLOCK_H */
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
index 72228d2..88fc30d 100644
--- a/arch/sh/kernel/cpu/clock-cpg.c
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -1,5 +1,6 @@
 #include <linux/clk.h>
 #include <linux/compiler.h>
+#include <linux/bootmem.h>
 #include <linux/io.h>
 #include <asm/clock.h>
 
@@ -37,6 +38,60 @@ int __init sh_clk_mstp32_register(struct clk *clks, int nr)
 	return ret;
 }
 
+static unsigned long sh_clk_div4_recalc(struct clk *clk)
+{
+	struct clk_div_mult_table *table = clk->priv;
+	unsigned int idx;
+
+	clk_rate_table_build(clk, clk->freq_table, table->nr_divisors,
+			     table, &clk->arch_flags);
+
+	idx = (__raw_readl(clk->enable_reg) >> clk->enable_bit) & 0x000f;
+
+	return clk->freq_table[idx].frequency;
+}
+
+static long sh_clk_div4_round_rate(struct clk *clk, unsigned long rate)
+{
+	return clk_rate_table_round(clk, clk->freq_table, rate);
+}
+
+static struct clk_ops sh_clk_div4_clk_ops = {
+	.recalc		= sh_clk_div4_recalc,
+	.round_rate	= sh_clk_div4_round_rate,
+};
+
+int __init sh_clk_div4_register(struct clk *clks, int nr,
+				struct clk_div_mult_table *table)
+{
+	struct clk *clkp;
+	void *freq_table;
+	int nr_divs = table->nr_divisors;
+	int freq_table_size = sizeof(struct cpufreq_frequency_table);
+	int ret = 0;
+	int k;
+
+	k = nr_divs + 1;
+	freq_table = alloc_bootmem(freq_table_size * nr * (nr_divs + 1));
+	if (!freq_table)
+		return -ENOMEM;
+
+	for (k = 0; !ret && (k < nr); k++) {
+		clkp = clks + k;
+
+		clkp->ops = &sh_clk_div4_clk_ops;
+		clkp->id = -1;
+		clkp->priv = table;
+
+		clkp->freq_table = freq_table + (k * freq_table_size);
+		clkp->freq_table[nr_divs].frequency = CPUFREQ_TABLE_END;
+
+		ret = clk_register(clkp);
+	}
+
+	return ret;
+}
+
 #ifdef CONFIG_SH_CLK_CPG_LEGACY
 static struct clk master_clk = {
 	.name		= "master_clk",
-- 
cgit v0.10.2


From 43909a938063f9b6f98c05a2e28b072dd972ece7 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 28 May 2009 13:13:56 +0000
Subject: sh: hook up shared div4 clock code to sh7785

Hook up the shared 4-bit divisor clock code to sh7785.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index 7d55706..dae20ac 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -18,65 +18,14 @@
 #include <asm/freq.h>
 #include <cpu/sh7785.h>
 
-static unsigned int div2[] = { 1, 2, 4, 6, 8, 12, 16, 18,
-			       24, 32, 36, 48 };
-
-static struct clk_div_mult_table cpg_div = {
-	.divisors = div2,
-	.nr_divisors = ARRAY_SIZE(div2),
-};
-
-struct clk_priv {
-	unsigned int			shift;
-
-	/* allowable divisor bitmap */
-	unsigned long			div_bitmap;
-
-	/* Supportable frequencies + termination entry */
-	struct cpufreq_frequency_table	freq_table[ARRAY_SIZE(div2)+1];
-};
-
-#define FRQMR_CLK_DATA(_name, _shift, _div_bitmap)	\
-static struct clk_priv _name##_data = {			\
-	.shift		= _shift,			\
-	.div_bitmap	= _div_bitmap,			\
-							\
-	.freq_table[0]	= {				\
-		.index = 0,				\
-		.frequency = CPUFREQ_TABLE_END,		\
-	},						\
-}
-
-FRQMR_CLK_DATA(pfc,  0, 0x0f80);
-FRQMR_CLK_DATA(s3fc, 4, 0x0ff0);
-FRQMR_CLK_DATA(s2fc, 8, 0x0030);
-FRQMR_CLK_DATA(mfc, 12, 0x000c);
-FRQMR_CLK_DATA(bfc, 16, 0x0fe0);
-FRQMR_CLK_DATA(sfc, 20, 0x000c);
-FRQMR_CLK_DATA(ufc, 24, 0x000c);
-FRQMR_CLK_DATA(ifc, 28, 0x000e);
-
-static unsigned long frqmr_recalc(struct clk *clk)
-{
-	struct clk_priv *data = clk->priv;
-	unsigned int idx = (__raw_readl(FRQMR1) >> data->shift) & 0x000f;
-
-	clk_rate_table_build(clk, data->freq_table, ARRAY_SIZE(div2),
-			     &cpg_div, &data->div_bitmap);
-	
-	return data->freq_table[idx].frequency;
-}
-
-static long frqmr_round_rate(struct clk *clk, unsigned long rate)
-{
-	struct clk_priv *data = clk->priv;
-
-	return clk_rate_table_round(clk, data->freq_table, rate);
-}
-
-static struct clk_ops frqmr_clk_ops = {
-	.recalc			= frqmr_recalc,
-	.round_rate		= frqmr_round_rate,
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+static struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
 };
 
 static unsigned long pll_recalc(struct clk *clk)
@@ -92,16 +41,6 @@ static struct clk_ops pll_clk_ops = {
 	.recalc		= pll_recalc,
 };
 
-/*
- * Default rate for the root input clock, reset this with clk_set_rate()
- * from the platform code.
- */
-static struct clk extal_clk = {
-	.name		= "extal",
-	.id		= -1,
-	.rate		= 33333333,
-};
-
 static struct clk pll_clk = {
 	.name		= "pll_clk",
 	.id		= -1,
@@ -110,87 +49,34 @@ static struct clk pll_clk = {
 	.flags		= CLK_ENABLE_ON_INIT,
 };
 
-static struct clk cpu_clk = {
-	.name		= "cpu_clk",		/* Ick */
-	.id		= -1,
-	.ops		= &frqmr_clk_ops,
-	.parent		= &pll_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-	.priv		= &ifc_data,
-};
-
-static struct clk shyway_clk = {
-	.name		= "shyway_clk",		/* SHck */
-	.id		= -1,
-	.ops		= &frqmr_clk_ops,
-	.parent		= &pll_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-	.priv		= &sfc_data,
-};
-
-static struct clk peripheral_clk = {
-	.name		= "peripheral_clk",	/* Pck */
-	.id		= -1,
-	.ops		= &frqmr_clk_ops,
-	.parent		= &pll_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-	.priv		= &pfc_data,
-};
-
-static struct clk ddr_clk = {
-	.name		= "ddr_clk",		/* DDRck */
-	.id		= -1,
-	.ops		= &frqmr_clk_ops,
-	.parent		= &pll_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-	.priv		= &mfc_data,
+static struct clk *clks[] = {
+	&extal_clk,
+	&pll_clk,
 };
 
-static struct clk bus_clk = {
-	.name		= "bus_clk",		/* Bck */
-	.id		= -1,
-	.ops		= &frqmr_clk_ops,
-	.parent		= &pll_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-	.priv		= &bfc_data,
-};
+static unsigned int div2[] = { 1, 2, 4, 6, 8, 12, 16, 18,
+			       24, 32, 36, 48 };
 
-static struct clk ga_clk = {
-	.name		= "ga_clk",		/* GAck */
-	.id		= -1,
-	.ops		= &frqmr_clk_ops,
-	.parent		= &pll_clk,
-	.priv		= &s2fc_data,
+static struct clk_div_mult_table div4_table = {
+	.divisors = div2,
+	.nr_divisors = ARRAY_SIZE(div2),
 };
 
-static struct clk du_clk = {
-	.name		= "du_clk",		/* DUck */
-	.id		= -1,
-	.ops		= &frqmr_clk_ops,
-	.parent		= &pll_clk,
-	.priv		= &s3fc_data,
-};
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_DDR, DIV4_GA,
+	DIV4_DU, DIV4_P, DIV4_NR };
 
-static struct clk umem_clk = {
-	.name		= "umem_clk",		/* uck */
-	.id		= -1,
-	.ops		= &frqmr_clk_ops,
-	.parent		= &pll_clk,
-	.flags		= CLK_ENABLE_ON_INIT,
-	.priv		= &ufc_data,
-};
+#define DIV4(_str, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, FRQMR1, _bit, _mask, _flags)
 
-static struct clk *clks[] = {
-	&extal_clk,
-	&pll_clk,
-	&cpu_clk,
-	&shyway_clk,
-	&peripheral_clk,
-	&ddr_clk,
-	&bus_clk,
-	&ga_clk,
-	&du_clk,
-	&umem_clk,
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_P] = DIV4("peripheral_clk", 0, 0x0f80, 0),
+	[DIV4_DU] = DIV4("du_clk", 4, 0x0ff0, 0),
+	[DIV4_GA] = DIV4("ga_clk", 8, 0x0030, 0),
+	[DIV4_DDR] = DIV4("ddr_clk", 12, 0x000c, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", 16, 0x0fe0, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", 20, 0x000c, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", 24, 0x000c, CLK_ENABLE_ON_INIT),
+	[DIV4_I] = DIV4("cpu_clk", 28, 0x000e, CLK_ENABLE_ON_INIT),
 };
 
 #define MSTPCR0		0xffc80030
@@ -198,22 +84,22 @@ static struct clk *clks[] = {
 
 static struct clk mstp_clks[] = {
 	/* MSTPCR0 */
-	SH_CLK_MSTP32("scif_fck", 5, &peripheral_clk, MSTPCR0, 29, 0),
-	SH_CLK_MSTP32("scif_fck", 4, &peripheral_clk, MSTPCR0, 28, 0),
-	SH_CLK_MSTP32("scif_fck", 3, &peripheral_clk, MSTPCR0, 27, 0),
-	SH_CLK_MSTP32("scif_fck", 2, &peripheral_clk, MSTPCR0, 26, 0),
-	SH_CLK_MSTP32("scif_fck", 1, &peripheral_clk, MSTPCR0, 25, 0),
-	SH_CLK_MSTP32("scif_fck", 0, &peripheral_clk, MSTPCR0, 24, 0),
-	SH_CLK_MSTP32("ssi_fck", 1, &peripheral_clk, MSTPCR0, 21, 0),
-	SH_CLK_MSTP32("ssi_fck", 0, &peripheral_clk, MSTPCR0, 20, 0),
-	SH_CLK_MSTP32("hac_fck", 1, &peripheral_clk, MSTPCR0, 17, 0),
-	SH_CLK_MSTP32("hac_fck", 0, &peripheral_clk, MSTPCR0, 16, 0),
-	SH_CLK_MSTP32("mmcif_fck", -1, &peripheral_clk, MSTPCR0, 13, 0),
-	SH_CLK_MSTP32("flctl_fck", -1, &peripheral_clk, MSTPCR0, 12, 0),
-	SH_CLK_MSTP32("tmu345_fck", -1, &peripheral_clk, MSTPCR0, 9, 0),
-	SH_CLK_MSTP32("tmu012_fck", -1, &peripheral_clk, MSTPCR0, 8, 0),
-	SH_CLK_MSTP32("siof_fck", -1, &peripheral_clk, MSTPCR0, 3, 0),
-	SH_CLK_MSTP32("hspi_fck", -1, &peripheral_clk, MSTPCR0, 2, 0),
+	SH_CLK_MSTP32("scif_fck", 5, &div4_clks[DIV4_P], MSTPCR0, 29, 0),
+	SH_CLK_MSTP32("scif_fck", 4, &div4_clks[DIV4_P], MSTPCR0, 28, 0),
+	SH_CLK_MSTP32("scif_fck", 3, &div4_clks[DIV4_P], MSTPCR0, 27, 0),
+	SH_CLK_MSTP32("scif_fck", 2, &div4_clks[DIV4_P], MSTPCR0, 26, 0),
+	SH_CLK_MSTP32("scif_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 25, 0),
+	SH_CLK_MSTP32("scif_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 24, 0),
+	SH_CLK_MSTP32("ssi_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 21, 0),
+	SH_CLK_MSTP32("ssi_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 20, 0),
+	SH_CLK_MSTP32("hac_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 17, 0),
+	SH_CLK_MSTP32("hac_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 16, 0),
+	SH_CLK_MSTP32("mmcif_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 13, 0),
+	SH_CLK_MSTP32("flctl_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 12, 0),
+	SH_CLK_MSTP32("tmu345_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 9, 0),
+	SH_CLK_MSTP32("tmu012_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 8, 0),
+	SH_CLK_MSTP32("siof_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 3, 0),
+	SH_CLK_MSTP32("hspi_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 2, 0),
 
 	/* MSTPCR1 */
 	SH_CLK_MSTP32("hudi_fck", -1, NULL, MSTPCR1, 19, 0),
@@ -231,6 +117,9 @@ int __init arch_clk_init(void)
 		ret |= clk_register(clks[i]);
 
 	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, ARRAY_SIZE(div4_clks),
+					   &div4_table);
+	if (!ret)
 		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
 
 	return ret;
-- 
cgit v0.10.2


From 7c922de709b90badc19705e4f998e6d5b44c419b Mon Sep 17 00:00:00 2001
From: Nickolas Lloyd <ultrageek.lloyd@gmail.com>
Date: Mon, 1 Jun 2009 11:12:29 +0200
Subject: ALSA: hda - Jack Mode changes for Sigmatel boards

This patch changes Line In as Out Switch and Mic In as Out Switch to
enums for consistency, and causes all mic and line in ports to be probed
and controls to be added appropriately.

Signed-off-by:  Nickolas Lloyd <ultrageek.lloyd@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index a915f40..48f4a36 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -670,62 +670,6 @@ static unsigned int stac92xx_vref_get(struct hda_codec *codec, hda_nid_t nid)
 	return vref;
 }
 
-static int stac92xx_dc_bias_put(struct snd_kcontrol *kcontrol,
-				struct snd_ctl_elem_value *ucontrol)
-{
-	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	unsigned int new_vref;
-	unsigned int error;
-
-	if (ucontrol->value.enumerated.item[0] == 0)
-		new_vref = AC_PINCTL_VREF_80;
-	else if (ucontrol->value.enumerated.item[0] == 1)
-		new_vref = AC_PINCTL_VREF_GRD;
-	else
-		new_vref = AC_PINCTL_VREF_HIZ;
-
-	if (new_vref != stac92xx_vref_get(codec, kcontrol->private_value)) {
-		error = stac92xx_vref_set(codec,
-					kcontrol->private_value, new_vref);
-		return error;
-	}
-
-	return 0;
-}
-
-static int stac92xx_dc_bias_get(struct snd_kcontrol *kcontrol,
-				struct snd_ctl_elem_value *ucontrol)
-{
-	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-	unsigned int vref = stac92xx_vref_get(codec, kcontrol->private_value);
-	if (vref == AC_PINCTL_VREF_80)
-		ucontrol->value.enumerated.item[0] = 0;
-	else if (vref == AC_PINCTL_VREF_GRD)
-		ucontrol->value.enumerated.item[0] = 1;
-	else if (vref == AC_PINCTL_VREF_HIZ)
-		ucontrol->value.enumerated.item[0] = 2;
-
-	return 0;
-}
-
-static int stac92xx_dc_bias_info(struct snd_kcontrol *kcontrol,
-				struct snd_ctl_elem_info *uinfo)
-{
-	static char *texts[] = {
-		"Mic In", "Line In", "Line Out"
-	};
-
-	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
-	uinfo->value.enumerated.items = 3;
-	uinfo->count = 1;
-	if (uinfo->value.enumerated.item >= 3)
-		uinfo->value.enumerated.item = 2;
-	strcpy(uinfo->value.enumerated.name,
-		texts[uinfo->value.enumerated.item]);
-
-	return 0;
-}
-
 static int stac92xx_mux_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
@@ -2652,7 +2596,8 @@ static int stac92xx_build_pcms(struct hda_codec *codec)
 	return 0;
 }
 
-static unsigned int stac92xx_get_vref(struct hda_codec *codec, hda_nid_t nid)
+static unsigned int stac92xx_get_default_vref(struct hda_codec *codec,
+					hda_nid_t nid)
 {
 	unsigned int pincap = snd_hda_query_pin_caps(codec, nid);
 	pincap = (pincap & AC_PINCAP_VREF) >> AC_PINCAP_VREF_SHIFT;
@@ -2706,15 +2651,108 @@ static int stac92xx_hp_switch_put(struct snd_kcontrol *kcontrol,
 	return 1;
 }
 
-#define stac92xx_io_switch_info		snd_ctl_boolean_mono_info
+static int stac92xx_dc_bias_info(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_info *uinfo)
+{
+	int i;
+	static char *texts[] = {
+		"Mic In", "Line In", "Line Out"
+	};
+
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct sigmatel_spec *spec = codec->spec;
+	hda_nid_t nid = kcontrol->private_value;
+
+	if (nid == spec->mic_switch || nid == spec->line_switch)
+		i = 3;
+	else
+		i = 2;
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->value.enumerated.items = i;
+	uinfo->count = 1;
+	if (uinfo->value.enumerated.item >= i)
+		uinfo->value.enumerated.item = i-1;
+	strcpy(uinfo->value.enumerated.name,
+		texts[uinfo->value.enumerated.item]);
+
+	return 0;
+}
+
+static int stac92xx_dc_bias_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	hda_nid_t nid = kcontrol->private_value;
+	unsigned int vref = stac92xx_vref_get(codec, nid);
+
+	if (vref == stac92xx_get_default_vref(codec, nid))
+		ucontrol->value.enumerated.item[0] = 0;
+	else if (vref == AC_PINCTL_VREF_GRD)
+		ucontrol->value.enumerated.item[0] = 1;
+	else if (vref == AC_PINCTL_VREF_HIZ)
+		ucontrol->value.enumerated.item[0] = 2;
+
+	return 0;
+}
+
+static int stac92xx_dc_bias_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int new_vref = 0;
+	unsigned int error;
+	hda_nid_t nid = kcontrol->private_value;
+
+	if (ucontrol->value.enumerated.item[0] == 0)
+		new_vref = stac92xx_get_default_vref(codec, nid);
+	else if (ucontrol->value.enumerated.item[0] == 1)
+		new_vref = AC_PINCTL_VREF_GRD;
+	else if (ucontrol->value.enumerated.item[0] == 2)
+		new_vref = AC_PINCTL_VREF_HIZ;
+	else
+		return 0;
+
+	if (new_vref != stac92xx_vref_get(codec, nid)) {
+		error = stac92xx_vref_set(codec, nid, new_vref);
+		return error;
+	}
+
+	return 0;
+}
+
+static int stac92xx_io_switch_info(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[2];
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct sigmatel_spec *spec = codec->spec;
+
+	if (kcontrol->private_value == spec->line_switch)
+		texts[0] = "Line In";
+	else
+		texts[0] = "Mic In";
+	texts[1] = "Line Out";
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->value.enumerated.items = 2;
+	uinfo->count = 1;
+
+	if (uinfo->value.enumerated.item >= 2)
+		uinfo->value.enumerated.item = 1;
+	strcpy(uinfo->value.enumerated.name,
+		texts[uinfo->value.enumerated.item]);
+
+	return 0;
+}
 
 static int stac92xx_io_switch_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct sigmatel_spec *spec = codec->spec;
-	int io_idx = kcontrol-> private_value & 0xff;
+	hda_nid_t nid = kcontrol->private_value;
+	int io_idx = (nid == spec->mic_switch) ? 1 : 0;
 
-	ucontrol->value.integer.value[0] = spec->io_switch[io_idx];
+	ucontrol->value.enumerated.item[0] = spec->io_switch[io_idx];
 	return 0;
 }
 
@@ -2722,9 +2760,9 @@ static int stac92xx_io_switch_put(struct snd_kcontrol *kcontrol, struct snd_ctl_
 {
         struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct sigmatel_spec *spec = codec->spec;
-        hda_nid_t nid = kcontrol->private_value >> 8;
-	int io_idx = kcontrol-> private_value & 0xff;
-	unsigned short val = !!ucontrol->value.integer.value[0];
+	hda_nid_t nid = kcontrol->private_value;
+	int io_idx = (nid == spec->mic_switch) ? 1 : 0;
+	unsigned short val = !!ucontrol->value.enumerated.item[0];
 
 	spec->io_switch[io_idx] = val;
 
@@ -2733,7 +2771,7 @@ static int stac92xx_io_switch_put(struct snd_kcontrol *kcontrol, struct snd_ctl_
 	else {
 		unsigned int pinctl = AC_PINCTL_IN_EN;
 		if (io_idx) /* set VREF for mic */
-			pinctl |= stac92xx_get_vref(codec, nid);
+			pinctl |= stac92xx_get_default_vref(codec, nid);
 		stac92xx_auto_set_pinctl(codec, nid, pinctl);
 	}
 
@@ -2891,6 +2929,34 @@ static struct snd_kcontrol_new stac_input_src_temp = {
 	.put = stac92xx_mux_enum_put,
 };
 
+static inline int stac92xx_add_jack_mode_control(struct hda_codec *codec,
+						hda_nid_t nid, int idx)
+{
+	int def_conf = snd_hda_codec_get_pincfg(codec, nid);
+	int control = 0;
+	struct sigmatel_spec *spec = codec->spec;
+	char name[22];
+
+	if (!((get_defcfg_connect(def_conf)) & AC_JACK_PORT_FIXED)) {
+		if (stac92xx_get_default_vref(codec, nid) == AC_PINCTL_VREF_GRD
+			&& nid == spec->line_switch)
+			control = STAC_CTL_WIDGET_IO_SWITCH;
+		else if (snd_hda_query_pin_caps(codec, nid)
+			& (AC_PINCAP_VREF_GRD << AC_PINCAP_VREF_SHIFT))
+			control = STAC_CTL_WIDGET_DC_BIAS;
+		else if (nid == spec->mic_switch)
+			control = STAC_CTL_WIDGET_IO_SWITCH;
+	}
+
+	if (control) {
+		strcpy(name, auto_pin_cfg_labels[idx]);
+		return stac92xx_add_control(codec->spec, control,
+					strcat(name, " Jack Mode"), nid);
+	}
+
+	return 0;
+}
+
 static int stac92xx_add_input_source(struct sigmatel_spec *spec)
 {
 	struct snd_kcontrol_new *knew;
@@ -3253,7 +3319,9 @@ static int stac92xx_auto_create_multi_out_ctls(struct hda_codec *codec,
 					       const struct auto_pin_cfg *cfg)
 {
 	struct sigmatel_spec *spec = codec->spec;
+	hda_nid_t nid;
 	int err;
+	int idx;
 
 	err = create_multi_out_ctls(codec, cfg->line_outs, cfg->line_out_pins,
 				    spec->multiout.dac_nids,
@@ -3270,20 +3338,13 @@ static int stac92xx_auto_create_multi_out_ctls(struct hda_codec *codec,
 			return err;
 	}
 
-	if (spec->line_switch) {
-		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_IO_SWITCH,
-					   "Line In as Output Switch",
-					   spec->line_switch << 8);
-		if (err < 0)
-			return err;
-	}
-
-	if (spec->mic_switch) {
-		err = stac92xx_add_control(spec, STAC_CTL_WIDGET_DC_BIAS,
-					   "Mic Jack Mode",
-					   spec->mic_switch);
-		if (err < 0)
-			return err;
+	for (idx = AUTO_PIN_MIC; idx <= AUTO_PIN_FRONT_LINE; idx++) {
+		nid = cfg->input_pins[idx];
+		if (nid) {
+			err = stac92xx_add_jack_mode_control(codec, nid, idx);
+			if (err < 0)
+				return err;
+		}
 	}
 
 	return 0;
@@ -4193,7 +4254,7 @@ static int stac92xx_init(struct hda_codec *codec)
 			unsigned int pinctl, conf;
 			if (i == AUTO_PIN_MIC || i == AUTO_PIN_FRONT_MIC) {
 				/* for mic pins, force to initialize */
-				pinctl = stac92xx_get_vref(codec, nid);
+				pinctl = stac92xx_get_default_vref(codec, nid);
 				pinctl |= AC_PINCTL_IN_EN;
 				stac92xx_auto_set_pinctl(codec, nid, pinctl);
 			} else {
-- 
cgit v0.10.2


From 7863d3f7aeae05099a38693a0a7eb7bdc7b2ab05 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 1 Jun 2009 19:38:41 +0900
Subject: sh: Tidy up the optional L2 probing, wire it up for SH7786.

This tidies up the L2 probing, as it may or may not be implemented on a
CPU, regardless of whether it is supported. This converts the cvr
validity checks from BUG_ON()'s to simply clearing the CPU_HAS_L2_CACHE
flag and moving on with life.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index 7d821ce..28a2f0d 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -134,7 +134,7 @@ int __init detect_cpu_and_cache_system(void)
 		boot_cpu_data.icache.ways = 4;
 		boot_cpu_data.dcache.ways = 4;
 		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-			CPU_HAS_LLSC | CPU_HAS_PTEAEX;
+			CPU_HAS_LLSC | CPU_HAS_PTEAEX | CPU_HAS_L2_CACHE;
 		break;
 	case 0x3008:
 		boot_cpu_data.icache.ways = 4;
@@ -228,43 +228,48 @@ int __init detect_cpu_and_cache_system(void)
 	}
 
 	/*
-	 * Setup the L2 cache desc
-	 *
 	 * SH-4A's have an optional PIPT L2.
 	 */
 	if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) {
-		/* Bug if we can't decode the L2 info */
-		BUG_ON(!(cvr & 0xf));
-
-		/* Silicon and specifications have clearly never met.. */
-		cvr ^= 0xf;
-
 		/*
-		 * Size calculation is much more sensible
-		 * than it is for the L1.
-		 *
-		 * Sizes are 128KB, 258KB, 512KB, and 1MB.
+		 * Verify that it really has something hooked up, this
+		 * is the safety net for CPUs that have optional L2
+		 * support yet do not implement it.
 		 */
-		size = (cvr & 0xf) << 17;
+		if ((cvr & 0xf) == 0)
+			boot_cpu_data.flags &= ~CPU_HAS_L2_CACHE;
+		else {
+			/*
+			 * Silicon and specifications have clearly never
+			 * met..
+			 */
+			cvr ^= 0xf;
 
-		BUG_ON(!size);
+			/*
+			 * Size calculation is much more sensible
+			 * than it is for the L1.
+			 *
+			 * Sizes are 128KB, 258KB, 512KB, and 1MB.
+			 */
+			size = (cvr & 0xf) << 17;
 
-		boot_cpu_data.scache.way_incr		= (1 << 16);
-		boot_cpu_data.scache.entry_shift	= 5;
-		boot_cpu_data.scache.ways		= 4;
-		boot_cpu_data.scache.linesz		= L1_CACHE_BYTES;
+			boot_cpu_data.scache.way_incr		= (1 << 16);
+			boot_cpu_data.scache.entry_shift	= 5;
+			boot_cpu_data.scache.ways		= 4;
+			boot_cpu_data.scache.linesz		= L1_CACHE_BYTES;
 
-		boot_cpu_data.scache.entry_mask	=
-			(boot_cpu_data.scache.way_incr -
-			 boot_cpu_data.scache.linesz);
+			boot_cpu_data.scache.entry_mask	=
+				(boot_cpu_data.scache.way_incr -
+				 boot_cpu_data.scache.linesz);
 
-		boot_cpu_data.scache.sets	= size /
-			(boot_cpu_data.scache.linesz *
-			 boot_cpu_data.scache.ways);
+			boot_cpu_data.scache.sets	= size /
+				(boot_cpu_data.scache.linesz *
+				 boot_cpu_data.scache.ways);
 
-		boot_cpu_data.scache.way_size	=
-			(boot_cpu_data.scache.sets *
-			 boot_cpu_data.scache.linesz);
+			boot_cpu_data.scache.way_size	=
+				(boot_cpu_data.scache.sets *
+				 boot_cpu_data.scache.linesz);
+		}
 	}
 
 	return 0;
-- 
cgit v0.10.2


From 0bf8513ed0df64b38edce63411d4b7b368464f47 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 1 Jun 2009 19:50:08 +0900
Subject: sh: Tidy up SH-4A boot_cpu_data.flags probing.

This tidies up the boot_cpu_data.flags probing on SH-4A. All of them have
a few things in common, which we can blindly set, rather than having each
subtype have to set the same flags. We can also make assumptions about
cache ways and the validity of PTEA, so this also kills off CPU_HAS_PTEA
as a config option. There was also a bug in the FPU probing, which is now
tidied up.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index c815975..e9392aa 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -174,7 +174,6 @@ config CPU_SH4
 	bool
 	select CPU_HAS_INTEVT
 	select CPU_HAS_SR_RB
-	select CPU_HAS_PTEA if !CPU_SH4A || CPU_SHX2
 	select CPU_HAS_FPU if !CPU_SH4AL_DSP
 	select SYS_SUPPORTS_TMU
 
diff --git a/arch/sh/Kconfig.cpu b/arch/sh/Kconfig.cpu
index 9eb1712..cd6e3ea 100644
--- a/arch/sh/Kconfig.cpu
+++ b/arch/sh/Kconfig.cpu
@@ -96,9 +96,6 @@ config CPU_HAS_SR_RB
 	  See <file:Documentation/sh/register-banks.txt> for further
 	  information on SR.RB and register banking in the kernel in general.
 
-config CPU_HAS_PTEA
-	bool
-
 config CPU_HAS_PTEAEX
 	bool
 
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index 28a2f0d..6c78d0a 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -60,12 +60,18 @@ int __init detect_cpu_and_cache_system(void)
 		if ((cvr & 0x10000000) == 0)
 			boot_cpu_data.flags |= CPU_HAS_DSP;
 
-		boot_cpu_data.flags |= CPU_HAS_LLSC;
+		boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_PERF_COUNTER;
 		boot_cpu_data.cut_major = pvr & 0x7f;
+
+		boot_cpu_data.icache.ways = 4;
+		boot_cpu_data.dcache.ways = 4;
+	} else {
+		/* And some SH-4 defaults.. */
+		boot_cpu_data.flags |= CPU_HAS_PTEA;
 	}
 
 	/* FPU detection works for everyone */
-	if ((cvr & 0x20000000) == 1)
+	if ((cvr & 0x20000000))
 		boot_cpu_data.flags |= CPU_HAS_FPU;
 
 	/* Mask off the upper chip ID */
@@ -78,25 +84,20 @@ int __init detect_cpu_and_cache_system(void)
 	switch (pvr) {
 	case 0x205:
 		boot_cpu_data.type = CPU_SH7750;
-		boot_cpu_data.flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
-				   CPU_HAS_PERF_COUNTER;
+		boot_cpu_data.flags |= CPU_HAS_P2_FLUSH_BUG |
+				       CPU_HAS_PERF_COUNTER;
 		break;
 	case 0x206:
 		boot_cpu_data.type = CPU_SH7750S;
-		boot_cpu_data.flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
-				   CPU_HAS_PERF_COUNTER;
+		boot_cpu_data.flags |= CPU_HAS_P2_FLUSH_BUG |
+				       CPU_HAS_PERF_COUNTER;
 		break;
 	case 0x1100:
 		boot_cpu_data.type = CPU_SH7751;
-		boot_cpu_data.flags |= CPU_HAS_FPU;
 		break;
 	case 0x2001:
 	case 0x2004:
 		boot_cpu_data.type = CPU_SH7770;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_LLSC;
 		break;
 	case 0x2006:
 	case 0x200A:
@@ -107,45 +108,26 @@ int __init detect_cpu_and_cache_system(void)
 		else
 			boot_cpu_data.type = CPU_SH7780;
 
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-				   CPU_HAS_LLSC;
 		break;
 	case 0x3000:
 	case 0x3003:
 	case 0x3009:
 		boot_cpu_data.type = CPU_SH7343;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_LLSC;
 		break;
 	case 0x3004:
 	case 0x3007:
 		boot_cpu_data.type = CPU_SH7785;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-					  CPU_HAS_LLSC;
 		break;
 	case 0x4004:
 		boot_cpu_data.type = CPU_SH7786;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-			CPU_HAS_LLSC | CPU_HAS_PTEAEX | CPU_HAS_L2_CACHE;
+		boot_cpu_data.flags |= CPU_HAS_PTEAEX | CPU_HAS_L2_CACHE;
 		break;
 	case 0x3008:
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_LLSC;
-
 		switch (prr) {
 		case 0x50:
 		case 0x51:
 			boot_cpu_data.type = CPU_SH7723;
-			boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_L2_CACHE;
+			boot_cpu_data.flags |= CPU_HAS_L2_CACHE;
 			break;
 		case 0x70:
 			boot_cpu_data.type = CPU_SH7366;
@@ -158,17 +140,11 @@ int __init detect_cpu_and_cache_system(void)
 		break;
 	case 0x300b:
 		boot_cpu_data.type = CPU_SH7724;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_FPU | CPU_HAS_L2_CACHE;
+		boot_cpu_data.flags |= CPU_HAS_L2_CACHE;
 		break;
 	case 0x4000:	/* 1st cut */
 	case 0x4001:	/* 2nd cut */
 		boot_cpu_data.type = CPU_SHX3;
-		boot_cpu_data.icache.ways = 4;
-		boot_cpu_data.dcache.ways = 4;
-		boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER |
-					  CPU_HAS_LLSC;
 		break;
 	case 0x700:
 		boot_cpu_data.type = CPU_SH4_501;
@@ -179,7 +155,6 @@ int __init detect_cpu_and_cache_system(void)
 		boot_cpu_data.type = CPU_SH4_202;
 		boot_cpu_data.icache.ways = 2;
 		boot_cpu_data.dcache.ways = 2;
-		boot_cpu_data.flags |= CPU_HAS_FPU;
 		break;
 	case 0x500 ... 0x501:
 		switch (prr) {
@@ -197,18 +172,12 @@ int __init detect_cpu_and_cache_system(void)
 		boot_cpu_data.icache.ways = 2;
 		boot_cpu_data.dcache.ways = 2;
 
-		boot_cpu_data.flags |= CPU_HAS_FPU;
-
 		break;
 	default:
 		boot_cpu_data.type = CPU_SH_NONE;
 		break;
 	}
 
-#ifdef CONFIG_CPU_HAS_PTEA
-	boot_cpu_data.flags |= CPU_HAS_PTEA;
-#endif
-
 	/*
 	 * On anything that's not a direct-mapped cache, look to the CVR
 	 * for I/D-cache specifics.
-- 
cgit v0.10.2


From 39d8bbedb9571a89d638f5b05358f26ab503d7a6 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 1 Jun 2009 13:46:49 +0200
Subject: hwmon: (lm78) Add missing __devexit_p()

The remove function uses __devexit, so the .remove assignment needs
__devexit_p() to fix a build error with hotplug disabled.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index b5e3b28..a1787fd 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -182,7 +182,7 @@ static struct platform_driver lm78_isa_driver = {
 		.name	= "lm78",
 	},
 	.probe		= lm78_isa_probe,
-	.remove		= lm78_isa_remove,
+	.remove		= __devexit_p(lm78_isa_remove),
 };
 
 
-- 
cgit v0.10.2


From d54d462472a16fc07adb53a2fcd6c0c2a9a8dd1d Mon Sep 17 00:00:00 2001
From: Christian Engelmayer <Christian.Engelmayer@frequentis.com>
Date: Mon, 1 Jun 2009 13:46:50 +0200
Subject: hwmon: Update documentation on fan_max

Add fan_max description.

Add fan limit alarm 'max_alarm' to the alarm section.

Signed-off-by: Christian Engelmayer <christian.engelmayer@frequentis.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>

diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index 2f10ce6..004ee16 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -150,6 +150,11 @@ fan[1-*]_min	Fan minimum value
 		Unit: revolution/min (RPM)
 		RW
 
+fan[1-*]_max	Fan maximum value
+		Unit: revolution/min (RPM)
+		Only rarely supported by the hardware.
+		RW
+
 fan[1-*]_input	Fan input value.
 		Unit: revolution/min (RPM)
 		RO
@@ -390,6 +395,7 @@ OR
 in[0-*]_min_alarm
 in[0-*]_max_alarm
 fan[1-*]_min_alarm
+fan[1-*]_max_alarm
 temp[1-*]_min_alarm
 temp[1-*]_max_alarm
 temp[1-*]_crit_alarm
-- 
cgit v0.10.2


From 0e4835c198e7dd9d814de25a17a1d6682107c394 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Mon, 25 May 2009 16:44:20 +0200
Subject: ALSA: hda-intel: improve initialization for ALC262_HP_BPC model

Fix issues for 3 generations of HP workstations.

The modest modifications do the following:
1. Change the second MIC from device 3 to device 1
2. Init the "boost" values to "0" by default

From: John Brown <john.brown3@hp.com>
Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index add920a..d0786e9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10639,31 +10639,46 @@ static struct hda_verb alc262_HP_BPC_init_verbs[] = {
 	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
 	{0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
 
-	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0x7023 },
+	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
 	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
         {0x19, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
-	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0x7023 },
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
 	{0x1c, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
 	{0x1d, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000 },
 
 
 	/* FIXME: use matrix-type input source selection */
-	/* Mixer elements: 0x18, 19, 1a, 1b, 1c, 1d, 14, 15, 16, 17, 0b */
-	/* Input mixer1: unmute Mic, F-Mic, Line, CD inputs */
+	/* Mixer elements: 0x18, 19, 1a, 1b, 1c, 1d, 14, 15, 0b, 12 */
+	/* Input mixer1: only unmute Mic */
 	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x03 << 8))},
-	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
-	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x05 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x06 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x07 << 8))},
+	{0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x08 << 8))},
 	/* Input mixer2 */
 	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x03 << 8))},
-	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
-	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x05 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x06 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x07 << 8))},
+	{0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x08 << 8))},
 	/* Input mixer3 */
 	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x03 << 8))},
-	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
-	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x05 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x06 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x07 << 8))},
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x08 << 8))},
 
 	{0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
 
-- 
cgit v0.10.2


From 22a4f650d686eeaac3629dae1c4294381485efdf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 1 Jun 2009 10:13:37 +0200
Subject: perf_counter: Tidy up style details

 - whitespace fixlets
 - make local variable definitions more consistent

[ Impact: cleanup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 81ec79c..0e57d8c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -562,7 +562,7 @@ struct perf_cpu_context {
 	 *
 	 * task, softirq, irq, nmi context
 	 */
-	int			recursion[4];
+	int				recursion[4];
 };
 
 #ifdef CONFIG_PERF_COUNTERS
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ff8b463..df319c4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -16,8 +16,9 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/sysfs.h>
-#include <linux/ptrace.h>
+#include <linux/dcache.h>
 #include <linux/percpu.h>
+#include <linux/ptrace.h>
 #include <linux/vmstat.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
@@ -26,7 +27,6 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
-#include <linux/dcache.h>
 
 #include <asm/irq_regs.h>
 
@@ -65,7 +65,9 @@ void __weak hw_perf_disable(void)		{ barrier(); }
 void __weak hw_perf_enable(void)		{ barrier(); }
 
 void __weak hw_perf_counter_setup(int cpu)	{ barrier(); }
-int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
+
+int __weak
+hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu)
 {
@@ -127,8 +129,8 @@ static void put_ctx(struct perf_counter_context *ctx)
  * This has to cope with with the fact that until it is locked,
  * the context could get moved to another task.
  */
-static struct perf_counter_context *perf_lock_task_context(
-				struct task_struct *task, unsigned long *flags)
+static struct perf_counter_context *
+perf_lock_task_context(struct task_struct *task, unsigned long *flags)
 {
 	struct perf_counter_context *ctx;
 
@@ -1330,9 +1332,9 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 
 static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
-	struct perf_cpu_context *cpuctx;
-	struct perf_counter_context *ctx;
 	struct perf_counter_context *parent_ctx;
+	struct perf_counter_context *ctx;
+	struct perf_cpu_context *cpuctx;
 	struct task_struct *task;
 	unsigned long flags;
 	int err;
@@ -1664,8 +1666,8 @@ int perf_counter_task_disable(void)
  */
 void perf_counter_update_userpage(struct perf_counter *counter)
 {
-	struct perf_mmap_data *data;
 	struct perf_counter_mmap_page *userpg;
+	struct perf_mmap_data *data;
 
 	rcu_read_lock();
 	data = rcu_dereference(counter->data);
@@ -1769,10 +1771,11 @@ fail:
 
 static void __perf_mmap_data_free(struct rcu_head *rcu_head)
 {
-	struct perf_mmap_data *data = container_of(rcu_head,
-			struct perf_mmap_data, rcu_head);
+	struct perf_mmap_data *data;
 	int i;
 
+	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
+
 	free_page((unsigned long)data->user_page);
 	for (i = 0; i < data->nr_pages; i++)
 		free_page((unsigned long)data->data_pages[i]);
@@ -1801,8 +1804,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	struct perf_counter *counter = vma->vm_file->private_data;
 
 	WARN_ON_ONCE(counter->ctx->parent_ctx);
-	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
-				      &counter->mmap_mutex)) {
+	if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) {
 		struct user_struct *user = current_user();
 
 		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
@@ -1821,11 +1823,11 @@ static struct vm_operations_struct perf_mmap_vmops = {
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
+	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
+	unsigned long locked, lock_limit;
 	unsigned long vma_size;
 	unsigned long nr_pages;
-	unsigned long user_locked, user_lock_limit;
-	unsigned long locked, lock_limit;
 	long user_extra, extra;
 	int ret = 0;
 
@@ -1900,8 +1902,8 @@ unlock:
 
 static int perf_fasync(int fd, struct file *filp, int on)
 {
-	struct perf_counter *counter = filp->private_data;
 	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct perf_counter *counter = filp->private_data;
 	int retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -2412,8 +2414,8 @@ static void perf_counter_output(struct perf_counter *counter,
  */
 
 struct perf_comm_event {
-	struct task_struct 	*task;
-	char 			*comm;
+	struct task_struct	*task;
+	char			*comm;
 	int			comm_size;
 
 	struct {
@@ -2932,6 +2934,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 			       int nmi, struct pt_regs *regs, u64 addr)
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
+
 	if (counter->hw.irq_period && !neg)
 		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
@@ -3526,7 +3529,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * Make the child state follow the state of the parent counter,
 	 * not its hw_event.disabled bit.  We hold the parent's mutex,
-	 * so we won't race with perf_counter_{en,dis}able_family.
+	 * so we won't race with perf_counter_{en, dis}able_family.
 	 */
 	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-- 
cgit v0.10.2


From 229c4eedcedcdadf70411120ba34bc37554a74bd Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Thu, 28 May 2009 16:28:53 +0200
Subject: perf_counter tools: Guard against record damaging existing files

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 23d1224..96bfb7c 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -340,7 +340,7 @@ static int __cmd_record(int argc, const char **argv)
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
 
-	output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
+	output = open(output_name, O_CREAT|O_EXCL|O_RDWR, S_IRWXU);
 	if (output < 0) {
 		perror("failed to create output file");
 		exit(-1);
-- 
cgit v0.10.2


From 4156e735d3abde8e9243b5d22f7999dd3fffab2e Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Mon, 1 Jun 2009 13:13:24 -0500
Subject: xfs: prevent deadlock in xfs_qm_shake()

It's possible to recurse into filesystem from the memory
allocation, which deadlocks in xfs_qm_shake(). Add check
for __GFP_FS, and bail out if it is not set.

Signed-off-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Hedi Berriche <hedi@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index af6843c..179cbd6 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 static inline int
 kmem_shake_allow(gfp_t gfp_mask)
 {
-	return (gfp_mask & __GFP_WAIT) != 0;
+	return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
 }
 
 #endif /* __XFS_SUPPORT_KMEM_H__ */
-- 
cgit v0.10.2


From eaf1ac8bb58888e0773c0b81dfedb9d7c0123a1d Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Date: Mon, 1 Jun 2009 14:06:40 +0300
Subject: ASoC: TWL4030: Check the interface format for 4 channel mode

In addition to the operating mode check, also check the
codec's interface format in case of four channel mode.
If the codec is not in TDM (DSP_A) mode, return with error.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index df474a5..c53c7ca 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -1608,9 +1608,15 @@ static int twl4030_hw_params(struct snd_pcm_substream *substream,
 
 	 /* If the substream has 4 channel, do the necessary setup */
 	if (params_channels(params) == 4) {
-		/* Safety check: are we in the correct operating mode? */
-		if ((twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE) &
-			TWL4030_OPTION_1))
+		u8 format, mode;
+
+		format = twl4030_read_reg_cache(codec, TWL4030_REG_AUDIO_IF);
+		mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE);
+
+		/* Safety check: are we in the correct operating mode and
+		 * the interface is in TDM mode? */
+		if ((mode & TWL4030_OPTION_1) &&
+		    ((format & TWL4030_AIF_FORMAT) == TWL4030_AIF_FORMAT_TDM))
 			twl4030_tdm_enable(codec, substream->stream, 1);
 		else
 			return -EINVAL;
-- 
cgit v0.10.2


From 92b9de8342c6ad0d930333851190ed25b88b190c Mon Sep 17 00:00:00 2001
From: Kacper Szczesniak <kacper@qwe.pl>
Date: Tue, 2 Jun 2009 00:55:19 +0200
Subject: ALSA: hda - Macbook[Pro] 5 6ch support

this is a patch against current snapshot that adds:
6 channels support for the MB5 model

Signed-off-by: Kacper Szczesniak <kacper@qwe.pl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index d0786e9..12d6015 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6266,6 +6266,34 @@ static struct hda_channel_mode alc885_mbp_6ch_modes[2] = {
 	{ 6, alc885_mbp_ch6_init },
 };
 
+/*
+ * 2ch
+ * Speakers/Woofer/HP = Front
+ * LineIn = Input
+ */
+static struct hda_verb alc885_mb5_ch2_init[] = {
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+	{ } /* end */
+};
+
+/*
+ * 6ch mode
+ * Speakers/HP = Front
+ * Woofer = LFE
+ * LineIn = Surround
+ */
+static struct hda_verb alc885_mb5_ch6_init[] = {
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x01},
+	{ } /* end */
+};
+
+static struct hda_channel_mode alc885_mb5_6ch_modes[2] = {
+	{ 2, alc885_mb5_ch2_init },
+	{ 6, alc885_mb5_ch6_init },
+};
 
 /* Pin assignment: Front=0x14, Rear=0x15, CLFE=0x16, Side=0x17
  *                 Mic=0x18, Front Mic=0x19, Line-In=0x1a, HP=0x1b
@@ -6310,10 +6338,14 @@ static struct snd_kcontrol_new alc885_mbp3_mixer[] = {
 };
 
 static struct snd_kcontrol_new alc885_mb5_mixer[] = {
-	HDA_CODEC_VOLUME("Front Playback Volume", 0x0d, 0x00, HDA_OUTPUT),
-	HDA_BIND_MUTE   ("Front Playback Switch", 0x0d, 0x02, HDA_INPUT),
-	HDA_CODEC_VOLUME("Line-Out Playback Volume", 0x0c, 0x00, HDA_OUTPUT),
-	HDA_BIND_MUTE   ("Line-Out Playback Switch", 0x0c, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("Front Playback Switch", 0x0c, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("Surround Playback Switch", 0x0d, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("LFE Playback Volume", 0x0e, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("LFE Playback Switch", 0x0e, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("HP Playback Volume", 0x0f, 0x00, HDA_OUTPUT),
+	HDA_BIND_MUTE   ("HP Playback Switch", 0x0f, 0x02, HDA_INPUT),
 	HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
 	HDA_CODEC_MUTE  ("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
 	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
@@ -6322,6 +6354,7 @@ static struct snd_kcontrol_new alc885_mb5_mixer[] = {
 	HDA_CODEC_VOLUME("Mic Boost", 0x19, 0x00, HDA_INPUT),
 	{ } /* end */
 };
+
 static struct snd_kcontrol_new alc882_w2jc_mixer[] = {
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
@@ -6551,22 +6584,39 @@ static struct hda_verb alc882_macpro_init_verbs[] = {
 
 /* Macbook 5,1 */
 static struct hda_verb alc885_mb5_init_verbs[] = {
+	/* DACs */
+	{0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 	/* Front mixer */
-	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
-	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
-	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
-	/* LineOut mixer */
 	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
 	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
 	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
-	/* Front Pin: output 0 (0x0d) */
+	/* Surround mixer */
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* LFE mixer */
+	{0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* HP mixer */
+	{0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+	{0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	/* Front Pin (0x0c) */
 	{0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x01},
 	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
-	{0x18, AC_VERB_SET_CONNECT_SEL, 0x01},
-	/* HP Pin: output 0 (0x0c) */
+	{0x18, AC_VERB_SET_CONNECT_SEL, 0x00},
+	/* LFE Pin (0x0e) */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | 0x01},
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x1a, AC_VERB_SET_CONNECT_SEL, 0x02},
+	/* HP Pin (0x0f) */
 	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
 	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
-	{0x14, AC_VERB_SET_CONNECT_SEL, 0x00},
+	{0x14, AC_VERB_SET_CONNECT_SEL, 0x03},
 	/* Front Mic pin: input vref at 80% */
 	{0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
 	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
@@ -6986,13 +7036,13 @@ static struct alc_config_preset alc882_presets[] = {
 		.init_hook = alc885_mbp3_init_hook,
 	},
 	[ALC885_MB5] = {
-		.mixers = { alc885_mb5_mixer },
+		.mixers = { alc885_mb5_mixer, alc882_chmode_mixer },
 		.init_verbs = { alc885_mb5_init_verbs,
 				alc880_gpio1_init_verbs },
 		.num_dacs = ARRAY_SIZE(alc882_dac_nids),
 		.dac_nids = alc882_dac_nids,
-		.channel_mode = alc885_mbp_6ch_modes,
-		.num_channel_mode = ARRAY_SIZE(alc885_mbp_6ch_modes),
+		.channel_mode = alc885_mb5_6ch_modes,
+		.num_channel_mode = ARRAY_SIZE(alc885_mb5_6ch_modes),
 		.input_mux = &mb5_capture_source,
 		.dig_out_nid = ALC882_DIGOUT_NID,
 		.dig_in_nid = ALC882_DIGIN_NID,
-- 
cgit v0.10.2


From d22142aa1b23d64d01f87a94b5756ff6cbd1022f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ozan=20=C3=87a=C4=9Flayan?= <ozan@pardus.org.tr>
Date: Mon, 1 Jun 2009 23:13:17 +0300
Subject: ALSA: hda - fix audio on LG R510
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, LG R510 is only able to produce sound on headphones, the
internal speakers are not working.

The user tested and confirmed that with model=Dell headphones,
internal speakers and the microphone are working flawlessly.

Tested-by: Serdar Soytetir <tulliana@gmail.com>
Signed-off-by: Ozan Çağlayan <ozan@pardus.org.tr>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 12d6015..56c2f84 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -12111,6 +12111,7 @@ static struct snd_pci_quirk alc268_cfg_tbl[] = {
 						ALC268_ACER_ASPIRE_ONE),
 	SND_PCI_QUIRK(0x1028, 0x0253, "Dell OEM", ALC268_DELL),
 	SND_PCI_QUIRK(0x1028, 0x02b0, "Dell Inspiron Mini9", ALC268_DELL),
+	SND_PCI_QUIRK(0x1854, 0x1775, "LG R510", ALC268_DELL),
 	SND_PCI_QUIRK(0x103c, 0x30cc, "TOSHIBA", ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x103c, 0x30f1, "HP TX25xx series", ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST),
-- 
cgit v0.10.2


From 8871e5b91518a47284b6bc2603b44dbc79c85446 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 01:02:50 +0200
Subject: ALSA: hda - Reorder and clean-up ALC268 quirk table

Rearrange alc268_cfg_tbl[] in the order of vendor id, and group some
entries using SND_PCI_QUIRK_MASK().

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 56c2f84..ad6cc42 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -12111,17 +12111,16 @@ static struct snd_pci_quirk alc268_cfg_tbl[] = {
 						ALC268_ACER_ASPIRE_ONE),
 	SND_PCI_QUIRK(0x1028, 0x0253, "Dell OEM", ALC268_DELL),
 	SND_PCI_QUIRK(0x1028, 0x02b0, "Dell Inspiron Mini9", ALC268_DELL),
-	SND_PCI_QUIRK(0x1854, 0x1775, "LG R510", ALC268_DELL),
-	SND_PCI_QUIRK(0x103c, 0x30cc, "TOSHIBA", ALC268_TOSHIBA),
-	SND_PCI_QUIRK(0x103c, 0x30f1, "HP TX25xx series", ALC268_TOSHIBA),
+	SND_PCI_QUIRK_MASK(0x103c, 0xff00, 0x3000, "HP TX25xx series",
+			   ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST),
-	SND_PCI_QUIRK(0x1179, 0xff10, "TOSHIBA A205", ALC268_TOSHIBA),
-	SND_PCI_QUIRK(0x1179, 0xff50, "TOSHIBA A305", ALC268_TOSHIBA),
-	SND_PCI_QUIRK(0x1179, 0xff64, "TOSHIBA L305", ALC268_TOSHIBA),
+	SND_PCI_QUIRK(0x1170, 0x0040, "ZEPTO", ALC268_ZEPTO),
+	SND_PCI_QUIRK_MASK(0x1179, 0xff00, 0xff00, "TOSHIBA A/Lx05",
+			   ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x14c0, 0x0025, "COMPAL IFL90/JFL-92", ALC268_TOSHIBA),
 	SND_PCI_QUIRK(0x152d, 0x0763, "Diverse (CPR2000)", ALC268_ACER),
 	SND_PCI_QUIRK(0x152d, 0x0771, "Quanta IL1", ALC267_QUANTA_IL1),
-	SND_PCI_QUIRK(0x1170, 0x0040, "ZEPTO", ALC268_ZEPTO),
+	SND_PCI_QUIRK(0x1854, 0x1775, "LG R510", ALC268_DELL),
 	{}
 };
 
-- 
cgit v0.10.2


From fb39125fd79a25c5002f3b45cf4c80e3fa6b961b Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 17 Apr 2009 15:15:51 +0800
Subject: ftrace, workqueuetrace: make workqueue tracepoints use TRACE_EVENT
 macro

v3: zhaolei@cn.fujitsu.com: Change TRACE_EVENT definition to new format
    introduced by Steven Rostedt: consolidate trace and trace_event headers
v2: kosaki@jp.fujitsu.com: print the function names instead of addr, and zap
    the work addr
v1: zhaolei@cn.fujitsu.com: Make workqueue tracepoints use TRACE_EVENT macro

TRACE_EVENT is a more generic way to define tracepoints.
Doing so adds these new capabilities to the tracepoints:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

Then, this patch converts DEFINE_TRACE to TRACE_EVENT in workqueue related
tracepoints.

[ Impact: expand workqueue tracer to events tracing ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644
index 0000000..035f1bf
--- /dev/null
+++ b/include/trace/events/workqueue.h
@@ -0,0 +1,100 @@
+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WORKQUEUE_H
+
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+TRACE_EVENT(workqueue_insertion,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+TRACE_EVENT(workqueue_execution,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+/* Trace the creation of one workqueue thread on a cpu */
+TRACE_EVENT(workqueue_creation,
+
+	TP_PROTO(struct task_struct *wq_thread, int cpu),
+
+	TP_ARGS(wq_thread, cpu),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+		__field(int,	cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->cpu		= cpu;
+	),
+
+	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
+		__entry->thread_pid, __entry->cpu)
+);
+
+TRACE_EVENT(workqueue_destruction,
+
+	TP_PROTO(struct task_struct *wq_thread),
+
+	TP_ARGS(wq_thread),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+	),
+
+	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
+#endif /* _TRACE_WORKQUEUE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
deleted file mode 100644
index 7626523..0000000
--- a/include/trace/workqueue.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __TRACE_WORKQUEUE_H
-#define __TRACE_WORKQUEUE_H
-
-#include <linux/tracepoint.h>
-#include <linux/workqueue.h>
-#include <linux/sched.h>
-
-DECLARE_TRACE(workqueue_insertion,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-DECLARE_TRACE(workqueue_execution,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-/* Trace the creation of one workqueue thread on a cpu */
-DECLARE_TRACE(workqueue_creation,
-	   TP_PROTO(struct task_struct *wq_thread, int cpu),
-	   TP_ARGS(wq_thread, cpu));
-
-DECLARE_TRACE(workqueue_destruction,
-	   TP_PROTO(struct task_struct *wq_thread),
-	   TP_ARGS(wq_thread));
-
-#endif /* __TRACE_WORKQUEUE_H */
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 984b917..cfe56d3 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -6,7 +6,7 @@
  */
 
 
-#include <trace/workqueue.h>
+#include <trace/events/workqueue.h>
 #include <linux/list.h>
 #include <linux/percpu.h>
 #include "trace_stat.h"
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f71fb2a..0668795 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,7 +33,8 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
-#include <trace/workqueue.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/workqueue.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -124,8 +125,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
-DEFINE_TRACE(workqueue_insertion);
-
 static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head)
 {
@@ -262,8 +261,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
-DEFINE_TRACE(workqueue_execution);
-
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
 	spin_lock_irq(&cwq->lock);
@@ -753,8 +750,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
 	return cwq;
 }
 
-DEFINE_TRACE(workqueue_creation);
-
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -860,8 +855,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
-DEFINE_TRACE(workqueue_destruction);
-
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
 	/*
-- 
cgit v0.10.2


From 1fdfca9c577aac96a559c1ea68f5c9156f17d636 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Mon, 20 Apr 2009 14:58:26 +0800
Subject: trace_workqueue: use list_for_each_entry() instead of
 list_for_each_entry_safe()

No need to use list_for_each_entry_safe() in iteration without deleting
any node, we can use list_for_each_entry() instead.

[ Impact: cleanup ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index cfe56d3..128b64b 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -47,12 +47,11 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
-	struct cpu_workqueue_stats *node, *next;
+	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			atomic_inc(&node->inserted);
 			goto found;
@@ -69,12 +68,11 @@ probe_workqueue_execution(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
-	struct cpu_workqueue_stats *node, *next;
+	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
 			goto found;
-- 
cgit v0.10.2


From b8867164f05791a6b5363bd51c1274e03600886e Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Mon, 20 Apr 2009 14:59:36 +0800
Subject: trace_workqueue: remove cpu_workqueue_stats->first_entry

cpu_workqueue_stats->first_entry is useless because we can retrieve the
header of a cpu workqueue using:
if (&cpu_workqueue_stats->list == workqueue_cpu_stat(cpu)->list.next)

[ Impact: cleanup ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 128b64b..890974a 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -16,8 +16,6 @@
 /* A cpu workqueue thread */
 struct cpu_workqueue_stats {
 	struct list_head            list;
-/* Useful to know if we print the cpu headers */
-	bool		            first_entry;
 	int		            cpu;
 	pid_t			    pid;
 /* Can be inserted from interrupt or user context, need to be atomic */
@@ -103,8 +101,6 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
 	cws->pid = wq_thread->pid;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (list_empty(&workqueue_cpu_stat(cpu)->list))
-		cws->first_entry = true;
 	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
-- 
cgit v0.10.2


From f3c4ae26e93d354152196b62797ba86ad86dd0cc Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Mon, 20 Apr 2009 15:02:17 +0800
Subject: trace_workqueue: remove blank line between each cpu

The blankline between each cpu's workqueue stat is not necessary, because
the cpu number is enough to part them by eye.
Old style also caused a blankline below headline, and made code complex
by using lock, disableirq and get cpu var.

Old style:
 # CPU  INSERTED  EXECUTED   NAME
 # |      |         |          |

   0   8644       8644       events/0
   0      0          0       cpuset
   ...
   0      1          1       kdmflush

   1  35365      35365       events/1
   ...

New style:
 # CPU  INSERTED  EXECUTED   NAME
 # |      |         |          |

   0   8644       8644       events/0
   0      0          0       cpuset
   ...
   0      1          1       kdmflush
   1  35365      35365       events/1
   ...

[ Impact: provide more readable code ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 890974a..97fcea4 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -185,16 +185,9 @@ static void *workqueue_stat_next(void *prev, int idx)
 static int workqueue_stat_show(struct seq_file *s, void *p)
 {
 	struct cpu_workqueue_stats *cws = p;
-	unsigned long flags;
-	int cpu = cws->cpu;
 	struct pid *pid;
 	struct task_struct *tsk;
 
-	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
-		seq_printf(s, "\n");
-	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
-
 	pid = find_get_pid(cws->pid);
 	if (pid) {
 		tsk = get_pid_task(pid, PIDTYPE_PID);
-- 
cgit v0.10.2


From 0d64f8342de26d02451900b1aad94716fe92c4ab Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 16 May 2009 05:58:49 +0200
Subject: tracing/stat: replace trace_stat_session by stat_session

The "trace" prefix in struct trace_stat_session type is annoying while
reading the trace_stat.c file. It makes the lines longer, and
is not that much useful to explain the sense of this type.

Just keep "struct stat_session" for this type.

[ Impact: make the code a bit more readable ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index fdde3a4..3b6816b 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -22,7 +22,7 @@ struct trace_stat_list {
 };
 
 /* A stat session is the stats output in one file */
-struct tracer_stat_session {
+struct stat_session {
 	struct list_head	session_list;
 	struct tracer_stat	*ts;
 	struct list_head	stat_list;
@@ -38,7 +38,7 @@ static DEFINE_MUTEX(all_stat_sessions_mutex);
 static struct dentry		*stat_dir;
 
 
-static void reset_stat_session(struct tracer_stat_session *session)
+static void reset_stat_session(struct stat_session *session)
 {
 	struct trace_stat_list *node, *next;
 
@@ -48,7 +48,7 @@ static void reset_stat_session(struct tracer_stat_session *session)
 	INIT_LIST_HEAD(&session->stat_list);
 }
 
-static void destroy_session(struct tracer_stat_session *session)
+static void destroy_session(struct stat_session *session)
 {
 	debugfs_remove(session->file);
 	reset_stat_session(session);
@@ -71,7 +71,7 @@ static int dummy_cmp(void *p1, void *p2)
  * All of these copies and sorting are required on all opening
  * since the stats could have changed between two file sessions.
  */
-static int stat_seq_init(struct tracer_stat_session *session)
+static int stat_seq_init(struct stat_session *session)
 {
 	struct trace_stat_list *iter_entry, *new_entry;
 	struct tracer_stat *ts = session->ts;
@@ -154,7 +154,7 @@ exit_free_list:
 
 static void *stat_seq_start(struct seq_file *s, loff_t *pos)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
 
 	/* Prevent from tracer switch or stat_list modification */
 	mutex_lock(&session->stat_mutex);
@@ -168,7 +168,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
 
 static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
 
 	if (p == SEQ_START_TOKEN)
 		return seq_list_start(&session->stat_list, *pos);
@@ -178,13 +178,13 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
 
 static void stat_seq_stop(struct seq_file *s, void *p)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
 	mutex_unlock(&session->stat_mutex);
 }
 
 static int stat_seq_show(struct seq_file *s, void *v)
 {
-	struct tracer_stat_session *session = s->private;
+	struct stat_session *session = s->private;
 	struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
 
 	if (v == SEQ_START_TOKEN)
@@ -205,7 +205,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
 {
 	int ret;
 
-	struct tracer_stat_session *session = inode->i_private;
+	struct stat_session *session = inode->i_private;
 
 	ret = seq_open(file, &trace_stat_seq_ops);
 	if (!ret) {
@@ -222,7 +222,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
  */
 static int tracing_stat_release(struct inode *i, struct file *f)
 {
-	struct tracer_stat_session *session = i->i_private;
+	struct stat_session *session = i->i_private;
 
 	mutex_lock(&session->stat_mutex);
 	reset_stat_session(session);
@@ -251,7 +251,7 @@ static int tracing_stat_init(void)
 	return 0;
 }
 
-static int init_stat_file(struct tracer_stat_session *session)
+static int init_stat_file(struct stat_session *session)
 {
 	if (!stat_dir && tracing_stat_init())
 		return -ENODEV;
@@ -266,7 +266,7 @@ static int init_stat_file(struct tracer_stat_session *session)
 
 int register_stat_tracer(struct tracer_stat *trace)
 {
-	struct tracer_stat_session *session, *node, *tmp;
+	struct stat_session *session, *node, *tmp;
 	int ret;
 
 	if (!trace)
@@ -286,7 +286,7 @@ int register_stat_tracer(struct tracer_stat *trace)
 	mutex_unlock(&all_stat_sessions_mutex);
 
 	/* Init the session */
-	session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
+	session = kmalloc(sizeof(struct stat_session), GFP_KERNEL);
 	if (!session)
 		return -ENOMEM;
 
@@ -312,7 +312,7 @@ int register_stat_tracer(struct tracer_stat *trace)
 
 void unregister_stat_tracer(struct tracer_stat *trace)
 {
-	struct tracer_stat_session *node, *tmp;
+	struct stat_session *node, *tmp;
 
 	mutex_lock(&all_stat_sessions_mutex);
 	list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
-- 
cgit v0.10.2


From 8f184f27300f66f6dcc8296c2dae7a1fbe8429c9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 16 May 2009 06:24:36 +0200
Subject: tracing/stat: replace linked list by an rbtree for sorting

When the stat tracing framework prepares the entries from a tracer
to output them to the user, it starts by computing a linear sort
through a linked list to give the entries ordered by relevance
to the user.

This is quite ugly and causes a small latency when we begin to
read the file.

This patch changes that by turning the linked list into a red-black
tree. Athough the whole iteration using the start and next tracer
callbacks while opening the file remain the same, it is now much
more fast and scalable.

The rbtree guarantees O(log(n)) insertions whereas a linked
list with linear sorting brought us a O(n) despair. Now the
(visible) latency has disapeared.

[ Impact: kill the latency while starting to read a stat tracer file ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 3b6816b..0bd0fc8 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -1,7 +1,7 @@
 /*
  * Infrastructure for statistic tracing (histogram output).
  *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
  *
  * Based on the code from trace_branch.c which is
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
@@ -10,14 +10,19 @@
 
 
 #include <linux/list.h>
+#include <linux/rbtree.h>
 #include <linux/debugfs.h>
 #include "trace_stat.h"
 #include "trace.h"
 
 
-/* List of stat entries from a tracer */
-struct trace_stat_list {
-	struct list_head	list;
+/*
+ * List of stat red-black nodes from a tracer
+ * We use a such tree to sort quickly the stat
+ * entries from the tracer.
+ */
+struct stat_node {
+	struct rb_node		node;
 	void			*stat;
 };
 
@@ -25,7 +30,7 @@ struct trace_stat_list {
 struct stat_session {
 	struct list_head	session_list;
 	struct tracer_stat	*ts;
-	struct list_head	stat_list;
+	struct rb_root		stat_root;
 	struct mutex		stat_mutex;
 	struct dentry		*file;
 };
@@ -37,15 +42,45 @@ static DEFINE_MUTEX(all_stat_sessions_mutex);
 /* The root directory for all stat files */
 static struct dentry		*stat_dir;
 
+/*
+ * Iterate through the rbtree using a post order traversal path
+ * to release the next node.
+ * It won't necessary release one at each iteration
+ * but it will at least advance closer to the next one
+ * to be released.
+ */
+static struct rb_node *release_next(struct rb_node *node)
+{
+	struct stat_node *snode;
+	struct rb_node *parent = rb_parent(node);
+
+	if (node->rb_left)
+		return node->rb_left;
+	else if (node->rb_right)
+		return node->rb_right;
+	else {
+		if (!parent)
+			return NULL;
+		if (parent->rb_left == node)
+			parent->rb_left = NULL;
+		else
+			parent->rb_right = NULL;
+
+		snode = container_of(node, struct stat_node, node);
+		kfree(snode);
+
+		return parent;
+	}
+}
 
 static void reset_stat_session(struct stat_session *session)
 {
-	struct trace_stat_list *node, *next;
+	struct rb_node *node = session->stat_root.rb_node;
 
-	list_for_each_entry_safe(node, next, &session->stat_list, list)
-		kfree(node);
+	while (node)
+		node = release_next(node);
 
-	INIT_LIST_HEAD(&session->stat_list);
+	session->stat_root = RB_ROOT;
 }
 
 static void destroy_session(struct stat_session *session)
@@ -56,6 +91,35 @@ static void destroy_session(struct stat_session *session)
 	kfree(session);
 }
 
+typedef int (*cmp_stat_t)(void *, void *);
+
+static void
+insert_stat(struct rb_root *root, struct stat_node *data, cmp_stat_t cmp)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/*
+	 * Figure out where to put new node
+	 * This is a descendent sorting
+	 */
+	while (*new) {
+		struct stat_node *this;
+		int result;
+
+		this = container_of(*new, struct stat_node, node);
+		result = cmp(data->stat, this->stat);
+
+		parent = *new;
+		if (result >= 0)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+}
+
 /*
  * For tracers that don't provide a stat_cmp callback.
  * This one will force an immediate insertion on tail of
@@ -73,8 +137,9 @@ static int dummy_cmp(void *p1, void *p2)
  */
 static int stat_seq_init(struct stat_session *session)
 {
-	struct trace_stat_list *iter_entry, *new_entry;
 	struct tracer_stat *ts = session->ts;
+	struct stat_node *new_entry;
+	struct rb_root *root;
 	void *stat;
 	int ret = 0;
 	int i;
@@ -93,15 +158,13 @@ static int stat_seq_init(struct stat_session *session)
 	 * The first entry. Actually this is the second, but the first
 	 * one (the stat_list head) is pointless.
 	 */
-	new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
+	new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
 	if (!new_entry) {
 		ret = -ENOMEM;
 		goto exit;
 	}
-
-	INIT_LIST_HEAD(&new_entry->list);
-
-	list_add(&new_entry->list, &session->stat_list);
+	root = &session->stat_root;
+	insert_stat(root, new_entry, dummy_cmp);
 
 	new_entry->stat = stat;
 
@@ -116,31 +179,17 @@ static int stat_seq_init(struct stat_session *session)
 		if (!stat)
 			break;
 
-		new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
+		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
 		if (!new_entry) {
 			ret = -ENOMEM;
 			goto exit_free_list;
 		}
 
-		INIT_LIST_HEAD(&new_entry->list);
 		new_entry->stat = stat;
 
-		list_for_each_entry_reverse(iter_entry, &session->stat_list,
-				list) {
-
-			/* Insertion with a descendent sorting */
-			if (ts->stat_cmp(iter_entry->stat,
-					new_entry->stat) >= 0) {
-
-				list_add(&new_entry->list, &iter_entry->list);
-				break;
-			}
-		}
-
-		/* The current larger value */
-		if (list_empty(&new_entry->list))
-			list_add(&new_entry->list, &session->stat_list);
+		insert_stat(root, new_entry, ts->stat_cmp);
 	}
+
 exit:
 	mutex_unlock(&session->stat_mutex);
 	return ret;
@@ -155,25 +204,38 @@ exit_free_list:
 static void *stat_seq_start(struct seq_file *s, loff_t *pos)
 {
 	struct stat_session *session = s->private;
+	struct rb_node *node;
+	int i;
 
 	/* Prevent from tracer switch or stat_list modification */
 	mutex_lock(&session->stat_mutex);
 
 	/* If we are in the beginning of the file, print the headers */
-	if (!*pos && session->ts->stat_headers)
+	if (!*pos && session->ts->stat_headers) {
+		(*pos)++;
 		return SEQ_START_TOKEN;
+	}
 
-	return seq_list_start(&session->stat_list, *pos);
+	node = rb_first(&session->stat_root);
+	for (i = 0; node && i < *pos; i++)
+		node = rb_next(node);
+
+	(*pos)++;
+
+	return node;
 }
 
 static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
 {
 	struct stat_session *session = s->private;
+	struct rb_node *node = p;
+
+	(*pos)++;
 
 	if (p == SEQ_START_TOKEN)
-		return seq_list_start(&session->stat_list, *pos);
+		return rb_first(&session->stat_root);
 
-	return seq_list_next(p, &session->stat_list, pos);
+	return rb_next(node);
 }
 
 static void stat_seq_stop(struct seq_file *s, void *p)
@@ -185,7 +247,7 @@ static void stat_seq_stop(struct seq_file *s, void *p)
 static int stat_seq_show(struct seq_file *s, void *v)
 {
 	struct stat_session *session = s->private;
-	struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
+	struct stat_node *l = container_of(v, struct stat_node, node);
 
 	if (v == SEQ_START_TOKEN)
 		return session->ts->stat_headers(s);
@@ -286,15 +348,13 @@ int register_stat_tracer(struct tracer_stat *trace)
 	mutex_unlock(&all_stat_sessions_mutex);
 
 	/* Init the session */
-	session = kmalloc(sizeof(struct stat_session), GFP_KERNEL);
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
 	if (!session)
 		return -ENOMEM;
 
 	session->ts = trace;
 	INIT_LIST_HEAD(&session->session_list);
-	INIT_LIST_HEAD(&session->stat_list);
 	mutex_init(&session->stat_mutex);
-	session->file = NULL;
 
 	ret = init_stat_file(session);
 	if (ret) {
-- 
cgit v0.10.2


From b3dd7ba7d862707800c7ac45068f14ade2b65155 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 27 May 2009 11:04:26 +0800
Subject: tracing/stat: change dummpy_cmp() to return -1

Currently the output of trace_stat/workqueues is totally reversed:

 # cat /debug/tracing/trace_stat/workqueues
    ...
    1       17       17      210       37   `-blk_unplug_work+0x0/0x57
    1     3779     3779      181       11   |-cfq_kick_queue+0x0/0x2f
    1     3796     3796                     kblockd/1:120
    ...

The correct output should be:

    1     3796     3796                     kblockd/1:120
    1     3779     3779      181       11   |-cfq_kick_queue+0x0/0x2f
    1       17       17      210       37   `-blk_unplug_work+0x0/0x57

It's caused by "tracing/stat: replace linked list by an rbtree for
sorting"
(53059c9b67a62a3dc8c80204d3da42b9267ea5a0).

dummpy_cmp() should return -1, so rb_node will always be inserted as
right-most node in the rbtree, thus we sort the output in ascending
order.

[ Impact: fix the output of trace_stat/workqueues ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 0bd0fc8..5816d1a 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -127,7 +127,7 @@ insert_stat(struct rb_root *root, struct stat_node *data, cmp_stat_t cmp)
  */
 static int dummy_cmp(void *p1, void *p2)
 {
-	return 1;
+	return -1;
 }
 
 /*
-- 
cgit v0.10.2


From e16228069083a2f6b94383ac5739aea7a0f38ce4 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 27 May 2009 11:04:48 +0800
Subject: tracing/stat: remember to free root node

When closing a trace_stat file, we destroy the rbtree constructed during
file open, but there is memory leak that the root node is not freed.

[ Impact: fix memory leak when closing a trace_stat file ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 5816d1a..8030ec9 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -60,8 +60,8 @@ static struct rb_node *release_next(struct rb_node *node)
 		return node->rb_right;
 	else {
 		if (!parent)
-			return NULL;
-		if (parent->rb_left == node)
+			;
+		else if (parent->rb_left == node)
 			parent->rb_left = NULL;
 		else
 			parent->rb_right = NULL;
-- 
cgit v0.10.2


From dbd3fbdfeecfad4e71139db05d72560c3583e2a9 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 27 May 2009 11:42:46 +0800
Subject: tracing/stat: do some cleanups

- remove duplicate code in stat_seq_init()
- update comments to reflect the change from stat list to stat rbtree

[ Impact: clean up ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 8030ec9..17f20eb 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -93,10 +93,15 @@ static void destroy_session(struct stat_session *session)
 
 typedef int (*cmp_stat_t)(void *, void *);
 
-static void
-insert_stat(struct rb_root *root, struct stat_node *data, cmp_stat_t cmp)
+static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
 {
 	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct stat_node *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->stat = stat;
 
 	/*
 	 * Figure out where to put new node
@@ -118,12 +123,13 @@ insert_stat(struct rb_root *root, struct stat_node *data, cmp_stat_t cmp)
 
 	rb_link_node(&data->node, parent, new);
 	rb_insert_color(&data->node, root);
+	return 0;
 }
 
 /*
  * For tracers that don't provide a stat_cmp callback.
- * This one will force an immediate insertion on tail of
- * the list.
+ * This one will force an insertion as right-most node
+ * in the rbtree.
  */
 static int dummy_cmp(void *p1, void *p2)
 {
@@ -131,15 +137,14 @@ static int dummy_cmp(void *p1, void *p2)
 }
 
 /*
- * Initialize the stat list at each trace_stat file opening.
+ * Initialize the stat rbtree at each trace_stat file opening.
  * All of these copies and sorting are required on all opening
  * since the stats could have changed between two file sessions.
  */
 static int stat_seq_init(struct stat_session *session)
 {
 	struct tracer_stat *ts = session->ts;
-	struct stat_node *new_entry;
-	struct rb_root *root;
+	struct rb_root *root = &session->stat_root;
 	void *stat;
 	int ret = 0;
 	int i;
@@ -154,23 +159,12 @@ static int stat_seq_init(struct stat_session *session)
 	if (!stat)
 		goto exit;
 
-	/*
-	 * The first entry. Actually this is the second, but the first
-	 * one (the stat_list head) is pointless.
-	 */
-	new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
-	if (!new_entry) {
-		ret = -ENOMEM;
+	ret = insert_stat(root, stat, ts->stat_cmp);
+	if (ret)
 		goto exit;
-	}
-	root = &session->stat_root;
-	insert_stat(root, new_entry, dummy_cmp);
-
-	new_entry->stat = stat;
 
 	/*
-	 * Iterate over the tracer stat entries and store them in a sorted
-	 * list.
+	 * Iterate over the tracer stat entries and store them in an rbtree.
 	 */
 	for (i = 1; ; i++) {
 		stat = ts->stat_next(stat, i);
@@ -179,22 +173,16 @@ static int stat_seq_init(struct stat_session *session)
 		if (!stat)
 			break;
 
-		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
-		if (!new_entry) {
-			ret = -ENOMEM;
-			goto exit_free_list;
-		}
-
-		new_entry->stat = stat;
-
-		insert_stat(root, new_entry, ts->stat_cmp);
+		ret = insert_stat(root, stat, ts->stat_cmp);
+		if (ret)
+			goto exit_free_rbtree;
 	}
 
 exit:
 	mutex_unlock(&session->stat_mutex);
 	return ret;
 
-exit_free_list:
+exit_free_rbtree:
 	reset_stat_session(session);
 	mutex_unlock(&session->stat_mutex);
 	return ret;
@@ -207,7 +195,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
 	struct rb_node *node;
 	int i;
 
-	/* Prevent from tracer switch or stat_list modification */
+	/* Prevent from tracer switch or rbtree modification */
 	mutex_lock(&session->stat_mutex);
 
 	/* If we are in the beginning of the file, print the headers */
@@ -280,7 +268,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
 }
 
 /*
- * Avoid consuming memory with our now useless list.
+ * Avoid consuming memory with our now useless rbtree.
  */
 static int tracing_stat_release(struct inode *i, struct file *f)
 {
-- 
cgit v0.10.2


From 43bd1236234cacbc18d1476a9b57e7a306efddf5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 30 May 2009 04:25:30 +0200
Subject: tracing/stat: remove unappropriate safe walk on list

register_stat_tracer() uses list_for_each_entry_safe
to check whether a tracer is already present in the list.
But we don't delete anything from the list here, so
we don't need the safe version

[ Impact: cleanup list use is stat tracing ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>

diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 17f20eb..c006437 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -316,7 +316,7 @@ static int init_stat_file(struct stat_session *session)
 
 int register_stat_tracer(struct tracer_stat *trace)
 {
-	struct stat_session *session, *node, *tmp;
+	struct stat_session *session, *node;
 	int ret;
 
 	if (!trace)
@@ -327,7 +327,7 @@ int register_stat_tracer(struct tracer_stat *trace)
 
 	/* Already registered? */
 	mutex_lock(&all_stat_sessions_mutex);
-	list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
+	list_for_each_entry(node, &all_stat_sessions, session_list) {
 		if (node->ts == trace) {
 			mutex_unlock(&all_stat_sessions_mutex);
 			return -EINVAL;
-- 
cgit v0.10.2


From 8dd783304e6d0f7c2830365d63f75f08aa343e10 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 01:16:07 +0200
Subject: ALSA: hda - Add codec bus reset and verb-retry at critical errors

Some machines machine cause a severe CORB/RIRB stall in certain
weird conditions, such as PA access at the start up together with
fglrx driver.  This seems unable to be recovered without the controller
reset.

This patch allows the bus controller reset at critical errors so
that the communication gets recovered again.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index d1d5fb9..aa0e1c1 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -165,28 +165,29 @@ static int codec_exec_verb(struct hda_codec *codec, unsigned int cmd,
 			   unsigned int *res)
 {
 	struct hda_bus *bus = codec->bus;
-	int err, repeated = 0;
+	int err;
 
 	if (res)
 		*res = -1;
+ again:
 	snd_hda_power_up(codec);
 	mutex_lock(&bus->cmd_mutex);
- again:
 	err = bus->ops.command(bus, cmd);
-	if (!err) {
-		if (res) {
-			*res = bus->ops.get_response(bus);
-			if (*res == -1 && bus->rirb_error) {
-				if (repeated++ < 1) {
-					snd_printd(KERN_WARNING "hda_codec: "
-					   "Trying verb 0x%08x again\n", cmd);
-					goto again;
-				}
-			}
-		}
-	}
+	if (!err && res)
+		*res = bus->ops.get_response(bus);
 	mutex_unlock(&bus->cmd_mutex);
 	snd_hda_power_down(codec);
+	if (res && *res == -1 && bus->rirb_error) {
+		if (bus->response_reset) {
+			snd_printd("hda_codec: resetting BUS due to "
+				   "fatal communication error\n");
+			bus->ops.bus_reset(bus);
+		}
+		goto again;
+	}
+	/* clear reset-flag when the communication gets recovered */
+	if (!err)
+		bus->response_reset = 0;
 	return err;
 }
 
@@ -3894,11 +3895,10 @@ EXPORT_SYMBOL_HDA(auto_pin_cfg_labels);
 /**
  * snd_hda_suspend - suspend the codecs
  * @bus: the HDA bus
- * @state: suspsend state
  *
  * Returns 0 if successful.
  */
-int snd_hda_suspend(struct hda_bus *bus, pm_message_t state)
+int snd_hda_suspend(struct hda_bus *bus)
 {
 	struct hda_codec *codec;
 
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index c5bd40f..f5fa5d1 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -574,6 +574,8 @@ struct hda_bus_ops {
 	/* attach a PCM stream */
 	int (*attach_pcm)(struct hda_bus *bus, struct hda_codec *codec,
 			  struct hda_pcm *pcm);
+	/* reset bus for retry verb */
+	void (*bus_reset)(struct hda_bus *bus);
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	/* notify power-up/down from codec to controller */
 	void (*pm_notify)(struct hda_bus *bus);
@@ -624,6 +626,8 @@ struct hda_bus {
 	unsigned int needs_damn_long_delay :1;
 	unsigned int shutdown :1;	/* being unloaded */
 	unsigned int rirb_error:1;	/* error in codec communication */
+	unsigned int response_reset:1;	/* controller was reset */
+	unsigned int in_reset:1;	/* during reset operation */
 };
 
 /*
@@ -907,7 +911,7 @@ void snd_hda_get_codec_name(struct hda_codec *codec, char *name, int namelen);
  * power management
  */
 #ifdef CONFIG_PM
-int snd_hda_suspend(struct hda_bus *bus, pm_message_t state);
+int snd_hda_suspend(struct hda_bus *bus);
 int snd_hda_resume(struct hda_bus *bus);
 #endif
 
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index b063d0e..44f9a0a 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -661,14 +661,23 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 		return -1;
 	}
 
-	snd_printk(KERN_ERR SFX "azx_get_response timeout (ERROR): "
-		   "last cmd=0x%08x\n", chip->last_cmd);
-	/* re-initialize CORB/RIRB */
-	spin_lock_irq(&chip->reg_lock);
+	/* a fatal communication error; need either to reset or to fallback
+	 * to the single_cmd mode
+	 */
 	bus->rirb_error = 1;
+	if (!bus->response_reset && !bus->in_reset) {
+		bus->response_reset = 1;
+		return -1; /* give a chance to retry */
+	}
+
+	snd_printk(KERN_ERR "hda_intel: azx_get_response timeout, "
+		   "switching to single_cmd mode: last cmd=0x%08x\n",
+		   chip->last_cmd);
+	chip->single_cmd = 1;
+	bus->response_reset = 0;
+	/* re-initialize CORB/RIRB */
 	azx_free_cmd_io(chip);
 	azx_init_cmd_io(chip);
-	spin_unlock_irq(&chip->reg_lock);
 	return -1;
 }
 
@@ -709,6 +718,7 @@ static int azx_single_send_cmd(struct hda_bus *bus, u32 val)
 	struct azx *chip = bus->private_data;
 	int timeout = 50;
 
+	bus->rirb_error = 0;
 	while (timeout--) {
 		/* check ICB busy bit */
 		if (!((azx_readw(chip, IRS) & ICH6_IRS_BUSY))) {
@@ -1247,6 +1257,23 @@ static int azx_attach_pcm_stream(struct hda_bus *bus, struct hda_codec *codec,
 				 struct hda_pcm *cpcm);
 static void azx_stop_chip(struct azx *chip);
 
+static void azx_bus_reset(struct hda_bus *bus)
+{
+	struct azx *chip = bus->private_data;
+	int i;
+
+	bus->in_reset = 1;
+	azx_stop_chip(chip);
+	azx_init_chip(chip);
+	if (chip->initialized) {
+		for (i = 0; i < AZX_MAX_PCMS; i++)
+			snd_pcm_suspend_all(chip->pcm[i]);
+		snd_hda_suspend(chip->bus);
+		snd_hda_resume(chip->bus);
+	}
+	bus->in_reset = 0;
+}
+
 /*
  * Codec initialization
  */
@@ -1270,6 +1297,7 @@ static int __devinit azx_codec_create(struct azx *chip, const char *model,
 	bus_temp.ops.command = azx_send_cmd;
 	bus_temp.ops.get_response = azx_get_response;
 	bus_temp.ops.attach_pcm = azx_attach_pcm_stream;
+	bus_temp.ops.bus_reset = azx_bus_reset;
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	bus_temp.power_save = &power_save;
 	bus_temp.ops.pm_notify = azx_power_notify;
@@ -1997,7 +2025,7 @@ static int azx_suspend(struct pci_dev *pci, pm_message_t state)
 	for (i = 0; i < AZX_MAX_PCMS; i++)
 		snd_pcm_suspend_all(chip->pcm[i]);
 	if (chip->initialized)
-		snd_hda_suspend(chip->bus, state);
+		snd_hda_suspend(chip->bus);
 	azx_stop_chip(chip);
 	if (chip->irq >= 0) {
 		free_irq(chip->irq, chip);
-- 
cgit v0.10.2


From b20f3b834673be9ead83a3c6f07fa3881d1a990f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 01:20:22 +0200
Subject: ALSA: hda - Limit codec-verb retry to limited hardwares

The reset of a BUS controller during operations is somehow risky and
shouldn't be done inevitably for devices that have apparently no such
codec-communication problems.

This patch adds the check of the hardware and limits the bus-reset
capability.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index aa0e1c1..562403a 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -214,11 +214,6 @@ unsigned int snd_hda_codec_read(struct hda_codec *codec, hda_nid_t nid,
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_read);
 
-/* Define the below to send and receive verbs synchronously.
- * If you often get any codec communication errors, this is worth to try.
- */
-/* #define SND_HDA_SUPPORT_SYNC_WRITE */
-
 /**
  * snd_hda_codec_write - send a single command without waiting for response
  * @codec: the HDA codec
@@ -235,12 +230,9 @@ int snd_hda_codec_write(struct hda_codec *codec, hda_nid_t nid, int direct,
 			 unsigned int verb, unsigned int parm)
 {
 	unsigned int cmd = make_codec_cmd(codec, nid, direct, verb, parm);
-#ifdef SND_HDA_SUPPORT_SYNC_WRITE
 	unsigned int res;
-	return codec_exec_verb(codec, cmd, &res);
-#else
-	return codec_exec_verb(codec, cmd, NULL);
-#endif
+	return codec_exec_verb(codec, cmd,
+			       codec->bus->sync_write ? &res : NULL);
 }
 EXPORT_SYMBOL_HDA(snd_hda_codec_write);
 
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index f5fa5d1..cad79ef 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -624,6 +624,9 @@ struct hda_bus {
 
 	/* misc op flags */
 	unsigned int needs_damn_long_delay :1;
+	unsigned int allow_bus_reset:1;	/* allow bus reset at fatal error */
+	unsigned int sync_write:1;	/* sync after verb write */
+	/* status for codec/controller */
 	unsigned int shutdown :1;	/* being unloaded */
 	unsigned int rirb_error:1;	/* error in codec communication */
 	unsigned int response_reset:1;	/* controller was reset */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 44f9a0a..9f44645 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -665,7 +665,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus)
 	 * to the single_cmd mode
 	 */
 	bus->rirb_error = 1;
-	if (!bus->response_reset && !bus->in_reset) {
+	if (bus->allow_bus_reset && !bus->response_reset && !bus->in_reset) {
 		bus->response_reset = 1;
 		return -1; /* give a chance to retry */
 	}
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 48f4a36..42f944b 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -5375,6 +5375,15 @@ again:
 	if (get_wcaps(codec, 0xa) & AC_WCAP_IN_AMP)
 		snd_hda_sequence_write_cache(codec, unmute_init);
 
+	/* Some HP machines seem to have unstable codec communications
+	 * especially with ATI fglrx driver.  For recovering from the
+	 * CORB/RIRB stall, allow the BUS reset and keep always sync
+	 */
+	if (spec->board_config == STAC_HP_DV5) {
+		codec->bus->sync_write = 1;
+		codec->bus->allow_bus_reset = 1;
+	}
+
 	spec->aloopback_ctl = stac92hd71bxx_loopback;
 	spec->aloopback_mask = 0x50;
 	spec->aloopback_shift = 0;
-- 
cgit v0.10.2


From 58f892e022e88438183c48661dcdc6a2997dab99 Mon Sep 17 00:00:00 2001
From: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Date: Tue, 26 May 2009 21:48:07 +0000
Subject: x86: Print real IOAPIC version for x86-64

Fix the fact that the IOAPIC version number in the x86_64 code path always
gets assigned to 0, instead of the correct value.

Before the patch: (from "dmesg" output):

 ACPI: IOAPIC (id[0x08] address[0xfec00000] gsi_base[0])
 IOAPIC[0]: apic_id 8, version 0, address 0xfec00000, GSI 0-23     <---

 After the patch:
 ACPI: IOAPIC (id[0x08] address[0xfec00000] gsi_base[0])
 IOAPIC[0]: apic_id 8, version 32, address 0xfec00000, GSI 0-23    <---

History:

io_apic_get_version() was compiled out of the x86_64 code path in the commit
f2c2cca3acef8b253a36381d9b469ad4fb08563a:

Author: Andi Kleen <ak@suse.de>
Date:   Tue Sep 26 10:52:37 2006 +0200

    [PATCH] Remove APIC version/cpu capability mpparse checking/printing

    ACPI went to great trouble to get the APIC version and CPU capabilities
    of different CPUs before passing them to the mpparser. But all
    that data was used was to print it out.  Actually it even faked some data
    based on the boot cpu, not on the actual CPU being booted.

    Remove all this code because it's not needed.

    Cc: len.brown@intel.com

At the time, the IOAPIC version number was deliberately not printed
in the x86_64 code path. However, after the x86 and x86_64 files were
merged, the net result is that the IOAPIC version is printed incorrectly
in the x86_64 code path.

The patch below provides a fix. I have tested it with acpi, and with
acpi=off, and did not see any problems.

Signed-off-by: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
LKML-Reference: <20090416014230.4885.94926.sendpatchset@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
*************************

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 844e5e2..6310861 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -985,11 +985,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
 	mp_ioapics[idx].apicid = uniq_ioapic_id(id);
-#ifdef CONFIG_X86_32
 	mp_ioapics[idx].apicver = io_apic_get_version(idx);
-#else
-	mp_ioapics[idx].apicver = 0;
-#endif
+
 	/*
 	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ac7f3b6..f712f8f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -4012,6 +4012,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 
 	return apic_id;
 }
+#endif
 
 int __init io_apic_get_version(int ioapic)
 {
@@ -4024,7 +4025,6 @@ int __init io_apic_get_version(int ioapic)
 
 	return reg_01.bits.version;
 }
-#endif
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
-- 
cgit v0.10.2


From 3d58829b0510244596079c1d2f1762c53aef2e97 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 28 May 2009 09:54:47 +0200
Subject: x86, apic: Restore irqs on fail paths

lapic_resume forgets to restore interrupts on fail paths.
Fix that.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <1243497289-18591-1-git-send-email-jirislaby@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b0fd264..e82488d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2027,7 +2027,7 @@ static int lapic_resume(struct sys_device *dev)
 	unsigned int l, h;
 	unsigned long flags;
 	int maxlvt;
-	int ret;
+	int ret = 0;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
 	if (!apic_pm_state.active)
@@ -2038,14 +2038,15 @@ static int lapic_resume(struct sys_device *dev)
 		ioapic_entries = alloc_ioapic_entries();
 		if (!ioapic_entries) {
 			WARN(1, "Alloc ioapic_entries in lapic resume failed.");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto restore;
 		}
 
 		ret = save_IO_APIC_setup(ioapic_entries);
 		if (ret) {
 			WARN(1, "Saving IO-APIC state failed: %d\n", ret);
 			free_ioapic_entries(ioapic_entries);
-			return ret;
+			goto restore;
 		}
 
 		mask_IO_APIC_setup(ioapic_entries);
@@ -2097,10 +2098,10 @@ static int lapic_resume(struct sys_device *dev)
 		restore_IO_APIC_setup(ioapic_entries);
 		free_ioapic_entries(ioapic_entries);
 	}
-
+restore:
 	local_irq_restore(flags);
 
-	return 0;
+	return ret;
 }
 
 /*
-- 
cgit v0.10.2


From 60e59f68824102c87e64c5f51c4e57c0b8a61e46 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Sun, 24 May 2009 20:34:10 +0000
Subject: powerpc/pmac: Update PowerMac 32-bit defconfig

This mostly adds back AppleTouch support and adds CONFIG_HIGHMEM
by default.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 5339bb4..ea8870a 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28-rc3
-# Tue Nov 11 19:36:51 2008
+# Linux kernel version: 2.6.30-rc7
+# Mon May 25 14:53:25 2009
 #
 # CONFIG_PPC64 is not set
 
@@ -14,6 +14,7 @@ CONFIG_6xx=y
 # CONFIG_40x is not set
 # CONFIG_44x is not set
 # CONFIG_E200 is not set
+CONFIG_PPC_BOOK3S=y
 CONFIG_PPC_FPU=y
 CONFIG_ALTIVEC=y
 CONFIG_PPC_STD_MMU=y
@@ -43,7 +44,7 @@ CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_PPC=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_GENERIC_NVRAM=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
 CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_PPC_OF=y
 CONFIG_OF=y
@@ -52,12 +53,14 @@ CONFIG_OF=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_GENERIC_BUG=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_DTC=y
 # CONFIG_DEFAULT_UIMAGE is not set
 CONFIG_HIBERNATE_32=y
 CONFIG_ARCH_HIBERNATION_POSSIBLE=y
 CONFIG_ARCH_SUSPEND_POSSIBLE=y
 # CONFIG_PPC_DCR_NATIVE is not set
 # CONFIG_PPC_DCR_MMIO is not set
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
@@ -72,14 +75,24 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CGROUPS is not set
 # CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 # CONFIG_RELAY is not set
@@ -88,23 +101,27 @@ CONFIG_NAMESPACES=y
 # CONFIG_IPC_NS is not set
 # CONFIG_USER_NS is not set
 # CONFIG_PID_NS is not set
+# CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
@@ -114,10 +131,12 @@ CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
 # CONFIG_MARKERS is not set
 CONFIG_OPROFILE=y
 CONFIG_HAVE_OPROFILE=y
@@ -127,10 +146,10 @@ CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+# CONFIG_SLOW_WORK is not set
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
 # CONFIG_MODULE_FORCE_LOAD is not set
@@ -138,11 +157,8 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
 CONFIG_BLOCK=y
 CONFIG_LBD=y
-# CONFIG_BLK_DEV_IO_TRACE is not set
-CONFIG_LSF=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -158,14 +174,11 @@ CONFIG_DEFAULT_AS=y
 # CONFIG_DEFAULT_CFQ is not set
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="anticipatory"
-CONFIG_CLASSIC_RCU=y
 CONFIG_FREEZER=y
 
 #
 # Platform support
 #
-CONFIG_PPC_MULTIPLATFORM=y
-CONFIG_CLASSIC32=y
 # CONFIG_PPC_CHRP is not set
 # CONFIG_MPC5121_ADS is not set
 # CONFIG_MPC5121_GENERIC is not set
@@ -178,7 +191,9 @@ CONFIG_PPC_PMAC=y
 # CONFIG_PPC_83xx is not set
 # CONFIG_PPC_86xx is not set
 # CONFIG_EMBEDDED6xx is not set
+# CONFIG_AMIGAONE is not set
 CONFIG_PPC_NATIVE=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
 # CONFIG_IPIC is not set
 CONFIG_MPIC=y
 # CONFIG_MPIC_WEIRD is not set
@@ -212,11 +227,12 @@ CONFIG_CPU_FREQ_PMAC=y
 CONFIG_PPC601_SYNC_FIX=y
 # CONFIG_TAU is not set
 # CONFIG_FSL_ULI1575 is not set
+# CONFIG_SIMPLE_GPIO is not set
 
 #
 # Kernel options
 #
-# CONFIG_HIGHMEM is not set
+CONFIG_HIGHMEM=y
 CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -239,6 +255,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 CONFIG_ARCH_HAS_WALK_MEMORY=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
 # CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
 CONFIG_ARCH_FLATMEM_ENABLE=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -250,12 +267,17 @@ CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_MIGRATION is not set
-# CONFIG_RESOURCES_64BIT is not set
 # CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+CONFIG_PPC_4K_PAGES=y
+# CONFIG_PPC_16K_PAGES is not set
+# CONFIG_PPC_64K_PAGES is not set
+# CONFIG_PPC_256K_PAGES is not set
 CONFIG_FORCE_MAX_ZONEORDER=11
 CONFIG_PROC_DEVICETREE=y
 # CONFIG_CMDLINE_BOOL is not set
@@ -288,6 +310,8 @@ CONFIG_ARCH_SUPPORTS_MSI=y
 # CONFIG_PCI_MSI is not set
 # CONFIG_PCI_LEGACY is not set
 # CONFIG_PCI_DEBUG is not set
+# CONFIG_PCI_STUB is not set
+# CONFIG_PCI_IOV is not set
 CONFIG_PCCARD=m
 # CONFIG_PCMCIA_DEBUG is not set
 CONFIG_PCMCIA=m
@@ -397,6 +421,8 @@ CONFIG_NETFILTER_XTABLES=m
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
 # CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set
 # CONFIG_NETFILTER_XT_TARGET_DSCP is not set
+CONFIG_NETFILTER_XT_TARGET_HL=m
+# CONFIG_NETFILTER_XT_TARGET_LED is not set
 CONFIG_NETFILTER_XT_TARGET_MARK=m
 CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
@@ -405,6 +431,7 @@ CONFIG_NETFILTER_XT_TARGET_RATEEST=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set
 CONFIG_NETFILTER_XT_MATCH_COMMENT=m
 # CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set
 CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
@@ -415,6 +442,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m
 CONFIG_NETFILTER_XT_MATCH_ESP=m
 # CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set
 CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_HL=m
 CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
 CONFIG_NETFILTER_XT_MATCH_LENGTH=m
 CONFIG_NETFILTER_XT_MATCH_LIMIT=m
@@ -478,17 +506,15 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_IP_DCCP=m
 CONFIG_INET_DCCP_DIAG=m
-CONFIG_IP_DCCP_ACKVEC=y
 
 #
 # DCCP CCIDs Configuration (EXPERIMENTAL)
 #
-CONFIG_IP_DCCP_CCID2=m
 # CONFIG_IP_DCCP_CCID2_DEBUG is not set
-CONFIG_IP_DCCP_CCID3=m
+CONFIG_IP_DCCP_CCID3=y
 # CONFIG_IP_DCCP_CCID3_DEBUG is not set
 CONFIG_IP_DCCP_CCID3_RTO=100
-CONFIG_IP_DCCP_TFRC_LIB=m
+CONFIG_IP_DCCP_TFRC_LIB=y
 
 #
 # DCCP Kernel Hacking
@@ -508,13 +534,16 @@ CONFIG_IP_DCCP_TFRC_LIB=m
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 # CONFIG_NET_SCHED is not set
 CONFIG_NET_CLS_ROUTE=y
+# CONFIG_DCB is not set
 
 #
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_DROP_MONITOR is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_CAN is not set
 CONFIG_IRDA=m
@@ -577,8 +606,6 @@ CONFIG_BT_HIDP=m
 #
 # Bluetooth device drivers
 #
-CONFIG_BT_HCIUSB=m
-# CONFIG_BT_HCIUSB_SCO is not set
 # CONFIG_BT_HCIBTUSB is not set
 # CONFIG_BT_HCIUART is not set
 CONFIG_BT_HCIBCM203X=m
@@ -590,31 +617,27 @@ CONFIG_BT_HCIBFUSB=m
 # CONFIG_BT_HCIBTUART is not set
 # CONFIG_BT_HCIVHCI is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=m
-CONFIG_NL80211=y
+# CONFIG_CFG80211_REG_DEBUG is not set
 CONFIG_WIRELESS_OLD_REGULATORY=y
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
+# CONFIG_LIB80211 is not set
 CONFIG_MAC80211=m
 
 #
 # Rate control algorithm selection
 #
-CONFIG_MAC80211_RC_PID=y
-# CONFIG_MAC80211_RC_MINSTREL is not set
-CONFIG_MAC80211_RC_DEFAULT_PID=y
-# CONFIG_MAC80211_RC_DEFAULT_MINSTREL is not set
-CONFIG_MAC80211_RC_DEFAULT="pid"
+CONFIG_MAC80211_RC_MINSTREL=y
+# CONFIG_MAC80211_RC_DEFAULT_PID is not set
+CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y
+CONFIG_MAC80211_RC_DEFAULT="minstrel"
 # CONFIG_MAC80211_MESH is not set
 CONFIG_MAC80211_LEDS=y
+# CONFIG_MAC80211_DEBUGFS is not set
 # CONFIG_MAC80211_DEBUG_MENU is not set
-CONFIG_IEEE80211=m
-# CONFIG_IEEE80211_DEBUG is not set
-CONFIG_IEEE80211_CRYPT_WEP=m
-CONFIG_IEEE80211_CRYPT_CCMP=m
-CONFIG_IEEE80211_CRYPT_TKIP=m
+# CONFIG_WIMAX is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
 
@@ -662,17 +685,27 @@ CONFIG_BLK_DEV_RAM_SIZE=4096
 # CONFIG_BLK_DEV_HD is not set
 CONFIG_MISC_DEVICES=y
 # CONFIG_PHANTOM is not set
-# CONFIG_EEPROM_93CX6 is not set
 # CONFIG_SGI_IOC4 is not set
 # CONFIG_TIFM_CORE is not set
+# CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_93CX6 is not set
 CONFIG_HAVE_IDE=y
 CONFIG_IDE=y
 
 #
 # Please see Documentation/ide/ide.txt for help/info on IDE drives
 #
+CONFIG_IDE_XFER_MODE=y
 CONFIG_IDE_TIMINGS=y
 CONFIG_IDE_ATAPI=y
 # CONFIG_BLK_DEV_IDE_SATA is not set
@@ -684,7 +717,6 @@ CONFIG_BLK_DEV_IDECS=m
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y
 # CONFIG_BLK_DEV_IDETAPE is not set
-CONFIG_BLK_DEV_IDESCSI=y
 # CONFIG_IDE_TASK_IOCTL is not set
 CONFIG_IDE_PROC_FS=y
 
@@ -714,6 +746,7 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y
 # CONFIG_BLK_DEV_JMICRON is not set
 # CONFIG_BLK_DEV_SC1200 is not set
 # CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_IT8172 is not set
 # CONFIG_BLK_DEV_IT8213 is not set
 # CONFIG_BLK_DEV_IT821X is not set
 # CONFIG_BLK_DEV_NS87415 is not set
@@ -728,7 +761,6 @@ CONFIG_BLK_DEV_SL82C105=y
 # CONFIG_BLK_DEV_TC86C001 is not set
 CONFIG_BLK_DEV_IDE_PMAC=y
 CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
-CONFIG_BLK_DEV_IDEDMA_PMAC=y
 CONFIG_BLK_DEV_IDEDMA=y
 
 #
@@ -772,6 +804,7 @@ CONFIG_SCSI_FC_ATTRS=y
 # CONFIG_SCSI_SRP_ATTRS is not set
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
+# CONFIG_SCSI_CXGB3_ISCSI is not set
 # CONFIG_BLK_DEV_3W_XXXX_RAID is not set
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_ACARD is not set
@@ -791,8 +824,12 @@ CONFIG_SCSI_AIC7XXX_OLD=m
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
+# CONFIG_SCSI_MPT2SAS is not set
 # CONFIG_SCSI_HPTIOP is not set
 # CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
+# CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
@@ -822,6 +859,7 @@ CONFIG_SCSI_MAC53C94=y
 # CONFIG_SCSI_SRP is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 # CONFIG_ATA is not set
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=m
@@ -881,6 +919,7 @@ CONFIG_THERM_ADT746X=m
 # CONFIG_ANSLCD is not set
 CONFIG_PMAC_RACKMETER=m
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_DUMMY=m
 # CONFIG_BONDING is not set
 # CONFIG_MACVLAN is not set
@@ -898,6 +937,8 @@ CONFIG_BMAC=y
 CONFIG_SUNGEM=y
 # CONFIG_CASSINI is not set
 # CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 # CONFIG_NET_TULIP is not set
 # CONFIG_HP100 is not set
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
@@ -913,7 +954,6 @@ CONFIG_PCNET32=y
 # CONFIG_ADAPTEC_STARFIRE is not set
 # CONFIG_B44 is not set
 # CONFIG_FORCEDETH is not set
-# CONFIG_EEPRO100 is not set
 # CONFIG_E100 is not set
 # CONFIG_FEALNX is not set
 # CONFIG_NATSEMI is not set
@@ -923,6 +963,7 @@ CONFIG_PCNET32=y
 # CONFIG_R6040 is not set
 # CONFIG_SIS900 is not set
 # CONFIG_EPIC100 is not set
+# CONFIG_SMSC9420 is not set
 # CONFIG_SUNDANCE is not set
 # CONFIG_TLAN is not set
 # CONFIG_VIA_RHINE is not set
@@ -935,6 +976,7 @@ CONFIG_NETDEV_1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -945,18 +987,20 @@ CONFIG_NETDEV_1000=y
 # CONFIG_VIA_VELOCITY is not set
 # CONFIG_TIGON3 is not set
 # CONFIG_BNX2 is not set
-# CONFIG_MV643XX_ETH is not set
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
+CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_CHELSIO_T3 is not set
 # CONFIG_ENIC is not set
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -966,6 +1010,7 @@ CONFIG_NETDEV_10000=y
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
 # CONFIG_TR is not set
 
 #
@@ -974,20 +1019,11 @@ CONFIG_NETDEV_10000=y
 # CONFIG_WLAN_PRE80211 is not set
 CONFIG_WLAN_80211=y
 # CONFIG_PCMCIA_RAYCS is not set
-# CONFIG_IPW2100 is not set
-# CONFIG_IPW2200 is not set
 # CONFIG_LIBERTAS is not set
 # CONFIG_LIBERTAS_THINFIRM is not set
 # CONFIG_AIRO is not set
-CONFIG_HERMES=m
-CONFIG_APPLE_AIRPORT=m
-# CONFIG_PLX_HERMES is not set
-# CONFIG_TMD_HERMES is not set
-# CONFIG_NORTEL_HERMES is not set
-CONFIG_PCI_HERMES=m
-CONFIG_PCMCIA_HERMES=m
-# CONFIG_PCMCIA_SPECTRUM is not set
 # CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
 # CONFIG_AIRO_CS is not set
 # CONFIG_PCMCIA_WL3501 is not set
 CONFIG_PRISM54=m
@@ -997,15 +1033,17 @@ CONFIG_PRISM54=m
 # CONFIG_RTL8187 is not set
 # CONFIG_ADM8211 is not set
 # CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
 CONFIG_P54_COMMON=m
 # CONFIG_P54_USB is not set
 # CONFIG_P54_PCI is not set
+CONFIG_P54_LEDS=y
 # CONFIG_ATH5K is not set
 # CONFIG_ATH9K is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_AR9170_USB is not set
+# CONFIG_IPW2100 is not set
+# CONFIG_IPW2200 is not set
+# CONFIG_IWLWIFI is not set
 # CONFIG_HOSTAP is not set
 CONFIG_B43=m
 CONFIG_B43_PCI_AUTOSELECT=y
@@ -1025,6 +1063,19 @@ CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
 # CONFIG_B43LEGACY_PIO_MODE is not set
 # CONFIG_ZD1211RW is not set
 # CONFIG_RT2X00 is not set
+CONFIG_HERMES=m
+CONFIG_HERMES_CACHE_FW_ON_INIT=y
+CONFIG_APPLE_AIRPORT=m
+# CONFIG_PLX_HERMES is not set
+# CONFIG_TMD_HERMES is not set
+# CONFIG_NORTEL_HERMES is not set
+CONFIG_PCI_HERMES=m
+CONFIG_PCMCIA_HERMES=m
+# CONFIG_PCMCIA_SPECTRUM is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
 
 #
 # USB Network Adapters
@@ -1036,6 +1087,7 @@ CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
 CONFIG_USB_USBNET=m
 CONFIG_USB_NET_AX8817X=m
 CONFIG_USB_NET_CDCETHER=m
+# CONFIG_USB_NET_CDC_EEM is not set
 # CONFIG_USB_NET_DM9601 is not set
 # CONFIG_USB_NET_SMSC95XX is not set
 # CONFIG_USB_NET_GL620A is not set
@@ -1099,7 +1151,7 @@ CONFIG_INPUT_KEYBOARD=y
 CONFIG_INPUT_MOUSE=y
 # CONFIG_MOUSE_PS2 is not set
 # CONFIG_MOUSE_SERIAL is not set
-# CONFIG_MOUSE_APPLETOUCH is not set
+CONFIG_MOUSE_APPLETOUCH=y
 # CONFIG_MOUSE_BCM5974 is not set
 # CONFIG_MOUSE_VSXXXAA is not set
 # CONFIG_INPUT_JOYSTICK is not set
@@ -1150,10 +1202,13 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y
 # CONFIG_SERIAL_JSM is not set
 # CONFIG_SERIAL_OF_PLATFORM is not set
 CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_HVC_UDBG is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=m
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 CONFIG_NVRAM=y
 CONFIG_GEN_RTC=y
 # CONFIG_GEN_RTC_X is not set
@@ -1232,12 +1287,9 @@ CONFIG_I2C_POWERMAC=y
 # Miscellaneous I2C Chip support
 #
 # CONFIG_DS1682 is not set
-# CONFIG_EEPROM_AT24 is not set
-# CONFIG_EEPROM_LEGACY is not set
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -1259,11 +1311,11 @@ CONFIG_BATTERY_PMU=y
 # CONFIG_THERMAL is not set
 # CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
 
 #
 # Sonics Silicon Backplane
 #
-CONFIG_SSB_POSSIBLE=y
 CONFIG_SSB=m
 CONFIG_SSB_SPROM=y
 CONFIG_SSB_PCIHOST_POSSIBLE=y
@@ -1281,18 +1333,13 @@ CONFIG_SSB_DRIVER_PCICORE=y
 # CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
 # CONFIG_HTC_PASIC3 is not set
+# CONFIG_TWL4030_CORE is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
-
-#
-# Voltage and Current regulators
-#
+# CONFIG_MFD_PCF50633 is not set
 # CONFIG_REGULATOR is not set
-# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
-# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
-# CONFIG_REGULATOR_BQ24022 is not set
 
 #
 # Multimedia devices
@@ -1390,6 +1437,7 @@ CONFIG_FB_ATY_BACKLIGHT=y
 # CONFIG_FB_KYRO is not set
 CONFIG_FB_3DFX=y
 # CONFIG_FB_3DFX_ACCEL is not set
+CONFIG_FB_3DFX_I2C=y
 # CONFIG_FB_VOODOO1 is not set
 # CONFIG_FB_VT8623 is not set
 # CONFIG_FB_TRIDENT is not set
@@ -1399,12 +1447,14 @@ CONFIG_FB_3DFX=y
 # CONFIG_FB_IBM_GXT4500 is not set
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 CONFIG_LCD_CLASS_DEVICE=m
 # CONFIG_LCD_ILI9320 is not set
 # CONFIG_LCD_PLATFORM is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_BACKLIGHT_CORGI is not set
+CONFIG_BACKLIGHT_GENERIC=y
 
 #
 # Display device support
@@ -1444,11 +1494,13 @@ CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
 CONFIG_SND_PCM_OSS_PLUGINS=y
 CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_HRTIMER is not set
 # CONFIG_SND_DYNAMIC_MINORS is not set
 CONFIG_SND_SUPPORT_OLD_API=y
 CONFIG_SND_VERBOSE_PROCFS=y
 # CONFIG_SND_VERBOSE_PRINTK is not set
 # CONFIG_SND_DEBUG is not set
+CONFIG_SND_VMASTER=y
 CONFIG_SND_DRIVERS=y
 CONFIG_SND_DUMMY=m
 # CONFIG_SND_VIRMIDI is not set
@@ -1486,6 +1538,8 @@ CONFIG_SND_PCI=y
 # CONFIG_SND_INDIGO is not set
 # CONFIG_SND_INDIGOIO is not set
 # CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1551,28 +1605,31 @@ CONFIG_USB_HID=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
-CONFIG_HID_BRIGHT=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
-CONFIG_HID_DELL=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
 CONFIG_HID_MICROSOFT=y
 CONFIG_HID_MONTEREY=y
+CONFIG_HID_NTRIG=y
 CONFIG_HID_PANTHERLORD=y
 # CONFIG_PANTHERLORD_FF is not set
 CONFIG_HID_PETALYNX=y
 CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
+# CONFIG_GREENASIA_FF is not set
+CONFIG_HID_TOPSEED=y
 # CONFIG_THRUSTMASTER_FF is not set
 # CONFIG_ZEROPLUS_FF is not set
 CONFIG_USB_SUPPORT=y
@@ -1603,6 +1660,7 @@ CONFIG_USB_EHCI_HCD=m
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
 # CONFIG_USB_EHCI_HCD_PPC_OF is not set
+# CONFIG_USB_OXU210HP_HCD is not set
 # CONFIG_USB_ISP116X_HCD is not set
 # CONFIG_USB_ISP1760_HCD is not set
 CONFIG_USB_OHCI_HCD=y
@@ -1625,24 +1683,23 @@ CONFIG_USB_PRINTER=m
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# may also be needed; see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=m
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
 # CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
 # CONFIG_USB_STORAGE_USBAT is not set
 # CONFIG_USB_STORAGE_SDDR09 is not set
 # CONFIG_USB_STORAGE_SDDR55 is not set
 # CONFIG_USB_STORAGE_JUMPSHOT is not set
 # CONFIG_USB_STORAGE_ALAUDA is not set
-CONFIG_USB_STORAGE_ONETOUCH=y
+CONFIG_USB_STORAGE_ONETOUCH=m
 # CONFIG_USB_STORAGE_KARMA is not set
 # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
 # CONFIG_USB_LIBUSUAL is not set
@@ -1665,7 +1722,7 @@ CONFIG_USB_EZUSB=y
 # CONFIG_USB_SERIAL_CH341 is not set
 # CONFIG_USB_SERIAL_WHITEHEAT is not set
 # CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set
-# CONFIG_USB_SERIAL_CP2101 is not set
+# CONFIG_USB_SERIAL_CP210X is not set
 # CONFIG_USB_SERIAL_CYPRESS_M8 is not set
 # CONFIG_USB_SERIAL_EMPEG is not set
 # CONFIG_USB_SERIAL_FTDI_SIO is not set
@@ -1701,15 +1758,19 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
 # CONFIG_USB_SERIAL_NAVMAN is not set
 # CONFIG_USB_SERIAL_PL2303 is not set
 # CONFIG_USB_SERIAL_OTI6858 is not set
+# CONFIG_USB_SERIAL_QUALCOMM is not set
 # CONFIG_USB_SERIAL_SPCP8X5 is not set
 # CONFIG_USB_SERIAL_HP4X is not set
 # CONFIG_USB_SERIAL_SAFE is not set
+# CONFIG_USB_SERIAL_SIEMENS_MPI is not set
 # CONFIG_USB_SERIAL_SIERRAWIRELESS is not set
+# CONFIG_USB_SERIAL_SYMBOL is not set
 # CONFIG_USB_SERIAL_TI is not set
 # CONFIG_USB_SERIAL_CYBERJACK is not set
 # CONFIG_USB_SERIAL_XIRCOM is not set
 # CONFIG_USB_SERIAL_OPTION is not set
 # CONFIG_USB_SERIAL_OMNINET is not set
+# CONFIG_USB_SERIAL_OPTICON is not set
 # CONFIG_USB_SERIAL_DEBUG is not set
 
 #
@@ -1726,7 +1787,6 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 CONFIG_USB_APPLEDISPLAY=m
@@ -1738,6 +1798,11 @@ CONFIG_USB_APPLEDISPLAY=m
 # CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_UWB is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1748,7 +1813,9 @@ CONFIG_LEDS_CLASS=y
 # LED drivers
 #
 # CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1759,11 +1826,16 @@ CONFIG_LEDS_TRIGGER_IDE_DISK=y
 # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
 CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
 # CONFIG_EDAC is not set
 # CONFIG_RTC_CLASS is not set
 # CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 
@@ -1774,6 +1846,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 # CONFIG_EXT3_FS_SECURITY is not set
@@ -1783,7 +1856,9 @@ CONFIG_EXT4_FS_XATTR=y
 # CONFIG_EXT4_FS_POSIX_ACL is not set
 # CONFIG_EXT4_FS_SECURITY is not set
 CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
 CONFIG_JBD2=y
+# CONFIG_JBD2_DEBUG is not set
 CONFIG_FS_MBCACHE=y
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
@@ -1792,6 +1867,7 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_XFS_FS is not set
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
 CONFIG_DNOTIFY=y
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY_USER=y
@@ -1801,6 +1877,11 @@ CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=m
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 CONFIG_ISO9660_FS=y
@@ -1831,10 +1912,7 @@ CONFIG_TMPFS=y
 # CONFIG_TMPFS_POSIX_ACL is not set
 # CONFIG_HUGETLB_PAGE is not set
 # CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
+CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ADFS_FS is not set
 # CONFIG_AFFS_FS is not set
 CONFIG_HFS_FS=m
@@ -1843,6 +1921,7 @@ CONFIG_HFSPLUS_FS=m
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
 # CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_OMFS_FS is not set
@@ -1851,6 +1930,7 @@ CONFIG_HFSPLUS_FS=m
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1868,7 +1948,6 @@ CONFIG_NFS_ACL_SUPPORT=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 CONFIG_SMB_FS=m
@@ -1940,11 +2019,13 @@ CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_KOI8_U is not set
 CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
 #
 CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
 CONFIG_CRC_CCITT=y
 CONFIG_CRC16=y
 CONFIG_CRC_T10DIF=y
@@ -1954,15 +2035,18 @@ CONFIG_CRC32=y
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_TEXTSEARCH=y
 CONFIG_TEXTSEARCH_KMP=m
 CONFIG_TEXTSEARCH_BM=m
 CONFIG_TEXTSEARCH_FSM=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
 CONFIG_HAVE_LMB=y
+CONFIG_NLATTR=y
 
 #
 # Kernel hacking
@@ -1973,13 +2057,16 @@ CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_FRAME_WARN=1024
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_FS is not set
+CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHED_DEBUG=y
 CONFIG_SCHEDSTATS=y
 # CONFIG_TIMER_STATS is not set
@@ -1994,6 +2081,7 @@ CONFIG_SCHEDSTATS=y
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
@@ -2001,6 +2089,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
 CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
@@ -2009,7 +2098,14 @@ CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_FAULT_INJECTION is not set
 CONFIG_LATENCYTOP=y
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -2017,12 +2113,19 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 # CONFIG_FUNCTION_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
+CONFIG_PRINT_STACK_DEPTH=64
 # CONFIG_DEBUG_STACKOVERFLOW is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_CODE_PATCHING_SELFTEST is not set
@@ -2033,6 +2136,7 @@ CONFIG_XMON_DEFAULT=y
 CONFIG_XMON_DISASSEMBLY=y
 CONFIG_DEBUGGER=y
 CONFIG_IRQSTACKS=y
+# CONFIG_VIRQ_DEBUG is not set
 # CONFIG_BDI_SWITCH is not set
 CONFIG_BOOTX_TEXT=y
 # CONFIG_PPC_EARLY_DEBUG is not set
@@ -2051,13 +2155,20 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_FIPS is not set
 CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
 CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
 CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_RNG=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_TEST is not set
@@ -2127,6 +2238,7 @@ CONFIG_CRYPTO_TWOFISH_COMMON=m
 # Compression
 #
 CONFIG_CRYPTO_DEFLATE=m
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
-- 
cgit v0.10.2


From ea5cc87c63b49c133d15ec2911bb2e49e8124516 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 1 Jun 2009 22:31:03 -0300
Subject: perf_counter tools: Add string.[ch]

Add hex conversion libraries. We are going to replace sscanf()
uses with them.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/string.c b/Documentation/perf_counter/util/string.c
new file mode 100644
index 0000000..ec33c0c
--- /dev/null
+++ b/Documentation/perf_counter/util/string.c
@@ -0,0 +1,34 @@
+#include "string.h"
+
+static int hex(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	if ((ch >= 'A') && (ch <= 'F'))
+		return ch - 'A' + 10;
+	return -1;
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+int hex2u64(const char *ptr, __u64 *long_val)
+{
+	const char *p = ptr;
+	*long_val = 0;
+
+	while (*p) {
+		const int hex_val = hex(*p);
+
+		if (hex_val < 0)
+			break;
+
+		*long_val = (*long_val << 4) | hex_val;
+		p++;
+	}
+
+	return p - ptr;
+}
diff --git a/Documentation/perf_counter/util/string.h b/Documentation/perf_counter/util/string.h
new file mode 100644
index 0000000..72812c1
--- /dev/null
+++ b/Documentation/perf_counter/util/string.h
@@ -0,0 +1,8 @@
+#ifndef _PERF_STRING_H_
+#define _PERF_STRING_H_
+
+#include <linux/types.h>
+
+int hex2u64(const char *ptr, __u64 *val);
+
+#endif
-- 
cgit v0.10.2


From a0055ae2a4e13db9534c438cf8f3896181da6afc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 1 Jun 2009 17:50:19 -0300
Subject: perf_counter tools: Use hex2u64 in more places

This has also a nice side effect, tools built on newer systems such as
fedora 10 again work on systems with older versions of glibc:

My workstation:

[acme@doppio ~]$ rpm -q glibc.x86_64
glibc-2.9-3.x86_64

Test machine:

[acme@emilia ~]$ rpm -q glibc.x86_64
glibc-2.5-24

Before:

[acme@emilia ~]$ perf
perf: /lib64/libc.so.6: version `GLIBC_2.7' not found (required by perf)
[acme@emilia ~]$ nm `which perf` | grep GLIBC_2\.7
                 U __isoc99_sscanf@@GLIBC_2.7
[acme@emilia ~]$

After:
[acme@emilia ~]$ perf
usage: perf [--version] [--help] COMMAND [ARGS]

The most commonly used perf commands are:
   record   Run a command and record its profile into perf.data
   report   Read perf.data (created by perf record) and display the
profile
   stat     Run a command and gather performance counter statistics
   top      Run a command and profile it

See 'perf help COMMAND' for more information on a specific command.
[acme@emilia ~]$ nm `which perf` | grep GLIBC_2\.7
[acme@emilia ~]$

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090601205019.GA7805@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 416ab11..3b8275f 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -296,6 +296,7 @@ LIB_H += util/quote.h
 LIB_H += util/util.h
 LIB_H += util/help.h
 LIB_H += util/strbuf.h
+LIB_H += util/string.h
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
 LIB_H += util/symbol.h
@@ -315,6 +316,7 @@ LIB_OBJS += util/rbtree.o
 LIB_OBJS += util/run-command.o
 LIB_OBJS += util/quote.o
 LIB_OBJS += util/strbuf.o
+LIB_OBJS += util/string.o
 LIB_OBJS += util/usage.o
 LIB_OBJS += util/wrapper.o
 LIB_OBJS += util/sigchain.o
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 96bfb7c..9c151de 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -5,6 +5,7 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/string.h"
 
 #include <sched.h>
 
@@ -165,12 +166,10 @@ static pid_t pid_synthesize_comm_event(pid_t pid)
 {
 	struct comm_event comm_ev;
 	char filename[PATH_MAX];
-	pid_t spid, ppid;
 	char bf[BUFSIZ];
-	int fd, nr, ret;
-	char comm[18];
+	int fd, ret;
 	size_t size;
-	char state;
+	char *field, *sep;
 
 	snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
 
@@ -185,20 +184,22 @@ static pid_t pid_synthesize_comm_event(pid_t pid)
 	}
 	close(fd);
 
+	/* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
 	memset(&comm_ev, 0, sizeof(comm_ev));
-        nr = sscanf(bf, "%d %s %c %d %d ",
-			&spid, comm, &state, &ppid, &comm_ev.pid);
-	if (nr != 5) {
-		fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
-			filename);
-		exit(EXIT_FAILURE);
-	}
+	field = strchr(bf, '(');
+	if (field == NULL)
+		goto out_failure;
+	sep = strchr(++field, ')');
+	if (sep == NULL)
+		goto out_failure;
+	size = sep - field;
+	memcpy(comm_ev.comm, field, size++);
+	field = strchr(sep + 4, ' ');
+	if (field == NULL)
+		goto out_failure;
+	comm_ev.pid = atoi(++field);
 	comm_ev.header.type = PERF_EVENT_COMM;
 	comm_ev.tid = pid;
-	size = strlen(comm);
-	comm[--size] = '\0'; /* Remove the ')' at the end */
-	--size; /* Remove the '(' at the begin */
-	memcpy(comm_ev.comm, comm + 1, size);
 	size = ALIGN(size, sizeof(uint64_t));
 	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
 
@@ -208,6 +209,11 @@ static pid_t pid_synthesize_comm_event(pid_t pid)
 		exit(-1);
 	}
 	return comm_ev.pid;
+out_failure:
+	fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
+		filename);
+	exit(EXIT_FAILURE);
+	return -1;
 }
 
 static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
@@ -223,23 +229,25 @@ static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
 		exit(EXIT_FAILURE);
 	}
 	while (1) {
-		char bf[BUFSIZ];
-		unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
+		char bf[BUFSIZ], *pbf = bf;
 		struct mmap_event mmap_ev = {
 			.header.type = PERF_EVENT_MMAP,
 		};
-		unsigned long ino;
-		int major, minor;
+		int n;
 		size_t size;
 		if (fgets(bf, sizeof(bf), fp) == NULL)
 			break;
 
 		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
-			&mmap_ev.start, &mmap_ev.len,
-                        &vm_read, &vm_write, &vm_exec, &vm_mayshare,
-                        &mmap_ev.pgoff, &major, &minor, &ino);
-		if (vm_exec == 'x') {
+		n = hex2u64(pbf, &mmap_ev.start);
+		if (n < 0)
+			continue;
+		pbf += n + 1;
+		n = hex2u64(pbf, &mmap_ev.len);
+		if (n < 0)
+			continue;
+		pbf += n + 3;
+		if (*pbf == 'x') { /* vm_exec */
 			char *execname = strrchr(bf, ' ');
 
 			if (execname == NULL || execname[1] != '/')
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 4705679..7973092 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -5,6 +5,7 @@
 #include "util/cache.h"
 #include "util/rbtree.h"
 #include "util/symbol.h"
+#include "util/string.h"
 
 #include "perf.h"
 
diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
index 88c903e..2fdfd1d 100644
--- a/Documentation/perf_counter/util/parse-events.c
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -4,6 +4,7 @@
 #include "parse-options.h"
 #include "parse-events.h"
 #include "exec_cmd.h"
+#include "string.h"
 
 int nr_counters;
 
@@ -105,22 +106,26 @@ static __u64 match_event_symbols(const char *str)
 	__u64 config, id;
 	int type;
 	unsigned int i;
-	char mask_str[4];
+	const char *sep, *pstr;
 
-	if (sscanf(str, "r%llx", &config) == 1)
+	if (str[0] == 'r' && hex2u64(str + 1, &config) > 0)
 		return config | PERF_COUNTER_RAW_MASK;
 
-	switch (sscanf(str, "%d:%llu:%2s", &type, &id, mask_str)) {
-		case 3:
-			if (strchr(mask_str, 'k'))
+	pstr = str;
+	sep = strchr(pstr, ':');
+	if (sep) {
+		type = atoi(pstr);
+		pstr = sep + 1;
+		id = atoi(pstr);
+		sep = strchr(pstr, ':');
+		if (sep) {
+			pstr = sep + 1;
+			if (strchr(pstr, 'k'))
 				event_mask[nr_counters] |= EVENT_MASK_USER;
-			if (strchr(mask_str, 'u'))
+			if (strchr(pstr, 'u'))
 				event_mask[nr_counters] |= EVENT_MASK_KERNEL;
-		case 2:
-			return EID(type, id);
-
-		default:
-			break;
+		}
+		return EID(type, id);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index 4728121..31e8fae 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -1,5 +1,6 @@
 #include "util.h"
 #include "../perf.h"
+#include "string.h"
 #include "symbol.h"
 
 #include <libelf.h>
@@ -122,39 +123,6 @@ size_t dso__fprintf(struct dso *self, FILE *fp)
 	return ret;
 }
 
-static int hex(char ch)
-{
-	if ((ch >= '0') && (ch <= '9'))
-		return ch - '0';
-	if ((ch >= 'a') && (ch <= 'f'))
-		return ch - 'a' + 10;
-	if ((ch >= 'A') && (ch <= 'F'))
-		return ch - 'A' + 10;
-	return -1;
-}
-
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-static int hex2long(char *ptr, unsigned long *long_val)
-{
-	const char *p = ptr;
-	*long_val = 0;
-
-	while (*p) {
-		const int hex_val = hex(*p);
-
-		if (hex_val < 0)
-			break;
-
-		*long_val = (*long_val << 4) | hex_val;
-		p++;
-	}
-
-	return p - ptr;
-}
-
 static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter)
 {
 	struct rb_node *nd, *prevnd;
@@ -166,7 +134,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter)
 		goto out_failure;
 
 	while (!feof(file)) {
-		unsigned long start;
+		__u64 start;
 		struct symbol *sym;
 		int line_len, len;
 		char symbol_type;
@@ -180,7 +148,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter)
 
 		line[--line_len] = '\0'; /* \n */
 
-		len = hex2long(line, &start);
+		len = hex2u64(line, &start);
 
 		len++;
 		if (len + 2 >= line_len)
-- 
cgit v0.10.2


From c8c96525f3c25f43a4fa230e293c4976c0c36cc1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 1 Jun 2009 17:50:57 -0300
Subject: perf_counter tools: Add missing rb_erase in dso__delete_symbols

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090601205057.GB7805@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index 31e8fae..039931f 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -58,6 +58,7 @@ static void dso__delete_symbols(struct dso *self)
 	while (next) {
 		pos = rb_entry(next, struct symbol, rb_node);
 		next = rb_next(&pos->rb_node);
+		rb_erase(&pos->rb_node, &self->syms);
 		symbol__delete(pos, self->sym_priv_size);
 	}
 }
-- 
cgit v0.10.2


From 4778541470cf7d074acd998fd40c06b94711e4ad Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Fri, 29 May 2009 07:41:26 +0000
Subject: sh: fix CONFIG_SH_PCLK_FREQ bug for sh7724

CONFIG_SH_PCLK_FREQ=33333333 is correct for sh7724.
sh7724 master clock is 33333333, but peripheral is 41666666.
This bug came to light because sh-sci driver had changed clk
from "module_clk" to "peripheral_clk"

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index e9392aa..dec8b7b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -502,8 +502,7 @@ config SH_PCLK_FREQ
 			      CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7705 || \
 			      CPU_SUBTYPE_SH7203 || CPU_SUBTYPE_SH7206 || \
 			      CPU_SUBTYPE_SH7263 || CPU_SUBTYPE_MXG    || \
-			      CPU_SUBTYPE_SH7786
-	default "41666666" if CPU_SUBTYPE_SH7724
+			      CPU_SUBTYPE_SH7786 || CPU_SUBTYPE_SH7724
 	default "60000000" if CPU_SUBTYPE_SH7751 || CPU_SUBTYPE_SH7751R
 	default "66000000" if CPU_SUBTYPE_SH4_202
 	default "50000000"
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
index a029d0fb..96d2587 100644
--- a/arch/sh/configs/se7724_defconfig
+++ b/arch/sh/configs/se7724_defconfig
@@ -243,7 +243,7 @@ CONFIG_SH_7724_SOLUTION_ENGINE=y
 #
 CONFIG_SH_TIMER_TMU=y
 # CONFIG_SH_TIMER_CMT is not set
-CONFIG_SH_PCLK_FREQ=41666666
+CONFIG_SH_PCLK_FREQ=33333333
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-- 
cgit v0.10.2


From dd0a3e77c825c9f5c6d2a97deb047f8d52026581 Mon Sep 17 00:00:00 2001
From: SUGIOKA Toshinobu <sugioka@itonet.co.jp>
Date: Mon, 1 Jun 2009 03:53:41 +0000
Subject: serial: sh-sci: Fix up PORT_SCI console output ordering.

Fix SCI transmission sequence in console output function.

This reorders the write sequence to match the SH-3 manual, and corrects
a console corruption bug observed on SH-3 SCI.

Signed-off-by: Toshinobu Sugioka <sugioka@itonet.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index fa4d52a..a4cf107 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -151,9 +151,8 @@ static void sci_poll_put_char(struct uart_port *port, unsigned char c)
 		status = sci_in(port, SCxSR);
 	} while (!(status & SCxSR_TDxE(port)));
 
-	sci_in(port, SCxSR);            /* Dummy read */
-	sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port) & ~SCxSR_TEND(port));
 	sci_out(port, SCxTDR, c);
+	sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port) & ~SCxSR_TEND(port));
 }
 #endif /* CONFIG_CONSOLE_POLL || CONFIG_SERIAL_SH_SCI_CONSOLE */
 
-- 
cgit v0.10.2


From d974ac24b762296eeaebb23e5eff8ed15ce44529 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Tue, 2 Jun 2009 02:49:13 +0000
Subject: sh: add RAMCR definition for sh4

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/cpu-sh4/cpu/cache.h b/arch/sh/include/cpu-sh4/cpu/cache.h
index 1c61ebf..7bfb9e8 100644
--- a/arch/sh/include/cpu-sh4/cpu/cache.h
+++ b/arch/sh/include/cpu-sh4/cpu/cache.h
@@ -38,5 +38,7 @@
 #define CACHE_IC_ADDRESS_ARRAY	0xf0000000
 #define CACHE_OC_ADDRESS_ARRAY	0xf4000000
 
+#define RAMCR			0xFF000074
+
 #endif /* __ASM_CPU_SH4_CACHE_H */
 
-- 
cgit v0.10.2


From fab88d9fe98e9091aafeb9789fbf2e04fdace8ed Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Tue, 2 Jun 2009 02:49:20 +0000
Subject: sh: add weak l2_cache_init function.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index d29e69c..ad85421 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -62,6 +62,11 @@ static void __init speculative_execution_init(void)
 #define speculative_execution_init()	do { } while (0)
 #endif
 
+/* 2nd-level cache init */
+void __uses_jump_to_uncached __attribute__ ((weak)) l2_cache_init(void)
+{
+}
+
 /*
  * Generic first-level cache init
  */
@@ -146,6 +151,8 @@ static void __uses_jump_to_uncached cache_init(void)
 	flags &= ~CCR_CACHE_ENABLE;
 #endif
 
+	l2_cache_init();
+
 	ctrl_outl(flags, CCR);
 	back_to_cached();
 }
-- 
cgit v0.10.2


From b4bd9eb0d8f5c20f942ff037c8a7939f69cb188a Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Tue, 2 Jun 2009 02:49:25 +0000
Subject: sh: sh7724: L2 cache initialization.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 191f0e2..000f3b8 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -495,6 +495,15 @@ void __init plat_early_device_setup(void)
 				   ARRAY_SIZE(sh7724_early_devices));
 }
 
+#define RAMCR_CACHE_L2FC	0x0002
+#define RAMCR_CACHE_L2E		0x0001
+#define L2_CACHE_ENABLE		(RAMCR_CACHE_L2E|RAMCR_CACHE_L2FC)
+void __uses_jump_to_uncached l2_cache_init(void)
+{
+	/* Enable L2 cache */
+	ctrl_outl(L2_CACHE_ENABLE, RAMCR);
+}
+
 enum {
 	UNUSED = 0,
 
-- 
cgit v0.10.2


From 138f025267dcc07d5e7d0bb1f20e9a6b5f2fdcf7 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Tue, 2 Jun 2009 02:49:28 +0000
Subject: sh: sh7723: L2 cache initialization.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index 04cb4aa..d8f4a13 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -14,6 +14,7 @@
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
 #include <linux/sh_timer.h>
+#include <linux/io.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 
@@ -485,6 +486,15 @@ void __init plat_early_device_setup(void)
 				   ARRAY_SIZE(sh7723_early_devices));
 }
 
+#define RAMCR_CACHE_L2FC	0x0002
+#define RAMCR_CACHE_L2E		0x0001
+#define L2_CACHE_ENABLE		(RAMCR_CACHE_L2E|RAMCR_CACHE_L2FC)
+void __uses_jump_to_uncached l2_cache_init(void)
+{
+	/* Enable L2 cache */
+	ctrl_outl(L2_CACHE_ENABLE, RAMCR);
+}
+
 enum {
 	UNUSED=0,
 
-- 
cgit v0.10.2


From 2af15d6a44b871ad4c2a651302374cde8f335480 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 28 May 2009 13:37:24 -0400
Subject: ftrace: add kernel command line function filtering

When using ftrace=function on the command line to trace functions
on boot up, one can not filter out functions that are commonly called.

This patch adds two new ftrace command line commands.

  ftrace_notrace=function-list
  ftrace_filter=function-list

Where function-list is a comma separated list of functions to filter.
The ftrace_notrace will make the functions listed not be included
in the function tracing, and ftrace_filter will only trace the functions
listed.

These two act the same as the debugfs/tracing/set_ftrace_notrace and
debugfs/tracing/set_ftrace_filter respectively.

The simple glob expressions that are allowed by the filter files can also
be used by the command line interface.

	ftrace_notrace=rcu*,*lock,*spin*

Will not trace any function that starts with rcu, ends with lock, or has
the word spin in it.

Note, if the self tests are enabled, they may interfere with the filtering
set by the command lines.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9243dd8..fcd3bfb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -751,12 +751,25 @@ and is between 256 and 4096 characters. It is defined in the file
 			ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
 
 	ftrace=[tracer]
-			[ftrace] will set and start the specified tracer
+			[FTRACE] will set and start the specified tracer
 			as early as possible in order to facilitate early
 			boot debugging.
 
 	ftrace_dump_on_oops
-			[ftrace] will dump the trace buffers on oops.
+			[FTRACE] will dump the trace buffers on oops.
+
+	ftrace_filter=[function-list]
+			[FTRACE] Limit the functions traced by the function
+			tracer at boot up. function-list is a comma separated
+			list of functions. This list can be changed at run
+			time by the set_ftrace_filter file in the debugfs
+			tracing directory. 
+
+	ftrace_notrace=[function-list]
+			[FTRACE] Do not trace the functions specified in
+			function-list. This list can be changed at run time
+			by the set_ftrace_notrace file in the debugfs
+			tracing directory.
 
 	gamecon.map[2|3]=
 			[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 140699a..2074e5b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -32,6 +32,7 @@
 #include <trace/events/sched.h>
 
 #include <asm/ftrace.h>
+#include <asm/setup.h>
 
 #include "trace_output.h"
 #include "trace_stat.h"
@@ -2369,6 +2370,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset)
 	ftrace_set_regex(buf, len, reset, 0);
 }
 
+/*
+ * command line interface to allow users to set filters on boot up.
+ */
+#define FTRACE_FILTER_SIZE		COMMAND_LINE_SIZE
+static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
+static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
+
+static int __init set_ftrace_notrace(char *str)
+{
+	strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_notrace=", set_ftrace_notrace);
+
+static int __init set_ftrace_filter(char *str)
+{
+	strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_filter=", set_ftrace_filter);
+
+static void __init set_ftrace_early_filter(char *buf, int enable)
+{
+	char *func;
+
+	while (buf) {
+		func = strsep(&buf, ",");
+		ftrace_set_regex(func, strlen(func), 0, enable);
+	}
+}
+
+static void __init set_ftrace_early_filters(void)
+{
+	if (ftrace_filter_buf[0])
+		set_ftrace_early_filter(ftrace_filter_buf, 1);
+	if (ftrace_notrace_buf[0])
+		set_ftrace_early_filter(ftrace_notrace_buf, 0);
+}
+
 static int
 ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 {
@@ -2829,6 +2869,8 @@ void __init ftrace_init(void)
 	if (ret)
 		pr_warning("Failed to register trace ftrace module notifier\n");
 
+	set_ftrace_early_filters();
+
 	return;
  failed:
 	ftrace_disabled = 1;
-- 
cgit v0.10.2


From 5e0a093910876882f91f1d4b8a1635a099e6c7ba Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 28 May 2009 15:50:13 -0400
Subject: tracing: fix config options to not show when automatically selected

There are two options that are selected by all tracers, but we want
to have those options available when no tracer is selected. These are

 The event tracer and sched switch tracer.

The are enabled by all tracers, but if a tracer is not selected we want
the options to appear. All tracers including them select TRACING.
Thus what we would like to do is:

  config EVENT_TRACER
	bool "prompt"
	depends on TRACING
	select TRACING

But that gives us a bug in the kbuild system since we just created a
circular dependency. We only want the prompt to show when TRACING is off.

This patch adds GENERIC_TRACER that all tracers will select instead of
TRACING. The two options (sched switch and event tracer) will select
TRACING directly and depend on !GENERIC_TRACER. This solves the cicular
dependency.

[ Impact: hide options that are selected by default ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a508b9d..6e55cc3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -56,6 +56,13 @@ config CONTEXT_SWITCH_TRACER
 	select MARKERS
 	bool
 
+# All tracer options should select GENERIC_TRACER. For those options that are
+# enabled by all tracers (context switch and event tracer) they select TRACING.
+# This allows those options to appear when no other tracer is selected. But the
+# options do not appear when something else selects it. We need the two options
+# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
+# hidding of the automatic options options.
+
 config TRACING
 	bool
 	select DEBUG_FS
@@ -66,6 +73,10 @@ config TRACING
 	select BINARY_PRINTF
 	select EVENT_TRACING
 
+config GENERIC_TRACER
+	bool
+	select TRACING
+
 #
 # Minimum requirements an architecture has to meet for us to
 # be able to offer generic tracing facilities:
@@ -95,7 +106,7 @@ config FUNCTION_TRACER
 	depends on HAVE_FUNCTION_TRACER
 	select FRAME_POINTER
 	select KALLSYMS
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  Enable the kernel to trace every kernel function. This is done
@@ -126,7 +137,7 @@ config IRQSOFF_TRACER
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
 	select TRACE_IRQFLAGS
-	select TRACING
+	select GENERIC_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This option measures the time spent in irqs-off critical
@@ -147,7 +158,7 @@ config PREEMPT_TRACER
 	default n
 	depends on GENERIC_TIME
 	depends on PREEMPT
-	select TRACING
+	select GENERIC_TRACER
 	select TRACER_MAX_TRACE
 	help
 	  This option measures the time spent in preemption off critical
@@ -166,7 +177,7 @@ config PREEMPT_TRACER
 config SYSPROF_TRACER
 	bool "Sysprof Tracer"
 	depends on X86
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer provides the trace needed by the 'Sysprof' userspace
@@ -174,7 +185,7 @@ config SYSPROF_TRACER
 
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
 	help
@@ -183,6 +194,7 @@ config SCHED_TRACER
 
 config ENABLE_CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
+	depends on !GENERIC_TRACER
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	help
@@ -191,6 +203,7 @@ config ENABLE_CONTEXT_SWITCH_TRACER
 
 config ENABLE_EVENT_TRACING
 	bool "Trace various events in the kernel"
+	depends on !GENERIC_TRACER
 	select TRACING
 	help
 	  This tracer hooks to various trace points in the kernel
@@ -204,14 +217,14 @@ config ENABLE_EVENT_TRACING
 config FTRACE_SYSCALLS
 	bool "Trace syscalls"
 	depends on HAVE_FTRACE_SYSCALLS
-	select TRACING
+	select GENERIC_TRACER
 	select KALLSYMS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
 config BOOT_TRACER
 	bool "Trace boot initcalls"
-	select TRACING
+	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	help
 	  This tracer helps developers to optimize boot times: it records
@@ -228,7 +241,7 @@ config BOOT_TRACER
 
 config TRACE_BRANCH_PROFILING
 	bool
-	select TRACING
+	select GENERIC_TRACER
 
 choice
 	prompt "Branch Profiling"
@@ -308,7 +321,7 @@ config BRANCH_TRACER
 config POWER_TRACER
 	bool "Trace power consumption behavior"
 	depends on X86
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  This tracer helps developers to analyze and optimize the kernels
 	  power management decisions, specifically the C-state and P-state
@@ -342,14 +355,14 @@ config STACK_TRACER
 config HW_BRANCH_TRACER
 	depends on HAVE_HW_BRANCH_TRACER
 	bool "Trace hw branches"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  This tracer records all branches on the system in a circular
 	  buffer giving access to the last N branches for each cpu.
 
 config KMEMTRACE
 	bool "Trace SLAB allocations"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  kmemtrace provides tracing for slab allocator functions, such as
 	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
@@ -369,7 +382,7 @@ config KMEMTRACE
 
 config WORKQUEUE_TRACER
 	bool "Trace workqueues"
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  The workqueue tracer provides some statistical informations
           about each cpu workqueue thread such as the number of the
@@ -385,7 +398,7 @@ config BLK_DEV_IO_TRACE
 	select RELAY
 	select DEBUG_FS
 	select TRACEPOINTS
-	select TRACING
+	select GENERIC_TRACER
 	select STACKTRACE
 	help
 	  Say Y here if you want to be able to trace the block layer actions
@@ -446,7 +459,7 @@ config FTRACE_SELFTEST
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
+	depends on GENERIC_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
@@ -457,7 +470,7 @@ config FTRACE_STARTUP_TEST
 config MMIOTRACE
 	bool "Memory mapped IO tracing"
 	depends on HAVE_MMIOTRACE_SUPPORT && PCI
-	select TRACING
+	select GENERIC_TRACER
 	help
 	  Mmiotrace traces Memory Mapped I/O access and is meant for
 	  debugging and reverse engineering. It is called from the ioremap
-- 
cgit v0.10.2


From 897f17a65389a26509bd0c79a9812d1c9ea8ea6f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 28 May 2009 16:31:21 -0400
Subject: tracing: combine the default tracers into one config

Both event tracer and sched switch plugin are selected by default
by all generic tracers. But if no generic tracer is enabled, their options
appear. But ether one of them will select the other, thus it only
makes sense to have the default tracers be selected by one option.

[ Impact: clean up kconfig menu ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6e55cc3..4a13e5a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -192,27 +192,14 @@ config SCHED_TRACER
 	  This tracer tracks the latency of the highest priority task
 	  to be scheduled in, starting from the point it has woken up.
 
-config ENABLE_CONTEXT_SWITCH_TRACER
-	bool "Trace process context switches"
-	depends on !GENERIC_TRACER
-	select TRACING
-	select CONTEXT_SWITCH_TRACER
-	help
-	  This tracer gets called from the context switch and records
-	  all switching of tasks.
-
-config ENABLE_EVENT_TRACING
-	bool "Trace various events in the kernel"
+config ENABLE_DEFAULT_TRACERS
+	bool "Trace process context switches and events"
 	depends on !GENERIC_TRACER
 	select TRACING
 	help
 	  This tracer hooks to various trace points in the kernel
 	  allowing the user to pick and choose which trace point they
-	  want to trace.
-
-	  Note, all tracers enable event tracing. This option is
-	  only a convenience to enable event tracing when no other
-	  tracers are selected.
+	  want to trace. It also includes the sched_switch tracer plugin.
 
 config FTRACE_SYSCALLS
 	bool "Trace syscalls"
-- 
cgit v0.10.2


From 6e25db44a7ad7eb380f4ec774ec00a8fcddea112 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 29 May 2009 11:24:59 +0800
Subject: tracing/events: fix a typo in __string() format output

"tsize" should be "\tsize". Also remove the space before "__str_loc".

Before:
 # cat tracing/events/irq/irq_handler_entry/format
        ...
        field:int irq;  offset:12;      size:4;
        field: __str_loc name;  offset:16;tsize:2;
        ...

After:
 # cat tracing/events/irq/irq_handler_entry/format
	...
        field:int irq;  offset:12;      size:4;
        field:__str_loc name;   offset:16;      size:2;
	...

[ Impact: standardize __string field description in events format file ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index b4ec83a..9276ec4 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -209,8 +209,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 
 #undef __string
 #define __string(item, src)						       \
-	ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t"	       \
-			       "offset:%u;tsize:%u;\n",			       \
+	ret = trace_seq_printf(s, "\tfield:__str_loc " #item ";\t"	       \
+			       "offset:%u;\tsize:%u;\n",		       \
 			       (unsigned int)offsetof(typeof(field),	       \
 					__str_loc_##item),		       \
 			       (unsigned int)sizeof(field.__str_loc_##item));  \
-- 
cgit v0.10.2


From a9c1c3abe1160a5632e48c929b02b740556bf423 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 1 Jun 2009 15:35:13 +0800
Subject: tracing/events: put TP_fast_assign into braces

Currently TP_fast_assign has a limitation that we can't define local
variables in it.

Here's one use case when we introduce __dynamic_array():

TP_fast_assign(
	type *p = __get_dynamic_array(item);

	foo(p);
	bar(p);
),

[ Impact: allow defining local variables in TP_fast_assign ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2384B1.90100@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 9276ec4..ee92682 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -471,7 +471,7 @@ static void ftrace_raw_event_##call(proto)				\
 		return;							\
 	entry	= ring_buffer_event_data(event);			\
 									\
-	assign;								\
+	{ assign; }							\
 									\
 	if (!filter_current_check_discard(event_call, entry, event))	\
 		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
-- 
cgit v0.10.2


From 7fcb7c472f455d1711eb5a7633204dba8800a6d6 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 1 Jun 2009 15:35:46 +0800
Subject: tracing/events: introduce __dynamic_array()

__string() is limited:

  - it's a char array, but we may want to define array with other types
  - a source string should be available, but we may just know the string size

We introduce __dynamic_array() to break those limitations, and __string()
becomes a wrapper of it. As a side effect, now __get_str() can be used
in TP_fast_assign but not only TP_print.

Take XFS for example, we have the string length in the dirent, but the
string itself is not NULL-terminated, so __dynamic_array() can be used:

TRACE_EVENT(xfs_dir2,
	TP_PROTO(struct xfs_da_args *args),
	TP_ARGS(args),

	TP_STRUCT__entry(
		__field(int, namelen)
		__dynamic_array(char, name, args->namelen + 1)
		...
	),

	TP_fast_assign(
		char *name = __get_str(name);

		if (args->namelen)
			memcpy(name, args->name, args->namelen);
		name[args->namelen] = '\0';

		__entry->namelen = args->namelen;
	),

	TP_printk("name %.*s namelen %d",
		  __entry->namelen ? __get_str(name) : NULL
		  __entry->namelen)
);

[ Impact: allow defining dynamic size arrays ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2384D2.3080403@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index ee92682..b5478da 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,14 +18,17 @@
 
 #include <linux/ftrace_event.h>
 
+#undef __field
+#define __field(type, item)		type	item;
+
 #undef __array
 #define __array(type, item, len)	type	item[len];
 
-#undef __field
-#define __field(type, item)		type	item;
+#undef __dynamic_array
+#define __dynamic_array(type, item, len) unsigned short __data_loc_##item;
 
 #undef __string
-#define __string(item, src)		unsigned short	__str_loc_##item;
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
@@ -35,7 +38,7 @@
 	struct ftrace_raw_##name {				\
 		struct trace_entry	ent;			\
 		tstruct						\
-		char			__str_data[0];		\
+		char			__data[0];		\
 	};							\
 	static struct ftrace_event_call event_##name
 
@@ -47,30 +50,31 @@
  *
  * Include the following:
  *
- * struct ftrace_str_offsets_<call> {
- *	int				<str1>;
- *	int				<str2>;
+ * struct ftrace_data_offsets_<call> {
+ *	int				<item1>;
+ *	int				<item2>;
  *	[...]
  * };
  *
- * The __string() macro will create each int <str>, this is to
- * keep the offset of each string from the beggining of the event
- * once we perform the strlen() of the src strings.
- *
+ * The __dynamic_array() macro will create each int <item>, this is
+ * to keep the offset of each array from the beginning of the event.
  */
 
+#undef __field
+#define __field(type, item);
+
 #undef __array
 #define __array(type, item, len)
 
-#undef __field
-#define __field(type, item);
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)	int item;
 
 #undef __string
-#define __string(item, src)	int item;
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-	struct ftrace_str_offsets_##call {				\
+	struct ftrace_data_offsets_##call {				\
 		tstruct;						\
 	};
 
@@ -119,8 +123,12 @@
 #undef TP_printk
 #define TP_printk(fmt, args...) fmt "\n", args
 
+#undef __get_dynamic_array
+#define __get_dynamic_array(field)	\
+		((void *)__entry + __entry->__data_loc_##field)
+
 #undef __get_str
-#define __get_str(field)	((char *)__entry + __entry->__str_loc_##field)
+#define __get_str(field) (char *)__get_dynamic_array(field)
 
 #undef __print_flags
 #define __print_flags(flag, delim, flag_array...)			\
@@ -207,16 +215,19 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	if (!ret)							\
 		return 0;
 
-#undef __string
-#define __string(item, src)						       \
-	ret = trace_seq_printf(s, "\tfield:__str_loc " #item ";\t"	       \
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"	       \
 			       "offset:%u;\tsize:%u;\n",		       \
 			       (unsigned int)offsetof(typeof(field),	       \
-					__str_loc_##item),		       \
-			       (unsigned int)sizeof(field.__str_loc_##item));  \
+					__data_loc_##item),		       \
+			       (unsigned int)sizeof(field.__data_loc_##item)); \
 	if (!ret)							       \
 		return 0;
 
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
 #undef __entry
 #define __entry REC
 
@@ -260,11 +271,14 @@ ftrace_format_##call(struct trace_seq *s)				\
 	if (ret)							\
 		return ret;
 
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\
+				offsetof(typeof(field), __data_loc_##item),    \
+				 sizeof(field.__data_loc_##item), 0);
+
 #undef __string
-#define __string(item, src)						       \
-	ret = trace_define_field(event_call, "__str_loc", #item,	       \
-				offsetof(typeof(field), __str_loc_##item),     \
-				 sizeof(field.__str_loc_##item), 0);
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
@@ -289,6 +303,43 @@ ftrace_define_fields_##call(void)					\
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
+ * remember the offset of each array from the beginning of the event.
+ */
+
+#undef __entry
+#define __entry entry
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				\
+	__data_offsets->item = __data_size +				\
+			       offsetof(typeof(*entry), __data);	\
+	__data_size += (len) * sizeof(type);
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+static inline int ftrace_get_offsets_##call(				\
+	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
+{									\
+	int __data_size = 0;						\
+	struct ftrace_raw_##call __maybe_unused *entry;			\
+									\
+	tstruct;							\
+									\
+	return __data_size;						\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
  * Stage 4 of the trace events.
  *
  * Override the macros in <trace/trace_events.h> to include the following:
@@ -432,15 +483,15 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
 #undef __array
 #define __array(type, item, len)
 
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				\
+	__entry->__data_loc_##item = __data_offsets.item;
+
 #undef __string
-#define __string(item, src)						\
-	__str_offsets.item = __str_size +				\
-			     offsetof(typeof(*entry), __str_data);	\
-	__str_size += strlen(src) + 1;
+#define __string(item, src) __dynamic_array(char, item, -1)       	\
 
 #undef __assign_str
 #define __assign_str(dst, src)						\
-	__entry->__str_loc_##dst = __str_offsets.dst;			\
 	strcpy(__get_str(dst), src);
 
 #undef TRACE_EVENT
@@ -451,26 +502,29 @@ static struct ftrace_event_call event_##call;				\
 									\
 static void ftrace_raw_event_##call(proto)				\
 {									\
-	struct ftrace_str_offsets_##call __maybe_unused __str_offsets;	\
+	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
 	struct ftrace_event_call *event_call = &event_##call;		\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	unsigned long irq_flags;					\
-	int __str_size = 0;						\
+	int __data_size;						\
 	int pc;								\
 									\
 	local_save_flags(irq_flags);					\
 	pc = preempt_count();						\
 									\
-	tstruct;							\
+	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 									\
 	event = trace_current_buffer_lock_reserve(event_##call.id,	\
-				 sizeof(struct ftrace_raw_##call) + __str_size,\
+				 sizeof(*entry) + __data_size,		\
 				 irq_flags, pc);			\
 	if (!event)							\
 		return;							\
 	entry	= ring_buffer_event_data(event);			\
 									\
+									\
+	tstruct								\
+									\
 	{ assign; }							\
 									\
 	if (!filter_current_check_discard(event_call, entry, event))	\
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index a7430b1..db6e54b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -478,12 +478,12 @@ enum {
 
 static int is_string_field(const char *type)
 {
+	if (strstr(type, "__data_loc") && strstr(type, "char"))
+		return FILTER_DYN_STRING;
+
 	if (strchr(type, '[') && strstr(type, "char"))
 		return FILTER_STATIC_STRING;
 
-	if (!strcmp(type, "__str_loc"))
-		return FILTER_DYN_STRING;
-
 	return 0;
 }
 
-- 
cgit v0.10.2


From ec081ddc3d90aab35bc0de19a358b964978837cf Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 1 Jun 2009 15:53:35 +0100
Subject: tracing: add exports to use __print_symbolic and __print_flags from a
 module

A patch to allow the use of __print_symbolic and __print_flags
from a module. This allows the current GFS2 tracing patch to
build.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
LKML-Reference: <1243868015.29604.542.camel@localhost.localdomain>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index c12d95d..0fe3b22 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -17,6 +17,7 @@
 static DECLARE_RWSEM(trace_event_mutex);
 
 DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
+EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
 
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
@@ -250,6 +251,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 
 	return p->buffer;
 }
+EXPORT_SYMBOL(ftrace_print_flags_seq);
 
 const char *
 ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
@@ -275,6 +277,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 
 	return p->buffer;
 }
+EXPORT_SYMBOL(ftrace_print_symbols_seq);
 
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
-- 
cgit v0.10.2


From 1d080d6c3141623c92caaebe20e847cb99ccbb60 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 1 Jun 2009 12:20:40 -0400
Subject: tracing: remove redundant SOFTIRQ from softirq event traces

After converting the softirq tracer to use te flags options, this
caused a regression with the name. Since the flag was used directly
it was printed out (i.e. HRTIMER_SOFTIRQ).

This patch only shows the softirq name without the SOFTIRQ part.

[ Impact: fix regression of output from softirq events ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 683fb36..b0c7ede 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -7,18 +7,18 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM irq
 
-#define softirq_name(sirq) { sirq, #sirq }
-#define show_softirq_name(val)						\
-	__print_symbolic(val,						\
-			 softirq_name(HI_SOFTIRQ),			\
-			 softirq_name(TIMER_SOFTIRQ),			\
-			 softirq_name(NET_TX_SOFTIRQ),			\
-			 softirq_name(NET_RX_SOFTIRQ),			\
-			 softirq_name(BLOCK_SOFTIRQ),			\
-			 softirq_name(TASKLET_SOFTIRQ),			\
-			 softirq_name(SCHED_SOFTIRQ),			\
-			 softirq_name(HRTIMER_SOFTIRQ),			\
-			 softirq_name(RCU_SOFTIRQ))
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define show_softirq_name(val)			\
+	__print_symbolic(val,			\
+			 softirq_name(HI),	\
+			 softirq_name(TIMER),	\
+			 softirq_name(NET_TX),	\
+			 softirq_name(NET_RX),	\
+			 softirq_name(BLOCK),	\
+			 softirq_name(TASKLET),	\
+			 softirq_name(SCHED),	\
+			 softirq_name(HRTIMER),	\
+			 softirq_name(RCU))
 
 /**
  * irq_handler_entry - called immediately before the irq action handler
-- 
cgit v0.10.2


From 112f38a7e36e9d688b389507136bf3af3e6d159b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 1 Jun 2009 15:16:05 -0400
Subject: tracing: make trace pipe recognize latency format flag

The trace_pipe did not recognize the latency format flag and would produce
different output than the trace file. The problem was partly due that
the trace flags in the iterator was not set as well as the trace_pipe
zeros out part of the iterator (including the flags) to be able to use
the same routines as the trace file. trace_flags of the iterator should
not cause any problems when not zeroed out by for trace_pipe.

Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bbf40f6..5c093ff 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -51,6 +51,7 @@ struct trace_iterator {
 	int			cpu_file;
 	struct mutex		mutex;
 	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
+	unsigned long		iter_flags;
 
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
@@ -58,7 +59,6 @@ struct trace_iterator {
 	int			cpu;
 	u64			ts;
 
-	unsigned long		iter_flags;
 	loff_t			pos;
 	long			idx;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a3a8a87..cae34c6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2826,6 +2826,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	/* trace pipe does not show start of buffer */
 	cpumask_setall(iter->started);
 
+	if (trace_flags & TRACE_ITER_LATENCY_FMT)
+		iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
 	iter->cpu_file = cpu_file;
 	iter->tr = &global_trace;
 	mutex_init(&iter->mutex);
-- 
cgit v0.10.2


From 0f6ce3de4ef6ff940308087c49760d068851c1a7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 1 Jun 2009 21:51:28 -0400
Subject: ftrace: do not profile functions when disabled

A race was found that if one were to enable and disable the function
profiler repeatedly, then the system can panic. This was because a profiled
function may be preempted just before disabling interrupts. While
the profiler is disabled and then reenabled, the preempted function
could start again, and access the hash as it is being initialized.

This just adds a check in the irq disabled part to check if the profiler
is enabled, and if it is not then it will just exit.

When the system is disabled, the profile_enabled variable is cleared
before calling the unregistering of the function profiler. This
unregistering calls stop machine which also acts as a synchronize schedule.

[ Impact: fix panic in enabling/disabling function profiler ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2074e5b..d6973df 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -599,7 +599,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
 	local_irq_save(flags);
 
 	stat = &__get_cpu_var(ftrace_profile_stats);
-	if (!stat->hash)
+	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
 	rec = ftrace_find_profiled_func(stat, ip);
@@ -630,7 +630,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
 
 	local_irq_save(flags);
 	stat = &__get_cpu_var(ftrace_profile_stats);
-	if (!stat->hash)
+	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
 	calltime = trace->rettime - trace->calltime;
@@ -724,6 +724,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 			ftrace_profile_enabled = 1;
 		} else {
 			ftrace_profile_enabled = 0;
+			/*
+			 * unregister_ftrace_profiler calls stop_machine
+			 * so this acts like an synchronize_sched.
+			 */
 			unregister_ftrace_profiler();
 		}
 	}
-- 
cgit v0.10.2


From 1f23920dbf1377fa9e4aef4f3d20c34a06a71a35 Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Thu, 7 May 2009 19:49:45 -0500
Subject: xfs: fix double unlock in xfs_swap_extents()

Regreesion from commit ef8f7fc, which rearranged the code in
xfs_swap_extents() leading to double unlock of xfs inode ilock.
That resulted in xfs_fsr deadlocking itself on platforms, which
don't handle double unlock of rw_semaphore nicely. It caused the
count go negative, which represents the write holder, without
really having one. ia64 is one of the platforms where deadlock
was easily reproduced and the fix was tested.

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e6d839b..7465f9e 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -347,13 +347,15 @@ xfs_swap_extents(
 
 	error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
 
-out_unlock:
-	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 out:
 	kmem_free(tempifp);
 	return error;
 
+out_unlock:
+	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	goto out;
+
 out_trans_cancel:
 	xfs_trans_cancel(tp, 0);
 	goto out_unlock;
-- 
cgit v0.10.2


From e6da7c9fed111ba1243297ee6eda8e24ae11c384 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Sat, 23 May 2009 14:30:12 -0500
Subject: xfs: fix overflow in xfs_growfs_data_private

In the case where growing a filesystem would leave the last AG
too small, the fixup code has an overflow in the calculation
of the new size with one fewer ag, because "nagcount" is a 32
bit number.  If the new filesystem has > 2^32 blocks in it
this causes a problem resulting in an EINVAL return from growfs:

 # xfs_io -f -c "truncate 19998630180864" fsfile
 # mkfs.xfs -f -bsize=4096 -dagsize=76288719b,size=3905982455b fsfile
 # mount -o loop fsfile /mnt
 # xfs_growfs /mnt

meta-data=/dev/loop0             isize=256    agcount=52,
agsize=76288719 blks
         =                       sectsz=512   attr=2
data     =                       bsize=4096   blocks=3905982455, imaxpct=5
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0
log      =internal               bsize=4096   blocks=32768, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=0
realtime =none                   extsz=4096   blocks=0, rtextents=0
xfs_growfs: XFS_IOC_FSGROWFSDATA xfsctl failed: Invalid argument

Reported-by: richard.ems@cape-horn-eng.com
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8379e3b..cbd451b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -160,7 +160,7 @@ xfs_growfs_data_private(
 	nagcount = new + (nb_mod != 0);
 	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
 		nagcount--;
-		nb = nagcount * mp->m_sb.sb_agblocks;
+		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
 		if (nb < mp->m_sb.sb_dblocks)
 			return XFS_ERROR(EINVAL);
 	}
-- 
cgit v0.10.2


From 1b17d766463d51904cb242f194a780737e5f73ef Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Mon, 1 Jun 2009 13:13:24 -0500
Subject: xfs: prevent deadlock in xfs_qm_shake()

It's possible to recurse into filesystem from the memory
allocation, which deadlocks in xfs_qm_shake(). Add check
for __GFP_FS, and bail out if it is not set.

Signed-off-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Hedi Berriche <hedi@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index af6843c..179cbd6 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 static inline int
 kmem_shake_allow(gfp_t gfp_mask)
 {
-	return (gfp_mask & __GFP_WAIT) != 0;
+	return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
 }
 
 #endif /* __XFS_SUPPORT_KMEM_H__ */
-- 
cgit v0.10.2


From 376bacb0a26bca722981d1610ffb76f951572bb1 Mon Sep 17 00:00:00 2001
From: Frank Seidel <frank@f-seidel.de>
Date: Sun, 29 Mar 2009 15:18:39 +0800
Subject: crypto: tcrypt - Reduce stack size

Applying kernel janitors todos (printk calls need KERN_*
constants on linebeginnings, reduce stack footprint where
possible) to tcrypts test_hash_speed (where stacks
memory footprint was very high (on i386 1184 bytes to
160 now).

Signed-off-by: Frank Seidel <frank@f-seidel.de>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index c3c9124..50d1e35 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -396,16 +396,16 @@ static void test_hash_speed(const char *algo, unsigned int sec,
 	struct scatterlist sg[TVMEMSIZE];
 	struct crypto_hash *tfm;
 	struct hash_desc desc;
-	char output[1024];
+	static char output[1024];
 	int i;
 	int ret;
 
-	printk("\ntesting speed of %s\n", algo);
+	printk(KERN_INFO "\ntesting speed of %s\n", algo);
 
 	tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC);
 
 	if (IS_ERR(tfm)) {
-		printk("failed to load transform for %s: %ld\n", algo,
+		printk(KERN_ERR "failed to load transform for %s: %ld\n", algo,
 		       PTR_ERR(tfm));
 		return;
 	}
@@ -414,7 +414,7 @@ static void test_hash_speed(const char *algo, unsigned int sec,
 	desc.flags = 0;
 
 	if (crypto_hash_digestsize(tfm) > sizeof(output)) {
-		printk("digestsize(%u) > outputbuffer(%zu)\n",
+		printk(KERN_ERR "digestsize(%u) > outputbuffer(%zu)\n",
 		       crypto_hash_digestsize(tfm), sizeof(output));
 		goto out;
 	}
@@ -427,12 +427,14 @@ static void test_hash_speed(const char *algo, unsigned int sec,
 
 	for (i = 0; speed[i].blen != 0; i++) {
 		if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
-			printk("template (%u) too big for tvmem (%lu)\n",
+			printk(KERN_ERR
+			       "template (%u) too big for tvmem (%lu)\n",
 			       speed[i].blen, TVMEMSIZE * PAGE_SIZE);
 			goto out;
 		}
 
-		printk("test%3u (%5u byte blocks,%5u bytes per update,%4u updates): ",
+		printk(KERN_INFO "test%3u "
+		       "(%5u byte blocks,%5u bytes per update,%4u updates): ",
 		       i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
 
 		if (sec)
@@ -443,7 +445,7 @@ static void test_hash_speed(const char *algo, unsigned int sec,
 					       speed[i].plen, output);
 
 		if (ret) {
-			printk("hashing failed ret=%d\n", ret);
+			printk(KERN_ERR "hashing failed ret=%d\n", ret);
 			break;
 		}
 	}
-- 
cgit v0.10.2


From 811d8f062668077e268a7292202bb923fe2ae896 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Sun, 29 Mar 2009 15:20:48 +0800
Subject: crypto: api - Use kzfree

Use kzfree() instead of memset() + kfree().

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/api.c b/crypto/api.c
index fd2545d..f500fb8 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -580,20 +580,17 @@ EXPORT_SYMBOL_GPL(crypto_alloc_tfm);
 void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm)
 {
 	struct crypto_alg *alg;
-	int size;
 
 	if (unlikely(!mem))
 		return;
 
 	alg = tfm->__crt_alg;
-	size = ksize(mem);
 
 	if (!tfm->exit && alg->cra_exit)
 		alg->cra_exit(tfm);
 	crypto_exit_ops(tfm);
 	crypto_mod_put(alg);
-	memset(mem, 0, size);
-	kfree(mem);
+	kzfree(mem);
 }
 EXPORT_SYMBOL_GPL(crypto_destroy_tfm);
 
-- 
cgit v0.10.2


From 505fd21d6138545aa5e96aa738975e6a9deb98a9 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Sun, 29 Mar 2009 15:33:53 +0800
Subject: crypto: cryptd - Use nivcipher in cryptd_alloc_ablkcipher

Use crypto_alloc_base() instead of crypto_alloc_ablkcipher() to
allocate underlying tfm in cryptd_alloc_ablkcipher. Because
crypto_alloc_ablkcipher() prefer GENIV encapsulated crypto instead of
raw one, while cryptd_alloc_ablkcipher needed the raw one.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index d14b226..ae5fa99 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -586,20 +586,24 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name,
 						  u32 type, u32 mask)
 {
 	char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
-	struct crypto_ablkcipher *tfm;
+	struct crypto_tfm *tfm;
 
 	if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
 		     "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
 		return ERR_PTR(-EINVAL);
-	tfm = crypto_alloc_ablkcipher(cryptd_alg_name, type, mask);
+	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+	type |= CRYPTO_ALG_TYPE_BLKCIPHER;
+	mask &= ~CRYPTO_ALG_TYPE_MASK;
+	mask |= (CRYPTO_ALG_GENIV | CRYPTO_ALG_TYPE_BLKCIPHER_MASK);
+	tfm = crypto_alloc_base(cryptd_alg_name, type, mask);
 	if (IS_ERR(tfm))
 		return ERR_CAST(tfm);
-	if (crypto_ablkcipher_tfm(tfm)->__crt_alg->cra_module != THIS_MODULE) {
-		crypto_free_ablkcipher(tfm);
+	if (tfm->__crt_alg->cra_module != THIS_MODULE) {
+		crypto_free_tfm(tfm);
 		return ERR_PTR(-EINVAL);
 	}
 
-	return __cryptd_ablkcipher_cast(tfm);
+	return __cryptd_ablkcipher_cast(__crypto_ablkcipher_cast(tfm));
 }
 EXPORT_SYMBOL_GPL(cryptd_alloc_ablkcipher);
 
-- 
cgit v0.10.2


From 150c7e85526e80474b87004f4b420e8834fdeb43 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Sun, 29 Mar 2009 15:39:02 +0800
Subject: crypto: fpu - Add template for blkcipher touching FPU

Blkcipher touching FPU need to be enclosed by kernel_fpu_begin() and
kernel_fpu_end(). If they are invoked in cipher algorithm
implementation, they will be invoked for each block, so that
performance will be hurt, because they are "slow" operations. This
patch implements "fpu" template, which makes these operations to be
invoked for each request.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index ebe7dee..cfb0010 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+obj-$(CONFIG_CRYPTO_FPU) += fpu.o
+
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
new file mode 100644
index 0000000..5f9781a
--- /dev/null
+++ b/arch/x86/crypto/fpu.c
@@ -0,0 +1,166 @@
+/*
+ * FPU: Wrapper for blkcipher touching fpu
+ *
+ * Copyright (c) Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/i387.h>
+
+struct crypto_fpu_ctx {
+	struct crypto_blkcipher *child;
+};
+
+static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_blkcipher *child = ctx->child;
+	int err;
+
+	crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
+				   CRYPTO_TFM_REQ_MASK);
+	err = crypto_blkcipher_setkey(child, key, keylen);
+	crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
+				     CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	int err;
+	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
+	struct crypto_blkcipher *child = ctx->child;
+	struct blkcipher_desc desc = {
+		.tfm = child,
+		.info = desc_in->info,
+		.flags = desc_in->flags,
+	};
+
+	kernel_fpu_begin();
+	err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
+	kernel_fpu_end();
+	return err;
+}
+
+static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	int err;
+	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
+	struct crypto_blkcipher *child = ctx->child;
+	struct blkcipher_desc desc = {
+		.tfm = child,
+		.info = desc_in->info,
+		.flags = desc_in->flags,
+	};
+
+	kernel_fpu_begin();
+	err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
+	kernel_fpu_end();
+	return err;
+}
+
+static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_blkcipher *cipher;
+
+	cipher = crypto_spawn_blkcipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	return 0;
+}
+
+static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_free_blkcipher(ctx->child);
+}
+
+static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
+{
+	struct crypto_instance *inst;
+	struct crypto_alg *alg;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	if (err)
+		return ERR_PTR(err);
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
+				  CRYPTO_ALG_TYPE_MASK);
+	if (IS_ERR(alg))
+		return ERR_CAST(alg);
+
+	inst = crypto_alloc_instance("fpu", alg);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+
+	inst->alg.cra_flags = alg->cra_flags;
+	inst->alg.cra_priority = alg->cra_priority;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+	inst->alg.cra_type = alg->cra_type;
+	inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
+	inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
+	inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
+	inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
+	inst->alg.cra_init = crypto_fpu_init_tfm;
+	inst->alg.cra_exit = crypto_fpu_exit_tfm;
+	inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
+	inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
+	inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
+
+out_put_alg:
+	crypto_mod_put(alg);
+	return inst;
+}
+
+static void crypto_fpu_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));
+	kfree(inst);
+}
+
+static struct crypto_template crypto_fpu_tmpl = {
+	.name = "fpu",
+	.alloc = crypto_fpu_alloc,
+	.free = crypto_fpu_free,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_fpu_module_init(void)
+{
+	return crypto_register_template(&crypto_fpu_tmpl);
+}
+
+static void __exit crypto_fpu_module_exit(void)
+{
+	crypto_unregister_template(&crypto_fpu_tmpl);
+}
+
+module_init(crypto_fpu_module_init);
+module_exit(crypto_fpu_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FPU block cipher wrapper");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 74d0e62..66ff22a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -241,6 +241,11 @@ config CRYPTO_XTS
 	  key size 256, 384 or 512 bits. This implementation currently
 	  can't handle a sectorsize which is not a multiple of 16 bytes.
 
+config CRYPTO_FPU
+	tristate
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_MANAGER
+
 comment "Hash modes"
 
 config CRYPTO_HMAC
-- 
cgit v0.10.2


From 2cf4ac8beb9dc50a315a6155b7b70e754d511958 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Sun, 29 Mar 2009 15:41:20 +0800
Subject: crypto: aes-ni - Add support for more modes

Because kernel_fpu_begin() and kernel_fpu_end() operations are too
slow, the performance gain of general mode implementation + aes-aesni
is almost all compensated.

The AES-NI support for more modes are implemented as follow:

- Add a new AES algorithm implementation named __aes-aesni without
  kernel_fpu_begin/end()

- Use fpu(<mode>(AES)) to provide kenrel_fpu_begin/end() invoking

- Add <mode>(AES) ablkcipher, which uses cryptd(fpu(<mode>(AES))) to
  defer cryption to cryptd context in soft_irq context.

Now the ctr, lrw, pcbc and xts support are added.

Performance testing based on dm-crypt shows that cryption time can be
reduced to 50% of general mode implementation + aes-aesni implementation.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 02af0af..4e66339 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -21,6 +21,22 @@
 #include <asm/i387.h>
 #include <asm/aes.h>
 
+#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
+#define HAS_CTR
+#endif
+
+#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
+#define HAS_LRW
+#endif
+
+#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
+#define HAS_PCBC
+#endif
+
+#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
+#define HAS_XTS
+#endif
+
 struct async_aes_ctx {
 	struct cryptd_ablkcipher *cryptd_tfm;
 };
@@ -137,6 +153,41 @@ static struct crypto_alg aesni_alg = {
 	}
 };
 
+static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+	aesni_enc(ctx, dst, src);
+}
+
+static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+	aesni_dec(ctx, dst, src);
+}
+
+static struct crypto_alg __aesni_alg = {
+	.cra_name		= "__aes-aesni",
+	.cra_driver_name	= "__driver-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+	.cra_alignmask		= 0,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(__aesni_alg.cra_list),
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= AES_MIN_KEY_SIZE,
+			.cia_max_keysize	= AES_MAX_KEY_SIZE,
+			.cia_setkey		= aes_set_key,
+			.cia_encrypt		= __aes_encrypt,
+			.cia_decrypt		= __aes_decrypt
+		}
+	}
+};
+
 static int ecb_encrypt(struct blkcipher_desc *desc,
 		       struct scatterlist *dst, struct scatterlist *src,
 		       unsigned int nbytes)
@@ -277,8 +328,16 @@ static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
 			unsigned int key_len)
 {
 	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+	int err;
 
-	return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len);
+	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+				    & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ablkcipher_setkey(child, key, key_len);
+	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+				    & CRYPTO_TFM_RES_MASK);
+	return err;
 }
 
 static int ablk_encrypt(struct ablkcipher_request *req)
@@ -411,6 +470,163 @@ static struct crypto_alg ablk_cbc_alg = {
 	},
 };
 
+#ifdef HAS_CTR
+static int ablk_ctr_init(struct crypto_tfm *tfm)
+{
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))",
+					     0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ablk_init_common(tfm, cryptd_tfm);
+	return 0;
+}
+
+static struct crypto_alg ablk_ctr_alg = {
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "ctr-aes-aesni",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
+	.cra_init		= ablk_ctr_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+			.geniv		= "chainiv",
+		},
+	},
+};
+#endif
+
+#ifdef HAS_LRW
+static int ablk_lrw_init(struct crypto_tfm *tfm)
+{
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))",
+					     0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ablk_init_common(tfm, cryptd_tfm);
+	return 0;
+}
+
+static struct crypto_alg ablk_lrw_alg = {
+	.cra_name		= "lrw(aes)",
+	.cra_driver_name	= "lrw-aes-aesni",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
+	.cra_init		= ablk_lrw_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+};
+#endif
+
+#ifdef HAS_PCBC
+static int ablk_pcbc_init(struct crypto_tfm *tfm)
+{
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))",
+					     0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ablk_init_common(tfm, cryptd_tfm);
+	return 0;
+}
+
+static struct crypto_alg ablk_pcbc_alg = {
+	.cra_name		= "pcbc(aes)",
+	.cra_driver_name	= "pcbc-aes-aesni",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(ablk_pcbc_alg.cra_list),
+	.cra_init		= ablk_pcbc_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+};
+#endif
+
+#ifdef HAS_XTS
+static int ablk_xts_init(struct crypto_tfm *tfm)
+{
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))",
+					     0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ablk_init_common(tfm, cryptd_tfm);
+	return 0;
+}
+
+static struct crypto_alg ablk_xts_alg = {
+	.cra_name		= "xts(aes)",
+	.cra_driver_name	= "xts-aes-aesni",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(ablk_xts_alg.cra_list),
+	.cra_init		= ablk_xts_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+			.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+};
+#endif
+
 static int __init aesni_init(void)
 {
 	int err;
@@ -421,6 +637,8 @@ static int __init aesni_init(void)
 	}
 	if ((err = crypto_register_alg(&aesni_alg)))
 		goto aes_err;
+	if ((err = crypto_register_alg(&__aesni_alg)))
+		goto __aes_err;
 	if ((err = crypto_register_alg(&blk_ecb_alg)))
 		goto blk_ecb_err;
 	if ((err = crypto_register_alg(&blk_cbc_alg)))
@@ -429,9 +647,41 @@ static int __init aesni_init(void)
 		goto ablk_ecb_err;
 	if ((err = crypto_register_alg(&ablk_cbc_alg)))
 		goto ablk_cbc_err;
+#ifdef HAS_CTR
+	if ((err = crypto_register_alg(&ablk_ctr_alg)))
+		goto ablk_ctr_err;
+#endif
+#ifdef HAS_LRW
+	if ((err = crypto_register_alg(&ablk_lrw_alg)))
+		goto ablk_lrw_err;
+#endif
+#ifdef HAS_PCBC
+	if ((err = crypto_register_alg(&ablk_pcbc_alg)))
+		goto ablk_pcbc_err;
+#endif
+#ifdef HAS_XTS
+	if ((err = crypto_register_alg(&ablk_xts_alg)))
+		goto ablk_xts_err;
+#endif
 
 	return err;
 
+#ifdef HAS_XTS
+ablk_xts_err:
+#endif
+#ifdef HAS_PCBC
+	crypto_unregister_alg(&ablk_pcbc_alg);
+ablk_pcbc_err:
+#endif
+#ifdef HAS_LRW
+	crypto_unregister_alg(&ablk_lrw_alg);
+ablk_lrw_err:
+#endif
+#ifdef HAS_CTR
+	crypto_unregister_alg(&ablk_ctr_alg);
+ablk_ctr_err:
+#endif
+	crypto_unregister_alg(&ablk_cbc_alg);
 ablk_cbc_err:
 	crypto_unregister_alg(&ablk_ecb_alg);
 ablk_ecb_err:
@@ -439,6 +689,8 @@ ablk_ecb_err:
 blk_cbc_err:
 	crypto_unregister_alg(&blk_ecb_alg);
 blk_ecb_err:
+	crypto_unregister_alg(&__aesni_alg);
+__aes_err:
 	crypto_unregister_alg(&aesni_alg);
 aes_err:
 	return err;
@@ -446,10 +698,23 @@ aes_err:
 
 static void __exit aesni_exit(void)
 {
+#ifdef HAS_XTS
+	crypto_unregister_alg(&ablk_xts_alg);
+#endif
+#ifdef HAS_PCBC
+	crypto_unregister_alg(&ablk_pcbc_alg);
+#endif
+#ifdef HAS_LRW
+	crypto_unregister_alg(&ablk_lrw_alg);
+#endif
+#ifdef HAS_CTR
+	crypto_unregister_alg(&ablk_ctr_alg);
+#endif
 	crypto_unregister_alg(&ablk_cbc_alg);
 	crypto_unregister_alg(&ablk_ecb_alg);
 	crypto_unregister_alg(&blk_cbc_alg);
 	crypto_unregister_alg(&blk_ecb_alg);
+	crypto_unregister_alg(&__aesni_alg);
 	crypto_unregister_alg(&aesni_alg);
 }
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 66ff22a..4dfdd03 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -491,6 +491,7 @@ config CRYPTO_AES_NI_INTEL
 	select CRYPTO_AES_X86_64
 	select CRYPTO_CRYPTD
 	select CRYPTO_ALGAPI
+	select CRYPTO_FPU
 	help
 	  Use Intel AES-NI instructions for AES algorithm.
 
@@ -510,6 +511,10 @@ config CRYPTO_AES_NI_INTEL
 
 	  See <http://csrc.nist.gov/encryption/aes/> for more information.
 
+	  In addition to AES cipher algorithm support, the
+	  acceleration for some popular block cipher mode is supported
+	  too, including ECB, CBC, CTR, LRW, PCBC, XTS.
+
 config CRYPTO_ANUBIS
 	tristate "Anubis cipher algorithm"
 	select CRYPTO_ALGAPI
-- 
cgit v0.10.2


From c79cf91006f03adb603879013b6710b6062c8445 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Sun, 29 Mar 2009 15:44:19 +0800
Subject: crypto: testmgr - Kill test_comp() sparse warnings

make C=1:
| crypto/testmgr.c:846:45: warning: incorrect type in argument 5 (different signedness)
| crypto/testmgr.c:846:45:    expected unsigned int *dlen
| crypto/testmgr.c:846:45:    got int *<noident>
| crypto/testmgr.c:878:47: warning: incorrect type in argument 5 (different signedness)
| crypto/testmgr.c:878:47:    expected unsigned int *dlen
| crypto/testmgr.c:878:47:    got int *<noident>

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index b50c3c6..bfee6e9 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -837,7 +837,8 @@ static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate,
 	int ret;
 
 	for (i = 0; i < ctcount; i++) {
-		int ilen, dlen = COMP_BUF_SIZE;
+		int ilen;
+		unsigned int dlen = COMP_BUF_SIZE;
 
 		memset(result, 0, sizeof (result));
 
@@ -869,7 +870,8 @@ static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate,
 	}
 
 	for (i = 0; i < dtcount; i++) {
-		int ilen, dlen = COMP_BUF_SIZE;
+		int ilen;
+		unsigned int dlen = COMP_BUF_SIZE;
 
 		memset(result, 0, sizeof (result));
 
-- 
cgit v0.10.2


From 2f6ceb7933f52f238519a9c2f65c628eecdac962 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Sun, 29 Mar 2009 15:45:30 +0800
Subject: crypto: pcomp - pcompress.c should include crypto/internal/compress.h

make C=1:
| crypto/pcompress.c:77:5: warning: symbol 'crypto_register_pcomp' was not declared. Should it be static?
| crypto/pcompress.c:89:5: warning: symbol 'crypto_unregister_pcomp' was not declared. Should it be static?

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/pcompress.c b/crypto/pcompress.c
index ca9a4af..bcadc03 100644
--- a/crypto/pcompress.c
+++ b/crypto/pcompress.c
@@ -26,6 +26,7 @@
 #include <linux/string.h>
 
 #include <crypto/compress.h>
+#include <crypto/internal/compress.h>
 
 #include "internal.h"
 
-- 
cgit v0.10.2


From 9f171adc192fc3c8ffbb691cfdcc70259d75c6ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 29 Mar 2009 15:47:06 +0800
Subject: hwrng: omap - Move probe function to .devinit.text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A pointer to omap_rng_probe is passed to the core via
platform_driver_register and so the function must not disappear when the
.init sections are discarded.  Otherwise (if also having HOTPLUG=y)
unbinding and binding a device to the driver via sysfs will result in an
oops as does a device being registered late.

An alternative to this patch is using platform_driver_probe instead of
platform_driver_register plus removing the pointer to the probe function
from the struct platform_driver.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Jan Engelhardt <jengelh@gmx.de>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 538313f..00dd3de 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -89,7 +89,7 @@ static struct hwrng omap_rng_ops = {
 	.data_read	= omap_rng_data_read,
 };
 
-static int __init omap_rng_probe(struct platform_device *pdev)
+static int __devinit omap_rng_probe(struct platform_device *pdev)
 {
 	struct resource *res, *mem;
 	int ret;
-- 
cgit v0.10.2


From 56af8cd44b05bd9649103b76a6e1e575682990e4 Mon Sep 17 00:00:00 2001
From: Lee Nipper <lee.nipper@gmail.com>
Date: Sun, 29 Mar 2009 15:50:50 +0800
Subject: crypto: talitos - scaffolding for new algorithm types

This patch is preparation for adding new algorithm types.

Some elements which are AEAD specific were renamed.
The algorithm template structure was changed to
use crypto_alg, and talitos_alg_alloc was made
more general with respect to algorithm types.
ipsec_esp_edesc is renamed to talitos_edesc
to use it in the upcoming ablkcipher routines.

Signed-off-by: Lee Nipper <lee.nipper@gmail.com>
Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index a3918c1..9833961 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -684,8 +684,8 @@ struct talitos_ctx {
 	unsigned int authsize;
 };
 
-static int aead_authenc_setauthsize(struct crypto_aead *authenc,
-						 unsigned int authsize)
+static int aead_setauthsize(struct crypto_aead *authenc,
+			    unsigned int authsize)
 {
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
 
@@ -694,8 +694,8 @@ static int aead_authenc_setauthsize(struct crypto_aead *authenc,
 	return 0;
 }
 
-static int aead_authenc_setkey(struct crypto_aead *authenc,
-					    const u8 *key, unsigned int keylen)
+static int aead_setkey(struct crypto_aead *authenc,
+		       const u8 *key, unsigned int keylen)
 {
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
 	struct rtattr *rta = (void *)key;
@@ -740,7 +740,7 @@ badkey:
 }
 
 /*
- * ipsec_esp_edesc - s/w-extended ipsec_esp descriptor
+ * talitos_edesc - s/w-extended descriptor
  * @src_nents: number of segments in input scatterlist
  * @dst_nents: number of segments in output scatterlist
  * @dma_len: length of dma mapped link_tbl space
@@ -752,7 +752,7 @@ badkey:
  * is greater than 1, an integrity check value is concatenated to the end
  * of link_tbl data
  */
-struct ipsec_esp_edesc {
+struct talitos_edesc {
 	int src_nents;
 	int dst_nents;
 	int dma_len;
@@ -762,7 +762,7 @@ struct ipsec_esp_edesc {
 };
 
 static void ipsec_esp_unmap(struct device *dev,
-			    struct ipsec_esp_edesc *edesc,
+			    struct talitos_edesc *edesc,
 			    struct aead_request *areq)
 {
 	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE);
@@ -795,8 +795,8 @@ static void ipsec_esp_encrypt_done(struct device *dev,
 				   int err)
 {
 	struct aead_request *areq = context;
-	struct ipsec_esp_edesc *edesc =
-		 container_of(desc, struct ipsec_esp_edesc, desc);
+	struct talitos_edesc *edesc =
+		 container_of(desc, struct talitos_edesc, desc);
 	struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
 	struct scatterlist *sg;
@@ -823,8 +823,8 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev,
 				   int err)
 {
 	struct aead_request *req = context;
-	struct ipsec_esp_edesc *edesc =
-		 container_of(desc, struct ipsec_esp_edesc, desc);
+	struct talitos_edesc *edesc =
+		 container_of(desc, struct talitos_edesc, desc);
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
 	struct scatterlist *sg;
@@ -855,8 +855,8 @@ static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
 				   int err)
 {
 	struct aead_request *req = context;
-	struct ipsec_esp_edesc *edesc =
-		 container_of(desc, struct ipsec_esp_edesc, desc);
+	struct talitos_edesc *edesc =
+		 container_of(desc, struct talitos_edesc, desc);
 
 	ipsec_esp_unmap(dev, edesc, req);
 
@@ -910,7 +910,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count,
 /*
  * fill in and submit ipsec_esp descriptor
  */
-static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
+static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 		     u8 *giv, u64 seq,
 		     void (*callback) (struct device *dev,
 				       struct talitos_desc *desc,
@@ -1052,14 +1052,14 @@ static int sg_count(struct scatterlist *sg_list, int nbytes)
 }
 
 /*
- * allocate and map the ipsec_esp extended descriptor
+ * allocate and map the extended descriptor
  */
-static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
+static struct talitos_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
 						     int icv_stashing)
 {
 	struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
-	struct ipsec_esp_edesc *edesc;
+	struct talitos_edesc *edesc;
 	int src_nents, dst_nents, alloc_len, dma_len;
 	gfp_t flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		      GFP_ATOMIC;
@@ -1084,7 +1084,7 @@ static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
 	 * allowing for two separate entries for ICV and generated ICV (+ 2),
 	 * and the ICV data itself
 	 */
-	alloc_len = sizeof(struct ipsec_esp_edesc);
+	alloc_len = sizeof(struct talitos_edesc);
 	if (src_nents || dst_nents) {
 		dma_len = (src_nents + dst_nents + 2) *
 				 sizeof(struct talitos_ptr) + ctx->authsize;
@@ -1109,11 +1109,11 @@ static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
 	return edesc;
 }
 
-static int aead_authenc_encrypt(struct aead_request *req)
+static int aead_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
-	struct ipsec_esp_edesc *edesc;
+	struct talitos_edesc *edesc;
 
 	/* allocate extended descriptor */
 	edesc = ipsec_esp_edesc_alloc(req, 0);
@@ -1128,13 +1128,13 @@ static int aead_authenc_encrypt(struct aead_request *req)
 
 
-static int aead_authenc_decrypt(struct aead_request *req)
+static int aead_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
 	unsigned int authsize = ctx->authsize;
 	struct talitos_private *priv = dev_get_drvdata(ctx->dev);
-	struct ipsec_esp_edesc *edesc;
+	struct talitos_edesc *edesc;
 	struct scatterlist *sg;
 	void *icvdata;
 
@@ -1180,13 +1180,12 @@ static int aead_authenc_decrypt(struct aead_request *req)
 	}
 }
 
-static int aead_authenc_givencrypt(
-	struct aead_givcrypt_request *req)
+static int aead_givencrypt(struct aead_givcrypt_request *req)
 {
 	struct aead_request *areq = &req->areq;
 	struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
-	struct ipsec_esp_edesc *edesc;
+	struct talitos_edesc *edesc;
 
 	/* allocate extended descriptor */
 	edesc = ipsec_esp_edesc_alloc(areq, 0);
@@ -1205,30 +1204,30 @@ static int aead_authenc_givencrypt(
 }
 
 struct talitos_alg_template {
-	char name[CRYPTO_MAX_ALG_NAME];
-	char driver_name[CRYPTO_MAX_ALG_NAME];
-	unsigned int blocksize;
-	struct aead_alg aead;
-	struct device *dev;
+	struct crypto_alg alg;
 	__be32 desc_hdr_template;
 };
 
 static struct talitos_alg_template driver_algs[] = {
-	/* single-pass ipsec_esp descriptor */
+	/* AEAD algorithms.  These use a single-pass ipsec_esp descriptor */
 	{
-		.name = "authenc(hmac(sha1),cbc(aes))",
-		.driver_name = "authenc-hmac-sha1-cbc-aes-talitos",
-		.blocksize = AES_BLOCK_SIZE,
-		.aead = {
-			.setkey = aead_authenc_setkey,
-			.setauthsize = aead_authenc_setauthsize,
-			.encrypt = aead_authenc_encrypt,
-			.decrypt = aead_authenc_decrypt,
-			.givencrypt = aead_authenc_givencrypt,
-			.geniv = "<built-in>",
-			.ivsize = AES_BLOCK_SIZE,
-			.maxauthsize = SHA1_DIGEST_SIZE,
-			},
+		.alg = {
+			.cra_name = "authenc(hmac(sha1),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-sha1-cbc-aes-talitos",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = SHA1_DIGEST_SIZE,
+			}
+		},
 		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
 			             DESC_HDR_SEL0_AESU |
 		                     DESC_HDR_MODE0_AESU_CBC |
@@ -1238,19 +1237,23 @@ static struct talitos_alg_template driver_algs[] = {
 		                     DESC_HDR_MODE1_MDEU_SHA1_HMAC,
 	},
 	{
-		.name = "authenc(hmac(sha1),cbc(des3_ede))",
-		.driver_name = "authenc-hmac-sha1-cbc-3des-talitos",
-		.blocksize = DES3_EDE_BLOCK_SIZE,
-		.aead = {
-			.setkey = aead_authenc_setkey,
-			.setauthsize = aead_authenc_setauthsize,
-			.encrypt = aead_authenc_encrypt,
-			.decrypt = aead_authenc_decrypt,
-			.givencrypt = aead_authenc_givencrypt,
-			.geniv = "<built-in>",
-			.ivsize = DES3_EDE_BLOCK_SIZE,
-			.maxauthsize = SHA1_DIGEST_SIZE,
-			},
+		.alg = {
+			.cra_name = "authenc(hmac(sha1),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-sha1-cbc-3des-talitos",
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = SHA1_DIGEST_SIZE,
+			}
+		},
 		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
 			             DESC_HDR_SEL0_DEU |
 		                     DESC_HDR_MODE0_DEU_CBC |
@@ -1261,19 +1264,23 @@ static struct talitos_alg_template driver_algs[] = {
 		                     DESC_HDR_MODE1_MDEU_SHA1_HMAC,
 	},
 	{
-		.name = "authenc(hmac(sha256),cbc(aes))",
-		.driver_name = "authenc-hmac-sha256-cbc-aes-talitos",
-		.blocksize = AES_BLOCK_SIZE,
-		.aead = {
-			.setkey = aead_authenc_setkey,
-			.setauthsize = aead_authenc_setauthsize,
-			.encrypt = aead_authenc_encrypt,
-			.decrypt = aead_authenc_decrypt,
-			.givencrypt = aead_authenc_givencrypt,
-			.geniv = "<built-in>",
-			.ivsize = AES_BLOCK_SIZE,
-			.maxauthsize = SHA256_DIGEST_SIZE,
-			},
+		.alg = {
+			.cra_name = "authenc(hmac(sha256),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-sha256-cbc-aes-talitos",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = SHA256_DIGEST_SIZE,
+			}
+		},
 		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
 			             DESC_HDR_SEL0_AESU |
 		                     DESC_HDR_MODE0_AESU_CBC |
@@ -1283,19 +1290,23 @@ static struct talitos_alg_template driver_algs[] = {
 		                     DESC_HDR_MODE1_MDEU_SHA256_HMAC,
 	},
 	{
-		.name = "authenc(hmac(sha256),cbc(des3_ede))",
-		.driver_name = "authenc-hmac-sha256-cbc-3des-talitos",
-		.blocksize = DES3_EDE_BLOCK_SIZE,
-		.aead = {
-			.setkey = aead_authenc_setkey,
-			.setauthsize = aead_authenc_setauthsize,
-			.encrypt = aead_authenc_encrypt,
-			.decrypt = aead_authenc_decrypt,
-			.givencrypt = aead_authenc_givencrypt,
-			.geniv = "<built-in>",
-			.ivsize = DES3_EDE_BLOCK_SIZE,
-			.maxauthsize = SHA256_DIGEST_SIZE,
-			},
+		.alg = {
+			.cra_name = "authenc(hmac(sha256),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-sha256-cbc-3des-talitos",
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = SHA256_DIGEST_SIZE,
+			}
+		},
 		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
 			             DESC_HDR_SEL0_DEU |
 		                     DESC_HDR_MODE0_DEU_CBC |
@@ -1306,19 +1317,23 @@ static struct talitos_alg_template driver_algs[] = {
 		                     DESC_HDR_MODE1_MDEU_SHA256_HMAC,
 	},
 	{
-		.name = "authenc(hmac(md5),cbc(aes))",
-		.driver_name = "authenc-hmac-md5-cbc-aes-talitos",
-		.blocksize = AES_BLOCK_SIZE,
-		.aead = {
-			.setkey = aead_authenc_setkey,
-			.setauthsize = aead_authenc_setauthsize,
-			.encrypt = aead_authenc_encrypt,
-			.decrypt = aead_authenc_decrypt,
-			.givencrypt = aead_authenc_givencrypt,
-			.geniv = "<built-in>",
-			.ivsize = AES_BLOCK_SIZE,
-			.maxauthsize = MD5_DIGEST_SIZE,
-			},
+		.alg = {
+			.cra_name = "authenc(hmac(md5),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-md5-cbc-aes-talitos",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = MD5_DIGEST_SIZE,
+			}
+		},
 		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
 			             DESC_HDR_SEL0_AESU |
 		                     DESC_HDR_MODE0_AESU_CBC |
@@ -1328,19 +1343,23 @@ static struct talitos_alg_template driver_algs[] = {
 		                     DESC_HDR_MODE1_MDEU_MD5_HMAC,
 	},
 	{
-		.name = "authenc(hmac(md5),cbc(des3_ede))",
-		.driver_name = "authenc-hmac-md5-cbc-3des-talitos",
-		.blocksize = DES3_EDE_BLOCK_SIZE,
-		.aead = {
-			.setkey = aead_authenc_setkey,
-			.setauthsize = aead_authenc_setauthsize,
-			.encrypt = aead_authenc_encrypt,
-			.decrypt = aead_authenc_decrypt,
-			.givencrypt = aead_authenc_givencrypt,
-			.geniv = "<built-in>",
-			.ivsize = DES3_EDE_BLOCK_SIZE,
-			.maxauthsize = MD5_DIGEST_SIZE,
-			},
+		.alg = {
+			.cra_name = "authenc(hmac(md5),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-md5-cbc-3des-talitos",
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_aead_type,
+			.cra_aead = {
+				.setkey = aead_setkey,
+				.setauthsize = aead_setauthsize,
+				.encrypt = aead_encrypt,
+				.decrypt = aead_decrypt,
+				.givencrypt = aead_givencrypt,
+				.geniv = "<built-in>",
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = MD5_DIGEST_SIZE,
+			}
+		},
 		.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
 			             DESC_HDR_SEL0_DEU |
 		                     DESC_HDR_MODE0_DEU_CBC |
@@ -1453,19 +1472,13 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
 		return ERR_PTR(-ENOMEM);
 
 	alg = &t_alg->crypto_alg;
+	*alg = template->alg;
 
-	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name);
-	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
-		 template->driver_name);
 	alg->cra_module = THIS_MODULE;
 	alg->cra_init = talitos_cra_init;
 	alg->cra_priority = TALITOS_CRA_PRIORITY;
-	alg->cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
-	alg->cra_blocksize = template->blocksize;
 	alg->cra_alignmask = 0;
-	alg->cra_type = &crypto_aead_type;
 	alg->cra_ctxsize = sizeof(struct talitos_ctx);
-	alg->cra_u.aead = template->aead;
 
 	t_alg->desc_hdr_template = template->desc_hdr_template;
 	t_alg->dev = dev;
-- 
cgit v0.10.2


From 4de9d0b547b97e40c93a885ac6246c2c5fef05cb Mon Sep 17 00:00:00 2001
From: Lee Nipper <lee.nipper@gmail.com>
Date: Sun, 29 Mar 2009 15:52:32 +0800
Subject: crypto: talitos - Add ablkcipher algorithms

Add these ablkcipher algorithms:
cbc(aes),
cbc(des3_ede).

Added handling of chained scatterlists with zero length entry
because eseqiv uses it.
Added new map and unmap routines.

Signed-off-by: Lee Nipper <lee.nipper@gmail.com>
Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 9833961..a0b0a63 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -44,6 +44,8 @@
 #include <crypto/sha.h>
 #include <crypto/aead.h>
 #include <crypto/authenc.h>
+#include <crypto/skcipher.h>
+#include <crypto/scatterwalk.h>
 
 #include "talitos.h"
 
@@ -755,12 +757,62 @@ badkey:
 struct talitos_edesc {
 	int src_nents;
 	int dst_nents;
+	int src_is_chained;
+	int dst_is_chained;
 	int dma_len;
 	dma_addr_t dma_link_tbl;
 	struct talitos_desc desc;
 	struct talitos_ptr link_tbl[0];
 };
 
+static int talitos_map_sg(struct device *dev, struct scatterlist *sg,
+			  unsigned int nents, enum dma_data_direction dir,
+			  int chained)
+{
+	if (unlikely(chained))
+		while (sg) {
+			dma_map_sg(dev, sg, 1, dir);
+			sg = scatterwalk_sg_next(sg);
+		}
+	else
+		dma_map_sg(dev, sg, nents, dir);
+	return nents;
+}
+
+static void talitos_unmap_sg_chain(struct device *dev, struct scatterlist *sg,
+				   enum dma_data_direction dir)
+{
+	while (sg) {
+		dma_unmap_sg(dev, sg, 1, dir);
+		sg = scatterwalk_sg_next(sg);
+	}
+}
+
+static void talitos_sg_unmap(struct device *dev,
+			     struct talitos_edesc *edesc,
+			     struct scatterlist *src,
+			     struct scatterlist *dst)
+{
+	unsigned int src_nents = edesc->src_nents ? : 1;
+	unsigned int dst_nents = edesc->dst_nents ? : 1;
+
+	if (src != dst) {
+		if (edesc->src_is_chained)
+			talitos_unmap_sg_chain(dev, src, DMA_TO_DEVICE);
+		else
+			dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
+
+		if (edesc->dst_is_chained)
+			talitos_unmap_sg_chain(dev, dst, DMA_FROM_DEVICE);
+		else
+			dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE);
+	} else
+		if (edesc->src_is_chained)
+			talitos_unmap_sg_chain(dev, src, DMA_BIDIRECTIONAL);
+		else
+			dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL);
+}
+
 static void ipsec_esp_unmap(struct device *dev,
 			    struct talitos_edesc *edesc,
 			    struct aead_request *areq)
@@ -772,15 +824,7 @@ static void ipsec_esp_unmap(struct device *dev,
 
 	dma_unmap_sg(dev, areq->assoc, 1, DMA_TO_DEVICE);
 
-	if (areq->src != areq->dst) {
-		dma_unmap_sg(dev, areq->src, edesc->src_nents ? : 1,
-			     DMA_TO_DEVICE);
-		dma_unmap_sg(dev, areq->dst, edesc->dst_nents ? : 1,
-			     DMA_FROM_DEVICE);
-	} else {
-		dma_unmap_sg(dev, areq->src, edesc->src_nents ? : 1,
-			     DMA_BIDIRECTIONAL);
-	}
+	talitos_sg_unmap(dev, edesc, areq->src, areq->dst);
 
 	if (edesc->dma_len)
 		dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
@@ -886,7 +930,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count,
 		link_tbl_ptr->j_extent = 0;
 		link_tbl_ptr++;
 		cryptlen -= sg_dma_len(sg);
-		sg = sg_next(sg);
+		sg = scatterwalk_sg_next(sg);
 	}
 
 	/* adjust (decrease) last one (or two) entry's len to cryptlen */
@@ -952,12 +996,11 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	desc->ptr[4].len = cpu_to_be16(cryptlen);
 	desc->ptr[4].j_extent = authsize;
 
-	if (areq->src == areq->dst)
-		sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ? : 1,
-				      DMA_BIDIRECTIONAL);
-	else
-		sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ? : 1,
-				      DMA_TO_DEVICE);
+	sg_count = talitos_map_sg(dev, areq->src,
+				  edesc->src_nents ? : 1,
+				  (areq->src == areq->dst) ? DMA_BIDIRECTIONAL :
+				      DMA_TO_DEVICE,
+				  edesc->src_is_chained);
 
 	if (sg_count == 1) {
 		desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src));
@@ -986,8 +1029,10 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	desc->ptr[5].j_extent = authsize;
 
 	if (areq->src != areq->dst) {
-		sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1,
-				      DMA_FROM_DEVICE);
+		sg_count = talitos_map_sg(dev, areq->dst,
+					  edesc->dst_nents ? : 1,
+					  DMA_FROM_DEVICE,
+					  edesc->dst_is_chained);
 	}
 
 	if (sg_count == 1) {
@@ -1037,15 +1082,18 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 /*
  * derive number of elements in scatterlist
  */
-static int sg_count(struct scatterlist *sg_list, int nbytes)
+static int sg_count(struct scatterlist *sg_list, int nbytes, int *chained)
 {
 	struct scatterlist *sg = sg_list;
 	int sg_nents = 0;
 
-	while (nbytes) {
+	*chained = 0;
+	while (nbytes > 0) {
 		sg_nents++;
 		nbytes -= sg->length;
-		sg = sg_next(sg);
+		if (!sg_is_last(sg) && (sg + 1)->length == 0)
+			*chained = 1;
+		sg = scatterwalk_sg_next(sg);
 	}
 
 	return sg_nents;
@@ -1054,28 +1102,32 @@ static int sg_count(struct scatterlist *sg_list, int nbytes)
 /*
  * allocate and map the extended descriptor
  */
-static struct talitos_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
-						     int icv_stashing)
+static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
+						 struct scatterlist *src,
+						 struct scatterlist *dst,
+						 unsigned int cryptlen,
+						 unsigned int authsize,
+						 int icv_stashing,
+						 u32 cryptoflags)
 {
-	struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
-	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
 	struct talitos_edesc *edesc;
 	int src_nents, dst_nents, alloc_len, dma_len;
-	gfp_t flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+	int src_chained, dst_chained = 0;
+	gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		      GFP_ATOMIC;
 
-	if (areq->cryptlen + ctx->authsize > TALITOS_MAX_DATA_LEN) {
-		dev_err(ctx->dev, "cryptlen exceeds h/w max limit\n");
+	if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) {
+		dev_err(dev, "length exceeds h/w max limit\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	src_nents = sg_count(areq->src, areq->cryptlen + ctx->authsize);
+	src_nents = sg_count(src, cryptlen + authsize, &src_chained);
 	src_nents = (src_nents == 1) ? 0 : src_nents;
 
-	if (areq->dst == areq->src) {
+	if (dst == src) {
 		dst_nents = src_nents;
 	} else {
-		dst_nents = sg_count(areq->dst, areq->cryptlen + ctx->authsize);
+		dst_nents = sg_count(dst, cryptlen + authsize, &dst_chained);
 		dst_nents = (dst_nents == 1) ? 0 : dst_nents;
 	}
 
@@ -1087,28 +1139,41 @@ static struct talitos_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
 	alloc_len = sizeof(struct talitos_edesc);
 	if (src_nents || dst_nents) {
 		dma_len = (src_nents + dst_nents + 2) *
-				 sizeof(struct talitos_ptr) + ctx->authsize;
+				 sizeof(struct talitos_ptr) + authsize;
 		alloc_len += dma_len;
 	} else {
 		dma_len = 0;
-		alloc_len += icv_stashing ? ctx->authsize : 0;
+		alloc_len += icv_stashing ? authsize : 0;
 	}
 
 	edesc = kmalloc(alloc_len, GFP_DMA | flags);
 	if (!edesc) {
-		dev_err(ctx->dev, "could not allocate edescriptor\n");
+		dev_err(dev, "could not allocate edescriptor\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
+	edesc->src_is_chained = src_chained;
+	edesc->dst_is_chained = dst_chained;
 	edesc->dma_len = dma_len;
-	edesc->dma_link_tbl = dma_map_single(ctx->dev, &edesc->link_tbl[0],
+	edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0],
 					     edesc->dma_len, DMA_BIDIRECTIONAL);
 
 	return edesc;
 }
 
+static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq,
+					      int icv_stashing)
+{
+	struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
+
+	return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst,
+				   areq->cryptlen, ctx->authsize, icv_stashing,
+				   areq->base.flags);
+}
+
 static int aead_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
@@ -1116,7 +1181,7 @@ static int aead_encrypt(struct aead_request *req)
 	struct talitos_edesc *edesc;
 
 	/* allocate extended descriptor */
-	edesc = ipsec_esp_edesc_alloc(req, 0);
+	edesc = aead_edesc_alloc(req, 0);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
@@ -1141,7 +1206,7 @@ static int aead_decrypt(struct aead_request *req)
 	req->cryptlen -= authsize;
 
 	/* allocate extended descriptor */
-	edesc = ipsec_esp_edesc_alloc(req, 1);
+	edesc = aead_edesc_alloc(req, 1);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
@@ -1188,7 +1253,7 @@ static int aead_givencrypt(struct aead_givcrypt_request *req)
 	struct talitos_edesc *edesc;
 
 	/* allocate extended descriptor */
-	edesc = ipsec_esp_edesc_alloc(areq, 0);
+	edesc = aead_edesc_alloc(areq, 0);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
@@ -1203,6 +1268,199 @@ static int aead_givencrypt(struct aead_givcrypt_request *req)
 			 ipsec_esp_encrypt_done);
 }
 
+static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
+			     const u8 *key, unsigned int keylen)
+{
+	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct ablkcipher_alg *alg = crypto_ablkcipher_alg(cipher);
+
+	if (keylen > TALITOS_MAX_KEY_SIZE)
+		goto badkey;
+
+	if (keylen < alg->min_keysize || keylen > alg->max_keysize)
+		goto badkey;
+
+	memcpy(&ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+
+badkey:
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+static void common_nonsnoop_unmap(struct device *dev,
+				  struct talitos_edesc *edesc,
+				  struct ablkcipher_request *areq)
+{
+	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
+	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE);
+	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE);
+
+	talitos_sg_unmap(dev, edesc, areq->src, areq->dst);
+
+	if (edesc->dma_len)
+		dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
+				 DMA_BIDIRECTIONAL);
+}
+
+static void ablkcipher_done(struct device *dev,
+			    struct talitos_desc *desc, void *context,
+			    int err)
+{
+	struct ablkcipher_request *areq = context;
+	struct talitos_edesc *edesc =
+		 container_of(desc, struct talitos_edesc, desc);
+
+	common_nonsnoop_unmap(dev, edesc, areq);
+
+	kfree(edesc);
+
+	areq->base.complete(&areq->base, err);
+}
+
+static int common_nonsnoop(struct talitos_edesc *edesc,
+			   struct ablkcipher_request *areq,
+			   u8 *giv,
+			   void (*callback) (struct device *dev,
+					     struct talitos_desc *desc,
+					     void *context, int error))
+{
+	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct device *dev = ctx->dev;
+	struct talitos_desc *desc = &edesc->desc;
+	unsigned int cryptlen = areq->nbytes;
+	unsigned int ivsize;
+	int sg_count, ret;
+
+	/* first DWORD empty */
+	desc->ptr[0].len = 0;
+	desc->ptr[0].ptr = 0;
+	desc->ptr[0].j_extent = 0;
+
+	/* cipher iv */
+	ivsize = crypto_ablkcipher_ivsize(cipher);
+	map_single_talitos_ptr(dev, &desc->ptr[1], ivsize, giv ?: areq->info, 0,
+			       DMA_TO_DEVICE);
+
+	/* cipher key */
+	map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
+			       (char *)&ctx->key, 0, DMA_TO_DEVICE);
+
+	/*
+	 * cipher in
+	 */
+	desc->ptr[3].len = cpu_to_be16(cryptlen);
+	desc->ptr[3].j_extent = 0;
+
+	sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1,
+				  (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
+							   : DMA_TO_DEVICE,
+				  edesc->src_is_chained);
+
+	if (sg_count == 1) {
+		desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq->src));
+	} else {
+		sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen,
+					  &edesc->link_tbl[0]);
+		if (sg_count > 1) {
+			desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP;
+			desc->ptr[3].ptr = cpu_to_be32(edesc->dma_link_tbl);
+			dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
+						   edesc->dma_len, DMA_BIDIRECTIONAL);
+		} else {
+			/* Only one segment now, so no link tbl needed */
+			desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq->src));
+		}
+	}
+
+	/* cipher out */
+	desc->ptr[4].len = cpu_to_be16(cryptlen);
+	desc->ptr[4].j_extent = 0;
+
+	if (areq->src != areq->dst)
+		sg_count = talitos_map_sg(dev, areq->dst,
+					  edesc->dst_nents ? : 1,
+					  DMA_FROM_DEVICE,
+					  edesc->dst_is_chained);
+
+	if (sg_count == 1) {
+		desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->dst));
+	} else {
+		struct talitos_ptr *link_tbl_ptr =
+			&edesc->link_tbl[edesc->src_nents + 1];
+
+		desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
+		desc->ptr[4].ptr = cpu_to_be32((struct talitos_ptr *)
+					       edesc->dma_link_tbl +
+					       edesc->src_nents + 1);
+		sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen,
+					  link_tbl_ptr);
+		dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
+					   edesc->dma_len, DMA_BIDIRECTIONAL);
+	}
+
+	/* iv out */
+	map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0,
+			       DMA_FROM_DEVICE);
+
+	/* last DWORD empty */
+	desc->ptr[6].len = 0;
+	desc->ptr[6].ptr = 0;
+	desc->ptr[6].j_extent = 0;
+
+	ret = talitos_submit(dev, desc, callback, areq);
+	if (ret != -EINPROGRESS) {
+		common_nonsnoop_unmap(dev, edesc, areq);
+		kfree(edesc);
+	}
+	return ret;
+}
+
+static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request *areq)
+{
+	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+
+	return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, areq->nbytes,
+				   0, 0, areq->base.flags);
+}
+
+static int ablkcipher_encrypt(struct ablkcipher_request *areq)
+{
+	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct talitos_edesc *edesc;
+
+	/* allocate extended descriptor */
+	edesc = ablkcipher_edesc_alloc(areq);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* set encrypt */
+	edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_MODE0_ENCRYPT;
+
+	return common_nonsnoop(edesc, areq, NULL, ablkcipher_done);
+}
+
+static int ablkcipher_decrypt(struct ablkcipher_request *areq)
+{
+	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	struct talitos_edesc *edesc;
+
+	/* allocate extended descriptor */
+	edesc = ablkcipher_edesc_alloc(areq);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
+
+	return common_nonsnoop(edesc, areq, NULL, ablkcipher_done);
+}
+
 struct talitos_alg_template {
 	struct crypto_alg alg;
 	__be32 desc_hdr_template;
@@ -1368,6 +1626,52 @@ static struct talitos_alg_template driver_algs[] = {
 		                     DESC_HDR_MODE1_MDEU_INIT |
 		                     DESC_HDR_MODE1_MDEU_PAD |
 		                     DESC_HDR_MODE1_MDEU_MD5_HMAC,
+	},
+	/* ABLKCIPHER algorithms. */
+	{
+		.alg = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "cbc-aes-talitos",
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+                                     CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_ablkcipher_type,
+			.cra_ablkcipher = {
+				.setkey = ablkcipher_setkey,
+				.encrypt = ablkcipher_encrypt,
+				.decrypt = ablkcipher_decrypt,
+				.geniv = "eseqiv",
+				.min_keysize = AES_MIN_KEY_SIZE,
+				.max_keysize = AES_MAX_KEY_SIZE,
+				.ivsize = AES_BLOCK_SIZE,
+			}
+		},
+		.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+				     DESC_HDR_SEL0_AESU |
+				     DESC_HDR_MODE0_AESU_CBC,
+	},
+	{
+		.alg = {
+			.cra_name = "cbc(des3_ede)",
+			.cra_driver_name = "cbc-3des-talitos",
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+                                     CRYPTO_ALG_ASYNC,
+			.cra_type = &crypto_ablkcipher_type,
+			.cra_ablkcipher = {
+				.setkey = ablkcipher_setkey,
+				.encrypt = ablkcipher_encrypt,
+				.decrypt = ablkcipher_decrypt,
+				.geniv = "eseqiv",
+				.min_keysize = DES3_EDE_KEY_SIZE,
+				.max_keysize = DES3_EDE_KEY_SIZE,
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+			}
+		},
+		.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+			             DESC_HDR_SEL0_DEU |
+		                     DESC_HDR_MODE0_DEU_CBC |
+		                     DESC_HDR_MODE0_DEU_3DES,
 	}
 };
 
-- 
cgit v0.10.2


From e938e4656b3ee32e046ee8293411a07be9d72eb8 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Sun, 29 Mar 2009 15:53:23 +0800
Subject: crypto: talitos - Whitespace/codingstyle/overrun lines cleanup

no functional changes.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index a0b0a63..a073e6b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -341,7 +341,8 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
 				status = error;
 
 		dma_unmap_single(dev, request->dma_desc,
-			sizeof(struct talitos_desc), DMA_BIDIRECTIONAL);
+				 sizeof(struct talitos_desc),
+				 DMA_BIDIRECTIONAL);
 
 		/* copy entries so we can call callback outside lock */
 		saved_req.desc = request->desc;
@@ -415,7 +416,8 @@ static struct talitos_desc *current_desc(struct device *dev, int ch)
 /*
  * user diagnostics; report root cause of error based on execution unit status
  */
-static void report_eu_error(struct device *dev, int ch, struct talitos_desc *desc)
+static void report_eu_error(struct device *dev, int ch,
+			    struct talitos_desc *desc)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	int i;
@@ -863,8 +865,8 @@ static void ipsec_esp_encrypt_done(struct device *dev,
 }
 
 static void ipsec_esp_decrypt_swauth_done(struct device *dev,
-				   struct talitos_desc *desc, void *context,
-				   int err)
+					  struct talitos_desc *desc,
+					  void *context, int err)
 {
 	struct aead_request *req = context;
 	struct talitos_edesc *edesc =
@@ -895,8 +897,8 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev,
 }
 
 static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
-				   struct talitos_desc *desc, void *context,
-				   int err)
+					  struct talitos_desc *desc,
+					  void *context, int err)
 {
 	struct aead_request *req = context;
 	struct talitos_edesc *edesc =
@@ -905,10 +907,9 @@ static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
 	ipsec_esp_unmap(dev, edesc, req);
 
 	/* check ICV auth status */
-	if (!err)
-		if ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) !=
-		    DESC_HDR_LO_ICCR1_PASS)
-			err = -EBADMSG;
+	if (!err && ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) !=
+		     DESC_HDR_LO_ICCR1_PASS))
+		err = -EBADMSG;
 
 	kfree(edesc);
 
@@ -996,10 +997,9 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	desc->ptr[4].len = cpu_to_be16(cryptlen);
 	desc->ptr[4].j_extent = authsize;
 
-	sg_count = talitos_map_sg(dev, areq->src,
-				  edesc->src_nents ? : 1,
-				  (areq->src == areq->dst) ? DMA_BIDIRECTIONAL :
-				      DMA_TO_DEVICE,
+	sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1,
+				  (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
+							   : DMA_TO_DEVICE,
 				  edesc->src_is_chained);
 
 	if (sg_count == 1) {
@@ -1008,19 +1008,21 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 		sg_link_tbl_len = cryptlen;
 
 		if ((edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) &&
-			(edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0) {
+			(edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0)
 			sg_link_tbl_len = cryptlen + authsize;
-		}
+
 		sg_count = sg_to_link_tbl(areq->src, sg_count, sg_link_tbl_len,
 					  &edesc->link_tbl[0]);
 		if (sg_count > 1) {
 			desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
 			desc->ptr[4].ptr = cpu_to_be32(edesc->dma_link_tbl);
-			dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
-						   edesc->dma_len, DMA_BIDIRECTIONAL);
+			dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+						   edesc->dma_len,
+						   DMA_BIDIRECTIONAL);
 		} else {
 			/* Only one segment now, so no link tbl needed */
-			desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src));
+			desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->
+								      src));
 		}
 	}
 
@@ -1028,12 +1030,11 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	desc->ptr[5].len = cpu_to_be16(cryptlen);
 	desc->ptr[5].j_extent = authsize;
 
-	if (areq->src != areq->dst) {
+	if (areq->src != areq->dst)
 		sg_count = talitos_map_sg(dev, areq->dst,
 					  edesc->dst_nents ? : 1,
 					  DMA_FROM_DEVICE,
 					  edesc->dst_is_chained);
-	}
 
 	if (sg_count == 1) {
 		desc->ptr[5].ptr = cpu_to_be32(sg_dma_address(areq->dst));
@@ -1078,7 +1079,6 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	return ret;
 }
 
-
 /*
  * derive number of elements in scatterlist
  */
@@ -1191,8 +1191,6 @@ static int aead_encrypt(struct aead_request *req)
 	return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_encrypt_done);
 }
 
-
-
 static int aead_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
@@ -1211,38 +1209,38 @@ static int aead_decrypt(struct aead_request *req)
 		return PTR_ERR(edesc);
 
 	if ((priv->features & TALITOS_FTR_HW_AUTH_CHECK) &&
-	    (((!edesc->src_nents && !edesc->dst_nents) ||
-		priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT))) {
+	    ((!edesc->src_nents && !edesc->dst_nents) ||
+	     priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT)) {
 
 		/* decrypt and check the ICV */
-		edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND |
+		edesc->desc.hdr = ctx->desc_hdr_template |
+				  DESC_HDR_DIR_INBOUND |
 				  DESC_HDR_MODE1_MDEU_CICV;
 
 		/* reset integrity check result bits */
 		edesc->desc.hdr_lo = 0;
 
-		return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_hwauth_done);
+		return ipsec_esp(edesc, req, NULL, 0,
+				 ipsec_esp_decrypt_hwauth_done);
 
-	} else {
-
-		/* Have to check the ICV with software */
+	}
 
-		edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
+	/* Have to check the ICV with software */
+	edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
 
-		/* stash incoming ICV for later cmp with ICV generated by the h/w */
-		if (edesc->dma_len)
-			icvdata = &edesc->link_tbl[edesc->src_nents +
-						   edesc->dst_nents + 2];
-		else
-			icvdata = &edesc->link_tbl[0];
+	/* stash incoming ICV for later cmp with ICV generated by the h/w */
+	if (edesc->dma_len)
+		icvdata = &edesc->link_tbl[edesc->src_nents +
+					   edesc->dst_nents + 2];
+	else
+		icvdata = &edesc->link_tbl[0];
 
-		sg = sg_last(req->src, edesc->src_nents ? : 1);
+	sg = sg_last(req->src, edesc->src_nents ? : 1);
 
-		memcpy(icvdata, (char *)sg_virt(sg) + sg->length - ctx->authsize,
-		       ctx->authsize);
+	memcpy(icvdata, (char *)sg_virt(sg) + sg->length - ctx->authsize,
+	       ctx->authsize);
 
-		return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_swauth_done);
-	}
+	return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_swauth_done);
 }
 
 static int aead_givencrypt(struct aead_givcrypt_request *req)
@@ -1368,11 +1366,13 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
 		if (sg_count > 1) {
 			desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP;
 			desc->ptr[3].ptr = cpu_to_be32(edesc->dma_link_tbl);
-			dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
-						   edesc->dma_len, DMA_BIDIRECTIONAL);
+			dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+						   edesc->dma_len,
+						   DMA_BIDIRECTIONAL);
 		} else {
 			/* Only one segment now, so no link tbl needed */
-			desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq->src));
+			desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq->
+								      src));
 		}
 	}
 
@@ -1419,7 +1419,8 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
 	return ret;
 }
 
-static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request *areq)
+static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request *
+						    areq)
 {
 	struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
 	struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-- 
cgit v0.10.2


From 19bbbc635523703ece28409e59694d5b512b819e Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Sun, 29 Mar 2009 15:53:59 +0800
Subject: crypto: talitos - containerof related codingstyle

no functional changes.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index a073e6b..1cc1c41 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -841,13 +841,14 @@ static void ipsec_esp_encrypt_done(struct device *dev,
 				   int err)
 {
 	struct aead_request *areq = context;
-	struct talitos_edesc *edesc =
-		 container_of(desc, struct talitos_edesc, desc);
 	struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
+	struct talitos_edesc *edesc;
 	struct scatterlist *sg;
 	void *icvdata;
 
+	edesc = container_of(desc, struct talitos_edesc, desc);
+
 	ipsec_esp_unmap(dev, edesc, areq);
 
 	/* copy the generated ICV to dst */
@@ -869,13 +870,14 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev,
 					  void *context, int err)
 {
 	struct aead_request *req = context;
-	struct talitos_edesc *edesc =
-		 container_of(desc, struct talitos_edesc, desc);
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
+	struct talitos_edesc *edesc;
 	struct scatterlist *sg;
 	void *icvdata;
 
+	edesc = container_of(desc, struct talitos_edesc, desc);
+
 	ipsec_esp_unmap(dev, edesc, req);
 
 	if (!err) {
@@ -901,8 +903,9 @@ static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
 					  void *context, int err)
 {
 	struct aead_request *req = context;
-	struct talitos_edesc *edesc =
-		 container_of(desc, struct talitos_edesc, desc);
+	struct talitos_edesc *edesc;
+
+	edesc = container_of(desc, struct talitos_edesc, desc);
 
 	ipsec_esp_unmap(dev, edesc, req);
 
@@ -1308,8 +1311,9 @@ static void ablkcipher_done(struct device *dev,
 			    int err)
 {
 	struct ablkcipher_request *areq = context;
-	struct talitos_edesc *edesc =
-		 container_of(desc, struct talitos_edesc, desc);
+	struct talitos_edesc *edesc;
+
+	edesc = container_of(desc, struct talitos_edesc, desc);
 
 	common_nonsnoop_unmap(dev, edesc, areq);
 
@@ -1686,12 +1690,14 @@ struct talitos_crypto_alg {
 static int talitos_cra_init(struct crypto_tfm *tfm)
 {
 	struct crypto_alg *alg = tfm->__crt_alg;
-	struct talitos_crypto_alg *talitos_alg =
-		 container_of(alg, struct talitos_crypto_alg, crypto_alg);
+	struct talitos_crypto_alg *talitos_alg;
 	struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
 
+	talitos_alg =  container_of(alg, struct talitos_crypto_alg, crypto_alg);
+
 	/* update context with ptr to dev */
 	ctx->dev = talitos_alg->dev;
+
 	/* copy descriptor header template value */
 	ctx->desc_hdr_template = talitos_alg->desc_hdr_template;
 
-- 
cgit v0.10.2


From 962a9c99496f98041d14d64a9fdcf58050fefb4d Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Sun, 29 Mar 2009 15:54:30 +0800
Subject: crypto: talitos - Avoid unnecessary decrypt check

the ICV check bit only gets set in decrypt entry points

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 1cc1c41..c70775f 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1010,8 +1010,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	} else {
 		sg_link_tbl_len = cryptlen;
 
-		if ((edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) &&
-			(edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0)
+		if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV)
 			sg_link_tbl_len = cryptlen + authsize;
 
 		sg_count = sg_to_link_tbl(areq->src, sg_count, sg_link_tbl_len,
-- 
cgit v0.10.2


From d1c8b0a7692e81b46550bcc493465ed10510cd33 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Tue, 21 Apr 2009 14:14:37 +0800
Subject: crypto: padlock - Enable on x86_64

Almost everything stays the same, we need just to use the extended registers
on the bit variant.

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 01afd75..39eedd4 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -12,7 +12,7 @@ if CRYPTO_HW
 
 config CRYPTO_DEV_PADLOCK
 	tristate "Support for VIA PadLock ACE"
-	depends on X86_32 && !UML
+	depends on !UML
 	select CRYPTO_ALGAPI
 	help
 	  Some VIA processors come with an integrated crypto engine
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 856b3cc..87f92c3 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -154,7 +154,11 @@ static inline void padlock_reset_key(struct cword *cword)
 	int cpu = raw_smp_processor_id();
 
 	if (cword != per_cpu(last_cword, cpu))
+#ifndef CONFIG_X86_64
 		asm volatile ("pushfl; popfl");
+#else
+		asm volatile ("pushfq; popfq");
+#endif
 }
 
 static inline void padlock_store_cword(struct cword *cword)
@@ -208,10 +212,19 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 
 	asm volatile ("test $1, %%cl;"
 		      "je 1f;"
+#ifndef CONFIG_X86_64
 		      "lea -1(%%ecx), %%eax;"
 		      "mov $1, %%ecx;"
+#else
+		      "lea -1(%%rcx), %%rax;"
+		      "mov $1, %%rcx;"
+#endif
 		      ".byte 0xf3,0x0f,0xa7,0xc8;"	/* rep xcryptecb */
+#ifndef CONFIG_X86_64
 		      "mov %%eax, %%ecx;"
+#else
+		      "mov %%rax, %%rcx;"
+#endif
 		      "1:"
 		      ".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
 		      : "+S"(input), "+D"(output)
-- 
cgit v0.10.2


From 2f8174187f409213e63c3589af163c627e8a182a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 22 Apr 2009 13:00:15 +0800
Subject: crypto: padlock - Restore dependency on x86

When we added 64-bit support to padlock the dependency on x86
was lost.  This causes build failures on non-x86 architectures.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 39eedd4..e748e55 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -12,7 +12,7 @@ if CRYPTO_HW
 
 config CRYPTO_DEV_PADLOCK
 	tristate "Support for VIA PadLock ACE"
-	depends on !UML
+	depends on X86 && !UML
 	select CRYPTO_ALGAPI
 	help
 	  Some VIA processors come with an integrated crypto engine
-- 
cgit v0.10.2


From e44a1b44c3a9794236fe038b89a0fbef5adcd523 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Mon, 4 May 2009 19:22:11 +0800
Subject: crypto: testmgr - Handle AEAD test vectors expected to fail
 verification

Add infrastructure to tcrypt/testmgr to support handling ccm decryption
test vectors that are expected to fail verification.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index bfee6e9..84f9640 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -363,6 +363,16 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 
 			switch (ret) {
 			case 0:
+				if (template[i].novrfy) {
+					/* verification was supposed to fail */
+					printk(KERN_ERR "alg: aead: %s failed "
+					       "on test %d for %s: ret was 0, "
+					       "expected -EBADMSG\n",
+					       e, j, algo);
+					/* so really, we got a bad message */
+					ret = -EBADMSG;
+					goto out;
+				}
 				break;
 			case -EINPROGRESS:
 			case -EBUSY:
@@ -372,6 +382,10 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 					INIT_COMPLETION(result.completion);
 					break;
 				}
+			case -EBADMSG:
+				if (template[i].novrfy)
+					/* verification failure was expected */
+					continue;
 				/* fall through */
 			default:
 				printk(KERN_ERR "alg: aead: %s failed on test "
@@ -481,6 +495,16 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 
 			switch (ret) {
 			case 0:
+				if (template[i].novrfy) {
+					/* verification was supposed to fail */
+					printk(KERN_ERR "alg: aead: %s failed "
+					       "on chunk test %d for %s: ret "
+					       "was 0, expected -EBADMSG\n",
+					       e, j, algo);
+					/* so really, we got a bad message */
+					ret = -EBADMSG;
+					goto out;
+				}
 				break;
 			case -EINPROGRESS:
 			case -EBUSY:
@@ -490,6 +514,10 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 					INIT_COMPLETION(result.completion);
 					break;
 				}
+			case -EBADMSG:
+				if (template[i].novrfy)
+					/* verification failure was expected */
+					continue;
 				/* fall through */
 			default:
 				printk(KERN_ERR "alg: aead: %s failed on "
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 526f00a..b77b61d 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -62,6 +62,7 @@ struct aead_testvec {
 	int np;
 	int anp;
 	unsigned char fail;
+	unsigned char novrfy;	/* ccm dec verification failure expected */
 	unsigned char wk; /* weak key flag */
 	unsigned char klen;
 	unsigned short ilen;
-- 
cgit v0.10.2


From 5d667322a25ab4ecb91176db118fd663fee4da35 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Mon, 4 May 2009 19:23:40 +0800
Subject: crypto: testmgr - Add self-tests for rfc4309(ccm(aes))

Add an array of encryption and decryption + verification self-tests
for rfc4309(ccm(aes)).

Test vectors all come from sample FIPS CAVS files provided to
Red Hat by a testing lab. Unfortunately, all the published sample
vectors in RFC 3610 and NIST Special Publication 800-38C contain nonce
lengths that the kernel's rfc4309 implementation doesn't support, so
while using some public domain vectors would have been preferred, its
not possible at this time.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 50d1e35..0452036 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -667,6 +667,10 @@ static void do_test(int m)
 		tcrypt_test("zlib");
 		break;
 
+	case 45:
+		tcrypt_test("rfc4309(ccm(aes))");
+		break;
+
 	case 100:
 		tcrypt_test("hmac(md5)");
 		break;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 84f9640..40c1078 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1879,6 +1879,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "rfc4309(ccm(aes))",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = {
+				.enc = {
+					.vecs = aes_ccm_rfc4309_enc_tv_template,
+					.count = AES_CCM_4309_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = aes_ccm_rfc4309_dec_tv_template,
+					.count = AES_CCM_4309_DEC_TEST_VECTORS
+				}
+			}
+		}
+	}, {
 		.alg = "rmd128",
 		.test = alg_test_hash,
 		.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index b77b61d..5add651 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -2848,6 +2848,8 @@ static struct cipher_testvec cast6_dec_tv_template[] = {
 #define AES_GCM_DEC_TEST_VECTORS 8
 #define AES_CCM_ENC_TEST_VECTORS 7
 #define AES_CCM_DEC_TEST_VECTORS 7
+#define AES_CCM_4309_ENC_TEST_VECTORS 7
+#define AES_CCM_4309_DEC_TEST_VECTORS 10
 
 static struct cipher_testvec aes_enc_tv_template[] = {
 	{ /* From FIPS-197 */
@@ -5826,6 +5828,374 @@ static struct aead_testvec aes_ccm_dec_tv_template[] = {
 	},
 };
 
+/*
+ * rfc4309 refers to section 8 of rfc3610 for test vectors, but they all
+ * use a 13-byte nonce, we only support an 11-byte nonce. Similarly, all of
+ * Special Publication 800-38C's test vectors also use nonce lengths our
+ * implementation doesn't support. The following are taken from fips cavs
+ * fax files on hand at Red Hat.
+ *
+ * nb: actual key lengths are (klen - 3), the last 3 bytes are actually
+ * part of the nonce which combine w/the iv, but need to be input this way.
+ */
+static struct aead_testvec aes_ccm_rfc4309_enc_tv_template[] = {
+	{
+		.key	= "\x83\xac\x54\x66\xc2\xeb\xe5\x05"
+			  "\x2e\x01\xd1\xfc\x5d\x82\x66\x2e"
+			  "\x96\xac\x59",
+		.klen	= 19,
+		.iv	= "\x30\x07\xa1\xe2\xa2\xc7\x55\x24",
+		.alen	= 0,
+		.input	= "\x19\xc8\x81\xf6\xe9\x86\xff\x93"
+			  "\x0b\x78\x67\xe5\xbb\xb7\xfc\x6e"
+			  "\x83\x77\xb3\xa6\x0c\x8c\x9f\x9c"
+			  "\x35\x2e\xad\xe0\x62\xf9\x91\xa1",
+		.ilen	= 32,
+		.result	= "\xab\x6f\xe1\x69\x1d\x19\x99\xa8"
+			  "\x92\xa0\xc4\x6f\x7e\xe2\x8b\xb1"
+			  "\x70\xbb\x8c\xa6\x4c\x6e\x97\x8a"
+			  "\x57\x2b\xbe\x5d\x98\xa6\xb1\x32"
+			  "\xda\x24\xea\xd9\xa1\x39\x98\xfd"
+			  "\xa4\xbe\xd9\xf2\x1a\x6d\x22\xa8",
+		.rlen	= 48,
+	}, {
+		.key	= "\x1e\x2c\x7e\x01\x41\x9a\xef\xc0"
+			  "\x0d\x58\x96\x6e\x5c\xa2\x4b\xd3"
+			  "\x4f\xa3\x19",
+		.klen	= 19,
+		.iv	= "\xd3\x01\x5a\xd8\x30\x60\x15\x56",
+		.assoc	= "\xda\xe6\x28\x9c\x45\x2d\xfd\x63"
+			  "\x5e\xda\x4c\xb6\xe6\xfc\xf9\xb7"
+			  "\x0c\x56\xcb\xe4\xe0\x05\x7a\xe1"
+			  "\x0a\x63\x09\x78\xbc\x2c\x55\xde",
+		.alen	= 32,
+		.input	= "\x87\xa3\x36\xfd\x96\xb3\x93\x78"
+			  "\xa9\x28\x63\xba\x12\xa3\x14\x85"
+			  "\x57\x1e\x06\xc9\x7b\x21\xef\x76"
+			  "\x7f\x38\x7e\x8e\x29\xa4\x3e\x7e",
+		.ilen	= 32,
+		.result	= "\x8a\x1e\x11\xf0\x02\x6b\xe2\x19"
+			  "\xfc\x70\xc4\x6d\x8e\xb7\x99\xab"
+			  "\xc5\x4b\xa2\xac\xd3\xf3\x48\xff"
+			  "\x3b\xb5\xce\x53\xef\xde\xbb\x02"
+			  "\xa9\x86\x15\x6c\x13\xfe\xda\x0a"
+			  "\x22\xb8\x29\x3d\xd8\x39\x9a\x23",
+		.rlen	= 48,
+	}, {
+		.key	= "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1"
+			  "\xa3\xf0\xff\xdd\x4e\x4b\x12\x75"
+			  "\x53\x14\x73\x66\x8d\x88\xf6\x80"
+			  "\xa0\x20\x35",
+		.klen	= 27,
+		.iv	= "\x26\xf2\x21\x8d\x50\x20\xda\xe2",
+		.assoc	= "\x5b\x9e\x13\x67\x02\x5e\xef\xc1"
+			  "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47"
+			  "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d"
+			  "\xd8\x9e\x2b\x56\x10\x23\x56\xe7",
+		.alen	= 32,
+		.ilen	= 0,
+		.result	= "\x36\xea\x7a\x70\x08\xdc\x6a\xbc"
+			  "\xad\x0c\x7a\x63\xf6\x61\xfd\x9b",
+		.rlen	= 16,
+	}, {
+		.key	= "\x56\xdf\x5c\x8f\x26\x3f\x0e\x42"
+			  "\xef\x7a\xd3\xce\xfc\x84\x60\x62"
+			  "\xca\xb4\x40\xaf\x5f\xc9\xc9\x01"
+			  "\xd6\x3c\x8c",
+		.klen	= 27,
+		.iv	= "\x86\x84\xb6\xcd\xef\x09\x2e\x94",
+		.assoc	= "\x02\x65\x78\x3c\xe9\x21\x30\x91"
+			  "\xb1\xb9\xda\x76\x9a\x78\x6d\x95"
+			  "\xf2\x88\x32\xa3\xf2\x50\xcb\x4c"
+			  "\xe3\x00\x73\x69\x84\x69\x87\x79",
+		.alen	= 32,
+		.input	= "\x9f\xd2\x02\x4b\x52\x49\x31\x3c"
+			  "\x43\x69\x3a\x2d\x8e\x70\xad\x7e"
+			  "\xe0\xe5\x46\x09\x80\x89\x13\xb2"
+			  "\x8c\x8b\xd9\x3f\x86\xfb\xb5\x6b",
+		.ilen	= 32,
+		.result	= "\x39\xdf\x7c\x3c\x5a\x29\xb9\x62"
+			  "\x5d\x51\xc2\x16\xd8\xbd\x06\x9f"
+			  "\x9b\x6a\x09\x70\xc1\x51\x83\xc2"
+			  "\x66\x88\x1d\x4f\x9a\xda\xe0\x1e"
+			  "\xc7\x79\x11\x58\xe5\x6b\x20\x40"
+			  "\x7a\xea\x46\x42\x8b\xe4\x6f\xe1",
+		.rlen	= 48,
+	}, {
+		.key	= "\xe0\x8d\x99\x71\x60\xd7\x97\x1a"
+			  "\xbd\x01\x99\xd5\x8a\xdf\x71\x3a"
+			  "\xd3\xdf\x24\x4b\x5e\x3d\x4b\x4e"
+			  "\x30\x7a\xb9\xd8\x53\x0a\x5e\x2b"
+			  "\x1e\x29\x91",
+		.klen	= 35,
+		.iv	= "\xad\x8e\xc1\x53\x0a\xcf\x2d\xbe",
+		.assoc	= "\x19\xb6\x1f\x57\xc4\xf3\xf0\x8b"
+			  "\x78\x2b\x94\x02\x29\x0f\x42\x27"
+			  "\x6b\x75\xcb\x98\x34\x08\x7e\x79"
+			  "\xe4\x3e\x49\x0d\x84\x8b\x22\x87",
+		.alen	= 32,
+		.input	= "\xe1\xd9\xd8\x13\xeb\x3a\x75\x3f"
+			  "\x9d\xbd\x5f\x66\xbe\xdc\xbb\x66"
+			  "\xbf\x17\x99\x62\x4a\x39\x27\x1f"
+			  "\x1d\xdc\x24\xae\x19\x2f\x98\x4c",
+		.ilen	= 32,
+		.result	= "\x19\xb8\x61\x33\x45\x2b\x43\x96"
+			  "\x6f\x51\xd0\x20\x30\x7d\x9b\xc6"
+			  "\x26\x3d\xf8\xc9\x65\x16\xa8\x9f"
+			  "\xf0\x62\x17\x34\xf2\x1e\x8d\x75"
+			  "\x4e\x13\xcc\xc0\xc3\x2a\x54\x2d",
+		.rlen	= 40,
+	}, {
+		.key	= "\x7c\xc8\x18\x3b\x8d\x99\xe0\x7c"
+			  "\x45\x41\xb8\xbd\x5c\xa7\xc2\x32"
+			  "\x8a\xb8\x02\x59\xa4\xfe\xa9\x2c"
+			  "\x09\x75\x9a\x9b\x3c\x9b\x27\x39"
+			  "\xf9\xd9\x4e",
+		.klen	= 35,
+		.iv	= "\x63\xb5\x3d\x9d\x43\xf6\x1e\x50",
+		.assoc	= "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b"
+			  "\x13\x02\x01\x0c\x83\x4c\x96\x35"
+			  "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94"
+			  "\xb0\x39\x36\xe6\x8f\x57\xe0\x13",
+		.alen	= 32,
+		.input	= "\x3b\x6c\x29\x36\xb6\xef\x07\xa6"
+			  "\x83\x72\x07\x4f\xcf\xfa\x66\x89"
+			  "\x5f\xca\xb1\xba\xd5\x8f\x2c\x27"
+			  "\x30\xdb\x75\x09\x93\xd4\x65\xe4",
+		.ilen	= 32,
+		.result	= "\xb0\x88\x5a\x33\xaa\xe5\xc7\x1d"
+			  "\x85\x23\xc7\xc6\x2f\xf4\x1e\x3d"
+			  "\xcc\x63\x44\x25\x07\x78\x4f\x9e"
+			  "\x96\xb8\x88\xeb\xbc\x48\x1f\x06"
+			  "\x39\xaf\x39\xac\xd8\x4a\x80\x39"
+			  "\x7b\x72\x8a\xf7",
+		.rlen	= 44,
+	}, {
+		.key	= "\xab\xd0\xe9\x33\x07\x26\xe5\x83"
+			  "\x8c\x76\x95\xd4\xb6\xdc\xf3\x46"
+			  "\xf9\x8f\xad\xe3\x02\x13\x83\x77"
+			  "\x3f\xb0\xf1\xa1\xa1\x22\x0f\x2b"
+			  "\x24\xa7\x8b",
+		.klen	= 35,
+		.iv	= "\x07\xcb\xcc\x0e\xe6\x33\xbf\xf5",
+		.assoc	= "\xd4\xdb\x30\x1d\x03\xfe\xfd\x5f"
+			  "\x87\xd4\x8c\xb6\xb6\xf1\x7a\x5d"
+			  "\xab\x90\x65\x8d\x8e\xca\x4d\x4f"
+			  "\x16\x0c\x40\x90\x4b\xc7\x36\x73",
+		.alen	= 32,
+		.input	= "\xf5\xc6\x7d\x48\xc1\xb7\xe6\x92"
+			  "\x97\x5a\xca\xc4\xa9\x6d\xf9\x3d"
+			  "\x6c\xde\xbc\xf1\x90\xea\x6a\xb2"
+			  "\x35\x86\x36\xaf\x5c\xfe\x4b\x3a",
+		.ilen	= 32,
+		.result	= "\x83\x6f\x40\x87\x72\xcf\xc1\x13"
+			  "\xef\xbb\x80\x21\x04\x6c\x58\x09"
+			  "\x07\x1b\xfc\xdf\xc0\x3f\x5b\xc7"
+			  "\xe0\x79\xa8\x6e\x71\x7c\x3f\xcf"
+			  "\x5c\xda\xb2\x33\xe5\x13\xe2\x0d"
+			  "\x74\xd1\xef\xb5\x0f\x3a\xb5\xf8",
+		.rlen	= 48,
+	},
+};
+
+static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
+	{
+		.key	= "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
+			  "\xff\x80\x2e\x48\x7d\x82\xf8\xb9"
+			  "\xc6\xfb\x7d",
+		.klen	= 19,
+		.iv	= "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8",
+		.alen	= 0,
+		.input	= "\xd5\xe8\x93\x9f\xc7\x89\x2e\x2b",
+		.ilen	= 8,
+		.result	= "\x00",
+		.rlen	= 0,
+		.novrfy	= 1,
+	}, {
+		.key	= "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
+			  "\xff\x80\x2e\x48\x7d\x82\xf8\xb9"
+			  "\xaf\x94\x87",
+		.klen	= 19,
+		.iv	= "\x78\x35\x82\x81\x7f\x88\x94\x68",
+		.alen	= 0,
+		.input	= "\x41\x3c\xb8\x87\x73\xcb\xf3\xf3",
+		.ilen	= 8,
+		.result	= "\x00",
+		.rlen	= 0,
+	}, {
+		.key	= "\x61\x0e\x8c\xae\xe3\x23\xb6\x38"
+			  "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8"
+			  "\xc6\xfb\x7d",
+		.klen	= 19,
+		.iv	= "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8",
+		.assoc	= "\xf3\x94\x87\x78\x35\x82\x81\x7f"
+			  "\x88\x94\x68\xb1\x78\x6b\x2b\xd6"
+			  "\x04\x1f\x4e\xed\x78\xd5\x33\x66"
+			  "\xd8\x94\x99\x91\x81\x54\x62\x57",
+		.alen	= 32,
+		.input	= "\xf0\x7c\x29\x02\xae\x1c\x2f\x55"
+			  "\xd0\xd1\x3d\x1a\xa3\x6d\xe4\x0a"
+			  "\x86\xb0\x87\x6b\x62\x33\x8c\x34"
+			  "\xce\xab\x57\xcc\x79\x0b\xe0\x6f"
+			  "\x5c\x3e\x48\x1f\x6c\x46\xf7\x51"
+			  "\x8b\x84\x83\x2a\xc1\x05\xb8\xc5",
+		.ilen	= 48,
+		.result	= "\x50\x82\x3e\x07\xe2\x1e\xb6\xfb"
+			  "\x33\xe4\x73\xce\xd2\xfb\x95\x79"
+			  "\xe8\xb4\xb5\x77\x11\x10\x62\x6f"
+			  "\x6a\x82\xd1\x13\xec\xf5\xd0\x48",
+		.rlen	= 32,
+		.novrfy	= 1,
+	}, {
+		.key	= "\x61\x0e\x8c\xae\xe3\x23\xb6\x38"
+			  "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8"
+			  "\x05\xe0\xc9",
+		.klen	= 19,
+		.iv	= "\x0f\xed\x34\xea\x97\xd4\x3b\xdf",
+		.assoc	= "\x49\x5c\x50\x1f\x1d\x94\xcc\x81"
+			  "\xba\xb7\xb6\x03\xaf\xa5\xc1\xa1"
+			  "\xd8\x5c\x42\x68\xe0\x6c\xda\x89"
+			  "\x05\xac\x56\xac\x1b\x2a\xd3\x86",
+		.alen	= 32,
+		.input	= "\x39\xbe\x7d\x15\x62\x77\xf3\x3c"
+			  "\xad\x83\x52\x6d\x71\x03\x25\x1c"
+			  "\xed\x81\x3a\x9a\x16\x7d\x19\x80"
+			  "\x72\x04\x72\xd0\xf6\xff\x05\x0f"
+			  "\xb7\x14\x30\x00\x32\x9e\xa0\xa6"
+			  "\x9e\x5a\x18\xa1\xb8\xfe\xdb\xd3",
+		.ilen	= 48,
+		.result	= "\x75\x05\xbe\xc2\xd9\x1e\xde\x60"
+			  "\x47\x3d\x8c\x7d\xbd\xb5\xd9\xb7"
+			  "\xf2\xae\x61\x05\x8f\x82\x24\x3f"
+			  "\x9c\x67\x91\xe1\x38\x4f\xe4\x0c",
+		.rlen	= 32,
+	}, {
+		.key	= "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
+			  "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3"
+			  "\xa4\x48\x93\x39\x26\x71\x4a\xc6"
+			  "\xee\x49\x83",
+		.klen	= 27,
+		.iv	= "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
+		.assoc	= "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
+			  "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
+			  "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
+			  "\x11\xc4\xc6\xdb\x00\x56\x36\x61",
+		.alen	= 32,
+		.input	= "\x71\x99\xfa\xf4\x44\x12\x68\x9b",
+		.ilen	= 8,
+		.result	= "\x00",
+		.rlen	= 0,
+	}, {
+		.key	= "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+			  "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+			  "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
+			  "\xee\x49\x83",
+		.klen	= 27,
+		.iv	= "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
+		.assoc	= "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
+			  "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
+			  "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
+			  "\x11\xc4\xc6\xdb\x00\x56\x36\x61",
+		.alen	= 32,
+		.input	= "\xfb\xe5\x5d\x34\xbe\xe5\xe8\xe7"
+			  "\x5a\xef\x2f\xbf\x1f\x7f\xd4\xb2"
+			  "\x66\xca\x61\x1e\x96\x7a\x61\xb3"
+			  "\x1c\x16\x45\x52\xba\x04\x9c\x9f"
+			  "\xb1\xd2\x40\xbc\x52\x7c\x6f\xb1",
+		.ilen	= 40,
+		.result	= "\x85\x34\x66\x42\xc8\x92\x0f\x36"
+			  "\x58\xe0\x6b\x91\x3c\x98\x5c\xbb"
+			  "\x0a\x85\xcc\x02\xad\x7a\x96\xe9"
+			  "\x65\x43\xa4\xc3\x0f\xdc\x55\x81",
+		.rlen	= 32,
+	}, {
+		.key	= "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+			  "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+			  "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
+			  "\xd1\xfc\x57",
+		.klen	= 27,
+		.iv	= "\x9c\xfe\xb8\x9c\xad\x71\xaa\x1f",
+		.assoc	= "\x86\x67\xa5\xa9\x14\x5f\x0d\xc6"
+			  "\xff\x14\xc7\x44\xbf\x6c\x3a\xc3"
+			  "\xff\xb6\x81\xbd\xe2\xd5\x06\xc7"
+			  "\x3c\xa1\x52\x13\x03\x8a\x23\x3a",
+		.alen	= 32,
+		.input	= "\x3f\x66\xb0\x9d\xe5\x4b\x38\x00"
+			  "\xc6\x0e\x6e\xe5\xd6\x98\xa6\x37"
+			  "\x8c\x26\x33\xc6\xb2\xa2\x17\xfa"
+			  "\x64\x19\xc0\x30\xd7\xfc\x14\x6b"
+			  "\xe3\x33\xc2\x04\xb0\x37\xbe\x3f"
+			  "\xa9\xb4\x2d\x68\x03\xa3\x44\xef",
+		.ilen	= 48,
+		.result	= "\x02\x87\x4d\x28\x80\x6e\xb2\xed"
+			  "\x99\x2a\xa8\xca\x04\x25\x45\x90"
+			  "\x1d\xdd\x5a\xd9\xe4\xdb\x9c\x9c"
+			  "\x49\xe9\x01\xfe\xa7\x80\x6d\x6b",
+		.rlen	= 32,
+		.novrfy	= 1,
+	}, {
+		.key	= "\xa4\x4b\x54\x29\x0a\xb8\x6d\x01"
+			  "\x5b\x80\x2a\xcf\x25\xc4\xb7\x5c"
+			  "\x20\x2c\xad\x30\xc2\x2b\x41\xfb"
+			  "\x0e\x85\xbc\x33\xad\x0f\x2b\xff"
+			  "\xee\x49\x83",
+		.klen	= 35,
+		.iv	= "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
+		.alen	= 0,
+		.input	= "\x1f\xb8\x8f\xa3\xdd\x54\x00\xf2",
+		.ilen	= 8,
+		.result	= "\x00",
+		.rlen	= 0,
+	}, {
+		.key	= "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
+			  "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3"
+			  "\xa4\x48\x93\x39\x26\x71\x4a\xc6"
+			  "\xae\x8f\x11\x4c\xc2\x9c\x4a\xbb"
+			  "\x85\x34\x66",
+		.klen	= 35,
+		.iv	= "\x42\xc8\x92\x0f\x36\x58\xe0\x6b",
+		.alen	= 0,
+		.input	= "\x48\x01\x5e\x02\x24\x04\x66\x47"
+			  "\xa1\xea\x6f\xaf\xe8\xfc\xfb\xdd"
+			  "\xa5\xa9\x87\x8d\x84\xee\x2e\x77"
+			  "\xbb\x86\xb9\xf5\x5c\x6c\xff\xf6"
+			  "\x72\xc3\x8e\xf7\x70\xb1\xb2\x07"
+			  "\xbc\xa8\xa3\xbd\x83\x7c\x1d\x2a",
+		.ilen	= 48,
+		.result	= "\xdc\x56\xf2\x71\xb0\xb1\xa0\x6c"
+			  "\xf0\x97\x3a\xfb\x6d\xe7\x32\x99"
+			  "\x3e\xaf\x70\x5e\xb2\x4d\xea\x39"
+			  "\x89\xd4\x75\x7a\x63\xb1\xda\x93",
+		.rlen	= 32,
+		.novrfy	= 1,
+	}, {
+		.key	= "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+			  "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+			  "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
+			  "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b"
+			  "\xcf\x76\x3f",
+		.klen	= 35,
+		.iv	= "\xd9\x95\x75\x8f\x44\x89\x40\x7b",
+		.assoc	= "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88"
+			  "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b"
+			  "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b"
+			  "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe",
+		.alen	= 32,
+		.input	= "\x48\x58\xd6\xf3\xad\x63\x58\xbf"
+			  "\xae\xc7\x5e\xae\x83\x8f\x7b\xe4"
+			  "\x78\x5c\x4c\x67\x71\x89\x94\xbf"
+			  "\x47\xf1\x63\x7e\x1c\x59\xbd\xc5"
+			  "\x7f\x44\x0a\x0c\x01\x18\x07\x92"
+			  "\xe1\xd3\x51\xce\x32\x6d\x0c\x5b",
+		.ilen	= 48,
+		.result	= "\xc2\x54\xc8\xde\x78\x87\x77\x40"
+			  "\x49\x71\xe4\xb7\xe7\xcb\x76\x61"
+			  "\x0a\x41\xb9\xe9\xc0\x76\x54\xab"
+			  "\x04\x49\x3b\x19\x93\x57\x25\x5d",
+		.rlen	= 32,
+	},
+};
+
 /* Cast5 test vectors from RFC 2144 */
 #define CAST5_ENC_TEST_VECTORS	3
 #define CAST5_DEC_TEST_VECTORS	3
-- 
cgit v0.10.2


From 7647d6ce2077d9e1c3d72359f6b4492be129cfe8 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Mon, 4 May 2009 19:44:50 +0800
Subject: crypto: testmgr - Add infrastructure for ansi_cprng self-tests

Add some necessary infrastructure to make it possible to run
self-tests for ansi_cprng. The bits are likely very specific
to the ANSI X9.31 CPRNG in AES mode, and thus perhaps should
be named more specifically if/when we grow additional CPRNG
support...

Successfully tested against the cryptodev-2.6 tree and a
Red Hat Enterprise Linux 5.x kernel with the follow-on
patch that adds the actual test vectors.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 40c1078..adc54cf 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -19,6 +19,7 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <crypto/rng.h>
 
 #include "internal.h"
 #include "testmgr.h"
@@ -84,6 +85,11 @@ struct hash_test_suite {
 	unsigned int count;
 };
 
+struct cprng_test_suite {
+	struct cprng_testvec *vecs;
+	unsigned int count;
+};
+
 struct alg_test_desc {
 	const char *alg;
 	int (*test)(const struct alg_test_desc *desc, const char *driver,
@@ -95,6 +101,7 @@ struct alg_test_desc {
 		struct comp_test_suite comp;
 		struct pcomp_test_suite pcomp;
 		struct hash_test_suite hash;
+		struct cprng_test_suite cprng;
 	} suite;
 };
 
@@ -1089,6 +1096,68 @@ static int test_pcomp(struct crypto_pcomp *tfm,
 	return 0;
 }
 
+
+static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template,
+		      unsigned int tcount)
+{
+	const char *algo = crypto_tfm_alg_driver_name(crypto_rng_tfm(tfm));
+	int err, i, j, seedsize;
+	u8 *seed;
+	char result[32];
+
+	seedsize = crypto_rng_seedsize(tfm);
+
+	seed = kmalloc(seedsize, GFP_KERNEL);
+	if (!seed) {
+		printk(KERN_ERR "alg: cprng: Failed to allocate seed space "
+		       "for %s\n", algo);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < tcount; i++) {
+		memset(result, 0, 32);
+
+		memcpy(seed, template[i].v, template[i].vlen);
+		memcpy(seed + template[i].vlen, template[i].key,
+		       template[i].klen);
+		memcpy(seed + template[i].vlen + template[i].klen,
+		       template[i].dt, template[i].dtlen);
+
+		err = crypto_rng_reset(tfm, seed, seedsize);
+		if (err) {
+			printk(KERN_ERR "alg: cprng: Failed to reset rng "
+			       "for %s\n", algo);
+			goto out;
+		}
+
+		for (j = 0; j < template[i].loops; j++) {
+			err = crypto_rng_get_bytes(tfm, result,
+						   template[i].rlen);
+			if (err != template[i].rlen) {
+				printk(KERN_ERR "alg: cprng: Failed to obtain "
+				       "the correct amount of random data for "
+				       "%s (requested %d, got %d)\n", algo,
+				       template[i].rlen, err);
+				goto out;
+			}
+		}
+
+		err = memcmp(result, template[i].result,
+			     template[i].rlen);
+		if (err) {
+			printk(KERN_ERR "alg: cprng: Test %d failed for %s\n",
+			       i, algo);
+			hexdump(result, template[i].rlen);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+out:
+	kfree(seed);
+	return err;
+}
+
 static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
 			 u32 type, u32 mask)
 {
@@ -1288,6 +1357,26 @@ out:
 	return err;
 }
 
+static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
+			  u32 type, u32 mask)
+{
+	struct crypto_rng *rng;
+	int err;
+
+	rng = crypto_alloc_rng(driver, type, mask);
+	if (IS_ERR(rng)) {
+		printk(KERN_ERR "alg: cprng: Failed to load transform for %s: "
+		       "%ld\n", driver, PTR_ERR(rng));
+		return PTR_ERR(rng);
+	}
+
+	err = test_cprng(rng, desc->suite.cprng.vecs, desc->suite.cprng.count);
+
+	crypto_free_rng(rng);
+
+	return err;
+}
+
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 5add651..13d5a61 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -70,6 +70,18 @@ struct aead_testvec {
 	unsigned short rlen;
 };
 
+struct cprng_testvec {
+	char *key;
+	char *dt;
+	char *v;
+	char *result;
+	unsigned char klen;
+	unsigned short dtlen;
+	unsigned short vlen;
+	unsigned short rlen;
+	unsigned short loops;
+};
+
 static char zeroed_string[48];
 
 /*
-- 
cgit v0.10.2


From e08ca2da39db22da569dc23578103cdc942fe3ac Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Mon, 4 May 2009 19:46:29 +0800
Subject: crypto: testmgr - Add ansi_cprng test vectors

Add ANSI X9.31 Continuous Pseudo-Random Number Generator (AES mode),
aka 'ansi_cprng' test vectors, taken from Appendix B.2.9 and B.2.10
of the NIST RNGVS document, found here:
    http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf

Successfully tested against both the cryptodev-2.6 tree and a Red
Hat Enterprise Linux 5.4 kernel, via 'modprobe tcrypt mode=150'.

The selection of 150 was semi-arbitrary, didn't seem like it should
go any place in particular, so I started a new range for rng tests.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 0452036..ea3b8a8 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -707,6 +707,10 @@ static void do_test(int m)
 		tcrypt_test("hmac(rmd160)");
 		break;
 
+	case 150:
+		tcrypt_test("ansi_cprng");
+		break;
+
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index adc54cf..5183ec5 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1380,6 +1380,15 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
+		.alg = "ansi_cprng",
+		.test = alg_test_cprng,
+		.suite = {
+			.cprng = {
+				.vecs = ansi_cprng_aes_tv_template,
+				.count = ANSI_CPRNG_AES_TEST_VECTORS
+			}
+		}
+	}, {
 		.alg = "cbc(aes)",
 		.test = alg_test_skcipher,
 		.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 13d5a61..c1c709b 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -6208,6 +6208,102 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
 	},
 };
 
+/*
+ * ANSI X9.31 Continuous Pseudo-Random Number Generator (AES mode)
+ * test vectors, taken from Appendix B.2.9 and B.2.10:
+ *     http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf
+ * Only AES-128 is supported at this time.
+ */
+#define ANSI_CPRNG_AES_TEST_VECTORS	6
+
+static struct cprng_testvec ansi_cprng_aes_tv_template[] = {
+	{
+		.key	= "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+			  "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+		.klen	= 16,
+		.dt	= "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+			  "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xf9",
+		.dtlen	= 16,
+		.v	= "\x80\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.vlen	= 16,
+		.result	= "\x59\x53\x1e\xd1\x3b\xb0\xc0\x55"
+			  "\x84\x79\x66\x85\xc1\x2f\x76\x41",
+		.rlen	= 16,
+		.loops	= 1,
+	}, {
+		.key	= "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+			  "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+		.klen	= 16,
+		.dt	= "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+			  "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfa",
+		.dtlen	= 16,
+		.v	= "\xc0\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.vlen	= 16,
+		.result	= "\x7c\x22\x2c\xf4\xca\x8f\xa2\x4c"
+			  "\x1c\x9c\xb6\x41\xa9\xf3\x22\x0d",
+		.rlen	= 16,
+		.loops	= 1,
+	}, {
+		.key	= "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+			  "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+		.klen	= 16,
+		.dt	= "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+			  "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfb",
+		.dtlen	= 16,
+		.v	= "\xe0\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.vlen	= 16,
+		.result	= "\x8a\xaa\x00\x39\x66\x67\x5b\xe5"
+			  "\x29\x14\x28\x81\xa9\x4d\x4e\xc7",
+		.rlen	= 16,
+		.loops	= 1,
+	}, {
+		.key	= "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+			  "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+		.klen	= 16,
+		.dt	= "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+			  "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfc",
+		.dtlen	= 16,
+		.v	= "\xf0\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.vlen	= 16,
+		.result	= "\x88\xdd\xa4\x56\x30\x24\x23\xe5"
+			  "\xf6\x9d\xa5\x7e\x7b\x95\xc7\x3a",
+		.rlen	= 16,
+		.loops	= 1,
+	}, {
+		.key	= "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+			  "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+		.klen	= 16,
+		.dt	= "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+			  "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfd",
+		.dtlen	= 16,
+		.v	= "\xf8\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.vlen	= 16,
+		.result	= "\x05\x25\x92\x46\x61\x79\xd2\xcb"
+			  "\x78\xc4\x0b\x14\x0a\x5a\x9a\xc8",
+		.rlen	= 16,
+		.loops	= 1,
+	}, {	/* Monte Carlo Test */
+		.key	= "\x9f\x5b\x51\x20\x0b\xf3\x34\xb5"
+			  "\xd8\x2b\xe8\xc3\x72\x55\xc8\x48",
+		.klen	= 16,
+		.dt	= "\x63\x76\xbb\xe5\x29\x02\xba\x3b"
+			  "\x67\xc9\x25\xfa\x70\x1f\x11\xac",
+		.dtlen	= 16,
+		.v	= "\x57\x2c\x8e\x76\x87\x26\x47\x97"
+			  "\x7e\x74\xfb\xdd\xc4\x95\x01\xd1",
+		.vlen	= 16,
+		.result	= "\x48\xe9\xbd\x0d\x06\xee\x18\xfb"
+			  "\xe4\x57\x90\xd5\xc3\xfc\x9b\x73",
+		.rlen	= 16,
+		.loops	= 10000,
+	},
+};
+
 /* Cast5 test vectors from RFC 2144 */
 #define CAST5_ENC_TEST_VECTORS	3
 #define CAST5_DEC_TEST_VECTORS	3
-- 
cgit v0.10.2


From 941fb3287c0c0d84000b669db5450ac4886da640 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Mon, 4 May 2009 19:49:23 +0800
Subject: crypto: testmgr - Catch base cipher self-test failures in fips mode

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5183ec5..e3f9973 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2220,7 +2220,8 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 		if (i < 0)
 			goto notest;
 
-		return alg_test_cipher(alg_test_descs + i, driver, type, mask);
+		rc = alg_test_cipher(alg_test_descs + i, driver, type, mask);
+		goto test_done;
 	}
 
 	i = alg_find_test(alg);
@@ -2229,6 +2230,7 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 
 	rc = alg_test_descs[i].test(alg_test_descs + i, driver,
 				      type, mask);
+test_done:
 	if (fips_enabled && rc)
 		panic("%s: %s alg self test failed in fips mode!\n", driver, alg);
 
-- 
cgit v0.10.2


From 29ecd4ab3d3aa8bb231361937165dfbbbc534e9a Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Mon, 4 May 2009 19:51:17 +0800
Subject: crypto: testmgr - Print self-test pass notices in fips mode

According to our FIPS CAVS testing lab guru, when we're in fips mode,
we must print out notices of successful self-test completion for
every alg to be compliant.

New and improved v2, without strncmp crap. Doesn't need to touch a flag
though, due to not moving the notest label around anymore.

Applies atop '[PATCH v2] crypto: catch base cipher self-test failures
in fips mode'.

Personally, I wouldn't mind seeing this info printed out regardless of
whether or not we're in fips mode, I think its useful info, but will
stick with only in fips mode for now.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index e3f9973..e76af78 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2234,6 +2234,10 @@ test_done:
 	if (fips_enabled && rc)
 		panic("%s: %s alg self test failed in fips mode!\n", driver, alg);
 
+	if (fips_enabled && !rc)
+		printk(KERN_INFO "alg: self-tests for %s (%s) passed\n",
+		       driver, alg);
+
 	return rc;
 
 notest:
-- 
cgit v0.10.2


From f8b0d4d09dc9d0a73fcdcf6c2724650529ec417d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 6 May 2009 14:15:47 +0800
Subject: crypto: testmgr - Dynamically allocate xbuf and axbuf

We currently allocate temporary memory that is used for testing
statically.  This renders the testing engine non-reentrant. As
algorithms may nest, i.e., one may construct another in order to
carry out a part of its operation, this is unacceptable.  For
example, it has been reported that an AEAD implementation allocates
a cipher in its setkey function, which causes it to fail during
testing as the temporary memory is overwritten.

This patch replaces the static memory with dynamically allocated
buffers.  We need a maximum of 16 pages so this slightly increases
the chances of an algorithm failing due to memory shortage.
However, as testing usually occurs at registration, this shouldn't
be a big problem.

Reported-by: Shasi Pulijala <spulijala@amcc.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/algboss.c b/crypto/algboss.c
index 6906f92..9908dd8 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -280,29 +280,13 @@ static struct notifier_block cryptomgr_notifier = {
 
 static int __init cryptomgr_init(void)
 {
-	int err;
-
-	err = testmgr_init();
-	if (err)
-		return err;
-
-	err = crypto_register_notifier(&cryptomgr_notifier);
-	if (err)
-		goto free_testmgr;
-
-	return 0;
-
-free_testmgr:
-	testmgr_exit();
-	return err;
+	return crypto_register_notifier(&cryptomgr_notifier);
 }
 
 static void __exit cryptomgr_exit(void)
 {
 	int err = crypto_unregister_notifier(&cryptomgr_notifier);
 	BUG_ON(err);
-
-	testmgr_exit();
 }
 
 subsys_initcall(cryptomgr_init);
diff --git a/crypto/internal.h b/crypto/internal.h
index fc76e1f..113579a 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -121,9 +121,6 @@ int crypto_register_notifier(struct notifier_block *nb);
 int crypto_unregister_notifier(struct notifier_block *nb);
 int crypto_probing_notify(unsigned long val, void *v);
 
-int __init testmgr_init(void);
-void testmgr_exit(void);
-
 static inline void crypto_alg_put(struct crypto_alg *alg)
 {
 	if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index e76af78..c555094 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -107,9 +107,6 @@ struct alg_test_desc {
 
 static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 };
 
-static char *xbuf[XBUFSIZE];
-static char *axbuf[XBUFSIZE];
-
 static void hexdump(unsigned char *buf, unsigned int len)
 {
 	print_hex_dump(KERN_CONT, "", DUMP_PREFIX_OFFSET,
@@ -128,6 +125,33 @@ static void tcrypt_complete(struct crypto_async_request *req, int err)
 	complete(&res->completion);
 }
 
+static int testmgr_alloc_buf(char *buf[XBUFSIZE])
+{
+	int i;
+
+	for (i = 0; i < XBUFSIZE; i++) {
+		buf[i] = (void *)__get_free_page(GFP_KERNEL);
+		if (!buf[i])
+			goto err_free_buf;
+	}
+
+	return 0;
+
+err_free_buf:
+	while (i-- > 0)
+		free_page((unsigned long)buf[i]);
+
+	return -ENOMEM;
+}
+
+static void testmgr_free_buf(char *buf[XBUFSIZE])
+{
+	int i;
+
+	for (i = 0; i < XBUFSIZE; i++)
+		free_page((unsigned long)buf[i]);
+}
+
 static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 		     unsigned int tcount)
 {
@@ -137,8 +161,12 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 	char result[64];
 	struct ahash_request *req;
 	struct tcrypt_result tresult;
-	int ret;
 	void *hash_buff;
+	char *xbuf[XBUFSIZE];
+	int ret = -ENOMEM;
+
+	if (testmgr_alloc_buf(xbuf))
+		goto out_nobuf;
 
 	init_completion(&tresult.completion);
 
@@ -146,7 +174,6 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 	if (!req) {
 		printk(KERN_ERR "alg: hash: Failed to allocate request for "
 		       "%s\n", algo);
-		ret = -ENOMEM;
 		goto out_noreq;
 	}
 	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
@@ -272,6 +299,8 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 out:
 	ahash_request_free(req);
 out_noreq:
+	testmgr_free_buf(xbuf);
+out_nobuf:
 	return ret;
 }
 
@@ -280,7 +309,7 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 {
 	const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm));
 	unsigned int i, j, k, n, temp;
-	int ret = 0;
+	int ret = -ENOMEM;
 	char *q;
 	char *key;
 	struct aead_request *req;
@@ -292,6 +321,13 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 	void *input;
 	void *assoc;
 	char iv[MAX_IVLEN];
+	char *xbuf[XBUFSIZE];
+	char *axbuf[XBUFSIZE];
+
+	if (testmgr_alloc_buf(xbuf))
+		goto out_noxbuf;
+	if (testmgr_alloc_buf(axbuf))
+		goto out_noaxbuf;
 
 	if (enc == ENCRYPT)
 		e = "encryption";
@@ -304,7 +340,6 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 	if (!req) {
 		printk(KERN_ERR "alg: aead: Failed to allocate request for "
 		       "%s\n", algo);
-		ret = -ENOMEM;
 		goto out;
 	}
 
@@ -581,6 +616,10 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 
 out:
 	aead_request_free(req);
+	testmgr_free_buf(axbuf);
+out_noaxbuf:
+	testmgr_free_buf(xbuf);
+out_noxbuf:
 	return ret;
 }
 
@@ -589,10 +628,14 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
 {
 	const char *algo = crypto_tfm_alg_driver_name(crypto_cipher_tfm(tfm));
 	unsigned int i, j, k;
-	int ret;
 	char *q;
 	const char *e;
 	void *data;
+	char *xbuf[XBUFSIZE];
+	int ret = -ENOMEM;
+
+	if (testmgr_alloc_buf(xbuf))
+		goto out_nobuf;
 
 	if (enc == ENCRYPT)
 	        e = "encryption";
@@ -646,6 +689,8 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
 	ret = 0;
 
 out:
+	testmgr_free_buf(xbuf);
+out_nobuf:
 	return ret;
 }
 
@@ -655,7 +700,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 	const char *algo =
 		crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm));
 	unsigned int i, j, k, n, temp;
-	int ret;
 	char *q;
 	struct ablkcipher_request *req;
 	struct scatterlist sg[8];
@@ -663,6 +707,11 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 	struct tcrypt_result result;
 	void *data;
 	char iv[MAX_IVLEN];
+	char *xbuf[XBUFSIZE];
+	int ret = -ENOMEM;
+
+	if (testmgr_alloc_buf(xbuf))
+		goto out_nobuf;
 
 	if (enc == ENCRYPT)
 	        e = "encryption";
@@ -675,7 +724,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 	if (!req) {
 		printk(KERN_ERR "alg: skcipher: Failed to allocate request "
 		       "for %s\n", algo);
-		ret = -ENOMEM;
 		goto out;
 	}
 
@@ -860,6 +908,8 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 
 out:
 	ablkcipher_request_free(req);
+	testmgr_free_buf(xbuf);
+out_nobuf:
 	return ret;
 }
 
@@ -2245,41 +2295,3 @@ notest:
 	return 0;
 }
 EXPORT_SYMBOL_GPL(alg_test);
-
-int __init testmgr_init(void)
-{
-	int i;
-
-	for (i = 0; i < XBUFSIZE; i++) {
-		xbuf[i] = (void *)__get_free_page(GFP_KERNEL);
-		if (!xbuf[i])
-			goto err_free_xbuf;
-	}
-
-	for (i = 0; i < XBUFSIZE; i++) {
-		axbuf[i] = (void *)__get_free_page(GFP_KERNEL);
-		if (!axbuf[i])
-			goto err_free_axbuf;
-	}
-
-	return 0;
-
-err_free_axbuf:
-	for (i = 0; i < XBUFSIZE && axbuf[i]; i++)
-		free_page((unsigned long)axbuf[i]);
-err_free_xbuf:
-	for (i = 0; i < XBUFSIZE && xbuf[i]; i++)
-		free_page((unsigned long)xbuf[i]);
-
-	return -ENOMEM;
-}
-
-void testmgr_exit(void)
-{
-	int i;
-
-	for (i = 0; i < XBUFSIZE; i++)
-		free_page((unsigned long)axbuf[i]);
-	for (i = 0; i < XBUFSIZE; i++)
-		free_page((unsigned long)xbuf[i]);
-}
-- 
cgit v0.10.2


From f7cb80f2b9fa06730be20d17c80b12e511a36c1c Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Wed, 6 May 2009 17:29:17 +0800
Subject: crypto: testmgr - Add ctr(aes) test vectors

Now with multi-block test vectors, all from SP800-38A, Appendix F.5.
Also added ctr(aes) to case 10 in tcrypt.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index ea3b8a8..9e4974e 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -526,6 +526,7 @@ static void do_test(int m)
 		tcrypt_test("cbc(aes)");
 		tcrypt_test("lrw(aes)");
 		tcrypt_test("xts(aes)");
+		tcrypt_test("ctr(aes)");
 		tcrypt_test("rfc3686(ctr(aes))");
 		break;
 
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index c555094..f4cc178 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1568,6 +1568,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "ctr(aes)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = aes_ctr_enc_tv_template,
+					.count = AES_CTR_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = aes_ctr_dec_tv_template,
+					.count = AES_CTR_DEC_TEST_VECTORS
+				}
+			}
+		}
+	}, {
 		.alg = "cts(cbc(aes))",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -2017,12 +2032,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.cipher = {
 				.enc = {
-					.vecs = aes_ctr_enc_tv_template,
-					.count = AES_CTR_ENC_TEST_VECTORS
+					.vecs = aes_ctr_rfc3686_enc_tv_template,
+					.count = AES_CTR_3686_ENC_TEST_VECTORS
 				},
 				.dec = {
-					.vecs = aes_ctr_dec_tv_template,
-					.count = AES_CTR_DEC_TEST_VECTORS
+					.vecs = aes_ctr_rfc3686_dec_tv_template,
+					.count = AES_CTR_3686_DEC_TEST_VECTORS
 				}
 			}
 		}
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index c1c709b..6931622 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -2854,8 +2854,10 @@ static struct cipher_testvec cast6_dec_tv_template[] = {
 #define AES_LRW_DEC_TEST_VECTORS 8
 #define AES_XTS_ENC_TEST_VECTORS 4
 #define AES_XTS_DEC_TEST_VECTORS 4
-#define AES_CTR_ENC_TEST_VECTORS 7
-#define AES_CTR_DEC_TEST_VECTORS 6
+#define AES_CTR_ENC_TEST_VECTORS 3
+#define AES_CTR_DEC_TEST_VECTORS 3
+#define AES_CTR_3686_ENC_TEST_VECTORS 7
+#define AES_CTR_3686_DEC_TEST_VECTORS 6
 #define AES_GCM_ENC_TEST_VECTORS 9
 #define AES_GCM_DEC_TEST_VECTORS 8
 #define AES_CCM_ENC_TEST_VECTORS 7
@@ -3998,6 +4000,164 @@ static struct cipher_testvec aes_xts_dec_tv_template[] = {
 
 
 static struct cipher_testvec aes_ctr_enc_tv_template[] = {
+	{ /* From NIST Special Publication 800-38A, Appendix F.5 */
+		.key	= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+			  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.klen	= 16,
+		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.input	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen	= 64,
+		.result	= "\x87\x4d\x61\x91\xb6\x20\xe3\x26"
+			  "\x1b\xef\x68\x64\x99\x0d\xb6\xce"
+			  "\x98\x06\xf6\x6b\x79\x70\xfd\xff"
+			  "\x86\x17\x18\x7b\xb9\xff\xfd\xff"
+			  "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e"
+			  "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab"
+			  "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1"
+			  "\x79\x21\x70\xa0\xf3\x00\x9c\xee",
+		.rlen	= 64,
+	}, {
+		.key	= "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+		.klen	= 24,
+		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.input	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen	= 64,
+		.result	= "\x1a\xbc\x93\x24\x17\x52\x1c\xa2"
+			  "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b"
+			  "\x09\x03\x39\xec\x0a\xa6\xfa\xef"
+			  "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94"
+			  "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70"
+			  "\xd1\xbd\x1d\x66\x56\x20\xab\xf7"
+			  "\x4f\x78\xa7\xf6\xd2\x98\x09\x58"
+			  "\x5a\x97\xda\xec\x58\xc6\xb0\x50",
+		.rlen	= 64,
+	}, {
+		.key	= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.klen	= 32,
+		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.input	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ilen	= 64,
+		.result	= "\x60\x1e\xc3\x13\x77\x57\x89\xa5"
+			  "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28"
+			  "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a"
+			  "\xca\x84\xe9\x90\xca\xca\xf5\xc5"
+			  "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c"
+			  "\xe8\x70\x17\xba\x2d\x84\x98\x8d"
+			  "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6"
+			  "\x13\xc2\xdd\x08\x45\x79\x41\xa6",
+		.rlen	= 64,
+	}
+};
+
+static struct cipher_testvec aes_ctr_dec_tv_template[] = {
+	{ /* From NIST Special Publication 800-38A, Appendix F.5 */
+		.key	= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+			  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.klen	= 16,
+		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.input	= "\x87\x4d\x61\x91\xb6\x20\xe3\x26"
+			  "\x1b\xef\x68\x64\x99\x0d\xb6\xce"
+			  "\x98\x06\xf6\x6b\x79\x70\xfd\xff"
+			  "\x86\x17\x18\x7b\xb9\xff\xfd\xff"
+			  "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e"
+			  "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab"
+			  "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1"
+			  "\x79\x21\x70\xa0\xf3\x00\x9c\xee",
+		.ilen	= 64,
+		.result	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.rlen	= 64,
+	}, {
+		.key	= "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+		.klen	= 24,
+		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.input	= "\x1a\xbc\x93\x24\x17\x52\x1c\xa2"
+			  "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b"
+			  "\x09\x03\x39\xec\x0a\xa6\xfa\xef"
+			  "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94"
+			  "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70"
+			  "\xd1\xbd\x1d\x66\x56\x20\xab\xf7"
+			  "\x4f\x78\xa7\xf6\xd2\x98\x09\x58"
+			  "\x5a\x97\xda\xec\x58\xc6\xb0\x50",
+		.ilen	= 64,
+		.result	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.rlen	= 64,
+	}, {
+		.key	= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.klen	= 32,
+		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.input	= "\x60\x1e\xc3\x13\x77\x57\x89\xa5"
+			  "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28"
+			  "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a"
+			  "\xca\x84\xe9\x90\xca\xca\xf5\xc5"
+			  "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c"
+			  "\xe8\x70\x17\xba\x2d\x84\x98\x8d"
+			  "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6"
+			  "\x13\xc2\xdd\x08\x45\x79\x41\xa6",
+		.ilen	= 64,
+		.result	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.rlen	= 64,
+	}
+};
+
+static struct cipher_testvec aes_ctr_rfc3686_enc_tv_template[] = {
 	{ /* From RFC 3686 */
 		.key	= "\xae\x68\x52\xf8\x12\x10\x67\xcc"
 			  "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e"
@@ -5129,7 +5289,7 @@ static struct cipher_testvec aes_ctr_enc_tv_template[] = {
 	},
 };
 
-static struct cipher_testvec aes_ctr_dec_tv_template[] = {
+static struct cipher_testvec aes_ctr_rfc3686_dec_tv_template[] = {
 	{ /* From RFC 3686 */
 		.key	= "\xae\x68\x52\xf8\x12\x10\x67\xcc"
 			  "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e"
-- 
cgit v0.10.2


From a1915d51e8e7ee192d2101d621d425379088cbb0 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Fri, 15 May 2009 15:16:03 +1000
Subject: crypto: testmgr - Mark algs allowed in fips mode

Set the fips_allowed flag in testmgr.c's alg_test_descs[] for algs
that are allowed to be used when in fips mode.

One caveat: des isn't actually allowed anymore, but des (and thus also
ecb(des)) has to be permitted, because disallowing them results in
des3_ede being unable to properly register (see des module init func).

Also, crc32 isn't technically on the fips approved list, but I think
it gets used in various places that necessitate it being allowed.

This list is based on
http://csrc.nist.gov/groups/STM/cavp/index.html

Important note: allowed/approved here does NOT mean "validated", just
that its an alg that *could* be validated.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index f4cc178..51bae62c 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -94,6 +94,7 @@ struct alg_test_desc {
 	const char *alg;
 	int (*test)(const struct alg_test_desc *desc, const char *driver,
 		    u32 type, u32 mask);
+	int fips_allowed;	/* set if alg is allowed in fips mode */
 
 	union {
 		struct aead_test_suite aead;
@@ -1432,6 +1433,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	{
 		.alg = "ansi_cprng",
 		.test = alg_test_cprng,
+		.fips_allowed = 1,
 		.suite = {
 			.cprng = {
 				.vecs = ansi_cprng_aes_tv_template,
@@ -1441,6 +1443,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "cbc(aes)",
 		.test = alg_test_skcipher,
+		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1516,6 +1519,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "cbc(des3_ede)",
 		.test = alg_test_skcipher,
+		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1546,6 +1550,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "ccm(aes)",
 		.test = alg_test_aead,
+		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -1561,6 +1566,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "crc32c",
 		.test = alg_test_crc32c,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = crc32c_tv_template,
@@ -1570,6 +1576,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "ctr(aes)",
 		.test = alg_test_skcipher,
+		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1615,6 +1622,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "ecb(aes)",
 		.test = alg_test_skcipher,
+		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1720,6 +1728,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "ecb(des)",
 		.test = alg_test_skcipher,
+		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1735,6 +1744,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "ecb(des3_ede)",
 		.test = alg_test_skcipher,
+		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -1870,6 +1880,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "gcm(aes)",
 		.test = alg_test_aead,
+		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -1912,6 +1923,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "hmac(sha1)",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = hmac_sha1_tv_template,
@@ -1921,6 +1933,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "hmac(sha224)",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = hmac_sha224_tv_template,
@@ -1930,6 +1943,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "hmac(sha256)",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = hmac_sha256_tv_template,
@@ -1939,6 +1953,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "hmac(sha384)",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = hmac_sha384_tv_template,
@@ -1948,6 +1963,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "hmac(sha512)",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = hmac_sha512_tv_template,
@@ -2029,6 +2045,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "rfc3686(ctr(aes))",
 		.test = alg_test_skcipher,
+		.fips_allowed = 1,
 		.suite = {
 			.cipher = {
 				.enc = {
@@ -2044,6 +2061,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "rfc4309(ccm(aes))",
 		.test = alg_test_aead,
+		.fips_allowed = 1,
 		.suite = {
 			.aead = {
 				.enc = {
@@ -2106,6 +2124,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "sha1",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = sha1_tv_template,
@@ -2115,6 +2134,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "sha224",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = sha224_tv_template,
@@ -2124,6 +2144,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "sha256",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = sha256_tv_template,
@@ -2133,6 +2154,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "sha384",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = sha384_tv_template,
@@ -2142,6 +2164,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "sha512",
 		.test = alg_test_hash,
+		.fips_allowed = 1,
 		.suite = {
 			.hash = {
 				.vecs = sha512_tv_template,
-- 
cgit v0.10.2


From a3bef3a31a19bd943047ba8bf5b2cc7b5d164362 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Fri, 15 May 2009 15:17:05 +1000
Subject: crypto: testmgr - Skip algs not flagged fips_allowed in fips mode

Because all fips-allowed algorithms must be self-tested before they
can be used, they will all have entries in testmgr.c's alg_test_descs[].
Skip self-tests for any algs not flagged as fips_approved and return
-EINVAL when in fips mode.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 51bae62c..f93b26d 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2308,6 +2308,9 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 		if (i < 0)
 			goto notest;
 
+		if (fips_enabled && !alg_test_descs[i].fips_allowed)
+			goto non_fips_alg;
+
 		rc = alg_test_cipher(alg_test_descs + i, driver, type, mask);
 		goto test_done;
 	}
@@ -2316,6 +2319,9 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 	if (i < 0)
 		goto notest;
 
+	if (fips_enabled && !alg_test_descs[i].fips_allowed)
+		goto non_fips_alg;
+
 	rc = alg_test_descs[i].test(alg_test_descs + i, driver,
 				      type, mask);
 test_done:
@@ -2331,5 +2337,7 @@ test_done:
 notest:
 	printk(KERN_INFO "alg: No test for %s (%s)\n", alg, driver);
 	return 0;
+non_fips_alg:
+	return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(alg_test);
-- 
cgit v0.10.2


From 608d1cd5d375580a49d01b5ed1f9944f5141ae19 Mon Sep 17 00:00:00 2001
From: Harald Welte <HaraldWelte@viatech.com>
Date: Fri, 15 May 2009 15:57:35 +1000
Subject: hwrng: via_rng - The VIA Hardware RNG driver is for the CPU, not
 Chipset

This is a cosmetic change, fixing the MODULE_DESCRIPTION() of via-rng.c

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 4e9573c..02ee639 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -205,5 +205,5 @@ static void __exit mod_exit(void)
 module_init(mod_init);
 module_exit(mod_exit);
 
-MODULE_DESCRIPTION("H/W RNG driver for VIA chipsets");
+MODULE_DESCRIPTION("H/W RNG driver for VIA CPU with PadLock");
 MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 858576bdc5d65edf1fffd2e65b2165ec1dc68486 Mon Sep 17 00:00:00 2001
From: Harald Welte <HaraldWelte@viatech.com>
Date: Fri, 15 May 2009 16:00:32 +1000
Subject: hwrng: via_rng - Support VIA Nano hardware RNG

The VIA Nano CPU supports the same XSTORE instruction based RNG,
but it lacks the MSR present in earlier CPUs.

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 02ee639..794aacb 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -132,6 +132,19 @@ static int via_rng_init(struct hwrng *rng)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 lo, hi, old_lo;
 
+	/* VIA Nano CPUs don't have the MSR_VIA_RNG anymore.  The RNG
+	 * is always enabled if CPUID rng_en is set.  There is no
+	 * RNG configuration like it used to be the case in this
+	 * register */
+	if ((c->x86 == 6) && (c->x86_model >= 0x0f)) {
+		if (!cpu_has_xstore_enabled) {
+			printk(KERN_ERR PFX "can't enable hardware RNG "
+				"if XSTORE is not enabled\n");
+			return -ENODEV;
+		}
+		return 0;
+	}
+
 	/* Control the RNG via MSR.  Tread lightly and pay very close
 	 * close attention to values written, as the reserved fields
 	 * are documented to be "undefined and unpredictable"; but it
-- 
cgit v0.10.2


From e9736c16da9077728802f42393d18258e6685428 Mon Sep 17 00:00:00 2001
From: Harald Welte <HaraldWelte@viatech.com>
Date: Fri, 15 May 2009 16:01:52 +1000
Subject: hwrng: via_rng - Support VIA Nano hardware RNG on X86_64 builds

Fix Kconfig to build via-rng.ko on X86_64 builds, as the VIA Nano
CPU supports x86_64, too.

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 5fab647..9c00440 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -88,7 +88,7 @@ config HW_RANDOM_N2RNG
 
 config HW_RANDOM_VIA
 	tristate "VIA HW Random Number Generator support"
-	depends on HW_RANDOM && X86_32
+	depends on HW_RANDOM && X86
 	default HW_RANDOM
 	---help---
 	  This driver provides kernel-side support for the Random Number
-- 
cgit v0.10.2


From 3ce858cb04de8bc83449eac707c8012a1944daca Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Wed, 27 May 2009 15:05:02 +1000
Subject: crypto: compress - Return produced bytes in
 crypto_{,de}compress_{update,final}

If crypto_{,de}compress_{update,final}() succeed, return the actual number of
bytes produced instead of zero, so their users don't have to calculate that
theirselves.

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index f93b26d..376ea88 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1002,24 +1002,25 @@ static int test_pcomp(struct crypto_pcomp *tfm,
 	const char *algo = crypto_tfm_alg_driver_name(crypto_pcomp_tfm(tfm));
 	unsigned int i;
 	char result[COMP_BUF_SIZE];
-	int error;
+	int res;
 
 	for (i = 0; i < ctcount; i++) {
 		struct comp_request req;
+		unsigned int produced = 0;
 
-		error = crypto_compress_setup(tfm, ctemplate[i].params,
-					      ctemplate[i].paramsize);
-		if (error) {
+		res = crypto_compress_setup(tfm, ctemplate[i].params,
+					    ctemplate[i].paramsize);
+		if (res) {
 			pr_err("alg: pcomp: compression setup failed on test "
-			       "%d for %s: error=%d\n", i + 1, algo, error);
-			return error;
+			       "%d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
 
-		error = crypto_compress_init(tfm);
-		if (error) {
+		res = crypto_compress_init(tfm);
+		if (res) {
 			pr_err("alg: pcomp: compression init failed on test "
-			       "%d for %s: error=%d\n", i + 1, algo, error);
-			return error;
+			       "%d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
 
 		memset(result, 0, sizeof(result));
@@ -1029,32 +1030,37 @@ static int test_pcomp(struct crypto_pcomp *tfm,
 		req.next_out = result;
 		req.avail_out = ctemplate[i].outlen / 2;
 
-		error = crypto_compress_update(tfm, &req);
-		if (error && (error != -EAGAIN || req.avail_in)) {
+		res = crypto_compress_update(tfm, &req);
+		if (res < 0 && (res != -EAGAIN || req.avail_in)) {
 			pr_err("alg: pcomp: compression update failed on test "
-			       "%d for %s: error=%d\n", i + 1, algo, error);
-			return error;
+			       "%d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
+		if (res > 0)
+			produced += res;
 
 		/* Add remaining input data */
 		req.avail_in += (ctemplate[i].inlen + 1) / 2;
 
-		error = crypto_compress_update(tfm, &req);
-		if (error && (error != -EAGAIN || req.avail_in)) {
+		res = crypto_compress_update(tfm, &req);
+		if (res < 0 && (res != -EAGAIN || req.avail_in)) {
 			pr_err("alg: pcomp: compression update failed on test "
-			       "%d for %s: error=%d\n", i + 1, algo, error);
-			return error;
+			       "%d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
+		if (res > 0)
+			produced += res;
 
 		/* Provide remaining output space */
 		req.avail_out += COMP_BUF_SIZE - ctemplate[i].outlen / 2;
 
-		error = crypto_compress_final(tfm, &req);
-		if (error) {
+		res = crypto_compress_final(tfm, &req);
+		if (res < 0) {
 			pr_err("alg: pcomp: compression final failed on test "
-			       "%d for %s: error=%d\n", i + 1, algo, error);
-			return error;
+			       "%d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
+		produced += res;
 
 		if (COMP_BUF_SIZE - req.avail_out != ctemplate[i].outlen) {
 			pr_err("alg: comp: Compression test %d failed for %s: "
@@ -1064,6 +1070,13 @@ static int test_pcomp(struct crypto_pcomp *tfm,
 			return -EINVAL;
 		}
 
+		if (produced != ctemplate[i].outlen) {
+			pr_err("alg: comp: Compression test %d failed for %s: "
+			       "returned len = %u (expected %d)\n", i + 1,
+			       algo, produced, ctemplate[i].outlen);
+			return -EINVAL;
+		}
+
 		if (memcmp(result, ctemplate[i].output, ctemplate[i].outlen)) {
 			pr_err("alg: pcomp: Compression test %d failed for "
 			       "%s\n", i + 1, algo);
@@ -1074,21 +1087,21 @@ static int test_pcomp(struct crypto_pcomp *tfm,
 
 	for (i = 0; i < dtcount; i++) {
 		struct comp_request req;
+		unsigned int produced = 0;
 
-		error = crypto_decompress_setup(tfm, dtemplate[i].params,
-						dtemplate[i].paramsize);
-		if (error) {
+		res = crypto_decompress_setup(tfm, dtemplate[i].params,
+					      dtemplate[i].paramsize);
+		if (res) {
 			pr_err("alg: pcomp: decompression setup failed on "
-			       "test %d for %s: error=%d\n", i + 1, algo,
-			       error);
-			return error;
+			       "test %d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
 
-		error = crypto_decompress_init(tfm);
-		if (error) {
+		res = crypto_decompress_init(tfm);
+		if (res) {
 			pr_err("alg: pcomp: decompression init failed on test "
-			       "%d for %s: error=%d\n", i + 1, algo, error);
-			return error;
+			       "%d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
 
 		memset(result, 0, sizeof(result));
@@ -1098,35 +1111,38 @@ static int test_pcomp(struct crypto_pcomp *tfm,
 		req.next_out = result;
 		req.avail_out = dtemplate[i].outlen / 2;
 
-		error = crypto_decompress_update(tfm, &req);
-		if (error  && (error != -EAGAIN || req.avail_in)) {
+		res = crypto_decompress_update(tfm, &req);
+		if (res < 0 && (res != -EAGAIN || req.avail_in)) {
 			pr_err("alg: pcomp: decompression update failed on "
-			       "test %d for %s: error=%d\n", i + 1, algo,
-			       error);
-			return error;
+			       "test %d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
+		if (res > 0)
+			produced += res;
 
 		/* Add remaining input data */
 		req.avail_in += (dtemplate[i].inlen + 1) / 2;
 
-		error = crypto_decompress_update(tfm, &req);
-		if (error  && (error != -EAGAIN || req.avail_in)) {
+		res = crypto_decompress_update(tfm, &req);
+		if (res < 0 && (res != -EAGAIN || req.avail_in)) {
 			pr_err("alg: pcomp: decompression update failed on "
-			       "test %d for %s: error=%d\n", i + 1, algo,
-			       error);
-			return error;
+			       "test %d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
+		if (res > 0)
+			produced += res;
 
 		/* Provide remaining output space */
 		req.avail_out += COMP_BUF_SIZE - dtemplate[i].outlen / 2;
 
-		error = crypto_decompress_final(tfm, &req);
-		if (error  && (error != -EAGAIN || req.avail_in)) {
+		res = crypto_decompress_final(tfm, &req);
+		if (res < 0 && (res != -EAGAIN || req.avail_in)) {
 			pr_err("alg: pcomp: decompression final failed on "
-			       "test %d for %s: error=%d\n", i + 1, algo,
-			       error);
-			return error;
+			       "test %d for %s: error=%d\n", i + 1, algo, res);
+			return res;
 		}
+		if (res > 0)
+			produced += res;
 
 		if (COMP_BUF_SIZE - req.avail_out != dtemplate[i].outlen) {
 			pr_err("alg: comp: Decompression test %d failed for "
@@ -1136,6 +1152,13 @@ static int test_pcomp(struct crypto_pcomp *tfm,
 			return -EINVAL;
 		}
 
+		if (produced != dtemplate[i].outlen) {
+			pr_err("alg: comp: Decompression test %d failed for "
+			       "%s: returned len = %u (expected %d)\n", i + 1,
+			       algo, produced, dtemplate[i].outlen);
+			return -EINVAL;
+		}
+
 		if (memcmp(result, dtemplate[i].output, dtemplate[i].outlen)) {
 			pr_err("alg: pcomp: Decompression test %d failed for "
 			       "%s\n", i + 1, algo);
diff --git a/crypto/zlib.c b/crypto/zlib.c
index 33609ba..c3015733 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -165,15 +165,15 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
 		return -EINVAL;
 	}
 
+	ret = req->avail_out - stream->avail_out;
 	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
-		 req->avail_in - stream->avail_in,
-		 req->avail_out - stream->avail_out);
+		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
 	req->avail_in = stream->avail_in;
 	req->next_out = stream->next_out;
 	req->avail_out = stream->avail_out;
-	return 0;
+	return ret;
 }
 
 static int zlib_compress_final(struct crypto_pcomp *tfm,
@@ -195,15 +195,15 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
 		return -EINVAL;
 	}
 
+	ret = req->avail_out - stream->avail_out;
 	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
-		 req->avail_in - stream->avail_in,
-		 req->avail_out - stream->avail_out);
+		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
 	req->avail_in = stream->avail_in;
 	req->next_out = stream->next_out;
 	req->avail_out = stream->avail_out;
-	return 0;
+	return ret;
 }
 
 
@@ -280,15 +280,15 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm,
 		return -EINVAL;
 	}
 
+	ret = req->avail_out - stream->avail_out;
 	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
-		 req->avail_in - stream->avail_in,
-		 req->avail_out - stream->avail_out);
+		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
 	req->avail_in = stream->avail_in;
 	req->next_out = stream->next_out;
 	req->avail_out = stream->avail_out;
-	return 0;
+	return ret;
 }
 
 static int zlib_decompress_final(struct crypto_pcomp *tfm,
@@ -328,15 +328,15 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm,
 		return -EINVAL;
 	}
 
+	ret = req->avail_out - stream->avail_out;
 	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
 		 stream->avail_in, stream->avail_out,
-		 req->avail_in - stream->avail_in,
-		 req->avail_out - stream->avail_out);
+		 req->avail_in - stream->avail_in, ret);
 	req->next_in = stream->next_in;
 	req->avail_in = stream->avail_in;
 	req->next_out = stream->next_out;
 	req->avail_out = stream->avail_out;
-	return 0;
+	return ret;
 }
 
 
-- 
cgit v0.10.2


From 4e033a6bc70f094d36128c328f6ca725c6ca4b4c Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Wed, 27 May 2009 15:10:21 +1000
Subject: crypto: tcrypt - Do not exit on success in fips mode

At present, the tcrypt module always exits with an -EAGAIN upon
successfully completing all the tests its been asked to run. In fips
mode, integrity checking is done by running all self-tests from the
initrd, and its much simpler to check the ret from modprobe for
success than to scrape dmesg and/or /proc/crypto. Simply stay
loaded, giving modprobe a retval of 0, if self-tests all pass and
we're in fips mode.

A side-effect of tracking success/failure for fips mode is that in
non-fips mode, self-test failures will return the actual failure
return codes, rather than always returning -EAGAIN, which seems more
correct anyway.

The tcrypt_test() portion of the patch is dependent on my earlier
pair of patches that skip non-fips algs in fips mode, at least to
achieve the fully intended behavior.

Nb: testing this patch against the cryptodev tree revealed a test
failure for sha384, which I have yet to look into...

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 9e4974e..d59ba50 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -27,6 +27,7 @@
 #include <linux/timex.h>
 #include <linux/interrupt.h>
 #include "tcrypt.h"
+#include "internal.h"
 
 /*
  * Need slab memory for testing (size in number of pages).
@@ -468,248 +469,255 @@ static void test_available(void)
 
 static inline int tcrypt_test(const char *alg)
 {
-	return alg_test(alg, alg, 0, 0);
+	int ret;
+
+	ret = alg_test(alg, alg, 0, 0);
+	/* non-fips algs return -EINVAL in fips mode */
+	if (fips_enabled && ret == -EINVAL)
+		ret = 0;
+	return ret;
 }
 
-static void do_test(int m)
+static int do_test(int m)
 {
 	int i;
+	int ret = 0;
 
 	switch (m) {
 	case 0:
 		for (i = 1; i < 200; i++)
-			do_test(i);
+			ret += do_test(i);
 		break;
 
 	case 1:
-		tcrypt_test("md5");
+		ret += tcrypt_test("md5");
 		break;
 
 	case 2:
-		tcrypt_test("sha1");
+		ret += tcrypt_test("sha1");
 		break;
 
 	case 3:
-		tcrypt_test("ecb(des)");
-		tcrypt_test("cbc(des)");
+		ret += tcrypt_test("ecb(des)");
+		ret += tcrypt_test("cbc(des)");
 		break;
 
 	case 4:
-		tcrypt_test("ecb(des3_ede)");
-		tcrypt_test("cbc(des3_ede)");
+		ret += tcrypt_test("ecb(des3_ede)");
+		ret += tcrypt_test("cbc(des3_ede)");
 		break;
 
 	case 5:
-		tcrypt_test("md4");
+		ret += tcrypt_test("md4");
 		break;
 
 	case 6:
-		tcrypt_test("sha256");
+		ret += tcrypt_test("sha256");
 		break;
 
 	case 7:
-		tcrypt_test("ecb(blowfish)");
-		tcrypt_test("cbc(blowfish)");
+		ret += tcrypt_test("ecb(blowfish)");
+		ret += tcrypt_test("cbc(blowfish)");
 		break;
 
 	case 8:
-		tcrypt_test("ecb(twofish)");
-		tcrypt_test("cbc(twofish)");
+		ret += tcrypt_test("ecb(twofish)");
+		ret += tcrypt_test("cbc(twofish)");
 		break;
 
 	case 9:
-		tcrypt_test("ecb(serpent)");
+		ret += tcrypt_test("ecb(serpent)");
 		break;
 
 	case 10:
-		tcrypt_test("ecb(aes)");
-		tcrypt_test("cbc(aes)");
-		tcrypt_test("lrw(aes)");
-		tcrypt_test("xts(aes)");
-		tcrypt_test("ctr(aes)");
-		tcrypt_test("rfc3686(ctr(aes))");
+		ret += tcrypt_test("ecb(aes)");
+		ret += tcrypt_test("cbc(aes)");
+		ret += tcrypt_test("lrw(aes)");
+		ret += tcrypt_test("xts(aes)");
+		ret += tcrypt_test("ctr(aes)");
+		ret += tcrypt_test("rfc3686(ctr(aes))");
 		break;
 
 	case 11:
-		tcrypt_test("sha384");
+		ret += tcrypt_test("sha384");
 		break;
 
 	case 12:
-		tcrypt_test("sha512");
+		ret += tcrypt_test("sha512");
 		break;
 
 	case 13:
-		tcrypt_test("deflate");
+		ret += tcrypt_test("deflate");
 		break;
 
 	case 14:
-		tcrypt_test("ecb(cast5)");
+		ret += tcrypt_test("ecb(cast5)");
 		break;
 
 	case 15:
-		tcrypt_test("ecb(cast6)");
+		ret += tcrypt_test("ecb(cast6)");
 		break;
 
 	case 16:
-		tcrypt_test("ecb(arc4)");
+		ret += tcrypt_test("ecb(arc4)");
 		break;
 
 	case 17:
-		tcrypt_test("michael_mic");
+		ret += tcrypt_test("michael_mic");
 		break;
 
 	case 18:
-		tcrypt_test("crc32c");
+		ret += tcrypt_test("crc32c");
 		break;
 
 	case 19:
-		tcrypt_test("ecb(tea)");
+		ret += tcrypt_test("ecb(tea)");
 		break;
 
 	case 20:
-		tcrypt_test("ecb(xtea)");
+		ret += tcrypt_test("ecb(xtea)");
 		break;
 
 	case 21:
-		tcrypt_test("ecb(khazad)");
+		ret += tcrypt_test("ecb(khazad)");
 		break;
 
 	case 22:
-		tcrypt_test("wp512");
+		ret += tcrypt_test("wp512");
 		break;
 
 	case 23:
-		tcrypt_test("wp384");
+		ret += tcrypt_test("wp384");
 		break;
 
 	case 24:
-		tcrypt_test("wp256");
+		ret += tcrypt_test("wp256");
 		break;
 
 	case 25:
-		tcrypt_test("ecb(tnepres)");
+		ret += tcrypt_test("ecb(tnepres)");
 		break;
 
 	case 26:
-		tcrypt_test("ecb(anubis)");
-		tcrypt_test("cbc(anubis)");
+		ret += tcrypt_test("ecb(anubis)");
+		ret += tcrypt_test("cbc(anubis)");
 		break;
 
 	case 27:
-		tcrypt_test("tgr192");
+		ret += tcrypt_test("tgr192");
 		break;
 
 	case 28:
 
-		tcrypt_test("tgr160");
+		ret += tcrypt_test("tgr160");
 		break;
 
 	case 29:
-		tcrypt_test("tgr128");
+		ret += tcrypt_test("tgr128");
 		break;
 
 	case 30:
-		tcrypt_test("ecb(xeta)");
+		ret += tcrypt_test("ecb(xeta)");
 		break;
 
 	case 31:
-		tcrypt_test("pcbc(fcrypt)");
+		ret += tcrypt_test("pcbc(fcrypt)");
 		break;
 
 	case 32:
-		tcrypt_test("ecb(camellia)");
-		tcrypt_test("cbc(camellia)");
+		ret += tcrypt_test("ecb(camellia)");
+		ret += tcrypt_test("cbc(camellia)");
 		break;
 	case 33:
-		tcrypt_test("sha224");
+		ret += tcrypt_test("sha224");
 		break;
 
 	case 34:
-		tcrypt_test("salsa20");
+		ret += tcrypt_test("salsa20");
 		break;
 
 	case 35:
-		tcrypt_test("gcm(aes)");
+		ret += tcrypt_test("gcm(aes)");
 		break;
 
 	case 36:
-		tcrypt_test("lzo");
+		ret += tcrypt_test("lzo");
 		break;
 
 	case 37:
-		tcrypt_test("ccm(aes)");
+		ret += tcrypt_test("ccm(aes)");
 		break;
 
 	case 38:
-		tcrypt_test("cts(cbc(aes))");
+		ret += tcrypt_test("cts(cbc(aes))");
 		break;
 
         case 39:
-		tcrypt_test("rmd128");
+		ret += tcrypt_test("rmd128");
 		break;
 
         case 40:
-		tcrypt_test("rmd160");
+		ret += tcrypt_test("rmd160");
 		break;
 
 	case 41:
-		tcrypt_test("rmd256");
+		ret += tcrypt_test("rmd256");
 		break;
 
 	case 42:
-		tcrypt_test("rmd320");
+		ret += tcrypt_test("rmd320");
 		break;
 
 	case 43:
-		tcrypt_test("ecb(seed)");
+		ret += tcrypt_test("ecb(seed)");
 		break;
 
 	case 44:
-		tcrypt_test("zlib");
+		ret += tcrypt_test("zlib");
 		break;
 
 	case 45:
-		tcrypt_test("rfc4309(ccm(aes))");
+		ret += tcrypt_test("rfc4309(ccm(aes))");
 		break;
 
 	case 100:
-		tcrypt_test("hmac(md5)");
+		ret += tcrypt_test("hmac(md5)");
 		break;
 
 	case 101:
-		tcrypt_test("hmac(sha1)");
+		ret += tcrypt_test("hmac(sha1)");
 		break;
 
 	case 102:
-		tcrypt_test("hmac(sha256)");
+		ret += tcrypt_test("hmac(sha256)");
 		break;
 
 	case 103:
-		tcrypt_test("hmac(sha384)");
+		ret += tcrypt_test("hmac(sha384)");
 		break;
 
 	case 104:
-		tcrypt_test("hmac(sha512)");
+		ret += tcrypt_test("hmac(sha512)");
 		break;
 
 	case 105:
-		tcrypt_test("hmac(sha224)");
+		ret += tcrypt_test("hmac(sha224)");
 		break;
 
 	case 106:
-		tcrypt_test("xcbc(aes)");
+		ret += tcrypt_test("xcbc(aes)");
 		break;
 
 	case 107:
-		tcrypt_test("hmac(rmd128)");
+		ret += tcrypt_test("hmac(rmd128)");
 		break;
 
 	case 108:
-		tcrypt_test("hmac(rmd160)");
+		ret += tcrypt_test("hmac(rmd160)");
 		break;
 
 	case 150:
-		tcrypt_test("ansi_cprng");
+		ret += tcrypt_test("ansi_cprng");
 		break;
 
 	case 200:
@@ -873,6 +881,8 @@ static void do_test(int m)
 		test_available();
 		break;
 	}
+
+	return ret;
 }
 
 static int __init tcrypt_mod_init(void)
@@ -886,15 +896,21 @@ static int __init tcrypt_mod_init(void)
 			goto err_free_tv;
 	}
 
-	do_test(mode);
+	err = do_test(mode);
+	if (err) {
+		printk(KERN_ERR "tcrypt: one or more tests failed!\n");
+		goto err_free_tv;
+	}
 
-	/* We intentionaly return -EAGAIN to prevent keeping
-	 * the module. It does all its work from init()
-	 * and doesn't offer any runtime functionality 
+	/* We intentionaly return -EAGAIN to prevent keeping the module,
+	 * unless we're running in fips mode. It does all its work from
+	 * init() and doesn't offer any runtime functionality, but in
+	 * the fips case, checking for a successful load is helpful.
 	 * => we don't need it in the memory, do we?
 	 *                                        -- mludvig
 	 */
-	err = -EAGAIN;
+	if (!fips_enabled)
+		err = -EAGAIN;
 
 err_free_tv:
 	for (i = 0; i < TVMEMSIZE && tvmem[i]; i++)
-- 
cgit v0.10.2


From f3d8fe40498eea9f45be260bdf6ccada845411f3 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 27 May 2009 15:16:21 +1000
Subject: crypto: hifn_795x - fix __dev{init,exit} markings

The remove member of the pci_driver hifn_pci_driver uses __devexit_p(),
so the remove function itself should be marked with __devexit.  And where
there be __devexit on the remove, so is there __devinit on the probe.

Similarly, the module_init/module_exit functions should be declared with
plain __init/__exit markings, not the hotplug __dev{init,exit} ones.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index 2bef086..5f753fc 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2564,7 +2564,7 @@ static void hifn_tasklet_callback(unsigned long data)
 		hifn_process_queue(dev);
 }
 
-static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int __devinit hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int err, i;
 	struct hifn_device *dev;
@@ -2696,7 +2696,7 @@ err_out_disable_pci_device:
 	return err;
 }
 
-static void hifn_remove(struct pci_dev *pdev)
+static void __devexit hifn_remove(struct pci_dev *pdev)
 {
 	int i;
 	struct hifn_device *dev;
@@ -2744,7 +2744,7 @@ static struct pci_driver hifn_pci_driver = {
 	.remove   = __devexit_p(hifn_remove),
 };
 
-static int __devinit hifn_init(void)
+static int __init hifn_init(void)
 {
 	unsigned int freq;
 	int err;
@@ -2789,7 +2789,7 @@ static int __devinit hifn_init(void)
 	return 0;
 }
 
-static void __devexit hifn_fini(void)
+static void __exit hifn_fini(void)
 {
 	pci_unregister_driver(&hifn_pci_driver);
 
-- 
cgit v0.10.2


From fd57f22a09ae276ca3e9cd11ed99b617d611ba82 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 29 May 2009 16:05:42 +1000
Subject: crypto: testmgr - Check all test vector lengths

As we cannot guarantee the availability of contiguous pages at
run-time, all test vectors must either fit within a page, or use
scatter lists.  In some cases vectors were not checked as to
whether they fit inside a page.  This patch adds all the missing
checks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 376ea88..8fcea70 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -185,6 +185,10 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 
 		hash_buff = xbuf[0];
 
+		ret = -EINVAL;
+		if (WARN_ON(template[i].psize > PAGE_SIZE))
+			goto out;
+
 		memcpy(hash_buff, template[i].plaintext, template[i].psize);
 		sg_init_one(&sg[0], hash_buff, template[i].psize);
 
@@ -238,7 +242,11 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 
 			temp = 0;
 			sg_init_table(sg, template[i].np);
+			ret = -EINVAL;
 			for (k = 0; k < template[i].np; k++) {
+				if (WARN_ON(offset_in_page(IDX[k]) +
+					    template[i].tap[k] > PAGE_SIZE))
+					goto out;
 				sg_set_buf(&sg[k],
 					   memcpy(xbuf[IDX[k] >> PAGE_SHIFT] +
 						  offset_in_page(IDX[k]),
@@ -357,6 +365,11 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 			input = xbuf[0];
 			assoc = axbuf[0];
 
+			ret = -EINVAL;
+			if (WARN_ON(template[i].ilen > PAGE_SIZE ||
+				    template[i].alen > PAGE_SIZE))
+				goto out;
+
 			memcpy(input, template[i].input, template[i].ilen);
 			memcpy(assoc, template[i].assoc, template[i].alen);
 			if (template[i].iv)
@@ -516,7 +529,11 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 			}
 
 			sg_init_table(asg, template[i].anp);
+			ret = -EINVAL;
 			for (k = 0, temp = 0; k < template[i].anp; k++) {
+				if (WARN_ON(offset_in_page(IDX[k]) +
+					    template[i].atap[k] > PAGE_SIZE))
+					goto out;
 				sg_set_buf(&asg[k],
 					   memcpy(axbuf[IDX[k] >> PAGE_SHIFT] +
 						  offset_in_page(IDX[k]),
@@ -650,6 +667,10 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
 
 		j++;
 
+		ret = -EINVAL;
+		if (WARN_ON(template[i].ilen > PAGE_SIZE))
+			goto out;
+
 		data = xbuf[0];
 		memcpy(data, template[i].input, template[i].ilen);
 
@@ -741,6 +762,10 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 		if (!(template[i].np)) {
 			j++;
 
+			ret = -EINVAL;
+			if (WARN_ON(template[i].ilen > PAGE_SIZE))
+				goto out;
+
 			data = xbuf[0];
 			memcpy(data, template[i].input, template[i].ilen);
 
-- 
cgit v0.10.2


From a0cfae59f8381c5c670fce2cc3de70b35421f920 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 29 May 2009 16:23:12 +1000
Subject: crypto: testmgr - Allow hash test vectors longer than a page

As it stands we will each test hash vector both linearly and as
a scatter list if applicable.  This means that we cannot have
vectors longer than a page, even with scatter lists.

This patch fixes this by skipping test vectors with np != 0 when
testing linearly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 8fcea70..e9e9d84 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -180,7 +180,12 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				   tcrypt_complete, &tresult);
 
+	j = 0;
 	for (i = 0; i < tcount; i++) {
+		if (template[i].np)
+			continue;
+
+		j++;
 		memset(result, 0, 64);
 
 		hash_buff = xbuf[0];
@@ -198,7 +203,7 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 						  template[i].ksize);
 			if (ret) {
 				printk(KERN_ERR "alg: hash: setkey failed on "
-				       "test %d for %s: ret=%d\n", i + 1, algo,
+				       "test %d for %s: ret=%d\n", j, algo,
 				       -ret);
 				goto out;
 			}
@@ -220,14 +225,14 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
 			/* fall through */
 		default:
 			printk(KERN_ERR "alg: hash: digest failed on test %d "
-			       "for %s: ret=%d\n", i + 1, algo, -ret);
+			       "for %s: ret=%d\n", j, algo, -ret);
 			goto out;
 		}
 
 		if (memcmp(result, template[i].digest,
 			   crypto_ahash_digestsize(tfm))) {
 			printk(KERN_ERR "alg: hash: Test %d failed for %s\n",
-			       i + 1, algo);
+			       j, algo);
 			hexdump(result, crypto_ahash_digestsize(tfm));
 			ret = -EINVAL;
 			goto out;
-- 
cgit v0.10.2


From aa07a6990f4b6a8ef9fc538dea55bac6f92255f2 Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Tue, 2 Jun 2009 14:13:14 +1000
Subject: crypto: api - Use formatting of module name

Besdies, for the old code, gcc-4.3.3 produced this warning:
  "format not a string literal and no format arguments"

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/api.c b/crypto/api.c
index f500fb8..d5944f9 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -217,14 +217,11 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
 
 	alg = crypto_alg_lookup(name, type, mask);
 	if (!alg) {
-		char tmp[CRYPTO_MAX_ALG_NAME];
-
-		request_module(name);
+		request_module("%s", name);
 
 		if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask &
-		      CRYPTO_ALG_NEED_FALLBACK) &&
-		    snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp))
-			request_module(tmp);
+		      CRYPTO_ALG_NEED_FALLBACK))
+			request_module("%s-all", name);
 
 		alg = crypto_alg_lookup(name, type, mask);
 	}
-- 
cgit v0.10.2


From fbeb4a9c20d00e2550156f9e5a34473fbde59de2 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 1 Jun 2009 22:47:19 -0500
Subject: tomoyo: avoid get+put of task_struct

Use task_cred_xxx(task, security) in tomoyo_real_domain() to
avoid a get+put of the target cred.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h
index 41c6eba..f12d5ad 100644
--- a/security/tomoyo/tomoyo.h
+++ b/security/tomoyo/tomoyo.h
@@ -90,17 +90,10 @@ static inline struct tomoyo_domain_info *tomoyo_domain(void)
 	return current_cred()->security;
 }
 
-/* Caller holds tasklist_lock spinlock. */
 static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct
 							    *task)
 {
-	/***** CRITICAL SECTION START *****/
-	const struct cred *cred = get_task_cred(task);
-	struct tomoyo_domain_info *domain = cred->security;
-
-	put_cred(cred);
-	return domain;
-	/***** CRITICAL SECTION END *****/
+	return task_cred_xxx(task, security);
 }
 
 #endif /* !defined(_SECURITY_TOMOYO_TOMOYO_H) */
-- 
cgit v0.10.2


From 77bc7ac87d0d6df1ea099a44e8fc4e998e409606 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 1 Jun 2009 21:26:04 -0700
Subject: microblaze: remove unused flat_stack_align() definition

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Michal Simek <monstr@monstr.eu>

diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index acf0da5..6847c15 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -13,7 +13,6 @@
 
 #include <asm/unaligned.h>
 
-#define	flat_stack_align(sp) /* nothing needed */
 #define	flat_argvp_envp_on_stack()	0
 #define	flat_old_ram_flag(flags)	(flags)
 #define	flat_reloc_valid(reloc, size)	((reloc) <= (size))
-- 
cgit v0.10.2


From 3447ef29a7f3b1fd0d8d58376950e695e04f6f8b Mon Sep 17 00:00:00 2001
From: John Williams <john.williams@petalogix.com>
Date: Tue, 24 Mar 2009 11:10:00 +1000
Subject: microblaze: Don't use access_ok for unaligned

it assumes we have an unaligned exception handler which

 (a) may not be true
 (b) costs a lot of performance
 Instead we'll use struct/union method for big endian accesses,
  and byte-shifting for little endian.

Signed-off-by: John Williams <john.williams@petalogix.com>

diff --git a/arch/microblaze/include/asm/unaligned.h b/arch/microblaze/include/asm/unaligned.h
index 9d66b64..3658d91 100644
--- a/arch/microblaze/include/asm/unaligned.h
+++ b/arch/microblaze/include/asm/unaligned.h
@@ -12,7 +12,8 @@
 
 # ifdef __KERNEL__
 
-# include <linux/unaligned/access_ok.h>
+# include <linux/unaligned/be_struct.h>
+# include <linux/unaligned/le_byteshift.h>
 # include <linux/unaligned/generic.h>
 
 # define get_unaligned	__get_unaligned_be
-- 
cgit v0.10.2


From 8a4259bf89d23bfd58d87e275ef6da29cea6b3c5 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 08:40:51 +0200
Subject: ALSA: ctxfi - Fix Oops at mmapping

Replace a spinlock with a mutex protecting the vm block list at
mmap / munmap calls, which caused Oops like below:

BUG: sleeping function called from invalid context at mm/slub.c:1599
in_atomic(): 0, irqs_disabled(): 1, pid: 32065, name: xine
Pid: 32065, comm: xine Tainted: P           2.6.29.4-75.fc10.x86_64 #1
Call Trace:
  [<ffffffff81040685>] __might_sleep+0x105/0x10a
  [<ffffffff810c9fae>] kmem_cache_alloc+0x32/0xe2
  [<ffffffffa08e3110>] ct_vm_map+0xfa/0x19e [snd_ctxfi]
  [<ffffffffa08e1a07>] ct_map_audio_buffer+0x4c/0x76 [snd_ctxfi]
  [<ffffffffa08e2aa5>] atc_pcm_playback_prepare+0x1d7/0x2a8 [snd_ctxfi]
  [<ffffffff8105ef3f>] ? up_read+0x9/0xb
  [<ffffffff81186b61>] ? __up_read+0x7c/0x87
  [<ffffffffa08e36a6>] ct_pcm_playback_prepare+0x39/0x60 [snd_ctxfi]
  [<ffffffffa0886bcb>] snd_pcm_do_prepare+0x16/0x28 [snd_pcm]
  [<ffffffffa08867c7>] snd_pcm_action_single+0x2d/0x5b [snd_pcm]
  [<ffffffffa08881f3>] snd_pcm_action_nonatomic+0x52/0x6a [snd_pcm]
  [<ffffffffa088a723>] snd_pcm_common_ioctl1+0x404/0xc79 [snd_pcm]
  [<ffffffff810c52c8>] ? alloc_pages_current+0xb9/0xc2
  [<ffffffff810c9402>] ? new_slab+0x1a5/0x1cb
  [<ffffffff810ab9ea>] ? vma_prio_tree_insert+0x23/0xc1
  [<ffffffffa088b411>] snd_pcm_playback_ioctl1+0x213/0x230 [snd_pcm]
  [<ffffffff810b6c20>] ? mmap_region+0x397/0x4c9
  [<ffffffffa088bd9b>] snd_pcm_playback_ioctl+0x2e/0x36 [snd_pcm]
  [<ffffffff810ddc64>] vfs_ioctl+0x2a/0x78
  [<ffffffff810de130>] do_vfs_ioctl+0x462/0x4a2
  [<ffffffff81029cef>] ? default_spin_lock_flags+0x9/0xe
  [<ffffffff81374647>] ? trace_hardirqs_off_thunk+0x3a/0x6c
  [<ffffffff810de1c5>] sys_ioctl+0x55/0x77
  [<ffffffff8101133a>] system_call_fastpath+0x16/0x1b

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index ead104e..1a4bb35 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -119,7 +119,6 @@ atc_pcm_release_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm);
 
 static int ct_map_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
-	unsigned long flags;
 	struct snd_pcm_runtime *runtime;
 	struct ct_vm *vm;
 
@@ -129,9 +128,7 @@ static int ct_map_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	runtime = apcm->substream->runtime;
 	vm = atc->vm;
 
-	spin_lock_irqsave(&atc->vm_lock, flags);
 	apcm->vm_block = vm->map(vm, runtime->dma_area, runtime->dma_bytes);
-	spin_unlock_irqrestore(&atc->vm_lock, flags);
 
 	if (NULL == apcm->vm_block)
 		return -ENOENT;
@@ -141,7 +138,6 @@ static int ct_map_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 
 static void ct_unmap_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
-	unsigned long flags;
 	struct ct_vm *vm;
 
 	if (NULL == apcm->vm_block)
@@ -149,9 +145,7 @@ static void ct_unmap_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 
 	vm = atc->vm;
 
-	spin_lock_irqsave(&atc->vm_lock, flags);
 	vm->unmap(vm, apcm->vm_block);
-	spin_unlock_irqrestore(&atc->vm_lock, flags);
 
 	apcm->vm_block = NULL;
 }
@@ -161,9 +155,7 @@ static unsigned long atc_get_ptp_phys(struct ct_atc *atc, int index)
 	struct ct_vm *vm;
 	void *kvirt_addr;
 	unsigned long phys_addr;
-	unsigned long flags;
 
-	spin_lock_irqsave(&atc->vm_lock, flags);
 	vm = atc->vm;
 	kvirt_addr = vm->get_ptp_virt(vm, index);
 	if (kvirt_addr == NULL)
@@ -171,8 +163,6 @@ static unsigned long atc_get_ptp_phys(struct ct_atc *atc, int index)
 	else
 		phys_addr = virt_to_phys(kvirt_addr);
 
-	spin_unlock_irqrestore(&atc->vm_lock, flags);
-
 	return phys_addr;
 }
 
@@ -1562,7 +1552,6 @@ int ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 	atc_set_ops(atc);
 
 	spin_lock_init(&atc->atc_lock);
-	spin_lock_init(&atc->vm_lock);
 
 	/* Find card model */
 	err = atc_identify_card(atc);
diff --git a/sound/pci/ctxfi/ctatc.h b/sound/pci/ctxfi/ctatc.h
index 286c993..a7b0ec2 100644
--- a/sound/pci/ctxfi/ctatc.h
+++ b/sound/pci/ctxfi/ctatc.h
@@ -101,7 +101,6 @@ struct ct_atc {
 	unsigned long (*get_ptp_phys)(struct ct_atc *atc, int index);
 
 	spinlock_t atc_lock;
-	spinlock_t vm_lock;
 
 	int (*pcm_playback_prepare)(struct ct_atc *atc,
 				    struct ct_atc_pcm *apcm);
diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c
index cecf77e..363b67e 100644
--- a/sound/pci/ctxfi/ctvmem.c
+++ b/sound/pci/ctxfi/ctvmem.c
@@ -35,25 +35,27 @@ get_vm_block(struct ct_vm *vm, unsigned int size)
 	struct ct_vm_block *block = NULL, *entry = NULL;
 	struct list_head *pos = NULL;
 
+	mutex_lock(&vm->lock);
 	list_for_each(pos, &vm->unused) {
 		entry = list_entry(pos, struct ct_vm_block, list);
 		if (entry->size >= size)
 			break; /* found a block that is big enough */
 	}
 	if (pos == &vm->unused)
-		return NULL;
+		goto out;
 
 	if (entry->size == size) {
 		/* Move the vm node from unused list to used list directly */
 		list_del(&entry->list);
 		list_add(&entry->list, &vm->used);
 		vm->size -= size;
-		return entry;
+		block = entry;
+		goto out;
 	}
 
 	block = kzalloc(sizeof(*block), GFP_KERNEL);
 	if (NULL == block)
-		return NULL;
+		goto out;
 
 	block->addr = entry->addr;
 	block->size = size;
@@ -62,6 +64,8 @@ get_vm_block(struct ct_vm *vm, unsigned int size)
 	entry->size -= size;
 	vm->size -= size;
 
+ out:
+	mutex_unlock(&vm->lock);
 	return block;
 }
 
@@ -70,6 +74,7 @@ static void put_vm_block(struct ct_vm *vm, struct ct_vm_block *block)
 	struct ct_vm_block *entry = NULL, *pre_ent = NULL;
 	struct list_head *pos = NULL, *pre = NULL;
 
+	mutex_lock(&vm->lock);
 	list_del(&block->list);
 	vm->size += block->size;
 
@@ -106,6 +111,7 @@ static void put_vm_block(struct ct_vm *vm, struct ct_vm_block *block)
 		pos = pre;
 		pre = pos->prev;
 	}
+	mutex_unlock(&vm->lock);
 }
 
 /* Map host addr (kmalloced/vmalloced) to device logical addr. */
@@ -191,6 +197,8 @@ int ct_vm_create(struct ct_vm **rvm)
 	if (NULL == vm)
 		return -ENOMEM;
 
+	mutex_init(&vm->lock);
+
 	/* Allocate page table pages */
 	for (i = 0; i < CT_PTP_NUM; i++) {
 		vm->ptp[i] = kmalloc(PAGE_SIZE, GFP_KERNEL);
diff --git a/sound/pci/ctxfi/ctvmem.h b/sound/pci/ctxfi/ctvmem.h
index 4eb5bdd..618952e 100644
--- a/sound/pci/ctxfi/ctvmem.h
+++ b/sound/pci/ctxfi/ctvmem.h
@@ -20,7 +20,7 @@
 
 #define CT_PTP_NUM	1	/* num of device page table pages */
 
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 
 struct ct_vm_block {
@@ -35,6 +35,7 @@ struct ct_vm {
 	unsigned int size;		/* Available addr space in bytes */
 	struct list_head unused;	/* List of unused blocks */
 	struct list_head used;		/* List of used blocks */
+	struct mutex lock;
 
 	/* Map host addr (kmalloced/vmalloced) to device logical addr. */
 	struct ct_vm_block *(*map)(struct ct_vm *, void *host_addr, int size);
-- 
cgit v0.10.2


From 53c663ce0f39ba8e8ef652e400b317bc60ac7f19 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Tue, 2 Jun 2009 08:44:01 +0200
Subject: block: fix a possible oops on elv_abort_queue()

I found one more mis-conversion to the 'request is always dequeued
when completing' model in elv_abort_queue() during code inspection.
Although I haven't hit any problem caused by this mis-conversion yet
and just done compile/boot test, please apply if you have no problem.

Request must be dequeued when it completes.
However, elv_abort_queue() completes requests without dequeueing.
This will cause oops in the __blk_end_request_all().
This patch fixes the oops.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/elevator.c b/block/elevator.c
index bd78f69..a029cfe 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -813,6 +813,11 @@ void elv_abort_queue(struct request_queue *q)
 		rq = list_entry_rq(q->queue_head.next);
 		rq->cmd_flags |= REQ_QUIET;
 		trace_block_rq_abort(q, rq);
+		/*
+		 * Mark this request as started so we don't trigger
+		 * any debug logic in the end I/O path.
+		 */
+		blk_start_request(rq);
 		__blk_end_request_all(rq, -EIO);
 	}
 }
-- 
cgit v0.10.2


From a12af1ebe675e85831fde3c4d0908fc3b0908b7a Mon Sep 17 00:00:00 2001
From: Abhijith Das <adas@redhat.com>
Date: Mon, 1 Jun 2009 12:30:03 -0500
Subject: GFS2: smbd proccess hangs with flock() call.

GFS2 currently does not support mandatory flocks. An flock() call with
LOCK_MAND triggers unexpected behavior because gfs2 is not checking for
this lock type. This patch corrects that.

Signed-off-by: Abhi Das <adas@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 73b6f55..841ddc9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -698,8 +698,8 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
-	if (__mandatory_lock(&ip->i_inode))
-		return -ENOLCK;
+	if (fl->fl_type & LOCK_MAND)
+		return -EOPNOTSUPP;
 
 	if (fl->fl_type == F_UNLCK) {
 		do_unflock(file, fl);
-- 
cgit v0.10.2


From 67fbf880631bb4493ad8d23f25562abdf09dc01d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 09:18:26 +0200
Subject: ALSA: ctxfi - Fix a typo in MODULE_LICENSE

A space has to be put between GPL and v2.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index e31e29e..bf232e7 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -18,7 +18,7 @@
 
 MODULE_AUTHOR("Creative Technology Ltd");
 MODULE_DESCRIPTION("X-Fi driver version 1.03");
-MODULE_LICENSE("GPLv2");
+MODULE_LICENSE("GPL v2");
 MODULE_SUPPORTED_DEVICE("{{Creative Labs, Sound Blaster X-Fi}");
 
 static unsigned int reference_rate = 48000;
-- 
cgit v0.10.2


From 9318dce5038f193f46091b80c61928395a4139b7 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 1 Jun 2009 21:36:23 +0200
Subject: ALSA: snd_usb_caiaq: clean whitespaces

Cosmetic changes only, no code change.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c
index b13ce76..b144513 100644
--- a/sound/usb/caiaq/audio.c
+++ b/sound/usb/caiaq/audio.c
@@ -42,10 +42,10 @@
 	(stream << 1) | (~(i / (dev->n_streams * BYTES_PER_SAMPLE_USB)) & 1)
 
 static struct snd_pcm_hardware snd_usb_caiaq_pcm_hardware = {
-	.info 		= (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | 
+	.info 		= (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED |
 			   SNDRV_PCM_INFO_BLOCK_TRANSFER),
 	.formats 	= SNDRV_PCM_FMTBIT_S24_3BE,
-	.rates 		= (SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 | 
+	.rates 		= (SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 |
 			   SNDRV_PCM_RATE_96000),
 	.rate_min	= 44100,
 	.rate_max	= 0, /* will overwrite later */
@@ -68,7 +68,7 @@ activate_substream(struct snd_usb_caiaqdev *dev,
 		dev->sub_capture[sub->number] = sub;
 }
 
-static void 
+static void
 deactivate_substream(struct snd_usb_caiaqdev *dev,
 		     struct snd_pcm_substream *sub)
 {
@@ -118,7 +118,7 @@ static int stream_start(struct snd_usb_caiaqdev *dev)
 			return -EPIPE;
 		}
 	}
-	
+
 	return 0;
 }
 
@@ -129,7 +129,7 @@ static void stream_stop(struct snd_usb_caiaqdev *dev)
 	debug("%s(%p)\n", __func__, dev);
 	if (!dev->streaming)
 		return;
-	
+
 	dev->streaming = 0;
 
 	for (i = 0; i < N_URBS; i++) {
@@ -154,7 +154,7 @@ static int snd_usb_caiaq_substream_close(struct snd_pcm_substream *substream)
 	debug("%s(%p)\n", __func__, substream);
 	if (all_substreams_zero(dev->sub_playback) &&
 	    all_substreams_zero(dev->sub_capture)) {
-		/* when the last client has stopped streaming, 
+		/* when the last client has stopped streaming,
 		 * all sample rates are allowed again */
 		stream_stop(dev);
 		dev->pcm_info.rates = dev->samplerates;
@@ -194,7 +194,7 @@ static int snd_usb_caiaq_pcm_prepare(struct snd_pcm_substream *substream)
 	struct snd_pcm_runtime *runtime = substream->runtime;
 
 	debug("%s(%p)\n", __func__, substream);
-	
+
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		dev->period_out_count[index] = BYTES_PER_SAMPLE + 1;
 		dev->audio_out_buf_pos[index] = BYTES_PER_SAMPLE + 1;
@@ -205,19 +205,19 @@ static int snd_usb_caiaq_pcm_prepare(struct snd_pcm_substream *substream)
 
 	if (dev->streaming)
 		return 0;
-	
+
 	/* the first client that opens a stream defines the sample rate
 	 * setting for all subsequent calls, until the last client closed. */
 	for (i=0; i < ARRAY_SIZE(rates); i++)
 		if (runtime->rate == rates[i])
 			dev->pcm_info.rates = 1 << i;
-	
+
 	snd_pcm_limit_hw_rates(runtime);
 
 	bytes_per_sample = BYTES_PER_SAMPLE;
 	if (dev->spec.data_alignment == 2)
 		bytes_per_sample++;
-	
+
 	bpp = ((runtime->rate / 8000) + CLOCK_DRIFT_TOLERANCE)
 		* bytes_per_sample * CHANNELS_PER_STREAM * dev->n_streams;
 
@@ -232,7 +232,7 @@ static int snd_usb_caiaq_pcm_prepare(struct snd_pcm_substream *substream)
 	ret = stream_start(dev);
 	if (ret)
 		return ret;
-	
+
 	dev->output_running = 0;
 	wait_event_timeout(dev->prepare_wait_queue, dev->output_running, HZ);
 	if (!dev->output_running) {
@@ -273,7 +273,7 @@ snd_usb_caiaq_pcm_pointer(struct snd_pcm_substream *sub)
 		return SNDRV_PCM_POS_XRUN;
 
 	if (sub->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		return bytes_to_frames(sub->runtime, 
+		return bytes_to_frames(sub->runtime,
 					dev->audio_out_buf_pos[index]);
 	else
 		return bytes_to_frames(sub->runtime,
@@ -291,7 +291,7 @@ static struct snd_pcm_ops snd_usb_caiaq_ops = {
 	.trigger =	snd_usb_caiaq_pcm_trigger,
 	.pointer =	snd_usb_caiaq_pcm_pointer
 };
-	
+
 static void check_for_elapsed_periods(struct snd_usb_caiaqdev *dev,
 				      struct snd_pcm_substream **subs)
 {
@@ -333,7 +333,7 @@ static void read_in_urb_mode0(struct snd_usb_caiaqdev *dev,
 				struct snd_pcm_runtime *rt = sub->runtime;
 				char *audio_buf = rt->dma_area;
 				int sz = frames_to_bytes(rt, rt->buffer_size);
-				audio_buf[dev->audio_in_buf_pos[stream]++] 
+				audio_buf[dev->audio_in_buf_pos[stream]++]
 					= usb_buf[i];
 				dev->period_in_count[stream]++;
 				if (dev->audio_in_buf_pos[stream] == sz)
@@ -354,14 +354,14 @@ static void read_in_urb_mode2(struct snd_usb_caiaqdev *dev,
 
 	for (i = 0; i < iso->actual_length;) {
 		if (i % (dev->n_streams * BYTES_PER_SAMPLE_USB) == 0) {
-			for (stream = 0; 
-			     stream < dev->n_streams; 
+			for (stream = 0;
+			     stream < dev->n_streams;
 			     stream++, i++) {
 				if (dev->first_packet)
 					continue;
 
 				check_byte = MAKE_CHECKBYTE(dev, stream, i);
-				
+
 				if ((usb_buf[i] & 0x3f) != check_byte)
 					dev->input_panic = 1;
 
@@ -410,21 +410,21 @@ static void read_in_urb(struct snd_usb_caiaqdev *dev,
 	}
 
 	if ((dev->input_panic || dev->output_panic) && !dev->warned) {
-		debug("streaming error detected %s %s\n", 
+		debug("streaming error detected %s %s\n",
 				dev->input_panic ? "(input)" : "",
 				dev->output_panic ? "(output)" : "");
 		dev->warned = 1;
 	}
 }
 
-static void fill_out_urb(struct snd_usb_caiaqdev *dev, 
-			 struct urb *urb, 
+static void fill_out_urb(struct snd_usb_caiaqdev *dev,
+			 struct urb *urb,
 			 const struct usb_iso_packet_descriptor *iso)
 {
 	unsigned char *usb_buf = urb->transfer_buffer + iso->offset;
 	struct snd_pcm_substream *sub;
 	int stream, i;
-	
+
 	for (i = 0; i < iso->length;) {
 		for (stream = 0; stream < dev->n_streams; stream++, i++) {
 			sub = dev->sub_playback[stream];
@@ -444,7 +444,7 @@ static void fill_out_urb(struct snd_usb_caiaqdev *dev,
 
 		/* fill in the check bytes */
 		if (dev->spec.data_alignment == 2 &&
-		    i % (dev->n_streams * BYTES_PER_SAMPLE_USB) == 
+		    i % (dev->n_streams * BYTES_PER_SAMPLE_USB) ==
 		    	(dev->n_streams * CHANNELS_PER_STREAM))
 		    for (stream = 0; stream < dev->n_streams; stream++, i++)
 		    	usb_buf[i] = MAKE_CHECKBYTE(dev, stream, i);
@@ -453,7 +453,7 @@ static void fill_out_urb(struct snd_usb_caiaqdev *dev,
 
 static void read_completed(struct urb *urb)
 {
-	struct snd_usb_caiaq_cb_info *info = urb->context; 
+	struct snd_usb_caiaq_cb_info *info = urb->context;
 	struct snd_usb_caiaqdev *dev;
 	struct urb *out;
 	int frame, len, send_it = 0, outframe = 0;
@@ -478,7 +478,7 @@ static void read_completed(struct urb *urb)
 		out->iso_frame_desc[outframe].length = len;
 		out->iso_frame_desc[outframe].actual_length = 0;
 		out->iso_frame_desc[outframe].offset = BYTES_PER_FRAME * frame;
-		
+
 		if (len > 0) {
 			spin_lock(&dev->spinlock);
 			fill_out_urb(dev, out, &out->iso_frame_desc[outframe]);
@@ -497,14 +497,14 @@ static void read_completed(struct urb *urb)
 		out->transfer_flags = URB_ISO_ASAP;
 		usb_submit_urb(out, GFP_ATOMIC);
 	}
-	
+
 	/* re-submit inbound urb */
 	for (frame = 0; frame < FRAMES_PER_URB; frame++) {
 		urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame;
 		urb->iso_frame_desc[frame].length = BYTES_PER_FRAME;
 		urb->iso_frame_desc[frame].actual_length = 0;
 	}
-	
+
 	urb->number_of_packets = FRAMES_PER_URB;
 	urb->transfer_flags = URB_ISO_ASAP;
 	usb_submit_urb(urb, GFP_ATOMIC);
@@ -528,7 +528,7 @@ static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret)
 	struct usb_device *usb_dev = dev->chip.dev;
 	unsigned int pipe;
 
-	pipe = (dir == SNDRV_PCM_STREAM_PLAYBACK) ? 
+	pipe = (dir == SNDRV_PCM_STREAM_PLAYBACK) ?
 		usb_sndisocpipe(usb_dev, ENDPOINT_PLAYBACK) :
 		usb_rcvisocpipe(usb_dev, ENDPOINT_CAPTURE);
 
@@ -547,25 +547,25 @@ static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret)
 			return urbs;
 		}
 
-		urbs[i]->transfer_buffer = 
+		urbs[i]->transfer_buffer =
 			kmalloc(FRAMES_PER_URB * BYTES_PER_FRAME, GFP_KERNEL);
 		if (!urbs[i]->transfer_buffer) {
 			log("unable to kmalloc() transfer buffer, OOM!?\n");
 			*ret = -ENOMEM;
 			return urbs;
 		}
-		
+
 		for (frame = 0; frame < FRAMES_PER_URB; frame++) {
-			struct usb_iso_packet_descriptor *iso = 
+			struct usb_iso_packet_descriptor *iso =
 				&urbs[i]->iso_frame_desc[frame];
-			
+
 			iso->offset = BYTES_PER_FRAME * frame;
 			iso->length = BYTES_PER_FRAME;
 		}
-		
+
 		urbs[i]->dev = usb_dev;
 		urbs[i]->pipe = pipe;
-		urbs[i]->transfer_buffer_length = FRAMES_PER_URB 
+		urbs[i]->transfer_buffer_length = FRAMES_PER_URB
 						* BYTES_PER_FRAME;
 		urbs[i]->context = &dev->data_cb_info[i];
 		urbs[i]->interval = 1;
@@ -589,7 +589,7 @@ static void free_urbs(struct urb **urbs)
 	for (i = 0; i < N_URBS; i++) {
 		if (!urbs[i])
 			continue;
-		
+
 		usb_kill_urb(urbs[i]);
 		kfree(urbs[i]->transfer_buffer);
 		usb_free_urb(urbs[i]);
@@ -602,11 +602,11 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
 {
 	int i, ret;
 
-	dev->n_audio_in  = max(dev->spec.num_analog_audio_in, 
-			       dev->spec.num_digital_audio_in) / 
+	dev->n_audio_in  = max(dev->spec.num_analog_audio_in,
+			       dev->spec.num_digital_audio_in) /
 				CHANNELS_PER_STREAM;
 	dev->n_audio_out = max(dev->spec.num_analog_audio_out,
-			       dev->spec.num_digital_audio_out) / 
+			       dev->spec.num_digital_audio_out) /
 				CHANNELS_PER_STREAM;
 	dev->n_streams = max(dev->n_audio_in, dev->n_audio_out);
 
@@ -619,7 +619,7 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
 		return -EINVAL;
 	}
 
-	ret = snd_pcm_new(dev->chip.card, dev->product_name, 0, 
+	ret = snd_pcm_new(dev->chip.card, dev->product_name, 0,
 			dev->n_audio_out, dev->n_audio_in, &dev->pcm);
 
 	if (ret < 0) {
@@ -632,7 +632,7 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
 
 	memset(dev->sub_playback, 0, sizeof(dev->sub_playback));
 	memset(dev->sub_capture, 0, sizeof(dev->sub_capture));
-	
+
 	memcpy(&dev->pcm_info, &snd_usb_caiaq_pcm_hardware,
 			sizeof(snd_usb_caiaq_pcm_hardware));
 
@@ -651,9 +651,9 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
 		break;
 	}
 
-	snd_pcm_set_ops(dev->pcm, SNDRV_PCM_STREAM_PLAYBACK, 
+	snd_pcm_set_ops(dev->pcm, SNDRV_PCM_STREAM_PLAYBACK,
 				&snd_usb_caiaq_ops);
-	snd_pcm_set_ops(dev->pcm, SNDRV_PCM_STREAM_CAPTURE, 
+	snd_pcm_set_ops(dev->pcm, SNDRV_PCM_STREAM_CAPTURE,
 				&snd_usb_caiaq_ops);
 
 	snd_pcm_lib_preallocate_pages_for_all(dev->pcm,
@@ -662,7 +662,7 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
 					MAX_BUFFER_SIZE, MAX_BUFFER_SIZE);
 
 	dev->data_cb_info =
-		kmalloc(sizeof(struct snd_usb_caiaq_cb_info) * N_URBS, 
+		kmalloc(sizeof(struct snd_usb_caiaq_cb_info) * N_URBS,
 					GFP_KERNEL);
 
 	if (!dev->data_cb_info)
@@ -672,14 +672,14 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
 		dev->data_cb_info[i].dev = dev;
 		dev->data_cb_info[i].index = i;
 	}
-	
+
 	dev->data_urbs_in = alloc_urbs(dev, SNDRV_PCM_STREAM_CAPTURE, &ret);
 	if (ret < 0) {
 		kfree(dev->data_cb_info);
 		free_urbs(dev->data_urbs_in);
 		return ret;
 	}
-	
+
 	dev->data_urbs_out = alloc_urbs(dev, SNDRV_PCM_STREAM_PLAYBACK, &ret);
 	if (ret < 0) {
 		kfree(dev->data_cb_info);
diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c
index 515de1c..9be0f2e 100644
--- a/sound/usb/caiaq/device.c
+++ b/sound/usb/caiaq/device.c
@@ -79,7 +79,7 @@ static struct usb_device_id snd_usb_id_table[] = {
 	{
 		.match_flags =	USB_DEVICE_ID_MATCH_DEVICE,
 		.idVendor =	USB_VID_NATIVEINSTRUMENTS,
-		.idProduct =	USB_PID_RIGKONTROL2 
+		.idProduct =	USB_PID_RIGKONTROL2
 	},
 	{
 		.match_flags =	USB_DEVICE_ID_MATCH_DEVICE,
@@ -197,7 +197,7 @@ int snd_usb_caiaq_send_command(struct snd_usb_caiaqdev *dev,
 
 	if (buffer && len > 0)
 		memcpy(dev->ep1_out_buf+1, buffer, len);
-	
+
 	dev->ep1_out_buf[0] = command;
 	return usb_bulk_msg(usb_dev, usb_sndbulkpipe(usb_dev, 1),
 			   dev->ep1_out_buf, len+1, &actual_len, 200);
@@ -208,7 +208,7 @@ int snd_usb_caiaq_set_audio_params (struct snd_usb_caiaqdev *dev,
 {
 	int ret;
 	char tmp[5];
-	
+
 	switch (rate) {
 	case 44100:	tmp[0] = SAMPLERATE_44100;   break;
 	case 48000:	tmp[0] = SAMPLERATE_48000;   break;
@@ -237,12 +237,12 @@ int snd_usb_caiaq_set_audio_params (struct snd_usb_caiaqdev *dev,
 
 	if (ret)
 		return ret;
-	
-	if (!wait_event_timeout(dev->ep1_wait_queue, 
+
+	if (!wait_event_timeout(dev->ep1_wait_queue,
 	    dev->audio_parm_answer >= 0, HZ))
 		return -EPIPE;
-		
-	if (dev->audio_parm_answer != 1) 
+
+	if (dev->audio_parm_answer != 1)
 		debug("unable to set the device's audio params\n");
 	else
 		dev->bpp = bpp;
@@ -250,8 +250,8 @@ int snd_usb_caiaq_set_audio_params (struct snd_usb_caiaqdev *dev,
 	return dev->audio_parm_answer == 1 ? 0 : -EINVAL;
 }
 
-int snd_usb_caiaq_set_auto_msg (struct snd_usb_caiaqdev *dev, 
-				int digital, int analog, int erp)
+int snd_usb_caiaq_set_auto_msg(struct snd_usb_caiaqdev *dev,
+			       int digital, int analog, int erp)
 {
 	char tmp[3] = { digital, analog, erp };
 	return snd_usb_caiaq_send_command(dev, EP1_CMD_AUTO_MSG,
@@ -262,7 +262,7 @@ static void __devinit setup_card(struct snd_usb_caiaqdev *dev)
 {
 	int ret;
 	char val[4];
-	
+
 	/* device-specific startup specials */
 	switch (dev->chip.usb_id) {
 	case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL2):
@@ -314,7 +314,7 @@ static void __devinit setup_card(struct snd_usb_caiaqdev *dev)
 			dev->control_state, 1);
 		break;
 	}
-	
+
 	if (dev->spec.num_analog_audio_out +
 	    dev->spec.num_analog_audio_in +
 	    dev->spec.num_digital_audio_out +
@@ -323,7 +323,7 @@ static void __devinit setup_card(struct snd_usb_caiaqdev *dev)
 		if (ret < 0)
 			log("Unable to set up audio system (ret=%d)\n", ret);
 	}
-	
+
 	if (dev->spec.num_midi_in +
 	    dev->spec.num_midi_out > 0) {
 		ret = snd_usb_caiaq_midi_init(dev);
@@ -363,7 +363,7 @@ static int create_card(struct usb_device* usb_dev, struct snd_card **cardp)
 	if (devnum >= SNDRV_CARDS)
 		return -ENODEV;
 
-	err = snd_card_create(index[devnum], id[devnum], THIS_MODULE, 
+	err = snd_card_create(index[devnum], id[devnum], THIS_MODULE,
 			      sizeof(struct snd_usb_caiaqdev), &card);
 	if (err < 0)
 		return err;
@@ -386,7 +386,7 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 	struct usb_device *usb_dev = dev->chip.dev;
 	struct snd_card *card = dev->chip.card;
 	int err, len;
-	
+
 	if (usb_set_interface(usb_dev, 0, 1) != 0) {
 		log("can't set alt interface.\n");
 		return -EIO;
@@ -395,19 +395,19 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 	usb_init_urb(&dev->ep1_in_urb);
 	usb_init_urb(&dev->midi_out_urb);
 
-	usb_fill_bulk_urb(&dev->ep1_in_urb, usb_dev, 
+	usb_fill_bulk_urb(&dev->ep1_in_urb, usb_dev,
 			  usb_rcvbulkpipe(usb_dev, 0x1),
-			  dev->ep1_in_buf, EP1_BUFSIZE, 
+			  dev->ep1_in_buf, EP1_BUFSIZE,
 			  usb_ep1_command_reply_dispatch, dev);
 
-	usb_fill_bulk_urb(&dev->midi_out_urb, usb_dev, 
+	usb_fill_bulk_urb(&dev->midi_out_urb, usb_dev,
 			  usb_sndbulkpipe(usb_dev, 0x1),
-			  dev->midi_out_buf, EP1_BUFSIZE, 
+			  dev->midi_out_buf, EP1_BUFSIZE,
 			  snd_usb_caiaq_midi_output_done, dev);
-	
+
 	init_waitqueue_head(&dev->ep1_wait_queue);
 	init_waitqueue_head(&dev->prepare_wait_queue);
-	
+
 	if (usb_submit_urb(&dev->ep1_in_urb, GFP_KERNEL) != 0)
 		return -EIO;
 
@@ -420,10 +420,10 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 
 	usb_string(usb_dev, usb_dev->descriptor.iManufacturer,
 		   dev->vendor_name, CAIAQ_USB_STR_LEN);
-	
+
 	usb_string(usb_dev, usb_dev->descriptor.iProduct,
 		   dev->product_name, CAIAQ_USB_STR_LEN);
-	
+
 	usb_string(usb_dev, usb_dev->descriptor.iSerialNumber,
 		   dev->serial, CAIAQ_USB_STR_LEN);
 
@@ -431,7 +431,7 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 	c = strchr(dev->serial, ' ');
 	if (c)
 		*c = '\0';
-	
+
 	strcpy(card->driver, MODNAME);
 	strcpy(card->shortname, dev->product_name);
 
@@ -449,18 +449,18 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 	return 0;
 }
 
-static int __devinit snd_probe(struct usb_interface *intf, 
+static int __devinit snd_probe(struct usb_interface *intf,
 		     const struct usb_device_id *id)
 {
 	int ret;
 	struct snd_card *card;
 	struct usb_device *device = interface_to_usbdev(intf);
-	
+
 	ret = create_card(device, &card);
-	
+
 	if (ret < 0)
 		return ret;
-			
+
 	usb_set_intfdata(intf, card);
 	ret = init_card(caiaqdev(card));
 	if (ret < 0) {
@@ -468,7 +468,7 @@ static int __devinit snd_probe(struct usb_interface *intf,
 		snd_card_free(card);
 		return ret;
 	}
-	
+
 	return 0;
 }
 
@@ -489,10 +489,10 @@ static void snd_disconnect(struct usb_interface *intf)
 	snd_usb_caiaq_input_free(dev);
 #endif
 	snd_usb_caiaq_audio_free(dev);
-	
+
 	usb_kill_urb(&dev->ep1_in_urb);
 	usb_kill_urb(&dev->midi_out_urb);
-	
+
 	snd_card_free(card);
 	usb_reset_device(interface_to_usbdev(intf));
 }
diff --git a/sound/usb/caiaq/midi.c b/sound/usb/caiaq/midi.c
index 8fa8cd8..538e8c0 100644
--- a/sound/usb/caiaq/midi.c
+++ b/sound/usb/caiaq/midi.c
@@ -40,7 +40,7 @@ static void snd_usb_caiaq_midi_input_trigger(struct snd_rawmidi_substream *subst
 
 	if (!dev)
 		return;
-	
+
 	dev->midi_receive_substream = up ? substream : NULL;
 }
 
@@ -64,18 +64,18 @@ static void snd_usb_caiaq_midi_send(struct snd_usb_caiaqdev *dev,
 				    struct snd_rawmidi_substream *substream)
 {
 	int len, ret;
-	
+
 	dev->midi_out_buf[0] = EP1_CMD_MIDI_WRITE;
 	dev->midi_out_buf[1] = 0; /* port */
 	len = snd_rawmidi_transmit(substream, dev->midi_out_buf + 3,
 				   EP1_BUFSIZE - 3);
-	
+
 	if (len <= 0)
 		return;
-	
+
 	dev->midi_out_buf[2] = len;
 	dev->midi_out_urb.transfer_buffer_length = len+3;
-	
+
 	ret = usb_submit_urb(&dev->midi_out_urb, GFP_ATOMIC);
 	if (ret < 0)
 		log("snd_usb_caiaq_midi_send(%p): usb_submit_urb() failed,"
@@ -88,7 +88,7 @@ static void snd_usb_caiaq_midi_send(struct snd_usb_caiaqdev *dev,
 static void snd_usb_caiaq_midi_output_trigger(struct snd_rawmidi_substream *substream, int up)
 {
 	struct snd_usb_caiaqdev *dev = substream->rmidi->private_data;
-	
+
 	if (up) {
 		dev->midi_out_substream = substream;
 		if (!dev->midi_out_active)
@@ -113,12 +113,12 @@ static struct snd_rawmidi_ops snd_usb_caiaq_midi_input =
 	.trigger =      snd_usb_caiaq_midi_input_trigger,
 };
 
-void snd_usb_caiaq_midi_handle_input(struct snd_usb_caiaqdev *dev, 
+void snd_usb_caiaq_midi_handle_input(struct snd_usb_caiaqdev *dev,
 				     int port, const char *buf, int len)
 {
 	if (!dev->midi_receive_substream)
 		return;
-	
+
 	snd_rawmidi_receive(dev->midi_receive_substream, buf, len);
 }
 
@@ -142,16 +142,16 @@ int snd_usb_caiaq_midi_init(struct snd_usb_caiaqdev *device)
 
 	if (device->spec.num_midi_out > 0) {
 		rmidi->info_flags |= SNDRV_RAWMIDI_INFO_OUTPUT;
-		snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, 
+		snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT,
 				    &snd_usb_caiaq_midi_output);
 	}
 
 	if (device->spec.num_midi_in > 0) {
 		rmidi->info_flags |= SNDRV_RAWMIDI_INFO_INPUT;
-		snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, 
+		snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT,
 				    &snd_usb_caiaq_midi_input);
 	}
-	
+
 	device->rmidi = rmidi;
 
 	return 0;
@@ -160,7 +160,7 @@ int snd_usb_caiaq_midi_init(struct snd_usb_caiaqdev *device)
 void snd_usb_caiaq_midi_output_done(struct urb* urb)
 {
 	struct snd_usb_caiaqdev *dev = urb->context;
-	
+
 	dev->midi_out_active = 0;
 	if (urb->status != 0)
 		return;
-- 
cgit v0.10.2


From d3873a1be9f2d497e9ff013e4a83a2a4d9f1d22d Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 1 Jun 2009 21:36:25 +0200
Subject: ALSA: snd_usb_caiaq: use strlcpy

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c
index 9be0f2e..d06d7fc 100644
--- a/sound/usb/caiaq/device.c
+++ b/sound/usb/caiaq/device.c
@@ -432,8 +432,8 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 	if (c)
 		*c = '\0';
 
-	strcpy(card->driver, MODNAME);
-	strcpy(card->shortname, dev->product_name);
+	strlcpy(card->driver, MODNAME, sizeof(card->driver));
+	strlcpy(card->shortname, dev->product_name, sizeof(card->shortname));
 
 	len = snprintf(card->longname, sizeof(card->longname),
 		       "%s %s (serial %s, ",
-- 
cgit v0.10.2


From c25486c5ea3ea5586f71285a3aa5cfafb1db59f9 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 2 Jun 2009 08:09:48 +0200
Subject: perf_counter tools: Make .gitignore reflect perf_counter tools files

Make .gitignore reflect perf_counter tools files so
git status doesn't gripe about untracked files.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/.gitignore b/Documentation/perf_counter/.gitignore
index 41c0b20..d69a759 100644
--- a/Documentation/perf_counter/.gitignore
+++ b/Documentation/perf_counter/.gitignore
@@ -1,179 +1,16 @@
-GIT-BUILD-OPTIONS
-GIT-CFLAGS
-GIT-GUI-VARS
-GIT-VERSION-FILE
-git
-git-add
-git-add--interactive
-git-am
-git-annotate
-git-apply
-git-archimport
-git-archive
-git-bisect
-git-bisect--helper
-git-blame
-git-branch
-git-bundle
-git-cat-file
-git-check-attr
-git-check-ref-format
-git-checkout
-git-checkout-index
-git-cherry
-git-cherry-pick
-git-clean
-git-clone
-git-commit
-git-commit-tree
-git-config
-git-count-objects
-git-cvsexportcommit
-git-cvsimport
-git-cvsserver
-git-daemon
-git-diff
-git-diff-files
-git-diff-index
-git-diff-tree
-git-difftool
-git-difftool--helper
-git-describe
-git-fast-export
-git-fast-import
-git-fetch
-git-fetch--tool
-git-fetch-pack
-git-filter-branch
-git-fmt-merge-msg
-git-for-each-ref
-git-format-patch
-git-fsck
-git-fsck-objects
-git-gc
-git-get-tar-commit-id
-git-grep
-git-hash-object
-git-help
-git-http-fetch
-git-http-push
-git-imap-send
-git-index-pack
-git-init
-git-init-db
-git-instaweb
-git-log
-git-lost-found
-git-ls-files
-git-ls-remote
-git-ls-tree
-git-mailinfo
-git-mailsplit
-git-merge
-git-merge-base
-git-merge-index
-git-merge-file
-git-merge-tree
-git-merge-octopus
-git-merge-one-file
-git-merge-ours
-git-merge-recursive
-git-merge-resolve
-git-merge-subtree
-git-mergetool
-git-mergetool--lib
-git-mktag
-git-mktree
-git-name-rev
-git-mv
-git-pack-redundant
-git-pack-objects
-git-pack-refs
-git-parse-remote
-git-patch-id
-git-peek-remote
-git-prune
-git-prune-packed
-git-pull
-git-push
-git-quiltimport
-git-read-tree
-git-rebase
-git-rebase--interactive
-git-receive-pack
-git-reflog
-git-relink
-git-remote
-git-repack
-git-repo-config
-git-request-pull
-git-rerere
-git-reset
-git-rev-list
-git-rev-parse
-git-revert
-git-rm
-git-send-email
-git-send-pack
-git-sh-setup
-git-shell
-git-shortlog
-git-show
-git-show-branch
-git-show-index
-git-show-ref
-git-stage
-git-stash
-git-status
-git-stripspace
-git-submodule
-git-svn
-git-symbolic-ref
-git-tag
-git-tar-tree
-git-unpack-file
-git-unpack-objects
-git-update-index
-git-update-ref
-git-update-server-info
-git-upload-archive
-git-upload-pack
-git-var
-git-verify-pack
-git-verify-tag
-git-web--browse
-git-whatchanged
-git-write-tree
-git-core-*/?*
-gitk-wish
-gitweb/gitweb.cgi
-test-chmtime
-test-ctype
-test-date
-test-delta
-test-dump-cache-tree
-test-genrandom
-test-match-trees
-test-parse-options
-test-path-utils
-test-sha1
-test-sigchain
+PERF-BUILD-OPTIONS
+PERF-CFLAGS
+PERF-GUI-VARS
+PERF-VERSION-FILE
+perf
+perf-help
+perf-record
+perf-report
+perf-stat
+perf-top
+perf*.1
+perf*.xml
 common-cmds.h
-*.tar.gz
-*.dsc
-*.deb
-git.spec
-*.exe
-*.[aos]
-*.py[co]
-config.mak
-autom4te.cache
-config.cache
-config.log
-config.status
-config.mak.autogen
-config.mak.append
-configure
 tags
 TAGS
 cscope*
-- 
cgit v0.10.2


From 1a1df6f0434fc35c9bf6ca25f9c5115713d77291 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 1 Jun 2009 21:36:26 +0200
Subject: ALSA: snd_usb_caiaq: give better longname

The serial number is of no interest in the longname, remove it. This
gives space for the usb path information which is more informative.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c
index d06d7fc..15052e4 100644
--- a/sound/usb/caiaq/device.c
+++ b/sound/usb/caiaq/device.c
@@ -382,10 +382,10 @@ static int create_card(struct usb_device* usb_dev, struct snd_card **cardp)
 
 static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 {
-	char *c;
+	char usbpath[32];
 	struct usb_device *usb_dev = dev->chip.dev;
 	struct snd_card *card = dev->chip.card;
-	int err, len;
+	int err;
 
 	if (usb_set_interface(usb_dev, 0, 1) != 0) {
 		log("can't set alt interface.\n");
@@ -424,27 +424,14 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 	usb_string(usb_dev, usb_dev->descriptor.iProduct,
 		   dev->product_name, CAIAQ_USB_STR_LEN);
 
-	usb_string(usb_dev, usb_dev->descriptor.iSerialNumber,
-		   dev->serial, CAIAQ_USB_STR_LEN);
-
-	/* terminate serial string at first white space occurence */
-	c = strchr(dev->serial, ' ');
-	if (c)
-		*c = '\0';
-
 	strlcpy(card->driver, MODNAME, sizeof(card->driver));
 	strlcpy(card->shortname, dev->product_name, sizeof(card->shortname));
 
-	len = snprintf(card->longname, sizeof(card->longname),
-		       "%s %s (serial %s, ",
-		       dev->vendor_name, dev->product_name, dev->serial);
-
-	if (len < sizeof(card->longname) - 2)
-		len += usb_make_path(usb_dev, card->longname + len,
-				     sizeof(card->longname) - len);
+	usb_make_path(usb_dev, usbpath, sizeof(usbpath));
+	snprintf(card->longname, sizeof(card->longname),
+		       "%s %s (%s)",
+		       dev->vendor_name, dev->product_name, usbpath);
 
-	card->longname[len++] = ')';
-	card->longname[len] = '\0';
 	setup_card(dev);
 	return 0;
 }
diff --git a/sound/usb/caiaq/device.h b/sound/usb/caiaq/device.h
index 4cce1ad..ece7351 100644
--- a/sound/usb/caiaq/device.h
+++ b/sound/usb/caiaq/device.h
@@ -81,7 +81,6 @@ struct snd_usb_caiaqdev {
 
 	char vendor_name[CAIAQ_USB_STR_LEN];
 	char product_name[CAIAQ_USB_STR_LEN];
-	char serial[CAIAQ_USB_STR_LEN];
 
 	int n_streams, n_audio_in, n_audio_out;
 	int streaming, first_packet, output_running;
-- 
cgit v0.10.2


From 3b315d70b094e8b439358756a9084438fd7a71c2 Mon Sep 17 00:00:00 2001
From: Hector Martin <hector@marcansoft.com>
Date: Tue, 2 Jun 2009 10:54:19 +0200
Subject: ALSA: hda - Acer Aspire 8930G support

Short story: this laptop has 5.1 built-in speakers which you *really*
want to use (the not-so-"sub" woofer is what makes the audio above
average for a laptop), so 6-channel support is important (plus a decent
asound.conf to upmix stereo). It also has the 3 typical jacks that ought
to have a selectable mode. And it's based on ALC889, which sucks.

Rationale/explanations:

The const_channel_count stuff was added because, for a laptop like this,
you always have 6 channels available (internal speakers) but still need
to set the mode for the 3 external jacks. Therefore, the device always
needs to be in 6-channel mode but there still needs to be a mixer
control for the jack mode. You could use line/mic-in at the same time as
the 6 internal speakers, for example. You might be tempted to make it
even smarter by dynamically switching the max channel count when
headphones are plugged in (therefore muting the internal speakers and
reducing the physical channel count to the jack channel mode), but as a
user I consider this to be harmful because I want the audio to blow up
to 6 channels / upmixed as soon as I unplug the headphones, and having
opened the device while in 2-channel mode would prevent this from
working (and always making 6-channel mode available doesn't do any harm).

The hardware needs EAPD turned on and the DACs routed to the internal
speaker pins, so the patch adds those verbs.

The ALC889 CLFE and subsequent (side/aux, here unused) DACs do NOT work
by default, at least here. I wasted much time trying to talk to
Realtek/pshou about this, but they just kept sending me useless updates
to patch_realtek.c that did nothing relevant. In the end I gave up and
brute forced the issue by trying to flip every bit in the proprietary
coefficient registers, and eventually found the two magic registers that
need to be cleared to enable all DACs. I have only heard Acer users
complain, but that might be because ALC889 is pretty new and using 5.1
(and noticing the missing center/lfe channels) might not be that common.
If this is a generalized issue with all ALC889 systems then those verbs
should probably be moved to a common verb array.

The internal mic is untested and probably doesn't work.

These settings will probably work for other Acer Gemstone laptops with
the same 5.1 speaker config. When identified, those should be added to
the PCI subsystem ID list.

Signed-off-by: Hector Martin <hector@marcansoft.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 818d06f..29c6125 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -139,6 +139,7 @@ ALC883/888
   acer		Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc)
   acer-aspire	Acer Aspire 9810
   acer-aspire-4930g Acer Aspire 4930G
+  acer-aspire-8930g Acer Aspire 8930G
   medion	Medion Laptops
   medion-md2	Medion MD2
   targa-dig	Targa/MSI
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ad6cc42..2bbb9e4 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -223,6 +223,7 @@ enum {
 	ALC883_ACER,
 	ALC883_ACER_ASPIRE,
 	ALC888_ACER_ASPIRE_4930G,
+	ALC888_ACER_ASPIRE_8930G,
 	ALC883_MEDION,
 	ALC883_MEDION_MD2,
 	ALC883_LAPTOP_EAPD,
@@ -313,6 +314,8 @@ struct alc_spec {
 	const struct hda_channel_mode *channel_mode;
 	int num_channel_mode;
 	int need_dac_fix;
+	int const_channel_count;
+	int ext_channel_count;
 
 	/* PCM information */
 	struct hda_pcm pcm_rec[3];	/* used in alc_build_pcms() */
@@ -368,6 +371,7 @@ struct alc_config_preset {
 	unsigned int num_channel_mode;
 	const struct hda_channel_mode *channel_mode;
 	int need_dac_fix;
+	int const_channel_count;
 	unsigned int num_mux_defs;
 	const struct hda_input_mux *input_mux;
 	void (*unsol_event)(struct hda_codec *, unsigned int);
@@ -462,7 +466,7 @@ static int alc_ch_mode_get(struct snd_kcontrol *kcontrol,
 	struct alc_spec *spec = codec->spec;
 	return snd_hda_ch_mode_get(codec, ucontrol, spec->channel_mode,
 				   spec->num_channel_mode,
-				   spec->multiout.max_channels);
+				   spec->ext_channel_count);
 }
 
 static int alc_ch_mode_put(struct snd_kcontrol *kcontrol,
@@ -472,9 +476,12 @@ static int alc_ch_mode_put(struct snd_kcontrol *kcontrol,
 	struct alc_spec *spec = codec->spec;
 	int err = snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
 				      spec->num_channel_mode,
-				      &spec->multiout.max_channels);
-	if (err >= 0 && spec->need_dac_fix)
-		spec->multiout.num_dacs = spec->multiout.max_channels / 2;
+				      &spec->ext_channel_count);
+	if (err >= 0 && !spec->const_channel_count) {
+		spec->multiout.max_channels = spec->ext_channel_count;
+		if (spec->need_dac_fix)
+			spec->multiout.num_dacs = spec->multiout.max_channels / 2;
+	}
 	return err;
 }
 
@@ -854,8 +861,13 @@ static void setup_preset(struct alc_spec *spec,
 	spec->channel_mode = preset->channel_mode;
 	spec->num_channel_mode = preset->num_channel_mode;
 	spec->need_dac_fix = preset->need_dac_fix;
+	spec->const_channel_count = preset->const_channel_count;
 
-	spec->multiout.max_channels = spec->channel_mode[0].channels;
+	if (preset->const_channel_count)
+		spec->multiout.max_channels = preset->const_channel_count;
+	else
+		spec->multiout.max_channels = spec->channel_mode[0].channels;
+	spec->ext_channel_count = spec->channel_mode[0].channels;
 
 	spec->multiout.num_dacs = preset->num_dacs;
 	spec->multiout.dac_nids = preset->dac_nids;
@@ -1456,6 +1468,48 @@ static struct hda_verb alc888_acer_aspire_4930g_verbs[] = {
 	{ }
 };
 
+/*
+ * ALC888 Acer Aspire 8930G model
+ */
+
+static struct hda_verb alc888_acer_aspire_8930g_verbs[] = {
+/* Front Mic: set to PIN_IN (empty by default) */
+	{0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+/* Unselect Front Mic by default in input mixer 3 */
+	{0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0xb)},
+/* Enable unsolicited event for HP jack */
+	{0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
+/* Connect Internal Front to Front */
+	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x14, AC_VERB_SET_CONNECT_SEL, 0x00},
+/* Connect Internal Rear to Rear */
+	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x1b, AC_VERB_SET_CONNECT_SEL, 0x01},
+/* Connect Internal CLFE to CLFE */
+	{0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x16, AC_VERB_SET_CONNECT_SEL, 0x02},
+/* Connect HP out to Front */
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
+/* Enable all DACs */
+/*  DAC DISABLE/MUTE 1? */
+/*  setting bits 1-5 disables DAC nids 0x02-0x06 apparently. Init=0x38 */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x03},
+	{0x20, AC_VERB_SET_PROC_COEF, 0x0000},
+/*  DAC DISABLE/MUTE 2? */
+/*  some bit here disables the other DACs. Init=0x4900 */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x08},
+	{0x20, AC_VERB_SET_PROC_COEF, 0x0000},
+/* Enable amplifiers */
+	{0x14, AC_VERB_SET_EAPD_BTLENABLE, 0x02},
+	{0x15, AC_VERB_SET_EAPD_BTLENABLE, 0x02},
+	{ }
+};
+
 static struct hda_input_mux alc888_2_capture_sources[2] = {
 	/* Front mic only available on one ADC */
 	{
@@ -1508,6 +1562,17 @@ static void alc888_acer_aspire_4930g_init_hook(struct hda_codec *codec)
 	alc_automute_amp(codec);
 }
 
+static void alc888_acer_aspire_8930g_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x16;
+	spec->autocfg.speaker_pins[2] = 0x1b;
+	alc_automute_amp(codec);
+}
+
 /*
  * ALC880 3-stack model
  *
@@ -8758,6 +8823,7 @@ static const char *alc883_models[ALC883_MODEL_LAST] = {
 	[ALC883_ACER]		= "acer",
 	[ALC883_ACER_ASPIRE]	= "acer-aspire",
 	[ALC888_ACER_ASPIRE_4930G]	= "acer-aspire-4930g",
+	[ALC888_ACER_ASPIRE_8930G]	= "acer-aspire-8930g",
 	[ALC883_MEDION]		= "medion",
 	[ALC883_MEDION_MD2]	= "medion-md2",
 	[ALC883_LAPTOP_EAPD]	= "laptop-eapd",
@@ -8790,6 +8856,8 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = {
 		ALC888_ACER_ASPIRE_4930G),
 	SND_PCI_QUIRK(0x1025, 0x013f, "Acer Aspire 5930G",
 		ALC888_ACER_ASPIRE_4930G),
+	SND_PCI_QUIRK(0x1025, 0x0145, "Acer Aspire 8930G",
+		ALC888_ACER_ASPIRE_8930G),
 	SND_PCI_QUIRK(0x1025, 0x0157, "Acer X3200", ALC883_AUTO),
 	SND_PCI_QUIRK(0x1025, 0x0158, "Acer AX1700-U3700A", ALC883_AUTO),
 	SND_PCI_QUIRK(0x1025, 0x015e, "Acer Aspire 6930G",
@@ -9009,6 +9077,27 @@ static struct alc_config_preset alc883_presets[] = {
 		.unsol_event = alc_automute_amp_unsol_event,
 		.init_hook = alc888_acer_aspire_4930g_init_hook,
 	},
+	[ALC888_ACER_ASPIRE_8930G] = {
+		.mixers = { alc888_base_mixer,
+				alc883_chmode_mixer },
+		.init_verbs = { alc883_init_verbs, alc880_gpio1_init_verbs,
+				alc888_acer_aspire_8930g_verbs },
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_adc_nids = ARRAY_SIZE(alc883_adc_nids_rev),
+		.adc_nids = alc883_adc_nids_rev,
+		.capsrc_nids = alc883_capsrc_nids_rev,
+		.dig_out_nid = ALC883_DIGOUT_NID,
+		.num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes),
+		.channel_mode = alc883_3ST_6ch_modes,
+		.need_dac_fix = 1,
+		.const_channel_count = 6,
+		.num_mux_defs =
+			ARRAY_SIZE(alc888_2_capture_sources),
+		.input_mux = alc888_2_capture_sources,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc888_acer_aspire_8930g_init_hook,
+	},
 	[ALC883_MEDION] = {
 		.mixers = { alc883_fivestack_mixer,
 			    alc883_chmode_mixer },
-- 
cgit v0.10.2


From c1079abd1d5e66edabeb04d75e48e604cd8c167f Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 2 Jun 2009 10:17:34 +0200
Subject: perf_counter tools: Cleanup Makefile

We currently build perf-stat/record etc, only to do nothing
with them.  We also install the perf binary in two places,
$prefix/bin and $perfexec_instdir, which appears to be for
binaries which perf would exec were a command not linked in.
Correct this, and comment out broken/incomplete targets dist
and coverage.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 3b8275f..eae8856 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -260,8 +260,6 @@ PROGRAMS += perf
 
 # List built-in command $C whose implementation cmd_$C() is not in
 # builtin-$C.o but is linked in as part of some other command.
-BUILT_INS += $(patsubst builtin-%.o,perf-%$X,$(BUILTIN_OBJS))
-
 #
 # None right now:
 #
@@ -791,12 +789,14 @@ export perfexec_instdir
 
 install: all
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-	$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
+ifdef BUILT_INS
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 ifneq (,$X)
 	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
 endif
+endif
 
 install-doc:
 	$(MAKE) -C Documentation install
@@ -824,52 +824,55 @@ quick-install-html:
 
 
 ### Maintainer's dist rules
-
-perf.spec: perf.spec.in
-	sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
-	mv $@+ $@
-
-PERF_TARNAME=perf-$(PERF_VERSION)
-dist: perf.spec perf-archive$(X) configure
-	./perf-archive --format=tar \
-		--prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
-	@mkdir -p $(PERF_TARNAME)
-	@cp perf.spec configure $(PERF_TARNAME)
-	@echo $(PERF_VERSION) > $(PERF_TARNAME)/version
-	$(TAR) rf $(PERF_TARNAME).tar \
-		$(PERF_TARNAME)/perf.spec \
-		$(PERF_TARNAME)/configure \
-		$(PERF_TARNAME)/version
-	@$(RM) -r $(PERF_TARNAME)
-	gzip -f -9 $(PERF_TARNAME).tar
-
-htmldocs = perf-htmldocs-$(PERF_VERSION)
-manpages = perf-manpages-$(PERF_VERSION)
-dist-doc:
-	$(RM) -r .doc-tmp-dir
-	mkdir .doc-tmp-dir
-	$(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
-	cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
-	gzip -n -9 -f $(htmldocs).tar
-	:
-	$(RM) -r .doc-tmp-dir
-	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
-	$(MAKE) -C Documentation DESTDIR=./ \
-		man1dir=../.doc-tmp-dir/man1 \
-		man5dir=../.doc-tmp-dir/man5 \
-		man7dir=../.doc-tmp-dir/man7 \
-		install
-	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
-	gzip -n -9 -f $(manpages).tar
-	$(RM) -r .doc-tmp-dir
-
-rpm: dist
-	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
+#
+# None right now
+#
+#
+# perf.spec: perf.spec.in
+#	sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
+#	mv $@+ $@
+#
+# PERF_TARNAME=perf-$(PERF_VERSION)
+# dist: perf.spec perf-archive$(X) configure
+#	./perf-archive --format=tar \
+#		--prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
+#	@mkdir -p $(PERF_TARNAME)
+#	@cp perf.spec configure $(PERF_TARNAME)
+#	@echo $(PERF_VERSION) > $(PERF_TARNAME)/version
+#	$(TAR) rf $(PERF_TARNAME).tar \
+#		$(PERF_TARNAME)/perf.spec \
+#		$(PERF_TARNAME)/configure \
+#		$(PERF_TARNAME)/version
+#	@$(RM) -r $(PERF_TARNAME)
+#	gzip -f -9 $(PERF_TARNAME).tar
+#
+# htmldocs = perf-htmldocs-$(PERF_VERSION)
+# manpages = perf-manpages-$(PERF_VERSION)
+# dist-doc:
+#	$(RM) -r .doc-tmp-dir
+#	mkdir .doc-tmp-dir
+#	$(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
+#	cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
+#	gzip -n -9 -f $(htmldocs).tar
+#	:
+#	$(RM) -r .doc-tmp-dir
+#	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
+#	$(MAKE) -C Documentation DESTDIR=./ \
+#		man1dir=../.doc-tmp-dir/man1 \
+#		man5dir=../.doc-tmp-dir/man5 \
+#		man7dir=../.doc-tmp-dir/man7 \
+#		install
+#	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
+#	gzip -n -9 -f $(manpages).tar
+#	$(RM) -r .doc-tmp-dir
+#
+# rpm: dist
+#	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
 
 ### Cleaning rules
 
 distclean: clean
-	$(RM) configure
+#	$(RM) configure
 
 clean:
 	$(RM) *.o */*.o $(LIB_FILE)
@@ -896,25 +899,27 @@ check-builtins::
 
 ### Test suite coverage testing
 #
-.PHONY: coverage coverage-clean coverage-build coverage-report
-
-coverage:
-	$(MAKE) coverage-build
-	$(MAKE) coverage-report
-
-coverage-clean:
-	rm -f *.gcda *.gcno
-
-COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
-COVERAGE_LDFLAGS = $(CFLAGS)  -O0 -lgcov
-
-coverage-build: coverage-clean
-	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
-	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
-		-j1 test
-
-coverage-report:
-	gcov -b *.c */*.c
-	grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
-		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
-		| tee coverage-untested-functions
+# None right now
+#
+# .PHONY: coverage coverage-clean coverage-build coverage-report
+#
+# coverage:
+#	$(MAKE) coverage-build
+#	$(MAKE) coverage-report
+#
+# coverage-clean:
+#	rm -f *.gcda *.gcno
+#
+# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
+# COVERAGE_LDFLAGS = $(CFLAGS)  -O0 -lgcov
+#
+# coverage-build: coverage-clean
+#	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
+#	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
+#		-j1 test
+#
+# coverage-report:
+#	gcov -b *.c */*.c
+#	grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
+#		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
+#		| tee coverage-untested-functions
-- 
cgit v0.10.2


From 10a2825514a988225ac2e336c7a9502c4ca57c39 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 2 Jun 2009 11:04:44 +0200
Subject: perf_counter tools: Fix uninitialized variable in perf-report.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# make prefix=/usr/local V=1
gcc -o builtin-report.o -c -O2 -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement   -DSHA1_HEADER='<openssl/sha.h>'  builtin-report.c
cc1: warnings being treated as errors
builtin-report.c: In function ‘__cmd_report’:
builtin-report.c:626: error: ‘cwdlen’ may be used uninitialized in this function

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <new-submission>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 7973092..20a4e51 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -653,8 +653,10 @@ static int __cmd_report(void)
 			return EXIT_FAILURE;
 		}
 		cwdlen = strlen(cwd);
-	} else
+	} else {
 		cwdp = NULL;
+		cwdlen = 0;
+	}
 remap:
 	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
 			   MAP_SHARED, input, offset);
-- 
cgit v0.10.2


From 601e1cc5df940b59e71c947726640811897d30df Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 11:37:01 +0200
Subject: ALSA: ca0106 - Add missing registrations of vmaster controls

Although the vmaster controls are created, they aren't registered thus
they don't appear in the real world.  Added the missing snd_ctl_add()
calls.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Cc: <stable@kernel.org>

diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index c111efe..f143f71 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -841,6 +841,9 @@ int __devinit snd_ca0106_mixer(struct snd_ca0106 *emu)
 					      snd_ca0106_master_db_scale);
 	if (!vmaster)
 		return -ENOMEM;
+	err = snd_ctl_add(card, vmaster);
+	if (err < 0)
+		return err;
 	add_slaves(card, vmaster, slave_vols);
 
 	if (emu->details->spi_dac == 1) {
@@ -848,6 +851,9 @@ int __devinit snd_ca0106_mixer(struct snd_ca0106 *emu)
 						      NULL);
 		if (!vmaster)
 			return -ENOMEM;
+		err = snd_ctl_add(card, vmaster);
+		if (err < 0)
+			return err;
 		add_slaves(card, vmaster, slave_sws);
 	}
         return 0;
-- 
cgit v0.10.2


From 10a8ebbb08c4b08292598947bbe534e04d6ee705 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Tue, 2 Jun 2009 12:02:38 +0200
Subject: ALSA: Core - add snd_card_set_id() function

Introduce snd_card_set_id() function to allow lowlevel drivers to set
default identification name for card slot. The function checks also
for identification name collisions and tries to create unique name.

Also, the snd_card_create() function is simplified, because this new
function is used. As bonus, proper name collision checks are evaluated
at the card create time.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/sound/core.h b/include/sound/core.h
index 3dea798..0e8ae10 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -313,6 +313,7 @@ struct snd_card *snd_card_new(int idx, const char *id,
 int snd_card_disconnect(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
 int snd_card_free_when_closed(struct snd_card *card);
+void snd_card_set_id(struct snd_card *card, const char *id);
 int snd_card_register(struct snd_card *card);
 int snd_card_info_init(void);
 int snd_card_info_done(void);
diff --git a/sound/core/init.c b/sound/core/init.c
index fd56afe..6557dd8 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -152,15 +152,8 @@ int snd_card_create(int idx, const char *xid,
 	card = kzalloc(sizeof(*card) + extra_size, GFP_KERNEL);
 	if (!card)
 		return -ENOMEM;
-	if (xid) {
-		if (!snd_info_check_reserved_words(xid)) {
-			snd_printk(KERN_ERR
-				   "given id string '%s' is reserved.\n", xid);
-			err = -EBUSY;
-			goto __error;
-		}
-		strlcpy(card->id, xid, sizeof(card->id));
-	}
+	if (xid)
+		snd_card_set_id(card, xid);
 	err = 0;
 	mutex_lock(&snd_card_mutex);
 	if (idx < 0) {
@@ -483,22 +476,39 @@ int snd_card_free(struct snd_card *card)
 
 EXPORT_SYMBOL(snd_card_free);
 
-static void choose_default_id(struct snd_card *card)
+/**
+ *  snd_card_set_id - set card identification name
+ *  @card: soundcard structure
+ *  @nid: new identification string
+ *
+ *  This function sets the card identification and checks for name
+ *  collisions.
+ */
+void snd_card_set_id(struct snd_card *card, const char *nid)
 {
 	int i, len, idx_flag = 0, loops = SNDRV_CARDS;
-	char *id, *spos;
+	const char *spos, *src;
+	char *id;
 	
-	id = spos = card->shortname;	
-	while (*id != '\0') {
-		if (*id == ' ')
-			spos = id + 1;
-		id++;
+	/* check if user specified own card->id */
+	if (card->id[0] != '\0')
+		return;
+	if (nid == NULL) {
+		id = card->shortname;
+		spos = src = id;
+		while (*id != '\0') {
+			if (*id == ' ')
+				spos = id + 1;
+			id++;
+		}
+	} else {
+		spos = src = nid;
 	}
 	id = card->id;
 	while (*spos != '\0' && !isalnum(*spos))
 		spos++;
 	if (isdigit(*spos))
-		*id++ = isalpha(card->shortname[0]) ? card->shortname[0] : 'D';
+		*id++ = isalpha(src[0]) ? src[0] : 'D';
 	while (*spos != '\0' && (size_t)(id - card->id) < sizeof(card->id) - 1) {
 		if (isalnum(*spos))
 			*id++ = *spos;
@@ -513,16 +523,20 @@ static void choose_default_id(struct snd_card *card)
 
 	while (1) {
 	      	if (loops-- == 0) {
-      			snd_printk(KERN_ERR "unable to choose default card id (%s)\n", id);
+			snd_printk(KERN_ERR "unable to set card id (%s)\n", id);
       			strcpy(card->id, card->proc_root->name);
       			return;
       		}
 	      	if (!snd_info_check_reserved_words(id))
       			goto __change;
+		mutex_lock(&snd_card_mutex);
 		for (i = 0; i < snd_ecards_limit; i++) {
-			if (snd_cards[i] && !strcmp(snd_cards[i]->id, id))
+			if (snd_cards[i] && !strcmp(snd_cards[i]->id, id)) {
+				mutex_unlock(&snd_card_mutex);
 				goto __change;
+			}
 		}
+		mutex_unlock(&snd_card_mutex);
 		break;
 
 	      __change:
@@ -539,14 +553,16 @@ static void choose_default_id(struct snd_card *card)
 			spos = id + len - 2;
 			if ((size_t)len <= sizeof(card->id) - 2)
 				spos++;
-			*spos++ = '_';
-			*spos++ = '1';
-			*spos++ = '\0';
+			*(char *)spos++ = '_';
+			*(char *)spos++ = '1';
+			*(char *)spos++ = '\0';
 			idx_flag++;
 		}
 	}
 }
 
+EXPORT_SYMBOL(snd_card_set_id);
+
 #ifndef CONFIG_SYSFS_DEPRECATED
 static ssize_t
 card_id_show_attr(struct device *dev,
@@ -641,7 +657,7 @@ int snd_card_register(struct snd_card *card)
 		return 0;
 	}
 	if (card->id[0] == '\0')
-		choose_default_id(card);
+		snd_card_set_id(card, NULL);
 	snd_cards[card->number] = card;
 	mutex_unlock(&snd_card_mutex);
 	init_info_for_card(card);
-- 
cgit v0.10.2


From bafeee5b1f8d32cbf791c322b40a6fa91d8ccf7a Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Tue, 2 Jun 2009 12:36:39 +0200
Subject: ALSA: snd_usb_caiaq: give better shortname

If not passed as module option, provide an own card ID with the newly
introduced snd_set_card_id() call.

This will prevent ALSA from calling choose_default_name() which only
takes the last part of a name containing whitespaces. This for example
caused 'Audio 4 DJ' to be shortened to 'DJ', which was not very
descriptive.

The implementation now takes the short name and removes all whitespaces
from it which is much nicer.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c
index 15052e4..43bd0db 100644
--- a/sound/usb/caiaq/device.c
+++ b/sound/usb/caiaq/device.c
@@ -382,10 +382,10 @@ static int create_card(struct usb_device* usb_dev, struct snd_card **cardp)
 
 static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 {
-	char usbpath[32];
+	char *c, usbpath[32];
 	struct usb_device *usb_dev = dev->chip.dev;
 	struct snd_card *card = dev->chip.card;
-	int err;
+	int err, len;
 
 	if (usb_set_interface(usb_dev, 0, 1) != 0) {
 		log("can't set alt interface.\n");
@@ -427,6 +427,23 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 	strlcpy(card->driver, MODNAME, sizeof(card->driver));
 	strlcpy(card->shortname, dev->product_name, sizeof(card->shortname));
 
+	/* if the id was not passed as module option, fill it with a shortened
+	 * version of the product string which does not contain any
+	 * whitespaces */
+
+	if (*card->id == '\0') {
+		char id[sizeof(card->id)];
+
+		memset(id, 0, sizeof(id));
+
+		for (c = card->shortname, len = 0;
+			*c && len < sizeof(card->id); c++)
+			if (*c != ' ')
+				id[len++] = *c;
+
+		snd_card_set_id(card, id);
+	}
+
 	usb_make_path(usb_dev, usbpath, sizeof(usbpath));
 	snprintf(card->longname, sizeof(card->longname),
 		       "%s %s (%s)",
-- 
cgit v0.10.2


From f38b082081bf69a06fffb8b32a175999e2320c5b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 2 Jun 2009 21:05:16 +1000
Subject: perf_counter: Initialize per-cpu context earlier on cpu up

This arranges for perf_counter's notifier for cpu hotplug
operations to be called earlier than the migration notifier in
sched.c by increasing its priority to 20, compared to the 10
for the migration notifier.  The reason for doing this is that
a subsequent commit to convert the cpu migration counter to use
the generic swcounter infrastructure will add a call into the
perf_counter subsystem when tasks get migrated.  Therefore the
perf_counter subsystem needs a chance to initialize its per-cpu
data for the new cpu before it can get called from the
migration code.

This also adds a comment to the migration notifier noting that
its priority needs to be lower than that of the perf_counter
notifier.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18981.1900.792795.836858@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index df319c4..8d2653f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3902,8 +3902,12 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
+/*
+ * This has to have a higher priority than migration_notifier in sched.c.
+ */
 static struct notifier_block __cpuinitdata perf_cpu_nb = {
 	.notifier_call		= perf_cpu_notify,
+	.priority		= 20,
 };
 
 void __init perf_counter_init(void)
diff --git a/kernel/sched.c b/kernel/sched.c
index ad079f0..3226cc1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7319,8 +7319,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
-/* Register at highest priority so that task migration (migrate_all_tasks)
- * happens before everything else.
+/*
+ * Register at high priority so that task migration (migrate_all_tasks)
+ * happens before everything else.  This has to be lower priority than
+ * the notifier in the perf_counter subsystem, though.
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
 	.notifier_call = migration_call,
-- 
cgit v0.10.2


From 3f731ca60afc29f5bcdb5fd2a04391466313a9ac Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 1 Jun 2009 17:52:30 +1000
Subject: perf_counter: Fix cpu migration counter

This fixes the cpu migration software counter to count
correctly even when contexts get swapped from one task to
another.  Previously the cpu migration counts reported by perf
stat were bogus, ranging from negative to several thousand for
a single "lat_ctx 2 8 32" run.  With this patch the cpu
migration count reported for "lat_ctx 2 8 32" is almost always
between 35 and 44.

This fixes the problem by adding a call into the perf_counter
code from set_task_cpu when tasks are migrated.  This enables
us to use the generic swcounter code (with some modifications)
for the cpu migration counter.

This modifies the swcounter code to allow a NULL regs pointer
to be passed in to perf_swcounter_ctx_event() etc.  The cpu
migration counter does this because there isn't necessarily a
pt_regs struct for the task available.  In this case, the
counter will not have interrupt capability - but the migration
counter didn't have interrupt capability before, so this is no
loss.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18979.35006.819769.416327@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0e57d8c..deb9acf 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -615,6 +615,8 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 
 extern void perf_counter_comm(struct task_struct *tsk);
 
+extern void perf_counter_task_migration(struct task_struct *task, int cpu);
+
 #define MAX_STACK_DEPTH		255
 
 struct perf_callchain_entry {
@@ -668,6 +670,8 @@ perf_counter_munmap(unsigned long addr, unsigned long len,
 
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
+static inline void perf_counter_task_migration(struct task_struct *task,
+					       int cpu)			{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8d2653f..cd94cf3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2921,11 +2921,13 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	if (counter->hw_event.config != event_config)
 		return 0;
 
-	if (counter->hw_event.exclude_user && user_mode(regs))
-		return 0;
+	if (regs) {
+		if (counter->hw_event.exclude_user && user_mode(regs))
+			return 0;
 
-	if (counter->hw_event.exclude_kernel && !user_mode(regs))
-		return 0;
+		if (counter->hw_event.exclude_kernel && !user_mode(regs))
+			return 0;
+	}
 
 	return 1;
 }
@@ -2935,7 +2937,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
 
-	if (counter->hw.irq_period && !neg)
+	if (counter->hw.irq_period && !neg && regs)
 		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
 
@@ -3151,55 +3153,24 @@ static const struct pmu perf_ops_task_clock = {
 /*
  * Software counter: cpu migrations
  */
-
-static inline u64 get_cpu_migrations(struct perf_counter *counter)
-{
-	struct task_struct *curr = counter->ctx->task;
-
-	if (curr)
-		return curr->se.nr_migrations;
-	return cpu_nr_migrations(smp_processor_id());
-}
-
-static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
-{
-	u64 prev, now;
-	s64 delta;
-
-	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_cpu_migrations(counter);
-
-	atomic64_set(&counter->hw.prev_count, now);
-
-	delta = now - prev;
-
-	atomic64_add(delta, &counter->count);
-}
-
-static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
+void perf_counter_task_migration(struct task_struct *task, int cpu)
 {
-	cpu_migrations_perf_counter_update(counter);
-}
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx;
 
-static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
-{
-	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
-		atomic64_set(&counter->hw.prev_count,
-			     get_cpu_migrations(counter));
-	return 0;
-}
+	perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE,
+				 PERF_COUNT_CPU_MIGRATIONS,
+				 1, 1, NULL, 0);
 
-static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
-{
-	cpu_migrations_perf_counter_update(counter);
+	ctx = perf_pin_task_context(task);
+	if (ctx) {
+		perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE,
+					 PERF_COUNT_CPU_MIGRATIONS,
+					 1, 1, NULL, 0);
+		perf_unpin_context(ctx);
+	}
 }
 
-static const struct pmu perf_ops_cpu_migrations = {
-	.enable		= cpu_migrations_perf_counter_enable,
-	.disable	= cpu_migrations_perf_counter_disable,
-	.read		= cpu_migrations_perf_counter_read,
-};
-
 #ifdef CONFIG_EVENT_PROFILE
 void perf_tpcounter_event(int event_id)
 {
@@ -3272,11 +3243,8 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_PAGE_FAULTS_MIN:
 	case PERF_COUNT_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		pmu = &perf_ops_generic;
-		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
-		if (!counter->hw_event.exclude_kernel)
-			pmu = &perf_ops_cpu_migrations;
+		pmu = &perf_ops_generic;
 		break;
 	}
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 3226cc1..8d43347 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1977,6 +1977,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
 #endif
+		perf_counter_task_migration(p, new_cpu);
 	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
 					 new_cfsrq->min_vruntime;
-- 
cgit v0.10.2


From bf4e0ed3d027ce581be18496036862131b5f32aa Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 1 Jun 2009 17:53:16 +1000
Subject: perf_counter: Remove unused prev_state field

This removes the prev_state field of struct perf_counter since
it is now unused.  It was only used by the cpu migration
counter, which doesn't use it any more.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18979.35052.915728.626374@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index deb9acf..d970fbc 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -427,7 +427,6 @@ struct perf_counter {
 	const struct pmu		*pmu;
 
 	enum perf_counter_active_state	state;
-	enum perf_counter_active_state	prev_state;
 	atomic64_t			count;
 
 	/*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index cd94cf3..fbed4d2 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -572,7 +572,6 @@ group_sched_in(struct perf_counter *group_counter,
 	if (ret)
 		return ret < 0 ? ret : 0;
 
-	group_counter->prev_state = group_counter->state;
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
 
@@ -580,7 +579,6 @@ group_sched_in(struct perf_counter *group_counter,
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
-		counter->prev_state = counter->state;
 		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
 			partial_group = counter;
 			goto group_error;
@@ -657,7 +655,6 @@ static void add_counter_to_ctx(struct perf_counter *counter,
 			       struct perf_counter_context *ctx)
 {
 	list_add_counter(counter, ctx);
-	counter->prev_state = PERF_COUNTER_STATE_OFF;
 	counter->tstamp_enabled = ctx->time;
 	counter->tstamp_running = ctx->time;
 	counter->tstamp_stopped = ctx->time;
@@ -820,7 +817,6 @@ static void __perf_counter_enable(void *info)
 	ctx->is_active = 1;
 	update_context_time(ctx);
 
-	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-- 
cgit v0.10.2


From fe67e6f2d6df371b58ba721954d45a196df5e8b8 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Tue, 2 Jun 2009 17:00:45 +0900
Subject: TOMOYO: Remove unused mutex.

I forgot to remove on TOMOYO's 15th posting.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 2d67487..ee43631 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -296,7 +296,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname,
 	struct tomoyo_domain_keeper_entry *ptr;
 	const struct tomoyo_path_info *saved_domainname;
 	const struct tomoyo_path_info *saved_program = NULL;
-	static DEFINE_MUTEX(lock);
 	int error = -ENOMEM;
 	bool is_last_name = false;
 
-- 
cgit v0.10.2


From c6e24d4db824d277303201811e88626778c59999 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Tue, 2 Jun 2009 12:36:40 +0200
Subject: ALSA: snd_usb_caiaq: bump version number

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c
index 43bd0db..b9a2b31 100644
--- a/sound/usb/caiaq/device.c
+++ b/sound/usb/caiaq/device.c
@@ -35,7 +35,7 @@
 #include "input.h"
 
 MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
-MODULE_DESCRIPTION("caiaq USB audio, version 1.3.14");
+MODULE_DESCRIPTION("caiaq USB audio, version 1.3.15");
 MODULE_LICENSE("GPL");
 MODULE_SUPPORTED_DEVICE("{{Native Instruments, RigKontrol2},"
 			 "{Native Instruments, RigKontrol3},"
-- 
cgit v0.10.2


From 9a83b7453c2c4db145666b653abe9d9f410d18a2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 14:20:48 +0200
Subject: ALSA: Remove invalid GENERIC_MIX PCM sublass

SNDRV_PCM_SUBCLASS_GENERIC_MIX is mostly for h/w multi-stream playback
devices, but ca0106 and emu10k1x don't support it (unlike emu10k1).
We shouldn't set that flag to avoid confusion.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c
index bfac30f..57b992a 100644
--- a/sound/pci/ca0106/ca0106_main.c
+++ b/sound/pci/ca0106/ca0106_main.c
@@ -1319,7 +1319,6 @@ static int __devinit snd_ca0106_pcm(struct snd_ca0106 *emu, int device)
         }
 
 	pcm->info_flags = 0;
-	pcm->dev_subclass = SNDRV_PCM_SUBCLASS_GENERIC_MIX;
 	strcpy(pcm->name, "CA0106");
 
 	for(substream = pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; 
diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
index 1970f0e..4d3ad79 100644
--- a/sound/pci/emu10k1/emu10k1x.c
+++ b/sound/pci/emu10k1/emu10k1x.c
@@ -858,7 +858,6 @@ static int __devinit snd_emu10k1x_pcm(struct emu10k1x *emu, int device, struct s
 	}
 
 	pcm->info_flags = 0;
-	pcm->dev_subclass = SNDRV_PCM_SUBCLASS_GENERIC_MIX;
 	switch(device) {
 	case 0:
 		strcpy(pcm->name, "EMU10K1X Front");
-- 
cgit v0.10.2


From 4a4b2d7684c66dbd8ed04eb284bc94a78e061d29 Mon Sep 17 00:00:00 2001
From: "Stephen M. Cameron" <scameron@beardog.cca.cpqcorp.net>
Date: Tue, 2 Jun 2009 14:47:50 +0200
Subject: cciss: factor out core of sendcmd() for a more sane interface

Factor out the core of sendcmd() to provide a simpler interface which
exposes all the error information to the caller and make the original
sendcmd use this new function.  Rationale: The SCSI error handling
routines need to send commands with interrupts turned off, but they also
need access to the full error information.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Cc: Mike Miller <mikem@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 94474f5..8d0f893 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2373,41 +2373,21 @@ static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete)
 	return 0;
 }
 
-/*
- * Send a command to the controller, and wait for it to complete.
- * Only used at init time.
+/* Send command c to controller h and poll for it to complete.
+ * Turns interrupts off on the board.  Used at driver init time
+ * and during SCSI error recovery.
  */
-static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,	/* 0: address the controller,
-												   1: address logical volume log_unit,
-												   2: periph device address is scsi3addr */
-		   unsigned int log_unit,
-		   __u8 page_code, unsigned char *scsi3addr, int cmd_type)
+static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c)
 {
-	CommandList_struct *c;
 	int i;
 	unsigned long complete;
-	ctlr_info_t *info_p = hba[ctlr];
+	int status = IO_ERROR;
 	u64bit buff_dma_handle;
-	int status, done = 0;
 
-	if ((c = cmd_alloc(info_p, 1)) == NULL) {
-		printk(KERN_WARNING "cciss: unable to get memory");
-		return IO_ERROR;
-	}
-	status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
-			  log_unit, page_code, scsi3addr, cmd_type);
-	if (status != IO_OK) {
-		cmd_free(info_p, c, 1);
-		return status;
-	}
-      resend_cmd1:
-	/*
-	 * Disable interrupt
-	 */
-#ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss: turning intr off\n");
-#endif				/* CCISS_DEBUG */
-	info_p->access.set_intr_mask(info_p, CCISS_INTR_OFF);
+resend_cmd1:
+
+	/* Disable interrupt on the board. */
+	h->access.set_intr_mask(h, CCISS_INTR_OFF);
 
 	/* Make sure there is room in the command FIFO */
 	/* Actually it should be completely empty at this time */
@@ -2415,21 +2395,15 @@ static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size, unsigned int use
 	/* tape side of the driver. */
 	for (i = 200000; i > 0; i--) {
 		/* if fifo isn't full go */
-		if (!(info_p->access.fifo_full(info_p))) {
-
+		if (!(h->access.fifo_full(h)))
 			break;
-		}
 		udelay(10);
 		printk(KERN_WARNING "cciss cciss%d: SendCmd FIFO full,"
-		       " waiting!\n", ctlr);
+		       " waiting!\n", h->ctlr);
 	}
-	/*
-	 * Send the cmd
-	 */
-	info_p->access.submit_command(info_p, c);
-	done = 0;
+	h->access.submit_command(h, c); /* Send the cmd */
 	do {
-		complete = pollcomplete(ctlr);
+		complete = pollcomplete(h->ctlr);
 
 #ifdef CCISS_DEBUG
 		printk(KERN_DEBUG "cciss: command completed\n");
@@ -2438,97 +2412,116 @@ static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size, unsigned int use
 		if (complete == 1) {
 			printk(KERN_WARNING
 			       "cciss cciss%d: SendCmd Timeout out, "
-			       "No command list address returned!\n", ctlr);
+			       "No command list address returned!\n", h->ctlr);
 			status = IO_ERROR;
-			done = 1;
 			break;
 		}
 
-		/* This will need to change for direct lookup completions */
-		if ((complete & CISS_ERROR_BIT)
-		    && (complete & ~CISS_ERROR_BIT) == c->busaddr) {
-			/* if data overrun or underun on Report command
-			   ignore it
-			 */
-			if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
-			     (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
-			     (c->Request.CDB[0] == CISS_INQUIRY)) &&
-			    ((c->err_info->CommandStatus ==
-			      CMD_DATA_OVERRUN) ||
-			     (c->err_info->CommandStatus == CMD_DATA_UNDERRUN)
-			    )) {
-				complete = c->busaddr;
-			} else {
-				if (c->err_info->CommandStatus ==
-				    CMD_UNSOLICITED_ABORT) {
-					printk(KERN_WARNING "cciss%d: "
-					       "unsolicited abort %p\n",
-					       ctlr, c);
-					if (c->retry_count < MAX_CMD_RETRIES) {
-						printk(KERN_WARNING
-						       "cciss%d: retrying %p\n",
-						       ctlr, c);
-						c->retry_count++;
-						/* erase the old error */
-						/* information */
-						memset(c->err_info, 0,
-						       sizeof
-						       (ErrorInfo_struct));
-						goto resend_cmd1;
-					} else {
-						printk(KERN_WARNING
-						       "cciss%d: retried %p too "
-						       "many times\n", ctlr, c);
-						status = IO_ERROR;
-						goto cleanup1;
-					}
-				} else if (c->err_info->CommandStatus ==
-					   CMD_UNABORTABLE) {
-					printk(KERN_WARNING
-					       "cciss%d: command could not be aborted.\n",
-					       ctlr);
-					status = IO_ERROR;
-					goto cleanup1;
-				}
-				printk(KERN_WARNING "ciss ciss%d: sendcmd"
-				       " Error %x \n", ctlr,
-				       c->err_info->CommandStatus);
-				printk(KERN_WARNING "ciss ciss%d: sendcmd"
-				       " offensive info\n"
-				       "  size %x\n   num %x   value %x\n",
-				       ctlr,
-				       c->err_info->MoreErrInfo.Invalid_Cmd.
-				       offense_size,
-				       c->err_info->MoreErrInfo.Invalid_Cmd.
-				       offense_num,
-				       c->err_info->MoreErrInfo.Invalid_Cmd.
-				       offense_value);
-				status = IO_ERROR;
-				goto cleanup1;
-			}
+		/* If it's not the cmd we're looking for, save it for later */
+		if ((complete & ~CISS_ERROR_BIT) != c->busaddr) {
+			if (add_sendcmd_reject(c->Request.CDB[0],
+				h->ctlr, complete) != 0)
+				BUG(); /* we are hosed if we get here. */
+			continue;
+		}
+
+		/* It is our command.  If no error, we're done. */
+		if (!(complete & CISS_ERROR_BIT)) {
+			status = IO_OK;
+			break;
+		}
+
+		/* There is an error... */
+
+		/* if data overrun or underun on Report command ignore it */
+		if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
+		     (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
+		     (c->Request.CDB[0] == CISS_INQUIRY)) &&
+			((c->err_info->CommandStatus == CMD_DATA_OVERRUN) ||
+			 (c->err_info->CommandStatus == CMD_DATA_UNDERRUN))) {
+			complete = c->busaddr;
+			status = IO_OK;
+			break;
 		}
-		/* This will need changing for direct lookup completions */
-		if (complete != c->busaddr) {
-			if (add_sendcmd_reject(cmd, ctlr, complete) != 0) {
-				BUG();	/* we are pretty much hosed if we get here. */
+
+		if (c->err_info->CommandStatus == CMD_UNSOLICITED_ABORT) {
+			printk(KERN_WARNING "cciss%d: unsolicited abort %p\n",
+				h->ctlr, c);
+			if (c->retry_count < MAX_CMD_RETRIES) {
+				printk(KERN_WARNING "cciss%d: retrying %p\n",
+				   h->ctlr, c);
+				c->retry_count++;
+				/* erase the old error information */
+				memset(c->err_info, 0, sizeof(c->err_info));
+				goto resend_cmd1;
 			}
-			continue;
-		} else
-			done = 1;
-	} while (!done);
+			printk(KERN_WARNING "cciss%d: retried %p too many "
+				"times\n", h->ctlr, c);
+			status = IO_ERROR;
+			goto cleanup1;
+		}
+
+		if (c->err_info->CommandStatus == CMD_UNABORTABLE) {
+			printk(KERN_WARNING "cciss%d: command could not be "
+				"aborted.\n", h->ctlr);
+			status = IO_ERROR;
+			goto cleanup1;
+		}
+
+		printk(KERN_WARNING "cciss%d: sendcmd error\n", h->ctlr);
+		printk(KERN_WARNING "cmd = 0x%02x, CommandStatus = 0x%02x\n",
+			c->Request.CDB[0], c->err_info->CommandStatus);
+		if (c->err_info->CommandStatus == CMD_TARGET_STATUS) {
+			printk(KERN_WARNING "Target status = 0x%02x\n",
+			c->err_info->ScsiStatus);
+			if (c->err_info->ScsiStatus == 2) /* chk cond */
+				printk(KERN_WARNING "Sense key = 0x%02x\n",
+					0xf & c->err_info->SenseInfo[2]);
+		}
+
+		status = IO_ERROR;
+		goto cleanup1;
+
+	} while (1);
 
-      cleanup1:
+cleanup1:
 	/* unlock the data buffer from DMA */
 	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
 	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
-	pci_unmap_single(info_p->pdev, (dma_addr_t) buff_dma_handle.val,
+	pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
 			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
 #ifdef CONFIG_CISS_SCSI_TAPE
 	/* if we saved some commands for later, process them now. */
-	if (info_p->scsi_rejects.ncompletions > 0)
-		do_cciss_intr(0, info_p);
+	if (h->scsi_rejects.ncompletions > 0)
+		do_cciss_intr(0, h);
 #endif
-	cmd_free(info_p, c, 1);
+	return status;
+}
+
+/*
+ * Send a command to the controller, and wait for it to complete.
+ * Used at init time, and during SCSI error recovery.
+ */
+static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
+	unsigned int use_unit_num,/* 0: address the controller,
+				     1: address logical volume log_unit,
+				     2: periph device address is scsi3addr */
+	unsigned int log_unit,
+	__u8 page_code, unsigned char *scsi3addr, int cmd_type)
+{
+	CommandList_struct *c;
+	int status;
+
+	c = cmd_alloc(hba[ctlr], 1);
+	if (!c) {
+		printk(KERN_WARNING "cciss: unable to get memory");
+		return IO_ERROR;
+	}
+	status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
+			  log_unit, page_code, scsi3addr, cmd_type);
+	if (status == IO_OK)
+		status = sendcmd_core(hba[ctlr], c);
+	cmd_free(hba[ctlr], c, 1);
 	return status;
 }
 
-- 
cgit v0.10.2


From 88f627ae394eadd75ada669904269f1a4a77b3bd Mon Sep 17 00:00:00 2001
From: "Stephen M. Cameron" <scameron@beardog.cca.cpqcorp.net>
Date: Tue, 2 Jun 2009 14:48:11 +0200
Subject: cciss: fix SCSI device reset handler

Fix the SCSI reset error handler to send a working, properly addressed
reset message to the target device and add code to wait for the target
device to become ready by polling it with Test Unit Ready.

The existing reset code was broken in that it didn't bother to set the
8-byte LUN address to anything besides zero, so the command was addressed
to the controller, which pretended to the driver that the command
succeeded, while doing nothing.  Ages ago I tested this code, but
unbeknownst to me, my test was flawed, and what I thought was a tape drive
getting reset was actually nothing of the sort.  Unfortunately, there is
still lots of Smartarray firmware that doesn't handle doing target resets
right, and this code won't help in those cases, but it also shouldn't make
things worse in those cases than they already are.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Cc: Mike Miller <mikem@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 8d0f893..cb43fb3 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1974,6 +1974,13 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_
 			c->Request.CDB[0] = BMIC_WRITE;
 			c->Request.CDB[6] = BMIC_CACHE_FLUSH;
 			break;
+		case TEST_UNIT_READY:
+			memcpy(c->Header. LUN.LunAddrBytes, scsi3addr, 8);
+			c->Request.CDBLen = 6;
+			c->Request.Type.Attribute = ATTR_SIMPLE;
+			c->Request.Type.Direction = XFER_NONE;
+			c->Request.Timeout = 0;
+			break;
 		default:
 			printk(KERN_WARNING
 			       "cciss%d:  Unknown Command 0x%c\n", ctlr, cmd);
@@ -1992,13 +1999,14 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_
 			memcpy(&c->Request.CDB[4], buff, 8);
 			break;
 		case 1:	/* RESET message */
-			c->Request.CDBLen = 12;
+			memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
+			c->Request.CDBLen = 16;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_WRITE;
+			c->Request.Type.Direction = XFER_NONE;
 			c->Request.Timeout = 0;
 			memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
 			c->Request.CDB[0] = cmd;	/* reset */
-			c->Request.CDB[1] = 0x04;	/* reset a LUN */
+			c->Request.CDB[1] = 0x03;	/* reset a target */
 			break;
 		case 3:	/* No-Op message */
 			c->Request.CDBLen = 1;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index a3fd87b..8575c48 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -58,6 +58,18 @@ static int sendcmd(
 	unsigned char *scsi3addr,
 	int cmd_type);
 
+static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
+	size_t size,
+	unsigned int use_unit_num, /* 0: address the controller,
+				      1: address logical volume log_unit,
+				      2: periph device address is scsi3addr */
+	unsigned int log_unit, __u8 page_code, unsigned char *scsi3addr,
+	int cmd_type);
+
+static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c);
+
+static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool);
+static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool);
 
 static int cciss_scsi_proc_info(
 		struct Scsi_Host *sh,
@@ -1575,6 +1587,68 @@ cciss_seq_tape_report(struct seq_file *seq, int ctlr)
 	CPQ_TAPE_UNLOCK(ctlr, flags);
 }
 
+static int wait_for_device_to_become_ready(ctlr_info_t *h,
+	unsigned char lunaddr[])
+{
+	int rc;
+	int count = 0;
+	int waittime = HZ;
+	CommandList_struct *c;
+
+	c = cmd_alloc(h, 1);
+	if (!c) {
+		printk(KERN_WARNING "cciss%d: out of memory in "
+			"wait_for_device_to_become_ready.\n", h->ctlr);
+		return IO_ERROR;
+	}
+
+	/* Send test unit ready until device ready, or give up. */
+	while (count < 20) {
+
+		/* Wait for a bit.  do this first, because if we send
+		 * the TUR right away, the reset will just abort it.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(waittime);
+		count++;
+
+		/* Increase wait time with each try, up to a point. */
+		if (waittime < (HZ * 30))
+			waittime = waittime * 2;
+
+		/* Send the Test Unit Ready */
+		rc = fill_cmd(c, TEST_UNIT_READY, h->ctlr, NULL, 0, 0, 0, 0,
+			lunaddr, TYPE_CMD);
+		if (rc == 0) {
+			rc = sendcmd_core(h, c);
+			/* sendcmd turned off interrupts, turn 'em back on. */
+			h->access.set_intr_mask(h, CCISS_INTR_ON);
+		}
+
+		if (rc == 0 && c->err_info->CommandStatus == CMD_SUCCESS)
+			break;
+
+		if (rc == 0 &&
+			c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+			c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION &&
+			(c->err_info->SenseInfo[2] == NO_SENSE ||
+			c->err_info->SenseInfo[2] == UNIT_ATTENTION))
+			break;
+
+		printk(KERN_WARNING "cciss%d: Waiting %d secs "
+			"for device to become ready.\n",
+			h->ctlr, waittime / HZ);
+		rc = 1; /* device not ready. */
+	}
+
+	if (rc)
+		printk("cciss%d: giving up on device.\n", h->ctlr);
+	else
+		printk(KERN_WARNING "cciss%d: device is ready.\n", h->ctlr);
+
+	cmd_free(h, c, 1);
+	return rc;
+}
 
 /* Need at least one of these error handlers to keep ../scsi/hosts.c from 
  * complaining.  Doing a host- or bus-reset can't do anything good here. 
@@ -1591,6 +1665,7 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
 {
 	int rc;
 	CommandList_struct *cmd_in_trouble;
+	unsigned char lunaddr[8];
 	ctlr_info_t **c;
 	int ctlr;
 
@@ -1600,19 +1675,17 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
 		return FAILED;
 	ctlr = (*c)->ctlr;
 	printk(KERN_WARNING "cciss%d: resetting tape drive or medium changer.\n", ctlr);
-
 	/* find the command that's giving us trouble */
 	cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble;
-	if (cmd_in_trouble == NULL) { /* paranoia */
+	if (cmd_in_trouble == NULL) /* paranoia */
 		return FAILED;
-	}
+	memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
 	/* send a reset to the SCSI LUN which the command was sent to */
-	rc = sendcmd(CCISS_RESET_MSG, ctlr, NULL, 0, 2, 0, 0, 
-		(unsigned char *) &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 
+	rc = sendcmd(CCISS_RESET_MSG, ctlr, NULL, 0, 2, 0, 0, lunaddr,
 		TYPE_MSG);
 	/* sendcmd turned off interrupts on the board, turn 'em back on. */
 	(*c)->access.set_intr_mask(*c, CCISS_INTR_ON);
-	if (rc == 0)
+	if (rc == 0 && wait_for_device_to_become_ready(*c, lunaddr) == 0)
 		return SUCCESS;
 	printk(KERN_WARNING "cciss%d: resetting device failed.\n", ctlr);
 	return FAILED;
-- 
cgit v0.10.2


From 7fe063268e73681cdca1a6496a25f93d3332f517 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Tue, 2 Jun 2009 14:48:39 +0200
Subject: cciss: add cciss driver sysfs entries

Add sysfs entries to the cciss driver needed for the dm/multipath tools.

A file for vendor, model, rev, and unique_id is added for each logical
drive under directory /sys/bus/pci/devices/<dev>/ccissX/cXdY.  Where X =
the controller (or host) number and Y is the logical drive number.

A link from /sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY to
/sys/block/cciss!cXdY/device is also created.  A bus is created in
/sys/bus/cciss.  A link is created from the pci ccissX entry to
/sys/bus/cciss/devices/ccissX.  Please consider this for inclusion.

Signed-off-by: Mike Miller <mike.miller@hp.com>
Cc: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
new file mode 100644
index 0000000..0a92a7c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -0,0 +1,33 @@
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/model
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	Displays the SCSI INQUIRY page 0 model for logical drive
+		Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	Displays the SCSI INQUIRY page 0 revision for logical
+		drive Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	Displays the SCSI INQUIRY page 83 serial number for logical
+		drive Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	Displays the SCSI INQUIRY page 0 vendor for logical drive
+		Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
+Date:		March 2009
+Kernel Version: 2.6.30
+Contact:	iss_storagedev@hp.com
+Description:	A symbolic link to /sys/block/cciss!cXdY
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cb43fb3..e7d0095 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -437,6 +437,194 @@ static void __devinit cciss_procinit(int i)
 }
 #endif				/* CONFIG_PROC_FS */
 
+#define MAX_PRODUCT_NAME_LEN 19
+
+#define to_hba(n) container_of(n, struct ctlr_info, dev)
+#define to_drv(n) container_of(n, drive_info_struct, dev)
+
+static struct device_type cciss_host_type = {
+	.name		= "cciss_host",
+};
+
+static ssize_t dev_show_unique_id(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	__u8 sn[16];
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring)
+		ret = -EBUSY;
+	else
+		memcpy(sn, drv->serial_no, sizeof(sn));
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	if (ret)
+		return ret;
+	else
+		return snprintf(buf, 16 * 2 + 2,
+				"%02X%02X%02X%02X%02X%02X%02X%02X"
+				"%02X%02X%02X%02X%02X%02X%02X%02X\n",
+				sn[0], sn[1], sn[2], sn[3],
+				sn[4], sn[5], sn[6], sn[7],
+				sn[8], sn[9], sn[10], sn[11],
+				sn[12], sn[13], sn[14], sn[15]);
+}
+DEVICE_ATTR(unique_id, S_IRUGO, dev_show_unique_id, NULL);
+
+static ssize_t dev_show_vendor(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	char vendor[VENDOR_LEN + 1];
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring)
+		ret = -EBUSY;
+	else
+		memcpy(vendor, drv->vendor, VENDOR_LEN + 1);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	if (ret)
+		return ret;
+	else
+		return snprintf(buf, sizeof(vendor) + 1, "%s\n", drv->vendor);
+}
+DEVICE_ATTR(vendor, S_IRUGO, dev_show_vendor, NULL);
+
+static ssize_t dev_show_model(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	char model[MODEL_LEN + 1];
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring)
+		ret = -EBUSY;
+	else
+		memcpy(model, drv->model, MODEL_LEN + 1);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	if (ret)
+		return ret;
+	else
+		return snprintf(buf, sizeof(model) + 1, "%s\n", drv->model);
+}
+DEVICE_ATTR(model, S_IRUGO, dev_show_model, NULL);
+
+static ssize_t dev_show_rev(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	char rev[REV_LEN + 1];
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring)
+		ret = -EBUSY;
+	else
+		memcpy(rev, drv->rev, REV_LEN + 1);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	if (ret)
+		return ret;
+	else
+		return snprintf(buf, sizeof(rev) + 1, "%s\n", drv->rev);
+}
+DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL);
+
+static struct attribute *cciss_dev_attrs[] = {
+	&dev_attr_unique_id.attr,
+	&dev_attr_model.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_rev.attr,
+	NULL
+};
+
+static struct attribute_group cciss_dev_attr_group = {
+	.attrs = cciss_dev_attrs,
+};
+
+static struct attribute_group *cciss_dev_attr_groups[] = {
+	&cciss_dev_attr_group,
+	NULL
+};
+
+static struct device_type cciss_dev_type = {
+	.name		= "cciss_device",
+	.groups		= cciss_dev_attr_groups,
+};
+
+static struct bus_type cciss_bus_type = {
+	.name		= "cciss",
+};
+
+
+/*
+ * Initialize sysfs entry for each controller.  This sets up and registers
+ * the 'cciss#' directory for each individual controller under
+ * /sys/bus/pci/devices/<dev>/.
+ */
+static int cciss_create_hba_sysfs_entry(struct ctlr_info *h)
+{
+	device_initialize(&h->dev);
+	h->dev.type = &cciss_host_type;
+	h->dev.bus = &cciss_bus_type;
+	dev_set_name(&h->dev, "%s", h->devname);
+	h->dev.parent = &h->pdev->dev;
+
+	return device_add(&h->dev);
+}
+
+/*
+ * Remove sysfs entries for an hba.
+ */
+static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h)
+{
+	device_del(&h->dev);
+}
+
+/*
+ * Initialize sysfs for each logical drive.  This sets up and registers
+ * the 'c#d#' directory for each individual logical drive under
+ * /sys/bus/pci/devices/<dev/ccis#/. We also create a link from
+ * /sys/block/cciss!c#d# to this entry.
+ */
+static int cciss_create_ld_sysfs_entry(struct ctlr_info *h,
+				       drive_info_struct *drv,
+				       int drv_index)
+{
+	device_initialize(&drv->dev);
+	drv->dev.type = &cciss_dev_type;
+	drv->dev.bus = &cciss_bus_type;
+	dev_set_name(&drv->dev, "c%dd%d", h->ctlr, drv_index);
+	drv->dev.parent = &h->dev;
+	return device_add(&drv->dev);
+}
+
+/*
+ * Remove sysfs entries for a logical drive.
+ */
+static void cciss_destroy_ld_sysfs_entry(drive_info_struct *drv)
+{
+	device_del(&drv->dev);
+}
+
 /*
  * For operations that cannot sleep, a command block is allocated at init,
  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
@@ -1332,6 +1520,45 @@ static void cciss_softirq_done(struct request *rq)
 	spin_unlock_irqrestore(&h->lock, flags);
 }
 
+/* This function gets the SCSI vendor, model, and revision of a logical drive
+ * via the inquiry page 0.  Model, vendor, and rev are set to empty strings if
+ * they cannot be read.
+ */
+static void cciss_get_device_descr(int ctlr, int logvol, int withirq,
+				   char *vendor, char *model, char *rev)
+{
+	int rc;
+	InquiryData_struct *inq_buf;
+
+	*vendor = '\0';
+	*model = '\0';
+	*rev = '\0';
+
+	inq_buf = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
+	if (!inq_buf)
+		return;
+
+	if (withirq)
+		rc = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buf,
+				     sizeof(InquiryData_struct), 1, logvol,
+				     0, TYPE_CMD);
+	else
+		rc = sendcmd(CISS_INQUIRY, ctlr, inq_buf,
+			     sizeof(InquiryData_struct), 1, logvol, 0, NULL,
+			     TYPE_CMD);
+	if (rc == IO_OK) {
+		memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
+		vendor[VENDOR_LEN] = '\0';
+		memcpy(model, &inq_buf->data_byte[16], MODEL_LEN);
+		model[MODEL_LEN] = '\0';
+		memcpy(rev, &inq_buf->data_byte[32], REV_LEN);
+		rev[REV_LEN] = '\0';
+	}
+
+	kfree(inq_buf);
+	return;
+}
+
 /* This function gets the serial number of a logical drive via
  * inquiry page 0x83.  Serial no. is 16 bytes.  If the serial
  * number cannot be had, for whatever reason, 16 bytes of 0xff
@@ -1372,7 +1599,7 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 	disk->first_minor = drv_index << NWD_SHIFT;
 	disk->fops = &cciss_fops;
 	disk->private_data = &h->drv[drv_index];
-	disk->driverfs_dev = &h->pdev->dev;
+	disk->driverfs_dev = &h->drv[drv_index].dev;
 
 	/* Set up queue information */
 	blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
@@ -1463,6 +1690,8 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
 	drvinfo->block_size = block_size;
 	drvinfo->nr_blocks = total_size + 1;
 
+	cciss_get_device_descr(ctlr, drv_index, 1, drvinfo->vendor,
+				drvinfo->model, drvinfo->rev);
 	cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
 			sizeof(drvinfo->serial_no));
 
@@ -1512,6 +1741,9 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
 	h->drv[drv_index].cylinders = drvinfo->cylinders;
 	h->drv[drv_index].raid_level = drvinfo->raid_level;
 	memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
+	memcpy(h->drv[drv_index].vendor, drvinfo->vendor, VENDOR_LEN + 1);
+	memcpy(h->drv[drv_index].model, drvinfo->model, MODEL_LEN + 1);
+	memcpy(h->drv[drv_index].rev, drvinfo->rev, REV_LEN + 1);
 
 	++h->num_luns;
 	disk = h->gendisk[drv_index];
@@ -1586,6 +1818,8 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
 		}
 	}
 	h->drv[drv_index].LunID = lunid;
+	if (cciss_create_ld_sysfs_entry(h, &h->drv[drv_index], drv_index))
+		goto err_free_disk;
 
 	/* Don't need to mark this busy because nobody */
 	/* else knows about this disk yet to contend */
@@ -1593,6 +1827,11 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
 	h->drv[drv_index].busy_configuring = 0;
 	wmb();
 	return drv_index;
+
+err_free_disk:
+	put_disk(h->gendisk[drv_index]);
+	h->gendisk[drv_index] = NULL;
+	return -1;
 }
 
 /* This is for the special case of a controller which
@@ -1713,6 +1952,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
 			h->drv[i].busy_configuring = 1;
 			spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
 			return_code = deregister_disk(h, i, 1);
+			cciss_destroy_ld_sysfs_entry(&h->drv[i]);
 			h->drv[i].busy_configuring = 0;
 		}
 	}
@@ -3719,12 +3959,15 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	INIT_HLIST_HEAD(&hba[i]->reqQ);
 
 	if (cciss_pci_init(hba[i], pdev) != 0)
-		goto clean1;
+		goto clean0;
 
 	sprintf(hba[i]->devname, "cciss%d", i);
 	hba[i]->ctlr = i;
 	hba[i]->pdev = pdev;
 
+	if (cciss_create_hba_sysfs_entry(hba[i]))
+		goto clean0;
+
 	/* configure PCI DMA stuff */
 	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
 		dac = 1;
@@ -3868,6 +4111,8 @@ clean4:
 clean2:
 	unregister_blkdev(hba[i]->major, hba[i]->devname);
 clean1:
+	cciss_destroy_hba_sysfs_entry(hba[i]);
+clean0:
 	hba[i]->busy_initializing = 0;
 	/* cleanup any queues that may have been initialized */
 	for (j=0; j <= hba[i]->highest_lun; j++){
@@ -3978,6 +4223,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 	 */
 	pci_release_regions(pdev);
 	pci_set_drvdata(pdev, NULL);
+	cciss_destroy_hba_sysfs_entry(hba[i]);
 	free_hba(i);
 }
 
@@ -3995,6 +4241,8 @@ static struct pci_driver cciss_pci_driver = {
  */
 static int __init cciss_init(void)
 {
+	int err;
+
 	/*
 	 * The hardware requires that commands are aligned on a 64-bit
 	 * boundary. Given that we use pci_alloc_consistent() to allocate an
@@ -4004,8 +4252,20 @@ static int __init cciss_init(void)
 
 	printk(KERN_INFO DRIVER_NAME "\n");
 
+	err = bus_register(&cciss_bus_type);
+	if (err)
+		return err;
+
 	/* Register for our PCI devices */
-	return pci_register_driver(&cciss_pci_driver);
+	err = pci_register_driver(&cciss_pci_driver);
+	if (err)
+		goto err_bus_register;
+
+	return 0;
+
+err_bus_register:
+	bus_unregister(&cciss_bus_type);
+	return err;
 }
 
 static void __exit cciss_cleanup(void)
@@ -4022,6 +4282,7 @@ static void __exit cciss_cleanup(void)
 		}
 	}
 	remove_proc_entry("driver/cciss", NULL);
+	bus_unregister(&cciss_bus_type);
 }
 
 static void fail_all_cmds(unsigned long ctlr)
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 703e080..dd1926d 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -12,6 +12,10 @@
 #define IO_OK		0
 #define IO_ERROR	1
 
+#define VENDOR_LEN	8
+#define MODEL_LEN	16
+#define REV_LEN		4
+
 struct ctlr_info;
 typedef struct ctlr_info ctlr_info_t;
 
@@ -34,13 +38,18 @@ typedef struct _drive_info_struct
 	int 	cylinders;
 	int	raid_level; /* set to -1 to indicate that
 			     * the drive is not in use/configured
-			    */
-	int	busy_configuring; /*This is set when the drive is being removed
-				   *to prevent it from being opened or it's queue
-				   *from being started.
-				  */
-	__u8 serial_no[16]; /* from inquiry page 0x83, */
-			    /* not necc. null terminated. */
+			     */
+	int	busy_configuring; /* This is set when a drive is being removed
+				   * to prevent it from being opened or it's
+				   * queue from being started.
+				   */
+	struct	device dev;
+	__u8 serial_no[16]; /* from inquiry page 0x83,
+			     * not necc. null terminated.
+			     */
+	char vendor[VENDOR_LEN + 1]; /* SCSI vendor string */
+	char model[MODEL_LEN + 1];   /* SCSI model string */
+	char rev[REV_LEN + 1];       /* SCSI revision string */
 } drive_info_struct;
 
 #ifdef CONFIG_CISS_SCSI_TAPE
@@ -123,6 +132,7 @@ struct ctlr_info
 	unsigned char alive;
 	struct completion *rescan_wait;
 	struct task_struct *cciss_scan_thread;
+	struct device dev;
 };
 
 /*  Defining the diffent access_menthods */
-- 
cgit v0.10.2


From 77b0308a0778861111184e097533000f7a458c37 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 2 Jun 2009 14:51:30 +0200
Subject: cciss: use schedule_timeout_interruptible()

Use schedule_timeout_interruptible() instead of open-coding the set and
schedule parts.

Cc: Mike Miller <mikem@beardog.cca.cpqcorp.net>
Cc: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 8575c48..2edfc9b 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -1608,8 +1608,7 @@ static int wait_for_device_to_become_ready(ctlr_info_t *h,
 		/* Wait for a bit.  do this first, because if we send
 		 * the TUR right away, the reset will just abort it.
 		 */
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(waittime);
+		schedule_timeout_interruptible(waittime);
 		count++;
 
 		/* Increase wait time with each try, up to a point. */
-- 
cgit v0.10.2


From dbdc9dd342f0a7e32f40f0d4ade662bdfe057484 Mon Sep 17 00:00:00 2001
From: vibi sreenivasan <vibi_sreenivasan@cms.com>
Date: Tue, 2 Jun 2009 14:52:32 +0200
Subject: Removed reference to non-existing file
 Documentation/PCI/PCI-DMA-mapping.txt

File Documentation/PCI/PCI-DMA-mapping.txt does not exist.
 Documentation/DMA-mapping.txt contains DMA Mapping details

Signed-off-by: vibi sreenivasan <vibi_sreenivasan@cms.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6fab97e..8d2158a 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address
 do not have a corresponding kernel virtual address space mapping) and
 low-memory pages.
 
-Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion
+Note: Please refer to Documentation/DMA-mapping.txt for a discussion
 on PCI high mem DMA aspects and mapping of scatter gather lists, and support
 for 64 bit PCI.
 
-- 
cgit v0.10.2


From 2e507d849f1834d3fe9aae71b7aabf4c2a3827b8 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 18:24:20 +0200
Subject: dma-debug: add variables and checks for driver filter

This patch adds the state variables for the driver filter and a function
to check if the filter is enabled and matches to the current device. The
check is built into the err_printk function.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index cdd205d..c01f647 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -99,6 +99,15 @@ static struct dentry *show_num_errors_dent  __read_mostly;
 static struct dentry *num_free_entries_dent __read_mostly;
 static struct dentry *min_free_entries_dent __read_mostly;
 
+/* per-driver filter related state */
+
+#define NAME_MAX_LEN	64
+
+static char                  current_driver_name[NAME_MAX_LEN] __read_mostly;
+static struct device_driver *current_driver                    __read_mostly;
+
+static DEFINE_RWLOCK(driver_name_lock);
+
 static const char *type2name[4] = { "single", "page",
 				    "scather-gather", "coherent" };
 
@@ -128,9 +137,47 @@ static inline void dump_entry_trace(struct dma_debug_entry *entry)
 #endif
 }
 
+static bool driver_filter(struct device *dev)
+{
+	/* driver filter off */
+	if (likely(!current_driver_name[0]))
+		return true;
+
+	/* driver filter on and initialized */
+	if (current_driver && dev->driver == current_driver)
+		return true;
+
+	/* driver filter on but not yet initialized */
+	if (!current_driver && current_driver_name[0]) {
+		struct device_driver *drv = get_driver(dev->driver);
+		unsigned long flags;
+		bool ret = false;
+
+		if (!drv)
+			return false;
+
+		/* lock to protect against change of current_driver_name */
+		read_lock_irqsave(&driver_name_lock, flags);
+
+		if (drv->name &&
+		    strncmp(current_driver_name, drv->name, 63) == 0) {
+			current_driver = drv;
+			ret = true;
+		}
+
+		read_unlock_irqrestore(&driver_name_lock, flags);
+		put_driver(drv);
+
+		return ret;
+	}
+
+	return false;
+}
+
 #define err_printk(dev, entry, format, arg...) do {		\
 		error_count += 1;				\
-		if (show_all_errors || show_num_errors > 0) {	\
+		if (driver_filter(dev) &&			\
+		    (show_all_errors || show_num_errors > 0)) {	\
 			WARN(1, "%s %s: " format,		\
 			     dev_driver_string(dev),		\
 			     dev_name(dev) , ## arg);		\
-- 
cgit v0.10.2


From 822fa19b5c23746577687a0ec48eae0ec1cd22a0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 14:12:17 +0200
Subject: ALSA: ALSA: ctxfi - Release PCM resources at each prepare call

The prepare callback can be called multiple times, thus it needs to
release and acquire the resource again by itself at the second or later
call.

Simply add pcm_release_resources() at the beginning of each prepare
callback in ctatc.c.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 1a4bb35..e14ed71 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -254,6 +254,9 @@ static int atc_pcm_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 		return 0;
 	}
 
+	/* first release old resources */
+	atc->pcm_release_resources(atc, apcm);
+
 	/* Get SRC resource */
 	desc.multi = apcm->substream->runtime->channels;
 	desc.msr = atc->msr;
@@ -496,6 +499,9 @@ atc_pcm_capture_get_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	int n_srcimp = 0, n_amixer = 0, n_srcc = 0, n_sum = 0;
 	struct src_node_conf_t src_node_conf[2] = {{0} };
 
+	/* first release old resources */
+	atc->pcm_release_resources(atc, apcm);
+
 	/* The numbers of converting SRCs and SRCIMPs should be determined
 	 * by pitch value. */
 
@@ -767,6 +773,9 @@ static int spdif_passthru_playback_get_resources(struct ct_atc *atc,
 	int n_amixer = apcm->substream->runtime->channels, i = 0;
 	unsigned int pitch = 0, rsr = atc->pll_rate;
 
+	/* first release old resources */
+	atc->pcm_release_resources(atc, apcm);
+
 	/* Get SRC resource */
 	desc.multi = apcm->substream->runtime->channels;
 	desc.msr = 1;
-- 
cgit v0.10.2


From 6585db943aade186d38eaab2720c18887b94c875 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 14:17:27 +0200
Subject: ALSA: ctxfi - Fix surround mixer names

We usually pick up "Surround" mixer for the rear output, and "Side"
for the extra surround.  Fix the channel mapping to follow it.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctmixer.c b/sound/pci/ctxfi/ctmixer.c
index b795076..177c46e 100644
--- a/sound/pci/ctxfi/ctmixer.c
+++ b/sound/pci/ctxfi/ctmixer.c
@@ -168,7 +168,7 @@ ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = {
 	},
 	[MIXER_WAVES_P] = {
 		.ctl = 1,
-		.name = "Surround Playback Volume",
+		.name = "Side Playback Volume",
 	},
 	[MIXER_WAVEC_P] = {
 		.ctl = 1,
@@ -176,7 +176,7 @@ ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = {
 	},
 	[MIXER_WAVER_P] = {
 		.ctl = 1,
-		.name = "Rear Playback Volume",
+		.name = "Surround Playback Volume",
 	},
 
 	[MIXER_PCM_C_S] = {
@@ -213,7 +213,7 @@ ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = {
 	},
 	[MIXER_WAVES_P_S] = {
 		.ctl = 1,
-		.name = "Surround Playback Switch",
+		.name = "Side Playback Switch",
 	},
 	[MIXER_WAVEC_P_S] = {
 		.ctl = 1,
@@ -221,7 +221,7 @@ ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = {
 	},
 	[MIXER_WAVER_P_S] = {
 		.ctl = 1,
-		.name = "Rear Playback Switch",
+		.name = "Surround Playback Switch",
 	},
 	[MIXER_DIGITAL_IO_S] = {
 		.ctl = 0,
-- 
cgit v0.10.2


From 8372d4980fbc2e403f0dc5457441c8c6b7c04915 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 14:27:56 +0200
Subject: ALSA: ctxfi - Fix PCM device naming

PCM names for surround streams should be also fixed as well as the mixer
element names.  Also, a bit clean up for PCM name setup.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index e14ed71..675dd4c 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -72,15 +72,15 @@ static struct {
 	[FRONT]		= { .create = ct_alsa_pcm_create,
 			    .destroy = NULL,
 			    .public_name = "Front/WaveIn"},
-	[REAR]		= { .create = ct_alsa_pcm_create,
+	[SURROUND]	= { .create = ct_alsa_pcm_create,
 			    .destroy = NULL,
-			    .public_name = "Rear"},
+			    .public_name = "Surround"},
 	[CLFE]		= { .create = ct_alsa_pcm_create,
 			    .destroy = NULL,
 			    .public_name = "Center/LFE"},
-	[SURROUND]	= { .create = ct_alsa_pcm_create,
+	[SIDE]		= { .create = ct_alsa_pcm_create,
 			    .destroy = NULL,
-			    .public_name = "Surround"},
+			    .public_name = "Side"},
 	[IEC958]	= { .create = ct_alsa_pcm_create,
 			    .destroy = NULL,
 			    .public_name = "IEC958 Non-audio"},
diff --git a/sound/pci/ctxfi/ctatc.h b/sound/pci/ctxfi/ctatc.h
index a7b0ec2..b86d12c 100644
--- a/sound/pci/ctxfi/ctatc.h
+++ b/sound/pci/ctxfi/ctatc.h
@@ -29,9 +29,9 @@
 
 enum CTALSADEVS {		/* Types of alsa devices */
 	FRONT,
-	REAR,
-	CLFE,
 	SURROUND,
+	CLFE,
+	SIDE,
 	IEC958,
 	MIXER,
 	NUM_CTALSADEVS		/* This should always be the last */
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index a64cb0e..756d8b2 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -469,12 +469,10 @@ int ct_alsa_pcm_create(struct ct_atc *atc,
 	struct snd_pcm *pcm;
 	int err;
 	int playback_count, capture_count;
-	char name[128];
 
-	strncpy(name, device_name, sizeof(name));
 	playback_count = (IEC958 == device) ? 1 : 8;
 	capture_count = (FRONT == device) ? 1 : 0;
-	err = snd_pcm_new(atc->card, name, device,
+	err = snd_pcm_new(atc->card, "ctxfi", device,
 			  playback_count, capture_count, &pcm);
 	if (err < 0) {
 		printk(KERN_ERR "ctxfi: snd_pcm_new failed!! Err=%d\n", err);
@@ -484,7 +482,7 @@ int ct_alsa_pcm_create(struct ct_atc *atc,
 	pcm->private_data = atc;
 	pcm->info_flags = 0;
 	pcm->dev_subclass = SNDRV_PCM_SUBCLASS_GENERIC_MIX;
-	strcpy(pcm->name, device_name);
+	strlcpy(pcm->name, device_name, sizeof(pcm->name));
 
 	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &ct_pcm_playback_ops);
 
-- 
cgit v0.10.2


From d2b9b96c516d4d61663d92ab4ad4f15ca0134ef2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 14:39:05 +0200
Subject: ALSA: ctxfi - Fix supported PCM formats

The device seems supporting only U8, S16, S24_3LE, S32.  Other linear
formats result in bad outputs.

Also, added the support for 32bit float format, which wasn't listed
in the original code.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 675dd4c..268ecc4 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -170,16 +170,15 @@ static unsigned int convert_format(snd_pcm_format_t snd_format)
 {
 	switch (snd_format) {
 	case SNDRV_PCM_FORMAT_U8:
-	case SNDRV_PCM_FORMAT_S8:
 		return SRC_SF_U8;
 	case SNDRV_PCM_FORMAT_S16_LE:
-	case SNDRV_PCM_FORMAT_U16_LE:
 		return SRC_SF_S16;
 	case SNDRV_PCM_FORMAT_S24_3LE:
 		return SRC_SF_S24;
-	case SNDRV_PCM_FORMAT_S24_LE:
 	case SNDRV_PCM_FORMAT_S32_LE:
 		return SRC_SF_S32;
+	case SNDRV_PCM_FORMAT_FLOAT_LE:
+		return SRC_SF_F32;
 	default:
 		printk(KERN_ERR "ctxfi: not recognized snd format is %d \n",
 			snd_format);
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index 756d8b2..26d86dc 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -26,12 +26,10 @@ static struct snd_pcm_hardware ct_pcm_playback_hw = {
 				   SNDRV_PCM_INFO_MMAP_VALID |
 				   SNDRV_PCM_INFO_PAUSE),
 	.formats		= (SNDRV_PCM_FMTBIT_U8 |
-				   SNDRV_PCM_FMTBIT_S8 |
 				   SNDRV_PCM_FMTBIT_S16_LE |
-				   SNDRV_PCM_FMTBIT_U16_LE |
 				   SNDRV_PCM_FMTBIT_S24_3LE |
-				   SNDRV_PCM_FMTBIT_S24_LE |
-				   SNDRV_PCM_FMTBIT_S32_LE),
+				   SNDRV_PCM_FMTBIT_S32_LE |
+				   SNDRV_PCM_FMTBIT_FLOAT_LE),
 	.rates			= (SNDRV_PCM_RATE_CONTINUOUS |
 				   SNDRV_PCM_RATE_8000_192000),
 	.rate_min		= 8000,
@@ -52,8 +50,7 @@ static struct snd_pcm_hardware ct_spdif_passthru_playback_hw = {
 				   SNDRV_PCM_INFO_BLOCK_TRANSFER |
 				   SNDRV_PCM_INFO_MMAP_VALID |
 				   SNDRV_PCM_INFO_PAUSE),
-	.formats		= (SNDRV_PCM_FMTBIT_S16_LE |
-				   SNDRV_PCM_FMTBIT_U16_LE),
+	.formats		= SNDRV_PCM_FMTBIT_S16_LE,
 	.rates			= (SNDRV_PCM_RATE_48000 |
 				   SNDRV_PCM_RATE_44100 |
 				   SNDRV_PCM_RATE_32000),
@@ -77,12 +74,10 @@ static struct snd_pcm_hardware ct_pcm_capture_hw = {
 				   SNDRV_PCM_INFO_PAUSE |
 				   SNDRV_PCM_INFO_MMAP_VALID),
 	.formats		= (SNDRV_PCM_FMTBIT_U8 |
-				   SNDRV_PCM_FMTBIT_S8 |
 				   SNDRV_PCM_FMTBIT_S16_LE |
-				   SNDRV_PCM_FMTBIT_U16_LE |
 				   SNDRV_PCM_FMTBIT_S24_3LE |
-				   SNDRV_PCM_FMTBIT_S24_LE |
-				   SNDRV_PCM_FMTBIT_S32_LE),
+				   SNDRV_PCM_FMTBIT_S32_LE |
+				   SNDRV_PCM_FMTBIT_FLOAT_LE),
 	.rates			= (SNDRV_PCM_RATE_CONTINUOUS |
 				   SNDRV_PCM_RATE_8000_96000),
 	.rate_min		= 8000,
-- 
cgit v0.10.2


From cd391e206f486955e216a61bd9ebcb0e142122e9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 15:04:29 +0200
Subject: ALSA: ctxfi - Remove PAGE_SIZE limitation

Remove the limitation of PAGE_SIZE to be 4k by defining the own
page size and macros for 4k.  8kb page size could be natively supported,
but it's disabled right now for simplicity.

Also, clean up using upper_32_bits() macro.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 2d7fef5..3a7640f 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -277,7 +277,6 @@ config SND_CS5535AUDIO
 
 config SND_CTXFI
 	tristate "Creative Sound Blaster X-Fi"
-	depends on X86
 	select SND_PCM
 	help
 	  If you want to use soundcards based on Creative Sound Blastr X-Fi
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index 44283bd..b7b8e6f 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1249,19 +1249,15 @@ static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
 	}
 
 	trnctl = 0x13;  /* 32-bit, 4k-size page */
-#if BITS_PER_LONG == 64
-	ptp_phys_low = info->vm_pgt_phys & ((1UL<<32)-1);
-	ptp_phys_high = (info->vm_pgt_phys>>32) & ((1UL<<32)-1);
-	trnctl |= (1<<2);
-#elif BITS_PER_LONG == 32
-	ptp_phys_low = info->vm_pgt_phys & (~0UL);
-	ptp_phys_high = 0;
-#else
-#	error "Unknown BITS_PER_LONG!"
-#endif
+	ptp_phys_low = (u32)info->vm_pgt_phys;
+	ptp_phys_high = upper_32_bits(info->vm_pgt_phys);
+	if (sizeof(void *) == 8) /* 64bit address */
+		trnctl |= (1 << 2);
+#if 0 /* Only 4k h/w pages for simplicitiy */
 #if PAGE_SIZE == 8192
 	trnctl |= (1<<5);
 #endif
+#endif
 	hw_write_20kx(hw, PTPALX, ptp_phys_low);
 	hw_write_20kx(hw, PTPAHX, ptp_phys_high);
 	hw_write_20kx(hw, TRNCTL, trnctl);
diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
index 7c24c2c..3497287 100644
--- a/sound/pci/ctxfi/cthw20k2.c
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -1203,19 +1203,10 @@ static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
 	}
 
 	vmctl = 0x80000C0F;  /* 32-bit, 4k-size page */
-#if BITS_PER_LONG == 64
-	ptp_phys_low = info->vm_pgt_phys & ((1UL<<32)-1);
-	ptp_phys_high = (info->vm_pgt_phys>>32) & ((1UL<<32)-1);
-	vmctl |= (3<<8);
-#elif BITS_PER_LONG == 32
-	ptp_phys_low = info->vm_pgt_phys & (~0UL);
-	ptp_phys_high = 0;
-#else
-#	error "Unknown BITS_PER_LONG!"
-#endif
-#if PAGE_SIZE == 8192
-#	error "Don't support 8k-page!"
-#endif
+	ptp_phys_low = (u32)info->vm_pgt_phys;
+	ptp_phys_high = upper_32_bits(info->vm_pgt_phys);
+	if (sizeof(void *) == 8) /* 64bit address */
+		vmctl |= (3 << 8);
 	/* Write page table physical address to all PTPAL registers */
 	for (i = 0; i < 64; i++) {
 		hw_write_20kx(hw, VMEM_PTPAL+(16*i), ptp_phys_low);
diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c
index 363b67e..74a0362 100644
--- a/sound/pci/ctxfi/ctvmem.c
+++ b/sound/pci/ctxfi/ctvmem.c
@@ -18,12 +18,11 @@
 #include "ctvmem.h"
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <asm/page.h>	/* for PAGE_SIZE macro definition */
 #include <linux/io.h>
 #include <asm/pgtable.h>
 
-#define CT_PTES_PER_PAGE (PAGE_SIZE / sizeof(void *))
-#define CT_ADDRS_PER_PAGE (CT_PTES_PER_PAGE * PAGE_SIZE)
+#define CT_PTES_PER_PAGE (CT_PAGE_SIZE / sizeof(void *))
+#define CT_ADDRS_PER_PAGE (CT_PTES_PER_PAGE * CT_PAGE_SIZE)
 
 /* *
  * Find or create vm block based on requested @size.
@@ -138,24 +137,24 @@ ct_vm_map(struct ct_vm *vm, void *host_addr, int size)
 		return NULL;
 	}
 
-	start_phys = (virt_to_phys(host_addr) & PAGE_MASK);
-	pages = (PAGE_ALIGN(virt_to_phys(host_addr) + size)
-			- start_phys) >> PAGE_SHIFT;
+	start_phys = (virt_to_phys(host_addr) & CT_PAGE_MASK);
+	pages = (CT_PAGE_ALIGN(virt_to_phys(host_addr) + size)
+			- start_phys) >> CT_PAGE_SHIFT;
 
 	ptp = vm->ptp[0];
 
-	block = get_vm_block(vm, (pages << PAGE_SHIFT));
+	block = get_vm_block(vm, (pages << CT_PAGE_SHIFT));
 	if (block == NULL) {
 		printk(KERN_ERR "ctxfi: No virtual memory block that is big "
 				  "enough to allocate!\n");
 		return NULL;
 	}
 
-	pte_start = (block->addr >> PAGE_SHIFT);
+	pte_start = (block->addr >> CT_PAGE_SHIFT);
 	for (i = 0; i < pages; i++)
-		ptp[pte_start+i] = start_phys + (i << PAGE_SHIFT);
+		ptp[pte_start+i] = start_phys + (i << CT_PAGE_SHIFT);
 
-	block->addr += (virt_to_phys(host_addr) & (~PAGE_MASK));
+	block->addr += (virt_to_phys(host_addr) & (~CT_PAGE_MASK));
 	block->size = size;
 
 	return block;
@@ -164,9 +163,9 @@ ct_vm_map(struct ct_vm *vm, void *host_addr, int size)
 static void ct_vm_unmap(struct ct_vm *vm, struct ct_vm_block *block)
 {
 	/* do unmapping */
-	block->size = ((block->addr + block->size + PAGE_SIZE - 1)
-			& PAGE_MASK) - (block->addr & PAGE_MASK);
-	block->addr &= PAGE_MASK;
+	block->size = ((block->addr + block->size + CT_PAGE_SIZE - 1)
+			& CT_PAGE_MASK) - (block->addr & CT_PAGE_MASK);
+	block->addr &= CT_PAGE_MASK;
 	put_vm_block(vm, block);
 }
 
diff --git a/sound/pci/ctxfi/ctvmem.h b/sound/pci/ctxfi/ctvmem.h
index 618952e..17d2d37 100644
--- a/sound/pci/ctxfi/ctvmem.h
+++ b/sound/pci/ctxfi/ctvmem.h
@@ -23,6 +23,14 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 
+/* The chip can handle the page table of 4k pages
+ * (emu20k1 can handle even 8k pages, but we don't use it right now)
+ */
+#define CT_PAGE_SIZE	4096
+#define CT_PAGE_SHIFT	12
+#define CT_PAGE_MASK	(~(PAGE_SIZE - 1))
+#define CT_PAGE_ALIGN(addr)	ALIGN(addr, CT_PAGE_SIZE)
+
 struct ct_vm_block {
 	unsigned int addr;	/* starting logical addr of this block */
 	unsigned int size;	/* size of this device virtual mem block */
-- 
cgit v0.10.2


From c76157d9286ed598c241c212aa5a3c6e5107bd82 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 15:26:19 +0200
Subject: ALSA: ctxfi - Support SG-buffers

Use SG-buffers instead of contiguous pages.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 268ecc4..6849475 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -128,7 +128,7 @@ static int ct_map_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	runtime = apcm->substream->runtime;
 	vm = atc->vm;
 
-	apcm->vm_block = vm->map(vm, runtime->dma_area, runtime->dma_bytes);
+	apcm->vm_block = vm->map(vm, apcm->substream, runtime->dma_bytes);
 
 	if (NULL == apcm->vm_block)
 		return -ENOENT;
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index 26d86dc..52ddf19 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -442,6 +442,7 @@ static struct snd_pcm_ops ct_pcm_playback_ops = {
 	.prepare	= ct_pcm_playback_prepare,
 	.trigger	= ct_pcm_playback_trigger,
 	.pointer	= ct_pcm_playback_pointer,
+	.page		= snd_pcm_sgbuf_ops_page,
 };
 
 /* PCM operators for capture */
@@ -454,6 +455,7 @@ static struct snd_pcm_ops ct_pcm_capture_ops = {
 	.prepare	= ct_pcm_capture_prepare,
 	.trigger	= ct_pcm_capture_trigger,
 	.pointer	= ct_pcm_capture_pointer,
+	.page		= snd_pcm_sgbuf_ops_page,
 };
 
 /* Create ALSA pcm device */
@@ -485,7 +487,7 @@ int ct_alsa_pcm_create(struct ct_atc *atc,
 		snd_pcm_set_ops(pcm,
 				SNDRV_PCM_STREAM_CAPTURE, &ct_pcm_capture_ops);
 
-	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
 			snd_dma_pci_data(atc->pci), 128*1024, 128*1024);
 
 	return 0;
diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c
index 74a0362..b7f8e58 100644
--- a/sound/pci/ctxfi/ctvmem.c
+++ b/sound/pci/ctxfi/ctvmem.c
@@ -19,7 +19,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/io.h>
-#include <asm/pgtable.h>
+#include <sound/pcm.h>
 
 #define CT_PTES_PER_PAGE (CT_PAGE_SIZE / sizeof(void *))
 #define CT_ADDRS_PER_PAGE (CT_PTES_PER_PAGE * CT_PAGE_SIZE)
@@ -34,6 +34,13 @@ get_vm_block(struct ct_vm *vm, unsigned int size)
 	struct ct_vm_block *block = NULL, *entry = NULL;
 	struct list_head *pos = NULL;
 
+	size = CT_PAGE_ALIGN(size);
+	if (size > vm->size) {
+		printk(KERN_ERR "ctxfi: Fail! No sufficient device virtural "
+				  "memory space available!\n");
+		return NULL;
+	}
+
 	mutex_lock(&vm->lock);
 	list_for_each(pos, &vm->unused) {
 		entry = list_entry(pos, struct ct_vm_block, list);
@@ -73,6 +80,8 @@ static void put_vm_block(struct ct_vm *vm, struct ct_vm_block *block)
 	struct ct_vm_block *entry = NULL, *pre_ent = NULL;
 	struct list_head *pos = NULL, *pre = NULL;
 
+	block->size = CT_PAGE_ALIGN(block->size);
+
 	mutex_lock(&vm->lock);
 	list_del(&block->list);
 	vm->size += block->size;
@@ -115,57 +124,36 @@ static void put_vm_block(struct ct_vm *vm, struct ct_vm_block *block)
 
 /* Map host addr (kmalloced/vmalloced) to device logical addr. */
 static struct ct_vm_block *
-ct_vm_map(struct ct_vm *vm, void *host_addr, int size)
+ct_vm_map(struct ct_vm *vm, struct snd_pcm_substream *substream, int size)
 {
-	struct ct_vm_block *block = NULL;
-	unsigned long pte_start;
-	unsigned long i;
-	unsigned long pages;
-	unsigned long start_phys;
+	struct ct_vm_block *block;
+	unsigned int pte_start;
+	unsigned i, pages;
 	unsigned long *ptp;
 
-	/* do mapping */
-	if ((unsigned long)host_addr >= VMALLOC_START) {
-		printk(KERN_ERR "ctxfi: "
-		       "Fail! Not support vmalloced addr now!\n");
-		return NULL;
-	}
-
-	if (size > vm->size) {
-		printk(KERN_ERR "ctxfi: Fail! No sufficient device virtural "
-				  "memory space available!\n");
-		return NULL;
-	}
-
-	start_phys = (virt_to_phys(host_addr) & CT_PAGE_MASK);
-	pages = (CT_PAGE_ALIGN(virt_to_phys(host_addr) + size)
-			- start_phys) >> CT_PAGE_SHIFT;
-
-	ptp = vm->ptp[0];
-
-	block = get_vm_block(vm, (pages << CT_PAGE_SHIFT));
+	block = get_vm_block(vm, size);
 	if (block == NULL) {
 		printk(KERN_ERR "ctxfi: No virtual memory block that is big "
 				  "enough to allocate!\n");
 		return NULL;
 	}
 
+	ptp = vm->ptp[0];
 	pte_start = (block->addr >> CT_PAGE_SHIFT);
-	for (i = 0; i < pages; i++)
-		ptp[pte_start+i] = start_phys + (i << CT_PAGE_SHIFT);
+	pages = block->size >> CT_PAGE_SHIFT;
+	for (i = 0; i < pages; i++) {
+		unsigned long addr;
+		addr = snd_pcm_sgbuf_get_addr(substream, i << CT_PAGE_SHIFT);
+		ptp[pte_start + i] = addr;
+	}
 
-	block->addr += (virt_to_phys(host_addr) & (~CT_PAGE_MASK));
 	block->size = size;
-
 	return block;
 }
 
 static void ct_vm_unmap(struct ct_vm *vm, struct ct_vm_block *block)
 {
 	/* do unmapping */
-	block->size = ((block->addr + block->size + CT_PAGE_SIZE - 1)
-			& CT_PAGE_MASK) - (block->addr & CT_PAGE_MASK);
-	block->addr &= CT_PAGE_MASK;
 	put_vm_block(vm, block);
 }
 
diff --git a/sound/pci/ctxfi/ctvmem.h b/sound/pci/ctxfi/ctvmem.h
index 17d2d37..01e4fd0 100644
--- a/sound/pci/ctxfi/ctvmem.h
+++ b/sound/pci/ctxfi/ctvmem.h
@@ -37,6 +37,8 @@ struct ct_vm_block {
 	struct list_head list;
 };
 
+struct snd_pcm_substream;
+
 /* Virtual memory management object for card device */
 struct ct_vm {
 	void *ptp[CT_PTP_NUM];		/* Device page table pages */
@@ -46,7 +48,8 @@ struct ct_vm {
 	struct mutex lock;
 
 	/* Map host addr (kmalloced/vmalloced) to device logical addr. */
-	struct ct_vm_block *(*map)(struct ct_vm *, void *host_addr, int size);
+	struct ct_vm_block *(*map)(struct ct_vm *, struct snd_pcm_substream *,
+				   int size);
 	/* Unmap device logical addr area. */
 	void (*unmap)(struct ct_vm *, struct ct_vm_block *block);
 	void *(*get_ptp_virt)(struct ct_vm *vm, int index);
-- 
cgit v0.10.2


From eeaf100d25cefdbe9dc40d6e0f2025411474972a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 16:06:02 +0200
Subject: ALSA: ca0106 - Add missing card->mixername field setup

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index f143f71..7724252 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -856,6 +856,8 @@ int __devinit snd_ca0106_mixer(struct snd_ca0106 *emu)
 			return err;
 		add_slaves(card, vmaster, slave_sws);
 	}
+
+	strcpy(card->mixername, "CA0106");
         return 0;
 }
 
-- 
cgit v0.10.2


From 709e50cf870e61745b39552044aa6c7c38e4f9e0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 14:13:15 +0200
Subject: perf_counter: Use PID namespaces properly

Stop using task_struct::pid and start using PID namespaces.

PIDs will be reported in the PID namespace of the monitoring
task at the moment of counter creation.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d970fbc..9ec20fc 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -317,6 +317,7 @@ enum perf_event_type {
 #include <linux/spinlock.h>
 #include <linux/hrtimer.h>
 #include <linux/fs.h>
+#include <linux/pid_namespace.h>
 #include <asm/atomic.h>
 
 struct task_struct;
@@ -500,6 +501,8 @@ struct perf_counter {
 
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
+
+	struct pid_namespace		*ns;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fbed4d2..caa012c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1432,6 +1432,8 @@ static void free_counter_rcu(struct rcu_head *head)
 	struct perf_counter *counter;
 
 	counter = container_of(head, struct perf_counter, rcu_head);
+	if (counter->ns)
+		put_pid_ns(counter->ns);
 	kfree(counter);
 }
 
@@ -2267,6 +2269,28 @@ static void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
+static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p)
+{
+	/*
+	 * only top level counters have the pid namespace they were created in
+	 */
+	if (counter->parent)
+		counter = counter->parent;
+
+	return task_tgid_nr_ns(p, counter->ns);
+}
+
+static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
+{
+	/*
+	 * only top level counters have the pid namespace they were created in
+	 */
+	if (counter->parent)
+		counter = counter->parent;
+
+	return task_pid_nr_ns(p, counter->ns);
+}
+
 static void perf_counter_output(struct perf_counter *counter,
 				int nmi, struct pt_regs *regs, u64 addr)
 {
@@ -2303,8 +2327,8 @@ static void perf_counter_output(struct perf_counter *counter,
 
 	if (record_type & PERF_RECORD_TID) {
 		/* namespace issues */
-		tid_entry.pid = current->group_leader->pid;
-		tid_entry.tid = current->pid;
+		tid_entry.pid = perf_counter_pid(counter, current);
+		tid_entry.tid = perf_counter_tid(counter, current);
 
 		header.type |= PERF_RECORD_TID;
 		header.size += sizeof(tid_entry);
@@ -2432,6 +2456,9 @@ static void perf_counter_comm_output(struct perf_counter *counter,
 	if (ret)
 		return;
 
+	comm_event->event.pid = perf_counter_pid(counter, comm_event->task);
+	comm_event->event.tid = perf_counter_tid(counter, comm_event->task);
+
 	perf_output_put(&handle, comm_event->event);
 	perf_output_copy(&handle, comm_event->comm,
 				   comm_event->comm_size);
@@ -2504,8 +2531,6 @@ void perf_counter_comm(struct task_struct *task)
 		.task	= task,
 		.event  = {
 			.header = { .type = PERF_EVENT_COMM, },
-			.pid	= task->group_leader->pid,
-			.tid	= task->pid,
 		},
 	};
 
@@ -2542,6 +2567,9 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 	if (ret)
 		return;
 
+	mmap_event->event.pid = perf_counter_pid(counter, current);
+	mmap_event->event.tid = perf_counter_tid(counter, current);
+
 	perf_output_put(&handle, mmap_event->event);
 	perf_output_copy(&handle, mmap_event->file_name,
 				   mmap_event->file_size);
@@ -2641,8 +2669,6 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 		.file   = file,
 		.event  = {
 			.header = { .type = PERF_EVENT_MMAP, },
-			.pid	= current->group_leader->pid,
-			.tid	= current->pid,
 			.start  = addr,
 			.len    = len,
 			.pgoff  = pgoff,
@@ -2664,8 +2690,6 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 		.file   = file,
 		.event  = {
 			.header = { .type = PERF_EVENT_MUNMAP, },
-			.pid	= current->group_leader->pid,
-			.tid	= current->pid,
 			.start  = addr,
 			.len    = len,
 			.pgoff  = pgoff,
@@ -3445,6 +3469,8 @@ SYSCALL_DEFINE5(perf_counter_open,
 	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
 	mutex_unlock(&current->perf_counter_mutex);
 
+	counter->ns = get_pid_ns(current->nsproxy->pid_ns);
+
 	fput_light(counter_file, fput_needed2);
 
 out_fput:
-- 
cgit v0.10.2


From f70e87d7a6d9c5a23d5f43ed0a0c224c157ef597 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 14:13:24 +0200
Subject: perf_counter: tools: Expand the COMM,MMAP event synthesizer

Include code to pre-construct mappings based on /proc,
on system wide recording.

Fix the existing code to properly fill out ->pid and ->tid.

The PID should be the Thread Group ID (PIDTYPE_PID of task->group_leader)
The TID should be the Thread ID (PIDTYPE_PID of task)

Furthermore, change the default sorting of report to comm,dso for a
better quick overview.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 9c151de..810fc27 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -162,7 +162,7 @@ struct comm_event {
 	char				comm[16];
 };
 
-static pid_t pid_synthesize_comm_event(pid_t pid)
+static void pid_synthesize_comm_event(pid_t pid, int full)
 {
 	struct comm_event comm_ev;
 	char filename[PATH_MAX];
@@ -170,6 +170,8 @@ static pid_t pid_synthesize_comm_event(pid_t pid)
 	int fd, ret;
 	size_t size;
 	char *field, *sep;
+	DIR *tasks;
+	struct dirent dirent, *next;
 
 	snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
 
@@ -194,29 +196,50 @@ static pid_t pid_synthesize_comm_event(pid_t pid)
 		goto out_failure;
 	size = sep - field;
 	memcpy(comm_ev.comm, field, size++);
-	field = strchr(sep + 4, ' ');
-	if (field == NULL)
-		goto out_failure;
-	comm_ev.pid = atoi(++field);
+
+	comm_ev.pid = pid;
 	comm_ev.header.type = PERF_EVENT_COMM;
-	comm_ev.tid = pid;
 	size = ALIGN(size, sizeof(uint64_t));
 	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
 
-	ret = write(output, &comm_ev, comm_ev.header.size);
-	if (ret < 0) {
-		perror("failed to write");
-		exit(-1);
+	if (!full) {
+		comm_ev.tid = pid;
+
+		ret = write(output, &comm_ev, comm_ev.header.size);
+		if (ret < 0) {
+			perror("failed to write");
+			exit(-1);
+		}
+		return;
+	}
+
+	snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
+
+	tasks = opendir(filename);
+	while (!readdir_r(tasks, &dirent, &next) && next) {
+		char *end;
+		pid = strtol(dirent.d_name, &end, 10);
+		if (*end)
+			continue;
+
+		comm_ev.tid = pid;
+
+		ret = write(output, &comm_ev, comm_ev.header.size);
+		if (ret < 0) {
+			perror("failed to write");
+			exit(-1);
+		}
 	}
-	return comm_ev.pid;
+	closedir(tasks);
+	return;
+
 out_failure:
 	fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
 		filename);
 	exit(EXIT_FAILURE);
-	return -1;
 }
 
-static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
+static void pid_synthesize_mmap_events(pid_t pid)
 {
 	char filename[PATH_MAX];
 	FILE *fp;
@@ -261,7 +284,7 @@ static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
 			mmap_ev.len -= mmap_ev.start;
 			mmap_ev.header.size = (sizeof(mmap_ev) -
 					       (sizeof(mmap_ev.filename) - size));
-			mmap_ev.pid = pgid;
+			mmap_ev.pid = pid;
 			mmap_ev.tid = pid;
 
 			if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
@@ -274,6 +297,28 @@ static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
 	fclose(fp);
 }
 
+static void synthesize_events(void)
+{
+	DIR *proc;
+	struct dirent dirent, *next;
+
+	proc = opendir("/proc");
+
+	while (!readdir_r(proc, &dirent, &next) && next) {
+		char *end;
+		pid_t pid;
+
+		pid = strtol(dirent.d_name, &end, 10);
+		if (*end) /* only interested in proper numerical dirents */
+			continue;
+
+		pid_synthesize_comm_event(pid, 1);
+		pid_synthesize_mmap_events(pid);
+	}
+
+	closedir(proc);
+}
+
 static void open_counters(int cpu, pid_t pid)
 {
 	struct perf_counter_hw_event hw_event;
@@ -281,8 +326,8 @@ static void open_counters(int cpu, pid_t pid)
 	int track = 1;
 
 	if (pid > 0) {
-		pid_t pgid = pid_synthesize_comm_event(pid);
-		pid_synthesize_mmap_events(pid, pgid);
+		pid_synthesize_comm_event(pid, 0);
+		pid_synthesize_mmap_events(pid);
 	}
 
 	group_fd = -1;
@@ -348,7 +393,7 @@ static int __cmd_record(int argc, const char **argv)
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
 
-	output = open(output_name, O_CREAT|O_EXCL|O_RDWR, S_IRWXU);
+	output = open(output_name, O_CREAT|O_EXCL|O_TRUNC|O_RDWR, S_IRUSR|S_IWUSR);
 	if (output < 0) {
 		perror("failed to create output file");
 		exit(-1);
@@ -385,9 +430,8 @@ static int __cmd_record(int argc, const char **argv)
 		}
 	}
 
-	/*
-	 * TODO: store the current /proc/$/maps information somewhere
-	 */
+	if (system_wide)
+		synthesize_events();
 
 	while (!done) {
 		int hits = events;
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 20a4e51..0558c1e 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -18,7 +18,7 @@
 
 static char		const *input_name = "perf.data";
 static char		*vmlinux = NULL;
-static char		*sort_order = "pid,symbol";
+static char		*sort_order = "comm,dso";
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
-- 
cgit v0.10.2


From 97124d5e2df5b9eaa5bb684bb1e8ebc7e29d0f5d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 15:52:24 +0200
Subject: perf_counter: tools: Better handle existing data files

Provide an argument (-f) to overwrite existing data files.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 810fc27..bace7a8 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -7,6 +7,7 @@
 #include "util/parse-events.h"
 #include "util/string.h"
 
+#include <unistd.h>
 #include <sched.h>
 
 #define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
@@ -26,7 +27,7 @@ static unsigned int		realtime_prio			= 0;
 static int			system_wide			= 0;
 static pid_t			target_pid			= -1;
 static int			inherit				= 1;
-static int			nmi				= 1;
+static int			force				= 0;
 
 const unsigned int default_count[] = {
 	1000000,
@@ -337,7 +338,6 @@ static void open_counters(int cpu, pid_t pid)
 		hw_event.config		= event_id[counter];
 		hw_event.irq_period	= event_count[counter];
 		hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
-		hw_event.nmi		= nmi;
 		hw_event.mmap		= track;
 		hw_event.comm		= track;
 		hw_event.inherit	= (cpu < 0) && inherit;
@@ -387,13 +387,20 @@ static int __cmd_record(int argc, const char **argv)
 	int i, counter;
 	pid_t pid;
 	int ret;
+	struct stat st;
 
 	page_size = sysconf(_SC_PAGE_SIZE);
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
 
-	output = open(output_name, O_CREAT|O_EXCL|O_TRUNC|O_RDWR, S_IRUSR|S_IWUSR);
+	if (!stat(output_name, &st) && !force) {
+		fprintf(stderr, "Error, output file: %s exists, use -f to overwrite.\n",
+				output_name);
+		exit(-1);
+	}
+
+	output = open(output_name, O_CREAT|O_TRUNC|O_RDWR, S_IRUSR|S_IWUSR);
 	if (output < 0) {
 		perror("failed to create output file");
 		exit(-1);
@@ -473,6 +480,8 @@ static const struct option options[] = {
 		    "collect data with this RT SCHED_FIFO priority"),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 			    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('f', "force", &force,
+			"overwrite existing data file"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From 8a6fc708b9bb48a79a385bdc2be0959ee2ab788d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 21:23:13 +0200
Subject: dma-debug: add debugfs file for driver filter

This patch adds the dma-api/driver_filter file to debugfs. The root user
can write a driver name into this file to see only dma-api errors for
that particular driver in the kernel log. Writing an empty string to
that file disables the driver filter.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index c01f647..c6330b1 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -23,9 +23,11 @@
 #include <linux/dma-debug.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
+#include <linux/uaccess.h>
 #include <linux/device.h>
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <linux/ctype.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 
@@ -98,6 +100,7 @@ static struct dentry *show_all_errors_dent  __read_mostly;
 static struct dentry *show_num_errors_dent  __read_mostly;
 static struct dentry *num_free_entries_dent __read_mostly;
 static struct dentry *min_free_entries_dent __read_mostly;
+static struct dentry *filter_dent           __read_mostly;
 
 /* per-driver filter related state */
 
@@ -160,7 +163,8 @@ static bool driver_filter(struct device *dev)
 		read_lock_irqsave(&driver_name_lock, flags);
 
 		if (drv->name &&
-		    strncmp(current_driver_name, drv->name, 63) == 0) {
+		    strncmp(current_driver_name, drv->name,
+			    NAME_MAX_LEN-1) == 0) {
 			current_driver = drv;
 			ret = true;
 		}
@@ -454,6 +458,97 @@ out_err:
 	return -ENOMEM;
 }
 
+static ssize_t filter_read(struct file *file, char __user *user_buf,
+			   size_t count, loff_t *ppos)
+{
+	unsigned long flags;
+	char buf[NAME_MAX_LEN + 1];
+	int len;
+
+	if (!current_driver_name[0])
+		return 0;
+
+	/*
+	 * We can't copy to userspace directly because current_driver_name can
+	 * only be read under the driver_name_lock with irqs disabled. So
+	 * create a temporary copy first.
+	 */
+	read_lock_irqsave(&driver_name_lock, flags);
+	len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
+	read_unlock_irqrestore(&driver_name_lock, flags);
+
+	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+static ssize_t filter_write(struct file *file, const char __user *userbuf,
+			    size_t count, loff_t *ppos)
+{
+	unsigned long flags;
+	char buf[NAME_MAX_LEN];
+	size_t len = NAME_MAX_LEN - 1;
+	int i;
+
+	/*
+	 * We can't copy from userspace directly. Access to
+	 * current_driver_name is protected with a write_lock with irqs
+	 * disabled. Since copy_from_user can fault and may sleep we
+	 * need to copy to temporary buffer first
+	 */
+	len = min(count, len);
+	if (copy_from_user(buf, userbuf, len))
+		return -EFAULT;
+
+	buf[len] = 0;
+
+	write_lock_irqsave(&driver_name_lock, flags);
+
+	/* Now handle the string we got from userspace very carefully.
+	 * The rules are:
+	 *         - only use the first token we got
+	 *         - token delimiter is everything looking like a space
+	 *           character (' ', '\n', '\t' ...)
+	 *
+	 */
+	if (!isalnum(buf[0])) {
+		/*
+		   If the first character userspace gave us is not
+		 * alphanumerical then assume the filter should be
+		 * switched off.
+		 */
+		if (current_driver_name[0])
+			printk(KERN_INFO "DMA-API: switching off dma-debug "
+					 "driver filter\n");
+		current_driver_name[0] = 0;
+		current_driver = NULL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Now parse out the first token and use it as the name for the
+	 * driver to filter for.
+	 */
+	for (i = 0; i < NAME_MAX_LEN; ++i) {
+		current_driver_name[i] = buf[i];
+		if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
+			break;
+	}
+	current_driver_name[i] = 0;
+	current_driver = NULL;
+
+	printk(KERN_INFO "DMA-API: enable driver filter for driver [%s]\n",
+	       current_driver_name);
+
+out_unlock:
+	write_unlock_irqrestore(&driver_name_lock, flags);
+
+	return count;
+}
+
+const struct file_operations filter_fops = {
+	.read  = filter_read,
+	.write = filter_write,
+};
+
 static int dma_debug_fs_init(void)
 {
 	dma_debug_dent = debugfs_create_dir("dma-api", NULL);
@@ -497,6 +592,11 @@ static int dma_debug_fs_init(void)
 	if (!min_free_entries_dent)
 		goto out_err;
 
+	filter_dent = debugfs_create_file("driver_filter", 0644,
+					  dma_debug_dent, NULL, &filter_fops);
+	if (!filter_dent)
+		goto out_err;
+
 	return 0;
 
 out_err:
-- 
cgit v0.10.2


From 1745de5e5639457513fe43440f2800e23c3cbc7d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 21:49:51 +0200
Subject: dma-debug: add dma_debug_driver kernel command line

This patch add the dma_debug_driver= boot parameter to enable the driver
filter for early boot.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbf..b3f1314 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -646,6 +646,13 @@ and is between 256 and 4096 characters. It is defined in the file
 			DMA-API debugging code disables itself because the
 			architectural default is too low.
 
+	dma_debug_driver=<driver_name>
+			With this option the DMA-API debugging driver
+			filter feature can be enabled at boot time. Just
+			pass the driver to filter for as the parameter.
+			The filter can be disabled or changed to another
+			driver later using sysfs.
+
 	dscc4.setup=	[NET]
 
 	dtc3181e=	[HW,SCSI]
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index c6330b1..d0618aa 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1109,3 +1109,21 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
 
+static int __init dma_debug_driver_setup(char *str)
+{
+	int i;
+
+	for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
+		current_driver_name[i] = *str;
+		if (*str == 0)
+			break;
+	}
+
+	if (current_driver_name[0])
+		printk(KERN_INFO "DMA-API: enable driver filter for "
+				 "driver [%s]\n", current_driver_name);
+
+
+	return 1;
+}
+__setup("dma_debug_driver=", dma_debug_driver_setup);
-- 
cgit v0.10.2


From 016ea6874a6d58df85b54f56997d26df13c307b2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 22 May 2009 21:57:23 +0200
Subject: dma-debug: add documentation for the driver filter

This patch adds the driver filter feature to the dma-debug
documentation.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index d9aa43d..25fb8bc 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -704,12 +704,24 @@ this directory the following files can currently be found:
 				The current number of free dma_debug_entries
 				in the allocator.
 
+	dma-api/driver-filter
+				You can write a name of a driver into this file
+				to limit the debug output to requests from that
+				particular driver. Write an empty string to
+				that file to disable the filter and see
+				all errors again.
+
 If you have this code compiled into your kernel it will be enabled by default.
 If you want to boot without the bookkeeping anyway you can provide
 'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
 Notice that you can not enable it again at runtime. You have to reboot to do
 so.
 
+If you want to see debug messages only for a special device driver you can
+specify the dma_debug_driver=<drivername> parameter. This will enable the
+driver filter at boot time. The debug code will only print errors for that
+driver afterwards. This filter can be disabled or changed later using debugfs.
+
 When the code disables itself at runtime this is most likely because it ran
 out of dma_debug_entries. These entries are preallocated at boot. The number
 of preallocated entries is defined per architecture. If it is too low for you
-- 
cgit v0.10.2


From 4593bba8679b925a056f84edac061676e7eda71c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 2 Jun 2009 15:34:25 +0200
Subject: perf report: Clean up the default output

 - extra space between columns
 - left-aligned the symbol column
 - moved the no-symbols printout to -v

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 0558c1e..19c1e05 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -84,24 +84,25 @@ static struct dso *dsos__findnew(const char *name)
 	struct dso *dso = dsos__find(name);
 	int nr;
 
-	if (dso == NULL) {
-		dso = dso__new(name, 0);
-		if (!dso)
-			goto out_delete_dso;
-
-		nr = dso__load(dso, NULL);
-		if (nr < 0) {
-			fprintf(stderr, "Failed to open: %s\n", name);
-			goto out_delete_dso;
-		}
-		if (!nr) {
-			fprintf(stderr,
-		"Failed to find debug symbols for: %s, maybe install a debug package?\n",
-					name);
-		}
+	if (dso)
+		return dso;
+
+	dso = dso__new(name, 0);
+	if (!dso)
+		goto out_delete_dso;
 
-		dsos__add(dso);
+	nr = dso__load(dso, NULL);
+	if (nr < 0) {
+		fprintf(stderr, "Failed to open: %s\n", name);
+		goto out_delete_dso;
 	}
+	if (!nr && verbose) {
+		fprintf(stderr,
+		"No symbols found in: %s, maybe install a debug package?\n",
+				name);
+	}
+
+	dsos__add(dso);
 
 	return dso;
 
@@ -302,11 +303,11 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__thread_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, " %16s:%5d", self->thread->comm ?: "", self->thread->pid);
+	return fprintf(fp, "  %16s:%5d", self->thread->comm ?: "", self->thread->pid);
 }
 
 static struct sort_entry sort_thread = {
-	.header = "         Command: Pid ",
+	.header = "          Command: Pid ",
 	.cmp	= sort__thread_cmp,
 	.print	= sort__thread_print,
 };
@@ -332,11 +333,11 @@ sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__comm_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, " %16s", self->thread->comm ?: "<unknown>");
+	return fprintf(fp, "  %16s", self->thread->comm ?: "<unknown>");
 }
 
 static struct sort_entry sort_comm = {
-	.header = "         Command",
+	.header = "          Command",
 	.cmp	= sort__comm_cmp,
 	.print	= sort__comm_print,
 };
@@ -362,11 +363,11 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__dso_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, " %64s", self->dso ? self->dso->name : "<unknown>");
+	return fprintf(fp, "  %s", self->dso ? self->dso->name : "<unknown>");
 }
 
 static struct sort_entry sort_dso = {
-	.header = "                                                    Shared Object",
+	.header = " Shared Object",
 	.cmp	= sort__dso_cmp,
 	.print	= sort__dso_print,
 };
@@ -391,9 +392,9 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
 	size_t ret = 0;
 
 	if (verbose)
-		ret += fprintf(fp, " %#018llx", (unsigned long long)self->ip);
+		ret += fprintf(fp, "  %#018llx", (unsigned long long)self->ip);
 
-	ret += fprintf(fp, " %s: %s",
+	ret += fprintf(fp, "  %s: %s",
 			self->dso ? self->dso->name : "<unknown>",
 			self->sym ? self->sym->name : "<unknown>");
 
@@ -401,7 +402,7 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
 }
 
 static struct sort_entry sort_sym = {
-	.header = "Shared Object: Symbol",
+	.header = " Shared Object: Symbol",
 	.cmp	= sort__sym_cmp,
 	.print	= sort__sym_print,
 };
@@ -595,8 +596,8 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 	list_for_each_entry(se, &hist_entry__sort_list, list) {
 		int i;
 
-		fprintf(fp, " ");
-		for (i = 0; i < strlen(se->header); i++)
+		fprintf(fp, "  ");
+		for (i = 0; i < strlen(se->header)-1; i++)
 			fprintf(fp, ".");
 	}
 	fprintf(fp, "\n");
-- 
cgit v0.10.2


From 3f08a0e4ab1ce65bb882f6425ff482e5db025f78 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 2 Jun 2009 17:39:52 +0200
Subject: ALSA: bt87x - Add a quirk entry for Askey Computer Corp.
 MagicTView'99

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c
index ce3f2e9..24585c6 100644
--- a/sound/pci/bt87x.c
+++ b/sound/pci/bt87x.c
@@ -810,6 +810,8 @@ static struct pci_device_id snd_bt87x_ids[] = {
 	BT_DEVICE(PCI_DEVICE_ID_BROOKTREE_878, 0x107d, 0x6606, GENERIC),
 	/* Voodoo TV 200 */
 	BT_DEVICE(PCI_DEVICE_ID_BROOKTREE_878, 0x121a, 0x3000, GENERIC),
+	/* Askey Computer Corp. MagicTView'99 */
+	BT_DEVICE(PCI_DEVICE_ID_BROOKTREE_878, 0x144f, 0x3000, GENERIC),
 	/* AVerMedia Studio No. 103, 203, ...? */
 	BT_DEVICE(PCI_DEVICE_ID_BROOKTREE_878, 0x1461, 0x0003, AVPHONE98),
 	/* Prolink PixelView PV-M4900 */
-- 
cgit v0.10.2


From 50b64e3b77d569c217a48e078cd565dbd6462ad0 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 2 Jun 2009 06:55:20 -0400
Subject: cifs: fix IPv6 address length check

For IPv6 the userspace mount helper sends an address in the "ip="
option.  This check fails if the length is > 35 characters. I have no
idea where the magic 35 character limit came from, but it's clearly not
enough for IPv6. Fix it by making it use the INET6_ADDRSTRLEN #define.

While we're at it, use the same #define for the address length in SPNEGO
upcalls.

Reported-by: Charles R. Anderson <cra@wpi.edu>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 67bf93a..4a4581c 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <keys/user-type.h>
 #include <linux/key-type.h>
+#include <linux/inet.h>
 #include "cifsglob.h"
 #include "cifs_spnego.h"
 #include "cifs_debug.h"
@@ -73,9 +74,6 @@ struct key_type cifs_spnego_key_type = {
  * strlen(";sec=ntlmsspi") */
 #define MAX_MECH_STR_LEN	13
 
-/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/128 */
-#define MAX_IPV6_ADDR_LEN	43
-
 /* strlen of "host=" */
 #define HOST_KEY_LEN		5
 
@@ -102,7 +100,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 	   host=hostname sec=mechanism uid=0xFF user=username */
 	desc_len = MAX_VER_STR_LEN +
 		   HOST_KEY_LEN + strlen(hostname) +
-		   IP_KEY_LEN + MAX_IPV6_ADDR_LEN +
+		   IP_KEY_LEN + INET6_ADDRSTRLEN +
 		   MAX_MECH_STR_LEN +
 		   UID_KEY_LEN + (sizeof(uid_t) * 2) +
 		   USER_KEY_LEN + strlen(sesInfo->userName) + 1;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8ae563f..74b5a87 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -35,6 +35,7 @@
 #include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
+#include <linux/inet.h>
 #include <net/ipv6.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
@@ -960,7 +961,8 @@ cifs_parse_mount_options(char *options, const char *devname,
 		} else if (strnicmp(data, "ip", 2) == 0) {
 			if (!value || !*value) {
 				vol->UNCip = NULL;
-			} else if (strnlen(value, 35) < 35) {
+			} else if (strnlen(value, INET6_ADDRSTRLEN) <
+							INET6_ADDRSTRLEN) {
 				vol->UNCip = value;
 			} else {
 				printk(KERN_WARNING "CIFS: ip address "
-- 
cgit v0.10.2


From 179c498ae2998461fe436437a74dc29036fc7dcc Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Jun 2009 12:03:19 -0400
Subject: function-graph: only allocate init tasks if it was not already done

When the function graph tracer is enabled, it calls the initialization
needed for the init tasks that would be called on all created tasks.

The problem is that this is called every time the function graph tracer
is enabled, and the ret_stack is allocated for the idle tasks each time.
Thus, the old ret_stack is lost and a memory leak is created.

This is also dangerous because if an interrupt happened on another CPU
with the init task and the ret_stack is replaced, we then lose all the
return pointers for the interrupt, and a crash would take place.

[ Impact: fix memory leak and possible crash due to race ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f1ed080..ebff62e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2643,8 +2643,10 @@ static int start_graph_tracing(void)
 		return -ENOMEM;
 
 	/* The cpu_boot init_task->ret_stack will never be freed */
-	for_each_online_cpu(cpu)
-		ftrace_graph_init_task(idle_task(cpu));
+	for_each_online_cpu(cpu) {
+		if (!idle_task(cpu)->ret_stack)
+			ftrace_graph_init_task(idle_task(cpu));
+	}
 
 	do {
 		ret = alloc_retstack_tasklist(ret_stack_list);
-- 
cgit v0.10.2


From d3ae33efb8e2f277f9007eb060c9d0b91ab38ae1 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Tue, 2 Jun 2009 12:34:31 +0100
Subject: pata_netcell: LBA48 force identify bits correct

This matches Bartlomiej's patch for ide_pci_generic:
c339dfdd65b52bfd947ab29d1210314a2f6d622d

In the libata case netcell has its own mini driver. I suspect this fix is
actually only needed for some firmware revs but it does no harm either way.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c
index bdb2369..9a69809 100644
--- a/drivers/ata/pata_netcell.c
+++ b/drivers/ata/pata_netcell.c
@@ -20,13 +20,24 @@
 
 /* No PIO or DMA methods needed for this device */
 
+static unsigned int netcell_read_id(struct ata_device *adev,
+					struct ata_taskfile *tf, u16 *id)
+{
+	unsigned int err_mask = ata_do_dev_read_id(adev, tf, id);
+	/* Firmware forgets to mark words 85-87 valid */
+	if (err_mask == 0)
+		id[ATA_ID_CSF_DEFAULT] |= 0x0400;
+	return err_mask;
+}
+
 static struct scsi_host_template netcell_sht = {
 	ATA_BMDMA_SHT(DRV_NAME),
 };
 
 static struct ata_port_operations netcell_ops = {
 	.inherits	= &ata_bmdma_port_ops,
-	.cable_detect		= ata_cable_80wire,
+	.cable_detect	= ata_cable_80wire,
+	.read_id	= netcell_read_id,
 };
 
 
-- 
cgit v0.10.2


From 05ad709d04799125ed85dd816fdb558258102172 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@etchedpixels.co.uk>
Date: Tue, 2 Jun 2009 16:58:10 +0100
Subject: parport: quickfix the proc registration bug

Ideally we should have a directory of drivers and a link to the 'active'
driver. For now just show the first device which is effectively the existing
semantics without a warning.

This is an update on the original buggy patch that I then forgot to
resubmit. Confusingly it was proposed by Red Hat, written by Etched Pixels
fixed and submitted by Intel ...

Resolves-Bug: http://bugzilla.kernel.org/show_bug.cgi?id=9749
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index 0ebca45..dffa5d4 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -614,7 +614,10 @@ parport_register_device(struct parport *port, const char *name,
 	 * pardevice fields. -arca
 	 */
 	port->ops->init_state(tmp, tmp->state);
-	parport_device_proc_register(tmp);
+	if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) {
+		port->proc_device = tmp;
+		parport_device_proc_register(tmp);
+	}
 	return tmp;
 
  out_free_all:
@@ -646,10 +649,14 @@ void parport_unregister_device(struct pardevice *dev)
 	}
 #endif
 
-	parport_device_proc_unregister(dev);
-
 	port = dev->port->physport;
 
+	if (port->proc_device == dev) {
+		port->proc_device = NULL;
+		clear_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags);
+		parport_device_proc_unregister(dev);
+	}
+
 	if (port->cad == dev) {
 		printk(KERN_DEBUG "%s: %s forgot to release port\n",
 		       port->name, dev->name);
diff --git a/include/linux/parport.h b/include/linux/parport.h
index e1f83c5..38a423e 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -324,6 +324,10 @@ struct parport {
 	int spintime;
 	atomic_t ref_count;
 
+	unsigned long devflags;
+#define PARPORT_DEVPROC_REGISTERED	0
+	struct pardevice *proc_device;	/* Currently register proc device */
+
 	struct list_head full_list;
 	struct parport *slaves[3];
 };
-- 
cgit v0.10.2


From 8d9fb9bb89f44a4a06d0e001b16d983b28d571bc Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Sun, 31 May 2009 12:57:22 +0200
Subject: Support for lilly-1131 modules and baseboards [v2]

On Thu, May 28, 2009 at 08:42:23PM +0200, Sascha Hauer wrote:
> > > Mail-Followup-To: Daniel Mack <daniel@caiaq.de>,
> > > 	linux-arm-kernel@lists.arm.linux.org.uk
> >
> > ... which causes my mutt to only reply to the list.
>
> Ah, ok. /me hacking in muttrc... Does it work now?

Yep :)

> >  	mxc_register_device(&mxc_uart_device0, &uart_pdata);
> > +	mxc_register_device(&mxc_uart_device1, &uart_pdata);
> > +	mxc_register_device(&mxc_uart_device2, &uart_pdata);
>
> What about the RXD3/TXD3 pins?

You're right - I got the IOMUX tables wrong and thought UART0 pins are
selected unconditionally. But as it turns out TXD1/RXD1 is for UART0
(mxc_uart_device0), TXD2/RXD2 for UART1 (mxc_uart_device1) etc.

Below is a new patch.

Thanks,
Daniel

From e7eb5fa0fed09d667a4b2f168fe466e2cc645abb Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 27 May 2009 12:22:51 +0200
Subject: [PATCH] ARM: MX3: add two more UARTs to lilly-1131-db

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/mx31lilly-db.c b/arch/arm/mach-mx3/mx31lilly-db.c
index 99b0a6e..3b3a78f 100644
--- a/arch/arm/mach-mx3/mx31lilly-db.c
+++ b/arch/arm/mach-mx3/mx31lilly-db.c
@@ -56,6 +56,14 @@ static unsigned int lilly_db_board_pins[] __initdata = {
 	MX31_PIN_RTS1__RTS1,
 	MX31_PIN_TXD1__TXD1,
 	MX31_PIN_RXD1__RXD1,
+	MX31_PIN_CTS2__CTS2,
+	MX31_PIN_RTS2__RTS2,
+	MX31_PIN_TXD2__TXD2,
+	MX31_PIN_RXD2__RXD2,
+	MX31_PIN_CSPI3_MOSI__RXD3,
+	MX31_PIN_CSPI3_MISO__TXD3,
+	MX31_PIN_CSPI3_SCLK__RTS3,
+	MX31_PIN_CSPI3_SPI_RDY__CTS3,
 	MX31_PIN_SD1_DATA3__SD1_DATA3,
 	MX31_PIN_SD1_DATA2__SD1_DATA2,
 	MX31_PIN_SD1_DATA1__SD1_DATA1,
@@ -200,6 +208,8 @@ void __init mx31lilly_db_init(void)
 					ARRAY_SIZE(lilly_db_board_pins),
 					"development board pins");
 	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+	mxc_register_device(&mxc_uart_device1, &uart_pdata);
+	mxc_register_device(&mxc_uart_device2, &uart_pdata);
 	mxc_register_device(&mxcsdhc_device0, &mmc_pdata);
 	mx31lilly_init_fb();
 }
-- 
cgit v0.10.2


From 82310a3272d5a2a7652f5649ad8a55f58c8f74d9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Jun 2009 12:26:07 -0400
Subject: function-graph: enable the stack after initialization of other
 variables

The function graph tracer checks if the task_struct has ret_stack defined
to know if it is OK or not to use it. The initialization is done for
all tasks by one process, but the idle tasks use the same initialization
used by new tasks.

If an interrupt happens on an idle task that just had the ret_stack
created, but before the rest of the initialization took place, then
we can corrupt the return address of the functions.

This patch moves the setting of the task_struct's ret_stack to after
the other variables have been initialized.

[ Impact: prevent kernel panic on idle task when starting function graph ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ebff62e..20e0660 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2739,15 +2739,20 @@ void unregister_ftrace_graph(void)
 void ftrace_graph_init_task(struct task_struct *t)
 {
 	if (atomic_read(&ftrace_graph_active)) {
-		t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+		struct ftrace_ret_stack *ret_stack;
+
+		ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
 				* sizeof(struct ftrace_ret_stack),
 				GFP_KERNEL);
-		if (!t->ret_stack)
+		if (!ret_stack)
 			return;
 		t->curr_ret_stack = -1;
 		atomic_set(&t->tracing_graph_pause, 0);
 		atomic_set(&t->trace_overrun, 0);
 		t->ftrace_timestamp = 0;
+		/* make curr_ret_stack visable before we add the ret_stack */
+		smp_wmb();
+		t->ret_stack = ret_stack;
 	} else
 		t->ret_stack = NULL;
 }
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d28687e..baeb5fe 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 	if (!current->ret_stack)
 		return -EBUSY;
 
+	/*
+	 * We must make sure the ret_stack is tested before we read
+	 * anything else.
+	 */
+	smp_rmb();
+
 	/* The return trace stack is full */
 	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
 		atomic_inc(&current->trace_overrun);
-- 
cgit v0.10.2


From 26c01624a2a40f8a4ddf6449b65c9b1c418d0e72 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Jun 2009 14:01:19 -0400
Subject: function-graph: add memory barriers for accessing task's ret_stack

The code that handles the tasks ret_stack allocation for every task
assumes that only an interrupt can cause issues (even though interrupts
are disabled).

In reality, the code is allocating the ret_stack for tasks that may be
running on other CPUs and there are not efficient memory barriers to
handle this case.

[ Impact: prevent crash due to using of uninitialized ret_stack variables ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 20e0660..1664d3f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2580,12 +2580,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 		}
 
 		if (t->ret_stack == NULL) {
-			t->curr_ret_stack = -1;
-			/* Make sure IRQs see the -1 first: */
-			barrier();
-			t->ret_stack = ret_stack_list[start++];
 			atomic_set(&t->tracing_graph_pause, 0);
 			atomic_set(&t->trace_overrun, 0);
+			t->curr_ret_stack = -1;
+			/* Make sure the tasks see the -1 first: */
+			smp_wmb();
+			t->ret_stack = ret_stack_list[start++];
 		}
 	} while_each_thread(g, t);
 
-- 
cgit v0.10.2


From 29c2810276fbf8419c9b4d942b99c873e9b7640a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 15:56:26 +0200
Subject: perf_counter tools: Remove the last nmi bits

Everything is nmi these days, remove the userspace bits so that
the kernel can drop the interface.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 5886791..644f850 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -84,7 +84,6 @@ static void create_perfstat_counter(int counter)
 	memset(&hw_event, 0, sizeof(hw_event));
 	hw_event.config		= event_id[counter];
 	hw_event.record_type	= 0;
-	hw_event.nmi		= 1;
 	hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
 	hw_event.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
 
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 24a8879..a2cff7b 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -581,7 +581,6 @@ static int __cmd_top(void)
 			hw_event.config		= event_id[counter];
 			hw_event.irq_period	= event_count[counter];
 			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
-			hw_event.nmi		= 1;
 			hw_event.mmap		= use_mmap;
 			hw_event.munmap		= use_munmap;
 			hw_event.freq		= freq;
-- 
cgit v0.10.2


From 53e111a730ea8b002d57dd226098c12789993329 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 17:01:58 +0200
Subject: x86: Fix atomic_long_xchg() on 64bit

Apparently I'm the first to use it :-)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 3673a13..81d3be4 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 #define atomic_long_cmpxchg(l, old, new) \
 	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
 #define atomic_long_xchg(v, new) \
-	(atomic64_xchg((atomic64_t *)(l), (new)))
+	(atomic64_xchg((atomic64_t *)(v), (new)))
 
 #else  /*  BITS_PER_LONG == 64  */
 
-- 
cgit v0.10.2


From 8e5799b1ad2a0567fdfaaf0e91b40efee010f2c1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 15:08:15 +0200
Subject: perf_counter: Add unique counter id

Stephan raised the issue that we currently cannot distinguish between
similar counters within a group (PERF_RECORD_GROUP uses the config
value as identifier).

Therefore, generate a new ID for each counter using a global u64
sequence counter.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9ec20fc..4845a21 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -114,8 +114,9 @@ enum perf_counter_record_format {
  * in increasing order of bit value, after the counter value.
  */
 enum perf_counter_read_format {
-	PERF_FORMAT_TOTAL_TIME_ENABLED	=  1,
-	PERF_FORMAT_TOTAL_TIME_RUNNING	=  2,
+	PERF_FORMAT_TOTAL_TIME_ENABLED	=  1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING	=  1U << 1,
+	PERF_FORMAT_ID			=  1U << 2,
 };
 
 /*
@@ -290,7 +291,7 @@ enum perf_event_type {
 	 *	{ u32			cpu, res; } && PERF_RECORD_CPU
 	 *
 	 *	{ u64			nr;
-	 *	  { u64 event, val; }	cnt[nr];  } && PERF_RECORD_GROUP
+	 *	  { u64 id, val; }	cnt[nr];  } && PERF_RECORD_GROUP
 	 *
 	 *	{ u16			nr,
 	 *				hv,
@@ -503,6 +504,7 @@ struct perf_counter {
 	struct rcu_head			rcu_head;
 
 	struct pid_namespace		*ns;
+	u64				id;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index caa012c..978ecfc 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1510,6 +1510,8 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = counter->total_time_running +
 			atomic64_read(&counter->child_total_time_running);
+	if (counter->hw_event.read_format & PERF_FORMAT_ID)
+		values[n++] = counter->id;
 	mutex_unlock(&counter->child_mutex);
 
 	if (count < n * sizeof(u64))
@@ -2303,7 +2305,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		u32 pid, tid;
 	} tid_entry;
 	struct {
-		u64 event;
+		u64 id;
 		u64 counter;
 	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
@@ -2416,7 +2418,7 @@ static void perf_counter_output(struct perf_counter *counter,
 			if (sub != counter)
 				sub->pmu->read(sub);
 
-			group_entry.event = sub->hw_event.config;
+			group_entry.id = sub->id;
 			group_entry.counter = atomic64_read(&sub->count);
 
 			perf_output_put(&handle, group_entry);
@@ -3375,6 +3377,8 @@ done:
 	return counter;
 }
 
+static atomic64_t perf_counter_id;
+
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
@@ -3470,6 +3474,7 @@ SYSCALL_DEFINE5(perf_counter_open,
 	mutex_unlock(&current->perf_counter_mutex);
 
 	counter->ns = get_pid_ns(current->nsproxy->pid_ns);
+	counter->id = atomic64_inc_return(&perf_counter_id);
 
 	fput_light(counter_file, fput_needed2);
 
-- 
cgit v0.10.2


From b23f3325ed465f1bd914384884269af0d106778c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 15:13:03 +0200
Subject: perf_counter: Rename various fields

A few renames:

  s/irq_period/sample_period/
  s/irq_freq/sample_freq/
  s/PERF_RECORD_/PERF_SAMPLE_/
  s/record_type/sample_type/

And change both the new sample_type and read_format to u64.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index f96d55f..c963332 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -535,7 +535,7 @@ void hw_perf_enable(void)
 			continue;
 		}
 		val = 0;
-		if (counter->hw.irq_period) {
+		if (counter->hw.sample_period) {
 			left = atomic64_read(&counter->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
@@ -749,12 +749,12 @@ static void power_pmu_unthrottle(struct perf_counter *counter)
 	s64 val, left;
 	unsigned long flags;
 
-	if (!counter->hw.idx || !counter->hw.irq_period)
+	if (!counter->hw.idx || !counter->hw.sample_period)
 		return;
 	local_irq_save(flags);
 	perf_disable();
 	power_pmu_read(counter);
-	left = counter->hw.irq_period;
+	left = counter->hw.sample_period;
 	val = 0;
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
@@ -789,7 +789,7 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
 	if (counter->hw_event.exclude_user
 	    || counter->hw_event.exclude_kernel
 	    || counter->hw_event.exclude_hv
-	    || counter->hw_event.irq_period)
+	    || counter->hw_event.sample_period)
 		return 0;
 
 	if (ppmu->limited_pmc_event(ev))
@@ -925,7 +925,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
+	atomic64_set(&counter->hw.period_left, counter->hw.sample_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -958,7 +958,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 static void record_and_restart(struct perf_counter *counter, long val,
 			       struct pt_regs *regs, int nmi)
 {
-	u64 period = counter->hw.irq_period;
+	u64 period = counter->hw.sample_period;
 	s64 prev, delta, left;
 	int record = 0;
 	u64 addr, mmcra, sdsync;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 316b0c9..ec06aa5 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -290,11 +290,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	hwc->nmi	= 1;
 	hw_event->nmi	= 1;
 
-	if (!hwc->irq_period)
-		hwc->irq_period = x86_pmu.max_period;
+	if (!hwc->sample_period)
+		hwc->sample_period = x86_pmu.max_period;
 
 	atomic64_set(&hwc->period_left,
-			min(x86_pmu.max_period, hwc->irq_period));
+			min(x86_pmu.max_period, hwc->sample_period));
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -462,7 +462,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = min(x86_pmu.max_period, hwc->irq_period);
+	s64 period = min(x86_pmu.max_period, hwc->sample_period);
 	int err;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4845a21..1fcd3cc 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -94,18 +94,18 @@ enum sw_event_ids {
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
 /*
- * Bits that can be set in hw_event.record_type to request information
+ * Bits that can be set in hw_event.sample_type to request information
  * in the overflow packets.
  */
-enum perf_counter_record_format {
-	PERF_RECORD_IP			= 1U << 0,
-	PERF_RECORD_TID			= 1U << 1,
-	PERF_RECORD_TIME		= 1U << 2,
-	PERF_RECORD_ADDR		= 1U << 3,
-	PERF_RECORD_GROUP		= 1U << 4,
-	PERF_RECORD_CALLCHAIN		= 1U << 5,
-	PERF_RECORD_CONFIG		= 1U << 6,
-	PERF_RECORD_CPU			= 1U << 7,
+enum perf_counter_sample_format {
+	PERF_SAMPLE_IP			= 1U << 0,
+	PERF_SAMPLE_TID			= 1U << 1,
+	PERF_SAMPLE_TIME		= 1U << 2,
+	PERF_SAMPLE_ADDR		= 1U << 3,
+	PERF_SAMPLE_GROUP		= 1U << 4,
+	PERF_SAMPLE_CALLCHAIN		= 1U << 5,
+	PERF_SAMPLE_CONFIG		= 1U << 6,
+	PERF_SAMPLE_CPU			= 1U << 7,
 };
 
 /*
@@ -132,12 +132,12 @@ struct perf_counter_hw_event {
 	__u64			config;
 
 	union {
-		__u64		irq_period;
-		__u64		irq_freq;
+		__u64		sample_period;
+		__u64		sample_freq;
 	};
 
-	__u32			record_type;
-	__u32			read_format;
+	__u64			sample_type;
+	__u64			read_format;
 
 	__u64			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
@@ -262,7 +262,7 @@ enum perf_event_type {
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u64				time;
-	 *	u64				irq_period;
+	 *	u64				sample_period;
 	 * };
 	 */
 	PERF_EVENT_PERIOD		= 4,
@@ -363,7 +363,7 @@ struct hw_perf_counter {
 		};
 	};
 	atomic64_t			prev_count;
-	u64				irq_period;
+	u64				sample_period;
 	atomic64_t			period_left;
 	u64				interrupts;
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 978ecfc..5ecd998 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1186,7 +1186,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period);
 static void perf_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
-	u64 interrupts, irq_period;
+	u64 interrupts, sample_period;
 	u64 events, period;
 	s64 delta;
 
@@ -1204,23 +1204,23 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 			interrupts = 2*sysctl_perf_counter_limit/HZ;
 		}
 
-		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+		if (!counter->hw_event.freq || !counter->hw_event.sample_freq)
 			continue;
 
-		events = HZ * interrupts * counter->hw.irq_period;
-		period = div64_u64(events, counter->hw_event.irq_freq);
+		events = HZ * interrupts * counter->hw.sample_period;
+		period = div64_u64(events, counter->hw_event.sample_freq);
 
-		delta = (s64)(1 + period - counter->hw.irq_period);
+		delta = (s64)(1 + period - counter->hw.sample_period);
 		delta >>= 1;
 
-		irq_period = counter->hw.irq_period + delta;
+		sample_period = counter->hw.sample_period + delta;
 
-		if (!irq_period)
-			irq_period = 1;
+		if (!sample_period)
+			sample_period = 1;
 
-		perf_log_period(counter, irq_period);
+		perf_log_period(counter, sample_period);
 
-		counter->hw.irq_period = irq_period;
+		counter->hw.sample_period = sample_period;
 	}
 	spin_unlock(&ctx->lock);
 }
@@ -2297,7 +2297,7 @@ static void perf_counter_output(struct perf_counter *counter,
 				int nmi, struct pt_regs *regs, u64 addr)
 {
 	int ret;
-	u64 record_type = counter->hw_event.record_type;
+	u64 sample_type = counter->hw_event.sample_type;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 	u64 ip;
@@ -2321,61 +2321,61 @@ static void perf_counter_output(struct perf_counter *counter,
 	header.misc = PERF_EVENT_MISC_OVERFLOW;
 	header.misc |= perf_misc_flags(regs);
 
-	if (record_type & PERF_RECORD_IP) {
+	if (sample_type & PERF_SAMPLE_IP) {
 		ip = perf_instruction_pointer(regs);
-		header.type |= PERF_RECORD_IP;
+		header.type |= PERF_SAMPLE_IP;
 		header.size += sizeof(ip);
 	}
 
-	if (record_type & PERF_RECORD_TID) {
+	if (sample_type & PERF_SAMPLE_TID) {
 		/* namespace issues */
 		tid_entry.pid = perf_counter_pid(counter, current);
 		tid_entry.tid = perf_counter_tid(counter, current);
 
-		header.type |= PERF_RECORD_TID;
+		header.type |= PERF_SAMPLE_TID;
 		header.size += sizeof(tid_entry);
 	}
 
-	if (record_type & PERF_RECORD_TIME) {
+	if (sample_type & PERF_SAMPLE_TIME) {
 		/*
 		 * Maybe do better on x86 and provide cpu_clock_nmi()
 		 */
 		time = sched_clock();
 
-		header.type |= PERF_RECORD_TIME;
+		header.type |= PERF_SAMPLE_TIME;
 		header.size += sizeof(u64);
 	}
 
-	if (record_type & PERF_RECORD_ADDR) {
-		header.type |= PERF_RECORD_ADDR;
+	if (sample_type & PERF_SAMPLE_ADDR) {
+		header.type |= PERF_SAMPLE_ADDR;
 		header.size += sizeof(u64);
 	}
 
-	if (record_type & PERF_RECORD_CONFIG) {
-		header.type |= PERF_RECORD_CONFIG;
+	if (sample_type & PERF_SAMPLE_CONFIG) {
+		header.type |= PERF_SAMPLE_CONFIG;
 		header.size += sizeof(u64);
 	}
 
-	if (record_type & PERF_RECORD_CPU) {
-		header.type |= PERF_RECORD_CPU;
+	if (sample_type & PERF_SAMPLE_CPU) {
+		header.type |= PERF_SAMPLE_CPU;
 		header.size += sizeof(cpu_entry);
 
 		cpu_entry.cpu = raw_smp_processor_id();
 	}
 
-	if (record_type & PERF_RECORD_GROUP) {
-		header.type |= PERF_RECORD_GROUP;
+	if (sample_type & PERF_SAMPLE_GROUP) {
+		header.type |= PERF_SAMPLE_GROUP;
 		header.size += sizeof(u64) +
 			counter->nr_siblings * sizeof(group_entry);
 	}
 
-	if (record_type & PERF_RECORD_CALLCHAIN) {
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		callchain = perf_callchain(regs);
 
 		if (callchain) {
 			callchain_size = (1 + callchain->nr) * sizeof(u64);
 
-			header.type |= PERF_RECORD_CALLCHAIN;
+			header.type |= PERF_SAMPLE_CALLCHAIN;
 			header.size += callchain_size;
 		}
 	}
@@ -2386,28 +2386,28 @@ static void perf_counter_output(struct perf_counter *counter,
 
 	perf_output_put(&handle, header);
 
-	if (record_type & PERF_RECORD_IP)
+	if (sample_type & PERF_SAMPLE_IP)
 		perf_output_put(&handle, ip);
 
-	if (record_type & PERF_RECORD_TID)
+	if (sample_type & PERF_SAMPLE_TID)
 		perf_output_put(&handle, tid_entry);
 
-	if (record_type & PERF_RECORD_TIME)
+	if (sample_type & PERF_SAMPLE_TIME)
 		perf_output_put(&handle, time);
 
-	if (record_type & PERF_RECORD_ADDR)
+	if (sample_type & PERF_SAMPLE_ADDR)
 		perf_output_put(&handle, addr);
 
-	if (record_type & PERF_RECORD_CONFIG)
+	if (sample_type & PERF_SAMPLE_CONFIG)
 		perf_output_put(&handle, counter->hw_event.config);
 
-	if (record_type & PERF_RECORD_CPU)
+	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(&handle, cpu_entry);
 
 	/*
-	 * XXX PERF_RECORD_GROUP vs inherited counters seems difficult.
+	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
 	 */
-	if (record_type & PERF_RECORD_GROUP) {
+	if (sample_type & PERF_SAMPLE_GROUP) {
 		struct perf_counter *leader, *sub;
 		u64 nr = counter->nr_siblings;
 
@@ -2702,7 +2702,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 }
 
 /*
- * Log irq_period changes so that analyzing tools can re-normalize the
+ * Log sample_period changes so that analyzing tools can re-normalize the
  * event flow.
  */
 
@@ -2725,7 +2725,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
 		.period = period,
 	};
 
-	if (counter->hw.irq_period == period)
+	if (counter->hw.sample_period == period)
 		return;
 
 	ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
@@ -2834,7 +2834,7 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->irq_period;
+	s64 period = hwc->sample_period;
 
 	if (unlikely(left <= -period)) {
 		left = period;
@@ -2874,7 +2874,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 			ret = HRTIMER_NORESTART;
 	}
 
-	period = max_t(u64, 10000, counter->hw.irq_period);
+	period = max_t(u64, 10000, counter->hw.sample_period);
 	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
 	return ret;
@@ -2959,7 +2959,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
 
-	if (counter->hw.irq_period && !neg && regs)
+	if (counter->hw.sample_period && !neg && regs)
 		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
 
@@ -3080,8 +3080,8 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
-	if (hwc->irq_period) {
-		u64 period = max_t(u64, 10000, hwc->irq_period);
+	if (hwc->sample_period) {
+		u64 period = max_t(u64, 10000, hwc->sample_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
@@ -3092,7 +3092,7 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	if (counter->hw.irq_period)
+	if (counter->hw.sample_period)
 		hrtimer_cancel(&counter->hw.hrtimer);
 	cpu_clock_perf_counter_update(counter);
 }
@@ -3132,8 +3132,8 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 	atomic64_set(&hwc->prev_count, now);
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
-	if (hwc->irq_period) {
-		u64 period = max_t(u64, 10000, hwc->irq_period);
+	if (hwc->sample_period) {
+		u64 period = max_t(u64, 10000, hwc->sample_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
@@ -3144,7 +3144,7 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	if (counter->hw.irq_period)
+	if (counter->hw.sample_period)
 		hrtimer_cancel(&counter->hw.hrtimer);
 	task_clock_perf_counter_update(counter, counter->ctx->time);
 
@@ -3223,7 +3223,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
-	counter->hw.irq_period = counter->hw_event.irq_period;
+	counter->hw.sample_period = counter->hw_event.sample_period;
 
 	return &perf_ops_generic;
 }
@@ -3323,15 +3323,15 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	pmu = NULL;
 
 	hwc = &counter->hw;
-	if (hw_event->freq && hw_event->irq_freq)
-		hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
+	if (hw_event->freq && hw_event->sample_freq)
+		hwc->sample_period = div64_u64(TICK_NSEC, hw_event->sample_freq);
 	else
-		hwc->irq_period = hw_event->irq_period;
+		hwc->sample_period = hw_event->sample_period;
 
 	/*
-	 * we currently do not support PERF_RECORD_GROUP on inherited counters
+	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
 	 */
-	if (hw_event->inherit && (hw_event->record_type & PERF_RECORD_GROUP))
+	if (hw_event->inherit && (hw_event->sample_type & PERF_SAMPLE_GROUP))
 		goto done;
 
 	if (perf_event_raw(hw_event)) {
-- 
cgit v0.10.2


From 8a016db386195b193e2a8aeddff9fe937dcb7a40 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 15:27:45 +0200
Subject: perf_counter: Remove the last nmi/irq bits

IRQ (non-NMI) sampling is not used anymore - remove the last few bits.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ec06aa5..9e144fb 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -284,12 +284,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (!hw_event->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
-	/*
-	 * Use NMI events all the time:
-	 */
-	hwc->nmi	= 1;
-	hw_event->nmi	= 1;
-
 	if (!hwc->sample_period)
 		hwc->sample_period = x86_pmu.max_period;
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1fcd3cc..cef9931 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -140,7 +140,6 @@ struct perf_counter_hw_event {
 	__u64			read_format;
 
 	__u64			disabled       :  1, /* off by default        */
-				nmi	       :  1, /* NMI sampling          */
 				inherit	       :  1, /* children inherit it   */
 				pinned	       :  1, /* must always be on PMU */
 				exclusive      :  1, /* only group on PMU     */
@@ -153,7 +152,7 @@ struct perf_counter_hw_event {
 				comm	       :  1, /* include comm data     */
 				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 51;
+				__reserved_1   : 52;
 
 	__u32			wakeup_events;	/* wakeup every n events */
 	__u32			__reserved_2;
@@ -354,7 +353,6 @@ struct hw_perf_counter {
 			u64				config;
 			unsigned long			config_base;
 			unsigned long			counter_base;
-			int				nmi;
 			int				idx;
 		};
 		union { /* software */
-- 
cgit v0.10.2


From e4abb5d4f7ddabc1fc7c392cf0a10d8e5868c9ca Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 16:08:20 +0200
Subject: perf_counter: x86: Emulate longer sample periods

Do as Power already does, emulate sample periods up to 2^63-1 by
composing them of smaller values limited by hardware capabilities.
Only once we wrap the software period do we generate an overflow
event.

Just 10 lines of new code.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9e144fb..904571b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -287,8 +287,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (!hwc->sample_period)
 		hwc->sample_period = x86_pmu.max_period;
 
-	atomic64_set(&hwc->period_left,
-			min(x86_pmu.max_period, hwc->sample_period));
+	atomic64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -451,13 +450,13 @@ static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
  * Set the next IRQ period, based on the hwc->period_left value.
  * To be called with the counter disabled in hw:
  */
-static void
+static int
 x86_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = min(x86_pmu.max_period, hwc->sample_period);
-	int err;
+	s64 period = hwc->sample_period;
+	int err, ret = 0;
 
 	/*
 	 * If we are way outside a reasoable range then just skip forward:
@@ -465,11 +464,13 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 	if (unlikely(left <= -period)) {
 		left = period;
 		atomic64_set(&hwc->period_left, left);
+		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
 		atomic64_set(&hwc->period_left, left);
+		ret = 1;
 	}
 	/*
 	 * Quirk: certain CPUs dont like it if just 1 event is left:
@@ -477,6 +478,9 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 	if (unlikely(left < 2))
 		left = 2;
 
+	if (left > x86_pmu.max_period)
+		left = x86_pmu.max_period;
+
 	per_cpu(prev_left[idx], smp_processor_id()) = left;
 
 	/*
@@ -487,6 +491,8 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 
 	err = checking_wrmsrl(hwc->counter_base + idx,
 			     (u64)(-left) & x86_pmu.counter_mask);
+
+	return ret;
 }
 
 static inline void
@@ -706,16 +712,19 @@ static void x86_pmu_disable(struct perf_counter *counter)
  * Save and restart an expired counter. Called by NMI contexts,
  * so it has to be careful about preempting normal counter ops:
  */
-static void intel_pmu_save_and_restart(struct perf_counter *counter)
+static int intel_pmu_save_and_restart(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 	int idx = hwc->idx;
+	int ret;
 
 	x86_perf_counter_update(counter, hwc, idx);
-	x86_perf_counter_set_period(counter, hwc, idx);
+	ret = x86_perf_counter_set_period(counter, hwc, idx);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		intel_pmu_enable_counter(hwc, idx);
+
+	return ret;
 }
 
 static void intel_pmu_reset(void)
@@ -782,7 +791,9 @@ again:
 		if (!test_bit(bit, cpuc->active_mask))
 			continue;
 
-		intel_pmu_save_and_restart(counter);
+		if (!intel_pmu_save_and_restart(counter))
+			continue;
+
 		if (perf_counter_overflow(counter, nmi, regs, 0))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
@@ -824,9 +835,11 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 			continue;
 
 		/* counter overflow */
-		x86_perf_counter_set_period(counter, hwc, idx);
 		handled = 1;
 		inc_irq_stat(apic_perf_irqs);
+		if (!x86_perf_counter_set_period(counter, hwc, idx))
+			continue;
+
 		if (perf_counter_overflow(counter, nmi, regs, 0))
 			amd_pmu_disable_counter(hwc, idx);
 	}
-- 
cgit v0.10.2


From 8e3747c13c39246c7e46def7cf495d9d21d4c5f9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 16:16:02 +0200
Subject: perf_counter: Change data head from u32 to u64

Since some people worried that 4G might not be a large enough
as an mmap data window, extend it to 64 bit for capable
platforms.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index cef9931..c046f7d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -212,7 +212,7 @@ struct perf_counter_mmap_page {
 	 * User-space reading this value should issue an rmb(), on SMP capable
 	 * platforms, after reading this value -- see perf_counter_wakeup().
 	 */
-	__u32   data_head;		/* head in the data section */
+	__u64   data_head;		/* head in the data section */
 };
 
 #define PERF_EVENT_MISC_CPUMODE_MASK	(3 << 0)
@@ -397,10 +397,11 @@ struct perf_mmap_data {
 	int				nr_locked;	/* nr pages mlocked  */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
-	atomic_t			head;		/* write position    */
 	atomic_t			events;		/* event limit       */
 
-	atomic_t			done_head;	/* completed head    */
+	atomic_long_t			head;		/* write position    */
+	atomic_long_t			done_head;	/* completed head    */
+
 	atomic_t			lock;		/* concurrent writes */
 
 	atomic_t			wakeup;		/* needs a wakeup    */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5ecd998..3f11a2b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2067,8 +2067,8 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 struct perf_output_handle {
 	struct perf_counter	*counter;
 	struct perf_mmap_data	*data;
-	unsigned int		offset;
-	unsigned int		head;
+	unsigned long		head;
+	unsigned long		offset;
 	int			nmi;
 	int			overflow;
 	int			locked;
@@ -2122,7 +2122,8 @@ static void perf_output_lock(struct perf_output_handle *handle)
 static void perf_output_unlock(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int head, cpu;
+	unsigned long head;
+	int cpu;
 
 	data->done_head = data->head;
 
@@ -2135,7 +2136,7 @@ again:
 	 * before we publish the new head, matched by a rmb() in userspace when
 	 * reading this position.
 	 */
-	while ((head = atomic_xchg(&data->done_head, 0)))
+	while ((head = atomic_long_xchg(&data->done_head, 0)))
 		data->user_page->data_head = head;
 
 	/*
@@ -2148,7 +2149,7 @@ again:
 	/*
 	 * Therefore we have to validate we did not indeed do so.
 	 */
-	if (unlikely(atomic_read(&data->done_head))) {
+	if (unlikely(atomic_long_read(&data->done_head))) {
 		/*
 		 * Since we had it locked, we can lock it again.
 		 */
@@ -2195,7 +2196,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	do {
 		offset = head = atomic_read(&data->head);
 		head += size;
-	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
+	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
 
 	handle->offset	= offset;
 	handle->head	= head;
@@ -2246,7 +2247,7 @@ static void perf_output_copy(struct perf_output_handle *handle,
 	 * Check we didn't copy past our reservation window, taking the
 	 * possible unsigned int wrap into account.
 	 */
-	WARN_ON_ONCE(((int)(handle->head - handle->offset)) < 0);
+	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
 }
 
 #define perf_output_put(handle, x) \
-- 
cgit v0.10.2


From 08247e31ca79b8f02cce47b7e8120797a8726606 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 16:46:57 +0200
Subject: perf_counter: Add ioctl for changing the sample period/frequency

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c046f7d..45bdd3b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -164,10 +164,11 @@ struct perf_counter_hw_event {
 /*
  * Ioctls that can be done on a perf counter fd:
  */
-#define PERF_COUNTER_IOC_ENABLE		_IOW('$', 0, u32)
-#define PERF_COUNTER_IOC_DISABLE	_IOW('$', 1, u32)
-#define PERF_COUNTER_IOC_REFRESH	_IOW('$', 2, u32)
-#define PERF_COUNTER_IOC_RESET		_IOW('$', 3, u32)
+#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_REFRESH	_IO ('$', 2)
+#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
+#define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
 
 enum perf_counter_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3f11a2b..abe2f3b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1604,6 +1604,43 @@ static void perf_counter_for_each(struct perf_counter *counter,
 	mutex_unlock(&counter->child_mutex);
 }
 
+static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long size;
+	int ret = 0;
+	u64 value;
+
+	if (!counter->hw_event.sample_period)
+		return -EINVAL;
+
+	size = copy_from_user(&value, arg, sizeof(value));
+	if (size != sizeof(value))
+		return -EFAULT;
+
+	if (!value)
+		return -EINVAL;
+
+	spin_lock_irq(&ctx->lock);
+	if (counter->hw_event.freq) {
+		if (value > sysctl_perf_counter_limit) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		counter->hw_event.sample_freq = value;
+	} else {
+		counter->hw_event.sample_period = value;
+		counter->hw.sample_period = value;
+
+		perf_log_period(counter, value);
+	}
+unlock:
+	spin_unlock_irq(&ctx->lock);
+
+	return ret;
+}
+
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct perf_counter *counter = file->private_data;
@@ -1623,6 +1660,10 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	case PERF_COUNTER_IOC_REFRESH:
 		return perf_counter_refresh(counter, arg);
+
+	case PERF_COUNTER_IOC_PERIOD:
+		return perf_counter_period(counter, (u64 __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
-- 
cgit v0.10.2


From 0d48696f87e3618b0d35bd3e4e9d7c188d51e7de Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 19:22:16 +0200
Subject: perf_counter: Rename perf_counter_hw_event => perf_counter_attr

The structure isn't hw only and when I read event, I think about those
things that fall out the other end. Rename the thing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Cc: Stephane Eranian <eranian@googlemail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index c963332..ea54686 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -262,13 +262,13 @@ static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
 		}
 		counter = ctrs[i];
 		if (first) {
-			eu = counter->hw_event.exclude_user;
-			ek = counter->hw_event.exclude_kernel;
-			eh = counter->hw_event.exclude_hv;
+			eu = counter->attr.exclude_user;
+			ek = counter->attr.exclude_kernel;
+			eh = counter->attr.exclude_hv;
 			first = 0;
-		} else if (counter->hw_event.exclude_user != eu ||
-			   counter->hw_event.exclude_kernel != ek ||
-			   counter->hw_event.exclude_hv != eh) {
+		} else if (counter->attr.exclude_user != eu ||
+			   counter->attr.exclude_kernel != ek ||
+			   counter->attr.exclude_hv != eh) {
 			return -EAGAIN;
 		}
 	}
@@ -483,16 +483,16 @@ void hw_perf_enable(void)
 
 	/*
 	 * Add in MMCR0 freeze bits corresponding to the
-	 * hw_event.exclude_* bits for the first counter.
+	 * attr.exclude_* bits for the first counter.
 	 * We have already checked that all counters have the
 	 * same values for these bits as the first counter.
 	 */
 	counter = cpuhw->counter[0];
-	if (counter->hw_event.exclude_user)
+	if (counter->attr.exclude_user)
 		cpuhw->mmcr[0] |= MMCR0_FCP;
-	if (counter->hw_event.exclude_kernel)
+	if (counter->attr.exclude_kernel)
 		cpuhw->mmcr[0] |= freeze_counters_kernel;
-	if (counter->hw_event.exclude_hv)
+	if (counter->attr.exclude_hv)
 		cpuhw->mmcr[0] |= MMCR0_FCHV;
 
 	/*
@@ -786,10 +786,10 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
 	int n;
 	u64 alt[MAX_EVENT_ALTERNATIVES];
 
-	if (counter->hw_event.exclude_user
-	    || counter->hw_event.exclude_kernel
-	    || counter->hw_event.exclude_hv
-	    || counter->hw_event.sample_period)
+	if (counter->attr.exclude_user
+	    || counter->attr.exclude_kernel
+	    || counter->attr.exclude_hv
+	    || counter->attr.sample_period)
 		return 0;
 
 	if (ppmu->limited_pmc_event(ev))
@@ -855,13 +855,13 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if (!perf_event_raw(&counter->hw_event)) {
-		ev = perf_event_id(&counter->hw_event);
+	if (!perf_event_raw(&counter->attr)) {
+		ev = perf_event_id(&counter->attr);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return ERR_PTR(-EOPNOTSUPP);
 		ev = ppmu->generic_events[ev];
 	} else {
-		ev = perf_event_config(&counter->hw_event);
+		ev = perf_event_config(&counter->attr);
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
@@ -872,7 +872,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	 * the user set it to.
 	 */
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
-		counter->hw_event.exclude_hv = 0;
+		counter->attr.exclude_hv = 0;
 
 	/*
 	 * If this is a per-task counter, then we can use
@@ -990,7 +990,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	if (record) {
 		addr = 0;
-		if (counter->hw_event.record_type & PERF_RECORD_ADDR) {
+		if (counter->attr.record_type & PERF_RECORD_ADDR) {
 			/*
 			 * The user wants a data address recorded.
 			 * If we're not doing instruction sampling,
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 904571b..e16e8c1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -247,11 +247,11 @@ static inline int x86_pmu_initialized(void)
 }
 
 /*
- * Setup the hardware configuration for a given hw_event_type
+ * Setup the hardware configuration for a given attr_type
  */
 static int __hw_perf_counter_init(struct perf_counter *counter)
 {
-	struct perf_counter_hw_event *hw_event = &counter->hw_event;
+	struct perf_counter_attr *attr = &counter->attr;
 	struct hw_perf_counter *hwc = &counter->hw;
 	int err;
 
@@ -279,9 +279,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Count user and OS events unless requested not to.
 	 */
-	if (!hw_event->exclude_user)
+	if (!attr->exclude_user)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!hw_event->exclude_kernel)
+	if (!attr->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
 	if (!hwc->sample_period)
@@ -292,15 +292,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (perf_event_raw(hw_event)) {
-		hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event));
+	if (perf_event_raw(attr)) {
+		hwc->config |= x86_pmu.raw_event(perf_event_config(attr));
 	} else {
-		if (perf_event_id(hw_event) >= x86_pmu.max_events)
+		if (perf_event_id(attr) >= x86_pmu.max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= x86_pmu.event_map(perf_event_id(hw_event));
+		hwc->config |= x86_pmu.event_map(perf_event_id(attr));
 	}
 
 	counter->destroy = hw_perf_counter_destroy;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 45bdd3b..37d5541 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -22,7 +22,7 @@
  */
 
 /*
- * hw_event.type
+ * attr.type
  */
 enum perf_event_types {
 	PERF_TYPE_HARDWARE		= 0,
@@ -37,10 +37,10 @@ enum perf_event_types {
 };
 
 /*
- * Generalized performance counter event types, used by the hw_event.event_id
+ * Generalized performance counter event types, used by the attr.event_id
  * parameter of the sys_perf_counter_open() syscall:
  */
-enum hw_event_ids {
+enum attr_ids {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
@@ -94,7 +94,7 @@ enum sw_event_ids {
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
 /*
- * Bits that can be set in hw_event.sample_type to request information
+ * Bits that can be set in attr.sample_type to request information
  * in the overflow packets.
  */
 enum perf_counter_sample_format {
@@ -109,7 +109,7 @@ enum perf_counter_sample_format {
 };
 
 /*
- * Bits that can be set in hw_event.read_format to request that
+ * Bits that can be set in attr.read_format to request that
  * reads on the counter should return the indicated quantities,
  * in increasing order of bit value, after the counter value.
  */
@@ -122,7 +122,7 @@ enum perf_counter_read_format {
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
-struct perf_counter_hw_event {
+struct perf_counter_attr {
 	/*
 	 * The MSB of the config word signifies if the rest contains cpu
 	 * specific (raw) counter configuration data, if unset, the next
@@ -323,25 +323,25 @@ enum perf_event_type {
 
 struct task_struct;
 
-static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event)
+static inline u64 perf_event_raw(struct perf_counter_attr *attr)
 {
-	return hw_event->config & PERF_COUNTER_RAW_MASK;
+	return attr->config & PERF_COUNTER_RAW_MASK;
 }
 
-static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event)
+static inline u64 perf_event_config(struct perf_counter_attr *attr)
 {
-	return hw_event->config & PERF_COUNTER_CONFIG_MASK;
+	return attr->config & PERF_COUNTER_CONFIG_MASK;
 }
 
-static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event)
+static inline u64 perf_event_type(struct perf_counter_attr *attr)
 {
-	return (hw_event->config & PERF_COUNTER_TYPE_MASK) >>
+	return (attr->config & PERF_COUNTER_TYPE_MASK) >>
 		PERF_COUNTER_TYPE_SHIFT;
 }
 
-static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event)
+static inline u64 perf_event_id(struct perf_counter_attr *attr)
 {
-	return hw_event->config & PERF_COUNTER_EVENT_MASK;
+	return attr->config & PERF_COUNTER_EVENT_MASK;
 }
 
 /**
@@ -457,7 +457,7 @@ struct perf_counter {
 	u64				tstamp_running;
 	u64				tstamp_stopped;
 
-	struct perf_counter_hw_event	hw_event;
+	struct perf_counter_attr	attr;
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
@@ -605,8 +605,8 @@ extern int perf_counter_overflow(struct perf_counter *counter,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !perf_event_raw(&counter->hw_event) &&
-		perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
+	return !perf_event_raw(&counter->attr) &&
+		perf_event_type(&counter->attr) != PERF_TYPE_HARDWARE;
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 79faae9..c6c84ad 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -55,7 +55,7 @@ struct compat_timeval;
 struct robust_list_head;
 struct getcpu_cache;
 struct old_linux_dirent;
-struct perf_counter_hw_event;
+struct perf_counter_attr;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 
 asmlinkage long sys_perf_counter_open(
-		const struct perf_counter_hw_event __user *hw_event_uptr,
+		const struct perf_counter_attr __user *attr_uptr,
 		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index abe2f3b..317cef7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -260,7 +260,7 @@ counter_sched_out(struct perf_counter *counter,
 	if (!is_software_counter(counter))
 		cpuctx->active_oncpu--;
 	ctx->nr_active--;
-	if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+	if (counter->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 }
 
@@ -282,7 +282,7 @@ group_sched_out(struct perf_counter *group_counter,
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
 		counter_sched_out(counter, cpuctx, ctx);
 
-	if (group_counter->hw_event.exclusive)
+	if (group_counter->attr.exclusive)
 		cpuctx->exclusive = 0;
 }
 
@@ -550,7 +550,7 @@ counter_sched_in(struct perf_counter *counter,
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
 
-	if (counter->hw_event.exclusive)
+	if (counter->attr.exclusive)
 		cpuctx->exclusive = 1;
 
 	return 0;
@@ -642,7 +642,7 @@ static int group_can_go_on(struct perf_counter *counter,
 	 * If this group is exclusive and there are already
 	 * counters on the CPU, it can't go on.
 	 */
-	if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+	if (counter->attr.exclusive && cpuctx->active_oncpu)
 		return 0;
 	/*
 	 * Otherwise, try to add it if all previous groups were able
@@ -725,7 +725,7 @@ static void __perf_install_in_context(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned) {
+		if (leader->attr.pinned) {
 			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
 		}
@@ -849,7 +849,7 @@ static void __perf_counter_enable(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned) {
+		if (leader->attr.pinned) {
 			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
 		}
@@ -927,7 +927,7 @@ static int perf_counter_refresh(struct perf_counter *counter, int refresh)
 	/*
 	 * not supported on inherited counters
 	 */
-	if (counter->hw_event.inherit)
+	if (counter->attr.inherit)
 		return -EINVAL;
 
 	atomic_add(refresh, &counter->event_limit);
@@ -1094,7 +1094,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	 */
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		if (counter->state <= PERF_COUNTER_STATE_OFF ||
-		    !counter->hw_event.pinned)
+		    !counter->attr.pinned)
 			continue;
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
@@ -1122,7 +1122,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		 * ignore pinned counters since we did them already.
 		 */
 		if (counter->state <= PERF_COUNTER_STATE_OFF ||
-		    counter->hw_event.pinned)
+		    counter->attr.pinned)
 			continue;
 
 		/*
@@ -1204,11 +1204,11 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 			interrupts = 2*sysctl_perf_counter_limit/HZ;
 		}
 
-		if (!counter->hw_event.freq || !counter->hw_event.sample_freq)
+		if (!counter->attr.freq || !counter->attr.sample_freq)
 			continue;
 
 		events = HZ * interrupts * counter->hw.sample_period;
-		period = div64_u64(events, counter->hw_event.sample_freq);
+		period = div64_u64(events, counter->attr.sample_freq);
 
 		delta = (s64)(1 + period - counter->hw.sample_period);
 		delta >>= 1;
@@ -1444,11 +1444,11 @@ static void free_counter(struct perf_counter *counter)
 	perf_pending_sync(counter);
 
 	atomic_dec(&nr_counters);
-	if (counter->hw_event.mmap)
+	if (counter->attr.mmap)
 		atomic_dec(&nr_mmap_tracking);
-	if (counter->hw_event.munmap)
+	if (counter->attr.munmap)
 		atomic_dec(&nr_munmap_tracking);
-	if (counter->hw_event.comm)
+	if (counter->attr.comm)
 		atomic_dec(&nr_comm_tracking);
 
 	if (counter->destroy)
@@ -1504,13 +1504,13 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	mutex_lock(&counter->child_mutex);
 	values[0] = perf_counter_read(counter);
 	n = 1;
-	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
 		values[n++] = counter->total_time_enabled +
 			atomic64_read(&counter->child_total_time_enabled);
-	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = counter->total_time_running +
 			atomic64_read(&counter->child_total_time_running);
-	if (counter->hw_event.read_format & PERF_FORMAT_ID)
+	if (counter->attr.read_format & PERF_FORMAT_ID)
 		values[n++] = counter->id;
 	mutex_unlock(&counter->child_mutex);
 
@@ -1611,7 +1611,7 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 	int ret = 0;
 	u64 value;
 
-	if (!counter->hw_event.sample_period)
+	if (!counter->attr.sample_period)
 		return -EINVAL;
 
 	size = copy_from_user(&value, arg, sizeof(value));
@@ -1622,15 +1622,15 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 		return -EINVAL;
 
 	spin_lock_irq(&ctx->lock);
-	if (counter->hw_event.freq) {
+	if (counter->attr.freq) {
 		if (value > sysctl_perf_counter_limit) {
 			ret = -EINVAL;
 			goto unlock;
 		}
 
-		counter->hw_event.sample_freq = value;
+		counter->attr.sample_freq = value;
 	} else {
-		counter->hw_event.sample_period = value;
+		counter->attr.sample_period = value;
 		counter->hw.sample_period = value;
 
 		perf_log_period(counter, value);
@@ -2299,7 +2299,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 	struct perf_counter *counter = handle->counter;
 	struct perf_mmap_data *data = handle->data;
 
-	int wakeup_events = counter->hw_event.wakeup_events;
+	int wakeup_events = counter->attr.wakeup_events;
 
 	if (handle->overflow && wakeup_events) {
 		int events = atomic_inc_return(&data->events);
@@ -2339,7 +2339,7 @@ static void perf_counter_output(struct perf_counter *counter,
 				int nmi, struct pt_regs *regs, u64 addr)
 {
 	int ret;
-	u64 sample_type = counter->hw_event.sample_type;
+	u64 sample_type = counter->attr.sample_type;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 	u64 ip;
@@ -2441,7 +2441,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		perf_output_put(&handle, addr);
 
 	if (sample_type & PERF_SAMPLE_CONFIG)
-		perf_output_put(&handle, counter->hw_event.config);
+		perf_output_put(&handle, counter->attr.config);
 
 	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(&handle, cpu_entry);
@@ -2512,7 +2512,7 @@ static void perf_counter_comm_output(struct perf_counter *counter,
 static int perf_counter_comm_match(struct perf_counter *counter,
 				   struct perf_comm_event *comm_event)
 {
-	if (counter->hw_event.comm &&
+	if (counter->attr.comm &&
 	    comm_event->event.header.type == PERF_EVENT_COMM)
 		return 1;
 
@@ -2623,11 +2623,11 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 static int perf_counter_mmap_match(struct perf_counter *counter,
 				   struct perf_mmap_event *mmap_event)
 {
-	if (counter->hw_event.mmap &&
+	if (counter->attr.mmap &&
 	    mmap_event->event.header.type == PERF_EVENT_MMAP)
 		return 1;
 
-	if (counter->hw_event.munmap &&
+	if (counter->attr.munmap &&
 	    mmap_event->event.header.type == PERF_EVENT_MUNMAP)
 		return 1;
 
@@ -2907,8 +2907,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	 * In case we exclude kernel IPs or are somehow not in interrupt
 	 * context, provide the next best thing, the user IP.
 	 */
-	if ((counter->hw_event.exclude_kernel || !regs) &&
-			!counter->hw_event.exclude_user)
+	if ((counter->attr.exclude_kernel || !regs) &&
+			!counter->attr.exclude_user)
 		regs = task_pt_regs(current);
 
 	if (regs) {
@@ -2982,14 +2982,14 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	if (!perf_swcounter_is_counting(counter))
 		return 0;
 
-	if (counter->hw_event.config != event_config)
+	if (counter->attr.config != event_config)
 		return 0;
 
 	if (regs) {
-		if (counter->hw_event.exclude_user && user_mode(regs))
+		if (counter->attr.exclude_user && user_mode(regs))
 			return 0;
 
-		if (counter->hw_event.exclude_kernel && !user_mode(regs))
+		if (counter->attr.exclude_kernel && !user_mode(regs))
 			return 0;
 	}
 
@@ -3252,12 +3252,12 @@ extern void ftrace_profile_disable(int);
 
 static void tp_perf_counter_destroy(struct perf_counter *counter)
 {
-	ftrace_profile_disable(perf_event_id(&counter->hw_event));
+	ftrace_profile_disable(perf_event_id(&counter->attr));
 }
 
 static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 {
-	int event_id = perf_event_id(&counter->hw_event);
+	int event_id = perf_event_id(&counter->attr);
 	int ret;
 
 	ret = ftrace_profile_enable(event_id);
@@ -3265,7 +3265,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
-	counter->hw.sample_period = counter->hw_event.sample_period;
+	counter->hw.sample_period = counter->attr.sample_period;
 
 	return &perf_ops_generic;
 }
@@ -3287,7 +3287,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (perf_event_id(&counter->hw_event)) {
+	switch (perf_event_id(&counter->attr)) {
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
@@ -3319,7 +3319,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(struct perf_counter_hw_event *hw_event,
+perf_counter_alloc(struct perf_counter_attr *attr,
 		   int cpu,
 		   struct perf_counter_context *ctx,
 		   struct perf_counter *group_leader,
@@ -3352,36 +3352,36 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	mutex_init(&counter->mmap_mutex);
 
 	counter->cpu			= cpu;
-	counter->hw_event		= *hw_event;
+	counter->attr		= *attr;
 	counter->group_leader		= group_leader;
 	counter->pmu			= NULL;
 	counter->ctx			= ctx;
 	counter->oncpu			= -1;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	if (hw_event->disabled)
+	if (attr->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
 	pmu = NULL;
 
 	hwc = &counter->hw;
-	if (hw_event->freq && hw_event->sample_freq)
-		hwc->sample_period = div64_u64(TICK_NSEC, hw_event->sample_freq);
+	if (attr->freq && attr->sample_freq)
+		hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq);
 	else
-		hwc->sample_period = hw_event->sample_period;
+		hwc->sample_period = attr->sample_period;
 
 	/*
 	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
 	 */
-	if (hw_event->inherit && (hw_event->sample_type & PERF_SAMPLE_GROUP))
+	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
 		goto done;
 
-	if (perf_event_raw(hw_event)) {
+	if (perf_event_raw(attr)) {
 		pmu = hw_perf_counter_init(counter);
 		goto done;
 	}
 
-	switch (perf_event_type(hw_event)) {
+	switch (perf_event_type(attr)) {
 	case PERF_TYPE_HARDWARE:
 		pmu = hw_perf_counter_init(counter);
 		break;
@@ -3409,11 +3409,11 @@ done:
 	counter->pmu = pmu;
 
 	atomic_inc(&nr_counters);
-	if (counter->hw_event.mmap)
+	if (counter->attr.mmap)
 		atomic_inc(&nr_mmap_tracking);
-	if (counter->hw_event.munmap)
+	if (counter->attr.munmap)
 		atomic_inc(&nr_munmap_tracking);
-	if (counter->hw_event.comm)
+	if (counter->attr.comm)
 		atomic_inc(&nr_comm_tracking);
 
 	return counter;
@@ -3424,17 +3424,17 @@ static atomic64_t perf_counter_id;
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
- * @hw_event_uptr:	event type attributes for monitoring/sampling
+ * @attr_uptr:	event type attributes for monitoring/sampling
  * @pid:		target pid
  * @cpu:		target cpu
  * @group_fd:		group leader counter fd
  */
 SYSCALL_DEFINE5(perf_counter_open,
-		const struct perf_counter_hw_event __user *, hw_event_uptr,
+		const struct perf_counter_attr __user *, attr_uptr,
 		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
 	struct perf_counter *counter, *group_leader;
-	struct perf_counter_hw_event hw_event;
+	struct perf_counter_attr attr;
 	struct perf_counter_context *ctx;
 	struct file *counter_file = NULL;
 	struct file *group_file = NULL;
@@ -3446,7 +3446,7 @@ SYSCALL_DEFINE5(perf_counter_open,
 	if (flags)
 		return -EINVAL;
 
-	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
+	if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
 		return -EFAULT;
 
 	/*
@@ -3484,11 +3484,11 @@ SYSCALL_DEFINE5(perf_counter_open,
 		/*
 		 * Only a group leader can be exclusive or pinned
 		 */
-		if (hw_event.exclusive || hw_event.pinned)
+		if (attr.exclusive || attr.pinned)
 			goto err_put_context;
 	}
 
-	counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
+	counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
 				     GFP_KERNEL);
 	ret = PTR_ERR(counter);
 	if (IS_ERR(counter))
@@ -3556,7 +3556,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	if (parent_counter->parent)
 		parent_counter = parent_counter->parent;
 
-	child_counter = perf_counter_alloc(&parent_counter->hw_event,
+	child_counter = perf_counter_alloc(&parent_counter->attr,
 					   parent_counter->cpu, child_ctx,
 					   group_leader, GFP_KERNEL);
 	if (IS_ERR(child_counter))
@@ -3565,7 +3565,7 @@ inherit_counter(struct perf_counter *parent_counter,
 
 	/*
 	 * Make the child state follow the state of the parent counter,
-	 * not its hw_event.disabled bit.  We hold the parent's mutex,
+	 * not its attr.disabled bit.  We hold the parent's mutex,
 	 * so we won't race with perf_counter_{en, dis}able_family.
 	 */
 	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
@@ -3582,7 +3582,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * inherit into child's child as well:
 	 */
-	child_counter->hw_event.inherit = 1;
+	child_counter->attr.inherit = 1;
 
 	/*
 	 * Get a reference to the parent filp - we will fput it
@@ -3838,7 +3838,7 @@ int perf_counter_init_task(struct task_struct *child)
 		if (counter != counter->group_leader)
 			continue;
 
-		if (!counter->hw_event.inherit) {
+		if (!counter->attr.inherit) {
 			inherited_all = 0;
 			continue;
 		}
-- 
cgit v0.10.2


From c70975bc8d5bac487616785f5d5bc7b090dfa2d9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 17:38:21 +0200
Subject: perf_counter tools: Fix up the ABI shakeup

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Cc: Stephane Eranian <eranian@googlemail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index bace7a8..c2fd042 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -322,7 +322,7 @@ static void synthesize_events(void)
 
 static void open_counters(int cpu, pid_t pid)
 {
-	struct perf_counter_hw_event hw_event;
+	struct perf_counter_attr attr;
 	int counter, group_fd;
 	int track = 1;
 
@@ -334,18 +334,18 @@ static void open_counters(int cpu, pid_t pid)
 	group_fd = -1;
 	for (counter = 0; counter < nr_counters; counter++) {
 
-		memset(&hw_event, 0, sizeof(hw_event));
-		hw_event.config		= event_id[counter];
-		hw_event.irq_period	= event_count[counter];
-		hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
-		hw_event.mmap		= track;
-		hw_event.comm		= track;
-		hw_event.inherit	= (cpu < 0) && inherit;
+		memset(&attr, 0, sizeof(attr));
+		attr.config		= event_id[counter];
+		attr.sample_period	= event_count[counter];
+		attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+		attr.mmap		= track;
+		attr.comm		= track;
+		attr.inherit	= (cpu < 0) && inherit;
 
 		track = 0; // only the first counter needs these
 
 		fd[nr_cpu][counter] =
-			sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
+			sys_perf_counter_open(&attr, pid, cpu, group_fd, 0);
 
 		if (fd[nr_cpu][counter] < 0) {
 			int err = errno;
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 644f850..27abe6a 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -79,22 +79,22 @@ static __u64			walltime_nsecs;
 
 static void create_perfstat_counter(int counter)
 {
-	struct perf_counter_hw_event hw_event;
+	struct perf_counter_attr attr;
 
-	memset(&hw_event, 0, sizeof(hw_event));
-	hw_event.config		= event_id[counter];
-	hw_event.record_type	= 0;
-	hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
-	hw_event.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
+	memset(&attr, 0, sizeof(attr));
+	attr.config		= event_id[counter];
+	attr.sample_type	= 0;
+	attr.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
+	attr.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
 
 	if (scale)
-		hw_event.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
+		attr.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
 					  PERF_FORMAT_TOTAL_TIME_RUNNING;
 
 	if (system_wide) {
 		int cpu;
 		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
+			fd[cpu][counter] = sys_perf_counter_open(&attr, -1, cpu, -1, 0);
 			if (fd[cpu][counter] < 0) {
 				printf("perfstat error: syscall returned with %d (%s)\n",
 						fd[cpu][counter], strerror(errno));
@@ -102,10 +102,10 @@ static void create_perfstat_counter(int counter)
 			}
 		}
 	} else {
-		hw_event.inherit	= inherit;
-		hw_event.disabled	= 1;
+		attr.inherit	= inherit;
+		attr.disabled	= 1;
 
-		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
+		fd[0][counter] = sys_perf_counter_open(&attr, 0, -1, -1, 0);
 		if (fd[0][counter] < 0) {
 			printf("perfstat error: syscall returned with %d (%s)\n",
 					fd[0][counter], strerror(errno));
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index a2cff7b..5029d8e 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -537,7 +537,7 @@ static void mmap_read(struct mmap_data *md)
 		old += size;
 
 		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
-			if (event->header.type & PERF_RECORD_IP)
+			if (event->header.type & PERF_SAMPLE_IP)
 				process_event(event->ip.ip, md->counter);
 		} else {
 			switch (event->header.type) {
@@ -563,7 +563,7 @@ static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 
 static int __cmd_top(void)
 {
-	struct perf_counter_hw_event hw_event;
+	struct perf_counter_attr attr;
 	pthread_t thread;
 	int i, counter, group_fd, nr_poll = 0;
 	unsigned int cpu;
@@ -577,15 +577,15 @@ static int __cmd_top(void)
 			if (target_pid == -1 && profile_cpu == -1)
 				cpu = i;
 
-			memset(&hw_event, 0, sizeof(hw_event));
-			hw_event.config		= event_id[counter];
-			hw_event.irq_period	= event_count[counter];
-			hw_event.record_type	= PERF_RECORD_IP | PERF_RECORD_TID;
-			hw_event.mmap		= use_mmap;
-			hw_event.munmap		= use_munmap;
-			hw_event.freq		= freq;
+			memset(&attr, 0, sizeof(attr));
+			attr.config		= event_id[counter];
+			attr.sample_period	= event_count[counter];
+			attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+			attr.mmap		= use_mmap;
+			attr.munmap		= use_munmap;
+			attr.freq		= freq;
 
-			fd[i][counter] = sys_perf_counter_open(&hw_event, target_pid, cpu, group_fd, 0);
+			fd[i][counter] = sys_perf_counter_open(&attr, target_pid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {
 				int err = errno;
 				printf("kerneltop error: syscall returned with %d (%s)\n",
diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
index 5a2520b..10622a4 100644
--- a/Documentation/perf_counter/perf.h
+++ b/Documentation/perf_counter/perf.h
@@ -53,11 +53,11 @@ static inline unsigned long long rdclock(void)
 	_min1 < _min2 ? _min1 : _min2; })
 
 static inline int
-sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
+sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
 		      pid_t pid, int cpu, int group_fd,
 		      unsigned long flags)
 {
-	return syscall(__NR_perf_counter_open, hw_event_uptr, pid, cpu,
+	return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
 		       group_fd, flags);
 }
 
-- 
cgit v0.10.2


From 436224a6d8bb3e29fe0cc18122f8d1f593da67b8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 21:02:36 +0200
Subject: perf report: Separate out idle threads

Introduce the special comm name [idle] for idle theads.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 19c1e05..6d68f3a 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -612,6 +612,17 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 	return ret;
 }
 
+static void register_idle_thread(void)
+{
+	struct thread *thread = threads__findnew(0);
+
+	if (thread == NULL ||
+			thread__set_comm(thread, "[idle]")) {
+		fprintf(stderr, "problem inserting idle task.\n");
+		exit(-1);
+	}
+}
+
 
 static int __cmd_report(void)
 {
@@ -626,6 +637,8 @@ static int __cmd_report(void)
 	char cwd[PATH_MAX], *cwdp = cwd;
 	int cwdlen;
 
+	register_idle_thread();
+
 	input = open(input_name, O_RDONLY);
 	if (input < 0) {
 		perror("failed to open file");
-- 
cgit v0.10.2


From cf25c63c609e99bfb9303b68a7a90a56a3a32cea Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 2 Jun 2009 22:12:14 +0200
Subject: perf report: Fix column width/alignment of dsos

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 6d68f3a..b84aaf1 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -303,11 +303,11 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__thread_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, "  %16s:%5d", self->thread->comm ?: "", self->thread->pid);
+	return fprintf(fp, " %16s:%5d", self->thread->comm ?: "", self->thread->pid);
 }
 
 static struct sort_entry sort_thread = {
-	.header = "          Command: Pid ",
+	.header = "         Command: Pid ",
 	.cmp	= sort__thread_cmp,
 	.print	= sort__thread_print,
 };
@@ -363,11 +363,11 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__dso_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, "  %s", self->dso ? self->dso->name : "<unknown>");
+	return fprintf(fp, "  %-25s", self->dso ? self->dso->name : "<unknown>");
 }
 
 static struct sort_entry sort_dso = {
-	.header = " Shared Object",
+	.header = " Shared Object          ",
 	.cmp	= sort__dso_cmp,
 	.print	= sort__dso_print,
 };
-- 
cgit v0.10.2


From f7e8b616ed1cc6f790b82324bce8a2a60295e5c2 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Jun 2009 16:39:48 -0400
Subject: function-graph: move initialization of new tasks up in fork

When the function graph tracer is enabled, all new tasks must allocate
a ret_stack to place the return address of functions. This is because
the function graph tracer will replace the real return address with a
call to the tracing of the exit function.

This initialization happens in fork, but it happens too late. If fork
fails, then it will call free_task and that calls the freeing of this
ret_stack. But before initialization happens, the new (failed) task
points to its parents ret_stack. If a fork failure happens during
the function trace, it would be catastrophic for the parent.

Also, there's no need to call ftrace_graph_exit_task from fork, since
it is called by free_task which fork calls on failure.

[ Impact: prevent crash during failed fork running function graph tracer ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd..c4b1e35 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -982,6 +982,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (!p)
 		goto fork_out;
 
+	ftrace_graph_init_task(p);
+
 	rt_mutex_init_task(p);
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -1131,8 +1133,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		}
 	}
 
-	ftrace_graph_init_task(p);
-
 	p->pid = pid_nr(pid);
 	p->tgid = p->pid;
 	if (clone_flags & CLONE_THREAD)
@@ -1141,7 +1141,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (current->nsproxy != p->nsproxy) {
 		retval = ns_cgroup_clone(p, pid);
 		if (retval)
-			goto bad_fork_free_graph;
+			goto bad_fork_free_pid;
 	}
 
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1233,7 +1233,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_free_graph;
+		goto bad_fork_free_pid;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
@@ -1268,8 +1268,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	cgroup_post_fork(p);
 	return p;
 
-bad_fork_free_graph:
-	ftrace_graph_exit_task(p);
 bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
-- 
cgit v0.10.2


From 84047e360af0394ac5861d433f26bbcf30f77dd1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Jun 2009 16:51:55 -0400
Subject: function-graph: always initialize task ret_stack

On creating a new task while running the function graph tracer, if
we fail to allocate the ret_stack, and then fail the fork, the
code will free the parent ret_stack. This is because the child
duplicated the parent and currently points to the parent's ret_stack.

This patch always initializes the task's ret_stack to NULL.

[ Impact: prevent crash of parent on low memory during fork ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1664d3f..bb081f3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2738,6 +2738,9 @@ void unregister_ftrace_graph(void)
 /* Allocate a return stack for newly created task */
 void ftrace_graph_init_task(struct task_struct *t)
 {
+	/* Make sure we do not use the parent ret_stack */
+	t->ret_stack = NULL;
+
 	if (atomic_read(&ftrace_graph_active)) {
 		struct ftrace_ret_stack *ret_stack;
 
@@ -2753,8 +2756,7 @@ void ftrace_graph_init_task(struct task_struct *t)
 		/* make curr_ret_stack visable before we add the ret_stack */
 		smp_wmb();
 		t->ret_stack = ret_stack;
-	} else
-		t->ret_stack = NULL;
+	}
 }
 
 void ftrace_graph_exit_task(struct task_struct *t)
-- 
cgit v0.10.2


From abaff32a03e26e5d6674cb2a26ad882efe7493a3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 2 Jun 2009 22:59:57 +0200
Subject: perf record: Add --append option

Allow incremental profiling via 'perf record -A' - this will append
to an existing perf.data.

Also reorder perf record options by utility / likelyhood of usage.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index c2fd042..19cba6b 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -1,5 +1,7 @@
-
-
+/*
+ * perf record: Record the profile of a workload (or a CPU, or a PID) into
+ * the perf.data output file - for later analysis via perf report.
+ */
 #include "perf.h"
 #include "builtin.h"
 #include "util/util.h"
@@ -28,6 +30,7 @@ static int			system_wide			= 0;
 static pid_t			target_pid			= -1;
 static int			inherit				= 1;
 static int			force				= 0;
+static int			append_file			= 0;
 
 const unsigned int default_count[] = {
 	1000000,
@@ -385,22 +388,29 @@ static void open_counters(int cpu, pid_t pid)
 static int __cmd_record(int argc, const char **argv)
 {
 	int i, counter;
+	struct stat st;
 	pid_t pid;
+	int flags;
 	int ret;
-	struct stat st;
 
 	page_size = sysconf(_SC_PAGE_SIZE);
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
 
-	if (!stat(output_name, &st) && !force) {
-		fprintf(stderr, "Error, output file: %s exists, use -f to overwrite.\n",
+	if (!stat(output_name, &st) && !force && !append_file) {
+		fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
 				output_name);
 		exit(-1);
 	}
 
-	output = open(output_name, O_CREAT|O_TRUNC|O_RDWR, S_IRUSR|S_IWUSR);
+	flags = O_CREAT|O_RDWR;
+	if (append_file)
+		flags |= O_APPEND;
+	else
+		flags |= O_TRUNC;
+
+	output = open(output_name, flags, S_IRUSR|S_IWUSR);
 	if (output < 0) {
 		perror("failed to create output file");
 		exit(-1);
@@ -466,22 +476,24 @@ static char events_help_msg[EVENTS_HELP_MAX];
 static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
 		     events_help_msg, parse_events),
-	OPT_INTEGER('c', "count", &default_interval,
-		    "event period to sample"),
-	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
-		    "number of mmap data pages"),
-	OPT_STRING('o', "output", &output_name, "file",
-		    "output file name"),
-	OPT_BOOLEAN('i', "inherit", &inherit,
-		    "child tasks inherit counters"),
 	OPT_INTEGER('p', "pid", &target_pid,
 		    "record events on existing pid"),
 	OPT_INTEGER('r', "realtime", &realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 			    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('A', "append", &append_file,
+			    "append to the output file to do incremental profiling"),
 	OPT_BOOLEAN('f', "force", &force,
 			"overwrite existing data file"),
+	OPT_INTEGER('c', "count", &default_interval,
+		    "event period to sample"),
+	OPT_STRING('o', "output", &output_name, "file",
+		    "output file name"),
+	OPT_BOOLEAN('i', "inherit", &inherit,
+		    "child tasks inherit counters"),
+	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
+		    "number of mmap data pages"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From 3cf165fc2e7f221a7a95098b47eb990779e29f5f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 2 Jun 2009 23:02:59 +0200
Subject: perf record: Increase mmap buffering default

I've run into mmap overruns with the current 16 pages default,
increase it to 128 pages.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 19cba6b..8feb119 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -19,9 +19,9 @@ static int			default_interval = 100000;
 static int			event_count[MAX_COUNTERS];
 
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
-static int			nr_cpus				=  0;
+static int			nr_cpus				= 0;
 static unsigned int		page_size;
-static unsigned int		mmap_pages			= 16;
+static unsigned int		mmap_pages			= 128;
 static int			output;
 static const char		*output_name			= "perf.data";
 static int			group				= 0;
-- 
cgit v0.10.2


From 0a520c63e1625b92ef775da40192e1542910e7f6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 2 Jun 2009 23:24:45 +0200
Subject: perf report: Print more info instead of <unknown> entries

Sometimes we still fail to find a DSO or look up a symbol,
print out the raw information in this case (which an help
debug the problem), instead of a not very helpful <unknown>
string.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index b84aaf1..270e986 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -201,7 +201,9 @@ static struct thread *thread__new(pid_t pid)
 
 	if (self != NULL) {
 		self->pid = pid;
-		self->comm = NULL;
+		self->comm = malloc(30);
+		if (self->comm)
+			sprintf(self->comm, ":%d", pid);
 		INIT_LIST_HEAD(&self->maps);
 	}
 
@@ -333,7 +335,7 @@ sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__comm_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, "  %16s", self->thread->comm ?: "<unknown>");
+	return fprintf(fp, "  %16s", self->thread->comm);
 }
 
 static struct sort_entry sort_comm = {
@@ -363,7 +365,10 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__dso_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, "  %-25s", self->dso ? self->dso->name : "<unknown>");
+	if (self->dso)
+		return fprintf(fp, "  %-25s", self->dso->name);
+
+	return fprintf(fp, "  %016llx", (__u64)self->ip);
 }
 
 static struct sort_entry sort_dso = {
@@ -392,11 +397,17 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
 	size_t ret = 0;
 
 	if (verbose)
-		ret += fprintf(fp, "  %#018llx", (unsigned long long)self->ip);
+		ret += fprintf(fp, "  %#018llx", (__u64)self->ip);
+
+	if (self->dso)
+		ret += fprintf(fp, "  %s: ", self->dso->name);
+	else
+		ret += fprintf(fp, "  %#016llx: ", (__u64)self->ip);
 
-	ret += fprintf(fp, "  %s: %s",
-			self->dso ? self->dso->name : "<unknown>",
-			self->sym ? self->sym->name : "<unknown>");
+	if (self->sym)
+		ret += fprintf(fp, "%s", self->sym->name);
+	else
+		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
 
 	return ret;
 }
@@ -772,7 +783,8 @@ more:
 				event->mmap.filename);
 		}
 		if (thread == NULL || map == NULL) {
-			fprintf(stderr, "problem processing PERF_EVENT_MMAP, skipping event.\n");
+			if (verbose)
+				fprintf(stderr, "problem processing PERF_EVENT_MMAP, skipping event.\n");
 			goto broken_event;
 		}
 		thread__insert_map(thread, map);
-- 
cgit v0.10.2


From eb5f4ca9536ba297c98721ecbbdf41ec5b987bd5 Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <mfuzzey@gmail.com>
Date: Mon, 1 Jun 2009 09:19:37 +0100
Subject: [ARM] 5534/1: kmalloc must return a cache line aligned buffer

Define ARCH_KMALLOC_MINALIGN in asm/cache.h
At the request of Russell also move ARCH_SLAB_MINALIGN to this file.

Signed-off-by: Martin Fuzzey <mfuzzey@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index cb7a9e9..feaa75f 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -7,4 +7,20 @@
 #define L1_CACHE_SHIFT		5
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
+/*
+ * Memory returned by kmalloc() may be used for DMA, so we must make
+ * sure that all such allocations are cache aligned. Otherwise,
+ * unrelated code may cause parts of the buffer to be read into the
+ * cache before the transfer is done, causing old data to be seen by
+ * the CPU.
+ */
+#define ARCH_KMALLOC_MINALIGN	L1_CACHE_BYTES
+
+/*
+ * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
+ */
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
+#define ARCH_SLAB_MINALIGN 8
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index e6eb8a6..7b52277 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -202,13 +202,6 @@ typedef struct page *pgtable_t;
 	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
 	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-/*
- * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
- */
-#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
-#define ARCH_SLAB_MINALIGN 8
-#endif
-
 #include <asm-generic/page.h>
 
 #endif
-- 
cgit v0.10.2


From 1946d6ef9d7bd4ba97094fe6eb68a9b877bde6b7 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 1 Jun 2009 12:50:33 +0100
Subject: [ARM] ARMv7 errata: only apply fixes when running on applicable CPU

Currently, whenever an erratum workaround is enabled, it will be
applied whether or not the erratum is relevent for the CPU.  This
patch changes this - we check the variant and revision fields in the
main ID register to determine which errata to apply.

We also avoid re-applying erratum 460075 if it has already been applied.
Applying this fix in non-secure mode results in the kernel failing to
boot (or even do anything.)

This fixes booting on some ARMv7 based platforms which otherwise
silently fail.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3397f1e..a08d9d2 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -184,23 +184,37 @@ __v7_setup:
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
 	bl	v7_flush_dcache_all
 	ldmia	r12, {r0-r5, r7, r9, r11, lr}
+
+	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
+	and	r10, r0, #0xff000000		@ ARM?
+	teq	r10, #0x41000000
+	bne	2f
+	and	r5, r0, #0x00f00000		@ variant
+	and	r6, r0, #0x0000000f		@ revision
+	orr	r0, r6, r5, lsr #20-4		@ combine variant and revision
+
 #ifdef CONFIG_ARM_ERRATA_430973
-	mrc	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orr	r10, r10, #(1 << 6)		@ set IBE to 1
-	mcr	p15, 0, r10, c1, c0, 1		@ write aux control register
+	teq	r5, #0x00100000			@ only present in r1p*
+	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
+	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
+	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_458693
-	mrc	p15, 0, r10, c1, c0, 1		@ read aux control register
-	orr	r10, r10, #(1 << 5)		@ set L1NEON to 1
-	orr	r10, r10, #(1 << 9)		@ set PLDNOP to 1
-	mcr	p15, 0, r10, c1, c0, 1		@ write aux control register
+	teq	r0, #0x20			@ only present in r2p0
+	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
+	orreq	r10, r10, #(1 << 5)		@ set L1NEON to 1
+	orreq	r10, r10, #(1 << 9)		@ set PLDNOP to 1
+	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_460075
-	mrc	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
-	orr	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
-	mcr	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
+	teq	r0, #0x20			@ only present in r2p0
+	mrceq	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
+	tsteq	r10, #1 << 22
+	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
+	mcreq	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
 #endif
-	mov	r10, #0
+
+2:	mov	r10, #0
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 #endif
-- 
cgit v0.10.2


From bf9e187637ca3d85cee7407e3af93995868cc87c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 2 Jun 2009 23:37:05 +0200
Subject: perf_counter tools: Make source code headers more coherent

The perf commands had different ways of describing themselves,
introduce a coherent command-file-header format taken from the
Git project.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 8feb119..2741b35 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -1,9 +1,14 @@
 /*
- * perf record: Record the profile of a workload (or a CPU, or a PID) into
- * the perf.data output file - for later analysis via perf report.
+ * builtin-record.c
+ *
+ * Builtin record command: Record the profile of a workload
+ * (or a CPU, or a PID) into the perf.data output file - for
+ * later analysis via perf report.
  */
-#include "perf.h"
 #include "builtin.h"
+
+#include "perf.h"
+
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 270e986..9da990f 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -1,6 +1,14 @@
-#include "util/util.h"
+/*
+ * builtin-report.c
+ *
+ * Builtin report command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
 #include "builtin.h"
 
+#include "util/util.h"
+
 #include "util/list.h"
 #include "util/cache.h"
 #include "util/rbtree.h"
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 27abe6a..2357a66 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -1,20 +1,27 @@
 /*
- * perf stat:  /usr/bin/time -alike performance counter statistics utility
+ * builtin-stat.c
+ *
+ * Builtin stat command: Give a precise performance counters summary
+ * overview about any workload, CPU or specific PID.
+ *
+ * Sample output:
 
-          It summarizes the counter events of all tasks (and child tasks),
-          covering all CPUs that the command (or workload) executes on.
-          It only counts the per-task events of the workload started,
-          independent of how many other tasks run on those CPUs.
+   $ perf stat ~/hackbench 10
+   Time: 0.104
 
-   Sample output:
+    Performance counter stats for '/home/mingo/hackbench':
 
-   $ perf stat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
+       1255.538611  task clock ticks     #      10.143 CPU utilization factor
+             54011  context switches     #       0.043 M/sec
+               385  CPU migrations       #       0.000 M/sec
+             17755  pagefaults           #       0.014 M/sec
+        3808323185  CPU cycles           #    3033.219 M/sec
+        1575111190  instructions         #    1254.530 M/sec
+          17367895  cache references     #      13.833 M/sec
+           7674421  cache misses         #       6.112 M/sec
 
-   Performance counter stats for 'ls':
+    Wall-clock time elapsed:   123.786620 msecs
 
-           163516953 instructions
-                2295 cache-misses
-             2855182 branch-misses
  *
  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  *
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 5029d8e..a639352 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -1,49 +1,25 @@
 /*
- * kerneltop.c: show top kernel functions - performance counters showcase
-
-   Build with:
-
-     make -C Documentation/perf_counter/
-
-   Sample output:
-
-------------------------------------------------------------------------------
- KernelTop:    2669 irqs/sec  [cache-misses/cache-refs],  (all, cpu: 2)
-------------------------------------------------------------------------------
-
-             weight         RIP          kernel function
-             ______   ________________   _______________
-
-              35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
-              33.00 - ffffffff804cb740 : sock_alloc_send_skb
-              31.26 - ffffffff804ce808 : skb_push
-              22.43 - ffffffff80510004 : tcp_established_options
-              19.00 - ffffffff8027d250 : find_get_page
-              15.76 - ffffffff804e4fc9 : eth_type_trans
-              15.20 - ffffffff804d8baa : dst_release
-              14.86 - ffffffff804cf5d8 : skb_release_head_state
-              14.00 - ffffffff802217d5 : read_hpet
-              12.00 - ffffffff804ffb7f : __ip_local_out
-              11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
-               8.54 - ffffffff805001a3 : ip_queue_xmit
+ * builtin-top.c
+ *
+ * Builtin top command: Display a continuously updated profile of
+ * any workload, CPU or specific PID.
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Yanmin Zhang <yanmin.zhang@intel.com>
+ *   Wu Fengguang <fengguang.wu@intel.com>
+ *   Mike Galbraith <efault@gmx.de>
+ *   Paul Mackerras <paulus@samba.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
  */
-
- /*
-  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
-  *
-  * Improvements and fixes by:
-  *
-  *   Arjan van de Ven <arjan@linux.intel.com>
-  *   Yanmin Zhang <yanmin.zhang@intel.com>
-  *   Wu Fengguang <fengguang.wu@intel.com>
-  *   Mike Galbraith <efault@gmx.de>
-  *   Paul Mackerras <paulus@samba.org>
-  *
-  * Released under the GPL v2. (and only v2, not any later version)
-  */
+#include "builtin.h"
 
 #include "perf.h"
-#include "builtin.h"
+
 #include "util/symbol.h"
 #include "util/util.h"
 #include "util/rbtree.h"
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index e8a8584..ec7edb7 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -1,4 +1,13 @@
+/*
+ * perf.c
+ *
+ * Performance analysis utility.
+ *
+ * This is the main hub from which the sub-commands (perf stat,
+ * perf top, perf record, perf report, etc.) are started.
+ */
 #include "builtin.h"
+
 #include "util/exec_cmd.h"
 #include "util/cache.h"
 #include "util/quote.h"
-- 
cgit v0.10.2


From addc2785ce92ff05da8edf18317b6b4719e10d9f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 2 Jun 2009 23:43:11 +0200
Subject: perf record: Print out the number of events captured

It makes sense to inform the user about how many events
perf record has written - so that the sufficiency of
profiling coverage and intensity can be determined at
a glance.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 2741b35..ec3b73a 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -467,6 +467,9 @@ static int __cmd_record(int argc, const char **argv)
 			ret = poll(event_array, nr_poll, 100);
 	}
 
+
+	fprintf(stderr, "[ perf record: Captured and wrote %ld events. ]\n", events);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 850b0cee165576f969363a8c52021b5cf9ecbe67 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 2 Jun 2009 17:01:16 -0400
Subject: SELinux: define audit permissions for audit tree netlink messages

Audit trees defined 2 new netlink messages but the netlink mapping tables for
selinux permissions were not set up.  This patch maps these 2 new operations
to AUDIT_WRITE.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index c6875fd..dd7cc6d 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -112,6 +112,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] =
 	{ AUDIT_DEL_RULE,	NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
 	{ AUDIT_USER,		NETLINK_AUDIT_SOCKET__NLMSG_RELAY    },
 	{ AUDIT_SIGNAL_INFO,	NETLINK_AUDIT_SOCKET__NLMSG_READ     },
+	{ AUDIT_TRIM,		NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
+	{ AUDIT_MAKE_EQUIV,	NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
 	{ AUDIT_TTY_GET,	NETLINK_AUDIT_SOCKET__NLMSG_READ     },
 	{ AUDIT_TTY_SET,	NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT	},
 };
-- 
cgit v0.10.2


From ab588ccadc80f6ef5495e83e176e88c5c0fc2d0e Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Tue, 2 Jun 2009 14:23:39 +0900
Subject: TOMOYO: Remove redundant markers.

Remove '/***** START/STOP *****/' markers.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index ddfb9cc..a42fe02 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -866,7 +866,6 @@ static struct tomoyo_profile *tomoyo_find_or_assign_new_profile(const unsigned
 
 	if (profile >= TOMOYO_MAX_PROFILES)
 		return NULL;
-	/***** EXCLUSIVE SECTION START *****/
 	mutex_lock(&lock);
 	ptr = tomoyo_profile_ptr[profile];
 	if (ptr)
@@ -880,7 +879,6 @@ static struct tomoyo_profile *tomoyo_find_or_assign_new_profile(const unsigned
 	tomoyo_profile_ptr[profile] = ptr;
  ok:
 	mutex_unlock(&lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return ptr;
 }
 
@@ -1050,7 +1048,6 @@ static int tomoyo_update_manager_entry(const char *manager,
 	saved_manager = tomoyo_save_name(manager);
 	if (!saved_manager)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_policy_manager_list_lock);
 	list_for_each_entry(ptr, &tomoyo_policy_manager_list, list) {
 		if (ptr->manager != saved_manager)
@@ -1072,7 +1069,6 @@ static int tomoyo_update_manager_entry(const char *manager,
 	error = 0;
  out:
 	up_write(&tomoyo_policy_manager_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -1197,13 +1193,11 @@ static bool tomoyo_is_select_one(struct tomoyo_io_buffer *head,
 
 	if (sscanf(data, "pid=%u", &pid) == 1) {
 		struct task_struct *p;
-		/***** CRITICAL SECTION START *****/
 		read_lock(&tasklist_lock);
 		p = find_task_by_vpid(pid);
 		if (p)
 			domain = tomoyo_real_domain(p);
 		read_unlock(&tasklist_lock);
-		/***** CRITICAL SECTION END *****/
 	} else if (!strncmp(data, "domain=", 7)) {
 		if (tomoyo_is_domain_def(data + 7)) {
 			down_read(&tomoyo_domain_list_lock);
@@ -1594,13 +1588,11 @@ static int tomoyo_read_pid(struct tomoyo_io_buffer *head)
 		const int pid = head->read_step;
 		struct task_struct *p;
 		struct tomoyo_domain_info *domain = NULL;
-		/***** CRITICAL SECTION START *****/
 		read_lock(&tasklist_lock);
 		p = find_task_by_vpid(pid);
 		if (p)
 			domain = tomoyo_real_domain(p);
 		read_unlock(&tasklist_lock);
-		/***** CRITICAL SECTION END *****/
 		if (domain)
 			tomoyo_io_printf(head, "%d %u %s", pid, domain->profile,
 					 domain->domainname->name);
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index ee43631..aa119ca 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -67,14 +67,12 @@ void tomoyo_set_domain_flag(struct tomoyo_domain_info *domain,
 {
 	/* We need to serialize because this is bitfield operation. */
 	static DEFINE_SPINLOCK(lock);
-	/***** CRITICAL SECTION START *****/
 	spin_lock(&lock);
 	if (!is_delete)
 		domain->flags |= flags;
 	else
 		domain->flags &= ~flags;
 	spin_unlock(&lock);
-	/***** CRITICAL SECTION END *****/
 }
 
 /**
@@ -135,7 +133,6 @@ static int tomoyo_update_domain_initializer_entry(const char *domainname,
 	saved_program = tomoyo_save_name(program);
 	if (!saved_program)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_initializer_list_lock);
 	list_for_each_entry(ptr, &tomoyo_domain_initializer_list, list) {
 		if (ptr->is_not != is_not ||
@@ -161,7 +158,6 @@ static int tomoyo_update_domain_initializer_entry(const char *domainname,
 	error = 0;
  out:
 	up_write(&tomoyo_domain_initializer_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -314,7 +310,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname,
 	saved_domainname = tomoyo_save_name(domainname);
 	if (!saved_domainname)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_keeper_list_lock);
 	list_for_each_entry(ptr, &tomoyo_domain_keeper_list, list) {
 		if (ptr->is_not != is_not ||
@@ -340,7 +335,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname,
 	error = 0;
  out:
 	up_write(&tomoyo_domain_keeper_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -475,7 +469,6 @@ static int tomoyo_update_alias_entry(const char *original_name,
 	saved_aliased_name = tomoyo_save_name(aliased_name);
 	if (!saved_original_name || !saved_aliased_name)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_alias_list_lock);
 	list_for_each_entry(ptr, &tomoyo_alias_list, list) {
 		if (ptr->original_name != saved_original_name ||
@@ -498,7 +491,6 @@ static int tomoyo_update_alias_entry(const char *original_name,
 	error = 0;
  out:
 	up_write(&tomoyo_alias_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -566,7 +558,6 @@ int tomoyo_delete_domain(char *domainname)
 
 	name.name = domainname;
 	tomoyo_fill_path_info(&name);
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_list_lock);
 	/* Is there an active domain? */
 	list_for_each_entry(domain, &tomoyo_domain_list, list) {
@@ -580,7 +571,6 @@ int tomoyo_delete_domain(char *domainname)
 		break;
 	}
 	up_write(&tomoyo_domain_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return 0;
 }
 
@@ -599,7 +589,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
 	struct tomoyo_domain_info *domain = NULL;
 	const struct tomoyo_path_info *saved_domainname;
 
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_list_lock);
 	domain = tomoyo_find_domain(domainname);
 	if (domain)
@@ -618,7 +607,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
 		    domain->domainname != saved_domainname)
 			continue;
 		flag = false;
-		/***** CRITICAL SECTION START *****/
 		read_lock(&tasklist_lock);
 		for_each_process(p) {
 			if (tomoyo_real_domain(p) != domain)
@@ -627,7 +615,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
 			break;
 		}
 		read_unlock(&tasklist_lock);
-		/***** CRITICAL SECTION END *****/
 		if (flag)
 			continue;
 		list_for_each_entry(ptr, &domain->acl_info_list, list) {
@@ -650,7 +637,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
 	}
  out:
 	up_write(&tomoyo_domain_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return domain;
 }
 
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index 2316da8..adf786d 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -166,7 +166,6 @@ static int tomoyo_update_globally_readable_entry(const char *filename,
 	saved_filename = tomoyo_save_name(filename);
 	if (!saved_filename)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_globally_readable_list_lock);
 	list_for_each_entry(ptr, &tomoyo_globally_readable_list, list) {
 		if (ptr->filename != saved_filename)
@@ -187,7 +186,6 @@ static int tomoyo_update_globally_readable_entry(const char *filename,
 	error = 0;
  out:
 	up_write(&tomoyo_globally_readable_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -284,7 +282,6 @@ static int tomoyo_update_file_pattern_entry(const char *pattern,
 	saved_pattern = tomoyo_save_name(pattern);
 	if (!saved_pattern)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_pattern_list_lock);
 	list_for_each_entry(ptr, &tomoyo_pattern_list, list) {
 		if (saved_pattern != ptr->pattern)
@@ -305,7 +302,6 @@ static int tomoyo_update_file_pattern_entry(const char *pattern,
 	error = 0;
  out:
 	up_write(&tomoyo_pattern_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -407,7 +403,6 @@ static int tomoyo_update_no_rewrite_entry(const char *pattern,
 	saved_pattern = tomoyo_save_name(pattern);
 	if (!saved_pattern)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_no_rewrite_list_lock);
 	list_for_each_entry(ptr, &tomoyo_no_rewrite_list, list) {
 		if (ptr->pattern != saved_pattern)
@@ -428,7 +423,6 @@ static int tomoyo_update_no_rewrite_entry(const char *pattern,
 	error = 0;
  out:
 	up_write(&tomoyo_no_rewrite_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -745,7 +739,6 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename,
 	saved_filename = tomoyo_save_name(filename);
 	if (!saved_filename)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_acl_info_list_lock);
 	if (is_delete)
 		goto delete;
@@ -800,7 +793,6 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename,
 	}
  out:
 	up_write(&tomoyo_domain_acl_info_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
@@ -836,7 +828,6 @@ static int tomoyo_update_double_path_acl(const u8 type, const char *filename1,
 	saved_filename2 = tomoyo_save_name(filename2);
 	if (!saved_filename1 || !saved_filename2)
 		return -ENOMEM;
-	/***** EXCLUSIVE SECTION START *****/
 	down_write(&tomoyo_domain_acl_info_list_lock);
 	if (is_delete)
 		goto delete;
@@ -884,7 +875,6 @@ static int tomoyo_update_double_path_acl(const u8 type, const char *filename1,
 	}
  out:
 	up_write(&tomoyo_domain_acl_info_list_lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return error;
 }
 
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 40927a8..3948f6b 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -220,7 +220,6 @@ void *tomoyo_alloc_element(const unsigned int size)
 		= roundup(size, max(sizeof(void *), sizeof(long)));
 	if (word_aligned_size > PATH_MAX)
 		return NULL;
-	/***** EXCLUSIVE SECTION START *****/
 	mutex_lock(&lock);
 	if (buf_used_len + word_aligned_size > PATH_MAX) {
 		if (!tomoyo_quota_for_elements ||
@@ -251,7 +250,6 @@ void *tomoyo_alloc_element(const unsigned int size)
 		}
 	}
 	mutex_unlock(&lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return ptr;
 }
 
@@ -318,7 +316,6 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name)
 		return NULL;
 	}
 	hash = full_name_hash((const unsigned char *) name, len - 1);
-	/***** EXCLUSIVE SECTION START *****/
 	mutex_lock(&lock);
 	list_for_each_entry(ptr, &tomoyo_name_list[hash % TOMOYO_MAX_HASH],
 			     list) {
@@ -366,7 +363,6 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name)
 	}
  out:
 	mutex_unlock(&lock);
-	/***** EXCLUSIVE SECTION END *****/
 	return ptr ? &ptr->entry : NULL;
 }
 
-- 
cgit v0.10.2


From 7d2948b1248109dbc7f4aaf9867c54b1912d494c Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 2 Jun 2009 20:42:24 +0900
Subject: TOMOYO: Simplify policy reader.

We can directly assign the result of tomoyo_io_printf() to done flag.

Signed-off-by: Kentaro Takeda <takedakn@nttdata.co.jp>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Toshiharu Harada <haradats@nttdata.co.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index a42fe02..6d25612 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -1113,10 +1113,9 @@ static int tomoyo_read_manager_policy(struct tomoyo_io_buffer *head)
 				 list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, "%s\n", ptr->manager->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, "%s\n", ptr->manager->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_policy_manager_list_lock);
 	head->read_eof = done;
@@ -1441,15 +1440,14 @@ static int tomoyo_read_domain_policy(struct tomoyo_io_buffer *head)
 		    TOMOYO_DOMAIN_FLAGS_IGNORE_GLOBAL_ALLOW_READ)
 			ignore_global_allow_read
 				= TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ "\n";
-		if (!tomoyo_io_printf(head,
-				      "%s\n" TOMOYO_KEYWORD_USE_PROFILE "%u\n"
-				      "%s%s%s\n", domain->domainname->name,
-				      domain->profile, quota_exceeded,
-				      transition_failed,
-				      ignore_global_allow_read)) {
-			done = false;
+		done = tomoyo_io_printf(head, "%s\n" TOMOYO_KEYWORD_USE_PROFILE
+					"%u\n%s%s%s\n",
+					domain->domainname->name,
+					domain->profile, quota_exceeded,
+					transition_failed,
+					ignore_global_allow_read);
+		if (!done)
 			break;
-		}
 		head->read_step = 2;
 acl_loop:
 		if (head->read_step == 3)
@@ -1457,24 +1455,22 @@ acl_loop:
 		/* Print ACL entries in the domain. */
 		down_read(&tomoyo_domain_acl_info_list_lock);
 		list_for_each_cookie(apos, head->read_var2,
-				      &domain->acl_info_list) {
+				     &domain->acl_info_list) {
 			struct tomoyo_acl_info *ptr
 				= list_entry(apos, struct tomoyo_acl_info,
-					      list);
-			if (!tomoyo_print_entry(head, ptr)) {
-				done = false;
+					     list);
+			done = tomoyo_print_entry(head, ptr);
+			if (!done)
 				break;
-			}
 		}
 		up_read(&tomoyo_domain_acl_info_list_lock);
 		if (!done)
 			break;
 		head->read_step = 3;
 tail_mark:
-		if (!tomoyo_io_printf(head, "\n")) {
-			done = false;
+		done = tomoyo_io_printf(head, "\n");
+		if (!done)
 			break;
-		}
 		head->read_step = 1;
 		if (head->read_single_domain)
 			break;
@@ -1544,11 +1540,10 @@ static int tomoyo_read_domain_profile(struct tomoyo_io_buffer *head)
 		domain = list_entry(pos, struct tomoyo_domain_info, list);
 		if (domain->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, "%u %s\n", domain->profile,
-				      domain->domainname->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, "%u %s\n", domain->profile,
+					domain->domainname->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_domain_list_lock);
 	head->read_eof = done;
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index aa119ca..34bb641 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -189,13 +189,12 @@ bool tomoyo_read_domain_initializer_policy(struct tomoyo_io_buffer *head)
 			from = " from ";
 			domain = ptr->domainname->name;
 		}
-		if (!tomoyo_io_printf(head,
-				      "%s" TOMOYO_KEYWORD_INITIALIZE_DOMAIN
-				      "%s%s%s\n", no, ptr->program->name, from,
-				      domain)) {
-			done = false;
+		done = tomoyo_io_printf(head,
+					"%s" TOMOYO_KEYWORD_INITIALIZE_DOMAIN
+					"%s%s%s\n", no, ptr->program->name,
+					from, domain);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_domain_initializer_list_lock);
 	return done;
@@ -387,13 +386,12 @@ bool tomoyo_read_domain_keeper_policy(struct tomoyo_io_buffer *head)
 			from = " from ";
 			program = ptr->program->name;
 		}
-		if (!tomoyo_io_printf(head,
-				      "%s" TOMOYO_KEYWORD_KEEP_DOMAIN
-				      "%s%s%s\n", no, program, from,
-				      ptr->domainname->name)) {
-			done = false;
+		done = tomoyo_io_printf(head,
+					"%s" TOMOYO_KEYWORD_KEEP_DOMAIN
+					"%s%s%s\n", no, program, from,
+					ptr->domainname->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_domain_keeper_list_lock);
 	return done;
@@ -513,12 +511,11 @@ bool tomoyo_read_alias_policy(struct tomoyo_io_buffer *head)
 		ptr = list_entry(pos, struct tomoyo_alias_entry, list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_ALIAS "%s %s\n",
-				      ptr->original_name->name,
-				      ptr->aliased_name->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, TOMOYO_KEYWORD_ALIAS "%s %s\n",
+					ptr->original_name->name,
+					ptr->aliased_name->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_alias_list_lock);
 	return done;
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index adf786d..a67f9e6 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -247,11 +247,10 @@ bool tomoyo_read_globally_readable_policy(struct tomoyo_io_buffer *head)
 				 list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_ALLOW_READ "%s\n",
-				      ptr->filename->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, TOMOYO_KEYWORD_ALLOW_READ "%s\n",
+					ptr->filename->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_globally_readable_list_lock);
 	return done;
@@ -369,11 +368,10 @@ bool tomoyo_read_file_pattern(struct tomoyo_io_buffer *head)
 		ptr = list_entry(pos, struct tomoyo_pattern_entry, list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_FILE_PATTERN "%s\n",
-				      ptr->pattern->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, TOMOYO_KEYWORD_FILE_PATTERN
+					"%s\n", ptr->pattern->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_pattern_list_lock);
 	return done;
@@ -483,11 +481,10 @@ bool tomoyo_read_no_rewrite_policy(struct tomoyo_io_buffer *head)
 		ptr = list_entry(pos, struct tomoyo_no_rewrite_entry, list);
 		if (ptr->is_deleted)
 			continue;
-		if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_DENY_REWRITE "%s\n",
-				      ptr->pattern->name)) {
-			done = false;
+		done = tomoyo_io_printf(head, TOMOYO_KEYWORD_DENY_REWRITE
+					"%s\n", ptr->pattern->name);
+		if (!done)
 			break;
-		}
 	}
 	up_read(&tomoyo_no_rewrite_list_lock);
 	return done;
-- 
cgit v0.10.2


From 9fa7eb283c5cdc2b0f4a8cfe6387ed82e5e9a3d3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 2 Jun 2009 20:07:25 -0700
Subject: Linux 2.6.30-rc8


diff --git a/Makefile b/Makefile
index 739fd34..610d1c3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 30
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
-- 
cgit v0.10.2


From 6799687a53a28536fd027ccb644833f66a778925 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Tue, 2 Jun 2009 08:23:58 +0200
Subject: x86, boot: add new generated files to the appropriate .gitignore
 files

git status complains of untracked (generated) files in arch/x86/boot..

# Untracked files:
#   (use "git add <file>..." to include in what will be committed)
#
#       ../../arch/x86/boot/compressed/mkpiggy
#       ../../arch/x86/boot/compressed/piggy.S
#       ../../arch/x86/boot/compressed/vmlinux.lds
#       ../../arch/x86/boot/voffset.h
#       ../../arch/x86/boot/zoffset.h

..so adjust .gitignore files accordingly.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 172cf8a..851fe93 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -3,6 +3,8 @@ bzImage
 cpustr.h
 mkcpustr
 offsets.h
+voffset.h
+zoffset.h
 setup
 setup.bin
 setup.elf
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
index 63eff3b..4a46fab 100644
--- a/arch/x86/boot/compressed/.gitignore
+++ b/arch/x86/boot/compressed/.gitignore
@@ -1,3 +1,6 @@
 relocs
 vmlinux.bin.all
 vmlinux.relocs
+vmlinux.lds
+mkpiggy
+piggy.S
-- 
cgit v0.10.2


From ca85b6ba59b69b7b5adcc64a98bd2478f73b2542 Mon Sep 17 00:00:00 2001
From: Andrea Borgia <andrea@borgia.bo.it>
Date: Tue, 2 Jun 2009 19:21:17 +0200
Subject: ALSA: usb-audio - errata corrige for quirk

Cut'n'paste mistake, whose likely result was nothing at all.
Correct version is "USB_DEVICE", not "USB_DEVICE_VENDOR_SPEC".

Signed-off-by: Andrea Borgia <andrea@borgia.bo.it>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index 58f24f5..f0f7624 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -1986,7 +1986,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 	}
 },
 {
-	USB_DEVICE_VENDOR_SPEC(0x0ccd, 0x0028),
+	USB_DEVICE(0x0ccd, 0x0028),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
 		.vendor_name = "TerraTec",
 		.product_name = "Aureon 5.1 MkII",
-- 
cgit v0.10.2


From 5c9b6e9e618868ac66d92c81b70ad57d82033d4e Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 3 Jun 2009 15:35:19 +1000
Subject: ALSA: sound/ppc: update annotations of serveral functions

[I am not sure if this is the correct approach as I don't know if any of
this actual hardware or drivers are really hot pluggable.]

Gets rid of these build warnings:

WARNING: sound/ppc/snd-powermac.o(.devinit.text+0x5c): Section mismatch in reference from the function .snd_pmac_probe() to the function .init.text:.snd_pmac_new()
The function __devinit .snd_pmac_probe() references
a function __init .snd_pmac_new().
If .snd_pmac_new is only used by .snd_pmac_probe then
annotate .snd_pmac_new with a matching annotation.

WARNING: sound/ppc/snd-powermac.o(.devinit.text+0x10c): Section mismatch in reference from the function .snd_pmac_probe() to the function .init.text:.snd_pmac_burgundy_init()
The function __devinit .snd_pmac_probe() references
a function __init .snd_pmac_burgundy_init().
If .snd_pmac_burgundy_init is only used by .snd_pmac_probe then
annotate .snd_pmac_burgundy_init with a matching annotation.

WARNING: sound/ppc/snd-powermac.o(.devinit.text+0x164): Section mismatch in reference from the function .snd_pmac_probe() to the function .init.text:.snd_pmac_daca_init()
The function __devinit .snd_pmac_probe() references
a function __init .snd_pmac_daca_init().
If .snd_pmac_daca_init is only used by .snd_pmac_probe then
annotate .snd_pmac_daca_init with a matching annotation.

WARNING: sound/ppc/snd-powermac.o(.devinit.text+0x1dc): Section mismatch in reference from the function .snd_pmac_probe() to the function .init.text:.snd_pmac_tumbler_init()
The function __devinit .snd_pmac_probe() references
a function __init .snd_pmac_tumbler_init().
If .snd_pmac_tumbler_init is only used by .snd_pmac_probe then
annotate .snd_pmac_tumbler_init with a matching annotation.

WARNING: sound/ppc/snd-powermac.o(.devinit.text+0x1ec): Section mismatch in reference from the function .snd_pmac_probe() to the function .init.text:.snd_pmac_tumbler_post_init()
The function __devinit .snd_pmac_probe() references
a function __init .snd_pmac_tumbler_post_init().
If .snd_pmac_tumbler_post_init is only used by .snd_pmac_probe then
annotate .snd_pmac_tumbler_post_init with a matching annotation.

WARNING: sound/ppc/snd-powermac.o(.devinit.text+0x28c): Section mismatch in reference from the function .snd_pmac_probe() to the function .init.text:.snd_pmac_awacs_init()
The function __devinit .snd_pmac_probe() references
a function __init .snd_pmac_awacs_init().
If .snd_pmac_awacs_init is only used by .snd_pmac_probe then
annotate .snd_pmac_awacs_init with a matching annotation.

WARNING: sound/ppc/snd-powermac.o(.devinit.text+0x2bc): Section mismatch in reference from the function .snd_pmac_probe() to the function .init.text:.snd_pmac_pcm_new()
The function __devinit .snd_pmac_probe() references
a function __init .snd_pmac_pcm_new().
If .snd_pmac_pcm_new is only used by .snd_pmac_probe then
annotate .snd_pmac_pcm_new with a matching annotation.

WARNING: sound/ppc/snd-powermac.o(.devinit.text+0x2f8): Section mismatch in reference from the function .snd_pmac_probe() to the function .init.text:.snd_pmac_attach_beep()
The function __devinit .snd_pmac_probe() references
a function __init .snd_pmac_attach_beep().
If .snd_pmac_attach_beep is only used by .snd_pmac_probe then
annotate .snd_pmac_attach_beep with a matching annotation.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/ppc/awacs.c b/sound/ppc/awacs.c
index 80df9b1..0d2d62d 100644
--- a/sound/ppc/awacs.c
+++ b/sound/ppc/awacs.c
@@ -872,7 +872,7 @@ static void snd_pmac_awacs_update_automute(struct snd_pmac *chip, int do_notify)
 /*
  * initialize chip
  */
-int __init
+int __devinit
 snd_pmac_awacs_init(struct snd_pmac *chip)
 {
 	int pm7500 = IS_PM7500;
diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c
index 89f5c32..a9d3507 100644
--- a/sound/ppc/beep.c
+++ b/sound/ppc/beep.c
@@ -215,7 +215,7 @@ static struct snd_kcontrol_new snd_pmac_beep_mixer = {
 };
 
 /* Initialize beep stuff */
-int __init snd_pmac_attach_beep(struct snd_pmac *chip)
+int __devinit snd_pmac_attach_beep(struct snd_pmac *chip)
 {
 	struct pmac_beep *beep;
 	struct input_dev *input_dev;
diff --git a/sound/ppc/burgundy.c b/sound/ppc/burgundy.c
index 45a7629..aba3814 100644
--- a/sound/ppc/burgundy.c
+++ b/sound/ppc/burgundy.c
@@ -618,7 +618,7 @@ static void snd_pmac_burgundy_update_automute(struct snd_pmac *chip, int do_noti
 /*
  * initialize burgundy
  */
-int __init snd_pmac_burgundy_init(struct snd_pmac *chip)
+int __devinit snd_pmac_burgundy_init(struct snd_pmac *chip)
 {
 	int imac = machine_is_compatible("iMac");
 	int i, err;
diff --git a/sound/ppc/daca.c b/sound/ppc/daca.c
index f8d478c..24200b7 100644
--- a/sound/ppc/daca.c
+++ b/sound/ppc/daca.c
@@ -244,7 +244,7 @@ static void daca_cleanup(struct snd_pmac *chip)
 }
 
 /* exported */
-int __init snd_pmac_daca_init(struct snd_pmac *chip)
+int __devinit snd_pmac_daca_init(struct snd_pmac *chip)
 {
 	int i, err;
 	struct pmac_daca *mix;
diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c
index a5afb26..15518e6 100644
--- a/sound/ppc/keywest.c
+++ b/sound/ppc/keywest.c
@@ -109,7 +109,7 @@ void snd_pmac_keywest_cleanup(struct pmac_keywest *i2c)
 	}
 }
 
-int __init snd_pmac_tumbler_post_init(void)
+int __devinit snd_pmac_tumbler_post_init(void)
 {
 	int err;
 	
@@ -124,7 +124,7 @@ int __init snd_pmac_tumbler_post_init(void)
 }
 
 /* exported */
-int __init snd_pmac_keywest_init(struct pmac_keywest *i2c)
+int __devinit snd_pmac_keywest_init(struct pmac_keywest *i2c)
 {
 	int err;
 
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 9b4e9c3..dfea116 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -702,7 +702,7 @@ static struct snd_pcm_ops snd_pmac_capture_ops = {
 	.pointer =	snd_pmac_capture_pointer,
 };
 
-int __init snd_pmac_pcm_new(struct snd_pmac *chip)
+int __devinit snd_pmac_pcm_new(struct snd_pmac *chip)
 {
 	struct snd_pcm *pcm;
 	int err;
@@ -934,7 +934,7 @@ static void __init detect_byte_swap(struct snd_pmac *chip)
 /*
  * detect a sound chip
  */
-static int __init snd_pmac_detect(struct snd_pmac *chip)
+static int __devinit snd_pmac_detect(struct snd_pmac *chip)
 {
 	struct device_node *sound;
 	struct device_node *dn;
@@ -1158,7 +1158,7 @@ static struct snd_kcontrol_new auto_mute_controls[] __initdata = {
 	},
 };
 
-int __init snd_pmac_add_automute(struct snd_pmac *chip)
+int __devinit snd_pmac_add_automute(struct snd_pmac *chip)
 {
 	int err;
 	chip->auto_mute = 1;
@@ -1175,7 +1175,7 @@ int __init snd_pmac_add_automute(struct snd_pmac *chip)
 /*
  * create and detect a pmac chip record
  */
-int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
+int __devinit snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
 {
 	struct snd_pmac *chip;
 	struct device_node *np;
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 40222fc..adc8341 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -1269,7 +1269,7 @@ static void tumbler_resume(struct snd_pmac *chip)
 #endif
 
 /* initialize tumbler */
-static int __init tumbler_init(struct snd_pmac *chip)
+static int __devinit tumbler_init(struct snd_pmac *chip)
 {
 	int irq;
 	struct pmac_tumbler *mix = chip->mixer_data;
@@ -1339,7 +1339,7 @@ static void tumbler_cleanup(struct snd_pmac *chip)
 }
 
 /* exported */
-int __init snd_pmac_tumbler_init(struct snd_pmac *chip)
+int __devinit snd_pmac_tumbler_init(struct snd_pmac *chip)
 {
 	int i, err;
 	struct pmac_tumbler *mix;
-- 
cgit v0.10.2


From 3e1e0a5dd539f83438078759c8642c5dd7c24cb6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 3 Jun 2009 08:13:15 +0200
Subject: ALSA: powermac - Replace the rest of __init*

All __initdata should be __devinitdata as platform device is hotpluggable.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/ppc/awacs.c b/sound/ppc/awacs.c
index 0d2d62d..2cc0eda 100644
--- a/sound/ppc/awacs.c
+++ b/sound/ppc/awacs.c
@@ -477,7 +477,7 @@ static int snd_pmac_awacs_put_master_amp(struct snd_kcontrol *kcontrol,
 #define AMP_CH_SPK	0
 #define AMP_CH_HD	1
 
-static struct snd_kcontrol_new snd_pmac_awacs_amp_vol[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_amp_vol[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "PC Speaker Playback Volume",
 	  .info = snd_pmac_awacs_info_volume_amp,
@@ -514,7 +514,7 @@ static struct snd_kcontrol_new snd_pmac_awacs_amp_vol[] __initdata = {
 	},
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_amp_hp_sw __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_amp_hp_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Headphone Playback Switch",
 	.info = snd_pmac_boolean_stereo_info,
@@ -523,7 +523,7 @@ static struct snd_kcontrol_new snd_pmac_awacs_amp_hp_sw __initdata = {
 	.private_value = AMP_CH_HD,
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_amp_spk_sw __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_amp_spk_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "PC Speaker Playback Switch",
 	.info = snd_pmac_boolean_stereo_info,
@@ -595,46 +595,46 @@ static int snd_pmac_screamer_mic_boost_put(struct snd_kcontrol *kcontrol,
 /*
  * lists of mixer elements
  */
-static struct snd_kcontrol_new snd_pmac_awacs_mixers[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers[] __devinitdata = {
 	AWACS_SWITCH("Master Capture Switch", 1, SHIFT_LOOPTHRU, 0),
 	AWACS_VOLUME("Master Capture Volume", 0, 4, 0),
 /*	AWACS_SWITCH("Unknown Playback Switch", 6, SHIFT_PAROUT0, 0), */
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers_beige[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers_beige[] __devinitdata = {
 	AWACS_VOLUME("Master Playback Volume", 2, 6, 1),
 	AWACS_VOLUME("Play-through Playback Volume", 5, 6, 1),
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_MIC, 0),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_LINE, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers_lo[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers_lo[] __devinitdata = {
 	AWACS_VOLUME("Line out Playback Volume", 2, 6, 1),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers_imac[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers_imac[] __devinitdata = {
 	AWACS_VOLUME("Play-through Playback Volume", 5, 6, 1),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers_g4agp[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers_g4agp[] __devinitdata = {
 	AWACS_VOLUME("Line out Playback Volume", 2, 6, 1),
 	AWACS_VOLUME("Master Playback Volume", 5, 6, 1),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_MIC, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac7500[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac7500[] __devinitdata = {
 	AWACS_VOLUME("Line out Playback Volume", 2, 6, 1),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_MIC, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac5500[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac5500[] __devinitdata = {
 	AWACS_VOLUME("Headphone Playback Volume", 2, 6, 1),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac[] __devinitdata = {
 	AWACS_VOLUME("Master Playback Volume", 2, 6, 1),
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 };
@@ -642,34 +642,34 @@ static struct snd_kcontrol_new snd_pmac_awacs_mixers_pmac[] __initdata = {
 /* FIXME: is this correct order?
  * screamer (powerbook G3 pismo) seems to have different bits...
  */
-static struct snd_kcontrol_new snd_pmac_awacs_mixers2[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers2[] __devinitdata = {
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_LINE, 0),
 	AWACS_SWITCH("Mic Capture Switch", 0, SHIFT_MUX_MIC, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mixers2[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mixers2[] __devinitdata = {
 	AWACS_SWITCH("Line Capture Switch", 0, SHIFT_MUX_MIC, 0),
 	AWACS_SWITCH("Mic Capture Switch", 0, SHIFT_MUX_LINE, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mixers2_pmac5500[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mixers2_pmac5500[] __devinitdata = {
 	AWACS_SWITCH("CD Capture Switch", 0, SHIFT_MUX_CD, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_master_sw __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_master_sw __devinitdata =
 AWACS_SWITCH("Master Playback Switch", 1, SHIFT_HDMUTE, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_master_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_master_sw_imac __devinitdata =
 AWACS_SWITCH("Line out Playback Switch", 1, SHIFT_HDMUTE, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_master_sw_pmac5500 __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_master_sw_pmac5500 __devinitdata =
 AWACS_SWITCH("Headphone Playback Switch", 1, SHIFT_HDMUTE, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_mic_boost[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_mic_boost[] __devinitdata = {
 	AWACS_SWITCH("Mic Boost Capture Switch", 0, SHIFT_GAINLINE, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mic_boost[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_screamer_mic_boost[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "Mic Boost Capture Volume",
 	  .info = snd_pmac_screamer_mic_boost_info,
@@ -678,34 +678,34 @@ static struct snd_kcontrol_new snd_pmac_screamer_mic_boost[] __initdata = {
 	},
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_mic_boost_pmac7500[] __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_mic_boost_pmac7500[] __devinitdata =
 {
 	AWACS_SWITCH("Line Boost Capture Switch", 0, SHIFT_GAINLINE, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mic_boost_beige[] __initdata =
+static struct snd_kcontrol_new snd_pmac_screamer_mic_boost_beige[] __devinitdata =
 {
 	AWACS_SWITCH("Line Boost Capture Switch", 0, SHIFT_GAINLINE, 0),
 	AWACS_SWITCH("CD Boost Capture Switch", 6, SHIFT_MIC_BOOST, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_screamer_mic_boost_imac[] __initdata =
+static struct snd_kcontrol_new snd_pmac_screamer_mic_boost_imac[] __devinitdata =
 {
 	AWACS_SWITCH("Line Boost Capture Switch", 0, SHIFT_GAINLINE, 0),
 	AWACS_SWITCH("Mic Boost Capture Switch", 6, SHIFT_MIC_BOOST, 0),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_speaker_vol[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_awacs_speaker_vol[] __devinitdata = {
 	AWACS_VOLUME("PC Speaker Playback Volume", 4, 6, 1),
 };
 
-static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw __devinitdata =
 AWACS_SWITCH("PC Speaker Playback Switch", 1, SHIFT_SPKMUTE, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw_imac1 __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw_imac1 __devinitdata =
 AWACS_SWITCH("PC Speaker Playback Switch", 1, SHIFT_PAROUT1, 1);
 
-static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw_imac2 __initdata =
+static struct snd_kcontrol_new snd_pmac_awacs_speaker_sw_imac2 __devinitdata =
 AWACS_SWITCH("PC Speaker Playback Switch", 1, SHIFT_PAROUT1, 0);
 
 
diff --git a/sound/ppc/burgundy.c b/sound/ppc/burgundy.c
index aba3814..bb80770 100644
--- a/sound/ppc/burgundy.c
+++ b/sound/ppc/burgundy.c
@@ -468,7 +468,7 @@ static int snd_pmac_burgundy_put_switch_b(struct snd_kcontrol *kcontrol,
 /*
  * Burgundy mixers
  */
-static struct snd_kcontrol_new snd_pmac_burgundy_mixers[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_burgundy_mixers[] __devinitdata = {
 	BURGUNDY_VOLUME_W("Master Playback Volume", 0,
 			MASK_ADDR_BURGUNDY_MASTER_VOLUME, 8),
 	BURGUNDY_VOLUME_W("CD Capture Volume", 0,
@@ -496,7 +496,7 @@ static struct snd_kcontrol_new snd_pmac_burgundy_mixers[] __initdata = {
  */	BURGUNDY_SWITCH_B("PCM Capture Switch", 0,
 			MASK_ADDR_BURGUNDY_HOSTIFEH, 0x01, 0, 0)
 };
-static struct snd_kcontrol_new snd_pmac_burgundy_mixers_imac[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_burgundy_mixers_imac[] __devinitdata = {
 	BURGUNDY_VOLUME_W("Line in Capture Volume", 0,
 			MASK_ADDR_BURGUNDY_VOLLINE, 16),
 	BURGUNDY_VOLUME_W("Mic Capture Volume", 0,
@@ -522,7 +522,7 @@ static struct snd_kcontrol_new snd_pmac_burgundy_mixers_imac[] __initdata = {
 	BURGUNDY_SWITCH_B("Mic Boost Capture Switch", 0,
 			MASK_ADDR_BURGUNDY_INPBOOST, 0x40, 0x80, 1)
 };
-static struct snd_kcontrol_new snd_pmac_burgundy_mixers_pmac[] __initdata = {
+static struct snd_kcontrol_new snd_pmac_burgundy_mixers_pmac[] __devinitdata = {
 	BURGUNDY_VOLUME_W("Line in Capture Volume", 0,
 			MASK_ADDR_BURGUNDY_VOLMIC, 16),
 	BURGUNDY_VOLUME_B("Line in Gain Capture Volume", 0,
@@ -538,33 +538,33 @@ static struct snd_kcontrol_new snd_pmac_burgundy_mixers_pmac[] __initdata = {
 /*	BURGUNDY_SWITCH_B("Line in Boost Capture Switch", 0,
  *		MASK_ADDR_BURGUNDY_INPBOOST, 0x40, 0x80, 1) */
 };
-static struct snd_kcontrol_new snd_pmac_burgundy_master_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_master_sw_imac __devinitdata =
 BURGUNDY_SWITCH_B("Master Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_LEFT | BURGUNDY_LINEOUT_LEFT | BURGUNDY_HP_LEFT,
 	BURGUNDY_OUTPUT_RIGHT | BURGUNDY_LINEOUT_RIGHT | BURGUNDY_HP_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_master_sw_pmac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_master_sw_pmac __devinitdata =
 BURGUNDY_SWITCH_B("Master Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_INTERN
 	| BURGUNDY_OUTPUT_LEFT, BURGUNDY_OUTPUT_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_speaker_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_speaker_sw_imac __devinitdata =
 BURGUNDY_SWITCH_B("PC Speaker Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_LEFT, BURGUNDY_OUTPUT_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_speaker_sw_pmac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_speaker_sw_pmac __devinitdata =
 BURGUNDY_SWITCH_B("PC Speaker Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_INTERN, 0, 0);
-static struct snd_kcontrol_new snd_pmac_burgundy_line_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_line_sw_imac __devinitdata =
 BURGUNDY_SWITCH_B("Line out Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_LINEOUT_LEFT, BURGUNDY_LINEOUT_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_line_sw_pmac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_line_sw_pmac __devinitdata =
 BURGUNDY_SWITCH_B("Line out Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_OUTPUT_LEFT, BURGUNDY_OUTPUT_RIGHT, 1);
-static struct snd_kcontrol_new snd_pmac_burgundy_hp_sw_imac __initdata =
+static struct snd_kcontrol_new snd_pmac_burgundy_hp_sw_imac __devinitdata =
 BURGUNDY_SWITCH_B("Headphone Playback Switch", 0,
 	MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES,
 	BURGUNDY_HP_LEFT, BURGUNDY_HP_RIGHT, 1);
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index dfea116..7bc492e 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -908,7 +908,7 @@ static int snd_pmac_dev_free(struct snd_device *device)
  * check the machine support byteswap (little-endian)
  */
 
-static void __init detect_byte_swap(struct snd_pmac *chip)
+static void __devinit detect_byte_swap(struct snd_pmac *chip)
 {
 	struct device_node *mio;
 
@@ -1143,7 +1143,7 @@ static int pmac_hp_detect_get(struct snd_kcontrol *kcontrol,
 	return 0;
 }
 
-static struct snd_kcontrol_new auto_mute_controls[] __initdata = {
+static struct snd_kcontrol_new auto_mute_controls[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "Auto Mute Switch",
 	  .info = snd_pmac_boolean_mono_info,
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index adc8341..08e584d 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -838,7 +838,7 @@ static int snapper_put_capture_source(struct snd_kcontrol *kcontrol,
 
 /*
  */
-static struct snd_kcontrol_new tumbler_mixers[] __initdata = {
+static struct snd_kcontrol_new tumbler_mixers[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "Master Playback Volume",
 	  .info = tumbler_info_master_volume,
@@ -862,7 +862,7 @@ static struct snd_kcontrol_new tumbler_mixers[] __initdata = {
 	},
 };
 
-static struct snd_kcontrol_new snapper_mixers[] __initdata = {
+static struct snd_kcontrol_new snapper_mixers[] __devinitdata = {
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	  .name = "Master Playback Volume",
 	  .info = tumbler_info_master_volume,
@@ -895,7 +895,7 @@ static struct snd_kcontrol_new snapper_mixers[] __initdata = {
 	},
 };
 
-static struct snd_kcontrol_new tumbler_hp_sw __initdata = {
+static struct snd_kcontrol_new tumbler_hp_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Headphone Playback Switch",
 	.info = snd_pmac_boolean_mono_info,
@@ -903,7 +903,7 @@ static struct snd_kcontrol_new tumbler_hp_sw __initdata = {
 	.put = tumbler_put_mute_switch,
 	.private_value = TUMBLER_MUTE_HP,
 };
-static struct snd_kcontrol_new tumbler_speaker_sw __initdata = {
+static struct snd_kcontrol_new tumbler_speaker_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "PC Speaker Playback Switch",
 	.info = snd_pmac_boolean_mono_info,
@@ -911,7 +911,7 @@ static struct snd_kcontrol_new tumbler_speaker_sw __initdata = {
 	.put = tumbler_put_mute_switch,
 	.private_value = TUMBLER_MUTE_AMP,
 };
-static struct snd_kcontrol_new tumbler_lineout_sw __initdata = {
+static struct snd_kcontrol_new tumbler_lineout_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Line Out Playback Switch",
 	.info = snd_pmac_boolean_mono_info,
@@ -919,7 +919,7 @@ static struct snd_kcontrol_new tumbler_lineout_sw __initdata = {
 	.put = tumbler_put_mute_switch,
 	.private_value = TUMBLER_MUTE_LINE,
 };
-static struct snd_kcontrol_new tumbler_drc_sw __initdata = {
+static struct snd_kcontrol_new tumbler_drc_sw __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "DRC Switch",
 	.info = snd_pmac_boolean_mono_info,
-- 
cgit v0.10.2


From a05c0205ba031c01bba33a21bf0a35920eb64833 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 3 Jun 2009 09:33:18 +0200
Subject: block: Fix bounce limit setting in DM

blk_queue_bounce_limit() is more than a wrapper about the request queue
limits.bounce_pfn variable.  Introduce blk_queue_bounce_pfn() which can
be called by stacking drivers that wish to set the bounce limit
explicitly.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8d33934..9acd0b7 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -194,6 +194,23 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
+ * blk_queue_bounce_pfn - set the bounce buffer limit for queue
+ * @q: the request queue for the device
+ * @pfn: max address
+ *
+ * Description:
+ *    This function is similar to blk_queue_bounce_limit except it
+ *    neither changes allocation flags, nor does it set up the ISA DMA
+ *    pool. This function should only be used by stacking drivers.
+ *    Hardware drivers should use blk_queue_bounce_limit instead.
+ */
+void blk_queue_bounce_pfn(struct request_queue *q, u64 pfn)
+{
+	q->limits.bounce_pfn = pfn;
+}
+EXPORT_SYMBOL(blk_queue_bounce_pfn);
+
+/**
  * blk_queue_max_sectors - set max sectors for a request for this queue
  * @q:  the request queue for the device
  * @max_sectors:  max sectors in the usual 512b unit
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e9a73bb..3ca1604 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -920,7 +920,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	blk_queue_max_segment_size(q, t->limits.max_segment_size);
 	blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
 	blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
-	blk_queue_bounce_limit(q, t->limits.bounce_pfn);
+	blk_queue_bounce_pfn(q, t->limits.bounce_pfn);
 
 	if (t->limits.no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5e740a1..989aa17 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -910,6 +910,7 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
+extern void blk_queue_bounce_pfn(struct request_queue *, u64);
 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
-- 
cgit v0.10.2


From a32881066e58346f2901afe0ebdfbf0c562877e5 Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Wed, 3 Jun 2009 13:12:55 +0800
Subject: perf_counter/x86: Remove the IRQ (non-NMI) handling bits

Remove the IRQ (non-NMI) handling bits as NMI will be used always.

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090603051255.GA2791@ywang-moblin2.bj.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index fe24d28..d750a10 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,6 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
 #ifdef CONFIG_PERF_COUNTERS
-BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
 BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
 #endif
 
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 7309c0a..4b4921d 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,7 +29,6 @@
 extern void apic_timer_interrupt(void);
 extern void generic_interrupt(void);
 extern void error_interrupt(void);
-extern void perf_counter_interrupt(void);
 extern void perf_pending_interrupt(void);
 
 extern void spurious_interrupt(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 545bb81..4492e19 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -107,11 +107,6 @@
 #define LOCAL_TIMER_VECTOR		0xef
 
 /*
- * Performance monitoring interrupt vector:
- */
-#define LOCAL_PERF_VECTOR		0xee
-
-/*
  * Generic system vector for platform specific use
  */
 #define GENERIC_INTERRUPT_VECTOR	0xed
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index e16e8c1..12cc05e 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -40,7 +40,7 @@ struct cpu_hw_counters {
 struct x86_pmu {
 	const char	*name;
 	int		version;
-	int		(*handle_irq)(struct pt_regs *, int);
+	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
 	void		(*enable_all)(void);
 	void		(*enable)(struct hw_perf_counter *, int);
@@ -755,7 +755,7 @@ static void intel_pmu_reset(void)
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
  */
-static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
+static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct cpu_hw_counters *cpuc;
 	struct cpu_hw_counters;
@@ -794,7 +794,7 @@ again:
 		if (!intel_pmu_save_and_restart(counter))
 			continue;
 
-		if (perf_counter_overflow(counter, nmi, regs, 0))
+		if (perf_counter_overflow(counter, 1, regs, 0))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
 
@@ -812,7 +812,7 @@ again:
 	return 1;
 }
 
-static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
+static int amd_pmu_handle_irq(struct pt_regs *regs)
 {
 	int cpu, idx, handled = 0;
 	struct cpu_hw_counters *cpuc;
@@ -840,22 +840,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, nmi, regs, 0))
+		if (perf_counter_overflow(counter, 1, regs, 0))
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
 	return handled;
 }
 
-void smp_perf_counter_interrupt(struct pt_regs *regs)
-{
-	irq_enter();
-	apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
-	ack_APIC_irq();
-	x86_pmu.handle_irq(regs, 0);
-	irq_exit();
-}
-
 void smp_perf_pending_interrupt(struct pt_regs *regs)
 {
 	irq_enter();
@@ -910,7 +901,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
 	 * If the first NMI handles both, the latter will be empty and daze
 	 * the CPU.
 	 */
-	x86_pmu.handle_irq(regs, 1);
+	x86_pmu.handle_irq(regs);
 
 	return NOTIFY_STOP;
 }
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8910046..7985c01 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1026,8 +1026,6 @@ apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
 #ifdef CONFIG_PERF_COUNTERS
-apicinterrupt LOCAL_PERF_VECTOR \
-	perf_counter_interrupt smp_perf_counter_interrupt
 apicinterrupt LOCAL_PENDING_VECTOR \
 	perf_pending_interrupt smp_perf_pending_interrupt
 #endif
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 3190a6b..205bdd8 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -165,7 +165,6 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 # ifdef CONFIG_PERF_COUNTERS
-	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
 	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
 # endif
 
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 53ceb26..fa6ef69 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -155,7 +155,6 @@ static void __init apic_intr_init(void)
 
 	/* Performance monitoring interrupt: */
 #ifdef CONFIG_PERF_COUNTERS
-	alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
 	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
 #endif
 }
-- 
cgit v0.10.2


From 8ce998d6693bd02ab3b74ee1cc303ecb1fa9b514 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Jun 2009 00:54:33 -0300
Subject: perf_counter tools: Cover PLT symbols too

PLT, the Program Linking Table, is used with the dynamic linker to
allow PIC code in executables and shared objects to figure out
where functions are in other shared objects.

It is one of the sources of unknown/unresolved symbols - this patch
does what binutils figures out when you ask it to disassembly.
(objdump -S)

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Stephane Eranian <eranian@googlemail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index 039931f..d52a1ae 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -258,6 +258,117 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
 	return sec;
 }
 
+#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrel(reldata, idx, &pos_mem))
+
+#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
+
+static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
+				       GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym,
+				       GElf_Shdr *shdr_dynsym,
+				       size_t dynsym_idx)
+{
+	uint32_t nr_rel_entries, idx;
+	GElf_Sym sym;
+	__u64 plt_offset;
+	GElf_Shdr shdr_plt;
+	struct symbol *f;
+	GElf_Shdr shdr_rel_plt;
+	Elf_Data *reldata, *syms, *symstrs;
+	Elf_Scn *scn_plt_rel, *scn_symstrs;
+	char sympltname[1024];
+	int nr = 0, symidx;
+
+	scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+					  ".rela.plt", NULL);
+	if (scn_plt_rel == NULL) {
+		scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+						  ".rel.plt", NULL);
+		if (scn_plt_rel == NULL)
+			return 0;
+	}
+
+	if (shdr_rel_plt.sh_link != dynsym_idx)
+		return 0;
+
+	if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL)
+		return 0;
+
+	/*
+	 * Fetch the relocation section to find the indexes to the GOT
+	 * and the symbols in the .dynsym they refer to.
+	 */
+	reldata = elf_getdata(scn_plt_rel, NULL);
+	if (reldata == NULL)
+		return -1;
+
+	syms = elf_getdata(scn_dynsym, NULL);
+	if (syms == NULL)
+		return -1;
+
+	scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link);
+	if (scn_symstrs == NULL)
+		return -1;
+
+	symstrs = elf_getdata(scn_symstrs, NULL);
+	if (symstrs == NULL)
+		return -1;
+
+	nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
+	plt_offset = shdr_plt.sh_offset;
+
+	if (shdr_rel_plt.sh_type == SHT_RELA) {
+		GElf_Rela pos_mem, *pos;
+
+		elf_section__for_each_rela(reldata, pos, pos_mem, idx,
+					   nr_rel_entries) {
+			symidx = GELF_R_SYM(pos->r_info);
+			plt_offset += shdr_plt.sh_entsize;
+			gelf_getsym(syms, symidx, &sym);
+			snprintf(sympltname, sizeof(sympltname),
+				 "%s@plt", elf_sym__name(&sym, symstrs));
+
+			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
+					sympltname, self->sym_priv_size);
+			if (!f)
+				return -1;
+
+			dso__insert_symbol(self, f);
+			++nr;
+		}
+	} else if (shdr_rel_plt.sh_type == SHT_REL) {
+		GElf_Rel pos_mem, *pos;
+		elf_section__for_each_rel(reldata, pos, pos_mem, idx,
+					  nr_rel_entries) {
+			symidx = GELF_R_SYM(pos->r_info);
+			plt_offset += shdr_plt.sh_entsize;
+			gelf_getsym(syms, symidx, &sym);
+			snprintf(sympltname, sizeof(sympltname),
+				 "%s@plt", elf_sym__name(&sym, symstrs));
+
+			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
+					sympltname, self->sym_priv_size);
+			if (!f)
+				return -1;
+
+			dso__insert_symbol(self, f);
+			++nr;
+		}
+	} else {
+		/*
+		 * TODO: There are still one more shdr_rel_plt.sh_type
+		 * I have to investigate, but probably should be ignored.
+		 */
+	}
+
+	return nr;
+}
+
 static int dso__load_sym(struct dso *self, int fd, const char *name,
 			 symbol_filter_t filter)
 {
@@ -269,8 +380,9 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 	GElf_Shdr shdr;
 	Elf_Data *syms;
 	GElf_Sym sym;
-	Elf_Scn *sec;
+	Elf_Scn *sec, *sec_dynsym;
 	Elf *elf;
+	size_t dynsym_idx;
 	int nr = 0;
 
 	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
@@ -285,12 +397,33 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 		goto out_elf_end;
 	}
 
+	/*
+	 * We need to check if we have a .dynsym, so that we can handle the
+	 * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+	 * .dynsym or .symtab)
+	 */
+	sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr,
+					 ".dynsym", &dynsym_idx);
+	if (sec_dynsym != NULL) {
+		nr = dso__synthesize_plt_symbols(self, elf, &ehdr,
+						 sec_dynsym, &shdr,
+						 dynsym_idx);
+		if (nr < 0)
+			goto out_elf_end;
+	}
+
+	/*
+	 * But if we have a full .symtab (that is a superset of .dynsym) we
+	 * should add the symbols not in the .dynsyn
+	 */
 	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
-	if (sec == NULL)
-		sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL);
+	if (sec == NULL) {
+		if (sec_dynsym == NULL)
+			goto out_elf_end;
 
-	if (sec == NULL)
-		goto out_elf_end;
+		sec = sec_dynsym;
+		gelf_getshdr(sec, &shdr);
+	}
 
 	syms = elf_getdata(sec, NULL);
 	if (syms == NULL)
-- 
cgit v0.10.2


From 3502973d005ed89cc2b3f39780813a341ddba97f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 09:38:58 +0200
Subject: perf report: Print -D to stdout

-D prints to stderr - which is a bit confusing - print to stdout
instead.

Also clean up the if (dump_trace) patterns via a dprintf helper.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 9da990f..6207a31 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -31,6 +31,8 @@ static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static int		dump_trace = 0;
+#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
+
 static int		verbose;
 static int		full_paths;
 
@@ -729,14 +731,12 @@ more:
 		uint64_t ip = event->ip.ip;
 		struct map *map = NULL;
 
-		if (dump_trace) {
-			fprintf(stderr, "%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
-				(void *)(offset + head),
-				(void *)(long)(event->header.size),
-				event->header.misc,
-				event->ip.pid,
-				(void *)(long)ip);
-		}
+		dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->header.misc,
+			event->ip.pid,
+			(void *)(long)ip);
 
 		if (thread == NULL) {
 			fprintf(stderr, "problem processing %d event, skipping it.\n",
@@ -781,15 +781,14 @@ more:
 		struct thread *thread = threads__findnew(event->mmap.pid);
 		struct map *map = map__new(&event->mmap, cwdp, cwdlen);
 
-		if (dump_trace) {
-			fprintf(stderr, "%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
-				(void *)(offset + head),
-				(void *)(long)(event->header.size),
-				(void *)(long)event->mmap.start,
-				(void *)(long)event->mmap.len,
-				(void *)(long)event->mmap.pgoff,
-				event->mmap.filename);
-		}
+		dprintf("%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			(void *)(long)event->mmap.start,
+			(void *)(long)event->mmap.len,
+			(void *)(long)event->mmap.pgoff,
+			event->mmap.filename);
+
 		if (thread == NULL || map == NULL) {
 			if (verbose)
 				fprintf(stderr, "problem processing PERF_EVENT_MMAP, skipping event.\n");
@@ -802,12 +801,11 @@ more:
 	case PERF_EVENT_COMM: {
 		struct thread *thread = threads__findnew(event->comm.pid);
 
-		if (dump_trace) {
-			fprintf(stderr, "%p [%p]: PERF_EVENT_COMM: %s:%d\n",
-				(void *)(offset + head),
-				(void *)(long)(event->header.size),
-				event->comm.comm, event->comm.pid);
-		}
+		dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->comm.comm, event->comm.pid);
+
 		if (thread == NULL ||
 		    thread__set_comm(thread, event->comm.comm)) {
 			fprintf(stderr, "problem processing PERF_EVENT_COMM, skipping event.\n");
@@ -818,11 +816,10 @@ more:
 	}
 	default: {
 broken_event:
-		if (dump_trace)
-			fprintf(stderr, "%p [%p]: skipping unknown header type: %d\n",
-					(void *)(offset + head),
-					(void *)(long)(event->header.size),
-					event->header.type);
+		dprintf("%p [%p]: skipping unknown header type: %d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->header.type);
 
 		total_unknown++;
 
@@ -846,14 +843,13 @@ broken_event:
 	rc = EXIT_SUCCESS;
 	close(input);
 
-	if (dump_trace) {
-		fprintf(stderr, "      IP events: %10ld\n", total);
-		fprintf(stderr, "    mmap events: %10ld\n", total_mmap);
-		fprintf(stderr, "    comm events: %10ld\n", total_comm);
-		fprintf(stderr, " unknown events: %10ld\n", total_unknown);
+	dprintf("      IP events: %10ld\n", total);
+	dprintf("    mmap events: %10ld\n", total_mmap);
+	dprintf("    comm events: %10ld\n", total_comm);
+	dprintf(" unknown events: %10ld\n", total_unknown);
 
+	if (dump_trace)
 		return 0;
-	}
 
 	if (verbose >= 2)
 		dsos__fprintf(stdout);
-- 
cgit v0.10.2


From 5352f35d6ae7b8b981d77137fb268bc54d10624f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 10:07:39 +0200
Subject: perf report: Improve sort key recognition

 - allow case-insensitive tokens - such as --sort Comm,Symbol
 - allow substring shortcuts: --sort sym
 - detect invalid tokens and bail out

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 6207a31..a8d3905 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -453,28 +453,18 @@ static int sort_dimension__add(char *tok)
 		if (sd->taken)
 			continue;
 
-		if (strcmp(tok, sd->name))
+		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
 		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
 		sd->taken = 1;
+
 		return 0;
 	}
 
 	return -ESRCH;
 }
 
-static void setup_sorting(void)
-{
-	char *tmp, *tok, *str = strdup(sort_order);
-
-	for (tok = strtok_r(str, ", ", &tmp);
-			tok; tok = strtok_r(NULL, ", ", &tmp))
-		sort_dimension__add(tok);
-
-	free(str);
-}
-
 static int64_t
 hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -880,6 +870,21 @@ static const struct option options[] = {
 	OPT_END()
 };
 
+static void setup_sorting(void)
+{
+	char *tmp, *tok, *str = strdup(sort_order);
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		if (sort_dimension__add(tok) < 0) {
+			error("Unknown --sort key: `%s'", tok);
+			usage_with_options(report_usage, options);
+		}
+	}
+
+	free(str);
+}
+
 int cmd_report(int argc, const char **argv, const char *prefix)
 {
 	symbol__init();
-- 
cgit v0.10.2


From 367d04c4ec02dad34d80452e32e3370db7fb6fee Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 28 May 2009 09:54:48 +0200
Subject: amd_iommu: fix lock imbalance

In alloc_coherent there is an omitted unlock on the path where mapping
fails. Add the unlock.

[ Impact: fix lock imbalance in alloc_coherent ]

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d689883..9f89bb6 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1541,8 +1541,10 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-	if (*dma_addr == bad_dma_address)
+	if (*dma_addr == bad_dma_address) {
+		spin_unlock_irqrestore(&domain->lock, flags);
 		goto out_free;
+	}
 
 	iommu_completion_wait(iommu);
 
-- 
cgit v0.10.2


From ed966aac335a63083d3125198479447248637d9e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 10:39:26 +0200
Subject: perf report: Handle vDSO symbols properly

We were not looking up vDSO symbols properly, because they
are in the kallsyms but are user-mode entries.

Pass negative addresses to the kernel dso object, this
way we resolve them properly:

     0.05%  [kernel]: vread_tsc

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index a8d3905..0f88d9e 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -728,6 +728,8 @@ more:
 			event->ip.pid,
 			(void *)(long)ip);
 
+		dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
 		if (thread == NULL) {
 			fprintf(stderr, "problem processing %d event, skipping it.\n",
 				event->header.type);
@@ -740,6 +742,8 @@ more:
 
 			dso = kernel_dso;
 
+			dprintf(" ...... dso: %s\n", dso->name);
+
 		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
 
 			show = SHOW_USER;
@@ -749,11 +753,22 @@ more:
 			if (map != NULL) {
 				dso = map->dso;
 				ip -= map->start + map->pgoff;
+			} else {
+				/*
+				 * If this is outside of all known maps,
+				 * and is a negative address, try to look it
+				 * up in the kernel dso, as it might be a
+				 * vsyscall (which executes in user-mode):
+				 */
+				if ((long long)ip < 0)
+					dso = kernel_dso;
 			}
+			dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
 
 		} else {
 			show = SHOW_HV;
 			level = 'H';
+			dprintf(" ...... dso: [hypervisor]\n");
 		}
 
 		if (show & show_mask) {
-- 
cgit v0.10.2


From e09f9446b94ac64b27d37e98c1110f29d712cdad Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 3 Jun 2009 10:07:44 +0100
Subject: GFS2: Remove unused variable

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 841ddc9..73318a3 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -694,8 +694,6 @@ static void do_unflock(struct file *file, struct file_lock *fl)
 
 static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 {
-	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
-
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
 	if (fl->fl_type & LOCK_MAND)
-- 
cgit v0.10.2


From 08ced854fc4a979d9e59ba01000bf96e7057cfbc Mon Sep 17 00:00:00 2001
From: Alexander Clouter <alex@digriz.org.uk>
Date: Wed, 3 Jun 2009 19:28:03 +1000
Subject: hwrng: timeriomem - Fix potential oops (request_mem_region/__devinit)

Fixed oops when calling device_unregister followed by device_register
(changing __init to __devinit) and removed request_mem_region() as
platform_device_register already does this which can result in EBUSY

Signed-off-by: Alexander Clouter <alex@digriz.org.uk>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index dcd352a..a94e930 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -88,9 +88,9 @@ static struct hwrng timeriomem_rng_ops = {
 	.priv		= 0,
 };
 
-static int __init timeriomem_rng_probe(struct platform_device *pdev)
+static int __devinit timeriomem_rng_probe(struct platform_device *pdev)
 {
-	struct resource *res, *mem;
+	struct resource *res;
 	int ret;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -98,21 +98,12 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
 	if (!res)
 		return -ENOENT;
 
-	mem = request_mem_region(res->start, res->end - res->start + 1,
-				 pdev->name);
-	if (mem == NULL)
-		return -EBUSY;
-
-	dev_set_drvdata(&pdev->dev, mem);
-
 	timeriomem_rng_data = pdev->dev.platform_data;
 
 	timeriomem_rng_data->address = ioremap(res->start,
 						res->end - res->start + 1);
-	if (!timeriomem_rng_data->address) {
-		ret = -ENOMEM;
-		goto err_ioremap;
-	}
+	if (!timeriomem_rng_data->address)
+		return -EIO;
 
 	if (timeriomem_rng_data->period != 0
 		&& usecs_to_jiffies(timeriomem_rng_data->period) > 0) {
@@ -125,7 +116,7 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
 
 	ret = hwrng_register(&timeriomem_rng_ops);
 	if (ret)
-		goto err_register;
+		goto failed;
 
 	dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n",
 			timeriomem_rng_data->address,
@@ -133,24 +124,19 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
 
 	return 0;
 
-err_register:
+failed:
 	dev_err(&pdev->dev, "problem registering\n");
 	iounmap(timeriomem_rng_data->address);
-err_ioremap:
-	release_resource(mem);
 
 	return ret;
 }
 
 static int __devexit timeriomem_rng_remove(struct platform_device *pdev)
 {
-	struct resource *mem = dev_get_drvdata(&pdev->dev);
-
 	del_timer_sync(&timeriomem_rng_timer);
 	hwrng_unregister(&timeriomem_rng_ops);
 
 	iounmap(timeriomem_rng_data->address);
-	release_resource(mem);
 
 	return 0;
 }
-- 
cgit v0.10.2


From c076b9937f4912ffc09b30e155fe5246f002f21a Mon Sep 17 00:00:00 2001
From: Ben Nizette <bn@niasdigital.com>
Date: Wed, 3 Jun 2009 11:38:20 +0200
Subject: avr32: Fix clash in ATMEL_USART_ flags

At the moment ATMEL_USART_{RTS,CTS,CLK} have the values
1, 2 and 3 respectively.  Given these are used in bitmasks,
trying to turn on the CLK line will in fact turn on the
RTS and CTS lines as well.

Change the value of ATMEL_USART_CLK to 4.

Signed-off-by: Ben Nizette <bn@niasdigital.com>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>

diff --git a/arch/avr32/mach-at32ap/include/mach/board.h b/arch/avr32/mach-at32ap/include/mach/board.h
index 0b81642..ddedb47 100644
--- a/arch/avr32/mach-at32ap/include/mach/board.h
+++ b/arch/avr32/mach-at32ap/include/mach/board.h
@@ -29,7 +29,7 @@ extern struct platform_device *atmel_default_console_device;
 /* Flags for selecting USART extra pins */
 #define	ATMEL_USART_RTS		0x01
 #define	ATMEL_USART_CTS		0x02
-#define	ATMEL_USART_CLK		0x03
+#define	ATMEL_USART_CLK		0x04
 
 struct atmel_uart_data {
 	short		use_dma_tx;	/* use transmit DMA? */
-- 
cgit v0.10.2


From aefa1c6e7b76fc428770fede6e8b86587fc2ce09 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabioestevam@yahoo.com>
Date: Tue, 2 Jun 2009 17:24:16 -0700
Subject: MX35: Add basic support for MX35PDK board

Add basic support for MX35PDK board (www.freescale.com/imx35pdk).

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx3/Kconfig b/arch/arm/mach-mx3/Kconfig
index 6f17ceb..6b318d7 100644
--- a/arch/arm/mach-mx3/Kconfig
+++ b/arch/arm/mach-mx3/Kconfig
@@ -89,4 +89,11 @@ config MACH_ARMADILLO5X0
 	  Include support for Atmark Armadillo-500 platform. This includes
 	  specific configurations for the board and its peripherals.
 
+config MACH_MX35_3DS
+	bool "Support MX35PDK platform"
+	select ARCH_MX35
+	default n
+	help
+	  Include support for MX35PDK platform. This includes specific
+	  configurations for the board and its peripherals.
 endif
diff --git a/arch/arm/mach-mx3/Makefile b/arch/arm/mach-mx3/Makefile
index 6c22da2..0322696 100644
--- a/arch/arm/mach-mx3/Makefile
+++ b/arch/arm/mach-mx3/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_MACH_MX31MOBOARD)	+= mx31moboard.o mx31moboard-devboard.o \
 obj-$(CONFIG_MACH_QONG)		+= qong.o
 obj-$(CONFIG_MACH_PCM043)	+= pcm043.o
 obj-$(CONFIG_MACH_ARMADILLO5X0) += armadillo5x0.o
+obj-$(CONFIG_MACH_MX35_3DS)	+= mx35pdk.o
diff --git a/arch/arm/mach-mx3/mx35pdk.c b/arch/arm/mach-mx3/mx35pdk.c
new file mode 100644
index 0000000..6d15374
--- /dev/null
+++ b/arch/arm/mach-mx3/mx35pdk.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * Author: Fabio Estevam <fabio.estevam@freescale.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <linux/gpio.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+#include <asm/mach/map.h>
+
+#include <mach/hardware.h>
+#include <mach/common.h>
+#include <mach/imx-uart.h>
+#include <mach/iomux-mx35.h>
+
+#include "devices.h"
+
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
+static struct platform_device *devices[] __initdata = {
+	&mxc_fec_device,
+};
+
+static struct pad_desc mx35pdk_pads[] = {
+	/* UART1 */
+	MX35_PAD_CTS1__UART1_CTS,
+	MX35_PAD_RTS1__UART1_RTS,
+	MX35_PAD_TXD1__UART1_TXD_MUX,
+	MX35_PAD_RXD1__UART1_RXD_MUX,
+	/* FEC */
+	MX35_PAD_FEC_TX_CLK__FEC_TX_CLK,
+	MX35_PAD_FEC_RX_CLK__FEC_RX_CLK,
+	MX35_PAD_FEC_RX_DV__FEC_RX_DV,
+	MX35_PAD_FEC_COL__FEC_COL,
+	MX35_PAD_FEC_RDATA0__FEC_RDATA_0,
+	MX35_PAD_FEC_TDATA0__FEC_TDATA_0,
+	MX35_PAD_FEC_TX_EN__FEC_TX_EN,
+	MX35_PAD_FEC_MDC__FEC_MDC,
+	MX35_PAD_FEC_MDIO__FEC_MDIO,
+	MX35_PAD_FEC_TX_ERR__FEC_TX_ERR,
+	MX35_PAD_FEC_RX_ERR__FEC_RX_ERR,
+	MX35_PAD_FEC_CRS__FEC_CRS,
+	MX35_PAD_FEC_RDATA1__FEC_RDATA_1,
+	MX35_PAD_FEC_TDATA1__FEC_TDATA_1,
+	MX35_PAD_FEC_RDATA2__FEC_RDATA_2,
+	MX35_PAD_FEC_TDATA2__FEC_TDATA_2,
+	MX35_PAD_FEC_RDATA3__FEC_RDATA_3,
+	MX35_PAD_FEC_TDATA3__FEC_TDATA_3,
+};
+
+/*
+ * Board specific initialization.
+ */
+static void __init mxc_board_init(void)
+{
+	mxc_iomux_v3_setup_multiple_pads(mx35pdk_pads, ARRAY_SIZE(mx35pdk_pads));
+
+	platform_add_devices(devices, ARRAY_SIZE(devices));
+
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+}
+
+static void __init mx35pdk_timer_init(void)
+{
+	mx35_clocks_init();
+}
+
+struct sys_timer mx35pdk_timer = {
+	.init	= mx35pdk_timer_init,
+};
+
+MACHINE_START(MX35_3DS, "Freescale MX35PDK")
+	/* Maintainer: Freescale Semiconductor, Inc */
+	.phys_io	= AIPS1_BASE_ADDR,
+	.io_pg_offst	= ((AIPS1_BASE_ADDR_VIRT) >> 18) & 0xfffc,
+	.boot_params    = PHYS_OFFSET + 0x100,
+	.map_io         = mx35_map_io,
+	.init_irq       = mxc_init_irq,
+	.init_machine   = mxc_board_init,
+	.timer          = &mx35pdk_timer,
+MACHINE_END
diff --git a/arch/arm/plat-mxc/include/mach/board-mx35pdk.h b/arch/arm/plat-mxc/include/mach/board-mx35pdk.h
new file mode 100644
index 0000000..1111037
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/board-mx35pdk.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright 2009 Freescale Semiconductor, Inc. All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __ASM_ARCH_MXC_BOARD_MX35PDK_H__
+#define __ASM_ARCH_MXC_BOARD_MX35PDK_H__
+
+/* mandatory for CONFIG_DEBUG_LL */
+
+#define MXC_LL_UART_PADDR	UART1_BASE_ADDR
+#define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
+
+#endif /* __ASM_ARCH_MXC_BOARD_MX35PDK_H__ */
diff --git a/arch/arm/plat-mxc/include/mach/debug-macro.S b/arch/arm/plat-mxc/include/mach/debug-macro.S
index 57d3e3f..e280978 100644
--- a/arch/arm/plat-mxc/include/mach/debug-macro.S
+++ b/arch/arm/plat-mxc/include/mach/debug-macro.S
@@ -46,6 +46,9 @@
 #ifdef CONFIG_MACH_ARMADILLO5X0
 #include <mach/board-armadillo5x0.h>
 #endif
+#ifdef CONFIG_MACH_MX35_3DS
+#include <mach/board-mx35pdk.h>
+#endif
 		.macro	addruart,rx
 		mrc	p15, 0, \rx, c1, c0
 		tst	\rx, #1			@ MMU enabled?
-- 
cgit v0.10.2


From 6984efb692e97ce5f75f26e595685c04c2061bac Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 3 Jun 2009 19:38:58 +1000
Subject: perf_counter: powerpc: Fix event alternative code generation on
 POWER5/5+

Commit ef923214 ("perf_counter: powerpc: use u64 for event
codes internally") introduced a bug where the return value from
function find_alternative_bdecode gets put into a u64 variable
and later tested to see if it is < 0.  The effect is that we
get extra, bogus event code alternatives on POWER5 and POWER5+,
leading to error messages such as "oops compute_mmcr failed"
being printed and counters not counting properly.

This fixes it by using s64 for the return type of
find_alternative_bdecode and for the local variable that the
caller puts the value in.  It also makes the event argument a
u64 on POWER5+ for consistency.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Cc: Stephane Eranian <eranian@googlemail.com>
LKML-Reference: <18982.17586.666132.90983@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index c6cdfc1..8471e3c 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -242,7 +242,7 @@ static const unsigned char bytedecode_alternatives[4][4] = {
  * event code for those that do, or -1 otherwise.  This also handles
  * alternative PCMSEL values for add events.
  */
-static int find_alternative_bdecode(unsigned int event)
+static s64 find_alternative_bdecode(u64 event)
 {
 	int pmc, altpmc, pp, j;
 
@@ -277,7 +277,7 @@ static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
 	int i, j, nalt = 1;
 	int nlim;
-	u64 ae;
+	s64 ae;
 
 	alt[0] = event;
 	nalt = 1;
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index d534496..1b44c5f 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -250,7 +250,7 @@ static const unsigned char bytedecode_alternatives[4][4] = {
  * PMCSEL values on other counters.  This returns the alternative
  * event code for those that do, or -1 otherwise.
  */
-static u64 find_alternative_bdecode(u64 event)
+static s64 find_alternative_bdecode(u64 event)
 {
 	int pmc, altpmc, pp, j;
 
@@ -272,7 +272,7 @@ static u64 find_alternative_bdecode(u64 event)
 static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 {
 	int i, j, nalt = 1;
-	u64 ae;
+	s64 ae;
 
 	alt[0] = event;
 	nalt = 1;
-- 
cgit v0.10.2


From dcd945e0d8a6d654e3e1de51faea9f98f1504aa5 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 3 Jun 2009 19:40:36 +1000
Subject: perf_counter: powerpc: Fix race causing "oops trying to read PMC0"
 errors

When using interrupting counters and limited (non-interrupting)
counters at the same time, it's possible that we get an
interrupt in write_mmcr0() after writing MMCR0 but before we
have set up the counters using limited PMCs.  What happens then
is that we get into perf_counter_interrupt() with
counter->hw.idx = 0 for the limited counters, leading to the
"oops trying to read PMC0" error message being printed.

This fixes the problem by making perf_counter_interrupt()
robust against counter->hw.idx being zero (the counter is just
ignored in that case) and also by changing write_mmcr0() to
write MMCR0 initially with the counter overflow interrupt
enable bits masked (set to 0).  If the MMCR0 value requested by
the caller has either of those bits set, we write MMCR0 again
with the requested value of those bits after setting up the
limited counters properly.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Cc: Stephane Eranian <eranian@googlemail.com>
LKML-Reference: <18982.17684.138182.954599@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index ea54686..4cc4ac5 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -372,16 +372,28 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
 
 	/*
 	 * Write MMCR0, then read PMC5 and PMC6 immediately.
+	 * To ensure we don't get a performance monitor interrupt
+	 * between writing MMCR0 and freezing/thawing the limited
+	 * counters, we first write MMCR0 with the counter overflow
+	 * interrupt enable bits turned off.
 	 */
 	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
 		     : "=&r" (pmc5), "=&r" (pmc6)
-		     : "r" (mmcr0), "i" (SPRN_MMCR0),
+		     : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
+		       "i" (SPRN_MMCR0),
 		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));
 
 	if (mmcr0 & MMCR0_FC)
 		freeze_limited_counters(cpuhw, pmc5, pmc6);
 	else
 		thaw_limited_counters(cpuhw, pmc5, pmc6);
+
+	/*
+	 * Write the full MMCR0 including the counter overflow interrupt
+	 * enable bits, if necessary.
+	 */
+	if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
+		mtspr(SPRN_MMCR0, mmcr0);
 }
 
 /*
@@ -1108,7 +1120,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
-		if (is_limited_pmc(counter->hw.idx))
+		if (!counter->hw.idx || is_limited_pmc(counter->hw.idx))
 			continue;
 		val = read_pmc(counter->hw.idx);
 		if ((int)val < 0) {
-- 
cgit v0.10.2


From e76afc4e7816a0a5300073098cdac93a994eb5ca Mon Sep 17 00:00:00 2001
From: Eric Lammerts <linux-arm@lists.lammerts.org>
Date: Tue, 19 May 2009 20:53:20 -0400
Subject: fix oops when using console=ttymxcN with N > 0

Signed-off-by: Eric Lammerts <eric@lammerts.org>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 9f460b1..5f0be40 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -1031,6 +1031,8 @@ imx_console_setup(struct console *co, char *options)
 	if (co->index == -1 || co->index >= ARRAY_SIZE(imx_ports))
 		co->index = 0;
 	sport = imx_ports[co->index];
+	if(sport == NULL)
+		return -ENODEV;
 
 	if (options)
 		uart_parse_options(options, &baud, &parity, &bits, &flow);
-- 
cgit v0.10.2


From 6b4bfb87b638a4f114dfb6f72f4ac1be88a4ebe4 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Tue, 26 May 2009 22:31:46 +0530
Subject: mx[23]: don't put clock lookups in __initdata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the __initdata annotation for the clock lookups, since they will
be needed when loading modules which use clk_get().

Tested-by: Agustín Ferrín Pozuelo <gatoguan-os@yahoo.com>
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/clock_imx21.c b/arch/arm/mach-mx2/clock_imx21.c
index 999d013..e4b08ca 100644
--- a/arch/arm/mach-mx2/clock_imx21.c
+++ b/arch/arm/mach-mx2/clock_imx21.c
@@ -890,7 +890,7 @@ static struct clk clko_clk = {
 		.con_id = n, \
 		.clk = &c, \
 	},
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 /* It's unlikely that any driver wants one of them directly:
 	_REGISTER_CLOCK(NULL, "ckih", ckih_clk)
 	_REGISTER_CLOCK(NULL, "ckil", ckil_clk)
diff --git a/arch/arm/mach-mx2/clock_imx27.c b/arch/arm/mach-mx2/clock_imx27.c
index 3f7280c..2c97144 100644
--- a/arch/arm/mach-mx2/clock_imx27.c
+++ b/arch/arm/mach-mx2/clock_imx27.c
@@ -621,7 +621,7 @@ DEFINE_CLOCK1(csi_clk,     0, 0,      0, parent, &csi_clk1, &per4_clk);
 		.clk = &c, \
 	},
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 	_REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk)
 	_REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk)
 	_REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk)
diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c
index 53a112d..3c1e06f 100644
--- a/arch/arm/mach-mx3/clock-imx35.c
+++ b/arch/arm/mach-mx3/clock-imx35.c
@@ -404,7 +404,7 @@ DEFINE_CLOCK(gpu2d_clk,  0, CCM_CGR3,  4, NULL, NULL);
 		.clk = &c,		\
 	},
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 	_REGISTER_CLOCK(NULL, "asrc", asrc_clk)
 	_REGISTER_CLOCK(NULL, "ata", ata_clk)
 	_REGISTER_CLOCK(NULL, "audmux", audmux_clk)
diff --git a/arch/arm/mach-mx3/clock.c b/arch/arm/mach-mx3/clock.c
index 9957a11..a68fcf9 100644
--- a/arch/arm/mach-mx3/clock.c
+++ b/arch/arm/mach-mx3/clock.c
@@ -516,7 +516,7 @@ DEFINE_CLOCK(ipg_clk,     0, NULL,          0, ipg_get_rate, NULL, &ahb_clk);
 		.clk = &c, \
 	},
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 	_REGISTER_CLOCK(NULL, "emi", emi_clk)
 	_REGISTER_CLOCK(NULL, "cspi", cspi1_clk)
 	_REGISTER_CLOCK(NULL, "cspi", cspi2_clk)
-- 
cgit v0.10.2


From 2552a710f4b991136c650bf2a6d1b81f27f6273e Mon Sep 17 00:00:00 2001
From: Cliff Cai <cliff.cai@analog.com>
Date: Tue, 2 Jun 2009 00:18:53 -0400
Subject: ASoC: SSM2602: remove unsupported sample rates

Signed-off-by: Cliff Cai <cliff.cai@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c
index 87f606c..d6af069 100644
--- a/sound/soc/codecs/ssm2602.c
+++ b/sound/soc/codecs/ssm2602.c
@@ -497,11 +497,9 @@ static int ssm2602_set_bias_level(struct snd_soc_codec *codec,
 	return 0;
 }
 
-#define SSM2602_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_11025 |\
-		SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_22050 |\
-		SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 |\
-		SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |\
-		SNDRV_PCM_RATE_96000)
+#define SSM2602_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_32000 |\
+		SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 |\
+		SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000)
 
 #define SSM2602_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE |\
 		SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE)
-- 
cgit v0.10.2


From 80d5bd93143439aff77fd246f5d06570b7a4641e Mon Sep 17 00:00:00 2001
From: Cliff Cai <cliff.cai@analog.com>
Date: Tue, 2 Jun 2009 00:18:56 -0400
Subject: ASoC: Blackfin: set the transfer size according the ac97_frame size

Signed-off-by: Cliff Cai <cliff.cai@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/blackfin/bf5xx-sport.c b/sound/soc/blackfin/bf5xx-sport.c
index b7953c8..469ce7f 100644
--- a/sound/soc/blackfin/bf5xx-sport.c
+++ b/sound/soc/blackfin/bf5xx-sport.c
@@ -190,7 +190,7 @@ static inline int sport_hook_rx_dummy(struct sport_device *sport)
 	desc = get_dma_next_desc_ptr(sport->dma_rx_chan);
 	/* Copy the descriptor which will be damaged to backup */
 	temp_desc = *desc;
-	desc->x_count = 0xa;
+	desc->x_count = sport->dummy_count / 2;
 	desc->y_count = 0;
 	desc->next_desc_addr = sport->dummy_rx_desc;
 	local_irq_restore(flags);
@@ -309,7 +309,7 @@ static inline int sport_hook_tx_dummy(struct sport_device *sport)
 	desc = get_dma_next_desc_ptr(sport->dma_tx_chan);
 	/* Store the descriptor which will be damaged */
 	temp_desc = *desc;
-	desc->x_count = 0xa;
+	desc->x_count = sport->dummy_count / 2;
 	desc->y_count = 0;
 	desc->next_desc_addr = sport->dummy_tx_desc;
 	local_irq_restore(flags);
-- 
cgit v0.10.2


From cf485da15a3b507c7dab42337639e4f4025d3373 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Tue, 2 Jun 2009 00:18:57 -0400
Subject: ASoC: Blackfin: document how anomaly 05000250 is handled

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c
index 8a935f2..b1ed423 100644
--- a/sound/soc/blackfin/bf5xx-ac97.c
+++ b/sound/soc/blackfin/bf5xx-ac97.c
@@ -31,6 +31,15 @@
 #include "bf5xx-sport.h"
 #include "bf5xx-ac97.h"
 
+/* Anomaly notes:
+ *  05000250 -	AD1980 is running in TDM mode and RFS/TFS are generated by SPORT
+ *		contrtoller. But, RFSDIV and TFSDIV are always set to 16*16-1,
+ *		while the max AC97 data size is 13*16. The DIV is always larger
+ *		than data size. AD73311 and ad2602 are not running in TDM mode.
+ *		AD1836 and AD73322 depend on external RFS/TFS only. So, this
+ *		anomaly does not affect blackfin sound drivers.
+*/
+
 static int *cmd_count;
 static int sport_num = CONFIG_SND_BF5XX_SPORT_NUM;
 
-- 
cgit v0.10.2


From f692fce0cf8625b6cc8678e802fb0e2e657b1ca6 Mon Sep 17 00:00:00 2001
From: Cliff Cai <cliff.cai@analog.com>
Date: Tue, 2 Jun 2009 00:18:54 -0400
Subject: ASoC: SSM2602: assign last substream to the master when shutting down

Fixes crash when shutting down.

Signed-off-by: Cliff Cai <cliff.cai@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c
index d6af069..1fc4c8e 100644
--- a/sound/soc/codecs/ssm2602.c
+++ b/sound/soc/codecs/ssm2602.c
@@ -336,15 +336,17 @@ static int ssm2602_startup(struct snd_pcm_substream *substream,
 			master_runtime->sample_bits,
 			master_runtime->rate);
 
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_RATE,
-					     master_runtime->rate,
-					     master_runtime->rate);
-
-		snd_pcm_hw_constraint_minmax(substream->runtime,
-					     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
-					     master_runtime->sample_bits,
-					     master_runtime->sample_bits);
+		if (master_runtime->rate != 0)
+			snd_pcm_hw_constraint_minmax(substream->runtime,
+						     SNDRV_PCM_HW_PARAM_RATE,
+						     master_runtime->rate,
+						     master_runtime->rate);
+
+		if (master_runtime->sample_bits != 0)
+			snd_pcm_hw_constraint_minmax(substream->runtime,
+						     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
+						     master_runtime->sample_bits,
+						     master_runtime->sample_bits);
 
 		ssm2602->slave_substream = substream;
 	} else
@@ -372,6 +374,11 @@ static void ssm2602_shutdown(struct snd_pcm_substream *substream,
 	struct snd_soc_device *socdev = rtd->socdev;
 	struct snd_soc_codec *codec = socdev->card->codec;
 	struct ssm2602_priv *ssm2602 = codec->private_data;
+
+	if (ssm2602->master_substream == substream)
+		ssm2602->master_substream = ssm2602->slave_substream;
+
+	ssm2602->slave_substream = NULL;
 	/* deactivate */
 	if (!codec->active)
 		ssm2602_write(codec, SSM2602_ACTIVE, 0);
-- 
cgit v0.10.2


From 0e2595cdfd7df9f1128f7185152601ae5417483b Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Wed, 20 May 2009 08:10:57 -0500
Subject: x86: Fix UV BAU activation descriptor init

The UV tlb shootdown code has a serious initialization error.

An array of structures [32*8] is initialized as if it were [32].
The array is indexed by (cpu number on the blade)*8, so the short
initialization works for up to 4 cpus on a blade.
But above that, we provide an invalid opcode to the hub's
broadcast assist unit.

This patch changes the allocation of the array to use its symbolic
dimensions for better clarity. And initializes all 32*8 entries.

Shortened 'UV_ACTIVATION_DESCRIPTOR_SIZE' to 'UV_ADP_SIZE' per Ingo's
recommendation.

Tested on the UV simulator.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: <stable@kernel.org>
LKML-Reference: <E1M6lZR-0007kV-Aq@eag09.americas.sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 9b0e61b..bddd44f 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -37,7 +37,7 @@
 #define UV_CPUS_PER_ACT_STATUS		32
 #define UV_ACT_STATUS_MASK		0x3
 #define UV_ACT_STATUS_SIZE		2
-#define UV_ACTIVATION_DESCRIPTOR_SIZE	32
+#define UV_ADP_SIZE			32
 #define UV_DISTRIBUTION_SIZE		256
 #define UV_SW_ACK_NPENDING		8
 #define UV_NET_ENDPOINT_INTD		0x38
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index ed0c337..16f0fd4 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -715,7 +715,12 @@ uv_activation_descriptor_init(int node, int pnode)
 	struct bau_desc *adp;
 	struct bau_desc *ad2;
 
-	adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
+	/*
+	 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
+	 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade
+	 */
+	adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
+		UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
 	BUG_ON(!adp);
 
 	pa = uv_gpa(adp); /* need the real nasid*/
@@ -729,7 +734,13 @@ uv_activation_descriptor_init(int node, int pnode)
 				      (n << UV_DESC_BASE_PNODE_SHIFT | m));
 	}
 
-	for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
+	/*
+	 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+	 * cpu even though we only use the first one; one descriptor can
+	 * describe a broadcast to 256 nodes.
+	 */
+	for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
+		i++, ad2++) {
 		memset(ad2, 0, sizeof(struct bau_desc));
 		ad2->header.sw_ack_flag = 1;
 		/*
-- 
cgit v0.10.2


From 226f62fdd53d5b2c74e242aa11f6ad43d0285d3f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 3 Jun 2009 11:23:56 +0200
Subject: perf_counter: Add a comm hook for pure fork()s

I noticed missing COMM events and found that we missed
reporting them for pure forks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/fork.c b/kernel/fork.c
index 23bf757..b7d7a9f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1412,6 +1412,12 @@ long do_fork(unsigned long clone_flags,
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
+		} else {
+			/*
+			 * vfork will do an exec which will call
+			 * set_task_comm()
+			 */
+			perf_counter_comm(p);
 		}
 
 		audit_finish_fork(p);
-- 
cgit v0.10.2


From e61078a0c88773d3465b0b9d665c5ed6b952b1cf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 3 Jun 2009 11:24:33 +0200
Subject: perf record: Use long arg for counter period

I wrote this to test the extended period emulation, we might as
well merge it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index ec3b73a..cea5b8d 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -20,8 +20,8 @@
 #define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
 #define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
 
-static int			default_interval = 100000;
-static int			event_count[MAX_COUNTERS];
+static long			default_interval = 100000;
+static long			event_count[MAX_COUNTERS];
 
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 static int			nr_cpus				= 0;
@@ -494,7 +494,7 @@ static const struct option options[] = {
 			    "append to the output file to do incremental profiling"),
 	OPT_BOOLEAN('f', "force", &force,
 			"overwrite existing data file"),
-	OPT_INTEGER('c', "count", &default_interval,
+	OPT_LONG('c', "count", &default_interval,
 		    "event period to sample"),
 	OPT_STRING('o', "output", &output_name, "file",
 		    "output file name"),
diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c
index 28b34c1..b80abd9 100644
--- a/Documentation/perf_counter/util/parse-options.c
+++ b/Documentation/perf_counter/util/parse-options.c
@@ -113,6 +113,22 @@ static int get_value(struct parse_opt_ctx_t *p,
 			return opterror(opt, "expects a numerical value", flags);
 		return 0;
 
+	case OPTION_LONG:
+		if (unset) {
+			*(long *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(long *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(long *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
 	default:
 		die("should not happen, someone must be hit on the forehead");
 	}
diff --git a/Documentation/perf_counter/util/parse-options.h b/Documentation/perf_counter/util/parse-options.h
index a81c7fa..a1039a6 100644
--- a/Documentation/perf_counter/util/parse-options.h
+++ b/Documentation/perf_counter/util/parse-options.h
@@ -14,6 +14,7 @@ enum parse_opt_type {
 	/* options with arguments (usually) */
 	OPTION_STRING,
 	OPTION_INTEGER,
+	OPTION_LONG,
 	OPTION_CALLBACK,
 };
 
@@ -97,6 +98,7 @@ struct option {
 #define OPT_SET_INT(s, l, v, h, i)  { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) }
 #define OPT_SET_PTR(s, l, v, h, p)  { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) }
 #define OPT_INTEGER(s, l, v, h)     { OPTION_INTEGER, (s), (l), (v), NULL, (h) }
+#define OPT_LONG(s, l, v, h)        { OPTION_LONG, (s), (l), (v), NULL, (h) }
 #define OPT_STRING(s, l, v, a, h)   { OPTION_STRING,  (s), (l), (v), (a), (h) }
 #define OPT_DATE(s, l, v, h) \
 	{ OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \
-- 
cgit v0.10.2


From 8229289b607682f90b946ad2c319526303c17700 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 3 Jun 2009 12:37:36 +0200
Subject: perf report: Fix comm sorting

Since we can (and do) change comm strings during the collection
phase, we cannot actually sort on them to build the histogram.
Therefore add an (optional) third sorting phase to collapse the
histrogram.

Comm sorting now builds the histrogram on threads and then in
the collapse phase collects all threads with the same comm.

This collapsed histogram is then reversed and sorted on events.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 0f88d9e..6d359c9 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -211,9 +211,9 @@ static struct thread *thread__new(pid_t pid)
 
 	if (self != NULL) {
 		self->pid = pid;
-		self->comm = malloc(30);
+		self->comm = malloc(32);
 		if (self->comm)
-			sprintf(self->comm, ":%d", pid);
+			snprintf(self->comm, 32, ":%d", self->pid);
 		INIT_LIST_HEAD(&self->maps);
 	}
 
@@ -222,6 +222,8 @@ static struct thread *thread__new(pid_t pid)
 
 static int thread__set_comm(struct thread *self, const char *comm)
 {
+	if (self->comm)
+		free(self->comm);
 	self->comm = strdup(comm);
 	return self->comm ? 0 : -ENOMEM;
 }
@@ -303,9 +305,12 @@ struct sort_entry {
 	char *header;
 
 	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
 	size_t	(*print)(FILE *fp, struct hist_entry *);
 };
 
+/* --sort pid */
+
 static int64_t
 sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -324,9 +329,17 @@ static struct sort_entry sort_thread = {
 	.print	= sort__thread_print,
 };
 
+/* --sort comm */
+
 static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
+	return right->thread->pid - left->thread->pid;
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
 	char *comm_l = left->thread->comm;
 	char *comm_r = right->thread->comm;
 
@@ -349,11 +362,14 @@ sort__comm_print(FILE *fp, struct hist_entry *self)
 }
 
 static struct sort_entry sort_comm = {
-	.header = "          Command",
-	.cmp	= sort__comm_cmp,
-	.print	= sort__comm_print,
+	.header 	= "          Command",
+	.cmp		= sort__comm_cmp,
+	.collapse	= sort__comm_collapse,
+	.print		= sort__comm_print,
 };
 
+/* --sort dso */
+
 static int64_t
 sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -387,6 +403,8 @@ static struct sort_entry sort_dso = {
 	.print	= sort__dso_print,
 };
 
+/* --sort symbol */
+
 static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -428,6 +446,8 @@ static struct sort_entry sort_sym = {
 	.print	= sort__sym_print,
 };
 
+static int sort__need_collapse = 0;
+
 struct sort_dimension {
 	char *name;
 	struct sort_entry *entry;
@@ -456,6 +476,9 @@ static int sort_dimension__add(char *tok)
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
+		if (sd->entry->collapse)
+			sort__need_collapse = 1;
+
 		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
 		sd->taken = 1;
 
@@ -480,6 +503,25 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 	return cmp;
 }
 
+static int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int64_t (*f)(struct hist_entry *, struct hist_entry *);
+
+		f = se->collapse ?: se->cmp;
+
+		cmp = f(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
 static size_t
 hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 {
@@ -549,6 +591,64 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 	return 0;
 }
 
+static void hist_entry__free(struct hist_entry *he)
+{
+	free(he);
+}
+
+/*
+ * collapse the histogram
+ */
+
+static struct rb_root collapse_hists;
+
+static void collapse__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &collapse_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			iter->count += he->count;
+			hist_entry__free(he);
+			return;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &collapse_hists);
+}
+
+static void collapse__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+
+	if (!sort__need_collapse)
+		return;
+
+	next = rb_first(&hist);
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, &hist);
+		collapse__insert_entry(n);
+	}
+}
+
 /*
  * reverse the map, sort on count.
  */
@@ -577,9 +677,14 @@ static void output__insert_entry(struct hist_entry *he)
 
 static void output__resort(void)
 {
-	struct rb_node *next = rb_first(&hist);
+	struct rb_node *next;
 	struct hist_entry *n;
 
+	if (sort__need_collapse)
+		next = rb_first(&collapse_hists);
+	else
+		next = rb_first(&hist);
+
 	while (next) {
 		n = rb_entry(next, struct hist_entry, rb_node);
 		next = rb_next(&n->rb_node);
@@ -859,6 +964,7 @@ broken_event:
 	if (verbose >= 2)
 		dsos__fprintf(stdout);
 
+	collapse__resort();
 	output__resort();
 	output__fprintf(stdout, total);
 
-- 
cgit v0.10.2


From a96bbc16418bc691317f265d6bf98ba941ca9c1a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 3 Jun 2009 14:01:36 +0200
Subject: perf_counter: Fix race in counter initialization

We need the PID namespace and counter ID available when the
counter overflows and we need to generate a sample event.

[ Impact: fix kernel crash with high-frequency sampling ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
[ fixed a further crash and cleaned up the initialization a bit ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 317cef7..ab44554 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -48,6 +48,8 @@ int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
 
+static atomic64_t perf_counter_id;
+
 /*
  * Lock for (sysadmin-configurable) counter reservations:
  */
@@ -3351,14 +3353,18 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 
 	mutex_init(&counter->mmap_mutex);
 
-	counter->cpu			= cpu;
+	counter->cpu		= cpu;
 	counter->attr		= *attr;
-	counter->group_leader		= group_leader;
-	counter->pmu			= NULL;
-	counter->ctx			= ctx;
-	counter->oncpu			= -1;
+	counter->group_leader	= group_leader;
+	counter->pmu		= NULL;
+	counter->ctx		= ctx;
+	counter->oncpu		= -1;
+
+	counter->ns		= get_pid_ns(current->nsproxy->pid_ns);
+	counter->id		= atomic64_inc_return(&perf_counter_id);
+
+	counter->state		= PERF_COUNTER_STATE_INACTIVE;
 
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (attr->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
@@ -3402,6 +3408,8 @@ done:
 		err = PTR_ERR(pmu);
 
 	if (err) {
+		if (counter->ns)
+			put_pid_ns(counter->ns);
 		kfree(counter);
 		return ERR_PTR(err);
 	}
@@ -3419,8 +3427,6 @@ done:
 	return counter;
 }
 
-static atomic64_t perf_counter_id;
-
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
@@ -3515,9 +3521,6 @@ SYSCALL_DEFINE5(perf_counter_open,
 	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
 	mutex_unlock(&current->perf_counter_mutex);
 
-	counter->ns = get_pid_ns(current->nsproxy->pid_ns);
-	counter->id = atomic64_inc_return(&perf_counter_id);
-
 	fput_light(counter_file, fput_needed2);
 
 out_fput:
-- 
cgit v0.10.2


From c878b7d60418a45c36d99c2dc876ebb76035d404 Mon Sep 17 00:00:00 2001
From: Mark Jackson <mpfj@mimc.co.uk>
Date: Wed, 3 Jun 2009 11:16:38 +0100
Subject: Fix MIMC200 board LCD init

This patch updates the LCD init code for the MIMC200 board.

V2 fixes a .yres typo and corrects an incorrect setup value
in the call to at32_add_device_lcdc()

Without this patch, the lcd setup is wrong and won't display images
correctly (if at all !!)

Signed-off-by: Mark Jackson <mpfj@mimc.co.uk>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>

diff --git a/arch/avr32/boards/mimc200/setup.c b/arch/avr32/boards/mimc200/setup.c
index c1b2175..523d8e1 100644
--- a/arch/avr32/boards/mimc200/setup.c
+++ b/arch/avr32/boards/mimc200/setup.c
@@ -43,16 +43,16 @@ unsigned long at32_board_osc_rates[3] = {
 /* Initialized by bootloader-specific startup code. */
 struct tag *bootloader_tags __initdata;
 
-static struct fb_videomode __initdata tx14d14_modes[] = {
+static struct fb_videomode __initdata pt0434827_modes[] = {
 	{
-		.name		= "640x480 @ 60",
-		.refresh	= 60,
-		.xres		= 640,		.yres		= 480,
-		.pixclock	= KHZ2PICOS(11666),
+		.name		= "480x272 @ 72",
+		.refresh	= 72,
+		.xres		= 480,		.yres		= 272,
+		.pixclock	= KHZ2PICOS(10000),
 
-		.left_margin	= 80,		.right_margin	= 1,
-		.upper_margin	= 13,		.lower_margin	= 2,
-		.hsync_len	= 64,		.vsync_len	= 1,
+		.left_margin	= 1,		.right_margin	= 1,
+		.upper_margin	= 12,		.lower_margin	= 1,
+		.hsync_len	= 42,		.vsync_len	= 1,
 
 		.sync		= 0,
 		.vmode		= FB_VMODE_NONINTERLACED,
@@ -60,14 +60,14 @@ static struct fb_videomode __initdata tx14d14_modes[] = {
 };
 
 static struct fb_monspecs __initdata mimc200_default_monspecs = {
-	.manufacturer		= "HIT",
-	.monitor		= "TX14D14VM1BAB",
-	.modedb			= tx14d14_modes,
-	.modedb_len		= ARRAY_SIZE(tx14d14_modes),
+	.manufacturer		= "PT",
+	.monitor		= "PT0434827-A401",
+	.modedb			= pt0434827_modes,
+	.modedb_len		= ARRAY_SIZE(pt0434827_modes),
 	.hfmin			= 14820,
 	.hfmax			= 22230,
 	.vfmin			= 60,
-	.vfmax			= 73.3,
+	.vfmax			= 85,
 	.dclkmax		= 25200000,
 };
 
@@ -228,7 +228,8 @@ static int __init mimc200_init(void)
 	i2c_register_board_info(0, i2c_info, ARRAY_SIZE(i2c_info));
 
 	at32_add_device_lcdc(0, &mimc200_lcdc_data,
-			     fbmem_start, fbmem_size, 1);
+			     fbmem_start, fbmem_size,
+			     ATMEL_LCDC_CONTROL | ATMEL_LCDC_ALT_CONTROL | ATMEL_LCDC_ALT_24B_DATA);
 
 	return 0;
 }
-- 
cgit v0.10.2


From a2023556409cf7fec5d67a26f7fcfa57c5a4086d Mon Sep 17 00:00:00 2001
From: Tim Bird <tim.bird@am.sony.com>
Date: Tue, 2 Jun 2009 17:06:54 -0700
Subject: ring-buffer: fix bug in ring_buffer_discard_commit

There's a bug in ring_buffer_discard_commit.  The wrong
pointer is being compared in order to check if the event
can be freed from the buffer rather than discarded
(i.e. marked as PAD).

I noticed this when I was working on duration filtering.
The bug is not deadly - it just results in lots of wasted
space in the buffer.  All filtered events are left in
the buffer and marked as discarded, rather than being
removed from the buffer to make space for other events.

Unfortunately, when I fixed this bug, I got errors doing a
filtered function trace.  Multiple TIME_EXTEND
events pile up in the buffer, and trigger the
following loop overage warning in rb_iter_peek():

again:
	...
	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
		return NULL;

I'm not sure what the best way is to fix this. I don't
know if I should extend the loop threshhold, or if I should
make the test more complex (ignore TIME_EXTEND
events), or just get rid of this loop check completely.

Note that if I implement a workaround for this, then I
see another problem from rb_advance_iter().  I haven't
tracked that one down yet.

In general, it seems like the case of removing filtered
events has not been working properly, and so some assumptions
about buffer invariant conditions need to be revisited.

Here's the patch for the simple fix:

Compare correct pointer for checking if an event can be
freed rather than left as discarded in the buffer.

Signed-off-by: Tim Bird <tim.bird@am.sony.com>
LKML-Reference: <4A25BE9E.5090909@am.sony.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 16b24d4..9453023 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1708,7 +1708,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 
 	bpage = cpu_buffer->tail_page;
 
-	if (bpage == (void *)addr && rb_page_write(bpage) == old_index) {
+	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
 		/*
 		 * This is on the tail page. It is possible that
 		 * a write could come in and move the tail page
-- 
cgit v0.10.2


From edd813bffc62a980bb4fb9b1243f31c1cce78da3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Jun 2009 23:00:53 -0400
Subject: ring-buffer: try to discard unneeded timestamps

There are times that a race may happen that we add a timestamp in a
nested write. This timestamp would just contain a zero delta and serves
no purpose.

Now that we have a way to discard events, this patch will try to discard
the timestamp instead of just wasting the space in the ring buffer.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9453023..5092660 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1335,6 +1335,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 }
 
+static inline int
+rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
+		  struct ring_buffer_event *event)
+{
+	unsigned long new_index, old_index;
+	struct buffer_page *bpage;
+	unsigned long index;
+	unsigned long addr;
+
+	new_index = rb_event_index(event);
+	old_index = new_index + rb_event_length(event);
+	addr = (unsigned long)event;
+	addr &= PAGE_MASK;
+
+	bpage = cpu_buffer->tail_page;
+
+	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
+		/*
+		 * This is on the tail page. It is possible that
+		 * a write could come in and move the tail page
+		 * and write to the next page. That is fine
+		 * because we just shorten what is on this page.
+		 */
+		index = local_cmpxchg(&bpage->write, old_index, new_index);
+		if (index == old_index)
+			return 1;
+	}
+
+	/* could not discard */
+	return 0;
+}
+
 static int
 rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  u64 *ts, u64 *delta)
@@ -1384,10 +1416,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		/* let the caller know this was the commit */
 		ret = 1;
 	} else {
-		/* Darn, this is just wasted space */
-		event->time_delta = 0;
-		event->array[0] = 0;
-		ret = 0;
+		/* Try to discard the event */
+		if (!rb_try_to_discard(cpu_buffer, event)) {
+			/* Darn, this is just wasted space */
+			event->time_delta = 0;
+			event->array[0] = 0;
+			ret = 0;
+		}
 	}
 
 	*delta = 0;
@@ -1682,10 +1717,6 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 				struct ring_buffer_event *event)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
-	unsigned long new_index, old_index;
-	struct buffer_page *bpage;
-	unsigned long index;
-	unsigned long addr;
 	int cpu;
 
 	/* The event is discarded regardless */
@@ -1701,24 +1732,8 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	cpu = smp_processor_id();
 	cpu_buffer = buffer->buffers[cpu];
 
-	new_index = rb_event_index(event);
-	old_index = new_index + rb_event_length(event);
-	addr = (unsigned long)event;
-	addr &= PAGE_MASK;
-
-	bpage = cpu_buffer->tail_page;
-
-	if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
-		/*
-		 * This is on the tail page. It is possible that
-		 * a write could come in and move the tail page
-		 * and write to the next page. That is fine
-		 * because we just shorten what is on this page.
-		 */
-		index = local_cmpxchg(&bpage->write, old_index, new_index);
-		if (index == old_index)
-			goto out;
-	}
+	if (!rb_try_to_discard(cpu_buffer, event))
+		goto out;
 
 	/*
 	 * The commit is still visible by the reader, so we
-- 
cgit v0.10.2


From ea05b57cc19234d8de9887c8a32c2e58e84b56ba Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 3 Jun 2009 09:30:10 -0400
Subject: ring-buffer: discard timestamps that are at the start of the buffer

Every buffer page in the ring buffer includes its own time stamp.
When an event is recorded to the ring buffer with a delta time greater
than what can be held in the event header, a time stamp event is created.

If the the create timestamp falls over to the next buffer page, it is
redundant because the buffer page holds a full time stamp. This patch
will try to discard the time stamp when it falls to the start of the
next page.

This change also fixes a issues with disarding events. If most events are
discarded, timestamps will start to creep into the ring buffer. If we
do not discard the timestamps then they can fill up the ring buffer over
time and waste space.

This change will keep time stamps from filling up over another page. If
something is recorded in the buffer page, and the rest is filtered, then
the time stamps can only fill up to the end of the page.

[ Impact: prevent time stamps from filling ring buffer ]

Reported-by: Tim Bird <tim.bird@am.sony.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5092660..7102d7a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -370,6 +370,9 @@ static inline int test_time_stamp(u64 delta)
 /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
 #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
 
+/* Max number of timestamps that can fit on a page */
+#define RB_TIMESTAMPS_PER_PAGE	(BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
+
 int ring_buffer_print_page_header(struct trace_seq *s)
 {
 	struct buffer_data_page field;
@@ -1409,8 +1412,12 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 			event->array[0] = *delta >> TS_SHIFT;
 		} else {
 			cpu_buffer->commit_page->page->time_stamp = *ts;
-			event->time_delta = 0;
-			event->array[0] = 0;
+			/* try to discard, since we do not need this */
+			if (!rb_try_to_discard(cpu_buffer, event)) {
+				/* nope, just zero it */
+				event->time_delta = 0;
+				event->array[0] = 0;
+			}
 		}
 		cpu_buffer->write_stamp = *ts;
 		/* let the caller know this was the commit */
@@ -2268,8 +2275,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	 * Check if we are at the end of the buffer.
 	 */
 	if (iter->head >= rb_page_size(iter->head_page)) {
-		if (RB_WARN_ON(buffer,
-			       iter->head_page == cpu_buffer->commit_page))
+		/* discarded commits can make the page empty */
+		if (iter->head_page == cpu_buffer->commit_page)
 			return;
 		rb_inc_iter(iter);
 		return;
@@ -2312,12 +2319,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	/*
 	 * We repeat when a timestamp is encountered. It is possible
 	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written. The max times
-	 * that this can happen is the number of nested interrupts we
-	 * can have.  Nesting 10 deep of interrupts is clearly
-	 * an anomaly.
+	 * as one timestamp is about to be written, or from discarded
+	 * commits. The most that we can have is the number on a single page.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
 		return NULL;
 
 	reader = rb_get_reader_page(cpu_buffer);
@@ -2383,14 +2388,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
  again:
 	/*
-	 * We repeat when a timestamp is encountered. It is possible
-	 * to get multiple timestamps from an interrupt entering just
-	 * as one timestamp is about to be written. The max times
-	 * that this can happen is the number of nested interrupts we
-	 * can have. Nesting 10 deep of interrupts is clearly
-	 * an anomaly.
+	 * We repeat when a timestamp is encountered.
+	 * We can get multiple timestamps by nested interrupts or also
+	 * if filtering is on (discarding commits). Since discarding
+	 * commits can be frequent we can get a lot of timestamps.
+	 * But we limit them by not adding timestamps if they begin
+	 * at the start of a page.
 	 */
-	if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+	if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
 		return NULL;
 
 	if (rb_per_cpu_empty(cpu_buffer))
-- 
cgit v0.10.2


From 083a63b48e4dd0a6a2d44216720076dc81ebb255 Mon Sep 17 00:00:00 2001
From: walimis <walimisdev@gmail.com>
Date: Wed, 3 Jun 2009 16:01:28 +0800
Subject: tracing/trace_stack: fix the number of entries in the header

The last entry in the stack_dump_trace is ULONG_MAX, which is not
a valid entry, but max_stack_trace.nr_entries has accounted for it.
So when printing the header, we should decrease it by one.
Before fix, print as following, for example:

	Depth    Size   Location    (53 entries)	<--- should be 52
	-----    ----   --------
  0)     3264     108   update_wall_time+0x4d5/0x9a0
  ...
 51)       80      80   syscall_call+0x7/0xb
 ^^^
   it's correct.

Signed-off-by: walimis <walimisdev@gmail.com>
LKML-Reference: <1244016090-7814-1-git-send-email-walimisdev@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 1796f00..2d7aebd 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -265,7 +265,7 @@ static int t_show(struct seq_file *m, void *v)
 		seq_printf(m, "        Depth    Size   Location"
 			   "    (%d entries)\n"
 			   "        -----    ----   --------\n",
-			   max_stack_trace.nr_entries);
+			   max_stack_trace.nr_entries - 1);
 
 		if (!stack_tracer_enabled && !max_stack_size)
 			print_disabled(m);
-- 
cgit v0.10.2


From f11b3f4e2932bfdcfc458ab8d1ece62724ceabfc Mon Sep 17 00:00:00 2001
From: walimis <walimisdev@gmail.com>
Date: Wed, 3 Jun 2009 16:01:29 +0800
Subject: tracing/events: fix output format of kernel stack

According to "events/ftrace/kernel_stack/format", output format of
kernel stack should use "=>" instead of "<=".

The second problem is that we shouldn't skip the first entry in the stack,
although it seems to be duplicated when used in the "function" tracer,
but events also use it. If we skip the first one, we will drop the topmost
entry of the stack.

The last problem is that if the last entry is ULONG_MAX(0xffffffff), we should
drop it, otherwise it will print a NULL name line.

before fix:

      sh-1072  [000]   26.957239: sched_process_fork: parent sh:1072 child sh:1073
      sh-1072  [000]   26.957262:
 <= syscall_call
 <=
      sh-1072  [000]   26.957744: sched_switch: task sh:1072 [120] (R) ==> sh:1073 [120]
      sh-1072  [000]   26.957752:
 <= preempt_schedule
 <= wake_up_new_task
 <= do_fork
 <= sys_clone
 <= syscall_call
 <=

After fix:

      sh-1075  [000]    39.791848: sched_process_fork: parent sh:1075  child sh:1076
      sh-1075  [000]    39.791871:
 => sys_clone
 => syscall_call
      sh-1075  [000]    39.792713: sched_switch: task sh:1075 [120] (R) ==> sh:1076 [120]
      sh-1075  [000]    39.792722:
 => schedule
 => preempt_schedule
 => wake_up_new_task
 => do_fork
 => sys_clone
 => syscall_call

Signed-off-by: walimis <walimisdev@gmail.com>
LKML-Reference: <1244016090-7814-2-git-send-email-walimisdev@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 0fe3b22..64596a5 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -975,16 +975,16 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
 	trace_assign_type(field, iter->ent);
 
+	if (!trace_seq_puts(s, "\n"))
+		goto partial;
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-		if (!field->caller[i])
+		if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
 			break;
-		if (i) {
-			if (!trace_seq_puts(s, " <= "))
-				goto partial;
+		if (!trace_seq_puts(s, " => "))
+			goto partial;
 
-			if (!seq_print_ip_sym(s, field->caller[i], flags))
-				goto partial;
-		}
+		if (!seq_print_ip_sym(s, field->caller[i], flags))
+			goto partial;
 		if (!trace_seq_puts(s, "\n"))
 			goto partial;
 	}
-- 
cgit v0.10.2


From 048dc50c5e7eada19ebabbad70b7966d14283d41 Mon Sep 17 00:00:00 2001
From: walimis <walimisdev@gmail.com>
Date: Wed, 3 Jun 2009 16:01:30 +0800
Subject: tracing/events: fix output format of user stack

According to "events/ftrace/user_stack/format", fix the output of
user stack.

before fix:

  sh-1073  [000]    31.137561:  <b7f274fe> <-  <0804e33c> <-  <080835c1>

after fix:

  sh-1072  [000]    37.039329:
 =>  <b7f8a4fe>
 =>  <0804e33c>
 =>  <080835c1>

Signed-off-by: walimis <walimisdev@gmail.com>
LKML-Reference: <1244016090-7814-3-git-send-email-walimisdev@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 64596a5..8dadbbb 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -389,17 +389,20 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 
 		if (ip == ULONG_MAX || !ret)
 			break;
-		if (i && ret)
-			ret = trace_seq_puts(s, " <- ");
+		if (ret)
+			ret = trace_seq_puts(s, " => ");
 		if (!ip) {
 			if (ret)
 				ret = trace_seq_puts(s, "??");
+			if (ret)
+				ret = trace_seq_puts(s, "\n");
 			continue;
 		}
 		if (!ret)
 			break;
 		if (ret)
 			ret = seq_print_user_ip(s, mm, ip, sym_flags);
+		ret = trace_seq_puts(s, "\n");
 	}
 
 	if (mm)
@@ -1012,10 +1015,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 
 	trace_assign_type(field, iter->ent);
 
-	if (!seq_print_userip_objs(field, s, flags))
+	if (!trace_seq_putc(s, '\n'))
 		goto partial;
 
-	if (!trace_seq_putc(s, '\n'))
+	if (!seq_print_userip_objs(field, s, flags))
 		goto partial;
 
 	return TRACE_TYPE_HANDLED;
-- 
cgit v0.10.2


From 56d8bd3f0b98972312cad683947ec90b21011199 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 3 Jun 2009 14:52:03 +0100
Subject: tracing: fix multiple use of __print_flags and __print_symbolic

Here is an updated patch to include the extra call to
trace_seq_init() as requested. This is vs. the latest
-tip tree and fixes the use of multiple __print_flags
and __print_symbolic in a single tracer. Also tested
to ensure its working now:

mount.gfs2-2534  [000]   235.850587: gfs2_glock_queue: 8.7 glock 1:2 dequeue PR
mount.gfs2-2534  [000]   235.850591: gfs2_demote_rq: 8.7 glock 1:0 demote EX to NL flags:DI
mount.gfs2-2534  [000]   235.850591: gfs2_glock_queue: 8.7 glock 1:0 dequeue EX
glock_workqueue-2529  [000]   235.850666: gfs2_glock_state_change: 8.7 glock 1:0 state EX => NL tgt:NL dmt:NL flags:lDpI
glock_workqueue-2529  [000]   235.850672: gfs2_glock_put: 8.7 glock 1:0 state NL => IV flags:I

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
LKML-Reference: <1244037123.29604.603.camel@localhost.localdomain>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index b5478da..40ede4d 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -104,6 +104,7 @@
  *	field = (typeof(field))entry;
  *
  *	p = get_cpu_var(ftrace_event_seq);
+ *	trace_seq_init(p);
  *	ret = trace_seq_printf(s, <TP_printk> "\n");
  *	put_cpu();
  *	if (!ret)
@@ -167,6 +168,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	field = (typeof(field))entry;					\
 									\
 	p = &get_cpu_var(ftrace_event_seq);				\
+	trace_seq_init(p);						\
 	ret = trace_seq_printf(s, #call ": " print);			\
 	put_cpu();							\
 	if (!ret)							\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 8dadbbb..8afeea4 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -223,10 +223,9 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 {
 	unsigned long mask;
 	const char *str;
+	const char *ret = p->buffer + p->len;
 	int i;
 
-	trace_seq_init(p);
-
 	for (i = 0;  flag_array[i].name && flags; i++) {
 
 		mask = flag_array[i].mask;
@@ -249,7 +248,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 
 	trace_seq_putc(p, 0);
 
-	return p->buffer;
+	return ret;
 }
 EXPORT_SYMBOL(ftrace_print_flags_seq);
 
@@ -258,8 +257,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 			 const struct trace_print_flags *symbol_array)
 {
 	int i;
-
-	trace_seq_init(p);
+	const char *ret = p->buffer + p->len;
 
 	for (i = 0;  symbol_array[i].name; i++) {
 
@@ -275,7 +273,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 		
 	trace_seq_putc(p, 0);
 
-	return p->buffer;
+	return ret;
 }
 EXPORT_SYMBOL(ftrace_print_symbols_seq);
 
-- 
cgit v0.10.2


From 563af16c30ede41eda2d614195d88e07f7c7103d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 3 Jun 2009 11:10:44 -0400
Subject: tracing: add annotation to what type of stack trace is recorded

The current method of printing out a stack trace is to add a new line
and print out the trace:

    yum-updatesd-3120  [002]   573.691303:
 => do_softirq
 => irq_exit
 => smp_apic_timer_interrupt
 => apic_timer_interrupt

This looks a bit awkward, and if we have both stack and user stack traces
running, it would be nice to have a title to tell them apart, although
it is easy to tell by the output.

This patch adds an annotation to the start of the stack traces:

            init-1     [003]   929.304979: <stack trace>
 => user_path_at
 => vfs_fstatat
 => vfs_stat
 => sys_newstat
 => system_call_fastpath

             cat-3459  [002]  1016.824040: <user stack trace>
 =>  <0000003aae6c0250>
 =>  <00007ffff4b06ae4>
 =>  <69636172742f6775>

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 8afeea4..425725c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -976,7 +976,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
 	trace_assign_type(field, iter->ent);
 
-	if (!trace_seq_puts(s, "\n"))
+	if (!trace_seq_puts(s, "<stack trace>\n"))
 		goto partial;
 	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
 		if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
@@ -1013,7 +1013,7 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 
 	trace_assign_type(field, iter->ent);
 
-	if (!trace_seq_putc(s, '\n'))
+	if (!trace_seq_puts(s, "<user stack trace>\n"))
 		goto partial;
 
 	if (!seq_print_userip_objs(field, s, flags))
-- 
cgit v0.10.2


From f2521b6e4c365bd0aac61b2c346e6e9f22607e31 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 19:17:25 +0200
Subject: perf_counter tools: Clean up old kerneltop references

kerneltop has been replaced with perf top - so fix up a few
remaining references to it in display text and error messages.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index cea5b8d..fa62525 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -357,7 +357,8 @@ static void open_counters(int cpu, pid_t pid)
 
 		if (fd[nr_cpu][counter] < 0) {
 			int err = errno;
-			printf("kerneltop error: syscall returned with %d (%s)\n",
+
+			error("syscall returned with %d (%s)\n",
 					fd[nr_cpu][counter], strerror(err));
 			if (err == EPERM)
 				printf("Are you root?\n");
@@ -382,8 +383,7 @@ static void open_counters(int cpu, pid_t pid)
 		mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
 				PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
 		if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
-			printf("kerneltop error: failed to mmap with %d (%s)\n",
-					errno, strerror(errno));
+			error("failed to mmap with %d (%s)\n", errno, strerror(errno));
 			exit(-1);
 		}
 	}
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index a639352..16a6184 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -208,7 +208,7 @@ static void print_sym_table(void)
 
 	printf(
 "------------------------------------------------------------------------------\n");
-	printf( " KernelTop:%8.0f irqs/sec  kernel:%4.1f%% [",
+	printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
 		events_per_sec,
 		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)));
 
@@ -281,7 +281,7 @@ static void print_sym_table(void)
 
 static void *display_thread(void *arg)
 {
-	printf("KernelTop refresh period: %d seconds\n", delay_secs);
+	printf("PerfTop refresh period: %d seconds\n", delay_secs);
 
 	while (!sleep(delay_secs))
 		print_sym_table();
@@ -564,7 +564,8 @@ static int __cmd_top(void)
 			fd[i][counter] = sys_perf_counter_open(&attr, target_pid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {
 				int err = errno;
-				printf("kerneltop error: syscall returned with %d (%s)\n",
+
+				error("syscall returned with %d (%s)\n",
 					fd[i][counter], strerror(err));
 				if (err == EPERM)
 					printf("Are you root?\n");
@@ -588,11 +589,8 @@ static int __cmd_top(void)
 			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
 			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
 					PROT_READ, MAP_SHARED, fd[i][counter], 0);
-			if (mmap_array[i][counter].base == MAP_FAILED) {
-				printf("kerneltop error: failed to mmap with %d (%s)\n",
-						errno, strerror(errno));
-				exit(-1);
-			}
+			if (mmap_array[i][counter].base == MAP_FAILED)
+				die("failed to mmap with %d (%s)\n", errno, strerror(errno));
 		}
 	}
 
-- 
cgit v0.10.2


From 021e9f476511ebe23d7f45854a52dfe74c09b6ee Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 19:27:19 +0200
Subject: perf record: Refine capture printout

Print out the number of bytes captured, and the (estimated) number of
events the output file contains.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index fa62525..efa2eb4 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -67,6 +67,8 @@ static unsigned int mmap_read_head(struct mmap_data *md)
 static long events;
 static struct timeval last_read, this_read;
 
+static __u64 bytes_written;
+
 static void mmap_read(struct mmap_data *md)
 {
 	unsigned int head = mmap_read_head(md);
@@ -114,28 +116,34 @@ static void mmap_read(struct mmap_data *md)
 		buf = &data[old & md->mask];
 		size = md->mask + 1 - (old & md->mask);
 		old += size;
+
 		while (size) {
 			int ret = write(output, buf, size);
-			if (ret < 0) {
-				perror("failed to write");
-				exit(-1);
-			}
+
+			if (ret < 0)
+				die("failed to write");
+
 			size -= ret;
 			buf += ret;
+
+			bytes_written += ret;
 		}
 	}
 
 	buf = &data[old & md->mask];
 	size = head - old;
 	old += size;
+
 	while (size) {
 		int ret = write(output, buf, size);
-		if (ret < 0) {
-			perror("failed to write");
-			exit(-1);
-		}
+
+		if (ret < 0)
+			die("failed to write");
+
 		size -= ret;
 		buf += ret;
+
+		bytes_written += ret;
 	}
 
 	md->prev = old;
@@ -467,8 +475,14 @@ static int __cmd_record(int argc, const char **argv)
 			ret = poll(event_array, nr_poll, 100);
 	}
 
-
-	fprintf(stderr, "[ perf record: Captured and wrote %ld events. ]\n", events);
+	/*
+	 * Approximate RIP event size: 24 bytes.
+	 */
+	fprintf(stderr,
+		"[ perf record: Captured and wrote %.3f MB %s (~%lld events) ]\n",
+		(double)bytes_written / 1024.0 / 1024.0,
+		output_name,
+		bytes_written / 24);
 
 	return 0;
 }
-- 
cgit v0.10.2


From e98e96fe43ae92fad0930f05fb2b298e49b9f3b5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 19:30:38 +0200
Subject: perf report: Display 100% correctly

Needs to be 6.2 not 5.2, for 100.00% to be aligned properly.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 6d359c9..e837bb9 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -529,7 +529,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 	size_t ret;
 
 	if (total_samples) {
-		ret = fprintf(fp, "    %5.2f%%",
+		ret = fprintf(fp, "   %6.2f%%",
 				(self->count * 100.0) / total_samples);
 	} else
 		ret = fprintf(fp, "%12d ", self->count);
-- 
cgit v0.10.2


From 44db76c8553c328f4ae02481d77bb3a88ca17645 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 19:36:07 +0200
Subject: perf stat: Print out all arguments

Before:

 Performance counter stats for '/home/mingo/hackbench':

After:

 Performance counter stats for '/home/mingo/hackbench 10':

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 2357a66..4fc0d80 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -228,6 +228,7 @@ static int do_perfstat(int argc, const char **argv)
 	int counter;
 	int status;
 	int pid;
+	int i;
 
 	if (!system_wide)
 		nr_cpus = 1;
@@ -243,14 +244,17 @@ static int do_perfstat(int argc, const char **argv)
 
 	if ((pid = fork()) < 0)
 		perror("failed to fork");
+
 	if (!pid) {
 		if (execvp(argv[0], (char **)argv)) {
 			perror(argv[0]);
 			exit(-1);
 		}
 	}
+
 	while (wait(&status) >= 0)
 		;
+
 	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
 	t1 = rdclock();
 
@@ -259,8 +263,12 @@ static int do_perfstat(int argc, const char **argv)
 	fflush(stdout);
 
 	fprintf(stderr, "\n");
-	fprintf(stderr, " Performance counter stats for \'%s\':\n",
-		argv[0]);
+	fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
+
+	for (i = 1; i < argc; i++)
+		fprintf(stderr, " %s", argv[i]);
+
+	fprintf(stderr, "\':\n");
 	fprintf(stderr, "\n");
 
 	for (counter = 0; counter < nr_counters; counter++)
-- 
cgit v0.10.2


From eed4dcd443da7a46131ef37c7a389b444905960e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 19:59:24 +0200
Subject: perf report: Add front-entry cache for lookups

Before:

 Performance counter stats for './perf report -i perf.data.big':

     12453988058  instructions

 Performance counter stats for './perf report -i perf.data.big':

     12379566017  instructions

0.60% reduction.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index e837bb9..33b3b15 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -229,6 +229,7 @@ static int thread__set_comm(struct thread *self, const char *comm)
 }
 
 static struct rb_root threads;
+static struct thread *last_match;
 
 static struct thread *threads__findnew(pid_t pid)
 {
@@ -236,12 +237,22 @@ static struct thread *threads__findnew(pid_t pid)
 	struct rb_node *parent = NULL;
 	struct thread *th;
 
+	/*
+	 * Font-end cache - PID lookups come in blocks,
+	 * so most of the time we dont have to look up
+	 * the full rbtree:
+	 */
+	if (last_match && last_match->pid == pid)
+		return last_match;
+
 	while (*p != NULL) {
 		parent = *p;
 		th = rb_entry(parent, struct thread, rb_node);
 
-		if (th->pid == pid)
+		if (th->pid == pid) {
+			last_match = th;
 			return th;
+		}
 
 		if (pid < th->pid)
 			p = &(*p)->rb_left;
@@ -253,7 +264,9 @@ static struct thread *threads__findnew(pid_t pid)
 	if (th != NULL) {
 		rb_link_node(&th->rb_node, parent, p);
 		rb_insert_color(&th->rb_node, &threads);
+		last_match = th;
 	}
+
 	return th;
 }
 
-- 
cgit v0.10.2


From 051cdc3c2d0e24443ac03aff03ee89807ec8c589 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 20:09:11 +0200
Subject: perf help: Fix bug when there's no perf-* command around

main_cmds can be empty - fix util/help.c to handle this case
without segfaulting.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/help.c b/Documentation/perf_counter/util/help.c
index 397487f..6653f7d 100644
--- a/Documentation/perf_counter/util/help.c
+++ b/Documentation/perf_counter/util/help.c
@@ -298,7 +298,7 @@ static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
 
 const char *help_unknown_cmd(const char *cmd)
 {
-	int i, n, best_similarity = 0;
+	int i, n = 0, best_similarity = 0;
 	struct cmdnames main_cmds, other_cmds;
 
 	memset(&main_cmds, 0, sizeof(main_cmds));
@@ -315,20 +315,24 @@ const char *help_unknown_cmd(const char *cmd)
 	      sizeof(main_cmds.names), cmdname_compare);
 	uniq(&main_cmds);
 
-	/* This reuses cmdname->len for similarity index */
-	for (i = 0; i < main_cmds.cnt; ++i)
-		main_cmds.names[i]->len =
-			levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+	if (main_cmds.cnt) {
+		/* This reuses cmdname->len for similarity index */
+		for (i = 0; i < main_cmds.cnt; ++i)
+			main_cmds.names[i]->len =
+				levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
 
-	qsort(main_cmds.names, main_cmds.cnt,
-	      sizeof(*main_cmds.names), levenshtein_compare);
+		qsort(main_cmds.names, main_cmds.cnt,
+		      sizeof(*main_cmds.names), levenshtein_compare);
+
+		best_similarity = main_cmds.names[0]->len;
+		n = 1;
+		while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+			++n;
+	}
 
-	best_similarity = main_cmds.names[0]->len;
-	n = 1;
-	while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
-		++n;
 	if (autocorrect && n == 1) {
 		const char *assumed = main_cmds.names[0]->name;
+
 		main_cmds.names[0] = NULL;
 		clean_cmdnames(&main_cmds);
 		fprintf(stderr, "WARNING: You called a Git program named '%s', "
@@ -345,7 +349,7 @@ const char *help_unknown_cmd(const char *cmd)
 
 	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
 
-	if (best_similarity < 6) {
+	if (main_cmds.cnt && best_similarity < 6) {
 		fprintf(stderr, "\nDid you mean %s?\n",
 			n < 2 ? "this": "one of these");
 
-- 
cgit v0.10.2


From 095b3a6a030f7d4f24825ae93fc384b3d4b4fafa Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 20:13:51 +0200
Subject: perf_counter tools: Optimize harder

Use -O6 to build the tools.

Before:

    12387507370  instructions         #    3121.653 M/sec

After:

     6244894971  instructions         #    3458.437 M/sec

Almost twice as fast!

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index eae8856..005709b 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -159,7 +159,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
-CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement
+CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
 LDFLAGS = -lpthread -lrt -lelf
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
-- 
cgit v0.10.2


From a914d4309c4cf6e7c4d0dbce4822dcad38a7cf27 Mon Sep 17 00:00:00 2001
From: Alexander Clouter <alex@digriz.org.uk>
Date: Sun, 3 May 2009 12:57:48 -0700
Subject: [ARM] orion: add hwrng timeriomem hook to TS-78xx

Add hook so that the HW RNG source on the TS-78xx is available.

Signed-off-by: Alexander Clouter <alex@digriz.org.uk>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig
index 5b98f76..9e23852 100644
--- a/arch/arm/configs/orion5x_defconfig
+++ b/arch/arm/configs/orion5x_defconfig
@@ -903,7 +903,8 @@ CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=16
 # CONFIG_IPMI_HANDLER is not set
-# CONFIG_HW_RANDOM is not set
+CONFIG_HW_RANDOM=m
+CONFIG_HW_RANDOM_TIMERIOMEM=m
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
 # CONFIG_RAW_DRIVER is not set
diff --git a/arch/arm/mach-orion5x/ts78xx-fpga.h b/arch/arm/mach-orion5x/ts78xx-fpga.h
index 0f9cdf4..37b3d48 100644
--- a/arch/arm/mach-orion5x/ts78xx-fpga.h
+++ b/arch/arm/mach-orion5x/ts78xx-fpga.h
@@ -25,6 +25,7 @@ struct fpga_devices {
 	/* Technologic Systems */
 	struct fpga_device 	ts_rtc;
 	struct fpga_device 	ts_nand;
+	struct fpga_device 	ts_rng;
 };
 
 struct ts78xx_fpga_data {
diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c
index 9a6b397..5041d1b 100644
--- a/arch/arm/mach-orion5x/ts78xx-setup.c
+++ b/arch/arm/mach-orion5x/ts78xx-setup.c
@@ -17,6 +17,7 @@
 #include <linux/m48t86.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
+#include <linux/timeriomem-rng.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -270,12 +271,57 @@ static void ts78xx_ts_nand_unload(void)
 }
 
 /*****************************************************************************
+ * HW RNG
+ ****************************************************************************/
+#define TS_RNG_DATA	(TS78XX_FPGA_REGS_PHYS_BASE | 0x044)
+
+static struct resource ts78xx_ts_rng_resource = {
+	.flags		= IORESOURCE_MEM,
+	.start		= TS_RNG_DATA,
+	.end		= TS_RNG_DATA + 4 - 1,
+};
+
+static struct timeriomem_rng_data ts78xx_ts_rng_data = {
+	.period		= 1000000, /* one second */
+};
+
+static struct platform_device ts78xx_ts_rng_device = {
+	.name		= "timeriomem_rng",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &ts78xx_ts_rng_data,
+	},
+	.resource	= &ts78xx_ts_rng_resource,
+	.num_resources	= 1,
+};
+
+static int ts78xx_ts_rng_load(void)
+{
+	int rc;
+
+	if (ts78xx_fpga.supports.ts_rng.init == 0) {
+		rc = platform_device_register(&ts78xx_ts_rng_device);
+		if (!rc)
+			ts78xx_fpga.supports.ts_rng.init = 1;
+	} else
+		rc = platform_device_add(&ts78xx_ts_rng_device);
+
+	return rc;
+};
+
+static void ts78xx_ts_rng_unload(void)
+{
+	platform_device_del(&ts78xx_ts_rng_device);
+}
+
+/*****************************************************************************
  * FPGA 'hotplug' support code
  ****************************************************************************/
 static void ts78xx_fpga_devices_zero_init(void)
 {
 	ts78xx_fpga.supports.ts_rtc.init = 0;
 	ts78xx_fpga.supports.ts_nand.init = 0;
+	ts78xx_fpga.supports.ts_rng.init = 0;
 }
 
 static void ts78xx_fpga_supports(void)
@@ -289,10 +335,12 @@ static void ts78xx_fpga_supports(void)
 	case TS7800_REV_5:
 		ts78xx_fpga.supports.ts_rtc.present = 1;
 		ts78xx_fpga.supports.ts_nand.present = 1;
+		ts78xx_fpga.supports.ts_rng.present = 1;
 		break;
 	default:
 		ts78xx_fpga.supports.ts_rtc.present = 0;
 		ts78xx_fpga.supports.ts_nand.present = 0;
+		ts78xx_fpga.supports.ts_rng.present = 0;
 	}
 }
 
@@ -316,6 +364,14 @@ static int ts78xx_fpga_load_devices(void)
 		}
 		ret |= tmp;
 	}
+	if (ts78xx_fpga.supports.ts_rng.present == 1) {
+		tmp = ts78xx_ts_rng_load();
+		if (tmp) {
+			printk(KERN_INFO "TS-78xx: RNG not registered\n");
+			ts78xx_fpga.supports.ts_rng.present = 0;
+		}
+		ret |= tmp;
+	}
 
 	return ret;
 }
@@ -328,6 +384,8 @@ static int ts78xx_fpga_unload_devices(void)
 		ts78xx_ts_rtc_unload();
 	if (ts78xx_fpga.supports.ts_nand.present == 1)
 		ts78xx_ts_nand_unload();
+	if (ts78xx_fpga.supports.ts_rng.present == 1)
+		ts78xx_ts_rng_unload();
 
 	return ret;
 }
-- 
cgit v0.10.2


From c499b0672f8df9379764965c5ec124751699d7c4 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 3 Apr 2009 14:41:56 +0200
Subject: mxcmmc: decrease minimum frequency to make MMC cards work

This is a temporary workaround until the MMC stack can be fixed.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index b4a615c..5950102 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -724,7 +724,9 @@ static int mxcmci_probe(struct platform_device *pdev)
 		goto out_clk_put;
 	}
 
-	mmc->f_min = clk_get_rate(host->clk) >> 7;
+	mmc->f_min = clk_get_rate(host->clk) >> 16;
+	if (mmc->f_min < 400000)
+		mmc->f_min = 400000;
 	mmc->f_max = clk_get_rate(host->clk) >> 1;
 
 	/* recommended in data sheet */
-- 
cgit v0.10.2


From 18374ab76e3ec1cf1b0ca5a8d08e35cfc5d01669 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Jun 2009 14:49:21 -0300
Subject: perf_counter tools: Fix off-by-one bug in symbol__new

The end is really (start + len - 1). Noticed when synthesizing
the PLT symbols, that are small (16 bytes), and hot on the
start RIP.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <20090603174921.GG7805@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index d52a1ae..35ee6de 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -19,7 +19,7 @@ static struct symbol *symbol__new(uint64_t start, uint64_t len,
 			self = ((void *)self) + priv_size;
 		}
 		self->start = start;
-		self->end   = start + len;
+		self->end   = start + len - 1;
 		memcpy(self->name, name, namelen);
 	}
 
-- 
cgit v0.10.2


From 85b843227a9b8c1a27ebd354a80c89aef067f2ca Mon Sep 17 00:00:00 2001
From: Anand Gadiyar <gadiyar@ti.com>
Date: Wed, 15 Apr 2009 17:44:58 +0530
Subject: omap_hsmmc: Trivial fix for a typo in comment

Signed-off-by: Anand Gadiyar <gadiyar@ti.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index e62a22a..c40cb96 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -680,7 +680,7 @@ static void mmc_omap_dma_cb(int lch, u16 ch_status, void *data)
 	host->dma_ch = -1;
 	/*
 	 * DMA Callback: run in interrupt context.
-	 * mutex_unlock will through a kernel warning if used.
+	 * mutex_unlock will throw a kernel warning if used.
 	 */
 	up(&host->sem);
 }
-- 
cgit v0.10.2


From 18489fa2ba4c170d96ffc1a41f7b9002dcb983b7 Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <mfuzzey@gmail.com>
Date: Thu, 16 Apr 2009 22:00:36 +0200
Subject: mxcmmc : Reset the SDHC hardware if software timeout occurs.

When a software timeout occurs in polling mode hardware was left in
an indeterminate state causing subsequent operations to block.

Signed-off-by: Martin Fuzzey <mfuzzey@gmail.com>

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 5950102..dcc9cdb 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -140,6 +140,8 @@ struct mxcmci_host {
 	struct work_struct	datawork;
 };
 
+static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
+
 static inline int mxcmci_use_dma(struct mxcmci_host *host)
 {
 	return host->do_dma;
@@ -345,8 +347,11 @@ static int mxcmci_poll_status(struct mxcmci_host *host, u32 mask)
 		stat = readl(host->base + MMC_REG_STATUS);
 		if (stat & STATUS_ERR_MASK)
 			return stat;
-		if (time_after(jiffies, timeout))
+		if (time_after(jiffies, timeout)) {
+			mxcmci_softreset(host);
+			mxcmci_set_clk_rate(host, host->clock);
 			return STATUS_TIME_OUT_READ;
+		}
 		if (stat & mask)
 			return 0;
 		cpu_relax();
-- 
cgit v0.10.2


From 656217d28480f63313a488f6973980f9fbb921a1 Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <mfuzzey@gmail.com>
Date: Thu, 16 Apr 2009 22:00:41 +0200
Subject: mxcmmc: Fix missing return value checking in DMA setup code.

Signed-off-by: Martin Fuzzey <mfuzzey@gmail.com>

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index dcc9cdb..f4cbe47 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -162,7 +162,7 @@ static void mxcmci_softreset(struct mxcmci_host *host)
 	writew(0xff, host->base + MMC_REG_RES_TO);
 }
 
-static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
+static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 {
 	unsigned int nob = data->blocks;
 	unsigned int blksz = data->blksz;
@@ -170,6 +170,7 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 #ifdef HAS_DMA
 	struct scatterlist *sg;
 	int i;
+	int ret;
 #endif
 	if (data->flags & MMC_DATA_STREAM)
 		nob = 0xffff;
@@ -185,7 +186,7 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 	for_each_sg(data->sg, sg, data->sg_len, i) {
 		if (sg->offset & 3 || sg->length & 3) {
 			host->do_dma = 0;
-			return;
+			return 0;
 		}
 	}
 
@@ -194,23 +195,30 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 		host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg,
 					     data->sg_len,  host->dma_dir);
 
-		imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize,
-				 host->res->start + MMC_REG_BUFFER_ACCESS,
-				 DMA_MODE_READ);
+		ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents,
+				datasize,
+				host->res->start + MMC_REG_BUFFER_ACCESS,
+				DMA_MODE_READ);
 	} else {
 		host->dma_dir = DMA_TO_DEVICE;
 		host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg,
 					     data->sg_len,  host->dma_dir);
 
-		imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize,
-				 host->res->start + MMC_REG_BUFFER_ACCESS,
-				 DMA_MODE_WRITE);
+		ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents,
+				datasize,
+				host->res->start + MMC_REG_BUFFER_ACCESS,
+				DMA_MODE_WRITE);
 	}
 
+	if (ret) {
+		dev_err(mmc_dev(host->mmc), "failed to setup DMA : %d\n", ret);
+		return ret;
+	}
 	wmb();
 
 	imx_dma_enable(host->dma);
 #endif /* HAS_DMA */
+	return 0;
 }
 
 static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd,
@@ -536,6 +544,7 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
 {
 	struct mxcmci_host *host = mmc_priv(mmc);
 	unsigned int cmdat = host->cmdat;
+	int error;
 
 	WARN_ON(host->req != NULL);
 
@@ -545,7 +554,12 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
 	host->do_dma = 1;
 #endif
 	if (req->data) {
-		mxcmci_setup_data(host, req->data);
+		error = mxcmci_setup_data(host, req->data);
+		if (error) {
+			req->cmd->error = error;
+			goto out;
+		}
+
 
 		cmdat |= CMD_DAT_CONT_DATA_ENABLE;
 
@@ -553,7 +567,9 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
 			cmdat |= CMD_DAT_CONT_WRITE;
 	}
 
-	if (mxcmci_start_cmd(host, req->cmd, cmdat))
+	error = mxcmci_start_cmd(host, req->cmd, cmdat);
+out:
+	if (error)
 		mxcmci_finish_request(host, req);
 }
 
-- 
cgit v0.10.2


From 703aaced2b9c9a98285f265f3444c2f89d9d4d19 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Sat, 9 May 2009 01:03:52 -0400
Subject: mvsdio: allow automatic loading when modular

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Tested-by: Martin Michlmayr <tbm@cyrius.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index c643d0f..1783043 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -882,3 +882,4 @@ module_param(nodma, int, 0);
 MODULE_AUTHOR("Maen Suleiman, Nicolas Pitre");
 MODULE_DESCRIPTION("Marvell MMC,SD,SDIO Host Controller driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mvsdio");
-- 
cgit v0.10.2


From 992697e9b342115dcf052ffa41d418cb4fe1a841 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Fri, 8 May 2009 08:52:49 -0500
Subject: sdhci-of: Add fsl,esdhc as a valid compatible to bind against

We plan to use fsl,esdhc going forward as the base compatible so update
the driver to bind against it.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index 3ff4ac3..09cc597c 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -277,6 +277,7 @@ static int __devexit sdhci_of_remove(struct of_device *ofdev)
 static const struct of_device_id sdhci_of_match[] = {
 	{ .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, },
 	{ .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, },
+	{ .compatible = "fsl,esdhc", .data = &sdhci_esdhc, },
 	{ .compatible = "generic-sdhci", },
 	{},
 };
-- 
cgit v0.10.2


From e749c6f21fd7dc618f61dd178b4ee739c3cb1c31 Mon Sep 17 00:00:00 2001
From: Ben Nizette <bn@niasdigital.com>
Date: Thu, 16 Apr 2009 15:55:21 +1000
Subject: mmc/omap: Use disable_irq_nosync() from within irq handlers.

disable_irq() should wait for all running handlers to complete
before returning.  As such, if it's used to disable an interrupt
from that interrupt's handler it will deadlock.  This replaces
the dangerous instances with the _nosync() variant which doesn't
have this problem.

Signed-off-by: Ben Nizette <bn@niasdigital.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index bfa25c0..dceb5ee 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -822,7 +822,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 		del_timer(&host->cmd_abort_timer);
 		host->abort = 1;
 		OMAP_MMC_WRITE(host, IE, 0);
-		disable_irq(host->irq);
+		disable_irq_nosync(host->irq);
 		schedule_work(&host->cmd_abort_work);
 		return IRQ_HANDLED;
 	}
-- 
cgit v0.10.2


From 9ca6944cbfad11f2368cf10292e7f3eb036386c2 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 14 May 2009 21:28:05 -0400
Subject: mvsdio: ignore high speed timing requests from the core

Empirical evidences show that this is causing far more problems than it
solves when this mode is enabled in the host hardware.  Amongst those
cards that are known to be non functional when this bit is set are:

	A-Data "Speedy" 2GB SD card
	Kodak 512MB SD card
	Ativa 1GB MicroSD card
	Marvell 8688 (WIFI/Bluetooth) SDIO card

Since those cards do work on other host controllers which do honnor the
hs timing, the issue must be with this particular host hardware.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 1783043..9d3cfa9 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -620,9 +620,18 @@ static void mvsd_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (ios->bus_width == MMC_BUS_WIDTH_4)
 		ctrl_reg |= MVSD_HOST_CTRL_DATA_WIDTH_4_BITS;
 
+	/*
+	 * The HI_SPEED_EN bit is causing trouble with many (but not all)
+	 * high speed SD, SDHC and SDIO cards.  Not enabling that bit
+	 * makes all cards work.  So let's just ignore that bit for now
+	 * and revisit this issue if problems for not enabling this bit
+	 * are ever reported.
+	 */
+#if 0
 	if (ios->timing == MMC_TIMING_MMC_HS ||
 	    ios->timing == MMC_TIMING_SD_HS)
 		ctrl_reg |= MVSD_HOST_CTRL_HI_SPEED_EN;
+#endif
 
 	host->ctrl = ctrl_reg;
 	mvsd_write(MVSD_HOST_CTRL, ctrl_reg);
-- 
cgit v0.10.2


From 233f0b95ca3a0d1dcbd70bc7e519069a8e10d23e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 21:48:40 +0200
Subject: perf_counter tools: Work around warnings in older GCCs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GCC 4.1.2 produces:

 util/parse-options.c: In function ‘get_value’:
 util/parse-options.c:36: warning: ‘arg’ may be used uninitialized in this function

 builtin-top.c: In function ‘display_thread’:
 builtin-top.c:178: warning: ‘printed’ may be used uninitialized in this function

Annotate them away by initializing these variables to 0.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 16a6184..7c907e2 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -175,7 +175,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
 
 static void print_sym_table(void)
 {
-	int printed, j;
+	int printed = 0, j;
 	int counter;
 	float events_per_sec = events/delay_secs;
 	float kevents_per_sec = (events-userspace_events)/delay_secs;
diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c
index b80abd9..551b6bc 100644
--- a/Documentation/perf_counter/util/parse-options.c
+++ b/Documentation/perf_counter/util/parse-options.c
@@ -33,7 +33,7 @@ static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
 static int get_value(struct parse_opt_ctx_t *p,
 		     const struct option *opt, int flags)
 {
-	const char *s, *arg;
+	const char *s, *arg = NULL;
 	const int unset = flags & OPT_UNSET;
 
 	if (unset && p->opt)
-- 
cgit v0.10.2


From a6d297f008e124d0bb4312369191b012c10a1a4e Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 26 May 2009 22:35:34 -0400
Subject: mvsdio: fix config failure with some high speed SDHC cards

Especially with Sandisk SDHC cards, the second SWITCH command was failing
with a timeout and the card was not recognized at all.  However if the
system was busy, or debugging was enabled, or a udelay(100) was inserted
before the second SWITCH command in the core code, then the timing was
so that the card started to work.

With some unusual block sizes, the data FIFO status doesn't indicate a
"empty" state right away when the data transfer is done.  Queuing
another data transfer in that condition results in a transfer timeout.

The empty FIFO bit eventually get set by itself in less than 50 usecs
when it is not set right away. So let's just poll for that bit before
configuring the controller with a new data transfer.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 9d3cfa9..b56d72f 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -64,6 +64,31 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
 	unsigned int tmout;
 	int tmout_index;
 
+	/*
+	 * Hardware weirdness.  The FIFO_EMPTY bit of the HW_STATE
+	 * register is sometimes not set before a while when some
+	 * "unusual" data block sizes are used (such as with the SWITCH
+	 * command), even despite the fact that the XFER_DONE interrupt
+	 * was raised.  And if another data transfer starts before
+	 * this bit comes to good sense (which eventually happens by
+	 * itself) then the new transfer simply fails with a timeout.
+	 */
+	if (!(mvsd_read(MVSD_HW_STATE) & (1 << 13))) {
+		unsigned long t = jiffies + HZ;
+		unsigned int hw_state,  count = 0;
+		do {
+			if (time_after(jiffies, t)) {
+				dev_warn(host->dev, "FIFO_EMPTY bit missing\n");
+				break;
+			}
+			hw_state = mvsd_read(MVSD_HW_STATE);
+			count++;
+		} while (!(hw_state & (1 << 13)));
+		dev_dbg(host->dev, "*** wait for FIFO_EMPTY bit "
+				   "(hw=0x%04x, count=%d, jiffies=%ld)\n",
+				   hw_state, count, jiffies - (t - HZ));
+	}
+
 	/* If timeout=0 then maximum timeout index is used. */
 	tmout = DIV_ROUND_UP(data->timeout_ns, host->ns_per_clk);
 	tmout += data->timeout_clks;
-- 
cgit v0.10.2


From fbf6a5fcbcc2248f1e676f7a0a7d49cd4b535d2a Mon Sep 17 00:00:00 2001
From: Dave Liu <daveliu@freescale.com>
Date: Wed, 6 May 2009 18:40:07 +0800
Subject: sdhci-of: Fix the wrong accessor to HOSTVER register

Freescale eSDHC controller has the special order for
the HOST version register. that is not same as the other's
registers. The address of HOSTVER in spec is 0xFE, and
we need use the in_be16(0xFE) to access it, not in_be16(0xFC).

Signed-off-by: Dave Liu <daveliu@freescale.com>
Acked-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index 09cc597c..128c614 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -55,7 +55,13 @@ static u32 esdhc_readl(struct sdhci_host *host, int reg)
 
 static u16 esdhc_readw(struct sdhci_host *host, int reg)
 {
-	return in_be16(host->ioaddr + (reg ^ 0x2));
+	u16 ret;
+
+	if (unlikely(reg == SDHCI_HOST_VERSION))
+		ret = in_be16(host->ioaddr + reg);
+	else
+		ret = in_be16(host->ioaddr + (reg ^ 0x2));
+	return ret;
 }
 
 static u8 esdhc_readb(struct sdhci_host *host, int reg)
-- 
cgit v0.10.2


From 25a52393270ca48c7d0848672ad4423313033c3d Mon Sep 17 00:00:00 2001
From: Joachim Fenkes <fenkes@de.ibm.com>
Date: Wed, 3 Jun 2009 13:25:42 -0700
Subject: IB/ehca: Remove superfluous bitmasks from QP control block

All the fields in the control block are nicely right-aligned, so no
masking is necessary.

Signed-off-by: Joachim Fenkes <fenkes@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
index 1798e64..689c357 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -165,7 +165,6 @@ struct hcp_modify_qp_control_block {
 #define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM( 7,  7)
 #define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM( 8,  8)
 #define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM( 9,  9)
-#define MQPCB_QP_STATE                          EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11, 11)
 #define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12, 12)
 #define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13, 13)
@@ -176,60 +175,33 @@ struct hcp_modify_qp_control_block {
 #define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18, 18)
 #define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19, 19)
 #define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20, 20)
-#define MQPCB_PATH_MTU                          EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21, 21)
-#define MQPCB_MAX_STATIC_RATE                   EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22, 22)
-#define MQPCB_DLID                              EHCA_BMASK_IBM(16, 31)
 #define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23, 23)
-#define MQPCB_RNR_RETRY_COUNT                   EHCA_BMASK_IBM(29, 31)
 #define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24, 24)
-#define MQPCB_SOURCE_PATH_BITS                  EHCA_BMASK_IBM(25, 31)
 #define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25, 25)
-#define MQPCB_TRAFFIC_CLASS                     EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26, 26)
-#define MQPCB_HOP_LIMIT                         EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27, 27)
-#define MQPCB_SOURCE_GID_IDX                    EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28, 28)
-#define MQPCB_FLOW_LABEL                        EHCA_BMASK_IBM(12, 31)
 #define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30, 30)
 #define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31, 31)
-#define MQPCB_SERVICE_LEVEL_AL                  EHCA_BMASK_IBM(28, 31)
 #define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32, 32)
-#define MQPCB_SEND_GRH_FLAG_AL                  EHCA_BMASK_IBM(31, 31)
 #define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33, 33)
-#define MQPCB_RETRY_COUNT_AL                    EHCA_BMASK_IBM(29, 31)
 #define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34, 34)
-#define MQPCB_TIMEOUT_AL                        EHCA_BMASK_IBM(27, 31)
 #define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35, 35)
-#define MQPCB_MAX_STATIC_RATE_AL                EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36, 36)
-#define MQPCB_DLID_AL                           EHCA_BMASK_IBM(16, 31)
 #define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37, 37)
-#define MQPCB_RNR_RETRY_COUNT_AL                EHCA_BMASK_IBM(29, 31)
 #define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38, 38)
-#define MQPCB_SOURCE_PATH_BITS_AL               EHCA_BMASK_IBM(25, 31)
 #define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39, 39)
-#define MQPCB_TRAFFIC_CLASS_AL                  EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40, 40)
-#define MQPCB_HOP_LIMIT_AL                      EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41, 41)
-#define MQPCB_SOURCE_GID_IDX_AL                 EHCA_BMASK_IBM(24, 31)
 #define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42, 42)
-#define MQPCB_FLOW_LABEL_AL                     EHCA_BMASK_IBM(12, 31)
 #define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44, 44)
 #define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45, 45)
-#define MQPCB_MAX_NR_OUTST_SEND_WR              EHCA_BMASK_IBM(16, 31)
 #define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46, 46)
-#define MQPCB_MAX_NR_OUTST_RECV_WR              EHCA_BMASK_IBM(16, 31)
 #define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47, 47)
-#define MQPCB_DISABLE_ETE_CREDIT_CHECK          EHCA_BMASK_IBM(31, 31)
-#define MQPCB_QP_NUMBER                         EHCA_BMASK_IBM( 8, 31)
 #define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48, 48)
-#define MQPCB_QP_ENABLE                         EHCA_BMASK_IBM(31, 31)
 #define MQPCB_MASK_CURR_SRQ_LIMIT               EHCA_BMASK_IBM(49, 49)
-#define MQPCB_CURR_SRQ_LIMIT                    EHCA_BMASK_IBM(16, 31)
 #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50, 50)
 #define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51, 51)
 
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index ead4e71..0338f1f 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -1962,19 +1962,13 @@ int ehca_query_qp(struct ib_qp *qp,
 	qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
 	qp_attr->dest_qp_num = qpcb->dest_qp_nr;
 
-	qp_attr->pkey_index =
-		EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx);
-
-	qp_attr->port_num =
-		EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port);
-
+	qp_attr->pkey_index = qpcb->prim_p_key_idx;
+	qp_attr->port_num = qpcb->prim_phys_port;
 	qp_attr->timeout = qpcb->timeout;
 	qp_attr->retry_cnt = qpcb->retry_count;
 	qp_attr->rnr_retry = qpcb->rnr_retry_count;
 
-	qp_attr->alt_pkey_index =
-		EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx);
-
+	qp_attr->alt_pkey_index = qpcb->alt_p_key_idx;
 	qp_attr->alt_port_num = qpcb->alt_phys_port;
 	qp_attr->alt_timeout = qpcb->timeout_al;
 
@@ -2061,8 +2055,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		update_mask |=
 			EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
 			| EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
-		mqpcb->curr_srq_limit =
-			EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit);
+		mqpcb->curr_srq_limit = attr->srq_limit;
 		mqpcb->qp_aff_asyn_ev_log_reg =
 			EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
 	}
@@ -2125,8 +2118,7 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
 
 	srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
 	srq_attr->max_sge = 3;
-	srq_attr->srq_limit = EHCA_BMASK_GET(
-		MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
+	srq_attr->srq_limit = qpcb->curr_srq_limit;
 
 	if (ehca_debug_level >= 2)
 		ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-- 
cgit v0.10.2


From 128f048f0f0d2a477ad2555e7acd2ad15a1b6061 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 22:19:36 +0200
Subject: perf_counter: Fix throttling lock-up

Throttling logic is broken and we can lock up with too small
hw sampling intervals.

Make the throttling code more robust: disable counters even
if we already disabled them.

( Also clean up whitespace damage i noticed while reading
  various pieces of code related to throttling. )

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 12cc05e..8f53f3a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -91,7 +91,7 @@ static u64 intel_pmu_raw_event(u64 event)
 #define CORE_EVNTSEL_INV_MASK		0x00800000ULL
 #define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL
 
-#define CORE_EVNTSEL_MASK 		\
+#define CORE_EVNTSEL_MASK		\
 	(CORE_EVNTSEL_EVENT_MASK |	\
 	 CORE_EVNTSEL_UNIT_MASK  |	\
 	 CORE_EVNTSEL_EDGE_MASK  |	\
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ab44554..0bb03f1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2822,11 +2822,20 @@ int perf_counter_overflow(struct perf_counter *counter,
 
 	if (!throttle) {
 		counter->hw.interrupts++;
-	} else if (counter->hw.interrupts != MAX_INTERRUPTS) {
-		counter->hw.interrupts++;
-		if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
-			counter->hw.interrupts = MAX_INTERRUPTS;
-			perf_log_throttle(counter, 0);
+	} else {
+		if (counter->hw.interrupts != MAX_INTERRUPTS) {
+			counter->hw.interrupts++;
+			if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
+				counter->hw.interrupts = MAX_INTERRUPTS;
+				perf_log_throttle(counter, 0);
+				ret = 1;
+			}
+		} else {
+			/*
+			 * Keep re-disabling counters even though on the previous
+			 * pass we disabled it - just in case we raced with a
+			 * sched-in and the counter got enabled again:
+			 */
 			ret = 1;
 		}
 	}
-- 
cgit v0.10.2


From d80d338d2fb611b65830db7ea56680624776030f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 23:14:49 +0200
Subject: perf report: Clean up event processing

- Split out event processig into process_events() helper.

- Untangle the cwd parameters - it's constant so can be a static.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 33b3b15..333d312 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -147,7 +147,11 @@ static int load_kernel(void)
 	return err;
 }
 
-static int strcommon(const char *pathname, const char *cwd, int cwdlen)
+static char __cwd[PATH_MAX];
+static char *cwd = __cwd;
+static int cwdlen;
+
+static int strcommon(const char *pathname)
 {
 	int n = 0;
 
@@ -165,7 +169,7 @@ struct map {
 	struct dso	 *dso;
 };
 
-static struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen)
+static struct map *map__new(struct mmap_event *event)
 {
 	struct map *self = malloc(sizeof(*self));
 
@@ -174,7 +178,8 @@ static struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen)
 		char newfilename[PATH_MAX];
 
 		if (cwd) {
-			int n = strcommon(filename, cwd, cwdlen);
+			int n = strcommon(filename);
+
 			if (n == cwdlen) {
 				snprintf(newfilename, sizeof(newfilename),
 					 ".%s", filename + n);
@@ -752,85 +757,11 @@ static void register_idle_thread(void)
 	}
 }
 
+static unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown = 0;
 
-static int __cmd_report(void)
+static int
+process_event(event_t *event, unsigned long offset, unsigned long head)
 {
-	unsigned long offset = 0;
-	unsigned long head = 0;
-	struct stat stat;
-	char *buf;
-	event_t *event;
-	int ret, rc = EXIT_FAILURE;
-	uint32_t size;
-	unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown = 0;
-	char cwd[PATH_MAX], *cwdp = cwd;
-	int cwdlen;
-
-	register_idle_thread();
-
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		perror("failed to open file");
-		exit(-1);
-	}
-
-	ret = fstat(input, &stat);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
-	}
-
-	if (!stat.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-
-	if (load_kernel() < 0) {
-		perror("failed to load kernel symbols");
-		return EXIT_FAILURE;
-	}
-
-	if (!full_paths) {
-		if (getcwd(cwd, sizeof(cwd)) == NULL) {
-			perror("failed to get the current directory");
-			return EXIT_FAILURE;
-		}
-		cwdlen = strlen(cwd);
-	} else {
-		cwdp = NULL;
-		cwdlen = 0;
-	}
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
-
-more:
-	event = (event_t *)(buf + head);
-
-	size = event->header.size;
-	if (!size)
-		size = 8;
-
-	if (head + event->header.size >= page_size * mmap_window) {
-		unsigned long shift = page_size * (head / page_size);
-		int ret;
-
-		ret = munmap(buf, page_size * mmap_window);
-		assert(ret == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-	if (!size)
-		goto broken_event;
-
 	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
 		char level;
 		int show = 0;
@@ -851,7 +782,7 @@ more:
 		if (thread == NULL) {
 			fprintf(stderr, "problem processing %d event, skipping it.\n",
 				event->header.type);
-			goto broken_event;
+			return -1;
 		}
 
 		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
@@ -895,14 +826,14 @@ more:
 			if (hist_entry__add(thread, map, dso, sym, ip, level)) {
 				fprintf(stderr,
 		"problem incrementing symbol count, skipping event\n");
-				goto broken_event;
+				return -1;
 			}
 		}
 		total++;
 	} else switch (event->header.type) {
 	case PERF_EVENT_MMAP: {
 		struct thread *thread = threads__findnew(event->mmap.pid);
-		struct map *map = map__new(&event->mmap, cwdp, cwdlen);
+		struct map *map = map__new(&event->mmap);
 
 		dprintf("%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
 			(void *)(offset + head),
@@ -915,7 +846,7 @@ more:
 		if (thread == NULL || map == NULL) {
 			if (verbose)
 				fprintf(stderr, "problem processing PERF_EVENT_MMAP, skipping event.\n");
-			goto broken_event;
+			return -1;
 		}
 		thread__insert_map(thread, map);
 		total_mmap++;
@@ -932,13 +863,93 @@ more:
 		if (thread == NULL ||
 		    thread__set_comm(thread, event->comm.comm)) {
 			fprintf(stderr, "problem processing PERF_EVENT_COMM, skipping event.\n");
-			goto broken_event;
+			return -1;
 		}
 		total_comm++;
 		break;
 	}
-	default: {
-broken_event:
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __cmd_report(void)
+{
+	unsigned long offset = 0;
+	unsigned long head = 0;
+	struct stat stat;
+	char *buf;
+	event_t *event;
+	int ret, rc = EXIT_FAILURE;
+	uint32_t size;
+
+	register_idle_thread();
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		perror("failed to open file");
+		exit(-1);
+	}
+
+	ret = fstat(input, &stat);
+	if (ret < 0) {
+		perror("failed to stat file");
+		exit(-1);
+	}
+
+	if (!stat.st_size) {
+		fprintf(stderr, "zero-sized file, nothing to do!\n");
+		exit(0);
+	}
+
+	if (load_kernel() < 0) {
+		perror("failed to load kernel symbols");
+		return EXIT_FAILURE;
+	}
+
+	if (!full_paths) {
+		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
+			perror("failed to get the current directory");
+			return EXIT_FAILURE;
+		}
+		cwdlen = strlen(cwd);
+	} else {
+		cwd = NULL;
+		cwdlen = 0;
+	}
+remap:
+	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
+			   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		perror("failed to mmap file");
+		exit(-1);
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	size = event->header.size;
+	if (!size)
+		size = 8;
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		unsigned long shift = page_size * (head / page_size);
+		int ret;
+
+		ret = munmap(buf, page_size * mmap_window);
+		assert(ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	if (!size || process_event(event, offset, head) < 0) {
+
 		dprintf("%p [%p]: skipping unknown header type: %d\n",
 			(void *)(offset + head),
 			(void *)(long)(event->header.size),
@@ -956,7 +967,6 @@ broken_event:
 
 		size = 8;
 	}
-	}
 
 	head += size;
 
-- 
cgit v0.10.2


From 75051724f78677532618dd164a515baf106990e5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 23:14:49 +0200
Subject: perf report: Split out event processing helpers

- Introduce per event helper functions

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 333d312..82b6252 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -50,6 +50,7 @@ struct ip_event {
 	__u64 ip;
 	__u32 pid, tid;
 };
+
 struct mmap_event {
 	struct perf_event_header header;
 	__u32 pid, tid;
@@ -58,9 +59,10 @@ struct mmap_event {
 	__u64 pgoff;
 	char filename[PATH_MAX];
 };
+
 struct comm_event {
 	struct perf_event_header header;
-	__u32 pid,tid;
+	__u32 pid, tid;
 	char comm[16];
 };
 
@@ -760,114 +762,137 @@ static void register_idle_thread(void)
 static unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown = 0;
 
 static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
+process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 {
-	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
-		char level;
-		int show = 0;
-		struct dso *dso = NULL;
-		struct thread *thread = threads__findnew(event->ip.pid);
-		uint64_t ip = event->ip.ip;
-		struct map *map = NULL;
-
-		dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->header.misc,
-			event->ip.pid,
-			(void *)(long)ip);
-
-		dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
-		if (thread == NULL) {
-			fprintf(stderr, "problem processing %d event, skipping it.\n",
-				event->header.type);
-			return -1;
-		}
-
-		if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
-			show = SHOW_KERNEL;
-			level = 'k';
+	char level;
+	int show = 0;
+	struct dso *dso = NULL;
+	struct thread *thread = threads__findnew(event->ip.pid);
+	uint64_t ip = event->ip.ip;
+	struct map *map = NULL;
+
+	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->header.misc,
+		event->ip.pid,
+		(void *)(long)ip);
+
+	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	if (thread == NULL) {
+		fprintf(stderr, "problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
 
-			dso = kernel_dso;
+	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+		show = SHOW_KERNEL;
+		level = 'k';
 
-			dprintf(" ...... dso: %s\n", dso->name);
+		dso = kernel_dso;
 
-		} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+		dprintf(" ...... dso: %s\n", dso->name);
 
-			show = SHOW_USER;
-			level = '.';
+	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
 
-			map = thread__find_map(thread, ip);
-			if (map != NULL) {
-				dso = map->dso;
-				ip -= map->start + map->pgoff;
-			} else {
-				/*
-				 * If this is outside of all known maps,
-				 * and is a negative address, try to look it
-				 * up in the kernel dso, as it might be a
-				 * vsyscall (which executes in user-mode):
-				 */
-				if ((long long)ip < 0)
-					dso = kernel_dso;
-			}
-			dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+		show = SHOW_USER;
+		level = '.';
 
+		map = thread__find_map(thread, ip);
+		if (map != NULL) {
+			dso = map->dso;
+			ip -= map->start + map->pgoff;
 		} else {
-			show = SHOW_HV;
-			level = 'H';
-			dprintf(" ...... dso: [hypervisor]\n");
+			/*
+			 * If this is outside of all known maps,
+			 * and is a negative address, try to look it
+			 * up in the kernel dso, as it might be a
+			 * vsyscall (which executes in user-mode):
+			 */
+			if ((long long)ip < 0)
+				dso = kernel_dso;
 		}
+		dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
 
-		if (show & show_mask) {
-			struct symbol *sym = dso__find_symbol(dso, ip);
+	} else {
+		show = SHOW_HV;
+		level = 'H';
+		dprintf(" ...... dso: [hypervisor]\n");
+	}
 
-			if (hist_entry__add(thread, map, dso, sym, ip, level)) {
-				fprintf(stderr,
-		"problem incrementing symbol count, skipping event\n");
-				return -1;
-			}
-		}
-		total++;
-	} else switch (event->header.type) {
-	case PERF_EVENT_MMAP: {
-		struct thread *thread = threads__findnew(event->mmap.pid);
-		struct map *map = map__new(&event->mmap);
+	if (show & show_mask) {
+		struct symbol *sym = dso__find_symbol(dso, ip);
 
-		dprintf("%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			(void *)(long)event->mmap.start,
-			(void *)(long)event->mmap.len,
-			(void *)(long)event->mmap.pgoff,
-			event->mmap.filename);
-
-		if (thread == NULL || map == NULL) {
-			if (verbose)
-				fprintf(stderr, "problem processing PERF_EVENT_MMAP, skipping event.\n");
+		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
+			fprintf(stderr,
+		"problem incrementing symbol count, skipping event\n");
 			return -1;
 		}
-		thread__insert_map(thread, map);
-		total_mmap++;
-		break;
 	}
-	case PERF_EVENT_COMM: {
-		struct thread *thread = threads__findnew(event->comm.pid);
+	total++;
 
-		dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->comm.comm, event->comm.pid);
+	return 0;
+}
 
-		if (thread == NULL ||
-		    thread__set_comm(thread, event->comm.comm)) {
-			fprintf(stderr, "problem processing PERF_EVENT_COMM, skipping event.\n");
-			return -1;
-		}
-		total_comm++;
-		break;
+static int
+process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->mmap.pid);
+	struct map *map = map__new(&event->mmap);
+
+	dprintf("%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		(void *)(long)event->mmap.start,
+		(void *)(long)event->mmap.len,
+		(void *)(long)event->mmap.pgoff,
+		event->mmap.filename);
+
+	if (thread == NULL || map == NULL) {
+		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+		return -1;
+	}
+
+	thread__insert_map(thread, map);
+	total_mmap++;
+
+	return 0;
+}
+
+static int
+process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->comm.pid);
+
+	dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->comm.comm, event->comm.pid);
+
+	if (thread == NULL ||
+	    thread__set_comm(thread, event->comm.comm)) {
+		dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		return -1;
 	}
+	total_comm++;
+
+	return 0;
+}
+
+static int
+process_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
+		return process_overflow_event(event, offset, head);
+
+	switch (event->header.type) {
+	case PERF_EVENT_MMAP:
+		return process_mmap_event(event, offset, head);
+
+	case PERF_EVENT_COMM:
+		return process_comm_event(event, offset, head);
+
 	default:
 		return -1;
 	}
@@ -877,13 +902,13 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 
 static int __cmd_report(void)
 {
+	int ret, rc = EXIT_FAILURE;
 	unsigned long offset = 0;
 	unsigned long head = 0;
 	struct stat stat;
-	char *buf;
 	event_t *event;
-	int ret, rc = EXIT_FAILURE;
 	uint32_t size;
+	char *buf;
 
 	register_idle_thread();
 
-- 
cgit v0.10.2


From d11444dfa78cdd887d8dfd2fab3883132aff2c2d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 3 Jun 2009 23:29:14 +0200
Subject: perf report: Handle all known event types

We have munmap, throttle/unthrottle and period events as well,
process them - otherwise they are considered broke events and
we mis-parse the next few events.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 82b6252..6003cc3 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -893,6 +893,15 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	case PERF_EVENT_COMM:
 		return process_comm_event(event, offset, head);
 
+	/*
+	 * We dont process them right now but they are fine:
+	 */
+	case PERF_EVENT_MUNMAP:
+	case PERF_EVENT_PERIOD:
+	case PERF_EVENT_THROTTLE:
+	case PERF_EVENT_UNTHROTTLE:
+		return 0;
+
 	default:
 		return -1;
 	}
-- 
cgit v0.10.2


From 872c78202c58d26596e25743791ee81a7d24abad Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 3 Jun 2009 20:43:29 +0100
Subject: ALSA: Fix double locking of card list in snd_card_register()

The introduction of snd_card_set_id() added a lock on the card list
to the old choose_default_id() function when using it to implement
the new API call. This lock is needed to allow us to walk the list
and check to see if our new name is a duplicate. Unfortunately this
causes a lockup when called from snd_card_register() (in cases
where no ID is supplied for the card) since the card list is already
locked there.

Fix this fairly hideously by factoring out the implementation and
using a flag to indicate if the lock should be held. A better fix
would probably be to refactor snd_card_register() to move the
_set_id() outside the locking region but I can't immediately see
anything I can convince myself is safe.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/init.c b/sound/core/init.c
index 6557dd8..a578d05 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -476,15 +476,8 @@ int snd_card_free(struct snd_card *card)
 
 EXPORT_SYMBOL(snd_card_free);
 
-/**
- *  snd_card_set_id - set card identification name
- *  @card: soundcard structure
- *  @nid: new identification string
- *
- *  This function sets the card identification and checks for name
- *  collisions.
- */
-void snd_card_set_id(struct snd_card *card, const char *nid)
+static void snd_card_set_id_internal(struct snd_card *card, const char *nid,
+				     int do_locking)
 {
 	int i, len, idx_flag = 0, loops = SNDRV_CARDS;
 	const char *spos, *src;
@@ -529,14 +522,16 @@ void snd_card_set_id(struct snd_card *card, const char *nid)
       		}
 	      	if (!snd_info_check_reserved_words(id))
       			goto __change;
-		mutex_lock(&snd_card_mutex);
+		if (do_locking)
+			mutex_lock(&snd_card_mutex);
 		for (i = 0; i < snd_ecards_limit; i++) {
 			if (snd_cards[i] && !strcmp(snd_cards[i]->id, id)) {
 				mutex_unlock(&snd_card_mutex);
 				goto __change;
 			}
 		}
-		mutex_unlock(&snd_card_mutex);
+		if (do_locking)
+			mutex_unlock(&snd_card_mutex);
 		break;
 
 	      __change:
@@ -561,6 +556,18 @@ void snd_card_set_id(struct snd_card *card, const char *nid)
 	}
 }
 
+/**
+ *  snd_card_set_id - set card identification name
+ *  @card: soundcard structure
+ *  @nid: new identification string
+ *
+ *  This function sets the card identification and checks for name
+ *  collisions.
+ */
+void snd_card_set_id(struct snd_card *card, const char *nid)
+{
+	snd_card_set_id_internal(card, nid, 1);
+}
 EXPORT_SYMBOL(snd_card_set_id);
 
 #ifndef CONFIG_SYSFS_DEPRECATED
@@ -657,7 +664,7 @@ int snd_card_register(struct snd_card *card)
 		return 0;
 	}
 	if (card->id[0] == '\0')
-		snd_card_set_id(card, NULL);
+		snd_card_set_id_internal(card, NULL, 0);
 	snd_cards[card->number] = card;
 	mutex_unlock(&snd_card_mutex);
 	init_info_for_card(card);
-- 
cgit v0.10.2


From 01ca79f1411eae2a45352709c838b946b1af9fbd Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:52 +0200
Subject: x86, mce: add machine check exception count in /proc/interrupts

Useful for debugging, but it's also good general policy
to have a counter for all special interrupts there. This makes it easier
to diagnose where a CPU is spending its time.

[ Impact: feature, debugging tool ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ac6e030..1156dae 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -89,6 +89,7 @@ struct mce_log {
 extern int mce_disabled;
 
 #include <asm/atomic.h>
+#include <linux/percpu.h>
 
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
@@ -123,6 +124,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 
 int mce_available(struct cpuinfo_x86 *c);
 
+DECLARE_PER_CPU(unsigned, mce_exception_count);
+
 void mce_log_therm_throt_event(__u64 status);
 
 extern atomic_t mce_entry;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1d0aa9c..287268d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -57,6 +57,8 @@ int				mce_disabled;
 
 atomic_t mce_entry;
 
+DEFINE_PER_CPU(unsigned, mce_exception_count);
+
 /*
  * Tolerant levels:
  *   0: always panic on uncorrected errors, log corrected errors
@@ -359,6 +361,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	atomic_inc(&mce_entry);
 
+	__get_cpu_var(mce_exception_count)++;
+
 	if (notify_die(DIE_NMI, "machine check", regs, error_code,
 			   18, SIGKILL) == NOTIFY_STOP)
 		goto out;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index a05660b..05fc635 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -12,6 +12,7 @@
 #include <asm/io_apic.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/mce.h>
 
 atomic_t irq_err_count;
 
@@ -94,6 +95,12 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	seq_printf(p, "  Threshold APIC interrupts\n");
 # endif
 #endif
+#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
+	seq_printf(p, "%*s: ", prec, "MCE");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+	seq_printf(p, "  Machine check exceptions\n");
+#endif
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
 	seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
@@ -161,6 +168,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 {
 	u64 sum = irq_stats(cpu)->__nmi_count;
 
+#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
+	sum += per_cpu(mce_exception_count, cpu);
+#endif
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
-- 
cgit v0.10.2


From ca84f69697da0f004135e45b63ca560b6bd3554e Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:57 +0200
Subject: x86, mce: add MCE poll count to /proc/interrupts

Keep a count of the machine check polls (or CMCI events) in
/proc/interrupts.

Andi needs this for debugging, but it's also useful in general
to see what's going in by the kernel.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 1156dae..63abf3b 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -125,6 +125,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 int mce_available(struct cpuinfo_x86 *c);
 
 DECLARE_PER_CPU(unsigned, mce_exception_count);
+DECLARE_PER_CPU(unsigned, mce_poll_count);
 
 void mce_log_therm_throt_event(__u64 status);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 287268d..784f6ae 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -264,6 +264,8 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 	}
 }
 
+DEFINE_PER_CPU(unsigned, mce_poll_count);
+
 /*
  * Poll for corrected events or events that happened before reset.
  * Those are just logged through /dev/mcelog.
@@ -275,6 +277,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	struct mce m;
 	int i;
 
+	__get_cpu_var(mce_poll_count)++;
+
 	mce_setup(&m);
 
 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 05fc635..eff46b5 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -100,6 +100,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
 	seq_printf(p, "  Machine check exceptions\n");
+	seq_printf(p, "%*s: ", prec, "MCP");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+	seq_printf(p, "  Machine check polls\n");
 #endif
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
-- 
cgit v0.10.2


From f6fb0ac0869500323c78fa21992fe1933af61e91 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:55 +0200
Subject: x86, mce: store record length into memory struct mce anchor

This makes it easier for tools who want to extract the mcelog out of
crash images or memory dumps to adapt to changing struct mce size.
The length field replaces padding, so it's fully compatible.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 63abf3b..0a61946 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -59,7 +59,7 @@ struct mce_log {
 	unsigned len;	    /* = MCE_LOG_LEN */
 	unsigned next;
 	unsigned flags;
-	unsigned pad0;
+	unsigned recordlen;	/* length of struct mce */
 	struct mce entry[MCE_LOG_LEN];
 };
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 784f6ae..3db047e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -108,8 +108,9 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm);
  */
 
 static struct mce_log mcelog = {
-	MCE_LOG_SIGNATURE,
-	MCE_LOG_LEN,
+	.signature	= MCE_LOG_SIGNATURE,
+	.len		= MCE_LOG_LEN,
+	.recordlen	= sizeof(struct mce),
 };
 
 void mce_log(struct mce *mce)
-- 
cgit v0.10.2


From d620c67fb92aa11736112f9a03e31d8e3079c57a Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:56 +0200
Subject: x86, mce: support more than 256 CPUs in struct mce

The old struct mce had a limitation to 256 CPUs. But x86 Linux supports
more than that now with x2apic. Add a new field extcpu to report the
extended number.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0a61946..b4a04b6 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,9 +40,9 @@ struct mce {
 	__u64 res2;	/* dito. */
 	__u8  cs;		/* code segment */
 	__u8  bank;	/* machine check bank */
-	__u8  cpu;	/* cpu that raised the error */
+	__u8  cpu;	/* cpu number; obsolete; use extcpu now */
 	__u8  finished;   /* entry is valid */
-	__u32 pad;
+	__u32 extcpu;	/* linux cpu number that detected the error */
 };
 
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 7b3a542..7d858fb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -23,14 +23,14 @@
 /* Update fake mce registers on current CPU. */
 static void inject_mce(struct mce *m)
 {
-	struct mce *i = &per_cpu(injectm, m->cpu);
+	struct mce *i = &per_cpu(injectm, m->extcpu);
 
 	/* Make sure noone reads partially written injectm */
 	i->finished = 0;
 	mb();
 	m->finished = 0;
 	/* First set the fields after finished */
-	i->cpu = m->cpu;
+	i->extcpu = m->extcpu;
 	mb();
 	/* Now write record in order, finished last (except above) */
 	memcpy(i, m, sizeof(struct mce));
@@ -49,7 +49,7 @@ static void raise_mce(unsigned long data)
 {
 	struct delayed_mce *dm = (struct delayed_mce *)data;
 	struct mce *m = &dm->m;
-	int cpu = m->cpu;
+	int cpu = m->extcpu;
 
 	inject_mce(m);
 	if (m->status & MCI_STATUS_UC) {
@@ -93,7 +93,7 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 	if (copy_from_user(&m, ubuf, usize))
 		return -EFAULT;
 
-	if (m.cpu >= num_possible_cpus() || !cpu_online(m.cpu))
+	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
 		return -EINVAL;
 
 	dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
@@ -108,7 +108,7 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 	memcpy(&dm->m, &m, sizeof(struct mce));
 	setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
 	dm->timer.expires = jiffies + 2;
-	add_timer_on(&dm->timer, m.cpu);
+	add_timer_on(&dm->timer, m.extcpu);
 	return usize;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3db047e..2c4dd6c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -94,7 +94,7 @@ static inline int skip_bank_init(int i)
 void mce_setup(struct mce *m)
 {
 	memset(m, 0, sizeof(struct mce));
-	m->cpu = smp_processor_id();
+	m->cpu = m->extcpu = smp_processor_id();
 	rdtscll(m->tsc);
 }
 
@@ -158,7 +158,7 @@ static void print_mce(struct mce *m)
 	       KERN_EMERG "HARDWARE ERROR\n"
 	       KERN_EMERG
 	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
-	       m->cpu, m->mcgstatus, m->bank, m->status);
+	       m->extcpu, m->mcgstatus, m->bank, m->status);
 	if (m->ip) {
 		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
 		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
-- 
cgit v0.10.2


From 8ee08347c1e8b5680b3b3ce081e42e97bcaa1abe Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:56 +0200
Subject: x86, mce: extend struct mce user interface with more information.

Experience has shown that struct mce which is used to pass an machine
check to the user space daemon currently a few limitations.  Also some
data which is useful to print at panic level is also missing.

This patch addresses most of them. The same information is also
printed out together with mce panic.

struct mce can be painlessly extended in a compatible way, the mcelog
user space code just ignores additional fields with a warning.

- It doesn't provide a wall time timestamp. There have been a few
  complaints about that. Fix that by adding a 64bit time_t

- It doesn't provide the exact CPU identification. This makes
  it awkward for mcelog to decode the event correctly, especially
  when there are variations in the supported MCE codes on different
  CPU models or when mcelog is running on a different host after a panic.
  Previously the administrator had to specify the correct CPU
  when mcelog ran on a different host, but with the more variation
  in machine checks now it's better to auto detect that.
  It's also useful for more detailed analysis of CPU events.
  Pass CPUID 1.EAX and the cpu vendor (as encoded in processor.h) instead.

- Socket ID and initial APIC ID are useful to report because they
  allow to identify the failing CPU in some (not all) cases.
  This is also especially useful for the panic situation.
  This addresses one of the complaints from Thomas Gleixner earlier.

- The MCG capabilities MSR needs to be reported for some advanced
  error processing in mcelog

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b4a04b6..ba1f889 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -36,13 +36,19 @@ struct mce {
 	__u64 mcgstatus;
 	__u64 ip;
 	__u64 tsc;	/* cpu time stamp counter */
-	__u64 res1;	/* for future extension */
-	__u64 res2;	/* dito. */
+	__u64 time;	/* wall time_t when error was detected */
+	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
+	__u8  pad1;
+	__u16 pad2;
+	__u32 cpuid;	/* CPUID 1 EAX */
 	__u8  cs;		/* code segment */
 	__u8  bank;	/* machine check bank */
 	__u8  cpu;	/* cpu number; obsolete; use extcpu now */
 	__u8  finished;   /* entry is valid */
 	__u32 extcpu;	/* linux cpu number that detected the error */
+	__u32 socketid;	/* CPU socket ID */
+	__u32 apicid;	/* CPU initial apic ID */
+	__u64 mcgcap;	/* MCGCAP MSR: machine check capabilities of CPU */
 };
 
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2c4dd6c..ba68449 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -96,6 +96,15 @@ void mce_setup(struct mce *m)
 	memset(m, 0, sizeof(struct mce));
 	m->cpu = m->extcpu = smp_processor_id();
 	rdtscll(m->tsc);
+	/* We hope get_seconds stays lockless */
+	m->time = get_seconds();
+	m->cpuvendor = boot_cpu_data.x86_vendor;
+	m->cpuid = cpuid_eax(1);
+#ifdef CONFIG_SMP
+	m->socketid = cpu_data(m->extcpu).phys_proc_id;
+#endif
+	m->apicid = cpu_data(m->extcpu).initial_apicid;
+	rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
 }
 
 DEFINE_PER_CPU(struct mce, injectm);
@@ -173,6 +182,9 @@ static void print_mce(struct mce *m)
 	if (m->misc)
 		printk("MISC %llx ", m->misc);
 	printk("\n");
+	printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+			m->cpuvendor, m->cpuid, m->time, m->socketid,
+			m->apicid);
 	printk(KERN_EMERG "This is not a software problem!\n");
 	printk(KERN_EMERG "Run through mcelog --ascii to decode "
 	       "and contact your hardware vendor\n");
-- 
cgit v0.10.2


From de8a84d85ad8bb46d01d72ebc57030b95075603c Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:53 +0200
Subject: x86, mce: log corrected errors when panicing

Normally the machine check handler ignores corrected errors and leaves
them to machine_check_poll(). But when panicing mcp won't run, so
log all errors.

Note: this can still miss some cases until the "early no way out"
patch later is applied too.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ba68449..86806e5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -412,9 +412,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 		/*
 		 * Non uncorrected errors are handled by machine_check_poll
-		 * Leave them alone.
+		 * Leave them alone, unless this panics.
 		 */
-		if ((m.status & MCI_STATUS_UC) == 0)
+		if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out)
 			continue;
 
 		/*
-- 
cgit v0.10.2


From a0189c70e5f17f4253dd7bc575c97469900e23d6 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:54 +0200
Subject: x86, mce: remove TSC print heuristic

Previously mce_panic used a simple heuristic to avoid printing
old so far unreported machine check events on a mce panic. This worked
by comparing the TSC value at the start of the machine check handler
with the event time stamp and only printing newer ones.

This has a couple of issues, in particular on systems where the TSC
is not fully synchronized between CPUs it could lose events or print
old ones.

It is also problematic with full system synchronization as it is
added by the next patch.

Remove the TSC heuristic and instead replace it with a simple heuristic
to print corrected errors first and after that uncorrected errors
and finally the worst machine check as determined by the machine
check handler.

This simplifies the code because there is no need to pass the
original TSC value around.

Contains fixes from Ying Huang

[ Impact: bug fix, cleanup ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Ying Huang <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 86806e5..6773610 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -158,6 +158,7 @@ void mce_log(struct mce *mce)
 	mcelog.entry[entry].finished = 1;
 	wmb();
 
+	mce->finished = 1;
 	set_bit(0, &notify_user);
 }
 
@@ -190,23 +191,29 @@ static void print_mce(struct mce *m)
 	       "and contact your hardware vendor\n");
 }
 
-static void mce_panic(char *msg, struct mce *backup, u64 start)
+static void mce_panic(char *msg, struct mce *final)
 {
 	int i;
 
 	bust_spinlocks(1);
 	console_verbose();
+	/* First print corrected ones that are still unlogged */
 	for (i = 0; i < MCE_LOG_LEN; i++) {
-		u64 tsc = mcelog.entry[i].tsc;
-
-		if ((s64)(tsc - start) < 0)
+		struct mce *m = &mcelog.entry[i];
+		if ((m->status & MCI_STATUS_VAL) &&
+			!(m->status & MCI_STATUS_UC))
+			print_mce(m);
+	}
+	/* Now print uncorrected but with the final one last */
+	for (i = 0; i < MCE_LOG_LEN; i++) {
+		struct mce *m = &mcelog.entry[i];
+		if (!(m->status & MCI_STATUS_VAL))
 			continue;
-		print_mce(&mcelog.entry[i]);
-		if (backup && mcelog.entry[i].tsc == backup->tsc)
-			backup = NULL;
+		if (!final || memcmp(m, final, sizeof(struct mce)))
+			print_mce(m);
 	}
-	if (backup)
-		print_mce(backup);
+	if (final)
+		print_mce(final);
 	panic(msg);
 }
 
@@ -362,7 +369,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 {
 	struct mce m, panicm;
 	int panicm_found = 0;
-	u64 mcestart = 0;
 	int i;
 	/*
 	 * If no_way_out gets set, there is no safe way to recover from this
@@ -394,7 +400,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	if (!(m.mcgstatus & MCG_STATUS_RIPV))
 		no_way_out = 1;
 
-	rdtscll(mcestart);
 	barrier();
 
 	for (i = 0; i < banks; i++) {
@@ -478,7 +483,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * has not set tolerant to an insane level, give up and die.
 	 */
 	if (no_way_out && tolerant < 3)
-		mce_panic("Machine check", &panicm, mcestart);
+		mce_panic("Machine check", &panicm);
 
 	/*
 	 * If the error seems to be unrecoverable, something should be
@@ -506,8 +511,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		if (user_space) {
 			force_sig(SIGBUS, current);
 		} else if (panic_on_oops || tolerant < 2) {
-			mce_panic("Uncorrected machine check",
-				&panicm, mcestart);
+			mce_panic("Uncorrected machine check", &panicm);
 		}
 	}
 
-- 
cgit v0.10.2


From 817f32d02a52dd7f5941534e0699883691e918df Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:54 +0200
Subject: x86, mce: add table driven machine check grading

The machine check grading (as in deciding what should be done for a given
register value) has to be done multiple times soon and it's also getting
more complicated.
So it makes sense to consolidate it into a single function. To get smaller
and more straight forward and possibly more extensible code I opted towards
a new table driven method. The various rules are put into a table
when is then executed by a very simple interpreter.

The grading engine is in a new file mce-severity.c. I also added a private
include file mce-internal.h, because mce.h is already a bit too cluttered.

This is dead code right now, but will be used in followon patches.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 60ee182..45004fa 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,5 +1,6 @@
 obj-y				=  mce.o therm_throt.o
 
+obj-$(CONFIG_X86_NEW_MCE)	+= mce-severity.o
 obj-$(CONFIG_X86_OLD_MCE)	+= k7.o p4.o p6.o
 obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
 obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
new file mode 100644
index 0000000..f126b4a
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -0,0 +1,10 @@
+#include <asm/mce.h>
+
+enum severity_level {
+	MCE_NO_SEVERITY,
+	MCE_SOME_SEVERITY,
+	MCE_UC_SEVERITY,
+	MCE_PANIC_SEVERITY,
+};
+
+int mce_severity(struct mce *a, int tolerant, char **msg);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
new file mode 100644
index 0000000..c189e89
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -0,0 +1,61 @@
+/*
+ * MCE grading rules.
+ * Copyright 2008, 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Author: Andi Kleen
+ */
+#include <linux/kernel.h>
+#include <asm/mce.h>
+
+#include "mce-internal.h"
+
+/*
+ * Grade an mce by severity. In general the most severe ones are processed
+ * first. Since there are quite a lot of combinations test the bits in a
+ * table-driven way. The rules are simply processed in order, first
+ * match wins.
+ */
+
+static struct severity {
+	u64 mask;
+	u64 result;
+	unsigned char sev;
+	unsigned char mcgmask;
+	unsigned char mcgres;
+	char *msg;
+} severities[] = {
+#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
+#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
+#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
+#define MCGMASK(x, res, s, m, r...) \
+	{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
+	BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
+	BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
+	BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
+	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"),
+	BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
+	BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
+	BITSET(0, SOME, "No match")	/* always matches. keep at end */
+};
+
+int mce_severity(struct mce *a, int tolerant, char **msg)
+{
+	struct severity *s;
+	for (s = severities;; s++) {
+		if ((a->status & s->mask) != s->result)
+			continue;
+		if ((a->mcgstatus & s->mcgmask) != s->mcgres)
+			continue;
+		if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) &&
+			tolerant < 1)
+			return MCE_PANIC_SEVERITY;
+		if (msg)
+			*msg = s->msg;
+		return s->sev;
+	}
+}
-- 
cgit v0.10.2


From bd19a5e6b73df276e1ccedf9059e9ee70c372d7d Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:55 +0200
Subject: x86, mce: check early in exception handler if panic is needed

The exception handler should behave differently if the exception is
fatal versus one that can be returned from.  In the first case it should
never clear any registers because these need to be preserved
for logging after the next boot. Otherwise it should clear them
on each CPU step by step so that other CPUs sharing the same bank don't
see duplicate events. Otherwise we risk reporting events multiple
times on any CPUs which have shared machine check banks, which
is a common problem on Intel Nehalem which has both SMT (two
CPU threads sharing banks) and shared machine check banks in the uncore.

Determine early in a special pass if any event requires a panic.
This uses the mce_severity() function added earlier.

This is needed for the next patch.

Also fixes a problem together with an earlier patch
that corrected events weren't logged on a fatal MCE.

[ Impact: Feature ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6773610..5031814 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -36,6 +36,7 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 
+#include "mce-internal.h"
 #include "mce.h"
 
 /* Handle unconfigured int18 (should never happen) */
@@ -191,7 +192,7 @@ static void print_mce(struct mce *m)
 	       "and contact your hardware vendor\n");
 }
 
-static void mce_panic(char *msg, struct mce *final)
+static void mce_panic(char *msg, struct mce *final, char *exp)
 {
 	int i;
 
@@ -214,6 +215,8 @@ static void mce_panic(char *msg, struct mce *final)
 	}
 	if (final)
 		print_mce(final);
+	if (exp)
+		printk(KERN_EMERG "Machine check: %s\n", exp);
 	panic(msg);
 }
 
@@ -358,6 +361,22 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
 /*
+ * Do a quick check if any of the events requires a panic.
+ * This decides if we keep the events around or clear them.
+ */
+static int mce_no_way_out(struct mce *m, char **msg)
+{
+	int i;
+
+	for (i = 0; i < banks; i++) {
+		m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
+			return 1;
+	}
+	return 0;
+}
+
+/*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
  *
@@ -381,6 +400,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 */
 	int kill_it = 0;
 	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
+	char *msg = "Unknown";
 
 	atomic_inc(&mce_entry);
 
@@ -395,10 +415,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	mce_setup(&m);
 
 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
-
-	/* if the restart IP is not valid, we're done for */
-	if (!(m.mcgstatus & MCG_STATUS_RIPV))
-		no_way_out = 1;
+	no_way_out = mce_no_way_out(&m, &msg);
 
 	barrier();
 
@@ -430,18 +447,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		__set_bit(i, toclear);
 
 		if (m.status & MCI_STATUS_EN) {
-			/* if PCC was set, there's no way out */
-			no_way_out |= !!(m.status & MCI_STATUS_PCC);
 			/*
 			 * If this error was uncorrectable and there was
 			 * an overflow, we're in trouble.  If no overflow,
 			 * we might get away with just killing a task.
 			 */
-			if (m.status & MCI_STATUS_UC) {
-				if (tolerant < 1 || m.status & MCI_STATUS_OVER)
-					no_way_out = 1;
+			if (m.status & MCI_STATUS_UC)
 				kill_it = 1;
-			}
 		} else {
 			/*
 			 * Machine check event was not enabled. Clear, but
@@ -483,7 +495,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * has not set tolerant to an insane level, give up and die.
 	 */
 	if (no_way_out && tolerant < 3)
-		mce_panic("Machine check", &panicm);
+		mce_panic("Machine check", &panicm, msg);
 
 	/*
 	 * If the error seems to be unrecoverable, something should be
@@ -511,7 +523,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		if (user_space) {
 			force_sig(SIGBUS, current);
 		} else if (panic_on_oops || tolerant < 2) {
-			mce_panic("Uncorrected machine check", &panicm);
+			mce_panic("Uncorrected machine check", &panicm, msg);
 		}
 	}
 
-- 
cgit v0.10.2


From ccc3c3192ae78dd56dcdf5353fd1a9ef5f9a3e2b Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:54 +0200
Subject: x86, mce: implement bootstrapping for machine check wakeups

Machine checks support waking up the mcelog daemon quickly.

The original wake up code for this was pretty ugly, relying on
a idle notifier and a special process flag. The reason it did
it this way is that the machine check handler is not subject
to normal interrupt locking rules so it's not safe
to call wake_up().  Instead it set a process flag
and then either did the wakeup in the syscall return
or in the idle notifier.

This patch adds a new "bootstraping" method as replacement.

The idea is that the handler checks if it's in a state where
it is unsafe to call wake_up(). If it's safe it calls it directly.
When it's not safe -- that is it interrupted in a critical
section with interrupts disables -- it uses a new "self IPI" to trigger
an IPI to its own CPU. This can be done safely because IPI
triggers are atomic with some care. The IPI is raised
once the interrupts are reenabled and can then safely call
wake_up().

When APICs are disabled the event is just queued and will be picked up
eventually by the next polling timer. I think that's a reasonable
compromise, since it should only happen quite rarely.

Contains fixes from Ying Huang.

[ solve conflict on irqinit, make it work on 32bit (entry_arch.h) - HS ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index b2eb9c0..4cdcf5a 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -60,4 +60,8 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
 #endif
 
+#ifdef CONFIG_X86_NEW_MCE
+BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
+#endif
+
 #endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index a7d14bb..4e59197 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -32,6 +32,7 @@ extern void error_interrupt(void);
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
+extern void mce_self_interrupt(void);
 
 extern void invalidate_interrupt(void);
 extern void invalidate_interrupt0(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 8c46b85..68f7cf8 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -118,6 +118,11 @@
 #define GENERIC_INTERRUPT_VECTOR	0xed
 
 /*
+ * Self IPI vector for machine checks
+ */
+#define MCE_SELF_VECTOR			0xeb
+
+/*
  * First APIC vector available to drivers: (vectors 0x30-0xee) we
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5031814..1217816 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -10,6 +10,7 @@
 #include <linux/thread_info.h>
 #include <linux/capability.h>
 #include <linux/miscdevice.h>
+#include <linux/interrupt.h>
 #include <linux/ratelimit.h>
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
@@ -32,7 +33,10 @@
 #include <linux/fs.h>
 
 #include <asm/processor.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
 #include <asm/idle.h>
+#include <asm/ipi.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 
@@ -287,6 +291,54 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 	}
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC 
+/*
+ * Called after interrupts have been reenabled again
+ * when a MCE happened during an interrupts off region
+ * in the kernel.
+ */
+asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
+{
+	ack_APIC_irq();
+	exit_idle();
+	irq_enter();
+	mce_notify_user();
+	irq_exit();
+}
+#endif
+
+static void mce_report_event(struct pt_regs *regs)
+{
+	if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
+		mce_notify_user();
+		return;
+	}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * Without APIC do not notify. The event will be picked
+	 * up eventually.
+	 */
+	if (!cpu_has_apic)
+		return;
+
+	/*
+	 * When interrupts are disabled we cannot use
+	 * kernel services safely. Trigger an self interrupt
+	 * through the APIC to instead do the notification
+	 * after interrupts are reenabled again.
+	 */
+	apic->send_IPI_self(MCE_SELF_VECTOR);
+
+	/*
+	 * Wait for idle afterwards again so that we don't leave the
+	 * APIC in a non idle state because the normal APIC writes
+	 * cannot exclude us.
+	 */
+	apic_wait_icr_idle();
+#endif
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -530,6 +582,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	/* notify userspace ASAP */
 	set_thread_flag(TIF_MCE_NOTIFY);
 
+	mce_report_event(regs);
+
 	/* the last thing we do is clear state */
 	for (i = 0; i < banks; i++) {
 		if (test_bit(i, toclear))
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a31a7f2..711c130 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1011,6 +1011,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
 apicinterrupt THERMAL_APIC_VECTOR \
 	thermal_interrupt smp_thermal_interrupt
 
+#ifdef CONFIG_X86_MCE
+apicinterrupt MCE_SELF_VECTOR \
+	mce_self_interrupt smp_mce_self_interrupt
+#endif
+
 #ifdef CONFIG_SMP
 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
 	call_function_single_interrupt smp_call_function_single_interrupt
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index aab3d27..441f6ec 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -187,6 +187,9 @@ static void __init apic_intr_init(void)
 #ifdef CONFIG_X86_THRESHOLD
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
+#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
+	alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
+#endif
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/* self generated IPI for local APIC timer */
-- 
cgit v0.10.2


From f94b61c2c9fdcc90773c49df9ccf9ede3ad0d7db Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:55 +0200
Subject: x86, mce: implement panic synchronization

In some circumstances multiple CPUs can enter mce_panic() in parallel.
This gives quite confused output because they will all dump the same
machine check buffer.

The other problem is that they would all panic in parallel, but not
process each other's shutdown IPIs because interrupts are disabled.

Detect this situation early on in mce_panic(). On the first CPU
entering will do the panic, the others will just wait to be killed.

For paranoia reasons in case the other CPU dies during the MCE I added
a 5 seconds timeout. If it expires each CPU will panic on its own again.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1217816..421020f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -196,10 +196,32 @@ static void print_mce(struct mce *m)
 	       "and contact your hardware vendor\n");
 }
 
+#define PANIC_TIMEOUT 5 /* 5 seconds */
+
+static atomic_t mce_paniced;
+
+/* Panic in progress. Enable interrupts and wait for final IPI */
+static void wait_for_panic(void)
+{
+	long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+	preempt_disable();
+	local_irq_enable();
+	while (timeout-- > 0)
+		udelay(1);
+	panic("Panicing machine check CPU died");
+}
+
 static void mce_panic(char *msg, struct mce *final, char *exp)
 {
 	int i;
 
+	/*
+	 * Make sure only one CPU runs in machine check panic
+	 */
+	if (atomic_add_return(1, &mce_paniced) > 1)
+		wait_for_panic();
+	barrier();
+
 	bust_spinlocks(1);
 	console_verbose();
 	/* First print corrected ones that are still unlogged */
-- 
cgit v0.10.2


From 3c0797925f4ef9d55a32059d2af61a9c262e639d Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:55 +0200
Subject: x86, mce: switch x86 machine check handler to Monarch election.

On Intel platforms machine check exceptions are always broadcast to
all CPUs.  This patch makes the machine check handler synchronize all
these machine checks, elect a Monarch to handle the event and collect
the worst event from all CPUs and then process it first.

This has some advantages:

- When there is a truly data corrupting error the system panics as
  quickly as possible. This improves containment of corrupted
  data and makes sure the corrupted data never hits stable storage.

- The panics are synchronized and do not reenter the panic code
  on multiple CPUs (which currently does not handle this well).

- All the errors are reported. Currently it often happens that
  another CPU happens to do the panic first, but reports useless
  information (empty machine check) because the real error
  happened on another CPU which came in later.
  This is a big advantage on Nehalem where the 8 threads per CPU
  lead to often the wrong CPU winning the race and dumping
  useless information on a machine check.  The problem also occurs
  in a less severe form on older CPUs.

- The system can detect when no CPUs detected a machine check
  and shut down the system.  This can happen when one CPU is so
  badly hung that that it cannot process a machine check anymore
  or when some external agent wants to stop the system by
  asserting the machine check pin.  This follows Intel hardware
  recommendations.

- This matches the recommended error model by the CPU designers.

- The events can be output in true severity order

- When a panic happens on another CPU it makes sure to be actually
  be able to process the stop IPI by enabling interrupts.

The code is extremly careful to handle timeouts while waiting
for other CPUs. It can't rely on the normal timing mechanisms
(jiffies, ktime_get) because of its asynchronous/lockless nature,
so it uses own timeouts using ndelay() and a "SPINUNIT"

The timeout is configurable. By default it waits for upto one
second for the other CPUs.  This can be also disabled.

From some informal testing AMD systems do not see to broadcast
machine checks, so right now it's always disabled by default on
non Intel CPUs or also on very old Intel systems.

Includes fixes from Ying Huang
Fixed a "ecception" in a comment (H.Seto)
Moved global_nwo reset later based on suggestion from H.Seto
v2: Avoid duplicate messages

[ Impact: feature, fixes long standing problems. ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 63fca71..0ee5e3b 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -15,13 +15,17 @@ Machine check
                in a reboot. On Intel systems it is enabled by default.
    mce=nobootlog
 		Disable boot machine check logging.
-   mce=tolerancelevel (number)
+   mce=tolerancelevel[,monarchtimeout] (number,number)
+		tolerance levels:
 		0: always panic on uncorrected errors, log corrected errors
 		1: panic or SIGBUS on uncorrected errors, log corrected errors
 		2: SIGBUS or log uncorrected errors, log corrected errors
 		3: never panic or SIGBUS, log all errors (for testing only)
 		Default is 1
 		Can be also set using sysfs which is preferable.
+		monarchtimeout:
+		Sets the time in us to wait for other CPUs on machine checks. 0
+		to disable.
 
    nomce (for compatibility with i386): same as mce=off
 
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
index a4fdb25..b1fb302 100644
--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
@@ -69,6 +69,10 @@ trigger
 	Program to run when a machine check event is detected.
 	This is an alternative to running mcelog regularly from cron
 	and allows to detect events faster.
+monarch_timeout
+	How long to wait for the other CPUs to machine check too on a
+	exception. 0 to disable waiting for other CPUs.
+	Unit: us
 
 TBD document entries for AMD threshold interrupt configuration
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 421020f..ba43189 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -21,6 +21,7 @@
 #include <linux/percpu.h>
 #include <linux/string.h>
 #include <linux/sysdev.h>
+#include <linux/delay.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
 #include <linux/sysfs.h>
@@ -28,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/poll.h>
+#include <linux/nmi.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/fs.h>
@@ -60,6 +62,8 @@ int				mce_disabled;
 
 #define MISC_MCELOG_MINOR	227
 
+#define SPINUNIT 100	/* 100ns */
+
 atomic_t mce_entry;
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
@@ -77,6 +81,7 @@ static u64			*bank;
 static unsigned long		notify_user;
 static int			rip_msr;
 static int			mce_bootlog = -1;
+static int			monarch_timeout = -1;
 
 static char			trigger[128];
 static char			*trigger_argv[2] = { trigger, NULL };
@@ -84,6 +89,9 @@ static char			*trigger_argv[2] = { trigger, NULL };
 static unsigned long		dont_init_banks;
 
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
+static DEFINE_PER_CPU(struct mce, mces_seen);
+static int			cpu_missing;
+
 
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -241,6 +249,8 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 	}
 	if (final)
 		print_mce(final);
+	if (cpu_missing)
+		printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
 	if (exp)
 		printk(KERN_EMERG "Machine check: %s\n", exp);
 	panic(msg);
@@ -451,18 +461,287 @@ static int mce_no_way_out(struct mce *m, char **msg)
 }
 
 /*
+ * Variable to establish order between CPUs while scanning.
+ * Each CPU spins initially until executing is equal its number.
+ */
+static atomic_t mce_executing;
+
+/*
+ * Defines order of CPUs on entry. First CPU becomes Monarch.
+ */
+static atomic_t mce_callin;
+
+/*
+ * Check if a timeout waiting for other CPUs happened.
+ */
+static int mce_timed_out(u64 *t)
+{
+	/*
+	 * The others already did panic for some reason.
+	 * Bail out like in a timeout.
+	 * rmb() to tell the compiler that system_state
+	 * might have been modified by someone else.
+	 */
+	rmb();
+	if (atomic_read(&mce_paniced))
+		wait_for_panic();
+	if (!monarch_timeout)
+		goto out;
+	if ((s64)*t < SPINUNIT) {
+		/* CHECKME: Make panic default for 1 too? */
+		if (tolerant < 1)
+			mce_panic("Timeout synchronizing machine check over CPUs",
+				  NULL, NULL);
+		cpu_missing = 1;
+		return 1;
+	}
+	*t -= SPINUNIT;
+out:
+	touch_nmi_watchdog();
+	return 0;
+}
+
+/*
+ * The Monarch's reign.  The Monarch is the CPU who entered
+ * the machine check handler first. It waits for the others to
+ * raise the exception too and then grades them. When any
+ * error is fatal panic. Only then let the others continue.
+ *
+ * The other CPUs entering the MCE handler will be controlled by the
+ * Monarch. They are called Subjects.
+ *
+ * This way we prevent any potential data corruption in a unrecoverable case
+ * and also makes sure always all CPU's errors are examined.
+ *
+ * Also this detects the case of an machine check event coming from outer
+ * space (not detected by any CPUs) In this case some external agent wants
+ * us to shut down, so panic too.
+ *
+ * The other CPUs might still decide to panic if the handler happens
+ * in a unrecoverable place, but in this case the system is in a semi-stable
+ * state and won't corrupt anything by itself. It's ok to let the others
+ * continue for a bit first.
+ *
+ * All the spin loops have timeouts; when a timeout happens a CPU
+ * typically elects itself to be Monarch.
+ */
+static void mce_reign(void)
+{
+	int cpu;
+	struct mce *m = NULL;
+	int global_worst = 0;
+	char *msg = NULL;
+	char *nmsg = NULL;
+
+	/*
+	 * This CPU is the Monarch and the other CPUs have run
+	 * through their handlers.
+	 * Grade the severity of the errors of all the CPUs.
+	 */
+	for_each_possible_cpu(cpu) {
+		int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
+					    &nmsg);
+		if (severity > global_worst) {
+			msg = nmsg;
+			global_worst = severity;
+			m = &per_cpu(mces_seen, cpu);
+		}
+	}
+
+	/*
+	 * Cannot recover? Panic here then.
+	 * This dumps all the mces in the log buffer and stops the
+	 * other CPUs.
+	 */
+	if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
+		mce_panic("Fatal machine check", m, msg);
+
+	/*
+	 * For UC somewhere we let the CPU who detects it handle it.
+	 * Also must let continue the others, otherwise the handling
+	 * CPU could deadlock on a lock.
+	 */
+
+	/*
+	 * No machine check event found. Must be some external
+	 * source or one CPU is hung. Panic.
+	 */
+	if (!m && tolerant < 3)
+		mce_panic("Machine check from unknown source", NULL, NULL);
+
+	/*
+	 * Now clear all the mces_seen so that they don't reappear on
+	 * the next mce.
+	 */
+	for_each_possible_cpu(cpu)
+		memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
+}
+
+static atomic_t global_nwo;
+
+/*
+ * Start of Monarch synchronization. This waits until all CPUs have
+ * entered the exception handler and then determines if any of them
+ * saw a fatal event that requires panic. Then it executes them
+ * in the entry order.
+ * TBD double check parallel CPU hotunplug
+ */
+static int mce_start(int no_way_out, int *order)
+{
+	int nwo;
+	int cpus = num_online_cpus();
+	u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+
+	if (!timeout) {
+		*order = -1;
+		return no_way_out;
+	}
+
+	atomic_add(no_way_out, &global_nwo);
+
+	/*
+	 * Wait for everyone.
+	 */
+	while (atomic_read(&mce_callin) != cpus) {
+		if (mce_timed_out(&timeout)) {
+			atomic_set(&global_nwo, 0);
+			*order = -1;
+			return no_way_out;
+		}
+		ndelay(SPINUNIT);
+	}
+
+	/*
+	 * Cache the global no_way_out state.
+	 */
+	nwo = atomic_read(&global_nwo);
+
+	/*
+	 * Monarch starts executing now, the others wait.
+	 */
+	if (*order == 1) {
+		atomic_set(&mce_executing, 1);
+		return nwo;
+	}
+
+	/*
+	 * Now start the scanning loop one by one
+	 * in the original callin order.
+	 * This way when there are any shared banks it will
+	 * be only seen by one CPU before cleared, avoiding duplicates.
+	 */
+	while (atomic_read(&mce_executing) < *order) {
+		if (mce_timed_out(&timeout)) {
+			atomic_set(&global_nwo, 0);
+			*order = -1;
+			return no_way_out;
+		}
+		ndelay(SPINUNIT);
+	}
+	return nwo;
+}
+
+/*
+ * Synchronize between CPUs after main scanning loop.
+ * This invokes the bulk of the Monarch processing.
+ */
+static int mce_end(int order)
+{
+	int ret = -1;
+	u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+
+	if (!timeout)
+		goto reset;
+	if (order < 0)
+		goto reset;
+
+	/*
+	 * Allow others to run.
+	 */
+	atomic_inc(&mce_executing);
+
+	if (order == 1) {
+		/* CHECKME: Can this race with a parallel hotplug? */
+		int cpus = num_online_cpus();
+
+		/*
+		 * Monarch: Wait for everyone to go through their scanning
+		 * loops.
+		 */
+		while (atomic_read(&mce_executing) <= cpus) {
+			if (mce_timed_out(&timeout))
+				goto reset;
+			ndelay(SPINUNIT);
+		}
+
+		mce_reign();
+		barrier();
+		ret = 0;
+	} else {
+		/*
+		 * Subject: Wait for Monarch to finish.
+		 */
+		while (atomic_read(&mce_executing) != 0) {
+			if (mce_timed_out(&timeout))
+				goto reset;
+			ndelay(SPINUNIT);
+		}
+
+		/*
+		 * Don't reset anything. That's done by the Monarch.
+		 */
+		return 0;
+	}
+
+	/*
+	 * Reset all global state.
+	 */
+reset:
+	atomic_set(&global_nwo, 0);
+	atomic_set(&mce_callin, 0);
+	barrier();
+
+	/*
+	 * Let others run again.
+	 */
+	atomic_set(&mce_executing, 0);
+	return ret;
+}
+
+static void mce_clear_state(unsigned long *toclear)
+{
+	int i;
+
+	for (i = 0; i < banks; i++) {
+		if (test_bit(i, toclear))
+			mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+	}
+}
+
+/*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
  *
  * This is executed in NMI context not subject to normal locking rules. This
  * implies that most kernel services cannot be safely used. Don't even
  * think about putting a printk in there!
+ *
+ * On Intel systems this is entered on all CPUs in parallel through
+ * MCE broadcast. However some CPUs might be broken beyond repair,
+ * so be always careful when synchronizing with others.
  */
 void do_machine_check(struct pt_regs *regs, long error_code)
 {
-	struct mce m, panicm;
-	int panicm_found = 0;
+	struct mce m, *final;
 	int i;
+	int worst = 0;
+	int severity;
+	/*
+	 * Establish sequential order between the CPUs entering the machine
+	 * check handler.
+	 */
+	int order;
+
 	/*
 	 * If no_way_out gets set, there is no safe way to recover from this
 	 * MCE.  If tolerant is cranked up, we'll try anyway.
@@ -486,13 +765,23 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	if (!banks)
 		goto out;
 
+	order = atomic_add_return(1, &mce_callin);
 	mce_setup(&m);
 
 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 	no_way_out = mce_no_way_out(&m, &msg);
 
+	final = &__get_cpu_var(mces_seen);
+	*final = m;
+
 	barrier();
 
+	/*
+	 * Go through all the banks in exclusion of the other CPUs.
+	 * This way we don't report duplicated events on shared banks
+	 * because the first one to see it will clear it.
+	 */
+	no_way_out = mce_start(no_way_out, &order);
 	for (i = 0; i < banks; i++) {
 		__clear_bit(i, toclear);
 		if (!bank[i])
@@ -544,32 +833,32 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		mce_get_rip(&m, regs);
 		mce_log(&m);
 
-		/*
-		 * Did this bank cause the exception?
-		 *
-		 * Assume that the bank with uncorrectable errors did it,
-		 * and that there is only a single one:
-		 */
-		if ((m.status & MCI_STATUS_UC) &&
-					(m.status & MCI_STATUS_EN)) {
-			panicm = m;
-			panicm_found = 1;
+		severity = mce_severity(&m, tolerant, NULL);
+		if (severity > worst) {
+			*final = m;
+			worst = severity;
 		}
 	}
 
+	if (!no_way_out)
+		mce_clear_state(toclear);
+
 	/*
-	 * If we didn't find an uncorrectable error, pick
-	 * the last one (shouldn't happen, just being safe).
+	 * Do most of the synchronization with other CPUs.
+	 * When there's any problem use only local no_way_out state.
 	 */
-	if (!panicm_found)
-		panicm = m;
+	if (mce_end(order) < 0)
+		no_way_out = worst >= MCE_PANIC_SEVERITY;
 
 	/*
 	 * If we have decided that we just CAN'T continue, and the user
 	 * has not set tolerant to an insane level, give up and die.
+	 *
+	 * This is mainly used in the case when the system doesn't
+	 * support MCE broadcasting or it has been disabled.
 	 */
 	if (no_way_out && tolerant < 3)
-		mce_panic("Machine check", &panicm, msg);
+		mce_panic("Machine check", final, msg);
 
 	/*
 	 * If the error seems to be unrecoverable, something should be
@@ -585,7 +874,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		 * instruction which caused the MCE.
 		 */
 		if (m.mcgstatus & MCG_STATUS_EIPV)
-			user_space = panicm.ip && (panicm.cs & 3);
+			user_space = final->ip && (final->cs & 3);
 
 		/*
 		 * If we know that the error was in user space, send a
@@ -597,20 +886,15 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		if (user_space) {
 			force_sig(SIGBUS, current);
 		} else if (panic_on_oops || tolerant < 2) {
-			mce_panic("Uncorrected machine check", &panicm, msg);
+			mce_panic("Uncorrected machine check", final, msg);
 		}
 	}
 
 	/* notify userspace ASAP */
 	set_thread_flag(TIF_MCE_NOTIFY);
 
-	mce_report_event(regs);
-
-	/* the last thing we do is clear state */
-	for (i = 0; i < banks; i++) {
-		if (test_bit(i, toclear))
-			mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
-	}
+	if (worst > 0)
+		mce_report_event(regs);
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out:
 	atomic_dec(&mce_entry);
@@ -821,7 +1105,17 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 
 		if (c->x86 == 6 && c->x86_model < 0x1A)
 			__set_bit(0, &dont_init_banks);
+
+		/*
+		 * All newer Intel systems support MCE broadcasting. Enable
+		 * synchronization with a one second timeout.
+		 */
+		if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
+			monarch_timeout < 0)
+			monarch_timeout = USEC_PER_SEC;
 	}
+	if (monarch_timeout < 0)
+		monarch_timeout = 0;
 }
 
 static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
@@ -1068,7 +1362,9 @@ static struct miscdevice mce_log_device = {
 
 /*
  * mce=off disables machine check
- * mce=TOLERANCELEVEL (number, see above)
+ * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
+ *	monarchtimeout is how long to wait for other CPUs on machine
+ *	check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
  */
@@ -1082,9 +1378,13 @@ static int __init mcheck_enable(char *str)
 		mce_disabled = 1;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		mce_bootlog = (str[0] == 'b');
-	else if (isdigit(str[0]))
+	else if (isdigit(str[0])) {
 		get_option(&str, &tolerant);
-	else {
+		if (*str == ',') {
+			++str;
+			get_option(&str, &monarch_timeout);
+		}
+	} else {
 		printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
 		       str);
 		return 0;
@@ -1221,6 +1521,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
 
 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
+static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
 
 static struct sysdev_ext_attribute attr_check_interval = {
 	_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
@@ -1230,6 +1531,7 @@ static struct sysdev_ext_attribute attr_check_interval = {
 
 static struct sysdev_attribute *mce_attrs[] = {
 	&attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
+	&attr_monarch_timeout.attr,
 	NULL
 };
 
-- 
cgit v0.10.2


From ac9603754dc7e286e62ae4f1067958d5b0075f99 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:58 +0200
Subject: x86, mce: make non Monarch panic message "Fatal machine check" too

... instead of "Machine check". This is for consistency with the Monarch
panic message.

Based on a report from Ying Huang.

v2: But add a descriptive postfix so that the test suite can distingush.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ba43189..d5cb0b4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -554,7 +554,7 @@ static void mce_reign(void)
 	 * other CPUs.
 	 */
 	if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
-		mce_panic("Fatal machine check", m, msg);
+		mce_panic("Fatal Machine check", m, msg);
 
 	/*
 	 * For UC somewhere we let the CPU who detects it handle it.
@@ -858,7 +858,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * support MCE broadcasting or it has been disabled.
 	 */
 	if (no_way_out && tolerant < 3)
-		mce_panic("Machine check", final, msg);
+		mce_panic("Fatal machine check on current CPU", final, msg);
 
 	/*
 	 * If the error seems to be unrecoverable, something should be
-- 
cgit v0.10.2


From 1b2797dcc9f0ad89bc382ace26c6baafbc7e33c2 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Wed, 27 May 2009 21:56:51 +0200
Subject: x86, mce: improve mce_get_rip

Assume IP on the stack is valid when either EIPV or RIPV are set.
This influences whether the machine check exception handler decides
to return or panic.

This fixes a test case in the mce-test suite and is more compliant
to the specification.

This currently only makes a difference in a artificial testing
scenario with the mce-test test suite.

Also in addition do not force the EIPV to be valid with the exact
register MSRs, and keep in trust the CS value on stack even if MSR
is available.

[AK: combination of patches from Huang Ying and Hidetoshi Seto, with
new description by me]
[add some description, no code changed - HS]

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d5cb0b4..a7dc369 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -306,21 +306,22 @@ int mce_available(struct cpuinfo_x86 *c)
 	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }
 
+/*
+ * Get the address of the instruction at the time of the machine check
+ * error.
+ */
 static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 {
-	if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
+
+	if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
 		m->ip = regs->ip;
 		m->cs = regs->cs;
 	} else {
 		m->ip = 0;
 		m->cs = 0;
 	}
-	if (rip_msr) {
-		/* Assume the RIP in the MSR is exact. Is this true? */
-		m->mcgstatus |= MCG_STATUS_EIPV;
+	if (rip_msr)
 		m->ip = mce_rdmsrl(rip_msr);
-		m->cs = 0;
-	}
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC 
-- 
cgit v0.10.2


From 29b0f591d678838435fbb3e15ef20266f1a9e01d Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:56 +0200
Subject: x86, mce: default to panic timeout for machine checks

Fatal machine checks can be logged to disk after boot, but only if
the system did a warm reboot. That's unfortunately difficult with the
default panic behaviour, which waits forever and the admin has to
press the power button because modern systems usually miss a reset button.
This clears the machine checks in the registers and make
it impossible to log them.

This patch changes the default for machine check panic to always
reboot after 30s. Then the mce can be successfully logged after
reboot.

I believe this will improve machine check experience for any
system running the X server.

This is dependent on successfull boot logging of MCEs. This currently
only works on Intel systems, on AMD there are quite a lot of systems
around which leave junk in the machine check registers after boot,
so it's disabled here. These systems will continue to default
to endless waiting panic.

v2: Only force panic timeout when it's shorter (H.Seto)
v3: Only force timeout when there is no timeout
(based on comment H.Seto)

[ Fix changelog - HS ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a7dc369..79d2431 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -82,6 +82,7 @@ static unsigned long		notify_user;
 static int			rip_msr;
 static int			mce_bootlog = -1;
 static int			monarch_timeout = -1;
+static int			mce_panic_timeout;
 
 static char			trigger[128];
 static char			*trigger_argv[2] = { trigger, NULL };
@@ -216,6 +217,8 @@ static void wait_for_panic(void)
 	local_irq_enable();
 	while (timeout-- > 0)
 		udelay(1);
+	if (panic_timeout == 0)
+		panic_timeout = mce_panic_timeout;
 	panic("Panicing machine check CPU died");
 }
 
@@ -253,6 +256,8 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 		printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
 	if (exp)
 		printk(KERN_EMERG "Machine check: %s\n", exp);
+	if (panic_timeout == 0)
+		panic_timeout = mce_panic_timeout;
 	panic(msg);
 }
 
@@ -1117,6 +1122,8 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
 	}
 	if (monarch_timeout < 0)
 		monarch_timeout = 0;
+	if (mce_bootlog != 0)
+		mce_panic_timeout = 30;
 }
 
 static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
-- 
cgit v0.10.2


From 86503560e48153aba539ff117450d31ab2ef76d7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:58 +0200
Subject: x86, mce: print header/footer only once for multiple MCEs

When multiple MCEs are printed print the "HARDWARE ERROR" header
and "This is not a software error" footer only once. This
makes the output much more compact with many CPUs.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 79d2431..ff9c732 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -176,11 +176,13 @@ void mce_log(struct mce *mce)
 	set_bit(0, &notify_user);
 }
 
-static void print_mce(struct mce *m)
+static void print_mce(struct mce *m, int *first)
 {
-	printk(KERN_EMERG "\n"
-	       KERN_EMERG "HARDWARE ERROR\n"
-	       KERN_EMERG
+	if (*first) {
+		printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n");
+		*first = 0;
+	}
+	printk(KERN_EMERG
 	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
 	if (m->ip) {
@@ -200,9 +202,12 @@ static void print_mce(struct mce *m)
 	printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
 			m->cpuvendor, m->cpuid, m->time, m->socketid,
 			m->apicid);
-	printk(KERN_EMERG "This is not a software problem!\n");
-	printk(KERN_EMERG "Run through mcelog --ascii to decode "
-	       "and contact your hardware vendor\n");
+}
+
+static void print_mce_tail(void)
+{
+	printk(KERN_EMERG "This is not a software problem!\n"
+	       KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -225,6 +230,7 @@ static void wait_for_panic(void)
 static void mce_panic(char *msg, struct mce *final, char *exp)
 {
 	int i;
+	int first = 1;
 
 	/*
 	 * Make sure only one CPU runs in machine check panic
@@ -240,7 +246,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 		struct mce *m = &mcelog.entry[i];
 		if ((m->status & MCI_STATUS_VAL) &&
 			!(m->status & MCI_STATUS_UC))
-			print_mce(m);
+			print_mce(m, &first);
 	}
 	/* Now print uncorrected but with the final one last */
 	for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -248,12 +254,13 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 		if (!(m->status & MCI_STATUS_VAL))
 			continue;
 		if (!final || memcmp(m, final, sizeof(struct mce)))
-			print_mce(m);
+			print_mce(m, &first);
 	}
 	if (final)
-		print_mce(final);
+		print_mce(final, &first);
 	if (cpu_missing)
 		printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
+	print_mce_tail();
 	if (exp)
 		printk(KERN_EMERG "Machine check: %s\n", exp);
 	if (panic_timeout == 0)
-- 
cgit v0.10.2


From ed7290d0ee8f81aa78bfe816f01b012f208cafc5 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:57 +0200
Subject: x86, mce: implement new status bits

The x86 architecture recently added some new machine check status bits:
S(ignalled) and AR (Action-Required). Signalled allows to check
if a specific event caused an exception or was just logged through CMCI.
AR allows the kernel to decide if an event needs immediate action
or can be delayed or ignored.

Implement support for these new status bits. mce_severity() uses
the new bits to grade the machine check correctly and decide what
to do. The exception handler uses AR to decide to kill or not.
The S bit is used to separate events between the poll/CMCI handler
and the exception handler.

Classical UC always leads to panic. That was true before anyways
because the existing CPUs always passed a PCC with it.

Also corrects the rules whether to kill in user or kernel context
and how to handle missing RIPV.

The machine check handler largely uses the mce-severity grading
engine now instead of making its own decisions. This means the logic
is centralized in one place.  This is useful because it has to be
evaluated multiple times.

v2: Some rule fixes; Add AO events
Fix RIPV, RIPV|EIPV order (Ying Huang)
Fix UCNA with AR=1 message (Ying Huang)
Add comment about panicing in m_c_p.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ba1f889..afd3cdf 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -15,6 +15,7 @@
 #define MCG_EXT_CNT_MASK	0xff0000     /* Number of Extended registers */
 #define MCG_EXT_CNT_SHIFT	16
 #define MCG_EXT_CNT(c)		(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P	 	(1ULL<<24)   /* MCA recovery/new status bits */
 
 #define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
 #define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
@@ -27,6 +28,15 @@
 #define MCI_STATUS_MISCV (1ULL<<59)  /* misc error reg. valid */
 #define MCI_STATUS_ADDRV (1ULL<<58)  /* addr reg. valid */
 #define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
+#define MCI_STATUS_S	 (1ULL<<56)  /* Signaled machine check */
+#define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF  0	/* segment offset */
+#define MCM_ADDR_LINEAR  1	/* linear address */
+#define MCM_ADDR_PHYS	 2	/* physical address */
+#define MCM_ADDR_MEM	 3	/* memory address */
+#define MCM_ADDR_GENERIC 7	/* generic */
 
 /* Fields are zero when not available */
 struct mce {
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index f126b4a..54dcb8f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -2,9 +2,14 @@
 
 enum severity_level {
 	MCE_NO_SEVERITY,
+	MCE_KEEP_SEVERITY,
 	MCE_SOME_SEVERITY,
+	MCE_AO_SEVERITY,
 	MCE_UC_SEVERITY,
+	MCE_AR_SEVERITY,
 	MCE_PANIC_SEVERITY,
 };
 
 int mce_severity(struct mce *a, int tolerant, char **msg);
+
+extern int mce_ser;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c189e89..4f4d2ca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -19,43 +19,117 @@
  * first. Since there are quite a lot of combinations test the bits in a
  * table-driven way. The rules are simply processed in order, first
  * match wins.
+ *
+ * Note this is only used for machine check exceptions, the corrected
+ * errors use much simpler rules. The exceptions still check for the corrected
+ * errors, but only to leave them alone for the CMCI handler (except for
+ * panic situations)
  */
 
+enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+
 static struct severity {
 	u64 mask;
 	u64 result;
 	unsigned char sev;
 	unsigned char mcgmask;
 	unsigned char mcgres;
+	unsigned char ser;
+	unsigned char context;
 	char *msg;
 } severities[] = {
+#define KERNEL .context = IN_KERNEL
+#define USER .context = IN_USER
+#define SER .ser = SER_REQUIRED
+#define NOSER .ser = NO_SER
 #define SEV(s) .sev = MCE_ ## s ## _SEVERITY
 #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
 #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
 #define MCGMASK(x, res, s, m, r...) \
 	{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
+#define MASK(x, y, s, m, r...) \
+	{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
+#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCACOD 0xffff
+
 	BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
 	BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
 	BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
-	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"),
+	/* When MCIP is not set something is very confused */
+	MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+	/* Neither return not error IP -- no chance to recover -> PANIC */
+	MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
+		"Neither restart nor error IP"),
+	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
+		KERNEL),
+	BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
+	MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
+	     "Spurious not enabled", SER),
+
+	/* ignore OVER for UCNA */
+	MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
+	     "Uncorrected no action required", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
+	     "Illegal combination (UCNA with AR=1)", SER),
+	MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+
+	/* AR add known MCACODs here */
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
+	     "Action required with lost events", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
+	     "Action required; unknown MCACOD", SER),
+
+	/* known AO MCACODs: */
+	MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
+	     "Action optional: memory scrubbing error", SER),
+	MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
+	     "Action optional: last level cache writeback error", SER),
+
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
+	     "Action optional unknown MCACOD", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
+	     "Action optional with lost events", SER),
 	BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
 	BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
 	BITSET(0, SOME, "No match")	/* always matches. keep at end */
 };
 
+/*
+ * If the EIPV bit is set, it means the saved IP is the
+ * instruction which caused the MCE.
+ */
+static int error_context(struct mce *m)
+{
+	if (m->mcgstatus & MCG_STATUS_EIPV)
+		return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+	/* Unknown, assume kernel */
+	return IN_KERNEL;
+}
+
 int mce_severity(struct mce *a, int tolerant, char **msg)
 {
+	enum context ctx = error_context(a);
 	struct severity *s;
+
 	for (s = severities;; s++) {
 		if ((a->status & s->mask) != s->result)
 			continue;
 		if ((a->mcgstatus & s->mcgmask) != s->mcgres)
 			continue;
-		if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) &&
-			tolerant < 1)
-			return MCE_PANIC_SEVERITY;
+		if (s->ser == SER_REQUIRED && !mce_ser)
+			continue;
+		if (s->ser == NO_SER && mce_ser)
+			continue;
+		if (s->context && ctx != s->context)
+			continue;
 		if (msg)
 			*msg = s->msg;
+		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
+			if (panic_on_oops || tolerant < 1)
+				return MCE_PANIC_SEVERITY;
+		}
 		return s->sev;
 	}
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff9c732..f051a78 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int			rip_msr;
 static int			mce_bootlog = -1;
 static int			monarch_timeout = -1;
 static int			mce_panic_timeout;
+int				mce_ser;
 
 static char			trigger[128];
 static char			*trigger_argv[2] = { trigger, NULL };
@@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * Those are just logged through /dev/mcelog.
  *
  * This is executed in standard interrupt context.
+ *
+ * Note: spec recommends to panic for fatal unsignalled
+ * errors here. However this would be quite problematic --
+ * we would need to reimplement the Monarch handling and
+ * it would mess up the exclusion between exception handler
+ * and poll hander -- * so we skip this for now.
+ * These cases should not happen anyways, or only when the CPU
+ * is already totally * confused. In this case it's likely it will
+ * not fully execute the machine check handler either.
  */
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
@@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 			continue;
 
 		/*
-		 * Uncorrected events are handled by the exception handler
-		 * when it is enabled. But when the exception is disabled log
-		 * everything.
+		 * Uncorrected or signalled events are handled by the exception
+		 * handler when it is enabled, so don't process those here.
 		 *
 		 * TBD do the same check for MCI_STATUS_EN here?
 		 */
-		if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
+		if (!(flags & MCP_UC) &&
+		    (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
 			continue;
 
 		if (m.status & MCI_STATUS_MISCV)
@@ -790,6 +800,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	barrier();
 
 	/*
+	 * When no restart IP must always kill or panic.
+	 */
+	if (!(m.mcgstatus & MCG_STATUS_RIPV))
+		kill_it = 1;
+
+	/*
 	 * Go through all the banks in exclusion of the other CPUs.
 	 * This way we don't report duplicated events on shared banks
 	 * because the first one to see it will clear it.
@@ -809,10 +825,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			continue;
 
 		/*
-		 * Non uncorrected errors are handled by machine_check_poll
-		 * Leave them alone, unless this panics.
+		 * Non uncorrected or non signaled errors are handled by
+		 * machine_check_poll. Leave them alone, unless this panics.
 		 */
-		if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out)
+		if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+			!no_way_out)
 			continue;
 
 		/*
@@ -820,17 +837,16 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		 */
 		add_taint(TAINT_MACHINE_CHECK);
 
-		__set_bit(i, toclear);
+		severity = mce_severity(&m, tolerant, NULL);
 
-		if (m.status & MCI_STATUS_EN) {
-			/*
-			 * If this error was uncorrectable and there was
-			 * an overflow, we're in trouble.  If no overflow,
-			 * we might get away with just killing a task.
-			 */
-			if (m.status & MCI_STATUS_UC)
-				kill_it = 1;
-		} else {
+		/*
+		 * When machine check was for corrected handler don't touch,
+		 * unless we're panicing.
+		 */
+		if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+			continue;
+		__set_bit(i, toclear);
+		if (severity == MCE_NO_SEVERITY) {
 			/*
 			 * Machine check event was not enabled. Clear, but
 			 * ignore.
@@ -838,6 +854,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			continue;
 		}
 
+		/*
+		 * Kill on action required.
+		 */
+		if (severity == MCE_AR_SEVERITY)
+			kill_it = 1;
+
 		if (m.status & MCI_STATUS_MISCV)
 			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
 		if (m.status & MCI_STATUS_ADDRV)
@@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		mce_get_rip(&m, regs);
 		mce_log(&m);
 
-		severity = mce_severity(&m, tolerant, NULL);
 		if (severity > worst) {
 			*final = m;
 			worst = severity;
@@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * one task, do that.  If the user has set the tolerance very
 	 * high, don't try to do anything at all.
 	 */
-	if (kill_it && tolerant < 3) {
-		int user_space = 0;
-
-		/*
-		 * If the EIPV bit is set, it means the saved IP is the
-		 * instruction which caused the MCE.
-		 */
-		if (m.mcgstatus & MCG_STATUS_EIPV)
-			user_space = final->ip && (final->cs & 3);
 
-		/*
-		 * If we know that the error was in user space, send a
-		 * SIGBUS.  Otherwise, panic if tolerance is low.
-		 *
-		 * force_sig() takes an awful lot of locks and has a slight
-		 * risk of deadlocking.
-		 */
-		if (user_space) {
-			force_sig(SIGBUS, current);
-		} else if (panic_on_oops || tolerant < 2) {
-			mce_panic("Uncorrected machine check", final, msg);
-		}
-	}
+	if (kill_it && tolerant < 3)
+		force_sig(SIGBUS, current);
 
 	/* notify userspace ASAP */
 	set_thread_flag(TIF_MCE_NOTIFY);
@@ -1049,6 +1050,9 @@ static int mce_cap_init(void)
 	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
 		rip_msr = MSR_IA32_MCG_EIP;
 
+	if (cap & MCG_SER_P)
+		mce_ser = 1;
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 4611a6fa4b37cf6b8b6066ed0d605c994c62a1a0 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Wed, 27 May 2009 21:56:57 +0200
Subject: x86, mce: export MCE severities coverage via debugfs

The MCE severity judgement code is data-driven, so code coverage tools
such as gcov can not be used for measuring coverage. Instead a dedicated
coverage mechanism is implemented.  The kernel keeps track of rules
executed and reports them in debugfs.

This is useful for increasing coverage of the mce-test testsuite.

Right now it's unconditionally enabled because it's very little code.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 4f4d2ca..ff0807f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -10,6 +10,9 @@
  * Author: Andi Kleen
  */
 #include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
 #include <asm/mce.h>
 
 #include "mce-internal.h"
@@ -37,6 +40,7 @@ static struct severity {
 	unsigned char mcgres;
 	unsigned char ser;
 	unsigned char context;
+	unsigned char covered;
 	char *msg;
 } severities[] = {
 #define KERNEL .context = IN_KERNEL
@@ -126,6 +130,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
 			continue;
 		if (msg)
 			*msg = s->msg;
+		s->covered = 1;
 		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
 			if (panic_on_oops || tolerant < 1)
 				return MCE_PANIC_SEVERITY;
@@ -133,3 +138,81 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
 		return s->sev;
 	}
 }
+
+static void *s_start(struct seq_file *f, loff_t *pos)
+{
+	if (*pos >= ARRAY_SIZE(severities))
+		return NULL;
+	return &severities[*pos];
+}
+
+static void *s_next(struct seq_file *f, void *data, loff_t *pos)
+{
+	if (++(*pos) >= ARRAY_SIZE(severities))
+		return NULL;
+	return &severities[*pos];
+}
+
+static void s_stop(struct seq_file *f, void *data)
+{
+}
+
+static int s_show(struct seq_file *f, void *data)
+{
+	struct severity *ser = data;
+	seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
+	return 0;
+}
+
+static const struct seq_operations severities_seq_ops = {
+	.start	= s_start,
+	.next	= s_next,
+	.stop	= s_stop,
+	.show	= s_show,
+};
+
+static int severities_coverage_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &severities_seq_ops);
+}
+
+static ssize_t severities_coverage_write(struct file *file,
+					 const char __user *ubuf,
+					 size_t count, loff_t *ppos)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(severities); i++)
+		severities[i].covered = 0;
+	return count;
+}
+
+static const struct file_operations severities_coverage_fops = {
+	.open		= severities_coverage_open,
+	.release	= seq_release,
+	.read		= seq_read,
+	.write		= severities_coverage_write,
+};
+
+static int __init severities_debugfs_init(void)
+{
+	struct dentry *dmce = NULL, *fseverities_coverage = NULL;
+
+	dmce = debugfs_create_dir("mce", NULL);
+	if (dmce == NULL)
+		goto err_out;
+	fseverities_coverage = debugfs_create_file("severities-coverage",
+						   0444, dmce, NULL,
+						   &severities_coverage_fops);
+	if (fseverities_coverage == NULL)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	if (fseverities_coverage)
+		debugfs_remove(fseverities_coverage);
+	if (dmce)
+		debugfs_remove(dmce);
+	return -ENOMEM;
+}
+late_initcall(severities_debugfs_init);
-- 
cgit v0.10.2


From 4ef702c10b5df18ab04921fc252c26421d4d6c75 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:52 +0200
Subject: x86: fix panic with interrupts off (needed for MCE)

For some time each panic() called with interrupts disabled
triggered the !irqs_disabled() WARN_ON in smp_call_function(),
producing ugly backtraces and confusing users.

This is a common situation with machine checks for example which
tend to call panic with interrupts disabled, but will also hit
in other situations e.g. panic during early boot.  In fact it
means that panic cannot be called in many circumstances, which
would be bad.

This all started with the new fancy queued smp_call_function,
which is then used by the shutdown path to shut down the other
CPUs.

On closer examination it turned out that the fancy RCU
smp_call_function() does lots of things not suitable in a panic
situation anyways, like allocating memory and relying on complex
system state.

I originally tried to patch this over by checking for panic
there, but it was quite complicated and the original patch
was also not very popular.  This also didn't fix some of the
underlying complexity problems.

The new code in post 2.6.29 tries to patch around this by
checking for oops_in_progress, but that is not enough to make
this fully safe and I don't think that's a real solution
because panic has to be reliable.

So instead use an own vector to reboot.  This makes the reboot
code extremly straight forward, which is definitely a big plus
in a panic situation where it is important to avoid relying on
too much kernel state.  The new simple code is also safe to be
called from interupts off region because it is very very simple.

There can be situations where it is important that panic
is reliable.  For example on a fatal machine check the panic
is needed to get the system up again and running as quickly
as possible.  So it's important that panic is reliable and
all function it calls simple.

This is why I came up with this simple vector scheme.
It's very hard to beat in simplicity.  Vectors are not
particularly precious anymore since all big systems are
using per CPU vectors.

Another possibility would have been to use an NMI similar
to kdump, but there is still the problem that NMIs don't
work reliably on some systems due to BIOS issues.  NMIs
would have been able to stop CPUs running with interrupts
off too.  In the sake of universal reliability I opted for
using a non NMI vector for now.

I put the reboot vector into the highest priority bucket of
the APIC vectors and moved the 64bit UV_BAU message down
instead into the next lower priority.

[ Impact: bug fix, fixes an old regression ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 4cdcf5a..69f8868 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
 
 BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
 		 smp_invalidate_interrupt)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4e59197..1c8f28a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -45,6 +45,7 @@ extern void invalidate_interrupt6(void);
 extern void invalidate_interrupt7(void);
 
 extern void irq_move_cleanup_interrupt(void);
+extern void reboot_interrupt(void);
 extern void threshold_interrupt(void);
 
 extern void call_function_interrupt(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 68f7cf8..28477e4 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -88,12 +88,7 @@
 #define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR		0xfa
 #define THRESHOLD_APIC_VECTOR		0xf9
-
-#ifdef CONFIG_X86_32
-/* 0xf8 : free */
-#else
-# define UV_BAU_MESSAGE			0xf8
-#endif
+#define REBOOT_VECTOR			0xf8
 
 /* f0-f7 used for spreading out TLB flushes: */
 #define INVALIDATE_TLB_VECTOR_END	0xf7
@@ -117,6 +112,8 @@
  */
 #define GENERIC_INTERRUPT_VECTOR	0xed
 
+#define UV_BAU_MESSAGE			0xec
+
 /*
  * Self IPI vector for machine checks
  */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 711c130..4234b12 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -976,6 +976,8 @@ END(\sym)
 #ifdef CONFIG_SMP
 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
 	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+apicinterrupt REBOOT_VECTOR \
+	reboot_interrupt smp_reboot_interrupt
 #endif
 
 #ifdef CONFIG_X86_UV
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 441f6ec..4a69ec5 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -173,6 +173,9 @@ static void __init smp_intr_init(void)
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
+
+	/* IPI used for rebooting/stopping */
+	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
 #endif
 #endif /* CONFIG_SMP */
 }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index f6db48c..bf1831a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -150,14 +150,40 @@ void native_send_call_func_ipi(const struct cpumask *mask)
  * this function calls the 'stop' function on all other CPUs in the system.
  */
 
+asmlinkage void smp_reboot_interrupt(void)
+{
+	ack_APIC_irq();
+	irq_enter();
+	stop_this_cpu(NULL);
+	irq_exit();
+}
+
 static void native_smp_send_stop(void)
 {
 	unsigned long flags;
+	unsigned long wait;
 
 	if (reboot_force)
 		return;
 
-	smp_call_function(stop_this_cpu, NULL, 0);
+	/*
+	 * Use an own vector here because smp_call_function
+	 * does lots of things not suitable in a panic situation.
+	 * On most systems we could also use an NMI here,
+	 * but there are a few systems around where NMI
+	 * is problematic so stay with an non NMI for now
+	 * (this implies we cannot stop CPUs spinning with irq off
+	 * currently)
+	 */
+	if (num_online_cpus() > 1) {
+		apic->send_IPI_allbutself(REBOOT_VECTOR);
+
+		/* Don't wait longer than a second */
+		wait = USEC_PER_SEC;
+		while (num_online_cpus() > 1 && wait--)
+			udelay(1);
+	}
+
 	local_irq_save(flags);
 	disable_local_APIC();
 	local_irq_restore(flags);
-- 
cgit v0.10.2


From 9ff36ee9668ff41ec3274597c730524645929b0f Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:58 +0200
Subject: x86, mce: rename mce_notify_user to mce_notify_irq

Rename the mce_notify_user function to mce_notify_irq. The next
patch will split the wakeup handling of interrupt context
and of process context and it's better to give it a clearer
name for this.

Contains a fix from Ying Huang

[ Impact: cleanup ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index afd3cdf..713926b 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -159,7 +159,7 @@ enum mcp_flags {
 };
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
-int mce_notify_user(void);
+int mce_notify_irq(void);
 
 DECLARE_PER_CPU(struct mce, injectm);
 extern struct file_operations mce_chrdev_ops;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 7d858fb..a3a235a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -65,7 +65,7 @@ static void raise_mce(unsigned long data)
 		memset(&b, 0xff, sizeof(mce_banks_t));
 		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
 		machine_check_poll(0, &b);
-		mce_notify_user();
+		mce_notify_irq();
 		printk(KERN_INFO "Finished machine check poll on CPU %d\n",
 		       cpu);
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f051a78..13e1b7f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -348,7 +348,7 @@ asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();
 	exit_idle();
 	irq_enter();
-	mce_notify_user();
+	mce_notify_irq();
 	irq_exit();
 }
 #endif
@@ -356,7 +356,7 @@ asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
 static void mce_report_event(struct pt_regs *regs)
 {
 	if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
-		mce_notify_user();
+		mce_notify_irq();
 		return;
 	}
 
@@ -968,7 +968,7 @@ static void mcheck_timer(unsigned long data)
 	 * polling interval, otherwise increase the polling interval.
 	 */
 	n = &__get_cpu_var(next_interval);
-	if (mce_notify_user())
+	if (mce_notify_irq())
 		*n = max(*n/2, HZ/100);
 	else
 		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
@@ -989,7 +989,7 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  * Can be called from interrupt context, but not from machine check/NMI
  * context.
  */
-int mce_notify_user(void)
+int mce_notify_irq(void)
 {
 	/* Not more than two messages every minute */
 	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
@@ -1014,7 +1014,7 @@ int mce_notify_user(void)
 	}
 	return 0;
 }
-EXPORT_SYMBOL_GPL(mce_notify_user);
+EXPORT_SYMBOL_GPL(mce_notify_irq);
 
 /*
  * Initialize Machine Checks for a CPU.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index eff3740..b7c5a24 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -80,7 +80,7 @@ static int cmci_supported(int *banks)
 static void intel_threshold_interrupt(void)
 {
 	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
-	mce_notify_user();
+	mce_notify_irq();
 }
 
 static void print_update(char *type, int *hdr, int num)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d0851e3..d5dc15b 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -860,7 +860,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_NEW_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
-		mce_notify_user();
+		mce_notify_irq();
 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
 
 	/* deal with pending signal delivery */
-- 
cgit v0.10.2


From 8fa8dd9e3aafb7b440b7d54219891615abc6390e Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:58 +0200
Subject: x86, mce: define MCE_VECTOR

Add MCE_VECTOR for the #MC exception.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 28477e4..1b35c43 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -25,6 +25,7 @@
  */
 
 #define NMI_VECTOR			0x02
+#define MCE_VECTOR			0x12
 
 /*
  * IDT vectors usable for external interrupt sources start
-- 
cgit v0.10.2


From 9b1beaf2b551a8a1604f104025b24e9c535c8963 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 27 May 2009 21:56:59 +0200
Subject: x86, mce: support action-optional machine checks

Newer Intel CPUs support a new class of machine checks called recoverable
action optional.

Action Optional means that the CPU detected some form of corruption in
the background and tells the OS about using a machine check
exception. The OS can then take appropiate action, like killing the
process with the corrupted data or logging the event properly to disk.

This is done by the new generic high level memory failure handler added
in a earlier patch. The high level handler takes the address with the
failed memory and does the appropiate action, like killing the process.

In this version of the patch the high level handler is stubbed out
with a weak function to not create a direct dependency on the hwpoison
branch.

The high level handler cannot be directly called from the machine check
exception though, because it has to run in a defined process context to
be able to sleep when taking VM locks (it is not expected to sleep for a
long time, just do so in some exceptional cases like lock contention)

Thus the MCE handler has to queue a work item for process context,
trigger process context and then call the high level handler from there.

This patch adds two path to process context: through a per thread kernel
exit notify_user() callback or through a high priority work item.
The first runs when the process exits back to user space, the other when
it goes to sleep and there is no higher priority process.

The machine check handler will schedule both, and whoever runs first
will grab the event. This is done because quick reaction to this
event is critical to avoid a potential more fatal machine check
when the corruption is consumed.

There is a simple lock less ring buffer to queue the corrupted
addresses between the exception handler and the process context handler.
Then in process context it just calls the high level VM code with
the corrupted PFNs.

The code adds the required code to extract the failed address from
the CPU's machine check registers. It doesn't try to handle all
possible cases -- the specification has 6 different ways to specify
memory address -- but only the linear address.

Most of the required checking has been already done earlier in the
mce_severity rule checking engine.  Following the Intel
recommendations Action Optional errors are only enabled for known
situations (encoded in MCACODs). The errors are ignored otherwise,
because they are action optional.

v2: Improve comment, disable preemption while processing ring buffer
    (reported by Ying Huang)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 713926b..82978ad 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -160,6 +160,7 @@ enum mcp_flags {
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
+void mce_notify_process(void);
 
 DECLARE_PER_CPU(struct mce, injectm);
 extern struct file_operations mce_chrdev_ops;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 13e1b7f..d4e7b59 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -33,6 +33,7 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 
 #include <asm/processor.h>
 #include <asm/hw_irq.h>
@@ -105,6 +106,8 @@ static inline int skip_bank_init(int i)
 	return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
 }
 
+static DEFINE_PER_CPU(struct work_struct, mce_work);
+
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
 {
@@ -312,6 +315,61 @@ static void mce_wrmsrl(u32 msr, u64 v)
 	wrmsrl(msr, v);
 }
 
+/*
+ * Simple lockless ring to communicate PFNs from the exception handler with the
+ * process context work function. This is vastly simplified because there's
+ * only a single reader and a single writer.
+ */
+#define MCE_RING_SIZE 16	/* we use one entry less */
+
+struct mce_ring {
+	unsigned short start;
+	unsigned short end;
+	unsigned long ring[MCE_RING_SIZE];
+};
+static DEFINE_PER_CPU(struct mce_ring, mce_ring);
+
+/* Runs with CPU affinity in workqueue */
+static int mce_ring_empty(void)
+{
+	struct mce_ring *r = &__get_cpu_var(mce_ring);
+
+	return r->start == r->end;
+}
+
+static int mce_ring_get(unsigned long *pfn)
+{
+	struct mce_ring *r;
+	int ret = 0;
+
+	*pfn = 0;
+	get_cpu();
+	r = &__get_cpu_var(mce_ring);
+	if (r->start == r->end)
+		goto out;
+	*pfn = r->ring[r->start];
+	r->start = (r->start + 1) % MCE_RING_SIZE;
+	ret = 1;
+out:
+	put_cpu();
+	return ret;
+}
+
+/* Always runs in MCE context with preempt off */
+static int mce_ring_add(unsigned long pfn)
+{
+	struct mce_ring *r = &__get_cpu_var(mce_ring);
+	unsigned next;
+
+	next = (r->end + 1) % MCE_RING_SIZE;
+	if (next == r->start)
+		return -1;
+	r->ring[r->end] = pfn;
+	wmb();
+	r->end = next;
+	return 0;
+}
+
 int mce_available(struct cpuinfo_x86 *c)
 {
 	if (mce_disabled)
@@ -319,6 +377,15 @@ int mce_available(struct cpuinfo_x86 *c)
 	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }
 
+static void mce_schedule_work(void)
+{
+	if (!mce_ring_empty()) {
+		struct work_struct *work = &__get_cpu_var(mce_work);
+		if (!work_pending(work))
+			schedule_work(work);
+	}
+}
+
 /*
  * Get the address of the instruction at the time of the machine check
  * error.
@@ -349,6 +416,7 @@ asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
 	exit_idle();
 	irq_enter();
 	mce_notify_irq();
+	mce_schedule_work();
 	irq_exit();
 }
 #endif
@@ -357,6 +425,13 @@ static void mce_report_event(struct pt_regs *regs)
 {
 	if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
 		mce_notify_irq();
+		/*
+		 * Triggering the work queue here is just an insurance
+		 * policy in case the syscall exit notify handler
+		 * doesn't run soon enough or ends up running on the
+		 * wrong CPU (can happen when audit sleeps)
+		 */
+		mce_schedule_work();
 		return;
 	}
 
@@ -731,6 +806,23 @@ reset:
 	return ret;
 }
 
+/*
+ * Check if the address reported by the CPU is in a format we can parse.
+ * It would be possible to add code for most other cases, but all would
+ * be somewhat complicated (e.g. segment offset would require an instruction
+ * parser). So only support physical addresses upto page granuality for now.
+ */
+static int mce_usable_address(struct mce *m)
+{
+	if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
+		return 0;
+	if ((m->misc & 0x3f) > PAGE_SHIFT)
+		return 0;
+	if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS)
+		return 0;
+	return 1;
+}
+
 static void mce_clear_state(unsigned long *toclear)
 {
 	int i;
@@ -865,6 +957,16 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		if (m.status & MCI_STATUS_ADDRV)
 			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
 
+		/*
+		 * Action optional error. Queue address for later processing.
+		 * When the ring overflows we just ignore the AO error.
+		 * RED-PEN add some logging mechanism when
+		 * usable_address or mce_add_ring fails.
+		 * RED-PEN don't ignore overflow for tolerant == 0
+		 */
+		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
+			mce_ring_add(m.addr >> PAGE_SHIFT);
+
 		mce_get_rip(&m, regs);
 		mce_log(&m);
 
@@ -916,6 +1018,36 @@ out:
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 
+/* dummy to break dependency. actual code is in mm/memory-failure.c */
+void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+{
+	printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
+}
+
+/*
+ * Called after mce notification in process context. This code
+ * is allowed to sleep. Call the high level VM handler to process
+ * any corrupted pages.
+ * Assume that the work queue code only calls this one at a time
+ * per CPU.
+ * Note we don't disable preemption, so this code might run on the wrong
+ * CPU. In this case the event is picked up by the scheduled work queue.
+ * This is merely a fast path to expedite processing in some common
+ * cases.
+ */
+void mce_notify_process(void)
+{
+	unsigned long pfn;
+	mce_notify_irq();
+	while (mce_ring_get(&pfn))
+		memory_failure(pfn, MCE_VECTOR);
+}
+
+static void mce_process_work(struct work_struct *dummy)
+{
+	mce_notify_process();
+}
+
 #ifdef CONFIG_X86_MCE_INTEL
 /***
  * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
@@ -1204,6 +1336,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 	mce_init();
 	mce_cpu_features(c);
 	mce_init_timer();
+	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
 }
 
 /*
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d5dc15b..4976888 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -860,7 +860,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_NEW_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
-		mce_notify_irq();
+		mce_notify_process();
 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
 
 	/* deal with pending signal delivery */
-- 
cgit v0.10.2


From 8051dbd2dfd1427cc102888d7d96bf39de0be150 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Tue, 2 Jun 2009 16:53:23 +0900
Subject: x86, mce: fix for mce counters

Make the MCE counters work on 32bit and add poll count in
arch_irq_stat_cpu.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index eff46b5..9773395 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -95,7 +95,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	seq_printf(p, "  Threshold APIC interrupts\n");
 # endif
 #endif
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_NEW_MCE
 	seq_printf(p, "%*s: ", prec, "MCE");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -172,9 +172,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 {
 	u64 sum = irq_stats(cpu)->__nmi_count;
 
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
-	sum += per_cpu(mce_exception_count, cpu);
-#endif
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
@@ -192,6 +189,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_threshold_count;
 # endif
 #endif
+#ifdef CONFIG_X86_NEW_MCE
+	sum += per_cpu(mce_exception_count, cpu);
+	sum += per_cpu(mce_poll_count, cpu);
+#endif
 	return sum;
 }
 
-- 
cgit v0.10.2


From bfcd3555af478dbf04c87adc9bb1a739d0a6ccff Mon Sep 17 00:00:00 2001
From: Alberto Bertogli <albertito@blitiri.com.ar>
Date: Tue, 9 Jun 2009 00:06:20 -0400
Subject: jbd2: Fix minor typos in comments in fs/jbd2/journal.c

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5814410..62be7d2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
  * Journal abort has very specific semantics, which we describe
  * for journal abort.
  *
- * Two internal function, which provide abort to te jbd layer
+ * Two internal functions, which provide abort to the jbd layer
  * itself are here.
  */
 
@@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
  * int jbd2_journal_errno () - returns the journal's error state.
  * @journal: journal to examine.
  *
- * This is the errno numbet set with jbd2_journal_abort(), the last
+ * This is the errno number set with jbd2_journal_abort(), the last
  * time the journal was mounted - if the journal was stopped
  * without calling abort this will be 0.
  *
@@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal)
  * int jbd2_journal_clear_err () - clears the journal's error state
  * @journal: journal to act on.
  *
- * An error must be cleared or Acked to take a FS out of readonly
+ * An error must be cleared or acked to take a FS out of readonly
  * mode.
  */
 int jbd2_journal_clear_err(journal_t *journal)
@@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal)
  * void jbd2_journal_ack_err() - Ack journal err.
  * @journal: journal to act on.
  *
- * An error must be cleared or Acked to take a FS out of readonly
+ * An error must be cleared or acked to take a FS out of readonly
  * mode.
  */
 void jbd2_journal_ack_err(journal_t *journal)
-- 
cgit v0.10.2


From 0b8e58a140cae2ba1c4a21ccae7c6c3c939c51f9 Mon Sep 17 00:00:00 2001
From: Andreas Dilger <adilger@sun.com>
Date: Wed, 3 Jun 2009 17:59:28 -0400
Subject: ext4: super.c whitespace cleanup

Cleanup of whitespace and formatting.  Initially driven by confusing indents
for the ext4_{block,inode}_bitmap() et. al. helper routines, but figured I'd
cleanup some other 80-column wrapping and other indenting problems at the
same time.

Signed-off-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 91b98b5..0a97b1a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -79,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 {
 	return le32_to_cpu(bg->bg_block_bitmap_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 }
 
 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -87,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 {
 	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 }
 
 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
@@ -95,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 {
 	return le32_to_cpu(bg->bg_inode_table_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
+		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 
 __u32 ext4_free_blks_count(struct super_block *sb,
@@ -103,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb,
 {
 	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 }
 
 __u32 ext4_free_inodes_count(struct super_block *sb,
@@ -111,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb,
 {
 	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 }
 
 __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -119,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb,
 {
 	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 }
 
 __u32 ext4_itable_unused_count(struct super_block *sb,
@@ -127,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb,
 {
 	return le16_to_cpu(bg->bg_itable_unused_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		(__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
+		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 }
 
 void ext4_block_bitmap_set(struct super_block *sb,
@@ -207,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 	journal = EXT4_SB(sb)->s_journal;
 	if (journal) {
 		if (is_journal_aborted(journal)) {
-			ext4_abort(sb, __func__,
-				   "Detected aborted journal");
+			ext4_abort(sb, __func__, "Detected aborted journal");
 			return ERR_PTR(-EROFS);
 		}
 		return jbd2_journal_start(journal, nblocks);
@@ -436,7 +435,7 @@ void ext4_warning(struct super_block *sb, const char *function,
 }
 
 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
-				const char *function, const char *fmt, ...)
+			   const char *function, const char *fmt, ...)
 __releases(bitlock)
 __acquires(bitlock)
 {
@@ -472,7 +471,6 @@ __acquires(bitlock)
 	return;
 }
 
-
 void ext4_update_dynamic_rev(struct super_block *sb)
 {
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -638,7 +636,6 @@ static void ext4_put_super(struct super_block *sb)
 	lock_kernel();
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-	return;
 }
 
 static struct kmem_cache *ext4_inode_cachep;
@@ -653,6 +650,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
+
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	ei->i_acl = EXT4_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -673,6 +671,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->i_allocated_meta_blocks = 0;
 	ei->i_delalloc_reserved_flag = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
+
 	return &ei->vfs_inode;
 }
 
@@ -879,12 +878,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_puts(seq, ",noauto_da_alloc");
 
 	ext4_show_quota_options(seq, sb);
+
 	return 0;
 }
 
-
 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
-		u64 ino, u32 generation)
+					u64 ino, u32 generation)
 {
 	struct inode *inode;
 
@@ -913,14 +912,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 }
 
 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
-		int fh_len, int fh_type)
+					int fh_len, int fh_type)
 {
 	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 				    ext4_nfs_get_inode);
 }
 
 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
-		int fh_len, int fh_type)
+					int fh_len, int fh_type)
 {
 	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 				    ext4_nfs_get_inode);
@@ -932,7 +931,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
  * which would prevent try_to_free_buffers() from freeing them, we must use
  * jbd2 layer's try_to_free_buffers() function to release them.
  */
-static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
+static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
+				 gfp_t wait)
 {
 	journal_t *journal = EXT4_SB(sb)->s_journal;
 
@@ -1133,8 +1133,9 @@ static ext4_fsblk_t get_sb_block(void **data)
 
 	if (!options || strncmp(options, "sb=", 3) != 0)
 		return 1;	/* Default location */
+
 	options += 3;
-	/*todo: use simple_strtoll with >32bit ext4 */
+	/* TODO: use simple_strtoll with >32bit ext4 */
 	sb_block = simple_strtoul(options, &options, 0);
 	if (*options && *options != ',') {
 		printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
@@ -1144,6 +1145,7 @@ static ext4_fsblk_t get_sb_block(void **data)
 	if (*options == ',')
 		options++;
 	*data = (void *) options;
+
 	return sb_block;
 }
 
@@ -1626,7 +1628,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 		printk(KERN_WARNING
 		       "EXT4-fs warning: checktime reached, "
 		       "running e2fsck is recommended\n");
-	if (!sbi->s_journal) 
+	if (!sbi->s_journal)
 		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
@@ -1810,7 +1812,7 @@ static int ext4_check_descriptors(struct super_block *sb)
 	}
 
 	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
-	sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
+	sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
 	return 1;
 }
 
@@ -1926,6 +1928,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 #endif
 	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
 }
+
 /*
  * Maximal extent format file size.
  * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -1976,19 +1979,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 	loff_t res = EXT4_NDIR_BLOCKS;
 	int meta_blocks;
 	loff_t upper_limit;
-	/* This is calculated to be the largest file size for a
-	 * dense, bitmapped file such that the total number of
-	 * sectors in the file, including data and all indirect blocks,
-	 * does not exceed 2^48 -1
-	 * __u32 i_blocks_lo and _u16 i_blocks_high representing the
-	 * total number of  512 bytes blocks of the file
+	/* This is calculated to be the largest file size for a dense, block
+	 * mapped file such that the file's total number of 512-byte sectors,
+	 * including data and all indirect blocks, does not exceed (2^48 - 1).
+	 *
+	 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
+	 * number of 512-byte sectors of the file.
 	 */
 
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * !has_huge_files or CONFIG_LBD is not enabled
-		 * implies the inode i_block represent total blocks in
-		 * 512 bytes 32 == size of vfs inode i_blocks * 8
+		 * !has_huge_files or CONFIG_LBD not enabled implies that
+		 * the inode i_block field represents total file blocks in
+		 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
 		 */
 		upper_limit = (1LL << 32) - 1;
 
@@ -2030,7 +2033,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 }
 
 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
-				ext4_fsblk_t logical_sb_block, int nr)
+				   ext4_fsblk_t logical_sb_block, int nr)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t bg, first_meta_bg;
@@ -2044,6 +2047,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
 	bg = sbi->s_desc_per_block * nr;
 	if (ext4_bg_has_super(sb, bg))
 		has_super = 1;
+
 	return (has_super + ext4_group_first_block_no(sb, bg));
 }
 
@@ -2148,7 +2152,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 }
 
 static ssize_t sbi_ui_show(struct ext4_attr *a,
-				struct ext4_sb_info *sbi, char *buf)
+			   struct ext4_sb_info *sbi, char *buf)
 {
 	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
 
@@ -2253,7 +2257,6 @@ static struct kobj_type ext4_ktype = {
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 				__releases(kernel_lock)
 				__acquires(kernel_lock)
-
 {
 	struct buffer_head *bh;
 	struct ext4_super_block *es = NULL;
@@ -2379,7 +2382,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	 */
 	set_opt(sbi->s_mount_opt, DELALLOC);
 
-
 	if (!parse_options((char *) data, sb, &journal_devnum,
 			   &journal_ioprio, NULL, 0))
 		goto failed_mount;
@@ -2442,7 +2444,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	if (sb->s_blocksize != blocksize) {
-
 		/* Validate the filesystem blocksize */
 		if (!sb_set_blocksize(sb, blocksize)) {
 			printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
@@ -2489,6 +2490,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
 			sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
 	}
+
 	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
 		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
@@ -2501,10 +2503,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		}
 	} else
 		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
+
 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
 	if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
 		goto cantfind_ext4;
+
 	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
 	if (sbi->s_inodes_per_block == 0)
 		goto cantfind_ext4;
@@ -2515,6 +2519,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_mount_state = le16_to_cpu(es->s_state);
 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
+
 	for (i = 0; i < 4; i++)
 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
 	sbi->s_def_hash_version = es->s_def_hash_version;
@@ -2566,12 +2571,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
-        /*
-         * It makes no sense for the first data block to be beyond the end
-         * of the filesystem.
-         */
-        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
-                printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
+	/*
+	 * It makes no sense for the first data block to be beyond the end
+	 * of the filesystem.
+	 */
+	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
+		printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
 		       "block %u is beyond end of filesystem (%llu)\n",
 		       le32_to_cpu(es->s_first_data_block),
 		       ext4_blocks_count(es));
@@ -3082,6 +3087,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	EXT4_SB(sb)->journal_bdev = bdev;
 	ext4_init_journal_params(sb, journal);
 	return journal;
+
 out_journal:
 	jbd2_journal_destroy(journal);
 out_bdev:
@@ -3116,7 +3122,6 @@ static int ext4_load_journal(struct super_block *sb,
 	 * crash?  For recovery, we need to check in advance whether we
 	 * can get read-write access to the device.
 	 */
-
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
 		if (sb->s_flags & MS_RDONLY) {
 			printk(KERN_INFO "EXT4-fs: INFO: recovery "
@@ -3234,7 +3239,6 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	return error;
 }
 
-
 /*
  * Have we just finished recovery?  If so, and if we are mounting (or
  * remounting) the filesystem readonly, then we will end up with a
@@ -3485,8 +3489,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 
 			/*
 			 * Make sure the group descriptor checksums
-			 * are sane.  If they aren't, refuse to
-			 * remount r/w.
+			 * are sane.  If they aren't, refuse to remount r/w.
 			 */
 			for (g = 0; g < sbi->s_groups_count; g++) {
 				struct ext4_group_desc *gdp =
@@ -3545,6 +3548,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			kfree(old_opts.s_qf_names[i]);
 #endif
 	return 0;
+
 restore_opts:
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.s_mount_opt;
@@ -3628,11 +3632,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
 	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
 	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+
 	return 0;
 }
 
-/* Helper function for writing quotas on sync - we need to start transaction before quota file
- * is locked for write. Otherwise the are possible deadlocks:
+/* Helper function for writing quotas on sync - we need to start transaction
+ * before quota file is locked for write. Otherwise the are possible deadlocks:
  * Process 1                         Process 2
  * ext4_create()                     quota_sync()
  *   jbd2_journal_start()                  write_dquot()
@@ -3656,7 +3661,7 @@ static int ext4_write_dquot(struct dquot *dquot)
 
 	inode = dquot_to_inode(dquot);
 	handle = ext4_journal_start(inode,
-					EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_commit(dquot);
@@ -3672,7 +3677,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)
 	handle_t *handle;
 
 	handle = ext4_journal_start(dquot_to_inode(dquot),
-					EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_acquire(dquot);
@@ -3688,7 +3693,7 @@ static int ext4_release_dquot(struct dquot *dquot)
 	handle_t *handle;
 
 	handle = ext4_journal_start(dquot_to_inode(dquot),
-					EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle)) {
 		/* Release dquot anyway to avoid endless cycle in dqput() */
 		dquot_release(dquot);
@@ -3736,7 +3741,7 @@ static int ext4_write_info(struct super_block *sb, int type)
 static int ext4_quota_on_mount(struct super_block *sb, int type)
 {
 	return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
-			EXT4_SB(sb)->s_jquota_fmt, type);
+				  EXT4_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
@@ -3907,10 +3912,10 @@ out:
 
 #endif
 
-static int ext4_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static int ext4_get_sb(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
 static struct file_system_type ext4_fs_type = {
@@ -3922,14 +3927,14 @@ static struct file_system_type ext4_fs_type = {
 };
 
 #ifdef CONFIG_EXT4DEV_COMPAT
-static int ext4dev_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
+			  const char *dev_name, void *data,struct vfsmount *mnt)
 {
 	printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
 	       "to mount using ext4\n");
 	printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
 	       "will go away by 2.6.31\n");
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
 static struct file_system_type ext4dev_fs_type = {
-- 
cgit v0.10.2


From 13be1bf1467e3186a4a9d1b1276cc4bd31e472ea Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Thu, 4 Jun 2009 01:53:21 +0200
Subject: ALSA: burgundy: timeout message is off by one.

Timeout message is off by one.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/ppc/burgundy.c b/sound/ppc/burgundy.c
index 45a7629..d2c0a4e 100644
--- a/sound/ppc/burgundy.c
+++ b/sound/ppc/burgundy.c
@@ -46,12 +46,12 @@ snd_pmac_burgundy_extend_wait(struct snd_pmac *chip)
 	timeout = 50;
 	while (!(in_le32(&chip->awacs->codec_stat) & MASK_EXTEND) && timeout--)
 		udelay(1);
-	if (! timeout)
+	if (timeout < 0)
 		printk(KERN_DEBUG "burgundy_extend_wait: timeout #1\n");
 	timeout = 50;
 	while ((in_le32(&chip->awacs->codec_stat) & MASK_EXTEND) && timeout--)
 		udelay(1);
-	if (! timeout)
+	if (timeout < 0)
 		printk(KERN_DEBUG "burgundy_extend_wait: timeout #2\n");
 }
 
-- 
cgit v0.10.2


From 018df41861475595a51d327b83fb5830462f7a53 Mon Sep 17 00:00:00 2001
From: Hector Martin <hector@marcansoft.com>
Date: Thu, 4 Jun 2009 00:13:40 +0200
Subject: ALSA: hda - More Aspire 8930G fixes

Enable all three capture channels, including the missing nid 7 which is
the only one capable of capturing DMIC input

Enable Headphone amp for the HP jack. This causes a volume boost for
headphones, but does not cause any noticeable effect for light loads
like other amps, so there is no need to make it configurable.

Add Input Mix capture mux setting to capture the output of the playback
input mux (that is, what goes out the speakers except for PCM)

Hack another coef register because the stereo DMIC for some reason
produces a nonstandard sum/difference signal. I found a bit to make it
just use the sum signal for both channels, which makes it behave like a
standard mono microphone. The stereo is useless anyway (they're 1cm apart).

Tested working: Three capture channels, mic in, line in, DMIC.

Tested not working: CD. Not sure why, might be unconnected in the actual
hardware or a CD drive issue.

Also looked at SPDIF. It appears to work (emitter lights up inside the
HP out jack) but I lack a proper miniTOSLINK cable to test it.

Signed-off-by: Hector Martin <hector@marcansoft.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 2bbb9e4..8699b7f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1469,10 +1469,10 @@ static struct hda_verb alc888_acer_aspire_4930g_verbs[] = {
 };
 
 /*
- * ALC888 Acer Aspire 8930G model
+ * ALC889 Acer Aspire 8930G model
  */
 
-static struct hda_verb alc888_acer_aspire_8930g_verbs[] = {
+static struct hda_verb alc889_acer_aspire_8930g_verbs[] = {
 /* Front Mic: set to PIN_IN (empty by default) */
 	{0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
 /* Unselect Front Mic by default in input mixer 3 */
@@ -1492,7 +1492,7 @@ static struct hda_verb alc888_acer_aspire_8930g_verbs[] = {
 	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 	{0x16, AC_VERB_SET_CONNECT_SEL, 0x02},
 /* Connect HP out to Front */
-	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | PIN_HP},
 	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
 /* Enable all DACs */
@@ -1507,6 +1507,17 @@ static struct hda_verb alc888_acer_aspire_8930g_verbs[] = {
 /* Enable amplifiers */
 	{0x14, AC_VERB_SET_EAPD_BTLENABLE, 0x02},
 	{0x15, AC_VERB_SET_EAPD_BTLENABLE, 0x02},
+/* DMIC fix
+ * This laptop has a stereo digital microphone. The mics are only 1cm apart
+ * which makes the stereo useless. However, either the mic or the ALC889
+ * makes the signal become a difference/sum signal instead of standard
+ * stereo, which is annoying. So instead we flip this bit which makes the
+ * codec replicate the sum signal to both channels, turning it into a
+ * normal mono mic.
+ */
+/*  DMIC_CONTROL? Init value = 0x0001 */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x0b},
+	{0x20, AC_VERB_SET_PROC_COEF, 0x0003},
 	{ }
 };
 
@@ -1531,6 +1542,38 @@ static struct hda_input_mux alc888_2_capture_sources[2] = {
 	}
 };
 
+static struct hda_input_mux alc889_capture_sources[3] = {
+	/* Digital mic only available on first "ADC" */
+	{
+		.num_items = 5,
+		.items = {
+			{ "Mic", 0x0 },
+			{ "Line", 0x2 },
+			{ "CD", 0x4 },
+			{ "Front Mic", 0xb },
+			{ "Input Mix", 0xa },
+		},
+	},
+	{
+		.num_items = 4,
+		.items = {
+			{ "Mic", 0x0 },
+			{ "Line", 0x2 },
+			{ "CD", 0x4 },
+			{ "Input Mix", 0xa },
+		},
+	},
+	{
+		.num_items = 4,
+		.items = {
+			{ "Mic", 0x0 },
+			{ "Line", 0x2 },
+			{ "CD", 0x4 },
+			{ "Input Mix", 0xa },
+		},
+	}
+};
+
 static struct snd_kcontrol_new alc888_base_mixer[] = {
 	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
 	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
@@ -1562,7 +1605,7 @@ static void alc888_acer_aspire_4930g_init_hook(struct hda_codec *codec)
 	alc_automute_amp(codec);
 }
 
-static void alc888_acer_aspire_8930g_init_hook(struct hda_codec *codec)
+static void alc889_acer_aspire_8930g_init_hook(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
 
@@ -9081,22 +9124,22 @@ static struct alc_config_preset alc883_presets[] = {
 		.mixers = { alc888_base_mixer,
 				alc883_chmode_mixer },
 		.init_verbs = { alc883_init_verbs, alc880_gpio1_init_verbs,
-				alc888_acer_aspire_8930g_verbs },
+				alc889_acer_aspire_8930g_verbs },
 		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
 		.dac_nids = alc883_dac_nids,
-		.num_adc_nids = ARRAY_SIZE(alc883_adc_nids_rev),
-		.adc_nids = alc883_adc_nids_rev,
-		.capsrc_nids = alc883_capsrc_nids_rev,
+		.num_adc_nids = ARRAY_SIZE(alc889_adc_nids),
+		.adc_nids = alc889_adc_nids,
+		.capsrc_nids = alc889_capsrc_nids,
 		.dig_out_nid = ALC883_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes),
 		.channel_mode = alc883_3ST_6ch_modes,
 		.need_dac_fix = 1,
 		.const_channel_count = 6,
 		.num_mux_defs =
-			ARRAY_SIZE(alc888_2_capture_sources),
-		.input_mux = alc888_2_capture_sources,
+			ARRAY_SIZE(alc889_capture_sources),
+		.input_mux = alc889_capture_sources,
 		.unsol_event = alc_automute_amp_unsol_event,
-		.init_hook = alc888_acer_aspire_8930g_init_hook,
+		.init_hook = alc889_acer_aspire_8930g_init_hook,
 	},
 	[ALC883_MEDION] = {
 		.mixers = { alc883_fivestack_mixer,
-- 
cgit v0.10.2


From b8da7de56ca0ad34726478a50d138a29a9ff76cb Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 2 Jun 2009 16:50:35 +1000
Subject: drm: fix irq naming for kms drivers.

allocating devname in the i915 driver was a hack originally and I
forgot to figure out how to do this properly back then.

So this is the cleaner version that just picks devname or driver name
in the irq code.

It removes the devname allocs from the i915 driver.

Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 93e677a..fc8e5ac 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -196,6 +196,7 @@ int drm_irq_install(struct drm_device *dev)
 {
 	int ret = 0;
 	unsigned long sh_flags = 0;
+	char *irqname;
 
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
@@ -227,8 +228,13 @@ int drm_irq_install(struct drm_device *dev)
 	if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED))
 		sh_flags = IRQF_SHARED;
 
+	if (dev->devname)
+		irqname = dev->devname;
+	else
+		irqname = dev->driver->name;
+
 	ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler,
-			  sh_flags, dev->devname, dev);
+			  sh_flags, irqname, dev);
 
 	if (ret < 0) {
 		mutex_lock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 53d5445..0ccb63e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -987,12 +987,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	int fb_bar = IS_I9XX(dev) ? 2 : 0;
 	int ret = 0;
 
-	dev->devname = kstrdup(DRIVER_NAME, GFP_KERNEL);
-	if (!dev->devname) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	dev->mode_config.fb_base = drm_get_resource_start(dev, fb_bar) &
 		0xff000000;
 
@@ -1006,7 +1000,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 	ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
 	if (ret)
-		goto kfree_devname;
+		goto out;
 
 	/* Basic memrange allocator for stolen space (aka vram) */
 	drm_mm_init(&dev_priv->vram, 0, prealloc_size);
@@ -1024,7 +1018,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 	ret = i915_gem_init_ringbuffer(dev);
 	if (ret)
-		goto kfree_devname;
+		goto out;
 
 	/* Allow hardware batchbuffers unless told otherwise.
 	 */
@@ -1056,8 +1050,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 destroy_ringbuffer:
 	i915_gem_cleanup_ringbuffer(dev);
-kfree_devname:
-	kfree(dev->devname);
 out:
 	return ret;
 }
-- 
cgit v0.10.2


From 9863871bd1bbf218b921af5e0bc48ca4f6ea9f12 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@linux.ie>
Date: Thu, 4 Jun 2009 07:08:13 +1000
Subject: drm/radeon: fix ring free alignment calculations

fd.o bz#21849

We were aligning to +16 dwords, instead of to the next 16dword
boundary in the ring. Fix the calculation to go to the next 16dword
boundary when space checking.

Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 77a7a4d..aff90bb 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -2185,9 +2185,9 @@ void radeon_commit_ring(drm_radeon_private_t *dev_priv)
 
 	/* check if the ring is padded out to 16-dword alignment */
 
-	tail_aligned = dev_priv->ring.tail & 0xf;
+	tail_aligned = dev_priv->ring.tail & (RADEON_RING_ALIGN-1);
 	if (tail_aligned) {
-		int num_p2 = 16 - tail_aligned;
+		int num_p2 = RADEON_RING_ALIGN - tail_aligned;
 
 		ring = dev_priv->ring.start;
 		/* pad with some CP_PACKET2 */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 8071d96..0c6bfc1 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -1964,11 +1964,14 @@ do {								\
 
 #define RING_LOCALS	int write, _nr, _align_nr; unsigned int mask; u32 *ring;
 
+#define RADEON_RING_ALIGN 16
+
 #define BEGIN_RING( n ) do {						\
 	if ( RADEON_VERBOSE ) {						\
 		DRM_INFO( "BEGIN_RING( %d )\n", (n));			\
 	}								\
-	_align_nr = (n + 0xf) & ~0xf;					\
+	_align_nr = RADEON_RING_ALIGN - ((dev_priv->ring.tail + n) & (RADEON_RING_ALIGN-1));	\
+	_align_nr += n;							\
 	if (dev_priv->ring.space <= (_align_nr * sizeof(u32))) {	\
                 COMMIT_RING();						\
 		radeon_wait_ring( dev_priv, _align_nr * sizeof(u32));	\
-- 
cgit v0.10.2


From 5fdc18d938c99399b73fe894bc24cb9400a1a2ee Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Thu, 4 Jun 2009 00:12:18 +0200
Subject: ALSA: Core - clean up snd_card_set_id* calls and remove possible id
 collision

Move locking outside snd_card_set_id_internal() function and rename it
to snd_card_set_id_no_lock() for better function description.

User defined id is just copied to card structure at allocation time.
The real unique id procedure is called in snd_card_register() to
ensure real atomicity.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/init.c b/sound/core/init.c
index a578d05..d5d40d7 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -153,7 +153,7 @@ int snd_card_create(int idx, const char *xid,
 	if (!card)
 		return -ENOMEM;
 	if (xid)
-		snd_card_set_id(card, xid);
+		strlcpy(card->id, xid, sizeof(card->id));
 	err = 0;
 	mutex_lock(&snd_card_mutex);
 	if (idx < 0) {
@@ -476,16 +476,12 @@ int snd_card_free(struct snd_card *card)
 
 EXPORT_SYMBOL(snd_card_free);
 
-static void snd_card_set_id_internal(struct snd_card *card, const char *nid,
-				     int do_locking)
+static void snd_card_set_id_no_lock(struct snd_card *card, const char *nid)
 {
 	int i, len, idx_flag = 0, loops = SNDRV_CARDS;
 	const char *spos, *src;
 	char *id;
 	
-	/* check if user specified own card->id */
-	if (card->id[0] != '\0')
-		return;
 	if (nid == NULL) {
 		id = card->shortname;
 		spos = src = id;
@@ -522,16 +518,10 @@ static void snd_card_set_id_internal(struct snd_card *card, const char *nid,
       		}
 	      	if (!snd_info_check_reserved_words(id))
       			goto __change;
-		if (do_locking)
-			mutex_lock(&snd_card_mutex);
 		for (i = 0; i < snd_ecards_limit; i++) {
-			if (snd_cards[i] && !strcmp(snd_cards[i]->id, id)) {
-				mutex_unlock(&snd_card_mutex);
+			if (snd_cards[i] && !strcmp(snd_cards[i]->id, id))
 				goto __change;
-			}
 		}
-		if (do_locking)
-			mutex_unlock(&snd_card_mutex);
 		break;
 
 	      __change:
@@ -566,7 +556,12 @@ static void snd_card_set_id_internal(struct snd_card *card, const char *nid,
  */
 void snd_card_set_id(struct snd_card *card, const char *nid)
 {
-	snd_card_set_id_internal(card, nid, 1);
+	/* check if user specified own card->id */
+	if (card->id[0] != '\0')
+		return;
+	mutex_lock(&snd_card_mutex);
+	snd_card_set_id_no_lock(card, nid);
+	mutex_unlock(&snd_card_mutex);
 }
 EXPORT_SYMBOL(snd_card_set_id);
 
@@ -663,8 +658,7 @@ int snd_card_register(struct snd_card *card)
 		mutex_unlock(&snd_card_mutex);
 		return 0;
 	}
-	if (card->id[0] == '\0')
-		snd_card_set_id_internal(card, NULL, 0);
+	snd_card_set_id_no_lock(card, card->id[0] == '\0' ? NULL : card->id);
 	snd_cards[card->number] = card;
 	mutex_unlock(&snd_card_mutex);
 	init_info_for_card(card);
-- 
cgit v0.10.2


From 75185c929ed241f5cf1aa28999b8012181e2c7cb Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Sat, 30 May 2009 20:42:25 -0700
Subject: drm: add newlines to text sysfs files

The contents of various simple text files in sysfs should end with
a newline to make them easier to read from the console.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 8f93729..182bdf9 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -147,7 +147,7 @@ static ssize_t status_show(struct device *device,
 	enum drm_connector_status status;
 
 	status = connector->funcs->detect(connector);
-	return snprintf(buf, PAGE_SIZE, "%s",
+	return snprintf(buf, PAGE_SIZE, "%s\n",
 			drm_get_connector_status_name(status));
 }
 
@@ -166,7 +166,7 @@ static ssize_t dpms_show(struct device *device,
 	if (ret)
 		return 0;
 
-	return snprintf(buf, PAGE_SIZE, "%s",
+	return snprintf(buf, PAGE_SIZE, "%s\n",
 			drm_get_dpms_name((int)dpms_status));
 }
 
@@ -176,7 +176,7 @@ static ssize_t enabled_show(struct device *device,
 {
 	struct drm_connector *connector = to_drm_connector(device);
 
-	return snprintf(buf, PAGE_SIZE, connector->encoder ? "enabled" :
+	return snprintf(buf, PAGE_SIZE, "%s\n", connector->encoder ? "enabled" :
 			"disabled");
 }
 
-- 
cgit v0.10.2


From e36ebaf49274ffa78f17b62bcae4c92c33b5b391 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Sat, 30 May 2009 20:42:26 -0700
Subject: drm: set permissions on edid file to 0444

Without initializing the sysfs attributes for the edid file,
it was created with mode 0, making it difficult for applications to use.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 182bdf9..9987ab8 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -317,6 +317,7 @@ static struct device_attribute connector_attrs_opt1[] = {
 
 static struct bin_attribute edid_attr = {
 	.attr.name = "edid",
+	.attr.mode = 0444,
 	.size = 128,
 	.read = edid_show,
 };
-- 
cgit v0.10.2


From c9fb15f60eb517c958dec64dca9357bf62bf2201 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Sat, 30 May 2009 20:42:28 -0700
Subject: drm: Hook up DPMS property handling in drm_crtc.c. Add
 drm_helper_connector_dpms.

Making the drm_crtc.c code recognize the DPMS property and invoke the
connector->dpms function doesn't remove any capability from the driver while
reducing code duplication.

That just highlighted the problem with the existing DPMS functions which
could turn off the connector, but failed to turn off any relevant crtcs. The
new drm_helper_connector_dpms function manages all of that, using the
drm_helper-specific crtc and encoder dpms functions, automatically computing
the appropriate DPMS level for each object in the system.

This fixes the current troubles in the i915 driver which left PLLs, pipes
and planes running while in DPMS_OFF mode or even while they were unused.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 94a7688..8fab789 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -2294,7 +2294,12 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
 		}
 	}
 
-	if (connector->funcs->set_property)
+	/* Do DPMS ourselves */
+	if (property == connector->dev->mode_config.dpms_property) {
+		if (connector->funcs->dpms)
+			(*connector->funcs->dpms)(connector, (int) out_resp->value);
+		ret = 0;
+	} else if (connector->funcs->set_property)
 		ret = connector->funcs->set_property(connector, property, out_resp->value);
 
 	/* store the property value if succesful */
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 4589044..a6f73f1 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -199,6 +199,29 @@ static void drm_helper_add_std_modes(struct drm_device *dev,
 }
 
 /**
+ * drm_helper_encoder_in_use - check if a given encoder is in use
+ * @encoder: encoder to check
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Walk @encoders's DRM device's mode_config and see if it's in use.
+ *
+ * RETURNS:
+ * True if @encoder is part of the mode_config, false otherwise.
+ */
+bool drm_helper_encoder_in_use(struct drm_encoder *encoder)
+{
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder)
+			return true;
+	return false;
+}
+EXPORT_SYMBOL(drm_helper_encoder_in_use);
+
+/**
  * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config
  * @crtc: CRTC to check
  *
@@ -216,7 +239,7 @@ bool drm_helper_crtc_in_use(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	/* FIXME: Locking around list access? */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
-		if (encoder->crtc == crtc)
+		if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder))
 			return true;
 	return false;
 }
@@ -240,7 +263,7 @@ void drm_helper_disable_unused_functions(struct drm_device *dev)
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		encoder_funcs = encoder->helper_private;
-		if (!encoder->crtc)
+		if (!drm_helper_encoder_in_use(encoder))
 			(*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF);
 	}
 
@@ -935,6 +958,88 @@ bool drm_helper_initial_config(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_helper_initial_config);
 
+static int drm_helper_choose_encoder_dpms(struct drm_encoder *encoder)
+{
+	int dpms = DRM_MODE_DPMS_OFF;
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder)
+			if (connector->dpms < dpms)
+				dpms = connector->dpms;
+	return dpms;
+}
+
+static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc)
+{
+	int dpms = DRM_MODE_DPMS_OFF;
+	struct drm_connector *connector;
+	struct drm_device *dev = crtc->dev;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder && connector->encoder->crtc == crtc)
+			if (connector->dpms < dpms)
+				dpms = connector->dpms;
+	return dpms;
+}
+
+/**
+ * drm_helper_connector_dpms
+ * @connector affected connector
+ * @mode DPMS mode
+ *
+ * Calls the low-level connector DPMS function, then
+ * calls appropriate encoder and crtc DPMS functions as well
+ */
+void drm_helper_connector_dpms(struct drm_connector *connector, int mode)
+{
+	struct drm_encoder *encoder = connector->encoder;
+	struct drm_crtc *crtc = encoder ? encoder->crtc : NULL;
+	int old_dpms;
+
+	if (mode == connector->dpms)
+		return;
+
+	old_dpms = connector->dpms;
+	connector->dpms = mode;
+
+	/* from off to on, do crtc then encoder */
+	if (mode < old_dpms) {
+		if (crtc) {
+			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+			if (crtc_funcs->dpms)
+				(*crtc_funcs->dpms) (crtc,
+						     drm_helper_choose_crtc_dpms(crtc));
+		}
+		if (encoder) {
+			struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+			if (encoder_funcs->dpms)
+				(*encoder_funcs->dpms) (encoder,
+							drm_helper_choose_encoder_dpms(encoder));
+		}
+	}
+
+	/* from on to off, do encoder then crtc */
+	if (mode > old_dpms) {
+		if (encoder) {
+			struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+			if (encoder_funcs->dpms)
+				(*encoder_funcs->dpms) (encoder,
+							drm_helper_choose_encoder_dpms(encoder));
+		}
+		if (crtc) {
+			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+			if (crtc_funcs->dpms)
+				(*crtc_funcs->dpms) (crtc,
+						     drm_helper_choose_crtc_dpms(crtc));
+		}
+	}
+
+	return;
+}
+EXPORT_SYMBOL(drm_helper_connector_dpms);
+
 /**
  * drm_hotplug_stage_two
  * @dev DRM device
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 640f515..79acc4f 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -381,11 +381,6 @@ static int intel_crt_set_property(struct drm_connector *connector,
 				  struct drm_property *property,
 				  uint64_t value)
 {
-	struct drm_device *dev = connector->dev;
-
-	if (property == dev->mode_config.dpms_property && connector->encoder)
-		intel_crt_dpms(connector->encoder, (uint32_t)(value & 0xf));
-
 	return 0;
 }
 
@@ -402,6 +397,7 @@ static const struct drm_encoder_helper_funcs intel_crt_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_crt_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.detect = intel_crt_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = intel_crt_destroy,
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 8b8d6e6..1ee3007 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -316,6 +316,7 @@ static const struct drm_encoder_helper_funcs intel_dvo_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_dvo_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_dvo_save,
 	.restore = intel_dvo_restore,
 	.detect = intel_dvo_detect,
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index d0983bb..7d6bdd7 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -219,6 +219,7 @@ static const struct drm_encoder_helper_funcs intel_hdmi_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_hdmi_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_hdmi_save,
 	.restore = intel_hdmi_restore,
 	.detect = intel_hdmi_detect,
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 53731f0..c92a64a 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -343,11 +343,6 @@ static int intel_lvds_set_property(struct drm_connector *connector,
 				   struct drm_property *property,
 				   uint64_t value)
 {
-	struct drm_device *dev = connector->dev;
-
-	if (property == dev->mode_config.dpms_property && connector->encoder)
-		intel_lvds_dpms(connector->encoder, (uint32_t)(value & 0xf));
-
 	return 0;
 }
 
@@ -366,6 +361,7 @@ static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs
 };
 
 static const struct drm_connector_funcs intel_lvds_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_lvds_save,
 	.restore = intel_lvds_restore,
 	.detect = intel_lvds_detect,
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index f3ef6bf..3093b4d 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1616,6 +1616,7 @@ static const struct drm_encoder_helper_funcs intel_sdvo_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_sdvo_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_sdvo_save,
 	.restore = intel_sdvo_restore,
 	.detect = intel_sdvo_detect,
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index d2c3298..98ac054 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1626,6 +1626,7 @@ static const struct drm_encoder_helper_funcs intel_tv_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_tv_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
 	.save = intel_tv_save,
 	.restore = intel_tv_restore,
 	.detect = intel_tv_detect,
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 3c1924c..7300fb8 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -471,6 +471,9 @@ struct drm_connector {
 	u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY];
 	uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY];
 
+	/* requested DPMS state */
+	int dpms;
+
 	void *helper_private;
 
 	uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER];
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index ec073d8..6769ff6 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -99,6 +99,8 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 				     struct drm_framebuffer *old_fb);
 extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc);
 
+extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode);
+
 extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
 					  struct drm_mode_fb_cmd *mode_cmd);
 
-- 
cgit v0.10.2


From 93c05f222413e3a16e8785f252db4726693abd71 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Thu, 4 Jun 2009 09:41:19 +1000
Subject: drm/i915: intel_lvds.c fix section mismatch

intel_no_lvds[] does not require __initdata as it is used only by

void intel_lvds_init(struct drm_device *dev).

Signed-off-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index c92a64a..53cccfa 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -387,7 +387,7 @@ static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id)
 }
 
 /* These systems claim to have LVDS, but really don't */
-static const struct dmi_system_id __initdata intel_no_lvds[] = {
+static const struct dmi_system_id intel_no_lvds[] = {
 	{
 		.callback = intel_no_lvds_dmi_callback,
 		.ident = "Apple Mac Mini (Core series)",
-- 
cgit v0.10.2


From 6c51d1cfa0a370b48a157163340190cf5fd2346b Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 26 May 2009 10:35:52 +1000
Subject: drm: don't associate _DRM_DRIVER maps with a master

A driver will use the _DRM_DRIVER map flag to indicate that it wants
to be responsible for removing the map itself, bypassing the DRM's
automagic cleanup code.

Since the multi-master changes this has been broken, resulting in some
drivers having their registers unmapped before it's finished with them.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 0411d91..80a2575 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -371,7 +371,8 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	list->user_token = list->hash.key << PAGE_SHIFT;
 	mutex_unlock(&dev->struct_mutex);
 
-	list->master = dev->primary->master;
+	if (!(map->flags & _DRM_DRIVER))
+		list->master = dev->primary->master;
 	*maplist = list;
 	return 0;
 	}
-- 
cgit v0.10.2


From fc43896630a421321a19d7970bac27ac94e9d162 Mon Sep 17 00:00:00 2001
From: Adam Jackson <ajax@redhat.com>
Date: Thu, 4 Jun 2009 10:20:34 +1000
Subject: drm: ignore EDID with really tiny modes.

Some EDIDs lie and report tiny modes that aren't possible. Ignore
these modes.

Signed-off-by: Adam Jackson <ajax@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index ca9c616..6f6b264 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -289,6 +289,11 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev,
 	struct drm_display_mode *mode;
 	struct detailed_pixel_timing *pt = &timing->data.pixel_data;
 
+	/* ignore tiny modes */
+	if (((pt->hactive_hi << 8) | pt->hactive_lo) < 64 ||
+	    ((pt->vactive_hi << 8) | pt->hactive_lo) < 64)
+		return NULL;
+
 	if (pt->stereo) {
 		printk(KERN_WARNING "stereo mode not supported\n");
 		return NULL;
-- 
cgit v0.10.2


From e0a94c2a63f2644826069044649669b5e7ca75d3 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Wed, 3 Jun 2009 16:04:31 -0400
Subject: security: use mmap_min_addr indepedently of security models

This patch removes the dependency of mmap_min_addr on CONFIG_SECURITY.
It also sets a default mmap_min_addr of 4096.

mmapping of addresses below 4096 will only be possible for processes
with CAP_SYS_RAWIO.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Eric Paris <eparis@redhat.com>
Looks-ok-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d..0c21af6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -580,12 +580,10 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
  */
 static inline unsigned long round_hint_to_min(unsigned long hint)
 {
-#ifdef CONFIG_SECURITY
 	hint &= PAGE_MASK;
 	if (((void *)hint != NULL) &&
 	    (hint < mmap_min_addr))
 		return PAGE_ALIGN(mmap_min_addr);
-#endif
 	return hint;
 }
 
diff --git a/include/linux/security.h b/include/linux/security.h
index d5fd616..5eff459 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2197,6 +2197,8 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot,
 				     unsigned long addr,
 				     unsigned long addr_only)
 {
+	if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO))
+		return -EACCES;
 	return 0;
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 149581f..45bd711 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1237,7 +1237,6 @@ static struct ctl_table vm_table[] = {
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
-#ifdef CONFIG_SECURITY
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "mmap_min_addr",
@@ -1246,7 +1245,6 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 	},
-#endif
 #ifdef CONFIG_NUMA
 	{
 		.ctl_name	= CTL_UNNUMBERED,
diff --git a/mm/Kconfig b/mm/Kconfig
index c2b57d8..71830ba 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -226,6 +226,25 @@ config HAVE_MLOCKED_PAGE_BIT
 config MMU_NOTIFIER
 	bool
 
+config DEFAULT_MMAP_MIN_ADDR
+        int "Low address space to protect from user allocation"
+        default 4096
+        help
+	  This is the portion of low virtual memory which should be protected
+	  from userspace allocation.  Keeping a user from writing to low pages
+	  can help reduce the impact of kernel NULL pointer bugs.
+
+	  For most ia64, ppc64 and x86 users with lots of address space
+	  a value of 65536 is reasonable and should cause no problems.
+	  On arm and other archs it should not be higher than 32768.
+	  Programs which use vm86 functionality would either need additional
+	  permissions from either the LSM or the capabilities module or have
+	  this protection disabled.
+
+	  This value can be changed after boot using the
+	  /proc/sys/vm/mmap_min_addr tunable.
+
+
 config NOMMU_INITIAL_TRIM_EXCESS
 	int "Turn on mmap() excess space trimming before booting"
 	depends on !MMU
diff --git a/mm/mmap.c b/mm/mmap.c
index 6b7b1a9..2b43fa1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -87,6 +87,9 @@ int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
 
+/* amount of vm to protect from userspace access */
+unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/security/Kconfig b/security/Kconfig
index bb24477..d23c839 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -110,28 +110,8 @@ config SECURITY_ROOTPLUG
 
 	  See <http://www.linuxjournal.com/article.php?sid=6279> for
 	  more information about this module.
-	  
-	  If you are unsure how to answer this question, answer N.
-
-config SECURITY_DEFAULT_MMAP_MIN_ADDR
-        int "Low address space to protect from user allocation"
-        depends on SECURITY
-        default 0
-        help
-	  This is the portion of low virtual memory which should be protected
-	  from userspace allocation.  Keeping a user from writing to low pages
-	  can help reduce the impact of kernel NULL pointer bugs.
-
-	  For most ia64, ppc64 and x86 users with lots of address space
-	  a value of 65536 is reasonable and should cause no problems.
-	  On arm and other archs it should not be higher than 32768.
-	  Programs which use vm86 functionality would either need additional
-	  permissions from either the LSM or the capabilities module or have
-	  this protection disabled.
-
-	  This value can be changed after boot using the
-	  /proc/sys/vm/mmap_min_addr tunable.
 
+	  If you are unsure how to answer this question, answer N.
 
 source security/selinux/Kconfig
 source security/smack/Kconfig
diff --git a/security/security.c b/security/security.c
index 5284255..dc7674f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -26,9 +26,6 @@ extern void security_fixup_ops(struct security_operations *ops);
 
 struct security_operations *security_ops;	/* Initialized to NULL */
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR;
-
 static inline int verify(struct security_operations *ops)
 {
 	/* verify the security_operations structure exists */
-- 
cgit v0.10.2


From d81e77f041881d8d91c767b8c84f9068290b10c6 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Mon, 25 May 2009 16:50:10 +0000
Subject: [ARM] pxa/imote2: fix UCAM sensor board ADC model number

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index 2121309..2b27336 100644
--- a/arch/arm/mach-pxa/imote2.c
+++ b/arch/arm/mach-pxa/imote2.c
@@ -412,7 +412,7 @@ static struct platform_device imote2_flash_device = {
  */
 static struct i2c_board_info __initdata imote2_i2c_board_info[] = {
 	{ /* UCAM sensor board */
-		.type = "max1238",
+		.type = "max1239",
 		.addr = 0x35,
 	}, { /* ITS400 Sensor board only */
 		.type = "max1363",
-- 
cgit v0.10.2


From 1257629b0712a0a68a24c532a05a4cd23e3f7565 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Tue, 26 May 2009 22:03:32 +0200
Subject: [ARM] pxa: fix pxa27x_udc default pullup GPIO

Currently, pxa27x_udc tries to use GPIO 0 as D+ pullup if not
explicitly configured. Default to an invalid GPIO (-1) instead.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index d245e59c..29970f7 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -72,7 +72,10 @@ void __init pxa_set_mci_info(struct pxamci_platform_data *info)
 }
 
 
-static struct pxa2xx_udc_mach_info pxa_udc_info;
+static struct pxa2xx_udc_mach_info pxa_udc_info = {
+	.gpio_pullup = -1,
+	.gpio_vbus   = -1,
+};
 
 void __init pxa_set_udc_info(struct pxa2xx_udc_mach_info *info)
 {
-- 
cgit v0.10.2


From a4c43beaff0fe6c83aa2505dce8ffe65db8e0a33 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Jun 2009 23:02:33 -0300
Subject: perf report: Fix rbtree bug

Ingo Molnar reported:

> FYI, i just got this crash (segfault) in perf report after
> collecting a long profile from Xorg:
>
> Starting program: /home/mingo/tip/Documentation/perf_counter/perf report
> [Thread debugging using libthread_db enabled]
> Detaching after fork from child process 20008.
> [New Thread 0x7f92fd62a6f0 (LWP 20005)]
>
> Program received signal SIGSEGV, Segmentation fault.
> 0x000000000041031a in __rb_erase_color (node=0x142c090, parent=0x0,
> root=0x881918)
>     at util/rbtree.c:143
> 143			if (parent->rb_left == node)

It was a problem introduced in this cset:

 perf report: Fix comm sorting - 8229289b607682f90b946ad2c319526303c17700

This patch should fix it.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephane Eranian <eranian@googlemail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 6003cc3..86f23f0 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -699,17 +699,18 @@ static void output__resort(void)
 {
 	struct rb_node *next;
 	struct hist_entry *n;
+	struct rb_root *tree = &hist;
 
 	if (sort__need_collapse)
-		next = rb_first(&collapse_hists);
-	else
-		next = rb_first(&hist);
+		tree = &collapse_hists;
+
+	next = rb_first(tree);
 
 	while (next) {
 		n = rb_entry(next, struct hist_entry, rb_node);
 		next = rb_next(&n->rb_node);
 
-		rb_erase(&n->rb_node, &hist);
+		rb_erase(&n->rb_node, tree);
 		output__insert_entry(n);
 	}
 }
-- 
cgit v0.10.2


From 6e53cdf11dfc8d302ebb67e7112d1baf8d7c66d4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 08:53:05 +0200
Subject: perf top: Reduce default filter threshold

On idle systems 'perf top' comes up empty by default, because the event
count filter is set to 100.

Reduce it to 5 instead.

Also add an option to limit the number of functions displayed.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 7c907e2..3f7778b 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -64,9 +64,10 @@ static int			default_interval = 100000;
 static int			event_count[MAX_COUNTERS];
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
-static __u64			count_filter		       = 100;
+static __u64			count_filter			=  5;
+static int			print_entries			= 15;
 
-static int			target_pid				= -1;
+static int			target_pid			= -1;
 static int			profile_cpu			= -1;
 static int			nr_cpus				=  0;
 static unsigned int		realtime_prio			=  0;
@@ -254,7 +255,7 @@ static void print_sym_table(void)
 		struct symbol *sym = (struct symbol *)(syme + 1);
 		float pcnt;
 
-		if (++printed > 18 || syme->snap_count < count_filter)
+		if (++printed > print_entries || syme->snap_count < count_filter)
 			continue;
 
 		pcnt = 100.0 - (100.0 * ((sum_kevents - syme->snap_count) /
@@ -650,7 +651,7 @@ static const struct option options[] = {
 		    "number of seconds to delay between refreshes"),
 	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
 			    "dump the symbol table used for profiling"),
-	OPT_INTEGER('f', "--count-filter", &count_filter,
+	OPT_INTEGER('f', "count-filter", &count_filter,
 		    "only display functions with more events than this"),
 	OPT_BOOLEAN('g', "group", &group,
 			    "put the counters into a counter group"),
@@ -662,8 +663,10 @@ static const struct option options[] = {
 		    "track mmap events"),
 	OPT_BOOLEAN('U', "use-munmap", &use_munmap,
 		    "track munmap events"),
-	OPT_INTEGER('F', "--freq", &freq,
+	OPT_INTEGER('F', "freq", &freq,
 		    "profile at this frequency"),
+	OPT_INTEGER('E', "entries", &print_entries,
+		    "display this many functions"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From 8c032ec337bdf6ca9b083345b265130a55100b69 Mon Sep 17 00:00:00 2001
From: Daniel Schaeffer <daniel.schaeffer@timesys.com>
Date: Wed, 3 Jun 2009 17:23:54 -0400
Subject: mxc: Add i.MX27LITE board support

Sascha Hauer wrote:
> On Tue, Jun 02, 2009 at 04:18:42PM -0400, Daniel Schaeffer wrote:
>> Add basic support for the Logic i.MX27LITE board.
>>
>> Signed-off-by: Daniel Schaeffer <daniel.schaeffer@timesys.com>
>
> Besides the comment made by Fabio this looks ok to me.
>
> Sascha
>
>

Fixed issues pointed out by Fabio and Magnus, and rebased to mxc-master head.

Signed-off-by: Daniel Schaeffer <daniel.schaeffer@timesys.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/Kconfig b/arch/arm/mach-mx2/Kconfig
index 6155044..c77da58 100644
--- a/arch/arm/mach-mx2/Kconfig
+++ b/arch/arm/mach-mx2/Kconfig
@@ -59,4 +59,12 @@ config MACH_MX27_3DS
 	help
 	  Include support for MX27PDK platform. This includes specific
 	  configurations for the board and its peripherals.
+
+config MACH_MX27LITE
+	bool "LogicPD MX27 LITEKIT platform"
+	depends on MACH_MX27
+	help
+	  Include support for MX27 LITEKIT platform. This includes specific
+	  configurations for the board and its peripherals.
+
 endif
diff --git a/arch/arm/mach-mx2/Makefile b/arch/arm/mach-mx2/Makefile
index d140e2d..b9b1cca 100644
--- a/arch/arm/mach-mx2/Makefile
+++ b/arch/arm/mach-mx2/Makefile
@@ -16,3 +16,5 @@ obj-$(CONFIG_MACH_MX27ADS) += mx27ads.o
 obj-$(CONFIG_MACH_PCM038) += pcm038.o
 obj-$(CONFIG_MACH_PCM970_BASEBOARD) += pcm970-baseboard.o
 obj-$(CONFIG_MACH_MX27_3DS) += mx27pdk.o
+obj-$(CONFIG_MACH_MX27LITE) += mx27lite.o
+
diff --git a/arch/arm/mach-mx2/mx27lite.c b/arch/arm/mach-mx2/mx27lite.c
new file mode 100644
index 0000000..3ae11cb
--- /dev/null
+++ b/arch/arm/mach-mx2/mx27lite.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2007 Robert Schwebel <r.schwebel@pengutronix.de>, Pengutronix
+ * Copyright (C) 2008 Juergen Beisert (kernel@pengutronix.de)
+ * Copyright 2009 Daniel Schaeffer (daniel.schaeffer@timesys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/time.h>
+#include <asm/mach/map.h>
+#include <mach/hardware.h>
+#include <mach/common.h>
+#include <mach/imx-uart.h>
+#include <mach/iomux.h>
+#include <mach/board-mx27lite.h>
+
+#include "devices.h"
+
+static unsigned int mx27lite_pins[] = {
+	/* UART1 */
+	PE12_PF_UART1_TXD,
+	PE13_PF_UART1_RXD,
+	PE14_PF_UART1_CTS,
+	PE15_PF_UART1_RTS,
+	/* FEC */
+	PD0_AIN_FEC_TXD0,
+	PD1_AIN_FEC_TXD1,
+	PD2_AIN_FEC_TXD2,
+	PD3_AIN_FEC_TXD3,
+	PD4_AOUT_FEC_RX_ER,
+	PD5_AOUT_FEC_RXD1,
+	PD6_AOUT_FEC_RXD2,
+	PD7_AOUT_FEC_RXD3,
+	PD8_AF_FEC_MDIO,
+	PD9_AIN_FEC_MDC,
+	PD10_AOUT_FEC_CRS,
+	PD11_AOUT_FEC_TX_CLK,
+	PD12_AOUT_FEC_RXD0,
+	PD13_AOUT_FEC_RX_DV,
+	PD14_AOUT_FEC_RX_CLK,
+	PD15_AOUT_FEC_COL,
+	PD16_AIN_FEC_TX_ER,
+	PF23_AIN_FEC_TX_EN,
+};
+
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
+static struct platform_device *platform_devices[] __initdata = {
+	&mxc_fec_device,
+};
+
+static void __init mx27lite_init(void)
+{
+	mxc_gpio_setup_multiple_pins(mx27lite_pins, ARRAY_SIZE(mx27lite_pins),
+		"imx27lite");
+	mxc_register_device(&mxc_uart_device0, &uart_pdata);
+	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
+}
+
+static void __init mx27lite_timer_init(void)
+{
+	mx27_clocks_init(26000000);
+}
+
+static struct sys_timer mx27lite_timer = {
+	.init	= mx27lite_timer_init,
+};
+
+MACHINE_START(IMX27LITE, "LogicPD i.MX27LITE")
+	.phys_io        = AIPI_BASE_ADDR,
+	.io_pg_offst    = ((AIPI_BASE_ADDR_VIRT) >> 18) & 0xfffc,
+	.boot_params    = PHYS_OFFSET + 0x100,
+	.map_io         = mx27_map_io,
+	.init_irq       = mxc_init_irq,
+	.init_machine   = mx27lite_init,
+	.timer          = &mx27lite_timer,
+MACHINE_END
diff --git a/arch/arm/plat-mxc/include/mach/board-mx27lite.h b/arch/arm/plat-mxc/include/mach/board-mx27lite.h
new file mode 100644
index 0000000..a870f8e
--- /dev/null
+++ b/arch/arm/plat-mxc/include/mach/board-mx27lite.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_MXC_BOARD_MX27LITE_H__
+#define __ASM_ARCH_MXC_BOARD_MX27LITE_H__
+
+/* mandatory for CONFIG_DEBUG_LL */
+
+#define MXC_LL_UART_PADDR	UART1_BASE_ADDR
+#define MXC_LL_UART_VADDR	AIPS1_IO_ADDRESS(UART1_BASE_ADDR)
+
+#endif /* __ASM_ARCH_MXC_BOARD_MX27LITE_H__ */
diff --git a/arch/arm/plat-mxc/include/mach/debug-macro.S b/arch/arm/plat-mxc/include/mach/debug-macro.S
index e280978..bbc5f67 100644
--- a/arch/arm/plat-mxc/include/mach/debug-macro.S
+++ b/arch/arm/plat-mxc/include/mach/debug-macro.S
@@ -49,6 +49,9 @@
 #ifdef CONFIG_MACH_MX35_3DS
 #include <mach/board-mx35pdk.h>
 #endif
+#ifdef CONFIG_MACH_MX27LITE
+#include <mach/board-mx27lite.h>
+#endif
 		.macro	addruart,rx
 		mrc	p15, 0, \rx, c1, c0
 		tst	\rx, #1			@ MMU enabled?
-- 
cgit v0.10.2


From bcb86975dbcc24f820f1a37918d53914af29ace7 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Thu, 4 Jun 2009 15:14:34 +0900
Subject: TOMOYO: Remove unused parameter.

TOMOYO 2.2.0 does not check argv[] and envp[] upon execve().
We don't need to pass "struct tomoyo_page_buffer".

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 34bb641..eb75401 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -721,7 +721,7 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm,
 	}
 
 	/* Check execute permission. */
-	retval = tomoyo_check_exec_perm(old_domain, &r, tmp);
+	retval = tomoyo_check_exec_perm(old_domain, &r);
 	if (retval < 0)
 		goto out;
 
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index a67f9e6..ab0cd35 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -1012,13 +1012,11 @@ int tomoyo_check_file_perm(struct tomoyo_domain_info *domain,
  *
  * @domain:   Pointer to "struct tomoyo_domain_info".
  * @filename: Check permission for "execute".
- * @tmp:      Buffer for temporary use.
  *
  * Returns 0 on success, negativevalue otherwise.
  */
 int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain,
-			   const struct tomoyo_path_info *filename,
-			   struct tomoyo_page_buffer *tmp)
+			   const struct tomoyo_path_info *filename)
 {
 	const u8 mode = tomoyo_check_flags(domain, TOMOYO_MAC_FOR_FILE);
 
diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h
index f12d5ad..0fd588a 100644
--- a/security/tomoyo/tomoyo.h
+++ b/security/tomoyo/tomoyo.h
@@ -17,13 +17,11 @@ struct path;
 struct inode;
 struct linux_binprm;
 struct pt_regs;
-struct tomoyo_page_buffer;
 
 int tomoyo_check_file_perm(struct tomoyo_domain_info *domain,
 			   const char *filename, const u8 perm);
 int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain,
-			   const struct tomoyo_path_info *filename,
-			   struct tomoyo_page_buffer *buf);
+			   const struct tomoyo_path_info *filename);
 int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
 				 struct path *path, const int flag);
 int tomoyo_check_1path_perm(struct tomoyo_domain_info *domain,
-- 
cgit v0.10.2


From d08664fdb50795b29cf70b0269ea02f7248e76c3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 4 Jun 2009 09:58:18 +0200
Subject: ASoC: Fix build error in twl4030.c

Fix the (likely cut-n-paste) error by commit
16a30fbb0d3aa4ee829a2dd3d0e314e2b5ae96a9, which causes the error below:
  sound/soc/codecs/twl4030.c: In function 'twl4030_read_reg_cache':
  sound/soc/codecs/twl4030.c:152: error: 'cache' undeclared (first use in this function)

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index c53c7ca..4dbb853 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -145,6 +145,7 @@ struct twl4030_priv {
 static inline unsigned int twl4030_read_reg_cache(struct snd_soc_codec *codec,
 	unsigned int reg)
 {
+	u8 *cache = codec->reg_cache;
 
 	if (reg >= TWL4030_CACHEREGNUM)
 		return -EIO;
-- 
cgit v0.10.2


From 82ced6fd28653ab456c3e5b25e9ef3c1c96cd6e9 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Thu, 4 Jun 2009 10:46:43 +0200
Subject: ALSA: Add missing __devexit_p() markers

3 ISA sound drivers lack their __devexit_p() markers, which would
cause build failures when the kernel is built without hotplug support.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c
index 442b081..07df201 100644
--- a/sound/isa/es1688/es1688.c
+++ b/sound/isa/es1688/es1688.c
@@ -193,7 +193,7 @@ static int __devexit snd_es1688_remove(struct device *dev, unsigned int n)
 static struct isa_driver snd_es1688_driver = {
 	.match		= snd_es1688_match,
 	.probe		= snd_es1688_probe,
-	.remove		= snd_es1688_remove,
+	.remove		= __devexit_p(snd_es1688_remove),
 #if 0	/* FIXME */
 	.suspend	= snd_es1688_suspend,
 	.resume		= snd_es1688_resume,
diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c
index 180a8de..65e4b18 100644
--- a/sound/isa/gus/gusextreme.c
+++ b/sound/isa/gus/gusextreme.c
@@ -348,7 +348,7 @@ static int __devexit snd_gusextreme_remove(struct device *dev, unsigned int n)
 static struct isa_driver snd_gusextreme_driver = {
 	.match		= snd_gusextreme_match,
 	.probe		= snd_gusextreme_probe,
-	.remove		= snd_gusextreme_remove,
+	.remove		= __devexit_p(snd_gusextreme_remove),
 #if 0	/* FIXME */
 	.suspend	= snd_gusextreme_suspend,
 	.resume		= snd_gusextreme_resume,
diff --git a/sound/parisc/harmony.c b/sound/parisc/harmony.c
index 6055fd6..610c439 100644
--- a/sound/parisc/harmony.c
+++ b/sound/parisc/harmony.c
@@ -1020,7 +1020,7 @@ static struct parisc_driver snd_harmony_driver = {
 	.name = "harmony",
 	.id_table = snd_harmony_devtable,
 	.probe = snd_harmony_probe,
-	.remove = snd_harmony_remove,
+	.remove = __devexit_p(snd_harmony_remove),
 };
 
 static int __init 
-- 
cgit v0.10.2


From f79b1b146b52765ee38bfb91bb14eb850fa98017 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Thu, 28 May 2009 00:25:05 +0800
Subject: PCI: use fixed-up device class when configuring device

The device class may be changed after the fixup, so re-read the class
value from pci_dev when configuring the device.  Otherwise some devices
such as JMicron SATA controller won't work.

Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Reviewed-by: Grant Grundler <grundler@parisc-linux.org>
Tested-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e3c3e08..f1ae247 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -745,6 +745,8 @@ int pci_setup_device(struct pci_dev *dev)
 
 	/* Early fixups, before probing the BARs */
 	pci_fixup_device(pci_fixup_early, dev);
+	/* device class may be changed after fixup */
+	class = dev->class >> 8;
 
 	switch (dev->hdr_type) {		    /* header type */
 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
-- 
cgit v0.10.2


From 75e613cdc7bb2ba3795b1bc3ddf19476c767ba68 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 3 Jun 2009 00:13:13 -0700
Subject: x86/pci: fix mmconfig detection with 32bit near 4g

Pascal reported and bisected a commit:
|	x86/PCI: don't call e820_all_mapped with -1 in the mmconfig case

which broke one system system.

ACPI: Using IOAPIC for interrupt routing
PCI: MCFG configuration 0: base f0000000 segment 0 buses 0 - 255
PCI: MCFG area at f0000000 reserved in ACPI motherboard resources
PCI: Using MMCONFIG for extended config space

it didn't have
PCI: updated MCFG configuration 0: base f0000000 segment 0 buses 0 - 63
anymore, and try to use 0xf000000 - 0xffffffff for mmconfig

For 32bit, mcfg_res->end could be 32bit only (if 64 resources aren't used)
So use end - 1 to pass the value in mcfg->end to avoid overflow.

We don't need to worry about the e820 path, they are always 64 bit.

Reported-by: Pascal Terjan <pterjan@mandriva.com>
Bisected-by: Pascal Terjan <pterjan@mandriva.com>
Tested-by: Pascal Terjan <pterjan@mandriva.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: stable@kernel.org
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 5fa10bb..8766b0e 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -375,7 +375,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res,
 		if (!fixmem32)
 			return AE_OK;
 		if ((mcfg_res->start >= fixmem32->address) &&
-		    (mcfg_res->end <= (fixmem32->address +
+		    (mcfg_res->end < (fixmem32->address +
 				      fixmem32->address_length))) {
 			mcfg_res->flags = 1;
 			return AE_CTRL_TERMINATE;
@@ -392,7 +392,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res,
 		return AE_OK;
 
 	if ((mcfg_res->start >= address.minimum) &&
-	    (mcfg_res->end <= (address.minimum + address.address_length))) {
+	    (mcfg_res->end < (address.minimum + address.address_length))) {
 		mcfg_res->flags = 1;
 		return AE_CTRL_TERMINATE;
 	}
@@ -418,7 +418,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
 	struct resource mcfg_res;
 
 	mcfg_res.start = start;
-	mcfg_res.end = end;
+	mcfg_res.end = end - 1;
 	mcfg_res.flags = 0;
 
 	acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL);
-- 
cgit v0.10.2


From 1b58c2515be48d5df79d20210ac5a86e30094de2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 4 Jun 2009 09:49:59 +1000
Subject: perf_counter: powerpc: Use new identifier names in powerpc-specific
 code

Commit b23f3325 ("perf_counter: Rename various fields") fixed up
most of the uses of the renamed fields, but missed one instance
of "record_type" in powerpc-specific code which needs to be changed
to "sample_type", and a "PERF_RECORD_ADDR" in the same statement that
needs to be changed to "PERF_SAMPLE_ADDR", causing compilation
errors on powerpc.  This fixes it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18983.3111.770392.800486@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 4cc4ac5..232b00a 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -1002,7 +1002,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	if (record) {
 		addr = 0;
-		if (counter->attr.record_type & PERF_RECORD_ADDR) {
+		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
 			/*
 			 * The user wants a data address recorded.
 			 * If we're not doing instruction sampling,
-- 
cgit v0.10.2


From 3aff27ca84fa94311ae99189e54fed8d83b69fc1 Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Wed, 3 Jun 2009 16:42:25 +0800
Subject: perf_counter: Documentation update

The 'nmi' bit is no longer there.

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <20090603084225.GA6553@ywang-moblin2.bj.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt
index 9930c4b..d325076 100644
--- a/Documentation/perf_counter/design.txt
+++ b/Documentation/perf_counter/design.txt
@@ -48,7 +48,6 @@ struct perf_counter_hw_event {
         __u32                   read_format;
 
         __u64                   disabled       :  1, /* off by default        */
-                                nmi            :  1, /* NMI sampling          */
                                 inherit        :  1, /* children inherit it   */
                                 pinned         :  1, /* must always be on PMU */
                                 exclusive      :  1, /* only group on PMU     */
@@ -195,12 +194,6 @@ The 'disabled' bit specifies whether the counter starts out disabled
 or enabled.  If it is initially disabled, it can be enabled by ioctl
 or prctl (see below).
 
-The 'nmi' bit specifies, for hardware events, whether the counter
-should be set up to request non-maskable interrupts (NMIs) or normal
-interrupts.  This bit is ignored if the user doesn't have
-CAP_SYS_ADMIN privilege (i.e. is not root) or if the CPU doesn't
-generate NMIs from hardware counters.
-
 The 'inherit' bit, if set, specifies that this counter should count
 events on descendant tasks as well as the task specified.  This only
 applies to new descendents, not to any existing descendents at the
-- 
cgit v0.10.2


From 48c72fccbfb1db01b5d0b98baff4442fea50d7a4 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 4 Jun 2009 20:20:24 +0900
Subject: sh: 16-bit get_unaligned() sh4a fix

This patch fixes the 16-bit case of the sh4a specific
unaligned access implementation. Without this patch
the 16-bit version of sh4a get_unaligned() results in
a 32-bit read which may read more data than intended
and/or cross page boundaries.

Unbreaks mtd NOR write handling on Migo-R.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/unaligned-sh4a.h b/arch/sh/include/asm/unaligned-sh4a.h
index d8f8977..9f4dd25 100644
--- a/arch/sh/include/asm/unaligned-sh4a.h
+++ b/arch/sh/include/asm/unaligned-sh4a.h
@@ -3,9 +3,9 @@
 
 /*
  * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
- * Support for 16 and 64-bit accesses are done through shifting and
- * masking relative to the endianness. Unaligned stores are not supported
- * by the instruction encoding, so these continue to use the packed
+ * Support for 64-bit accesses are done through shifting and masking
+ * relative to the endianness. Unaligned stores are not supported by the
+ * instruction encoding, so these continue to use the packed
  * struct.
  *
  * The same note as with the movli.l/movco.l pair applies here, as long
@@ -41,9 +41,9 @@ struct __una_u64 { u64 x __attribute__((packed)); };
 static inline u16 __get_unaligned_cpu16(const u8 *p)
 {
 #ifdef __LITTLE_ENDIAN
-	return __get_unaligned_cpu32(p) & 0xffff;
+	return p[0] | p[1] << 8;
 #else
-	return __get_unaligned_cpu32(p) >> 16;
+	return p[0] << 8 | p[1];
 #endif
 }
 
-- 
cgit v0.10.2


From df97992c6e25ffc66c549c8bc59262dc627c6d17 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 13:41:22 +0200
Subject: perf record/report: Fix PID/COMM handling

Fix two bugs causing lost comm mappings:

 - initial PID is not 0 but getpid()

 - when we are unable to handle an mmap event, dont assume the event
   itself is broken - try to parse the stream. This way we wont lose
   comm events.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index efa2eb4..bf59df5 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -430,7 +430,7 @@ static int __cmd_record(int argc, const char **argv)
 	}
 
 	if (!system_wide) {
-		open_counters(-1, target_pid != -1 ? target_pid : 0);
+		open_counters(-1, target_pid != -1 ? target_pid : getpid());
 	} else for (i = 0; i < nr_cpus; i++)
 		open_counters(i, target_pid);
 
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 86f23f0..ff6f657 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -852,7 +852,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 
 	if (thread == NULL || map == NULL) {
 		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
-		return -1;
+		return 0;
 	}
 
 	thread__insert_map(thread, map);
-- 
cgit v0.10.2


From 0e7ddf7eeeef5aea85412120539ab5369577faeb Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 4 Jun 2009 11:18:14 +0000
Subject: drm/i915: Remove a bad BUG_ON in the fence management code.

This could be triggered by a gtt mapping fault on 965 that decides to
remove the fence from another object that happens to be active currently.
Since the other object doesn't rely on the fence reg for its execution, we
don't wait for it to finish.  We'll soon be not waiting on 915 most of the
time as well, so just drop the BUG_ON.

Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 670d128..39f5c65 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2260,9 +2260,6 @@ try_again:
 			goto try_again;
 		}
 
-		BUG_ON(old_obj_priv->active ||
-		       (reg->obj->write_domain & I915_GEM_GPU_DOMAINS));
-
 		/*
 		 * Zap this virtual mapping so we can set up a fence again
 		 * for this object next time we need it.
-- 
cgit v0.10.2


From 1fd1c624362819ecc36db2458c6a972c48ae92d6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 3 Jun 2009 07:26:58 +0000
Subject: drm/i915: Save/restore cursor state on suspend/resume.

This may fix cursor corruption in X on resume, which would persist until
the cursor was hidden and then shown again.

V2: Also include the cursor control regs.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c431fa5..fcaa544 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -285,6 +285,13 @@ typedef struct drm_i915_private {
 	u8 saveDACMASK;
 	u8 saveCR[37];
 	uint64_t saveFENCE[16];
+	u32 saveCURACNTR;
+	u32 saveCURAPOS;
+	u32 saveCURABASE;
+	u32 saveCURBCNTR;
+	u32 saveCURBPOS;
+	u32 saveCURBBASE;
+	u32 saveCURSIZE;
 
 	struct {
 		struct drm_mm gtt_space;
@@ -642,6 +649,7 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
 void i915_gem_free_all_phys_object(struct drm_device *dev);
 int i915_gem_object_get_pages(struct drm_gem_object *obj);
 void i915_gem_object_put_pages(struct drm_gem_object *obj);
+void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index ce8a213..a98e283 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -295,6 +295,16 @@ int i915_save_state(struct drm_device *dev)
 	i915_save_palette(dev, PIPE_B);
 	dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT);
 
+	/* Cursor state */
+	dev_priv->saveCURACNTR = I915_READ(CURACNTR);
+	dev_priv->saveCURAPOS = I915_READ(CURAPOS);
+	dev_priv->saveCURABASE = I915_READ(CURABASE);
+	dev_priv->saveCURBCNTR = I915_READ(CURBCNTR);
+	dev_priv->saveCURBPOS = I915_READ(CURBPOS);
+	dev_priv->saveCURBBASE = I915_READ(CURBBASE);
+	if (!IS_I9XX(dev))
+		dev_priv->saveCURSIZE = I915_READ(CURSIZE);
+
 	/* CRT state */
 	dev_priv->saveADPA = I915_READ(ADPA);
 
@@ -480,6 +490,16 @@ int i915_restore_state(struct drm_device *dev)
 	I915_WRITE(DSPBCNTR, dev_priv->saveDSPBCNTR);
 	I915_WRITE(DSPBADDR, I915_READ(DSPBADDR));
 
+	/* Cursor state */
+	I915_WRITE(CURAPOS, dev_priv->saveCURAPOS);
+	I915_WRITE(CURACNTR, dev_priv->saveCURACNTR);
+	I915_WRITE(CURABASE, dev_priv->saveCURABASE);
+	I915_WRITE(CURBPOS, dev_priv->saveCURBPOS);
+	I915_WRITE(CURBCNTR, dev_priv->saveCURBCNTR);
+	I915_WRITE(CURBBASE, dev_priv->saveCURBBASE);
+	if (!IS_I9XX(dev))
+		I915_WRITE(CURSIZE, dev_priv->saveCURSIZE);
+
 	/* CRT state */
 	I915_WRITE(ADPA, dev_priv->saveADPA);
 
-- 
cgit v0.10.2


From b962442e46a9340bdbc6711982c59ff0cc2b5afb Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 3 Jun 2009 07:27:35 +0000
Subject: drm/i915: Change GEM throttling to be 20ms like the comment says.

keithp didn't like the original 20ms plan because a cooperative client could
be starved by an uncooperative client.  There may even have been problems
with cooperative clients versus cooperative clients.  So keithp changed
throttle to just wait for the second to last seqno emitted by that client.
It worked well, until we started getting more round-trips to the server
due to DRI2 -- the server throttles in BlockHandler, and so if you did more
than one round trip after finishing your frame, you'd end up unintentionally
syncing to the swap.

Fix this by keeping track of the client's requests, so the client can wait
when it has an outstanding request over 20ms old.  This should have
non-starving behavior, good behavior in the presence of restarts, and less
waiting.  Improves high-settings openarena performance on my GM45 by 50%.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 53d5445..0c222c2 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1273,8 +1273,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
 
 	file_priv->driver_priv = i915_file_priv;
 
-	i915_file_priv->mm.last_gem_seqno = 0;
-	i915_file_priv->mm.last_gem_throttle_seqno = 0;
+	INIT_LIST_HEAD(&i915_file_priv->mm.request_list);
 
 	return 0;
 }
@@ -1311,6 +1310,7 @@ void i915_driver_lastclose(struct drm_device * dev)
 void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	i915_gem_release(dev, file_priv);
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		i915_mem_release(dev, file_priv, dev_priv->agp_heap);
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fcaa544..e0fac5f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -498,13 +498,16 @@ struct drm_i915_gem_request {
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
 
+	/** global list entry for this request */
 	struct list_head list;
+
+	/** file_priv list entry for this request */
+	struct list_head client_list;
 };
 
 struct drm_i915_file_private {
 	struct {
-		uint32_t last_gem_seqno;
-		uint32_t last_gem_throttle_seqno;
+		struct list_head request_list;
 	} mm;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 39f5c65..3fbd8a0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1481,14 +1481,19 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
  * Returned sequence numbers are nonzero on success.
  */
 static uint32_t
-i915_add_request(struct drm_device *dev, uint32_t flush_domains)
+i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
+		 uint32_t flush_domains)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_file_private *i915_file_priv = NULL;
 	struct drm_i915_gem_request *request;
 	uint32_t seqno;
 	int was_empty;
 	RING_LOCALS;
 
+	if (file_priv != NULL)
+		i915_file_priv = file_priv->driver_priv;
+
 	request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
 	if (request == NULL)
 		return 0;
@@ -1515,6 +1520,12 @@ i915_add_request(struct drm_device *dev, uint32_t flush_domains)
 	request->emitted_jiffies = jiffies;
 	was_empty = list_empty(&dev_priv->mm.request_list);
 	list_add_tail(&request->list, &dev_priv->mm.request_list);
+	if (i915_file_priv) {
+		list_add_tail(&request->client_list,
+			      &i915_file_priv->mm.request_list);
+	} else {
+		INIT_LIST_HEAD(&request->client_list);
+	}
 
 	/* Associate any objects on the flushing list matching the write
 	 * domain we're flushing with our flush.
@@ -1664,6 +1675,7 @@ i915_gem_retire_requests(struct drm_device *dev)
 			i915_gem_retire_request(dev, request);
 
 			list_del(&request->list);
+			list_del(&request->client_list);
 			drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
 		} else
 			break;
@@ -1977,7 +1989,7 @@ i915_gem_evict_something(struct drm_device *dev)
 			i915_gem_flush(dev,
 				       obj->write_domain,
 				       obj->write_domain);
-			i915_add_request(dev, obj->write_domain);
+			i915_add_request(dev, NULL, obj->write_domain);
 
 			obj = NULL;
 			continue;
@@ -2248,7 +2260,7 @@ try_again:
 				i915_gem_flush(dev,
 					       I915_GEM_GPU_DOMAINS,
 					       I915_GEM_GPU_DOMAINS);
-				seqno = i915_add_request(dev,
+				seqno = i915_add_request(dev, NULL,
 							 I915_GEM_GPU_DOMAINS);
 				if (seqno == 0)
 					return -ENOMEM;
@@ -2452,7 +2464,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 
 	/* Queue the GPU write cache flushing we need. */
 	i915_gem_flush(dev, 0, obj->write_domain);
-	seqno = i915_add_request(dev, obj->write_domain);
+	seqno = i915_add_request(dev, NULL, obj->write_domain);
 	obj->write_domain = 0;
 	i915_gem_object_move_to_active(obj, seqno);
 }
@@ -3089,6 +3101,10 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
  * This should get us reasonable parallelism between CPU and GPU but also
  * relatively low latency when blocking on a particular request to finish.
  */
@@ -3097,15 +3113,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
 {
 	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
 	int ret = 0;
-	uint32_t seqno;
+	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
 
 	mutex_lock(&dev->struct_mutex);
-	seqno = i915_file_priv->mm.last_gem_throttle_seqno;
-	i915_file_priv->mm.last_gem_throttle_seqno =
-		i915_file_priv->mm.last_gem_seqno;
-	if (seqno)
-		ret = i915_wait_request(dev, seqno);
+	while (!list_empty(&i915_file_priv->mm.request_list)) {
+		struct drm_i915_gem_request *request;
+
+		request = list_first_entry(&i915_file_priv->mm.request_list,
+					   struct drm_i915_gem_request,
+					   client_list);
+
+		if (time_after_eq(request->emitted_jiffies, recent_enough))
+			break;
+
+		ret = i915_wait_request(dev, request->seqno);
+		if (ret != 0)
+			break;
+	}
 	mutex_unlock(&dev->struct_mutex);
+
 	return ret;
 }
 
@@ -3187,7 +3213,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file_priv)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
 	struct drm_gem_object **object_list = NULL;
@@ -3363,7 +3388,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 			       dev->invalidate_domains,
 			       dev->flush_domains);
 		if (dev->flush_domains)
-			(void)i915_add_request(dev, dev->flush_domains);
+			(void)i915_add_request(dev, file_priv,
+					       dev->flush_domains);
 	}
 
 	for (i = 0; i < args->buffer_count; i++) {
@@ -3412,9 +3438,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	 * *some* interrupts representing completion of buffers that we can
 	 * wait on when trying to clear up gtt space).
 	 */
-	seqno = i915_add_request(dev, flush_domains);
+	seqno = i915_add_request(dev, file_priv, flush_domains);
 	BUG_ON(seqno == 0);
-	i915_file_priv->mm.last_gem_seqno = seqno;
 	for (i = 0; i < args->buffer_count; i++) {
 		struct drm_gem_object *obj = object_list[i];
 
@@ -3802,7 +3827,7 @@ i915_gem_idle(struct drm_device *dev)
 	 */
 	i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
 		       ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-	seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+	seqno = i915_add_request(dev, NULL, ~I915_GEM_DOMAIN_CPU);
 
 	if (seqno == 0) {
 		mutex_unlock(&dev->struct_mutex);
@@ -4352,3 +4377,17 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	drm_agp_chipset_flush(dev);
 	return 0;
 }
+
+void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
+{
+	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+
+	/* Clean up our request list when the client is going away, so that
+	 * later retire_requests won't dereference our soon-to-be-gone
+	 * file_priv.
+	 */
+	mutex_lock(&dev->struct_mutex);
+	while (!list_empty(&i915_file_priv->mm.request_list))
+		list_del_init(i915_file_priv->mm.request_list.next);
+	mutex_unlock(&dev->struct_mutex);
+}
-- 
cgit v0.10.2


From af794b94ae8a16fb4a9da6ce640c122efb44e2a0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 13:58:13 +0200
Subject: perf_counter tools: Build with native optimization

Build the tools with -march=native by default.

No measurable difference in speed though, compared to the
default, on a Nehalem testbox.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 005709b..414399c 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -159,7 +159,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
-CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
+CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 -march=native
 LDFLAGS = -lpthread -lrt -lelf
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
-- 
cgit v0.10.2


From 95ed6fd06e52bf850cd17524f0b36ed14300c10d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Jun 2009 15:00:45 +0200
Subject: perf report: Simplify symbol output

The DSO can be printed already - no need to repeat it in the
symbol field.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index ff6f657..56c664d 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -382,7 +382,7 @@ sort__comm_print(FILE *fp, struct hist_entry *self)
 }
 
 static struct sort_entry sort_comm = {
-	.header 	= "          Command",
+	.header		= "          Command",
 	.cmp		= sort__comm_cmp,
 	.collapse	= sort__comm_collapse,
 	.print		= sort__comm_print,
@@ -414,11 +414,11 @@ sort__dso_print(FILE *fp, struct hist_entry *self)
 	if (self->dso)
 		return fprintf(fp, "  %-25s", self->dso->name);
 
-	return fprintf(fp, "  %016llx", (__u64)self->ip);
+	return fprintf(fp, "  %016llx         ", (__u64)self->ip);
 }
 
 static struct sort_entry sort_dso = {
-	.header = " Shared Object          ",
+	.header = " Shared Object            ",
 	.cmp	= sort__dso_cmp,
 	.print	= sort__dso_print,
 };
@@ -447,21 +447,16 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
 	if (verbose)
 		ret += fprintf(fp, "  %#018llx", (__u64)self->ip);
 
-	if (self->dso)
-		ret += fprintf(fp, "  %s: ", self->dso->name);
-	else
-		ret += fprintf(fp, "  %#016llx: ", (__u64)self->ip);
-
 	if (self->sym)
-		ret += fprintf(fp, "%s", self->sym->name);
+		ret += fprintf(fp, "  %s", self->sym->name);
 	else
-		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+		ret += fprintf(fp, "  %#016llx", (__u64)self->ip);
 
 	return ret;
 }
 
 static struct sort_entry sort_sym = {
-	.header = " Shared Object: Symbol",
+	.header = " Symbol",
 	.cmp	= sort__sym_cmp,
 	.print	= sort__sym_print,
 };
-- 
cgit v0.10.2


From bd74137ec9aaca3df3ff22b92455fddf7afaced1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 14:13:04 +0200
Subject: perf_counter tools: Print out symbol parsing errors only if --verbose

Also, add a suggestion to 'perf report', if the default sort order is
used.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 56c664d..15fe9da 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -26,7 +26,10 @@
 
 static char		const *input_name = "perf.data";
 static char		*vmlinux = NULL;
-static char		*sort_order = "comm,dso";
+
+static char		default_sort_order[] = "comm,dso";
+static char		*sort_order = default_sort_order;
+
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
@@ -103,9 +106,10 @@ static struct dso *dsos__findnew(const char *name)
 	if (!dso)
 		goto out_delete_dso;
 
-	nr = dso__load(dso, NULL);
+	nr = dso__load(dso, NULL, verbose);
 	if (nr < 0) {
-		fprintf(stderr, "Failed to open: %s\n", name);
+		if (verbose)
+			fprintf(stderr, "Failed to open: %s\n", name);
 		goto out_delete_dso;
 	}
 	if (!nr && verbose) {
@@ -139,7 +143,7 @@ static int load_kernel(void)
 	if (!kernel_dso)
 		return -1;
 
-	err = dso__load_kernel(kernel_dso, vmlinux, NULL);
+	err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
 	if (err) {
 		dso__delete(kernel_dso);
 		kernel_dso = NULL;
@@ -741,6 +745,12 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 		ret += hist_entry__fprintf(fp, pos, total_samples);
 	}
 
+	if (!strcmp(sort_order, default_sort_order)) {
+		fprintf(fp, "#\n");
+		fprintf(fp, "# ( For more details, try: perf report --sort comm,dso,symbol )\n");
+		fprintf(fp, "#\n");
+	}
+
 	return ret;
 }
 
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 3f7778b..548a8da 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -349,7 +349,7 @@ static int parse_symbols(void)
 	if (kernel_dso == NULL)
 		return -1;
 
-	if (dso__load_kernel(kernel_dso, NULL, symbol_filter) != 0)
+	if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0)
 		goto out_delete_dso;
 
 	node = rb_first(&kernel_dso->syms);
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index 35ee6de..15d5cf9 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -124,7 +124,7 @@ size_t dso__fprintf(struct dso *self, FILE *fp)
 	return ret;
 }
 
-static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter)
+static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verbose)
 {
 	struct rb_node *nd, *prevnd;
 	char *line = NULL;
@@ -370,7 +370,7 @@ static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
 }
 
 static int dso__load_sym(struct dso *self, int fd, const char *name,
-			 symbol_filter_t filter)
+			 symbol_filter_t filter, int verbose)
 {
 	Elf_Data *symstrs;
 	uint32_t nr_syms;
@@ -387,13 +387,15 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 
 	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
 	if (elf == NULL) {
-		fprintf(stderr, "%s: cannot read %s ELF file.\n",
-			__func__, name);
+		if (verbose)
+			fprintf(stderr, "%s: cannot read %s ELF file.\n",
+				__func__, name);
 		goto out_close;
 	}
 
 	if (gelf_getehdr(elf, &ehdr) == NULL) {
-		fprintf(stderr, "%s: cannot get elf header.\n", __func__);
+		if (verbose)
+			fprintf(stderr, "%s: cannot get elf header.\n", __func__);
 		goto out_elf_end;
 	}
 
@@ -473,7 +475,7 @@ out_close:
 	return err;
 }
 
-int dso__load(struct dso *self, symbol_filter_t filter)
+int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
 {
 	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
 	char *name = malloc(size);
@@ -505,7 +507,7 @@ more:
 		fd = open(name, O_RDONLY);
 	} while (fd < 0);
 
-	ret = dso__load_sym(self, fd, name, filter);
+	ret = dso__load_sym(self, fd, name, filter, verbose);
 	close(fd);
 
 	/*
@@ -520,28 +522,29 @@ out:
 }
 
 static int dso__load_vmlinux(struct dso *self, const char *vmlinux,
-			     symbol_filter_t filter)
+			     symbol_filter_t filter, int verbose)
 {
 	int err, fd = open(vmlinux, O_RDONLY);
 
 	if (fd < 0)
 		return -1;
 
-	err = dso__load_sym(self, fd, vmlinux, filter);
+	err = dso__load_sym(self, fd, vmlinux, filter, verbose);
 	close(fd);
 
 	return err;
 }
 
-int dso__load_kernel(struct dso *self, const char *vmlinux, symbol_filter_t filter)
+int dso__load_kernel(struct dso *self, const char *vmlinux,
+		     symbol_filter_t filter, int verbose)
 {
 	int err = -1;
 
 	if (vmlinux)
-		err = dso__load_vmlinux(self, vmlinux, filter);
+		err = dso__load_vmlinux(self, vmlinux, filter, verbose);
 
 	if (err)
-		err = dso__load_kallsyms(self, filter);
+		err = dso__load_kallsyms(self, filter, verbose);
 
 	return err;
 }
diff --git a/Documentation/perf_counter/util/symbol.h b/Documentation/perf_counter/util/symbol.h
index b0299bc..8dd8522 100644
--- a/Documentation/perf_counter/util/symbol.h
+++ b/Documentation/perf_counter/util/symbol.h
@@ -32,8 +32,8 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
 struct symbol *dso__find_symbol(struct dso *self, uint64_t ip);
 
 int dso__load_kernel(struct dso *self, const char *vmlinux,
-		     symbol_filter_t filter);
-int dso__load(struct dso *self, symbol_filter_t filter);
+		     symbol_filter_t filter, int verbose);
+int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
 
 size_t dso__fprintf(struct dso *self, FILE *fp);
 
-- 
cgit v0.10.2


From 65f759831179bb8922f2a91a989487161d476a94 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Thu, 4 Jun 2009 13:46:16 +0400
Subject: ALSA: hda_intel: fix build error when !PM

Fix this build error when CONFIG_PM is not set:
ound/pci/hda/hda_intel.c: In function 'azx_bus_reset':
sound/pci/hda/hda_intel.c:1270: error: implicit declaration of function 'snd_pcm_suspend_all'
sound/pci/hda/hda_intel.c:1271: error: implicit declaration of function 'snd_hda_suspend'
sound/pci/hda/hda_intel.c:1272: error: implicit declaration of function 'snd_hda_resume'

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 9f44645..4e9ea70 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1260,17 +1260,20 @@ static void azx_stop_chip(struct azx *chip);
 static void azx_bus_reset(struct hda_bus *bus)
 {
 	struct azx *chip = bus->private_data;
-	int i;
 
 	bus->in_reset = 1;
 	azx_stop_chip(chip);
 	azx_init_chip(chip);
+#ifdef CONFIG_PM
 	if (chip->initialized) {
+		int i;
+
 		for (i = 0; i < AZX_MAX_PCMS; i++)
 			snd_pcm_suspend_all(chip->pcm[i]);
 		snd_hda_suspend(chip->bus);
 		snd_hda_resume(chip->bus);
 	}
+#endif
 	bus->in_reset = 0;
 }
 
-- 
cgit v0.10.2


From 05ca061eb9704ad9b0739f88046276792b75f2c1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 14:21:16 +0200
Subject: perf report: Print out the total number of events

So that the statistical quality of the profile can be estimated at a glance.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 15fe9da..be392e0 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -722,6 +722,8 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 	size_t ret = 0;
 
 	fprintf(fp, "#\n");
+	fprintf(fp, "# (%Ld profiler events)\n", (__u64)total_samples);
+	fprintf(fp, "#\n");
 
 	fprintf(fp, "# Overhead");
 	list_for_each_entry(se, &hist_entry__sort_list, list)
-- 
cgit v0.10.2


From e3509ff0fb9df53e45cd68488e3b463a80455db7 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 3 Jun 2009 17:44:49 +0200
Subject: ASoC: fix NULL pointer dereference in soc_suspend()

In case the initalization of an soc_device failed, there is no codec
associated with it. soc_suspend() will still dereference the pointer
and cause an Ooops when entering the sleep mode.

This happens on our board with a multi-target kernel image when booted
on a machine without audio circuits.

This patch makes the code bail out very early in this special case.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 4aa8e2d..3f44150 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -628,6 +628,12 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 	struct snd_soc_codec *codec = card->codec;
 	int i;
 
+	/* If the initialization of this soc device failed, there is no codec
+	 * associated with it. Just bail out in this case.
+	 */
+	if (!codec)
+		return 0;
+
 	/* Due to the resume being scheduled into a workqueue we could
 	* suspend before that's finished - wait for it to complete.
 	 */
-- 
cgit v0.10.2


From 71dd8945d8d827ab101cd287f9480ef22fc7c1b6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Jun 2009 15:16:56 +0200
Subject: perf report: Add consistent spacing rules

Make the sort header and the print function have the same column width.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index be392e0..e930b4e 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -344,11 +344,11 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__thread_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, " %16s:%5d", self->thread->comm ?: "", self->thread->pid);
+	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
 }
 
 static struct sort_entry sort_thread = {
-	.header = "         Command: Pid ",
+	.header = "         Command:  Pid",
 	.cmp	= sort__thread_cmp,
 	.print	= sort__thread_print,
 };
@@ -382,11 +382,11 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 static size_t
 sort__comm_print(FILE *fp, struct hist_entry *self)
 {
-	return fprintf(fp, "  %16s", self->thread->comm);
+	return fprintf(fp, "%16s", self->thread->comm);
 }
 
 static struct sort_entry sort_comm = {
-	.header		= "          Command",
+	.header 	= "         Command",
 	.cmp		= sort__comm_cmp,
 	.collapse	= sort__comm_collapse,
 	.print		= sort__comm_print,
@@ -416,13 +416,13 @@ static size_t
 sort__dso_print(FILE *fp, struct hist_entry *self)
 {
 	if (self->dso)
-		return fprintf(fp, "  %-25s", self->dso->name);
+		return fprintf(fp, "%-25s", self->dso->name);
 
-	return fprintf(fp, "  %016llx         ", (__u64)self->ip);
+	return fprintf(fp, "%016llx         ", (__u64)self->ip);
 }
 
 static struct sort_entry sort_dso = {
-	.header = " Shared Object            ",
+	.header = "Shared Object            ",
 	.cmp	= sort__dso_cmp,
 	.print	= sort__dso_print,
 };
@@ -449,18 +449,18 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
 	size_t ret = 0;
 
 	if (verbose)
-		ret += fprintf(fp, "  %#018llx", (__u64)self->ip);
+		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
 
 	if (self->sym)
-		ret += fprintf(fp, "  %s", self->sym->name);
+		ret += fprintf(fp, "%s", self->sym->name);
 	else
-		ret += fprintf(fp, "  %#016llx", (__u64)self->ip);
+		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
 
 	return ret;
 }
 
 static struct sort_entry sort_sym = {
-	.header = " Symbol",
+	.header = "Symbol",
 	.cmp	= sort__sym_cmp,
 	.print	= sort__sym_print,
 };
@@ -553,8 +553,10 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 	} else
 		ret = fprintf(fp, "%12d ", self->count);
 
-	list_for_each_entry(se, &hist_entry__sort_list, list)
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		fprintf(fp, "  ");
 		ret += se->print(fp, self);
+	}
 
 	ret += fprintf(fp, "\n");
 
@@ -721,13 +723,14 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 	struct rb_node *nd;
 	size_t ret = 0;
 
+	fprintf(fp, "\n");
 	fprintf(fp, "#\n");
 	fprintf(fp, "# (%Ld profiler events)\n", (__u64)total_samples);
 	fprintf(fp, "#\n");
 
 	fprintf(fp, "# Overhead");
 	list_for_each_entry(se, &hist_entry__sort_list, list)
-		fprintf(fp, " %s", se->header);
+		fprintf(fp, "  %s", se->header);
 	fprintf(fp, "\n");
 
 	fprintf(fp, "# ........");
@@ -735,7 +738,7 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 		int i;
 
 		fprintf(fp, "  ");
-		for (i = 0; i < strlen(se->header)-1; i++)
+		for (i = 0; i < strlen(se->header); i++)
 			fprintf(fp, ".");
 	}
 	fprintf(fp, "\n");
@@ -749,9 +752,10 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 
 	if (!strcmp(sort_order, default_sort_order)) {
 		fprintf(fp, "#\n");
-		fprintf(fp, "# ( For more details, try: perf report --sort comm,dso,symbol )\n");
+		fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n");
 		fprintf(fp, "#\n");
 	}
+	fprintf(fp, "\n");
 
 	return ret;
 }
-- 
cgit v0.10.2


From fc3fdfd632925990f14cffe314f5d90a2b0ae8f6 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Thu, 4 Jun 2009 13:48:51 +0100
Subject: [ARM] 5537/1: Freescale STMP: 37nn registers definition

Add register definitions for Freescale STMP 37nn boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-audioin.h b/arch/arm/mach-stmp37xx/include/mach/regs-audioin.h
new file mode 100644
index 0000000..3b511f9
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-audioin.h
@@ -0,0 +1,61 @@
+/*
+ * stmp37xx: AUDIOIN register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_AUDIOIN_BASE	(STMP3XXX_REGS_BASE + 0x4C000)
+
+#define HW_AUDIOIN_CTRL		0x0
+#define BM_AUDIOIN_CTRL_RUN	0x00000001
+#define BP_AUDIOIN_CTRL_RUN	0
+#define BM_AUDIOIN_CTRL_FIFO_ERROR_IRQ_EN	0x00000002
+#define BM_AUDIOIN_CTRL_FIFO_OVERFLOW_IRQ	0x00000004
+#define BM_AUDIOIN_CTRL_FIFO_UNDERFLOW_IRQ	0x00000008
+#define BM_AUDIOIN_CTRL_WORD_LENGTH	0x00000020
+#define BM_AUDIOIN_CTRL_CLKGATE	0x40000000
+#define BM_AUDIOIN_CTRL_SFTRST	0x80000000
+
+#define HW_AUDIOIN_STAT		0x10
+
+#define HW_AUDIOIN_ADCSRR	0x20
+
+#define HW_AUDIOIN_ADCVOLUME	0x30
+#define BM_AUDIOIN_ADCVOLUME_VOLUME_RIGHT	0x000000FF
+#define BP_AUDIOIN_ADCVOLUME_VOLUME_RIGHT	0
+#define BM_AUDIOIN_ADCVOLUME_VOLUME_LEFT	0x00FF0000
+#define BP_AUDIOIN_ADCVOLUME_VOLUME_LEFT	16
+
+#define HW_AUDIOIN_ADCDEBUG	0x40
+
+#define HW_AUDIOIN_ADCVOL	0x50
+#define BM_AUDIOIN_ADCVOL_GAIN_RIGHT	0x0000000F
+#define BP_AUDIOIN_ADCVOL_GAIN_RIGHT	0
+#define BM_AUDIOIN_ADCVOL_SELECT_RIGHT	0x00000030
+#define BP_AUDIOIN_ADCVOL_SELECT_RIGHT	4
+#define BM_AUDIOIN_ADCVOL_GAIN_LEFT	0x00000F00
+#define BP_AUDIOIN_ADCVOL_GAIN_LEFT	8
+#define BM_AUDIOIN_ADCVOL_SELECT_LEFT	0x00003000
+#define BP_AUDIOIN_ADCVOL_SELECT_LEFT	12
+#define BM_AUDIOIN_ADCVOL_MUTE	0x01000000
+
+#define HW_AUDIOIN_MICLINE	0x60
+
+#define HW_AUDIOIN_ANACLKCTRL	0x70
+#define BM_AUDIOIN_ANACLKCTRL_CLKGATE	0x80000000
+
+#define HW_AUDIOIN_DATA		0x80
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-audioout.h b/arch/arm/mach-stmp37xx/include/mach/regs-audioout.h
new file mode 100644
index 0000000..ca1942b
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-audioout.h
@@ -0,0 +1,111 @@
+/*
+ * stmp37xx: AUDIOOUT register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_AUDIOOUT_BASE	(STMP3XXX_REGS_BASE + 0x48000)
+
+#define HW_AUDIOOUT_CTRL	0x0
+#define BM_AUDIOOUT_CTRL_RUN	0x00000001
+#define BP_AUDIOOUT_CTRL_RUN	0
+#define BM_AUDIOOUT_CTRL_FIFO_ERROR_IRQ_EN	0x00000002
+#define BM_AUDIOOUT_CTRL_FIFO_OVERFLOW_IRQ	0x00000004
+#define BM_AUDIOOUT_CTRL_FIFO_UNDERFLOW_IRQ	0x00000008
+#define BM_AUDIOOUT_CTRL_WORD_LENGTH	0x00000040
+#define BM_AUDIOOUT_CTRL_CLKGATE	0x40000000
+#define BM_AUDIOOUT_CTRL_SFTRST	0x80000000
+
+#define HW_AUDIOOUT_STAT	0x10
+
+#define HW_AUDIOOUT_DACSRR	0x20
+#define BM_AUDIOOUT_DACSRR_SRC_FRAC	0x00001FFF
+#define BP_AUDIOOUT_DACSRR_SRC_FRAC	0
+#define BM_AUDIOOUT_DACSRR_SRC_INT	0x001F0000
+#define BP_AUDIOOUT_DACSRR_SRC_INT	16
+#define BM_AUDIOOUT_DACSRR_SRC_HOLD	0x07000000
+#define BP_AUDIOOUT_DACSRR_SRC_HOLD	24
+#define BM_AUDIOOUT_DACSRR_BASEMULT	0x70000000
+#define BP_AUDIOOUT_DACSRR_BASEMULT	28
+
+#define HW_AUDIOOUT_DACVOLUME	0x30
+#define BM_AUDIOOUT_DACVOLUME_MUTE_RIGHT	0x00000100
+#define BM_AUDIOOUT_DACVOLUME_MUTE_LEFT	0x01000000
+#define BM_AUDIOOUT_DACVOLUME_EN_ZCD	0x02000000
+
+#define HW_AUDIOOUT_DACDEBUG	0x40
+
+#define HW_AUDIOOUT_HPVOL	0x50
+#define BM_AUDIOOUT_HPVOL_MUTE	0x01000000
+#define BM_AUDIOOUT_HPVOL_EN_MSTR_ZCD	0x02000000
+
+#define HW_AUDIOOUT_PWRDN	0x70
+#define BM_AUDIOOUT_PWRDN_HEADPHONE	0x00000001
+#define BP_AUDIOOUT_PWRDN_HEADPHONE	0
+#define BM_AUDIOOUT_PWRDN_CAPLESS	0x00000010
+#define BM_AUDIOOUT_PWRDN_ADC	0x00000100
+#define BM_AUDIOOUT_PWRDN_DAC	0x00001000
+#define BM_AUDIOOUT_PWRDN_RIGHT_ADC	0x00010000
+#define BM_AUDIOOUT_PWRDN_LINEOUT	0x01000000
+
+#define HW_AUDIOOUT_REFCTRL	0x80
+#define BM_AUDIOOUT_REFCTRL_VAG_VAL	0x000000F0
+#define BP_AUDIOOUT_REFCTRL_VAG_VAL	4
+#define BM_AUDIOOUT_REFCTRL_ADC_REFVAL	0x00000F00
+#define BP_AUDIOOUT_REFCTRL_ADC_REFVAL	8
+#define BM_AUDIOOUT_REFCTRL_ADJ_VAG	0x00001000
+#define BM_AUDIOOUT_REFCTRL_ADJ_ADC	0x00002000
+#define BM_AUDIOOUT_REFCTRL_BIAS_CTRL	0x00030000
+#define BP_AUDIOOUT_REFCTRL_BIAS_CTRL	16
+#define BM_AUDIOOUT_REFCTRL_LOW_PWR	0x00080000
+#define BM_AUDIOOUT_REFCTRL_VBG_ADJ	0x00700000
+#define BP_AUDIOOUT_REFCTRL_VBG_ADJ	20
+#define BM_AUDIOOUT_REFCTRL_XTAL_BGR_BIAS	0x01000000
+#define BM_AUDIOOUT_REFCTRL_RAISE_REF	0x02000000
+
+#define HW_AUDIOOUT_ANACTRL	0x90
+#define BM_AUDIOOUT_ANACTRL_HP_CLASSAB	0x00000010
+#define BM_AUDIOOUT_ANACTRL_HP_HOLD_GND	0x00000020
+
+#define HW_AUDIOOUT_TEST	0xA0
+#define BM_AUDIOOUT_TEST_HP_I1_ADJ	0x00C00000
+#define BP_AUDIOOUT_TEST_HP_I1_ADJ	22
+
+#define HW_AUDIOOUT_BISTCTRL	0xB0
+
+#define HW_AUDIOOUT_BISTSTAT0	0xC0
+
+#define HW_AUDIOOUT_BISTSTAT1	0xD0
+
+#define HW_AUDIOOUT_ANACLKCTRL	0xE0
+#define BM_AUDIOOUT_ANACLKCTRL_CLKGATE	0x80000000
+
+#define HW_AUDIOOUT_DATA	0xF0
+
+#define HW_AUDIOOUT_LINEOUTCTRL	0x100
+#define BM_AUDIOOUT_LINEOUTCTRL_VOL_RIGHT	0x0000001F
+#define BP_AUDIOOUT_LINEOUTCTRL_VOL_RIGHT	0
+#define BM_AUDIOOUT_LINEOUTCTRL_VOL_LEFT	0x00001F00
+#define BP_AUDIOOUT_LINEOUTCTRL_VOL_LEFT	8
+#define BM_AUDIOOUT_LINEOUTCTRL_CHARGE_CAP	0x00007000
+#define BP_AUDIOOUT_LINEOUTCTRL_CHARGE_CAP	12
+#define BM_AUDIOOUT_LINEOUTCTRL_VAG_CTRL	0x00F00000
+#define BP_AUDIOOUT_LINEOUTCTRL_VAG_CTRL	20
+#define BM_AUDIOOUT_LINEOUTCTRL_MUTE	0x01000000
+#define BM_AUDIOOUT_LINEOUTCTRL_EN_ZCD	0x02000000
+
+#define HW_AUDIOOUT_VERSION	0x200
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-digctl.h b/arch/arm/mach-stmp37xx/include/mach/regs-digctl.h
new file mode 100644
index 0000000..ba1bbe2
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-digctl.h
@@ -0,0 +1,24 @@
+/*
+ * stmp37xx: DIGCTL register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_DIGCTL_BASE	(STMP3XXX_REGS_BASE + 0x1C000)
+
+#define HW_DIGCTL_CTRL		0x0
+#define BM_DIGCTL_CTRL_USB_CLKGATE	0x00000004
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-ecc8.h b/arch/arm/mach-stmp37xx/include/mach/regs-ecc8.h
new file mode 100644
index 0000000..3b6d990
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-ecc8.h
@@ -0,0 +1,37 @@
+/*
+ * stmp37xx: ECC8 register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_ECC8_BASE	(STMP3XXX_REGS_BASE + 0x8000)
+
+#define HW_ECC8_CTRL		0x0
+#define BM_ECC8_CTRL_COMPLETE_IRQ	0x00000001
+#define BP_ECC8_CTRL_COMPLETE_IRQ	0
+#define BM_ECC8_CTRL_COMPLETE_IRQ_EN	0x00000100
+#define BM_ECC8_CTRL_AHBM_SFTRST	0x20000000
+
+#define HW_ECC8_STATUS0		0x10
+#define BM_ECC8_STATUS0_UNCORRECTABLE	0x00000004
+#define BM_ECC8_STATUS0_CORRECTED	0x00000008
+#define BM_ECC8_STATUS0_STATUS_AUX	0x00000F00
+#define BP_ECC8_STATUS0_STATUS_AUX	8
+#define BM_ECC8_STATUS0_COMPLETED_CE	0x000F0000
+#define BP_ECC8_STATUS0_COMPLETED_CE	16
+
+#define HW_ECC8_STATUS1		0x20
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-gpmi.h b/arch/arm/mach-stmp37xx/include/mach/regs-gpmi.h
new file mode 100644
index 0000000..f2b304f
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-gpmi.h
@@ -0,0 +1,63 @@
+/*
+ * stmp37xx: GPMI register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_GPMI_BASE	(STMP3XXX_REGS_BASE + 0xC000)
+#define REGS_GPMI_PHYS	0x8000C000
+#define REGS_GPMI_SIZE	0x2000
+
+#define HW_GPMI_CTRL0		0x0
+#define BM_GPMI_CTRL0_XFER_COUNT	0x0000FFFF
+#define BP_GPMI_CTRL0_XFER_COUNT	0
+#define BM_GPMI_CTRL0_CS	0x00300000
+#define BP_GPMI_CTRL0_CS	20
+#define BM_GPMI_CTRL0_LOCK_CS	0x00400000
+#define BM_GPMI_CTRL0_WORD_LENGTH	0x00800000
+#define BM_GPMI_CTRL0_COMMAND_MODE	0x03000000
+#define BP_GPMI_CTRL0_COMMAND_MODE	24
+#define BV_GPMI_CTRL0_COMMAND_MODE__WRITE	    0x0
+#define BV_GPMI_CTRL0_COMMAND_MODE__READ	     0x1
+#define BV_GPMI_CTRL0_COMMAND_MODE__READ_AND_COMPARE 0x2
+#define BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY   0x3
+#define BM_GPMI_CTRL0_RUN	0x20000000
+#define BM_GPMI_CTRL0_CLKGATE	0x40000000
+#define BM_GPMI_CTRL0_SFTRST	0x80000000
+#define BM_GPMI_ECCCTRL_ENABLE_ECC	0x00001000
+#define BM_GPMI_ECCCTRL_ECC_CMD	0x00006000
+#define BP_GPMI_ECCCTRL_ECC_CMD	13
+
+#define HW_GPMI_CTRL1		0x60
+#define BM_GPMI_CTRL1_GPMI_MODE	0x00000003
+#define BP_GPMI_CTRL1_GPMI_MODE	0
+#define BM_GPMI_CTRL1_ATA_IRQRDY_POLARITY	0x00000004
+#define BM_GPMI_CTRL1_DEV_RESET	0x00000008
+#define BM_GPMI_CTRL1_TIMEOUT_IRQ	0x00000200
+#define BM_GPMI_CTRL1_DEV_IRQ	0x00000400
+#define BM_GPMI_CTRL1_DSAMPLE_TIME	0x00007000
+#define BP_GPMI_CTRL1_DSAMPLE_TIME	12
+
+#define HW_GPMI_TIMING0		0x70
+#define BM_GPMI_TIMING0_DATA_SETUP	0x000000FF
+#define BP_GPMI_TIMING0_DATA_SETUP	0
+#define BM_GPMI_TIMING0_DATA_HOLD	0x0000FF00
+#define BP_GPMI_TIMING0_DATA_HOLD	8
+
+#define HW_GPMI_TIMING1		0x80
+#define BM_GPMI_TIMING1_DEVICE_BUSY_TIMEOUT	0xFFFF0000
+#define BP_GPMI_TIMING1_DEVICE_BUSY_TIMEOUT	16
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-i2c.h b/arch/arm/mach-stmp37xx/include/mach/regs-i2c.h
new file mode 100644
index 0000000..35882a9
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-i2c.h
@@ -0,0 +1,55 @@
+/*
+ * stmp37xx: I2C register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_I2C_BASE	(STMP3XXX_REGS_BASE + 0x58000)
+#define REGS_I2C_PHYS	0x80058000
+#define REGS_I2C_SIZE	0x2000
+
+#define HW_I2C_CTRL0		0x0
+#define BM_I2C_CTRL0_XFER_COUNT	0x0000FFFF
+#define BP_I2C_CTRL0_XFER_COUNT	0
+#define BM_I2C_CTRL0_DIRECTION	0x00010000
+#define BM_I2C_CTRL0_MASTER_MODE	0x00020000
+#define BM_I2C_CTRL0_PRE_SEND_START	0x00080000
+#define BM_I2C_CTRL0_POST_SEND_STOP	0x00100000
+#define BM_I2C_CTRL0_RETAIN_CLOCK	0x00200000
+#define BM_I2C_CTRL0_SEND_NAK_ON_LAST	0x02000000
+#define BM_I2C_CTRL0_CLKGATE	0x40000000
+#define BM_I2C_CTRL0_SFTRST	0x80000000
+
+#define HW_I2C_TIMING0		0x10
+
+#define HW_I2C_TIMING1		0x20
+
+#define HW_I2C_TIMING2		0x30
+
+#define HW_I2C_CTRL1		0x40
+#define BM_I2C_CTRL1_SLAVE_IRQ	0x00000001
+#define BP_I2C_CTRL1_SLAVE_IRQ	0
+#define BM_I2C_CTRL1_SLAVE_STOP_IRQ	0x00000002
+#define BM_I2C_CTRL1_MASTER_LOSS_IRQ	0x00000004
+#define BM_I2C_CTRL1_EARLY_TERM_IRQ	0x00000008
+#define BM_I2C_CTRL1_OVERSIZE_XFER_TERM_IRQ	0x00000010
+#define BM_I2C_CTRL1_NO_SLAVE_ACK_IRQ	0x00000020
+#define BM_I2C_CTRL1_DATA_ENGINE_CMPLT_IRQ	0x00000040
+#define BM_I2C_CTRL1_BUS_FREE_IRQ	0x00000080
+#define BM_I2C_CTRL1_CLR_GOT_A_NAK	0x10000000
+
+#define HW_I2C_VERSION		0x90
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-lcdif.h b/arch/arm/mach-stmp37xx/include/mach/regs-lcdif.h
new file mode 100644
index 0000000..72514e8
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-lcdif.h
@@ -0,0 +1,89 @@
+/*
+ * stmp37xx: LCDIF register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_LCDIF_BASE	(STMP3XXX_REGS_BASE + 0x30000)
+#define REGS_LCDIF_PHYS	0x80030000
+#define REGS_LCDIF_SIZE	0x2000
+
+#define HW_LCDIF_CTRL		0x0
+#define BM_LCDIF_CTRL_COUNT	0x0000FFFF
+#define BP_LCDIF_CTRL_COUNT	0
+#define BM_LCDIF_CTRL_RUN	0x00010000
+#define BM_LCDIF_CTRL_WORD_LENGTH	0x00020000
+#define BM_LCDIF_CTRL_DATA_SELECT	0x00040000
+#define BM_LCDIF_CTRL_DOTCLK_MODE	0x00080000
+#define BM_LCDIF_CTRL_VSYNC_MODE	0x00100000
+#define BM_LCDIF_CTRL_DATA_SWIZZLE	0x00600000
+#define BP_LCDIF_CTRL_DATA_SWIZZLE	21
+#define BM_LCDIF_CTRL_BYPASS_COUNT	0x00800000
+#define BM_LCDIF_CTRL_SHIFT_NUM_BITS	0x06000000
+#define BP_LCDIF_CTRL_SHIFT_NUM_BITS	25
+#define BM_LCDIF_CTRL_DATA_SHIFT_DIR	0x08000000
+#define BM_LCDIF_CTRL_WAIT_FOR_VSYNC_EDGE	0x10000000
+#define BM_LCDIF_CTRL_CLKGATE	0x40000000
+#define BM_LCDIF_CTRL_SFTRST	0x80000000
+
+#define HW_LCDIF_CTRL1		0x10
+#define BM_LCDIF_CTRL1_RESET	0x00000001
+#define BP_LCDIF_CTRL1_RESET	0
+#define BM_LCDIF_CTRL1_MODE86	0x00000002
+#define BM_LCDIF_CTRL1_BUSY_ENABLE	0x00000004
+#define BM_LCDIF_CTRL1_VSYNC_EDGE_IRQ	0x00000100
+#define BM_LCDIF_CTRL1_CUR_FRAME_DONE_IRQ	0x00000200
+#define BM_LCDIF_CTRL1_UNDERFLOW_IRQ	0x00000400
+#define BM_LCDIF_CTRL1_OVERFLOW_IRQ	0x00000800
+#define BM_LCDIF_CTRL1_VSYNC_EDGE_IRQ_EN	0x00001000
+#define BM_LCDIF_CTRL1_BYTE_PACKING_FORMAT	0x000F0000
+#define BP_LCDIF_CTRL1_BYTE_PACKING_FORMAT	16
+
+#define HW_LCDIF_TIMING		0x20
+
+#define HW_LCDIF_VDCTRL0	0x30
+#define BM_LCDIF_VDCTRL0_VALID_DATA_CNT	0x000003FF
+#define BP_LCDIF_VDCTRL0_VALID_DATA_CNT	0
+#define BM_LCDIF_VDCTRL0_VSYNC_PULSE_WIDTH_UNIT	0x00100000
+#define BM_LCDIF_VDCTRL0_VSYNC_PERIOD_UNIT	0x00200000
+#define BM_LCDIF_VDCTRL0_ENABLE_POL	0x01000000
+#define BM_LCDIF_VDCTRL0_DOTCLK_POL	0x02000000
+#define BM_LCDIF_VDCTRL0_HSYNC_POL	0x04000000
+#define BM_LCDIF_VDCTRL0_VSYNC_POL	0x08000000
+#define BM_LCDIF_VDCTRL0_ENABLE_PRESENT	0x10000000
+#define BM_LCDIF_VDCTRL0_VSYNC_OEB	0x20000000
+
+#define HW_LCDIF_VDCTRL1	0x40
+#define BM_LCDIF_VDCTRL1_VSYNC_PERIOD	0x000FFFFF
+#define BP_LCDIF_VDCTRL1_VSYNC_PERIOD	0
+#define BM_LCDIF_VDCTRL1_VSYNC_PULSE_WIDTH	0xFFF00000
+#define BP_LCDIF_VDCTRL1_VSYNC_PULSE_WIDTH	20
+
+#define HW_LCDIF_VDCTRL2	0x50
+#define BM_LCDIF_VDCTRL2_VALID_DATA_CNT	0x000007FF
+#define BP_LCDIF_VDCTRL2_VALID_DATA_CNT	0
+#define BM_LCDIF_VDCTRL2_HSYNC_PERIOD	0x007FF800
+#define BP_LCDIF_VDCTRL2_HSYNC_PERIOD	11
+#define BM_LCDIF_VDCTRL2_HSYNC_PULSE_WIDTH	0xFF800000
+#define BP_LCDIF_VDCTRL2_HSYNC_PULSE_WIDTH	23
+
+#define HW_LCDIF_VDCTRL3	0x60
+#define BM_LCDIF_VDCTRL3_VERTICAL_WAIT_CNT	0x000001FF
+#define BP_LCDIF_VDCTRL3_VERTICAL_WAIT_CNT	0
+#define BM_LCDIF_VDCTRL3_HORIZONTAL_WAIT_CNT	0x00FFF000
+#define BP_LCDIF_VDCTRL3_HORIZONTAL_WAIT_CNT	12
+#define BM_LCDIF_VDCTRL3_SYNC_SIGNALS_ON	0x01000000
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-lradc.h b/arch/arm/mach-stmp37xx/include/mach/regs-lradc.h
new file mode 100644
index 0000000..cc7b470
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-lradc.h
@@ -0,0 +1,97 @@
+/*
+ * stmp37xx: LRADC register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_LRADC_BASE	(STMP3XXX_REGS_BASE + 0x50000)
+
+#define HW_LRADC_CTRL0		0x0
+#define BM_LRADC_CTRL0_SCHEDULE	0x000000FF
+#define BP_LRADC_CTRL0_SCHEDULE	0
+#define BM_LRADC_CTRL0_XPLUS_ENABLE	0x00010000
+#define BM_LRADC_CTRL0_YPLUS_ENABLE	0x00020000
+#define BM_LRADC_CTRL0_XMINUS_ENABLE	0x00040000
+#define BM_LRADC_CTRL0_YMINUS_ENABLE	0x00080000
+#define BM_LRADC_CTRL0_TOUCH_DETECT_ENABLE	0x00100000
+#define BM_LRADC_CTRL0_ONCHIP_GROUNDREF	0x00200000
+#define BM_LRADC_CTRL0_CLKGATE	0x40000000
+#define BM_LRADC_CTRL0_SFTRST	0x80000000
+
+#define HW_LRADC_CTRL1		0x10
+#define BM_LRADC_CTRL1_LRADC0_IRQ	0x00000001
+#define BP_LRADC_CTRL1_LRADC0_IRQ	0
+#define BM_LRADC_CTRL1_LRADC5_IRQ	0x00000020
+#define BM_LRADC_CTRL1_LRADC6_IRQ	0x00000040
+#define BM_LRADC_CTRL1_TOUCH_DETECT_IRQ	0x00000100
+#define BM_LRADC_CTRL1_LRADC0_IRQ_EN	0x00010000
+#define BM_LRADC_CTRL1_LRADC5_IRQ_EN	0x00200000
+#define BM_LRADC_CTRL1_TOUCH_DETECT_IRQ_EN	0x01000000
+
+#define HW_LRADC_CTRL2		0x20
+#define BM_LRADC_CTRL2_BL_BRIGHTNESS	0x001F0000
+#define BP_LRADC_CTRL2_BL_BRIGHTNESS	16
+#define BM_LRADC_CTRL2_BL_MUX_SELECT	0x00200000
+#define BM_LRADC_CTRL2_BL_ENABLE	0x00400000
+#define BM_LRADC_CTRL2_DIVIDE_BY_TWO	0xFF000000
+#define BP_LRADC_CTRL2_DIVIDE_BY_TWO	24
+
+#define HW_LRADC_CTRL3		0x30
+#define BM_LRADC_CTRL3_CYCLE_TIME	0x00000300
+#define BP_LRADC_CTRL3_CYCLE_TIME	8
+
+#define HW_LRADC_STATUS		0x40
+#define BM_LRADC_STATUS_TOUCH_DETECT_RAW	0x00000001
+#define BP_LRADC_STATUS_TOUCH_DETECT_RAW	0
+
+#define HW_LRADC_CH0		(0x50 + 0 * 0x10)
+#define HW_LRADC_CH1		(0x50 + 1 * 0x10)
+#define HW_LRADC_CH2		(0x50 + 2 * 0x10)
+#define HW_LRADC_CH3		(0x50 + 3 * 0x10)
+#define HW_LRADC_CH4		(0x50 + 4 * 0x10)
+#define HW_LRADC_CH5		(0x50 + 5 * 0x10)
+#define HW_LRADC_CH6		(0x50 + 6 * 0x10)
+#define HW_LRADC_CH7		(0x50 + 7 * 0x10)
+
+#define HW_LRADC_CHn		0x50
+#define BM_LRADC_CHn_VALUE	0x0003FFFF
+#define BP_LRADC_CHn_VALUE	0
+#define BM_LRADC_CHn_NUM_SAMPLES	0x1F000000
+#define BP_LRADC_CHn_NUM_SAMPLES	24
+#define BM_LRADC_CHn_ACCUMULATE	0x20000000
+
+#define HW_LRADC_DELAY0		(0xD0 + 0 * 0x10)
+#define HW_LRADC_DELAY1		(0xD0 + 1 * 0x10)
+#define HW_LRADC_DELAY2		(0xD0 + 2 * 0x10)
+#define HW_LRADC_DELAY3		(0xD0 + 3 * 0x10)
+
+#define HW_LRADC_DELAYn		0xD0
+#define BM_LRADC_DELAYn_DELAY	0x000007FF
+#define BP_LRADC_DELAYn_DELAY	0
+#define BM_LRADC_DELAYn_LOOP_COUNT	0x0000F800
+#define BP_LRADC_DELAYn_LOOP_COUNT	11
+#define BM_LRADC_DELAYn_TRIGGER_DELAYS	0x000F0000
+#define BP_LRADC_DELAYn_TRIGGER_DELAYS	16
+#define BM_LRADC_DELAYn_KICK	0x00100000
+#define BM_LRADC_DELAYn_TRIGGER_LRADCS	0xFF000000
+#define BP_LRADC_DELAYn_TRIGGER_LRADCS	24
+
+#define HW_LRADC_CTRL4		0x140
+#define BM_LRADC_CTRL4_LRADC6SELECT	0x0F000000
+#define BP_LRADC_CTRL4_LRADC6SELECT	24
+#define BM_LRADC_CTRL4_LRADC7SELECT	0xF0000000
+#define BP_LRADC_CTRL4_LRADC7SELECT	28
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-pwm.h b/arch/arm/mach-stmp37xx/include/mach/regs-pwm.h
new file mode 100644
index 0000000..15966a1
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-pwm.h
@@ -0,0 +1,51 @@
+/*
+ * stmp37xx: PWM register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_PWM_BASE	(STMP3XXX_REGS_BASE + 0x64000)
+
+#define HW_PWM_CTRL		0x0
+#define BM_PWM_CTRL_PWM2_ENABLE	0x00000004
+#define BM_PWM_CTRL_PWM2_ANA_CTRL_ENABLE	0x00000020
+
+#define HW_PWM_ACTIVE0		(0x10 + 0 * 0x20)
+#define HW_PWM_ACTIVE1		(0x10 + 1 * 0x20)
+#define HW_PWM_ACTIVE2		(0x10 + 2 * 0x20)
+#define HW_PWM_ACTIVE3		(0x10 + 3 * 0x20)
+
+#define HW_PWM_ACTIVEn		0x10
+#define BM_PWM_ACTIVEn_ACTIVE	0x0000FFFF
+#define BP_PWM_ACTIVEn_ACTIVE	0
+#define BM_PWM_ACTIVEn_INACTIVE	0xFFFF0000
+#define BP_PWM_ACTIVEn_INACTIVE	16
+
+#define HW_PWM_PERIOD0		(0x20 + 0 * 0x20)
+#define HW_PWM_PERIOD1		(0x20 + 1 * 0x20)
+#define HW_PWM_PERIOD2		(0x20 + 2 * 0x20)
+#define HW_PWM_PERIOD3		(0x20 + 3 * 0x20)
+
+#define HW_PWM_PERIODn		0x20
+#define BM_PWM_PERIODn_PERIOD	0x0000FFFF
+#define BP_PWM_PERIODn_PERIOD	0
+#define BM_PWM_PERIODn_ACTIVE_STATE	0x00030000
+#define BP_PWM_PERIODn_ACTIVE_STATE	16
+#define BM_PWM_PERIODn_INACTIVE_STATE	0x000C0000
+#define BP_PWM_PERIODn_INACTIVE_STATE	18
+#define BM_PWM_PERIODn_CDIV	0x00700000
+#define BP_PWM_PERIODn_CDIV	20
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-rtc.h b/arch/arm/mach-stmp37xx/include/mach/regs-rtc.h
new file mode 100644
index 0000000..fac40ed
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-rtc.h
@@ -0,0 +1,57 @@
+/*
+ * stmp37xx: RTC register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_RTC_BASE	(STMP3XXX_REGS_BASE + 0x5C000)
+#define REGS_RTC_PHYS   0x8005C000
+#define REGS_RTC_SIZE   0x2000
+
+#define HW_RTC_CTRL		0x0
+#define BM_RTC_CTRL_ALARM_IRQ_EN	0x00000001
+#define BP_RTC_CTRL_ALARM_IRQ_EN	0
+#define BM_RTC_CTRL_ONEMSEC_IRQ_EN	0x00000002
+#define BM_RTC_CTRL_ALARM_IRQ	0x00000004
+#define BM_RTC_CTRL_ONEMSEC_IRQ	0x00000008
+#define BM_RTC_CTRL_WATCHDOGEN	0x00000010
+
+#define HW_RTC_STAT		0x10
+#define BM_RTC_STAT_NEW_REGS	0x0000FF00
+#define BP_RTC_STAT_NEW_REGS	8
+#define BM_RTC_STAT_STALE_REGS	0x00FF0000
+#define BP_RTC_STAT_STALE_REGS	16
+#define BM_RTC_STAT_RTC_PRESENT	0x80000000
+
+#define HW_RTC_SECONDS		0x30
+
+#define HW_RTC_ALARM		0x40
+
+#define HW_RTC_WATCHDOG		0x50
+
+#define HW_RTC_PERSISTENT0	0x60
+#define BM_RTC_PERSISTENT0_ALARM_WAKE_EN	0x00000002
+#define BM_RTC_PERSISTENT0_ALARM_EN	0x00000004
+#define BM_RTC_PERSISTENT0_XTAL24MHZ_PWRUP	0x00000010
+#define BM_RTC_PERSISTENT0_XTAL32KHZ_PWRUP	0x00000020
+#define BM_RTC_PERSISTENT0_ALARM_WAKE	0x00000080
+#define BM_RTC_PERSISTENT0_SPARE_ANALOG	0xFFFC0000
+#define BP_RTC_PERSISTENT0_SPARE_ANALOG	18
+
+#define HW_RTC_PERSISTENT1	0x70
+
+#define HW_RTC_VERSION		0xD0
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-ssp.h b/arch/arm/mach-stmp37xx/include/mach/regs-ssp.h
new file mode 100644
index 0000000..cbde891
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-ssp.h
@@ -0,0 +1,101 @@
+/*
+ * stmp37xx: SSP register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_SSP_BASE	(STMP3XXX_REGS_BASE + 0x10000)
+#define REGS_SSP1_PHYS	0x80010000
+#define REGS_SSP2_PHYS	0x80034000
+#define REGS_SSP_SIZE	0x2000
+
+#define HW_SSP_CTRL0		0x0
+#define BM_SSP_CTRL0_XFER_COUNT	0x0000FFFF
+#define BP_SSP_CTRL0_XFER_COUNT	0
+#define BM_SSP_CTRL0_ENABLE	0x00010000
+#define BM_SSP_CTRL0_GET_RESP	0x00020000
+#define BM_SSP_CTRL0_LONG_RESP	0x00080000
+#define BM_SSP_CTRL0_WAIT_FOR_CMD	0x00100000
+#define BM_SSP_CTRL0_WAIT_FOR_IRQ	0x00200000
+#define BM_SSP_CTRL0_BUS_WIDTH	0x00C00000
+#define BP_SSP_CTRL0_BUS_WIDTH	22
+#define BM_SSP_CTRL0_DATA_XFER	0x01000000
+#define BM_SSP_CTRL0_READ	0x02000000
+#define BM_SSP_CTRL0_IGNORE_CRC	0x04000000
+#define BM_SSP_CTRL0_LOCK_CS	0x08000000
+#define BM_SSP_CTRL0_RUN	0x20000000
+#define BM_SSP_CTRL0_CLKGATE	0x40000000
+#define BM_SSP_CTRL0_SFTRST	0x80000000
+
+#define HW_SSP_CMD0		0x10
+#define BM_SSP_CMD0_CMD		0x000000FF
+#define BP_SSP_CMD0_CMD		0
+#define BM_SSP_CMD0_BLOCK_COUNT	0x0000FF00
+#define BP_SSP_CMD0_BLOCK_COUNT	8
+#define BM_SSP_CMD0_BLOCK_SIZE	0x000F0000
+#define BP_SSP_CMD0_BLOCK_SIZE	16
+#define BM_SSP_CMD0_APPEND_8CYC	0x00100000
+#define BM_SSP_CMD1_CMD_ARG	0xFFFFFFFF
+#define BP_SSP_CMD1_CMD_ARG	0
+
+#define HW_SSP_TIMING		0x50
+#define BM_SSP_TIMING_CLOCK_RATE	0x000000FF
+#define BP_SSP_TIMING_CLOCK_RATE	0
+#define BM_SSP_TIMING_CLOCK_DIVIDE	0x0000FF00
+#define BP_SSP_TIMING_CLOCK_DIVIDE	8
+#define BM_SSP_TIMING_TIMEOUT	0xFFFF0000
+#define BP_SSP_TIMING_TIMEOUT	16
+
+#define HW_SSP_CTRL1		0x60
+#define BM_SSP_CTRL1_SSP_MODE	0x0000000F
+#define BP_SSP_CTRL1_SSP_MODE	0
+#define BM_SSP_CTRL1_WORD_LENGTH	0x000000F0
+#define BP_SSP_CTRL1_WORD_LENGTH	4
+#define BM_SSP_CTRL1_POLARITY	0x00000200
+#define BM_SSP_CTRL1_PHASE	0x00000400
+#define BM_SSP_CTRL1_DMA_ENABLE	0x00002000
+#define BM_SSP_CTRL1_FIFO_OVERRUN_IRQ	0x00008000
+#define BM_SSP_CTRL1_RECV_TIMEOUT_IRQ_EN	0x00010000
+#define BM_SSP_CTRL1_RECV_TIMEOUT_IRQ	0x00020000
+#define BM_SSP_CTRL1_FIFO_UNDERRUN_IRQ	0x00200000
+#define BM_SSP_CTRL1_DATA_CRC_IRQ_EN	0x00400000
+#define BM_SSP_CTRL1_DATA_CRC_IRQ	0x00800000
+#define BM_SSP_CTRL1_DATA_TIMEOUT_IRQ_EN	0x01000000
+#define BM_SSP_CTRL1_DATA_TIMEOUT_IRQ	0x02000000
+#define BM_SSP_CTRL1_RESP_TIMEOUT_IRQ_EN	0x04000000
+#define BM_SSP_CTRL1_RESP_TIMEOUT_IRQ	0x08000000
+#define BM_SSP_CTRL1_RESP_ERR_IRQ_EN	0x10000000
+#define BM_SSP_CTRL1_RESP_ERR_IRQ	0x20000000
+#define BM_SSP_CTRL1_SDIO_IRQ	0x80000000
+
+#define HW_SSP_DATA		0x70
+
+#define HW_SSP_SDRESP0		0x80
+
+#define HW_SSP_SDRESP1		0x90
+
+#define HW_SSP_SDRESP2		0xA0
+
+#define HW_SSP_SDRESP3		0xB0
+
+#define HW_SSP_STATUS		0xC0
+#define BM_SSP_STATUS_FIFO_EMPTY	0x00000020
+#define BM_SSP_STATUS_TIMEOUT	0x00001000
+#define BM_SSP_STATUS_RESP_TIMEOUT	0x00004000
+#define BM_SSP_STATUS_RESP_ERR	0x00008000
+#define BM_SSP_STATUS_RESP_CRC_ERR	0x00010000
+#define BM_SSP_STATUS_CARD_DETECT	0x10000000
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-uartapp.h b/arch/arm/mach-stmp37xx/include/mach/regs-uartapp.h
new file mode 100644
index 0000000..0594275
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-uartapp.h
@@ -0,0 +1,85 @@
+/*
+ * stmp37xx: UARTAPP register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_UARTAPP_BASE	(STMP3XXX_REGS_BASE + 0x6C000)
+#define REGS_UARTAPP1_PHYS	0x8006C000
+#define REGS_UARTAPP_SIZE	0x2000
+
+#define HW_UARTAPP_CTRL0	0x0
+#define BM_UARTAPP_CTRL0_XFER_COUNT	0x0000FFFF
+#define BP_UARTAPP_CTRL0_XFER_COUNT	0
+#define BM_UARTAPP_CTRL0_RXTIMEOUT	0x07FF0000
+#define BP_UARTAPP_CTRL0_RXTIMEOUT	16
+#define BM_UARTAPP_CTRL0_RXTO_ENABLE	0x08000000
+#define BM_UARTAPP_CTRL0_RUN	0x20000000
+#define BM_UARTAPP_CTRL0_SFTRST	0x80000000
+#define BM_UARTAPP_CTRL1_XFER_COUNT	0x0000FFFF
+#define BP_UARTAPP_CTRL1_XFER_COUNT	0
+#define BM_UARTAPP_CTRL1_RUN	0x10000000
+
+#define HW_UARTAPP_CTRL2	0x20
+#define BM_UARTAPP_CTRL2_UARTEN	0x00000001
+#define BP_UARTAPP_CTRL2_UARTEN	0
+#define BM_UARTAPP_CTRL2_TXE	0x00000100
+#define BM_UARTAPP_CTRL2_RXE	0x00000200
+#define BM_UARTAPP_CTRL2_RTS	0x00000800
+#define BM_UARTAPP_CTRL2_RTSEN	0x00004000
+#define BM_UARTAPP_CTRL2_CTSEN	0x00008000
+#define BM_UARTAPP_CTRL2_RXDMAE	0x01000000
+#define BM_UARTAPP_CTRL2_TXDMAE	0x02000000
+#define BM_UARTAPP_CTRL2_DMAONERR	0x04000000
+
+#define HW_UARTAPP_LINECTRL	0x30
+#define BM_UARTAPP_LINECTRL_BRK	0x00000001
+#define BP_UARTAPP_LINECTRL_BRK	0
+#define BM_UARTAPP_LINECTRL_PEN	0x00000002
+#define BM_UARTAPP_LINECTRL_EPS	0x00000004
+#define BM_UARTAPP_LINECTRL_STP2	0x00000008
+#define BM_UARTAPP_LINECTRL_FEN	0x00000010
+#define BM_UARTAPP_LINECTRL_WLEN	0x00000060
+#define BP_UARTAPP_LINECTRL_WLEN	5
+#define BM_UARTAPP_LINECTRL_SPS	0x00000080
+#define BM_UARTAPP_LINECTRL_BAUD_DIVFRAC	0x00003F00
+#define BP_UARTAPP_LINECTRL_BAUD_DIVFRAC	8
+#define BM_UARTAPP_LINECTRL_BAUD_DIVINT	0xFFFF0000
+#define BP_UARTAPP_LINECTRL_BAUD_DIVINT	16
+
+#define HW_UARTAPP_INTR		0x50
+#define BM_UARTAPP_INTR_CTSMIS	0x00000002
+#define BM_UARTAPP_INTR_RTIS	0x00000040
+#define BM_UARTAPP_INTR_CTSMIEN	0x00020000
+#define BM_UARTAPP_INTR_RXIEN	0x00100000
+#define BM_UARTAPP_INTR_RTIEN	0x00400000
+
+#define HW_UARTAPP_DATA		0x60
+
+#define HW_UARTAPP_STAT		0x70
+#define BM_UARTAPP_STAT_RXCOUNT	0x0000FFFF
+#define BP_UARTAPP_STAT_RXCOUNT	0
+#define BM_UARTAPP_STAT_FERR	0x00010000
+#define BM_UARTAPP_STAT_PERR	0x00020000
+#define BM_UARTAPP_STAT_BERR	0x00040000
+#define BM_UARTAPP_STAT_OERR	0x00080000
+#define BM_UARTAPP_STAT_RXFE	0x01000000
+#define BM_UARTAPP_STAT_TXFF	0x02000000
+#define BM_UARTAPP_STAT_TXFE	0x08000000
+#define BM_UARTAPP_STAT_CTS	0x10000000
+
+#define HW_UARTAPP_VERSION	0x90
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-uartdbg.h b/arch/arm/mach-stmp37xx/include/mach/regs-uartdbg.h
new file mode 100644
index 0000000..b810deb
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-uartdbg.h
@@ -0,0 +1,268 @@
+/*
+ * stmp378x: UARTDBG register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_UARTDBG_BASE	(STMP3XXX_REGS_BASE + 0x70000)
+#define REGS_UARTDBG_PHYS	0x80070000
+#define REGS_UARTDBG_SIZE	0x2000
+
+#define HW_UARTDBGDR 0x00000000
+#define BP_UARTDBGDR_UNAVAILABLE      16
+#define BM_UARTDBGDR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGDR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGDR_UNAVAILABLE)
+#define BP_UARTDBGDR_RESERVED      12
+#define BM_UARTDBGDR_RESERVED 0x0000F000
+#define BF_UARTDBGDR_RESERVED(v)  \
+	(((v) << 12) & BM_UARTDBGDR_RESERVED)
+#define BM_UARTDBGDR_OE 0x00000800
+#define BM_UARTDBGDR_BE 0x00000400
+#define BM_UARTDBGDR_PE 0x00000200
+#define BM_UARTDBGDR_FE 0x00000100
+#define BP_UARTDBGDR_DATA      0
+#define BM_UARTDBGDR_DATA 0x000000FF
+#define BF_UARTDBGDR_DATA(v)  \
+	(((v) << 0) & BM_UARTDBGDR_DATA)
+#define HW_UARTDBGRSR_ECR 0x00000004
+#define BP_UARTDBGRSR_ECR_UNAVAILABLE      8
+#define BM_UARTDBGRSR_ECR_UNAVAILABLE 0xFFFFFF00
+#define BF_UARTDBGRSR_ECR_UNAVAILABLE(v) \
+	(((v) << 8) & BM_UARTDBGRSR_ECR_UNAVAILABLE)
+#define BP_UARTDBGRSR_ECR_EC      4
+#define BM_UARTDBGRSR_ECR_EC 0x000000F0
+#define BF_UARTDBGRSR_ECR_EC(v)  \
+	(((v) << 4) & BM_UARTDBGRSR_ECR_EC)
+#define BM_UARTDBGRSR_ECR_OE 0x00000008
+#define BM_UARTDBGRSR_ECR_BE 0x00000004
+#define BM_UARTDBGRSR_ECR_PE 0x00000002
+#define BM_UARTDBGRSR_ECR_FE 0x00000001
+#define HW_UARTDBGFR 0x00000018
+#define BP_UARTDBGFR_UNAVAILABLE      16
+#define BM_UARTDBGFR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGFR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGFR_UNAVAILABLE)
+#define BP_UARTDBGFR_RESERVED      9
+#define BM_UARTDBGFR_RESERVED 0x0000FE00
+#define BF_UARTDBGFR_RESERVED(v)  \
+	(((v) << 9) & BM_UARTDBGFR_RESERVED)
+#define BM_UARTDBGFR_RI 0x00000100
+#define BM_UARTDBGFR_TXFE 0x00000080
+#define BM_UARTDBGFR_RXFF 0x00000040
+#define BM_UARTDBGFR_TXFF 0x00000020
+#define BM_UARTDBGFR_RXFE 0x00000010
+#define BM_UARTDBGFR_BUSY 0x00000008
+#define BM_UARTDBGFR_DCD 0x00000004
+#define BM_UARTDBGFR_DSR 0x00000002
+#define BM_UARTDBGFR_CTS 0x00000001
+#define HW_UARTDBGILPR 0x00000020
+#define BP_UARTDBGILPR_UNAVAILABLE      8
+#define BM_UARTDBGILPR_UNAVAILABLE 0xFFFFFF00
+#define BF_UARTDBGILPR_UNAVAILABLE(v) \
+	(((v) << 8) & BM_UARTDBGILPR_UNAVAILABLE)
+#define BP_UARTDBGILPR_ILPDVSR      0
+#define BM_UARTDBGILPR_ILPDVSR 0x000000FF
+#define BF_UARTDBGILPR_ILPDVSR(v)  \
+	(((v) << 0) & BM_UARTDBGILPR_ILPDVSR)
+#define HW_UARTDBGIBRD 0x00000024
+#define BP_UARTDBGIBRD_UNAVAILABLE      16
+#define BM_UARTDBGIBRD_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGIBRD_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGIBRD_UNAVAILABLE)
+#define BP_UARTDBGIBRD_BAUD_DIVINT      0
+#define BM_UARTDBGIBRD_BAUD_DIVINT 0x0000FFFF
+#define BF_UARTDBGIBRD_BAUD_DIVINT(v)  \
+	(((v) << 0) & BM_UARTDBGIBRD_BAUD_DIVINT)
+#define HW_UARTDBGFBRD 0x00000028
+#define BP_UARTDBGFBRD_UNAVAILABLE      8
+#define BM_UARTDBGFBRD_UNAVAILABLE 0xFFFFFF00
+#define BF_UARTDBGFBRD_UNAVAILABLE(v) \
+	(((v) << 8) & BM_UARTDBGFBRD_UNAVAILABLE)
+#define BP_UARTDBGFBRD_RESERVED      6
+#define BM_UARTDBGFBRD_RESERVED 0x000000C0
+#define BF_UARTDBGFBRD_RESERVED(v)  \
+	(((v) << 6) & BM_UARTDBGFBRD_RESERVED)
+#define BP_UARTDBGFBRD_BAUD_DIVFRAC      0
+#define BM_UARTDBGFBRD_BAUD_DIVFRAC 0x0000003F
+#define BF_UARTDBGFBRD_BAUD_DIVFRAC(v)  \
+	(((v) << 0) & BM_UARTDBGFBRD_BAUD_DIVFRAC)
+#define HW_UARTDBGLCR_H 0x0000002c
+#define BP_UARTDBGLCR_H_UNAVAILABLE      16
+#define BM_UARTDBGLCR_H_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGLCR_H_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGLCR_H_UNAVAILABLE)
+#define BP_UARTDBGLCR_H_RESERVED      8
+#define BM_UARTDBGLCR_H_RESERVED 0x0000FF00
+#define BF_UARTDBGLCR_H_RESERVED(v)  \
+	(((v) << 8) & BM_UARTDBGLCR_H_RESERVED)
+#define BM_UARTDBGLCR_H_SPS 0x00000080
+#define BP_UARTDBGLCR_H_WLEN      5
+#define BM_UARTDBGLCR_H_WLEN 0x00000060
+#define BF_UARTDBGLCR_H_WLEN(v)  \
+	(((v) << 5) & BM_UARTDBGLCR_H_WLEN)
+#define BM_UARTDBGLCR_H_FEN 0x00000010
+#define BM_UARTDBGLCR_H_STP2 0x00000008
+#define BM_UARTDBGLCR_H_EPS 0x00000004
+#define BM_UARTDBGLCR_H_PEN 0x00000002
+#define BM_UARTDBGLCR_H_BRK 0x00000001
+#define HW_UARTDBGCR 0x00000030
+#define BP_UARTDBGCR_UNAVAILABLE      16
+#define BM_UARTDBGCR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGCR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGCR_UNAVAILABLE)
+#define BM_UARTDBGCR_CTSEN 0x00008000
+#define BM_UARTDBGCR_RTSEN 0x00004000
+#define BM_UARTDBGCR_OUT2 0x00002000
+#define BM_UARTDBGCR_OUT1 0x00001000
+#define BM_UARTDBGCR_RTS 0x00000800
+#define BM_UARTDBGCR_DTR 0x00000400
+#define BM_UARTDBGCR_RXE 0x00000200
+#define BM_UARTDBGCR_TXE 0x00000100
+#define BM_UARTDBGCR_LBE 0x00000080
+#define BP_UARTDBGCR_RESERVED      3
+#define BM_UARTDBGCR_RESERVED 0x00000078
+#define BF_UARTDBGCR_RESERVED(v)  \
+	(((v) << 3) & BM_UARTDBGCR_RESERVED)
+#define BM_UARTDBGCR_SIRLP 0x00000004
+#define BM_UARTDBGCR_SIREN 0x00000002
+#define BM_UARTDBGCR_UARTEN 0x00000001
+#define HW_UARTDBGIFLS 0x00000034
+#define BP_UARTDBGIFLS_UNAVAILABLE      16
+#define BM_UARTDBGIFLS_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGIFLS_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGIFLS_UNAVAILABLE)
+#define BP_UARTDBGIFLS_RESERVED      6
+#define BM_UARTDBGIFLS_RESERVED 0x0000FFC0
+#define BF_UARTDBGIFLS_RESERVED(v)  \
+	(((v) << 6) & BM_UARTDBGIFLS_RESERVED)
+#define BP_UARTDBGIFLS_RXIFLSEL      3
+#define BM_UARTDBGIFLS_RXIFLSEL 0x00000038
+#define BF_UARTDBGIFLS_RXIFLSEL(v)  \
+	(((v) << 3) & BM_UARTDBGIFLS_RXIFLSEL)
+#define BV_UARTDBGIFLS_RXIFLSEL__NOT_EMPTY      0x0
+#define BV_UARTDBGIFLS_RXIFLSEL__ONE_QUARTER    0x1
+#define BV_UARTDBGIFLS_RXIFLSEL__ONE_HALF       0x2
+#define BV_UARTDBGIFLS_RXIFLSEL__THREE_QUARTERS 0x3
+#define BV_UARTDBGIFLS_RXIFLSEL__SEVEN_EIGHTHS  0x4
+#define BV_UARTDBGIFLS_RXIFLSEL__INVALID5       0x5
+#define BV_UARTDBGIFLS_RXIFLSEL__INVALID6       0x6
+#define BV_UARTDBGIFLS_RXIFLSEL__INVALID7       0x7
+#define BP_UARTDBGIFLS_TXIFLSEL      0
+#define BM_UARTDBGIFLS_TXIFLSEL 0x00000007
+#define BF_UARTDBGIFLS_TXIFLSEL(v)  \
+	(((v) << 0) & BM_UARTDBGIFLS_TXIFLSEL)
+#define BV_UARTDBGIFLS_TXIFLSEL__EMPTY	  0x0
+#define BV_UARTDBGIFLS_TXIFLSEL__ONE_QUARTER    0x1
+#define BV_UARTDBGIFLS_TXIFLSEL__ONE_HALF       0x2
+#define BV_UARTDBGIFLS_TXIFLSEL__THREE_QUARTERS 0x3
+#define BV_UARTDBGIFLS_TXIFLSEL__SEVEN_EIGHTHS  0x4
+#define BV_UARTDBGIFLS_TXIFLSEL__INVALID5       0x5
+#define BV_UARTDBGIFLS_TXIFLSEL__INVALID6       0x6
+#define BV_UARTDBGIFLS_TXIFLSEL__INVALID7       0x7
+#define HW_UARTDBGIMSC 0x00000038
+#define BP_UARTDBGIMSC_UNAVAILABLE      16
+#define BM_UARTDBGIMSC_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGIMSC_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGIMSC_UNAVAILABLE)
+#define BP_UARTDBGIMSC_RESERVED      11
+#define BM_UARTDBGIMSC_RESERVED 0x0000F800
+#define BF_UARTDBGIMSC_RESERVED(v)  \
+	(((v) << 11) & BM_UARTDBGIMSC_RESERVED)
+#define BM_UARTDBGIMSC_OEIM 0x00000400
+#define BM_UARTDBGIMSC_BEIM 0x00000200
+#define BM_UARTDBGIMSC_PEIM 0x00000100
+#define BM_UARTDBGIMSC_FEIM 0x00000080
+#define BM_UARTDBGIMSC_RTIM 0x00000040
+#define BM_UARTDBGIMSC_TXIM 0x00000020
+#define BM_UARTDBGIMSC_RXIM 0x00000010
+#define BM_UARTDBGIMSC_DSRMIM 0x00000008
+#define BM_UARTDBGIMSC_DCDMIM 0x00000004
+#define BM_UARTDBGIMSC_CTSMIM 0x00000002
+#define BM_UARTDBGIMSC_RIMIM 0x00000001
+#define HW_UARTDBGRIS 0x0000003c
+#define BP_UARTDBGRIS_UNAVAILABLE      16
+#define BM_UARTDBGRIS_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGRIS_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGRIS_UNAVAILABLE)
+#define BP_UARTDBGRIS_RESERVED      11
+#define BM_UARTDBGRIS_RESERVED 0x0000F800
+#define BF_UARTDBGRIS_RESERVED(v)  \
+	(((v) << 11) & BM_UARTDBGRIS_RESERVED)
+#define BM_UARTDBGRIS_OERIS 0x00000400
+#define BM_UARTDBGRIS_BERIS 0x00000200
+#define BM_UARTDBGRIS_PERIS 0x00000100
+#define BM_UARTDBGRIS_FERIS 0x00000080
+#define BM_UARTDBGRIS_RTRIS 0x00000040
+#define BM_UARTDBGRIS_TXRIS 0x00000020
+#define BM_UARTDBGRIS_RXRIS 0x00000010
+#define BM_UARTDBGRIS_DSRRMIS 0x00000008
+#define BM_UARTDBGRIS_DCDRMIS 0x00000004
+#define BM_UARTDBGRIS_CTSRMIS 0x00000002
+#define BM_UARTDBGRIS_RIRMIS 0x00000001
+#define HW_UARTDBGMIS 0x00000040
+#define BP_UARTDBGMIS_UNAVAILABLE      16
+#define BM_UARTDBGMIS_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGMIS_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGMIS_UNAVAILABLE)
+#define BP_UARTDBGMIS_RESERVED      11
+#define BM_UARTDBGMIS_RESERVED 0x0000F800
+#define BF_UARTDBGMIS_RESERVED(v)  \
+	(((v) << 11) & BM_UARTDBGMIS_RESERVED)
+#define BM_UARTDBGMIS_OEMIS 0x00000400
+#define BM_UARTDBGMIS_BEMIS 0x00000200
+#define BM_UARTDBGMIS_PEMIS 0x00000100
+#define BM_UARTDBGMIS_FEMIS 0x00000080
+#define BM_UARTDBGMIS_RTMIS 0x00000040
+#define BM_UARTDBGMIS_TXMIS 0x00000020
+#define BM_UARTDBGMIS_RXMIS 0x00000010
+#define BM_UARTDBGMIS_DSRMMIS 0x00000008
+#define BM_UARTDBGMIS_DCDMMIS 0x00000004
+#define BM_UARTDBGMIS_CTSMMIS 0x00000002
+#define BM_UARTDBGMIS_RIMMIS 0x00000001
+#define HW_UARTDBGICR 0x00000044
+#define BP_UARTDBGICR_UNAVAILABLE      16
+#define BM_UARTDBGICR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGICR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGICR_UNAVAILABLE)
+#define BP_UARTDBGICR_RESERVED      11
+#define BM_UARTDBGICR_RESERVED 0x0000F800
+#define BF_UARTDBGICR_RESERVED(v)  \
+	(((v) << 11) & BM_UARTDBGICR_RESERVED)
+#define BM_UARTDBGICR_OEIC 0x00000400
+#define BM_UARTDBGICR_BEIC 0x00000200
+#define BM_UARTDBGICR_PEIC 0x00000100
+#define BM_UARTDBGICR_FEIC 0x00000080
+#define BM_UARTDBGICR_RTIC 0x00000040
+#define BM_UARTDBGICR_TXIC 0x00000020
+#define BM_UARTDBGICR_RXIC 0x00000010
+#define BM_UARTDBGICR_DSRMIC 0x00000008
+#define BM_UARTDBGICR_DCDMIC 0x00000004
+#define BM_UARTDBGICR_CTSMIC 0x00000002
+#define BM_UARTDBGICR_RIMIC 0x00000001
+#define HW_UARTDBGDMACR 0x00000048
+#define BP_UARTDBGDMACR_UNAVAILABLE      16
+#define BM_UARTDBGDMACR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGDMACR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGDMACR_UNAVAILABLE)
+#define BP_UARTDBGDMACR_RESERVED      3
+#define BM_UARTDBGDMACR_RESERVED 0x0000FFF8
+#define BF_UARTDBGDMACR_RESERVED(v)  \
+	(((v) << 3) & BM_UARTDBGDMACR_RESERVED)
+#define BM_UARTDBGDMACR_DMAONERR 0x00000004
+#define BM_UARTDBGDMACR_TXDMAE 0x00000002
+#define BM_UARTDBGDMACR_RXDMAE 0x00000001
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-usbctl.h b/arch/arm/mach-stmp37xx/include/mach/regs-usbctl.h
new file mode 100644
index 0000000..9145e22
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-usbctl.h
@@ -0,0 +1,22 @@
+/*
+ * stmp37xx: USBCTL register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_USBCTL_BASE	(STMP3XXX_REGS_BASE + 0x80000)
+#define REGS_USBCTL_PHYS	0x80000
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-usbctrl.h b/arch/arm/mach-stmp37xx/include/mach/regs-usbctrl.h
new file mode 100644
index 0000000..1a2ae9c
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-usbctrl.h
@@ -0,0 +1,22 @@
+/*
+ * stmp37xx: USBCTRL register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_USBCTRL_BASE	(STMP3XXX_REGS_BASE + 0x80000)
+#define REGS_USBCTRL_PHYS	0x80080000
diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-usbphy.h b/arch/arm/mach-stmp37xx/include/mach/regs-usbphy.h
new file mode 100644
index 0000000..b7fce0f
--- /dev/null
+++ b/arch/arm/mach-stmp37xx/include/mach/regs-usbphy.h
@@ -0,0 +1,37 @@
+/*
+ * stmp37xx: USBPHY register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_USBPHY_BASE	(STMP3XXX_REGS_BASE + 0x7C000)
+
+#define HW_USBPHY_PWD		0x0
+
+#define HW_USBPHY_CTRL		0x30
+#define BM_USBPHY_CTRL_ENHSPRECHARGEXMIT	0x00000001
+#define BP_USBPHY_CTRL_ENHSPRECHARGEXMIT	0
+#define BM_USBPHY_CTRL_ENHOSTDISCONDETECT	0x00000002
+#define BM_USBPHY_CTRL_ENDEVPLUGINDETECT	0x00000010
+#define BM_USBPHY_CTRL_ENOTGIDDETECT	0x00000080
+#define BM_USBPHY_CTRL_ENIRQDEVPLUGIN	0x00000800
+#define BM_USBPHY_CTRL_CLKGATE	0x40000000
+#define BM_USBPHY_CTRL_SFTRST	0x80000000
+
+#define HW_USBPHY_STATUS	0x40
+#define BM_USBPHY_STATUS_DEVPLUGIN_STATUS	0x00000040
+#define BM_USBPHY_STATUS_OTGID_STATUS	0x00000100
-- 
cgit v0.10.2


From a50808b6c4e0e25a72314391d70479447e3ce092 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Thu, 4 Jun 2009 13:49:37 +0100
Subject: [ARM] 5538/1: Freescale STMP: 378n registers definition

Add register definitions for Freescale STMP 378n boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp378x/include/mach/regs-audioin.h b/arch/arm/mach-stmp378x/include/mach/regs-audioin.h
new file mode 100644
index 0000000..641ac61
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-audioin.h
@@ -0,0 +1,63 @@
+/*
+ * stmp378x: AUDIOIN register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_AUDIOIN_BASE	(STMP3XXX_REGS_BASE + 0x4C000)
+#define REGS_AUDIOIN_PHYS	0x8004C000
+#define REGS_AUDIOIN_SIZE	0x2000
+
+#define HW_AUDIOIN_CTRL		0x0
+#define BM_AUDIOIN_CTRL_RUN	0x00000001
+#define BP_AUDIOIN_CTRL_RUN	0
+#define BM_AUDIOIN_CTRL_FIFO_ERROR_IRQ_EN	0x00000002
+#define BM_AUDIOIN_CTRL_FIFO_OVERFLOW_IRQ	0x00000004
+#define BM_AUDIOIN_CTRL_FIFO_UNDERFLOW_IRQ	0x00000008
+#define BM_AUDIOIN_CTRL_WORD_LENGTH	0x00000020
+#define BM_AUDIOIN_CTRL_CLKGATE	0x40000000
+#define BM_AUDIOIN_CTRL_SFTRST	0x80000000
+
+#define HW_AUDIOIN_STAT		0x10
+
+#define HW_AUDIOIN_ADCSRR	0x20
+
+#define HW_AUDIOIN_ADCVOLUME	0x30
+#define BM_AUDIOIN_ADCVOLUME_VOLUME_RIGHT	0x000000FF
+#define BP_AUDIOIN_ADCVOLUME_VOLUME_RIGHT	0
+#define BM_AUDIOIN_ADCVOLUME_VOLUME_LEFT	0x00FF0000
+#define BP_AUDIOIN_ADCVOLUME_VOLUME_LEFT	16
+
+#define HW_AUDIOIN_ADCDEBUG	0x40
+
+#define HW_AUDIOIN_ADCVOL	0x50
+#define BM_AUDIOIN_ADCVOL_GAIN_RIGHT	0x0000000F
+#define BP_AUDIOIN_ADCVOL_GAIN_RIGHT	0
+#define BM_AUDIOIN_ADCVOL_SELECT_RIGHT	0x00000030
+#define BP_AUDIOIN_ADCVOL_SELECT_RIGHT	4
+#define BM_AUDIOIN_ADCVOL_GAIN_LEFT	0x00000F00
+#define BP_AUDIOIN_ADCVOL_GAIN_LEFT	8
+#define BM_AUDIOIN_ADCVOL_SELECT_LEFT	0x00003000
+#define BP_AUDIOIN_ADCVOL_SELECT_LEFT	12
+#define BM_AUDIOIN_ADCVOL_MUTE	0x01000000
+
+#define HW_AUDIOIN_MICLINE	0x60
+
+#define HW_AUDIOIN_ANACLKCTRL	0x70
+#define BM_AUDIOIN_ANACLKCTRL_CLKGATE	0x80000000
+
+#define HW_AUDIOIN_DATA		0x80
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-audioout.h b/arch/arm/mach-stmp378x/include/mach/regs-audioout.h
new file mode 100644
index 0000000..f533e23
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-audioout.h
@@ -0,0 +1,104 @@
+/*
+ * stmp378x: AUDIOOUT register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_AUDIOOUT_BASE	(STMP3XXX_REGS_BASE + 0x48000)
+#define REGS_AUDIOOUT_PHYS	0x80048000
+#define REGS_AUDIOOUT_SIZE	0x2000
+
+#define HW_AUDIOOUT_CTRL	0x0
+#define BM_AUDIOOUT_CTRL_RUN	0x00000001
+#define BP_AUDIOOUT_CTRL_RUN	0
+#define BM_AUDIOOUT_CTRL_FIFO_ERROR_IRQ_EN	0x00000002
+#define BM_AUDIOOUT_CTRL_FIFO_OVERFLOW_IRQ	0x00000004
+#define BM_AUDIOOUT_CTRL_FIFO_UNDERFLOW_IRQ	0x00000008
+#define BM_AUDIOOUT_CTRL_WORD_LENGTH	0x00000040
+#define BM_AUDIOOUT_CTRL_CLKGATE	0x40000000
+#define BM_AUDIOOUT_CTRL_SFTRST	0x80000000
+
+#define HW_AUDIOOUT_STAT	0x10
+
+#define HW_AUDIOOUT_DACSRR	0x20
+#define BM_AUDIOOUT_DACSRR_SRC_FRAC	0x00001FFF
+#define BP_AUDIOOUT_DACSRR_SRC_FRAC	0
+#define BM_AUDIOOUT_DACSRR_SRC_INT	0x001F0000
+#define BP_AUDIOOUT_DACSRR_SRC_INT	16
+#define BM_AUDIOOUT_DACSRR_SRC_HOLD	0x07000000
+#define BP_AUDIOOUT_DACSRR_SRC_HOLD	24
+#define BM_AUDIOOUT_DACSRR_BASEMULT	0x70000000
+#define BP_AUDIOOUT_DACSRR_BASEMULT	28
+
+#define HW_AUDIOOUT_DACVOLUME	0x30
+#define BM_AUDIOOUT_DACVOLUME_MUTE_RIGHT	0x00000100
+#define BM_AUDIOOUT_DACVOLUME_MUTE_LEFT	0x01000000
+#define BM_AUDIOOUT_DACVOLUME_EN_ZCD	0x02000000
+
+#define HW_AUDIOOUT_DACDEBUG	0x40
+
+#define HW_AUDIOOUT_HPVOL	0x50
+#define BM_AUDIOOUT_HPVOL_MUTE	0x01000000
+#define BM_AUDIOOUT_HPVOL_EN_MSTR_ZCD	0x02000000
+
+#define HW_AUDIOOUT_PWRDN	0x70
+#define BM_AUDIOOUT_PWRDN_HEADPHONE	0x00000001
+#define BP_AUDIOOUT_PWRDN_HEADPHONE	0
+#define BM_AUDIOOUT_PWRDN_CAPLESS	0x00000010
+#define BM_AUDIOOUT_PWRDN_ADC	0x00000100
+#define BM_AUDIOOUT_PWRDN_DAC	0x00001000
+#define BM_AUDIOOUT_PWRDN_RIGHT_ADC	0x00010000
+#define BM_AUDIOOUT_PWRDN_SPEAKER	0x01000000
+
+#define HW_AUDIOOUT_REFCTRL	0x80
+#define BM_AUDIOOUT_REFCTRL_VAG_VAL	0x000000F0
+#define BP_AUDIOOUT_REFCTRL_VAG_VAL	4
+#define BM_AUDIOOUT_REFCTRL_ADC_REFVAL	0x00000F00
+#define BP_AUDIOOUT_REFCTRL_ADC_REFVAL	8
+#define BM_AUDIOOUT_REFCTRL_ADJ_VAG	0x00001000
+#define BM_AUDIOOUT_REFCTRL_ADJ_ADC	0x00002000
+#define BM_AUDIOOUT_REFCTRL_BIAS_CTRL	0x00030000
+#define BP_AUDIOOUT_REFCTRL_BIAS_CTRL	16
+#define BM_AUDIOOUT_REFCTRL_LOW_PWR	0x00080000
+#define BM_AUDIOOUT_REFCTRL_VBG_ADJ	0x00700000
+#define BP_AUDIOOUT_REFCTRL_VBG_ADJ	20
+#define BM_AUDIOOUT_REFCTRL_XTAL_BGR_BIAS	0x01000000
+#define BM_AUDIOOUT_REFCTRL_RAISE_REF	0x02000000
+
+#define HW_AUDIOOUT_ANACTRL	0x90
+#define BM_AUDIOOUT_ANACTRL_HP_CLASSAB	0x00000010
+#define BM_AUDIOOUT_ANACTRL_HP_HOLD_GND	0x00000020
+
+#define HW_AUDIOOUT_TEST	0xA0
+#define BM_AUDIOOUT_TEST_HP_I1_ADJ	0x00C00000
+#define BP_AUDIOOUT_TEST_HP_I1_ADJ	22
+
+#define HW_AUDIOOUT_BISTCTRL	0xB0
+
+#define HW_AUDIOOUT_BISTSTAT0	0xC0
+
+#define HW_AUDIOOUT_BISTSTAT1	0xD0
+
+#define HW_AUDIOOUT_ANACLKCTRL	0xE0
+#define BM_AUDIOOUT_ANACLKCTRL_CLKGATE	0x80000000
+
+#define HW_AUDIOOUT_DATA	0xF0
+
+#define HW_AUDIOOUT_SPEAKERCTRL	0x100
+#define BM_AUDIOOUT_SPEAKERCTRL_MUTE	0x01000000
+
+#define HW_AUDIOOUT_VERSION	0x200
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-bch.h b/arch/arm/mach-stmp378x/include/mach/regs-bch.h
new file mode 100644
index 0000000..532d246
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-bch.h
@@ -0,0 +1,56 @@
+/*
+ * stmp378x: BCH register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_BCH_BASE	(STMP3XXX_REGS_BASE + 0xA000)
+#define REGS_BCH_PHYS	0x8000A000
+#define REGS_BCH_SIZE	0x2000
+
+#define HW_BCH_CTRL		0x0
+#define BM_BCH_CTRL_COMPLETE_IRQ	0x00000001
+#define BP_BCH_CTRL_COMPLETE_IRQ	0
+#define BM_BCH_CTRL_COMPLETE_IRQ_EN	0x00000100
+
+#define HW_BCH_STATUS0		0x10
+#define BM_BCH_STATUS0_UNCORRECTABLE	0x00000004
+#define BM_BCH_STATUS0_CORRECTED	0x00000008
+#define BM_BCH_STATUS0_STATUS_BLK0	0x0000FF00
+#define BP_BCH_STATUS0_STATUS_BLK0	8
+#define BM_BCH_STATUS0_COMPLETED_CE	0x000F0000
+#define BP_BCH_STATUS0_COMPLETED_CE	16
+
+#define HW_BCH_LAYOUTSELECT	0x70
+
+#define HW_BCH_FLASH0LAYOUT0	0x80
+#define BM_BCH_FLASH0LAYOUT0_DATA0_SIZE	0x00000FFF
+#define BP_BCH_FLASH0LAYOUT0_DATA0_SIZE	0
+#define BM_BCH_FLASH0LAYOUT0_ECC0	0x0000F000
+#define BP_BCH_FLASH0LAYOUT0_ECC0	12
+#define BM_BCH_FLASH0LAYOUT0_META_SIZE	0x00FF0000
+#define BP_BCH_FLASH0LAYOUT0_META_SIZE	16
+#define BM_BCH_FLASH0LAYOUT0_NBLOCKS	0xFF000000
+#define BP_BCH_FLASH0LAYOUT0_NBLOCKS	24
+#define BM_BCH_FLASH0LAYOUT1_DATAN_SIZE	0x00000FFF
+#define BP_BCH_FLASH0LAYOUT1_DATAN_SIZE	0
+#define BM_BCH_FLASH0LAYOUT1_ECCN	0x0000F000
+#define BP_BCH_FLASH0LAYOUT1_ECCN	12
+#define BM_BCH_FLASH0LAYOUT1_PAGE_SIZE	0xFFFF0000
+#define BP_BCH_FLASH0LAYOUT1_PAGE_SIZE	16
+
+#define HW_BCH_BLOCKNAME	0x150
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-dcp.h b/arch/arm/mach-stmp378x/include/mach/regs-dcp.h
new file mode 100644
index 0000000..fdedd00
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-dcp.h
@@ -0,0 +1,87 @@
+/*
+ * stmp378x: DCP register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_DCP_BASE	(STMP3XXX_REGS_BASE + 0x28000)
+#define REGS_DCP_PHYS	0x80028000
+#define REGS_DCP_SIZE	0x2000
+
+#define HW_DCP_CTRL		0x0
+#define BM_DCP_CTRL_CHANNEL_INTERRUPT_ENABLE	0x000000FF
+#define BP_DCP_CTRL_CHANNEL_INTERRUPT_ENABLE	0
+#define BM_DCP_CTRL_ENABLE_CONTEXT_CACHING	0x00400000
+#define BM_DCP_CTRL_GATHER_RESIDUAL_WRITES	0x00800000
+#define BM_DCP_CTRL_CLKGATE	0x40000000
+#define BM_DCP_CTRL_SFTRST	0x80000000
+
+#define HW_DCP_STAT		0x10
+#define BM_DCP_STAT_IRQ		0x0000000F
+#define BP_DCP_STAT_IRQ		0
+
+#define HW_DCP_CHANNELCTRL	0x20
+#define BM_DCP_CHANNELCTRL_ENABLE_CHANNEL	0x000000FF
+#define BP_DCP_CHANNELCTRL_ENABLE_CHANNEL	0
+
+#define HW_DCP_CONTEXT		0x50
+#define BM_DCP_PACKET1_INTERRUPT	0x00000001
+#define BP_DCP_PACKET1_INTERRUPT	0
+#define BM_DCP_PACKET1_DECR_SEMAPHORE	0x00000002
+#define BM_DCP_PACKET1_CHAIN	0x00000004
+#define BM_DCP_PACKET1_CHAIN_CONTIGUOUS	0x00000008
+#define BM_DCP_PACKET1_ENABLE_CIPHER	0x00000020
+#define BM_DCP_PACKET1_ENABLE_HASH	0x00000040
+#define BM_DCP_PACKET1_CIPHER_ENCRYPT	0x00000100
+#define BM_DCP_PACKET1_CIPHER_INIT	0x00000200
+#define BM_DCP_PACKET1_OTP_KEY	0x00000400
+#define BM_DCP_PACKET1_PAYLOAD_KEY	0x00000800
+#define BM_DCP_PACKET1_HASH_INIT	0x00001000
+#define BM_DCP_PACKET1_HASH_TERM	0x00002000
+#define BM_DCP_PACKET2_CIPHER_SELECT	0x0000000F
+#define BP_DCP_PACKET2_CIPHER_SELECT	0
+#define BM_DCP_PACKET2_CIPHER_MODE	0x000000F0
+#define BP_DCP_PACKET2_CIPHER_MODE	4
+#define BM_DCP_PACKET2_KEY_SELECT	0x0000FF00
+#define BP_DCP_PACKET2_KEY_SELECT	8
+#define BM_DCP_PACKET2_HASH_SELECT	0x000F0000
+#define BP_DCP_PACKET2_HASH_SELECT	16
+#define BM_DCP_PACKET2_CIPHER_CFG	0xFF000000
+#define BP_DCP_PACKET2_CIPHER_CFG	24
+
+#define HW_DCP_CH0CMDPTR	(0x100 + 0 * 0x40)
+#define HW_DCP_CH1CMDPTR	(0x100 + 1 * 0x40)
+#define HW_DCP_CH2CMDPTR	(0x100 + 2 * 0x40)
+#define HW_DCP_CH3CMDPTR	(0x100 + 3 * 0x40)
+
+#define HW_DCP_CHnCMDPTR	0x100
+
+#define HW_DCP_CH0SEMA		(0x110 + 0 * 0x40)
+#define HW_DCP_CH1SEMA		(0x110 + 1 * 0x40)
+#define HW_DCP_CH2SEMA		(0x110 + 2 * 0x40)
+#define HW_DCP_CH3SEMA		(0x110 + 3 * 0x40)
+
+#define HW_DCP_CHnSEMA		0x110
+#define BM_DCP_CHnSEMA_INCREMENT	0x000000FF
+#define BP_DCP_CHnSEMA_INCREMENT	0
+
+#define HW_DCP_CH0STAT		(0x120 + 0 * 0x40)
+#define HW_DCP_CH1STAT		(0x120 + 1 * 0x40)
+#define HW_DCP_CH2STAT		(0x120 + 2 * 0x40)
+#define HW_DCP_CH3STAT		(0x120 + 3 * 0x40)
+
+#define HW_DCP_CHnSTAT		0x120
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-digctl.h b/arch/arm/mach-stmp378x/include/mach/regs-digctl.h
new file mode 100644
index 0000000..5293005
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-digctl.h
@@ -0,0 +1,38 @@
+/*
+ * stmp378x: DIGCTL register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_DIGCTL_BASE	(STMP3XXX_REGS_BASE + 0x1C000)
+#define REGS_DIGCTL_PHYS	0x8001C000
+#define REGS_DIGCTL_SIZE	0x2000
+
+#define HW_DIGCTL_CTRL		0x0
+#define BM_DIGCTL_CTRL_USB_CLKGATE	0x00000004
+
+#define HW_DIGCTL_ARMCACHE	0x2B0
+#define BM_DIGCTL_ARMCACHE_ITAG_SS	0x00000003
+#define BP_DIGCTL_ARMCACHE_ITAG_SS	0
+#define BM_DIGCTL_ARMCACHE_DTAG_SS	0x00000030
+#define BP_DIGCTL_ARMCACHE_DTAG_SS	4
+#define BM_DIGCTL_ARMCACHE_CACHE_SS	0x00000300
+#define BP_DIGCTL_ARMCACHE_CACHE_SS	8
+#define BM_DIGCTL_ARMCACHE_DRTY_SS	0x00003000
+#define BP_DIGCTL_ARMCACHE_DRTY_SS	12
+#define BM_DIGCTL_ARMCACHE_VALID_SS	0x00030000
+#define BP_DIGCTL_ARMCACHE_VALID_SS	16
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-dram.h b/arch/arm/mach-stmp378x/include/mach/regs-dram.h
new file mode 100644
index 0000000..0285143
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-dram.h
@@ -0,0 +1,27 @@
+/*
+ * stmp378x: DRAM register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_DRAM_BASE	(STMP3XXX_REGS_BASE + 0xE0000)
+#define REGS_DRAM_PHYS	0x800E0000
+#define REGS_DRAM_SIZE	0x2000
+
+#define HW_DRAM_CTL06		0x18
+
+#define HW_DRAM_CTL08		0x20
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-dri.h b/arch/arm/mach-stmp378x/include/mach/regs-dri.h
new file mode 100644
index 0000000..da25f7e
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-dri.h
@@ -0,0 +1,45 @@
+/*
+ * stmp378x: DRI register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_DRI_BASE	(STMP3XXX_REGS_BASE + 0x74000)
+#define REGS_DRI_PHYS	0x80074000
+#define REGS_DRI_SIZE	0x2000
+
+#define HW_DRI_CTRL		0x0
+#define BM_DRI_CTRL_RUN		0x00000001
+#define BP_DRI_CTRL_RUN		0
+#define BM_DRI_CTRL_ATTENTION_IRQ	0x00000002
+#define BM_DRI_CTRL_PILOT_SYNC_LOSS_IRQ	0x00000004
+#define BM_DRI_CTRL_OVERFLOW_IRQ	0x00000008
+#define BM_DRI_CTRL_ATTENTION_IRQ_EN	0x00000200
+#define BM_DRI_CTRL_PILOT_SYNC_LOSS_IRQ_EN	0x00000400
+#define BM_DRI_CTRL_OVERFLOW_IRQ_EN	0x00000800
+#define BM_DRI_CTRL_REACQUIRE_PHASE	0x00008000
+#define BM_DRI_CTRL_STOP_ON_PILOT_ERROR	0x02000000
+#define BM_DRI_CTRL_STOP_ON_OFLOW_ERROR	0x04000000
+#define BM_DRI_CTRL_ENABLE_INPUTS	0x20000000
+#define BM_DRI_CTRL_CLKGATE	0x40000000
+#define BM_DRI_CTRL_SFTRST	0x80000000
+
+#define HW_DRI_TIMING		0x10
+#define BM_DRI_TIMING_GAP_DETECTION_INTERVAL	0x000000FF
+#define BP_DRI_TIMING_GAP_DETECTION_INTERVAL	0
+#define BM_DRI_TIMING_PILOT_REP_RATE	0x000F0000
+#define BP_DRI_TIMING_PILOT_REP_RATE	16
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-ecc8.h b/arch/arm/mach-stmp378x/include/mach/regs-ecc8.h
new file mode 100644
index 0000000..cc353be
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-ecc8.h
@@ -0,0 +1,39 @@
+/*
+ * stmp378x: ECC8 register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_ECC8_BASE	(STMP3XXX_REGS_BASE + 0x8000)
+#define REGS_ECC8_PHYS	0x80008000
+#define REGS_ECC8_SIZE	0x2000
+
+#define HW_ECC8_CTRL		0x0
+#define BM_ECC8_CTRL_COMPLETE_IRQ	0x00000001
+#define BP_ECC8_CTRL_COMPLETE_IRQ	0
+#define BM_ECC8_CTRL_COMPLETE_IRQ_EN	0x00000100
+#define BM_ECC8_CTRL_AHBM_SFTRST	0x20000000
+
+#define HW_ECC8_STATUS0		0x10
+#define BM_ECC8_STATUS0_UNCORRECTABLE	0x00000004
+#define BM_ECC8_STATUS0_CORRECTED	0x00000008
+#define BM_ECC8_STATUS0_STATUS_AUX	0x00000F00
+#define BP_ECC8_STATUS0_STATUS_AUX	8
+#define BM_ECC8_STATUS0_COMPLETED_CE	0x000F0000
+#define BP_ECC8_STATUS0_COMPLETED_CE	16
+
+#define HW_ECC8_STATUS1		0x20
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-emi.h b/arch/arm/mach-stmp378x/include/mach/regs-emi.h
new file mode 100644
index 0000000..98773fc
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-emi.h
@@ -0,0 +1,25 @@
+/*
+ * stmp378x: EMI register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_EMI_BASE	(STMP3XXX_REGS_BASE + 0x20000)
+#define REGS_EMI_PHYS	0x80020000
+#define REGS_EMI_SIZE	0x2000
+
+#define HW_EMI_STAT		0x10
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-gpmi.h b/arch/arm/mach-stmp378x/include/mach/regs-gpmi.h
new file mode 100644
index 0000000..2cc8bbe
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-gpmi.h
@@ -0,0 +1,78 @@
+/*
+ * stmp378x: GPMI register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_GPMI_BASE	(STMP3XXX_REGS_BASE + 0xC000)
+#define REGS_GPMI_PHYS	0x8000C000
+#define REGS_GPMI_SIZE	0x2000
+
+#define HW_GPMI_CTRL0		0x0
+#define BM_GPMI_CTRL0_XFER_COUNT	0x0000FFFF
+#define BP_GPMI_CTRL0_XFER_COUNT	0
+#define BM_GPMI_CTRL0_CS	0x00300000
+#define BP_GPMI_CTRL0_CS	20
+#define BM_GPMI_CTRL0_LOCK_CS	0x00400000
+#define BM_GPMI_CTRL0_WORD_LENGTH	0x00800000
+#define BM_GPMI_CTRL0_ADDRESS      0x000E0000
+#define BP_GPMI_CTRL0_ADDRESS      17
+#define BV_GPMI_CTRL0_ADDRESS__NAND_DATA  0x0
+#define BV_GPMI_CTRL0_ADDRESS__NAND_CLE   0x1
+#define BV_GPMI_CTRL0_ADDRESS__NAND_ALE   0x2
+#define BM_GPMI_CTRL0_ADDRESS_INCREMENT      0x00010000
+#define BM_GPMI_CTRL0_COMMAND_MODE	0x03000000
+#define BP_GPMI_CTRL0_COMMAND_MODE	24
+#define BV_GPMI_CTRL0_COMMAND_MODE__WRITE	    0x0
+#define BV_GPMI_CTRL0_COMMAND_MODE__READ	     0x1
+#define BV_GPMI_CTRL0_COMMAND_MODE__READ_AND_COMPARE 0x2
+#define BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY   0x3
+#define BM_GPMI_CTRL0_RUN	0x20000000
+#define BM_GPMI_CTRL0_CLKGATE	0x40000000
+#define BM_GPMI_CTRL0_SFTRST	0x80000000
+#define BM_GPMI_ECCCTRL_BUFFER_MASK	0x000001FF
+#define BP_GPMI_ECCCTRL_BUFFER_MASK	0
+#define BM_GPMI_ECCCTRL_ENABLE_ECC	0x00001000
+#define BM_GPMI_ECCCTRL_ECC_CMD	0x00006000
+#define BP_GPMI_ECCCTRL_ECC_CMD	13
+#define BV_GPMI_ECCCTRL_ECC_CMD__DECODE_4_BIT			0
+#define BV_GPMI_ECCCTRL_ECC_CMD__ENCODE_4_BIT			1
+#define BV_GPMI_ECCCTRL_ECC_CMD__DECODE_8_BIT			2
+#define BV_GPMI_ECCCTRL_ECC_CMD__ENCODE_8_BIT			3
+
+#define HW_GPMI_CTRL1		0x60
+#define BM_GPMI_CTRL1_GPMI_MODE	0x00000001
+#define BP_GPMI_CTRL1_GPMI_MODE	0
+#define BM_GPMI_CTRL1_ATA_IRQRDY_POLARITY	0x00000004
+#define BM_GPMI_CTRL1_DEV_RESET	0x00000008
+#define BM_GPMI_CTRL1_TIMEOUT_IRQ	0x00000200
+#define BM_GPMI_CTRL1_DEV_IRQ	0x00000400
+#define BM_GPMI_CTRL1_RDN_DELAY	0x0000F000
+#define BP_GPMI_CTRL1_RDN_DELAY	12
+#define BM_GPMI_CTRL1_BCH_MODE	0x00040000
+
+#define HW_GPMI_TIMING0		0x70
+#define BM_GPMI_TIMING0_DATA_SETUP	0x000000FF
+#define BP_GPMI_TIMING0_DATA_SETUP	0
+#define BM_GPMI_TIMING0_DATA_HOLD	0x0000FF00
+#define BP_GPMI_TIMING0_DATA_HOLD	8
+#define BM_GPMI_TIMING0_ADDRESS_SETUP	0x00FF0000
+#define BP_GPMI_TIMING0_ADDRESS_SETUP	16
+
+#define HW_GPMI_TIMING1		0x80
+#define BM_GPMI_TIMING1_DEVICE_BUSY_TIMEOUT	0xFFFF0000
+#define BP_GPMI_TIMING1_DEVICE_BUSY_TIMEOUT	16
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-i2c.h b/arch/arm/mach-stmp378x/include/mach/regs-i2c.h
new file mode 100644
index 0000000..13a234c
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-i2c.h
@@ -0,0 +1,55 @@
+/*
+ * stmp378x: I2C register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_I2C_BASE	(STMP3XXX_REGS_BASE + 0x58000)
+#define REGS_I2C_PHYS	0x80058000
+#define REGS_I2C_SIZE	0x2000
+
+#define HW_I2C_CTRL0		0x0
+#define BM_I2C_CTRL0_XFER_COUNT	0x0000FFFF
+#define BP_I2C_CTRL0_XFER_COUNT	0
+#define BM_I2C_CTRL0_DIRECTION	0x00010000
+#define BM_I2C_CTRL0_MASTER_MODE	0x00020000
+#define BM_I2C_CTRL0_PRE_SEND_START	0x00080000
+#define BM_I2C_CTRL0_POST_SEND_STOP	0x00100000
+#define BM_I2C_CTRL0_RETAIN_CLOCK	0x00200000
+#define BM_I2C_CTRL0_SEND_NAK_ON_LAST	0x02000000
+#define BM_I2C_CTRL0_CLKGATE	0x40000000
+#define BM_I2C_CTRL0_SFTRST	0x80000000
+
+#define HW_I2C_TIMING0		0x10
+
+#define HW_I2C_TIMING1		0x20
+
+#define HW_I2C_TIMING2		0x30
+
+#define HW_I2C_CTRL1		0x40
+#define BM_I2C_CTRL1_SLAVE_IRQ	0x00000001
+#define BP_I2C_CTRL1_SLAVE_IRQ	0
+#define BM_I2C_CTRL1_SLAVE_STOP_IRQ	0x00000002
+#define BM_I2C_CTRL1_MASTER_LOSS_IRQ	0x00000004
+#define BM_I2C_CTRL1_EARLY_TERM_IRQ	0x00000008
+#define BM_I2C_CTRL1_OVERSIZE_XFER_TERM_IRQ	0x00000010
+#define BM_I2C_CTRL1_NO_SLAVE_ACK_IRQ	0x00000020
+#define BM_I2C_CTRL1_DATA_ENGINE_CMPLT_IRQ	0x00000040
+#define BM_I2C_CTRL1_BUS_FREE_IRQ	0x00000080
+#define BM_I2C_CTRL1_CLR_GOT_A_NAK	0x10000000
+
+#define HW_I2C_VERSION		0x90
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-ir.h b/arch/arm/mach-stmp378x/include/mach/regs-ir.h
new file mode 100644
index 0000000..a5b4ef1
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-ir.h
@@ -0,0 +1,23 @@
+/*
+ * stmp378x: IR register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_IR_BASE	(STMP3XXX_REGS_BASE + 0x78000)
+#define REGS_IR_PHYS	0x80078000
+#define REGS_IR_SIZE	0x2000
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-lcdif.h b/arch/arm/mach-stmp378x/include/mach/regs-lcdif.h
new file mode 100644
index 0000000..9cdbef4
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-lcdif.h
@@ -0,0 +1,195 @@
+/*
+ * stmp378x: LCDIF register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_LCDIF_BASE	(STMP3XXX_REGS_BASE + 0x30000)
+#define REGS_LCDIF_PHYS	0x80030000
+#define REGS_LCDIF_SIZE	0x2000
+
+#define HW_LCDIF_CTRL		0x0
+#define BM_LCDIF_CTRL_RUN	0x00000001
+#define BP_LCDIF_CTRL_RUN	0
+#define BM_LCDIF_CTRL_LCDIF_MASTER	0x00000020
+#define BM_LCDIF_CTRL_RGB_TO_YCBCR422_CSC	0x00000080
+#define BM_LCDIF_CTRL_WORD_LENGTH	0x00000300
+#define BP_LCDIF_CTRL_WORD_LENGTH	8
+#define BM_LCDIF_CTRL_LCD_DATABUS_WIDTH	0x00000C00
+#define BP_LCDIF_CTRL_LCD_DATABUS_WIDTH	10
+#define BM_LCDIF_CTRL_INPUT_DATA_SWIZZLE	0x0000C000
+#define BP_LCDIF_CTRL_INPUT_DATA_SWIZZLE	14
+#define BM_LCDIF_CTRL_DATA_SELECT	0x00010000
+#define BM_LCDIF_CTRL_DOTCLK_MODE	0x00020000
+#define BM_LCDIF_CTRL_VSYNC_MODE	0x00040000
+#define BM_LCDIF_CTRL_BYPASS_COUNT	0x00080000
+#define BM_LCDIF_CTRL_DVI_MODE	0x00100000
+#define BM_LCDIF_CTRL_SHIFT_NUM_BITS	0x03E00000
+#define BP_LCDIF_CTRL_SHIFT_NUM_BITS	21
+#define BM_LCDIF_CTRL_DATA_SHIFT_DIR	0x04000000
+#define BM_LCDIF_CTRL_WAIT_FOR_VSYNC_EDGE	0x08000000
+#define BM_LCDIF_CTRL_CLKGATE	0x40000000
+#define BM_LCDIF_CTRL_SFTRST	0x80000000
+
+#define HW_LCDIF_CTRL1		0x10
+#define BM_LCDIF_CTRL1_RESET	0x00000001
+#define BP_LCDIF_CTRL1_RESET	0
+#define BM_LCDIF_CTRL1_MODE86	0x00000002
+#define BM_LCDIF_CTRL1_BUSY_ENABLE	0x00000004
+#define BM_LCDIF_CTRL1_VSYNC_EDGE_IRQ	0x00000100
+#define BM_LCDIF_CTRL1_CUR_FRAME_DONE_IRQ	0x00000200
+#define BM_LCDIF_CTRL1_UNDERFLOW_IRQ	0x00000400
+#define BM_LCDIF_CTRL1_OVERFLOW_IRQ	0x00000800
+#define BM_LCDIF_CTRL1_VSYNC_EDGE_IRQ_EN	0x00001000
+#define BM_LCDIF_CTRL1_BYTE_PACKING_FORMAT	0x000F0000
+#define BP_LCDIF_CTRL1_BYTE_PACKING_FORMAT	16
+#define BM_LCDIF_CTRL1_INTERLACE_FIELDS	0x00800000
+#define BM_LCDIF_CTRL1_RECOVER_ON_UNDERFLOW	0x01000000
+
+#define HW_LCDIF_TRANSFER_COUNT	0x20
+#define BM_LCDIF_TRANSFER_COUNT_H_COUNT	0x0000FFFF
+#define BP_LCDIF_TRANSFER_COUNT_H_COUNT	0
+#define BM_LCDIF_TRANSFER_COUNT_V_COUNT	0xFFFF0000
+#define BP_LCDIF_TRANSFER_COUNT_V_COUNT	16
+
+#define HW_LCDIF_CUR_BUF	0x30
+
+#define HW_LCDIF_NEXT_BUF	0x40
+
+#define HW_LCDIF_TIMING		0x60
+
+#define HW_LCDIF_VDCTRL0	0x70
+#define BM_LCDIF_VDCTRL0_VSYNC_PULSE_WIDTH	0x0003FFFF
+#define BP_LCDIF_VDCTRL0_VSYNC_PULSE_WIDTH	0
+#define BM_LCDIF_VDCTRL0_VSYNC_PULSE_WIDTH_UNIT	0x00100000
+#define BM_LCDIF_VDCTRL0_VSYNC_PERIOD_UNIT	0x00200000
+#define BM_LCDIF_VDCTRL0_ENABLE_POL	0x01000000
+#define BM_LCDIF_VDCTRL0_DOTCLK_POL	0x02000000
+#define BM_LCDIF_VDCTRL0_HSYNC_POL	0x04000000
+#define BM_LCDIF_VDCTRL0_VSYNC_POL	0x08000000
+#define BM_LCDIF_VDCTRL0_ENABLE_PRESENT	0x10000000
+#define BM_LCDIF_VDCTRL0_VSYNC_OEB	0x20000000
+
+#define HW_LCDIF_VDCTRL1	0x80
+#define BM_LCDIF_VDCTRL1_VSYNC_PERIOD	0xFFFFFFFF
+#define BP_LCDIF_VDCTRL1_VSYNC_PERIOD	0
+
+#define HW_LCDIF_VDCTRL2	0x90
+#define BM_LCDIF_VDCTRL2_HSYNC_PERIOD	0x0003FFFF
+#define BP_LCDIF_VDCTRL2_HSYNC_PERIOD	0
+#define BM_LCDIF_VDCTRL2_HSYNC_PULSE_WIDTH	0xFF000000
+#define BP_LCDIF_VDCTRL2_HSYNC_PULSE_WIDTH	24
+
+#define HW_LCDIF_VDCTRL3	0xA0
+#define BM_LCDIF_VDCTRL3_VERTICAL_WAIT_CNT	0x0000FFFF
+#define BP_LCDIF_VDCTRL3_VERTICAL_WAIT_CNT	0
+#define BM_LCDIF_VDCTRL3_HORIZONTAL_WAIT_CNT	0x0FFF0000
+#define BP_LCDIF_VDCTRL3_HORIZONTAL_WAIT_CNT	16
+
+#define HW_LCDIF_VDCTRL4	0xB0
+#define BM_LCDIF_VDCTRL4_DOTCLK_H_VALID_DATA_CNT	0x0003FFFF
+#define BP_LCDIF_VDCTRL4_DOTCLK_H_VALID_DATA_CNT	0
+#define BM_LCDIF_VDCTRL4_SYNC_SIGNALS_ON	0x00040000
+
+#define HW_LCDIF_DVICTRL0	0xC0
+#define BM_LCDIF_DVICTRL0_V_LINES_CNT	0x000003FF
+#define BP_LCDIF_DVICTRL0_V_LINES_CNT	0
+#define BM_LCDIF_DVICTRL0_H_BLANKING_CNT	0x000FFC00
+#define BP_LCDIF_DVICTRL0_H_BLANKING_CNT	10
+#define BM_LCDIF_DVICTRL0_H_ACTIVE_CNT	0x7FF00000
+#define BP_LCDIF_DVICTRL0_H_ACTIVE_CNT	20
+
+#define HW_LCDIF_DVICTRL1	0xD0
+#define BM_LCDIF_DVICTRL1_F2_START_LINE	0x000003FF
+#define BP_LCDIF_DVICTRL1_F2_START_LINE	0
+#define BM_LCDIF_DVICTRL1_F1_END_LINE	0x000FFC00
+#define BP_LCDIF_DVICTRL1_F1_END_LINE	10
+#define BM_LCDIF_DVICTRL1_F1_START_LINE	0x3FF00000
+#define BP_LCDIF_DVICTRL1_F1_START_LINE	20
+
+#define HW_LCDIF_DVICTRL2	0xE0
+#define BM_LCDIF_DVICTRL2_V1_BLANK_END_LINE	0x000003FF
+#define BP_LCDIF_DVICTRL2_V1_BLANK_END_LINE	0
+#define BM_LCDIF_DVICTRL2_V1_BLANK_START_LINE	0x000FFC00
+#define BP_LCDIF_DVICTRL2_V1_BLANK_START_LINE	10
+#define BM_LCDIF_DVICTRL2_F2_END_LINE	0x3FF00000
+#define BP_LCDIF_DVICTRL2_F2_END_LINE	20
+
+#define HW_LCDIF_DVICTRL3	0xF0
+#define BM_LCDIF_DVICTRL3_V2_BLANK_END_LINE	0x000003FF
+#define BP_LCDIF_DVICTRL3_V2_BLANK_END_LINE	0
+#define BM_LCDIF_DVICTRL3_V2_BLANK_START_LINE	0x03FF0000
+#define BP_LCDIF_DVICTRL3_V2_BLANK_START_LINE	16
+
+#define HW_LCDIF_DVICTRL4	0x100
+#define BM_LCDIF_DVICTRL4_H_FILL_CNT	0x000000FF
+#define BP_LCDIF_DVICTRL4_H_FILL_CNT	0
+#define BM_LCDIF_DVICTRL4_CR_FILL_VALUE	0x0000FF00
+#define BP_LCDIF_DVICTRL4_CR_FILL_VALUE	8
+#define BM_LCDIF_DVICTRL4_CB_FILL_VALUE	0x00FF0000
+#define BP_LCDIF_DVICTRL4_CB_FILL_VALUE	16
+#define BM_LCDIF_DVICTRL4_Y_FILL_VALUE	0xFF000000
+#define BP_LCDIF_DVICTRL4_Y_FILL_VALUE	24
+
+#define HW_LCDIF_CSC_COEFF0	0x110
+#define BM_LCDIF_CSC_COEFF0_CSC_SUBSAMPLE_FILTER	0x00000003
+#define BP_LCDIF_CSC_COEFF0_CSC_SUBSAMPLE_FILTER	0
+#define BM_LCDIF_CSC_COEFF0_C0	0x03FF0000
+#define BP_LCDIF_CSC_COEFF0_C0	16
+
+#define HW_LCDIF_CSC_COEFF1	0x120
+#define BM_LCDIF_CSC_COEFF1_C1	0x000003FF
+#define BP_LCDIF_CSC_COEFF1_C1	0
+#define BM_LCDIF_CSC_COEFF1_C2	0x03FF0000
+#define BP_LCDIF_CSC_COEFF1_C2	16
+
+#define HW_LCDIF_CSC_COEFF2	0x130
+#define BM_LCDIF_CSC_COEFF2_C3	0x000003FF
+#define BP_LCDIF_CSC_COEFF2_C3	0
+#define BM_LCDIF_CSC_COEFF2_C4	0x03FF0000
+#define BP_LCDIF_CSC_COEFF2_C4	16
+
+#define HW_LCDIF_CSC_COEFF3	0x140
+#define BM_LCDIF_CSC_COEFF3_C5	0x000003FF
+#define BP_LCDIF_CSC_COEFF3_C5	0
+#define BM_LCDIF_CSC_COEFF3_C6	0x03FF0000
+#define BP_LCDIF_CSC_COEFF3_C6	16
+
+#define HW_LCDIF_CSC_COEFF4	0x150
+#define BM_LCDIF_CSC_COEFF4_C7	0x000003FF
+#define BP_LCDIF_CSC_COEFF4_C7	0
+#define BM_LCDIF_CSC_COEFF4_C8	0x03FF0000
+#define BP_LCDIF_CSC_COEFF4_C8	16
+
+#define HW_LCDIF_CSC_OFFSET	0x160
+#define BM_LCDIF_CSC_OFFSET_Y_OFFSET	0x000001FF
+#define BP_LCDIF_CSC_OFFSET_Y_OFFSET	0
+#define BM_LCDIF_CSC_OFFSET_CBCR_OFFSET	0x01FF0000
+#define BP_LCDIF_CSC_OFFSET_CBCR_OFFSET	16
+
+#define HW_LCDIF_CSC_LIMIT	0x170
+#define BM_LCDIF_CSC_LIMIT_Y_MAX	0x000000FF
+#define BP_LCDIF_CSC_LIMIT_Y_MAX	0
+#define BM_LCDIF_CSC_LIMIT_Y_MIN	0x0000FF00
+#define BP_LCDIF_CSC_LIMIT_Y_MIN	8
+#define BM_LCDIF_CSC_LIMIT_CBCR_MAX	0x00FF0000
+#define BP_LCDIF_CSC_LIMIT_CBCR_MAX	16
+#define BM_LCDIF_CSC_LIMIT_CBCR_MIN	0xFF000000
+#define BP_LCDIF_CSC_LIMIT_CBCR_MIN	24
+
+#define HW_LCDIF_STAT		0x1D0
+#define BM_LCDIF_STAT_TXFIFO_EMPTY	0x04000000
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-lradc.h b/arch/arm/mach-stmp378x/include/mach/regs-lradc.h
new file mode 100644
index 0000000..cb8cb06
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-lradc.h
@@ -0,0 +1,99 @@
+/*
+ * stmp378x: LRADC register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_LRADC_BASE	(STMP3XXX_REGS_BASE + 0x50000)
+#define REGS_LRADC_PHYS	0x80050000
+#define REGS_LRADC_SIZE	0x2000
+
+#define HW_LRADC_CTRL0		0x0
+#define BM_LRADC_CTRL0_SCHEDULE	0x000000FF
+#define BP_LRADC_CTRL0_SCHEDULE	0
+#define BM_LRADC_CTRL0_XPLUS_ENABLE	0x00010000
+#define BM_LRADC_CTRL0_YPLUS_ENABLE	0x00020000
+#define BM_LRADC_CTRL0_XMINUS_ENABLE	0x00040000
+#define BM_LRADC_CTRL0_YMINUS_ENABLE	0x00080000
+#define BM_LRADC_CTRL0_TOUCH_DETECT_ENABLE	0x00100000
+#define BM_LRADC_CTRL0_ONCHIP_GROUNDREF	0x00200000
+#define BM_LRADC_CTRL0_CLKGATE	0x40000000
+#define BM_LRADC_CTRL0_SFTRST	0x80000000
+
+#define HW_LRADC_CTRL1		0x10
+#define BM_LRADC_CTRL1_LRADC0_IRQ	0x00000001
+#define BP_LRADC_CTRL1_LRADC0_IRQ	0
+#define BM_LRADC_CTRL1_LRADC5_IRQ	0x00000020
+#define BM_LRADC_CTRL1_LRADC6_IRQ	0x00000040
+#define BM_LRADC_CTRL1_TOUCH_DETECT_IRQ	0x00000100
+#define BM_LRADC_CTRL1_LRADC0_IRQ_EN	0x00010000
+#define BM_LRADC_CTRL1_LRADC5_IRQ_EN	0x00200000
+#define BM_LRADC_CTRL1_TOUCH_DETECT_IRQ_EN	0x01000000
+
+#define HW_LRADC_CTRL2		0x20
+#define BM_LRADC_CTRL2_BL_BRIGHTNESS	0x001F0000
+#define BP_LRADC_CTRL2_BL_BRIGHTNESS	16
+#define BM_LRADC_CTRL2_BL_MUX_SELECT	0x00200000
+#define BM_LRADC_CTRL2_BL_ENABLE	0x00400000
+#define BM_LRADC_CTRL2_DIVIDE_BY_TWO	0xFF000000
+#define BP_LRADC_CTRL2_DIVIDE_BY_TWO	24
+
+#define HW_LRADC_CTRL3		0x30
+#define BM_LRADC_CTRL3_CYCLE_TIME	0x00000300
+#define BP_LRADC_CTRL3_CYCLE_TIME	8
+
+#define HW_LRADC_STATUS		0x40
+#define BM_LRADC_STATUS_TOUCH_DETECT_RAW	0x00000001
+#define BP_LRADC_STATUS_TOUCH_DETECT_RAW	0
+
+#define HW_LRADC_CH0		(0x50 + 0 * 0x10)
+#define HW_LRADC_CH1		(0x50 + 1 * 0x10)
+#define HW_LRADC_CH2		(0x50 + 2 * 0x10)
+#define HW_LRADC_CH3		(0x50 + 3 * 0x10)
+#define HW_LRADC_CH4		(0x50 + 4 * 0x10)
+#define HW_LRADC_CH5		(0x50 + 5 * 0x10)
+#define HW_LRADC_CH6		(0x50 + 6 * 0x10)
+#define HW_LRADC_CH7		(0x50 + 7 * 0x10)
+
+#define HW_LRADC_CHn		0x50
+#define BM_LRADC_CHn_VALUE	0x0003FFFF
+#define BP_LRADC_CHn_VALUE	0
+#define BM_LRADC_CHn_NUM_SAMPLES	0x1F000000
+#define BP_LRADC_CHn_NUM_SAMPLES	24
+#define BM_LRADC_CHn_ACCUMULATE	0x20000000
+
+#define HW_LRADC_DELAY0		(0xD0 + 0 * 0x10)
+#define HW_LRADC_DELAY1		(0xD0 + 1 * 0x10)
+#define HW_LRADC_DELAY2		(0xD0 + 2 * 0x10)
+#define HW_LRADC_DELAY3		(0xD0 + 3 * 0x10)
+
+#define HW_LRADC_DELAYn		0xD0
+#define BM_LRADC_DELAYn_DELAY	0x000007FF
+#define BP_LRADC_DELAYn_DELAY	0
+#define BM_LRADC_DELAYn_LOOP_COUNT	0x0000F800
+#define BP_LRADC_DELAYn_LOOP_COUNT	11
+#define BM_LRADC_DELAYn_TRIGGER_DELAYS	0x000F0000
+#define BP_LRADC_DELAYn_TRIGGER_DELAYS	16
+#define BM_LRADC_DELAYn_KICK	0x00100000
+#define BM_LRADC_DELAYn_TRIGGER_LRADCS	0xFF000000
+#define BP_LRADC_DELAYn_TRIGGER_LRADCS	24
+
+#define HW_LRADC_CTRL4		0x140
+#define BM_LRADC_CTRL4_LRADC6SELECT	0x0F000000
+#define BP_LRADC_CTRL4_LRADC6SELECT	24
+#define BM_LRADC_CTRL4_LRADC7SELECT	0xF0000000
+#define BP_LRADC_CTRL4_LRADC7SELECT	28
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-ocotp.h b/arch/arm/mach-stmp378x/include/mach/regs-ocotp.h
new file mode 100644
index 0000000..f0af64d
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-ocotp.h
@@ -0,0 +1,40 @@
+/*
+ * stmp378x: OCOTP register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_OCOTP_BASE	(STMP3XXX_REGS_BASE + 0x2C000)
+#define REGS_OCOTP_PHYS	0x8002C000
+#define REGS_OCOTP_SIZE	0x2000
+
+#define HW_OCOTP_CTRL		0x0
+#define BM_OCOTP_CTRL_BUSY	0x00000100
+#define BM_OCOTP_CTRL_ERROR	0x00000200
+#define BM_OCOTP_CTRL_RD_BANK_OPEN	0x00001000
+#define BM_OCOTP_CTRL_RELOAD_SHADOWS	0x00002000
+#define BM_OCOTP_CTRL_WR_UNLOCK	0xFFFF0000
+#define BP_OCOTP_CTRL_WR_UNLOCK	16
+
+#define HW_OCOTP_DATA		0x10
+
+#define HW_OCOTP_CUST0		(0x20 + 0 * 0x10)
+#define HW_OCOTP_CUST1		(0x20 + 1 * 0x10)
+#define HW_OCOTP_CUST2		(0x20 + 2 * 0x10)
+#define HW_OCOTP_CUST3		(0x20 + 3 * 0x10)
+
+#define HW_OCOTP_CUSTn		0x20
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-pwm.h b/arch/arm/mach-stmp378x/include/mach/regs-pwm.h
new file mode 100644
index 0000000..0d0f9e5
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-pwm.h
@@ -0,0 +1,53 @@
+/*
+ * stmp378x: PWM register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_PWM_BASE	(STMP3XXX_REGS_BASE + 0x64000)
+#define REGS_PWM_PHYS	0x80064000
+#define REGS_PWM_SIZE	0x2000
+
+#define HW_PWM_CTRL		0x0
+#define BM_PWM_CTRL_PWM2_ENABLE	0x00000004
+#define BM_PWM_CTRL_PWM2_ANA_CTRL_ENABLE	0x00000020
+
+#define HW_PWM_ACTIVE0		(0x10 + 0 * 0x20)
+#define HW_PWM_ACTIVE1		(0x10 + 1 * 0x20)
+#define HW_PWM_ACTIVE2		(0x10 + 2 * 0x20)
+#define HW_PWM_ACTIVE3		(0x10 + 3 * 0x20)
+
+#define HW_PWM_ACTIVEn		0x10
+#define BM_PWM_ACTIVEn_ACTIVE	0x0000FFFF
+#define BP_PWM_ACTIVEn_ACTIVE	0
+#define BM_PWM_ACTIVEn_INACTIVE	0xFFFF0000
+#define BP_PWM_ACTIVEn_INACTIVE	16
+
+#define HW_PWM_PERIOD0		(0x20 + 0 * 0x20)
+#define HW_PWM_PERIOD1		(0x20 + 1 * 0x20)
+#define HW_PWM_PERIOD2		(0x20 + 2 * 0x20)
+#define HW_PWM_PERIOD3		(0x20 + 3 * 0x20)
+
+#define HW_PWM_PERIODn		0x20
+#define BM_PWM_PERIODn_PERIOD	0x0000FFFF
+#define BP_PWM_PERIODn_PERIOD	0
+#define BM_PWM_PERIODn_ACTIVE_STATE	0x00030000
+#define BP_PWM_PERIODn_ACTIVE_STATE	16
+#define BM_PWM_PERIODn_INACTIVE_STATE	0x000C0000
+#define BP_PWM_PERIODn_INACTIVE_STATE	18
+#define BM_PWM_PERIODn_CDIV	0x00700000
+#define BP_PWM_PERIODn_CDIV	20
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-pxp.h b/arch/arm/mach-stmp378x/include/mach/regs-pxp.h
new file mode 100644
index 0000000..54d2978
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-pxp.h
@@ -0,0 +1,140 @@
+/*
+ * stmp378x: PXP register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_PXP_BASE	(STMP3XXX_REGS_BASE + 0x2A000)
+#define REGS_PXP_PHYS	0x8002A000
+#define REGS_PXP_SIZE	0x2000
+
+#define HW_PXP_CTRL		0x0
+#define BM_PXP_CTRL_ENABLE	0x00000001
+#define BP_PXP_CTRL_ENABLE	0
+#define BM_PXP_CTRL_IRQ_ENABLE	0x00000002
+#define BM_PXP_CTRL_OUTPUT_RGB_FORMAT	0x000000F0
+#define BP_PXP_CTRL_OUTPUT_RGB_FORMAT	4
+#define BM_PXP_CTRL_ROTATE	0x00000300
+#define BP_PXP_CTRL_ROTATE	8
+#define BM_PXP_CTRL_HFLIP	0x00000400
+#define BM_PXP_CTRL_VFLIP	0x00000800
+#define BM_PXP_CTRL_S0_FORMAT	0x0000F000
+#define BP_PXP_CTRL_S0_FORMAT	12
+#define BM_PXP_CTRL_SCALE	0x00040000
+#define BM_PXP_CTRL_CROP	0x00080000
+
+#define HW_PXP_STAT		0x10
+#define BM_PXP_STAT_IRQ		0x00000001
+#define BP_PXP_STAT_IRQ		0
+
+#define HW_PXP_RGBBUF		0x20
+
+#define HW_PXP_RGBSIZE		0x40
+#define BM_PXP_RGBSIZE_HEIGHT	0x00000FFF
+#define BP_PXP_RGBSIZE_HEIGHT	0
+#define BM_PXP_RGBSIZE_WIDTH	0x00FFF000
+#define BP_PXP_RGBSIZE_WIDTH	12
+
+#define HW_PXP_S0BUF		0x50
+
+#define HW_PXP_S0UBUF		0x60
+
+#define HW_PXP_S0VBUF		0x70
+
+#define HW_PXP_S0PARAM		0x80
+#define BM_PXP_S0PARAM_HEIGHT	0x000000FF
+#define BP_PXP_S0PARAM_HEIGHT	0
+#define BM_PXP_S0PARAM_WIDTH	0x0000FF00
+#define BP_PXP_S0PARAM_WIDTH	8
+#define BM_PXP_S0PARAM_YBASE	0x00FF0000
+#define BP_PXP_S0PARAM_YBASE	16
+#define BM_PXP_S0PARAM_XBASE	0xFF000000
+#define BP_PXP_S0PARAM_XBASE	24
+
+#define HW_PXP_S0BACKGROUND	0x90
+
+#define HW_PXP_S0CROP		0xA0
+#define BM_PXP_S0CROP_HEIGHT	0x000000FF
+#define BP_PXP_S0CROP_HEIGHT	0
+#define BM_PXP_S0CROP_WIDTH	0x0000FF00
+#define BP_PXP_S0CROP_WIDTH	8
+#define BM_PXP_S0CROP_YBASE	0x00FF0000
+#define BP_PXP_S0CROP_YBASE	16
+#define BM_PXP_S0CROP_XBASE	0xFF000000
+#define BP_PXP_S0CROP_XBASE	24
+
+#define HW_PXP_S0SCALE		0xB0
+#define BM_PXP_S0SCALE_XSCALE	0x00003FFF
+#define BP_PXP_S0SCALE_XSCALE	0
+#define BM_PXP_S0SCALE_YSCALE	0x3FFF0000
+#define BP_PXP_S0SCALE_YSCALE	16
+
+#define HW_PXP_CSCCOEFF0	0xD0
+
+#define HW_PXP_CSCCOEFF1	0xE0
+
+#define HW_PXP_CSCCOEFF2	0xF0
+
+#define HW_PXP_S0COLORKEYLOW	0x180
+
+#define HW_PXP_S0COLORKEYHIGH	0x190
+
+#define HW_PXP_OL0		(0x200 + 0 * 0x40)
+#define HW_PXP_OL1		(0x200 + 1 * 0x40)
+#define HW_PXP_OL2		(0x200 + 2 * 0x40)
+#define HW_PXP_OL3		(0x200 + 3 * 0x40)
+#define HW_PXP_OL4		(0x200 + 4 * 0x40)
+#define HW_PXP_OL5		(0x200 + 5 * 0x40)
+#define HW_PXP_OL6		(0x200 + 6 * 0x40)
+#define HW_PXP_OL7		(0x200 + 7 * 0x40)
+
+#define HW_PXP_OLn		0x200
+
+#define HW_PXP_OL0SIZE		(0x210 + 0 * 0x40)
+#define HW_PXP_OL1SIZE		(0x210 + 1 * 0x40)
+#define HW_PXP_OL2SIZE		(0x210 + 2 * 0x40)
+#define HW_PXP_OL3SIZE		(0x210 + 3 * 0x40)
+#define HW_PXP_OL4SIZE		(0x210 + 4 * 0x40)
+#define HW_PXP_OL5SIZE		(0x210 + 5 * 0x40)
+#define HW_PXP_OL6SIZE		(0x210 + 6 * 0x40)
+#define HW_PXP_OL7SIZE		(0x210 + 7 * 0x40)
+
+#define HW_PXP_OLnSIZE		0x210
+#define BM_PXP_OLnSIZE_HEIGHT	0x000000FF
+#define BP_PXP_OLnSIZE_HEIGHT	0
+#define BM_PXP_OLnSIZE_WIDTH	0x0000FF00
+#define BP_PXP_OLnSIZE_WIDTH	8
+
+#define HW_PXP_OL0PARAM		(0x220 + 0 * 0x40)
+#define HW_PXP_OL1PARAM		(0x220 + 1 * 0x40)
+#define HW_PXP_OL2PARAM		(0x220 + 2 * 0x40)
+#define HW_PXP_OL3PARAM		(0x220 + 3 * 0x40)
+#define HW_PXP_OL4PARAM		(0x220 + 4 * 0x40)
+#define HW_PXP_OL5PARAM		(0x220 + 5 * 0x40)
+#define HW_PXP_OL6PARAM		(0x220 + 6 * 0x40)
+#define HW_PXP_OL7PARAM		(0x220 + 7 * 0x40)
+
+#define HW_PXP_OLnPARAM		0x220
+#define BM_PXP_OLnPARAM_ENABLE	0x00000001
+#define BP_PXP_OLnPARAM_ENABLE	0
+#define BM_PXP_OLnPARAM_ALPHA_CNTL	0x00000006
+#define BP_PXP_OLnPARAM_ALPHA_CNTL	1
+#define BM_PXP_OLnPARAM_ENABLE_COLORKEY	0x00000008
+#define BM_PXP_OLnPARAM_FORMAT	0x000000F0
+#define BP_PXP_OLnPARAM_FORMAT	4
+#define BM_PXP_OLnPARAM_ALPHA	0x0000FF00
+#define BP_PXP_OLnPARAM_ALPHA	8
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-rtc.h b/arch/arm/mach-stmp378x/include/mach/regs-rtc.h
new file mode 100644
index 0000000..b8dbd67
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-rtc.h
@@ -0,0 +1,59 @@
+/*
+ * stmp378x: RTC register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_RTC_BASE	(STMP3XXX_REGS_BASE + 0x5C000)
+#define REGS_RTC_PHYS	0x8005C000
+#define REGS_RTC_SIZE	0x2000
+
+#define HW_RTC_CTRL		0x0
+#define BM_RTC_CTRL_ALARM_IRQ_EN	0x00000001
+#define BP_RTC_CTRL_ALARM_IRQ_EN	0
+#define BM_RTC_CTRL_ONEMSEC_IRQ_EN	0x00000002
+#define BM_RTC_CTRL_ALARM_IRQ	0x00000004
+#define BM_RTC_CTRL_ONEMSEC_IRQ	0x00000008
+#define BM_RTC_CTRL_WATCHDOGEN	0x00000010
+
+#define HW_RTC_STAT		0x10
+#define BM_RTC_STAT_NEW_REGS	0x0000FF00
+#define BP_RTC_STAT_NEW_REGS	8
+#define BM_RTC_STAT_STALE_REGS	0x00FF0000
+#define BP_RTC_STAT_STALE_REGS	16
+#define BM_RTC_STAT_RTC_PRESENT	0x80000000
+
+#define HW_RTC_SECONDS		0x30
+
+#define HW_RTC_ALARM		0x40
+
+#define HW_RTC_WATCHDOG		0x50
+
+#define HW_RTC_PERSISTENT0	0x60
+#define BM_RTC_PERSISTENT0_ALARM_WAKE_EN	0x00000002
+#define BM_RTC_PERSISTENT0_ALARM_EN	0x00000004
+#define BM_RTC_PERSISTENT0_XTAL24MHZ_PWRUP	0x00000010
+#define BM_RTC_PERSISTENT0_XTAL32KHZ_PWRUP	0x00000020
+#define BM_RTC_PERSISTENT0_ALARM_WAKE	0x00000080
+#define BM_RTC_PERSISTENT0_SPARE_ANALOG	0xFFFC0000
+#define BP_RTC_PERSISTENT0_SPARE_ANALOG	18
+
+#define HW_RTC_PERSISTENT1	0x70
+#define BM_RTC_PERSISTENT1_GENERAL	0xFFFFFFFF
+#define BP_RTC_PERSISTENT1_GENERAL	0
+
+#define HW_RTC_VERSION		0xD0
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-saif.h b/arch/arm/mach-stmp378x/include/mach/regs-saif.h
new file mode 100644
index 0000000..6df4176
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-saif.h
@@ -0,0 +1,21 @@
+/*
+ * stmp378x: SAIF register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_SAIF_SIZE	0x2000
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-spdif.h b/arch/arm/mach-stmp378x/include/mach/regs-spdif.h
new file mode 100644
index 0000000..8015398
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-spdif.h
@@ -0,0 +1,49 @@
+/*
+ * stmp378x: SPDIF register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_SPDIF_BASE	(STMP3XXX_REGS_BASE + 0x54000)
+#define REGS_SPDIF_PHYS	0x80054000
+#define REGS_SPDIF_SIZE	0x2000
+
+#define HW_SPDIF_CTRL		0x0
+#define BM_SPDIF_CTRL_RUN	0x00000001
+#define BP_SPDIF_CTRL_RUN	0
+#define BM_SPDIF_CTRL_FIFO_ERROR_IRQ_EN	0x00000002
+#define BM_SPDIF_CTRL_FIFO_OVERFLOW_IRQ	0x00000004
+#define BM_SPDIF_CTRL_FIFO_UNDERFLOW_IRQ	0x00000008
+#define BM_SPDIF_CTRL_WORD_LENGTH	0x00000010
+#define BM_SPDIF_CTRL_CLKGATE	0x40000000
+#define BM_SPDIF_CTRL_SFTRST	0x80000000
+
+#define HW_SPDIF_STAT		0x10
+
+#define HW_SPDIF_FRAMECTRL	0x20
+
+#define HW_SPDIF_SRR		0x30
+#define BM_SPDIF_SRR_RATE	0x000FFFFF
+#define BP_SPDIF_SRR_RATE	0
+#define BM_SPDIF_SRR_BASEMULT	0x70000000
+#define BP_SPDIF_SRR_BASEMULT	28
+
+#define HW_SPDIF_DEBUG		0x40
+
+#define HW_SPDIF_DATA		0x50
+
+#define HW_SPDIF_VERSION	0x60
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-ssp.h b/arch/arm/mach-stmp378x/include/mach/regs-ssp.h
new file mode 100644
index 0000000..28aacf0
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-ssp.h
@@ -0,0 +1,102 @@
+/*
+ * stmp378x: SSP register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_SSP1_BASE	(STMP3XXX_REGS_BASE + 0x10000)
+#define REGS_SSP1_PHYS	0x80010000
+#define REGS_SSP2_BASE	(STMP3XXX_REGS_BASE + 0x34000)
+#define REGS_SSP2_PHYS	0x80034000
+#define REGS_SSP_SIZE	0x2000
+
+#define HW_SSP_CTRL0		0x0
+#define BM_SSP_CTRL0_XFER_COUNT	0x0000FFFF
+#define BP_SSP_CTRL0_XFER_COUNT	0
+#define BM_SSP_CTRL0_ENABLE	0x00010000
+#define BM_SSP_CTRL0_GET_RESP	0x00020000
+#define BM_SSP_CTRL0_LONG_RESP	0x00080000
+#define BM_SSP_CTRL0_WAIT_FOR_CMD	0x00100000
+#define BM_SSP_CTRL0_WAIT_FOR_IRQ	0x00200000
+#define BM_SSP_CTRL0_BUS_WIDTH	0x00C00000
+#define BP_SSP_CTRL0_BUS_WIDTH	22
+#define BM_SSP_CTRL0_DATA_XFER	0x01000000
+#define BM_SSP_CTRL0_READ	0x02000000
+#define BM_SSP_CTRL0_IGNORE_CRC	0x04000000
+#define BM_SSP_CTRL0_LOCK_CS	0x08000000
+#define BM_SSP_CTRL0_RUN	0x20000000
+#define BM_SSP_CTRL0_CLKGATE	0x40000000
+#define BM_SSP_CTRL0_SFTRST	0x80000000
+
+#define HW_SSP_CMD0		0x10
+#define BM_SSP_CMD0_CMD		0x000000FF
+#define BP_SSP_CMD0_CMD		0
+#define BM_SSP_CMD0_BLOCK_COUNT	0x0000FF00
+#define BP_SSP_CMD0_BLOCK_COUNT	8
+#define BM_SSP_CMD0_BLOCK_SIZE	0x000F0000
+#define BP_SSP_CMD0_BLOCK_SIZE	16
+#define BM_SSP_CMD0_APPEND_8CYC	0x00100000
+#define BM_SSP_CMD1_CMD_ARG	0xFFFFFFFF
+#define BP_SSP_CMD1_CMD_ARG	0
+
+#define HW_SSP_TIMING		0x50
+#define BM_SSP_TIMING_CLOCK_RATE	0x000000FF
+#define BP_SSP_TIMING_CLOCK_RATE	0
+#define BM_SSP_TIMING_CLOCK_DIVIDE	0x0000FF00
+#define BP_SSP_TIMING_CLOCK_DIVIDE	8
+#define BM_SSP_TIMING_TIMEOUT	0xFFFF0000
+#define BP_SSP_TIMING_TIMEOUT	16
+
+#define HW_SSP_CTRL1		0x60
+#define BM_SSP_CTRL1_SSP_MODE	0x0000000F
+#define BP_SSP_CTRL1_SSP_MODE	0
+#define BM_SSP_CTRL1_WORD_LENGTH	0x000000F0
+#define BP_SSP_CTRL1_WORD_LENGTH	4
+#define BM_SSP_CTRL1_POLARITY	0x00000200
+#define BM_SSP_CTRL1_PHASE	0x00000400
+#define BM_SSP_CTRL1_DMA_ENABLE	0x00002000
+#define BM_SSP_CTRL1_FIFO_OVERRUN_IRQ	0x00008000
+#define BM_SSP_CTRL1_RECV_TIMEOUT_IRQ_EN	0x00010000
+#define BM_SSP_CTRL1_RECV_TIMEOUT_IRQ	0x00020000
+#define BM_SSP_CTRL1_FIFO_UNDERRUN_IRQ	0x00200000
+#define BM_SSP_CTRL1_DATA_CRC_IRQ_EN	0x00400000
+#define BM_SSP_CTRL1_DATA_CRC_IRQ	0x00800000
+#define BM_SSP_CTRL1_DATA_TIMEOUT_IRQ_EN	0x01000000
+#define BM_SSP_CTRL1_DATA_TIMEOUT_IRQ	0x02000000
+#define BM_SSP_CTRL1_RESP_TIMEOUT_IRQ_EN	0x04000000
+#define BM_SSP_CTRL1_RESP_TIMEOUT_IRQ	0x08000000
+#define BM_SSP_CTRL1_RESP_ERR_IRQ_EN	0x10000000
+#define BM_SSP_CTRL1_RESP_ERR_IRQ	0x20000000
+#define BM_SSP_CTRL1_SDIO_IRQ	0x80000000
+
+#define HW_SSP_DATA		0x70
+
+#define HW_SSP_SDRESP0		0x80
+
+#define HW_SSP_SDRESP1		0x90
+
+#define HW_SSP_SDRESP2		0xA0
+
+#define HW_SSP_SDRESP3		0xB0
+
+#define HW_SSP_STATUS		0xC0
+#define BM_SSP_STATUS_FIFO_EMPTY	0x00000020
+#define BM_SSP_STATUS_TIMEOUT	0x00001000
+#define BM_SSP_STATUS_RESP_TIMEOUT	0x00004000
+#define BM_SSP_STATUS_RESP_ERR	0x00008000
+#define BM_SSP_STATUS_RESP_CRC_ERR	0x00010000
+#define BM_SSP_STATUS_CARD_DETECT	0x10000000
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-sydma.h b/arch/arm/mach-stmp378x/include/mach/regs-sydma.h
new file mode 100644
index 0000000..08343a8
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-sydma.h
@@ -0,0 +1,23 @@
+/*
+ * stmp378x: SYDMA register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_SYDMA_BASE	(STMP3XXX_REGS_BASE + 0x26000)
+#define REGS_SYDMA_PHYS	0x80026000
+#define REGS_SYDMA_SIZE	0x2000
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-tvenc.h b/arch/arm/mach-stmp378x/include/mach/regs-tvenc.h
new file mode 100644
index 0000000..7f895cb
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-tvenc.h
@@ -0,0 +1,67 @@
+/*
+ * stmp378x: TVENC register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_TVENC_BASE	(STMP3XXX_REGS_BASE + 0x38000)
+#define REGS_TVENC_PHYS	0x80038000
+#define REGS_TVENC_SIZE	0x2000
+
+#define HW_TVENC_CTRL		0x0
+#define BM_TVENC_CTRL_CLKGATE	0x40000000
+#define BM_TVENC_CTRL_SFTRST	0x80000000
+
+#define HW_TVENC_CONFIG		0x10
+#define BM_TVENC_CONFIG_ENCD_MODE	0x00000007
+#define BP_TVENC_CONFIG_ENCD_MODE	0
+#define BM_TVENC_CONFIG_SYNC_MODE	0x00000070
+#define BP_TVENC_CONFIG_SYNC_MODE	4
+#define BM_TVENC_CONFIG_FSYNC_PHS	0x00000200
+#define BM_TVENC_CONFIG_CGAIN	0x0000C000
+#define BP_TVENC_CONFIG_CGAIN	14
+#define BM_TVENC_CONFIG_YGAIN_SEL	0x00030000
+#define BP_TVENC_CONFIG_YGAIN_SEL	16
+#define BM_TVENC_CONFIG_PAL_SHAPE	0x00100000
+
+#define HW_TVENC_SYNCOFFSET	0x30
+
+#define HW_TVENC_COLORSUB0	0xC0
+
+#define HW_TVENC_COLORBURST	0x140
+#define BM_TVENC_COLORBURST_PBA	0x00FF0000
+#define BP_TVENC_COLORBURST_PBA	16
+#define BM_TVENC_COLORBURST_NBA	0xFF000000
+#define BP_TVENC_COLORBURST_NBA	24
+
+#define HW_TVENC_MACROVISION0	0x150
+
+#define HW_TVENC_MACROVISION1	0x160
+
+#define HW_TVENC_MACROVISION2	0x170
+
+#define HW_TVENC_MACROVISION3	0x180
+
+#define HW_TVENC_MACROVISION4	0x190
+
+#define HW_TVENC_DACCTRL	0x1A0
+#define BM_TVENC_DACCTRL_RVAL	0x00000070
+#define BP_TVENC_DACCTRL_RVAL	4
+#define BM_TVENC_DACCTRL_DUMP_TOVDD1	0x00000100
+#define BM_TVENC_DACCTRL_PWRUP1	0x00001000
+#define BM_TVENC_DACCTRL_GAINUP	0x00040000
+#define BM_TVENC_DACCTRL_GAINDN	0x00080000
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-uartapp.h b/arch/arm/mach-stmp378x/include/mach/regs-uartapp.h
new file mode 100644
index 0000000..a251e68
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-uartapp.h
@@ -0,0 +1,87 @@
+/*
+ * stmp378x: UARTAPP register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_UARTAPP1_BASE	(STMP3XXX_REGS_BASE + 0x6C000)
+#define REGS_UARTAPP1_PHYS	0x8006C000
+#define REGS_UARTAPP2_BASE	(STMP3XXX_REGS_BASE + 0x6E000)
+#define REGS_UARTAPP2_PHYS	0x8006E000
+#define REGS_UARTAPP_SIZE	0x2000
+
+#define HW_UARTAPP_CTRL0	0x0
+#define BM_UARTAPP_CTRL0_XFER_COUNT	0x0000FFFF
+#define BP_UARTAPP_CTRL0_XFER_COUNT	0
+#define BM_UARTAPP_CTRL0_RXTIMEOUT	0x07FF0000
+#define BP_UARTAPP_CTRL0_RXTIMEOUT	16
+#define BM_UARTAPP_CTRL0_RXTO_ENABLE	0x08000000
+#define BM_UARTAPP_CTRL0_RUN	0x20000000
+#define BM_UARTAPP_CTRL0_SFTRST	0x80000000
+#define BM_UARTAPP_CTRL1_XFER_COUNT	0x0000FFFF
+#define BP_UARTAPP_CTRL1_XFER_COUNT	0
+#define BM_UARTAPP_CTRL1_RUN	0x10000000
+
+#define HW_UARTAPP_CTRL2	0x20
+#define BM_UARTAPP_CTRL2_UARTEN	0x00000001
+#define BP_UARTAPP_CTRL2_UARTEN	0
+#define BM_UARTAPP_CTRL2_TXE	0x00000100
+#define BM_UARTAPP_CTRL2_RXE	0x00000200
+#define BM_UARTAPP_CTRL2_RTS	0x00000800
+#define BM_UARTAPP_CTRL2_RTSEN	0x00004000
+#define BM_UARTAPP_CTRL2_CTSEN	0x00008000
+#define BM_UARTAPP_CTRL2_RXDMAE	0x01000000
+#define BM_UARTAPP_CTRL2_TXDMAE	0x02000000
+#define BM_UARTAPP_CTRL2_DMAONERR	0x04000000
+
+#define HW_UARTAPP_LINECTRL	0x30
+#define BM_UARTAPP_LINECTRL_BRK	0x00000001
+#define BP_UARTAPP_LINECTRL_BRK	0
+#define BM_UARTAPP_LINECTRL_PEN	0x00000002
+#define BM_UARTAPP_LINECTRL_EPS	0x00000004
+#define BM_UARTAPP_LINECTRL_STP2	0x00000008
+#define BM_UARTAPP_LINECTRL_FEN	0x00000010
+#define BM_UARTAPP_LINECTRL_WLEN	0x00000060
+#define BP_UARTAPP_LINECTRL_WLEN	5
+#define BM_UARTAPP_LINECTRL_SPS	0x00000080
+#define BM_UARTAPP_LINECTRL_BAUD_DIVFRAC	0x00003F00
+#define BP_UARTAPP_LINECTRL_BAUD_DIVFRAC	8
+#define BM_UARTAPP_LINECTRL_BAUD_DIVINT	0xFFFF0000
+#define BP_UARTAPP_LINECTRL_BAUD_DIVINT	16
+
+#define HW_UARTAPP_INTR		0x50
+#define BM_UARTAPP_INTR_CTSMIS	0x00000002
+#define BM_UARTAPP_INTR_RTIS	0x00000040
+#define BM_UARTAPP_INTR_CTSMIEN	0x00020000
+#define BM_UARTAPP_INTR_RXIEN	0x00100000
+#define BM_UARTAPP_INTR_RTIEN	0x00400000
+
+#define HW_UARTAPP_DATA		0x60
+
+#define HW_UARTAPP_STAT		0x70
+#define BM_UARTAPP_STAT_RXCOUNT	0x0000FFFF
+#define BP_UARTAPP_STAT_RXCOUNT	0
+#define BM_UARTAPP_STAT_FERR	0x00010000
+#define BM_UARTAPP_STAT_PERR	0x00020000
+#define BM_UARTAPP_STAT_BERR	0x00040000
+#define BM_UARTAPP_STAT_OERR	0x00080000
+#define BM_UARTAPP_STAT_RXFE	0x01000000
+#define BM_UARTAPP_STAT_TXFF	0x02000000
+#define BM_UARTAPP_STAT_TXFE	0x08000000
+#define BM_UARTAPP_STAT_CTS	0x10000000
+
+#define HW_UARTAPP_VERSION	0x90
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-uartdbg.h b/arch/arm/mach-stmp378x/include/mach/regs-uartdbg.h
new file mode 100644
index 0000000..b810deb
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-uartdbg.h
@@ -0,0 +1,268 @@
+/*
+ * stmp378x: UARTDBG register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_UARTDBG_BASE	(STMP3XXX_REGS_BASE + 0x70000)
+#define REGS_UARTDBG_PHYS	0x80070000
+#define REGS_UARTDBG_SIZE	0x2000
+
+#define HW_UARTDBGDR 0x00000000
+#define BP_UARTDBGDR_UNAVAILABLE      16
+#define BM_UARTDBGDR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGDR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGDR_UNAVAILABLE)
+#define BP_UARTDBGDR_RESERVED      12
+#define BM_UARTDBGDR_RESERVED 0x0000F000
+#define BF_UARTDBGDR_RESERVED(v)  \
+	(((v) << 12) & BM_UARTDBGDR_RESERVED)
+#define BM_UARTDBGDR_OE 0x00000800
+#define BM_UARTDBGDR_BE 0x00000400
+#define BM_UARTDBGDR_PE 0x00000200
+#define BM_UARTDBGDR_FE 0x00000100
+#define BP_UARTDBGDR_DATA      0
+#define BM_UARTDBGDR_DATA 0x000000FF
+#define BF_UARTDBGDR_DATA(v)  \
+	(((v) << 0) & BM_UARTDBGDR_DATA)
+#define HW_UARTDBGRSR_ECR 0x00000004
+#define BP_UARTDBGRSR_ECR_UNAVAILABLE      8
+#define BM_UARTDBGRSR_ECR_UNAVAILABLE 0xFFFFFF00
+#define BF_UARTDBGRSR_ECR_UNAVAILABLE(v) \
+	(((v) << 8) & BM_UARTDBGRSR_ECR_UNAVAILABLE)
+#define BP_UARTDBGRSR_ECR_EC      4
+#define BM_UARTDBGRSR_ECR_EC 0x000000F0
+#define BF_UARTDBGRSR_ECR_EC(v)  \
+	(((v) << 4) & BM_UARTDBGRSR_ECR_EC)
+#define BM_UARTDBGRSR_ECR_OE 0x00000008
+#define BM_UARTDBGRSR_ECR_BE 0x00000004
+#define BM_UARTDBGRSR_ECR_PE 0x00000002
+#define BM_UARTDBGRSR_ECR_FE 0x00000001
+#define HW_UARTDBGFR 0x00000018
+#define BP_UARTDBGFR_UNAVAILABLE      16
+#define BM_UARTDBGFR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGFR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGFR_UNAVAILABLE)
+#define BP_UARTDBGFR_RESERVED      9
+#define BM_UARTDBGFR_RESERVED 0x0000FE00
+#define BF_UARTDBGFR_RESERVED(v)  \
+	(((v) << 9) & BM_UARTDBGFR_RESERVED)
+#define BM_UARTDBGFR_RI 0x00000100
+#define BM_UARTDBGFR_TXFE 0x00000080
+#define BM_UARTDBGFR_RXFF 0x00000040
+#define BM_UARTDBGFR_TXFF 0x00000020
+#define BM_UARTDBGFR_RXFE 0x00000010
+#define BM_UARTDBGFR_BUSY 0x00000008
+#define BM_UARTDBGFR_DCD 0x00000004
+#define BM_UARTDBGFR_DSR 0x00000002
+#define BM_UARTDBGFR_CTS 0x00000001
+#define HW_UARTDBGILPR 0x00000020
+#define BP_UARTDBGILPR_UNAVAILABLE      8
+#define BM_UARTDBGILPR_UNAVAILABLE 0xFFFFFF00
+#define BF_UARTDBGILPR_UNAVAILABLE(v) \
+	(((v) << 8) & BM_UARTDBGILPR_UNAVAILABLE)
+#define BP_UARTDBGILPR_ILPDVSR      0
+#define BM_UARTDBGILPR_ILPDVSR 0x000000FF
+#define BF_UARTDBGILPR_ILPDVSR(v)  \
+	(((v) << 0) & BM_UARTDBGILPR_ILPDVSR)
+#define HW_UARTDBGIBRD 0x00000024
+#define BP_UARTDBGIBRD_UNAVAILABLE      16
+#define BM_UARTDBGIBRD_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGIBRD_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGIBRD_UNAVAILABLE)
+#define BP_UARTDBGIBRD_BAUD_DIVINT      0
+#define BM_UARTDBGIBRD_BAUD_DIVINT 0x0000FFFF
+#define BF_UARTDBGIBRD_BAUD_DIVINT(v)  \
+	(((v) << 0) & BM_UARTDBGIBRD_BAUD_DIVINT)
+#define HW_UARTDBGFBRD 0x00000028
+#define BP_UARTDBGFBRD_UNAVAILABLE      8
+#define BM_UARTDBGFBRD_UNAVAILABLE 0xFFFFFF00
+#define BF_UARTDBGFBRD_UNAVAILABLE(v) \
+	(((v) << 8) & BM_UARTDBGFBRD_UNAVAILABLE)
+#define BP_UARTDBGFBRD_RESERVED      6
+#define BM_UARTDBGFBRD_RESERVED 0x000000C0
+#define BF_UARTDBGFBRD_RESERVED(v)  \
+	(((v) << 6) & BM_UARTDBGFBRD_RESERVED)
+#define BP_UARTDBGFBRD_BAUD_DIVFRAC      0
+#define BM_UARTDBGFBRD_BAUD_DIVFRAC 0x0000003F
+#define BF_UARTDBGFBRD_BAUD_DIVFRAC(v)  \
+	(((v) << 0) & BM_UARTDBGFBRD_BAUD_DIVFRAC)
+#define HW_UARTDBGLCR_H 0x0000002c
+#define BP_UARTDBGLCR_H_UNAVAILABLE      16
+#define BM_UARTDBGLCR_H_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGLCR_H_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGLCR_H_UNAVAILABLE)
+#define BP_UARTDBGLCR_H_RESERVED      8
+#define BM_UARTDBGLCR_H_RESERVED 0x0000FF00
+#define BF_UARTDBGLCR_H_RESERVED(v)  \
+	(((v) << 8) & BM_UARTDBGLCR_H_RESERVED)
+#define BM_UARTDBGLCR_H_SPS 0x00000080
+#define BP_UARTDBGLCR_H_WLEN      5
+#define BM_UARTDBGLCR_H_WLEN 0x00000060
+#define BF_UARTDBGLCR_H_WLEN(v)  \
+	(((v) << 5) & BM_UARTDBGLCR_H_WLEN)
+#define BM_UARTDBGLCR_H_FEN 0x00000010
+#define BM_UARTDBGLCR_H_STP2 0x00000008
+#define BM_UARTDBGLCR_H_EPS 0x00000004
+#define BM_UARTDBGLCR_H_PEN 0x00000002
+#define BM_UARTDBGLCR_H_BRK 0x00000001
+#define HW_UARTDBGCR 0x00000030
+#define BP_UARTDBGCR_UNAVAILABLE      16
+#define BM_UARTDBGCR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGCR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGCR_UNAVAILABLE)
+#define BM_UARTDBGCR_CTSEN 0x00008000
+#define BM_UARTDBGCR_RTSEN 0x00004000
+#define BM_UARTDBGCR_OUT2 0x00002000
+#define BM_UARTDBGCR_OUT1 0x00001000
+#define BM_UARTDBGCR_RTS 0x00000800
+#define BM_UARTDBGCR_DTR 0x00000400
+#define BM_UARTDBGCR_RXE 0x00000200
+#define BM_UARTDBGCR_TXE 0x00000100
+#define BM_UARTDBGCR_LBE 0x00000080
+#define BP_UARTDBGCR_RESERVED      3
+#define BM_UARTDBGCR_RESERVED 0x00000078
+#define BF_UARTDBGCR_RESERVED(v)  \
+	(((v) << 3) & BM_UARTDBGCR_RESERVED)
+#define BM_UARTDBGCR_SIRLP 0x00000004
+#define BM_UARTDBGCR_SIREN 0x00000002
+#define BM_UARTDBGCR_UARTEN 0x00000001
+#define HW_UARTDBGIFLS 0x00000034
+#define BP_UARTDBGIFLS_UNAVAILABLE      16
+#define BM_UARTDBGIFLS_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGIFLS_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGIFLS_UNAVAILABLE)
+#define BP_UARTDBGIFLS_RESERVED      6
+#define BM_UARTDBGIFLS_RESERVED 0x0000FFC0
+#define BF_UARTDBGIFLS_RESERVED(v)  \
+	(((v) << 6) & BM_UARTDBGIFLS_RESERVED)
+#define BP_UARTDBGIFLS_RXIFLSEL      3
+#define BM_UARTDBGIFLS_RXIFLSEL 0x00000038
+#define BF_UARTDBGIFLS_RXIFLSEL(v)  \
+	(((v) << 3) & BM_UARTDBGIFLS_RXIFLSEL)
+#define BV_UARTDBGIFLS_RXIFLSEL__NOT_EMPTY      0x0
+#define BV_UARTDBGIFLS_RXIFLSEL__ONE_QUARTER    0x1
+#define BV_UARTDBGIFLS_RXIFLSEL__ONE_HALF       0x2
+#define BV_UARTDBGIFLS_RXIFLSEL__THREE_QUARTERS 0x3
+#define BV_UARTDBGIFLS_RXIFLSEL__SEVEN_EIGHTHS  0x4
+#define BV_UARTDBGIFLS_RXIFLSEL__INVALID5       0x5
+#define BV_UARTDBGIFLS_RXIFLSEL__INVALID6       0x6
+#define BV_UARTDBGIFLS_RXIFLSEL__INVALID7       0x7
+#define BP_UARTDBGIFLS_TXIFLSEL      0
+#define BM_UARTDBGIFLS_TXIFLSEL 0x00000007
+#define BF_UARTDBGIFLS_TXIFLSEL(v)  \
+	(((v) << 0) & BM_UARTDBGIFLS_TXIFLSEL)
+#define BV_UARTDBGIFLS_TXIFLSEL__EMPTY	  0x0
+#define BV_UARTDBGIFLS_TXIFLSEL__ONE_QUARTER    0x1
+#define BV_UARTDBGIFLS_TXIFLSEL__ONE_HALF       0x2
+#define BV_UARTDBGIFLS_TXIFLSEL__THREE_QUARTERS 0x3
+#define BV_UARTDBGIFLS_TXIFLSEL__SEVEN_EIGHTHS  0x4
+#define BV_UARTDBGIFLS_TXIFLSEL__INVALID5       0x5
+#define BV_UARTDBGIFLS_TXIFLSEL__INVALID6       0x6
+#define BV_UARTDBGIFLS_TXIFLSEL__INVALID7       0x7
+#define HW_UARTDBGIMSC 0x00000038
+#define BP_UARTDBGIMSC_UNAVAILABLE      16
+#define BM_UARTDBGIMSC_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGIMSC_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGIMSC_UNAVAILABLE)
+#define BP_UARTDBGIMSC_RESERVED      11
+#define BM_UARTDBGIMSC_RESERVED 0x0000F800
+#define BF_UARTDBGIMSC_RESERVED(v)  \
+	(((v) << 11) & BM_UARTDBGIMSC_RESERVED)
+#define BM_UARTDBGIMSC_OEIM 0x00000400
+#define BM_UARTDBGIMSC_BEIM 0x00000200
+#define BM_UARTDBGIMSC_PEIM 0x00000100
+#define BM_UARTDBGIMSC_FEIM 0x00000080
+#define BM_UARTDBGIMSC_RTIM 0x00000040
+#define BM_UARTDBGIMSC_TXIM 0x00000020
+#define BM_UARTDBGIMSC_RXIM 0x00000010
+#define BM_UARTDBGIMSC_DSRMIM 0x00000008
+#define BM_UARTDBGIMSC_DCDMIM 0x00000004
+#define BM_UARTDBGIMSC_CTSMIM 0x00000002
+#define BM_UARTDBGIMSC_RIMIM 0x00000001
+#define HW_UARTDBGRIS 0x0000003c
+#define BP_UARTDBGRIS_UNAVAILABLE      16
+#define BM_UARTDBGRIS_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGRIS_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGRIS_UNAVAILABLE)
+#define BP_UARTDBGRIS_RESERVED      11
+#define BM_UARTDBGRIS_RESERVED 0x0000F800
+#define BF_UARTDBGRIS_RESERVED(v)  \
+	(((v) << 11) & BM_UARTDBGRIS_RESERVED)
+#define BM_UARTDBGRIS_OERIS 0x00000400
+#define BM_UARTDBGRIS_BERIS 0x00000200
+#define BM_UARTDBGRIS_PERIS 0x00000100
+#define BM_UARTDBGRIS_FERIS 0x00000080
+#define BM_UARTDBGRIS_RTRIS 0x00000040
+#define BM_UARTDBGRIS_TXRIS 0x00000020
+#define BM_UARTDBGRIS_RXRIS 0x00000010
+#define BM_UARTDBGRIS_DSRRMIS 0x00000008
+#define BM_UARTDBGRIS_DCDRMIS 0x00000004
+#define BM_UARTDBGRIS_CTSRMIS 0x00000002
+#define BM_UARTDBGRIS_RIRMIS 0x00000001
+#define HW_UARTDBGMIS 0x00000040
+#define BP_UARTDBGMIS_UNAVAILABLE      16
+#define BM_UARTDBGMIS_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGMIS_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGMIS_UNAVAILABLE)
+#define BP_UARTDBGMIS_RESERVED      11
+#define BM_UARTDBGMIS_RESERVED 0x0000F800
+#define BF_UARTDBGMIS_RESERVED(v)  \
+	(((v) << 11) & BM_UARTDBGMIS_RESERVED)
+#define BM_UARTDBGMIS_OEMIS 0x00000400
+#define BM_UARTDBGMIS_BEMIS 0x00000200
+#define BM_UARTDBGMIS_PEMIS 0x00000100
+#define BM_UARTDBGMIS_FEMIS 0x00000080
+#define BM_UARTDBGMIS_RTMIS 0x00000040
+#define BM_UARTDBGMIS_TXMIS 0x00000020
+#define BM_UARTDBGMIS_RXMIS 0x00000010
+#define BM_UARTDBGMIS_DSRMMIS 0x00000008
+#define BM_UARTDBGMIS_DCDMMIS 0x00000004
+#define BM_UARTDBGMIS_CTSMMIS 0x00000002
+#define BM_UARTDBGMIS_RIMMIS 0x00000001
+#define HW_UARTDBGICR 0x00000044
+#define BP_UARTDBGICR_UNAVAILABLE      16
+#define BM_UARTDBGICR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGICR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGICR_UNAVAILABLE)
+#define BP_UARTDBGICR_RESERVED      11
+#define BM_UARTDBGICR_RESERVED 0x0000F800
+#define BF_UARTDBGICR_RESERVED(v)  \
+	(((v) << 11) & BM_UARTDBGICR_RESERVED)
+#define BM_UARTDBGICR_OEIC 0x00000400
+#define BM_UARTDBGICR_BEIC 0x00000200
+#define BM_UARTDBGICR_PEIC 0x00000100
+#define BM_UARTDBGICR_FEIC 0x00000080
+#define BM_UARTDBGICR_RTIC 0x00000040
+#define BM_UARTDBGICR_TXIC 0x00000020
+#define BM_UARTDBGICR_RXIC 0x00000010
+#define BM_UARTDBGICR_DSRMIC 0x00000008
+#define BM_UARTDBGICR_DCDMIC 0x00000004
+#define BM_UARTDBGICR_CTSMIC 0x00000002
+#define BM_UARTDBGICR_RIMIC 0x00000001
+#define HW_UARTDBGDMACR 0x00000048
+#define BP_UARTDBGDMACR_UNAVAILABLE      16
+#define BM_UARTDBGDMACR_UNAVAILABLE 0xFFFF0000
+#define BF_UARTDBGDMACR_UNAVAILABLE(v) \
+	(((v) << 16) & BM_UARTDBGDMACR_UNAVAILABLE)
+#define BP_UARTDBGDMACR_RESERVED      3
+#define BM_UARTDBGDMACR_RESERVED 0x0000FFF8
+#define BF_UARTDBGDMACR_RESERVED(v)  \
+	(((v) << 3) & BM_UARTDBGDMACR_RESERVED)
+#define BM_UARTDBGDMACR_DMAONERR 0x00000004
+#define BM_UARTDBGDMACR_TXDMAE 0x00000002
+#define BM_UARTDBGDMACR_RXDMAE 0x00000001
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-usbctrl.h b/arch/arm/mach-stmp378x/include/mach/regs-usbctrl.h
new file mode 100644
index 0000000..25112c1
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-usbctrl.h
@@ -0,0 +1,40 @@
+/*
+ * stmp378x: USBCTRL register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_USBCTRL_BASE	(STMP3XXX_REGS_BASE + 0x80000)
+#define REGS_USBCTRL_PHYS	0x80080000
+#define REGS_USBCTRL_SIZE	0x2000
+
+#define HW_USBCTRL_USBCMD	0x140
+#define BM_USBCTRL_USBCMD_RS	0x00000001
+#define BP_USBCTRL_USBCMD_RS	0
+#define BM_USBCTRL_USBCMD_RST	0x00000002
+
+#define HW_USBCTRL_USBINTR	0x148
+#define BM_USBCTRL_USBINTR_UE	0x00000001
+#define BP_USBCTRL_USBINTR_UE	0
+
+#define HW_USBCTRL_PORTSC1	0x184
+#define BM_USBCTRL_PORTSC1_PHCD	0x00800000
+
+#define HW_USBCTRL_OTGSC	0x1A4
+#define BM_USBCTRL_OTGSC_ID	0x00000100
+#define BM_USBCTRL_OTGSC_IDIS	0x00010000
+#define BM_USBCTRL_OTGSC_IDIE	0x01000000
diff --git a/arch/arm/mach-stmp378x/include/mach/regs-usbphy.h b/arch/arm/mach-stmp378x/include/mach/regs-usbphy.h
new file mode 100644
index 0000000..11f3b73
--- /dev/null
+++ b/arch/arm/mach-stmp378x/include/mach/regs-usbphy.h
@@ -0,0 +1,37 @@
+/*
+ * stmp378x: USBPHY register definitions
+ *
+ * Copyright (c) 2008 Freescale Semiconductor
+ * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#define REGS_USBPHY_BASE	(STMP3XXX_REGS_BASE + 0x7C000)
+#define REGS_USBPHY_PHYS	0x8007C000
+#define REGS_USBPHY_SIZE	0x2000
+
+#define HW_USBPHY_PWD		0x0
+
+#define HW_USBPHY_CTRL		0x30
+#define BM_USBPHY_CTRL_ENHOSTDISCONDETECT	0x00000002
+#define BM_USBPHY_CTRL_ENDEVPLUGINDETECT	0x00000010
+#define BM_USBPHY_CTRL_ENOTGIDDETECT	0x00000080
+#define BM_USBPHY_CTRL_ENIRQDEVPLUGIN	0x00000800
+#define BM_USBPHY_CTRL_CLKGATE	0x40000000
+#define BM_USBPHY_CTRL_SFTRST	0x80000000
+
+#define HW_USBPHY_STATUS	0x40
+#define BM_USBPHY_STATUS_DEVPLUGIN_STATUS	0x00000040
+#define BM_USBPHY_STATUS_OTGID_STATUS	0x00000100
-- 
cgit v0.10.2


From c11c22177ae2929598051a39e4655be4a42cb805 Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dpervushin@embeddedalley.com>
Date: Thu, 4 Jun 2009 13:51:05 +0100
Subject: [ARM] 5539/1: Freescale STMP: onboard devices declaration

Define onboard devices for Freescale STMP3xxx boards

Signed-off-by: dmitry pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-stmp378x/stmp378x.c b/arch/arm/mach-stmp378x/stmp378x.c
index 9a363fb..ddd49a7 100644
--- a/arch/arm/mach-stmp378x/stmp378x.c
+++ b/arch/arm/mach-stmp378x/stmp378x.c
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/irq.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/dma.h>
 #include <asm/setup.h>
@@ -39,6 +40,8 @@
 #include <mach/regs-icoll.h>
 #include <mach/regs-apbh.h>
 #include <mach/regs-apbx.h>
+#include <mach/regs-pxp.h>
+#include <mach/regs-i2c.h>
 
 #include "stmp378x.h"
 /*
@@ -232,6 +235,64 @@ static struct map_desc stmp378x_io_desc[] __initdata = {
 	},
 };
 
+
+static u64 common_dmamask = DMA_BIT_MASK(32);
+
+/*
+ * devices that are present only on stmp378x, not on all 3xxx boards:
+ * 	PxP
+ * 	I2C
+ */
+static struct resource pxp_resource[] = {
+	{
+		.flags	= IORESOURCE_MEM,
+		.start	= REGS_PXP_PHYS,
+		.end	= REGS_PXP_PHYS + REGS_PXP_SIZE,
+	}, {
+		.flags	= IORESOURCE_IRQ,
+		.start	= IRQ_PXP,
+		.end	= IRQ_PXP,
+	},
+};
+
+struct platform_device stmp378x_pxp = {
+	.name		= "stmp3xxx-pxp",
+	.id		= -1,
+	.dev		= {
+		.dma_mask		= &common_dmamask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+	},
+	.num_resources	= ARRAY_SIZE(pxp_resource),
+	.resource	= pxp_resource,
+};
+
+static struct resource i2c_resources[] = {
+	{
+		.flags = IORESOURCE_IRQ,
+		.start = IRQ_I2C_ERROR,
+		.end = IRQ_I2C_ERROR,
+	}, {
+		.flags = IORESOURCE_MEM,
+		.start = REGS_I2C_PHYS,
+		.end = REGS_I2C_PHYS + REGS_I2C_SIZE,
+	}, {
+		.flags = IORESOURCE_DMA,
+		.start = STMP3XXX_DMA(3, STMP3XXX_BUS_APBX),
+		.end = STMP3XXX_DMA(3, STMP3XXX_BUS_APBX),
+	},
+};
+
+struct platform_device stmp378x_i2c = {
+	.name = "i2c_stmp3xxx",
+	.id = 0,
+	.dev	= {
+		.dma_mask	= &common_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+	.resource = i2c_resources,
+	.num_resources = ARRAY_SIZE(i2c_resources),
+};
+
 void __init stmp378x_map_io(void)
 {
 	iotable_init(stmp378x_io_desc, ARRAY_SIZE(stmp378x_io_desc));
diff --git a/arch/arm/mach-stmp378x/stmp378x.h b/arch/arm/mach-stmp378x/stmp378x.h
index 473de64..0dc15b3 100644
--- a/arch/arm/mach-stmp378x/stmp378x.h
+++ b/arch/arm/mach-stmp378x/stmp378x.h
@@ -21,4 +21,5 @@
 void stmp378x_map_io(void);
 void stmp378x_init_irq(void);
 
+extern struct platform_device stmp378x_pxp, stmp378x_i2c;
 #endif /* __MACH_STMP378X_COMMON_H */
diff --git a/arch/arm/mach-stmp378x/stmp378x_devb.c b/arch/arm/mach-stmp378x/stmp378x_devb.c
index bc643f6..90d8fe6 100644
--- a/arch/arm/mach-stmp378x/stmp378x_devb.c
+++ b/arch/arm/mach-stmp378x/stmp378x_devb.c
@@ -17,7 +17,12 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/spi/spi.h>
 
 #include <asm/setup.h>
 #include <asm/mach-types.h>
@@ -25,12 +30,39 @@
 
 #include <mach/pins.h>
 #include <mach/pinmux.h>
+#include <mach/platform.h>
 #include <mach/stmp3xxx.h>
+#include <mach/mmc.h>
+#include <mach/gpmi.h>
 
 #include "stmp378x.h"
 
 static struct platform_device *devices[] = {
 	&stmp3xxx_dbguart,
+	&stmp3xxx_appuart,
+	&stmp3xxx_watchdog,
+	&stmp3xxx_touchscreen,
+	&stmp3xxx_rtc,
+	&stmp3xxx_keyboard,
+	&stmp3xxx_framebuffer,
+	&stmp3xxx_backlight,
+	&stmp3xxx_rotdec,
+	&stmp3xxx_persistent,
+	&stmp3xxx_dcp_bootstream,
+	&stmp3xxx_dcp,
+	&stmp3xxx_battery,
+	&stmp378x_pxp,
+	&stmp378x_i2c,
+};
+
+static struct pin_desc i2c_pins_desc[] = {
+	{ PINID_I2C_SCL, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_I2C_SDA, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+};
+
+static struct pin_group i2c_pins = {
+	.pins		= i2c_pins_desc,
+	.nr_pins	= ARRAY_SIZE(i2c_pins_desc),
 };
 
 static struct pin_desc dbguart_pins_0[] = {
@@ -56,6 +88,215 @@ static int dbguart_pins_control(int id, int request)
 	return r;
 }
 
+static struct pin_desc appuart_pins_0[] = {
+	{ PINID_AUART1_CTS, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_AUART1_RTS, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_AUART1_RX, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_AUART1_TX, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+};
+
+static struct pin_desc appuart_pins_1[] = {
+#if 0 /* enable these when second appuart will be connected */
+	{ PINID_AUART2_CTS, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_AUART2_RTS, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_AUART2_RX, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_AUART2_TX, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+#endif
+};
+
+static struct pin_desc mmc_pins_desc[] = {
+	{ PINID_SSP1_DATA0, PIN_FUN1, PIN_8MA, PIN_3_3V, 1 },
+	{ PINID_SSP1_DATA1, PIN_FUN1, PIN_8MA, PIN_3_3V, 1 },
+	{ PINID_SSP1_DATA2, PIN_FUN1, PIN_8MA, PIN_3_3V, 1 },
+	{ PINID_SSP1_DATA3, PIN_FUN1, PIN_8MA, PIN_3_3V, 1 },
+	{ PINID_SSP1_CMD, PIN_FUN1, PIN_8MA, PIN_3_3V, 1 },
+	{ PINID_SSP1_SCK, PIN_FUN1, PIN_8MA, PIN_3_3V, 0 },
+	{ PINID_SSP1_DETECT, PIN_FUN1, PIN_8MA, PIN_3_3V, 0 },
+};
+
+static struct pin_group mmc_pins = {
+	.pins		= mmc_pins_desc,
+	.nr_pins	= ARRAY_SIZE(mmc_pins_desc),
+};
+
+static int stmp3xxxmmc_get_wp(void)
+{
+	return gpio_get_value(PINID_PWM4);
+}
+
+static int stmp3xxxmmc_hw_init_ssp1(void)
+{
+	int ret;
+
+	ret = stmp3xxx_request_pin_group(&mmc_pins, "mmc");
+	if (ret)
+		goto out;
+
+	/* Configure write protect GPIO pin */
+	ret = gpio_request(PINID_PWM4, "mmc wp");
+	if (ret)
+		goto out_wp;
+
+	gpio_direction_input(PINID_PWM4);
+
+	/* Configure POWER pin as gpio to drive power to MMC slot */
+	ret = gpio_request(PINID_PWM3, "mmc power");
+	if (ret)
+		goto out_power;
+
+	gpio_direction_output(PINID_PWM3, 0);
+	mdelay(100);
+
+	return 0;
+
+out_power:
+	gpio_free(PINID_PWM4);
+out_wp:
+	stmp3xxx_release_pin_group(&mmc_pins, "mmc");
+out:
+	return ret;
+}
+
+static void stmp3xxxmmc_hw_release_ssp1(void)
+{
+	gpio_free(PINID_PWM3);
+	gpio_free(PINID_PWM4);
+	stmp3xxx_release_pin_group(&mmc_pins, "mmc");
+}
+
+static void stmp3xxxmmc_cmd_pullup_ssp1(int enable)
+{
+	stmp3xxx_pin_pullup(PINID_SSP1_CMD, enable, "mmc");
+}
+
+static unsigned long
+stmp3xxxmmc_setclock_ssp1(void __iomem *base, unsigned long hz)
+{
+	struct clk *ssp, *parent;
+	char *p;
+	long r;
+
+	ssp = clk_get(NULL, "ssp");
+
+	/* using SSP1, no timeout, clock rate 1 */
+	writel(BF(2, SSP_TIMING_CLOCK_DIVIDE) |
+	       BF(0xFFFF, SSP_TIMING_TIMEOUT),
+	       base + HW_SSP_TIMING);
+
+	p = (hz > 1000000) ? "io" : "osc_24M";
+	parent = clk_get(NULL, p);
+	clk_set_parent(ssp, parent);
+	r = clk_set_rate(ssp, 2 * hz / 1000);
+	clk_put(parent);
+	clk_put(ssp);
+
+	return hz;
+}
+
+static struct stmp3xxxmmc_platform_data mmc_data = {
+	.hw_init	= stmp3xxxmmc_hw_init_ssp1,
+	.hw_release	= stmp3xxxmmc_hw_release_ssp1,
+	.get_wp		= stmp3xxxmmc_get_wp,
+	.cmd_pullup	= stmp3xxxmmc_cmd_pullup_ssp1,
+	.setclock	= stmp3xxxmmc_setclock_ssp1,
+};
+
+
+static struct pin_group appuart_pins[] = {
+	[0] = {
+		.pins		= appuart_pins_0,
+		.nr_pins	= ARRAY_SIZE(appuart_pins_0),
+	},
+	[1] = {
+		.pins		= appuart_pins_1,
+		.nr_pins	= ARRAY_SIZE(appuart_pins_1),
+	},
+};
+
+static struct pin_desc ssp1_pins_desc[] = {
+	{ PINID_SSP1_SCK,	PIN_FUN1, PIN_8MA, PIN_3_3V, 0, },
+	{ PINID_SSP1_CMD,	PIN_FUN1, PIN_4MA, PIN_3_3V, 0, },
+	{ PINID_SSP1_DATA0,	PIN_FUN1, PIN_4MA, PIN_3_3V, 0, },
+	{ PINID_SSP1_DATA3,	PIN_FUN1, PIN_4MA, PIN_3_3V, 0, },
+};
+
+static struct pin_desc ssp2_pins_desc[] = {
+	{ PINID_GPMI_WRN,	PIN_FUN3, PIN_8MA, PIN_3_3V, 0, },
+	{ PINID_GPMI_RDY1,	PIN_FUN3, PIN_4MA, PIN_3_3V, 0, },
+	{ PINID_GPMI_D00,	PIN_FUN3, PIN_4MA, PIN_3_3V, 0, },
+	{ PINID_GPMI_D03,	PIN_FUN3, PIN_4MA, PIN_3_3V, 0, },
+};
+
+static struct pin_group ssp1_pins = {
+	.pins = ssp1_pins_desc,
+	.nr_pins = ARRAY_SIZE(ssp1_pins_desc),
+};
+
+static struct pin_group ssp2_pins = {
+	.pins = ssp1_pins_desc,
+	.nr_pins = ARRAY_SIZE(ssp2_pins_desc),
+};
+
+static struct pin_desc gpmi_pins_desc[] = {
+	{ PINID_GPMI_CE0N, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_CE1N, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GMPI_CE2N, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_CLE, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_ALE, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_WPN, PIN_FUN1, PIN_12MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_RDY1, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_D00, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_D01, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_D02, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_D03, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_D04, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_D05, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_D06, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_D07, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_RDY0, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_RDY2, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_RDY3, PIN_FUN1, PIN_4MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_WRN, PIN_FUN1, PIN_12MA, PIN_3_3V, 0 },
+	{ PINID_GPMI_RDN, PIN_FUN1, PIN_12MA, PIN_3_3V, 0 },
+};
+
+static struct pin_group gpmi_pins = {
+	.pins		= gpmi_pins_desc,
+	.nr_pins	= ARRAY_SIZE(gpmi_pins_desc),
+};
+
+static struct mtd_partition gpmi_partitions[] = {
+	[0] = {
+		.name	= "boot",
+		.size	= 10 * SZ_1M,
+		.offset	= 0,
+	},
+	[1] = {
+		.name	= "data",
+		.size	= MTDPART_SIZ_FULL,
+		.offset	= MTDPART_OFS_APPEND,
+	},
+};
+
+static struct gpmi_platform_data gpmi_data = {
+	.pins = &gpmi_pins,
+	.nr_parts = ARRAY_SIZE(gpmi_partitions),
+	.parts = gpmi_partitions,
+	.part_types = { "cmdline", NULL },
+};
+
+static struct spi_board_info spi_board_info[] __initdata = {
+#if defined(CONFIG_ENC28J60) || defined(CONFIG_ENC28J60_MODULE)
+	{
+		.modalias       = "enc28j60",
+		.max_speed_hz   = 6 * 1000 * 1000,
+		.bus_num	= 1,
+		.chip_select    = 0,
+		.platform_data  = NULL,
+	},
+#endif
+};
+
 static void __init stmp378x_devb_init(void)
 {
 	stmp3xxx_pinmux_init(NR_REAL_IRQS);
@@ -64,9 +305,22 @@ static void __init stmp378x_devb_init(void)
 	stmp3xxx_init();
 
 	stmp3xxx_dbguart.dev.platform_data = dbguart_pins_control;
+	stmp3xxx_appuart.dev.platform_data = appuart_pins;
+	stmp3xxx_mmc.dev.platform_data = &mmc_data;
+	stmp3xxx_gpmi.dev.platform_data = &gpmi_data;
+	stmp3xxx_spi1.dev.platform_data = &ssp1_pins;
+	stmp3xxx_spi2.dev.platform_data = &ssp2_pins;
+	stmp378x_i2c.dev.platform_data = &i2c_pins;
+
+	/* register spi devices */
+	spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
 
 	/* add board's devices */
 	platform_add_devices(devices, ARRAY_SIZE(devices));
+
+	/* add devices selected by command line ssp1= and ssp2= options */
+	stmp3xxx_ssp1_device_register();
+	stmp3xxx_ssp2_device_register();
 }
 
 MACHINE_START(STMP378X, "STMP378X")
diff --git a/arch/arm/mach-stmp37xx/stmp37xx_devb.c b/arch/arm/mach-stmp37xx/stmp37xx_devb.c
index adfbdc7..394f21a 100644
--- a/arch/arm/mach-stmp37xx/stmp37xx_devb.c
+++ b/arch/arm/mach-stmp37xx/stmp37xx_devb.c
@@ -33,6 +33,7 @@
  */
 static struct platform_device *stmp37xx_devb_devices[] = {
 	&stmp3xxx_dbguart,
+	&stmp3xxx_appuart,
 };
 
 static struct pin_desc dbguart_pins_0[] = {
@@ -40,6 +41,21 @@ static struct pin_desc dbguart_pins_0[] = {
 	{ PINID_PWM1, PIN_FUN3, },
 };
 
+struct pin_desc appuart_pins_0[] = {
+	{ PINID_UART2_CTS, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_UART2_RTS, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_UART2_RX, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+	{ PINID_UART2_TX, PIN_FUN1, PIN_4MA, PIN_1_8V, 0, },
+};
+
+static struct pin_group appuart_pins[] = {
+	[0] = {
+		.pins		= appuart_pins_0,
+		.nr_pins	= ARRAY_SIZE(appuart_pins_0),
+	},
+	/* 37xx has the only app uart */
+};
+
 static struct pin_group dbguart_pins[] = {
 	[0] = {
 		.pins		= dbguart_pins_0,
@@ -67,6 +83,8 @@ static void __init stmp37xx_devb_init(void)
 	stmp3xxx_init();
 
 	stmp3xxx_dbguart.dev.platform_data = dbguart_pins_control;
+	stmp3xxx_appuart.dev.platform_data = appuart_pins;
+
 	/* Add STMP37xx development board devices */
 	platform_add_devices(stmp37xx_devb_devices,
 			ARRAY_SIZE(stmp37xx_devb_devices));
diff --git a/arch/arm/plat-stmp3xxx/Makefile b/arch/arm/plat-stmp3xxx/Makefile
index b634800..31dd518 100644
--- a/arch/arm/plat-stmp3xxx/Makefile
+++ b/arch/arm/plat-stmp3xxx/Makefile
@@ -2,4 +2,4 @@
 # Makefile for the linux kernel.
 #
 # Object file lists.
-obj-y += core.o timer.o irq.o dma.o clock.o pinmux.o
+obj-y += core.o timer.o irq.o dma.o clock.o pinmux.o devices.o
diff --git a/arch/arm/plat-stmp3xxx/devices.c b/arch/arm/plat-stmp3xxx/devices.c
new file mode 100644
index 0000000..68fed4b
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/devices.c
@@ -0,0 +1,389 @@
+/*
+* Freescale STMP37XX/STMP378X platform devices
+*
+* Embedded Alley Solutions, Inc <source@embeddedalley.com>
+*
+* Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
+* Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+*/
+
+/*
+* The code contained herein is licensed under the GNU General Public
+* License. You may obtain a copy of the GNU General Public License
+* Version 2 or later at the following locations:
+*
+* http://www.opensource.org/licenses/gpl-license.html
+* http://www.gnu.org/copyleft/gpl.html
+*/
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+
+#include <mach/dma.h>
+#include <mach/platform.h>
+#include <mach/stmp3xxx.h>
+#include <mach/regs-lcdif.h>
+#include <mach/regs-uartapp.h>
+#include <mach/regs-gpmi.h>
+#include <mach/regs-usbctrl.h>
+#include <mach/regs-ssp.h>
+#include <mach/regs-rtc.h>
+
+static u64 common_dmamask = DMA_BIT_MASK(32);
+
+static struct resource appuart_resources[] = {
+	{
+		.start = IRQ_UARTAPP_INTERNAL,
+		.end = IRQ_UARTAPP_INTERNAL,
+		.flags = IORESOURCE_IRQ,
+	}, {
+		.start = IRQ_UARTAPP_RX_DMA,
+		.end = IRQ_UARTAPP_RX_DMA,
+		.flags = IORESOURCE_IRQ,
+	}, {
+		.start = IRQ_UARTAPP_TX_DMA,
+		.end = IRQ_UARTAPP_TX_DMA,
+		.flags = IORESOURCE_IRQ,
+	}, {
+		.start = REGS_UARTAPP1_PHYS,
+		.end = REGS_UARTAPP1_PHYS + REGS_UARTAPP_SIZE,
+		.flags = IORESOURCE_MEM,
+	}, {
+		/* Rx DMA channel */
+		.start = STMP3XXX_DMA(6, STMP3XXX_BUS_APBX),
+		.end = STMP3XXX_DMA(6, STMP3XXX_BUS_APBX),
+		.flags = IORESOURCE_DMA,
+	}, {
+		/* Tx DMA channel */
+		.start = STMP3XXX_DMA(7, STMP3XXX_BUS_APBX),
+		.end = STMP3XXX_DMA(7, STMP3XXX_BUS_APBX),
+		.flags = IORESOURCE_DMA,
+	},
+};
+
+struct platform_device stmp3xxx_appuart = {
+	.name = "stmp3xxx-appuart",
+	.id = 0,
+	.resource = appuart_resources,
+	.num_resources = ARRAY_SIZE(appuart_resources),
+	.dev = {
+		.dma_mask	= &common_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+};
+
+struct platform_device stmp3xxx_watchdog = {
+      .name   = "stmp3xxx_wdt",
+      .id     = -1,
+};
+
+static struct resource ts_resource[] = {
+	{
+		.flags  = IORESOURCE_IRQ,
+		.start  = IRQ_TOUCH_DETECT,
+		.end    = IRQ_TOUCH_DETECT,
+	}, {
+		.flags  = IORESOURCE_IRQ,
+		.start  = IRQ_LRADC_CH5,
+		.end    = IRQ_LRADC_CH5,
+	},
+};
+
+struct platform_device stmp3xxx_touchscreen = {
+	.name		= "stmp3xxx_ts",
+	.id		= -1,
+	.resource	= ts_resource,
+	.num_resources	= ARRAY_SIZE(ts_resource),
+};
+
+/*
+* Keypad device
+*/
+struct platform_device stmp3xxx_keyboard = {
+	.name		= "stmp3xxx-keyboard",
+	.id		= -1,
+};
+
+static struct resource gpmi_resources[] = {
+	{
+		.flags = IORESOURCE_MEM,
+		.start = REGS_GPMI_PHYS,
+		.end = REGS_GPMI_PHYS + REGS_GPMI_SIZE,
+	}, {
+		.flags = IORESOURCE_IRQ,
+		.start = IRQ_GPMI_DMA,
+		.end = IRQ_GPMI_DMA,
+	}, {
+		.flags = IORESOURCE_DMA,
+		.start = STMP3XXX_DMA(4, STMP3XXX_BUS_APBH),
+		.end = STMP3XXX_DMA(8, STMP3XXX_BUS_APBH),
+	},
+};
+
+struct platform_device stmp3xxx_gpmi = {
+	.name = "gpmi",
+	.id = -1,
+	.dev	= {
+		.dma_mask	= &common_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+	.resource = gpmi_resources,
+	.num_resources = ARRAY_SIZE(gpmi_resources),
+};
+
+static struct resource mmc1_resource[] = {
+	{
+		.flags	= IORESOURCE_MEM,
+		.start	= REGS_SSP1_PHYS,
+		.end	= REGS_SSP1_PHYS + REGS_SSP_SIZE,
+	}, {
+		.flags	= IORESOURCE_DMA,
+		.start	= STMP3XXX_DMA(1, STMP3XXX_BUS_APBH),
+		.end	= STMP3XXX_DMA(1, STMP3XXX_BUS_APBH),
+	}, {
+		.flags	= IORESOURCE_IRQ,
+		.start	= IRQ_SSP1_DMA,
+		.end	= IRQ_SSP1_DMA,
+	}, {
+		.flags	= IORESOURCE_IRQ,
+		.start	= IRQ_SSP_ERROR,
+		.end	= IRQ_SSP_ERROR,
+	},
+};
+
+struct platform_device stmp3xxx_mmc = {
+	.name	= "stmp3xxx-mmc",
+	.id	= 1,
+	.dev	= {
+		.dma_mask	= &common_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+	.resource = mmc1_resource,
+	.num_resources = ARRAY_SIZE(mmc1_resource),
+};
+
+static struct resource usb_resources[] = {
+	{
+		.start	= REGS_USBCTRL_PHYS,
+		.end	= REGS_USBCTRL_PHYS + SZ_4K,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_USB_CTRL,
+		.end	= IRQ_USB_CTRL,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device stmp3xxx_udc = {
+	.name		= "fsl-usb2-udc",
+	.id		= -1,
+	.dev		= {
+		.dma_mask		= &common_dmamask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+	},
+	.resource = usb_resources,
+	.num_resources = ARRAY_SIZE(usb_resources),
+};
+
+struct platform_device stmp3xxx_ehci = {
+	.name		= "fsl-ehci",
+	.id		= -1,
+	.dev		= {
+		.dma_mask		= &common_dmamask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+	},
+	.resource	= usb_resources,
+	.num_resources	= ARRAY_SIZE(usb_resources),
+};
+
+static struct resource rtc_resources[] = {
+	{
+		.start	= REGS_RTC_PHYS,
+		.end	= REGS_RTC_PHYS + REGS_RTC_SIZE,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_RTC_ALARM,
+		.end	= IRQ_RTC_ALARM,
+		.flags	= IORESOURCE_IRQ,
+	}, {
+		.start	= IRQ_RTC_1MSEC,
+		.end	= IRQ_RTC_1MSEC,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device stmp3xxx_rtc = {
+	.name		= "stmp3xxx-rtc",
+	.id		= -1,
+	.resource	= rtc_resources,
+	.num_resources	= ARRAY_SIZE(rtc_resources),
+};
+
+static struct resource ssp1_resources[] = {
+	{
+		.start	= REGS_SSP1_PHYS,
+		.end	= REGS_SSP1_PHYS + REGS_SSP_SIZE,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_SSP1_DMA,
+		.end	= IRQ_SSP1_DMA,
+		.flags	= IORESOURCE_IRQ,
+	}, {
+		.start	= STMP3XXX_DMA(1, STMP3XXX_BUS_APBH),
+		.end	= STMP3XXX_DMA(1, STMP3XXX_BUS_APBH),
+		.flags	= IORESOURCE_DMA,
+	},
+};
+
+static struct resource ssp2_resources[] = {
+	{
+		.start	= REGS_SSP2_PHYS,
+		.end	= REGS_SSP2_PHYS + REGS_SSP_SIZE,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_SSP2_DMA,
+		.end	= IRQ_SSP2_DMA,
+		.flags	= IORESOURCE_IRQ,
+	}, {
+		.start	= STMP3XXX_DMA(2, STMP3XXX_BUS_APBH),
+		.end	= STMP3XXX_DMA(2, STMP3XXX_BUS_APBH),
+		.flags	= IORESOURCE_DMA,
+	},
+};
+
+struct platform_device stmp3xxx_spi1 = {
+	.name	= "stmp3xxx_ssp",
+	.id	= 1,
+	.dev	= {
+		.dma_mask	= &common_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+	.resource = ssp1_resources,
+	.num_resources = ARRAY_SIZE(ssp1_resources),
+};
+
+struct platform_device stmp3xxx_spi2 = {
+	.name	= "stmp3xxx_ssp",
+	.id	= 2,
+	.dev	= {
+		.dma_mask	= &common_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+	.resource = ssp2_resources,
+	.num_resources = ARRAY_SIZE(ssp2_resources),
+};
+
+static struct resource fb_resource[] = {
+	{
+		.flags	= IORESOURCE_IRQ,
+		.start	= IRQ_LCDIF_DMA,
+		.end	= IRQ_LCDIF_DMA,
+	}, {
+		.flags	= IORESOURCE_IRQ,
+		.start	= IRQ_LCDIF_ERROR,
+		.end	= IRQ_LCDIF_ERROR,
+	}, {
+		.flags	= IORESOURCE_MEM,
+		.start	= REGS_LCDIF_PHYS,
+		.end	= REGS_LCDIF_PHYS + REGS_LCDIF_SIZE,
+	},
+};
+
+struct platform_device stmp3xxx_framebuffer = {
+	.name		= "stmp3xxx-fb",
+	.id		= -1,
+	.dev		= {
+		.dma_mask		= &common_dmamask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+	},
+	.num_resources	= ARRAY_SIZE(fb_resource),
+	.resource	= fb_resource,
+};
+
+#define CMDLINE_DEVICE_CHOOSE(name, dev1, dev2)			\
+	static char *cmdline_device_##name;			\
+	static int cmdline_device_##name##_setup(char *dev)	\
+	{							\
+		cmdline_device_##name = dev + 1;		\
+		return 0;					\
+	}							\
+	__setup(#name, cmdline_device_##name##_setup);		\
+	int stmp3xxx_##name##_device_register(void)		\
+	{							\
+		struct platform_device *d = NULL;		\
+		if (!cmdline_device_##name ||			\
+			!strcmp(cmdline_device_##name, #dev1))	\
+				d = &stmp3xxx_##dev1;		\
+		else if (!strcmp(cmdline_device_##name, #dev2))	\
+				d = &stmp3xxx_##dev2;		\
+		else						\
+			printk(KERN_ERR"Unknown %s assignment '%s'.\n",	\
+				#name, cmdline_device_##name);	\
+		return d ? platform_device_register(d) : -ENOENT;	\
+	}
+
+CMDLINE_DEVICE_CHOOSE(ssp1, mmc, spi1)
+CMDLINE_DEVICE_CHOOSE(ssp2, gpmi, spi2)
+
+struct platform_device stmp3xxx_backlight = {
+	.name		= "stmp3xxx-bl",
+	.id		= -1,
+};
+
+struct platform_device stmp3xxx_rotdec = {
+	.name	= "stmp3xxx-rotdec",
+	.id	= -1,
+};
+
+struct platform_device stmp3xxx_persistent = {
+	.name			= "stmp3xxx-persistent",
+	.id			= -1,
+};
+
+struct platform_device stmp3xxx_dcp_bootstream = {
+	.name			= "stmp3xxx-dcpboot",
+	.id			= -1,
+	.dev	= {
+		.dma_mask	= &common_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+};
+
+static struct resource dcp_resources[] = {
+	{
+		.start = IRQ_DCP_VMI,
+		.end = IRQ_DCP_VMI,
+		.flags = IORESOURCE_IRQ,
+	}, {
+		.start = IRQ_DCP,
+		.end = IRQ_DCP,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+struct platform_device stmp3xxx_dcp = {
+	.name			= "stmp3xxx-dcp",
+	.id			= -1,
+	.resource		= dcp_resources,
+	.num_resources		= ARRAY_SIZE(dcp_resources),
+	.dev	= {
+		.dma_mask	= &common_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+};
+
+static struct resource battery_resource[] = {
+	{
+		.flags  = IORESOURCE_IRQ,
+		.start  = IRQ_VDD5V,
+		.end    = IRQ_VDD5V,
+	},
+};
+
+struct platform_device stmp3xxx_battery = {
+	.name   = "stmp3xxx-battery",
+	.resource = battery_resource,
+	.num_resources = ARRAY_SIZE(battery_resource),
+};
diff --git a/arch/arm/plat-stmp3xxx/include/mach/gpmi.h b/arch/arm/plat-stmp3xxx/include/mach/gpmi.h
new file mode 100644
index 0000000..e166432
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/gpmi.h
@@ -0,0 +1,12 @@
+#ifndef __MACH_GPMI_H
+
+#include <linux/mtd/partitions.h>
+#include <mach/regs-gpmi.h>
+
+struct gpmi_platform_data {
+	void *pins;
+	int nr_parts;
+	struct mtd_partition *parts;
+	const char *part_types[];
+};
+#endif
diff --git a/arch/arm/plat-stmp3xxx/include/mach/mmc.h b/arch/arm/plat-stmp3xxx/include/mach/mmc.h
new file mode 100644
index 0000000..ba81e15
--- /dev/null
+++ b/arch/arm/plat-stmp3xxx/include/mach/mmc.h
@@ -0,0 +1,14 @@
+#ifndef _MACH_MMC_H
+#define _MACH_MMC_H
+
+#include <mach/regs-ssp.h>
+
+struct stmp3xxxmmc_platform_data {
+	int (*get_wp)(void);
+	unsigned long (*setclock)(void __iomem *base, unsigned long);
+	void (*cmd_pullup)(int);
+	int  (*hw_init)(void);
+	void (*hw_release)(void);
+};
+
+#endif
diff --git a/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h
index 78cf1be..2e300fe 100644
--- a/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h
+++ b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h
@@ -25,7 +25,27 @@ extern struct sys_timer stmp3xxx_timer;
 void stmp3xxx_init_irq(struct irq_chip *chip);
 void stmp3xxx_init(void);
 int stmp3xxx_reset_block(void __iomem *hwreg, int just_enable);
-extern struct platform_device stmp3xxx_dbguart;
+extern struct platform_device stmp3xxx_dbguart,
+			      stmp3xxx_appuart,
+			      stmp3xxx_watchdog,
+			      stmp3xxx_touchscreen,
+			      stmp3xxx_keyboard,
+			      stmp3xxx_gpmi,
+			      stmp3xxx_mmc,
+			      stmp3xxx_udc,
+			      stmp3xxx_ehci,
+			      stmp3xxx_rtc,
+			      stmp3xxx_spi1,
+			      stmp3xxx_spi2,
+			      stmp3xxx_backlight,
+			      stmp3xxx_rotdec,
+			      stmp3xxx_dcp,
+			      stmp3xxx_dcp_bootstream,
+			      stmp3xxx_persistent,
+			      stmp3xxx_framebuffer,
+			      stmp3xxx_battery;
+int stmp3xxx_ssp1_device_register(void);
+int stmp3xxx_ssp2_device_register(void);
 
 struct pin_group;
 void stmp3xxx_release_pin_group(struct pin_group *pin_group, const char *label);
-- 
cgit v0.10.2


From 2cc3c559fb2fe8cecca82a517bc56e88b0c1effd Mon Sep 17 00:00:00 2001
From: Yan Zheng <zheng.yan@oracle.com>
Date: Thu, 4 Jun 2009 09:23:50 -0400
Subject: Btrfs: set device->total_disk_bytes when adding new device

It was not being properly initialized, and so the size saved to
disk was not correct.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5f01dad..a6d35b0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1440,6 +1440,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	device->io_align = root->sectorsize;
 	device->sector_size = root->sectorsize;
 	device->total_bytes = i_size_read(bdev->bd_inode);
+	device->disk_total_bytes = device->total_bytes;
 	device->dev_root = root->fs_info->dev_root;
 	device->bdev = bdev;
 	device->in_fs_metadata = 1;
-- 
cgit v0.10.2


From 8fc0321f1ad0ffef969056dda91b453bbd7a494d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 15:19:47 +0200
Subject: perf_counter tools: Add color terminal output support

Add Git's color printing library to util/color.[ch].

Add it to perf report, with a trivial example to print high-overhead
entries in red, low-overhead entries in green.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 414399c..c9ec458 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -298,6 +298,7 @@ LIB_H += util/string.h
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
 LIB_H += util/symbol.h
+LIB_H += util/color.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
@@ -319,6 +320,7 @@ LIB_OBJS += util/usage.o
 LIB_OBJS += util/wrapper.o
 LIB_OBJS += util/sigchain.o
 LIB_OBJS += util/symbol.o
+LIB_OBJS += util/color.o
 LIB_OBJS += util/pager.o
 
 BUILTIN_OBJS += builtin-help.o
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index e930b4e..7beedc6 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -9,6 +9,7 @@
 
 #include "util/util.h"
 
+#include "util/color.h"
 #include "util/list.h"
 #include "util/cache.h"
 #include "util/rbtree.h"
@@ -548,7 +549,19 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 	size_t ret;
 
 	if (total_samples) {
-		ret = fprintf(fp, "   %6.2f%%",
+		double percent = self->count * 100.0 / total_samples;
+		char *color = PERF_COLOR_NORMAL;
+
+		/*
+		 * We color high-overhead entries in red, low-overhead
+		 * entries in green - and keep the middle ground normal:
+		 */
+		if (percent >= 5.0)
+			color = PERF_COLOR_RED;
+		if (percent < 0.5)
+			color = PERF_COLOR_GREEN;
+
+		ret = color_fprintf(fp, color, "   %6.2f%%",
 				(self->count * 100.0) / total_samples);
 	} else
 		ret = fprintf(fp, "%12d ", self->count);
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 548a8da..20e5b12 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -21,6 +21,7 @@
 #include "perf.h"
 
 #include "util/symbol.h"
+#include "util/color.h"
 #include "util/util.h"
 #include "util/rbtree.h"
 #include "util/parse-options.h"
@@ -253,7 +254,8 @@ static void print_sym_table(void)
 	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
 		struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
 		struct symbol *sym = (struct symbol *)(syme + 1);
-		float pcnt;
+		char *color = PERF_COLOR_NORMAL;
+		double pcnt;
 
 		if (++printed > print_entries || syme->snap_count < count_filter)
 			continue;
@@ -261,13 +263,22 @@ static void print_sym_table(void)
 		pcnt = 100.0 - (100.0 * ((sum_kevents - syme->snap_count) /
 					 sum_kevents));
 
+		/*
+		 * We color high-overhead entries in red, low-overhead
+		 * entries in green - and keep the middle ground normal:
+		 */
+		if (pcnt >= 5.0)
+			color = PERF_COLOR_RED;
+		if (pcnt < 0.5)
+			color = PERF_COLOR_GREEN;
+
 		if (nr_counters == 1)
-			printf("%19.2f - %4.1f%% - %016llx : %s\n",
-				syme->weight, pcnt, sym->start, sym->name);
+			printf("%19.2f - ", syme->weight);
 		else
-			printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
-				syme->weight, syme->snap_count,
-				pcnt, sym->start, sym->name);
+			printf("%8.1f %10ld - ", syme->weight, syme->snap_count);
+
+		color_fprintf(stdout, color, "%4.1f%%", pcnt);
+		printf(" - %016llx : %s\n", sym->start, sym->name);
 	}
 
 	{
diff --git a/Documentation/perf_counter/util/color.c b/Documentation/perf_counter/util/color.c
new file mode 100644
index 0000000..a77975d
--- /dev/null
+++ b/Documentation/perf_counter/util/color.c
@@ -0,0 +1,230 @@
+#include "cache.h"
+#include "color.h"
+
+int perf_use_color_default = 0;
+
+static int parse_color(const char *name, int len)
+{
+	static const char * const color_names[] = {
+		"normal", "black", "red", "green", "yellow",
+		"blue", "magenta", "cyan", "white"
+	};
+	char *end;
+	int i;
+	for (i = 0; i < ARRAY_SIZE(color_names); i++) {
+		const char *str = color_names[i];
+		if (!strncasecmp(name, str, len) && !str[len])
+			return i - 1;
+	}
+	i = strtol(name, &end, 10);
+	if (end - name == len && i >= -1 && i <= 255)
+		return i;
+	return -2;
+}
+
+static int parse_attr(const char *name, int len)
+{
+	static const int attr_values[] = { 1, 2, 4, 5, 7 };
+	static const char * const attr_names[] = {
+		"bold", "dim", "ul", "blink", "reverse"
+	};
+	int i;
+	for (i = 0; i < ARRAY_SIZE(attr_names); i++) {
+		const char *str = attr_names[i];
+		if (!strncasecmp(name, str, len) && !str[len])
+			return attr_values[i];
+	}
+	return -1;
+}
+
+void color_parse(const char *value, const char *var, char *dst)
+{
+	color_parse_mem(value, strlen(value), var, dst);
+}
+
+void color_parse_mem(const char *value, int value_len, const char *var,
+		char *dst)
+{
+	const char *ptr = value;
+	int len = value_len;
+	int attr = -1;
+	int fg = -2;
+	int bg = -2;
+
+	if (!strncasecmp(value, "reset", len)) {
+		strcpy(dst, PERF_COLOR_RESET);
+		return;
+	}
+
+	/* [fg [bg]] [attr] */
+	while (len > 0) {
+		const char *word = ptr;
+		int val, wordlen = 0;
+
+		while (len > 0 && !isspace(word[wordlen])) {
+			wordlen++;
+			len--;
+		}
+
+		ptr = word + wordlen;
+		while (len > 0 && isspace(*ptr)) {
+			ptr++;
+			len--;
+		}
+
+		val = parse_color(word, wordlen);
+		if (val >= -1) {
+			if (fg == -2) {
+				fg = val;
+				continue;
+			}
+			if (bg == -2) {
+				bg = val;
+				continue;
+			}
+			goto bad;
+		}
+		val = parse_attr(word, wordlen);
+		if (val < 0 || attr != -1)
+			goto bad;
+		attr = val;
+	}
+
+	if (attr >= 0 || fg >= 0 || bg >= 0) {
+		int sep = 0;
+
+		*dst++ = '\033';
+		*dst++ = '[';
+		if (attr >= 0) {
+			*dst++ = '0' + attr;
+			sep++;
+		}
+		if (fg >= 0) {
+			if (sep++)
+				*dst++ = ';';
+			if (fg < 8) {
+				*dst++ = '3';
+				*dst++ = '0' + fg;
+			} else {
+				dst += sprintf(dst, "38;5;%d", fg);
+			}
+		}
+		if (bg >= 0) {
+			if (sep++)
+				*dst++ = ';';
+			if (bg < 8) {
+				*dst++ = '4';
+				*dst++ = '0' + bg;
+			} else {
+				dst += sprintf(dst, "48;5;%d", bg);
+			}
+		}
+		*dst++ = 'm';
+	}
+	*dst = 0;
+	return;
+bad:
+	die("bad color value '%.*s' for variable '%s'", value_len, value, var);
+}
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
+{
+	if (value) {
+		if (!strcasecmp(value, "never"))
+			return 0;
+		if (!strcasecmp(value, "always"))
+			return 1;
+		if (!strcasecmp(value, "auto"))
+			goto auto_color;
+	}
+
+	/* Missing or explicit false to turn off colorization */
+	if (!perf_config_bool(var, value))
+		return 0;
+
+	/* any normal truth value defaults to 'auto' */
+ auto_color:
+	if (stdout_is_tty < 0)
+		stdout_is_tty = isatty(1);
+	if (stdout_is_tty || (pager_in_use() && pager_use_color)) {
+		char *term = getenv("TERM");
+		if (term && strcmp(term, "dumb"))
+			return 1;
+	}
+	return 0;
+}
+
+int perf_color_default_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "color.ui")) {
+		perf_use_color_default = perf_config_colorbool(var, value, -1);
+		return 0;
+	}
+
+	return perf_default_config(var, value, cb);
+}
+
+static int color_vfprintf(FILE *fp, const char *color, const char *fmt,
+		va_list args, const char *trail)
+{
+	int r = 0;
+
+	if (*color)
+		r += fprintf(fp, "%s", color);
+	r += vfprintf(fp, fmt, args);
+	if (*color)
+		r += fprintf(fp, "%s", PERF_COLOR_RESET);
+	if (trail)
+		r += fprintf(fp, "%s", trail);
+	return r;
+}
+
+
+
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+	va_start(args, fmt);
+	r = color_vfprintf(fp, color, fmt, args, NULL);
+	va_end(args);
+	return r;
+}
+
+int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+	va_start(args, fmt);
+	r = color_vfprintf(fp, color, fmt, args, "\n");
+	va_end(args);
+	return r;
+}
+
+/*
+ * This function splits the buffer by newlines and colors the lines individually.
+ *
+ * Returns 0 on success.
+ */
+int color_fwrite_lines(FILE *fp, const char *color,
+		size_t count, const char *buf)
+{
+	if (!*color)
+		return fwrite(buf, count, 1, fp) != 1;
+	while (count) {
+		char *p = memchr(buf, '\n', count);
+		if (p != buf && (fputs(color, fp) < 0 ||
+				fwrite(buf, p ? p - buf : count, 1, fp) != 1 ||
+				fputs(PERF_COLOR_RESET, fp) < 0))
+			return -1;
+		if (!p)
+			return 0;
+		if (fputc('\n', fp) < 0)
+			return -1;
+		count -= p + 1 - buf;
+		buf = p + 1;
+	}
+	return 0;
+}
+
+
diff --git a/Documentation/perf_counter/util/color.h b/Documentation/perf_counter/util/color.h
new file mode 100644
index 0000000..5abfd37
--- /dev/null
+++ b/Documentation/perf_counter/util/color.h
@@ -0,0 +1,36 @@
+#ifndef COLOR_H
+#define COLOR_H
+
+/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
+#define COLOR_MAXLEN 24
+
+#define PERF_COLOR_NORMAL	""
+#define PERF_COLOR_RESET	"\033[m"
+#define PERF_COLOR_BOLD		"\033[1m"
+#define PERF_COLOR_RED		"\033[31m"
+#define PERF_COLOR_GREEN	"\033[32m"
+#define PERF_COLOR_YELLOW	"\033[33m"
+#define PERF_COLOR_BLUE		"\033[34m"
+#define PERF_COLOR_MAGENTA	"\033[35m"
+#define PERF_COLOR_CYAN		"\033[36m"
+#define PERF_COLOR_BG_RED	"\033[41m"
+
+/*
+ * This variable stores the value of color.ui
+ */
+extern int perf_use_color_default;
+
+
+/*
+ * Use this instead of perf_default_config if you need the value of color.ui.
+ */
+int perf_color_default_config(const char *var, const char *value, void *cb);
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
+void color_parse(const char *value, const char *var, char *dst);
+void color_parse_mem(const char *value, int len, const char *var, char *dst);
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
+int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+
+#endif /* COLOR_H */
diff --git a/Documentation/perf_counter/util/environment.c b/Documentation/perf_counter/util/environment.c
index 9b1c819..275b0ee 100644
--- a/Documentation/perf_counter/util/environment.c
+++ b/Documentation/perf_counter/util/environment.c
@@ -6,3 +6,4 @@
 #include "cache.h"
 
 const char *pager_program;
+int pager_use_color = 1;
-- 
cgit v0.10.2


From 13d0ab5ec29852a6925f612830fa9e822669ece6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 15:40:25 +0200
Subject: perf_counter tools: Dont output in color on !tty

Dont emit ASCII color characters if the terminal is not a tty,
such as when perf report gets redirected into a file.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/color.c b/Documentation/perf_counter/util/color.c
index a77975d..9a8c20c 100644
--- a/Documentation/perf_counter/util/color.c
+++ b/Documentation/perf_counter/util/color.c
@@ -1,7 +1,7 @@
 #include "cache.h"
 #include "color.h"
 
-int perf_use_color_default = 0;
+int perf_use_color_default = -1;
 
 static int parse_color(const char *name, int len)
 {
@@ -169,10 +169,20 @@ static int color_vfprintf(FILE *fp, const char *color, const char *fmt,
 {
 	int r = 0;
 
-	if (*color)
+	/*
+	 * Auto-detect:
+	 */
+	if (perf_use_color_default < 0) {
+		if (isatty(1) || pager_in_use())
+			perf_use_color_default = 1;
+		else
+			perf_use_color_default = 0;
+	}
+
+	if (perf_use_color_default && *color)
 		r += fprintf(fp, "%s", color);
 	r += vfprintf(fp, fmt, args);
-	if (*color)
+	if (perf_use_color_default && *color)
 		r += fprintf(fp, "%s", PERF_COLOR_RESET);
 	if (trail)
 		r += fprintf(fp, "%s", trail);
@@ -185,6 +195,7 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
 {
 	va_list args;
 	int r;
+
 	va_start(args, fmt);
 	r = color_vfprintf(fp, color, fmt, args, NULL);
 	va_end(args);
-- 
cgit v0.10.2


From bbe281fad65f2eda1792d70763663a1b729fc03c Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 4 Jun 2009 15:44:25 +0200
Subject: HID: hidraw -- fix comment about accepted devices

hidraw accepts any devices, no matter if the device has
already been claimed by other HID driver (hid-input, hidraw), and
this is intended to stay. Fix up the comment to reflect reality.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 00ccf4b..0c6639e 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -349,10 +349,7 @@ int hidraw_connect(struct hid_device *hid)
 	int minor, result;
 	struct hidraw *dev;
 
-	/* TODO currently we accept any HID device. This should later
-	 * probably be fixed to accept only those devices which provide
-	 * non-input applications
-	 */
+	/* we accept any HID device, no matter the applications */
 
 	dev = kzalloc(sizeof(struct hidraw), GFP_KERNEL);
 	if (!dev)
-- 
cgit v0.10.2


From 70fa9f2eadea7ca35bdcd89b20d555934593b40c Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 4 Jun 2009 15:48:38 +0200
Subject: HID: no more reinitializtion is needed in post_reset

No more reinitialization is needed in the post reset hook, remove
the FIXME comment.

While at it, clean up whitespaces in the immediate surrounding.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index ac8049b..76c4bbe 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -1234,12 +1234,11 @@ static int hid_post_reset(struct usb_interface *intf)
 	struct hid_device *hid = usb_get_intfdata(intf);
 	struct usbhid_device *usbhid = hid->driver_data;
 	int status;
- 
+
 	spin_lock_irq(&usbhid->lock);
 	clear_bit(HID_RESET_PENDING, &usbhid->iofl);
 	spin_unlock_irq(&usbhid->lock);
 	hid_set_idle(dev, intf->cur_altsetting->desc.bInterfaceNumber, 0, 0);
-	/* FIXME: Any more reinitialization needed? */
 	status = hid_start_in(hid);
 	if (status < 0)
 		hid_io_error(hid);
@@ -1251,14 +1250,14 @@ static int hid_post_reset(struct usb_interface *intf)
 int usbhid_get_power(struct hid_device *hid)
 {
 	struct usbhid_device *usbhid = hid->driver_data;
- 
+
 	return usb_autopm_get_interface(usbhid->intf);
 }
 
 void usbhid_put_power(struct hid_device *hid)
 {
 	struct usbhid_device *usbhid = hid->driver_data;
- 
+
 	usb_autopm_put_interface(usbhid->intf);
 }
 
-- 
cgit v0.10.2


From edc52deac624e4641211a325c23da2a73b01a85d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 16:24:37 +0200
Subject: perf report: Bail out if there are unrecognized options/arguments

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 7beedc6..389ae25 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -1094,10 +1094,16 @@ int cmd_report(int argc, const char **argv, const char *prefix)
 
 	page_size = getpagesize();
 
-	parse_options(argc, argv, options, report_usage, 0);
+	argc = parse_options(argc, argv, options, report_usage, 0);
 
 	setup_sorting();
 
+	/*
+	 * Any (unrecognized) arguments left?
+	 */
+	if (argc)
+		usage_with_options(report_usage, options);
+
 	setup_pager();
 
 	return __cmd_report();
-- 
cgit v0.10.2


From 20c84e959ec11b1803d2b2832eef703d5fbe7f7b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Jun 2009 16:33:00 +0200
Subject: perf stat: Update help text

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt
index a67d0e3..a340e7b 100644
--- a/Documentation/perf_counter/Documentation/perf-stat.txt
+++ b/Documentation/perf_counter/Documentation/perf-stat.txt
@@ -22,6 +22,7 @@ OPTIONS
 <command>...::
 	Any command you can specify in a shell.
 
+
 -e::
 --event=::
                              0:0: cpu-cycles          
@@ -45,6 +46,13 @@ OPTIONS
                              1:4: migrations          
                            rNNN: raw PMU events (eventsel+umask)
 
+-i::
+--inherit::
+        child tasks inherit counters
+-p::
+--pid=<pid>::
+        stat events on existing pid
+
 -a::
         system-wide collection
 
@@ -54,20 +62,20 @@ OPTIONS
 EXAMPLES
 --------
 
-$ perf stat sleep 1
+$ perf stat -- make -j
 
- Performance counter stats for 'sleep':
+ Performance counter stats for 'make -j':
 
-       0.678356  task clock ticks     (msecs)
-              7  context switches     (events)
-              4  CPU migrations       (events)
-            232  pagefaults           (events)
-        1810403  CPU cycles           (events)
-         946759  instructions         (events)
-          18952  cache references     (events)
-           4885  cache misses         (events)
+    8117.370256  task clock ticks     #      11.281 CPU utilization factor
+            678  context switches     #       0.000 M/sec
+            133  CPU migrations       #       0.000 M/sec
+         235724  pagefaults           #       0.029 M/sec
+    24821162526  CPU cycles           #    3057.784 M/sec
+    18687303457  instructions         #    2302.138 M/sec
+      172158895  cache references     #      21.209 M/sec
+       27075259  cache misses         #       3.335 M/sec
 
- Wall-clock time elapsed:  1001.252894 msecs
+ Wall-clock time elapsed:   719.554352 msecs
 
 SEE ALSO
 --------
-- 
cgit v0.10.2


From 60313ebed739b331e8e61079da27a11ee3b73a30 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Jun 2009 16:53:44 +0200
Subject: perf_counter: Add fork event

Create a fork event so that we can easily clone the comm and
dso maps without having to generate all those events.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 37d5541..380247b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -277,6 +277,14 @@ enum perf_event_type {
 	PERF_EVENT_UNTHROTTLE		= 6,
 
 	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u32				pid, ppid;
+	 * };
+	 */
+	PERF_EVENT_FORK			= 7,
+
+	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
 	 * will be PERF_RECORD_*
 	 *
@@ -618,6 +626,7 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
 extern void perf_counter_comm(struct task_struct *tsk);
+extern void perf_counter_fork(struct task_struct *tsk);
 
 extern void perf_counter_task_migration(struct task_struct *task, int cpu);
 
@@ -673,6 +682,7 @@ perf_counter_munmap(unsigned long addr, unsigned long len,
 		    unsigned long pgoff, struct file *file)		{ }
 
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
+static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
 static inline void perf_counter_task_migration(struct task_struct *task,
 					       int cpu)			{ }
diff --git a/kernel/fork.c b/kernel/fork.c
index b7d7a9f..f4466ca 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1412,12 +1412,12 @@ long do_fork(unsigned long clone_flags,
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
-		} else {
+		} else if (!(clone_flags & CLONE_VM)) {
 			/*
 			 * vfork will do an exec which will call
 			 * set_task_comm()
 			 */
-			perf_counter_comm(p);
+			perf_counter_fork(p);
 		}
 
 		audit_finish_fork(p);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0bb03f1..78c5862 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -40,9 +40,9 @@ static int perf_reserved_percpu __read_mostly;
 static int perf_overcommit __read_mostly = 1;
 
 static atomic_t nr_counters __read_mostly;
-static atomic_t nr_mmap_tracking __read_mostly;
-static atomic_t nr_munmap_tracking __read_mostly;
-static atomic_t nr_comm_tracking __read_mostly;
+static atomic_t nr_mmap_counters __read_mostly;
+static atomic_t nr_munmap_counters __read_mostly;
+static atomic_t nr_comm_counters __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
@@ -1447,11 +1447,11 @@ static void free_counter(struct perf_counter *counter)
 
 	atomic_dec(&nr_counters);
 	if (counter->attr.mmap)
-		atomic_dec(&nr_mmap_tracking);
+		atomic_dec(&nr_mmap_counters);
 	if (counter->attr.munmap)
-		atomic_dec(&nr_munmap_tracking);
+		atomic_dec(&nr_munmap_counters);
 	if (counter->attr.comm)
-		atomic_dec(&nr_comm_tracking);
+		atomic_dec(&nr_comm_counters);
 
 	if (counter->destroy)
 		counter->destroy(counter);
@@ -2476,6 +2476,105 @@ static void perf_counter_output(struct perf_counter *counter,
 }
 
 /*
+ * fork tracking
+ */
+
+struct perf_fork_event {
+	struct task_struct	*task;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				ppid;
+	} event;
+};
+
+static void perf_counter_fork_output(struct perf_counter *counter,
+				     struct perf_fork_event *fork_event)
+{
+	struct perf_output_handle handle;
+	int size = fork_event->event.header.size;
+	struct task_struct *task = fork_event->task;
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
+
+	if (ret)
+		return;
+
+	fork_event->event.pid = perf_counter_pid(counter, task);
+	fork_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+
+	perf_output_put(&handle, fork_event->event);
+	perf_output_end(&handle);
+}
+
+static int perf_counter_fork_match(struct perf_counter *counter)
+{
+	if (counter->attr.comm || counter->attr.mmap || counter->attr.munmap)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
+				  struct perf_fork_event *fork_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_fork_match(counter))
+			perf_counter_fork_output(counter, fork_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_fork_event(struct perf_fork_event *fork_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_fork_ctx(&cpuctx->ctx, fork_event);
+	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_counter_fork_ctx(ctx, fork_event);
+	rcu_read_unlock();
+}
+
+void perf_counter_fork(struct task_struct *task)
+{
+	struct perf_fork_event fork_event;
+
+	if (!atomic_read(&nr_comm_counters) &&
+	    !atomic_read(&nr_mmap_counters) &&
+	    !atomic_read(&nr_munmap_counters))
+		return;
+
+	fork_event = (struct perf_fork_event){
+		.task	= task,
+		.event  = {
+			.header = {
+				.type = PERF_EVENT_FORK,
+				.size = sizeof(fork_event.event),
+			},
+		},
+	};
+
+	perf_counter_fork_event(&fork_event);
+}
+
+/*
  * comm tracking
  */
 
@@ -2511,11 +2610,9 @@ static void perf_counter_comm_output(struct perf_counter *counter,
 	perf_output_end(&handle);
 }
 
-static int perf_counter_comm_match(struct perf_counter *counter,
-				   struct perf_comm_event *comm_event)
+static int perf_counter_comm_match(struct perf_counter *counter)
 {
-	if (counter->attr.comm &&
-	    comm_event->event.header.type == PERF_EVENT_COMM)
+	if (counter->attr.comm)
 		return 1;
 
 	return 0;
@@ -2531,7 +2628,7 @@ static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_counter_comm_match(counter, comm_event))
+		if (perf_counter_comm_match(counter))
 			perf_counter_comm_output(counter, comm_event);
 	}
 	rcu_read_unlock();
@@ -2570,7 +2667,7 @@ void perf_counter_comm(struct task_struct *task)
 {
 	struct perf_comm_event comm_event;
 
-	if (!atomic_read(&nr_comm_tracking))
+	if (!atomic_read(&nr_comm_counters))
 		return;
 
 	comm_event = (struct perf_comm_event){
@@ -2708,7 +2805,7 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 {
 	struct perf_mmap_event mmap_event;
 
-	if (!atomic_read(&nr_mmap_tracking))
+	if (!atomic_read(&nr_mmap_counters))
 		return;
 
 	mmap_event = (struct perf_mmap_event){
@@ -2729,7 +2826,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 {
 	struct perf_mmap_event mmap_event;
 
-	if (!atomic_read(&nr_munmap_tracking))
+	if (!atomic_read(&nr_munmap_counters))
 		return;
 
 	mmap_event = (struct perf_mmap_event){
@@ -3427,11 +3524,11 @@ done:
 
 	atomic_inc(&nr_counters);
 	if (counter->attr.mmap)
-		atomic_inc(&nr_mmap_tracking);
+		atomic_inc(&nr_mmap_counters);
 	if (counter->attr.munmap)
-		atomic_inc(&nr_munmap_tracking);
+		atomic_inc(&nr_munmap_counters);
 	if (counter->attr.comm)
-		atomic_inc(&nr_comm_tracking);
+		atomic_inc(&nr_comm_counters);
 
 	return counter;
 }
-- 
cgit v0.10.2


From d99e9446200c1ffab28cb0e39b76c34a2bfafd06 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Jun 2009 17:08:58 +0200
Subject: perf_counter: Remove munmap stuff

In name of keeping it simple, only track mmap events. Userspace
will have to remove old overlapping maps when it encounters them.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 380247b..6ca403a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -148,11 +148,10 @@ struct perf_counter_attr {
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
 				mmap           :  1, /* include mmap data     */
-				munmap         :  1, /* include munmap data   */
 				comm	       :  1, /* include comm data     */
 				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 52;
+				__reserved_1   : 53;
 
 	__u32			wakeup_events;	/* wakeup every n events */
 	__u32			__reserved_2;
@@ -246,7 +245,6 @@ enum perf_event_type {
 	 * };
 	 */
 	PERF_EVENT_MMAP			= 1,
-	PERF_EVENT_MUNMAP		= 2,
 
 	/*
 	 * struct {
@@ -622,9 +620,6 @@ extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
 extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 			      unsigned long pgoff, struct file *file);
 
-extern void perf_counter_munmap(unsigned long addr, unsigned long len,
-				unsigned long pgoff, struct file *file);
-
 extern void perf_counter_comm(struct task_struct *tsk);
 extern void perf_counter_fork(struct task_struct *tsk);
 
@@ -677,10 +672,6 @@ static inline void
 perf_counter_mmap(unsigned long addr, unsigned long len,
 		  unsigned long pgoff, struct file *file)		{ }
 
-static inline void
-perf_counter_munmap(unsigned long addr, unsigned long len,
-		    unsigned long pgoff, struct file *file)		{ }
-
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 78c5862..195712e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -41,7 +41,6 @@ static int perf_overcommit __read_mostly = 1;
 
 static atomic_t nr_counters __read_mostly;
 static atomic_t nr_mmap_counters __read_mostly;
-static atomic_t nr_munmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
@@ -1448,8 +1447,6 @@ static void free_counter(struct perf_counter *counter)
 	atomic_dec(&nr_counters);
 	if (counter->attr.mmap)
 		atomic_dec(&nr_mmap_counters);
-	if (counter->attr.munmap)
-		atomic_dec(&nr_munmap_counters);
 	if (counter->attr.comm)
 		atomic_dec(&nr_comm_counters);
 
@@ -2510,7 +2507,7 @@ static void perf_counter_fork_output(struct perf_counter *counter,
 
 static int perf_counter_fork_match(struct perf_counter *counter)
 {
-	if (counter->attr.comm || counter->attr.mmap || counter->attr.munmap)
+	if (counter->attr.comm || counter->attr.mmap)
 		return 1;
 
 	return 0;
@@ -2557,8 +2554,7 @@ void perf_counter_fork(struct task_struct *task)
 	struct perf_fork_event fork_event;
 
 	if (!atomic_read(&nr_comm_counters) &&
-	    !atomic_read(&nr_mmap_counters) &&
-	    !atomic_read(&nr_munmap_counters))
+	    !atomic_read(&nr_mmap_counters))
 		return;
 
 	fork_event = (struct perf_fork_event){
@@ -2722,12 +2718,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 static int perf_counter_mmap_match(struct perf_counter *counter,
 				   struct perf_mmap_event *mmap_event)
 {
-	if (counter->attr.mmap &&
-	    mmap_event->event.header.type == PERF_EVENT_MMAP)
-		return 1;
-
-	if (counter->attr.munmap &&
-	    mmap_event->event.header.type == PERF_EVENT_MUNMAP)
+	if (counter->attr.mmap)
 		return 1;
 
 	return 0;
@@ -2821,27 +2812,6 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 	perf_counter_mmap_event(&mmap_event);
 }
 
-void perf_counter_munmap(unsigned long addr, unsigned long len,
-			 unsigned long pgoff, struct file *file)
-{
-	struct perf_mmap_event mmap_event;
-
-	if (!atomic_read(&nr_munmap_counters))
-		return;
-
-	mmap_event = (struct perf_mmap_event){
-		.file   = file,
-		.event  = {
-			.header = { .type = PERF_EVENT_MUNMAP, },
-			.start  = addr,
-			.len    = len,
-			.pgoff  = pgoff,
-		},
-	};
-
-	perf_counter_mmap_event(&mmap_event);
-}
-
 /*
  * Log sample_period changes so that analyzing tools can re-normalize the
  * event flow.
@@ -3525,8 +3495,6 @@ done:
 	atomic_inc(&nr_counters);
 	if (counter->attr.mmap)
 		atomic_inc(&nr_mmap_counters);
-	if (counter->attr.munmap)
-		atomic_inc(&nr_munmap_counters);
 	if (counter->attr.comm)
 		atomic_inc(&nr_comm_counters);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 2c1c2cb..6451ce2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1756,12 +1756,6 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 	do {
 		long nrpages = vma_pages(vma);
 
-		if (vma->vm_flags & VM_EXEC) {
-			perf_counter_munmap(vma->vm_start,
-					nrpages << PAGE_SHIFT,
-					vma->vm_pgoff, vma->vm_file);
-		}
-
 		mm->total_vm -= nrpages;
 		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
 		vma = remove_vma(vma);
-- 
cgit v0.10.2


From 62fc44536c14b5787531bac7417580fca54c88b4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Jun 2009 16:53:49 +0200
Subject: perf_counter tools: Use fork and remove munmap events

Use fork events to clone comm and map data and remove everything
munmap related

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 389ae25..5d19121 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -43,12 +43,6 @@ static int		full_paths;
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
 
-const char *perf_event_names[] = {
-	[PERF_EVENT_MMAP]   = " PERF_EVENT_MMAP",
-	[PERF_EVENT_MUNMAP] = " PERF_EVENT_MUNMAP",
-	[PERF_EVENT_COMM]   = " PERF_EVENT_COMM",
-};
-
 struct ip_event {
 	struct perf_event_header header;
 	__u64 ip;
@@ -70,11 +64,17 @@ struct comm_event {
 	char comm[16];
 };
 
+struct fork_event {
+	struct perf_event_header header;
+	__u32 pid, ppid;
+};
+
 typedef union event_union {
 	struct perf_event_header header;
 	struct ip_event ip;
 	struct mmap_event mmap;
 	struct comm_event comm;
+	struct fork_event fork;
 } event_t;
 
 static LIST_HEAD(dsos);
@@ -208,7 +208,31 @@ out_delete:
 	return NULL;
 }
 
-struct thread;
+static struct map *map__clone(struct map *self)
+{
+	struct map *map = malloc(sizeof(*self));
+
+	if (!map)
+		return NULL;
+
+	memcpy(map, self, sizeof(*self));
+
+	return map;
+}
+
+static int map__overlap(struct map *l, struct map *r)
+{
+	if (l->start > r->start) {
+		struct map *t = l;
+		l = r;
+		r = t;
+	}
+
+	if (l->end > r->start)
+		return 1;
+
+	return 0;
+}
 
 struct thread {
 	struct rb_node	 rb_node;
@@ -284,9 +308,39 @@ static struct thread *threads__findnew(pid_t pid)
 
 static void thread__insert_map(struct thread *self, struct map *map)
 {
+	struct map *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
+		if (map__overlap(pos, map)) {
+			list_del_init(&pos->node);
+			/* XXX leaks dsos */
+			free(pos);
+		}
+	}
+
 	list_add_tail(&map->node, &self->maps);
 }
 
+static int thread__fork(struct thread *self, struct thread *parent)
+{
+	struct map *map;
+
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(parent->comm);
+	if (!self->comm)
+		return -ENOMEM;
+
+	list_for_each_entry(map, &parent->maps, node) {
+		struct map *new = map__clone(map);
+		if (!new)
+			return -ENOMEM;
+		thread__insert_map(self, new);
+	}
+
+	return 0;
+}
+
 static struct map *thread__find_map(struct thread *self, uint64_t ip)
 {
 	struct map *pos;
@@ -784,7 +838,11 @@ static void register_idle_thread(void)
 	}
 }
 
-static unsigned long total = 0, total_mmap = 0, total_comm = 0, total_unknown = 0;
+static unsigned long total = 0,
+		     total_mmap = 0,
+		     total_comm = 0,
+		     total_fork = 0,
+		     total_unknown = 0;
 
 static int
 process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
@@ -866,9 +924,10 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 	struct thread *thread = threads__findnew(event->mmap.pid);
 	struct map *map = map__new(&event->mmap);
 
-	dprintf("%p [%p]: PERF_EVENT_MMAP: [%p(%p) @ %p]: %s\n",
+	dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
+		event->mmap.pid,
 		(void *)(long)event->mmap.start,
 		(void *)(long)event->mmap.len,
 		(void *)(long)event->mmap.pgoff,
@@ -906,6 +965,26 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 }
 
 static int
+process_fork_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->fork.pid);
+	struct thread *parent = threads__findnew(event->fork.ppid);
+
+	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->fork.pid, event->fork.ppid);
+
+	if (!thread || !parent || thread__fork(thread, parent)) {
+		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
+		return -1;
+	}
+	total_fork++;
+
+	return 0;
+}
+
+static int
 process_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
@@ -918,10 +997,13 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	case PERF_EVENT_COMM:
 		return process_comm_event(event, offset, head);
 
+	case PERF_EVENT_FORK:
+		return process_fork_event(event, offset, head);
+
 	/*
 	 * We dont process them right now but they are fine:
 	 */
-	case PERF_EVENT_MUNMAP:
+
 	case PERF_EVENT_PERIOD:
 	case PERF_EVENT_THROTTLE:
 	case PERF_EVENT_UNTHROTTLE:
@@ -1038,6 +1120,7 @@ more:
 	dprintf("      IP events: %10ld\n", total);
 	dprintf("    mmap events: %10ld\n", total_mmap);
 	dprintf("    comm events: %10ld\n", total_comm);
+	dprintf("    fork events: %10ld\n", total_fork);
 	dprintf(" unknown events: %10ld\n", total_unknown);
 
 	if (dump_trace)
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 20e5b12..31c00ba 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -75,8 +75,6 @@ static unsigned int		realtime_prio			=  0;
 static int			group				=  0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			=  16;
-static int			use_mmap			= 0;
-static int			use_munmap			= 0;
 static int			freq				= 0;
 
 static char			*sym_filter;
@@ -527,19 +525,6 @@ static void mmap_read(struct mmap_data *md)
 		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
 			if (event->header.type & PERF_SAMPLE_IP)
 				process_event(event->ip.ip, md->counter);
-		} else {
-			switch (event->header.type) {
-				case PERF_EVENT_MMAP:
-				case PERF_EVENT_MUNMAP:
-					printf("%s: %Lu %Lu %Lu %s\n",
-							event->header.type == PERF_EVENT_MMAP
-							? "mmap" : "munmap",
-							event->mmap.start,
-							event->mmap.len,
-							event->mmap.pgoff,
-							event->mmap.filename);
-					break;
-			}
 		}
 	}
 
@@ -569,8 +554,6 @@ static int __cmd_top(void)
 			attr.config		= event_id[counter];
 			attr.sample_period	= event_count[counter];
 			attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-			attr.mmap		= use_mmap;
-			attr.munmap		= use_munmap;
 			attr.freq		= freq;
 
 			fd[i][counter] = sys_perf_counter_open(&attr, target_pid, cpu, group_fd, 0);
@@ -670,10 +653,6 @@ static const struct option options[] = {
 		    "only display symbols matchig this pattern"),
 	OPT_BOOLEAN('z', "zero", &group,
 		    "zero history across updates"),
-	OPT_BOOLEAN('M', "use-mmap", &use_mmap,
-		    "track mmap events"),
-	OPT_BOOLEAN('U', "use-munmap", &use_munmap,
-		    "track munmap events"),
 	OPT_INTEGER('F', "freq", &freq,
 		    "profile at this frequency"),
 	OPT_INTEGER('E', "entries", &print_entries,
-- 
cgit v0.10.2


From bbb0a4247aaf1eabbd6d87750eafe99c577920f7 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Fri, 16 Jan 2009 09:49:50 -0700
Subject: Document Reported-by in SubmittingPatches

Randy pointed out that the Reported-By tag should be documented with the
others in SubmittingPatches.

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>

diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index f309d3c..6f29e29 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -405,7 +405,14 @@ person it names.  This tag documents that potentially interested parties
 have been included in the discussion
 
 
-14) Using Tested-by: and Reviewed-by:
+14) Using Reported-by:, Tested-by: and Reviewed-by:
+
+If this patch fixes a problem reported by somebody else, consider adding a
+Reported-by: tag to credit the reporter for their contribution.  Please
+note that this tag should not be added without the reporter's permission,
+especially if the problem was not reported in a public forum.  That said,
+if we diligently credit our bug reporters, they will, hopefully, be
+inspired to help us again in the future.
 
 A Tested-by: tag indicates that the patch has been successfully tested (in
 some environment) by the person named.  This tag informs maintainers that
-- 
cgit v0.10.2


From 5d98932ab0acb699dc56d9e252f056b9b2cdab25 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 21 Apr 2009 13:33:06 -0600
Subject: docs: Encourage better changelogs in the development process document

Add a couple of paragraphs to the "patch formatting" section on how patches
should be described.  This text is shamelessly cribbed from suggestions
posted by Rusty Russell.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>

diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting
index dd48132..f622c1e 100644
--- a/Documentation/development-process/5.Posting
+++ b/Documentation/development-process/5.Posting
@@ -119,7 +119,7 @@ which takes quite a bit of time and thought after the "real work" has been
 done.  When done properly, though, it is time well spent.
 
 
-5.4: PATCH FORMATTING
+5.4: PATCH FORMATTING AND CHANGELOGS
 
 So now you have a perfect series of patches for posting, but the work is
 not done quite yet.  Each patch needs to be formatted into a message which
@@ -146,8 +146,33 @@ that end, each patch will be composed of the following:
  - One or more tag lines, with, at a minimum, one Signed-off-by: line from
    the author of the patch.  Tags will be described in more detail below.
 
-The above three items should, normally, be the text used when committing
-the change to a revision control system.  They are followed by:
+The items above, together, form the changelog for the patch.  Writing good
+changelogs is a crucial but often-neglected art; it's worth spending
+another moment discussing this issue.  When writing a changelog, you should
+bear in mind that a number of different people will be reading your words.
+These include subsystem maintainers and reviewers who need to decide
+whether the patch should be included, distributors and other maintainers
+trying to decide whether a patch should be backported to other kernels, bug
+hunters wondering whether the patch is responsible for a problem they are
+chasing, users who want to know how the kernel has changed, and more.  A
+good changelog conveys the needed information to all of these people in the
+most direct and concise way possible.
+
+To that end, the summary line should describe the effects of and motivation
+for the change as well as possible given the one-line constraint.  The
+detailed description can then amplify on those topics and provide any
+needed additional information.  If the patch fixes a bug, cite the commit
+which introduced the bug if possible.  If a problem is associated with
+specific log or compiler output, include that output to help others
+searching for a solution to the same problem.  If the change is meant to
+support other changes coming in later patch, say so.  If internal APIs are
+changed, detail those changes and how other developers should respond.  In
+general, the more you can put yourself into the shoes of everybody who will
+be reading your changelog, the better that changelog (and the kernel as a
+whole) will be.
+
+Needless to say, the changelog should be the text used when committing the
+change to a revision control system.  It will be followed by:
 
  - The patch itself, in the unified ("-u") patch format.  Using the "-p"
    option to diff will associate function names with changes, making the
-- 
cgit v0.10.2


From 5801da1b2f1207da21271ffd6768cd40a6c7f1c4 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 4 Jun 2009 16:26:50 +0200
Subject: SubmittingPatches: fix typo

Fix typo.

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>

diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 6f29e29..71b6da2 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -451,7 +451,7 @@ offer a Reviewed-by tag for a patch.  This tag serves to give credit to
 reviewers and to inform maintainers of the degree of review which has been
 done on the patch.  Reviewed-by: tags, when supplied by reviewers known to
 understand the subject area and to perform thorough reviews, will normally
-increase the liklihood of your patch getting into the kernel.
+increase the likelihood of your patch getting into the kernel.
 
 
 15) The canonical patch format
-- 
cgit v0.10.2


From 2ae19acaa50a09c1099956efb895c0aca74ab050 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 16 Apr 2009 07:44:45 -0400
Subject: Documentation: Add "how to write a good patch summary" to
 SubmittingPatches

Unfortunately many patch submissions are arriving with painfully poor
patch descriptions.   As a result of the discussion on LKML:

      http://lkml.org/lkml/2009/4/15/296

explain how to submit a better patch description, in the (perhaps
vain) hope that maintainers won't end up having to rewrite the git
commit logs as often as they do today.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>

diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 71b6da2..6c45683 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -91,6 +91,10 @@ Be as specific as possible.  The WORST descriptions possible include
 things like "update driver X", "bug fix for driver X", or "this patch
 includes updates for subsystem X.  Please apply."
 
+The maintainer will thank you if you write your patch description in a
+form which can be easily pulled into Linux's source code management
+system, git, as a "commit log".  See #15, below.
+
 If your description starts to get long, that's a sign that you probably
 need to split up your patch.  See #3, next.
 
@@ -492,12 +496,33 @@ phrase" should not be a filename.  Do not use the same "summary
 phrase" for every patch in a whole patch series (where a "patch
 series" is an ordered sequence of multiple, related patches).
 
-Bear in mind that the "summary phrase" of your email becomes
-a globally-unique identifier for that patch.  It propagates
-all the way into the git changelog.  The "summary phrase" may
-later be used in developer discussions which refer to the patch.
-People will want to google for the "summary phrase" to read
-discussion regarding that patch.
+Bear in mind that the "summary phrase" of your email becomes a
+globally-unique identifier for that patch.  It propagates all the way
+into the git changelog.  The "summary phrase" may later be used in
+developer discussions which refer to the patch.  People will want to
+google for the "summary phrase" to read discussion regarding that
+patch.  It will also be the only thing that people may quickly see
+when, two or three months later, they are going through perhaps
+thousands of patches using tools such as "gitk" or "git log
+--oneline".
+
+For these reasons, the "summary" must be no more than 70-75
+characters, and it must describe both what the patch changes, as well
+as why the patch might be necessary.  It is challenging to be both
+succinct and descriptive, but that is what a well-written summary
+should do.
+
+The "summary phrase" may be prefixed by tags enclosed in square
+brackets: "Subject: [PATCH tag] <summary phrase>".  The tags are not
+considered part of the summary phrase, but describe how the patch
+should be treated.  Common tags might include a version descriptor if
+the multiple versions of the patch have been sent out in response to
+comments (i.e., "v1, v2, v3"), or "RFC" to indicate a request for
+comments.  If there are four patches in a patch series the individual
+patches may be numbered like this: 1/4, 2/4, 3/4, 4/4.  This assures
+that developers understand the order in which the patches should be
+applied and that they have reviewed or applied all of the patches in
+the patch series.
 
 A couple of example Subjects:
 
@@ -517,19 +542,31 @@ the patch author in the changelog.
 The explanation body will be committed to the permanent source
 changelog, so should make sense to a competent reader who has long
 since forgotten the immediate details of the discussion that might
-have led to this patch.
+have led to this patch.  Including symptoms of the failure which the
+patch addresses (kernel log messages, oops messages, etc.) is
+especially useful for people who might be searching the commit logs
+looking for the applicable patch.  If a patch fixes a compile failure,
+it may not be necessary to include _all_ of the compile failures; just
+enough that it is likely that someone searching for the patch can find
+it.  As in the "summary phrase", it is important to be both succinct as
+well as descriptive.
 
 The "---" marker line serves the essential purpose of marking for patch
 handling tools where the changelog message ends.
 
 One good use for the additional comments after the "---" marker is for
-a diffstat, to show what files have changed, and the number of inserted
-and deleted lines per file.  A diffstat is especially useful on bigger
-patches.  Other comments relevant only to the moment or the maintainer,
-not suitable for the permanent changelog, should also go here.
-Use diffstat options "-p 1 -w 70" so that filenames are listed from the
-top of the kernel source tree and don't use too much horizontal space
-(easily fit in 80 columns, maybe with some indentation).
+a diffstat, to show what files have changed, and the number of
+inserted and deleted lines per file.  A diffstat is especially useful
+on bigger patches.  Other comments relevant only to the moment or the
+maintainer, not suitable for the permanent changelog, should also go
+here.  A good example of such comments might be "patch changelogs"
+which describe what has changed between the v1 and v2 version of the
+patch.
+
+If you are going to include a diffstat after the "---" marker, please
+use diffstat options "-p 1 -w 70" so that filenames are listed from
+the top of the kernel source tree and don't use too much horizontal
+space (easily fit in 80 columns, maybe with some indentation).
 
 See more details on the proper patch format in the following
 references.
-- 
cgit v0.10.2


From 5926a295bb78272b3f648f62febecd19a1b6a6ca Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Thu, 4 Jun 2009 17:43:04 +0100
Subject: [ARM] 5541/1: serial/amba-pl011.c: add support for the modified port
 found in Nomadik

The Nomadik 8815 SoC has a slightly modified version of the PL011 block.
The patch uses the different ID value as a key to select a vendor
structure that is used to keep track of the differences, as suggested
by Russell King.

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Acked-by: Andrea Gallo <andrea.gallo@stericsson.com>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 4cfa1eb..8c5bda2 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -70,6 +70,23 @@ struct uart_amba_port {
 	struct clk		*clk;
 	unsigned int		im;	/* interrupt mask */
 	unsigned int		old_status;
+	unsigned int		ifls;	/* vendor-specific */
+};
+
+/* There is by now at least one vendor with differing details, so handle it */
+struct vendor_data {
+	unsigned int		ifls;
+	unsigned int		fifosize;
+};
+
+static struct vendor_data vendor_arm = {
+	.ifls			= UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
+	.fifosize		= 16,
+};
+
+static struct vendor_data vendor_st = {
+	.ifls			= UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
+	.fifosize		= 64,
 };
 
 static void pl011_stop_tx(struct uart_port *port)
@@ -360,8 +377,7 @@ static int pl011_startup(struct uart_port *port)
 	if (retval)
 		goto clk_dis;
 
-	writew(UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
-	       uap->port.membase + UART011_IFLS);
+	writew(uap->ifls, uap->port.membase + UART011_IFLS);
 
 	/*
 	 * Provoke TX FIFO interrupt into asserting.
@@ -732,6 +748,7 @@ static struct uart_driver amba_reg = {
 static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct uart_amba_port *uap;
+	struct vendor_data *vendor = id->data;
 	void __iomem *base;
 	int i, ret;
 
@@ -762,12 +779,13 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 		goto unmap;
 	}
 
+	uap->ifls = vendor->ifls;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
 	uap->port.iotype = UPIO_MEM;
 	uap->port.irq = dev->irq[0];
-	uap->port.fifosize = 16;
+	uap->port.fifosize = vendor->fifosize;
 	uap->port.ops = &amba_pl011_pops;
 	uap->port.flags = UPF_BOOT_AUTOCONF;
 	uap->port.line = i;
@@ -812,6 +830,12 @@ static struct amba_id pl011_ids[] __initdata = {
 	{
 		.id	= 0x00041011,
 		.mask	= 0x000fffff,
+		.data	= &vendor_arm,
+	},
+	{
+		.id	= 0x00380802,
+		.mask	= 0x00ffffff,
+		.data	= &vendor_st,
 	},
 	{ 0, 0 },
 };
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 48ee32a..42949e2 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -114,6 +114,9 @@
 #define UART011_IFLS_TX4_8	(2 << 0)
 #define UART011_IFLS_TX6_8	(3 << 0)
 #define UART011_IFLS_TX7_8	(4 << 0)
+/* special values for ST vendor with deeper fifo */
+#define UART011_IFLS_RX_HALF	(5 << 3)
+#define UART011_IFLS_TX_HALF	(5 << 0)
 
 #define UART011_OEIM		(1 << 10)	/* overrun error interrupt mask */
 #define UART011_BEIM		(1 << 9)	/* break error interrupt mask */
-- 
cgit v0.10.2


From c0683039207226afcffbe0fbf6a1caaee77a37b0 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 3 Jun 2009 17:43:14 +0100
Subject: [ARM] 5536/1: Move clk_add_alias() to arch/arm/common/clkdev.c

This can be used for other arm platforms too as discussed
on the linux-arm-kernel list.

Also check the return value with IS_ERR and return PTR_ERR
as suggested by Russell King.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c
index 5589444..f37afd9 100644
--- a/arch/arm/common/clkdev.c
+++ b/arch/arm/common/clkdev.c
@@ -135,6 +135,24 @@ struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
 }
 EXPORT_SYMBOL(clkdev_alloc);
 
+int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
+	struct device *dev)
+{
+	struct clk *r = clk_get(dev, id);
+	struct clk_lookup *l;
+
+	if (IS_ERR(r))
+		return PTR_ERR(r);
+
+	l = clkdev_alloc(r, alias, alias_dev_name);
+	clk_put(r);
+	if (!l)
+		return -ENODEV;
+	clkdev_add(l);
+	return 0;
+}
+EXPORT_SYMBOL(clk_add_alias);
+
 /*
  * clkdev_drop - remove a clock dynamically allocated
  */
diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c
index db52d2c..49ae382 100644
--- a/arch/arm/mach-pxa/clock.c
+++ b/arch/arm/mach-pxa/clock.c
@@ -86,20 +86,3 @@ void clks_register(struct clk_lookup *clks, size_t num)
 	for (i = 0; i < num; i++)
 		clkdev_add(&clks[i]);
 }
-
-int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
-	struct device *dev)
-{
-	struct clk *r = clk_get(dev, id);
-	struct clk_lookup *l;
-
-	if (!r)
-		return -ENODEV;
-
-	l = clkdev_alloc(r, alias, alias_dev_name);
-	clk_put(r);
-	if (!l)
-		return -ENODEV;
-	clkdev_add(l);
-	return 0;
-}
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 1db9bbf..1d37f42 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -142,4 +142,17 @@ struct clk *clk_get_parent(struct clk *clk);
  */
 struct clk *clk_get_sys(const char *dev_id, const char *con_id);
 
+/**
+ * clk_add_alias - add a new clock alias
+ * @alias: name for clock alias
+ * @alias_dev_name: device name
+ * @id: platform specific clock name
+ * @dev: device
+ *
+ * Allows using generic clock names for drivers by adding a new alias.
+ * Assumes clkdev, see clkdev.h for more info.
+ */
+int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
+			struct device *dev);
+
 #endif
-- 
cgit v0.10.2


From 7666c17e2b0986a079da46122d8658544416c2cf Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 1 Jun 2009 14:27:26 +0100
Subject: [ARM] 5535/1: U300 Makefile.boot

The Makefile.boot file for the U300 port. This will compile the
kernel for different ZRELADDR depending on the location of
physical RAM in the chosen configuration.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-u300/Makefile.boot b/arch/arm/mach-u300/Makefile.boot
new file mode 100644
index 0000000..6fbfc6e
--- /dev/null
+++ b/arch/arm/mach-u300/Makefile.boot
@@ -0,0 +1,15 @@
+# Note: the following conditions must always be true:
+#   ZRELADDR == virt_to_phys(TEXTADDR)
+#   PARAMS_PHYS must be within 4MB of ZRELADDR
+#   INITRD_PHYS must be in RAM
+
+ifdef CONFIG_MACH_U300_SINGLE_RAM
+     zreladdr-y	:= 0x28E08000
+  params_phys-y	:= 0x28E00100
+else
+     zreladdr-y	:= 0x48008000
+  params_phys-y	:= 0x48000100
+endif
+
+# This isn't used.
+#initrd_phys-y	:= 0x29800000
-- 
cgit v0.10.2


From 2cb7878a3a4341d1faa208de962d66f0817d3e7a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 3 Jun 2009 14:52:24 +0930
Subject: lguest: fix 'unhandled trap 13' with CONFIG_CC_STACKPROTECTOR

We don't set up the canary; let's disable stack protector on boot.c so
we can get into lguest_init, then set it up.  As a side effect,
switch_to_new_gdt() sets up %fs for us properly too.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile
index 27f0c9e..94e0e54 100644
--- a/arch/x86/lguest/Makefile
+++ b/arch/x86/lguest/Makefile
@@ -1 +1,2 @@
 obj-y		:= i386_head.o boot.o
+CFLAGS_boot.o	:= $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ca7ec44..33a93b4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -67,6 +67,7 @@
 #include <asm/mce.h>
 #include <asm/io.h>
 #include <asm/i387.h>
+#include <asm/stackprotector.h>
 #include <asm/reboot.h>		/* for struct machine_ops */
 
 /*G:010 Welcome to the Guest!
@@ -1088,13 +1089,21 @@ __init void lguest_init(void)
 	 * lguest_init() where the rest of the fairly chaotic boot setup
 	 * occurs. */
 
+	/* The stack protector is a weird thing where gcc places a canary
+	 * value on the stack and then checks it on return.  This file is
+	 * compiled with -fno-stack-protector it, so we got this far without
+	 * problems.  The value of the canary is kept at offset 20 from the
+	 * %gs register, so we need to set that up before calling C functions
+	 * in other files. */
+	setup_stack_canary_segment(0);
+	/* We could just call load_stack_canary_segment(), but we might as
+	 * call switch_to_new_gdt() which loads the whole table and sets up
+	 * the per-cpu segment descriptor register %fs as well. */
+	switch_to_new_gdt(0);
+
 	/* As described in head_32.S, we map the first 128M of memory. */
 	max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
 
-	/* Load the %fs segment register (the per-cpu segment register) with
-	 * the normal data segment to get through booting. */
-	asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
-
 	/* The Host<->Guest Switcher lives at the top of our address space, and
 	 * the Host told us how big it is when we made LGUEST_INIT hypercall:
 	 * it put the answer in lguest_data.reserve_mem  */
-- 
cgit v0.10.2


From 44fb5511638938a2c37c895abc14df648ffc07e9 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Thu, 4 Jun 2009 15:34:51 -0400
Subject: Btrfs: Fix oops and use after free during space balancing

The btrfs allocator uses list_for_each to walk the available block
groups when searching for free blocks.  It starts off with a hint
to help find the best block group for a given allocation.

The hint is resolved into a block group, but we don't properly check
to make sure the block group we find isn't in the middle of being
freed due to filesystem shrinking or balancing.  If it is being
freed, the list pointers in it are bogus and can't be trusted.  But,
the code happily goes along and uses them in the list_for_each loop,
leading to all kinds of fun.

The fix used here is to check to make sure the block group we find really
is on the list before we use it.  list_del_init is used when removing
it from the list, so we can do a proper check.

The allocation clustering code has a similar bug where it will trust
the block group in the current free space cluster.  If our allocation
flags have changed (going from single spindle dup to raid1 for example)
because the drives in the FS have changed, we're not allowed to use
the old block group any more.

The fix used here is to check the current cluster against the
current allocation flags.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3e2c7c7..35af933 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2622,7 +2622,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 						       search_start);
 		if (block_group && block_group_bits(block_group, data)) {
 			down_read(&space_info->groups_sem);
-			goto have_block_group;
+			if (list_empty(&block_group->list) ||
+			    block_group->ro) {
+				/*
+				 * someone is removing this block group,
+				 * we can't jump into the have_block_group
+				 * target because our list pointers are not
+				 * valid
+				 */
+				btrfs_put_block_group(block_group);
+				up_read(&space_info->groups_sem);
+			} else
+				goto have_block_group;
 		} else if (block_group) {
 			btrfs_put_block_group(block_group);
 		}
@@ -2656,6 +2667,13 @@ have_block_group:
 			 * people trying to start a new cluster
 			 */
 			spin_lock(&last_ptr->refill_lock);
+			if (last_ptr->block_group &&
+			    (last_ptr->block_group->ro ||
+			    !block_group_bits(last_ptr->block_group, data))) {
+				offset = 0;
+				goto refill_cluster;
+			}
+
 			offset = btrfs_alloc_from_cluster(block_group, last_ptr,
 						 num_bytes, search_start);
 			if (offset) {
@@ -2681,10 +2699,17 @@ have_block_group:
 
 				last_ptr_loop = 1;
 				search_start = block_group->key.objectid;
+				/*
+				 * we know this block group is properly
+				 * in the list because
+				 * btrfs_remove_block_group, drops the
+				 * cluster before it removes the block
+				 * group from the list
+				 */
 				goto have_block_group;
 			}
 			spin_unlock(&last_ptr->lock);
-
+refill_cluster:
 			/*
 			 * this cluster didn't work out, free it and
 			 * start over
@@ -5968,6 +5993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_path *path;
 	struct btrfs_block_group_cache *block_group;
+	struct btrfs_free_cluster *cluster;
 	struct btrfs_key key;
 	int ret;
 
@@ -5979,6 +6005,21 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
 	memcpy(&key, &block_group->key, sizeof(key));
 
+	/* make sure this block group isn't part of an allocation cluster */
+	cluster = &root->fs_info->data_alloc_cluster;
+	spin_lock(&cluster->refill_lock);
+	btrfs_return_cluster_to_free_space(block_group, cluster);
+	spin_unlock(&cluster->refill_lock);
+
+	/*
+	 * make sure this block group isn't part of a metadata
+	 * allocation cluster
+	 */
+	cluster = &root->fs_info->meta_alloc_cluster;
+	spin_lock(&cluster->refill_lock);
+	btrfs_return_cluster_to_free_space(block_group, cluster);
+	spin_unlock(&cluster->refill_lock);
+
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
@@ -5988,7 +6029,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_unlock(&root->fs_info->block_group_cache_lock);
 	btrfs_remove_free_space_cache(block_group);
 	down_write(&block_group->space_info->groups_sem);
-	list_del(&block_group->list);
+	/*
+	 * we must use list_del_init so people can check to see if they
+	 * are still on the list after taking the semaphore
+	 */
+	list_del_init(&block_group->list);
 	up_write(&block_group->space_info->groups_sem);
 
 	spin_lock(&block_group->space_info->lock);
-- 
cgit v0.10.2


From 0f5486b5c71a831a713ce356d8d06822e3c7c379 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 4 Jun 2009 20:48:04 +0200
Subject: perf_counter: Sleep before refresh using poll in perf top

perf top is refreshed every delay_secs the thread runs in such
loop:

while (sleep(delay_secs)) {
	print_sym_table();
}

At the end of print_sym_table(), poll is used without sleep delay
to check if we have something from stdin.

It means that this check is done only every delay_secs, which can
be higher that 2 secs if the user defined a custom refresh rate.

We can drop sleep() here and directly use poll to wait between
refresh periods, so that the reaction after the user stops perf top
after typing "Enter" is immediate and doesn't suffer from the
delay_secs latency.

Nb: poll doesn't add any overhead that can parasite perf top measures
since it sleeps the entire timeout here.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1244141284-7507-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 31c00ba..28cbde4 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -204,7 +204,7 @@ static void print_sym_table(void)
 			list_remove_active_sym(syme);
 	}
 
-	write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
+	puts(CONSOLE_CLEAR);
 
 	printf(
 "------------------------------------------------------------------------------\n");
@@ -278,23 +278,21 @@ static void print_sym_table(void)
 		color_fprintf(stdout, color, "%4.1f%%", pcnt);
 		printf(" - %016llx : %s\n", sym->start, sym->name);
 	}
-
-	{
-		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
-
-		if (poll(&stdin_poll, 1, 0) == 1) {
-			printf("key pressed - exiting.\n");
-			exit(0);
-		}
-	}
 }
 
 static void *display_thread(void *arg)
 {
+	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+	int delay_msecs = delay_secs * 1000;
+
 	printf("PerfTop refresh period: %d seconds\n", delay_secs);
 
-	while (!sleep(delay_secs))
+	do {
 		print_sym_table();
+	} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
+
+	printf("key pressed - exiting.\n");
+	exit(0);
 
 	return NULL;
 }
-- 
cgit v0.10.2


From 9ac995457b2a148ed9bb8860e8b7cb869327b102 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 4 Jun 2009 13:54:00 -0300
Subject: perf report: Add -vvv to print the list of threads and its mmaps

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 5d19121..1a1028d 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -234,6 +234,13 @@ static int map__overlap(struct map *l, struct map *r)
 	return 0;
 }
 
+static size_t map__fprintf(struct map *self, FILE *fp)
+{
+	return fprintf(fp, " %lx-%lx %lx %s\n",
+		       self->start, self->end, self->pgoff, self->dso->name);
+}
+
+
 struct thread {
 	struct rb_node	 rb_node;
 	struct list_head maps;
@@ -264,6 +271,18 @@ static int thread__set_comm(struct thread *self, const char *comm)
 	return self->comm ? 0 : -ENOMEM;
 }
 
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	struct map *pos;
+	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+
+	list_for_each_entry(pos, &self->maps, node)
+		ret += map__fprintf(pos, fp);
+
+	return ret;
+}
+
+
 static struct rb_root threads;
 static struct thread *last_match;
 
@@ -355,6 +374,20 @@ static struct map *thread__find_map(struct thread *self, uint64_t ip)
 	return NULL;
 }
 
+static size_t threads__fprintf(FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
+		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+		ret += thread__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
 /*
  * histogram, sorted on item, collects counts
  */
@@ -1126,6 +1159,9 @@ more:
 	if (dump_trace)
 		return 0;
 
+	if (verbose >= 3)
+		threads__fprintf(stdout);
+
 	if (verbose >= 2)
 		dsos__fprintf(stdout);
 
-- 
cgit v0.10.2


From 172124e220f1854acc99ee394671781b8b5e2120 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 4 Jun 2009 22:34:44 +0200
Subject: Revert "block: implement blkdev_readpages"

This reverts commit db2dbb12dc47a50c7a4c5678f526014063e486f6.

It apparently causes problems with partition table read-ahead
on archs with large page sizes. Until that problem is diagnosed
further, just drop the readpages support on block devices.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index a29b4dc..2dfc6cd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -331,12 +331,6 @@ static int blkdev_readpage(struct file * file, struct page * page)
 	return block_read_full_page(page, blkdev_get_block);
 }
 
-static int blkdev_readpages(struct file *file, struct address_space *mapping,
-			struct list_head *pages, unsigned nr_pages)
-{
-	return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
-}
-
 static int blkdev_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -1405,7 +1399,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 
 static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
-	.readpages	= blkdev_readpages,
 	.writepage	= blkdev_writepage,
 	.sync_page	= block_sync_page,
 	.write_begin	= blkdev_write_begin,
-- 
cgit v0.10.2


From 76a0f40fd6eff1bce3b91925cea7587b3399fe80 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 4 Jun 2009 22:15:58 +0200
Subject: perf_counter tools: Fix warn_unused_result warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix warnings for return values that we don't care about:

 util/quote.c:222: attention : ignoring return value of ‘fwrite’, declared with attribute warn_unused_result
 util/quote.c:235: attention : ignoring return value of ‘fwrite’, declared with attribute warn_unused_result
 util/quote.c: In function ‘write_name_quotedpfx’:
 util/quote.c:290: attention : ignoring return value of ‘fwrite’, declared with attribute warn_unused_result

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1244146558-8635-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/quote.c b/Documentation/perf_counter/util/quote.c
index 7a49fcf..f18c521 100644
--- a/Documentation/perf_counter/util/quote.c
+++ b/Documentation/perf_counter/util/quote.c
@@ -201,8 +201,9 @@ static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
 	} while (0)
 #define EMITBUF(s, l)                           \
 	do {                                        \
+		int __ret;				\
 		if (sb) strbuf_add(sb, (s), (l));       \
-		if (fp) fwrite((s), (l), 1, fp);        \
+		if (fp) __ret = fwrite((s), (l), 1, fp);        \
 		count += (l);                           \
 	} while (0)
 
@@ -287,7 +288,9 @@ extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
 		quote_c_style(name, NULL, fp, 1);
 		fputc('"', fp);
 	} else {
-		fwrite(pfx, pfxlen, 1, fp);
+		int ret;
+
+		ret = fwrite(pfx, pfxlen, 1, fp);
 		fputs(name, fp);
 	}
 	fputc(terminator, fp);
-- 
cgit v0.10.2


From 03f5d8bcf094a5e3b501bd2ae1553656efa8d1be Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 9 Jun 2009 00:17:05 -0400
Subject: ext4: Get rid of EXTEND_DISKSIZE flag of ext4_get_blocks_handle()

Get rid of EXTEND_DISKSIZE flag of ext4_get_blocks_handle(). This
seems to be a relict from some old days and setting disksize in this
function does not make much sense.  Currently it was set only by
ext4_getblk().  Since the parameter has some effect only if create ==
1, it is easy to check by grepping through the sources that the three
callers which end up calling ext4_getblk() with create == 1
(ext4_append, ext4_quota_write, ext4_mkdir) do the right thing and set
disksize themselves.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4311cc8..59657ff 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -323,15 +323,13 @@ struct ext4_new_group_data {
 #define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
 #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
 						 EXT4_GET_BLOCKS_CREATE)
-	/* Update the ext4_inode_info i_disksize field */
-#define EXT4_GET_BLOCKS_EXTEND_DISKSIZE		0x0004
 	/* Caller is from the delayed allocation writeout path,
 	   so set the magic i_delalloc_reserve_flag after taking the 
 	   inode allocation semaphore for */
-#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0008
+#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
 	/* Call ext4_da_update_reserve_space() after successfully 
 	   allocating the blocks */
-#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0010
+#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0008
 
 
 /*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d4e99e9..9c35a7b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2779,7 +2779,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
-	loff_t disksize;
 
 	__clear_bit(BH_New, &bh_result->b_state);
 	ext_debug("blocks %u/%u requested for inode %u\n",
@@ -2969,14 +2968,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-	if (flags & EXT4_GET_BLOCKS_EXTEND_DISKSIZE) {
-		disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
-		if (disksize > i_size_read(inode))
-			disksize = i_size_read(inode);
-		if (disksize > EXT4_I(inode)->i_disksize)
-			EXT4_I(inode)->i_disksize = disksize;
-	}
-
 	set_buffer_new(bh_result);
 
 	/* Cache only when it is _not_ an uninitialized extent */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 14c00ff..17ed0d2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -933,11 +933,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	int indirect_blks;
 	int blocks_to_boundary = 0;
 	int depth;
-	struct ext4_inode_info *ei = EXT4_I(inode);
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
-	loff_t disksize;
-
 
 	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
@@ -1003,19 +1000,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	if (!err)
 		err = ext4_splice_branch(handle, inode, iblock,
 					partial, indirect_blks, count);
-	/*
-	 * i_disksize growing is protected by i_data_sem.  Don't forget to
-	 * protect it if you're about to implement concurrent
-	 * ext4_get_block() -bzzz
-	*/
-	if (!err && (flags & EXT4_GET_BLOCKS_EXTEND_DISKSIZE)) {
-		disksize = ((loff_t) iblock + count) << inode->i_blkbits;
-		if (disksize > i_size_read(inode))
-			disksize = i_size_read(inode);
-		if (disksize > ei->i_disksize)
-			ei->i_disksize = disksize;
-	}
-	if (err)
+	else 
 		goto cleanup;
 
 	set_buffer_new(bh_result);
@@ -1321,7 +1306,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 {
 	struct buffer_head dummy;
 	int fatal = 0, err;
-	int flags = EXT4_GET_BLOCKS_EXTEND_DISKSIZE;
+	int flags = 0;
 
 	J_ASSERT(handle != NULL || create == 0);
 
@@ -2153,9 +2138,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	}
 
 	/*
-	 * Update on-disk size along with block allocation we don't
-	 * use EXT4_GET_BLOCKS_EXTEND_DISKSIZE as size may change
-	 * within already allocated block -bzzz
+	 * Update on-disk size along with block allocation.
 	 */
 	disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
 	if (disksize > i_size_read(mpd->inode))
-- 
cgit v0.10.2


From b31e15527a9bb71b6a11a425d17ce139a62f5af5 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Thu, 4 Jun 2009 17:36:36 -0400
Subject: ext4: Change all super.c messages to print the device

This patch changes ext4 super.c to include the device name with all
warning/error messages, by using a new utility function ext4_msg.
It's a rather large patch, but very mechanic. I left debug printks
alone.

This is a straightforward port of a patch which Andi Kleen did for
ext3.

Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 59657ff..cc7d5ed 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1401,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_warning(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
+extern void ext4_msg(struct super_block *, const char *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
 				const char *, const char *, ...)
 	__attribute__ ((format (printf, 4, 5)));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0a97b1a..c191d0f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -306,7 +306,7 @@ static void ext4_handle_error(struct super_block *sb)
 			jbd2_journal_abort(journal, -EIO);
 	}
 	if (test_opt(sb, ERRORS_RO)) {
-		printk(KERN_CRIT "Remounting filesystem read-only\n");
+		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 		sb->s_flags |= MS_RDONLY;
 	}
 	ext4_commit_super(sb, 1);
@@ -399,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function,
 {
 	va_list args;
 
-	printk(KERN_CRIT "ext4_abort called.\n");
-
 	va_start(args, fmt);
 	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 	vprintk(fmt, args);
@@ -413,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function,
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	printk(KERN_CRIT "Remounting filesystem read-only\n");
+	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 	sb->s_flags |= MS_RDONLY;
 	EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
@@ -421,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function,
 		jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
 
+void ext4_msg (struct super_block * sb, const char *prefix,
+		   const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+}
+
 void ext4_warning(struct super_block *sb, const char *function,
 		  const char *fmt, ...)
 {
@@ -499,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
 /*
  * Open the external journal device
  */
-static struct block_device *ext4_blkdev_get(dev_t dev)
+static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 {
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
@@ -510,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev)
 	return bdev;
 
 fail:
-	printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
+	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 			__bdevname(dev, b), PTR_ERR(bdev));
 	return NULL;
 }
@@ -546,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 {
 	struct list_head *l;
 
-	printk(KERN_ERR "sb orphan head is %d\n",
-	       le32_to_cpu(sbi->s_es->s_last_orphan));
+	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
+		 le32_to_cpu(sbi->s_es->s_last_orphan));
 
 	printk(KERN_ERR "sb_info orphan list:\n");
 	list_for_each(l, &sbi->s_orphan) {
@@ -678,8 +688,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 static void ext4_destroy_inode(struct inode *inode)
 {
 	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
-		printk("EXT4 Inode %p: orphan list check failed!\n",
-			EXT4_I(inode));
+		ext4_msg(inode->i_sb, KERN_ERR,
+			 "Inode %lu (%p): orphan list check failed!",
+			 inode->i_ino, EXT4_I(inode));
 		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 				EXT4_I(inode), sizeof(struct ext4_inode_info),
 				true);
@@ -1239,8 +1250,7 @@ static int parse_options(char *options, struct super_block *sb,
 #else
 		case Opt_user_xattr:
 		case Opt_nouser_xattr:
-			printk(KERN_ERR "EXT4 (no)user_xattr options "
-			       "not supported\n");
+			ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
 			break;
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1253,8 +1263,7 @@ static int parse_options(char *options, struct super_block *sb,
 #else
 		case Opt_acl:
 		case Opt_noacl:
-			printk(KERN_ERR "EXT4 (no)acl options "
-			       "not supported\n");
+			ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
 			break;
 #endif
 		case Opt_journal_update:
@@ -1264,16 +1273,16 @@ static int parse_options(char *options, struct super_block *sb,
 			   user to specify an existing inode to be the
 			   journal file. */
 			if (is_remount) {
-				printk(KERN_ERR "EXT4-fs: cannot specify "
-				       "journal on remount\n");
+				ext4_msg(sb, KERN_ERR,
+					 "Cannot specify journal on remount");
 				return 0;
 			}
 			set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
 			break;
 		case Opt_journal_dev:
 			if (is_remount) {
-				printk(KERN_ERR "EXT4-fs: cannot specify "
-				       "journal on remount\n");
+				ext4_msg(sb, KERN_ERR,
+					"Cannot specify journal on remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option))
@@ -1327,9 +1336,8 @@ static int parse_options(char *options, struct super_block *sb,
 			if (is_remount) {
 				if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
 						!= data_opt) {
-					printk(KERN_ERR
-						"EXT4-fs: cannot change data "
-						"mode on remount\n");
+					ext4_msg(sb, KERN_ERR,
+						"Cannot change data mode on remount");
 					return 0;
 				}
 			} else {
@@ -1359,31 +1367,31 @@ static int parse_options(char *options, struct super_block *sb,
 set_qf_name:
 			if (sb_any_quota_loaded(sb) &&
 			    !sbi->s_qf_names[qtype]) {
-				printk(KERN_ERR
-				       "EXT4-fs: Cannot change journaled "
-				       "quota options when quota turned on.\n");
+				ext4_msg(sb, KERN_ERR,
+				       "Cannot change journaled "
+				       "quota options when quota turned on");
 				return 0;
 			}
 			qname = match_strdup(&args[0]);
 			if (!qname) {
-				printk(KERN_ERR
-					"EXT4-fs: not enough memory for "
-					"storing quotafile name.\n");
+				ext4_msg(sb, KERN_ERR,
+					"Not enough memory for "
+					"storing quotafile name");
 				return 0;
 			}
 			if (sbi->s_qf_names[qtype] &&
 			    strcmp(sbi->s_qf_names[qtype], qname)) {
-				printk(KERN_ERR
-					"EXT4-fs: %s quota file already "
-					"specified.\n", QTYPE2NAME(qtype));
+				ext4_msg(sb, KERN_ERR,
+					"%s quota file already "
+					"specified", QTYPE2NAME(qtype));
 				kfree(qname);
 				return 0;
 			}
 			sbi->s_qf_names[qtype] = qname;
 			if (strchr(sbi->s_qf_names[qtype], '/')) {
-				printk(KERN_ERR
-					"EXT4-fs: quotafile must be on "
-					"filesystem root.\n");
+				ext4_msg(sb, KERN_ERR,
+					"quotafile must be on "
+					"filesystem root");
 				kfree(sbi->s_qf_names[qtype]);
 				sbi->s_qf_names[qtype] = NULL;
 				return 0;
@@ -1398,9 +1406,9 @@ set_qf_name:
 clear_qf_name:
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_qf_names[qtype]) {
-				printk(KERN_ERR "EXT4-fs: Cannot change "
+				ext4_msg(sb, KERN_ERR, "Cannot change "
 					"journaled quota options when "
-					"quota turned on.\n");
+					"quota turned on");
 				return 0;
 			}
 			/*
@@ -1417,9 +1425,9 @@ clear_qf_name:
 set_qf_format:
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_jquota_fmt != qfmt) {
-				printk(KERN_ERR "EXT4-fs: Cannot change "
+				ext4_msg(sb, KERN_ERR, "Cannot change "
 					"journaled quota options when "
-					"quota turned on.\n");
+					"quota turned on");
 				return 0;
 			}
 			sbi->s_jquota_fmt = qfmt;
@@ -1435,8 +1443,8 @@ set_qf_format:
 			break;
 		case Opt_noquota:
 			if (sb_any_quota_loaded(sb)) {
-				printk(KERN_ERR "EXT4-fs: Cannot change quota "
-					"options when quota turned on.\n");
+				ext4_msg(sb, KERN_ERR, "Cannot change quota "
+					"options when quota turned on");
 				return 0;
 			}
 			clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1447,8 +1455,8 @@ set_qf_format:
 		case Opt_quota:
 		case Opt_usrquota:
 		case Opt_grpquota:
-			printk(KERN_ERR
-				"EXT4-fs: quota options not supported.\n");
+			ext4_msg(sb, KERN_ERR,
+				"quota options not supported");
 			break;
 		case Opt_usrjquota:
 		case Opt_grpjquota:
@@ -1456,9 +1464,8 @@ set_qf_format:
 		case Opt_offgrpjquota:
 		case Opt_jqfmt_vfsold:
 		case Opt_jqfmt_vfsv0:
-			printk(KERN_ERR
-				"EXT4-fs: journaled quota options not "
-				"supported.\n");
+			ext4_msg(sb, KERN_ERR,
+				"journaled quota options not supported");
 			break;
 		case Opt_noquota:
 			break;
@@ -1483,8 +1490,9 @@ set_qf_format:
 			break;
 		case Opt_resize:
 			if (!is_remount) {
-				printk("EXT4-fs: resize option only available "
-					"for remount\n");
+				ext4_msg(sb, KERN_ERR,
+					"resize option only available "
+					"for remount");
 				return 0;
 			}
 			if (match_int(&args[0], &option) != 0)
@@ -1526,8 +1534,9 @@ set_qf_format:
 			if (option < 0 || option > (1 << 30))
 				return 0;
 			if (!is_power_of_2(option)) {
-				printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
-				       " must be a power of 2\n");
+				ext4_msg(sb, KERN_ERR,
+					 "EXT4-fs: inode_readahead_blks"
+					 " must be a power of 2");
 				return 0;
 			}
 			sbi->s_inode_readahead_blks = option;
@@ -1554,9 +1563,9 @@ set_qf_format:
 				set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
 			break;
 		default:
-			printk(KERN_ERR
-			       "EXT4-fs: Unrecognized mount option \"%s\" "
-			       "or missing value\n", p);
+			ext4_msg(sb, KERN_ERR,
+			       "Unrecognized mount option \"%s\" "
+			       "or missing value", p);
 			return 0;
 		}
 	}
@@ -1574,21 +1583,21 @@ set_qf_format:
 				(sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
 		    (sbi->s_qf_names[GRPQUOTA] &&
 				(sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
-			printk(KERN_ERR "EXT4-fs: old and new quota "
-					"format mixing.\n");
+			ext4_msg(sb, KERN_ERR, "old and new quota "
+					"format mixing");
 			return 0;
 		}
 
 		if (!sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT4-fs: journaled quota format "
-					"not specified.\n");
+			ext4_msg(sb, KERN_ERR, "journaled quota format "
+					"not specified");
 			return 0;
 		}
 	} else {
 		if (sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT4-fs: journaled quota format "
+			ext4_msg(sb, KERN_ERR, "journaled quota format "
 					"specified with no journaling "
-					"enabled.\n");
+					"enabled");
 			return 0;
 		}
 	}
@@ -1603,31 +1612,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	int res = 0;
 
 	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
-		printk(KERN_ERR "EXT4-fs warning: revision level too high, "
-		       "forcing read-only mode\n");
+		ext4_msg(sb, KERN_ERR, "revision level too high, "
+			 "forcing read-only mode");
 		res = MS_RDONLY;
 	}
 	if (read_only)
 		return res;
 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
-		printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
+			 "running e2fsck is recommended");
 	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: mounting fs with errors, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: mounting fs with errors, "
+			 "running e2fsck is recommended");
 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
 		 le16_to_cpu(es->s_mnt_count) >=
 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: maximal mount count reached, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: maximal mount count reached, "
+			 "running e2fsck is recommended");
 	else if (le32_to_cpu(es->s_checkinterval) &&
 		(le32_to_cpu(es->s_lastcheck) +
 			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: checktime reached, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+			 "warning: checktime reached, "
+			 "running e2fsck is recommended");
 	if (!sbi->s_journal)
 		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
@@ -1649,11 +1658,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 			sbi->s_mount_opt);
 
 	if (EXT4_SB(sb)->s_journal) {
-		printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
-		       sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+		ext4_msg(sb, KERN_INFO, "%s journal on %s",
+		       EXT4_SB(sb)->s_journal->j_inode ? "internal" :
 		       "external", EXT4_SB(sb)->s_journal->j_devname);
 	} else {
-		printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+		ext4_msg(sb, KERN_INFO, "no journal");
 	}
 	return res;
 }
@@ -1688,8 +1697,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
 			memset(sbi->s_flex_groups, 0, size);
 	}
 	if (sbi->s_flex_groups == NULL) {
-		printk(KERN_ERR "EXT4-fs: not enough memory for "
-				"%u flex groups\n", flex_group_count);
+		ext4_msg(sb, KERN_ERR, "not enough memory for "
+				"%u flex groups", flex_group_count);
 		goto failed;
 	}
 
@@ -1775,32 +1784,32 @@ static int ext4_check_descriptors(struct super_block *sb)
 
 		block_bitmap = ext4_block_bitmap(sb, gdp);
 		if (block_bitmap < first_block || block_bitmap > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Block bitmap for group %u not in group "
-			       "(block %llu)!\n", i, block_bitmap);
+			       "(block %llu)!", i, block_bitmap);
 			return 0;
 		}
 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode bitmap for group %u not in group "
-			       "(block %llu)!\n", i, inode_bitmap);
+			       "(block %llu)!", i, inode_bitmap);
 			return 0;
 		}
 		inode_table = ext4_inode_table(sb, gdp);
 		if (inode_table < first_block ||
 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode table for group %u not in group "
-			       "(block %llu)!\n", i, inode_table);
+			       "(block %llu)!", i, inode_table);
 			return 0;
 		}
 		ext4_lock_group(sb, i);
 		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
-			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-			       "Checksum for group %u failed (%u!=%u)\n",
-			       i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
-			       gdp)), le16_to_cpu(gdp->bg_checksum));
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Checksum for group %u failed (%u!=%u)",
+				 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
+				     gdp)), le16_to_cpu(gdp->bg_checksum));
 			if (!(sb->s_flags & MS_RDONLY)) {
 				ext4_unlock_group(sb, i);
 				return 0;
@@ -1847,8 +1856,8 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 	}
 
 	if (bdev_read_only(sb->s_bdev)) {
-		printk(KERN_ERR "EXT4-fs: write access "
-			"unavailable, skipping orphan cleanup.\n");
+		ext4_msg(sb, KERN_ERR, "write access "
+			"unavailable, skipping orphan cleanup");
 		return;
 	}
 
@@ -1862,8 +1871,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 	}
 
 	if (s_flags & MS_RDONLY) {
-		printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
-		       sb->s_id);
+		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
 		sb->s_flags &= ~MS_RDONLY;
 	}
 #ifdef CONFIG_QUOTA
@@ -1874,9 +1882,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 		if (EXT4_SB(sb)->s_qf_names[i]) {
 			int ret = ext4_quota_on_mount(sb, i);
 			if (ret < 0)
-				printk(KERN_ERR
-					"EXT4-fs: Cannot turn on journaled "
-					"quota: error %d\n", ret);
+				ext4_msg(sb, KERN_ERR,
+					"Cannot turn on journaled "
+					"quota: error %d", ret);
 		}
 	}
 #endif
@@ -1893,16 +1901,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
 		vfs_dq_init(inode);
 		if (inode->i_nlink) {
-			printk(KERN_DEBUG
-				"%s: truncating inode %lu to %lld bytes\n",
+			ext4_msg(sb, KERN_DEBUG,
+				"%s: truncating inode %lu to %lld bytes",
 				__func__, inode->i_ino, inode->i_size);
 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
 				  inode->i_ino, inode->i_size);
 			ext4_truncate(inode);
 			nr_truncates++;
 		} else {
-			printk(KERN_DEBUG
-				"%s: deleting unreferenced inode %lu\n",
+			ext4_msg(sb, KERN_DEBUG,
+				"%s: deleting unreferenced inode %lu",
 				__func__, inode->i_ino);
 			jbd_debug(2, "deleting unreferenced inode %lu\n",
 				  inode->i_ino);
@@ -1914,11 +1922,11 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
 
 	if (nr_orphans)
-		printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
-		       sb->s_id, PLURAL(nr_orphans));
+		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
+		       PLURAL(nr_orphans));
 	if (nr_truncates)
-		printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
-		       sb->s_id, PLURAL(nr_truncates));
+		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
+		       PLURAL(nr_truncates));
 #ifdef CONFIG_QUOTA
 	/* Turn quotas off */
 	for (i = 0; i < MAXQUOTAS; i++) {
@@ -2307,7 +2315,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
 	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
 	if (!blocksize) {
-		printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
+		ext4_msg(sb, KERN_ERR, "unable to set blocksize");
 		goto out_fail;
 	}
 
@@ -2323,7 +2331,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	if (!(bh = sb_bread(sb, logical_sb_block))) {
-		printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
+		ext4_msg(sb, KERN_ERR, "unable to read superblock");
 		goto out_fail;
 	}
 	/*
@@ -2393,9 +2401,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	    (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
-		printk(KERN_WARNING
-		       "EXT4-fs warning: feature flags set on rev 0 fs, "
-		       "running e2fsck is recommended\n");
+		ext4_msg(sb, KERN_WARNING,
+		       "feature flags set on rev 0 fs, "
+		       "running e2fsck is recommended");
 
 	/*
 	 * Check feature flags regardless of the revision level, since we
@@ -2404,16 +2412,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	 */
 	features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
 	if (features) {
-		printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
-		       "unsupported optional features (%x).\n", sb->s_id,
+		ext4_msg(sb, KERN_ERR,
+			"Couldn't mount because of "
+			"unsupported optional features (%x)",
 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
 			~EXT4_FEATURE_INCOMPAT_SUPP));
 		goto failed_mount;
 	}
 	features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
 	if (!(sb->s_flags & MS_RDONLY) && features) {
-		printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
-		       "unsupported optional features (%x).\n", sb->s_id,
+		ext4_msg(sb, KERN_ERR,
+			"Couldn't mount RDWR because of "
+			"unsupported optional features (%x)",
 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
 			~EXT4_FEATURE_RO_COMPAT_SUPP));
 		goto failed_mount;
@@ -2427,9 +2437,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		 */
 		if (sizeof(root->i_blocks) < sizeof(u64) &&
 				!(sb->s_flags & MS_RDONLY)) {
-			printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
+			ext4_msg(sb, KERN_ERR, "Filesystem with huge "
 					"files cannot be mounted read-write "
-					"without CONFIG_LBD.\n", sb->s_id);
+					"without CONFIG_LBD");
 			goto failed_mount;
 		}
 	}
@@ -2437,16 +2447,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
 	if (blocksize < EXT4_MIN_BLOCK_SIZE ||
 	    blocksize > EXT4_MAX_BLOCK_SIZE) {
-		printk(KERN_ERR
-		       "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
-		       blocksize, sb->s_id);
+		ext4_msg(sb, KERN_ERR,
+		       "Unsupported filesystem blocksize %d", blocksize);
 		goto failed_mount;
 	}
 
 	if (sb->s_blocksize != blocksize) {
 		/* Validate the filesystem blocksize */
 		if (!sb_set_blocksize(sb, blocksize)) {
-			printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
+			ext4_msg(sb, KERN_ERR, "bad block size %d",
 					blocksize);
 			goto failed_mount;
 		}
@@ -2456,15 +2465,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		offset = do_div(logical_sb_block, blocksize);
 		bh = sb_bread(sb, logical_sb_block);
 		if (!bh) {
-			printk(KERN_ERR
-			       "EXT4-fs: Can't read superblock on 2nd try.\n");
+			ext4_msg(sb, KERN_ERR,
+			       "Can't read superblock on 2nd try");
 			goto failed_mount;
 		}
 		es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
 		sbi->s_es = es;
 		if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
-			printk(KERN_ERR
-			       "EXT4-fs: Magic mismatch, very weird !\n");
+			ext4_msg(sb, KERN_ERR,
+			       "Magic mismatch, very weird!");
 			goto failed_mount;
 		}
 	}
@@ -2482,8 +2491,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
 		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unsupported inode size: %d\n",
+			ext4_msg(sb, KERN_ERR,
+			       "unsupported inode size: %d",
 			       sbi->s_inode_size);
 			goto failed_mount;
 		}
@@ -2496,8 +2505,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
 		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
 		    !is_power_of_2(sbi->s_desc_size)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unsupported descriptor size %lu\n",
+			ext4_msg(sb, KERN_ERR,
+			       "unsupported descriptor size %lu",
 			       sbi->s_desc_size);
 			goto failed_mount;
 		}
@@ -2537,25 +2546,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
-		printk(KERN_ERR
-		       "EXT4-fs: #blocks per group too big: %lu\n",
+		ext4_msg(sb, KERN_ERR,
+		       "#blocks per group too big: %lu",
 		       sbi->s_blocks_per_group);
 		goto failed_mount;
 	}
 	if (sbi->s_inodes_per_group > blocksize * 8) {
-		printk(KERN_ERR
-		       "EXT4-fs: #inodes per group too big: %lu\n",
+		ext4_msg(sb, KERN_ERR,
+		       "#inodes per group too big: %lu",
 		       sbi->s_inodes_per_group);
 		goto failed_mount;
 	}
 
 	if (ext4_blocks_count(es) >
 		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
-		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
-			" too large to mount safely\n", sb->s_id);
+		ext4_msg(sb, KERN_ERR, "filesystem"
+			" too large to mount safely");
 		if (sizeof(sector_t) < 8)
-			printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
-					"enabled\n");
+			ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
 		goto failed_mount;
 	}
 
@@ -2565,8 +2573,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	/* check blocks count against device size */
 	blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
 	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
-		printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
-		       "exceeds size of device (%llu blocks)\n",
+		ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
+		       "exceeds size of device (%llu blocks)",
 		       ext4_blocks_count(es), blocks_count);
 		goto failed_mount;
 	}
@@ -2576,10 +2584,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	 * of the filesystem.
 	 */
 	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
-		printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
-		       "block %u is beyond end of filesystem (%llu)\n",
-		       le32_to_cpu(es->s_first_data_block),
-		       ext4_blocks_count(es));
+                ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
+			 "block %u is beyond end of filesystem (%llu)",
+			 le32_to_cpu(es->s_first_data_block),
+			 ext4_blocks_count(es));
 		goto failed_mount;
 	}
 	blocks_count = (ext4_blocks_count(es) -
@@ -2587,9 +2595,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			EXT4_BLOCKS_PER_GROUP(sb) - 1);
 	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
 	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
-		printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
+		ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
 		       "(block count %llu, first data block %u, "
-		       "blocks per group %lu)\n", sbi->s_groups_count,
+		       "blocks per group %lu)", sbi->s_groups_count,
 		       ext4_blocks_count(es),
 		       le32_to_cpu(es->s_first_data_block),
 		       EXT4_BLOCKS_PER_GROUP(sb));
@@ -2601,7 +2609,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
 				    GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
-		printk(KERN_ERR "EXT4-fs: not enough memory\n");
+		ext4_msg(sb, KERN_ERR, "not enough memory");
 		goto failed_mount;
 	}
 
@@ -2616,21 +2624,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		block = descriptor_loc(sb, logical_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
 		if (!sbi->s_group_desc[i]) {
-			printk(KERN_ERR "EXT4-fs: "
-			       "can't read group descriptor %d\n", i);
+			ext4_msg(sb, KERN_ERR,
+			       "can't read group descriptor %d", i);
 			db_count = i;
 			goto failed_mount2;
 		}
 	}
 	if (!ext4_check_descriptors(sb)) {
-		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
+		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
 		goto failed_mount2;
 	}
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
 		if (!ext4_fill_flex_info(sb)) {
-			printk(KERN_ERR
-			       "EXT4-fs: unable to initialize "
-			       "flex_bg meta info!\n");
+			ext4_msg(sb, KERN_ERR,
+			       "unable to initialize "
+			       "flex_bg meta info!");
 			goto failed_mount2;
 		}
 
@@ -2652,7 +2660,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
 	}
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: insufficient memory\n");
+		ext4_msg(sb, KERN_ERR, "insufficient memory");
 		goto failed_mount3;
 	}
 
@@ -2692,13 +2700,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			goto failed_mount3;
 		if (!(sb->s_flags & MS_RDONLY) &&
 		    EXT4_SB(sb)->s_journal->j_failed_commit) {
-			printk(KERN_CRIT "EXT4-fs error (device %s): "
+			ext4_msg(sb, KERN_CRIT, "error: "
 			       "ext4_fill_super: Journal transaction "
-			       "%u is corrupt\n", sb->s_id,
+			       "%u is corrupt",
 			       EXT4_SB(sb)->s_journal->j_failed_commit);
 			if (test_opt(sb, ERRORS_RO)) {
-				printk(KERN_CRIT
-				       "Mounting filesystem read-only\n");
+				ext4_msg(sb, KERN_CRIT,
+				       "Mounting filesystem read-only");
 				sb->s_flags |= MS_RDONLY;
 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2712,8 +2720,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		}
 	} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
 	      EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
-		printk(KERN_ERR "EXT4-fs: required journal recovery "
-		       "suppressed and not mounted read-only\n");
+		ext4_msg(sb, KERN_ERR, "required journal recovery "
+		       "suppressed and not mounted read-only");
 		goto failed_mount4;
 	} else {
 		clear_opt(sbi->s_mount_opt, DATA_FLAGS);
@@ -2726,7 +2734,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (ext4_blocks_count(es) > 0xffffffffULL &&
 	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
 				       JBD2_FEATURE_INCOMPAT_64BIT)) {
-		printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
+		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
 		goto failed_mount4;
 	}
 
@@ -2764,8 +2772,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	case EXT4_MOUNT_WRITEBACK_DATA:
 		if (!jbd2_journal_check_available_features
 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
-			printk(KERN_ERR "EXT4-fs: Journal does not support "
-			       "requested data journaling mode\n");
+			ext4_msg(sb, KERN_ERR, "Journal does not support "
+			       "requested data journaling mode");
 			goto failed_mount4;
 		}
 	default:
@@ -2777,8 +2785,8 @@ no_journal:
 
 	if (test_opt(sb, NOBH)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
-			printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
-				"its supported only with writeback mode\n");
+			ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
+				"its supported only with writeback mode");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
 	}
@@ -2789,18 +2797,18 @@ no_journal:
 
 	root = ext4_iget(sb, EXT4_ROOT_INO);
 	if (IS_ERR(root)) {
-		printk(KERN_ERR "EXT4-fs: get root inode failed\n");
+		ext4_msg(sb, KERN_ERR, "get root inode failed");
 		ret = PTR_ERR(root);
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
 		iput(root);
-		printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
+		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
 		goto failed_mount4;
 	}
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
-		printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
+		ext4_msg(sb, KERN_ERR, "get root dentry failed");
 		iput(root);
 		ret = -ENOMEM;
 		goto failed_mount4;
@@ -2829,29 +2837,29 @@ no_journal:
 							sbi->s_inode_size) {
 		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
 						       EXT4_GOOD_OLD_INODE_SIZE;
-		printk(KERN_INFO "EXT4-fs: required extra inode space not"
-			"available.\n");
+		ext4_msg(sb, KERN_INFO, "required extra inode space not"
+			 "available");
 	}
 
 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
-				"requested data journaling mode\n");
+		ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
+			 "requested data journaling mode");
 		clear_opt(sbi->s_mount_opt, DELALLOC);
 	} else if (test_opt(sb, DELALLOC))
-		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+		ext4_msg(sb, KERN_INFO, "delayed allocation enabled");
 
 	err = ext4_setup_system_zone(sb);
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: failed to initialize system "
-		       "zone (%d)\n", err);
+		ext4_msg(sb, KERN_ERR, "failed to initialize system "
+			 "zone (%d)\n", err);
 		goto failed_mount4;
 	}
 
 	ext4_ext_init(sb);
 	err = ext4_mb_init(sb, needs_recovery);
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
-		       err);
+		ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
+			 err);
 		goto failed_mount4;
 	}
 
@@ -2869,7 +2877,7 @@ no_journal:
 	ext4_orphan_cleanup(sb, es);
 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
 	if (needs_recovery) {
-		printk(KERN_INFO "EXT4-fs: recovery complete.\n");
+		ext4_msg(sb, KERN_INFO, "recovery complete");
 		ext4_mark_recovery_complete(sb, es);
 	}
 	if (EXT4_SB(sb)->s_journal) {
@@ -2882,20 +2890,18 @@ no_journal:
 	} else
 		descr = "out journal";
 
-	printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
-	       sb->s_id, descr);
+	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
 
 	lock_kernel();
 	return 0;
 
 cantfind_ext4:
 	if (!silent)
-		printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
-		       sb->s_id);
+		ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
 	goto failed_mount;
 
 failed_mount4:
-	printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
+	ext4_msg(sb, KERN_ERR, "mount failed");
 	ext4_release_system_zone(sb);
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
@@ -2973,27 +2979,27 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 
 	journal_inode = ext4_iget(sb, journal_inum);
 	if (IS_ERR(journal_inode)) {
-		printk(KERN_ERR "EXT4-fs: no journal found.\n");
+		ext4_msg(sb, KERN_ERR, "no journal found");
 		return NULL;
 	}
 	if (!journal_inode->i_nlink) {
 		make_bad_inode(journal_inode);
 		iput(journal_inode);
-		printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
+		ext4_msg(sb, KERN_ERR, "journal inode is deleted");
 		return NULL;
 	}
 
 	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
 		  journal_inode, journal_inode->i_size);
 	if (!S_ISREG(journal_inode->i_mode)) {
-		printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
+		ext4_msg(sb, KERN_ERR, "invalid journal inode");
 		iput(journal_inode);
 		return NULL;
 	}
 
 	journal = jbd2_journal_init_inode(journal_inode);
 	if (!journal) {
-		printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
+		ext4_msg(sb, KERN_ERR, "Could not load journal inode");
 		iput(journal_inode);
 		return NULL;
 	}
@@ -3017,13 +3023,13 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 
 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
 
-	bdev = ext4_blkdev_get(j_dev);
+	bdev = ext4_blkdev_get(j_dev, sb);
 	if (bdev == NULL)
 		return NULL;
 
 	if (bd_claim(bdev, sb)) {
-		printk(KERN_ERR
-			"EXT4-fs: failed to claim external journal device.\n");
+		ext4_msg(sb, KERN_ERR,
+			"failed to claim external journal device");
 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
 		return NULL;
 	}
@@ -3031,8 +3037,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	blocksize = sb->s_blocksize;
 	hblock = bdev_hardsect_size(bdev);
 	if (blocksize < hblock) {
-		printk(KERN_ERR
-			"EXT4-fs: blocksize too small for journal device.\n");
+		ext4_msg(sb, KERN_ERR,
+			"blocksize too small for journal device");
 		goto out_bdev;
 	}
 
@@ -3040,8 +3046,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
 	set_blocksize(bdev, blocksize);
 	if (!(bh = __bread(bdev, sb_block, blocksize))) {
-		printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
-		       "external journal\n");
+		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
+		       "external journal");
 		goto out_bdev;
 	}
 
@@ -3049,14 +3055,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
 	    !(le32_to_cpu(es->s_feature_incompat) &
 	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
-		printk(KERN_ERR "EXT4-fs: external journal has "
-					"bad superblock\n");
+		ext4_msg(sb, KERN_ERR, "external journal has "
+					"bad superblock");
 		brelse(bh);
 		goto out_bdev;
 	}
 
 	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
-		printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
+		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
 		brelse(bh);
 		goto out_bdev;
 	}
@@ -3068,19 +3074,19 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
 					start, len, blocksize);
 	if (!journal) {
-		printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
+		ext4_msg(sb, KERN_ERR, "failed to create device journal");
 		goto out_bdev;
 	}
 	journal->j_private = sb;
 	ll_rw_block(READ, 1, &journal->j_sb_buffer);
 	wait_on_buffer(journal->j_sb_buffer);
 	if (!buffer_uptodate(journal->j_sb_buffer)) {
-		printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
+		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
 		goto out_journal;
 	}
 	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
-		printk(KERN_ERR "EXT4-fs: External journal has more than one "
-					"user (unsupported) - %d\n",
+		ext4_msg(sb, KERN_ERR, "External journal has more than one "
+					"user (unsupported) - %d",
 			be32_to_cpu(journal->j_superblock->s_nr_users));
 		goto out_journal;
 	}
@@ -3109,8 +3115,8 @@ static int ext4_load_journal(struct super_block *sb,
 
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
-		printk(KERN_INFO "EXT4-fs: external journal device major/minor "
-			"numbers have changed\n");
+		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
+			"numbers have changed");
 		journal_dev = new_decode_dev(journal_devnum);
 	} else
 		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -3124,21 +3130,21 @@ static int ext4_load_journal(struct super_block *sb,
 	 */
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
 		if (sb->s_flags & MS_RDONLY) {
-			printk(KERN_INFO "EXT4-fs: INFO: recovery "
-					"required on readonly filesystem.\n");
+			ext4_msg(sb, KERN_INFO, "INFO: recovery "
+					"required on readonly filesystem");
 			if (really_read_only) {
-				printk(KERN_ERR "EXT4-fs: write access "
-					"unavailable, cannot proceed.\n");
+				ext4_msg(sb, KERN_ERR, "write access "
+					"unavailable, cannot proceed");
 				return -EROFS;
 			}
-			printk(KERN_INFO "EXT4-fs: write access will "
-			       "be enabled during recovery.\n");
+			ext4_msg(sb, KERN_INFO, "write access will "
+			       "be enabled during recovery");
 		}
 	}
 
 	if (journal_inum && journal_dev) {
-		printk(KERN_ERR "EXT4-fs: filesystem has both journal "
-		       "and inode journals!\n");
+		ext4_msg(sb, KERN_ERR, "filesystem has both journal "
+		       "and inode journals!");
 		return -EINVAL;
 	}
 
@@ -3151,14 +3157,14 @@ static int ext4_load_journal(struct super_block *sb,
 	}
 
 	if (journal->j_flags & JBD2_BARRIER)
-		printk(KERN_INFO "EXT4-fs: barriers enabled\n");
+		ext4_msg(sb, KERN_INFO, "barriers enabled");
 	else
-		printk(KERN_INFO "EXT4-fs: barriers disabled\n");
+		ext4_msg(sb, KERN_INFO, "barriers disabled");
 
 	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
 		err = jbd2_journal_update_format(journal);
 		if (err)  {
-			printk(KERN_ERR "EXT4-fs: error updating journal.\n");
+			ext4_msg(sb, KERN_ERR, "error updating journal");
 			jbd2_journal_destroy(journal);
 			return err;
 		}
@@ -3170,7 +3176,7 @@ static int ext4_load_journal(struct super_block *sb,
 		err = jbd2_journal_load(journal);
 
 	if (err) {
-		printk(KERN_ERR "EXT4-fs: error loading journal.\n");
+		ext4_msg(sb, KERN_ERR, "error loading journal");
 		jbd2_journal_destroy(journal);
 		return err;
 	}
@@ -3206,8 +3212,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 		 * be remapped.  Nothing we can do but to retry the
 		 * write and hope for the best.
 		 */
-		printk(KERN_ERR "EXT4-fs: previous I/O error to "
-		       "superblock detected for %s.\n", sb->s_id);
+		ext4_msg(sb, KERN_ERR, "previous I/O error to "
+		       "superblock detected");
 		clear_buffer_write_io_error(sbh);
 		set_buffer_uptodate(sbh);
 	}
@@ -3230,8 +3236,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 
 		error = buffer_write_io_error(sbh);
 		if (error) {
-			printk(KERN_ERR "EXT4-fs: I/O error while writing "
-			       "superblock for %s.\n", sb->s_id);
+			ext4_msg(sb, KERN_ERR, "I/O error while writing "
+			       "superblock");
 			clear_buffer_write_io_error(sbh);
 			set_buffer_uptodate(sbh);
 		}
@@ -3478,9 +3484,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			int ret;
 			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
 					~EXT4_FEATURE_RO_COMPAT_SUPP))) {
-				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+				ext4_msg(sb, KERN_WARNING, "couldn't "
 				       "remount RDWR because of unsupported "
-				       "optional features (%x).\n", sb->s_id,
+				       "optional features (%x)",
 				(le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
 					~EXT4_FEATURE_RO_COMPAT_SUPP));
 				err = -EROFS;
@@ -3496,9 +3502,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 					ext4_get_group_desc(sb, g, NULL);
 
 				if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
-					printk(KERN_ERR
-	       "EXT4-fs: ext4_remount: "
-		"Checksum for group %u failed (%u!=%u)\n",
+					ext4_msg(sb, KERN_ERR,
+	       "ext4_remount: Checksum for group %u failed (%u!=%u)",
 		g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
 					       le16_to_cpu(gdp->bg_checksum));
 					err = -EINVAL;
@@ -3512,11 +3517,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			 * require a full umount/remount for now.
 			 */
 			if (es->s_last_orphan) {
-				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+				ext4_msg(sb, KERN_WARNING, "Couldn't "
 				       "remount RDWR because of unprocessed "
 				       "orphan inode list.  Please "
-				       "umount/remount instead.\n",
-				       sb->s_id);
+				       "umount/remount instead");
 				err = -EINVAL;
 				goto restore_opts;
 			}
@@ -3772,9 +3776,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 	if (EXT4_SB(sb)->s_qf_names[type]) {
 		/* Quotafile not in fs root? */
 		if (path.dentry->d_parent != sb->s_root)
-			printk(KERN_WARNING
-				"EXT4-fs: Quota file not on filesystem root. "
-				"Journaled quota will not work.\n");
+			ext4_msg(sb, KERN_WARNING,
+				"Quota file not on filesystem root. "
+				"Journaled quota will not work");
 	}
 
 	/*
@@ -3857,8 +3861,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	handle_t *handle = journal_current_handle();
 
 	if (EXT4_SB(sb)->s_journal && !handle) {
-		printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
-			" cancelled because transaction is not started.\n",
+		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
+			" cancelled because transaction is not started",
 			(unsigned long long)off, (unsigned long long)len);
 		return -EIO;
 	}
@@ -3930,10 +3934,10 @@ static struct file_system_type ext4_fs_type = {
 static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
 			  const char *dev_name, void *data,struct vfsmount *mnt)
 {
-	printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
-	       "to mount using ext4\n");
-	printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
-	       "will go away by 2.6.31\n");
+	printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs "
+	       "to mount using ext4\n", dev_name);
+	printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility "
+	       "will go away by 2.6.31\n", dev_name);
 	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
-- 
cgit v0.10.2


From 04288f42033607099cebf5ca15ce8dcec3a9688b Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Thu, 4 Jun 2009 13:53:10 -0400
Subject: integrity: ima audit dentry_open failure

Until we start appraising measurements, the ima_path_check()
return code should always be 0.

- Update the ima_path_check() return code comment
- Instead of the pr_info, audit the dentry_open failure

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index a2eb233..6f61187 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -149,8 +149,8 @@ static void ima_update_counts(struct ima_iint_cache *iint, int mask)
  *	- Opening a file for read when already open for write,
  * 	  could result in a file measurement error.
  *
- * Return 0 on success, an error code on failure.
- * (Based on the results of appraise_measurement().)
+ * Always return 0 and audit dentry_open failures.
+ * (Return code will be based upon measurement appraisal.)
  */
 int ima_path_check(struct path *path, int mask, int update_counts)
 {
@@ -189,8 +189,13 @@ int ima_path_check(struct path *path, int mask, int update_counts)
 		file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE,
 				   current_cred());
 		if (IS_ERR(file)) {
-			pr_info("%s dentry_open failed\n", dentry->d_name.name);
-			rc = PTR_ERR(file);
+			int audit_info = 0;
+
+			integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
+					    dentry->d_name.name,
+					    "add_measurement",
+					    "dentry_open failed",
+					    1, audit_info);
 			file = NULL;
 			goto out;
 		}
-- 
cgit v0.10.2


From 730c586ad5228c339949b2eb4e72b80ae167abc4 Mon Sep 17 00:00:00 2001
From: Salman Qazi <sqazi@google.com>
Date: Thu, 4 Jun 2009 15:20:39 -0700
Subject: drivers/char/mem.c: avoid OOM lockup during large reads from
 /dev/zero

While running 20 parallel instances of dd as follows:

  #!/bin/bash
  for i in `seq 1 20`; do
           dd if=/dev/zero of=/export/hda3/dd_$i bs=1073741824 count=1 &
  done
  wait

on a 16G machine, we noticed that rather than just killing the processes,
the entire kernel went down.  Stracing dd reveals that it first does an
mmap2, which makes 1GB worth of zero page mappings.  Then it performs a
read on those pages from /dev/zero, and finally it performs a write.

The machine died during the reads.  Looking at the code, it was noticed
that /dev/zero's read operation had been changed by
557ed1fa2620dc119adb86b34c614e152a629a80 ("remove ZERO_PAGE") from giving
zero page mappings to actually zeroing the page.

The zeroing of the pages causes physical pages to be allocated to the
process.  But, when the process exhausts all the memory that it can, the
kernel cannot kill it, as it is still in the kernel mode allocating more
memory.  Consequently, the kernel eventually crashes.

To fix this, I propose that when a fatal signal is pending during
/dev/zero read operation, we simply return and let the user process die.

Signed-off-by: Salman Qazi <sqazi@google.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Modified error return and comment trivially.  - Linus]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 8f05c38..65e12bc 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -694,6 +694,9 @@ static ssize_t read_zero(struct file * file, char __user * buf,
 		written += chunk - unwritten;
 		if (unwritten)
 			break;
+		/* Consider changing this to just 'signal_pending()' with lots of testing */
+		if (fatal_signal_pending(current))
+			return written ? written : -EINTR;
 		buf += chunk;
 		count -= chunk;
 		cond_resched();
-- 
cgit v0.10.2


From 087eb437051b3de817720f9c80c440fc9e7dcce8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 4 Jun 2009 16:29:07 -0700
Subject: ptrace: tracehook_report_clone: fix false positives

The "trace || CLONE_PTRACE" check in tracehook_report_clone() is not right,

- If the untraced task does clone(CLONE_PTRACE) the new child is not traced,
  we must not queue SIGSTOP.

- If we forked the traced task, but the tracer exits and untraces both the
  forking task and the new child (after copy_process() drops tasklist_lock),
  we should not queue SIGSTOP too.

Change the code to check task_ptrace() != 0 instead. This is still racy, but
the race is harmless.

We can race with another tracer attaching to this child, or the tracer can
exit and detach in parallel. But giwen that we didn't do wake_up_new_task()
yet, the child must have the pending SIGSTOP anyway.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index c7aa154..eb96603 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -259,14 +259,12 @@ static inline void tracehook_finish_clone(struct task_struct *child,
 
 /**
  * tracehook_report_clone - in parent, new child is about to start running
- * @trace:		return value from tracehook_prepare_clone()
  * @regs:		parent's user register state
  * @clone_flags:	flags from parent's system call
  * @pid:		new child's PID in the parent's namespace
  * @child:		new child task
  *
- * Called after a child is set up, but before it has been started
- * running.  @trace is the value returned by tracehook_prepare_clone().
+ * Called after a child is set up, but before it has been started running.
  * This is not a good place to block, because the child has not started
  * yet.  Suspend the child here if desired, and then block in
  * tracehook_report_clone_complete().  This must prevent the child from
@@ -276,13 +274,14 @@ static inline void tracehook_finish_clone(struct task_struct *child,
  *
  * Called with no locks held, but the child cannot run until this returns.
  */
-static inline void tracehook_report_clone(int trace, struct pt_regs *regs,
+static inline void tracehook_report_clone(struct pt_regs *regs,
 					  unsigned long clone_flags,
 					  pid_t pid, struct task_struct *child)
 {
-	if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) {
+	if (unlikely(task_ptrace(child))) {
 		/*
-		 * The child starts up with an immediate SIGSTOP.
+		 * It doesn't matter who attached/attaching to this
+		 * task, the pending SIGSTOP is right in any case.
 		 */
 		sigaddset(&child->pending.signal, SIGSTOP);
 		set_tsk_thread_flag(child, TIF_SIGPENDING);
diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd..875ffbd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1409,7 +1409,7 @@ long do_fork(unsigned long clone_flags,
 		}
 
 		audit_finish_fork(p);
-		tracehook_report_clone(trace, regs, clone_flags, nr, p);
+		tracehook_report_clone(regs, clone_flags, nr, p);
 
 		/*
 		 * We set PF_STARTING at creation in case tracing wants to
-- 
cgit v0.10.2


From 08f67461c609ad96bf26732b590569e02e322019 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 4 Jun 2009 16:29:08 -0700
Subject: kbuild: fix detection of CONFIG_FRAME_WARN=0

The checking of CONFIG_FRAME_WARN in the top level Makefile forgot to
actually derefence the variable thus leading to an always true check.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Andi Kleen <ak@suse.de>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/Makefile b/Makefile
index 610d1c3..1065154 100644
--- a/Makefile
+++ b/Makefile
@@ -533,7 +533,7 @@ endif
 
 include $(srctree)/arch/$(SRCARCH)/Makefile
 
-ifneq (CONFIG_FRAME_WARN,0)
+ifneq ($(CONFIG_FRAME_WARN),0)
 KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif
 
-- 
cgit v0.10.2


From edaba2c5334492f82d39ec35637c6dea5176a977 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 4 Jun 2009 16:29:09 -0700
Subject: ptrace: revert "ptrace_detach: the wrong wakeup breaks the
 ERESTARTxxx logic"

Commit 95a3540da9c81a5987be810e1d9a83640a366bd5 ("ptrace_detach: the wrong
wakeup breaks the ERESTARTxxx logic") removed the "extra"
wake_up_process() from ptrace_detach(), but as Jan pointed out this breaks
the compatibility.

I believe the changelog is right and this wake_up() is wrong in many
ways, but GDB assumes that ptrace(PTRACE_DETACH, child, 0, 0) always
wakes up the tracee.

Despite the fact this breaks SIGNAL_STOP_STOPPED/group_stop_count logic,
and despite the fact this wake_up_process() can break another
assumption: PTRACE_DETACH with SIGSTOP should leave the tracee in
TASK_STOPPED case.  Because the untraced child can dequeue SIGSTOP and
call do_signal_stop() before ptrace_detach() calls wake_up_process().

Revert this change for now.  We need some fixes even if we we want to keep
the current behaviour, but these fixes are not for 2.6.30.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0692ab5..42c3178 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -304,6 +304,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
 	if (child->ptrace) {
 		child->exit_code = data;
 		dead = __ptrace_detach(current, child);
+		if (!child->exit_state)
+			wake_up_process(child);
 	}
 	write_unlock_irq(&tasklist_lock);
 
-- 
cgit v0.10.2


From f23d4911319fdebffd0529b31bb66d324ef287e6 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 14:43:25 +0800
Subject: [ARM] pxa: add platform device ID table to pxa i2c driver

Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index d245e59c..899418e 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -287,7 +287,7 @@ static struct resource pxa3xx_resources_i2c_power[] = {
 };
 
 struct platform_device pxa3xx_device_i2c_power = {
-	.name		= "pxa2xx-i2c",
+	.name		= "pxa3xx-pwri2c",
 	.id		= 1,
 	.resource	= pxa3xx_resources_i2c_power,
 	.num_resources	= ARRAY_SIZE(pxa3xx_resources_i2c_power),
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index acc7143..e4ee883 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -34,12 +34,26 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 
-#include <mach/hardware.h>
 #include <asm/irq.h>
 #include <asm/io.h>
 #include <mach/i2c.h>
 
 /*
+ * I2C register offsets will be shifted 0 or 1 bit left, depending on
+ * different SoCs
+ */
+#define REG_SHIFT_0	(0 << 0)
+#define REG_SHIFT_1	(1 << 0)
+#define REG_SHIFT(d)	((d) & 0x1)
+
+static const struct platform_device_id i2c_pxa_id_table[] = {
+	{ "pxa2xx-i2c",		REG_SHIFT_1 },
+	{ "pxa3xx-pwri2c",	REG_SHIFT_0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(platform, i2c_pxa_id_table);
+
+/*
  * I2C registers and bit definitions
  */
 #define IBMR		(0x00)
@@ -985,6 +999,7 @@ static int i2c_pxa_probe(struct platform_device *dev)
 	struct pxa_i2c *i2c;
 	struct resource *res;
 	struct i2c_pxa_platform_data *plat = dev->dev.platform_data;
+	struct platform_device_id *id = platform_get_device_id(dev);
 	int ret;
 	int irq;
 
@@ -1028,7 +1043,7 @@ static int i2c_pxa_probe(struct platform_device *dev)
 		ret = -EIO;
 		goto eremap;
 	}
-	i2c->reg_shift = (cpu_is_pxa3xx() && (dev->id == 1)) ? 0 : 1;
+	i2c->reg_shift = REG_SHIFT(id->driver_data);
 
 	i2c->iobase = res->start;
 	i2c->iosize = res_len(res);
@@ -1150,6 +1165,7 @@ static struct platform_driver i2c_pxa_driver = {
 		.name	= "pxa2xx-i2c",
 		.owner	= THIS_MODULE,
 	},
+	.id_table	= i2c_pxa_id_table,
 };
 
 static int __init i2c_adap_pxa_init(void)
-- 
cgit v0.10.2


From f0a83701399123b0e95cc4d949fcccf9941fd190 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 15:03:11 +0800
Subject: [ARM] pxa: move mach/i2c.h to plat/i2c.h

Signed-off-by: Paul Shen <paul.shen@marvell.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index a9f48b1..a97f87a 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -32,7 +32,7 @@
 #include <mach/pxafb.h>
 #include <mach/mmc.h>
 #include <mach/ohci.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/pxa3xx_nand.h>
 
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 930e364..f04e505 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -42,7 +42,7 @@
 #include <asm/mach/irq.h>
 
 #include <mach/pxa25x.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/irda.h>
 #include <mach/mmc.h>
 #include <mach/udc.h>
diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c
index 2b289f8..f95b32d 100644
--- a/arch/arm/mach-pxa/csb726.c
+++ b/arch/arm/mach-pxa/csb726.c
@@ -21,7 +21,7 @@
 #include <asm/mach/arch.h>
 #include <mach/csb726.h>
 #include <mach/mfp-pxa27x.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/mmc.h>
 #include <mach/ohci.h>
 #include <mach/pxa2xx-regs.h>
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index 899418e..d3c3b7b 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -8,7 +8,7 @@
 #include <mach/pxafb.h>
 #include <mach/mmc.h>
 #include <mach/irda.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/ohci.h>
 #include <mach/pxa27x_keypad.h>
 #include <mach/pxa2xx_spi.h>
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index bc0f73f..3503f52 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -41,7 +41,7 @@
 #include <mach/ohci.h>
 #include <mach/mmc.h>
 #include <mach/pxa27x_keypad.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/camera.h>
 #include <mach/pxa2xx_spi.h>
 
diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 7db966d..4ce63b4 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -25,7 +25,7 @@
 #include <mach/pxa27x.h>
 #include <mach/pxafb.h>
 #include <mach/ohci.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/hardware.h>
 #include <mach/pxa27x_keypad.h>
 
diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index 2121309..821e65a 100644
--- a/arch/arm/mach-pxa/imote2.c
+++ b/arch/arm/mach-pxa/imote2.c
@@ -29,7 +29,7 @@
 #include <asm/mach/flash.h>
 
 #include <mach/pxa27x.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/udc.h>
 #include <mach/mmc.h>
 #include <mach/pxa2xx_spi.h>
diff --git a/arch/arm/mach-pxa/include/mach/i2c.h b/arch/arm/mach-pxa/include/mach/i2c.h
deleted file mode 100644
index 1a9f65e..0000000
--- a/arch/arm/mach-pxa/include/mach/i2c.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  i2c_pxa.h
- *
- *  Copyright (C) 2002 Intrinsyc Software Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- */
-#ifndef _I2C_PXA_H_
-#define _I2C_PXA_H_
-
-#if 0
-#define DEF_TIMEOUT             3
-#else
-/* need a longer timeout if we're dealing with the fact we may well be
- * looking at a multi-master environment
-*/
-#define DEF_TIMEOUT             32
-#endif
-
-#define BUS_ERROR               (-EREMOTEIO)
-#define XFER_NAKED              (-ECONNREFUSED)
-#define I2C_RETRY               (-2000) /* an error has occurred retry transmit */
-
-/* ICR initialize bit values
-*
-*  15. FM       0 (100 Khz operation)
-*  14. UR       0 (No unit reset)
-*  13. SADIE    0 (Disables the unit from interrupting on slave addresses
-*                                       matching its slave address)
-*  12. ALDIE    0 (Disables the unit from interrupt when it loses arbitration
-*                                       in master mode)
-*  11. SSDIE    0 (Disables interrupts from a slave stop detected, in slave mode)
-*  10. BEIE     1 (Enable interrupts from detected bus errors, no ACK sent)
-*  9.  IRFIE    1 (Enable interrupts from full buffer received)
-*  8.  ITEIE    1 (Enables the I2C unit to interrupt when transmit buffer empty)
-*  7.  GCD      1 (Disables i2c unit response to general call messages as a slave)
-*  6.  IUE      0 (Disable unit until we change settings)
-*  5.  SCLE     1 (Enables the i2c clock output for master mode (drives SCL)
-*  4.  MA       0 (Only send stop with the ICR stop bit)
-*  3.  TB       0 (We are not transmitting a byte initially)
-*  2.  ACKNAK   0 (Send an ACK after the unit receives a byte)
-*  1.  STOP     0 (Do not send a STOP)
-*  0.  START    0 (Do not send a START)
-*
-*/
-#define I2C_ICR_INIT	(ICR_BEIE | ICR_IRFIE | ICR_ITEIE | ICR_GCD | ICR_SCLE)
-
-/* I2C status register init values
- *
- * 10. BED      1 (Clear bus error detected)
- * 9.  SAD      1 (Clear slave address detected)
- * 7.  IRF      1 (Clear IDBR Receive Full)
- * 6.  ITE      1 (Clear IDBR Transmit Empty)
- * 5.  ALD      1 (Clear Arbitration Loss Detected)
- * 4.  SSD      1 (Clear Slave Stop Detected)
- */
-#define I2C_ISR_INIT	0x7FF  /* status register init */
-
-struct i2c_slave_client;
-
-struct i2c_pxa_platform_data {
-	unsigned int		slave_addr;
-	struct i2c_slave_client	*slave;
-	unsigned int		class;
-	unsigned int		use_pio :1;
-	unsigned int		fast_mode :1;
-};
-
-extern void pxa_set_i2c_info(struct i2c_pxa_platform_data *info);
-
-#ifdef CONFIG_PXA27x
-extern void pxa27x_set_i2c_power_info(struct i2c_pxa_platform_data *info);
-#endif
-
-#ifdef CONFIG_PXA3xx
-extern void pxa3xx_set_i2c_power_info(struct i2c_pxa_platform_data *info);
-#endif
-
-#endif
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index c872b9f..ad5378b 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -43,7 +43,7 @@
 #include <mach/pxafb.h>
 #include <mach/ssp.h>
 #include <mach/pxa2xx_spi.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/pxa27x_keypad.h>
 #include <mach/pxa3xx_nand.h>
 #include <mach/littleton.h>
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index c899bbd..5a9df97 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -36,7 +36,7 @@
 #include <mach/pxa27x.h>
 #include <mach/magician.h>
 #include <mach/pxafb.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/mmc.h>
 #include <mach/irda.h>
 #include <mach/ohci.h>
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index a6c8429..f4dabf0 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -46,7 +46,7 @@
 #include <mach/mainstone.h>
 #include <mach/audio.h>
 #include <mach/pxafb.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/mmc.h>
 #include <mach/irda.h>
 #include <mach/ohci.h>
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index ff8052c..804c25c 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -48,7 +48,7 @@
 #include <mach/mmc.h>
 #include <mach/udc.h>
 #include <mach/pxa27x-udc.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/camera.h>
 #include <mach/audio.h>
 #include <media/soc_camera.h>
diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c
index 6c12b5a..095521e 100644
--- a/arch/arm/mach-pxa/pcm990-baseboard.c
+++ b/arch/arm/mach-pxa/pcm990-baseboard.c
@@ -28,7 +28,7 @@
 #include <media/soc_camera.h>
 
 #include <asm/gpio.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/camera.h>
 #include <asm/mach/map.h>
 #include <mach/pxa27x.h>
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 036bbde..74f4ce6 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -39,7 +39,7 @@
 #include <mach/pxa25x.h>
 #include <mach/mmc.h>
 #include <mach/udc.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/irda.h>
 #include <mach/poodle.h>
 #include <mach/pxafb.h>
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index a425ec7..c54ad8b 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -27,7 +27,7 @@
 #include <mach/ohci.h>
 #include <mach/pm.h>
 #include <mach/dma.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 
 #include "generic.h"
 #include "devices.h"
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index b02d454..462503a 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -30,7 +30,7 @@
 #include <mach/pm.h>
 #include <mach/dma.h>
 #include <mach/ssp.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 
 #include "generic.h"
 #include "devices.h"
diff --git a/arch/arm/mach-pxa/saar.c b/arch/arm/mach-pxa/saar.c
index ff82399..8241a63 100644
--- a/arch/arm/mach-pxa/saar.c
+++ b/arch/arm/mach-pxa/saar.c
@@ -27,7 +27,7 @@
 #include <asm/mach/arch.h>
 
 #include <mach/pxa930.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/pxafb.h>
 
 #include "devices.h"
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 5a45fe3..229f485 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -47,7 +47,7 @@
 #include <mach/pxa27x.h>
 #include <mach/pxa27x-udc.h>
 #include <mach/reset.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/irda.h>
 #include <mach/mmc.h>
 #include <mach/ohci.h>
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index a0bd46e..168267a 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -40,7 +40,7 @@
 #include <mach/pxa25x.h>
 #include <mach/reset.h>
 #include <mach/irda.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/mmc.h>
 #include <mach/udc.h>
 #include <mach/tosa_bt.h>
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index f79c9cb..825f540 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -47,7 +47,7 @@
 #include <mach/mmc.h>
 #include <mach/irda.h>
 #include <mach/ohci.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 
 #include "generic.h"
 #include "devices.h"
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index dd031cc..d33c232 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -45,7 +45,7 @@
 #include <mach/pxa25x.h>
 #include <mach/audio.h>
 #include <mach/pxafb.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/regs-uart.h>
 #include <mach/viper.h>
 
diff --git a/arch/arm/mach-pxa/zylonite_pxa300.c b/arch/arm/mach-pxa/zylonite_pxa300.c
index c256c57..cefd1c0 100644
--- a/arch/arm/mach-pxa/zylonite_pxa300.c
+++ b/arch/arm/mach-pxa/zylonite_pxa300.c
@@ -21,7 +21,7 @@
 #include <linux/gpio.h>
 
 #include <mach/pxa300.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 #include <mach/zylonite.h>
 
 #include "generic.h"
diff --git a/arch/arm/plat-pxa/include/plat/i2c.h b/arch/arm/plat-pxa/include/plat/i2c.h
new file mode 100644
index 0000000..1a9f65e
--- /dev/null
+++ b/arch/arm/plat-pxa/include/plat/i2c.h
@@ -0,0 +1,82 @@
+/*
+ *  i2c_pxa.h
+ *
+ *  Copyright (C) 2002 Intrinsyc Software Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+#ifndef _I2C_PXA_H_
+#define _I2C_PXA_H_
+
+#if 0
+#define DEF_TIMEOUT             3
+#else
+/* need a longer timeout if we're dealing with the fact we may well be
+ * looking at a multi-master environment
+*/
+#define DEF_TIMEOUT             32
+#endif
+
+#define BUS_ERROR               (-EREMOTEIO)
+#define XFER_NAKED              (-ECONNREFUSED)
+#define I2C_RETRY               (-2000) /* an error has occurred retry transmit */
+
+/* ICR initialize bit values
+*
+*  15. FM       0 (100 Khz operation)
+*  14. UR       0 (No unit reset)
+*  13. SADIE    0 (Disables the unit from interrupting on slave addresses
+*                                       matching its slave address)
+*  12. ALDIE    0 (Disables the unit from interrupt when it loses arbitration
+*                                       in master mode)
+*  11. SSDIE    0 (Disables interrupts from a slave stop detected, in slave mode)
+*  10. BEIE     1 (Enable interrupts from detected bus errors, no ACK sent)
+*  9.  IRFIE    1 (Enable interrupts from full buffer received)
+*  8.  ITEIE    1 (Enables the I2C unit to interrupt when transmit buffer empty)
+*  7.  GCD      1 (Disables i2c unit response to general call messages as a slave)
+*  6.  IUE      0 (Disable unit until we change settings)
+*  5.  SCLE     1 (Enables the i2c clock output for master mode (drives SCL)
+*  4.  MA       0 (Only send stop with the ICR stop bit)
+*  3.  TB       0 (We are not transmitting a byte initially)
+*  2.  ACKNAK   0 (Send an ACK after the unit receives a byte)
+*  1.  STOP     0 (Do not send a STOP)
+*  0.  START    0 (Do not send a START)
+*
+*/
+#define I2C_ICR_INIT	(ICR_BEIE | ICR_IRFIE | ICR_ITEIE | ICR_GCD | ICR_SCLE)
+
+/* I2C status register init values
+ *
+ * 10. BED      1 (Clear bus error detected)
+ * 9.  SAD      1 (Clear slave address detected)
+ * 7.  IRF      1 (Clear IDBR Receive Full)
+ * 6.  ITE      1 (Clear IDBR Transmit Empty)
+ * 5.  ALD      1 (Clear Arbitration Loss Detected)
+ * 4.  SSD      1 (Clear Slave Stop Detected)
+ */
+#define I2C_ISR_INIT	0x7FF  /* status register init */
+
+struct i2c_slave_client;
+
+struct i2c_pxa_platform_data {
+	unsigned int		slave_addr;
+	struct i2c_slave_client	*slave;
+	unsigned int		class;
+	unsigned int		use_pio :1;
+	unsigned int		fast_mode :1;
+};
+
+extern void pxa_set_i2c_info(struct i2c_pxa_platform_data *info);
+
+#ifdef CONFIG_PXA27x
+extern void pxa27x_set_i2c_power_info(struct i2c_pxa_platform_data *info);
+#endif
+
+#ifdef CONFIG_PXA3xx
+extern void pxa3xx_set_i2c_power_info(struct i2c_pxa_platform_data *info);
+#endif
+
+#endif
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index e4ee883..035a6c7 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -36,7 +36,7 @@
 
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <mach/i2c.h>
+#include <plat/i2c.h>
 
 /*
  * I2C register offsets will be shifted 0 or 1 bit left, depending on
-- 
cgit v0.10.2


From 1a77920e4cbe508c8dc40fef1d0beb21aac8cc17 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 15:34:54 +0800
Subject: [ARM] pxa: add I2C (TWSI) devices to pxa168/pxa910

Signed-off-by: Paul Shen <paul.shen@marvell.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
index 2e91464..dc42263 100644
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
@@ -253,6 +253,10 @@
 #define GPIO58_LCD_PCLK_WR	MFP_CFG(GPIO58, AF1)
 #define GPIO85_LCD_VSYNC	MFP_CFG(GPIO85, AF1)
 
+/* I2C */
+#define GPIO105_CI2C_SDA	MFP_CFG(GPIO105, AF1)
+#define GPIO106_CI2C_SCL	MFP_CFG(GPIO106, AF1)
+
 /* I2S */
 #define GPIO113_I2S_MCLK	MFP_CFG(GPIO113,AF6)
 #define GPIO114_I2S_FRM		MFP_CFG(GPIO114,AF1)
diff --git a/arch/arm/mach-mmp/include/mach/pxa168.h b/arch/arm/mach-mmp/include/mach/pxa168.h
index ef0a8a2..bfdd629 100644
--- a/arch/arm/mach-mmp/include/mach/pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/pxa168.h
@@ -1,10 +1,14 @@
 #ifndef __ASM_MACH_PXA168_H
 #define __ASM_MACH_PXA168_H
 
+#include <linux/i2c.h>
 #include <mach/devices.h>
+#include <plat/i2c.h>
 
 extern struct pxa_device_desc pxa168_device_uart1;
 extern struct pxa_device_desc pxa168_device_uart2;
+extern struct pxa_device_desc pxa168_device_twsi0;
+extern struct pxa_device_desc pxa168_device_twsi1;
 
 static inline int pxa168_add_uart(int id)
 {
@@ -20,4 +24,24 @@ static inline int pxa168_add_uart(int id)
 
 	return pxa_register_device(d, NULL, 0);
 }
+
+static inline int pxa168_add_twsi(int id, struct i2c_pxa_platform_data *data,
+				  struct i2c_board_info *info, unsigned size)
+{
+	struct pxa_device_desc *d = NULL;
+	int ret;
+
+	switch (id) {
+	case 0: d = &pxa168_device_twsi0; break;
+	case 1: d = &pxa168_device_twsi1; break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = i2c_register_board_info(id, info, size);
+	if (ret)
+		return ret;
+
+	return pxa_register_device(d, data, sizeof(*data));
+}
 #endif /* __ASM_MACH_PXA168_H */
diff --git a/arch/arm/mach-mmp/include/mach/pxa910.h b/arch/arm/mach-mmp/include/mach/pxa910.h
index b7aeaf5..a0f0cbe 100644
--- a/arch/arm/mach-mmp/include/mach/pxa910.h
+++ b/arch/arm/mach-mmp/include/mach/pxa910.h
@@ -1,10 +1,14 @@
 #ifndef __ASM_MACH_PXA910_H
 #define __ASM_MACH_PXA910_H
 
+#include <linux/i2c.h>
 #include <mach/devices.h>
+#include <plat/i2c.h>
 
 extern struct pxa_device_desc pxa910_device_uart1;
 extern struct pxa_device_desc pxa910_device_uart2;
+extern struct pxa_device_desc pxa910_device_twsi0;
+extern struct pxa_device_desc pxa910_device_twsi1;
 
 static inline int pxa910_add_uart(int id)
 {
@@ -20,4 +24,24 @@ static inline int pxa910_add_uart(int id)
 
 	return pxa_register_device(d, NULL, 0);
 }
+
+static inline int pxa910_add_twsi(int id, struct i2c_pxa_platform_data *data,
+				  struct i2c_board_info *info, unsigned size)
+{
+	struct pxa_device_desc *d = NULL;
+	int ret;
+
+	switch (id) {
+	case 0: d = &pxa910_device_twsi0; break;
+	case 1: d = &pxa910_device_twsi1; break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = i2c_register_board_info(id, info, size);
+	if (ret)
+		return ret;
+
+	return pxa_register_device(d, data, sizeof(*data));
+}
 #endif /* __ASM_MACH_PXA910_H */
diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c
index ae92446..e0729e3 100644
--- a/arch/arm/mach-mmp/pxa168.c
+++ b/arch/arm/mach-mmp/pxa168.c
@@ -65,11 +65,15 @@ void __init pxa168_init_irq(void)
 /* APB peripheral clocks */
 static APBC_CLK(uart1, PXA168_UART1, 1, 14745600);
 static APBC_CLK(uart2, PXA168_UART2, 1, 14745600);
+static APBC_CLK(twsi0, PXA168_TWSI0, 1, 33000000);
+static APBC_CLK(twsi1, PXA168_TWSI1, 1, 33000000);
 
 /* device and clock bindings */
 static struct clk_lookup pxa168_clkregs[] = {
 	INIT_CLKREG(&clk_uart1, "pxa2xx-uart.0", NULL),
 	INIT_CLKREG(&clk_uart2, "pxa2xx-uart.1", NULL),
+	INIT_CLKREG(&clk_twsi0, "pxa2xx-i2c.0", NULL),
+	INIT_CLKREG(&clk_twsi1, "pxa2xx-i2c.1", NULL),
 };
 
 static int __init pxa168_init(void)
@@ -109,3 +113,5 @@ struct sys_timer pxa168_timer = {
 /* on-chip devices */
 PXA168_DEVICE(uart1, "pxa2xx-uart", 0, UART1, 0xd4017000, 0x30, 21, 22);
 PXA168_DEVICE(uart2, "pxa2xx-uart", 1, UART2, 0xd4018000, 0x30, 23, 24);
+PXA168_DEVICE(twsi0, "pxa2xx-i2c", 0, TWSI0, 0xd4011000, 0x28);
+PXA168_DEVICE(twsi1, "pxa2xx-i2c", 1, TWSI1, 0xd4025000, 0x28);
diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c
index 453f8f7..b97328b 100644
--- a/arch/arm/mach-mmp/pxa910.c
+++ b/arch/arm/mach-mmp/pxa910.c
@@ -103,11 +103,15 @@ void __init pxa910_init_irq(void)
 /* APB peripheral clocks */
 static APBC_CLK(uart1, PXA910_UART0, 1, 14745600);
 static APBC_CLK(uart2, PXA910_UART1, 1, 14745600);
+static APBC_CLK(twsi0, PXA168_TWSI0, 1, 33000000);
+static APBC_CLK(twsi1, PXA168_TWSI1, 1, 33000000);
 
 /* device and clock bindings */
 static struct clk_lookup pxa910_clkregs[] = {
 	INIT_CLKREG(&clk_uart1, "pxa2xx-uart.0", NULL),
 	INIT_CLKREG(&clk_uart2, "pxa2xx-uart.1", NULL),
+	INIT_CLKREG(&clk_twsi0, "pxa2xx-i2c.0", NULL),
+	INIT_CLKREG(&clk_twsi1, "pxa2xx-i2c.1", NULL),
 };
 
 static int __init pxa910_init(void)
@@ -156,3 +160,5 @@ struct sys_timer pxa910_timer = {
  */
 PXA910_DEVICE(uart1, "pxa2xx-uart", 0, UART2, 0xd4017000, 0x30, 21, 22);
 PXA910_DEVICE(uart2, "pxa2xx-uart", 1, UART3, 0xd4018000, 0x30, 23, 24);
+PXA910_DEVICE(twsi0, "pxa2xx-i2c", 0, TWSI0, 0xd4011000, 0x28);
+PXA910_DEVICE(twsi1, "pxa2xx-i2c", 1, TWSI1, 0xd4025000, 0x28);
-- 
cgit v0.10.2


From 3d2a98cd5e17052f461390c72efe1076c18fd62d Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 15:59:03 +0800
Subject: [ARM] pxa: simplify secondary PWM handling and use platform_device_id
 table

Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/pwm.c b/arch/arm/mach-pxa/pwm.c
index fcdd374..dec6f2c 100644
--- a/arch/arm/mach-pxa/pwm.c
+++ b/arch/arm/mach-pxa/pwm.c
@@ -21,6 +21,16 @@
 
 #include <asm/div64.h>
 
+#define HAS_SECONDARY_PWM	0x10
+
+static const struct platform_device_id pwm_id_table[] = {
+	/*   PWM    has_secondary_pwm? */
+	{ "pxa25x-pwm", 0 },
+	{ "pxa27x-pwm", HAS_SECONDARY_PWM },
+	{ },
+};
+MODULE_DEVICE_TABLE(platform, pwm_id_table);
+
 /* PWM registers and bits definitions */
 #define PWMCR		(0x00)
 #define PWMDCR		(0x04)
@@ -31,7 +41,8 @@
 
 struct pwm_device {
 	struct list_head	node;
-	struct platform_device *pdev;
+	struct pwm_device	*secondary;
+	struct platform_device	*pdev;
 
 	const char	*label;
 	struct clk	*clk;
@@ -159,17 +170,17 @@ static inline void __add_pwm(struct pwm_device *pwm)
 	mutex_unlock(&pwm_lock);
 }
 
-static struct pwm_device *pwm_probe(struct platform_device *pdev,
-		unsigned int pwm_id, struct pwm_device *parent_pwm)
+static int __devinit pwm_probe(struct platform_device *pdev)
 {
-	struct pwm_device *pwm;
+	struct platform_device_id *id = platform_get_device_id(pdev);
+	struct pwm_device *pwm, *secondary = NULL;
 	struct resource *r;
 	int ret = 0;
 
 	pwm = kzalloc(sizeof(struct pwm_device), GFP_KERNEL);
 	if (pwm == NULL) {
 		dev_err(&pdev->dev, "failed to allocate memory\n");
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	}
 
 	pwm->clk = clk_get(&pdev->dev, NULL);
@@ -180,16 +191,9 @@ static struct pwm_device *pwm_probe(struct platform_device *pdev,
 	pwm->clk_enabled = 0;
 
 	pwm->use_count = 0;
-	pwm->pwm_id = pwm_id;
+	pwm->pwm_id = pdev->id;
 	pwm->pdev = pdev;
 
-	if (parent_pwm != NULL) {
-		/* registers for the second PWM has offset of 0x10 */
-		pwm->mmio_base = parent_pwm->mmio_base + 0x10;
-		__add_pwm(pwm);
-		return pwm;
-	}
-
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (r == NULL) {
 		dev_err(&pdev->dev, "no memory resource defined\n");
@@ -211,9 +215,27 @@ static struct pwm_device *pwm_probe(struct platform_device *pdev,
 		goto err_free_mem;
 	}
 
+	if (id->driver_data & HAS_SECONDARY_PWM) {
+		secondary = kzalloc(sizeof(struct pwm_device), GFP_KERNEL);
+		if (secondary == NULL) {
+			ret = -ENOMEM;
+			goto err_free_mem;
+		}
+
+		*secondary = *pwm;
+		pwm->secondary = secondary;
+
+		/* registers for the second PWM has offset of 0x10 */
+		secondary->mmio_base = pwm->mmio_base + 0x10;
+		secondary->pwm_id = pdev->id + 2;
+	}
+
 	__add_pwm(pwm);
+	if (secondary)
+		__add_pwm(secondary);
+
 	platform_set_drvdata(pdev, pwm);
-	return pwm;
+	return 0;
 
 err_free_mem:
 	release_mem_region(r->start, r->end - r->start + 1);
@@ -221,32 +243,7 @@ err_free_clk:
 	clk_put(pwm->clk);
 err_free:
 	kfree(pwm);
-	return ERR_PTR(ret);
-}
-
-static int __devinit pxa25x_pwm_probe(struct platform_device *pdev)
-{
-	struct pwm_device *pwm = pwm_probe(pdev, pdev->id, NULL);
-
-	if (IS_ERR(pwm))
-		return PTR_ERR(pwm);
-
-	return 0;
-}
-
-static int __devinit pxa27x_pwm_probe(struct platform_device *pdev)
-{
-	struct pwm_device *pwm;
-
-	pwm = pwm_probe(pdev, pdev->id, NULL);
-	if (IS_ERR(pwm))
-		return PTR_ERR(pwm);
-
-	pwm = pwm_probe(pdev, pdev->id + 2, pwm);
-	if (IS_ERR(pwm))
-		return PTR_ERR(pwm);
-
-	return 0;
+	return ret;
 }
 
 static int __devexit pwm_remove(struct platform_device *pdev)
@@ -259,6 +256,12 @@ static int __devexit pwm_remove(struct platform_device *pdev)
 		return -ENODEV;
 
 	mutex_lock(&pwm_lock);
+
+	if (pwm->secondary) {
+		list_del(&pwm->secondary->node);
+		kfree(pwm->secondary);
+	}
+
 	list_del(&pwm->node);
 	mutex_unlock(&pwm_lock);
 
@@ -272,46 +275,25 @@ static int __devexit pwm_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct platform_driver pxa25x_pwm_driver = {
+static struct platform_driver pwm_driver = {
 	.driver		= {
 		.name	= "pxa25x-pwm",
+		.owner	= THIS_MODULE,
 	},
-	.probe		= pxa25x_pwm_probe,
-	.remove		= __devexit_p(pwm_remove),
-};
-
-static struct platform_driver pxa27x_pwm_driver = {
-	.driver		= {
-		.name	= "pxa27x-pwm",
-	},
-	.probe		= pxa27x_pwm_probe,
+	.probe		= pwm_probe,
 	.remove		= __devexit_p(pwm_remove),
+	.id_table	= pwm_id_table,
 };
 
 static int __init pwm_init(void)
 {
-	int ret = 0;
-
-	ret = platform_driver_register(&pxa25x_pwm_driver);
-	if (ret) {
-		printk(KERN_ERR "failed to register pxa25x_pwm_driver\n");
-		return ret;
-	}
-
-	ret = platform_driver_register(&pxa27x_pwm_driver);
-	if (ret) {
-		printk(KERN_ERR "failed to register pxa27x_pwm_driver\n");
-		return ret;
-	}
-
-	return ret;
+	return platform_driver_register(&pwm_driver);
 }
 arch_initcall(pwm_init);
 
 static void __exit pwm_exit(void)
 {
-	platform_driver_unregister(&pxa25x_pwm_driver);
-	platform_driver_unregister(&pxa27x_pwm_driver);
+	platform_driver_unregister(&pwm_driver);
 }
 module_exit(pwm_exit);
 
-- 
cgit v0.10.2


From a757ad8b391adb2129f8357545aaa678099df473 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 18:51:31 +0800
Subject: [ARM] pxa: allow PWM ID base number to be specified in pwm_id_table

PWMs on PXA168/910 start at number 1 instead of 0, (i.e. PWM1/2/3/4 instead
of PWM0/1/2/3 on PXA25x/PXA27x/PXA3xx). Allow this number to be specified
in pwm_id_table.

Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/pwm.c b/arch/arm/mach-pxa/pwm.c
index dec6f2c..5513c51 100644
--- a/arch/arm/mach-pxa/pwm.c
+++ b/arch/arm/mach-pxa/pwm.c
@@ -22,11 +22,12 @@
 #include <asm/div64.h>
 
 #define HAS_SECONDARY_PWM	0x10
+#define PWM_ID_BASE(d)		((d) & 0xf)
 
 static const struct platform_device_id pwm_id_table[] = {
 	/*   PWM    has_secondary_pwm? */
 	{ "pxa25x-pwm", 0 },
-	{ "pxa27x-pwm", HAS_SECONDARY_PWM },
+	{ "pxa27x-pwm", 0 | HAS_SECONDARY_PWM },
 	{ },
 };
 MODULE_DEVICE_TABLE(platform, pwm_id_table);
@@ -191,7 +192,7 @@ static int __devinit pwm_probe(struct platform_device *pdev)
 	pwm->clk_enabled = 0;
 
 	pwm->use_count = 0;
-	pwm->pwm_id = pdev->id;
+	pwm->pwm_id = PWM_ID_BASE(id->driver_data) + pdev->id;
 	pwm->pdev = pdev;
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-- 
cgit v0.10.2


From a71ef16327a534a88fb1277d28dcd362dbb1992f Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 16:09:21 +0800
Subject: [ARM] pxa: build pwm.c according to HAVE_PWM instead of PXA_PWM

The PXA_PWM config option is really redundant since the introduction
of HAVE_PWM, replace that with HAVE_PWM to avoid confusion.

Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 17d3fbd..47747f9 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -505,12 +505,6 @@ config PXA_SSP
 	help
 	  Enable support for PXA2xx SSP ports
 
-config PXA_PWM
-	tristate
-	default BACKLIGHT_PWM
-	help
-	  Enable support for PXA2xx/PXA3xx PWM controllers
-
 config TOSA_BT
 	tristate "Control the state of built-in bluetooth chip on Sharp SL-6000"
 	depends on MACH_TOSA
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index 682dbf4..4bcebf7 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -15,7 +15,7 @@ endif
 
 # Generic drivers that other drivers may depend upon
 obj-$(CONFIG_PXA_SSP)		+= ssp.o
-obj-$(CONFIG_PXA_PWM)		+= pwm.o
+obj-$(CONFIG_HAVE_PWM)		+= pwm.o
 
 # SoC-specific code
 obj-$(CONFIG_PXA25x)		+= mfp-pxa2xx.o pxa2xx.o pxa25x.o
-- 
cgit v0.10.2


From 3852ac7113f313d96af80257a070fd8ab2eea5c6 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 16:28:02 +0800
Subject: [ARM] pxa: move pwm.c to common plat-pxa directory

Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index 4bcebf7..47d0157 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -15,7 +15,6 @@ endif
 
 # Generic drivers that other drivers may depend upon
 obj-$(CONFIG_PXA_SSP)		+= ssp.o
-obj-$(CONFIG_HAVE_PWM)		+= pwm.o
 
 # SoC-specific code
 obj-$(CONFIG_PXA25x)		+= mfp-pxa2xx.o pxa2xx.o pxa25x.o
diff --git a/arch/arm/mach-pxa/pwm.c b/arch/arm/mach-pxa/pwm.c
deleted file mode 100644
index 5513c51..0000000
--- a/arch/arm/mach-pxa/pwm.c
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * linux/arch/arm/mach-pxa/pwm.c
- *
- * simple driver for PWM (Pulse Width Modulator) controller
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 2008-02-13	initial version
- * 		eric miao <eric.miao@marvell.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/pwm.h>
-
-#include <asm/div64.h>
-
-#define HAS_SECONDARY_PWM	0x10
-#define PWM_ID_BASE(d)		((d) & 0xf)
-
-static const struct platform_device_id pwm_id_table[] = {
-	/*   PWM    has_secondary_pwm? */
-	{ "pxa25x-pwm", 0 },
-	{ "pxa27x-pwm", 0 | HAS_SECONDARY_PWM },
-	{ },
-};
-MODULE_DEVICE_TABLE(platform, pwm_id_table);
-
-/* PWM registers and bits definitions */
-#define PWMCR		(0x00)
-#define PWMDCR		(0x04)
-#define PWMPCR		(0x08)
-
-#define PWMCR_SD	(1 << 6)
-#define PWMDCR_FD	(1 << 10)
-
-struct pwm_device {
-	struct list_head	node;
-	struct pwm_device	*secondary;
-	struct platform_device	*pdev;
-
-	const char	*label;
-	struct clk	*clk;
-	int		clk_enabled;
-	void __iomem	*mmio_base;
-
-	unsigned int	use_count;
-	unsigned int	pwm_id;
-};
-
-/*
- * period_ns = 10^9 * (PRESCALE + 1) * (PV + 1) / PWM_CLK_RATE
- * duty_ns   = 10^9 * (PRESCALE + 1) * DC / PWM_CLK_RATE
- */
-int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
-{
-	unsigned long long c;
-	unsigned long period_cycles, prescale, pv, dc;
-
-	if (pwm == NULL || period_ns == 0 || duty_ns > period_ns)
-		return -EINVAL;
-
-	c = clk_get_rate(pwm->clk);
-	c = c * period_ns;
-	do_div(c, 1000000000);
-	period_cycles = c;
-
-	if (period_cycles < 1)
-		period_cycles = 1;
-	prescale = (period_cycles - 1) / 1024;
-	pv = period_cycles / (prescale + 1) - 1;
-
-	if (prescale > 63)
-		return -EINVAL;
-
-	if (duty_ns == period_ns)
-		dc = PWMDCR_FD;
-	else
-		dc = (pv + 1) * duty_ns / period_ns;
-
-	/* NOTE: the clock to PWM has to be enabled first
-	 * before writing to the registers
-	 */
-	clk_enable(pwm->clk);
-	__raw_writel(prescale, pwm->mmio_base + PWMCR);
-	__raw_writel(dc, pwm->mmio_base + PWMDCR);
-	__raw_writel(pv, pwm->mmio_base + PWMPCR);
-	clk_disable(pwm->clk);
-
-	return 0;
-}
-EXPORT_SYMBOL(pwm_config);
-
-int pwm_enable(struct pwm_device *pwm)
-{
-	int rc = 0;
-
-	if (!pwm->clk_enabled) {
-		rc = clk_enable(pwm->clk);
-		if (!rc)
-			pwm->clk_enabled = 1;
-	}
-	return rc;
-}
-EXPORT_SYMBOL(pwm_enable);
-
-void pwm_disable(struct pwm_device *pwm)
-{
-	if (pwm->clk_enabled) {
-		clk_disable(pwm->clk);
-		pwm->clk_enabled = 0;
-	}
-}
-EXPORT_SYMBOL(pwm_disable);
-
-static DEFINE_MUTEX(pwm_lock);
-static LIST_HEAD(pwm_list);
-
-struct pwm_device *pwm_request(int pwm_id, const char *label)
-{
-	struct pwm_device *pwm;
-	int found = 0;
-
-	mutex_lock(&pwm_lock);
-
-	list_for_each_entry(pwm, &pwm_list, node) {
-		if (pwm->pwm_id == pwm_id) {
-			found = 1;
-			break;
-		}
-	}
-
-	if (found) {
-		if (pwm->use_count == 0) {
-			pwm->use_count++;
-			pwm->label = label;
-		} else
-			pwm = ERR_PTR(-EBUSY);
-	} else
-		pwm = ERR_PTR(-ENOENT);
-
-	mutex_unlock(&pwm_lock);
-	return pwm;
-}
-EXPORT_SYMBOL(pwm_request);
-
-void pwm_free(struct pwm_device *pwm)
-{
-	mutex_lock(&pwm_lock);
-
-	if (pwm->use_count) {
-		pwm->use_count--;
-		pwm->label = NULL;
-	} else
-		pr_warning("PWM device already freed\n");
-
-	mutex_unlock(&pwm_lock);
-}
-EXPORT_SYMBOL(pwm_free);
-
-static inline void __add_pwm(struct pwm_device *pwm)
-{
-	mutex_lock(&pwm_lock);
-	list_add_tail(&pwm->node, &pwm_list);
-	mutex_unlock(&pwm_lock);
-}
-
-static int __devinit pwm_probe(struct platform_device *pdev)
-{
-	struct platform_device_id *id = platform_get_device_id(pdev);
-	struct pwm_device *pwm, *secondary = NULL;
-	struct resource *r;
-	int ret = 0;
-
-	pwm = kzalloc(sizeof(struct pwm_device), GFP_KERNEL);
-	if (pwm == NULL) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
-		return -ENOMEM;
-	}
-
-	pwm->clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(pwm->clk)) {
-		ret = PTR_ERR(pwm->clk);
-		goto err_free;
-	}
-	pwm->clk_enabled = 0;
-
-	pwm->use_count = 0;
-	pwm->pwm_id = PWM_ID_BASE(id->driver_data) + pdev->id;
-	pwm->pdev = pdev;
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (r == NULL) {
-		dev_err(&pdev->dev, "no memory resource defined\n");
-		ret = -ENODEV;
-		goto err_free_clk;
-	}
-
-	r = request_mem_region(r->start, r->end - r->start + 1, pdev->name);
-	if (r == NULL) {
-		dev_err(&pdev->dev, "failed to request memory resource\n");
-		ret = -EBUSY;
-		goto err_free_clk;
-	}
-
-	pwm->mmio_base = ioremap(r->start, r->end - r->start + 1);
-	if (pwm->mmio_base == NULL) {
-		dev_err(&pdev->dev, "failed to ioremap() registers\n");
-		ret = -ENODEV;
-		goto err_free_mem;
-	}
-
-	if (id->driver_data & HAS_SECONDARY_PWM) {
-		secondary = kzalloc(sizeof(struct pwm_device), GFP_KERNEL);
-		if (secondary == NULL) {
-			ret = -ENOMEM;
-			goto err_free_mem;
-		}
-
-		*secondary = *pwm;
-		pwm->secondary = secondary;
-
-		/* registers for the second PWM has offset of 0x10 */
-		secondary->mmio_base = pwm->mmio_base + 0x10;
-		secondary->pwm_id = pdev->id + 2;
-	}
-
-	__add_pwm(pwm);
-	if (secondary)
-		__add_pwm(secondary);
-
-	platform_set_drvdata(pdev, pwm);
-	return 0;
-
-err_free_mem:
-	release_mem_region(r->start, r->end - r->start + 1);
-err_free_clk:
-	clk_put(pwm->clk);
-err_free:
-	kfree(pwm);
-	return ret;
-}
-
-static int __devexit pwm_remove(struct platform_device *pdev)
-{
-	struct pwm_device *pwm;
-	struct resource *r;
-
-	pwm = platform_get_drvdata(pdev);
-	if (pwm == NULL)
-		return -ENODEV;
-
-	mutex_lock(&pwm_lock);
-
-	if (pwm->secondary) {
-		list_del(&pwm->secondary->node);
-		kfree(pwm->secondary);
-	}
-
-	list_del(&pwm->node);
-	mutex_unlock(&pwm_lock);
-
-	iounmap(pwm->mmio_base);
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(r->start, r->end - r->start + 1);
-
-	clk_put(pwm->clk);
-	kfree(pwm);
-	return 0;
-}
-
-static struct platform_driver pwm_driver = {
-	.driver		= {
-		.name	= "pxa25x-pwm",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= pwm_probe,
-	.remove		= __devexit_p(pwm_remove),
-	.id_table	= pwm_id_table,
-};
-
-static int __init pwm_init(void)
-{
-	return platform_driver_register(&pwm_driver);
-}
-arch_initcall(pwm_init);
-
-static void __exit pwm_exit(void)
-{
-	platform_driver_unregister(&pwm_driver);
-}
-module_exit(pwm_exit);
-
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/plat-pxa/Makefile b/arch/arm/plat-pxa/Makefile
index 8f2c4c7..0264bfb 100644
--- a/arch/arm/plat-pxa/Makefile
+++ b/arch/arm/plat-pxa/Makefile
@@ -7,3 +7,5 @@ obj-y	:= dma.o
 obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 obj-$(CONFIG_PXA3xx)		+= mfp.o
 obj-$(CONFIG_ARCH_MMP)		+= mfp.o
+
+obj-$(CONFIG_HAVE_PWM)		+= pwm.o
diff --git a/arch/arm/plat-pxa/pwm.c b/arch/arm/plat-pxa/pwm.c
new file mode 100644
index 0000000..5513c51
--- /dev/null
+++ b/arch/arm/plat-pxa/pwm.c
@@ -0,0 +1,301 @@
+/*
+ * linux/arch/arm/mach-pxa/pwm.c
+ *
+ * simple driver for PWM (Pulse Width Modulator) controller
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 2008-02-13	initial version
+ * 		eric miao <eric.miao@marvell.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/pwm.h>
+
+#include <asm/div64.h>
+
+#define HAS_SECONDARY_PWM	0x10
+#define PWM_ID_BASE(d)		((d) & 0xf)
+
+static const struct platform_device_id pwm_id_table[] = {
+	/*   PWM    has_secondary_pwm? */
+	{ "pxa25x-pwm", 0 },
+	{ "pxa27x-pwm", 0 | HAS_SECONDARY_PWM },
+	{ },
+};
+MODULE_DEVICE_TABLE(platform, pwm_id_table);
+
+/* PWM registers and bits definitions */
+#define PWMCR		(0x00)
+#define PWMDCR		(0x04)
+#define PWMPCR		(0x08)
+
+#define PWMCR_SD	(1 << 6)
+#define PWMDCR_FD	(1 << 10)
+
+struct pwm_device {
+	struct list_head	node;
+	struct pwm_device	*secondary;
+	struct platform_device	*pdev;
+
+	const char	*label;
+	struct clk	*clk;
+	int		clk_enabled;
+	void __iomem	*mmio_base;
+
+	unsigned int	use_count;
+	unsigned int	pwm_id;
+};
+
+/*
+ * period_ns = 10^9 * (PRESCALE + 1) * (PV + 1) / PWM_CLK_RATE
+ * duty_ns   = 10^9 * (PRESCALE + 1) * DC / PWM_CLK_RATE
+ */
+int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
+{
+	unsigned long long c;
+	unsigned long period_cycles, prescale, pv, dc;
+
+	if (pwm == NULL || period_ns == 0 || duty_ns > period_ns)
+		return -EINVAL;
+
+	c = clk_get_rate(pwm->clk);
+	c = c * period_ns;
+	do_div(c, 1000000000);
+	period_cycles = c;
+
+	if (period_cycles < 1)
+		period_cycles = 1;
+	prescale = (period_cycles - 1) / 1024;
+	pv = period_cycles / (prescale + 1) - 1;
+
+	if (prescale > 63)
+		return -EINVAL;
+
+	if (duty_ns == period_ns)
+		dc = PWMDCR_FD;
+	else
+		dc = (pv + 1) * duty_ns / period_ns;
+
+	/* NOTE: the clock to PWM has to be enabled first
+	 * before writing to the registers
+	 */
+	clk_enable(pwm->clk);
+	__raw_writel(prescale, pwm->mmio_base + PWMCR);
+	__raw_writel(dc, pwm->mmio_base + PWMDCR);
+	__raw_writel(pv, pwm->mmio_base + PWMPCR);
+	clk_disable(pwm->clk);
+
+	return 0;
+}
+EXPORT_SYMBOL(pwm_config);
+
+int pwm_enable(struct pwm_device *pwm)
+{
+	int rc = 0;
+
+	if (!pwm->clk_enabled) {
+		rc = clk_enable(pwm->clk);
+		if (!rc)
+			pwm->clk_enabled = 1;
+	}
+	return rc;
+}
+EXPORT_SYMBOL(pwm_enable);
+
+void pwm_disable(struct pwm_device *pwm)
+{
+	if (pwm->clk_enabled) {
+		clk_disable(pwm->clk);
+		pwm->clk_enabled = 0;
+	}
+}
+EXPORT_SYMBOL(pwm_disable);
+
+static DEFINE_MUTEX(pwm_lock);
+static LIST_HEAD(pwm_list);
+
+struct pwm_device *pwm_request(int pwm_id, const char *label)
+{
+	struct pwm_device *pwm;
+	int found = 0;
+
+	mutex_lock(&pwm_lock);
+
+	list_for_each_entry(pwm, &pwm_list, node) {
+		if (pwm->pwm_id == pwm_id) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (found) {
+		if (pwm->use_count == 0) {
+			pwm->use_count++;
+			pwm->label = label;
+		} else
+			pwm = ERR_PTR(-EBUSY);
+	} else
+		pwm = ERR_PTR(-ENOENT);
+
+	mutex_unlock(&pwm_lock);
+	return pwm;
+}
+EXPORT_SYMBOL(pwm_request);
+
+void pwm_free(struct pwm_device *pwm)
+{
+	mutex_lock(&pwm_lock);
+
+	if (pwm->use_count) {
+		pwm->use_count--;
+		pwm->label = NULL;
+	} else
+		pr_warning("PWM device already freed\n");
+
+	mutex_unlock(&pwm_lock);
+}
+EXPORT_SYMBOL(pwm_free);
+
+static inline void __add_pwm(struct pwm_device *pwm)
+{
+	mutex_lock(&pwm_lock);
+	list_add_tail(&pwm->node, &pwm_list);
+	mutex_unlock(&pwm_lock);
+}
+
+static int __devinit pwm_probe(struct platform_device *pdev)
+{
+	struct platform_device_id *id = platform_get_device_id(pdev);
+	struct pwm_device *pwm, *secondary = NULL;
+	struct resource *r;
+	int ret = 0;
+
+	pwm = kzalloc(sizeof(struct pwm_device), GFP_KERNEL);
+	if (pwm == NULL) {
+		dev_err(&pdev->dev, "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	pwm->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(pwm->clk)) {
+		ret = PTR_ERR(pwm->clk);
+		goto err_free;
+	}
+	pwm->clk_enabled = 0;
+
+	pwm->use_count = 0;
+	pwm->pwm_id = PWM_ID_BASE(id->driver_data) + pdev->id;
+	pwm->pdev = pdev;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (r == NULL) {
+		dev_err(&pdev->dev, "no memory resource defined\n");
+		ret = -ENODEV;
+		goto err_free_clk;
+	}
+
+	r = request_mem_region(r->start, r->end - r->start + 1, pdev->name);
+	if (r == NULL) {
+		dev_err(&pdev->dev, "failed to request memory resource\n");
+		ret = -EBUSY;
+		goto err_free_clk;
+	}
+
+	pwm->mmio_base = ioremap(r->start, r->end - r->start + 1);
+	if (pwm->mmio_base == NULL) {
+		dev_err(&pdev->dev, "failed to ioremap() registers\n");
+		ret = -ENODEV;
+		goto err_free_mem;
+	}
+
+	if (id->driver_data & HAS_SECONDARY_PWM) {
+		secondary = kzalloc(sizeof(struct pwm_device), GFP_KERNEL);
+		if (secondary == NULL) {
+			ret = -ENOMEM;
+			goto err_free_mem;
+		}
+
+		*secondary = *pwm;
+		pwm->secondary = secondary;
+
+		/* registers for the second PWM has offset of 0x10 */
+		secondary->mmio_base = pwm->mmio_base + 0x10;
+		secondary->pwm_id = pdev->id + 2;
+	}
+
+	__add_pwm(pwm);
+	if (secondary)
+		__add_pwm(secondary);
+
+	platform_set_drvdata(pdev, pwm);
+	return 0;
+
+err_free_mem:
+	release_mem_region(r->start, r->end - r->start + 1);
+err_free_clk:
+	clk_put(pwm->clk);
+err_free:
+	kfree(pwm);
+	return ret;
+}
+
+static int __devexit pwm_remove(struct platform_device *pdev)
+{
+	struct pwm_device *pwm;
+	struct resource *r;
+
+	pwm = platform_get_drvdata(pdev);
+	if (pwm == NULL)
+		return -ENODEV;
+
+	mutex_lock(&pwm_lock);
+
+	if (pwm->secondary) {
+		list_del(&pwm->secondary->node);
+		kfree(pwm->secondary);
+	}
+
+	list_del(&pwm->node);
+	mutex_unlock(&pwm_lock);
+
+	iounmap(pwm->mmio_base);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(r->start, r->end - r->start + 1);
+
+	clk_put(pwm->clk);
+	kfree(pwm);
+	return 0;
+}
+
+static struct platform_driver pwm_driver = {
+	.driver		= {
+		.name	= "pxa25x-pwm",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pwm_probe,
+	.remove		= __devexit_p(pwm_remove),
+	.id_table	= pwm_id_table,
+};
+
+static int __init pwm_init(void)
+{
+	return platform_driver_register(&pwm_driver);
+}
+arch_initcall(pwm_init);
+
+static void __exit pwm_exit(void)
+{
+	platform_driver_unregister(&pwm_driver);
+}
+module_exit(pwm_exit);
+
+MODULE_LICENSE("GPL v2");
-- 
cgit v0.10.2


From 2a55b910e0d240984860fa0264866c122751bd09 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 18:02:13 +0800
Subject: [ARM] pxa: add missing IRQ_PXA910_NONE to irqs.h

Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-mmp/include/mach/irqs.h b/arch/arm/mach-mmp/include/mach/irqs.h
index e83e45e..16295cf 100644
--- a/arch/arm/mach-mmp/include/mach/irqs.h
+++ b/arch/arm/mach-mmp/include/mach/irqs.h
@@ -52,6 +52,7 @@
 /*
  * Interrupt numbers for PXA910
  */
+#define IRQ_PXA910_NONE			(-1)
 #define IRQ_PXA910_AIRQ			0
 #define IRQ_PXA910_SSP3			1
 #define IRQ_PXA910_SSP2			2
-- 
cgit v0.10.2


From a27ba768a11ac7a1d56688d4224cef3a802d1f89 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.miao@marvell.com>
Date: Mon, 13 Apr 2009 18:29:52 +0800
Subject: [ARM] pxa: add PWM devices support for pxa168/910

Signed-off-by: Mingwei Wang <mingwei.wang@marvell.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
index dc42263..3b216bf 100644
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
@@ -264,4 +264,27 @@
 #define GPIO116_I2S_RXD		MFP_CFG(GPIO116,AF2)
 #define GPIO117_I2S_TXD		MFP_CFG(GPIO117,AF2)
 
+/* PWM */
+#define GPIO96_PWM3_OUT		MFP_CFG(GPIO96, AF1)
+#define GPIO97_PWM2_OUT		MFP_CFG(GPIO97, AF1)
+#define GPIO98_PWM1_OUT		MFP_CFG(GPIO98, AF1)
+#define GPIO104_PWM4_OUT	MFP_CFG(GPIO104, AF1)
+#define GPIO106_PWM2_OUT	MFP_CFG(GPIO106, AF2)
+#define GPIO74_PWM4_OUT		MFP_CFG(GPIO74, AF2)
+#define GPIO75_PWM3_OUT		MFP_CFG(GPIO75, AF2)
+#define GPIO76_PWM2_OUT		MFP_CFG(GPIO76, AF2)
+#define GPIO77_PWM1_OUT		MFP_CFG(GPIO77, AF2)
+#define GPIO82_PWM4_OUT		MFP_CFG(GPIO82, AF2)
+#define GPIO83_PWM3_OUT		MFP_CFG(GPIO83, AF2)
+#define GPIO84_PWM2_OUT		MFP_CFG(GPIO84, AF2)
+#define GPIO85_PWM1_OUT		MFP_CFG(GPIO85, AF2)
+#define GPIO84_PWM1_OUT		MFP_CFG(GPIO84, AF4)
+#define GPIO122_PWM3_OUT	MFP_CFG(GPIO122, AF3)
+#define GPIO123_PWM1_OUT	MFP_CFG(GPIO123, AF1)
+#define GPIO124_PWM2_OUT	MFP_CFG(GPIO124, AF1)
+#define GPIO125_PWM3_OUT	MFP_CFG(GPIO125, AF1)
+#define GPIO126_PWM4_OUT	MFP_CFG(GPIO126, AF1)
+#define GPIO86_PWM1_OUT		MFP_CFG(GPIO86, AF2)
+#define GPIO86_PWM2_OUT		MFP_CFG(GPIO86, AF3)
+
 #endif /* __ASM_MACH_MFP_PXA168_H */
diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa910.h b/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
index d97de36..bf1189f 100644
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa910.h
@@ -159,4 +159,12 @@
 #define MMC1_CD_MMC1_CD		MFP_CFG_DRV(MMC1_CD, AF0, MEDIUM)
 #define MMC1_WP_MMC1_WP		MFP_CFG_DRV(MMC1_WP, AF0, MEDIUM)
 
+/* PWM */
+#define GPIO27 PWM3 AF2		MFP_CFG(GPIO27, AF2)
+#define GPIO51_PWM2_OUT		MFP_CFG(GPIO51, AF2)
+#define GPIO117_PWM1_OUT	MFP_CFG(GPIO117, AF2)
+#define GPIO118_PWM2_OUT	MFP_CFG(GPIO118, AF2)
+#define GPIO119_PWM3_OUT	MFP_CFG(GPIO119, AF2)
+#define GPIO120_PWM4_OUT	MFP_CFG(GPIO120, AF2)
+
 #endif /* __ASM_MACH MFP_PXA910_H */
diff --git a/arch/arm/mach-mmp/include/mach/pxa168.h b/arch/arm/mach-mmp/include/mach/pxa168.h
index bfdd629..6bf1f0e 100644
--- a/arch/arm/mach-mmp/include/mach/pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/pxa168.h
@@ -9,6 +9,10 @@ extern struct pxa_device_desc pxa168_device_uart1;
 extern struct pxa_device_desc pxa168_device_uart2;
 extern struct pxa_device_desc pxa168_device_twsi0;
 extern struct pxa_device_desc pxa168_device_twsi1;
+extern struct pxa_device_desc pxa168_device_pwm1;
+extern struct pxa_device_desc pxa168_device_pwm2;
+extern struct pxa_device_desc pxa168_device_pwm3;
+extern struct pxa_device_desc pxa168_device_pwm4;
 
 static inline int pxa168_add_uart(int id)
 {
@@ -44,4 +48,20 @@ static inline int pxa168_add_twsi(int id, struct i2c_pxa_platform_data *data,
 
 	return pxa_register_device(d, data, sizeof(*data));
 }
+
+static inline int pxa168_add_pwm(int id)
+{
+	struct pxa_device_desc *d = NULL;
+
+	switch (id) {
+	case 1: d = &pxa168_device_pwm1; break;
+	case 2: d = &pxa168_device_pwm2; break;
+	case 3: d = &pxa168_device_pwm3; break;
+	case 4: d = &pxa168_device_pwm4; break;
+	default:
+		return -EINVAL;
+	}
+
+	return pxa_register_device(d, NULL, 0);
+}
 #endif /* __ASM_MACH_PXA168_H */
diff --git a/arch/arm/mach-mmp/include/mach/pxa910.h b/arch/arm/mach-mmp/include/mach/pxa910.h
index a0f0cbe..6ae1ed7 100644
--- a/arch/arm/mach-mmp/include/mach/pxa910.h
+++ b/arch/arm/mach-mmp/include/mach/pxa910.h
@@ -9,6 +9,10 @@ extern struct pxa_device_desc pxa910_device_uart1;
 extern struct pxa_device_desc pxa910_device_uart2;
 extern struct pxa_device_desc pxa910_device_twsi0;
 extern struct pxa_device_desc pxa910_device_twsi1;
+extern struct pxa_device_desc pxa910_device_pwm1;
+extern struct pxa_device_desc pxa910_device_pwm2;
+extern struct pxa_device_desc pxa910_device_pwm3;
+extern struct pxa_device_desc pxa910_device_pwm4;
 
 static inline int pxa910_add_uart(int id)
 {
@@ -44,4 +48,20 @@ static inline int pxa910_add_twsi(int id, struct i2c_pxa_platform_data *data,
 
 	return pxa_register_device(d, data, sizeof(*data));
 }
+
+static inline int pxa910_add_pwm(int id)
+{
+	struct pxa_device_desc *d = NULL;
+
+	switch (id) {
+	case 1: d = &pxa910_device_pwm1; break;
+	case 2: d = &pxa910_device_pwm2; break;
+	case 3: d = &pxa910_device_pwm3; break;
+	case 4: d = &pxa910_device_pwm4; break;
+	default:
+		return -EINVAL;
+	}
+
+	return pxa_register_device(d, NULL, 0);
+}
 #endif /* __ASM_MACH_PXA910_H */
diff --git a/arch/arm/mach-mmp/include/mach/regs-apbc.h b/arch/arm/mach-mmp/include/mach/regs-apbc.h
index c6b8c9d..98ccbee 100644
--- a/arch/arm/mach-mmp/include/mach/regs-apbc.h
+++ b/arch/arm/mach-mmp/include/mach/regs-apbc.h
@@ -22,8 +22,10 @@
 #define APBC_PXA168_UART1	APBC_REG(0x000)
 #define APBC_PXA168_UART2	APBC_REG(0x004)
 #define APBC_PXA168_GPIO	APBC_REG(0x008)
-#define APBC_PXA168_PWM0	APBC_REG(0x00c)
-#define APBC_PXA168_PWM1	APBC_REG(0x010)
+#define APBC_PXA168_PWM1	APBC_REG(0x00c)
+#define APBC_PXA168_PWM2	APBC_REG(0x010)
+#define APBC_PXA168_PWM3	APBC_REG(0x014)
+#define APBC_PXA168_PWM4	APBC_REG(0x018)
 #define APBC_PXA168_SSP1	APBC_REG(0x01c)
 #define APBC_PXA168_SSP2	APBC_REG(0x020)
 #define APBC_PXA168_RTC		APBC_REG(0x028)
@@ -48,10 +50,10 @@
 #define APBC_PXA910_UART0	APBC_REG(0x000)
 #define APBC_PXA910_UART1	APBC_REG(0x004)
 #define APBC_PXA910_GPIO	APBC_REG(0x008)
-#define APBC_PXA910_PWM0	APBC_REG(0x00c)
-#define APBC_PXA910_PWM1	APBC_REG(0x010)
-#define APBC_PXA910_PWM2	APBC_REG(0x014)
-#define APBC_PXA910_PWM3	APBC_REG(0x018)
+#define APBC_PXA910_PWM1	APBC_REG(0x00c)
+#define APBC_PXA910_PWM2	APBC_REG(0x010)
+#define APBC_PXA910_PWM3	APBC_REG(0x014)
+#define APBC_PXA910_PWM4	APBC_REG(0x018)
 #define APBC_PXA910_SSP1	APBC_REG(0x01c)
 #define APBC_PXA910_SSP2	APBC_REG(0x020)
 #define APBC_PXA910_IPC		APBC_REG(0x024)
diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c
index e0729e3..71b1ae3 100644
--- a/arch/arm/mach-mmp/pxa168.c
+++ b/arch/arm/mach-mmp/pxa168.c
@@ -67,6 +67,10 @@ static APBC_CLK(uart1, PXA168_UART1, 1, 14745600);
 static APBC_CLK(uart2, PXA168_UART2, 1, 14745600);
 static APBC_CLK(twsi0, PXA168_TWSI0, 1, 33000000);
 static APBC_CLK(twsi1, PXA168_TWSI1, 1, 33000000);
+static APBC_CLK(pwm1, PXA168_PWM1, 1, 13000000);
+static APBC_CLK(pwm2, PXA168_PWM2, 1, 13000000);
+static APBC_CLK(pwm3, PXA168_PWM3, 1, 13000000);
+static APBC_CLK(pwm4, PXA168_PWM4, 1, 13000000);
 
 /* device and clock bindings */
 static struct clk_lookup pxa168_clkregs[] = {
@@ -74,6 +78,10 @@ static struct clk_lookup pxa168_clkregs[] = {
 	INIT_CLKREG(&clk_uart2, "pxa2xx-uart.1", NULL),
 	INIT_CLKREG(&clk_twsi0, "pxa2xx-i2c.0", NULL),
 	INIT_CLKREG(&clk_twsi1, "pxa2xx-i2c.1", NULL),
+	INIT_CLKREG(&clk_pwm1, "pxa168-pwm.0", NULL),
+	INIT_CLKREG(&clk_pwm2, "pxa168-pwm.1", NULL),
+	INIT_CLKREG(&clk_pwm3, "pxa168-pwm.2", NULL),
+	INIT_CLKREG(&clk_pwm4, "pxa168-pwm.3", NULL),
 };
 
 static int __init pxa168_init(void)
@@ -115,3 +123,7 @@ PXA168_DEVICE(uart1, "pxa2xx-uart", 0, UART1, 0xd4017000, 0x30, 21, 22);
 PXA168_DEVICE(uart2, "pxa2xx-uart", 1, UART2, 0xd4018000, 0x30, 23, 24);
 PXA168_DEVICE(twsi0, "pxa2xx-i2c", 0, TWSI0, 0xd4011000, 0x28);
 PXA168_DEVICE(twsi1, "pxa2xx-i2c", 1, TWSI1, 0xd4025000, 0x28);
+PXA168_DEVICE(pwm1, "pxa168-pwm", 0, NONE, 0xd401a000, 0x10);
+PXA168_DEVICE(pwm2, "pxa168-pwm", 1, NONE, 0xd401a400, 0x10);
+PXA168_DEVICE(pwm3, "pxa168-pwm", 2, NONE, 0xd401a800, 0x10);
+PXA168_DEVICE(pwm4, "pxa168-pwm", 3, NONE, 0xd401ac00, 0x10);
diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c
index b97328b..5882ca6 100644
--- a/arch/arm/mach-mmp/pxa910.c
+++ b/arch/arm/mach-mmp/pxa910.c
@@ -105,6 +105,10 @@ static APBC_CLK(uart1, PXA910_UART0, 1, 14745600);
 static APBC_CLK(uart2, PXA910_UART1, 1, 14745600);
 static APBC_CLK(twsi0, PXA168_TWSI0, 1, 33000000);
 static APBC_CLK(twsi1, PXA168_TWSI1, 1, 33000000);
+static APBC_CLK(pwm1, PXA910_PWM1, 1, 13000000);
+static APBC_CLK(pwm2, PXA910_PWM2, 1, 13000000);
+static APBC_CLK(pwm3, PXA910_PWM3, 1, 13000000);
+static APBC_CLK(pwm4, PXA910_PWM4, 1, 13000000);
 
 /* device and clock bindings */
 static struct clk_lookup pxa910_clkregs[] = {
@@ -112,6 +116,10 @@ static struct clk_lookup pxa910_clkregs[] = {
 	INIT_CLKREG(&clk_uart2, "pxa2xx-uart.1", NULL),
 	INIT_CLKREG(&clk_twsi0, "pxa2xx-i2c.0", NULL),
 	INIT_CLKREG(&clk_twsi1, "pxa2xx-i2c.1", NULL),
+	INIT_CLKREG(&clk_pwm1, "pxa910-pwm.0", NULL),
+	INIT_CLKREG(&clk_pwm2, "pxa910-pwm.1", NULL),
+	INIT_CLKREG(&clk_pwm3, "pxa910-pwm.2", NULL),
+	INIT_CLKREG(&clk_pwm4, "pxa910-pwm.3", NULL),
 };
 
 static int __init pxa910_init(void)
@@ -162,3 +170,7 @@ PXA910_DEVICE(uart1, "pxa2xx-uart", 0, UART2, 0xd4017000, 0x30, 21, 22);
 PXA910_DEVICE(uart2, "pxa2xx-uart", 1, UART3, 0xd4018000, 0x30, 23, 24);
 PXA910_DEVICE(twsi0, "pxa2xx-i2c", 0, TWSI0, 0xd4011000, 0x28);
 PXA910_DEVICE(twsi1, "pxa2xx-i2c", 1, TWSI1, 0xd4025000, 0x28);
+PXA910_DEVICE(pwm1, "pxa910-pwm", 0, NONE, 0xd401a000, 0x10);
+PXA910_DEVICE(pwm2, "pxa910-pwm", 1, NONE, 0xd401a400, 0x10);
+PXA910_DEVICE(pwm3, "pxa910-pwm", 2, NONE, 0xd401a800, 0x10);
+PXA910_DEVICE(pwm4, "pxa910-pwm", 3, NONE, 0xd401ac00, 0x10);
diff --git a/arch/arm/plat-pxa/pwm.c b/arch/arm/plat-pxa/pwm.c
index 5513c51..a9eabdc 100644
--- a/arch/arm/plat-pxa/pwm.c
+++ b/arch/arm/plat-pxa/pwm.c
@@ -28,6 +28,8 @@ static const struct platform_device_id pwm_id_table[] = {
 	/*   PWM    has_secondary_pwm? */
 	{ "pxa25x-pwm", 0 },
 	{ "pxa27x-pwm", 0 | HAS_SECONDARY_PWM },
+	{ "pxa168-pwm", 1 },
+	{ "pxa910-pwm", 1 },
 	{ },
 };
 MODULE_DEVICE_TABLE(platform, pwm_id_table);
-- 
cgit v0.10.2


From 46580c03062da8c047238a6b3d2b2b13af1700b5 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 14 Apr 2009 08:33:49 +0300
Subject: [ARM] pxa: update pxa2xx_spi_chip initialization to use .gpio_cs
 field

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index f04e505..962dda2 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -445,13 +445,8 @@ static struct ads7846_platform_data corgi_ads7846_info = {
 	.wait_for_sync		= corgi_wait_for_hsync,
 };
 
-static void corgi_ads7846_cs(u32 command)
-{
-	gpio_set_value(CORGI_GPIO_ADS7846_CS, !(command == PXA2XX_CS_ASSERT));
-}
-
 static struct pxa2xx_spi_chip corgi_ads7846_chip = {
-	.cs_control	= corgi_ads7846_cs,
+	.gpio_cs	= CORGI_GPIO_ADS7846_CS,
 };
 
 static void corgi_bl_kick_battery(void)
@@ -475,22 +470,12 @@ static struct corgi_lcd_platform_data corgi_lcdcon_info = {
 	.kick_battery		= corgi_bl_kick_battery,
 };
 
-static void corgi_lcdcon_cs(u32 command)
-{
-	gpio_set_value(CORGI_GPIO_LCDCON_CS, !(command == PXA2XX_CS_ASSERT));
-}
-
 static struct pxa2xx_spi_chip corgi_lcdcon_chip = {
-	.cs_control	= corgi_lcdcon_cs,
+	.gpio_cs	= CORGI_GPIO_LCDCON_CS,
 };
 
-static void corgi_max1111_cs(u32 command)
-{
-	gpio_set_value(CORGI_GPIO_MAX1111_CS, !(command == PXA2XX_CS_ASSERT));
-}
-
 static struct pxa2xx_spi_chip corgi_max1111_chip = {
-	.cs_control	= corgi_max1111_cs,
+	.gpio_cs	= CORGI_GPIO_MAX1111_CS,
 };
 
 static struct spi_board_info corgi_spi_devices[] = {
@@ -520,32 +505,8 @@ static struct spi_board_info corgi_spi_devices[] = {
 
 static void __init corgi_init_spi(void)
 {
-	int err;
-
-	err = gpio_request(CORGI_GPIO_ADS7846_CS, "ADS7846_CS");
-	if (err)
-		return;
-
-	err = gpio_request(CORGI_GPIO_LCDCON_CS, "LCDCON_CS");
-	if (err)
-		goto err_free_1;
-
-	err = gpio_request(CORGI_GPIO_MAX1111_CS, "MAX1111_CS");
-	if (err)
-		goto err_free_2;
-
-	gpio_direction_output(CORGI_GPIO_ADS7846_CS, 1);
-	gpio_direction_output(CORGI_GPIO_LCDCON_CS, 1);
-	gpio_direction_output(CORGI_GPIO_MAX1111_CS, 1);
-
 	pxa2xx_set_spi_info(1, &corgi_spi_info);
 	spi_register_board_info(ARRAY_AND_SIZE(corgi_spi_devices));
-	return;
-
-err_free_2:
-	gpio_free(CORGI_GPIO_LCDCON_CS);
-err_free_1:
-	gpio_free(CORGI_GPIO_ADS7846_CS);
 }
 #else
 static inline void corgi_init_spi(void) {}
diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index 821e65a..3ef98d4 100644
--- a/arch/arm/mach-pxa/imote2.c
+++ b/arch/arm/mach-pxa/imote2.c
@@ -456,25 +456,12 @@ static struct pxa2xx_spi_master pxa_ssp_master_2_info = {
 	.num_chipselect = 1,
 };
 
-/* Patch posted by Eric Miao <eric.miao@marvell.com> will remove
- * the need for these functions.
- */
-static void spi1control(u32 command)
-{
-	gpio_set_value(24, command & PXA2XX_CS_ASSERT ? 0 : 1);
-};
-
-static void spi3control(u32 command)
-{
-	gpio_set_value(39, command & PXA2XX_CS_ASSERT ? 0 : 1);
-};
-
 static struct pxa2xx_spi_chip staccel_chip_info = {
 	.tx_threshold = 8,
 	.rx_threshold = 8,
 	.dma_burst_size = 8,
 	.timeout = 235,
-	.cs_control = spi1control,
+	.gpio_cs = 24,
 };
 
 static struct pxa2xx_spi_chip cc2420_info = {
@@ -482,7 +469,7 @@ static struct pxa2xx_spi_chip cc2420_info = {
 	.rx_threshold = 8,
 	.dma_burst_size = 8,
 	.timeout = 235,
-	.cs_control = spi3control,
+	.gpio_cs = 39,
 };
 
 static struct spi_board_info spi_board_info[] __initdata = {
@@ -538,8 +525,6 @@ static void __init imote2_init(void)
 	/* SPI chip select directions - all other directions should
 	 * be handled by drivers.*/
 	gpio_direction_output(37, 0);
-	gpio_direction_output(24, 0);
-	gpio_direction_output(39, 0);
 
 	platform_add_devices(imote2_devices, ARRAY_SIZE(imote2_devices));
 
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index ad5378b..94a3232 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -179,15 +179,10 @@ static struct pxa2xx_spi_master littleton_spi_info = {
 	.num_chipselect		= 1,
 };
 
-static void littleton_tdo24m_cs(u32 cmd)
-{
-	gpio_set_value(LITTLETON_GPIO_LCD_CS, !(cmd == PXA2XX_CS_ASSERT));
-}
-
 static struct pxa2xx_spi_chip littleton_tdo24m_chip = {
 	.rx_threshold	= 1,
 	.tx_threshold	= 1,
-	.cs_control	= littleton_tdo24m_cs,
+	.gpio_cs	= LITTLETON_GPIO_LCD_CS,
 };
 
 static struct spi_board_info littleton_spi_devices[] __initdata = {
@@ -202,16 +197,6 @@ static struct spi_board_info littleton_spi_devices[] __initdata = {
 
 static void __init littleton_init_spi(void)
 {
-	int err;
-
-	err = gpio_request(LITTLETON_GPIO_LCD_CS, "LCD_CS");
-	if (err) {
-		pr_warning("failed to request GPIO for LCS CS\n");
-		return;
-	}
-
-	gpio_direction_output(LITTLETON_GPIO_LCD_CS, 1);
-
 	pxa2xx_set_spi_info(2, &littleton_spi_info);
 	spi_register_board_info(ARRAY_AND_SIZE(littleton_spi_devices));
 }
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 74f4ce6..ac431ed 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -214,13 +214,8 @@ static struct ads7846_platform_data poodle_ads7846_info = {
 	.gpio_pendown		= POODLE_GPIO_TP_INT,
 };
 
-static void ads7846_cs(u32 command)
-{
-	gpio_set_value(POODLE_GPIO_TP_CS, !(command == PXA2XX_CS_ASSERT));
-}
-
 static struct pxa2xx_spi_chip poodle_ads7846_chip = {
-	.cs_control		= ads7846_cs,
+	.gpio_cs		= POODLE_GPIO_TP_CS,
 };
 
 static struct spi_board_info poodle_spi_devices[] = {
@@ -236,14 +231,6 @@ static struct spi_board_info poodle_spi_devices[] = {
 
 static void __init poodle_init_spi(void)
 {
-	int err;
-
-	err = gpio_request(POODLE_GPIO_TP_CS, "ADS7846_CS");
-	if (err)
-		return;
-
-	gpio_direction_output(POODLE_GPIO_TP_CS, 1);
-
 	pxa2xx_set_spi_info(1, &poodle_spi_info);
 	spi_register_board_info(ARRAY_AND_SIZE(poodle_spi_devices));
 }
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 229f485..1182b79 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -317,13 +317,8 @@ static struct ads7846_platform_data spitz_ads7846_info = {
 	.wait_for_sync		= spitz_wait_for_hsync,
 };
 
-static void spitz_ads7846_cs(u32 command)
-{
-	gpio_set_value(SPITZ_GPIO_ADS7846_CS, !(command == PXA2XX_CS_ASSERT));
-}
-
 static struct pxa2xx_spi_chip spitz_ads7846_chip = {
-	.cs_control		= spitz_ads7846_cs,
+	.gpio_cs		= SPITZ_GPIO_ADS7846_CS,
 };
 
 static void spitz_bl_kick_battery(void)
@@ -347,22 +342,12 @@ static struct corgi_lcd_platform_data spitz_lcdcon_info = {
 	.kick_battery		= spitz_bl_kick_battery,
 };
 
-static void spitz_lcdcon_cs(u32 command)
-{
-	gpio_set_value(SPITZ_GPIO_LCDCON_CS, !(command == PXA2XX_CS_ASSERT));
-}
-
 static struct pxa2xx_spi_chip spitz_lcdcon_chip = {
-	.cs_control	= spitz_lcdcon_cs,
+	.gpio_cs	= SPITZ_GPIO_LCDCON_CS,
 };
 
-static void spitz_max1111_cs(u32 command)
-{
-	gpio_set_value(SPITZ_GPIO_MAX1111_CS, !(command == PXA2XX_CS_ASSERT));
-}
-
 static struct pxa2xx_spi_chip spitz_max1111_chip = {
-	.cs_control	= spitz_max1111_cs,
+	.gpio_cs	= SPITZ_GPIO_MAX1111_CS,
 };
 
 static struct spi_board_info spitz_spi_devices[] = {
@@ -392,30 +377,6 @@ static struct spi_board_info spitz_spi_devices[] = {
 
 static void __init spitz_init_spi(void)
 {
-	int err;
-
-	err = gpio_request(SPITZ_GPIO_ADS7846_CS, "ADS7846_CS");
-	if (err)
-		return;
-
-	err = gpio_request(SPITZ_GPIO_LCDCON_CS, "LCDCON_CS");
-	if (err)
-		goto err_free_1;
-
-	err = gpio_request(SPITZ_GPIO_MAX1111_CS, "MAX1111_CS");
-	if (err)
-		goto err_free_2;
-
-	err = gpio_direction_output(SPITZ_GPIO_ADS7846_CS, 1);
-	if (err)
-		goto err_free_3;
-	err = gpio_direction_output(SPITZ_GPIO_LCDCON_CS, 1);
-	if (err)
-		goto err_free_3;
-	err = gpio_direction_output(SPITZ_GPIO_MAX1111_CS, 1);
-	if (err)
-		goto err_free_3;
-
 	if (machine_is_akita()) {
 		spitz_lcdcon_info.gpio_backlight_cont = AKITA_GPIO_BACKLIGHT_CONT;
 		spitz_lcdcon_info.gpio_backlight_on = AKITA_GPIO_BACKLIGHT_ON;
@@ -423,14 +384,6 @@ static void __init spitz_init_spi(void)
 
 	pxa2xx_set_spi_info(2, &spitz_spi_info);
 	spi_register_board_info(ARRAY_AND_SIZE(spitz_spi_devices));
-	return;
-
-err_free_3:
-	gpio_free(SPITZ_GPIO_MAX1111_CS);
-err_free_2:
-	gpio_free(SPITZ_GPIO_LCDCON_CS);
-err_free_1:
-	gpio_free(SPITZ_GPIO_ADS7846_CS);
 }
 #else
 static inline void spitz_init_spi(void) {}
-- 
cgit v0.10.2


From a01bd58449088b63da45ab5f2c7921893eb7d143 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Fri, 17 Apr 2009 11:47:57 +0200
Subject: [ARM] pxa: allow IRQ_BOARD_END to be customized and make zylonite to
 use it

The default value is 16 IRQs. Zylonite needs 32, ASIC3 based boards need 70.

My problem is still that due to the way IRQ_GPIO is hardcoded, ASIC3 based boards
need 70 IRQs starting at IRQ_BOARD_START. If I define ASIC3 IRQs similar to LoCoMo
or SA1111, things break as soon as something selects PXA_HAVE_BOARD_IRQS.
Increasing the default number of board IRQs to 70 instead doesn't seem very nice.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 47747f9..3b34f34 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -280,6 +280,7 @@ config MACH_ZYLONITE
 	select PXA3xx
 	select PXA_SSP
 	select HAVE_PWM
+	select PXA_HAVE_BOARD_IRQS
 
 config MACH_LITTLETON
 	bool "PXA3xx Form Factor Platform (aka Littleton)"
diff --git a/arch/arm/mach-pxa/include/mach/irqs.h b/arch/arm/mach-pxa/include/mach/irqs.h
index 32bb4a2..ebf38bb 100644
--- a/arch/arm/mach-pxa/include/mach/irqs.h
+++ b/arch/arm/mach-pxa/include/mach/irqs.h
@@ -91,13 +91,21 @@
 #define IRQ_TO_GPIO(i)	(((i) < IRQ_GPIO(2)) ? ((i) - IRQ_GPIO0) : IRQ_TO_GPIO_2_x(i))
 
 /*
- * The next 16 interrupts are for board specific purposes.  Since
+ * The following interrupts are for board specific purposes. Since
  * the kernel can only run on one machine at a time, we can re-use
- * these.  If you need more, increase IRQ_BOARD_END, but keep it
- * within sensible limits.
+ * these.  There will be 16 IRQs by default.  If it is not enough,
+ * IRQ_BOARD_END is allowed be customized for each board, but keep
+ * the numbers within sensible limits and in descending order, so
+ * when multiple config options are selected, the maximum will be
+ * used.
  */
 #define IRQ_BOARD_START		(PXA_GPIO_IRQ_BASE + PXA_GPIO_IRQ_NUM)
+
+#if defined(CONFIG_MACH_ZYLONITE)
+#define IRQ_BOARD_END		(IRQ_BOARD_START + 32)
+#else
 #define IRQ_BOARD_END		(IRQ_BOARD_START + 16)
+#endif
 
 #define IRQ_SA1111_START	(IRQ_BOARD_END)
 #define IRQ_GPAIN0		(IRQ_BOARD_END + 0)
@@ -188,8 +196,6 @@
 #define NR_IRQS			(IRQ_LOCOMO_SPI_TEND + 1)
 #elif defined(CONFIG_PXA_HAVE_BOARD_IRQS)
 #define NR_IRQS			(IRQ_BOARD_END)
-#elif defined(CONFIG_MACH_ZYLONITE)
-#define NR_IRQS			(IRQ_BOARD_START + 32)
 #else
 #define NR_IRQS			(IRQ_BOARD_START)
 #endif
-- 
cgit v0.10.2


From 94c35a6b5129bc370893d74085c13607f260ef39 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Tue, 21 Apr 2009 19:19:36 +0200
Subject: [ARM] pxa: add udc support for pxa3xx

As reported by Aric Blumer, the pxa27x_udc driver does work
with pxa3xx devices. Add support into device files.

Reported-by: Aric Blumer <aric@sdgsystems.com>
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Matt Reimer <mattjreimer@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 462503a..6f678d9 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -552,7 +552,7 @@ void __init pxa3xx_set_i2c_power_info(struct i2c_pxa_platform_data *info)
 }
 
 static struct platform_device *devices[] __initdata = {
-/*	&pxa_device_udc,	The UDC driver is PXA25x only */
+	&pxa27x_device_udc,
 	&pxa_device_ffuart,
 	&pxa_device_btuart,
 	&pxa_device_stuart,
-- 
cgit v0.10.2


From d082d36ea82aff6f25e8380770713e6fd0c7290a Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 26 May 2009 09:10:18 +0300
Subject: [ARM] pxa: add ability to set suspend mode

PXA processors have several low-power modes. Currently kernel supports
only one of these modes for PM_SUSPEND_MEM.

This patch adds ability to set desired suspend mode for PXA27x based
machines.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Reviewed-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/include/mach/pxa27x.h b/arch/arm/mach-pxa/include/mach/pxa27x.h
index 6876e16..0b70269 100644
--- a/arch/arm/mach-pxa/include/mach/pxa27x.h
+++ b/arch/arm/mach-pxa/include/mach/pxa27x.h
@@ -16,4 +16,7 @@
 #define ARB_DMA_PARK		(1<<25)	   /* Be parked with DMA when idle */
 #define ARB_CORE_PARK		(1<<24)	   /* Be parked with core when idle */
 #define ARB_LOCK_FLAG		(1<<23)	   /* Only Locking masters gain access to the bus */
+
+extern int __init pxa27x_set_pwrmode(unsigned int mode);
+
 #endif /* __MACH_PXA27x_H */
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index c54ad8b..ec68cc1 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -204,6 +204,23 @@ static struct clk_lookup pxa27x_clkregs[] = {
 #define RESTORE(x)	x = sleep_save[SLEEP_SAVE_##x]
 
 /*
+ * allow platforms to override default PWRMODE setting used for PM_SUSPEND_MEM
+ */
+static unsigned int pwrmode = PWRMODE_SLEEP;
+
+int __init pxa27x_set_pwrmode(unsigned int mode)
+{
+	switch (mode) {
+	case PWRMODE_SLEEP:
+	case PWRMODE_DEEPSLEEP:
+		pwrmode = mode;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
  * List of global PXA peripheral registers to preserve.
  * More ones like CP and general purpose register values are preserved
  * with the stack pointer in sleep.S.
@@ -254,7 +271,7 @@ void pxa27x_cpu_pm_enter(suspend_state_t state)
 		pxa_cpu_standby();
 		break;
 	case PM_SUSPEND_MEM:
-		pxa27x_cpu_suspend(PWRMODE_SLEEP);
+		pxa27x_cpu_suspend(pwrmode);
 		break;
 	}
 }
-- 
cgit v0.10.2


From dd5b94aba709aae68d1ba11b5f963df54efa58bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrgen=20Schindele?= <linux@schindele.name>
Date: Thu, 4 Jun 2009 11:43:22 +0800
Subject: [ARM] pxa: add additional mfp definitions for pxa320
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jürgen Schindele <linux@schindele.name>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>

diff --git a/arch/arm/mach-pxa/include/mach/mfp-pxa320.h b/arch/arm/mach-pxa/include/mach/mfp-pxa320.h
index 07897e6..3ce4682 100644
--- a/arch/arm/mach-pxa/include/mach/mfp-pxa320.h
+++ b/arch/arm/mach-pxa/include/mach/mfp-pxa320.h
@@ -283,6 +283,9 @@
 #define GPIO41_UART1_TXD	MFP_CFG_LPM(GPIO41, AF4, FLOAT)
 #define GPIO42_UART1_RXD	MFP_CFG_LPM(GPIO42, AF4, FLOAT)
 #define GPIO42_UART1_TXD	MFP_CFG_LPM(GPIO42, AF2, FLOAT)
+#define GPIO75_UART1_RXD	MFP_CFG_LPM(GPIO75, AF1, FLOAT)
+#define GPIO76_UART1_RXD	MFP_CFG_LPM(GPIO76, AF3, FLOAT)
+#define GPIO76_UART1_TXD	MFP_CFG_LPM(GPIO76, AF1, FLOAT)
 #define GPIO97_UART1_RXD	MFP_CFG_LPM(GPIO97, AF1, FLOAT)
 #define GPIO97_UART1_TXD	MFP_CFG_LPM(GPIO97, AF6, FLOAT)
 #define GPIO98_UART1_RXD	MFP_CFG_LPM(GPIO98, AF6, FLOAT)
@@ -291,6 +294,9 @@
 #define GPIO43_UART1_RTS	MFP_CFG_LPM(GPIO43, AF4, FLOAT)
 #define GPIO48_UART1_CTS	MFP_CFG_LPM(GPIO48, AF4, FLOAT)
 #define GPIO48_UART1_RTS	MFP_CFG_LPM(GPIO48, AF2, FLOAT)
+#define GPIO77_UART1_CTS	MFP_CFG_LPM(GPIO77, AF1, FLOAT)
+#define GPIO82_UART1_RTS	MFP_CFG_LPM(GPIO82, AF1, FLOAT)
+#define GPIO82_UART1_CTS	MFP_CFG_LPM(GPIO82, AF3, FLOAT)
 #define GPIO99_UART1_CTS	MFP_CFG_LPM(GPIO99, AF1, FLOAT)
 #define GPIO99_UART1_RTS	MFP_CFG_LPM(GPIO99, AF6, FLOAT)
 #define GPIO104_UART1_CTS	MFP_CFG_LPM(GPIO104, AF6, FLOAT)
@@ -299,13 +305,18 @@
 #define GPIO45_UART1_DSR	MFP_CFG_LPM(GPIO45, AF2, FLOAT)
 #define GPIO47_UART1_DTR	MFP_CFG_LPM(GPIO47, AF2, FLOAT)
 #define GPIO47_UART1_DSR	MFP_CFG_LPM(GPIO47, AF4, FLOAT)
+#define GPIO79_UART1_DSR	MFP_CFG_LPM(GPIO79, AF1, FLOAT)
+#define GPIO81_UART1_DTR	MFP_CFG_LPM(GPIO81, AF1, FLOAT)
+#define GPIO81_UART1_DSR	MFP_CFG_LPM(GPIO81, AF3, FLOAT)
 #define GPIO101_UART1_DTR	MFP_CFG_LPM(GPIO101, AF6, FLOAT)
 #define GPIO101_UART1_DSR	MFP_CFG_LPM(GPIO101, AF1, FLOAT)
 #define GPIO103_UART1_DTR	MFP_CFG_LPM(GPIO103, AF1, FLOAT)
 #define GPIO103_UART1_DSR	MFP_CFG_LPM(GPIO103, AF6, FLOAT)
 #define GPIO44_UART1_DCD	MFP_CFG_LPM(GPIO44, AF2, FLOAT)
+#define GPIO78_UART1_DCD	MFP_CFG_LPM(GPIO78, AF1, FLOAT)
 #define GPIO100_UART1_DCD	MFP_CFG_LPM(GPIO100, AF1, FLOAT)
 #define GPIO46_UART1_RI		MFP_CFG_LPM(GPIO46, AF2, FLOAT)
+#define GPIO80_UART1_RI		MFP_CFG_LPM(GPIO80, AF1, FLOAT)
 #define GPIO102_UART1_RI	MFP_CFG_LPM(GPIO102, AF1, FLOAT)
 
 /* UART2 */
@@ -438,6 +449,9 @@
 
 #define GPIO2_RDY		MFP_CFG(GPIO2, AF1)
 #define GPIO5_NPIOR		MFP_CFG(GPIO5, AF3)
+#define GPIO6_NPIOW		MFP_CFG(GPIO6, AF3)
+#define GPIO7_NPIOS16		MFP_CFG(GPIO7, AF3)
+#define GPIO8_NPWAIT		MFP_CFG(GPIO8, AF3)
 
 #define GPIO11_PWM0_OUT		MFP_CFG(GPIO11, AF1)
 #define GPIO12_PWM1_OUT		MFP_CFG(GPIO12, AF1)
-- 
cgit v0.10.2


From 3dbeef231e2247f91784f33555ed6f7da7a6f726 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sun, 17 May 2009 23:03:55 +0200
Subject: [ARM] pxa: add vcc_core regulation for cpufreq on pxa2xx

Add voltage regulation capability to pxa2xx cpufreq
driver. The cpufreq will ask for a "vcc_core" regulator to
the regulator framework.

If a regulator is found at probe time, it will be used with
values specified in PXA270 Electrical, Mechanical, and
Thermal Specifications.

If not, it will be assumed for now that frequency change
will work without voltage control. This assumes that the
IPL/SPL installs sane values to an existing voltage
regulator (ie. voltage high enough to support the full
range).

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/cpufreq-pxa2xx.c b/arch/arm/mach-pxa/cpufreq-pxa2xx.c
index 083a1d8..3a8ee22 100644
--- a/arch/arm/mach-pxa/cpufreq-pxa2xx.c
+++ b/arch/arm/mach-pxa/cpufreq-pxa2xx.c
@@ -36,6 +36,8 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
+#include <linux/err.h>
+#include <linux/regulator/consumer.h>
 
 #include <mach/pxa2xx-regs.h>
 
@@ -47,6 +49,8 @@ MODULE_PARM_DESC(freq_debug, "Set the debug messages to on=1/off=0");
 #define freq_debug  0
 #endif
 
+static struct regulator *vcc_core;
+
 static unsigned int pxa27x_maxfreq;
 module_param(pxa27x_maxfreq, uint, 0);
 MODULE_PARM_DESC(pxa27x_maxfreq, "Set the pxa27x maxfreq in MHz"
@@ -58,6 +62,8 @@ typedef struct {
 	unsigned int cccr;
 	unsigned int div2;
 	unsigned int cclkcfg;
+	int vmin;
+	int vmax;
 } pxa_freqs_t;
 
 /* Define the refresh period in mSec for the SDRAM and the number of rows */
@@ -82,24 +88,24 @@ static unsigned int sdram_rows;
 
 static pxa_freqs_t pxa255_run_freqs[] =
 {
-	/* CPU   MEMBUS  CCCR  DIV2 CCLKCFG	   run  turbo PXbus SDRAM */
-	{ 99500,  99500, 0x121, 1,  CCLKCFG},	/*  99,   99,   50,   50  */
-	{132700, 132700, 0x123, 1,  CCLKCFG},	/* 133,  133,   66,   66  */
-	{199100,  99500, 0x141, 0,  CCLKCFG},	/* 199,  199,   99,   99  */
-	{265400, 132700, 0x143, 1,  CCLKCFG},	/* 265,  265,  133,   66  */
-	{331800, 165900, 0x145, 1,  CCLKCFG},	/* 331,  331,  166,   83  */
-	{398100,  99500, 0x161, 0,  CCLKCFG},	/* 398,  398,  196,   99  */
+	/* CPU   MEMBUS  CCCR  DIV2 CCLKCFG	           run  turbo PXbus SDRAM */
+	{ 99500,  99500, 0x121, 1,  CCLKCFG, -1, -1},	/*  99,   99,   50,   50  */
+	{132700, 132700, 0x123, 1,  CCLKCFG, -1, -1},	/* 133,  133,   66,   66  */
+	{199100,  99500, 0x141, 0,  CCLKCFG, -1, -1},	/* 199,  199,   99,   99  */
+	{265400, 132700, 0x143, 1,  CCLKCFG, -1, -1},	/* 265,  265,  133,   66  */
+	{331800, 165900, 0x145, 1,  CCLKCFG, -1, -1},	/* 331,  331,  166,   83  */
+	{398100,  99500, 0x161, 0,  CCLKCFG, -1, -1},	/* 398,  398,  196,   99  */
 };
 
 /* Use the turbo mode frequencies for the CPUFREQ_POLICY_POWERSAVE policy */
 static pxa_freqs_t pxa255_turbo_freqs[] =
 {
 	/* CPU   MEMBUS  CCCR  DIV2 CCLKCFG	   run  turbo PXbus SDRAM */
-	{ 99500, 99500,  0x121, 1,  CCLKCFG},	/*  99,   99,   50,   50  */
-	{199100, 99500,  0x221, 0,  CCLKCFG},	/*  99,  199,   50,   99  */
-	{298500, 99500,  0x321, 0,  CCLKCFG},	/*  99,  287,   50,   99  */
-	{298600, 99500,  0x1c1, 0,  CCLKCFG},	/* 199,  287,   99,   99  */
-	{398100, 99500,  0x241, 0,  CCLKCFG},	/* 199,  398,   99,   99  */
+	{ 99500, 99500,  0x121, 1,  CCLKCFG, -1, -1},	/*  99,   99,   50,   50  */
+	{199100, 99500,  0x221, 0,  CCLKCFG, -1, -1},	/*  99,  199,   50,   99  */
+	{298500, 99500,  0x321, 0,  CCLKCFG, -1, -1},	/*  99,  287,   50,   99  */
+	{298600, 99500,  0x1c1, 0,  CCLKCFG, -1, -1},	/* 199,  287,   99,   99  */
+	{398100, 99500,  0x241, 0,  CCLKCFG, -1, -1},	/* 199,  398,   99,   99  */
 };
 
 #define NUM_PXA25x_RUN_FREQS ARRAY_SIZE(pxa255_run_freqs)
@@ -148,13 +154,13 @@ MODULE_PARM_DESC(pxa255_turbo_table, "Selects the frequency table (0 = run table
    ((T)  ? CCLKCFG_TURBO : 0))
 
 static pxa_freqs_t pxa27x_freqs[] = {
-	{104000, 104000, PXA27x_CCCR(1,	 8, 2), 0, CCLKCFG2(1, 0, 1)},
-	{156000, 104000, PXA27x_CCCR(1,	 8, 6), 0, CCLKCFG2(1, 1, 1)},
-	{208000, 208000, PXA27x_CCCR(0, 16, 2), 1, CCLKCFG2(0, 0, 1)},
-	{312000, 208000, PXA27x_CCCR(1, 16, 3), 1, CCLKCFG2(1, 0, 1)},
-	{416000, 208000, PXA27x_CCCR(1, 16, 4), 1, CCLKCFG2(1, 0, 1)},
-	{520000, 208000, PXA27x_CCCR(1, 16, 5), 1, CCLKCFG2(1, 0, 1)},
-	{624000, 208000, PXA27x_CCCR(1, 16, 6), 1, CCLKCFG2(1, 0, 1)}
+	{104000, 104000, PXA27x_CCCR(1,	 8, 2), 0, CCLKCFG2(1, 0, 1),  900000, 1705000 },
+	{156000, 104000, PXA27x_CCCR(1,	 8, 6), 0, CCLKCFG2(1, 1, 1), 1000000, 1705000 },
+	{208000, 208000, PXA27x_CCCR(0, 16, 2), 1, CCLKCFG2(0, 0, 1), 1180000, 1705000 },
+	{312000, 208000, PXA27x_CCCR(1, 16, 3), 1, CCLKCFG2(1, 0, 1), 1250000, 1705000 },
+	{416000, 208000, PXA27x_CCCR(1, 16, 4), 1, CCLKCFG2(1, 0, 1), 1350000, 1705000 },
+	{520000, 208000, PXA27x_CCCR(1, 16, 5), 1, CCLKCFG2(1, 0, 1), 1450000, 1705000 },
+	{624000, 208000, PXA27x_CCCR(1, 16, 6), 1, CCLKCFG2(1, 0, 1), 1550000, 1705000 }
 };
 
 #define NUM_PXA27x_FREQS ARRAY_SIZE(pxa27x_freqs)
@@ -163,6 +169,47 @@ static struct cpufreq_frequency_table
 
 extern unsigned get_clk_frequency_khz(int info);
 
+#ifdef CONFIG_REGULATOR
+
+static int pxa_cpufreq_change_voltage(pxa_freqs_t *pxa_freq)
+{
+	int ret = 0;
+	int vmin, vmax;
+
+	if (!cpu_is_pxa27x())
+		return 0;
+
+	vmin = pxa_freq->vmin;
+	vmax = pxa_freq->vmax;
+	if ((vmin == -1) || (vmax == -1))
+		return 0;
+
+	ret = regulator_set_voltage(vcc_core, vmin, vmax);
+	if (ret)
+		pr_err("cpufreq: Failed to set vcc_core in [%dmV..%dmV]\n",
+		       vmin, vmax);
+	return ret;
+}
+
+static __init void pxa_cpufreq_init_voltages(void)
+{
+	vcc_core = regulator_get(NULL, "vcc_core");
+	if (IS_ERR(vcc_core)) {
+		pr_info("cpufreq: Didn't find vcc_core regulator\n");
+		vcc_core = NULL;
+	} else {
+		pr_info("cpufreq: Found vcc_core regulator\n");
+	}
+}
+#else
+static int pxa_cpufreq_change_voltage(pxa_freqs_t *pxa_freq)
+{
+	return 0;
+}
+
+static __init void pxa_cpufreq_init_voltages(void) { }
+#endif
+
 static void find_freq_tables(struct cpufreq_frequency_table **freq_table,
 			     pxa_freqs_t **pxa_freqs)
 {
@@ -251,6 +298,7 @@ static int pxa_set_target(struct cpufreq_policy *policy,
 	unsigned long flags;
 	unsigned int new_freq_cpu, new_freq_mem;
 	unsigned int unused, preset_mdrefr, postset_mdrefr, cclkcfg;
+	int ret = 0;
 
 	/* Get the current policy */
 	find_freq_tables(&pxa_freqs_table, &pxa_freq_settings);
@@ -273,6 +321,10 @@ static int pxa_set_target(struct cpufreq_policy *policy,
 			 freqs.new / 1000, (pxa_freq_settings[idx].div2) ?
 			 (new_freq_mem / 2000) : (new_freq_mem / 1000));
 
+	if (vcc_core && freqs.new > freqs.old)
+		ret = pxa_cpufreq_change_voltage(&pxa_freq_settings[idx]);
+	if (ret)
+		return ret;
 	/*
 	 * Tell everyone what we're about to do...
 	 * you should add a notify client with any platform specific
@@ -335,6 +387,18 @@ static int pxa_set_target(struct cpufreq_policy *policy,
 	 */
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 
+	/*
+	 * Even if voltage setting fails, we don't report it, as the frequency
+	 * change succeeded. The voltage reduction is not a critical failure,
+	 * only power savings will suffer from this.
+	 *
+	 * Note: if the voltage change fails, and a return value is returned, a
+	 * bug is triggered (seems a deadlock). Should anybody find out where,
+	 * the "return 0" should become a "return ret".
+	 */
+	if (vcc_core && freqs.new < freqs.old)
+		ret = pxa_cpufreq_change_voltage(&pxa_freq_settings[idx]);
+
 	return 0;
 }
 
@@ -349,6 +413,8 @@ static __init int pxa_cpufreq_init(struct cpufreq_policy *policy)
 	if (cpu_is_pxa27x())
 		pxa27x_guess_max_freq();
 
+	pxa_cpufreq_init_voltages();
+
 	init_sdram_rows();
 
 	/* set default policy and cpuinfo */
-- 
cgit v0.10.2


From f8f9d5ecfff9b2d04db4decd7d2c86aba86e49e5 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Fri, 17 Apr 2009 11:38:23 +0200
Subject: [ARM] pxa/magician: use I2C fast mode

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index 5a9df97..34046c8 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -745,6 +745,14 @@ static struct platform_device strataflash = {
 };
 
 /*
+ * I2C
+ */
+
+static struct i2c_pxa_platform_data i2c_info = {
+	.fast_mode = 1,
+};
+
+/*
  * Platform devices
  */
 
@@ -779,7 +787,7 @@ static void __init magician_init(void)
 		pxa_set_ficp_info(&magician_ficp_info);
 	}
 	pxa27x_set_i2c_power_info(NULL);
-	pxa_set_i2c_info(NULL);
+	pxa_set_i2c_info(&i2c_info);
 	pxa_set_mci_info(&magician_mci_info);
 	pxa_set_ohci_info(&magician_ohci_info);
 
-- 
cgit v0.10.2


From 2f1a5bf707a39153d93d04858f69f2d43563d08c Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Tue, 28 Apr 2009 13:31:08 +0200
Subject: [ARM] pxa/magician: use ARRAY_AND_SIZE for platform_add_devices

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index 34046c8..ca39669 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -779,7 +779,7 @@ static void __init magician_init(void)
 
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(magician_pin_config));
 
-	platform_add_devices(devices, ARRAY_SIZE(devices));
+	platform_add_devices(ARRAY_AND_SIZE(devices));
 
 	err = gpio_request(GPIO83_MAGICIAN_nIR_EN, "nIR_EN");
 	if (!err) {
-- 
cgit v0.10.2


From d3ca1952e63ffbebab6f2f1c9551df7b3cb731f5 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 28 May 2009 07:05:18 +0200
Subject: [ARM] pxa: add basic support for HP iPAQ hx4700 PDAs

This includes
- IrDA (pxafixp_ir),
- Buttons (gpio-keys),
- ASIC3 IRQ/GPIOs (asic3),
- EGPIOs (htc-egpio),
- ATI Imageon w3220 framebuffer (w100fb),
- Backlight (pwm-backlight),
- StrataFlash (physmap),
- Battery monitor (ds1wm,w1_ds2760,ds2760_battery)
- USB gadget support (pxa27x_udc,gpio_vbus).
- bq24022 battery charger (pda_power,bq24022)
- TSC2046 touchscreen (ads7846)

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 3b34f34..a911bc9 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -309,6 +309,14 @@ config MACH_CM_X300
 	select PXA3xx
 	select CPU_PXA300
 
+config MACH_H4700
+	bool "HP iPAQ hx4700"
+	select PXA27x
+	select IWMMXT
+	select PXA_SSP
+	select HAVE_PWM
+	select PXA_HAVE_BOARD_IRQS
+
 config MACH_MAGICIAN
 	bool "Enable HTC Magician Support"
 	select PXA27x
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index 47d0157..e950c40 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_MACH_PCM027)	+= pcm027.o
 obj-$(CONFIG_MACH_PCM990_BASEBOARD)	+= pcm990-baseboard.o
 obj-$(CONFIG_MACH_TOSA)		+= tosa.o
 obj-$(CONFIG_MACH_EM_X270)	+= em-x270.o
+obj-$(CONFIG_MACH_H4700)	+= hx4700.o
 obj-$(CONFIG_MACH_MAGICIAN)	+= magician.o
 obj-$(CONFIG_MACH_HIMALAYA)	+= himalaya.o
 obj-$(CONFIG_MACH_MIOA701)	+= mioa701.o mioa701_bootresume.o
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
new file mode 100644
index 0000000..0f65680
--- /dev/null
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -0,0 +1,851 @@
+/*
+ * Support for HP iPAQ hx4700 PDAs.
+ *
+ * Copyright (c) 2008-2009 Philipp Zabel
+ *
+ * Based on code:
+ *    Copyright (c) 2004 Hewlett-Packard Company.
+ *    Copyright (c) 2005 SDG Systems, LLC
+ *    Copyright (c) 2006 Anton Vorontsov <cbou@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/gpio.h>
+#include <linux/gpio_keys.h>
+#include <linux/input.h>
+#include <linux/lcd.h>
+#include <linux/mfd/htc-egpio.h>
+#include <linux/mfd/asic3.h>
+#include <linux/mtd/physmap.h>
+#include <linux/pda_power.h>
+#include <linux/pwm_backlight.h>
+#include <linux/regulator/bq24022.h>
+#include <linux/regulator/machine.h>
+#include <linux/spi/ads7846.h>
+#include <linux/spi/spi.h>
+#include <linux/usb/gpio_vbus.h>
+
+#include <mach/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+
+#include <mach/pxa27x.h>
+#include <mach/hx4700.h>
+#include <plat/i2c.h>
+#include <mach/irda.h>
+#include <mach/pxa2xx_spi.h>
+
+#include <video/w100fb.h>
+
+#include "devices.h"
+#include "generic.h"
+
+/* Physical address space information */
+
+#define ATI_W3220_PHYS  PXA_CS2_PHYS /* ATI Imageon 3220 Graphics */
+#define ASIC3_PHYS      PXA_CS3_PHYS
+#define ASIC3_SD_PHYS   (PXA_CS3_PHYS + 0x02000000)
+
+static unsigned long hx4700_pin_config[] __initdata = {
+
+	/* SDRAM and Static Memory I/O Signals */
+	GPIO20_nSDCS_2,
+	GPIO21_nSDCS_3,
+	GPIO15_nCS_1,
+	GPIO78_nCS_2,   /* W3220 */
+	GPIO79_nCS_3,   /* ASIC3 */
+	GPIO80_nCS_4,
+	GPIO33_nCS_5,	/* EGPIO, WLAN */
+
+	/* PC CARD */
+	GPIO48_nPOE,
+	GPIO49_nPWE,
+	GPIO50_nPIOR,
+	GPIO51_nPIOW,
+	GPIO54_nPCE_2,
+	GPIO55_nPREG,
+	GPIO56_nPWAIT,
+	GPIO57_nIOIS16,
+	GPIO85_nPCE_1,
+	GPIO104_PSKTSEL,
+
+	/* I2C */
+	GPIO117_I2C_SCL,
+	GPIO118_I2C_SDA,
+
+	/* FFUART (RS-232) */
+	GPIO34_FFUART_RXD,
+	GPIO35_FFUART_CTS,
+	GPIO36_FFUART_DCD,
+	GPIO37_FFUART_DSR,
+	GPIO38_FFUART_RI,
+	GPIO39_FFUART_TXD,
+	GPIO40_FFUART_DTR,
+	GPIO41_FFUART_RTS,
+
+	/* BTUART */
+	GPIO42_BTUART_RXD,
+	GPIO43_BTUART_TXD,
+	GPIO44_BTUART_CTS,
+	GPIO45_BTUART_RTS,
+
+	/* PWM 1 (Backlight) */
+	GPIO17_PWM1_OUT,
+
+	/* I2S */
+	GPIO28_I2S_BITCLK_OUT,
+	GPIO29_I2S_SDATA_IN,
+	GPIO30_I2S_SDATA_OUT,
+	GPIO31_I2S_SYNC,
+	GPIO113_I2S_SYSCLK,
+
+	/* SSP 1 (NavPoint) */
+	GPIO23_SSP1_SCLK,
+	GPIO24_SSP1_SFRM,
+	GPIO25_SSP1_TXD,
+	GPIO26_SSP1_RXD,
+
+	/* SSP 2 (TSC2046) */
+	GPIO19_SSP2_SCLK,
+	GPIO86_SSP2_RXD,
+	GPIO87_SSP2_TXD,
+	GPIO88_GPIO,
+
+	/* HX4700 specific input GPIOs */
+	GPIO12_GPIO,	/* ASIC3_IRQ */
+	GPIO13_GPIO,	/* W3220_IRQ */
+	GPIO14_GPIO,	/* nWLAN_IRQ */
+
+	GPIO10_GPIO,	/* GSM_IRQ */
+	GPIO13_GPIO,	/* CPLD_IRQ */
+	GPIO107_GPIO,	/* DS1WM_IRQ */
+	GPIO108_GPIO,	/* GSM_READY */
+	GPIO58_GPIO,	/* TSC2046_nPENIRQ */
+	GPIO66_GPIO,	/* nSDIO_IRQ */
+};
+
+#define HX4700_GPIO_IN(num, _desc) \
+	{ .gpio = (num), .dir = 0, .desc = (_desc) }
+#define HX4700_GPIO_OUT(num, _init, _desc) \
+	{ .gpio = (num), .dir = 1, .init = (_init), .desc = (_desc) }
+struct gpio_ress {
+	unsigned gpio : 8;
+	unsigned dir : 1;
+	unsigned init : 1;
+	char *desc;
+};
+
+static int hx4700_gpio_request(struct gpio_ress *gpios, int size)
+{
+	int i, rc = 0;
+	int gpio;
+	int dir;
+
+	for (i = 0; (!rc) && (i < size); i++) {
+		gpio = gpios[i].gpio;
+		dir = gpios[i].dir;
+		rc = gpio_request(gpio, gpios[i].desc);
+		if (rc) {
+			pr_err("Error requesting GPIO %d(%s) : %d\n",
+			       gpio, gpios[i].desc, rc);
+			continue;
+		}
+		if (dir)
+			gpio_direction_output(gpio, gpios[i].init);
+		else
+			gpio_direction_input(gpio);
+	}
+	while ((rc) && (--i >= 0))
+		gpio_free(gpios[i].gpio);
+	return rc;
+}
+
+/*
+ * IRDA
+ */
+
+static void irda_transceiver_mode(struct device *dev, int mode)
+{
+	gpio_set_value(GPIO105_HX4700_nIR_ON, mode & IR_OFF);
+}
+
+static struct pxaficp_platform_data ficp_info = {
+	.transceiver_cap  = IR_SIRMODE | IR_OFF,
+	.transceiver_mode = irda_transceiver_mode,
+};
+
+/*
+ * GPIO Keys
+ */
+
+#define INIT_KEY(_code, _gpio, _active_low, _desc)	\
+	{						\
+		.code       = KEY_##_code,		\
+		.gpio       = _gpio,			\
+		.active_low = _active_low,		\
+		.desc       = _desc,			\
+		.type       = EV_KEY,			\
+		.wakeup     = 1,			\
+	}
+
+static struct gpio_keys_button gpio_keys_buttons[] = {
+	INIT_KEY(POWER,       GPIO0_HX4700_nKEY_POWER,   1, "Power button"),
+	INIT_KEY(MAIL,        GPIO94_HX4700_KEY_MAIL,    0, "Mail button"),
+	INIT_KEY(ADDRESSBOOK, GPIO99_HX4700_KEY_CONTACTS,0, "Contacts button"),
+	INIT_KEY(RECORD,      GPIOD6_nKEY_RECORD,        1, "Record button"),
+	INIT_KEY(CALENDAR,    GPIOD1_nKEY_CALENDAR,      1, "Calendar button"),
+	INIT_KEY(HOMEPAGE,    GPIOD3_nKEY_HOME,          1, "Home button"),
+};
+
+static struct gpio_keys_platform_data gpio_keys_data = {
+	.buttons = gpio_keys_buttons,
+	.nbuttons = ARRAY_SIZE(gpio_keys_buttons),
+};
+
+static struct platform_device gpio_keys = {
+	.name = "gpio-keys",
+	.dev  = {
+		.platform_data = &gpio_keys_data,
+	},
+	.id   = -1,
+};
+
+/*
+ * ASIC3
+ */
+
+static u16 asic3_gpio_config[] = {
+	/* ASIC3 GPIO banks A and B along with some of C and D
+	   implement the buffering for the CF slot. */
+	ASIC3_CONFIG_GPIO(0, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(1, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(2, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(3, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(4, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(5, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(6, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(7, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(8, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(9, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(10, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(11, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(12, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(13, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(14, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(15, 1, 1, 0),
+
+	ASIC3_CONFIG_GPIO(16, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(17, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(18, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(19, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(20, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(21, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(22, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(23, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(24, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(25, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(26, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(27, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(28, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(29, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(30, 1, 1, 0),
+	ASIC3_CONFIG_GPIO(31, 1, 1, 0),
+
+	/* GPIOC - CF, LEDs, SD */
+	ASIC3_GPIOC0_LED0,		/* red */
+	ASIC3_GPIOC1_LED1,		/* green */
+	ASIC3_GPIOC2_LED2,		/* blue */
+	ASIC3_GPIOC4_CF_nCD,
+	ASIC3_GPIOC5_nCIOW,
+	ASIC3_GPIOC6_nCIOR,
+	ASIC3_GPIOC7_nPCE_1,
+	ASIC3_GPIOC8_nPCE_2,
+	ASIC3_GPIOC9_nPOE,
+	ASIC3_GPIOC10_nPWE,
+	ASIC3_GPIOC11_PSKTSEL,
+	ASIC3_GPIOC12_nPREG,
+	ASIC3_GPIOC13_nPWAIT,
+	ASIC3_GPIOC14_nPIOIS16,
+	ASIC3_GPIOC15_nPIOR,
+
+	/* GPIOD: input GPIOs, CF */
+	ASIC3_GPIOD11_nCIOIS16,
+	ASIC3_GPIOD12_nCWAIT,
+	ASIC3_GPIOD15_nPIOW,
+};
+
+static struct resource asic3_resources[] = {
+	/* GPIO part */
+	[0] = {
+		.start	= ASIC3_PHYS,
+		.end	= ASIC3_PHYS + ASIC3_MAP_SIZE_16BIT - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= gpio_to_irq(GPIO12_HX4700_ASIC3_IRQ),
+		.end	= gpio_to_irq(GPIO12_HX4700_ASIC3_IRQ),
+		.flags	= IORESOURCE_IRQ,
+	},
+	/* SD part */
+	[2] = {
+		.start	= ASIC3_SD_PHYS,
+		.end	= ASIC3_SD_PHYS + ASIC3_MAP_SIZE_16BIT - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[3] = {
+		.start	= gpio_to_irq(GPIO66_HX4700_ASIC3_nSDIO_IRQ),
+		.end	= gpio_to_irq(GPIO66_HX4700_ASIC3_nSDIO_IRQ),
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct asic3_platform_data asic3_platform_data = {
+	.gpio_config     = asic3_gpio_config,
+	.gpio_config_num = ARRAY_SIZE(asic3_gpio_config),
+	.irq_base        = IRQ_BOARD_START,
+	.gpio_base       = HX4700_ASIC3_GPIO_BASE,
+};
+
+static struct platform_device asic3 = {
+	.name          = "asic3",
+	.id            = -1,
+	.resource      = asic3_resources,
+	.num_resources = ARRAY_SIZE(asic3_resources),
+	.dev = {
+		.platform_data = &asic3_platform_data,
+	},
+};
+
+/*
+ * EGPIO
+ */
+
+static struct resource egpio_resources[] = {
+	[0] = {
+		.start = PXA_CS5_PHYS,
+		.end   = PXA_CS5_PHYS + 0x4 - 1,
+		.flags = IORESOURCE_MEM,
+	},
+};
+
+static struct htc_egpio_chip egpio_chips[] = {
+	[0] = {
+		.reg_start = 0,
+		.gpio_base = HX4700_EGPIO_BASE,
+		.num_gpios = 8,
+		.direction = HTC_EGPIO_OUTPUT,
+	},
+};
+
+static struct htc_egpio_platform_data egpio_info = {
+	.reg_width = 16,
+	.bus_width = 16,
+	.chip      = egpio_chips,
+	.num_chips = ARRAY_SIZE(egpio_chips),
+};
+
+static struct platform_device egpio = {
+	.name          = "htc-egpio",
+	.id            = -1,
+	.resource      = egpio_resources,
+	.num_resources = ARRAY_SIZE(egpio_resources),
+	.dev = {
+		.platform_data = &egpio_info,
+	},
+};
+
+/*
+ * LCD - Sony display connected to ATI Imageon w3220
+ */
+
+static int lcd_power;
+
+static void sony_lcd_init(void)
+{
+	gpio_set_value(GPIO84_HX4700_LCD_SQN, 1);
+	gpio_set_value(GPIO110_HX4700_LCD_LVDD_3V3_ON, 0);
+	gpio_set_value(GPIO111_HX4700_LCD_AVDD_3V3_ON, 0);
+	gpio_set_value(GPIO70_HX4700_LCD_SLIN1, 0);
+	gpio_set_value(GPIO62_HX4700_LCD_nRESET, 0);
+	mdelay(10);
+	gpio_set_value(GPIO59_HX4700_LCD_PC1, 0);
+	gpio_set_value(GPIO110_HX4700_LCD_LVDD_3V3_ON, 0);
+	mdelay(20);
+
+	gpio_set_value(GPIO110_HX4700_LCD_LVDD_3V3_ON, 1);
+	mdelay(5);
+	gpio_set_value(GPIO111_HX4700_LCD_AVDD_3V3_ON, 1);
+
+	/* FIXME: init w3220 registers here */
+
+	mdelay(5);
+	gpio_set_value(GPIO70_HX4700_LCD_SLIN1, 1);
+	mdelay(10);
+	gpio_set_value(GPIO62_HX4700_LCD_nRESET, 1);
+	mdelay(10);
+	gpio_set_value(GPIO59_HX4700_LCD_PC1, 1);
+	mdelay(10);
+	gpio_set_value(GPIO112_HX4700_LCD_N2V7_7V3_ON, 1);
+}
+
+static void sony_lcd_off(void)
+{
+	gpio_set_value(GPIO59_HX4700_LCD_PC1, 0);
+	gpio_set_value(GPIO62_HX4700_LCD_nRESET, 0);
+	mdelay(10);
+	gpio_set_value(GPIO112_HX4700_LCD_N2V7_7V3_ON, 0);
+	mdelay(10);
+	gpio_set_value(GPIO111_HX4700_LCD_AVDD_3V3_ON, 0);
+	mdelay(10);
+	gpio_set_value(GPIO110_HX4700_LCD_LVDD_3V3_ON, 0);
+}
+
+static int hx4700_lcd_set_power(struct lcd_device *ldev, int level)
+{
+	switch (level) {
+	case FB_BLANK_UNBLANK:
+		sony_lcd_init();
+		break;
+	case FB_BLANK_NORMAL:
+	case FB_BLANK_VSYNC_SUSPEND:
+	case FB_BLANK_HSYNC_SUSPEND:
+	case FB_BLANK_POWERDOWN:
+		sony_lcd_off();
+		break;
+	}
+	lcd_power = level;
+	return 0;
+}
+
+static int hx4700_lcd_get_power(struct lcd_device *lm)
+{
+	return lcd_power;
+}
+
+static struct lcd_ops hx4700_lcd_ops = {
+	.get_power = hx4700_lcd_get_power,
+	.set_power = hx4700_lcd_set_power,
+};
+
+static struct lcd_device *hx4700_lcd_device;
+
+#ifdef CONFIG_PM
+static void w3220_lcd_suspend(struct w100fb_par *wfb)
+{
+	sony_lcd_off();
+}
+
+static void w3220_lcd_resume(struct w100fb_par *wfb)
+{
+	sony_lcd_init();
+}
+#else
+#define w3220_lcd_resume	NULL
+#define w3220_lcd_suspend	NULL
+#endif
+
+static struct w100_tg_info w3220_tg_info = {
+	.suspend	= w3220_lcd_suspend,
+	.resume		= w3220_lcd_resume,
+};
+
+/*  				 W3220_VGA		QVGA */
+static struct w100_gen_regs w3220_regs = {
+	.lcd_format =        0x00000003,
+	.lcdd_cntl1 =        0x00000000,
+	.lcdd_cntl2 =        0x0003ffff,
+	.genlcd_cntl1 =      0x00abf003,	/* 0x00fff003 */
+	.genlcd_cntl2 =      0x00000003,
+	.genlcd_cntl3 =      0x000102aa,
+};
+
+static struct w100_mode w3220_modes[] = {
+{
+	.xres 		= 480,
+	.yres 		= 640,
+	.left_margin 	= 15,
+	.right_margin 	= 16,
+	.upper_margin 	= 8,
+	.lower_margin 	= 7,
+	.crtc_ss	= 0x00000000,
+	.crtc_ls	= 0xa1ff01f9,	/* 0x21ff01f9 */
+	.crtc_gs	= 0xc0000000,	/* 0x40000000 */
+	.crtc_vpos_gs	= 0x0000028f,
+	.crtc_ps1_active = 0x00000000,	/* 0x41060010 */
+	.crtc_rev	= 0,
+	.crtc_dclk	= 0x80000000,
+	.crtc_gclk	= 0x040a0104,
+	.crtc_goe	= 0,
+	.pll_freq 	= 95,
+	.pixclk_divider = 4,
+	.pixclk_divider_rotated = 4,
+	.pixclk_src     = CLK_SRC_PLL,
+	.sysclk_divider = 0,
+	.sysclk_src     = CLK_SRC_PLL,
+},
+{
+	.xres 		= 240,
+	.yres 		= 320,
+	.left_margin 	= 9,
+	.right_margin 	= 8,
+	.upper_margin 	= 5,
+	.lower_margin 	= 4,
+	.crtc_ss	= 0x80150014,
+	.crtc_ls        = 0xa0fb00f7,
+	.crtc_gs	= 0xc0080007,
+	.crtc_vpos_gs	= 0x00080007,
+	.crtc_rev	= 0x0000000a,
+	.crtc_dclk	= 0x81700030,
+	.crtc_gclk	= 0x8015010f,
+	.crtc_goe	= 0x00000000,
+	.pll_freq 	= 95,
+	.pixclk_divider = 4,
+	.pixclk_divider_rotated = 4,
+	.pixclk_src     = CLK_SRC_PLL,
+	.sysclk_divider = 0,
+	.sysclk_src     = CLK_SRC_PLL,
+},
+};
+
+struct w100_mem_info w3220_mem_info = {
+	.ext_cntl        = 0x09640011,
+	.sdram_mode_reg  = 0x00600021,
+	.ext_timing_cntl = 0x1a001545,	/* 0x15001545 */
+	.io_cntl         = 0x7ddd7333,
+	.size            = 0x1fffff,
+};
+
+struct w100_bm_mem_info w3220_bm_mem_info = {
+	.ext_mem_bw = 0x50413e01,
+	.offset = 0,
+	.ext_timing_ctl = 0x00043f7f,
+	.ext_cntl = 0x00000010,
+	.mode_reg = 0x00250000,
+	.io_cntl = 0x0fff0000,
+	.config = 0x08301480,
+};
+
+static struct w100_gpio_regs w3220_gpio_info = {
+	.init_data1 = 0xdfe00100,	/* GPIO_DATA */
+	.gpio_dir1  = 0xffff0000,	/* GPIO_CNTL1 */
+	.gpio_oe1   = 0x00000000,	/* GPIO_CNTL2 */
+	.init_data2 = 0x00000000,	/* GPIO_DATA2 */
+	.gpio_dir2  = 0x00000000,	/* GPIO_CNTL3 */
+	.gpio_oe2   = 0x00000000,	/* GPIO_CNTL4 */
+};
+
+static struct w100fb_mach_info w3220_info = {
+	.tg        = &w3220_tg_info,
+	.mem       = &w3220_mem_info,
+	.bm_mem    = &w3220_bm_mem_info,
+	.gpio      = &w3220_gpio_info,
+	.regs      = &w3220_regs,
+	.modelist  = w3220_modes,
+	.num_modes = 2,
+	.xtal_freq = 16000000,
+};
+
+static struct resource w3220_resources[] = {
+	[0] = {
+		.start	= ATI_W3220_PHYS,
+		.end	= ATI_W3220_PHYS + 0x00ffffff,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device w3220 = {
+	.name	= "w100fb",
+	.id	= -1,
+	.dev	= {
+		.platform_data = &w3220_info,
+	},
+	.num_resources = ARRAY_SIZE(w3220_resources),
+	.resource      = w3220_resources,
+};
+
+/*
+ * Backlight
+ */
+
+static struct platform_pwm_backlight_data backlight_data = {
+	.pwm_id         = 1,
+	.max_brightness = 200,
+	.dft_brightness = 100,
+	.pwm_period_ns  = 30923,
+};
+
+static struct platform_device backlight = {
+	.name = "pwm-backlight",
+	.id   = -1,
+	.dev  = {
+		.parent        = &pxa27x_device_pwm1.dev,
+		.platform_data = &backlight_data,
+	},
+};
+
+/*
+ * USB "Transceiver"
+ */
+
+static struct gpio_vbus_mach_info gpio_vbus_info = {
+	.gpio_pullup        = GPIO76_HX4700_USBC_PUEN,
+	.gpio_vbus          = GPIOD14_nUSBC_DETECT,
+	.gpio_vbus_inverted = 1,
+};
+
+static struct platform_device gpio_vbus = {
+	.name          = "gpio-vbus",
+	.id            = -1,
+	.dev = {
+		.platform_data = &gpio_vbus_info,
+	},
+};
+
+/*
+ * Touchscreen - TSC2046 connected to SSP2
+ */
+
+static const struct ads7846_platform_data tsc2046_info = {
+	.model            = 7846,
+	.vref_delay_usecs = 100,
+	.pressure_max     = 512,
+	.debounce_max     = 10,
+	.debounce_tol     = 3,
+	.debounce_rep     = 1,
+	.gpio_pendown     = GPIO58_HX4700_TSC2046_nPENIRQ,
+};
+
+static struct pxa2xx_spi_chip tsc2046_chip = {
+	.tx_threshold = 1,
+	.rx_threshold = 2,
+	.timeout      = 64,
+	.gpio_cs      = GPIO88_HX4700_TSC2046_CS,
+};
+
+static struct spi_board_info tsc2046_board_info[] __initdata = {
+	{
+		.modalias        = "ads7846",
+		.bus_num         = 2,
+		.max_speed_hz    = 2600000, /* 100 kHz sample rate */
+		.irq             = gpio_to_irq(GPIO58_HX4700_TSC2046_nPENIRQ),
+		.platform_data   = &tsc2046_info,
+		.controller_data = &tsc2046_chip,
+	},
+};
+
+static struct pxa2xx_spi_master pxa_ssp2_master_info = {
+	.num_chipselect = 1,
+	.clock_enable   = CKEN_SSP2,
+	.enable_dma     = 1,
+};
+
+/*
+ * External power
+ */
+
+static int power_supply_init(struct device *dev)
+{
+	return gpio_request(GPIOD9_nAC_IN, "AC charger detect");
+}
+
+static int hx4700_is_ac_online(void)
+{
+	return !gpio_get_value(GPIOD9_nAC_IN);
+}
+
+static void power_supply_exit(struct device *dev)
+{
+	gpio_free(GPIOD9_nAC_IN);
+}
+
+static char *hx4700_supplicants[] = {
+	"ds2760-battery.0", "backup-battery"
+};
+
+static struct pda_power_pdata power_supply_info = {
+	.init            = power_supply_init,
+	.is_ac_online    = hx4700_is_ac_online,
+	.exit            = power_supply_exit,
+	.supplied_to     = hx4700_supplicants,
+	.num_supplicants = ARRAY_SIZE(hx4700_supplicants),
+};
+
+static struct resource power_supply_resources[] = {
+	[0] = {
+		.name  = "ac",
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE |
+		         IORESOURCE_IRQ_LOWEDGE,
+		.start = gpio_to_irq(GPIOD9_nAC_IN),
+		.end   = gpio_to_irq(GPIOD9_nAC_IN),
+	},
+	[1] = {
+		.name  = "usb",
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE |
+		         IORESOURCE_IRQ_LOWEDGE,
+		.start = gpio_to_irq(GPIOD14_nUSBC_DETECT),
+		.end   = gpio_to_irq(GPIOD14_nUSBC_DETECT),
+	},
+};
+
+static struct platform_device power_supply = {
+	.name = "pda-power",
+	.id   = -1,
+	.dev  = {
+		.platform_data = &power_supply_info,
+	},
+	.resource      = power_supply_resources,
+	.num_resources = ARRAY_SIZE(power_supply_resources),
+};
+
+/*
+ * Battery charger
+ */
+
+static struct regulator_consumer_supply bq24022_consumers[] = {
+	{
+		.dev = &gpio_vbus.dev,
+		.supply = "vbus_draw",
+	},
+	{
+		.dev = &power_supply.dev,
+		.supply = "ac_draw",
+	},
+};
+
+static struct regulator_init_data bq24022_init_data = {
+	.constraints = {
+		.max_uA         = 500000,
+		.valid_ops_mask = REGULATOR_CHANGE_CURRENT,
+	},
+	.num_consumer_supplies  = ARRAY_SIZE(bq24022_consumers),
+	.consumer_supplies      = bq24022_consumers,
+};
+
+static struct bq24022_mach_info bq24022_info = {
+	.gpio_nce   = GPIO72_HX4700_BQ24022_nCHARGE_EN,
+	.gpio_iset2 = GPIO96_HX4700_BQ24022_ISET2,
+	.init_data  = &bq24022_init_data,
+};
+
+static struct platform_device bq24022 = {
+	.name = "bq24022",
+	.id   = -1,
+	.dev  = {
+		.platform_data = &bq24022_info,
+	},
+};
+
+/*
+ * StrataFlash
+ */
+
+static void hx4700_set_vpp(struct map_info *map, int vpp)
+{
+	gpio_set_value(GPIO91_HX4700_FLASH_VPEN, vpp);
+}
+
+static struct resource strataflash_resource = {
+	.start = PXA_CS0_PHYS,
+	.end   = PXA_CS0_PHYS + SZ_128M - 1,
+	.flags = IORESOURCE_MEM,
+};
+
+static struct physmap_flash_data strataflash_data = {
+	.width = 4,
+	.set_vpp = hx4700_set_vpp,
+};
+
+static struct platform_device strataflash = {
+	.name          = "physmap-flash",
+	.id            = -1,
+	.resource      = &strataflash_resource,
+	.num_resources = 1,
+	.dev = {
+		.platform_data = &strataflash_data,
+	},
+};
+
+/*
+ * PCMCIA
+ */
+
+static struct platform_device pcmcia = {
+	.name = "hx4700-pcmcia",
+	.dev  = {
+		.parent = &asic3.dev,
+	},
+};
+
+/*
+ * Platform devices
+ */
+
+static struct platform_device *devices[] __initdata = {
+	&asic3,
+	&gpio_keys,
+	&backlight,
+	&w3220,
+	&egpio,
+	&bq24022,
+	&gpio_vbus,
+	&power_supply,
+	&strataflash,
+	&pcmcia,
+};
+
+struct gpio_ress global_gpios[] = {
+	HX4700_GPIO_IN(GPIO12_HX4700_ASIC3_IRQ, "ASIC3_IRQ"),
+	HX4700_GPIO_IN(GPIO13_HX4700_W3220_IRQ, "W3220_IRQ"),
+	HX4700_GPIO_IN(GPIO14_HX4700_nWLAN_IRQ, "WLAN_IRQ"),
+	HX4700_GPIO_OUT(GPIO59_HX4700_LCD_PC1,          1, "LCD_PC1"),
+	HX4700_GPIO_OUT(GPIO62_HX4700_LCD_nRESET,       1, "LCD_RESET"),
+	HX4700_GPIO_OUT(GPIO70_HX4700_LCD_SLIN1,        1, "LCD_SLIN1"),
+	HX4700_GPIO_OUT(GPIO84_HX4700_LCD_SQN,          1, "LCD_SQN"),
+	HX4700_GPIO_OUT(GPIO110_HX4700_LCD_LVDD_3V3_ON, 1, "LCD_LVDD"),
+	HX4700_GPIO_OUT(GPIO111_HX4700_LCD_AVDD_3V3_ON, 1, "LCD_AVDD"),
+	HX4700_GPIO_OUT(GPIO32_HX4700_RS232_ON,         1, "RS232_ON"),
+	HX4700_GPIO_OUT(GPIO71_HX4700_ASIC3_nRESET,     1, "ASIC3_nRESET"),
+	HX4700_GPIO_OUT(GPIO82_HX4700_EUART_RESET,      1, "EUART_RESET"),
+	HX4700_GPIO_OUT(GPIO105_HX4700_nIR_ON,          1, "nIR_EN"),
+};
+
+static void __init hx4700_init(void)
+{
+	pxa2xx_mfp_config(ARRAY_AND_SIZE(hx4700_pin_config));
+	hx4700_gpio_request(ARRAY_AND_SIZE(global_gpios));
+
+	platform_add_devices(devices, ARRAY_SIZE(devices));
+
+	pxa_set_ficp_info(&ficp_info);
+	pxa27x_set_i2c_power_info(NULL);
+	pxa_set_i2c_info(NULL);
+	pxa2xx_set_spi_info(2, &pxa_ssp2_master_info);
+	spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info));
+
+	hx4700_lcd_device = lcd_device_register("w100fb", NULL,
+					(void *)&w3220_info, &hx4700_lcd_ops);
+
+	gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 0);
+	mdelay(10);
+	gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 1);
+	mdelay(10);
+}
+
+MACHINE_START(H4700, "HP iPAQ HX4700")
+	.phys_io      = 0x40000000,
+	.io_pg_offst  = (io_p2v(0x40000000) >> 18) & 0xfffc,
+	.boot_params  = 0xa0000100,
+	.map_io       = pxa_map_io,
+	.init_irq     = pxa27x_init_irq,
+	.init_machine = hx4700_init,
+	.timer        = &pxa_timer,
+MACHINE_END
diff --git a/arch/arm/mach-pxa/include/mach/hx4700.h b/arch/arm/mach-pxa/include/mach/hx4700.h
new file mode 100644
index 0000000..9eaeed1
--- /dev/null
+++ b/arch/arm/mach-pxa/include/mach/hx4700.h
@@ -0,0 +1,131 @@
+/*
+ * GPIO and IRQ definitions for HP iPAQ hx4700
+ *
+ * Copyright (c) 2008 Philipp Zabel
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _HX4700_H_
+#define _HX4700_H_
+
+#include <linux/gpio.h>
+#include <linux/mfd/asic3.h>
+
+#define HX4700_ASIC3_GPIO_BASE	NR_BUILTIN_GPIO
+#define HX4700_EGPIO_BASE	(HX4700_ASIC3_GPIO_BASE + ASIC3_NUM_GPIOS)
+
+/*
+ * PXA GPIOs
+ */
+
+#define GPIO0_HX4700_nKEY_POWER			0
+#define GPIO12_HX4700_ASIC3_IRQ			12
+#define GPIO13_HX4700_W3220_IRQ			13
+#define GPIO14_HX4700_nWLAN_IRQ			14
+#define GPIO18_HX4700_RDY			18
+#define GPIO22_HX4700_LCD_RL			22
+#define GPIO27_HX4700_CODEC_ON			27
+#define GPIO32_HX4700_RS232_ON			32
+#define GPIO52_HX4700_CPU_nBATT_FAULT		52
+#define GPIO58_HX4700_TSC2046_nPENIRQ		58
+#define GPIO59_HX4700_LCD_PC1			59
+#define GPIO60_HX4700_CF_RNB			60
+#define GPIO61_HX4700_W3220_nRESET		61
+#define GPIO62_HX4700_LCD_nRESET		62
+#define GPIO63_HX4700_CPU_SS_nRESET		63
+#define GPIO65_HX4700_TSC2046_PEN_PU		65
+#define GPIO66_HX4700_ASIC3_nSDIO_IRQ		66
+#define GPIO67_HX4700_EUART_PS			67
+#define GPIO70_HX4700_LCD_SLIN1			70
+#define GPIO71_HX4700_ASIC3_nRESET		71
+#define GPIO72_HX4700_BQ24022_nCHARGE_EN	72
+#define GPIO73_HX4700_LCD_UD_1			73
+#define GPIO75_HX4700_EARPHONE_nDET		75
+#define GPIO76_HX4700_USBC_PUEN			76
+#define GPIO81_HX4700_CPU_GP_nRESET		81
+#define GPIO82_HX4700_EUART_RESET		82
+#define GPIO83_HX4700_WLAN_nRESET		83
+#define GPIO84_HX4700_LCD_SQN			84
+#define GPIO85_HX4700_nPCE1			85
+#define GPIO88_HX4700_TSC2046_CS		88
+#define GPIO91_HX4700_FLASH_VPEN		91
+#define GPIO92_HX4700_HP_DRIVER			92
+#define GPIO93_HX4700_EUART_INT			93
+#define GPIO94_HX4700_KEY_MAIL			94
+#define GPIO95_HX4700_BATT_OFF			95
+#define GPIO96_HX4700_BQ24022_ISET2		96
+#define GPIO97_HX4700_nBL_DETECT		97
+#define GPIO99_HX4700_KEY_CONTACTS		99
+#define GPIO100_HX4700_AUTO_SENSE		100 /* BL auto brightness */
+#define GPIO102_HX4700_SYNAPTICS_POWER_ON	102
+#define GPIO103_HX4700_SYNAPTICS_INT		103
+#define GPIO105_HX4700_nIR_ON			105
+#define GPIO106_HX4700_CPU_BT_nRESET		106
+#define GPIO107_HX4700_SPK_nSD			107
+#define GPIO109_HX4700_CODEC_nPDN		109
+#define GPIO110_HX4700_LCD_LVDD_3V3_ON		110
+#define GPIO111_HX4700_LCD_AVDD_3V3_ON		111
+#define GPIO112_HX4700_LCD_N2V7_7V3_ON		112
+#define GPIO114_HX4700_CF_RESET			114
+#define GPIO116_HX4700_CPU_HW_nRESET		116
+
+/*
+ * ASIC3 GPIOs
+ */
+
+#define GPIOC_BASE		(HX4700_ASIC3_GPIO_BASE + 32)
+#define GPIOD_BASE		(HX4700_ASIC3_GPIO_BASE + 48)
+
+#define GPIOC0_LED_RED		(GPIOC_BASE + 0)
+#define GPIOC1_LED_GREEN	(GPIOC_BASE + 1)
+#define GPIOC2_LED_BLUE		(GPIOC_BASE + 2)
+#define GPIOC3_nSD_CS		(GPIOC_BASE + 3)
+#define GPIOC4_CF_nCD		(GPIOC_BASE + 4)	/* Input */
+#define GPIOC5_nCIOW		(GPIOC_BASE + 5)	/* Output, to CF */
+#define GPIOC6_nCIOR		(GPIOC_BASE + 6)	/* Output, to CF */
+#define GPIOC7_nPCE1		(GPIOC_BASE + 7)	/* Input, from CPU */
+#define GPIOC8_nPCE2		(GPIOC_BASE + 8)	/* Input, from CPU */
+#define GPIOC9_nPOE		(GPIOC_BASE + 9)	/* Input, from CPU */
+#define GPIOC10_CF_nPWE		(GPIOC_BASE + 10)	/* Input */
+#define GPIOC11_PSKTSEL		(GPIOC_BASE + 11)	/* Input, from CPU */
+#define GPIOC12_nPREG		(GPIOC_BASE + 12)	/* Input, from CPU */
+#define GPIOC13_nPWAIT		(GPIOC_BASE + 13)	/* Output, to CPU */
+#define GPIOC14_nPIOIS16	(GPIOC_BASE + 14)	/* Output, to CPU */
+#define GPIOC15_nPIOR		(GPIOC_BASE + 15)	/* Input, from CPU */
+
+#define GPIOD0_CPU_SS_INT	(GPIOD_BASE + 0)	/* Input */
+#define GPIOD1_nKEY_CALENDAR	(GPIOD_BASE + 1)
+#define GPIOD2_BLUETOOTH_WAKEUP	(GPIOD_BASE + 2)
+#define GPIOD3_nKEY_HOME	(GPIOD_BASE + 3)
+#define GPIOD4_CF_nCD		(GPIOD_BASE + 4)	/* Input, from CF */
+#define GPIOD5_nPIO		(GPIOD_BASE + 5)	/* Input */
+#define GPIOD6_nKEY_RECORD	(GPIOD_BASE + 6)
+#define GPIOD7_nSDIO_DETECT	(GPIOD_BASE + 7)
+#define GPIOD8_COM_DCD		(GPIOD_BASE + 8)	/* Input */
+#define GPIOD9_nAC_IN		(GPIOD_BASE + 9)
+#define GPIOD10_nSDIO_IRQ	(GPIOD_BASE + 10)	/* Input */
+#define GPIOD11_nCIOIS16	(GPIOD_BASE + 11)	/* Input, from CF */
+#define GPIOD12_nCWAIT		(GPIOD_BASE + 12)	/* Input, from CF */
+#define GPIOD13_CF_RNB		(GPIOD_BASE + 13)	/* Input */
+#define GPIOD14_nUSBC_DETECT	(GPIOD_BASE + 14)
+#define GPIOD15_nPIOW		(GPIOD_BASE + 15)	/* Input, from CPU */
+
+/*
+ * EGPIOs
+ */
+
+#define EGPIO0_VCC_3V3_EN	(HX4700_EGPIO_BASE + 0)	/* WLAN support chip */
+#define EGPIO1_WL_VREG_EN	(HX4700_EGPIO_BASE + 1)	/* WLAN power */
+#define EGPIO2_VCC_2V1_WL_EN	(HX4700_EGPIO_BASE + 2)	/* unused */
+#define EGPIO3_SS_PWR_ON	(HX4700_EGPIO_BASE + 3)	/* smart slot power */
+#define EGPIO4_CF_3V3_ON	(HX4700_EGPIO_BASE + 4)	/* CF 3.3V enable */
+#define EGPIO5_BT_3V3_ON	(HX4700_EGPIO_BASE + 5)	/* BT 3.3V enable */
+#define EGPIO6_WL1V8_EN		(HX4700_EGPIO_BASE + 6)	/* WLAN 1.8V enable */
+#define EGPIO7_VCC_3V3_WL_EN	(HX4700_EGPIO_BASE + 7)	/* WLAN 3.3V enable */
+#define EGPIO8_USB_3V3_ON	(HX4700_EGPIO_BASE + 8)	/* unused */
+
+#endif /* _HX4700_H_ */
diff --git a/arch/arm/mach-pxa/include/mach/irqs.h b/arch/arm/mach-pxa/include/mach/irqs.h
index ebf38bb..6a1d959 100644
--- a/arch/arm/mach-pxa/include/mach/irqs.h
+++ b/arch/arm/mach-pxa/include/mach/irqs.h
@@ -101,7 +101,9 @@
  */
 #define IRQ_BOARD_START		(PXA_GPIO_IRQ_BASE + PXA_GPIO_IRQ_NUM)
 
-#if defined(CONFIG_MACH_ZYLONITE)
+#if defined(CONFIG_MACH_H4700)
+#define IRQ_BOARD_END		(IRQ_BOARD_START + 70)
+#elif defined(CONFIG_MACH_ZYLONITE)
 #define IRQ_BOARD_END		(IRQ_BOARD_START + 32)
 #else
 #define IRQ_BOARD_END		(IRQ_BOARD_START + 16)
-- 
cgit v0.10.2


From dd045445f82a9ff38120a5cd7f6117f065285859 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Tue, 26 May 2009 22:04:38 +0200
Subject: [ARM] pxa: magician_defconfig enable hx4700, asic3 and w100fb

I'm not going to maintain separate defconfigs for magician and hx4700.
This should probably be renamed to htcpxa_defconfig.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/configs/magician_defconfig b/arch/arm/configs/magician_defconfig
index f56837f..957fd5f 100644
--- a/arch/arm/configs/magician_defconfig
+++ b/arch/arm/configs/magician_defconfig
@@ -190,6 +190,7 @@ CONFIG_ARCH_PXA=y
 # CONFIG_MACH_SAAR is not set
 # CONFIG_MACH_ARMCORE is not set
 # CONFIG_MACH_CM_X300 is not set
+CONFIG_MACH_H4700=y
 CONFIG_MACH_MAGICIAN=y
 # CONFIG_MACH_MIOA701 is not set
 # CONFIG_MACH_PCM027 is not set
@@ -828,7 +829,7 @@ CONFIG_SSB_POSSIBLE=y
 #
 # CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
-# CONFIG_MFD_ASIC3 is not set
+CONFIG_MFD_ASIC3=y
 CONFIG_HTC_EGPIO=y
 CONFIG_HTC_PASIC3=y
 # CONFIG_TPS65010 is not set
@@ -891,7 +892,7 @@ CONFIG_FB_PXA_OVERLAY=y
 # CONFIG_FB_PXA_SMARTPANEL is not set
 # CONFIG_FB_PXA_PARAMETERS is not set
 # CONFIG_FB_MBX is not set
-# CONFIG_FB_W100 is not set
+CONFIG_FB_W100=y
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
 # CONFIG_FB_MB862XX is not set
-- 
cgit v0.10.2


From ef47d5f02402b4b1183d64edde5e89178794bb9d Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 28 May 2009 07:07:47 +0200
Subject: MAINTAINERS: add a maintainer for iPAQ hx4700

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index cf4abdd..b1c449f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -680,6 +680,13 @@ M:	sakoman@gmail.com
 L:	linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
 S:	Maintained
 
+ARM/H4700 (HP IPAQ HX4700) MACHINE SUPPORT
+P:	Philipp Zabel
+M:	philipp.zabel@gmail.com
+S:	Maintained
+F:	arch/arm/mach-pxa/hx4700.c
+F:	arch/arm/mach-pxa/include/mach/hx4700.h
+
 ARM/HP JORNADA 7XX MACHINE SUPPORT
 P:	Kristoffer Ericson
 M:	kristoffer.ericson@gmail.com
-- 
cgit v0.10.2


From b86017150087ac62c834ac64b87b9565850eb320 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Wed, 22 Apr 2009 00:43:21 +0400
Subject: [ARM] pxa/csb726: switch to use smsc911x driver

csb726 used obsolete and not working out-of-tree driver smc911x.
Switch it to use new smsc911x driver.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c
index f95b32d..657d7a4 100644
--- a/arch/arm/mach-pxa/csb726.c
+++ b/arch/arm/mach-pxa/csb726.c
@@ -16,6 +16,7 @@
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/partitions.h>
 #include <linux/sm501.h>
+#include <linux/smsc911x.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -275,15 +276,26 @@ static struct resource csb726_lan_resources[] = {
 	{
 		.start	= CSB726_IRQ_LAN,
 		.end	= CSB726_IRQ_LAN,
-		.flags	= IORESOURCE_IRQ,
+		.flags	= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
 	},
 };
 
+struct smsc911x_platform_config csb726_lan_config = {
+	.irq_type	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
+	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
+	.flags		= SMSC911X_USE_32BIT,
+	.phy_interface	= PHY_INTERFACE_MODE_MII,
+};
+
+
 static struct platform_device csb726_lan = {
-	.name		= "smc911x",
+	.name		= "smsc911x",
 	.id		= -1,
 	.num_resources	= ARRAY_SIZE(csb726_lan_resources),
 	.resource	= csb726_lan_resources,
+	.dev		= {
+		.platform_data	= &csb726_lan_config,
+	},
 };
 
 static struct platform_device *devices[] __initdata = {
-- 
cgit v0.10.2


From 96a9fcaf5d9a04f3ca8855d7b8d034089ccc8533 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Wed, 22 Apr 2009 00:43:22 +0400
Subject: [ARM] pxa/csb726: register ac97 controller

Add a call to pxa_set_ac97_info() to enable the audio support.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c
index 657d7a4..7d3e1b4 100644
--- a/arch/arm/mach-pxa/csb726.c
+++ b/arch/arm/mach-pxa/csb726.c
@@ -26,6 +26,7 @@
 #include <mach/mmc.h>
 #include <mach/ohci.h>
 #include <mach/pxa2xx-regs.h>
+#include <mach/audio.h>
 
 #include "generic.h"
 #include "devices.h"
@@ -315,6 +316,7 @@ static void __init csb726_init(void)
 	pxa27x_set_i2c_power_info(NULL);
 	pxa_set_mci_info(&csb726_mci);
 	pxa_set_ohci_info(&csb726_ohci_platform_data);
+	pxa_set_ac97_info(NULL);
 
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
-- 
cgit v0.10.2


From 8768dc9b75efa43e612f6f520cf76a89bbab69d3 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Thu, 23 Apr 2009 11:12:37 +0200
Subject: [ARM] pxa/palm: Switch PalmT5, TX, TE2 to GPIO VBUS

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c
index 05bf979..76a6ea2 100644
--- a/arch/arm/mach-pxa/palmt5.c
+++ b/arch/arm/mach-pxa/palmt5.c
@@ -26,6 +26,7 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx_batt.h>
 #include <linux/power_supply.h>
+#include <linux/usb/gpio_vbus.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -343,11 +344,18 @@ static struct pxaficp_platform_data palmt5_ficp_platform_data = {
 /******************************************************************************
  * UDC
  ******************************************************************************/
-static struct pxa2xx_udc_mach_info palmt5_udc_info __initdata = {
+static struct gpio_vbus_mach_info palmt5_udc_info = {
 	.gpio_vbus		= GPIO_NR_PALMT5_USB_DETECT_N,
 	.gpio_vbus_inverted	= 1,
 	.gpio_pullup		= GPIO_NR_PALMT5_USB_PULLUP,
-	.gpio_pullup_inverted	= 0,
+};
+
+static struct platform_device palmt5_gpio_vbus = {
+	.name	= "gpio-vbus",
+	.id	= -1,
+	.dev	= {
+		.platform_data	= &palmt5_udc_info,
+	},
 };
 
 /******************************************************************************
@@ -500,6 +508,7 @@ static struct platform_device *devices[] __initdata = {
 	&palmt5_backlight,
 	&power_supply,
 	&palmt5_asoc,
+	&palmt5_gpio_vbus,
 };
 
 /* setup udc GPIOs initial state */
@@ -519,7 +528,6 @@ static void __init palmt5_init(void)
 	pxa_set_mci_info(&palmt5_mci_platform_data);
 	palmt5_udc_init();
 	pxa_set_ac97_info(&palmt5_ac97_pdata);
-	pxa_set_udc_info(&palmt5_udc_info);
 	pxa_set_ficp_info(&palmt5_ficp_platform_data);
 	pxa_set_keypad_info(&palmt5_keypad_platform_data);
 	wm97xx_bat_set_pdata(&wm97xx_batt_pdata);
diff --git a/arch/arm/mach-pxa/palmte2.c b/arch/arm/mach-pxa/palmte2.c
index 43fcf2e..b09a9ef 100644
--- a/arch/arm/mach-pxa/palmte2.c
+++ b/arch/arm/mach-pxa/palmte2.c
@@ -25,6 +25,7 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx_batt.h>
 #include <linux/power_supply.h>
+#include <linux/usb/gpio_vbus.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -318,11 +319,18 @@ static struct pxaficp_platform_data palmte2_ficp_platform_data = {
 /******************************************************************************
  * UDC
  ******************************************************************************/
-static struct pxa2xx_udc_mach_info palmte2_udc_info __initdata = {
+static struct gpio_vbus_mach_info palmte2_udc_info = {
 	.gpio_vbus		= GPIO_NR_PALMTE2_USB_DETECT_N,
 	.gpio_vbus_inverted	= 1,
 	.gpio_pullup		= GPIO_NR_PALMTE2_USB_PULLUP,
-	.gpio_pullup_inverted	= 0,
+};
+
+static struct platform_device palmte2_gpio_vbus = {
+	.name	= "gpio-vbus",
+	.id	= -1,
+	.dev	= {
+		.platform_data	= &palmte2_udc_info,
+	},
 };
 
 /******************************************************************************
@@ -429,6 +437,7 @@ static struct platform_device *devices[] __initdata = {
 #endif
 	&palmte2_backlight,
 	&power_supply,
+	&palmte2_gpio_vbus,
 };
 
 /* setup udc GPIOs initial state */
@@ -447,7 +456,6 @@ static void __init palmte2_init(void)
 	set_pxa_fb_info(&palmte2_lcd_screen);
 	pxa_set_mci_info(&palmte2_mci_platform_data);
 	palmte2_udc_init();
-	pxa_set_udc_info(&palmte2_udc_info);
 	pxa_set_ac97_info(NULL);
 	pxa_set_ficp_info(&palmte2_ficp_platform_data);
 	wm97xx_bat_set_pdata(&wm97xx_batt_pdata);
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index e99a893..3ed5ae0 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -27,6 +27,7 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx_batt.h>
 #include <linux/power_supply.h>
+#include <linux/usb/gpio_vbus.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -359,11 +360,18 @@ static struct pxaficp_platform_data palmtx_ficp_platform_data = {
 /******************************************************************************
  * UDC
  ******************************************************************************/
-static struct pxa2xx_udc_mach_info palmtx_udc_info __initdata = {
+static struct gpio_vbus_mach_info palmtx_udc_info = {
 	.gpio_vbus		= GPIO_NR_PALMTX_USB_DETECT_N,
 	.gpio_vbus_inverted	= 1,
 	.gpio_pullup		= GPIO_NR_PALMTX_USB_PULLUP,
-	.gpio_pullup_inverted	= 0,
+};
+
+static struct platform_device palmtx_gpio_vbus = {
+	.name	= "gpio-vbus",
+	.id	= -1,
+	.dev	= {
+		.platform_data	= &palmtx_udc_info,
+	},
 };
 
 /******************************************************************************
@@ -517,6 +525,7 @@ static struct platform_device *devices[] __initdata = {
 	&palmtx_backlight,
 	&power_supply,
 	&palmtx_asoc,
+	&palmtx_gpio_vbus,
 };
 
 static struct map_desc palmtx_io_desc[] __initdata = {
@@ -552,7 +561,6 @@ static void __init palmtx_init(void)
 	pxa_set_mci_info(&palmtx_mci_platform_data);
 	palmtx_udc_init();
 	pxa_set_ac97_info(&palmtx_ac97_pdata);
-	pxa_set_udc_info(&palmtx_udc_info);
 	pxa_set_ficp_info(&palmtx_ficp_platform_data);
 	pxa_set_keypad_info(&palmtx_keypad_platform_data);
 	wm97xx_bat_set_pdata(&wm97xx_batt_pdata);
-- 
cgit v0.10.2


From 37330efd4abb474b3fdfacea68beb37cf67564ed Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Thu, 23 Apr 2009 11:27:11 +0200
Subject: [ARM] pxa/palm: Add Palm27x aSoC driver to PalmTE2

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/palmte2.c b/arch/arm/mach-pxa/palmte2.c
index b09a9ef..d823b09 100644
--- a/arch/arm/mach-pxa/palmte2.c
+++ b/arch/arm/mach-pxa/palmte2.c
@@ -38,6 +38,7 @@
 #include <mach/mfp-pxa25x.h>
 #include <mach/irda.h>
 #include <mach/udc.h>
+#include <mach/palmasoc.h>
 
 #include "generic.h"
 #include "devices.h"
@@ -108,6 +109,7 @@ static unsigned long palmte2_pin_config[] __initdata = {
 	GPIO1_RST,	/* reset */
 	GPIO4_GPIO,	/* Hotsync button */
 	GPIO9_GPIO,	/* power detect */
+	GPIO15_GPIO,	/* earphone detect */
 	GPIO37_GPIO,	/* LCD power */
 	GPIO56_GPIO,	/* Backlight power */
 };
@@ -403,6 +405,21 @@ static struct wm97xx_batt_info wm97xx_batt_pdata = {
 };
 
 /******************************************************************************
+ * aSoC audio
+ ******************************************************************************/
+static struct palm27x_asoc_info palmte2_asoc_pdata = {
+	.jack_gpio	= GPIO_NR_PALMTE2_EARPHONE_DETECT,
+};
+
+static struct platform_device palmte2_asoc = {
+	.name = "palm27x-asoc",
+	.id   = -1,
+	.dev  = {
+		.platform_data = &palmte2_asoc_pdata,
+	},
+};
+
+/******************************************************************************
  * Framebuffer
  ******************************************************************************/
 static struct pxafb_mode_info palmte2_lcd_modes[] = {
@@ -437,6 +454,7 @@ static struct platform_device *devices[] __initdata = {
 #endif
 	&palmte2_backlight,
 	&power_supply,
+	&palmte2_asoc,
 	&palmte2_gpio_vbus,
 };
 
diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig
index ad8a10f..96b2699 100644
--- a/sound/soc/pxa/Kconfig
+++ b/sound/soc/pxa/Kconfig
@@ -98,13 +98,14 @@ config SND_PXA2XX_SOC_EM_X270
 	  CompuLab EM-x270.
 
 config SND_PXA2XX_SOC_PALM27X
-	bool "SoC Audio support for Palm T|X, T5 and LifeDrive"
-	depends on SND_PXA2XX_SOC && (MACH_PALMLD || MACH_PALMTX || MACH_PALMT5)
+	bool "SoC Audio support for Palm T|X, T5, E2 and LifeDrive"
+	depends on SND_PXA2XX_SOC && (MACH_PALMLD || MACH_PALMTX || \
+			MACH_PALMT5 || MACH_PALMTE2)
 	select SND_PXA2XX_SOC_AC97
 	select SND_SOC_WM9712
 	help
 	  Say Y if you want to add support for SoC audio on
-	  Palm T|X, T5 or LifeDrive handheld computer.
+	  Palm T|X, T5, E2 or LifeDrive handheld computer.
 
 config SND_SOC_ZYLONITE
 	tristate "SoC Audio support for Marvell Zylonite"
diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c
index 44fcc4e..e6102fd 100644
--- a/sound/soc/pxa/palm27x.c
+++ b/sound/soc/pxa/palm27x.c
@@ -205,7 +205,7 @@ static int palm27x_asoc_probe(struct platform_device *pdev)
 	int ret;
 
 	if (!(machine_is_palmtx() || machine_is_palmt5() ||
-		machine_is_palmld()))
+		machine_is_palmld() || machine_is_palmte2()))
 		return -ENODEV;
 
 	if (pdev->dev.platform_data)
-- 
cgit v0.10.2


From fc9c1b6fc874d786f32163291b059ec574698aa2 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Thu, 14 May 2009 11:27:00 +0200
Subject: [ARM] pxa/palm: Palm TX, T5, LD suspend-to-mem rework

Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index 471a853..1f1917f 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -524,30 +524,18 @@ static struct pxafb_mach_info palmld_lcd_screen = {
 /******************************************************************************
  * Power management - standby
  ******************************************************************************/
-#ifdef CONFIG_PM
-static u32 *addr __initdata;
-static u32 resume[3] __initdata = {
-	0xe3a00101,	/* mov	r0,	#0x40000000 */
-	0xe380060f,	/* orr	r0, r0, #0x00f00000 */
-	0xe590f008,	/* ldr	pc, [r0, #0x08] */
-};
-
-static int __init palmld_pm_init(void)
+static void __init palmld_pm_init(void)
 {
-	int i;
-
-	/* this is where the bootloader jumps */
-	addr = phys_to_virt(PALMLD_STR_BASE);
-
-	for (i = 0; i < 3; i++)
-		addr[i] = resume[i];
-
-	return 0;
+	static u32 resume[] = {
+		0xe3a00101,	/* mov	r0,	#0x40000000 */
+		0xe380060f,	/* orr	r0, r0, #0x00f00000 */
+		0xe590f008,	/* ldr	pc, [r0, #0x08] */
+	};
+
+	/* copy the bootloader */
+	memcpy(phys_to_virt(PALMLD_STR_BASE), resume, sizeof(resume));
 }
 
-device_initcall(palmld_pm_init);
-#endif
-
 /******************************************************************************
  * Machine init
  ******************************************************************************/
@@ -586,6 +574,7 @@ static void __init palmld_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(palmld_pin_config));
 
+	palmld_pm_init();
 	set_pxa_fb_info(&palmld_lcd_screen);
 	pxa_set_mci_info(&palmld_mci_platform_data);
 	pxa_set_ac97_info(&palmld_ac97_pdata);
diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c
index 76a6ea2..aae64a1 100644
--- a/arch/arm/mach-pxa/palmt5.c
+++ b/arch/arm/mach-pxa/palmt5.c
@@ -474,30 +474,18 @@ static struct pxafb_mach_info palmt5_lcd_screen = {
 /******************************************************************************
  * Power management - standby
  ******************************************************************************/
-#ifdef CONFIG_PM
-static u32 *addr __initdata;
-static u32 resume[3] __initdata = {
-	0xe3a00101,	/* mov	r0,	#0x40000000 */
-	0xe380060f,	/* orr	r0, r0, #0x00f00000 */
-	0xe590f008,	/* ldr	pc, [r0, #0x08] */
-};
-
-static int __init palmt5_pm_init(void)
+static void __init palmt5_pm_init(void)
 {
-	int i;
-
-	/* this is where the bootloader jumps */
-	addr = phys_to_virt(PALMT5_STR_BASE);
-
-	for (i = 0; i < 3; i++)
-		addr[i] = resume[i];
-
-	return 0;
+	static u32 resume[] = {
+		0xe3a00101,	/* mov	r0,	#0x40000000 */
+		0xe380060f,	/* orr	r0, r0, #0x00f00000 */
+		0xe590f008,	/* ldr	pc, [r0, #0x08] */
+	};
+
+	/* copy the bootloader */
+	memcpy(phys_to_virt(PALMT5_STR_BASE), resume, sizeof(resume));
 }
 
-device_initcall(palmt5_pm_init);
-#endif
-
 /******************************************************************************
  * Machine init
  ******************************************************************************/
@@ -524,6 +512,7 @@ static void __init palmt5_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(palmt5_pin_config));
 
+	palmt5_pm_init();
 	set_pxa_fb_info(&palmt5_lcd_screen);
 	pxa_set_mci_info(&palmt5_mci_platform_data);
 	palmt5_udc_init();
@@ -531,6 +520,7 @@ static void __init palmt5_init(void)
 	pxa_set_ficp_info(&palmt5_ficp_platform_data);
 	pxa_set_keypad_info(&palmt5_keypad_platform_data);
 	wm97xx_bat_set_pdata(&wm97xx_batt_pdata);
+
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index 3ed5ae0..6c15d84 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -491,30 +491,18 @@ static struct pxafb_mach_info palmtx_lcd_screen = {
 /******************************************************************************
  * Power management - standby
  ******************************************************************************/
-#ifdef CONFIG_PM
-static u32 *addr __initdata;
-static u32 resume[3] __initdata = {
-	0xe3a00101,	/* mov	r0,	#0x40000000 */
-	0xe380060f,	/* orr	r0, r0, #0x00f00000 */
-	0xe590f008,	/* ldr	pc, [r0, #0x08] */
-};
-
-static int __init palmtx_pm_init(void)
+static void __init palmtx_pm_init(void)
 {
-	int i;
-
-	/* this is where the bootloader jumps */
-	addr = phys_to_virt(PALMTX_STR_BASE);
-
-	for (i = 0; i < 3; i++)
-		addr[i] = resume[i];
-
-	return 0;
+	static u32 resume[] = {
+		0xe3a00101,	/* mov	r0,	#0x40000000 */
+		0xe380060f,	/* orr	r0, r0, #0x00f00000 */
+		0xe590f008,	/* ldr	pc, [r0, #0x08] */
+	};
+
+	/* copy the bootloader */
+	memcpy(phys_to_virt(PALMTX_STR_BASE), resume, sizeof(resume));
 }
 
-device_initcall(palmtx_pm_init);
-#endif
-
 /******************************************************************************
  * Machine init
  ******************************************************************************/
@@ -557,6 +545,7 @@ static void __init palmtx_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(palmtx_pin_config));
 
+	palmtx_pm_init();
 	set_pxa_fb_info(&palmtx_lcd_screen);
 	pxa_set_mci_info(&palmtx_mci_platform_data);
 	palmtx_udc_init();
-- 
cgit v0.10.2


From 4036e1dea565207010408f5c6137a9d8d3c0ff5c Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Sat, 16 May 2009 18:20:39 +0000
Subject: [ARM] pxa: Stargate 2 board support

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index a911bc9..2e45630 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -51,6 +51,12 @@ config MACH_INTELMOTE2
 	select IWMMXT
 	select PXA_HAVE_BOARD_IRQS
 
+config MACH_STARGATE2
+	bool "Intel Stargate 2 Platform"
+	select PXA27x
+	select IWMMXT
+	select PXA_HAVE_BOARD_IRQS
+
 config ARCH_LUBBOCK
 	bool "Intel DBPXA250 Development Platform"
 	select PXA25x
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index e950c40..d18ffef 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_MACH_CM_X300)      += cm-x300.o
 obj-$(CONFIG_PXA_EZX)           += ezx.o
 
 obj-$(CONFIG_MACH_INTELMOTE2)   += imote2.o
+obj-$(CONFIG_MACH_STARGATE2)	+= stargate2.o
 obj-$(CONFIG_MACH_CSB726)	+= csb726.o
 obj-$(CONFIG_CSB726_CSB701)	+= csb701.o
 
diff --git a/arch/arm/mach-pxa/include/mach/uncompress.h b/arch/arm/mach-pxa/include/mach/uncompress.h
index 5706cea..c6cfd7c 100644
--- a/arch/arm/mach-pxa/include/mach/uncompress.h
+++ b/arch/arm/mach-pxa/include/mach/uncompress.h
@@ -36,7 +36,7 @@ static inline void flush(void)
 static inline void arch_decomp_setup(void)
 {
 	if (machine_is_littleton() || machine_is_intelmote2()
-			|| machine_is_csb726())
+	    || machine_is_csb726() || machine_is_stargate2())
 		UART = STUART;
 }
 
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
new file mode 100644
index 0000000..3b205b6
--- /dev/null
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -0,0 +1,796 @@
+/*
+ *  linux/arch/arm/mach-pxa/stargate2.c
+ *
+ *  Author:	Ed C. Epp
+ *  Created:	Nov 05, 2002
+ *  Copyright:	Intel Corp.
+ *
+ *  Modified 2009:  Jonathan Cameron <jic23@cam.ac.uk>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/fb.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/machine.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/plat-ram.h>
+#include <linux/mtd/partitions.h>
+
+#include <linux/i2c/pcf857x.h>
+#include <linux/i2c/at24.h>
+#include <linux/smc91x.h>
+#include <linux/gpio.h>
+
+#include <asm/types.h>
+#include <asm/setup.h>
+#include <asm/memory.h>
+#include <asm/mach-types.h>
+#include <asm/irq.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/flash.h>
+
+#include <mach/pxa27x.h>
+#include <plat/i2c.h>
+#include <mach/mmc.h>
+#include <mach/udc.h>
+#include <mach/pxa2xx_spi.h>
+#include <mach/pxa27x-udc.h>
+
+#include <linux/spi/spi.h>
+#include <linux/mfd/da903x.h>
+#include <linux/sht15.h>
+
+#include "devices.h"
+#include "generic.h"
+
+/* Bluetooth */
+#define SG2_BT_RESET		81
+
+/* SD */
+#define SG2_GPIO_nSD_DETECT	90
+#define SG2_SD_POWER_ENABLE	89
+
+static unsigned long stargate2_pin_config[] __initdata = {
+
+	GPIO15_nCS_1, /* SRAM */
+	/* SMC91x */
+	GPIO80_nCS_4,
+	GPIO40_GPIO, /*cable detect?*/
+	/* Device Identification for wakeup*/
+	GPIO102_GPIO,
+
+	/* Button */
+	GPIO91_GPIO | WAKEUP_ON_LEVEL_HIGH,
+
+	/* DA9030 */
+	GPIO1_GPIO,
+
+	/* Compact Flash */
+	GPIO79_PSKTSEL,
+	GPIO48_nPOE,
+	GPIO49_nPWE,
+	GPIO50_nPIOR,
+	GPIO51_nPIOW,
+	GPIO85_nPCE_1,
+	GPIO54_nPCE_2,
+	GPIO55_nPREG,
+	GPIO56_nPWAIT,
+	GPIO57_nIOIS16,
+	GPIO120_GPIO, /* Buff ctrl */
+	GPIO108_GPIO, /* Power ctrl */
+	GPIO82_GPIO, /* Reset */
+	GPIO53_GPIO, /* SG2_S0_GPIO_DETECT */
+
+	/* MMC */
+	GPIO32_MMC_CLK,
+	GPIO112_MMC_CMD,
+	GPIO92_MMC_DAT_0,
+	GPIO109_MMC_DAT_1,
+	GPIO110_MMC_DAT_2,
+	GPIO111_MMC_DAT_3,
+	GPIO90_GPIO, /* nSD detect */
+	GPIO89_GPIO, /* SD_POWER_ENABLE */
+
+	/* Bluetooth */
+	GPIO81_GPIO, /* reset */
+
+	/* cc2420 802.15.4 radio */
+	GPIO22_GPIO,		/* CC_RSTN  (out)*/
+	GPIO114_GPIO,		/* CC_FIFO (in) */
+	GPIO116_GPIO, 		/* CC_CCA (in) */
+	GPIO0_GPIO,		/* CC_FIFOP (in) */
+	GPIO16_GPIO,		/* CCSFD (in) */
+	GPIO39_GPIO,		/* CSn (out) */
+
+	/* I2C */
+	GPIO117_I2C_SCL,
+	GPIO118_I2C_SDA,
+
+	/* SSP 3 - 802.15.4 radio */
+	GPIO39_GPIO, /* chip select */
+	GPIO34_SSP3_SCLK,
+	GPIO35_SSP3_TXD,
+	GPIO41_SSP3_RXD,
+
+	/* SSP 2 */
+	GPIO11_SSP2_RXD,
+	GPIO38_SSP2_TXD,
+	GPIO36_SSP2_SCLK,
+	GPIO37_GPIO, /* chip select */
+
+	/* SSP 1 */
+	GPIO26_SSP1_RXD,
+	GPIO25_SSP1_TXD,
+	GPIO23_SSP1_SCLK,
+	GPIO24_GPIO, /* chip select */
+
+	/* BTUART */
+	GPIO42_BTUART_RXD,
+	GPIO43_BTUART_TXD,
+	GPIO44_BTUART_CTS,
+	GPIO45_BTUART_RTS,
+
+	/* STUART */
+	GPIO46_STUART_RXD,
+	GPIO47_STUART_TXD,
+
+	/* Basic sensor board */
+	GPIO96_GPIO,	/* accelerometer interrupt */
+	GPIO99_GPIO,	/* ADC interrupt */
+
+	/* Connector pins specified as gpios */
+	GPIO94_GPIO, /* large basic connector pin 14 */
+	GPIO10_GPIO, /* large basic connector pin 23 */
+
+	/* SHT15 */
+	GPIO100_GPIO,
+	GPIO98_GPIO,
+};
+
+/**
+ * stargate2_reset_bluetooth() reset the bluecore to ensure consistent state
+ **/
+static int stargate2_reset_bluetooth(void)
+{
+	int err;
+	err = gpio_request(SG2_BT_RESET, "SG2_BT_RESET");
+	if (err) {
+		printk(KERN_ERR "Could not get gpio for bluetooth reset \n");
+		return err;
+	}
+	gpio_direction_output(SG2_BT_RESET, 1);
+	mdelay(5);
+	/* now reset it - 5 msec minimum */
+	gpio_set_value(SG2_BT_RESET, 0);
+	mdelay(10);
+	gpio_set_value(SG2_BT_RESET, 1);
+	gpio_free(SG2_BT_RESET);
+	return 0;
+}
+
+static struct led_info stargate2_leds[] = {
+	{
+		.name = "sg2:red",
+		.flags = DA9030_LED_RATE_ON,
+	}, {
+		.name = "sg2:blue",
+		.flags = DA9030_LED_RATE_ON,
+	}, {
+		.name = "sg2:green",
+		.flags = DA9030_LED_RATE_ON,
+	},
+};
+
+static struct sht15_platform_data platform_data_sht15 = {
+	.gpio_data =  100,
+	.gpio_sck  =  98,
+};
+
+static struct platform_device sht15 = {
+	.name = "sht15",
+	.id = -1,
+	.dev = {
+		.platform_data = &platform_data_sht15,
+	},
+};
+
+static struct regulator_consumer_supply stargate2_sensor_3_con[] = {
+	{
+		.dev = &sht15.dev,
+		.supply = "vcc",
+	},
+};
+
+enum stargate2_ldos{
+	vcc_vref,
+	vcc_cc2420,
+	/* a mote connector? */
+	vcc_mica,
+	/* the CSR bluecore chip */
+	vcc_bt,
+	/* The two voltages available to sensor boards */
+	vcc_sensor_1_8,
+	vcc_sensor_3,
+	/* directly connected to the pxa27x */
+	vcc_sram_ext,
+	vcc_pxa_pll,
+	vcc_pxa_usim, /* Reference voltage for certain gpios */
+	vcc_pxa_mem,
+	vcc_pxa_flash,
+	vcc_pxa_core, /*Dc-Dc buck not yet supported */
+	vcc_lcd,
+	vcc_bb,
+	vcc_bbio, /*not sure!*/
+	vcc_io, /* cc2420 802.15.4 radio and pxa vcc_io ?*/
+};
+
+/* The values of the various regulator constraints are obviously dependent
+ * on exactly what is wired to each ldo.  Unfortunately this information is
+ * not generally available.  More information has been requested from Xbow.
+ */
+static struct regulator_init_data stargate2_ldo_init_data[] = {
+	[vcc_bbio] = {
+		.constraints = { /* board default 1.8V */
+			.name = "vcc_bbio",
+			.min_uV = 1800000,
+			.max_uV = 1800000,
+		},
+	},
+	[vcc_bb] = {
+		.constraints = { /* board default 2.8V */
+			.name = "vcc_bb",
+			.min_uV = 2700000,
+			.max_uV = 3000000,
+		},
+	},
+	[vcc_pxa_flash] = {
+		.constraints = {/* default is 1.8V */
+			.name = "vcc_pxa_flash",
+			.min_uV = 1800000,
+			.max_uV = 1800000,
+		},
+	},
+	[vcc_cc2420] = { /* also vcc_io */
+		.constraints = {
+			/* board default is 2.8V */
+			.name = "vcc_cc2420",
+			.min_uV = 2700000,
+			.max_uV = 3300000,
+		},
+	},
+	[vcc_vref] = { /* Reference for what? */
+		.constraints = { /* default 1.8V */
+			.name = "vcc_vref",
+			.min_uV = 1800000,
+			.max_uV = 1800000,
+		},
+	},
+	[vcc_sram_ext] = {
+		.constraints = { /* default 2.8V */
+			.name = "vcc_sram_ext",
+			.min_uV = 2800000,
+			.max_uV = 2800000,
+		},
+	},
+	[vcc_mica] = {
+		.constraints = { /* default 2.8V */
+			.name = "vcc_mica",
+			.min_uV = 2800000,
+			.max_uV = 2800000,
+		},
+	},
+	[vcc_bt] = {
+		.constraints = { /* default 2.8V */
+			.name = "vcc_bt",
+			.min_uV = 2800000,
+			.max_uV = 2800000,
+		},
+	},
+	[vcc_lcd] = {
+		.constraints = { /* default 2.8V */
+			.name = "vcc_lcd",
+			.min_uV = 2700000,
+			.max_uV = 3300000,
+		},
+	},
+	[vcc_io] = { /* Same or higher than everything
+			  * bar vccbat and vccusb */
+		.constraints = { /* default 2.8V */
+			.name = "vcc_io",
+			.min_uV = 2692000,
+			.max_uV = 3300000,
+		},
+	},
+	[vcc_sensor_1_8] = {
+		.constraints = { /* default 1.8V */
+			.name = "vcc_sensor_1_8",
+			.min_uV = 1800000,
+			.max_uV = 1800000,
+		},
+	},
+	[vcc_sensor_3] = { /* curiously default 2.8V */
+		.constraints = {
+			.name = "vcc_sensor_3",
+			.min_uV = 2800000,
+			.max_uV = 3000000,
+		},
+		.num_consumer_supplies = ARRAY_SIZE(stargate2_sensor_3_con),
+		.consumer_supplies = stargate2_sensor_3_con,
+	},
+	[vcc_pxa_pll] = { /* 1.17V - 1.43V, default 1.3V*/
+		.constraints = {
+			.name = "vcc_pxa_pll",
+			.min_uV = 1170000,
+			.max_uV = 1430000,
+		},
+	},
+	[vcc_pxa_usim] = {
+		.constraints = { /* default 1.8V */
+			.name = "vcc_pxa_usim",
+			.min_uV = 1710000,
+			.max_uV = 2160000,
+		},
+	},
+	[vcc_pxa_mem] = {
+		.constraints = { /* default 1.8V */
+			.name = "vcc_pxa_mem",
+			.min_uV = 1800000,
+			.max_uV = 1800000,
+		},
+	},
+};
+
+static struct da903x_subdev_info stargate2_da9030_subdevs[] = {
+	{
+		.name = "da903x-led",
+		.id = DA9030_ID_LED_2,
+		.platform_data = &stargate2_leds[0],
+	}, {
+		.name = "da903x-led",
+		.id = DA9030_ID_LED_3,
+		.platform_data = &stargate2_leds[2],
+	}, {
+		.name = "da903x-led",
+		.id = DA9030_ID_LED_4,
+		.platform_data = &stargate2_leds[1],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO2,
+		.platform_data = &stargate2_ldo_init_data[vcc_bbio],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO3,
+		.platform_data = &stargate2_ldo_init_data[vcc_bb],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO4,
+		.platform_data = &stargate2_ldo_init_data[vcc_pxa_flash],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO5,
+		.platform_data = &stargate2_ldo_init_data[vcc_cc2420],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO6,
+		.platform_data = &stargate2_ldo_init_data[vcc_vref],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO7,
+		.platform_data = &stargate2_ldo_init_data[vcc_sram_ext],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO8,
+		.platform_data = &stargate2_ldo_init_data[vcc_mica],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO9,
+		.platform_data = &stargate2_ldo_init_data[vcc_bt],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO10,
+		.platform_data = &stargate2_ldo_init_data[vcc_sensor_1_8],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO11,
+		.platform_data = &stargate2_ldo_init_data[vcc_sensor_3],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO12,
+		.platform_data = &stargate2_ldo_init_data[vcc_lcd],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO15,
+		.platform_data = &stargate2_ldo_init_data[vcc_pxa_pll],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO17,
+		.platform_data = &stargate2_ldo_init_data[vcc_pxa_usim],
+	}, {
+		.name = "da903x-regulator", /*pxa vcc i/o and cc2420 vcc i/o */
+		.id = DA9030_ID_LDO18,
+		.platform_data = &stargate2_ldo_init_data[vcc_io],
+	}, {
+		.name = "da903x-regulator",
+		.id = DA9030_ID_LDO19,
+		.platform_data = &stargate2_ldo_init_data[vcc_pxa_mem],
+	},
+};
+
+static struct da903x_platform_data stargate2_da9030_pdata = {
+	.num_subdevs = ARRAY_SIZE(stargate2_da9030_subdevs),
+	.subdevs = stargate2_da9030_subdevs,
+};
+
+static struct resource smc91x_resources[] = {
+	[0] = {
+		.name = "smc91x-regs",
+		.start = (PXA_CS4_PHYS + 0x300),
+		.end = (PXA_CS4_PHYS + 0xfffff),
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_GPIO(40),
+		.end = IRQ_GPIO(40),
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE,
+	}
+};
+
+static struct smc91x_platdata stargate2_smc91x_info = {
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT
+	| SMC91X_NOWAIT | SMC91X_USE_DMA,
+};
+
+static struct platform_device smc91x_device = {
+	.name = "smc91x",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(smc91x_resources),
+	.resource = smc91x_resources,
+	.dev = {
+		.platform_data = &stargate2_smc91x_info,
+	},
+};
+
+
+
+static struct pxamci_platform_data stargate2_mci_platform_data;
+
+/*
+ * The card detect interrupt isn't debounced so we delay it by 250ms
+ * to give the card a chance to fully insert / eject.
+ */
+static int stargate2_mci_init(struct device *dev,
+			      irq_handler_t stargate2_detect_int,
+			      void *data)
+{
+	int err;
+
+	err = gpio_request(SG2_SD_POWER_ENABLE, "SG2_sd_power_enable");
+	if (err) {
+		printk(KERN_ERR "Can't get the gpio for SD power control");
+		goto return_err;
+	}
+	gpio_direction_output(SG2_SD_POWER_ENABLE, 0);
+
+	err = gpio_request(SG2_GPIO_nSD_DETECT, "SG2_sd_detect");
+	if (err) {
+		printk(KERN_ERR "Can't get the sd detect gpio");
+		goto free_power_en;
+	}
+	gpio_direction_input(SG2_GPIO_nSD_DETECT);
+	/* Delay to allow for full insertion */
+	stargate2_mci_platform_data.detect_delay = msecs_to_jiffies(250);
+
+	err = request_irq(IRQ_GPIO(SG2_GPIO_nSD_DETECT),
+			  stargate2_detect_int,
+			  IRQ_TYPE_EDGE_BOTH,
+			  "MMC card detect",
+			  data);
+	if (err) {
+		printk(KERN_ERR "can't request MMC card detect IRQ\n");
+		goto free_nsd_detect;
+	}
+	return 0;
+
+ free_nsd_detect:
+	gpio_free(SG2_GPIO_nSD_DETECT);
+ free_power_en:
+	gpio_free(SG2_SD_POWER_ENABLE);
+ return_err:
+	return err;
+}
+
+/**
+ * stargate2_mci_setpower() - set state of mmc power supply
+ *
+ * Very simple control. Either it is on or off and is controlled by
+ * a gpio pin */
+static void stargate2_mci_setpower(struct device *dev, unsigned int vdd)
+{
+	gpio_set_value(SG2_SD_POWER_ENABLE, !!vdd);
+}
+
+static void stargate2_mci_exit(struct device *dev, void *data)
+{
+	free_irq(IRQ_GPIO(SG2_GPIO_nSD_DETECT), data);
+	gpio_free(SG2_SD_POWER_ENABLE);
+	gpio_free(SG2_GPIO_nSD_DETECT);
+}
+
+static struct pxamci_platform_data stargate2_mci_platform_data = {
+	.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
+	.init = stargate2_mci_init,
+	.setpower = stargate2_mci_setpower,
+	.exit = stargate2_mci_exit,
+};
+
+static struct mtd_partition stargate2flash_partitions[] = {
+	{
+		.name = "Bootloader",
+		.size = 0x00040000,
+		.offset = 0,
+		.mask_flags = 0,
+	}, {
+		.name = "Kernel",
+		.size = 0x00200000,
+		.offset = 0x00040000,
+		.mask_flags = 0
+	}, {
+		.name = "Filesystem",
+		.size = 0x01DC0000,
+		.offset = 0x00240000,
+		.mask_flags = 0
+	},
+};
+
+static struct resource flash_resources = {
+	.start = PXA_CS0_PHYS,
+	.end = PXA_CS0_PHYS + SZ_32M - 1,
+	.flags = IORESOURCE_MEM,
+};
+
+static struct flash_platform_data stargate2_flash_data = {
+	.map_name = "cfi_probe",
+	.parts = stargate2flash_partitions,
+	.nr_parts = ARRAY_SIZE(stargate2flash_partitions),
+	.name = "PXA27xOnChipROM",
+	.width = 2,
+};
+
+static struct platform_device stargate2_flash_device = {
+	.name = "pxa2xx-flash",
+	.id = 0,
+	.dev = {
+		.platform_data = &stargate2_flash_data,
+	},
+	.resource = &flash_resources,
+	.num_resources = 1,
+};
+
+/*
+ * SRAM - The Stargate 2 has 32MB of SRAM.
+ *
+ * Here it is made available as an MTD. This will then
+ * typically have a cifs filesystem created on it to provide
+ * fast temporary storage.
+ */
+static struct resource sram_resources = {
+	.start = PXA_CS1_PHYS,
+	.end = PXA_CS1_PHYS + SZ_32M-1,
+	.flags = IORESOURCE_MEM,
+};
+
+static struct platdata_mtd_ram stargate2_sram_pdata = {
+	.mapname = "Stargate2 SRAM",
+	.bankwidth = 2,
+};
+
+static struct platform_device stargate2_sram = {
+	.name = "mtd-ram",
+	.id = 0,
+	.resource = &sram_resources,
+	.num_resources = 1,
+	.dev = {
+		.platform_data = &stargate2_sram_pdata,
+	},
+};
+
+static struct pcf857x_platform_data platform_data_pcf857x = {
+	.gpio_base = 128,
+	.n_latch = 0,
+	.setup = NULL,
+	.teardown = NULL,
+	.context = NULL,
+};
+
+static struct at24_platform_data pca9500_eeprom_pdata = {
+	.byte_len = 256,
+	.page_size = 4,
+};
+
+
+static struct i2c_board_info __initdata stargate2_i2c_board_info[] = {
+	/* Techically this a pca9500 - but it's compatible with the 8574
+	 * for gpio expansion and the 24c02 for eeprom access.
+	 */
+	{
+		.type = "pcf8574",
+		.addr =  0x27,
+		.platform_data = &platform_data_pcf857x,
+	}, {
+		.type = "24c02",
+		.addr = 0x57,
+		.platform_data = &pca9500_eeprom_pdata,
+	}, {
+		.type = "max1238",
+		.addr = 0x35,
+	}, { /* ITS400 Sensor board only */
+		.type = "max1363",
+		.addr = 0x34,
+		/* Through a nand gate - Also beware, on V2 sensor board the
+		 * pull up resistors are missing.
+		 */
+		.irq = IRQ_GPIO(99),
+	}, { /* ITS400 Sensor board only */
+		.type = "tsl2561",
+		.addr = 0x49,
+		/* Through a nand gate - Also beware, on V2 sensor board the
+		 * pull up resistors are missing.
+		 */
+		.irq = IRQ_GPIO(99),
+	}, { /* ITS400 Sensor board only */
+		.type = "tmp175",
+		.addr = 0x4A,
+		.irq = IRQ_GPIO(96),
+	},
+};
+
+static struct i2c_board_info __initdata stargate2_pwr_i2c_board_info[] = {
+	{
+		.type = "da9030",
+		.addr = 0x49,
+		.platform_data = &stargate2_da9030_pdata,
+		.irq = gpio_to_irq(1),
+	},
+};
+
+static struct pxa2xx_spi_master pxa_ssp_master_0_info = {
+	.num_chipselect = 1,
+};
+
+static struct pxa2xx_spi_master pxa_ssp_master_1_info = {
+	.num_chipselect = 1,
+};
+
+static struct pxa2xx_spi_master pxa_ssp_master_2_info = {
+	.num_chipselect = 1,
+};
+
+/* An upcoming kernel change will scrap SFRM usage so these
+ * drivers have been moved to use gpio's via cs_control */
+static struct pxa2xx_spi_chip staccel_chip_info = {
+	.tx_threshold = 8,
+	.rx_threshold = 8,
+	.dma_burst_size = 8,
+	.timeout = 235,
+	.gpio_cs = 24,
+};
+
+static struct pxa2xx_spi_chip cc2420_info = {
+	.tx_threshold = 8,
+	.rx_threshold = 8,
+	.dma_burst_size = 8,
+	.timeout = 235,
+	.gpio_cs = 39,
+};
+
+static struct spi_board_info spi_board_info[] __initdata = {
+	{
+		.modalias = "lis3l02dq",
+		.max_speed_hz = 8000000,/* 8MHz max spi frequency at 3V */
+		.bus_num = 1,
+		.chip_select = 0,
+		.controller_data = &staccel_chip_info,
+		.irq = IRQ_GPIO(96),
+	}, {
+		.modalias = "cc2420",
+		.max_speed_hz = 6500000,
+		.bus_num = 3,
+		.chip_select = 0,
+		.controller_data = &cc2420_info,
+	},
+};
+
+static void sg2_udc_command(int cmd)
+{
+	switch (cmd) {
+	case PXA2XX_UDC_CMD_CONNECT:
+		UP2OCR |=  UP2OCR_HXOE  | UP2OCR_DPPUE | UP2OCR_DPPUBE;
+		break;
+	case PXA2XX_UDC_CMD_DISCONNECT:
+		UP2OCR &= ~(UP2OCR_HXOE  | UP2OCR_DPPUE | UP2OCR_DPPUBE);
+		break;
+	}
+}
+
+/* Board doesn't support cable detection - so always lie and say
+ * something is there.
+ */
+static int sg2_udc_detect(void)
+{
+	return 1;
+}
+
+static struct pxa2xx_udc_mach_info stargate2_udc_info __initdata = {
+	.udc_is_connected	= sg2_udc_detect,
+	.udc_command		= sg2_udc_command,
+};
+
+static struct platform_device *stargate2_devices[] = {
+	&stargate2_flash_device,
+	&stargate2_sram,
+	&smc91x_device,
+	&sht15,
+};
+
+static struct i2c_pxa_platform_data i2c_pwr_pdata = {
+	.fast_mode = 1,
+};
+
+static struct i2c_pxa_platform_data i2c_pdata = {
+	.fast_mode = 1,
+};
+
+static void __init stargate2_init(void)
+{
+	/* This is probably a board specific hack as this must be set
+	   prior to connecting the MFP stuff up. */
+	MECR &= ~MECR_NOS;
+
+	pxa2xx_mfp_config(ARRAY_AND_SIZE(stargate2_pin_config));
+
+	/* spi chip selects */
+	gpio_direction_output(37, 0);
+	gpio_direction_output(24, 0);
+	gpio_direction_output(39, 0);
+
+	platform_add_devices(ARRAY_AND_SIZE(stargate2_devices));
+
+	pxa2xx_set_spi_info(1, &pxa_ssp_master_0_info);
+	pxa2xx_set_spi_info(2, &pxa_ssp_master_1_info);
+	pxa2xx_set_spi_info(3, &pxa_ssp_master_2_info);
+	spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
+
+	i2c_register_board_info(0, ARRAY_AND_SIZE(stargate2_i2c_board_info));
+	i2c_register_board_info(1,
+				ARRAY_AND_SIZE(stargate2_pwr_i2c_board_info));
+	pxa27x_set_i2c_power_info(&i2c_pwr_pdata);
+	pxa_set_i2c_info(&i2c_pdata);
+
+	pxa_set_mci_info(&stargate2_mci_platform_data);
+
+	pxa_set_udc_info(&stargate2_udc_info);
+
+	stargate2_reset_bluetooth();
+}
+
+MACHINE_START(STARGATE2, "Stargate 2")
+	.phys_io = 0x40000000,
+	.io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc,
+	.map_io = pxa_map_io,
+	.init_irq = pxa27x_init_irq,
+	.timer = &pxa_timer,
+	.init_machine = stargate2_init,
+	.boot_params = 0xA0000100,
+MACHINE_END
-- 
cgit v0.10.2


From 80153d1bcc6a20361d5974f37d3729583ba99154 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Tue, 12 May 2009 19:37:20 +0000
Subject: [ARM] pxa/stargate2: Add board specific elements to the smc91x driver

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
index 329f890..f1f773b 100644
--- a/drivers/net/smc91x.h
+++ b/drivers/net/smc91x.h
@@ -45,7 +45,8 @@
     defined(CONFIG_MACH_ZYLONITE) ||\
     defined(CONFIG_MACH_LITTLETON) ||\
     defined(CONFIG_MACH_ZYLONITE2) ||\
-    defined(CONFIG_ARCH_VIPER)
+    defined(CONFIG_ARCH_VIPER) ||\
+    defined(CONFIG_MACH_STARGATE2)
 
 #include <asm/mach-types.h>
 
@@ -73,7 +74,7 @@
 /* We actually can't write halfwords properly if not word aligned */
 static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 {
-	if (machine_is_mainstone() && reg & 2) {
+	if ((machine_is_mainstone() || machine_is_stargate2()) && reg & 2) {
 		unsigned int v = val << 16;
 		v |= readl(ioaddr + (reg & ~2)) & 0xffff;
 		writel(v, ioaddr + (reg & ~2));
-- 
cgit v0.10.2


From 5aeb1a5e9f2eced482805eeb154baf77ea53c8ce Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Tue, 12 May 2009 19:37:21 +0000
Subject: [ARM] pxa/stargate2: add support for Compact Flash/PCMCIA

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig
index 2764735..fbf965b 100644
--- a/drivers/pcmcia/Kconfig
+++ b/drivers/pcmcia/Kconfig
@@ -217,7 +217,7 @@ config PCMCIA_PXA2XX
 	depends on ARM && ARCH_PXA && PCMCIA
 	depends on (ARCH_LUBBOCK || MACH_MAINSTONE || PXA_SHARPSL \
 		    || MACH_ARMCORE || ARCH_PXA_PALM || TRIZEPS_PCMCIA \
-		    || ARCH_VIPER || ARCH_PXA_ESERIES)
+		    || ARCH_VIPER || ARCH_PXA_ESERIES || MACH_STARGATE2)
 	help
 	  Say Y here to include support for the PXA2xx PCMCIA controller
 
diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile
index bbac463..047394d 100644
--- a/drivers/pcmcia/Makefile
+++ b/drivers/pcmcia/Makefile
@@ -73,5 +73,6 @@ pxa2xx-obj-$(CONFIG_TRIZEPS_PCMCIA)		+= pxa2xx_trizeps4.o
 pxa2xx-obj-$(CONFIG_MACH_PALMTX)		+= pxa2xx_palmtx.o
 pxa2xx-obj-$(CONFIG_MACH_PALMLD)		+= pxa2xx_palmld.o
 pxa2xx-obj-$(CONFIG_MACH_E740)			+= pxa2xx_e740.o
+pxa2xx-obj-$(CONFIG_MACH_STARGATE2)		+= pxa2xx_stargate2.o
 
 obj-$(CONFIG_PCMCIA_PXA2XX)			+= pxa2xx_core.o $(pxa2xx-obj-y)
diff --git a/drivers/pcmcia/pxa2xx_stargate2.c b/drivers/pcmcia/pxa2xx_stargate2.c
new file mode 100644
index 0000000..490749e
--- /dev/null
+++ b/drivers/pcmcia/pxa2xx_stargate2.c
@@ -0,0 +1,174 @@
+/*
+ * linux/drivers/pcmcia/pxa2xx_stargate2.c
+ *
+ * Stargate 2 PCMCIA specific routines.
+ *
+ * Created:	December 6, 2005
+ * Author:	Ed C. Epp
+ * Copyright:	Intel Corp 2005
+ *              Jonathan Cameron <jic23@cam.ac.uk> 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+
+#include <pcmcia/ss.h>
+
+#include <asm/irq.h>
+#include <asm/mach-types.h>
+
+#include "soc_common.h"
+
+#define SG2_S0_BUFF_CTL		120
+#define SG2_S0_POWER_CTL	108
+#define SG2_S0_GPIO_RESET	82
+#define SG2_S0_GPIO_DETECT	53
+#define SG2_S0_GPIO_READY	81
+
+static struct pcmcia_irqs irqs[] = {
+	{ 0, IRQ_GPIO(SG2_S0_GPIO_DETECT), "PCMCIA0 CD" },
+};
+
+static int sg2_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
+{
+	skt->irq = IRQ_GPIO(SG2_S0_GPIO_READY);
+	return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs));
+}
+
+static void sg2_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
+{
+	soc_pcmcia_free_irqs(skt, irqs, ARRAY_SIZE(irqs));
+}
+
+static void sg2_pcmcia_socket_state(struct soc_pcmcia_socket *skt,
+				    struct pcmcia_state *state)
+{
+	state->detect = !gpio_get_value(SG2_S0_GPIO_DETECT);
+	state->ready  = !!gpio_get_value(SG2_S0_GPIO_READY);
+	state->bvd1   = 0; /* not available - battery detect on card */
+	state->bvd2   = 0; /* not available */
+	state->vs_3v  = 1; /* not available - voltage detect for card */
+	state->vs_Xv  = 0; /* not available */
+	state->wrprot = 0; /* not available - write protect */
+}
+
+static int sg2_pcmcia_configure_socket(struct soc_pcmcia_socket *skt,
+				       const socket_state_t *state)
+{
+	/* Enable card power */
+	switch (state->Vcc) {
+	case 0:
+		/* sets power ctl register high */
+		gpio_set_value(SG2_S0_POWER_CTL, 1);
+		break;
+	case 33:
+	case 50:
+		/* sets power control register low (clear) */
+		gpio_set_value(SG2_S0_POWER_CTL, 0);
+		msleep(100);
+		break;
+	default:
+		pr_err("%s(): bad Vcc %u\n",
+		       __func__, state->Vcc);
+		return -1;
+	}
+
+	/* reset */
+	gpio_set_value(SG2_S0_GPIO_RESET, !!(state->flags & SS_RESET));
+
+	return 0;
+}
+
+static void sg2_pcmcia_socket_init(struct soc_pcmcia_socket *skt)
+{
+	soc_pcmcia_enable_irqs(skt, irqs, ARRAY_SIZE(irqs));
+}
+
+static void sg2_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt)
+{
+	soc_pcmcia_disable_irqs(skt, irqs, ARRAY_SIZE(irqs));
+}
+
+static struct pcmcia_low_level sg2_pcmcia_ops __initdata = {
+	.owner			= THIS_MODULE,
+	.hw_init		= sg2_pcmcia_hw_init,
+	.hw_shutdown		= sg2_pcmcia_hw_shutdown,
+	.socket_state		= sg2_pcmcia_socket_state,
+	.configure_socket	= sg2_pcmcia_configure_socket,
+	.socket_init		= sg2_pcmcia_socket_init,
+	.socket_suspend		= sg2_pcmcia_socket_suspend,
+	.nr			= 1,
+};
+
+static struct platform_device *sg2_pcmcia_device;
+
+static int __init sg2_pcmcia_init(void)
+{
+	int ret;
+
+	if (!machine_is_stargate2())
+		return -ENODEV;
+
+	sg2_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1);
+	if (!sg2_pcmcia_device)
+		return -ENOMEM;
+
+	ret = gpio_request(SG2_S0_BUFF_CTL, "SG2 CF buff ctl");
+	if (ret)
+		goto error_put_platform_device;
+	ret = gpio_request(SG2_S0_POWER_CTL, "SG2 CF power ctl");
+	if (ret)
+		goto error_free_gpio_buff_ctl;
+	ret = gpio_request(SG2_S0_GPIO_RESET, "SG2 CF reset");
+	if (ret)
+		goto error_free_gpio_power_ctl;
+	/* Set gpio directions */
+	gpio_direction_output(SG2_S0_BUFF_CTL, 0);
+	gpio_direction_output(SG2_S0_POWER_CTL, 1);
+	gpio_direction_output(SG2_S0_GPIO_RESET, 1);
+
+	ret = platform_device_add_data(sg2_pcmcia_device,
+				       &sg2_pcmcia_ops,
+				       sizeof(sg2_pcmcia_ops));
+	if (ret)
+		goto error_free_gpio_reset;
+
+	ret = platform_device_add(sg2_pcmcia_device);
+	if (ret)
+		goto error_free_gpio_reset;
+
+	return 0;
+error_free_gpio_reset:
+	gpio_free(SG2_S0_GPIO_RESET);
+error_free_gpio_power_ctl:
+	gpio_free(SG2_S0_POWER_CTL);
+error_free_gpio_buff_ctl:
+	gpio_free(SG2_S0_BUFF_CTL);
+error_put_platform_device:
+	platform_device_put(sg2_pcmcia_device);
+
+	return ret;
+}
+
+static void __exit sg2_pcmcia_exit(void)
+{
+	platform_device_unregister(sg2_pcmcia_device);
+	gpio_free(SG2_S0_BUFF_CTL);
+	gpio_free(SG2_S0_POWER_CTL);
+	gpio_free(SG2_S0_GPIO_RESET);
+}
+
+fs_initcall(sg2_pcmcia_init);
+module_exit(sg2_pcmcia_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:pxa2xx-pcmcia");
-- 
cgit v0.10.2


From a2520846589ee9821f9f50744cece84f2853518a Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Mon, 25 May 2009 16:50:12 +0000
Subject: [ARM] pxa/imote2: add board config for SHT15 humidity sensor

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index 3ef98d4..f022247 100644
--- a/arch/arm/mach-pxa/imote2.c
+++ b/arch/arm/mach-pxa/imote2.c
@@ -22,6 +22,7 @@
 #include <linux/spi/spi.h>
 #include <linux/i2c.h>
 #include <linux/mfd/da903x.h>
+#include <linux/sht15.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -102,6 +103,10 @@ static unsigned long imote2_pin_config[] __initdata = {
 	GPIO96_GPIO,	/* accelerometer interrupt */
 	GPIO99_GPIO,	/* ADC interrupt */
 
+	/* SHT15 */
+	GPIO100_GPIO,
+	GPIO98_GPIO,
+
 	/* Connector pins specified as gpios */
 	GPIO94_GPIO, /* large basic connector pin 14 */
 	GPIO10_GPIO, /* large basic connector pin 23 */
@@ -112,6 +117,26 @@ static unsigned long imote2_pin_config[] __initdata = {
 	GPIO105_GPIO, /* blue led */
 };
 
+static struct sht15_platform_data platform_data_sht15 = {
+	.gpio_data =  100,
+	.gpio_sck  =  98,
+};
+
+static struct platform_device sht15 = {
+	.name = "sht15",
+	.id = -1,
+	.dev = {
+		.platform_data = &platform_data_sht15,
+	},
+};
+
+static struct regulator_consumer_supply imote2_sensor_3_con[] = {
+	{
+		.dev = &sht15.dev,
+		.supply = "vcc",
+	},
+};
+
 static struct gpio_led imote2_led_pins[] = {
 	{
 		.name       =  "imote2:red",
@@ -257,6 +282,8 @@ static struct regulator_init_data imote2_ldo_init_data[] = {
 			.min_uV = 2800000,
 			.max_uV = 3000000,
 		},
+		.num_consumer_supplies = ARRAY_SIZE(imote2_sensor_3_con),
+		.consumer_supplies = imote2_sensor_3_con,
 	},
 	[vcc_pxa_pll] = { /* 1.17V - 1.43V, default 1.3V*/
 		.constraints = {
@@ -508,6 +535,7 @@ static struct pxa2xx_udc_mach_info imote2_udc_info __initdata = {
 static struct platform_device *imote2_devices[] = {
 	&imote2_flash_device,
 	&imote2_leds,
+	&sht15,
 };
 
 static struct i2c_pxa_platform_data i2c_pwr_pdata = {
-- 
cgit v0.10.2


From 0a0ca89d59b203b772a08d04d4e2052ffaeac221 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Mon, 25 May 2009 16:50:14 +0000
Subject: [ARM] pxa/imote2: add i2c board info for the wm8940 audio codec

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index f022247..be7be72 100644
--- a/arch/arm/mach-pxa/imote2.c
+++ b/arch/arm/mach-pxa/imote2.c
@@ -459,6 +459,9 @@ static struct i2c_board_info __initdata imote2_i2c_board_info[] = {
 		.type = "tmp175",
 		.addr = 0x4A,
 		.irq = IRQ_GPIO(96),
+	}, { /* IMB400 Multimedia board */
+		.type = "wm8940",
+		.addr = 0x1A,
 	},
 };
 
-- 
cgit v0.10.2


From f0ba401769a5ca0b69e60914ef03e4fc373c3a02 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Sun, 17 May 2009 17:05:23 +0400
Subject: [ARM] pxa/spitz: drop unused header files from spitz.c

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 1182b79..dda310f 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -13,19 +13,11 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
-#include <linux/major.h>
-#include <linux/fs.h>
-#include <linux/interrupt.h>
 #include <linux/gpio.h>
 #include <linux/leds.h>
-#include <linux/mmc/host.h>
 #include <linux/mtd/physmap.h>
-#include <linux/pm.h>
-#include <linux/backlight.h>
-#include <linux/io.h>
 #include <linux/i2c.h>
 #include <linux/i2c/pca953x.h>
 #include <linux/spi/spi.h>
@@ -34,15 +26,11 @@
 #include <linux/mtd/sharpsl.h>
 
 #include <asm/setup.h>
-#include <asm/memory.h>
 #include <asm/mach-types.h>
-#include <mach/hardware.h>
-#include <asm/irq.h>
-#include <asm/system.h>
-
 #include <asm/mach/arch.h>
-#include <asm/mach/map.h>
-#include <asm/mach/irq.h>
+#include <asm/mach/sharpsl_param.h>
+#include <asm/hardware/scoop.h>
+
 
 #include <mach/pxa27x.h>
 #include <mach/pxa27x-udc.h>
@@ -51,14 +39,9 @@
 #include <mach/irda.h>
 #include <mach/mmc.h>
 #include <mach/ohci.h>
-#include <mach/udc.h>
 #include <mach/pxafb.h>
 #include <mach/pxa2xx_spi.h>
 #include <mach/spitz.h>
-#include <mach/sharpsl.h>
-
-#include <asm/mach/sharpsl_param.h>
-#include <asm/hardware/scoop.h>
 
 #include "generic.h"
 #include "devices.h"
-- 
cgit v0.10.2


From 1ce2c51e6d594d19126f51bba88aa9f57eb7cea6 Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Mon, 1 Jun 2009 12:26:58 +0200
Subject: [ARM] pxa/ezx: setup gpio-keys for EzX phones

Setup gpio-keys for EzX phones

Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Acked-by: Daniel Ribeiro <drwyrm@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 4ce63b4..588b265 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -17,6 +17,7 @@
 #include <linux/delay.h>
 #include <linux/pwm_backlight.h>
 #include <linux/input.h>
+#include <linux/gpio_keys.h>
 
 #include <asm/setup.h>
 #include <asm/mach-types.h>
@@ -32,6 +33,12 @@
 #include "devices.h"
 #include "generic.h"
 
+#define GPIO12_A780_FLIP_LID 		12
+#define GPIO15_A1200_FLIP_LID 		15
+#define GPIO15_A910_FLIP_LID 		15
+#define GPIO12_E680_LOCK_SWITCH 	12
+#define GPIO15_E6_LOCK_SWITCH 		15
+
 static struct platform_pwm_backlight_data ezx_backlight_data = {
 	.pwm_id		= 0,
 	.max_brightness	= 1023,
@@ -88,7 +95,7 @@ static struct pxafb_mach_info ezx_fb_info_2 = {
 	.lcd_conn	= LCD_COLOR_TFT_18BPP,
 };
 
-static struct platform_device *devices[] __initdata = {
+static struct platform_device *ezx_devices[] __initdata = {
 	&ezx_backlight_device,
 };
 
@@ -651,6 +658,35 @@ static struct pxa27x_keypad_platform_data e2_keypad_platform_data = {
 #endif /* CONFIG_MACH_EZX_E2 */
 
 #ifdef CONFIG_MACH_EZX_A780
+/* gpio_keys */
+static struct gpio_keys_button a780_buttons[] = {
+	[0] = {
+		.code       = SW_LID,
+		.gpio       = GPIO12_A780_FLIP_LID,
+		.active_low = 0,
+		.desc       = "A780 flip lid",
+		.type       = EV_SW,
+		.wakeup     = 1,
+	},
+};
+
+static struct gpio_keys_platform_data a780_gpio_keys_platform_data = {
+	.buttons  = a780_buttons,
+	.nbuttons = ARRAY_SIZE(a780_buttons),
+};
+
+static struct platform_device a780_gpio_keys = {
+	.name = "gpio-keys",
+	.id   = -1,
+	.dev  = {
+		.platform_data = &a780_gpio_keys_platform_data,
+	},
+};
+
+static struct platform_device *a780_devices[] __initdata = {
+	&a780_gpio_keys,
+};
+
 static void __init a780_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(ezx_pin_config));
@@ -663,7 +699,8 @@ static void __init a780_init(void)
 
 	pxa_set_keypad_info(&a780_keypad_platform_data);
 
-	platform_add_devices(devices, ARRAY_SIZE(devices));
+	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
+	platform_add_devices(ARRAY_AND_SIZE(a780_devices));
 }
 
 MACHINE_START(EZX_A780, "Motorola EZX A780")
@@ -678,10 +715,39 @@ MACHINE_END
 #endif
 
 #ifdef CONFIG_MACH_EZX_E680
+/* gpio_keys */
+static struct gpio_keys_button e680_buttons[] = {
+	[0] = {
+		.code       = KEY_SCREENLOCK,
+		.gpio       = GPIO12_E680_LOCK_SWITCH,
+		.active_low = 0,
+		.desc       = "E680 lock switch",
+		.type       = EV_KEY,
+		.wakeup     = 1,
+	},
+};
+
+static struct gpio_keys_platform_data e680_gpio_keys_platform_data = {
+	.buttons  = e680_buttons,
+	.nbuttons = ARRAY_SIZE(e680_buttons),
+};
+
+static struct platform_device e680_gpio_keys = {
+	.name = "gpio-keys",
+	.id   = -1,
+	.dev  = {
+		.platform_data = &e680_gpio_keys_platform_data,
+	},
+};
+
 static struct i2c_board_info __initdata e680_i2c_board_info[] = {
 	{ I2C_BOARD_INFO("tea5767", 0x81) },
 };
 
+static struct platform_device *e680_devices[] __initdata = {
+	&e680_gpio_keys,
+};
+
 static void __init e680_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(ezx_pin_config));
@@ -695,7 +761,8 @@ static void __init e680_init(void)
 
 	pxa_set_keypad_info(&e680_keypad_platform_data);
 
-	platform_add_devices(devices, ARRAY_SIZE(devices));
+	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
+	platform_add_devices(ARRAY_AND_SIZE(e680_devices));
 }
 
 MACHINE_START(EZX_E680, "Motorola EZX E680")
@@ -710,10 +777,39 @@ MACHINE_END
 #endif
 
 #ifdef CONFIG_MACH_EZX_A1200
+/* gpio_keys */
+static struct gpio_keys_button a1200_buttons[] = {
+	[0] = {
+		.code       = SW_LID,
+		.gpio       = GPIO15_A1200_FLIP_LID,
+		.active_low = 0,
+		.desc       = "A1200 flip lid",
+		.type       = EV_SW,
+		.wakeup     = 1,
+	},
+};
+
+static struct gpio_keys_platform_data a1200_gpio_keys_platform_data = {
+	.buttons  = a1200_buttons,
+	.nbuttons = ARRAY_SIZE(a1200_buttons),
+};
+
+static struct platform_device a1200_gpio_keys = {
+	.name = "gpio-keys",
+	.id   = -1,
+	.dev  = {
+		.platform_data = &a1200_gpio_keys_platform_data,
+	},
+};
+
 static struct i2c_board_info __initdata a1200_i2c_board_info[] = {
 	{ I2C_BOARD_INFO("tea5767", 0x81) },
 };
 
+static struct platform_device *a1200_devices[] __initdata = {
+	&a1200_gpio_keys,
+};
+
 static void __init a1200_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(ezx_pin_config));
@@ -727,7 +823,8 @@ static void __init a1200_init(void)
 
 	pxa_set_keypad_info(&a1200_keypad_platform_data);
 
-	platform_add_devices(devices, ARRAY_SIZE(devices));
+	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
+	platform_add_devices(ARRAY_AND_SIZE(a1200_devices));
 }
 
 MACHINE_START(EZX_A1200, "Motorola EZX A1200")
@@ -742,6 +839,35 @@ MACHINE_END
 #endif
 
 #ifdef CONFIG_MACH_EZX_A910
+/* gpio_keys */
+static struct gpio_keys_button a910_buttons[] = {
+	[0] = {
+		.code       = SW_LID,
+		.gpio       = GPIO15_A910_FLIP_LID,
+		.active_low = 0,
+		.desc       = "A910 flip lid",
+		.type       = EV_SW,
+		.wakeup     = 1,
+	},
+};
+
+static struct gpio_keys_platform_data a910_gpio_keys_platform_data = {
+	.buttons  = a910_buttons,
+	.nbuttons = ARRAY_SIZE(a910_buttons),
+};
+
+static struct platform_device a910_gpio_keys = {
+	.name = "gpio-keys",
+	.id   = -1,
+	.dev  = {
+		.platform_data = &a910_gpio_keys_platform_data,
+	},
+};
+
+static struct platform_device *a910_devices[] __initdata = {
+	&a910_gpio_keys,
+};
+
 static void __init a910_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(ezx_pin_config));
@@ -754,7 +880,8 @@ static void __init a910_init(void)
 
 	pxa_set_keypad_info(&a910_keypad_platform_data);
 
-	platform_add_devices(devices, ARRAY_SIZE(devices));
+	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
+	platform_add_devices(ARRAY_AND_SIZE(a910_devices));
 }
 
 MACHINE_START(EZX_A910, "Motorola EZX A910")
@@ -769,10 +896,39 @@ MACHINE_END
 #endif
 
 #ifdef CONFIG_MACH_EZX_E6
+/* gpio_keys */
+static struct gpio_keys_button e6_buttons[] = {
+	[0] = {
+		.code       = KEY_SCREENLOCK,
+		.gpio       = GPIO15_E6_LOCK_SWITCH,
+		.active_low = 0,
+		.desc       = "E6 lock switch",
+		.type       = EV_KEY,
+		.wakeup     = 1,
+	},
+};
+
+static struct gpio_keys_platform_data e6_gpio_keys_platform_data = {
+	.buttons  = e6_buttons,
+	.nbuttons = ARRAY_SIZE(e6_buttons),
+};
+
+static struct platform_device e6_gpio_keys = {
+	.name = "gpio-keys",
+	.id   = -1,
+	.dev  = {
+		.platform_data = &e6_gpio_keys_platform_data,
+	},
+};
+
 static struct i2c_board_info __initdata e6_i2c_board_info[] = {
 	{ I2C_BOARD_INFO("tea5767", 0x81) },
 };
 
+static struct platform_device *e6_devices[] __initdata = {
+	&e6_gpio_keys,
+};
+
 static void __init e6_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(ezx_pin_config));
@@ -786,7 +942,8 @@ static void __init e6_init(void)
 
 	pxa_set_keypad_info(&e6_keypad_platform_data);
 
-	platform_add_devices(devices, ARRAY_SIZE(devices));
+	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
+	platform_add_devices(ARRAY_AND_SIZE(e6_devices));
 }
 
 MACHINE_START(EZX_E6, "Motorola EZX E6")
@@ -805,6 +962,9 @@ static struct i2c_board_info __initdata e2_i2c_board_info[] = {
 	{ I2C_BOARD_INFO("tea5767", 0x81) },
 };
 
+static struct platform_device *e2_devices[] __initdata = {
+};
+
 static void __init e2_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(ezx_pin_config));
@@ -818,7 +978,8 @@ static void __init e2_init(void)
 
 	pxa_set_keypad_info(&e2_keypad_platform_data);
 
-	platform_add_devices(devices, ARRAY_SIZE(devices));
+	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
+	platform_add_devices(ARRAY_AND_SIZE(e2_devices));
 }
 
 MACHINE_START(EZX_E2, "Motorola EZX E2")
-- 
cgit v0.10.2


From 76e3fc36eb3d78997e7f4dcdc01cc38dc3178201 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 12 May 2009 16:31:13 +0300
Subject: [ARM] pxa/em-x270: add exeda GPIO extender and update GPIO mappings

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 3503f52..e2f9834 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -28,6 +28,8 @@
 #include <linux/spi/libertas_spi.h>
 #include <linux/power_supply.h>
 #include <linux/apm-emulation.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pca953x.h>
 
 #include <media/soc_camera.h>
 
@@ -52,15 +54,18 @@
 #define GPIO13_MMC_CD		(13)
 #define GPIO95_MMC_WP		(95)
 #define GPIO56_NAND_RB		(56)
+#define GPIO93_CAM_RESET	(93)
 
 /* eXeda specific GPIOs */
 #define GPIO114_MMC_CD		(114)
 #define GPIO20_NAND_RB		(20)
 #define GPIO38_SD_PWEN		(38)
+#define GPIO37_WLAN_RST		(37)
+#define GPIO95_TOUCHPAD_INT	(95)
+#define GPIO130_CAM_RESET	(130)
 
 /* common  GPIOs */
 #define GPIO11_NAND_CS		(11)
-#define GPIO93_CAM_RESET	(93)
 #define GPIO41_ETHIRQ		(41)
 #define EM_X270_ETHIRQ		IRQ_GPIO(GPIO41_ETHIRQ)
 #define GPIO115_WLAN_PWEN	(115)
@@ -69,6 +74,7 @@
 static int mmc_cd;
 static int nand_rb;
 static int dm9000_flags;
+static int cam_reset;
 
 static unsigned long common_pin_config[] = {
 	/* AC'97 */
@@ -180,7 +186,6 @@ static unsigned long common_pin_config[] = {
 
 	/* power controls */
 	GPIO20_GPIO	| MFP_LPM_DRIVE_LOW,	/* GPRS_PWEN */
-	GPIO93_GPIO	| MFP_LPM_DRIVE_LOW,	/* Camera reset */
 	GPIO115_GPIO	| MFP_LPM_DRIVE_LOW,	/* WLAN_PWEN */
 
 	/* NAND controls */
@@ -191,14 +196,16 @@ static unsigned long common_pin_config[] = {
 };
 
 static unsigned long em_x270_pin_config[] = {
-	GPIO13_GPIO,	/* MMC card detect */
-	GPIO56_GPIO,	/* NAND Ready/Busy */
-	GPIO95_GPIO,	/* MMC Write protect */
+	GPIO13_GPIO,				/* MMC card detect */
+	GPIO56_GPIO,				/* NAND Ready/Busy */
+	GPIO93_GPIO	| MFP_LPM_DRIVE_LOW,	/* Camera reset */
+	GPIO95_GPIO,				/* MMC Write protect */
 };
 
 static unsigned long exeda_pin_config[] = {
 	GPIO20_GPIO,				/* NAND Ready/Busy */
 	GPIO38_GPIO	| MFP_LPM_DRIVE_LOW,	/* SD slot power */
+	GPIO95_GPIO,				/* touchpad IRQ */
 	GPIO114_GPIO,				/* MMC card detect */
 };
 
@@ -863,26 +870,26 @@ static int em_x270_sensor_init(struct device *dev)
 {
 	int ret;
 
-	ret = gpio_request(GPIO93_CAM_RESET, "camera reset");
+	ret = gpio_request(cam_reset, "camera reset");
 	if (ret)
 		return ret;
 
-	gpio_direction_output(GPIO93_CAM_RESET, 0);
+	gpio_direction_output(cam_reset, 0);
 
 	em_x270_camera_ldo = regulator_get(NULL, "vcc cam");
 	if (em_x270_camera_ldo == NULL) {
-		gpio_free(GPIO93_CAM_RESET);
+		gpio_free(cam_reset);
 		return -ENODEV;
 	}
 
 	ret = regulator_enable(em_x270_camera_ldo);
 	if (ret) {
 		regulator_put(em_x270_camera_ldo);
-		gpio_free(GPIO93_CAM_RESET);
+		gpio_free(cam_reset);
 		return ret;
 	}
 
-	gpio_set_value(GPIO93_CAM_RESET, 1);
+	gpio_set_value(cam_reset, 1);
 
 	return 0;
 }
@@ -902,7 +909,7 @@ static int em_x270_sensor_power(struct device *dev, int on)
 	if (on == is_on)
 		return 0;
 
-	gpio_set_value(GPIO93_CAM_RESET, !on);
+	gpio_set_value(cam_reset, !on);
 
 	if (on)
 		ret = regulator_enable(em_x270_camera_ldo);
@@ -912,7 +919,7 @@ static int em_x270_sensor_power(struct device *dev, int on)
 	if (ret)
 		return ret;
 
-	gpio_set_value(GPIO93_CAM_RESET, on);
+	gpio_set_value(cam_reset, on);
 
 	return 0;
 }
@@ -929,13 +936,8 @@ static struct i2c_board_info em_x270_i2c_cam_info[] = {
 	},
 };
 
-static struct i2c_pxa_platform_data em_x270_i2c_info = {
-	.fast_mode = 1,
-};
-
 static void  __init em_x270_init_camera(void)
 {
-	pxa_set_i2c_info(&em_x270_i2c_info);
 	i2c_register_board_info(0, ARRAY_AND_SIZE(em_x270_i2c_cam_info));
 	pxa_set_camera_info(&em_x270_camera_platform_data);
 }
@@ -1069,6 +1071,29 @@ static void __init em_x270_init_da9030(void)
 	i2c_register_board_info(1, &em_x270_i2c_pmic_info, 1);
 }
 
+static struct pca953x_platform_data exeda_gpio_ext_pdata = {
+	.gpio_base = 128,
+};
+
+static struct i2c_board_info exeda_i2c_info[] = {
+	{
+		I2C_BOARD_INFO("pca9555", 0x21),
+		.platform_data = &exeda_gpio_ext_pdata,
+	},
+};
+
+static struct i2c_pxa_platform_data em_x270_i2c_info = {
+	.fast_mode = 1,
+};
+
+static void __init em_x270_init_i2c(void)
+{
+	pxa_set_i2c_info(&em_x270_i2c_info);
+
+	if (machine_is_exeda())
+		i2c_register_board_info(0, ARRAY_AND_SIZE(exeda_i2c_info));
+}
+
 static void __init em_x270_module_init(void)
 {
 	pr_info("%s\n", __func__);
@@ -1077,6 +1102,7 @@ static void __init em_x270_module_init(void)
 	mmc_cd = GPIO13_MMC_CD;
 	nand_rb = GPIO56_NAND_RB;
 	dm9000_flags = DM9000_PLATF_32BITONLY;
+	cam_reset = GPIO93_CAM_RESET;
 }
 
 static void __init em_x270_exeda_init(void)
@@ -1087,6 +1113,7 @@ static void __init em_x270_exeda_init(void)
 	mmc_cd = GPIO114_MMC_CD;
 	nand_rb = GPIO20_NAND_RB;
 	dm9000_flags = DM9000_PLATF_16BITONLY;
+	cam_reset = GPIO130_CAM_RESET;
 }
 
 static void __init em_x270_init(void)
@@ -1111,8 +1138,9 @@ static void __init em_x270_init(void)
 	em_x270_init_keypad();
 	em_x270_init_gpio_keys();
 	em_x270_init_ac97();
-	em_x270_init_camera();
 	em_x270_init_spi();
+	em_x270_init_i2c();
+	em_x270_init_camera();
 }
 
 MACHINE_START(EM_X270, "Compulab EM-X270")
-- 
cgit v0.10.2


From 2f15cb594e0c17177bd79ead9234cc6806954e6f Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 12 May 2009 16:31:14 +0300
Subject: [ARM] pxa/em-x270: update libertas device setup

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index e2f9834..65e8dba 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -684,26 +684,52 @@ static int em_x270_libertas_setup(struct spi_device *spi)
 	if (err)
 		return err;
 
+	err = gpio_request(GPIO19_WLAN_STRAP, "WLAN STRAP");
+	if (err)
+		goto err_free_pwen;
+
+	if (machine_is_exeda()) {
+		err = gpio_request(GPIO37_WLAN_RST, "WLAN RST");
+		if (err)
+			goto err_free_strap;
+
+		gpio_direction_output(GPIO37_WLAN_RST, 1);
+		msleep(100);
+	}
+
 	gpio_direction_output(GPIO19_WLAN_STRAP, 1);
-	mdelay(100);
+	msleep(100);
 
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(em_x270_libertas_pin_config));
 
 	gpio_direction_output(GPIO115_WLAN_PWEN, 0);
-	mdelay(100);
+	msleep(100);
 	gpio_set_value(GPIO115_WLAN_PWEN, 1);
-	mdelay(100);
+	msleep(100);
 
 	spi->bits_per_word = 16;
 	spi_setup(spi);
 
 	return 0;
+
+err_free_strap:
+	gpio_free(GPIO19_WLAN_STRAP);
+err_free_pwen:
+	gpio_free(GPIO115_WLAN_PWEN);
+
+	return err;
 }
 
 static int em_x270_libertas_teardown(struct spi_device *spi)
 {
 	gpio_set_value(GPIO115_WLAN_PWEN, 0);
 	gpio_free(GPIO115_WLAN_PWEN);
+	gpio_free(GPIO19_WLAN_STRAP);
+
+	if (machine_is_exeda()) {
+		gpio_set_value(GPIO37_WLAN_RST, 0);
+		gpio_free(GPIO37_WLAN_RST);
+	}
 
 	return 0;
 }
-- 
cgit v0.10.2


From 3690a0f4266127557524501c680760b1e6cb55d0 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 12 May 2009 16:31:15 +0300
Subject: [ARM] pxa/em-x270: change power supply name to "battery"

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 65e8dba..35a1203 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -1013,7 +1013,7 @@ struct led_info em_x270_led_info = {
 };
 
 struct power_supply_info em_x270_psy_info = {
-	.name = "LP555597P6H-FPS",
+	.name = "battery",
 	.technology = POWER_SUPPLY_TECHNOLOGY_LIPO,
 	.voltage_max_design = 4200000,
 	.voltage_min_design = 3000000,
-- 
cgit v0.10.2


From 128d88b82e47d070170c256b2b19f57c352af310 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 12 May 2009 16:31:16 +0300
Subject: [ARM] pxa/em-x270: add support for on-board USB Hub

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 35a1203..3ff1fec 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -55,6 +55,7 @@
 #define GPIO95_MMC_WP		(95)
 #define GPIO56_NAND_RB		(56)
 #define GPIO93_CAM_RESET	(93)
+#define GPIO16_USB_HUB_RESET	(16)
 
 /* eXeda specific GPIOs */
 #define GPIO114_MMC_CD		(114)
@@ -63,6 +64,7 @@
 #define GPIO37_WLAN_RST		(37)
 #define GPIO95_TOUCHPAD_INT	(95)
 #define GPIO130_CAM_RESET	(130)
+#define GPIO10_USB_HUB_RESET	(10)
 
 /* common  GPIOs */
 #define GPIO11_NAND_CS		(11)
@@ -70,11 +72,13 @@
 #define EM_X270_ETHIRQ		IRQ_GPIO(GPIO41_ETHIRQ)
 #define GPIO115_WLAN_PWEN	(115)
 #define GPIO19_WLAN_STRAP	(19)
+#define GPIO9_USB_VBUS_EN	(9)
 
 static int mmc_cd;
 static int nand_rb;
 static int dm9000_flags;
 static int cam_reset;
+static int usb_hub_reset;
 
 static unsigned long common_pin_config[] = {
 	/* AC'97 */
@@ -197,12 +201,14 @@ static unsigned long common_pin_config[] = {
 
 static unsigned long em_x270_pin_config[] = {
 	GPIO13_GPIO,				/* MMC card detect */
+	GPIO16_GPIO,				/* USB hub reset */
 	GPIO56_GPIO,				/* NAND Ready/Busy */
 	GPIO93_GPIO	| MFP_LPM_DRIVE_LOW,	/* Camera reset */
 	GPIO95_GPIO,				/* MMC Write protect */
 };
 
 static unsigned long exeda_pin_config[] = {
+	GPIO10_GPIO,				/* USB hub reset */
 	GPIO20_GPIO,				/* NAND Ready/Busy */
 	GPIO38_GPIO	| MFP_LPM_DRIVE_LOW,	/* SD slot power */
 	GPIO95_GPIO,				/* touchpad IRQ */
@@ -471,18 +477,79 @@ static inline void em_x270_init_nor(void) {}
 
 /* PXA27x OHCI controller setup */
 #if defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE)
+static struct regulator *em_x270_usb_ldo;
+
+static int em_x270_usb_hub_init(void)
+{
+	int err;
+
+	em_x270_usb_ldo = regulator_get(NULL, "vcc usb");
+	if (IS_ERR(em_x270_usb_ldo))
+		return PTR_ERR(em_x270_usb_ldo);
+
+	err = gpio_request(GPIO9_USB_VBUS_EN, "vbus en");
+	if (err)
+		goto err_free_usb_ldo;
+
+	err = gpio_request(usb_hub_reset, "hub rst");
+	if (err)
+		goto err_free_vbus_gpio;
+
+	/* USB Hub power-on and reset */
+	gpio_direction_output(usb_hub_reset, 0);
+	regulator_enable(em_x270_usb_ldo);
+	gpio_set_value(usb_hub_reset, 1);
+	gpio_set_value(usb_hub_reset, 0);
+	regulator_disable(em_x270_usb_ldo);
+	regulator_enable(em_x270_usb_ldo);
+	gpio_set_value(usb_hub_reset, 1);
+
+	/* enable VBUS */
+	gpio_direction_output(GPIO9_USB_VBUS_EN, 1);
+
+	return 0;
+
+err_free_vbus_gpio:
+	gpio_free(GPIO9_USB_VBUS_EN);
+err_free_usb_ldo:
+	regulator_put(em_x270_usb_ldo);
+
+	return err;
+}
+
 static int em_x270_ohci_init(struct device *dev)
 {
+	int err;
+
+	/* we don't want to entirely disable USB if the HUB init failed */
+	err = em_x270_usb_hub_init();
+	if (err)
+		pr_err("USB Hub initialization failed: %d\n", err);
+
 	/* enable port 2 transiever */
 	UP2OCR = UP2OCR_HXS | UP2OCR_HXOE;
 
 	return 0;
 }
 
+static void em_x270_ohci_exit(struct device *dev)
+{
+	gpio_free(usb_hub_reset);
+	gpio_free(GPIO9_USB_VBUS_EN);
+
+	if (!IS_ERR(em_x270_usb_ldo)) {
+		if (regulator_is_enabled(em_x270_usb_ldo))
+			regulator_disable(em_x270_usb_ldo);
+
+		regulator_put(em_x270_usb_ldo);
+	}
+}
+
 static struct pxaohci_platform_data em_x270_ohci_platform_data = {
 	.port_mode	= PMM_PERPORT_MODE,
 	.flags		= ENABLE_PORT1 | ENABLE_PORT2 | POWER_CONTROL_LOW,
 	.init		= em_x270_ohci_init,
+	.exit		= em_x270_ohci_exit,
 };
 
 static void __init em_x270_init_ohci(void)
@@ -1129,6 +1196,7 @@ static void __init em_x270_module_init(void)
 	nand_rb = GPIO56_NAND_RB;
 	dm9000_flags = DM9000_PLATF_32BITONLY;
 	cam_reset = GPIO93_CAM_RESET;
+	usb_hub_reset = GPIO16_USB_HUB_RESET;
 }
 
 static void __init em_x270_exeda_init(void)
@@ -1140,6 +1208,7 @@ static void __init em_x270_exeda_init(void)
 	nand_rb = GPIO20_NAND_RB;
 	dm9000_flags = DM9000_PLATF_16BITONLY;
 	cam_reset = GPIO130_CAM_RESET;
+	usb_hub_reset = GPIO10_USB_HUB_RESET;
 }
 
 static void __init em_x270_init(void)
-- 
cgit v0.10.2


From 9599d1dbc2264355ecad0bae76bf6c1236c84c73 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 26 May 2009 09:41:46 +0300
Subject: [ARM] pxa/em-x270, cm-x270: use DEEPSLEEP for PM_SUSPEND_MEM

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index 34576ba..1d2cec2 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -335,6 +335,10 @@ void __init cmx270_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(cmx270_pin_config));
 
+#ifdef CONFIG_PM
+	pxa27x_set_pwrmode(PWRMODE_DEEPSLEEP);
+#endif
+
 	cmx270_init_rtc();
 	cmx270_init_mmc();
 	cmx270_init_ohci();
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 3ff1fec..243e080 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -1215,6 +1215,10 @@ static void __init em_x270_init(void)
 {
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(common_pin_config));
 
+#ifdef CONFIG_PM
+	pxa27x_set_pwrmode(PWRMODE_DEEPSLEEP);
+#endif
+
 	if (machine_is_em_x270())
 		em_x270_module_init();
 	else if (machine_is_exeda())
-- 
cgit v0.10.2


From b3992b665379c8de6f47cd0b0e5b3004d26b05be Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 4 Jun 2009 10:44:51 +0300
Subject: [ARM] pxa/cm-x300: use OBM configuration for NAND flash

CM-X300 can be assembled with different NAND flashes from different
manufacturers. Adding their configuration to the kernel is impractical,
therefore we will use the default NAND controller settings set up by the
bootloader.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index a97f87a..5295304 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -265,6 +265,7 @@ static struct mtd_partition cm_x300_nand_partitions[] = {
 
 static struct pxa3xx_nand_platform_data cm_x300_nand_info = {
 	.enable_arbiter	= 1,
+	.keep_config	= 1,
 	.parts		= cm_x300_nand_partitions,
 	.nr_parts	= ARRAY_SIZE(cm_x300_nand_partitions),
 };
-- 
cgit v0.10.2


From 1858ced3f990777defda93ad10ffa81c28ddc2e2 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 4 Jun 2009 10:44:52 +0300
Subject: [ARM] pxa/cm-x300: add rtc-v3020 device registration

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 5295304..cd39fa2 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -21,6 +21,7 @@
 #include <linux/gpio.h>
 #include <linux/dm9000.h>
 #include <linux/leds.h>
+#include <linux/rtc-v3020.h>
 
 #include <linux/i2c.h>
 #include <linux/i2c/pca953x.h>
@@ -46,6 +47,11 @@
 
 #define	CM_X300_MMC2_IRQ	IRQ_GPIO(GPIO82_MMC2_IRQ)
 
+#define GPIO95_RTC_CS		(95)
+#define GPIO96_RTC_WR		(96)
+#define GPIO97_RTC_RD		(97)
+#define GPIO98_RTC_IO		(98)
+
 static mfp_cfg_t cm_x300_mfp_cfg[] __initdata = {
 	/* LCD */
 	GPIO54_LCD_LDD_0,
@@ -135,6 +141,12 @@ static mfp_cfg_t cm_x300_mfp_cfg[] __initdata = {
 	GPIO85_GPIO,			/* MMC WP */
 	GPIO99_GPIO,			/* Ethernet IRQ */
 
+	/* RTC GPIOs */
+	GPIO95_GPIO,			/* RTC CS */
+	GPIO96_GPIO,			/* RTC WR */
+	GPIO97_GPIO,			/* RTC RD */
+	GPIO98_GPIO,			/* RTC IO */
+
 	/* Standard I2C */
 	GPIO21_I2C_SCL,
 	GPIO22_I2C_SDA,
@@ -442,6 +454,31 @@ static void __init cm_x300_init_i2c(void)
 static inline void cm_x300_init_i2c(void) {}
 #endif
 
+#if defined(CONFIG_RTC_DRV_V3020) || defined(CONFIG_RTC_DRV_V3020_MODULE)
+struct v3020_platform_data cm_x300_v3020_pdata = {
+	.use_gpio	= 1,
+	.gpio_cs	= GPIO95_RTC_CS,
+	.gpio_wr	= GPIO96_RTC_WR,
+	.gpio_rd	= GPIO97_RTC_RD,
+	.gpio_io	= GPIO98_RTC_IO,
+};
+
+static struct platform_device cm_x300_rtc_device = {
+	.name		= "v3020",
+	.id		= -1,
+	.dev		= {
+		.platform_data = &cm_x300_v3020_pdata,
+	}
+};
+
+static void __init cm_x300_init_rtc(void)
+{
+	platform_device_register(&cm_x300_rtc_device);
+}
+#else
+static inline void cm_x300_init_rtc(void) {}
+#endif
+
 static void __init cm_x300_init(void)
 {
 	/* board-processor specific GPIO initialization */
@@ -454,6 +491,7 @@ static void __init cm_x300_init(void)
 	cm_x300_init_nand();
 	cm_x300_init_leds();
 	cm_x300_init_i2c();
+	cm_x300_init_rtc();
 }
 
 MACHINE_START(CM_X300, "CM-X300 module")
-- 
cgit v0.10.2


From 321d9eb32a903edc30a499262ad5b42cda98656c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 4 Jun 2009 10:44:53 +0300
Subject: [ARM] pxa/cm-x300: use STUART for uncompressor

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/include/mach/uncompress.h b/arch/arm/mach-pxa/include/mach/uncompress.h
index c6cfd7c..b547494 100644
--- a/arch/arm/mach-pxa/include/mach/uncompress.h
+++ b/arch/arm/mach-pxa/include/mach/uncompress.h
@@ -36,7 +36,8 @@ static inline void flush(void)
 static inline void arch_decomp_setup(void)
 {
 	if (machine_is_littleton() || machine_is_intelmote2()
-	    || machine_is_csb726() || machine_is_stargate2())
+	    || machine_is_csb726() || machine_is_stargate2()
+	    || machine_is_cm_x300())
 		UART = STUART;
 }
 
-- 
cgit v0.10.2


From b5a5c474b037ab61930c16b7a8e42874b54e5cb8 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 4 Jun 2009 10:44:54 +0300
Subject: [ARM] pxa/cm-x300: add .fixup method to enable second DRAM bank

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index cd39fa2..465da26 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -28,6 +28,7 @@
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
+#include <asm/setup.h>
 
 #include <mach/pxa300.h>
 #include <mach/pxafb.h>
@@ -494,6 +495,18 @@ static void __init cm_x300_init(void)
 	cm_x300_init_rtc();
 }
 
+static void __init cm_x300_fixup(struct machine_desc *mdesc, struct tag *tags,
+				 char **cmdline, struct meminfo *mi)
+{
+	mi->nr_banks = 2;
+	mi->bank[0].start = 0xa0000000;
+	mi->bank[0].node = 0;
+	mi->bank[0].size = (64*1024*1024);
+	mi->bank[1].start = 0xc0000000;
+	mi->bank[1].node = 0;
+	mi->bank[1].size = (64*1024*1024);
+}
+
 MACHINE_START(CM_X300, "CM-X300 module")
 	.phys_io	= 0x40000000,
 	.boot_params	= 0xa0000100,
@@ -502,4 +515,5 @@ MACHINE_START(CM_X300, "CM-X300 module")
 	.init_irq	= pxa3xx_init_irq,
 	.timer		= &pxa_timer,
 	.init_machine	= cm_x300_init,
+	.fixup		= cm_x300_fixup,
 MACHINE_END
-- 
cgit v0.10.2


From f6394e268fc541c274be8fa30ccb04b3e2f0450f Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 4 Jun 2009 10:44:55 +0300
Subject: [ARM] pxa/cm-x300: update defconfig

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/configs/cm_x300_defconfig b/arch/arm/configs/cm_x300_defconfig
index 227da08..d18d21b 100644
--- a/arch/arm/configs/cm_x300_defconfig
+++ b/arch/arm/configs/cm_x300_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc3
-# Tue Aug 19 11:26:54 2008
+# Linux kernel version: 2.6.30-rc8
+# Thu Jun  4 09:53:21 2009
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
@@ -22,8 +22,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_ARCH_SUPPORTS_AOUT=y
-CONFIG_ZONE_DMA=y
 CONFIG_ARCH_MTD_XIP=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_VECTORS_BASE=0xffff0000
@@ -44,15 +42,24 @@ CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=18
-# CONFIG_CGROUPS is not set
 CONFIG_GROUP_SCHED=y
 CONFIG_FAIR_GROUP_SCHED=y
 # CONFIG_RT_GROUP_SCHED is not set
 CONFIG_USER_SCHED=y
 # CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 # CONFIG_RELAY is not set
@@ -61,31 +68,37 @@ CONFIG_NAMESPACES=y
 # CONFIG_IPC_NS is not set
 # CONFIG_USER_NS is not set
 # CONFIG_PID_NS is not set
+# CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
+CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_SLUB_DEBUG=y
+CONFIG_COMPAT_BRK=y
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
@@ -93,19 +106,13 @@ CONFIG_SLUB=y
 # CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
-# CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is not set
-# CONFIG_HAVE_IOREMAP_PROT is not set
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
-# CONFIG_HAVE_ARCH_TRACEHOOK is not set
-# CONFIG_HAVE_DMA_ATTRS is not set
-# CONFIG_USE_GENERIC_SMP_HELPERS is not set
 CONFIG_HAVE_CLK=y
-CONFIG_PROC_PAGE_MONITOR=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
 # CONFIG_MODULE_FORCE_LOAD is not set
@@ -113,11 +120,8 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -133,7 +137,7 @@ CONFIG_IOSCHED_CFQ=y
 CONFIG_DEFAULT_CFQ=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="cfq"
-CONFIG_CLASSIC_RCU=y
+CONFIG_FREEZER=y
 
 #
 # System Type
@@ -143,10 +147,10 @@ CONFIG_CLASSIC_RCU=y
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
 # CONFIG_ARCH_AT91 is not set
-# CONFIG_ARCH_CLPS7500 is not set
 # CONFIG_ARCH_CLPS711X is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_FOOTBRIDGE is not set
 # CONFIG_ARCH_NETX is not set
 # CONFIG_ARCH_H720X is not set
@@ -167,14 +171,17 @@ CONFIG_CLASSIC_RCU=y
 # CONFIG_ARCH_ORION5X is not set
 # CONFIG_ARCH_PNX4008 is not set
 CONFIG_ARCH_PXA=y
+# CONFIG_ARCH_MMP is not set
 # CONFIG_ARCH_RPC is not set
 # CONFIG_ARCH_SA1100 is not set
 # CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
-# CONFIG_ARCH_MSM7X00A is not set
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_W90X900 is not set
 
 #
 # Intel PXA2xx/PXA3xx Implementations
@@ -187,16 +194,24 @@ CONFIG_CPU_PXA300=y
 # CONFIG_CPU_PXA310 is not set
 # CONFIG_CPU_PXA320 is not set
 # CONFIG_CPU_PXA930 is not set
+# CONFIG_CPU_PXA935 is not set
 # CONFIG_ARCH_GUMSTIX is not set
+# CONFIG_MACH_INTELMOTE2 is not set
 # CONFIG_ARCH_LUBBOCK is not set
 # CONFIG_MACH_LOGICPD_PXA270 is not set
 # CONFIG_MACH_MAINSTONE is not set
+# CONFIG_MACH_MP900C is not set
 # CONFIG_ARCH_PXA_IDP is not set
 # CONFIG_PXA_SHARPSL is not set
+# CONFIG_ARCH_VIPER is not set
 # CONFIG_ARCH_PXA_ESERIES is not set
-# CONFIG_MACH_TRIZEPS4 is not set
+# CONFIG_TRIZEPS_PXA is not set
+# CONFIG_MACH_H5000 is not set
 # CONFIG_MACH_EM_X270 is not set
+# CONFIG_MACH_EXEDA is not set
 # CONFIG_MACH_COLIBRI is not set
+# CONFIG_MACH_COLIBRI300 is not set
+# CONFIG_MACH_COLIBRI320 is not set
 # CONFIG_MACH_ZYLONITE is not set
 # CONFIG_MACH_LITTLETON is not set
 # CONFIG_MACH_TAVOREVB is not set
@@ -204,19 +219,15 @@ CONFIG_CPU_PXA300=y
 # CONFIG_MACH_ARMCORE is not set
 CONFIG_MACH_CM_X300=y
 # CONFIG_MACH_MAGICIAN is not set
+# CONFIG_MACH_HIMALAYA is not set
+# CONFIG_MACH_MIOA701 is not set
 # CONFIG_MACH_PCM027 is not set
 # CONFIG_ARCH_PXA_PALM is not set
+# CONFIG_MACH_CSB726 is not set
 # CONFIG_PXA_EZX is not set
 CONFIG_PXA3xx=y
 # CONFIG_PXA_PWM is not set
-
-#
-# Boot options
-#
-
-#
-# Power management
-#
+CONFIG_PLAT_PXA=y
 
 #
 # Processor Type
@@ -241,6 +252,7 @@ CONFIG_IO_36=y
 CONFIG_OUTER_CACHE=y
 CONFIG_CACHE_XSC3L2=y
 CONFIG_IWMMXT=y
+CONFIG_COMMON_CLKDEV=y
 
 #
 # Bus support
@@ -256,25 +268,33 @@ CONFIG_TICK_ONESHOT=y
 CONFIG_NO_HZ=y
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
 # CONFIG_PREEMPT is not set
 CONFIG_HZ=100
 CONFIG_AEABI=y
 CONFIG_OABI_COMPAT=y
-# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
+# CONFIG_ARCH_HAS_HOLES_MEMORYMODEL is not set
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+CONFIG_HIGHMEM=y
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
 # CONFIG_DISCONTIGMEM_MANUAL is not set
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
 CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4096
-# CONFIG_RESOURCES_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=1
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_ALIGNMENT_TRAP=y
 
 #
@@ -287,7 +307,7 @@ CONFIG_CMDLINE="root=/dev/mtdblock5 rootfstype=jffs2 console=ttyS2,38400"
 # CONFIG_KEXEC is not set
 
 #
-# CPU Frequency scaling
+# CPU Power Management
 #
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_TABLE=y
@@ -304,6 +324,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
 # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+# CONFIG_CPU_IDLE is not set
 
 #
 # Floating point emulation
@@ -320,6 +341,8 @@ CONFIG_FPE_NWFPE=y
 # Userspace binary formats
 #
 CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
 # CONFIG_BINFMT_AOUT is not set
 # CONFIG_BINFMT_MISC is not set
 
@@ -376,6 +399,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
 # CONFIG_VLAN_8021Q is not set
 # CONFIG_DECNET is not set
 # CONFIG_LLC2 is not set
@@ -385,7 +409,9 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 # CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
 
 #
 # Network testing
@@ -407,8 +433,7 @@ CONFIG_BT_HIDP=m
 #
 # Bluetooth device drivers
 #
-CONFIG_BT_HCIUSB=m
-CONFIG_BT_HCIUSB_SCO=y
+# CONFIG_BT_HCIBTUSB is not set
 # CONFIG_BT_HCIBTSDIO is not set
 # CONFIG_BT_HCIUART is not set
 # CONFIG_BT_HCIBCM203X is not set
@@ -416,19 +441,15 @@ CONFIG_BT_HCIUSB_SCO=y
 # CONFIG_BT_HCIBFUSB is not set
 # CONFIG_BT_HCIVHCI is not set
 # CONFIG_AF_RXRPC is not set
-
-#
-# Wireless
-#
+CONFIG_WIRELESS=y
 # CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_OLD_REGULATORY is not set
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
+CONFIG_LIB80211=m
+# CONFIG_LIB80211_DEBUG is not set
 # CONFIG_MAC80211 is not set
-CONFIG_IEEE80211=m
-# CONFIG_IEEE80211_DEBUG is not set
-CONFIG_IEEE80211_CRYPT_WEP=m
-CONFIG_IEEE80211_CRYPT_CCMP=m
-CONFIG_IEEE80211_CRYPT_TKIP=m
+# CONFIG_WIMAX is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
 
@@ -453,6 +474,7 @@ CONFIG_MTD=y
 # CONFIG_MTD_DEBUG is not set
 # CONFIG_MTD_CONCAT is not set
 CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
 # CONFIG_MTD_REDBOOT_PARTS is not set
 # CONFIG_MTD_CMDLINE_PARTS is not set
 # CONFIG_MTD_AFS_PARTS is not set
@@ -494,7 +516,6 @@ CONFIG_MTD_CFI_I2=y
 # Mapping drivers for chip access
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
-# CONFIG_MTD_SHARP_SL is not set
 # CONFIG_MTD_PLATRAM is not set
 
 #
@@ -516,16 +537,23 @@ CONFIG_MTD_NAND=y
 # CONFIG_MTD_NAND_ECC_SMC is not set
 # CONFIG_MTD_NAND_MUSEUM_IDS is not set
 # CONFIG_MTD_NAND_H1900 is not set
+# CONFIG_MTD_NAND_GPIO is not set
 CONFIG_MTD_NAND_IDS=y
 # CONFIG_MTD_NAND_DISKONCHIP is not set
 # CONFIG_MTD_NAND_SHARPSL is not set
 CONFIG_MTD_NAND_PXA3xx=y
+# CONFIG_MTD_NAND_PXA3xx_BUILTIN is not set
 # CONFIG_MTD_NAND_NANDSIM is not set
 # CONFIG_MTD_NAND_PLATFORM is not set
 # CONFIG_MTD_ALAUDA is not set
 # CONFIG_MTD_ONENAND is not set
 
 #
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
 # UBI - Unsorted block images
 #
 # CONFIG_MTD_UBI is not set
@@ -585,11 +613,15 @@ CONFIG_SCSI_WAIT_SCAN=m
 # CONFIG_SCSI_SRP_ATTRS is not set
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
+# CONFIG_LIBFC is not set
+# CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 # CONFIG_ATA is not set
 # CONFIG_MD is not set
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
 # CONFIG_MACVLAN is not set
@@ -604,11 +636,17 @@ CONFIG_MII=y
 CONFIG_DM9000=y
 CONFIG_DM9000_DEBUGLEVEL=0
 CONFIG_DM9000_FORCE_SIMPLE_PHY_POLL=y
+# CONFIG_ETHOC is not set
 # CONFIG_SMC911X is not set
+# CONFIG_SMSC911X is not set
+# CONFIG_DNET is not set
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
 # CONFIG_IBM_NEW_EMAC_TAH is not set
 # CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
@@ -624,10 +662,13 @@ CONFIG_LIBERTAS_SDIO=m
 # CONFIG_LIBERTAS_DEBUG is not set
 # CONFIG_USB_ZD1201 is not set
 # CONFIG_USB_NET_RNDIS_WLAN is not set
-# CONFIG_IWLWIFI_LEDS is not set
 # CONFIG_HOSTAP is not set
 
 #
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+
+#
 # USB Network Adapters
 #
 # CONFIG_USB_CATC is not set
@@ -677,18 +718,21 @@ CONFIG_KEYBOARD_PXA27x=m
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
 # CONFIG_TOUCHSCREEN_MTOUCH is not set
 # CONFIG_TOUCHSCREEN_INEXIO is not set
 # CONFIG_TOUCHSCREEN_MK712 is not set
 # CONFIG_TOUCHSCREEN_PENMOUNT is not set
 # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
 # CONFIG_TOUCHSCREEN_TOUCHWIN is not set
-# CONFIG_TOUCHSCREEN_UCB1400 is not set
 # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
 # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+# CONFIG_TOUCHSCREEN_TSC2007 is not set
 # CONFIG_INPUT_MISC is not set
 
 #
@@ -721,10 +765,10 @@ CONFIG_SERIAL_PXA_CONSOLE=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 # CONFIG_HW_RANDOM is not set
-# CONFIG_NVRAM is not set
 # CONFIG_R3964 is not set
 # CONFIG_RAW_DRIVER is not set
 # CONFIG_TCG_TPM is not set
@@ -763,12 +807,8 @@ CONFIG_I2C_PXA=y
 # Miscellaneous I2C Chip support
 #
 # CONFIG_DS1682 is not set
-# CONFIG_EEPROM_AT24 is not set
-# CONFIG_EEPROM_LEGACY is not set
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_TPS65010 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -782,6 +822,10 @@ CONFIG_GPIOLIB=y
 # CONFIG_GPIO_SYSFS is not set
 
 #
+# Memory mapped GPIO expanders:
+#
+
+#
 # I2C GPIO expanders:
 #
 # CONFIG_GPIO_MAX732X is not set
@@ -798,12 +842,14 @@ CONFIG_GPIO_PCA953X=y
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
 
 #
 # Sonics Silicon Backplane
 #
-CONFIG_SSB_POSSIBLE=y
 # CONFIG_SSB is not set
 
 #
@@ -811,12 +857,19 @@ CONFIG_SSB_POSSIBLE=y
 #
 # CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
 # CONFIG_HTC_EGPIO is not set
 # CONFIG_HTC_PASIC3 is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_TWL4030_CORE is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_MFD_T7L66XB is not set
 # CONFIG_MFD_TC6387XB is not set
 # CONFIG_MFD_TC6393XB is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
 
 #
 # Multimedia devices
@@ -842,6 +895,7 @@ CONFIG_SSB_POSSIBLE=y
 CONFIG_FB=y
 # CONFIG_FIRMWARE_EDID is not set
 # CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
 CONFIG_FB_CFB_FILLRECT=y
 CONFIG_FB_CFB_COPYAREA=y
 CONFIG_FB_CFB_IMAGEBLIT=y
@@ -862,12 +916,15 @@ CONFIG_FB_CFB_IMAGEBLIT=y
 #
 # CONFIG_FB_S1D13XXX is not set
 CONFIG_FB_PXA=y
+# CONFIG_FB_PXA_OVERLAY is not set
 # CONFIG_FB_PXA_SMARTPANEL is not set
 # CONFIG_FB_PXA_PARAMETERS is not set
 # CONFIG_FB_MBX is not set
 # CONFIG_FB_W100 is not set
-# CONFIG_FB_AM200EPD is not set
 # CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
@@ -899,9 +956,11 @@ CONFIG_LOGO_LINUX_MONO=y
 CONFIG_LOGO_LINUX_VGA16=y
 CONFIG_LOGO_LINUX_CLUT224=y
 CONFIG_SOUND=m
+# CONFIG_SOUND_OSS_CORE is not set
 CONFIG_SND=m
 CONFIG_SND_TIMER=m
 CONFIG_SND_PCM=m
+CONFIG_SND_JACK=y
 # CONFIG_SND_SEQUENCER is not set
 # CONFIG_SND_MIXER_OSS is not set
 # CONFIG_SND_PCM_OSS is not set
@@ -916,12 +975,15 @@ CONFIG_SND_DRIVERS=y
 # CONFIG_SND_SERIAL_U16550 is not set
 # CONFIG_SND_MPU401 is not set
 CONFIG_SND_ARM=y
+CONFIG_SND_PXA2XX_LIB=m
 # CONFIG_SND_PXA2XX_AC97 is not set
 CONFIG_SND_USB=y
 # CONFIG_SND_USB_AUDIO is not set
 # CONFIG_SND_USB_CAIAQ is not set
 CONFIG_SND_SOC=m
 CONFIG_SND_PXA2XX_SOC=m
+CONFIG_SND_SOC_I2C_AND_SPI=m
+# CONFIG_SND_SOC_ALL_CODECS is not set
 # CONFIG_SOUND_PRIME is not set
 CONFIG_HID_SUPPORT=y
 CONFIG_HID=y
@@ -932,9 +994,39 @@ CONFIG_HID_DEBUG=y
 # USB Input Devices
 #
 CONFIG_USB_HID=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
+# CONFIG_HID_PID is not set
 # CONFIG_USB_HIDDEV is not set
+
+#
+# Special HID drivers
+#
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
+CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LOGITECH=y
+# CONFIG_LOGITECH_FF is not set
+# CONFIG_LOGIRUMBLEPAD2_FF is not set
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
+# CONFIG_PANTHERLORD_FF is not set
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SUNPLUS=y
+# CONFIG_GREENASIA_FF is not set
+CONFIG_HID_TOPSEED=y
+# CONFIG_THRUSTMASTER_FF is not set
+# CONFIG_ZEROPLUS_FF is not set
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -952,11 +1044,14 @@ CONFIG_USB_DEVICEFS=y
 # CONFIG_USB_SUSPEND is not set
 # CONFIG_USB_OTG is not set
 CONFIG_USB_MON=y
+# CONFIG_USB_WUSB is not set
+# CONFIG_USB_WUSB_CBAF is not set
 
 #
 # USB Host Controller Drivers
 #
 # CONFIG_USB_C67X00_HCD is not set
+# CONFIG_USB_OXU210HP_HCD is not set
 # CONFIG_USB_ISP116X_HCD is not set
 # CONFIG_USB_ISP1760_HCD is not set
 CONFIG_USB_OHCI_HCD=y
@@ -965,6 +1060,7 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 # CONFIG_USB_SL811_HCD is not set
 # CONFIG_USB_R8A66597_HCD is not set
+# CONFIG_USB_HWA_HCD is not set
 # CONFIG_USB_MUSB_HDRC is not set
 
 #
@@ -973,20 +1069,20 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 # CONFIG_USB_ACM is not set
 # CONFIG_USB_PRINTER is not set
 # CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# may also be needed; see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
 # CONFIG_USB_STORAGE_ISD200 is not set
-# CONFIG_USB_STORAGE_DPCM is not set
 # CONFIG_USB_STORAGE_USBAT is not set
 # CONFIG_USB_STORAGE_SDDR09 is not set
 # CONFIG_USB_STORAGE_SDDR55 is not set
@@ -994,7 +1090,6 @@ CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_ALAUDA is not set
 # CONFIG_USB_STORAGE_ONETOUCH is not set
 # CONFIG_USB_STORAGE_KARMA is not set
-# CONFIG_USB_STORAGE_SIERRA is not set
 # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
 # CONFIG_USB_LIBUSUAL is not set
 
@@ -1015,6 +1110,7 @@ CONFIG_USB_STORAGE=y
 # CONFIG_USB_EMI62 is not set
 # CONFIG_USB_EMI26 is not set
 # CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_SEVSEG is not set
 # CONFIG_USB_RIO500 is not set
 # CONFIG_USB_LEGOTOWER is not set
 # CONFIG_USB_LCD is not set
@@ -1022,7 +1118,6 @@ CONFIG_USB_STORAGE=y
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
@@ -1031,13 +1126,20 @@ CONFIG_USB_STORAGE=y
 # CONFIG_USB_IOWARRIOR is not set
 # CONFIG_USB_TEST is not set
 # CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_USB_GPIO_VBUS is not set
+# CONFIG_NOP_USB_XCEIV is not set
 CONFIG_MMC=m
 # CONFIG_MMC_DEBUG is not set
 # CONFIG_MMC_UNSAFE_RESUME is not set
 
 #
-# MMC/SD Card Drivers
+# MMC/SD/SDIO Card Drivers
 #
 CONFIG_MMC_BLOCK=m
 CONFIG_MMC_BLOCK_BOUNCE=y
@@ -1045,10 +1147,12 @@ CONFIG_MMC_BLOCK_BOUNCE=y
 # CONFIG_MMC_TEST is not set
 
 #
-# MMC/SD Host Controller Drivers
+# MMC/SD/SDIO Host Controller Drivers
 #
 CONFIG_MMC_PXA=m
 # CONFIG_MMC_SDHCI is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 
@@ -1057,7 +1161,10 @@ CONFIG_LEDS_CLASS=y
 #
 # CONFIG_LEDS_PCA9532 is not set
 CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_GPIO_PLATFORM=y
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1065,7 +1172,13 @@ CONFIG_LEDS_GPIO=y
 CONFIG_LEDS_TRIGGERS=y
 # CONFIG_LEDS_TRIGGER_TIMER is not set
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
+# CONFIG_LEDS_TRIGGER_GPIO is not set
 # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 CONFIG_RTC_LIB=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_HCTOSYS=y
@@ -1096,6 +1209,7 @@ CONFIG_RTC_INTF_DEV=y
 # CONFIG_RTC_DRV_M41T80 is not set
 # CONFIG_RTC_DRV_S35390A is not set
 # CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
 
 #
 # SPI RTC drivers
@@ -1105,28 +1219,27 @@ CONFIG_RTC_INTF_DEV=y
 # Platform RTC drivers
 #
 # CONFIG_RTC_DRV_CMOS is not set
+# CONFIG_RTC_DRV_DS1286 is not set
 # CONFIG_RTC_DRV_DS1511 is not set
 # CONFIG_RTC_DRV_DS1553 is not set
 # CONFIG_RTC_DRV_DS1742 is not set
 # CONFIG_RTC_DRV_STK17TA8 is not set
 # CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
 # CONFIG_RTC_DRV_M48T59 is not set
-# CONFIG_RTC_DRV_V3020 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+CONFIG_RTC_DRV_V3020=y
 
 #
 # on-CPU RTC drivers
 #
 CONFIG_RTC_DRV_SA1100=y
+# CONFIG_RTC_DRV_PXA is not set
 # CONFIG_DMADEVICES is not set
-
-#
-# Voltage and Current regulators
-#
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_REGULATOR is not set
-# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
-# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
-# CONFIG_REGULATOR_BQ24022 is not set
 # CONFIG_UIO is not set
+# CONFIG_STAGING is not set
 
 #
 # File systems
@@ -1135,15 +1248,18 @@ CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 # CONFIG_EXT3_FS_XATTR is not set
-# CONFIG_EXT4DEV_FS is not set
+# CONFIG_EXT4_FS is not set
 CONFIG_JBD=y
 # CONFIG_JBD_DEBUG is not set
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
 CONFIG_FS_POSIX_ACL=y
+CONFIG_FILE_LOCKING=y
 # CONFIG_XFS_FS is not set
 # CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
 CONFIG_DNOTIFY=y
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY_USER=y
@@ -1153,6 +1269,11 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -1173,15 +1294,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 #
 CONFIG_PROC_FS=y
 CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 # CONFIG_TMPFS_POSIX_ACL is not set
 # CONFIG_HUGETLB_PAGE is not set
 # CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
+CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ADFS_FS is not set
 # CONFIG_AFFS_FS is not set
 # CONFIG_HFS_FS is not set
@@ -1201,6 +1320,7 @@ CONFIG_JFFS2_ZLIB=y
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
 # CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_OMFS_FS is not set
@@ -1209,6 +1329,7 @@ CONFIG_JFFS2_RTIME=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -1313,6 +1434,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
@@ -1329,6 +1451,7 @@ CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 # CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
@@ -1336,22 +1459,38 @@ CONFIG_DEBUG_BUGVERBOSE=y
 CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
-CONFIG_FRAME_POINTER=y
+# CONFIG_DEBUG_NOTIFIERS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_HAVE_FTRACE=y
-CONFIG_HAVE_DYNAMIC_FTRACE=y
-# CONFIG_FTRACE is not set
+# CONFIG_PAGE_POISONING is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
+CONFIG_ARM_UNWIND=y
 CONFIG_DEBUG_USER=y
 # CONFIG_DEBUG_ERRORS is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
@@ -1363,17 +1502,28 @@ CONFIG_DEBUG_LL=y
 #
 # CONFIG_KEYS is not set
 # CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 CONFIG_CRYPTO=y
 
 #
 # Crypto core or helper
 #
+# CONFIG_CRYPTO_FIPS is not set
 CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
 CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 # CONFIG_CRYPTO_AUTHENC is not set
 # CONFIG_CRYPTO_TEST is not set
@@ -1442,15 +1592,21 @@ CONFIG_CRYPTO_DES=y
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
 #
 CONFIG_BITREVERSE=y
-# CONFIG_GENERIC_FIND_FIRST_BIT is not set
-# CONFIG_GENERIC_FIND_NEXT_BIT is not set
+CONFIG_GENERIC_FIND_LAST_BIT=y
 # CONFIG_CRC_CCITT is not set
 # CONFIG_CRC16 is not set
 CONFIG_CRC_T10DIF=y
@@ -1460,7 +1616,10 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From 1938a150c25bf7c2c47182e753a1038945b70b0e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Fri, 5 Jun 2009 01:00:26 -0400
Subject: ext4: Avoid leaking blocks after a block allocation failure

We should add inode to the orphan list in the same transaction
as block allocation.  This ensures that if we crash after a failed
block allocation and before we do a vmtruncate we don't leak block
(ie block marked as used in bitmap but not claimed by the inode).

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
CC:  Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 17ed0d2..8d21588 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1459,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
 				struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
-	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
+	int ret, needed_blocks;
 	handle_t *handle;
 	int retries = 0;
 	struct page *page;
@@ -1470,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
 		   "dev %s ino %lu pos %llu len %u flags %u",
 		   inode->i_sb->s_id, inode->i_ino,
 		   (unsigned long long) pos, len, flags);
+	/*
+	 * Reserve one block more for addition to orphan list in case
+	 * we allocate blocks but write fails for some reason
+	 */
+	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
  	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
@@ -1503,15 +1508,30 @@ retry:
 
 	if (ret) {
 		unlock_page(page);
-		ext4_journal_stop(handle);
 		page_cache_release(page);
 		/*
 		 * block_write_begin may have instantiated a few blocks
 		 * outside i_size.  Trim these off again. Don't need
 		 * i_size_read because we hold i_mutex.
+		 *
+		 * Add inode to orphan list in case we crash before
+		 * truncate finishes
 		 */
 		if (pos + len > inode->i_size)
+			ext4_orphan_add(handle, inode);
+
+		ext4_journal_stop(handle);
+		if (pos + len > inode->i_size) {
 			vmtruncate(inode, inode->i_size);
+			/* 
+			 * If vmtruncate failed early the inode might
+			 * still be on the orphan list; we need to
+			 * make sure the inode is removed from the
+			 * orphan list in that case.
+			 */
+			if (inode->i_nlink)
+				ext4_orphan_del(NULL, inode);
+		}
 	}
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-- 
cgit v0.10.2


From f8514083cd61daef12fba5ef883ad9352c450428 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Fri, 5 Jun 2009 00:56:49 -0400
Subject: ext4: truncate the file properly if we fail to copy data from
 userspace

In generic_perform_write if we fail to copy the user data we don't
update the inode->i_size.  We should truncate the file in the above
case so that we don't have blocks allocated outside inode->i_size.  Add
the inode to orphan list in the same transaction as block allocation
This ensures that if we crash in between the recovery would do the
truncate.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
CC:  Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8d21588..2c10d34 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1549,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 	return ext4_handle_dirty_metadata(handle, NULL, bh);
 }
 
+static int ext4_generic_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	int i_size_changed = 0;
+	struct inode *inode = mapping->host;
+	handle_t *handle = ext4_journal_current_handle();
+
+	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_mutex.
+	 *
+	 * But it's important to update i_size while still holding page lock:
+	 * page writeout could otherwise come in and zero beyond i_size.
+	 */
+	if (pos + copied > inode->i_size) {
+		i_size_write(inode, pos + copied);
+		i_size_changed = 1;
+	}
+
+	if (pos + copied >  EXT4_I(inode)->i_disksize) {
+		/* We need to mark inode dirty even if
+		 * new_i_size is less that inode->i_size
+		 * bu greater than i_disksize.(hint delalloc)
+		 */
+		ext4_update_i_disksize(inode, (pos + copied));
+		i_size_changed = 1;
+	}
+	unlock_page(page);
+	page_cache_release(page);
+
+	/*
+	 * Don't mark the inode dirty under page lock. First, it unnecessarily
+	 * makes the holding time of page lock longer. Second, it forces lock
+	 * ordering of page lock and transaction start for journaling
+	 * filesystems.
+	 */
+	if (i_size_changed)
+		ext4_mark_inode_dirty(handle, inode);
+
+	return copied;
+}
+
 /*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
@@ -1572,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file,
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
-		loff_t new_i_size;
-
-		new_i_size = pos + copied;
-		if (new_i_size > EXT4_I(inode)->i_disksize) {
-			ext4_update_i_disksize(inode, new_i_size);
-			/* We need to mark inode dirty even if
-			 * new_i_size is less that inode->i_size
-			 * bu greater than i_disksize.(hint delalloc)
-			 */
-			ext4_mark_inode_dirty(handle, inode);
-		}
-
-		ret2 = generic_write_end(file, mapping, pos, len, copied,
+		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 		copied = ret2;
+		if (pos + len > inode->i_size)
+			/* if we have allocated more blocks and copied
+			 * less. We will have blocks allocated outside
+			 * inode->i_size. So truncate them
+			 */
+			ext4_orphan_add(handle, inode);
 		if (ret2 < 0)
 			ret = ret2;
 	}
@@ -1594,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file,
 	if (!ret)
 		ret = ret2;
 
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+
 	return ret ? ret : copied;
 }
 
@@ -1605,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file,
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
-	loff_t new_i_size;
 
 	trace_mark(ext4_writeback_write_end,
 		   "dev %s ino %lu pos %llu len %u copied %u",
 		   inode->i_sb->s_id, inode->i_ino,
 		   (unsigned long long) pos, len, copied);
-	new_i_size = pos + copied;
-	if (new_i_size > EXT4_I(inode)->i_disksize) {
-		ext4_update_i_disksize(inode, new_i_size);
-		/* We need to mark inode dirty even if
-		 * new_i_size is less that inode->i_size
-		 * bu greater than i_disksize.(hint delalloc)
-		 */
-		ext4_mark_inode_dirty(handle, inode);
-	}
-
-	ret2 = generic_write_end(file, mapping, pos, len, copied,
+	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
+	if (pos + len > inode->i_size)
+		/* if we have allocated more blocks and copied
+		 * less. We will have blocks allocated outside
+		 * inode->i_size. So truncate them
+		 */
+		ext4_orphan_add(handle, inode);
+
 	if (ret2 < 0)
 		ret = ret2;
 
@@ -1631,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file,
 	if (!ret)
 		ret = ret2;
 
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
 	return ret ? ret : copied;
 }
 
@@ -1675,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file,
 	}
 
 	unlock_page(page);
+	page_cache_release(page);
+	if (pos + len > inode->i_size)
+		/* if we have allocated more blocks and copied
+		 * less. We will have blocks allocated outside
+		 * inode->i_size. So truncate them
+		 */
+		ext4_orphan_add(handle, inode);
+
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	page_cache_release(page);
+	if (pos + len > inode->i_size) {
+		vmtruncate(inode, inode->i_size);
+		/* 
+		 * If vmtruncate failed early the inode might still be
+		 * on the orphan list; we need to make sure the inode
+		 * is removed from the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
 
 	return ret ? ret : copied;
 }
-- 
cgit v0.10.2


From 6dc5f2a41759987e35e757ef00192e7b424563bb Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 5 Jun 2009 12:36:28 +1000
Subject: perf_counter: Fix lockup with interrupting counters

Commit 8e3747c1 ("perf_counter: Change data head from u32 to u64")
changed the type of 'head' in struct perf_mmap_data from atomic_t
to atomic_long_t, but missed converting one use of atomic_read on
it to atomic_long_read.  The effect of using atomic_read rather than
atomic_long_read on powerpc (and other big-endian architectures) is
that we get the high half of the 64-bit quantity, resulting in the
cmpxchg retry loop in perf_output_begin spinning forever as soon as
data->head becomes non-zero.  On little-endian architectures such as
x86 we would get the low half, resulting in a lockup once data->head
becomes greater than 4G.

This fixes it by using atomic_long_read rather than atomic_read.

[ Impact: fix perfcounter lockup on PowerPC / big-endian systems ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18984.33964.21541.743096@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 195712e..a5d3e2a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2234,7 +2234,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	perf_output_lock(handle);
 
 	do {
-		offset = head = atomic_read(&data->head);
+		offset = head = atomic_long_read(&data->head);
 		head += size;
 	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
 
-- 
cgit v0.10.2


From ee7b31fe5c5da8a038b96e54ae9fbd5dcab3b1da Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Fri, 5 Jun 2009 11:37:35 +0800
Subject: perf_counter tools: Fix incorrect printf formats

Otherwise the code does not compile on 32-bit boxes.

builtin-report.c: In function 'map__fprintf':
builtin-report.c:240: error: format '%lx' expects type 'long unsigned int', but argument 3 has type 'uint64_t'
builtin-report.c:240: error: format '%lx' expects type 'long unsigned int', but argument 4 has type 'uint64_t'
builtin-report.c:240: error: format '%lx' expects type 'long unsigned int', but argument 5 has type 'uint64_t'

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090605033735.GA20451@ywang-moblin2.bj.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 1a1028d..eb5424f 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -236,7 +236,7 @@ static int map__overlap(struct map *l, struct map *r)
 
 static size_t map__fprintf(struct map *self, FILE *fp)
 {
-	return fprintf(fp, " %lx-%lx %lx %s\n",
+	return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n",
 		       self->start, self->end, self->pgoff, self->dso->name);
 }
 
-- 
cgit v0.10.2


From 6d74b86d3c0f9cfa949566a862aaad840e393249 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 09:26:41 +0200
Subject: ALSA: ctxfi - Allow 64bit DMA

emu20kx chips support 64bit address PTE.  Allow the DMA bit mask to
accept 64bit address, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index b7b8e6f..1da1f82 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -15,8 +15,6 @@
  *
  */
 
-#include "cthw20k1.h"
-#include "ct20k1reg.h"
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
@@ -26,8 +24,14 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include "cthw20k1.h"
+#include "ct20k1reg.h"
 
-#define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bits */
+#if BITS_PER_LONG == 32
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bit PTE */
+#else
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(64) /* 64 bit PTE */
+#endif
 
 struct hw20k1 {
 	struct hw hw;
-- 
cgit v0.10.2


From 42a0b31827e4c555efebda7d347cf4ea6b82913a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 09:29:22 +0200
Subject: ALSA: ctxfi - Fix endian-dependent codes

The UAA-mode check in hwct20k1.c is implemented with the endian-dependent
codes.  Fix to be more portable (and readable).

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index 1da1f82..fd5f454 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1785,6 +1785,8 @@ static int hw_have_digit_io_switch(struct hw *hw)
 				|| ((subsys_id & 0xf000) == 0x6000));
 }
 
+#define CTLBITS(a, b, c, d)	(((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
+
 #define UAA_CFG_PWRSTATUS	0x44
 #define UAA_CFG_SPACE_FLAG	0xA0
 #define UAA_CORE_CHANGE		0x3FFC
@@ -1792,12 +1794,18 @@ static int uaa_to_xfi(struct pci_dev *pci)
 {
 	unsigned int bar0, bar1, bar2, bar3, bar4, bar5;
 	unsigned int cmd, irq, cl_size, l_timer, pwr;
-	unsigned int CTLA, CTLZ, CTLL, CTLX, CTL_, CTLF, CTLi;
 	unsigned int is_uaa = 0;
 	unsigned int data[4] = {0};
 	unsigned int io_base;
 	void *mem_base;
 	int i = 0;
+	const u32 CTLX = CTLBITS('C', 'T', 'L', 'X');
+	const u32 CTL_ = CTLBITS('C', 'T', 'L', '-');
+	const u32 CTLF = CTLBITS('C', 'T', 'L', 'F');
+	const u32 CTLi = CTLBITS('C', 'T', 'L', 'i');
+	const u32 CTLA = CTLBITS('C', 'T', 'L', 'A');
+	const u32 CTLZ = CTLBITS('C', 'T', 'L', 'Z');
+	const u32 CTLL = CTLBITS('C', 'T', 'L', 'L');
 
 	/* By default, Hendrix card UAA Bar0 should be using memory... */
 	io_base = pci_resource_start(pci, 0);
@@ -1805,14 +1813,6 @@ static int uaa_to_xfi(struct pci_dev *pci)
 	if (NULL == mem_base)
 		return -ENOENT;
 
-	CTLX = ___constant_swab32(*((unsigned int *)"CTLX"));
-	CTL_ = ___constant_swab32(*((unsigned int *)"CTL-"));
-	CTLF = ___constant_swab32(*((unsigned int *)"CTLF"));
-	CTLi = ___constant_swab32(*((unsigned int *)"CTLi"));
-	CTLA = ___constant_swab32(*((unsigned int *)"CTLA"));
-	CTLZ = ___constant_swab32(*((unsigned int *)"CTLZ"));
-	CTLL = ___constant_swab32(*((unsigned int *)"CTLL"));
-
 	/* Read current mode from Mode Change Register */
 	for (i = 0; i < 4; i++)
 		data[i] = readl(mem_base + UAA_CORE_CHANGE);
-- 
cgit v0.10.2


From f6d03139d745198b434f65a28aabed524f415a4c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 5 Jun 2009 07:18:57 +0100
Subject: GFS2: Fix locking issue mounting gfs2meta fs

This patch uses sget() to get a reference to the
existing gfs2 sb when mouting the gfs2meta filesystem
(in fact thats just another mount of the gfs2
filesystem with a different root and this interface
is for backward compatibility).

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Benjamin Marzinski <bmarzins@redhat.com>
Tested-by: Benjamin Marzinski <bmarzins@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2cd1164..9da161c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1273,9 +1273,20 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
 	return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
 }
 
+static int test_meta_super(struct super_block *s, void *ptr)
+{
+	struct block_device *bdev = ptr;
+	return (bdev == s->s_bdev);
+}
+
+static int set_meta_super(struct super_block *s, void *ptr)
+{
+	return -EINVAL;
+}
+
 static struct super_block *get_gfs2_sb(const char *dev_name)
 {
-	struct super_block *sb;
+	struct super_block *s;
 	struct path path;
 	int error;
 
@@ -1283,30 +1294,27 @@ static struct super_block *get_gfs2_sb(const char *dev_name)
 	if (error) {
 		printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
 		       dev_name, error);
-		return NULL;
+		return ERR_PTR(-ENOENT);
 	}
-	sb = path.dentry->d_inode->i_sb;
-	if (sb && (sb->s_type == &gfs2_fs_type))
-		atomic_inc(&sb->s_active);
-	else
-		sb = NULL;
+	s = sget(&gfs2_fs_type, test_meta_super, set_meta_super,
+		 path.dentry->d_inode->i_sb->s_bdev);
 	path_put(&path);
-	return sb;
+	return s;
 }
 
 static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 			    const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	struct super_block *sb = NULL;
+	struct super_block *s;
 	struct gfs2_sbd *sdp;
 
-	sb = get_gfs2_sb(dev_name);
-	if (!sb) {
+	s = get_gfs2_sb(dev_name);
+	if (IS_ERR(s)) {
 		printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
-		return -ENOENT;
+		return PTR_ERR(s);
 	}
-	sdp = sb->s_fs_info;
-	mnt->mnt_sb = sb;
+	sdp = s->s_fs_info;
+	mnt->mnt_sb = s;
 	mnt->mnt_root = dget(sdp->sd_master_dir);
 	return 0;
 }
-- 
cgit v0.10.2


From add85a418f8e224d2b09d4a8d14c6c060ba3a79c Mon Sep 17 00:00:00 2001
From: Simon POLETTE <spolette@adetelgroup.com>
Date: Thu, 4 Jun 2009 12:09:01 +0200
Subject: PATCH] mx27pdk: rename mxc_map_io to mx27_map_io

Hi,
Fixed issue in the mxc-master head :

Signed-off-by: Simon POLETTE <spolette@adnlysd018.(none)>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/mx27pdk.c b/arch/arm/mach-mx2/mx27pdk.c
index 90b1fa5..1d9238c 100644
--- a/arch/arm/mach-mx2/mx27pdk.c
+++ b/arch/arm/mach-mx2/mx27pdk.c
@@ -88,7 +88,7 @@ MACHINE_START(MX27_3DS, "Freescale MX27PDK")
 	.phys_io        = AIPI_BASE_ADDR,
 	.io_pg_offst    = ((AIPI_BASE_ADDR_VIRT) >> 18) & 0xfffc,
 	.boot_params    = PHYS_OFFSET + 0x100,
-	.map_io         = mxc_map_io,
+	.map_io         = mx27_map_io,
 	.init_irq       = mxc_init_irq,
 	.init_machine   = mx27pdk_init,
 	.timer          = &mx27pdk_timer,
-- 
cgit v0.10.2


From 361778d6e7cb315b50f3f2469d782e90df7ac2cb Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.y.miao@gmail.com>
Date: Fri, 5 Jun 2009 16:42:29 +0800
Subject: [ARM] pxa/littleton: add support for the Micro-SD slot (MMC1)

Signed-off-by: Eric Miao <eric.y.miao@gmail.com>

diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 94a3232..55b3788 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -42,6 +42,7 @@
 #include <mach/pxa300.h>
 #include <mach/pxafb.h>
 #include <mach/ssp.h>
+#include <mach/mmc.h>
 #include <mach/pxa2xx_spi.h>
 #include <plat/i2c.h>
 #include <mach/pxa27x_keypad.h>
@@ -50,6 +51,8 @@
 
 #include "generic.h"
 
+#define GPIO_MMC1_CARD_DETECT	mfp_to_gpio(MFP_PIN_GPIO15)
+
 /* Littleton MFP configurations */
 static mfp_cfg_t littleton_mfp_cfg[] __initdata = {
 	/* LCD */
@@ -98,6 +101,15 @@ static mfp_cfg_t littleton_mfp_cfg[] __initdata = {
 	GPIO123_KP_MKOUT_2,
 	GPIO124_KP_MKOUT_3,
 	GPIO125_KP_MKOUT_4,
+
+	/* MMC1 */
+	GPIO3_MMC1_DAT0,
+	GPIO4_MMC1_DAT1,
+	GPIO5_MMC1_DAT2,
+	GPIO6_MMC1_DAT3,
+	GPIO7_MMC1_CLK,
+	GPIO8_MMC1_CMD,
+	GPIO15_GPIO, /* card detect */
 };
 
 static struct resource smc91x_resources[] = {
@@ -252,6 +264,56 @@ static void __init littleton_init_keypad(void)
 static inline void littleton_init_keypad(void) {}
 #endif
 
+#if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
+static int littleton_mci_init(struct device *dev,
+			      irq_handler_t littleton_detect_int, void *data)
+{
+	int err, gpio_cd = GPIO_MMC1_CARD_DETECT;
+
+	err = gpio_request(gpio_cd, "mmc card detect");
+	if (err)
+		goto err_request_cd;
+
+	gpio_direction_input(gpio_cd);
+
+	err = request_irq(gpio_to_irq(gpio_cd), littleton_detect_int,
+			  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+			  "mmc card detect", data);
+	if (err) {
+		dev_err(dev, "failed to request card detect IRQ\n");
+		goto err_request_irq;
+	}
+	return 0;
+
+err_request_irq:
+	gpio_free(gpio_cd);
+err_request_cd:
+	return err;
+}
+
+static void littleton_mci_exit(struct device *dev, void *data)
+{
+	int gpio_cd = GPIO_MMC1_CARD_DETECT;
+
+	free_irq(gpio_to_irq(gpio_cd), data);
+	gpio_free(gpio_cd);
+}
+
+static struct pxamci_platform_data littleton_mci_platform_data = {
+	.detect_delay	= 20,
+	.ocr_mask	= MMC_VDD_32_33 | MMC_VDD_33_34,
+	.init 		= littleton_mci_init,
+	.exit		= littleton_mci_exit,
+};
+
+static void __init littleton_init_mmc(void)
+{
+	pxa_set_mci_info(&littleton_mci_platform_data);
+}
+#else
+static inline void littleton_init_mmc(void) {}
+#endif
+
 #if defined(CONFIG_MTD_NAND_PXA3xx) || defined(CONFIG_MTD_NAND_PXA3xx_MODULE)
 static struct mtd_partition littleton_nand_partitions[] = {
 	[0] = {
@@ -392,6 +454,7 @@ static void __init littleton_init(void)
 
 	littleton_init_spi();
 	littleton_init_i2c();
+	littleton_init_mmc();
 	littleton_init_lcd();
 	littleton_init_keypad();
 	littleton_init_nand();
-- 
cgit v0.10.2


From 3e1647c5b54a91a7182e121cfe569e6f0bf167ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20G=C3=BCnther?= <agx@sigxcpu.org>
Date: Fri, 5 Jun 2009 00:47:26 +0200
Subject: ALSA: support Sony Vaio TT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

with BIOS probing only we offer a non functional headphone swith and
volume slider.

Signed-off-by: Guido Günther <agx@sigxcpu.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 29c6125..54b0805 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -159,6 +159,7 @@ ALC883/888
   3stack-6ch-intel Intel DG33* boards
   asus-p5q	ASUS P5Q-EM boards
   mb31		MacBook 3,1
+  sony-vaio-tt  Sony VAIO TT
   auto		auto-config reading BIOS (default)
 
 ALC861/660
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8699b7f..1ab1b92 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -243,6 +243,7 @@ enum {
 	ALC888_ASUS_EEE1601,
 	ALC889A_MB31,
 	ALC1200_ASUS_P5Q,
+	ALC883_SONY_VAIO_TT,
 	ALC883_AUTO,
 	ALC883_MODEL_LAST,
 };
@@ -8134,6 +8135,16 @@ static struct snd_kcontrol_new alc889A_mb31_mixer[] = {
 	{ } /* end */
 };
 
+static struct snd_kcontrol_new alc883_vaiott_mixer[] = {
+	HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+	HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
+	HDA_CODEC_MUTE("Headphone Playback Switch", 0x15, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost", 0x19, 0, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+	{ } /* end */
+};
+
 static struct hda_bind_ctls alc883_bind_cap_vol = {
 	.ops = &snd_hda_bind_vol,
 	.values = {
@@ -8410,6 +8421,17 @@ static struct hda_verb alc888_6st_dell_verbs[] = {
 	{ }
 };
 
+static struct hda_verb alc883_vaiott_verbs[] = {
+	/* HP */
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+
+	/* enable unsolicited event */
+	{0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
+
+	{ } /* end */
+};
+
 static void alc888_3st_hp_init_hook(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
@@ -8669,6 +8691,16 @@ static void alc888_lenovo_sky_init_hook(struct hda_codec *codec)
 	alc_automute_amp(codec);
 }
 
+static void alc883_vaiott_init_hook(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+
+	spec->autocfg.hp_pins[0] = 0x15;
+	spec->autocfg.speaker_pins[0] = 0x14;
+	spec->autocfg.speaker_pins[1] = 0x17;
+	alc_automute_amp(codec);
+}
+
 /*
  * generic initialization of ADC, input mixers and output mixers
  */
@@ -8884,6 +8916,7 @@ static const char *alc883_models[ALC883_MODEL_LAST] = {
 	[ALC883_3ST_6ch_INTEL]	= "3stack-6ch-intel",
 	[ALC1200_ASUS_P5Q]	= "asus-p5q",
 	[ALC889A_MB31]		= "mb31",
+	[ALC883_SONY_VAIO_TT]	= "sony-vaio-tt",
 	[ALC883_AUTO]		= "auto",
 };
 
@@ -8978,6 +9011,7 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = {
 	SND_PCI_QUIRK(0x8086, 0x2503, "82801H", ALC883_MITAC),
 	SND_PCI_QUIRK(0x8086, 0x0022, "DX58SO", ALC883_3ST_6ch_INTEL),
 	SND_PCI_QUIRK(0x8086, 0xd601, "D102GGC", ALC883_3ST_6ch),
+	SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC883_SONY_VAIO_TT),
 	{}
 };
 
@@ -9373,6 +9407,17 @@ static struct alc_config_preset alc883_presets[] = {
 		.unsol_event = alc889A_mb31_unsol_event,
 		.init_hook = alc889A_mb31_automute,
 	},
+	[ALC883_SONY_VAIO_TT] = {
+		.mixers = { alc883_vaiott_mixer },
+		.init_verbs = { alc883_init_verbs, alc883_vaiott_verbs },
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
+		.channel_mode = alc883_3ST_2ch_modes,
+		.input_mux = &alc883_capture_source,
+		.unsol_event = alc_automute_amp_unsol_event,
+		.init_hook = alc883_vaiott_init_hook,
+	},
 };
 
 
-- 
cgit v0.10.2


From 6bc5874a1ddf98ac0fe6c4eab7d286c11cb1c748 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 12:15:51 +0200
Subject: ALSA: ctxfi - Fix previous fix for 64bit DMA

Remove unneeded substitution to 32bit int to make it really working.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index fd5f454..e530a6d 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1883,18 +1883,17 @@ static int hw_card_start(struct hw *hw)
 	int err = 0;
 	struct pci_dev *pci = hw->pci;
 	u16 subsys_id = 0;
-	unsigned int dma_mask = 0;
 
 	err = pci_enable_device(pci);
 	if (err < 0)
 		return err;
 
 	/* Set DMA transfer mask */
-	dma_mask = CT_XFI_DMA_MASK;
-	if (pci_set_dma_mask(pci, dma_mask) < 0 ||
-	    pci_set_consistent_dma_mask(pci, dma_mask) < 0) {
+	if (pci_set_dma_mask(pci, CT_XFI_DMA_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, CT_XFI_DMA_MASK) < 0) {
 		printk(KERN_ERR "architecture does not support PCI "
-				"busmaster DMA with mask 0x%x\n", dma_mask);
+				"busmaster DMA with mask 0x%llx\n",
+		       CT_XFI_DMA_MASK);
 		err = -ENXIO;
 		goto error1;
 	}
-- 
cgit v0.10.2


From 32cb055b57eab803ea82b76dc913b0378e5af145 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Jun 2009 15:38:36 +0800
Subject: agp/intel: Add support for new chipsets

Both desktop and mobile versions are added.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 3686912..7a748fa 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -46,6 +46,10 @@
 #define PCI_DEVICE_ID_INTEL_G45_IG          0x2E22
 #define PCI_DEVICE_ID_INTEL_G41_HB          0x2E30
 #define PCI_DEVICE_ID_INTEL_G41_IG          0x2E32
+#define PCI_DEVICE_ID_INTEL_IGDNG_D_HB	    0x0040
+#define PCI_DEVICE_ID_INTEL_IGDNG_D_IG	    0x0042
+#define PCI_DEVICE_ID_INTEL_IGDNG_M_HB	    0x0044
+#define PCI_DEVICE_ID_INTEL_IGDNG_M_IG	    0x0046
 
 /* cover 915 and 945 variants */
 #define IS_I915 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_E7221_HB || \
@@ -75,7 +79,9 @@
 		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_Q45_HB || \
 		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \
 		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \
-		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB)
+		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \
+		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \
+		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB)
 
 extern int agp_memory_reserved;
 
@@ -1211,6 +1217,8 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size)
 	case PCI_DEVICE_ID_INTEL_Q45_HB:
 	case PCI_DEVICE_ID_INTEL_G45_HB:
 	case PCI_DEVICE_ID_INTEL_G41_HB:
+	case PCI_DEVICE_ID_INTEL_IGDNG_D_HB:
+	case PCI_DEVICE_ID_INTEL_IGDNG_M_HB:
 		*gtt_offset = *gtt_size = MB(2);
 		break;
 	default:
@@ -2186,6 +2194,10 @@ static const struct intel_driver_description {
 	    "G45/G43", NULL, &intel_i965_driver },
 	{ PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0,
 	    "G41", NULL, &intel_i965_driver },
+	{ PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0,
+	    "IGDNG/D", NULL, &intel_i965_driver },
+	{ PCI_DEVICE_ID_INTEL_IGDNG_M_HB, PCI_DEVICE_ID_INTEL_IGDNG_M_IG, 0,
+	    "IGDNG/M", NULL, &intel_i965_driver },
 	{ 0, 0, 0, NULL, NULL, NULL }
 };
 
@@ -2387,6 +2399,8 @@ static struct pci_device_id agp_intel_pci_table[] = {
 	ID(PCI_DEVICE_ID_INTEL_Q45_HB),
 	ID(PCI_DEVICE_ID_INTEL_G45_HB),
 	ID(PCI_DEVICE_ID_INTEL_G41_HB),
+	ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB),
+	ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB),
 	{ }
 };
 
-- 
cgit v0.10.2


From f250c030a87273f8838a2302bee7c2b4d03e9151 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 13:18:41 +0200
Subject: perf record: Split out counter creation into a helper function

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index bf59df5..7f2d7ce 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -336,65 +336,71 @@ static void synthesize_events(void)
 	closedir(proc);
 }
 
-static void open_counters(int cpu, pid_t pid)
+static int group_fd;
+
+static void create_counter(int counter, int cpu, pid_t pid)
 {
 	struct perf_counter_attr attr;
-	int counter, group_fd;
 	int track = 1;
 
-	if (pid > 0) {
-		pid_synthesize_comm_event(pid, 0);
-		pid_synthesize_mmap_events(pid);
-	}
+	memset(&attr, 0, sizeof(attr));
+	attr.config		= event_id[counter];
+	attr.sample_period	= event_count[counter];
+	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+	attr.mmap		= track;
+	attr.comm		= track;
+	attr.inherit	= (cpu < 0) && inherit;
 
-	group_fd = -1;
-	for (counter = 0; counter < nr_counters; counter++) {
+	track = 0; /* only the first counter needs these */
 
-		memset(&attr, 0, sizeof(attr));
-		attr.config		= event_id[counter];
-		attr.sample_period	= event_count[counter];
-		attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-		attr.mmap		= track;
-		attr.comm		= track;
-		attr.inherit	= (cpu < 0) && inherit;
+	fd[nr_cpu][counter] = sys_perf_counter_open(&attr, pid, cpu, group_fd, 0);
 
-		track = 0; // only the first counter needs these
+	if (fd[nr_cpu][counter] < 0) {
+		int err = errno;
 
-		fd[nr_cpu][counter] =
-			sys_perf_counter_open(&attr, pid, cpu, group_fd, 0);
+		error("syscall returned with %d (%s)\n",
+				fd[nr_cpu][counter], strerror(err));
+		if (err == EPERM)
+			printf("Are you root?\n");
+		exit(-1);
+	}
+	assert(fd[nr_cpu][counter] >= 0);
+	fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
 
-		if (fd[nr_cpu][counter] < 0) {
-			int err = errno;
+	/*
+	 * First counter acts as the group leader:
+	 */
+	if (group && group_fd == -1)
+		group_fd = fd[nr_cpu][counter];
+
+	event_array[nr_poll].fd = fd[nr_cpu][counter];
+	event_array[nr_poll].events = POLLIN;
+	nr_poll++;
+
+	mmap_array[nr_cpu][counter].counter = counter;
+	mmap_array[nr_cpu][counter].prev = 0;
+	mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
+	mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+			PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
+	if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
+		error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+		exit(-1);
+	}
+}
 
-			error("syscall returned with %d (%s)\n",
-					fd[nr_cpu][counter], strerror(err));
-			if (err == EPERM)
-				printf("Are you root?\n");
-			exit(-1);
-		}
-		assert(fd[nr_cpu][counter] >= 0);
-		fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
+static void open_counters(int cpu, pid_t pid)
+{
+	int counter;
 
-		/*
-		 * First counter acts as the group leader:
-		 */
-		if (group && group_fd == -1)
-			group_fd = fd[nr_cpu][counter];
-
-		event_array[nr_poll].fd = fd[nr_cpu][counter];
-		event_array[nr_poll].events = POLLIN;
-		nr_poll++;
-
-		mmap_array[nr_cpu][counter].counter = counter;
-		mmap_array[nr_cpu][counter].prev = 0;
-		mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
-		mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-				PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
-		if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
-			error("failed to mmap with %d (%s)\n", errno, strerror(errno));
-			exit(-1);
-		}
+	if (pid > 0) {
+		pid_synthesize_comm_event(pid, 0);
+		pid_synthesize_mmap_events(pid);
 	}
+
+	group_fd = -1;
+	for (counter = 0; counter < nr_counters; counter++)
+		create_counter(counter, cpu, pid);
+
 	nr_cpu++;
 }
 
-- 
cgit v0.10.2


From 280da227c870a50f669de0c8d46bfb2c62da9995 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Jun 2009 15:38:37 +0800
Subject: drm/i915: Add chipset/feature defines for for new chipsets

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
[anholt: dropped drm_pciids.h hunk to avoid loading an incomplete driver]
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e0fac5f..ded9e78 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -796,7 +796,9 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
 		       (dev)->pci_device == 0x2E02 || \
 		       (dev)->pci_device == 0x2E12 || \
 		       (dev)->pci_device == 0x2E22 || \
-		       (dev)->pci_device == 0x2E32)
+		       (dev)->pci_device == 0x2E32 || \
+		       (dev)->pci_device == 0x0042 || \
+		       (dev)->pci_device == 0x0046)
 
 #define IS_I965GM(dev) ((dev)->pci_device == 0x2A02 || \
 			(dev)->pci_device == 0x2A12)
@@ -818,20 +820,26 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
 			(dev)->pci_device == 0x29D2 ||  \
 			(IS_IGD(dev)))
 
+#define IS_IGDNG_D(dev)	((dev)->pci_device == 0x0042)
+#define IS_IGDNG_M(dev)	((dev)->pci_device == 0x0046)
+#define IS_IGDNG(dev)	(IS_IGDNG_D(dev) || IS_IGDNG_M(dev))
+
 #define IS_I9XX(dev) (IS_I915G(dev) || IS_I915GM(dev) || IS_I945G(dev) || \
-		      IS_I945GM(dev) || IS_I965G(dev) || IS_G33(dev))
+		      IS_I945GM(dev) || IS_I965G(dev) || IS_G33(dev) || \
+		      IS_IGDNG(dev))
 
 #define IS_MOBILE(dev) (IS_I830(dev) || IS_I85X(dev) || IS_I915GM(dev) || \
 			IS_I945GM(dev) || IS_I965GM(dev) || IS_GM45(dev) || \
-			IS_IGD(dev))
+			IS_IGD(dev) || IS_IGDNG_M(dev))
 
-#define I915_NEED_GFX_HWS(dev) (IS_G33(dev) || IS_GM45(dev) || IS_G4X(dev))
+#define I915_NEED_GFX_HWS(dev) (IS_G33(dev) || IS_GM45(dev) || IS_G4X(dev) || \
+				IS_IGDNG(dev))
 /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
  * rows, which changed the alignment requirements and fence programming.
  */
 #define HAS_128_BYTE_Y_TILING(dev) (IS_I9XX(dev) && !(IS_I915G(dev) || \
 						      IS_I915GM(dev)))
-#define SUPPORTS_INTEGRATED_HDMI(dev)	(IS_G4X(dev))
+#define SUPPORTS_INTEGRATED_HDMI(dev)	(IS_G4X(dev) || IS_IGDNG(dev))
 #define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev))
 
 #define PRIMARY_RINGBUFFER_SIZE         (128*1024)
-- 
cgit v0.10.2


From b9055052d3e0388b4a5e8c3e0bbab665c5996f50 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Jun 2009 15:38:38 +0800
Subject: drm/i915: Add new chipset register definitions

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 375569d..99681cf 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -450,6 +450,13 @@
 #define   PLLB_REF_INPUT_SPREADSPECTRUMIN (3 << 13)
 #define   PLL_REF_INPUT_MASK		(3 << 13)
 #define   PLL_LOAD_PULSE_PHASE_SHIFT		9
+/* IGDNG */
+# define PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT     9
+# define PLL_REF_SDVO_HDMI_MULTIPLIER_MASK      (7 << 9)
+# define PLL_REF_SDVO_HDMI_MULTIPLIER(x)	(((x)-1) << 9)
+# define DPLL_FPA1_P1_POST_DIV_SHIFT            0
+# define DPLL_FPA1_P1_POST_DIV_MASK             0xff
+
 /*
  * Parallel to Serial Load Pulse phase selection.
  * Selects the phase for the 10X DPLL clock for the PCIe
@@ -1517,4 +1524,444 @@
 # define VGA_2X_MODE				(1 << 30)
 # define VGA_PIPE_B_SELECT			(1 << 29)
 
+/* IGDNG */
+
+#define CPU_VGACNTRL	0x41000
+
+#define DIGITAL_PORT_HOTPLUG_CNTRL      0x44030
+#define  DIGITAL_PORTA_HOTPLUG_ENABLE           (1 << 4)
+#define  DIGITAL_PORTA_SHORT_PULSE_2MS          (0 << 2)
+#define  DIGITAL_PORTA_SHORT_PULSE_4_5MS        (1 << 2)
+#define  DIGITAL_PORTA_SHORT_PULSE_6MS          (2 << 2)
+#define  DIGITAL_PORTA_SHORT_PULSE_100MS        (3 << 2)
+#define  DIGITAL_PORTA_NO_DETECT                (0 << 0)
+#define  DIGITAL_PORTA_LONG_PULSE_DETECT_MASK   (1 << 1)
+#define  DIGITAL_PORTA_SHORT_PULSE_DETECT_MASK  (1 << 0)
+
+/* refresh rate hardware control */
+#define RR_HW_CTL       0x45300
+#define  RR_HW_LOW_POWER_FRAMES_MASK    0xff
+#define  RR_HW_HIGH_POWER_FRAMES_MASK   0xff00
+
+#define FDI_PLL_BIOS_0  0x46000
+#define FDI_PLL_BIOS_1  0x46004
+#define FDI_PLL_BIOS_2  0x46008
+#define DISPLAY_PORT_PLL_BIOS_0         0x4600c
+#define DISPLAY_PORT_PLL_BIOS_1         0x46010
+#define DISPLAY_PORT_PLL_BIOS_2         0x46014
+
+#define FDI_PLL_FREQ_CTL        0x46030
+#define  FDI_PLL_FREQ_CHANGE_REQUEST    (1<<24)
+#define  FDI_PLL_FREQ_LOCK_LIMIT_MASK   0xfff00
+#define  FDI_PLL_FREQ_DISABLE_COUNT_LIMIT_MASK  0xff
+
+
+#define PIPEA_DATA_M1           0x60030
+#define  TU_SIZE(x)             (((x)-1) << 25) /* default size 64 */
+#define  TU_SIZE_MASK           0x7e000000
+#define  PIPEA_DATA_M1_OFFSET   0
+#define PIPEA_DATA_N1           0x60034
+#define  PIPEA_DATA_N1_OFFSET   0
+
+#define PIPEA_DATA_M2           0x60038
+#define  PIPEA_DATA_M2_OFFSET   0
+#define PIPEA_DATA_N2           0x6003c
+#define  PIPEA_DATA_N2_OFFSET   0
+
+#define PIPEA_LINK_M1           0x60040
+#define  PIPEA_LINK_M1_OFFSET   0
+#define PIPEA_LINK_N1           0x60044
+#define  PIPEA_LINK_N1_OFFSET   0
+
+#define PIPEA_LINK_M2           0x60048
+#define  PIPEA_LINK_M2_OFFSET   0
+#define PIPEA_LINK_N2           0x6004c
+#define  PIPEA_LINK_N2_OFFSET   0
+
+/* PIPEB timing regs are same start from 0x61000 */
+
+#define PIPEB_DATA_M1           0x61030
+#define  PIPEB_DATA_M1_OFFSET   0
+#define PIPEB_DATA_N1           0x61034
+#define  PIPEB_DATA_N1_OFFSET   0
+
+#define PIPEB_DATA_M2           0x61038
+#define  PIPEB_DATA_M2_OFFSET   0
+#define PIPEB_DATA_N2           0x6103c
+#define  PIPEB_DATA_N2_OFFSET   0
+
+#define PIPEB_LINK_M1           0x61040
+#define  PIPEB_LINK_M1_OFFSET   0
+#define PIPEB_LINK_N1           0x61044
+#define  PIPEB_LINK_N1_OFFSET   0
+
+#define PIPEB_LINK_M2           0x61048
+#define  PIPEB_LINK_M2_OFFSET   0
+#define PIPEB_LINK_N2           0x6104c
+#define  PIPEB_LINK_N2_OFFSET   0
+
+/* CPU panel fitter */
+#define PFA_CTL_1               0x68080
+#define PFB_CTL_1               0x68880
+#define  PF_ENABLE              (1<<31)
+
+/* legacy palette */
+#define LGC_PALETTE_A           0x4a000
+#define LGC_PALETTE_B           0x4a800
+
+/* interrupts */
+#define DE_MASTER_IRQ_CONTROL   (1 << 31)
+#define DE_SPRITEB_FLIP_DONE    (1 << 29)
+#define DE_SPRITEA_FLIP_DONE    (1 << 28)
+#define DE_PLANEB_FLIP_DONE     (1 << 27)
+#define DE_PLANEA_FLIP_DONE     (1 << 26)
+#define DE_PCU_EVENT            (1 << 25)
+#define DE_GTT_FAULT            (1 << 24)
+#define DE_POISON               (1 << 23)
+#define DE_PERFORM_COUNTER      (1 << 22)
+#define DE_PCH_EVENT            (1 << 21)
+#define DE_AUX_CHANNEL_A        (1 << 20)
+#define DE_DP_A_HOTPLUG         (1 << 19)
+#define DE_GSE                  (1 << 18)
+#define DE_PIPEB_VBLANK         (1 << 15)
+#define DE_PIPEB_EVEN_FIELD     (1 << 14)
+#define DE_PIPEB_ODD_FIELD      (1 << 13)
+#define DE_PIPEB_LINE_COMPARE   (1 << 12)
+#define DE_PIPEB_VSYNC          (1 << 11)
+#define DE_PIPEB_FIFO_UNDERRUN  (1 << 8)
+#define DE_PIPEA_VBLANK         (1 << 7)
+#define DE_PIPEA_EVEN_FIELD     (1 << 6)
+#define DE_PIPEA_ODD_FIELD      (1 << 5)
+#define DE_PIPEA_LINE_COMPARE   (1 << 4)
+#define DE_PIPEA_VSYNC          (1 << 3)
+#define DE_PIPEA_FIFO_UNDERRUN  (1 << 0)
+
+#define DEISR   0x44000
+#define DEIMR   0x44004
+#define DEIIR   0x44008
+#define DEIER   0x4400c
+
+/* GT interrupt */
+#define GT_SYNC_STATUS          (1 << 2)
+#define GT_USER_INTERRUPT       (1 << 0)
+
+#define GTISR   0x44010
+#define GTIMR   0x44014
+#define GTIIR   0x44018
+#define GTIER   0x4401c
+
+/* PCH */
+
+/* south display engine interrupt */
+#define SDE_CRT_HOTPLUG         (1 << 11)
+#define SDE_PORTD_HOTPLUG       (1 << 10)
+#define SDE_PORTC_HOTPLUG       (1 << 9)
+#define SDE_PORTB_HOTPLUG       (1 << 8)
+#define SDE_SDVOB_HOTPLUG       (1 << 6)
+
+#define SDEISR  0xc4000
+#define SDEIMR  0xc4004
+#define SDEIIR  0xc4008
+#define SDEIER  0xc400c
+
+/* digital port hotplug */
+#define PCH_PORT_HOTPLUG        0xc4030
+#define PORTD_HOTPLUG_ENABLE            (1 << 20)
+#define PORTD_PULSE_DURATION_2ms        (0)
+#define PORTD_PULSE_DURATION_4_5ms      (1 << 18)
+#define PORTD_PULSE_DURATION_6ms        (2 << 18)
+#define PORTD_PULSE_DURATION_100ms      (3 << 18)
+#define PORTD_HOTPLUG_NO_DETECT         (0)
+#define PORTD_HOTPLUG_SHORT_DETECT      (1 << 16)
+#define PORTD_HOTPLUG_LONG_DETECT       (1 << 17)
+#define PORTC_HOTPLUG_ENABLE            (1 << 12)
+#define PORTC_PULSE_DURATION_2ms        (0)
+#define PORTC_PULSE_DURATION_4_5ms      (1 << 10)
+#define PORTC_PULSE_DURATION_6ms        (2 << 10)
+#define PORTC_PULSE_DURATION_100ms      (3 << 10)
+#define PORTC_HOTPLUG_NO_DETECT         (0)
+#define PORTC_HOTPLUG_SHORT_DETECT      (1 << 8)
+#define PORTC_HOTPLUG_LONG_DETECT       (1 << 9)
+#define PORTB_HOTPLUG_ENABLE            (1 << 4)
+#define PORTB_PULSE_DURATION_2ms        (0)
+#define PORTB_PULSE_DURATION_4_5ms      (1 << 2)
+#define PORTB_PULSE_DURATION_6ms        (2 << 2)
+#define PORTB_PULSE_DURATION_100ms      (3 << 2)
+#define PORTB_HOTPLUG_NO_DETECT         (0)
+#define PORTB_HOTPLUG_SHORT_DETECT      (1 << 0)
+#define PORTB_HOTPLUG_LONG_DETECT       (1 << 1)
+
+#define PCH_GPIOA               0xc5010
+#define PCH_GPIOB               0xc5014
+#define PCH_GPIOC               0xc5018
+#define PCH_GPIOD               0xc501c
+#define PCH_GPIOE               0xc5020
+#define PCH_GPIOF               0xc5024
+
+#define PCH_DPLL_A              0xc6014
+#define PCH_DPLL_B              0xc6018
+
+#define PCH_FPA0                0xc6040
+#define PCH_FPA1                0xc6044
+#define PCH_FPB0                0xc6048
+#define PCH_FPB1                0xc604c
+
+#define PCH_DPLL_TEST           0xc606c
+
+#define PCH_DREF_CONTROL        0xC6200
+#define  DREF_CONTROL_MASK      0x7fc3
+#define  DREF_CPU_SOURCE_OUTPUT_DISABLE         (0<<13)
+#define  DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD      (2<<13)
+#define  DREF_CPU_SOURCE_OUTPUT_NONSPREAD       (3<<13)
+#define  DREF_CPU_SOURCE_OUTPUT_MASK		(3<<13)
+#define  DREF_SSC_SOURCE_DISABLE                (0<<11)
+#define  DREF_SSC_SOURCE_ENABLE                 (2<<11)
+#define  DREF_SSC_SOURCE_MASK			(2<<11)
+#define  DREF_NONSPREAD_SOURCE_DISABLE          (0<<9)
+#define  DREF_NONSPREAD_CK505_ENABLE		(1<<9)
+#define  DREF_NONSPREAD_SOURCE_ENABLE           (2<<9)
+#define  DREF_NONSPREAD_SOURCE_MASK		(2<<9)
+#define  DREF_SUPERSPREAD_SOURCE_DISABLE        (0<<7)
+#define  DREF_SUPERSPREAD_SOURCE_ENABLE         (2<<7)
+#define  DREF_SSC4_DOWNSPREAD                   (0<<6)
+#define  DREF_SSC4_CENTERSPREAD                 (1<<6)
+#define  DREF_SSC1_DISABLE                      (0<<1)
+#define  DREF_SSC1_ENABLE                       (1<<1)
+#define  DREF_SSC4_DISABLE                      (0)
+#define  DREF_SSC4_ENABLE                       (1)
+
+#define PCH_RAWCLK_FREQ         0xc6204
+#define  FDL_TP1_TIMER_SHIFT    12
+#define  FDL_TP1_TIMER_MASK     (3<<12)
+#define  FDL_TP2_TIMER_SHIFT    10
+#define  FDL_TP2_TIMER_MASK     (3<<10)
+#define  RAWCLK_FREQ_MASK       0x3ff
+
+#define PCH_DPLL_TMR_CFG        0xc6208
+
+#define PCH_SSC4_PARMS          0xc6210
+#define PCH_SSC4_AUX_PARMS      0xc6214
+
+/* transcoder */
+
+#define TRANS_HTOTAL_A          0xe0000
+#define  TRANS_HTOTAL_SHIFT     16
+#define  TRANS_HACTIVE_SHIFT    0
+#define TRANS_HBLANK_A          0xe0004
+#define  TRANS_HBLANK_END_SHIFT 16
+#define  TRANS_HBLANK_START_SHIFT 0
+#define TRANS_HSYNC_A           0xe0008
+#define  TRANS_HSYNC_END_SHIFT  16
+#define  TRANS_HSYNC_START_SHIFT 0
+#define TRANS_VTOTAL_A          0xe000c
+#define  TRANS_VTOTAL_SHIFT     16
+#define  TRANS_VACTIVE_SHIFT    0
+#define TRANS_VBLANK_A          0xe0010
+#define  TRANS_VBLANK_END_SHIFT 16
+#define  TRANS_VBLANK_START_SHIFT 0
+#define TRANS_VSYNC_A           0xe0014
+#define  TRANS_VSYNC_END_SHIFT  16
+#define  TRANS_VSYNC_START_SHIFT 0
+
+#define TRANSA_DATA_M1          0xe0030
+#define TRANSA_DATA_N1          0xe0034
+#define TRANSA_DATA_M2          0xe0038
+#define TRANSA_DATA_N2          0xe003c
+#define TRANSA_DP_LINK_M1       0xe0040
+#define TRANSA_DP_LINK_N1       0xe0044
+#define TRANSA_DP_LINK_M2       0xe0048
+#define TRANSA_DP_LINK_N2       0xe004c
+
+#define TRANS_HTOTAL_B          0xe1000
+#define TRANS_HBLANK_B          0xe1004
+#define TRANS_HSYNC_B           0xe1008
+#define TRANS_VTOTAL_B          0xe100c
+#define TRANS_VBLANK_B          0xe1010
+#define TRANS_VSYNC_B           0xe1014
+
+#define TRANSB_DATA_M1          0xe1030
+#define TRANSB_DATA_N1          0xe1034
+#define TRANSB_DATA_M2          0xe1038
+#define TRANSB_DATA_N2          0xe103c
+#define TRANSB_DP_LINK_M1       0xe1040
+#define TRANSB_DP_LINK_N1       0xe1044
+#define TRANSB_DP_LINK_M2       0xe1048
+#define TRANSB_DP_LINK_N2       0xe104c
+
+#define TRANSACONF              0xf0008
+#define TRANSBCONF              0xf1008
+#define  TRANS_DISABLE          (0<<31)
+#define  TRANS_ENABLE           (1<<31)
+#define  TRANS_STATE_MASK       (1<<30)
+#define  TRANS_STATE_DISABLE    (0<<30)
+#define  TRANS_STATE_ENABLE     (1<<30)
+#define  TRANS_FSYNC_DELAY_HB1  (0<<27)
+#define  TRANS_FSYNC_DELAY_HB2  (1<<27)
+#define  TRANS_FSYNC_DELAY_HB3  (2<<27)
+#define  TRANS_FSYNC_DELAY_HB4  (3<<27)
+#define  TRANS_DP_AUDIO_ONLY    (1<<26)
+#define  TRANS_DP_VIDEO_AUDIO   (0<<26)
+#define  TRANS_PROGRESSIVE      (0<<21)
+#define  TRANS_8BPC             (0<<5)
+#define  TRANS_10BPC            (1<<5)
+#define  TRANS_6BPC             (2<<5)
+#define  TRANS_12BPC            (3<<5)
+
+#define FDI_RXA_CHICKEN         0xc200c
+#define FDI_RXB_CHICKEN         0xc2010
+#define  FDI_RX_PHASE_SYNC_POINTER_ENABLE       (1)
+
+/* CPU: FDI_TX */
+#define FDI_TXA_CTL             0x60100
+#define FDI_TXB_CTL             0x61100
+#define  FDI_TX_DISABLE         (0<<31)
+#define  FDI_TX_ENABLE          (1<<31)
+#define  FDI_LINK_TRAIN_PATTERN_1       (0<<28)
+#define  FDI_LINK_TRAIN_PATTERN_2       (1<<28)
+#define  FDI_LINK_TRAIN_PATTERN_IDLE    (2<<28)
+#define  FDI_LINK_TRAIN_NONE            (3<<28)
+#define  FDI_LINK_TRAIN_VOLTAGE_0_4V    (0<<25)
+#define  FDI_LINK_TRAIN_VOLTAGE_0_6V    (1<<25)
+#define  FDI_LINK_TRAIN_VOLTAGE_0_8V    (2<<25)
+#define  FDI_LINK_TRAIN_VOLTAGE_1_2V    (3<<25)
+#define  FDI_LINK_TRAIN_PRE_EMPHASIS_NONE (0<<22)
+#define  FDI_LINK_TRAIN_PRE_EMPHASIS_1_5X (1<<22)
+#define  FDI_LINK_TRAIN_PRE_EMPHASIS_2X   (2<<22)
+#define  FDI_LINK_TRAIN_PRE_EMPHASIS_3X   (3<<22)
+#define  FDI_DP_PORT_WIDTH_X1           (0<<19)
+#define  FDI_DP_PORT_WIDTH_X2           (1<<19)
+#define  FDI_DP_PORT_WIDTH_X3           (2<<19)
+#define  FDI_DP_PORT_WIDTH_X4           (3<<19)
+#define  FDI_TX_ENHANCE_FRAME_ENABLE    (1<<18)
+/* IGDNG: hardwired to 1 */
+#define  FDI_TX_PLL_ENABLE              (1<<14)
+/* both Tx and Rx */
+#define  FDI_SCRAMBLING_ENABLE          (0<<7)
+#define  FDI_SCRAMBLING_DISABLE         (1<<7)
+
+/* FDI_RX, FDI_X is hard-wired to Transcoder_X */
+#define FDI_RXA_CTL             0xf000c
+#define FDI_RXB_CTL             0xf100c
+#define  FDI_RX_ENABLE          (1<<31)
+#define  FDI_RX_DISABLE         (0<<31)
+/* train, dp width same as FDI_TX */
+#define  FDI_DP_PORT_WIDTH_X8           (7<<19)
+#define  FDI_8BPC                       (0<<16)
+#define  FDI_10BPC                      (1<<16)
+#define  FDI_6BPC                       (2<<16)
+#define  FDI_12BPC                      (3<<16)
+#define  FDI_LINK_REVERSE_OVERWRITE     (1<<15)
+#define  FDI_DMI_LINK_REVERSE_MASK      (1<<14)
+#define  FDI_RX_PLL_ENABLE              (1<<13)
+#define  FDI_FS_ERR_CORRECT_ENABLE      (1<<11)
+#define  FDI_FE_ERR_CORRECT_ENABLE      (1<<10)
+#define  FDI_FS_ERR_REPORT_ENABLE       (1<<9)
+#define  FDI_FE_ERR_REPORT_ENABLE       (1<<8)
+#define  FDI_RX_ENHANCE_FRAME_ENABLE    (1<<6)
+#define  FDI_SEL_RAWCLK                 (0<<4)
+#define  FDI_SEL_PCDCLK                 (1<<4)
+
+#define FDI_RXA_MISC            0xf0010
+#define FDI_RXB_MISC            0xf1010
+#define FDI_RXA_TUSIZE1         0xf0030
+#define FDI_RXA_TUSIZE2         0xf0038
+#define FDI_RXB_TUSIZE1         0xf1030
+#define FDI_RXB_TUSIZE2         0xf1038
+
+/* FDI_RX interrupt register format */
+#define FDI_RX_INTER_LANE_ALIGN         (1<<10)
+#define FDI_RX_SYMBOL_LOCK              (1<<9) /* train 2 */
+#define FDI_RX_BIT_LOCK                 (1<<8) /* train 1 */
+#define FDI_RX_TRAIN_PATTERN_2_FAIL     (1<<7)
+#define FDI_RX_FS_CODE_ERR              (1<<6)
+#define FDI_RX_FE_CODE_ERR              (1<<5)
+#define FDI_RX_SYMBOL_ERR_RATE_ABOVE    (1<<4)
+#define FDI_RX_HDCP_LINK_FAIL           (1<<3)
+#define FDI_RX_PIXEL_FIFO_OVERFLOW      (1<<2)
+#define FDI_RX_CROSS_CLOCK_OVERFLOW     (1<<1)
+#define FDI_RX_SYMBOL_QUEUE_OVERFLOW    (1<<0)
+
+#define FDI_RXA_IIR             0xf0014
+#define FDI_RXA_IMR             0xf0018
+#define FDI_RXB_IIR             0xf1014
+#define FDI_RXB_IMR             0xf1018
+
+#define FDI_PLL_CTL_1           0xfe000
+#define FDI_PLL_CTL_2           0xfe004
+
+/* CRT */
+#define PCH_ADPA                0xe1100
+#define  ADPA_TRANS_SELECT_MASK (1<<30)
+#define  ADPA_TRANS_A_SELECT    0
+#define  ADPA_TRANS_B_SELECT    (1<<30)
+#define  ADPA_CRT_HOTPLUG_MASK  0x03ff0000 /* bit 25-16 */
+#define  ADPA_CRT_HOTPLUG_MONITOR_NONE  (0<<24)
+#define  ADPA_CRT_HOTPLUG_MONITOR_MASK  (3<<24)
+#define  ADPA_CRT_HOTPLUG_MONITOR_COLOR (3<<24)
+#define  ADPA_CRT_HOTPLUG_MONITOR_MONO  (2<<24)
+#define  ADPA_CRT_HOTPLUG_ENABLE        (1<<23)
+#define  ADPA_CRT_HOTPLUG_PERIOD_64     (0<<22)
+#define  ADPA_CRT_HOTPLUG_PERIOD_128    (1<<22)
+#define  ADPA_CRT_HOTPLUG_WARMUP_5MS    (0<<21)
+#define  ADPA_CRT_HOTPLUG_WARMUP_10MS   (1<<21)
+#define  ADPA_CRT_HOTPLUG_SAMPLE_2S     (0<<20)
+#define  ADPA_CRT_HOTPLUG_SAMPLE_4S     (1<<20)
+#define  ADPA_CRT_HOTPLUG_VOLTAGE_40    (0<<18)
+#define  ADPA_CRT_HOTPLUG_VOLTAGE_50    (1<<18)
+#define  ADPA_CRT_HOTPLUG_VOLTAGE_60    (2<<18)
+#define  ADPA_CRT_HOTPLUG_VOLTAGE_70    (3<<18)
+#define  ADPA_CRT_HOTPLUG_VOLREF_325MV  (0<<17)
+#define  ADPA_CRT_HOTPLUG_VOLREF_475MV  (1<<17)
+#define  ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16)
+
+/* or SDVOB */
+#define HDMIB   0xe1140
+#define  PORT_ENABLE    (1 << 31)
+#define  TRANSCODER_A   (0)
+#define  TRANSCODER_B   (1 << 30)
+#define  COLOR_FORMAT_8bpc      (0)
+#define  COLOR_FORMAT_12bpc     (3 << 26)
+#define  SDVOB_HOTPLUG_ENABLE   (1 << 23)
+#define  SDVO_ENCODING          (0)
+#define  TMDS_ENCODING          (2 << 10)
+#define  NULL_PACKET_VSYNC_ENABLE       (1 << 9)
+#define  SDVOB_BORDER_ENABLE    (1 << 7)
+#define  AUDIO_ENABLE           (1 << 6)
+#define  VSYNC_ACTIVE_HIGH      (1 << 4)
+#define  HSYNC_ACTIVE_HIGH      (1 << 3)
+#define  PORT_DETECTED          (1 << 2)
+
+#define HDMIC   0xe1150
+#define HDMID   0xe1160
+
+#define PCH_LVDS	0xe1180
+#define  LVDS_DETECTED	(1 << 1)
+
+#define BLC_PWM_CPU_CTL2	0x48250
+#define  PWM_ENABLE		(1 << 31)
+#define  PWM_PIPE_A		(0 << 29)
+#define  PWM_PIPE_B		(1 << 29)
+#define BLC_PWM_CPU_CTL		0x48254
+
+#define BLC_PWM_PCH_CTL1	0xc8250
+#define  PWM_PCH_ENABLE		(1 << 31)
+#define  PWM_POLARITY_ACTIVE_LOW	(1 << 29)
+#define  PWM_POLARITY_ACTIVE_HIGH	(0 << 29)
+#define  PWM_POLARITY_ACTIVE_LOW2	(1 << 28)
+#define  PWM_POLARITY_ACTIVE_HIGH2	(0 << 28)
+
+#define BLC_PWM_PCH_CTL2	0xc8254
+
+#define PCH_PP_STATUS		0xc7200
+#define PCH_PP_CONTROL		0xc7204
+#define  EDP_FORCE_VDD		(1 << 3)
+#define  EDP_BLC_ENABLE		(1 << 2)
+#define  PANEL_POWER_RESET	(1 << 1)
+#define  PANEL_POWER_OFF	(0 << 0)
+#define  PANEL_POWER_ON		(1 << 0)
+#define PCH_PP_ON_DELAYS	0xc7208
+#define  EDP_PANEL		(1 << 30)
+#define PCH_PP_OFF_DELAYS	0xc720c
+#define PCH_PP_DIVISOR		0xc7210
+
 #endif /* _I915_REG_H_ */
-- 
cgit v0.10.2


From e170b030dcd6aed11dde2d124c09991ec771f529 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Jun 2009 15:38:40 +0800
Subject: drm/i915: Disable opregion on IGDNG for now

Disable OpRegion support for now until verified on new chipsets.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0c222c2..fa105bd 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1206,7 +1206,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	}
 
 	/* Must be done after probing outputs */
-	intel_opregion_init(dev, 0);
+	/* FIXME: verify on IGDNG */
+	if (!IS_IGDNG(dev))
+		intel_opregion_init(dev, 0);
 
 	return 0;
 
@@ -1240,7 +1242,8 @@ int i915_driver_unload(struct drm_device *dev)
 	if (dev_priv->regs != NULL)
 		iounmap(dev_priv->regs);
 
-	intel_opregion_free(dev, 0);
+	if (!IS_IGDNG(dev))
+		intel_opregion_free(dev, 0);
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		intel_modeset_cleanup(dev);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 98bb4c8..4b0bcbd 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -484,7 +484,9 @@ void i915_disable_vblank(struct drm_device *dev, int pipe)
 void i915_enable_interrupt (struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	opregion_enable_asle(dev);
+
+	if (!IS_IGDNG(dev))
+		opregion_enable_asle(dev);
 	dev_priv->irq_enabled = 1;
 }
 
-- 
cgit v0.10.2


From 2cce0d8740f0d1454d012401257d96c513ce358f Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Jun 2009 15:38:41 +0800
Subject: drm/i915: Disable tiling on IGDNG for now

Swizzle bit detection not working right on it.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 540dd33..07d976b 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -170,6 +170,13 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 		}
 	}
 
+	/* FIXME: check with memory config on IGDNG */
+	if (IS_IGDNG(dev)) {
+		DRM_ERROR("disable tiling on IGDNG...\n");
+		swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+		swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+	}
+
 	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
 	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
 }
-- 
cgit v0.10.2


From 2c07245fb8f7f0a282282e5a9747e46defdb2cc7 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Jun 2009 15:38:42 +0800
Subject: drm/i915: enable kernel modesetting on IGDNG

This adds kernel mode setting on IGDNG with VGA output support.
Note that suspend/resume doesn't work yet.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index fa105bd..5d36059 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -922,7 +922,7 @@ static int i915_probe_agp(struct drm_device *dev, unsigned long *aperture_size,
 	 * Some of the preallocated space is taken by the GTT
 	 * and popup.  GTT is 1K per MB of aperture size, and popup is 4K.
 	 */
-	if (IS_G4X(dev) || IS_IGD(dev))
+	if (IS_G4X(dev) || IS_IGD(dev) || IS_IGDNG(dev))
 		overhead = 4096;
 	else
 		overhead = (*aperture_size / 1024) + 4096;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 640f515..ff9bcca 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -37,9 +37,14 @@ static void intel_crt_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 temp;
+	u32 temp, reg;
 
-	temp = I915_READ(ADPA);
+	if (IS_IGDNG(dev))
+		reg = PCH_ADPA;
+	else
+		reg = ADPA;
+
+	temp = I915_READ(reg);
 	temp &= ~(ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE);
 	temp |= ADPA_DAC_ENABLE;
 
@@ -58,7 +63,7 @@ static void intel_crt_dpms(struct drm_encoder *encoder, int mode)
 		break;
 	}
 
-	I915_WRITE(ADPA, temp);
+	I915_WRITE(reg, temp);
 }
 
 static int intel_crt_mode_valid(struct drm_connector *connector,
@@ -101,17 +106,23 @@ static void intel_crt_mode_set(struct drm_encoder *encoder,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int dpll_md_reg;
 	u32 adpa, dpll_md;
+	u32 adpa_reg;
 
 	if (intel_crtc->pipe == 0)
 		dpll_md_reg = DPLL_A_MD;
 	else
 		dpll_md_reg = DPLL_B_MD;
 
+	if (IS_IGDNG(dev))
+		adpa_reg = PCH_ADPA;
+	else
+		adpa_reg = ADPA;
+
 	/*
 	 * Disable separate mode multiplier used when cloning SDVO to CRT
 	 * XXX this needs to be adjusted when we really are cloning
 	 */
-	if (IS_I965G(dev)) {
+	if (IS_I965G(dev) && !IS_IGDNG(dev)) {
 		dpll_md = I915_READ(dpll_md_reg);
 		I915_WRITE(dpll_md_reg,
 			   dpll_md & ~DPLL_MD_UDI_MULTIPLIER_MASK);
@@ -125,13 +136,53 @@ static void intel_crt_mode_set(struct drm_encoder *encoder,
 
 	if (intel_crtc->pipe == 0) {
 		adpa |= ADPA_PIPE_A_SELECT;
-		I915_WRITE(BCLRPAT_A, 0);
+		if (!IS_IGDNG(dev))
+			I915_WRITE(BCLRPAT_A, 0);
 	} else {
 		adpa |= ADPA_PIPE_B_SELECT;
-		I915_WRITE(BCLRPAT_B, 0);
+		if (!IS_IGDNG(dev))
+			I915_WRITE(BCLRPAT_B, 0);
 	}
 
-	I915_WRITE(ADPA, adpa);
+	I915_WRITE(adpa_reg, adpa);
+}
+
+static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 adpa, temp;
+	bool ret;
+
+	temp = adpa = I915_READ(PCH_ADPA);
+
+	adpa &= ~ADPA_CRT_HOTPLUG_MASK;
+
+	adpa |= (ADPA_CRT_HOTPLUG_PERIOD_128 |
+			ADPA_CRT_HOTPLUG_WARMUP_10MS |
+			ADPA_CRT_HOTPLUG_SAMPLE_4S |
+			ADPA_CRT_HOTPLUG_VOLTAGE_50 | /* default */
+			ADPA_CRT_HOTPLUG_VOLREF_325MV |
+			ADPA_CRT_HOTPLUG_ENABLE |
+			ADPA_CRT_HOTPLUG_FORCE_TRIGGER);
+
+	DRM_DEBUG("pch crt adpa 0x%x", adpa);
+	I915_WRITE(PCH_ADPA, adpa);
+
+	/* This might not be needed as not specified in spec...*/
+	udelay(1000);
+
+	/* Check the status to see if both blue and green are on now */
+	adpa = I915_READ(PCH_ADPA);
+	if ((adpa & ADPA_CRT_HOTPLUG_MONITOR_MASK) ==
+			ADPA_CRT_HOTPLUG_MONITOR_COLOR)
+		ret = true;
+	else
+		ret = false;
+
+	/* restore origin register */
+	I915_WRITE(PCH_ADPA, temp);
+	return ret;
 }
 
 /**
@@ -148,6 +199,10 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 hotplug_en;
 	int i, tries = 0;
+
+	if (IS_IGDNG(dev))
+		return intel_igdng_crt_detect_hotplug(connector);
+
 	/*
 	 * On 4 series desktop, CRT detect sequence need to be done twice
 	 * to get a reliable result.
@@ -427,6 +482,7 @@ void intel_crt_init(struct drm_device *dev)
 {
 	struct drm_connector *connector;
 	struct intel_output *intel_output;
+	u32 i2c_reg;
 
 	intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL);
 	if (!intel_output)
@@ -443,7 +499,11 @@ void intel_crt_init(struct drm_device *dev)
 					  &intel_output->enc);
 
 	/* Set up the DDC bus. */
-	intel_output->ddc_bus = intel_i2c_create(dev, GPIOA, "CRTDDC_A");
+	if (IS_IGDNG(dev))
+		i2c_reg = PCH_GPIOA;
+	else
+		i2c_reg = GPIOA;
+	intel_output->ddc_bus = intel_i2c_create(dev, i2c_reg, "CRTDDC_A");
 	if (!intel_output->ddc_bus) {
 		dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
 			   "failed.\n");
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c9d6f10..2cd6ba6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -137,6 +137,8 @@ struct intel_limit {
 #define INTEL_LIMIT_G4X_DUAL_CHANNEL_LVDS   7
 #define INTEL_LIMIT_IGD_SDVO_DAC    8
 #define INTEL_LIMIT_IGD_LVDS	    9
+#define INTEL_LIMIT_IGDNG_SDVO_DAC  10
+#define INTEL_LIMIT_IGDNG_LVDS	    11
 
 /*The parameter is for SDVO on G4x platform*/
 #define G4X_DOT_SDVO_MIN           25000
@@ -216,12 +218,43 @@ struct intel_limit {
 #define G4X_P2_DUAL_CHANNEL_LVDS_FAST           7
 #define G4X_P2_DUAL_CHANNEL_LVDS_LIMIT          0
 
+/* IGDNG */
+/* as we calculate clock using (register_value + 2) for
+   N/M1/M2, so here the range value for them is (actual_value-2).
+ */
+#define IGDNG_DOT_MIN         25000
+#define IGDNG_DOT_MAX         350000
+#define IGDNG_VCO_MIN         1760000
+#define IGDNG_VCO_MAX         3510000
+#define IGDNG_N_MIN           1
+#define IGDNG_N_MAX           5
+#define IGDNG_M_MIN           79
+#define IGDNG_M_MAX           118
+#define IGDNG_M1_MIN          12
+#define IGDNG_M1_MAX          23
+#define IGDNG_M2_MIN          5
+#define IGDNG_M2_MAX          9
+#define IGDNG_P_SDVO_DAC_MIN  5
+#define IGDNG_P_SDVO_DAC_MAX  80
+#define IGDNG_P_LVDS_MIN      28
+#define IGDNG_P_LVDS_MAX      112
+#define IGDNG_P1_MIN          1
+#define IGDNG_P1_MAX          8
+#define IGDNG_P2_SDVO_DAC_SLOW 10
+#define IGDNG_P2_SDVO_DAC_FAST 5
+#define IGDNG_P2_LVDS_SLOW    14 /* single channel */
+#define IGDNG_P2_LVDS_FAST    7  /* double channel */
+#define IGDNG_P2_DOT_LIMIT    225000 /* 225Mhz */
+
 static bool
 intel_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
 		    int target, int refclk, intel_clock_t *best_clock);
 static bool
 intel_g4x_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
 			int target, int refclk, intel_clock_t *best_clock);
+static bool
+intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
+			int target, int refclk, intel_clock_t *best_clock);
 
 static const intel_limit_t intel_limits[] = {
     { /* INTEL_LIMIT_I8XX_DVO_DAC */
@@ -383,9 +416,47 @@ static const intel_limit_t intel_limits[] = {
 		 .p2_slow = I9XX_P2_LVDS_SLOW,	.p2_fast = I9XX_P2_LVDS_SLOW },
 	.find_pll = intel_find_best_PLL,
     },
-
+    { /* INTEL_LIMIT_IGDNG_SDVO_DAC */
+	.dot = { .min = IGDNG_DOT_MIN,          .max = IGDNG_DOT_MAX },
+	.vco = { .min = IGDNG_VCO_MIN,          .max = IGDNG_VCO_MAX },
+	.n   = { .min = IGDNG_N_MIN,            .max = IGDNG_N_MAX },
+	.m   = { .min = IGDNG_M_MIN,            .max = IGDNG_M_MAX },
+	.m1  = { .min = IGDNG_M1_MIN,           .max = IGDNG_M1_MAX },
+	.m2  = { .min = IGDNG_M2_MIN,           .max = IGDNG_M2_MAX },
+	.p   = { .min = IGDNG_P_SDVO_DAC_MIN,   .max = IGDNG_P_SDVO_DAC_MAX },
+	.p1  = { .min = IGDNG_P1_MIN,           .max = IGDNG_P1_MAX },
+	.p2  = { .dot_limit = IGDNG_P2_DOT_LIMIT,
+		 .p2_slow = IGDNG_P2_SDVO_DAC_SLOW,
+		 .p2_fast = IGDNG_P2_SDVO_DAC_FAST },
+	.find_pll = intel_igdng_find_best_PLL,
+    },
+    { /* INTEL_LIMIT_IGDNG_LVDS */
+	.dot = { .min = IGDNG_DOT_MIN,          .max = IGDNG_DOT_MAX },
+	.vco = { .min = IGDNG_VCO_MIN,          .max = IGDNG_VCO_MAX },
+	.n   = { .min = IGDNG_N_MIN,            .max = IGDNG_N_MAX },
+	.m   = { .min = IGDNG_M_MIN,            .max = IGDNG_M_MAX },
+	.m1  = { .min = IGDNG_M1_MIN,           .max = IGDNG_M1_MAX },
+	.m2  = { .min = IGDNG_M2_MIN,           .max = IGDNG_M2_MAX },
+	.p   = { .min = IGDNG_P_LVDS_MIN,       .max = IGDNG_P_LVDS_MAX },
+	.p1  = { .min = IGDNG_P1_MIN,           .max = IGDNG_P1_MAX },
+	.p2  = { .dot_limit = IGDNG_P2_DOT_LIMIT,
+		 .p2_slow = IGDNG_P2_LVDS_SLOW,
+		 .p2_fast = IGDNG_P2_LVDS_FAST },
+	.find_pll = intel_igdng_find_best_PLL,
+    },
 };
 
+static const intel_limit_t *intel_igdng_limit(struct drm_crtc *crtc)
+{
+	const intel_limit_t *limit;
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
+		limit = &intel_limits[INTEL_LIMIT_IGDNG_LVDS];
+	else
+		limit = &intel_limits[INTEL_LIMIT_IGDNG_SDVO_DAC];
+
+	return limit;
+}
+
 static const intel_limit_t *intel_g4x_limit(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -418,7 +489,9 @@ static const intel_limit_t *intel_limit(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	const intel_limit_t *limit;
 
-	if (IS_G4X(dev)) {
+	if (IS_IGDNG(dev))
+		limit = intel_igdng_limit(crtc);
+	else if (IS_G4X(dev)) {
 		limit = intel_g4x_limit(crtc);
 	} else if (IS_I9XX(dev) && !IS_IGD(dev)) {
 		if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
@@ -630,7 +703,64 @@ intel_g4x_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
 			}
 		}
 	}
+	return found;
+}
+
+static bool
+intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
+			int target, int refclk, intel_clock_t *best_clock)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	intel_clock_t clock;
+	int max_n;
+	bool found;
+	int err_most = 47;
+	found = false;
+
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
+		if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
+		    LVDS_CLKB_POWER_UP)
+			clock.p2 = limit->p2.p2_fast;
+		else
+			clock.p2 = limit->p2.p2_slow;
+	} else {
+		if (target < limit->p2.dot_limit)
+			clock.p2 = limit->p2.p2_slow;
+		else
+			clock.p2 = limit->p2.p2_fast;
+	}
+
+	memset(best_clock, 0, sizeof(*best_clock));
+	max_n = limit->n.max;
+	/* based on hardware requriment prefer smaller n to precision */
+	for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) {
+		/* based on hardware requirment prefere larger m1,m2, p1 */
+		for (clock.m1 = limit->m1.max;
+		     clock.m1 >= limit->m1.min; clock.m1--) {
+			for (clock.m2 = limit->m2.max;
+			     clock.m2 >= limit->m2.min; clock.m2--) {
+				for (clock.p1 = limit->p1.max;
+				     clock.p1 >= limit->p1.min; clock.p1--) {
+					int this_err;
 
+					intel_clock(dev, refclk, &clock);
+					if (!intel_PLL_is_valid(crtc, &clock))
+						continue;
+					this_err = abs((10000 - (target*10000/clock.dot)));
+					if (this_err < err_most) {
+						*best_clock = clock;
+						err_most = this_err;
+						max_n = clock.n;
+						found = true;
+						/* found on first matching */
+						goto out;
+					}
+				}
+			}
+		}
+	}
+out:
 	return found;
 }
 
@@ -785,18 +915,292 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	return 0;
 }
 
+static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->pipe;
+	int pch_dpll_reg = (pipe == 0) ? PCH_DPLL_A : PCH_DPLL_B;
+	int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
+	int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
+	int dspbase_reg = (plane == 0) ? DSPAADDR : DSPBADDR;
+	int fdi_tx_reg = (pipe == 0) ? FDI_TXA_CTL : FDI_TXB_CTL;
+	int fdi_rx_reg = (pipe == 0) ? FDI_RXA_CTL : FDI_RXB_CTL;
+	int fdi_rx_iir_reg = (pipe == 0) ? FDI_RXA_IIR : FDI_RXB_IIR;
+	int fdi_rx_imr_reg = (pipe == 0) ? FDI_RXA_IMR : FDI_RXB_IMR;
+	int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF;
+	int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1;
+	int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
+	int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
+	int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B;
+	int cpu_vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B;
+	int cpu_vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B;
+	int cpu_vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B;
+	int trans_htot_reg = (pipe == 0) ? TRANS_HTOTAL_A : TRANS_HTOTAL_B;
+	int trans_hblank_reg = (pipe == 0) ? TRANS_HBLANK_A : TRANS_HBLANK_B;
+	int trans_hsync_reg = (pipe == 0) ? TRANS_HSYNC_A : TRANS_HSYNC_B;
+	int trans_vtot_reg = (pipe == 0) ? TRANS_VTOTAL_A : TRANS_VTOTAL_B;
+	int trans_vblank_reg = (pipe == 0) ? TRANS_VBLANK_A : TRANS_VBLANK_B;
+	int trans_vsync_reg = (pipe == 0) ? TRANS_VSYNC_A : TRANS_VSYNC_B;
+	u32 temp;
+	int tries = 5, j;
 
+	/* XXX: When our outputs are all unaware of DPMS modes other than off
+	 * and on, we should map those modes to DRM_MODE_DPMS_OFF in the CRTC.
+	 */
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+		DRM_DEBUG("crtc %d dpms on\n", pipe);
+		/* enable PCH DPLL */
+		temp = I915_READ(pch_dpll_reg);
+		if ((temp & DPLL_VCO_ENABLE) == 0) {
+			I915_WRITE(pch_dpll_reg, temp | DPLL_VCO_ENABLE);
+			I915_READ(pch_dpll_reg);
+		}
 
-/**
- * Sets the power management mode of the pipe and plane.
- *
- * This code should probably grow support for turning the cursor off and back
- * on appropriately at the same time as we're turning the pipe off/on.
- */
-static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
+		/* enable PCH FDI RX PLL, wait warmup plus DMI latency */
+		temp = I915_READ(fdi_rx_reg);
+		I915_WRITE(fdi_rx_reg, temp | FDI_RX_PLL_ENABLE |
+				FDI_SEL_PCDCLK |
+				FDI_DP_PORT_WIDTH_X4); /* default 4 lanes */
+		I915_READ(fdi_rx_reg);
+		udelay(200);
+
+		/* Enable CPU FDI TX PLL, always on for IGDNG */
+		temp = I915_READ(fdi_tx_reg);
+		if ((temp & FDI_TX_PLL_ENABLE) == 0) {
+			I915_WRITE(fdi_tx_reg, temp | FDI_TX_PLL_ENABLE);
+			I915_READ(fdi_tx_reg);
+			udelay(100);
+		}
+
+		/* Enable CPU pipe */
+		temp = I915_READ(pipeconf_reg);
+		if ((temp & PIPEACONF_ENABLE) == 0) {
+			I915_WRITE(pipeconf_reg, temp | PIPEACONF_ENABLE);
+			I915_READ(pipeconf_reg);
+			udelay(100);
+		}
+
+		/* configure and enable CPU plane */
+		temp = I915_READ(dspcntr_reg);
+		if ((temp & DISPLAY_PLANE_ENABLE) == 0) {
+			I915_WRITE(dspcntr_reg, temp | DISPLAY_PLANE_ENABLE);
+			/* Flush the plane changes */
+			I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+		}
+
+		/* enable CPU FDI TX and PCH FDI RX */
+		temp = I915_READ(fdi_tx_reg);
+		temp |= FDI_TX_ENABLE;
+		temp |= FDI_DP_PORT_WIDTH_X4; /* default */
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		temp |= FDI_LINK_TRAIN_PATTERN_1;
+		I915_WRITE(fdi_tx_reg, temp);
+		I915_READ(fdi_tx_reg);
+
+		temp = I915_READ(fdi_rx_reg);
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		temp |= FDI_LINK_TRAIN_PATTERN_1;
+		I915_WRITE(fdi_rx_reg, temp | FDI_RX_ENABLE);
+		I915_READ(fdi_rx_reg);
+
+		udelay(150);
+
+		/* Train FDI. */
+		/* umask FDI RX Interrupt symbol_lock and bit_lock bit
+		   for train result */
+		temp = I915_READ(fdi_rx_imr_reg);
+		temp &= ~FDI_RX_SYMBOL_LOCK;
+		temp &= ~FDI_RX_BIT_LOCK;
+		I915_WRITE(fdi_rx_imr_reg, temp);
+		I915_READ(fdi_rx_imr_reg);
+		udelay(150);
+
+		temp = I915_READ(fdi_rx_iir_reg);
+		DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp);
+
+		if ((temp & FDI_RX_BIT_LOCK) == 0) {
+			for (j = 0; j < tries; j++) {
+				temp = I915_READ(fdi_rx_iir_reg);
+				DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp);
+				if (temp & FDI_RX_BIT_LOCK)
+					break;
+				udelay(200);
+			}
+			if (j != tries)
+				I915_WRITE(fdi_rx_iir_reg,
+						temp | FDI_RX_BIT_LOCK);
+			else
+				DRM_DEBUG("train 1 fail\n");
+		} else {
+			I915_WRITE(fdi_rx_iir_reg,
+					temp | FDI_RX_BIT_LOCK);
+			DRM_DEBUG("train 1 ok 2!\n");
+		}
+		temp = I915_READ(fdi_tx_reg);
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		temp |= FDI_LINK_TRAIN_PATTERN_2;
+		I915_WRITE(fdi_tx_reg, temp);
+
+		temp = I915_READ(fdi_rx_reg);
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		temp |= FDI_LINK_TRAIN_PATTERN_2;
+		I915_WRITE(fdi_rx_reg, temp);
+
+		udelay(150);
+
+		temp = I915_READ(fdi_rx_iir_reg);
+		DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp);
+
+		if ((temp & FDI_RX_SYMBOL_LOCK) == 0) {
+			for (j = 0; j < tries; j++) {
+				temp = I915_READ(fdi_rx_iir_reg);
+				DRM_DEBUG("FDI_RX_IIR 0x%x\n", temp);
+				if (temp & FDI_RX_SYMBOL_LOCK)
+					break;
+				udelay(200);
+			}
+			if (j != tries) {
+				I915_WRITE(fdi_rx_iir_reg,
+						temp | FDI_RX_SYMBOL_LOCK);
+				DRM_DEBUG("train 2 ok 1!\n");
+			} else
+				DRM_DEBUG("train 2 fail\n");
+		} else {
+			I915_WRITE(fdi_rx_iir_reg, temp | FDI_RX_SYMBOL_LOCK);
+			DRM_DEBUG("train 2 ok 2!\n");
+		}
+		DRM_DEBUG("train done\n");
+
+		/* set transcoder timing */
+		I915_WRITE(trans_htot_reg, I915_READ(cpu_htot_reg));
+		I915_WRITE(trans_hblank_reg, I915_READ(cpu_hblank_reg));
+		I915_WRITE(trans_hsync_reg, I915_READ(cpu_hsync_reg));
+
+		I915_WRITE(trans_vtot_reg, I915_READ(cpu_vtot_reg));
+		I915_WRITE(trans_vblank_reg, I915_READ(cpu_vblank_reg));
+		I915_WRITE(trans_vsync_reg, I915_READ(cpu_vsync_reg));
+
+		/* enable PCH transcoder */
+		temp = I915_READ(transconf_reg);
+		I915_WRITE(transconf_reg, temp | TRANS_ENABLE);
+		I915_READ(transconf_reg);
+
+		while ((I915_READ(transconf_reg) & TRANS_STATE_ENABLE) == 0)
+			;
+
+		/* enable normal */
+
+		temp = I915_READ(fdi_tx_reg);
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		I915_WRITE(fdi_tx_reg, temp | FDI_LINK_TRAIN_NONE |
+				FDI_TX_ENHANCE_FRAME_ENABLE);
+		I915_READ(fdi_tx_reg);
+
+		temp = I915_READ(fdi_rx_reg);
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		I915_WRITE(fdi_rx_reg, temp | FDI_LINK_TRAIN_NONE |
+				FDI_RX_ENHANCE_FRAME_ENABLE);
+		I915_READ(fdi_rx_reg);
+
+		/* wait one idle pattern time */
+		udelay(100);
+
+		intel_crtc_load_lut(crtc);
+
+	break;
+	case DRM_MODE_DPMS_OFF:
+		DRM_DEBUG("crtc %d dpms off\n", pipe);
+
+		/* Disable the VGA plane that we never use */
+		I915_WRITE(CPU_VGACNTRL, VGA_DISP_DISABLE);
+
+		/* Disable display plane */
+		temp = I915_READ(dspcntr_reg);
+		if ((temp & DISPLAY_PLANE_ENABLE) != 0) {
+			I915_WRITE(dspcntr_reg, temp & ~DISPLAY_PLANE_ENABLE);
+			/* Flush the plane changes */
+			I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+			I915_READ(dspbase_reg);
+		}
+
+		/* disable cpu pipe, disable after all planes disabled */
+		temp = I915_READ(pipeconf_reg);
+		if ((temp & PIPEACONF_ENABLE) != 0) {
+			I915_WRITE(pipeconf_reg, temp & ~PIPEACONF_ENABLE);
+			I915_READ(pipeconf_reg);
+			/* wait for cpu pipe off, pipe state */
+			while ((I915_READ(pipeconf_reg) & I965_PIPECONF_ACTIVE) != 0)
+				;
+		} else
+			DRM_DEBUG("crtc %d is disabled\n", pipe);
+
+		/* IGDNG-A : disable cpu panel fitter ? */
+		temp = I915_READ(pf_ctl_reg);
+		if ((temp & PF_ENABLE) != 0) {
+			I915_WRITE(pf_ctl_reg, temp & ~PF_ENABLE);
+			I915_READ(pf_ctl_reg);
+		}
+
+		/* disable CPU FDI tx and PCH FDI rx */
+		temp = I915_READ(fdi_tx_reg);
+		I915_WRITE(fdi_tx_reg, temp & ~FDI_TX_ENABLE);
+		I915_READ(fdi_tx_reg);
+
+		temp = I915_READ(fdi_rx_reg);
+		I915_WRITE(fdi_rx_reg, temp & ~FDI_RX_ENABLE);
+		I915_READ(fdi_rx_reg);
+
+		/* still set train pattern 1 */
+		temp = I915_READ(fdi_tx_reg);
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		temp |= FDI_LINK_TRAIN_PATTERN_1;
+		I915_WRITE(fdi_tx_reg, temp);
+
+		temp = I915_READ(fdi_rx_reg);
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		temp |= FDI_LINK_TRAIN_PATTERN_1;
+		I915_WRITE(fdi_rx_reg, temp);
+
+		/* disable PCH transcoder */
+		temp = I915_READ(transconf_reg);
+		if ((temp & TRANS_ENABLE) != 0) {
+			I915_WRITE(transconf_reg, temp & ~TRANS_ENABLE);
+			I915_READ(transconf_reg);
+			/* wait for PCH transcoder off, transcoder state */
+			while ((I915_READ(transconf_reg) & TRANS_STATE_ENABLE) != 0)
+				;
+		}
+
+		/* disable PCH DPLL */
+		temp = I915_READ(pch_dpll_reg);
+		if ((temp & DPLL_VCO_ENABLE) != 0) {
+			I915_WRITE(pch_dpll_reg, temp & ~DPLL_VCO_ENABLE);
+			I915_READ(pch_dpll_reg);
+		}
+
+		temp = I915_READ(fdi_rx_reg);
+		if ((temp & FDI_RX_PLL_ENABLE) != 0) {
+			temp &= ~FDI_SEL_PCDCLK;
+			temp &= ~FDI_RX_PLL_ENABLE;
+			I915_WRITE(fdi_rx_reg, temp);
+			I915_READ(fdi_rx_reg);
+		}
+
+		/* Wait for the clocks to turn off. */
+		udelay(150);
+		break;
+	}
+}
+
+static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_master_private *master_priv;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
@@ -805,7 +1209,6 @@ static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
 	int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR;
 	int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
 	u32 temp;
-	bool enabled;
 
 	/* XXX: When our outputs are all unaware of DPMS modes other than off
 	 * and on, we should map those modes to DRM_MODE_DPMS_OFF in the CRTC.
@@ -890,6 +1293,26 @@ static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
 		udelay(150);
 		break;
 	}
+}
+
+/**
+ * Sets the power management mode of the pipe and plane.
+ *
+ * This code should probably grow support for turning the cursor off and back
+ * on appropriately at the same time as we're turning the pipe off/on.
+ */
+static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_master_private *master_priv;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	bool enabled;
+
+	if (IS_IGDNG(dev))
+		igdng_crtc_dpms(crtc, mode);
+	else
+		i9xx_crtc_dpms(crtc, mode);
 
 	if (!dev->primary->master)
 		return;
@@ -947,6 +1370,12 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc,
 				  struct drm_display_mode *mode,
 				  struct drm_display_mode *adjusted_mode)
 {
+	struct drm_device *dev = crtc->dev;
+	if (IS_IGDNG(dev)) {
+		/* FDI link clock is fixed at 2.7G */
+		if (mode->clock * 3 > 27000 * 4)
+			return MODE_CLOCK_HIGH;
+	}
 	return true;
 }
 
@@ -1030,6 +1459,48 @@ static int intel_panel_fitter_pipe (struct drm_device *dev)
 	return 1;
 }
 
+struct fdi_m_n {
+	u32        tu;
+	u32        gmch_m;
+	u32        gmch_n;
+	u32        link_m;
+	u32        link_n;
+};
+
+static void
+fdi_reduce_ratio(u32 *num, u32 *den)
+{
+	while (*num > 0xffffff || *den > 0xffffff) {
+		*num >>= 1;
+		*den >>= 1;
+	}
+}
+
+#define DATA_N 0x800000
+#define LINK_N 0x80000
+
+static void
+igdng_compute_m_n(int bytes_per_pixel, int nlanes,
+		int pixel_clock, int link_clock,
+		struct fdi_m_n *m_n)
+{
+	u64 temp;
+
+	m_n->tu = 64; /* default size */
+
+	temp = (u64) DATA_N * pixel_clock;
+	temp = div_u64(temp, link_clock);
+	m_n->gmch_m = (temp * bytes_per_pixel) / nlanes;
+	m_n->gmch_n = DATA_N;
+	fdi_reduce_ratio(&m_n->gmch_m, &m_n->gmch_n);
+
+	temp = (u64) LINK_N * pixel_clock;
+	m_n->link_m = div_u64(temp, link_clock);
+	m_n->link_n = LINK_N;
+	fdi_reduce_ratio(&m_n->link_m, &m_n->link_n);
+}
+
+
 static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			       struct drm_display_mode *mode,
 			       struct drm_display_mode *adjusted_mode,
@@ -1063,6 +1534,16 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	struct drm_connector *connector;
 	const intel_limit_t *limit;
 	int ret;
+	struct fdi_m_n m_n = {0};
+	int data_m1_reg = (pipe == 0) ? PIPEA_DATA_M1 : PIPEB_DATA_M1;
+	int data_n1_reg = (pipe == 0) ? PIPEA_DATA_N1 : PIPEB_DATA_N1;
+	int link_m1_reg = (pipe == 0) ? PIPEA_LINK_M1 : PIPEB_LINK_M1;
+	int link_n1_reg = (pipe == 0) ? PIPEA_LINK_N1 : PIPEB_LINK_N1;
+	int pch_fp_reg = (pipe == 0) ? PCH_FPA0 : PCH_FPB0;
+	int pch_dpll_reg = (pipe == 0) ? PCH_DPLL_A : PCH_DPLL_B;
+	int fdi_rx_reg = (pipe == 0) ? FDI_RXA_CTL : FDI_RXB_CTL;
+	u32 temp;
+	int sdvo_pixel_multiply;
 
 	drm_vblank_pre_modeset(dev, pipe);
 
@@ -1101,6 +1582,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		DRM_DEBUG("using SSC reference clock of %d MHz\n", refclk / 1000);
 	} else if (IS_I9XX(dev)) {
 		refclk = 96000;
+		if (IS_IGDNG(dev))
+			refclk = 120000; /* 120Mhz refclk */
 	} else {
 		refclk = 48000;
 	}
@@ -1137,12 +1620,21 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		}
 	}
 
+	/* FDI link */
+	if (IS_IGDNG(dev))
+		igdng_compute_m_n(3, 4, /* lane num 4 */
+				adjusted_mode->clock,
+				270000, /* lane clock */
+				&m_n);
+
 	if (IS_IGD(dev))
 		fp = (1 << clock.n) << 16 | clock.m1 << 8 | clock.m2;
 	else
 		fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
 
-	dpll = DPLL_VGA_MODE_DIS;
+	if (!IS_IGDNG(dev))
+		dpll = DPLL_VGA_MODE_DIS;
+
 	if (IS_I9XX(dev)) {
 		if (is_lvds)
 			dpll |= DPLLB_MODE_LVDS;
@@ -1150,17 +1642,22 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			dpll |= DPLLB_MODE_DAC_SERIAL;
 		if (is_sdvo) {
 			dpll |= DPLL_DVO_HIGH_SPEED;
-			if (IS_I945G(dev) || IS_I945GM(dev)) {
-				int sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+			sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+			if (IS_I945G(dev) || IS_I945GM(dev))
 				dpll |= (sdvo_pixel_multiply - 1) << SDVO_MULTIPLIER_SHIFT_HIRES;
-			}
+			else if (IS_IGDNG(dev))
+				dpll |= (sdvo_pixel_multiply - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
 		}
 
 		/* compute bitmask from p1 value */
 		if (IS_IGD(dev))
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT_IGD;
-		else
+		else {
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+			/* also FPA1 */
+			if (IS_IGDNG(dev))
+				dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
+		}
 		switch (clock.p2) {
 		case 5:
 			dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5;
@@ -1175,7 +1672,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
 			break;
 		}
-		if (IS_I965G(dev))
+		if (IS_I965G(dev) && !IS_IGDNG(dev))
 			dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT);
 	} else {
 		if (is_lvds) {
@@ -1207,10 +1704,14 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	/* Set up the display plane register */
 	dspcntr = DISPPLANE_GAMMA_ENABLE;
 
-	if (pipe == 0)
-		dspcntr |= DISPPLANE_SEL_PIPE_A;
-	else
-		dspcntr |= DISPPLANE_SEL_PIPE_B;
+	/* IGDNG's plane is forced to pipe, bit 24 is to
+	   enable color space conversion */
+	if (!IS_IGDNG(dev)) {
+		if (pipe == 0)
+			dspcntr |= DISPPLANE_SEL_PIPE_A;
+		else
+			dspcntr |= DISPPLANE_SEL_PIPE_B;
+	}
 
 	if (pipe == 0 && !IS_I965G(dev)) {
 		/* Enable pixel doubling when the dot clock is > 90% of the (display)
@@ -1231,12 +1732,17 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 
 
 	/* Disable the panel fitter if it was on our pipe */
-	if (intel_panel_fitter_pipe(dev) == pipe)
+	if (!IS_IGDNG(dev) && intel_panel_fitter_pipe(dev) == pipe)
 		I915_WRITE(PFIT_CONTROL, 0);
 
 	DRM_DEBUG("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
+	/* assign to IGDNG registers */
+	if (IS_IGDNG(dev)) {
+		fp_reg = pch_fp_reg;
+		dpll_reg = pch_dpll_reg;
+	}
 
 	if (dpll & DPLL_VCO_ENABLE) {
 		I915_WRITE(fp_reg, fp);
@@ -1245,6 +1751,22 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		udelay(150);
 	}
 
+	if (IS_IGDNG(dev)) {
+		/* enable PCH clock reference source */
+		/* XXX need to change the setting for other outputs */
+		u32 temp;
+		temp = I915_READ(PCH_DREF_CONTROL);
+		temp &= ~DREF_NONSPREAD_SOURCE_MASK;
+		temp |= DREF_NONSPREAD_CK505_ENABLE;
+		temp &= ~DREF_SSC_SOURCE_MASK;
+		temp |= DREF_SSC_SOURCE_ENABLE;
+		temp &= ~DREF_SSC1_ENABLE;
+		/* if no eDP, disable source output to CPU */
+		temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
+		temp |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
+		I915_WRITE(PCH_DREF_CONTROL, temp);
+	}
+
 	/* The LVDS pin pair needs to be on before the DPLLs are enabled.
 	 * This is an exception to the general rule that mode_set doesn't turn
 	 * things on.
@@ -1276,8 +1798,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	/* Wait for the clocks to stabilize. */
 	udelay(150);
 
-	if (IS_I965G(dev)) {
-		int sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+	if (IS_I965G(dev) && !IS_IGDNG(dev)) {
+		sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
 		I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
 			   ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT));
 	} else {
@@ -1303,9 +1825,25 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	/* pipesrc and dspsize control the size that is scaled from, which should
 	 * always be the user's requested size.
 	 */
-	I915_WRITE(dspsize_reg, ((mode->vdisplay - 1) << 16) | (mode->hdisplay - 1));
-	I915_WRITE(dsppos_reg, 0);
+	if (!IS_IGDNG(dev)) {
+		I915_WRITE(dspsize_reg, ((mode->vdisplay - 1) << 16) |
+				(mode->hdisplay - 1));
+		I915_WRITE(dsppos_reg, 0);
+	}
 	I915_WRITE(pipesrc_reg, ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
+
+	if (IS_IGDNG(dev)) {
+		I915_WRITE(data_m1_reg, TU_SIZE(m_n.tu) | m_n.gmch_m);
+		I915_WRITE(data_n1_reg, TU_SIZE(m_n.tu) | m_n.gmch_n);
+		I915_WRITE(link_m1_reg, m_n.link_m);
+		I915_WRITE(link_n1_reg, m_n.link_n);
+
+		 /* enable FDI RX PLL too */
+		temp = I915_READ(fdi_rx_reg);
+		I915_WRITE(fdi_rx_reg, temp | FDI_RX_PLL_ENABLE);
+		udelay(200);
+	}
+
 	I915_WRITE(pipeconf_reg, pipeconf);
 	I915_READ(pipeconf_reg);
 
@@ -1336,6 +1874,11 @@ void intel_crtc_load_lut(struct drm_crtc *crtc)
 	if (!crtc->enabled)
 		return;
 
+	/* use legacy palette for IGDNG */
+	if (IS_IGDNG(dev))
+		palreg = (intel_crtc->pipe == 0) ? LGC_PALETTE_A :
+						   LGC_PALETTE_B;
+
 	for (i = 0; i < 256; i++) {
 		I915_WRITE(palreg + 4 * i,
 			   (intel_crtc->lut_r[i] << 16) |
@@ -1885,10 +2428,12 @@ static void intel_setup_outputs(struct drm_device *dev)
 	intel_crt_init(dev);
 
 	/* Set up integrated LVDS */
-	if (IS_MOBILE(dev) && !IS_I830(dev))
+	if (IS_MOBILE(dev) && !IS_I830(dev) && !IS_IGDNG(dev))
 		intel_lvds_init(dev);
 
-	if (IS_I9XX(dev)) {
+	if (IS_IGDNG(dev)) {
+		/* ignore for other outputs */
+	} else if (IS_I9XX(dev)) {
 		int found;
 		u32 reg;
 
@@ -1912,7 +2457,7 @@ static void intel_setup_outputs(struct drm_device *dev)
 	} else
 		intel_dvo_init(dev);
 
-	if (IS_I9XX(dev) && IS_MOBILE(dev))
+	if (IS_I9XX(dev) && IS_MOBILE(dev) && !IS_IGDNG(dev))
 		intel_tv_init(dev);
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-- 
cgit v0.10.2


From 30ad48b7334a2eb2edf22f6c91f7b3f22a22a837 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Jun 2009 15:38:43 +0800
Subject: drm/i915: Add HDMI support on IGDNG

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2cd6ba6..53cf6ef 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2432,7 +2432,22 @@ static void intel_setup_outputs(struct drm_device *dev)
 		intel_lvds_init(dev);
 
 	if (IS_IGDNG(dev)) {
-		/* ignore for other outputs */
+		int found;
+
+		if (I915_READ(HDMIB) & PORT_DETECTED) {
+			/* check SDVOB */
+			/* found = intel_sdvo_init(dev, HDMIB); */
+			found = 0;
+			if (!found)
+				intel_hdmi_init(dev, HDMIB);
+		}
+
+		if (I915_READ(HDMIC) & PORT_DETECTED)
+			intel_hdmi_init(dev, HDMIC);
+
+		if (I915_READ(HDMID) & PORT_DETECTED)
+			intel_hdmi_init(dev, HDMID);
+
 	} else if (IS_I9XX(dev)) {
 		int found;
 		u32 reg;
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index d0983bb..d874b0c 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -56,7 +56,8 @@ static void intel_hdmi_mode_set(struct drm_encoder *encoder,
 	sdvox = SDVO_ENCODING_HDMI |
 		SDVO_BORDER_ENABLE |
 		SDVO_VSYNC_ACTIVE_HIGH |
-		SDVO_HSYNC_ACTIVE_HIGH;
+		SDVO_HSYNC_ACTIVE_HIGH |
+		SDVO_NULL_PACKETS_DURING_VSYNC;
 
 	if (hdmi_priv->has_hdmi_sink)
 		sdvox |= SDVO_AUDIO_ENABLE;
@@ -145,6 +146,22 @@ intel_hdmi_sink_detect(struct drm_connector *connector)
 }
 
 static enum drm_connector_status
+igdng_hdmi_detect(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv;
+
+	/* FIXME hotplug detect */
+
+	hdmi_priv->has_hdmi_sink = false;
+	intel_hdmi_sink_detect(connector);
+	if (hdmi_priv->has_hdmi_sink)
+		return connector_status_connected;
+	else
+		return connector_status_disconnected;
+}
+
+static enum drm_connector_status
 intel_hdmi_detect(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
@@ -153,6 +170,9 @@ intel_hdmi_detect(struct drm_connector *connector)
 	struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv;
 	u32 temp, bit;
 
+	if (IS_IGDNG(dev))
+		return igdng_hdmi_detect(connector);
+
 	temp = I915_READ(PORT_HOTPLUG_EN);
 
 	switch (hdmi_priv->sdvox_reg) {
@@ -268,8 +288,17 @@ void intel_hdmi_init(struct drm_device *dev, int sdvox_reg)
 	/* Set up the DDC bus. */
 	if (sdvox_reg == SDVOB)
 		intel_output->ddc_bus = intel_i2c_create(dev, GPIOE, "HDMIB");
-	else
+	else if (sdvox_reg == SDVOC)
 		intel_output->ddc_bus = intel_i2c_create(dev, GPIOD, "HDMIC");
+	else if (sdvox_reg == HDMIB)
+		intel_output->ddc_bus = intel_i2c_create(dev, PCH_GPIOE,
+								"HDMIB");
+	else if (sdvox_reg == HDMIC)
+		intel_output->ddc_bus = intel_i2c_create(dev, PCH_GPIOD,
+								"HDMIC");
+	else if (sdvox_reg == HDMID)
+		intel_output->ddc_bus = intel_i2c_create(dev, PCH_GPIOF,
+								"HDMID");
 
 	if (!intel_output->ddc_bus)
 		goto err_connector;
-- 
cgit v0.10.2


From 541998a18b72d2cac48b3369fa4540116ff3f0a8 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Jun 2009 15:38:44 +0800
Subject: drm/i915: Add LVDS support for IGDNG

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 53cf6ef..05bd97e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1542,6 +1542,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	int pch_fp_reg = (pipe == 0) ? PCH_FPA0 : PCH_FPB0;
 	int pch_dpll_reg = (pipe == 0) ? PCH_DPLL_A : PCH_DPLL_B;
 	int fdi_rx_reg = (pipe == 0) ? FDI_RXA_CTL : FDI_RXB_CTL;
+	int lvds_reg = LVDS;
 	u32 temp;
 	int sdvo_pixel_multiply;
 
@@ -1772,8 +1773,12 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	 * things on.
 	 */
 	if (is_lvds) {
-		u32 lvds = I915_READ(LVDS);
+		u32 lvds;
 
+		if (IS_IGDNG(dev))
+			lvds_reg = PCH_LVDS;
+
+		lvds = I915_READ(lvds_reg);
 		lvds |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP | LVDS_PIPEB_SELECT;
 		/* Set the B0-B3 data pairs corresponding to whether we're going to
 		 * set the DPLLs for dual-channel mode or not.
@@ -1788,8 +1793,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		 * panels behave in the two modes.
 		 */
 
-		I915_WRITE(LVDS, lvds);
-		I915_READ(LVDS);
+		I915_WRITE(lvds_reg, lvds);
+		I915_READ(lvds_reg);
 	}
 
 	I915_WRITE(fp_reg, fp);
@@ -2428,7 +2433,7 @@ static void intel_setup_outputs(struct drm_device *dev)
 	intel_crt_init(dev);
 
 	/* Set up integrated LVDS */
-	if (IS_MOBILE(dev) && !IS_I830(dev) && !IS_IGDNG(dev))
+	if (IS_MOBILE(dev) && !IS_I830(dev))
 		intel_lvds_init(dev);
 
 	if (IS_IGDNG(dev)) {
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 53731f0..eea3a54 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -45,10 +45,15 @@
 static void intel_lvds_set_backlight(struct drm_device *dev, int level)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 blc_pwm_ctl;
+	u32 blc_pwm_ctl, reg;
 
-	blc_pwm_ctl = I915_READ(BLC_PWM_CTL) & ~BACKLIGHT_DUTY_CYCLE_MASK;
-	I915_WRITE(BLC_PWM_CTL, (blc_pwm_ctl |
+	if (IS_IGDNG(dev))
+		reg = BLC_PWM_CPU_CTL;
+	else
+		reg = BLC_PWM_CTL;
+
+	blc_pwm_ctl = I915_READ(reg) & ~BACKLIGHT_DUTY_CYCLE_MASK;
+	I915_WRITE(reg, (blc_pwm_ctl |
 				 (level << BACKLIGHT_DUTY_CYCLE_SHIFT)));
 }
 
@@ -58,8 +63,14 @@ static void intel_lvds_set_backlight(struct drm_device *dev, int level)
 static u32 intel_lvds_get_max_backlight(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 reg;
+
+	if (IS_IGDNG(dev))
+		reg = BLC_PWM_PCH_CTL2;
+	else
+		reg = BLC_PWM_CTL;
 
-	return ((I915_READ(BLC_PWM_CTL) & BACKLIGHT_MODULATION_FREQ_MASK) >>
+	return ((I915_READ(reg) & BACKLIGHT_MODULATION_FREQ_MASK) >>
 		BACKLIGHT_MODULATION_FREQ_SHIFT) * 2;
 }
 
@@ -69,23 +80,31 @@ static u32 intel_lvds_get_max_backlight(struct drm_device *dev)
 static void intel_lvds_set_power(struct drm_device *dev, bool on)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 pp_status;
+	u32 pp_status, ctl_reg, status_reg;
+
+	if (IS_IGDNG(dev)) {
+		ctl_reg = PCH_PP_CONTROL;
+		status_reg = PCH_PP_STATUS;
+	} else {
+		ctl_reg = PP_CONTROL;
+		status_reg = PP_STATUS;
+	}
 
 	if (on) {
-		I915_WRITE(PP_CONTROL, I915_READ(PP_CONTROL) |
+		I915_WRITE(ctl_reg, I915_READ(ctl_reg) |
 			   POWER_TARGET_ON);
 		do {
-			pp_status = I915_READ(PP_STATUS);
+			pp_status = I915_READ(status_reg);
 		} while ((pp_status & PP_ON) == 0);
 
 		intel_lvds_set_backlight(dev, dev_priv->backlight_duty_cycle);
 	} else {
 		intel_lvds_set_backlight(dev, 0);
 
-		I915_WRITE(PP_CONTROL, I915_READ(PP_CONTROL) &
+		I915_WRITE(ctl_reg, I915_READ(ctl_reg) &
 			   ~POWER_TARGET_ON);
 		do {
-			pp_status = I915_READ(PP_STATUS);
+			pp_status = I915_READ(status_reg);
 		} while (pp_status & PP_ON);
 	}
 }
@@ -106,12 +125,28 @@ static void intel_lvds_save(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 pp_on_reg, pp_off_reg, pp_ctl_reg, pp_div_reg;
+	u32 pwm_ctl_reg;
+
+	if (IS_IGDNG(dev)) {
+		pp_on_reg = PCH_PP_ON_DELAYS;
+		pp_off_reg = PCH_PP_OFF_DELAYS;
+		pp_ctl_reg = PCH_PP_CONTROL;
+		pp_div_reg = PCH_PP_DIVISOR;
+		pwm_ctl_reg = BLC_PWM_CPU_CTL;
+	} else {
+		pp_on_reg = PP_ON_DELAYS;
+		pp_off_reg = PP_OFF_DELAYS;
+		pp_ctl_reg = PP_CONTROL;
+		pp_div_reg = PP_DIVISOR;
+		pwm_ctl_reg = BLC_PWM_CTL;
+	}
 
-	dev_priv->savePP_ON = I915_READ(PP_ON_DELAYS);
-	dev_priv->savePP_OFF = I915_READ(PP_OFF_DELAYS);
-	dev_priv->savePP_CONTROL = I915_READ(PP_CONTROL);
-	dev_priv->savePP_DIVISOR = I915_READ(PP_DIVISOR);
-	dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_CTL);
+	dev_priv->savePP_ON = I915_READ(pp_on_reg);
+	dev_priv->savePP_OFF = I915_READ(pp_off_reg);
+	dev_priv->savePP_CONTROL = I915_READ(pp_ctl_reg);
+	dev_priv->savePP_DIVISOR = I915_READ(pp_div_reg);
+	dev_priv->saveBLC_PWM_CTL = I915_READ(pwm_ctl_reg);
 	dev_priv->backlight_duty_cycle = (dev_priv->saveBLC_PWM_CTL &
 				       BACKLIGHT_DUTY_CYCLE_MASK);
 
@@ -127,12 +162,28 @@ static void intel_lvds_restore(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 pp_on_reg, pp_off_reg, pp_ctl_reg, pp_div_reg;
+	u32 pwm_ctl_reg;
+
+	if (IS_IGDNG(dev)) {
+		pp_on_reg = PCH_PP_ON_DELAYS;
+		pp_off_reg = PCH_PP_OFF_DELAYS;
+		pp_ctl_reg = PCH_PP_CONTROL;
+		pp_div_reg = PCH_PP_DIVISOR;
+		pwm_ctl_reg = BLC_PWM_CPU_CTL;
+	} else {
+		pp_on_reg = PP_ON_DELAYS;
+		pp_off_reg = PP_OFF_DELAYS;
+		pp_ctl_reg = PP_CONTROL;
+		pp_div_reg = PP_DIVISOR;
+		pwm_ctl_reg = BLC_PWM_CTL;
+	}
 
-	I915_WRITE(BLC_PWM_CTL, dev_priv->saveBLC_PWM_CTL);
-	I915_WRITE(PP_ON_DELAYS, dev_priv->savePP_ON);
-	I915_WRITE(PP_OFF_DELAYS, dev_priv->savePP_OFF);
-	I915_WRITE(PP_DIVISOR, dev_priv->savePP_DIVISOR);
-	I915_WRITE(PP_CONTROL, dev_priv->savePP_CONTROL);
+	I915_WRITE(pwm_ctl_reg, dev_priv->saveBLC_PWM_CTL);
+	I915_WRITE(pp_on_reg, dev_priv->savePP_ON);
+	I915_WRITE(pp_off_reg, dev_priv->savePP_OFF);
+	I915_WRITE(pp_div_reg, dev_priv->savePP_DIVISOR);
+	I915_WRITE(pp_ctl_reg, dev_priv->savePP_CONTROL);
 	if (dev_priv->savePP_CONTROL & POWER_TARGET_ON)
 		intel_lvds_set_power(dev, true);
 	else
@@ -216,8 +267,14 @@ static void intel_lvds_prepare(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 reg;
 
-	dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_CTL);
+	if (IS_IGDNG(dev))
+		reg = BLC_PWM_CPU_CTL;
+	else
+		reg = BLC_PWM_CTL;
+
+	dev_priv->saveBLC_PWM_CTL = I915_READ(reg);
 	dev_priv->backlight_duty_cycle = (dev_priv->saveBLC_PWM_CTL &
 				       BACKLIGHT_DUTY_CYCLE_MASK);
 
@@ -251,6 +308,10 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
 	 * settings.
 	 */
 
+	/* No panel fitting yet, fixme */
+	if (IS_IGDNG(dev))
+		return;
+
 	/*
 	 * Enable automatic panel scaling so that non-native modes fill the
 	 * screen.  Should be enabled before the pipe is enabled, according to
@@ -446,12 +507,18 @@ void intel_lvds_init(struct drm_device *dev)
 	struct drm_display_mode *scan; /* *modes, *bios_mode; */
 	struct drm_crtc *crtc;
 	u32 lvds;
-	int pipe;
+	int pipe, gpio = GPIOC;
 
 	/* Skip init on machines we know falsely report LVDS */
 	if (dmi_check_system(intel_no_lvds))
 		return;
 
+	if (IS_IGDNG(dev)) {
+		if ((I915_READ(PCH_LVDS) & LVDS_DETECTED) == 0)
+			return;
+		gpio = PCH_GPIOC;
+	}
+
 	intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL);
 	if (!intel_output) {
 		return;
@@ -486,7 +553,7 @@ void intel_lvds_init(struct drm_device *dev)
 	 */
 
 	/* Set up the DDC bus. */
-	intel_output->ddc_bus = intel_i2c_create(dev, GPIOC, "LVDSDDC_C");
+	intel_output->ddc_bus = intel_i2c_create(dev, gpio, "LVDSDDC_C");
 	if (!intel_output->ddc_bus) {
 		dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
 			   "failed.\n");
@@ -528,6 +595,11 @@ void intel_lvds_init(struct drm_device *dev)
 	 * on.  If so, assume that whatever is currently programmed is the
 	 * correct mode.
 	 */
+
+	/* IGDNG: FIXME if still fail, not try pipe mode now */
+	if (IS_IGDNG(dev))
+		goto failed;
+
 	lvds = I915_READ(LVDS);
 	pipe = (lvds & LVDS_PIPEB_SELECT) ? 1 : 0;
 	crtc = intel_get_crtc_from_pipe(dev, pipe);
@@ -546,6 +618,17 @@ void intel_lvds_init(struct drm_device *dev)
 		goto failed;
 
 out:
+	if (IS_IGDNG(dev)) {
+		u32 pwm;
+		/* make sure PWM is enabled */
+		pwm = I915_READ(BLC_PWM_CPU_CTL2);
+		pwm |= (PWM_ENABLE | PWM_PIPE_B);
+		I915_WRITE(BLC_PWM_CPU_CTL2, pwm);
+
+		pwm = I915_READ(BLC_PWM_PCH_CTL1);
+		pwm |= PWM_PCH_ENABLE;
+		I915_WRITE(BLC_PWM_PCH_CTL1, pwm);
+	}
 	drm_sysfs_connector_add(connector);
 	return;
 
-- 
cgit v0.10.2


From cf1f45744c6fa3501e0a6f0ddc418f0ef27e725b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 13:27:02 +0200
Subject: perf record, top: Implement --freq

Support frequency-based profiling and make it the default.

(Also add a Hz printout in perf top.)

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 7f2d7ce..e2301f3 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -27,6 +27,7 @@ static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 static int			nr_cpus				= 0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			= 128;
+static int			freq				= 0;
 static int			output;
 static const char		*output_name			= "perf.data";
 static int			group				= 0;
@@ -347,9 +348,10 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	attr.config		= event_id[counter];
 	attr.sample_period	= event_count[counter];
 	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+	attr.freq		= freq;
 	attr.mmap		= track;
 	attr.comm		= track;
-	attr.inherit	= (cpu < 0) && inherit;
+	attr.inherit		= (cpu < 0) && inherit;
 
 	track = 0; /* only the first counter needs these */
 
@@ -520,6 +522,8 @@ static const struct option options[] = {
 		    "output file name"),
 	OPT_BOOLEAN('i', "inherit", &inherit,
 		    "child tasks inherit counters"),
+	OPT_INTEGER('F', "freq", &freq,
+		    "profile at this frequency"),
 	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
 		    "number of mmap data pages"),
 	OPT_END()
@@ -540,6 +544,10 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 		event_id[0] = 0;
 	}
 
+	if (freq) {
+		default_interval = freq;
+		freq = 1;
+	}
 	for (counter = 0; counter < nr_counters; counter++) {
 		if (event_count[counter])
 			continue;
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 28cbde4..2fee595 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -74,8 +74,8 @@ static int			nr_cpus				=  0;
 static unsigned int		realtime_prio			=  0;
 static int			group				=  0;
 static unsigned int		page_size;
-static unsigned int		mmap_pages			=  16;
-static int			freq				= 0;
+static unsigned int		mmap_pages			= 16;
+static int			freq				=  0;
 
 static char			*sym_filter;
 static unsigned long		filter_start;
@@ -212,8 +212,13 @@ static void print_sym_table(void)
 		events_per_sec,
 		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)));
 
-	if (nr_counters == 1)
-		printf("%d ", event_count[0]);
+	if (nr_counters == 1) {
+		printf("%d", event_count[0]);
+		if (freq)
+			printf("Hz ");
+		else
+			printf(" ");
+	}
 
 	for (counter = 0; counter < nr_counters; counter++) {
 		if (counter)
-- 
cgit v0.10.2


From f7b6eb3fa07269da20dbbde8ba37a0273fdbd9c9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 14:04:51 +0200
Subject: x86: Set context.vdso before installing the mapping

In order to make arch_vma_name() work from inside
install_special_mapping() we need to set the context.vdso
before calling it.

( This is needed for performance counters to be able to track
  this special executable area. )

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 1241f11..58bc00f 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -338,6 +338,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		}
 	}
 
+	current->mm->context.vdso = (void *)addr;
+
 	if (compat_uses_vma || !compat) {
 		/*
 		 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -358,11 +360,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 			goto up_fail;
 	}
 
-	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
 		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
 
   up_fail:
+	if (ret)
+		current->mm->context.vdso = NULL;
+
 	up_write(&mm->mmap_sem);
 
 	return ret;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7133cdf..93b7a29 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -115,15 +115,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		goto up_fail;
 	}
 
+	current->mm->context.vdso = (void *)addr;
+
 	ret = install_special_mapping(mm, addr, vdso_size,
 				      VM_READ|VM_EXEC|
 				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
 				      VM_ALWAYSDUMP,
 				      vdso_pages);
-	if (ret)
+	if (ret) {
+		current->mm->context.vdso = NULL;
 		goto up_fail;
+	}
 
-	current->mm->context.vdso = (void *)addr;
 up_fail:
 	up_write(&mm->mmap_sem);
 	return ret;
-- 
cgit v0.10.2


From 089dd79db9264dc0da602bad45d42f1b3e7d1e07 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 14:04:55 +0200
Subject: perf_counter: Generate mmap events for install_special_mapping()

In order to track the vdso also generate mmap events for
install_special_mapping().

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6ca403a..40dc0e2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -617,8 +617,13 @@ static inline int is_software_counter(struct perf_counter *counter)
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
 
-extern void perf_counter_mmap(unsigned long addr, unsigned long len,
-			      unsigned long pgoff, struct file *file);
+extern void __perf_counter_mmap(struct vm_area_struct *vma);
+
+static inline void perf_counter_mmap(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_EXEC)
+		__perf_counter_mmap(vma);
+}
 
 extern void perf_counter_comm(struct task_struct *tsk);
 extern void perf_counter_fork(struct task_struct *tsk);
@@ -668,10 +673,7 @@ static inline void
 perf_swcounter_event(u32 event, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)			{ }
 
-static inline void
-perf_counter_mmap(unsigned long addr, unsigned long len,
-		  unsigned long pgoff, struct file *file)		{ }
-
+static inline void perf_counter_mmap(struct vm_area_struct *vma)	{ }
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a5d3e2a..37a5a24 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2255,7 +2255,7 @@ out:
 }
 
 static void perf_output_copy(struct perf_output_handle *handle,
-			     void *buf, unsigned int len)
+			     const void *buf, unsigned int len)
 {
 	unsigned int pages_mask;
 	unsigned int offset;
@@ -2681,9 +2681,10 @@ void perf_counter_comm(struct task_struct *task)
  */
 
 struct perf_mmap_event {
-	struct file	*file;
-	char		*file_name;
-	int		file_size;
+	struct vm_area_struct	*vma;
+
+	const char		*file_name;
+	int			file_size;
 
 	struct {
 		struct perf_event_header	header;
@@ -2744,11 +2745,12 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;
-	struct file *file = mmap_event->file;
+	struct vm_area_struct *vma = mmap_event->vma;
+	struct file *file = vma->vm_file;
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
-	char *name;
+	const char *name;
 
 	if (file) {
 		buf = kzalloc(PATH_MAX, GFP_KERNEL);
@@ -2762,6 +2764,15 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
 			goto got_name;
 		}
 	} else {
+		name = arch_vma_name(mmap_event->vma);
+		if (name)
+			goto got_name;
+
+		if (!vma->vm_mm) {
+			name = strncpy(tmp, "[vdso]", sizeof(tmp));
+			goto got_name;
+		}
+
 		name = strncpy(tmp, "//anon", sizeof(tmp));
 		goto got_name;
 	}
@@ -2791,8 +2802,7 @@ got_name:
 	kfree(buf);
 }
 
-void perf_counter_mmap(unsigned long addr, unsigned long len,
-		       unsigned long pgoff, struct file *file)
+void __perf_counter_mmap(struct vm_area_struct *vma)
 {
 	struct perf_mmap_event mmap_event;
 
@@ -2800,12 +2810,12 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 		return;
 
 	mmap_event = (struct perf_mmap_event){
-		.file   = file,
+		.vma	= vma,
 		.event  = {
 			.header = { .type = PERF_EVENT_MMAP, },
-			.start  = addr,
-			.len    = len,
-			.pgoff  = pgoff,
+			.start  = vma->vm_start,
+			.len    = vma->vm_end - vma->vm_start,
+			.pgoff  = vma->vm_pgoff,
 		},
 	};
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 6451ce2..8101de4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1220,8 +1220,7 @@ munmap_back:
 	if (correct_wcount)
 		atomic_inc(&inode->i_writecount);
 out:
-	if (vm_flags & VM_EXEC)
-		perf_counter_mmap(addr, len, pgoff, file);
+	perf_counter_mmap(vma);
 
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
@@ -2309,6 +2308,8 @@ int install_special_mapping(struct mm_struct *mm,
 
 	mm->total_vm += len >> PAGE_SHIFT;
 
+	perf_counter_mmap(vma);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From fc54db5105d01ad691a7d747064c7890e17f936c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 14:04:59 +0200
Subject: perf report: Deal with maps

In order to deal with [vdso] maps generalize the ip->symbol path
a bit and allow to override some bits with custom functions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index eb5424f..9783d1e 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -79,6 +79,7 @@ typedef union event_union {
 
 static LIST_HEAD(dsos);
 static struct dso *kernel_dso;
+static struct dso *vdso;
 
 static void dsos__add(struct dso *dso)
 {
@@ -136,6 +137,11 @@ static void dsos__fprintf(FILE *fp)
 		dso__fprintf(pos, fp);
 }
 
+static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip)
+{
+	return dso__find_symbol(kernel_dso, ip);
+}
+
 static int load_kernel(void)
 {
 	int err;
@@ -151,6 +157,14 @@ static int load_kernel(void)
 	} else
 		dsos__add(kernel_dso);
 
+	vdso = dso__new("[vdso]", 0);
+	if (!vdso)
+		return -1;
+
+	vdso->find_symbol = vdso__find_symbol;
+
+	dsos__add(vdso);
+
 	return err;
 }
 
@@ -173,9 +187,20 @@ struct map {
 	uint64_t	 start;
 	uint64_t	 end;
 	uint64_t	 pgoff;
+	uint64_t	 (*map_ip)(struct map *, uint64_t);
 	struct dso	 *dso;
 };
 
+static uint64_t map__map_ip(struct map *map, uint64_t ip)
+{
+	return ip - map->start + map->pgoff;
+}
+
+static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
+{
+	return ip;
+}
+
 static struct map *map__new(struct mmap_event *event)
 {
 	struct map *self = malloc(sizeof(*self));
@@ -201,6 +226,11 @@ static struct map *map__new(struct mmap_event *event)
 		self->dso = dsos__findnew(filename);
 		if (self->dso == NULL)
 			goto out_delete;
+
+		if (self->dso == vdso)
+			self->map_ip = vdso__map_ip;
+		else
+			self->map_ip = map__map_ip;
 	}
 	return self;
 out_delete:
@@ -917,8 +947,8 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 
 		map = thread__find_map(thread, ip);
 		if (map != NULL) {
+			ip = map->map_ip(map, ip);
 			dso = map->dso;
-			ip -= map->start + map->pgoff;
 		} else {
 			/*
 			 * If this is outside of all known maps,
@@ -938,7 +968,10 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 	}
 
 	if (show & show_mask) {
-		struct symbol *sym = dso__find_symbol(dso, ip);
+		struct symbol *sym = NULL;
+
+		if (dso)
+			sym = dso->find_symbol(dso, ip);
 
 		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
 			fprintf(stderr,
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index 15d5cf9..a06bbfba 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -45,6 +45,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
 		strcpy(self->name, name);
 		self->syms = RB_ROOT;
 		self->sym_priv_size = sym_priv_size;
+		self->find_symbol = dso__find_symbol;
 	}
 
 	return self;
diff --git a/Documentation/perf_counter/util/symbol.h b/Documentation/perf_counter/util/symbol.h
index 8dd8522..e23cc31 100644
--- a/Documentation/perf_counter/util/symbol.h
+++ b/Documentation/perf_counter/util/symbol.h
@@ -16,6 +16,7 @@ struct dso {
 	struct list_head node;
 	struct rb_root	 syms;
 	unsigned int	 sym_priv_size;
+	struct symbol    *(*find_symbol)(struct dso *, uint64_t ip);
 	char		 name[0];
 };
 
-- 
cgit v0.10.2


From 8edd4286f99f78fe07fe9196e69d5643da86cada Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 14:13:18 +0200
Subject: perf report: Display user/kernel differentiator

Before:

    25.96%  copy_user_generic_string
    15.23%  two_op
    15.19%  one_op
     6.92%  enough_duration
     1.23%  alloc_pages_current
     1.14%  acpi_os_read_port
     1.08%  _spin_lock

After:

    25.96%  [k] copy_user_generic_string
    15.23%  [.] two_op
    15.19%  [.] one_op
     6.92%  [.] enough_duration
     1.23%  [k] alloc_pages_current
     1.14%  [k] acpi_os_read_port
     1.08%  [k] _spin_lock

The '[k]' differentiator is a quick clue that it's a kernel symbol,
without having to bring in the full dso column.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 9783d1e..ca303fd 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -504,7 +504,7 @@ sort__comm_print(FILE *fp, struct hist_entry *self)
 }
 
 static struct sort_entry sort_comm = {
-	.header 	= "         Command",
+	.header		= "         Command",
 	.cmp		= sort__comm_cmp,
 	.collapse	= sort__comm_collapse,
 	.print		= sort__comm_print,
@@ -569,10 +569,12 @@ sort__sym_print(FILE *fp, struct hist_entry *self)
 	if (verbose)
 		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
 
-	if (self->sym)
-		ret += fprintf(fp, "%s", self->sym->name);
-	else
+	if (self->sym) {
+		ret += fprintf(fp, "[%c] %s",
+			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
+	} else {
 		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+	}
 
 	return ret;
 }
@@ -586,9 +588,9 @@ static struct sort_entry sort_sym = {
 static int sort__need_collapse = 0;
 
 struct sort_dimension {
-	char *name;
-	struct sort_entry *entry;
-	int taken;
+	char			*name;
+	struct sort_entry	*entry;
+	int			taken;
 };
 
 static struct sort_dimension sort_dimensions[] = {
-- 
cgit v0.10.2


From 2debbc836696f2a815d02630230584a1754a5022 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 14:29:10 +0200
Subject: perf_counter tools: Clarify events/samples naming

A number of places said 'events' while they should say 'samples'.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index e2301f3..d4ad3057 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -65,7 +65,7 @@ static unsigned int mmap_read_head(struct mmap_data *md)
 	return head;
 }
 
-static long events;
+static long samples;
 static struct timeval last_read, this_read;
 
 static __u64 bytes_written;
@@ -83,7 +83,7 @@ static void mmap_read(struct mmap_data *md)
 
 	/*
 	 * If we're further behind than half the buffer, there's a chance
-	 * the writer will bite our tail and screw up the events under us.
+	 * the writer will bite our tail and mess up the samples under us.
 	 *
 	 * If we somehow ended up ahead of the head, we got messed up.
 	 *
@@ -109,7 +109,7 @@ static void mmap_read(struct mmap_data *md)
 	last_read = this_read;
 
 	if (old != head)
-		events++;
+		samples++;
 
 	size = head - old;
 
@@ -257,7 +257,7 @@ out_failure:
 	exit(EXIT_FAILURE);
 }
 
-static void pid_synthesize_mmap_events(pid_t pid)
+static void pid_synthesize_mmap_samples(pid_t pid)
 {
 	char filename[PATH_MAX];
 	FILE *fp;
@@ -315,7 +315,7 @@ static void pid_synthesize_mmap_events(pid_t pid)
 	fclose(fp);
 }
 
-static void synthesize_events(void)
+static void synthesize_samples(void)
 {
 	DIR *proc;
 	struct dirent dirent, *next;
@@ -331,7 +331,7 @@ static void synthesize_events(void)
 			continue;
 
 		pid_synthesize_comm_event(pid, 1);
-		pid_synthesize_mmap_events(pid);
+		pid_synthesize_mmap_samples(pid);
 	}
 
 	closedir(proc);
@@ -396,7 +396,7 @@ static void open_counters(int cpu, pid_t pid)
 
 	if (pid > 0) {
 		pid_synthesize_comm_event(pid, 0);
-		pid_synthesize_mmap_events(pid);
+		pid_synthesize_mmap_samples(pid);
 	}
 
 	group_fd = -1;
@@ -469,17 +469,17 @@ static int __cmd_record(int argc, const char **argv)
 	}
 
 	if (system_wide)
-		synthesize_events();
+		synthesize_samples();
 
 	while (!done) {
-		int hits = events;
+		int hits = samples;
 
 		for (i = 0; i < nr_cpu; i++) {
 			for (counter = 0; counter < nr_counters; counter++)
 				mmap_read(&mmap_array[i][counter]);
 		}
 
-		if (hits == events)
+		if (hits == samples)
 			ret = poll(event_array, nr_poll, 100);
 	}
 
@@ -487,7 +487,7 @@ static int __cmd_record(int argc, const char **argv)
 	 * Approximate RIP event size: 24 bytes.
 	 */
 	fprintf(stderr,
-		"[ perf record: Captured and wrote %.3f MB %s (~%lld events) ]\n",
+		"[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
 		(double)bytes_written / 1024.0 / 1024.0,
 		output_name,
 		bytes_written / 24);
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index ca303fd..5af105c 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -857,7 +857,7 @@ static size_t output__fprintf(FILE *fp, uint64_t total_samples)
 
 	fprintf(fp, "\n");
 	fprintf(fp, "#\n");
-	fprintf(fp, "# (%Ld profiler events)\n", (__u64)total_samples);
+	fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples);
 	fprintf(fp, "#\n");
 
 	fprintf(fp, "# Overhead");
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 2fee595..ff7e13c 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -137,8 +137,8 @@ static double sym_weight(const struct sym_entry *sym)
 	return weight;
 }
 
-static long			events;
-static long			userspace_events;
+static long			samples;
+static long			userspace_samples;
 static const char		CONSOLE_CLEAR[] = "[H[2J";
 
 static void __list_insert_active_sym(struct sym_entry *syme)
@@ -177,14 +177,14 @@ static void print_sym_table(void)
 {
 	int printed = 0, j;
 	int counter;
-	float events_per_sec = events/delay_secs;
-	float kevents_per_sec = (events-userspace_events)/delay_secs;
-	float sum_kevents = 0.0;
+	float samples_per_sec = samples/delay_secs;
+	float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
+	float sum_ksamples = 0.0;
 	struct sym_entry *syme, *n;
 	struct rb_root tmp = RB_ROOT;
 	struct rb_node *nd;
 
-	events = userspace_events = 0;
+	samples = userspace_samples = 0;
 
 	/* Sort the active symbols */
 	pthread_mutex_lock(&active_symbols_lock);
@@ -196,7 +196,7 @@ static void print_sym_table(void)
 		if (syme->snap_count != 0) {
 			syme->weight = sym_weight(syme);
 			rb_insert_active_sym(&tmp, syme);
-			sum_kevents += syme->snap_count;
+			sum_ksamples += syme->snap_count;
 
 			for (j = 0; j < nr_counters; j++)
 				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
@@ -209,8 +209,8 @@ static void print_sym_table(void)
 	printf(
 "------------------------------------------------------------------------------\n");
 	printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
-		events_per_sec,
-		100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)));
+		samples_per_sec,
+		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
 
 	if (nr_counters == 1) {
 		printf("%d", event_count[0]);
@@ -246,12 +246,12 @@ static void print_sym_table(void)
 	printf("------------------------------------------------------------------------------\n\n");
 
 	if (nr_counters == 1)
-		printf("             events    pcnt");
+		printf("             samples    pcnt");
 	else
-		printf("  weight     events    pcnt");
+		printf("  weight     samples    pcnt");
 
 	printf("         RIP          kernel function\n"
-	       	       "  ______     ______   _____   ________________   _______________\n\n"
+	       	       "  ______     _______   _____   ________________   _______________\n\n"
 	);
 
 	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
@@ -263,8 +263,8 @@ static void print_sym_table(void)
 		if (++printed > print_entries || syme->snap_count < count_filter)
 			continue;
 
-		pcnt = 100.0 - (100.0 * ((sum_kevents - syme->snap_count) /
-					 sum_kevents));
+		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
+					 sum_ksamples));
 
 		/*
 		 * We color high-overhead entries in red, low-overhead
@@ -276,9 +276,9 @@ static void print_sym_table(void)
 			color = PERF_COLOR_GREEN;
 
 		if (nr_counters == 1)
-			printf("%19.2f - ", syme->weight);
+			printf("%20.2f - ", syme->weight);
 		else
-			printf("%8.1f %10ld - ", syme->weight, syme->snap_count);
+			printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
 
 		color_fprintf(stdout, color, "%4.1f%%", pcnt);
 		printf(" - %016llx : %s\n", sym->start, sym->name);
@@ -318,7 +318,7 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
 		return 1;
 
 	syme = dso__sym_priv(self, sym);
-	/* Tag events to be skipped. */
+	/* Tag samples to be skipped. */
 	if (!strcmp("default_idle", name) ||
 	    !strcmp("cpu_idle", name) ||
 	    !strcmp("enter_idle", name) ||
@@ -405,15 +405,15 @@ static void record_ip(uint64_t ip, int counter)
 		}
 	}
 
-	events--;
+	samples--;
 }
 
 static void process_event(uint64_t ip, int counter)
 {
-	events++;
+	samples++;
 
 	if (ip < min_ip || ip > max_ip) {
-		userspace_events++;
+		userspace_samples++;
 		return;
 	}
 
@@ -451,7 +451,7 @@ static void mmap_read(struct mmap_data *md)
 
 	/*
 	 * If we're further behind than half the buffer, there's a chance
-	 * the writer will bite our tail and screw up the events under us.
+	 * the writer will bite our tail and mess up the samples under us.
 	 *
 	 * If we somehow ended up ahead of the head, we got messed up.
 	 *
@@ -608,14 +608,14 @@ static int __cmd_top(void)
 	}
 
 	while (1) {
-		int hits = events;
+		int hits = samples;
 
 		for (i = 0; i < nr_cpus; i++) {
 			for (counter = 0; counter < nr_counters; counter++)
 				mmap_read(&mmap_array[i][counter]);
 		}
 
-		if (hits == events)
+		if (hits == samples)
 			ret = poll(event_array, nr_poll, 100);
 	}
 
-- 
cgit v0.10.2


From cb66c692d1ae257f32dc7f6085cf9cb9f2f6bab8 Mon Sep 17 00:00:00 2001
From: Ma Ling <ling.ma@intel.com>
Date: Sun, 31 May 2009 16:58:32 +0800
Subject: drm/i915: Set correct TV detection voltage level override values

We detect TV connect status by setting DAC voltage level override
values as 0.7 voltage for DAC_A/B/C. The corresponding 2-bits shold be 0x2,
In order correctly to set last bit as 0, at first we must clean it.

It fixed freedesktop.org bug #21204

Signed-off-by: Ma Ling <ling.ma@intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 99681cf..79df9e9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -958,15 +958,15 @@
 # define DAC_A_1_3_V			(0 << 4)
 # define DAC_A_1_1_V			(1 << 4)
 # define DAC_A_0_7_V			(2 << 4)
-# define DAC_A_OFF			(3 << 4)
+# define DAC_A_MASK			(3 << 4)
 # define DAC_B_1_3_V			(0 << 2)
 # define DAC_B_1_1_V			(1 << 2)
 # define DAC_B_0_7_V			(2 << 2)
-# define DAC_B_OFF			(3 << 2)
+# define DAC_B_MASK			(3 << 2)
 # define DAC_C_1_3_V			(0 << 0)
 # define DAC_C_1_1_V			(1 << 0)
 # define DAC_C_0_7_V			(2 << 0)
-# define DAC_C_OFF			(3 << 0)
+# define DAC_C_MASK			(3 << 0)
 
 /**
  * CSC coefficients are stored in a floating point format with 9 bits of
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index d2c3298..c7d9ef0 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1392,6 +1392,9 @@ intel_tv_detect_type (struct drm_crtc *crtc, struct intel_output *intel_output)
 		tv_ctl &= ~TV_TEST_MODE_MASK;
 		tv_ctl |= TV_TEST_MODE_MONITOR_DETECT;
 		tv_dac &= ~TVDAC_SENSE_MASK;
+		tv_dac &= ~DAC_A_MASK;
+		tv_dac &= ~DAC_B_MASK;
+		tv_dac &= ~DAC_C_MASK;
 		tv_dac |= (TVDAC_STATE_CHG_EN |
 			   TVDAC_A_SENSE_CTL |
 			   TVDAC_B_SENSE_CTL |
-- 
cgit v0.10.2


From 2245fda810f870dce9b030e6aa604320abba53a5 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Sat, 30 May 2009 20:42:29 -0700
Subject: drm/i915: Don't trim cursor addresses to 11 bits

We can safely assume that cursor addresses will not extend beyond the
addressable screen dimensions; setting the additional bits is harmless in
any case.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 05bd97e..c5c4582 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2012,16 +2012,16 @@ static int intel_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 	uint32_t adder;
 
 	if (x < 0) {
-		temp |= (CURSOR_POS_SIGN << CURSOR_X_SHIFT);
+		temp |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
 		x = -x;
 	}
 	if (y < 0) {
-		temp |= (CURSOR_POS_SIGN << CURSOR_Y_SHIFT);
+		temp |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
 		y = -y;
 	}
 
-	temp |= ((x & CURSOR_POS_MASK) << CURSOR_X_SHIFT);
-	temp |= ((y & CURSOR_POS_MASK) << CURSOR_Y_SHIFT);
+	temp |= x << CURSOR_X_SHIFT;
+	temp |= y << CURSOR_Y_SHIFT;
 
 	adder = intel_crtc->cursor_addr;
 	I915_WRITE((pipe == 0) ? CURAPOS : CURBPOS, temp);
-- 
cgit v0.10.2


From 040d87f15a0129242463d3ed7c48381505f596e2 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Sat, 30 May 2009 20:42:33 -0700
Subject: drm/i915: Add Display Port register defines

This adds the register definitions for the display port enable register
along with those for the GMCH and Link M/N ratios required to drive display
port outputs.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 79df9e9..f6237a0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -638,8 +638,11 @@
 /* Hotplug control (945+ only) */
 #define PORT_HOTPLUG_EN		0x61110
 #define   HDMIB_HOTPLUG_INT_EN			(1 << 29)
+#define   DPB_HOTPLUG_INT_EN			(1 << 29)
 #define   HDMIC_HOTPLUG_INT_EN			(1 << 28)
+#define   DPC_HOTPLUG_INT_EN			(1 << 28)
 #define   HDMID_HOTPLUG_INT_EN			(1 << 27)
+#define   DPD_HOTPLUG_INT_EN			(1 << 27)
 #define   SDVOB_HOTPLUG_INT_EN			(1 << 26)
 #define   SDVOC_HOTPLUG_INT_EN			(1 << 25)
 #define   TV_HOTPLUG_INT_EN			(1 << 18)
@@ -672,8 +675,11 @@
 
 #define PORT_HOTPLUG_STAT	0x61114
 #define   HDMIB_HOTPLUG_INT_STATUS		(1 << 29)
+#define   DPB_HOTPLUG_INT_STATUS		(1 << 29)
 #define   HDMIC_HOTPLUG_INT_STATUS		(1 << 28)
+#define   DPC_HOTPLUG_INT_STATUS		(1 << 28)
 #define   HDMID_HOTPLUG_INT_STATUS		(1 << 27)
+#define   DPD_HOTPLUG_INT_STATUS		(1 << 27)
 #define   CRT_HOTPLUG_INT_STATUS		(1 << 11)
 #define   TV_HOTPLUG_INT_STATUS			(1 << 10)
 #define   CRT_HOTPLUG_MONITOR_MASK		(3 << 8)
@@ -1335,6 +1341,163 @@
 #define TV_V_CHROMA_0		0x68400
 #define TV_V_CHROMA_42		0x684a8
 
+/* Display Port */
+#define DP_B				0x64100
+#define DP_C				0x64200
+#define DP_D				0x64300
+
+#define   DP_PORT_EN			(1 << 31)
+#define   DP_PIPEB_SELECT		(1 << 30)
+
+/* Link training mode - select a suitable mode for each stage */
+#define   DP_LINK_TRAIN_PAT_1		(0 << 28)
+#define   DP_LINK_TRAIN_PAT_2		(1 << 28)
+#define   DP_LINK_TRAIN_PAT_IDLE	(2 << 28)
+#define   DP_LINK_TRAIN_OFF		(3 << 28)
+#define   DP_LINK_TRAIN_MASK		(3 << 28)
+#define   DP_LINK_TRAIN_SHIFT		28
+
+/* Signal voltages. These are mostly controlled by the other end */
+#define   DP_VOLTAGE_0_4		(0 << 25)
+#define   DP_VOLTAGE_0_6		(1 << 25)
+#define   DP_VOLTAGE_0_8		(2 << 25)
+#define   DP_VOLTAGE_1_2		(3 << 25)
+#define   DP_VOLTAGE_MASK		(7 << 25)
+#define   DP_VOLTAGE_SHIFT		25
+
+/* Signal pre-emphasis levels, like voltages, the other end tells us what
+ * they want
+ */
+#define   DP_PRE_EMPHASIS_0		(0 << 22)
+#define   DP_PRE_EMPHASIS_3_5		(1 << 22)
+#define   DP_PRE_EMPHASIS_6		(2 << 22)
+#define   DP_PRE_EMPHASIS_9_5		(3 << 22)
+#define   DP_PRE_EMPHASIS_MASK		(7 << 22)
+#define   DP_PRE_EMPHASIS_SHIFT		22
+
+/* How many wires to use. I guess 3 was too hard */
+#define   DP_PORT_WIDTH_1		(0 << 19)
+#define   DP_PORT_WIDTH_2		(1 << 19)
+#define   DP_PORT_WIDTH_4		(3 << 19)
+#define   DP_PORT_WIDTH_MASK		(7 << 19)
+
+/* Mystic DPCD version 1.1 special mode */
+#define   DP_ENHANCED_FRAMING		(1 << 18)
+
+/** locked once port is enabled */
+#define   DP_PORT_REVERSAL		(1 << 15)
+
+/** sends the clock on lane 15 of the PEG for debug */
+#define   DP_CLOCK_OUTPUT_ENABLE	(1 << 13)
+
+#define   DP_SCRAMBLING_DISABLE		(1 << 12)
+
+/** limit RGB values to avoid confusing TVs */
+#define   DP_COLOR_RANGE_16_235		(1 << 8)
+
+/** Turn on the audio link */
+#define   DP_AUDIO_OUTPUT_ENABLE	(1 << 6)
+
+/** vs and hs sync polarity */
+#define   DP_SYNC_VS_HIGH		(1 << 4)
+#define   DP_SYNC_HS_HIGH		(1 << 3)
+
+/** A fantasy */
+#define   DP_DETECTED			(1 << 2)
+
+/** The aux channel provides a way to talk to the
+ * signal sink for DDC etc. Max packet size supported
+ * is 20 bytes in each direction, hence the 5 fixed
+ * data registers
+ */
+#define DPB_AUX_CH_CTL			0x64110
+#define DPB_AUX_CH_DATA1		0x64114
+#define DPB_AUX_CH_DATA2		0x64118
+#define DPB_AUX_CH_DATA3		0x6411c
+#define DPB_AUX_CH_DATA4		0x64120
+#define DPB_AUX_CH_DATA5		0x64124
+
+#define DPC_AUX_CH_CTL			0x64210
+#define DPC_AUX_CH_DATA1		0x64214
+#define DPC_AUX_CH_DATA2		0x64218
+#define DPC_AUX_CH_DATA3		0x6421c
+#define DPC_AUX_CH_DATA4		0x64220
+#define DPC_AUX_CH_DATA5		0x64224
+
+#define DPD_AUX_CH_CTL			0x64310
+#define DPD_AUX_CH_DATA1		0x64314
+#define DPD_AUX_CH_DATA2		0x64318
+#define DPD_AUX_CH_DATA3		0x6431c
+#define DPD_AUX_CH_DATA4		0x64320
+#define DPD_AUX_CH_DATA5		0x64324
+
+#define   DP_AUX_CH_CTL_SEND_BUSY	    (1 << 31)
+#define   DP_AUX_CH_CTL_DONE		    (1 << 30)
+#define   DP_AUX_CH_CTL_INTERRUPT	    (1 << 29)
+#define   DP_AUX_CH_CTL_TIME_OUT_ERROR	    (1 << 28)
+#define   DP_AUX_CH_CTL_TIME_OUT_400us	    (0 << 26)
+#define   DP_AUX_CH_CTL_TIME_OUT_600us	    (1 << 26)
+#define   DP_AUX_CH_CTL_TIME_OUT_800us	    (2 << 26)
+#define   DP_AUX_CH_CTL_TIME_OUT_1600us	    (3 << 26)
+#define   DP_AUX_CH_CTL_TIME_OUT_MASK	    (3 << 26)
+#define   DP_AUX_CH_CTL_RECEIVE_ERROR	    (1 << 25)
+#define   DP_AUX_CH_CTL_MESSAGE_SIZE_MASK    (0x1f << 20)
+#define   DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT   20
+#define   DP_AUX_CH_CTL_PRECHARGE_2US_MASK   (0xf << 16)
+#define   DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT  16
+#define   DP_AUX_CH_CTL_AUX_AKSV_SELECT	    (1 << 15)
+#define   DP_AUX_CH_CTL_MANCHESTER_TEST	    (1 << 14)
+#define   DP_AUX_CH_CTL_SYNC_TEST	    (1 << 13)
+#define   DP_AUX_CH_CTL_DEGLITCH_TEST	    (1 << 12)
+#define   DP_AUX_CH_CTL_PRECHARGE_TEST	    (1 << 11)
+#define   DP_AUX_CH_CTL_BIT_CLOCK_2X_MASK    (0x7ff)
+#define   DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT   0
+
+/*
+ * Computing GMCH M and N values for the Display Port link
+ *
+ * GMCH M/N = dot clock * bytes per pixel / ls_clk * # of lanes
+ *
+ * ls_clk (we assume) is the DP link clock (1.62 or 2.7 GHz)
+ *
+ * The GMCH value is used internally
+ *
+ * bytes_per_pixel is the number of bytes coming out of the plane,
+ * which is after the LUTs, so we want the bytes for our color format.
+ * For our current usage, this is always 3, one byte for R, G and B.
+ */
+#define PIPEA_GMCH_DATA_M			0x70050
+#define PIPEB_GMCH_DATA_M			0x71050
+
+/* Transfer unit size for display port - 1, default is 0x3f (for TU size 64) */
+#define   PIPE_GMCH_DATA_M_TU_SIZE_MASK		(0x3f << 25)
+#define   PIPE_GMCH_DATA_M_TU_SIZE_SHIFT	25
+
+#define   PIPE_GMCH_DATA_M_MASK			(0xffffff)
+
+#define PIPEA_GMCH_DATA_N			0x70054
+#define PIPEB_GMCH_DATA_N			0x71054
+#define   PIPE_GMCH_DATA_N_MASK			(0xffffff)
+
+/*
+ * Computing Link M and N values for the Display Port link
+ *
+ * Link M / N = pixel_clock / ls_clk
+ *
+ * (the DP spec calls pixel_clock the 'strm_clk')
+ *
+ * The Link value is transmitted in the Main Stream
+ * Attributes and VB-ID.
+ */
+
+#define PIPEA_DP_LINK_M				0x70060
+#define PIPEB_DP_LINK_M				0x71060
+#define   PIPEA_DP_LINK_M_MASK			(0xffffff)
+
+#define PIPEA_DP_LINK_N				0x70064
+#define PIPEB_DP_LINK_N				0x71064
+#define   PIPEA_DP_LINK_N_MASK			(0xffffff)
+
 /* Display & cursor control */
 
 /* Pipe A */
-- 
cgit v0.10.2


From 59a036cfbd29aadf40d2b754cfebee2a96268752 Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Sun, 31 May 2009 17:16:22 +0800
Subject: drm/i915: Add the structure of child_device_config in video BIOS
 tables.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index 8ca2cde..fe72e1c 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -135,6 +135,86 @@ struct bdb_general_features {
 	u8 rsvd11:6; /* finish byte */
 } __attribute__((packed));
 
+/* pre-915 */
+#define GPIO_PIN_DVI_LVDS	0x03 /* "DVI/LVDS DDC GPIO pins" */
+#define GPIO_PIN_ADD_I2C	0x05 /* "ADDCARD I2C GPIO pins" */
+#define GPIO_PIN_ADD_DDC	0x04 /* "ADDCARD DDC GPIO pins" */
+#define GPIO_PIN_ADD_DDC_I2C	0x06 /* "ADDCARD DDC/I2C GPIO pins" */
+
+/* Pre 915 */
+#define DEVICE_TYPE_NONE	0x00
+#define DEVICE_TYPE_CRT		0x01
+#define DEVICE_TYPE_TV		0x09
+#define DEVICE_TYPE_EFP		0x12
+#define DEVICE_TYPE_LFP		0x22
+/* On 915+ */
+#define DEVICE_TYPE_CRT_DPMS		0x6001
+#define DEVICE_TYPE_CRT_DPMS_HOTPLUG	0x4001
+#define DEVICE_TYPE_TV_COMPOSITE	0x0209
+#define DEVICE_TYPE_TV_MACROVISION	0x0289
+#define DEVICE_TYPE_TV_RF_COMPOSITE	0x020c
+#define DEVICE_TYPE_TV_SVIDEO_COMPOSITE	0x0609
+#define DEVICE_TYPE_TV_SCART		0x0209
+#define DEVICE_TYPE_TV_CODEC_HOTPLUG_PWR 0x6009
+#define DEVICE_TYPE_EFP_HOTPLUG_PWR	0x6012
+#define DEVICE_TYPE_EFP_DVI_HOTPLUG_PWR	0x6052
+#define DEVICE_TYPE_EFP_DVI_I		0x6053
+#define DEVICE_TYPE_EFP_DVI_D_DUAL	0x6152
+#define DEVICE_TYPE_EFP_DVI_D_HDCP	0x60d2
+#define DEVICE_TYPE_OPENLDI_HOTPLUG_PWR	0x6062
+#define DEVICE_TYPE_OPENLDI_DUALPIX	0x6162
+#define DEVICE_TYPE_LFP_PANELLINK	0x5012
+#define DEVICE_TYPE_LFP_CMOS_PWR	0x5042
+#define DEVICE_TYPE_LFP_LVDS_PWR	0x5062
+#define DEVICE_TYPE_LFP_LVDS_DUAL	0x5162
+#define DEVICE_TYPE_LFP_LVDS_DUAL_HDCP	0x51e2
+
+#define DEVICE_CFG_NONE		0x00
+#define DEVICE_CFG_12BIT_DVOB	0x01
+#define DEVICE_CFG_12BIT_DVOC	0x02
+#define DEVICE_CFG_24BIT_DVOBC	0x09
+#define DEVICE_CFG_24BIT_DVOCB	0x0a
+#define DEVICE_CFG_DUAL_DVOB	0x11
+#define DEVICE_CFG_DUAL_DVOC	0x12
+#define DEVICE_CFG_DUAL_DVOBC	0x13
+#define DEVICE_CFG_DUAL_LINK_DVOBC	0x19
+#define DEVICE_CFG_DUAL_LINK_DVOCB	0x1a
+
+#define DEVICE_WIRE_NONE	0x00
+#define DEVICE_WIRE_DVOB	0x01
+#define DEVICE_WIRE_DVOC	0x02
+#define DEVICE_WIRE_DVOBC	0x03
+#define DEVICE_WIRE_DVOBB	0x05
+#define DEVICE_WIRE_DVOCC	0x06
+#define DEVICE_WIRE_DVOB_MASTER 0x0d
+#define DEVICE_WIRE_DVOC_MASTER 0x0e
+
+#define DEVICE_PORT_DVOA	0x00 /* none on 845+ */
+#define DEVICE_PORT_DVOB	0x01
+#define DEVICE_PORT_DVOC	0x02
+
+struct child_device_config {
+	u16 handle;
+	u16 device_type;
+	u8  device_id[10]; /* See DEVICE_TYPE_* above */
+	u16 addin_offset;
+	u8  dvo_port; /* See Device_PORT_* above */
+	u8  i2c_pin;
+	u8  slave_addr;
+	u8  ddc_pin;
+	u16 edid_ptr;
+	u8  dvo_cfg; /* See DEVICE_CFG_* above */
+	u8  dvo2_port;
+	u8  i2c2_pin;
+	u8  slave2_addr;
+	u8  ddc2_pin;
+	u8  capabilities;
+	u8  dvo_wiring;/* See DEVICE_WIRE_* above */
+	u8  dvo2_wiring;
+	u16 extended_type;
+	u8  dvo_function;
+} __attribute__((packed));
+
 struct bdb_general_definitions {
 	/* DDC GPIO */
 	u8 crt_ddc_gmbus_pin;
@@ -149,14 +229,19 @@ struct bdb_general_definitions {
 	u8 boot_display[2];
 	u8 child_dev_size;
 
-	/* device info */
-	u8 tv_or_lvds_info[33];
-	u8 dev1[33];
-	u8 dev2[33];
-	u8 dev3[33];
-	u8 dev4[33];
-	/* may be another device block here on some platforms */
-};
+	/*
+	 * Device info:
+	 * If TV is present, it'll be at devices[0].
+	 * LVDS will be next, either devices[0] or [1], if present.
+	 * On some platforms the number of device is 6. But could be as few as
+	 * 4 if both TV and LVDS are missing.
+	 * And the device num is related with the size of general definition
+	 * block. It is obtained by using the following formula:
+	 * number = (block_size - sizeof(bdb_general_definitions))/
+	 * 		sizeof(child_device_config);
+	 */
+	struct child_device_config devices[0];
+} __attribute__((packed));
 
 struct bdb_lvds_options {
 	u8 panel_type;
-- 
cgit v0.10.2


From 9b9d172d06b0f2d51cc9431e2c6c3055f0cf10ef Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Sun, 31 May 2009 17:17:17 +0800
Subject: drm/i915: parse VBT general definition block to get the SDVO device
 info

The general definition block contains the child device tables, which include
the SDVO device info. For example: device slave address, device dvo port,
device type.

We will get the info of SDVO device by parsing the general definition blocks.
Only when a valid slave address is found, it is regarded as the SDVO device.
And the info of DVO port and slave address is recorded.

http://bugs.freedesktop.org/show_bug.cgi?id=20429

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ded9e78..db81f55 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -126,6 +126,13 @@ struct drm_i915_fence_reg {
 	struct drm_gem_object *obj;
 };
 
+struct sdvo_device_mapping {
+	u8 dvo_port;
+	u8 slave_addr;
+	u8 dvo_wiring;
+	u8 initialized;
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 
@@ -389,6 +396,7 @@ typedef struct drm_i915_private {
 		/* storage for physical objects */
 		struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
 	} mm;
+	struct sdvo_device_mapping sdvo_mappings[2];
 } drm_i915_private_t;
 
 /** driver private structure attached to each drm_gem_object */
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 9d78cff..754dd22 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -30,6 +30,8 @@
 #include "i915_drv.h"
 #include "intel_bios.h"
 
+#define	SLAVE_ADDR1	0x70
+#define	SLAVE_ADDR2	0x72
 
 static void *
 find_section(struct bdb_header *bdb, int section_id)
@@ -193,6 +195,88 @@ parse_general_features(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void
+parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
+		       struct bdb_header *bdb)
+{
+	struct sdvo_device_mapping *p_mapping;
+	struct bdb_general_definitions *p_defs;
+	struct child_device_config *p_child;
+	int i, child_device_num, count;
+	u16	block_size, *block_ptr;
+
+	p_defs = find_section(bdb, BDB_GENERAL_DEFINITIONS);
+	if (!p_defs) {
+		DRM_DEBUG("No general definition block is found\n");
+		return;
+	}
+	/* judge whether the size of child device meets the requirements.
+	 * If the child device size obtained from general definition block
+	 * is different with sizeof(struct child_device_config), skip the
+	 * parsing of sdvo device info
+	 */
+	if (p_defs->child_dev_size != sizeof(*p_child)) {
+		/* different child dev size . Ignore it */
+		DRM_DEBUG("different child size is found. Invalid.\n");
+		return;
+	}
+	/* get the block size of general definitions */
+	block_ptr = (u16 *)((char *)p_defs - 2);
+	block_size = *block_ptr;
+	/* get the number of child device */
+	child_device_num = (block_size - sizeof(*p_defs)) /
+				sizeof(*p_child);
+	count = 0;
+	for (i = 0; i < child_device_num; i++) {
+		p_child = &(p_defs->devices[i]);
+		if (!p_child->device_type) {
+			/* skip the device block if device type is invalid */
+			continue;
+		}
+		if (p_child->slave_addr != SLAVE_ADDR1 &&
+			p_child->slave_addr != SLAVE_ADDR2) {
+			/*
+			 * If the slave address is neither 0x70 nor 0x72,
+			 * it is not a SDVO device. Skip it.
+			 */
+			continue;
+		}
+		if (p_child->dvo_port != DEVICE_PORT_DVOB &&
+			p_child->dvo_port != DEVICE_PORT_DVOC) {
+			/* skip the incorrect SDVO port */
+			DRM_DEBUG("Incorrect SDVO port. Skip it \n");
+			continue;
+		}
+		DRM_DEBUG("the SDVO device with slave addr %2x is found on "
+				"%s port\n",
+				p_child->slave_addr,
+				(p_child->dvo_port == DEVICE_PORT_DVOB) ?
+					"SDVOB" : "SDVOC");
+		p_mapping = &(dev_priv->sdvo_mappings[p_child->dvo_port - 1]);
+		if (!p_mapping->initialized) {
+			p_mapping->dvo_port = p_child->dvo_port;
+			p_mapping->slave_addr = p_child->slave_addr;
+			p_mapping->dvo_wiring = p_child->dvo_wiring;
+			p_mapping->initialized = 1;
+		} else {
+			DRM_DEBUG("Maybe one SDVO port is shared by "
+					 "two SDVO device.\n");
+		}
+		if (p_child->slave2_addr) {
+			/* Maybe this is a SDVO device with multiple inputs */
+			/* And the mapping info is not added */
+			DRM_DEBUG("there exists the slave2_addr. Maybe this "
+				"is a SDVO device with multiple inputs.\n");
+		}
+		count++;
+	}
+
+	if (!count) {
+		/* No SDVO device info is found */
+		DRM_DEBUG("No SDVO device info is found in VBT\n");
+	}
+	return;
+}
 /**
  * intel_init_bios - initialize VBIOS settings & find VBT
  * @dev: DRM device
@@ -242,7 +326,7 @@ intel_init_bios(struct drm_device *dev)
 	parse_general_features(dev_priv, bdb);
 	parse_lfp_panel_data(dev_priv, bdb);
 	parse_sdvo_panel_data(dev_priv, bdb);
-
+	parse_sdvo_device_mapping(dev_priv, bdb);
 	pci_unmap_rom(pdev, bios);
 
 	return 0;
-- 
cgit v0.10.2


From 714605e4a05787c51a5ac36c926d2169cfdfbfba Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Sun, 31 May 2009 17:18:07 +0800
Subject: drm/i915: Initialize the SDVO device based on the sdvo info parsed
 from VBT

http://bugs.freedesktop.org/show_bug.cgi?id=20429

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
[anholt: Massive cleanup of the slave addr function]
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index f3ef6bf..d8fb88d 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1741,6 +1741,43 @@ static struct i2c_algorithm intel_sdvo_i2c_bit_algo = {
 	.master_xfer	= intel_sdvo_master_xfer,
 };
 
+static u8
+intel_sdvo_get_slave_addr(struct drm_device *dev, int output_device)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct sdvo_device_mapping *my_mapping, *other_mapping;
+
+	if (output_device == SDVOB) {
+		my_mapping = &dev_priv->sdvo_mappings[0];
+		other_mapping = &dev_priv->sdvo_mappings[1];
+	} else {
+		my_mapping = &dev_priv->sdvo_mappings[1];
+		other_mapping = &dev_priv->sdvo_mappings[0];
+	}
+
+	/* If the BIOS described our SDVO device, take advantage of it. */
+	if (my_mapping->slave_addr)
+		return my_mapping->slave_addr;
+
+	/* If the BIOS only described a different SDVO device, use the
+	 * address that it isn't using.
+	 */
+	if (other_mapping->slave_addr) {
+		if (other_mapping->slave_addr == 0x70)
+			return 0x72;
+		else
+			return 0x70;
+	}
+
+	/* No SDVO device info is found for another DVO port,
+	 * so use mapping assumption we had before BIOS parsing.
+	 */
+	if (output_device == SDVOB)
+		return 0x70;
+	else
+		return 0x72;
+}
+
 bool intel_sdvo_init(struct drm_device *dev, int output_device)
 {
 	struct drm_connector *connector;
@@ -1752,6 +1789,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 	u8 ch[0x40];
 	int i;
 	int encoder_type, output_id;
+	u8 slave_addr;
 
 	intel_output = kcalloc(sizeof(struct intel_output)+sizeof(struct intel_sdvo_priv), 1, GFP_KERNEL);
 	if (!intel_output) {
@@ -1770,16 +1808,15 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 	if (!i2cbus)
 		goto err_inteloutput;
 
+	slave_addr = intel_sdvo_get_slave_addr(dev, output_device);
 	sdvo_priv->i2c_bus = i2cbus;
 
 	if (output_device == SDVOB) {
 		output_id = 1;
-		sdvo_priv->i2c_bus->slave_addr = 0x38;
 	} else {
 		output_id = 2;
-		sdvo_priv->i2c_bus->slave_addr = 0x39;
 	}
-
+	sdvo_priv->i2c_bus->slave_addr = slave_addr >> 1;
 	sdvo_priv->output_device = output_device;
 	intel_output->i2c_bus = i2cbus;
 	intel_output->dev_priv = sdvo_priv;
-- 
cgit v0.10.2


From 70aa96ca2d8d938fc036ef8fd189b0151f4fc3ba Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Wed, 27 May 2009 17:20:39 -0400
Subject: drm/i915: add ignore lvds quirk info for AOpen Mini PC

Fix a FIXME in the intel LVDS bring-up code, adding the appropriate
blacklist entry for the AOpen Mini PC, courtesy of a dmidecode
dump from Florian Demmer.

Signed-off-by: Jarod Wilson <jarod@redhat.com>
CC: Florian Demmer <florian@demmer.org>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index eea3a54..e4ca6a3 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -485,8 +485,14 @@ static const struct dmi_system_id __initdata intel_no_lvds[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Studio Hybrid 140g"),
 		},
 	},
-
-	/* FIXME: add a check for the Aopen Mini PC */
+	{
+		.callback = intel_no_lvds_dmi_callback,
+		.ident = "AOpen Mini PC",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "AOpen"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "i965GMx-IF"),
+		},
+	},
 
 	{ }	/* terminating entry */
 };
-- 
cgit v0.10.2


From 42c2798b35b95c471877133e19ccc3cab00e9b65 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Tue, 5 May 2009 13:13:16 -0700
Subject: drm/i915: apply G45 vblank count code to all G4x chips and fix
 max_frame_count

All G4x and newer chips use the new style frame count register, with a
full 32 bit frame count.  Update the code to reflect this.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5d36059..68e882c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1161,8 +1161,11 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 #endif
 
 	dev->driver->get_vblank_counter = i915_get_vblank_counter;
-	if (IS_GM45(dev))
+	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
+	if (IS_G4X(dev)) {
+		dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */
 		dev->driver->get_vblank_counter = gm45_get_vblank_counter;
+	}
 
 	i915_gem_load(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4b0bcbd..701d680 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -574,8 +574,6 @@ int i915_driver_irq_postinstall(struct drm_device *dev)
 
 	dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B;
 
-	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
-
 	/* Unmask the interrupts that we always want on. */
 	dev_priv->irq_mask_reg = ~I915_INTERRUPT_ENABLE_FIX;
 
-- 
cgit v0.10.2


From b66d18ddb16603d1e1ec39cb2ff3abf3fd212180 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Fri, 15 May 2009 14:11:48 -0700
Subject: drm/i915: avoid non-atomic sysrq execution

The sysrq functions are executed in hardirq context, so we shouldn't be
calling sleeping functions from them, like mutex_locks or memory
allocations.

Fix up the i915 sysrq handler to avoid this.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index e4652dc..7a66b91 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -857,9 +857,15 @@ void intelfb_restore(void)
 	drm_crtc_helper_set_config(&kernelfb_mode);
 }
 
+static void intelfb_restore_work_fn(struct work_struct *ignored)
+{
+	intelfb_restore();
+}
+static DECLARE_WORK(intelfb_restore_work, intelfb_restore_work_fn);
+
 static void intelfb_sysrq(int dummy1, struct tty_struct *dummy3)
 {
-        intelfb_restore();
+        schedule_work(&intelfb_restore_work);
 }
 
 static struct sysrq_key_op sysrq_intelfb_restore_op = {
-- 
cgit v0.10.2


From 1b8e69662e1a086878bf930a6042daf7f8a076cc Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas at hp.com>
Date: Fri, 5 Jun 2009 14:37:23 +0000
Subject: pnp: add PNP resource range checking function

Add a PNP resource range check function, indicating whether a resource
has been assigned to any device.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
[apw@canonical.com: fixed up exports et al]
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c
index f604061..ba97654 100644
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -638,6 +638,24 @@ int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t start,
 }
 EXPORT_SYMBOL(pnp_possible_config);
 
+int pnp_range_reserved(resource_size_t start, resource_size_t end)
+{
+	struct pnp_dev *dev;
+	struct pnp_resource *pnp_res;
+	resource_size_t *dev_start, *dev_end;
+
+	pnp_for_each_dev(dev) {
+		list_for_each_entry(pnp_res, &dev->resources, list) {
+			dev_start = &pnp_res->res.start;
+			dev_end   = &pnp_res->res.end;
+			if (ranged_conflict(&start, &end, dev_start, dev_end))
+				return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(pnp_range_reserved);
+
 /* format is: pnp_reserve_irq=irq1[,irq2] .... */
 static int __init pnp_setup_reserve_irq(char *str)
 {
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index ca3c887..b063c73 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -446,6 +446,7 @@ int pnp_start_dev(struct pnp_dev *dev);
 int pnp_stop_dev(struct pnp_dev *dev);
 int pnp_activate_dev(struct pnp_dev *dev);
 int pnp_disable_dev(struct pnp_dev *dev);
+int pnp_range_reserved(resource_size_t start, resource_size_t end);
 
 /* protocol helpers */
 int pnp_is_active(struct pnp_dev *dev);
@@ -476,6 +477,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; }
+static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;}
 
 /* protocol helpers */
 static inline int pnp_is_active(struct pnp_dev *dev) { return 0; }
-- 
cgit v0.10.2


From b7bbf876087e0e2c0ba723a8398083c9a9ac1dfd Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 16:11:07 +0200
Subject: ALSA: ctxfi - Use native timer interrupt on emu20k1

emu20k1 has a native timer interrupt based on the audio clock, which
is more accurate than the system timer (from the synchronization POV).
This patch adds the code to handle this with multiple streams.

The system timer is still used on emu20k2, and can be used also for
emu20k1 easily by changing USE_SYSTEM_TIMER to 1 in cttimer.c.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/Makefile b/sound/pci/ctxfi/Makefile
index 2904323..15075f8 100644
--- a/sound/pci/ctxfi/Makefile
+++ b/sound/pci/ctxfi/Makefile
@@ -1,5 +1,5 @@
 snd-ctxfi-objs := xfi.o ctatc.o ctvmem.o ctpcm.o ctmixer.o ctresource.o \
-	ctsrc.o ctamixer.o ctdaio.o ctimap.o cthardware.o \
+	ctsrc.o ctamixer.o ctdaio.o ctimap.o cthardware.o cttimer.o \
 	cthw20k2.o cthw20k1.o
 
 obj-$(CONFIG_SND_CTXFI) += snd-ctxfi.o
diff --git a/sound/pci/ctxfi/ct20k1reg.h b/sound/pci/ctxfi/ct20k1reg.h
index c62e677..f2e34e3 100644
--- a/sound/pci/ctxfi/ct20k1reg.h
+++ b/sound/pci/ctxfi/ct20k1reg.h
@@ -589,6 +589,8 @@
 
 #define		WC		0x1C6000
 #define		TIMR		0x1C6004
+# define	TIMR_IE		(1<<15)
+# define	TIMR_IP		(1<<14)
 
 #define		GIP		0x1C6010
 #define		GIE		0x1C6014
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 6849475..10b7419 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -22,6 +22,7 @@
 #include "ctsrc.h"
 #include "ctamixer.h"
 #include "ctdaio.h"
+#include "cttimer.h"
 #include <linux/delay.h>
 #include <sound/pcm.h>
 #include <sound/control.h>
@@ -307,6 +308,8 @@ static int atc_pcm_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 			src = apcm->src;
 	}
 
+	ct_timer_prepare(apcm->timer);
+
 	return 0;
 
 error1:
@@ -389,6 +392,7 @@ static int atc_pcm_playback_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	src->ops->set_state(src, SRC_STATE_INIT);
 	src->ops->commit_write(src);
 
+	ct_timer_start(apcm->timer);
 	return 0;
 }
 
@@ -397,6 +401,8 @@ static int atc_pcm_stop(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	struct src *src = NULL;
 	int i = 0;
 
+	ct_timer_stop(apcm->timer);
+
 	src = apcm->src;
 	src->ops->set_bm(src, 0);
 	src->ops->set_state(src, SRC_STATE_OFF);
@@ -701,6 +707,8 @@ static int atc_pcm_capture_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 		}
 	}
 
+	ct_timer_prepare(apcm->timer);
+
 	return 0;
 }
 
@@ -749,6 +757,7 @@ static int atc_pcm_capture_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	/* Enable relevant SRCs synchronously */
 	src_mgr->commit_write(src_mgr);
 
+	ct_timer_start(apcm->timer);
 	return 0;
 }
 
@@ -906,6 +915,8 @@ spdif_passthru_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	dao->ops->set_right_input(dao, &amixer->rsc);
 	spin_unlock_irqrestore(&atc->atc_lock, flags);
 
+	ct_timer_prepare(apcm->timer);
+
 	return 0;
 }
 
@@ -1100,6 +1111,11 @@ static int ct_atc_destroy(struct ct_atc *atc)
 	if (NULL == atc)
 		return 0;
 
+	if (atc->timer) {
+		ct_timer_free(atc->timer);
+		atc->timer = NULL;
+	}
+
 	/* Stop hardware and disable all interrupts */
 	if (NULL != atc->hw)
 		((struct hw *)atc->hw)->card_stop(atc->hw);
@@ -1586,6 +1602,10 @@ int ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 	/* Build topology */
 	atc_connect_resources(atc);
 
+	atc->timer = ct_timer_new(atc);
+	if (!atc->timer)
+		goto error1;
+
 	atc->create_alsa_devs = ct_create_alsa_devs;
 
 	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, atc, &ops);
@@ -1602,4 +1622,3 @@ error1:
 	printk(KERN_ERR "ctxfi: Something wrong!!!\n");
 	return err;
 }
-
diff --git a/sound/pci/ctxfi/ctatc.h b/sound/pci/ctxfi/ctatc.h
index b86d12c..a3f9b1b 100644
--- a/sound/pci/ctxfi/ctatc.h
+++ b/sound/pci/ctxfi/ctatc.h
@@ -59,16 +59,15 @@ struct ct_atc_chip_details {
 };
 
 struct ct_atc;
+struct ct_timer;
+struct ct_timer_instance;
 
 /* alsa pcm stream descriptor */
 struct ct_atc_pcm {
 	struct snd_pcm_substream *substream;
 	void (*interrupt)(struct ct_atc_pcm *apcm);
+	struct ct_timer_instance *timer;
 	unsigned int started:1;
-	unsigned int stop_timer:1;
-	struct timer_list timer;
-	spinlock_t timer_lock;
-	unsigned int position;
 
 	/* Only mono and interleaved modes are supported now. */
 	struct ct_vm_block *vm_block;
@@ -144,6 +143,8 @@ struct ct_atc {
 	unsigned char n_src;
 	unsigned char n_srcimp;
 	unsigned char n_pcm;
+
+	struct ct_timer *timer;
 };
 
 
diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h
index b0512df..35350cf 100644
--- a/sound/pci/ctxfi/cthardware.h
+++ b/sound/pci/ctxfi/cthardware.h
@@ -145,6 +145,12 @@ struct hw {
 	int (*daio_mgr_set_imapaddr)(void *blk, unsigned int addr);
 	int (*daio_mgr_commit_write)(struct hw *hw, void *blk);
 
+	int (*set_timer_irq)(struct hw *hw, int enable);
+	int (*set_timer_tick)(struct hw *hw, unsigned int tick);
+
+	void (*irq_callback)(void *data, unsigned int bit);
+	void *irq_callback_data;
+
 	struct pci_dev *pci;	/* the pci kernel structure of this card */
 	int irq;
 	unsigned long io_base;
@@ -157,4 +163,17 @@ int destroy_hw_obj(struct hw *hw);
 unsigned int get_field(unsigned int data, unsigned int field);
 void set_field(unsigned int *data, unsigned int field, unsigned int value);
 
+/* IRQ bits */
+#define	PLL_INT		(1 << 10) /* PLL input-clock out-of-range */
+#define FI_INT		(1 << 9)  /* forced interrupt */
+#define IT_INT		(1 << 8)  /* timer interrupt */
+#define PCI_INT		(1 << 7)  /* PCI bus error pending */
+#define URT_INT		(1 << 6)  /* UART Tx/Rx */
+#define GPI_INT		(1 << 5)  /* GPI pin */
+#define MIX_INT		(1 << 4)  /* mixer parameter segment FIFO channels */
+#define DAI_INT		(1 << 3)  /* DAI (SR-tracker or SPDIF-receiver) */
+#define TP_INT		(1 << 2)  /* transport priority queue */
+#define DSP_INT		(1 << 1)  /* DSP */
+#define SRC_INT		(1 << 0)  /* SRC channels */
+
 #endif /* CTHARDWARE_H */
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index e530a6d..550b30a 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1171,6 +1171,21 @@ static int daio_mgr_put_ctrl_blk(void *blk)
 	return 0;
 }
 
+/* Timer interrupt */
+static int set_timer_irq(struct hw *hw, int enable)
+{
+	hw_write_20kx(hw, GIE, enable ? IT_INT : 0);
+	return 0;
+}
+
+static int set_timer_tick(struct hw *hw, unsigned int ticks)
+{
+	if (ticks)
+		ticks |= TIMR_IE | TIMR_IP;
+	hw_write_20kx(hw, TIMR, ticks);
+	return 0;
+}
+
 /* Card hardware initialization block */
 struct dac_conf {
 	unsigned int msr; /* master sample rate in rsrs */
@@ -1878,6 +1893,22 @@ static int uaa_to_xfi(struct pci_dev *pci)
 	return 0;
 }
 
+static irqreturn_t ct_20k1_interrupt(int irq, void *dev_id)
+{
+	struct hw *hw = dev_id;
+	unsigned int status;
+
+	status = hw_read_20kx(hw, GIP);
+	if (!status)
+		return IRQ_NONE;
+
+	if (hw->irq_callback)
+		hw->irq_callback(hw->irq_callback_data, status);
+
+	hw_write_20kx(hw, GIP, status);
+	return IRQ_HANDLED;
+}
+
 static int hw_card_start(struct hw *hw)
 {
 	int err = 0;
@@ -1914,12 +1945,13 @@ static int hw_card_start(struct hw *hw)
 		hw->io_base = pci_resource_start(pci, 0);
 	}
 
-	/*if ((err = request_irq(pci->irq, ct_atc_interrupt, IRQF_SHARED,
-				atc->chip_details->nm_card, hw))) {
+	err = request_irq(pci->irq, ct_20k1_interrupt, IRQF_SHARED,
+			  "ctxfi", hw);
+	if (err < 0) {
+		printk(KERN_ERR "XFi: Cannot get irq %d\n", pci->irq);
 		goto error2;
 	}
 	hw->irq = pci->irq;
-	*/
 
 	pci_set_master(pci);
 
@@ -1936,6 +1968,8 @@ error1:
 static int hw_card_stop(struct hw *hw)
 {
 	/* TODO: Disable interrupt and so on... */
+	if (hw->irq >= 0)
+		synchronize_irq(hw->irq);
 	return 0;
 }
 
@@ -2215,6 +2249,9 @@ int create_20k1_hw_obj(struct hw **rhw)
 	hw->daio_mgr_set_imapaddr = daio_mgr_set_imapaddr;
 	hw->daio_mgr_commit_write = daio_mgr_commit_write;
 
+	hw->set_timer_irq = set_timer_irq;
+	hw->set_timer_tick = set_timer_tick;
+
 	*rhw = hw;
 
 	return 0;
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index 52ddf19..32b742d 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -16,6 +16,7 @@
  */
 
 #include "ctpcm.h"
+#include "cttimer.h"
 #include <sound/pcm.h>
 
 /* Hardware descriptions for playback */
@@ -108,6 +109,7 @@ static void ct_atc_pcm_free_substream(struct snd_pcm_runtime *runtime)
 	struct ct_atc *atc = snd_pcm_substream_chip(apcm->substream);
 
 	atc->pcm_release_resources(atc, apcm);
+	ct_timer_instance_free(apcm->timer);
 	kfree(apcm);
 	runtime->private_data = NULL;
 }
@@ -124,8 +126,6 @@ static int ct_pcm_playback_open(struct snd_pcm_substream *substream)
 	if (NULL == apcm)
 		return -ENOMEM;
 
-	spin_lock_init(&apcm->timer_lock);
-	apcm->stop_timer = 0;
 	apcm->substream = substream;
 	apcm->interrupt = ct_atc_pcm_interrupt;
 	runtime->private_data = apcm;
@@ -153,6 +153,10 @@ static int ct_pcm_playback_open(struct snd_pcm_substream *substream)
 		return err;
 	}
 
+	apcm->timer = ct_timer_instance_new(atc->timer, apcm);
+	if (!apcm->timer)
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -182,89 +186,6 @@ static int ct_pcm_hw_free(struct snd_pcm_substream *substream)
 	return snd_pcm_lib_free_pages(substream);
 }
 
-static void ct_pcm_timer_callback(unsigned long data)
-{
-	struct ct_atc_pcm *apcm = (struct ct_atc_pcm *)data;
-	struct snd_pcm_substream *substream = apcm->substream;
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	unsigned int period_size = runtime->period_size;
-	unsigned int buffer_size = runtime->buffer_size;
-	unsigned long flags;
-	unsigned int position = 0, dist = 0, interval = 0;
-
-	position = substream->ops->pointer(substream);
-	dist = (position + buffer_size - apcm->position) % buffer_size;
-	if ((dist >= period_size) ||
-		(position/period_size != apcm->position/period_size)) {
-		apcm->interrupt(apcm);
-		apcm->position = position;
-	}
-	/* Add extra HZ*5/1000 to avoid overrun issue when recording
-	 * at 8kHz in 8-bit format or at 88kHz in 24-bit format. */
-	interval = ((period_size - (position % period_size))
-		   * HZ + (runtime->rate - 1)) / runtime->rate + HZ * 5 / 1000;
-	spin_lock_irqsave(&apcm->timer_lock, flags);
-	apcm->timer.expires = jiffies + interval;
-	if (!apcm->stop_timer)
-		add_timer(&apcm->timer);
-
-	spin_unlock_irqrestore(&apcm->timer_lock, flags);
-}
-
-static int ct_pcm_timer_prepare(struct ct_atc_pcm *apcm)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&apcm->timer_lock, flags);
-	if (timer_pending(&apcm->timer)) {
-		/* The timer has already been started. */
-		spin_unlock_irqrestore(&apcm->timer_lock, flags);
-		return 0;
-	}
-
-	init_timer(&apcm->timer);
-	apcm->timer.data = (unsigned long)apcm;
-	apcm->timer.function = ct_pcm_timer_callback;
-	spin_unlock_irqrestore(&apcm->timer_lock, flags);
-	apcm->position = 0;
-
-	return 0;
-}
-
-static int ct_pcm_timer_start(struct ct_atc_pcm *apcm)
-{
-	struct snd_pcm_runtime *runtime = apcm->substream->runtime;
-	unsigned long flags;
-
-	spin_lock_irqsave(&apcm->timer_lock, flags);
-	if (timer_pending(&apcm->timer)) {
-		/* The timer has already been started. */
-		spin_unlock_irqrestore(&apcm->timer_lock, flags);
-		return 0;
-	}
-
-	apcm->timer.expires = jiffies + (runtime->period_size * HZ +
-				(runtime->rate - 1)) / runtime->rate;
-	apcm->stop_timer = 0;
-	add_timer(&apcm->timer);
-	spin_unlock_irqrestore(&apcm->timer_lock, flags);
-
-	return 0;
-}
-
-static int ct_pcm_timer_stop(struct ct_atc_pcm *apcm)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&apcm->timer_lock, flags);
-	apcm->stop_timer = 1;
-	del_timer(&apcm->timer);
-	spin_unlock_irqrestore(&apcm->timer_lock, flags);
-
-	try_to_del_timer_sync(&apcm->timer);
-
-	return 0;
-}
 
 static int ct_pcm_playback_prepare(struct snd_pcm_substream *substream)
 {
@@ -283,8 +204,6 @@ static int ct_pcm_playback_prepare(struct snd_pcm_substream *substream)
 		return err;
 	}
 
-	ct_pcm_timer_prepare(apcm);
-
 	return 0;
 }
 
@@ -300,12 +219,10 @@ ct_pcm_playback_trigger(struct snd_pcm_substream *substream, int cmd)
 	case SNDRV_PCM_TRIGGER_RESUME:
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
 		atc->pcm_playback_start(atc, apcm);
-		ct_pcm_timer_start(apcm);
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-		ct_pcm_timer_stop(apcm);
 		atc->pcm_playback_stop(atc, apcm);
 		break;
 	default:
@@ -341,9 +258,7 @@ static int ct_pcm_capture_open(struct snd_pcm_substream *substream)
 	if (NULL == apcm)
 		return -ENOMEM;
 
-	spin_lock_init(&apcm->timer_lock);
 	apcm->started = 0;
-	apcm->stop_timer = 0;
 	apcm->substream = substream;
 	apcm->interrupt = ct_atc_pcm_interrupt;
 	runtime->private_data = apcm;
@@ -365,6 +280,10 @@ static int ct_pcm_capture_open(struct snd_pcm_substream *substream)
 		return err;
 	}
 
+	apcm->timer = ct_timer_instance_new(atc->timer, apcm);
+	if (!apcm->timer)
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -388,8 +307,6 @@ static int ct_pcm_capture_prepare(struct snd_pcm_substream *substream)
 		return err;
 	}
 
-	ct_pcm_timer_prepare(apcm);
-
 	return 0;
 }
 
@@ -403,14 +320,11 @@ ct_pcm_capture_trigger(struct snd_pcm_substream *substream, int cmd)
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_START:
 		atc->pcm_capture_start(atc, apcm);
-		ct_pcm_timer_start(apcm);
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
-		ct_pcm_timer_stop(apcm);
 		atc->pcm_capture_stop(atc, apcm);
 		break;
 	default:
-		ct_pcm_timer_stop(apcm);
 		atc->pcm_capture_stop(atc, apcm);
 		break;
 	}
diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
new file mode 100644
index 0000000..3acb26d
--- /dev/null
+++ b/sound/pci/ctxfi/cttimer.c
@@ -0,0 +1,417 @@
+/*
+ * PCM timer handling on ctxfi
+ *
+ * This source file is released under GPL v2 license (no other versions).
+ * See the COPYING file included in the main directory of this source
+ * distribution for the license terms and conditions.
+ */
+
+#include <linux/slab.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include "ctatc.h"
+#include "cthardware.h"
+#include "cttimer.h"
+
+struct ct_timer_ops {
+	void (*init)(struct ct_timer_instance *);
+	void (*prepare)(struct ct_timer_instance *);
+	void (*start)(struct ct_timer_instance *);
+	void (*stop)(struct ct_timer_instance *);
+	void (*free_instance)(struct ct_timer_instance *);
+	void (*interrupt)(struct ct_timer *);
+	void (*free_global)(struct ct_timer *);
+};
+
+/* timer instance -- assigned to each PCM stream */
+struct ct_timer_instance {
+	spinlock_t lock;
+	struct ct_timer *timer_base;
+	struct ct_atc_pcm *apcm;
+	struct snd_pcm_substream *substream;
+	struct timer_list timer;
+	struct list_head instance_list;
+	struct list_head running_list;
+	unsigned int position;
+	unsigned int frag_count;
+	unsigned int running:1;
+	unsigned int need_update:1;
+};
+
+/* timer instance manager */
+struct ct_timer {
+	spinlock_t lock;		/* global timer lock (for xfitimer) */
+	spinlock_t list_lock;		/* lock for instance list */
+	struct ct_atc *atc;
+	struct ct_timer_ops *ops;
+	struct list_head instance_head;
+	struct list_head running_head;
+	unsigned int irq_handling:1;	/* in IRQ handling */
+	unsigned int reprogram:1;	/* need to reprogram the internval */
+	unsigned int running:1;		/* global timer running */
+};
+
+
+/*
+ * system-timer-based updates
+ */
+
+static void ct_systimer_callback(unsigned long data)
+{
+	struct ct_timer_instance *ti = (struct ct_timer_instance *)data;
+	struct snd_pcm_substream *substream = ti->substream;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct ct_atc_pcm *apcm = ti->apcm;
+	unsigned int period_size = runtime->period_size;
+	unsigned int buffer_size = runtime->buffer_size;
+	unsigned long flags;
+	unsigned int position, dist, interval;
+
+	position = substream->ops->pointer(substream);
+	dist = (position + buffer_size - ti->position) % buffer_size;
+	if (dist >= period_size ||
+	    position / period_size != ti->position / period_size) {
+		apcm->interrupt(apcm);
+		ti->position = position;
+	}
+	/* Add extra HZ*5/1000 to avoid overrun issue when recording
+	 * at 8kHz in 8-bit format or at 88kHz in 24-bit format. */
+	interval = ((period_size - (position % period_size))
+		   * HZ + (runtime->rate - 1)) / runtime->rate + HZ * 5 / 1000;
+	spin_lock_irqsave(&ti->lock, flags);
+	if (ti->running)
+		mod_timer(&ti->timer, jiffies + interval);
+	spin_unlock_irqrestore(&ti->lock, flags);
+}
+
+static void ct_systimer_init(struct ct_timer_instance *ti)
+{
+	setup_timer(&ti->timer, ct_systimer_callback,
+		    (unsigned long)ti);
+}
+
+static void ct_systimer_start(struct ct_timer_instance *ti)
+{
+	struct snd_pcm_runtime *runtime = ti->substream->runtime;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ti->lock, flags);
+	ti->running = 1;
+	mod_timer(&ti->timer,
+		  jiffies + (runtime->period_size * HZ +
+			     (runtime->rate - 1)) / runtime->rate);
+	spin_unlock_irqrestore(&ti->lock, flags);
+}
+
+static void ct_systimer_stop(struct ct_timer_instance *ti)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ti->lock, flags);
+	ti->running = 0;
+	del_timer(&ti->timer);
+	spin_unlock_irqrestore(&ti->lock, flags);
+}
+
+static void ct_systimer_prepare(struct ct_timer_instance *ti)
+{
+	ct_systimer_stop(ti);
+	try_to_del_timer_sync(&ti->timer);
+}
+
+#define ct_systimer_free	ct_systimer_prepare
+
+static struct ct_timer_ops ct_systimer_ops = {
+	.init = ct_systimer_init,
+	.free_instance = ct_systimer_free,
+	.prepare = ct_systimer_prepare,
+	.start = ct_systimer_start,
+	.stop = ct_systimer_stop,
+};
+
+
+/*
+ * Handling multiple streams using a global emu20k1 timer irq
+ */
+
+#define CT_TIMER_FREQ	48000
+#define MAX_TICKS	((1 << 13) - 1)
+
+static void ct_xfitimer_irq_rearm(struct ct_timer *atimer, int ticks)
+{
+	struct hw *hw = atimer->atc->hw;
+	if (ticks > MAX_TICKS)
+		ticks = MAX_TICKS;
+	hw->set_timer_tick(hw, ticks);
+	if (!atimer->running)
+		hw->set_timer_irq(hw, 1);
+	atimer->running = 1;
+}
+
+static void ct_xfitimer_irq_stop(struct ct_timer *atimer)
+{
+	if (atimer->running) {
+		struct hw *hw = atimer->atc->hw;
+		hw->set_timer_irq(hw, 0);
+		hw->set_timer_tick(hw, 0);
+		atimer->running = 0;
+	}
+}
+
+/*
+ * reprogram the timer interval;
+ * checks the running instance list and determines the next timer interval.
+ * also updates the each stream position, returns the number of streams
+ * to call snd_pcm_period_elapsed() appropriately
+ *
+ * call this inside the lock and irq disabled
+ */
+static int ct_xfitimer_reprogram(struct ct_timer *atimer)
+{
+	struct ct_timer_instance *ti;
+	int min_intr = -1;
+	int updates = 0;
+
+	list_for_each_entry(ti, &atimer->running_head, running_list) {
+		struct snd_pcm_runtime *runtime;
+		unsigned int pos, diff;
+		int intr;
+		runtime = ti->substream->runtime;
+		pos = ti->substream->ops->pointer(ti->substream);
+		if (pos < ti->position)
+			diff = runtime->buffer_size - ti->position + pos;
+		else
+			diff = pos - ti->position;
+		ti->position = pos;
+		while (diff >= ti->frag_count) {
+			ti->frag_count += runtime->period_size;
+			ti->need_update = 1;
+			updates++;
+		}
+		ti->frag_count -= diff;
+		intr = div_u64((u64)ti->frag_count * CT_TIMER_FREQ,
+			       runtime->rate);
+		if (min_intr < 0 || intr < min_intr)
+			min_intr = intr;
+	}
+
+	if (min_intr > 0)
+		ct_xfitimer_irq_rearm(atimer, min_intr);
+	else
+		ct_xfitimer_irq_stop(atimer);
+
+	atimer->reprogram = 0; /* clear flag */
+	return updates;
+}
+
+/* look through the instance list and call period_elapsed if needed */
+static void ct_xfitimer_check_period(struct ct_timer *atimer)
+{
+	struct ct_timer_instance *ti;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atimer->list_lock, flags);
+	list_for_each_entry(ti, &atimer->instance_head, instance_list) {
+		if (ti->need_update) {
+			ti->need_update = 0;
+			ti->apcm->interrupt(ti->apcm);
+		}
+	}
+	spin_unlock_irqrestore(&atimer->list_lock, flags);
+}
+
+/* Handle timer-interrupt */
+static void ct_xfitimer_callback(struct ct_timer *atimer)
+{
+	int update;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atimer->lock, flags);
+	atimer->irq_handling = 1;
+	do {
+		update = ct_xfitimer_reprogram(atimer);
+		spin_unlock(&atimer->lock);
+		if (update)
+			ct_xfitimer_check_period(atimer);
+		spin_lock(&atimer->lock);
+	} while (atimer->reprogram);
+	atimer->irq_handling = 0;
+	spin_unlock_irqrestore(&atimer->lock, flags);
+}
+
+static void ct_xfitimer_prepare(struct ct_timer_instance *ti)
+{
+	ti->frag_count = ti->substream->runtime->period_size;
+	ti->need_update = 0;
+}
+
+
+/* start/stop the timer */
+static void ct_xfitimer_update(struct ct_timer *atimer)
+{
+	unsigned long flags;
+	int update;
+
+	if (atimer->irq_handling) {
+		/* reached from IRQ handler; let it handle later */
+		atimer->reprogram = 1;
+		return;
+	}
+
+	spin_lock_irqsave(&atimer->lock, flags);
+	ct_xfitimer_irq_stop(atimer);
+	update = ct_xfitimer_reprogram(atimer);
+	spin_unlock_irqrestore(&atimer->lock, flags);
+	if (update)
+		ct_xfitimer_check_period(atimer);
+}
+
+static void ct_xfitimer_start(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atimer->lock, flags);
+	list_add(&ti->running_list, &atimer->running_head);
+	spin_unlock_irqrestore(&atimer->lock, flags);
+	ct_xfitimer_update(atimer);
+}
+
+static void ct_xfitimer_stop(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atimer->lock, flags);
+	list_del_init(&ti->running_list);
+	ti->need_update = 0;
+	spin_unlock_irqrestore(&atimer->lock, flags);
+	ct_xfitimer_update(atimer);
+}
+
+static void ct_xfitimer_free_global(struct ct_timer *atimer)
+{
+	ct_xfitimer_irq_stop(atimer);
+}
+
+static struct ct_timer_ops ct_xfitimer_ops = {
+	.prepare = ct_xfitimer_prepare,
+	.start = ct_xfitimer_start,
+	.stop = ct_xfitimer_stop,
+	.interrupt = ct_xfitimer_callback,
+	.free_global = ct_xfitimer_free_global,
+};
+
+/*
+ * timer instance
+ */
+
+struct ct_timer_instance *
+ct_timer_instance_new(struct ct_timer *atimer, struct ct_atc_pcm *apcm)
+{
+	struct ct_timer_instance *ti;
+
+	ti = kzalloc(sizeof(*ti), GFP_KERNEL);
+	if (!ti)
+		return NULL;
+	spin_lock_init(&ti->lock);
+	INIT_LIST_HEAD(&ti->instance_list);
+	INIT_LIST_HEAD(&ti->running_list);
+	ti->timer_base = atimer;
+	ti->apcm = apcm;
+	ti->substream = apcm->substream;
+	if (atimer->ops->init)
+		atimer->ops->init(ti);
+
+	spin_lock_irq(&atimer->list_lock);
+	list_add(&ti->instance_list, &atimer->instance_head);
+	spin_unlock_irq(&atimer->list_lock);
+
+	return ti;
+}
+
+void ct_timer_prepare(struct ct_timer_instance *ti)
+{
+	if (ti->timer_base->ops->prepare)
+		ti->timer_base->ops->prepare(ti);
+	ti->position = 0;
+	ti->running = 0;
+}
+
+void ct_timer_start(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+	atimer->ops->start(ti);
+}
+
+void ct_timer_stop(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+	atimer->ops->stop(ti);
+}
+
+void ct_timer_instance_free(struct ct_timer_instance *ti)
+{
+	struct ct_timer *atimer = ti->timer_base;
+
+	atimer->ops->stop(ti); /* to be sure */
+	if (atimer->ops->free_instance)
+		atimer->ops->free_instance(ti);
+
+	spin_lock_irq(&atimer->list_lock);
+	list_del(&ti->instance_list);
+	spin_unlock_irq(&atimer->list_lock);
+
+	kfree(ti);
+}
+
+/*
+ * timer manager
+ */
+
+#define USE_SYSTEM_TIMER	0
+
+static void ct_timer_interrupt(void *data, unsigned int status)
+{
+	struct ct_timer *timer = data;
+
+	/* Interval timer interrupt */
+	if ((status & IT_INT) && timer->ops->interrupt)
+		timer->ops->interrupt(timer);
+}
+
+struct ct_timer *ct_timer_new(struct ct_atc *atc)
+{
+	struct ct_timer *atimer;
+	struct hw *hw;
+
+	atimer = kzalloc(sizeof(*atimer), GFP_KERNEL);
+	if (!atimer)
+		return NULL;
+	spin_lock_init(&atimer->lock);
+	spin_lock_init(&atimer->list_lock);
+	INIT_LIST_HEAD(&atimer->instance_head);
+	INIT_LIST_HEAD(&atimer->running_head);
+	atimer->atc = atc;
+	hw = atc->hw;
+	if (!USE_SYSTEM_TIMER && hw->set_timer_irq) {
+		printk(KERN_INFO "ctxfi: Use xfi-native timer\n");
+		atimer->ops = &ct_xfitimer_ops;
+		hw->irq_callback_data = atimer;
+		hw->irq_callback = ct_timer_interrupt;
+	} else {
+		printk(KERN_INFO "ctxfi: Use system timer\n");
+		atimer->ops = &ct_systimer_ops;
+	}
+	return atimer;
+}
+
+void ct_timer_free(struct ct_timer *atimer)
+{
+	struct hw *hw = atimer->atc->hw;
+	hw->irq_callback = NULL;
+	if (atimer->ops->free_global)
+		atimer->ops->free_global(atimer);
+	kfree(atimer);
+}
+
diff --git a/sound/pci/ctxfi/cttimer.h b/sound/pci/ctxfi/cttimer.h
new file mode 100644
index 0000000..9793482
--- /dev/null
+++ b/sound/pci/ctxfi/cttimer.h
@@ -0,0 +1,29 @@
+/*
+ * Timer handling
+ */
+
+#ifndef __CTTIMER_H
+#define __CTTIMER_H
+
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+
+struct snd_pcm_substream;
+struct ct_atc;
+struct ct_atc_pcm;
+
+struct ct_timer;
+struct ct_timer_instance;
+
+struct ct_timer *ct_timer_new(struct ct_atc *atc);
+void ct_timer_free(struct ct_timer *atimer);
+
+struct ct_timer_instance *
+ct_timer_instance_new(struct ct_timer *atimer, struct ct_atc_pcm *apcm);
+void ct_timer_instance_free(struct ct_timer_instance *ti);
+void ct_timer_start(struct ct_timer_instance *ti);
+void ct_timer_stop(struct ct_timer_instance *ti);
+void ct_timer_prepare(struct ct_timer_instance *ti);
+
+#endif /* __CTTIMER_H */
-- 
cgit v0.10.2


From 775ffa1d3e5a550dd2c9d947d773021c61531b36 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 16:12:16 +0200
Subject: ALSA: ctxfi - Set periods_min to 2

Set 2 to minimal periods of playback pcm setups, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index 32b742d..a0bd31c 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -40,7 +40,7 @@ static struct snd_pcm_hardware ct_pcm_playback_hw = {
 	.buffer_bytes_max	= (128*1024),
 	.period_bytes_min	= (64),
 	.period_bytes_max	= (128*1024),
-	.periods_min		= 1,
+	.periods_min		= 2,
 	.periods_max		= 1024,
 	.fifo_size		= 0,
 };
@@ -62,7 +62,7 @@ static struct snd_pcm_hardware ct_spdif_passthru_playback_hw = {
 	.buffer_bytes_max	= (128*1024),
 	.period_bytes_min	= (64),
 	.period_bytes_max	= (128*1024),
-	.periods_min		= 1,
+	.periods_min		= 2,
 	.periods_max		= 1024,
 	.fifo_size		= 0,
 };
-- 
cgit v0.10.2


From 2a36f67f8c81f0babda0e811c760b7bfa971010b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 16:34:10 +0200
Subject: ALSA: ctxfi - Clean up / optimize

- Use static tables instead of assigining each funciton pointer
- Add __devinit* to appropriate places; pcm, mixer and timer cannot be
  marked because they are kept in the function table that lives long
- Move create_alsa_devs function out of struct ct_atc to mark it
  __devinit

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 10b7419..9b13245 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -1202,7 +1202,7 @@ static int atc_dev_free(struct snd_device *dev)
 	return ct_atc_destroy(atc);
 }
 
-static int atc_identify_card(struct ct_atc *atc)
+static int __devinit atc_identify_card(struct ct_atc *atc)
 {
 	u16 subsys;
 	u8 revision;
@@ -1243,7 +1243,7 @@ static int atc_identify_card(struct ct_atc *atc)
 	return 0;
 }
 
-static int ct_create_alsa_devs(struct ct_atc *atc)
+int __devinit ct_atc_create_alsa_devs(struct ct_atc *atc)
 {
 	enum CTALSADEVS i;
 	struct hw *hw = atc->hw;
@@ -1277,7 +1277,7 @@ static int ct_create_alsa_devs(struct ct_atc *atc)
 	return 0;
 }
 
-static int atc_create_hw_devs(struct ct_atc *atc)
+static int __devinit atc_create_hw_devs(struct ct_atc *atc)
 {
 	struct hw *hw = NULL;
 	struct card_conf info = {0};
@@ -1313,7 +1313,7 @@ static int atc_create_hw_devs(struct ct_atc *atc)
 	return 0;
 }
 
-static int atc_get_resources(struct ct_atc *atc)
+static int __devinit atc_get_resources(struct ct_atc *atc)
 {
 	struct daio_desc da_desc = {0};
 	struct daio_mgr *daio_mgr = NULL;
@@ -1423,7 +1423,7 @@ static int atc_get_resources(struct ct_atc *atc)
 	return 0;
 }
 
-static void
+static void __devinit
 atc_connect_dai(struct src_mgr *src_mgr, struct dai *dai,
 		struct src **srcs, struct srcimp **srcimps)
 {
@@ -1462,7 +1462,7 @@ atc_connect_dai(struct src_mgr *src_mgr, struct dai *dai,
 	src_mgr->commit_write(src_mgr); /* Synchronously enable SRCs */
 }
 
-static void atc_connect_resources(struct ct_atc *atc)
+static void __devinit atc_connect_resources(struct ct_atc *atc)
 {
 	struct dai *dai = NULL;
 	struct dao *dao = NULL;
@@ -1508,37 +1508,35 @@ static void atc_connect_resources(struct ct_atc *atc)
 	}
 }
 
-static void atc_set_ops(struct ct_atc *atc)
-{
-	/* Set operations */
-	atc->map_audio_buffer = ct_map_audio_buffer;
-	atc->unmap_audio_buffer = ct_unmap_audio_buffer;
-	atc->pcm_playback_prepare = atc_pcm_playback_prepare;
-	atc->pcm_release_resources = atc_pcm_release_resources;
-	atc->pcm_playback_start = atc_pcm_playback_start;
-	atc->pcm_playback_stop = atc_pcm_stop;
-	atc->pcm_playback_position = atc_pcm_playback_position;
-	atc->pcm_capture_prepare = atc_pcm_capture_prepare;
-	atc->pcm_capture_start = atc_pcm_capture_start;
-	atc->pcm_capture_stop = atc_pcm_stop;
-	atc->pcm_capture_position = atc_pcm_capture_position;
-	atc->spdif_passthru_playback_prepare = spdif_passthru_playback_prepare;
-	atc->get_ptp_phys = atc_get_ptp_phys;
-	atc->select_line_in = atc_select_line_in;
-	atc->select_mic_in = atc_select_mic_in;
-	atc->select_digit_io = atc_select_digit_io;
-	atc->line_front_unmute = atc_line_front_unmute;
-	atc->line_surround_unmute = atc_line_surround_unmute;
-	atc->line_clfe_unmute = atc_line_clfe_unmute;
-	atc->line_rear_unmute = atc_line_rear_unmute;
-	atc->line_in_unmute = atc_line_in_unmute;
-	atc->spdif_out_unmute = atc_spdif_out_unmute;
-	atc->spdif_in_unmute = atc_spdif_in_unmute;
-	atc->spdif_out_get_status = atc_spdif_out_get_status;
-	atc->spdif_out_set_status = atc_spdif_out_set_status;
-	atc->spdif_out_passthru = atc_spdif_out_passthru;
-	atc->have_digit_io_switch = atc_have_digit_io_switch;
-}
+static struct ct_atc atc_preset __devinitdata = {
+	.map_audio_buffer = ct_map_audio_buffer,
+	.unmap_audio_buffer = ct_unmap_audio_buffer,
+	.pcm_playback_prepare = atc_pcm_playback_prepare,
+	.pcm_release_resources = atc_pcm_release_resources,
+	.pcm_playback_start = atc_pcm_playback_start,
+	.pcm_playback_stop = atc_pcm_stop,
+	.pcm_playback_position = atc_pcm_playback_position,
+	.pcm_capture_prepare = atc_pcm_capture_prepare,
+	.pcm_capture_start = atc_pcm_capture_start,
+	.pcm_capture_stop = atc_pcm_stop,
+	.pcm_capture_position = atc_pcm_capture_position,
+	.spdif_passthru_playback_prepare = spdif_passthru_playback_prepare,
+	.get_ptp_phys = atc_get_ptp_phys,
+	.select_line_in = atc_select_line_in,
+	.select_mic_in = atc_select_mic_in,
+	.select_digit_io = atc_select_digit_io,
+	.line_front_unmute = atc_line_front_unmute,
+	.line_surround_unmute = atc_line_surround_unmute,
+	.line_clfe_unmute = atc_line_clfe_unmute,
+	.line_rear_unmute = atc_line_rear_unmute,
+	.line_in_unmute = atc_line_in_unmute,
+	.spdif_out_unmute = atc_spdif_out_unmute,
+	.spdif_in_unmute = atc_spdif_in_unmute,
+	.spdif_out_get_status = atc_spdif_out_get_status,
+	.spdif_out_set_status = atc_spdif_out_set_status,
+	.spdif_out_passthru = atc_spdif_out_passthru,
+	.have_digit_io_switch = atc_have_digit_io_switch,
+};
 
 /**
  *  ct_atc_create - create and initialize a hardware manager
@@ -1552,7 +1550,7 @@ static void atc_set_ops(struct ct_atc *atc)
  *  Returns 0 if suceeds, or negative error code if fails.
  */
 
-int ct_atc_create(struct snd_card *card, struct pci_dev *pci,
+int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 		  unsigned int rsr, unsigned int msr, struct ct_atc **ratc)
 {
 	struct ct_atc *atc = NULL;
@@ -1567,14 +1565,14 @@ int ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 	if (NULL == atc)
 		return -ENOMEM;
 
+	/* Set operations */
+	*atc = atc_preset;
+
 	atc->card = card;
 	atc->pci = pci;
 	atc->rsr = rsr;
 	atc->msr = msr;
 
-	/* Set operations */
-	atc_set_ops(atc);
-
 	spin_lock_init(&atc->atc_lock);
 
 	/* Find card model */
@@ -1606,8 +1604,6 @@ int ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 	if (!atc->timer)
 		goto error1;
 
-	atc->create_alsa_devs = ct_create_alsa_devs;
-
 	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, atc, &ops);
 	if (err < 0)
 		goto error1;
diff --git a/sound/pci/ctxfi/ctatc.h b/sound/pci/ctxfi/ctatc.h
index a3f9b1b..04459aa 100644
--- a/sound/pci/ctxfi/ctatc.h
+++ b/sound/pci/ctxfi/ctatc.h
@@ -91,8 +91,6 @@ struct ct_atc {
 
 	const struct ct_atc_chip_details *chip_details;
 	enum CTCARDS model;
-	/* Create all alsa devices */
-	int (*create_alsa_devs)(struct ct_atc *atc);
 
 	struct ct_vm *vm; /* device virtual memory manager for this card */
 	int (*map_audio_buffer)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
@@ -151,5 +149,6 @@ struct ct_atc {
 int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 			    unsigned int rsr, unsigned int msr,
 			    struct ct_atc **ratc);
+int __devinit ct_atc_create_alsa_devs(struct ct_atc *atc);
 
 #endif /* CTATC_H */
diff --git a/sound/pci/ctxfi/cthardware.c b/sound/pci/ctxfi/cthardware.c
index 8e58860..53d1aca 100644
--- a/sound/pci/ctxfi/cthardware.c
+++ b/sound/pci/ctxfi/cthardware.c
@@ -20,7 +20,7 @@
 #include "cthw20k2.h"
 #include <linux/bug.h>
 
-static enum CHIPTYP get_chip_type(struct hw *hw)
+static enum CHIPTYP __devinitdata get_chip_type(struct hw *hw)
 {
 	enum CHIPTYP type = ATCNONE;
 
@@ -39,7 +39,7 @@ static enum CHIPTYP get_chip_type(struct hw *hw)
 	return type;
 }
 
-int create_hw_obj(struct pci_dev *pci, struct hw **rhw)
+int __devinit create_hw_obj(struct pci_dev *pci, struct hw **rhw)
 {
 	int err = 0;
 
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index 550b30a..df565c1 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -2138,9 +2138,107 @@ static void hw_write_pci(struct hw *hw, u32 reg, u32 data)
 		&container_of(hw, struct hw20k1, hw)->reg_pci_lock, flags);
 }
 
-int create_20k1_hw_obj(struct hw **rhw)
+static struct hw ct20k1_preset __devinitdata = {
+	.irq = -1,
+
+	.card_init = hw_card_init,
+	.card_stop = hw_card_stop,
+	.pll_init = hw_pll_init,
+	.is_adc_source_selected = hw_is_adc_input_selected,
+	.select_adc_source = hw_adc_input_select,
+	.have_digit_io_switch = hw_have_digit_io_switch,
+
+	.src_rsc_get_ctrl_blk = src_get_rsc_ctrl_blk,
+	.src_rsc_put_ctrl_blk = src_put_rsc_ctrl_blk,
+	.src_mgr_get_ctrl_blk = src_mgr_get_ctrl_blk,
+	.src_mgr_put_ctrl_blk = src_mgr_put_ctrl_blk,
+	.src_set_state = src_set_state,
+	.src_set_bm = src_set_bm,
+	.src_set_rsr = src_set_rsr,
+	.src_set_sf = src_set_sf,
+	.src_set_wr = src_set_wr,
+	.src_set_pm = src_set_pm,
+	.src_set_rom = src_set_rom,
+	.src_set_vo = src_set_vo,
+	.src_set_st = src_set_st,
+	.src_set_ie = src_set_ie,
+	.src_set_ilsz = src_set_ilsz,
+	.src_set_bp = src_set_bp,
+	.src_set_cisz = src_set_cisz,
+	.src_set_ca = src_set_ca,
+	.src_set_sa = src_set_sa,
+	.src_set_la = src_set_la,
+	.src_set_pitch = src_set_pitch,
+	.src_set_dirty = src_set_dirty,
+	.src_set_clear_zbufs = src_set_clear_zbufs,
+	.src_set_dirty_all = src_set_dirty_all,
+	.src_commit_write = src_commit_write,
+	.src_get_ca = src_get_ca,
+	.src_get_dirty = src_get_dirty,
+	.src_dirty_conj_mask = src_dirty_conj_mask,
+	.src_mgr_enbs_src = src_mgr_enbs_src,
+	.src_mgr_enb_src = src_mgr_enb_src,
+	.src_mgr_dsb_src = src_mgr_dsb_src,
+	.src_mgr_commit_write = src_mgr_commit_write,
+
+	.srcimp_mgr_get_ctrl_blk = srcimp_mgr_get_ctrl_blk,
+	.srcimp_mgr_put_ctrl_blk = srcimp_mgr_put_ctrl_blk,
+	.srcimp_mgr_set_imaparc = srcimp_mgr_set_imaparc,
+	.srcimp_mgr_set_imapuser = srcimp_mgr_set_imapuser,
+	.srcimp_mgr_set_imapnxt = srcimp_mgr_set_imapnxt,
+	.srcimp_mgr_set_imapaddr = srcimp_mgr_set_imapaddr,
+	.srcimp_mgr_commit_write = srcimp_mgr_commit_write,
+
+	.amixer_rsc_get_ctrl_blk = amixer_rsc_get_ctrl_blk,
+	.amixer_rsc_put_ctrl_blk = amixer_rsc_put_ctrl_blk,
+	.amixer_mgr_get_ctrl_blk = amixer_mgr_get_ctrl_blk,
+	.amixer_mgr_put_ctrl_blk = amixer_mgr_put_ctrl_blk,
+	.amixer_set_mode = amixer_set_mode,
+	.amixer_set_iv = amixer_set_iv,
+	.amixer_set_x = amixer_set_x,
+	.amixer_set_y = amixer_set_y,
+	.amixer_set_sadr = amixer_set_sadr,
+	.amixer_set_se = amixer_set_se,
+	.amixer_set_dirty = amixer_set_dirty,
+	.amixer_set_dirty_all = amixer_set_dirty_all,
+	.amixer_commit_write = amixer_commit_write,
+	.amixer_get_y = amixer_get_y,
+	.amixer_get_dirty = amixer_get_dirty,
+
+	.dai_get_ctrl_blk = dai_get_ctrl_blk,
+	.dai_put_ctrl_blk = dai_put_ctrl_blk,
+	.dai_srt_set_srco = dai_srt_set_srcr,
+	.dai_srt_set_srcm = dai_srt_set_srcl,
+	.dai_srt_set_rsr = dai_srt_set_rsr,
+	.dai_srt_set_drat = dai_srt_set_drat,
+	.dai_srt_set_ec = dai_srt_set_ec,
+	.dai_srt_set_et = dai_srt_set_et,
+	.dai_commit_write = dai_commit_write,
+
+	.dao_get_ctrl_blk = dao_get_ctrl_blk,
+	.dao_put_ctrl_blk = dao_put_ctrl_blk,
+	.dao_set_spos = dao_set_spos,
+	.dao_commit_write = dao_commit_write,
+	.dao_get_spos = dao_get_spos,
+
+	.daio_mgr_get_ctrl_blk = daio_mgr_get_ctrl_blk,
+	.daio_mgr_put_ctrl_blk = daio_mgr_put_ctrl_blk,
+	.daio_mgr_enb_dai = daio_mgr_enb_dai,
+	.daio_mgr_dsb_dai = daio_mgr_dsb_dai,
+	.daio_mgr_enb_dao = daio_mgr_enb_dao,
+	.daio_mgr_dsb_dao = daio_mgr_dsb_dao,
+	.daio_mgr_dao_init = daio_mgr_dao_init,
+	.daio_mgr_set_imaparc = daio_mgr_set_imaparc,
+	.daio_mgr_set_imapnxt = daio_mgr_set_imapnxt,
+	.daio_mgr_set_imapaddr = daio_mgr_set_imapaddr,
+	.daio_mgr_commit_write = daio_mgr_commit_write,
+
+	.set_timer_irq = set_timer_irq,
+	.set_timer_tick = set_timer_tick,
+};
+
+int __devinit create_20k1_hw_obj(struct hw **rhw)
 {
-	struct hw *hw;
 	struct hw20k1 *hw20k1;
 
 	*rhw = NULL;
@@ -2151,108 +2249,9 @@ int create_20k1_hw_obj(struct hw **rhw)
 	spin_lock_init(&hw20k1->reg_20k1_lock);
 	spin_lock_init(&hw20k1->reg_pci_lock);
 
-	hw = &hw20k1->hw;
+	hw20k1->hw = ct20k1_preset;
 
-	hw->io_base = 0;
-	hw->mem_base = (unsigned long)NULL;
-	hw->irq = -1;
-
-	hw->card_init = hw_card_init;
-	hw->card_stop = hw_card_stop;
-	hw->pll_init = hw_pll_init;
-	hw->is_adc_source_selected = hw_is_adc_input_selected;
-	hw->select_adc_source = hw_adc_input_select;
-	hw->have_digit_io_switch = hw_have_digit_io_switch;
-
-	hw->src_rsc_get_ctrl_blk = src_get_rsc_ctrl_blk;
-	hw->src_rsc_put_ctrl_blk = src_put_rsc_ctrl_blk;
-	hw->src_mgr_get_ctrl_blk = src_mgr_get_ctrl_blk;
-	hw->src_mgr_put_ctrl_blk = src_mgr_put_ctrl_blk;
-	hw->src_set_state = src_set_state;
-	hw->src_set_bm = src_set_bm;
-	hw->src_set_rsr = src_set_rsr;
-	hw->src_set_sf = src_set_sf;
-	hw->src_set_wr = src_set_wr;
-	hw->src_set_pm = src_set_pm;
-	hw->src_set_rom = src_set_rom;
-	hw->src_set_vo = src_set_vo;
-	hw->src_set_st = src_set_st;
-	hw->src_set_ie = src_set_ie;
-	hw->src_set_ilsz = src_set_ilsz;
-	hw->src_set_bp = src_set_bp;
-	hw->src_set_cisz = src_set_cisz;
-	hw->src_set_ca = src_set_ca;
-	hw->src_set_sa = src_set_sa;
-	hw->src_set_la = src_set_la;
-	hw->src_set_pitch = src_set_pitch;
-	hw->src_set_dirty = src_set_dirty;
-	hw->src_set_clear_zbufs = src_set_clear_zbufs;
-	hw->src_set_dirty_all = src_set_dirty_all;
-	hw->src_commit_write = src_commit_write;
-	hw->src_get_ca = src_get_ca;
-	hw->src_get_dirty = src_get_dirty;
-	hw->src_dirty_conj_mask = src_dirty_conj_mask;
-	hw->src_mgr_enbs_src = src_mgr_enbs_src;
-	hw->src_mgr_enb_src = src_mgr_enb_src;
-	hw->src_mgr_dsb_src = src_mgr_dsb_src;
-	hw->src_mgr_commit_write = src_mgr_commit_write;
-
-	hw->srcimp_mgr_get_ctrl_blk = srcimp_mgr_get_ctrl_blk;
-	hw->srcimp_mgr_put_ctrl_blk = srcimp_mgr_put_ctrl_blk;
-	hw->srcimp_mgr_set_imaparc = srcimp_mgr_set_imaparc;
-	hw->srcimp_mgr_set_imapuser = srcimp_mgr_set_imapuser;
-	hw->srcimp_mgr_set_imapnxt = srcimp_mgr_set_imapnxt;
-	hw->srcimp_mgr_set_imapaddr = srcimp_mgr_set_imapaddr;
-	hw->srcimp_mgr_commit_write = srcimp_mgr_commit_write;
-
-	hw->amixer_rsc_get_ctrl_blk = amixer_rsc_get_ctrl_blk;
-	hw->amixer_rsc_put_ctrl_blk = amixer_rsc_put_ctrl_blk;
-	hw->amixer_mgr_get_ctrl_blk = amixer_mgr_get_ctrl_blk;
-	hw->amixer_mgr_put_ctrl_blk = amixer_mgr_put_ctrl_blk;
-	hw->amixer_set_mode = amixer_set_mode;
-	hw->amixer_set_iv = amixer_set_iv;
-	hw->amixer_set_x = amixer_set_x;
-	hw->amixer_set_y = amixer_set_y;
-	hw->amixer_set_sadr = amixer_set_sadr;
-	hw->amixer_set_se = amixer_set_se;
-	hw->amixer_set_dirty = amixer_set_dirty;
-	hw->amixer_set_dirty_all = amixer_set_dirty_all;
-	hw->amixer_commit_write = amixer_commit_write;
-	hw->amixer_get_y = amixer_get_y;
-	hw->amixer_get_dirty = amixer_get_dirty;
-
-	hw->dai_get_ctrl_blk = dai_get_ctrl_blk;
-	hw->dai_put_ctrl_blk = dai_put_ctrl_blk;
-	hw->dai_srt_set_srco = dai_srt_set_srcr;
-	hw->dai_srt_set_srcm = dai_srt_set_srcl;
-	hw->dai_srt_set_rsr = dai_srt_set_rsr;
-	hw->dai_srt_set_drat = dai_srt_set_drat;
-	hw->dai_srt_set_ec = dai_srt_set_ec;
-	hw->dai_srt_set_et = dai_srt_set_et;
-	hw->dai_commit_write = dai_commit_write;
-
-	hw->dao_get_ctrl_blk = dao_get_ctrl_blk;
-	hw->dao_put_ctrl_blk = dao_put_ctrl_blk;
-	hw->dao_set_spos = dao_set_spos;
-	hw->dao_commit_write = dao_commit_write;
-	hw->dao_get_spos = dao_get_spos;
-
-	hw->daio_mgr_get_ctrl_blk = daio_mgr_get_ctrl_blk;
-	hw->daio_mgr_put_ctrl_blk = daio_mgr_put_ctrl_blk;
-	hw->daio_mgr_enb_dai = daio_mgr_enb_dai;
-	hw->daio_mgr_dsb_dai = daio_mgr_dsb_dai;
-	hw->daio_mgr_enb_dao = daio_mgr_enb_dao;
-	hw->daio_mgr_dsb_dao = daio_mgr_dsb_dao;
-	hw->daio_mgr_dao_init = daio_mgr_dao_init;
-	hw->daio_mgr_set_imaparc = daio_mgr_set_imaparc;
-	hw->daio_mgr_set_imapnxt = daio_mgr_set_imapnxt;
-	hw->daio_mgr_set_imapaddr = daio_mgr_set_imapaddr;
-	hw->daio_mgr_commit_write = daio_mgr_commit_write;
-
-	hw->set_timer_irq = set_timer_irq;
-	hw->set_timer_tick = set_timer_tick;
-
-	*rhw = hw;
+	*rhw = &hw20k1->hw;
 
 	return 0;
 }
diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
index 3497287..041199f 100644
--- a/sound/pci/ctxfi/cthw20k2.c
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -2006,7 +2006,103 @@ static void hw_write_20kx(struct hw *hw, u32 reg, u32 data)
 	writel(data, (void *)(hw->mem_base + reg));
 }
 
-int create_20k2_hw_obj(struct hw **rhw)
+static struct hw ct20k2_preset __devinitdata = {
+	.irq = -1,
+
+	.card_init = hw_card_init,
+	.card_stop = hw_card_stop,
+	.pll_init = hw_pll_init,
+	.is_adc_source_selected = hw_is_adc_input_selected,
+	.select_adc_source = hw_adc_input_select,
+	.have_digit_io_switch = hw_have_digit_io_switch,
+
+	.src_rsc_get_ctrl_blk = src_get_rsc_ctrl_blk,
+	.src_rsc_put_ctrl_blk = src_put_rsc_ctrl_blk,
+	.src_mgr_get_ctrl_blk = src_mgr_get_ctrl_blk,
+	.src_mgr_put_ctrl_blk = src_mgr_put_ctrl_blk,
+	.src_set_state = src_set_state,
+	.src_set_bm = src_set_bm,
+	.src_set_rsr = src_set_rsr,
+	.src_set_sf = src_set_sf,
+	.src_set_wr = src_set_wr,
+	.src_set_pm = src_set_pm,
+	.src_set_rom = src_set_rom,
+	.src_set_vo = src_set_vo,
+	.src_set_st = src_set_st,
+	.src_set_ie = src_set_ie,
+	.src_set_ilsz = src_set_ilsz,
+	.src_set_bp = src_set_bp,
+	.src_set_cisz = src_set_cisz,
+	.src_set_ca = src_set_ca,
+	.src_set_sa = src_set_sa,
+	.src_set_la = src_set_la,
+	.src_set_pitch = src_set_pitch,
+	.src_set_dirty = src_set_dirty,
+	.src_set_clear_zbufs = src_set_clear_zbufs,
+	.src_set_dirty_all = src_set_dirty_all,
+	.src_commit_write = src_commit_write,
+	.src_get_ca = src_get_ca,
+	.src_get_dirty = src_get_dirty,
+	.src_dirty_conj_mask = src_dirty_conj_mask,
+	.src_mgr_enbs_src = src_mgr_enbs_src,
+	.src_mgr_enb_src = src_mgr_enb_src,
+	.src_mgr_dsb_src = src_mgr_dsb_src,
+	.src_mgr_commit_write = src_mgr_commit_write,
+
+	.srcimp_mgr_get_ctrl_blk = srcimp_mgr_get_ctrl_blk,
+	.srcimp_mgr_put_ctrl_blk = srcimp_mgr_put_ctrl_blk,
+	.srcimp_mgr_set_imaparc = srcimp_mgr_set_imaparc,
+	.srcimp_mgr_set_imapuser = srcimp_mgr_set_imapuser,
+	.srcimp_mgr_set_imapnxt = srcimp_mgr_set_imapnxt,
+	.srcimp_mgr_set_imapaddr = srcimp_mgr_set_imapaddr,
+	.srcimp_mgr_commit_write = srcimp_mgr_commit_write,
+
+	.amixer_rsc_get_ctrl_blk = amixer_rsc_get_ctrl_blk,
+	.amixer_rsc_put_ctrl_blk = amixer_rsc_put_ctrl_blk,
+	.amixer_mgr_get_ctrl_blk = amixer_mgr_get_ctrl_blk,
+	.amixer_mgr_put_ctrl_blk = amixer_mgr_put_ctrl_blk,
+	.amixer_set_mode = amixer_set_mode,
+	.amixer_set_iv = amixer_set_iv,
+	.amixer_set_x = amixer_set_x,
+	.amixer_set_y = amixer_set_y,
+	.amixer_set_sadr = amixer_set_sadr,
+	.amixer_set_se = amixer_set_se,
+	.amixer_set_dirty = amixer_set_dirty,
+	.amixer_set_dirty_all = amixer_set_dirty_all,
+	.amixer_commit_write = amixer_commit_write,
+	.amixer_get_y = amixer_get_y,
+	.amixer_get_dirty = amixer_get_dirty,
+
+	.dai_get_ctrl_blk = dai_get_ctrl_blk,
+	.dai_put_ctrl_blk = dai_put_ctrl_blk,
+	.dai_srt_set_srco = dai_srt_set_srco,
+	.dai_srt_set_srcm = dai_srt_set_srcm,
+	.dai_srt_set_rsr = dai_srt_set_rsr,
+	.dai_srt_set_drat = dai_srt_set_drat,
+	.dai_srt_set_ec = dai_srt_set_ec,
+	.dai_srt_set_et = dai_srt_set_et,
+	.dai_commit_write = dai_commit_write,
+
+	.dao_get_ctrl_blk = dao_get_ctrl_blk,
+	.dao_put_ctrl_blk = dao_put_ctrl_blk,
+	.dao_set_spos = dao_set_spos,
+	.dao_commit_write = dao_commit_write,
+	.dao_get_spos = dao_get_spos,
+
+	.daio_mgr_get_ctrl_blk = daio_mgr_get_ctrl_blk,
+	.daio_mgr_put_ctrl_blk = daio_mgr_put_ctrl_blk,
+	.daio_mgr_enb_dai = daio_mgr_enb_dai,
+	.daio_mgr_dsb_dai = daio_mgr_dsb_dai,
+	.daio_mgr_enb_dao = daio_mgr_enb_dao,
+	.daio_mgr_dsb_dao = daio_mgr_dsb_dao,
+	.daio_mgr_dao_init = daio_mgr_dao_init,
+	.daio_mgr_set_imaparc = daio_mgr_set_imaparc,
+	.daio_mgr_set_imapnxt = daio_mgr_set_imapnxt,
+	.daio_mgr_set_imapaddr = daio_mgr_set_imapaddr,
+	.daio_mgr_commit_write = daio_mgr_commit_write,
+};
+
+int __devinit create_20k2_hw_obj(struct hw **rhw)
 {
 	struct hw *hw;
 
@@ -2015,102 +2111,7 @@ int create_20k2_hw_obj(struct hw **rhw)
 	if (NULL == hw)
 		return -ENOMEM;
 
-	hw->io_base = 0;
-	hw->mem_base = (unsigned long)NULL;
-	hw->irq = -1;
-
-	hw->card_init = hw_card_init;
-	hw->card_stop = hw_card_stop;
-	hw->pll_init = hw_pll_init;
-	hw->is_adc_source_selected = hw_is_adc_input_selected;
-	hw->select_adc_source = hw_adc_input_select;
-	hw->have_digit_io_switch = hw_have_digit_io_switch;
-
-	hw->src_rsc_get_ctrl_blk = src_get_rsc_ctrl_blk;
-	hw->src_rsc_put_ctrl_blk = src_put_rsc_ctrl_blk;
-	hw->src_mgr_get_ctrl_blk = src_mgr_get_ctrl_blk;
-	hw->src_mgr_put_ctrl_blk = src_mgr_put_ctrl_blk;
-	hw->src_set_state = src_set_state;
-	hw->src_set_bm = src_set_bm;
-	hw->src_set_rsr = src_set_rsr;
-	hw->src_set_sf = src_set_sf;
-	hw->src_set_wr = src_set_wr;
-	hw->src_set_pm = src_set_pm;
-	hw->src_set_rom = src_set_rom;
-	hw->src_set_vo = src_set_vo;
-	hw->src_set_st = src_set_st;
-	hw->src_set_ie = src_set_ie;
-	hw->src_set_ilsz = src_set_ilsz;
-	hw->src_set_bp = src_set_bp;
-	hw->src_set_cisz = src_set_cisz;
-	hw->src_set_ca = src_set_ca;
-	hw->src_set_sa = src_set_sa;
-	hw->src_set_la = src_set_la;
-	hw->src_set_pitch = src_set_pitch;
-	hw->src_set_dirty = src_set_dirty;
-	hw->src_set_clear_zbufs = src_set_clear_zbufs;
-	hw->src_set_dirty_all = src_set_dirty_all;
-	hw->src_commit_write = src_commit_write;
-	hw->src_get_ca = src_get_ca;
-	hw->src_get_dirty = src_get_dirty;
-	hw->src_dirty_conj_mask = src_dirty_conj_mask;
-	hw->src_mgr_enbs_src = src_mgr_enbs_src;
-	hw->src_mgr_enb_src = src_mgr_enb_src;
-	hw->src_mgr_dsb_src = src_mgr_dsb_src;
-	hw->src_mgr_commit_write = src_mgr_commit_write;
-
-	hw->srcimp_mgr_get_ctrl_blk = srcimp_mgr_get_ctrl_blk;
-	hw->srcimp_mgr_put_ctrl_blk = srcimp_mgr_put_ctrl_blk;
-	hw->srcimp_mgr_set_imaparc = srcimp_mgr_set_imaparc;
-	hw->srcimp_mgr_set_imapuser = srcimp_mgr_set_imapuser;
-	hw->srcimp_mgr_set_imapnxt = srcimp_mgr_set_imapnxt;
-	hw->srcimp_mgr_set_imapaddr = srcimp_mgr_set_imapaddr;
-	hw->srcimp_mgr_commit_write = srcimp_mgr_commit_write;
-
-	hw->amixer_rsc_get_ctrl_blk = amixer_rsc_get_ctrl_blk;
-	hw->amixer_rsc_put_ctrl_blk = amixer_rsc_put_ctrl_blk;
-	hw->amixer_mgr_get_ctrl_blk = amixer_mgr_get_ctrl_blk;
-	hw->amixer_mgr_put_ctrl_blk = amixer_mgr_put_ctrl_blk;
-	hw->amixer_set_mode = amixer_set_mode;
-	hw->amixer_set_iv = amixer_set_iv;
-	hw->amixer_set_x = amixer_set_x;
-	hw->amixer_set_y = amixer_set_y;
-	hw->amixer_set_sadr = amixer_set_sadr;
-	hw->amixer_set_se = amixer_set_se;
-	hw->amixer_set_dirty = amixer_set_dirty;
-	hw->amixer_set_dirty_all = amixer_set_dirty_all;
-	hw->amixer_commit_write = amixer_commit_write;
-	hw->amixer_get_y = amixer_get_y;
-	hw->amixer_get_dirty = amixer_get_dirty;
-
-	hw->dai_get_ctrl_blk = dai_get_ctrl_blk;
-	hw->dai_put_ctrl_blk = dai_put_ctrl_blk;
-	hw->dai_srt_set_srco = dai_srt_set_srco;
-	hw->dai_srt_set_srcm = dai_srt_set_srcm;
-	hw->dai_srt_set_rsr = dai_srt_set_rsr;
-	hw->dai_srt_set_drat = dai_srt_set_drat;
-	hw->dai_srt_set_ec = dai_srt_set_ec;
-	hw->dai_srt_set_et = dai_srt_set_et;
-	hw->dai_commit_write = dai_commit_write;
-
-	hw->dao_get_ctrl_blk = dao_get_ctrl_blk;
-	hw->dao_put_ctrl_blk = dao_put_ctrl_blk;
-	hw->dao_set_spos = dao_set_spos;
-	hw->dao_commit_write = dao_commit_write;
-	hw->dao_get_spos = dao_get_spos;
-
-	hw->daio_mgr_get_ctrl_blk = daio_mgr_get_ctrl_blk;
-	hw->daio_mgr_put_ctrl_blk = daio_mgr_put_ctrl_blk;
-	hw->daio_mgr_enb_dai = daio_mgr_enb_dai;
-	hw->daio_mgr_dsb_dai = daio_mgr_dsb_dai;
-	hw->daio_mgr_enb_dao = daio_mgr_enb_dao;
-	hw->daio_mgr_dsb_dao = daio_mgr_dsb_dao;
-	hw->daio_mgr_dao_init = daio_mgr_dao_init;
-	hw->daio_mgr_set_imaparc = daio_mgr_set_imaparc;
-	hw->daio_mgr_set_imapnxt = daio_mgr_set_imapnxt;
-	hw->daio_mgr_set_imapaddr = daio_mgr_set_imapaddr;
-	hw->daio_mgr_commit_write = daio_mgr_commit_write;
-
+	*hw = ct20k2_preset;
 	*rhw = hw;
 
 	return 0;
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index bf232e7..279dac6 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -86,7 +86,7 @@ ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 	card->private_data = atc;
 
 	/* Create alsa devices supported by this card */
-	err = atc->create_alsa_devs(atc);
+	err = ct_atc_create_alsa_devs(atc);
 	if (err < 0)
 		goto error;
 
-- 
cgit v0.10.2


From 032abb519c23000f8a0fae78bb460047f9129270 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 16:37:19 +0200
Subject: ALSA: ctxfi - Set device 0 for mixer control elements

Mixer control elements are usually assigned to device 0.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctmixer.c b/sound/pci/ctxfi/ctmixer.c
index 177c46e..fac783f 100644
--- a/sound/pci/ctxfi/ctmixer.c
+++ b/sound/pci/ctxfi/ctmixer.c
@@ -1097,7 +1097,7 @@ int ct_alsa_mix_create(struct ct_atc *atc,
 	int err = 0;
 
 	/* Create snd kcontrol instances on demand */
-	vol_ctl.device = swh_ctl.device = device;
+	/* vol_ctl.device = swh_ctl.device = device; */ /* better w/ device 0 */
 	err = ct_mixer_kcontrols_create((struct ct_mixer *)atc->mixer);
 	if (err)
 		return err;
-- 
cgit v0.10.2


From e9e10124e269a39de089c5e0d9936fae2ff889b2 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Fri, 5 Jun 2009 11:56:18 +0100
Subject: ivtv: Fix PCI DMA direction

The ivtv stream buffers may be for receive or for send but the attached
sg handle is always destined cpu->device.  We flush it correctly but the
allocation is wrongly done with the same type as the buffers.

See bug: http://bugzilla.kernel.org/show_bug.cgi?id=13385

(Note this doesn't close the bug - it fixes the ivtv part and in turn
the logging next shows up some rather alarming DMA sg list warnings in
libata)

Signed-off-by: Alan Cox <alan@linux.intel.com>
Acked-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/media/video/ivtv/ivtv-queue.c b/drivers/media/video/ivtv/ivtv-queue.c
index ff7b7de..7fde36e 100644
--- a/drivers/media/video/ivtv/ivtv-queue.c
+++ b/drivers/media/video/ivtv/ivtv-queue.c
@@ -230,7 +230,8 @@ int ivtv_stream_alloc(struct ivtv_stream *s)
 		return -ENOMEM;
 	}
 	if (ivtv_might_use_dma(s)) {
-		s->sg_handle = pci_map_single(itv->pdev, s->sg_dma, sizeof(struct ivtv_sg_element), s->dma);
+		s->sg_handle = pci_map_single(itv->pdev, s->sg_dma,
+				sizeof(struct ivtv_sg_element), PCI_DMA_TODEVICE);
 		ivtv_stream_sync_for_cpu(s);
 	}
 
-- 
cgit v0.10.2


From 3f7440a6b771169e1f11fa582e53a4259b682809 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 17:40:04 +0200
Subject: ALSA: Clean up 64bit division functions

Replace the house-made div64_32() with the standard div_u64*() functions.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index c172968..0caf71e 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -486,80 +486,6 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream);
 void snd_pcm_vma_notify_data(void *client, void *data);
 int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area);
 
-#if BITS_PER_LONG >= 64
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	*rem = *n % div;
-	*n /= div;
-}
-
-#elif defined(i386)
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	u_int32_t low, high;
-	low = *n & 0xffffffff;
-	high = *n >> 32;
-	if (high) {
-		u_int32_t high1 = high % div;
-		high /= div;
-		asm("divl %2":"=a" (low), "=d" (*rem):"rm" (div), "a" (low), "d" (high1));
-		*n = (u_int64_t)high << 32 | low;
-	} else {
-		*n = low / div;
-		*rem = low % div;
-	}
-}
-#else
-
-static inline void divl(u_int32_t high, u_int32_t low,
-			u_int32_t div,
-			u_int32_t *q, u_int32_t *r)
-{
-	u_int64_t n = (u_int64_t)high << 32 | low;
-	u_int64_t d = (u_int64_t)div << 31;
-	u_int32_t q1 = 0;
-	int c = 32;
-	while (n > 0xffffffffU) {
-		q1 <<= 1;
-		if (n >= d) {
-			n -= d;
-			q1 |= 1;
-		}
-		d >>= 1;
-		c--;
-	}
-	q1 <<= c;
-	if (n) {
-		low = n;
-		*q = q1 | (low / div);
-		*r = low % div;
-	} else {
-		*r = 0;
-		*q = q1;
-	}
-	return;
-}
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	u_int32_t low, high;
-	low = *n & 0xffffffff;
-	high = *n >> 32;
-	if (high) {
-		u_int32_t high1 = high % div;
-		u_int32_t low1 = low;
-		high /= div;
-		divl(high1, low1, div, &low, rem);
-		*n = (u_int64_t)high << 32 | low;
-	} else {
-		*n = low / div;
-		*rem = low % div;
-	}
-}
-#endif
-
 /*
  *  PCM library
  */
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index dda000b..dbe406b 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -31,6 +31,7 @@
 #include <linux/time.h>
 #include <linux/vmalloc.h>
 #include <linux/moduleparam.h>
+#include <linux/math64.h>
 #include <linux/string.h>
 #include <sound/core.h>
 #include <sound/minors.h>
@@ -617,9 +618,7 @@ static long snd_pcm_oss_bytes(struct snd_pcm_substream *substream, long frames)
 #else
 	{
 		u64 bsize = (u64)runtime->oss.buffer_bytes * (u64)bytes;
-		u32 rem;
-		div64_32(&bsize, buffer_size, &rem);
-		return (long)bsize;
+		return div_u64(bsize, buffer_size);
 	}
 #endif
 }
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index d659995..a748287 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -22,6 +22,7 @@
 
 #include <linux/slab.h>
 #include <linux/time.h>
+#include <linux/math64.h>
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/info.h>
@@ -452,7 +453,7 @@ static inline unsigned int muldiv32(unsigned int a, unsigned int b,
 		*r = 0;
 		return UINT_MAX;
 	}
-	div64_32(&n, c, r);
+	n = div_u64_rem(n, c, r);
 	if (n >= UINT_MAX) {
 		*r = 0;
 		return UINT_MAX;
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 314e735..bcfdbb5 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <linux/firmware.h>
 #include <linux/moduleparam.h>
+#include <linux/math64.h>
 
 #include <sound/core.h>
 #include <sound/control.h>
@@ -1047,7 +1048,6 @@ static int hdsp_set_interrupt_interval(struct hdsp *s, unsigned int frames)
 static void hdsp_set_dds_value(struct hdsp *hdsp, int rate)
 {
 	u64 n;
-	u32 r;
 
 	if (rate >= 112000)
 		rate /= 4;
@@ -1055,7 +1055,7 @@ static void hdsp_set_dds_value(struct hdsp *hdsp, int rate)
 		rate /= 2;
 
 	n = DDS_NUMERATOR;
-	div64_32(&n, rate, &r);
+	n = div_u64(n, rate);
 	/* n should be less than 2^32 for being written to FREQ register */
 	snd_BUG_ON(n >> 32);
 	/* HDSP_freqReg and HDSP_resetPointer are the same, so keep the DDS
@@ -3097,7 +3097,6 @@ static int snd_hdsp_get_adat_sync_check(struct snd_kcontrol *kcontrol, struct sn
 static int hdsp_dds_offset(struct hdsp *hdsp)
 {
 	u64 n;
-	u32 r;
 	unsigned int dds_value = hdsp->dds_value;
 	int system_sample_rate = hdsp->system_sample_rate;
 
@@ -3109,7 +3108,7 @@ static int hdsp_dds_offset(struct hdsp *hdsp)
 	 * dds_value = n / rate
 	 * rate = n / dds_value
 	 */
-	div64_32(&n, dds_value, &r);
+	n = div_u64(n, dds_value);
 	if (system_sample_rate >= 112000)
 		n *= 4;
 	else if (system_sample_rate >= 56000)
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index bac2dc0..0dce331 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -29,6 +29,7 @@
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <linux/math64.h>
 #include <asm/io.h>
 
 #include <sound/core.h>
@@ -831,7 +832,6 @@ static int hdspm_set_interrupt_interval(struct hdspm * s, unsigned int frames)
 static void hdspm_set_dds_value(struct hdspm *hdspm, int rate)
 {
 	u64 n;
-	u32 r;
 	
 	if (rate >= 112000)
 		rate /= 4;
@@ -844,7 +844,7 @@ static void hdspm_set_dds_value(struct hdspm *hdspm, int rate)
         */	   
 	/* n = 104857600000000ULL; */ /*  =  2^20 * 10^8 */
 	n = 110100480000000ULL;    /* Value checked for AES32 and MADI */
-	div64_32(&n, rate, &r);
+	n = div_u64(n, rate);
 	/* n should be less than 2^32 for being written to FREQ register */
 	snd_BUG_ON(n >> 32);
 	hdspm_write(hdspm, HDSPM_freqReg, (u32)n);
-- 
cgit v0.10.2


From 136107a76fe5f62906162f730834477b71cf131e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 17:56:21 +0200
Subject: perf_counter tools: Remove -march=native

Turns out that neither PowerPC nor older x86 compilers know this switch
...

and since it does not make a measurable difference, just omit it.

Reported-by: Paul Mackerras <paulus@samba.org>
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index c9ec458..5b99f04 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -159,7 +159,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
-CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 -march=native
+CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
 LDFLAGS = -lpthread -lrt -lelf
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
-- 
cgit v0.10.2


From 28cd4aa43de2b6d3b1e3385d450bfb31cbe8d72a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 17:58:00 +0200
Subject: ALSA: ctxfi - Add missing inclusion of linux/math64.h

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
index 3acb26d..ceda74e 100644
--- a/sound/pci/ctxfi/cttimer.c
+++ b/sound/pci/ctxfi/cttimer.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/slab.h>
+#include <linux/math64.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include "ctatc.h"
-- 
cgit v0.10.2


From ac4bcf889469ffbca88f234d3184452886a47905 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 14:44:52 +0200
Subject: perf_counter: Change PERF_SAMPLE_CONFIG into PERF_SAMPLE_ID

The purpose of PERF_SAMPLE_CONFIG was to identify the counters,
since then we've added counter ids, use those instead.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 40dc0e2..9cea32a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -104,7 +104,7 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_ADDR		= 1U << 3,
 	PERF_SAMPLE_GROUP		= 1U << 4,
 	PERF_SAMPLE_CALLCHAIN		= 1U << 5,
-	PERF_SAMPLE_CONFIG		= 1U << 6,
+	PERF_SAMPLE_ID			= 1U << 6,
 	PERF_SAMPLE_CPU			= 1U << 7,
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 37a5a24..e75b91a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2392,8 +2392,8 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
-	if (sample_type & PERF_SAMPLE_CONFIG) {
-		header.type |= PERF_SAMPLE_CONFIG;
+	if (sample_type & PERF_SAMPLE_ID) {
+		header.type |= PERF_SAMPLE_ID;
 		header.size += sizeof(u64);
 	}
 
@@ -2439,8 +2439,8 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (sample_type & PERF_SAMPLE_ADDR)
 		perf_output_put(&handle, addr);
 
-	if (sample_type & PERF_SAMPLE_CONFIG)
-		perf_output_put(&handle, counter->attr.config);
+	if (sample_type & PERF_SAMPLE_ID)
+		perf_output_put(&handle, counter->id);
 
 	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(&handle, cpu_entry);
-- 
cgit v0.10.2


From 689802b2d0536e72281dc959ab9cb34fb3c304cf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 15:05:43 +0200
Subject: perf_counter: Add PERF_SAMPLE_PERIOD

In order to allow easy tracking of the period, also provide means of
adding it to the sample data.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9cea32a..6bc2500 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -106,6 +106,7 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_CALLCHAIN		= 1U << 5,
 	PERF_SAMPLE_ID			= 1U << 6,
 	PERF_SAMPLE_CPU			= 1U << 7,
+	PERF_SAMPLE_PERIOD		= 1U << 8,
 };
 
 /*
@@ -260,6 +261,7 @@ enum perf_event_type {
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u64				time;
+	 *	u64				id;
 	 *	u64				sample_period;
 	 * };
 	 */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e75b91a..f839066 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2404,6 +2404,11 @@ static void perf_counter_output(struct perf_counter *counter,
 		cpu_entry.cpu = raw_smp_processor_id();
 	}
 
+	if (sample_type & PERF_SAMPLE_PERIOD) {
+		header.type |= PERF_SAMPLE_PERIOD;
+		header.size += sizeof(u64);
+	}
+
 	if (sample_type & PERF_SAMPLE_GROUP) {
 		header.type |= PERF_SAMPLE_GROUP;
 		header.size += sizeof(u64) +
@@ -2445,6 +2450,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(&handle, cpu_entry);
 
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		perf_output_put(&handle, counter->hw.sample_period);
+
 	/*
 	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
 	 */
@@ -2835,6 +2843,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
 	struct {
 		struct perf_event_header	header;
 		u64				time;
+		u64				id;
 		u64				period;
 	} freq_event = {
 		.header = {
@@ -2843,6 +2852,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
 			.size = sizeof(freq_event),
 		},
 		.time = sched_clock(),
+		.id = counter->id,
 		.period = period,
 	};
 
-- 
cgit v0.10.2


From 6a24ed6c6082ec65d19331a4bfa30c0512a1a822 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 18:01:29 +0200
Subject: perf_counter: Fix frequency adjustment for < HZ

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6bc2500..4f9d39e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -373,6 +373,9 @@ struct hw_perf_counter {
 	u64				sample_period;
 	atomic64_t			period_left;
 	u64				interrupts;
+
+	u64				freq_count;
+	u64				freq_interrupts;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f839066..47c92fb 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1187,8 +1187,9 @@ static void perf_log_period(struct perf_counter *counter, u64 period);
 static void perf_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
 	u64 interrupts, sample_period;
-	u64 events, period;
+	u64 events, period, freq;
 	s64 delta;
 
 	spin_lock(&ctx->lock);
@@ -1196,8 +1197,10 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 			continue;
 
-		interrupts = counter->hw.interrupts;
-		counter->hw.interrupts = 0;
+		hwc = &counter->hw;
+
+		interrupts = hwc->interrupts;
+		hwc->interrupts = 0;
 
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(counter, 1);
@@ -1208,20 +1211,35 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (!counter->attr.freq || !counter->attr.sample_freq)
 			continue;
 
-		events = HZ * interrupts * counter->hw.sample_period;
+		if (counter->attr.sample_freq < HZ) {
+			freq = counter->attr.sample_freq;
+
+			hwc->freq_count += freq;
+			hwc->freq_interrupts += interrupts;
+
+			if (hwc->freq_count < HZ)
+				continue;
+
+			interrupts = hwc->freq_interrupts;
+			hwc->freq_interrupts = 0;
+			hwc->freq_count -= HZ;
+		} else
+			freq = HZ;
+
+		events = freq * interrupts * hwc->sample_period;
 		period = div64_u64(events, counter->attr.sample_freq);
 
-		delta = (s64)(1 + period - counter->hw.sample_period);
+		delta = (s64)(1 + period - hwc->sample_period);
 		delta >>= 1;
 
-		sample_period = counter->hw.sample_period + delta;
+		sample_period = hwc->sample_period + delta;
 
 		if (!sample_period)
 			sample_period = 1;
 
 		perf_log_period(counter, sample_period);
 
-		counter->hw.sample_period = sample_period;
+		hwc->sample_period = sample_period;
 	}
 	spin_unlock(&ctx->lock);
 }
-- 
cgit v0.10.2


From b2fef0762fdb65cf8702eea93f4e58abeb0ecefc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 18:07:51 +0200
Subject: perf_counter tools: Sample and display frequency adjustment changes

To allow the debugging of frequency-adjusting counters, sample
those adjustments and display them in perf report -D.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index d4ad3057..43ddab3 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -347,7 +347,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	memset(&attr, 0, sizeof(attr));
 	attr.config		= event_id[counter];
 	attr.sample_period	= event_count[counter];
-	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
 	attr.freq		= freq;
 	attr.mmap		= track;
 	attr.comm		= track;
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
index 5af105c..242e09f 100644
--- a/Documentation/perf_counter/builtin-report.c
+++ b/Documentation/perf_counter/builtin-report.c
@@ -69,12 +69,20 @@ struct fork_event {
 	__u32 pid, ppid;
 };
 
-typedef union event_union {
+struct period_event {
 	struct perf_event_header header;
-	struct ip_event ip;
-	struct mmap_event mmap;
-	struct comm_event comm;
-	struct fork_event fork;
+	__u64 time;
+	__u64 id;
+	__u64 sample_period;
+};
+
+typedef union event_union {
+	struct perf_event_header	header;
+	struct ip_event			ip;
+	struct mmap_event		mmap;
+	struct comm_event		comm;
+	struct fork_event		fork;
+	struct period_event		period;
 } event_t;
 
 static LIST_HEAD(dsos);
@@ -1053,6 +1061,19 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head)
 }
 
 static int
+process_period_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->period.time,
+		event->period.id,
+		event->period.sample_period);
+
+	return 0;
+}
+
+static int
 process_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
@@ -1068,11 +1089,12 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	case PERF_EVENT_FORK:
 		return process_fork_event(event, offset, head);
 
+	case PERF_EVENT_PERIOD:
+		return process_period_event(event, offset, head);
 	/*
 	 * We dont process them right now but they are fine:
 	 */
 
-	case PERF_EVENT_PERIOD:
 	case PERF_EVENT_THROTTLE:
 	case PERF_EVENT_UNTHROTTLE:
 		return 0;
@@ -1157,6 +1179,11 @@ more:
 
 	size = event->header.size;
 
+	dprintf("%p [%p]: event: %d\n",
+			(void *)(offset + head),
+			(void *)(long)event->header.size,
+			event->header.type);
+
 	if (!size || process_event(event, offset, head) < 0) {
 
 		dprintf("%p [%p]: skipping unknown header type: %d\n",
-- 
cgit v0.10.2


From 1dba15e74aba5a90c1f2557f37e5d09f8a2df643 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 18:37:22 +0200
Subject: perf record: Set frequency correctly

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 43ddab3..c22ea0c 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -348,7 +348,10 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	attr.config		= event_id[counter];
 	attr.sample_period	= event_count[counter];
 	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
-	attr.freq		= freq;
+	if (freq) {
+		attr.freq		= 1;
+		attr.sample_freq	= freq;
+	}
 	attr.mmap		= track;
 	attr.comm		= track;
 	attr.inherit		= (cpu < 0) && inherit;
@@ -544,10 +547,6 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 		event_id[0] = 0;
 	}
 
-	if (freq) {
-		default_interval = freq;
-		freq = 1;
-	}
 	for (counter = 0; counter < nr_counters; counter++) {
 		if (event_count[counter])
 			continue;
-- 
cgit v0.10.2


From 27704a16c9e0fb4c6b04344c7c4c40ac16148ec0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 5 Jun 2009 10:21:52 -0700
Subject: Revert "drm: don't associate _DRM_DRIVER maps with a master"

This reverts commit 6c51d1cfa0a370b48a157163340190cf5fd2346b, which
apparently causes DRI initialization failures on Radeons.

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Requested-by: Dave Airlie <airlied@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 80a2575..0411d91 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -371,8 +371,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	list->user_token = list->hash.key << PAGE_SHIFT;
 	mutex_unlock(&dev->struct_mutex);
 
-	if (!(map->flags & _DRM_DRIVER))
-		list->master = dev->primary->master;
+	list->master = dev->primary->master;
 	*maplist = list;
 	return 0;
 	}
-- 
cgit v0.10.2


From 2c701b10283b58937201004276319ef9d9051b5d Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@codemonkey.org.uk>
Date: Fri, 5 Jun 2009 12:37:07 -0400
Subject: [CPUFREQ] powernow-k8: check space_id of _PCT registers to be FFH

The powernow-k8 driver checks to see that the Performance Control/Status
Registers are declared as FFH (functional fixed hardware) by the BIOS.
However, this check got broken in the commit:
 0e64a0c982c06a6b8f5e2a7f29eb108fdf257b2f
 [CPUFREQ] checkpatch cleanups for powernow-k8

Fix based on an original patch from Naga Chumbalkar.

Signed-off-by: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Cc: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index f6b32d1..35dc8fb 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -835,7 +835,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
 	int ret_val = -ENODEV;
-	acpi_integer space_id;
+	acpi_integer control, status;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
@@ -848,12 +848,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 		goto err_out;
 	}
 
-	space_id = data->acpi_data.control_register.space_id;
-	if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-		(space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+	control = data->acpi_data.control_register.space_id;
+	status = data->acpi_data.status_register.space_id;
+
+	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
 		dprintk("Invalid control/status registers (%x - %x)\n",
-			data->acpi_data.control_register.space_id,
-			space_id);
+			control, status);
 		goto err_out;
 	}
 
-- 
cgit v0.10.2


From 2ac6bf4ddc87c3b6b609f8fa82f6ebbffeac12f4 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Fri, 5 Jun 2009 10:36:24 -0700
Subject: IB/mlx4: Add strong ordering to local inval and fast reg work
 requests

The ConnectX Programmer's Reference Manual states that the "SO" bit
must be set when posting Fast Register and Local Invalidate send work
requests.  When this bit is set, the work request will be executed
only after all previous work requests on the send queue have been
executed.  (If the bit is not set, Fast Register and Local Invalidate
WQEs may begin execution too early, which violates the defined
semantics for these operations)

This fixes the issue with NFS/RDMA reported in
<http://lists.openfabrics.org/pipermail/general/2009-April/059253.html>

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Cc: <stable@kernel.org>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 20724ae..c4a0264 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1585,12 +1585,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				break;
 
 			case IB_WR_LOCAL_INV:
+				ctrl->srcrb_flags |=
+					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
 				set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
 				wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
 				size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
 				break;
 
 			case IB_WR_FAST_REG_MR:
+				ctrl->srcrb_flags |=
+					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
 				set_fmr_seg(wqe, wr);
 				wqe  += sizeof (struct mlx4_wqe_fmr_seg);
 				size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index bf8f119..9f29d86 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -165,6 +165,7 @@ enum {
 	MLX4_WQE_CTRL_IP_CSUM		= 1 << 4,
 	MLX4_WQE_CTRL_TCP_UDP_CSUM	= 1 << 5,
 	MLX4_WQE_CTRL_INS_VLAN		= 1 << 6,
+	MLX4_WQE_CTRL_STRONG_ORDER	= 1 << 7,
 };
 
 struct mlx4_wqe_ctrl_seg {
-- 
cgit v0.10.2


From 45bc955bb1324a46c9539550cc615994e6d0a43d Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Fri, 5 Jun 2009 10:41:39 -0400
Subject: pata_legacy: wait for async probing

The basic problem here that pata_legacy attaches the host, sees if it found
any devices and detaches it if none were found.  With async probing, it's not
waiting until discovery is finished before deciding it has no devices and
trying the detach leading to this warning:

ata1: PATA max PIO4 cmd 0x1f0 ctl 0x3f6 irq 14
------------[ cut here ]------------
WARNING: at drivers/ata/libata-core.c:6222 ata_host_detach+0x75/0x90()
Modules linked in:
Pid: 1, comm: swapper Not tainted 2.6.30-rc7 #1
Call Trace:
 [<c01fbb05>] ? ata_host_detach+0x75/0x90
 [<c01fbb05>] ? ata_host_detach+0x75/0x90
 [<c01139b5>] ? warn_slowpath_common+0x45/0x80
 [<c01139fa>] ? warn_slowpath_null+0xa/0x10
 [<c01fbb05>] ? ata_host_detach+0x75/0x90
 [<c02f40e0>] ? legacy_init+0x44e/0x87f
 [<c02f3c92>] ? legacy_init+0x0/0x87f
 [<c0101021>] ? _stext+0x21/0x140
 [<c01890ff>] ? proc_register+0x2f/0x190
 [<c018938c>] ? create_proc_entry+0x5c/0xc0
 [<c0135ebe>] ? register_irq_proc+0x6e/0x90
 [<c02e6484>] ? kernel_init+0x6e/0xbf
 [<c02e6416>] ? kernel_init+0x0/0xbf
 [<c01031d7>] ? kernel_thread_helper+0x7/0x10
---[ end trace ef1ee36e873ae3a0 ]---

Because it detaches before the probe is complete.

One way to fix it would be to put an async_synchronize_full() before looking
for devices, which this patch does.  A better way might be to separate libata
into its own domain and only wait for that.

Reported-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index f72c6c5..6932e56 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -48,6 +48,7 @@
  *
  */
 
+#include <linux/async.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -1028,6 +1029,7 @@ static __init int legacy_init_one(struct legacy_probe *probe)
 				&legacy_sht);
 	if (ret)
 		goto fail;
+	async_synchronize_full();
 	ld->platform_dev = pdev;
 
 	/* Nothing found means we drop the port as its probably not there */
-- 
cgit v0.10.2


From 5f33b3bcd7aac66a51e6bfaf35e8cff4eabafb06 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Mon, 1 Jun 2009 22:42:10 +0300
Subject: pata_efar: fix PIO2 underclocking

Fix the PIO mode 2 using mode 0 timings -- this driver should enable the
fast timing bank starting with PIO2, just like the PIIX/ICH drivers do.
Also, fix/rephrase some comments while at it.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c
index 2085e0a..2a6412f 100644
--- a/drivers/ata/pata_efar.c
+++ b/drivers/ata/pata_efar.c
@@ -22,7 +22,7 @@
 #include <linux/ata.h>
 
 #define DRV_NAME	"pata_efar"
-#define DRV_VERSION	"0.4.4"
+#define DRV_VERSION	"0.4.5"
 
 /**
  *	efar_pre_reset	-	Enable bits
@@ -98,18 +98,17 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev)
 			    { 2, 1 },
 			    { 2, 3 }, };
 
-	if (pio > 2)
-		control |= 1;	/* TIME1 enable */
+	if (pio > 1)
+		control |= 1;	/* TIME */
 	if (ata_pio_need_iordy(adev))	/* PIO 3/4 require IORDY */
-		control |= 2;	/* IE enable */
-	/* Intel specifies that the PPE functionality is for disk only */
+		control |= 2;	/* IE */
+	/* Intel specifies that the prefetch/posting is for disk only */
 	if (adev->class == ATA_DEV_ATA)
-		control |= 4;	/* PPE enable */
+		control |= 4;	/* PPE */
 
 	pci_read_config_word(dev, idetm_port, &idetm_data);
 
-	/* Enable PPE, IE and TIME as appropriate */
-
+	/* Set PPE, IE, and TIME as appropriate */
 	if (adev->devno == 0) {
 		idetm_data &= 0xCCF0;
 		idetm_data |= control;
@@ -129,7 +128,7 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev)
 		pci_write_config_byte(dev, 0x44, slave_data);
 	}
 
-	idetm_data |= 0x4000;	/* Ensure SITRE is enabled */
+	idetm_data |= 0x4000;	/* Ensure SITRE is set */
 	pci_write_config_word(dev, idetm_port, idetm_data);
 }
 
-- 
cgit v0.10.2


From 9b10ae86d1616f46dabb67c663fe6a9c3a502663 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 30 May 2009 20:50:12 +0900
Subject: ahci: add warning messages for hp laptops with broken suspend

Harddisks on HP dv[4-6] and HDX18 fail to come online after resume on
earlier BIOSen.  Fortunately, HP recently released BIOS updates for
all machines to fix the issue.  Detect old BIOSen, warn the user to
update BIOS on boot and suspend attempts and fail suspend.

Kudos to all the bug reporters.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: kernel.org@epperson.homelinux.net
Cc: emisca@gmail.com
Cc: Gadi Cohen <dragon@wastelands.net>
Cc: Paul Swanson <paul@procursa.com>
Cc: s@ourada.org
Cc: Trevor Davenport <trevor.davenport@gmail.com>
Cc: corruptor1972 <steven_tierney@yahoo.co.uk>
Cc: Victoria Wilson <mail@vwilson.co.uk>
Cc: khiraly <khiraly.list@gmail.com>
Cc: Sean <wollombi@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 08186ec..6b91c26 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -220,6 +220,7 @@ enum {
 	AHCI_HFLAG_NO_HOTPLUG		= (1 << 7), /* ignore PxSERR.DIAG.N */
 	AHCI_HFLAG_SECT255		= (1 << 8), /* max 255 sectors */
 	AHCI_HFLAG_YES_NCQ		= (1 << 9), /* force NCQ cap on */
+	AHCI_HFLAG_NO_SUSPEND		= (1 << 10), /* don't suspend */
 
 	/* ap->flags bits */
 
@@ -2316,9 +2317,17 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg)
 static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
 	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	struct ahci_host_priv *hpriv = host->private_data;
 	void __iomem *mmio = host->iomap[AHCI_PCI_BAR];
 	u32 ctl;
 
+	if (mesg.event & PM_EVENT_SUSPEND &&
+	    hpriv->flags & AHCI_HFLAG_NO_SUSPEND) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "BIOS update required for suspend/resume\n");
+		return -EIO;
+	}
+
 	if (mesg.event & PM_EVENT_SLEEP) {
 		/* AHCI spec rev1.1 section 8.3.3:
 		 * Software must disable interrupts prior to requesting a
@@ -2610,6 +2619,63 @@ static bool ahci_broken_system_poweroff(struct pci_dev *pdev)
 	return false;
 }
 
+static bool ahci_broken_suspend(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		/*
+		 * On HP dv[4-6] and HDX18 with earlier BIOSen, link
+		 * to the harddisk doesn't become online after
+		 * resuming from STR.  Warn and fail suspend.
+		 */
+		{
+			.ident = "dv4",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv4 Notebook PC"),
+			},
+			.driver_data = "F.30", /* cutoff BIOS version */
+		},
+		{
+			.ident = "dv5",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv5 Notebook PC"),
+			},
+			.driver_data = "F.16", /* cutoff BIOS version */
+		},
+		{
+			.ident = "dv6",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP Pavilion dv6 Notebook PC"),
+			},
+			.driver_data = "F.21",	/* cutoff BIOS version */
+		},
+		{
+			.ident = "HDX18",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME,
+					  "HP HDX18 Notebook PC"),
+			},
+			.driver_data = "F.23",	/* cutoff BIOS version */
+		},
+		{ }	/* terminate list */
+	};
+	const struct dmi_system_id *dmi = dmi_first_match(sysids);
+	const char *ver;
+
+	if (!dmi || pdev->bus->number || pdev->devfn != PCI_DEVFN(0x1f, 2))
+		return false;
+
+	ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+	return !ver || strcmp(ver, dmi->driver_data) < 0;
+}
+
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	static int printed_version;
@@ -2715,6 +2781,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			"quirky BIOS, skipping spindown on poweroff\n");
 	}
 
+	if (ahci_broken_suspend(pdev)) {
+		hpriv->flags |= AHCI_HFLAG_NO_SUSPEND;
+		dev_printk(KERN_WARNING, &pdev->dev,
+			   "BIOS update required for suspend/resume\n");
+	}
+
 	/* CAP.NP sometimes indicate the index of the last enabled
 	 * port, at other times, that of the last possible port, so
 	 * determining the maximum port number requires looking at
-- 
cgit v0.10.2


From 65e31643115349fd7a81acbe75ec4a54d5df8aad Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Tue, 19 May 2009 01:37:44 +0300
Subject: ata_piix: Add HP Compaq nc6000 to the broken poweroff list

HP Compaq nc6000 suffers from the double disk spindown issue.
Add it to the broken poweroff DMI list.

Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index d51a17c..1aeb708 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -1455,6 +1455,15 @@ static bool piix_broken_system_poweroff(struct pci_dev *pdev)
 			/* PCI slot number of the controller */
 			.driver_data = (void *)0x1FUL,
 		},
+		{
+			.ident = "HP Compaq nc6000",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+				DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6000"),
+			},
+			/* PCI slot number of the controller */
+			.driver_data = (void *)0x1FUL,
+		},
 
 		{ }	/* terminate list */
 	};
-- 
cgit v0.10.2


From 2f335a02b3c816e77e7df1d15b12e3bbb8f4c8f0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 5 Jun 2009 19:31:01 +0200
Subject: perf top: Fix zero or negative refresh delay

If perf top is executed with a zero value for the refresh rate,
we get a division by zero exception while computing samples_per_sec.

Also a zero refresh rate is not possible, neither do we want to
accept negative values.

[ Impact: fix division by zero in perf top ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1244223061-5399-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index ff7e13c..b2f480b 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -693,6 +693,9 @@ int cmd_top(int argc, const char **argv, const char *prefix)
 		event_id[0] = 0;
 	}
 
+	if (delay_secs < 1)
+		delay_secs = 1;
+
 	for (counter = 0; counter < nr_counters; counter++) {
 		if (event_count[counter])
 			continue;
-- 
cgit v0.10.2


From cd24f8c1e7e27a2c6051a9a338d4704a2431dbf0 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Fri, 5 Jun 2009 18:48:08 +0100
Subject: mtd: davinci nand: update clock naming

DaVinci clock support has been updated in mainline.
Update clock names accordingly.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 0119220..02700f7 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -407,16 +407,17 @@ static int __init nand_davinci_probe(struct platform_device *pdev)
 	}
 	info->chip.ecc.mode = ecc_mode;
 
-	info->clk = clk_get(&pdev->dev, "AEMIFCLK");
+	info->clk = clk_get(&pdev->dev, "aemif");
 	if (IS_ERR(info->clk)) {
 		ret = PTR_ERR(info->clk);
-		dev_dbg(&pdev->dev, "unable to get AEMIFCLK, err %d\n", ret);
+		dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
 		goto err_clk;
 	}
 
 	ret = clk_enable(info->clk);
 	if (ret < 0) {
-		dev_dbg(&pdev->dev, "unable to enable AEMIFCLK, err %d\n", ret);
+		dev_dbg(&pdev->dev, "unable to enable AEMIF clock, err %d\n",
+			ret);
 		goto err_clk_enable;
 	}
 
-- 
cgit v0.10.2


From a3cb900cc408977a11519bc7c760f3e499079589 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Wed, 13 May 2009 15:02:27 +0100
Subject: [libata] pata_ali: Use IGN_SIMPLEX

Some ALi devices report simplex if they have been disabled and re-enabled, and
restoring the byte does not work. Ignore it - the needed supporting logic is
already present for the SATA ULi ports.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index 751b7ea..fc9c5d6 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -497,14 +497,16 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0x20 added DMA */
 	static const struct ata_port_info info_20 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.port_ops = &ali_20_port_ops
 	};
 	/* Revision 0x20 with support logic added UDMA */
 	static const struct ata_port_info info_20_udma = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA2,
@@ -512,7 +514,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0xC2 adds UDMA66 */
 	static const struct ata_port_info info_c2 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA4,
@@ -520,7 +523,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0xC3 is UDMA66 for now */
 	static const struct ata_port_info info_c3 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA4,
@@ -528,7 +532,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0xC4 is UDMA100 */
 	static const struct ata_port_info info_c4 = {
-		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+		.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+							ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA5,
@@ -536,7 +541,7 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	/* Revision 0xC5 is UDMA133 with LBA48 DMA */
 	static const struct ata_port_info info_c5 = {
-		.flags = ATA_FLAG_SLAVE_POSS,
+		.flags = ATA_FLAG_SLAVE_POSS | 	ATA_FLAG_IGN_SIMPLEX,
 		.pio_mask = ATA_PIO4,
 		.mwdma_mask = ATA_MWDMA2,
 		.udma_mask = ATA_UDMA6,
-- 
cgit v0.10.2


From fe2245c905631a3a353504fc04388ce3dfaf9d9e Mon Sep 17 00:00:00 2001
From: Mark Langsdorf <mark.langsdorf@amd.com>
Date: Sun, 5 Jul 2009 15:50:52 -0500
Subject: x86: enable GART-IOMMU only after setting up protection methods

The current code to set up the GART as an IOMMU enables GART
translations before it removes the aperture from the kernel memory
map, sets the GART PTEs to UC, sets up the guard and scratch
pages, or does a wbinvd().  This leaves the possibility of cache
aliasing open and can cause system crashes.

Re-order the code so as to enable the GART translations only
after all safeguards are in place and the tlb has been flushed.

AMD has tested this patch on both Istanbul systems and 1st
generation Opteron systems with APG enabled and seen no adverse
effects.  Istanbul systems with HT Assist enabled sometimes
see MCE errors due to cache artifacts with the unmodified
code.

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
Cc: <stable@kernel.org>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Cc: akpm@linux-foundation.org
Cc: jbarnes@virtuousgeek.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 1e8920d..cfd9f90 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -658,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
 	agp_gatt_table = gatt;
 
-	enable_gart_translations();
-
 	error = sysdev_class_register(&gart_sysdev_class);
 	if (!error)
 		error = sysdev_register(&device_gart);
@@ -816,6 +814,14 @@ void __init gart_iommu_init(void)
 	 * the pages as Not-Present:
 	 */
 	wbinvd();
+	
+	/*
+	 * Now all caches are flushed and we can safely enable
+	 * GART hardware.  Doing it early leaves the possibility
+	 * of stale cache entries that can lead to GART PTE
+	 * errors.
+	 */
+	enable_gart_translations();
 
 	/*
 	 * Try to workaround a bug (thanks to BenH):
-- 
cgit v0.10.2


From ccff4b15e0847223de0a481f5b7fa5ef902cf3bd Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Fri, 5 Jun 2009 19:15:58 -0700
Subject: ASoC: codec tlv320aic23 fix bogus divide by 0 message

Some code analyzer software mistakenly gives
divide by 0 error messages for these lines.
This patch will end its confusion.

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c
index 9fcbb9c..0b8dcb5 100644
--- a/sound/soc/codecs/tlv320aic23.c
+++ b/sound/soc/codecs/tlv320aic23.c
@@ -273,14 +273,14 @@ static const unsigned short sr_valid_mask[] = {
  * Every divisor is a factor of 11*12
  */
 #define SR_MULT (11*12)
-#define A(x) (x) ? (SR_MULT/x) : 0
+#define A(x) (SR_MULT/x)
 static const unsigned char sr_adc_mult_table[] = {
-	A(2), A(2), A(12), A(12),  A(0), A(0), A(3), A(1),
-	A(2), A(2), A(11), A(11),  A(0), A(0), A(0), A(1)
+	A(2), A(2), A(12), A(12),  0, 0, A(3), A(1),
+	A(2), A(2), A(11), A(11),  0, 0, 0, A(1)
 };
 static const unsigned char sr_dac_mult_table[] = {
-	A(2), A(12), A(2), A(12),  A(0), A(0), A(3), A(1),
-	A(2), A(11), A(2), A(11),  A(0), A(0), A(0), A(1)
+	A(2), A(12), A(2), A(12),  0, 0, A(3), A(1),
+	A(2), A(11), A(2), A(11),  0, 0, 0, A(1)
 };
 
 static unsigned get_score(int adc, int adc_l, int adc_h, int need_adc,
-- 
cgit v0.10.2


From a21ca2cac582886a3e95c8bb84ff7c52d4d15e54 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 09:58:57 +0200
Subject: perf_counter: Separate out attr->type from attr->config

Counter type is a frequently used value and we do a lot of
bit juggling by encoding and decoding it from attr->config.

Clean this up by creating a separate attr->type field.

Also clean up the various similarly complex user-space bits
all around counter attribute management.

The net improvement is significant, and it will be easier
to add a new major type (which is what triggered this cleanup).

(This changes the ABI, all tools are adapted.)
(PowerPC build-tested.)

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index c22ea0c..130fd88 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -20,10 +20,10 @@
 #define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
 #define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
 
-static long			default_interval = 100000;
-static long			event_count[MAX_COUNTERS];
-
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static long			default_interval		= 100000;
+
 static int			nr_cpus				= 0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			= 128;
@@ -38,22 +38,44 @@ static int			inherit				= 1;
 static int			force				= 0;
 static int			append_file			= 0;
 
-const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
+static long			samples;
+static struct timeval		last_read;
+static struct timeval		this_read;
+
+static __u64			bytes_written;
+
+static struct pollfd		event_array[MAX_NR_CPUS * MAX_COUNTERS];
+
+static int			nr_poll;
+static int			nr_cpu;
+
+struct mmap_event {
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	__u64				start;
+	__u64				len;
+	__u64				pgoff;
+	char				filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	char				comm[16];
 };
 
+
 struct mmap_data {
-	int counter;
-	void *base;
-	unsigned int mask;
-	unsigned int prev;
+	int			counter;
+	void			*base;
+	unsigned int		mask;
+	unsigned int		prev;
 };
 
+static struct mmap_data		mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+
 static unsigned int mmap_read_head(struct mmap_data *md)
 {
 	struct perf_counter_mmap_page *pc = md->base;
@@ -65,11 +87,6 @@ static unsigned int mmap_read_head(struct mmap_data *md)
 	return head;
 }
 
-static long samples;
-static struct timeval last_read, this_read;
-
-static __u64 bytes_written;
-
 static void mmap_read(struct mmap_data *md)
 {
 	unsigned int head = mmap_read_head(md);
@@ -157,29 +174,6 @@ static void sig_handler(int sig)
 	done = 1;
 }
 
-static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
-static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
-
-static int nr_poll;
-static int nr_cpu;
-
-struct mmap_event {
-	struct perf_event_header	header;
-	__u32				pid;
-	__u32				tid;
-	__u64				start;
-	__u64				len;
-	__u64				pgoff;
-	char				filename[PATH_MAX];
-};
-
-struct comm_event {
-	struct perf_event_header	header;
-	__u32				pid;
-	__u32				tid;
-	char				comm[16];
-};
-
 static void pid_synthesize_comm_event(pid_t pid, int full)
 {
 	struct comm_event comm_ev;
@@ -341,24 +335,21 @@ static int group_fd;
 
 static void create_counter(int counter, int cpu, pid_t pid)
 {
-	struct perf_counter_attr attr;
+	struct perf_counter_attr *attr = attrs + counter;
 	int track = 1;
 
-	memset(&attr, 0, sizeof(attr));
-	attr.config		= event_id[counter];
-	attr.sample_period	= event_count[counter];
-	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
+	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
 	if (freq) {
-		attr.freq		= 1;
-		attr.sample_freq	= freq;
+		attr->freq		= 1;
+		attr->sample_freq	= freq;
 	}
-	attr.mmap		= track;
-	attr.comm		= track;
-	attr.inherit		= (cpu < 0) && inherit;
+	attr->mmap		= track;
+	attr->comm		= track;
+	attr->inherit		= (cpu < 0) && inherit;
 
 	track = 0; /* only the first counter needs these */
 
-	fd[nr_cpu][counter] = sys_perf_counter_open(&attr, pid, cpu, group_fd, 0);
+	fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
 
 	if (fd[nr_cpu][counter] < 0) {
 		int err = errno;
@@ -542,16 +533,14 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 	if (!argc && target_pid == -1 && !system_wide)
 		usage_with_options(record_usage, options);
 
-	if (!nr_counters) {
+	if (!nr_counters)
 		nr_counters = 1;
-		event_id[0] = 0;
-	}
 
 	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
+		if (attrs[counter].sample_period)
 			continue;
 
-		event_count[counter] = default_interval;
+		attrs[counter].sample_period = default_interval;
 	}
 
 	return __cmd_record(argc, argv);
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 4fc0d80..9711e55 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -44,23 +44,22 @@
 
 #include <sys/prctl.h>
 
-static int			system_wide			=  0;
-static int			inherit				=  1;
+static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
 
-static __u64			default_event_id[MAX_COUNTERS]	= {
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK		},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS	},
 
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES	},
 };
 
-static int			default_interval = 100000;
-static int			event_count[MAX_COUNTERS];
+static int			system_wide			=  0;
+static int			inherit				=  1;
+
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static int			target_pid			= -1;
@@ -86,22 +85,16 @@ static __u64			walltime_nsecs;
 
 static void create_perfstat_counter(int counter)
 {
-	struct perf_counter_attr attr;
-
-	memset(&attr, 0, sizeof(attr));
-	attr.config		= event_id[counter];
-	attr.sample_type	= 0;
-	attr.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
-	attr.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
+	struct perf_counter_attr *attr = attrs + counter;
 
 	if (scale)
-		attr.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
-					  PERF_FORMAT_TOTAL_TIME_RUNNING;
+		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				    PERF_FORMAT_TOTAL_TIME_RUNNING;
 
 	if (system_wide) {
 		int cpu;
 		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			fd[cpu][counter] = sys_perf_counter_open(&attr, -1, cpu, -1, 0);
+			fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
 			if (fd[cpu][counter] < 0) {
 				printf("perfstat error: syscall returned with %d (%s)\n",
 						fd[cpu][counter], strerror(errno));
@@ -109,10 +102,10 @@ static void create_perfstat_counter(int counter)
 			}
 		}
 	} else {
-		attr.inherit	= inherit;
-		attr.disabled	= 1;
+		attr->inherit	= inherit;
+		attr->disabled	= 1;
 
-		fd[0][counter] = sys_perf_counter_open(&attr, 0, -1, -1, 0);
+		fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
 		if (fd[0][counter] < 0) {
 			printf("perfstat error: syscall returned with %d (%s)\n",
 					fd[0][counter], strerror(errno));
@@ -126,9 +119,13 @@ static void create_perfstat_counter(int counter)
  */
 static inline int nsec_counter(int counter)
 {
-	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK))
+	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
+		return 0;
+
+	if (attrs[counter].config == PERF_COUNT_CPU_CLOCK)
 		return 1;
-	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
+
+	if (attrs[counter].config == PERF_COUNT_TASK_CLOCK)
 		return 1;
 
 	return 0;
@@ -177,7 +174,8 @@ static void read_counter(int counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
+	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+		attrs[counter].config == PERF_COUNT_TASK_CLOCK)
 		runtime_nsecs = count[0];
 }
 
@@ -203,8 +201,8 @@ static void print_counter(int counter)
 
 		fprintf(stderr, " %14.6f  %-20s",
 			msecs, event_name(counter));
-		if (event_id[counter] ==
-				EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
+		if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+			attrs[counter].config == PERF_COUNT_TASK_CLOCK) {
 
 			fprintf(stderr, " # %11.3f CPU utilization factor",
 				(double)count[0] / (double)walltime_nsecs);
@@ -300,8 +298,6 @@ static char events_help_msg[EVENTS_HELP_MAX];
 static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
 		     events_help_msg, parse_events),
-	OPT_INTEGER('c', "count", &default_interval,
-		    "event period to sample"),
 	OPT_BOOLEAN('i', "inherit", &inherit,
 		    "child tasks inherit counters"),
 	OPT_INTEGER('p', "pid", &target_pid,
@@ -315,27 +311,19 @@ static const struct option options[] = {
 
 int cmd_stat(int argc, const char **argv, const char *prefix)
 {
-	int counter;
-
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	create_events_help(events_help_msg);
-	memcpy(event_id, default_event_id, sizeof(default_event_id));
+
+	memcpy(attrs, default_attrs, sizeof(attrs));
 
 	argc = parse_options(argc, argv, options, stat_usage, 0);
 	if (!argc)
 		usage_with_options(stat_usage, options);
 
-	if (!nr_counters) {
+	if (!nr_counters)
 		nr_counters = 8;
-	}
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
-			continue;
 
-		event_count[counter] = default_interval;
-	}
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index b2f480b..98a6d53 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -48,22 +48,11 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-static int			system_wide			=  0;
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
-static __u64			default_event_id[MAX_COUNTERS]		= {
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
+static int			system_wide			=  0;
 
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
-};
-static int			default_interval = 100000;
-static int			event_count[MAX_COUNTERS];
-static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+static int			default_interval		= 100000;
 
 static __u64			count_filter			=  5;
 static int			print_entries			= 15;
@@ -85,15 +74,6 @@ static int			delay_secs			=  2;
 static int			zero;
 static int			dump_symtab;
 
-static const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
-};
-
 /*
  * Symbols
  */
@@ -112,7 +92,7 @@ struct sym_entry {
 
 struct sym_entry		*sym_filter_entry;
 
-struct dso *kernel_dso;
+struct dso			*kernel_dso;
 
 /*
  * Symbols will be added here in record_ip and will get out
@@ -213,7 +193,7 @@ static void print_sym_table(void)
 		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
 
 	if (nr_counters == 1) {
-		printf("%d", event_count[0]);
+		printf("%Ld", attrs[0].sample_period);
 		if (freq)
 			printf("Hz ");
 		else
@@ -421,10 +401,10 @@ static void process_event(uint64_t ip, int counter)
 }
 
 struct mmap_data {
-	int counter;
-	void *base;
-	unsigned int mask;
-	unsigned int prev;
+	int			counter;
+	void			*base;
+	unsigned int		mask;
+	unsigned int		prev;
 };
 
 static unsigned int mmap_read_head(struct mmap_data *md)
@@ -539,7 +519,7 @@ static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 
 static int __cmd_top(void)
 {
-	struct perf_counter_attr attr;
+	struct perf_counter_attr *attr;
 	pthread_t thread;
 	int i, counter, group_fd, nr_poll = 0;
 	unsigned int cpu;
@@ -553,13 +533,12 @@ static int __cmd_top(void)
 			if (target_pid == -1 && profile_cpu == -1)
 				cpu = i;
 
-			memset(&attr, 0, sizeof(attr));
-			attr.config		= event_id[counter];
-			attr.sample_period	= event_count[counter];
-			attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-			attr.freq		= freq;
+			attr = attrs + counter;
 
-			fd[i][counter] = sys_perf_counter_open(&attr, target_pid, cpu, group_fd, 0);
+			attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+			attr->freq		= freq;
+
+			fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {
 				int err = errno;
 
@@ -670,7 +649,6 @@ int cmd_top(int argc, const char **argv, const char *prefix)
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	create_events_help(events_help_msg);
-	memcpy(event_id, default_event_id, sizeof(default_event_id));
 
 	argc = parse_options(argc, argv, options, top_usage, 0);
 	if (argc)
@@ -688,19 +666,22 @@ int cmd_top(int argc, const char **argv, const char *prefix)
 		profile_cpu = -1;
 	}
 
-	if (!nr_counters) {
+	if (!nr_counters)
 		nr_counters = 1;
-		event_id[0] = 0;
-	}
 
 	if (delay_secs < 1)
 		delay_secs = 1;
 
+	parse_symbols();
+
+	/*
+	 * Fill in the ones not specifically initialized via -c:
+	 */
 	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
+		if (attrs[counter].sample_period)
 			continue;
 
-		event_count[counter] = default_interval;
+		attrs[counter].sample_period = default_interval;
 	}
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
@@ -710,7 +691,5 @@ int cmd_top(int argc, const char **argv, const char *prefix)
 	if (target_pid != -1 || profile_cpu != -1)
 		nr_cpus = 1;
 
-	parse_symbols();
-
 	return __cmd_top();
 }
diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
index 10622a4..af0a504 100644
--- a/Documentation/perf_counter/perf.h
+++ b/Documentation/perf_counter/perf.h
@@ -64,6 +64,4 @@ sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
 #define MAX_COUNTERS			256
 #define MAX_NR_CPUS			256
 
-#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
-
 #endif
diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
index 2fdfd1d..eb56bd9 100644
--- a/Documentation/perf_counter/util/parse-events.c
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -6,37 +6,39 @@
 #include "exec_cmd.h"
 #include "string.h"
 
-int nr_counters;
+int					nr_counters;
 
-__u64			event_id[MAX_COUNTERS]		= { };
-int			event_mask[MAX_COUNTERS];
+struct perf_counter_attr		attrs[MAX_COUNTERS];
 
 struct event_symbol {
-	__u64 event;
-	char *symbol;
+	__u8	type;
+	__u64	config;
+	char	*symbol;
 };
 
+#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
+
 static struct event_symbol event_symbols[] = {
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
-
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
+  { C(HARDWARE, CPU_CYCLES),		"cpu-cycles",		},
+  { C(HARDWARE, CPU_CYCLES),		"cycles",		},
+  { C(HARDWARE, INSTRUCTIONS),		"instructions",		},
+  { C(HARDWARE, CACHE_REFERENCES),	"cache-references",	},
+  { C(HARDWARE, CACHE_MISSES),		"cache-misses",		},
+  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branches",		},
+  { C(HARDWARE, BRANCH_MISSES),		"branch-misses",	},
+  { C(HARDWARE, BUS_CYCLES),		"bus-cycles",		},
+
+  { C(SOFTWARE, CPU_CLOCK),		"cpu-clock",		},
+  { C(SOFTWARE, TASK_CLOCK),		"task-clock",		},
+  { C(SOFTWARE, PAGE_FAULTS),		"page-faults",		},
+  { C(SOFTWARE, PAGE_FAULTS),		"faults",		},
+  { C(SOFTWARE, PAGE_FAULTS_MIN),	"minor-faults",		},
+  { C(SOFTWARE, PAGE_FAULTS_MAJ),	"major-faults",		},
+  { C(SOFTWARE, CONTEXT_SWITCHES),	"context-switches",	},
+  { C(SOFTWARE, CONTEXT_SWITCHES),	"cs",			},
+  { C(SOFTWARE, CPU_MIGRATIONS),	"cpu-migrations",	},
+  { C(SOFTWARE, CPU_MIGRATIONS),	"migrations",		},
 };
 
 #define __PERF_COUNTER_FIELD(config, name) \
@@ -67,27 +69,26 @@ static char *sw_event_names[] = {
 	"major faults",
 };
 
-char *event_name(int ctr)
+char *event_name(int counter)
 {
-	__u64 config = event_id[ctr];
-	int type = PERF_COUNTER_TYPE(config);
-	int id = PERF_COUNTER_ID(config);
+	__u64 config = attrs[counter].config;
+	int type = attrs[counter].type;
 	static char buf[32];
 
-	if (PERF_COUNTER_RAW(config)) {
-		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
+	if (attrs[counter].type == PERF_TYPE_RAW) {
+		sprintf(buf, "raw 0x%llx", config);
 		return buf;
 	}
 
 	switch (type) {
 	case PERF_TYPE_HARDWARE:
-		if (id < PERF_HW_EVENTS_MAX)
-			return hw_event_names[id];
+		if (config < PERF_HW_EVENTS_MAX)
+			return hw_event_names[config];
 		return "unknown-hardware";
 
 	case PERF_TYPE_SOFTWARE:
-		if (id < PERF_SW_EVENTS_MAX)
-			return sw_event_names[id];
+		if (config < PERF_SW_EVENTS_MAX)
+			return sw_event_names[config];
 		return "unknown-software";
 
 	default:
@@ -101,15 +102,19 @@ char *event_name(int ctr)
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
  */
-static __u64 match_event_symbols(const char *str)
+static int match_event_symbols(const char *str, struct perf_counter_attr *attr)
 {
 	__u64 config, id;
 	int type;
 	unsigned int i;
 	const char *sep, *pstr;
 
-	if (str[0] == 'r' && hex2u64(str + 1, &config) > 0)
-		return config | PERF_COUNTER_RAW_MASK;
+	if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) {
+		attr->type = PERF_TYPE_RAW;
+		attr->config = config;
+
+		return 0;
+	}
 
 	pstr = str;
 	sep = strchr(pstr, ':');
@@ -121,35 +126,45 @@ static __u64 match_event_symbols(const char *str)
 		if (sep) {
 			pstr = sep + 1;
 			if (strchr(pstr, 'k'))
-				event_mask[nr_counters] |= EVENT_MASK_USER;
+				attr->exclude_user = 1;
 			if (strchr(pstr, 'u'))
-				event_mask[nr_counters] |= EVENT_MASK_KERNEL;
+				attr->exclude_kernel = 1;
 		}
-		return EID(type, id);
+		attr->type = type;
+		attr->config = id;
+
+		return 0;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
 		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
+			     strlen(event_symbols[i].symbol))) {
+
+			attr->type = event_symbols[i].type;
+			attr->config = event_symbols[i].config;
+
+			return 0;
+		}
 	}
 
-	return ~0ULL;
+	return -EINVAL;
 }
 
 int parse_events(const struct option *opt, const char *str, int unset)
 {
-	__u64 config;
+	struct perf_counter_attr attr;
+	int ret;
 
+	memset(&attr, 0, sizeof(attr));
 again:
 	if (nr_counters == MAX_COUNTERS)
 		return -1;
 
-	config = match_event_symbols(str);
-	if (config == ~0ULL)
-		return -1;
+	ret = match_event_symbols(str, &attr);
+	if (ret < 0)
+		return ret;
 
-	event_id[nr_counters] = config;
+	attrs[nr_counters] = attr;
 	nr_counters++;
 
 	str = strstr(str, ",");
@@ -168,7 +183,6 @@ void create_events_help(char *events_help_msg)
 {
 	unsigned int i;
 	char *str;
-	__u64 e;
 
 	str = events_help_msg;
 
@@ -178,9 +192,8 @@ void create_events_help(char *events_help_msg)
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
 		int type, id;
 
-		e = event_symbols[i].event;
-		type = PERF_COUNTER_TYPE(e);
-		id = PERF_COUNTER_ID(e);
+		type = event_symbols[i].type;
+		id = event_symbols[i].config;
 
 		if (i)
 			str += sprintf(str, "|");
@@ -191,4 +204,3 @@ void create_events_help(char *events_help_msg)
 
 	str += sprintf(str, "|rNNN]");
 }
-
diff --git a/Documentation/perf_counter/util/parse-events.h b/Documentation/perf_counter/util/parse-events.h
index 0da306b..542971c 100644
--- a/Documentation/perf_counter/util/parse-events.h
+++ b/Documentation/perf_counter/util/parse-events.h
@@ -3,12 +3,9 @@
  * Parse symbolic events/counts passed in as options:
  */
 
-extern int nr_counters;
-extern __u64			event_id[MAX_COUNTERS];
-extern int			event_mask[MAX_COUNTERS];
+extern int			nr_counters;
 
-#define EVENT_MASK_KERNEL	1
-#define EVENT_MASK_USER		2
+extern struct perf_counter_attr attrs[MAX_COUNTERS];
 
 extern char *event_name(int ctr);
 
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 232b00a..4786ad9 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -867,13 +867,13 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if (!perf_event_raw(&counter->attr)) {
-		ev = perf_event_id(&counter->attr);
+	if (counter->attr.type != PERF_TYPE_RAW) {
+		ev = counter->attr.config;
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return ERR_PTR(-EOPNOTSUPP);
 		ev = ppmu->generic_events[ev];
 	} else {
-		ev = perf_event_config(&counter->attr);
+		ev = counter->attr.config;
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8f53f3a..430e048 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -292,15 +292,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (perf_event_raw(attr)) {
-		hwc->config |= x86_pmu.raw_event(perf_event_config(attr));
+	if (attr->type == PERF_TYPE_RAW) {
+		hwc->config |= x86_pmu.raw_event(attr->config);
 	} else {
-		if (perf_event_id(attr) >= x86_pmu.max_events)
+		if (attr->config >= x86_pmu.max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= x86_pmu.event_map(perf_event_id(attr));
+		hwc->config |= x86_pmu.event_map(attr->config);
 	}
 
 	counter->destroy = hw_perf_counter_destroy;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4f9d39e..f794c69 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -73,26 +73,6 @@ enum sw_event_ids {
 	PERF_SW_EVENTS_MAX		= 7,
 };
 
-#define __PERF_COUNTER_MASK(name)			\
-	(((1ULL << PERF_COUNTER_##name##_BITS) - 1) <<	\
-	 PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW_BITS		1
-#define PERF_COUNTER_RAW_SHIFT		63
-#define PERF_COUNTER_RAW_MASK		__PERF_COUNTER_MASK(RAW)
-
-#define PERF_COUNTER_CONFIG_BITS	63
-#define PERF_COUNTER_CONFIG_SHIFT	0
-#define PERF_COUNTER_CONFIG_MASK	__PERF_COUNTER_MASK(CONFIG)
-
-#define PERF_COUNTER_TYPE_BITS		7
-#define PERF_COUNTER_TYPE_SHIFT		56
-#define PERF_COUNTER_TYPE_MASK		__PERF_COUNTER_MASK(TYPE)
-
-#define PERF_COUNTER_EVENT_BITS		56
-#define PERF_COUNTER_EVENT_SHIFT	0
-#define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
-
 /*
  * Bits that can be set in attr.sample_type to request information
  * in the overflow packets.
@@ -125,10 +105,13 @@ enum perf_counter_read_format {
  */
 struct perf_counter_attr {
 	/*
-	 * The MSB of the config word signifies if the rest contains cpu
-	 * specific (raw) counter configuration data, if unset, the next
-	 * 7 bits are an event type and the rest of the bits are the event
-	 * identifier.
+	 * Major type: hardware/software/tracepoint/etc.
+	 */
+	__u32			type;
+	__u32			__reserved_1;
+
+	/*
+	 * Type specific configuration information.
 	 */
 	__u64			config;
 
@@ -152,12 +135,11 @@ struct perf_counter_attr {
 				comm	       :  1, /* include comm data     */
 				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 53;
+				__reserved_2   : 53;
 
 	__u32			wakeup_events;	/* wakeup every n events */
-	__u32			__reserved_2;
+	__u32			__reserved_3;
 
-	__u64			__reserved_3;
 	__u64			__reserved_4;
 };
 
@@ -278,8 +260,8 @@ enum perf_event_type {
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u32				pid, ppid;
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
 	 * };
 	 */
 	PERF_EVENT_FORK			= 7,
@@ -331,27 +313,6 @@ enum perf_event_type {
 
 struct task_struct;
 
-static inline u64 perf_event_raw(struct perf_counter_attr *attr)
-{
-	return attr->config & PERF_COUNTER_RAW_MASK;
-}
-
-static inline u64 perf_event_config(struct perf_counter_attr *attr)
-{
-	return attr->config & PERF_COUNTER_CONFIG_MASK;
-}
-
-static inline u64 perf_event_type(struct perf_counter_attr *attr)
-{
-	return (attr->config & PERF_COUNTER_TYPE_MASK) >>
-		PERF_COUNTER_TYPE_SHIFT;
-}
-
-static inline u64 perf_event_id(struct perf_counter_attr *attr)
-{
-	return attr->config & PERF_COUNTER_EVENT_MASK;
-}
-
 /**
  * struct hw_perf_counter - performance counter hardware details:
  */
@@ -616,8 +577,8 @@ extern int perf_counter_overflow(struct perf_counter *counter,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !perf_event_raw(&counter->attr) &&
-		perf_event_type(&counter->attr) != PERF_TYPE_HARDWARE;
+	return (counter->attr.type != PERF_TYPE_RAW) &&
+		(counter->attr.type != PERF_TYPE_HARDWARE);
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 47c92fb..75ae767 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3091,14 +3091,12 @@ static int perf_swcounter_match(struct perf_counter *counter,
 				enum perf_event_types type,
 				u32 event, struct pt_regs *regs)
 {
-	u64 event_config;
-
-	event_config = ((u64) type << PERF_COUNTER_TYPE_SHIFT) | event;
-
 	if (!perf_swcounter_is_counting(counter))
 		return 0;
 
-	if (counter->attr.config != event_config)
+	if (counter->attr.type != type)
+		return 0;
+	if (counter->attr.config != event)
 		return 0;
 
 	if (regs) {
@@ -3403,7 +3401,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (perf_event_id(&counter->attr)) {
+	switch (counter->attr.config) {
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
@@ -3496,12 +3494,12 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
 		goto done;
 
-	if (perf_event_raw(attr)) {
+	if (attr->type == PERF_TYPE_RAW) {
 		pmu = hw_perf_counter_init(counter);
 		goto done;
 	}
 
-	switch (perf_event_type(attr)) {
+	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
 		pmu = hw_perf_counter_init(counter);
 		break;
-- 
cgit v0.10.2


From 460bcf57b128ce1c0dd553d905fedc097f9955c6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 12 May 2009 07:37:56 -0400
Subject: Fix nobh_truncate_page() to not pass stack garbage to get_block()

The nobh_truncate_page() function is used by ext2, exofs, and jfs.  Of
these three, only ext2 and jfs's get_block() function pays attention
to bh->b_size --- which is normally always the filesystem blocksize
except when the get_block() function is called by either
mpage_readpage(), mpage_readpages(), or the direct I/O routines in
fs/direct_io.c.

Unfortunately, nobh_truncate_page() does not initialize map_bh before
calling the filesystem-supplied get_block() function.  So ext2 and jfs
will try to calculate the number of blocks to map by taking stack
garbage and shifting it left by inode->i_blkbits.  This should be
*mostly* harmless (except the filesystem will do some unnneeded work)
unless the stack garbage is less than filesystem's blocksize, in which
case maxblocks will be zero, and the attempt to find out whether or
not the filesystem has a hole at a given logical block will fail, and
the page cache entry might not get zero'ed out.

Also if the stack garbage in in map_bh->state happens to have the
BH_Mapped bit set, there could be an attempt to call readpage() on a
non-existent page, which could cause nobh_truncate_page() to return an
error when it should not.

Fix this by initializing map_bh->state and map_bh->size.

Fortunately, it's probably fairly unlikely that ext2 and jfs users
mount with nobh these days.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/buffer.c b/fs/buffer.c
index aed2977..4910612 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2736,6 +2736,8 @@ has_buffers:
 		pos += blocksize;
 	}
 
+	map_bh.b_size = blocksize;
+	map_bh.b_state = 0;
 	err = get_block(inode, iblock, &map_bh, 0);
 	if (err)
 		goto unlock;
-- 
cgit v0.10.2


From 72a43d63cb51057393edfbcfc4596066205ad15d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 13 May 2009 19:13:40 +0100
Subject: ext3/4 with synchronous writes gets wedged by Postfix

OK, that's probably the easiest way to do that, as much as I don't like it...
Since iget() et.al. will not accept I_FREEING (will wait to go away
and restart), and since we'd better have serialization between new/free
on fs data structures anyway, we can afford simply skipping I_FREEING
et.al. in insert_inode_locked().

We do that from new_inode, so it won't race with free_inode in any interesting
ways and it won't race with iget (of any origin; nfsd or in case of fs
corruption a lookup) since both still will wait for I_LOCK.

Reviewed-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Jan Kara <jack@suse.cz>
Tested-by: David Watson <dbwatson@ukfsn.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/inode.c b/fs/inode.c
index 0571983..a4876e5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1053,13 +1053,22 @@ int insert_inode_locked(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 	ino_t ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
-	struct inode *old;
 
 	inode->i_state |= I_LOCK|I_NEW;
 	while (1) {
+		struct hlist_node *node;
+		struct inode *old = NULL;
 		spin_lock(&inode_lock);
-		old = find_inode_fast(sb, head, ino);
-		if (likely(!old)) {
+		hlist_for_each_entry(old, node, head, i_hash) {
+			if (old->i_ino != ino)
+				continue;
+			if (old->i_sb != sb)
+				continue;
+			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+				continue;
+			break;
+		}
+		if (likely(!node)) {
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode_lock);
 			return 0;
@@ -1081,14 +1090,24 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 {
 	struct super_block *sb = inode->i_sb;
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
-	struct inode *old;
 
 	inode->i_state |= I_LOCK|I_NEW;
 
 	while (1) {
+		struct hlist_node *node;
+		struct inode *old = NULL;
+
 		spin_lock(&inode_lock);
-		old = find_inode(sb, head, test, data);
-		if (likely(!old)) {
+		hlist_for_each_entry(old, node, head, i_hash) {
+			if (old->i_sb != sb)
+				continue;
+			if (!test(old, data))
+				continue;
+			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+				continue;
+			break;
+		}
+		if (likely(!node)) {
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode_lock);
 			return 0;
-- 
cgit v0.10.2


From 74b8f955a73d20b1e22403fd1ef85834fbf38d98 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 6 Jun 2009 11:26:15 +0100
Subject: ASoC: Apostrophe patrol

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 39a63f9..21c6907 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -12,7 +12,7 @@
  *  Features:
  *    o Changes power status of internal codec blocks depending on the
  *      dynamic configuration of codec internal audio paths and active
- *      DAC's/ADC's.
+ *      DACs/ADCs.
  *    o Platform power domain - can support external components i.e. amps and
  *      mic/meadphone insertion events.
  *    o Automatic Mic Bias support
@@ -220,7 +220,7 @@ static void dapm_set_path_status(struct snd_soc_dapm_widget *w,
 	}
 }
 
-/* connect mux widget to it's interconnecting audio paths */
+/* connect mux widget to its interconnecting audio paths */
 static int dapm_connect_mux(struct snd_soc_codec *codec,
 	struct snd_soc_dapm_widget *src, struct snd_soc_dapm_widget *dest,
 	struct snd_soc_dapm_path *path, const char *control_name,
@@ -243,7 +243,7 @@ static int dapm_connect_mux(struct snd_soc_codec *codec,
 	return -ENODEV;
 }
 
-/* connect mixer widget to it's interconnecting audio paths */
+/* connect mixer widget to its interconnecting audio paths */
 static int dapm_connect_mixer(struct snd_soc_codec *codec,
 	struct snd_soc_dapm_widget *src, struct snd_soc_dapm_widget *dest,
 	struct snd_soc_dapm_path *path, const char *control_name)
@@ -1797,7 +1797,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_stream_event);
  * @codec: SoC codec
  * @pin: pin name
  *
- * Enables input/output pin and it's parents or children widgets iff there is
+ * Enables input/output pin and its parents or children widgets iff there is
  * a valid audio route and active audio stream.
  * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to
  * do any widget power switching.
@@ -1813,7 +1813,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_enable_pin);
  * @codec: SoC codec
  * @pin: pin name
  *
- * Disables input/output pin and it's parents or children widgets.
+ * Disables input/output pin and its parents or children widgets.
  * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to
  * do any widget power switching.
  */
-- 
cgit v0.10.2


From 8326f44da090d6d304d29b9fdc7fb3e20889e329 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 20:22:46 +0200
Subject: perf_counter: Implement generalized cache event types

Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.

This is a 3-dimensional space:

       { L1-D, L1-I, L2, ITLB, DTLB, BPU } x
       { load, store, prefetch } x
       { accesses, misses }

User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)

Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.

Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.

( x86 is supported for now, with the Nehalem event table filled in,
  and with Core2 and Atom having placeholder tables. )

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
index eb56bd9..de9a77c 100644
--- a/Documentation/perf_counter/util/parse-events.c
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -6,6 +6,8 @@
 #include "exec_cmd.h"
 #include "string.h"
 
+extern char *strcasestr(const char *haystack, const char *needle);
+
 int					nr_counters;
 
 struct perf_counter_attr		attrs[MAX_COUNTERS];
@@ -17,6 +19,7 @@ struct event_symbol {
 };
 
 #define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
+#define CR(x, y) .type = PERF_TYPE_##x, .config = y
 
 static struct event_symbol event_symbols[] = {
   { C(HARDWARE, CPU_CYCLES),		"cpu-cycles",		},
@@ -69,6 +72,28 @@ static char *sw_event_names[] = {
 	"major faults",
 };
 
+#define MAX_ALIASES 8
+
+static char *hw_cache [][MAX_ALIASES] = {
+	{ "l1-d" ,	"l1d" ,	"l1", "l1-data-cache"			},
+	{ "l1-i" ,	"l1i" ,	"l1-instruction-cache"		},
+	{ "l2"  , },
+	{ "dtlb", },
+	{ "itlb", },
+	{ "bpu" , "btb", "branch-cache", NULL },
+};
+
+static char *hw_cache_op [][MAX_ALIASES] = {
+	{ "read"	, "load" },
+	{ "write"	, "store" },
+	{ "prefetch"	, "speculative-read", "speculative-load" },
+};
+
+static char *hw_cache_result [][MAX_ALIASES] = {
+	{ "access", "ops" },
+	{ "miss", },
+};
+
 char *event_name(int counter)
 {
 	__u64 config = attrs[counter].config;
@@ -86,6 +111,30 @@ char *event_name(int counter)
 			return hw_event_names[config];
 		return "unknown-hardware";
 
+	case PERF_TYPE_HW_CACHE: {
+		__u8 cache_type, cache_op, cache_result;
+		static char name[100];
+
+		cache_type   = (config >>  0) & 0xff;
+		if (cache_type > PERF_COUNT_HW_CACHE_MAX)
+			return "unknown-ext-hardware-cache-type";
+
+		cache_op     = (config >>  8) & 0xff;
+		if (cache_type > PERF_COUNT_HW_CACHE_OP_MAX)
+			return "unknown-ext-hardware-cache-op-type";
+
+		cache_result = (config >> 16) & 0xff;
+		if (cache_type > PERF_COUNT_HW_CACHE_RESULT_MAX)
+			return "unknown-ext-hardware-cache-result-type";
+
+		sprintf(name, "%s:%s:%s",
+			hw_cache[cache_type][0],
+			hw_cache_op[cache_op][0],
+			hw_cache_result[cache_result][0]);
+
+		return name;
+	}
+
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_SW_EVENTS_MAX)
 			return sw_event_names[config];
@@ -98,11 +147,60 @@ char *event_name(int counter)
 	return "unknown";
 }
 
+static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
+{
+	int i, j;
+
+	for (i = 0; i < size; i++) {
+		for (j = 0; j < MAX_ALIASES; j++) {
+			if (!names[i][j])
+				break;
+			if (strcasestr(str, names[i][j]))
+				return i;
+		}
+	}
+
+	return 0;
+}
+
+static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
+{
+	__u8 cache_type = -1, cache_op = 0, cache_result = 0;
+
+	cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX);
+	/*
+	 * No fallback - if we cannot get a clear cache type
+	 * then bail out:
+	 */
+	if (cache_type == -1)
+		return -EINVAL;
+
+	cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
+	/*
+	 * Fall back to reads:
+	 */
+	if (cache_type == -1)
+		cache_type = PERF_COUNT_HW_CACHE_OP_READ;
+
+	cache_result = parse_aliases(str, hw_cache_result,
+					PERF_COUNT_HW_CACHE_RESULT_MAX);
+	/*
+	 * Fall back to accesses:
+	 */
+	if (cache_result == -1)
+		cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
+
+	attr->config = cache_type | (cache_op << 8) | (cache_result << 16);
+	attr->type = PERF_TYPE_HW_CACHE;
+
+	return 0;
+}
+
 /*
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
  */
-static int match_event_symbols(const char *str, struct perf_counter_attr *attr)
+static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
 {
 	__u64 config, id;
 	int type;
@@ -147,7 +245,7 @@ static int match_event_symbols(const char *str, struct perf_counter_attr *attr)
 		}
 	}
 
-	return -EINVAL;
+	return parse_generic_hw_symbols(str, attr);
 }
 
 int parse_events(const struct option *opt, const char *str, int unset)
@@ -160,7 +258,7 @@ again:
 	if (nr_counters == MAX_COUNTERS)
 		return -1;
 
-	ret = match_event_symbols(str, &attr);
+	ret = parse_event_symbols(str, &attr);
 	if (ret < 0)
 		return ret;
 
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 430e048..e86679f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -83,6 +83,128 @@ static u64 intel_pmu_event_map(int event)
 	return intel_perfmon_event_map[event];
 }
 
+/*
+ * Generalized hw caching related event table, filled
+ * in on a per model basis. A value of 0 means
+ * 'not supported', -1 means 'event makes no sense on
+ * this CPU', any other value means the raw event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+static u64 __read_mostly hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+static const u64 nehalem_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0480, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(L2  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0xc024, /* L2_RQSTS.PREFETCHES          */
+		[ C(RESULT_MISS)   ] = 0x8024, /* L2_RQSTS.PREFETCH_MISS       */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISS_RETIRED            */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static const u64 core2_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+	/* To be filled in */
+};
+
+static const u64 atom_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+	/* To be filled in */
+};
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -246,6 +368,39 @@ static inline int x86_pmu_initialized(void)
 	return x86_pmu.handle_irq != NULL;
 }
 
+static inline int
+set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	u64 config, val;
+
+	config = attr->config;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	val = hw_cache_event_ids[cache_type][cache_op][cache_result];
+
+	if (val == 0)
+		return -ENOENT;
+
+	if (val == -1)
+		return -EINVAL;
+
+	hwc->config |= val;
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -288,22 +443,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->sample_period = x86_pmu.max_period;
 
 	atomic64_set(&hwc->period_left, hwc->sample_period);
+	counter->destroy = hw_perf_counter_destroy;
 
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
 	if (attr->type == PERF_TYPE_RAW) {
 		hwc->config |= x86_pmu.raw_event(attr->config);
-	} else {
-		if (attr->config >= x86_pmu.max_events)
-			return -EINVAL;
-		/*
-		 * The generic map:
-		 */
-		hwc->config |= x86_pmu.event_map(attr->config);
+		return 0;
 	}
 
-	counter->destroy = hw_perf_counter_destroy;
+	if (attr->type == PERF_TYPE_HW_CACHE)
+		return set_ext_hw_attr(hwc, attr);
+
+	if (attr->config >= x86_pmu.max_events)
+		return -EINVAL;
+	/*
+	 * The generic map:
+	 */
+	hwc->config |= x86_pmu.event_map(attr->config);
 
 	return 0;
 }
@@ -989,6 +1147,33 @@ static int intel_pmu_init(void)
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 
+	/*
+	 * Nehalem:
+	 */
+	switch (boot_cpu_data.x86_model) {
+	case 17:
+		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
+			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+
+		pr_info("... installed Core2 event tables\n");
+		break;
+	default:
+	case 26:
+		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
+			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+
+		pr_info("... installed Nehalem/Corei7 event tables\n");
+		break;
+	case 28:
+		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
+			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+
+		pr_info("... installed Atom event tables\n");
+		break;
+	}
 	return 0;
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f794c69..3586df8 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -28,6 +28,7 @@ enum perf_event_types {
 	PERF_TYPE_HARDWARE		= 0,
 	PERF_TYPE_SOFTWARE		= 1,
 	PERF_TYPE_TRACEPOINT		= 2,
+	PERF_TYPE_HW_CACHE		= 3,
 
 	/*
 	 * available TYPE space, raw is the max value.
@@ -56,6 +57,39 @@ enum attr_ids {
 };
 
 /*
+ * Generalized hardware cache counters:
+ *
+ *       { L1-D, L1-I, L2, LLC, ITLB, DTLB, BPU } x
+ *       { read, write, prefetch } x
+ *       { accesses, misses }
+ */
+enum hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D,
+	PERF_COUNT_HW_CACHE_L1I,
+	PERF_COUNT_HW_CACHE_L2,
+	PERF_COUNT_HW_CACHE_DTLB,
+	PERF_COUNT_HW_CACHE_ITLB,
+	PERF_COUNT_HW_CACHE_BPU,
+
+	PERF_COUNT_HW_CACHE_MAX,
+};
+
+enum hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ,
+	PERF_COUNT_HW_CACHE_OP_WRITE,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH,
+
+	PERF_COUNT_HW_CACHE_OP_MAX,
+};
+
+enum hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS,
+	PERF_COUNT_HW_CACHE_RESULT_MISS,
+
+	PERF_COUNT_HW_CACHE_RESULT_MAX,
+};
+
+/*
  * Special "software" counters provided by the kernel, even if the hardware
  * does not support performance counters. These counters measure various
  * physical and sw events of the kernel (and allow the profiling of them as
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 75ae767..5eacaaf 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3501,6 +3501,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 
 	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
 		pmu = hw_perf_counter_init(counter);
 		break;
 
-- 
cgit v0.10.2


From 86847b62f0781ccc97a79936c9ed9dc818cff67b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 6 Jun 2009 12:24:17 +0200
Subject: perf_counter tools: Add 'perf list' to list available events

perf list: List all the available event types which can be used in
-e (--event) options.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 5b99f04..32c0bb2 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -328,6 +328,7 @@ BUILTIN_OBJS += builtin-record.o
 BUILTIN_OBJS += builtin-report.o
 BUILTIN_OBJS += builtin-stat.o
 BUILTIN_OBJS += builtin-top.o
+BUILTIN_OBJS += builtin-list.o
 
 PERFLIBS = $(LIB_FILE)
 EXTLIBS =
diff --git a/Documentation/perf_counter/builtin-list.c b/Documentation/perf_counter/builtin-list.c
new file mode 100644
index 0000000..fe60e37
--- /dev/null
+++ b/Documentation/perf_counter/builtin-list.c
@@ -0,0 +1,20 @@
+/*
+ * builtin-list.c
+ *
+ * Builtin list command: list all event types
+ *
+ * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+int cmd_list(int argc, const char **argv, const char *prefix)
+{
+	print_events();
+	return 0;
+}
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index 130fd88..aeab9c4 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -495,11 +495,10 @@ static const char * const record_usage[] = {
 	NULL
 };
 
-static char events_help_msg[EVENTS_HELP_MAX];
-
 static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
-		     events_help_msg, parse_events),
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
 	OPT_INTEGER('p', "pid", &target_pid,
 		    "record events on existing pid"),
 	OPT_INTEGER('r', "realtime", &realtime_prio,
@@ -527,8 +526,6 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 {
 	int counter;
 
-	create_events_help(events_help_msg);
-
 	argc = parse_options(argc, argv, options, record_usage, 0);
 	if (!argc && target_pid == -1 && !system_wide)
 		usage_with_options(record_usage, options);
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 9711e55..2cbf5a1 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -293,18 +293,17 @@ static const char * const stat_usage[] = {
 	NULL
 };
 
-static char events_help_msg[EVENTS_HELP_MAX];
-
 static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
-		     events_help_msg, parse_events),
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
 	OPT_BOOLEAN('i', "inherit", &inherit,
 		    "child tasks inherit counters"),
 	OPT_INTEGER('p', "pid", &target_pid,
 		    "stat events on existing pid"),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 			    "system-wide collection from all CPUs"),
-	OPT_BOOLEAN('l', "scale", &scale,
+	OPT_BOOLEAN('S', "scale", &scale,
 			    "scale/normalize counters"),
 	OPT_END()
 };
@@ -313,8 +312,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
 {
 	page_size = sysconf(_SC_PAGE_SIZE);
 
-	create_events_help(events_help_msg);
-
 	memcpy(attrs, default_attrs, sizeof(attrs));
 
 	argc = parse_options(argc, argv, options, stat_usage, 0);
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index 98a6d53..f2e7312 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -606,11 +606,10 @@ static const char * const top_usage[] = {
 	NULL
 };
 
-static char events_help_msg[EVENTS_HELP_MAX];
-
 static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
-		     events_help_msg, parse_events),
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
 	OPT_INTEGER('c', "count", &default_interval,
 		    "event period to sample"),
 	OPT_INTEGER('p', "pid", &target_pid,
@@ -648,8 +647,6 @@ int cmd_top(int argc, const char **argv, const char *prefix)
 
 	page_size = sysconf(_SC_PAGE_SIZE);
 
-	create_events_help(events_help_msg);
-
 	argc = parse_options(argc, argv, options, top_usage, 0);
 	if (argc)
 		usage_with_options(top_usage, options);
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index 5bfea57..e7de47d 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -20,4 +20,6 @@ extern int cmd_report(int argc, const char **argv, const char *prefix);
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
 extern int cmd_top(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
+extern int cmd_list(int argc, const char **argv, const char *prefix);
+
 #endif
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index ec7edb7..9ac7565 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -258,6 +258,7 @@ static void handle_internal_command(int argc, const char **argv)
 	const char *cmd = argv[0];
 	static struct cmd_struct commands[] = {
 		{ "help", cmd_help, 0 },
+		{ "list", cmd_list, 0 },
 		{ "record", cmd_record, 0 },
 		{ "report", cmd_report, 0 },
 		{ "stat", cmd_stat, 0 },
diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
index de9a77c..150fbd2 100644
--- a/Documentation/perf_counter/util/parse-events.c
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -274,31 +274,43 @@ again:
 	return 0;
 }
 
+static const char * const event_type_descriptors[] = {
+	"",
+	"Hardware event",
+	"Software event",
+	"Tracepoint event",
+	"Hardware cache event",
+};
+
 /*
- * Create the help text for the event symbols:
+ * Print the help text for the event symbols:
  */
-void create_events_help(char *events_help_msg)
+void print_events(void)
 {
-	unsigned int i;
-	char *str;
+	struct event_symbol *syms = event_symbols;
+	unsigned int i, type, prev_type = -1;
 
-	str = events_help_msg;
+	fprintf(stderr, "\n");
+	fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
 
-	str += sprintf(str,
-	"event name: [");
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
+		type = syms->type + 1;
+		if (type > ARRAY_SIZE(event_type_descriptors))
+			type = 0;
 
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		int type, id;
-
-		type = event_symbols[i].type;
-		id = event_symbols[i].config;
+		if (type != prev_type)
+			fprintf(stderr, "\n");
 
-		if (i)
-			str += sprintf(str, "|");
+		fprintf(stderr, "  %-30s [%s]\n", syms->symbol,
+			event_type_descriptors[type]);
 
-		str += sprintf(str, "%s",
-				event_symbols[i].symbol);
+		prev_type = type;
 	}
 
-	str += sprintf(str, "|rNNN]");
+	fprintf(stderr, "\n");
+	fprintf(stderr, "  %-30s [raw hardware event descriptor]\n",
+		"rNNN");
+	fprintf(stderr, "\n");
+
+	exit(129);
 }
diff --git a/Documentation/perf_counter/util/parse-events.h b/Documentation/perf_counter/util/parse-events.h
index 542971c..e3d5529 100644
--- a/Documentation/perf_counter/util/parse-events.h
+++ b/Documentation/perf_counter/util/parse-events.h
@@ -13,5 +13,5 @@ extern int parse_events(const struct option *opt, const char *str, int unset);
 
 #define EVENTS_HELP_MAX (128*1024)
 
-extern void create_events_help(char *help_msg);
+extern void print_events(void);
 
-- 
cgit v0.10.2


From 8faf3b547593bf6ea10df631e73204975273c4e0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 13:58:12 +0200
Subject: perf_counter tools: Fix cache-event printout

Also standardize the cache printout (so that it can be pasted back
into the command) and sort out the aliases.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
index 150fbd2..e0820b4 100644
--- a/Documentation/perf_counter/util/parse-events.c
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -53,45 +53,45 @@ static struct event_symbol event_symbols[] = {
 #define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
 
 static char *hw_event_names[] = {
-	"CPU cycles",
+	"cycles",
 	"instructions",
-	"cache references",
-	"cache misses",
+	"cache-references",
+	"cache-misses",
 	"branches",
-	"branch misses",
-	"bus cycles",
+	"branch-misses",
+	"bus-cycles",
 };
 
 static char *sw_event_names[] = {
-	"cpu clock ticks",
-	"task clock ticks",
-	"pagefaults",
-	"context switches",
-	"CPU migrations",
-	"minor faults",
-	"major faults",
+	"cpu-clock-ticks",
+	"task-clock-ticks",
+	"page-faults",
+	"context-switches",
+	"CPU-migrations",
+	"minor-faults",
+	"major-faults",
 };
 
 #define MAX_ALIASES 8
 
 static char *hw_cache [][MAX_ALIASES] = {
-	{ "l1-d" ,	"l1d" ,	"l1", "l1-data-cache"			},
-	{ "l1-i" ,	"l1i" ,	"l1-instruction-cache"		},
-	{ "l2"  , },
-	{ "dtlb", },
-	{ "itlb", },
-	{ "bpu" , "btb", "branch-cache", NULL },
+	{ "L1-data"		, "l1-d", "l1d", "l1"				},
+	{ "L1-instruction"	, "l1-i", "l1i"					},
+	{ "L2"			, "l2"						},
+	{ "Data-TLB"		, "dtlb", "d-tlb"				},
+	{ "Instruction-TLB"	, "itlb", "i-tlb"				},
+	{ "Branch"		, "bpu" , "btb", "bpc"				},
 };
 
 static char *hw_cache_op [][MAX_ALIASES] = {
-	{ "read"	, "load" },
-	{ "write"	, "store" },
-	{ "prefetch"	, "speculative-read", "speculative-load" },
+	{ "Load"		, "read"					},
+	{ "Store"		, "write"					},
+	{ "Prefetch"		, "speculative-read", "speculative-load"	},
 };
 
 static char *hw_cache_result [][MAX_ALIASES] = {
-	{ "access", "ops" },
-	{ "miss", },
+	{ "Reference"		, "ops", "access"				},
+	{ "Miss"								},
 };
 
 char *event_name(int counter)
@@ -120,14 +120,14 @@ char *event_name(int counter)
 			return "unknown-ext-hardware-cache-type";
 
 		cache_op     = (config >>  8) & 0xff;
-		if (cache_type > PERF_COUNT_HW_CACHE_OP_MAX)
-			return "unknown-ext-hardware-cache-op-type";
+		if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX)
+			return "unknown-ext-hardware-cache-op";
 
 		cache_result = (config >> 16) & 0xff;
-		if (cache_type > PERF_COUNT_HW_CACHE_RESULT_MAX)
-			return "unknown-ext-hardware-cache-result-type";
+		if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
+			return "unknown-ext-hardware-cache-result";
 
-		sprintf(name, "%s:%s:%s",
+		sprintf(name, "%s-Cache-%s-%ses",
 			hw_cache[cache_type][0],
 			hw_cache_op[cache_op][0],
 			hw_cache_result[cache_result][0]);
-- 
cgit v0.10.2


From 386b05e3a2f3c5b0a9c5575060421cca0911648a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 6 Jun 2009 14:56:33 +0200
Subject: perf_counter tools: Add help for perf list

Also update other areas of the help texts.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-list.txt b/Documentation/perf_counter/Documentation/perf-list.txt
new file mode 100644
index 0000000..aa55a71
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/perf-list.txt
@@ -0,0 +1,25 @@
+perf-list(1)
+==============
+
+NAME
+----
+perf-list - List all symbolic event types
+
+SYNOPSIS
+--------
+[verse]
+'perf list
+
+DESCRIPTION
+-----------
+This command displays the symbolic event types which can be selected in the
+various perf commands with the -e option.
+
+OPTIONS
+-------
+None
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1]
diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt
index 4d3416f..1dbc1ee 100644
--- a/Documentation/perf_counter/Documentation/perf-record.txt
+++ b/Documentation/perf_counter/Documentation/perf-record.txt
@@ -26,26 +26,10 @@ OPTIONS
 
 -e::
 --event=::
-                             0:0: cpu-cycles          
-                             0:0: cycles              
-                             0:1: instructions        
-                             0:2: cache-references    
-                             0:3: cache-misses        
-                             0:4: branch-instructions 
-                             0:4: branches            
-                             0:5: branch-misses       
-                             0:6: bus-cycles          
-                             1:0: cpu-clock           
-                             1:1: task-clock          
-                             1:2: page-faults         
-                             1:2: faults              
-                             1:5: minor-faults        
-                             1:6: major-faults        
-                             1:3: context-switches    
-                             1:3: cs                  
-                             1:4: cpu-migrations      
-                             1:4: migrations          
-                           rNNN: raw PMU events (eventsel+umask)
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
 
 -a::
         system-wide collection
@@ -55,4 +39,4 @@ OPTIONS
 
 SEE ALSO
 --------
-linkperf:perf-stat[1]
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt
index a340e7b..5d95784 100644
--- a/Documentation/perf_counter/Documentation/perf-stat.txt
+++ b/Documentation/perf_counter/Documentation/perf-stat.txt
@@ -25,26 +25,10 @@ OPTIONS
 
 -e::
 --event=::
-                             0:0: cpu-cycles          
-                             0:0: cycles              
-                             0:1: instructions        
-                             0:2: cache-references    
-                             0:3: cache-misses        
-                             0:4: branch-instructions 
-                             0:4: branches            
-                             0:5: branch-misses       
-                             0:6: bus-cycles          
-                             1:0: cpu-clock           
-                             1:1: task-clock          
-                             1:2: page-faults         
-                             1:2: faults              
-                             1:5: minor-faults        
-                             1:6: major-faults        
-                             1:3: context-switches    
-                             1:3: cs                  
-                             1:4: cpu-migrations      
-                             1:4: migrations          
-                           rNNN: raw PMU events (eventsel+umask)
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
 
 -i::
 --inherit::
@@ -79,4 +63,4 @@ $ perf stat -- make -j
 
 SEE ALSO
 --------
-linkperf:perf-tops[1]
+linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/Documentation/perf_counter/Documentation/perf-top.txt b/Documentation/perf_counter/Documentation/perf-top.txt
index 15251e4..c8eb7cf 100644
--- a/Documentation/perf_counter/Documentation/perf-top.txt
+++ b/Documentation/perf_counter/Documentation/perf-top.txt
@@ -23,26 +23,10 @@ OPTIONS
 
 -e::
 --event=::
-                             0:0: cpu-cycles          
-                             0:0: cycles              
-                             0:1: instructions        
-                             0:2: cache-references    
-                             0:3: cache-misses        
-                             0:4: branch-instructions 
-                             0:4: branches            
-                             0:5: branch-misses       
-                             0:6: bus-cycles          
-                             1:0: cpu-clock           
-                             1:1: task-clock          
-                             1:2: page-faults         
-                             1:2: faults              
-                             1:5: minor-faults        
-                             1:6: major-faults        
-                             1:3: context-switches    
-                             1:3: cs                  
-                             1:4: cpu-migrations      
-                             1:4: migrations          
-                           rNNN: raw PMU events (eventsel+umask)
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
 
 -a::
         system-wide collection
@@ -52,4 +36,4 @@ OPTIONS
 
 SEE ALSO
 --------
-linkperf:perf-stat[1]
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/Documentation/perf_counter/Documentation/perf.txt b/Documentation/perf_counter/Documentation/perf.txt
index e3d8b38..69c8325 100644
--- a/Documentation/perf_counter/Documentation/perf.txt
+++ b/Documentation/perf_counter/Documentation/perf.txt
@@ -20,4 +20,5 @@ and software features (software counters, tracepoints) as well.
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-top[1],
-linkperf:perf-record[1], linkperf:perf-report[1]
+linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-list[1]
-- 
cgit v0.10.2


From 502fc5c72a886ff9d4d7a596e65ecc4dd5e4d458 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 13 Mar 2009 03:20:49 +0100
Subject: perf_counter tools: Uniform help printouts

Also add perf list to command-list.txt.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c
index a3894bf..0f32dc3 100644
--- a/Documentation/perf_counter/builtin-help.c
+++ b/Documentation/perf_counter/builtin-help.c
@@ -284,7 +284,7 @@ void list_common_cmds_help(void)
 			longest = strlen(common_cmds[i].name);
 	}
 
-	puts("The most commonly used perf commands are:");
+	puts(" The most commonly used perf commands are:");
 	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
 		printf("   %s   ", common_cmds[i].name);
 		mput_char(' ', longest - strlen(common_cmds[i].name));
@@ -426,16 +426,16 @@ int cmd_help(int argc, const char **argv, const char *prefix)
 			builtin_help_usage, 0);
 
 	if (show_all) {
-		printf("usage: %s\n\n", perf_usage_string);
+		printf("\n usage: %s\n\n", perf_usage_string);
 		list_commands("perf commands", &main_cmds, &other_cmds);
-		printf("%s\n", perf_more_info_string);
+		printf(" %s\n\n", perf_more_info_string);
 		return 0;
 	}
 
 	if (!argv[0]) {
-		printf("usage: %s\n\n", perf_usage_string);
+		printf("\n usage: %s\n\n", perf_usage_string);
 		list_common_cmds_help();
-		printf("\n%s\n", perf_more_info_string);
+		printf("\n %s\n\n", perf_more_info_string);
 		return 0;
 	}
 
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt
index 4390292..f0b922c 100644
--- a/Documentation/perf_counter/command-list.txt
+++ b/Documentation/perf_counter/command-list.txt
@@ -1,7 +1,9 @@
+#
 # List of known perf commands.
-# command name				category [deprecated] [common]
-perf-record                             mainporcelain common
-perf-report                             mainporcelain common
-perf-stat                               mainporcelain common
-perf-top                                mainporcelain common
-
+# command name			category [deprecated] [common]
+#
+perf-record			mainporcelain common
+perf-report			mainporcelain common
+perf-stat			mainporcelain common
+perf-top			mainporcelain common
+perf-list			mainporcelain common
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index 9ac7565..161824f 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -384,9 +384,9 @@ int main(int argc, const char **argv)
 			argv[0] += 2;
 	} else {
 		/* The user didn't specify a command; give them help */
-		printf("usage: %s\n\n", perf_usage_string);
+		printf("\n usage: %s\n\n", perf_usage_string);
 		list_common_cmds_help();
-		printf("\n%s\n", perf_more_info_string);
+		printf("\n %s\n\n", perf_more_info_string);
 		exit(1);
 	}
 	cmd = argv[0];
diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c
index 551b6bc..e4d3533 100644
--- a/Documentation/perf_counter/util/parse-options.c
+++ b/Documentation/perf_counter/util/parse-options.c
@@ -385,7 +385,7 @@ int usage_with_options_internal(const char * const *usagestr,
 	if (!usagestr)
 		return PARSE_OPT_HELP;
 
-	fprintf(stderr, "usage: %s\n", *usagestr++);
+	fprintf(stderr, "\n usage: %s\n", *usagestr++);
 	while (*usagestr && **usagestr)
 		fprintf(stderr, "   or: %s\n", *usagestr++);
 	while (*usagestr) {
diff --git a/Documentation/perf_counter/util/usage.c b/Documentation/perf_counter/util/usage.c
index 7a10421..2cad286 100644
--- a/Documentation/perf_counter/util/usage.c
+++ b/Documentation/perf_counter/util/usage.c
@@ -14,7 +14,7 @@ static void report(const char *prefix, const char *err, va_list params)
 
 static NORETURN void usage_builtin(const char *err)
 {
-	fprintf(stderr, "usage: %s\n", err);
+	fprintf(stderr, "\n usage: %s\n", err);
 	exit(129);
 }
 
-- 
cgit v0.10.2


From 6e6b754ffdb6415723686c733f13275397e44422 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 15 Apr 2008 22:39:31 +0200
Subject: perf_counter tools: Tidy up manpage details

Also fix a misalignment in usage string printing.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-help.txt b/Documentation/perf_counter/Documentation/perf-help.txt
index f85fed5..5143918 100644
--- a/Documentation/perf_counter/Documentation/perf-help.txt
+++ b/Documentation/perf_counter/Documentation/perf-help.txt
@@ -1,5 +1,5 @@
 perf-help(1)
-===========
+============
 
 NAME
 ----
diff --git a/Documentation/perf_counter/Documentation/perf-list.txt b/Documentation/perf_counter/Documentation/perf-list.txt
index aa55a71..8290b94 100644
--- a/Documentation/perf_counter/Documentation/perf-list.txt
+++ b/Documentation/perf_counter/Documentation/perf-list.txt
@@ -1,5 +1,5 @@
 perf-list(1)
-==============
+============
 
 NAME
 ----
@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
 SYNOPSIS
 --------
 [verse]
-'perf list
+'perf list'
 
 DESCRIPTION
 -----------
diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt
index 5d95784..c368a72 100644
--- a/Documentation/perf_counter/Documentation/perf-stat.txt
+++ b/Documentation/perf_counter/Documentation/perf-stat.txt
@@ -1,5 +1,5 @@
 perf-stat(1)
-==========
+============
 
 NAME
 ----
diff --git a/Documentation/perf_counter/Documentation/perf-top.txt b/Documentation/perf_counter/Documentation/perf-top.txt
index c8eb7cf..539d012 100644
--- a/Documentation/perf_counter/Documentation/perf-top.txt
+++ b/Documentation/perf_counter/Documentation/perf-top.txt
@@ -1,5 +1,5 @@
 perf-top(1)
-==========
+===========
 
 NAME
 ----
diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c
index e4d3533..b3affb1 100644
--- a/Documentation/perf_counter/util/parse-options.c
+++ b/Documentation/perf_counter/util/parse-options.c
@@ -387,7 +387,7 @@ int usage_with_options_internal(const char * const *usagestr,
 
 	fprintf(stderr, "\n usage: %s\n", *usagestr++);
 	while (*usagestr && **usagestr)
-		fprintf(stderr, "   or: %s\n", *usagestr++);
+		fprintf(stderr, "    or: %s\n", *usagestr++);
 	while (*usagestr) {
 		fprintf(stderr, "%s%s\n",
 				**usagestr ? "    " : "",
-- 
cgit v0.10.2


From f89d7eaf6c34828070f407d0e04b73127f176ec5 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Thu, 4 Jun 2009 16:35:25 -0600
Subject: Document the debugfs API

This is an updated document covering the internal API for the debugfs
filesystem.  Thanks to Shen Feng for suggesting that I put this text here
and noting that the old LWN version was rather out of date.

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Reported-by: Shen Feng <shen@cn.fujitsu.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>

diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
new file mode 100644
index 0000000..ed52af6
--- /dev/null
+++ b/Documentation/filesystems/debugfs.txt
@@ -0,0 +1,158 @@
+Copyright 2009 Jonathan Corbet <corbet@lwn.net>
+
+Debugfs exists as a simple way for kernel developers to make information
+available to user space.  Unlike /proc, which is only meant for information
+about a process, or sysfs, which has strict one-value-per-file rules,
+debugfs has no rules at all.  Developers can put any information they want
+there.  The debugfs filesystem is also intended to not serve as a stable
+ABI to user space; in theory, there are no stability constraints placed on
+files exported there.  The real world is not always so simple, though [1];
+even debugfs interfaces are best designed with the idea that they will need
+to be maintained forever.
+
+Debugfs is typically mounted with a command like:
+
+    mount -t debugfs none /sys/kernel/debug
+
+(Or an equivalent /etc/fstab line). 
+
+Note that the debugfs API is exported GPL-only to modules.
+
+Code using debugfs should include <linux/debugfs.h>.  Then, the first order
+of business will be to create at least one directory to hold a set of
+debugfs files:
+
+    struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
+
+This call, if successful, will make a directory called name underneath the
+indicated parent directory.  If parent is NULL, the directory will be
+created in the debugfs root.  On success, the return value is a struct
+dentry pointer which can be used to create files in the directory (and to
+clean it up at the end).  A NULL return value indicates that something went
+wrong.  If ERR_PTR(-ENODEV) is returned, that is an indication that the
+kernel has been built without debugfs support and none of the functions
+described below will work.
+
+The most general way to create a file within a debugfs directory is with:
+
+    struct dentry *debugfs_create_file(const char *name, mode_t mode,
+				       struct dentry *parent, void *data,
+				       const struct file_operations *fops);
+
+Here, name is the name of the file to create, mode describes the access
+permissions the file should have, parent indicates the directory which
+should hold the file, data will be stored in the i_private field of the
+resulting inode structure, and fops is a set of file operations which
+implement the file's behavior.  At a minimum, the read() and/or write()
+operations should be provided; others can be included as needed.  Again,
+the return value will be a dentry pointer to the created file, NULL for
+error, or ERR_PTR(-ENODEV) if debugfs support is missing.
+
+In a number of cases, the creation of a set of file operations is not
+actually necessary; the debugfs code provides a number of helper functions
+for simple situations.  Files containing a single integer value can be
+created with any of:
+
+    struct dentry *debugfs_create_u8(const char *name, mode_t mode,
+				     struct dentry *parent, u8 *value);
+    struct dentry *debugfs_create_u16(const char *name, mode_t mode,
+				      struct dentry *parent, u16 *value);
+    struct dentry *debugfs_create_u32(const char *name, mode_t mode,
+				      struct dentry *parent, u32 *value);
+    struct dentry *debugfs_create_u64(const char *name, mode_t mode,
+				      struct dentry *parent, u64 *value);
+
+These files support both reading and writing the given value; if a specific
+file should not be written to, simply set the mode bits accordingly.  The
+values in these files are in decimal; if hexadecimal is more appropriate,
+the following functions can be used instead:
+
+    struct dentry *debugfs_create_x8(const char *name, mode_t mode,
+				     struct dentry *parent, u8 *value);
+    struct dentry *debugfs_create_x16(const char *name, mode_t mode,
+				      struct dentry *parent, u16 *value);
+    struct dentry *debugfs_create_x32(const char *name, mode_t mode,
+				      struct dentry *parent, u32 *value);
+
+Note that there is no debugfs_create_x64().
+
+These functions are useful as long as the developer knows the size of the
+value to be exported.  Some types can have different widths on different
+architectures, though, complicating the situation somewhat.  There is a
+function meant to help out in one special case:
+
+    struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+				         struct dentry *parent, 
+					 size_t *value);
+
+As might be expected, this function will create a debugfs file to represent
+a variable of type size_t.
+
+Boolean values can be placed in debugfs with:
+
+    struct dentry *debugfs_create_bool(const char *name, mode_t mode,
+				       struct dentry *parent, u32 *value);
+
+A read on the resulting file will yield either Y (for non-zero values) or
+N, followed by a newline.  If written to, it will accept either upper- or
+lower-case values, or 1 or 0.  Any other input will be silently ignored.
+
+Finally, a block of arbitrary binary data can be exported with:
+
+    struct debugfs_blob_wrapper {
+	void *data;
+	unsigned long size;
+    };
+
+    struct dentry *debugfs_create_blob(const char *name, mode_t mode,
+				       struct dentry *parent,
+				       struct debugfs_blob_wrapper *blob);
+
+A read of this file will return the data pointed to by the
+debugfs_blob_wrapper structure.  Some drivers use "blobs" as a simple way
+to return several lines of (static) formatted text output.  This function
+can be used to export binary information, but there does not appear to be
+any code which does so in the mainline.  Note that all files created with
+debugfs_create_blob() are read-only.
+
+There are a couple of other directory-oriented helper functions:
+
+    struct dentry *debugfs_rename(struct dentry *old_dir, 
+    				  struct dentry *old_dentry,
+		                  struct dentry *new_dir, 
+				  const char *new_name);
+
+    struct dentry *debugfs_create_symlink(const char *name, 
+                                          struct dentry *parent,
+				      	  const char *target);
+
+A call to debugfs_rename() will give a new name to an existing debugfs
+file, possibly in a different directory.  The new_name must not exist prior
+to the call; the return value is old_dentry with updated information.
+Symbolic links can be created with debugfs_create_symlink().
+
+There is one important thing that all debugfs users must take into account:
+there is no automatic cleanup of any directories created in debugfs.  If a
+module is unloaded without explicitly removing debugfs entries, the result
+will be a lot of stale pointers and no end of highly antisocial behavior.
+So all debugfs users - at least those which can be built as modules - must
+be prepared to remove all files and directories they create there.  A file
+can be removed with:
+
+    void debugfs_remove(struct dentry *dentry);
+
+The dentry value can be NULL, in which case nothing will be removed.
+
+Once upon a time, debugfs users were required to remember the dentry
+pointer for every debugfs file they created so that all files could be
+cleaned up.  We live in more civilized times now, though, and debugfs users
+can call:
+
+    void debugfs_remove_recursive(struct dentry *dentry);
+
+If this function is passed a pointer for the dentry corresponding to the
+top-level directory, the entire hierarchy below that directory will be
+removed.
+
+Notes:
+	[1] http://lwn.net/Articles/309298/
-- 
cgit v0.10.2


From d86bf92313bfd47885a92c7de63bde392d585f95 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Sat, 6 Jun 2009 18:32:06 +0200
Subject: ALSA: pcm - Fix a typo in hw_ptr update check

Fix a typo in the commit 13f040f9e55d41e92e485389123654971e03b819
  ALSA: PCM midlevel: Do not update hw_ptr_jiffies when hw_ptr is not changed
which causes obvious problems with PA.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index dd9126b..bf34603 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -371,7 +371,7 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream)
 	    runtime->silence_size > 0)
 		snd_pcm_playback_silence(substream, new_hw_ptr);
 
-	if (runtime->status->hw_ptr != new_hw_ptr)
+	if (runtime->status->hw_ptr == new_hw_ptr)
 		return 0;
 
 	runtime->hw_ptr_base = hw_base;
-- 
cgit v0.10.2


From 8035e4288078cb806e7dd6bafe4d3e54d44cab3f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 15:19:13 +0200
Subject: perf_counter tools: Prepare for 'perf annotate'

Prepare for the 'perf annotate' implementation by splitting off
builtin-annotate.c from builtin-report.c.

( We keep this commit separate to ease the later librarization
  of the facilities that perf-report and perf-annotate shares. )

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-annotate.txt b/Documentation/perf_counter/Documentation/perf-annotate.txt
new file mode 100644
index 0000000..a9d6d5e
--- /dev/null
+++ b/Documentation/perf_counter/Documentation/perf-annotate.txt
@@ -0,0 +1,26 @@
+perf-annotate(1)
+==============
+
+NAME
+----
+perf-annotate - Read perf.data (created by perf record) and annotate functions
+
+SYNOPSIS
+--------
+[verse]
+'perf annotate' [-i <file> | --input=file] symbol_name
+
+DESCRIPTION
+-----------
+This command displays the performance counter profile information recorded
+via perf record.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: perf.data)
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
index 32c0bb2..0cbd5d6 100644
--- a/Documentation/perf_counter/Makefile
+++ b/Documentation/perf_counter/Makefile
@@ -323,12 +323,13 @@ LIB_OBJS += util/symbol.o
 LIB_OBJS += util/color.o
 LIB_OBJS += util/pager.o
 
+BUILTIN_OBJS += builtin-annotate.o
 BUILTIN_OBJS += builtin-help.o
+BUILTIN_OBJS += builtin-list.o
 BUILTIN_OBJS += builtin-record.o
 BUILTIN_OBJS += builtin-report.o
 BUILTIN_OBJS += builtin-stat.o
 BUILTIN_OBJS += builtin-top.o
-BUILTIN_OBJS += builtin-list.o
 
 PERFLIBS = $(LIB_FILE)
 EXTLIBS =
diff --git a/Documentation/perf_counter/builtin-annotate.c b/Documentation/perf_counter/builtin-annotate.c
new file mode 100644
index 0000000..d656484
--- /dev/null
+++ b/Documentation/perf_counter/builtin-annotate.c
@@ -0,0 +1,1291 @@
+/*
+ * builtin-annotate.c
+ *
+ * Builtin annotate command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include "util/color.h"
+#include "util/list.h"
+#include "util/cache.h"
+#include "util/rbtree.h"
+#include "util/symbol.h"
+#include "util/string.h"
+
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#define SHOW_KERNEL	1
+#define SHOW_USER	2
+#define SHOW_HV		4
+
+static char		const *input_name = "perf.data";
+static char		*vmlinux = NULL;
+
+static char		default_sort_order[] = "comm,dso";
+static char		*sort_order = default_sort_order;
+
+static int		input;
+static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
+
+static int		dump_trace = 0;
+#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
+
+static int		verbose;
+static int		full_paths;
+
+static unsigned long	page_size;
+static unsigned long	mmap_window = 32;
+
+struct ip_event {
+	struct perf_event_header header;
+	__u64 ip;
+	__u32 pid, tid;
+};
+
+struct mmap_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	__u64 start;
+	__u64 len;
+	__u64 pgoff;
+	char filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	char comm[16];
+};
+
+struct fork_event {
+	struct perf_event_header header;
+	__u32 pid, ppid;
+};
+
+struct period_event {
+	struct perf_event_header header;
+	__u64 time;
+	__u64 id;
+	__u64 sample_period;
+};
+
+typedef union event_union {
+	struct perf_event_header	header;
+	struct ip_event			ip;
+	struct mmap_event		mmap;
+	struct comm_event		comm;
+	struct fork_event		fork;
+	struct period_event		period;
+} event_t;
+
+static LIST_HEAD(dsos);
+static struct dso *kernel_dso;
+static struct dso *vdso;
+
+static void dsos__add(struct dso *dso)
+{
+	list_add_tail(&dso->node, &dsos);
+}
+
+static struct dso *dsos__find(const char *name)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		if (strcmp(pos->name, name) == 0)
+			return pos;
+	return NULL;
+}
+
+static struct dso *dsos__findnew(const char *name)
+{
+	struct dso *dso = dsos__find(name);
+	int nr;
+
+	if (dso)
+		return dso;
+
+	dso = dso__new(name, 0);
+	if (!dso)
+		goto out_delete_dso;
+
+	nr = dso__load(dso, NULL, verbose);
+	if (nr < 0) {
+		if (verbose)
+			fprintf(stderr, "Failed to open: %s\n", name);
+		goto out_delete_dso;
+	}
+	if (!nr && verbose) {
+		fprintf(stderr,
+		"No symbols found in: %s, maybe install a debug package?\n",
+				name);
+	}
+
+	dsos__add(dso);
+
+	return dso;
+
+out_delete_dso:
+	dso__delete(dso);
+	return NULL;
+}
+
+static void dsos__fprintf(FILE *fp)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		dso__fprintf(pos, fp);
+}
+
+static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip)
+{
+	return dso__find_symbol(kernel_dso, ip);
+}
+
+static int load_kernel(void)
+{
+	int err;
+
+	kernel_dso = dso__new("[kernel]", 0);
+	if (!kernel_dso)
+		return -1;
+
+	err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
+	if (err) {
+		dso__delete(kernel_dso);
+		kernel_dso = NULL;
+	} else
+		dsos__add(kernel_dso);
+
+	vdso = dso__new("[vdso]", 0);
+	if (!vdso)
+		return -1;
+
+	vdso->find_symbol = vdso__find_symbol;
+
+	dsos__add(vdso);
+
+	return err;
+}
+
+static char __cwd[PATH_MAX];
+static char *cwd = __cwd;
+static int cwdlen;
+
+static int strcommon(const char *pathname)
+{
+	int n = 0;
+
+	while (pathname[n] == cwd[n] && n < cwdlen)
+		++n;
+
+	return n;
+}
+
+struct map {
+	struct list_head node;
+	uint64_t	 start;
+	uint64_t	 end;
+	uint64_t	 pgoff;
+	uint64_t	 (*map_ip)(struct map *, uint64_t);
+	struct dso	 *dso;
+};
+
+static uint64_t map__map_ip(struct map *map, uint64_t ip)
+{
+	return ip - map->start + map->pgoff;
+}
+
+static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
+{
+	return ip;
+}
+
+static struct map *map__new(struct mmap_event *event)
+{
+	struct map *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		const char *filename = event->filename;
+		char newfilename[PATH_MAX];
+
+		if (cwd) {
+			int n = strcommon(filename);
+
+			if (n == cwdlen) {
+				snprintf(newfilename, sizeof(newfilename),
+					 ".%s", filename + n);
+				filename = newfilename;
+			}
+		}
+
+		self->start = event->start;
+		self->end   = event->start + event->len;
+		self->pgoff = event->pgoff;
+
+		self->dso = dsos__findnew(filename);
+		if (self->dso == NULL)
+			goto out_delete;
+
+		if (self->dso == vdso)
+			self->map_ip = vdso__map_ip;
+		else
+			self->map_ip = map__map_ip;
+	}
+	return self;
+out_delete:
+	free(self);
+	return NULL;
+}
+
+static struct map *map__clone(struct map *self)
+{
+	struct map *map = malloc(sizeof(*self));
+
+	if (!map)
+		return NULL;
+
+	memcpy(map, self, sizeof(*self));
+
+	return map;
+}
+
+static int map__overlap(struct map *l, struct map *r)
+{
+	if (l->start > r->start) {
+		struct map *t = l;
+		l = r;
+		r = t;
+	}
+
+	if (l->end > r->start)
+		return 1;
+
+	return 0;
+}
+
+static size_t map__fprintf(struct map *self, FILE *fp)
+{
+	return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n",
+		       self->start, self->end, self->pgoff, self->dso->name);
+}
+
+
+struct thread {
+	struct rb_node	 rb_node;
+	struct list_head maps;
+	pid_t		 pid;
+	char		 *comm;
+};
+
+static struct thread *thread__new(pid_t pid)
+{
+	struct thread *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->pid = pid;
+		self->comm = malloc(32);
+		if (self->comm)
+			snprintf(self->comm, 32, ":%d", self->pid);
+		INIT_LIST_HEAD(&self->maps);
+	}
+
+	return self;
+}
+
+static int thread__set_comm(struct thread *self, const char *comm)
+{
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(comm);
+	return self->comm ? 0 : -ENOMEM;
+}
+
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	struct map *pos;
+	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+
+	list_for_each_entry(pos, &self->maps, node)
+		ret += map__fprintf(pos, fp);
+
+	return ret;
+}
+
+
+static struct rb_root threads;
+static struct thread *last_match;
+
+static struct thread *threads__findnew(pid_t pid)
+{
+	struct rb_node **p = &threads.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+
+	/*
+	 * Font-end cache - PID lookups come in blocks,
+	 * so most of the time we dont have to look up
+	 * the full rbtree:
+	 */
+	if (last_match && last_match->pid == pid)
+		return last_match;
+
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
+
+		if (th->pid == pid) {
+			last_match = th;
+			return th;
+		}
+
+		if (pid < th->pid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	th = thread__new(pid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color(&th->rb_node, &threads);
+		last_match = th;
+	}
+
+	return th;
+}
+
+static void thread__insert_map(struct thread *self, struct map *map)
+{
+	struct map *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
+		if (map__overlap(pos, map)) {
+			list_del_init(&pos->node);
+			/* XXX leaks dsos */
+			free(pos);
+		}
+	}
+
+	list_add_tail(&map->node, &self->maps);
+}
+
+static int thread__fork(struct thread *self, struct thread *parent)
+{
+	struct map *map;
+
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(parent->comm);
+	if (!self->comm)
+		return -ENOMEM;
+
+	list_for_each_entry(map, &parent->maps, node) {
+		struct map *new = map__clone(map);
+		if (!new)
+			return -ENOMEM;
+		thread__insert_map(self, new);
+	}
+
+	return 0;
+}
+
+static struct map *thread__find_map(struct thread *self, uint64_t ip)
+{
+	struct map *pos;
+
+	if (self == NULL)
+		return NULL;
+
+	list_for_each_entry(pos, &self->maps, node)
+		if (ip >= pos->start && ip <= pos->end)
+			return pos;
+
+	return NULL;
+}
+
+static size_t threads__fprintf(FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
+		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+		ret += thread__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+/*
+ * histogram, sorted on item, collects counts
+ */
+
+static struct rb_root hist;
+
+struct hist_entry {
+	struct rb_node	 rb_node;
+
+	struct thread	 *thread;
+	struct map	 *map;
+	struct dso	 *dso;
+	struct symbol	 *sym;
+	uint64_t	 ip;
+	char		 level;
+
+	uint32_t	 count;
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+	struct list_head list;
+
+	char *header;
+
+	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
+	size_t	(*print)(FILE *fp, struct hist_entry *);
+};
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static size_t
+sort__thread_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
+}
+
+static struct sort_entry sort_thread = {
+	.header = "         Command:  Pid",
+	.cmp	= sort__thread_cmp,
+	.print	= sort__thread_print,
+};
+
+/* --sort comm */
+
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	char *comm_l = left->thread->comm;
+	char *comm_r = right->thread->comm;
+
+	if (!comm_l || !comm_r) {
+		if (!comm_l && !comm_r)
+			return 0;
+		else if (!comm_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(comm_l, comm_r);
+}
+
+static size_t
+sort__comm_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s", self->thread->comm);
+}
+
+static struct sort_entry sort_comm = {
+	.header		= "         Command",
+	.cmp		= sort__comm_cmp,
+	.collapse	= sort__comm_collapse,
+	.print		= sort__comm_print,
+};
+
+/* --sort dso */
+
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct dso *dso_l = left->dso;
+	struct dso *dso_r = right->dso;
+
+	if (!dso_l || !dso_r) {
+		if (!dso_l && !dso_r)
+			return 0;
+		else if (!dso_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(dso_l->name, dso_r->name);
+}
+
+static size_t
+sort__dso_print(FILE *fp, struct hist_entry *self)
+{
+	if (self->dso)
+		return fprintf(fp, "%-25s", self->dso->name);
+
+	return fprintf(fp, "%016llx         ", (__u64)self->ip);
+}
+
+static struct sort_entry sort_dso = {
+	.header = "Shared Object            ",
+	.cmp	= sort__dso_cmp,
+	.print	= sort__dso_print,
+};
+
+/* --sort symbol */
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t ip_l, ip_r;
+
+	if (left->sym == right->sym)
+		return 0;
+
+	ip_l = left->sym ? left->sym->start : left->ip;
+	ip_r = right->sym ? right->sym->start : right->ip;
+
+	return (int64_t)(ip_r - ip_l);
+}
+
+static size_t
+sort__sym_print(FILE *fp, struct hist_entry *self)
+{
+	size_t ret = 0;
+
+	if (verbose)
+		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
+
+	if (self->sym) {
+		ret += fprintf(fp, "[%c] %s",
+			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
+	} else {
+		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+	}
+
+	return ret;
+}
+
+static struct sort_entry sort_sym = {
+	.header = "Symbol",
+	.cmp	= sort__sym_cmp,
+	.print	= sort__sym_print,
+};
+
+static int sort__need_collapse = 0;
+
+struct sort_dimension {
+	char			*name;
+	struct sort_entry	*entry;
+	int			taken;
+};
+
+static struct sort_dimension sort_dimensions[] = {
+	{ .name = "pid",	.entry = &sort_thread,	},
+	{ .name = "comm",	.entry = &sort_comm,	},
+	{ .name = "dso",	.entry = &sort_dso,	},
+	{ .name = "symbol",	.entry = &sort_sym,	},
+};
+
+static LIST_HEAD(hist_entry__sort_list);
+
+static int sort_dimension__add(char *tok)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
+		struct sort_dimension *sd = &sort_dimensions[i];
+
+		if (sd->taken)
+			continue;
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sd->entry->collapse)
+			sort__need_collapse = 1;
+
+		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
+		sd->taken = 1;
+
+		return 0;
+	}
+
+	return -ESRCH;
+}
+
+static int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		cmp = se->cmp(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int64_t (*f)(struct hist_entry *, struct hist_entry *);
+
+		f = se->collapse ?: se->cmp;
+
+		cmp = f(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static size_t
+hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
+{
+	struct sort_entry *se;
+	size_t ret;
+
+	if (total_samples) {
+		double percent = self->count * 100.0 / total_samples;
+		char *color = PERF_COLOR_NORMAL;
+
+		/*
+		 * We color high-overhead entries in red, low-overhead
+		 * entries in green - and keep the middle ground normal:
+		 */
+		if (percent >= 5.0)
+			color = PERF_COLOR_RED;
+		if (percent < 0.5)
+			color = PERF_COLOR_GREEN;
+
+		ret = color_fprintf(fp, color, "   %6.2f%%",
+				(self->count * 100.0) / total_samples);
+	} else
+		ret = fprintf(fp, "%12d ", self->count);
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		fprintf(fp, "  ");
+		ret += se->print(fp, self);
+	}
+
+	ret += fprintf(fp, "\n");
+
+	return ret;
+}
+
+/*
+ * collect histogram counts
+ */
+
+static int
+hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
+		struct symbol *sym, uint64_t ip, char level)
+{
+	struct rb_node **p = &hist.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct hist_entry entry = {
+		.thread	= thread,
+		.map	= map,
+		.dso	= dso,
+		.sym	= sym,
+		.ip	= ip,
+		.level	= level,
+		.count	= 1,
+	};
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__cmp(&entry, he);
+
+		if (!cmp) {
+			he->count++;
+			return 0;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	he = malloc(sizeof(*he));
+	if (!he)
+		return -ENOMEM;
+	*he = entry;
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &hist);
+
+	return 0;
+}
+
+static void hist_entry__free(struct hist_entry *he)
+{
+	free(he);
+}
+
+/*
+ * collapse the histogram
+ */
+
+static struct rb_root collapse_hists;
+
+static void collapse__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &collapse_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			iter->count += he->count;
+			hist_entry__free(he);
+			return;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &collapse_hists);
+}
+
+static void collapse__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+
+	if (!sort__need_collapse)
+		return;
+
+	next = rb_first(&hist);
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, &hist);
+		collapse__insert_entry(n);
+	}
+}
+
+/*
+ * reverse the map, sort on count.
+ */
+
+static struct rb_root output_hists;
+
+static void output__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &output_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (he->count > iter->count)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &output_hists);
+}
+
+static void output__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+	struct rb_root *tree = &hist;
+
+	if (sort__need_collapse)
+		tree = &collapse_hists;
+
+	next = rb_first(tree);
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, tree);
+		output__insert_entry(n);
+	}
+}
+
+static size_t output__fprintf(FILE *fp, uint64_t total_samples)
+{
+	struct hist_entry *pos;
+	struct sort_entry *se;
+	struct rb_node *nd;
+	size_t ret = 0;
+
+	fprintf(fp, "\n");
+	fprintf(fp, "#\n");
+	fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples);
+	fprintf(fp, "#\n");
+
+	fprintf(fp, "# Overhead");
+	list_for_each_entry(se, &hist_entry__sort_list, list)
+		fprintf(fp, "  %s", se->header);
+	fprintf(fp, "\n");
+
+	fprintf(fp, "# ........");
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int i;
+
+		fprintf(fp, "  ");
+		for (i = 0; i < strlen(se->header); i++)
+			fprintf(fp, ".");
+	}
+	fprintf(fp, "\n");
+
+	fprintf(fp, "#\n");
+
+	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node);
+		ret += hist_entry__fprintf(fp, pos, total_samples);
+	}
+
+	if (!strcmp(sort_order, default_sort_order)) {
+		fprintf(fp, "#\n");
+		fprintf(fp, "# (For more details, try: perf annotate --sort comm,dso,symbol)\n");
+		fprintf(fp, "#\n");
+	}
+	fprintf(fp, "\n");
+
+	return ret;
+}
+
+static void register_idle_thread(void)
+{
+	struct thread *thread = threads__findnew(0);
+
+	if (thread == NULL ||
+			thread__set_comm(thread, "[idle]")) {
+		fprintf(stderr, "problem inserting idle task.\n");
+		exit(-1);
+	}
+}
+
+static unsigned long total = 0,
+		     total_mmap = 0,
+		     total_comm = 0,
+		     total_fork = 0,
+		     total_unknown = 0;
+
+static int
+process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	char level;
+	int show = 0;
+	struct dso *dso = NULL;
+	struct thread *thread = threads__findnew(event->ip.pid);
+	uint64_t ip = event->ip.ip;
+	struct map *map = NULL;
+
+	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->header.misc,
+		event->ip.pid,
+		(void *)(long)ip);
+
+	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	if (thread == NULL) {
+		fprintf(stderr, "problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+		show = SHOW_KERNEL;
+		level = 'k';
+
+		dso = kernel_dso;
+
+		dprintf(" ...... dso: %s\n", dso->name);
+
+	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+
+		show = SHOW_USER;
+		level = '.';
+
+		map = thread__find_map(thread, ip);
+		if (map != NULL) {
+			ip = map->map_ip(map, ip);
+			dso = map->dso;
+		} else {
+			/*
+			 * If this is outside of all known maps,
+			 * and is a negative address, try to look it
+			 * up in the kernel dso, as it might be a
+			 * vsyscall (which executes in user-mode):
+			 */
+			if ((long long)ip < 0)
+				dso = kernel_dso;
+		}
+		dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+
+	} else {
+		show = SHOW_HV;
+		level = 'H';
+		dprintf(" ...... dso: [hypervisor]\n");
+	}
+
+	if (show & show_mask) {
+		struct symbol *sym = NULL;
+
+		if (dso)
+			sym = dso->find_symbol(dso, ip);
+
+		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
+			fprintf(stderr,
+		"problem incrementing symbol count, skipping event\n");
+			return -1;
+		}
+	}
+	total++;
+
+	return 0;
+}
+
+static int
+process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->mmap.pid);
+	struct map *map = map__new(&event->mmap);
+
+	dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->mmap.pid,
+		(void *)(long)event->mmap.start,
+		(void *)(long)event->mmap.len,
+		(void *)(long)event->mmap.pgoff,
+		event->mmap.filename);
+
+	if (thread == NULL || map == NULL) {
+		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+		return 0;
+	}
+
+	thread__insert_map(thread, map);
+	total_mmap++;
+
+	return 0;
+}
+
+static int
+process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->comm.pid);
+
+	dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->comm.comm, event->comm.pid);
+
+	if (thread == NULL ||
+	    thread__set_comm(thread, event->comm.comm)) {
+		dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		return -1;
+	}
+	total_comm++;
+
+	return 0;
+}
+
+static int
+process_fork_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->fork.pid);
+	struct thread *parent = threads__findnew(event->fork.ppid);
+
+	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->fork.pid, event->fork.ppid);
+
+	if (!thread || !parent || thread__fork(thread, parent)) {
+		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
+		return -1;
+	}
+	total_fork++;
+
+	return 0;
+}
+
+static int
+process_period_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->period.time,
+		event->period.id,
+		event->period.sample_period);
+
+	return 0;
+}
+
+static int
+process_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
+		return process_overflow_event(event, offset, head);
+
+	switch (event->header.type) {
+	case PERF_EVENT_MMAP:
+		return process_mmap_event(event, offset, head);
+
+	case PERF_EVENT_COMM:
+		return process_comm_event(event, offset, head);
+
+	case PERF_EVENT_FORK:
+		return process_fork_event(event, offset, head);
+
+	case PERF_EVENT_PERIOD:
+		return process_period_event(event, offset, head);
+	/*
+	 * We dont process them right now but they are fine:
+	 */
+
+	case PERF_EVENT_THROTTLE:
+	case PERF_EVENT_UNTHROTTLE:
+		return 0;
+
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __cmd_annotate(void)
+{
+	int ret, rc = EXIT_FAILURE;
+	unsigned long offset = 0;
+	unsigned long head = 0;
+	struct stat stat;
+	event_t *event;
+	uint32_t size;
+	char *buf;
+
+	register_idle_thread();
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		perror("failed to open file");
+		exit(-1);
+	}
+
+	ret = fstat(input, &stat);
+	if (ret < 0) {
+		perror("failed to stat file");
+		exit(-1);
+	}
+
+	if (!stat.st_size) {
+		fprintf(stderr, "zero-sized file, nothing to do!\n");
+		exit(0);
+	}
+
+	if (load_kernel() < 0) {
+		perror("failed to load kernel symbols");
+		return EXIT_FAILURE;
+	}
+
+	if (!full_paths) {
+		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
+			perror("failed to get the current directory");
+			return EXIT_FAILURE;
+		}
+		cwdlen = strlen(cwd);
+	} else {
+		cwd = NULL;
+		cwdlen = 0;
+	}
+remap:
+	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
+			   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		perror("failed to mmap file");
+		exit(-1);
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	size = event->header.size;
+	if (!size)
+		size = 8;
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		unsigned long shift = page_size * (head / page_size);
+		int ret;
+
+		ret = munmap(buf, page_size * mmap_window);
+		assert(ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	dprintf("%p [%p]: event: %d\n",
+			(void *)(offset + head),
+			(void *)(long)event->header.size,
+			event->header.type);
+
+	if (!size || process_event(event, offset, head) < 0) {
+
+		dprintf("%p [%p]: skipping unknown header type: %d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->header.type);
+
+		total_unknown++;
+
+		/*
+		 * assume we lost track of the stream, check alignment, and
+		 * increment a single u64 in the hope to catch on again 'soon'.
+		 */
+
+		if (unlikely(head & 7))
+			head &= ~7ULL;
+
+		size = 8;
+	}
+
+	head += size;
+
+	if (offset + head < stat.st_size)
+		goto more;
+
+	rc = EXIT_SUCCESS;
+	close(input);
+
+	dprintf("      IP events: %10ld\n", total);
+	dprintf("    mmap events: %10ld\n", total_mmap);
+	dprintf("    comm events: %10ld\n", total_comm);
+	dprintf("    fork events: %10ld\n", total_fork);
+	dprintf(" unknown events: %10ld\n", total_unknown);
+
+	if (dump_trace)
+		return 0;
+
+	if (verbose >= 3)
+		threads__fprintf(stdout);
+
+	if (verbose >= 2)
+		dsos__fprintf(stdout);
+
+	collapse__resort();
+	output__resort();
+	output__fprintf(stdout, total);
+
+	return rc;
+}
+
+static const char * const annotate_usage[] = {
+	"perf annotate [<options>] <command>",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+		   "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"),
+	OPT_BOOLEAN('P', "full-paths", &full_paths,
+		    "Don't shorten the pathnames taking into account the cwd"),
+	OPT_END()
+};
+
+static void setup_sorting(void)
+{
+	char *tmp, *tok, *str = strdup(sort_order);
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		if (sort_dimension__add(tok) < 0) {
+			error("Unknown --sort key: `%s'", tok);
+			usage_with_options(annotate_usage, options);
+		}
+	}
+
+	free(str);
+}
+
+int cmd_annotate(int argc, const char **argv, const char *prefix)
+{
+	symbol__init();
+
+	page_size = getpagesize();
+
+	argc = parse_options(argc, argv, options, annotate_usage, 0);
+
+	setup_sorting();
+
+	/*
+	 * Any (unrecognized) arguments left?
+	 */
+	if (argc)
+		usage_with_options(annotate_usage, options);
+
+	setup_pager();
+
+	return __cmd_annotate();
+}
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
index e7de47d..51d1682 100644
--- a/Documentation/perf_counter/builtin.h
+++ b/Documentation/perf_counter/builtin.h
@@ -14,6 +14,7 @@ extern void prune_packed_objects(int);
 extern int read_line_with_nul(char *buf, int size, FILE *file);
 extern int check_pager_config(const char *cmd);
 
+extern int cmd_annotate(int argc, const char **argv, const char *prefix);
 extern int cmd_help(int argc, const char **argv, const char *prefix);
 extern int cmd_record(int argc, const char **argv, const char *prefix);
 extern int cmd_report(int argc, const char **argv, const char *prefix);
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt
index f0b922c..eebce30 100644
--- a/Documentation/perf_counter/command-list.txt
+++ b/Documentation/perf_counter/command-list.txt
@@ -2,8 +2,9 @@
 # List of known perf commands.
 # command name			category [deprecated] [common]
 #
+perf-annotate			mainporcelain common
+perf-list			mainporcelain common
 perf-record			mainporcelain common
 perf-report			mainporcelain common
 perf-stat			mainporcelain common
 perf-top			mainporcelain common
-perf-list			mainporcelain common
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
index 161824f..4eb7259 100644
--- a/Documentation/perf_counter/perf.c
+++ b/Documentation/perf_counter/perf.c
@@ -263,6 +263,7 @@ static void handle_internal_command(int argc, const char **argv)
 		{ "report", cmd_report, 0 },
 		{ "stat", cmd_stat, 0 },
 		{ "top", cmd_top, 0 },
+		{ "annotate", cmd_annotate, 0 },
 		{ "version", cmd_version, 0 },
 	};
 	int i;
@@ -402,9 +403,11 @@ int main(int argc, const char **argv)
 	while (1) {
 		static int done_help = 0;
 		static int was_alias = 0;
+
 		was_alias = run_argv(&argc, &argv);
 		if (errno != ENOENT)
 			break;
+
 		if (was_alias) {
 			fprintf(stderr, "Expansion of alias '%s' failed; "
 				"'%s' is not a perf-command\n",
-- 
cgit v0.10.2


From 0b73da3f40128eab6ca2a07508f424029a1edaeb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 15:48:52 +0200
Subject: perf_counter tools: Add 'perf annotate' feature

Add new perf sub-command to display annotated source code:

 $ perf annotate decode_tree_entry

------------------------------------------------
 Percent |	Source code & Disassembly of /home/mingo/git/git
------------------------------------------------
         :
         :	/home/mingo/git/git:     file format elf64-x86-64
         :
         :
         :	Disassembly of section .text:
         :
         :	00000000004a0da0 <decode_tree_entry>:
         :		*modep = mode;
         :		return str;
         :	}
         :
         :	static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
         :	{
    3.82 :	  4a0da0:	41 54                	push   %r12
         :		const char *path;
         :		unsigned int mode, len;
         :
         :		if (size < 24 || buf[size - 21])
    0.17 :	  4a0da2:	48 83 fa 17          	cmp    $0x17,%rdx
         :		*modep = mode;
         :		return str;
         :	}
         :
         :	static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
         :	{
    0.00 :	  4a0da6:	49 89 fc             	mov    %rdi,%r12
    0.00 :	  4a0da9:	55                   	push   %rbp
    3.37 :	  4a0daa:	53                   	push   %rbx
         :		const char *path;
         :		unsigned int mode, len;
         :
         :		if (size < 24 || buf[size - 21])
    0.08 :	  4a0dab:	76 73                	jbe    4a0e20 <decode_tree_entry+0x80>
    0.00 :	  4a0dad:	80 7c 16 eb 00       	cmpb   $0x0,-0x15(%rsi,%rdx,1)
    3.48 :	  4a0db2:	75 6c                	jne    4a0e20 <decode_tree_entry+0x80>
         :	static const char *get_mode(const char *str, unsigned int *modep)
         :	{
         :		unsigned char c;
         :		unsigned int mode = 0;
         :
         :		if (*str == ' ')
    1.94 :	  4a0db4:	0f b6 06             	movzbl (%rsi),%eax
    0.39 :	  4a0db7:	3c 20                	cmp    $0x20,%al
    0.00 :	  4a0db9:	74 65                	je     4a0e20 <decode_tree_entry+0x80>
         :			return NULL;
         :
         :		while ((c = *str++) != ' ') {
    0.06 :	  4a0dbb:	89 c2                	mov    %eax,%edx
         :			if (c < '0' || c > '7')
    1.99 :	  4a0dbd:	31 ed                	xor    %ebp,%ebp
         :		unsigned int mode = 0;
         :
         :		if (*str == ' ')
         :			return NULL;
         :
         :		while ((c = *str++) != ' ') {
    1.74 :	  4a0dbf:	48 8d 5e 01          	lea    0x1(%rsi),%rbx
         :			if (c < '0' || c > '7')
    0.00 :	  4a0dc3:	8d 42 d0             	lea    -0x30(%rdx),%eax
    0.17 :	  4a0dc6:	3c 07                	cmp    $0x7,%al
    0.00 :	  4a0dc8:	76 0d                	jbe    4a0dd7 <decode_tree_entry+0x37>
    0.00 :	  4a0dca:	eb 54                	jmp    4a0e20 <decode_tree_entry+0x80>
    0.00 :	  4a0dcc:	0f 1f 40 00          	nopl   0x0(%rax)
   16.57 :	  4a0dd0:	8d 42 d0             	lea    -0x30(%rdx),%eax
    0.14 :	  4a0dd3:	3c 07                	cmp    $0x7,%al
    0.00 :	  4a0dd5:	77 49                	ja     4a0e20 <decode_tree_entry+0x80>
         :				return NULL;
         :			mode = (mode << 3) + (c - '0');
    3.12 :	  4a0dd7:	0f b6 c2             	movzbl %dl,%eax
         :		unsigned int mode = 0;
         :
         :		if (*str == ' ')
         :			return NULL;
         :
         :		while ((c = *str++) != ' ') {
    0.00 :	  4a0dda:	0f b6 13             	movzbl (%rbx),%edx
   16.74 :	  4a0ddd:	48 83 c3 01          	add    $0x1,%rbx
         :			if (c < '0' || c > '7')
         :				return NULL;
         :			mode = (mode << 3) + (c - '0');

The first column is the percentage of samples that arrived on that
particular line - relative to the total cost of the function.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/Documentation/perf-annotate.txt b/Documentation/perf_counter/Documentation/perf-annotate.txt
index a9d6d5e..c9dcade 100644
--- a/Documentation/perf_counter/Documentation/perf-annotate.txt
+++ b/Documentation/perf_counter/Documentation/perf-annotate.txt
@@ -3,7 +3,7 @@ perf-annotate(1)
 
 NAME
 ----
-perf-annotate - Read perf.data (created by perf record) and annotate functions
+perf-annotate - Read perf.data (created by perf record) and display annotated code
 
 SYNOPSIS
 --------
@@ -12,8 +12,11 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command displays the performance counter profile information recorded
-via perf record.
+This command reads the input file and displays an annotated version of the
+code. If the object file has debug symbols then the source code will be
+displayed alongside assembly code.
+
+If there is no debug info in the object, then annotated assembly is displayed.
 
 OPTIONS
 -------
diff --git a/Documentation/perf_counter/builtin-annotate.c b/Documentation/perf_counter/builtin-annotate.c
index d656484..116a397 100644
--- a/Documentation/perf_counter/builtin-annotate.c
+++ b/Documentation/perf_counter/builtin-annotate.c
@@ -28,7 +28,7 @@
 static char		const *input_name = "perf.data";
 static char		*vmlinux = NULL;
 
-static char		default_sort_order[] = "comm,dso";
+static char		default_sort_order[] = "comm,symbol";
 static char		*sort_order = default_sort_order;
 
 static int		input;
@@ -38,7 +38,6 @@ static int		dump_trace = 0;
 #define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
 
 static int		verbose;
-static int		full_paths;
 
 static unsigned long	page_size;
 static unsigned long	mmap_window = 32;
@@ -89,6 +88,7 @@ static LIST_HEAD(dsos);
 static struct dso *kernel_dso;
 static struct dso *vdso;
 
+
 static void dsos__add(struct dso *dso)
 {
 	list_add_tail(&dso->node, &dsos);
@@ -176,20 +176,6 @@ static int load_kernel(void)
 	return err;
 }
 
-static char __cwd[PATH_MAX];
-static char *cwd = __cwd;
-static int cwdlen;
-
-static int strcommon(const char *pathname)
-{
-	int n = 0;
-
-	while (pathname[n] == cwd[n] && n < cwdlen)
-		++n;
-
-	return n;
-}
-
 struct map {
 	struct list_head node;
 	uint64_t	 start;
@@ -215,17 +201,6 @@ static struct map *map__new(struct mmap_event *event)
 
 	if (self != NULL) {
 		const char *filename = event->filename;
-		char newfilename[PATH_MAX];
-
-		if (cwd) {
-			int n = strcommon(filename);
-
-			if (n == cwdlen) {
-				snprintf(newfilename, sizeof(newfilename),
-					 ".%s", filename + n);
-				filename = newfilename;
-			}
-		}
 
 		self->start = event->start;
 		self->end   = event->start + event->len;
@@ -669,44 +644,36 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 	return cmp;
 }
 
-static size_t
-hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
+/*
+ * collect histogram counts
+ */
+static void hist_hit(struct hist_entry *he, uint64_t ip)
 {
-	struct sort_entry *se;
-	size_t ret;
+	unsigned int sym_size, offset;
+	struct symbol *sym = he->sym;
 
-	if (total_samples) {
-		double percent = self->count * 100.0 / total_samples;
-		char *color = PERF_COLOR_NORMAL;
+	he->count++;
 
-		/*
-		 * We color high-overhead entries in red, low-overhead
-		 * entries in green - and keep the middle ground normal:
-		 */
-		if (percent >= 5.0)
-			color = PERF_COLOR_RED;
-		if (percent < 0.5)
-			color = PERF_COLOR_GREEN;
+	if (!sym || !sym->hist)
+		return;
 
-		ret = color_fprintf(fp, color, "   %6.2f%%",
-				(self->count * 100.0) / total_samples);
-	} else
-		ret = fprintf(fp, "%12d ", self->count);
+	sym_size = sym->end - sym->start;
+	offset = ip - sym->start;
 
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		fprintf(fp, "  ");
-		ret += se->print(fp, self);
-	}
+	if (offset >= sym_size)
+		return;
 
-	ret += fprintf(fp, "\n");
+	sym->hist_sum++;
+	sym->hist[offset]++;
 
-	return ret;
+	if (verbose >= 3)
+		printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
+			(void *)he->sym->start,
+			he->sym->name,
+			(void *)ip, ip - he->sym->start,
+			sym->hist[offset]);
 }
 
-/*
- * collect histogram counts
- */
-
 static int
 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 		struct symbol *sym, uint64_t ip, char level)
@@ -732,7 +699,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 		cmp = hist_entry__cmp(&entry, he);
 
 		if (!cmp) {
-			he->count++;
+			hist_hit(he, ip);
+
 			return 0;
 		}
 
@@ -856,50 +824,6 @@ static void output__resort(void)
 	}
 }
 
-static size_t output__fprintf(FILE *fp, uint64_t total_samples)
-{
-	struct hist_entry *pos;
-	struct sort_entry *se;
-	struct rb_node *nd;
-	size_t ret = 0;
-
-	fprintf(fp, "\n");
-	fprintf(fp, "#\n");
-	fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples);
-	fprintf(fp, "#\n");
-
-	fprintf(fp, "# Overhead");
-	list_for_each_entry(se, &hist_entry__sort_list, list)
-		fprintf(fp, "  %s", se->header);
-	fprintf(fp, "\n");
-
-	fprintf(fp, "# ........");
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		int i;
-
-		fprintf(fp, "  ");
-		for (i = 0; i < strlen(se->header); i++)
-			fprintf(fp, ".");
-	}
-	fprintf(fp, "\n");
-
-	fprintf(fp, "#\n");
-
-	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
-		pos = rb_entry(nd, struct hist_entry, rb_node);
-		ret += hist_entry__fprintf(fp, pos, total_samples);
-	}
-
-	if (!strcmp(sort_order, default_sort_order)) {
-		fprintf(fp, "#\n");
-		fprintf(fp, "# (For more details, try: perf annotate --sort comm,dso,symbol)\n");
-		fprintf(fp, "#\n");
-	}
-	fprintf(fp, "\n");
-
-	return ret;
-}
-
 static void register_idle_thread(void)
 {
 	struct thread *thread = threads__findnew(0);
@@ -1106,6 +1030,149 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	return 0;
 }
 
+static int
+parse_line(FILE *file, struct symbol *sym, uint64_t start, uint64_t len)
+{
+	char *line = NULL, *tmp, *tmp2;
+	unsigned int offset;
+	size_t line_len;
+	__u64 line_ip;
+	int ret;
+	char *c;
+
+	if (getline(&line, &line_len, file) < 0)
+		return -1;
+	if (!line)
+		return -1;
+
+	c = strchr(line, '\n');
+	if (c)
+		*c = 0;
+
+	line_ip = -1;
+	offset = 0;
+	ret = -2;
+
+	/*
+	 * Strip leading spaces:
+	 */
+	tmp = line;
+	while (*tmp) {
+		if (*tmp != ' ')
+			break;
+		tmp++;
+	}
+
+	if (*tmp) {
+		/*
+		 * Parse hexa addresses followed by ':'
+		 */
+		line_ip = strtoull(tmp, &tmp2, 16);
+		if (*tmp2 != ':')
+			line_ip = -1;
+	}
+
+	if (line_ip != -1) {
+		unsigned int hits = 0;
+		double percent = 0.0;
+		char *color = PERF_COLOR_NORMAL;
+
+		offset = line_ip - start;
+		if (offset < len)
+			hits = sym->hist[offset];
+
+		if (sym->hist_sum)
+			percent = 100.0 * hits / sym->hist_sum;
+
+		/*
+		 * We color high-overhead entries in red, low-overhead
+		 * entries in green - and keep the middle ground normal:
+		 */
+		if (percent >= 5.0)
+			color = PERF_COLOR_RED;
+		else {
+			if (percent > 0.5)
+				color = PERF_COLOR_GREEN;
+		}
+
+		color_fprintf(stdout, color, " %7.2f", percent);
+		printf(" :	");
+		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line);
+	} else {
+		if (!*line)
+			printf("         :\n");
+		else
+			printf("         :	%s\n", line);
+	}
+
+	return 0;
+}
+
+static void annotate_sym(struct dso *dso, struct symbol *sym)
+{
+	char *filename = dso->name;
+	uint64_t start, end, len;
+	char command[PATH_MAX*2];
+	FILE *file;
+
+	if (!filename)
+		return;
+	if (dso == kernel_dso)
+		filename = vmlinux;
+
+	printf("\n------------------------------------------------\n");
+	printf(" Percent |	Source code & Disassembly of %s\n", filename);
+	printf("------------------------------------------------\n");
+
+	if (verbose >= 2)
+		printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
+
+	start = sym->obj_start;
+	if (!start)
+		start = sym->start;
+
+	end = start + sym->end - sym->start + 1;
+	len = sym->end - sym->start;
+
+	sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename);
+
+	if (verbose >= 3)
+		printf("doing: %s\n", command);
+
+	file = popen(command, "r");
+	if (!file)
+		return;
+
+	while (!feof(file)) {
+		if (parse_line(file, sym, start, len) < 0)
+			break;
+	}
+
+	pclose(file);
+}
+
+static void find_annotations(void)
+{
+	struct rb_node *nd;
+	struct dso *dso;
+	int count = 0;
+
+	list_for_each_entry(dso, &dsos, node) {
+
+		for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) {
+			struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+
+			if (sym->hist) {
+				annotate_sym(dso, sym);
+				count++;
+			}
+		}
+	}
+
+	if (!count)
+		printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter);
+}
+
 static int __cmd_annotate(void)
 {
 	int ret, rc = EXIT_FAILURE;
@@ -1140,16 +1207,6 @@ static int __cmd_annotate(void)
 		return EXIT_FAILURE;
 	}
 
-	if (!full_paths) {
-		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
-			perror("failed to get the current directory");
-			return EXIT_FAILURE;
-		}
-		cwdlen = strlen(cwd);
-	} else {
-		cwd = NULL;
-		cwdlen = 0;
-	}
 remap:
 	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
 			   MAP_SHARED, input, offset);
@@ -1229,7 +1286,8 @@ more:
 
 	collapse__resort();
 	output__resort();
-	output__fprintf(stdout, total);
+
+	find_annotations();
 
 	return rc;
 }
@@ -1242,15 +1300,13 @@ static const char * const annotate_usage[] = {
 static const struct option options[] = {
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
+	OPT_STRING('s', "symbol", &sym_hist_filter, "file",
+		    "symbol to annotate"),
 	OPT_BOOLEAN('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
-	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-		   "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"),
-	OPT_BOOLEAN('P', "full-paths", &full_paths,
-		    "Don't shorten the pathnames taking into account the cwd"),
 	OPT_END()
 };
 
@@ -1279,10 +1335,18 @@ int cmd_annotate(int argc, const char **argv, const char *prefix)
 
 	setup_sorting();
 
-	/*
-	 * Any (unrecognized) arguments left?
-	 */
-	if (argc)
+	if (argc) {
+		/*
+		 * Special case: if there's an argument left then assume tha
+		 * it's a symbol filter:
+		 */
+		if (argc > 1)
+			usage_with_options(annotate_usage, options);
+
+		sym_hist_filter = argv[0];
+	}
+
+	if (!sym_hist_filter)
 		usage_with_options(annotate_usage, options);
 
 	setup_pager();
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
index a06bbfba..23f4f7b 100644
--- a/Documentation/perf_counter/util/symbol.c
+++ b/Documentation/perf_counter/util/symbol.c
@@ -7,21 +7,36 @@
 #include <gelf.h>
 #include <elf.h>
 
+const char *sym_hist_filter;
+
 static struct symbol *symbol__new(uint64_t start, uint64_t len,
-				  const char *name, unsigned int priv_size)
+				  const char *name, unsigned int priv_size,
+				  uint64_t obj_start, int verbose)
 {
 	size_t namelen = strlen(name) + 1;
-	struct symbol *self = malloc(priv_size + sizeof(*self) + namelen);
+	struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen);
 
-	if (self != NULL) {
-		if (priv_size) {
-			memset(self, 0, priv_size);
-			self = ((void *)self) + priv_size;
-		}
-		self->start = start;
-		self->end   = start + len - 1;
-		memcpy(self->name, name, namelen);
+	if (!self)
+		return NULL;
+
+	if (verbose >= 2)
+		printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n",
+			(__u64)start, len, name, self->hist, (void *)obj_start);
+
+	self->obj_start= obj_start;
+	self->hist = NULL;
+	self->hist_sum = 0;
+
+	if (sym_hist_filter && !strcmp(name, sym_hist_filter))
+		self->hist = calloc(sizeof(__u64), len);
+
+	if (priv_size) {
+		memset(self, 0, priv_size);
+		self = ((void *)self) + priv_size;
 	}
+	self->start = start;
+	self->end   = start + len - 1;
+	memcpy(self->name, name, namelen);
 
 	return self;
 }
@@ -166,7 +181,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb
 		 * Well fix up the end later, when we have all sorted.
 		 */
 		sym = symbol__new(start, 0xdead, line + len + 2,
-				  self->sym_priv_size);
+				  self->sym_priv_size, 0, verbose);
 
 		if (sym == NULL)
 			goto out_delete_line;
@@ -272,7 +287,7 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
 static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
 				       GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym,
 				       GElf_Shdr *shdr_dynsym,
-				       size_t dynsym_idx)
+				       size_t dynsym_idx, int verbose)
 {
 	uint32_t nr_rel_entries, idx;
 	GElf_Sym sym;
@@ -335,7 +350,7 @@ static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
 				 "%s@plt", elf_sym__name(&sym, symstrs));
 
 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
-					sympltname, self->sym_priv_size);
+					sympltname, self->sym_priv_size, 0, verbose);
 			if (!f)
 				return -1;
 
@@ -353,7 +368,7 @@ static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
 				 "%s@plt", elf_sym__name(&sym, symstrs));
 
 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
-					sympltname, self->sym_priv_size);
+					sympltname, self->sym_priv_size, 0, verbose);
 			if (!f)
 				return -1;
 
@@ -410,7 +425,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 	if (sec_dynsym != NULL) {
 		nr = dso__synthesize_plt_symbols(self, elf, &ehdr,
 						 sec_dynsym, &shdr,
-						 dynsym_idx);
+						 dynsym_idx, verbose);
 		if (nr < 0)
 			goto out_elf_end;
 	}
@@ -444,6 +459,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 
 	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
 		struct symbol *f;
+		uint64_t obj_start;
 
 		if (!elf_sym__is_function(&sym))
 			continue;
@@ -453,11 +469,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 			goto out_elf_end;
 
 		gelf_getshdr(sec, &shdr);
+		obj_start = sym.st_value;
+
 		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 
 		f = symbol__new(sym.st_value, sym.st_size,
 				elf_sym__name(&sym, symstrs),
-				self->sym_priv_size);
+				self->sym_priv_size, obj_start, verbose);
 		if (!f)
 			goto out_elf_end;
 
diff --git a/Documentation/perf_counter/util/symbol.h b/Documentation/perf_counter/util/symbol.h
index e23cc31..4839d68 100644
--- a/Documentation/perf_counter/util/symbol.h
+++ b/Documentation/perf_counter/util/symbol.h
@@ -9,6 +9,9 @@ struct symbol {
 	struct rb_node	rb_node;
 	__u64		start;
 	__u64		end;
+	__u64		obj_start;
+	__u64		hist_sum;
+	__u64		*hist;
 	char		name[0];
 };
 
@@ -20,6 +23,8 @@ struct dso {
 	char		 name[0];
 };
 
+const char *sym_hist_filter;
+
 typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym);
 
 struct dso *dso__new(const char *name, unsigned int sym_priv_size);
-- 
cgit v0.10.2


From 864709302a80f26fa9da3be5b47304f0b8bae192 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 20:33:43 +0200
Subject: perf_counter tools: Move from Documentation/perf_counter/ to
 tools/perf/

Several people have suggested that 'perf' has become a full-fledged
tool that should be moved out of Documentation/. Move it to the
(new) tools/ directory.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/perf_counter/.gitignore b/Documentation/perf_counter/.gitignore
deleted file mode 100644
index d69a759..0000000
--- a/Documentation/perf_counter/.gitignore
+++ /dev/null
@@ -1,16 +0,0 @@
-PERF-BUILD-OPTIONS
-PERF-CFLAGS
-PERF-GUI-VARS
-PERF-VERSION-FILE
-perf
-perf-help
-perf-record
-perf-report
-perf-stat
-perf-top
-perf*.1
-perf*.xml
-common-cmds.h
-tags
-TAGS
-cscope*
diff --git a/Documentation/perf_counter/Documentation/Makefile b/Documentation/perf_counter/Documentation/Makefile
deleted file mode 100644
index 5457192..0000000
--- a/Documentation/perf_counter/Documentation/Makefile
+++ /dev/null
@@ -1,300 +0,0 @@
-MAN1_TXT= \
-	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
-		$(wildcard perf-*.txt)) \
-	perf.txt
-MAN5_TXT=
-MAN7_TXT=
-
-MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
-MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
-MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
-
-DOC_HTML=$(MAN_HTML)
-
-ARTICLES =
-# with their own formatting rules.
-SP_ARTICLES =
-API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
-SP_ARTICLES += $(API_DOCS)
-SP_ARTICLES += technical/api-index
-
-DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
-
-DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
-DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
-DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
-
-prefix?=$(HOME)
-bindir?=$(prefix)/bin
-htmldir?=$(prefix)/share/doc/perf-doc
-pdfdir?=$(prefix)/share/doc/perf-doc
-mandir?=$(prefix)/share/man
-man1dir=$(mandir)/man1
-man5dir=$(mandir)/man5
-man7dir=$(mandir)/man7
-# DESTDIR=
-
-ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA =
-MANPAGE_XSL = manpage-normal.xsl
-XMLTO_EXTRA =
-INSTALL?=install
-RM ?= rm -f
-DOC_REF = origin/man
-HTML_REF = origin/html
-
-infodir?=$(prefix)/share/info
-MAKEINFO=makeinfo
-INSTALL_INFO=install-info
-DOCBOOK2X_TEXI=docbook2x-texi
-DBLATEX=dblatex
-ifndef PERL_PATH
-	PERL_PATH = /usr/bin/perl
-endif
-
--include ../config.mak.autogen
--include ../config.mak
-
-#
-# For asciidoc ...
-#	-7.1.2,	no extra settings are needed.
-#	8.0-,	set ASCIIDOC8.
-#
-
-#
-# For docbook-xsl ...
-#	-1.68.1,	set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
-#	1.69.0,		no extra settings are needed?
-#	1.69.1-1.71.0,	set DOCBOOK_SUPPRESS_SP?
-#	1.71.1,		no extra settings are needed?
-#	1.72.0,		set DOCBOOK_XSL_172.
-#	1.73.0-,	set ASCIIDOC_NO_ROFF
-#
-
-#
-# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
-# of 'the ".ft C" problem' in your generated manpages, and you
-# instead ended up with weird characters around callouts, try
-# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
-#
-
-ifdef ASCIIDOC8
-ASCIIDOC_EXTRA += -a asciidoc7compatible
-endif
-ifdef DOCBOOK_XSL_172
-ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
-MANPAGE_XSL = manpage-1.72.xsl
-else
-	ifdef ASCIIDOC_NO_ROFF
-	# docbook-xsl after 1.72 needs the regular XSL, but will not
-	# pass-thru raw roff codes from asciidoc.conf, so turn them off.
-	ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
-	endif
-endif
-ifdef MAN_BOLD_LITERAL
-XMLTO_EXTRA += -m manpage-bold-literal.xsl
-endif
-ifdef DOCBOOK_SUPPRESS_SP
-XMLTO_EXTRA += -m manpage-suppress-sp.xsl
-endif
-
-SHELL_PATH ?= $(SHELL)
-# Shell quote;
-SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
-
-#
-# Please note that there is a minor bug in asciidoc.
-# The version after 6.0.3 _will_ include the patch found here:
-#   http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
-#
-# Until that version is released you may have to apply the patch
-# yourself - yes, all 6 characters of it!
-#
-
-QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
-QUIET_SUBDIR1  =
-
-ifneq ($(findstring $(MAKEFLAGS),w),w)
-PRINT_DIR = --no-print-directory
-else # "make -w"
-NO_SUBDIR = :
-endif
-
-ifneq ($(findstring $(MAKEFLAGS),s),s)
-ifndef V
-	QUIET_ASCIIDOC	= @echo '   ' ASCIIDOC $@;
-	QUIET_XMLTO	= @echo '   ' XMLTO $@;
-	QUIET_DB2TEXI	= @echo '   ' DB2TEXI $@;
-	QUIET_MAKEINFO	= @echo '   ' MAKEINFO $@;
-	QUIET_DBLATEX	= @echo '   ' DBLATEX $@;
-	QUIET_XSLTPROC	= @echo '   ' XSLTPROC $@;
-	QUIET_GEN	= @echo '   ' GEN $@;
-	QUIET_STDERR	= 2> /dev/null
-	QUIET_SUBDIR0	= +@subdir=
-	QUIET_SUBDIR1	= ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
-			  $(MAKE) $(PRINT_DIR) -C $$subdir
-	export V
-endif
-endif
-
-all: html man
-
-html: $(DOC_HTML)
-
-$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
-
-man: man1 man5 man7
-man1: $(DOC_MAN1)
-man5: $(DOC_MAN5)
-man7: $(DOC_MAN7)
-
-info: perf.info perfman.info
-
-pdf: user-manual.pdf
-
-install: install-man
-
-install-man: man
-	$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
-#	$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
-#	$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
-	$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
-#	$(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
-#	$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
-
-install-info: info
-	$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
-	$(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir)
-	if test -r $(DESTDIR)$(infodir)/dir; then \
-	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
-	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
-	else \
-	  echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
-	fi
-
-install-pdf: pdf
-	$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
-	$(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
-
-install-html: html
-	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
-
-../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
-	$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE
-
--include ../PERF-VERSION-FILE
-
-#
-# Determine "include::" file references in asciidoc files.
-#
-doc.dep : $(wildcard *.txt) build-docdep.perl
-	$(QUIET_GEN)$(RM) $@+ $@ && \
-	$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
-	mv $@+ $@
-
--include doc.dep
-
-cmds_txt = cmds-ancillaryinterrogators.txt \
-	cmds-ancillarymanipulators.txt \
-	cmds-mainporcelain.txt \
-	cmds-plumbinginterrogators.txt \
-	cmds-plumbingmanipulators.txt \
-	cmds-synchingrepositories.txt \
-	cmds-synchelpers.txt \
-	cmds-purehelpers.txt \
-	cmds-foreignscminterface.txt
-
-$(cmds_txt): cmd-list.made
-
-cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
-	$(QUIET_GEN)$(RM) $@ && \
-	$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
-	date >$@
-
-clean:
-	$(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7
-	$(RM) *.texi *.texi+ *.texi++ perf.info perfman.info
-	$(RM) howto-index.txt howto/*.html doc.dep
-	$(RM) technical/api-*.html technical/api-index.txt
-	$(RM) $(cmds_txt) *.made
-
-$(MAN_HTML): %.html : %.txt
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
-		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
-	mv $@+ $@
-
-%.1 %.5 %.7 : %.xml
-	$(QUIET_XMLTO)$(RM) $@ && \
-	xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
-
-%.xml : %.txt
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
-		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
-	mv $@+ $@
-
-XSLT = docbook.xsl
-XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
-
-user-manual.html: user-manual.xml
-	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
-
-perf.info: user-manual.texi
-	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
-
-user-manual.texi: user-manual.xml
-	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
-	$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
-	$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
-	rm $@++ && \
-	mv $@+ $@
-
-user-manual.pdf: user-manual.xml
-	$(QUIET_DBLATEX)$(RM) $@+ $@ && \
-	$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
-	mv $@+ $@
-
-perfman.texi: $(MAN_XML) cat-texi.perl
-	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
-	($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
-		--to-stdout $(xml) &&) true) > $@++ && \
-	$(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
-	rm $@++ && \
-	mv $@+ $@
-
-perfman.info: perfman.texi
-	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
-
-$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
-	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
-	$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
-	mv $@+ $@
-
-howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
-	$(QUIET_GEN)$(RM) $@+ $@ && \
-	'$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
-	mv $@+ $@
-
-$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
-	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
-
-WEBDOC_DEST = /pub/software/tools/perf/docs
-
-$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
-	mv $@+ $@
-
-install-webdoc : html
-	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
-
-quick-install: quick-install-man
-
-quick-install-man:
-	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
-
-quick-install-html:
-	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
-
-.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/Documentation/perf_counter/Documentation/asciidoc.conf b/Documentation/perf_counter/Documentation/asciidoc.conf
deleted file mode 100644
index 356b23a..0000000
--- a/Documentation/perf_counter/Documentation/asciidoc.conf
+++ /dev/null
@@ -1,91 +0,0 @@
-## linkperf: macro
-#
-# Usage: linkperf:command[manpage-section]
-#
-# Note, {0} is the manpage section, while {target} is the command.
-#
-# Show PERF link as: <command>(<section>); if section is defined, else just show
-# the command.
-
-[macros]
-(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
-
-[attributes]
-asterisk=&#42;
-plus=&#43;
-caret=&#94;
-startsb=&#91;
-endsb=&#93;
-tilde=&#126;
-
-ifdef::backend-docbook[]
-[linkperf-inlinemacro]
-{0%{target}}
-{0#<citerefentry>}
-{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
-{0#</citerefentry>}
-endif::backend-docbook[]
-
-ifdef::backend-docbook[]
-ifndef::perf-asciidoc-no-roff[]
-# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
-# v1.72 breaks with this because it replaces dots not in roff requests.
-[listingblock]
-<example><title>{title}</title>
-<literallayout>
-ifdef::doctype-manpage[]
-&#10;.ft C&#10;
-endif::doctype-manpage[]
-|
-ifdef::doctype-manpage[]
-&#10;.ft&#10;
-endif::doctype-manpage[]
-</literallayout>
-{title#}</example>
-endif::perf-asciidoc-no-roff[]
-
-ifdef::perf-asciidoc-no-roff[]
-ifdef::doctype-manpage[]
-# The following two small workarounds insert a simple paragraph after screen
-[listingblock]
-<example><title>{title}</title>
-<literallayout>
-|
-</literallayout><simpara></simpara>
-{title#}</example>
-
-[verseblock]
-<formalpara{id? id="{id}"}><title>{title}</title><para>
-{title%}<literallayout{id? id="{id}"}>
-{title#}<literallayout>
-|
-</literallayout>
-{title#}</para></formalpara>
-{title%}<simpara></simpara>
-endif::doctype-manpage[]
-endif::perf-asciidoc-no-roff[]
-endif::backend-docbook[]
-
-ifdef::doctype-manpage[]
-ifdef::backend-docbook[]
-[header]
-template::[header-declarations]
-<refentry>
-<refmeta>
-<refentrytitle>{mantitle}</refentrytitle>
-<manvolnum>{manvolnum}</manvolnum>
-<refmiscinfo class="source">perf</refmiscinfo>
-<refmiscinfo class="version">{perf_version}</refmiscinfo>
-<refmiscinfo class="manual">perf Manual</refmiscinfo>
-</refmeta>
-<refnamediv>
-  <refname>{manname}</refname>
-  <refpurpose>{manpurpose}</refpurpose>
-</refnamediv>
-endif::backend-docbook[]
-endif::doctype-manpage[]
-
-ifdef::backend-xhtml11[]
-[linkperf-inlinemacro]
-<a href="{target}.html">{target}{0?({0})}</a>
-endif::backend-xhtml11[]
diff --git a/Documentation/perf_counter/Documentation/manpage-1.72.xsl b/Documentation/perf_counter/Documentation/manpage-1.72.xsl
deleted file mode 100644
index b4d315c..0000000
--- a/Documentation/perf_counter/Documentation/manpage-1.72.xsl
+++ /dev/null
@@ -1,14 +0,0 @@
-<!-- manpage-1.72.xsl:
-     special settings for manpages rendered from asciidoc+docbook
-     handles peculiarities in docbook-xsl 1.72.0 -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<xsl:import href="manpage-base.xsl"/>
-
-<!-- these are the special values for the roff control characters
-     needed for docbook-xsl 1.72.0 -->
-<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
-<xsl:param name="git.docbook.dot"      >&#x2302;</xsl:param>
-
-</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/manpage-base.xsl b/Documentation/perf_counter/Documentation/manpage-base.xsl
deleted file mode 100644
index a264fa61..0000000
--- a/Documentation/perf_counter/Documentation/manpage-base.xsl
+++ /dev/null
@@ -1,35 +0,0 @@
-<!-- manpage-base.xsl:
-     special formatting for manpages rendered from asciidoc+docbook -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<!-- these params silence some output from xmlto -->
-<xsl:param name="man.output.quietly" select="1"/>
-<xsl:param name="refentry.meta.get.quietly" select="1"/>
-
-<!-- convert asciidoc callouts to man page format;
-     git.docbook.backslash and git.docbook.dot params
-     must be supplied by another XSL file or other means -->
-<xsl:template match="co">
-	<xsl:value-of select="concat(
-			      $git.docbook.backslash,'fB(',
-			      substring-after(@id,'-'),')',
-			      $git.docbook.backslash,'fR')"/>
-</xsl:template>
-<xsl:template match="calloutlist">
-	<xsl:value-of select="$git.docbook.dot"/>
-	<xsl:text>sp&#10;</xsl:text>
-	<xsl:apply-templates/>
-	<xsl:text>&#10;</xsl:text>
-</xsl:template>
-<xsl:template match="callout">
-	<xsl:value-of select="concat(
-			      $git.docbook.backslash,'fB',
-			      substring-after(@arearefs,'-'),
-			      '. ',$git.docbook.backslash,'fR')"/>
-	<xsl:apply-templates/>
-	<xsl:value-of select="$git.docbook.dot"/>
-	<xsl:text>br&#10;</xsl:text>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/manpage-bold-literal.xsl b/Documentation/perf_counter/Documentation/manpage-bold-literal.xsl
deleted file mode 100644
index 608eb5d..0000000
--- a/Documentation/perf_counter/Documentation/manpage-bold-literal.xsl
+++ /dev/null
@@ -1,17 +0,0 @@
-<!-- manpage-bold-literal.xsl:
-     special formatting for manpages rendered from asciidoc+docbook -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<!-- render literal text as bold (instead of plain or monospace);
-     this makes literal text easier to distinguish in manpages
-     viewed on a tty -->
-<xsl:template match="literal">
-	<xsl:value-of select="$git.docbook.backslash"/>
-	<xsl:text>fB</xsl:text>
-	<xsl:apply-templates/>
-	<xsl:value-of select="$git.docbook.backslash"/>
-	<xsl:text>fR</xsl:text>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/manpage-normal.xsl b/Documentation/perf_counter/Documentation/manpage-normal.xsl
deleted file mode 100644
index a48f5b1..0000000
--- a/Documentation/perf_counter/Documentation/manpage-normal.xsl
+++ /dev/null
@@ -1,13 +0,0 @@
-<!-- manpage-normal.xsl:
-     special settings for manpages rendered from asciidoc+docbook
-     handles anything we want to keep away from docbook-xsl 1.72.0 -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<xsl:import href="manpage-base.xsl"/>
-
-<!-- these are the normal values for the roff control characters -->
-<xsl:param name="git.docbook.backslash">\</xsl:param>
-<xsl:param name="git.docbook.dot"	>.</xsl:param>
-
-</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/manpage-suppress-sp.xsl b/Documentation/perf_counter/Documentation/manpage-suppress-sp.xsl
deleted file mode 100644
index a63c763..0000000
--- a/Documentation/perf_counter/Documentation/manpage-suppress-sp.xsl
+++ /dev/null
@@ -1,21 +0,0 @@
-<!-- manpage-suppress-sp.xsl:
-     special settings for manpages rendered from asciidoc+docbook
-     handles erroneous, inline .sp in manpage output of some
-     versions of docbook-xsl -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<!-- attempt to work around spurious .sp at the tail of the line
-     that some versions of docbook stylesheets seem to add -->
-<xsl:template match="simpara">
-  <xsl:variable name="content">
-    <xsl:apply-templates/>
-  </xsl:variable>
-  <xsl:value-of select="normalize-space($content)"/>
-  <xsl:if test="not(ancestor::authorblurb) and
-                not(ancestor::personblurb)">
-    <xsl:text>&#10;&#10;</xsl:text>
-  </xsl:if>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/Documentation/perf_counter/Documentation/perf-annotate.txt b/Documentation/perf_counter/Documentation/perf-annotate.txt
deleted file mode 100644
index c9dcade..0000000
--- a/Documentation/perf_counter/Documentation/perf-annotate.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-perf-annotate(1)
-==============
-
-NAME
-----
-perf-annotate - Read perf.data (created by perf record) and display annotated code
-
-SYNOPSIS
---------
-[verse]
-'perf annotate' [-i <file> | --input=file] symbol_name
-
-DESCRIPTION
------------
-This command reads the input file and displays an annotated version of the
-code. If the object file has debug symbols then the source code will be
-displayed alongside assembly code.
-
-If there is no debug info in the object, then annotated assembly is displayed.
-
-OPTIONS
--------
--i::
---input=::
-        Input file name. (default: perf.data)
-
-SEE ALSO
---------
-linkperf:perf-record[1]
diff --git a/Documentation/perf_counter/Documentation/perf-help.txt b/Documentation/perf_counter/Documentation/perf-help.txt
deleted file mode 100644
index 5143918..0000000
--- a/Documentation/perf_counter/Documentation/perf-help.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-perf-help(1)
-============
-
-NAME
-----
-perf-help - display help information about perf
-
-SYNOPSIS
---------
-'perf help' [-a|--all] [COMMAND]
-
-DESCRIPTION
------------
-
-With no options and no COMMAND given, the synopsis of the 'perf'
-command and a list of the most commonly used perf commands are printed
-on the standard output.
-
-If the option '--all' or '-a' is given, then all available commands are
-printed on the standard output.
-
-If a perf command is named, a manual page for that command is brought
-up. The 'man' program is used by default for this purpose, but this
-can be overridden by other options or configuration variables.
-
-Note that `perf --help ...` is identical to `perf help ...` because the
-former is internally converted into the latter.
-
-OPTIONS
--------
--a::
---all::
-	Prints all the available commands on the standard output. This
-	option supersedes any other option.
-
-PERF
-----
-Part of the linkperf:perf[1] suite
diff --git a/Documentation/perf_counter/Documentation/perf-list.txt b/Documentation/perf_counter/Documentation/perf-list.txt
deleted file mode 100644
index 8290b94..0000000
--- a/Documentation/perf_counter/Documentation/perf-list.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-perf-list(1)
-============
-
-NAME
-----
-perf-list - List all symbolic event types
-
-SYNOPSIS
---------
-[verse]
-'perf list'
-
-DESCRIPTION
------------
-This command displays the symbolic event types which can be selected in the
-various perf commands with the -e option.
-
-OPTIONS
--------
-None
-
-SEE ALSO
---------
-linkperf:perf-stat[1], linkperf:perf-top[1],
-linkperf:perf-record[1]
diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt
deleted file mode 100644
index 1dbc1ee..0000000
--- a/Documentation/perf_counter/Documentation/perf-record.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-perf-record(1)
-==============
-
-NAME
-----
-perf-record - Run a command and record its profile into perf.data
-
-SYNOPSIS
---------
-[verse]
-'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
-'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
-
-DESCRIPTION
------------
-This command runs a command and gathers a performance counter profile
-from it, into perf.data - without displaying anything.
-
-This file can then be inspected later on, using 'perf report'.
-
-
-OPTIONS
--------
-<command>...::
-	Any command you can specify in a shell.
-
--e::
---event=::
-	Select the PMU event. Selection can be a symbolic event name
-	(use 'perf list' to list all events) or a raw PMU
-	event (eventsel+umask) in the form of rNNN where NNN is a
-	 hexadecimal event descriptor.
-
--a::
-        system-wide collection
-
--l::
-        scale counter values
-
-SEE ALSO
---------
-linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/Documentation/perf_counter/Documentation/perf-report.txt b/Documentation/perf_counter/Documentation/perf-report.txt
deleted file mode 100644
index 52d3fc6..0000000
--- a/Documentation/perf_counter/Documentation/perf-report.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-perf-report(1)
-==============
-
-NAME
-----
-perf-report - Read perf.data (created by perf record) and display the profile
-
-SYNOPSIS
---------
-[verse]
-'perf report' [-i <file> | --input=file]
-
-DESCRIPTION
------------
-This command displays the performance counter profile information recorded
-via perf report.
-
-OPTIONS
--------
--i::
---input=::
-        Input file name. (default: perf.data)
-
-SEE ALSO
---------
-linkperf:perf-stat[1]
diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt
deleted file mode 100644
index c368a72..0000000
--- a/Documentation/perf_counter/Documentation/perf-stat.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-perf-stat(1)
-============
-
-NAME
-----
-perf-stat - Run a command and gather performance counter statistics
-
-SYNOPSIS
---------
-[verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
-
-DESCRIPTION
------------
-This command runs a command and gathers performance counter statistics
-from it.
-
-
-OPTIONS
--------
-<command>...::
-	Any command you can specify in a shell.
-
-
--e::
---event=::
-	Select the PMU event. Selection can be a symbolic event name
-	(use 'perf list' to list all events) or a raw PMU
-	event (eventsel+umask) in the form of rNNN where NNN is a
-	 hexadecimal event descriptor.
-
--i::
---inherit::
-        child tasks inherit counters
--p::
---pid=<pid>::
-        stat events on existing pid
-
--a::
-        system-wide collection
-
--l::
-        scale counter values
-
-EXAMPLES
---------
-
-$ perf stat -- make -j
-
- Performance counter stats for 'make -j':
-
-    8117.370256  task clock ticks     #      11.281 CPU utilization factor
-            678  context switches     #       0.000 M/sec
-            133  CPU migrations       #       0.000 M/sec
-         235724  pagefaults           #       0.029 M/sec
-    24821162526  CPU cycles           #    3057.784 M/sec
-    18687303457  instructions         #    2302.138 M/sec
-      172158895  cache references     #      21.209 M/sec
-       27075259  cache misses         #       3.335 M/sec
-
- Wall-clock time elapsed:   719.554352 msecs
-
-SEE ALSO
---------
-linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/Documentation/perf_counter/Documentation/perf-top.txt b/Documentation/perf_counter/Documentation/perf-top.txt
deleted file mode 100644
index 539d012..0000000
--- a/Documentation/perf_counter/Documentation/perf-top.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-perf-top(1)
-===========
-
-NAME
-----
-perf-top - Run a command and profile it
-
-SYNOPSIS
---------
-[verse]
-'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
-
-DESCRIPTION
------------
-This command runs a command and gathers a performance counter profile
-from it.
-
-
-OPTIONS
--------
-<command>...::
-	Any command you can specify in a shell.
-
--e::
---event=::
-	Select the PMU event. Selection can be a symbolic event name
-	(use 'perf list' to list all events) or a raw PMU
-	event (eventsel+umask) in the form of rNNN where NNN is a
-	 hexadecimal event descriptor.
-
--a::
-        system-wide collection
-
--l::
-        scale counter values
-
-SEE ALSO
---------
-linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/Documentation/perf_counter/Documentation/perf.txt b/Documentation/perf_counter/Documentation/perf.txt
deleted file mode 100644
index 69c8325..0000000
--- a/Documentation/perf_counter/Documentation/perf.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-perf(1)
-=======
-
-NAME
-----
-perf - Performance analysis tools for Linux
-
-SYNOPSIS
---------
-[verse]
-'perf' [--version] [--help] COMMAND [ARGS]
-
-DESCRIPTION
------------
-Performance counters for Linux are are a new kernel-based subsystem
-that provide a framework for all things performance analysis. It
-covers hardware level (CPU/PMU, Performance Monitoring Unit) features
-and software features (software counters, tracepoints) as well.
-
-SEE ALSO
---------
-linkperf:perf-stat[1], linkperf:perf-top[1],
-linkperf:perf-record[1], linkperf:perf-report[1],
-linkperf:perf-list[1]
diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile
deleted file mode 100644
index 0cbd5d6..0000000
--- a/Documentation/perf_counter/Makefile
+++ /dev/null
@@ -1,929 +0,0 @@
-# The default target of this Makefile is...
-all::
-
-# Define V=1 to have a more verbose compile.
-#
-# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
-# or vsnprintf() return -1 instead of number of characters which would
-# have been written to the final string if enough space had been available.
-#
-# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds
-# when attempting to read from an fopen'ed directory.
-#
-# Define NO_OPENSSL environment variable if you do not have OpenSSL.
-# This also implies MOZILLA_SHA1.
-#
-# Define CURLDIR=/foo/bar if your curl header and library files are in
-# /foo/bar/include and /foo/bar/lib directories.
-#
-# Define EXPATDIR=/foo/bar if your expat header and library files are in
-# /foo/bar/include and /foo/bar/lib directories.
-#
-# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent.
-#
-# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks
-# d_type in struct dirent (latest Cygwin -- will be fixed soonish).
-#
-# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.)
-# do not support the 'size specifiers' introduced by C99, namely ll, hh,
-# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t).
-# some C compilers supported these specifiers prior to C99 as an extension.
-#
-# Define NO_STRCASESTR if you don't have strcasestr.
-#
-# Define NO_MEMMEM if you don't have memmem.
-#
-# Define NO_STRTOUMAX if you don't have strtoumax in the C library.
-# If your compiler also does not support long long or does not have
-# strtoull, define NO_STRTOULL.
-#
-# Define NO_SETENV if you don't have setenv in the C library.
-#
-# Define NO_UNSETENV if you don't have unsetenv in the C library.
-#
-# Define NO_MKDTEMP if you don't have mkdtemp in the C library.
-#
-# Define NO_SYS_SELECT_H if you don't have sys/select.h.
-#
-# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link.
-# Enable it on Windows.  By default, symrefs are still used.
-#
-# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
-# tests.  These tests take up a significant amount of the total test time
-# but are not needed unless you plan to talk to SVN repos.
-#
-# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
-# installed in /sw, but don't want PERF to link against any libraries
-# installed there.  If defined you may specify your own (or Fink's)
-# include directories and library directories by defining CFLAGS
-# and LDFLAGS appropriately.
-#
-# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
-# have DarwinPorts installed in /opt/local, but don't want PERF to
-# link against any libraries installed there.  If defined you may
-# specify your own (or DarwinPort's) include directories and
-# library directories by defining CFLAGS and LDFLAGS appropriately.
-#
-# Define PPC_SHA1 environment variable when running make to make use of
-# a bundled SHA1 routine optimized for PowerPC.
-#
-# Define ARM_SHA1 environment variable when running make to make use of
-# a bundled SHA1 routine optimized for ARM.
-#
-# Define MOZILLA_SHA1 environment variable when running make to make use of
-# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast
-# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
-# choice) has very fast version optimized for i586.
-#
-# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
-#
-# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
-#
-# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
-# Patrick Mauritz).
-#
-# Define NO_MMAP if you want to avoid mmap.
-#
-# Define NO_PTHREADS if you do not have or do not want to use Pthreads.
-#
-# Define NO_PREAD if you have a problem with pread() system call (e.g.
-# cygwin.dll before v1.5.22).
-#
-# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
-# generally faster on your platform than accessing the working directory.
-#
-# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support
-# the executable mode bit, but doesn't really do so.
-#
-# Define NO_IPV6 if you lack IPv6 support and getaddrinfo().
-#
-# Define NO_SOCKADDR_STORAGE if your platform does not have struct
-# sockaddr_storage.
-#
-# Define NO_ICONV if your libc does not properly support iconv.
-#
-# Define OLD_ICONV if your library has an old iconv(), where the second
-# (input buffer pointer) parameter is declared with type (const char **).
-#
-# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound.
-#
-# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib"
-# that tells runtime paths to dynamic libraries;
-# "-Wl,-rpath=/path/lib" is used instead.
-#
-# Define USE_NSEC below if you want perf to care about sub-second file mtimes
-# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
-# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
-# randomly break unless your underlying filesystem supports those sub-second
-# times (my ext3 doesn't).
-#
-# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of
-# "st_ctim"
-#
-# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
-# available.  This automatically turns USE_NSEC off.
-#
-# Define USE_STDEV below if you want perf to care about the underlying device
-# change being considered an inode change from the update-index perspective.
-#
-# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
-# field that counts the on-disk footprint in 512-byte blocks.
-#
-# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
-#
-# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
-#
-# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's
-# MakeMaker (e.g. using ActiveState under Cygwin).
-#
-# Define NO_PERL if you do not want Perl scripts or libraries at all.
-#
-# Define INTERNAL_QSORT to use Git's implementation of qsort(), which
-# is a simplified version of the merge sort used in glibc. This is
-# recommended if Git triggers O(n^2) behavior in your platform's qsort().
-#
-# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
-# your external grep (e.g., if your system lacks grep, if its grep is
-# broken, or spawning external process is slower than built-in grep perf has).
-
-PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
-	@$(SHELL_PATH) util/PERF-VERSION-GEN
--include PERF-VERSION-FILE
-
-uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
-uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
-uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
-uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
-uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
-uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
-
-# CFLAGS and LDFLAGS are for the users to override from the command line.
-
-CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
-LDFLAGS = -lpthread -lrt -lelf
-ALL_CFLAGS = $(CFLAGS)
-ALL_LDFLAGS = $(LDFLAGS)
-STRIP ?= strip
-
-# Among the variables below, these:
-#   perfexecdir
-#   template_dir
-#   mandir
-#   infodir
-#   htmldir
-#   ETC_PERFCONFIG (but not sysconfdir)
-# can be specified as a relative path some/where/else;
-# this is interpreted as relative to $(prefix) and "perf" at
-# runtime figures out where they are based on the path to the executable.
-# This can help installing the suite in a relocatable way.
-
-prefix = $(HOME)
-bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
-mandir = share/man
-infodir = share/info
-perfexecdir = libexec/perf-core
-sharedir = $(prefix)/share
-template_dir = share/perf-core/templates
-htmldir = share/doc/perf-doc
-ifeq ($(prefix),/usr)
-sysconfdir = /etc
-ETC_PERFCONFIG = $(sysconfdir)/perfconfig
-else
-sysconfdir = $(prefix)/etc
-ETC_PERFCONFIG = etc/perfconfig
-endif
-lib = lib
-# DESTDIR=
-
-export prefix bindir sharedir sysconfdir
-
-CC = gcc
-AR = ar
-RM = rm -f
-TAR = tar
-FIND = find
-INSTALL = install
-RPMBUILD = rpmbuild
-PTHREAD_LIBS = -lpthread
-
-# sparse is architecture-neutral, which means that we need to tell it
-# explicitly what architecture to check for. Fix this up for yours..
-SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
-
-
-
-### --- END CONFIGURATION SECTION ---
-
-# Those must not be GNU-specific; they are shared with perl/ which may
-# be built by a different compiler. (Note that this is an artifact now
-# but it still might be nice to keep that distinction.)
-BASIC_CFLAGS =
-BASIC_LDFLAGS =
-
-# Guard against environment variables
-BUILTIN_OBJS =
-BUILT_INS =
-COMPAT_CFLAGS =
-COMPAT_OBJS =
-LIB_H =
-LIB_OBJS =
-SCRIPT_PERL =
-SCRIPT_SH =
-TEST_PROGRAMS =
-
-#
-# No scripts right now:
-#
-
-# SCRIPT_SH += perf-am.sh
-
-#
-# No Perl scripts right now:
-#
-
-# SCRIPT_PERL += perf-add--interactive.perl
-
-SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
-	  $(patsubst %.perl,%,$(SCRIPT_PERL))
-
-# Empty...
-EXTRA_PROGRAMS =
-
-# ... and all the rest that could be moved out of bindir to perfexecdir
-PROGRAMS += $(EXTRA_PROGRAMS)
-
-#
-# Single 'perf' binary right now:
-#
-PROGRAMS += perf
-
-# List built-in command $C whose implementation cmd_$C() is not in
-# builtin-$C.o but is linked in as part of some other command.
-#
-# None right now:
-#
-# BUILT_INS += perf-init $X
-
-# what 'all' will build and 'install' will install, in perfexecdir
-ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
-
-# what 'all' will build but not install in perfexecdir
-OTHER_PROGRAMS = perf$X
-
-# Set paths to tools early so that they can be used for version tests.
-ifndef SHELL_PATH
-	SHELL_PATH = /bin/sh
-endif
-ifndef PERL_PATH
-	PERL_PATH = /usr/bin/perl
-endif
-
-export PERL_PATH
-
-LIB_FILE=libperf.a
-
-LIB_H += ../../include/linux/perf_counter.h
-LIB_H += perf.h
-LIB_H += util/list.h
-LIB_H += util/rbtree.h
-LIB_H += util/levenshtein.h
-LIB_H += util/parse-options.h
-LIB_H += util/parse-events.h
-LIB_H += util/quote.h
-LIB_H += util/util.h
-LIB_H += util/help.h
-LIB_H += util/strbuf.h
-LIB_H += util/string.h
-LIB_H += util/run-command.h
-LIB_H += util/sigchain.h
-LIB_H += util/symbol.h
-LIB_H += util/color.h
-
-LIB_OBJS += util/abspath.o
-LIB_OBJS += util/alias.o
-LIB_OBJS += util/config.o
-LIB_OBJS += util/ctype.o
-LIB_OBJS += util/environment.o
-LIB_OBJS += util/exec_cmd.o
-LIB_OBJS += util/help.o
-LIB_OBJS += util/levenshtein.o
-LIB_OBJS += util/parse-options.o
-LIB_OBJS += util/parse-events.o
-LIB_OBJS += util/path.o
-LIB_OBJS += util/rbtree.o
-LIB_OBJS += util/run-command.o
-LIB_OBJS += util/quote.o
-LIB_OBJS += util/strbuf.o
-LIB_OBJS += util/string.o
-LIB_OBJS += util/usage.o
-LIB_OBJS += util/wrapper.o
-LIB_OBJS += util/sigchain.o
-LIB_OBJS += util/symbol.o
-LIB_OBJS += util/color.o
-LIB_OBJS += util/pager.o
-
-BUILTIN_OBJS += builtin-annotate.o
-BUILTIN_OBJS += builtin-help.o
-BUILTIN_OBJS += builtin-list.o
-BUILTIN_OBJS += builtin-record.o
-BUILTIN_OBJS += builtin-report.o
-BUILTIN_OBJS += builtin-stat.o
-BUILTIN_OBJS += builtin-top.o
-
-PERFLIBS = $(LIB_FILE)
-EXTLIBS =
-
-#
-# Platform specific tweaks
-#
-
-# We choose to avoid "if .. else if .. else .. endif endif"
-# because maintaining the nesting to match is a pain.  If
-# we had "elif" things would have been much nicer...
-
--include config.mak.autogen
--include config.mak
-
-ifeq ($(uname_S),Darwin)
-	ifndef NO_FINK
-		ifeq ($(shell test -d /sw/lib && echo y),y)
-			BASIC_CFLAGS += -I/sw/include
-			BASIC_LDFLAGS += -L/sw/lib
-		endif
-	endif
-	ifndef NO_DARWIN_PORTS
-		ifeq ($(shell test -d /opt/local/lib && echo y),y)
-			BASIC_CFLAGS += -I/opt/local/include
-			BASIC_LDFLAGS += -L/opt/local/lib
-		endif
-	endif
-	PTHREAD_LIBS =
-endif
-
-ifndef CC_LD_DYNPATH
-	ifdef NO_R_TO_GCC_LINKER
-		# Some gcc does not accept and pass -R to the linker to specify
-		# the runtime dynamic library path.
-		CC_LD_DYNPATH = -Wl,-rpath,
-	else
-		CC_LD_DYNPATH = -R
-	endif
-endif
-
-ifdef ZLIB_PATH
-	BASIC_CFLAGS += -I$(ZLIB_PATH)/include
-	EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib)
-endif
-EXTLIBS += -lz
-
-ifdef NEEDS_SOCKET
-	EXTLIBS += -lsocket
-endif
-ifdef NEEDS_NSL
-	EXTLIBS += -lnsl
-endif
-ifdef NO_D_TYPE_IN_DIRENT
-	BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT
-endif
-ifdef NO_D_INO_IN_DIRENT
-	BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT
-endif
-ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
-	BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT
-endif
-ifdef USE_NSEC
-	BASIC_CFLAGS += -DUSE_NSEC
-endif
-ifdef USE_ST_TIMESPEC
-	BASIC_CFLAGS += -DUSE_ST_TIMESPEC
-endif
-ifdef NO_NSEC
-	BASIC_CFLAGS += -DNO_NSEC
-endif
-ifdef NO_C99_FORMAT
-	BASIC_CFLAGS += -DNO_C99_FORMAT
-endif
-ifdef SNPRINTF_RETURNS_BOGUS
-	COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
-	COMPAT_OBJS += compat/snprintf.o
-endif
-ifdef FREAD_READS_DIRECTORIES
-	COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
-	COMPAT_OBJS += compat/fopen.o
-endif
-ifdef NO_SYMLINK_HEAD
-	BASIC_CFLAGS += -DNO_SYMLINK_HEAD
-endif
-ifdef NO_STRCASESTR
-	COMPAT_CFLAGS += -DNO_STRCASESTR
-	COMPAT_OBJS += compat/strcasestr.o
-endif
-ifdef NO_STRTOUMAX
-	COMPAT_CFLAGS += -DNO_STRTOUMAX
-	COMPAT_OBJS += compat/strtoumax.o
-endif
-ifdef NO_STRTOULL
-	COMPAT_CFLAGS += -DNO_STRTOULL
-endif
-ifdef NO_SETENV
-	COMPAT_CFLAGS += -DNO_SETENV
-	COMPAT_OBJS += compat/setenv.o
-endif
-ifdef NO_MKDTEMP
-	COMPAT_CFLAGS += -DNO_MKDTEMP
-	COMPAT_OBJS += compat/mkdtemp.o
-endif
-ifdef NO_UNSETENV
-	COMPAT_CFLAGS += -DNO_UNSETENV
-	COMPAT_OBJS += compat/unsetenv.o
-endif
-ifdef NO_SYS_SELECT_H
-	BASIC_CFLAGS += -DNO_SYS_SELECT_H
-endif
-ifdef NO_MMAP
-	COMPAT_CFLAGS += -DNO_MMAP
-	COMPAT_OBJS += compat/mmap.o
-else
-	ifdef USE_WIN32_MMAP
-		COMPAT_CFLAGS += -DUSE_WIN32_MMAP
-		COMPAT_OBJS += compat/win32mmap.o
-	endif
-endif
-ifdef NO_PREAD
-	COMPAT_CFLAGS += -DNO_PREAD
-	COMPAT_OBJS += compat/pread.o
-endif
-ifdef NO_FAST_WORKING_DIRECTORY
-	BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
-endif
-ifdef NO_TRUSTABLE_FILEMODE
-	BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE
-endif
-ifdef NO_IPV6
-	BASIC_CFLAGS += -DNO_IPV6
-endif
-ifdef NO_UINTMAX_T
-	BASIC_CFLAGS += -Duintmax_t=uint32_t
-endif
-ifdef NO_SOCKADDR_STORAGE
-ifdef NO_IPV6
-	BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
-else
-	BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
-endif
-endif
-ifdef NO_INET_NTOP
-	LIB_OBJS += compat/inet_ntop.o
-endif
-ifdef NO_INET_PTON
-	LIB_OBJS += compat/inet_pton.o
-endif
-
-ifdef NO_ICONV
-	BASIC_CFLAGS += -DNO_ICONV
-endif
-
-ifdef OLD_ICONV
-	BASIC_CFLAGS += -DOLD_ICONV
-endif
-
-ifdef NO_DEFLATE_BOUND
-	BASIC_CFLAGS += -DNO_DEFLATE_BOUND
-endif
-
-ifdef PPC_SHA1
-	SHA1_HEADER = "ppc/sha1.h"
-	LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
-else
-ifdef ARM_SHA1
-	SHA1_HEADER = "arm/sha1.h"
-	LIB_OBJS += arm/sha1.o arm/sha1_arm.o
-else
-ifdef MOZILLA_SHA1
-	SHA1_HEADER = "mozilla-sha1/sha1.h"
-	LIB_OBJS += mozilla-sha1/sha1.o
-else
-	SHA1_HEADER = <openssl/sha.h>
-	EXTLIBS += $(LIB_4_CRYPTO)
-endif
-endif
-endif
-ifdef NO_PERL_MAKEMAKER
-	export NO_PERL_MAKEMAKER
-endif
-ifdef NO_HSTRERROR
-	COMPAT_CFLAGS += -DNO_HSTRERROR
-	COMPAT_OBJS += compat/hstrerror.o
-endif
-ifdef NO_MEMMEM
-	COMPAT_CFLAGS += -DNO_MEMMEM
-	COMPAT_OBJS += compat/memmem.o
-endif
-ifdef INTERNAL_QSORT
-	COMPAT_CFLAGS += -DINTERNAL_QSORT
-	COMPAT_OBJS += compat/qsort.o
-endif
-ifdef RUNTIME_PREFIX
-	COMPAT_CFLAGS += -DRUNTIME_PREFIX
-endif
-
-ifdef DIR_HAS_BSD_GROUP_SEMANTICS
-	COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
-endif
-ifdef NO_EXTERNAL_GREP
-	BASIC_CFLAGS += -DNO_EXTERNAL_GREP
-endif
-
-ifeq ($(PERL_PATH),)
-NO_PERL=NoThanks
-endif
-
-QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
-QUIET_SUBDIR1  =
-
-ifneq ($(findstring $(MAKEFLAGS),w),w)
-PRINT_DIR = --no-print-directory
-else # "make -w"
-NO_SUBDIR = :
-endif
-
-ifneq ($(findstring $(MAKEFLAGS),s),s)
-ifndef V
-	QUIET_CC       = @echo '   ' CC $@;
-	QUIET_AR       = @echo '   ' AR $@;
-	QUIET_LINK     = @echo '   ' LINK $@;
-	QUIET_BUILT_IN = @echo '   ' BUILTIN $@;
-	QUIET_GEN      = @echo '   ' GEN $@;
-	QUIET_SUBDIR0  = +@subdir=
-	QUIET_SUBDIR1  = ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
-			 $(MAKE) $(PRINT_DIR) -C $$subdir
-	export V
-	export QUIET_GEN
-	export QUIET_BUILT_IN
-endif
-endif
-
-ifdef ASCIIDOC8
-	export ASCIIDOC8
-endif
-
-# Shell quote (do not use $(call) to accommodate ancient setups);
-
-SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
-ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
-
-DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
-bindir_SQ = $(subst ','\'',$(bindir))
-bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
-mandir_SQ = $(subst ','\'',$(mandir))
-infodir_SQ = $(subst ','\'',$(infodir))
-perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
-template_dir_SQ = $(subst ','\'',$(template_dir))
-htmldir_SQ = $(subst ','\'',$(htmldir))
-prefix_SQ = $(subst ','\'',$(prefix))
-
-SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
-PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
-
-LIBS = $(PERFLIBS) $(EXTLIBS)
-
-BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
-	$(COMPAT_CFLAGS)
-LIB_OBJS += $(COMPAT_OBJS)
-
-ALL_CFLAGS += $(BASIC_CFLAGS)
-ALL_LDFLAGS += $(BASIC_LDFLAGS)
-
-export TAR INSTALL DESTDIR SHELL_PATH
-
-
-### Build rules
-
-SHELL = $(SHELL_PATH)
-
-all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS
-ifneq (,$X)
-	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
-endif
-
-all::
-
-please_set_SHELL_PATH_to_a_more_modern_shell:
-	@$$(:)
-
-shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
-
-strip: $(PROGRAMS) perf$X
-	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X
-
-perf.o: perf.c common-cmds.h PERF-CFLAGS
-	$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
-		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
-		$(ALL_CFLAGS) -c $(filter %.c,$^)
-
-perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS)
-	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \
-		$(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
-
-builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
-		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
-		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
-		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
-$(BUILT_INS): perf$X
-	$(QUIET_BUILT_IN)$(RM) $@ && \
-	ln perf$X $@ 2>/dev/null || \
-	ln -s perf$X $@ 2>/dev/null || \
-	cp perf$X $@
-
-common-cmds.h: util/generate-cmdlist.sh command-list.txt
-
-common-cmds.h: $(wildcard Documentation/perf-*.txt)
-	$(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@
-
-$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
-	$(QUIET_GEN)$(RM) $@ $@+ && \
-	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
-	    -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
-	    -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
-	    -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
-	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
-	    $@.sh >$@+ && \
-	chmod +x $@+ && \
-	mv $@+ $@
-
-configure: configure.ac
-	$(QUIET_GEN)$(RM) $@ $<+ && \
-	sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
-	    $< > $<+ && \
-	autoconf -o $@ $<+ && \
-	$(RM) $<+
-
-# These can record PERF_VERSION
-perf.o perf.spec \
-	$(patsubst %.sh,%,$(SCRIPT_SH)) \
-	$(patsubst %.perl,%,$(SCRIPT_PERL)) \
-	: PERF-VERSION-FILE
-
-%.o: %.c PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
-%.s: %.c PERF-CFLAGS
-	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
-%.o: %.S
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
-
-util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
-		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
-		'-DBINDIR="$(bindir_relative_SQ)"' \
-		'-DPREFIX="$(prefix_SQ)"' \
-		$<
-
-builtin-init-db.o: builtin-init-db.c PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
-
-util/config.o: util/config.c PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-perf-%$X: %.o $(PERFLIBS)
-	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
-
-$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
-$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
-builtin-revert.o wt-status.o: wt-status.h
-
-$(LIB_FILE): $(LIB_OBJS)
-	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
-
-doc:
-	$(MAKE) -C Documentation all
-
-man:
-	$(MAKE) -C Documentation man
-
-html:
-	$(MAKE) -C Documentation html
-
-info:
-	$(MAKE) -C Documentation info
-
-pdf:
-	$(MAKE) -C Documentation pdf
-
-TAGS:
-	$(RM) TAGS
-	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
-
-tags:
-	$(RM) tags
-	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
-
-cscope:
-	$(RM) cscope*
-	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
-
-### Detect prefix changes
-TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
-             $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
-
-PERF-CFLAGS: .FORCE-PERF-CFLAGS
-	@FLAGS='$(TRACK_CFLAGS)'; \
-	    if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \
-		echo 1>&2 "    * new build flags or prefix"; \
-		echo "$$FLAGS" >PERF-CFLAGS; \
-            fi
-
-# We need to apply sq twice, once to protect from the shell
-# that runs PERF-BUILD-OPTIONS, and then again to protect it
-# and the first level quoting from the shell that runs "echo".
-PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
-	@echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
-	@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
-	@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
-	@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
-
-### Testing rules
-
-#
-# None right now:
-#
-# TEST_PROGRAMS += test-something$X
-
-all:: $(TEST_PROGRAMS)
-
-# GNU make supports exporting all variables by "export" without parameters.
-# However, the environment gets quite big, and some programs have problems
-# with that.
-
-export NO_SVN_TESTS
-
-check: common-cmds.h
-	if sparse; \
-	then \
-		for i in *.c */*.c; \
-		do \
-			sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
-		done; \
-	else \
-		echo 2>&1 "Did you mean 'make test'?"; \
-		exit 1; \
-	fi
-
-remove-dashes:
-	./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
-
-### Installation rules
-
-ifneq ($(filter /%,$(firstword $(template_dir))),)
-template_instdir = $(template_dir)
-else
-template_instdir = $(prefix)/$(template_dir)
-endif
-export template_instdir
-
-ifneq ($(filter /%,$(firstword $(perfexecdir))),)
-perfexec_instdir = $(perfexecdir)
-else
-perfexec_instdir = $(prefix)/$(perfexecdir)
-endif
-perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
-export perfexec_instdir
-
-install: all
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
-	$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
-ifdef BUILT_INS
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-	$(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-ifneq (,$X)
-	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
-endif
-endif
-
-install-doc:
-	$(MAKE) -C Documentation install
-
-install-man:
-	$(MAKE) -C Documentation install-man
-
-install-html:
-	$(MAKE) -C Documentation install-html
-
-install-info:
-	$(MAKE) -C Documentation install-info
-
-install-pdf:
-	$(MAKE) -C Documentation install-pdf
-
-quick-install-doc:
-	$(MAKE) -C Documentation quick-install
-
-quick-install-man:
-	$(MAKE) -C Documentation quick-install-man
-
-quick-install-html:
-	$(MAKE) -C Documentation quick-install-html
-
-
-### Maintainer's dist rules
-#
-# None right now
-#
-#
-# perf.spec: perf.spec.in
-#	sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
-#	mv $@+ $@
-#
-# PERF_TARNAME=perf-$(PERF_VERSION)
-# dist: perf.spec perf-archive$(X) configure
-#	./perf-archive --format=tar \
-#		--prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
-#	@mkdir -p $(PERF_TARNAME)
-#	@cp perf.spec configure $(PERF_TARNAME)
-#	@echo $(PERF_VERSION) > $(PERF_TARNAME)/version
-#	$(TAR) rf $(PERF_TARNAME).tar \
-#		$(PERF_TARNAME)/perf.spec \
-#		$(PERF_TARNAME)/configure \
-#		$(PERF_TARNAME)/version
-#	@$(RM) -r $(PERF_TARNAME)
-#	gzip -f -9 $(PERF_TARNAME).tar
-#
-# htmldocs = perf-htmldocs-$(PERF_VERSION)
-# manpages = perf-manpages-$(PERF_VERSION)
-# dist-doc:
-#	$(RM) -r .doc-tmp-dir
-#	mkdir .doc-tmp-dir
-#	$(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
-#	cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
-#	gzip -n -9 -f $(htmldocs).tar
-#	:
-#	$(RM) -r .doc-tmp-dir
-#	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
-#	$(MAKE) -C Documentation DESTDIR=./ \
-#		man1dir=../.doc-tmp-dir/man1 \
-#		man5dir=../.doc-tmp-dir/man5 \
-#		man7dir=../.doc-tmp-dir/man7 \
-#		install
-#	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
-#	gzip -n -9 -f $(manpages).tar
-#	$(RM) -r .doc-tmp-dir
-#
-# rpm: dist
-#	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
-
-### Cleaning rules
-
-distclean: clean
-#	$(RM) configure
-
-clean:
-	$(RM) *.o */*.o $(LIB_FILE)
-	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
-	$(RM) $(TEST_PROGRAMS)
-	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
-	$(RM) -r autom4te.cache
-	$(RM) config.log config.mak.autogen config.mak.append config.status config.cache
-	$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
-	$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
-	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
-	$(MAKE) -C Documentation/ clean
-	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS
-
-.PHONY: all install clean strip
-.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
-.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
-.PHONY: .FORCE-PERF-BUILD-OPTIONS
-
-### Make sure built-ins do not have dups and listed in perf.c
-#
-check-builtins::
-	./check-builtins.sh
-
-### Test suite coverage testing
-#
-# None right now
-#
-# .PHONY: coverage coverage-clean coverage-build coverage-report
-#
-# coverage:
-#	$(MAKE) coverage-build
-#	$(MAKE) coverage-report
-#
-# coverage-clean:
-#	rm -f *.gcda *.gcno
-#
-# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
-# COVERAGE_LDFLAGS = $(CFLAGS)  -O0 -lgcov
-#
-# coverage-build: coverage-clean
-#	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
-#	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
-#		-j1 test
-#
-# coverage-report:
-#	gcov -b *.c */*.c
-#	grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
-#		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
-#		| tee coverage-untested-functions
diff --git a/Documentation/perf_counter/builtin-annotate.c b/Documentation/perf_counter/builtin-annotate.c
deleted file mode 100644
index 116a397..0000000
--- a/Documentation/perf_counter/builtin-annotate.c
+++ /dev/null
@@ -1,1355 +0,0 @@
-/*
- * builtin-annotate.c
- *
- * Builtin annotate command: Analyze the perf.data input file,
- * look up and read DSOs and symbol information and display
- * a histogram of results, along various sorting keys.
- */
-#include "builtin.h"
-
-#include "util/util.h"
-
-#include "util/color.h"
-#include "util/list.h"
-#include "util/cache.h"
-#include "util/rbtree.h"
-#include "util/symbol.h"
-#include "util/string.h"
-
-#include "perf.h"
-
-#include "util/parse-options.h"
-#include "util/parse-events.h"
-
-#define SHOW_KERNEL	1
-#define SHOW_USER	2
-#define SHOW_HV		4
-
-static char		const *input_name = "perf.data";
-static char		*vmlinux = NULL;
-
-static char		default_sort_order[] = "comm,symbol";
-static char		*sort_order = default_sort_order;
-
-static int		input;
-static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
-
-static int		dump_trace = 0;
-#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
-
-static int		verbose;
-
-static unsigned long	page_size;
-static unsigned long	mmap_window = 32;
-
-struct ip_event {
-	struct perf_event_header header;
-	__u64 ip;
-	__u32 pid, tid;
-};
-
-struct mmap_event {
-	struct perf_event_header header;
-	__u32 pid, tid;
-	__u64 start;
-	__u64 len;
-	__u64 pgoff;
-	char filename[PATH_MAX];
-};
-
-struct comm_event {
-	struct perf_event_header header;
-	__u32 pid, tid;
-	char comm[16];
-};
-
-struct fork_event {
-	struct perf_event_header header;
-	__u32 pid, ppid;
-};
-
-struct period_event {
-	struct perf_event_header header;
-	__u64 time;
-	__u64 id;
-	__u64 sample_period;
-};
-
-typedef union event_union {
-	struct perf_event_header	header;
-	struct ip_event			ip;
-	struct mmap_event		mmap;
-	struct comm_event		comm;
-	struct fork_event		fork;
-	struct period_event		period;
-} event_t;
-
-static LIST_HEAD(dsos);
-static struct dso *kernel_dso;
-static struct dso *vdso;
-
-
-static void dsos__add(struct dso *dso)
-{
-	list_add_tail(&dso->node, &dsos);
-}
-
-static struct dso *dsos__find(const char *name)
-{
-	struct dso *pos;
-
-	list_for_each_entry(pos, &dsos, node)
-		if (strcmp(pos->name, name) == 0)
-			return pos;
-	return NULL;
-}
-
-static struct dso *dsos__findnew(const char *name)
-{
-	struct dso *dso = dsos__find(name);
-	int nr;
-
-	if (dso)
-		return dso;
-
-	dso = dso__new(name, 0);
-	if (!dso)
-		goto out_delete_dso;
-
-	nr = dso__load(dso, NULL, verbose);
-	if (nr < 0) {
-		if (verbose)
-			fprintf(stderr, "Failed to open: %s\n", name);
-		goto out_delete_dso;
-	}
-	if (!nr && verbose) {
-		fprintf(stderr,
-		"No symbols found in: %s, maybe install a debug package?\n",
-				name);
-	}
-
-	dsos__add(dso);
-
-	return dso;
-
-out_delete_dso:
-	dso__delete(dso);
-	return NULL;
-}
-
-static void dsos__fprintf(FILE *fp)
-{
-	struct dso *pos;
-
-	list_for_each_entry(pos, &dsos, node)
-		dso__fprintf(pos, fp);
-}
-
-static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip)
-{
-	return dso__find_symbol(kernel_dso, ip);
-}
-
-static int load_kernel(void)
-{
-	int err;
-
-	kernel_dso = dso__new("[kernel]", 0);
-	if (!kernel_dso)
-		return -1;
-
-	err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
-	if (err) {
-		dso__delete(kernel_dso);
-		kernel_dso = NULL;
-	} else
-		dsos__add(kernel_dso);
-
-	vdso = dso__new("[vdso]", 0);
-	if (!vdso)
-		return -1;
-
-	vdso->find_symbol = vdso__find_symbol;
-
-	dsos__add(vdso);
-
-	return err;
-}
-
-struct map {
-	struct list_head node;
-	uint64_t	 start;
-	uint64_t	 end;
-	uint64_t	 pgoff;
-	uint64_t	 (*map_ip)(struct map *, uint64_t);
-	struct dso	 *dso;
-};
-
-static uint64_t map__map_ip(struct map *map, uint64_t ip)
-{
-	return ip - map->start + map->pgoff;
-}
-
-static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
-{
-	return ip;
-}
-
-static struct map *map__new(struct mmap_event *event)
-{
-	struct map *self = malloc(sizeof(*self));
-
-	if (self != NULL) {
-		const char *filename = event->filename;
-
-		self->start = event->start;
-		self->end   = event->start + event->len;
-		self->pgoff = event->pgoff;
-
-		self->dso = dsos__findnew(filename);
-		if (self->dso == NULL)
-			goto out_delete;
-
-		if (self->dso == vdso)
-			self->map_ip = vdso__map_ip;
-		else
-			self->map_ip = map__map_ip;
-	}
-	return self;
-out_delete:
-	free(self);
-	return NULL;
-}
-
-static struct map *map__clone(struct map *self)
-{
-	struct map *map = malloc(sizeof(*self));
-
-	if (!map)
-		return NULL;
-
-	memcpy(map, self, sizeof(*self));
-
-	return map;
-}
-
-static int map__overlap(struct map *l, struct map *r)
-{
-	if (l->start > r->start) {
-		struct map *t = l;
-		l = r;
-		r = t;
-	}
-
-	if (l->end > r->start)
-		return 1;
-
-	return 0;
-}
-
-static size_t map__fprintf(struct map *self, FILE *fp)
-{
-	return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n",
-		       self->start, self->end, self->pgoff, self->dso->name);
-}
-
-
-struct thread {
-	struct rb_node	 rb_node;
-	struct list_head maps;
-	pid_t		 pid;
-	char		 *comm;
-};
-
-static struct thread *thread__new(pid_t pid)
-{
-	struct thread *self = malloc(sizeof(*self));
-
-	if (self != NULL) {
-		self->pid = pid;
-		self->comm = malloc(32);
-		if (self->comm)
-			snprintf(self->comm, 32, ":%d", self->pid);
-		INIT_LIST_HEAD(&self->maps);
-	}
-
-	return self;
-}
-
-static int thread__set_comm(struct thread *self, const char *comm)
-{
-	if (self->comm)
-		free(self->comm);
-	self->comm = strdup(comm);
-	return self->comm ? 0 : -ENOMEM;
-}
-
-static size_t thread__fprintf(struct thread *self, FILE *fp)
-{
-	struct map *pos;
-	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
-
-	list_for_each_entry(pos, &self->maps, node)
-		ret += map__fprintf(pos, fp);
-
-	return ret;
-}
-
-
-static struct rb_root threads;
-static struct thread *last_match;
-
-static struct thread *threads__findnew(pid_t pid)
-{
-	struct rb_node **p = &threads.rb_node;
-	struct rb_node *parent = NULL;
-	struct thread *th;
-
-	/*
-	 * Font-end cache - PID lookups come in blocks,
-	 * so most of the time we dont have to look up
-	 * the full rbtree:
-	 */
-	if (last_match && last_match->pid == pid)
-		return last_match;
-
-	while (*p != NULL) {
-		parent = *p;
-		th = rb_entry(parent, struct thread, rb_node);
-
-		if (th->pid == pid) {
-			last_match = th;
-			return th;
-		}
-
-		if (pid < th->pid)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	th = thread__new(pid);
-	if (th != NULL) {
-		rb_link_node(&th->rb_node, parent, p);
-		rb_insert_color(&th->rb_node, &threads);
-		last_match = th;
-	}
-
-	return th;
-}
-
-static void thread__insert_map(struct thread *self, struct map *map)
-{
-	struct map *pos, *tmp;
-
-	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
-		if (map__overlap(pos, map)) {
-			list_del_init(&pos->node);
-			/* XXX leaks dsos */
-			free(pos);
-		}
-	}
-
-	list_add_tail(&map->node, &self->maps);
-}
-
-static int thread__fork(struct thread *self, struct thread *parent)
-{
-	struct map *map;
-
-	if (self->comm)
-		free(self->comm);
-	self->comm = strdup(parent->comm);
-	if (!self->comm)
-		return -ENOMEM;
-
-	list_for_each_entry(map, &parent->maps, node) {
-		struct map *new = map__clone(map);
-		if (!new)
-			return -ENOMEM;
-		thread__insert_map(self, new);
-	}
-
-	return 0;
-}
-
-static struct map *thread__find_map(struct thread *self, uint64_t ip)
-{
-	struct map *pos;
-
-	if (self == NULL)
-		return NULL;
-
-	list_for_each_entry(pos, &self->maps, node)
-		if (ip >= pos->start && ip <= pos->end)
-			return pos;
-
-	return NULL;
-}
-
-static size_t threads__fprintf(FILE *fp)
-{
-	size_t ret = 0;
-	struct rb_node *nd;
-
-	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
-		struct thread *pos = rb_entry(nd, struct thread, rb_node);
-
-		ret += thread__fprintf(pos, fp);
-	}
-
-	return ret;
-}
-
-/*
- * histogram, sorted on item, collects counts
- */
-
-static struct rb_root hist;
-
-struct hist_entry {
-	struct rb_node	 rb_node;
-
-	struct thread	 *thread;
-	struct map	 *map;
-	struct dso	 *dso;
-	struct symbol	 *sym;
-	uint64_t	 ip;
-	char		 level;
-
-	uint32_t	 count;
-};
-
-/*
- * configurable sorting bits
- */
-
-struct sort_entry {
-	struct list_head list;
-
-	char *header;
-
-	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
-	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
-	size_t	(*print)(FILE *fp, struct hist_entry *);
-};
-
-/* --sort pid */
-
-static int64_t
-sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	return right->thread->pid - left->thread->pid;
-}
-
-static size_t
-sort__thread_print(FILE *fp, struct hist_entry *self)
-{
-	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
-}
-
-static struct sort_entry sort_thread = {
-	.header = "         Command:  Pid",
-	.cmp	= sort__thread_cmp,
-	.print	= sort__thread_print,
-};
-
-/* --sort comm */
-
-static int64_t
-sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	return right->thread->pid - left->thread->pid;
-}
-
-static int64_t
-sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
-{
-	char *comm_l = left->thread->comm;
-	char *comm_r = right->thread->comm;
-
-	if (!comm_l || !comm_r) {
-		if (!comm_l && !comm_r)
-			return 0;
-		else if (!comm_l)
-			return -1;
-		else
-			return 1;
-	}
-
-	return strcmp(comm_l, comm_r);
-}
-
-static size_t
-sort__comm_print(FILE *fp, struct hist_entry *self)
-{
-	return fprintf(fp, "%16s", self->thread->comm);
-}
-
-static struct sort_entry sort_comm = {
-	.header		= "         Command",
-	.cmp		= sort__comm_cmp,
-	.collapse	= sort__comm_collapse,
-	.print		= sort__comm_print,
-};
-
-/* --sort dso */
-
-static int64_t
-sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct dso *dso_l = left->dso;
-	struct dso *dso_r = right->dso;
-
-	if (!dso_l || !dso_r) {
-		if (!dso_l && !dso_r)
-			return 0;
-		else if (!dso_l)
-			return -1;
-		else
-			return 1;
-	}
-
-	return strcmp(dso_l->name, dso_r->name);
-}
-
-static size_t
-sort__dso_print(FILE *fp, struct hist_entry *self)
-{
-	if (self->dso)
-		return fprintf(fp, "%-25s", self->dso->name);
-
-	return fprintf(fp, "%016llx         ", (__u64)self->ip);
-}
-
-static struct sort_entry sort_dso = {
-	.header = "Shared Object            ",
-	.cmp	= sort__dso_cmp,
-	.print	= sort__dso_print,
-};
-
-/* --sort symbol */
-
-static int64_t
-sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	uint64_t ip_l, ip_r;
-
-	if (left->sym == right->sym)
-		return 0;
-
-	ip_l = left->sym ? left->sym->start : left->ip;
-	ip_r = right->sym ? right->sym->start : right->ip;
-
-	return (int64_t)(ip_r - ip_l);
-}
-
-static size_t
-sort__sym_print(FILE *fp, struct hist_entry *self)
-{
-	size_t ret = 0;
-
-	if (verbose)
-		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
-
-	if (self->sym) {
-		ret += fprintf(fp, "[%c] %s",
-			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
-	} else {
-		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
-	}
-
-	return ret;
-}
-
-static struct sort_entry sort_sym = {
-	.header = "Symbol",
-	.cmp	= sort__sym_cmp,
-	.print	= sort__sym_print,
-};
-
-static int sort__need_collapse = 0;
-
-struct sort_dimension {
-	char			*name;
-	struct sort_entry	*entry;
-	int			taken;
-};
-
-static struct sort_dimension sort_dimensions[] = {
-	{ .name = "pid",	.entry = &sort_thread,	},
-	{ .name = "comm",	.entry = &sort_comm,	},
-	{ .name = "dso",	.entry = &sort_dso,	},
-	{ .name = "symbol",	.entry = &sort_sym,	},
-};
-
-static LIST_HEAD(hist_entry__sort_list);
-
-static int sort_dimension__add(char *tok)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
-		struct sort_dimension *sd = &sort_dimensions[i];
-
-		if (sd->taken)
-			continue;
-
-		if (strncasecmp(tok, sd->name, strlen(tok)))
-			continue;
-
-		if (sd->entry->collapse)
-			sort__need_collapse = 1;
-
-		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
-		sd->taken = 1;
-
-		return 0;
-	}
-
-	return -ESRCH;
-}
-
-static int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct sort_entry *se;
-	int64_t cmp = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		cmp = se->cmp(left, right);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
-static int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
-{
-	struct sort_entry *se;
-	int64_t cmp = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		int64_t (*f)(struct hist_entry *, struct hist_entry *);
-
-		f = se->collapse ?: se->cmp;
-
-		cmp = f(left, right);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
-/*
- * collect histogram counts
- */
-static void hist_hit(struct hist_entry *he, uint64_t ip)
-{
-	unsigned int sym_size, offset;
-	struct symbol *sym = he->sym;
-
-	he->count++;
-
-	if (!sym || !sym->hist)
-		return;
-
-	sym_size = sym->end - sym->start;
-	offset = ip - sym->start;
-
-	if (offset >= sym_size)
-		return;
-
-	sym->hist_sum++;
-	sym->hist[offset]++;
-
-	if (verbose >= 3)
-		printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
-			(void *)he->sym->start,
-			he->sym->name,
-			(void *)ip, ip - he->sym->start,
-			sym->hist[offset]);
-}
-
-static int
-hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-		struct symbol *sym, uint64_t ip, char level)
-{
-	struct rb_node **p = &hist.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *he;
-	struct hist_entry entry = {
-		.thread	= thread,
-		.map	= map,
-		.dso	= dso,
-		.sym	= sym,
-		.ip	= ip,
-		.level	= level,
-		.count	= 1,
-	};
-	int cmp;
-
-	while (*p != NULL) {
-		parent = *p;
-		he = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = hist_entry__cmp(&entry, he);
-
-		if (!cmp) {
-			hist_hit(he, ip);
-
-			return 0;
-		}
-
-		if (cmp < 0)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	he = malloc(sizeof(*he));
-	if (!he)
-		return -ENOMEM;
-	*he = entry;
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &hist);
-
-	return 0;
-}
-
-static void hist_entry__free(struct hist_entry *he)
-{
-	free(he);
-}
-
-/*
- * collapse the histogram
- */
-
-static struct rb_root collapse_hists;
-
-static void collapse__insert_entry(struct hist_entry *he)
-{
-	struct rb_node **p = &collapse_hists.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *iter;
-	int64_t cmp;
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = hist_entry__collapse(iter, he);
-
-		if (!cmp) {
-			iter->count += he->count;
-			hist_entry__free(he);
-			return;
-		}
-
-		if (cmp < 0)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &collapse_hists);
-}
-
-static void collapse__resort(void)
-{
-	struct rb_node *next;
-	struct hist_entry *n;
-
-	if (!sort__need_collapse)
-		return;
-
-	next = rb_first(&hist);
-	while (next) {
-		n = rb_entry(next, struct hist_entry, rb_node);
-		next = rb_next(&n->rb_node);
-
-		rb_erase(&n->rb_node, &hist);
-		collapse__insert_entry(n);
-	}
-}
-
-/*
- * reverse the map, sort on count.
- */
-
-static struct rb_root output_hists;
-
-static void output__insert_entry(struct hist_entry *he)
-{
-	struct rb_node **p = &output_hists.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *iter;
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		if (he->count > iter->count)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &output_hists);
-}
-
-static void output__resort(void)
-{
-	struct rb_node *next;
-	struct hist_entry *n;
-	struct rb_root *tree = &hist;
-
-	if (sort__need_collapse)
-		tree = &collapse_hists;
-
-	next = rb_first(tree);
-
-	while (next) {
-		n = rb_entry(next, struct hist_entry, rb_node);
-		next = rb_next(&n->rb_node);
-
-		rb_erase(&n->rb_node, tree);
-		output__insert_entry(n);
-	}
-}
-
-static void register_idle_thread(void)
-{
-	struct thread *thread = threads__findnew(0);
-
-	if (thread == NULL ||
-			thread__set_comm(thread, "[idle]")) {
-		fprintf(stderr, "problem inserting idle task.\n");
-		exit(-1);
-	}
-}
-
-static unsigned long total = 0,
-		     total_mmap = 0,
-		     total_comm = 0,
-		     total_fork = 0,
-		     total_unknown = 0;
-
-static int
-process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	char level;
-	int show = 0;
-	struct dso *dso = NULL;
-	struct thread *thread = threads__findnew(event->ip.pid);
-	uint64_t ip = event->ip.ip;
-	struct map *map = NULL;
-
-	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->header.misc,
-		event->ip.pid,
-		(void *)(long)ip);
-
-	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
-	if (thread == NULL) {
-		fprintf(stderr, "problem processing %d event, skipping it.\n",
-			event->header.type);
-		return -1;
-	}
-
-	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
-		show = SHOW_KERNEL;
-		level = 'k';
-
-		dso = kernel_dso;
-
-		dprintf(" ...... dso: %s\n", dso->name);
-
-	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
-
-		show = SHOW_USER;
-		level = '.';
-
-		map = thread__find_map(thread, ip);
-		if (map != NULL) {
-			ip = map->map_ip(map, ip);
-			dso = map->dso;
-		} else {
-			/*
-			 * If this is outside of all known maps,
-			 * and is a negative address, try to look it
-			 * up in the kernel dso, as it might be a
-			 * vsyscall (which executes in user-mode):
-			 */
-			if ((long long)ip < 0)
-				dso = kernel_dso;
-		}
-		dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
-
-	} else {
-		show = SHOW_HV;
-		level = 'H';
-		dprintf(" ...... dso: [hypervisor]\n");
-	}
-
-	if (show & show_mask) {
-		struct symbol *sym = NULL;
-
-		if (dso)
-			sym = dso->find_symbol(dso, ip);
-
-		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
-			fprintf(stderr,
-		"problem incrementing symbol count, skipping event\n");
-			return -1;
-		}
-	}
-	total++;
-
-	return 0;
-}
-
-static int
-process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread = threads__findnew(event->mmap.pid);
-	struct map *map = map__new(&event->mmap);
-
-	dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->mmap.pid,
-		(void *)(long)event->mmap.start,
-		(void *)(long)event->mmap.len,
-		(void *)(long)event->mmap.pgoff,
-		event->mmap.filename);
-
-	if (thread == NULL || map == NULL) {
-		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
-		return 0;
-	}
-
-	thread__insert_map(thread, map);
-	total_mmap++;
-
-	return 0;
-}
-
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread = threads__findnew(event->comm.pid);
-
-	dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->comm.comm, event->comm.pid);
-
-	if (thread == NULL ||
-	    thread__set_comm(thread, event->comm.comm)) {
-		dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
-		return -1;
-	}
-	total_comm++;
-
-	return 0;
-}
-
-static int
-process_fork_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread = threads__findnew(event->fork.pid);
-	struct thread *parent = threads__findnew(event->fork.ppid);
-
-	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->fork.pid, event->fork.ppid);
-
-	if (!thread || !parent || thread__fork(thread, parent)) {
-		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
-		return -1;
-	}
-	total_fork++;
-
-	return 0;
-}
-
-static int
-process_period_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->period.time,
-		event->period.id,
-		event->period.sample_period);
-
-	return 0;
-}
-
-static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
-		return process_overflow_event(event, offset, head);
-
-	switch (event->header.type) {
-	case PERF_EVENT_MMAP:
-		return process_mmap_event(event, offset, head);
-
-	case PERF_EVENT_COMM:
-		return process_comm_event(event, offset, head);
-
-	case PERF_EVENT_FORK:
-		return process_fork_event(event, offset, head);
-
-	case PERF_EVENT_PERIOD:
-		return process_period_event(event, offset, head);
-	/*
-	 * We dont process them right now but they are fine:
-	 */
-
-	case PERF_EVENT_THROTTLE:
-	case PERF_EVENT_UNTHROTTLE:
-		return 0;
-
-	default:
-		return -1;
-	}
-
-	return 0;
-}
-
-static int
-parse_line(FILE *file, struct symbol *sym, uint64_t start, uint64_t len)
-{
-	char *line = NULL, *tmp, *tmp2;
-	unsigned int offset;
-	size_t line_len;
-	__u64 line_ip;
-	int ret;
-	char *c;
-
-	if (getline(&line, &line_len, file) < 0)
-		return -1;
-	if (!line)
-		return -1;
-
-	c = strchr(line, '\n');
-	if (c)
-		*c = 0;
-
-	line_ip = -1;
-	offset = 0;
-	ret = -2;
-
-	/*
-	 * Strip leading spaces:
-	 */
-	tmp = line;
-	while (*tmp) {
-		if (*tmp != ' ')
-			break;
-		tmp++;
-	}
-
-	if (*tmp) {
-		/*
-		 * Parse hexa addresses followed by ':'
-		 */
-		line_ip = strtoull(tmp, &tmp2, 16);
-		if (*tmp2 != ':')
-			line_ip = -1;
-	}
-
-	if (line_ip != -1) {
-		unsigned int hits = 0;
-		double percent = 0.0;
-		char *color = PERF_COLOR_NORMAL;
-
-		offset = line_ip - start;
-		if (offset < len)
-			hits = sym->hist[offset];
-
-		if (sym->hist_sum)
-			percent = 100.0 * hits / sym->hist_sum;
-
-		/*
-		 * We color high-overhead entries in red, low-overhead
-		 * entries in green - and keep the middle ground normal:
-		 */
-		if (percent >= 5.0)
-			color = PERF_COLOR_RED;
-		else {
-			if (percent > 0.5)
-				color = PERF_COLOR_GREEN;
-		}
-
-		color_fprintf(stdout, color, " %7.2f", percent);
-		printf(" :	");
-		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line);
-	} else {
-		if (!*line)
-			printf("         :\n");
-		else
-			printf("         :	%s\n", line);
-	}
-
-	return 0;
-}
-
-static void annotate_sym(struct dso *dso, struct symbol *sym)
-{
-	char *filename = dso->name;
-	uint64_t start, end, len;
-	char command[PATH_MAX*2];
-	FILE *file;
-
-	if (!filename)
-		return;
-	if (dso == kernel_dso)
-		filename = vmlinux;
-
-	printf("\n------------------------------------------------\n");
-	printf(" Percent |	Source code & Disassembly of %s\n", filename);
-	printf("------------------------------------------------\n");
-
-	if (verbose >= 2)
-		printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
-
-	start = sym->obj_start;
-	if (!start)
-		start = sym->start;
-
-	end = start + sym->end - sym->start + 1;
-	len = sym->end - sym->start;
-
-	sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename);
-
-	if (verbose >= 3)
-		printf("doing: %s\n", command);
-
-	file = popen(command, "r");
-	if (!file)
-		return;
-
-	while (!feof(file)) {
-		if (parse_line(file, sym, start, len) < 0)
-			break;
-	}
-
-	pclose(file);
-}
-
-static void find_annotations(void)
-{
-	struct rb_node *nd;
-	struct dso *dso;
-	int count = 0;
-
-	list_for_each_entry(dso, &dsos, node) {
-
-		for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) {
-			struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
-
-			if (sym->hist) {
-				annotate_sym(dso, sym);
-				count++;
-			}
-		}
-	}
-
-	if (!count)
-		printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter);
-}
-
-static int __cmd_annotate(void)
-{
-	int ret, rc = EXIT_FAILURE;
-	unsigned long offset = 0;
-	unsigned long head = 0;
-	struct stat stat;
-	event_t *event;
-	uint32_t size;
-	char *buf;
-
-	register_idle_thread();
-
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		perror("failed to open file");
-		exit(-1);
-	}
-
-	ret = fstat(input, &stat);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
-	}
-
-	if (!stat.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-
-	if (load_kernel() < 0) {
-		perror("failed to load kernel symbols");
-		return EXIT_FAILURE;
-	}
-
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
-
-more:
-	event = (event_t *)(buf + head);
-
-	size = event->header.size;
-	if (!size)
-		size = 8;
-
-	if (head + event->header.size >= page_size * mmap_window) {
-		unsigned long shift = page_size * (head / page_size);
-		int ret;
-
-		ret = munmap(buf, page_size * mmap_window);
-		assert(ret == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-
-	dprintf("%p [%p]: event: %d\n",
-			(void *)(offset + head),
-			(void *)(long)event->header.size,
-			event->header.type);
-
-	if (!size || process_event(event, offset, head) < 0) {
-
-		dprintf("%p [%p]: skipping unknown header type: %d\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->header.type);
-
-		total_unknown++;
-
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
-	}
-
-	head += size;
-
-	if (offset + head < stat.st_size)
-		goto more;
-
-	rc = EXIT_SUCCESS;
-	close(input);
-
-	dprintf("      IP events: %10ld\n", total);
-	dprintf("    mmap events: %10ld\n", total_mmap);
-	dprintf("    comm events: %10ld\n", total_comm);
-	dprintf("    fork events: %10ld\n", total_fork);
-	dprintf(" unknown events: %10ld\n", total_unknown);
-
-	if (dump_trace)
-		return 0;
-
-	if (verbose >= 3)
-		threads__fprintf(stdout);
-
-	if (verbose >= 2)
-		dsos__fprintf(stdout);
-
-	collapse__resort();
-	output__resort();
-
-	find_annotations();
-
-	return rc;
-}
-
-static const char * const annotate_usage[] = {
-	"perf annotate [<options>] <command>",
-	NULL
-};
-
-static const struct option options[] = {
-	OPT_STRING('i', "input", &input_name, "file",
-		    "input file name"),
-	OPT_STRING('s', "symbol", &sym_hist_filter, "file",
-		    "symbol to annotate"),
-	OPT_BOOLEAN('v', "verbose", &verbose,
-		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
-		    "dump raw trace in ASCII"),
-	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
-	OPT_END()
-};
-
-static void setup_sorting(void)
-{
-	char *tmp, *tok, *str = strdup(sort_order);
-
-	for (tok = strtok_r(str, ", ", &tmp);
-			tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		if (sort_dimension__add(tok) < 0) {
-			error("Unknown --sort key: `%s'", tok);
-			usage_with_options(annotate_usage, options);
-		}
-	}
-
-	free(str);
-}
-
-int cmd_annotate(int argc, const char **argv, const char *prefix)
-{
-	symbol__init();
-
-	page_size = getpagesize();
-
-	argc = parse_options(argc, argv, options, annotate_usage, 0);
-
-	setup_sorting();
-
-	if (argc) {
-		/*
-		 * Special case: if there's an argument left then assume tha
-		 * it's a symbol filter:
-		 */
-		if (argc > 1)
-			usage_with_options(annotate_usage, options);
-
-		sym_hist_filter = argv[0];
-	}
-
-	if (!sym_hist_filter)
-		usage_with_options(annotate_usage, options);
-
-	setup_pager();
-
-	return __cmd_annotate();
-}
diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c
deleted file mode 100644
index 0f32dc3..0000000
--- a/Documentation/perf_counter/builtin-help.c
+++ /dev/null
@@ -1,461 +0,0 @@
-/*
- * builtin-help.c
- *
- * Builtin help command
- */
-#include "util/cache.h"
-#include "builtin.h"
-#include "util/exec_cmd.h"
-#include "common-cmds.h"
-#include "util/parse-options.h"
-#include "util/run-command.h"
-#include "util/help.h"
-
-static struct man_viewer_list {
-	struct man_viewer_list *next;
-	char name[FLEX_ARRAY];
-} *man_viewer_list;
-
-static struct man_viewer_info_list {
-	struct man_viewer_info_list *next;
-	const char *info;
-	char name[FLEX_ARRAY];
-} *man_viewer_info_list;
-
-enum help_format {
-	HELP_FORMAT_MAN,
-	HELP_FORMAT_INFO,
-	HELP_FORMAT_WEB,
-};
-
-static int show_all = 0;
-static enum help_format help_format = HELP_FORMAT_MAN;
-static struct option builtin_help_options[] = {
-	OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
-	OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
-	OPT_SET_INT('w', "web", &help_format, "show manual in web browser",
-			HELP_FORMAT_WEB),
-	OPT_SET_INT('i', "info", &help_format, "show info page",
-			HELP_FORMAT_INFO),
-	OPT_END(),
-};
-
-static const char * const builtin_help_usage[] = {
-	"perf help [--all] [--man|--web|--info] [command]",
-	NULL
-};
-
-static enum help_format parse_help_format(const char *format)
-{
-	if (!strcmp(format, "man"))
-		return HELP_FORMAT_MAN;
-	if (!strcmp(format, "info"))
-		return HELP_FORMAT_INFO;
-	if (!strcmp(format, "web") || !strcmp(format, "html"))
-		return HELP_FORMAT_WEB;
-	die("unrecognized help format '%s'", format);
-}
-
-static const char *get_man_viewer_info(const char *name)
-{
-	struct man_viewer_info_list *viewer;
-
-	for (viewer = man_viewer_info_list; viewer; viewer = viewer->next)
-	{
-		if (!strcasecmp(name, viewer->name))
-			return viewer->info;
-	}
-	return NULL;
-}
-
-static int check_emacsclient_version(void)
-{
-	struct strbuf buffer = STRBUF_INIT;
-	struct child_process ec_process;
-	const char *argv_ec[] = { "emacsclient", "--version", NULL };
-	int version;
-
-	/* emacsclient prints its version number on stderr */
-	memset(&ec_process, 0, sizeof(ec_process));
-	ec_process.argv = argv_ec;
-	ec_process.err = -1;
-	ec_process.stdout_to_stderr = 1;
-	if (start_command(&ec_process)) {
-		fprintf(stderr, "Failed to start emacsclient.\n");
-		return -1;
-	}
-	strbuf_read(&buffer, ec_process.err, 20);
-	close(ec_process.err);
-
-	/*
-	 * Don't bother checking return value, because "emacsclient --version"
-	 * seems to always exits with code 1.
-	 */
-	finish_command(&ec_process);
-
-	if (prefixcmp(buffer.buf, "emacsclient")) {
-		fprintf(stderr, "Failed to parse emacsclient version.\n");
-		strbuf_release(&buffer);
-		return -1;
-	}
-
-	strbuf_remove(&buffer, 0, strlen("emacsclient"));
-	version = atoi(buffer.buf);
-
-	if (version < 22) {
-		fprintf(stderr,
-			"emacsclient version '%d' too old (< 22).\n",
-			version);
-		strbuf_release(&buffer);
-		return -1;
-	}
-
-	strbuf_release(&buffer);
-	return 0;
-}
-
-static void exec_woman_emacs(const char* path, const char *page)
-{
-	if (!check_emacsclient_version()) {
-		/* This works only with emacsclient version >= 22. */
-		struct strbuf man_page = STRBUF_INIT;
-
-		if (!path)
-			path = "emacsclient";
-		strbuf_addf(&man_page, "(woman \"%s\")", page);
-		execlp(path, "emacsclient", "-e", man_page.buf, NULL);
-		warning("failed to exec '%s': %s", path, strerror(errno));
-	}
-}
-
-static void exec_man_konqueror(const char* path, const char *page)
-{
-	const char *display = getenv("DISPLAY");
-	if (display && *display) {
-		struct strbuf man_page = STRBUF_INIT;
-		const char *filename = "kfmclient";
-
-		/* It's simpler to launch konqueror using kfmclient. */
-		if (path) {
-			const char *file = strrchr(path, '/');
-			if (file && !strcmp(file + 1, "konqueror")) {
-				char *new = strdup(path);
-				char *dest = strrchr(new, '/');
-
-				/* strlen("konqueror") == strlen("kfmclient") */
-				strcpy(dest + 1, "kfmclient");
-				path = new;
-			}
-			if (file)
-				filename = file;
-		} else
-			path = "kfmclient";
-		strbuf_addf(&man_page, "man:%s(1)", page);
-		execlp(path, filename, "newTab", man_page.buf, NULL);
-		warning("failed to exec '%s': %s", path, strerror(errno));
-	}
-}
-
-static void exec_man_man(const char* path, const char *page)
-{
-	if (!path)
-		path = "man";
-	execlp(path, "man", page, NULL);
-	warning("failed to exec '%s': %s", path, strerror(errno));
-}
-
-static void exec_man_cmd(const char *cmd, const char *page)
-{
-	struct strbuf shell_cmd = STRBUF_INIT;
-	strbuf_addf(&shell_cmd, "%s %s", cmd, page);
-	execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL);
-	warning("failed to exec '%s': %s", cmd, strerror(errno));
-}
-
-static void add_man_viewer(const char *name)
-{
-	struct man_viewer_list **p = &man_viewer_list;
-	size_t len = strlen(name);
-
-	while (*p)
-		p = &((*p)->next);
-	*p = calloc(1, (sizeof(**p) + len + 1));
-	strncpy((*p)->name, name, len);
-}
-
-static int supported_man_viewer(const char *name, size_t len)
-{
-	return (!strncasecmp("man", name, len) ||
-		!strncasecmp("woman", name, len) ||
-		!strncasecmp("konqueror", name, len));
-}
-
-static void do_add_man_viewer_info(const char *name,
-				   size_t len,
-				   const char *value)
-{
-	struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1);
-
-	strncpy(new->name, name, len);
-	new->info = strdup(value);
-	new->next = man_viewer_info_list;
-	man_viewer_info_list = new;
-}
-
-static int add_man_viewer_path(const char *name,
-			       size_t len,
-			       const char *value)
-{
-	if (supported_man_viewer(name, len))
-		do_add_man_viewer_info(name, len, value);
-	else
-		warning("'%s': path for unsupported man viewer.\n"
-			"Please consider using 'man.<tool>.cmd' instead.",
-			name);
-
-	return 0;
-}
-
-static int add_man_viewer_cmd(const char *name,
-			      size_t len,
-			      const char *value)
-{
-	if (supported_man_viewer(name, len))
-		warning("'%s': cmd for supported man viewer.\n"
-			"Please consider using 'man.<tool>.path' instead.",
-			name);
-	else
-		do_add_man_viewer_info(name, len, value);
-
-	return 0;
-}
-
-static int add_man_viewer_info(const char *var, const char *value)
-{
-	const char *name = var + 4;
-	const char *subkey = strrchr(name, '.');
-
-	if (!subkey)
-		return error("Config with no key for man viewer: %s", name);
-
-	if (!strcmp(subkey, ".path")) {
-		if (!value)
-			return config_error_nonbool(var);
-		return add_man_viewer_path(name, subkey - name, value);
-	}
-	if (!strcmp(subkey, ".cmd")) {
-		if (!value)
-			return config_error_nonbool(var);
-		return add_man_viewer_cmd(name, subkey - name, value);
-	}
-
-	warning("'%s': unsupported man viewer sub key.", subkey);
-	return 0;
-}
-
-static int perf_help_config(const char *var, const char *value, void *cb)
-{
-	if (!strcmp(var, "help.format")) {
-		if (!value)
-			return config_error_nonbool(var);
-		help_format = parse_help_format(value);
-		return 0;
-	}
-	if (!strcmp(var, "man.viewer")) {
-		if (!value)
-			return config_error_nonbool(var);
-		add_man_viewer(value);
-		return 0;
-	}
-	if (!prefixcmp(var, "man."))
-		return add_man_viewer_info(var, value);
-
-	return perf_default_config(var, value, cb);
-}
-
-static struct cmdnames main_cmds, other_cmds;
-
-void list_common_cmds_help(void)
-{
-	int i, longest = 0;
-
-	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
-		if (longest < strlen(common_cmds[i].name))
-			longest = strlen(common_cmds[i].name);
-	}
-
-	puts(" The most commonly used perf commands are:");
-	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
-		printf("   %s   ", common_cmds[i].name);
-		mput_char(' ', longest - strlen(common_cmds[i].name));
-		puts(common_cmds[i].help);
-	}
-}
-
-static int is_perf_command(const char *s)
-{
-	return is_in_cmdlist(&main_cmds, s) ||
-		is_in_cmdlist(&other_cmds, s);
-}
-
-static const char *prepend(const char *prefix, const char *cmd)
-{
-	size_t pre_len = strlen(prefix);
-	size_t cmd_len = strlen(cmd);
-	char *p = malloc(pre_len + cmd_len + 1);
-	memcpy(p, prefix, pre_len);
-	strcpy(p + pre_len, cmd);
-	return p;
-}
-
-static const char *cmd_to_page(const char *perf_cmd)
-{
-	if (!perf_cmd)
-		return "perf";
-	else if (!prefixcmp(perf_cmd, "perf"))
-		return perf_cmd;
-	else if (is_perf_command(perf_cmd))
-		return prepend("perf-", perf_cmd);
-	else
-		return prepend("perf-", perf_cmd);
-}
-
-static void setup_man_path(void)
-{
-	struct strbuf new_path = STRBUF_INIT;
-	const char *old_path = getenv("MANPATH");
-
-	/* We should always put ':' after our path. If there is no
-	 * old_path, the ':' at the end will let 'man' to try
-	 * system-wide paths after ours to find the manual page. If
-	 * there is old_path, we need ':' as delimiter. */
-	strbuf_addstr(&new_path, system_path(PERF_MAN_PATH));
-	strbuf_addch(&new_path, ':');
-	if (old_path)
-		strbuf_addstr(&new_path, old_path);
-
-	setenv("MANPATH", new_path.buf, 1);
-
-	strbuf_release(&new_path);
-}
-
-static void exec_viewer(const char *name, const char *page)
-{
-	const char *info = get_man_viewer_info(name);
-
-	if (!strcasecmp(name, "man"))
-		exec_man_man(info, page);
-	else if (!strcasecmp(name, "woman"))
-		exec_woman_emacs(info, page);
-	else if (!strcasecmp(name, "konqueror"))
-		exec_man_konqueror(info, page);
-	else if (info)
-		exec_man_cmd(info, page);
-	else
-		warning("'%s': unknown man viewer.", name);
-}
-
-static void show_man_page(const char *perf_cmd)
-{
-	struct man_viewer_list *viewer;
-	const char *page = cmd_to_page(perf_cmd);
-	const char *fallback = getenv("PERF_MAN_VIEWER");
-
-	setup_man_path();
-	for (viewer = man_viewer_list; viewer; viewer = viewer->next)
-	{
-		exec_viewer(viewer->name, page); /* will return when unable */
-	}
-	if (fallback)
-		exec_viewer(fallback, page);
-	exec_viewer("man", page);
-	die("no man viewer handled the request");
-}
-
-static void show_info_page(const char *perf_cmd)
-{
-	const char *page = cmd_to_page(perf_cmd);
-	setenv("INFOPATH", system_path(PERF_INFO_PATH), 1);
-	execlp("info", "info", "perfman", page, NULL);
-}
-
-static void get_html_page_path(struct strbuf *page_path, const char *page)
-{
-	struct stat st;
-	const char *html_path = system_path(PERF_HTML_PATH);
-
-	/* Check that we have a perf documentation directory. */
-	if (stat(mkpath("%s/perf.html", html_path), &st)
-	    || !S_ISREG(st.st_mode))
-		die("'%s': not a documentation directory.", html_path);
-
-	strbuf_init(page_path, 0);
-	strbuf_addf(page_path, "%s/%s.html", html_path, page);
-}
-
-/*
- * If open_html is not defined in a platform-specific way (see for
- * example compat/mingw.h), we use the script web--browse to display
- * HTML.
- */
-#ifndef open_html
-static void open_html(const char *path)
-{
-	execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
-}
-#endif
-
-static void show_html_page(const char *perf_cmd)
-{
-	const char *page = cmd_to_page(perf_cmd);
-	struct strbuf page_path; /* it leaks but we exec bellow */
-
-	get_html_page_path(&page_path, page);
-
-	open_html(page_path.buf);
-}
-
-int cmd_help(int argc, const char **argv, const char *prefix)
-{
-	const char *alias;
-	load_command_list("perf-", &main_cmds, &other_cmds);
-
-	perf_config(perf_help_config, NULL);
-
-	argc = parse_options(argc, argv, builtin_help_options,
-			builtin_help_usage, 0);
-
-	if (show_all) {
-		printf("\n usage: %s\n\n", perf_usage_string);
-		list_commands("perf commands", &main_cmds, &other_cmds);
-		printf(" %s\n\n", perf_more_info_string);
-		return 0;
-	}
-
-	if (!argv[0]) {
-		printf("\n usage: %s\n\n", perf_usage_string);
-		list_common_cmds_help();
-		printf("\n %s\n\n", perf_more_info_string);
-		return 0;
-	}
-
-	alias = alias_lookup(argv[0]);
-	if (alias && !is_perf_command(argv[0])) {
-		printf("`perf %s' is aliased to `%s'\n", argv[0], alias);
-		return 0;
-	}
-
-	switch (help_format) {
-	case HELP_FORMAT_MAN:
-		show_man_page(argv[0]);
-		break;
-	case HELP_FORMAT_INFO:
-		show_info_page(argv[0]);
-		break;
-	case HELP_FORMAT_WEB:
-		show_html_page(argv[0]);
-		break;
-	}
-
-	return 0;
-}
diff --git a/Documentation/perf_counter/builtin-list.c b/Documentation/perf_counter/builtin-list.c
deleted file mode 100644
index fe60e37..0000000
--- a/Documentation/perf_counter/builtin-list.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * builtin-list.c
- *
- * Builtin list command: list all event types
- *
- * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
- * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
- */
-#include "builtin.h"
-
-#include "perf.h"
-
-#include "util/parse-options.h"
-#include "util/parse-events.h"
-
-int cmd_list(int argc, const char **argv, const char *prefix)
-{
-	print_events();
-	return 0;
-}
diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
deleted file mode 100644
index aeab9c4..0000000
--- a/Documentation/perf_counter/builtin-record.c
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
- * builtin-record.c
- *
- * Builtin record command: Record the profile of a workload
- * (or a CPU, or a PID) into the perf.data output file - for
- * later analysis via perf report.
- */
-#include "builtin.h"
-
-#include "perf.h"
-
-#include "util/util.h"
-#include "util/parse-options.h"
-#include "util/parse-events.h"
-#include "util/string.h"
-
-#include <unistd.h>
-#include <sched.h>
-
-#define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
-#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
-
-static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
-
-static long			default_interval		= 100000;
-
-static int			nr_cpus				= 0;
-static unsigned int		page_size;
-static unsigned int		mmap_pages			= 128;
-static int			freq				= 0;
-static int			output;
-static const char		*output_name			= "perf.data";
-static int			group				= 0;
-static unsigned int		realtime_prio			= 0;
-static int			system_wide			= 0;
-static pid_t			target_pid			= -1;
-static int			inherit				= 1;
-static int			force				= 0;
-static int			append_file			= 0;
-
-static long			samples;
-static struct timeval		last_read;
-static struct timeval		this_read;
-
-static __u64			bytes_written;
-
-static struct pollfd		event_array[MAX_NR_CPUS * MAX_COUNTERS];
-
-static int			nr_poll;
-static int			nr_cpu;
-
-struct mmap_event {
-	struct perf_event_header	header;
-	__u32				pid;
-	__u32				tid;
-	__u64				start;
-	__u64				len;
-	__u64				pgoff;
-	char				filename[PATH_MAX];
-};
-
-struct comm_event {
-	struct perf_event_header	header;
-	__u32				pid;
-	__u32				tid;
-	char				comm[16];
-};
-
-
-struct mmap_data {
-	int			counter;
-	void			*base;
-	unsigned int		mask;
-	unsigned int		prev;
-};
-
-static struct mmap_data		mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
-
-static unsigned int mmap_read_head(struct mmap_data *md)
-{
-	struct perf_counter_mmap_page *pc = md->base;
-	int head;
-
-	head = pc->data_head;
-	rmb();
-
-	return head;
-}
-
-static void mmap_read(struct mmap_data *md)
-{
-	unsigned int head = mmap_read_head(md);
-	unsigned int old = md->prev;
-	unsigned char *data = md->base + page_size;
-	unsigned long size;
-	void *buf;
-	int diff;
-
-	gettimeofday(&this_read, NULL);
-
-	/*
-	 * If we're further behind than half the buffer, there's a chance
-	 * the writer will bite our tail and mess up the samples under us.
-	 *
-	 * If we somehow ended up ahead of the head, we got messed up.
-	 *
-	 * In either case, truncate and restart at head.
-	 */
-	diff = head - old;
-	if (diff > md->mask / 2 || diff < 0) {
-		struct timeval iv;
-		unsigned long msecs;
-
-		timersub(&this_read, &last_read, &iv);
-		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
-
-		fprintf(stderr, "WARNING: failed to keep up with mmap data."
-				"  Last read %lu msecs ago.\n", msecs);
-
-		/*
-		 * head points to a known good entry, start there.
-		 */
-		old = head;
-	}
-
-	last_read = this_read;
-
-	if (old != head)
-		samples++;
-
-	size = head - old;
-
-	if ((old & md->mask) + size != (head & md->mask)) {
-		buf = &data[old & md->mask];
-		size = md->mask + 1 - (old & md->mask);
-		old += size;
-
-		while (size) {
-			int ret = write(output, buf, size);
-
-			if (ret < 0)
-				die("failed to write");
-
-			size -= ret;
-			buf += ret;
-
-			bytes_written += ret;
-		}
-	}
-
-	buf = &data[old & md->mask];
-	size = head - old;
-	old += size;
-
-	while (size) {
-		int ret = write(output, buf, size);
-
-		if (ret < 0)
-			die("failed to write");
-
-		size -= ret;
-		buf += ret;
-
-		bytes_written += ret;
-	}
-
-	md->prev = old;
-}
-
-static volatile int done = 0;
-
-static void sig_handler(int sig)
-{
-	done = 1;
-}
-
-static void pid_synthesize_comm_event(pid_t pid, int full)
-{
-	struct comm_event comm_ev;
-	char filename[PATH_MAX];
-	char bf[BUFSIZ];
-	int fd, ret;
-	size_t size;
-	char *field, *sep;
-	DIR *tasks;
-	struct dirent dirent, *next;
-
-	snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
-
-	fd = open(filename, O_RDONLY);
-	if (fd < 0) {
-		fprintf(stderr, "couldn't open %s\n", filename);
-		exit(EXIT_FAILURE);
-	}
-	if (read(fd, bf, sizeof(bf)) < 0) {
-		fprintf(stderr, "couldn't read %s\n", filename);
-		exit(EXIT_FAILURE);
-	}
-	close(fd);
-
-	/* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
-	memset(&comm_ev, 0, sizeof(comm_ev));
-	field = strchr(bf, '(');
-	if (field == NULL)
-		goto out_failure;
-	sep = strchr(++field, ')');
-	if (sep == NULL)
-		goto out_failure;
-	size = sep - field;
-	memcpy(comm_ev.comm, field, size++);
-
-	comm_ev.pid = pid;
-	comm_ev.header.type = PERF_EVENT_COMM;
-	size = ALIGN(size, sizeof(uint64_t));
-	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
-
-	if (!full) {
-		comm_ev.tid = pid;
-
-		ret = write(output, &comm_ev, comm_ev.header.size);
-		if (ret < 0) {
-			perror("failed to write");
-			exit(-1);
-		}
-		return;
-	}
-
-	snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
-
-	tasks = opendir(filename);
-	while (!readdir_r(tasks, &dirent, &next) && next) {
-		char *end;
-		pid = strtol(dirent.d_name, &end, 10);
-		if (*end)
-			continue;
-
-		comm_ev.tid = pid;
-
-		ret = write(output, &comm_ev, comm_ev.header.size);
-		if (ret < 0) {
-			perror("failed to write");
-			exit(-1);
-		}
-	}
-	closedir(tasks);
-	return;
-
-out_failure:
-	fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
-		filename);
-	exit(EXIT_FAILURE);
-}
-
-static void pid_synthesize_mmap_samples(pid_t pid)
-{
-	char filename[PATH_MAX];
-	FILE *fp;
-
-	snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
-
-	fp = fopen(filename, "r");
-	if (fp == NULL) {
-		fprintf(stderr, "couldn't open %s\n", filename);
-		exit(EXIT_FAILURE);
-	}
-	while (1) {
-		char bf[BUFSIZ], *pbf = bf;
-		struct mmap_event mmap_ev = {
-			.header.type = PERF_EVENT_MMAP,
-		};
-		int n;
-		size_t size;
-		if (fgets(bf, sizeof(bf), fp) == NULL)
-			break;
-
-		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = hex2u64(pbf, &mmap_ev.start);
-		if (n < 0)
-			continue;
-		pbf += n + 1;
-		n = hex2u64(pbf, &mmap_ev.len);
-		if (n < 0)
-			continue;
-		pbf += n + 3;
-		if (*pbf == 'x') { /* vm_exec */
-			char *execname = strrchr(bf, ' ');
-
-			if (execname == NULL || execname[1] != '/')
-				continue;
-
-			execname += 1;
-			size = strlen(execname);
-			execname[size - 1] = '\0'; /* Remove \n */
-			memcpy(mmap_ev.filename, execname, size);
-			size = ALIGN(size, sizeof(uint64_t));
-			mmap_ev.len -= mmap_ev.start;
-			mmap_ev.header.size = (sizeof(mmap_ev) -
-					       (sizeof(mmap_ev.filename) - size));
-			mmap_ev.pid = pid;
-			mmap_ev.tid = pid;
-
-			if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
-				perror("failed to write");
-				exit(-1);
-			}
-		}
-	}
-
-	fclose(fp);
-}
-
-static void synthesize_samples(void)
-{
-	DIR *proc;
-	struct dirent dirent, *next;
-
-	proc = opendir("/proc");
-
-	while (!readdir_r(proc, &dirent, &next) && next) {
-		char *end;
-		pid_t pid;
-
-		pid = strtol(dirent.d_name, &end, 10);
-		if (*end) /* only interested in proper numerical dirents */
-			continue;
-
-		pid_synthesize_comm_event(pid, 1);
-		pid_synthesize_mmap_samples(pid);
-	}
-
-	closedir(proc);
-}
-
-static int group_fd;
-
-static void create_counter(int counter, int cpu, pid_t pid)
-{
-	struct perf_counter_attr *attr = attrs + counter;
-	int track = 1;
-
-	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
-	if (freq) {
-		attr->freq		= 1;
-		attr->sample_freq	= freq;
-	}
-	attr->mmap		= track;
-	attr->comm		= track;
-	attr->inherit		= (cpu < 0) && inherit;
-
-	track = 0; /* only the first counter needs these */
-
-	fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
-
-	if (fd[nr_cpu][counter] < 0) {
-		int err = errno;
-
-		error("syscall returned with %d (%s)\n",
-				fd[nr_cpu][counter], strerror(err));
-		if (err == EPERM)
-			printf("Are you root?\n");
-		exit(-1);
-	}
-	assert(fd[nr_cpu][counter] >= 0);
-	fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
-
-	/*
-	 * First counter acts as the group leader:
-	 */
-	if (group && group_fd == -1)
-		group_fd = fd[nr_cpu][counter];
-
-	event_array[nr_poll].fd = fd[nr_cpu][counter];
-	event_array[nr_poll].events = POLLIN;
-	nr_poll++;
-
-	mmap_array[nr_cpu][counter].counter = counter;
-	mmap_array[nr_cpu][counter].prev = 0;
-	mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
-	mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-			PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
-	if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
-		error("failed to mmap with %d (%s)\n", errno, strerror(errno));
-		exit(-1);
-	}
-}
-
-static void open_counters(int cpu, pid_t pid)
-{
-	int counter;
-
-	if (pid > 0) {
-		pid_synthesize_comm_event(pid, 0);
-		pid_synthesize_mmap_samples(pid);
-	}
-
-	group_fd = -1;
-	for (counter = 0; counter < nr_counters; counter++)
-		create_counter(counter, cpu, pid);
-
-	nr_cpu++;
-}
-
-static int __cmd_record(int argc, const char **argv)
-{
-	int i, counter;
-	struct stat st;
-	pid_t pid;
-	int flags;
-	int ret;
-
-	page_size = sysconf(_SC_PAGE_SIZE);
-	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-	assert(nr_cpus <= MAX_NR_CPUS);
-	assert(nr_cpus >= 0);
-
-	if (!stat(output_name, &st) && !force && !append_file) {
-		fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
-				output_name);
-		exit(-1);
-	}
-
-	flags = O_CREAT|O_RDWR;
-	if (append_file)
-		flags |= O_APPEND;
-	else
-		flags |= O_TRUNC;
-
-	output = open(output_name, flags, S_IRUSR|S_IWUSR);
-	if (output < 0) {
-		perror("failed to create output file");
-		exit(-1);
-	}
-
-	if (!system_wide) {
-		open_counters(-1, target_pid != -1 ? target_pid : getpid());
-	} else for (i = 0; i < nr_cpus; i++)
-		open_counters(i, target_pid);
-
-	signal(SIGCHLD, sig_handler);
-	signal(SIGINT, sig_handler);
-
-	if (target_pid == -1 && argc) {
-		pid = fork();
-		if (pid < 0)
-			perror("failed to fork");
-
-		if (!pid) {
-			if (execvp(argv[0], (char **)argv)) {
-				perror(argv[0]);
-				exit(-1);
-			}
-		}
-	}
-
-	if (realtime_prio) {
-		struct sched_param param;
-
-		param.sched_priority = realtime_prio;
-		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
-			printf("Could not set realtime priority.\n");
-			exit(-1);
-		}
-	}
-
-	if (system_wide)
-		synthesize_samples();
-
-	while (!done) {
-		int hits = samples;
-
-		for (i = 0; i < nr_cpu; i++) {
-			for (counter = 0; counter < nr_counters; counter++)
-				mmap_read(&mmap_array[i][counter]);
-		}
-
-		if (hits == samples)
-			ret = poll(event_array, nr_poll, 100);
-	}
-
-	/*
-	 * Approximate RIP event size: 24 bytes.
-	 */
-	fprintf(stderr,
-		"[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
-		(double)bytes_written / 1024.0 / 1024.0,
-		output_name,
-		bytes_written / 24);
-
-	return 0;
-}
-
-static const char * const record_usage[] = {
-	"perf record [<options>] [<command>]",
-	"perf record [<options>] -- <command> [<options>]",
-	NULL
-};
-
-static const struct option options[] = {
-	OPT_CALLBACK('e', "event", NULL, "event",
-		     "event selector. use 'perf list' to list available events",
-		     parse_events),
-	OPT_INTEGER('p', "pid", &target_pid,
-		    "record events on existing pid"),
-	OPT_INTEGER('r', "realtime", &realtime_prio,
-		    "collect data with this RT SCHED_FIFO priority"),
-	OPT_BOOLEAN('a', "all-cpus", &system_wide,
-			    "system-wide collection from all CPUs"),
-	OPT_BOOLEAN('A', "append", &append_file,
-			    "append to the output file to do incremental profiling"),
-	OPT_BOOLEAN('f', "force", &force,
-			"overwrite existing data file"),
-	OPT_LONG('c', "count", &default_interval,
-		    "event period to sample"),
-	OPT_STRING('o', "output", &output_name, "file",
-		    "output file name"),
-	OPT_BOOLEAN('i', "inherit", &inherit,
-		    "child tasks inherit counters"),
-	OPT_INTEGER('F', "freq", &freq,
-		    "profile at this frequency"),
-	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
-		    "number of mmap data pages"),
-	OPT_END()
-};
-
-int cmd_record(int argc, const char **argv, const char *prefix)
-{
-	int counter;
-
-	argc = parse_options(argc, argv, options, record_usage, 0);
-	if (!argc && target_pid == -1 && !system_wide)
-		usage_with_options(record_usage, options);
-
-	if (!nr_counters)
-		nr_counters = 1;
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (attrs[counter].sample_period)
-			continue;
-
-		attrs[counter].sample_period = default_interval;
-	}
-
-	return __cmd_record(argc, argv);
-}
diff --git a/Documentation/perf_counter/builtin-report.c b/Documentation/perf_counter/builtin-report.c
deleted file mode 100644
index 242e09f..0000000
--- a/Documentation/perf_counter/builtin-report.c
+++ /dev/null
@@ -1,1291 +0,0 @@
-/*
- * builtin-report.c
- *
- * Builtin report command: Analyze the perf.data input file,
- * look up and read DSOs and symbol information and display
- * a histogram of results, along various sorting keys.
- */
-#include "builtin.h"
-
-#include "util/util.h"
-
-#include "util/color.h"
-#include "util/list.h"
-#include "util/cache.h"
-#include "util/rbtree.h"
-#include "util/symbol.h"
-#include "util/string.h"
-
-#include "perf.h"
-
-#include "util/parse-options.h"
-#include "util/parse-events.h"
-
-#define SHOW_KERNEL	1
-#define SHOW_USER	2
-#define SHOW_HV		4
-
-static char		const *input_name = "perf.data";
-static char		*vmlinux = NULL;
-
-static char		default_sort_order[] = "comm,dso";
-static char		*sort_order = default_sort_order;
-
-static int		input;
-static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
-
-static int		dump_trace = 0;
-#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
-
-static int		verbose;
-static int		full_paths;
-
-static unsigned long	page_size;
-static unsigned long	mmap_window = 32;
-
-struct ip_event {
-	struct perf_event_header header;
-	__u64 ip;
-	__u32 pid, tid;
-};
-
-struct mmap_event {
-	struct perf_event_header header;
-	__u32 pid, tid;
-	__u64 start;
-	__u64 len;
-	__u64 pgoff;
-	char filename[PATH_MAX];
-};
-
-struct comm_event {
-	struct perf_event_header header;
-	__u32 pid, tid;
-	char comm[16];
-};
-
-struct fork_event {
-	struct perf_event_header header;
-	__u32 pid, ppid;
-};
-
-struct period_event {
-	struct perf_event_header header;
-	__u64 time;
-	__u64 id;
-	__u64 sample_period;
-};
-
-typedef union event_union {
-	struct perf_event_header	header;
-	struct ip_event			ip;
-	struct mmap_event		mmap;
-	struct comm_event		comm;
-	struct fork_event		fork;
-	struct period_event		period;
-} event_t;
-
-static LIST_HEAD(dsos);
-static struct dso *kernel_dso;
-static struct dso *vdso;
-
-static void dsos__add(struct dso *dso)
-{
-	list_add_tail(&dso->node, &dsos);
-}
-
-static struct dso *dsos__find(const char *name)
-{
-	struct dso *pos;
-
-	list_for_each_entry(pos, &dsos, node)
-		if (strcmp(pos->name, name) == 0)
-			return pos;
-	return NULL;
-}
-
-static struct dso *dsos__findnew(const char *name)
-{
-	struct dso *dso = dsos__find(name);
-	int nr;
-
-	if (dso)
-		return dso;
-
-	dso = dso__new(name, 0);
-	if (!dso)
-		goto out_delete_dso;
-
-	nr = dso__load(dso, NULL, verbose);
-	if (nr < 0) {
-		if (verbose)
-			fprintf(stderr, "Failed to open: %s\n", name);
-		goto out_delete_dso;
-	}
-	if (!nr && verbose) {
-		fprintf(stderr,
-		"No symbols found in: %s, maybe install a debug package?\n",
-				name);
-	}
-
-	dsos__add(dso);
-
-	return dso;
-
-out_delete_dso:
-	dso__delete(dso);
-	return NULL;
-}
-
-static void dsos__fprintf(FILE *fp)
-{
-	struct dso *pos;
-
-	list_for_each_entry(pos, &dsos, node)
-		dso__fprintf(pos, fp);
-}
-
-static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip)
-{
-	return dso__find_symbol(kernel_dso, ip);
-}
-
-static int load_kernel(void)
-{
-	int err;
-
-	kernel_dso = dso__new("[kernel]", 0);
-	if (!kernel_dso)
-		return -1;
-
-	err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
-	if (err) {
-		dso__delete(kernel_dso);
-		kernel_dso = NULL;
-	} else
-		dsos__add(kernel_dso);
-
-	vdso = dso__new("[vdso]", 0);
-	if (!vdso)
-		return -1;
-
-	vdso->find_symbol = vdso__find_symbol;
-
-	dsos__add(vdso);
-
-	return err;
-}
-
-static char __cwd[PATH_MAX];
-static char *cwd = __cwd;
-static int cwdlen;
-
-static int strcommon(const char *pathname)
-{
-	int n = 0;
-
-	while (pathname[n] == cwd[n] && n < cwdlen)
-		++n;
-
-	return n;
-}
-
-struct map {
-	struct list_head node;
-	uint64_t	 start;
-	uint64_t	 end;
-	uint64_t	 pgoff;
-	uint64_t	 (*map_ip)(struct map *, uint64_t);
-	struct dso	 *dso;
-};
-
-static uint64_t map__map_ip(struct map *map, uint64_t ip)
-{
-	return ip - map->start + map->pgoff;
-}
-
-static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
-{
-	return ip;
-}
-
-static struct map *map__new(struct mmap_event *event)
-{
-	struct map *self = malloc(sizeof(*self));
-
-	if (self != NULL) {
-		const char *filename = event->filename;
-		char newfilename[PATH_MAX];
-
-		if (cwd) {
-			int n = strcommon(filename);
-
-			if (n == cwdlen) {
-				snprintf(newfilename, sizeof(newfilename),
-					 ".%s", filename + n);
-				filename = newfilename;
-			}
-		}
-
-		self->start = event->start;
-		self->end   = event->start + event->len;
-		self->pgoff = event->pgoff;
-
-		self->dso = dsos__findnew(filename);
-		if (self->dso == NULL)
-			goto out_delete;
-
-		if (self->dso == vdso)
-			self->map_ip = vdso__map_ip;
-		else
-			self->map_ip = map__map_ip;
-	}
-	return self;
-out_delete:
-	free(self);
-	return NULL;
-}
-
-static struct map *map__clone(struct map *self)
-{
-	struct map *map = malloc(sizeof(*self));
-
-	if (!map)
-		return NULL;
-
-	memcpy(map, self, sizeof(*self));
-
-	return map;
-}
-
-static int map__overlap(struct map *l, struct map *r)
-{
-	if (l->start > r->start) {
-		struct map *t = l;
-		l = r;
-		r = t;
-	}
-
-	if (l->end > r->start)
-		return 1;
-
-	return 0;
-}
-
-static size_t map__fprintf(struct map *self, FILE *fp)
-{
-	return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n",
-		       self->start, self->end, self->pgoff, self->dso->name);
-}
-
-
-struct thread {
-	struct rb_node	 rb_node;
-	struct list_head maps;
-	pid_t		 pid;
-	char		 *comm;
-};
-
-static struct thread *thread__new(pid_t pid)
-{
-	struct thread *self = malloc(sizeof(*self));
-
-	if (self != NULL) {
-		self->pid = pid;
-		self->comm = malloc(32);
-		if (self->comm)
-			snprintf(self->comm, 32, ":%d", self->pid);
-		INIT_LIST_HEAD(&self->maps);
-	}
-
-	return self;
-}
-
-static int thread__set_comm(struct thread *self, const char *comm)
-{
-	if (self->comm)
-		free(self->comm);
-	self->comm = strdup(comm);
-	return self->comm ? 0 : -ENOMEM;
-}
-
-static size_t thread__fprintf(struct thread *self, FILE *fp)
-{
-	struct map *pos;
-	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
-
-	list_for_each_entry(pos, &self->maps, node)
-		ret += map__fprintf(pos, fp);
-
-	return ret;
-}
-
-
-static struct rb_root threads;
-static struct thread *last_match;
-
-static struct thread *threads__findnew(pid_t pid)
-{
-	struct rb_node **p = &threads.rb_node;
-	struct rb_node *parent = NULL;
-	struct thread *th;
-
-	/*
-	 * Font-end cache - PID lookups come in blocks,
-	 * so most of the time we dont have to look up
-	 * the full rbtree:
-	 */
-	if (last_match && last_match->pid == pid)
-		return last_match;
-
-	while (*p != NULL) {
-		parent = *p;
-		th = rb_entry(parent, struct thread, rb_node);
-
-		if (th->pid == pid) {
-			last_match = th;
-			return th;
-		}
-
-		if (pid < th->pid)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	th = thread__new(pid);
-	if (th != NULL) {
-		rb_link_node(&th->rb_node, parent, p);
-		rb_insert_color(&th->rb_node, &threads);
-		last_match = th;
-	}
-
-	return th;
-}
-
-static void thread__insert_map(struct thread *self, struct map *map)
-{
-	struct map *pos, *tmp;
-
-	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
-		if (map__overlap(pos, map)) {
-			list_del_init(&pos->node);
-			/* XXX leaks dsos */
-			free(pos);
-		}
-	}
-
-	list_add_tail(&map->node, &self->maps);
-}
-
-static int thread__fork(struct thread *self, struct thread *parent)
-{
-	struct map *map;
-
-	if (self->comm)
-		free(self->comm);
-	self->comm = strdup(parent->comm);
-	if (!self->comm)
-		return -ENOMEM;
-
-	list_for_each_entry(map, &parent->maps, node) {
-		struct map *new = map__clone(map);
-		if (!new)
-			return -ENOMEM;
-		thread__insert_map(self, new);
-	}
-
-	return 0;
-}
-
-static struct map *thread__find_map(struct thread *self, uint64_t ip)
-{
-	struct map *pos;
-
-	if (self == NULL)
-		return NULL;
-
-	list_for_each_entry(pos, &self->maps, node)
-		if (ip >= pos->start && ip <= pos->end)
-			return pos;
-
-	return NULL;
-}
-
-static size_t threads__fprintf(FILE *fp)
-{
-	size_t ret = 0;
-	struct rb_node *nd;
-
-	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
-		struct thread *pos = rb_entry(nd, struct thread, rb_node);
-
-		ret += thread__fprintf(pos, fp);
-	}
-
-	return ret;
-}
-
-/*
- * histogram, sorted on item, collects counts
- */
-
-static struct rb_root hist;
-
-struct hist_entry {
-	struct rb_node	 rb_node;
-
-	struct thread	 *thread;
-	struct map	 *map;
-	struct dso	 *dso;
-	struct symbol	 *sym;
-	uint64_t	 ip;
-	char		 level;
-
-	uint32_t	 count;
-};
-
-/*
- * configurable sorting bits
- */
-
-struct sort_entry {
-	struct list_head list;
-
-	char *header;
-
-	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
-	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
-	size_t	(*print)(FILE *fp, struct hist_entry *);
-};
-
-/* --sort pid */
-
-static int64_t
-sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	return right->thread->pid - left->thread->pid;
-}
-
-static size_t
-sort__thread_print(FILE *fp, struct hist_entry *self)
-{
-	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
-}
-
-static struct sort_entry sort_thread = {
-	.header = "         Command:  Pid",
-	.cmp	= sort__thread_cmp,
-	.print	= sort__thread_print,
-};
-
-/* --sort comm */
-
-static int64_t
-sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	return right->thread->pid - left->thread->pid;
-}
-
-static int64_t
-sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
-{
-	char *comm_l = left->thread->comm;
-	char *comm_r = right->thread->comm;
-
-	if (!comm_l || !comm_r) {
-		if (!comm_l && !comm_r)
-			return 0;
-		else if (!comm_l)
-			return -1;
-		else
-			return 1;
-	}
-
-	return strcmp(comm_l, comm_r);
-}
-
-static size_t
-sort__comm_print(FILE *fp, struct hist_entry *self)
-{
-	return fprintf(fp, "%16s", self->thread->comm);
-}
-
-static struct sort_entry sort_comm = {
-	.header		= "         Command",
-	.cmp		= sort__comm_cmp,
-	.collapse	= sort__comm_collapse,
-	.print		= sort__comm_print,
-};
-
-/* --sort dso */
-
-static int64_t
-sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct dso *dso_l = left->dso;
-	struct dso *dso_r = right->dso;
-
-	if (!dso_l || !dso_r) {
-		if (!dso_l && !dso_r)
-			return 0;
-		else if (!dso_l)
-			return -1;
-		else
-			return 1;
-	}
-
-	return strcmp(dso_l->name, dso_r->name);
-}
-
-static size_t
-sort__dso_print(FILE *fp, struct hist_entry *self)
-{
-	if (self->dso)
-		return fprintf(fp, "%-25s", self->dso->name);
-
-	return fprintf(fp, "%016llx         ", (__u64)self->ip);
-}
-
-static struct sort_entry sort_dso = {
-	.header = "Shared Object            ",
-	.cmp	= sort__dso_cmp,
-	.print	= sort__dso_print,
-};
-
-/* --sort symbol */
-
-static int64_t
-sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	uint64_t ip_l, ip_r;
-
-	if (left->sym == right->sym)
-		return 0;
-
-	ip_l = left->sym ? left->sym->start : left->ip;
-	ip_r = right->sym ? right->sym->start : right->ip;
-
-	return (int64_t)(ip_r - ip_l);
-}
-
-static size_t
-sort__sym_print(FILE *fp, struct hist_entry *self)
-{
-	size_t ret = 0;
-
-	if (verbose)
-		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
-
-	if (self->sym) {
-		ret += fprintf(fp, "[%c] %s",
-			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
-	} else {
-		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
-	}
-
-	return ret;
-}
-
-static struct sort_entry sort_sym = {
-	.header = "Symbol",
-	.cmp	= sort__sym_cmp,
-	.print	= sort__sym_print,
-};
-
-static int sort__need_collapse = 0;
-
-struct sort_dimension {
-	char			*name;
-	struct sort_entry	*entry;
-	int			taken;
-};
-
-static struct sort_dimension sort_dimensions[] = {
-	{ .name = "pid",	.entry = &sort_thread,	},
-	{ .name = "comm",	.entry = &sort_comm,	},
-	{ .name = "dso",	.entry = &sort_dso,	},
-	{ .name = "symbol",	.entry = &sort_sym,	},
-};
-
-static LIST_HEAD(hist_entry__sort_list);
-
-static int sort_dimension__add(char *tok)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
-		struct sort_dimension *sd = &sort_dimensions[i];
-
-		if (sd->taken)
-			continue;
-
-		if (strncasecmp(tok, sd->name, strlen(tok)))
-			continue;
-
-		if (sd->entry->collapse)
-			sort__need_collapse = 1;
-
-		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
-		sd->taken = 1;
-
-		return 0;
-	}
-
-	return -ESRCH;
-}
-
-static int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	struct sort_entry *se;
-	int64_t cmp = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		cmp = se->cmp(left, right);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
-static int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
-{
-	struct sort_entry *se;
-	int64_t cmp = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		int64_t (*f)(struct hist_entry *, struct hist_entry *);
-
-		f = se->collapse ?: se->cmp;
-
-		cmp = f(left, right);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
-static size_t
-hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
-{
-	struct sort_entry *se;
-	size_t ret;
-
-	if (total_samples) {
-		double percent = self->count * 100.0 / total_samples;
-		char *color = PERF_COLOR_NORMAL;
-
-		/*
-		 * We color high-overhead entries in red, low-overhead
-		 * entries in green - and keep the middle ground normal:
-		 */
-		if (percent >= 5.0)
-			color = PERF_COLOR_RED;
-		if (percent < 0.5)
-			color = PERF_COLOR_GREEN;
-
-		ret = color_fprintf(fp, color, "   %6.2f%%",
-				(self->count * 100.0) / total_samples);
-	} else
-		ret = fprintf(fp, "%12d ", self->count);
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		fprintf(fp, "  ");
-		ret += se->print(fp, self);
-	}
-
-	ret += fprintf(fp, "\n");
-
-	return ret;
-}
-
-/*
- * collect histogram counts
- */
-
-static int
-hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-		struct symbol *sym, uint64_t ip, char level)
-{
-	struct rb_node **p = &hist.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *he;
-	struct hist_entry entry = {
-		.thread	= thread,
-		.map	= map,
-		.dso	= dso,
-		.sym	= sym,
-		.ip	= ip,
-		.level	= level,
-		.count	= 1,
-	};
-	int cmp;
-
-	while (*p != NULL) {
-		parent = *p;
-		he = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = hist_entry__cmp(&entry, he);
-
-		if (!cmp) {
-			he->count++;
-			return 0;
-		}
-
-		if (cmp < 0)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	he = malloc(sizeof(*he));
-	if (!he)
-		return -ENOMEM;
-	*he = entry;
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &hist);
-
-	return 0;
-}
-
-static void hist_entry__free(struct hist_entry *he)
-{
-	free(he);
-}
-
-/*
- * collapse the histogram
- */
-
-static struct rb_root collapse_hists;
-
-static void collapse__insert_entry(struct hist_entry *he)
-{
-	struct rb_node **p = &collapse_hists.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *iter;
-	int64_t cmp;
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = hist_entry__collapse(iter, he);
-
-		if (!cmp) {
-			iter->count += he->count;
-			hist_entry__free(he);
-			return;
-		}
-
-		if (cmp < 0)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &collapse_hists);
-}
-
-static void collapse__resort(void)
-{
-	struct rb_node *next;
-	struct hist_entry *n;
-
-	if (!sort__need_collapse)
-		return;
-
-	next = rb_first(&hist);
-	while (next) {
-		n = rb_entry(next, struct hist_entry, rb_node);
-		next = rb_next(&n->rb_node);
-
-		rb_erase(&n->rb_node, &hist);
-		collapse__insert_entry(n);
-	}
-}
-
-/*
- * reverse the map, sort on count.
- */
-
-static struct rb_root output_hists;
-
-static void output__insert_entry(struct hist_entry *he)
-{
-	struct rb_node **p = &output_hists.rb_node;
-	struct rb_node *parent = NULL;
-	struct hist_entry *iter;
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		if (he->count > iter->count)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&he->rb_node, parent, p);
-	rb_insert_color(&he->rb_node, &output_hists);
-}
-
-static void output__resort(void)
-{
-	struct rb_node *next;
-	struct hist_entry *n;
-	struct rb_root *tree = &hist;
-
-	if (sort__need_collapse)
-		tree = &collapse_hists;
-
-	next = rb_first(tree);
-
-	while (next) {
-		n = rb_entry(next, struct hist_entry, rb_node);
-		next = rb_next(&n->rb_node);
-
-		rb_erase(&n->rb_node, tree);
-		output__insert_entry(n);
-	}
-}
-
-static size_t output__fprintf(FILE *fp, uint64_t total_samples)
-{
-	struct hist_entry *pos;
-	struct sort_entry *se;
-	struct rb_node *nd;
-	size_t ret = 0;
-
-	fprintf(fp, "\n");
-	fprintf(fp, "#\n");
-	fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples);
-	fprintf(fp, "#\n");
-
-	fprintf(fp, "# Overhead");
-	list_for_each_entry(se, &hist_entry__sort_list, list)
-		fprintf(fp, "  %s", se->header);
-	fprintf(fp, "\n");
-
-	fprintf(fp, "# ........");
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		int i;
-
-		fprintf(fp, "  ");
-		for (i = 0; i < strlen(se->header); i++)
-			fprintf(fp, ".");
-	}
-	fprintf(fp, "\n");
-
-	fprintf(fp, "#\n");
-
-	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
-		pos = rb_entry(nd, struct hist_entry, rb_node);
-		ret += hist_entry__fprintf(fp, pos, total_samples);
-	}
-
-	if (!strcmp(sort_order, default_sort_order)) {
-		fprintf(fp, "#\n");
-		fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n");
-		fprintf(fp, "#\n");
-	}
-	fprintf(fp, "\n");
-
-	return ret;
-}
-
-static void register_idle_thread(void)
-{
-	struct thread *thread = threads__findnew(0);
-
-	if (thread == NULL ||
-			thread__set_comm(thread, "[idle]")) {
-		fprintf(stderr, "problem inserting idle task.\n");
-		exit(-1);
-	}
-}
-
-static unsigned long total = 0,
-		     total_mmap = 0,
-		     total_comm = 0,
-		     total_fork = 0,
-		     total_unknown = 0;
-
-static int
-process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	char level;
-	int show = 0;
-	struct dso *dso = NULL;
-	struct thread *thread = threads__findnew(event->ip.pid);
-	uint64_t ip = event->ip.ip;
-	struct map *map = NULL;
-
-	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->header.misc,
-		event->ip.pid,
-		(void *)(long)ip);
-
-	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
-	if (thread == NULL) {
-		fprintf(stderr, "problem processing %d event, skipping it.\n",
-			event->header.type);
-		return -1;
-	}
-
-	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
-		show = SHOW_KERNEL;
-		level = 'k';
-
-		dso = kernel_dso;
-
-		dprintf(" ...... dso: %s\n", dso->name);
-
-	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
-
-		show = SHOW_USER;
-		level = '.';
-
-		map = thread__find_map(thread, ip);
-		if (map != NULL) {
-			ip = map->map_ip(map, ip);
-			dso = map->dso;
-		} else {
-			/*
-			 * If this is outside of all known maps,
-			 * and is a negative address, try to look it
-			 * up in the kernel dso, as it might be a
-			 * vsyscall (which executes in user-mode):
-			 */
-			if ((long long)ip < 0)
-				dso = kernel_dso;
-		}
-		dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
-
-	} else {
-		show = SHOW_HV;
-		level = 'H';
-		dprintf(" ...... dso: [hypervisor]\n");
-	}
-
-	if (show & show_mask) {
-		struct symbol *sym = NULL;
-
-		if (dso)
-			sym = dso->find_symbol(dso, ip);
-
-		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
-			fprintf(stderr,
-		"problem incrementing symbol count, skipping event\n");
-			return -1;
-		}
-	}
-	total++;
-
-	return 0;
-}
-
-static int
-process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread = threads__findnew(event->mmap.pid);
-	struct map *map = map__new(&event->mmap);
-
-	dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->mmap.pid,
-		(void *)(long)event->mmap.start,
-		(void *)(long)event->mmap.len,
-		(void *)(long)event->mmap.pgoff,
-		event->mmap.filename);
-
-	if (thread == NULL || map == NULL) {
-		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
-		return 0;
-	}
-
-	thread__insert_map(thread, map);
-	total_mmap++;
-
-	return 0;
-}
-
-static int
-process_comm_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread = threads__findnew(event->comm.pid);
-
-	dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->comm.comm, event->comm.pid);
-
-	if (thread == NULL ||
-	    thread__set_comm(thread, event->comm.comm)) {
-		dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
-		return -1;
-	}
-	total_comm++;
-
-	return 0;
-}
-
-static int
-process_fork_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	struct thread *thread = threads__findnew(event->fork.pid);
-	struct thread *parent = threads__findnew(event->fork.ppid);
-
-	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->fork.pid, event->fork.ppid);
-
-	if (!thread || !parent || thread__fork(thread, parent)) {
-		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
-		return -1;
-	}
-	total_fork++;
-
-	return 0;
-}
-
-static int
-process_period_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
-		(void *)(offset + head),
-		(void *)(long)(event->header.size),
-		event->period.time,
-		event->period.id,
-		event->period.sample_period);
-
-	return 0;
-}
-
-static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
-{
-	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
-		return process_overflow_event(event, offset, head);
-
-	switch (event->header.type) {
-	case PERF_EVENT_MMAP:
-		return process_mmap_event(event, offset, head);
-
-	case PERF_EVENT_COMM:
-		return process_comm_event(event, offset, head);
-
-	case PERF_EVENT_FORK:
-		return process_fork_event(event, offset, head);
-
-	case PERF_EVENT_PERIOD:
-		return process_period_event(event, offset, head);
-	/*
-	 * We dont process them right now but they are fine:
-	 */
-
-	case PERF_EVENT_THROTTLE:
-	case PERF_EVENT_UNTHROTTLE:
-		return 0;
-
-	default:
-		return -1;
-	}
-
-	return 0;
-}
-
-static int __cmd_report(void)
-{
-	int ret, rc = EXIT_FAILURE;
-	unsigned long offset = 0;
-	unsigned long head = 0;
-	struct stat stat;
-	event_t *event;
-	uint32_t size;
-	char *buf;
-
-	register_idle_thread();
-
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		perror("failed to open file");
-		exit(-1);
-	}
-
-	ret = fstat(input, &stat);
-	if (ret < 0) {
-		perror("failed to stat file");
-		exit(-1);
-	}
-
-	if (!stat.st_size) {
-		fprintf(stderr, "zero-sized file, nothing to do!\n");
-		exit(0);
-	}
-
-	if (load_kernel() < 0) {
-		perror("failed to load kernel symbols");
-		return EXIT_FAILURE;
-	}
-
-	if (!full_paths) {
-		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
-			perror("failed to get the current directory");
-			return EXIT_FAILURE;
-		}
-		cwdlen = strlen(cwd);
-	} else {
-		cwd = NULL;
-		cwdlen = 0;
-	}
-remap:
-	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-			   MAP_SHARED, input, offset);
-	if (buf == MAP_FAILED) {
-		perror("failed to mmap file");
-		exit(-1);
-	}
-
-more:
-	event = (event_t *)(buf + head);
-
-	size = event->header.size;
-	if (!size)
-		size = 8;
-
-	if (head + event->header.size >= page_size * mmap_window) {
-		unsigned long shift = page_size * (head / page_size);
-		int ret;
-
-		ret = munmap(buf, page_size * mmap_window);
-		assert(ret == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-
-	dprintf("%p [%p]: event: %d\n",
-			(void *)(offset + head),
-			(void *)(long)event->header.size,
-			event->header.type);
-
-	if (!size || process_event(event, offset, head) < 0) {
-
-		dprintf("%p [%p]: skipping unknown header type: %d\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->header.type);
-
-		total_unknown++;
-
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
-	}
-
-	head += size;
-
-	if (offset + head < stat.st_size)
-		goto more;
-
-	rc = EXIT_SUCCESS;
-	close(input);
-
-	dprintf("      IP events: %10ld\n", total);
-	dprintf("    mmap events: %10ld\n", total_mmap);
-	dprintf("    comm events: %10ld\n", total_comm);
-	dprintf("    fork events: %10ld\n", total_fork);
-	dprintf(" unknown events: %10ld\n", total_unknown);
-
-	if (dump_trace)
-		return 0;
-
-	if (verbose >= 3)
-		threads__fprintf(stdout);
-
-	if (verbose >= 2)
-		dsos__fprintf(stdout);
-
-	collapse__resort();
-	output__resort();
-	output__fprintf(stdout, total);
-
-	return rc;
-}
-
-static const char * const report_usage[] = {
-	"perf report [<options>] <command>",
-	NULL
-};
-
-static const struct option options[] = {
-	OPT_STRING('i', "input", &input_name, "file",
-		    "input file name"),
-	OPT_BOOLEAN('v', "verbose", &verbose,
-		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
-		    "dump raw trace in ASCII"),
-	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
-	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-		   "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"),
-	OPT_BOOLEAN('P', "full-paths", &full_paths,
-		    "Don't shorten the pathnames taking into account the cwd"),
-	OPT_END()
-};
-
-static void setup_sorting(void)
-{
-	char *tmp, *tok, *str = strdup(sort_order);
-
-	for (tok = strtok_r(str, ", ", &tmp);
-			tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		if (sort_dimension__add(tok) < 0) {
-			error("Unknown --sort key: `%s'", tok);
-			usage_with_options(report_usage, options);
-		}
-	}
-
-	free(str);
-}
-
-int cmd_report(int argc, const char **argv, const char *prefix)
-{
-	symbol__init();
-
-	page_size = getpagesize();
-
-	argc = parse_options(argc, argv, options, report_usage, 0);
-
-	setup_sorting();
-
-	/*
-	 * Any (unrecognized) arguments left?
-	 */
-	if (argc)
-		usage_with_options(report_usage, options);
-
-	setup_pager();
-
-	return __cmd_report();
-}
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
deleted file mode 100644
index 2cbf5a1..0000000
--- a/Documentation/perf_counter/builtin-stat.c
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * builtin-stat.c
- *
- * Builtin stat command: Give a precise performance counters summary
- * overview about any workload, CPU or specific PID.
- *
- * Sample output:
-
-   $ perf stat ~/hackbench 10
-   Time: 0.104
-
-    Performance counter stats for '/home/mingo/hackbench':
-
-       1255.538611  task clock ticks     #      10.143 CPU utilization factor
-             54011  context switches     #       0.043 M/sec
-               385  CPU migrations       #       0.000 M/sec
-             17755  pagefaults           #       0.014 M/sec
-        3808323185  CPU cycles           #    3033.219 M/sec
-        1575111190  instructions         #    1254.530 M/sec
-          17367895  cache references     #      13.833 M/sec
-           7674421  cache misses         #       6.112 M/sec
-
-    Wall-clock time elapsed:   123.786620 msecs
-
- *
- * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
- *
- * Improvements and fixes by:
- *
- *   Arjan van de Ven <arjan@linux.intel.com>
- *   Yanmin Zhang <yanmin.zhang@intel.com>
- *   Wu Fengguang <fengguang.wu@intel.com>
- *   Mike Galbraith <efault@gmx.de>
- *   Paul Mackerras <paulus@samba.org>
- *
- * Released under the GPL v2. (and only v2, not any later version)
- */
-
-#include "perf.h"
-#include "builtin.h"
-#include "util/util.h"
-#include "util/parse-options.h"
-#include "util/parse-events.h"
-
-#include <sys/prctl.h>
-
-static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
-
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK		},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS	},
-
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES		},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES	},
-};
-
-static int			system_wide			=  0;
-static int			inherit				=  1;
-
-static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
-
-static int			target_pid			= -1;
-static int			nr_cpus				=  0;
-static unsigned int		page_size;
-
-static int			scale				=  1;
-
-static const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
-};
-
-static __u64			event_res[MAX_COUNTERS][3];
-static __u64			event_scaled[MAX_COUNTERS];
-
-static __u64			runtime_nsecs;
-static __u64			walltime_nsecs;
-
-static void create_perfstat_counter(int counter)
-{
-	struct perf_counter_attr *attr = attrs + counter;
-
-	if (scale)
-		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
-				    PERF_FORMAT_TOTAL_TIME_RUNNING;
-
-	if (system_wide) {
-		int cpu;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
-			if (fd[cpu][counter] < 0) {
-				printf("perfstat error: syscall returned with %d (%s)\n",
-						fd[cpu][counter], strerror(errno));
-				exit(-1);
-			}
-		}
-	} else {
-		attr->inherit	= inherit;
-		attr->disabled	= 1;
-
-		fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
-		if (fd[0][counter] < 0) {
-			printf("perfstat error: syscall returned with %d (%s)\n",
-					fd[0][counter], strerror(errno));
-			exit(-1);
-		}
-	}
-}
-
-/*
- * Does the counter have nsecs as a unit?
- */
-static inline int nsec_counter(int counter)
-{
-	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
-		return 0;
-
-	if (attrs[counter].config == PERF_COUNT_CPU_CLOCK)
-		return 1;
-
-	if (attrs[counter].config == PERF_COUNT_TASK_CLOCK)
-		return 1;
-
-	return 0;
-}
-
-/*
- * Read out the results of a single counter:
- */
-static void read_counter(int counter)
-{
-	__u64 *count, single_count[3];
-	ssize_t res;
-	int cpu, nv;
-	int scaled;
-
-	count = event_res[counter];
-
-	count[0] = count[1] = count[2] = 0;
-
-	nv = scale ? 3 : 1;
-	for (cpu = 0; cpu < nr_cpus; cpu ++) {
-		res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
-		assert(res == nv * sizeof(__u64));
-
-		count[0] += single_count[0];
-		if (scale) {
-			count[1] += single_count[1];
-			count[2] += single_count[2];
-		}
-	}
-
-	scaled = 0;
-	if (scale) {
-		if (count[2] == 0) {
-			event_scaled[counter] = -1;
-			count[0] = 0;
-			return;
-		}
-
-		if (count[2] < count[1]) {
-			event_scaled[counter] = 1;
-			count[0] = (unsigned long long)
-				((double)count[0] * count[1] / count[2] + 0.5);
-		}
-	}
-	/*
-	 * Save the full runtime - to allow normalization during printout:
-	 */
-	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_TASK_CLOCK)
-		runtime_nsecs = count[0];
-}
-
-/*
- * Print out the results of a single counter:
- */
-static void print_counter(int counter)
-{
-	__u64 *count;
-	int scaled;
-
-	count = event_res[counter];
-	scaled = event_scaled[counter];
-
-	if (scaled == -1) {
-		fprintf(stderr, " %14s  %-20s\n",
-			"<not counted>", event_name(counter));
-		return;
-	}
-
-	if (nsec_counter(counter)) {
-		double msecs = (double)count[0] / 1000000;
-
-		fprintf(stderr, " %14.6f  %-20s",
-			msecs, event_name(counter));
-		if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-			attrs[counter].config == PERF_COUNT_TASK_CLOCK) {
-
-			fprintf(stderr, " # %11.3f CPU utilization factor",
-				(double)count[0] / (double)walltime_nsecs);
-		}
-	} else {
-		fprintf(stderr, " %14Ld  %-20s",
-			count[0], event_name(counter));
-		if (runtime_nsecs)
-			fprintf(stderr, " # %11.3f M/sec",
-				(double)count[0]/runtime_nsecs*1000.0);
-	}
-	if (scaled)
-		fprintf(stderr, "  (scaled from %.2f%%)",
-			(double) count[2] / count[1] * 100);
-	fprintf(stderr, "\n");
-}
-
-static int do_perfstat(int argc, const char **argv)
-{
-	unsigned long long t0, t1;
-	int counter;
-	int status;
-	int pid;
-	int i;
-
-	if (!system_wide)
-		nr_cpus = 1;
-
-	for (counter = 0; counter < nr_counters; counter++)
-		create_perfstat_counter(counter);
-
-	/*
-	 * Enable counters and exec the command:
-	 */
-	t0 = rdclock();
-	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
-
-	if ((pid = fork()) < 0)
-		perror("failed to fork");
-
-	if (!pid) {
-		if (execvp(argv[0], (char **)argv)) {
-			perror(argv[0]);
-			exit(-1);
-		}
-	}
-
-	while (wait(&status) >= 0)
-		;
-
-	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
-	t1 = rdclock();
-
-	walltime_nsecs = t1 - t0;
-
-	fflush(stdout);
-
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
-
-	for (i = 1; i < argc; i++)
-		fprintf(stderr, " %s", argv[i]);
-
-	fprintf(stderr, "\':\n");
-	fprintf(stderr, "\n");
-
-	for (counter = 0; counter < nr_counters; counter++)
-		read_counter(counter);
-
-	for (counter = 0; counter < nr_counters; counter++)
-		print_counter(counter);
-
-
-	fprintf(stderr, "\n");
-	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
-			(double)(t1-t0)/1e6);
-	fprintf(stderr, "\n");
-
-	return 0;
-}
-
-static void skip_signal(int signo)
-{
-}
-
-static const char * const stat_usage[] = {
-	"perf stat [<options>] <command>",
-	NULL
-};
-
-static const struct option options[] = {
-	OPT_CALLBACK('e', "event", NULL, "event",
-		     "event selector. use 'perf list' to list available events",
-		     parse_events),
-	OPT_BOOLEAN('i', "inherit", &inherit,
-		    "child tasks inherit counters"),
-	OPT_INTEGER('p', "pid", &target_pid,
-		    "stat events on existing pid"),
-	OPT_BOOLEAN('a', "all-cpus", &system_wide,
-			    "system-wide collection from all CPUs"),
-	OPT_BOOLEAN('S', "scale", &scale,
-			    "scale/normalize counters"),
-	OPT_END()
-};
-
-int cmd_stat(int argc, const char **argv, const char *prefix)
-{
-	page_size = sysconf(_SC_PAGE_SIZE);
-
-	memcpy(attrs, default_attrs, sizeof(attrs));
-
-	argc = parse_options(argc, argv, options, stat_usage, 0);
-	if (!argc)
-		usage_with_options(stat_usage, options);
-
-	if (!nr_counters)
-		nr_counters = 8;
-
-	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-	assert(nr_cpus <= MAX_NR_CPUS);
-	assert(nr_cpus >= 0);
-
-	/*
-	 * We dont want to block the signals - that would cause
-	 * child tasks to inherit that and Ctrl-C would not work.
-	 * What we want is for Ctrl-C to work in the exec()-ed
-	 * task, but being ignored by perf stat itself:
-	 */
-	signal(SIGINT,  skip_signal);
-	signal(SIGALRM, skip_signal);
-	signal(SIGABRT, skip_signal);
-
-	return do_perfstat(argc, argv);
-}
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
deleted file mode 100644
index f2e7312..0000000
--- a/Documentation/perf_counter/builtin-top.c
+++ /dev/null
@@ -1,692 +0,0 @@
-/*
- * builtin-top.c
- *
- * Builtin top command: Display a continuously updated profile of
- * any workload, CPU or specific PID.
- *
- * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
- *
- * Improvements and fixes by:
- *
- *   Arjan van de Ven <arjan@linux.intel.com>
- *   Yanmin Zhang <yanmin.zhang@intel.com>
- *   Wu Fengguang <fengguang.wu@intel.com>
- *   Mike Galbraith <efault@gmx.de>
- *   Paul Mackerras <paulus@samba.org>
- *
- * Released under the GPL v2. (and only v2, not any later version)
- */
-#include "builtin.h"
-
-#include "perf.h"
-
-#include "util/symbol.h"
-#include "util/color.h"
-#include "util/util.h"
-#include "util/rbtree.h"
-#include "util/parse-options.h"
-#include "util/parse-events.h"
-
-#include <assert.h>
-#include <fcntl.h>
-
-#include <stdio.h>
-
-#include <errno.h>
-#include <time.h>
-#include <sched.h>
-#include <pthread.h>
-
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/prctl.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <sys/mman.h>
-
-#include <linux/unistd.h>
-#include <linux/types.h>
-
-static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
-
-static int			system_wide			=  0;
-
-static int			default_interval		= 100000;
-
-static __u64			count_filter			=  5;
-static int			print_entries			= 15;
-
-static int			target_pid			= -1;
-static int			profile_cpu			= -1;
-static int			nr_cpus				=  0;
-static unsigned int		realtime_prio			=  0;
-static int			group				=  0;
-static unsigned int		page_size;
-static unsigned int		mmap_pages			= 16;
-static int			freq				=  0;
-
-static char			*sym_filter;
-static unsigned long		filter_start;
-static unsigned long		filter_end;
-
-static int			delay_secs			=  2;
-static int			zero;
-static int			dump_symtab;
-
-/*
- * Symbols
- */
-
-static uint64_t			min_ip;
-static uint64_t			max_ip = -1ll;
-
-struct sym_entry {
-	struct rb_node		rb_node;
-	struct list_head	node;
-	unsigned long		count[MAX_COUNTERS];
-	unsigned long		snap_count;
-	double			weight;
-	int			skip;
-};
-
-struct sym_entry		*sym_filter_entry;
-
-struct dso			*kernel_dso;
-
-/*
- * Symbols will be added here in record_ip and will get out
- * after decayed.
- */
-static LIST_HEAD(active_symbols);
-static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
-
-/*
- * Ordering weight: count-1 * count-2 * ... / count-n
- */
-static double sym_weight(const struct sym_entry *sym)
-{
-	double weight = sym->snap_count;
-	int counter;
-
-	for (counter = 1; counter < nr_counters-1; counter++)
-		weight *= sym->count[counter];
-
-	weight /= (sym->count[counter] + 1);
-
-	return weight;
-}
-
-static long			samples;
-static long			userspace_samples;
-static const char		CONSOLE_CLEAR[] = "[H[2J";
-
-static void __list_insert_active_sym(struct sym_entry *syme)
-{
-	list_add(&syme->node, &active_symbols);
-}
-
-static void list_remove_active_sym(struct sym_entry *syme)
-{
-	pthread_mutex_lock(&active_symbols_lock);
-	list_del_init(&syme->node);
-	pthread_mutex_unlock(&active_symbols_lock);
-}
-
-static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
-{
-	struct rb_node **p = &tree->rb_node;
-	struct rb_node *parent = NULL;
-	struct sym_entry *iter;
-
-	while (*p != NULL) {
-		parent = *p;
-		iter = rb_entry(parent, struct sym_entry, rb_node);
-
-		if (se->weight > iter->weight)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&se->rb_node, parent, p);
-	rb_insert_color(&se->rb_node, tree);
-}
-
-static void print_sym_table(void)
-{
-	int printed = 0, j;
-	int counter;
-	float samples_per_sec = samples/delay_secs;
-	float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
-	float sum_ksamples = 0.0;
-	struct sym_entry *syme, *n;
-	struct rb_root tmp = RB_ROOT;
-	struct rb_node *nd;
-
-	samples = userspace_samples = 0;
-
-	/* Sort the active symbols */
-	pthread_mutex_lock(&active_symbols_lock);
-	syme = list_entry(active_symbols.next, struct sym_entry, node);
-	pthread_mutex_unlock(&active_symbols_lock);
-
-	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
-		syme->snap_count = syme->count[0];
-		if (syme->snap_count != 0) {
-			syme->weight = sym_weight(syme);
-			rb_insert_active_sym(&tmp, syme);
-			sum_ksamples += syme->snap_count;
-
-			for (j = 0; j < nr_counters; j++)
-				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
-		} else
-			list_remove_active_sym(syme);
-	}
-
-	puts(CONSOLE_CLEAR);
-
-	printf(
-"------------------------------------------------------------------------------\n");
-	printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
-		samples_per_sec,
-		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
-
-	if (nr_counters == 1) {
-		printf("%Ld", attrs[0].sample_period);
-		if (freq)
-			printf("Hz ");
-		else
-			printf(" ");
-	}
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (counter)
-			printf("/");
-
-		printf("%s", event_name(counter));
-	}
-
-	printf( "], ");
-
-	if (target_pid != -1)
-		printf(" (target_pid: %d", target_pid);
-	else
-		printf(" (all");
-
-	if (profile_cpu != -1)
-		printf(", cpu: %d)\n", profile_cpu);
-	else {
-		if (target_pid != -1)
-			printf(")\n");
-		else
-			printf(", %d CPUs)\n", nr_cpus);
-	}
-
-	printf("------------------------------------------------------------------------------\n\n");
-
-	if (nr_counters == 1)
-		printf("             samples    pcnt");
-	else
-		printf("  weight     samples    pcnt");
-
-	printf("         RIP          kernel function\n"
-	       	       "  ______     _______   _____   ________________   _______________\n\n"
-	);
-
-	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
-		struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
-		struct symbol *sym = (struct symbol *)(syme + 1);
-		char *color = PERF_COLOR_NORMAL;
-		double pcnt;
-
-		if (++printed > print_entries || syme->snap_count < count_filter)
-			continue;
-
-		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
-					 sum_ksamples));
-
-		/*
-		 * We color high-overhead entries in red, low-overhead
-		 * entries in green - and keep the middle ground normal:
-		 */
-		if (pcnt >= 5.0)
-			color = PERF_COLOR_RED;
-		if (pcnt < 0.5)
-			color = PERF_COLOR_GREEN;
-
-		if (nr_counters == 1)
-			printf("%20.2f - ", syme->weight);
-		else
-			printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
-
-		color_fprintf(stdout, color, "%4.1f%%", pcnt);
-		printf(" - %016llx : %s\n", sym->start, sym->name);
-	}
-}
-
-static void *display_thread(void *arg)
-{
-	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
-	int delay_msecs = delay_secs * 1000;
-
-	printf("PerfTop refresh period: %d seconds\n", delay_secs);
-
-	do {
-		print_sym_table();
-	} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
-
-	printf("key pressed - exiting.\n");
-	exit(0);
-
-	return NULL;
-}
-
-static int symbol_filter(struct dso *self, struct symbol *sym)
-{
-	static int filter_match;
-	struct sym_entry *syme;
-	const char *name = sym->name;
-
-	if (!strcmp(name, "_text") ||
-	    !strcmp(name, "_etext") ||
-	    !strcmp(name, "_sinittext") ||
-	    !strncmp("init_module", name, 11) ||
-	    !strncmp("cleanup_module", name, 14) ||
-	    strstr(name, "_text_start") ||
-	    strstr(name, "_text_end"))
-		return 1;
-
-	syme = dso__sym_priv(self, sym);
-	/* Tag samples to be skipped. */
-	if (!strcmp("default_idle", name) ||
-	    !strcmp("cpu_idle", name) ||
-	    !strcmp("enter_idle", name) ||
-	    !strcmp("exit_idle", name) ||
-	    !strcmp("mwait_idle", name))
-		syme->skip = 1;
-
-	if (filter_match == 1) {
-		filter_end = sym->start;
-		filter_match = -1;
-		if (filter_end - filter_start > 10000) {
-			fprintf(stderr,
-				"hm, too large filter symbol <%s> - skipping.\n",
-				sym_filter);
-			fprintf(stderr, "symbol filter start: %016lx\n",
-				filter_start);
-			fprintf(stderr, "                end: %016lx\n",
-				filter_end);
-			filter_end = filter_start = 0;
-			sym_filter = NULL;
-			sleep(1);
-		}
-	}
-
-	if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
-		filter_match = 1;
-		filter_start = sym->start;
-	}
-
-
-	return 0;
-}
-
-static int parse_symbols(void)
-{
-	struct rb_node *node;
-	struct symbol  *sym;
-
-	kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry));
-	if (kernel_dso == NULL)
-		return -1;
-
-	if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0)
-		goto out_delete_dso;
-
-	node = rb_first(&kernel_dso->syms);
-	sym = rb_entry(node, struct symbol, rb_node);
-	min_ip = sym->start;
-
-	node = rb_last(&kernel_dso->syms);
-	sym = rb_entry(node, struct symbol, rb_node);
-	max_ip = sym->end;
-
-	if (dump_symtab)
-		dso__fprintf(kernel_dso, stderr);
-
-	return 0;
-
-out_delete_dso:
-	dso__delete(kernel_dso);
-	kernel_dso = NULL;
-	return -1;
-}
-
-#define TRACE_COUNT     3
-
-/*
- * Binary search in the histogram table and record the hit:
- */
-static void record_ip(uint64_t ip, int counter)
-{
-	struct symbol *sym = dso__find_symbol(kernel_dso, ip);
-
-	if (sym != NULL) {
-		struct sym_entry *syme = dso__sym_priv(kernel_dso, sym);
-
-		if (!syme->skip) {
-			syme->count[counter]++;
-			pthread_mutex_lock(&active_symbols_lock);
-			if (list_empty(&syme->node) || !syme->node.next)
-				__list_insert_active_sym(syme);
-			pthread_mutex_unlock(&active_symbols_lock);
-			return;
-		}
-	}
-
-	samples--;
-}
-
-static void process_event(uint64_t ip, int counter)
-{
-	samples++;
-
-	if (ip < min_ip || ip > max_ip) {
-		userspace_samples++;
-		return;
-	}
-
-	record_ip(ip, counter);
-}
-
-struct mmap_data {
-	int			counter;
-	void			*base;
-	unsigned int		mask;
-	unsigned int		prev;
-};
-
-static unsigned int mmap_read_head(struct mmap_data *md)
-{
-	struct perf_counter_mmap_page *pc = md->base;
-	int head;
-
-	head = pc->data_head;
-	rmb();
-
-	return head;
-}
-
-struct timeval last_read, this_read;
-
-static void mmap_read(struct mmap_data *md)
-{
-	unsigned int head = mmap_read_head(md);
-	unsigned int old = md->prev;
-	unsigned char *data = md->base + page_size;
-	int diff;
-
-	gettimeofday(&this_read, NULL);
-
-	/*
-	 * If we're further behind than half the buffer, there's a chance
-	 * the writer will bite our tail and mess up the samples under us.
-	 *
-	 * If we somehow ended up ahead of the head, we got messed up.
-	 *
-	 * In either case, truncate and restart at head.
-	 */
-	diff = head - old;
-	if (diff > md->mask / 2 || diff < 0) {
-		struct timeval iv;
-		unsigned long msecs;
-
-		timersub(&this_read, &last_read, &iv);
-		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
-
-		fprintf(stderr, "WARNING: failed to keep up with mmap data."
-				"  Last read %lu msecs ago.\n", msecs);
-
-		/*
-		 * head points to a known good entry, start there.
-		 */
-		old = head;
-	}
-
-	last_read = this_read;
-
-	for (; old != head;) {
-		struct ip_event {
-			struct perf_event_header header;
-			__u64 ip;
-			__u32 pid, target_pid;
-		};
-		struct mmap_event {
-			struct perf_event_header header;
-			__u32 pid, target_pid;
-			__u64 start;
-			__u64 len;
-			__u64 pgoff;
-			char filename[PATH_MAX];
-		};
-
-		typedef union event_union {
-			struct perf_event_header header;
-			struct ip_event ip;
-			struct mmap_event mmap;
-		} event_t;
-
-		event_t *event = (event_t *)&data[old & md->mask];
-
-		event_t event_copy;
-
-		size_t size = event->header.size;
-
-		/*
-		 * Event straddles the mmap boundary -- header should always
-		 * be inside due to u64 alignment of output.
-		 */
-		if ((old & md->mask) + size != ((old + size) & md->mask)) {
-			unsigned int offset = old;
-			unsigned int len = min(sizeof(*event), size), cpy;
-			void *dst = &event_copy;
-
-			do {
-				cpy = min(md->mask + 1 - (offset & md->mask), len);
-				memcpy(dst, &data[offset & md->mask], cpy);
-				offset += cpy;
-				dst += cpy;
-				len -= cpy;
-			} while (len);
-
-			event = &event_copy;
-		}
-
-		old += size;
-
-		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
-			if (event->header.type & PERF_SAMPLE_IP)
-				process_event(event->ip.ip, md->counter);
-		}
-	}
-
-	md->prev = old;
-}
-
-static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
-static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
-
-static int __cmd_top(void)
-{
-	struct perf_counter_attr *attr;
-	pthread_t thread;
-	int i, counter, group_fd, nr_poll = 0;
-	unsigned int cpu;
-	int ret;
-
-	for (i = 0; i < nr_cpus; i++) {
-		group_fd = -1;
-		for (counter = 0; counter < nr_counters; counter++) {
-
-			cpu	= profile_cpu;
-			if (target_pid == -1 && profile_cpu == -1)
-				cpu = i;
-
-			attr = attrs + counter;
-
-			attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-			attr->freq		= freq;
-
-			fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
-			if (fd[i][counter] < 0) {
-				int err = errno;
-
-				error("syscall returned with %d (%s)\n",
-					fd[i][counter], strerror(err));
-				if (err == EPERM)
-					printf("Are you root?\n");
-				exit(-1);
-			}
-			assert(fd[i][counter] >= 0);
-			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
-
-			/*
-			 * First counter acts as the group leader:
-			 */
-			if (group && group_fd == -1)
-				group_fd = fd[i][counter];
-
-			event_array[nr_poll].fd = fd[i][counter];
-			event_array[nr_poll].events = POLLIN;
-			nr_poll++;
-
-			mmap_array[i][counter].counter = counter;
-			mmap_array[i][counter].prev = 0;
-			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
-			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-					PROT_READ, MAP_SHARED, fd[i][counter], 0);
-			if (mmap_array[i][counter].base == MAP_FAILED)
-				die("failed to mmap with %d (%s)\n", errno, strerror(errno));
-		}
-	}
-
-	if (pthread_create(&thread, NULL, display_thread, NULL)) {
-		printf("Could not create display thread.\n");
-		exit(-1);
-	}
-
-	if (realtime_prio) {
-		struct sched_param param;
-
-		param.sched_priority = realtime_prio;
-		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
-			printf("Could not set realtime priority.\n");
-			exit(-1);
-		}
-	}
-
-	while (1) {
-		int hits = samples;
-
-		for (i = 0; i < nr_cpus; i++) {
-			for (counter = 0; counter < nr_counters; counter++)
-				mmap_read(&mmap_array[i][counter]);
-		}
-
-		if (hits == samples)
-			ret = poll(event_array, nr_poll, 100);
-	}
-
-	return 0;
-}
-
-static const char * const top_usage[] = {
-	"perf top [<options>]",
-	NULL
-};
-
-static const struct option options[] = {
-	OPT_CALLBACK('e', "event", NULL, "event",
-		     "event selector. use 'perf list' to list available events",
-		     parse_events),
-	OPT_INTEGER('c', "count", &default_interval,
-		    "event period to sample"),
-	OPT_INTEGER('p', "pid", &target_pid,
-		    "profile events on existing pid"),
-	OPT_BOOLEAN('a', "all-cpus", &system_wide,
-			    "system-wide collection from all CPUs"),
-	OPT_INTEGER('C', "CPU", &profile_cpu,
-		    "CPU to profile on"),
-	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
-		    "number of mmap data pages"),
-	OPT_INTEGER('r', "realtime", &realtime_prio,
-		    "collect data with this RT SCHED_FIFO priority"),
-	OPT_INTEGER('d', "delay", &delay_secs,
-		    "number of seconds to delay between refreshes"),
-	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
-			    "dump the symbol table used for profiling"),
-	OPT_INTEGER('f', "count-filter", &count_filter,
-		    "only display functions with more events than this"),
-	OPT_BOOLEAN('g', "group", &group,
-			    "put the counters into a counter group"),
-	OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
-		    "only display symbols matchig this pattern"),
-	OPT_BOOLEAN('z', "zero", &group,
-		    "zero history across updates"),
-	OPT_INTEGER('F', "freq", &freq,
-		    "profile at this frequency"),
-	OPT_INTEGER('E', "entries", &print_entries,
-		    "display this many functions"),
-	OPT_END()
-};
-
-int cmd_top(int argc, const char **argv, const char *prefix)
-{
-	int counter;
-
-	page_size = sysconf(_SC_PAGE_SIZE);
-
-	argc = parse_options(argc, argv, options, top_usage, 0);
-	if (argc)
-		usage_with_options(top_usage, options);
-
-	if (freq) {
-		default_interval = freq;
-		freq = 1;
-	}
-
-	/* CPU and PID are mutually exclusive */
-	if (target_pid != -1 && profile_cpu != -1) {
-		printf("WARNING: PID switch overriding CPU\n");
-		sleep(1);
-		profile_cpu = -1;
-	}
-
-	if (!nr_counters)
-		nr_counters = 1;
-
-	if (delay_secs < 1)
-		delay_secs = 1;
-
-	parse_symbols();
-
-	/*
-	 * Fill in the ones not specifically initialized via -c:
-	 */
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (attrs[counter].sample_period)
-			continue;
-
-		attrs[counter].sample_period = default_interval;
-	}
-
-	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-	assert(nr_cpus <= MAX_NR_CPUS);
-	assert(nr_cpus >= 0);
-
-	if (target_pid != -1 || profile_cpu != -1)
-		nr_cpus = 1;
-
-	return __cmd_top();
-}
diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h
deleted file mode 100644
index 51d1682..0000000
--- a/Documentation/perf_counter/builtin.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef BUILTIN_H
-#define BUILTIN_H
-
-#include "util/util.h"
-#include "util/strbuf.h"
-
-extern const char perf_version_string[];
-extern const char perf_usage_string[];
-extern const char perf_more_info_string[];
-
-extern void list_common_cmds_help(void);
-extern const char *help_unknown_cmd(const char *cmd);
-extern void prune_packed_objects(int);
-extern int read_line_with_nul(char *buf, int size, FILE *file);
-extern int check_pager_config(const char *cmd);
-
-extern int cmd_annotate(int argc, const char **argv, const char *prefix);
-extern int cmd_help(int argc, const char **argv, const char *prefix);
-extern int cmd_record(int argc, const char **argv, const char *prefix);
-extern int cmd_report(int argc, const char **argv, const char *prefix);
-extern int cmd_stat(int argc, const char **argv, const char *prefix);
-extern int cmd_top(int argc, const char **argv, const char *prefix);
-extern int cmd_version(int argc, const char **argv, const char *prefix);
-extern int cmd_list(int argc, const char **argv, const char *prefix);
-
-#endif
diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt
deleted file mode 100644
index eebce30..0000000
--- a/Documentation/perf_counter/command-list.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# List of known perf commands.
-# command name			category [deprecated] [common]
-#
-perf-annotate			mainporcelain common
-perf-list			mainporcelain common
-perf-record			mainporcelain common
-perf-report			mainporcelain common
-perf-stat			mainporcelain common
-perf-top			mainporcelain common
diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt
deleted file mode 100644
index d325076..0000000
--- a/Documentation/perf_counter/design.txt
+++ /dev/null
@@ -1,442 +0,0 @@
-
-Performance Counters for Linux
-------------------------------
-
-Performance counters are special hardware registers available on most modern
-CPUs. These registers count the number of certain types of hw events: such
-as instructions executed, cachemisses suffered, or branches mis-predicted -
-without slowing down the kernel or applications. These registers can also
-trigger interrupts when a threshold number of events have passed - and can
-thus be used to profile the code that runs on that CPU.
-
-The Linux Performance Counter subsystem provides an abstraction of these
-hardware capabilities. It provides per task and per CPU counters, counter
-groups, and it provides event capabilities on top of those.  It
-provides "virtual" 64-bit counters, regardless of the width of the
-underlying hardware counters.
-
-Performance counters are accessed via special file descriptors.
-There's one file descriptor per virtual counter used.
-
-The special file descriptor is opened via the perf_counter_open()
-system call:
-
-   int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
-			     pid_t pid, int cpu, int group_fd,
-			     unsigned long flags);
-
-The syscall returns the new fd. The fd can be used via the normal
-VFS system calls: read() can be used to read the counter, fcntl()
-can be used to set the blocking mode, etc.
-
-Multiple counters can be kept open at a time, and the counters
-can be poll()ed.
-
-When creating a new counter fd, 'perf_counter_hw_event' is:
-
-struct perf_counter_hw_event {
-        /*
-         * The MSB of the config word signifies if the rest contains cpu
-         * specific (raw) counter configuration data, if unset, the next
-         * 7 bits are an event type and the rest of the bits are the event
-         * identifier.
-         */
-        __u64                   config;
-
-        __u64                   irq_period;
-        __u32                   record_type;
-        __u32                   read_format;
-
-        __u64                   disabled       :  1, /* off by default        */
-                                inherit        :  1, /* children inherit it   */
-                                pinned         :  1, /* must always be on PMU */
-                                exclusive      :  1, /* only group on PMU     */
-                                exclude_user   :  1, /* don't count user      */
-                                exclude_kernel :  1, /* ditto kernel          */
-                                exclude_hv     :  1, /* ditto hypervisor      */
-                                exclude_idle   :  1, /* don't count when idle */
-                                mmap           :  1, /* include mmap data     */
-                                munmap         :  1, /* include munmap data   */
-                                comm           :  1, /* include comm data     */
-
-                                __reserved_1   : 52;
-
-        __u32                   extra_config_len;
-        __u32                   wakeup_events;  /* wakeup every n events */
-
-        __u64                   __reserved_2;
-        __u64                   __reserved_3;
-};
-
-The 'config' field specifies what the counter should count.  It
-is divided into 3 bit-fields:
-
-raw_type: 1 bit   (most significant bit)	0x8000_0000_0000_0000
-type:	  7 bits  (next most significant)	0x7f00_0000_0000_0000
-event_id: 56 bits (least significant)		0x00ff_ffff_ffff_ffff
-
-If 'raw_type' is 1, then the counter will count a hardware event
-specified by the remaining 63 bits of event_config.  The encoding is
-machine-specific.
-
-If 'raw_type' is 0, then the 'type' field says what kind of counter
-this is, with the following encoding:
-
-enum perf_event_types {
-	PERF_TYPE_HARDWARE		= 0,
-	PERF_TYPE_SOFTWARE		= 1,
-	PERF_TYPE_TRACEPOINT		= 2,
-};
-
-A counter of PERF_TYPE_HARDWARE will count the hardware event
-specified by 'event_id':
-
-/*
- * Generalized performance counter event types, used by the hw_event.event_id
- * parameter of the sys_perf_counter_open() syscall:
- */
-enum hw_event_ids {
-	/*
-	 * Common hardware events, generalized by the kernel:
-	 */
-	PERF_COUNT_CPU_CYCLES		= 0,
-	PERF_COUNT_INSTRUCTIONS		= 1,
-	PERF_COUNT_CACHE_REFERENCES	= 2,
-	PERF_COUNT_CACHE_MISSES		= 3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_BRANCH_MISSES	= 5,
-	PERF_COUNT_BUS_CYCLES		= 6,
-};
-
-These are standardized types of events that work relatively uniformly
-on all CPUs that implement Performance Counters support under Linux,
-although there may be variations (e.g., different CPUs might count
-cache references and misses at different levels of the cache hierarchy).
-If a CPU is not able to count the selected event, then the system call
-will return -EINVAL.
-
-More hw_event_types are supported as well, but they are CPU-specific
-and accessed as raw events.  For example, to count "External bus
-cycles while bus lock signal asserted" events on Intel Core CPUs, pass
-in a 0x4064 event_id value and set hw_event.raw_type to 1.
-
-A counter of type PERF_TYPE_SOFTWARE will count one of the available
-software events, selected by 'event_id':
-
-/*
- * Special "software" counters provided by the kernel, even if the hardware
- * does not support performance counters. These counters measure various
- * physical and sw events of the kernel (and allow the profiling of them as
- * well):
- */
-enum sw_event_ids {
-	PERF_COUNT_CPU_CLOCK		= 0,
-	PERF_COUNT_TASK_CLOCK		= 1,
-	PERF_COUNT_PAGE_FAULTS		= 2,
-	PERF_COUNT_CONTEXT_SWITCHES	= 3,
-	PERF_COUNT_CPU_MIGRATIONS	= 4,
-	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
-	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
-};
-
-Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
-tracer is available, and event_id values can be obtained from
-/debug/tracing/events/*/*/id
-
-
-Counters come in two flavours: counting counters and sampling
-counters.  A "counting" counter is one that is used for counting the
-number of events that occur, and is characterised by having
-irq_period = 0.
-
-
-A read() on a counter returns the current value of the counter and possible
-additional values as specified by 'read_format', each value is a u64 (8 bytes)
-in size.
-
-/*
- * Bits that can be set in hw_event.read_format to request that
- * reads on the counter should return the indicated quantities,
- * in increasing order of bit value, after the counter value.
- */
-enum perf_counter_read_format {
-        PERF_FORMAT_TOTAL_TIME_ENABLED  =  1,
-        PERF_FORMAT_TOTAL_TIME_RUNNING  =  2,
-};
-
-Using these additional values one can establish the overcommit ratio for a
-particular counter allowing one to take the round-robin scheduling effect
-into account.
-
-
-A "sampling" counter is one that is set up to generate an interrupt
-every N events, where N is given by 'irq_period'.  A sampling counter
-has irq_period > 0. The record_type controls what data is recorded on each
-interrupt:
-
-/*
- * Bits that can be set in hw_event.record_type to request information
- * in the overflow packets.
- */
-enum perf_counter_record_format {
-        PERF_RECORD_IP          = 1U << 0,
-        PERF_RECORD_TID         = 1U << 1,
-        PERF_RECORD_TIME        = 1U << 2,
-        PERF_RECORD_ADDR        = 1U << 3,
-        PERF_RECORD_GROUP       = 1U << 4,
-        PERF_RECORD_CALLCHAIN   = 1U << 5,
-};
-
-Such (and other) events will be recorded in a ring-buffer, which is
-available to user-space using mmap() (see below).
-
-The 'disabled' bit specifies whether the counter starts out disabled
-or enabled.  If it is initially disabled, it can be enabled by ioctl
-or prctl (see below).
-
-The 'inherit' bit, if set, specifies that this counter should count
-events on descendant tasks as well as the task specified.  This only
-applies to new descendents, not to any existing descendents at the
-time the counter is created (nor to any new descendents of existing
-descendents).
-
-The 'pinned' bit, if set, specifies that the counter should always be
-on the CPU if at all possible.  It only applies to hardware counters
-and only to group leaders.  If a pinned counter cannot be put onto the
-CPU (e.g. because there are not enough hardware counters or because of
-a conflict with some other event), then the counter goes into an
-'error' state, where reads return end-of-file (i.e. read() returns 0)
-until the counter is subsequently enabled or disabled.
-
-The 'exclusive' bit, if set, specifies that when this counter's group
-is on the CPU, it should be the only group using the CPU's counters.
-In future, this will allow sophisticated monitoring programs to supply
-extra configuration information via 'extra_config_len' to exploit
-advanced features of the CPU's Performance Monitor Unit (PMU) that are
-not otherwise accessible and that might disrupt other hardware
-counters.
-
-The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
-way to request that counting of events be restricted to times when the
-CPU is in user, kernel and/or hypervisor mode.
-
-The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap
-operations, these can be used to relate userspace IP addresses to actual
-code, even after the mapping (or even the whole process) is gone,
-these events are recorded in the ring-buffer (see below).
-
-The 'comm' bit allows tracking of process comm data on process creation.
-This too is recorded in the ring-buffer (see below).
-
-The 'pid' parameter to the perf_counter_open() system call allows the
-counter to be specific to a task:
-
- pid == 0: if the pid parameter is zero, the counter is attached to the
- current task.
-
- pid > 0: the counter is attached to a specific task (if the current task
- has sufficient privilege to do so)
-
- pid < 0: all tasks are counted (per cpu counters)
-
-The 'cpu' parameter allows a counter to be made specific to a CPU:
-
- cpu >= 0: the counter is restricted to a specific CPU
- cpu == -1: the counter counts on all CPUs
-
-(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
-
-A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
-events of that task and 'follows' that task to whatever CPU the task
-gets schedule to. Per task counters can be created by any user, for
-their own tasks.
-
-A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
-all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
-
-The 'flags' parameter is currently unused and must be zero.
-
-The 'group_fd' parameter allows counter "groups" to be set up.  A
-counter group has one counter which is the group "leader".  The leader
-is created first, with group_fd = -1 in the perf_counter_open call
-that creates it.  The rest of the group members are created
-subsequently, with group_fd giving the fd of the group leader.
-(A single counter on its own is created with group_fd = -1 and is
-considered to be a group with only 1 member.)
-
-A counter group is scheduled onto the CPU as a unit, that is, it will
-only be put onto the CPU if all of the counters in the group can be
-put onto the CPU.  This means that the values of the member counters
-can be meaningfully compared, added, divided (to get ratios), etc.,
-with each other, since they have counted events for the same set of
-executed instructions.
-
-
-Like stated, asynchronous events, like counter overflow or PROT_EXEC mmap
-tracking are logged into a ring-buffer. This ring-buffer is created and
-accessed through mmap().
-
-The mmap size should be 1+2^n pages, where the first page is a meta-data page
-(struct perf_counter_mmap_page) that contains various bits of information such
-as where the ring-buffer head is.
-
-/*
- * Structure of the page that can be mapped via mmap
- */
-struct perf_counter_mmap_page {
-        __u32   version;                /* version number of this structure */
-        __u32   compat_version;         /* lowest version this is compat with */
-
-        /*
-         * Bits needed to read the hw counters in user-space.
-         *
-         *   u32 seq;
-         *   s64 count;
-         *
-         *   do {
-         *     seq = pc->lock;
-         *
-         *     barrier()
-         *     if (pc->index) {
-         *       count = pmc_read(pc->index - 1);
-         *       count += pc->offset;
-         *     } else
-         *       goto regular_read;
-         *
-         *     barrier();
-         *   } while (pc->lock != seq);
-         *
-         * NOTE: for obvious reason this only works on self-monitoring
-         *       processes.
-         */
-        __u32   lock;                   /* seqlock for synchronization */
-        __u32   index;                  /* hardware counter identifier */
-        __s64   offset;                 /* add to hardware counter value */
-
-        /*
-         * Control data for the mmap() data buffer.
-         *
-         * User-space reading this value should issue an rmb(), on SMP capable
-         * platforms, after reading this value -- see perf_counter_wakeup().
-         */
-        __u32   data_head;              /* head in the data section */
-};
-
-NOTE: the hw-counter userspace bits are arch specific and are currently only
-      implemented on powerpc.
-
-The following 2^n pages are the ring-buffer which contains events of the form:
-
-#define PERF_EVENT_MISC_KERNEL          (1 << 0)
-#define PERF_EVENT_MISC_USER            (1 << 1)
-#define PERF_EVENT_MISC_OVERFLOW        (1 << 2)
-
-struct perf_event_header {
-        __u32   type;
-        __u16   misc;
-        __u16   size;
-};
-
-enum perf_event_type {
-
-        /*
-         * The MMAP events record the PROT_EXEC mappings so that we can
-         * correlate userspace IPs to code. They have the following structure:
-         *
-         * struct {
-         *      struct perf_event_header        header;
-         *
-         *      u32                             pid, tid;
-         *      u64                             addr;
-         *      u64                             len;
-         *      u64                             pgoff;
-         *      char                            filename[];
-         * };
-         */
-        PERF_EVENT_MMAP                 = 1,
-        PERF_EVENT_MUNMAP               = 2,
-
-        /*
-         * struct {
-         *      struct perf_event_header        header;
-         *
-         *      u32                             pid, tid;
-         *      char                            comm[];
-         * };
-         */
-        PERF_EVENT_COMM                 = 3,
-
-        /*
-         * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
-         * will be PERF_RECORD_*
-         *
-         * struct {
-         *      struct perf_event_header        header;
-         *
-         *      { u64                   ip;       } && PERF_RECORD_IP
-         *      { u32                   pid, tid; } && PERF_RECORD_TID
-         *      { u64                   time;     } && PERF_RECORD_TIME
-         *      { u64                   addr;     } && PERF_RECORD_ADDR
-         *
-         *      { u64                   nr;
-         *        { u64 event, val; }   cnt[nr];  } && PERF_RECORD_GROUP
-         *
-         *      { u16                   nr,
-         *                              hv,
-         *                              kernel,
-         *                              user;
-         *        u64                   ips[nr];  } && PERF_RECORD_CALLCHAIN
-         * };
-         */
-};
-
-NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented
-      on x86.
-
-Notification of new events is possible through poll()/select()/epoll() and
-fcntl() managing signals.
-
-Normally a notification is generated for every page filled, however one can
-additionally set perf_counter_hw_event.wakeup_events to generate one every
-so many counter overflow events.
-
-Future work will include a splice() interface to the ring-buffer.
-
-
-Counters can be enabled and disabled in two ways: via ioctl and via
-prctl.  When a counter is disabled, it doesn't count or generate
-events but does continue to exist and maintain its count value.
-
-An individual counter or counter group can be enabled with
-
-	ioctl(fd, PERF_COUNTER_IOC_ENABLE);
-
-or disabled with
-
-	ioctl(fd, PERF_COUNTER_IOC_DISABLE);
-
-Enabling or disabling the leader of a group enables or disables the
-whole group; that is, while the group leader is disabled, none of the
-counters in the group will count.  Enabling or disabling a member of a
-group other than the leader only affects that counter - disabling an
-non-leader stops that counter from counting but doesn't affect any
-other counter.
-
-Additionally, non-inherited overflow counters can use
-
-	ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr);
-
-to enable a counter for 'nr' events, after which it gets disabled again.
-
-A process can enable or disable all the counter groups that are
-attached to it, using prctl:
-
-	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
-
-	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
-
-This applies to all counters on the current process, whether created
-by this process or by another, and doesn't affect any counters that
-this process has created on other processes.  It only enables or
-disables the group leaders, not any other members in the groups.
-
diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c
deleted file mode 100644
index 4eb7259..0000000
--- a/Documentation/perf_counter/perf.c
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- * perf.c
- *
- * Performance analysis utility.
- *
- * This is the main hub from which the sub-commands (perf stat,
- * perf top, perf record, perf report, etc.) are started.
- */
-#include "builtin.h"
-
-#include "util/exec_cmd.h"
-#include "util/cache.h"
-#include "util/quote.h"
-#include "util/run-command.h"
-
-const char perf_usage_string[] =
-	"perf [--version] [--help] COMMAND [ARGS]";
-
-const char perf_more_info_string[] =
-	"See 'perf help COMMAND' for more information on a specific command.";
-
-static int use_pager = -1;
-struct pager_config {
-	const char *cmd;
-	int val;
-};
-
-static int pager_command_config(const char *var, const char *value, void *data)
-{
-	struct pager_config *c = data;
-	if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd))
-		c->val = perf_config_bool(var, value);
-	return 0;
-}
-
-/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */
-int check_pager_config(const char *cmd)
-{
-	struct pager_config c;
-	c.cmd = cmd;
-	c.val = -1;
-	perf_config(pager_command_config, &c);
-	return c.val;
-}
-
-static void commit_pager_choice(void) {
-	switch (use_pager) {
-	case 0:
-		setenv("PERF_PAGER", "cat", 1);
-		break;
-	case 1:
-		/* setup_pager(); */
-		break;
-	default:
-		break;
-	}
-}
-
-static int handle_options(const char*** argv, int* argc, int* envchanged)
-{
-	int handled = 0;
-
-	while (*argc > 0) {
-		const char *cmd = (*argv)[0];
-		if (cmd[0] != '-')
-			break;
-
-		/*
-		 * For legacy reasons, the "version" and "help"
-		 * commands can be written with "--" prepended
-		 * to make them look like flags.
-		 */
-		if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version"))
-			break;
-
-		/*
-		 * Check remaining flags.
-		 */
-		if (!prefixcmp(cmd, "--exec-path")) {
-			cmd += 11;
-			if (*cmd == '=')
-				perf_set_argv_exec_path(cmd + 1);
-			else {
-				puts(perf_exec_path());
-				exit(0);
-			}
-		} else if (!strcmp(cmd, "--html-path")) {
-			puts(system_path(PERF_HTML_PATH));
-			exit(0);
-		} else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) {
-			use_pager = 1;
-		} else if (!strcmp(cmd, "--no-pager")) {
-			use_pager = 0;
-			if (envchanged)
-				*envchanged = 1;
-		} else if (!strcmp(cmd, "--perf-dir")) {
-			if (*argc < 2) {
-				fprintf(stderr, "No directory given for --perf-dir.\n" );
-				usage(perf_usage_string);
-			}
-			setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
-			if (envchanged)
-				*envchanged = 1;
-			(*argv)++;
-			(*argc)--;
-			handled++;
-		} else if (!prefixcmp(cmd, "--perf-dir=")) {
-			setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1);
-			if (envchanged)
-				*envchanged = 1;
-		} else if (!strcmp(cmd, "--work-tree")) {
-			if (*argc < 2) {
-				fprintf(stderr, "No directory given for --work-tree.\n" );
-				usage(perf_usage_string);
-			}
-			setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
-			if (envchanged)
-				*envchanged = 1;
-			(*argv)++;
-			(*argc)--;
-		} else if (!prefixcmp(cmd, "--work-tree=")) {
-			setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1);
-			if (envchanged)
-				*envchanged = 1;
-		} else {
-			fprintf(stderr, "Unknown option: %s\n", cmd);
-			usage(perf_usage_string);
-		}
-
-		(*argv)++;
-		(*argc)--;
-		handled++;
-	}
-	return handled;
-}
-
-static int handle_alias(int *argcp, const char ***argv)
-{
-	int envchanged = 0, ret = 0, saved_errno = errno;
-	int count, option_count;
-	const char** new_argv;
-	const char *alias_command;
-	char *alias_string;
-
-	alias_command = (*argv)[0];
-	alias_string = alias_lookup(alias_command);
-	if (alias_string) {
-		if (alias_string[0] == '!') {
-			if (*argcp > 1) {
-				struct strbuf buf;
-
-				strbuf_init(&buf, PATH_MAX);
-				strbuf_addstr(&buf, alias_string);
-				sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
-				free(alias_string);
-				alias_string = buf.buf;
-			}
-			ret = system(alias_string + 1);
-			if (ret >= 0 && WIFEXITED(ret) &&
-			    WEXITSTATUS(ret) != 127)
-				exit(WEXITSTATUS(ret));
-			die("Failed to run '%s' when expanding alias '%s'",
-			    alias_string + 1, alias_command);
-		}
-		count = split_cmdline(alias_string, &new_argv);
-		if (count < 0)
-			die("Bad alias.%s string", alias_command);
-		option_count = handle_options(&new_argv, &count, &envchanged);
-		if (envchanged)
-			die("alias '%s' changes environment variables\n"
-				 "You can use '!perf' in the alias to do this.",
-				 alias_command);
-		memmove(new_argv - option_count, new_argv,
-				count * sizeof(char *));
-		new_argv -= option_count;
-
-		if (count < 1)
-			die("empty alias for %s", alias_command);
-
-		if (!strcmp(alias_command, new_argv[0]))
-			die("recursive alias: %s", alias_command);
-
-		new_argv = realloc(new_argv, sizeof(char*) *
-				    (count + *argcp + 1));
-		/* insert after command name */
-		memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp);
-		new_argv[count+*argcp] = NULL;
-
-		*argv = new_argv;
-		*argcp += count - 1;
-
-		ret = 1;
-	}
-
-	errno = saved_errno;
-
-	return ret;
-}
-
-const char perf_version_string[] = PERF_VERSION;
-
-#define RUN_SETUP	(1<<0)
-#define USE_PAGER	(1<<1)
-/*
- * require working tree to be present -- anything uses this needs
- * RUN_SETUP for reading from the configuration file.
- */
-#define NEED_WORK_TREE	(1<<2)
-
-struct cmd_struct {
-	const char *cmd;
-	int (*fn)(int, const char **, const char *);
-	int option;
-};
-
-static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
-{
-	int status;
-	struct stat st;
-	const char *prefix;
-
-	prefix = NULL;
-	if (p->option & RUN_SETUP)
-		prefix = NULL; /* setup_perf_directory(); */
-
-	if (use_pager == -1 && p->option & RUN_SETUP)
-		use_pager = check_pager_config(p->cmd);
-	if (use_pager == -1 && p->option & USE_PAGER)
-		use_pager = 1;
-	commit_pager_choice();
-
-	if (p->option & NEED_WORK_TREE)
-		/* setup_work_tree() */;
-
-	status = p->fn(argc, argv, prefix);
-	if (status)
-		return status & 0xff;
-
-	/* Somebody closed stdout? */
-	if (fstat(fileno(stdout), &st))
-		return 0;
-	/* Ignore write errors for pipes and sockets.. */
-	if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
-		return 0;
-
-	/* Check for ENOSPC and EIO errors.. */
-	if (fflush(stdout))
-		die("write failure on standard output: %s", strerror(errno));
-	if (ferror(stdout))
-		die("unknown write failure on standard output");
-	if (fclose(stdout))
-		die("close failed on standard output: %s", strerror(errno));
-	return 0;
-}
-
-static void handle_internal_command(int argc, const char **argv)
-{
-	const char *cmd = argv[0];
-	static struct cmd_struct commands[] = {
-		{ "help", cmd_help, 0 },
-		{ "list", cmd_list, 0 },
-		{ "record", cmd_record, 0 },
-		{ "report", cmd_report, 0 },
-		{ "stat", cmd_stat, 0 },
-		{ "top", cmd_top, 0 },
-		{ "annotate", cmd_annotate, 0 },
-		{ "version", cmd_version, 0 },
-	};
-	int i;
-	static const char ext[] = STRIP_EXTENSION;
-
-	if (sizeof(ext) > 1) {
-		i = strlen(argv[0]) - strlen(ext);
-		if (i > 0 && !strcmp(argv[0] + i, ext)) {
-			char *argv0 = strdup(argv[0]);
-			argv[0] = cmd = argv0;
-			argv0[i] = '\0';
-		}
-	}
-
-	/* Turn "perf cmd --help" into "perf help cmd" */
-	if (argc > 1 && !strcmp(argv[1], "--help")) {
-		argv[1] = argv[0];
-		argv[0] = cmd = "help";
-	}
-
-	for (i = 0; i < ARRAY_SIZE(commands); i++) {
-		struct cmd_struct *p = commands+i;
-		if (strcmp(p->cmd, cmd))
-			continue;
-		exit(run_builtin(p, argc, argv));
-	}
-}
-
-static void execv_dashed_external(const char **argv)
-{
-	struct strbuf cmd = STRBUF_INIT;
-	const char *tmp;
-	int status;
-
-	strbuf_addf(&cmd, "perf-%s", argv[0]);
-
-	/*
-	 * argv[0] must be the perf command, but the argv array
-	 * belongs to the caller, and may be reused in
-	 * subsequent loop iterations. Save argv[0] and
-	 * restore it on error.
-	 */
-	tmp = argv[0];
-	argv[0] = cmd.buf;
-
-	/*
-	 * if we fail because the command is not found, it is
-	 * OK to return. Otherwise, we just pass along the status code.
-	 */
-	status = run_command_v_opt(argv, 0);
-	if (status != -ERR_RUN_COMMAND_EXEC) {
-		if (IS_RUN_COMMAND_ERR(status))
-			die("unable to run '%s'", argv[0]);
-		exit(-status);
-	}
-	errno = ENOENT; /* as if we called execvp */
-
-	argv[0] = tmp;
-
-	strbuf_release(&cmd);
-}
-
-static int run_argv(int *argcp, const char ***argv)
-{
-	int done_alias = 0;
-
-	while (1) {
-		/* See if it's an internal command */
-		handle_internal_command(*argcp, *argv);
-
-		/* .. then try the external ones */
-		execv_dashed_external(*argv);
-
-		/* It could be an alias -- this works around the insanity
-		 * of overriding "perf log" with "perf show" by having
-		 * alias.log = show
-		 */
-		if (done_alias || !handle_alias(argcp, argv))
-			break;
-		done_alias = 1;
-	}
-
-	return done_alias;
-}
-
-
-int main(int argc, const char **argv)
-{
-	const char *cmd;
-
-	cmd = perf_extract_argv0_path(argv[0]);
-	if (!cmd)
-		cmd = "perf-help";
-
-	/*
-	 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
-	 *
-	 *  - cannot take flags in between the "perf" and the "xxxx".
-	 *  - cannot execute it externally (since it would just do
-	 *    the same thing over again)
-	 *
-	 * So we just directly call the internal command handler, and
-	 * die if that one cannot handle it.
-	 */
-	if (!prefixcmp(cmd, "perf-")) {
-		cmd += 5;
-		argv[0] = cmd;
-		handle_internal_command(argc, argv);
-		die("cannot handle %s internally", cmd);
-	}
-
-	/* Look for flags.. */
-	argv++;
-	argc--;
-	handle_options(&argv, &argc, NULL);
-	commit_pager_choice();
-	if (argc > 0) {
-		if (!prefixcmp(argv[0], "--"))
-			argv[0] += 2;
-	} else {
-		/* The user didn't specify a command; give them help */
-		printf("\n usage: %s\n\n", perf_usage_string);
-		list_common_cmds_help();
-		printf("\n %s\n\n", perf_more_info_string);
-		exit(1);
-	}
-	cmd = argv[0];
-
-	/*
-	 * We use PATH to find perf commands, but we prepend some higher
-	 * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH
-	 * environment, and the $(perfexecdir) from the Makefile at build
-	 * time.
-	 */
-	setup_path();
-
-	while (1) {
-		static int done_help = 0;
-		static int was_alias = 0;
-
-		was_alias = run_argv(&argc, &argv);
-		if (errno != ENOENT)
-			break;
-
-		if (was_alias) {
-			fprintf(stderr, "Expansion of alias '%s' failed; "
-				"'%s' is not a perf-command\n",
-				cmd, argv[0]);
-			exit(1);
-		}
-		if (!done_help) {
-			cmd = argv[0] = help_unknown_cmd(cmd);
-			done_help = 1;
-		} else
-			break;
-	}
-
-	fprintf(stderr, "Failed to run command '%s': %s\n",
-		cmd, strerror(errno));
-
-	return 1;
-}
diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
deleted file mode 100644
index af0a504..0000000
--- a/Documentation/perf_counter/perf.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef _PERF_PERF_H
-#define _PERF_PERF_H
-
-#if defined(__x86_64__) || defined(__i386__)
-#include "../../arch/x86/include/asm/unistd.h"
-#define rmb()		asm volatile("lfence" ::: "memory")
-#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
-#endif
-
-#ifdef __powerpc__
-#include "../../arch/powerpc/include/asm/unistd.h"
-#define rmb()		asm volatile ("sync" ::: "memory")
-#define cpu_relax()	asm volatile ("" ::: "memory");
-#endif
-
-#include <time.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
-
-#include "../../include/linux/perf_counter.h"
-
-/*
- * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_COUNTERS_DISABLE   31
-#define PR_TASK_PERF_COUNTERS_ENABLE    32
-
-#ifndef NSEC_PER_SEC
-# define NSEC_PER_SEC			1000000000ULL
-#endif
-
-static inline unsigned long long rdclock(void)
-{
-	struct timespec ts;
-
-	clock_gettime(CLOCK_MONOTONIC, &ts);
-	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
-}
-
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-#define unlikely(x)	__builtin_expect(!!(x), 0)
-#define min(x, y) ({				\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	(void) (&_min1 == &_min2);		\
-	_min1 < _min2 ? _min1 : _min2; })
-
-static inline int
-sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
-		      pid_t pid, int cpu, int group_fd,
-		      unsigned long flags)
-{
-	return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
-		       group_fd, flags);
-}
-
-#define MAX_COUNTERS			256
-#define MAX_NR_CPUS			256
-
-#endif
diff --git a/Documentation/perf_counter/util/PERF-VERSION-GEN b/Documentation/perf_counter/util/PERF-VERSION-GEN
deleted file mode 100755
index c561d15..0000000
--- a/Documentation/perf_counter/util/PERF-VERSION-GEN
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/sh
-
-GVF=PERF-VERSION-FILE
-DEF_VER=v0.0.1.PERF
-
-LF='
-'
-
-# First see if there is a version file (included in release tarballs),
-# then try git-describe, then default.
-if test -f version
-then
-	VN=$(cat version) || VN="$DEF_VER"
-elif test -d .git -o -f .git &&
-	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
-	case "$VN" in
-	*$LF*) (exit 1) ;;
-	v[0-9]*)
-		git update-index -q --refresh
-		test -z "$(git diff-index --name-only HEAD --)" ||
-		VN="$VN-dirty" ;;
-	esac
-then
-	VN=$(echo "$VN" | sed -e 's/-/./g');
-else
-	VN="$DEF_VER"
-fi
-
-VN=$(expr "$VN" : v*'\(.*\)')
-
-if test -r $GVF
-then
-	VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
-else
-	VC=unset
-fi
-test "$VN" = "$VC" || {
-	echo >&2 "PERF_VERSION = $VN"
-	echo "PERF_VERSION = $VN" >$GVF
-}
-
-
diff --git a/Documentation/perf_counter/util/abspath.c b/Documentation/perf_counter/util/abspath.c
deleted file mode 100644
index 61d33b8..0000000
--- a/Documentation/perf_counter/util/abspath.c
+++ /dev/null
@@ -1,117 +0,0 @@
-#include "cache.h"
-
-/*
- * Do not use this for inspecting *tracked* content.  When path is a
- * symlink to a directory, we do not want to say it is a directory when
- * dealing with tracked content in the working tree.
- */
-static int is_directory(const char *path)
-{
-	struct stat st;
-	return (!stat(path, &st) && S_ISDIR(st.st_mode));
-}
-
-/* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXDEPTH 5
-
-const char *make_absolute_path(const char *path)
-{
-	static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1];
-	char cwd[1024] = "";
-	int buf_index = 1, len;
-
-	int depth = MAXDEPTH;
-	char *last_elem = NULL;
-	struct stat st;
-
-	if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
-		die ("Too long path: %.*s", 60, path);
-
-	while (depth--) {
-		if (!is_directory(buf)) {
-			char *last_slash = strrchr(buf, '/');
-			if (last_slash) {
-				*last_slash = '\0';
-				last_elem = xstrdup(last_slash + 1);
-			} else {
-				last_elem = xstrdup(buf);
-				*buf = '\0';
-			}
-		}
-
-		if (*buf) {
-			if (!*cwd && !getcwd(cwd, sizeof(cwd)))
-				die ("Could not get current working directory");
-
-			if (chdir(buf))
-				die ("Could not switch to '%s'", buf);
-		}
-		if (!getcwd(buf, PATH_MAX))
-			die ("Could not get current working directory");
-
-		if (last_elem) {
-			int len = strlen(buf);
-			if (len + strlen(last_elem) + 2 > PATH_MAX)
-				die ("Too long path name: '%s/%s'",
-						buf, last_elem);
-			buf[len] = '/';
-			strcpy(buf + len + 1, last_elem);
-			free(last_elem);
-			last_elem = NULL;
-		}
-
-		if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) {
-			len = readlink(buf, next_buf, PATH_MAX);
-			if (len < 0)
-				die ("Invalid symlink: %s", buf);
-			if (PATH_MAX <= len)
-				die("symbolic link too long: %s", buf);
-			next_buf[len] = '\0';
-			buf = next_buf;
-			buf_index = 1 - buf_index;
-			next_buf = bufs[buf_index];
-		} else
-			break;
-	}
-
-	if (*cwd && chdir(cwd))
-		die ("Could not change back to '%s'", cwd);
-
-	return buf;
-}
-
-static const char *get_pwd_cwd(void)
-{
-	static char cwd[PATH_MAX + 1];
-	char *pwd;
-	struct stat cwd_stat, pwd_stat;
-	if (getcwd(cwd, PATH_MAX) == NULL)
-		return NULL;
-	pwd = getenv("PWD");
-	if (pwd && strcmp(pwd, cwd)) {
-		stat(cwd, &cwd_stat);
-		if (!stat(pwd, &pwd_stat) &&
-		    pwd_stat.st_dev == cwd_stat.st_dev &&
-		    pwd_stat.st_ino == cwd_stat.st_ino) {
-			strlcpy(cwd, pwd, PATH_MAX);
-		}
-	}
-	return cwd;
-}
-
-const char *make_nonrelative_path(const char *path)
-{
-	static char buf[PATH_MAX + 1];
-
-	if (is_absolute_path(path)) {
-		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
-			die("Too long path: %.*s", 60, path);
-	} else {
-		const char *cwd = get_pwd_cwd();
-		if (!cwd)
-			die("Cannot determine the current working directory");
-		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
-			die("Too long path: %.*s", 60, path);
-	}
-	return buf;
-}
diff --git a/Documentation/perf_counter/util/alias.c b/Documentation/perf_counter/util/alias.c
deleted file mode 100644
index 9b3dd2b..0000000
--- a/Documentation/perf_counter/util/alias.c
+++ /dev/null
@@ -1,77 +0,0 @@
-#include "cache.h"
-
-static const char *alias_key;
-static char *alias_val;
-
-static int alias_lookup_cb(const char *k, const char *v, void *cb)
-{
-	if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) {
-		if (!v)
-			return config_error_nonbool(k);
-		alias_val = strdup(v);
-		return 0;
-	}
-	return 0;
-}
-
-char *alias_lookup(const char *alias)
-{
-	alias_key = alias;
-	alias_val = NULL;
-	perf_config(alias_lookup_cb, NULL);
-	return alias_val;
-}
-
-int split_cmdline(char *cmdline, const char ***argv)
-{
-	int src, dst, count = 0, size = 16;
-	char quoted = 0;
-
-	*argv = malloc(sizeof(char*) * size);
-
-	/* split alias_string */
-	(*argv)[count++] = cmdline;
-	for (src = dst = 0; cmdline[src];) {
-		char c = cmdline[src];
-		if (!quoted && isspace(c)) {
-			cmdline[dst++] = 0;
-			while (cmdline[++src]
-					&& isspace(cmdline[src]))
-				; /* skip */
-			if (count >= size) {
-				size += 16;
-				*argv = realloc(*argv, sizeof(char*) * size);
-			}
-			(*argv)[count++] = cmdline + dst;
-		} else if (!quoted && (c == '\'' || c == '"')) {
-			quoted = c;
-			src++;
-		} else if (c == quoted) {
-			quoted = 0;
-			src++;
-		} else {
-			if (c == '\\' && quoted != '\'') {
-				src++;
-				c = cmdline[src];
-				if (!c) {
-					free(*argv);
-					*argv = NULL;
-					return error("cmdline ends with \\");
-				}
-			}
-			cmdline[dst++] = c;
-			src++;
-		}
-	}
-
-	cmdline[dst] = 0;
-
-	if (quoted) {
-		free(*argv);
-		*argv = NULL;
-		return error("unclosed quote");
-	}
-
-	return count;
-}
-
diff --git a/Documentation/perf_counter/util/cache.h b/Documentation/perf_counter/util/cache.h
deleted file mode 100644
index 393d614..0000000
--- a/Documentation/perf_counter/util/cache.h
+++ /dev/null
@@ -1,119 +0,0 @@
-#ifndef CACHE_H
-#define CACHE_H
-
-#include "util.h"
-#include "strbuf.h"
-
-#define PERF_DIR_ENVIRONMENT "PERF_DIR"
-#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
-#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
-#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY"
-#define INDEX_ENVIRONMENT "PERF_INDEX_FILE"
-#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE"
-#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR"
-#define CONFIG_ENVIRONMENT "PERF_CONFIG"
-#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
-#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES"
-#define PERFATTRIBUTES_FILE ".perfattributes"
-#define INFOATTRIBUTES_FILE "info/attributes"
-#define ATTRIBUTE_MACRO_PREFIX "[attr]"
-
-typedef int (*config_fn_t)(const char *, const char *, void *);
-extern int perf_default_config(const char *, const char *, void *);
-extern int perf_config_from_file(config_fn_t fn, const char *, void *);
-extern int perf_config(config_fn_t fn, void *);
-extern int perf_parse_ulong(const char *, unsigned long *);
-extern int perf_config_int(const char *, const char *);
-extern unsigned long perf_config_ulong(const char *, const char *);
-extern int perf_config_bool_or_int(const char *, const char *, int *);
-extern int perf_config_bool(const char *, const char *);
-extern int perf_config_string(const char **, const char *, const char *);
-extern int perf_config_set(const char *, const char *);
-extern int perf_config_set_multivar(const char *, const char *, const char *, int);
-extern int perf_config_rename_section(const char *, const char *);
-extern const char *perf_etc_perfconfig(void);
-extern int check_repository_format_version(const char *var, const char *value, void *cb);
-extern int perf_config_system(void);
-extern int perf_config_global(void);
-extern int config_error_nonbool(const char *);
-extern const char *config_exclusive_filename;
-
-#define MAX_PERFNAME (1000)
-extern char perf_default_email[MAX_PERFNAME];
-extern char perf_default_name[MAX_PERFNAME];
-extern int user_ident_explicitly_given;
-
-extern const char *perf_log_output_encoding;
-extern const char *perf_mailmap_file;
-
-/* IO helper functions */
-extern void maybe_flush_or_die(FILE *, const char *);
-extern int copy_fd(int ifd, int ofd);
-extern int copy_file(const char *dst, const char *src, int mode);
-extern ssize_t read_in_full(int fd, void *buf, size_t count);
-extern ssize_t write_in_full(int fd, const void *buf, size_t count);
-extern void write_or_die(int fd, const void *buf, size_t count);
-extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
-extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
-extern void fsync_or_die(int fd, const char *);
-
-/* pager.c */
-extern void setup_pager(void);
-extern const char *pager_program;
-extern int pager_in_use(void);
-extern int pager_use_color;
-
-extern const char *editor_program;
-extern const char *excludes_file;
-
-char *alias_lookup(const char *alias);
-int split_cmdline(char *cmdline, const char ***argv);
-
-#define alloc_nr(x) (((x)+16)*3/2)
-
-/*
- * Realloc the buffer pointed at by variable 'x' so that it can hold
- * at least 'nr' entries; the number of entries currently allocated
- * is 'alloc', using the standard growing factor alloc_nr() macro.
- *
- * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
- */
-#define ALLOC_GROW(x, nr, alloc) \
-	do { \
-		if ((nr) > alloc) { \
-			if (alloc_nr(alloc) < (nr)) \
-				alloc = (nr); \
-			else \
-				alloc = alloc_nr(alloc); \
-			x = xrealloc((x), alloc * sizeof(*(x))); \
-		} \
-	} while(0)
-
-
-static inline int is_absolute_path(const char *path)
-{
-	return path[0] == '/';
-}
-
-const char *make_absolute_path(const char *path);
-const char *make_nonrelative_path(const char *path);
-const char *make_relative_path(const char *abs, const char *base);
-int normalize_path_copy(char *dst, const char *src);
-int longest_ancestor_length(const char *path, const char *prefix_list);
-char *strip_path_suffix(const char *path, const char *suffix);
-
-extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
-extern int perf_mkstemp(char *path, size_t len, const char *template);
-
-extern char *mksnpath(char *buf, size_t n, const char *fmt, ...)
-	__attribute__((format (printf, 3, 4)));
-extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
-	__attribute__((format (printf, 3, 4)));
-extern char *perf_pathdup(const char *fmt, ...)
-	__attribute__((format (printf, 1, 2)));
-
-extern size_t strlcpy(char *dest, const char *src, size_t size);
-
-#endif /* CACHE_H */
diff --git a/Documentation/perf_counter/util/color.c b/Documentation/perf_counter/util/color.c
deleted file mode 100644
index 9a8c20c..0000000
--- a/Documentation/perf_counter/util/color.c
+++ /dev/null
@@ -1,241 +0,0 @@
-#include "cache.h"
-#include "color.h"
-
-int perf_use_color_default = -1;
-
-static int parse_color(const char *name, int len)
-{
-	static const char * const color_names[] = {
-		"normal", "black", "red", "green", "yellow",
-		"blue", "magenta", "cyan", "white"
-	};
-	char *end;
-	int i;
-	for (i = 0; i < ARRAY_SIZE(color_names); i++) {
-		const char *str = color_names[i];
-		if (!strncasecmp(name, str, len) && !str[len])
-			return i - 1;
-	}
-	i = strtol(name, &end, 10);
-	if (end - name == len && i >= -1 && i <= 255)
-		return i;
-	return -2;
-}
-
-static int parse_attr(const char *name, int len)
-{
-	static const int attr_values[] = { 1, 2, 4, 5, 7 };
-	static const char * const attr_names[] = {
-		"bold", "dim", "ul", "blink", "reverse"
-	};
-	int i;
-	for (i = 0; i < ARRAY_SIZE(attr_names); i++) {
-		const char *str = attr_names[i];
-		if (!strncasecmp(name, str, len) && !str[len])
-			return attr_values[i];
-	}
-	return -1;
-}
-
-void color_parse(const char *value, const char *var, char *dst)
-{
-	color_parse_mem(value, strlen(value), var, dst);
-}
-
-void color_parse_mem(const char *value, int value_len, const char *var,
-		char *dst)
-{
-	const char *ptr = value;
-	int len = value_len;
-	int attr = -1;
-	int fg = -2;
-	int bg = -2;
-
-	if (!strncasecmp(value, "reset", len)) {
-		strcpy(dst, PERF_COLOR_RESET);
-		return;
-	}
-
-	/* [fg [bg]] [attr] */
-	while (len > 0) {
-		const char *word = ptr;
-		int val, wordlen = 0;
-
-		while (len > 0 && !isspace(word[wordlen])) {
-			wordlen++;
-			len--;
-		}
-
-		ptr = word + wordlen;
-		while (len > 0 && isspace(*ptr)) {
-			ptr++;
-			len--;
-		}
-
-		val = parse_color(word, wordlen);
-		if (val >= -1) {
-			if (fg == -2) {
-				fg = val;
-				continue;
-			}
-			if (bg == -2) {
-				bg = val;
-				continue;
-			}
-			goto bad;
-		}
-		val = parse_attr(word, wordlen);
-		if (val < 0 || attr != -1)
-			goto bad;
-		attr = val;
-	}
-
-	if (attr >= 0 || fg >= 0 || bg >= 0) {
-		int sep = 0;
-
-		*dst++ = '\033';
-		*dst++ = '[';
-		if (attr >= 0) {
-			*dst++ = '0' + attr;
-			sep++;
-		}
-		if (fg >= 0) {
-			if (sep++)
-				*dst++ = ';';
-			if (fg < 8) {
-				*dst++ = '3';
-				*dst++ = '0' + fg;
-			} else {
-				dst += sprintf(dst, "38;5;%d", fg);
-			}
-		}
-		if (bg >= 0) {
-			if (sep++)
-				*dst++ = ';';
-			if (bg < 8) {
-				*dst++ = '4';
-				*dst++ = '0' + bg;
-			} else {
-				dst += sprintf(dst, "48;5;%d", bg);
-			}
-		}
-		*dst++ = 'm';
-	}
-	*dst = 0;
-	return;
-bad:
-	die("bad color value '%.*s' for variable '%s'", value_len, value, var);
-}
-
-int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
-{
-	if (value) {
-		if (!strcasecmp(value, "never"))
-			return 0;
-		if (!strcasecmp(value, "always"))
-			return 1;
-		if (!strcasecmp(value, "auto"))
-			goto auto_color;
-	}
-
-	/* Missing or explicit false to turn off colorization */
-	if (!perf_config_bool(var, value))
-		return 0;
-
-	/* any normal truth value defaults to 'auto' */
- auto_color:
-	if (stdout_is_tty < 0)
-		stdout_is_tty = isatty(1);
-	if (stdout_is_tty || (pager_in_use() && pager_use_color)) {
-		char *term = getenv("TERM");
-		if (term && strcmp(term, "dumb"))
-			return 1;
-	}
-	return 0;
-}
-
-int perf_color_default_config(const char *var, const char *value, void *cb)
-{
-	if (!strcmp(var, "color.ui")) {
-		perf_use_color_default = perf_config_colorbool(var, value, -1);
-		return 0;
-	}
-
-	return perf_default_config(var, value, cb);
-}
-
-static int color_vfprintf(FILE *fp, const char *color, const char *fmt,
-		va_list args, const char *trail)
-{
-	int r = 0;
-
-	/*
-	 * Auto-detect:
-	 */
-	if (perf_use_color_default < 0) {
-		if (isatty(1) || pager_in_use())
-			perf_use_color_default = 1;
-		else
-			perf_use_color_default = 0;
-	}
-
-	if (perf_use_color_default && *color)
-		r += fprintf(fp, "%s", color);
-	r += vfprintf(fp, fmt, args);
-	if (perf_use_color_default && *color)
-		r += fprintf(fp, "%s", PERF_COLOR_RESET);
-	if (trail)
-		r += fprintf(fp, "%s", trail);
-	return r;
-}
-
-
-
-int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
-{
-	va_list args;
-	int r;
-
-	va_start(args, fmt);
-	r = color_vfprintf(fp, color, fmt, args, NULL);
-	va_end(args);
-	return r;
-}
-
-int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
-{
-	va_list args;
-	int r;
-	va_start(args, fmt);
-	r = color_vfprintf(fp, color, fmt, args, "\n");
-	va_end(args);
-	return r;
-}
-
-/*
- * This function splits the buffer by newlines and colors the lines individually.
- *
- * Returns 0 on success.
- */
-int color_fwrite_lines(FILE *fp, const char *color,
-		size_t count, const char *buf)
-{
-	if (!*color)
-		return fwrite(buf, count, 1, fp) != 1;
-	while (count) {
-		char *p = memchr(buf, '\n', count);
-		if (p != buf && (fputs(color, fp) < 0 ||
-				fwrite(buf, p ? p - buf : count, 1, fp) != 1 ||
-				fputs(PERF_COLOR_RESET, fp) < 0))
-			return -1;
-		if (!p)
-			return 0;
-		if (fputc('\n', fp) < 0)
-			return -1;
-		count -= p + 1 - buf;
-		buf = p + 1;
-	}
-	return 0;
-}
-
-
diff --git a/Documentation/perf_counter/util/color.h b/Documentation/perf_counter/util/color.h
deleted file mode 100644
index 5abfd37..0000000
--- a/Documentation/perf_counter/util/color.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef COLOR_H
-#define COLOR_H
-
-/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
-#define COLOR_MAXLEN 24
-
-#define PERF_COLOR_NORMAL	""
-#define PERF_COLOR_RESET	"\033[m"
-#define PERF_COLOR_BOLD		"\033[1m"
-#define PERF_COLOR_RED		"\033[31m"
-#define PERF_COLOR_GREEN	"\033[32m"
-#define PERF_COLOR_YELLOW	"\033[33m"
-#define PERF_COLOR_BLUE		"\033[34m"
-#define PERF_COLOR_MAGENTA	"\033[35m"
-#define PERF_COLOR_CYAN		"\033[36m"
-#define PERF_COLOR_BG_RED	"\033[41m"
-
-/*
- * This variable stores the value of color.ui
- */
-extern int perf_use_color_default;
-
-
-/*
- * Use this instead of perf_default_config if you need the value of color.ui.
- */
-int perf_color_default_config(const char *var, const char *value, void *cb);
-
-int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
-void color_parse(const char *value, const char *var, char *dst);
-void color_parse_mem(const char *value, int len, const char *var, char *dst);
-int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
-int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
-int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
-
-#endif /* COLOR_H */
diff --git a/Documentation/perf_counter/util/config.c b/Documentation/perf_counter/util/config.c
deleted file mode 100644
index 3dd13fa..0000000
--- a/Documentation/perf_counter/util/config.c
+++ /dev/null
@@ -1,873 +0,0 @@
-/*
- * GIT - The information manager from hell
- *
- * Copyright (C) Linus Torvalds, 2005
- * Copyright (C) Johannes Schindelin, 2005
- *
- */
-#include "util.h"
-#include "cache.h"
-#include "exec_cmd.h"
-
-#define MAXNAME (256)
-
-static FILE *config_file;
-static const char *config_file_name;
-static int config_linenr;
-static int config_file_eof;
-
-const char *config_exclusive_filename = NULL;
-
-static int get_next_char(void)
-{
-	int c;
-	FILE *f;
-
-	c = '\n';
-	if ((f = config_file) != NULL) {
-		c = fgetc(f);
-		if (c == '\r') {
-			/* DOS like systems */
-			c = fgetc(f);
-			if (c != '\n') {
-				ungetc(c, f);
-				c = '\r';
-			}
-		}
-		if (c == '\n')
-			config_linenr++;
-		if (c == EOF) {
-			config_file_eof = 1;
-			c = '\n';
-		}
-	}
-	return c;
-}
-
-static char *parse_value(void)
-{
-	static char value[1024];
-	int quote = 0, comment = 0, len = 0, space = 0;
-
-	for (;;) {
-		int c = get_next_char();
-		if (len >= sizeof(value) - 1)
-			return NULL;
-		if (c == '\n') {
-			if (quote)
-				return NULL;
-			value[len] = 0;
-			return value;
-		}
-		if (comment)
-			continue;
-		if (isspace(c) && !quote) {
-			space = 1;
-			continue;
-		}
-		if (!quote) {
-			if (c == ';' || c == '#') {
-				comment = 1;
-				continue;
-			}
-		}
-		if (space) {
-			if (len)
-				value[len++] = ' ';
-			space = 0;
-		}
-		if (c == '\\') {
-			c = get_next_char();
-			switch (c) {
-			case '\n':
-				continue;
-			case 't':
-				c = '\t';
-				break;
-			case 'b':
-				c = '\b';
-				break;
-			case 'n':
-				c = '\n';
-				break;
-			/* Some characters escape as themselves */
-			case '\\': case '"':
-				break;
-			/* Reject unknown escape sequences */
-			default:
-				return NULL;
-			}
-			value[len++] = c;
-			continue;
-		}
-		if (c == '"') {
-			quote = 1-quote;
-			continue;
-		}
-		value[len++] = c;
-	}
-}
-
-static inline int iskeychar(int c)
-{
-	return isalnum(c) || c == '-';
-}
-
-static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
-{
-	int c;
-	char *value;
-
-	/* Get the full name */
-	for (;;) {
-		c = get_next_char();
-		if (config_file_eof)
-			break;
-		if (!iskeychar(c))
-			break;
-		name[len++] = tolower(c);
-		if (len >= MAXNAME)
-			return -1;
-	}
-	name[len] = 0;
-	while (c == ' ' || c == '\t')
-		c = get_next_char();
-
-	value = NULL;
-	if (c != '\n') {
-		if (c != '=')
-			return -1;
-		value = parse_value();
-		if (!value)
-			return -1;
-	}
-	return fn(name, value, data);
-}
-
-static int get_extended_base_var(char *name, int baselen, int c)
-{
-	do {
-		if (c == '\n')
-			return -1;
-		c = get_next_char();
-	} while (isspace(c));
-
-	/* We require the format to be '[base "extension"]' */
-	if (c != '"')
-		return -1;
-	name[baselen++] = '.';
-
-	for (;;) {
-		int c = get_next_char();
-		if (c == '\n')
-			return -1;
-		if (c == '"')
-			break;
-		if (c == '\\') {
-			c = get_next_char();
-			if (c == '\n')
-				return -1;
-		}
-		name[baselen++] = c;
-		if (baselen > MAXNAME / 2)
-			return -1;
-	}
-
-	/* Final ']' */
-	if (get_next_char() != ']')
-		return -1;
-	return baselen;
-}
-
-static int get_base_var(char *name)
-{
-	int baselen = 0;
-
-	for (;;) {
-		int c = get_next_char();
-		if (config_file_eof)
-			return -1;
-		if (c == ']')
-			return baselen;
-		if (isspace(c))
-			return get_extended_base_var(name, baselen, c);
-		if (!iskeychar(c) && c != '.')
-			return -1;
-		if (baselen > MAXNAME / 2)
-			return -1;
-		name[baselen++] = tolower(c);
-	}
-}
-
-static int perf_parse_file(config_fn_t fn, void *data)
-{
-	int comment = 0;
-	int baselen = 0;
-	static char var[MAXNAME];
-
-	/* U+FEFF Byte Order Mark in UTF8 */
-	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
-	const unsigned char *bomptr = utf8_bom;
-
-	for (;;) {
-		int c = get_next_char();
-		if (bomptr && *bomptr) {
-			/* We are at the file beginning; skip UTF8-encoded BOM
-			 * if present. Sane editors won't put this in on their
-			 * own, but e.g. Windows Notepad will do it happily. */
-			if ((unsigned char) c == *bomptr) {
-				bomptr++;
-				continue;
-			} else {
-				/* Do not tolerate partial BOM. */
-				if (bomptr != utf8_bom)
-					break;
-				/* No BOM at file beginning. Cool. */
-				bomptr = NULL;
-			}
-		}
-		if (c == '\n') {
-			if (config_file_eof)
-				return 0;
-			comment = 0;
-			continue;
-		}
-		if (comment || isspace(c))
-			continue;
-		if (c == '#' || c == ';') {
-			comment = 1;
-			continue;
-		}
-		if (c == '[') {
-			baselen = get_base_var(var);
-			if (baselen <= 0)
-				break;
-			var[baselen++] = '.';
-			var[baselen] = 0;
-			continue;
-		}
-		if (!isalpha(c))
-			break;
-		var[baselen] = tolower(c);
-		if (get_value(fn, data, var, baselen+1) < 0)
-			break;
-	}
-	die("bad config file line %d in %s", config_linenr, config_file_name);
-}
-
-static int parse_unit_factor(const char *end, unsigned long *val)
-{
-	if (!*end)
-		return 1;
-	else if (!strcasecmp(end, "k")) {
-		*val *= 1024;
-		return 1;
-	}
-	else if (!strcasecmp(end, "m")) {
-		*val *= 1024 * 1024;
-		return 1;
-	}
-	else if (!strcasecmp(end, "g")) {
-		*val *= 1024 * 1024 * 1024;
-		return 1;
-	}
-	return 0;
-}
-
-static int perf_parse_long(const char *value, long *ret)
-{
-	if (value && *value) {
-		char *end;
-		long val = strtol(value, &end, 0);
-		unsigned long factor = 1;
-		if (!parse_unit_factor(end, &factor))
-			return 0;
-		*ret = val * factor;
-		return 1;
-	}
-	return 0;
-}
-
-int perf_parse_ulong(const char *value, unsigned long *ret)
-{
-	if (value && *value) {
-		char *end;
-		unsigned long val = strtoul(value, &end, 0);
-		if (!parse_unit_factor(end, &val))
-			return 0;
-		*ret = val;
-		return 1;
-	}
-	return 0;
-}
-
-static void die_bad_config(const char *name)
-{
-	if (config_file_name)
-		die("bad config value for '%s' in %s", name, config_file_name);
-	die("bad config value for '%s'", name);
-}
-
-int perf_config_int(const char *name, const char *value)
-{
-	long ret = 0;
-	if (!perf_parse_long(value, &ret))
-		die_bad_config(name);
-	return ret;
-}
-
-unsigned long perf_config_ulong(const char *name, const char *value)
-{
-	unsigned long ret;
-	if (!perf_parse_ulong(value, &ret))
-		die_bad_config(name);
-	return ret;
-}
-
-int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
-{
-	*is_bool = 1;
-	if (!value)
-		return 1;
-	if (!*value)
-		return 0;
-	if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
-		return 1;
-	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
-		return 0;
-	*is_bool = 0;
-	return perf_config_int(name, value);
-}
-
-int perf_config_bool(const char *name, const char *value)
-{
-	int discard;
-	return !!perf_config_bool_or_int(name, value, &discard);
-}
-
-int perf_config_string(const char **dest, const char *var, const char *value)
-{
-	if (!value)
-		return config_error_nonbool(var);
-	*dest = strdup(value);
-	return 0;
-}
-
-static int perf_default_core_config(const char *var, const char *value)
-{
-	/* Add other config variables here and to Documentation/config.txt. */
-	return 0;
-}
-
-int perf_default_config(const char *var, const char *value, void *dummy)
-{
-	if (!prefixcmp(var, "core."))
-		return perf_default_core_config(var, value);
-
-	/* Add other config variables here and to Documentation/config.txt. */
-	return 0;
-}
-
-int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
-{
-	int ret;
-	FILE *f = fopen(filename, "r");
-
-	ret = -1;
-	if (f) {
-		config_file = f;
-		config_file_name = filename;
-		config_linenr = 1;
-		config_file_eof = 0;
-		ret = perf_parse_file(fn, data);
-		fclose(f);
-		config_file_name = NULL;
-	}
-	return ret;
-}
-
-const char *perf_etc_perfconfig(void)
-{
-	static const char *system_wide;
-	if (!system_wide)
-		system_wide = system_path(ETC_PERFCONFIG);
-	return system_wide;
-}
-
-static int perf_env_bool(const char *k, int def)
-{
-	const char *v = getenv(k);
-	return v ? perf_config_bool(k, v) : def;
-}
-
-int perf_config_system(void)
-{
-	return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
-}
-
-int perf_config_global(void)
-{
-	return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
-}
-
-int perf_config(config_fn_t fn, void *data)
-{
-	int ret = 0, found = 0;
-	char *repo_config = NULL;
-	const char *home = NULL;
-
-	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
-	if (config_exclusive_filename)
-		return perf_config_from_file(fn, config_exclusive_filename, data);
-	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
-		ret += perf_config_from_file(fn, perf_etc_perfconfig(),
-					    data);
-		found += 1;
-	}
-
-	home = getenv("HOME");
-	if (perf_config_global() && home) {
-		char *user_config = strdup(mkpath("%s/.perfconfig", home));
-		if (!access(user_config, R_OK)) {
-			ret += perf_config_from_file(fn, user_config, data);
-			found += 1;
-		}
-		free(user_config);
-	}
-
-	repo_config = perf_pathdup("config");
-	if (!access(repo_config, R_OK)) {
-		ret += perf_config_from_file(fn, repo_config, data);
-		found += 1;
-	}
-	free(repo_config);
-	if (found == 0)
-		return -1;
-	return ret;
-}
-
-/*
- * Find all the stuff for perf_config_set() below.
- */
-
-#define MAX_MATCHES 512
-
-static struct {
-	int baselen;
-	char* key;
-	int do_not_match;
-	regex_t* value_regex;
-	int multi_replace;
-	size_t offset[MAX_MATCHES];
-	enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state;
-	int seen;
-} store;
-
-static int matches(const char* key, const char* value)
-{
-	return !strcmp(key, store.key) &&
-		(store.value_regex == NULL ||
-		 (store.do_not_match ^
-		  !regexec(store.value_regex, value, 0, NULL, 0)));
-}
-
-static int store_aux(const char* key, const char* value, void *cb)
-{
-	const char *ep;
-	size_t section_len;
-
-	switch (store.state) {
-	case KEY_SEEN:
-		if (matches(key, value)) {
-			if (store.seen == 1 && store.multi_replace == 0) {
-				warning("%s has multiple values", key);
-			} else if (store.seen >= MAX_MATCHES) {
-				error("too many matches for %s", key);
-				return 1;
-			}
-
-			store.offset[store.seen] = ftell(config_file);
-			store.seen++;
-		}
-		break;
-	case SECTION_SEEN:
-		/*
-		 * What we are looking for is in store.key (both
-		 * section and var), and its section part is baselen
-		 * long.  We found key (again, both section and var).
-		 * We would want to know if this key is in the same
-		 * section as what we are looking for.  We already
-		 * know we are in the same section as what should
-		 * hold store.key.
-		 */
-		ep = strrchr(key, '.');
-		section_len = ep - key;
-
-		if ((section_len != store.baselen) ||
-		    memcmp(key, store.key, section_len+1)) {
-			store.state = SECTION_END_SEEN;
-			break;
-		}
-
-		/*
-		 * Do not increment matches: this is no match, but we
-		 * just made sure we are in the desired section.
-		 */
-		store.offset[store.seen] = ftell(config_file);
-		/* fallthru */
-	case SECTION_END_SEEN:
-	case START:
-		if (matches(key, value)) {
-			store.offset[store.seen] = ftell(config_file);
-			store.state = KEY_SEEN;
-			store.seen++;
-		} else {
-			if (strrchr(key, '.') - key == store.baselen &&
-			      !strncmp(key, store.key, store.baselen)) {
-					store.state = SECTION_SEEN;
-					store.offset[store.seen] = ftell(config_file);
-			}
-		}
-	}
-	return 0;
-}
-
-static int store_write_section(int fd, const char* key)
-{
-	const char *dot;
-	int i, success;
-	struct strbuf sb = STRBUF_INIT;
-
-	dot = memchr(key, '.', store.baselen);
-	if (dot) {
-		strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key);
-		for (i = dot - key + 1; i < store.baselen; i++) {
-			if (key[i] == '"' || key[i] == '\\')
-				strbuf_addch(&sb, '\\');
-			strbuf_addch(&sb, key[i]);
-		}
-		strbuf_addstr(&sb, "\"]\n");
-	} else {
-		strbuf_addf(&sb, "[%.*s]\n", store.baselen, key);
-	}
-
-	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
-	strbuf_release(&sb);
-
-	return success;
-}
-
-static int store_write_pair(int fd, const char* key, const char* value)
-{
-	int i, success;
-	int length = strlen(key + store.baselen + 1);
-	const char *quote = "";
-	struct strbuf sb = STRBUF_INIT;
-
-	/*
-	 * Check to see if the value needs to be surrounded with a dq pair.
-	 * Note that problematic characters are always backslash-quoted; this
-	 * check is about not losing leading or trailing SP and strings that
-	 * follow beginning-of-comment characters (i.e. ';' and '#') by the
-	 * configuration parser.
-	 */
-	if (value[0] == ' ')
-		quote = "\"";
-	for (i = 0; value[i]; i++)
-		if (value[i] == ';' || value[i] == '#')
-			quote = "\"";
-	if (i && value[i - 1] == ' ')
-		quote = "\"";
-
-	strbuf_addf(&sb, "\t%.*s = %s",
-		    length, key + store.baselen + 1, quote);
-
-	for (i = 0; value[i]; i++)
-		switch (value[i]) {
-		case '\n':
-			strbuf_addstr(&sb, "\\n");
-			break;
-		case '\t':
-			strbuf_addstr(&sb, "\\t");
-			break;
-		case '"':
-		case '\\':
-			strbuf_addch(&sb, '\\');
-		default:
-			strbuf_addch(&sb, value[i]);
-			break;
-		}
-	strbuf_addf(&sb, "%s\n", quote);
-
-	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
-	strbuf_release(&sb);
-
-	return success;
-}
-
-static ssize_t find_beginning_of_line(const char* contents, size_t size,
-	size_t offset_, int* found_bracket)
-{
-	size_t equal_offset = size, bracket_offset = size;
-	ssize_t offset;
-
-contline:
-	for (offset = offset_-2; offset > 0
-			&& contents[offset] != '\n'; offset--)
-		switch (contents[offset]) {
-			case '=': equal_offset = offset; break;
-			case ']': bracket_offset = offset; break;
-		}
-	if (offset > 0 && contents[offset-1] == '\\') {
-		offset_ = offset;
-		goto contline;
-	}
-	if (bracket_offset < equal_offset) {
-		*found_bracket = 1;
-		offset = bracket_offset+1;
-	} else
-		offset++;
-
-	return offset;
-}
-
-int perf_config_set(const char* key, const char* value)
-{
-	return perf_config_set_multivar(key, value, NULL, 0);
-}
-
-/*
- * If value==NULL, unset in (remove from) config,
- * if value_regex!=NULL, disregard key/value pairs where value does not match.
- * if multi_replace==0, nothing, or only one matching key/value is replaced,
- *     else all matching key/values (regardless how many) are removed,
- *     before the new pair is written.
- *
- * Returns 0 on success.
- *
- * This function does this:
- *
- * - it locks the config file by creating ".perf/config.lock"
- *
- * - it then parses the config using store_aux() as validator to find
- *   the position on the key/value pair to replace. If it is to be unset,
- *   it must be found exactly once.
- *
- * - the config file is mmap()ed and the part before the match (if any) is
- *   written to the lock file, then the changed part and the rest.
- *
- * - the config file is removed and the lock file rename()d to it.
- *
- */
-int perf_config_set_multivar(const char* key, const char* value,
-	const char* value_regex, int multi_replace)
-{
-	int i, dot;
-	int fd = -1, in_fd;
-	int ret = 0;
-	char* config_filename;
-	const char* last_dot = strrchr(key, '.');
-
-	if (config_exclusive_filename)
-		config_filename = strdup(config_exclusive_filename);
-	else
-		config_filename = perf_pathdup("config");
-
-	/*
-	 * Since "key" actually contains the section name and the real
-	 * key name separated by a dot, we have to know where the dot is.
-	 */
-
-	if (last_dot == NULL) {
-		error("key does not contain a section: %s", key);
-		ret = 2;
-		goto out_free;
-	}
-	store.baselen = last_dot - key;
-
-	store.multi_replace = multi_replace;
-
-	/*
-	 * Validate the key and while at it, lower case it for matching.
-	 */
-	store.key = malloc(strlen(key) + 1);
-	dot = 0;
-	for (i = 0; key[i]; i++) {
-		unsigned char c = key[i];
-		if (c == '.')
-			dot = 1;
-		/* Leave the extended basename untouched.. */
-		if (!dot || i > store.baselen) {
-			if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) {
-				error("invalid key: %s", key);
-				free(store.key);
-				ret = 1;
-				goto out_free;
-			}
-			c = tolower(c);
-		} else if (c == '\n') {
-			error("invalid key (newline): %s", key);
-			free(store.key);
-			ret = 1;
-			goto out_free;
-		}
-		store.key[i] = c;
-	}
-	store.key[i] = 0;
-
-	/*
-	 * If .perf/config does not exist yet, write a minimal version.
-	 */
-	in_fd = open(config_filename, O_RDONLY);
-	if ( in_fd < 0 ) {
-		free(store.key);
-
-		if ( ENOENT != errno ) {
-			error("opening %s: %s", config_filename,
-			      strerror(errno));
-			ret = 3; /* same as "invalid config file" */
-			goto out_free;
-		}
-		/* if nothing to unset, error out */
-		if (value == NULL) {
-			ret = 5;
-			goto out_free;
-		}
-
-		store.key = (char*)key;
-		if (!store_write_section(fd, key) ||
-		    !store_write_pair(fd, key, value))
-			goto write_err_out;
-	} else {
-		struct stat st;
-		char* contents;
-		size_t contents_sz, copy_begin, copy_end;
-		int i, new_line = 0;
-
-		if (value_regex == NULL)
-			store.value_regex = NULL;
-		else {
-			if (value_regex[0] == '!') {
-				store.do_not_match = 1;
-				value_regex++;
-			} else
-				store.do_not_match = 0;
-
-			store.value_regex = (regex_t*)malloc(sizeof(regex_t));
-			if (regcomp(store.value_regex, value_regex,
-					REG_EXTENDED)) {
-				error("invalid pattern: %s", value_regex);
-				free(store.value_regex);
-				ret = 6;
-				goto out_free;
-			}
-		}
-
-		store.offset[0] = 0;
-		store.state = START;
-		store.seen = 0;
-
-		/*
-		 * After this, store.offset will contain the *end* offset
-		 * of the last match, or remain at 0 if no match was found.
-		 * As a side effect, we make sure to transform only a valid
-		 * existing config file.
-		 */
-		if (perf_config_from_file(store_aux, config_filename, NULL)) {
-			error("invalid config file %s", config_filename);
-			free(store.key);
-			if (store.value_regex != NULL) {
-				regfree(store.value_regex);
-				free(store.value_regex);
-			}
-			ret = 3;
-			goto out_free;
-		}
-
-		free(store.key);
-		if (store.value_regex != NULL) {
-			regfree(store.value_regex);
-			free(store.value_regex);
-		}
-
-		/* if nothing to unset, or too many matches, error out */
-		if ((store.seen == 0 && value == NULL) ||
-				(store.seen > 1 && multi_replace == 0)) {
-			ret = 5;
-			goto out_free;
-		}
-
-		fstat(in_fd, &st);
-		contents_sz = xsize_t(st.st_size);
-		contents = mmap(NULL, contents_sz, PROT_READ,
-			MAP_PRIVATE, in_fd, 0);
-		close(in_fd);
-
-		if (store.seen == 0)
-			store.seen = 1;
-
-		for (i = 0, copy_begin = 0; i < store.seen; i++) {
-			if (store.offset[i] == 0) {
-				store.offset[i] = copy_end = contents_sz;
-			} else if (store.state != KEY_SEEN) {
-				copy_end = store.offset[i];
-			} else
-				copy_end = find_beginning_of_line(
-					contents, contents_sz,
-					store.offset[i]-2, &new_line);
-
-			if (copy_end > 0 && contents[copy_end-1] != '\n')
-				new_line = 1;
-
-			/* write the first part of the config */
-			if (copy_end > copy_begin) {
-				if (write_in_full(fd, contents + copy_begin,
-						  copy_end - copy_begin) <
-				    copy_end - copy_begin)
-					goto write_err_out;
-				if (new_line &&
-				    write_in_full(fd, "\n", 1) != 1)
-					goto write_err_out;
-			}
-			copy_begin = store.offset[i];
-		}
-
-		/* write the pair (value == NULL means unset) */
-		if (value != NULL) {
-			if (store.state == START) {
-				if (!store_write_section(fd, key))
-					goto write_err_out;
-			}
-			if (!store_write_pair(fd, key, value))
-				goto write_err_out;
-		}
-
-		/* write the rest of the config */
-		if (copy_begin < contents_sz)
-			if (write_in_full(fd, contents + copy_begin,
-					  contents_sz - copy_begin) <
-			    contents_sz - copy_begin)
-				goto write_err_out;
-
-		munmap(contents, contents_sz);
-	}
-
-	ret = 0;
-
-out_free:
-	free(config_filename);
-	return ret;
-
-write_err_out:
-	goto out_free;
-
-}
-
-/*
- * Call this to report error for your variable that should not
- * get a boolean value (i.e. "[my] var" means "true").
- */
-int config_error_nonbool(const char *var)
-{
-	return error("Missing value for '%s'", var);
-}
diff --git a/Documentation/perf_counter/util/ctype.c b/Documentation/perf_counter/util/ctype.c
deleted file mode 100644
index b90ec00..0000000
--- a/Documentation/perf_counter/util/ctype.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Sane locale-independent, ASCII ctype.
- *
- * No surprises, and works with signed and unsigned chars.
- */
-#include "cache.h"
-
-enum {
-	S = GIT_SPACE,
-	A = GIT_ALPHA,
-	D = GIT_DIGIT,
-	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
-	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | * */
-};
-
-unsigned char sane_ctype[256] = {
-	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
-	S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0,		/*  32.. 47 */
-	D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G,		/*  48.. 63 */
-	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
-	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0,		/*  80.. 95 */
-	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
-	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0,		/* 112..127 */
-	/* Nothing in the 128.. range */
-};
diff --git a/Documentation/perf_counter/util/environment.c b/Documentation/perf_counter/util/environment.c
deleted file mode 100644
index 275b0ee..0000000
--- a/Documentation/perf_counter/util/environment.c
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * We put all the perf config variables in this same object
- * file, so that programs can link against the config parser
- * without having to link against all the rest of perf.
- */
-#include "cache.h"
-
-const char *pager_program;
-int pager_use_color = 1;
diff --git a/Documentation/perf_counter/util/exec_cmd.c b/Documentation/perf_counter/util/exec_cmd.c
deleted file mode 100644
index d392922..0000000
--- a/Documentation/perf_counter/util/exec_cmd.c
+++ /dev/null
@@ -1,165 +0,0 @@
-#include "cache.h"
-#include "exec_cmd.h"
-#include "quote.h"
-#define MAX_ARGS	32
-
-extern char **environ;
-static const char *argv_exec_path;
-static const char *argv0_path;
-
-const char *system_path(const char *path)
-{
-#ifdef RUNTIME_PREFIX
-	static const char *prefix;
-#else
-	static const char *prefix = PREFIX;
-#endif
-	struct strbuf d = STRBUF_INIT;
-
-	if (is_absolute_path(path))
-		return path;
-
-#ifdef RUNTIME_PREFIX
-	assert(argv0_path);
-	assert(is_absolute_path(argv0_path));
-
-	if (!prefix &&
-	    !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
-	    !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
-	    !(prefix = strip_path_suffix(argv0_path, "perf"))) {
-		prefix = PREFIX;
-		fprintf(stderr, "RUNTIME_PREFIX requested, "
-				"but prefix computation failed.  "
-				"Using static fallback '%s'.\n", prefix);
-	}
-#endif
-
-	strbuf_addf(&d, "%s/%s", prefix, path);
-	path = strbuf_detach(&d, NULL);
-	return path;
-}
-
-const char *perf_extract_argv0_path(const char *argv0)
-{
-	const char *slash;
-
-	if (!argv0 || !*argv0)
-		return NULL;
-	slash = argv0 + strlen(argv0);
-
-	while (argv0 <= slash && !is_dir_sep(*slash))
-		slash--;
-
-	if (slash >= argv0) {
-		argv0_path = strndup(argv0, slash - argv0);
-		return slash + 1;
-	}
-
-	return argv0;
-}
-
-void perf_set_argv_exec_path(const char *exec_path)
-{
-	argv_exec_path = exec_path;
-	/*
-	 * Propagate this setting to external programs.
-	 */
-	setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
-}
-
-
-/* Returns the highest-priority, location to look for perf programs. */
-const char *perf_exec_path(void)
-{
-	const char *env;
-
-	if (argv_exec_path)
-		return argv_exec_path;
-
-	env = getenv(EXEC_PATH_ENVIRONMENT);
-	if (env && *env) {
-		return env;
-	}
-
-	return system_path(PERF_EXEC_PATH);
-}
-
-static void add_path(struct strbuf *out, const char *path)
-{
-	if (path && *path) {
-		if (is_absolute_path(path))
-			strbuf_addstr(out, path);
-		else
-			strbuf_addstr(out, make_nonrelative_path(path));
-
-		strbuf_addch(out, PATH_SEP);
-	}
-}
-
-void setup_path(void)
-{
-	const char *old_path = getenv("PATH");
-	struct strbuf new_path = STRBUF_INIT;
-
-	add_path(&new_path, perf_exec_path());
-	add_path(&new_path, argv0_path);
-
-	if (old_path)
-		strbuf_addstr(&new_path, old_path);
-	else
-		strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin");
-
-	setenv("PATH", new_path.buf, 1);
-
-	strbuf_release(&new_path);
-}
-
-const char **prepare_perf_cmd(const char **argv)
-{
-	int argc;
-	const char **nargv;
-
-	for (argc = 0; argv[argc]; argc++)
-		; /* just counting */
-	nargv = malloc(sizeof(*nargv) * (argc + 2));
-
-	nargv[0] = "perf";
-	for (argc = 0; argv[argc]; argc++)
-		nargv[argc + 1] = argv[argc];
-	nargv[argc + 1] = NULL;
-	return nargv;
-}
-
-int execv_perf_cmd(const char **argv) {
-	const char **nargv = prepare_perf_cmd(argv);
-
-	/* execvp() can only ever return if it fails */
-	execvp("perf", (char **)nargv);
-
-	free(nargv);
-	return -1;
-}
-
-
-int execl_perf_cmd(const char *cmd,...)
-{
-	int argc;
-	const char *argv[MAX_ARGS + 1];
-	const char *arg;
-	va_list param;
-
-	va_start(param, cmd);
-	argv[0] = cmd;
-	argc = 1;
-	while (argc < MAX_ARGS) {
-		arg = argv[argc++] = va_arg(param, char *);
-		if (!arg)
-			break;
-	}
-	va_end(param);
-	if (MAX_ARGS <= argc)
-		return error("too many args to run %s", cmd);
-
-	argv[argc] = NULL;
-	return execv_perf_cmd(argv);
-}
diff --git a/Documentation/perf_counter/util/exec_cmd.h b/Documentation/perf_counter/util/exec_cmd.h
deleted file mode 100644
index effe25e..0000000
--- a/Documentation/perf_counter/util/exec_cmd.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef PERF_EXEC_CMD_H
-#define PERF_EXEC_CMD_H
-
-extern void perf_set_argv_exec_path(const char *exec_path);
-extern const char *perf_extract_argv0_path(const char *path);
-extern const char *perf_exec_path(void);
-extern void setup_path(void);
-extern const char **prepare_perf_cmd(const char **argv);
-extern int execv_perf_cmd(const char **argv); /* NULL terminated */
-extern int execl_perf_cmd(const char *cmd, ...);
-extern const char *system_path(const char *path);
-
-#endif /* PERF_EXEC_CMD_H */
diff --git a/Documentation/perf_counter/util/generate-cmdlist.sh b/Documentation/perf_counter/util/generate-cmdlist.sh
deleted file mode 100755
index f06f6fd..0000000
--- a/Documentation/perf_counter/util/generate-cmdlist.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/sh
-
-echo "/* Automatically generated by $0 */
-struct cmdname_help
-{
-    char name[16];
-    char help[80];
-};
-
-static struct cmdname_help common_cmds[] = {"
-
-sed -n -e 's/^perf-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
-sort |
-while read cmd
-do
-     sed -n '
-     /^NAME/,/perf-'"$cmd"'/H
-     ${
-            x
-            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
-	    p
-     }' "Documentation/perf-$cmd.txt"
-done
-echo "};"
diff --git a/Documentation/perf_counter/util/help.c b/Documentation/perf_counter/util/help.c
deleted file mode 100644
index 6653f7d..0000000
--- a/Documentation/perf_counter/util/help.c
+++ /dev/null
@@ -1,367 +0,0 @@
-#include "cache.h"
-#include "../builtin.h"
-#include "exec_cmd.h"
-#include "levenshtein.h"
-#include "help.h"
-
-/* most GUI terminals set COLUMNS (although some don't export it) */
-static int term_columns(void)
-{
-	char *col_string = getenv("COLUMNS");
-	int n_cols;
-
-	if (col_string && (n_cols = atoi(col_string)) > 0)
-		return n_cols;
-
-#ifdef TIOCGWINSZ
-	{
-		struct winsize ws;
-		if (!ioctl(1, TIOCGWINSZ, &ws)) {
-			if (ws.ws_col)
-				return ws.ws_col;
-		}
-	}
-#endif
-
-	return 80;
-}
-
-void add_cmdname(struct cmdnames *cmds, const char *name, int len)
-{
-	struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
-
-	ent->len = len;
-	memcpy(ent->name, name, len);
-	ent->name[len] = 0;
-
-	ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
-	cmds->names[cmds->cnt++] = ent;
-}
-
-static void clean_cmdnames(struct cmdnames *cmds)
-{
-	int i;
-	for (i = 0; i < cmds->cnt; ++i)
-		free(cmds->names[i]);
-	free(cmds->names);
-	cmds->cnt = 0;
-	cmds->alloc = 0;
-}
-
-static int cmdname_compare(const void *a_, const void *b_)
-{
-	struct cmdname *a = *(struct cmdname **)a_;
-	struct cmdname *b = *(struct cmdname **)b_;
-	return strcmp(a->name, b->name);
-}
-
-static void uniq(struct cmdnames *cmds)
-{
-	int i, j;
-
-	if (!cmds->cnt)
-		return;
-
-	for (i = j = 1; i < cmds->cnt; i++)
-		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
-			cmds->names[j++] = cmds->names[i];
-
-	cmds->cnt = j;
-}
-
-void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
-{
-	int ci, cj, ei;
-	int cmp;
-
-	ci = cj = ei = 0;
-	while (ci < cmds->cnt && ei < excludes->cnt) {
-		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
-		if (cmp < 0)
-			cmds->names[cj++] = cmds->names[ci++];
-		else if (cmp == 0)
-			ci++, ei++;
-		else if (cmp > 0)
-			ei++;
-	}
-
-	while (ci < cmds->cnt)
-		cmds->names[cj++] = cmds->names[ci++];
-
-	cmds->cnt = cj;
-}
-
-static void pretty_print_string_list(struct cmdnames *cmds, int longest)
-{
-	int cols = 1, rows;
-	int space = longest + 1; /* min 1 SP between words */
-	int max_cols = term_columns() - 1; /* don't print *on* the edge */
-	int i, j;
-
-	if (space < max_cols)
-		cols = max_cols / space;
-	rows = (cmds->cnt + cols - 1) / cols;
-
-	for (i = 0; i < rows; i++) {
-		printf("  ");
-
-		for (j = 0; j < cols; j++) {
-			int n = j * rows + i;
-			int size = space;
-			if (n >= cmds->cnt)
-				break;
-			if (j == cols-1 || n + rows >= cmds->cnt)
-				size = 1;
-			printf("%-*s", size, cmds->names[n]->name);
-		}
-		putchar('\n');
-	}
-}
-
-static int is_executable(const char *name)
-{
-	struct stat st;
-
-	if (stat(name, &st) || /* stat, not lstat */
-	    !S_ISREG(st.st_mode))
-		return 0;
-
-#ifdef __MINGW32__
-	/* cannot trust the executable bit, peek into the file instead */
-	char buf[3] = { 0 };
-	int n;
-	int fd = open(name, O_RDONLY);
-	st.st_mode &= ~S_IXUSR;
-	if (fd >= 0) {
-		n = read(fd, buf, 2);
-		if (n == 2)
-			/* DOS executables start with "MZ" */
-			if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
-				st.st_mode |= S_IXUSR;
-		close(fd);
-	}
-#endif
-	return st.st_mode & S_IXUSR;
-}
-
-static void list_commands_in_dir(struct cmdnames *cmds,
-					 const char *path,
-					 const char *prefix)
-{
-	int prefix_len;
-	DIR *dir = opendir(path);
-	struct dirent *de;
-	struct strbuf buf = STRBUF_INIT;
-	int len;
-
-	if (!dir)
-		return;
-	if (!prefix)
-		prefix = "perf-";
-	prefix_len = strlen(prefix);
-
-	strbuf_addf(&buf, "%s/", path);
-	len = buf.len;
-
-	while ((de = readdir(dir)) != NULL) {
-		int entlen;
-
-		if (prefixcmp(de->d_name, prefix))
-			continue;
-
-		strbuf_setlen(&buf, len);
-		strbuf_addstr(&buf, de->d_name);
-		if (!is_executable(buf.buf))
-			continue;
-
-		entlen = strlen(de->d_name) - prefix_len;
-		if (has_extension(de->d_name, ".exe"))
-			entlen -= 4;
-
-		add_cmdname(cmds, de->d_name + prefix_len, entlen);
-	}
-	closedir(dir);
-	strbuf_release(&buf);
-}
-
-void load_command_list(const char *prefix,
-		struct cmdnames *main_cmds,
-		struct cmdnames *other_cmds)
-{
-	const char *env_path = getenv("PATH");
-	const char *exec_path = perf_exec_path();
-
-	if (exec_path) {
-		list_commands_in_dir(main_cmds, exec_path, prefix);
-		qsort(main_cmds->names, main_cmds->cnt,
-		      sizeof(*main_cmds->names), cmdname_compare);
-		uniq(main_cmds);
-	}
-
-	if (env_path) {
-		char *paths, *path, *colon;
-		path = paths = strdup(env_path);
-		while (1) {
-			if ((colon = strchr(path, PATH_SEP)))
-				*colon = 0;
-			if (!exec_path || strcmp(path, exec_path))
-				list_commands_in_dir(other_cmds, path, prefix);
-
-			if (!colon)
-				break;
-			path = colon + 1;
-		}
-		free(paths);
-
-		qsort(other_cmds->names, other_cmds->cnt,
-		      sizeof(*other_cmds->names), cmdname_compare);
-		uniq(other_cmds);
-	}
-	exclude_cmds(other_cmds, main_cmds);
-}
-
-void list_commands(const char *title, struct cmdnames *main_cmds,
-		   struct cmdnames *other_cmds)
-{
-	int i, longest = 0;
-
-	for (i = 0; i < main_cmds->cnt; i++)
-		if (longest < main_cmds->names[i]->len)
-			longest = main_cmds->names[i]->len;
-	for (i = 0; i < other_cmds->cnt; i++)
-		if (longest < other_cmds->names[i]->len)
-			longest = other_cmds->names[i]->len;
-
-	if (main_cmds->cnt) {
-		const char *exec_path = perf_exec_path();
-		printf("available %s in '%s'\n", title, exec_path);
-		printf("----------------");
-		mput_char('-', strlen(title) + strlen(exec_path));
-		putchar('\n');
-		pretty_print_string_list(main_cmds, longest);
-		putchar('\n');
-	}
-
-	if (other_cmds->cnt) {
-		printf("%s available from elsewhere on your $PATH\n", title);
-		printf("---------------------------------------");
-		mput_char('-', strlen(title));
-		putchar('\n');
-		pretty_print_string_list(other_cmds, longest);
-		putchar('\n');
-	}
-}
-
-int is_in_cmdlist(struct cmdnames *c, const char *s)
-{
-	int i;
-	for (i = 0; i < c->cnt; i++)
-		if (!strcmp(s, c->names[i]->name))
-			return 1;
-	return 0;
-}
-
-static int autocorrect;
-static struct cmdnames aliases;
-
-static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
-{
-	if (!strcmp(var, "help.autocorrect"))
-		autocorrect = perf_config_int(var,value);
-	/* Also use aliases for command lookup */
-	if (!prefixcmp(var, "alias."))
-		add_cmdname(&aliases, var + 6, strlen(var + 6));
-
-	return perf_default_config(var, value, cb);
-}
-
-static int levenshtein_compare(const void *p1, const void *p2)
-{
-	const struct cmdname *const *c1 = p1, *const *c2 = p2;
-	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
-	int l1 = (*c1)->len;
-	int l2 = (*c2)->len;
-	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
-}
-
-static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
-{
-	int i;
-	ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
-
-	for (i = 0; i < old->cnt; i++)
-		cmds->names[cmds->cnt++] = old->names[i];
-	free(old->names);
-	old->cnt = 0;
-	old->names = NULL;
-}
-
-const char *help_unknown_cmd(const char *cmd)
-{
-	int i, n = 0, best_similarity = 0;
-	struct cmdnames main_cmds, other_cmds;
-
-	memset(&main_cmds, 0, sizeof(main_cmds));
-	memset(&other_cmds, 0, sizeof(main_cmds));
-	memset(&aliases, 0, sizeof(aliases));
-
-	perf_config(perf_unknown_cmd_config, NULL);
-
-	load_command_list("perf-", &main_cmds, &other_cmds);
-
-	add_cmd_list(&main_cmds, &aliases);
-	add_cmd_list(&main_cmds, &other_cmds);
-	qsort(main_cmds.names, main_cmds.cnt,
-	      sizeof(main_cmds.names), cmdname_compare);
-	uniq(&main_cmds);
-
-	if (main_cmds.cnt) {
-		/* This reuses cmdname->len for similarity index */
-		for (i = 0; i < main_cmds.cnt; ++i)
-			main_cmds.names[i]->len =
-				levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
-
-		qsort(main_cmds.names, main_cmds.cnt,
-		      sizeof(*main_cmds.names), levenshtein_compare);
-
-		best_similarity = main_cmds.names[0]->len;
-		n = 1;
-		while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
-			++n;
-	}
-
-	if (autocorrect && n == 1) {
-		const char *assumed = main_cmds.names[0]->name;
-
-		main_cmds.names[0] = NULL;
-		clean_cmdnames(&main_cmds);
-		fprintf(stderr, "WARNING: You called a Git program named '%s', "
-			"which does not exist.\n"
-			"Continuing under the assumption that you meant '%s'\n",
-			cmd, assumed);
-		if (autocorrect > 0) {
-			fprintf(stderr, "in %0.1f seconds automatically...\n",
-				(float)autocorrect/10.0);
-			poll(NULL, 0, autocorrect * 100);
-		}
-		return assumed;
-	}
-
-	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
-
-	if (main_cmds.cnt && best_similarity < 6) {
-		fprintf(stderr, "\nDid you mean %s?\n",
-			n < 2 ? "this": "one of these");
-
-		for (i = 0; i < n; i++)
-			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
-	}
-
-	exit(1);
-}
-
-int cmd_version(int argc, const char **argv, const char *prefix)
-{
-	printf("perf version %s\n", perf_version_string);
-	return 0;
-}
diff --git a/Documentation/perf_counter/util/help.h b/Documentation/perf_counter/util/help.h
deleted file mode 100644
index 56bc154..0000000
--- a/Documentation/perf_counter/util/help.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef HELP_H
-#define HELP_H
-
-struct cmdnames {
-	int alloc;
-	int cnt;
-	struct cmdname {
-		size_t len; /* also used for similarity index in help.c */
-		char name[FLEX_ARRAY];
-	} **names;
-};
-
-static inline void mput_char(char c, unsigned int num)
-{
-	while(num--)
-		putchar(c);
-}
-
-void load_command_list(const char *prefix,
-		struct cmdnames *main_cmds,
-		struct cmdnames *other_cmds);
-void add_cmdname(struct cmdnames *cmds, const char *name, int len);
-/* Here we require that excludes is a sorted list. */
-void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
-int is_in_cmdlist(struct cmdnames *c, const char *s);
-void list_commands(const char *title, struct cmdnames *main_cmds,
-		   struct cmdnames *other_cmds);
-
-#endif /* HELP_H */
diff --git a/Documentation/perf_counter/util/levenshtein.c b/Documentation/perf_counter/util/levenshtein.c
deleted file mode 100644
index e521d15..0000000
--- a/Documentation/perf_counter/util/levenshtein.c
+++ /dev/null
@@ -1,84 +0,0 @@
-#include "cache.h"
-#include "levenshtein.h"
-
-/*
- * This function implements the Damerau-Levenshtein algorithm to
- * calculate a distance between strings.
- *
- * Basically, it says how many letters need to be swapped, substituted,
- * deleted from, or added to string1, at least, to get string2.
- *
- * The idea is to build a distance matrix for the substrings of both
- * strings.  To avoid a large space complexity, only the last three rows
- * are kept in memory (if swaps had the same or higher cost as one deletion
- * plus one insertion, only two rows would be needed).
- *
- * At any stage, "i + 1" denotes the length of the current substring of
- * string1 that the distance is calculated for.
- *
- * row2 holds the current row, row1 the previous row (i.e. for the substring
- * of string1 of length "i"), and row0 the row before that.
- *
- * In other words, at the start of the big loop, row2[j + 1] contains the
- * Damerau-Levenshtein distance between the substring of string1 of length
- * "i" and the substring of string2 of length "j + 1".
- *
- * All the big loop does is determine the partial minimum-cost paths.
- *
- * It does so by calculating the costs of the path ending in characters
- * i (in string1) and j (in string2), respectively, given that the last
- * operation is a substition, a swap, a deletion, or an insertion.
- *
- * This implementation allows the costs to be weighted:
- *
- * - w (as in "sWap")
- * - s (as in "Substitution")
- * - a (for insertion, AKA "Add")
- * - d (as in "Deletion")
- *
- * Note that this algorithm calculates a distance _iff_ d == a.
- */
-int levenshtein(const char *string1, const char *string2,
-		int w, int s, int a, int d)
-{
-	int len1 = strlen(string1), len2 = strlen(string2);
-	int *row0 = malloc(sizeof(int) * (len2 + 1));
-	int *row1 = malloc(sizeof(int) * (len2 + 1));
-	int *row2 = malloc(sizeof(int) * (len2 + 1));
-	int i, j;
-
-	for (j = 0; j <= len2; j++)
-		row1[j] = j * a;
-	for (i = 0; i < len1; i++) {
-		int *dummy;
-
-		row2[0] = (i + 1) * d;
-		for (j = 0; j < len2; j++) {
-			/* substitution */
-			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
-			/* swap */
-			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
-					string1[i] == string2[j - 1] &&
-					row2[j + 1] > row0[j - 1] + w)
-				row2[j + 1] = row0[j - 1] + w;
-			/* deletion */
-			if (row2[j + 1] > row1[j + 1] + d)
-				row2[j + 1] = row1[j + 1] + d;
-			/* insertion */
-			if (row2[j + 1] > row2[j] + a)
-				row2[j + 1] = row2[j] + a;
-		}
-
-		dummy = row0;
-		row0 = row1;
-		row1 = row2;
-		row2 = dummy;
-	}
-
-	i = row1[len2];
-	free(row0);
-	free(row1);
-	free(row2);
-
-	return i;
-}
diff --git a/Documentation/perf_counter/util/levenshtein.h b/Documentation/perf_counter/util/levenshtein.h
deleted file mode 100644
index 0173abe..0000000
--- a/Documentation/perf_counter/util/levenshtein.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef LEVENSHTEIN_H
-#define LEVENSHTEIN_H
-
-int levenshtein(const char *string1, const char *string2,
-	int swap_penalty, int substition_penalty,
-	int insertion_penalty, int deletion_penalty);
-
-#endif
diff --git a/Documentation/perf_counter/util/list.h b/Documentation/perf_counter/util/list.h
deleted file mode 100644
index e2548e8..0000000
--- a/Documentation/perf_counter/util/list.h
+++ /dev/null
@@ -1,603 +0,0 @@
-#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-/*
-  Copyright (C) Cast of dozens, comes from the Linux kernel
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-*/
-
-#include <stddef.h>
-
-/*
- * These are non-NULL pointers that will result in page faults
- * under normal circumstances, used to verify that nobody uses
- * non-initialized list entries.
- */
-#define LIST_POISON1 ((void *)0x00100100)
-#define LIST_POISON2 ((void *)0x00200200)
-
-/**
- * container_of - cast a member of a structure out to the containing structure
- * @ptr:	the pointer to the member.
- * @type:	the type of the container struct this is embedded in.
- * @member:	the name of the member within the struct.
- *
- */
-#define container_of(ptr, type, member) ({			\
-        const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-        (type *)( (char *)__mptr - offsetof(type,member) );})
-
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-struct list_head {
-	struct list_head *next, *prev;
-};
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
-	struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void INIT_LIST_HEAD(struct list_head *list)
-{
-	list->next = list;
-	list->prev = list;
-}
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next)
-{
-	next->prev = new;
-	new->next = next;
-	new->prev = prev;
-	prev->next = new;
-}
-
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head, head->next);
-}
-
-/**
- * list_add_tail - add a new entry
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head->prev, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-	next->prev = prev;
-	prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is
- * in an undefined state.
- */
-static inline void list_del(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	entry->next = LIST_POISON1;
-	entry->prev = LIST_POISON2;
-}
-
-/**
- * list_del_range - deletes range of entries from list.
- * @beging: first element in the range to delete from the list.
- * @beging: first element in the range to delete from the list.
- * Note: list_empty on the range of entries does not return true after this,
- * the entries is in an undefined state.
- */
-static inline void list_del_range(struct list_head *begin,
-				  struct list_head *end)
-{
-	begin->prev->next = end->next;
-	end->next->prev = begin->prev;
-}
-
-/**
- * list_replace - replace old entry by new one
- * @old : the element to be replaced
- * @new : the new element to insert
- * Note: if 'old' was empty, it will be overwritten.
- */
-static inline void list_replace(struct list_head *old,
-				struct list_head *new)
-{
-	new->next = old->next;
-	new->next->prev = new;
-	new->prev = old->prev;
-	new->prev->next = new;
-}
-
-static inline void list_replace_init(struct list_head *old,
-					struct list_head *new)
-{
-	list_replace(old, new);
-	INIT_LIST_HEAD(old);
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- */
-static inline void list_del_init(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	INIT_LIST_HEAD(entry);
-}
-
-/**
- * list_move - delete from one list and add as another's head
- * @list: the entry to move
- * @head: the head that will precede our entry
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
-        __list_del(list->prev, list->next);
-        list_add(list, head);
-}
-
-/**
- * list_move_tail - delete from one list and add as another's tail
- * @list: the entry to move
- * @head: the head that will follow our entry
- */
-static inline void list_move_tail(struct list_head *list,
-				  struct list_head *head)
-{
-        __list_del(list->prev, list->next);
-        list_add_tail(list, head);
-}
-
-/**
- * list_is_last - tests whether @list is the last entry in list @head
- * @list: the entry to test
- * @head: the head of the list
- */
-static inline int list_is_last(const struct list_head *list,
-				const struct list_head *head)
-{
-	return list->next == head;
-}
-
-/**
- * list_empty - tests whether a list is empty
- * @head: the list to test.
- */
-static inline int list_empty(const struct list_head *head)
-{
-	return head->next == head;
-}
-
-/**
- * list_empty_careful - tests whether a list is empty and not being modified
- * @head: the list to test
- *
- * Description:
- * tests whether a list is empty _and_ checks that no other CPU might be
- * in the process of modifying either member (next or prev)
- *
- * NOTE: using list_empty_careful() without synchronization
- * can only be safe if the only activity that can happen
- * to the list entry is list_del_init(). Eg. it cannot be used
- * if another CPU could re-list_add() it.
- */
-static inline int list_empty_careful(const struct list_head *head)
-{
-	struct list_head *next = head->next;
-	return (next == head) && (next == head->prev);
-}
-
-static inline void __list_splice(struct list_head *list,
-				 struct list_head *head)
-{
-	struct list_head *first = list->next;
-	struct list_head *last = list->prev;
-	struct list_head *at = head->next;
-
-	first->prev = head;
-	head->next = first;
-
-	last->next = at;
-	at->prev = last;
-}
-
-/**
- * list_splice - join two lists
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice(struct list_head *list, struct list_head *head)
-{
-	if (!list_empty(list))
-		__list_splice(list, head);
-}
-
-/**
- * list_splice_init - join two lists and reinitialise the emptied list.
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_init(struct list_head *list,
-				    struct list_head *head)
-{
-	if (!list_empty(list)) {
-		__list_splice(list, head);
-		INIT_LIST_HEAD(list);
-	}
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr:	the &struct list_head pointer.
- * @type:	the type of the struct this is embedded in.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_entry(ptr, type, member) \
-	container_of(ptr, type, member)
-
-/**
- * list_first_entry - get the first element from a list
- * @ptr:       the list head to take the element from.
- * @type:      the type of the struct this is embedded in.
- * @member:    the name of the list_struct within the struct.
- *
- * Note, that list is expected to be not empty.
- */
-#define list_first_entry(ptr, type, member) \
-	list_entry((ptr)->next, type, member)
-
-/**
- * list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- */
-#define list_for_each(pos, head) \
-	for (pos = (head)->next; pos != (head); \
-        	pos = pos->next)
-
-/**
- * __list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * This variant differs from list_for_each() in that it's the
- * simplest possible list iteration code, no prefetching is done.
- * Use this for code that knows the list to be very short (empty
- * or 1 entry) most of the time.
- */
-#define __list_for_each(pos, head) \
-	for (pos = (head)->next; pos != (head); pos = pos->next)
-
-/**
- * list_for_each_prev	-	iterate over a list backwards
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- */
-#define list_for_each_prev(pos, head) \
-	for (pos = (head)->prev; pos != (head); \
-        	pos = pos->prev)
-
-/**
- * list_for_each_safe - iterate over a list safe against removal of list entry
- * @pos:	the &struct list_head to use as a loop cursor.
- * @n:		another &struct list_head to use as temporary storage
- * @head:	the head for your list.
- */
-#define list_for_each_safe(pos, n, head) \
-	for (pos = (head)->next, n = pos->next; pos != (head); \
-		pos = n, n = pos->next)
-
-/**
- * list_for_each_entry	-	iterate over list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry(pos, head, member)				\
-	for (pos = list_entry((head)->next, typeof(*pos), member);	\
-	     &pos->member != (head); 	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_reverse - iterate backwards over list of given type.
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry_reverse(pos, head, member)			\
-	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
-	     &pos->member != (head); 	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
-
-/**
- * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue
- * @pos:	the type * to use as a start point
- * @head:	the head of the list
- * @member:	the name of the list_struct within the struct.
- *
- * Prepares a pos entry for use as a start point in list_for_each_entry_continue.
- */
-#define list_prepare_entry(pos, head, member) \
-	((pos) ? : list_entry(head, typeof(*pos), member))
-
-/**
- * list_for_each_entry_continue - continue iteration over list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Continue to iterate over list of given type, continuing after
- * the current position.
- */
-#define list_for_each_entry_continue(pos, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
-	     &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_from - iterate over list of given type from the current point
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type, continuing from current position.
- */
-#define list_for_each_entry_from(pos, head, member) 			\
-	for (; &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member)			\
-	for (pos = list_entry((head)->next, typeof(*pos), member),	\
-		n = list_entry(pos->member.next, typeof(*pos), member);	\
-	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_continue
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type, continuing after current point,
- * safe against removal of list entry.
- */
-#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
-		n = list_entry(pos->member.next, typeof(*pos), member);		\
-	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_from
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type from current point, safe against
- * removal of list entry.
- */
-#define list_for_each_entry_safe_from(pos, n, head, member) 			\
-	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
-	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_reverse
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate backwards over list of given type, safe against removal
- * of list entry.
- */
-#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
-	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
-		n = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
-
-/*
- * Double linked lists with a single pointer list head.
- * Mostly useful for hash tables where the two pointer list head is
- * too wasteful.
- * You lose the ability to access the tail in O(1).
- */
-
-struct hlist_head {
-	struct hlist_node *first;
-};
-
-struct hlist_node {
-	struct hlist_node *next, **pprev;
-};
-
-#define HLIST_HEAD_INIT { .first = NULL }
-#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
-#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
-static inline void INIT_HLIST_NODE(struct hlist_node *h)
-{
-	h->next = NULL;
-	h->pprev = NULL;
-}
-
-static inline int hlist_unhashed(const struct hlist_node *h)
-{
-	return !h->pprev;
-}
-
-static inline int hlist_empty(const struct hlist_head *h)
-{
-	return !h->first;
-}
-
-static inline void __hlist_del(struct hlist_node *n)
-{
-	struct hlist_node *next = n->next;
-	struct hlist_node **pprev = n->pprev;
-	*pprev = next;
-	if (next)
-		next->pprev = pprev;
-}
-
-static inline void hlist_del(struct hlist_node *n)
-{
-	__hlist_del(n);
-	n->next = LIST_POISON1;
-	n->pprev = LIST_POISON2;
-}
-
-static inline void hlist_del_init(struct hlist_node *n)
-{
-	if (!hlist_unhashed(n)) {
-		__hlist_del(n);
-		INIT_HLIST_NODE(n);
-	}
-}
-
-static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
-{
-	struct hlist_node *first = h->first;
-	n->next = first;
-	if (first)
-		first->pprev = &n->next;
-	h->first = n;
-	n->pprev = &h->first;
-}
-
-/* next must be != NULL */
-static inline void hlist_add_before(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	n->pprev = next->pprev;
-	n->next = next;
-	next->pprev = &n->next;
-	*(n->pprev) = n;
-}
-
-static inline void hlist_add_after(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	next->next = n->next;
-	n->next = next;
-	next->pprev = &n->next;
-
-	if(next->next)
-		next->next->pprev  = &next->next;
-}
-
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
-
-#define hlist_for_each(pos, head) \
-	for (pos = (head)->first; pos; \
-	     pos = pos->next)
-
-#define hlist_for_each_safe(pos, n, head) \
-	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
-	     pos = n)
-
-/**
- * hlist_for_each_entry	- iterate over list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry(tpos, pos, head, member)			 \
-	for (pos = (head)->first;					 \
-	     pos && 			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_continue(tpos, pos, member)		 \
-	for (pos = (pos)->next;						 \
-	     pos && 			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_from - iterate over a hlist continuing from current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_from(tpos, pos, member)			 \
-	for (; pos && 			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @n:		another &struct hlist_node to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
-	for (pos = (head)->first;					 \
-	     pos && ({ n = pos->next; 1; }) && 				 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = n)
-
-#endif
diff --git a/Documentation/perf_counter/util/pager.c b/Documentation/perf_counter/util/pager.c
deleted file mode 100644
index a28bcca..0000000
--- a/Documentation/perf_counter/util/pager.c
+++ /dev/null
@@ -1,99 +0,0 @@
-#include "cache.h"
-#include "run-command.h"
-#include "sigchain.h"
-
-/*
- * This is split up from the rest of git so that we can do
- * something different on Windows.
- */
-
-static int spawned_pager;
-
-#ifndef __MINGW32__
-static void pager_preexec(void)
-{
-	/*
-	 * Work around bug in "less" by not starting it until we
-	 * have real input
-	 */
-	fd_set in;
-
-	FD_ZERO(&in);
-	FD_SET(0, &in);
-	select(1, &in, NULL, &in, NULL);
-
-	setenv("LESS", "FRSX", 0);
-}
-#endif
-
-static const char *pager_argv[] = { "sh", "-c", NULL, NULL };
-static struct child_process pager_process;
-
-static void wait_for_pager(void)
-{
-	fflush(stdout);
-	fflush(stderr);
-	/* signal EOF to pager */
-	close(1);
-	close(2);
-	finish_command(&pager_process);
-}
-
-static void wait_for_pager_signal(int signo)
-{
-	wait_for_pager();
-	sigchain_pop(signo);
-	raise(signo);
-}
-
-void setup_pager(void)
-{
-	const char *pager = getenv("PERF_PAGER");
-
-	if (!isatty(1))
-		return;
-	if (!pager) {
-		if (!pager_program)
-			perf_config(perf_default_config, NULL);
-		pager = pager_program;
-	}
-	if (!pager)
-		pager = getenv("PAGER");
-	if (!pager)
-		pager = "less";
-	else if (!*pager || !strcmp(pager, "cat"))
-		return;
-
-	spawned_pager = 1; /* means we are emitting to terminal */
-
-	/* spawn the pager */
-	pager_argv[2] = pager;
-	pager_process.argv = pager_argv;
-	pager_process.in = -1;
-#ifndef __MINGW32__
-	pager_process.preexec_cb = pager_preexec;
-#endif
-	if (start_command(&pager_process))
-		return;
-
-	/* original process continues, but writes to the pipe */
-	dup2(pager_process.in, 1);
-	if (isatty(2))
-		dup2(pager_process.in, 2);
-	close(pager_process.in);
-
-	/* this makes sure that the parent terminates after the pager */
-	sigchain_push_common(wait_for_pager_signal);
-	atexit(wait_for_pager);
-}
-
-int pager_in_use(void)
-{
-	const char *env;
-
-	if (spawned_pager)
-		return 1;
-
-	env = getenv("PERF_PAGER_IN_USE");
-	return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0;
-}
diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
deleted file mode 100644
index e0820b4..0000000
--- a/Documentation/perf_counter/util/parse-events.c
+++ /dev/null
@@ -1,316 +0,0 @@
-
-#include "../perf.h"
-#include "util.h"
-#include "parse-options.h"
-#include "parse-events.h"
-#include "exec_cmd.h"
-#include "string.h"
-
-extern char *strcasestr(const char *haystack, const char *needle);
-
-int					nr_counters;
-
-struct perf_counter_attr		attrs[MAX_COUNTERS];
-
-struct event_symbol {
-	__u8	type;
-	__u64	config;
-	char	*symbol;
-};
-
-#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
-#define CR(x, y) .type = PERF_TYPE_##x, .config = y
-
-static struct event_symbol event_symbols[] = {
-  { C(HARDWARE, CPU_CYCLES),		"cpu-cycles",		},
-  { C(HARDWARE, CPU_CYCLES),		"cycles",		},
-  { C(HARDWARE, INSTRUCTIONS),		"instructions",		},
-  { C(HARDWARE, CACHE_REFERENCES),	"cache-references",	},
-  { C(HARDWARE, CACHE_MISSES),		"cache-misses",		},
-  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branches",		},
-  { C(HARDWARE, BRANCH_MISSES),		"branch-misses",	},
-  { C(HARDWARE, BUS_CYCLES),		"bus-cycles",		},
-
-  { C(SOFTWARE, CPU_CLOCK),		"cpu-clock",		},
-  { C(SOFTWARE, TASK_CLOCK),		"task-clock",		},
-  { C(SOFTWARE, PAGE_FAULTS),		"page-faults",		},
-  { C(SOFTWARE, PAGE_FAULTS),		"faults",		},
-  { C(SOFTWARE, PAGE_FAULTS_MIN),	"minor-faults",		},
-  { C(SOFTWARE, PAGE_FAULTS_MAJ),	"major-faults",		},
-  { C(SOFTWARE, CONTEXT_SWITCHES),	"context-switches",	},
-  { C(SOFTWARE, CONTEXT_SWITCHES),	"cs",			},
-  { C(SOFTWARE, CPU_MIGRATIONS),	"cpu-migrations",	},
-  { C(SOFTWARE, CPU_MIGRATIONS),	"migrations",		},
-};
-
-#define __PERF_COUNTER_FIELD(config, name) \
-	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
-#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
-#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
-#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
-
-static char *hw_event_names[] = {
-	"cycles",
-	"instructions",
-	"cache-references",
-	"cache-misses",
-	"branches",
-	"branch-misses",
-	"bus-cycles",
-};
-
-static char *sw_event_names[] = {
-	"cpu-clock-ticks",
-	"task-clock-ticks",
-	"page-faults",
-	"context-switches",
-	"CPU-migrations",
-	"minor-faults",
-	"major-faults",
-};
-
-#define MAX_ALIASES 8
-
-static char *hw_cache [][MAX_ALIASES] = {
-	{ "L1-data"		, "l1-d", "l1d", "l1"				},
-	{ "L1-instruction"	, "l1-i", "l1i"					},
-	{ "L2"			, "l2"						},
-	{ "Data-TLB"		, "dtlb", "d-tlb"				},
-	{ "Instruction-TLB"	, "itlb", "i-tlb"				},
-	{ "Branch"		, "bpu" , "btb", "bpc"				},
-};
-
-static char *hw_cache_op [][MAX_ALIASES] = {
-	{ "Load"		, "read"					},
-	{ "Store"		, "write"					},
-	{ "Prefetch"		, "speculative-read", "speculative-load"	},
-};
-
-static char *hw_cache_result [][MAX_ALIASES] = {
-	{ "Reference"		, "ops", "access"				},
-	{ "Miss"								},
-};
-
-char *event_name(int counter)
-{
-	__u64 config = attrs[counter].config;
-	int type = attrs[counter].type;
-	static char buf[32];
-
-	if (attrs[counter].type == PERF_TYPE_RAW) {
-		sprintf(buf, "raw 0x%llx", config);
-		return buf;
-	}
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		if (config < PERF_HW_EVENTS_MAX)
-			return hw_event_names[config];
-		return "unknown-hardware";
-
-	case PERF_TYPE_HW_CACHE: {
-		__u8 cache_type, cache_op, cache_result;
-		static char name[100];
-
-		cache_type   = (config >>  0) & 0xff;
-		if (cache_type > PERF_COUNT_HW_CACHE_MAX)
-			return "unknown-ext-hardware-cache-type";
-
-		cache_op     = (config >>  8) & 0xff;
-		if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX)
-			return "unknown-ext-hardware-cache-op";
-
-		cache_result = (config >> 16) & 0xff;
-		if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
-			return "unknown-ext-hardware-cache-result";
-
-		sprintf(name, "%s-Cache-%s-%ses",
-			hw_cache[cache_type][0],
-			hw_cache_op[cache_op][0],
-			hw_cache_result[cache_result][0]);
-
-		return name;
-	}
-
-	case PERF_TYPE_SOFTWARE:
-		if (config < PERF_SW_EVENTS_MAX)
-			return sw_event_names[config];
-		return "unknown-software";
-
-	default:
-		break;
-	}
-
-	return "unknown";
-}
-
-static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
-{
-	int i, j;
-
-	for (i = 0; i < size; i++) {
-		for (j = 0; j < MAX_ALIASES; j++) {
-			if (!names[i][j])
-				break;
-			if (strcasestr(str, names[i][j]))
-				return i;
-		}
-	}
-
-	return 0;
-}
-
-static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
-{
-	__u8 cache_type = -1, cache_op = 0, cache_result = 0;
-
-	cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX);
-	/*
-	 * No fallback - if we cannot get a clear cache type
-	 * then bail out:
-	 */
-	if (cache_type == -1)
-		return -EINVAL;
-
-	cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
-	/*
-	 * Fall back to reads:
-	 */
-	if (cache_type == -1)
-		cache_type = PERF_COUNT_HW_CACHE_OP_READ;
-
-	cache_result = parse_aliases(str, hw_cache_result,
-					PERF_COUNT_HW_CACHE_RESULT_MAX);
-	/*
-	 * Fall back to accesses:
-	 */
-	if (cache_result == -1)
-		cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
-
-	attr->config = cache_type | (cache_op << 8) | (cache_result << 16);
-	attr->type = PERF_TYPE_HW_CACHE;
-
-	return 0;
-}
-
-/*
- * Each event can have multiple symbolic names.
- * Symbolic names are (almost) exactly matched.
- */
-static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
-{
-	__u64 config, id;
-	int type;
-	unsigned int i;
-	const char *sep, *pstr;
-
-	if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) {
-		attr->type = PERF_TYPE_RAW;
-		attr->config = config;
-
-		return 0;
-	}
-
-	pstr = str;
-	sep = strchr(pstr, ':');
-	if (sep) {
-		type = atoi(pstr);
-		pstr = sep + 1;
-		id = atoi(pstr);
-		sep = strchr(pstr, ':');
-		if (sep) {
-			pstr = sep + 1;
-			if (strchr(pstr, 'k'))
-				attr->exclude_user = 1;
-			if (strchr(pstr, 'u'))
-				attr->exclude_kernel = 1;
-		}
-		attr->type = type;
-		attr->config = id;
-
-		return 0;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol))) {
-
-			attr->type = event_symbols[i].type;
-			attr->config = event_symbols[i].config;
-
-			return 0;
-		}
-	}
-
-	return parse_generic_hw_symbols(str, attr);
-}
-
-int parse_events(const struct option *opt, const char *str, int unset)
-{
-	struct perf_counter_attr attr;
-	int ret;
-
-	memset(&attr, 0, sizeof(attr));
-again:
-	if (nr_counters == MAX_COUNTERS)
-		return -1;
-
-	ret = parse_event_symbols(str, &attr);
-	if (ret < 0)
-		return ret;
-
-	attrs[nr_counters] = attr;
-	nr_counters++;
-
-	str = strstr(str, ",");
-	if (str) {
-		str++;
-		goto again;
-	}
-
-	return 0;
-}
-
-static const char * const event_type_descriptors[] = {
-	"",
-	"Hardware event",
-	"Software event",
-	"Tracepoint event",
-	"Hardware cache event",
-};
-
-/*
- * Print the help text for the event symbols:
- */
-void print_events(void)
-{
-	struct event_symbol *syms = event_symbols;
-	unsigned int i, type, prev_type = -1;
-
-	fprintf(stderr, "\n");
-	fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
-		type = syms->type + 1;
-		if (type > ARRAY_SIZE(event_type_descriptors))
-			type = 0;
-
-		if (type != prev_type)
-			fprintf(stderr, "\n");
-
-		fprintf(stderr, "  %-30s [%s]\n", syms->symbol,
-			event_type_descriptors[type]);
-
-		prev_type = type;
-	}
-
-	fprintf(stderr, "\n");
-	fprintf(stderr, "  %-30s [raw hardware event descriptor]\n",
-		"rNNN");
-	fprintf(stderr, "\n");
-
-	exit(129);
-}
diff --git a/Documentation/perf_counter/util/parse-events.h b/Documentation/perf_counter/util/parse-events.h
deleted file mode 100644
index e3d5529..0000000
--- a/Documentation/perf_counter/util/parse-events.h
+++ /dev/null
@@ -1,17 +0,0 @@
-
-/*
- * Parse symbolic events/counts passed in as options:
- */
-
-extern int			nr_counters;
-
-extern struct perf_counter_attr attrs[MAX_COUNTERS];
-
-extern char *event_name(int ctr);
-
-extern int parse_events(const struct option *opt, const char *str, int unset);
-
-#define EVENTS_HELP_MAX (128*1024)
-
-extern void print_events(void);
-
diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c
deleted file mode 100644
index b3affb1..0000000
--- a/Documentation/perf_counter/util/parse-options.c
+++ /dev/null
@@ -1,508 +0,0 @@
-#include "util.h"
-#include "parse-options.h"
-#include "cache.h"
-
-#define OPT_SHORT 1
-#define OPT_UNSET 2
-
-static int opterror(const struct option *opt, const char *reason, int flags)
-{
-	if (flags & OPT_SHORT)
-		return error("switch `%c' %s", opt->short_name, reason);
-	if (flags & OPT_UNSET)
-		return error("option `no-%s' %s", opt->long_name, reason);
-	return error("option `%s' %s", opt->long_name, reason);
-}
-
-static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
-		   int flags, const char **arg)
-{
-	if (p->opt) {
-		*arg = p->opt;
-		p->opt = NULL;
-	} else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) {
-		*arg = (const char *)opt->defval;
-	} else if (p->argc > 1) {
-		p->argc--;
-		*arg = *++p->argv;
-	} else
-		return opterror(opt, "requires a value", flags);
-	return 0;
-}
-
-static int get_value(struct parse_opt_ctx_t *p,
-		     const struct option *opt, int flags)
-{
-	const char *s, *arg = NULL;
-	const int unset = flags & OPT_UNSET;
-
-	if (unset && p->opt)
-		return opterror(opt, "takes no value", flags);
-	if (unset && (opt->flags & PARSE_OPT_NONEG))
-		return opterror(opt, "isn't available", flags);
-
-	if (!(flags & OPT_SHORT) && p->opt) {
-		switch (opt->type) {
-		case OPTION_CALLBACK:
-			if (!(opt->flags & PARSE_OPT_NOARG))
-				break;
-			/* FALLTHROUGH */
-		case OPTION_BOOLEAN:
-		case OPTION_BIT:
-		case OPTION_SET_INT:
-		case OPTION_SET_PTR:
-			return opterror(opt, "takes no value", flags);
-		default:
-			break;
-		}
-	}
-
-	switch (opt->type) {
-	case OPTION_BIT:
-		if (unset)
-			*(int *)opt->value &= ~opt->defval;
-		else
-			*(int *)opt->value |= opt->defval;
-		return 0;
-
-	case OPTION_BOOLEAN:
-		*(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
-		return 0;
-
-	case OPTION_SET_INT:
-		*(int *)opt->value = unset ? 0 : opt->defval;
-		return 0;
-
-	case OPTION_SET_PTR:
-		*(void **)opt->value = unset ? NULL : (void *)opt->defval;
-		return 0;
-
-	case OPTION_STRING:
-		if (unset)
-			*(const char **)opt->value = NULL;
-		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
-			*(const char **)opt->value = (const char *)opt->defval;
-		else
-			return get_arg(p, opt, flags, (const char **)opt->value);
-		return 0;
-
-	case OPTION_CALLBACK:
-		if (unset)
-			return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
-		if (opt->flags & PARSE_OPT_NOARG)
-			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
-		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
-			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
-		if (get_arg(p, opt, flags, &arg))
-			return -1;
-		return (*opt->callback)(opt, arg, 0) ? (-1) : 0;
-
-	case OPTION_INTEGER:
-		if (unset) {
-			*(int *)opt->value = 0;
-			return 0;
-		}
-		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
-			*(int *)opt->value = opt->defval;
-			return 0;
-		}
-		if (get_arg(p, opt, flags, &arg))
-			return -1;
-		*(int *)opt->value = strtol(arg, (char **)&s, 10);
-		if (*s)
-			return opterror(opt, "expects a numerical value", flags);
-		return 0;
-
-	case OPTION_LONG:
-		if (unset) {
-			*(long *)opt->value = 0;
-			return 0;
-		}
-		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
-			*(long *)opt->value = opt->defval;
-			return 0;
-		}
-		if (get_arg(p, opt, flags, &arg))
-			return -1;
-		*(long *)opt->value = strtol(arg, (char **)&s, 10);
-		if (*s)
-			return opterror(opt, "expects a numerical value", flags);
-		return 0;
-
-	default:
-		die("should not happen, someone must be hit on the forehead");
-	}
-}
-
-static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
-{
-	for (; options->type != OPTION_END; options++) {
-		if (options->short_name == *p->opt) {
-			p->opt = p->opt[1] ? p->opt + 1 : NULL;
-			return get_value(p, options, OPT_SHORT);
-		}
-	}
-	return -2;
-}
-
-static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
-                          const struct option *options)
-{
-	const char *arg_end = strchr(arg, '=');
-	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
-	int abbrev_flags = 0, ambiguous_flags = 0;
-
-	if (!arg_end)
-		arg_end = arg + strlen(arg);
-
-	for (; options->type != OPTION_END; options++) {
-		const char *rest;
-		int flags = 0;
-
-		if (!options->long_name)
-			continue;
-
-		rest = skip_prefix(arg, options->long_name);
-		if (options->type == OPTION_ARGUMENT) {
-			if (!rest)
-				continue;
-			if (*rest == '=')
-				return opterror(options, "takes no value", flags);
-			if (*rest)
-				continue;
-			p->out[p->cpidx++] = arg - 2;
-			return 0;
-		}
-		if (!rest) {
-			/* abbreviated? */
-			if (!strncmp(options->long_name, arg, arg_end - arg)) {
-is_abbreviated:
-				if (abbrev_option) {
-					/*
-					 * If this is abbreviated, it is
-					 * ambiguous. So when there is no
-					 * exact match later, we need to
-					 * error out.
-					 */
-					ambiguous_option = abbrev_option;
-					ambiguous_flags = abbrev_flags;
-				}
-				if (!(flags & OPT_UNSET) && *arg_end)
-					p->opt = arg_end + 1;
-				abbrev_option = options;
-				abbrev_flags = flags;
-				continue;
-			}
-			/* negated and abbreviated very much? */
-			if (!prefixcmp("no-", arg)) {
-				flags |= OPT_UNSET;
-				goto is_abbreviated;
-			}
-			/* negated? */
-			if (strncmp(arg, "no-", 3))
-				continue;
-			flags |= OPT_UNSET;
-			rest = skip_prefix(arg + 3, options->long_name);
-			/* abbreviated and negated? */
-			if (!rest && !prefixcmp(options->long_name, arg + 3))
-				goto is_abbreviated;
-			if (!rest)
-				continue;
-		}
-		if (*rest) {
-			if (*rest != '=')
-				continue;
-			p->opt = rest + 1;
-		}
-		return get_value(p, options, flags);
-	}
-
-	if (ambiguous_option)
-		return error("Ambiguous option: %s "
-			"(could be --%s%s or --%s%s)",
-			arg,
-			(ambiguous_flags & OPT_UNSET) ?  "no-" : "",
-			ambiguous_option->long_name,
-			(abbrev_flags & OPT_UNSET) ?  "no-" : "",
-			abbrev_option->long_name);
-	if (abbrev_option)
-		return get_value(p, abbrev_option, abbrev_flags);
-	return -2;
-}
-
-static void check_typos(const char *arg, const struct option *options)
-{
-	if (strlen(arg) < 3)
-		return;
-
-	if (!prefixcmp(arg, "no-")) {
-		error ("did you mean `--%s` (with two dashes ?)", arg);
-		exit(129);
-	}
-
-	for (; options->type != OPTION_END; options++) {
-		if (!options->long_name)
-			continue;
-		if (!prefixcmp(options->long_name, arg)) {
-			error ("did you mean `--%s` (with two dashes ?)", arg);
-			exit(129);
-		}
-	}
-}
-
-void parse_options_start(struct parse_opt_ctx_t *ctx,
-			 int argc, const char **argv, int flags)
-{
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->argc = argc - 1;
-	ctx->argv = argv + 1;
-	ctx->out  = argv;
-	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
-	ctx->flags = flags;
-	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
-	    (flags & PARSE_OPT_STOP_AT_NON_OPTION))
-		die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
-}
-
-static int usage_with_options_internal(const char * const *,
-				       const struct option *, int);
-
-int parse_options_step(struct parse_opt_ctx_t *ctx,
-		       const struct option *options,
-		       const char * const usagestr[])
-{
-	int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
-
-	/* we must reset ->opt, unknown short option leave it dangling */
-	ctx->opt = NULL;
-
-	for (; ctx->argc; ctx->argc--, ctx->argv++) {
-		const char *arg = ctx->argv[0];
-
-		if (*arg != '-' || !arg[1]) {
-			if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
-				break;
-			ctx->out[ctx->cpidx++] = ctx->argv[0];
-			continue;
-		}
-
-		if (arg[1] != '-') {
-			ctx->opt = arg + 1;
-			if (internal_help && *ctx->opt == 'h')
-				return parse_options_usage(usagestr, options);
-			switch (parse_short_opt(ctx, options)) {
-			case -1:
-				return parse_options_usage(usagestr, options);
-			case -2:
-				goto unknown;
-			}
-			if (ctx->opt)
-				check_typos(arg + 1, options);
-			while (ctx->opt) {
-				if (internal_help && *ctx->opt == 'h')
-					return parse_options_usage(usagestr, options);
-				switch (parse_short_opt(ctx, options)) {
-				case -1:
-					return parse_options_usage(usagestr, options);
-				case -2:
-					/* fake a short option thing to hide the fact that we may have
-					 * started to parse aggregated stuff
-					 *
-					 * This is leaky, too bad.
-					 */
-					ctx->argv[0] = strdup(ctx->opt - 1);
-					*(char *)ctx->argv[0] = '-';
-					goto unknown;
-				}
-			}
-			continue;
-		}
-
-		if (!arg[2]) { /* "--" */
-			if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
-				ctx->argc--;
-				ctx->argv++;
-			}
-			break;
-		}
-
-		if (internal_help && !strcmp(arg + 2, "help-all"))
-			return usage_with_options_internal(usagestr, options, 1);
-		if (internal_help && !strcmp(arg + 2, "help"))
-			return parse_options_usage(usagestr, options);
-		switch (parse_long_opt(ctx, arg + 2, options)) {
-		case -1:
-			return parse_options_usage(usagestr, options);
-		case -2:
-			goto unknown;
-		}
-		continue;
-unknown:
-		if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
-			return PARSE_OPT_UNKNOWN;
-		ctx->out[ctx->cpidx++] = ctx->argv[0];
-		ctx->opt = NULL;
-	}
-	return PARSE_OPT_DONE;
-}
-
-int parse_options_end(struct parse_opt_ctx_t *ctx)
-{
-	memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
-	ctx->out[ctx->cpidx + ctx->argc] = NULL;
-	return ctx->cpidx + ctx->argc;
-}
-
-int parse_options(int argc, const char **argv, const struct option *options,
-		  const char * const usagestr[], int flags)
-{
-	struct parse_opt_ctx_t ctx;
-
-	parse_options_start(&ctx, argc, argv, flags);
-	switch (parse_options_step(&ctx, options, usagestr)) {
-	case PARSE_OPT_HELP:
-		exit(129);
-	case PARSE_OPT_DONE:
-		break;
-	default: /* PARSE_OPT_UNKNOWN */
-		if (ctx.argv[0][1] == '-') {
-			error("unknown option `%s'", ctx.argv[0] + 2);
-		} else {
-			error("unknown switch `%c'", *ctx.opt);
-		}
-		usage_with_options(usagestr, options);
-	}
-
-	return parse_options_end(&ctx);
-}
-
-#define USAGE_OPTS_WIDTH 24
-#define USAGE_GAP         2
-
-int usage_with_options_internal(const char * const *usagestr,
-				const struct option *opts, int full)
-{
-	if (!usagestr)
-		return PARSE_OPT_HELP;
-
-	fprintf(stderr, "\n usage: %s\n", *usagestr++);
-	while (*usagestr && **usagestr)
-		fprintf(stderr, "    or: %s\n", *usagestr++);
-	while (*usagestr) {
-		fprintf(stderr, "%s%s\n",
-				**usagestr ? "    " : "",
-				*usagestr);
-		usagestr++;
-	}
-
-	if (opts->type != OPTION_GROUP)
-		fputc('\n', stderr);
-
-	for (; opts->type != OPTION_END; opts++) {
-		size_t pos;
-		int pad;
-
-		if (opts->type == OPTION_GROUP) {
-			fputc('\n', stderr);
-			if (*opts->help)
-				fprintf(stderr, "%s\n", opts->help);
-			continue;
-		}
-		if (!full && (opts->flags & PARSE_OPT_HIDDEN))
-			continue;
-
-		pos = fprintf(stderr, "    ");
-		if (opts->short_name)
-			pos += fprintf(stderr, "-%c", opts->short_name);
-		if (opts->long_name && opts->short_name)
-			pos += fprintf(stderr, ", ");
-		if (opts->long_name)
-			pos += fprintf(stderr, "--%s", opts->long_name);
-
-		switch (opts->type) {
-		case OPTION_ARGUMENT:
-			break;
-		case OPTION_INTEGER:
-			if (opts->flags & PARSE_OPT_OPTARG)
-				if (opts->long_name)
-					pos += fprintf(stderr, "[=<n>]");
-				else
-					pos += fprintf(stderr, "[<n>]");
-			else
-				pos += fprintf(stderr, " <n>");
-			break;
-		case OPTION_CALLBACK:
-			if (opts->flags & PARSE_OPT_NOARG)
-				break;
-			/* FALLTHROUGH */
-		case OPTION_STRING:
-			if (opts->argh) {
-				if (opts->flags & PARSE_OPT_OPTARG)
-					if (opts->long_name)
-						pos += fprintf(stderr, "[=<%s>]", opts->argh);
-					else
-						pos += fprintf(stderr, "[<%s>]", opts->argh);
-				else
-					pos += fprintf(stderr, " <%s>", opts->argh);
-			} else {
-				if (opts->flags & PARSE_OPT_OPTARG)
-					if (opts->long_name)
-						pos += fprintf(stderr, "[=...]");
-					else
-						pos += fprintf(stderr, "[...]");
-				else
-					pos += fprintf(stderr, " ...");
-			}
-			break;
-		default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */
-			break;
-		}
-
-		if (pos <= USAGE_OPTS_WIDTH)
-			pad = USAGE_OPTS_WIDTH - pos;
-		else {
-			fputc('\n', stderr);
-			pad = USAGE_OPTS_WIDTH;
-		}
-		fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
-	}
-	fputc('\n', stderr);
-
-	return PARSE_OPT_HELP;
-}
-
-void usage_with_options(const char * const *usagestr,
-			const struct option *opts)
-{
-	usage_with_options_internal(usagestr, opts, 0);
-	exit(129);
-}
-
-int parse_options_usage(const char * const *usagestr,
-			const struct option *opts)
-{
-	return usage_with_options_internal(usagestr, opts, 0);
-}
-
-
-int parse_opt_verbosity_cb(const struct option *opt, const char *arg,
-			   int unset)
-{
-	int *target = opt->value;
-
-	if (unset)
-		/* --no-quiet, --no-verbose */
-		*target = 0;
-	else if (opt->short_name == 'v') {
-		if (*target >= 0)
-			(*target)++;
-		else
-			*target = 1;
-	} else {
-		if (*target <= 0)
-			(*target)--;
-		else
-			*target = -1;
-	}
-	return 0;
-}
diff --git a/Documentation/perf_counter/util/parse-options.h b/Documentation/perf_counter/util/parse-options.h
deleted file mode 100644
index a1039a6..0000000
--- a/Documentation/perf_counter/util/parse-options.h
+++ /dev/null
@@ -1,174 +0,0 @@
-#ifndef PARSE_OPTIONS_H
-#define PARSE_OPTIONS_H
-
-enum parse_opt_type {
-	/* special types */
-	OPTION_END,
-	OPTION_ARGUMENT,
-	OPTION_GROUP,
-	/* options with no arguments */
-	OPTION_BIT,
-	OPTION_BOOLEAN, /* _INCR would have been a better name */
-	OPTION_SET_INT,
-	OPTION_SET_PTR,
-	/* options with arguments (usually) */
-	OPTION_STRING,
-	OPTION_INTEGER,
-	OPTION_LONG,
-	OPTION_CALLBACK,
-};
-
-enum parse_opt_flags {
-	PARSE_OPT_KEEP_DASHDASH = 1,
-	PARSE_OPT_STOP_AT_NON_OPTION = 2,
-	PARSE_OPT_KEEP_ARGV0 = 4,
-	PARSE_OPT_KEEP_UNKNOWN = 8,
-	PARSE_OPT_NO_INTERNAL_HELP = 16,
-};
-
-enum parse_opt_option_flags {
-	PARSE_OPT_OPTARG  = 1,
-	PARSE_OPT_NOARG   = 2,
-	PARSE_OPT_NONEG   = 4,
-	PARSE_OPT_HIDDEN  = 8,
-	PARSE_OPT_LASTARG_DEFAULT = 16,
-};
-
-struct option;
-typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
-
-/*
- * `type`::
- *   holds the type of the option, you must have an OPTION_END last in your
- *   array.
- *
- * `short_name`::
- *   the character to use as a short option name, '\0' if none.
- *
- * `long_name`::
- *   the long option name, without the leading dashes, NULL if none.
- *
- * `value`::
- *   stores pointers to the values to be filled.
- *
- * `argh`::
- *   token to explain the kind of argument this option wants. Keep it
- *   homogenous across the repository.
- *
- * `help`::
- *   the short help associated to what the option does.
- *   Must never be NULL (except for OPTION_END).
- *   OPTION_GROUP uses this pointer to store the group header.
- *
- * `flags`::
- *   mask of parse_opt_option_flags.
- *   PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
- *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
- *   PARSE_OPT_NONEG: says that this option cannot be negated
- *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in
- *                    the long one.
- *
- * `callback`::
- *   pointer to the callback to use for OPTION_CALLBACK.
- *
- * `defval`::
- *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
- *   OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in
- *   the value when met.
- *   CALLBACKS can use it like they want.
- */
-struct option {
-	enum parse_opt_type type;
-	int short_name;
-	const char *long_name;
-	void *value;
-	const char *argh;
-	const char *help;
-
-	int flags;
-	parse_opt_cb *callback;
-	intptr_t defval;
-};
-
-#define OPT_END()                   { OPTION_END }
-#define OPT_ARGUMENT(l, h)          { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) }
-#define OPT_GROUP(h)                { OPTION_GROUP, 0, NULL, NULL, NULL, (h) }
-#define OPT_BIT(s, l, v, h, b)      { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) }
-#define OPT_BOOLEAN(s, l, v, h)     { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) }
-#define OPT_SET_INT(s, l, v, h, i)  { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) }
-#define OPT_SET_PTR(s, l, v, h, p)  { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) }
-#define OPT_INTEGER(s, l, v, h)     { OPTION_INTEGER, (s), (l), (v), NULL, (h) }
-#define OPT_LONG(s, l, v, h)        { OPTION_LONG, (s), (l), (v), NULL, (h) }
-#define OPT_STRING(s, l, v, a, h)   { OPTION_STRING,  (s), (l), (v), (a), (h) }
-#define OPT_DATE(s, l, v, h) \
-	{ OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \
-	  parse_opt_approxidate_cb }
-#define OPT_CALLBACK(s, l, v, a, h, f) \
-	{ OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) }
-
-/* parse_options() will filter out the processed options and leave the
- * non-option argments in argv[].
- * Returns the number of arguments left in argv[].
- */
-extern int parse_options(int argc, const char **argv,
-                         const struct option *options,
-                         const char * const usagestr[], int flags);
-
-extern NORETURN void usage_with_options(const char * const *usagestr,
-                                        const struct option *options);
-
-/*----- incremantal advanced APIs -----*/
-
-enum {
-	PARSE_OPT_HELP = -1,
-	PARSE_OPT_DONE,
-	PARSE_OPT_UNKNOWN,
-};
-
-/*
- * It's okay for the caller to consume argv/argc in the usual way.
- * Other fields of that structure are private to parse-options and should not
- * be modified in any way.
- */
-struct parse_opt_ctx_t {
-	const char **argv;
-	const char **out;
-	int argc, cpidx;
-	const char *opt;
-	int flags;
-};
-
-extern int parse_options_usage(const char * const *usagestr,
-			       const struct option *opts);
-
-extern void parse_options_start(struct parse_opt_ctx_t *ctx,
-				int argc, const char **argv, int flags);
-
-extern int parse_options_step(struct parse_opt_ctx_t *ctx,
-			      const struct option *options,
-			      const char * const usagestr[]);
-
-extern int parse_options_end(struct parse_opt_ctx_t *ctx);
-
-
-/*----- some often used options -----*/
-extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
-extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
-extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
-
-#define OPT__VERBOSE(var)  OPT_BOOLEAN('v', "verbose", (var), "be verbose")
-#define OPT__QUIET(var)    OPT_BOOLEAN('q', "quiet",   (var), "be quiet")
-#define OPT__VERBOSITY(var) \
-	{ OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
-	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
-	{ OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
-	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
-#define OPT__DRY_RUN(var)  OPT_BOOLEAN('n', "dry-run", (var), "dry run")
-#define OPT__ABBREV(var)  \
-	{ OPTION_CALLBACK, 0, "abbrev", (var), "n", \
-	  "use <n> digits to display SHA-1s", \
-	  PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }
-
-extern const char *parse_options_fix_filename(const char *prefix, const char *file);
-
-#endif
diff --git a/Documentation/perf_counter/util/path.c b/Documentation/perf_counter/util/path.c
deleted file mode 100644
index a501a40..0000000
--- a/Documentation/perf_counter/util/path.c
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * I'm tired of doing "vsnprintf()" etc just to open a
- * file, so here's a "return static buffer with printf"
- * interface for paths.
- *
- * It's obviously not thread-safe. Sue me. But it's quite
- * useful for doing things like
- *
- *   f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
- *
- * which is what it's designed for.
- */
-#include "cache.h"
-
-static char bad_path[] = "/bad-path/";
-/*
- * Two hacks:
- */
-
-static char *get_perf_dir(void)
-{
-	return ".";
-}
-
-size_t strlcpy(char *dest, const char *src, size_t size)
-{
-	size_t ret = strlen(src);
-
-	if (size) {
-		size_t len = (ret >= size) ? size - 1 : ret;
-		memcpy(dest, src, len);
-		dest[len] = '\0';
-	}
-	return ret;
-}
-
-
-static char *get_pathname(void)
-{
-	static char pathname_array[4][PATH_MAX];
-	static int index;
-	return pathname_array[3 & ++index];
-}
-
-static char *cleanup_path(char *path)
-{
-	/* Clean it up */
-	if (!memcmp(path, "./", 2)) {
-		path += 2;
-		while (*path == '/')
-			path++;
-	}
-	return path;
-}
-
-char *mksnpath(char *buf, size_t n, const char *fmt, ...)
-{
-	va_list args;
-	unsigned len;
-
-	va_start(args, fmt);
-	len = vsnprintf(buf, n, fmt, args);
-	va_end(args);
-	if (len >= n) {
-		strlcpy(buf, bad_path, n);
-		return buf;
-	}
-	return cleanup_path(buf);
-}
-
-static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
-{
-	const char *perf_dir = get_perf_dir();
-	size_t len;
-
-	len = strlen(perf_dir);
-	if (n < len + 1)
-		goto bad;
-	memcpy(buf, perf_dir, len);
-	if (len && !is_dir_sep(perf_dir[len-1]))
-		buf[len++] = '/';
-	len += vsnprintf(buf + len, n - len, fmt, args);
-	if (len >= n)
-		goto bad;
-	return cleanup_path(buf);
-bad:
-	strlcpy(buf, bad_path, n);
-	return buf;
-}
-
-char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
-{
-	va_list args;
-	va_start(args, fmt);
-	(void)perf_vsnpath(buf, n, fmt, args);
-	va_end(args);
-	return buf;
-}
-
-char *perf_pathdup(const char *fmt, ...)
-{
-	char path[PATH_MAX];
-	va_list args;
-	va_start(args, fmt);
-	(void)perf_vsnpath(path, sizeof(path), fmt, args);
-	va_end(args);
-	return xstrdup(path);
-}
-
-char *mkpath(const char *fmt, ...)
-{
-	va_list args;
-	unsigned len;
-	char *pathname = get_pathname();
-
-	va_start(args, fmt);
-	len = vsnprintf(pathname, PATH_MAX, fmt, args);
-	va_end(args);
-	if (len >= PATH_MAX)
-		return bad_path;
-	return cleanup_path(pathname);
-}
-
-char *perf_path(const char *fmt, ...)
-{
-	const char *perf_dir = get_perf_dir();
-	char *pathname = get_pathname();
-	va_list args;
-	unsigned len;
-
-	len = strlen(perf_dir);
-	if (len > PATH_MAX-100)
-		return bad_path;
-	memcpy(pathname, perf_dir, len);
-	if (len && perf_dir[len-1] != '/')
-		pathname[len++] = '/';
-	va_start(args, fmt);
-	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
-	va_end(args);
-	if (len >= PATH_MAX)
-		return bad_path;
-	return cleanup_path(pathname);
-}
-
-
-/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
-int perf_mkstemp(char *path, size_t len, const char *template)
-{
-	const char *tmp;
-	size_t n;
-
-	tmp = getenv("TMPDIR");
-	if (!tmp)
-		tmp = "/tmp";
-	n = snprintf(path, len, "%s/%s", tmp, template);
-	if (len <= n) {
-		errno = ENAMETOOLONG;
-		return -1;
-	}
-	return mkstemp(path);
-}
-
-
-const char *make_relative_path(const char *abs, const char *base)
-{
-	static char buf[PATH_MAX + 1];
-	int baselen;
-	if (!base)
-		return abs;
-	baselen = strlen(base);
-	if (prefixcmp(abs, base))
-		return abs;
-	if (abs[baselen] == '/')
-		baselen++;
-	else if (base[baselen - 1] != '/')
-		return abs;
-	strcpy(buf, abs + baselen);
-	return buf;
-}
-
-/*
- * It is okay if dst == src, but they should not overlap otherwise.
- *
- * Performs the following normalizations on src, storing the result in dst:
- * - Ensures that components are separated by '/' (Windows only)
- * - Squashes sequences of '/'.
- * - Removes "." components.
- * - Removes ".." components, and the components the precede them.
- * Returns failure (non-zero) if a ".." component appears as first path
- * component anytime during the normalization. Otherwise, returns success (0).
- *
- * Note that this function is purely textual.  It does not follow symlinks,
- * verify the existence of the path, or make any system calls.
- */
-int normalize_path_copy(char *dst, const char *src)
-{
-	char *dst0;
-
-	if (has_dos_drive_prefix(src)) {
-		*dst++ = *src++;
-		*dst++ = *src++;
-	}
-	dst0 = dst;
-
-	if (is_dir_sep(*src)) {
-		*dst++ = '/';
-		while (is_dir_sep(*src))
-			src++;
-	}
-
-	for (;;) {
-		char c = *src;
-
-		/*
-		 * A path component that begins with . could be
-		 * special:
-		 * (1) "." and ends   -- ignore and terminate.
-		 * (2) "./"           -- ignore them, eat slash and continue.
-		 * (3) ".." and ends  -- strip one and terminate.
-		 * (4) "../"          -- strip one, eat slash and continue.
-		 */
-		if (c == '.') {
-			if (!src[1]) {
-				/* (1) */
-				src++;
-			} else if (is_dir_sep(src[1])) {
-				/* (2) */
-				src += 2;
-				while (is_dir_sep(*src))
-					src++;
-				continue;
-			} else if (src[1] == '.') {
-				if (!src[2]) {
-					/* (3) */
-					src += 2;
-					goto up_one;
-				} else if (is_dir_sep(src[2])) {
-					/* (4) */
-					src += 3;
-					while (is_dir_sep(*src))
-						src++;
-					goto up_one;
-				}
-			}
-		}
-
-		/* copy up to the next '/', and eat all '/' */
-		while ((c = *src++) != '\0' && !is_dir_sep(c))
-			*dst++ = c;
-		if (is_dir_sep(c)) {
-			*dst++ = '/';
-			while (is_dir_sep(c))
-				c = *src++;
-			src--;
-		} else if (!c)
-			break;
-		continue;
-
-	up_one:
-		/*
-		 * dst0..dst is prefix portion, and dst[-1] is '/';
-		 * go up one level.
-		 */
-		dst--;	/* go to trailing '/' */
-		if (dst <= dst0)
-			return -1;
-		/* Windows: dst[-1] cannot be backslash anymore */
-		while (dst0 < dst && dst[-1] != '/')
-			dst--;
-	}
-	*dst = '\0';
-	return 0;
-}
-
-/*
- * path = Canonical absolute path
- * prefix_list = Colon-separated list of absolute paths
- *
- * Determines, for each path in prefix_list, whether the "prefix" really
- * is an ancestor directory of path.  Returns the length of the longest
- * ancestor directory, excluding any trailing slashes, or -1 if no prefix
- * is an ancestor.  (Note that this means 0 is returned if prefix_list is
- * "/".) "/foo" is not considered an ancestor of "/foobar".  Directories
- * are not considered to be their own ancestors.  path must be in a
- * canonical form: empty components, or "." or ".." components are not
- * allowed.  prefix_list may be null, which is like "".
- */
-int longest_ancestor_length(const char *path, const char *prefix_list)
-{
-	char buf[PATH_MAX+1];
-	const char *ceil, *colon;
-	int len, max_len = -1;
-
-	if (prefix_list == NULL || !strcmp(path, "/"))
-		return -1;
-
-	for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
-		for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
-		len = colon - ceil;
-		if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
-			continue;
-		strlcpy(buf, ceil, len+1);
-		if (normalize_path_copy(buf, buf) < 0)
-			continue;
-		len = strlen(buf);
-		if (len > 0 && buf[len-1] == '/')
-			buf[--len] = '\0';
-
-		if (!strncmp(path, buf, len) &&
-		    path[len] == '/' &&
-		    len > max_len) {
-			max_len = len;
-		}
-	}
-
-	return max_len;
-}
-
-/* strip arbitrary amount of directory separators at end of path */
-static inline int chomp_trailing_dir_sep(const char *path, int len)
-{
-	while (len && is_dir_sep(path[len - 1]))
-		len--;
-	return len;
-}
-
-/*
- * If path ends with suffix (complete path components), returns the
- * part before suffix (sans trailing directory separators).
- * Otherwise returns NULL.
- */
-char *strip_path_suffix(const char *path, const char *suffix)
-{
-	int path_len = strlen(path), suffix_len = strlen(suffix);
-
-	while (suffix_len) {
-		if (!path_len)
-			return NULL;
-
-		if (is_dir_sep(path[path_len - 1])) {
-			if (!is_dir_sep(suffix[suffix_len - 1]))
-				return NULL;
-			path_len = chomp_trailing_dir_sep(path, path_len);
-			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
-		}
-		else if (path[--path_len] != suffix[--suffix_len])
-			return NULL;
-	}
-
-	if (path_len && !is_dir_sep(path[path_len - 1]))
-		return NULL;
-	return xstrndup(path, chomp_trailing_dir_sep(path, path_len));
-}
diff --git a/Documentation/perf_counter/util/quote.c b/Documentation/perf_counter/util/quote.c
deleted file mode 100644
index f18c521..0000000
--- a/Documentation/perf_counter/util/quote.c
+++ /dev/null
@@ -1,481 +0,0 @@
-#include "cache.h"
-#include "quote.h"
-
-int quote_path_fully = 1;
-
-/* Help to copy the thing properly quoted for the shell safety.
- * any single quote is replaced with '\'', any exclamation point
- * is replaced with '\!', and the whole thing is enclosed in a
- *
- * E.g.
- *  original     sq_quote     result
- *  name     ==> name      ==> 'name'
- *  a b      ==> a b       ==> 'a b'
- *  a'b      ==> a'\''b    ==> 'a'\''b'
- *  a!b      ==> a'\!'b    ==> 'a'\!'b'
- */
-static inline int need_bs_quote(char c)
-{
-	return (c == '\'' || c == '!');
-}
-
-void sq_quote_buf(struct strbuf *dst, const char *src)
-{
-	char *to_free = NULL;
-
-	if (dst->buf == src)
-		to_free = strbuf_detach(dst, NULL);
-
-	strbuf_addch(dst, '\'');
-	while (*src) {
-		size_t len = strcspn(src, "'!");
-		strbuf_add(dst, src, len);
-		src += len;
-		while (need_bs_quote(*src)) {
-			strbuf_addstr(dst, "'\\");
-			strbuf_addch(dst, *src++);
-			strbuf_addch(dst, '\'');
-		}
-	}
-	strbuf_addch(dst, '\'');
-	free(to_free);
-}
-
-void sq_quote_print(FILE *stream, const char *src)
-{
-	char c;
-
-	fputc('\'', stream);
-	while ((c = *src++)) {
-		if (need_bs_quote(c)) {
-			fputs("'\\", stream);
-			fputc(c, stream);
-			fputc('\'', stream);
-		} else {
-			fputc(c, stream);
-		}
-	}
-	fputc('\'', stream);
-}
-
-void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
-{
-	int i;
-
-	/* Copy into destination buffer. */
-	strbuf_grow(dst, 255);
-	for (i = 0; argv[i]; ++i) {
-		strbuf_addch(dst, ' ');
-		sq_quote_buf(dst, argv[i]);
-		if (maxlen && dst->len > maxlen)
-			die("Too many or long arguments");
-	}
-}
-
-char *sq_dequote_step(char *arg, char **next)
-{
-	char *dst = arg;
-	char *src = arg;
-	char c;
-
-	if (*src != '\'')
-		return NULL;
-	for (;;) {
-		c = *++src;
-		if (!c)
-			return NULL;
-		if (c != '\'') {
-			*dst++ = c;
-			continue;
-		}
-		/* We stepped out of sq */
-		switch (*++src) {
-		case '\0':
-			*dst = 0;
-			if (next)
-				*next = NULL;
-			return arg;
-		case '\\':
-			c = *++src;
-			if (need_bs_quote(c) && *++src == '\'') {
-				*dst++ = c;
-				continue;
-			}
-		/* Fallthrough */
-		default:
-			if (!next || !isspace(*src))
-				return NULL;
-			do {
-				c = *++src;
-			} while (isspace(c));
-			*dst = 0;
-			*next = src;
-			return arg;
-		}
-	}
-}
-
-char *sq_dequote(char *arg)
-{
-	return sq_dequote_step(arg, NULL);
-}
-
-int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
-{
-	char *next = arg;
-
-	if (!*arg)
-		return 0;
-	do {
-		char *dequoted = sq_dequote_step(next, &next);
-		if (!dequoted)
-			return -1;
-		ALLOC_GROW(*argv, *nr + 1, *alloc);
-		(*argv)[(*nr)++] = dequoted;
-	} while (next);
-
-	return 0;
-}
-
-/* 1 means: quote as octal
- * 0 means: quote as octal if (quote_path_fully)
- * -1 means: never quote
- * c: quote as "\\c"
- */
-#define X8(x)   x, x, x, x, x, x, x, x
-#define X16(x)  X8(x), X8(x)
-static signed char const sq_lookup[256] = {
-	/*           0    1    2    3    4    5    6    7 */
-	/* 0x00 */   1,   1,   1,   1,   1,   1,   1, 'a',
-	/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r',   1,   1,
-	/* 0x10 */ X16(1),
-	/* 0x20 */  -1,  -1, '"',  -1,  -1,  -1,  -1,  -1,
-	/* 0x28 */ X16(-1), X16(-1), X16(-1),
-	/* 0x58 */  -1,  -1,  -1,  -1,'\\',  -1,  -1,  -1,
-	/* 0x60 */ X16(-1), X8(-1),
-	/* 0x78 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,   1,
-	/* 0x80 */ /* set to 0 */
-};
-
-static inline int sq_must_quote(char c)
-{
-	return sq_lookup[(unsigned char)c] + quote_path_fully > 0;
-}
-
-/* returns the longest prefix not needing a quote up to maxlen if positive.
-   This stops at the first \0 because it's marked as a character needing an
-   escape */
-static size_t next_quote_pos(const char *s, ssize_t maxlen)
-{
-	size_t len;
-	if (maxlen < 0) {
-		for (len = 0; !sq_must_quote(s[len]); len++);
-	} else {
-		for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++);
-	}
-	return len;
-}
-
-/*
- * C-style name quoting.
- *
- * (1) if sb and fp are both NULL, inspect the input name and counts the
- *     number of bytes that are needed to hold c_style quoted version of name,
- *     counting the double quotes around it but not terminating NUL, and
- *     returns it.
- *     However, if name does not need c_style quoting, it returns 0.
- *
- * (2) if sb or fp are not NULL, it emits the c_style quoted version
- *     of name, enclosed with double quotes if asked and needed only.
- *     Return value is the same as in (1).
- */
-static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
-                                    struct strbuf *sb, FILE *fp, int no_dq)
-{
-#undef EMIT
-#define EMIT(c)                                 \
-	do {                                        \
-		if (sb) strbuf_addch(sb, (c));          \
-		if (fp) fputc((c), fp);                 \
-		count++;                                \
-	} while (0)
-#define EMITBUF(s, l)                           \
-	do {                                        \
-		int __ret;				\
-		if (sb) strbuf_add(sb, (s), (l));       \
-		if (fp) __ret = fwrite((s), (l), 1, fp);        \
-		count += (l);                           \
-	} while (0)
-
-	size_t len, count = 0;
-	const char *p = name;
-
-	for (;;) {
-		int ch;
-
-		len = next_quote_pos(p, maxlen);
-		if (len == maxlen || !p[len])
-			break;
-
-		if (!no_dq && p == name)
-			EMIT('"');
-
-		EMITBUF(p, len);
-		EMIT('\\');
-		p += len;
-		ch = (unsigned char)*p++;
-		if (sq_lookup[ch] >= ' ') {
-			EMIT(sq_lookup[ch]);
-		} else {
-			EMIT(((ch >> 6) & 03) + '0');
-			EMIT(((ch >> 3) & 07) + '0');
-			EMIT(((ch >> 0) & 07) + '0');
-		}
-	}
-
-	EMITBUF(p, len);
-	if (p == name)   /* no ending quote needed */
-		return 0;
-
-	if (!no_dq)
-		EMIT('"');
-	return count;
-}
-
-size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
-{
-	return quote_c_style_counted(name, -1, sb, fp, nodq);
-}
-
-void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
-{
-	if (quote_c_style(prefix, NULL, NULL, 0) ||
-	    quote_c_style(path, NULL, NULL, 0)) {
-		if (!nodq)
-			strbuf_addch(sb, '"');
-		quote_c_style(prefix, sb, NULL, 1);
-		quote_c_style(path, sb, NULL, 1);
-		if (!nodq)
-			strbuf_addch(sb, '"');
-	} else {
-		strbuf_addstr(sb, prefix);
-		strbuf_addstr(sb, path);
-	}
-}
-
-void write_name_quoted(const char *name, FILE *fp, int terminator)
-{
-	if (terminator) {
-		quote_c_style(name, NULL, fp, 0);
-	} else {
-		fputs(name, fp);
-	}
-	fputc(terminator, fp);
-}
-
-extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
-                                 const char *name, FILE *fp, int terminator)
-{
-	int needquote = 0;
-
-	if (terminator) {
-		needquote = next_quote_pos(pfx, pfxlen) < pfxlen
-			|| name[next_quote_pos(name, -1)];
-	}
-	if (needquote) {
-		fputc('"', fp);
-		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
-		quote_c_style(name, NULL, fp, 1);
-		fputc('"', fp);
-	} else {
-		int ret;
-
-		ret = fwrite(pfx, pfxlen, 1, fp);
-		fputs(name, fp);
-	}
-	fputc(terminator, fp);
-}
-
-/* quote path as relative to the given prefix */
-char *quote_path_relative(const char *in, int len,
-			  struct strbuf *out, const char *prefix)
-{
-	int needquote;
-
-	if (len < 0)
-		len = strlen(in);
-
-	/* "../" prefix itself does not need quoting, but "in" might. */
-	needquote = next_quote_pos(in, len) < len;
-	strbuf_setlen(out, 0);
-	strbuf_grow(out, len);
-
-	if (needquote)
-		strbuf_addch(out, '"');
-	if (prefix) {
-		int off = 0;
-		while (prefix[off] && off < len && prefix[off] == in[off])
-			if (prefix[off] == '/') {
-				prefix += off + 1;
-				in += off + 1;
-				len -= off + 1;
-				off = 0;
-			} else
-				off++;
-
-		for (; *prefix; prefix++)
-			if (*prefix == '/')
-				strbuf_addstr(out, "../");
-	}
-
-	quote_c_style_counted (in, len, out, NULL, 1);
-
-	if (needquote)
-		strbuf_addch(out, '"');
-	if (!out->len)
-		strbuf_addstr(out, "./");
-
-	return out->buf;
-}
-
-/*
- * C-style name unquoting.
- *
- * Quoted should point at the opening double quote.
- * + Returns 0 if it was able to unquote the string properly, and appends the
- *   result in the strbuf `sb'.
- * + Returns -1 in case of error, and doesn't touch the strbuf. Though note
- *   that this function will allocate memory in the strbuf, so calling
- *   strbuf_release is mandatory whichever result unquote_c_style returns.
- *
- * Updates endp pointer to point at one past the ending double quote if given.
- */
-int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
-{
-	size_t oldlen = sb->len, len;
-	int ch, ac;
-
-	if (*quoted++ != '"')
-		return -1;
-
-	for (;;) {
-		len = strcspn(quoted, "\"\\");
-		strbuf_add(sb, quoted, len);
-		quoted += len;
-
-		switch (*quoted++) {
-		  case '"':
-			if (endp)
-				*endp = quoted;
-			return 0;
-		  case '\\':
-			break;
-		  default:
-			goto error;
-		}
-
-		switch ((ch = *quoted++)) {
-		case 'a': ch = '\a'; break;
-		case 'b': ch = '\b'; break;
-		case 'f': ch = '\f'; break;
-		case 'n': ch = '\n'; break;
-		case 'r': ch = '\r'; break;
-		case 't': ch = '\t'; break;
-		case 'v': ch = '\v'; break;
-
-		case '\\': case '"':
-			break; /* verbatim */
-
-		/* octal values with first digit over 4 overflow */
-		case '0': case '1': case '2': case '3':
-					ac = ((ch - '0') << 6);
-			if ((ch = *quoted++) < '0' || '7' < ch)
-				goto error;
-					ac |= ((ch - '0') << 3);
-			if ((ch = *quoted++) < '0' || '7' < ch)
-				goto error;
-					ac |= (ch - '0');
-					ch = ac;
-					break;
-				default:
-			goto error;
-			}
-		strbuf_addch(sb, ch);
-		}
-
-  error:
-	strbuf_setlen(sb, oldlen);
-	return -1;
-}
-
-/* quoting as a string literal for other languages */
-
-void perl_quote_print(FILE *stream, const char *src)
-{
-	const char sq = '\'';
-	const char bq = '\\';
-	char c;
-
-	fputc(sq, stream);
-	while ((c = *src++)) {
-		if (c == sq || c == bq)
-			fputc(bq, stream);
-		fputc(c, stream);
-	}
-	fputc(sq, stream);
-}
-
-void python_quote_print(FILE *stream, const char *src)
-{
-	const char sq = '\'';
-	const char bq = '\\';
-	const char nl = '\n';
-	char c;
-
-	fputc(sq, stream);
-	while ((c = *src++)) {
-		if (c == nl) {
-			fputc(bq, stream);
-			fputc('n', stream);
-			continue;
-		}
-		if (c == sq || c == bq)
-			fputc(bq, stream);
-		fputc(c, stream);
-	}
-	fputc(sq, stream);
-}
-
-void tcl_quote_print(FILE *stream, const char *src)
-{
-	char c;
-
-	fputc('"', stream);
-	while ((c = *src++)) {
-		switch (c) {
-		case '[': case ']':
-		case '{': case '}':
-		case '$': case '\\': case '"':
-			fputc('\\', stream);
-		default:
-			fputc(c, stream);
-			break;
-		case '\f':
-			fputs("\\f", stream);
-			break;
-		case '\r':
-			fputs("\\r", stream);
-			break;
-		case '\n':
-			fputs("\\n", stream);
-			break;
-		case '\t':
-			fputs("\\t", stream);
-			break;
-		case '\v':
-			fputs("\\v", stream);
-			break;
-		}
-	}
-	fputc('"', stream);
-}
diff --git a/Documentation/perf_counter/util/quote.h b/Documentation/perf_counter/util/quote.h
deleted file mode 100644
index 5dfad89..0000000
--- a/Documentation/perf_counter/util/quote.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef QUOTE_H
-#define QUOTE_H
-
-#include <stddef.h>
-#include <stdio.h>
-
-/* Help to copy the thing properly quoted for the shell safety.
- * any single quote is replaced with '\'', any exclamation point
- * is replaced with '\!', and the whole thing is enclosed in a
- * single quote pair.
- *
- * For example, if you are passing the result to system() as an
- * argument:
- *
- * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
- *
- * would be appropriate.  If the system() is going to call ssh to
- * run the command on the other side:
- *
- * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
- * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd));
- *
- * Note that the above examples leak memory!  Remember to free result from
- * sq_quote() in a real application.
- *
- * sq_quote_buf() writes to an existing buffer of specified size; it
- * will return the number of characters that would have been written
- * excluding the final null regardless of the buffer size.
- */
-
-extern void sq_quote_print(FILE *stream, const char *src);
-
-extern void sq_quote_buf(struct strbuf *, const char *src);
-extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
-
-/* This unwraps what sq_quote() produces in place, but returns
- * NULL if the input does not look like what sq_quote would have
- * produced.
- */
-extern char *sq_dequote(char *);
-
-/*
- * Same as the above, but can be used to unwrap many arguments in the
- * same string separated by space. "next" is changed to point to the
- * next argument that should be passed as first parameter. When there
- * is no more argument to be dequoted, "next" is updated to point to NULL.
- */
-extern char *sq_dequote_step(char *arg, char **next);
-extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc);
-
-extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
-extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq);
-extern void quote_two_c_style(struct strbuf *, const char *, const char *, int);
-
-extern void write_name_quoted(const char *name, FILE *, int terminator);
-extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
-                                 const char *name, FILE *, int terminator);
-
-/* quote path as relative to the given prefix */
-char *quote_path_relative(const char *in, int len,
-			  struct strbuf *out, const char *prefix);
-
-/* quoting as a string literal for other languages */
-extern void perl_quote_print(FILE *stream, const char *src);
-extern void python_quote_print(FILE *stream, const char *src);
-extern void tcl_quote_print(FILE *stream, const char *src);
-
-#endif
diff --git a/Documentation/perf_counter/util/rbtree.c b/Documentation/perf_counter/util/rbtree.c
deleted file mode 100644
index b15ba9c..0000000
--- a/Documentation/perf_counter/util/rbtree.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
-  Red Black Trees
-  (C) 1999  Andrea Arcangeli <andrea@suse.de>
-  (C) 2002  David Woodhouse <dwmw2@infradead.org>
-  
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-  linux/lib/rbtree.c
-*/
-
-#include "rbtree.h"
-
-static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
-{
-	struct rb_node *right = node->rb_right;
-	struct rb_node *parent = rb_parent(node);
-
-	if ((node->rb_right = right->rb_left))
-		rb_set_parent(right->rb_left, node);
-	right->rb_left = node;
-
-	rb_set_parent(right, parent);
-
-	if (parent)
-	{
-		if (node == parent->rb_left)
-			parent->rb_left = right;
-		else
-			parent->rb_right = right;
-	}
-	else
-		root->rb_node = right;
-	rb_set_parent(node, right);
-}
-
-static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
-{
-	struct rb_node *left = node->rb_left;
-	struct rb_node *parent = rb_parent(node);
-
-	if ((node->rb_left = left->rb_right))
-		rb_set_parent(left->rb_right, node);
-	left->rb_right = node;
-
-	rb_set_parent(left, parent);
-
-	if (parent)
-	{
-		if (node == parent->rb_right)
-			parent->rb_right = left;
-		else
-			parent->rb_left = left;
-	}
-	else
-		root->rb_node = left;
-	rb_set_parent(node, left);
-}
-
-void rb_insert_color(struct rb_node *node, struct rb_root *root)
-{
-	struct rb_node *parent, *gparent;
-
-	while ((parent = rb_parent(node)) && rb_is_red(parent))
-	{
-		gparent = rb_parent(parent);
-
-		if (parent == gparent->rb_left)
-		{
-			{
-				register struct rb_node *uncle = gparent->rb_right;
-				if (uncle && rb_is_red(uncle))
-				{
-					rb_set_black(uncle);
-					rb_set_black(parent);
-					rb_set_red(gparent);
-					node = gparent;
-					continue;
-				}
-			}
-
-			if (parent->rb_right == node)
-			{
-				register struct rb_node *tmp;
-				__rb_rotate_left(parent, root);
-				tmp = parent;
-				parent = node;
-				node = tmp;
-			}
-
-			rb_set_black(parent);
-			rb_set_red(gparent);
-			__rb_rotate_right(gparent, root);
-		} else {
-			{
-				register struct rb_node *uncle = gparent->rb_left;
-				if (uncle && rb_is_red(uncle))
-				{
-					rb_set_black(uncle);
-					rb_set_black(parent);
-					rb_set_red(gparent);
-					node = gparent;
-					continue;
-				}
-			}
-
-			if (parent->rb_left == node)
-			{
-				register struct rb_node *tmp;
-				__rb_rotate_right(parent, root);
-				tmp = parent;
-				parent = node;
-				node = tmp;
-			}
-
-			rb_set_black(parent);
-			rb_set_red(gparent);
-			__rb_rotate_left(gparent, root);
-		}
-	}
-
-	rb_set_black(root->rb_node);
-}
-
-static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
-			     struct rb_root *root)
-{
-	struct rb_node *other;
-
-	while ((!node || rb_is_black(node)) && node != root->rb_node)
-	{
-		if (parent->rb_left == node)
-		{
-			other = parent->rb_right;
-			if (rb_is_red(other))
-			{
-				rb_set_black(other);
-				rb_set_red(parent);
-				__rb_rotate_left(parent, root);
-				other = parent->rb_right;
-			}
-			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
-			    (!other->rb_right || rb_is_black(other->rb_right)))
-			{
-				rb_set_red(other);
-				node = parent;
-				parent = rb_parent(node);
-			}
-			else
-			{
-				if (!other->rb_right || rb_is_black(other->rb_right))
-				{
-					rb_set_black(other->rb_left);
-					rb_set_red(other);
-					__rb_rotate_right(other, root);
-					other = parent->rb_right;
-				}
-				rb_set_color(other, rb_color(parent));
-				rb_set_black(parent);
-				rb_set_black(other->rb_right);
-				__rb_rotate_left(parent, root);
-				node = root->rb_node;
-				break;
-			}
-		}
-		else
-		{
-			other = parent->rb_left;
-			if (rb_is_red(other))
-			{
-				rb_set_black(other);
-				rb_set_red(parent);
-				__rb_rotate_right(parent, root);
-				other = parent->rb_left;
-			}
-			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
-			    (!other->rb_right || rb_is_black(other->rb_right)))
-			{
-				rb_set_red(other);
-				node = parent;
-				parent = rb_parent(node);
-			}
-			else
-			{
-				if (!other->rb_left || rb_is_black(other->rb_left))
-				{
-					rb_set_black(other->rb_right);
-					rb_set_red(other);
-					__rb_rotate_left(other, root);
-					other = parent->rb_left;
-				}
-				rb_set_color(other, rb_color(parent));
-				rb_set_black(parent);
-				rb_set_black(other->rb_left);
-				__rb_rotate_right(parent, root);
-				node = root->rb_node;
-				break;
-			}
-		}
-	}
-	if (node)
-		rb_set_black(node);
-}
-
-void rb_erase(struct rb_node *node, struct rb_root *root)
-{
-	struct rb_node *child, *parent;
-	int color;
-
-	if (!node->rb_left)
-		child = node->rb_right;
-	else if (!node->rb_right)
-		child = node->rb_left;
-	else
-	{
-		struct rb_node *old = node, *left;
-
-		node = node->rb_right;
-		while ((left = node->rb_left) != NULL)
-			node = left;
-		child = node->rb_right;
-		parent = rb_parent(node);
-		color = rb_color(node);
-
-		if (child)
-			rb_set_parent(child, parent);
-		if (parent == old) {
-			parent->rb_right = child;
-			parent = node;
-		} else
-			parent->rb_left = child;
-
-		node->rb_parent_color = old->rb_parent_color;
-		node->rb_right = old->rb_right;
-		node->rb_left = old->rb_left;
-
-		if (rb_parent(old))
-		{
-			if (rb_parent(old)->rb_left == old)
-				rb_parent(old)->rb_left = node;
-			else
-				rb_parent(old)->rb_right = node;
-		} else
-			root->rb_node = node;
-
-		rb_set_parent(old->rb_left, node);
-		if (old->rb_right)
-			rb_set_parent(old->rb_right, node);
-		goto color;
-	}
-
-	parent = rb_parent(node);
-	color = rb_color(node);
-
-	if (child)
-		rb_set_parent(child, parent);
-	if (parent)
-	{
-		if (parent->rb_left == node)
-			parent->rb_left = child;
-		else
-			parent->rb_right = child;
-	}
-	else
-		root->rb_node = child;
-
- color:
-	if (color == RB_BLACK)
-		__rb_erase_color(child, parent, root);
-}
-
-/*
- * This function returns the first node (in sort order) of the tree.
- */
-struct rb_node *rb_first(const struct rb_root *root)
-{
-	struct rb_node	*n;
-
-	n = root->rb_node;
-	if (!n)
-		return NULL;
-	while (n->rb_left)
-		n = n->rb_left;
-	return n;
-}
-
-struct rb_node *rb_last(const struct rb_root *root)
-{
-	struct rb_node	*n;
-
-	n = root->rb_node;
-	if (!n)
-		return NULL;
-	while (n->rb_right)
-		n = n->rb_right;
-	return n;
-}
-
-struct rb_node *rb_next(const struct rb_node *node)
-{
-	struct rb_node *parent;
-
-	if (rb_parent(node) == node)
-		return NULL;
-
-	/* If we have a right-hand child, go down and then left as far
-	   as we can. */
-	if (node->rb_right) {
-		node = node->rb_right; 
-		while (node->rb_left)
-			node=node->rb_left;
-		return (struct rb_node *)node;
-	}
-
-	/* No right-hand children.  Everything down and left is
-	   smaller than us, so any 'next' node must be in the general
-	   direction of our parent. Go up the tree; any time the
-	   ancestor is a right-hand child of its parent, keep going
-	   up. First time it's a left-hand child of its parent, said
-	   parent is our 'next' node. */
-	while ((parent = rb_parent(node)) && node == parent->rb_right)
-		node = parent;
-
-	return parent;
-}
-
-struct rb_node *rb_prev(const struct rb_node *node)
-{
-	struct rb_node *parent;
-
-	if (rb_parent(node) == node)
-		return NULL;
-
-	/* If we have a left-hand child, go down and then right as far
-	   as we can. */
-	if (node->rb_left) {
-		node = node->rb_left; 
-		while (node->rb_right)
-			node=node->rb_right;
-		return (struct rb_node *)node;
-	}
-
-	/* No left-hand children. Go up till we find an ancestor which
-	   is a right-hand child of its parent */
-	while ((parent = rb_parent(node)) && node == parent->rb_left)
-		node = parent;
-
-	return parent;
-}
-
-void rb_replace_node(struct rb_node *victim, struct rb_node *new,
-		     struct rb_root *root)
-{
-	struct rb_node *parent = rb_parent(victim);
-
-	/* Set the surrounding nodes to point to the replacement */
-	if (parent) {
-		if (victim == parent->rb_left)
-			parent->rb_left = new;
-		else
-			parent->rb_right = new;
-	} else {
-		root->rb_node = new;
-	}
-	if (victim->rb_left)
-		rb_set_parent(victim->rb_left, new);
-	if (victim->rb_right)
-		rb_set_parent(victim->rb_right, new);
-
-	/* Copy the pointers/colour from the victim to the replacement */
-	*new = *victim;
-}
diff --git a/Documentation/perf_counter/util/rbtree.h b/Documentation/perf_counter/util/rbtree.h
deleted file mode 100644
index 6bdc488..0000000
--- a/Documentation/perf_counter/util/rbtree.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
-  Red Black Trees
-  (C) 1999  Andrea Arcangeli <andrea@suse.de>
-  
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-  linux/include/linux/rbtree.h
-
-  To use rbtrees you'll have to implement your own insert and search cores.
-  This will avoid us to use callbacks and to drop drammatically performances.
-  I know it's not the cleaner way,  but in C (not in C++) to get
-  performances and genericity...
-
-  Some example of insert and search follows here. The search is a plain
-  normal search over an ordered tree. The insert instead must be implemented
-  int two steps: as first thing the code must insert the element in
-  order as a red leaf in the tree, then the support library function
-  rb_insert_color() must be called. Such function will do the
-  not trivial work to rebalance the rbtree if necessary.
-
------------------------------------------------------------------------
-static inline struct page * rb_search_page_cache(struct inode * inode,
-						 unsigned long offset)
-{
-	struct rb_node * n = inode->i_rb_page_cache.rb_node;
-	struct page * page;
-
-	while (n)
-	{
-		page = rb_entry(n, struct page, rb_page_cache);
-
-		if (offset < page->offset)
-			n = n->rb_left;
-		else if (offset > page->offset)
-			n = n->rb_right;
-		else
-			return page;
-	}
-	return NULL;
-}
-
-static inline struct page * __rb_insert_page_cache(struct inode * inode,
-						   unsigned long offset,
-						   struct rb_node * node)
-{
-	struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
-	struct rb_node * parent = NULL;
-	struct page * page;
-
-	while (*p)
-	{
-		parent = *p;
-		page = rb_entry(parent, struct page, rb_page_cache);
-
-		if (offset < page->offset)
-			p = &(*p)->rb_left;
-		else if (offset > page->offset)
-			p = &(*p)->rb_right;
-		else
-			return page;
-	}
-
-	rb_link_node(node, parent, p);
-
-	return NULL;
-}
-
-static inline struct page * rb_insert_page_cache(struct inode * inode,
-						 unsigned long offset,
-						 struct rb_node * node)
-{
-	struct page * ret;
-	if ((ret = __rb_insert_page_cache(inode, offset, node)))
-		goto out;
-	rb_insert_color(node, &inode->i_rb_page_cache);
- out:
-	return ret;
-}
------------------------------------------------------------------------
-*/
-
-#ifndef	_LINUX_RBTREE_H
-#define	_LINUX_RBTREE_H
-
-#include <stddef.h>
-
-/**
- * container_of - cast a member of a structure out to the containing structure
- * @ptr:	the pointer to the member.
- * @type:	the type of the container struct this is embedded in.
- * @member:	the name of the member within the struct.
- *
- */
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
-
-struct rb_node
-{
-	unsigned long  rb_parent_color;
-#define	RB_RED		0
-#define	RB_BLACK	1
-	struct rb_node *rb_right;
-	struct rb_node *rb_left;
-} __attribute__((aligned(sizeof(long))));
-    /* The alignment might seem pointless, but allegedly CRIS needs it */
-
-struct rb_root
-{
-	struct rb_node *rb_node;
-};
-
-
-#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
-#define rb_color(r)   ((r)->rb_parent_color & 1)
-#define rb_is_red(r)   (!rb_color(r))
-#define rb_is_black(r) rb_color(r)
-#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
-#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
-
-static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
-{
-	rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
-}
-static inline void rb_set_color(struct rb_node *rb, int color)
-{
-	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
-}
-
-#define RB_ROOT	(struct rb_root) { NULL, }
-#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
-
-#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
-#define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
-#define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
-
-extern void rb_insert_color(struct rb_node *, struct rb_root *);
-extern void rb_erase(struct rb_node *, struct rb_root *);
-
-/* Find logical next and previous nodes in a tree */
-extern struct rb_node *rb_next(const struct rb_node *);
-extern struct rb_node *rb_prev(const struct rb_node *);
-extern struct rb_node *rb_first(const struct rb_root *);
-extern struct rb_node *rb_last(const struct rb_root *);
-
-/* Fast replacement of a single node without remove/rebalance/add/rebalance */
-extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 
-			    struct rb_root *root);
-
-static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
-				struct rb_node ** rb_link)
-{
-	node->rb_parent_color = (unsigned long )parent;
-	node->rb_left = node->rb_right = NULL;
-
-	*rb_link = node;
-}
-
-#endif	/* _LINUX_RBTREE_H */
diff --git a/Documentation/perf_counter/util/run-command.c b/Documentation/perf_counter/util/run-command.c
deleted file mode 100644
index b2f5e85..0000000
--- a/Documentation/perf_counter/util/run-command.c
+++ /dev/null
@@ -1,395 +0,0 @@
-#include "cache.h"
-#include "run-command.h"
-#include "exec_cmd.h"
-
-static inline void close_pair(int fd[2])
-{
-	close(fd[0]);
-	close(fd[1]);
-}
-
-static inline void dup_devnull(int to)
-{
-	int fd = open("/dev/null", O_RDWR);
-	dup2(fd, to);
-	close(fd);
-}
-
-int start_command(struct child_process *cmd)
-{
-	int need_in, need_out, need_err;
-	int fdin[2], fdout[2], fderr[2];
-
-	/*
-	 * In case of errors we must keep the promise to close FDs
-	 * that have been passed in via ->in and ->out.
-	 */
-
-	need_in = !cmd->no_stdin && cmd->in < 0;
-	if (need_in) {
-		if (pipe(fdin) < 0) {
-			if (cmd->out > 0)
-				close(cmd->out);
-			return -ERR_RUN_COMMAND_PIPE;
-		}
-		cmd->in = fdin[1];
-	}
-
-	need_out = !cmd->no_stdout
-		&& !cmd->stdout_to_stderr
-		&& cmd->out < 0;
-	if (need_out) {
-		if (pipe(fdout) < 0) {
-			if (need_in)
-				close_pair(fdin);
-			else if (cmd->in)
-				close(cmd->in);
-			return -ERR_RUN_COMMAND_PIPE;
-		}
-		cmd->out = fdout[0];
-	}
-
-	need_err = !cmd->no_stderr && cmd->err < 0;
-	if (need_err) {
-		if (pipe(fderr) < 0) {
-			if (need_in)
-				close_pair(fdin);
-			else if (cmd->in)
-				close(cmd->in);
-			if (need_out)
-				close_pair(fdout);
-			else if (cmd->out)
-				close(cmd->out);
-			return -ERR_RUN_COMMAND_PIPE;
-		}
-		cmd->err = fderr[0];
-	}
-
-#ifndef __MINGW32__
-	fflush(NULL);
-	cmd->pid = fork();
-	if (!cmd->pid) {
-		if (cmd->no_stdin)
-			dup_devnull(0);
-		else if (need_in) {
-			dup2(fdin[0], 0);
-			close_pair(fdin);
-		} else if (cmd->in) {
-			dup2(cmd->in, 0);
-			close(cmd->in);
-		}
-
-		if (cmd->no_stderr)
-			dup_devnull(2);
-		else if (need_err) {
-			dup2(fderr[1], 2);
-			close_pair(fderr);
-		}
-
-		if (cmd->no_stdout)
-			dup_devnull(1);
-		else if (cmd->stdout_to_stderr)
-			dup2(2, 1);
-		else if (need_out) {
-			dup2(fdout[1], 1);
-			close_pair(fdout);
-		} else if (cmd->out > 1) {
-			dup2(cmd->out, 1);
-			close(cmd->out);
-		}
-
-		if (cmd->dir && chdir(cmd->dir))
-			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
-			    cmd->dir, strerror(errno));
-		if (cmd->env) {
-			for (; *cmd->env; cmd->env++) {
-				if (strchr(*cmd->env, '='))
-					putenv((char*)*cmd->env);
-				else
-					unsetenv(*cmd->env);
-			}
-		}
-		if (cmd->preexec_cb)
-			cmd->preexec_cb();
-		if (cmd->perf_cmd) {
-			execv_perf_cmd(cmd->argv);
-		} else {
-			execvp(cmd->argv[0], (char *const*) cmd->argv);
-		}
-		exit(127);
-	}
-#else
-	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
-	const char **sargv = cmd->argv;
-	char **env = environ;
-
-	if (cmd->no_stdin) {
-		s0 = dup(0);
-		dup_devnull(0);
-	} else if (need_in) {
-		s0 = dup(0);
-		dup2(fdin[0], 0);
-	} else if (cmd->in) {
-		s0 = dup(0);
-		dup2(cmd->in, 0);
-	}
-
-	if (cmd->no_stderr) {
-		s2 = dup(2);
-		dup_devnull(2);
-	} else if (need_err) {
-		s2 = dup(2);
-		dup2(fderr[1], 2);
-	}
-
-	if (cmd->no_stdout) {
-		s1 = dup(1);
-		dup_devnull(1);
-	} else if (cmd->stdout_to_stderr) {
-		s1 = dup(1);
-		dup2(2, 1);
-	} else if (need_out) {
-		s1 = dup(1);
-		dup2(fdout[1], 1);
-	} else if (cmd->out > 1) {
-		s1 = dup(1);
-		dup2(cmd->out, 1);
-	}
-
-	if (cmd->dir)
-		die("chdir in start_command() not implemented");
-	if (cmd->env) {
-		env = copy_environ();
-		for (; *cmd->env; cmd->env++)
-			env = env_setenv(env, *cmd->env);
-	}
-
-	if (cmd->perf_cmd) {
-		cmd->argv = prepare_perf_cmd(cmd->argv);
-	}
-
-	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);
-
-	if (cmd->env)
-		free_environ(env);
-	if (cmd->perf_cmd)
-		free(cmd->argv);
-
-	cmd->argv = sargv;
-	if (s0 >= 0)
-		dup2(s0, 0), close(s0);
-	if (s1 >= 0)
-		dup2(s1, 1), close(s1);
-	if (s2 >= 0)
-		dup2(s2, 2), close(s2);
-#endif
-
-	if (cmd->pid < 0) {
-		int err = errno;
-		if (need_in)
-			close_pair(fdin);
-		else if (cmd->in)
-			close(cmd->in);
-		if (need_out)
-			close_pair(fdout);
-		else if (cmd->out)
-			close(cmd->out);
-		if (need_err)
-			close_pair(fderr);
-		return err == ENOENT ?
-			-ERR_RUN_COMMAND_EXEC :
-			-ERR_RUN_COMMAND_FORK;
-	}
-
-	if (need_in)
-		close(fdin[0]);
-	else if (cmd->in)
-		close(cmd->in);
-
-	if (need_out)
-		close(fdout[1]);
-	else if (cmd->out)
-		close(cmd->out);
-
-	if (need_err)
-		close(fderr[1]);
-
-	return 0;
-}
-
-static int wait_or_whine(pid_t pid)
-{
-	for (;;) {
-		int status, code;
-		pid_t waiting = waitpid(pid, &status, 0);
-
-		if (waiting < 0) {
-			if (errno == EINTR)
-				continue;
-			error("waitpid failed (%s)", strerror(errno));
-			return -ERR_RUN_COMMAND_WAITPID;
-		}
-		if (waiting != pid)
-			return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
-		if (WIFSIGNALED(status))
-			return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
-
-		if (!WIFEXITED(status))
-			return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
-		code = WEXITSTATUS(status);
-		switch (code) {
-		case 127:
-			return -ERR_RUN_COMMAND_EXEC;
-		case 0:
-			return 0;
-		default:
-			return -code;
-		}
-	}
-}
-
-int finish_command(struct child_process *cmd)
-{
-	return wait_or_whine(cmd->pid);
-}
-
-int run_command(struct child_process *cmd)
-{
-	int code = start_command(cmd);
-	if (code)
-		return code;
-	return finish_command(cmd);
-}
-
-static void prepare_run_command_v_opt(struct child_process *cmd,
-				      const char **argv,
-				      int opt)
-{
-	memset(cmd, 0, sizeof(*cmd));
-	cmd->argv = argv;
-	cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
-	cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0;
-	cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
-}
-
-int run_command_v_opt(const char **argv, int opt)
-{
-	struct child_process cmd;
-	prepare_run_command_v_opt(&cmd, argv, opt);
-	return run_command(&cmd);
-}
-
-int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
-{
-	struct child_process cmd;
-	prepare_run_command_v_opt(&cmd, argv, opt);
-	cmd.dir = dir;
-	cmd.env = env;
-	return run_command(&cmd);
-}
-
-#ifdef __MINGW32__
-static __stdcall unsigned run_thread(void *data)
-{
-	struct async *async = data;
-	return async->proc(async->fd_for_proc, async->data);
-}
-#endif
-
-int start_async(struct async *async)
-{
-	int pipe_out[2];
-
-	if (pipe(pipe_out) < 0)
-		return error("cannot create pipe: %s", strerror(errno));
-	async->out = pipe_out[0];
-
-#ifndef __MINGW32__
-	/* Flush stdio before fork() to avoid cloning buffers */
-	fflush(NULL);
-
-	async->pid = fork();
-	if (async->pid < 0) {
-		error("fork (async) failed: %s", strerror(errno));
-		close_pair(pipe_out);
-		return -1;
-	}
-	if (!async->pid) {
-		close(pipe_out[0]);
-		exit(!!async->proc(pipe_out[1], async->data));
-	}
-	close(pipe_out[1]);
-#else
-	async->fd_for_proc = pipe_out[1];
-	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
-	if (!async->tid) {
-		error("cannot create thread: %s", strerror(errno));
-		close_pair(pipe_out);
-		return -1;
-	}
-#endif
-	return 0;
-}
-
-int finish_async(struct async *async)
-{
-#ifndef __MINGW32__
-	int ret = 0;
-
-	if (wait_or_whine(async->pid))
-		ret = error("waitpid (async) failed");
-#else
-	DWORD ret = 0;
-	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
-		ret = error("waiting for thread failed: %lu", GetLastError());
-	else if (!GetExitCodeThread(async->tid, &ret))
-		ret = error("cannot get thread exit code: %lu", GetLastError());
-	CloseHandle(async->tid);
-#endif
-	return ret;
-}
-
-int run_hook(const char *index_file, const char *name, ...)
-{
-	struct child_process hook;
-	const char **argv = NULL, *env[2];
-	char index[PATH_MAX];
-	va_list args;
-	int ret;
-	size_t i = 0, alloc = 0;
-
-	if (access(perf_path("hooks/%s", name), X_OK) < 0)
-		return 0;
-
-	va_start(args, name);
-	ALLOC_GROW(argv, i + 1, alloc);
-	argv[i++] = perf_path("hooks/%s", name);
-	while (argv[i-1]) {
-		ALLOC_GROW(argv, i + 1, alloc);
-		argv[i++] = va_arg(args, const char *);
-	}
-	va_end(args);
-
-	memset(&hook, 0, sizeof(hook));
-	hook.argv = argv;
-	hook.no_stdin = 1;
-	hook.stdout_to_stderr = 1;
-	if (index_file) {
-		snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file);
-		env[0] = index;
-		env[1] = NULL;
-		hook.env = env;
-	}
-
-	ret = start_command(&hook);
-	free(argv);
-	if (ret) {
-		warning("Could not spawn %s", argv[0]);
-		return ret;
-	}
-	ret = finish_command(&hook);
-	if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
-		warning("%s exited due to uncaught signal", argv[0]);
-
-	return ret;
-}
diff --git a/Documentation/perf_counter/util/run-command.h b/Documentation/perf_counter/util/run-command.h
deleted file mode 100644
index 328289f..0000000
--- a/Documentation/perf_counter/util/run-command.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef RUN_COMMAND_H
-#define RUN_COMMAND_H
-
-enum {
-	ERR_RUN_COMMAND_FORK = 10000,
-	ERR_RUN_COMMAND_EXEC,
-	ERR_RUN_COMMAND_PIPE,
-	ERR_RUN_COMMAND_WAITPID,
-	ERR_RUN_COMMAND_WAITPID_WRONG_PID,
-	ERR_RUN_COMMAND_WAITPID_SIGNAL,
-	ERR_RUN_COMMAND_WAITPID_NOEXIT,
-};
-#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
-
-struct child_process {
-	const char **argv;
-	pid_t pid;
-	/*
-	 * Using .in, .out, .err:
-	 * - Specify 0 for no redirections (child inherits stdin, stdout,
-	 *   stderr from parent).
-	 * - Specify -1 to have a pipe allocated as follows:
-	 *     .in: returns the writable pipe end; parent writes to it,
-	 *          the readable pipe end becomes child's stdin
-	 *     .out, .err: returns the readable pipe end; parent reads from
-	 *          it, the writable pipe end becomes child's stdout/stderr
-	 *   The caller of start_command() must close the returned FDs
-	 *   after it has completed reading from/writing to it!
-	 * - Specify > 0 to set a channel to a particular FD as follows:
-	 *     .in: a readable FD, becomes child's stdin
-	 *     .out: a writable FD, becomes child's stdout/stderr
-	 *     .err > 0 not supported
-	 *   The specified FD is closed by start_command(), even in case
-	 *   of errors!
-	 */
-	int in;
-	int out;
-	int err;
-	const char *dir;
-	const char *const *env;
-	unsigned no_stdin:1;
-	unsigned no_stdout:1;
-	unsigned no_stderr:1;
-	unsigned perf_cmd:1; /* if this is to be perf sub-command */
-	unsigned stdout_to_stderr:1;
-	void (*preexec_cb)(void);
-};
-
-int start_command(struct child_process *);
-int finish_command(struct child_process *);
-int run_command(struct child_process *);
-
-extern int run_hook(const char *index_file, const char *name, ...);
-
-#define RUN_COMMAND_NO_STDIN 1
-#define RUN_PERF_CMD	     2	/*If this is to be perf sub-command */
-#define RUN_COMMAND_STDOUT_TO_STDERR 4
-int run_command_v_opt(const char **argv, int opt);
-
-/*
- * env (the environment) is to be formatted like environ: "VAR=VALUE".
- * To unset an environment variable use just "VAR".
- */
-int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env);
-
-/*
- * The purpose of the following functions is to feed a pipe by running
- * a function asynchronously and providing output that the caller reads.
- *
- * It is expected that no synchronization and mutual exclusion between
- * the caller and the feed function is necessary so that the function
- * can run in a thread without interfering with the caller.
- */
-struct async {
-	/*
-	 * proc writes to fd and closes it;
-	 * returns 0 on success, non-zero on failure
-	 */
-	int (*proc)(int fd, void *data);
-	void *data;
-	int out;	/* caller reads from here and closes it */
-#ifndef __MINGW32__
-	pid_t pid;
-#else
-	HANDLE tid;
-	int fd_for_proc;
-#endif
-};
-
-int start_async(struct async *async);
-int finish_async(struct async *async);
-
-#endif
diff --git a/Documentation/perf_counter/util/sigchain.c b/Documentation/perf_counter/util/sigchain.c
deleted file mode 100644
index 1118b99..0000000
--- a/Documentation/perf_counter/util/sigchain.c
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "sigchain.h"
-#include "cache.h"
-
-#define SIGCHAIN_MAX_SIGNALS 32
-
-struct sigchain_signal {
-	sigchain_fun *old;
-	int n;
-	int alloc;
-};
-static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS];
-
-static void check_signum(int sig)
-{
-	if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS)
-		die("BUG: signal out of range: %d", sig);
-}
-
-int sigchain_push(int sig, sigchain_fun f)
-{
-	struct sigchain_signal *s = signals + sig;
-	check_signum(sig);
-
-	ALLOC_GROW(s->old, s->n + 1, s->alloc);
-	s->old[s->n] = signal(sig, f);
-	if (s->old[s->n] == SIG_ERR)
-		return -1;
-	s->n++;
-	return 0;
-}
-
-int sigchain_pop(int sig)
-{
-	struct sigchain_signal *s = signals + sig;
-	check_signum(sig);
-	if (s->n < 1)
-		return 0;
-
-	if (signal(sig, s->old[s->n - 1]) == SIG_ERR)
-		return -1;
-	s->n--;
-	return 0;
-}
-
-void sigchain_push_common(sigchain_fun f)
-{
-	sigchain_push(SIGINT, f);
-	sigchain_push(SIGHUP, f);
-	sigchain_push(SIGTERM, f);
-	sigchain_push(SIGQUIT, f);
-	sigchain_push(SIGPIPE, f);
-}
diff --git a/Documentation/perf_counter/util/sigchain.h b/Documentation/perf_counter/util/sigchain.h
deleted file mode 100644
index 618083bc..0000000
--- a/Documentation/perf_counter/util/sigchain.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef SIGCHAIN_H
-#define SIGCHAIN_H
-
-typedef void (*sigchain_fun)(int);
-
-int sigchain_push(int sig, sigchain_fun f);
-int sigchain_pop(int sig);
-
-void sigchain_push_common(sigchain_fun f);
-
-#endif /* SIGCHAIN_H */
diff --git a/Documentation/perf_counter/util/strbuf.c b/Documentation/perf_counter/util/strbuf.c
deleted file mode 100644
index eaba093..0000000
--- a/Documentation/perf_counter/util/strbuf.c
+++ /dev/null
@@ -1,359 +0,0 @@
-#include "cache.h"
-
-int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}
-
-/*
- * Used as the default ->buf value, so that people can always assume
- * buf is non NULL and ->buf is NUL terminated even for a freshly
- * initialized strbuf.
- */
-char strbuf_slopbuf[1];
-
-void strbuf_init(struct strbuf *sb, size_t hint)
-{
-	sb->alloc = sb->len = 0;
-	sb->buf = strbuf_slopbuf;
-	if (hint)
-		strbuf_grow(sb, hint);
-}
-
-void strbuf_release(struct strbuf *sb)
-{
-	if (sb->alloc) {
-		free(sb->buf);
-		strbuf_init(sb, 0);
-	}
-}
-
-char *strbuf_detach(struct strbuf *sb, size_t *sz)
-{
-	char *res = sb->alloc ? sb->buf : NULL;
-	if (sz)
-		*sz = sb->len;
-	strbuf_init(sb, 0);
-	return res;
-}
-
-void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
-{
-	strbuf_release(sb);
-	sb->buf   = buf;
-	sb->len   = len;
-	sb->alloc = alloc;
-	strbuf_grow(sb, 0);
-	sb->buf[sb->len] = '\0';
-}
-
-void strbuf_grow(struct strbuf *sb, size_t extra)
-{
-	if (sb->len + extra + 1 <= sb->len)
-		die("you want to use way too much memory");
-	if (!sb->alloc)
-		sb->buf = NULL;
-	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
-}
-
-void strbuf_trim(struct strbuf *sb)
-{
-	char *b = sb->buf;
-	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
-		sb->len--;
-	while (sb->len > 0 && isspace(*b)) {
-		b++;
-		sb->len--;
-	}
-	memmove(sb->buf, b, sb->len);
-	sb->buf[sb->len] = '\0';
-}
-void strbuf_rtrim(struct strbuf *sb)
-{
-	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
-		sb->len--;
-	sb->buf[sb->len] = '\0';
-}
-
-void strbuf_ltrim(struct strbuf *sb)
-{
-	char *b = sb->buf;
-	while (sb->len > 0 && isspace(*b)) {
-		b++;
-		sb->len--;
-	}
-	memmove(sb->buf, b, sb->len);
-	sb->buf[sb->len] = '\0';
-}
-
-void strbuf_tolower(struct strbuf *sb)
-{
-	int i;
-	for (i = 0; i < sb->len; i++)
-		sb->buf[i] = tolower(sb->buf[i]);
-}
-
-struct strbuf **strbuf_split(const struct strbuf *sb, int delim)
-{
-	int alloc = 2, pos = 0;
-	char *n, *p;
-	struct strbuf **ret;
-	struct strbuf *t;
-
-	ret = calloc(alloc, sizeof(struct strbuf *));
-	p = n = sb->buf;
-	while (n < sb->buf + sb->len) {
-		int len;
-		n = memchr(n, delim, sb->len - (n - sb->buf));
-		if (pos + 1 >= alloc) {
-			alloc = alloc * 2;
-			ret = realloc(ret, sizeof(struct strbuf *) * alloc);
-		}
-		if (!n)
-			n = sb->buf + sb->len - 1;
-		len = n - p + 1;
-		t = malloc(sizeof(struct strbuf));
-		strbuf_init(t, len);
-		strbuf_add(t, p, len);
-		ret[pos] = t;
-		ret[++pos] = NULL;
-		p = ++n;
-	}
-	return ret;
-}
-
-void strbuf_list_free(struct strbuf **sbs)
-{
-	struct strbuf **s = sbs;
-
-	while (*s) {
-		strbuf_release(*s);
-		free(*s++);
-	}
-	free(sbs);
-}
-
-int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
-{
-	int len = a->len < b->len ? a->len: b->len;
-	int cmp = memcmp(a->buf, b->buf, len);
-	if (cmp)
-		return cmp;
-	return a->len < b->len ? -1: a->len != b->len;
-}
-
-void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
-				   const void *data, size_t dlen)
-{
-	if (pos + len < pos)
-		die("you want to use way too much memory");
-	if (pos > sb->len)
-		die("`pos' is too far after the end of the buffer");
-	if (pos + len > sb->len)
-		die("`pos + len' is too far after the end of the buffer");
-
-	if (dlen >= len)
-		strbuf_grow(sb, dlen - len);
-	memmove(sb->buf + pos + dlen,
-			sb->buf + pos + len,
-			sb->len - pos - len);
-	memcpy(sb->buf + pos, data, dlen);
-	strbuf_setlen(sb, sb->len + dlen - len);
-}
-
-void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
-{
-	strbuf_splice(sb, pos, 0, data, len);
-}
-
-void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
-{
-	strbuf_splice(sb, pos, len, NULL, 0);
-}
-
-void strbuf_add(struct strbuf *sb, const void *data, size_t len)
-{
-	strbuf_grow(sb, len);
-	memcpy(sb->buf + sb->len, data, len);
-	strbuf_setlen(sb, sb->len + len);
-}
-
-void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len)
-{
-	strbuf_grow(sb, len);
-	memcpy(sb->buf + sb->len, sb->buf + pos, len);
-	strbuf_setlen(sb, sb->len + len);
-}
-
-void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
-{
-	int len;
-	va_list ap;
-
-	if (!strbuf_avail(sb))
-		strbuf_grow(sb, 64);
-	va_start(ap, fmt);
-	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
-	va_end(ap);
-	if (len < 0)
-		die("your vsnprintf is broken");
-	if (len > strbuf_avail(sb)) {
-		strbuf_grow(sb, len);
-		va_start(ap, fmt);
-		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
-		va_end(ap);
-		if (len > strbuf_avail(sb)) {
-			die("this should not happen, your snprintf is broken");
-		}
-	}
-	strbuf_setlen(sb, sb->len + len);
-}
-
-void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
-		   void *context)
-{
-	for (;;) {
-		const char *percent;
-		size_t consumed;
-
-		percent = strchrnul(format, '%');
-		strbuf_add(sb, format, percent - format);
-		if (!*percent)
-			break;
-		format = percent + 1;
-
-		consumed = fn(sb, format, context);
-		if (consumed)
-			format += consumed;
-		else
-			strbuf_addch(sb, '%');
-	}
-}
-
-size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder,
-		void *context)
-{
-	struct strbuf_expand_dict_entry *e = context;
-	size_t len;
-
-	for (; e->placeholder && (len = strlen(e->placeholder)); e++) {
-		if (!strncmp(placeholder, e->placeholder, len)) {
-			if (e->value)
-				strbuf_addstr(sb, e->value);
-			return len;
-		}
-	}
-	return 0;
-}
-
-size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
-{
-	size_t res;
-	size_t oldalloc = sb->alloc;
-
-	strbuf_grow(sb, size);
-	res = fread(sb->buf + sb->len, 1, size, f);
-	if (res > 0)
-		strbuf_setlen(sb, sb->len + res);
-	else if (res < 0 && oldalloc == 0)
-		strbuf_release(sb);
-	return res;
-}
-
-ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
-{
-	size_t oldlen = sb->len;
-	size_t oldalloc = sb->alloc;
-
-	strbuf_grow(sb, hint ? hint : 8192);
-	for (;;) {
-		ssize_t cnt;
-
-		cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
-		if (cnt < 0) {
-			if (oldalloc == 0)
-				strbuf_release(sb);
-			else
-				strbuf_setlen(sb, oldlen);
-			return -1;
-		}
-		if (!cnt)
-			break;
-		sb->len += cnt;
-		strbuf_grow(sb, 8192);
-	}
-
-	sb->buf[sb->len] = '\0';
-	return sb->len - oldlen;
-}
-
-#define STRBUF_MAXLINK (2*PATH_MAX)
-
-int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
-{
-	size_t oldalloc = sb->alloc;
-
-	if (hint < 32)
-		hint = 32;
-
-	while (hint < STRBUF_MAXLINK) {
-		int len;
-
-		strbuf_grow(sb, hint);
-		len = readlink(path, sb->buf, hint);
-		if (len < 0) {
-			if (errno != ERANGE)
-				break;
-		} else if (len < hint) {
-			strbuf_setlen(sb, len);
-			return 0;
-		}
-
-		/* .. the buffer was too small - try again */
-		hint *= 2;
-	}
-	if (oldalloc == 0)
-		strbuf_release(sb);
-	return -1;
-}
-
-int strbuf_getline(struct strbuf *sb, FILE *fp, int term)
-{
-	int ch;
-
-	strbuf_grow(sb, 0);
-	if (feof(fp))
-		return EOF;
-
-	strbuf_reset(sb);
-	while ((ch = fgetc(fp)) != EOF) {
-		if (ch == term)
-			break;
-		strbuf_grow(sb, 1);
-		sb->buf[sb->len++] = ch;
-	}
-	if (ch == EOF && sb->len == 0)
-		return EOF;
-
-	sb->buf[sb->len] = '\0';
-	return 0;
-}
-
-int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
-{
-	int fd, len;
-
-	fd = open(path, O_RDONLY);
-	if (fd < 0)
-		return -1;
-	len = strbuf_read(sb, fd, hint);
-	close(fd);
-	if (len < 0)
-		return -1;
-
-	return len;
-}
diff --git a/Documentation/perf_counter/util/strbuf.h b/Documentation/perf_counter/util/strbuf.h
deleted file mode 100644
index 9ee908a..0000000
--- a/Documentation/perf_counter/util/strbuf.h
+++ /dev/null
@@ -1,137 +0,0 @@
-#ifndef STRBUF_H
-#define STRBUF_H
-
-/*
- * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
- * long, overflow safe strings.
- *
- * Strbufs has some invariants that are very important to keep in mind:
- *
- * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
- *    build complex strings/buffers whose final size isn't easily known.
- *
- *    It is NOT legal to copy the ->buf pointer away.
- *    `strbuf_detach' is the operation that detachs a buffer from its shell
- *    while keeping the shell valid wrt its invariants.
- *
- * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
- *    allocated. The extra byte is used to store a '\0', allowing the ->buf
- *    member to be a valid C-string. Every strbuf function ensure this
- *    invariant is preserved.
- *
- *    Note that it is OK to "play" with the buffer directly if you work it
- *    that way:
- *
- *    strbuf_grow(sb, SOME_SIZE);
- *       ... Here, the memory array starting at sb->buf, and of length
- *       ... strbuf_avail(sb) is all yours, and you are sure that
- *       ... strbuf_avail(sb) is at least SOME_SIZE.
- *    strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
- *
- *    Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
- *
- *    Doing so is safe, though if it has to be done in many places, adding the
- *    missing API to the strbuf module is the way to go.
- *
- *    XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
- *         even if it's true in the current implementation. Alloc is somehow a
- *         "private" member that should not be messed with.
- */
-
-#include <assert.h>
-
-extern char strbuf_slopbuf[];
-struct strbuf {
-	size_t alloc;
-	size_t len;
-	char *buf;
-};
-
-#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
-
-/*----- strbuf life cycle -----*/
-extern void strbuf_init(struct strbuf *, size_t);
-extern void strbuf_release(struct strbuf *);
-extern char *strbuf_detach(struct strbuf *, size_t *);
-extern void strbuf_attach(struct strbuf *, void *, size_t, size_t);
-static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) {
-	struct strbuf tmp = *a;
-	*a = *b;
-	*b = tmp;
-}
-
-/*----- strbuf size related -----*/
-static inline size_t strbuf_avail(const struct strbuf *sb) {
-	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
-}
-
-extern void strbuf_grow(struct strbuf *, size_t);
-
-static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
-	if (!sb->alloc)
-		strbuf_grow(sb, 0);
-	assert(len < sb->alloc);
-	sb->len = len;
-	sb->buf[len] = '\0';
-}
-#define strbuf_reset(sb)  strbuf_setlen(sb, 0)
-
-/*----- content related -----*/
-extern void strbuf_trim(struct strbuf *);
-extern void strbuf_rtrim(struct strbuf *);
-extern void strbuf_ltrim(struct strbuf *);
-extern int strbuf_cmp(const struct strbuf *, const struct strbuf *);
-extern void strbuf_tolower(struct strbuf *);
-
-extern struct strbuf **strbuf_split(const struct strbuf *, int delim);
-extern void strbuf_list_free(struct strbuf **);
-
-/*----- add data in your buffer -----*/
-static inline void strbuf_addch(struct strbuf *sb, int c) {
-	strbuf_grow(sb, 1);
-	sb->buf[sb->len++] = c;
-	sb->buf[sb->len] = '\0';
-}
-
-extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t);
-extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
-
-/* splice pos..pos+len with given data */
-extern void strbuf_splice(struct strbuf *, size_t pos, size_t len,
-                          const void *, size_t);
-
-extern void strbuf_add(struct strbuf *, const void *, size_t);
-static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
-	strbuf_add(sb, s, strlen(s));
-}
-static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) {
-	strbuf_add(sb, sb2->buf, sb2->len);
-}
-extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
-
-typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
-struct strbuf_expand_dict_entry {
-	const char *placeholder;
-	const char *value;
-};
-extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context);
-
-__attribute__((format(printf,2,3)))
-extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
-
-extern size_t strbuf_fread(struct strbuf *, size_t, FILE *);
-/* XXX: if read fails, any partial read is undone */
-extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint);
-extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint);
-extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint);
-
-extern int strbuf_getline(struct strbuf *, FILE *, int);
-
-extern void stripspace(struct strbuf *buf, int skip_comments);
-extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env);
-
-extern int strbuf_branchname(struct strbuf *sb, const char *name);
-extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
-
-#endif /* STRBUF_H */
diff --git a/Documentation/perf_counter/util/string.c b/Documentation/perf_counter/util/string.c
deleted file mode 100644
index ec33c0c..0000000
--- a/Documentation/perf_counter/util/string.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include "string.h"
-
-static int hex(char ch)
-{
-	if ((ch >= '0') && (ch <= '9'))
-		return ch - '0';
-	if ((ch >= 'a') && (ch <= 'f'))
-		return ch - 'a' + 10;
-	if ((ch >= 'A') && (ch <= 'F'))
-		return ch - 'A' + 10;
-	return -1;
-}
-
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-int hex2u64(const char *ptr, __u64 *long_val)
-{
-	const char *p = ptr;
-	*long_val = 0;
-
-	while (*p) {
-		const int hex_val = hex(*p);
-
-		if (hex_val < 0)
-			break;
-
-		*long_val = (*long_val << 4) | hex_val;
-		p++;
-	}
-
-	return p - ptr;
-}
diff --git a/Documentation/perf_counter/util/string.h b/Documentation/perf_counter/util/string.h
deleted file mode 100644
index 72812c1..0000000
--- a/Documentation/perf_counter/util/string.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _PERF_STRING_H_
-#define _PERF_STRING_H_
-
-#include <linux/types.h>
-
-int hex2u64(const char *ptr, __u64 *val);
-
-#endif
diff --git a/Documentation/perf_counter/util/symbol.c b/Documentation/perf_counter/util/symbol.c
deleted file mode 100644
index 23f4f7b..0000000
--- a/Documentation/perf_counter/util/symbol.c
+++ /dev/null
@@ -1,574 +0,0 @@
-#include "util.h"
-#include "../perf.h"
-#include "string.h"
-#include "symbol.h"
-
-#include <libelf.h>
-#include <gelf.h>
-#include <elf.h>
-
-const char *sym_hist_filter;
-
-static struct symbol *symbol__new(uint64_t start, uint64_t len,
-				  const char *name, unsigned int priv_size,
-				  uint64_t obj_start, int verbose)
-{
-	size_t namelen = strlen(name) + 1;
-	struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen);
-
-	if (!self)
-		return NULL;
-
-	if (verbose >= 2)
-		printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n",
-			(__u64)start, len, name, self->hist, (void *)obj_start);
-
-	self->obj_start= obj_start;
-	self->hist = NULL;
-	self->hist_sum = 0;
-
-	if (sym_hist_filter && !strcmp(name, sym_hist_filter))
-		self->hist = calloc(sizeof(__u64), len);
-
-	if (priv_size) {
-		memset(self, 0, priv_size);
-		self = ((void *)self) + priv_size;
-	}
-	self->start = start;
-	self->end   = start + len - 1;
-	memcpy(self->name, name, namelen);
-
-	return self;
-}
-
-static void symbol__delete(struct symbol *self, unsigned int priv_size)
-{
-	free(((void *)self) - priv_size);
-}
-
-static size_t symbol__fprintf(struct symbol *self, FILE *fp)
-{
-	return fprintf(fp, " %llx-%llx %s\n",
-		       self->start, self->end, self->name);
-}
-
-struct dso *dso__new(const char *name, unsigned int sym_priv_size)
-{
-	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
-
-	if (self != NULL) {
-		strcpy(self->name, name);
-		self->syms = RB_ROOT;
-		self->sym_priv_size = sym_priv_size;
-		self->find_symbol = dso__find_symbol;
-	}
-
-	return self;
-}
-
-static void dso__delete_symbols(struct dso *self)
-{
-	struct symbol *pos;
-	struct rb_node *next = rb_first(&self->syms);
-
-	while (next) {
-		pos = rb_entry(next, struct symbol, rb_node);
-		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, &self->syms);
-		symbol__delete(pos, self->sym_priv_size);
-	}
-}
-
-void dso__delete(struct dso *self)
-{
-	dso__delete_symbols(self);
-	free(self);
-}
-
-static void dso__insert_symbol(struct dso *self, struct symbol *sym)
-{
-	struct rb_node **p = &self->syms.rb_node;
-	struct rb_node *parent = NULL;
-	const uint64_t ip = sym->start;
-	struct symbol *s;
-
-	while (*p != NULL) {
-		parent = *p;
-		s = rb_entry(parent, struct symbol, rb_node);
-		if (ip < s->start)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&sym->rb_node, parent, p);
-	rb_insert_color(&sym->rb_node, &self->syms);
-}
-
-struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
-{
-	struct rb_node *n;
-
-	if (self == NULL)
-		return NULL;
-
-	n = self->syms.rb_node;
-
-	while (n) {
-		struct symbol *s = rb_entry(n, struct symbol, rb_node);
-
-		if (ip < s->start)
-			n = n->rb_left;
-		else if (ip > s->end)
-			n = n->rb_right;
-		else
-			return s;
-	}
-
-	return NULL;
-}
-
-size_t dso__fprintf(struct dso *self, FILE *fp)
-{
-	size_t ret = fprintf(fp, "dso: %s\n", self->name);
-
-	struct rb_node *nd;
-	for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
-		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
-		ret += symbol__fprintf(pos, fp);
-	}
-
-	return ret;
-}
-
-static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verbose)
-{
-	struct rb_node *nd, *prevnd;
-	char *line = NULL;
-	size_t n;
-	FILE *file = fopen("/proc/kallsyms", "r");
-
-	if (file == NULL)
-		goto out_failure;
-
-	while (!feof(file)) {
-		__u64 start;
-		struct symbol *sym;
-		int line_len, len;
-		char symbol_type;
-
-		line_len = getline(&line, &n, file);
-		if (line_len < 0)
-			break;
-
-		if (!line)
-			goto out_failure;
-
-		line[--line_len] = '\0'; /* \n */
-
-		len = hex2u64(line, &start);
-
-		len++;
-		if (len + 2 >= line_len)
-			continue;
-
-		symbol_type = toupper(line[len]);
-		/*
-		 * We're interested only in code ('T'ext)
-		 */
-		if (symbol_type != 'T' && symbol_type != 'W')
-			continue;
-		/*
-		 * Well fix up the end later, when we have all sorted.
-		 */
-		sym = symbol__new(start, 0xdead, line + len + 2,
-				  self->sym_priv_size, 0, verbose);
-
-		if (sym == NULL)
-			goto out_delete_line;
-
-		if (filter && filter(self, sym))
-			symbol__delete(sym, self->sym_priv_size);
-		else
-			dso__insert_symbol(self, sym);
-	}
-
-	/*
-	 * Now that we have all sorted out, just set the ->end of all
-	 * symbols
-	 */
-	prevnd = rb_first(&self->syms);
-
-	if (prevnd == NULL)
-		goto out_delete_line;
-
-	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
-		struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
-			      *curr = rb_entry(nd, struct symbol, rb_node);
-
-		prev->end = curr->start - 1;
-		prevnd = nd;
-	}
-
-	free(line);
-	fclose(file);
-
-	return 0;
-
-out_delete_line:
-	free(line);
-out_failure:
-	return -1;
-}
-
-/**
- * elf_symtab__for_each_symbol - iterate thru all the symbols
- *
- * @self: struct elf_symtab instance to iterate
- * @index: uint32_t index
- * @sym: GElf_Sym iterator
- */
-#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \
-	for (index = 0, gelf_getsym(syms, index, &sym);\
-	     index < nr_syms; \
-	     index++, gelf_getsym(syms, index, &sym))
-
-static inline uint8_t elf_sym__type(const GElf_Sym *sym)
-{
-	return GELF_ST_TYPE(sym->st_info);
-}
-
-static inline int elf_sym__is_function(const GElf_Sym *sym)
-{
-	return elf_sym__type(sym) == STT_FUNC &&
-	       sym->st_name != 0 &&
-	       sym->st_shndx != SHN_UNDEF &&
-	       sym->st_size != 0;
-}
-
-static inline const char *elf_sym__name(const GElf_Sym *sym,
-					const Elf_Data *symstrs)
-{
-	return symstrs->d_buf + sym->st_name;
-}
-
-static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
-				    GElf_Shdr *shp, const char *name,
-				    size_t *index)
-{
-	Elf_Scn *sec = NULL;
-	size_t cnt = 1;
-
-	while ((sec = elf_nextscn(elf, sec)) != NULL) {
-		char *str;
-
-		gelf_getshdr(sec, shp);
-		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
-		if (!strcmp(name, str)) {
-			if (index)
-				*index = cnt;
-			break;
-		}
-		++cnt;
-	}
-
-	return sec;
-}
-
-#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
-	for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
-	     idx < nr_entries; \
-	     ++idx, pos = gelf_getrel(reldata, idx, &pos_mem))
-
-#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
-	for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \
-	     idx < nr_entries; \
-	     ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
-
-static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
-				       GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym,
-				       GElf_Shdr *shdr_dynsym,
-				       size_t dynsym_idx, int verbose)
-{
-	uint32_t nr_rel_entries, idx;
-	GElf_Sym sym;
-	__u64 plt_offset;
-	GElf_Shdr shdr_plt;
-	struct symbol *f;
-	GElf_Shdr shdr_rel_plt;
-	Elf_Data *reldata, *syms, *symstrs;
-	Elf_Scn *scn_plt_rel, *scn_symstrs;
-	char sympltname[1024];
-	int nr = 0, symidx;
-
-	scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
-					  ".rela.plt", NULL);
-	if (scn_plt_rel == NULL) {
-		scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
-						  ".rel.plt", NULL);
-		if (scn_plt_rel == NULL)
-			return 0;
-	}
-
-	if (shdr_rel_plt.sh_link != dynsym_idx)
-		return 0;
-
-	if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL)
-		return 0;
-
-	/*
-	 * Fetch the relocation section to find the indexes to the GOT
-	 * and the symbols in the .dynsym they refer to.
-	 */
-	reldata = elf_getdata(scn_plt_rel, NULL);
-	if (reldata == NULL)
-		return -1;
-
-	syms = elf_getdata(scn_dynsym, NULL);
-	if (syms == NULL)
-		return -1;
-
-	scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link);
-	if (scn_symstrs == NULL)
-		return -1;
-
-	symstrs = elf_getdata(scn_symstrs, NULL);
-	if (symstrs == NULL)
-		return -1;
-
-	nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
-	plt_offset = shdr_plt.sh_offset;
-
-	if (shdr_rel_plt.sh_type == SHT_RELA) {
-		GElf_Rela pos_mem, *pos;
-
-		elf_section__for_each_rela(reldata, pos, pos_mem, idx,
-					   nr_rel_entries) {
-			symidx = GELF_R_SYM(pos->r_info);
-			plt_offset += shdr_plt.sh_entsize;
-			gelf_getsym(syms, symidx, &sym);
-			snprintf(sympltname, sizeof(sympltname),
-				 "%s@plt", elf_sym__name(&sym, symstrs));
-
-			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
-					sympltname, self->sym_priv_size, 0, verbose);
-			if (!f)
-				return -1;
-
-			dso__insert_symbol(self, f);
-			++nr;
-		}
-	} else if (shdr_rel_plt.sh_type == SHT_REL) {
-		GElf_Rel pos_mem, *pos;
-		elf_section__for_each_rel(reldata, pos, pos_mem, idx,
-					  nr_rel_entries) {
-			symidx = GELF_R_SYM(pos->r_info);
-			plt_offset += shdr_plt.sh_entsize;
-			gelf_getsym(syms, symidx, &sym);
-			snprintf(sympltname, sizeof(sympltname),
-				 "%s@plt", elf_sym__name(&sym, symstrs));
-
-			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
-					sympltname, self->sym_priv_size, 0, verbose);
-			if (!f)
-				return -1;
-
-			dso__insert_symbol(self, f);
-			++nr;
-		}
-	} else {
-		/*
-		 * TODO: There are still one more shdr_rel_plt.sh_type
-		 * I have to investigate, but probably should be ignored.
-		 */
-	}
-
-	return nr;
-}
-
-static int dso__load_sym(struct dso *self, int fd, const char *name,
-			 symbol_filter_t filter, int verbose)
-{
-	Elf_Data *symstrs;
-	uint32_t nr_syms;
-	int err = -1;
-	uint32_t index;
-	GElf_Ehdr ehdr;
-	GElf_Shdr shdr;
-	Elf_Data *syms;
-	GElf_Sym sym;
-	Elf_Scn *sec, *sec_dynsym;
-	Elf *elf;
-	size_t dynsym_idx;
-	int nr = 0;
-
-	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
-	if (elf == NULL) {
-		if (verbose)
-			fprintf(stderr, "%s: cannot read %s ELF file.\n",
-				__func__, name);
-		goto out_close;
-	}
-
-	if (gelf_getehdr(elf, &ehdr) == NULL) {
-		if (verbose)
-			fprintf(stderr, "%s: cannot get elf header.\n", __func__);
-		goto out_elf_end;
-	}
-
-	/*
-	 * We need to check if we have a .dynsym, so that we can handle the
-	 * .plt, synthesizing its symbols, that aren't on the symtabs (be it
-	 * .dynsym or .symtab)
-	 */
-	sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr,
-					 ".dynsym", &dynsym_idx);
-	if (sec_dynsym != NULL) {
-		nr = dso__synthesize_plt_symbols(self, elf, &ehdr,
-						 sec_dynsym, &shdr,
-						 dynsym_idx, verbose);
-		if (nr < 0)
-			goto out_elf_end;
-	}
-
-	/*
-	 * But if we have a full .symtab (that is a superset of .dynsym) we
-	 * should add the symbols not in the .dynsyn
-	 */
-	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
-	if (sec == NULL) {
-		if (sec_dynsym == NULL)
-			goto out_elf_end;
-
-		sec = sec_dynsym;
-		gelf_getshdr(sec, &shdr);
-	}
-
-	syms = elf_getdata(sec, NULL);
-	if (syms == NULL)
-		goto out_elf_end;
-
-	sec = elf_getscn(elf, shdr.sh_link);
-	if (sec == NULL)
-		goto out_elf_end;
-
-	symstrs = elf_getdata(sec, NULL);
-	if (symstrs == NULL)
-		goto out_elf_end;
-
-	nr_syms = shdr.sh_size / shdr.sh_entsize;
-
-	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
-		struct symbol *f;
-		uint64_t obj_start;
-
-		if (!elf_sym__is_function(&sym))
-			continue;
-
-		sec = elf_getscn(elf, sym.st_shndx);
-		if (!sec)
-			goto out_elf_end;
-
-		gelf_getshdr(sec, &shdr);
-		obj_start = sym.st_value;
-
-		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
-
-		f = symbol__new(sym.st_value, sym.st_size,
-				elf_sym__name(&sym, symstrs),
-				self->sym_priv_size, obj_start, verbose);
-		if (!f)
-			goto out_elf_end;
-
-		if (filter && filter(self, f))
-			symbol__delete(f, self->sym_priv_size);
-		else {
-			dso__insert_symbol(self, f);
-			nr++;
-		}
-	}
-
-	err = nr;
-out_elf_end:
-	elf_end(elf);
-out_close:
-	return err;
-}
-
-int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
-{
-	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
-	char *name = malloc(size);
-	int variant = 0;
-	int ret = -1;
-	int fd;
-
-	if (!name)
-		return -1;
-
-more:
-	do {
-		switch (variant) {
-		case 0: /* Fedora */
-			snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
-			break;
-		case 1: /* Ubuntu */
-			snprintf(name, size, "/usr/lib/debug%s", self->name);
-			break;
-		case 2: /* Sane people */
-			snprintf(name, size, "%s", self->name);
-			break;
-
-		default:
-			goto out;
-		}
-		variant++;
-
-		fd = open(name, O_RDONLY);
-	} while (fd < 0);
-
-	ret = dso__load_sym(self, fd, name, filter, verbose);
-	close(fd);
-
-	/*
-	 * Some people seem to have debuginfo files _WITHOUT_ debug info!?!?
-	 */
-	if (!ret)
-		goto more;
-
-out:
-	free(name);
-	return ret;
-}
-
-static int dso__load_vmlinux(struct dso *self, const char *vmlinux,
-			     symbol_filter_t filter, int verbose)
-{
-	int err, fd = open(vmlinux, O_RDONLY);
-
-	if (fd < 0)
-		return -1;
-
-	err = dso__load_sym(self, fd, vmlinux, filter, verbose);
-	close(fd);
-
-	return err;
-}
-
-int dso__load_kernel(struct dso *self, const char *vmlinux,
-		     symbol_filter_t filter, int verbose)
-{
-	int err = -1;
-
-	if (vmlinux)
-		err = dso__load_vmlinux(self, vmlinux, filter, verbose);
-
-	if (err)
-		err = dso__load_kallsyms(self, filter, verbose);
-
-	return err;
-}
-
-void symbol__init(void)
-{
-	elf_version(EV_CURRENT);
-}
diff --git a/Documentation/perf_counter/util/symbol.h b/Documentation/perf_counter/util/symbol.h
deleted file mode 100644
index 4839d68..0000000
--- a/Documentation/perf_counter/util/symbol.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _PERF_SYMBOL_
-#define _PERF_SYMBOL_ 1
-
-#include <linux/types.h>
-#include "list.h"
-#include "rbtree.h"
-
-struct symbol {
-	struct rb_node	rb_node;
-	__u64		start;
-	__u64		end;
-	__u64		obj_start;
-	__u64		hist_sum;
-	__u64		*hist;
-	char		name[0];
-};
-
-struct dso {
-	struct list_head node;
-	struct rb_root	 syms;
-	unsigned int	 sym_priv_size;
-	struct symbol    *(*find_symbol)(struct dso *, uint64_t ip);
-	char		 name[0];
-};
-
-const char *sym_hist_filter;
-
-typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym);
-
-struct dso *dso__new(const char *name, unsigned int sym_priv_size);
-void dso__delete(struct dso *self);
-
-static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
-{
-	return ((void *)sym) - self->sym_priv_size;
-}
-
-struct symbol *dso__find_symbol(struct dso *self, uint64_t ip);
-
-int dso__load_kernel(struct dso *self, const char *vmlinux,
-		     symbol_filter_t filter, int verbose);
-int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
-
-size_t dso__fprintf(struct dso *self, FILE *fp);
-
-void symbol__init(void);
-#endif /* _PERF_SYMBOL_ */
diff --git a/Documentation/perf_counter/util/usage.c b/Documentation/perf_counter/util/usage.c
deleted file mode 100644
index 2cad286..0000000
--- a/Documentation/perf_counter/util/usage.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * GIT - The information manager from hell
- *
- * Copyright (C) Linus Torvalds, 2005
- */
-#include "util.h"
-
-static void report(const char *prefix, const char *err, va_list params)
-{
-	char msg[1024];
-	vsnprintf(msg, sizeof(msg), err, params);
-	fprintf(stderr, "%s%s\n", prefix, msg);
-}
-
-static NORETURN void usage_builtin(const char *err)
-{
-	fprintf(stderr, "\n usage: %s\n", err);
-	exit(129);
-}
-
-static NORETURN void die_builtin(const char *err, va_list params)
-{
-	report("fatal: ", err, params);
-	exit(128);
-}
-
-static void error_builtin(const char *err, va_list params)
-{
-	report("error: ", err, params);
-}
-
-static void warn_builtin(const char *warn, va_list params)
-{
-	report("warning: ", warn, params);
-}
-
-/* If we are in a dlopen()ed .so write to a global variable would segfault
- * (ugh), so keep things static. */
-static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
-static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
-static void (*error_routine)(const char *err, va_list params) = error_builtin;
-static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
-
-void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
-{
-	die_routine = routine;
-}
-
-void usage(const char *err)
-{
-	usage_routine(err);
-}
-
-void die(const char *err, ...)
-{
-	va_list params;
-
-	va_start(params, err);
-	die_routine(err, params);
-	va_end(params);
-}
-
-int error(const char *err, ...)
-{
-	va_list params;
-
-	va_start(params, err);
-	error_routine(err, params);
-	va_end(params);
-	return -1;
-}
-
-void warning(const char *warn, ...)
-{
-	va_list params;
-
-	va_start(params, warn);
-	warn_routine(warn, params);
-	va_end(params);
-}
diff --git a/Documentation/perf_counter/util/util.h b/Documentation/perf_counter/util/util.h
deleted file mode 100644
index 76590a1..0000000
--- a/Documentation/perf_counter/util/util.h
+++ /dev/null
@@ -1,410 +0,0 @@
-#ifndef GIT_COMPAT_UTIL_H
-#define GIT_COMPAT_UTIL_H
-
-#define _FILE_OFFSET_BITS 64
-
-#ifndef FLEX_ARRAY
-/*
- * See if our compiler is known to support flexible array members.
- */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
-# define FLEX_ARRAY /* empty */
-#elif defined(__GNUC__)
-# if (__GNUC__ >= 3)
-#  define FLEX_ARRAY /* empty */
-# else
-#  define FLEX_ARRAY 0 /* older GNU extension */
-# endif
-#endif
-
-/*
- * Otherwise, default to safer but a bit wasteful traditional style
- */
-#ifndef FLEX_ARRAY
-# define FLEX_ARRAY 1
-#endif
-#endif
-
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-
-#ifdef __GNUC__
-#define TYPEOF(x) (__typeof__(x))
-#else
-#define TYPEOF(x)
-#endif
-
-#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits))))
-#define HAS_MULTI_BITS(i)  ((i) & ((i) - 1))  /* checks if an integer has more than 1 bit set */
-
-/* Approximation of the length of the decimal representation of this type. */
-#define decimal_length(x)	((int)(sizeof(x) * 2.56 + 0.5) + 1)
-
-#if !defined(__APPLE__) && !defined(__FreeBSD__)  && !defined(__USLC__) && !defined(_M_UNIX)
-#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
-#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
-#endif
-#define _ALL_SOURCE 1
-#define _GNU_SOURCE 1
-#define _BSD_SOURCE 1
-
-#include <unistd.h>
-#include <stdio.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/param.h>
-#include <sys/types.h>
-#include <dirent.h>
-#include <sys/time.h>
-#include <time.h>
-#include <signal.h>
-#include <fnmatch.h>
-#include <assert.h>
-#include <regex.h>
-#include <utime.h>
-#ifndef __MINGW32__
-#include <sys/wait.h>
-#include <sys/poll.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#ifndef NO_SYS_SELECT_H
-#include <sys/select.h>
-#endif
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <pwd.h>
-#include <inttypes.h>
-#if defined(__CYGWIN__)
-#undef _XOPEN_SOURCE
-#include <grp.h>
-#define _XOPEN_SOURCE 600
-#include "compat/cygwin.h"
-#else
-#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
-#include <grp.h>
-#define _ALL_SOURCE 1
-#endif
-#else 	/* __MINGW32__ */
-/* pull in Windows compatibility stuff */
-#include "compat/mingw.h"
-#endif	/* __MINGW32__ */
-
-#ifndef NO_ICONV
-#include <iconv.h>
-#endif
-
-#ifndef NO_OPENSSL
-#include <openssl/ssl.h>
-#include <openssl/err.h>
-#endif
-
-/* On most systems <limits.h> would have given us this, but
- * not on some systems (e.g. GNU/Hurd).
- */
-#ifndef PATH_MAX
-#define PATH_MAX 4096
-#endif
-
-#ifndef PRIuMAX
-#define PRIuMAX "llu"
-#endif
-
-#ifndef PRIu32
-#define PRIu32 "u"
-#endif
-
-#ifndef PRIx32
-#define PRIx32 "x"
-#endif
-
-#ifndef PATH_SEP
-#define PATH_SEP ':'
-#endif
-
-#ifndef STRIP_EXTENSION
-#define STRIP_EXTENSION ""
-#endif
-
-#ifndef has_dos_drive_prefix
-#define has_dos_drive_prefix(path) 0
-#endif
-
-#ifndef is_dir_sep
-#define is_dir_sep(c) ((c) == '/')
-#endif
-
-#ifdef __GNUC__
-#define NORETURN __attribute__((__noreturn__))
-#else
-#define NORETURN
-#ifndef __attribute__
-#define __attribute__(x)
-#endif
-#endif
-
-/* General helper functions */
-extern void usage(const char *err) NORETURN;
-extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
-extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
-
-extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
-
-extern int prefixcmp(const char *str, const char *prefix);
-extern time_t tm_to_time_t(const struct tm *tm);
-
-static inline const char *skip_prefix(const char *str, const char *prefix)
-{
-	size_t len = strlen(prefix);
-	return strncmp(str, prefix, len) ? NULL : str + len;
-}
-
-#if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
-
-#ifndef PROT_READ
-#define PROT_READ 1
-#define PROT_WRITE 2
-#define MAP_PRIVATE 1
-#define MAP_FAILED ((void*)-1)
-#endif
-
-#define mmap git_mmap
-#define munmap git_munmap
-extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
-extern int git_munmap(void *start, size_t length);
-
-#else /* NO_MMAP || USE_WIN32_MMAP */
-
-#include <sys/mman.h>
-
-#endif /* NO_MMAP || USE_WIN32_MMAP */
-
-#ifdef NO_MMAP
-
-/* This value must be multiple of (pagesize * 2) */
-#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
-
-#else /* NO_MMAP */
-
-/* This value must be multiple of (pagesize * 2) */
-#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
-	(sizeof(void*) >= 8 \
-		?  1 * 1024 * 1024 * 1024 \
-		: 32 * 1024 * 1024)
-
-#endif /* NO_MMAP */
-
-#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
-#define on_disk_bytes(st) ((st).st_size)
-#else
-#define on_disk_bytes(st) ((st).st_blocks * 512)
-#endif
-
-#define DEFAULT_PACKED_GIT_LIMIT \
-	((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
-
-#ifdef NO_PREAD
-#define pread git_pread
-extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset);
-#endif
-/*
- * Forward decl that will remind us if its twin in cache.h changes.
- * This function is used in compat/pread.c.  But we can't include
- * cache.h there.
- */
-extern ssize_t read_in_full(int fd, void *buf, size_t count);
-
-#ifdef NO_SETENV
-#define setenv gitsetenv
-extern int gitsetenv(const char *, const char *, int);
-#endif
-
-#ifdef NO_MKDTEMP
-#define mkdtemp gitmkdtemp
-extern char *gitmkdtemp(char *);
-#endif
-
-#ifdef NO_UNSETENV
-#define unsetenv gitunsetenv
-extern void gitunsetenv(const char *);
-#endif
-
-#ifdef NO_STRCASESTR
-#define strcasestr gitstrcasestr
-extern char *gitstrcasestr(const char *haystack, const char *needle);
-#endif
-
-#ifdef NO_STRLCPY
-#define strlcpy gitstrlcpy
-extern size_t gitstrlcpy(char *, const char *, size_t);
-#endif
-
-#ifdef NO_STRTOUMAX
-#define strtoumax gitstrtoumax
-extern uintmax_t gitstrtoumax(const char *, char **, int);
-#endif
-
-#ifdef NO_HSTRERROR
-#define hstrerror githstrerror
-extern const char *githstrerror(int herror);
-#endif
-
-#ifdef NO_MEMMEM
-#define memmem gitmemmem
-void *gitmemmem(const void *haystack, size_t haystacklen,
-                const void *needle, size_t needlelen);
-#endif
-
-#ifdef FREAD_READS_DIRECTORIES
-#ifdef fopen
-#undef fopen
-#endif
-#define fopen(a,b) git_fopen(a,b)
-extern FILE *git_fopen(const char*, const char*);
-#endif
-
-#ifdef SNPRINTF_RETURNS_BOGUS
-#define snprintf git_snprintf
-extern int git_snprintf(char *str, size_t maxsize,
-			const char *format, ...);
-#define vsnprintf git_vsnprintf
-extern int git_vsnprintf(char *str, size_t maxsize,
-			 const char *format, va_list ap);
-#endif
-
-#ifdef __GLIBC_PREREQ
-#if __GLIBC_PREREQ(2, 1)
-#define HAVE_STRCHRNUL
-#endif
-#endif
-
-#ifndef HAVE_STRCHRNUL
-#define strchrnul gitstrchrnul
-static inline char *gitstrchrnul(const char *s, int c)
-{
-	while (*s && *s != c)
-		s++;
-	return (char *)s;
-}
-#endif
-
-/*
- * Wrappers:
- */
-extern char *xstrdup(const char *str);
-extern void *xmalloc(size_t size);
-extern void *xmemdupz(const void *data, size_t len);
-extern char *xstrndup(const char *str, size_t len);
-extern void *xrealloc(void *ptr, size_t size);
-extern void *xcalloc(size_t nmemb, size_t size);
-extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
-extern ssize_t xread(int fd, void *buf, size_t len);
-extern ssize_t xwrite(int fd, const void *buf, size_t len);
-extern int xdup(int fd);
-extern FILE *xfdopen(int fd, const char *mode);
-extern int xmkstemp(char *template);
-
-static inline size_t xsize_t(off_t len)
-{
-	return (size_t)len;
-}
-
-static inline int has_extension(const char *filename, const char *ext)
-{
-	size_t len = strlen(filename);
-	size_t extlen = strlen(ext);
-	return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
-}
-
-/* Sane ctype - no locale, and works with signed chars */
-#undef isascii
-#undef isspace
-#undef isdigit
-#undef isalpha
-#undef isalnum
-#undef tolower
-#undef toupper
-extern unsigned char sane_ctype[256];
-#define GIT_SPACE 0x01
-#define GIT_DIGIT 0x02
-#define GIT_ALPHA 0x04
-#define GIT_GLOB_SPECIAL 0x08
-#define GIT_REGEX_SPECIAL 0x10
-#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
-#define isascii(x) (((x) & ~0x7f) == 0)
-#define isspace(x) sane_istest(x,GIT_SPACE)
-#define isdigit(x) sane_istest(x,GIT_DIGIT)
-#define isalpha(x) sane_istest(x,GIT_ALPHA)
-#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
-#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
-#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
-#define tolower(x) sane_case((unsigned char)(x), 0x20)
-#define toupper(x) sane_case((unsigned char)(x), 0)
-
-static inline int sane_case(int x, int high)
-{
-	if (sane_istest(x, GIT_ALPHA))
-		x = (x & ~0x20) | high;
-	return x;
-}
-
-static inline int strtoul_ui(char const *s, int base, unsigned int *result)
-{
-	unsigned long ul;
-	char *p;
-
-	errno = 0;
-	ul = strtoul(s, &p, base);
-	if (errno || *p || p == s || (unsigned int) ul != ul)
-		return -1;
-	*result = ul;
-	return 0;
-}
-
-static inline int strtol_i(char const *s, int base, int *result)
-{
-	long ul;
-	char *p;
-
-	errno = 0;
-	ul = strtol(s, &p, base);
-	if (errno || *p || p == s || (int) ul != ul)
-		return -1;
-	*result = ul;
-	return 0;
-}
-
-#ifdef INTERNAL_QSORT
-void git_qsort(void *base, size_t nmemb, size_t size,
-	       int(*compar)(const void *, const void *));
-#define qsort git_qsort
-#endif
-
-#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
-# define FORCE_DIR_SET_GID S_ISGID
-#else
-# define FORCE_DIR_SET_GID 0
-#endif
-
-#ifdef NO_NSEC
-#undef USE_NSEC
-#define ST_CTIME_NSEC(st) 0
-#define ST_MTIME_NSEC(st) 0
-#else
-#ifdef USE_ST_TIMESPEC
-#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
-#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
-#else
-#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
-#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
-#endif
-#endif
-
-#endif
diff --git a/Documentation/perf_counter/util/wrapper.c b/Documentation/perf_counter/util/wrapper.c
deleted file mode 100644
index 6350d65..0000000
--- a/Documentation/perf_counter/util/wrapper.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Various trivial helper wrappers around standard functions
- */
-#include "cache.h"
-
-/*
- * There's no pack memory to release - but stay close to the Git
- * version so wrap this away:
- */
-static inline void release_pack_memory(size_t size, int flag)
-{
-}
-
-char *xstrdup(const char *str)
-{
-	char *ret = strdup(str);
-	if (!ret) {
-		release_pack_memory(strlen(str) + 1, -1);
-		ret = strdup(str);
-		if (!ret)
-			die("Out of memory, strdup failed");
-	}
-	return ret;
-}
-
-void *xmalloc(size_t size)
-{
-	void *ret = malloc(size);
-	if (!ret && !size)
-		ret = malloc(1);
-	if (!ret) {
-		release_pack_memory(size, -1);
-		ret = malloc(size);
-		if (!ret && !size)
-			ret = malloc(1);
-		if (!ret)
-			die("Out of memory, malloc failed");
-	}
-#ifdef XMALLOC_POISON
-	memset(ret, 0xA5, size);
-#endif
-	return ret;
-}
-
-/*
- * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
- * "data" to the allocated memory, zero terminates the allocated memory,
- * and returns a pointer to the allocated memory. If the allocation fails,
- * the program dies.
- */
-void *xmemdupz(const void *data, size_t len)
-{
-	char *p = xmalloc(len + 1);
-	memcpy(p, data, len);
-	p[len] = '\0';
-	return p;
-}
-
-char *xstrndup(const char *str, size_t len)
-{
-	char *p = memchr(str, '\0', len);
-	return xmemdupz(str, p ? p - str : len);
-}
-
-void *xrealloc(void *ptr, size_t size)
-{
-	void *ret = realloc(ptr, size);
-	if (!ret && !size)
-		ret = realloc(ptr, 1);
-	if (!ret) {
-		release_pack_memory(size, -1);
-		ret = realloc(ptr, size);
-		if (!ret && !size)
-			ret = realloc(ptr, 1);
-		if (!ret)
-			die("Out of memory, realloc failed");
-	}
-	return ret;
-}
-
-void *xcalloc(size_t nmemb, size_t size)
-{
-	void *ret = calloc(nmemb, size);
-	if (!ret && (!nmemb || !size))
-		ret = calloc(1, 1);
-	if (!ret) {
-		release_pack_memory(nmemb * size, -1);
-		ret = calloc(nmemb, size);
-		if (!ret && (!nmemb || !size))
-			ret = calloc(1, 1);
-		if (!ret)
-			die("Out of memory, calloc failed");
-	}
-	return ret;
-}
-
-void *xmmap(void *start, size_t length,
-	int prot, int flags, int fd, off_t offset)
-{
-	void *ret = mmap(start, length, prot, flags, fd, offset);
-	if (ret == MAP_FAILED) {
-		if (!length)
-			return NULL;
-		release_pack_memory(length, fd);
-		ret = mmap(start, length, prot, flags, fd, offset);
-		if (ret == MAP_FAILED)
-			die("Out of memory? mmap failed: %s", strerror(errno));
-	}
-	return ret;
-}
-
-/*
- * xread() is the same a read(), but it automatically restarts read()
- * operations with a recoverable error (EAGAIN and EINTR). xread()
- * DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
- */
-ssize_t xread(int fd, void *buf, size_t len)
-{
-	ssize_t nr;
-	while (1) {
-		nr = read(fd, buf, len);
-		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
-			continue;
-		return nr;
-	}
-}
-
-/*
- * xwrite() is the same a write(), but it automatically restarts write()
- * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
- * GUARANTEE that "len" bytes is written even if the operation is successful.
- */
-ssize_t xwrite(int fd, const void *buf, size_t len)
-{
-	ssize_t nr;
-	while (1) {
-		nr = write(fd, buf, len);
-		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
-			continue;
-		return nr;
-	}
-}
-
-ssize_t read_in_full(int fd, void *buf, size_t count)
-{
-	char *p = buf;
-	ssize_t total = 0;
-
-	while (count > 0) {
-		ssize_t loaded = xread(fd, p, count);
-		if (loaded <= 0)
-			return total ? total : loaded;
-		count -= loaded;
-		p += loaded;
-		total += loaded;
-	}
-
-	return total;
-}
-
-ssize_t write_in_full(int fd, const void *buf, size_t count)
-{
-	const char *p = buf;
-	ssize_t total = 0;
-
-	while (count > 0) {
-		ssize_t written = xwrite(fd, p, count);
-		if (written < 0)
-			return -1;
-		if (!written) {
-			errno = ENOSPC;
-			return -1;
-		}
-		count -= written;
-		p += written;
-		total += written;
-	}
-
-	return total;
-}
-
-int xdup(int fd)
-{
-	int ret = dup(fd);
-	if (ret < 0)
-		die("dup failed: %s", strerror(errno));
-	return ret;
-}
-
-FILE *xfdopen(int fd, const char *mode)
-{
-	FILE *stream = fdopen(fd, mode);
-	if (stream == NULL)
-		die("Out of memory? fdopen failed: %s", strerror(errno));
-	return stream;
-}
-
-int xmkstemp(char *template)
-{
-	int fd;
-
-	fd = mkstemp(template);
-	if (fd < 0)
-		die("Unable to create temporary file: %s", strerror(errno));
-	return fd;
-}
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
new file mode 100644
index 0000000..d69a759
--- /dev/null
+++ b/tools/perf/.gitignore
@@ -0,0 +1,16 @@
+PERF-BUILD-OPTIONS
+PERF-CFLAGS
+PERF-GUI-VARS
+PERF-VERSION-FILE
+perf
+perf-help
+perf-record
+perf-report
+perf-stat
+perf-top
+perf*.1
+perf*.xml
+common-cmds.h
+tags
+TAGS
+cscope*
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
new file mode 100644
index 0000000..5457192
--- /dev/null
+++ b/tools/perf/Documentation/Makefile
@@ -0,0 +1,300 @@
+MAN1_TXT= \
+	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
+		$(wildcard perf-*.txt)) \
+	perf.txt
+MAN5_TXT=
+MAN7_TXT=
+
+MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
+MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+
+DOC_HTML=$(MAN_HTML)
+
+ARTICLES =
+# with their own formatting rules.
+SP_ARTICLES =
+API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
+SP_ARTICLES += $(API_DOCS)
+SP_ARTICLES += technical/api-index
+
+DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+
+DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
+DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+prefix?=$(HOME)
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/perf-doc
+pdfdir?=$(prefix)/share/doc/perf-doc
+mandir?=$(prefix)/share/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+man7dir=$(mandir)/man7
+# DESTDIR=
+
+ASCIIDOC=asciidoc
+ASCIIDOC_EXTRA =
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO_EXTRA =
+INSTALL?=install
+RM ?= rm -f
+DOC_REF = origin/man
+HTML_REF = origin/html
+
+infodir?=$(prefix)/share/info
+MAKEINFO=makeinfo
+INSTALL_INFO=install-info
+DOCBOOK2X_TEXI=docbook2x-texi
+DBLATEX=dblatex
+ifndef PERL_PATH
+	PERL_PATH = /usr/bin/perl
+endif
+
+-include ../config.mak.autogen
+-include ../config.mak
+
+#
+# For asciidoc ...
+#	-7.1.2,	no extra settings are needed.
+#	8.0-,	set ASCIIDOC8.
+#
+
+#
+# For docbook-xsl ...
+#	-1.68.1,	set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+#	1.69.0,		no extra settings are needed?
+#	1.69.1-1.71.0,	set DOCBOOK_SUPPRESS_SP?
+#	1.71.1,		no extra settings are needed?
+#	1.72.0,		set DOCBOOK_XSL_172.
+#	1.73.0-,	set ASCIIDOC_NO_ROFF
+#
+
+#
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+#
+
+ifdef ASCIIDOC8
+ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+MANPAGE_XSL = manpage-1.72.xsl
+else
+	ifdef ASCIIDOC_NO_ROFF
+	# docbook-xsl after 1.72 needs the regular XSL, but will not
+	# pass-thru raw roff codes from asciidoc.conf, so turn them off.
+	ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+	endif
+endif
+ifdef MAN_BOLD_LITERAL
+XMLTO_EXTRA += -m manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+XMLTO_EXTRA += -m manpage-suppress-sp.xsl
+endif
+
+SHELL_PATH ?= $(SHELL)
+# Shell quote;
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+
+#
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+#   http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+#
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+	QUIET_ASCIIDOC	= @echo '   ' ASCIIDOC $@;
+	QUIET_XMLTO	= @echo '   ' XMLTO $@;
+	QUIET_DB2TEXI	= @echo '   ' DB2TEXI $@;
+	QUIET_MAKEINFO	= @echo '   ' MAKEINFO $@;
+	QUIET_DBLATEX	= @echo '   ' DBLATEX $@;
+	QUIET_XSLTPROC	= @echo '   ' XSLTPROC $@;
+	QUIET_GEN	= @echo '   ' GEN $@;
+	QUIET_STDERR	= 2> /dev/null
+	QUIET_SUBDIR0	= +@subdir=
+	QUIET_SUBDIR1	= ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+			  $(MAKE) $(PRINT_DIR) -C $$subdir
+	export V
+endif
+endif
+
+all: html man
+
+html: $(DOC_HTML)
+
+$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
+
+man: man1 man5 man7
+man1: $(DOC_MAN1)
+man5: $(DOC_MAN5)
+man7: $(DOC_MAN7)
+
+info: perf.info perfman.info
+
+pdf: user-manual.pdf
+
+install: install-man
+
+install-man: man
+	$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
+#	$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
+#	$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+	$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
+#	$(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
+#	$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+install-info: info
+	$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
+	$(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir)
+	if test -r $(DESTDIR)$(infodir)/dir; then \
+	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+	else \
+	  echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
+	fi
+
+install-pdf: pdf
+	$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
+	$(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
+
+install-html: html
+	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
+
+../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
+	$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE
+
+-include ../PERF-VERSION-FILE
+
+#
+# Determine "include::" file references in asciidoc files.
+#
+doc.dep : $(wildcard *.txt) build-docdep.perl
+	$(QUIET_GEN)$(RM) $@+ $@ && \
+	$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
+	mv $@+ $@
+
+-include doc.dep
+
+cmds_txt = cmds-ancillaryinterrogators.txt \
+	cmds-ancillarymanipulators.txt \
+	cmds-mainporcelain.txt \
+	cmds-plumbinginterrogators.txt \
+	cmds-plumbingmanipulators.txt \
+	cmds-synchingrepositories.txt \
+	cmds-synchelpers.txt \
+	cmds-purehelpers.txt \
+	cmds-foreignscminterface.txt
+
+$(cmds_txt): cmd-list.made
+
+cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
+	$(QUIET_GEN)$(RM) $@ && \
+	$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
+	date >$@
+
+clean:
+	$(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7
+	$(RM) *.texi *.texi+ *.texi++ perf.info perfman.info
+	$(RM) howto-index.txt howto/*.html doc.dep
+	$(RM) technical/api-*.html technical/api-index.txt
+	$(RM) $(cmds_txt) *.made
+
+$(MAN_HTML): %.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+%.1 %.5 %.7 : %.xml
+	$(QUIET_XMLTO)$(RM) $@ && \
+	xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+%.xml : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+XSLT = docbook.xsl
+XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
+
+user-manual.html: user-manual.xml
+	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
+
+perf.info: user-manual.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
+
+user-manual.texi: user-manual.xml
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+	$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
+	rm $@++ && \
+	mv $@+ $@
+
+user-manual.pdf: user-manual.xml
+	$(QUIET_DBLATEX)$(RM) $@+ $@ && \
+	$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
+	mv $@+ $@
+
+perfman.texi: $(MAN_XML) cat-texi.perl
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
+		--to-stdout $(xml) &&) true) > $@++ && \
+	$(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
+	rm $@++ && \
+	mv $@+ $@
+
+perfman.info: perfman.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
+
+$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
+	mv $@+ $@
+
+howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
+	$(QUIET_GEN)$(RM) $@+ $@ && \
+	'$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
+	mv $@+ $@
+
+$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
+	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
+
+WEBDOC_DEST = /pub/software/tools/perf/docs
+
+$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
+	mv $@+ $@
+
+install-webdoc : html
+	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
+
+quick-install: quick-install-man
+
+quick-install-man:
+	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
+
+quick-install-html:
+	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
+
+.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/asciidoc.conf b/tools/perf/Documentation/asciidoc.conf
new file mode 100644
index 0000000..356b23a
--- /dev/null
+++ b/tools/perf/Documentation/asciidoc.conf
@@ -0,0 +1,91 @@
+## linkperf: macro
+#
+# Usage: linkperf:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show PERF link as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linkperf-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::perf-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::perf-asciidoc-no-roff[]
+
+ifdef::perf-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::perf-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">perf</refmiscinfo>
+<refmiscinfo class="version">{perf_version}</refmiscinfo>
+<refmiscinfo class="manual">perf Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+  <refname>{manname}</refname>
+  <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linkperf-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/tools/perf/Documentation/manpage-1.72.xsl b/tools/perf/Documentation/manpage-1.72.xsl
new file mode 100644
index 0000000..b4d315c
--- /dev/null
+++ b/tools/perf/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+     needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot"      >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-base.xsl b/tools/perf/Documentation/manpage-base.xsl
new file mode 100644
index 0000000..a264fa61
--- /dev/null
+++ b/tools/perf/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+     git.docbook.backslash and git.docbook.dot params
+     must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB(',
+			      substring-after(@id,'-'),')',
+			      $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>sp&#10;</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB',
+			      substring-after(@arearefs,'-'),
+			      '. ',$git.docbook.backslash,'fR')"/>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-bold-literal.xsl b/tools/perf/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 0000000..608eb5d
--- /dev/null
+++ b/tools/perf/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+     this makes literal text easier to distinguish in manpages
+     viewed on a tty -->
+<xsl:template match="literal">
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fB</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-normal.xsl b/tools/perf/Documentation/manpage-normal.xsl
new file mode 100644
index 0000000..a48f5b1
--- /dev/null
+++ b/tools/perf/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot"	>.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-suppress-sp.xsl b/tools/perf/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 0000000..a63c763
--- /dev/null
+++ b/tools/perf/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles erroneous, inline .sp in manpage output of some
+     versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+     that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+  <xsl:variable name="content">
+    <xsl:apply-templates/>
+  </xsl:variable>
+  <xsl:value-of select="normalize-space($content)"/>
+  <xsl:if test="not(ancestor::authorblurb) and
+                not(ancestor::personblurb)">
+    <xsl:text>&#10;&#10;</xsl:text>
+  </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
new file mode 100644
index 0000000..c9dcade
--- /dev/null
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -0,0 +1,29 @@
+perf-annotate(1)
+==============
+
+NAME
+----
+perf-annotate - Read perf.data (created by perf record) and display annotated code
+
+SYNOPSIS
+--------
+[verse]
+'perf annotate' [-i <file> | --input=file] symbol_name
+
+DESCRIPTION
+-----------
+This command reads the input file and displays an annotated version of the
+code. If the object file has debug symbols then the source code will be
+displayed alongside assembly code.
+
+If there is no debug info in the object, then annotated assembly is displayed.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: perf.data)
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-help.txt b/tools/perf/Documentation/perf-help.txt
new file mode 100644
index 0000000..5143918
--- /dev/null
+++ b/tools/perf/Documentation/perf-help.txt
@@ -0,0 +1,38 @@
+perf-help(1)
+============
+
+NAME
+----
+perf-help - display help information about perf
+
+SYNOPSIS
+--------
+'perf help' [-a|--all] [COMMAND]
+
+DESCRIPTION
+-----------
+
+With no options and no COMMAND given, the synopsis of the 'perf'
+command and a list of the most commonly used perf commands are printed
+on the standard output.
+
+If the option '--all' or '-a' is given, then all available commands are
+printed on the standard output.
+
+If a perf command is named, a manual page for that command is brought
+up. The 'man' program is used by default for this purpose, but this
+can be overridden by other options or configuration variables.
+
+Note that `perf --help ...` is identical to `perf help ...` because the
+former is internally converted into the latter.
+
+OPTIONS
+-------
+-a::
+--all::
+	Prints all the available commands on the standard output. This
+	option supersedes any other option.
+
+PERF
+----
+Part of the linkperf:perf[1] suite
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
new file mode 100644
index 0000000..8290b94
--- /dev/null
+++ b/tools/perf/Documentation/perf-list.txt
@@ -0,0 +1,25 @@
+perf-list(1)
+============
+
+NAME
+----
+perf-list - List all symbolic event types
+
+SYNOPSIS
+--------
+[verse]
+'perf list'
+
+DESCRIPTION
+-----------
+This command displays the symbolic event types which can be selected in the
+various perf commands with the -e option.
+
+OPTIONS
+-------
+None
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
new file mode 100644
index 0000000..1dbc1ee
--- /dev/null
+++ b/tools/perf/Documentation/perf-record.txt
@@ -0,0 +1,42 @@
+perf-record(1)
+==============
+
+NAME
+----
+perf-record - Run a command and record its profile into perf.data
+
+SYNOPSIS
+--------
+[verse]
+'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it, into perf.data - without displaying anything.
+
+This file can then be inspected later on, using 'perf report'.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-e::
+--event=::
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
new file mode 100644
index 0000000..52d3fc6
--- /dev/null
+++ b/tools/perf/Documentation/perf-report.txt
@@ -0,0 +1,26 @@
+perf-report(1)
+==============
+
+NAME
+----
+perf-report - Read perf.data (created by perf record) and display the profile
+
+SYNOPSIS
+--------
+[verse]
+'perf report' [-i <file> | --input=file]
+
+DESCRIPTION
+-----------
+This command displays the performance counter profile information recorded
+via perf report.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: perf.data)
+
+SEE ALSO
+--------
+linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
new file mode 100644
index 0000000..c368a72
--- /dev/null
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -0,0 +1,66 @@
+perf-stat(1)
+============
+
+NAME
+----
+perf-stat - Run a command and gather performance counter statistics
+
+SYNOPSIS
+--------
+[verse]
+'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers performance counter statistics
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+
+-e::
+--event=::
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
+
+-i::
+--inherit::
+        child tasks inherit counters
+-p::
+--pid=<pid>::
+        stat events on existing pid
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+EXAMPLES
+--------
+
+$ perf stat -- make -j
+
+ Performance counter stats for 'make -j':
+
+    8117.370256  task clock ticks     #      11.281 CPU utilization factor
+            678  context switches     #       0.000 M/sec
+            133  CPU migrations       #       0.000 M/sec
+         235724  pagefaults           #       0.029 M/sec
+    24821162526  CPU cycles           #    3057.784 M/sec
+    18687303457  instructions         #    2302.138 M/sec
+      172158895  cache references     #      21.209 M/sec
+       27075259  cache misses         #       3.335 M/sec
+
+ Wall-clock time elapsed:   719.554352 msecs
+
+SEE ALSO
+--------
+linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
new file mode 100644
index 0000000..539d012
--- /dev/null
+++ b/tools/perf/Documentation/perf-top.txt
@@ -0,0 +1,39 @@
+perf-top(1)
+===========
+
+NAME
+----
+perf-top - Run a command and profile it
+
+SYNOPSIS
+--------
+[verse]
+'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-e::
+--event=::
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	 hexadecimal event descriptor.
+
+-a::
+        system-wide collection
+
+-l::
+        scale counter values
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
new file mode 100644
index 0000000..69c8325
--- /dev/null
+++ b/tools/perf/Documentation/perf.txt
@@ -0,0 +1,24 @@
+perf(1)
+=======
+
+NAME
+----
+perf - Performance analysis tools for Linux
+
+SYNOPSIS
+--------
+[verse]
+'perf' [--version] [--help] COMMAND [ARGS]
+
+DESCRIPTION
+-----------
+Performance counters for Linux are are a new kernel-based subsystem
+that provide a framework for all things performance analysis. It
+covers hardware level (CPU/PMU, Performance Monitoring Unit) features
+and software features (software counters, tracepoints) as well.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-list[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
new file mode 100644
index 0000000..0cbd5d6
--- /dev/null
+++ b/tools/perf/Makefile
@@ -0,0 +1,929 @@
+# The default target of this Makefile is...
+all::
+
+# Define V=1 to have a more verbose compile.
+#
+# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
+# or vsnprintf() return -1 instead of number of characters which would
+# have been written to the final string if enough space had been available.
+#
+# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds
+# when attempting to read from an fopen'ed directory.
+#
+# Define NO_OPENSSL environment variable if you do not have OpenSSL.
+# This also implies MOZILLA_SHA1.
+#
+# Define CURLDIR=/foo/bar if your curl header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
+# Define EXPATDIR=/foo/bar if your expat header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
+# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent.
+#
+# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks
+# d_type in struct dirent (latest Cygwin -- will be fixed soonish).
+#
+# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.)
+# do not support the 'size specifiers' introduced by C99, namely ll, hh,
+# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t).
+# some C compilers supported these specifiers prior to C99 as an extension.
+#
+# Define NO_STRCASESTR if you don't have strcasestr.
+#
+# Define NO_MEMMEM if you don't have memmem.
+#
+# Define NO_STRTOUMAX if you don't have strtoumax in the C library.
+# If your compiler also does not support long long or does not have
+# strtoull, define NO_STRTOULL.
+#
+# Define NO_SETENV if you don't have setenv in the C library.
+#
+# Define NO_UNSETENV if you don't have unsetenv in the C library.
+#
+# Define NO_MKDTEMP if you don't have mkdtemp in the C library.
+#
+# Define NO_SYS_SELECT_H if you don't have sys/select.h.
+#
+# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link.
+# Enable it on Windows.  By default, symrefs are still used.
+#
+# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
+# tests.  These tests take up a significant amount of the total test time
+# but are not needed unless you plan to talk to SVN repos.
+#
+# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
+# installed in /sw, but don't want PERF to link against any libraries
+# installed there.  If defined you may specify your own (or Fink's)
+# include directories and library directories by defining CFLAGS
+# and LDFLAGS appropriately.
+#
+# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
+# have DarwinPorts installed in /opt/local, but don't want PERF to
+# link against any libraries installed there.  If defined you may
+# specify your own (or DarwinPort's) include directories and
+# library directories by defining CFLAGS and LDFLAGS appropriately.
+#
+# Define PPC_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine optimized for PowerPC.
+#
+# Define ARM_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine optimized for ARM.
+#
+# Define MOZILLA_SHA1 environment variable when running make to make use of
+# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast
+# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
+# choice) has very fast version optimized for i586.
+#
+# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
+#
+# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
+#
+# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
+# Patrick Mauritz).
+#
+# Define NO_MMAP if you want to avoid mmap.
+#
+# Define NO_PTHREADS if you do not have or do not want to use Pthreads.
+#
+# Define NO_PREAD if you have a problem with pread() system call (e.g.
+# cygwin.dll before v1.5.22).
+#
+# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
+# generally faster on your platform than accessing the working directory.
+#
+# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support
+# the executable mode bit, but doesn't really do so.
+#
+# Define NO_IPV6 if you lack IPv6 support and getaddrinfo().
+#
+# Define NO_SOCKADDR_STORAGE if your platform does not have struct
+# sockaddr_storage.
+#
+# Define NO_ICONV if your libc does not properly support iconv.
+#
+# Define OLD_ICONV if your library has an old iconv(), where the second
+# (input buffer pointer) parameter is declared with type (const char **).
+#
+# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound.
+#
+# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib"
+# that tells runtime paths to dynamic libraries;
+# "-Wl,-rpath=/path/lib" is used instead.
+#
+# Define USE_NSEC below if you want perf to care about sub-second file mtimes
+# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
+# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
+# randomly break unless your underlying filesystem supports those sub-second
+# times (my ext3 doesn't).
+#
+# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of
+# "st_ctim"
+#
+# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
+# available.  This automatically turns USE_NSEC off.
+#
+# Define USE_STDEV below if you want perf to care about the underlying device
+# change being considered an inode change from the update-index perspective.
+#
+# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
+# field that counts the on-disk footprint in 512-byte blocks.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's
+# MakeMaker (e.g. using ActiveState under Cygwin).
+#
+# Define NO_PERL if you do not want Perl scripts or libraries at all.
+#
+# Define INTERNAL_QSORT to use Git's implementation of qsort(), which
+# is a simplified version of the merge sort used in glibc. This is
+# recommended if Git triggers O(n^2) behavior in your platform's qsort().
+#
+# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
+# your external grep (e.g., if your system lacks grep, if its grep is
+# broken, or spawning external process is slower than built-in grep perf has).
+
+PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
+	@$(SHELL_PATH) util/PERF-VERSION-GEN
+-include PERF-VERSION-FILE
+
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
+uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
+uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
+uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
+uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
+
+# CFLAGS and LDFLAGS are for the users to override from the command line.
+
+CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
+LDFLAGS = -lpthread -lrt -lelf
+ALL_CFLAGS = $(CFLAGS)
+ALL_LDFLAGS = $(LDFLAGS)
+STRIP ?= strip
+
+# Among the variables below, these:
+#   perfexecdir
+#   template_dir
+#   mandir
+#   infodir
+#   htmldir
+#   ETC_PERFCONFIG (but not sysconfdir)
+# can be specified as a relative path some/where/else;
+# this is interpreted as relative to $(prefix) and "perf" at
+# runtime figures out where they are based on the path to the executable.
+# This can help installing the suite in a relocatable way.
+
+prefix = $(HOME)
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+mandir = share/man
+infodir = share/info
+perfexecdir = libexec/perf-core
+sharedir = $(prefix)/share
+template_dir = share/perf-core/templates
+htmldir = share/doc/perf-doc
+ifeq ($(prefix),/usr)
+sysconfdir = /etc
+ETC_PERFCONFIG = $(sysconfdir)/perfconfig
+else
+sysconfdir = $(prefix)/etc
+ETC_PERFCONFIG = etc/perfconfig
+endif
+lib = lib
+# DESTDIR=
+
+export prefix bindir sharedir sysconfdir
+
+CC = gcc
+AR = ar
+RM = rm -f
+TAR = tar
+FIND = find
+INSTALL = install
+RPMBUILD = rpmbuild
+PTHREAD_LIBS = -lpthread
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+
+
+### --- END CONFIGURATION SECTION ---
+
+# Those must not be GNU-specific; they are shared with perl/ which may
+# be built by a different compiler. (Note that this is an artifact now
+# but it still might be nice to keep that distinction.)
+BASIC_CFLAGS =
+BASIC_LDFLAGS =
+
+# Guard against environment variables
+BUILTIN_OBJS =
+BUILT_INS =
+COMPAT_CFLAGS =
+COMPAT_OBJS =
+LIB_H =
+LIB_OBJS =
+SCRIPT_PERL =
+SCRIPT_SH =
+TEST_PROGRAMS =
+
+#
+# No scripts right now:
+#
+
+# SCRIPT_SH += perf-am.sh
+
+#
+# No Perl scripts right now:
+#
+
+# SCRIPT_PERL += perf-add--interactive.perl
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
+	  $(patsubst %.perl,%,$(SCRIPT_PERL))
+
+# Empty...
+EXTRA_PROGRAMS =
+
+# ... and all the rest that could be moved out of bindir to perfexecdir
+PROGRAMS += $(EXTRA_PROGRAMS)
+
+#
+# Single 'perf' binary right now:
+#
+PROGRAMS += perf
+
+# List built-in command $C whose implementation cmd_$C() is not in
+# builtin-$C.o but is linked in as part of some other command.
+#
+# None right now:
+#
+# BUILT_INS += perf-init $X
+
+# what 'all' will build and 'install' will install, in perfexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = perf$X
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+	SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+	PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+LIB_FILE=libperf.a
+
+LIB_H += ../../include/linux/perf_counter.h
+LIB_H += perf.h
+LIB_H += util/list.h
+LIB_H += util/rbtree.h
+LIB_H += util/levenshtein.h
+LIB_H += util/parse-options.h
+LIB_H += util/parse-events.h
+LIB_H += util/quote.h
+LIB_H += util/util.h
+LIB_H += util/help.h
+LIB_H += util/strbuf.h
+LIB_H += util/string.h
+LIB_H += util/run-command.h
+LIB_H += util/sigchain.h
+LIB_H += util/symbol.h
+LIB_H += util/color.h
+
+LIB_OBJS += util/abspath.o
+LIB_OBJS += util/alias.o
+LIB_OBJS += util/config.o
+LIB_OBJS += util/ctype.o
+LIB_OBJS += util/environment.o
+LIB_OBJS += util/exec_cmd.o
+LIB_OBJS += util/help.o
+LIB_OBJS += util/levenshtein.o
+LIB_OBJS += util/parse-options.o
+LIB_OBJS += util/parse-events.o
+LIB_OBJS += util/path.o
+LIB_OBJS += util/rbtree.o
+LIB_OBJS += util/run-command.o
+LIB_OBJS += util/quote.o
+LIB_OBJS += util/strbuf.o
+LIB_OBJS += util/string.o
+LIB_OBJS += util/usage.o
+LIB_OBJS += util/wrapper.o
+LIB_OBJS += util/sigchain.o
+LIB_OBJS += util/symbol.o
+LIB_OBJS += util/color.o
+LIB_OBJS += util/pager.o
+
+BUILTIN_OBJS += builtin-annotate.o
+BUILTIN_OBJS += builtin-help.o
+BUILTIN_OBJS += builtin-list.o
+BUILTIN_OBJS += builtin-record.o
+BUILTIN_OBJS += builtin-report.o
+BUILTIN_OBJS += builtin-stat.o
+BUILTIN_OBJS += builtin-top.o
+
+PERFLIBS = $(LIB_FILE)
+EXTLIBS =
+
+#
+# Platform specific tweaks
+#
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain.  If
+# we had "elif" things would have been much nicer...
+
+-include config.mak.autogen
+-include config.mak
+
+ifeq ($(uname_S),Darwin)
+	ifndef NO_FINK
+		ifeq ($(shell test -d /sw/lib && echo y),y)
+			BASIC_CFLAGS += -I/sw/include
+			BASIC_LDFLAGS += -L/sw/lib
+		endif
+	endif
+	ifndef NO_DARWIN_PORTS
+		ifeq ($(shell test -d /opt/local/lib && echo y),y)
+			BASIC_CFLAGS += -I/opt/local/include
+			BASIC_LDFLAGS += -L/opt/local/lib
+		endif
+	endif
+	PTHREAD_LIBS =
+endif
+
+ifndef CC_LD_DYNPATH
+	ifdef NO_R_TO_GCC_LINKER
+		# Some gcc does not accept and pass -R to the linker to specify
+		# the runtime dynamic library path.
+		CC_LD_DYNPATH = -Wl,-rpath,
+	else
+		CC_LD_DYNPATH = -R
+	endif
+endif
+
+ifdef ZLIB_PATH
+	BASIC_CFLAGS += -I$(ZLIB_PATH)/include
+	EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib)
+endif
+EXTLIBS += -lz
+
+ifdef NEEDS_SOCKET
+	EXTLIBS += -lsocket
+endif
+ifdef NEEDS_NSL
+	EXTLIBS += -lnsl
+endif
+ifdef NO_D_TYPE_IN_DIRENT
+	BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT
+endif
+ifdef NO_D_INO_IN_DIRENT
+	BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT
+endif
+ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
+	BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT
+endif
+ifdef USE_NSEC
+	BASIC_CFLAGS += -DUSE_NSEC
+endif
+ifdef USE_ST_TIMESPEC
+	BASIC_CFLAGS += -DUSE_ST_TIMESPEC
+endif
+ifdef NO_NSEC
+	BASIC_CFLAGS += -DNO_NSEC
+endif
+ifdef NO_C99_FORMAT
+	BASIC_CFLAGS += -DNO_C99_FORMAT
+endif
+ifdef SNPRINTF_RETURNS_BOGUS
+	COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
+	COMPAT_OBJS += compat/snprintf.o
+endif
+ifdef FREAD_READS_DIRECTORIES
+	COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
+	COMPAT_OBJS += compat/fopen.o
+endif
+ifdef NO_SYMLINK_HEAD
+	BASIC_CFLAGS += -DNO_SYMLINK_HEAD
+endif
+ifdef NO_STRCASESTR
+	COMPAT_CFLAGS += -DNO_STRCASESTR
+	COMPAT_OBJS += compat/strcasestr.o
+endif
+ifdef NO_STRTOUMAX
+	COMPAT_CFLAGS += -DNO_STRTOUMAX
+	COMPAT_OBJS += compat/strtoumax.o
+endif
+ifdef NO_STRTOULL
+	COMPAT_CFLAGS += -DNO_STRTOULL
+endif
+ifdef NO_SETENV
+	COMPAT_CFLAGS += -DNO_SETENV
+	COMPAT_OBJS += compat/setenv.o
+endif
+ifdef NO_MKDTEMP
+	COMPAT_CFLAGS += -DNO_MKDTEMP
+	COMPAT_OBJS += compat/mkdtemp.o
+endif
+ifdef NO_UNSETENV
+	COMPAT_CFLAGS += -DNO_UNSETENV
+	COMPAT_OBJS += compat/unsetenv.o
+endif
+ifdef NO_SYS_SELECT_H
+	BASIC_CFLAGS += -DNO_SYS_SELECT_H
+endif
+ifdef NO_MMAP
+	COMPAT_CFLAGS += -DNO_MMAP
+	COMPAT_OBJS += compat/mmap.o
+else
+	ifdef USE_WIN32_MMAP
+		COMPAT_CFLAGS += -DUSE_WIN32_MMAP
+		COMPAT_OBJS += compat/win32mmap.o
+	endif
+endif
+ifdef NO_PREAD
+	COMPAT_CFLAGS += -DNO_PREAD
+	COMPAT_OBJS += compat/pread.o
+endif
+ifdef NO_FAST_WORKING_DIRECTORY
+	BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
+endif
+ifdef NO_TRUSTABLE_FILEMODE
+	BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE
+endif
+ifdef NO_IPV6
+	BASIC_CFLAGS += -DNO_IPV6
+endif
+ifdef NO_UINTMAX_T
+	BASIC_CFLAGS += -Duintmax_t=uint32_t
+endif
+ifdef NO_SOCKADDR_STORAGE
+ifdef NO_IPV6
+	BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
+else
+	BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
+endif
+endif
+ifdef NO_INET_NTOP
+	LIB_OBJS += compat/inet_ntop.o
+endif
+ifdef NO_INET_PTON
+	LIB_OBJS += compat/inet_pton.o
+endif
+
+ifdef NO_ICONV
+	BASIC_CFLAGS += -DNO_ICONV
+endif
+
+ifdef OLD_ICONV
+	BASIC_CFLAGS += -DOLD_ICONV
+endif
+
+ifdef NO_DEFLATE_BOUND
+	BASIC_CFLAGS += -DNO_DEFLATE_BOUND
+endif
+
+ifdef PPC_SHA1
+	SHA1_HEADER = "ppc/sha1.h"
+	LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
+else
+ifdef ARM_SHA1
+	SHA1_HEADER = "arm/sha1.h"
+	LIB_OBJS += arm/sha1.o arm/sha1_arm.o
+else
+ifdef MOZILLA_SHA1
+	SHA1_HEADER = "mozilla-sha1/sha1.h"
+	LIB_OBJS += mozilla-sha1/sha1.o
+else
+	SHA1_HEADER = <openssl/sha.h>
+	EXTLIBS += $(LIB_4_CRYPTO)
+endif
+endif
+endif
+ifdef NO_PERL_MAKEMAKER
+	export NO_PERL_MAKEMAKER
+endif
+ifdef NO_HSTRERROR
+	COMPAT_CFLAGS += -DNO_HSTRERROR
+	COMPAT_OBJS += compat/hstrerror.o
+endif
+ifdef NO_MEMMEM
+	COMPAT_CFLAGS += -DNO_MEMMEM
+	COMPAT_OBJS += compat/memmem.o
+endif
+ifdef INTERNAL_QSORT
+	COMPAT_CFLAGS += -DINTERNAL_QSORT
+	COMPAT_OBJS += compat/qsort.o
+endif
+ifdef RUNTIME_PREFIX
+	COMPAT_CFLAGS += -DRUNTIME_PREFIX
+endif
+
+ifdef DIR_HAS_BSD_GROUP_SEMANTICS
+	COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
+endif
+ifdef NO_EXTERNAL_GREP
+	BASIC_CFLAGS += -DNO_EXTERNAL_GREP
+endif
+
+ifeq ($(PERL_PATH),)
+NO_PERL=NoThanks
+endif
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifndef V
+	QUIET_CC       = @echo '   ' CC $@;
+	QUIET_AR       = @echo '   ' AR $@;
+	QUIET_LINK     = @echo '   ' LINK $@;
+	QUIET_BUILT_IN = @echo '   ' BUILTIN $@;
+	QUIET_GEN      = @echo '   ' GEN $@;
+	QUIET_SUBDIR0  = +@subdir=
+	QUIET_SUBDIR1  = ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+			 $(MAKE) $(PRINT_DIR) -C $$subdir
+	export V
+	export QUIET_GEN
+	export QUIET_BUILT_IN
+endif
+endif
+
+ifdef ASCIIDOC8
+	export ASCIIDOC8
+endif
+
+# Shell quote (do not use $(call) to accommodate ancient setups);
+
+SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
+ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
+
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+bindir_SQ = $(subst ','\'',$(bindir))
+bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+mandir_SQ = $(subst ','\'',$(mandir))
+infodir_SQ = $(subst ','\'',$(infodir))
+perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
+template_dir_SQ = $(subst ','\'',$(template_dir))
+htmldir_SQ = $(subst ','\'',$(htmldir))
+prefix_SQ = $(subst ','\'',$(prefix))
+
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
+
+LIBS = $(PERFLIBS) $(EXTLIBS)
+
+BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
+	$(COMPAT_CFLAGS)
+LIB_OBJS += $(COMPAT_OBJS)
+
+ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_LDFLAGS += $(BASIC_LDFLAGS)
+
+export TAR INSTALL DESTDIR SHELL_PATH
+
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS
+ifneq (,$X)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
+endif
+
+all::
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+	@$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) perf$X
+	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X
+
+perf.o: perf.c common-cmds.h PERF-CFLAGS
+	$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+		$(ALL_CFLAGS) -c $(filter %.c,$^)
+
+perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \
+		$(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
+
+builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
+		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(BUILT_INS): perf$X
+	$(QUIET_BUILT_IN)$(RM) $@ && \
+	ln perf$X $@ 2>/dev/null || \
+	ln -s perf$X $@ 2>/dev/null || \
+	cp perf$X $@
+
+common-cmds.h: util/generate-cmdlist.sh command-list.txt
+
+common-cmds.h: $(wildcard Documentation/perf-*.txt)
+	$(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
+	$(QUIET_GEN)$(RM) $@ $@+ && \
+	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
+	    -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
+	    -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
+	    -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
+	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
+	    $@.sh >$@+ && \
+	chmod +x $@+ && \
+	mv $@+ $@
+
+configure: configure.ac
+	$(QUIET_GEN)$(RM) $@ $<+ && \
+	sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
+	    $< > $<+ && \
+	autoconf -o $@ $<+ && \
+	$(RM) $<+
+
+# These can record PERF_VERSION
+perf.o perf.spec \
+	$(patsubst %.sh,%,$(SCRIPT_SH)) \
+	$(patsubst %.perl,%,$(SCRIPT_PERL)) \
+	: PERF-VERSION-FILE
+
+%.o: %.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
+%.s: %.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
+%.o: %.S
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $<
+
+util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
+		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
+		'-DBINDIR="$(bindir_relative_SQ)"' \
+		'-DPREFIX="$(prefix_SQ)"' \
+		$<
+
+builtin-init-db.o: builtin-init-db.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
+
+util/config.o: util/config.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+perf-%$X: %.o $(PERFLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
+$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+builtin-revert.o wt-status.o: wt-status.h
+
+$(LIB_FILE): $(LIB_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
+
+doc:
+	$(MAKE) -C Documentation all
+
+man:
+	$(MAKE) -C Documentation man
+
+html:
+	$(MAKE) -C Documentation html
+
+info:
+	$(MAKE) -C Documentation info
+
+pdf:
+	$(MAKE) -C Documentation pdf
+
+TAGS:
+	$(RM) TAGS
+	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
+
+tags:
+	$(RM) tags
+	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
+
+cscope:
+	$(RM) cscope*
+	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
+
+### Detect prefix changes
+TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
+             $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
+
+PERF-CFLAGS: .FORCE-PERF-CFLAGS
+	@FLAGS='$(TRACK_CFLAGS)'; \
+	    if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \
+		echo 1>&2 "    * new build flags or prefix"; \
+		echo "$$FLAGS" >PERF-CFLAGS; \
+            fi
+
+# We need to apply sq twice, once to protect from the shell
+# that runs PERF-BUILD-OPTIONS, and then again to protect it
+# and the first level quoting from the shell that runs "echo".
+PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
+	@echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
+	@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
+	@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
+	@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
+
+### Testing rules
+
+#
+# None right now:
+#
+# TEST_PROGRAMS += test-something$X
+
+all:: $(TEST_PROGRAMS)
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+export NO_SVN_TESTS
+
+check: common-cmds.h
+	if sparse; \
+	then \
+		for i in *.c */*.c; \
+		do \
+			sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+		done; \
+	else \
+		echo 2>&1 "Did you mean 'make test'?"; \
+		exit 1; \
+	fi
+
+remove-dashes:
+	./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
+
+### Installation rules
+
+ifneq ($(filter /%,$(firstword $(template_dir))),)
+template_instdir = $(template_dir)
+else
+template_instdir = $(prefix)/$(template_dir)
+endif
+export template_instdir
+
+ifneq ($(filter /%,$(firstword $(perfexecdir))),)
+perfexec_instdir = $(perfexecdir)
+else
+perfexec_instdir = $(prefix)/$(perfexecdir)
+endif
+perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+export perfexec_instdir
+
+install: all
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
+ifdef BUILT_INS
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifneq (,$X)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
+endif
+endif
+
+install-doc:
+	$(MAKE) -C Documentation install
+
+install-man:
+	$(MAKE) -C Documentation install-man
+
+install-html:
+	$(MAKE) -C Documentation install-html
+
+install-info:
+	$(MAKE) -C Documentation install-info
+
+install-pdf:
+	$(MAKE) -C Documentation install-pdf
+
+quick-install-doc:
+	$(MAKE) -C Documentation quick-install
+
+quick-install-man:
+	$(MAKE) -C Documentation quick-install-man
+
+quick-install-html:
+	$(MAKE) -C Documentation quick-install-html
+
+
+### Maintainer's dist rules
+#
+# None right now
+#
+#
+# perf.spec: perf.spec.in
+#	sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
+#	mv $@+ $@
+#
+# PERF_TARNAME=perf-$(PERF_VERSION)
+# dist: perf.spec perf-archive$(X) configure
+#	./perf-archive --format=tar \
+#		--prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
+#	@mkdir -p $(PERF_TARNAME)
+#	@cp perf.spec configure $(PERF_TARNAME)
+#	@echo $(PERF_VERSION) > $(PERF_TARNAME)/version
+#	$(TAR) rf $(PERF_TARNAME).tar \
+#		$(PERF_TARNAME)/perf.spec \
+#		$(PERF_TARNAME)/configure \
+#		$(PERF_TARNAME)/version
+#	@$(RM) -r $(PERF_TARNAME)
+#	gzip -f -9 $(PERF_TARNAME).tar
+#
+# htmldocs = perf-htmldocs-$(PERF_VERSION)
+# manpages = perf-manpages-$(PERF_VERSION)
+# dist-doc:
+#	$(RM) -r .doc-tmp-dir
+#	mkdir .doc-tmp-dir
+#	$(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
+#	cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
+#	gzip -n -9 -f $(htmldocs).tar
+#	:
+#	$(RM) -r .doc-tmp-dir
+#	mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
+#	$(MAKE) -C Documentation DESTDIR=./ \
+#		man1dir=../.doc-tmp-dir/man1 \
+#		man5dir=../.doc-tmp-dir/man5 \
+#		man7dir=../.doc-tmp-dir/man7 \
+#		install
+#	cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
+#	gzip -n -9 -f $(manpages).tar
+#	$(RM) -r .doc-tmp-dir
+#
+# rpm: dist
+#	$(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
+
+### Cleaning rules
+
+distclean: clean
+#	$(RM) configure
+
+clean:
+	$(RM) *.o */*.o $(LIB_FILE)
+	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
+	$(RM) $(TEST_PROGRAMS)
+	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
+	$(RM) -r autom4te.cache
+	$(RM) config.log config.mak.autogen config.mak.append config.status config.cache
+	$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
+	$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
+	$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
+	$(MAKE) -C Documentation/ clean
+	$(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS
+
+.PHONY: all install clean strip
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+.PHONY: .FORCE-PERF-BUILD-OPTIONS
+
+### Make sure built-ins do not have dups and listed in perf.c
+#
+check-builtins::
+	./check-builtins.sh
+
+### Test suite coverage testing
+#
+# None right now
+#
+# .PHONY: coverage coverage-clean coverage-build coverage-report
+#
+# coverage:
+#	$(MAKE) coverage-build
+#	$(MAKE) coverage-report
+#
+# coverage-clean:
+#	rm -f *.gcda *.gcno
+#
+# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
+# COVERAGE_LDFLAGS = $(CFLAGS)  -O0 -lgcov
+#
+# coverage-build: coverage-clean
+#	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
+#	$(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
+#		-j1 test
+#
+# coverage-report:
+#	gcov -b *.c */*.c
+#	grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
+#		| sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
+#		| tee coverage-untested-functions
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
new file mode 100644
index 0000000..116a397
--- /dev/null
+++ b/tools/perf/builtin-annotate.c
@@ -0,0 +1,1355 @@
+/*
+ * builtin-annotate.c
+ *
+ * Builtin annotate command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include "util/color.h"
+#include "util/list.h"
+#include "util/cache.h"
+#include "util/rbtree.h"
+#include "util/symbol.h"
+#include "util/string.h"
+
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#define SHOW_KERNEL	1
+#define SHOW_USER	2
+#define SHOW_HV		4
+
+static char		const *input_name = "perf.data";
+static char		*vmlinux = NULL;
+
+static char		default_sort_order[] = "comm,symbol";
+static char		*sort_order = default_sort_order;
+
+static int		input;
+static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
+
+static int		dump_trace = 0;
+#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
+
+static int		verbose;
+
+static unsigned long	page_size;
+static unsigned long	mmap_window = 32;
+
+struct ip_event {
+	struct perf_event_header header;
+	__u64 ip;
+	__u32 pid, tid;
+};
+
+struct mmap_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	__u64 start;
+	__u64 len;
+	__u64 pgoff;
+	char filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	char comm[16];
+};
+
+struct fork_event {
+	struct perf_event_header header;
+	__u32 pid, ppid;
+};
+
+struct period_event {
+	struct perf_event_header header;
+	__u64 time;
+	__u64 id;
+	__u64 sample_period;
+};
+
+typedef union event_union {
+	struct perf_event_header	header;
+	struct ip_event			ip;
+	struct mmap_event		mmap;
+	struct comm_event		comm;
+	struct fork_event		fork;
+	struct period_event		period;
+} event_t;
+
+static LIST_HEAD(dsos);
+static struct dso *kernel_dso;
+static struct dso *vdso;
+
+
+static void dsos__add(struct dso *dso)
+{
+	list_add_tail(&dso->node, &dsos);
+}
+
+static struct dso *dsos__find(const char *name)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		if (strcmp(pos->name, name) == 0)
+			return pos;
+	return NULL;
+}
+
+static struct dso *dsos__findnew(const char *name)
+{
+	struct dso *dso = dsos__find(name);
+	int nr;
+
+	if (dso)
+		return dso;
+
+	dso = dso__new(name, 0);
+	if (!dso)
+		goto out_delete_dso;
+
+	nr = dso__load(dso, NULL, verbose);
+	if (nr < 0) {
+		if (verbose)
+			fprintf(stderr, "Failed to open: %s\n", name);
+		goto out_delete_dso;
+	}
+	if (!nr && verbose) {
+		fprintf(stderr,
+		"No symbols found in: %s, maybe install a debug package?\n",
+				name);
+	}
+
+	dsos__add(dso);
+
+	return dso;
+
+out_delete_dso:
+	dso__delete(dso);
+	return NULL;
+}
+
+static void dsos__fprintf(FILE *fp)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		dso__fprintf(pos, fp);
+}
+
+static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip)
+{
+	return dso__find_symbol(kernel_dso, ip);
+}
+
+static int load_kernel(void)
+{
+	int err;
+
+	kernel_dso = dso__new("[kernel]", 0);
+	if (!kernel_dso)
+		return -1;
+
+	err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
+	if (err) {
+		dso__delete(kernel_dso);
+		kernel_dso = NULL;
+	} else
+		dsos__add(kernel_dso);
+
+	vdso = dso__new("[vdso]", 0);
+	if (!vdso)
+		return -1;
+
+	vdso->find_symbol = vdso__find_symbol;
+
+	dsos__add(vdso);
+
+	return err;
+}
+
+struct map {
+	struct list_head node;
+	uint64_t	 start;
+	uint64_t	 end;
+	uint64_t	 pgoff;
+	uint64_t	 (*map_ip)(struct map *, uint64_t);
+	struct dso	 *dso;
+};
+
+static uint64_t map__map_ip(struct map *map, uint64_t ip)
+{
+	return ip - map->start + map->pgoff;
+}
+
+static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
+{
+	return ip;
+}
+
+static struct map *map__new(struct mmap_event *event)
+{
+	struct map *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		const char *filename = event->filename;
+
+		self->start = event->start;
+		self->end   = event->start + event->len;
+		self->pgoff = event->pgoff;
+
+		self->dso = dsos__findnew(filename);
+		if (self->dso == NULL)
+			goto out_delete;
+
+		if (self->dso == vdso)
+			self->map_ip = vdso__map_ip;
+		else
+			self->map_ip = map__map_ip;
+	}
+	return self;
+out_delete:
+	free(self);
+	return NULL;
+}
+
+static struct map *map__clone(struct map *self)
+{
+	struct map *map = malloc(sizeof(*self));
+
+	if (!map)
+		return NULL;
+
+	memcpy(map, self, sizeof(*self));
+
+	return map;
+}
+
+static int map__overlap(struct map *l, struct map *r)
+{
+	if (l->start > r->start) {
+		struct map *t = l;
+		l = r;
+		r = t;
+	}
+
+	if (l->end > r->start)
+		return 1;
+
+	return 0;
+}
+
+static size_t map__fprintf(struct map *self, FILE *fp)
+{
+	return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n",
+		       self->start, self->end, self->pgoff, self->dso->name);
+}
+
+
+struct thread {
+	struct rb_node	 rb_node;
+	struct list_head maps;
+	pid_t		 pid;
+	char		 *comm;
+};
+
+static struct thread *thread__new(pid_t pid)
+{
+	struct thread *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->pid = pid;
+		self->comm = malloc(32);
+		if (self->comm)
+			snprintf(self->comm, 32, ":%d", self->pid);
+		INIT_LIST_HEAD(&self->maps);
+	}
+
+	return self;
+}
+
+static int thread__set_comm(struct thread *self, const char *comm)
+{
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(comm);
+	return self->comm ? 0 : -ENOMEM;
+}
+
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	struct map *pos;
+	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+
+	list_for_each_entry(pos, &self->maps, node)
+		ret += map__fprintf(pos, fp);
+
+	return ret;
+}
+
+
+static struct rb_root threads;
+static struct thread *last_match;
+
+static struct thread *threads__findnew(pid_t pid)
+{
+	struct rb_node **p = &threads.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+
+	/*
+	 * Font-end cache - PID lookups come in blocks,
+	 * so most of the time we dont have to look up
+	 * the full rbtree:
+	 */
+	if (last_match && last_match->pid == pid)
+		return last_match;
+
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
+
+		if (th->pid == pid) {
+			last_match = th;
+			return th;
+		}
+
+		if (pid < th->pid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	th = thread__new(pid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color(&th->rb_node, &threads);
+		last_match = th;
+	}
+
+	return th;
+}
+
+static void thread__insert_map(struct thread *self, struct map *map)
+{
+	struct map *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
+		if (map__overlap(pos, map)) {
+			list_del_init(&pos->node);
+			/* XXX leaks dsos */
+			free(pos);
+		}
+	}
+
+	list_add_tail(&map->node, &self->maps);
+}
+
+static int thread__fork(struct thread *self, struct thread *parent)
+{
+	struct map *map;
+
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(parent->comm);
+	if (!self->comm)
+		return -ENOMEM;
+
+	list_for_each_entry(map, &parent->maps, node) {
+		struct map *new = map__clone(map);
+		if (!new)
+			return -ENOMEM;
+		thread__insert_map(self, new);
+	}
+
+	return 0;
+}
+
+static struct map *thread__find_map(struct thread *self, uint64_t ip)
+{
+	struct map *pos;
+
+	if (self == NULL)
+		return NULL;
+
+	list_for_each_entry(pos, &self->maps, node)
+		if (ip >= pos->start && ip <= pos->end)
+			return pos;
+
+	return NULL;
+}
+
+static size_t threads__fprintf(FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
+		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+		ret += thread__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+/*
+ * histogram, sorted on item, collects counts
+ */
+
+static struct rb_root hist;
+
+struct hist_entry {
+	struct rb_node	 rb_node;
+
+	struct thread	 *thread;
+	struct map	 *map;
+	struct dso	 *dso;
+	struct symbol	 *sym;
+	uint64_t	 ip;
+	char		 level;
+
+	uint32_t	 count;
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+	struct list_head list;
+
+	char *header;
+
+	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
+	size_t	(*print)(FILE *fp, struct hist_entry *);
+};
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static size_t
+sort__thread_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
+}
+
+static struct sort_entry sort_thread = {
+	.header = "         Command:  Pid",
+	.cmp	= sort__thread_cmp,
+	.print	= sort__thread_print,
+};
+
+/* --sort comm */
+
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	char *comm_l = left->thread->comm;
+	char *comm_r = right->thread->comm;
+
+	if (!comm_l || !comm_r) {
+		if (!comm_l && !comm_r)
+			return 0;
+		else if (!comm_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(comm_l, comm_r);
+}
+
+static size_t
+sort__comm_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s", self->thread->comm);
+}
+
+static struct sort_entry sort_comm = {
+	.header		= "         Command",
+	.cmp		= sort__comm_cmp,
+	.collapse	= sort__comm_collapse,
+	.print		= sort__comm_print,
+};
+
+/* --sort dso */
+
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct dso *dso_l = left->dso;
+	struct dso *dso_r = right->dso;
+
+	if (!dso_l || !dso_r) {
+		if (!dso_l && !dso_r)
+			return 0;
+		else if (!dso_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(dso_l->name, dso_r->name);
+}
+
+static size_t
+sort__dso_print(FILE *fp, struct hist_entry *self)
+{
+	if (self->dso)
+		return fprintf(fp, "%-25s", self->dso->name);
+
+	return fprintf(fp, "%016llx         ", (__u64)self->ip);
+}
+
+static struct sort_entry sort_dso = {
+	.header = "Shared Object            ",
+	.cmp	= sort__dso_cmp,
+	.print	= sort__dso_print,
+};
+
+/* --sort symbol */
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t ip_l, ip_r;
+
+	if (left->sym == right->sym)
+		return 0;
+
+	ip_l = left->sym ? left->sym->start : left->ip;
+	ip_r = right->sym ? right->sym->start : right->ip;
+
+	return (int64_t)(ip_r - ip_l);
+}
+
+static size_t
+sort__sym_print(FILE *fp, struct hist_entry *self)
+{
+	size_t ret = 0;
+
+	if (verbose)
+		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
+
+	if (self->sym) {
+		ret += fprintf(fp, "[%c] %s",
+			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
+	} else {
+		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+	}
+
+	return ret;
+}
+
+static struct sort_entry sort_sym = {
+	.header = "Symbol",
+	.cmp	= sort__sym_cmp,
+	.print	= sort__sym_print,
+};
+
+static int sort__need_collapse = 0;
+
+struct sort_dimension {
+	char			*name;
+	struct sort_entry	*entry;
+	int			taken;
+};
+
+static struct sort_dimension sort_dimensions[] = {
+	{ .name = "pid",	.entry = &sort_thread,	},
+	{ .name = "comm",	.entry = &sort_comm,	},
+	{ .name = "dso",	.entry = &sort_dso,	},
+	{ .name = "symbol",	.entry = &sort_sym,	},
+};
+
+static LIST_HEAD(hist_entry__sort_list);
+
+static int sort_dimension__add(char *tok)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
+		struct sort_dimension *sd = &sort_dimensions[i];
+
+		if (sd->taken)
+			continue;
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sd->entry->collapse)
+			sort__need_collapse = 1;
+
+		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
+		sd->taken = 1;
+
+		return 0;
+	}
+
+	return -ESRCH;
+}
+
+static int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		cmp = se->cmp(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int64_t (*f)(struct hist_entry *, struct hist_entry *);
+
+		f = se->collapse ?: se->cmp;
+
+		cmp = f(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+/*
+ * collect histogram counts
+ */
+static void hist_hit(struct hist_entry *he, uint64_t ip)
+{
+	unsigned int sym_size, offset;
+	struct symbol *sym = he->sym;
+
+	he->count++;
+
+	if (!sym || !sym->hist)
+		return;
+
+	sym_size = sym->end - sym->start;
+	offset = ip - sym->start;
+
+	if (offset >= sym_size)
+		return;
+
+	sym->hist_sum++;
+	sym->hist[offset]++;
+
+	if (verbose >= 3)
+		printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
+			(void *)he->sym->start,
+			he->sym->name,
+			(void *)ip, ip - he->sym->start,
+			sym->hist[offset]);
+}
+
+static int
+hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
+		struct symbol *sym, uint64_t ip, char level)
+{
+	struct rb_node **p = &hist.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct hist_entry entry = {
+		.thread	= thread,
+		.map	= map,
+		.dso	= dso,
+		.sym	= sym,
+		.ip	= ip,
+		.level	= level,
+		.count	= 1,
+	};
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__cmp(&entry, he);
+
+		if (!cmp) {
+			hist_hit(he, ip);
+
+			return 0;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	he = malloc(sizeof(*he));
+	if (!he)
+		return -ENOMEM;
+	*he = entry;
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &hist);
+
+	return 0;
+}
+
+static void hist_entry__free(struct hist_entry *he)
+{
+	free(he);
+}
+
+/*
+ * collapse the histogram
+ */
+
+static struct rb_root collapse_hists;
+
+static void collapse__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &collapse_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			iter->count += he->count;
+			hist_entry__free(he);
+			return;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &collapse_hists);
+}
+
+static void collapse__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+
+	if (!sort__need_collapse)
+		return;
+
+	next = rb_first(&hist);
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, &hist);
+		collapse__insert_entry(n);
+	}
+}
+
+/*
+ * reverse the map, sort on count.
+ */
+
+static struct rb_root output_hists;
+
+static void output__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &output_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (he->count > iter->count)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &output_hists);
+}
+
+static void output__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+	struct rb_root *tree = &hist;
+
+	if (sort__need_collapse)
+		tree = &collapse_hists;
+
+	next = rb_first(tree);
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, tree);
+		output__insert_entry(n);
+	}
+}
+
+static void register_idle_thread(void)
+{
+	struct thread *thread = threads__findnew(0);
+
+	if (thread == NULL ||
+			thread__set_comm(thread, "[idle]")) {
+		fprintf(stderr, "problem inserting idle task.\n");
+		exit(-1);
+	}
+}
+
+static unsigned long total = 0,
+		     total_mmap = 0,
+		     total_comm = 0,
+		     total_fork = 0,
+		     total_unknown = 0;
+
+static int
+process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	char level;
+	int show = 0;
+	struct dso *dso = NULL;
+	struct thread *thread = threads__findnew(event->ip.pid);
+	uint64_t ip = event->ip.ip;
+	struct map *map = NULL;
+
+	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->header.misc,
+		event->ip.pid,
+		(void *)(long)ip);
+
+	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	if (thread == NULL) {
+		fprintf(stderr, "problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+		show = SHOW_KERNEL;
+		level = 'k';
+
+		dso = kernel_dso;
+
+		dprintf(" ...... dso: %s\n", dso->name);
+
+	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+
+		show = SHOW_USER;
+		level = '.';
+
+		map = thread__find_map(thread, ip);
+		if (map != NULL) {
+			ip = map->map_ip(map, ip);
+			dso = map->dso;
+		} else {
+			/*
+			 * If this is outside of all known maps,
+			 * and is a negative address, try to look it
+			 * up in the kernel dso, as it might be a
+			 * vsyscall (which executes in user-mode):
+			 */
+			if ((long long)ip < 0)
+				dso = kernel_dso;
+		}
+		dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+
+	} else {
+		show = SHOW_HV;
+		level = 'H';
+		dprintf(" ...... dso: [hypervisor]\n");
+	}
+
+	if (show & show_mask) {
+		struct symbol *sym = NULL;
+
+		if (dso)
+			sym = dso->find_symbol(dso, ip);
+
+		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
+			fprintf(stderr,
+		"problem incrementing symbol count, skipping event\n");
+			return -1;
+		}
+	}
+	total++;
+
+	return 0;
+}
+
+static int
+process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->mmap.pid);
+	struct map *map = map__new(&event->mmap);
+
+	dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->mmap.pid,
+		(void *)(long)event->mmap.start,
+		(void *)(long)event->mmap.len,
+		(void *)(long)event->mmap.pgoff,
+		event->mmap.filename);
+
+	if (thread == NULL || map == NULL) {
+		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+		return 0;
+	}
+
+	thread__insert_map(thread, map);
+	total_mmap++;
+
+	return 0;
+}
+
+static int
+process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->comm.pid);
+
+	dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->comm.comm, event->comm.pid);
+
+	if (thread == NULL ||
+	    thread__set_comm(thread, event->comm.comm)) {
+		dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		return -1;
+	}
+	total_comm++;
+
+	return 0;
+}
+
+static int
+process_fork_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->fork.pid);
+	struct thread *parent = threads__findnew(event->fork.ppid);
+
+	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->fork.pid, event->fork.ppid);
+
+	if (!thread || !parent || thread__fork(thread, parent)) {
+		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
+		return -1;
+	}
+	total_fork++;
+
+	return 0;
+}
+
+static int
+process_period_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->period.time,
+		event->period.id,
+		event->period.sample_period);
+
+	return 0;
+}
+
+static int
+process_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
+		return process_overflow_event(event, offset, head);
+
+	switch (event->header.type) {
+	case PERF_EVENT_MMAP:
+		return process_mmap_event(event, offset, head);
+
+	case PERF_EVENT_COMM:
+		return process_comm_event(event, offset, head);
+
+	case PERF_EVENT_FORK:
+		return process_fork_event(event, offset, head);
+
+	case PERF_EVENT_PERIOD:
+		return process_period_event(event, offset, head);
+	/*
+	 * We dont process them right now but they are fine:
+	 */
+
+	case PERF_EVENT_THROTTLE:
+	case PERF_EVENT_UNTHROTTLE:
+		return 0;
+
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+parse_line(FILE *file, struct symbol *sym, uint64_t start, uint64_t len)
+{
+	char *line = NULL, *tmp, *tmp2;
+	unsigned int offset;
+	size_t line_len;
+	__u64 line_ip;
+	int ret;
+	char *c;
+
+	if (getline(&line, &line_len, file) < 0)
+		return -1;
+	if (!line)
+		return -1;
+
+	c = strchr(line, '\n');
+	if (c)
+		*c = 0;
+
+	line_ip = -1;
+	offset = 0;
+	ret = -2;
+
+	/*
+	 * Strip leading spaces:
+	 */
+	tmp = line;
+	while (*tmp) {
+		if (*tmp != ' ')
+			break;
+		tmp++;
+	}
+
+	if (*tmp) {
+		/*
+		 * Parse hexa addresses followed by ':'
+		 */
+		line_ip = strtoull(tmp, &tmp2, 16);
+		if (*tmp2 != ':')
+			line_ip = -1;
+	}
+
+	if (line_ip != -1) {
+		unsigned int hits = 0;
+		double percent = 0.0;
+		char *color = PERF_COLOR_NORMAL;
+
+		offset = line_ip - start;
+		if (offset < len)
+			hits = sym->hist[offset];
+
+		if (sym->hist_sum)
+			percent = 100.0 * hits / sym->hist_sum;
+
+		/*
+		 * We color high-overhead entries in red, low-overhead
+		 * entries in green - and keep the middle ground normal:
+		 */
+		if (percent >= 5.0)
+			color = PERF_COLOR_RED;
+		else {
+			if (percent > 0.5)
+				color = PERF_COLOR_GREEN;
+		}
+
+		color_fprintf(stdout, color, " %7.2f", percent);
+		printf(" :	");
+		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line);
+	} else {
+		if (!*line)
+			printf("         :\n");
+		else
+			printf("         :	%s\n", line);
+	}
+
+	return 0;
+}
+
+static void annotate_sym(struct dso *dso, struct symbol *sym)
+{
+	char *filename = dso->name;
+	uint64_t start, end, len;
+	char command[PATH_MAX*2];
+	FILE *file;
+
+	if (!filename)
+		return;
+	if (dso == kernel_dso)
+		filename = vmlinux;
+
+	printf("\n------------------------------------------------\n");
+	printf(" Percent |	Source code & Disassembly of %s\n", filename);
+	printf("------------------------------------------------\n");
+
+	if (verbose >= 2)
+		printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
+
+	start = sym->obj_start;
+	if (!start)
+		start = sym->start;
+
+	end = start + sym->end - sym->start + 1;
+	len = sym->end - sym->start;
+
+	sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename);
+
+	if (verbose >= 3)
+		printf("doing: %s\n", command);
+
+	file = popen(command, "r");
+	if (!file)
+		return;
+
+	while (!feof(file)) {
+		if (parse_line(file, sym, start, len) < 0)
+			break;
+	}
+
+	pclose(file);
+}
+
+static void find_annotations(void)
+{
+	struct rb_node *nd;
+	struct dso *dso;
+	int count = 0;
+
+	list_for_each_entry(dso, &dsos, node) {
+
+		for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) {
+			struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+
+			if (sym->hist) {
+				annotate_sym(dso, sym);
+				count++;
+			}
+		}
+	}
+
+	if (!count)
+		printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter);
+}
+
+static int __cmd_annotate(void)
+{
+	int ret, rc = EXIT_FAILURE;
+	unsigned long offset = 0;
+	unsigned long head = 0;
+	struct stat stat;
+	event_t *event;
+	uint32_t size;
+	char *buf;
+
+	register_idle_thread();
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		perror("failed to open file");
+		exit(-1);
+	}
+
+	ret = fstat(input, &stat);
+	if (ret < 0) {
+		perror("failed to stat file");
+		exit(-1);
+	}
+
+	if (!stat.st_size) {
+		fprintf(stderr, "zero-sized file, nothing to do!\n");
+		exit(0);
+	}
+
+	if (load_kernel() < 0) {
+		perror("failed to load kernel symbols");
+		return EXIT_FAILURE;
+	}
+
+remap:
+	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
+			   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		perror("failed to mmap file");
+		exit(-1);
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	size = event->header.size;
+	if (!size)
+		size = 8;
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		unsigned long shift = page_size * (head / page_size);
+		int ret;
+
+		ret = munmap(buf, page_size * mmap_window);
+		assert(ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	dprintf("%p [%p]: event: %d\n",
+			(void *)(offset + head),
+			(void *)(long)event->header.size,
+			event->header.type);
+
+	if (!size || process_event(event, offset, head) < 0) {
+
+		dprintf("%p [%p]: skipping unknown header type: %d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->header.type);
+
+		total_unknown++;
+
+		/*
+		 * assume we lost track of the stream, check alignment, and
+		 * increment a single u64 in the hope to catch on again 'soon'.
+		 */
+
+		if (unlikely(head & 7))
+			head &= ~7ULL;
+
+		size = 8;
+	}
+
+	head += size;
+
+	if (offset + head < stat.st_size)
+		goto more;
+
+	rc = EXIT_SUCCESS;
+	close(input);
+
+	dprintf("      IP events: %10ld\n", total);
+	dprintf("    mmap events: %10ld\n", total_mmap);
+	dprintf("    comm events: %10ld\n", total_comm);
+	dprintf("    fork events: %10ld\n", total_fork);
+	dprintf(" unknown events: %10ld\n", total_unknown);
+
+	if (dump_trace)
+		return 0;
+
+	if (verbose >= 3)
+		threads__fprintf(stdout);
+
+	if (verbose >= 2)
+		dsos__fprintf(stdout);
+
+	collapse__resort();
+	output__resort();
+
+	find_annotations();
+
+	return rc;
+}
+
+static const char * const annotate_usage[] = {
+	"perf annotate [<options>] <command>",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_STRING('s', "symbol", &sym_hist_filter, "file",
+		    "symbol to annotate"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+	OPT_END()
+};
+
+static void setup_sorting(void)
+{
+	char *tmp, *tok, *str = strdup(sort_order);
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		if (sort_dimension__add(tok) < 0) {
+			error("Unknown --sort key: `%s'", tok);
+			usage_with_options(annotate_usage, options);
+		}
+	}
+
+	free(str);
+}
+
+int cmd_annotate(int argc, const char **argv, const char *prefix)
+{
+	symbol__init();
+
+	page_size = getpagesize();
+
+	argc = parse_options(argc, argv, options, annotate_usage, 0);
+
+	setup_sorting();
+
+	if (argc) {
+		/*
+		 * Special case: if there's an argument left then assume tha
+		 * it's a symbol filter:
+		 */
+		if (argc > 1)
+			usage_with_options(annotate_usage, options);
+
+		sym_hist_filter = argv[0];
+	}
+
+	if (!sym_hist_filter)
+		usage_with_options(annotate_usage, options);
+
+	setup_pager();
+
+	return __cmd_annotate();
+}
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
new file mode 100644
index 0000000..0f32dc3
--- /dev/null
+++ b/tools/perf/builtin-help.c
@@ -0,0 +1,461 @@
+/*
+ * builtin-help.c
+ *
+ * Builtin help command
+ */
+#include "util/cache.h"
+#include "builtin.h"
+#include "util/exec_cmd.h"
+#include "common-cmds.h"
+#include "util/parse-options.h"
+#include "util/run-command.h"
+#include "util/help.h"
+
+static struct man_viewer_list {
+	struct man_viewer_list *next;
+	char name[FLEX_ARRAY];
+} *man_viewer_list;
+
+static struct man_viewer_info_list {
+	struct man_viewer_info_list *next;
+	const char *info;
+	char name[FLEX_ARRAY];
+} *man_viewer_info_list;
+
+enum help_format {
+	HELP_FORMAT_MAN,
+	HELP_FORMAT_INFO,
+	HELP_FORMAT_WEB,
+};
+
+static int show_all = 0;
+static enum help_format help_format = HELP_FORMAT_MAN;
+static struct option builtin_help_options[] = {
+	OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
+	OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
+	OPT_SET_INT('w', "web", &help_format, "show manual in web browser",
+			HELP_FORMAT_WEB),
+	OPT_SET_INT('i', "info", &help_format, "show info page",
+			HELP_FORMAT_INFO),
+	OPT_END(),
+};
+
+static const char * const builtin_help_usage[] = {
+	"perf help [--all] [--man|--web|--info] [command]",
+	NULL
+};
+
+static enum help_format parse_help_format(const char *format)
+{
+	if (!strcmp(format, "man"))
+		return HELP_FORMAT_MAN;
+	if (!strcmp(format, "info"))
+		return HELP_FORMAT_INFO;
+	if (!strcmp(format, "web") || !strcmp(format, "html"))
+		return HELP_FORMAT_WEB;
+	die("unrecognized help format '%s'", format);
+}
+
+static const char *get_man_viewer_info(const char *name)
+{
+	struct man_viewer_info_list *viewer;
+
+	for (viewer = man_viewer_info_list; viewer; viewer = viewer->next)
+	{
+		if (!strcasecmp(name, viewer->name))
+			return viewer->info;
+	}
+	return NULL;
+}
+
+static int check_emacsclient_version(void)
+{
+	struct strbuf buffer = STRBUF_INIT;
+	struct child_process ec_process;
+	const char *argv_ec[] = { "emacsclient", "--version", NULL };
+	int version;
+
+	/* emacsclient prints its version number on stderr */
+	memset(&ec_process, 0, sizeof(ec_process));
+	ec_process.argv = argv_ec;
+	ec_process.err = -1;
+	ec_process.stdout_to_stderr = 1;
+	if (start_command(&ec_process)) {
+		fprintf(stderr, "Failed to start emacsclient.\n");
+		return -1;
+	}
+	strbuf_read(&buffer, ec_process.err, 20);
+	close(ec_process.err);
+
+	/*
+	 * Don't bother checking return value, because "emacsclient --version"
+	 * seems to always exits with code 1.
+	 */
+	finish_command(&ec_process);
+
+	if (prefixcmp(buffer.buf, "emacsclient")) {
+		fprintf(stderr, "Failed to parse emacsclient version.\n");
+		strbuf_release(&buffer);
+		return -1;
+	}
+
+	strbuf_remove(&buffer, 0, strlen("emacsclient"));
+	version = atoi(buffer.buf);
+
+	if (version < 22) {
+		fprintf(stderr,
+			"emacsclient version '%d' too old (< 22).\n",
+			version);
+		strbuf_release(&buffer);
+		return -1;
+	}
+
+	strbuf_release(&buffer);
+	return 0;
+}
+
+static void exec_woman_emacs(const char* path, const char *page)
+{
+	if (!check_emacsclient_version()) {
+		/* This works only with emacsclient version >= 22. */
+		struct strbuf man_page = STRBUF_INIT;
+
+		if (!path)
+			path = "emacsclient";
+		strbuf_addf(&man_page, "(woman \"%s\")", page);
+		execlp(path, "emacsclient", "-e", man_page.buf, NULL);
+		warning("failed to exec '%s': %s", path, strerror(errno));
+	}
+}
+
+static void exec_man_konqueror(const char* path, const char *page)
+{
+	const char *display = getenv("DISPLAY");
+	if (display && *display) {
+		struct strbuf man_page = STRBUF_INIT;
+		const char *filename = "kfmclient";
+
+		/* It's simpler to launch konqueror using kfmclient. */
+		if (path) {
+			const char *file = strrchr(path, '/');
+			if (file && !strcmp(file + 1, "konqueror")) {
+				char *new = strdup(path);
+				char *dest = strrchr(new, '/');
+
+				/* strlen("konqueror") == strlen("kfmclient") */
+				strcpy(dest + 1, "kfmclient");
+				path = new;
+			}
+			if (file)
+				filename = file;
+		} else
+			path = "kfmclient";
+		strbuf_addf(&man_page, "man:%s(1)", page);
+		execlp(path, filename, "newTab", man_page.buf, NULL);
+		warning("failed to exec '%s': %s", path, strerror(errno));
+	}
+}
+
+static void exec_man_man(const char* path, const char *page)
+{
+	if (!path)
+		path = "man";
+	execlp(path, "man", page, NULL);
+	warning("failed to exec '%s': %s", path, strerror(errno));
+}
+
+static void exec_man_cmd(const char *cmd, const char *page)
+{
+	struct strbuf shell_cmd = STRBUF_INIT;
+	strbuf_addf(&shell_cmd, "%s %s", cmd, page);
+	execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL);
+	warning("failed to exec '%s': %s", cmd, strerror(errno));
+}
+
+static void add_man_viewer(const char *name)
+{
+	struct man_viewer_list **p = &man_viewer_list;
+	size_t len = strlen(name);
+
+	while (*p)
+		p = &((*p)->next);
+	*p = calloc(1, (sizeof(**p) + len + 1));
+	strncpy((*p)->name, name, len);
+}
+
+static int supported_man_viewer(const char *name, size_t len)
+{
+	return (!strncasecmp("man", name, len) ||
+		!strncasecmp("woman", name, len) ||
+		!strncasecmp("konqueror", name, len));
+}
+
+static void do_add_man_viewer_info(const char *name,
+				   size_t len,
+				   const char *value)
+{
+	struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1);
+
+	strncpy(new->name, name, len);
+	new->info = strdup(value);
+	new->next = man_viewer_info_list;
+	man_viewer_info_list = new;
+}
+
+static int add_man_viewer_path(const char *name,
+			       size_t len,
+			       const char *value)
+{
+	if (supported_man_viewer(name, len))
+		do_add_man_viewer_info(name, len, value);
+	else
+		warning("'%s': path for unsupported man viewer.\n"
+			"Please consider using 'man.<tool>.cmd' instead.",
+			name);
+
+	return 0;
+}
+
+static int add_man_viewer_cmd(const char *name,
+			      size_t len,
+			      const char *value)
+{
+	if (supported_man_viewer(name, len))
+		warning("'%s': cmd for supported man viewer.\n"
+			"Please consider using 'man.<tool>.path' instead.",
+			name);
+	else
+		do_add_man_viewer_info(name, len, value);
+
+	return 0;
+}
+
+static int add_man_viewer_info(const char *var, const char *value)
+{
+	const char *name = var + 4;
+	const char *subkey = strrchr(name, '.');
+
+	if (!subkey)
+		return error("Config with no key for man viewer: %s", name);
+
+	if (!strcmp(subkey, ".path")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return add_man_viewer_path(name, subkey - name, value);
+	}
+	if (!strcmp(subkey, ".cmd")) {
+		if (!value)
+			return config_error_nonbool(var);
+		return add_man_viewer_cmd(name, subkey - name, value);
+	}
+
+	warning("'%s': unsupported man viewer sub key.", subkey);
+	return 0;
+}
+
+static int perf_help_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "help.format")) {
+		if (!value)
+			return config_error_nonbool(var);
+		help_format = parse_help_format(value);
+		return 0;
+	}
+	if (!strcmp(var, "man.viewer")) {
+		if (!value)
+			return config_error_nonbool(var);
+		add_man_viewer(value);
+		return 0;
+	}
+	if (!prefixcmp(var, "man."))
+		return add_man_viewer_info(var, value);
+
+	return perf_default_config(var, value, cb);
+}
+
+static struct cmdnames main_cmds, other_cmds;
+
+void list_common_cmds_help(void)
+{
+	int i, longest = 0;
+
+	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
+		if (longest < strlen(common_cmds[i].name))
+			longest = strlen(common_cmds[i].name);
+	}
+
+	puts(" The most commonly used perf commands are:");
+	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
+		printf("   %s   ", common_cmds[i].name);
+		mput_char(' ', longest - strlen(common_cmds[i].name));
+		puts(common_cmds[i].help);
+	}
+}
+
+static int is_perf_command(const char *s)
+{
+	return is_in_cmdlist(&main_cmds, s) ||
+		is_in_cmdlist(&other_cmds, s);
+}
+
+static const char *prepend(const char *prefix, const char *cmd)
+{
+	size_t pre_len = strlen(prefix);
+	size_t cmd_len = strlen(cmd);
+	char *p = malloc(pre_len + cmd_len + 1);
+	memcpy(p, prefix, pre_len);
+	strcpy(p + pre_len, cmd);
+	return p;
+}
+
+static const char *cmd_to_page(const char *perf_cmd)
+{
+	if (!perf_cmd)
+		return "perf";
+	else if (!prefixcmp(perf_cmd, "perf"))
+		return perf_cmd;
+	else if (is_perf_command(perf_cmd))
+		return prepend("perf-", perf_cmd);
+	else
+		return prepend("perf-", perf_cmd);
+}
+
+static void setup_man_path(void)
+{
+	struct strbuf new_path = STRBUF_INIT;
+	const char *old_path = getenv("MANPATH");
+
+	/* We should always put ':' after our path. If there is no
+	 * old_path, the ':' at the end will let 'man' to try
+	 * system-wide paths after ours to find the manual page. If
+	 * there is old_path, we need ':' as delimiter. */
+	strbuf_addstr(&new_path, system_path(PERF_MAN_PATH));
+	strbuf_addch(&new_path, ':');
+	if (old_path)
+		strbuf_addstr(&new_path, old_path);
+
+	setenv("MANPATH", new_path.buf, 1);
+
+	strbuf_release(&new_path);
+}
+
+static void exec_viewer(const char *name, const char *page)
+{
+	const char *info = get_man_viewer_info(name);
+
+	if (!strcasecmp(name, "man"))
+		exec_man_man(info, page);
+	else if (!strcasecmp(name, "woman"))
+		exec_woman_emacs(info, page);
+	else if (!strcasecmp(name, "konqueror"))
+		exec_man_konqueror(info, page);
+	else if (info)
+		exec_man_cmd(info, page);
+	else
+		warning("'%s': unknown man viewer.", name);
+}
+
+static void show_man_page(const char *perf_cmd)
+{
+	struct man_viewer_list *viewer;
+	const char *page = cmd_to_page(perf_cmd);
+	const char *fallback = getenv("PERF_MAN_VIEWER");
+
+	setup_man_path();
+	for (viewer = man_viewer_list; viewer; viewer = viewer->next)
+	{
+		exec_viewer(viewer->name, page); /* will return when unable */
+	}
+	if (fallback)
+		exec_viewer(fallback, page);
+	exec_viewer("man", page);
+	die("no man viewer handled the request");
+}
+
+static void show_info_page(const char *perf_cmd)
+{
+	const char *page = cmd_to_page(perf_cmd);
+	setenv("INFOPATH", system_path(PERF_INFO_PATH), 1);
+	execlp("info", "info", "perfman", page, NULL);
+}
+
+static void get_html_page_path(struct strbuf *page_path, const char *page)
+{
+	struct stat st;
+	const char *html_path = system_path(PERF_HTML_PATH);
+
+	/* Check that we have a perf documentation directory. */
+	if (stat(mkpath("%s/perf.html", html_path), &st)
+	    || !S_ISREG(st.st_mode))
+		die("'%s': not a documentation directory.", html_path);
+
+	strbuf_init(page_path, 0);
+	strbuf_addf(page_path, "%s/%s.html", html_path, page);
+}
+
+/*
+ * If open_html is not defined in a platform-specific way (see for
+ * example compat/mingw.h), we use the script web--browse to display
+ * HTML.
+ */
+#ifndef open_html
+static void open_html(const char *path)
+{
+	execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
+}
+#endif
+
+static void show_html_page(const char *perf_cmd)
+{
+	const char *page = cmd_to_page(perf_cmd);
+	struct strbuf page_path; /* it leaks but we exec bellow */
+
+	get_html_page_path(&page_path, page);
+
+	open_html(page_path.buf);
+}
+
+int cmd_help(int argc, const char **argv, const char *prefix)
+{
+	const char *alias;
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	perf_config(perf_help_config, NULL);
+
+	argc = parse_options(argc, argv, builtin_help_options,
+			builtin_help_usage, 0);
+
+	if (show_all) {
+		printf("\n usage: %s\n\n", perf_usage_string);
+		list_commands("perf commands", &main_cmds, &other_cmds);
+		printf(" %s\n\n", perf_more_info_string);
+		return 0;
+	}
+
+	if (!argv[0]) {
+		printf("\n usage: %s\n\n", perf_usage_string);
+		list_common_cmds_help();
+		printf("\n %s\n\n", perf_more_info_string);
+		return 0;
+	}
+
+	alias = alias_lookup(argv[0]);
+	if (alias && !is_perf_command(argv[0])) {
+		printf("`perf %s' is aliased to `%s'\n", argv[0], alias);
+		return 0;
+	}
+
+	switch (help_format) {
+	case HELP_FORMAT_MAN:
+		show_man_page(argv[0]);
+		break;
+	case HELP_FORMAT_INFO:
+		show_info_page(argv[0]);
+		break;
+	case HELP_FORMAT_WEB:
+		show_html_page(argv[0]);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
new file mode 100644
index 0000000..fe60e37
--- /dev/null
+++ b/tools/perf/builtin-list.c
@@ -0,0 +1,20 @@
+/*
+ * builtin-list.c
+ *
+ * Builtin list command: list all event types
+ *
+ * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+int cmd_list(int argc, const char **argv, const char *prefix)
+{
+	print_events();
+	return 0;
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
new file mode 100644
index 0000000..aeab9c4
--- /dev/null
+++ b/tools/perf/builtin-record.c
@@ -0,0 +1,544 @@
+/*
+ * builtin-record.c
+ *
+ * Builtin record command: Record the profile of a workload
+ * (or a CPU, or a PID) into the perf.data output file - for
+ * later analysis via perf report.
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+#include "util/string.h"
+
+#include <unistd.h>
+#include <sched.h>
+
+#define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
+
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static long			default_interval		= 100000;
+
+static int			nr_cpus				= 0;
+static unsigned int		page_size;
+static unsigned int		mmap_pages			= 128;
+static int			freq				= 0;
+static int			output;
+static const char		*output_name			= "perf.data";
+static int			group				= 0;
+static unsigned int		realtime_prio			= 0;
+static int			system_wide			= 0;
+static pid_t			target_pid			= -1;
+static int			inherit				= 1;
+static int			force				= 0;
+static int			append_file			= 0;
+
+static long			samples;
+static struct timeval		last_read;
+static struct timeval		this_read;
+
+static __u64			bytes_written;
+
+static struct pollfd		event_array[MAX_NR_CPUS * MAX_COUNTERS];
+
+static int			nr_poll;
+static int			nr_cpu;
+
+struct mmap_event {
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	__u64				start;
+	__u64				len;
+	__u64				pgoff;
+	char				filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	char				comm[16];
+};
+
+
+struct mmap_data {
+	int			counter;
+	void			*base;
+	unsigned int		mask;
+	unsigned int		prev;
+};
+
+static struct mmap_data		mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+
+static unsigned int mmap_read_head(struct mmap_data *md)
+{
+	struct perf_counter_mmap_page *pc = md->base;
+	int head;
+
+	head = pc->data_head;
+	rmb();
+
+	return head;
+}
+
+static void mmap_read(struct mmap_data *md)
+{
+	unsigned int head = mmap_read_head(md);
+	unsigned int old = md->prev;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+	void *buf;
+	int diff;
+
+	gettimeofday(&this_read, NULL);
+
+	/*
+	 * If we're further behind than half the buffer, there's a chance
+	 * the writer will bite our tail and mess up the samples under us.
+	 *
+	 * If we somehow ended up ahead of the head, we got messed up.
+	 *
+	 * In either case, truncate and restart at head.
+	 */
+	diff = head - old;
+	if (diff > md->mask / 2 || diff < 0) {
+		struct timeval iv;
+		unsigned long msecs;
+
+		timersub(&this_read, &last_read, &iv);
+		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+		fprintf(stderr, "WARNING: failed to keep up with mmap data."
+				"  Last read %lu msecs ago.\n", msecs);
+
+		/*
+		 * head points to a known good entry, start there.
+		 */
+		old = head;
+	}
+
+	last_read = this_read;
+
+	if (old != head)
+		samples++;
+
+	size = head - old;
+
+	if ((old & md->mask) + size != (head & md->mask)) {
+		buf = &data[old & md->mask];
+		size = md->mask + 1 - (old & md->mask);
+		old += size;
+
+		while (size) {
+			int ret = write(output, buf, size);
+
+			if (ret < 0)
+				die("failed to write");
+
+			size -= ret;
+			buf += ret;
+
+			bytes_written += ret;
+		}
+	}
+
+	buf = &data[old & md->mask];
+	size = head - old;
+	old += size;
+
+	while (size) {
+		int ret = write(output, buf, size);
+
+		if (ret < 0)
+			die("failed to write");
+
+		size -= ret;
+		buf += ret;
+
+		bytes_written += ret;
+	}
+
+	md->prev = old;
+}
+
+static volatile int done = 0;
+
+static void sig_handler(int sig)
+{
+	done = 1;
+}
+
+static void pid_synthesize_comm_event(pid_t pid, int full)
+{
+	struct comm_event comm_ev;
+	char filename[PATH_MAX];
+	char bf[BUFSIZ];
+	int fd, ret;
+	size_t size;
+	char *field, *sep;
+	DIR *tasks;
+	struct dirent dirent, *next;
+
+	snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "couldn't open %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	if (read(fd, bf, sizeof(bf)) < 0) {
+		fprintf(stderr, "couldn't read %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+
+	/* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
+	memset(&comm_ev, 0, sizeof(comm_ev));
+	field = strchr(bf, '(');
+	if (field == NULL)
+		goto out_failure;
+	sep = strchr(++field, ')');
+	if (sep == NULL)
+		goto out_failure;
+	size = sep - field;
+	memcpy(comm_ev.comm, field, size++);
+
+	comm_ev.pid = pid;
+	comm_ev.header.type = PERF_EVENT_COMM;
+	size = ALIGN(size, sizeof(uint64_t));
+	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
+
+	if (!full) {
+		comm_ev.tid = pid;
+
+		ret = write(output, &comm_ev, comm_ev.header.size);
+		if (ret < 0) {
+			perror("failed to write");
+			exit(-1);
+		}
+		return;
+	}
+
+	snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
+
+	tasks = opendir(filename);
+	while (!readdir_r(tasks, &dirent, &next) && next) {
+		char *end;
+		pid = strtol(dirent.d_name, &end, 10);
+		if (*end)
+			continue;
+
+		comm_ev.tid = pid;
+
+		ret = write(output, &comm_ev, comm_ev.header.size);
+		if (ret < 0) {
+			perror("failed to write");
+			exit(-1);
+		}
+	}
+	closedir(tasks);
+	return;
+
+out_failure:
+	fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
+		filename);
+	exit(EXIT_FAILURE);
+}
+
+static void pid_synthesize_mmap_samples(pid_t pid)
+{
+	char filename[PATH_MAX];
+	FILE *fp;
+
+	snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
+
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", filename);
+		exit(EXIT_FAILURE);
+	}
+	while (1) {
+		char bf[BUFSIZ], *pbf = bf;
+		struct mmap_event mmap_ev = {
+			.header.type = PERF_EVENT_MMAP,
+		};
+		int n;
+		size_t size;
+		if (fgets(bf, sizeof(bf), fp) == NULL)
+			break;
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		n = hex2u64(pbf, &mmap_ev.start);
+		if (n < 0)
+			continue;
+		pbf += n + 1;
+		n = hex2u64(pbf, &mmap_ev.len);
+		if (n < 0)
+			continue;
+		pbf += n + 3;
+		if (*pbf == 'x') { /* vm_exec */
+			char *execname = strrchr(bf, ' ');
+
+			if (execname == NULL || execname[1] != '/')
+				continue;
+
+			execname += 1;
+			size = strlen(execname);
+			execname[size - 1] = '\0'; /* Remove \n */
+			memcpy(mmap_ev.filename, execname, size);
+			size = ALIGN(size, sizeof(uint64_t));
+			mmap_ev.len -= mmap_ev.start;
+			mmap_ev.header.size = (sizeof(mmap_ev) -
+					       (sizeof(mmap_ev.filename) - size));
+			mmap_ev.pid = pid;
+			mmap_ev.tid = pid;
+
+			if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
+				perror("failed to write");
+				exit(-1);
+			}
+		}
+	}
+
+	fclose(fp);
+}
+
+static void synthesize_samples(void)
+{
+	DIR *proc;
+	struct dirent dirent, *next;
+
+	proc = opendir("/proc");
+
+	while (!readdir_r(proc, &dirent, &next) && next) {
+		char *end;
+		pid_t pid;
+
+		pid = strtol(dirent.d_name, &end, 10);
+		if (*end) /* only interested in proper numerical dirents */
+			continue;
+
+		pid_synthesize_comm_event(pid, 1);
+		pid_synthesize_mmap_samples(pid);
+	}
+
+	closedir(proc);
+}
+
+static int group_fd;
+
+static void create_counter(int counter, int cpu, pid_t pid)
+{
+	struct perf_counter_attr *attr = attrs + counter;
+	int track = 1;
+
+	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
+	if (freq) {
+		attr->freq		= 1;
+		attr->sample_freq	= freq;
+	}
+	attr->mmap		= track;
+	attr->comm		= track;
+	attr->inherit		= (cpu < 0) && inherit;
+
+	track = 0; /* only the first counter needs these */
+
+	fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
+
+	if (fd[nr_cpu][counter] < 0) {
+		int err = errno;
+
+		error("syscall returned with %d (%s)\n",
+				fd[nr_cpu][counter], strerror(err));
+		if (err == EPERM)
+			printf("Are you root?\n");
+		exit(-1);
+	}
+	assert(fd[nr_cpu][counter] >= 0);
+	fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
+
+	/*
+	 * First counter acts as the group leader:
+	 */
+	if (group && group_fd == -1)
+		group_fd = fd[nr_cpu][counter];
+
+	event_array[nr_poll].fd = fd[nr_cpu][counter];
+	event_array[nr_poll].events = POLLIN;
+	nr_poll++;
+
+	mmap_array[nr_cpu][counter].counter = counter;
+	mmap_array[nr_cpu][counter].prev = 0;
+	mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
+	mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+			PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
+	if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
+		error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+		exit(-1);
+	}
+}
+
+static void open_counters(int cpu, pid_t pid)
+{
+	int counter;
+
+	if (pid > 0) {
+		pid_synthesize_comm_event(pid, 0);
+		pid_synthesize_mmap_samples(pid);
+	}
+
+	group_fd = -1;
+	for (counter = 0; counter < nr_counters; counter++)
+		create_counter(counter, cpu, pid);
+
+	nr_cpu++;
+}
+
+static int __cmd_record(int argc, const char **argv)
+{
+	int i, counter;
+	struct stat st;
+	pid_t pid;
+	int flags;
+	int ret;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	if (!stat(output_name, &st) && !force && !append_file) {
+		fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
+				output_name);
+		exit(-1);
+	}
+
+	flags = O_CREAT|O_RDWR;
+	if (append_file)
+		flags |= O_APPEND;
+	else
+		flags |= O_TRUNC;
+
+	output = open(output_name, flags, S_IRUSR|S_IWUSR);
+	if (output < 0) {
+		perror("failed to create output file");
+		exit(-1);
+	}
+
+	if (!system_wide) {
+		open_counters(-1, target_pid != -1 ? target_pid : getpid());
+	} else for (i = 0; i < nr_cpus; i++)
+		open_counters(i, target_pid);
+
+	signal(SIGCHLD, sig_handler);
+	signal(SIGINT, sig_handler);
+
+	if (target_pid == -1 && argc) {
+		pid = fork();
+		if (pid < 0)
+			perror("failed to fork");
+
+		if (!pid) {
+			if (execvp(argv[0], (char **)argv)) {
+				perror(argv[0]);
+				exit(-1);
+			}
+		}
+	}
+
+	if (realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			printf("Could not set realtime priority.\n");
+			exit(-1);
+		}
+	}
+
+	if (system_wide)
+		synthesize_samples();
+
+	while (!done) {
+		int hits = samples;
+
+		for (i = 0; i < nr_cpu; i++) {
+			for (counter = 0; counter < nr_counters; counter++)
+				mmap_read(&mmap_array[i][counter]);
+		}
+
+		if (hits == samples)
+			ret = poll(event_array, nr_poll, 100);
+	}
+
+	/*
+	 * Approximate RIP event size: 24 bytes.
+	 */
+	fprintf(stderr,
+		"[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
+		(double)bytes_written / 1024.0 / 1024.0,
+		output_name,
+		bytes_written / 24);
+
+	return 0;
+}
+
+static const char * const record_usage[] = {
+	"perf record [<options>] [<command>]",
+	"perf record [<options>] -- <command> [<options>]",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "record events on existing pid"),
+	OPT_INTEGER('r', "realtime", &realtime_prio,
+		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('A', "append", &append_file,
+			    "append to the output file to do incremental profiling"),
+	OPT_BOOLEAN('f', "force", &force,
+			"overwrite existing data file"),
+	OPT_LONG('c', "count", &default_interval,
+		    "event period to sample"),
+	OPT_STRING('o', "output", &output_name, "file",
+		    "output file name"),
+	OPT_BOOLEAN('i', "inherit", &inherit,
+		    "child tasks inherit counters"),
+	OPT_INTEGER('F', "freq", &freq,
+		    "profile at this frequency"),
+	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
+		    "number of mmap data pages"),
+	OPT_END()
+};
+
+int cmd_record(int argc, const char **argv, const char *prefix)
+{
+	int counter;
+
+	argc = parse_options(argc, argv, options, record_usage, 0);
+	if (!argc && target_pid == -1 && !system_wide)
+		usage_with_options(record_usage, options);
+
+	if (!nr_counters)
+		nr_counters = 1;
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (attrs[counter].sample_period)
+			continue;
+
+		attrs[counter].sample_period = default_interval;
+	}
+
+	return __cmd_record(argc, argv);
+}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
new file mode 100644
index 0000000..242e09f
--- /dev/null
+++ b/tools/perf/builtin-report.c
@@ -0,0 +1,1291 @@
+/*
+ * builtin-report.c
+ *
+ * Builtin report command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include "util/color.h"
+#include "util/list.h"
+#include "util/cache.h"
+#include "util/rbtree.h"
+#include "util/symbol.h"
+#include "util/string.h"
+
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#define SHOW_KERNEL	1
+#define SHOW_USER	2
+#define SHOW_HV		4
+
+static char		const *input_name = "perf.data";
+static char		*vmlinux = NULL;
+
+static char		default_sort_order[] = "comm,dso";
+static char		*sort_order = default_sort_order;
+
+static int		input;
+static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
+
+static int		dump_trace = 0;
+#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)
+
+static int		verbose;
+static int		full_paths;
+
+static unsigned long	page_size;
+static unsigned long	mmap_window = 32;
+
+struct ip_event {
+	struct perf_event_header header;
+	__u64 ip;
+	__u32 pid, tid;
+};
+
+struct mmap_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	__u64 start;
+	__u64 len;
+	__u64 pgoff;
+	char filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header header;
+	__u32 pid, tid;
+	char comm[16];
+};
+
+struct fork_event {
+	struct perf_event_header header;
+	__u32 pid, ppid;
+};
+
+struct period_event {
+	struct perf_event_header header;
+	__u64 time;
+	__u64 id;
+	__u64 sample_period;
+};
+
+typedef union event_union {
+	struct perf_event_header	header;
+	struct ip_event			ip;
+	struct mmap_event		mmap;
+	struct comm_event		comm;
+	struct fork_event		fork;
+	struct period_event		period;
+} event_t;
+
+static LIST_HEAD(dsos);
+static struct dso *kernel_dso;
+static struct dso *vdso;
+
+static void dsos__add(struct dso *dso)
+{
+	list_add_tail(&dso->node, &dsos);
+}
+
+static struct dso *dsos__find(const char *name)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		if (strcmp(pos->name, name) == 0)
+			return pos;
+	return NULL;
+}
+
+static struct dso *dsos__findnew(const char *name)
+{
+	struct dso *dso = dsos__find(name);
+	int nr;
+
+	if (dso)
+		return dso;
+
+	dso = dso__new(name, 0);
+	if (!dso)
+		goto out_delete_dso;
+
+	nr = dso__load(dso, NULL, verbose);
+	if (nr < 0) {
+		if (verbose)
+			fprintf(stderr, "Failed to open: %s\n", name);
+		goto out_delete_dso;
+	}
+	if (!nr && verbose) {
+		fprintf(stderr,
+		"No symbols found in: %s, maybe install a debug package?\n",
+				name);
+	}
+
+	dsos__add(dso);
+
+	return dso;
+
+out_delete_dso:
+	dso__delete(dso);
+	return NULL;
+}
+
+static void dsos__fprintf(FILE *fp)
+{
+	struct dso *pos;
+
+	list_for_each_entry(pos, &dsos, node)
+		dso__fprintf(pos, fp);
+}
+
+static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip)
+{
+	return dso__find_symbol(kernel_dso, ip);
+}
+
+static int load_kernel(void)
+{
+	int err;
+
+	kernel_dso = dso__new("[kernel]", 0);
+	if (!kernel_dso)
+		return -1;
+
+	err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose);
+	if (err) {
+		dso__delete(kernel_dso);
+		kernel_dso = NULL;
+	} else
+		dsos__add(kernel_dso);
+
+	vdso = dso__new("[vdso]", 0);
+	if (!vdso)
+		return -1;
+
+	vdso->find_symbol = vdso__find_symbol;
+
+	dsos__add(vdso);
+
+	return err;
+}
+
+static char __cwd[PATH_MAX];
+static char *cwd = __cwd;
+static int cwdlen;
+
+static int strcommon(const char *pathname)
+{
+	int n = 0;
+
+	while (pathname[n] == cwd[n] && n < cwdlen)
+		++n;
+
+	return n;
+}
+
+struct map {
+	struct list_head node;
+	uint64_t	 start;
+	uint64_t	 end;
+	uint64_t	 pgoff;
+	uint64_t	 (*map_ip)(struct map *, uint64_t);
+	struct dso	 *dso;
+};
+
+static uint64_t map__map_ip(struct map *map, uint64_t ip)
+{
+	return ip - map->start + map->pgoff;
+}
+
+static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
+{
+	return ip;
+}
+
+static struct map *map__new(struct mmap_event *event)
+{
+	struct map *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		const char *filename = event->filename;
+		char newfilename[PATH_MAX];
+
+		if (cwd) {
+			int n = strcommon(filename);
+
+			if (n == cwdlen) {
+				snprintf(newfilename, sizeof(newfilename),
+					 ".%s", filename + n);
+				filename = newfilename;
+			}
+		}
+
+		self->start = event->start;
+		self->end   = event->start + event->len;
+		self->pgoff = event->pgoff;
+
+		self->dso = dsos__findnew(filename);
+		if (self->dso == NULL)
+			goto out_delete;
+
+		if (self->dso == vdso)
+			self->map_ip = vdso__map_ip;
+		else
+			self->map_ip = map__map_ip;
+	}
+	return self;
+out_delete:
+	free(self);
+	return NULL;
+}
+
+static struct map *map__clone(struct map *self)
+{
+	struct map *map = malloc(sizeof(*self));
+
+	if (!map)
+		return NULL;
+
+	memcpy(map, self, sizeof(*self));
+
+	return map;
+}
+
+static int map__overlap(struct map *l, struct map *r)
+{
+	if (l->start > r->start) {
+		struct map *t = l;
+		l = r;
+		r = t;
+	}
+
+	if (l->end > r->start)
+		return 1;
+
+	return 0;
+}
+
+static size_t map__fprintf(struct map *self, FILE *fp)
+{
+	return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n",
+		       self->start, self->end, self->pgoff, self->dso->name);
+}
+
+
+struct thread {
+	struct rb_node	 rb_node;
+	struct list_head maps;
+	pid_t		 pid;
+	char		 *comm;
+};
+
+static struct thread *thread__new(pid_t pid)
+{
+	struct thread *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->pid = pid;
+		self->comm = malloc(32);
+		if (self->comm)
+			snprintf(self->comm, 32, ":%d", self->pid);
+		INIT_LIST_HEAD(&self->maps);
+	}
+
+	return self;
+}
+
+static int thread__set_comm(struct thread *self, const char *comm)
+{
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(comm);
+	return self->comm ? 0 : -ENOMEM;
+}
+
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	struct map *pos;
+	size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
+
+	list_for_each_entry(pos, &self->maps, node)
+		ret += map__fprintf(pos, fp);
+
+	return ret;
+}
+
+
+static struct rb_root threads;
+static struct thread *last_match;
+
+static struct thread *threads__findnew(pid_t pid)
+{
+	struct rb_node **p = &threads.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+
+	/*
+	 * Font-end cache - PID lookups come in blocks,
+	 * so most of the time we dont have to look up
+	 * the full rbtree:
+	 */
+	if (last_match && last_match->pid == pid)
+		return last_match;
+
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
+
+		if (th->pid == pid) {
+			last_match = th;
+			return th;
+		}
+
+		if (pid < th->pid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	th = thread__new(pid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color(&th->rb_node, &threads);
+		last_match = th;
+	}
+
+	return th;
+}
+
+static void thread__insert_map(struct thread *self, struct map *map)
+{
+	struct map *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
+		if (map__overlap(pos, map)) {
+			list_del_init(&pos->node);
+			/* XXX leaks dsos */
+			free(pos);
+		}
+	}
+
+	list_add_tail(&map->node, &self->maps);
+}
+
+static int thread__fork(struct thread *self, struct thread *parent)
+{
+	struct map *map;
+
+	if (self->comm)
+		free(self->comm);
+	self->comm = strdup(parent->comm);
+	if (!self->comm)
+		return -ENOMEM;
+
+	list_for_each_entry(map, &parent->maps, node) {
+		struct map *new = map__clone(map);
+		if (!new)
+			return -ENOMEM;
+		thread__insert_map(self, new);
+	}
+
+	return 0;
+}
+
+static struct map *thread__find_map(struct thread *self, uint64_t ip)
+{
+	struct map *pos;
+
+	if (self == NULL)
+		return NULL;
+
+	list_for_each_entry(pos, &self->maps, node)
+		if (ip >= pos->start && ip <= pos->end)
+			return pos;
+
+	return NULL;
+}
+
+static size_t threads__fprintf(FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
+		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+		ret += thread__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+/*
+ * histogram, sorted on item, collects counts
+ */
+
+static struct rb_root hist;
+
+struct hist_entry {
+	struct rb_node	 rb_node;
+
+	struct thread	 *thread;
+	struct map	 *map;
+	struct dso	 *dso;
+	struct symbol	 *sym;
+	uint64_t	 ip;
+	char		 level;
+
+	uint32_t	 count;
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+	struct list_head list;
+
+	char *header;
+
+	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
+	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
+	size_t	(*print)(FILE *fp, struct hist_entry *);
+};
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static size_t
+sort__thread_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
+}
+
+static struct sort_entry sort_thread = {
+	.header = "         Command:  Pid",
+	.cmp	= sort__thread_cmp,
+	.print	= sort__thread_print,
+};
+
+/* --sort comm */
+
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->thread->pid - left->thread->pid;
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	char *comm_l = left->thread->comm;
+	char *comm_r = right->thread->comm;
+
+	if (!comm_l || !comm_r) {
+		if (!comm_l && !comm_r)
+			return 0;
+		else if (!comm_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(comm_l, comm_r);
+}
+
+static size_t
+sort__comm_print(FILE *fp, struct hist_entry *self)
+{
+	return fprintf(fp, "%16s", self->thread->comm);
+}
+
+static struct sort_entry sort_comm = {
+	.header		= "         Command",
+	.cmp		= sort__comm_cmp,
+	.collapse	= sort__comm_collapse,
+	.print		= sort__comm_print,
+};
+
+/* --sort dso */
+
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct dso *dso_l = left->dso;
+	struct dso *dso_r = right->dso;
+
+	if (!dso_l || !dso_r) {
+		if (!dso_l && !dso_r)
+			return 0;
+		else if (!dso_l)
+			return -1;
+		else
+			return 1;
+	}
+
+	return strcmp(dso_l->name, dso_r->name);
+}
+
+static size_t
+sort__dso_print(FILE *fp, struct hist_entry *self)
+{
+	if (self->dso)
+		return fprintf(fp, "%-25s", self->dso->name);
+
+	return fprintf(fp, "%016llx         ", (__u64)self->ip);
+}
+
+static struct sort_entry sort_dso = {
+	.header = "Shared Object            ",
+	.cmp	= sort__dso_cmp,
+	.print	= sort__dso_print,
+};
+
+/* --sort symbol */
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t ip_l, ip_r;
+
+	if (left->sym == right->sym)
+		return 0;
+
+	ip_l = left->sym ? left->sym->start : left->ip;
+	ip_r = right->sym ? right->sym->start : right->ip;
+
+	return (int64_t)(ip_r - ip_l);
+}
+
+static size_t
+sort__sym_print(FILE *fp, struct hist_entry *self)
+{
+	size_t ret = 0;
+
+	if (verbose)
+		ret += fprintf(fp, "%#018llx  ", (__u64)self->ip);
+
+	if (self->sym) {
+		ret += fprintf(fp, "[%c] %s",
+			self->dso == kernel_dso ? 'k' : '.', self->sym->name);
+	} else {
+		ret += fprintf(fp, "%#016llx", (__u64)self->ip);
+	}
+
+	return ret;
+}
+
+static struct sort_entry sort_sym = {
+	.header = "Symbol",
+	.cmp	= sort__sym_cmp,
+	.print	= sort__sym_print,
+};
+
+static int sort__need_collapse = 0;
+
+struct sort_dimension {
+	char			*name;
+	struct sort_entry	*entry;
+	int			taken;
+};
+
+static struct sort_dimension sort_dimensions[] = {
+	{ .name = "pid",	.entry = &sort_thread,	},
+	{ .name = "comm",	.entry = &sort_comm,	},
+	{ .name = "dso",	.entry = &sort_dso,	},
+	{ .name = "symbol",	.entry = &sort_sym,	},
+};
+
+static LIST_HEAD(hist_entry__sort_list);
+
+static int sort_dimension__add(char *tok)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
+		struct sort_dimension *sd = &sort_dimensions[i];
+
+		if (sd->taken)
+			continue;
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		if (sd->entry->collapse)
+			sort__need_collapse = 1;
+
+		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
+		sd->taken = 1;
+
+		return 0;
+	}
+
+	return -ESRCH;
+}
+
+static int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		cmp = se->cmp(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct sort_entry *se;
+	int64_t cmp = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int64_t (*f)(struct hist_entry *, struct hist_entry *);
+
+		f = se->collapse ?: se->cmp;
+
+		cmp = f(left, right);
+		if (cmp)
+			break;
+	}
+
+	return cmp;
+}
+
+static size_t
+hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
+{
+	struct sort_entry *se;
+	size_t ret;
+
+	if (total_samples) {
+		double percent = self->count * 100.0 / total_samples;
+		char *color = PERF_COLOR_NORMAL;
+
+		/*
+		 * We color high-overhead entries in red, low-overhead
+		 * entries in green - and keep the middle ground normal:
+		 */
+		if (percent >= 5.0)
+			color = PERF_COLOR_RED;
+		if (percent < 0.5)
+			color = PERF_COLOR_GREEN;
+
+		ret = color_fprintf(fp, color, "   %6.2f%%",
+				(self->count * 100.0) / total_samples);
+	} else
+		ret = fprintf(fp, "%12d ", self->count);
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		fprintf(fp, "  ");
+		ret += se->print(fp, self);
+	}
+
+	ret += fprintf(fp, "\n");
+
+	return ret;
+}
+
+/*
+ * collect histogram counts
+ */
+
+static int
+hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
+		struct symbol *sym, uint64_t ip, char level)
+{
+	struct rb_node **p = &hist.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct hist_entry entry = {
+		.thread	= thread,
+		.map	= map,
+		.dso	= dso,
+		.sym	= sym,
+		.ip	= ip,
+		.level	= level,
+		.count	= 1,
+	};
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__cmp(&entry, he);
+
+		if (!cmp) {
+			he->count++;
+			return 0;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	he = malloc(sizeof(*he));
+	if (!he)
+		return -ENOMEM;
+	*he = entry;
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &hist);
+
+	return 0;
+}
+
+static void hist_entry__free(struct hist_entry *he)
+{
+	free(he);
+}
+
+/*
+ * collapse the histogram
+ */
+
+static struct rb_root collapse_hists;
+
+static void collapse__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &collapse_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			iter->count += he->count;
+			hist_entry__free(he);
+			return;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &collapse_hists);
+}
+
+static void collapse__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+
+	if (!sort__need_collapse)
+		return;
+
+	next = rb_first(&hist);
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, &hist);
+		collapse__insert_entry(n);
+	}
+}
+
+/*
+ * reverse the map, sort on count.
+ */
+
+static struct rb_root output_hists;
+
+static void output__insert_entry(struct hist_entry *he)
+{
+	struct rb_node **p = &output_hists.rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (he->count > iter->count)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, &output_hists);
+}
+
+static void output__resort(void)
+{
+	struct rb_node *next;
+	struct hist_entry *n;
+	struct rb_root *tree = &hist;
+
+	if (sort__need_collapse)
+		tree = &collapse_hists;
+
+	next = rb_first(tree);
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node);
+		next = rb_next(&n->rb_node);
+
+		rb_erase(&n->rb_node, tree);
+		output__insert_entry(n);
+	}
+}
+
+static size_t output__fprintf(FILE *fp, uint64_t total_samples)
+{
+	struct hist_entry *pos;
+	struct sort_entry *se;
+	struct rb_node *nd;
+	size_t ret = 0;
+
+	fprintf(fp, "\n");
+	fprintf(fp, "#\n");
+	fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples);
+	fprintf(fp, "#\n");
+
+	fprintf(fp, "# Overhead");
+	list_for_each_entry(se, &hist_entry__sort_list, list)
+		fprintf(fp, "  %s", se->header);
+	fprintf(fp, "\n");
+
+	fprintf(fp, "# ........");
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		int i;
+
+		fprintf(fp, "  ");
+		for (i = 0; i < strlen(se->header); i++)
+			fprintf(fp, ".");
+	}
+	fprintf(fp, "\n");
+
+	fprintf(fp, "#\n");
+
+	for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node);
+		ret += hist_entry__fprintf(fp, pos, total_samples);
+	}
+
+	if (!strcmp(sort_order, default_sort_order)) {
+		fprintf(fp, "#\n");
+		fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n");
+		fprintf(fp, "#\n");
+	}
+	fprintf(fp, "\n");
+
+	return ret;
+}
+
+static void register_idle_thread(void)
+{
+	struct thread *thread = threads__findnew(0);
+
+	if (thread == NULL ||
+			thread__set_comm(thread, "[idle]")) {
+		fprintf(stderr, "problem inserting idle task.\n");
+		exit(-1);
+	}
+}
+
+static unsigned long total = 0,
+		     total_mmap = 0,
+		     total_comm = 0,
+		     total_fork = 0,
+		     total_unknown = 0;
+
+static int
+process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	char level;
+	int show = 0;
+	struct dso *dso = NULL;
+	struct thread *thread = threads__findnew(event->ip.pid);
+	uint64_t ip = event->ip.ip;
+	struct map *map = NULL;
+
+	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->header.misc,
+		event->ip.pid,
+		(void *)(long)ip);
+
+	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	if (thread == NULL) {
+		fprintf(stderr, "problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
+		show = SHOW_KERNEL;
+		level = 'k';
+
+		dso = kernel_dso;
+
+		dprintf(" ...... dso: %s\n", dso->name);
+
+	} else if (event->header.misc & PERF_EVENT_MISC_USER) {
+
+		show = SHOW_USER;
+		level = '.';
+
+		map = thread__find_map(thread, ip);
+		if (map != NULL) {
+			ip = map->map_ip(map, ip);
+			dso = map->dso;
+		} else {
+			/*
+			 * If this is outside of all known maps,
+			 * and is a negative address, try to look it
+			 * up in the kernel dso, as it might be a
+			 * vsyscall (which executes in user-mode):
+			 */
+			if ((long long)ip < 0)
+				dso = kernel_dso;
+		}
+		dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+
+	} else {
+		show = SHOW_HV;
+		level = 'H';
+		dprintf(" ...... dso: [hypervisor]\n");
+	}
+
+	if (show & show_mask) {
+		struct symbol *sym = NULL;
+
+		if (dso)
+			sym = dso->find_symbol(dso, ip);
+
+		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
+			fprintf(stderr,
+		"problem incrementing symbol count, skipping event\n");
+			return -1;
+		}
+	}
+	total++;
+
+	return 0;
+}
+
+static int
+process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->mmap.pid);
+	struct map *map = map__new(&event->mmap);
+
+	dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->mmap.pid,
+		(void *)(long)event->mmap.start,
+		(void *)(long)event->mmap.len,
+		(void *)(long)event->mmap.pgoff,
+		event->mmap.filename);
+
+	if (thread == NULL || map == NULL) {
+		dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n");
+		return 0;
+	}
+
+	thread__insert_map(thread, map);
+	total_mmap++;
+
+	return 0;
+}
+
+static int
+process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->comm.pid);
+
+	dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->comm.comm, event->comm.pid);
+
+	if (thread == NULL ||
+	    thread__set_comm(thread, event->comm.comm)) {
+		dprintf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		return -1;
+	}
+	total_comm++;
+
+	return 0;
+}
+
+static int
+process_fork_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	struct thread *thread = threads__findnew(event->fork.pid);
+	struct thread *parent = threads__findnew(event->fork.ppid);
+
+	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->fork.pid, event->fork.ppid);
+
+	if (!thread || !parent || thread__fork(thread, parent)) {
+		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
+		return -1;
+	}
+	total_fork++;
+
+	return 0;
+}
+
+static int
+process_period_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n",
+		(void *)(offset + head),
+		(void *)(long)(event->header.size),
+		event->period.time,
+		event->period.id,
+		event->period.sample_period);
+
+	return 0;
+}
+
+static int
+process_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
+		return process_overflow_event(event, offset, head);
+
+	switch (event->header.type) {
+	case PERF_EVENT_MMAP:
+		return process_mmap_event(event, offset, head);
+
+	case PERF_EVENT_COMM:
+		return process_comm_event(event, offset, head);
+
+	case PERF_EVENT_FORK:
+		return process_fork_event(event, offset, head);
+
+	case PERF_EVENT_PERIOD:
+		return process_period_event(event, offset, head);
+	/*
+	 * We dont process them right now but they are fine:
+	 */
+
+	case PERF_EVENT_THROTTLE:
+	case PERF_EVENT_UNTHROTTLE:
+		return 0;
+
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __cmd_report(void)
+{
+	int ret, rc = EXIT_FAILURE;
+	unsigned long offset = 0;
+	unsigned long head = 0;
+	struct stat stat;
+	event_t *event;
+	uint32_t size;
+	char *buf;
+
+	register_idle_thread();
+
+	input = open(input_name, O_RDONLY);
+	if (input < 0) {
+		perror("failed to open file");
+		exit(-1);
+	}
+
+	ret = fstat(input, &stat);
+	if (ret < 0) {
+		perror("failed to stat file");
+		exit(-1);
+	}
+
+	if (!stat.st_size) {
+		fprintf(stderr, "zero-sized file, nothing to do!\n");
+		exit(0);
+	}
+
+	if (load_kernel() < 0) {
+		perror("failed to load kernel symbols");
+		return EXIT_FAILURE;
+	}
+
+	if (!full_paths) {
+		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
+			perror("failed to get the current directory");
+			return EXIT_FAILURE;
+		}
+		cwdlen = strlen(cwd);
+	} else {
+		cwd = NULL;
+		cwdlen = 0;
+	}
+remap:
+	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
+			   MAP_SHARED, input, offset);
+	if (buf == MAP_FAILED) {
+		perror("failed to mmap file");
+		exit(-1);
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	size = event->header.size;
+	if (!size)
+		size = 8;
+
+	if (head + event->header.size >= page_size * mmap_window) {
+		unsigned long shift = page_size * (head / page_size);
+		int ret;
+
+		ret = munmap(buf, page_size * mmap_window);
+		assert(ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	dprintf("%p [%p]: event: %d\n",
+			(void *)(offset + head),
+			(void *)(long)event->header.size,
+			event->header.type);
+
+	if (!size || process_event(event, offset, head) < 0) {
+
+		dprintf("%p [%p]: skipping unknown header type: %d\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->header.type);
+
+		total_unknown++;
+
+		/*
+		 * assume we lost track of the stream, check alignment, and
+		 * increment a single u64 in the hope to catch on again 'soon'.
+		 */
+
+		if (unlikely(head & 7))
+			head &= ~7ULL;
+
+		size = 8;
+	}
+
+	head += size;
+
+	if (offset + head < stat.st_size)
+		goto more;
+
+	rc = EXIT_SUCCESS;
+	close(input);
+
+	dprintf("      IP events: %10ld\n", total);
+	dprintf("    mmap events: %10ld\n", total_mmap);
+	dprintf("    comm events: %10ld\n", total_comm);
+	dprintf("    fork events: %10ld\n", total_fork);
+	dprintf(" unknown events: %10ld\n", total_unknown);
+
+	if (dump_trace)
+		return 0;
+
+	if (verbose >= 3)
+		threads__fprintf(stdout);
+
+	if (verbose >= 2)
+		dsos__fprintf(stdout);
+
+	collapse__resort();
+	output__resort();
+	output__fprintf(stdout, total);
+
+	return rc;
+}
+
+static const char * const report_usage[] = {
+	"perf report [<options>] <command>",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
+	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+		   "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"),
+	OPT_BOOLEAN('P', "full-paths", &full_paths,
+		    "Don't shorten the pathnames taking into account the cwd"),
+	OPT_END()
+};
+
+static void setup_sorting(void)
+{
+	char *tmp, *tok, *str = strdup(sort_order);
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		if (sort_dimension__add(tok) < 0) {
+			error("Unknown --sort key: `%s'", tok);
+			usage_with_options(report_usage, options);
+		}
+	}
+
+	free(str);
+}
+
+int cmd_report(int argc, const char **argv, const char *prefix)
+{
+	symbol__init();
+
+	page_size = getpagesize();
+
+	argc = parse_options(argc, argv, options, report_usage, 0);
+
+	setup_sorting();
+
+	/*
+	 * Any (unrecognized) arguments left?
+	 */
+	if (argc)
+		usage_with_options(report_usage, options);
+
+	setup_pager();
+
+	return __cmd_report();
+}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
new file mode 100644
index 0000000..2cbf5a1
--- /dev/null
+++ b/tools/perf/builtin-stat.c
@@ -0,0 +1,339 @@
+/*
+ * builtin-stat.c
+ *
+ * Builtin stat command: Give a precise performance counters summary
+ * overview about any workload, CPU or specific PID.
+ *
+ * Sample output:
+
+   $ perf stat ~/hackbench 10
+   Time: 0.104
+
+    Performance counter stats for '/home/mingo/hackbench':
+
+       1255.538611  task clock ticks     #      10.143 CPU utilization factor
+             54011  context switches     #       0.043 M/sec
+               385  CPU migrations       #       0.000 M/sec
+             17755  pagefaults           #       0.014 M/sec
+        3808323185  CPU cycles           #    3033.219 M/sec
+        1575111190  instructions         #    1254.530 M/sec
+          17367895  cache references     #      13.833 M/sec
+           7674421  cache misses         #       6.112 M/sec
+
+    Wall-clock time elapsed:   123.786620 msecs
+
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Yanmin Zhang <yanmin.zhang@intel.com>
+ *   Wu Fengguang <fengguang.wu@intel.com>
+ *   Mike Galbraith <efault@gmx.de>
+ *   Paul Mackerras <paulus@samba.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "perf.h"
+#include "builtin.h"
+#include "util/util.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#include <sys/prctl.h>
+
+static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
+
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK		},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS	},
+
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES	},
+};
+
+static int			system_wide			=  0;
+static int			inherit				=  1;
+
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static int			target_pid			= -1;
+static int			nr_cpus				=  0;
+static unsigned int		page_size;
+
+static int			scale				=  1;
+
+static const unsigned int default_count[] = {
+	1000000,
+	1000000,
+	  10000,
+	  10000,
+	1000000,
+	  10000,
+};
+
+static __u64			event_res[MAX_COUNTERS][3];
+static __u64			event_scaled[MAX_COUNTERS];
+
+static __u64			runtime_nsecs;
+static __u64			walltime_nsecs;
+
+static void create_perfstat_counter(int counter)
+{
+	struct perf_counter_attr *attr = attrs + counter;
+
+	if (scale)
+		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				    PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	if (system_wide) {
+		int cpu;
+		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+			fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
+			if (fd[cpu][counter] < 0) {
+				printf("perfstat error: syscall returned with %d (%s)\n",
+						fd[cpu][counter], strerror(errno));
+				exit(-1);
+			}
+		}
+	} else {
+		attr->inherit	= inherit;
+		attr->disabled	= 1;
+
+		fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
+		if (fd[0][counter] < 0) {
+			printf("perfstat error: syscall returned with %d (%s)\n",
+					fd[0][counter], strerror(errno));
+			exit(-1);
+		}
+	}
+}
+
+/*
+ * Does the counter have nsecs as a unit?
+ */
+static inline int nsec_counter(int counter)
+{
+	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
+		return 0;
+
+	if (attrs[counter].config == PERF_COUNT_CPU_CLOCK)
+		return 1;
+
+	if (attrs[counter].config == PERF_COUNT_TASK_CLOCK)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Read out the results of a single counter:
+ */
+static void read_counter(int counter)
+{
+	__u64 *count, single_count[3];
+	ssize_t res;
+	int cpu, nv;
+	int scaled;
+
+	count = event_res[counter];
+
+	count[0] = count[1] = count[2] = 0;
+
+	nv = scale ? 3 : 1;
+	for (cpu = 0; cpu < nr_cpus; cpu ++) {
+		res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
+		assert(res == nv * sizeof(__u64));
+
+		count[0] += single_count[0];
+		if (scale) {
+			count[1] += single_count[1];
+			count[2] += single_count[2];
+		}
+	}
+
+	scaled = 0;
+	if (scale) {
+		if (count[2] == 0) {
+			event_scaled[counter] = -1;
+			count[0] = 0;
+			return;
+		}
+
+		if (count[2] < count[1]) {
+			event_scaled[counter] = 1;
+			count[0] = (unsigned long long)
+				((double)count[0] * count[1] / count[2] + 0.5);
+		}
+	}
+	/*
+	 * Save the full runtime - to allow normalization during printout:
+	 */
+	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+		attrs[counter].config == PERF_COUNT_TASK_CLOCK)
+		runtime_nsecs = count[0];
+}
+
+/*
+ * Print out the results of a single counter:
+ */
+static void print_counter(int counter)
+{
+	__u64 *count;
+	int scaled;
+
+	count = event_res[counter];
+	scaled = event_scaled[counter];
+
+	if (scaled == -1) {
+		fprintf(stderr, " %14s  %-20s\n",
+			"<not counted>", event_name(counter));
+		return;
+	}
+
+	if (nsec_counter(counter)) {
+		double msecs = (double)count[0] / 1000000;
+
+		fprintf(stderr, " %14.6f  %-20s",
+			msecs, event_name(counter));
+		if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+			attrs[counter].config == PERF_COUNT_TASK_CLOCK) {
+
+			fprintf(stderr, " # %11.3f CPU utilization factor",
+				(double)count[0] / (double)walltime_nsecs);
+		}
+	} else {
+		fprintf(stderr, " %14Ld  %-20s",
+			count[0], event_name(counter));
+		if (runtime_nsecs)
+			fprintf(stderr, " # %11.3f M/sec",
+				(double)count[0]/runtime_nsecs*1000.0);
+	}
+	if (scaled)
+		fprintf(stderr, "  (scaled from %.2f%%)",
+			(double) count[2] / count[1] * 100);
+	fprintf(stderr, "\n");
+}
+
+static int do_perfstat(int argc, const char **argv)
+{
+	unsigned long long t0, t1;
+	int counter;
+	int status;
+	int pid;
+	int i;
+
+	if (!system_wide)
+		nr_cpus = 1;
+
+	for (counter = 0; counter < nr_counters; counter++)
+		create_perfstat_counter(counter);
+
+	/*
+	 * Enable counters and exec the command:
+	 */
+	t0 = rdclock();
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	if ((pid = fork()) < 0)
+		perror("failed to fork");
+
+	if (!pid) {
+		if (execvp(argv[0], (char **)argv)) {
+			perror(argv[0]);
+			exit(-1);
+		}
+	}
+
+	while (wait(&status) >= 0)
+		;
+
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+	t1 = rdclock();
+
+	walltime_nsecs = t1 - t0;
+
+	fflush(stdout);
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
+
+	for (i = 1; i < argc; i++)
+		fprintf(stderr, " %s", argv[i]);
+
+	fprintf(stderr, "\':\n");
+	fprintf(stderr, "\n");
+
+	for (counter = 0; counter < nr_counters; counter++)
+		read_counter(counter);
+
+	for (counter = 0; counter < nr_counters; counter++)
+		print_counter(counter);
+
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
+			(double)(t1-t0)/1e6);
+	fprintf(stderr, "\n");
+
+	return 0;
+}
+
+static void skip_signal(int signo)
+{
+}
+
+static const char * const stat_usage[] = {
+	"perf stat [<options>] <command>",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
+	OPT_BOOLEAN('i', "inherit", &inherit,
+		    "child tasks inherit counters"),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "stat events on existing pid"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('S', "scale", &scale,
+			    "scale/normalize counters"),
+	OPT_END()
+};
+
+int cmd_stat(int argc, const char **argv, const char *prefix)
+{
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	memcpy(attrs, default_attrs, sizeof(attrs));
+
+	argc = parse_options(argc, argv, options, stat_usage, 0);
+	if (!argc)
+		usage_with_options(stat_usage, options);
+
+	if (!nr_counters)
+		nr_counters = 8;
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	/*
+	 * We dont want to block the signals - that would cause
+	 * child tasks to inherit that and Ctrl-C would not work.
+	 * What we want is for Ctrl-C to work in the exec()-ed
+	 * task, but being ignored by perf stat itself:
+	 */
+	signal(SIGINT,  skip_signal);
+	signal(SIGALRM, skip_signal);
+	signal(SIGABRT, skip_signal);
+
+	return do_perfstat(argc, argv);
+}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
new file mode 100644
index 0000000..f2e7312
--- /dev/null
+++ b/tools/perf/builtin-top.c
@@ -0,0 +1,692 @@
+/*
+ * builtin-top.c
+ *
+ * Builtin top command: Display a continuously updated profile of
+ * any workload, CPU or specific PID.
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Yanmin Zhang <yanmin.zhang@intel.com>
+ *   Wu Fengguang <fengguang.wu@intel.com>
+ *   Mike Galbraith <efault@gmx.de>
+ *   Paul Mackerras <paulus@samba.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/symbol.h"
+#include "util/color.h"
+#include "util/util.h"
+#include "util/rbtree.h"
+#include "util/parse-options.h"
+#include "util/parse-events.h"
+
+#include <assert.h>
+#include <fcntl.h>
+
+#include <stdio.h>
+
+#include <errno.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static int			system_wide			=  0;
+
+static int			default_interval		= 100000;
+
+static __u64			count_filter			=  5;
+static int			print_entries			= 15;
+
+static int			target_pid			= -1;
+static int			profile_cpu			= -1;
+static int			nr_cpus				=  0;
+static unsigned int		realtime_prio			=  0;
+static int			group				=  0;
+static unsigned int		page_size;
+static unsigned int		mmap_pages			= 16;
+static int			freq				=  0;
+
+static char			*sym_filter;
+static unsigned long		filter_start;
+static unsigned long		filter_end;
+
+static int			delay_secs			=  2;
+static int			zero;
+static int			dump_symtab;
+
+/*
+ * Symbols
+ */
+
+static uint64_t			min_ip;
+static uint64_t			max_ip = -1ll;
+
+struct sym_entry {
+	struct rb_node		rb_node;
+	struct list_head	node;
+	unsigned long		count[MAX_COUNTERS];
+	unsigned long		snap_count;
+	double			weight;
+	int			skip;
+};
+
+struct sym_entry		*sym_filter_entry;
+
+struct dso			*kernel_dso;
+
+/*
+ * Symbols will be added here in record_ip and will get out
+ * after decayed.
+ */
+static LIST_HEAD(active_symbols);
+static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Ordering weight: count-1 * count-2 * ... / count-n
+ */
+static double sym_weight(const struct sym_entry *sym)
+{
+	double weight = sym->snap_count;
+	int counter;
+
+	for (counter = 1; counter < nr_counters-1; counter++)
+		weight *= sym->count[counter];
+
+	weight /= (sym->count[counter] + 1);
+
+	return weight;
+}
+
+static long			samples;
+static long			userspace_samples;
+static const char		CONSOLE_CLEAR[] = "[H[2J";
+
+static void __list_insert_active_sym(struct sym_entry *syme)
+{
+	list_add(&syme->node, &active_symbols);
+}
+
+static void list_remove_active_sym(struct sym_entry *syme)
+{
+	pthread_mutex_lock(&active_symbols_lock);
+	list_del_init(&syme->node);
+	pthread_mutex_unlock(&active_symbols_lock);
+}
+
+static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
+{
+	struct rb_node **p = &tree->rb_node;
+	struct rb_node *parent = NULL;
+	struct sym_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct sym_entry, rb_node);
+
+		if (se->weight > iter->weight)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&se->rb_node, parent, p);
+	rb_insert_color(&se->rb_node, tree);
+}
+
+static void print_sym_table(void)
+{
+	int printed = 0, j;
+	int counter;
+	float samples_per_sec = samples/delay_secs;
+	float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
+	float sum_ksamples = 0.0;
+	struct sym_entry *syme, *n;
+	struct rb_root tmp = RB_ROOT;
+	struct rb_node *nd;
+
+	samples = userspace_samples = 0;
+
+	/* Sort the active symbols */
+	pthread_mutex_lock(&active_symbols_lock);
+	syme = list_entry(active_symbols.next, struct sym_entry, node);
+	pthread_mutex_unlock(&active_symbols_lock);
+
+	list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+		syme->snap_count = syme->count[0];
+		if (syme->snap_count != 0) {
+			syme->weight = sym_weight(syme);
+			rb_insert_active_sym(&tmp, syme);
+			sum_ksamples += syme->snap_count;
+
+			for (j = 0; j < nr_counters; j++)
+				syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
+		} else
+			list_remove_active_sym(syme);
+	}
+
+	puts(CONSOLE_CLEAR);
+
+	printf(
+"------------------------------------------------------------------------------\n");
+	printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
+		samples_per_sec,
+		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
+
+	if (nr_counters == 1) {
+		printf("%Ld", attrs[0].sample_period);
+		if (freq)
+			printf("Hz ");
+		else
+			printf(" ");
+	}
+
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (counter)
+			printf("/");
+
+		printf("%s", event_name(counter));
+	}
+
+	printf( "], ");
+
+	if (target_pid != -1)
+		printf(" (target_pid: %d", target_pid);
+	else
+		printf(" (all");
+
+	if (profile_cpu != -1)
+		printf(", cpu: %d)\n", profile_cpu);
+	else {
+		if (target_pid != -1)
+			printf(")\n");
+		else
+			printf(", %d CPUs)\n", nr_cpus);
+	}
+
+	printf("------------------------------------------------------------------------------\n\n");
+
+	if (nr_counters == 1)
+		printf("             samples    pcnt");
+	else
+		printf("  weight     samples    pcnt");
+
+	printf("         RIP          kernel function\n"
+	       	       "  ______     _______   _____   ________________   _______________\n\n"
+	);
+
+	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
+		struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
+		struct symbol *sym = (struct symbol *)(syme + 1);
+		char *color = PERF_COLOR_NORMAL;
+		double pcnt;
+
+		if (++printed > print_entries || syme->snap_count < count_filter)
+			continue;
+
+		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
+					 sum_ksamples));
+
+		/*
+		 * We color high-overhead entries in red, low-overhead
+		 * entries in green - and keep the middle ground normal:
+		 */
+		if (pcnt >= 5.0)
+			color = PERF_COLOR_RED;
+		if (pcnt < 0.5)
+			color = PERF_COLOR_GREEN;
+
+		if (nr_counters == 1)
+			printf("%20.2f - ", syme->weight);
+		else
+			printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
+
+		color_fprintf(stdout, color, "%4.1f%%", pcnt);
+		printf(" - %016llx : %s\n", sym->start, sym->name);
+	}
+}
+
+static void *display_thread(void *arg)
+{
+	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+	int delay_msecs = delay_secs * 1000;
+
+	printf("PerfTop refresh period: %d seconds\n", delay_secs);
+
+	do {
+		print_sym_table();
+	} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
+
+	printf("key pressed - exiting.\n");
+	exit(0);
+
+	return NULL;
+}
+
+static int symbol_filter(struct dso *self, struct symbol *sym)
+{
+	static int filter_match;
+	struct sym_entry *syme;
+	const char *name = sym->name;
+
+	if (!strcmp(name, "_text") ||
+	    !strcmp(name, "_etext") ||
+	    !strcmp(name, "_sinittext") ||
+	    !strncmp("init_module", name, 11) ||
+	    !strncmp("cleanup_module", name, 14) ||
+	    strstr(name, "_text_start") ||
+	    strstr(name, "_text_end"))
+		return 1;
+
+	syme = dso__sym_priv(self, sym);
+	/* Tag samples to be skipped. */
+	if (!strcmp("default_idle", name) ||
+	    !strcmp("cpu_idle", name) ||
+	    !strcmp("enter_idle", name) ||
+	    !strcmp("exit_idle", name) ||
+	    !strcmp("mwait_idle", name))
+		syme->skip = 1;
+
+	if (filter_match == 1) {
+		filter_end = sym->start;
+		filter_match = -1;
+		if (filter_end - filter_start > 10000) {
+			fprintf(stderr,
+				"hm, too large filter symbol <%s> - skipping.\n",
+				sym_filter);
+			fprintf(stderr, "symbol filter start: %016lx\n",
+				filter_start);
+			fprintf(stderr, "                end: %016lx\n",
+				filter_end);
+			filter_end = filter_start = 0;
+			sym_filter = NULL;
+			sleep(1);
+		}
+	}
+
+	if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
+		filter_match = 1;
+		filter_start = sym->start;
+	}
+
+
+	return 0;
+}
+
+static int parse_symbols(void)
+{
+	struct rb_node *node;
+	struct symbol  *sym;
+
+	kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry));
+	if (kernel_dso == NULL)
+		return -1;
+
+	if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0)
+		goto out_delete_dso;
+
+	node = rb_first(&kernel_dso->syms);
+	sym = rb_entry(node, struct symbol, rb_node);
+	min_ip = sym->start;
+
+	node = rb_last(&kernel_dso->syms);
+	sym = rb_entry(node, struct symbol, rb_node);
+	max_ip = sym->end;
+
+	if (dump_symtab)
+		dso__fprintf(kernel_dso, stderr);
+
+	return 0;
+
+out_delete_dso:
+	dso__delete(kernel_dso);
+	kernel_dso = NULL;
+	return -1;
+}
+
+#define TRACE_COUNT     3
+
+/*
+ * Binary search in the histogram table and record the hit:
+ */
+static void record_ip(uint64_t ip, int counter)
+{
+	struct symbol *sym = dso__find_symbol(kernel_dso, ip);
+
+	if (sym != NULL) {
+		struct sym_entry *syme = dso__sym_priv(kernel_dso, sym);
+
+		if (!syme->skip) {
+			syme->count[counter]++;
+			pthread_mutex_lock(&active_symbols_lock);
+			if (list_empty(&syme->node) || !syme->node.next)
+				__list_insert_active_sym(syme);
+			pthread_mutex_unlock(&active_symbols_lock);
+			return;
+		}
+	}
+
+	samples--;
+}
+
+static void process_event(uint64_t ip, int counter)
+{
+	samples++;
+
+	if (ip < min_ip || ip > max_ip) {
+		userspace_samples++;
+		return;
+	}
+
+	record_ip(ip, counter);
+}
+
+struct mmap_data {
+	int			counter;
+	void			*base;
+	unsigned int		mask;
+	unsigned int		prev;
+};
+
+static unsigned int mmap_read_head(struct mmap_data *md)
+{
+	struct perf_counter_mmap_page *pc = md->base;
+	int head;
+
+	head = pc->data_head;
+	rmb();
+
+	return head;
+}
+
+struct timeval last_read, this_read;
+
+static void mmap_read(struct mmap_data *md)
+{
+	unsigned int head = mmap_read_head(md);
+	unsigned int old = md->prev;
+	unsigned char *data = md->base + page_size;
+	int diff;
+
+	gettimeofday(&this_read, NULL);
+
+	/*
+	 * If we're further behind than half the buffer, there's a chance
+	 * the writer will bite our tail and mess up the samples under us.
+	 *
+	 * If we somehow ended up ahead of the head, we got messed up.
+	 *
+	 * In either case, truncate and restart at head.
+	 */
+	diff = head - old;
+	if (diff > md->mask / 2 || diff < 0) {
+		struct timeval iv;
+		unsigned long msecs;
+
+		timersub(&this_read, &last_read, &iv);
+		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+		fprintf(stderr, "WARNING: failed to keep up with mmap data."
+				"  Last read %lu msecs ago.\n", msecs);
+
+		/*
+		 * head points to a known good entry, start there.
+		 */
+		old = head;
+	}
+
+	last_read = this_read;
+
+	for (; old != head;) {
+		struct ip_event {
+			struct perf_event_header header;
+			__u64 ip;
+			__u32 pid, target_pid;
+		};
+		struct mmap_event {
+			struct perf_event_header header;
+			__u32 pid, target_pid;
+			__u64 start;
+			__u64 len;
+			__u64 pgoff;
+			char filename[PATH_MAX];
+		};
+
+		typedef union event_union {
+			struct perf_event_header header;
+			struct ip_event ip;
+			struct mmap_event mmap;
+		} event_t;
+
+		event_t *event = (event_t *)&data[old & md->mask];
+
+		event_t event_copy;
+
+		size_t size = event->header.size;
+
+		/*
+		 * Event straddles the mmap boundary -- header should always
+		 * be inside due to u64 alignment of output.
+		 */
+		if ((old & md->mask) + size != ((old + size) & md->mask)) {
+			unsigned int offset = old;
+			unsigned int len = min(sizeof(*event), size), cpy;
+			void *dst = &event_copy;
+
+			do {
+				cpy = min(md->mask + 1 - (offset & md->mask), len);
+				memcpy(dst, &data[offset & md->mask], cpy);
+				offset += cpy;
+				dst += cpy;
+				len -= cpy;
+			} while (len);
+
+			event = &event_copy;
+		}
+
+		old += size;
+
+		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
+			if (event->header.type & PERF_SAMPLE_IP)
+				process_event(event->ip.ip, md->counter);
+		}
+	}
+
+	md->prev = old;
+}
+
+static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
+static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+
+static int __cmd_top(void)
+{
+	struct perf_counter_attr *attr;
+	pthread_t thread;
+	int i, counter, group_fd, nr_poll = 0;
+	unsigned int cpu;
+	int ret;
+
+	for (i = 0; i < nr_cpus; i++) {
+		group_fd = -1;
+		for (counter = 0; counter < nr_counters; counter++) {
+
+			cpu	= profile_cpu;
+			if (target_pid == -1 && profile_cpu == -1)
+				cpu = i;
+
+			attr = attrs + counter;
+
+			attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+			attr->freq		= freq;
+
+			fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
+			if (fd[i][counter] < 0) {
+				int err = errno;
+
+				error("syscall returned with %d (%s)\n",
+					fd[i][counter], strerror(err));
+				if (err == EPERM)
+					printf("Are you root?\n");
+				exit(-1);
+			}
+			assert(fd[i][counter] >= 0);
+			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
+
+			/*
+			 * First counter acts as the group leader:
+			 */
+			if (group && group_fd == -1)
+				group_fd = fd[i][counter];
+
+			event_array[nr_poll].fd = fd[i][counter];
+			event_array[nr_poll].events = POLLIN;
+			nr_poll++;
+
+			mmap_array[i][counter].counter = counter;
+			mmap_array[i][counter].prev = 0;
+			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+					PROT_READ, MAP_SHARED, fd[i][counter], 0);
+			if (mmap_array[i][counter].base == MAP_FAILED)
+				die("failed to mmap with %d (%s)\n", errno, strerror(errno));
+		}
+	}
+
+	if (pthread_create(&thread, NULL, display_thread, NULL)) {
+		printf("Could not create display thread.\n");
+		exit(-1);
+	}
+
+	if (realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			printf("Could not set realtime priority.\n");
+			exit(-1);
+		}
+	}
+
+	while (1) {
+		int hits = samples;
+
+		for (i = 0; i < nr_cpus; i++) {
+			for (counter = 0; counter < nr_counters; counter++)
+				mmap_read(&mmap_array[i][counter]);
+		}
+
+		if (hits == samples)
+			ret = poll(event_array, nr_poll, 100);
+	}
+
+	return 0;
+}
+
+static const char * const top_usage[] = {
+	"perf top [<options>]",
+	NULL
+};
+
+static const struct option options[] = {
+	OPT_CALLBACK('e', "event", NULL, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events),
+	OPT_INTEGER('c', "count", &default_interval,
+		    "event period to sample"),
+	OPT_INTEGER('p', "pid", &target_pid,
+		    "profile events on existing pid"),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_INTEGER('C', "CPU", &profile_cpu,
+		    "CPU to profile on"),
+	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
+		    "number of mmap data pages"),
+	OPT_INTEGER('r', "realtime", &realtime_prio,
+		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_INTEGER('d', "delay", &delay_secs,
+		    "number of seconds to delay between refreshes"),
+	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
+			    "dump the symbol table used for profiling"),
+	OPT_INTEGER('f', "count-filter", &count_filter,
+		    "only display functions with more events than this"),
+	OPT_BOOLEAN('g', "group", &group,
+			    "put the counters into a counter group"),
+	OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
+		    "only display symbols matchig this pattern"),
+	OPT_BOOLEAN('z', "zero", &group,
+		    "zero history across updates"),
+	OPT_INTEGER('F', "freq", &freq,
+		    "profile at this frequency"),
+	OPT_INTEGER('E', "entries", &print_entries,
+		    "display this many functions"),
+	OPT_END()
+};
+
+int cmd_top(int argc, const char **argv, const char *prefix)
+{
+	int counter;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	argc = parse_options(argc, argv, options, top_usage, 0);
+	if (argc)
+		usage_with_options(top_usage, options);
+
+	if (freq) {
+		default_interval = freq;
+		freq = 1;
+	}
+
+	/* CPU and PID are mutually exclusive */
+	if (target_pid != -1 && profile_cpu != -1) {
+		printf("WARNING: PID switch overriding CPU\n");
+		sleep(1);
+		profile_cpu = -1;
+	}
+
+	if (!nr_counters)
+		nr_counters = 1;
+
+	if (delay_secs < 1)
+		delay_secs = 1;
+
+	parse_symbols();
+
+	/*
+	 * Fill in the ones not specifically initialized via -c:
+	 */
+	for (counter = 0; counter < nr_counters; counter++) {
+		if (attrs[counter].sample_period)
+			continue;
+
+		attrs[counter].sample_period = default_interval;
+	}
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	if (target_pid != -1 || profile_cpu != -1)
+		nr_cpus = 1;
+
+	return __cmd_top();
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
new file mode 100644
index 0000000..51d1682
--- /dev/null
+++ b/tools/perf/builtin.h
@@ -0,0 +1,26 @@
+#ifndef BUILTIN_H
+#define BUILTIN_H
+
+#include "util/util.h"
+#include "util/strbuf.h"
+
+extern const char perf_version_string[];
+extern const char perf_usage_string[];
+extern const char perf_more_info_string[];
+
+extern void list_common_cmds_help(void);
+extern const char *help_unknown_cmd(const char *cmd);
+extern void prune_packed_objects(int);
+extern int read_line_with_nul(char *buf, int size, FILE *file);
+extern int check_pager_config(const char *cmd);
+
+extern int cmd_annotate(int argc, const char **argv, const char *prefix);
+extern int cmd_help(int argc, const char **argv, const char *prefix);
+extern int cmd_record(int argc, const char **argv, const char *prefix);
+extern int cmd_report(int argc, const char **argv, const char *prefix);
+extern int cmd_stat(int argc, const char **argv, const char *prefix);
+extern int cmd_top(int argc, const char **argv, const char *prefix);
+extern int cmd_version(int argc, const char **argv, const char *prefix);
+extern int cmd_list(int argc, const char **argv, const char *prefix);
+
+#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
new file mode 100644
index 0000000..eebce30
--- /dev/null
+++ b/tools/perf/command-list.txt
@@ -0,0 +1,10 @@
+#
+# List of known perf commands.
+# command name			category [deprecated] [common]
+#
+perf-annotate			mainporcelain common
+perf-list			mainporcelain common
+perf-record			mainporcelain common
+perf-report			mainporcelain common
+perf-stat			mainporcelain common
+perf-top			mainporcelain common
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
new file mode 100644
index 0000000..d325076
--- /dev/null
+++ b/tools/perf/design.txt
@@ -0,0 +1,442 @@
+
+Performance Counters for Linux
+------------------------------
+
+Performance counters are special hardware registers available on most modern
+CPUs. These registers count the number of certain types of hw events: such
+as instructions executed, cachemisses suffered, or branches mis-predicted -
+without slowing down the kernel or applications. These registers can also
+trigger interrupts when a threshold number of events have passed - and can
+thus be used to profile the code that runs on that CPU.
+
+The Linux Performance Counter subsystem provides an abstraction of these
+hardware capabilities. It provides per task and per CPU counters, counter
+groups, and it provides event capabilities on top of those.  It
+provides "virtual" 64-bit counters, regardless of the width of the
+underlying hardware counters.
+
+Performance counters are accessed via special file descriptors.
+There's one file descriptor per virtual counter used.
+
+The special file descriptor is opened via the perf_counter_open()
+system call:
+
+   int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
+			     pid_t pid, int cpu, int group_fd,
+			     unsigned long flags);
+
+The syscall returns the new fd. The fd can be used via the normal
+VFS system calls: read() can be used to read the counter, fcntl()
+can be used to set the blocking mode, etc.
+
+Multiple counters can be kept open at a time, and the counters
+can be poll()ed.
+
+When creating a new counter fd, 'perf_counter_hw_event' is:
+
+struct perf_counter_hw_event {
+        /*
+         * The MSB of the config word signifies if the rest contains cpu
+         * specific (raw) counter configuration data, if unset, the next
+         * 7 bits are an event type and the rest of the bits are the event
+         * identifier.
+         */
+        __u64                   config;
+
+        __u64                   irq_period;
+        __u32                   record_type;
+        __u32                   read_format;
+
+        __u64                   disabled       :  1, /* off by default        */
+                                inherit        :  1, /* children inherit it   */
+                                pinned         :  1, /* must always be on PMU */
+                                exclusive      :  1, /* only group on PMU     */
+                                exclude_user   :  1, /* don't count user      */
+                                exclude_kernel :  1, /* ditto kernel          */
+                                exclude_hv     :  1, /* ditto hypervisor      */
+                                exclude_idle   :  1, /* don't count when idle */
+                                mmap           :  1, /* include mmap data     */
+                                munmap         :  1, /* include munmap data   */
+                                comm           :  1, /* include comm data     */
+
+                                __reserved_1   : 52;
+
+        __u32                   extra_config_len;
+        __u32                   wakeup_events;  /* wakeup every n events */
+
+        __u64                   __reserved_2;
+        __u64                   __reserved_3;
+};
+
+The 'config' field specifies what the counter should count.  It
+is divided into 3 bit-fields:
+
+raw_type: 1 bit   (most significant bit)	0x8000_0000_0000_0000
+type:	  7 bits  (next most significant)	0x7f00_0000_0000_0000
+event_id: 56 bits (least significant)		0x00ff_ffff_ffff_ffff
+
+If 'raw_type' is 1, then the counter will count a hardware event
+specified by the remaining 63 bits of event_config.  The encoding is
+machine-specific.
+
+If 'raw_type' is 0, then the 'type' field says what kind of counter
+this is, with the following encoding:
+
+enum perf_event_types {
+	PERF_TYPE_HARDWARE		= 0,
+	PERF_TYPE_SOFTWARE		= 1,
+	PERF_TYPE_TRACEPOINT		= 2,
+};
+
+A counter of PERF_TYPE_HARDWARE will count the hardware event
+specified by 'event_id':
+
+/*
+ * Generalized performance counter event types, used by the hw_event.event_id
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_ids {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_CPU_CYCLES		= 0,
+	PERF_COUNT_INSTRUCTIONS		= 1,
+	PERF_COUNT_CACHE_REFERENCES	= 2,
+	PERF_COUNT_CACHE_MISSES		= 3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_BRANCH_MISSES	= 5,
+	PERF_COUNT_BUS_CYCLES		= 6,
+};
+
+These are standardized types of events that work relatively uniformly
+on all CPUs that implement Performance Counters support under Linux,
+although there may be variations (e.g., different CPUs might count
+cache references and misses at different levels of the cache hierarchy).
+If a CPU is not able to count the selected event, then the system call
+will return -EINVAL.
+
+More hw_event_types are supported as well, but they are CPU-specific
+and accessed as raw events.  For example, to count "External bus
+cycles while bus lock signal asserted" events on Intel Core CPUs, pass
+in a 0x4064 event_id value and set hw_event.raw_type to 1.
+
+A counter of type PERF_TYPE_SOFTWARE will count one of the available
+software events, selected by 'event_id':
+
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum sw_event_ids {
+	PERF_COUNT_CPU_CLOCK		= 0,
+	PERF_COUNT_TASK_CLOCK		= 1,
+	PERF_COUNT_PAGE_FAULTS		= 2,
+	PERF_COUNT_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
+};
+
+Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
+tracer is available, and event_id values can be obtained from
+/debug/tracing/events/*/*/id
+
+
+Counters come in two flavours: counting counters and sampling
+counters.  A "counting" counter is one that is used for counting the
+number of events that occur, and is characterised by having
+irq_period = 0.
+
+
+A read() on a counter returns the current value of the counter and possible
+additional values as specified by 'read_format', each value is a u64 (8 bytes)
+in size.
+
+/*
+ * Bits that can be set in hw_event.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_counter_read_format {
+        PERF_FORMAT_TOTAL_TIME_ENABLED  =  1,
+        PERF_FORMAT_TOTAL_TIME_RUNNING  =  2,
+};
+
+Using these additional values one can establish the overcommit ratio for a
+particular counter allowing one to take the round-robin scheduling effect
+into account.
+
+
+A "sampling" counter is one that is set up to generate an interrupt
+every N events, where N is given by 'irq_period'.  A sampling counter
+has irq_period > 0. The record_type controls what data is recorded on each
+interrupt:
+
+/*
+ * Bits that can be set in hw_event.record_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_record_format {
+        PERF_RECORD_IP          = 1U << 0,
+        PERF_RECORD_TID         = 1U << 1,
+        PERF_RECORD_TIME        = 1U << 2,
+        PERF_RECORD_ADDR        = 1U << 3,
+        PERF_RECORD_GROUP       = 1U << 4,
+        PERF_RECORD_CALLCHAIN   = 1U << 5,
+};
+
+Such (and other) events will be recorded in a ring-buffer, which is
+available to user-space using mmap() (see below).
+
+The 'disabled' bit specifies whether the counter starts out disabled
+or enabled.  If it is initially disabled, it can be enabled by ioctl
+or prctl (see below).
+
+The 'inherit' bit, if set, specifies that this counter should count
+events on descendant tasks as well as the task specified.  This only
+applies to new descendents, not to any existing descendents at the
+time the counter is created (nor to any new descendents of existing
+descendents).
+
+The 'pinned' bit, if set, specifies that the counter should always be
+on the CPU if at all possible.  It only applies to hardware counters
+and only to group leaders.  If a pinned counter cannot be put onto the
+CPU (e.g. because there are not enough hardware counters or because of
+a conflict with some other event), then the counter goes into an
+'error' state, where reads return end-of-file (i.e. read() returns 0)
+until the counter is subsequently enabled or disabled.
+
+The 'exclusive' bit, if set, specifies that when this counter's group
+is on the CPU, it should be the only group using the CPU's counters.
+In future, this will allow sophisticated monitoring programs to supply
+extra configuration information via 'extra_config_len' to exploit
+advanced features of the CPU's Performance Monitor Unit (PMU) that are
+not otherwise accessible and that might disrupt other hardware
+counters.
+
+The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
+way to request that counting of events be restricted to times when the
+CPU is in user, kernel and/or hypervisor mode.
+
+The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap
+operations, these can be used to relate userspace IP addresses to actual
+code, even after the mapping (or even the whole process) is gone,
+these events are recorded in the ring-buffer (see below).
+
+The 'comm' bit allows tracking of process comm data on process creation.
+This too is recorded in the ring-buffer (see below).
+
+The 'pid' parameter to the perf_counter_open() system call allows the
+counter to be specific to a task:
+
+ pid == 0: if the pid parameter is zero, the counter is attached to the
+ current task.
+
+ pid > 0: the counter is attached to a specific task (if the current task
+ has sufficient privilege to do so)
+
+ pid < 0: all tasks are counted (per cpu counters)
+
+The 'cpu' parameter allows a counter to be made specific to a CPU:
+
+ cpu >= 0: the counter is restricted to a specific CPU
+ cpu == -1: the counter counts on all CPUs
+
+(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
+
+A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
+events of that task and 'follows' that task to whatever CPU the task
+gets schedule to. Per task counters can be created by any user, for
+their own tasks.
+
+A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
+all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+
+The 'flags' parameter is currently unused and must be zero.
+
+The 'group_fd' parameter allows counter "groups" to be set up.  A
+counter group has one counter which is the group "leader".  The leader
+is created first, with group_fd = -1 in the perf_counter_open call
+that creates it.  The rest of the group members are created
+subsequently, with group_fd giving the fd of the group leader.
+(A single counter on its own is created with group_fd = -1 and is
+considered to be a group with only 1 member.)
+
+A counter group is scheduled onto the CPU as a unit, that is, it will
+only be put onto the CPU if all of the counters in the group can be
+put onto the CPU.  This means that the values of the member counters
+can be meaningfully compared, added, divided (to get ratios), etc.,
+with each other, since they have counted events for the same set of
+executed instructions.
+
+
+Like stated, asynchronous events, like counter overflow or PROT_EXEC mmap
+tracking are logged into a ring-buffer. This ring-buffer is created and
+accessed through mmap().
+
+The mmap size should be 1+2^n pages, where the first page is a meta-data page
+(struct perf_counter_mmap_page) that contains various bits of information such
+as where the ring-buffer head is.
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+        __u32   version;                /* version number of this structure */
+        __u32   compat_version;         /* lowest version this is compat with */
+
+        /*
+         * Bits needed to read the hw counters in user-space.
+         *
+         *   u32 seq;
+         *   s64 count;
+         *
+         *   do {
+         *     seq = pc->lock;
+         *
+         *     barrier()
+         *     if (pc->index) {
+         *       count = pmc_read(pc->index - 1);
+         *       count += pc->offset;
+         *     } else
+         *       goto regular_read;
+         *
+         *     barrier();
+         *   } while (pc->lock != seq);
+         *
+         * NOTE: for obvious reason this only works on self-monitoring
+         *       processes.
+         */
+        __u32   lock;                   /* seqlock for synchronization */
+        __u32   index;                  /* hardware counter identifier */
+        __s64   offset;                 /* add to hardware counter value */
+
+        /*
+         * Control data for the mmap() data buffer.
+         *
+         * User-space reading this value should issue an rmb(), on SMP capable
+         * platforms, after reading this value -- see perf_counter_wakeup().
+         */
+        __u32   data_head;              /* head in the data section */
+};
+
+NOTE: the hw-counter userspace bits are arch specific and are currently only
+      implemented on powerpc.
+
+The following 2^n pages are the ring-buffer which contains events of the form:
+
+#define PERF_EVENT_MISC_KERNEL          (1 << 0)
+#define PERF_EVENT_MISC_USER            (1 << 1)
+#define PERF_EVENT_MISC_OVERFLOW        (1 << 2)
+
+struct perf_event_header {
+        __u32   type;
+        __u16   misc;
+        __u16   size;
+};
+
+enum perf_event_type {
+
+        /*
+         * The MMAP events record the PROT_EXEC mappings so that we can
+         * correlate userspace IPs to code. They have the following structure:
+         *
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      u32                             pid, tid;
+         *      u64                             addr;
+         *      u64                             len;
+         *      u64                             pgoff;
+         *      char                            filename[];
+         * };
+         */
+        PERF_EVENT_MMAP                 = 1,
+        PERF_EVENT_MUNMAP               = 2,
+
+        /*
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      u32                             pid, tid;
+         *      char                            comm[];
+         * };
+         */
+        PERF_EVENT_COMM                 = 3,
+
+        /*
+         * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
+         * will be PERF_RECORD_*
+         *
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      { u64                   ip;       } && PERF_RECORD_IP
+         *      { u32                   pid, tid; } && PERF_RECORD_TID
+         *      { u64                   time;     } && PERF_RECORD_TIME
+         *      { u64                   addr;     } && PERF_RECORD_ADDR
+         *
+         *      { u64                   nr;
+         *        { u64 event, val; }   cnt[nr];  } && PERF_RECORD_GROUP
+         *
+         *      { u16                   nr,
+         *                              hv,
+         *                              kernel,
+         *                              user;
+         *        u64                   ips[nr];  } && PERF_RECORD_CALLCHAIN
+         * };
+         */
+};
+
+NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented
+      on x86.
+
+Notification of new events is possible through poll()/select()/epoll() and
+fcntl() managing signals.
+
+Normally a notification is generated for every page filled, however one can
+additionally set perf_counter_hw_event.wakeup_events to generate one every
+so many counter overflow events.
+
+Future work will include a splice() interface to the ring-buffer.
+
+
+Counters can be enabled and disabled in two ways: via ioctl and via
+prctl.  When a counter is disabled, it doesn't count or generate
+events but does continue to exist and maintain its count value.
+
+An individual counter or counter group can be enabled with
+
+	ioctl(fd, PERF_COUNTER_IOC_ENABLE);
+
+or disabled with
+
+	ioctl(fd, PERF_COUNTER_IOC_DISABLE);
+
+Enabling or disabling the leader of a group enables or disables the
+whole group; that is, while the group leader is disabled, none of the
+counters in the group will count.  Enabling or disabling a member of a
+group other than the leader only affects that counter - disabling an
+non-leader stops that counter from counting but doesn't affect any
+other counter.
+
+Additionally, non-inherited overflow counters can use
+
+	ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr);
+
+to enable a counter for 'nr' events, after which it gets disabled again.
+
+A process can enable or disable all the counter groups that are
+attached to it, using prctl:
+
+	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+
+This applies to all counters on the current process, whether created
+by this process or by another, and doesn't affect any counters that
+this process has created on other processes.  It only enables or
+disables the group leaders, not any other members in the groups.
+
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
new file mode 100644
index 0000000..4eb7259
--- /dev/null
+++ b/tools/perf/perf.c
@@ -0,0 +1,428 @@
+/*
+ * perf.c
+ *
+ * Performance analysis utility.
+ *
+ * This is the main hub from which the sub-commands (perf stat,
+ * perf top, perf record, perf report, etc.) are started.
+ */
+#include "builtin.h"
+
+#include "util/exec_cmd.h"
+#include "util/cache.h"
+#include "util/quote.h"
+#include "util/run-command.h"
+
+const char perf_usage_string[] =
+	"perf [--version] [--help] COMMAND [ARGS]";
+
+const char perf_more_info_string[] =
+	"See 'perf help COMMAND' for more information on a specific command.";
+
+static int use_pager = -1;
+struct pager_config {
+	const char *cmd;
+	int val;
+};
+
+static int pager_command_config(const char *var, const char *value, void *data)
+{
+	struct pager_config *c = data;
+	if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd))
+		c->val = perf_config_bool(var, value);
+	return 0;
+}
+
+/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */
+int check_pager_config(const char *cmd)
+{
+	struct pager_config c;
+	c.cmd = cmd;
+	c.val = -1;
+	perf_config(pager_command_config, &c);
+	return c.val;
+}
+
+static void commit_pager_choice(void) {
+	switch (use_pager) {
+	case 0:
+		setenv("PERF_PAGER", "cat", 1);
+		break;
+	case 1:
+		/* setup_pager(); */
+		break;
+	default:
+		break;
+	}
+}
+
+static int handle_options(const char*** argv, int* argc, int* envchanged)
+{
+	int handled = 0;
+
+	while (*argc > 0) {
+		const char *cmd = (*argv)[0];
+		if (cmd[0] != '-')
+			break;
+
+		/*
+		 * For legacy reasons, the "version" and "help"
+		 * commands can be written with "--" prepended
+		 * to make them look like flags.
+		 */
+		if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version"))
+			break;
+
+		/*
+		 * Check remaining flags.
+		 */
+		if (!prefixcmp(cmd, "--exec-path")) {
+			cmd += 11;
+			if (*cmd == '=')
+				perf_set_argv_exec_path(cmd + 1);
+			else {
+				puts(perf_exec_path());
+				exit(0);
+			}
+		} else if (!strcmp(cmd, "--html-path")) {
+			puts(system_path(PERF_HTML_PATH));
+			exit(0);
+		} else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) {
+			use_pager = 1;
+		} else if (!strcmp(cmd, "--no-pager")) {
+			use_pager = 0;
+			if (envchanged)
+				*envchanged = 1;
+		} else if (!strcmp(cmd, "--perf-dir")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --perf-dir.\n" );
+				usage(perf_usage_string);
+			}
+			setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
+			handled++;
+		} else if (!prefixcmp(cmd, "--perf-dir=")) {
+			setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1);
+			if (envchanged)
+				*envchanged = 1;
+		} else if (!strcmp(cmd, "--work-tree")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --work-tree.\n" );
+				usage(perf_usage_string);
+			}
+			setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
+		} else if (!prefixcmp(cmd, "--work-tree=")) {
+			setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1);
+			if (envchanged)
+				*envchanged = 1;
+		} else {
+			fprintf(stderr, "Unknown option: %s\n", cmd);
+			usage(perf_usage_string);
+		}
+
+		(*argv)++;
+		(*argc)--;
+		handled++;
+	}
+	return handled;
+}
+
+static int handle_alias(int *argcp, const char ***argv)
+{
+	int envchanged = 0, ret = 0, saved_errno = errno;
+	int count, option_count;
+	const char** new_argv;
+	const char *alias_command;
+	char *alias_string;
+
+	alias_command = (*argv)[0];
+	alias_string = alias_lookup(alias_command);
+	if (alias_string) {
+		if (alias_string[0] == '!') {
+			if (*argcp > 1) {
+				struct strbuf buf;
+
+				strbuf_init(&buf, PATH_MAX);
+				strbuf_addstr(&buf, alias_string);
+				sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
+				free(alias_string);
+				alias_string = buf.buf;
+			}
+			ret = system(alias_string + 1);
+			if (ret >= 0 && WIFEXITED(ret) &&
+			    WEXITSTATUS(ret) != 127)
+				exit(WEXITSTATUS(ret));
+			die("Failed to run '%s' when expanding alias '%s'",
+			    alias_string + 1, alias_command);
+		}
+		count = split_cmdline(alias_string, &new_argv);
+		if (count < 0)
+			die("Bad alias.%s string", alias_command);
+		option_count = handle_options(&new_argv, &count, &envchanged);
+		if (envchanged)
+			die("alias '%s' changes environment variables\n"
+				 "You can use '!perf' in the alias to do this.",
+				 alias_command);
+		memmove(new_argv - option_count, new_argv,
+				count * sizeof(char *));
+		new_argv -= option_count;
+
+		if (count < 1)
+			die("empty alias for %s", alias_command);
+
+		if (!strcmp(alias_command, new_argv[0]))
+			die("recursive alias: %s", alias_command);
+
+		new_argv = realloc(new_argv, sizeof(char*) *
+				    (count + *argcp + 1));
+		/* insert after command name */
+		memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp);
+		new_argv[count+*argcp] = NULL;
+
+		*argv = new_argv;
+		*argcp += count - 1;
+
+		ret = 1;
+	}
+
+	errno = saved_errno;
+
+	return ret;
+}
+
+const char perf_version_string[] = PERF_VERSION;
+
+#define RUN_SETUP	(1<<0)
+#define USE_PAGER	(1<<1)
+/*
+ * require working tree to be present -- anything uses this needs
+ * RUN_SETUP for reading from the configuration file.
+ */
+#define NEED_WORK_TREE	(1<<2)
+
+struct cmd_struct {
+	const char *cmd;
+	int (*fn)(int, const char **, const char *);
+	int option;
+};
+
+static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
+{
+	int status;
+	struct stat st;
+	const char *prefix;
+
+	prefix = NULL;
+	if (p->option & RUN_SETUP)
+		prefix = NULL; /* setup_perf_directory(); */
+
+	if (use_pager == -1 && p->option & RUN_SETUP)
+		use_pager = check_pager_config(p->cmd);
+	if (use_pager == -1 && p->option & USE_PAGER)
+		use_pager = 1;
+	commit_pager_choice();
+
+	if (p->option & NEED_WORK_TREE)
+		/* setup_work_tree() */;
+
+	status = p->fn(argc, argv, prefix);
+	if (status)
+		return status & 0xff;
+
+	/* Somebody closed stdout? */
+	if (fstat(fileno(stdout), &st))
+		return 0;
+	/* Ignore write errors for pipes and sockets.. */
+	if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
+		return 0;
+
+	/* Check for ENOSPC and EIO errors.. */
+	if (fflush(stdout))
+		die("write failure on standard output: %s", strerror(errno));
+	if (ferror(stdout))
+		die("unknown write failure on standard output");
+	if (fclose(stdout))
+		die("close failed on standard output: %s", strerror(errno));
+	return 0;
+}
+
+static void handle_internal_command(int argc, const char **argv)
+{
+	const char *cmd = argv[0];
+	static struct cmd_struct commands[] = {
+		{ "help", cmd_help, 0 },
+		{ "list", cmd_list, 0 },
+		{ "record", cmd_record, 0 },
+		{ "report", cmd_report, 0 },
+		{ "stat", cmd_stat, 0 },
+		{ "top", cmd_top, 0 },
+		{ "annotate", cmd_annotate, 0 },
+		{ "version", cmd_version, 0 },
+	};
+	int i;
+	static const char ext[] = STRIP_EXTENSION;
+
+	if (sizeof(ext) > 1) {
+		i = strlen(argv[0]) - strlen(ext);
+		if (i > 0 && !strcmp(argv[0] + i, ext)) {
+			char *argv0 = strdup(argv[0]);
+			argv[0] = cmd = argv0;
+			argv0[i] = '\0';
+		}
+	}
+
+	/* Turn "perf cmd --help" into "perf help cmd" */
+	if (argc > 1 && !strcmp(argv[1], "--help")) {
+		argv[1] = argv[0];
+		argv[0] = cmd = "help";
+	}
+
+	for (i = 0; i < ARRAY_SIZE(commands); i++) {
+		struct cmd_struct *p = commands+i;
+		if (strcmp(p->cmd, cmd))
+			continue;
+		exit(run_builtin(p, argc, argv));
+	}
+}
+
+static void execv_dashed_external(const char **argv)
+{
+	struct strbuf cmd = STRBUF_INIT;
+	const char *tmp;
+	int status;
+
+	strbuf_addf(&cmd, "perf-%s", argv[0]);
+
+	/*
+	 * argv[0] must be the perf command, but the argv array
+	 * belongs to the caller, and may be reused in
+	 * subsequent loop iterations. Save argv[0] and
+	 * restore it on error.
+	 */
+	tmp = argv[0];
+	argv[0] = cmd.buf;
+
+	/*
+	 * if we fail because the command is not found, it is
+	 * OK to return. Otherwise, we just pass along the status code.
+	 */
+	status = run_command_v_opt(argv, 0);
+	if (status != -ERR_RUN_COMMAND_EXEC) {
+		if (IS_RUN_COMMAND_ERR(status))
+			die("unable to run '%s'", argv[0]);
+		exit(-status);
+	}
+	errno = ENOENT; /* as if we called execvp */
+
+	argv[0] = tmp;
+
+	strbuf_release(&cmd);
+}
+
+static int run_argv(int *argcp, const char ***argv)
+{
+	int done_alias = 0;
+
+	while (1) {
+		/* See if it's an internal command */
+		handle_internal_command(*argcp, *argv);
+
+		/* .. then try the external ones */
+		execv_dashed_external(*argv);
+
+		/* It could be an alias -- this works around the insanity
+		 * of overriding "perf log" with "perf show" by having
+		 * alias.log = show
+		 */
+		if (done_alias || !handle_alias(argcp, argv))
+			break;
+		done_alias = 1;
+	}
+
+	return done_alias;
+}
+
+
+int main(int argc, const char **argv)
+{
+	const char *cmd;
+
+	cmd = perf_extract_argv0_path(argv[0]);
+	if (!cmd)
+		cmd = "perf-help";
+
+	/*
+	 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
+	 *
+	 *  - cannot take flags in between the "perf" and the "xxxx".
+	 *  - cannot execute it externally (since it would just do
+	 *    the same thing over again)
+	 *
+	 * So we just directly call the internal command handler, and
+	 * die if that one cannot handle it.
+	 */
+	if (!prefixcmp(cmd, "perf-")) {
+		cmd += 5;
+		argv[0] = cmd;
+		handle_internal_command(argc, argv);
+		die("cannot handle %s internally", cmd);
+	}
+
+	/* Look for flags.. */
+	argv++;
+	argc--;
+	handle_options(&argv, &argc, NULL);
+	commit_pager_choice();
+	if (argc > 0) {
+		if (!prefixcmp(argv[0], "--"))
+			argv[0] += 2;
+	} else {
+		/* The user didn't specify a command; give them help */
+		printf("\n usage: %s\n\n", perf_usage_string);
+		list_common_cmds_help();
+		printf("\n %s\n\n", perf_more_info_string);
+		exit(1);
+	}
+	cmd = argv[0];
+
+	/*
+	 * We use PATH to find perf commands, but we prepend some higher
+	 * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH
+	 * environment, and the $(perfexecdir) from the Makefile at build
+	 * time.
+	 */
+	setup_path();
+
+	while (1) {
+		static int done_help = 0;
+		static int was_alias = 0;
+
+		was_alias = run_argv(&argc, &argv);
+		if (errno != ENOENT)
+			break;
+
+		if (was_alias) {
+			fprintf(stderr, "Expansion of alias '%s' failed; "
+				"'%s' is not a perf-command\n",
+				cmd, argv[0]);
+			exit(1);
+		}
+		if (!done_help) {
+			cmd = argv[0] = help_unknown_cmd(cmd);
+			done_help = 1;
+		} else
+			break;
+	}
+
+	fprintf(stderr, "Failed to run command '%s': %s\n",
+		cmd, strerror(errno));
+
+	return 1;
+}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
new file mode 100644
index 0000000..af0a504
--- /dev/null
+++ b/tools/perf/perf.h
@@ -0,0 +1,67 @@
+#ifndef _PERF_PERF_H
+#define _PERF_PERF_H
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
+#define rmb()		asm volatile("lfence" ::: "memory")
+#define cpu_relax()	asm volatile("rep; nop" ::: "memory");
+#endif
+
+#ifdef __powerpc__
+#include "../../arch/powerpc/include/asm/unistd.h"
+#define rmb()		asm volatile ("sync" ::: "memory")
+#define cpu_relax()	asm volatile ("" ::: "memory");
+#endif
+
+#include <time.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+
+#include "../../include/linux/perf_counter.h"
+
+/*
+ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
+ * counters in the current task.
+ */
+#define PR_TASK_PERF_COUNTERS_DISABLE   31
+#define PR_TASK_PERF_COUNTERS_ENABLE    32
+
+#ifndef NSEC_PER_SEC
+# define NSEC_PER_SEC			1000000000ULL
+#endif
+
+static inline unsigned long long rdclock(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
+
+/*
+ * Pick up some kernel type conventions:
+ */
+#define __user
+#define asmlinkage
+
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+
+static inline int
+sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
+		      pid_t pid, int cpu, int group_fd,
+		      unsigned long flags)
+{
+	return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
+		       group_fd, flags);
+}
+
+#define MAX_COUNTERS			256
+#define MAX_NR_CPUS			256
+
+#endif
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
new file mode 100755
index 0000000..c561d15
--- /dev/null
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+GVF=PERF-VERSION-FILE
+DEF_VER=v0.0.1.PERF
+
+LF='
+'
+
+# First see if there is a version file (included in release tarballs),
+# then try git-describe, then default.
+if test -f version
+then
+	VN=$(cat version) || VN="$DEF_VER"
+elif test -d .git -o -f .git &&
+	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
+	case "$VN" in
+	*$LF*) (exit 1) ;;
+	v[0-9]*)
+		git update-index -q --refresh
+		test -z "$(git diff-index --name-only HEAD --)" ||
+		VN="$VN-dirty" ;;
+	esac
+then
+	VN=$(echo "$VN" | sed -e 's/-/./g');
+else
+	VN="$DEF_VER"
+fi
+
+VN=$(expr "$VN" : v*'\(.*\)')
+
+if test -r $GVF
+then
+	VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
+else
+	VC=unset
+fi
+test "$VN" = "$VC" || {
+	echo >&2 "PERF_VERSION = $VN"
+	echo "PERF_VERSION = $VN" >$GVF
+}
+
+
diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c
new file mode 100644
index 0000000..61d33b8
--- /dev/null
+++ b/tools/perf/util/abspath.c
@@ -0,0 +1,117 @@
+#include "cache.h"
+
+/*
+ * Do not use this for inspecting *tracked* content.  When path is a
+ * symlink to a directory, we do not want to say it is a directory when
+ * dealing with tracked content in the working tree.
+ */
+static int is_directory(const char *path)
+{
+	struct stat st;
+	return (!stat(path, &st) && S_ISDIR(st.st_mode));
+}
+
+/* We allow "recursive" symbolic links. Only within reason, though. */
+#define MAXDEPTH 5
+
+const char *make_absolute_path(const char *path)
+{
+	static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1];
+	char cwd[1024] = "";
+	int buf_index = 1, len;
+
+	int depth = MAXDEPTH;
+	char *last_elem = NULL;
+	struct stat st;
+
+	if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+		die ("Too long path: %.*s", 60, path);
+
+	while (depth--) {
+		if (!is_directory(buf)) {
+			char *last_slash = strrchr(buf, '/');
+			if (last_slash) {
+				*last_slash = '\0';
+				last_elem = xstrdup(last_slash + 1);
+			} else {
+				last_elem = xstrdup(buf);
+				*buf = '\0';
+			}
+		}
+
+		if (*buf) {
+			if (!*cwd && !getcwd(cwd, sizeof(cwd)))
+				die ("Could not get current working directory");
+
+			if (chdir(buf))
+				die ("Could not switch to '%s'", buf);
+		}
+		if (!getcwd(buf, PATH_MAX))
+			die ("Could not get current working directory");
+
+		if (last_elem) {
+			int len = strlen(buf);
+			if (len + strlen(last_elem) + 2 > PATH_MAX)
+				die ("Too long path name: '%s/%s'",
+						buf, last_elem);
+			buf[len] = '/';
+			strcpy(buf + len + 1, last_elem);
+			free(last_elem);
+			last_elem = NULL;
+		}
+
+		if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) {
+			len = readlink(buf, next_buf, PATH_MAX);
+			if (len < 0)
+				die ("Invalid symlink: %s", buf);
+			if (PATH_MAX <= len)
+				die("symbolic link too long: %s", buf);
+			next_buf[len] = '\0';
+			buf = next_buf;
+			buf_index = 1 - buf_index;
+			next_buf = bufs[buf_index];
+		} else
+			break;
+	}
+
+	if (*cwd && chdir(cwd))
+		die ("Could not change back to '%s'", cwd);
+
+	return buf;
+}
+
+static const char *get_pwd_cwd(void)
+{
+	static char cwd[PATH_MAX + 1];
+	char *pwd;
+	struct stat cwd_stat, pwd_stat;
+	if (getcwd(cwd, PATH_MAX) == NULL)
+		return NULL;
+	pwd = getenv("PWD");
+	if (pwd && strcmp(pwd, cwd)) {
+		stat(cwd, &cwd_stat);
+		if (!stat(pwd, &pwd_stat) &&
+		    pwd_stat.st_dev == cwd_stat.st_dev &&
+		    pwd_stat.st_ino == cwd_stat.st_ino) {
+			strlcpy(cwd, pwd, PATH_MAX);
+		}
+	}
+	return cwd;
+}
+
+const char *make_nonrelative_path(const char *path)
+{
+	static char buf[PATH_MAX + 1];
+
+	if (is_absolute_path(path)) {
+		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	} else {
+		const char *cwd = get_pwd_cwd();
+		if (!cwd)
+			die("Cannot determine the current working directory");
+		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+			die("Too long path: %.*s", 60, path);
+	}
+	return buf;
+}
diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c
new file mode 100644
index 0000000..9b3dd2b
--- /dev/null
+++ b/tools/perf/util/alias.c
@@ -0,0 +1,77 @@
+#include "cache.h"
+
+static const char *alias_key;
+static char *alias_val;
+
+static int alias_lookup_cb(const char *k, const char *v, void *cb)
+{
+	if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) {
+		if (!v)
+			return config_error_nonbool(k);
+		alias_val = strdup(v);
+		return 0;
+	}
+	return 0;
+}
+
+char *alias_lookup(const char *alias)
+{
+	alias_key = alias;
+	alias_val = NULL;
+	perf_config(alias_lookup_cb, NULL);
+	return alias_val;
+}
+
+int split_cmdline(char *cmdline, const char ***argv)
+{
+	int src, dst, count = 0, size = 16;
+	char quoted = 0;
+
+	*argv = malloc(sizeof(char*) * size);
+
+	/* split alias_string */
+	(*argv)[count++] = cmdline;
+	for (src = dst = 0; cmdline[src];) {
+		char c = cmdline[src];
+		if (!quoted && isspace(c)) {
+			cmdline[dst++] = 0;
+			while (cmdline[++src]
+					&& isspace(cmdline[src]))
+				; /* skip */
+			if (count >= size) {
+				size += 16;
+				*argv = realloc(*argv, sizeof(char*) * size);
+			}
+			(*argv)[count++] = cmdline + dst;
+		} else if (!quoted && (c == '\'' || c == '"')) {
+			quoted = c;
+			src++;
+		} else if (c == quoted) {
+			quoted = 0;
+			src++;
+		} else {
+			if (c == '\\' && quoted != '\'') {
+				src++;
+				c = cmdline[src];
+				if (!c) {
+					free(*argv);
+					*argv = NULL;
+					return error("cmdline ends with \\");
+				}
+			}
+			cmdline[dst++] = c;
+			src++;
+		}
+	}
+
+	cmdline[dst] = 0;
+
+	if (quoted) {
+		free(*argv);
+		*argv = NULL;
+		return error("unclosed quote");
+	}
+
+	return count;
+}
+
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
new file mode 100644
index 0000000..393d614
--- /dev/null
+++ b/tools/perf/util/cache.h
@@ -0,0 +1,119 @@
+#ifndef CACHE_H
+#define CACHE_H
+
+#include "util.h"
+#include "strbuf.h"
+
+#define PERF_DIR_ENVIRONMENT "PERF_DIR"
+#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
+#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
+#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY"
+#define INDEX_ENVIRONMENT "PERF_INDEX_FILE"
+#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE"
+#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR"
+#define CONFIG_ENVIRONMENT "PERF_CONFIG"
+#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
+#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES"
+#define PERFATTRIBUTES_FILE ".perfattributes"
+#define INFOATTRIBUTES_FILE "info/attributes"
+#define ATTRIBUTE_MACRO_PREFIX "[attr]"
+
+typedef int (*config_fn_t)(const char *, const char *, void *);
+extern int perf_default_config(const char *, const char *, void *);
+extern int perf_config_from_file(config_fn_t fn, const char *, void *);
+extern int perf_config(config_fn_t fn, void *);
+extern int perf_parse_ulong(const char *, unsigned long *);
+extern int perf_config_int(const char *, const char *);
+extern unsigned long perf_config_ulong(const char *, const char *);
+extern int perf_config_bool_or_int(const char *, const char *, int *);
+extern int perf_config_bool(const char *, const char *);
+extern int perf_config_string(const char **, const char *, const char *);
+extern int perf_config_set(const char *, const char *);
+extern int perf_config_set_multivar(const char *, const char *, const char *, int);
+extern int perf_config_rename_section(const char *, const char *);
+extern const char *perf_etc_perfconfig(void);
+extern int check_repository_format_version(const char *var, const char *value, void *cb);
+extern int perf_config_system(void);
+extern int perf_config_global(void);
+extern int config_error_nonbool(const char *);
+extern const char *config_exclusive_filename;
+
+#define MAX_PERFNAME (1000)
+extern char perf_default_email[MAX_PERFNAME];
+extern char perf_default_name[MAX_PERFNAME];
+extern int user_ident_explicitly_given;
+
+extern const char *perf_log_output_encoding;
+extern const char *perf_mailmap_file;
+
+/* IO helper functions */
+extern void maybe_flush_or_die(FILE *, const char *);
+extern int copy_fd(int ifd, int ofd);
+extern int copy_file(const char *dst, const char *src, int mode);
+extern ssize_t read_in_full(int fd, void *buf, size_t count);
+extern ssize_t write_in_full(int fd, const void *buf, size_t count);
+extern void write_or_die(int fd, const void *buf, size_t count);
+extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
+extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
+extern void fsync_or_die(int fd, const char *);
+
+/* pager.c */
+extern void setup_pager(void);
+extern const char *pager_program;
+extern int pager_in_use(void);
+extern int pager_use_color;
+
+extern const char *editor_program;
+extern const char *excludes_file;
+
+char *alias_lookup(const char *alias);
+int split_cmdline(char *cmdline, const char ***argv);
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+	do { \
+		if ((nr) > alloc) { \
+			if (alloc_nr(alloc) < (nr)) \
+				alloc = (nr); \
+			else \
+				alloc = alloc_nr(alloc); \
+			x = xrealloc((x), alloc * sizeof(*(x))); \
+		} \
+	} while(0)
+
+
+static inline int is_absolute_path(const char *path)
+{
+	return path[0] == '/';
+}
+
+const char *make_absolute_path(const char *path);
+const char *make_nonrelative_path(const char *path);
+const char *make_relative_path(const char *abs, const char *base);
+int normalize_path_copy(char *dst, const char *src);
+int longest_ancestor_length(const char *path, const char *prefix_list);
+char *strip_path_suffix(const char *path, const char *suffix);
+
+extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
+extern int perf_mkstemp(char *path, size_t len, const char *template);
+
+extern char *mksnpath(char *buf, size_t n, const char *fmt, ...)
+	__attribute__((format (printf, 3, 4)));
+extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
+	__attribute__((format (printf, 3, 4)));
+extern char *perf_pathdup(const char *fmt, ...)
+	__attribute__((format (printf, 1, 2)));
+
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+
+#endif /* CACHE_H */
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
new file mode 100644
index 0000000..9a8c20c
--- /dev/null
+++ b/tools/perf/util/color.c
@@ -0,0 +1,241 @@
+#include "cache.h"
+#include "color.h"
+
+int perf_use_color_default = -1;
+
+static int parse_color(const char *name, int len)
+{
+	static const char * const color_names[] = {
+		"normal", "black", "red", "green", "yellow",
+		"blue", "magenta", "cyan", "white"
+	};
+	char *end;
+	int i;
+	for (i = 0; i < ARRAY_SIZE(color_names); i++) {
+		const char *str = color_names[i];
+		if (!strncasecmp(name, str, len) && !str[len])
+			return i - 1;
+	}
+	i = strtol(name, &end, 10);
+	if (end - name == len && i >= -1 && i <= 255)
+		return i;
+	return -2;
+}
+
+static int parse_attr(const char *name, int len)
+{
+	static const int attr_values[] = { 1, 2, 4, 5, 7 };
+	static const char * const attr_names[] = {
+		"bold", "dim", "ul", "blink", "reverse"
+	};
+	int i;
+	for (i = 0; i < ARRAY_SIZE(attr_names); i++) {
+		const char *str = attr_names[i];
+		if (!strncasecmp(name, str, len) && !str[len])
+			return attr_values[i];
+	}
+	return -1;
+}
+
+void color_parse(const char *value, const char *var, char *dst)
+{
+	color_parse_mem(value, strlen(value), var, dst);
+}
+
+void color_parse_mem(const char *value, int value_len, const char *var,
+		char *dst)
+{
+	const char *ptr = value;
+	int len = value_len;
+	int attr = -1;
+	int fg = -2;
+	int bg = -2;
+
+	if (!strncasecmp(value, "reset", len)) {
+		strcpy(dst, PERF_COLOR_RESET);
+		return;
+	}
+
+	/* [fg [bg]] [attr] */
+	while (len > 0) {
+		const char *word = ptr;
+		int val, wordlen = 0;
+
+		while (len > 0 && !isspace(word[wordlen])) {
+			wordlen++;
+			len--;
+		}
+
+		ptr = word + wordlen;
+		while (len > 0 && isspace(*ptr)) {
+			ptr++;
+			len--;
+		}
+
+		val = parse_color(word, wordlen);
+		if (val >= -1) {
+			if (fg == -2) {
+				fg = val;
+				continue;
+			}
+			if (bg == -2) {
+				bg = val;
+				continue;
+			}
+			goto bad;
+		}
+		val = parse_attr(word, wordlen);
+		if (val < 0 || attr != -1)
+			goto bad;
+		attr = val;
+	}
+
+	if (attr >= 0 || fg >= 0 || bg >= 0) {
+		int sep = 0;
+
+		*dst++ = '\033';
+		*dst++ = '[';
+		if (attr >= 0) {
+			*dst++ = '0' + attr;
+			sep++;
+		}
+		if (fg >= 0) {
+			if (sep++)
+				*dst++ = ';';
+			if (fg < 8) {
+				*dst++ = '3';
+				*dst++ = '0' + fg;
+			} else {
+				dst += sprintf(dst, "38;5;%d", fg);
+			}
+		}
+		if (bg >= 0) {
+			if (sep++)
+				*dst++ = ';';
+			if (bg < 8) {
+				*dst++ = '4';
+				*dst++ = '0' + bg;
+			} else {
+				dst += sprintf(dst, "48;5;%d", bg);
+			}
+		}
+		*dst++ = 'm';
+	}
+	*dst = 0;
+	return;
+bad:
+	die("bad color value '%.*s' for variable '%s'", value_len, value, var);
+}
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
+{
+	if (value) {
+		if (!strcasecmp(value, "never"))
+			return 0;
+		if (!strcasecmp(value, "always"))
+			return 1;
+		if (!strcasecmp(value, "auto"))
+			goto auto_color;
+	}
+
+	/* Missing or explicit false to turn off colorization */
+	if (!perf_config_bool(var, value))
+		return 0;
+
+	/* any normal truth value defaults to 'auto' */
+ auto_color:
+	if (stdout_is_tty < 0)
+		stdout_is_tty = isatty(1);
+	if (stdout_is_tty || (pager_in_use() && pager_use_color)) {
+		char *term = getenv("TERM");
+		if (term && strcmp(term, "dumb"))
+			return 1;
+	}
+	return 0;
+}
+
+int perf_color_default_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "color.ui")) {
+		perf_use_color_default = perf_config_colorbool(var, value, -1);
+		return 0;
+	}
+
+	return perf_default_config(var, value, cb);
+}
+
+static int color_vfprintf(FILE *fp, const char *color, const char *fmt,
+		va_list args, const char *trail)
+{
+	int r = 0;
+
+	/*
+	 * Auto-detect:
+	 */
+	if (perf_use_color_default < 0) {
+		if (isatty(1) || pager_in_use())
+			perf_use_color_default = 1;
+		else
+			perf_use_color_default = 0;
+	}
+
+	if (perf_use_color_default && *color)
+		r += fprintf(fp, "%s", color);
+	r += vfprintf(fp, fmt, args);
+	if (perf_use_color_default && *color)
+		r += fprintf(fp, "%s", PERF_COLOR_RESET);
+	if (trail)
+		r += fprintf(fp, "%s", trail);
+	return r;
+}
+
+
+
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+	r = color_vfprintf(fp, color, fmt, args, NULL);
+	va_end(args);
+	return r;
+}
+
+int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+	va_start(args, fmt);
+	r = color_vfprintf(fp, color, fmt, args, "\n");
+	va_end(args);
+	return r;
+}
+
+/*
+ * This function splits the buffer by newlines and colors the lines individually.
+ *
+ * Returns 0 on success.
+ */
+int color_fwrite_lines(FILE *fp, const char *color,
+		size_t count, const char *buf)
+{
+	if (!*color)
+		return fwrite(buf, count, 1, fp) != 1;
+	while (count) {
+		char *p = memchr(buf, '\n', count);
+		if (p != buf && (fputs(color, fp) < 0 ||
+				fwrite(buf, p ? p - buf : count, 1, fp) != 1 ||
+				fputs(PERF_COLOR_RESET, fp) < 0))
+			return -1;
+		if (!p)
+			return 0;
+		if (fputc('\n', fp) < 0)
+			return -1;
+		count -= p + 1 - buf;
+		buf = p + 1;
+	}
+	return 0;
+}
+
+
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
new file mode 100644
index 0000000..5abfd37
--- /dev/null
+++ b/tools/perf/util/color.h
@@ -0,0 +1,36 @@
+#ifndef COLOR_H
+#define COLOR_H
+
+/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
+#define COLOR_MAXLEN 24
+
+#define PERF_COLOR_NORMAL	""
+#define PERF_COLOR_RESET	"\033[m"
+#define PERF_COLOR_BOLD		"\033[1m"
+#define PERF_COLOR_RED		"\033[31m"
+#define PERF_COLOR_GREEN	"\033[32m"
+#define PERF_COLOR_YELLOW	"\033[33m"
+#define PERF_COLOR_BLUE		"\033[34m"
+#define PERF_COLOR_MAGENTA	"\033[35m"
+#define PERF_COLOR_CYAN		"\033[36m"
+#define PERF_COLOR_BG_RED	"\033[41m"
+
+/*
+ * This variable stores the value of color.ui
+ */
+extern int perf_use_color_default;
+
+
+/*
+ * Use this instead of perf_default_config if you need the value of color.ui.
+ */
+int perf_color_default_config(const char *var, const char *value, void *cb);
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
+void color_parse(const char *value, const char *var, char *dst);
+void color_parse_mem(const char *value, int len, const char *var, char *dst);
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
+int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+
+#endif /* COLOR_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
new file mode 100644
index 0000000..3dd13fa
--- /dev/null
+++ b/tools/perf/util/config.c
@@ -0,0 +1,873 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ * Copyright (C) Johannes Schindelin, 2005
+ *
+ */
+#include "util.h"
+#include "cache.h"
+#include "exec_cmd.h"
+
+#define MAXNAME (256)
+
+static FILE *config_file;
+static const char *config_file_name;
+static int config_linenr;
+static int config_file_eof;
+
+const char *config_exclusive_filename = NULL;
+
+static int get_next_char(void)
+{
+	int c;
+	FILE *f;
+
+	c = '\n';
+	if ((f = config_file) != NULL) {
+		c = fgetc(f);
+		if (c == '\r') {
+			/* DOS like systems */
+			c = fgetc(f);
+			if (c != '\n') {
+				ungetc(c, f);
+				c = '\r';
+			}
+		}
+		if (c == '\n')
+			config_linenr++;
+		if (c == EOF) {
+			config_file_eof = 1;
+			c = '\n';
+		}
+	}
+	return c;
+}
+
+static char *parse_value(void)
+{
+	static char value[1024];
+	int quote = 0, comment = 0, len = 0, space = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (len >= sizeof(value) - 1)
+			return NULL;
+		if (c == '\n') {
+			if (quote)
+				return NULL;
+			value[len] = 0;
+			return value;
+		}
+		if (comment)
+			continue;
+		if (isspace(c) && !quote) {
+			space = 1;
+			continue;
+		}
+		if (!quote) {
+			if (c == ';' || c == '#') {
+				comment = 1;
+				continue;
+			}
+		}
+		if (space) {
+			if (len)
+				value[len++] = ' ';
+			space = 0;
+		}
+		if (c == '\\') {
+			c = get_next_char();
+			switch (c) {
+			case '\n':
+				continue;
+			case 't':
+				c = '\t';
+				break;
+			case 'b':
+				c = '\b';
+				break;
+			case 'n':
+				c = '\n';
+				break;
+			/* Some characters escape as themselves */
+			case '\\': case '"':
+				break;
+			/* Reject unknown escape sequences */
+			default:
+				return NULL;
+			}
+			value[len++] = c;
+			continue;
+		}
+		if (c == '"') {
+			quote = 1-quote;
+			continue;
+		}
+		value[len++] = c;
+	}
+}
+
+static inline int iskeychar(int c)
+{
+	return isalnum(c) || c == '-';
+}
+
+static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
+{
+	int c;
+	char *value;
+
+	/* Get the full name */
+	for (;;) {
+		c = get_next_char();
+		if (config_file_eof)
+			break;
+		if (!iskeychar(c))
+			break;
+		name[len++] = tolower(c);
+		if (len >= MAXNAME)
+			return -1;
+	}
+	name[len] = 0;
+	while (c == ' ' || c == '\t')
+		c = get_next_char();
+
+	value = NULL;
+	if (c != '\n') {
+		if (c != '=')
+			return -1;
+		value = parse_value();
+		if (!value)
+			return -1;
+	}
+	return fn(name, value, data);
+}
+
+static int get_extended_base_var(char *name, int baselen, int c)
+{
+	do {
+		if (c == '\n')
+			return -1;
+		c = get_next_char();
+	} while (isspace(c));
+
+	/* We require the format to be '[base "extension"]' */
+	if (c != '"')
+		return -1;
+	name[baselen++] = '.';
+
+	for (;;) {
+		int c = get_next_char();
+		if (c == '\n')
+			return -1;
+		if (c == '"')
+			break;
+		if (c == '\\') {
+			c = get_next_char();
+			if (c == '\n')
+				return -1;
+		}
+		name[baselen++] = c;
+		if (baselen > MAXNAME / 2)
+			return -1;
+	}
+
+	/* Final ']' */
+	if (get_next_char() != ']')
+		return -1;
+	return baselen;
+}
+
+static int get_base_var(char *name)
+{
+	int baselen = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (config_file_eof)
+			return -1;
+		if (c == ']')
+			return baselen;
+		if (isspace(c))
+			return get_extended_base_var(name, baselen, c);
+		if (!iskeychar(c) && c != '.')
+			return -1;
+		if (baselen > MAXNAME / 2)
+			return -1;
+		name[baselen++] = tolower(c);
+	}
+}
+
+static int perf_parse_file(config_fn_t fn, void *data)
+{
+	int comment = 0;
+	int baselen = 0;
+	static char var[MAXNAME];
+
+	/* U+FEFF Byte Order Mark in UTF8 */
+	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
+	const unsigned char *bomptr = utf8_bom;
+
+	for (;;) {
+		int c = get_next_char();
+		if (bomptr && *bomptr) {
+			/* We are at the file beginning; skip UTF8-encoded BOM
+			 * if present. Sane editors won't put this in on their
+			 * own, but e.g. Windows Notepad will do it happily. */
+			if ((unsigned char) c == *bomptr) {
+				bomptr++;
+				continue;
+			} else {
+				/* Do not tolerate partial BOM. */
+				if (bomptr != utf8_bom)
+					break;
+				/* No BOM at file beginning. Cool. */
+				bomptr = NULL;
+			}
+		}
+		if (c == '\n') {
+			if (config_file_eof)
+				return 0;
+			comment = 0;
+			continue;
+		}
+		if (comment || isspace(c))
+			continue;
+		if (c == '#' || c == ';') {
+			comment = 1;
+			continue;
+		}
+		if (c == '[') {
+			baselen = get_base_var(var);
+			if (baselen <= 0)
+				break;
+			var[baselen++] = '.';
+			var[baselen] = 0;
+			continue;
+		}
+		if (!isalpha(c))
+			break;
+		var[baselen] = tolower(c);
+		if (get_value(fn, data, var, baselen+1) < 0)
+			break;
+	}
+	die("bad config file line %d in %s", config_linenr, config_file_name);
+}
+
+static int parse_unit_factor(const char *end, unsigned long *val)
+{
+	if (!*end)
+		return 1;
+	else if (!strcasecmp(end, "k")) {
+		*val *= 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "m")) {
+		*val *= 1024 * 1024;
+		return 1;
+	}
+	else if (!strcasecmp(end, "g")) {
+		*val *= 1024 * 1024 * 1024;
+		return 1;
+	}
+	return 0;
+}
+
+static int perf_parse_long(const char *value, long *ret)
+{
+	if (value && *value) {
+		char *end;
+		long val = strtol(value, &end, 0);
+		unsigned long factor = 1;
+		if (!parse_unit_factor(end, &factor))
+			return 0;
+		*ret = val * factor;
+		return 1;
+	}
+	return 0;
+}
+
+int perf_parse_ulong(const char *value, unsigned long *ret)
+{
+	if (value && *value) {
+		char *end;
+		unsigned long val = strtoul(value, &end, 0);
+		if (!parse_unit_factor(end, &val))
+			return 0;
+		*ret = val;
+		return 1;
+	}
+	return 0;
+}
+
+static void die_bad_config(const char *name)
+{
+	if (config_file_name)
+		die("bad config value for '%s' in %s", name, config_file_name);
+	die("bad config value for '%s'", name);
+}
+
+int perf_config_int(const char *name, const char *value)
+{
+	long ret = 0;
+	if (!perf_parse_long(value, &ret))
+		die_bad_config(name);
+	return ret;
+}
+
+unsigned long perf_config_ulong(const char *name, const char *value)
+{
+	unsigned long ret;
+	if (!perf_parse_ulong(value, &ret))
+		die_bad_config(name);
+	return ret;
+}
+
+int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
+{
+	*is_bool = 1;
+	if (!value)
+		return 1;
+	if (!*value)
+		return 0;
+	if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
+		return 1;
+	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
+		return 0;
+	*is_bool = 0;
+	return perf_config_int(name, value);
+}
+
+int perf_config_bool(const char *name, const char *value)
+{
+	int discard;
+	return !!perf_config_bool_or_int(name, value, &discard);
+}
+
+int perf_config_string(const char **dest, const char *var, const char *value)
+{
+	if (!value)
+		return config_error_nonbool(var);
+	*dest = strdup(value);
+	return 0;
+}
+
+static int perf_default_core_config(const char *var, const char *value)
+{
+	/* Add other config variables here and to Documentation/config.txt. */
+	return 0;
+}
+
+int perf_default_config(const char *var, const char *value, void *dummy)
+{
+	if (!prefixcmp(var, "core."))
+		return perf_default_core_config(var, value);
+
+	/* Add other config variables here and to Documentation/config.txt. */
+	return 0;
+}
+
+int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+{
+	int ret;
+	FILE *f = fopen(filename, "r");
+
+	ret = -1;
+	if (f) {
+		config_file = f;
+		config_file_name = filename;
+		config_linenr = 1;
+		config_file_eof = 0;
+		ret = perf_parse_file(fn, data);
+		fclose(f);
+		config_file_name = NULL;
+	}
+	return ret;
+}
+
+const char *perf_etc_perfconfig(void)
+{
+	static const char *system_wide;
+	if (!system_wide)
+		system_wide = system_path(ETC_PERFCONFIG);
+	return system_wide;
+}
+
+static int perf_env_bool(const char *k, int def)
+{
+	const char *v = getenv(k);
+	return v ? perf_config_bool(k, v) : def;
+}
+
+int perf_config_system(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
+}
+
+int perf_config_global(void)
+{
+	return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
+}
+
+int perf_config(config_fn_t fn, void *data)
+{
+	int ret = 0, found = 0;
+	char *repo_config = NULL;
+	const char *home = NULL;
+
+	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+	if (config_exclusive_filename)
+		return perf_config_from_file(fn, config_exclusive_filename, data);
+	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+		ret += perf_config_from_file(fn, perf_etc_perfconfig(),
+					    data);
+		found += 1;
+	}
+
+	home = getenv("HOME");
+	if (perf_config_global() && home) {
+		char *user_config = strdup(mkpath("%s/.perfconfig", home));
+		if (!access(user_config, R_OK)) {
+			ret += perf_config_from_file(fn, user_config, data);
+			found += 1;
+		}
+		free(user_config);
+	}
+
+	repo_config = perf_pathdup("config");
+	if (!access(repo_config, R_OK)) {
+		ret += perf_config_from_file(fn, repo_config, data);
+		found += 1;
+	}
+	free(repo_config);
+	if (found == 0)
+		return -1;
+	return ret;
+}
+
+/*
+ * Find all the stuff for perf_config_set() below.
+ */
+
+#define MAX_MATCHES 512
+
+static struct {
+	int baselen;
+	char* key;
+	int do_not_match;
+	regex_t* value_regex;
+	int multi_replace;
+	size_t offset[MAX_MATCHES];
+	enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state;
+	int seen;
+} store;
+
+static int matches(const char* key, const char* value)
+{
+	return !strcmp(key, store.key) &&
+		(store.value_regex == NULL ||
+		 (store.do_not_match ^
+		  !regexec(store.value_regex, value, 0, NULL, 0)));
+}
+
+static int store_aux(const char* key, const char* value, void *cb)
+{
+	const char *ep;
+	size_t section_len;
+
+	switch (store.state) {
+	case KEY_SEEN:
+		if (matches(key, value)) {
+			if (store.seen == 1 && store.multi_replace == 0) {
+				warning("%s has multiple values", key);
+			} else if (store.seen >= MAX_MATCHES) {
+				error("too many matches for %s", key);
+				return 1;
+			}
+
+			store.offset[store.seen] = ftell(config_file);
+			store.seen++;
+		}
+		break;
+	case SECTION_SEEN:
+		/*
+		 * What we are looking for is in store.key (both
+		 * section and var), and its section part is baselen
+		 * long.  We found key (again, both section and var).
+		 * We would want to know if this key is in the same
+		 * section as what we are looking for.  We already
+		 * know we are in the same section as what should
+		 * hold store.key.
+		 */
+		ep = strrchr(key, '.');
+		section_len = ep - key;
+
+		if ((section_len != store.baselen) ||
+		    memcmp(key, store.key, section_len+1)) {
+			store.state = SECTION_END_SEEN;
+			break;
+		}
+
+		/*
+		 * Do not increment matches: this is no match, but we
+		 * just made sure we are in the desired section.
+		 */
+		store.offset[store.seen] = ftell(config_file);
+		/* fallthru */
+	case SECTION_END_SEEN:
+	case START:
+		if (matches(key, value)) {
+			store.offset[store.seen] = ftell(config_file);
+			store.state = KEY_SEEN;
+			store.seen++;
+		} else {
+			if (strrchr(key, '.') - key == store.baselen &&
+			      !strncmp(key, store.key, store.baselen)) {
+					store.state = SECTION_SEEN;
+					store.offset[store.seen] = ftell(config_file);
+			}
+		}
+	}
+	return 0;
+}
+
+static int store_write_section(int fd, const char* key)
+{
+	const char *dot;
+	int i, success;
+	struct strbuf sb = STRBUF_INIT;
+
+	dot = memchr(key, '.', store.baselen);
+	if (dot) {
+		strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key);
+		for (i = dot - key + 1; i < store.baselen; i++) {
+			if (key[i] == '"' || key[i] == '\\')
+				strbuf_addch(&sb, '\\');
+			strbuf_addch(&sb, key[i]);
+		}
+		strbuf_addstr(&sb, "\"]\n");
+	} else {
+		strbuf_addf(&sb, "[%.*s]\n", store.baselen, key);
+	}
+
+	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
+	strbuf_release(&sb);
+
+	return success;
+}
+
+static int store_write_pair(int fd, const char* key, const char* value)
+{
+	int i, success;
+	int length = strlen(key + store.baselen + 1);
+	const char *quote = "";
+	struct strbuf sb = STRBUF_INIT;
+
+	/*
+	 * Check to see if the value needs to be surrounded with a dq pair.
+	 * Note that problematic characters are always backslash-quoted; this
+	 * check is about not losing leading or trailing SP and strings that
+	 * follow beginning-of-comment characters (i.e. ';' and '#') by the
+	 * configuration parser.
+	 */
+	if (value[0] == ' ')
+		quote = "\"";
+	for (i = 0; value[i]; i++)
+		if (value[i] == ';' || value[i] == '#')
+			quote = "\"";
+	if (i && value[i - 1] == ' ')
+		quote = "\"";
+
+	strbuf_addf(&sb, "\t%.*s = %s",
+		    length, key + store.baselen + 1, quote);
+
+	for (i = 0; value[i]; i++)
+		switch (value[i]) {
+		case '\n':
+			strbuf_addstr(&sb, "\\n");
+			break;
+		case '\t':
+			strbuf_addstr(&sb, "\\t");
+			break;
+		case '"':
+		case '\\':
+			strbuf_addch(&sb, '\\');
+		default:
+			strbuf_addch(&sb, value[i]);
+			break;
+		}
+	strbuf_addf(&sb, "%s\n", quote);
+
+	success = write_in_full(fd, sb.buf, sb.len) == sb.len;
+	strbuf_release(&sb);
+
+	return success;
+}
+
+static ssize_t find_beginning_of_line(const char* contents, size_t size,
+	size_t offset_, int* found_bracket)
+{
+	size_t equal_offset = size, bracket_offset = size;
+	ssize_t offset;
+
+contline:
+	for (offset = offset_-2; offset > 0
+			&& contents[offset] != '\n'; offset--)
+		switch (contents[offset]) {
+			case '=': equal_offset = offset; break;
+			case ']': bracket_offset = offset; break;
+		}
+	if (offset > 0 && contents[offset-1] == '\\') {
+		offset_ = offset;
+		goto contline;
+	}
+	if (bracket_offset < equal_offset) {
+		*found_bracket = 1;
+		offset = bracket_offset+1;
+	} else
+		offset++;
+
+	return offset;
+}
+
+int perf_config_set(const char* key, const char* value)
+{
+	return perf_config_set_multivar(key, value, NULL, 0);
+}
+
+/*
+ * If value==NULL, unset in (remove from) config,
+ * if value_regex!=NULL, disregard key/value pairs where value does not match.
+ * if multi_replace==0, nothing, or only one matching key/value is replaced,
+ *     else all matching key/values (regardless how many) are removed,
+ *     before the new pair is written.
+ *
+ * Returns 0 on success.
+ *
+ * This function does this:
+ *
+ * - it locks the config file by creating ".perf/config.lock"
+ *
+ * - it then parses the config using store_aux() as validator to find
+ *   the position on the key/value pair to replace. If it is to be unset,
+ *   it must be found exactly once.
+ *
+ * - the config file is mmap()ed and the part before the match (if any) is
+ *   written to the lock file, then the changed part and the rest.
+ *
+ * - the config file is removed and the lock file rename()d to it.
+ *
+ */
+int perf_config_set_multivar(const char* key, const char* value,
+	const char* value_regex, int multi_replace)
+{
+	int i, dot;
+	int fd = -1, in_fd;
+	int ret = 0;
+	char* config_filename;
+	const char* last_dot = strrchr(key, '.');
+
+	if (config_exclusive_filename)
+		config_filename = strdup(config_exclusive_filename);
+	else
+		config_filename = perf_pathdup("config");
+
+	/*
+	 * Since "key" actually contains the section name and the real
+	 * key name separated by a dot, we have to know where the dot is.
+	 */
+
+	if (last_dot == NULL) {
+		error("key does not contain a section: %s", key);
+		ret = 2;
+		goto out_free;
+	}
+	store.baselen = last_dot - key;
+
+	store.multi_replace = multi_replace;
+
+	/*
+	 * Validate the key and while at it, lower case it for matching.
+	 */
+	store.key = malloc(strlen(key) + 1);
+	dot = 0;
+	for (i = 0; key[i]; i++) {
+		unsigned char c = key[i];
+		if (c == '.')
+			dot = 1;
+		/* Leave the extended basename untouched.. */
+		if (!dot || i > store.baselen) {
+			if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) {
+				error("invalid key: %s", key);
+				free(store.key);
+				ret = 1;
+				goto out_free;
+			}
+			c = tolower(c);
+		} else if (c == '\n') {
+			error("invalid key (newline): %s", key);
+			free(store.key);
+			ret = 1;
+			goto out_free;
+		}
+		store.key[i] = c;
+	}
+	store.key[i] = 0;
+
+	/*
+	 * If .perf/config does not exist yet, write a minimal version.
+	 */
+	in_fd = open(config_filename, O_RDONLY);
+	if ( in_fd < 0 ) {
+		free(store.key);
+
+		if ( ENOENT != errno ) {
+			error("opening %s: %s", config_filename,
+			      strerror(errno));
+			ret = 3; /* same as "invalid config file" */
+			goto out_free;
+		}
+		/* if nothing to unset, error out */
+		if (value == NULL) {
+			ret = 5;
+			goto out_free;
+		}
+
+		store.key = (char*)key;
+		if (!store_write_section(fd, key) ||
+		    !store_write_pair(fd, key, value))
+			goto write_err_out;
+	} else {
+		struct stat st;
+		char* contents;
+		size_t contents_sz, copy_begin, copy_end;
+		int i, new_line = 0;
+
+		if (value_regex == NULL)
+			store.value_regex = NULL;
+		else {
+			if (value_regex[0] == '!') {
+				store.do_not_match = 1;
+				value_regex++;
+			} else
+				store.do_not_match = 0;
+
+			store.value_regex = (regex_t*)malloc(sizeof(regex_t));
+			if (regcomp(store.value_regex, value_regex,
+					REG_EXTENDED)) {
+				error("invalid pattern: %s", value_regex);
+				free(store.value_regex);
+				ret = 6;
+				goto out_free;
+			}
+		}
+
+		store.offset[0] = 0;
+		store.state = START;
+		store.seen = 0;
+
+		/*
+		 * After this, store.offset will contain the *end* offset
+		 * of the last match, or remain at 0 if no match was found.
+		 * As a side effect, we make sure to transform only a valid
+		 * existing config file.
+		 */
+		if (perf_config_from_file(store_aux, config_filename, NULL)) {
+			error("invalid config file %s", config_filename);
+			free(store.key);
+			if (store.value_regex != NULL) {
+				regfree(store.value_regex);
+				free(store.value_regex);
+			}
+			ret = 3;
+			goto out_free;
+		}
+
+		free(store.key);
+		if (store.value_regex != NULL) {
+			regfree(store.value_regex);
+			free(store.value_regex);
+		}
+
+		/* if nothing to unset, or too many matches, error out */
+		if ((store.seen == 0 && value == NULL) ||
+				(store.seen > 1 && multi_replace == 0)) {
+			ret = 5;
+			goto out_free;
+		}
+
+		fstat(in_fd, &st);
+		contents_sz = xsize_t(st.st_size);
+		contents = mmap(NULL, contents_sz, PROT_READ,
+			MAP_PRIVATE, in_fd, 0);
+		close(in_fd);
+
+		if (store.seen == 0)
+			store.seen = 1;
+
+		for (i = 0, copy_begin = 0; i < store.seen; i++) {
+			if (store.offset[i] == 0) {
+				store.offset[i] = copy_end = contents_sz;
+			} else if (store.state != KEY_SEEN) {
+				copy_end = store.offset[i];
+			} else
+				copy_end = find_beginning_of_line(
+					contents, contents_sz,
+					store.offset[i]-2, &new_line);
+
+			if (copy_end > 0 && contents[copy_end-1] != '\n')
+				new_line = 1;
+
+			/* write the first part of the config */
+			if (copy_end > copy_begin) {
+				if (write_in_full(fd, contents + copy_begin,
+						  copy_end - copy_begin) <
+				    copy_end - copy_begin)
+					goto write_err_out;
+				if (new_line &&
+				    write_in_full(fd, "\n", 1) != 1)
+					goto write_err_out;
+			}
+			copy_begin = store.offset[i];
+		}
+
+		/* write the pair (value == NULL means unset) */
+		if (value != NULL) {
+			if (store.state == START) {
+				if (!store_write_section(fd, key))
+					goto write_err_out;
+			}
+			if (!store_write_pair(fd, key, value))
+				goto write_err_out;
+		}
+
+		/* write the rest of the config */
+		if (copy_begin < contents_sz)
+			if (write_in_full(fd, contents + copy_begin,
+					  contents_sz - copy_begin) <
+			    contents_sz - copy_begin)
+				goto write_err_out;
+
+		munmap(contents, contents_sz);
+	}
+
+	ret = 0;
+
+out_free:
+	free(config_filename);
+	return ret;
+
+write_err_out:
+	goto out_free;
+
+}
+
+/*
+ * Call this to report error for your variable that should not
+ * get a boolean value (i.e. "[my] var" means "true").
+ */
+int config_error_nonbool(const char *var)
+{
+	return error("Missing value for '%s'", var);
+}
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
new file mode 100644
index 0000000..b90ec00
--- /dev/null
+++ b/tools/perf/util/ctype.c
@@ -0,0 +1,26 @@
+/*
+ * Sane locale-independent, ASCII ctype.
+ *
+ * No surprises, and works with signed and unsigned chars.
+ */
+#include "cache.h"
+
+enum {
+	S = GIT_SPACE,
+	A = GIT_ALPHA,
+	D = GIT_DIGIT,
+	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
+	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | * */
+};
+
+unsigned char sane_ctype[256] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
+	S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0,		/*  32.. 47 */
+	D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G,		/*  48.. 63 */
+	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
+	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0,		/*  80.. 95 */
+	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
+	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0,		/* 112..127 */
+	/* Nothing in the 128.. range */
+};
diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c
new file mode 100644
index 0000000..275b0ee
--- /dev/null
+++ b/tools/perf/util/environment.c
@@ -0,0 +1,9 @@
+/*
+ * We put all the perf config variables in this same object
+ * file, so that programs can link against the config parser
+ * without having to link against all the rest of perf.
+ */
+#include "cache.h"
+
+const char *pager_program;
+int pager_use_color = 1;
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c
new file mode 100644
index 0000000..d392922
--- /dev/null
+++ b/tools/perf/util/exec_cmd.c
@@ -0,0 +1,165 @@
+#include "cache.h"
+#include "exec_cmd.h"
+#include "quote.h"
+#define MAX_ARGS	32
+
+extern char **environ;
+static const char *argv_exec_path;
+static const char *argv0_path;
+
+const char *system_path(const char *path)
+{
+#ifdef RUNTIME_PREFIX
+	static const char *prefix;
+#else
+	static const char *prefix = PREFIX;
+#endif
+	struct strbuf d = STRBUF_INIT;
+
+	if (is_absolute_path(path))
+		return path;
+
+#ifdef RUNTIME_PREFIX
+	assert(argv0_path);
+	assert(is_absolute_path(argv0_path));
+
+	if (!prefix &&
+	    !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
+	    !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
+	    !(prefix = strip_path_suffix(argv0_path, "perf"))) {
+		prefix = PREFIX;
+		fprintf(stderr, "RUNTIME_PREFIX requested, "
+				"but prefix computation failed.  "
+				"Using static fallback '%s'.\n", prefix);
+	}
+#endif
+
+	strbuf_addf(&d, "%s/%s", prefix, path);
+	path = strbuf_detach(&d, NULL);
+	return path;
+}
+
+const char *perf_extract_argv0_path(const char *argv0)
+{
+	const char *slash;
+
+	if (!argv0 || !*argv0)
+		return NULL;
+	slash = argv0 + strlen(argv0);
+
+	while (argv0 <= slash && !is_dir_sep(*slash))
+		slash--;
+
+	if (slash >= argv0) {
+		argv0_path = strndup(argv0, slash - argv0);
+		return slash + 1;
+	}
+
+	return argv0;
+}
+
+void perf_set_argv_exec_path(const char *exec_path)
+{
+	argv_exec_path = exec_path;
+	/*
+	 * Propagate this setting to external programs.
+	 */
+	setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
+}
+
+
+/* Returns the highest-priority, location to look for perf programs. */
+const char *perf_exec_path(void)
+{
+	const char *env;
+
+	if (argv_exec_path)
+		return argv_exec_path;
+
+	env = getenv(EXEC_PATH_ENVIRONMENT);
+	if (env && *env) {
+		return env;
+	}
+
+	return system_path(PERF_EXEC_PATH);
+}
+
+static void add_path(struct strbuf *out, const char *path)
+{
+	if (path && *path) {
+		if (is_absolute_path(path))
+			strbuf_addstr(out, path);
+		else
+			strbuf_addstr(out, make_nonrelative_path(path));
+
+		strbuf_addch(out, PATH_SEP);
+	}
+}
+
+void setup_path(void)
+{
+	const char *old_path = getenv("PATH");
+	struct strbuf new_path = STRBUF_INIT;
+
+	add_path(&new_path, perf_exec_path());
+	add_path(&new_path, argv0_path);
+
+	if (old_path)
+		strbuf_addstr(&new_path, old_path);
+	else
+		strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin");
+
+	setenv("PATH", new_path.buf, 1);
+
+	strbuf_release(&new_path);
+}
+
+const char **prepare_perf_cmd(const char **argv)
+{
+	int argc;
+	const char **nargv;
+
+	for (argc = 0; argv[argc]; argc++)
+		; /* just counting */
+	nargv = malloc(sizeof(*nargv) * (argc + 2));
+
+	nargv[0] = "perf";
+	for (argc = 0; argv[argc]; argc++)
+		nargv[argc + 1] = argv[argc];
+	nargv[argc + 1] = NULL;
+	return nargv;
+}
+
+int execv_perf_cmd(const char **argv) {
+	const char **nargv = prepare_perf_cmd(argv);
+
+	/* execvp() can only ever return if it fails */
+	execvp("perf", (char **)nargv);
+
+	free(nargv);
+	return -1;
+}
+
+
+int execl_perf_cmd(const char *cmd,...)
+{
+	int argc;
+	const char *argv[MAX_ARGS + 1];
+	const char *arg;
+	va_list param;
+
+	va_start(param, cmd);
+	argv[0] = cmd;
+	argc = 1;
+	while (argc < MAX_ARGS) {
+		arg = argv[argc++] = va_arg(param, char *);
+		if (!arg)
+			break;
+	}
+	va_end(param);
+	if (MAX_ARGS <= argc)
+		return error("too many args to run %s", cmd);
+
+	argv[argc] = NULL;
+	return execv_perf_cmd(argv);
+}
diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h
new file mode 100644
index 0000000..effe25e
--- /dev/null
+++ b/tools/perf/util/exec_cmd.h
@@ -0,0 +1,13 @@
+#ifndef PERF_EXEC_CMD_H
+#define PERF_EXEC_CMD_H
+
+extern void perf_set_argv_exec_path(const char *exec_path);
+extern const char *perf_extract_argv0_path(const char *path);
+extern const char *perf_exec_path(void);
+extern void setup_path(void);
+extern const char **prepare_perf_cmd(const char **argv);
+extern int execv_perf_cmd(const char **argv); /* NULL terminated */
+extern int execl_perf_cmd(const char *cmd, ...);
+extern const char *system_path(const char *path);
+
+#endif /* PERF_EXEC_CMD_H */
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
new file mode 100755
index 0000000..f06f6fd
--- /dev/null
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+echo "/* Automatically generated by $0 */
+struct cmdname_help
+{
+    char name[16];
+    char help[80];
+};
+
+static struct cmdname_help common_cmds[] = {"
+
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+echo "};"
diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c
new file mode 100644
index 0000000..6653f7d
--- /dev/null
+++ b/tools/perf/util/help.c
@@ -0,0 +1,367 @@
+#include "cache.h"
+#include "../builtin.h"
+#include "exec_cmd.h"
+#include "levenshtein.h"
+#include "help.h"
+
+/* most GUI terminals set COLUMNS (although some don't export it) */
+static int term_columns(void)
+{
+	char *col_string = getenv("COLUMNS");
+	int n_cols;
+
+	if (col_string && (n_cols = atoi(col_string)) > 0)
+		return n_cols;
+
+#ifdef TIOCGWINSZ
+	{
+		struct winsize ws;
+		if (!ioctl(1, TIOCGWINSZ, &ws)) {
+			if (ws.ws_col)
+				return ws.ws_col;
+		}
+	}
+#endif
+
+	return 80;
+}
+
+void add_cmdname(struct cmdnames *cmds, const char *name, int len)
+{
+	struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
+
+	ent->len = len;
+	memcpy(ent->name, name, len);
+	ent->name[len] = 0;
+
+	ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
+	cmds->names[cmds->cnt++] = ent;
+}
+
+static void clean_cmdnames(struct cmdnames *cmds)
+{
+	int i;
+	for (i = 0; i < cmds->cnt; ++i)
+		free(cmds->names[i]);
+	free(cmds->names);
+	cmds->cnt = 0;
+	cmds->alloc = 0;
+}
+
+static int cmdname_compare(const void *a_, const void *b_)
+{
+	struct cmdname *a = *(struct cmdname **)a_;
+	struct cmdname *b = *(struct cmdname **)b_;
+	return strcmp(a->name, b->name);
+}
+
+static void uniq(struct cmdnames *cmds)
+{
+	int i, j;
+
+	if (!cmds->cnt)
+		return;
+
+	for (i = j = 1; i < cmds->cnt; i++)
+		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+			cmds->names[j++] = cmds->names[i];
+
+	cmds->cnt = j;
+}
+
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
+{
+	int ci, cj, ei;
+	int cmp;
+
+	ci = cj = ei = 0;
+	while (ci < cmds->cnt && ei < excludes->cnt) {
+		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
+		if (cmp < 0)
+			cmds->names[cj++] = cmds->names[ci++];
+		else if (cmp == 0)
+			ci++, ei++;
+		else if (cmp > 0)
+			ei++;
+	}
+
+	while (ci < cmds->cnt)
+		cmds->names[cj++] = cmds->names[ci++];
+
+	cmds->cnt = cj;
+}
+
+static void pretty_print_string_list(struct cmdnames *cmds, int longest)
+{
+	int cols = 1, rows;
+	int space = longest + 1; /* min 1 SP between words */
+	int max_cols = term_columns() - 1; /* don't print *on* the edge */
+	int i, j;
+
+	if (space < max_cols)
+		cols = max_cols / space;
+	rows = (cmds->cnt + cols - 1) / cols;
+
+	for (i = 0; i < rows; i++) {
+		printf("  ");
+
+		for (j = 0; j < cols; j++) {
+			int n = j * rows + i;
+			int size = space;
+			if (n >= cmds->cnt)
+				break;
+			if (j == cols-1 || n + rows >= cmds->cnt)
+				size = 1;
+			printf("%-*s", size, cmds->names[n]->name);
+		}
+		putchar('\n');
+	}
+}
+
+static int is_executable(const char *name)
+{
+	struct stat st;
+
+	if (stat(name, &st) || /* stat, not lstat */
+	    !S_ISREG(st.st_mode))
+		return 0;
+
+#ifdef __MINGW32__
+	/* cannot trust the executable bit, peek into the file instead */
+	char buf[3] = { 0 };
+	int n;
+	int fd = open(name, O_RDONLY);
+	st.st_mode &= ~S_IXUSR;
+	if (fd >= 0) {
+		n = read(fd, buf, 2);
+		if (n == 2)
+			/* DOS executables start with "MZ" */
+			if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
+				st.st_mode |= S_IXUSR;
+		close(fd);
+	}
+#endif
+	return st.st_mode & S_IXUSR;
+}
+
+static void list_commands_in_dir(struct cmdnames *cmds,
+					 const char *path,
+					 const char *prefix)
+{
+	int prefix_len;
+	DIR *dir = opendir(path);
+	struct dirent *de;
+	struct strbuf buf = STRBUF_INIT;
+	int len;
+
+	if (!dir)
+		return;
+	if (!prefix)
+		prefix = "perf-";
+	prefix_len = strlen(prefix);
+
+	strbuf_addf(&buf, "%s/", path);
+	len = buf.len;
+
+	while ((de = readdir(dir)) != NULL) {
+		int entlen;
+
+		if (prefixcmp(de->d_name, prefix))
+			continue;
+
+		strbuf_setlen(&buf, len);
+		strbuf_addstr(&buf, de->d_name);
+		if (!is_executable(buf.buf))
+			continue;
+
+		entlen = strlen(de->d_name) - prefix_len;
+		if (has_extension(de->d_name, ".exe"))
+			entlen -= 4;
+
+		add_cmdname(cmds, de->d_name + prefix_len, entlen);
+	}
+	closedir(dir);
+	strbuf_release(&buf);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds)
+{
+	const char *env_path = getenv("PATH");
+	const char *exec_path = perf_exec_path();
+
+	if (exec_path) {
+		list_commands_in_dir(main_cmds, exec_path, prefix);
+		qsort(main_cmds->names, main_cmds->cnt,
+		      sizeof(*main_cmds->names), cmdname_compare);
+		uniq(main_cmds);
+	}
+
+	if (env_path) {
+		char *paths, *path, *colon;
+		path = paths = strdup(env_path);
+		while (1) {
+			if ((colon = strchr(path, PATH_SEP)))
+				*colon = 0;
+			if (!exec_path || strcmp(path, exec_path))
+				list_commands_in_dir(other_cmds, path, prefix);
+
+			if (!colon)
+				break;
+			path = colon + 1;
+		}
+		free(paths);
+
+		qsort(other_cmds->names, other_cmds->cnt,
+		      sizeof(*other_cmds->names), cmdname_compare);
+		uniq(other_cmds);
+	}
+	exclude_cmds(other_cmds, main_cmds);
+}
+
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds)
+{
+	int i, longest = 0;
+
+	for (i = 0; i < main_cmds->cnt; i++)
+		if (longest < main_cmds->names[i]->len)
+			longest = main_cmds->names[i]->len;
+	for (i = 0; i < other_cmds->cnt; i++)
+		if (longest < other_cmds->names[i]->len)
+			longest = other_cmds->names[i]->len;
+
+	if (main_cmds->cnt) {
+		const char *exec_path = perf_exec_path();
+		printf("available %s in '%s'\n", title, exec_path);
+		printf("----------------");
+		mput_char('-', strlen(title) + strlen(exec_path));
+		putchar('\n');
+		pretty_print_string_list(main_cmds, longest);
+		putchar('\n');
+	}
+
+	if (other_cmds->cnt) {
+		printf("%s available from elsewhere on your $PATH\n", title);
+		printf("---------------------------------------");
+		mput_char('-', strlen(title));
+		putchar('\n');
+		pretty_print_string_list(other_cmds, longest);
+		putchar('\n');
+	}
+}
+
+int is_in_cmdlist(struct cmdnames *c, const char *s)
+{
+	int i;
+	for (i = 0; i < c->cnt; i++)
+		if (!strcmp(s, c->names[i]->name))
+			return 1;
+	return 0;
+}
+
+static int autocorrect;
+static struct cmdnames aliases;
+
+static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "help.autocorrect"))
+		autocorrect = perf_config_int(var,value);
+	/* Also use aliases for command lookup */
+	if (!prefixcmp(var, "alias."))
+		add_cmdname(&aliases, var + 6, strlen(var + 6));
+
+	return perf_default_config(var, value, cb);
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+	const struct cmdname *const *c1 = p1, *const *c2 = p2;
+	const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+	int l1 = (*c1)->len;
+	int l2 = (*c2)->len;
+	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+	int i;
+	ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
+
+	for (i = 0; i < old->cnt; i++)
+		cmds->names[cmds->cnt++] = old->names[i];
+	free(old->names);
+	old->cnt = 0;
+	old->names = NULL;
+}
+
+const char *help_unknown_cmd(const char *cmd)
+{
+	int i, n = 0, best_similarity = 0;
+	struct cmdnames main_cmds, other_cmds;
+
+	memset(&main_cmds, 0, sizeof(main_cmds));
+	memset(&other_cmds, 0, sizeof(main_cmds));
+	memset(&aliases, 0, sizeof(aliases));
+
+	perf_config(perf_unknown_cmd_config, NULL);
+
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	add_cmd_list(&main_cmds, &aliases);
+	add_cmd_list(&main_cmds, &other_cmds);
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(main_cmds.names), cmdname_compare);
+	uniq(&main_cmds);
+
+	if (main_cmds.cnt) {
+		/* This reuses cmdname->len for similarity index */
+		for (i = 0; i < main_cmds.cnt; ++i)
+			main_cmds.names[i]->len =
+				levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+		qsort(main_cmds.names, main_cmds.cnt,
+		      sizeof(*main_cmds.names), levenshtein_compare);
+
+		best_similarity = main_cmds.names[0]->len;
+		n = 1;
+		while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+			++n;
+	}
+
+	if (autocorrect && n == 1) {
+		const char *assumed = main_cmds.names[0]->name;
+
+		main_cmds.names[0] = NULL;
+		clean_cmdnames(&main_cmds);
+		fprintf(stderr, "WARNING: You called a Git program named '%s', "
+			"which does not exist.\n"
+			"Continuing under the assumption that you meant '%s'\n",
+			cmd, assumed);
+		if (autocorrect > 0) {
+			fprintf(stderr, "in %0.1f seconds automatically...\n",
+				(float)autocorrect/10.0);
+			poll(NULL, 0, autocorrect * 100);
+		}
+		return assumed;
+	}
+
+	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+	if (main_cmds.cnt && best_similarity < 6) {
+		fprintf(stderr, "\nDid you mean %s?\n",
+			n < 2 ? "this": "one of these");
+
+		for (i = 0; i < n; i++)
+			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+	}
+
+	exit(1);
+}
+
+int cmd_version(int argc, const char **argv, const char *prefix)
+{
+	printf("perf version %s\n", perf_version_string);
+	return 0;
+}
diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h
new file mode 100644
index 0000000..56bc154
--- /dev/null
+++ b/tools/perf/util/help.h
@@ -0,0 +1,29 @@
+#ifndef HELP_H
+#define HELP_H
+
+struct cmdnames {
+	int alloc;
+	int cnt;
+	struct cmdname {
+		size_t len; /* also used for similarity index in help.c */
+		char name[FLEX_ARRAY];
+	} **names;
+};
+
+static inline void mput_char(char c, unsigned int num)
+{
+	while(num--)
+		putchar(c);
+}
+
+void load_command_list(const char *prefix,
+		struct cmdnames *main_cmds,
+		struct cmdnames *other_cmds);
+void add_cmdname(struct cmdnames *cmds, const char *name, int len);
+/* Here we require that excludes is a sorted list. */
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
+int is_in_cmdlist(struct cmdnames *c, const char *s);
+void list_commands(const char *title, struct cmdnames *main_cmds,
+		   struct cmdnames *other_cmds);
+
+#endif /* HELP_H */
diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c
new file mode 100644
index 0000000..e521d15
--- /dev/null
+++ b/tools/perf/util/levenshtein.c
@@ -0,0 +1,84 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+/*
+ * This function implements the Damerau-Levenshtein algorithm to
+ * calculate a distance between strings.
+ *
+ * Basically, it says how many letters need to be swapped, substituted,
+ * deleted from, or added to string1, at least, to get string2.
+ *
+ * The idea is to build a distance matrix for the substrings of both
+ * strings.  To avoid a large space complexity, only the last three rows
+ * are kept in memory (if swaps had the same or higher cost as one deletion
+ * plus one insertion, only two rows would be needed).
+ *
+ * At any stage, "i + 1" denotes the length of the current substring of
+ * string1 that the distance is calculated for.
+ *
+ * row2 holds the current row, row1 the previous row (i.e. for the substring
+ * of string1 of length "i"), and row0 the row before that.
+ *
+ * In other words, at the start of the big loop, row2[j + 1] contains the
+ * Damerau-Levenshtein distance between the substring of string1 of length
+ * "i" and the substring of string2 of length "j + 1".
+ *
+ * All the big loop does is determine the partial minimum-cost paths.
+ *
+ * It does so by calculating the costs of the path ending in characters
+ * i (in string1) and j (in string2), respectively, given that the last
+ * operation is a substition, a swap, a deletion, or an insertion.
+ *
+ * This implementation allows the costs to be weighted:
+ *
+ * - w (as in "sWap")
+ * - s (as in "Substitution")
+ * - a (for insertion, AKA "Add")
+ * - d (as in "Deletion")
+ *
+ * Note that this algorithm calculates a distance _iff_ d == a.
+ */
+int levenshtein(const char *string1, const char *string2,
+		int w, int s, int a, int d)
+{
+	int len1 = strlen(string1), len2 = strlen(string2);
+	int *row0 = malloc(sizeof(int) * (len2 + 1));
+	int *row1 = malloc(sizeof(int) * (len2 + 1));
+	int *row2 = malloc(sizeof(int) * (len2 + 1));
+	int i, j;
+
+	for (j = 0; j <= len2; j++)
+		row1[j] = j * a;
+	for (i = 0; i < len1; i++) {
+		int *dummy;
+
+		row2[0] = (i + 1) * d;
+		for (j = 0; j < len2; j++) {
+			/* substitution */
+			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+			/* swap */
+			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+					string1[i] == string2[j - 1] &&
+					row2[j + 1] > row0[j - 1] + w)
+				row2[j + 1] = row0[j - 1] + w;
+			/* deletion */
+			if (row2[j + 1] > row1[j + 1] + d)
+				row2[j + 1] = row1[j + 1] + d;
+			/* insertion */
+			if (row2[j + 1] > row2[j] + a)
+				row2[j + 1] = row2[j] + a;
+		}
+
+		dummy = row0;
+		row0 = row1;
+		row1 = row2;
+		row2 = dummy;
+	}
+
+	i = row1[len2];
+	free(row0);
+	free(row1);
+	free(row2);
+
+	return i;
+}
diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h
new file mode 100644
index 0000000..0173abe
--- /dev/null
+++ b/tools/perf/util/levenshtein.h
@@ -0,0 +1,8 @@
+#ifndef LEVENSHTEIN_H
+#define LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+	int swap_penalty, int substition_penalty,
+	int insertion_penalty, int deletion_penalty);
+
+#endif
diff --git a/tools/perf/util/list.h b/tools/perf/util/list.h
new file mode 100644
index 0000000..e2548e8
--- /dev/null
+++ b/tools/perf/util/list.h
@@ -0,0 +1,603 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+/*
+  Copyright (C) Cast of dozens, comes from the Linux kernel
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+*/
+
+#include <stddef.h>
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *)0x00100100)
+#define LIST_POISON2 ((void *)0x00200200)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:	the pointer to the member.
+ * @type:	the type of the container struct this is embedded in.
+ * @member:	the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({			\
+        const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+        (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->next = LIST_POISON1;
+	entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_range - deletes range of entries from list.
+ * @beging: first element in the range to delete from the list.
+ * @beging: first element in the range to delete from the list.
+ * Note: list_empty on the range of entries does not return true after this,
+ * the entries is in an undefined state.
+ */
+static inline void list_del_range(struct list_head *begin,
+				  struct list_head *end)
+{
+	begin->prev->next = end->next;
+	end->next->prev = begin->prev;
+}
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ * Note: if 'old' was empty, it will be overwritten.
+ */
+static inline void list_replace(struct list_head *old,
+				struct list_head *new)
+{
+	new->next = old->next;
+	new->next->prev = new;
+	new->prev = old->prev;
+	new->prev->next = new;
+}
+
+static inline void list_replace_init(struct list_head *old,
+					struct list_head *new)
+{
+	list_replace(old, new);
+	INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+        __list_del(list->prev, list->next);
+        list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+        __list_del(list->prev, list->next);
+        list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_last(const struct list_head *list,
+				const struct list_head *head)
+{
+	return list->next == head;
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * tests whether a list is empty _and_ checks that no other CPU might be
+ * in the process of modifying either member (next or prev)
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+	struct list_head *next = head->next;
+	return (next == head) && (next == head->prev);
+}
+
+static inline void __list_splice(struct list_head *list,
+				 struct list_head *head)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+	struct list_head *at = head->next;
+
+	first->prev = head;
+	head->next = first;
+
+	last->next = at;
+	at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:	the &struct list_head pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr:       the list head to take the element from.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/**
+ * list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); \
+        	pos = pos->next)
+
+/**
+ * __list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev	-	iterate over a list backwards
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+	for (pos = (head)->prev; pos != (head); \
+        	pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry	-	iterate over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	     &pos->member != (head); 	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member)			\
+	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	     &pos->member != (head); 	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue
+ * @pos:	the type * to use as a start point
+ * @head:	the head of the list
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue.
+ */
+#define list_prepare_entry(pos, head, member) \
+	((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue(pos, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position.
+ */
+#define list_for_each_entry_from(pos, head, member) 			\
+	for (; &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+		n = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
+		n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) 			\
+	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
+	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+	return !h->pprev;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+		next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+static inline void hlist_del_init(struct hlist_node *n)
+{
+	if (!hlist_unhashed(n)) {
+		__hlist_del(n);
+		INIT_HLIST_NODE(n);
+	}
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+/* next must be != NULL */
+static inline void hlist_add_before(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	next->pprev = &n->next;
+	*(n->pprev) = n;
+}
+
+static inline void hlist_add_after(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	if(next->next)
+		next->next->pprev  = &next->next;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_for_each(pos, head) \
+	for (pos = (head)->first; pos; \
+	     pos = pos->next)
+
+#define hlist_for_each_safe(pos, n, head) \
+	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+	     pos = n)
+
+/**
+ * hlist_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member)			 \
+	for (pos = (head)->first;					 \
+	     pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member)		 \
+	for (pos = (pos)->next;						 \
+	     pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member)			 \
+	for (; pos && 			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @n:		another &struct hlist_node to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
+	for (pos = (head)->first;					 \
+	     pos && ({ n = pos->next; 1; }) && 				 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+#endif
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
new file mode 100644
index 0000000..a28bcca
--- /dev/null
+++ b/tools/perf/util/pager.c
@@ -0,0 +1,99 @@
+#include "cache.h"
+#include "run-command.h"
+#include "sigchain.h"
+
+/*
+ * This is split up from the rest of git so that we can do
+ * something different on Windows.
+ */
+
+static int spawned_pager;
+
+#ifndef __MINGW32__
+static void pager_preexec(void)
+{
+	/*
+	 * Work around bug in "less" by not starting it until we
+	 * have real input
+	 */
+	fd_set in;
+
+	FD_ZERO(&in);
+	FD_SET(0, &in);
+	select(1, &in, NULL, &in, NULL);
+
+	setenv("LESS", "FRSX", 0);
+}
+#endif
+
+static const char *pager_argv[] = { "sh", "-c", NULL, NULL };
+static struct child_process pager_process;
+
+static void wait_for_pager(void)
+{
+	fflush(stdout);
+	fflush(stderr);
+	/* signal EOF to pager */
+	close(1);
+	close(2);
+	finish_command(&pager_process);
+}
+
+static void wait_for_pager_signal(int signo)
+{
+	wait_for_pager();
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+void setup_pager(void)
+{
+	const char *pager = getenv("PERF_PAGER");
+
+	if (!isatty(1))
+		return;
+	if (!pager) {
+		if (!pager_program)
+			perf_config(perf_default_config, NULL);
+		pager = pager_program;
+	}
+	if (!pager)
+		pager = getenv("PAGER");
+	if (!pager)
+		pager = "less";
+	else if (!*pager || !strcmp(pager, "cat"))
+		return;
+
+	spawned_pager = 1; /* means we are emitting to terminal */
+
+	/* spawn the pager */
+	pager_argv[2] = pager;
+	pager_process.argv = pager_argv;
+	pager_process.in = -1;
+#ifndef __MINGW32__
+	pager_process.preexec_cb = pager_preexec;
+#endif
+	if (start_command(&pager_process))
+		return;
+
+	/* original process continues, but writes to the pipe */
+	dup2(pager_process.in, 1);
+	if (isatty(2))
+		dup2(pager_process.in, 2);
+	close(pager_process.in);
+
+	/* this makes sure that the parent terminates after the pager */
+	sigchain_push_common(wait_for_pager_signal);
+	atexit(wait_for_pager);
+}
+
+int pager_in_use(void)
+{
+	const char *env;
+
+	if (spawned_pager)
+		return 1;
+
+	env = getenv("PERF_PAGER_IN_USE");
+	return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0;
+}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
new file mode 100644
index 0000000..e0820b4
--- /dev/null
+++ b/tools/perf/util/parse-events.c
@@ -0,0 +1,316 @@
+
+#include "../perf.h"
+#include "util.h"
+#include "parse-options.h"
+#include "parse-events.h"
+#include "exec_cmd.h"
+#include "string.h"
+
+extern char *strcasestr(const char *haystack, const char *needle);
+
+int					nr_counters;
+
+struct perf_counter_attr		attrs[MAX_COUNTERS];
+
+struct event_symbol {
+	__u8	type;
+	__u64	config;
+	char	*symbol;
+};
+
+#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
+#define CR(x, y) .type = PERF_TYPE_##x, .config = y
+
+static struct event_symbol event_symbols[] = {
+  { C(HARDWARE, CPU_CYCLES),		"cpu-cycles",		},
+  { C(HARDWARE, CPU_CYCLES),		"cycles",		},
+  { C(HARDWARE, INSTRUCTIONS),		"instructions",		},
+  { C(HARDWARE, CACHE_REFERENCES),	"cache-references",	},
+  { C(HARDWARE, CACHE_MISSES),		"cache-misses",		},
+  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branches",		},
+  { C(HARDWARE, BRANCH_MISSES),		"branch-misses",	},
+  { C(HARDWARE, BUS_CYCLES),		"bus-cycles",		},
+
+  { C(SOFTWARE, CPU_CLOCK),		"cpu-clock",		},
+  { C(SOFTWARE, TASK_CLOCK),		"task-clock",		},
+  { C(SOFTWARE, PAGE_FAULTS),		"page-faults",		},
+  { C(SOFTWARE, PAGE_FAULTS),		"faults",		},
+  { C(SOFTWARE, PAGE_FAULTS_MIN),	"minor-faults",		},
+  { C(SOFTWARE, PAGE_FAULTS_MAJ),	"major-faults",		},
+  { C(SOFTWARE, CONTEXT_SWITCHES),	"context-switches",	},
+  { C(SOFTWARE, CONTEXT_SWITCHES),	"cs",			},
+  { C(SOFTWARE, CPU_MIGRATIONS),	"cpu-migrations",	},
+  { C(SOFTWARE, CPU_MIGRATIONS),	"migrations",		},
+};
+
+#define __PERF_COUNTER_FIELD(config, name) \
+	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
+#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
+#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
+#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
+
+static char *hw_event_names[] = {
+	"cycles",
+	"instructions",
+	"cache-references",
+	"cache-misses",
+	"branches",
+	"branch-misses",
+	"bus-cycles",
+};
+
+static char *sw_event_names[] = {
+	"cpu-clock-ticks",
+	"task-clock-ticks",
+	"page-faults",
+	"context-switches",
+	"CPU-migrations",
+	"minor-faults",
+	"major-faults",
+};
+
+#define MAX_ALIASES 8
+
+static char *hw_cache [][MAX_ALIASES] = {
+	{ "L1-data"		, "l1-d", "l1d", "l1"				},
+	{ "L1-instruction"	, "l1-i", "l1i"					},
+	{ "L2"			, "l2"						},
+	{ "Data-TLB"		, "dtlb", "d-tlb"				},
+	{ "Instruction-TLB"	, "itlb", "i-tlb"				},
+	{ "Branch"		, "bpu" , "btb", "bpc"				},
+};
+
+static char *hw_cache_op [][MAX_ALIASES] = {
+	{ "Load"		, "read"					},
+	{ "Store"		, "write"					},
+	{ "Prefetch"		, "speculative-read", "speculative-load"	},
+};
+
+static char *hw_cache_result [][MAX_ALIASES] = {
+	{ "Reference"		, "ops", "access"				},
+	{ "Miss"								},
+};
+
+char *event_name(int counter)
+{
+	__u64 config = attrs[counter].config;
+	int type = attrs[counter].type;
+	static char buf[32];
+
+	if (attrs[counter].type == PERF_TYPE_RAW) {
+		sprintf(buf, "raw 0x%llx", config);
+		return buf;
+	}
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		if (config < PERF_HW_EVENTS_MAX)
+			return hw_event_names[config];
+		return "unknown-hardware";
+
+	case PERF_TYPE_HW_CACHE: {
+		__u8 cache_type, cache_op, cache_result;
+		static char name[100];
+
+		cache_type   = (config >>  0) & 0xff;
+		if (cache_type > PERF_COUNT_HW_CACHE_MAX)
+			return "unknown-ext-hardware-cache-type";
+
+		cache_op     = (config >>  8) & 0xff;
+		if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX)
+			return "unknown-ext-hardware-cache-op";
+
+		cache_result = (config >> 16) & 0xff;
+		if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
+			return "unknown-ext-hardware-cache-result";
+
+		sprintf(name, "%s-Cache-%s-%ses",
+			hw_cache[cache_type][0],
+			hw_cache_op[cache_op][0],
+			hw_cache_result[cache_result][0]);
+
+		return name;
+	}
+
+	case PERF_TYPE_SOFTWARE:
+		if (config < PERF_SW_EVENTS_MAX)
+			return sw_event_names[config];
+		return "unknown-software";
+
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
+static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
+{
+	int i, j;
+
+	for (i = 0; i < size; i++) {
+		for (j = 0; j < MAX_ALIASES; j++) {
+			if (!names[i][j])
+				break;
+			if (strcasestr(str, names[i][j]))
+				return i;
+		}
+	}
+
+	return 0;
+}
+
+static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
+{
+	__u8 cache_type = -1, cache_op = 0, cache_result = 0;
+
+	cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX);
+	/*
+	 * No fallback - if we cannot get a clear cache type
+	 * then bail out:
+	 */
+	if (cache_type == -1)
+		return -EINVAL;
+
+	cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
+	/*
+	 * Fall back to reads:
+	 */
+	if (cache_type == -1)
+		cache_type = PERF_COUNT_HW_CACHE_OP_READ;
+
+	cache_result = parse_aliases(str, hw_cache_result,
+					PERF_COUNT_HW_CACHE_RESULT_MAX);
+	/*
+	 * Fall back to accesses:
+	 */
+	if (cache_result == -1)
+		cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
+
+	attr->config = cache_type | (cache_op << 8) | (cache_result << 16);
+	attr->type = PERF_TYPE_HW_CACHE;
+
+	return 0;
+}
+
+/*
+ * Each event can have multiple symbolic names.
+ * Symbolic names are (almost) exactly matched.
+ */
+static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
+{
+	__u64 config, id;
+	int type;
+	unsigned int i;
+	const char *sep, *pstr;
+
+	if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) {
+		attr->type = PERF_TYPE_RAW;
+		attr->config = config;
+
+		return 0;
+	}
+
+	pstr = str;
+	sep = strchr(pstr, ':');
+	if (sep) {
+		type = atoi(pstr);
+		pstr = sep + 1;
+		id = atoi(pstr);
+		sep = strchr(pstr, ':');
+		if (sep) {
+			pstr = sep + 1;
+			if (strchr(pstr, 'k'))
+				attr->exclude_user = 1;
+			if (strchr(pstr, 'u'))
+				attr->exclude_kernel = 1;
+		}
+		attr->type = type;
+		attr->config = id;
+
+		return 0;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+		if (!strncmp(str, event_symbols[i].symbol,
+			     strlen(event_symbols[i].symbol))) {
+
+			attr->type = event_symbols[i].type;
+			attr->config = event_symbols[i].config;
+
+			return 0;
+		}
+	}
+
+	return parse_generic_hw_symbols(str, attr);
+}
+
+int parse_events(const struct option *opt, const char *str, int unset)
+{
+	struct perf_counter_attr attr;
+	int ret;
+
+	memset(&attr, 0, sizeof(attr));
+again:
+	if (nr_counters == MAX_COUNTERS)
+		return -1;
+
+	ret = parse_event_symbols(str, &attr);
+	if (ret < 0)
+		return ret;
+
+	attrs[nr_counters] = attr;
+	nr_counters++;
+
+	str = strstr(str, ",");
+	if (str) {
+		str++;
+		goto again;
+	}
+
+	return 0;
+}
+
+static const char * const event_type_descriptors[] = {
+	"",
+	"Hardware event",
+	"Software event",
+	"Tracepoint event",
+	"Hardware cache event",
+};
+
+/*
+ * Print the help text for the event symbols:
+ */
+void print_events(void)
+{
+	struct event_symbol *syms = event_symbols;
+	unsigned int i, type, prev_type = -1;
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
+
+	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
+		type = syms->type + 1;
+		if (type > ARRAY_SIZE(event_type_descriptors))
+			type = 0;
+
+		if (type != prev_type)
+			fprintf(stderr, "\n");
+
+		fprintf(stderr, "  %-30s [%s]\n", syms->symbol,
+			event_type_descriptors[type]);
+
+		prev_type = type;
+	}
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, "  %-30s [raw hardware event descriptor]\n",
+		"rNNN");
+	fprintf(stderr, "\n");
+
+	exit(129);
+}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
new file mode 100644
index 0000000..e3d5529
--- /dev/null
+++ b/tools/perf/util/parse-events.h
@@ -0,0 +1,17 @@
+
+/*
+ * Parse symbolic events/counts passed in as options:
+ */
+
+extern int			nr_counters;
+
+extern struct perf_counter_attr attrs[MAX_COUNTERS];
+
+extern char *event_name(int ctr);
+
+extern int parse_events(const struct option *opt, const char *str, int unset);
+
+#define EVENTS_HELP_MAX (128*1024)
+
+extern void print_events(void);
+
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
new file mode 100644
index 0000000..b3affb1
--- /dev/null
+++ b/tools/perf/util/parse-options.c
@@ -0,0 +1,508 @@
+#include "util.h"
+#include "parse-options.h"
+#include "cache.h"
+
+#define OPT_SHORT 1
+#define OPT_UNSET 2
+
+static int opterror(const struct option *opt, const char *reason, int flags)
+{
+	if (flags & OPT_SHORT)
+		return error("switch `%c' %s", opt->short_name, reason);
+	if (flags & OPT_UNSET)
+		return error("option `no-%s' %s", opt->long_name, reason);
+	return error("option `%s' %s", opt->long_name, reason);
+}
+
+static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
+		   int flags, const char **arg)
+{
+	if (p->opt) {
+		*arg = p->opt;
+		p->opt = NULL;
+	} else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) {
+		*arg = (const char *)opt->defval;
+	} else if (p->argc > 1) {
+		p->argc--;
+		*arg = *++p->argv;
+	} else
+		return opterror(opt, "requires a value", flags);
+	return 0;
+}
+
+static int get_value(struct parse_opt_ctx_t *p,
+		     const struct option *opt, int flags)
+{
+	const char *s, *arg = NULL;
+	const int unset = flags & OPT_UNSET;
+
+	if (unset && p->opt)
+		return opterror(opt, "takes no value", flags);
+	if (unset && (opt->flags & PARSE_OPT_NONEG))
+		return opterror(opt, "isn't available", flags);
+
+	if (!(flags & OPT_SHORT) && p->opt) {
+		switch (opt->type) {
+		case OPTION_CALLBACK:
+			if (!(opt->flags & PARSE_OPT_NOARG))
+				break;
+			/* FALLTHROUGH */
+		case OPTION_BOOLEAN:
+		case OPTION_BIT:
+		case OPTION_SET_INT:
+		case OPTION_SET_PTR:
+			return opterror(opt, "takes no value", flags);
+		default:
+			break;
+		}
+	}
+
+	switch (opt->type) {
+	case OPTION_BIT:
+		if (unset)
+			*(int *)opt->value &= ~opt->defval;
+		else
+			*(int *)opt->value |= opt->defval;
+		return 0;
+
+	case OPTION_BOOLEAN:
+		*(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
+		return 0;
+
+	case OPTION_SET_INT:
+		*(int *)opt->value = unset ? 0 : opt->defval;
+		return 0;
+
+	case OPTION_SET_PTR:
+		*(void **)opt->value = unset ? NULL : (void *)opt->defval;
+		return 0;
+
+	case OPTION_STRING:
+		if (unset)
+			*(const char **)opt->value = NULL;
+		else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			*(const char **)opt->value = (const char *)opt->defval;
+		else
+			return get_arg(p, opt, flags, (const char **)opt->value);
+		return 0;
+
+	case OPTION_CALLBACK:
+		if (unset)
+			return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_NOARG)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+			return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		return (*opt->callback)(opt, arg, 0) ? (-1) : 0;
+
+	case OPTION_INTEGER:
+		if (unset) {
+			*(int *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(int *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(int *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	case OPTION_LONG:
+		if (unset) {
+			*(long *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(long *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(long *)opt->value = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
+	default:
+		die("should not happen, someone must be hit on the forehead");
+	}
+}
+
+static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
+{
+	for (; options->type != OPTION_END; options++) {
+		if (options->short_name == *p->opt) {
+			p->opt = p->opt[1] ? p->opt + 1 : NULL;
+			return get_value(p, options, OPT_SHORT);
+		}
+	}
+	return -2;
+}
+
+static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
+                          const struct option *options)
+{
+	const char *arg_end = strchr(arg, '=');
+	const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
+	int abbrev_flags = 0, ambiguous_flags = 0;
+
+	if (!arg_end)
+		arg_end = arg + strlen(arg);
+
+	for (; options->type != OPTION_END; options++) {
+		const char *rest;
+		int flags = 0;
+
+		if (!options->long_name)
+			continue;
+
+		rest = skip_prefix(arg, options->long_name);
+		if (options->type == OPTION_ARGUMENT) {
+			if (!rest)
+				continue;
+			if (*rest == '=')
+				return opterror(options, "takes no value", flags);
+			if (*rest)
+				continue;
+			p->out[p->cpidx++] = arg - 2;
+			return 0;
+		}
+		if (!rest) {
+			/* abbreviated? */
+			if (!strncmp(options->long_name, arg, arg_end - arg)) {
+is_abbreviated:
+				if (abbrev_option) {
+					/*
+					 * If this is abbreviated, it is
+					 * ambiguous. So when there is no
+					 * exact match later, we need to
+					 * error out.
+					 */
+					ambiguous_option = abbrev_option;
+					ambiguous_flags = abbrev_flags;
+				}
+				if (!(flags & OPT_UNSET) && *arg_end)
+					p->opt = arg_end + 1;
+				abbrev_option = options;
+				abbrev_flags = flags;
+				continue;
+			}
+			/* negated and abbreviated very much? */
+			if (!prefixcmp("no-", arg)) {
+				flags |= OPT_UNSET;
+				goto is_abbreviated;
+			}
+			/* negated? */
+			if (strncmp(arg, "no-", 3))
+				continue;
+			flags |= OPT_UNSET;
+			rest = skip_prefix(arg + 3, options->long_name);
+			/* abbreviated and negated? */
+			if (!rest && !prefixcmp(options->long_name, arg + 3))
+				goto is_abbreviated;
+			if (!rest)
+				continue;
+		}
+		if (*rest) {
+			if (*rest != '=')
+				continue;
+			p->opt = rest + 1;
+		}
+		return get_value(p, options, flags);
+	}
+
+	if (ambiguous_option)
+		return error("Ambiguous option: %s "
+			"(could be --%s%s or --%s%s)",
+			arg,
+			(ambiguous_flags & OPT_UNSET) ?  "no-" : "",
+			ambiguous_option->long_name,
+			(abbrev_flags & OPT_UNSET) ?  "no-" : "",
+			abbrev_option->long_name);
+	if (abbrev_option)
+		return get_value(p, abbrev_option, abbrev_flags);
+	return -2;
+}
+
+static void check_typos(const char *arg, const struct option *options)
+{
+	if (strlen(arg) < 3)
+		return;
+
+	if (!prefixcmp(arg, "no-")) {
+		error ("did you mean `--%s` (with two dashes ?)", arg);
+		exit(129);
+	}
+
+	for (; options->type != OPTION_END; options++) {
+		if (!options->long_name)
+			continue;
+		if (!prefixcmp(options->long_name, arg)) {
+			error ("did you mean `--%s` (with two dashes ?)", arg);
+			exit(129);
+		}
+	}
+}
+
+void parse_options_start(struct parse_opt_ctx_t *ctx,
+			 int argc, const char **argv, int flags)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->argc = argc - 1;
+	ctx->argv = argv + 1;
+	ctx->out  = argv;
+	ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
+	ctx->flags = flags;
+	if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
+	    (flags & PARSE_OPT_STOP_AT_NON_OPTION))
+		die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
+}
+
+static int usage_with_options_internal(const char * const *,
+				       const struct option *, int);
+
+int parse_options_step(struct parse_opt_ctx_t *ctx,
+		       const struct option *options,
+		       const char * const usagestr[])
+{
+	int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
+
+	/* we must reset ->opt, unknown short option leave it dangling */
+	ctx->opt = NULL;
+
+	for (; ctx->argc; ctx->argc--, ctx->argv++) {
+		const char *arg = ctx->argv[0];
+
+		if (*arg != '-' || !arg[1]) {
+			if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
+				break;
+			ctx->out[ctx->cpidx++] = ctx->argv[0];
+			continue;
+		}
+
+		if (arg[1] != '-') {
+			ctx->opt = arg + 1;
+			if (internal_help && *ctx->opt == 'h')
+				return parse_options_usage(usagestr, options);
+			switch (parse_short_opt(ctx, options)) {
+			case -1:
+				return parse_options_usage(usagestr, options);
+			case -2:
+				goto unknown;
+			}
+			if (ctx->opt)
+				check_typos(arg + 1, options);
+			while (ctx->opt) {
+				if (internal_help && *ctx->opt == 'h')
+					return parse_options_usage(usagestr, options);
+				switch (parse_short_opt(ctx, options)) {
+				case -1:
+					return parse_options_usage(usagestr, options);
+				case -2:
+					/* fake a short option thing to hide the fact that we may have
+					 * started to parse aggregated stuff
+					 *
+					 * This is leaky, too bad.
+					 */
+					ctx->argv[0] = strdup(ctx->opt - 1);
+					*(char *)ctx->argv[0] = '-';
+					goto unknown;
+				}
+			}
+			continue;
+		}
+
+		if (!arg[2]) { /* "--" */
+			if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
+				ctx->argc--;
+				ctx->argv++;
+			}
+			break;
+		}
+
+		if (internal_help && !strcmp(arg + 2, "help-all"))
+			return usage_with_options_internal(usagestr, options, 1);
+		if (internal_help && !strcmp(arg + 2, "help"))
+			return parse_options_usage(usagestr, options);
+		switch (parse_long_opt(ctx, arg + 2, options)) {
+		case -1:
+			return parse_options_usage(usagestr, options);
+		case -2:
+			goto unknown;
+		}
+		continue;
+unknown:
+		if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
+			return PARSE_OPT_UNKNOWN;
+		ctx->out[ctx->cpidx++] = ctx->argv[0];
+		ctx->opt = NULL;
+	}
+	return PARSE_OPT_DONE;
+}
+
+int parse_options_end(struct parse_opt_ctx_t *ctx)
+{
+	memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
+	ctx->out[ctx->cpidx + ctx->argc] = NULL;
+	return ctx->cpidx + ctx->argc;
+}
+
+int parse_options(int argc, const char **argv, const struct option *options,
+		  const char * const usagestr[], int flags)
+{
+	struct parse_opt_ctx_t ctx;
+
+	parse_options_start(&ctx, argc, argv, flags);
+	switch (parse_options_step(&ctx, options, usagestr)) {
+	case PARSE_OPT_HELP:
+		exit(129);
+	case PARSE_OPT_DONE:
+		break;
+	default: /* PARSE_OPT_UNKNOWN */
+		if (ctx.argv[0][1] == '-') {
+			error("unknown option `%s'", ctx.argv[0] + 2);
+		} else {
+			error("unknown switch `%c'", *ctx.opt);
+		}
+		usage_with_options(usagestr, options);
+	}
+
+	return parse_options_end(&ctx);
+}
+
+#define USAGE_OPTS_WIDTH 24
+#define USAGE_GAP         2
+
+int usage_with_options_internal(const char * const *usagestr,
+				const struct option *opts, int full)
+{
+	if (!usagestr)
+		return PARSE_OPT_HELP;
+
+	fprintf(stderr, "\n usage: %s\n", *usagestr++);
+	while (*usagestr && **usagestr)
+		fprintf(stderr, "    or: %s\n", *usagestr++);
+	while (*usagestr) {
+		fprintf(stderr, "%s%s\n",
+				**usagestr ? "    " : "",
+				*usagestr);
+		usagestr++;
+	}
+
+	if (opts->type != OPTION_GROUP)
+		fputc('\n', stderr);
+
+	for (; opts->type != OPTION_END; opts++) {
+		size_t pos;
+		int pad;
+
+		if (opts->type == OPTION_GROUP) {
+			fputc('\n', stderr);
+			if (*opts->help)
+				fprintf(stderr, "%s\n", opts->help);
+			continue;
+		}
+		if (!full && (opts->flags & PARSE_OPT_HIDDEN))
+			continue;
+
+		pos = fprintf(stderr, "    ");
+		if (opts->short_name)
+			pos += fprintf(stderr, "-%c", opts->short_name);
+		if (opts->long_name && opts->short_name)
+			pos += fprintf(stderr, ", ");
+		if (opts->long_name)
+			pos += fprintf(stderr, "--%s", opts->long_name);
+
+		switch (opts->type) {
+		case OPTION_ARGUMENT:
+			break;
+		case OPTION_INTEGER:
+			if (opts->flags & PARSE_OPT_OPTARG)
+				if (opts->long_name)
+					pos += fprintf(stderr, "[=<n>]");
+				else
+					pos += fprintf(stderr, "[<n>]");
+			else
+				pos += fprintf(stderr, " <n>");
+			break;
+		case OPTION_CALLBACK:
+			if (opts->flags & PARSE_OPT_NOARG)
+				break;
+			/* FALLTHROUGH */
+		case OPTION_STRING:
+			if (opts->argh) {
+				if (opts->flags & PARSE_OPT_OPTARG)
+					if (opts->long_name)
+						pos += fprintf(stderr, "[=<%s>]", opts->argh);
+					else
+						pos += fprintf(stderr, "[<%s>]", opts->argh);
+				else
+					pos += fprintf(stderr, " <%s>", opts->argh);
+			} else {
+				if (opts->flags & PARSE_OPT_OPTARG)
+					if (opts->long_name)
+						pos += fprintf(stderr, "[=...]");
+					else
+						pos += fprintf(stderr, "[...]");
+				else
+					pos += fprintf(stderr, " ...");
+			}
+			break;
+		default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */
+			break;
+		}
+
+		if (pos <= USAGE_OPTS_WIDTH)
+			pad = USAGE_OPTS_WIDTH - pos;
+		else {
+			fputc('\n', stderr);
+			pad = USAGE_OPTS_WIDTH;
+		}
+		fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+	}
+	fputc('\n', stderr);
+
+	return PARSE_OPT_HELP;
+}
+
+void usage_with_options(const char * const *usagestr,
+			const struct option *opts)
+{
+	usage_with_options_internal(usagestr, opts, 0);
+	exit(129);
+}
+
+int parse_options_usage(const char * const *usagestr,
+			const struct option *opts)
+{
+	return usage_with_options_internal(usagestr, opts, 0);
+}
+
+
+int parse_opt_verbosity_cb(const struct option *opt, const char *arg,
+			   int unset)
+{
+	int *target = opt->value;
+
+	if (unset)
+		/* --no-quiet, --no-verbose */
+		*target = 0;
+	else if (opt->short_name == 'v') {
+		if (*target >= 0)
+			(*target)++;
+		else
+			*target = 1;
+	} else {
+		if (*target <= 0)
+			(*target)--;
+		else
+			*target = -1;
+	}
+	return 0;
+}
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
new file mode 100644
index 0000000..a1039a6
--- /dev/null
+++ b/tools/perf/util/parse-options.h
@@ -0,0 +1,174 @@
+#ifndef PARSE_OPTIONS_H
+#define PARSE_OPTIONS_H
+
+enum parse_opt_type {
+	/* special types */
+	OPTION_END,
+	OPTION_ARGUMENT,
+	OPTION_GROUP,
+	/* options with no arguments */
+	OPTION_BIT,
+	OPTION_BOOLEAN, /* _INCR would have been a better name */
+	OPTION_SET_INT,
+	OPTION_SET_PTR,
+	/* options with arguments (usually) */
+	OPTION_STRING,
+	OPTION_INTEGER,
+	OPTION_LONG,
+	OPTION_CALLBACK,
+};
+
+enum parse_opt_flags {
+	PARSE_OPT_KEEP_DASHDASH = 1,
+	PARSE_OPT_STOP_AT_NON_OPTION = 2,
+	PARSE_OPT_KEEP_ARGV0 = 4,
+	PARSE_OPT_KEEP_UNKNOWN = 8,
+	PARSE_OPT_NO_INTERNAL_HELP = 16,
+};
+
+enum parse_opt_option_flags {
+	PARSE_OPT_OPTARG  = 1,
+	PARSE_OPT_NOARG   = 2,
+	PARSE_OPT_NONEG   = 4,
+	PARSE_OPT_HIDDEN  = 8,
+	PARSE_OPT_LASTARG_DEFAULT = 16,
+};
+
+struct option;
+typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
+
+/*
+ * `type`::
+ *   holds the type of the option, you must have an OPTION_END last in your
+ *   array.
+ *
+ * `short_name`::
+ *   the character to use as a short option name, '\0' if none.
+ *
+ * `long_name`::
+ *   the long option name, without the leading dashes, NULL if none.
+ *
+ * `value`::
+ *   stores pointers to the values to be filled.
+ *
+ * `argh`::
+ *   token to explain the kind of argument this option wants. Keep it
+ *   homogenous across the repository.
+ *
+ * `help`::
+ *   the short help associated to what the option does.
+ *   Must never be NULL (except for OPTION_END).
+ *   OPTION_GROUP uses this pointer to store the group header.
+ *
+ * `flags`::
+ *   mask of parse_opt_option_flags.
+ *   PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs)
+ *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
+ *   PARSE_OPT_NONEG: says that this option cannot be negated
+ *   PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in
+ *                    the long one.
+ *
+ * `callback`::
+ *   pointer to the callback to use for OPTION_CALLBACK.
+ *
+ * `defval`::
+ *   default value to fill (*->value) with for PARSE_OPT_OPTARG.
+ *   OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in
+ *   the value when met.
+ *   CALLBACKS can use it like they want.
+ */
+struct option {
+	enum parse_opt_type type;
+	int short_name;
+	const char *long_name;
+	void *value;
+	const char *argh;
+	const char *help;
+
+	int flags;
+	parse_opt_cb *callback;
+	intptr_t defval;
+};
+
+#define OPT_END()                   { OPTION_END }
+#define OPT_ARGUMENT(l, h)          { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) }
+#define OPT_GROUP(h)                { OPTION_GROUP, 0, NULL, NULL, NULL, (h) }
+#define OPT_BIT(s, l, v, h, b)      { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) }
+#define OPT_BOOLEAN(s, l, v, h)     { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) }
+#define OPT_SET_INT(s, l, v, h, i)  { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) }
+#define OPT_SET_PTR(s, l, v, h, p)  { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) }
+#define OPT_INTEGER(s, l, v, h)     { OPTION_INTEGER, (s), (l), (v), NULL, (h) }
+#define OPT_LONG(s, l, v, h)        { OPTION_LONG, (s), (l), (v), NULL, (h) }
+#define OPT_STRING(s, l, v, a, h)   { OPTION_STRING,  (s), (l), (v), (a), (h) }
+#define OPT_DATE(s, l, v, h) \
+	{ OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \
+	  parse_opt_approxidate_cb }
+#define OPT_CALLBACK(s, l, v, a, h, f) \
+	{ OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) }
+
+/* parse_options() will filter out the processed options and leave the
+ * non-option argments in argv[].
+ * Returns the number of arguments left in argv[].
+ */
+extern int parse_options(int argc, const char **argv,
+                         const struct option *options,
+                         const char * const usagestr[], int flags);
+
+extern NORETURN void usage_with_options(const char * const *usagestr,
+                                        const struct option *options);
+
+/*----- incremantal advanced APIs -----*/
+
+enum {
+	PARSE_OPT_HELP = -1,
+	PARSE_OPT_DONE,
+	PARSE_OPT_UNKNOWN,
+};
+
+/*
+ * It's okay for the caller to consume argv/argc in the usual way.
+ * Other fields of that structure are private to parse-options and should not
+ * be modified in any way.
+ */
+struct parse_opt_ctx_t {
+	const char **argv;
+	const char **out;
+	int argc, cpidx;
+	const char *opt;
+	int flags;
+};
+
+extern int parse_options_usage(const char * const *usagestr,
+			       const struct option *opts);
+
+extern void parse_options_start(struct parse_opt_ctx_t *ctx,
+				int argc, const char **argv, int flags);
+
+extern int parse_options_step(struct parse_opt_ctx_t *ctx,
+			      const struct option *options,
+			      const char * const usagestr[]);
+
+extern int parse_options_end(struct parse_opt_ctx_t *ctx);
+
+
+/*----- some often used options -----*/
+extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
+extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
+extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
+
+#define OPT__VERBOSE(var)  OPT_BOOLEAN('v', "verbose", (var), "be verbose")
+#define OPT__QUIET(var)    OPT_BOOLEAN('q', "quiet",   (var), "be quiet")
+#define OPT__VERBOSITY(var) \
+	{ OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
+	{ OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
+	  PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
+#define OPT__DRY_RUN(var)  OPT_BOOLEAN('n', "dry-run", (var), "dry run")
+#define OPT__ABBREV(var)  \
+	{ OPTION_CALLBACK, 0, "abbrev", (var), "n", \
+	  "use <n> digits to display SHA-1s", \
+	  PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }
+
+extern const char *parse_options_fix_filename(const char *prefix, const char *file);
+
+#endif
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
new file mode 100644
index 0000000..a501a40
--- /dev/null
+++ b/tools/perf/util/path.c
@@ -0,0 +1,353 @@
+/*
+ * I'm tired of doing "vsnprintf()" etc just to open a
+ * file, so here's a "return static buffer with printf"
+ * interface for paths.
+ *
+ * It's obviously not thread-safe. Sue me. But it's quite
+ * useful for doing things like
+ *
+ *   f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
+ *
+ * which is what it's designed for.
+ */
+#include "cache.h"
+
+static char bad_path[] = "/bad-path/";
+/*
+ * Two hacks:
+ */
+
+static char *get_perf_dir(void)
+{
+	return ".";
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+	size_t ret = strlen(src);
+
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memcpy(dest, src, len);
+		dest[len] = '\0';
+	}
+	return ret;
+}
+
+
+static char *get_pathname(void)
+{
+	static char pathname_array[4][PATH_MAX];
+	static int index;
+	return pathname_array[3 & ++index];
+}
+
+static char *cleanup_path(char *path)
+{
+	/* Clean it up */
+	if (!memcmp(path, "./", 2)) {
+		path += 2;
+		while (*path == '/')
+			path++;
+	}
+	return path;
+}
+
+char *mksnpath(char *buf, size_t n, const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+
+	va_start(args, fmt);
+	len = vsnprintf(buf, n, fmt, args);
+	va_end(args);
+	if (len >= n) {
+		strlcpy(buf, bad_path, n);
+		return buf;
+	}
+	return cleanup_path(buf);
+}
+
+static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
+{
+	const char *perf_dir = get_perf_dir();
+	size_t len;
+
+	len = strlen(perf_dir);
+	if (n < len + 1)
+		goto bad;
+	memcpy(buf, perf_dir, len);
+	if (len && !is_dir_sep(perf_dir[len-1]))
+		buf[len++] = '/';
+	len += vsnprintf(buf + len, n - len, fmt, args);
+	if (len >= n)
+		goto bad;
+	return cleanup_path(buf);
+bad:
+	strlcpy(buf, bad_path, n);
+	return buf;
+}
+
+char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	(void)perf_vsnpath(buf, n, fmt, args);
+	va_end(args);
+	return buf;
+}
+
+char *perf_pathdup(const char *fmt, ...)
+{
+	char path[PATH_MAX];
+	va_list args;
+	va_start(args, fmt);
+	(void)perf_vsnpath(path, sizeof(path), fmt, args);
+	va_end(args);
+	return xstrdup(path);
+}
+
+char *mkpath(const char *fmt, ...)
+{
+	va_list args;
+	unsigned len;
+	char *pathname = get_pathname();
+
+	va_start(args, fmt);
+	len = vsnprintf(pathname, PATH_MAX, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+char *perf_path(const char *fmt, ...)
+{
+	const char *perf_dir = get_perf_dir();
+	char *pathname = get_pathname();
+	va_list args;
+	unsigned len;
+
+	len = strlen(perf_dir);
+	if (len > PATH_MAX-100)
+		return bad_path;
+	memcpy(pathname, perf_dir, len);
+	if (len && perf_dir[len-1] != '/')
+		pathname[len++] = '/';
+	va_start(args, fmt);
+	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
+
+
+/* perf_mkstemp() - create tmp file honoring TMPDIR variable */
+int perf_mkstemp(char *path, size_t len, const char *template)
+{
+	const char *tmp;
+	size_t n;
+
+	tmp = getenv("TMPDIR");
+	if (!tmp)
+		tmp = "/tmp";
+	n = snprintf(path, len, "%s/%s", tmp, template);
+	if (len <= n) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+	return mkstemp(path);
+}
+
+
+const char *make_relative_path(const char *abs, const char *base)
+{
+	static char buf[PATH_MAX + 1];
+	int baselen;
+	if (!base)
+		return abs;
+	baselen = strlen(base);
+	if (prefixcmp(abs, base))
+		return abs;
+	if (abs[baselen] == '/')
+		baselen++;
+	else if (base[baselen - 1] != '/')
+		return abs;
+	strcpy(buf, abs + baselen);
+	return buf;
+}
+
+/*
+ * It is okay if dst == src, but they should not overlap otherwise.
+ *
+ * Performs the following normalizations on src, storing the result in dst:
+ * - Ensures that components are separated by '/' (Windows only)
+ * - Squashes sequences of '/'.
+ * - Removes "." components.
+ * - Removes ".." components, and the components the precede them.
+ * Returns failure (non-zero) if a ".." component appears as first path
+ * component anytime during the normalization. Otherwise, returns success (0).
+ *
+ * Note that this function is purely textual.  It does not follow symlinks,
+ * verify the existence of the path, or make any system calls.
+ */
+int normalize_path_copy(char *dst, const char *src)
+{
+	char *dst0;
+
+	if (has_dos_drive_prefix(src)) {
+		*dst++ = *src++;
+		*dst++ = *src++;
+	}
+	dst0 = dst;
+
+	if (is_dir_sep(*src)) {
+		*dst++ = '/';
+		while (is_dir_sep(*src))
+			src++;
+	}
+
+	for (;;) {
+		char c = *src;
+
+		/*
+		 * A path component that begins with . could be
+		 * special:
+		 * (1) "." and ends   -- ignore and terminate.
+		 * (2) "./"           -- ignore them, eat slash and continue.
+		 * (3) ".." and ends  -- strip one and terminate.
+		 * (4) "../"          -- strip one, eat slash and continue.
+		 */
+		if (c == '.') {
+			if (!src[1]) {
+				/* (1) */
+				src++;
+			} else if (is_dir_sep(src[1])) {
+				/* (2) */
+				src += 2;
+				while (is_dir_sep(*src))
+					src++;
+				continue;
+			} else if (src[1] == '.') {
+				if (!src[2]) {
+					/* (3) */
+					src += 2;
+					goto up_one;
+				} else if (is_dir_sep(src[2])) {
+					/* (4) */
+					src += 3;
+					while (is_dir_sep(*src))
+						src++;
+					goto up_one;
+				}
+			}
+		}
+
+		/* copy up to the next '/', and eat all '/' */
+		while ((c = *src++) != '\0' && !is_dir_sep(c))
+			*dst++ = c;
+		if (is_dir_sep(c)) {
+			*dst++ = '/';
+			while (is_dir_sep(c))
+				c = *src++;
+			src--;
+		} else if (!c)
+			break;
+		continue;
+
+	up_one:
+		/*
+		 * dst0..dst is prefix portion, and dst[-1] is '/';
+		 * go up one level.
+		 */
+		dst--;	/* go to trailing '/' */
+		if (dst <= dst0)
+			return -1;
+		/* Windows: dst[-1] cannot be backslash anymore */
+		while (dst0 < dst && dst[-1] != '/')
+			dst--;
+	}
+	*dst = '\0';
+	return 0;
+}
+
+/*
+ * path = Canonical absolute path
+ * prefix_list = Colon-separated list of absolute paths
+ *
+ * Determines, for each path in prefix_list, whether the "prefix" really
+ * is an ancestor directory of path.  Returns the length of the longest
+ * ancestor directory, excluding any trailing slashes, or -1 if no prefix
+ * is an ancestor.  (Note that this means 0 is returned if prefix_list is
+ * "/".) "/foo" is not considered an ancestor of "/foobar".  Directories
+ * are not considered to be their own ancestors.  path must be in a
+ * canonical form: empty components, or "." or ".." components are not
+ * allowed.  prefix_list may be null, which is like "".
+ */
+int longest_ancestor_length(const char *path, const char *prefix_list)
+{
+	char buf[PATH_MAX+1];
+	const char *ceil, *colon;
+	int len, max_len = -1;
+
+	if (prefix_list == NULL || !strcmp(path, "/"))
+		return -1;
+
+	for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
+		for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
+		len = colon - ceil;
+		if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
+			continue;
+		strlcpy(buf, ceil, len+1);
+		if (normalize_path_copy(buf, buf) < 0)
+			continue;
+		len = strlen(buf);
+		if (len > 0 && buf[len-1] == '/')
+			buf[--len] = '\0';
+
+		if (!strncmp(path, buf, len) &&
+		    path[len] == '/' &&
+		    len > max_len) {
+			max_len = len;
+		}
+	}
+
+	return max_len;
+}
+
+/* strip arbitrary amount of directory separators at end of path */
+static inline int chomp_trailing_dir_sep(const char *path, int len)
+{
+	while (len && is_dir_sep(path[len - 1]))
+		len--;
+	return len;
+}
+
+/*
+ * If path ends with suffix (complete path components), returns the
+ * part before suffix (sans trailing directory separators).
+ * Otherwise returns NULL.
+ */
+char *strip_path_suffix(const char *path, const char *suffix)
+{
+	int path_len = strlen(path), suffix_len = strlen(suffix);
+
+	while (suffix_len) {
+		if (!path_len)
+			return NULL;
+
+		if (is_dir_sep(path[path_len - 1])) {
+			if (!is_dir_sep(suffix[suffix_len - 1]))
+				return NULL;
+			path_len = chomp_trailing_dir_sep(path, path_len);
+			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
+		}
+		else if (path[--path_len] != suffix[--suffix_len])
+			return NULL;
+	}
+
+	if (path_len && !is_dir_sep(path[path_len - 1]))
+		return NULL;
+	return xstrndup(path, chomp_trailing_dir_sep(path, path_len));
+}
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c
new file mode 100644
index 0000000..f18c521
--- /dev/null
+++ b/tools/perf/util/quote.c
@@ -0,0 +1,481 @@
+#include "cache.h"
+#include "quote.h"
+
+int quote_path_fully = 1;
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ *
+ * E.g.
+ *  original     sq_quote     result
+ *  name     ==> name      ==> 'name'
+ *  a b      ==> a b       ==> 'a b'
+ *  a'b      ==> a'\''b    ==> 'a'\''b'
+ *  a!b      ==> a'\!'b    ==> 'a'\!'b'
+ */
+static inline int need_bs_quote(char c)
+{
+	return (c == '\'' || c == '!');
+}
+
+void sq_quote_buf(struct strbuf *dst, const char *src)
+{
+	char *to_free = NULL;
+
+	if (dst->buf == src)
+		to_free = strbuf_detach(dst, NULL);
+
+	strbuf_addch(dst, '\'');
+	while (*src) {
+		size_t len = strcspn(src, "'!");
+		strbuf_add(dst, src, len);
+		src += len;
+		while (need_bs_quote(*src)) {
+			strbuf_addstr(dst, "'\\");
+			strbuf_addch(dst, *src++);
+			strbuf_addch(dst, '\'');
+		}
+	}
+	strbuf_addch(dst, '\'');
+	free(to_free);
+}
+
+void sq_quote_print(FILE *stream, const char *src)
+{
+	char c;
+
+	fputc('\'', stream);
+	while ((c = *src++)) {
+		if (need_bs_quote(c)) {
+			fputs("'\\", stream);
+			fputc(c, stream);
+			fputc('\'', stream);
+		} else {
+			fputc(c, stream);
+		}
+	}
+	fputc('\'', stream);
+}
+
+void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
+{
+	int i;
+
+	/* Copy into destination buffer. */
+	strbuf_grow(dst, 255);
+	for (i = 0; argv[i]; ++i) {
+		strbuf_addch(dst, ' ');
+		sq_quote_buf(dst, argv[i]);
+		if (maxlen && dst->len > maxlen)
+			die("Too many or long arguments");
+	}
+}
+
+char *sq_dequote_step(char *arg, char **next)
+{
+	char *dst = arg;
+	char *src = arg;
+	char c;
+
+	if (*src != '\'')
+		return NULL;
+	for (;;) {
+		c = *++src;
+		if (!c)
+			return NULL;
+		if (c != '\'') {
+			*dst++ = c;
+			continue;
+		}
+		/* We stepped out of sq */
+		switch (*++src) {
+		case '\0':
+			*dst = 0;
+			if (next)
+				*next = NULL;
+			return arg;
+		case '\\':
+			c = *++src;
+			if (need_bs_quote(c) && *++src == '\'') {
+				*dst++ = c;
+				continue;
+			}
+		/* Fallthrough */
+		default:
+			if (!next || !isspace(*src))
+				return NULL;
+			do {
+				c = *++src;
+			} while (isspace(c));
+			*dst = 0;
+			*next = src;
+			return arg;
+		}
+	}
+}
+
+char *sq_dequote(char *arg)
+{
+	return sq_dequote_step(arg, NULL);
+}
+
+int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
+{
+	char *next = arg;
+
+	if (!*arg)
+		return 0;
+	do {
+		char *dequoted = sq_dequote_step(next, &next);
+		if (!dequoted)
+			return -1;
+		ALLOC_GROW(*argv, *nr + 1, *alloc);
+		(*argv)[(*nr)++] = dequoted;
+	} while (next);
+
+	return 0;
+}
+
+/* 1 means: quote as octal
+ * 0 means: quote as octal if (quote_path_fully)
+ * -1 means: never quote
+ * c: quote as "\\c"
+ */
+#define X8(x)   x, x, x, x, x, x, x, x
+#define X16(x)  X8(x), X8(x)
+static signed char const sq_lookup[256] = {
+	/*           0    1    2    3    4    5    6    7 */
+	/* 0x00 */   1,   1,   1,   1,   1,   1,   1, 'a',
+	/* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r',   1,   1,
+	/* 0x10 */ X16(1),
+	/* 0x20 */  -1,  -1, '"',  -1,  -1,  -1,  -1,  -1,
+	/* 0x28 */ X16(-1), X16(-1), X16(-1),
+	/* 0x58 */  -1,  -1,  -1,  -1,'\\',  -1,  -1,  -1,
+	/* 0x60 */ X16(-1), X8(-1),
+	/* 0x78 */  -1,  -1,  -1,  -1,  -1,  -1,  -1,   1,
+	/* 0x80 */ /* set to 0 */
+};
+
+static inline int sq_must_quote(char c)
+{
+	return sq_lookup[(unsigned char)c] + quote_path_fully > 0;
+}
+
+/* returns the longest prefix not needing a quote up to maxlen if positive.
+   This stops at the first \0 because it's marked as a character needing an
+   escape */
+static size_t next_quote_pos(const char *s, ssize_t maxlen)
+{
+	size_t len;
+	if (maxlen < 0) {
+		for (len = 0; !sq_must_quote(s[len]); len++);
+	} else {
+		for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++);
+	}
+	return len;
+}
+
+/*
+ * C-style name quoting.
+ *
+ * (1) if sb and fp are both NULL, inspect the input name and counts the
+ *     number of bytes that are needed to hold c_style quoted version of name,
+ *     counting the double quotes around it but not terminating NUL, and
+ *     returns it.
+ *     However, if name does not need c_style quoting, it returns 0.
+ *
+ * (2) if sb or fp are not NULL, it emits the c_style quoted version
+ *     of name, enclosed with double quotes if asked and needed only.
+ *     Return value is the same as in (1).
+ */
+static size_t quote_c_style_counted(const char *name, ssize_t maxlen,
+                                    struct strbuf *sb, FILE *fp, int no_dq)
+{
+#undef EMIT
+#define EMIT(c)                                 \
+	do {                                        \
+		if (sb) strbuf_addch(sb, (c));          \
+		if (fp) fputc((c), fp);                 \
+		count++;                                \
+	} while (0)
+#define EMITBUF(s, l)                           \
+	do {                                        \
+		int __ret;				\
+		if (sb) strbuf_add(sb, (s), (l));       \
+		if (fp) __ret = fwrite((s), (l), 1, fp);        \
+		count += (l);                           \
+	} while (0)
+
+	size_t len, count = 0;
+	const char *p = name;
+
+	for (;;) {
+		int ch;
+
+		len = next_quote_pos(p, maxlen);
+		if (len == maxlen || !p[len])
+			break;
+
+		if (!no_dq && p == name)
+			EMIT('"');
+
+		EMITBUF(p, len);
+		EMIT('\\');
+		p += len;
+		ch = (unsigned char)*p++;
+		if (sq_lookup[ch] >= ' ') {
+			EMIT(sq_lookup[ch]);
+		} else {
+			EMIT(((ch >> 6) & 03) + '0');
+			EMIT(((ch >> 3) & 07) + '0');
+			EMIT(((ch >> 0) & 07) + '0');
+		}
+	}
+
+	EMITBUF(p, len);
+	if (p == name)   /* no ending quote needed */
+		return 0;
+
+	if (!no_dq)
+		EMIT('"');
+	return count;
+}
+
+size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
+{
+	return quote_c_style_counted(name, -1, sb, fp, nodq);
+}
+
+void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
+{
+	if (quote_c_style(prefix, NULL, NULL, 0) ||
+	    quote_c_style(path, NULL, NULL, 0)) {
+		if (!nodq)
+			strbuf_addch(sb, '"');
+		quote_c_style(prefix, sb, NULL, 1);
+		quote_c_style(path, sb, NULL, 1);
+		if (!nodq)
+			strbuf_addch(sb, '"');
+	} else {
+		strbuf_addstr(sb, prefix);
+		strbuf_addstr(sb, path);
+	}
+}
+
+void write_name_quoted(const char *name, FILE *fp, int terminator)
+{
+	if (terminator) {
+		quote_c_style(name, NULL, fp, 0);
+	} else {
+		fputs(name, fp);
+	}
+	fputc(terminator, fp);
+}
+
+extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
+                                 const char *name, FILE *fp, int terminator)
+{
+	int needquote = 0;
+
+	if (terminator) {
+		needquote = next_quote_pos(pfx, pfxlen) < pfxlen
+			|| name[next_quote_pos(name, -1)];
+	}
+	if (needquote) {
+		fputc('"', fp);
+		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
+		quote_c_style(name, NULL, fp, 1);
+		fputc('"', fp);
+	} else {
+		int ret;
+
+		ret = fwrite(pfx, pfxlen, 1, fp);
+		fputs(name, fp);
+	}
+	fputc(terminator, fp);
+}
+
+/* quote path as relative to the given prefix */
+char *quote_path_relative(const char *in, int len,
+			  struct strbuf *out, const char *prefix)
+{
+	int needquote;
+
+	if (len < 0)
+		len = strlen(in);
+
+	/* "../" prefix itself does not need quoting, but "in" might. */
+	needquote = next_quote_pos(in, len) < len;
+	strbuf_setlen(out, 0);
+	strbuf_grow(out, len);
+
+	if (needquote)
+		strbuf_addch(out, '"');
+	if (prefix) {
+		int off = 0;
+		while (prefix[off] && off < len && prefix[off] == in[off])
+			if (prefix[off] == '/') {
+				prefix += off + 1;
+				in += off + 1;
+				len -= off + 1;
+				off = 0;
+			} else
+				off++;
+
+		for (; *prefix; prefix++)
+			if (*prefix == '/')
+				strbuf_addstr(out, "../");
+	}
+
+	quote_c_style_counted (in, len, out, NULL, 1);
+
+	if (needquote)
+		strbuf_addch(out, '"');
+	if (!out->len)
+		strbuf_addstr(out, "./");
+
+	return out->buf;
+}
+
+/*
+ * C-style name unquoting.
+ *
+ * Quoted should point at the opening double quote.
+ * + Returns 0 if it was able to unquote the string properly, and appends the
+ *   result in the strbuf `sb'.
+ * + Returns -1 in case of error, and doesn't touch the strbuf. Though note
+ *   that this function will allocate memory in the strbuf, so calling
+ *   strbuf_release is mandatory whichever result unquote_c_style returns.
+ *
+ * Updates endp pointer to point at one past the ending double quote if given.
+ */
+int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
+{
+	size_t oldlen = sb->len, len;
+	int ch, ac;
+
+	if (*quoted++ != '"')
+		return -1;
+
+	for (;;) {
+		len = strcspn(quoted, "\"\\");
+		strbuf_add(sb, quoted, len);
+		quoted += len;
+
+		switch (*quoted++) {
+		  case '"':
+			if (endp)
+				*endp = quoted;
+			return 0;
+		  case '\\':
+			break;
+		  default:
+			goto error;
+		}
+
+		switch ((ch = *quoted++)) {
+		case 'a': ch = '\a'; break;
+		case 'b': ch = '\b'; break;
+		case 'f': ch = '\f'; break;
+		case 'n': ch = '\n'; break;
+		case 'r': ch = '\r'; break;
+		case 't': ch = '\t'; break;
+		case 'v': ch = '\v'; break;
+
+		case '\\': case '"':
+			break; /* verbatim */
+
+		/* octal values with first digit over 4 overflow */
+		case '0': case '1': case '2': case '3':
+					ac = ((ch - '0') << 6);
+			if ((ch = *quoted++) < '0' || '7' < ch)
+				goto error;
+					ac |= ((ch - '0') << 3);
+			if ((ch = *quoted++) < '0' || '7' < ch)
+				goto error;
+					ac |= (ch - '0');
+					ch = ac;
+					break;
+				default:
+			goto error;
+			}
+		strbuf_addch(sb, ch);
+		}
+
+  error:
+	strbuf_setlen(sb, oldlen);
+	return -1;
+}
+
+/* quoting as a string literal for other languages */
+
+void perl_quote_print(FILE *stream, const char *src)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	char c;
+
+	fputc(sq, stream);
+	while ((c = *src++)) {
+		if (c == sq || c == bq)
+			fputc(bq, stream);
+		fputc(c, stream);
+	}
+	fputc(sq, stream);
+}
+
+void python_quote_print(FILE *stream, const char *src)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char nl = '\n';
+	char c;
+
+	fputc(sq, stream);
+	while ((c = *src++)) {
+		if (c == nl) {
+			fputc(bq, stream);
+			fputc('n', stream);
+			continue;
+		}
+		if (c == sq || c == bq)
+			fputc(bq, stream);
+		fputc(c, stream);
+	}
+	fputc(sq, stream);
+}
+
+void tcl_quote_print(FILE *stream, const char *src)
+{
+	char c;
+
+	fputc('"', stream);
+	while ((c = *src++)) {
+		switch (c) {
+		case '[': case ']':
+		case '{': case '}':
+		case '$': case '\\': case '"':
+			fputc('\\', stream);
+		default:
+			fputc(c, stream);
+			break;
+		case '\f':
+			fputs("\\f", stream);
+			break;
+		case '\r':
+			fputs("\\r", stream);
+			break;
+		case '\n':
+			fputs("\\n", stream);
+			break;
+		case '\t':
+			fputs("\\t", stream);
+			break;
+		case '\v':
+			fputs("\\v", stream);
+			break;
+		}
+	}
+	fputc('"', stream);
+}
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
new file mode 100644
index 0000000..5dfad89
--- /dev/null
+++ b/tools/perf/util/quote.h
@@ -0,0 +1,68 @@
+#ifndef QUOTE_H
+#define QUOTE_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ * single quote pair.
+ *
+ * For example, if you are passing the result to system() as an
+ * argument:
+ *
+ * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
+ *
+ * would be appropriate.  If the system() is going to call ssh to
+ * run the command on the other side:
+ *
+ * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
+ * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd));
+ *
+ * Note that the above examples leak memory!  Remember to free result from
+ * sq_quote() in a real application.
+ *
+ * sq_quote_buf() writes to an existing buffer of specified size; it
+ * will return the number of characters that would have been written
+ * excluding the final null regardless of the buffer size.
+ */
+
+extern void sq_quote_print(FILE *stream, const char *src);
+
+extern void sq_quote_buf(struct strbuf *, const char *src);
+extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+
+/* This unwraps what sq_quote() produces in place, but returns
+ * NULL if the input does not look like what sq_quote would have
+ * produced.
+ */
+extern char *sq_dequote(char *);
+
+/*
+ * Same as the above, but can be used to unwrap many arguments in the
+ * same string separated by space. "next" is changed to point to the
+ * next argument that should be passed as first parameter. When there
+ * is no more argument to be dequoted, "next" is updated to point to NULL.
+ */
+extern char *sq_dequote_step(char *arg, char **next);
+extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc);
+
+extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
+extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq);
+extern void quote_two_c_style(struct strbuf *, const char *, const char *, int);
+
+extern void write_name_quoted(const char *name, FILE *, int terminator);
+extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
+                                 const char *name, FILE *, int terminator);
+
+/* quote path as relative to the given prefix */
+char *quote_path_relative(const char *in, int len,
+			  struct strbuf *out, const char *prefix);
+
+/* quoting as a string literal for other languages */
+extern void perl_quote_print(FILE *stream, const char *src);
+extern void python_quote_print(FILE *stream, const char *src);
+extern void tcl_quote_print(FILE *stream, const char *src);
+
+#endif
diff --git a/tools/perf/util/rbtree.c b/tools/perf/util/rbtree.c
new file mode 100644
index 0000000..b15ba9c
--- /dev/null
+++ b/tools/perf/util/rbtree.c
@@ -0,0 +1,383 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *right = node->rb_right;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_right = right->rb_left))
+		rb_set_parent(right->rb_left, node);
+	right->rb_left = node;
+
+	rb_set_parent(right, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_left)
+			parent->rb_left = right;
+		else
+			parent->rb_right = right;
+	}
+	else
+		root->rb_node = right;
+	rb_set_parent(node, right);
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *left = node->rb_left;
+	struct rb_node *parent = rb_parent(node);
+
+	if ((node->rb_left = left->rb_right))
+		rb_set_parent(left->rb_right, node);
+	left->rb_right = node;
+
+	rb_set_parent(left, parent);
+
+	if (parent)
+	{
+		if (node == parent->rb_right)
+			parent->rb_right = left;
+		else
+			parent->rb_left = left;
+	}
+	else
+		root->rb_node = left;
+	rb_set_parent(node, left);
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *parent, *gparent;
+
+	while ((parent = rb_parent(node)) && rb_is_red(parent))
+	{
+		gparent = rb_parent(parent);
+
+		if (parent == gparent->rb_left)
+		{
+			{
+				register struct rb_node *uncle = gparent->rb_right;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_right == node)
+			{
+				register struct rb_node *tmp;
+				__rb_rotate_left(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_right(gparent, root);
+		} else {
+			{
+				register struct rb_node *uncle = gparent->rb_left;
+				if (uncle && rb_is_red(uncle))
+				{
+					rb_set_black(uncle);
+					rb_set_black(parent);
+					rb_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_left == node)
+			{
+				register struct rb_node *tmp;
+				__rb_rotate_right(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			rb_set_black(parent);
+			rb_set_red(gparent);
+			__rb_rotate_left(gparent, root);
+		}
+	}
+
+	rb_set_black(root->rb_node);
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+			     struct rb_root *root)
+{
+	struct rb_node *other;
+
+	while ((!node || rb_is_black(node)) && node != root->rb_node)
+	{
+		if (parent->rb_left == node)
+		{
+			other = parent->rb_right;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_left(parent, root);
+				other = parent->rb_right;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_right || rb_is_black(other->rb_right))
+				{
+					rb_set_black(other->rb_left);
+					rb_set_red(other);
+					__rb_rotate_right(other, root);
+					other = parent->rb_right;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_right);
+				__rb_rotate_left(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+		else
+		{
+			other = parent->rb_left;
+			if (rb_is_red(other))
+			{
+				rb_set_black(other);
+				rb_set_red(parent);
+				__rb_rotate_right(parent, root);
+				other = parent->rb_left;
+			}
+			if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+			    (!other->rb_right || rb_is_black(other->rb_right)))
+			{
+				rb_set_red(other);
+				node = parent;
+				parent = rb_parent(node);
+			}
+			else
+			{
+				if (!other->rb_left || rb_is_black(other->rb_left))
+				{
+					rb_set_black(other->rb_right);
+					rb_set_red(other);
+					__rb_rotate_left(other, root);
+					other = parent->rb_left;
+				}
+				rb_set_color(other, rb_color(parent));
+				rb_set_black(parent);
+				rb_set_black(other->rb_left);
+				__rb_rotate_right(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+	}
+	if (node)
+		rb_set_black(node);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *child, *parent;
+	int color;
+
+	if (!node->rb_left)
+		child = node->rb_right;
+	else if (!node->rb_right)
+		child = node->rb_left;
+	else
+	{
+		struct rb_node *old = node, *left;
+
+		node = node->rb_right;
+		while ((left = node->rb_left) != NULL)
+			node = left;
+		child = node->rb_right;
+		parent = rb_parent(node);
+		color = rb_color(node);
+
+		if (child)
+			rb_set_parent(child, parent);
+		if (parent == old) {
+			parent->rb_right = child;
+			parent = node;
+		} else
+			parent->rb_left = child;
+
+		node->rb_parent_color = old->rb_parent_color;
+		node->rb_right = old->rb_right;
+		node->rb_left = old->rb_left;
+
+		if (rb_parent(old))
+		{
+			if (rb_parent(old)->rb_left == old)
+				rb_parent(old)->rb_left = node;
+			else
+				rb_parent(old)->rb_right = node;
+		} else
+			root->rb_node = node;
+
+		rb_set_parent(old->rb_left, node);
+		if (old->rb_right)
+			rb_set_parent(old->rb_right, node);
+		goto color;
+	}
+
+	parent = rb_parent(node);
+	color = rb_color(node);
+
+	if (child)
+		rb_set_parent(child, parent);
+	if (parent)
+	{
+		if (parent->rb_left == node)
+			parent->rb_left = child;
+		else
+			parent->rb_right = child;
+	}
+	else
+		root->rb_node = child;
+
+ color:
+	if (color == RB_BLACK)
+		__rb_erase_color(child, parent, root);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a right-hand child, go down and then left as far
+	   as we can. */
+	if (node->rb_right) {
+		node = node->rb_right; 
+		while (node->rb_left)
+			node=node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/* No right-hand children.  Everything down and left is
+	   smaller than us, so any 'next' node must be in the general
+	   direction of our parent. Go up the tree; any time the
+	   ancestor is a right-hand child of its parent, keep going
+	   up. First time it's a left-hand child of its parent, said
+	   parent is our 'next' node. */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (rb_parent(node) == node)
+		return NULL;
+
+	/* If we have a left-hand child, go down and then right as far
+	   as we can. */
+	if (node->rb_left) {
+		node = node->rb_left; 
+		while (node->rb_right)
+			node=node->rb_right;
+		return (struct rb_node *)node;
+	}
+
+	/* No left-hand children. Go up till we find an ancestor which
+	   is a right-hand child of its parent */
+	while ((parent = rb_parent(node)) && node == parent->rb_left)
+		node = parent;
+
+	return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (parent) {
+		if (victim == parent->rb_left)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else {
+		root->rb_node = new;
+	}
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
diff --git a/tools/perf/util/rbtree.h b/tools/perf/util/rbtree.h
new file mode 100644
index 0000000..6bdc488
--- /dev/null
+++ b/tools/perf/util/rbtree.h
@@ -0,0 +1,171 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This will avoid us to use callbacks and to drop drammatically performances.
+  I know it's not the cleaner way,  but in C (not in C++) to get
+  performances and genericity...
+
+  Some example of insert and search follows here. The search is a plain
+  normal search over an ordered tree. The insert instead must be implemented
+  int two steps: as first thing the code must insert the element in
+  order as a red leaf in the tree, then the support library function
+  rb_insert_color() must be called. Such function will do the
+  not trivial work to rebalance the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+						 unsigned long offset)
+{
+	struct rb_node * n = inode->i_rb_page_cache.rb_node;
+	struct page * page;
+
+	while (n)
+	{
+		page = rb_entry(n, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			n = n->rb_left;
+		else if (offset > page->offset)
+			n = n->rb_right;
+		else
+			return page;
+	}
+	return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+						   unsigned long offset,
+						   struct rb_node * node)
+{
+	struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+	struct rb_node * parent = NULL;
+	struct page * page;
+
+	while (*p)
+	{
+		parent = *p;
+		page = rb_entry(parent, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			p = &(*p)->rb_left;
+		else if (offset > page->offset)
+			p = &(*p)->rb_right;
+		else
+			return page;
+	}
+
+	rb_link_node(node, parent, p);
+
+	return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+						 unsigned long offset,
+						 struct rb_node * node)
+{
+	struct page * ret;
+	if ((ret = __rb_insert_page_cache(inode, offset, node)))
+		goto out;
+	rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+	return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef	_LINUX_RBTREE_H
+#define	_LINUX_RBTREE_H
+
+#include <stddef.h>
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:	the pointer to the member.
+ * @type:	the type of the container struct this is embedded in.
+ * @member:	the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+struct rb_node
+{
+	unsigned long  rb_parent_color;
+#define	RB_RED		0
+#define	RB_BLACK	1
+	struct rb_node *rb_right;
+	struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+    /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root
+{
+	struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
+#define rb_color(r)   ((r)->rb_parent_color & 1)
+#define rb_is_red(r)   (!rb_color(r))
+#define rb_is_black(r) rb_color(r)
+#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
+#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
+}
+static inline void rb_set_color(struct rb_node *rb, int color)
+{
+	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
+}
+
+#define RB_ROOT	(struct rb_root) { NULL, }
+#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
+#define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
+#define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 
+			    struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+				struct rb_node ** rb_link)
+{
+	node->rb_parent_color = (unsigned long )parent;
+	node->rb_left = node->rb_right = NULL;
+
+	*rb_link = node;
+}
+
+#endif	/* _LINUX_RBTREE_H */
diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c
new file mode 100644
index 0000000..b2f5e85
--- /dev/null
+++ b/tools/perf/util/run-command.c
@@ -0,0 +1,395 @@
+#include "cache.h"
+#include "run-command.h"
+#include "exec_cmd.h"
+
+static inline void close_pair(int fd[2])
+{
+	close(fd[0]);
+	close(fd[1]);
+}
+
+static inline void dup_devnull(int to)
+{
+	int fd = open("/dev/null", O_RDWR);
+	dup2(fd, to);
+	close(fd);
+}
+
+int start_command(struct child_process *cmd)
+{
+	int need_in, need_out, need_err;
+	int fdin[2], fdout[2], fderr[2];
+
+	/*
+	 * In case of errors we must keep the promise to close FDs
+	 * that have been passed in via ->in and ->out.
+	 */
+
+	need_in = !cmd->no_stdin && cmd->in < 0;
+	if (need_in) {
+		if (pipe(fdin) < 0) {
+			if (cmd->out > 0)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->in = fdin[1];
+	}
+
+	need_out = !cmd->no_stdout
+		&& !cmd->stdout_to_stderr
+		&& cmd->out < 0;
+	if (need_out) {
+		if (pipe(fdout) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->out = fdout[0];
+	}
+
+	need_err = !cmd->no_stderr && cmd->err < 0;
+	if (need_err) {
+		if (pipe(fderr) < 0) {
+			if (need_in)
+				close_pair(fdin);
+			else if (cmd->in)
+				close(cmd->in);
+			if (need_out)
+				close_pair(fdout);
+			else if (cmd->out)
+				close(cmd->out);
+			return -ERR_RUN_COMMAND_PIPE;
+		}
+		cmd->err = fderr[0];
+	}
+
+#ifndef __MINGW32__
+	fflush(NULL);
+	cmd->pid = fork();
+	if (!cmd->pid) {
+		if (cmd->no_stdin)
+			dup_devnull(0);
+		else if (need_in) {
+			dup2(fdin[0], 0);
+			close_pair(fdin);
+		} else if (cmd->in) {
+			dup2(cmd->in, 0);
+			close(cmd->in);
+		}
+
+		if (cmd->no_stderr)
+			dup_devnull(2);
+		else if (need_err) {
+			dup2(fderr[1], 2);
+			close_pair(fderr);
+		}
+
+		if (cmd->no_stdout)
+			dup_devnull(1);
+		else if (cmd->stdout_to_stderr)
+			dup2(2, 1);
+		else if (need_out) {
+			dup2(fdout[1], 1);
+			close_pair(fdout);
+		} else if (cmd->out > 1) {
+			dup2(cmd->out, 1);
+			close(cmd->out);
+		}
+
+		if (cmd->dir && chdir(cmd->dir))
+			die("exec %s: cd to %s failed (%s)", cmd->argv[0],
+			    cmd->dir, strerror(errno));
+		if (cmd->env) {
+			for (; *cmd->env; cmd->env++) {
+				if (strchr(*cmd->env, '='))
+					putenv((char*)*cmd->env);
+				else
+					unsetenv(*cmd->env);
+			}
+		}
+		if (cmd->preexec_cb)
+			cmd->preexec_cb();
+		if (cmd->perf_cmd) {
+			execv_perf_cmd(cmd->argv);
+		} else {
+			execvp(cmd->argv[0], (char *const*) cmd->argv);
+		}
+		exit(127);
+	}
+#else
+	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
+	const char **sargv = cmd->argv;
+	char **env = environ;
+
+	if (cmd->no_stdin) {
+		s0 = dup(0);
+		dup_devnull(0);
+	} else if (need_in) {
+		s0 = dup(0);
+		dup2(fdin[0], 0);
+	} else if (cmd->in) {
+		s0 = dup(0);
+		dup2(cmd->in, 0);
+	}
+
+	if (cmd->no_stderr) {
+		s2 = dup(2);
+		dup_devnull(2);
+	} else if (need_err) {
+		s2 = dup(2);
+		dup2(fderr[1], 2);
+	}
+
+	if (cmd->no_stdout) {
+		s1 = dup(1);
+		dup_devnull(1);
+	} else if (cmd->stdout_to_stderr) {
+		s1 = dup(1);
+		dup2(2, 1);
+	} else if (need_out) {
+		s1 = dup(1);
+		dup2(fdout[1], 1);
+	} else if (cmd->out > 1) {
+		s1 = dup(1);
+		dup2(cmd->out, 1);
+	}
+
+	if (cmd->dir)
+		die("chdir in start_command() not implemented");
+	if (cmd->env) {
+		env = copy_environ();
+		for (; *cmd->env; cmd->env++)
+			env = env_setenv(env, *cmd->env);
+	}
+
+	if (cmd->perf_cmd) {
+		cmd->argv = prepare_perf_cmd(cmd->argv);
+	}
+
+	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);
+
+	if (cmd->env)
+		free_environ(env);
+	if (cmd->perf_cmd)
+		free(cmd->argv);
+
+	cmd->argv = sargv;
+	if (s0 >= 0)
+		dup2(s0, 0), close(s0);
+	if (s1 >= 0)
+		dup2(s1, 1), close(s1);
+	if (s2 >= 0)
+		dup2(s2, 2), close(s2);
+#endif
+
+	if (cmd->pid < 0) {
+		int err = errno;
+		if (need_in)
+			close_pair(fdin);
+		else if (cmd->in)
+			close(cmd->in);
+		if (need_out)
+			close_pair(fdout);
+		else if (cmd->out)
+			close(cmd->out);
+		if (need_err)
+			close_pair(fderr);
+		return err == ENOENT ?
+			-ERR_RUN_COMMAND_EXEC :
+			-ERR_RUN_COMMAND_FORK;
+	}
+
+	if (need_in)
+		close(fdin[0]);
+	else if (cmd->in)
+		close(cmd->in);
+
+	if (need_out)
+		close(fdout[1]);
+	else if (cmd->out)
+		close(cmd->out);
+
+	if (need_err)
+		close(fderr[1]);
+
+	return 0;
+}
+
+static int wait_or_whine(pid_t pid)
+{
+	for (;;) {
+		int status, code;
+		pid_t waiting = waitpid(pid, &status, 0);
+
+		if (waiting < 0) {
+			if (errno == EINTR)
+				continue;
+			error("waitpid failed (%s)", strerror(errno));
+			return -ERR_RUN_COMMAND_WAITPID;
+		}
+		if (waiting != pid)
+			return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
+		if (WIFSIGNALED(status))
+			return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
+
+		if (!WIFEXITED(status))
+			return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
+		code = WEXITSTATUS(status);
+		switch (code) {
+		case 127:
+			return -ERR_RUN_COMMAND_EXEC;
+		case 0:
+			return 0;
+		default:
+			return -code;
+		}
+	}
+}
+
+int finish_command(struct child_process *cmd)
+{
+	return wait_or_whine(cmd->pid);
+}
+
+int run_command(struct child_process *cmd)
+{
+	int code = start_command(cmd);
+	if (code)
+		return code;
+	return finish_command(cmd);
+}
+
+static void prepare_run_command_v_opt(struct child_process *cmd,
+				      const char **argv,
+				      int opt)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->argv = argv;
+	cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
+	cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0;
+	cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
+}
+
+int run_command_v_opt(const char **argv, int opt)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	return run_command(&cmd);
+}
+
+int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env)
+{
+	struct child_process cmd;
+	prepare_run_command_v_opt(&cmd, argv, opt);
+	cmd.dir = dir;
+	cmd.env = env;
+	return run_command(&cmd);
+}
+
+#ifdef __MINGW32__
+static __stdcall unsigned run_thread(void *data)
+{
+	struct async *async = data;
+	return async->proc(async->fd_for_proc, async->data);
+}
+#endif
+
+int start_async(struct async *async)
+{
+	int pipe_out[2];
+
+	if (pipe(pipe_out) < 0)
+		return error("cannot create pipe: %s", strerror(errno));
+	async->out = pipe_out[0];
+
+#ifndef __MINGW32__
+	/* Flush stdio before fork() to avoid cloning buffers */
+	fflush(NULL);
+
+	async->pid = fork();
+	if (async->pid < 0) {
+		error("fork (async) failed: %s", strerror(errno));
+		close_pair(pipe_out);
+		return -1;
+	}
+	if (!async->pid) {
+		close(pipe_out[0]);
+		exit(!!async->proc(pipe_out[1], async->data));
+	}
+	close(pipe_out[1]);
+#else
+	async->fd_for_proc = pipe_out[1];
+	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
+	if (!async->tid) {
+		error("cannot create thread: %s", strerror(errno));
+		close_pair(pipe_out);
+		return -1;
+	}
+#endif
+	return 0;
+}
+
+int finish_async(struct async *async)
+{
+#ifndef __MINGW32__
+	int ret = 0;
+
+	if (wait_or_whine(async->pid))
+		ret = error("waitpid (async) failed");
+#else
+	DWORD ret = 0;
+	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
+		ret = error("waiting for thread failed: %lu", GetLastError());
+	else if (!GetExitCodeThread(async->tid, &ret))
+		ret = error("cannot get thread exit code: %lu", GetLastError());
+	CloseHandle(async->tid);
+#endif
+	return ret;
+}
+
+int run_hook(const char *index_file, const char *name, ...)
+{
+	struct child_process hook;
+	const char **argv = NULL, *env[2];
+	char index[PATH_MAX];
+	va_list args;
+	int ret;
+	size_t i = 0, alloc = 0;
+
+	if (access(perf_path("hooks/%s", name), X_OK) < 0)
+		return 0;
+
+	va_start(args, name);
+	ALLOC_GROW(argv, i + 1, alloc);
+	argv[i++] = perf_path("hooks/%s", name);
+	while (argv[i-1]) {
+		ALLOC_GROW(argv, i + 1, alloc);
+		argv[i++] = va_arg(args, const char *);
+	}
+	va_end(args);
+
+	memset(&hook, 0, sizeof(hook));
+	hook.argv = argv;
+	hook.no_stdin = 1;
+	hook.stdout_to_stderr = 1;
+	if (index_file) {
+		snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file);
+		env[0] = index;
+		env[1] = NULL;
+		hook.env = env;
+	}
+
+	ret = start_command(&hook);
+	free(argv);
+	if (ret) {
+		warning("Could not spawn %s", argv[0]);
+		return ret;
+	}
+	ret = finish_command(&hook);
+	if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
+		warning("%s exited due to uncaught signal", argv[0]);
+
+	return ret;
+}
diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h
new file mode 100644
index 0000000..328289f
--- /dev/null
+++ b/tools/perf/util/run-command.h
@@ -0,0 +1,93 @@
+#ifndef RUN_COMMAND_H
+#define RUN_COMMAND_H
+
+enum {
+	ERR_RUN_COMMAND_FORK = 10000,
+	ERR_RUN_COMMAND_EXEC,
+	ERR_RUN_COMMAND_PIPE,
+	ERR_RUN_COMMAND_WAITPID,
+	ERR_RUN_COMMAND_WAITPID_WRONG_PID,
+	ERR_RUN_COMMAND_WAITPID_SIGNAL,
+	ERR_RUN_COMMAND_WAITPID_NOEXIT,
+};
+#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
+
+struct child_process {
+	const char **argv;
+	pid_t pid;
+	/*
+	 * Using .in, .out, .err:
+	 * - Specify 0 for no redirections (child inherits stdin, stdout,
+	 *   stderr from parent).
+	 * - Specify -1 to have a pipe allocated as follows:
+	 *     .in: returns the writable pipe end; parent writes to it,
+	 *          the readable pipe end becomes child's stdin
+	 *     .out, .err: returns the readable pipe end; parent reads from
+	 *          it, the writable pipe end becomes child's stdout/stderr
+	 *   The caller of start_command() must close the returned FDs
+	 *   after it has completed reading from/writing to it!
+	 * - Specify > 0 to set a channel to a particular FD as follows:
+	 *     .in: a readable FD, becomes child's stdin
+	 *     .out: a writable FD, becomes child's stdout/stderr
+	 *     .err > 0 not supported
+	 *   The specified FD is closed by start_command(), even in case
+	 *   of errors!
+	 */
+	int in;
+	int out;
+	int err;
+	const char *dir;
+	const char *const *env;
+	unsigned no_stdin:1;
+	unsigned no_stdout:1;
+	unsigned no_stderr:1;
+	unsigned perf_cmd:1; /* if this is to be perf sub-command */
+	unsigned stdout_to_stderr:1;
+	void (*preexec_cb)(void);
+};
+
+int start_command(struct child_process *);
+int finish_command(struct child_process *);
+int run_command(struct child_process *);
+
+extern int run_hook(const char *index_file, const char *name, ...);
+
+#define RUN_COMMAND_NO_STDIN 1
+#define RUN_PERF_CMD	     2	/*If this is to be perf sub-command */
+#define RUN_COMMAND_STDOUT_TO_STDERR 4
+int run_command_v_opt(const char **argv, int opt);
+
+/*
+ * env (the environment) is to be formatted like environ: "VAR=VALUE".
+ * To unset an environment variable use just "VAR".
+ */
+int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env);
+
+/*
+ * The purpose of the following functions is to feed a pipe by running
+ * a function asynchronously and providing output that the caller reads.
+ *
+ * It is expected that no synchronization and mutual exclusion between
+ * the caller and the feed function is necessary so that the function
+ * can run in a thread without interfering with the caller.
+ */
+struct async {
+	/*
+	 * proc writes to fd and closes it;
+	 * returns 0 on success, non-zero on failure
+	 */
+	int (*proc)(int fd, void *data);
+	void *data;
+	int out;	/* caller reads from here and closes it */
+#ifndef __MINGW32__
+	pid_t pid;
+#else
+	HANDLE tid;
+	int fd_for_proc;
+#endif
+};
+
+int start_async(struct async *async);
+int finish_async(struct async *async);
+
+#endif
diff --git a/tools/perf/util/sigchain.c b/tools/perf/util/sigchain.c
new file mode 100644
index 0000000..1118b99
--- /dev/null
+++ b/tools/perf/util/sigchain.c
@@ -0,0 +1,52 @@
+#include "sigchain.h"
+#include "cache.h"
+
+#define SIGCHAIN_MAX_SIGNALS 32
+
+struct sigchain_signal {
+	sigchain_fun *old;
+	int n;
+	int alloc;
+};
+static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS];
+
+static void check_signum(int sig)
+{
+	if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS)
+		die("BUG: signal out of range: %d", sig);
+}
+
+int sigchain_push(int sig, sigchain_fun f)
+{
+	struct sigchain_signal *s = signals + sig;
+	check_signum(sig);
+
+	ALLOC_GROW(s->old, s->n + 1, s->alloc);
+	s->old[s->n] = signal(sig, f);
+	if (s->old[s->n] == SIG_ERR)
+		return -1;
+	s->n++;
+	return 0;
+}
+
+int sigchain_pop(int sig)
+{
+	struct sigchain_signal *s = signals + sig;
+	check_signum(sig);
+	if (s->n < 1)
+		return 0;
+
+	if (signal(sig, s->old[s->n - 1]) == SIG_ERR)
+		return -1;
+	s->n--;
+	return 0;
+}
+
+void sigchain_push_common(sigchain_fun f)
+{
+	sigchain_push(SIGINT, f);
+	sigchain_push(SIGHUP, f);
+	sigchain_push(SIGTERM, f);
+	sigchain_push(SIGQUIT, f);
+	sigchain_push(SIGPIPE, f);
+}
diff --git a/tools/perf/util/sigchain.h b/tools/perf/util/sigchain.h
new file mode 100644
index 0000000..618083bc
--- /dev/null
+++ b/tools/perf/util/sigchain.h
@@ -0,0 +1,11 @@
+#ifndef SIGCHAIN_H
+#define SIGCHAIN_H
+
+typedef void (*sigchain_fun)(int);
+
+int sigchain_push(int sig, sigchain_fun f);
+int sigchain_pop(int sig);
+
+void sigchain_push_common(sigchain_fun f);
+
+#endif /* SIGCHAIN_H */
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
new file mode 100644
index 0000000..eaba093
--- /dev/null
+++ b/tools/perf/util/strbuf.c
@@ -0,0 +1,359 @@
+#include "cache.h"
+
+int prefixcmp(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 0;
+		else if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+}
+
+/*
+ * Used as the default ->buf value, so that people can always assume
+ * buf is non NULL and ->buf is NUL terminated even for a freshly
+ * initialized strbuf.
+ */
+char strbuf_slopbuf[1];
+
+void strbuf_init(struct strbuf *sb, size_t hint)
+{
+	sb->alloc = sb->len = 0;
+	sb->buf = strbuf_slopbuf;
+	if (hint)
+		strbuf_grow(sb, hint);
+}
+
+void strbuf_release(struct strbuf *sb)
+{
+	if (sb->alloc) {
+		free(sb->buf);
+		strbuf_init(sb, 0);
+	}
+}
+
+char *strbuf_detach(struct strbuf *sb, size_t *sz)
+{
+	char *res = sb->alloc ? sb->buf : NULL;
+	if (sz)
+		*sz = sb->len;
+	strbuf_init(sb, 0);
+	return res;
+}
+
+void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
+{
+	strbuf_release(sb);
+	sb->buf   = buf;
+	sb->len   = len;
+	sb->alloc = alloc;
+	strbuf_grow(sb, 0);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_grow(struct strbuf *sb, size_t extra)
+{
+	if (sb->len + extra + 1 <= sb->len)
+		die("you want to use way too much memory");
+	if (!sb->alloc)
+		sb->buf = NULL;
+	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
+}
+
+void strbuf_trim(struct strbuf *sb)
+{
+	char *b = sb->buf;
+	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+		sb->len--;
+	while (sb->len > 0 && isspace(*b)) {
+		b++;
+		sb->len--;
+	}
+	memmove(sb->buf, b, sb->len);
+	sb->buf[sb->len] = '\0';
+}
+void strbuf_rtrim(struct strbuf *sb)
+{
+	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+		sb->len--;
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_ltrim(struct strbuf *sb)
+{
+	char *b = sb->buf;
+	while (sb->len > 0 && isspace(*b)) {
+		b++;
+		sb->len--;
+	}
+	memmove(sb->buf, b, sb->len);
+	sb->buf[sb->len] = '\0';
+}
+
+void strbuf_tolower(struct strbuf *sb)
+{
+	int i;
+	for (i = 0; i < sb->len; i++)
+		sb->buf[i] = tolower(sb->buf[i]);
+}
+
+struct strbuf **strbuf_split(const struct strbuf *sb, int delim)
+{
+	int alloc = 2, pos = 0;
+	char *n, *p;
+	struct strbuf **ret;
+	struct strbuf *t;
+
+	ret = calloc(alloc, sizeof(struct strbuf *));
+	p = n = sb->buf;
+	while (n < sb->buf + sb->len) {
+		int len;
+		n = memchr(n, delim, sb->len - (n - sb->buf));
+		if (pos + 1 >= alloc) {
+			alloc = alloc * 2;
+			ret = realloc(ret, sizeof(struct strbuf *) * alloc);
+		}
+		if (!n)
+			n = sb->buf + sb->len - 1;
+		len = n - p + 1;
+		t = malloc(sizeof(struct strbuf));
+		strbuf_init(t, len);
+		strbuf_add(t, p, len);
+		ret[pos] = t;
+		ret[++pos] = NULL;
+		p = ++n;
+	}
+	return ret;
+}
+
+void strbuf_list_free(struct strbuf **sbs)
+{
+	struct strbuf **s = sbs;
+
+	while (*s) {
+		strbuf_release(*s);
+		free(*s++);
+	}
+	free(sbs);
+}
+
+int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
+{
+	int len = a->len < b->len ? a->len: b->len;
+	int cmp = memcmp(a->buf, b->buf, len);
+	if (cmp)
+		return cmp;
+	return a->len < b->len ? -1: a->len != b->len;
+}
+
+void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
+				   const void *data, size_t dlen)
+{
+	if (pos + len < pos)
+		die("you want to use way too much memory");
+	if (pos > sb->len)
+		die("`pos' is too far after the end of the buffer");
+	if (pos + len > sb->len)
+		die("`pos + len' is too far after the end of the buffer");
+
+	if (dlen >= len)
+		strbuf_grow(sb, dlen - len);
+	memmove(sb->buf + pos + dlen,
+			sb->buf + pos + len,
+			sb->len - pos - len);
+	memcpy(sb->buf + pos, data, dlen);
+	strbuf_setlen(sb, sb->len + dlen - len);
+}
+
+void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
+{
+	strbuf_splice(sb, pos, 0, data, len);
+}
+
+void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_splice(sb, pos, len, NULL, 0);
+}
+
+void strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, data, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, sb->buf + pos, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
+	int len;
+	va_list ap;
+
+	if (!strbuf_avail(sb))
+		strbuf_grow(sb, 64);
+	va_start(ap, fmt);
+	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+	va_end(ap);
+	if (len < 0)
+		die("your vsnprintf is broken");
+	if (len > strbuf_avail(sb)) {
+		strbuf_grow(sb, len);
+		va_start(ap, fmt);
+		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+		va_end(ap);
+		if (len > strbuf_avail(sb)) {
+			die("this should not happen, your snprintf is broken");
+		}
+	}
+	strbuf_setlen(sb, sb->len + len);
+}
+
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+		   void *context)
+{
+	for (;;) {
+		const char *percent;
+		size_t consumed;
+
+		percent = strchrnul(format, '%');
+		strbuf_add(sb, format, percent - format);
+		if (!*percent)
+			break;
+		format = percent + 1;
+
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
+			strbuf_addch(sb, '%');
+	}
+}
+
+size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder,
+		void *context)
+{
+	struct strbuf_expand_dict_entry *e = context;
+	size_t len;
+
+	for (; e->placeholder && (len = strlen(e->placeholder)); e++) {
+		if (!strncmp(placeholder, e->placeholder, len)) {
+			if (e->value)
+				strbuf_addstr(sb, e->value);
+			return len;
+		}
+	}
+	return 0;
+}
+
+size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
+{
+	size_t res;
+	size_t oldalloc = sb->alloc;
+
+	strbuf_grow(sb, size);
+	res = fread(sb->buf + sb->len, 1, size, f);
+	if (res > 0)
+		strbuf_setlen(sb, sb->len + res);
+	else if (res < 0 && oldalloc == 0)
+		strbuf_release(sb);
+	return res;
+}
+
+ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
+{
+	size_t oldlen = sb->len;
+	size_t oldalloc = sb->alloc;
+
+	strbuf_grow(sb, hint ? hint : 8192);
+	for (;;) {
+		ssize_t cnt;
+
+		cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
+		if (cnt < 0) {
+			if (oldalloc == 0)
+				strbuf_release(sb);
+			else
+				strbuf_setlen(sb, oldlen);
+			return -1;
+		}
+		if (!cnt)
+			break;
+		sb->len += cnt;
+		strbuf_grow(sb, 8192);
+	}
+
+	sb->buf[sb->len] = '\0';
+	return sb->len - oldlen;
+}
+
+#define STRBUF_MAXLINK (2*PATH_MAX)
+
+int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
+{
+	size_t oldalloc = sb->alloc;
+
+	if (hint < 32)
+		hint = 32;
+
+	while (hint < STRBUF_MAXLINK) {
+		int len;
+
+		strbuf_grow(sb, hint);
+		len = readlink(path, sb->buf, hint);
+		if (len < 0) {
+			if (errno != ERANGE)
+				break;
+		} else if (len < hint) {
+			strbuf_setlen(sb, len);
+			return 0;
+		}
+
+		/* .. the buffer was too small - try again */
+		hint *= 2;
+	}
+	if (oldalloc == 0)
+		strbuf_release(sb);
+	return -1;
+}
+
+int strbuf_getline(struct strbuf *sb, FILE *fp, int term)
+{
+	int ch;
+
+	strbuf_grow(sb, 0);
+	if (feof(fp))
+		return EOF;
+
+	strbuf_reset(sb);
+	while ((ch = fgetc(fp)) != EOF) {
+		if (ch == term)
+			break;
+		strbuf_grow(sb, 1);
+		sb->buf[sb->len++] = ch;
+	}
+	if (ch == EOF && sb->len == 0)
+		return EOF;
+
+	sb->buf[sb->len] = '\0';
+	return 0;
+}
+
+int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
+{
+	int fd, len;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -1;
+	len = strbuf_read(sb, fd, hint);
+	close(fd);
+	if (len < 0)
+		return -1;
+
+	return len;
+}
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
new file mode 100644
index 0000000..9ee908a
--- /dev/null
+++ b/tools/perf/util/strbuf.h
@@ -0,0 +1,137 @@
+#ifndef STRBUF_H
+#define STRBUF_H
+
+/*
+ * Strbuf's can be use in many ways: as a byte array, or to store arbitrary
+ * long, overflow safe strings.
+ *
+ * Strbufs has some invariants that are very important to keep in mind:
+ *
+ * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
+ *    build complex strings/buffers whose final size isn't easily known.
+ *
+ *    It is NOT legal to copy the ->buf pointer away.
+ *    `strbuf_detach' is the operation that detachs a buffer from its shell
+ *    while keeping the shell valid wrt its invariants.
+ *
+ * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+ *    allocated. The extra byte is used to store a '\0', allowing the ->buf
+ *    member to be a valid C-string. Every strbuf function ensure this
+ *    invariant is preserved.
+ *
+ *    Note that it is OK to "play" with the buffer directly if you work it
+ *    that way:
+ *
+ *    strbuf_grow(sb, SOME_SIZE);
+ *       ... Here, the memory array starting at sb->buf, and of length
+ *       ... strbuf_avail(sb) is all yours, and you are sure that
+ *       ... strbuf_avail(sb) is at least SOME_SIZE.
+ *    strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
+ *
+ *    Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
+ *
+ *    Doing so is safe, though if it has to be done in many places, adding the
+ *    missing API to the strbuf module is the way to go.
+ *
+ *    XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
+ *         even if it's true in the current implementation. Alloc is somehow a
+ *         "private" member that should not be messed with.
+ */
+
+#include <assert.h>
+
+extern char strbuf_slopbuf[];
+struct strbuf {
+	size_t alloc;
+	size_t len;
+	char *buf;
+};
+
+#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
+
+/*----- strbuf life cycle -----*/
+extern void strbuf_init(struct strbuf *, size_t);
+extern void strbuf_release(struct strbuf *);
+extern char *strbuf_detach(struct strbuf *, size_t *);
+extern void strbuf_attach(struct strbuf *, void *, size_t, size_t);
+static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) {
+	struct strbuf tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+/*----- strbuf size related -----*/
+static inline size_t strbuf_avail(const struct strbuf *sb) {
+	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
+}
+
+extern void strbuf_grow(struct strbuf *, size_t);
+
+static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
+	if (!sb->alloc)
+		strbuf_grow(sb, 0);
+	assert(len < sb->alloc);
+	sb->len = len;
+	sb->buf[len] = '\0';
+}
+#define strbuf_reset(sb)  strbuf_setlen(sb, 0)
+
+/*----- content related -----*/
+extern void strbuf_trim(struct strbuf *);
+extern void strbuf_rtrim(struct strbuf *);
+extern void strbuf_ltrim(struct strbuf *);
+extern int strbuf_cmp(const struct strbuf *, const struct strbuf *);
+extern void strbuf_tolower(struct strbuf *);
+
+extern struct strbuf **strbuf_split(const struct strbuf *, int delim);
+extern void strbuf_list_free(struct strbuf **);
+
+/*----- add data in your buffer -----*/
+static inline void strbuf_addch(struct strbuf *sb, int c) {
+	strbuf_grow(sb, 1);
+	sb->buf[sb->len++] = c;
+	sb->buf[sb->len] = '\0';
+}
+
+extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t);
+extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
+
+/* splice pos..pos+len with given data */
+extern void strbuf_splice(struct strbuf *, size_t pos, size_t len,
+                          const void *, size_t);
+
+extern void strbuf_add(struct strbuf *, const void *, size_t);
+static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
+	strbuf_add(sb, s, strlen(s));
+}
+static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) {
+	strbuf_add(sb, sb2->buf, sb2->len);
+}
+extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
+
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
+struct strbuf_expand_dict_entry {
+	const char *placeholder;
+	const char *value;
+};
+extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context);
+
+__attribute__((format(printf,2,3)))
+extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+
+extern size_t strbuf_fread(struct strbuf *, size_t, FILE *);
+/* XXX: if read fails, any partial read is undone */
+extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint);
+extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint);
+extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint);
+
+extern int strbuf_getline(struct strbuf *, FILE *, int);
+
+extern void stripspace(struct strbuf *buf, int skip_comments);
+extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env);
+
+extern int strbuf_branchname(struct strbuf *sb, const char *name);
+extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
+
+#endif /* STRBUF_H */
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
new file mode 100644
index 0000000..ec33c0c
--- /dev/null
+++ b/tools/perf/util/string.c
@@ -0,0 +1,34 @@
+#include "string.h"
+
+static int hex(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	if ((ch >= 'A') && (ch <= 'F'))
+		return ch - 'A' + 10;
+	return -1;
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+int hex2u64(const char *ptr, __u64 *long_val)
+{
+	const char *p = ptr;
+	*long_val = 0;
+
+	while (*p) {
+		const int hex_val = hex(*p);
+
+		if (hex_val < 0)
+			break;
+
+		*long_val = (*long_val << 4) | hex_val;
+		p++;
+	}
+
+	return p - ptr;
+}
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h
new file mode 100644
index 0000000..72812c1
--- /dev/null
+++ b/tools/perf/util/string.h
@@ -0,0 +1,8 @@
+#ifndef _PERF_STRING_H_
+#define _PERF_STRING_H_
+
+#include <linux/types.h>
+
+int hex2u64(const char *ptr, __u64 *val);
+
+#endif
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
new file mode 100644
index 0000000..23f4f7b
--- /dev/null
+++ b/tools/perf/util/symbol.c
@@ -0,0 +1,574 @@
+#include "util.h"
+#include "../perf.h"
+#include "string.h"
+#include "symbol.h"
+
+#include <libelf.h>
+#include <gelf.h>
+#include <elf.h>
+
+const char *sym_hist_filter;
+
+static struct symbol *symbol__new(uint64_t start, uint64_t len,
+				  const char *name, unsigned int priv_size,
+				  uint64_t obj_start, int verbose)
+{
+	size_t namelen = strlen(name) + 1;
+	struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen);
+
+	if (!self)
+		return NULL;
+
+	if (verbose >= 2)
+		printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n",
+			(__u64)start, len, name, self->hist, (void *)obj_start);
+
+	self->obj_start= obj_start;
+	self->hist = NULL;
+	self->hist_sum = 0;
+
+	if (sym_hist_filter && !strcmp(name, sym_hist_filter))
+		self->hist = calloc(sizeof(__u64), len);
+
+	if (priv_size) {
+		memset(self, 0, priv_size);
+		self = ((void *)self) + priv_size;
+	}
+	self->start = start;
+	self->end   = start + len - 1;
+	memcpy(self->name, name, namelen);
+
+	return self;
+}
+
+static void symbol__delete(struct symbol *self, unsigned int priv_size)
+{
+	free(((void *)self) - priv_size);
+}
+
+static size_t symbol__fprintf(struct symbol *self, FILE *fp)
+{
+	return fprintf(fp, " %llx-%llx %s\n",
+		       self->start, self->end, self->name);
+}
+
+struct dso *dso__new(const char *name, unsigned int sym_priv_size)
+{
+	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
+
+	if (self != NULL) {
+		strcpy(self->name, name);
+		self->syms = RB_ROOT;
+		self->sym_priv_size = sym_priv_size;
+		self->find_symbol = dso__find_symbol;
+	}
+
+	return self;
+}
+
+static void dso__delete_symbols(struct dso *self)
+{
+	struct symbol *pos;
+	struct rb_node *next = rb_first(&self->syms);
+
+	while (next) {
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+		rb_erase(&pos->rb_node, &self->syms);
+		symbol__delete(pos, self->sym_priv_size);
+	}
+}
+
+void dso__delete(struct dso *self)
+{
+	dso__delete_symbols(self);
+	free(self);
+}
+
+static void dso__insert_symbol(struct dso *self, struct symbol *sym)
+{
+	struct rb_node **p = &self->syms.rb_node;
+	struct rb_node *parent = NULL;
+	const uint64_t ip = sym->start;
+	struct symbol *s;
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol, rb_node);
+		if (ip < s->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&sym->rb_node, parent, p);
+	rb_insert_color(&sym->rb_node, &self->syms);
+}
+
+struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
+{
+	struct rb_node *n;
+
+	if (self == NULL)
+		return NULL;
+
+	n = self->syms.rb_node;
+
+	while (n) {
+		struct symbol *s = rb_entry(n, struct symbol, rb_node);
+
+		if (ip < s->start)
+			n = n->rb_left;
+		else if (ip > s->end)
+			n = n->rb_right;
+		else
+			return s;
+	}
+
+	return NULL;
+}
+
+size_t dso__fprintf(struct dso *self, FILE *fp)
+{
+	size_t ret = fprintf(fp, "dso: %s\n", self->name);
+
+	struct rb_node *nd;
+	for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+		ret += symbol__fprintf(pos, fp);
+	}
+
+	return ret;
+}
+
+static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verbose)
+{
+	struct rb_node *nd, *prevnd;
+	char *line = NULL;
+	size_t n;
+	FILE *file = fopen("/proc/kallsyms", "r");
+
+	if (file == NULL)
+		goto out_failure;
+
+	while (!feof(file)) {
+		__u64 start;
+		struct symbol *sym;
+		int line_len, len;
+		char symbol_type;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		line[--line_len] = '\0'; /* \n */
+
+		len = hex2u64(line, &start);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		symbol_type = toupper(line[len]);
+		/*
+		 * We're interested only in code ('T'ext)
+		 */
+		if (symbol_type != 'T' && symbol_type != 'W')
+			continue;
+		/*
+		 * Well fix up the end later, when we have all sorted.
+		 */
+		sym = symbol__new(start, 0xdead, line + len + 2,
+				  self->sym_priv_size, 0, verbose);
+
+		if (sym == NULL)
+			goto out_delete_line;
+
+		if (filter && filter(self, sym))
+			symbol__delete(sym, self->sym_priv_size);
+		else
+			dso__insert_symbol(self, sym);
+	}
+
+	/*
+	 * Now that we have all sorted out, just set the ->end of all
+	 * symbols
+	 */
+	prevnd = rb_first(&self->syms);
+
+	if (prevnd == NULL)
+		goto out_delete_line;
+
+	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+		struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
+			      *curr = rb_entry(nd, struct symbol, rb_node);
+
+		prev->end = curr->start - 1;
+		prevnd = nd;
+	}
+
+	free(line);
+	fclose(file);
+
+	return 0;
+
+out_delete_line:
+	free(line);
+out_failure:
+	return -1;
+}
+
+/**
+ * elf_symtab__for_each_symbol - iterate thru all the symbols
+ *
+ * @self: struct elf_symtab instance to iterate
+ * @index: uint32_t index
+ * @sym: GElf_Sym iterator
+ */
+#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \
+	for (index = 0, gelf_getsym(syms, index, &sym);\
+	     index < nr_syms; \
+	     index++, gelf_getsym(syms, index, &sym))
+
+static inline uint8_t elf_sym__type(const GElf_Sym *sym)
+{
+	return GELF_ST_TYPE(sym->st_info);
+}
+
+static inline int elf_sym__is_function(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_FUNC &&
+	       sym->st_name != 0 &&
+	       sym->st_shndx != SHN_UNDEF &&
+	       sym->st_size != 0;
+}
+
+static inline const char *elf_sym__name(const GElf_Sym *sym,
+					const Elf_Data *symstrs)
+{
+	return symstrs->d_buf + sym->st_name;
+}
+
+static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+				    GElf_Shdr *shp, const char *name,
+				    size_t *index)
+{
+	Elf_Scn *sec = NULL;
+	size_t cnt = 1;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		char *str;
+
+		gelf_getshdr(sec, shp);
+		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
+		if (!strcmp(name, str)) {
+			if (index)
+				*index = cnt;
+			break;
+		}
+		++cnt;
+	}
+
+	return sec;
+}
+
+#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrel(reldata, idx, &pos_mem))
+
+#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
+
+static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
+				       GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym,
+				       GElf_Shdr *shdr_dynsym,
+				       size_t dynsym_idx, int verbose)
+{
+	uint32_t nr_rel_entries, idx;
+	GElf_Sym sym;
+	__u64 plt_offset;
+	GElf_Shdr shdr_plt;
+	struct symbol *f;
+	GElf_Shdr shdr_rel_plt;
+	Elf_Data *reldata, *syms, *symstrs;
+	Elf_Scn *scn_plt_rel, *scn_symstrs;
+	char sympltname[1024];
+	int nr = 0, symidx;
+
+	scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+					  ".rela.plt", NULL);
+	if (scn_plt_rel == NULL) {
+		scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+						  ".rel.plt", NULL);
+		if (scn_plt_rel == NULL)
+			return 0;
+	}
+
+	if (shdr_rel_plt.sh_link != dynsym_idx)
+		return 0;
+
+	if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL)
+		return 0;
+
+	/*
+	 * Fetch the relocation section to find the indexes to the GOT
+	 * and the symbols in the .dynsym they refer to.
+	 */
+	reldata = elf_getdata(scn_plt_rel, NULL);
+	if (reldata == NULL)
+		return -1;
+
+	syms = elf_getdata(scn_dynsym, NULL);
+	if (syms == NULL)
+		return -1;
+
+	scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link);
+	if (scn_symstrs == NULL)
+		return -1;
+
+	symstrs = elf_getdata(scn_symstrs, NULL);
+	if (symstrs == NULL)
+		return -1;
+
+	nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
+	plt_offset = shdr_plt.sh_offset;
+
+	if (shdr_rel_plt.sh_type == SHT_RELA) {
+		GElf_Rela pos_mem, *pos;
+
+		elf_section__for_each_rela(reldata, pos, pos_mem, idx,
+					   nr_rel_entries) {
+			symidx = GELF_R_SYM(pos->r_info);
+			plt_offset += shdr_plt.sh_entsize;
+			gelf_getsym(syms, symidx, &sym);
+			snprintf(sympltname, sizeof(sympltname),
+				 "%s@plt", elf_sym__name(&sym, symstrs));
+
+			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
+					sympltname, self->sym_priv_size, 0, verbose);
+			if (!f)
+				return -1;
+
+			dso__insert_symbol(self, f);
+			++nr;
+		}
+	} else if (shdr_rel_plt.sh_type == SHT_REL) {
+		GElf_Rel pos_mem, *pos;
+		elf_section__for_each_rel(reldata, pos, pos_mem, idx,
+					  nr_rel_entries) {
+			symidx = GELF_R_SYM(pos->r_info);
+			plt_offset += shdr_plt.sh_entsize;
+			gelf_getsym(syms, symidx, &sym);
+			snprintf(sympltname, sizeof(sympltname),
+				 "%s@plt", elf_sym__name(&sym, symstrs));
+
+			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
+					sympltname, self->sym_priv_size, 0, verbose);
+			if (!f)
+				return -1;
+
+			dso__insert_symbol(self, f);
+			++nr;
+		}
+	} else {
+		/*
+		 * TODO: There are still one more shdr_rel_plt.sh_type
+		 * I have to investigate, but probably should be ignored.
+		 */
+	}
+
+	return nr;
+}
+
+static int dso__load_sym(struct dso *self, int fd, const char *name,
+			 symbol_filter_t filter, int verbose)
+{
+	Elf_Data *symstrs;
+	uint32_t nr_syms;
+	int err = -1;
+	uint32_t index;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *syms;
+	GElf_Sym sym;
+	Elf_Scn *sec, *sec_dynsym;
+	Elf *elf;
+	size_t dynsym_idx;
+	int nr = 0;
+
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		if (verbose)
+			fprintf(stderr, "%s: cannot read %s ELF file.\n",
+				__func__, name);
+		goto out_close;
+	}
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		if (verbose)
+			fprintf(stderr, "%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	/*
+	 * We need to check if we have a .dynsym, so that we can handle the
+	 * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+	 * .dynsym or .symtab)
+	 */
+	sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr,
+					 ".dynsym", &dynsym_idx);
+	if (sec_dynsym != NULL) {
+		nr = dso__synthesize_plt_symbols(self, elf, &ehdr,
+						 sec_dynsym, &shdr,
+						 dynsym_idx, verbose);
+		if (nr < 0)
+			goto out_elf_end;
+	}
+
+	/*
+	 * But if we have a full .symtab (that is a superset of .dynsym) we
+	 * should add the symbols not in the .dynsyn
+	 */
+	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
+	if (sec == NULL) {
+		if (sec_dynsym == NULL)
+			goto out_elf_end;
+
+		sec = sec_dynsym;
+		gelf_getshdr(sec, &shdr);
+	}
+
+	syms = elf_getdata(sec, NULL);
+	if (syms == NULL)
+		goto out_elf_end;
+
+	sec = elf_getscn(elf, shdr.sh_link);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	symstrs = elf_getdata(sec, NULL);
+	if (symstrs == NULL)
+		goto out_elf_end;
+
+	nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
+		struct symbol *f;
+		uint64_t obj_start;
+
+		if (!elf_sym__is_function(&sym))
+			continue;
+
+		sec = elf_getscn(elf, sym.st_shndx);
+		if (!sec)
+			goto out_elf_end;
+
+		gelf_getshdr(sec, &shdr);
+		obj_start = sym.st_value;
+
+		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+
+		f = symbol__new(sym.st_value, sym.st_size,
+				elf_sym__name(&sym, symstrs),
+				self->sym_priv_size, obj_start, verbose);
+		if (!f)
+			goto out_elf_end;
+
+		if (filter && filter(self, f))
+			symbol__delete(f, self->sym_priv_size);
+		else {
+			dso__insert_symbol(self, f);
+			nr++;
+		}
+	}
+
+	err = nr;
+out_elf_end:
+	elf_end(elf);
+out_close:
+	return err;
+}
+
+int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
+{
+	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
+	char *name = malloc(size);
+	int variant = 0;
+	int ret = -1;
+	int fd;
+
+	if (!name)
+		return -1;
+
+more:
+	do {
+		switch (variant) {
+		case 0: /* Fedora */
+			snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
+			break;
+		case 1: /* Ubuntu */
+			snprintf(name, size, "/usr/lib/debug%s", self->name);
+			break;
+		case 2: /* Sane people */
+			snprintf(name, size, "%s", self->name);
+			break;
+
+		default:
+			goto out;
+		}
+		variant++;
+
+		fd = open(name, O_RDONLY);
+	} while (fd < 0);
+
+	ret = dso__load_sym(self, fd, name, filter, verbose);
+	close(fd);
+
+	/*
+	 * Some people seem to have debuginfo files _WITHOUT_ debug info!?!?
+	 */
+	if (!ret)
+		goto more;
+
+out:
+	free(name);
+	return ret;
+}
+
+static int dso__load_vmlinux(struct dso *self, const char *vmlinux,
+			     symbol_filter_t filter, int verbose)
+{
+	int err, fd = open(vmlinux, O_RDONLY);
+
+	if (fd < 0)
+		return -1;
+
+	err = dso__load_sym(self, fd, vmlinux, filter, verbose);
+	close(fd);
+
+	return err;
+}
+
+int dso__load_kernel(struct dso *self, const char *vmlinux,
+		     symbol_filter_t filter, int verbose)
+{
+	int err = -1;
+
+	if (vmlinux)
+		err = dso__load_vmlinux(self, vmlinux, filter, verbose);
+
+	if (err)
+		err = dso__load_kallsyms(self, filter, verbose);
+
+	return err;
+}
+
+void symbol__init(void)
+{
+	elf_version(EV_CURRENT);
+}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
new file mode 100644
index 0000000..4839d68
--- /dev/null
+++ b/tools/perf/util/symbol.h
@@ -0,0 +1,47 @@
+#ifndef _PERF_SYMBOL_
+#define _PERF_SYMBOL_ 1
+
+#include <linux/types.h>
+#include "list.h"
+#include "rbtree.h"
+
+struct symbol {
+	struct rb_node	rb_node;
+	__u64		start;
+	__u64		end;
+	__u64		obj_start;
+	__u64		hist_sum;
+	__u64		*hist;
+	char		name[0];
+};
+
+struct dso {
+	struct list_head node;
+	struct rb_root	 syms;
+	unsigned int	 sym_priv_size;
+	struct symbol    *(*find_symbol)(struct dso *, uint64_t ip);
+	char		 name[0];
+};
+
+const char *sym_hist_filter;
+
+typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym);
+
+struct dso *dso__new(const char *name, unsigned int sym_priv_size);
+void dso__delete(struct dso *self);
+
+static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
+{
+	return ((void *)sym) - self->sym_priv_size;
+}
+
+struct symbol *dso__find_symbol(struct dso *self, uint64_t ip);
+
+int dso__load_kernel(struct dso *self, const char *vmlinux,
+		     symbol_filter_t filter, int verbose);
+int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
+
+size_t dso__fprintf(struct dso *self, FILE *fp);
+
+void symbol__init(void);
+#endif /* _PERF_SYMBOL_ */
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
new file mode 100644
index 0000000..2cad286
--- /dev/null
+++ b/tools/perf/util/usage.c
@@ -0,0 +1,80 @@
+/*
+ * GIT - The information manager from hell
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "util.h"
+
+static void report(const char *prefix, const char *err, va_list params)
+{
+	char msg[1024];
+	vsnprintf(msg, sizeof(msg), err, params);
+	fprintf(stderr, "%s%s\n", prefix, msg);
+}
+
+static NORETURN void usage_builtin(const char *err)
+{
+	fprintf(stderr, "\n usage: %s\n", err);
+	exit(129);
+}
+
+static NORETURN void die_builtin(const char *err, va_list params)
+{
+	report("fatal: ", err, params);
+	exit(128);
+}
+
+static void error_builtin(const char *err, va_list params)
+{
+	report("error: ", err, params);
+}
+
+static void warn_builtin(const char *warn, va_list params)
+{
+	report("warning: ", warn, params);
+}
+
+/* If we are in a dlopen()ed .so write to a global variable would segfault
+ * (ugh), so keep things static. */
+static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
+static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
+static void (*error_routine)(const char *err, va_list params) = error_builtin;
+static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
+
+void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
+{
+	die_routine = routine;
+}
+
+void usage(const char *err)
+{
+	usage_routine(err);
+}
+
+void die(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	die_routine(err, params);
+	va_end(params);
+}
+
+int error(const char *err, ...)
+{
+	va_list params;
+
+	va_start(params, err);
+	error_routine(err, params);
+	va_end(params);
+	return -1;
+}
+
+void warning(const char *warn, ...)
+{
+	va_list params;
+
+	va_start(params, warn);
+	warn_routine(warn, params);
+	va_end(params);
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
new file mode 100644
index 0000000..76590a1
--- /dev/null
+++ b/tools/perf/util/util.h
@@ -0,0 +1,410 @@
+#ifndef GIT_COMPAT_UTIL_H
+#define GIT_COMPAT_UTIL_H
+
+#define _FILE_OFFSET_BITS 64
+
+#ifndef FLEX_ARRAY
+/*
+ * See if our compiler is known to support flexible array members.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+# define FLEX_ARRAY /* empty */
+#elif defined(__GNUC__)
+# if (__GNUC__ >= 3)
+#  define FLEX_ARRAY /* empty */
+# else
+#  define FLEX_ARRAY 0 /* older GNU extension */
+# endif
+#endif
+
+/*
+ * Otherwise, default to safer but a bit wasteful traditional style
+ */
+#ifndef FLEX_ARRAY
+# define FLEX_ARRAY 1
+#endif
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+#ifdef __GNUC__
+#define TYPEOF(x) (__typeof__(x))
+#else
+#define TYPEOF(x)
+#endif
+
+#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits))))
+#define HAS_MULTI_BITS(i)  ((i) & ((i) - 1))  /* checks if an integer has more than 1 bit set */
+
+/* Approximation of the length of the decimal representation of this type. */
+#define decimal_length(x)	((int)(sizeof(x) * 2.56 + 0.5) + 1)
+
+#if !defined(__APPLE__) && !defined(__FreeBSD__)  && !defined(__USLC__) && !defined(_M_UNIX)
+#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
+#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
+#endif
+#define _ALL_SOURCE 1
+#define _GNU_SOURCE 1
+#define _BSD_SOURCE 1
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/time.h>
+#include <time.h>
+#include <signal.h>
+#include <fnmatch.h>
+#include <assert.h>
+#include <regex.h>
+#include <utime.h>
+#ifndef __MINGW32__
+#include <sys/wait.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#ifndef NO_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <pwd.h>
+#include <inttypes.h>
+#if defined(__CYGWIN__)
+#undef _XOPEN_SOURCE
+#include <grp.h>
+#define _XOPEN_SOURCE 600
+#include "compat/cygwin.h"
+#else
+#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
+#include <grp.h>
+#define _ALL_SOURCE 1
+#endif
+#else 	/* __MINGW32__ */
+/* pull in Windows compatibility stuff */
+#include "compat/mingw.h"
+#endif	/* __MINGW32__ */
+
+#ifndef NO_ICONV
+#include <iconv.h>
+#endif
+
+#ifndef NO_OPENSSL
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#endif
+
+/* On most systems <limits.h> would have given us this, but
+ * not on some systems (e.g. GNU/Hurd).
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#ifndef PRIuMAX
+#define PRIuMAX "llu"
+#endif
+
+#ifndef PRIu32
+#define PRIu32 "u"
+#endif
+
+#ifndef PRIx32
+#define PRIx32 "x"
+#endif
+
+#ifndef PATH_SEP
+#define PATH_SEP ':'
+#endif
+
+#ifndef STRIP_EXTENSION
+#define STRIP_EXTENSION ""
+#endif
+
+#ifndef has_dos_drive_prefix
+#define has_dos_drive_prefix(path) 0
+#endif
+
+#ifndef is_dir_sep
+#define is_dir_sep(c) ((c) == '/')
+#endif
+
+#ifdef __GNUC__
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#ifndef __attribute__
+#define __attribute__(x)
+#endif
+#endif
+
+/* General helper functions */
+extern void usage(const char *err) NORETURN;
+extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
+extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
+extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
+
+extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
+
+extern int prefixcmp(const char *str, const char *prefix);
+extern time_t tm_to_time_t(const struct tm *tm);
+
+static inline const char *skip_prefix(const char *str, const char *prefix)
+{
+	size_t len = strlen(prefix);
+	return strncmp(str, prefix, len) ? NULL : str + len;
+}
+
+#if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
+
+#ifndef PROT_READ
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define MAP_PRIVATE 1
+#define MAP_FAILED ((void*)-1)
+#endif
+
+#define mmap git_mmap
+#define munmap git_munmap
+extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
+extern int git_munmap(void *start, size_t length);
+
+#else /* NO_MMAP || USE_WIN32_MMAP */
+
+#include <sys/mman.h>
+
+#endif /* NO_MMAP || USE_WIN32_MMAP */
+
+#ifdef NO_MMAP
+
+/* This value must be multiple of (pagesize * 2) */
+#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
+
+#else /* NO_MMAP */
+
+/* This value must be multiple of (pagesize * 2) */
+#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
+	(sizeof(void*) >= 8 \
+		?  1 * 1024 * 1024 * 1024 \
+		: 32 * 1024 * 1024)
+
+#endif /* NO_MMAP */
+
+#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
+#define on_disk_bytes(st) ((st).st_size)
+#else
+#define on_disk_bytes(st) ((st).st_blocks * 512)
+#endif
+
+#define DEFAULT_PACKED_GIT_LIMIT \
+	((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
+
+#ifdef NO_PREAD
+#define pread git_pread
+extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset);
+#endif
+/*
+ * Forward decl that will remind us if its twin in cache.h changes.
+ * This function is used in compat/pread.c.  But we can't include
+ * cache.h there.
+ */
+extern ssize_t read_in_full(int fd, void *buf, size_t count);
+
+#ifdef NO_SETENV
+#define setenv gitsetenv
+extern int gitsetenv(const char *, const char *, int);
+#endif
+
+#ifdef NO_MKDTEMP
+#define mkdtemp gitmkdtemp
+extern char *gitmkdtemp(char *);
+#endif
+
+#ifdef NO_UNSETENV
+#define unsetenv gitunsetenv
+extern void gitunsetenv(const char *);
+#endif
+
+#ifdef NO_STRCASESTR
+#define strcasestr gitstrcasestr
+extern char *gitstrcasestr(const char *haystack, const char *needle);
+#endif
+
+#ifdef NO_STRLCPY
+#define strlcpy gitstrlcpy
+extern size_t gitstrlcpy(char *, const char *, size_t);
+#endif
+
+#ifdef NO_STRTOUMAX
+#define strtoumax gitstrtoumax
+extern uintmax_t gitstrtoumax(const char *, char **, int);
+#endif
+
+#ifdef NO_HSTRERROR
+#define hstrerror githstrerror
+extern const char *githstrerror(int herror);
+#endif
+
+#ifdef NO_MEMMEM
+#define memmem gitmemmem
+void *gitmemmem(const void *haystack, size_t haystacklen,
+                const void *needle, size_t needlelen);
+#endif
+
+#ifdef FREAD_READS_DIRECTORIES
+#ifdef fopen
+#undef fopen
+#endif
+#define fopen(a,b) git_fopen(a,b)
+extern FILE *git_fopen(const char*, const char*);
+#endif
+
+#ifdef SNPRINTF_RETURNS_BOGUS
+#define snprintf git_snprintf
+extern int git_snprintf(char *str, size_t maxsize,
+			const char *format, ...);
+#define vsnprintf git_vsnprintf
+extern int git_vsnprintf(char *str, size_t maxsize,
+			 const char *format, va_list ap);
+#endif
+
+#ifdef __GLIBC_PREREQ
+#if __GLIBC_PREREQ(2, 1)
+#define HAVE_STRCHRNUL
+#endif
+#endif
+
+#ifndef HAVE_STRCHRNUL
+#define strchrnul gitstrchrnul
+static inline char *gitstrchrnul(const char *s, int c)
+{
+	while (*s && *s != c)
+		s++;
+	return (char *)s;
+}
+#endif
+
+/*
+ * Wrappers:
+ */
+extern char *xstrdup(const char *str);
+extern void *xmalloc(size_t size);
+extern void *xmemdupz(const void *data, size_t len);
+extern char *xstrndup(const char *str, size_t len);
+extern void *xrealloc(void *ptr, size_t size);
+extern void *xcalloc(size_t nmemb, size_t size);
+extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
+extern ssize_t xread(int fd, void *buf, size_t len);
+extern ssize_t xwrite(int fd, const void *buf, size_t len);
+extern int xdup(int fd);
+extern FILE *xfdopen(int fd, const char *mode);
+extern int xmkstemp(char *template);
+
+static inline size_t xsize_t(off_t len)
+{
+	return (size_t)len;
+}
+
+static inline int has_extension(const char *filename, const char *ext)
+{
+	size_t len = strlen(filename);
+	size_t extlen = strlen(ext);
+	return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
+}
+
+/* Sane ctype - no locale, and works with signed chars */
+#undef isascii
+#undef isspace
+#undef isdigit
+#undef isalpha
+#undef isalnum
+#undef tolower
+#undef toupper
+extern unsigned char sane_ctype[256];
+#define GIT_SPACE 0x01
+#define GIT_DIGIT 0x02
+#define GIT_ALPHA 0x04
+#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
+#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define isascii(x) (((x) & ~0x7f) == 0)
+#define isspace(x) sane_istest(x,GIT_SPACE)
+#define isdigit(x) sane_istest(x,GIT_DIGIT)
+#define isalpha(x) sane_istest(x,GIT_ALPHA)
+#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
+#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
+#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define tolower(x) sane_case((unsigned char)(x), 0x20)
+#define toupper(x) sane_case((unsigned char)(x), 0)
+
+static inline int sane_case(int x, int high)
+{
+	if (sane_istest(x, GIT_ALPHA))
+		x = (x & ~0x20) | high;
+	return x;
+}
+
+static inline int strtoul_ui(char const *s, int base, unsigned int *result)
+{
+	unsigned long ul;
+	char *p;
+
+	errno = 0;
+	ul = strtoul(s, &p, base);
+	if (errno || *p || p == s || (unsigned int) ul != ul)
+		return -1;
+	*result = ul;
+	return 0;
+}
+
+static inline int strtol_i(char const *s, int base, int *result)
+{
+	long ul;
+	char *p;
+
+	errno = 0;
+	ul = strtol(s, &p, base);
+	if (errno || *p || p == s || (int) ul != ul)
+		return -1;
+	*result = ul;
+	return 0;
+}
+
+#ifdef INTERNAL_QSORT
+void git_qsort(void *base, size_t nmemb, size_t size,
+	       int(*compar)(const void *, const void *));
+#define qsort git_qsort
+#endif
+
+#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
+# define FORCE_DIR_SET_GID S_ISGID
+#else
+# define FORCE_DIR_SET_GID 0
+#endif
+
+#ifdef NO_NSEC
+#undef USE_NSEC
+#define ST_CTIME_NSEC(st) 0
+#define ST_MTIME_NSEC(st) 0
+#else
+#ifdef USE_ST_TIMESPEC
+#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
+#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
+#else
+#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
+#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
+#endif
+#endif
+
+#endif
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
new file mode 100644
index 0000000..6350d65
--- /dev/null
+++ b/tools/perf/util/wrapper.c
@@ -0,0 +1,206 @@
+/*
+ * Various trivial helper wrappers around standard functions
+ */
+#include "cache.h"
+
+/*
+ * There's no pack memory to release - but stay close to the Git
+ * version so wrap this away:
+ */
+static inline void release_pack_memory(size_t size, int flag)
+{
+}
+
+char *xstrdup(const char *str)
+{
+	char *ret = strdup(str);
+	if (!ret) {
+		release_pack_memory(strlen(str) + 1, -1);
+		ret = strdup(str);
+		if (!ret)
+			die("Out of memory, strdup failed");
+	}
+	return ret;
+}
+
+void *xmalloc(size_t size)
+{
+	void *ret = malloc(size);
+	if (!ret && !size)
+		ret = malloc(1);
+	if (!ret) {
+		release_pack_memory(size, -1);
+		ret = malloc(size);
+		if (!ret && !size)
+			ret = malloc(1);
+		if (!ret)
+			die("Out of memory, malloc failed");
+	}
+#ifdef XMALLOC_POISON
+	memset(ret, 0xA5, size);
+#endif
+	return ret;
+}
+
+/*
+ * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
+ * "data" to the allocated memory, zero terminates the allocated memory,
+ * and returns a pointer to the allocated memory. If the allocation fails,
+ * the program dies.
+ */
+void *xmemdupz(const void *data, size_t len)
+{
+	char *p = xmalloc(len + 1);
+	memcpy(p, data, len);
+	p[len] = '\0';
+	return p;
+}
+
+char *xstrndup(const char *str, size_t len)
+{
+	char *p = memchr(str, '\0', len);
+	return xmemdupz(str, p ? p - str : len);
+}
+
+void *xrealloc(void *ptr, size_t size)
+{
+	void *ret = realloc(ptr, size);
+	if (!ret && !size)
+		ret = realloc(ptr, 1);
+	if (!ret) {
+		release_pack_memory(size, -1);
+		ret = realloc(ptr, size);
+		if (!ret && !size)
+			ret = realloc(ptr, 1);
+		if (!ret)
+			die("Out of memory, realloc failed");
+	}
+	return ret;
+}
+
+void *xcalloc(size_t nmemb, size_t size)
+{
+	void *ret = calloc(nmemb, size);
+	if (!ret && (!nmemb || !size))
+		ret = calloc(1, 1);
+	if (!ret) {
+		release_pack_memory(nmemb * size, -1);
+		ret = calloc(nmemb, size);
+		if (!ret && (!nmemb || !size))
+			ret = calloc(1, 1);
+		if (!ret)
+			die("Out of memory, calloc failed");
+	}
+	return ret;
+}
+
+void *xmmap(void *start, size_t length,
+	int prot, int flags, int fd, off_t offset)
+{
+	void *ret = mmap(start, length, prot, flags, fd, offset);
+	if (ret == MAP_FAILED) {
+		if (!length)
+			return NULL;
+		release_pack_memory(length, fd);
+		ret = mmap(start, length, prot, flags, fd, offset);
+		if (ret == MAP_FAILED)
+			die("Out of memory? mmap failed: %s", strerror(errno));
+	}
+	return ret;
+}
+
+/*
+ * xread() is the same a read(), but it automatically restarts read()
+ * operations with a recoverable error (EAGAIN and EINTR). xread()
+ * DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
+ */
+ssize_t xread(int fd, void *buf, size_t len)
+{
+	ssize_t nr;
+	while (1) {
+		nr = read(fd, buf, len);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+/*
+ * xwrite() is the same a write(), but it automatically restarts write()
+ * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
+ * GUARANTEE that "len" bytes is written even if the operation is successful.
+ */
+ssize_t xwrite(int fd, const void *buf, size_t len)
+{
+	ssize_t nr;
+	while (1) {
+		nr = write(fd, buf, len);
+		if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
+			continue;
+		return nr;
+	}
+}
+
+ssize_t read_in_full(int fd, void *buf, size_t count)
+{
+	char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t loaded = xread(fd, p, count);
+		if (loaded <= 0)
+			return total ? total : loaded;
+		count -= loaded;
+		p += loaded;
+		total += loaded;
+	}
+
+	return total;
+}
+
+ssize_t write_in_full(int fd, const void *buf, size_t count)
+{
+	const char *p = buf;
+	ssize_t total = 0;
+
+	while (count > 0) {
+		ssize_t written = xwrite(fd, p, count);
+		if (written < 0)
+			return -1;
+		if (!written) {
+			errno = ENOSPC;
+			return -1;
+		}
+		count -= written;
+		p += written;
+		total += written;
+	}
+
+	return total;
+}
+
+int xdup(int fd)
+{
+	int ret = dup(fd);
+	if (ret < 0)
+		die("dup failed: %s", strerror(errno));
+	return ret;
+}
+
+FILE *xfdopen(int fd, const char *mode)
+{
+	FILE *stream = fdopen(fd, mode);
+	if (stream == NULL)
+		die("Out of memory? fdopen failed: %s", strerror(errno));
+	return stream;
+}
+
+int xmkstemp(char *template)
+{
+	int fd;
+
+	fd = mkstemp(template);
+	if (fd < 0)
+		die("Unable to create temporary file: %s", strerror(errno));
+	return fd;
+}
-- 
cgit v0.10.2


From 7d37a0cbd68c875fa984fa97bcf5c7f4b7950b6d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 6 Jun 2009 20:36:38 +0200
Subject: perf_counter tools: Warning fixes on 32-bit

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 116a397..4a3c279 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -668,9 +668,9 @@ static void hist_hit(struct hist_entry *he, uint64_t ip)
 
 	if (verbose >= 3)
 		printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
-			(void *)he->sym->start,
+			(void *)(unsigned long)he->sym->start,
 			he->sym->name,
-			(void *)ip, ip - he->sym->start,
+			(void *)(unsigned long)ip, ip - he->sym->start,
 			sym->hist[offset]);
 }
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 23f4f7b..253821d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -21,7 +21,7 @@ static struct symbol *symbol__new(uint64_t start, uint64_t len,
 
 	if (verbose >= 2)
 		printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n",
-			(__u64)start, len, name, self->hist, (void *)obj_start);
+			(__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start);
 
 	self->obj_start= obj_start;
 	self->hist = NULL;
-- 
cgit v0.10.2


From 8953645fec933f992223286ad407dc371ac2caa5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 21:04:17 +0200
Subject: perf_counter tools: Fix error condition in parse_aliases()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc warned about this bug:

util/parse-events.c: In function ‘parse_generic_hw_symbols’:
util/parse-events.c:175: warning: comparison is always false due to limited range of data type
util/parse-events.c:182: warning: comparison is always false due to limited range of data type
util/parse-events.c:190: warning: comparison is always false due to limited range of data type

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e0820b4..f18a9a0 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -160,12 +160,12 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
 		}
 	}
 
-	return 0;
+	return -1;
 }
 
 static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
 {
-	__u8 cache_type = -1, cache_op = 0, cache_result = 0;
+	int cache_type = -1, cache_op = 0, cache_result = 0;
 
 	cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX);
 	/*
@@ -179,8 +179,8 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a
 	/*
 	 * Fall back to reads:
 	 */
-	if (cache_type == -1)
-		cache_type = PERF_COUNT_HW_CACHE_OP_READ;
+	if (cache_op == -1)
+		cache_op = PERF_COUNT_HW_CACHE_OP_READ;
 
 	cache_result = parse_aliases(str, hw_cache_result,
 					PERF_COUNT_HW_CACHE_RESULT_MAX);
-- 
cgit v0.10.2


From 39273ee9756917129de3190d469b0b120f87e763 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 21:17:03 +0200
Subject: perf annotate: Automatically pick up vmlinux in the local directory

Right now kernel debug info does not get resolved by default, because
we dont know where to look for the vmlinux.

The -k option can be used for that - but if no option is given, pick
up vmlinux files in the current directory - in case a kernel hacker
runs profiling from the source directory that the kernel was built in.

The real solution would be to embedd the location (and perhaps the
date/timestamp) of the vmlinux file in /proc/kallsyms, so that
tools can pick it up automatically.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 4a3c279..80c5aa0 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -26,7 +26,7 @@
 #define SHOW_HV		4
 
 static char		const *input_name = "perf.data";
-static char		*vmlinux = NULL;
+static char		*vmlinux = "vmlinux";
 
 static char		default_sort_order[] = "comm,symbol";
 static char		*sort_order = default_sort_order;
-- 
cgit v0.10.2


From e9fbc9dc9214d6a9de7d62627be5414804fd7b9f Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 6 Jun 2009 21:22:33 +0200
Subject: perf_counter tools: Initialize a stack variable before use

the "perf report" utility crashed in some circumstances
because the "sym" stack variable was not initialized before used
(as also proven by valgrind).

With this fix both the crash goes away and valgrind no longer complains.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 253821d..158588c 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -457,6 +457,8 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 
 	nr_syms = shdr.sh_size / shdr.sh_entsize;
 
+	memset(&sym, 0, sizeof(sym));
+
 	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
 		struct symbol *f;
 		uint64_t obj_start;
-- 
cgit v0.10.2


From 23b87116c7c4f73597965218b66041acbdb4e79f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 21:25:29 +0200
Subject: perf annotate: Fix command line help text

Arjan noticed this bug in the perf annotate help output:

    -s, --symbol <file>   symbol to annotate

that should be <symbol> instead.

Reported-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 80c5aa0..0e23fe9 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -1300,7 +1300,7 @@ static const char * const annotate_usage[] = {
 static const struct option options[] = {
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
-	OPT_STRING('s', "symbol", &sym_hist_filter, "file",
+	OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
 		    "symbol to annotate"),
 	OPT_BOOLEAN('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
-- 
cgit v0.10.2


From 4ae1507f6d266d0cc3dd36e474d83aad70fec9e4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sun, 24 May 2009 18:45:15 -0400
Subject: cifs: make overriding of ownership conditional on new mount options

We have a bit of a problem with the uid= option. The basic issue is that
it means too many things and has too many side-effects.

It's possible to allow an unprivileged user to mount a filesystem if the
user owns the mountpoint, /bin/mount is setuid root, and the mount is
set up in /etc/fstab with the "user" option.

When doing this though, /bin/mount automatically adds the "uid=" and
"gid=" options to the share. This is fortunate since the correct uid=
option is needed in order to tell the upcall what user's credcache to
use when generating the SPNEGO blob.

On a mount without unix extensions this is fine -- you generally will
want the files to be owned by the "owner" of the mount. The problem
comes in on a mount with unix extensions. With those enabled, the
uid/gid options cause the ownership of files to be overriden even though
the server is sending along the ownership info.

This means that it's not possible to have a mount by an unprivileged
user that shows the server's file ownership info. The result is also
inode permissions that have no reflection at all on the server. You
simply cannot separate ownership from the mode in this fashion.

This behavior also makes MultiuserMount option less usable. Once you
pass in the uid= option for a mount, then you can't use unix ownership
info and allow someone to share the mount.

While I'm not thrilled with it, the only solution I can see is to stop
making uid=/gid= force the overriding of ownership on mounts, and to add
new mount options that turn this behavior on.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 74b5a87..10151f8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1096,17 +1096,17 @@ cifs_parse_mount_options(char *options, const char *devname,
 				return 1;
 			}
 		} else if (strnicmp(data, "uid", 3) == 0) {
-			if (value && *value) {
+			if (value && *value)
 				vol->linux_uid =
 					simple_strtoul(value, &value, 0);
+		} else if (strnicmp(data, "forceuid", 8) == 0) {
 				vol->override_uid = 1;
-			}
 		} else if (strnicmp(data, "gid", 3) == 0) {
-			if (value && *value) {
+			if (value && *value)
 				vol->linux_gid =
 					simple_strtoul(value, &value, 0);
+		} else if (strnicmp(data, "forcegid", 8) == 0) {
 				vol->override_gid = 1;
-			}
 		} else if (strnicmp(data, "file_mode", 4) == 0) {
 			if (value && *value) {
 				vol->file_mode =
-- 
cgit v0.10.2


From f0472d0ec89bef2ea4432828c3daa1b26ef569aa Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 6 Jun 2009 21:09:39 +0000
Subject: [CIFS] Add mention of new mount parm (forceuid) to cifs readme

Also update fs/cifs/CHANGES

Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 227c681..b486898 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -2,7 +2,10 @@ Version 1.59
 ------------
 Client uses server inode numbers (which are persistent) rather than
 client generated ones by default (mount option "serverino" turned
-on by default if server supports it).
+on by default if server supports it).  Add forceuid and forcegid
+mount options (so that when negotiating unix extensions specifying
+which uid mounted does not immediately force the server's reported
+uids to be overridden).
 
 Version 1.58
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 6d1608f..ad92921 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -262,7 +262,8 @@ A partial list of the supported mount options follows:
 		mount.	
   domain	Set the SMB/CIFS workgroup name prepended to the
 		username during CIFS session establishment
-  uid		Set the default uid for inodes. For mounts to servers
+  forceuid	Set the default uid for inodes based on the uid
+		passed in. For mounts to servers
 		which do support the CIFS Unix extensions, such as a
 		properly configured Samba server, the server provides
 		the uid, gid and mode so this parameter should  not be
@@ -292,6 +293,12 @@ A partial list of the supported mount options follows:
 		the client.  Note that the mount.cifs helper must be
 		at version 1.10 or higher to support specifying the uid
 		(or gid) in non-numeric form.
+  forcegid	(similar to above but for the groupid instead of uid)
+  uid		Set the default uid for inodes, and indicate to the
+		cifs kernel driver which local user mounted . If the server
+		supports the unix extensions the default uid is
+		not used to fill in the owner fields of inodes (files)
+		unless the "forceuid" parameter is specified.
   gid		Set the default gid for inodes (similar to above).
   file_mode     If CIFS Unix extensions are not supported by the server
 		this overrides the default mode for file inodes.
-- 
cgit v0.10.2


From f07502dae230a2c3b65381fd1b06e8a18b2c7525 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Sat, 6 Jun 2009 21:18:09 +0100
Subject: integrity: fix IMA inode leak

CONFIG_IMA=y inode activity leaks iint_cache and radix_tree_node objects
until the system runs out of memory.  Nowhere is calling ima_inode_free()
a.k.a. ima_iint_delete().  Fix that by calling it from destroy_inode().

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/inode.c b/fs/inode.c
index a4876e5..bca0c61 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -219,6 +219,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 void destroy_inode(struct inode *inode)
 {
 	BUG_ON(inode_has_buffers(inode));
+	ima_inode_free(inode);
 	security_inode_free(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
-- 
cgit v0.10.2


From ad0b0822f98ef547e2461ce463e4233bad7848a8 Mon Sep 17 00:00:00 2001
From: "Figo.zhang" <figo1802@gmail.com>
Date: Sun, 7 Jun 2009 13:37:27 +0800
Subject: ALSA: sgio2audio.c: clean up checking

vfree() does it's own 'NULL' check,so no need for check before
calling it.

Signed-off-by: Figo.zhang <figo1802@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/mips/sgio2audio.c b/sound/mips/sgio2audio.c
index 66f3b48..e497525 100644
--- a/sound/mips/sgio2audio.c
+++ b/sound/mips/sgio2audio.c
@@ -619,8 +619,7 @@ static int snd_sgio2audio_pcm_hw_params(struct snd_pcm_substream *substream,
 /* hw_free callback */
 static int snd_sgio2audio_pcm_hw_free(struct snd_pcm_substream *substream)
 {
-	if (substream->runtime->dma_area)
-		vfree(substream->runtime->dma_area);
+	vfree(substream->runtime->dma_area);
 	substream->runtime->dma_area = NULL;
 	return 0;
 }
-- 
cgit v0.10.2


From 2f01190aa62fe9dd0a98205927b9f09fd191c017 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 6 Jun 2009 23:10:43 +0200
Subject: perf top: Wait for a minimal set of events before reading first
 snapshot

The first snapshot reading often occur before any events have
been read in the mapped perfcounter files.

Just wait until we have at least one event before starting the
snapshot, or the delay before the first set of entries to be
displayed may be long in case of low refresh rate.

Note: we could also use a semaphore to wait before
"print_entries" number of eveents is reached, but again this
value is tunable and we can't ensure we will even reach it.
Also we could base on a default mimimum set of entries for the
first refresh, say 15, but again, the minimal sample is
tunable, and we could end up displaying nothing until we have a
minimal default set of events, which can take some time in case
of high samples filters.

Hence this simple solution which partially covers the default
case.

[ Impact: fix display artifacts in perf top ]

Signed-off-by: Frederic Weisbecker <fweisbeec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1244322643-6447-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f2e7312..fdc1d58 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -420,7 +420,7 @@ static unsigned int mmap_read_head(struct mmap_data *md)
 
 struct timeval last_read, this_read;
 
-static void mmap_read(struct mmap_data *md)
+static void mmap_read_counter(struct mmap_data *md)
 {
 	unsigned int head = mmap_read_head(md);
 	unsigned int old = md->prev;
@@ -517,6 +517,16 @@ static void mmap_read(struct mmap_data *md)
 static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
 static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 
+static void mmap_read(void)
+{
+	int i, counter;
+
+	for (i = 0; i < nr_cpus; i++) {
+		for (counter = 0; counter < nr_counters; counter++)
+			mmap_read_counter(&mmap_array[i][counter]);
+	}
+}
+
 static int __cmd_top(void)
 {
 	struct perf_counter_attr *attr;
@@ -571,6 +581,11 @@ static int __cmd_top(void)
 		}
 	}
 
+	/* Wait for a minimal set of events before starting the snapshot */
+	poll(event_array, nr_poll, 100);
+
+	mmap_read();
+
 	if (pthread_create(&thread, NULL, display_thread, NULL)) {
 		printf("Could not create display thread.\n");
 		exit(-1);
@@ -589,10 +604,7 @@ static int __cmd_top(void)
 	while (1) {
 		int hits = samples;
 
-		for (i = 0; i < nr_cpus; i++) {
-			for (counter = 0; counter < nr_counters; counter++)
-				mmap_read(&mmap_array[i][counter]);
-		}
+		mmap_read();
 
 		if (hits == samples)
 			ret = poll(event_array, nr_poll, 100);
-- 
cgit v0.10.2


From 7caf6a49bb17d0377210693af5737563b31aa5ee Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 5 Jun 2009 12:01:35 +0200
Subject: dma-debug: change hash_bucket_find from first-fit to best-fit

Some device drivers map the same physical address multiple times to a
dma address. Without an IOMMU this results in the same dma address being
put into the dma-debug hash multiple times. With a first-fit match in
hash_bucket_find() this function may return the wrong dma_debug_entry.

This can result in false positive warnings. This patch fixes it by
changing the first-fit behavior of hash_bucket_find() into a best-fit
algorithm.

Reported-by: Torsten Kaiser <just.for.lkml@googlemail.com>
Reported-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: lethal@linux-sh.org
Cc: just.for.lkml@googlemail.com
Cc: hancockrwd@gmail.com
Cc: jens.axboe@oracle.com
Cc: bharrosh@panasas.com
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@kernel.org>
LKML-Reference: <20090605104132.GE24836@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index cdd205d..8fcc09c 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -186,15 +186,50 @@ static void put_hash_bucket(struct hash_bucket *bucket,
 static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
 						struct dma_debug_entry *ref)
 {
-	struct dma_debug_entry *entry;
+	struct dma_debug_entry *entry, *ret = NULL;
+	int matches = 0, match_lvl, last_lvl = 0;
 
 	list_for_each_entry(entry, &bucket->list, list) {
-		if ((entry->dev_addr == ref->dev_addr) &&
-		    (entry->dev == ref->dev))
+		if ((entry->dev_addr != ref->dev_addr) ||
+		    (entry->dev != ref->dev))
+			continue;
+
+		/*
+		 * Some drivers map the same physical address multiple
+		 * times. Without a hardware IOMMU this results in the
+		 * same device addresses being put into the dma-debug
+		 * hash multiple times too. This can result in false
+		 * positives being reported. Therfore we implement a
+		 * best-fit algorithm here which returns the entry from
+		 * the hash which fits best to the reference value
+		 * instead of the first-fit.
+		 */
+		matches += 1;
+		match_lvl = 0;
+		entry->size      == ref->size      ? ++match_lvl : match_lvl;
+		entry->type      == ref->type      ? ++match_lvl : match_lvl;
+		entry->direction == ref->direction ? ++match_lvl : match_lvl;
+
+		if (match_lvl == 3) {
+			/* perfect-fit - return the result */
 			return entry;
+		} else if (match_lvl > last_lvl) {
+			/*
+			 * We found an entry that fits better then the
+			 * previous one
+			 */
+			last_lvl = match_lvl;
+			ret      = entry;
+		}
 	}
 
-	return NULL;
+	/*
+	 * If we have multiple matches but no perfect-fit, just return
+	 * NULL.
+	 */
+	ret = (matches == 1) ? ret : NULL;
+
+	return ret;
 }
 
 /*
-- 
cgit v0.10.2


From ab1863fc9bc18c806338564124b1e5e7e3ef53d1 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 7 Jun 2009 12:09:17 +0200
Subject: ALSA: pcm - Fix update of runtime->hw_ptr_interrupt

The commit 13f040f9e55d41e92e485389123654971e03b819 made another
regression, the missing update of runtime->hw_ptr_interrupt.
Since this field is only checked in snd_pcmupdate__hw_ptr_interrupt(),
not in snd_pcm_update_hw_ptr(), it must be updated before the hw_ptr
change check.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index bf34603..adb306f 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -299,6 +299,8 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream)
 		hw_ptr_interrupt =
 			new_hw_ptr - new_hw_ptr % runtime->period_size;
 	}
+	runtime->hw_ptr_interrupt = hw_ptr_interrupt;
+
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
 	    runtime->silence_size > 0)
 		snd_pcm_playback_silence(substream, new_hw_ptr);
@@ -309,7 +311,6 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream)
 	runtime->hw_ptr_base = hw_base;
 	runtime->status->hw_ptr = new_hw_ptr;
 	runtime->hw_ptr_jiffies = jiffies;
-	runtime->hw_ptr_interrupt = hw_ptr_interrupt;
 	if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE)
 		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 
-- 
cgit v0.10.2


From 5095f59bda6793a7b8f0856096d6893fe98e0e51 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Fri, 5 Jun 2009 23:27:17 +0530
Subject: x86: cpu_debug: Remove model information to reduce encoding-decoding

Remove model information, encoding/decoding and reduce bookkeeping.

This, besides removing a lot of code and cleaning up the code, also
enables these features on many more CPUs that were enumerated before.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
LKML-Reference: <1244224637.8212.6.camel@ht.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
index 2228020..d96c1ee 100644
--- a/arch/x86/include/asm/cpu_debug.h
+++ b/arch/x86/include/asm/cpu_debug.h
@@ -86,105 +86,7 @@ enum cpu_file_bit {
 	CPU_VALUE_BIT,				/* value		*/
 };
 
-#define	CPU_FILE_VALUE			(1 << CPU_VALUE_BIT)
-
-/*
- * DisplayFamily_DisplayModel	Processor Families/Processor Number Series
- * --------------------------	------------------------------------------
- * 05_01, 05_02, 05_04		Pentium, Pentium with MMX
- *
- * 06_01			Pentium Pro
- * 06_03, 06_05			Pentium II Xeon, Pentium II
- * 06_07, 06_08, 06_0A, 06_0B	Pentium III Xeon, Pentum III
- *
- * 06_09, 060D			Pentium M
- *
- * 06_0E			Core Duo, Core Solo
- *
- * 06_0F			Xeon 3000, 3200, 5100, 5300, 7300 series,
- *				Core 2 Quad, Core 2 Extreme, Core 2 Duo,
- *				Pentium dual-core
- * 06_17			Xeon 5200, 5400 series, Core 2 Quad Q9650
- *
- * 06_1C			Atom
- *
- * 0F_00, 0F_01, 0F_02		Xeon, Xeon MP, Pentium 4
- * 0F_03, 0F_04			Xeon, Xeon MP, Pentium 4, Pentium D
- *
- * 0F_06			Xeon 7100, 5000 Series, Xeon MP,
- *				Pentium 4, Pentium D
- */
-
-/* Register processors bits */
-enum cpu_processor_bit {
-	CPU_NONE,
-/* Intel */
-	CPU_INTEL_PENTIUM_BIT,
-	CPU_INTEL_P6_BIT,
-	CPU_INTEL_PENTIUM_M_BIT,
-	CPU_INTEL_CORE_BIT,
-	CPU_INTEL_CORE2_BIT,
-	CPU_INTEL_ATOM_BIT,
-	CPU_INTEL_XEON_P4_BIT,
-	CPU_INTEL_XEON_MP_BIT,
-/* AMD */
-	CPU_AMD_K6_BIT,
-	CPU_AMD_K7_BIT,
-	CPU_AMD_K8_BIT,
-	CPU_AMD_0F_BIT,
-	CPU_AMD_10_BIT,
-	CPU_AMD_11_BIT,
-};
-
-#define	CPU_INTEL_PENTIUM	(1 << CPU_INTEL_PENTIUM_BIT)
-#define	CPU_INTEL_P6		(1 << CPU_INTEL_P6_BIT)
-#define	CPU_INTEL_PENTIUM_M	(1 << CPU_INTEL_PENTIUM_M_BIT)
-#define	CPU_INTEL_CORE		(1 << CPU_INTEL_CORE_BIT)
-#define	CPU_INTEL_CORE2		(1 << CPU_INTEL_CORE2_BIT)
-#define	CPU_INTEL_ATOM		(1 << CPU_INTEL_ATOM_BIT)
-#define	CPU_INTEL_XEON_P4	(1 << CPU_INTEL_XEON_P4_BIT)
-#define	CPU_INTEL_XEON_MP	(1 << CPU_INTEL_XEON_MP_BIT)
-
-#define	CPU_INTEL_PX		(CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
-#define	CPU_INTEL_COREX		(CPU_INTEL_CORE | CPU_INTEL_CORE2)
-#define	CPU_INTEL_XEON		(CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
-#define	CPU_CO_AT		(CPU_INTEL_CORE | CPU_INTEL_ATOM)
-#define	CPU_C2_AT		(CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
-#define	CPU_CX_AT		(CPU_INTEL_COREX | CPU_INTEL_ATOM)
-#define	CPU_CX_XE		(CPU_INTEL_COREX | CPU_INTEL_XEON)
-#define	CPU_P6_XE		(CPU_INTEL_P6 | CPU_INTEL_XEON)
-#define	CPU_PM_CO_AT		(CPU_INTEL_PENTIUM_M | CPU_CO_AT)
-#define	CPU_C2_AT_XE		(CPU_C2_AT | CPU_INTEL_XEON)
-#define	CPU_CX_AT_XE		(CPU_CX_AT | CPU_INTEL_XEON)
-#define	CPU_P6_CX_AT		(CPU_INTEL_P6 | CPU_CX_AT)
-#define	CPU_P6_CX_XE		(CPU_P6_XE | CPU_INTEL_COREX)
-#define	CPU_P6_CX_AT_XE		(CPU_INTEL_P6 | CPU_CX_AT_XE)
-#define	CPU_PM_CX_AT_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
-#define	CPU_PM_CX_AT		(CPU_INTEL_PENTIUM_M | CPU_CX_AT)
-#define	CPU_PM_CX_XE		(CPU_INTEL_PENTIUM_M | CPU_CX_XE)
-#define	CPU_PX_CX_AT		(CPU_INTEL_PX | CPU_CX_AT)
-#define	CPU_PX_CX_AT_XE		(CPU_INTEL_PX | CPU_CX_AT_XE)
-
-/* Select all supported Intel CPUs */
-#define	CPU_INTEL_ALL		(CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
-
-#define	CPU_AMD_K6		(1 << CPU_AMD_K6_BIT)
-#define	CPU_AMD_K7		(1 << CPU_AMD_K7_BIT)
-#define	CPU_AMD_K8		(1 << CPU_AMD_K8_BIT)
-#define	CPU_AMD_0F		(1 << CPU_AMD_0F_BIT)
-#define	CPU_AMD_10		(1 << CPU_AMD_10_BIT)
-#define	CPU_AMD_11		(1 << CPU_AMD_11_BIT)
-
-#define	CPU_K10_PLUS		(CPU_AMD_10 | CPU_AMD_11)
-#define	CPU_K0F_PLUS		(CPU_AMD_0F | CPU_K10_PLUS)
-#define	CPU_K8_PLUS		(CPU_AMD_K8 | CPU_K0F_PLUS)
-#define	CPU_K7_PLUS		(CPU_AMD_K7 | CPU_K8_PLUS)
-
-/* Select all supported AMD CPUs */
-#define	CPU_AMD_ALL		(CPU_AMD_K6 | CPU_K7_PLUS)
-
-/* Select all supported CPUs */
-#define	CPU_ALL			(CPU_INTEL_ALL | CPU_AMD_ALL)
+#define	CPU_FILE_VALUE		(1 << CPU_VALUE_BIT)
 
 #define MAX_CPU_FILES		512
 
@@ -220,7 +122,6 @@ struct cpu_debug_range {
 	unsigned		min;		/* Register range min	*/
 	unsigned		max;		/* Register range max	*/
 	unsigned		flag;		/* Supported flags	*/
-	unsigned		model;		/* Supported models	*/
 };
 
 #endif /* _ASM_X86_CPU_DEBUG_H */
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 46e29ab..86afe13 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -32,9 +32,7 @@
 
 static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
 static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
-static DEFINE_PER_CPU(unsigned, cpu_modelflag);
 static DEFINE_PER_CPU(int, cpu_priv_count);
-static DEFINE_PER_CPU(unsigned, cpu_model);
 
 static DEFINE_MUTEX(cpu_debug_lock);
 
@@ -80,302 +78,102 @@ static struct cpu_file_base cpu_file[] = {
 	{ "value",	CPU_REG_ALL,	1	},
 };
 
-/* Intel Registers Range */
-static struct cpu_debug_range cpu_intel_range[] = {
-	{ 0x00000000, 0x00000001, CPU_MC,	CPU_INTEL_ALL		},
-	{ 0x00000006, 0x00000007, CPU_MONITOR,	CPU_CX_AT_XE		},
-	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_INTEL_ALL		},
-	{ 0x00000011, 0x00000013, CPU_PMC,	CPU_INTEL_PENTIUM	},
-	{ 0x00000017, 0x00000017, CPU_PLATFORM,	CPU_PX_CX_AT_XE		},
-	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_P6_CX_AT_XE		},
-
-	{ 0x0000002A, 0x0000002A, CPU_POWERON,	CPU_PX_CX_AT_XE		},
-	{ 0x0000002B, 0x0000002B, CPU_POWERON,	CPU_INTEL_XEON		},
-	{ 0x0000002C, 0x0000002C, CPU_FREQ,	CPU_INTEL_XEON		},
-	{ 0x0000003A, 0x0000003A, CPU_CONTROL,	CPU_CX_AT_XE		},
-
-	{ 0x00000040, 0x00000043, CPU_LBRANCH,	CPU_PM_CX_AT_XE		},
-	{ 0x00000044, 0x00000047, CPU_LBRANCH,	CPU_PM_CO_AT		},
-	{ 0x00000060, 0x00000063, CPU_LBRANCH,	CPU_C2_AT		},
-	{ 0x00000064, 0x00000067, CPU_LBRANCH,	CPU_INTEL_ATOM		},
-
-	{ 0x00000079, 0x00000079, CPU_BIOS,	CPU_P6_CX_AT_XE		},
-	{ 0x00000088, 0x0000008A, CPU_CACHE,	CPU_INTEL_P6		},
-	{ 0x0000008B, 0x0000008B, CPU_BIOS,	CPU_P6_CX_AT_XE		},
-	{ 0x0000009B, 0x0000009B, CPU_MONITOR,	CPU_INTEL_XEON		},
-
-	{ 0x000000C1, 0x000000C2, CPU_PMC,	CPU_P6_CX_AT		},
-	{ 0x000000CD, 0x000000CD, CPU_FREQ,	CPU_CX_AT		},
-	{ 0x000000E7, 0x000000E8, CPU_PERF,	CPU_CX_AT		},
-	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_P6_CX_XE		},
-
-	{ 0x00000116, 0x00000116, CPU_CACHE,	CPU_INTEL_P6		},
-	{ 0x00000118, 0x00000118, CPU_CACHE,	CPU_INTEL_P6		},
-	{ 0x00000119, 0x00000119, CPU_CACHE,	CPU_INTEL_PX		},
-	{ 0x0000011A, 0x0000011B, CPU_CACHE,	CPU_INTEL_P6		},
-	{ 0x0000011E, 0x0000011E, CPU_CACHE,	CPU_PX_CX_AT		},
-
-	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_P6_CX_AT_XE		},
-	{ 0x00000179, 0x0000017A, CPU_MC,	CPU_PX_CX_AT_XE		},
-	{ 0x0000017B, 0x0000017B, CPU_MC,	CPU_P6_XE		},
-	{ 0x00000186, 0x00000187, CPU_PMC,	CPU_P6_CX_AT		},
-	{ 0x00000198, 0x00000199, CPU_PERF,	CPU_PM_CX_AT_XE		},
-	{ 0x0000019A, 0x0000019A, CPU_TIME,	CPU_PM_CX_AT_XE		},
-	{ 0x0000019B, 0x0000019D, CPU_THERM,	CPU_PM_CX_AT_XE		},
-	{ 0x000001A0, 0x000001A0, CPU_MISC,	CPU_PM_CX_AT_XE		},
-
-	{ 0x000001C9, 0x000001C9, CPU_LBRANCH,	CPU_PM_CX_AT		},
-	{ 0x000001D7, 0x000001D8, CPU_LBRANCH,	CPU_INTEL_XEON		},
-	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_CX_AT_XE		},
-	{ 0x000001DA, 0x000001DA, CPU_LBRANCH,	CPU_INTEL_XEON		},
-	{ 0x000001DB, 0x000001DB, CPU_LBRANCH,	CPU_P6_XE		},
-	{ 0x000001DC, 0x000001DC, CPU_LBRANCH,	CPU_INTEL_P6		},
-	{ 0x000001DD, 0x000001DE, CPU_LBRANCH,	CPU_PX_CX_AT_XE		},
-	{ 0x000001E0, 0x000001E0, CPU_LBRANCH,	CPU_INTEL_P6		},
-
-	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_P6_CX_XE		},
-	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_P6_CX_XE		},
-	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_P6_CX_XE		},
-	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_P6_CX_XE		},
-	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_C2_AT_XE		},
-	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_P6_CX_XE		},
-
-	{ 0x00000300, 0x00000308, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x00000309, 0x0000030B, CPU_PMC,	CPU_C2_AT_XE		},
-	{ 0x0000030C, 0x00000311, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x00000345, 0x00000345, CPU_PMC,	CPU_C2_AT		},
-	{ 0x00000360, 0x00000371, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x0000038D, 0x00000390, CPU_PMC,	CPU_C2_AT		},
-	{ 0x000003A0, 0x000003BE, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x000003C0, 0x000003CD, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x000003E0, 0x000003E1, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x000003F0, 0x000003F0, CPU_PMC,	CPU_INTEL_XEON		},
-	{ 0x000003F1, 0x000003F1, CPU_PMC,	CPU_C2_AT_XE		},
-	{ 0x000003F2, 0x000003F2, CPU_PMC,	CPU_INTEL_XEON		},
-
-	{ 0x00000400, 0x00000402, CPU_MC,	CPU_PM_CX_AT_XE		},
-	{ 0x00000403, 0x00000403, CPU_MC,	CPU_INTEL_XEON		},
-	{ 0x00000404, 0x00000406, CPU_MC,	CPU_PM_CX_AT_XE		},
-	{ 0x00000407, 0x00000407, CPU_MC,	CPU_INTEL_XEON		},
-	{ 0x00000408, 0x0000040A, CPU_MC,	CPU_PM_CX_AT_XE		},
-	{ 0x0000040B, 0x0000040B, CPU_MC,	CPU_INTEL_XEON		},
-	{ 0x0000040C, 0x0000040E, CPU_MC,	CPU_PM_CX_XE		},
-	{ 0x0000040F, 0x0000040F, CPU_MC,	CPU_INTEL_XEON		},
-	{ 0x00000410, 0x00000412, CPU_MC,	CPU_PM_CX_AT_XE		},
-	{ 0x00000413, 0x00000417, CPU_MC,	CPU_CX_AT_XE		},
-	{ 0x00000480, 0x0000048B, CPU_VMX,	CPU_CX_AT_XE		},
-
-	{ 0x00000600, 0x00000600, CPU_DEBUG,	CPU_PM_CX_AT_XE		},
-	{ 0x00000680, 0x0000068F, CPU_LBRANCH,	CPU_INTEL_XEON		},
-	{ 0x000006C0, 0x000006CF, CPU_LBRANCH,	CPU_INTEL_XEON		},
-
-	{ 0x000107CC, 0x000107D3, CPU_PMC,	CPU_INTEL_XEON_MP	},
-
-	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_INTEL_XEON		},
-	{ 0xC0000081, 0xC0000082, CPU_CALL,	CPU_INTEL_XEON		},
-	{ 0xC0000084, 0xC0000084, CPU_CALL,	CPU_INTEL_XEON		},
-	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_INTEL_XEON		},
+/* CPU Registers Range */
+static struct cpu_debug_range cpu_reg_range[] = {
+	{ 0x00000000, 0x00000001, CPU_MC,	},
+	{ 0x00000006, 0x00000007, CPU_MONITOR,	},
+	{ 0x00000010, 0x00000010, CPU_TIME,	},
+	{ 0x00000011, 0x00000013, CPU_PMC,	},
+	{ 0x00000017, 0x00000017, CPU_PLATFORM,	},
+	{ 0x0000001B, 0x0000001B, CPU_APIC,	},
+	{ 0x0000002A, 0x0000002B, CPU_POWERON,	},
+	{ 0x0000002C, 0x0000002C, CPU_FREQ,	},
+	{ 0x0000003A, 0x0000003A, CPU_CONTROL,	},
+	{ 0x00000040, 0x00000047, CPU_LBRANCH,	},
+	{ 0x00000060, 0x00000067, CPU_LBRANCH,	},
+	{ 0x00000079, 0x00000079, CPU_BIOS,	},
+	{ 0x00000088, 0x0000008A, CPU_CACHE,	},
+	{ 0x0000008B, 0x0000008B, CPU_BIOS,	},
+	{ 0x0000009B, 0x0000009B, CPU_MONITOR,	},
+	{ 0x000000C1, 0x000000C4, CPU_PMC,	},
+	{ 0x000000CD, 0x000000CD, CPU_FREQ,	},
+	{ 0x000000E7, 0x000000E8, CPU_PERF,	},
+	{ 0x000000FE, 0x000000FE, CPU_MTRR,	},
+
+	{ 0x00000116, 0x0000011E, CPU_CACHE,	},
+	{ 0x00000174, 0x00000176, CPU_SYSENTER,	},
+	{ 0x00000179, 0x0000017B, CPU_MC,	},
+	{ 0x00000186, 0x00000189, CPU_PMC,	},
+	{ 0x00000198, 0x00000199, CPU_PERF,	},
+	{ 0x0000019A, 0x0000019A, CPU_TIME,	},
+	{ 0x0000019B, 0x0000019D, CPU_THERM,	},
+	{ 0x000001A0, 0x000001A0, CPU_MISC,	},
+	{ 0x000001C9, 0x000001C9, CPU_LBRANCH,	},
+	{ 0x000001D7, 0x000001D8, CPU_LBRANCH,	},
+	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	},
+	{ 0x000001DA, 0x000001E0, CPU_LBRANCH,	},
+
+	{ 0x00000200, 0x0000020F, CPU_MTRR,	},
+	{ 0x00000250, 0x00000250, CPU_MTRR,	},
+	{ 0x00000258, 0x00000259, CPU_MTRR,	},
+	{ 0x00000268, 0x0000026F, CPU_MTRR,	},
+	{ 0x00000277, 0x00000277, CPU_PAT,	},
+	{ 0x000002FF, 0x000002FF, CPU_MTRR,	},
+
+	{ 0x00000300, 0x00000311, CPU_PMC,	},
+	{ 0x00000345, 0x00000345, CPU_PMC,	},
+	{ 0x00000360, 0x00000371, CPU_PMC,	},
+	{ 0x0000038D, 0x00000390, CPU_PMC,	},
+	{ 0x000003A0, 0x000003BE, CPU_PMC,	},
+	{ 0x000003C0, 0x000003CD, CPU_PMC,	},
+	{ 0x000003E0, 0x000003E1, CPU_PMC,	},
+	{ 0x000003F0, 0x000003F2, CPU_PMC,	},
+
+	{ 0x00000400, 0x00000417, CPU_MC,	},
+	{ 0x00000480, 0x0000048B, CPU_VMX,	},
+
+	{ 0x00000600, 0x00000600, CPU_DEBUG,	},
+	{ 0x00000680, 0x0000068F, CPU_LBRANCH,	},
+	{ 0x000006C0, 0x000006CF, CPU_LBRANCH,	},
+
+	{ 0x000107CC, 0x000107D3, CPU_PMC,	},
+
+	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	},
+	{ 0xC0000081, 0xC0000084, CPU_CALL,	},
+	{ 0xC0000100, 0xC0000102, CPU_BASE,	},
+	{ 0xC0000103, 0xC0000103, CPU_TIME,	},
+
+	{ 0xC0010000, 0xC0010007, CPU_PMC,	},
+	{ 0xC0010010, 0xC0010010, CPU_CONF,	},
+	{ 0xC0010015, 0xC0010015, CPU_CONF,	},
+	{ 0xC0010016, 0xC001001A, CPU_MTRR,	},
+	{ 0xC001001D, 0xC001001D, CPU_MTRR,	},
+	{ 0xC001001F, 0xC001001F, CPU_CONF,	},
+	{ 0xC0010030, 0xC0010035, CPU_BIOS,	},
+	{ 0xC0010044, 0xC0010048, CPU_MC,	},
+	{ 0xC0010050, 0xC0010056, CPU_SMM,	},
+	{ 0xC0010058, 0xC0010058, CPU_CONF,	},
+	{ 0xC0010060, 0xC0010060, CPU_CACHE,	},
+	{ 0xC0010061, 0xC0010068, CPU_SMM,	},
+	{ 0xC0010069, 0xC001006B, CPU_SMM,	},
+	{ 0xC0010070, 0xC0010071, CPU_SMM,	},
+	{ 0xC0010111, 0xC0010113, CPU_SMM,	},
+	{ 0xC0010114, 0xC0010118, CPU_SVM,	},
+	{ 0xC0010140, 0xC0010141, CPU_OSVM,	},
+	{ 0xC0011022, 0xC0011023, CPU_CONF,	},
 };
 
-/* AMD Registers Range */
-static struct cpu_debug_range cpu_amd_range[] = {
-	{ 0x00000000, 0x00000001, CPU_MC,	CPU_K10_PLUS,		},
-	{ 0x00000010, 0x00000010, CPU_TIME,	CPU_K8_PLUS,		},
-	{ 0x0000001B, 0x0000001B, CPU_APIC,	CPU_K8_PLUS,		},
-	{ 0x0000002A, 0x0000002A, CPU_POWERON,	CPU_K7_PLUS		},
-	{ 0x0000008B, 0x0000008B, CPU_VER,	CPU_K8_PLUS		},
-	{ 0x000000FE, 0x000000FE, CPU_MTRR,	CPU_K8_PLUS,		},
-
-	{ 0x00000174, 0x00000176, CPU_SYSENTER,	CPU_K8_PLUS,		},
-	{ 0x00000179, 0x0000017B, CPU_MC,	CPU_K8_PLUS,		},
-	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	CPU_K8_PLUS,		},
-	{ 0x000001DB, 0x000001DE, CPU_LBRANCH,	CPU_K8_PLUS,		},
-
-	{ 0x00000200, 0x0000020F, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0x00000250, 0x00000250, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0x00000258, 0x00000259, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0x00000268, 0x0000026F, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0x00000277, 0x00000277, CPU_PAT,	CPU_K8_PLUS,		},
-	{ 0x000002FF, 0x000002FF, CPU_MTRR,	CPU_K8_PLUS,		},
-
-	{ 0x00000400, 0x00000413, CPU_MC,	CPU_K8_PLUS,		},
-
-	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	CPU_AMD_ALL,		},
-	{ 0xC0000081, 0xC0000084, CPU_CALL,	CPU_K8_PLUS,		},
-	{ 0xC0000100, 0xC0000102, CPU_BASE,	CPU_K8_PLUS,		},
-	{ 0xC0000103, 0xC0000103, CPU_TIME,	CPU_K10_PLUS,		},
-
-	{ 0xC0010000, 0xC0010007, CPU_PMC,	CPU_K8_PLUS,		},
-	{ 0xC0010010, 0xC0010010, CPU_CONF,	CPU_K7_PLUS,		},
-	{ 0xC0010015, 0xC0010015, CPU_CONF,	CPU_K7_PLUS,		},
-	{ 0xC0010016, 0xC001001A, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0xC001001D, 0xC001001D, CPU_MTRR,	CPU_K8_PLUS,		},
-	{ 0xC001001F, 0xC001001F, CPU_CONF,	CPU_K8_PLUS,		},
-	{ 0xC0010030, 0xC0010035, CPU_BIOS,	CPU_K8_PLUS,		},
-	{ 0xC0010044, 0xC0010048, CPU_MC,	CPU_K8_PLUS,		},
-	{ 0xC0010050, 0xC0010056, CPU_SMM,	CPU_K0F_PLUS,		},
-	{ 0xC0010058, 0xC0010058, CPU_CONF,	CPU_K10_PLUS,		},
-	{ 0xC0010060, 0xC0010060, CPU_CACHE,	CPU_AMD_11,		},
-	{ 0xC0010061, 0xC0010068, CPU_SMM,	CPU_K10_PLUS,		},
-	{ 0xC0010069, 0xC001006B, CPU_SMM,	CPU_AMD_11,		},
-	{ 0xC0010070, 0xC0010071, CPU_SMM,	CPU_K10_PLUS,		},
-	{ 0xC0010111, 0xC0010113, CPU_SMM,	CPU_K8_PLUS,		},
-	{ 0xC0010114, 0xC0010118, CPU_SVM,	CPU_K10_PLUS,		},
-	{ 0xC0010140, 0xC0010141, CPU_OSVM,	CPU_K10_PLUS,		},
-	{ 0xC0011022, 0xC0011023, CPU_CONF,	CPU_K10_PLUS,		},
-};
-
-
-/* Intel */
-static int get_intel_modelflag(unsigned model)
-{
-	int flag;
-
-	switch (model) {
-	case 0x0501:
-	case 0x0502:
-	case 0x0504:
-		flag = CPU_INTEL_PENTIUM;
-		break;
-	case 0x0601:
-	case 0x0603:
-	case 0x0605:
-	case 0x0607:
-	case 0x0608:
-	case 0x060A:
-	case 0x060B:
-		flag = CPU_INTEL_P6;
-		break;
-	case 0x0609:
-	case 0x060D:
-		flag = CPU_INTEL_PENTIUM_M;
-		break;
-	case 0x060E:
-		flag = CPU_INTEL_CORE;
-		break;
-	case 0x060F:
-	case 0x0617:
-		flag = CPU_INTEL_CORE2;
-		break;
-	case 0x061C:
-		flag = CPU_INTEL_ATOM;
-		break;
-	case 0x0F00:
-	case 0x0F01:
-	case 0x0F02:
-	case 0x0F03:
-	case 0x0F04:
-		flag = CPU_INTEL_XEON_P4;
-		break;
-	case 0x0F06:
-		flag = CPU_INTEL_XEON_MP;
-		break;
-	default:
-		flag = CPU_NONE;
-		break;
-	}
-
-	return flag;
-}
-
-/* AMD */
-static int get_amd_modelflag(unsigned model)
-{
-	int flag;
-
-	switch (model >> 8) {
-	case 0x6:
-		flag = CPU_AMD_K6;
-		break;
-	case 0x7:
-		flag = CPU_AMD_K7;
-		break;
-	case 0x8:
-		flag = CPU_AMD_K8;
-		break;
-	case 0xf:
-		flag = CPU_AMD_0F;
-		break;
-	case 0x10:
-		flag = CPU_AMD_10;
-		break;
-	case 0x11:
-		flag = CPU_AMD_11;
-		break;
-	default:
-		flag = CPU_NONE;
-		break;
-	}
-
-	return flag;
-}
-
-static int get_cpu_modelflag(unsigned cpu)
-{
-	int flag;
-
-	flag = per_cpu(cpu_model, cpu);
-
-	switch (flag >> 16) {
-	case X86_VENDOR_INTEL:
-		flag = get_intel_modelflag(flag);
-		break;
-	case X86_VENDOR_AMD:
-		flag = get_amd_modelflag(flag & 0xffff);
-		break;
-	default:
-		flag = CPU_NONE;
-		break;
-	}
-
-	return flag;
-}
-
-static int get_cpu_range_count(unsigned cpu)
-{
-	int index;
-
-	switch (per_cpu(cpu_model, cpu) >> 16) {
-	case X86_VENDOR_INTEL:
-		index = ARRAY_SIZE(cpu_intel_range);
-		break;
-	case X86_VENDOR_AMD:
-		index = ARRAY_SIZE(cpu_amd_range);
-		break;
-	default:
-		index = 0;
-		break;
-	}
-
-	return index;
-}
-
 static int is_typeflag_valid(unsigned cpu, unsigned flag)
 {
-	unsigned vendor, modelflag;
-	int i, index;
+	int i;
 
 	/* Standard Registers should be always valid */
 	if (flag >= CPU_TSS)
 		return 1;
 
-	modelflag = per_cpu(cpu_modelflag, cpu);
-	vendor = per_cpu(cpu_model, cpu) >> 16;
-	index = get_cpu_range_count(cpu);
-
-	for (i = 0; i < index; i++) {
-		switch (vendor) {
-		case X86_VENDOR_INTEL:
-			if ((cpu_intel_range[i].model & modelflag) &&
-			    (cpu_intel_range[i].flag & flag))
-				return 1;
-			break;
-		case X86_VENDOR_AMD:
-			if ((cpu_amd_range[i].model & modelflag) &&
-			    (cpu_amd_range[i].flag & flag))
-				return 1;
-			break;
-		}
+	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
+		if (cpu_reg_range[i].flag == flag)
+			return 1;
 	}
 
 	/* Invalid */
@@ -385,26 +183,11 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag)
 static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
 			      int index, unsigned flag)
 {
-	unsigned modelflag;
-
-	modelflag = per_cpu(cpu_modelflag, cpu);
-	*max = 0;
-	switch (per_cpu(cpu_model, cpu) >> 16) {
-	case X86_VENDOR_INTEL:
-		if ((cpu_intel_range[index].model & modelflag) &&
-		    (cpu_intel_range[index].flag & flag)) {
-			*min = cpu_intel_range[index].min;
-			*max = cpu_intel_range[index].max;
-		}
-		break;
-	case X86_VENDOR_AMD:
-		if ((cpu_amd_range[index].model & modelflag) &&
-		    (cpu_amd_range[index].flag & flag)) {
-			*min = cpu_amd_range[index].min;
-			*max = cpu_amd_range[index].max;
-		}
-		break;
-	}
+	if (cpu_reg_range[index].flag == flag) {
+		*min = cpu_reg_range[index].min;
+		*max = cpu_reg_range[index].max;
+	} else
+		*max = 0;
 
 	return *max;
 }
@@ -434,7 +217,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
 	unsigned msr, msr_min, msr_max;
 	struct cpu_private *priv;
 	u32 low, high;
-	int i, range;
+	int i;
 
 	if (seq) {
 		priv = seq->private;
@@ -446,9 +229,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
 		}
 	}
 
-	range = get_cpu_range_count(cpu);
-
-	for (i = 0; i < range; i++) {
+	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
 		if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
 			continue;
 
@@ -788,13 +569,11 @@ static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
 {
 	struct dentry *cpu_dentry = NULL;
 	unsigned reg, reg_min, reg_max;
-	int i, range, err = 0;
+	int i, err = 0;
 	char reg_dir[12];
 	u32 low, high;
 
-	range = get_cpu_range_count(cpu);
-
-	for (i = 0; i < range; i++) {
+	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
 		if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
 				   cpu_base[type].flag))
 			continue;
@@ -850,10 +629,6 @@ static int cpu_init_cpu(void)
 		cpui = &cpu_data(cpu);
 		if (!cpu_has(cpui, X86_FEATURE_MSR))
 			continue;
-		per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
-					   (cpui->x86 << 8) |
-					   (cpui->x86_model));
-		per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
 
 		sprintf(cpu_dir, "cpu%d", cpu);
 		cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
-- 
cgit v0.10.2


From 521a415c9f6d4e5463807ce6d36598acabcd204f Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Sun, 7 Jun 2009 13:52:50 +0200
Subject: pdc202xx_old: fix 'pdc20246_dma_ops'

Commit ac95beedf8bc97b24f9540d4da9952f07221c023 (ide: add struct ide_port_ops
(take 2)) erroneously converted the driver's dma_timeout() and dma_lost_irq()
methods to call the driver's resetproc() method regardless of whether it was
defined for this specific controller while it hadn't been defined and hence
called for PDC20246. So the dma_clear() method, the successor of dma_timeout(),
shouldn't exist and the dma_lost_irq() method should be standard for PDC20246.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 248a54b..8df2630 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -328,9 +328,8 @@ static const struct ide_dma_ops pdc20246_dma_ops = {
 	.dma_start		= ide_dma_start,
 	.dma_end		= ide_dma_end,
 	.dma_test_irq		= pdc202xx_dma_test_irq,
-	.dma_lost_irq		= pdc202xx_dma_lost_irq,
+	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_clear		= pdc202xx_reset,
 	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
-- 
cgit v0.10.2


From 669165daad2ec839df85b8c5f7bc155e76a2f404 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Sun, 7 Jun 2009 13:52:50 +0200
Subject: pdc202xx_old: fix resetproc() method

pdc202xx_reset() calls pdc202xx_reset_host() twice, for both channels, while
that function actually twiddles the single, shared software reset bit -- the
net effect is a duplicated reset and horrendous 4 second delay happening not
only on a channel reset but also when dma_lost_irq() and dma_clear() methods
are called.  Fold pdc202xx_reset_host() into pdc202xx_reset(), fix printk(),
and move it before the actual reset...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 8df2630..b3bc96f 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -1,6 +1,6 @@
 /*
  *  Copyright (C) 1998-2002		Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2006-2007		MontaVista Software, Inc.
+ *  Copyright (C) 2006-2007, 2009	MontaVista Software, Inc.
  *  Copyright (C) 2007			Bartlomiej Zolnierkiewicz
  *
  *  Portions Copyright (C) 1999 Promise Technology, Inc.
@@ -227,28 +227,19 @@ somebody_else:
 	return (dma_stat & 4) == 4;	/* return 1 if INTR asserted */
 }
 
-static void pdc202xx_reset_host (ide_hwif_t *hwif)
+static void pdc202xx_reset(ide_drive_t *drive)
 {
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long high_16	= hwif->extra_base - 16;
 	u8 udma_speed_flag	= inb(high_16 | 0x001f);
 
+	printk(KERN_WARNING "PDC202xx: software reset...\n");
+
 	outb(udma_speed_flag | 0x10, high_16 | 0x001f);
 	mdelay(100);
 	outb(udma_speed_flag & ~0x10, high_16 | 0x001f);
 	mdelay(2000);	/* 2 seconds ?! */
 
-	printk(KERN_WARNING "PDC202XX: %s channel reset.\n",
-		hwif->channel ? "Secondary" : "Primary");
-}
-
-static void pdc202xx_reset (ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	ide_hwif_t *mate	= hwif->mate;
-
-	pdc202xx_reset_host(hwif);
-	pdc202xx_reset_host(mate);
-
 	ide_set_max_pio(drive);
 }
 
-- 
cgit v0.10.2


From 02c33b123e59cab5771e52a012aeb810500260a2 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 13:52:51 +0200
Subject: partitions: warn about the partition exceeding device capacity

The current warning message says only about the kernel's action taken
without mentioning the underlying reason behind it.

Noticed-by: Robert Hancock <hancockrwd@gmail.com>
Cc: Frans Pop <elendil@planet.nl>
Cc: "Andries E. Brouwer" <Andries.Brouwer@cwi.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Emphatically-Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 99e33ef..137a708 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -564,7 +564,8 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 			 * creating invalid block devices
 			 */
 			printk(KERN_WARNING
-			       "%s: p%d size %llu limited to end of disk\n",
+			       "%s: p%d size %llu exceeds device capacity, "
+			       "limited to end of disk\n",
 			       disk->disk_name, p, (unsigned long long) size);
 			size = get_capacity(disk) - from;
 		}
-- 
cgit v0.10.2


From db429e9ec0f9dee2d8e50c154f04f29f880fc9d6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 13:52:52 +0200
Subject: partitions: add ->set_capacity block device method

* Add ->set_capacity block device method and use it in rescan_partitions()
  to attempt enabling native capacity of the device upon detecting the
  partition which exceeds device capacity.

* Add GENHD_FL_NATIVE_CAPACITY flag to try limit attempts of enabling
  native capacity during partition scan.

Together with the consecutive patch implementing ->set_capacity method in
ide-gd device driver this allows automatic disabling of Host Protected Area
(HPA) if any partitions overlapping HPA are detected.

Cc: Robert Hancock <hancockrwd@gmail.com>
Cc: Frans Pop <elendil@planet.nl>
Cc: "Andries E. Brouwer" <Andries.Brouwer@cwi.nl>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Emphatically-Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 137a708..4bc2c43 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -546,28 +546,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 
 	/* add partitions */
 	for (p = 1; p < state->limit; p++) {
-		sector_t size = state->parts[p].size;
-		sector_t from = state->parts[p].from;
+		sector_t size, from;
+try_scan:
+		size = state->parts[p].size;
 		if (!size)
 			continue;
+
+		from = state->parts[p].from;
 		if (from >= get_capacity(disk)) {
 			printk(KERN_WARNING
 			       "%s: p%d ignored, start %llu is behind the end of the disk\n",
 			       disk->disk_name, p, (unsigned long long) from);
 			continue;
 		}
+
 		if (from + size > get_capacity(disk)) {
-			/*
-			 * we can not ignore partitions of broken tables
-			 * created by for example camera firmware, but we
-			 * limit them to the end of the disk to avoid
-			 * creating invalid block devices
-			 */
+			struct block_device_operations *bdops = disk->fops;
+			unsigned long long capacity;
+
 			printk(KERN_WARNING
-			       "%s: p%d size %llu exceeds device capacity, "
-			       "limited to end of disk\n",
+			       "%s: p%d size %llu exceeds device capacity, ",
 			       disk->disk_name, p, (unsigned long long) size);
-			size = get_capacity(disk) - from;
+
+			if (bdops->set_capacity &&
+			    (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) {
+				printk(KERN_CONT "enabling native capacity\n");
+				capacity = bdops->set_capacity(disk, ~0ULL);
+				disk->flags |= GENHD_FL_NATIVE_CAPACITY;
+				if (capacity > get_capacity(disk)) {
+					set_capacity(disk, capacity);
+					check_disk_size_change(disk, bdev);
+					bdev->bd_invalidated = 0;
+				}
+				goto try_scan;
+			} else {
+				/*
+				 * we can not ignore partitions of broken tables
+				 * created by for example camera firmware, but
+				 * we limit them to the end of the disk to avoid
+				 * creating invalid block devices
+				 */
+				printk(KERN_CONT "limited to end of disk\n");
+				size = get_capacity(disk) - from;
+			}
 		}
 		part = add_partition(disk, p, from, size,
 				     state->parts[p].flags);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6f841fb..a2d7298 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1106,6 +1106,8 @@ struct block_device_operations {
 	int (*direct_access) (struct block_device *, sector_t,
 						void **, unsigned long *);
 	int (*media_changed) (struct gendisk *);
+	unsigned long long (*set_capacity) (struct gendisk *,
+						unsigned long long);
 	int (*revalidate_disk) (struct gendisk *);
 	int (*getgeo)(struct block_device *, struct hd_geometry *);
 	struct module *owner;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 634c530..239e24b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -113,6 +113,7 @@ struct hd_struct {
 #define GENHD_FL_UP				16
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
+#define GENHD_FL_NATIVE_CAPACITY		128
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-- 
cgit v0.10.2


From e957b60d1583022a0f7c03267d37fcae2ddb78b1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 13:52:52 +0200
Subject: ide-gd: implement block device ->set_capacity method (v2)

* Use ->probed_capacity to store native device capacity for ATA disks.

* Add ->set_capacity method to struct ide_disk_ops.

* Implement disk device ->set_capacity method for ATA disks.

* Implement block device ->set_capacity method.

v2:
* Check if LBA and HPA are supported in ide_disk_set_capacity().

* According to the spec the SET MAX ADDRESS command shall be
  immediately preceded by a READ NATIVE MAX ADDRESS command.

* Add ide_disk_hpa_{get_native,set}_capacity() helpers.

Together with the previous patch adding ->set_capacity block device
method this allows automatic disabling of Host Protected Area (HPA)
if any partitions overlapping HPA are detected.

Cc: Robert Hancock <hancockrwd@gmail.com>
Cc: Frans Pop <elendil@planet.nl>
Cc: "Andries E. Brouwer" <Andries.Brouwer@cwi.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Emphatically-Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index a9fbe2c..61a6d35 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -302,14 +302,12 @@ static const struct drive_list_entry hpa_list[] = {
 	{ NULL,		NULL }
 };
 
-static void idedisk_check_hpa(ide_drive_t *drive)
+static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48)
 {
-	unsigned long long capacity, set_max;
-	int lba48 = ata_id_lba48_enabled(drive->id);
+	u64 capacity, set_max;
 
 	capacity = drive->capacity64;
-
-	set_max = idedisk_read_native_max_address(drive, lba48);
+	set_max  = idedisk_read_native_max_address(drive, lba48);
 
 	if (ide_in_drive_list(drive->id, hpa_list)) {
 		/*
@@ -320,9 +318,31 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 			set_max--;
 	}
 
+	return set_max;
+}
+
+static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48)
+{
+	set_max = idedisk_set_max_address(drive, set_max, lba48);
+	if (set_max)
+		drive->capacity64 = set_max;
+
+	return set_max;
+}
+
+static void idedisk_check_hpa(ide_drive_t *drive)
+{
+	u64 capacity, set_max;
+	int lba48 = ata_id_lba48_enabled(drive->id);
+
+	capacity = drive->capacity64;
+	set_max  = ide_disk_hpa_get_native_capacity(drive, lba48);
+
 	if (set_max <= capacity)
 		return;
 
+	drive->probed_capacity = set_max;
+
 	printk(KERN_INFO "%s: Host Protected Area detected.\n"
 			 "\tcurrent capacity is %llu sectors (%llu MB)\n"
 			 "\tnative  capacity is %llu sectors (%llu MB)\n",
@@ -330,13 +350,10 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 			 capacity, sectors_to_MB(capacity),
 			 set_max, sectors_to_MB(set_max));
 
-	set_max = idedisk_set_max_address(drive, set_max, lba48);
-
-	if (set_max) {
-		drive->capacity64 = set_max;
+	set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
+	if (set_max)
 		printk(KERN_INFO "%s: Host Protected Area disabled.\n",
 				 drive->name);
-	}
 }
 
 static int ide_disk_get_capacity(ide_drive_t *drive)
@@ -358,6 +375,8 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 		drive->capacity64 = drive->cyl * drive->head * drive->sect;
 	}
 
+	drive->probed_capacity = drive->capacity64;
+
 	if (lba) {
 		drive->dev_flags |= IDE_DFLAG_LBA;
 
@@ -376,7 +395,7 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 		       "%llu sectors (%llu MB)\n",
 		       drive->name, (unsigned long long)drive->capacity64,
 		       sectors_to_MB(drive->capacity64));
-		drive->capacity64 = 1ULL << 28;
+		drive->probed_capacity = drive->capacity64 = 1ULL << 28;
 	}
 
 	if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
@@ -392,6 +411,31 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 	return 0;
 }
 
+static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity)
+{
+	u64 set = min(capacity, drive->probed_capacity);
+	u16 *id = drive->id;
+	int lba48 = ata_id_lba48_enabled(id);
+
+	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 ||
+	    ata_id_hpa_enabled(id) == 0)
+		goto out;
+
+	/*
+	 * according to the spec the SET MAX ADDRESS command shall be
+	 * immediately preceded by a READ NATIVE MAX ADDRESS command
+	 */
+	capacity = ide_disk_hpa_get_native_capacity(drive, lba48);
+	if (capacity == 0)
+		goto out;
+
+	set = ide_disk_hpa_set_capacity(drive, set, lba48);
+	if (set)
+		return set;
+out:
+	return drive->capacity64;
+}
+
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
@@ -741,6 +785,7 @@ static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
 
 const struct ide_disk_ops ide_ata_disk_ops = {
 	.check		= ide_disk_check,
+	.set_capacity	= ide_disk_set_capacity,
 	.get_capacity	= ide_disk_get_capacity,
 	.setup		= ide_disk_setup,
 	.flush		= ide_disk_flush,
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 4b6b71e..2141190 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -287,6 +287,19 @@ static int ide_gd_media_changed(struct gendisk *disk)
 	return ret;
 }
 
+static unsigned long long ide_gd_set_capacity(struct gendisk *disk,
+					      unsigned long long capacity)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+	const struct ide_disk_ops *disk_ops = drive->disk_ops;
+
+	if (disk_ops->set_capacity)
+		return disk_ops->set_capacity(drive, capacity);
+
+	return drive->capacity64;
+}
+
 static int ide_gd_revalidate_disk(struct gendisk *disk)
 {
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
@@ -315,6 +328,7 @@ static struct block_device_operations ide_gd_ops = {
 	.locked_ioctl		= ide_gd_ioctl,
 	.getgeo			= ide_gd_getgeo,
 	.media_changed		= ide_gd_media_changed,
+	.set_capacity		= ide_gd_set_capacity,
 	.revalidate_disk	= ide_gd_revalidate_disk
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9fed365..e96ace1 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -397,6 +397,7 @@ struct ide_drive_s;
 struct ide_disk_ops {
 	int		(*check)(struct ide_drive_s *, const char *);
 	int		(*get_capacity)(struct ide_drive_s *);
+	u64		(*set_capacity)(struct ide_drive_s *, u64);
 	void		(*setup)(struct ide_drive_s *);
 	void		(*flush)(struct ide_drive_s *);
 	int		(*init_media)(struct ide_drive_s *, struct gendisk *);
@@ -568,8 +569,7 @@ struct ide_drive_s {
 	unsigned int	drive_data;	/* used by set_pio_mode/dev_select() */
 	unsigned int	failures;	/* current failure count */
 	unsigned int	max_failures;	/* maximum allowed failure count */
-	u64		probed_capacity;/* initial reported media capacity (ide-cd only currently) */
-
+	u64		probed_capacity;/* initial/native media capacity */
 	u64		capacity64;	/* total number of sectors */
 
 	int		lun;		/* logical unit */
-- 
cgit v0.10.2


From 075affcbe01d4d7cefcd0e30a98df1253bcf8d92 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 13:52:52 +0200
Subject: ide: preserve Host Protected Area by default (v2)

From the perspective of most users of recent systems, disabling Host
Protected Area (HPA) can break vendor RAID formats, GPT partitions and
risks corrupting firmware or overwriting vendor system recovery tools.

Unfortunately the original (kernels < 2.6.30) behavior (unconditionally
disabling HPA and using full disk capacity) was introduced at the time
when the main use of HPA was to make the drive look small enough for the
BIOS to allow the system to boot with large capacity drives.

Thus to allow the maximum compatibility with the existing setups (using
HPA and partitioned with HPA disabled) we automically disable HPA if
any partitions overlapping HPA are detected.  Additionally HPA can also
be disabled using the "nohpa" module parameter (i.e. "ide_core.nohpa=0.0"
to disable HPA on /dev/hda).

v2:
Fix ->resume HPA support.

While at it:
- remove stale "idebus=" entry from Documentation/kernel-parameters.txt

Cc: Robert Hancock <hancockrwd@gmail.com>
Cc: Frans Pop <elendil@planet.nl>
Cc: "Andries E. Brouwer" <Andries.Brouwer@cwi.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
[patch description was based on input from Alan Cox and Frans Pop]
Emphatically-Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt
index 0c78f4b..e77bebf 100644
--- a/Documentation/ide/ide.txt
+++ b/Documentation/ide/ide.txt
@@ -216,6 +216,8 @@ Other kernel parameters for ide_core are:
 
 * "noflush=[interface_number.device_number]" to disable flush requests
 
+* "nohpa=[interface_number.device_number]" to disable Host Protected Area
+
 * "noprobe=[interface_number.device_number]" to skip probing
 
 * "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a19f021..e58c91c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -835,11 +835,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	ide-core.nodma=	[HW] (E)IDE subsystem
 			Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
-			.vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom
-			.chs .ignore_cable are additional options
-			See Documentation/ide/ide.txt.
-
-	idebus=		[HW] (E)IDE subsystem - VLB/PCI bus speed
+			.vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
+			.cdrom .chs .ignore_cable are additional options
 			See Documentation/ide/ide.txt.
 
 	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 61a6d35..3d92c9d 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -350,6 +350,9 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 			 capacity, sectors_to_MB(capacity),
 			 set_max, sectors_to_MB(set_max));
 
+	if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0)
+		return;
+
 	set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
 	if (set_max)
 		printk(KERN_INFO "%s: Host Protected Area disabled.\n",
@@ -430,8 +433,11 @@ static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity)
 		goto out;
 
 	set = ide_disk_hpa_set_capacity(drive, set, lba48);
-	if (set)
+	if (set) {
+		/* needed for ->resume to disable HPA */
+		drive->dev_flags |= IDE_DFLAG_NOHPA;
 		return set;
+	}
 out:
 	return drive->capacity64;
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 92c9b90..16d0569 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -211,6 +211,11 @@ static unsigned int ide_noflush;
 module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0);
 MODULE_PARM_DESC(noflush, "disable flush requests for a device");
 
+static unsigned int ide_nohpa;
+
+module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0);
+MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device");
+
 static unsigned int ide_noprobe;
 
 module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0);
@@ -281,6 +286,11 @@ static void ide_dev_apply_params(ide_drive_t *drive, u8 unit)
 				 drive->name);
 		drive->dev_flags |= IDE_DFLAG_NOFLUSH;
 	}
+	if (ide_nohpa & (1 << i)) {
+		printk(KERN_INFO "ide: disabling Host Protected Area for %s\n",
+				 drive->name);
+		drive->dev_flags |= IDE_DFLAG_NOHPA;
+	}
 	if (ide_noprobe & (1 << i)) {
 		printk(KERN_INFO "ide: skipping probe for %s\n", drive->name);
 		drive->dev_flags |= IDE_DFLAG_NOPROBE;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e96ace1..45dce3b 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -475,6 +475,8 @@ enum {
 	IDE_DFLAG_NICE1			= (1 << 5),
 	/* device is physically present */
 	IDE_DFLAG_PRESENT		= (1 << 6),
+	/* disable Host Protected Area */
+	IDE_DFLAG_NOHPA			= (1 << 7),
 	/* id read from device (synthetic if not set) */
 	IDE_DFLAG_ID_READ		= (1 << 8),
 	IDE_DFLAG_NOPROBE		= (1 << 9),
-- 
cgit v0.10.2


From 72b9304f04d0724a25251e9e9041aa95f89c15dd Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Sun, 7 Jun 2009 15:37:03 +0200
Subject: pdc202xx_old: use ide_dma_test_irq()

The driver's dma_test_irq() method, although tests some chip specific interrupt
bits, finally always relies on the SFF-8038i standard interrupt bit.  I see no
point in testing the bits that are not trusted anyway -- the driver should be
fully able to use the standard method implemetation, ide_dma_test_irq().

With this change 'pdc202xx_dma_ops' finally becomes identical to 'sff_dma_ops',
and we can get rid of it...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index b3bc96f..668f452 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -203,30 +203,6 @@ static int pdc202xx_dma_end(ide_drive_t *drive)
 	return ide_dma_end(drive);
 }
 
-static int pdc202xx_dma_test_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	unsigned long high_16	= hwif->extra_base - 16;
-	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
-	u8 sc1d			= inb(high_16 + 0x001d);
-
-	if (hwif->channel) {
-		/* bit7: Error, bit6: Interrupting, bit5: FIFO Full, bit4: FIFO Empty */
-		if ((sc1d & 0x50) == 0x50)
-			goto somebody_else;
-		else if ((sc1d & 0x40) == 0x40)
-			return (dma_stat & 4) == 4;
-	} else {
-		/* bit3: Error, bit2: Interrupting, bit1: FIFO Full, bit0: FIFO Empty */
-		if ((sc1d & 0x05) == 0x05)
-			goto somebody_else;
-		else if ((sc1d & 0x04) == 0x04)
-			return (dma_stat & 4) == 4;
-	}
-somebody_else:
-	return (dma_stat & 4) == 4;	/* return 1 if INTR asserted */
-}
-
 static void pdc202xx_reset(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif	= drive->hwif;
@@ -313,23 +289,12 @@ static const struct ide_port_ops pdc2026x_port_ops = {
 	.cable_detect		= pdc2026x_cable_detect,
 };
 
-static const struct ide_dma_ops pdc20246_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= ide_dma_end,
-	.dma_test_irq		= pdc202xx_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
 static const struct ide_dma_ops pdc2026x_dma_ops = {
 	.dma_host_set		= ide_dma_host_set,
 	.dma_setup		= ide_dma_setup,
 	.dma_start		= pdc202xx_dma_start,
 	.dma_end		= pdc202xx_dma_end,
-	.dma_test_irq		= pdc202xx_dma_test_irq,
+	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= pdc202xx_dma_lost_irq,
 	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
 	.dma_clear		= pdc202xx_reset,
@@ -354,7 +319,7 @@ static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
 		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_pdc202xx,
 		.port_ops	= &pdc20246_port_ops,
-		.dma_ops	= &pdc20246_dma_ops,
+		.dma_ops	= &sff_dma_ops,
 		.host_flags	= IDE_HFLAGS_PDC202XX,
 		.pio_mask	= ATA_PIO4,
 		.mwdma_mask	= ATA_MWDMA2,
-- 
cgit v0.10.2


From 1221e241e3a6f1ff5b0de03d58d871f7c995781b Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Sun, 7 Jun 2009 15:37:04 +0200
Subject: pdc202xx_old: don't call pdc202xx_reset() on IRQ timeout

The driver's dma_lost_irq() and dma_clear() methods call pdc202xx_reset()
which resets both channels at once -- most probably by driving RESET- on them.
Not only such reset can severely disturb concurrent operations on another
channel, it is also a clear overkill (especially in the first case) and is
completely unexpected and thus not properly handled by the IDE core in this
context (in the second case the usual SRST reset would most probably ensue
anyway though); it also causes quite arbitrary 2-second delay. Hence, use the
standard ide_dma_lost_irq() method and don't install the optional dma_clear()
method at all -- the driver should do well without this age-old cruft...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 668f452..494b540 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -219,12 +219,6 @@ static void pdc202xx_reset(ide_drive_t *drive)
 	ide_set_max_pio(drive);
 }
 
-static void pdc202xx_dma_lost_irq(ide_drive_t *drive)
-{
-	pdc202xx_reset(drive);
-	ide_dma_lost_irq(drive);
-}
-
 static int init_chipset_pdc202xx(struct pci_dev *dev)
 {
 	unsigned long dmabase = pci_resource_start(dev, 4);
@@ -295,9 +289,8 @@ static const struct ide_dma_ops pdc2026x_dma_ops = {
 	.dma_start		= pdc202xx_dma_start,
 	.dma_end		= pdc202xx_dma_end,
 	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= pdc202xx_dma_lost_irq,
+	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_clear		= pdc202xx_reset,
 	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
-- 
cgit v0.10.2


From ffddf1717b0d388879c646eaf6261a2b393c06ad Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Sun, 7 Jun 2009 15:37:05 +0200
Subject: pdc202xx_old: kill resetproc() method

The driver's resetproc() method resets both channels at once -- most probably
by driving RESET- on them.  Not only such reset can severely disturb concurrent
operations on another channel, it also ensues 2-second delay, while there's no
apparent reason why SRST reset being performed prior to resetproc() call needs
to be followed up by another reset.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 494b540..4980dd7 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -203,22 +203,6 @@ static int pdc202xx_dma_end(ide_drive_t *drive)
 	return ide_dma_end(drive);
 }
 
-static void pdc202xx_reset(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	unsigned long high_16	= hwif->extra_base - 16;
-	u8 udma_speed_flag	= inb(high_16 | 0x001f);
-
-	printk(KERN_WARNING "PDC202xx: software reset...\n");
-
-	outb(udma_speed_flag | 0x10, high_16 | 0x001f);
-	mdelay(100);
-	outb(udma_speed_flag & ~0x10, high_16 | 0x001f);
-	mdelay(2000);	/* 2 seconds ?! */
-
-	ide_set_max_pio(drive);
-}
-
 static int init_chipset_pdc202xx(struct pci_dev *dev)
 {
 	unsigned long dmabase = pci_resource_start(dev, 4);
@@ -279,7 +263,6 @@ static const struct ide_port_ops pdc2026x_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
 	.quirkproc		= pdc202xx_quirkproc,
-	.resetproc		= pdc202xx_reset,
 	.cable_detect		= pdc2026x_cable_detect,
 };
 
-- 
cgit v0.10.2


From 626542ca2277961aaa64855206574f8ca4f360e3 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Sun, 7 Jun 2009 15:37:05 +0200
Subject: ide-tape: change IDE_AFLAG_IGNORE_DSC non-atomically

There are two sites where the flag is being changed: ide_retry_pc
and idetape_do_request. Both codepaths are protected by hwif->busy
(ide_lock_port) and therefore we shouldn't need the atomic accesses.

Spotted-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index afe5a43..fbcb851 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -258,7 +258,7 @@ void ide_retry_pc(ide_drive_t *drive)
 	pc->req_xfer = sense_rq->data_len;
 
 	if (drive->media == ide_tape)
-		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
+		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
 
 	if (ide_queue_sense_rq(drive, pc))
 		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 203bbea..f1d3c7b 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -656,15 +656,15 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 
 	if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 &&
 	    (rq->cmd[13] & REQ_IDETAPE_PC2) == 0)
-		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
+		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
 
 	if (drive->dev_flags & IDE_DFLAG_POST_RESET) {
-		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
+		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
 		drive->dev_flags &= ~IDE_DFLAG_POST_RESET;
 	}
 
-	if (!test_and_clear_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags) &&
-	    (stat & ATA_DSC) == 0) {
+	if (!(drive->atapi_flags & IDE_AFLAG_IGNORE_DSC) &&
+	    !(stat & ATA_DSC)) {
 		if (postponed_rq == NULL) {
 			tape->dsc_polling_start = jiffies;
 			tape->dsc_poll_freq = tape->best_dsc_rw_freq;
@@ -684,7 +684,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			tape->dsc_poll_freq = IDETAPE_DSC_MA_SLOW;
 		idetape_postpone_request(drive);
 		return ide_stopped;
-	}
+	} else
+		drive->atapi_flags &= ~IDE_AFLAG_IGNORE_DSC;
+
 	if (rq->cmd[13] & REQ_IDETAPE_READ) {
 		pc = &tape->queued_pc;
 		ide_tape_create_rw_cmd(tape, pc, rq, READ_6);
-- 
cgit v0.10.2


From 49d8078ad1c3dca5b11ce18391bf6bd9af9acdf5 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 7 Jun 2009 15:37:06 +0200
Subject: ide-tape: fix IDE_AFLAG_* atomic accesses

These flags used to be bit numbers and now are single bits in the
->atapi_flags vector. Use them properly.

Spotted-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index f1d3c7b..055f52e 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -397,7 +397,8 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		if (readpos[0] & 0x4) {
 			printk(KERN_INFO "ide-tape: Block location is unknown"
 					 "to the tape\n");
-			clear_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags);
+			clear_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
+				  &drive->atapi_flags);
 			uptodate = 0;
 			err = IDE_DRV_ERROR_GENERAL;
 		} else {
@@ -406,7 +407,8 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 
 			tape->partition = readpos[1];
 			tape->first_frame = be32_to_cpup((__be32 *)&readpos[4]);
-			set_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags);
+			set_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
+				&drive->atapi_flags);
 		}
 	}
 
@@ -746,7 +748,7 @@ static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout)
 	int load_attempted = 0;
 
 	/* Wait for the tape to become ready */
-	set_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags);
+	set_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT), &drive->atapi_flags);
 	timeout += jiffies;
 	while (time_before(jiffies, timeout)) {
 		if (ide_do_test_unit_ready(drive, disk) == 0)
@@ -822,7 +824,7 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
 	if (tape->chrdev_dir != IDETAPE_DIR_READ)
 		return;
 
-	clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags);
+	clear_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags);
 	tape->valid = 0;
 	if (tape->buf != NULL) {
 		kfree(tape->buf);
@@ -1115,7 +1117,8 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
 
 	if (tape->chrdev_dir == IDETAPE_DIR_READ) {
 		tape->valid = 0;
-		if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
+		if (test_and_clear_bit(ilog2(IDE_AFLAG_FILEMARK),
+				       &drive->atapi_flags))
 			++count;
 		ide_tape_discard_merge_buffer(drive, 0);
 	}
@@ -1170,7 +1173,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 	debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
 	if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-		if (test_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags))
+		if (test_bit(ilog2(IDE_AFLAG_DETECT_BS), &drive->atapi_flags))
 			if (count > tape->blk_size &&
 			    (count % tape->blk_size) == 0)
 				tape->user_bs_factor = count / tape->blk_size;
@@ -1186,7 +1189,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		/* refill if staging buffer is empty */
 		if (!tape->valid) {
 			/* If we are at a filemark, nothing more to read */
-			if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
+			if (test_bit(ilog2(IDE_AFLAG_FILEMARK),
+				     &drive->atapi_flags))
 				break;
 			/* read */
 			if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
@@ -1204,7 +1208,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 		done += todo;
 	}
 
-	if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
+	if (!done && test_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags)) {
 		debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
 
 		idetape_space_over_filemarks(drive, MTFSF, 1);
@@ -1338,7 +1342,8 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
 		ide_tape_discard_merge_buffer(drive, 0);
 		retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK);
 		if (!retval)
-			clear_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags);
+			clear_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
+				  &drive->atapi_flags);
 		return retval;
 	case MTNOP:
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1360,9 +1365,11 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
 			    mt_count % tape->blk_size)
 				return -EIO;
 			tape->user_bs_factor = mt_count / tape->blk_size;
-			clear_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags);
+			clear_bit(ilog2(IDE_AFLAG_DETECT_BS),
+				  &drive->atapi_flags);
 		} else
-			set_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags);
+			set_bit(ilog2(IDE_AFLAG_DETECT_BS),
+				&drive->atapi_flags);
 		return 0;
 	case MTSEEK:
 		ide_tape_discard_merge_buffer(drive, 0);
@@ -1507,20 +1514,20 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
 
 	filp->private_data = tape;
 
-	if (test_and_set_bit(IDE_AFLAG_BUSY, &drive->atapi_flags)) {
+	if (test_and_set_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags)) {
 		retval = -EBUSY;
 		goto out_put_tape;
 	}
 
 	retval = idetape_wait_ready(drive, 60 * HZ);
 	if (retval) {
-		clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
+		clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
 		printk(KERN_ERR "ide-tape: %s: drive not ready\n", tape->name);
 		goto out_put_tape;
 	}
 
 	idetape_read_position(drive);
-	if (!test_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags))
+	if (!test_bit(ilog2(IDE_AFLAG_ADDRESS_VALID), &drive->atapi_flags))
 		(void)idetape_rewind_tape(drive);
 
 	/* Read block size and write protect status from drive. */
@@ -1536,7 +1543,7 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
 	if (tape->write_prot) {
 		if ((filp->f_flags & O_ACCMODE) == O_WRONLY ||
 		    (filp->f_flags & O_ACCMODE) == O_RDWR) {
-			clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
+			clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
 			retval = -EROFS;
 			goto out_put_tape;
 		}
@@ -1593,15 +1600,17 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp)
 			ide_tape_discard_merge_buffer(drive, 1);
 	}
 
-	if (minor < 128 && test_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags))
+	if (minor < 128 && test_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
+				    &drive->atapi_flags))
 		(void) idetape_rewind_tape(drive);
+
 	if (tape->chrdev_dir == IDETAPE_DIR_NONE) {
 		if (tape->door_locked == DOOR_LOCKED) {
 			if (!ide_set_media_lock(drive, tape->disk, 0))
 				tape->door_locked = DOOR_UNLOCKED;
 		}
 	}
-	clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
+	clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
 	ide_tape_put(tape);
 	unlock_kernel();
 	return 0;
-- 
cgit v0.10.2


From dff8817b78e6e6a4913f2caf7637d62dcc49a03c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:06 +0200
Subject: icside: remove superfluous ->maskproc method

[inspired by pata_icside]

Enabling/disabling of card IRQs is handled fine by IRQ and IDE
subsystems so there is no need for custom ->maskproc method.

Moreover icside_maskproc() would enable IRQ only if it was already
enabled [because of 'if (state->enabled && !mask)' check].

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index c5269fa..5af3d0f 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -65,8 +65,6 @@ static struct cardinfo icside_cardinfo_v6_2 = {
 };
 
 struct icside_state {
-	unsigned int channel;
-	unsigned int enabled;
 	void __iomem *irq_port;
 	void __iomem *ioc_base;
 	unsigned int sel;
@@ -116,18 +114,11 @@ static void icside_irqenable_arcin_v6 (struct expansion_card *ec, int irqnr)
 	struct icside_state *state = ec->irq_data;
 	void __iomem *base = state->irq_port;
 
-	state->enabled = 1;
+	writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
+	readb(base + ICS_ARCIN_V6_INTROFFSET_2);
 
-	switch (state->channel) {
-	case 0:
-		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
-		readb(base + ICS_ARCIN_V6_INTROFFSET_2);
-		break;
-	case 1:
-		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
-		readb(base + ICS_ARCIN_V6_INTROFFSET_1);
-		break;
-	}
+	writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
+	readb(base + ICS_ARCIN_V6_INTROFFSET_1);
 }
 
 /* Prototype: icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
@@ -137,8 +128,6 @@ static void icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
 {
 	struct icside_state *state = ec->irq_data;
 
-	state->enabled = 0;
-
 	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
 	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
 }
@@ -160,44 +149,6 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = {
 	.irqpending	= icside_irqpending_arcin_v6,
 };
 
-/*
- * Handle routing of interrupts.  This is called before
- * we write the command to the drive.
- */
-static void icside_maskproc(ide_drive_t *drive, int mask)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct expansion_card *ec = ECARD_DEV(hwif->dev);
-	struct icside_state *state = ecard_get_drvdata(ec);
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	state->channel = hwif->channel;
-
-	if (state->enabled && !mask) {
-		switch (hwif->channel) {
-		case 0:
-			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-			break;
-		case 1:
-			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-			break;
-		}
-	} else {
-		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-	}
-
-	local_irq_restore(flags);
-}
-
-static const struct ide_port_ops icside_v6_no_dma_port_ops = {
-	.maskproc		= icside_maskproc,
-};
-
 #ifdef CONFIG_BLK_DEV_IDEDMA_ICS
 /*
  * SG-DMA support.
@@ -275,7 +226,6 @@ static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode)
 
 static const struct ide_port_ops icside_v6_port_ops = {
 	.set_dma_mode		= icside_set_dma_mode,
-	.maskproc		= icside_maskproc,
 };
 
 static void icside_dma_host_set(ide_drive_t *drive, int on)
@@ -320,11 +270,6 @@ static int icside_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
 	BUG_ON(dma_channel_active(ec->dma));
 
 	/*
-	 * Ensure that we have the right interrupt routed.
-	 */
-	icside_maskproc(drive, 0);
-
-	/*
 	 * Route the DMA signals to the correct interface.
 	 */
 	writeb(state->sel | hwif->channel, state->ioc_base);
@@ -452,7 +397,6 @@ err_free:
 
 static const struct ide_port_info icside_v6_port_info __initdata = {
 	.init_dma		= icside_dma_off_init,
-	.port_ops		= &icside_v6_no_dma_port_ops,
 	.dma_ops		= &icside_v6_dma_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,
 	.mwdma_mask		= ATA_MWDMA2,
-- 
cgit v0.10.2


From f9952beeaa851f7f79a4dd895bfed3f3ff6deebc Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:06 +0200
Subject: ide: remove superfluous SELECT_MASK() call from ide_driveid_update()

We always call SELECT_MASK(drive, 0) after ide_dev_read_id() call
so there is no need to do it again in the error path.

Moreover with the combination of HPT36x controller and the drive on
the quirk_drives[] list this can result in superfluous enable_irq()
call which in turn will trigger WARN() in __enable_irq().

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 06fe002..8dff623 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -311,7 +311,6 @@ int ide_driveid_update(ide_drive_t *drive)
 
 	return 1;
 out_err:
-	SELECT_MASK(drive, 0);
 	if (rc == 2)
 		printk(KERN_ERR "%s: %s: bad status\n", drive->name, __func__);
 	kfree(id);
-- 
cgit v0.10.2


From d6dcdea726855d78048e4bfb950342afc0e83e47 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:07 +0200
Subject: ide: remove superfluous SELECT_MASK() call from do_rw_taskfile()

With ->write_devctl method call (which unmasks drive IRQ) preceding
SELECT_MASK() call there is really no need for the latter.

Moreover with the combination of HPT36x controller and the drive on
the quirk_drives[] list this can result in superfluous enable_irq()
call which in turn will trigger WARN() in __enable_irq().

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 8cab3c2..fbcb415 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -98,7 +98,6 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd)
 	if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
 		ide_tf_dump(drive->name, cmd);
 		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
-		SELECT_MASK(drive, 0);
 
 		if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) {
 			u8 data[2] = { cmd->tf.data, cmd->hob.data };
-- 
cgit v0.10.2


From d328e7657de1fde30141365466589ab259cf4f64 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:07 +0200
Subject: hpt366: sync quirk_drives[] list with pdc202xx_{new,old}.c

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index 0feb66c..47b31da 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -141,6 +141,10 @@
 static const char *quirk_drives[] = {
 	"QUANTUM FIREBALLlct08 08",
 	"QUANTUM FIREBALLP KA6.4",
+	"QUANTUM FIREBALLP KA9.1",
+	"QUANTUM FIREBALLP KX13.6",
+	"QUANTUM FIREBALLP KX20.5",
+	"QUANTUM FIREBALLP KX27.3",
 	"QUANTUM FIREBALLP LM20.4",
 	"QUANTUM FIREBALLP LM20.5",
 	NULL
-- 
cgit v0.10.2


From 0fcef027f60318cfa64ae4cdf5aa33905607d650 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:08 +0200
Subject: hpt366: enable all quirks for devices on quirk_drives[] list

Enable also quirks in do_reset1() and ide_config_drive_speed()
for devices on quirk_drives[] list.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index 47b31da..cb04523 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -740,7 +740,7 @@ static void hpt3xx_quirkproc(ide_drive_t *drive)
 
 	while (*list)
 		if (strstr(m, *list++)) {
-			drive->quirk_list = 1;
+			drive->quirk_list = 2;
 			return;
 		}
 
-- 
cgit v0.10.2


From 8bc1e5aa06a2a9a425c4a6795fc564cba1521487 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:09 +0200
Subject: ide: respect quirk_drives[] list on all controllers

* Add ide_check_nien_quirk_list() helper to the core code
  and then use it in ide_port_tune_devices().

* Remove no longer needed ->quirkproc methods from hpt366.c
  and pdc202xx_{new,old}.c.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index cb04523..a2e9f6c 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -138,18 +138,6 @@
 #undef	HPT_RESET_STATE_ENGINE
 #undef	HPT_DELAY_INTERRUPT
 
-static const char *quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static const char *bad_ata100_5[] = {
 	"IBM-DTLA-307075",
 	"IBM-DTLA-307060",
@@ -733,20 +721,6 @@ static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	hpt3xx_set_mode(drive, XFER_PIO_0 + pio);
 }
 
-static void hpt3xx_quirkproc(ide_drive_t *drive)
-{
-	char *m			= (char *)&drive->id[ATA_ID_PROD];
-	const  char **list	= quirk_drives;
-
-	while (*list)
-		if (strstr(m, *list++)) {
-			drive->quirk_list = 2;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 {
 	ide_hwif_t *hwif	= drive->hwif;
@@ -1408,7 +1382,6 @@ static int __devinit hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2)
 static const struct ide_port_ops hpt3xx_port_ops = {
 	.set_pio_mode		= hpt3xx_set_pio_mode,
 	.set_dma_mode		= hpt3xx_set_mode,
-	.quirkproc		= hpt3xx_quirkproc,
 	.maskproc		= hpt3xx_maskproc,
 	.mdma_filter		= hpt3xx_mdma_filter,
 	.udma_filter		= hpt3xx_udma_filter,
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 8dff623..c553495 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -282,6 +282,31 @@ no_80w:
 	return 0;
 }
 
+static const char *nien_quirk_list[] = {
+	"QUANTUM FIREBALLlct08 08",
+	"QUANTUM FIREBALLP KA6.4",
+	"QUANTUM FIREBALLP KA9.1",
+	"QUANTUM FIREBALLP KX13.6",
+	"QUANTUM FIREBALLP KX20.5",
+	"QUANTUM FIREBALLP KX27.3",
+	"QUANTUM FIREBALLP LM20.4",
+	"QUANTUM FIREBALLP LM20.5",
+	NULL
+};
+
+void ide_check_nien_quirk_list(ide_drive_t *drive)
+{
+	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
+
+	for (list = nien_quirk_list; *list != NULL; list++)
+		if (strstr(m, *list) != NULL) {
+			drive->quirk_list = 2;
+			return;
+		}
+
+	drive->quirk_list = 0;
+}
+
 int ide_driveid_update(ide_drive_t *drive)
 {
 	u16 *id;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 89574b0..28f95cb4 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -732,6 +732,8 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	int i;
 
 	ide_port_for_each_present_dev(i, drive, hwif) {
+		ide_check_nien_quirk_list(drive);
+
 		if (port_ops && port_ops->quirkproc)
 			port_ops->quirkproc(drive);
 	}
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index b68906c..65ba823 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -40,18 +40,6 @@
 #define DBG(fmt, args...)
 #endif
 
-static const char *pdc_quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static u8 max_dma_rate(struct pci_dev *pdev)
 {
 	u8 mode;
@@ -200,19 +188,6 @@ static u8 pdcnew_cable_detect(ide_hwif_t *hwif)
 		return ATA_CBL_PATA80;
 }
 
-static void pdcnew_quirkproc(ide_drive_t *drive)
-{
-	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
-
-	for (list = pdc_quirk_drives; *list != NULL; list++)
-		if (strstr(m, *list) != NULL) {
-			drive->quirk_list = 2;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void pdcnew_reset(ide_drive_t *drive)
 {
 	/*
@@ -473,7 +448,6 @@ static struct pci_dev * __devinit pdc20270_get_dev2(struct pci_dev *dev)
 static const struct ide_port_ops pdcnew_port_ops = {
 	.set_pio_mode		= pdcnew_set_pio_mode,
 	.set_dma_mode		= pdcnew_set_dma_mode,
-	.quirkproc		= pdcnew_quirkproc,
 	.resetproc		= pdcnew_reset,
 	.cable_detect		= pdcnew_cable_detect,
 };
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 4980dd7..fe01db6 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -23,18 +23,6 @@
 
 #define PDC202XX_DEBUG_DRIVE_INFO	0
 
-static const char *pdc_quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static void pdc_old_disable_66MHz_clock(ide_hwif_t *);
 
 static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
@@ -151,19 +139,6 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif)
 	outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg);
 }
 
-static void pdc202xx_quirkproc(ide_drive_t *drive)
-{
-	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
-
-	for (list = pdc_quirk_drives; *list != NULL; list++)
-		if (strstr(m, *list) != NULL) {
-			drive->quirk_list = 2;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void pdc202xx_dma_start(ide_drive_t *drive)
 {
 	if (drive->current_speed > XFER_UDMA_2)
@@ -256,13 +231,11 @@ static void __devinit pdc202ata4_fixup_irq(struct pci_dev *dev,
 static const struct ide_port_ops pdc20246_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
-	.quirkproc		= pdc202xx_quirkproc,
 };
 
 static const struct ide_port_ops pdc2026x_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
-	.quirkproc		= pdc202xx_quirkproc,
 	.cable_detect		= pdc2026x_cable_detect,
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c8f7b96..6caaae0 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1453,6 +1453,7 @@ static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {}
 void ide_register_region(struct gendisk *);
 void ide_unregister_region(struct gendisk *);
 
+void ide_check_nien_quirk_list(ide_drive_t *);
 void ide_undecoded_slave(ide_drive_t *);
 
 void ide_port_apply_params(ide_hwif_t *);
-- 
cgit v0.10.2


From 734affdcae20af4fec95e46a64fb29f063a15c19 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:10 +0200
Subject: ide: add IDE_DFLAG_NIEN_QUIRK device flag

Add IDE_DFLAG_NIEN_QUIRK device flag and use it instead of
drive->quirk_list.

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index a2e9f6c..7ce68ef 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -727,7 +727,7 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
-	if (drive->quirk_list == 0)
+	if ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
 		return;
 
 	if (info->chip_type >= HPT370) {
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 39d5892..2b91419 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -407,8 +407,9 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
 	/* more than enough time */
 	udelay(10);
 	/* clear SRST, leave nIEN (unless device is on the quirk list) */
-	tp_ops->write_devctl(hwif, (drive->quirk_list == 2 ? 0 : ATA_NIEN) |
-			     ATA_DEVCTL_OBS);
+	tp_ops->write_devctl(hwif,
+		((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) ? 0 : ATA_NIEN) |
+		 ATA_DEVCTL_OBS);
 	/* more than enough time */
 	udelay(10);
 	hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 9654bd3..243cf65 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -488,11 +488,15 @@ repeat:
 
 		if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
 		    hwif != prev_port) {
+			ide_drive_t *cur_dev =
+				prev_port ? prev_port->cur_dev : NULL;
+
 			/*
 			 * set nIEN for previous port, drives in the
-			 * quirk_list may not like intr setups/cleanups
+			 * quirk list may not like intr setups/cleanups
 			 */
-			if (prev_port && prev_port->cur_dev->quirk_list == 0)
+			if (cur_dev &&
+			    (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
 				prev_port->tp_ops->write_devctl(prev_port,
 								ATA_NIEN |
 								ATA_DEVCTL_OBS);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index c553495..fa04715 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -300,11 +300,9 @@ void ide_check_nien_quirk_list(ide_drive_t *drive)
 
 	for (list = nien_quirk_list; *list != NULL; list++)
 		if (strstr(m, *list) != NULL) {
-			drive->quirk_list = 2;
+			drive->dev_flags |= IDE_DFLAG_NIEN_QUIRK;
 			return;
 		}
-
-	drive->quirk_list = 0;
 }
 
 int ide_driveid_update(ide_drive_t *drive)
@@ -389,7 +387,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
 
 	tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES);
 
-	if (drive->quirk_list == 2)
+	if (drive->dev_flags & IDE_DFLAG_NIEN_QUIRK)
 		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
 
 	error = __ide_wait_stat(drive, drive->ready_stat,
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6caaae0..a6c6a2f 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -499,6 +499,7 @@ enum {
 	/* write protect */
 	IDE_DFLAG_WP			= (1 << 29),
 	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 30),
+	IDE_DFLAG_NIEN_QUIRK		= (1 << 31),
 };
 
 struct ide_drive_s {
@@ -530,7 +531,6 @@ struct ide_drive_s {
 	u8	waiting_for_dma;	/* dma currently in progress */
 	u8	dma;			/* atapi dma flag */
 
-        u8	quirk_list;	/* considered quirky, set for a specific host */
         u8	init_speed;	/* transfer rate set at boot */
         u8	current_speed;	/* current transfer rate set */
 	u8	desired_speed;	/* desired transfer rate set */
-- 
cgit v0.10.2


From 4a4aca641bc4598e77b866804f47c651ec4a764d Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Fri, 5 Jun 2009 12:02:38 +0200
Subject: x86: Add quirk for reboot stalls on a Dell Optiplex 360

The Dell Optiplex 360 hangs on reboot, just like the Optiplex 330, so
the same quirk is needed.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Cc: Steve Conklin <steve.conklin@canonical.com>
Cc: Leann Ogasawara <leann.ogasawara@canonical.com>
Cc: <stable@kernel.org>
LKML-Reference: <200906051202.38311.jdelvare@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 667188e..d2d1ce8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -192,6 +192,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
 		},
 	},
+	{   /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 360",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"),
+			DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
+		},
+	},
 	{	/* Handle problems with rebooting on Dell 2400's */
 		.callback = set_bios_reboot,
 		.ident = "Dell PowerEdge 2400",
-- 
cgit v0.10.2


From 103428e57be323c3c5545db8ad12667099bc6005 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 7 Jun 2009 16:48:40 +0400
Subject: x86, apic: Fix dummy apic read operation together with broken MP
 handling

Ingo Molnar reported that read_apic is buggy novadays:

[    0.000000] Using APIC driver default
[    0.000000] SMP: Allowing 1 CPUs, 0 hotplug CPUs
[    0.000000] Local APIC disabled by BIOS -- you can enable it with "lapic"
[    0.000000] APIC: disable apic facility
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/apic/apic.c:254 native_apic_read_dummy+0x2d/0x3b()
[    0.000000] Hardware name: HP OmniBook PC

Indeed we still rely on apic->read operation for SMP compiled
kernel. And instead of disfigure the SMP code with #ifdef we
allow to call apic->read. To capture any unexpected results
we check for apic->read being called for sane reason via
WARN_ON_ONCE but(!) instead of OR we should use AND logical
operation (thanks Yinghai for spotting the root of the problem).

Along with that we could be have bad MP table and we are
to fix it that way no SMP started and no complains about
BIOS bug if apic was just disabled via command line.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <20090607124840.GD4547@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e82488d..a4c9cf0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -249,7 +249,7 @@ static void native_apic_write_dummy(u32 reg, u32 v)
 
 static u32 native_apic_read_dummy(u32 reg)
 {
-	WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
 	return 0;
 }
 
@@ -1609,6 +1609,13 @@ void __init init_apic_mappings(void)
 	new_apicid = read_apic_id();
 	if (boot_cpu_physical_apicid != new_apicid) {
 		boot_cpu_physical_apicid = new_apicid;
+		/*
+		 * yeah -- we lie about apic_version
+		 * in case if apic was disabled via boot option
+		 * but it's not a problem for SMP compiled kernel
+		 * since smp_sanity_check is prepared for such a case
+		 * and disable smp mode
+		 */
 		apic_version[new_apicid] =
 			 GET_APIC_VERSION(apic_read(APIC_LVR));
 	}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d2e8de9..7c80007 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -992,10 +992,12 @@ static int __init smp_sanity_check(unsigned max_cpus)
 	 */
 	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
 	    !cpu_has_apic) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-			boot_cpu_physical_apicid);
-		printk(KERN_ERR "... forcing use of dummy APIC emulation."
+		if (!disable_apic) {
+			pr_err("BIOS bug, local APIC #%d not detected!...\n",
+				boot_cpu_physical_apicid);
+			pr_err("... forcing use of dummy APIC emulation."
 				"(tell your hw vendor)\n");
+		}
 		smpboot_clear_io_apic();
 		arch_disable_smp_support();
 		return -1;
-- 
cgit v0.10.2


From a4046f8d299e00e9855ae292527c2d66a42670eb Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 7 Jun 2009 12:19:37 +0400
Subject: x86, nmi: Use predefined numbers instead of hardcoded one

[ Impact: cleanup ]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
LKML-Reference: <20090607081937.GC4547@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c45a0a5..c972644 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -64,7 +64,7 @@ static inline int nmi_watchdog_active(void)
 	 * but since they are power of two we could use a
 	 * cheaper way --cvg
 	 */
-	return nmi_watchdog & 0x3;
+	return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
 }
 #endif
 
-- 
cgit v0.10.2


From 3aa6b186f86c5d06d6d92d14311ffed51f091f40 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Sun, 7 Jun 2009 16:23:48 +0200
Subject: x86: Fix non-lazy GS handling in sys_vm86()

This fixes a stack corruption panic or null dereference oops
due to a bad GS in resume_userspace() when returning from
sys_vm86() and calling lockdep_sys_exit().

Only a problem when CONFIG_LOCKDEP and CONFIG_CC_STACKPROTECTOR
enabled.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Cc: H. Peter Anvin <hpa@zytor.com>
LKML-Reference: <1244384628.2323.4.camel@bimbo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index d7ac84e..6a17769 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -287,10 +287,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	info->regs.pt.ds = 0;
 	info->regs.pt.es = 0;
 	info->regs.pt.fs = 0;
-
-/* we are clearing gs later just before "jmp resume_userspace",
- * because it is not saved/restored.
- */
+#ifndef CONFIG_X86_32_LAZY_GS
+	info->regs.pt.gs = 0;
+#endif
 
 /*
  * The flags register is also special: we cannot trust that the user
@@ -343,7 +342,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	__asm__ __volatile__(
 		"movl %0,%%esp\n\t"
 		"movl %1,%%ebp\n\t"
+#ifdef CONFIG_X86_32_LAZY_GS
 		"mov  %2, %%gs\n\t"
+#endif
 		"jmp resume_userspace"
 		: /* no outputs */
 		:"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
-- 
cgit v0.10.2


From aeef50bc0483fa70ce0bddb686ec84a274b7f3d4 Mon Sep 17 00:00:00 2001
From: "Figo.zhang" <figo1802@gmail.com>
Date: Sun, 7 Jun 2009 22:30:36 +0800
Subject: x86, microcode: Simplify vfree() use

vfree() does its own 'NULL' check, so no need for check before
calling it.

In v2, remove the stray newline.

[ Impact: cleanup ]

Signed-off-by: Figo.zhang <figo1802@gmail.com>
Cc: Dmitry Adamushko <dmitry.adamushko@gmail.com>
LKML-Reference: <1244385036.3402.11.camel@myhost>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c8be20f..366baa1 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -241,10 +241,8 @@ static int install_equiv_cpu_table(const u8 *buf)
 
 static void free_equiv_cpu_table(void)
 {
-	if (equiv_cpu_table) {
-		vfree(equiv_cpu_table);
-		equiv_cpu_table = NULL;
-	}
+	vfree(equiv_cpu_table);
+	equiv_cpu_table = NULL;
 }
 
 static enum ucode_state
@@ -279,8 +277,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
 
 		mc_header = (struct microcode_header_amd *)mc;
 		if (get_matching_microcode(cpu, mc, new_rev)) {
-			if (new_mc)
-				vfree(new_mc);
+			vfree(new_mc);
 			new_rev = mc_header->patch_id;
 			new_mc  = mc;
 		} else
@@ -292,8 +289,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
 
 	if (new_mc) {
 		if (!leftover) {
-			if (uci->mc)
-				vfree(uci->mc);
+			vfree(uci->mc);
 			uci->mc = new_mc;
 			pr_debug("microcode: CPU%d found a matching microcode "
 				 "update with version 0x%x (current=0x%x)\n",
-- 
cgit v0.10.2


From 743ee1f80434138495bbb95ffb897acf46b51d54 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 7 Jun 2009 17:06:46 +0200
Subject: perf stat: Continue even on counter creation error

Before:

 $ perf stat ~/hackbench 5

 error: syscall returned with -1 (No such device)

After:

 $ perf stat ~/hackbench 5
 Time: 1.640

 Performance counter stats for '/home/mingo/hackbench 5':

    6524.570382  task-clock-ticks     #       3.838 CPU utilization factor
          35704  context-switches     #       0.005 M/sec
            191  CPU-migrations       #       0.000 M/sec
           8958  page-faults          #       0.001 M/sec
  <not counted>  cycles
  <not counted>  instructions
  <not counted>  cache-references
  <not counted>  cache-misses

 Wall-clock time elapsed:  1699.999995 msecs

Also add -v (--verbose) option to allow the printing of failed
counter opens.

Plus dont print 'inf' if wall-time is zero (due to jiffies granularity),
instead skip the printing of the CPU utilization factor.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2cbf5a1..184ff95 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,6 +59,7 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
 
 static int			system_wide			=  0;
 static int			inherit				=  1;
+static int			verbose				=  0;
 
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
@@ -83,7 +84,7 @@ static __u64			event_scaled[MAX_COUNTERS];
 static __u64			runtime_nsecs;
 static __u64			walltime_nsecs;
 
-static void create_perfstat_counter(int counter)
+static void create_perf_stat_counter(int counter)
 {
 	struct perf_counter_attr *attr = attrs + counter;
 
@@ -95,10 +96,8 @@ static void create_perfstat_counter(int counter)
 		int cpu;
 		for (cpu = 0; cpu < nr_cpus; cpu ++) {
 			fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
-			if (fd[cpu][counter] < 0) {
-				printf("perfstat error: syscall returned with %d (%s)\n",
-						fd[cpu][counter], strerror(errno));
-				exit(-1);
+			if (fd[cpu][counter] < 0 && verbose) {
+				printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno));
 			}
 		}
 	} else {
@@ -106,10 +105,8 @@ static void create_perfstat_counter(int counter)
 		attr->disabled	= 1;
 
 		fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
-		if (fd[0][counter] < 0) {
-			printf("perfstat error: syscall returned with %d (%s)\n",
-					fd[0][counter], strerror(errno));
-			exit(-1);
+		if (fd[0][counter] < 0 && verbose) {
+			printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno));
 		}
 	}
 }
@@ -147,6 +144,9 @@ static void read_counter(int counter)
 
 	nv = scale ? 3 : 1;
 	for (cpu = 0; cpu < nr_cpus; cpu ++) {
+		if (fd[cpu][counter] < 0)
+			continue;
+
 		res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
 		assert(res == nv * sizeof(__u64));
 
@@ -204,8 +204,9 @@ static void print_counter(int counter)
 		if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
 			attrs[counter].config == PERF_COUNT_TASK_CLOCK) {
 
-			fprintf(stderr, " # %11.3f CPU utilization factor",
-				(double)count[0] / (double)walltime_nsecs);
+			if (walltime_nsecs)
+				fprintf(stderr, " # %11.3f CPU utilization factor",
+					(double)count[0] / (double)walltime_nsecs);
 		}
 	} else {
 		fprintf(stderr, " %14Ld  %-20s",
@@ -220,7 +221,7 @@ static void print_counter(int counter)
 	fprintf(stderr, "\n");
 }
 
-static int do_perfstat(int argc, const char **argv)
+static int do_perf_stat(int argc, const char **argv)
 {
 	unsigned long long t0, t1;
 	int counter;
@@ -232,7 +233,7 @@ static int do_perfstat(int argc, const char **argv)
 		nr_cpus = 1;
 
 	for (counter = 0; counter < nr_counters; counter++)
-		create_perfstat_counter(counter);
+		create_perf_stat_counter(counter);
 
 	/*
 	 * Enable counters and exec the command:
@@ -305,6 +306,8 @@ static const struct option options[] = {
 			    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('S', "scale", &scale,
 			    "scale/normalize counters"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
 	OPT_END()
 };
 
@@ -335,5 +338,5 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
 	signal(SIGALRM, skip_signal);
 	signal(SIGABRT, skip_signal);
 
-	return do_perfstat(argc, argv);
+	return do_perf_stat(argc, argv);
 }
-- 
cgit v0.10.2


From aa853f85d9ed593672d0f24a98c72a2518cb63e6 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Sat, 6 Jun 2009 10:17:57 +0100
Subject: [ARM] 5543/1: arm: serial amba: add missing declaration in serial.h

This header is sometimes included in the uncompress stage to get
register values, but no <linux/amba/bus.h> can be included there.
So declare "struct amba_device" here before using it in a prototype.

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Acked-by: Andrea Gallo <andrea.gallo@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 48ee32a..64a982e 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -159,6 +159,7 @@
 #define UART01x_FR_MODEM_ANY	(UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS)
 
 #ifndef __ASSEMBLY__
+struct amba_device; /* in uncompress this is included but amba/bus.h is not */
 struct amba_pl010_data {
 	void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl);
 };
-- 
cgit v0.10.2


From 716c69fecacd42f2a304a97158e04af2786a3f65 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 7 Jun 2009 17:31:52 +0200
Subject: perf top: Fall back to cpu-clock-tick hrtimer sampling if no cycle
 counter available

On architectures/CPUs without PMU support but with perfcounters
enabled 'perf top' currently fails because it cannot create a
cycle based hw-perfcounter.

Fall back to the cpu-clock-tick sw-perfcounter in this case, which
is hrtimer based and will always work (as long as perfcounters
is enabled).

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index fdc1d58..6da30a1 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -527,58 +527,81 @@ static void mmap_read(void)
 	}
 }
 
-static int __cmd_top(void)
+int nr_poll;
+int group_fd;
+
+static void start_counter(int i, int counter)
 {
 	struct perf_counter_attr *attr;
-	pthread_t thread;
-	int i, counter, group_fd, nr_poll = 0;
 	unsigned int cpu;
+
+	cpu = profile_cpu;
+	if (target_pid == -1 && profile_cpu == -1)
+		cpu = i;
+
+	attr = attrs + counter;
+
+	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+	attr->freq		= freq;
+
+try_again:
+	fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
+
+	if (fd[i][counter] < 0) {
+		int err = errno;
+
+		error("sys_perf_counter_open() syscall returned with %d (%s)\n",
+			fd[i][counter], strerror(err));
+
+		if (err == EPERM)
+			die(" No permission - are you root?\n");
+		/*
+		 * If it's cycles then fall back to hrtimer
+		 * based cpu-clock-tick sw counter, which
+		 * is always available even if no PMU support:
+		 */
+		if (attr->type == PERF_TYPE_HARDWARE
+			&& attr->config == PERF_COUNT_CPU_CYCLES) {
+
+			warning(" ... trying to fall back to cpu-clock-ticks\n");
+			attr->type = PERF_TYPE_SOFTWARE;
+			attr->config = PERF_COUNT_CPU_CLOCK;
+			goto try_again;
+		}
+		exit(-1);
+	}
+	assert(fd[i][counter] >= 0);
+	fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
+
+	/*
+	 * First counter acts as the group leader:
+	 */
+	if (group && group_fd == -1)
+		group_fd = fd[i][counter];
+
+	event_array[nr_poll].fd = fd[i][counter];
+	event_array[nr_poll].events = POLLIN;
+	nr_poll++;
+
+	mmap_array[i][counter].counter = counter;
+	mmap_array[i][counter].prev = 0;
+	mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+	mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+			PROT_READ, MAP_SHARED, fd[i][counter], 0);
+	if (mmap_array[i][counter].base == MAP_FAILED)
+		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
+}
+
+static int __cmd_top(void)
+{
+	pthread_t thread;
+	int i, counter;
 	int ret;
 
 	for (i = 0; i < nr_cpus; i++) {
 		group_fd = -1;
-		for (counter = 0; counter < nr_counters; counter++) {
-
-			cpu	= profile_cpu;
-			if (target_pid == -1 && profile_cpu == -1)
-				cpu = i;
-
-			attr = attrs + counter;
-
-			attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-			attr->freq		= freq;
-
-			fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
-			if (fd[i][counter] < 0) {
-				int err = errno;
-
-				error("syscall returned with %d (%s)\n",
-					fd[i][counter], strerror(err));
-				if (err == EPERM)
-					printf("Are you root?\n");
-				exit(-1);
-			}
-			assert(fd[i][counter] >= 0);
-			fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
-
-			/*
-			 * First counter acts as the group leader:
-			 */
-			if (group && group_fd == -1)
-				group_fd = fd[i][counter];
-
-			event_array[nr_poll].fd = fd[i][counter];
-			event_array[nr_poll].events = POLLIN;
-			nr_poll++;
-
-			mmap_array[i][counter].counter = counter;
-			mmap_array[i][counter].prev = 0;
-			mmap_array[i][counter].mask = mmap_pages*page_size - 1;
-			mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
-					PROT_READ, MAP_SHARED, fd[i][counter], 0);
-			if (mmap_array[i][counter].base == MAP_FAILED)
-				die("failed to mmap with %d (%s)\n", errno, strerror(errno));
-		}
+		for (counter = 0; counter < nr_counters; counter++)
+			start_counter(i, counter);
 	}
 
 	/* Wait for a minimal set of events before starting the snapshot */
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index 2cad286..e16bf9a7 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -9,29 +9,29 @@ static void report(const char *prefix, const char *err, va_list params)
 {
 	char msg[1024];
 	vsnprintf(msg, sizeof(msg), err, params);
-	fprintf(stderr, "%s%s\n", prefix, msg);
+	fprintf(stderr, " %s%s\n", prefix, msg);
 }
 
 static NORETURN void usage_builtin(const char *err)
 {
-	fprintf(stderr, "\n usage: %s\n", err);
+	fprintf(stderr, "\n Usage: %s\n", err);
 	exit(129);
 }
 
 static NORETURN void die_builtin(const char *err, va_list params)
 {
-	report("fatal: ", err, params);
+	report(" Fatal: ", err, params);
 	exit(128);
 }
 
 static void error_builtin(const char *err, va_list params)
 {
-	report("error: ", err, params);
+	report(" Error: ", err, params);
 }
 
 static void warn_builtin(const char *warn, va_list params)
 {
-	report("warning: ", warn, params);
+	report(" Warning: ", warn, params);
 }
 
 /* If we are in a dlopen()ed .so write to a global variable would segfault
-- 
cgit v0.10.2


From 3da297a60f7e8840f79f7d0b343af078890939ea Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 7 Jun 2009 17:39:02 +0200
Subject: perf record: Fall back to cpu-clock-ticks if no PMU

On architectures/CPUs without PMU support but with perfcounters
enabled 'perf record' currently fails because it cannot create a
cycle based hw-perfcounter.

Fall back to the cpu-clock-tick sw-perfcounter in this case, which
is hrtimer based and will always work (as long as perfcounters
are enabled).

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index aeab9c4..8786629 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -37,6 +37,7 @@ static pid_t			target_pid			= -1;
 static int			inherit				= 1;
 static int			force				= 0;
 static int			append_file			= 0;
+static int			verbose				= 0;
 
 static long			samples;
 static struct timeval		last_read;
@@ -349,17 +350,35 @@ static void create_counter(int counter, int cpu, pid_t pid)
 
 	track = 0; /* only the first counter needs these */
 
+try_again:
 	fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
 
 	if (fd[nr_cpu][counter] < 0) {
 		int err = errno;
 
-		error("syscall returned with %d (%s)\n",
+		if (verbose)
+			error("sys_perf_counter_open() syscall returned with %d (%s)\n",
 				fd[nr_cpu][counter], strerror(err));
 		if (err == EPERM)
-			printf("Are you root?\n");
+			die("Permission error - are you root?\n");
+
+		/*
+		 * If it's cycles then fall back to hrtimer
+		 * based cpu-clock-tick sw counter, which
+		 * is always available even if no PMU support:
+		 */
+		if (attr->type == PERF_TYPE_HARDWARE
+			&& attr->config == PERF_COUNT_CPU_CYCLES) {
+
+			if (verbose)
+				warning(" ... trying to fall back to cpu-clock-ticks\n");
+			attr->type = PERF_TYPE_SOFTWARE;
+			attr->config = PERF_COUNT_CPU_CLOCK;
+			goto try_again;
+		}
 		exit(-1);
 	}
+
 	assert(fd[nr_cpu][counter] >= 0);
 	fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
 
@@ -519,6 +538,8 @@ static const struct option options[] = {
 		    "profile at this frequency"),
 	OPT_INTEGER('m', "mmap-pages", &mmap_pages,
 		    "number of mmap data pages"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6da30a1..1f8c97d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -65,6 +65,7 @@ static int			group				=  0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			= 16;
 static int			freq				=  0;
+static int			verbose				=  0;
 
 static char			*sym_filter;
 static unsigned long		filter_start;
@@ -550,11 +551,12 @@ try_again:
 	if (fd[i][counter] < 0) {
 		int err = errno;
 
-		error("sys_perf_counter_open() syscall returned with %d (%s)\n",
-			fd[i][counter], strerror(err));
+		if (verbose)
+			error("sys_perf_counter_open() syscall returned with %d (%s)\n",
+				fd[i][counter], strerror(err));
 
 		if (err == EPERM)
-			die(" No permission - are you root?\n");
+			die("No permission - are you root?\n");
 		/*
 		 * If it's cycles then fall back to hrtimer
 		 * based cpu-clock-tick sw counter, which
@@ -563,7 +565,9 @@ try_again:
 		if (attr->type == PERF_TYPE_HARDWARE
 			&& attr->config == PERF_COUNT_CPU_CYCLES) {
 
-			warning(" ... trying to fall back to cpu-clock-ticks\n");
+			if (verbose)
+				warning(" ... trying to fall back to cpu-clock-ticks\n");
+
 			attr->type = PERF_TYPE_SOFTWARE;
 			attr->config = PERF_COUNT_CPU_CLOCK;
 			goto try_again;
@@ -673,6 +677,8 @@ static const struct option options[] = {
 		    "profile at this frequency"),
 	OPT_INTEGER('E', "entries", &print_entries,
 		    "display this many functions"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
 	OPT_END()
 };
 
-- 
cgit v0.10.2


From 30c806a094493beb7691bc7957dfa02dee96230a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 7 Jun 2009 17:46:24 +0200
Subject: perf_counter tools: Handle kernels with !CONFIG_PERF_COUNTER

If perf is run on a !CONFIG_PERF_COUNTER kernel right now it
bails out with no messages or with confusing messages.

Standardize this case some more and explain the situation.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8786629..deaee42 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -356,9 +356,6 @@ try_again:
 	if (fd[nr_cpu][counter] < 0) {
 		int err = errno;
 
-		if (verbose)
-			error("sys_perf_counter_open() syscall returned with %d (%s)\n",
-				fd[nr_cpu][counter], strerror(err));
 		if (err == EPERM)
 			die("Permission error - are you root?\n");
 
@@ -376,6 +373,10 @@ try_again:
 			attr->config = PERF_COUNT_CPU_CLOCK;
 			goto try_again;
 		}
+		printf("\n");
+		error("perfcounter syscall returned with %d (%s)\n",
+			fd[nr_cpu][counter], strerror(err));
+		die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
 		exit(-1);
 	}
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1f8c97d..be1698f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -551,10 +551,6 @@ try_again:
 	if (fd[i][counter] < 0) {
 		int err = errno;
 
-		if (verbose)
-			error("sys_perf_counter_open() syscall returned with %d (%s)\n",
-				fd[i][counter], strerror(err));
-
 		if (err == EPERM)
 			die("No permission - are you root?\n");
 		/*
@@ -572,6 +568,10 @@ try_again:
 			attr->config = PERF_COUNT_CPU_CLOCK;
 			goto try_again;
 		}
+		printf("\n");
+		error("perfcounter syscall returned with %d (%s)\n",
+			fd[i][counter], strerror(err));
+		die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
 		exit(-1);
 	}
 	assert(fd[i][counter] >= 0);
-- 
cgit v0.10.2


From a14832ff977e78d1982cdf78cdabb1f2320d9ac8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 7 Jun 2009 17:58:23 +0200
Subject: perf report: Print more expressive message in case of file open error

Before:

 $ perf report
 failed to open file: No such file or directory

After:

 $ perf report
  failed to open file: perf.data  (try 'perf record' first)

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 242e09f..f053a74 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1120,7 +1120,10 @@ static int __cmd_report(void)
 
 	input = open(input_name, O_RDONLY);
 	if (input < 0) {
-		perror("failed to open file");
+		fprintf(stderr, " failed to open file: %s", input_name);
+		if (!strcmp(input_name, "perf.data"))
+			fprintf(stderr, "  (try 'perf record' first)");
+		fprintf(stderr, "\n");
 		exit(-1);
 	}
 
-- 
cgit v0.10.2


From e779898aa74cd2e97216368b3f3689ceffe8aeed Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 7 Jun 2009 18:14:46 +0200
Subject: perf stat: Print out instructins/cycle metric

Before:

     7549326754  cycles               #    3201.811 M/sec
    10007594937  instructions         #    4244.408 M/sec

After:

     7542051194  cycles               #    3201.996 M/sec
    10007743852  instructions         #    4248.811 M/sec # 1.327 per cycle

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 184ff95..8085509 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -83,6 +83,7 @@ static __u64			event_scaled[MAX_COUNTERS];
 
 static __u64			runtime_nsecs;
 static __u64			walltime_nsecs;
+static __u64			runtime_cycles;
 
 static void create_perf_stat_counter(int counter)
 {
@@ -177,6 +178,9 @@ static void read_counter(int counter)
 	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
 		attrs[counter].config == PERF_COUNT_TASK_CLOCK)
 		runtime_nsecs = count[0];
+	if (attrs[counter].type == PERF_TYPE_HARDWARE &&
+		attrs[counter].config == PERF_COUNT_CPU_CYCLES)
+		runtime_cycles = count[0];
 }
 
 /*
@@ -214,6 +218,13 @@ static void print_counter(int counter)
 		if (runtime_nsecs)
 			fprintf(stderr, " # %11.3f M/sec",
 				(double)count[0]/runtime_nsecs*1000.0);
+		if (runtime_cycles &&
+			attrs[counter].type == PERF_TYPE_HARDWARE &&
+				attrs[counter].config == PERF_COUNT_INSTRUCTIONS) {
+
+			fprintf(stderr, " # %1.3f per cycle",
+				(double)count[0] / (double)runtime_cycles);
+		}
 	}
 	if (scaled)
 		fprintf(stderr, "  (scaled from %.2f%%)",
-- 
cgit v0.10.2


From a4c0364be3f43d3e17fe19270f8b3d64881606e6 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sat, 6 Jun 2009 12:34:39 +0300
Subject: KVM: Explicity initialize cpus_hardware_enabled

Under CONFIG_MAXSMP, cpus_hardware_enabled is allocated from the heap and
not statically initialized.  This causes a crash on reboot when kvm thinks
vmx is enabled on random nonexistent cpus and accesses nonexistent percpu
lists.

Fix by explicitly clearing the variable.

Cc: stable@kernel.org
Reported-and-tested-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1ecbe23..4293528 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2305,6 +2305,7 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 		r = -ENOMEM;
 		goto out_free_0;
 	}
+	cpumask_clear(cpus_hardware_enabled);
 
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
-- 
cgit v0.10.2


From 0312af84164215a452f2a94957ebd9bce86e0204 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 8 Jun 2009 07:42:04 +0200
Subject: perf_counter, x86: Implement generalized cache event types, add Core2
 support

Fill in core2_hw_cache_event_id[] with the Core2 model specific events.

The events can be used in all the tools via the -e (--event) parameter,
for example "-e l1-misses" or -"-e l2-accesses" or "-e l2-write-misses".

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index e86679f..b1f71ff 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -194,7 +194,90 @@ static const u64 core2_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 {
-	/* To be filled in */
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L2  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
 };
 
 static const u64 atom_hw_cache_event_ids
-- 
cgit v0.10.2


From ad689220614b6c7c0b13b70d742f358e9310e71e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 8 Jun 2009 09:30:41 +0200
Subject: perf_counter, x86: Implement generalized cache event types, add Atom
 support

Fill in core2_hw_cache_event_id[] with the Atom model specific events.

The events can be used in all the tools via the -e (--event) parameter,
for example "-e l1-misses" or -"-e l2-accesses" or "-e l2-write-misses".

( Note: these are straight from the Intel manuals - not tested yet.)

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b1f71ff..71590e0 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -285,7 +285,90 @@ static const u64 atom_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
 {
-	/* To be filled in */
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2241, /* L1D_CACHE.ST               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L2  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
 };
 
 static u64 intel_pmu_raw_event(u64 event)
-- 
cgit v0.10.2


From 1123e3ad73697d64ad99f0104bbe49f8b52d7d65 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 May 2009 11:25:09 +0200
Subject: perf_counter: Clean up x86 boot messages

Standardize and tidy up all the messages we print during
perfcounter initialization.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 71590e0..0339d19 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1298,23 +1298,22 @@ static int intel_pmu_init(void)
 	if (version < 2)
 		return -ENODEV;
 
-	x86_pmu = intel_pmu;
-	x86_pmu.version = version;
-	x86_pmu.num_counters = eax.split.num_counters;
+	x86_pmu				= intel_pmu;
+	x86_pmu.version			= version;
+	x86_pmu.num_counters		= eax.split.num_counters;
+	x86_pmu.counter_bits		= eax.split.bit_width;
+	x86_pmu.counter_mask		= (1ULL << eax.split.bit_width) - 1;
 
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose counters, so
 	 * assume at least 3 counters:
 	 */
-	x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
-
-	x86_pmu.counter_bits = eax.split.bit_width;
-	x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
+	x86_pmu.num_counters_fixed	= max((int)edx.split.num_counters_fixed, 3);
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 
 	/*
-	 * Nehalem:
+	 * Install the hw-cache-events table:
 	 */
 	switch (boot_cpu_data.x86_model) {
 	case 17:
@@ -1322,7 +1321,7 @@ static int intel_pmu_init(void)
 		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
 			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
 
-		pr_info("... installed Core2 event tables\n");
+		pr_cont("Core2 events, ");
 		break;
 	default:
 	case 26:
@@ -1330,14 +1329,14 @@ static int intel_pmu_init(void)
 		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
 			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
 
-		pr_info("... installed Nehalem/Corei7 event tables\n");
+		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28:
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
 			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
 
-		pr_info("... installed Atom event tables\n");
+		pr_cont("Atom events, ");
 		break;
 	}
 	return 0;
@@ -1353,6 +1352,8 @@ void __init init_hw_perf_counters(void)
 {
 	int err;
 
+	pr_info("Performance Counters: ");
+
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_INTEL:
 		err = intel_pmu_init();
@@ -1363,14 +1364,13 @@ void __init init_hw_perf_counters(void)
 	default:
 		return;
 	}
-	if (err != 0)
+	if (err != 0) {
+		pr_cont("no PMU driver, software counters only.\n");
 		return;
+	}
 
-	pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name);
-	pr_info("... version:         %d\n", x86_pmu.version);
-	pr_info("... bit width:       %d\n", x86_pmu.counter_bits);
+	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-	pr_info("... num counters:    %d\n", x86_pmu.num_counters);
 	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
@@ -1379,23 +1379,25 @@ void __init init_hw_perf_counters(void)
 	perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
 	perf_max_counters = x86_pmu.num_counters;
 
-	pr_info("... value mask:      %016Lx\n", x86_pmu.counter_mask);
-	pr_info("... max period:      %016Lx\n", x86_pmu.max_period);
-
 	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
 		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
 	}
-	pr_info("... fixed counters:  %d\n", x86_pmu.num_counters_fixed);
 
 	perf_counter_mask |=
 		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
-	pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
-
 	perf_counters_lapic_init();
 	register_die_notifier(&perf_counter_nmi_notifier);
+
+	pr_info("... version:                 %d\n",     x86_pmu.version);
+	pr_info("... bit width:               %d\n",     x86_pmu.counter_bits);
+	pr_info("... generic counters:        %d\n",     x86_pmu.num_counters);
+	pr_info("... value mask:              %016Lx\n", x86_pmu.counter_mask);
+	pr_info("... max period:              %016Lx\n", x86_pmu.max_period);
+	pr_info("... fixed-purpose counters:  %d\n",     x86_pmu.num_counters_fixed);
+	pr_info("... counter mask:            %016Lx\n", perf_counter_mask);
 }
 
 static inline void x86_pmu_read(struct perf_counter *counter)
-- 
cgit v0.10.2


From 54de6bc8b2437f642844cecb8d183df2368ffceb Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 10:21:07 +0200
Subject: ALSA: ctxfi - Optimize the native timer handling using wc counter

Optimize the timer update routine to look up wall clock once instead of
checking the position of each stream at each timer update.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h
index 35350cf..8f11644 100644
--- a/sound/pci/ctxfi/cthardware.h
+++ b/sound/pci/ctxfi/cthardware.h
@@ -147,6 +147,7 @@ struct hw {
 
 	int (*set_timer_irq)(struct hw *hw, int enable);
 	int (*set_timer_tick)(struct hw *hw, unsigned int tick);
+	unsigned int (*get_wc)(struct hw *hw);
 
 	void (*irq_callback)(void *data, unsigned int bit);
 	void *irq_callback_data;
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index df565c1..b165466 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1186,6 +1186,11 @@ static int set_timer_tick(struct hw *hw, unsigned int ticks)
 	return 0;
 }
 
+static unsigned int get_wc(struct hw *hw)
+{
+	return hw_read_20kx(hw, WC);
+}
+
 /* Card hardware initialization block */
 struct dac_conf {
 	unsigned int msr; /* master sample rate in rsrs */
@@ -2235,6 +2240,7 @@ static struct hw ct20k1_preset __devinitdata = {
 
 	.set_timer_irq = set_timer_irq,
 	.set_timer_tick = set_timer_tick,
+	.get_wc = get_wc,
 };
 
 int __devinit create_20k1_hw_obj(struct hw **rhw)
diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
index ceda74e..ec869a4 100644
--- a/sound/pci/ctxfi/cttimer.c
+++ b/sound/pci/ctxfi/cttimer.c
@@ -47,6 +47,7 @@ struct ct_timer {
 	struct ct_timer_ops *ops;
 	struct list_head instance_head;
 	struct list_head running_head;
+	unsigned int wc;		/* current wallclock */
 	unsigned int irq_handling:1;	/* in IRQ handling */
 	unsigned int reprogram:1;	/* need to reprogram the internval */
 	unsigned int running:1;		/* global timer running */
@@ -136,6 +137,7 @@ static struct ct_timer_ops ct_systimer_ops = {
  */
 
 #define CT_TIMER_FREQ	48000
+#define MIN_TICKS	1
 #define MAX_TICKS	((1 << 13) - 1)
 
 static void ct_xfitimer_irq_rearm(struct ct_timer *atimer, int ticks)
@@ -159,6 +161,12 @@ static void ct_xfitimer_irq_stop(struct ct_timer *atimer)
 	}
 }
 
+static inline unsigned int ct_xfitimer_get_wc(struct ct_timer *atimer)
+{
+	struct hw *hw = atimer->atc->hw;
+	return hw->get_wc(hw);
+}
+
 /*
  * reprogram the timer interval;
  * checks the running instance list and determines the next timer interval.
@@ -170,37 +178,46 @@ static void ct_xfitimer_irq_stop(struct ct_timer *atimer)
 static int ct_xfitimer_reprogram(struct ct_timer *atimer)
 {
 	struct ct_timer_instance *ti;
-	int min_intr = -1;
+	unsigned int min_intr = (unsigned int)-1;
 	int updates = 0;
+	unsigned int wc, diff;
 
+	if (list_empty(&atimer->running_head)) {
+		ct_xfitimer_irq_stop(atimer);
+		atimer->reprogram = 0; /* clear flag */
+		return 0;
+	}
+
+	wc = ct_xfitimer_get_wc(atimer);
+	diff = wc - atimer->wc;
+	atimer->wc = wc;
 	list_for_each_entry(ti, &atimer->running_head, running_list) {
-		struct snd_pcm_runtime *runtime;
-		unsigned int pos, diff;
-		int intr;
-		runtime = ti->substream->runtime;
-		pos = ti->substream->ops->pointer(ti->substream);
-		if (pos < ti->position)
-			diff = runtime->buffer_size - ti->position + pos;
-		else
-			diff = pos - ti->position;
-		ti->position = pos;
-		while (diff >= ti->frag_count) {
-			ti->frag_count += runtime->period_size;
-			ti->need_update = 1;
-			updates++;
+		if (ti->frag_count > diff)
+			ti->frag_count -= diff;
+		else {
+			unsigned int pos;
+			unsigned int period_size, rate;
+
+			period_size = ti->substream->runtime->period_size;
+			rate = ti->substream->runtime->rate;
+			pos = ti->substream->ops->pointer(ti->substream);
+			if (pos / period_size != ti->position / period_size) {
+				ti->need_update = 1;
+				ti->position = pos;
+				updates++;
+			}
+			pos %= period_size;
+			pos = period_size - pos;
+			ti->frag_count = div_u64((u64)pos * CT_TIMER_FREQ +
+						 rate - 1, rate);
 		}
-		ti->frag_count -= diff;
-		intr = div_u64((u64)ti->frag_count * CT_TIMER_FREQ,
-			       runtime->rate);
-		if (min_intr < 0 || intr < min_intr)
-			min_intr = intr;
+		if (ti->frag_count < min_intr)
+			min_intr = ti->frag_count;
 	}
 
-	if (min_intr > 0)
-		ct_xfitimer_irq_rearm(atimer, min_intr);
-	else
-		ct_xfitimer_irq_stop(atimer);
-
+	if (min_intr < MIN_TICKS)
+		min_intr = MIN_TICKS;
+	ct_xfitimer_irq_rearm(atimer, min_intr);
 	atimer->reprogram = 0; /* clear flag */
 	return updates;
 }
@@ -253,13 +270,14 @@ static void ct_xfitimer_update(struct ct_timer *atimer)
 	unsigned long flags;
 	int update;
 
+	spin_lock_irqsave(&atimer->lock, flags);
 	if (atimer->irq_handling) {
 		/* reached from IRQ handler; let it handle later */
 		atimer->reprogram = 1;
+		spin_unlock_irqrestore(&atimer->lock, flags);
 		return;
 	}
 
-	spin_lock_irqsave(&atimer->lock, flags);
 	ct_xfitimer_irq_stop(atimer);
 	update = ct_xfitimer_reprogram(atimer);
 	spin_unlock_irqrestore(&atimer->lock, flags);
@@ -273,6 +291,8 @@ static void ct_xfitimer_start(struct ct_timer_instance *ti)
 	unsigned long flags;
 
 	spin_lock_irqsave(&atimer->lock, flags);
+	if (list_empty(&ti->running_list))
+		atimer->wc = ct_xfitimer_get_wc(atimer);
 	list_add(&ti->running_list, &atimer->running_head);
 	spin_unlock_irqrestore(&atimer->lock, flags);
 	ct_xfitimer_update(atimer);
@@ -396,12 +416,12 @@ struct ct_timer *ct_timer_new(struct ct_atc *atc)
 	atimer->atc = atc;
 	hw = atc->hw;
 	if (!USE_SYSTEM_TIMER && hw->set_timer_irq) {
-		printk(KERN_INFO "ctxfi: Use xfi-native timer\n");
+		snd_printd(KERN_INFO "ctxfi: Use xfi-native timer\n");
 		atimer->ops = &ct_xfitimer_ops;
 		hw->irq_callback_data = atimer;
 		hw->irq_callback = ct_timer_interrupt;
 	} else {
-		printk(KERN_INFO "ctxfi: Use system timer\n");
+		snd_printd(KERN_INFO "ctxfi: Use system timer\n");
 		atimer->ops = &ct_systimer_ops;
 	}
 	return atimer;
-- 
cgit v0.10.2


From a89ab11454b476d2d9e8a10f903360a62e21bac8 Mon Sep 17 00:00:00 2001
From: Peter Ma <pma@mediamatech.com>
Date: Mon, 8 Jun 2009 12:54:02 +0200
Subject: avr32: Change Atmel ATNGW100 config to add choice of add-on board

Signed-off-by: Peter Ma <pma@mediamatech.com>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>

diff --git a/arch/avr32/boards/atngw100/Kconfig b/arch/avr32/boards/atngw100/Kconfig
index b3f9947..f41c533 100644
--- a/arch/avr32/boards/atngw100/Kconfig
+++ b/arch/avr32/boards/atngw100/Kconfig
@@ -2,8 +2,15 @@
 
 if BOARD_ATNGW100
 
+choice
+	prompt "Select an NGW100 add-on board to support"
+	default BOARD_ATNGW100_ADDON_NONE
+
+config BOARD_ATNGW100_ADDON_NONE
+	bool "None"
+
 config BOARD_ATNGW100_EVKLCD10X
-	bool "Add support for EVKLCD10X addon board"
+	bool "EVKLCD10X addon board"
 	help
 	  This enables support for the EVKLCD100 (QVGA) or EVKLCD101 (VGA)
 	  addon board for the NGW100. By enabling this the LCD controller and
@@ -14,7 +21,7 @@ config BOARD_ATNGW100_EVKLCD10X
 	  The MCI pins can be reenabled by editing the "add device function" but
 	  this may break the setup for other displays that use these pins.
 
-	  Choose 'Y' here if you have a EVKLCD100/101 connected to the NGW100.
+endchoice
 
 choice
 	prompt "LCD panel resolution on EVKLCD10X"
-- 
cgit v0.10.2


From 4024533e60787a5507818b0c0fdb44ddb522cdf5 Mon Sep 17 00:00:00 2001
From: Peter Ma <pma@mediamatech.com>
Date: Mon, 8 Jun 2009 12:55:35 +0200
Subject: avr32: Add support for Mediama RMTx add-on board for ATNGW100

Signed-off-by: Peter Ma <pma@mediamatech.com>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>

diff --git a/arch/avr32/boards/atngw100/Kconfig b/arch/avr32/boards/atngw100/Kconfig
index f41c533..be27a02 100644
--- a/arch/avr32/boards/atngw100/Kconfig
+++ b/arch/avr32/boards/atngw100/Kconfig
@@ -21,6 +21,18 @@ config BOARD_ATNGW100_EVKLCD10X
 	  The MCI pins can be reenabled by editing the "add device function" but
 	  this may break the setup for other displays that use these pins.
 
+config BOARD_ATNGW100_MRMT
+	bool "Mediama RMT1/2 add-on board"
+	help
+	  This enables support for the Mediama RMT1 or RMT2 board.
+	  RMT provides LCD support, AC97 codec and other
+	  optional peripherals to the Atmel NGW100.
+
+	  This choice disables the detect pin and the write-protect pin for the
+	  MCI platform device, since it conflicts with the LCD platform device.
+	  The MCI pins can be reenabled by editing the "add device function" but
+	  this may break the setup for other displays that use these pins.
+
 endchoice
 
 choice
@@ -39,4 +51,8 @@ config BOARD_ATNGW100_EVKLCD10X_POW_QVGA
 
 endchoice
 
+if BOARD_ATNGW100_MRMT
+source	"arch/avr32/boards/atngw100/Kconfig_mrmt"
+endif
+
 endif	# BOARD_ATNGW100
diff --git a/arch/avr32/boards/atngw100/Kconfig_mrmt b/arch/avr32/boards/atngw100/Kconfig_mrmt
new file mode 100644
index 0000000..9a199a2
--- /dev/null
+++ b/arch/avr32/boards/atngw100/Kconfig_mrmt
@@ -0,0 +1,80 @@
+# RMT for NGW100 customization
+
+choice
+	prompt "RMT Version"
+	help
+	  Select the RMTx board version.
+
+config BOARD_MRMT_REV1
+	bool "RMT1"
+config BOARD_MRMT_REV2
+	bool "RMT2"
+
+endchoice
+
+config BOARD_MRMT_AC97
+	bool "Enable AC97 CODEC"
+	help
+	  Enable the UCB1400 AC97 CODEC driver.
+
+choice
+	prompt "Touchscreen Driver"
+	default BOARD_MRMT_ADS7846_TS
+
+config BOARD_MRMT_UCB1400_TS
+	bool "Use UCB1400 Touchscreen"
+
+config BOARD_MRMT_ADS7846_TS
+	bool "Use ADS7846 Touchscreen"
+
+endchoice
+
+choice
+	prompt "RMTx LCD Selection"
+	default BOARD_MRMT_LCD_DISABLE
+
+config BOARD_MRMT_LCD_DISABLE
+	bool "LCD Disabled"
+
+config BOARD_MRMT_LCD_LQ043T3DX0X
+	bool "Sharp LQ043T3DX0x or compatible"
+	help
+	  If using RMT2, be sure to load the resistor pack selectors accordingly
+
+if BOARD_MRMT_REV2
+config BOARD_MRMT_LCD_KWH043GM08
+	bool "Formike KWH043GM08 or compatible"
+	help
+	  Be sure to load the RMT2 resistor pack selectors accordingly
+endif
+
+endchoice
+
+if !BOARD_MRMT_LCD_DISABLE
+config BOARD_MRMT_BL_PWM
+	bool "Use PWM control for LCD Backlight"
+	help
+		Use PWM driver for controlling LCD Backlight.
+		Otherwise, LCD Backlight is always on.
+endif
+
+config BOARD_MRMT_RTC_I2C
+	bool "Use External RTC on I2C Bus"
+	help
+		RMT1 has an optional RTC device on the I2C bus.
+		It is a SII S35390A.  Be sure to select the
+		matching RTC driver.
+
+choice
+	prompt "Wireless Module on ttyS2"
+	default BOARD_MRMT_WIRELESS_ZB
+
+config BOARD_MRMT_WIRELESS_ZB
+	bool "Use ZigBee/802.15.4 Module"
+
+config BOARD_MRMT_WIRELESS_BT
+	bool "Use Bluetooth (HCI) Module"
+
+config BOARD_MRMT_WIRELESS_NONE
+	bool "Not Installed"
+endchoice
diff --git a/arch/avr32/boards/atngw100/Makefile b/arch/avr32/boards/atngw100/Makefile
index 6376f53..f4ebe42 100644
--- a/arch/avr32/boards/atngw100/Makefile
+++ b/arch/avr32/boards/atngw100/Makefile
@@ -1,2 +1,3 @@
 obj-y					+= setup.o flash.o
 obj-$(CONFIG_BOARD_ATNGW100_EVKLCD10X)	+= evklcd10x.o
+obj-$(CONFIG_BOARD_ATNGW100_MRMT)	+= mrmt.o
diff --git a/arch/avr32/boards/atngw100/mrmt.c b/arch/avr32/boards/atngw100/mrmt.c
new file mode 100644
index 0000000..bf78e51
--- /dev/null
+++ b/arch/avr32/boards/atngw100/mrmt.c
@@ -0,0 +1,373 @@
+/*
+ * Board-specific setup code for Remote Media Terminal 1 (RMT1)
+ * add-on board for the ATNGW100 Network Gateway
+ *
+ * Copyright (C) 2008 Mediama Technologies
+ * Based on ATNGW100 Network Gateway (Copyright (C) Atmel)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/linkage.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/fb.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
+#include <linux/atmel_serial.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/ads7846.h>
+
+#include <video/atmel_lcdc.h>
+#include <sound/atmel-ac97c.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+
+#include <mach/at32ap700x.h>
+#include <mach/board.h>
+#include <mach/init.h>
+#include <mach/portmux.h>
+
+/* Define board-specifoic GPIO assignments */
+#define PIN_LCD_BL	GPIO_PIN_PA(28)
+#define PWM_CH_BL	0	/* Must match with GPIO pin definition */
+#define PIN_LCD_DISP	GPIO_PIN_PA(31)
+#define	PIN_AC97_RST_N	GPIO_PIN_PA(30)
+#define PB_EXTINT_BASE	25
+#define TS_IRQ		0
+#define PIN_TS_EXTINT	GPIO_PIN_PB(PB_EXTINT_BASE+TS_IRQ)
+#define PIN_PB_LEFT	GPIO_PIN_PB(11)
+#define PIN_PB_RIGHT	GPIO_PIN_PB(12)
+#define PIN_PWR_SW_N	GPIO_PIN_PB(14)
+#define PIN_PWR_ON	GPIO_PIN_PB(13)
+#define PIN_ZB_RST_N	GPIO_PIN_PA(21)
+#define PIN_BT_RST	GPIO_PIN_PA(22)
+#define PIN_LED_SYS	GPIO_PIN_PA(16)
+#define PIN_LED_A	GPIO_PIN_PA(19)
+#define PIN_LED_B	GPIO_PIN_PE(19)
+
+#ifdef CONFIG_BOARD_MRMT_LCD_LQ043T3DX0X
+/* Sharp LQ043T3DX0x (or compatible) panel */
+static struct fb_videomode __initdata lcd_fb_modes[] = {
+	{
+		.name		= "480x272 @ 59.94Hz",
+		.refresh	= 59.94,
+		.xres		= 480,		.yres		= 272,
+		.pixclock	= KHZ2PICOS(9000),
+
+		.left_margin	= 2,		.right_margin	= 2,
+		.upper_margin	= 3,		.lower_margin	= 9,
+		.hsync_len	= 41,		.vsync_len	= 1,
+
+		.sync		= 0,
+		.vmode		= FB_VMODE_NONINTERLACED,
+	},
+};
+
+static struct fb_monspecs __initdata lcd_fb_default_monspecs = {
+	.manufacturer		= "SHA",
+	.monitor		= "LQ043T3DX02",
+	.modedb			= lcd_fb_modes,
+	.modedb_len		= ARRAY_SIZE(lcd_fb_modes),
+	.hfmin			= 14915,
+	.hfmax			= 17638,
+	.vfmin			= 53,
+	.vfmax			= 61,
+	.dclkmax		= 9260000,
+};
+
+static struct atmel_lcdfb_info __initdata rmt_lcdc_data = {
+	.default_bpp		= 24,
+	.default_dmacon		= ATMEL_LCDC_DMAEN | ATMEL_LCDC_DMA2DEN,
+	.default_lcdcon2	= (ATMEL_LCDC_DISTYPE_TFT
+				   | ATMEL_LCDC_CLKMOD_ALWAYSACTIVE
+				   | ATMEL_LCDC_INVCLK_NORMAL
+				   | ATMEL_LCDC_MEMOR_BIG),
+	.lcd_wiring_mode	= ATMEL_LCDC_WIRING_RGB,
+	.default_monspecs	= &lcd_fb_default_monspecs,
+	.guard_time		= 2,
+};
+#endif
+
+#ifdef CONFIG_BOARD_MRMT_LCD_KWH043GM08
+/* Sharp KWH043GM08-Fxx (or compatible) panel */
+static struct fb_videomode __initdata lcd_fb_modes[] = {
+	{
+		.name		= "480x272 @ 59.94Hz",
+		.refresh	= 59.94,
+		.xres		= 480,		.yres		= 272,
+		.pixclock	= KHZ2PICOS(9000),
+
+		.left_margin	= 2,		.right_margin	= 2,
+		.upper_margin	= 3,		.lower_margin	= 9,
+		.hsync_len	= 41,		.vsync_len	= 1,
+
+		.sync		= 0,
+		.vmode		= FB_VMODE_NONINTERLACED,
+	},
+};
+
+static struct fb_monspecs __initdata lcd_fb_default_monspecs = {
+	.manufacturer		= "FOR",
+	.monitor		= "KWH043GM08",
+	.modedb			= lcd_fb_modes,
+	.modedb_len		= ARRAY_SIZE(lcd_fb_modes),
+	.hfmin			= 14915,
+	.hfmax			= 17638,
+	.vfmin			= 53,
+	.vfmax			= 61,
+	.dclkmax		= 9260000,
+};
+
+static struct atmel_lcdfb_info __initdata rmt_lcdc_data = {
+	.default_bpp		= 24,
+	.default_dmacon		= ATMEL_LCDC_DMAEN | ATMEL_LCDC_DMA2DEN,
+	.default_lcdcon2	= (ATMEL_LCDC_DISTYPE_TFT
+				   | ATMEL_LCDC_CLKMOD_ALWAYSACTIVE
+				   | ATMEL_LCDC_INVCLK_INVERTED
+				   | ATMEL_LCDC_MEMOR_BIG),
+	.lcd_wiring_mode	= ATMEL_LCDC_WIRING_RGB,
+	.default_monspecs	= &lcd_fb_default_monspecs,
+	.guard_time		= 2,
+};
+#endif
+
+#ifdef CONFIG_BOARD_MRMT_AC97
+static struct ac97c_platform_data __initdata ac97c0_data = {
+	.reset_pin		= PIN_AC97_RST_N,
+};
+#endif
+
+#ifdef CONFIG_BOARD_MRMT_UCB1400_TS
+/* NOTE: IRQ assignment relies on kernel module parameter */
+static struct platform_device rmt_ts_device = {
+	.name	= "ucb1400_ts",
+	.id	= -1,
+	}
+};
+#endif
+
+#ifdef CONFIG_BOARD_MRMT_BL_PWM
+/* PWM LEDs: LCD Backlight, etc */
+static struct gpio_led rmt_pwm_led[] = {
+	/* here the "gpio" is actually a PWM channel */
+	{ .name = "backlight",	.gpio = PWM_CH_BL, },
+};
+
+static struct gpio_led_platform_data rmt_pwm_led_data = {
+	.num_leds	= ARRAY_SIZE(rmt_pwm_led),
+	.leds		= rmt_pwm_led,
+};
+
+static struct platform_device rmt_pwm_led_dev = {
+	.name		= "leds-atmel-pwm",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &rmt_pwm_led_data,
+	},
+};
+#endif
+
+#ifdef CONFIG_BOARD_MRMT_ADS7846_TS
+static int ads7846_pendown_state(void)
+{
+	return !gpio_get_value( PIN_TS_EXTINT );	/* PENIRQ.*/
+}
+
+static struct ads7846_platform_data ads_info = {
+	.model				= 7846,
+	.keep_vref_on			= 0,	/* Use external VREF pin */
+	.vref_delay_usecs		= 0,
+	.vref_mv			= 3300,	/* VREF = 3.3V */
+	.settle_delay_usecs		= 800,
+	.penirq_recheck_delay_usecs	= 800,
+	.x_plate_ohms			= 750,
+	.y_plate_ohms			= 300,
+	.pressure_max			= 4096,
+	.debounce_max			= 1,
+	.debounce_rep			= 0,
+	.debounce_tol			= (~0),
+	.get_pendown_state		= ads7846_pendown_state,
+	.filter				= NULL,
+	.filter_init			= NULL,
+};
+
+static struct spi_board_info spi01_board_info[] __initdata = {
+	{
+		.modalias	= "ads7846",
+		.max_speed_hz	= 31250*26,
+		.bus_num	= 0,
+		.chip_select	= 1,
+		.platform_data	= &ads_info,
+		.irq		= AT32_EXTINT(TS_IRQ),
+	},
+};
+#endif
+
+/* GPIO Keys: left, right, power, etc */
+static const struct gpio_keys_button rmt_gpio_keys_buttons[] = {
+	[0] = {
+		.type		= EV_KEY,
+		.code		= KEY_POWER,
+		.gpio		= PIN_PWR_SW_N,
+		.active_low	= 1,
+		.desc		= "power button",
+	},
+	[1] = {
+		.type		= EV_KEY,
+		.code		= KEY_LEFT,
+		.gpio		= PIN_PB_LEFT,
+		.active_low	= 1,
+		.desc		= "left button",
+	},
+	[2] = {
+		.type		= EV_KEY,
+		.code		= KEY_RIGHT,
+		.gpio		= PIN_PB_RIGHT,
+		.active_low	= 1,
+		.desc		= "right button",
+	},
+};
+
+static const struct gpio_keys_platform_data rmt_gpio_keys_data = {
+	.nbuttons =	ARRAY_SIZE(rmt_gpio_keys_buttons),
+	.buttons =	(void *) rmt_gpio_keys_buttons,
+};
+
+static struct platform_device rmt_gpio_keys = {
+	.name =		"gpio-keys",
+	.id =		-1,
+	.dev = {
+		.platform_data = (void *) &rmt_gpio_keys_data,
+	}
+};
+
+#ifdef CONFIG_BOARD_MRMT_RTC_I2C
+static struct i2c_board_info __initdata mrmt1_i2c_rtc = {
+	I2C_BOARD_INFO("s35390a", 0x30),
+	.irq		= 0,
+};
+#endif
+
+static void mrmt_power_off(void)
+{
+	/* PWR_ON=0 will force power off */
+	gpio_set_value( PIN_PWR_ON, 0 );
+}
+
+static int __init mrmt1_init(void)
+{
+	gpio_set_value( PIN_PWR_ON, 1 );	/* Ensure PWR_ON is enabled */
+
+	pm_power_off = mrmt_power_off;
+
+	/* Setup USARTS (other than console) */
+	at32_map_usart(2, 1, 0);	/* USART 2: /dev/ttyS1, RMT1:DB9M */
+	at32_map_usart(3, 2, ATMEL_USART_RTS | ATMEL_USART_CTS);
+			/* USART 3: /dev/ttyS2, RMT1:Wireless, w/ RTS/CTS */
+	at32_add_device_usart(1);
+	at32_add_device_usart(2);
+
+	/* Select GPIO Key pins */
+	at32_select_gpio( PIN_PWR_SW_N, AT32_GPIOF_DEGLITCH);
+	at32_select_gpio( PIN_PB_LEFT, AT32_GPIOF_DEGLITCH);
+	at32_select_gpio( PIN_PB_RIGHT, AT32_GPIOF_DEGLITCH);
+	platform_device_register(&rmt_gpio_keys);
+
+#ifdef CONFIG_BOARD_MRMT_RTC_I2C
+	i2c_register_board_info(0, &mrmt1_i2c_rtc, 1);
+#endif
+
+#ifndef CONFIG_BOARD_MRMT_LCD_DISABLE
+	/* User "alternate" LCDC inferface on Port E & D */
+	/* NB: exclude LCDC_CC pin, as NGW100 reserves it for other use */
+	at32_add_device_lcdc(0, &rmt_lcdc_data,
+		fbmem_start, fbmem_size,
+		(ATMEL_LCDC_ALT_24BIT | ATMEL_LCDC_PE_DVAL ) );
+#endif
+
+#ifdef CONFIG_BOARD_MRMT_AC97
+	at32_add_device_ac97c(0, &ac97c0_data, AC97C_BOTH);
+#endif
+
+#ifdef CONFIG_BOARD_MRMT_ADS7846_TS
+	/* Select the Touchscreen interrupt pin mode */
+	at32_select_periph( GPIO_PIOB_BASE, 1 << (PB_EXTINT_BASE+TS_IRQ),
+			GPIO_PERIPH_A, AT32_GPIOF_DEGLITCH);
+	set_irq_type( AT32_EXTINT(TS_IRQ), IRQ_TYPE_EDGE_FALLING );
+	spi_register_board_info(spi01_board_info,ARRAY_SIZE(spi01_board_info));
+#endif
+
+#ifdef CONFIG_BOARD_MRMT_UCB1400_TS
+	/* Select the Touchscreen interrupt pin mode */
+	at32_select_periph( GPIO_PIOB_BASE, 1 << (PB_EXTINT_BASE+TS_IRQ),
+			GPIO_PERIPH_A, AT32_GPIOF_DEGLITCH);
+	platform_device_register(&rmt_ts_device);
+#endif
+
+	at32_select_gpio( PIN_LCD_DISP, AT32_GPIOF_OUTPUT );
+	gpio_request( PIN_LCD_DISP, "LCD_DISP" );
+	gpio_direction_output( PIN_LCD_DISP, 0 );	/* LCD DISP */
+#ifdef CONFIG_BOARD_MRMT_LCD_DISABLE
+	/* Keep Backlight and DISP off */
+	at32_select_gpio( PIN_LCD_BL, AT32_GPIOF_OUTPUT );
+	gpio_request( PIN_LCD_BL, "LCD_BL" );
+	gpio_direction_output( PIN_LCD_BL, 0 );		/* Backlight */
+#else
+	gpio_set_value( PIN_LCD_DISP, 1 );	/* DISP asserted first */
+#ifdef CONFIG_BOARD_MRMT_BL_PWM
+	/* Use PWM for Backlight controls */
+	at32_add_device_pwm(1 << PWM_CH_BL);
+	platform_device_register(&rmt_pwm_led_dev);
+#else
+	/* Backlight always on */
+	udelay( 1 );
+	at32_select_gpio( PIN_LCD_BL, AT32_GPIOF_OUTPUT );
+	gpio_request( PIN_LCD_BL, "LCD_BL" );
+	gpio_direction_output( PIN_LCD_BL, 1 );
+#endif
+#endif
+
+	/* Make sure BT and Zigbee modules in reset */
+	at32_select_gpio( PIN_BT_RST, AT32_GPIOF_OUTPUT );
+	gpio_request( PIN_BT_RST, "BT_RST" );
+	gpio_direction_output( PIN_BT_RST, 1 );
+	/* BT Module in Reset */
+
+	at32_select_gpio( PIN_ZB_RST_N, AT32_GPIOF_OUTPUT );
+	gpio_request( PIN_ZB_RST_N, "ZB_RST_N" );
+	gpio_direction_output( PIN_ZB_RST_N, 0 );
+	/* XBee Module in Reset */
+
+#ifdef CONFIG_BOARD_MRMT_WIRELESS_ZB
+	udelay( 1000 );
+	/* Unreset the XBee Module */
+	gpio_set_value( PIN_ZB_RST_N, 1 );
+#endif
+#ifdef CONFIG_BOARD_MRMT_WIRELESS_BT
+	udelay( 1000 );
+	/* Unreset the BT Module */
+	gpio_set_value( PIN_BT_RST, 0 );
+#endif
+
+	return 0;
+}
+arch_initcall(mrmt1_init);
+
+static int __init mrmt1_early_init(void)
+{
+	/* To maintain power-on signal in case boot loader did not already */
+	at32_select_gpio( PIN_PWR_ON, AT32_GPIOF_OUTPUT );
+	gpio_request( PIN_PWR_ON, "PIN_PWR_ON" );
+	gpio_direction_output( PIN_PWR_ON, 1 );
+
+	return 0;
+}
+core_initcall(mrmt1_early_init);
diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c
index 5b022aa..bc299fb 100644
--- a/arch/avr32/boards/atngw100/setup.c
+++ b/arch/avr32/boards/atngw100/setup.c
@@ -56,8 +56,13 @@ static struct spi_board_info spi0_board_info[] __initdata = {
 static struct mci_platform_data __initdata mci0_data = {
 	.slot[0] = {
 		.bus_width	= 4,
+#if defined(CONFIG_BOARD_ATNGW100_EVKLCD10X) || defined(CONFIG_BOARD_ATNGW100_MRMT1)
+		.detect_pin     = GPIO_PIN_NONE,
+		.wp_pin         = GPIO_PIN_NONE,
+#else
 		.detect_pin	= GPIO_PIN_PC(25),
 		.wp_pin		= GPIO_PIN_PE(0),
+#endif
 	},
 };
 
diff --git a/arch/avr32/configs/atngw100_mrmt_defconfig b/arch/avr32/configs/atngw100_mrmt_defconfig
new file mode 100644
index 0000000..17b0307
--- /dev/null
+++ b/arch/avr32/configs/atngw100_mrmt_defconfig
@@ -0,0 +1,1363 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.30-rc1
+# Wed Jun  3 00:24:53 2009
+#
+CONFIG_AVR32=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_ARCH_HAS_ILOG2_U32 is not set
+# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_BUG=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+# CONFIG_BASE_FULL is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+# CONFIG_SLUB_DEBUG is not set
+CONFIG_COMPAT_BRK=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_CLK=y
+# CONFIG_SLOW_WORK is not set
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=1
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_BLOCK=y
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+# CONFIG_IOSCHED_AS is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_FREEZER is not set
+
+#
+# System Type and features
+#
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_SUBARCH_AVR32B=y
+CONFIG_MMU=y
+CONFIG_PERFORMANCE_COUNTERS=y
+CONFIG_PLATFORM_AT32AP=y
+CONFIG_CPU_AT32AP700X=y
+CONFIG_CPU_AT32AP7000=y
+# CONFIG_BOARD_ATSTK1000 is not set
+CONFIG_BOARD_ATNGW100=y
+# CONFIG_BOARD_HAMMERHEAD is not set
+# CONFIG_BOARD_FAVR_32 is not set
+# CONFIG_BOARD_MERISC is not set
+# CONFIG_BOARD_MIMC200 is not set
+# CONFIG_BOARD_ATNGW100_ADDON_NONE is not set
+# CONFIG_BOARD_ATNGW100_EVKLCD10X is not set
+CONFIG_BOARD_ATNGW100_MRMT=y
+CONFIG_BOARD_MRMT_REV1=y
+# CONFIG_BOARD_MRMT_REV2 is not set
+CONFIG_BOARD_MRMT_AC97=y
+# CONFIG_BOARD_MRMT_UCB1400_TS is not set
+CONFIG_BOARD_MRMT_ADS7846_TS=y
+# CONFIG_BOARD_MRMT_LCD_DISABLE is not set
+CONFIG_BOARD_MRMT_LCD_LQ043T3DX0X=y
+# CONFIG_BOARD_MRMT_LCD_KWH043GM08 is not set
+CONFIG_BOARD_MRMT_BL_PWM=y
+CONFIG_BOARD_MRMT_RTC_I2C=y
+CONFIG_BOARD_MRMT_WIRELESS_ZB=y
+# CONFIG_BOARD_MRMT_WIRELESS_BT is not set
+# CONFIG_BOARD_MRMT_WIRELESS_NONE is not set
+CONFIG_LOADER_U_BOOT=y
+
+#
+# Atmel AVR32 AP options
+#
+# CONFIG_AP700X_32_BIT_SMC is not set
+CONFIG_AP700X_16_BIT_SMC=y
+# CONFIG_AP700X_8_BIT_SMC is not set
+CONFIG_LOAD_ADDRESS=0x10000000
+CONFIG_ENTRY_ADDRESS=0x90000000
+CONFIG_PHYS_OFFSET=0x10000000
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_QUICKLIST=y
+# CONFIG_HAVE_ARCH_BOOTMEM is not set
+# CONFIG_ARCH_HAVE_MEMORY_PRESENT is not set
+# CONFIG_NEED_NODE_MEMMAP_SIZE is not set
+CONFIG_ARCH_FLATMEM_ENABLE=y
+# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
+# CONFIG_ARCH_SPARSEMEM_ENABLE is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_NR_QUICK=2
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+# CONFIG_OWNERSHIP_TRACE is not set
+# CONFIG_NMI_DEBUGGING is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_CMDLINE=""
+
+#
+# Power management options
+#
+CONFIG_PM=y
+# CONFIG_PM_DEBUG is not set
+# CONFIG_SUSPEND is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_AT32AP=y
+
+#
+# Bus options
+#
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+CONFIG_BT=m
+CONFIG_BT_L2CAP=m
+# CONFIG_BT_SCO is not set
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+# CONFIG_BT_BNEP is not set
+CONFIG_BT_HIDP=m
+
+#
+# Bluetooth device drivers
+#
+# CONFIG_BT_HCIBTSDIO is not set
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_BCSP=y
+# CONFIG_BT_HCIUART_LL is not set
+# CONFIG_BT_HCIVHCI is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+CONFIG_MTD_DATAFLASH=y
+# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set
+# CONFIG_MTD_DATAFLASH_OTP is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+CONFIG_MISC_DEVICES=y
+CONFIG_ATMEL_PWM=y
+# CONFIG_ATMEL_TCLIB is not set
+# CONFIG_ICS932S401 is not set
+# CONFIG_ATMEL_SSC is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+# CONFIG_EEPROM_AT25 is not set
+# CONFIG_EEPROM_LEGACY is not set
+# CONFIG_EEPROM_93CX6 is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+# CONFIG_SMSC_PHY is not set
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_MDIO_BITBANG is not set
+CONFIG_NET_ETHERNET=y
+# CONFIG_MII is not set
+CONFIG_MACB=y
+# CONFIG_ENC28J60 is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+CONFIG_KEYBOARD_GPIO=y
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_ADS7846=m
+# CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_GUNZE is not set
+# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
+# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
+# CONFIG_TOUCHSCREEN_MK712 is not set
+# CONFIG_TOUCHSCREEN_PENMOUNT is not set
+# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
+# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
+# CONFIG_TOUCHSCREEN_WM97XX is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+# CONFIG_TOUCHSCREEN_TSC2007 is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_ATMEL=y
+CONFIG_SERIAL_ATMEL_CONSOLE=y
+CONFIG_SERIAL_ATMEL_PDC=y
+# CONFIG_SERIAL_ATMEL_TTYAT is not set
+# CONFIG_SERIAL_MAX3100 is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_HELPER_AUTO=y
+CONFIG_I2C_ALGOBIT=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+CONFIG_I2C_GPIO=y
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_ATMEL=y
+# CONFIG_SPI_BITBANG is not set
+# CONFIG_SPI_GPIO is not set
+
+#
+# SPI Protocol Masters
+#
+CONFIG_SPI_SPIDEV=y
+# CONFIG_SPI_TLE62X0 is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_AD7414 is not set
+# CONFIG_SENSORS_AD7418 is not set
+# CONFIG_SENSORS_ADCXX is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1029 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ADT7462 is not set
+# CONFIG_SENSORS_ADT7470 is not set
+# CONFIG_SENSORS_ADT7473 is not set
+# CONFIG_SENSORS_ADT7475 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM70 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
+# CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
+# CONFIG_SENSORS_MAX1111 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_MAX6650 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_DME1737 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_THMC50 is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83793 is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83L786NG is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_SENSORS_LIS3_SPI is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+CONFIG_AT32AP700X_WDT=y
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_UCB1400_CORE is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_PMIC_DA903X is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+CONFIG_FB_ATMEL=y
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LCD_CLASS_DEVICE=y
+# CONFIG_LCD_LTV350QV is not set
+# CONFIG_LCD_ILI9320 is not set
+# CONFIG_LCD_TDO24M is not set
+# CONFIG_LCD_VGG2432A4 is not set
+# CONFIG_LCD_PLATFORM is not set
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+# CONFIG_BACKLIGHT_ATMEL_LCDC is not set
+# CONFIG_BACKLIGHT_ATMEL_PWM is not set
+CONFIG_BACKLIGHT_GENERIC=y
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE is not set
+# CONFIG_LOGO is not set
+CONFIG_SOUND=m
+CONFIG_SOUND_OSS_CORE=y
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+# CONFIG_SND_SEQUENCER is not set
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_PCM_OSS_PLUGINS=y
+# CONFIG_SND_DYNAMIC_MINORS is not set
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_VERBOSE_PROCFS is not set
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+CONFIG_SND_VMASTER=y
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_DRIVERS=y
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+# CONFIG_SND_AC97_POWER_SAVE is not set
+
+#
+# Atmel devices (AVR32 and AT91)
+#
+# CONFIG_SND_ATMEL_ABDAC is not set
+CONFIG_SND_ATMEL_AC97C=m
+# CONFIG_SND_SPI is not set
+# CONFIG_SND_SOC is not set
+# CONFIG_SOUND_PRIME is not set
+CONFIG_AC97_BUS=m
+CONFIG_HID_SUPPORT=y
+CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
+# CONFIG_HIDRAW is not set
+# CONFIG_HID_PID is not set
+
+#
+# Special HID drivers
+#
+# CONFIG_HID_APPLE is not set
+CONFIG_USB_SUPPORT=y
+# CONFIG_USB_ARCH_HAS_HCD is not set
+# CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
+# CONFIG_USB_MUSB_HDRC is not set
+# CONFIG_USB_GADGET_MUSB_HDRC is not set
+
+#
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
+#
+CONFIG_USB_GADGET=m
+# CONFIG_USB_GADGET_DEBUG is not set
+CONFIG_USB_GADGET_DEBUG_FILES=y
+# CONFIG_USB_GADGET_DEBUG_FS is not set
+CONFIG_USB_GADGET_VBUS_DRAW=2
+CONFIG_USB_GADGET_SELECTED=y
+# CONFIG_USB_GADGET_AT91 is not set
+CONFIG_USB_GADGET_ATMEL_USBA=y
+CONFIG_USB_ATMEL_USBA=m
+# CONFIG_USB_GADGET_FSL_USB2 is not set
+# CONFIG_USB_GADGET_LH7A40X is not set
+# CONFIG_USB_GADGET_OMAP is not set
+# CONFIG_USB_GADGET_PXA25X is not set
+# CONFIG_USB_GADGET_PXA27X is not set
+# CONFIG_USB_GADGET_S3C2410 is not set
+# CONFIG_USB_GADGET_IMX is not set
+# CONFIG_USB_GADGET_M66592 is not set
+# CONFIG_USB_GADGET_AMD5536UDC is not set
+# CONFIG_USB_GADGET_FSL_QE is not set
+# CONFIG_USB_GADGET_CI13XXX is not set
+# CONFIG_USB_GADGET_NET2280 is not set
+# CONFIG_USB_GADGET_GOKU is not set
+# CONFIG_USB_GADGET_DUMMY_HCD is not set
+CONFIG_USB_GADGET_DUALSPEED=y
+# CONFIG_USB_ZERO is not set
+# CONFIG_USB_ETH is not set
+# CONFIG_USB_GADGETFS is not set
+CONFIG_USB_FILE_STORAGE=m
+# CONFIG_USB_FILE_STORAGE_TEST is not set
+CONFIG_USB_G_SERIAL=m
+# CONFIG_USB_MIDI_GADGET is not set
+# CONFIG_USB_G_PRINTER is not set
+# CONFIG_USB_CDC_COMPOSITE is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_USB_GPIO_VBUS is not set
+# CONFIG_NOP_USB_XCEIV is not set
+CONFIG_MMC=y
+# CONFIG_MMC_DEBUG is not set
+# CONFIG_MMC_UNSAFE_RESUME is not set
+
+#
+# MMC/SD/SDIO Card Drivers
+#
+CONFIG_MMC_BLOCK=y
+CONFIG_MMC_BLOCK_BOUNCE=y
+# CONFIG_SDIO_UART is not set
+# CONFIG_MMC_TEST is not set
+
+#
+# MMC/SD/SDIO Host Controller Drivers
+#
+# CONFIG_MMC_SDHCI is not set
+CONFIG_MMC_ATMELMCI=y
+# CONFIG_MMC_ATMELMCI_DMA is not set
+# CONFIG_MMC_SPI is not set
+# CONFIG_MEMSTICK is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+
+#
+# LED drivers
+#
+CONFIG_LEDS_ATMEL_PWM=y
+# CONFIG_LEDS_PCA9532 is not set
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_GPIO_PLATFORM=y
+# CONFIG_LEDS_LP5521 is not set
+# CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_DAC124S085 is not set
+# CONFIG_LEDS_BD2802 is not set
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
+# CONFIG_LEDS_TRIGGER_GPIO is not set
+# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_RTC_LIB=m
+CONFIG_RTC_CLASS=m
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+CONFIG_RTC_DRV_S35390A=m
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+
+#
+# SPI RTC drivers
+#
+# CONFIG_RTC_DRV_M41T94 is not set
+# CONFIG_RTC_DRV_DS1305 is not set
+# CONFIG_RTC_DRV_DS1390 is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+# CONFIG_RTC_DRV_R9701 is not set
+# CONFIG_RTC_DRV_RS5C348 is not set
+# CONFIG_RTC_DRV_DS3234 is not set
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+CONFIG_RTC_DRV_AT32AP700X=m
+CONFIG_DMADEVICES=y
+
+#
+# DMA Devices
+#
+CONFIG_DW_DMAC=y
+CONFIG_DMA_ENGINE=y
+
+#
+# DMA Clients
+#
+# CONFIG_NET_DMA is not set
+# CONFIG_ASYNC_TX_DMA is not set
+# CONFIG_DMATEST is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_UIO=y
+# CONFIG_UIO_PDRV is not set
+# CONFIG_UIO_PDRV_GENIRQ is not set
+# CONFIG_UIO_SMX is not set
+# CONFIG_UIO_SERCOS3 is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+# CONFIG_EXT2_FS_POSIX_ACL is not set
+# CONFIG_EXT2_FS_SECURITY is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+# CONFIG_EXT4_FS is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=850
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+CONFIG_NTFS_FS=m
+# CONFIG_NTFS_DEBUG is not set
+CONFIG_NTFS_RW=y
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+# CONFIG_PROC_KCORE is not set
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_CONFIGFS_FS=y
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
+# CONFIG_JFFS2_SUMMARY is not set
+# CONFIG_JFFS2_FS_XATTR is not set
+# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
+CONFIG_JFFS2_ZLIB=y
+# CONFIG_JFFS2_LZO is not set
+CONFIG_JFFS2_RTIME=y
+# CONFIG_JFFS2_RUBIN is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+CONFIG_ROOT_NFS=y
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_SMB_NLS_REMOTE="cp437"
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+# CONFIG_CIFS_STATS2 is not set
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG2 is not set
+# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+CONFIG_NLS_CODEPAGE_850=y
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+CONFIG_NLS_UTF8=y
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+CONFIG_FRAME_POINTER=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_PAGE_POISONING is not set
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_SAMPLES is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+CONFIG_CRC_CCITT=y
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_GENERIC_ALLOCATOR=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
-- 
cgit v0.10.2


From c4865679df5453d1bc94c534f4d94a364c0546df Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 12:57:17 +0200
Subject: ALSA: ca0106 - Fix master volume scale

The master volume dB scale was wrongly defined as 0.50dB setp while
it must be 0.25dB step.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index 7724252..c8c6f43 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -739,7 +739,7 @@ static int __devinit rename_ctl(struct snd_card *card, const char *src, const ch
 	} while (0)
 
 static __devinitdata
-DECLARE_TLV_DB_SCALE(snd_ca0106_master_db_scale, -6375, 50, 1);
+DECLARE_TLV_DB_SCALE(snd_ca0106_master_db_scale, -6375, 25, 1);
 
 static char *slave_vols[] __devinitdata = {
 	"Analog Front Playback Volume",
-- 
cgit v0.10.2


From d436dd063be605dc29f17b2cb0b99a852db89bed Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 13:50:18 +0200
Subject: ALSA: ctxfi - Make volume controls more intuitive

Change the volume control to dB scale (as the raw data seems so).
Also added the TLV dB-scale information.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctmixer.c b/sound/pci/ctxfi/ctmixer.c
index fac783f..796156e 100644
--- a/sound/pci/ctxfi/ctmixer.c
+++ b/sound/pci/ctxfi/ctmixer.c
@@ -18,11 +18,12 @@
 
 #include "ctmixer.h"
 #include "ctamixer.h"
+#include <linux/slab.h>
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/asoundef.h>
 #include <sound/pcm.h>
-#include <linux/slab.h>
+#include <sound/tlv.h>
 
 enum CT_SUM_CTL {
 	SUM_IN_F,
@@ -292,6 +293,7 @@ set_switch_state(struct ct_mixer *mixer,
 		mixer->switch_state &= ~(0x1 << (type - SWH_MIXER_START));
 }
 
+#if 0 /* not used */
 /* Map integer value ranging from 0 to 65535 to 14-bit float value ranging
  * from 2^-6 to (1+1023/1024) */
 static unsigned int uint16_to_float14(unsigned int x)
@@ -331,6 +333,12 @@ static unsigned int float14_to_uint16(unsigned int x)
 
 	return x;
 }
+#endif /* not used */
+
+#define VOL_SCALE	0x1c
+#define VOL_MAX		0x100
+
+static const DECLARE_TLV_DB_SCALE(ct_vol_db_scale, -6400, 25, 1);
 
 static int ct_alsa_mix_volume_info(struct snd_kcontrol *kcontrol,
 				   struct snd_ctl_elem_info *uinfo)
@@ -338,8 +346,7 @@ static int ct_alsa_mix_volume_info(struct snd_kcontrol *kcontrol,
 	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
 	uinfo->count = 2;
 	uinfo->value.integer.min = 0;
-	uinfo->value.integer.max = 43690;
-	uinfo->value.integer.step = 128;
+	uinfo->value.integer.max = VOL_MAX;
 
 	return 0;
 }
@@ -349,15 +356,18 @@ static int ct_alsa_mix_volume_get(struct snd_kcontrol *kcontrol,
 {
 	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
 	enum CT_AMIXER_CTL type = get_amixer_index(kcontrol->private_value);
-	struct amixer *amixer = NULL;
-	int i = 0;
+	struct amixer *amixer;
+	int i, val;
 
 	for (i = 0; i < 2; i++) {
 		amixer = ((struct ct_mixer *)atc->mixer)->
 						amixers[type*CHN_NUM+i];
-		/* Convert 14-bit float-point scale to 16-bit integer volume */
-		ucontrol->value.integer.value[i] =
-		(float14_to_uint16(amixer->ops->get_scale(amixer)) & 0xffff);
+		val = amixer->ops->get_scale(amixer) / VOL_SCALE;
+		if (val < 0)
+			val = 0;
+		else if (val > VOL_MAX)
+			val = VOL_MAX;
+		ucontrol->value.integer.value[i] = val;
 	}
 
 	return 0;
@@ -369,16 +379,19 @@ static int ct_alsa_mix_volume_put(struct snd_kcontrol *kcontrol,
 	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
 	struct ct_mixer *mixer = atc->mixer;
 	enum CT_AMIXER_CTL type = get_amixer_index(kcontrol->private_value);
-	struct amixer *amixer = NULL;
-	int i = 0, j = 0, change = 0, val = 0;
+	struct amixer *amixer;
+	int i, j, val, oval, change = 0;
 
 	for (i = 0; i < 2; i++) {
-		/* Convert 16-bit integer volume to 14-bit float-point scale */
-		val = (ucontrol->value.integer.value[i] & 0xffff);
+		val = ucontrol->value.integer.value[i];
+		if (val < 0)
+			val = 0;
+		else if (val > VOL_MAX)
+			val = VOL_MAX;
+		val *= VOL_SCALE;
 		amixer = mixer->amixers[type*CHN_NUM+i];
-		if ((float14_to_uint16(amixer->ops->get_scale(amixer)) & 0xff80)
-				!= (val & 0xff80)) {
-			val = uint16_to_float14(val);
+		oval = amixer->ops->get_scale(amixer);
+		if (val != oval) {
 			amixer->ops->set_scale(amixer, val);
 			amixer->ops->commit_write(amixer);
 			change = 1;
@@ -398,11 +411,13 @@ static int ct_alsa_mix_volume_put(struct snd_kcontrol *kcontrol,
 }
 
 static struct snd_kcontrol_new vol_ctl = {
-	.access		= SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.access		= SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			  SNDRV_CTL_ELEM_ACCESS_TLV_READ,
 	.iface		= SNDRV_CTL_ELEM_IFACE_MIXER,
 	.info		= ct_alsa_mix_volume_info,
 	.get		= ct_alsa_mix_volume_get,
-	.put		= ct_alsa_mix_volume_put
+	.put		= ct_alsa_mix_volume_put,
+	.tlv		= { .p =  ct_vol_db_scale },
 };
 
 static void
-- 
cgit v0.10.2


From 4836ac655410e7f126d316b0be062b38746f7529 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 14:49:26 +0200
Subject: ALSA: ctxfi - Fix DMA mask for emu20k2 chip

Allow 64bit DMA mask for emu20k2 chip, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
index 041199f..edbfb48 100644
--- a/sound/pci/ctxfi/cthw20k2.c
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -26,7 +26,11 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 
-#define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bits */
+#if BITS_PER_LONG == 32
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bit PTE */
+#else
+#define CT_XFI_DMA_MASK		DMA_BIT_MASK(64) /* 64 bit PTE */
+#endif
 
 static u32 hw_read_20kx(struct hw *hw, u32 reg);
 static void hw_write_20kx(struct hw *hw, u32 reg, u32 data);
@@ -1834,18 +1838,16 @@ static int hw_card_start(struct hw *hw)
 	int err = 0;
 	struct pci_dev *pci = hw->pci;
 	unsigned int gctl;
-	unsigned int dma_mask = 0;
 
 	err = pci_enable_device(pci);
 	if (err < 0)
 		return err;
 
 	/* Set DMA transfer mask */
-	dma_mask = CT_XFI_DMA_MASK;
-	if (pci_set_dma_mask(pci, dma_mask) < 0 ||
-	    pci_set_consistent_dma_mask(pci, dma_mask) < 0) {
+	if (pci_set_dma_mask(pci, CT_XFI_DMA_MASK) < 0 ||
+	    pci_set_consistent_dma_mask(pci, CT_XFI_DMA_MASK) < 0) {
 		printk(KERN_ERR "ctxfi: architecture does not support PCI "
-		"busmaster DMA with mask 0x%x\n", dma_mask);
+		"busmaster DMA with mask 0x%llx\n", CT_XFI_DMA_MASK);
 		err = -ENXIO;
 		goto error1;
 	}
-- 
cgit v0.10.2


From 514eef9c2a711b4c24b97bb456d39695a6fe1775 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 14:57:57 +0200
Subject: ALSA: ctxfi - Remove useless initializations and cast

Remove useless variable initializations and cast at the beginning of
functions.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctamixer.c b/sound/pci/ctxfi/ctamixer.c
index 859e996..a1db51b3 100644
--- a/sound/pci/ctxfi/ctamixer.c
+++ b/sound/pci/ctxfi/ctamixer.c
@@ -58,9 +58,9 @@ static struct rsc_ops amixer_basic_rsc_ops = {
 
 static int amixer_set_input(struct amixer *amixer, struct rsc *rsc)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)amixer->rsc.hw;
+	hw = amixer->rsc.hw;
 	hw->amixer_set_mode(amixer->rsc.ctrl_blk, AMIXER_Y_IMMEDIATE);
 	amixer->input = rsc;
 	if (NULL == rsc)
@@ -75,9 +75,9 @@ static int amixer_set_input(struct amixer *amixer, struct rsc *rsc)
 /* y is a 14-bit immediate constant */
 static int amixer_set_y(struct amixer *amixer, unsigned int y)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)amixer->rsc.hw;
+	hw = amixer->rsc.hw;
 	hw->amixer_set_y(amixer->rsc.ctrl_blk, y);
 
 	return 0;
@@ -85,9 +85,9 @@ static int amixer_set_y(struct amixer *amixer, unsigned int y)
 
 static int amixer_set_invalid_squash(struct amixer *amixer, unsigned int iv)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)amixer->rsc.hw;
+	hw = amixer->rsc.hw;
 	hw->amixer_set_iv(amixer->rsc.ctrl_blk, iv);
 
 	return 0;
@@ -95,9 +95,9 @@ static int amixer_set_invalid_squash(struct amixer *amixer, unsigned int iv)
 
 static int amixer_set_sum(struct amixer *amixer, struct sum *sum)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)amixer->rsc.hw;
+	hw = amixer->rsc.hw;
 	amixer->sum = sum;
 	if (NULL == sum) {
 		hw->amixer_set_se(amixer->rsc.ctrl_blk, 0);
@@ -112,13 +112,13 @@ static int amixer_set_sum(struct amixer *amixer, struct sum *sum)
 
 static int amixer_commit_write(struct amixer *amixer)
 {
-	struct hw *hw = NULL;
-	unsigned int index = 0;
-	int i = 0;
-	struct rsc *input = NULL;
-	struct sum *sum = NULL;
+	struct hw *hw;
+	unsigned int index;
+	int i;
+	struct rsc *input;
+	struct sum *sum;
 
-	hw = (struct hw *)amixer->rsc.hw;
+	hw = amixer->rsc.hw;
 	input = amixer->input;
 	sum = amixer->sum;
 
@@ -158,10 +158,10 @@ static int amixer_commit_write(struct amixer *amixer)
 
 static int amixer_commit_raw_write(struct amixer *amixer)
 {
-	struct hw *hw = NULL;
-	unsigned int index = 0;
+	struct hw *hw;
+	unsigned int index;
 
-	hw = (struct hw *)amixer->rsc.hw;
+	hw = amixer->rsc.hw;
 	index = amixer->rsc.ops->output_slot(&amixer->rsc);
 	hw->amixer_commit_write(hw, index, amixer->rsc.ctrl_blk);
 
@@ -170,9 +170,9 @@ static int amixer_commit_raw_write(struct amixer *amixer)
 
 static int amixer_get_y(struct amixer *amixer)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)amixer->rsc.hw;
+	hw = amixer->rsc.hw;
 	return hw->amixer_get_y(amixer->rsc.ctrl_blk);
 }
 
@@ -201,7 +201,7 @@ static int amixer_rsc_init(struct amixer *amixer,
 			   const struct amixer_desc *desc,
 			   struct amixer_mgr *mgr)
 {
-	int err = 0;
+	int err;
 
 	err = rsc_init(&amixer->rsc, amixer->idx[0],
 			AMIXER, desc->msr, mgr->mgr.hw);
@@ -233,9 +233,9 @@ static int get_amixer_rsc(struct amixer_mgr *mgr,
 			  const struct amixer_desc *desc,
 			  struct amixer **ramixer)
 {
-	int err = 0, i = 0;
-	unsigned int idx = 0;
-	struct amixer *amixer = NULL;
+	int err, i;
+	unsigned int idx;
+	struct amixer *amixer;
 	unsigned long flags;
 
 	*ramixer = NULL;
@@ -284,7 +284,7 @@ error:
 static int put_amixer_rsc(struct amixer_mgr *mgr, struct amixer *amixer)
 {
 	unsigned long flags;
-	int i = 0;
+	int i;
 
 	spin_lock_irqsave(&mgr->mgr_lock, flags);
 	for (i = 0; i < amixer->rsc.msr; i++)
@@ -299,7 +299,7 @@ static int put_amixer_rsc(struct amixer_mgr *mgr, struct amixer *amixer)
 
 int amixer_mgr_create(void *hw, struct amixer_mgr **ramixer_mgr)
 {
-	int err = 0;
+	int err;
 	struct amixer_mgr *amixer_mgr;
 
 	*ramixer_mgr = NULL;
@@ -367,7 +367,7 @@ static int sum_rsc_init(struct sum *sum,
 			const struct sum_desc *desc,
 			struct sum_mgr *mgr)
 {
-	int err = 0;
+	int err;
 
 	err = rsc_init(&sum->rsc, sum->idx[0], SUM, desc->msr, mgr->mgr.hw);
 	if (err)
@@ -388,9 +388,9 @@ static int get_sum_rsc(struct sum_mgr *mgr,
 		       const struct sum_desc *desc,
 		       struct sum **rsum)
 {
-	int err = 0, i = 0;
-	unsigned int idx = 0;
-	struct sum *sum = NULL;
+	int err, i;
+	unsigned int idx;
+	struct sum *sum;
 	unsigned long flags;
 
 	*rsum = NULL;
@@ -438,7 +438,7 @@ error:
 static int put_sum_rsc(struct sum_mgr *mgr, struct sum *sum)
 {
 	unsigned long flags;
-	int i = 0;
+	int i;
 
 	spin_lock_irqsave(&mgr->mgr_lock, flags);
 	for (i = 0; i < sum->rsc.msr; i++)
@@ -453,7 +453,7 @@ static int put_sum_rsc(struct sum_mgr *mgr, struct sum *sum)
 
 int sum_mgr_create(void *hw, struct sum_mgr **rsum_mgr)
 {
-	int err = 0;
+	int err;
 	struct sum_mgr *sum_mgr;
 
 	*rsum_mgr = NULL;
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 9b13245..7898a37 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -190,8 +190,8 @@ static unsigned int convert_format(snd_pcm_format_t snd_format)
 static unsigned int
 atc_get_pitch(unsigned int input_rate, unsigned int output_rate)
 {
-	unsigned int pitch = 0;
-	int b = 0;
+	unsigned int pitch;
+	int b;
 
 	/* get pitch and convert to fixed-point 8.24 format. */
 	pitch = (input_rate / output_rate) << 24;
@@ -241,12 +241,12 @@ static int atc_pcm_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
 	struct src_desc desc = {0};
 	struct amixer_desc mix_dsc = {0};
-	struct src *src = NULL;
-	struct amixer *amixer = NULL;
-	int err = 0;
+	struct src *src;
+	struct amixer *amixer;
+	int err;
 	int n_amixer = apcm->substream->runtime->channels, i = 0;
 	int device = apcm->substream->pcm->device;
-	unsigned int pitch = 0;
+	unsigned int pitch;
 	unsigned long flags;
 
 	if (NULL != apcm->src) {
@@ -324,8 +324,8 @@ atc_pcm_release_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	struct srcimp_mgr *srcimp_mgr = atc->rsc_mgrs[SRCIMP];
 	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
 	struct sum_mgr *sum_mgr = atc->rsc_mgrs[SUM];
-	struct srcimp *srcimp = NULL;
-	int i = 0;
+	struct srcimp *srcimp;
+	int i;
 
 	if (NULL != apcm->srcimps) {
 		for (i = 0; i < apcm->n_srcimp; i++) {
@@ -377,7 +377,7 @@ atc_pcm_release_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 
 static int atc_pcm_playback_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
-	unsigned int max_cisz = 0;
+	unsigned int max_cisz;
 	struct src *src = apcm->src;
 
 	max_cisz = src->multi * src->rsc.msr;
@@ -398,8 +398,8 @@ static int atc_pcm_playback_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 
 static int atc_pcm_stop(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
-	struct src *src = NULL;
-	int i = 0;
+	struct src *src;
+	int i;
 
 	ct_timer_stop(apcm->timer);
 
@@ -426,8 +426,8 @@ static int
 atc_pcm_playback_position(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
 	struct src *src = apcm->src;
-	u32 size = 0, max_cisz = 0;
-	int position = 0;
+	u32 size, max_cisz;
+	int position;
 
 	position = src->ops->get_ca(src);
 
@@ -449,7 +449,7 @@ struct src_node_conf_t {
 static void setup_src_node_conf(struct ct_atc *atc, struct ct_atc_pcm *apcm,
 				struct src_node_conf_t *conf, int *n_srcc)
 {
-	unsigned int pitch = 0;
+	unsigned int pitch;
 
 	/* get pitch and convert to fixed-point 8.24 format. */
 	pitch = atc_get_pitch((atc->rsr * atc->msr),
@@ -494,14 +494,14 @@ atc_pcm_capture_get_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
 	struct sum_mgr *sum_mgr = atc->rsc_mgrs[SUM];
 	struct src_desc src_dsc = {0};
-	struct src *src = NULL;
+	struct src *src;
 	struct srcimp_desc srcimp_dsc = {0};
-	struct srcimp *srcimp = NULL;
+	struct srcimp *srcimp;
 	struct amixer_desc mix_dsc = {0};
 	struct sum_desc sum_dsc = {0};
-	unsigned int pitch = 0;
-	int multi = 0, err = 0, i = 0;
-	int n_srcimp = 0, n_amixer = 0, n_srcc = 0, n_sum = 0;
+	unsigned int pitch;
+	int multi, err, i;
+	int n_srcimp, n_amixer, n_srcc, n_sum;
 	struct src_node_conf_t src_node_conf[2] = {{0} };
 
 	/* first release old resources */
@@ -518,8 +518,8 @@ atc_pcm_capture_get_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 
 	setup_src_node_conf(atc, apcm, src_node_conf, &n_srcc);
 	n_sum = (1 == multi) ? 1 : 0;
-	n_amixer += n_sum * 2 + n_srcc;
-	n_srcimp += n_srcc;
+	n_amixer = n_sum * 2 + n_srcc;
+	n_srcimp = n_srcc;
 	if ((multi > 1) && (0x8000000 >= pitch)) {
 		/* Need extra AMIXERs and SRCIMPs for special treatment
 		 * of interleaved recording of conjugate channels */
@@ -633,14 +633,14 @@ error1:
 
 static int atc_pcm_capture_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
-	struct src *src = NULL;
-	struct amixer *amixer = NULL;
-	struct srcimp *srcimp = NULL;
+	struct src *src;
+	struct amixer *amixer;
+	struct srcimp *srcimp;
 	struct ct_mixer *mixer = atc->mixer;
-	struct sum *mono = NULL;
+	struct sum *mono;
 	struct rsc *out_ports[8] = {NULL};
-	int err = 0, i = 0, j = 0, n_sum = 0, multi = 0;
-	unsigned int pitch = 0;
+	int err, i, j, n_sum, multi;
+	unsigned int pitch;
 	int mix_base = 0, imp_base = 0;
 
 	if (NULL != apcm->src) {
@@ -714,9 +714,9 @@ static int atc_pcm_capture_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 
 static int atc_pcm_capture_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
-	struct src *src = NULL;
+	struct src *src;
 	struct src_mgr *src_mgr = atc->rsc_mgrs[SRC];
-	int i = 0, multi = 0;
+	int i, multi;
 
 	if (apcm->started)
 		return 0;
@@ -776,10 +776,10 @@ static int spdif_passthru_playback_get_resources(struct ct_atc *atc,
 	struct amixer_mgr *amixer_mgr = atc->rsc_mgrs[AMIXER];
 	struct src_desc desc = {0};
 	struct amixer_desc mix_dsc = {0};
-	struct src *src = NULL;
-	int err = 0;
-	int n_amixer = apcm->substream->runtime->channels, i = 0;
-	unsigned int pitch = 0, rsr = atc->pll_rate;
+	struct src *src;
+	int err;
+	int n_amixer = apcm->substream->runtime->channels, i;
+	unsigned int pitch, rsr = atc->pll_rate;
 
 	/* first release old resources */
 	atc->pcm_release_resources(atc, apcm);
@@ -832,15 +832,24 @@ error1:
 	return err;
 }
 
+static int atc_pll_init(struct ct_atc *atc, int rate)
+{
+	struct hw *hw = atc->hw;
+	int err;
+	err = hw->pll_init(hw, rate);
+	atc->pll_rate = err ? 0 : rate;
+	return err;
+}
+
 static int
 spdif_passthru_playback_setup(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
 	struct dao *dao = container_of(atc->daios[SPDIFOO], struct dao, daio);
 	unsigned long flags;
 	unsigned int rate = apcm->substream->runtime->rate;
-	unsigned int status = 0;
-	int err = 0;
-	unsigned char iec958_con_fs = 0;
+	unsigned int status;
+	int err;
+	unsigned char iec958_con_fs;
 
 	switch (rate) {
 	case 48000:
@@ -864,10 +873,8 @@ spdif_passthru_playback_setup(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 		dao->ops->set_spos(dao, status);
 		dao->ops->commit_write(dao);
 	}
-	if ((rate != atc->pll_rate) && (32000 != rate)) {
-		err = ((struct hw *)atc->hw)->pll_init(atc->hw, rate);
-		atc->pll_rate = err ? 0 : rate;
-	}
+	if ((rate != atc->pll_rate) && (32000 != rate))
+		err = atc_pll_init(atc, rate);
 	spin_unlock_irqrestore(&atc->atc_lock, flags);
 
 	return err;
@@ -876,11 +883,11 @@ spdif_passthru_playback_setup(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 static int
 spdif_passthru_playback_prepare(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
-	struct src *src = NULL;
-	struct amixer *amixer = NULL;
-	struct dao *dao = NULL;
-	int err = 0;
-	int i = 0;
+	struct src *src;
+	struct amixer *amixer;
+	struct dao *dao;
+	int err;
+	int i;
 	unsigned long flags;
 
 	if (NULL != apcm->src)
@@ -924,7 +931,7 @@ static int atc_select_line_in(struct ct_atc *atc)
 {
 	struct hw *hw = atc->hw;
 	struct ct_mixer *mixer = atc->mixer;
-	struct src *src = NULL;
+	struct src *src;
 
 	if (hw->is_adc_source_selected(hw, ADC_LINEIN))
 		return 0;
@@ -946,7 +953,7 @@ static int atc_select_mic_in(struct ct_atc *atc)
 {
 	struct hw *hw = atc->hw;
 	struct ct_mixer *mixer = atc->mixer;
-	struct src *src = NULL;
+	struct src *src;
 
 	if (hw->is_adc_source_selected(hw, ADC_MICIN))
 		return 0;
@@ -1063,8 +1070,8 @@ static int atc_spdif_out_passthru(struct ct_atc *atc, unsigned char state)
 {
 	unsigned long flags;
 	struct dao_desc da_dsc = {0};
-	struct dao *dao = NULL;
-	int err = 0;
+	struct dao *dao;
+	int err;
 	struct ct_mixer *mixer = atc->mixer;
 	struct rsc *rscs[2] = {NULL};
 	unsigned int spos = 0;
@@ -1082,11 +1089,8 @@ static int atc_spdif_out_passthru(struct ct_atc *atc, unsigned char state)
 		dao->ops->set_left_input(dao, rscs[0]);
 		dao->ops->set_right_input(dao, rscs[1]);
 		/* Restore PLL to atc->rsr if needed. */
-		if (atc->pll_rate != atc->rsr) {
-			err = ((struct hw *)atc->hw)->pll_init(atc->hw,
-							       atc->rsr);
-			atc->pll_rate = err ? 0 : atc->rsr;
-		}
+		if (atc->pll_rate != atc->rsr)
+			err = atc_pll_init(atc, atc->rsr);
 	}
 	dao->ops->set_spos(dao, spos);
 	dao->ops->commit_write(dao);
@@ -1097,15 +1101,15 @@ static int atc_spdif_out_passthru(struct ct_atc *atc, unsigned char state)
 
 static int ct_atc_destroy(struct ct_atc *atc)
 {
-	struct daio_mgr *daio_mgr = NULL;
-	struct dao *dao = NULL;
-	struct dai *dai = NULL;
-	struct daio *daio = NULL;
-	struct sum_mgr *sum_mgr = NULL;
-	struct src_mgr *src_mgr = NULL;
-	struct srcimp_mgr *srcimp_mgr = NULL;
-	struct srcimp *srcimp = NULL;
-	struct ct_mixer *mixer = NULL;
+	struct daio_mgr *daio_mgr;
+	struct dao *dao;
+	struct dai *dai;
+	struct daio *daio;
+	struct sum_mgr *sum_mgr;
+	struct src_mgr *src_mgr;
+	struct srcimp_mgr *srcimp_mgr;
+	struct srcimp *srcimp;
+	struct ct_mixer *mixer;
 	int i = 0;
 
 	if (NULL == atc)
@@ -1279,9 +1283,9 @@ int __devinit ct_atc_create_alsa_devs(struct ct_atc *atc)
 
 static int __devinit atc_create_hw_devs(struct ct_atc *atc)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 	struct card_conf info = {0};
-	int i = 0, err = 0;
+	int i, err;
 
 	err = create_hw_obj(atc->pci, &hw);
 	if (err) {
@@ -1316,14 +1320,14 @@ static int __devinit atc_create_hw_devs(struct ct_atc *atc)
 static int __devinit atc_get_resources(struct ct_atc *atc)
 {
 	struct daio_desc da_desc = {0};
-	struct daio_mgr *daio_mgr = NULL;
+	struct daio_mgr *daio_mgr;
 	struct src_desc src_dsc = {0};
-	struct src_mgr *src_mgr = NULL;
+	struct src_mgr *src_mgr;
 	struct srcimp_desc srcimp_dsc = {0};
-	struct srcimp_mgr *srcimp_mgr = NULL;
+	struct srcimp_mgr *srcimp_mgr;
 	struct sum_desc sum_dsc = {0};
-	struct sum_mgr *sum_mgr = NULL;
-	int err = 0, i = 0;
+	struct sum_mgr *sum_mgr;
+	int err, i;
 	unsigned short subsys_id;
 
 	atc->daios = kzalloc(sizeof(void *)*(DAIONUM), GFP_KERNEL);
@@ -1428,8 +1432,8 @@ atc_connect_dai(struct src_mgr *src_mgr, struct dai *dai,
 		struct src **srcs, struct srcimp **srcimps)
 {
 	struct rsc *rscs[2] = {NULL};
-	struct src *src = NULL;
-	struct srcimp *srcimp = NULL;
+	struct src *src;
+	struct srcimp *srcimp;
 	int i = 0;
 
 	rscs[0] = &dai->daio.rscl;
@@ -1464,13 +1468,13 @@ atc_connect_dai(struct src_mgr *src_mgr, struct dai *dai,
 
 static void __devinit atc_connect_resources(struct ct_atc *atc)
 {
-	struct dai *dai = NULL;
-	struct dao *dao = NULL;
-	struct src *src = NULL;
-	struct sum *sum = NULL;
-	struct ct_mixer *mixer = NULL;
+	struct dai *dai;
+	struct dao *dao;
+	struct src *src;
+	struct sum *sum;
+	struct ct_mixer *mixer;
 	struct rsc *rscs[2] = {NULL};
-	int i = 0, j = 0;
+	int i, j;
 
 	mixer = atc->mixer;
 
@@ -1553,11 +1557,11 @@ static struct ct_atc atc_preset __devinitdata = {
 int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 		  unsigned int rsr, unsigned int msr, struct ct_atc **ratc)
 {
-	struct ct_atc *atc = NULL;
+	struct ct_atc *atc;
 	static struct snd_device_ops ops = {
 		.dev_free = atc_dev_free,
 	};
-	int err = 0;
+	int err;
 
 	*ratc = NULL;
 
diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c
index a2aea39..befead4 100644
--- a/sound/pci/ctxfi/ctdaio.c
+++ b/sound/pci/ctxfi/ctdaio.c
@@ -168,9 +168,9 @@ static int dao_commit_write(struct dao *dao)
 
 static int dao_set_left_input(struct dao *dao, struct rsc *input)
 {
-	struct imapper *entry = NULL;
+	struct imapper *entry;
 	struct daio *daio = &dao->daio;
-	int i = 0;
+	int i;
 
 	entry = kzalloc((sizeof(*entry) * daio->rscl.msr), GFP_KERNEL);
 	if (NULL == entry)
@@ -196,9 +196,9 @@ static int dao_set_left_input(struct dao *dao, struct rsc *input)
 
 static int dao_set_right_input(struct dao *dao, struct rsc *input)
 {
-	struct imapper *entry = NULL;
+	struct imapper *entry;
 	struct daio *daio = &dao->daio;
-	int i = 0;
+	int i;
 
 	entry = kzalloc((sizeof(*entry) * daio->rscr.msr), GFP_KERNEL);
 	if (NULL == entry)
@@ -224,9 +224,9 @@ static int dao_set_right_input(struct dao *dao, struct rsc *input)
 
 static int dao_clear_left_input(struct dao *dao)
 {
-	struct imapper *entry = NULL;
+	struct imapper *entry;
 	struct daio *daio = &dao->daio;
-	int i = 0;
+	int i;
 
 	if (NULL == dao->imappers[0])
 		return 0;
@@ -248,9 +248,9 @@ static int dao_clear_left_input(struct dao *dao)
 
 static int dao_clear_right_input(struct dao *dao)
 {
-	struct imapper *entry = NULL;
+	struct imapper *entry;
 	struct daio *daio = &dao->daio;
-	int i = 0;
+	int i;
 
 	if (NULL == dao->imappers[daio->rscl.msr])
 		return 0;
@@ -299,7 +299,7 @@ static int dai_set_srt_srcr(struct dai *dai, struct rsc *src)
 
 static int dai_set_srt_msr(struct dai *dai, unsigned int msr)
 {
-	unsigned int rsr = 0;
+	unsigned int rsr;
 
 	for (rsr = 0; msr > 1; msr >>= 1)
 		rsr++;
@@ -340,8 +340,8 @@ static int daio_rsc_init(struct daio *daio,
 			 const struct daio_desc *desc,
 			 void *hw)
 {
-	int err = 0;
-	unsigned int idx_l = 0, idx_r = 0;
+	int err;
+	unsigned int idx_l, idx_r;
 
 	switch (((struct hw *)hw)->get_chip_type(hw)) {
 	case ATC20K1:
@@ -400,8 +400,8 @@ static int dao_rsc_init(struct dao *dao,
 			struct daio_mgr *mgr)
 {
 	struct hw *hw = mgr->mgr.hw;
-	unsigned int conf = 0;
-	int err = 0;
+	unsigned int conf;
+	int err;
 
 	err = daio_rsc_init(&dao->daio, desc, mgr->mgr.hw);
 	if (err)
@@ -423,7 +423,7 @@ static int dao_rsc_init(struct dao *dao,
 			daio_device_index(dao->daio.type, hw));
 	hw->daio_mgr_commit_write(hw, mgr->mgr.ctrl_blk);
 
-	conf |= (desc->msr & 0x7) | (desc->passthru << 3);
+	conf = (desc->msr & 0x7) | (desc->passthru << 3);
 	hw->daio_mgr_dao_init(mgr->mgr.ctrl_blk,
 			daio_device_index(dao->daio.type, hw), conf);
 	hw->daio_mgr_enb_dao(mgr->mgr.ctrl_blk,
@@ -475,9 +475,9 @@ static int dai_rsc_init(struct dai *dai,
 			const struct daio_desc *desc,
 			struct daio_mgr *mgr)
 {
-	int err = 0;
+	int err;
 	struct hw *hw = mgr->mgr.hw;
-	unsigned int rsr = 0, msr = 0;
+	unsigned int rsr, msr;
 
 	err = daio_rsc_init(&dai->daio, desc, mgr->mgr.hw);
 	if (err)
@@ -536,7 +536,7 @@ static int get_daio_rsc(struct daio_mgr *mgr,
 			const struct daio_desc *desc,
 			struct daio **rdaio)
 {
-	int err = 0;
+	int err;
 	struct dai *dai = NULL;
 	struct dao *dao = NULL;
 	unsigned long flags;
@@ -660,7 +660,7 @@ static int daio_map_op(void *data, struct imapper *entry)
 static int daio_imap_add(struct daio_mgr *mgr, struct imapper *entry)
 {
 	unsigned long flags;
-	int err = 0;
+	int err;
 
 	spin_lock_irqsave(&mgr->imap_lock, flags);
 	if ((0 == entry->addr) && (mgr->init_imap_added)) {
@@ -677,7 +677,7 @@ static int daio_imap_add(struct daio_mgr *mgr, struct imapper *entry)
 static int daio_imap_delete(struct daio_mgr *mgr, struct imapper *entry)
 {
 	unsigned long flags;
-	int err = 0;
+	int err;
 
 	spin_lock_irqsave(&mgr->imap_lock, flags);
 	err = input_mapper_delete(&mgr->imappers, entry, daio_map_op, mgr);
@@ -701,7 +701,7 @@ static int daio_mgr_commit_write(struct daio_mgr *mgr)
 
 int daio_mgr_create(void *hw, struct daio_mgr **rdaio_mgr)
 {
-	int err = 0, i = 0;
+	int err, i;
 	struct daio_mgr *daio_mgr;
 	struct imapper *entry;
 
diff --git a/sound/pci/ctxfi/cthardware.c b/sound/pci/ctxfi/cthardware.c
index 53d1aca..5ec6813 100644
--- a/sound/pci/ctxfi/cthardware.c
+++ b/sound/pci/ctxfi/cthardware.c
@@ -22,7 +22,7 @@
 
 static enum CHIPTYP __devinitdata get_chip_type(struct hw *hw)
 {
-	enum CHIPTYP type = ATCNONE;
+	enum CHIPTYP type;
 
 	switch (hw->pci->device) {
 	case 0x0005:	/* 20k1 device */
@@ -41,7 +41,7 @@ static enum CHIPTYP __devinitdata get_chip_type(struct hw *hw)
 
 int __devinit create_hw_obj(struct pci_dev *pci, struct hw **rhw)
 {
-	int err = 0;
+	int err;
 
 	switch (pci->device) {
 	case 0x0005:	/* 20k1 device */
@@ -65,7 +65,7 @@ int __devinit create_hw_obj(struct pci_dev *pci, struct hw **rhw)
 
 int destroy_hw_obj(struct hw *hw)
 {
-	int err = 0;
+	int err;
 
 	switch (hw->pci->device) {
 	case 0x0005:	/* 20k1 device */
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index b165466..38b87b6 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -369,7 +369,7 @@ static unsigned int src_param_pitch_mixer(unsigned int src_idx)
 static int src_commit_write(struct hw *hw, unsigned int idx, void *blk)
 {
 	struct src_rsc_ctrl_blk *ctl = blk;
-	int i = 0;
+	int i;
 
 	if (ctl->dirty.bf.czbfs) {
 		/* Clear Z-Buffer registers */
@@ -468,8 +468,8 @@ static int src_mgr_dsb_src(void *blk, unsigned int idx)
 static int src_mgr_commit_write(struct hw *hw, void *blk)
 {
 	struct src_mgr_ctrl_blk *ctl = blk;
-	int i = 0;
-	unsigned int ret = 0;
+	int i;
+	unsigned int ret;
 
 	if (ctl->dirty.bf.enbsa) {
 		do {
@@ -1108,7 +1108,7 @@ static int daio_mgr_set_imapaddr(void *blk, unsigned int addr)
 static int daio_mgr_commit_write(struct hw *hw, void *blk)
 {
 	struct daio_mgr_ctrl_blk *ctl = blk;
-	int i = 0;
+	int i;
 
 	if (ctl->dirty.bf.i2sictl || ctl->dirty.bf.i2soctl) {
 		for (i = 0; i < 4; i++) {
@@ -1212,8 +1212,8 @@ struct trn_conf {
 
 static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
 {
-	u32 i2sorg = 0;
-	u32 spdorg = 0;
+	u32 i2sorg;
+	u32 spdorg;
 
 	/* Read I2S CTL.  Keep original value. */
 	/*i2sorg = hw_read_20kx(hw, I2SCTL);*/
@@ -1263,8 +1263,8 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
 /* TRANSPORT operations */
 static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
 {
-	u32 trnctl = 0;
-	unsigned long ptp_phys_low = 0, ptp_phys_high = 0;
+	u32 trnctl;
+	u32 ptp_phys_low, ptp_phys_high;
 
 	/* Set up device page table */
 	if ((~0UL) == info->vm_pgt_phys) {
@@ -1316,7 +1316,7 @@ static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
 static int hw_pll_init(struct hw *hw, unsigned int rsr)
 {
 	unsigned int pllctl;
-	int i = 0;
+	int i;
 
 	pllctl = (48000 == rsr) ? 0x1480a001 : 0x1480a731;
 	for (i = 0; i < 3; i++) {
@@ -1384,7 +1384,7 @@ static void i2c_lock(struct hw *hw)
 
 static void i2c_write(struct hw *hw, u32 device, u32 addr, u32 data)
 {
-	unsigned int ret = 0;
+	unsigned int ret;
 
 	do {
 		ret = hw_read_pci(hw, 0xEC);
@@ -1397,9 +1397,9 @@ static void i2c_write(struct hw *hw, u32 device, u32 addr, u32 data)
 
 static int hw_reset_dac(struct hw *hw)
 {
-	u32 i = 0;
-	u16 gpioorg = 0;
-	unsigned int ret = 0;
+	u32 i;
+	u16 gpioorg;
+	unsigned int ret;
 
 	if (i2c_unlock(hw))
 		return -1;
@@ -1430,10 +1430,10 @@ static int hw_reset_dac(struct hw *hw)
 
 static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
 {
-	u32 data = 0;
-	u16 gpioorg = 0;
-	u16 subsys_id = 0;
-	unsigned int ret = 0;
+	u32 data;
+	u16 gpioorg;
+	u16 subsys_id;
+	unsigned int ret;
 
 	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
 	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
@@ -1494,13 +1494,12 @@ static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
 
 static int is_adc_input_selected_SB055x(struct hw *hw, enum ADCSRC type)
 {
-	u32 data = 0;
-	return data;
+	return 0;
 }
 
 static int is_adc_input_selected_SBx(struct hw *hw, enum ADCSRC type)
 {
-	u32 data = 0;
+	u32 data;
 
 	data = hw_read_20kx(hw, GPIO);
 	switch (type) {
@@ -1521,7 +1520,7 @@ static int is_adc_input_selected_SBx(struct hw *hw, enum ADCSRC type)
 
 static int is_adc_input_selected_hendrix(struct hw *hw, enum ADCSRC type)
 {
-	u32 data = 0;
+	u32 data;
 
 	data = hw_read_20kx(hw, GPIO);
 	switch (type) {
@@ -1539,7 +1538,7 @@ static int is_adc_input_selected_hendrix(struct hw *hw, enum ADCSRC type)
 
 static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
 {
-	u16 subsys_id = 0;
+	u16 subsys_id;
 
 	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
 	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
@@ -1559,7 +1558,7 @@ static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
 static int
 adc_input_select_SB055x(struct hw *hw, enum ADCSRC type, unsigned char boost)
 {
-	u32 data = 0;
+	u32 data;
 
 	/*
 	 * check and set the following GPIO bits accordingly
@@ -1599,9 +1598,9 @@ adc_input_select_SB055x(struct hw *hw, enum ADCSRC type, unsigned char boost)
 static int
 adc_input_select_SBx(struct hw *hw, enum ADCSRC type, unsigned char boost)
 {
-	u32 data = 0;
-	u32 i2c_data = 0;
-	unsigned int ret = 0;
+	u32 data;
+	u32 i2c_data;
+	unsigned int ret;
 
 	if (i2c_unlock(hw))
 		return -1;
@@ -1649,9 +1648,9 @@ adc_input_select_SBx(struct hw *hw, enum ADCSRC type, unsigned char boost)
 static int
 adc_input_select_hendrix(struct hw *hw, enum ADCSRC type, unsigned char boost)
 {
-	u32 data = 0;
-	u32 i2c_data = 0;
-	unsigned int ret = 0;
+	u32 data;
+	u32 i2c_data;
+	unsigned int ret;
 
 	if (i2c_unlock(hw))
 		return -1;
@@ -1693,7 +1692,7 @@ adc_input_select_hendrix(struct hw *hw, enum ADCSRC type, unsigned char boost)
 
 static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
 {
-	u16 subsys_id = 0;
+	u16 subsys_id;
 
 	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
 	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
@@ -1719,8 +1718,8 @@ static int adc_init_SBx(struct hw *hw, int input, int mic20db)
 {
 	u16 gpioorg;
 	u16 input_source;
-	u32 adcdata = 0;
-	unsigned int ret = 0;
+	u32 adcdata;
+	unsigned int ret;
 
 	input_source = 0x100;  /* default to analog */
 	switch (input) {
@@ -1742,6 +1741,7 @@ static int adc_init_SBx(struct hw *hw, int input, int mic20db)
 		input_source = 0x0;  /* set to Digital */
 		break;
 	default:
+		adcdata = 0x0;
 		break;
 	}
 
@@ -1781,8 +1781,8 @@ static int adc_init_SBx(struct hw *hw, int input, int mic20db)
 
 static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 {
-	int err = 0;
-	u16 subsys_id = 0;
+	int err;
+	u16 subsys_id;
 
 	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
 	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
@@ -1797,7 +1797,7 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 
 static int hw_have_digit_io_switch(struct hw *hw)
 {
-	u16 subsys_id = 0;
+	u16 subsys_id;
 
 	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
 	/* SB073x and Vista compatible cards have no digit IO switch */
@@ -1814,11 +1814,11 @@ static int uaa_to_xfi(struct pci_dev *pci)
 {
 	unsigned int bar0, bar1, bar2, bar3, bar4, bar5;
 	unsigned int cmd, irq, cl_size, l_timer, pwr;
-	unsigned int is_uaa = 0;
+	unsigned int is_uaa;
 	unsigned int data[4] = {0};
 	unsigned int io_base;
 	void *mem_base;
-	int i = 0;
+	int i;
 	const u32 CTLX = CTLBITS('C', 'T', 'L', 'X');
 	const u32 CTL_ = CTLBITS('C', 'T', 'L', '-');
 	const u32 CTLF = CTLBITS('C', 'T', 'L', 'F');
@@ -1916,9 +1916,9 @@ static irqreturn_t ct_20k1_interrupt(int irq, void *dev_id)
 
 static int hw_card_start(struct hw *hw)
 {
-	int err = 0;
+	int err;
 	struct pci_dev *pci = hw->pci;
-	u16 subsys_id = 0;
+	u16 subsys_id;
 
 	err = pci_enable_device(pci);
 	if (err < 0)
@@ -2004,8 +2004,8 @@ static int hw_card_init(struct hw *hw, struct card_conf *info)
 {
 	int err;
 	unsigned int gctl;
-	u16 subsys_id = 0;
-	u32 data = 0;
+	u16 subsys_id;
+	u32 data;
 	struct dac_conf dac_info = {0};
 	struct adc_conf adc_info = {0};
 	struct daio_conf daio_info = {0};
diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
index edbfb48..7d6dcba 100644
--- a/sound/pci/ctxfi/cthw20k2.c
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -168,7 +168,7 @@ static int src_get_rsc_ctrl_blk(void **rblk)
 
 static int src_put_rsc_ctrl_blk(void *blk)
 {
-	kfree((struct src_rsc_ctrl_blk *)blk);
+	kfree(blk);
 
 	return 0;
 }
@@ -359,7 +359,7 @@ static unsigned int src_param_pitch_mixer(unsigned int src_idx)
 static int src_commit_write(struct hw *hw, unsigned int idx, void *blk)
 {
 	struct src_rsc_ctrl_blk *ctl = blk;
-	int i = 0;
+	int i;
 
 	if (ctl->dirty.bf.czbfs) {
 		/* Clear Z-Buffer registers */
@@ -458,8 +458,8 @@ static int src_mgr_dsb_src(void *blk, unsigned int idx)
 static int src_mgr_commit_write(struct hw *hw, void *blk)
 {
 	struct src_mgr_ctrl_blk *ctl = blk;
-	int i = 0;
-	unsigned int ret = 0;
+	int i;
+	unsigned int ret;
 
 	if (ctl->dirty.bf.enbsa) {
 		do {
@@ -494,7 +494,7 @@ static int src_mgr_get_ctrl_blk(void **rblk)
 
 static int src_mgr_put_ctrl_blk(void *blk)
 {
-	kfree((struct src_mgr_ctrl_blk *)blk);
+	kfree(blk);
 
 	return 0;
 }
@@ -515,7 +515,7 @@ static int srcimp_mgr_get_ctrl_blk(void **rblk)
 
 static int srcimp_mgr_put_ctrl_blk(void *blk)
 {
-	kfree((struct srcimp_mgr_ctrl_blk *)blk);
+	kfree(blk);
 
 	return 0;
 }
@@ -704,7 +704,7 @@ static int amixer_rsc_get_ctrl_blk(void **rblk)
 
 static int amixer_rsc_put_ctrl_blk(void *blk)
 {
-	kfree((struct amixer_rsc_ctrl_blk *)blk);
+	kfree(blk);
 
 	return 0;
 }
@@ -893,7 +893,7 @@ static int dai_get_ctrl_blk(void **rblk)
 
 static int dai_put_ctrl_blk(void *blk)
 {
-	kfree((struct dai_ctrl_blk *)blk);
+	kfree(blk);
 
 	return 0;
 }
@@ -943,7 +943,7 @@ static int dao_get_ctrl_blk(void **rblk)
 
 static int dao_put_ctrl_blk(void *blk)
 {
-	kfree((struct dao_ctrl_blk *)blk);
+	kfree(blk);
 
 	return 0;
 }
@@ -1051,8 +1051,8 @@ static int daio_mgr_set_imapaddr(void *blk, unsigned int addr)
 static int daio_mgr_commit_write(struct hw *hw, void *blk)
 {
 	struct daio_mgr_ctrl_blk *ctl = blk;
-	unsigned int data = 0;
-	int i = 0;
+	unsigned int data;
+	int i;
 
 	for (i = 0; i < 8; i++) {
 		if ((ctl->dirty.bf.atxctl & (0x1 << i))) {
@@ -1080,7 +1080,7 @@ static int daio_mgr_commit_write(struct hw *hw, void *blk)
 static int daio_mgr_get_ctrl_blk(struct hw *hw, void **rblk)
 {
 	struct daio_mgr_ctrl_blk *blk;
-	int i = 0;
+	int i;
 
 	*rblk = NULL;
 	blk = kzalloc(sizeof(*blk), GFP_KERNEL);
@@ -1099,7 +1099,7 @@ static int daio_mgr_get_ctrl_blk(struct hw *hw, void **rblk)
 
 static int daio_mgr_put_ctrl_blk(void *blk)
 {
-	kfree((struct daio_mgr_ctrl_blk *)blk);
+	kfree(blk);
 
 	return 0;
 }
@@ -1125,7 +1125,7 @@ struct trn_conf {
 
 static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
 {
-	u32 dwData = 0;
+	u32 dwData;
 	int i;
 
 	/* Program I2S with proper sample rate and enable the correct I2S
@@ -1195,9 +1195,9 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
 /* TRANSPORT operations */
 static int hw_trn_init(struct hw *hw, const struct trn_conf *info)
 {
-	u32 vmctl = 0, data = 0;
-	unsigned long ptp_phys_low = 0, ptp_phys_high = 0;
-	int i = 0;
+	u32 vmctl, data;
+	u32 ptp_phys_low, ptp_phys_high;
+	int i;
 
 	/* Set up device page table */
 	if ((~0UL) == info->vm_pgt_phys) {
@@ -1433,7 +1433,7 @@ static int I2CLockChip(struct hw *hw)
 
 static int I2CInit(struct hw *hw, u8 bDeviceID, u8 bAddressSize, u8 bDataSize)
 {
-	int err = 0;
+	int err;
 	unsigned int RegI2CStatus;
 	unsigned int RegI2CAddress;
 
@@ -1481,7 +1481,7 @@ static int I2CUninit(struct hw *hw)
 static int I2CWaitDataReady(struct hw *hw)
 {
 	int i = 0x400000;
-	unsigned int ret = 0;
+	unsigned int ret;
 
 	do {
 		ret = hw_read_20kx(hw, I2C_IF_STATUS);
@@ -1541,9 +1541,9 @@ static int I2CWrite(struct hw *hw, u16 wAddress, u32 dwData)
 
 static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
 {
-	int err = 0;
-	u32 dwData = 0;
-	int i = 0;
+	int err;
+	u32 dwData;
+	int i;
 	struct REGS_CS4382 cs4382_Read = {0};
 	struct REGS_CS4382 cs4382_Def = {
 				   0x00000001,  /* Mode Control 1 */
@@ -1696,7 +1696,7 @@ End:
 
 static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
 {
-	u32 data = 0;
+	u32 data;
 
 	data = hw_read_20kx(hw, GPIO_DATA);
 	switch (type) {
@@ -1714,7 +1714,7 @@ static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
 
 static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
 {
-	u32 data = 0;
+	u32 data;
 
 	data = hw_read_20kx(hw, GPIO_DATA);
 	switch (type) {
@@ -1747,8 +1747,8 @@ static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
 
 static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 {
-	int err = 0;
-	u32 dwMux = 2, dwData = 0, dwCtl = 0;
+	int err;
+	u32 dwMux = 2, dwData, dwCtl;
 
 	/*  Set ADC reset bit as output */
 	dwData = hw_read_20kx(hw, GPIO_CTRL);
diff --git a/sound/pci/ctxfi/ctimap.c b/sound/pci/ctxfi/ctimap.c
index d34eacd..0b73368 100644
--- a/sound/pci/ctxfi/ctimap.c
+++ b/sound/pci/ctxfi/ctimap.c
@@ -99,8 +99,8 @@ int input_mapper_delete(struct list_head *mappers, struct imapper *entry,
 
 void free_input_mapper_list(struct list_head *head)
 {
-	struct imapper *entry = NULL;
-	struct list_head *pos = NULL;
+	struct imapper *entry;
+	struct list_head *pos;
 
 	while (!list_empty(head)) {
 		pos = head->next;
diff --git a/sound/pci/ctxfi/ctmixer.c b/sound/pci/ctxfi/ctmixer.c
index 796156e..666722d 100644
--- a/sound/pci/ctxfi/ctmixer.c
+++ b/sound/pci/ctxfi/ctmixer.c
@@ -298,7 +298,7 @@ set_switch_state(struct ct_mixer *mixer,
  * from 2^-6 to (1+1023/1024) */
 static unsigned int uint16_to_float14(unsigned int x)
 {
-	unsigned int i = 0;
+	unsigned int i;
 
 	if (x < 17)
 		return 0;
@@ -318,7 +318,7 @@ static unsigned int uint16_to_float14(unsigned int x)
 
 static unsigned int float14_to_uint16(unsigned int x)
 {
-	unsigned int e = 0;
+	unsigned int e;
 
 	if (!x)
 		return x;
@@ -491,7 +491,7 @@ static int ct_alsa_mix_switch_put(struct snd_kcontrol *kcontrol,
 	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
 	struct ct_mixer *mixer = atc->mixer;
 	enum CTALSA_MIXER_CTL type = kcontrol->private_value;
-	int state = 0;
+	int state;
 
 	state = ucontrol->value.integer.value[0];
 	if (get_switch_state(mixer, type) == state)
@@ -574,7 +574,7 @@ static int ct_spdif_get(struct snd_kcontrol *kcontrol,
 			struct snd_ctl_elem_value *ucontrol)
 {
 	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
-	unsigned int status = 0;
+	unsigned int status;
 
 	atc->spdif_out_get_status(atc, &status);
 	ucontrol->value.iec958.status[0] = (status >> 0) & 0xff;
@@ -589,8 +589,8 @@ static int ct_spdif_put(struct snd_kcontrol *kcontrol,
 			struct snd_ctl_elem_value *ucontrol)
 {
 	struct ct_atc *atc = snd_kcontrol_chip(kcontrol);
-	int change = 1;
-	unsigned int status = 0, old_status = 0;
+	int change;
+	unsigned int status, old_status;
 
 	status = (ucontrol->value.iec958.status[0] << 0) |
 		 (ucontrol->value.iec958.status[1] << 8) |
@@ -641,8 +641,8 @@ static struct snd_kcontrol_new iec958_ctl = {
 static int
 ct_mixer_kcontrol_new(struct ct_mixer *mixer, struct snd_kcontrol_new *new)
 {
-	struct snd_kcontrol *kctl = NULL;
-	int err = 0;
+	struct snd_kcontrol *kctl;
+	int err;
 
 	kctl = snd_ctl_new1(new, mixer->atc);
 	if (NULL == kctl)
@@ -669,9 +669,9 @@ ct_mixer_kcontrol_new(struct ct_mixer *mixer, struct snd_kcontrol_new *new)
 
 static int ct_mixer_kcontrols_create(struct ct_mixer *mixer)
 {
-	enum CTALSA_MIXER_CTL type = 0;
+	enum CTALSA_MIXER_CTL type;
 	struct ct_atc *atc = mixer->atc;
-	int err = 0;
+	int err;
 
 	/* Create snd kcontrol instances on demand */
 	for (type = VOL_MIXER_START; type <= VOL_MIXER_END; type++) {
@@ -733,9 +733,9 @@ static int ct_mixer_kcontrols_create(struct ct_mixer *mixer)
 static void
 ct_mixer_recording_select(struct ct_mixer *mixer, enum CT_AMIXER_CTL type)
 {
-	struct amixer *amix_d = NULL;
-	struct sum *sum_c = NULL;
-	int i = 0;
+	struct amixer *amix_d;
+	struct sum *sum_c;
+	int i;
 
 	for (i = 0; i < 2; i++) {
 		amix_d = mixer->amixers[type*CHN_NUM+i];
@@ -748,8 +748,8 @@ ct_mixer_recording_select(struct ct_mixer *mixer, enum CT_AMIXER_CTL type)
 static void
 ct_mixer_recording_unselect(struct ct_mixer *mixer, enum CT_AMIXER_CTL type)
 {
-	struct amixer *amix_d = NULL;
-	int i = 0;
+	struct amixer *amix_d;
+	int i;
 
 	for (i = 0; i < 2; i++) {
 		amix_d = mixer->amixers[type*CHN_NUM+i];
@@ -760,14 +760,14 @@ ct_mixer_recording_unselect(struct ct_mixer *mixer, enum CT_AMIXER_CTL type)
 
 static int ct_mixer_get_resources(struct ct_mixer *mixer)
 {
-	struct sum_mgr *sum_mgr = NULL;
-	struct sum *sum = NULL;
+	struct sum_mgr *sum_mgr;
+	struct sum *sum;
 	struct sum_desc sum_desc = {0};
-	struct amixer_mgr *amixer_mgr = NULL;
-	struct amixer *amixer = NULL;
+	struct amixer_mgr *amixer_mgr;
+	struct amixer *amixer;
 	struct amixer_desc am_desc = {0};
-	int err = 0;
-	int i = 0;
+	int err;
+	int i;
 
 	/* Allocate sum resources for mixer obj */
 	sum_mgr = (struct sum_mgr *)mixer->atc->rsc_mgrs[SUM];
@@ -822,8 +822,8 @@ error1:
 
 static int ct_mixer_get_mem(struct ct_mixer **rmixer)
 {
-	struct ct_mixer *mixer = NULL;
-	int err = 0;
+	struct ct_mixer *mixer;
+	int err;
 
 	*rmixer = NULL;
 	/* Allocate mem for mixer obj */
@@ -855,9 +855,9 @@ error1:
 
 static int ct_mixer_topology_build(struct ct_mixer *mixer)
 {
-	struct sum *sum = NULL;
-	struct amixer *amix_d = NULL, *amix_s = NULL;
-	enum CT_AMIXER_CTL i = 0, j = 0;
+	struct sum *sum;
+	struct amixer *amix_d, *amix_s;
+	enum CT_AMIXER_CTL i, j;
 
 	/* Build topology from destination to source */
 
@@ -1044,7 +1044,7 @@ int ct_mixer_destroy(struct ct_mixer *mixer)
 	struct sum_mgr *sum_mgr = (struct sum_mgr *)mixer->atc->rsc_mgrs[SUM];
 	struct amixer_mgr *amixer_mgr =
 			(struct amixer_mgr *)mixer->atc->rsc_mgrs[AMIXER];
-	struct amixer *amixer = NULL;
+	struct amixer *amixer;
 	int i = 0;
 
 	/* Release amixer resources */
@@ -1071,8 +1071,8 @@ int ct_mixer_destroy(struct ct_mixer *mixer)
 
 int ct_mixer_create(struct ct_atc *atc, struct ct_mixer **rmixer)
 {
-	struct ct_mixer *mixer = NULL;
-	int err = 0;
+	struct ct_mixer *mixer;
+	int err;
 
 	*rmixer = NULL;
 
@@ -1109,7 +1109,7 @@ int ct_alsa_mix_create(struct ct_atc *atc,
 		       enum CTALSADEVS device,
 		       const char *device_name)
 {
-	int err = 0;
+	int err;
 
 	/* Create snd kcontrol instances on demand */
 	/* vol_ctl.device = swh_ctl.device = device; */ /* better w/ device 0 */
diff --git a/sound/pci/ctxfi/ctresource.c b/sound/pci/ctxfi/ctresource.c
index da21a71..889c495 100644
--- a/sound/pci/ctxfi/ctresource.c
+++ b/sound/pci/ctxfi/ctresource.c
@@ -27,7 +27,7 @@ static int
 get_resource(u8 *rscs, unsigned int amount,
 	     unsigned int multi, unsigned int *ridx)
 {
-	int i = 0, j = 0, k = 0, n = 0;
+	int i, j, k, n;
 
 	/* Check whether there are sufficient resources to meet request. */
 	for (i = 0, n = multi; i < amount; i++) {
@@ -61,7 +61,7 @@ get_resource(u8 *rscs, unsigned int amount,
 
 static int put_resource(u8 *rscs, unsigned int multi, unsigned int idx)
 {
-	unsigned int i = 0, j = 0, k = 0, n = 0;
+	unsigned int i, j, k, n;
 
 	/* Mark the contiguous bits in resource bit-map as used */
 	for (n = multi, i = idx; n > 0; n--) {
@@ -76,7 +76,7 @@ static int put_resource(u8 *rscs, unsigned int multi, unsigned int idx)
 
 int mgr_get_resource(struct rsc_mgr *mgr, unsigned int n, unsigned int *ridx)
 {
-	int err = 0;
+	int err;
 
 	if (n > mgr->avail)
 		return -ENOENT;
diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c
index 77e118c..e1c145d 100644
--- a/sound/pci/ctxfi/ctsrc.c
+++ b/sound/pci/ctxfi/ctsrc.c
@@ -37,9 +37,9 @@ static int (*src_default_config[3])(struct src *) = {
 
 static int src_set_state(struct src *src, unsigned int state)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_state(src->rsc.ctrl_blk, state);
 
 	return 0;
@@ -47,9 +47,9 @@ static int src_set_state(struct src *src, unsigned int state)
 
 static int src_set_bm(struct src *src, unsigned int bm)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_bm(src->rsc.ctrl_blk, bm);
 
 	return 0;
@@ -57,9 +57,9 @@ static int src_set_bm(struct src *src, unsigned int bm)
 
 static int src_set_sf(struct src *src, unsigned int sf)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_sf(src->rsc.ctrl_blk, sf);
 
 	return 0;
@@ -67,9 +67,9 @@ static int src_set_sf(struct src *src, unsigned int sf)
 
 static int src_set_pm(struct src *src, unsigned int pm)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_pm(src->rsc.ctrl_blk, pm);
 
 	return 0;
@@ -77,9 +77,9 @@ static int src_set_pm(struct src *src, unsigned int pm)
 
 static int src_set_rom(struct src *src, unsigned int rom)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_rom(src->rsc.ctrl_blk, rom);
 
 	return 0;
@@ -87,9 +87,9 @@ static int src_set_rom(struct src *src, unsigned int rom)
 
 static int src_set_vo(struct src *src, unsigned int vo)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_vo(src->rsc.ctrl_blk, vo);
 
 	return 0;
@@ -97,9 +97,9 @@ static int src_set_vo(struct src *src, unsigned int vo)
 
 static int src_set_st(struct src *src, unsigned int st)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_st(src->rsc.ctrl_blk, st);
 
 	return 0;
@@ -107,9 +107,9 @@ static int src_set_st(struct src *src, unsigned int st)
 
 static int src_set_bp(struct src *src, unsigned int bp)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_bp(src->rsc.ctrl_blk, bp);
 
 	return 0;
@@ -117,9 +117,9 @@ static int src_set_bp(struct src *src, unsigned int bp)
 
 static int src_set_cisz(struct src *src, unsigned int cisz)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_cisz(src->rsc.ctrl_blk, cisz);
 
 	return 0;
@@ -127,9 +127,9 @@ static int src_set_cisz(struct src *src, unsigned int cisz)
 
 static int src_set_ca(struct src *src, unsigned int ca)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_ca(src->rsc.ctrl_blk, ca);
 
 	return 0;
@@ -137,9 +137,9 @@ static int src_set_ca(struct src *src, unsigned int ca)
 
 static int src_set_sa(struct src *src, unsigned int sa)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_sa(src->rsc.ctrl_blk, sa);
 
 	return 0;
@@ -147,9 +147,9 @@ static int src_set_sa(struct src *src, unsigned int sa)
 
 static int src_set_la(struct src *src, unsigned int la)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_la(src->rsc.ctrl_blk, la);
 
 	return 0;
@@ -157,9 +157,9 @@ static int src_set_la(struct src *src, unsigned int la)
 
 static int src_set_pitch(struct src *src, unsigned int pitch)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_pitch(src->rsc.ctrl_blk, pitch);
 
 	return 0;
@@ -167,9 +167,9 @@ static int src_set_pitch(struct src *src, unsigned int pitch)
 
 static int src_set_clear_zbufs(struct src *src)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	hw->src_set_clear_zbufs(src->rsc.ctrl_blk, 1);
 
 	return 0;
@@ -177,11 +177,11 @@ static int src_set_clear_zbufs(struct src *src)
 
 static int src_commit_write(struct src *src)
 {
-	struct hw *hw = NULL;
-	int i = 0;
+	struct hw *hw;
+	int i;
 	unsigned int dirty = 0;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	src->rsc.ops->master(&src->rsc);
 	if (src->rsc.msr > 1) {
 		/* Save dirty flags for conjugate resource programming */
@@ -207,9 +207,9 @@ static int src_commit_write(struct src *src)
 
 static int src_get_ca(struct src *src)
 {
-	struct hw *hw = NULL;
+	struct hw *hw;
 
-	hw = (struct hw *)src->rsc.hw;
+	hw = src->rsc.hw;
 	return hw->src_get_ca(hw, src->rsc.ops->index(&src->rsc),
 						src->rsc.ctrl_blk);
 }
@@ -229,7 +229,7 @@ static struct src *src_next_interleave(struct src *src)
 static int src_default_config_memrd(struct src *src)
 {
 	struct hw *hw = src->rsc.hw;
-	unsigned int rsr = 0, msr = 0;
+	unsigned int rsr, msr;
 
 	hw->src_set_state(src->rsc.ctrl_blk, SRC_STATE_OFF);
 	hw->src_set_bm(src->rsc.ctrl_blk, 1);
@@ -297,7 +297,7 @@ static int src_default_config_memwr(struct src *src)
 static int src_default_config_arcrw(struct src *src)
 {
 	struct hw *hw = src->rsc.hw;
-	unsigned int rsr = 0, msr = 0;
+	unsigned int rsr, msr;
 	unsigned int dirty;
 
 	hw->src_set_state(src->rsc.ctrl_blk, SRC_STATE_OFF);
@@ -360,8 +360,8 @@ static int
 src_rsc_init(struct src *src, u32 idx,
 	     const struct src_desc *desc, struct src_mgr *mgr)
 {
-	int err = 0;
-	int i = 0, n = 0;
+	int err;
+	int i, n;
 	struct src *p;
 
 	n = (MEMRD == desc->mode) ? desc->multi : 1;
@@ -395,7 +395,7 @@ error1:
 
 static int src_rsc_uninit(struct src *src, struct src_mgr *mgr)
 {
-	int i = 0, n = 0;
+	int i, n;
 	struct src *p;
 
 	n = (MEMRD == src->mode) ? src->multi : 1;
@@ -416,8 +416,8 @@ static int
 get_src_rsc(struct src_mgr *mgr, const struct src_desc *desc, struct src **rsrc)
 {
 	unsigned int idx = SRC_RESOURCE_NUM;
-	int err = 0;
-	struct src *src = NULL;
+	int err;
+	struct src *src;
 	unsigned long flags;
 
 	*rsrc = NULL;
@@ -489,7 +489,7 @@ static int put_src_rsc(struct src_mgr *mgr, struct src *src)
 static int src_enable_s(struct src_mgr *mgr, struct src *src)
 {
 	struct hw *hw = mgr->mgr.hw;
-	int i = 0;
+	int i;
 
 	src->rsc.ops->master(&src->rsc);
 	for (i = 0; i < src->rsc.msr; i++) {
@@ -505,7 +505,7 @@ static int src_enable_s(struct src_mgr *mgr, struct src *src)
 static int src_enable(struct src_mgr *mgr, struct src *src)
 {
 	struct hw *hw = mgr->mgr.hw;
-	int i = 0;
+	int i;
 
 	src->rsc.ops->master(&src->rsc);
 	for (i = 0; i < src->rsc.msr; i++) {
@@ -521,7 +521,7 @@ static int src_enable(struct src_mgr *mgr, struct src *src)
 static int src_disable(struct src_mgr *mgr, struct src *src)
 {
 	struct hw *hw = mgr->mgr.hw;
-	int i = 0;
+	int i;
 
 	src->rsc.ops->master(&src->rsc);
 	for (i = 0; i < src->rsc.msr; i++) {
@@ -545,7 +545,7 @@ static int src_mgr_commit_write(struct src_mgr *mgr)
 
 int src_mgr_create(void *hw, struct src_mgr **rsrc_mgr)
 {
-	int err = 0, i = 0;
+	int err, i;
 	struct src_mgr *src_mgr;
 
 	*rsrc_mgr = NULL;
@@ -618,8 +618,8 @@ static struct rsc_ops srcimp_basic_rsc_ops = {
 
 static int srcimp_map(struct srcimp *srcimp, struct src *src, struct rsc *input)
 {
-	struct imapper *entry = NULL;
-	int i = 0;
+	struct imapper *entry;
+	int i;
 
 	srcimp->rsc.ops->master(&srcimp->rsc);
 	src->rsc.ops->master(&src->rsc);
@@ -646,7 +646,7 @@ static int srcimp_map(struct srcimp *srcimp, struct src *src, struct rsc *input)
 
 static int srcimp_unmap(struct srcimp *srcimp)
 {
-	int i = 0;
+	int i;
 
 	/* Program master and conjugate resources */
 	for (i = 0; i < srcimp->rsc.msr; i++) {
@@ -669,7 +669,7 @@ static int srcimp_rsc_init(struct srcimp *srcimp,
 			   const struct srcimp_desc *desc,
 			   struct srcimp_mgr *mgr)
 {
-	int err = 0;
+	int err;
 
 	err = rsc_init(&srcimp->rsc, srcimp->idx[0],
 		       SRCIMP, desc->msr, mgr->mgr.hw);
@@ -715,9 +715,9 @@ static int get_srcimp_rsc(struct srcimp_mgr *mgr,
 			  const struct srcimp_desc *desc,
 			  struct srcimp **rsrcimp)
 {
-	int err = 0, i = 0;
-	unsigned int idx = 0;
-	struct srcimp *srcimp = NULL;
+	int err, i;
+	unsigned int idx;
+	struct srcimp *srcimp;
 	unsigned long flags;
 
 	*rsrcimp = NULL;
@@ -765,7 +765,7 @@ error1:
 static int put_srcimp_rsc(struct srcimp_mgr *mgr, struct srcimp *srcimp)
 {
 	unsigned long flags;
-	int i = 0;
+	int i;
 
 	spin_lock_irqsave(&mgr->mgr_lock, flags);
 	for (i = 0; i < srcimp->rsc.msr; i++)
@@ -795,7 +795,7 @@ static int srcimp_map_op(void *data, struct imapper *entry)
 static int srcimp_imap_add(struct srcimp_mgr *mgr, struct imapper *entry)
 {
 	unsigned long flags;
-	int err = 0;
+	int err;
 
 	spin_lock_irqsave(&mgr->imap_lock, flags);
 	if ((0 == entry->addr) && (mgr->init_imap_added)) {
@@ -812,7 +812,7 @@ static int srcimp_imap_add(struct srcimp_mgr *mgr, struct imapper *entry)
 static int srcimp_imap_delete(struct srcimp_mgr *mgr, struct imapper *entry)
 {
 	unsigned long flags;
-	int err = 0;
+	int err;
 
 	spin_lock_irqsave(&mgr->imap_lock, flags);
 	err = input_mapper_delete(&mgr->imappers, entry, srcimp_map_op, mgr);
@@ -828,7 +828,7 @@ static int srcimp_imap_delete(struct srcimp_mgr *mgr, struct imapper *entry)
 
 int srcimp_mgr_create(void *hw, struct srcimp_mgr **rsrcimp_mgr)
 {
-	int err = 0;
+	int err;
 	struct srcimp_mgr *srcimp_mgr;
 	struct imapper *entry;
 
diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c
index b7f8e58..67665a7 100644
--- a/sound/pci/ctxfi/ctvmem.c
+++ b/sound/pci/ctxfi/ctvmem.c
@@ -31,8 +31,8 @@
 static struct ct_vm_block *
 get_vm_block(struct ct_vm *vm, unsigned int size)
 {
-	struct ct_vm_block *block = NULL, *entry = NULL;
-	struct list_head *pos = NULL;
+	struct ct_vm_block *block = NULL, *entry;
+	struct list_head *pos;
 
 	size = CT_PAGE_ALIGN(size);
 	if (size > vm->size) {
@@ -77,8 +77,8 @@ get_vm_block(struct ct_vm *vm, unsigned int size)
 
 static void put_vm_block(struct ct_vm *vm, struct ct_vm_block *block)
 {
-	struct ct_vm_block *entry = NULL, *pre_ent = NULL;
-	struct list_head *pos = NULL, *pre = NULL;
+	struct ct_vm_block *entry, *pre_ent;
+	struct list_head *pos, *pre;
 
 	block->size = CT_PAGE_ALIGN(block->size);
 
@@ -223,8 +223,8 @@ int ct_vm_create(struct ct_vm **rvm)
 void ct_vm_destroy(struct ct_vm *vm)
 {
 	int i;
-	struct list_head *pos = NULL;
-	struct ct_vm_block *entry = NULL;
+	struct list_head *pos;
+	struct ct_vm_block *entry;
 
 	/* free used and unused list nodes */
 	while (!list_empty(&vm->used)) {
-- 
cgit v0.10.2


From 312325094785566a0e42a88c1bf6e7eb54c5d70e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Jun 2009 15:07:08 +0200
Subject: dma-debug: comment style fixes

Last patch series introduced some new comment which does not fit the
Kernel comment style guidelines. Fix it with this patch.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 77053d9..b8a61ff 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -542,7 +542,8 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
 
 	write_lock_irqsave(&driver_name_lock, flags);
 
-	/* Now handle the string we got from userspace very carefully.
+	/*
+	 * Now handle the string we got from userspace very carefully.
 	 * The rules are:
 	 *         - only use the first token we got
 	 *         - token delimiter is everything looking like a space
@@ -551,7 +552,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
 	 */
 	if (!isalnum(buf[0])) {
 		/*
-		   If the first character userspace gave us is not
+		 * If the first character userspace gave us is not
 		 * alphanumerical then assume the filter should be
 		 * switched off.
 		 */
-- 
cgit v0.10.2


From af8500bbbd18438495d2f91ad07bda49fff3b770 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 15:07:46 +0200
Subject: ALSA: ctxfi - Fix possible buffer pointer overrun

Fix possible buffer pointer overruns.  Back to zero when it's equal
or over the buffer size.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index a0bd31c..870fa17 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -243,6 +243,8 @@ ct_pcm_playback_pointer(struct snd_pcm_substream *substream)
 	/* Read out playback position */
 	position = atc->pcm_playback_position(atc, apcm);
 	position = bytes_to_frames(runtime, position);
+	if (position >= runtime->buffer_size)
+		position = 0;
 	return position;
 }
 
@@ -343,6 +345,8 @@ ct_pcm_capture_pointer(struct snd_pcm_substream *substream)
 	/* Read out playback position */
 	position = atc->pcm_capture_position(atc, apcm);
 	position = bytes_to_frames(runtime, position);
+	if (position >= runtime->buffer_size)
+		position = 0;
 	return position;
 }
 
-- 
cgit v0.10.2


From c17e2cf7376a2010b8b114fdeebd4e340a5e9cb2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Jun 2009 15:19:29 +0200
Subject: dma-debug: code style fixes

This patch changes the recent updates to dma-debug to conform with
coding style guidelines of Linux and the -tip tree.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index b8a61ff..9561825 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -501,8 +501,8 @@ out_err:
 static ssize_t filter_read(struct file *file, char __user *user_buf,
 			   size_t count, loff_t *ppos)
 {
-	unsigned long flags;
 	char buf[NAME_MAX_LEN + 1];
+	unsigned long flags;
 	int len;
 
 	if (!current_driver_name[0])
@@ -523,9 +523,9 @@ static ssize_t filter_read(struct file *file, char __user *user_buf,
 static ssize_t filter_write(struct file *file, const char __user *userbuf,
 			    size_t count, loff_t *ppos)
 {
-	unsigned long flags;
 	char buf[NAME_MAX_LEN];
-	size_t len = NAME_MAX_LEN - 1;
+	unsigned long flags;
+	size_t len;
 	int i;
 
 	/*
@@ -534,7 +534,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
 	 * disabled. Since copy_from_user can fault and may sleep we
 	 * need to copy to temporary buffer first
 	 */
-	len = min(count, len);
+	len = min(count, NAME_MAX_LEN - 1);
 	if (copy_from_user(buf, userbuf, len))
 		return -EFAULT;
 
@@ -1040,18 +1040,19 @@ EXPORT_SYMBOL(debug_dma_map_sg);
 
 static int get_nr_mapped_entries(struct device *dev, struct scatterlist *s)
 {
-	struct dma_debug_entry *entry;
+	struct dma_debug_entry *entry, ref;
 	struct hash_bucket *bucket;
 	unsigned long flags;
-	int mapped_ents = 0;
-	struct dma_debug_entry ref;
+	int mapped_ents;
 
-	ref.dev = dev;
+	ref.dev      = dev;
 	ref.dev_addr = sg_dma_address(s);
-	ref.size = sg_dma_len(s),
+	ref.size     = sg_dma_len(s),
+
+	bucket       = get_hash_bucket(&ref, &flags);
+	entry        = hash_bucket_find(bucket, &ref);
+	mapped_ents  = 0;
 
-	bucket = get_hash_bucket(&ref, &flags);
-	entry = hash_bucket_find(bucket, &ref);
 	if (entry)
 		mapped_ents = entry->sg_mapped_ents;
 	put_hash_bucket(bucket, &flags);
-- 
cgit v0.10.2


From d362af62ed98f58c64a2b3dd58c79d25ad181b0b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 15:31:22 +0200
Subject: ALSA: ctxfi - Fix / clean up hw20k2 chip code

- Clean up Hungarian coding style
- Don't use static variables for I2C information; this unables to use
  multiple instances.  Now they are stored in struct hw20k2 fields.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
index 7d6dcba..4493a51 100644
--- a/sound/pci/ctxfi/cthw20k2.c
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -15,8 +15,6 @@
  *
  */
 
-#include "cthw20k2.h"
-#include "ct20k2reg.h"
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
@@ -25,6 +23,8 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include "cthw20k2.h"
+#include "ct20k2reg.h"
 
 #if BITS_PER_LONG == 32
 #define CT_XFI_DMA_MASK		DMA_BIT_MASK(32) /* 32 bit PTE */
@@ -32,6 +32,14 @@
 #define CT_XFI_DMA_MASK		DMA_BIT_MASK(64) /* 64 bit PTE */
 #endif
 
+struct hw20k2 {
+	struct hw hw;
+	/* for i2c */
+	unsigned char dev_id;
+	unsigned char addr_size;
+	unsigned char data_size;
+};
+
 static u32 hw_read_20kx(struct hw *hw, u32 reg);
 static void hw_write_20kx(struct hw *hw, u32 reg, u32 data);
 
@@ -1125,7 +1133,7 @@ struct trn_conf {
 
 static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
 {
-	u32 dwData;
+	u32 data;
 	int i;
 
 	/* Program I2S with proper sample rate and enable the correct I2S
@@ -1156,12 +1164,12 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
 		if (i <= 3) {
 			/* 1st 3 channels are SPDIFs (SB0960) */
 			if (i == 3)
-				dwData = 0x1001001;
+				data = 0x1001001;
 			else
-				dwData = 0x1000001;
+				data = 0x1000001;
 
-			hw_write_20kx(hw, (AUDIO_IO_TX_CTL+(0x40*i)), dwData);
-			hw_write_20kx(hw, (AUDIO_IO_RX_CTL+(0x40*i)), dwData);
+			hw_write_20kx(hw, (AUDIO_IO_TX_CTL+(0x40*i)), data);
+			hw_write_20kx(hw, (AUDIO_IO_RX_CTL+(0x40*i)), data);
 
 			/* Initialize the SPDIF Out Channel status registers.
 			 * The value specified here is based on the typical
@@ -1179,13 +1187,13 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info)
 			hw_write_20kx(hw, AUDIO_IO_TX_CSTAT_H+(0x40*i), 0x0B);
 		} else {
 			/* Next 5 channels are I2S (SB0960) */
-			dwData = 0x11;
-			hw_write_20kx(hw, AUDIO_IO_RX_CTL+(0x40*i), dwData);
+			data = 0x11;
+			hw_write_20kx(hw, AUDIO_IO_RX_CTL+(0x40*i), data);
 			if (2 == info->msr) {
 				/* Four channels per sample period */
-				dwData |= 0x1000;
+				data |= 0x1000;
 			}
-			hw_write_20kx(hw, AUDIO_IO_TX_CTL+(0x40*i), dwData);
+			hw_write_20kx(hw, AUDIO_IO_TX_CTL+(0x40*i), data);
 		}
 	}
 
@@ -1377,34 +1385,32 @@ static int hw_auto_init(struct hw *hw)
 #define I2C_ADDRESS_PTAD	0x0000FFFF
 #define I2C_ADDRESS_SLAD	0x007F0000
 
-struct REGS_CS4382 {
-	u32 dwModeControl_1;
-	u32 dwModeControl_2;
-	u32 dwModeControl_3;
+struct regs_cs4382 {
+	u32 mode_control_1;
+	u32 mode_control_2;
+	u32 mode_control_3;
 
-	u32 dwFilterControl;
-	u32 dwInvertControl;
+	u32 filter_control;
+	u32 invert_control;
 
-	u32 dwMixControl_P1;
-	u32 dwVolControl_A1;
-	u32 dwVolControl_B1;
+	u32 mix_control_P1;
+	u32 vol_control_A1;
+	u32 vol_control_B1;
 
-	u32 dwMixControl_P2;
-	u32 dwVolControl_A2;
-	u32 dwVolControl_B2;
+	u32 mix_control_P2;
+	u32 vol_control_A2;
+	u32 vol_control_B2;
 
-	u32 dwMixControl_P3;
-	u32 dwVolControl_A3;
-	u32 dwVolControl_B3;
+	u32 mix_control_P3;
+	u32 vol_control_A3;
+	u32 vol_control_B3;
 
-	u32 dwMixControl_P4;
-	u32 dwVolControl_A4;
-	u32 dwVolControl_B4;
+	u32 mix_control_P4;
+	u32 vol_control_A4;
+	u32 vol_control_B4;
 };
 
-static u8 m_bAddressSize, m_bDataSize, m_bDeviceID;
-
-static int I2CUnlockFullAccess(struct hw *hw)
+static int hw20k2_i2c_unlock_full_access(struct hw *hw)
 {
 	u8 UnlockKeySequence_FLASH_FULLACCESS_MODE[2] =  {0xB3, 0xD4};
 
@@ -1420,7 +1426,7 @@ static int I2CUnlockFullAccess(struct hw *hw)
 	return -1;
 }
 
-static int I2CLockChip(struct hw *hw)
+static int hw20k2_i2c_lock_chip(struct hw *hw)
 {
 	/* Write twice */
 	hw_write_20kx(hw, I2C_IF_WLOCK, STATE_LOCKED);
@@ -1431,54 +1437,55 @@ static int I2CLockChip(struct hw *hw)
 	return -1;
 }
 
-static int I2CInit(struct hw *hw, u8 bDeviceID, u8 bAddressSize, u8 bDataSize)
+static int hw20k2_i2c_init(struct hw *hw, u8 dev_id, u8 addr_size, u8 data_size)
 {
+	struct hw20k2 *hw20k2 = (struct hw20k2 *)hw;
 	int err;
-	unsigned int RegI2CStatus;
-	unsigned int RegI2CAddress;
+	unsigned int i2c_status;
+	unsigned int i2c_addr;
 
-	err = I2CUnlockFullAccess(hw);
+	err = hw20k2_i2c_unlock_full_access(hw);
 	if (err < 0)
 		return err;
 
-	m_bAddressSize = bAddressSize;
-	m_bDataSize = bDataSize;
-	m_bDeviceID = bDeviceID;
+	hw20k2->addr_size = addr_size;
+	hw20k2->data_size = data_size;
+	hw20k2->dev_id = dev_id;
 
-	RegI2CAddress = 0;
-	set_field(&RegI2CAddress, I2C_ADDRESS_SLAD, bDeviceID);
+	i2c_addr = 0;
+	set_field(&i2c_addr, I2C_ADDRESS_SLAD, dev_id);
 
-	hw_write_20kx(hw, I2C_IF_ADDRESS, RegI2CAddress);
+	hw_write_20kx(hw, I2C_IF_ADDRESS, i2c_addr);
 
-	RegI2CStatus = hw_read_20kx(hw, I2C_IF_STATUS);
+	i2c_status = hw_read_20kx(hw, I2C_IF_STATUS);
 
-	set_field(&RegI2CStatus, I2C_STATUS_DCM, 1); /* Direct control mode */
+	set_field(&i2c_status, I2C_STATUS_DCM, 1); /* Direct control mode */
 
-	hw_write_20kx(hw, I2C_IF_STATUS, RegI2CStatus);
+	hw_write_20kx(hw, I2C_IF_STATUS, i2c_status);
 
 	return 0;
 }
 
-static int I2CUninit(struct hw *hw)
+static int hw20k2_i2c_uninit(struct hw *hw)
 {
-	unsigned int RegI2CStatus;
-	unsigned int RegI2CAddress;
+	unsigned int i2c_status;
+	unsigned int i2c_addr;
 
-	RegI2CAddress = 0;
-	set_field(&RegI2CAddress, I2C_ADDRESS_SLAD, 0x57); /* I2C id */
+	i2c_addr = 0;
+	set_field(&i2c_addr, I2C_ADDRESS_SLAD, 0x57); /* I2C id */
 
-	hw_write_20kx(hw, I2C_IF_ADDRESS, RegI2CAddress);
+	hw_write_20kx(hw, I2C_IF_ADDRESS, i2c_addr);
 
-	RegI2CStatus = hw_read_20kx(hw, I2C_IF_STATUS);
+	i2c_status = hw_read_20kx(hw, I2C_IF_STATUS);
 
-	set_field(&RegI2CStatus, I2C_STATUS_DCM, 0); /* I2C mode */
+	set_field(&i2c_status, I2C_STATUS_DCM, 0); /* I2C mode */
 
-	hw_write_20kx(hw, I2C_IF_STATUS, RegI2CStatus);
+	hw_write_20kx(hw, I2C_IF_STATUS, i2c_status);
 
-	return I2CLockChip(hw);
+	return hw20k2_i2c_lock_chip(hw);
 }
 
-static int I2CWaitDataReady(struct hw *hw)
+static int hw20k2_i2c_wait_data_ready(struct hw *hw)
 {
 	int i = 0x400000;
 	unsigned int ret;
@@ -1490,51 +1497,53 @@ static int I2CWaitDataReady(struct hw *hw)
 	return i;
 }
 
-static int I2CRead(struct hw *hw, u16 wAddress, u32 *pdwData)
+static int hw20k2_i2c_read(struct hw *hw, u16 addr, u32 *datap)
 {
-	unsigned int RegI2CStatus;
+	struct hw20k2 *hw20k2 = (struct hw20k2 *)hw;
+	unsigned int i2c_status;
 
-	RegI2CStatus = hw_read_20kx(hw, I2C_IF_STATUS);
-	set_field(&RegI2CStatus, I2C_STATUS_BC,
-			(4 == m_bAddressSize) ? 0 : m_bAddressSize);
-	hw_write_20kx(hw, I2C_IF_STATUS, RegI2CStatus);
-	if (!I2CWaitDataReady(hw))
+	i2c_status = hw_read_20kx(hw, I2C_IF_STATUS);
+	set_field(&i2c_status, I2C_STATUS_BC,
+		  (4 == hw20k2->addr_size) ? 0 : hw20k2->addr_size);
+	hw_write_20kx(hw, I2C_IF_STATUS, i2c_status);
+	if (!hw20k2_i2c_wait_data_ready(hw))
 		return -1;
 
-	hw_write_20kx(hw, I2C_IF_WDATA, (u32)wAddress);
-	if (!I2CWaitDataReady(hw))
+	hw_write_20kx(hw, I2C_IF_WDATA, addr);
+	if (!hw20k2_i2c_wait_data_ready(hw))
 		return -1;
 
 	/* Force a read operation */
 	hw_write_20kx(hw, I2C_IF_RDATA, 0);
-	if (!I2CWaitDataReady(hw))
+	if (!hw20k2_i2c_wait_data_ready(hw))
 		return -1;
 
-	*pdwData = hw_read_20kx(hw, I2C_IF_RDATA);
+	*datap = hw_read_20kx(hw, I2C_IF_RDATA);
 
 	return 0;
 }
 
-static int I2CWrite(struct hw *hw, u16 wAddress, u32 dwData)
+static int hw20k2_i2c_write(struct hw *hw, u16 addr, u32 data)
 {
-	unsigned int dwI2CData = (dwData << (m_bAddressSize * 8)) | wAddress;
-	unsigned int RegI2CStatus;
+	struct hw20k2 *hw20k2 = (struct hw20k2 *)hw;
+	unsigned int i2c_data = (data << (hw20k2->addr_size * 8)) | addr;
+	unsigned int i2c_status;
 
-	RegI2CStatus = hw_read_20kx(hw, I2C_IF_STATUS);
+	i2c_status = hw_read_20kx(hw, I2C_IF_STATUS);
 
-	set_field(&RegI2CStatus, I2C_STATUS_BC,
-		  (4 == (m_bAddressSize + m_bDataSize)) ?
-		  0 : (m_bAddressSize + m_bDataSize));
+	set_field(&i2c_status, I2C_STATUS_BC,
+		  (4 == (hw20k2->addr_size + hw20k2->data_size)) ?
+		  0 : (hw20k2->addr_size + hw20k2->data_size));
 
-	hw_write_20kx(hw, I2C_IF_STATUS, RegI2CStatus);
-	I2CWaitDataReady(hw);
+	hw_write_20kx(hw, I2C_IF_STATUS, i2c_status);
+	hw20k2_i2c_wait_data_ready(hw);
 	/* Dummy write to trigger the write oprtation */
 	hw_write_20kx(hw, I2C_IF_WDATA, 0);
-	I2CWaitDataReady(hw);
+	hw20k2_i2c_wait_data_ready(hw);
 
 	/* This is the real data */
-	hw_write_20kx(hw, I2C_IF_WDATA, dwI2CData);
-	I2CWaitDataReady(hw);
+	hw_write_20kx(hw, I2C_IF_WDATA, i2c_data);
+	hw20k2_i2c_wait_data_ready(hw);
 
 	return 0;
 }
@@ -1542,10 +1551,10 @@ static int I2CWrite(struct hw *hw, u16 wAddress, u32 dwData)
 static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
 {
 	int err;
-	u32 dwData;
+	u32 data;
 	int i;
-	struct REGS_CS4382 cs4382_Read = {0};
-	struct REGS_CS4382 cs4382_Def = {
+	struct regs_cs4382 cs_read = {0};
+	struct regs_cs4382 cs_def = {
 				   0x00000001,  /* Mode Control 1 */
 				   0x00000000,  /* Mode Control 2 */
 				   0x00000084,  /* Mode Control 3 */
@@ -1566,87 +1575,86 @@ static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
 				 };
 
 	/* Set DAC reset bit as output */
-	dwData = hw_read_20kx(hw, GPIO_CTRL);
-	dwData |= 0x02;
-	hw_write_20kx(hw, GPIO_CTRL, dwData);
+	data = hw_read_20kx(hw, GPIO_CTRL);
+	data |= 0x02;
+	hw_write_20kx(hw, GPIO_CTRL, data);
 
-	err = I2CInit(hw, 0x18, 1, 1);
+	err = hw20k2_i2c_init(hw, 0x18, 1, 1);
 	if (err < 0)
 		goto End;
 
 	for (i = 0; i < 2; i++) {
 		/* Reset DAC twice just in-case the chip
 		 * didn't initialized properly */
-		dwData = hw_read_20kx(hw, GPIO_DATA);
+		data = hw_read_20kx(hw, GPIO_DATA);
 		/* GPIO data bit 1 */
-		dwData &= 0xFFFFFFFD;
-		hw_write_20kx(hw, GPIO_DATA, dwData);
+		data &= 0xFFFFFFFD;
+		hw_write_20kx(hw, GPIO_DATA, data);
 		mdelay(10);
-		dwData |= 0x2;
-		hw_write_20kx(hw, GPIO_DATA, dwData);
+		data |= 0x2;
+		hw_write_20kx(hw, GPIO_DATA, data);
 		mdelay(50);
 
 		/* Reset the 2nd time */
-		dwData &= 0xFFFFFFFD;
-		hw_write_20kx(hw, GPIO_DATA, dwData);
+		data &= 0xFFFFFFFD;
+		hw_write_20kx(hw, GPIO_DATA, data);
 		mdelay(10);
-		dwData |= 0x2;
-		hw_write_20kx(hw, GPIO_DATA, dwData);
+		data |= 0x2;
+		hw_write_20kx(hw, GPIO_DATA, data);
 		mdelay(50);
 
-		if (I2CRead(hw, CS4382_MC1,  &cs4382_Read.dwModeControl_1))
+		if (hw20k2_i2c_read(hw, CS4382_MC1,  &cs_read.mode_control_1))
 			continue;
 
-		if (I2CRead(hw, CS4382_MC2,  &cs4382_Read.dwModeControl_2))
+		if (hw20k2_i2c_read(hw, CS4382_MC2,  &cs_read.mode_control_2))
 			continue;
 
-		if (I2CRead(hw, CS4382_MC3,  &cs4382_Read.dwModeControl_3))
+		if (hw20k2_i2c_read(hw, CS4382_MC3,  &cs_read.mode_control_3))
 			continue;
 
-		if (I2CRead(hw, CS4382_FC,   &cs4382_Read.dwFilterControl))
+		if (hw20k2_i2c_read(hw, CS4382_FC,   &cs_read.filter_control))
 			continue;
 
-		if (I2CRead(hw, CS4382_IC,   &cs4382_Read.dwInvertControl))
+		if (hw20k2_i2c_read(hw, CS4382_IC,   &cs_read.invert_control))
 			continue;
 
-		if (I2CRead(hw, CS4382_XC1,  &cs4382_Read.dwMixControl_P1))
+		if (hw20k2_i2c_read(hw, CS4382_XC1,  &cs_read.mix_control_P1))
 			continue;
 
-		if (I2CRead(hw, CS4382_VCA1, &cs4382_Read.dwVolControl_A1))
+		if (hw20k2_i2c_read(hw, CS4382_VCA1, &cs_read.vol_control_A1))
 			continue;
 
-		if (I2CRead(hw, CS4382_VCB1, &cs4382_Read.dwVolControl_B1))
+		if (hw20k2_i2c_read(hw, CS4382_VCB1, &cs_read.vol_control_B1))
 			continue;
 
-		if (I2CRead(hw, CS4382_XC2,  &cs4382_Read.dwMixControl_P2))
+		if (hw20k2_i2c_read(hw, CS4382_XC2,  &cs_read.mix_control_P2))
 			continue;
 
-		if (I2CRead(hw, CS4382_VCA2, &cs4382_Read.dwVolControl_A2))
+		if (hw20k2_i2c_read(hw, CS4382_VCA2, &cs_read.vol_control_A2))
 			continue;
 
-		if (I2CRead(hw, CS4382_VCB2, &cs4382_Read.dwVolControl_B2))
+		if (hw20k2_i2c_read(hw, CS4382_VCB2, &cs_read.vol_control_B2))
 			continue;
 
-		if (I2CRead(hw, CS4382_XC3,  &cs4382_Read.dwMixControl_P3))
+		if (hw20k2_i2c_read(hw, CS4382_XC3,  &cs_read.mix_control_P3))
 			continue;
 
-		if (I2CRead(hw, CS4382_VCA3, &cs4382_Read.dwVolControl_A3))
+		if (hw20k2_i2c_read(hw, CS4382_VCA3, &cs_read.vol_control_A3))
 			continue;
 
-		if (I2CRead(hw, CS4382_VCB3, &cs4382_Read.dwVolControl_B3))
+		if (hw20k2_i2c_read(hw, CS4382_VCB3, &cs_read.vol_control_B3))
 			continue;
 
-		if (I2CRead(hw, CS4382_XC4,  &cs4382_Read.dwMixControl_P4))
+		if (hw20k2_i2c_read(hw, CS4382_XC4,  &cs_read.mix_control_P4))
 			continue;
 
-		if (I2CRead(hw, CS4382_VCA4, &cs4382_Read.dwVolControl_A4))
+		if (hw20k2_i2c_read(hw, CS4382_VCA4, &cs_read.vol_control_A4))
 			continue;
 
-		if (I2CRead(hw, CS4382_VCB4, &cs4382_Read.dwVolControl_B4))
+		if (hw20k2_i2c_read(hw, CS4382_VCB4, &cs_read.vol_control_B4))
 			continue;
 
-		if (memcmp(&cs4382_Read, &cs4382_Def,
-						sizeof(struct REGS_CS4382)))
+		if (memcmp(&cs_read, &cs_def, sizeof(cs_read)))
 			continue;
 		else
 			break;
@@ -1657,29 +1665,29 @@ static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
 
 	/* Note: Every I2C write must have some delay.
 	 * This is not a requirement but the delay works here... */
-	I2CWrite(hw, CS4382_MC1, 0x80);
-	I2CWrite(hw, CS4382_MC2, 0x10);
+	hw20k2_i2c_write(hw, CS4382_MC1, 0x80);
+	hw20k2_i2c_write(hw, CS4382_MC2, 0x10);
 	if (1 == info->msr) {
-		I2CWrite(hw, CS4382_XC1, 0x24);
-		I2CWrite(hw, CS4382_XC2, 0x24);
-		I2CWrite(hw, CS4382_XC3, 0x24);
-		I2CWrite(hw, CS4382_XC4, 0x24);
+		hw20k2_i2c_write(hw, CS4382_XC1, 0x24);
+		hw20k2_i2c_write(hw, CS4382_XC2, 0x24);
+		hw20k2_i2c_write(hw, CS4382_XC3, 0x24);
+		hw20k2_i2c_write(hw, CS4382_XC4, 0x24);
 	} else if (2 == info->msr) {
-		I2CWrite(hw, CS4382_XC1, 0x25);
-		I2CWrite(hw, CS4382_XC2, 0x25);
-		I2CWrite(hw, CS4382_XC3, 0x25);
-		I2CWrite(hw, CS4382_XC4, 0x25);
+		hw20k2_i2c_write(hw, CS4382_XC1, 0x25);
+		hw20k2_i2c_write(hw, CS4382_XC2, 0x25);
+		hw20k2_i2c_write(hw, CS4382_XC3, 0x25);
+		hw20k2_i2c_write(hw, CS4382_XC4, 0x25);
 	} else {
-		I2CWrite(hw, CS4382_XC1, 0x26);
-		I2CWrite(hw, CS4382_XC2, 0x26);
-		I2CWrite(hw, CS4382_XC3, 0x26);
-		I2CWrite(hw, CS4382_XC4, 0x26);
+		hw20k2_i2c_write(hw, CS4382_XC1, 0x26);
+		hw20k2_i2c_write(hw, CS4382_XC2, 0x26);
+		hw20k2_i2c_write(hw, CS4382_XC3, 0x26);
+		hw20k2_i2c_write(hw, CS4382_XC4, 0x26);
 	}
 
 	return 0;
 End:
 
-	I2CUninit(hw);
+	hw20k2_i2c_uninit(hw);
 	return -1;
 }
 
@@ -1721,21 +1729,21 @@ static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
 	case ADC_MICIN:
 		data |= (0x1 << 14);
 		hw_write_20kx(hw, GPIO_DATA, data);
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101),
 				MAKE_WM8775_DATA(0x101)); /* Mic-in */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xE7),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xE7),
 				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xE7),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xE7),
 				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
 		break;
 	case ADC_LINEIN:
 		data &= ~(0x1 << 14);
 		hw_write_20kx(hw, GPIO_DATA, data);
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102),
 				MAKE_WM8775_DATA(0x102)); /* Line-in */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF),
 				MAKE_WM8775_DATA(0xCF)); /* No boost */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
 				MAKE_WM8775_DATA(0xCF)); /* No boost */
 		break;
 	default:
@@ -1748,34 +1756,34 @@ static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
 static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 {
 	int err;
-	u32 dwMux = 2, dwData, dwCtl;
+	u32 mux = 2, data, ctl;
 
 	/*  Set ADC reset bit as output */
-	dwData = hw_read_20kx(hw, GPIO_CTRL);
-	dwData |= (0x1 << 15);
-	hw_write_20kx(hw, GPIO_CTRL, dwData);
+	data = hw_read_20kx(hw, GPIO_CTRL);
+	data |= (0x1 << 15);
+	hw_write_20kx(hw, GPIO_CTRL, data);
 
 	/* Initialize I2C */
-	err = I2CInit(hw, 0x1A, 1, 1);
+	err = hw20k2_i2c_init(hw, 0x1A, 1, 1);
 	if (err < 0) {
 		printk(KERN_ALERT "ctxfi: Failure to acquire I2C!!!\n");
 		goto error;
 	}
 
 	/* Make ADC in normal operation */
-	dwData = hw_read_20kx(hw, GPIO_DATA);
-	dwData &= ~(0x1 << 15);
+	data = hw_read_20kx(hw, GPIO_DATA);
+	data &= ~(0x1 << 15);
 	mdelay(10);
-	dwData |= (0x1 << 15);
-	hw_write_20kx(hw, GPIO_DATA, dwData);
+	data |= (0x1 << 15);
+	hw_write_20kx(hw, GPIO_DATA, data);
 	mdelay(50);
 
 	/* Set the master mode (256fs) */
 	if (1 == info->msr) {
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x02),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x02),
 						MAKE_WM8775_DATA(0x02));
 	} else if (2 == info->msr) {
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x0A),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x0A),
 						MAKE_WM8775_DATA(0x0A));
 	} else {
 		printk(KERN_ALERT "ctxfi: Invalid master sampling "
@@ -1785,35 +1793,35 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 	}
 
 	/* Configure GPIO bit 14 change to line-in/mic-in */
-	dwCtl = hw_read_20kx(hw, GPIO_CTRL);
-	dwCtl |= 0x1<<14;
-	hw_write_20kx(hw, GPIO_CTRL, dwCtl);
+	ctl = hw_read_20kx(hw, GPIO_CTRL);
+	ctl |= 0x1 << 14;
+	hw_write_20kx(hw, GPIO_CTRL, ctl);
 
 	/* Check using Mic-in or Line-in */
-	dwData = hw_read_20kx(hw, GPIO_DATA);
+	data = hw_read_20kx(hw, GPIO_DATA);
 
-	if (dwMux == 1) {
+	if (mux == 1) {
 		/* Configures GPIO data to select Mic-in */
-		dwData |= 0x1<<14;
-		hw_write_20kx(hw, GPIO_DATA, dwData);
+		data |= 0x1 << 14;
+		hw_write_20kx(hw, GPIO_DATA, data);
 
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101),
 				MAKE_WM8775_DATA(0x101)); /* Mic-in */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xE7),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xE7),
 				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xE7),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xE7),
 				MAKE_WM8775_DATA(0xE7)); /* +12dB boost */
-	} else if (dwMux == 2) {
+	} else if (mux == 2) {
 		/* Configures GPIO data to select Line-in */
-		dwData &= ~(0x1<<14);
-		hw_write_20kx(hw, GPIO_DATA, dwData);
+		data &= ~(0x1 << 14);
+		hw_write_20kx(hw, GPIO_DATA, data);
 
 		/* Setup ADC */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102),
 				MAKE_WM8775_DATA(0x102)); /* Line-in */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF),
 				MAKE_WM8775_DATA(0xCF)); /* No boost */
-		I2CWrite(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
+		hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF),
 				MAKE_WM8775_DATA(0xCF)); /* No boost */
 	} else {
 		printk(KERN_ALERT "ctxfi: ERROR!!! Invalid input mux!!!\n");
@@ -1824,7 +1832,7 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 	return 0;
 
 error:
-	I2CUninit(hw);
+	hw20k2_i2c_uninit(hw);
 	return err;
 }
 
@@ -2106,15 +2114,15 @@ static struct hw ct20k2_preset __devinitdata = {
 
 int __devinit create_20k2_hw_obj(struct hw **rhw)
 {
-	struct hw *hw;
+	struct hw20k2 *hw20k2;
 
 	*rhw = NULL;
-	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
-	if (NULL == hw)
+	hw20k2 = kzalloc(sizeof(*hw20k2), GFP_KERNEL);
+	if (!hw20k2)
 		return -ENOMEM;
 
-	*hw = ct20k2_preset;
-	*rhw = hw;
+	hw20k2->hw = ct20k2_preset;
+	*rhw = &hw20k2->hw;
 
 	return 0;
 }
-- 
cgit v0.10.2


From e1696834e8a15d7ef9ae8ffdffe00bac1399a2e3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:32:59 +0200
Subject: xfs: update max log size

Commit a6634fba3dec4a92f0a2c4e30c80b634c0576ad5 in xfsprogs increased the
maximum log size supported by mkfs.  Merged back the changes to xfs_fs.h
so the growfs enforced the same limit and the headers are in sync.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index f7c06fa..c4ea51b 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,10 +239,13 @@ typedef struct xfs_fsop_resblks {
  * Minimum and maximum sizes need for growth checks
  */
 #define XFS_MIN_AG_BLOCKS	64
-#define XFS_MIN_LOG_BLOCKS	512
-#define XFS_MAX_LOG_BLOCKS	(64 * 1024)
-#define XFS_MIN_LOG_BYTES	(256 * 1024)
-#define XFS_MAX_LOG_BYTES	(128 * 1024 * 1024)
+#define XFS_MIN_LOG_BLOCKS	512ULL
+#define XFS_MAX_LOG_BLOCKS	(1024 * 1024ULL)
+#define XFS_MIN_LOG_BYTES	(10 * 1024 * 1024ULL)
+
+/* keep the maximum size under 2^31 by a small amount */
+#define XFS_MAX_LOG_BYTES \
+	((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
 
 /*
  * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
-- 
cgit v0.10.2


From 0c5e1ce89f1eacc366ec421c0f5f681159479c28 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:33:21 +0200
Subject: xfs: validate quota log items during log recovery

Arkadiusz has seen really strange crashes in xfs_qm_dqcheck that
I can only explain by a log item being too smal to actually fit the
xfs_dqblk_t we're dereferencing all over xfs_qm_dqcheck.  So add
graceful checks for NULL or too small quota items to the log recovery
code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7ba4501..47da2fb 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1975,16 +1975,30 @@ xlog_recover_do_reg_buffer(
 		error = 0;
 		if (buf_f->blf_flags &
 		   (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+			if (item->ri_buf[i].i_addr == NULL) {
+				cmn_err(CE_ALERT,
+					"XFS: NULL dquot in %s.", __func__);
+				goto next;
+			}
+			if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) {
+				cmn_err(CE_ALERT,
+					"XFS: dquot too small (%d) in %s.",
+					item->ri_buf[i].i_len, __func__);
+				goto next;
+			}
 			error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
 					       item->ri_buf[i].i_addr,
 					       -1, 0, XFS_QMOPT_DOWARN,
 					       "dquot_buf_recover");
+			if (error)
+				goto next;
 		}
-		if (!error)
-			memcpy(xfs_buf_offset(bp,
-				(uint)bit << XFS_BLI_SHIFT),	/* dest */
-				item->ri_buf[i].i_addr,		/* source */
-				nbits<<XFS_BLI_SHIFT);		/* length */
+
+		memcpy(xfs_buf_offset(bp,
+			(uint)bit << XFS_BLI_SHIFT),	/* dest */
+			item->ri_buf[i].i_addr,		/* source */
+			nbits<<XFS_BLI_SHIFT);		/* length */
+ next:
 		i++;
 		bit += nbits;
 	}
@@ -2615,7 +2629,19 @@ xlog_recover_do_dquot_trans(
 		return (0);
 
 	recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
-	ASSERT(recddq);
+
+	if (item->ri_buf[1].i_addr == NULL) {
+		cmn_err(CE_ALERT,
+			"XFS: NULL dquot in %s.", __func__);
+		return XFS_ERROR(EIO);
+	}
+	if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) {
+		cmn_err(CE_ALERT,
+			"XFS: dquot too small (%d) in %s.",
+			item->ri_buf[1].i_len, __func__);
+		return XFS_ERROR(EIO);
+	}
+
 	/*
 	 * This type of quotas was turned off, so ignore this record.
 	 */
-- 
cgit v0.10.2


From 7d095257e321214e4cf359abd131ba1f09c60cba Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:33:32 +0200
Subject: xfs: kill xfs_qmops

Kill the quota ops function vector and replace it with direct calls or
stubs in the CONFIG_XFS_QUOTA=n case.

Make sure we check XFS_IS_QUOTA_RUNNING in the right spots.  We can remove
the number of those checks because the XFS_TRANS_DQ_DIRTY flag can't be set
otherwise.

This brings us back closer to the way this code worked in IRIX and earlier
Linux versions, but we keep a lot of the more useful factoring of common
code.

Eventually we should also kill xfs_qm_bhv.c, but that's left for a later
patch.

Reduces the size of the source code by about 250 lines and the size of
XFS module by about 1.5 kilobytes with quotas enabled:

   text	   data	    bss	    dec	    hex	filename
 615957	   2960	   3848	 622765	  980ad	fs/xfs/xfs.o
 617231	   3152	   3848	 624231	  98667	fs/xfs/xfs.o.old

Fallout:

 - xfs_qm_dqattach is split into xfs_qm_dqattach_locked which expects
   the inode locked and xfs_qm_dqattach which does the locking around it,
   thus removing XFS_QMOPT_ILOCKED.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 60f107e..222c59e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -88,8 +88,7 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_utils.o \
 				   xfs_vnodeops.o \
 				   xfs_rw.o \
-				   xfs_dmops.o \
-				   xfs_qmops.o
+				   xfs_dmops.o
 
 xfs-$(CONFIG_XFS_TRACE)		+= xfs_btree_trace.o \
 				   xfs_dir2_trace.o
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d0b4994..c7d684f 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -908,7 +908,8 @@ xfs_ioctl_setattr(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	unsigned int		lock_flags = 0;
-	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
+	struct xfs_dquot	*udqp = NULL;
+	struct xfs_dquot	*gdqp = NULL;
 	struct xfs_dquot	*olddquot = NULL;
 	int			code;
 
@@ -928,7 +929,7 @@ xfs_ioctl_setattr(
 	 * because the i_*dquot fields will get updated anyway.
 	 */
 	if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
-		code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+		code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
 					 ip->i_d.di_gid, fa->fsx_projid,
 					 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
 		if (code)
@@ -963,10 +964,11 @@ xfs_ioctl_setattr(
 	 * Do a quota reservation only if projid is actually going to change.
 	 */
 	if (mask & FSX_PROJID) {
-		if (XFS_IS_PQUOTA_ON(mp) &&
+		if (XFS_IS_QUOTA_RUNNING(mp) &&
+		    XFS_IS_PQUOTA_ON(mp) &&
 		    ip->i_d.di_projid != fa->fsx_projid) {
 			ASSERT(tp);
-			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
 						capable(CAP_FOWNER) ?
 						XFS_QMOPT_FORCE_RES : 0);
 			if (code)	/* out of quota */
@@ -1068,8 +1070,8 @@ xfs_ioctl_setattr(
 		 * in the transaction.
 		 */
 		if (ip->i_d.di_projid != fa->fsx_projid) {
-			if (XFS_IS_PQUOTA_ON(mp)) {
-				olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+				olddquot = xfs_qm_vop_chown(tp, ip,
 							&ip->i_gdquot, gdqp);
 			}
 			ip->i_d.di_projid = fa->fsx_projid;
@@ -1115,9 +1117,9 @@ xfs_ioctl_setattr(
 	/*
 	 * Release any dquot(s) the inode had kept before chown.
 	 */
-	XFS_QM_DQRELE(mp, olddquot);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(olddquot);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	if (code)
 		return code;
@@ -1131,8 +1133,8 @@ xfs_ioctl_setattr(
 	return 0;
 
  error_return:
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 	xfs_trans_cancel(tp, 0);
 	if (lock_flags)
 		xfs_iunlock(ip, lock_flags);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bb68526..0d9b64b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -405,6 +405,14 @@ xfs_parseargs(
 		return EINVAL;
 	}
 
+#ifndef CONFIG_XFS_QUOTA
+	if (XFS_IS_QUOTA_RUNNING(mp)) {
+		cmn_err(CE_WARN,
+			"XFS: quota support not available in this kernel.");
+		return EINVAL;
+	}
+#endif
+
 	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
 	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
 		cmn_err(CE_WARN,
@@ -1098,7 +1106,6 @@ xfs_fs_put_super(
 	xfs_freesb(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
-	xfs_qmops_put(mp);
 	xfs_dmops_put(mp);
 	xfs_free_fsname(mp);
 	kfree(mp);
@@ -1168,6 +1175,7 @@ xfs_fs_statfs(
 {
 	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
 	xfs_sb_t		*sbp = &mp->m_sb;
+	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
 	__uint64_t		fakeinos, id;
 	xfs_extlen_t		lsize;
 
@@ -1196,7 +1204,10 @@ xfs_fs_statfs(
 	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
 	spin_unlock(&mp->m_sb_lock);
 
-	XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+			      (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+		xfs_qm_statvfs(ip, statp);
 	return 0;
 }
 
@@ -1404,16 +1415,13 @@ xfs_fs_fill_super(
 	error = xfs_dmops_get(mp);
 	if (error)
 		goto out_free_fsname;
-	error = xfs_qmops_get(mp);
-	if (error)
-		goto out_put_dmops;
 
 	if (silent)
 		flags |= XFS_MFSI_QUIET;
 
 	error = xfs_open_devices(mp);
 	if (error)
-		goto out_put_qmops;
+		goto out_put_dmops;
 
 	if (xfs_icsb_init_counters(mp))
 		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1482,8 +1490,6 @@ xfs_fs_fill_super(
  out_destroy_counters:
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
- out_put_qmops:
-	xfs_qmops_put(mp);
  out_put_dmops:
 	xfs_dmops_put(mp);
  out_free_fsname:
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index f7ba766..b06b95c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -43,6 +43,7 @@
 #include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
 #include "xfs_rw.h"
+#include "xfs_quota.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -317,12 +318,12 @@ xfs_quiesce_data(
 
 	/* push non-blocking */
 	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
-	XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+	xfs_qm_sync(mp, SYNC_BDFLUSH);
 	xfs_filestream_flush(mp);
 
 	/* push and block */
 	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
-	XFS_QM_DQSYNC(mp, SYNC_WAIT);
+	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
 	error = xfs_sync_fsdata(mp, 0);
@@ -467,7 +468,7 @@ xfs_sync_worker(
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
 		xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 		/* dgc: errors ignored here */
-		error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+		error = xfs_qm_sync(mp, SYNC_BDFLUSH);
 		error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
 		if (xfs_log_need_covered(mp))
 			error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index e4babcc..4d6d051 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1194,7 +1194,9 @@ void
 xfs_qm_dqrele(
 	xfs_dquot_t	*dqp)
 {
-	ASSERT(dqp);
+	if (!dqp)
+		return;
+
 	xfs_dqtrace_entry(dqp, "DQRELE");
 
 	xfs_dqlock(dqp);
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index de0f402..6533ead 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -181,7 +181,6 @@ extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
 extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
 					xfs_dqid_t, uint, uint, xfs_dquot_t **);
 extern void		xfs_qm_dqput(xfs_dquot_t *);
-extern void		xfs_qm_dqrele(xfs_dquot_t *);
 extern void		xfs_dqlock(xfs_dquot_t *);
 extern void		xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
 extern void		xfs_dqunlock(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5b66950..aa5d821 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -287,11 +287,13 @@ xfs_qm_rele_quotafs_ref(
  * Just destroy the quotainfo structure.
  */
 void
-xfs_qm_unmount_quotadestroy(
-	xfs_mount_t	*mp)
+xfs_qm_unmount(
+	struct xfs_mount	*mp)
 {
-	if (mp->m_quotainfo)
+	if (mp->m_quotainfo) {
+		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
 		xfs_qm_destroy_quotainfo(mp);
+	}
 }
 
 
@@ -385,8 +387,13 @@ xfs_qm_mount_quotas(
 	if (error) {
 		xfs_fs_cmn_err(CE_WARN, mp,
 			"Failed to initialize disk quotas.");
+		return;
 	}
-	return;
+
+#ifdef QUOTADEBUG
+	if (XFS_IS_QUOTA_ON(mp))
+		xfs_qm_internalqcheck(mp);
+#endif
 }
 
 /*
@@ -774,12 +781,11 @@ xfs_qm_dqattach_grouphint(
  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
  * into account.
  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
  * Inode may get unlocked and relocked in here, and the caller must deal with
  * the consequences.
  */
 int
-xfs_qm_dqattach(
+xfs_qm_dqattach_locked(
 	xfs_inode_t	*ip,
 	uint		flags)
 {
@@ -787,17 +793,14 @@ xfs_qm_dqattach(
 	uint		nquotas = 0;
 	int		error = 0;
 
-	if ((! XFS_IS_QUOTA_ON(mp)) ||
-	    (! XFS_NOT_DQATTACHED(mp, ip)) ||
-	    (ip->i_ino == mp->m_sb.sb_uquotino) ||
-	    (ip->i_ino == mp->m_sb.sb_gquotino))
+	if (!XFS_IS_QUOTA_RUNNING(mp) ||
+	    !XFS_IS_QUOTA_ON(mp) ||
+	    !XFS_NOT_DQATTACHED(mp, ip) ||
+	    ip->i_ino == mp->m_sb.sb_uquotino ||
+	    ip->i_ino == mp->m_sb.sb_gquotino)
 		return 0;
 
-	ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
-	       xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	if (! (flags & XFS_QMOPT_ILOCKED))
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
 	if (XFS_IS_UQUOTA_ON(mp)) {
 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
@@ -849,8 +852,7 @@ xfs_qm_dqattach(
 		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
 	}
 
-      done:
-
+ done:
 #ifdef QUOTADEBUG
 	if (! error) {
 		if (XFS_IS_UQUOTA_ON(mp))
@@ -858,15 +860,22 @@ xfs_qm_dqattach(
 		if (XFS_IS_OQUOTA_ON(mp))
 			ASSERT(ip->i_gdquot);
 	}
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 #endif
+	return error;
+}
 
-	if (! (flags & XFS_QMOPT_ILOCKED))
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+int
+xfs_qm_dqattach(
+	struct xfs_inode	*ip,
+	uint			flags)
+{
+	int			error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_qm_dqattach_locked(ip, flags);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
-#ifdef QUOTADEBUG
-	else
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
 	return error;
 }
 
@@ -912,7 +921,7 @@ xfs_qm_sync(
 	boolean_t	nowait;
 	int		error;
 
-	if (! XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	restarts = 0;
@@ -2319,20 +2328,20 @@ xfs_qm_write_sb_changes(
  */
 int
 xfs_qm_vop_dqalloc(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,
-	uid_t		uid,
-	gid_t		gid,
-	prid_t		prid,
-	uint		flags,
-	xfs_dquot_t	**O_udqpp,
-	xfs_dquot_t	**O_gdqpp)
+	struct xfs_inode	*ip,
+	uid_t			uid,
+	gid_t			gid,
+	prid_t			prid,
+	uint			flags,
+	struct xfs_dquot	**O_udqpp,
+	struct xfs_dquot	**O_gdqpp)
 {
-	int		error;
-	xfs_dquot_t	*uq, *gq;
-	uint		lockflags;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_dquot	*uq, *gq;
+	int			error;
+	uint			lockflags;
 
-	if (!XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	lockflags = XFS_ILOCK_EXCL;
@@ -2346,8 +2355,8 @@ xfs_qm_vop_dqalloc(
 	 * if necessary. The dquot(s) will not be locked.
 	 */
 	if (XFS_NOT_DQATTACHED(mp, ip)) {
-		if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
-					    XFS_QMOPT_ILOCKED))) {
+		error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+		if (error) {
 			xfs_iunlock(ip, lockflags);
 			return error;
 		}
@@ -2469,6 +2478,7 @@ xfs_qm_vop_chown(
 	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
 				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
 
+
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
 
@@ -2508,13 +2518,13 @@ xfs_qm_vop_chown_reserve(
 	xfs_dquot_t	*gdqp,
 	uint		flags)
 {
-	int		error;
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = ip->i_mount;
 	uint		delblks, blkflags, prjflags = 0;
 	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
+	int		error;
+
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-	mp = ip->i_mount;
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
 	delblks = ip->i_delayed_blks;
@@ -2582,28 +2592,23 @@ xfs_qm_vop_chown_reserve(
 
 int
 xfs_qm_vop_rename_dqattach(
-	xfs_inode_t	**i_tab)
+	struct xfs_inode	**i_tab)
 {
-	xfs_inode_t	*ip;
-	int		i;
-	int		error;
-
-	ip = i_tab[0];
+	struct xfs_mount	*mp = i_tab[0]->i_mount;
+	int			i;
 
-	if (! XFS_IS_QUOTA_ON(ip->i_mount))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
-	if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
-		error = xfs_qm_dqattach(ip, 0);
-		if (error)
-			return error;
-	}
-	for (i = 1; (i < 4 && i_tab[i]); i++) {
+	for (i = 0; (i < 4 && i_tab[i]); i++) {
+		struct xfs_inode	*ip = i_tab[i];
+		int			error;
+
 		/*
 		 * Watch out for duplicate entries in the table.
 		 */
-		if ((ip = i_tab[i]) != i_tab[i-1]) {
-			if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+		if (i == 0 || ip != i_tab[i-1]) {
+			if (XFS_NOT_DQATTACHED(mp, ip)) {
 				error = xfs_qm_dqattach(ip, 0);
 				if (error)
 					return error;
@@ -2614,17 +2619,19 @@ xfs_qm_vop_rename_dqattach(
 }
 
 void
-xfs_qm_vop_dqattach_and_dqmod_newinode(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_dquot_t	*udqp,
-	xfs_dquot_t	*gdqp)
+xfs_qm_vop_create_dqattach(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	struct xfs_dquot	*udqp,
+	struct xfs_dquot	*gdqp)
 {
-	if (!XFS_IS_QUOTA_ON(tp->t_mountp))
+	struct xfs_mount	*mp = tp->t_mountp;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
 	if (udqp) {
 		xfs_dqlock(udqp);
@@ -2632,7 +2639,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 		xfs_dqunlock(udqp);
 		ASSERT(ip->i_udquot == NULL);
 		ip->i_udquot = udqp;
-		ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
+		ASSERT(XFS_IS_UQUOTA_ON(mp));
 		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
 		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
@@ -2642,8 +2649,8 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 		xfs_dqunlock(gdqp);
 		ASSERT(ip->i_gdquot == NULL);
 		ip->i_gdquot = gdqp;
-		ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
-		ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+		ASSERT(XFS_IS_OQUOTA_ON(mp));
+		ASSERT((XFS_IS_GQUOTA_ON(mp) ?
 			ip->i_d.di_gid : ip->i_d.di_projid) ==
 				be32_to_cpu(gdqp->q_core.d_id));
 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index a371954..495564b 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -127,8 +127,6 @@ typedef struct xfs_quotainfo {
 } xfs_quotainfo_t;
 
 
-extern xfs_dqtrxops_t	xfs_trans_dquot_ops;
-
 extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
 extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
 			xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
@@ -159,17 +157,11 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_RTBWARNLIMIT	5
 
 extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern void		xfs_qm_mount_quotas(xfs_mount_t *);
 extern int		xfs_qm_quotacheck(xfs_mount_t *);
-extern void		xfs_qm_unmount_quotadestroy(xfs_mount_t *);
-extern void		xfs_qm_unmount_quotas(xfs_mount_t *);
 extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-extern int		xfs_qm_sync(xfs_mount_t *, int);
 
 /* dquot stuff */
 extern boolean_t	xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int		xfs_qm_dqattach(xfs_inode_t *, uint);
-extern void		xfs_qm_dqdetach(xfs_inode_t *);
 extern int		xfs_qm_dqpurge_all(xfs_mount_t *, uint);
 extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
 
@@ -183,19 +175,6 @@ extern int		xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
 extern int		xfs_qm_scall_quotaon(xfs_mount_t *, uint);
 extern int		xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
 
-/* vop stuff */
-extern int		xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
-					uid_t, gid_t, prid_t, uint,
-					xfs_dquot_t **, xfs_dquot_t **);
-extern void		xfs_qm_vop_dqattach_and_dqmod_newinode(
-					xfs_trans_t *, xfs_inode_t *,
-					xfs_dquot_t *, xfs_dquot_t *);
-extern int		xfs_qm_vop_rename_dqattach(xfs_inode_t **);
-extern xfs_dquot_t *	xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
-					xfs_dquot_t **, xfs_dquot_t *);
-extern int		xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
-					xfs_dquot_t *, xfs_dquot_t *, uint);
-
 /* list stuff */
 extern void		xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
 extern void		xfs_qm_freelist_unlink(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 63037c6..56a5965 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -84,7 +84,7 @@ xfs_fill_statvfs_from_dquot(
  * return a statvfs of the project, not the entire filesystem.
  * This makes such trees appear as if they are filesystems in themselves.
  */
-STATIC void
+void
 xfs_qm_statvfs(
 	xfs_inode_t		*ip,
 	struct kstatfs		*statp)
@@ -92,20 +92,13 @@ xfs_qm_statvfs(
 	xfs_mount_t		*mp = ip->i_mount;
 	xfs_dquot_t		*dqp;
 
-	if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
-	    !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
-	    		      (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
-		return;
-
 	if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) {
-		xfs_disk_dquot_t	*dp = &dqp->q_core;
-
-		xfs_fill_statvfs_from_dquot(statp, dp);
+		xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
 		xfs_qm_dqput(dqp);
 	}
 }
 
-STATIC int
+int
 xfs_qm_newmount(
 	xfs_mount_t	*mp,
 	uint		*needquotamount,
@@ -114,9 +107,6 @@ xfs_qm_newmount(
 	uint		quotaondisk;
 	uint		uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
 
-	*quotaflags = 0;
-	*needquotamount = B_FALSE;
-
 	quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
 				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
 
@@ -179,66 +169,6 @@ xfs_qm_newmount(
 	return 0;
 }
 
-STATIC int
-xfs_qm_endmount(
-	xfs_mount_t	*mp,
-	uint		needquotamount,
-	uint		quotaflags)
-{
-	if (needquotamount) {
-		ASSERT(mp->m_qflags == 0);
-		mp->m_qflags = quotaflags;
-		xfs_qm_mount_quotas(mp);
-	}
-
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-	if (! (XFS_IS_QUOTA_ON(mp)))
-		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
-	else
-		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
-#endif
-
-#ifdef QUOTADEBUG
-	if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
-		cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
-#endif
-
-	return 0;
-}
-
-STATIC void
-xfs_qm_dqrele_null(
-	xfs_dquot_t	*dq)
-{
-	/*
-	 * Called from XFS, where we always check first for a NULL dquot.
-	 */
-	if (!dq)
-		return;
-	xfs_qm_dqrele(dq);
-}
-
-
-struct xfs_qmops xfs_qmcore_xfs = {
-	.xfs_qminit		= xfs_qm_newmount,
-	.xfs_qmdone		= xfs_qm_unmount_quotadestroy,
-	.xfs_qmmount		= xfs_qm_endmount,
-	.xfs_qmunmount		= xfs_qm_unmount_quotas,
-	.xfs_dqrele		= xfs_qm_dqrele_null,
-	.xfs_dqattach		= xfs_qm_dqattach,
-	.xfs_dqdetach		= xfs_qm_dqdetach,
-	.xfs_dqpurgeall		= xfs_qm_dqpurge_all,
-	.xfs_dqvopalloc		= xfs_qm_vop_dqalloc,
-	.xfs_dqvopcreate	= xfs_qm_vop_dqattach_and_dqmod_newinode,
-	.xfs_dqvoprename	= xfs_qm_vop_rename_dqattach,
-	.xfs_dqvopchown		= xfs_qm_vop_chown,
-	.xfs_dqvopchownresv	= xfs_qm_vop_chown_reserve,
-	.xfs_dqstatvfs		= xfs_qm_statvfs,
-	.xfs_dqsync		= xfs_qm_sync,
-	.xfs_dqtrxops		= &xfs_trans_dquot_ops,
-};
-EXPORT_SYMBOL(xfs_qmcore_xfs);
-
 void __init
 xfs_qm_init(void)
 {
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 447173b..eafa7ab 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -111,7 +111,7 @@ xfs_trans_log_dquot(
  * Carry forward whatever is left of the quota blk reservation to
  * the spanky new transaction
  */
-STATIC void
+void
 xfs_trans_dup_dqinfo(
 	xfs_trans_t	*otp,
 	xfs_trans_t	*ntp)
@@ -167,19 +167,17 @@ xfs_trans_dup_dqinfo(
 /*
  * Wrap around mod_dquot to account for both user and group quotas.
  */
-STATIC void
+void
 xfs_trans_mod_dquot_byino(
 	xfs_trans_t	*tp,
 	xfs_inode_t	*ip,
 	uint		field,
 	long		delta)
 {
-	xfs_mount_t	*mp;
-
-	ASSERT(tp);
-	mp = tp->t_mountp;
+	xfs_mount_t	*mp = tp->t_mountp;
 
-	if (!XFS_IS_QUOTA_ON(mp) ||
+	if (!XFS_IS_QUOTA_RUNNING(mp) ||
+	    !XFS_IS_QUOTA_ON(mp) ||
 	    ip->i_ino == mp->m_sb.sb_uquotino ||
 	    ip->i_ino == mp->m_sb.sb_gquotino)
 		return;
@@ -229,6 +227,7 @@ xfs_trans_mod_dquot(
 	xfs_dqtrx_t	*qtrx;
 
 	ASSERT(tp);
+	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
 	qtrx = NULL;
 
 	if (tp->t_dqinfo == NULL)
@@ -346,7 +345,7 @@ xfs_trans_dqlockedjoin(
  * Unreserve just the reservations done by this transaction.
  * dquot is still left locked at exit.
  */
-STATIC void
+void
 xfs_trans_apply_dquot_deltas(
 	xfs_trans_t		*tp)
 {
@@ -357,7 +356,7 @@ xfs_trans_apply_dquot_deltas(
 	long			totalbdelta;
 	long			totalrtbdelta;
 
-	if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY))
+	if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
 		return;
 
 	ASSERT(tp->t_dqinfo);
@@ -531,7 +530,7 @@ xfs_trans_apply_dquot_deltas(
  * we simply throw those away, since that's the expected behavior
  * when a transaction is curtailed without a commit.
  */
-STATIC void
+void
 xfs_trans_unreserve_and_mod_dquots(
 	xfs_trans_t		*tp)
 {
@@ -768,7 +767,7 @@ xfs_trans_reserve_quota_bydquots(
 {
 	int		resvd = 0, error;
 
-	if (!XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	if (tp && tp->t_dqinfo == NULL)
@@ -811,18 +810,17 @@ xfs_trans_reserve_quota_bydquots(
  * This doesn't change the actual usage, just the reservation.
  * The inode sent in is locked.
  */
-STATIC int
+int
 xfs_trans_reserve_quota_nblks(
-	xfs_trans_t	*tp,
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,
-	long		nblks,
-	long		ninos,
-	uint		flags)
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	long			nblks,
+	long			ninos,
+	uint			flags)
 {
-	int		error;
+	struct xfs_mount	*mp = ip->i_mount;
 
-	if (!XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 	if (XFS_IS_PQUOTA_ON(mp))
 		flags |= XFS_QMOPT_ENOSPC;
@@ -831,7 +829,6 @@ xfs_trans_reserve_quota_nblks(
 	ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
 	ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
 				XFS_TRANS_DQ_RES_RTBLKS ||
 	       (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
@@ -840,11 +837,9 @@ xfs_trans_reserve_quota_nblks(
 	/*
 	 * Reserve nblks against these dquots, with trans as the mediator.
 	 */
-	error = xfs_trans_reserve_quota_bydquots(tp, mp,
-						 ip->i_udquot, ip->i_gdquot,
-						 nblks, ninos,
-						 flags);
-	return error;
+	return xfs_trans_reserve_quota_bydquots(tp, mp,
+						ip->i_udquot, ip->i_gdquot,
+						nblks, ninos, flags);
 }
 
 /*
@@ -895,25 +890,15 @@ STATIC void
 xfs_trans_alloc_dqinfo(
 	xfs_trans_t	*tp)
 {
-	(tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+	tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
 }
 
-STATIC void
+void
 xfs_trans_free_dqinfo(
 	xfs_trans_t	*tp)
 {
 	if (!tp->t_dqinfo)
 		return;
-	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo);
-	(tp)->t_dqinfo = NULL;
+	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+	tp->t_dqinfo = NULL;
 }
-
-xfs_dqtrxops_t	xfs_trans_dquot_ops = {
-	.qo_dup_dqinfo			= xfs_trans_dup_dqinfo,
-	.qo_free_dqinfo			= xfs_trans_free_dqinfo,
-	.qo_mod_dquot_byino		= xfs_trans_mod_dquot_byino,
-	.qo_apply_dquot_deltas		= xfs_trans_apply_dquot_deltas,
-	.qo_reserve_quota_nblks		= xfs_trans_reserve_quota_nblks,
-	.qo_reserve_quota_bydquots	= xfs_trans_reserve_quota_bydquots,
-	.qo_unreserve_and_mod_dquots	= xfs_trans_unreserve_and_mod_dquots,
-};
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5fde165..cd1008b 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -249,8 +249,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
 	/*
 	 * Attach the dquots to the inode.
 	 */
-	if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
-		return (error);
+	error = xfs_qm_dqattach(dp, 0);
+	if (error)
+		return error;
 
 	/*
 	 * If the inode doesn't have an attribute fork, add one.
@@ -311,7 +312,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
 	}
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 
-	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0,
+	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
 				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
 				       XFS_QMOPT_RES_REGBLKS);
 	if (error) {
@@ -501,8 +502,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
 	/*
 	 * Attach the dquots to the inode.
 	 */
-	if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
-		return (error);
+	error = xfs_qm_dqattach(dp, 0);
+	if (error)
+		return error;
 
 	/*
 	 * Start our first transaction of the day.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index ca7c600..4b0f6ef 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc(
 		 * Adjust the disk quota also. This was reserved
 		 * earlier.
 		 */
-		XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
 			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
 					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
 	} else {
@@ -2995,7 +2995,7 @@ xfs_bmap_btalloc(
 		 * Adjust the disk quota also. This was reserved
 		 * earlier.
 		 */
-		XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
 			ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
 					XFS_TRANS_DQ_BCOUNT,
 			(long) args.len);
@@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents(
 		return error;
 	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
 	ip->i_d.di_nblocks--;
-	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, cbp);
 	if (cur->bc_bufs[0] == cbp)
 		cur->bc_bufs[0] = NULL;
@@ -3386,7 +3386,7 @@ xfs_bmap_del_extent(
 	 * Adjust quota data.
 	 */
 	if (qfield)
-		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks);
+		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
 
 	/*
 	 * Account for change in delayed indirect blocks.
@@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree(
 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	ip->i_d.di_nblocks++;
-	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
 	/*
 	 * Fill in the child block.
@@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents(
 		XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
 		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 		ip->i_d.di_nblocks = 1;
-		XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
+		xfs_trans_mod_dquot_byino(tp, ip,
 			XFS_TRANS_DQ_BCOUNT, 1L);
 		flags |= xfs_ilog_fext(whichfork);
 	} else {
@@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork(
 			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
 		goto error0;
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ?
+	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
 			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
 			XFS_QMOPT_RES_REGBLKS);
 	if (error) {
@@ -4983,10 +4983,11 @@ xfs_bmapi(
 				 * adjusted later.  We return if we haven't
 				 * allocated blocks already inside this loop.
 				 */
-				if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS(
-						mp, NULL, ip, (long)alen, 0,
+				error = xfs_trans_reserve_quota_nblks(
+						NULL, ip, (long)alen, 0,
 						rt ? XFS_QMOPT_RES_RTBLKS :
-						     XFS_QMOPT_RES_REGBLKS))) {
+						     XFS_QMOPT_RES_REGBLKS);
+				if (error) {
 					if (n == 0) {
 						*nmap = 0;
 						ASSERT(cur == NULL);
@@ -5035,8 +5036,8 @@ xfs_bmapi(
 					if (XFS_IS_QUOTA_ON(mp))
 						/* unreserve the blocks now */
 						(void)
-						XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
-							mp, NULL, ip,
+						xfs_trans_unreserve_quota_nblks(
+							NULL, ip,
 							(long)alen, 0, rt ?
 							XFS_QMOPT_RES_RTBLKS :
 							XFS_QMOPT_RES_REGBLKS);
@@ -5691,14 +5692,14 @@ xfs_bunmapi(
 				do_div(rtexts, mp->m_sb.sb_rextsize);
 				xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
 						(int64_t)rtexts, rsvd);
-				(void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
-					NULL, ip, -((long)del.br_blockcount), 0,
+				(void)xfs_trans_reserve_quota_nblks(NULL,
+					ip, -((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_RTBLKS);
 			} else {
 				xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
 						(int64_t)del.br_blockcount, rsvd);
-				(void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
-					NULL, ip, -((long)del.br_blockcount), 0,
+				(void)xfs_trans_reserve_quota_nblks(NULL,
+					ip, -((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_REGBLKS);
 			}
 			ip->i_delayed_blks -= del.br_blockcount;
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0760d35..5c1ade0 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -590,7 +590,7 @@ xfs_bmbt_alloc_block(
 	cur->bc_private.b.allocated++;
 	cur->bc_private.b.ip->i_d.di_nblocks++;
 	xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
-	XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
+	xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
 			XFS_TRANS_DQ_BCOUNT, 1L);
 
 	new->l = cpu_to_be64(args.fsbno);
@@ -618,7 +618,7 @@ xfs_bmbt_free_block(
 	ip->i_d.di_nblocks--;
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, bp);
 	return 0;
 }
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 89b81ee..73e1c0d 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -500,10 +500,7 @@ xfs_ireclaim(
 	 * ilock one but will still hold the iolock.
 	 */
 	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	/*
-	 * Release dquots (and their references) if any.
-	 */
-	XFS_QM_DQDETACH(ip->i_mount, ip);
+	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 
 	switch (ip->i_d.di_mode & S_IFMT) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5aaa2d7..feb30a9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -385,7 +385,7 @@ xfs_iomap_write_direct(
 	 * Make sure that the dquots are there. This doesn't hold
 	 * the ilock across a disk read.
 	 */
-	error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
+	error = xfs_qm_dqattach_locked(ip, 0);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -444,8 +444,7 @@ xfs_iomap_write_direct(
 	if (error)
 		goto error_out;
 
-	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-					      qblocks, 0, quota_flag);
+	error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
 	if (error)
 		goto error1;
 
@@ -495,7 +494,7 @@ xfs_iomap_write_direct(
 
 error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
-	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
 
 error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -582,7 +581,7 @@ xfs_iomap_write_delay(
 	 * Make sure that the dquots are there. This doesn't hold
 	 * the ilock across a disk read.
 	 */
-	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+	error = xfs_qm_dqattach_locked(ip, 0);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -684,7 +683,8 @@ xfs_iomap_write_allocate(
 	/*
 	 * Make sure that the dquots are there.
 	 */
-	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
 		return XFS_ERROR(error);
 
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 65a9972..b659db5 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -960,6 +960,53 @@ xfs_check_sizes(xfs_mount_t *mp)
 }
 
 /*
+ * Clear the quotaflags in memory and in the superblock.
+ */
+int
+xfs_mount_reset_sbqflags(
+	struct xfs_mount	*mp)
+{
+	int			error;
+	struct xfs_trans	*tp;
+
+	mp->m_qflags = 0;
+
+	/*
+	 * It is OK to look at sb_qflags here in mount path,
+	 * without m_sb_lock.
+	 */
+	if (mp->m_sb.sb_qflags == 0)
+		return 0;
+	spin_lock(&mp->m_sb_lock);
+	mp->m_sb.sb_qflags = 0;
+	spin_unlock(&mp->m_sb_lock);
+
+	/*
+	 * If the fs is readonly, let the incore superblock run
+	 * with quotas off but don't flush the update out to disk
+	 */
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return 0;
+
+#ifdef QUOTADEBUG
+	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+				      XFS_DEFAULT_LOG_COUNT);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		xfs_fs_cmn_err(CE_ALERT, mp,
+			"xfs_mount_reset_sbqflags: Superblock update failed!");
+		return error;
+	}
+
+	xfs_mod_sb(tp, XFS_SB_QFLAGS);
+	return xfs_trans_commit(tp, 0);
+}
+
+/*
  * This function does the following on an initial mount of a file system:
  *	- reads the superblock from disk and init the mount struct
  *	- if we're a 32-bit kernel, do a size check on the superblock
@@ -976,7 +1023,8 @@ xfs_mountfs(
 	xfs_sb_t	*sbp = &(mp->m_sb);
 	xfs_inode_t	*rip;
 	__uint64_t	resblks;
-	uint		quotamount, quotaflags;
+	uint		quotamount = 0;
+	uint		quotaflags = 0;
 	int		error = 0;
 
 	xfs_mount_common(mp, sbp);
@@ -1210,9 +1258,28 @@ xfs_mountfs(
 	/*
 	 * Initialise the XFS quota management subsystem for this mount
 	 */
-	error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
-	if (error)
-		goto out_rtunmount;
+	if (XFS_IS_QUOTA_RUNNING(mp)) {
+		error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
+		if (error)
+			goto out_rtunmount;
+	} else {
+		ASSERT(!XFS_IS_QUOTA_ON(mp));
+
+		/*
+		 * If a file system had quotas running earlier, but decided to
+		 * mount without -o uquota/pquota/gquota options, revoke the
+		 * quotachecked license.
+		 */
+		if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
+			cmn_err(CE_NOTE,
+				"XFS: resetting qflags for filesystem %s",
+				mp->m_fsname);
+
+			error = xfs_mount_reset_sbqflags(mp);
+			if (error)
+				return error;
+		}
+	}
 
 	/*
 	 * Finish recovering the file system.  This part needed to be
@@ -1228,9 +1295,19 @@ xfs_mountfs(
 	/*
 	 * Complete the quota initialisation, post-log-replay component.
 	 */
-	error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
-	if (error)
-		goto out_rtunmount;
+	if (quotamount) {
+		ASSERT(mp->m_qflags == 0);
+		mp->m_qflags = quotaflags;
+
+		xfs_qm_mount_quotas(mp);
+	}
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+	if (XFS_IS_QUOTA_ON(mp))
+		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
+	else
+		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
+#endif
 
 	/*
 	 * Now we are mounted, reserve a small amount of unused space for
@@ -1279,12 +1356,7 @@ xfs_unmountfs(
 	__uint64_t		resblks;
 	int			error;
 
-	/*
-	 * Release dquot that rootinode, rbmino and rsumino might be holding,
-	 * and release the quota inodes.
-	 */
-	XFS_QM_UNMOUNT(mp);
-
+	xfs_qm_unmount_quotas(mp);
 	xfs_rtunmount_inodes(mp);
 	IRELE(mp->m_rootip);
 
@@ -1301,10 +1373,7 @@ xfs_unmountfs(
 	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
 	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
 
-	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
-
-	if (mp->m_quotainfo)
-		XFS_QM_DONE(mp);
+	xfs_qm_unmount(mp);
 
 	/*
 	 * Flush out the log synchronously so that we know for sure
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d6a6439..a512238 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -64,6 +64,8 @@ struct xfs_swapext;
 struct xfs_mru_cache;
 struct xfs_nameops;
 struct xfs_ail;
+struct xfs_quotainfo;
+
 
 /*
  * Prototypes and functions for the Data Migration subsystem.
@@ -107,86 +109,6 @@ typedef struct xfs_dmops {
 	(*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
 
 
-/*
- * Prototypes and functions for the Quota Management subsystem.
- */
-
-struct xfs_dquot;
-struct xfs_dqtrxops;
-struct xfs_quotainfo;
-
-typedef int	(*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
-typedef int	(*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
-typedef void	(*xfs_qmunmount_t)(struct xfs_mount *);
-typedef void	(*xfs_qmdone_t)(struct xfs_mount *);
-typedef void	(*xfs_dqrele_t)(struct xfs_dquot *);
-typedef int	(*xfs_dqattach_t)(struct xfs_inode *, uint);
-typedef void	(*xfs_dqdetach_t)(struct xfs_inode *);
-typedef int	(*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
-typedef int	(*xfs_dqvopalloc_t)(struct xfs_mount *,
-			struct xfs_inode *, uid_t, gid_t, prid_t, uint,
-			struct xfs_dquot **, struct xfs_dquot **);
-typedef void	(*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
-			struct xfs_dquot *, struct xfs_dquot *);
-typedef int	(*xfs_dqvoprename_t)(struct xfs_inode **);
-typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
-			struct xfs_trans *, struct xfs_inode *,
-			struct xfs_dquot **, struct xfs_dquot *);
-typedef int	(*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
-			struct xfs_dquot *, struct xfs_dquot *, uint);
-typedef void	(*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
-typedef int	(*xfs_dqsync_t)(struct xfs_mount *, int flags);
-
-typedef struct xfs_qmops {
-	xfs_qminit_t		xfs_qminit;
-	xfs_qmdone_t		xfs_qmdone;
-	xfs_qmmount_t		xfs_qmmount;
-	xfs_qmunmount_t		xfs_qmunmount;
-	xfs_dqrele_t		xfs_dqrele;
-	xfs_dqattach_t		xfs_dqattach;
-	xfs_dqdetach_t		xfs_dqdetach;
-	xfs_dqpurgeall_t	xfs_dqpurgeall;
-	xfs_dqvopalloc_t	xfs_dqvopalloc;
-	xfs_dqvopcreate_t	xfs_dqvopcreate;
-	xfs_dqvoprename_t	xfs_dqvoprename;
-	xfs_dqvopchown_t	xfs_dqvopchown;
-	xfs_dqvopchownresv_t	xfs_dqvopchownresv;
-	xfs_dqstatvfs_t		xfs_dqstatvfs;
-	xfs_dqsync_t		xfs_dqsync;
-	struct xfs_dqtrxops	*xfs_dqtrxops;
-} xfs_qmops_t;
-
-#define XFS_QM_INIT(mp, mnt, fl) \
-	(*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
-#define XFS_QM_MOUNT(mp, mnt, fl) \
-	(*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
-#define XFS_QM_UNMOUNT(mp) \
-	(*(mp)->m_qm_ops->xfs_qmunmount)(mp)
-#define XFS_QM_DONE(mp) \
-	(*(mp)->m_qm_ops->xfs_qmdone)(mp)
-#define XFS_QM_DQRELE(mp, dq) \
-	(*(mp)->m_qm_ops->xfs_dqrele)(dq)
-#define XFS_QM_DQATTACH(mp, ip, fl) \
-	(*(mp)->m_qm_ops->xfs_dqattach)(ip, fl)
-#define XFS_QM_DQDETACH(mp, ip) \
-	(*(mp)->m_qm_ops->xfs_dqdetach)(ip)
-#define XFS_QM_DQPURGEALL(mp, fl) \
-	(*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl)
-#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \
-	(*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2)
-#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
-	(*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2)
-#define XFS_QM_DQVOPRENAME(mp, ip) \
-	(*(mp)->m_qm_ops->xfs_dqvoprename)(ip)
-#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \
-	(*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq)
-#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \
-	(*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl)
-#define XFS_QM_DQSTATVFS(ip, statp) \
-	(*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp)
-#define XFS_QM_DQSYNC(mp, flags) \
-	(*(mp)->m_qm_ops->xfs_dqsync)(mp, flags)
-
 #ifdef HAVE_PERCPU_SB
 
 /*
@@ -510,8 +432,6 @@ extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
 
 extern int	xfs_dmops_get(struct xfs_mount *);
 extern void	xfs_dmops_put(struct xfs_mount *);
-extern int	xfs_qmops_get(struct xfs_mount *);
-extern void	xfs_qmops_put(struct xfs_mount *);
 
 extern struct xfs_dmops xfs_dmcore_xfs;
 
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
deleted file mode 100644
index e101790..0000000
--- a/fs/xfs/xfs_qmops.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_error.h"
-
-
-STATIC struct xfs_dquot *
-xfs_dqvopchown_default(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*ip,
-	struct xfs_dquot	**dqp,
-	struct xfs_dquot	*dq)
-{
-	return NULL;
-}
-
-/*
- * Clear the quotaflags in memory and in the superblock.
- */
-int
-xfs_mount_reset_sbqflags(xfs_mount_t *mp)
-{
-	int			error;
-	xfs_trans_t		*tp;
-
-	mp->m_qflags = 0;
-	/*
-	 * It is OK to look at sb_qflags here in mount path,
-	 * without m_sb_lock.
-	 */
-	if (mp->m_sb.sb_qflags == 0)
-		return 0;
-	spin_lock(&mp->m_sb_lock);
-	mp->m_sb.sb_qflags = 0;
-	spin_unlock(&mp->m_sb_lock);
-
-	/*
-	 * if the fs is readonly, let the incore superblock run
-	 * with quotas off but don't flush the update out to disk
-	 */
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return 0;
-#ifdef QUOTADEBUG
-	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
-#endif
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-	if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
-				      XFS_DEFAULT_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-			"xfs_mount_reset_sbqflags: Superblock update failed!");
-		return error;
-	}
-	xfs_mod_sb(tp, XFS_SB_QFLAGS);
-	error = xfs_trans_commit(tp, 0);
-	return error;
-}
-
-STATIC int
-xfs_noquota_init(
-	xfs_mount_t	*mp,
-	uint		*needquotamount,
-	uint		*quotaflags)
-{
-	int		error = 0;
-
-	*quotaflags = 0;
-	*needquotamount = B_FALSE;
-
-	ASSERT(!XFS_IS_QUOTA_ON(mp));
-
-	/*
-	 * If a file system had quotas running earlier, but decided to
-	 * mount without -o uquota/pquota/gquota options, revoke the
-	 * quotachecked license.
-	 */
-	if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
-		cmn_err(CE_NOTE,
-                        "XFS resetting qflags for filesystem %s",
-                        mp->m_fsname);
-
-		error = xfs_mount_reset_sbqflags(mp);
-	}
-	return error;
-}
-
-static struct xfs_qmops xfs_qmcore_stub = {
-	.xfs_qminit		= (xfs_qminit_t) xfs_noquota_init,
-	.xfs_qmdone		= (xfs_qmdone_t) fs_noerr,
-	.xfs_qmmount		= (xfs_qmmount_t) fs_noerr,
-	.xfs_qmunmount		= (xfs_qmunmount_t) fs_noerr,
-	.xfs_dqrele		= (xfs_dqrele_t) fs_noerr,
-	.xfs_dqattach		= (xfs_dqattach_t) fs_noerr,
-	.xfs_dqdetach		= (xfs_dqdetach_t) fs_noerr,
-	.xfs_dqpurgeall		= (xfs_dqpurgeall_t) fs_noerr,
-	.xfs_dqvopalloc		= (xfs_dqvopalloc_t) fs_noerr,
-	.xfs_dqvopcreate	= (xfs_dqvopcreate_t) fs_noerr,
-	.xfs_dqvoprename	= (xfs_dqvoprename_t) fs_noerr,
-	.xfs_dqvopchown		= xfs_dqvopchown_default,
-	.xfs_dqvopchownresv	= (xfs_dqvopchownresv_t) fs_noerr,
-	.xfs_dqstatvfs		= (xfs_dqstatvfs_t) fs_noval,
-	.xfs_dqsync		= (xfs_dqsync_t) fs_noerr,
-};
-
-int
-xfs_qmops_get(struct xfs_mount *mp)
-{
-	if (XFS_IS_QUOTA_RUNNING(mp)) {
-#ifdef CONFIG_XFS_QUOTA
-		mp->m_qm_ops = &xfs_qmcore_xfs;
-#else
-		cmn_err(CE_WARN,
-			"XFS: qouta support not available in this kernel.");
-		return EINVAL;
-#endif
-	} else {
-		mp->m_qm_ops = &xfs_qmcore_stub;
-	}
-
-	return 0;
-}
-
-void
-xfs_qmops_put(struct xfs_mount *mp)
-{
-}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index f5d1202..079d2e6 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -197,7 +197,6 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_UMOUNTING	0x0000100 /* filesys is being unmounted */
 #define XFS_QMOPT_DOLOG		0x0000200 /* log buf changes (in quotacheck) */
 #define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
-#define XFS_QMOPT_ILOCKED	0x0000800 /* inode is already locked (excl) */
 #define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
 #define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
 #define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
@@ -302,69 +301,72 @@ typedef struct xfs_dqtrx {
 	long		qt_delrtb_delta;  /* delayed RT blk count changes */
 } xfs_dqtrx_t;
 
-/*
- * Dquot transaction functions, used if quota is enabled.
- */
-typedef void	(*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *);
-typedef void	(*qo_mod_dquot_byino_t)(struct xfs_trans *,
-				struct xfs_inode *, uint, long);
-typedef void	(*qo_free_dqinfo_t)(struct xfs_trans *);
-typedef void	(*qo_apply_dquot_deltas_t)(struct xfs_trans *);
-typedef void	(*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *);
-typedef int	(*qo_reserve_quota_nblks_t)(
-				struct xfs_trans *, struct xfs_mount *,
-				struct xfs_inode *, long, long, uint);
-typedef int	(*qo_reserve_quota_bydquots_t)(
-				struct xfs_trans *, struct xfs_mount *,
-				struct xfs_dquot *, struct xfs_dquot *,
-				long, long, uint);
-typedef struct xfs_dqtrxops {
-	qo_dup_dqinfo_t			qo_dup_dqinfo;
-	qo_free_dqinfo_t		qo_free_dqinfo;
-	qo_mod_dquot_byino_t		qo_mod_dquot_byino;
-	qo_apply_dquot_deltas_t		qo_apply_dquot_deltas;
-	qo_reserve_quota_nblks_t	qo_reserve_quota_nblks;
-	qo_reserve_quota_bydquots_t	qo_reserve_quota_bydquots;
-	qo_unreserve_and_mod_dquots_t	qo_unreserve_and_mod_dquots;
-} xfs_dqtrxops_t;
-
-#define XFS_DQTRXOP(mp, tp, op, args...) \
-		((mp)->m_qm_ops->xfs_dqtrxops ? \
-		((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0)
-
-#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \
-		((mp)->m_qm_ops->xfs_dqtrxops ? \
-		((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0)
-
-#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \
-	XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp)
-#define XFS_TRANS_FREE_DQINFO(mp, tp) \
-	XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo)
-#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \
-	XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta)
-#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \
-	XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas)
-#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \
-	XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl)
-#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \
-	XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl)
-#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \
-	XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots)
-
-#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \
-	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags)
-#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
-	XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \
-				f | XFS_QMOPT_RES_REGBLKS)
-#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
-	XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
+extern void xfs_trans_free_dqinfo(struct xfs_trans *);
+extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
+		uint, long);
+extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
+extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *,
+		struct xfs_inode *, long, long, uint);
+extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
+		struct xfs_mount *, struct xfs_dquot *,
+		struct xfs_dquot *, long, long, uint);
+
+extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
+		struct xfs_dquot **, struct xfs_dquot **);
+extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
+		struct xfs_dquot *, struct xfs_dquot *);
+extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
+extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *,
+		struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *);
+extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *,
+		struct xfs_dquot *, struct xfs_dquot *, uint);
+extern int xfs_qm_dqattach(struct xfs_inode *, uint);
+extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
+extern void xfs_qm_dqdetach(struct xfs_inode *);
+extern void xfs_qm_dqrele(struct xfs_dquot *);
+extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
+extern int xfs_qm_sync(struct xfs_mount *, int);
+extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
+extern void xfs_qm_mount_quotas(struct xfs_mount *);
+extern void xfs_qm_unmount(struct xfs_mount *);
+extern void xfs_qm_unmount_quotas(struct xfs_mount *);
+
+#else
+#define xfs_trans_dup_dqinfo(tp, tp2)
+#define xfs_trans_free_dqinfo(tp)
+#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
+#define xfs_trans_apply_dquot_deltas(tp)
+#define xfs_trans_unreserve_and_mod_dquots(tp)
+#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags)	(0)
+#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl)	(0)
+#define xfs_qm_vop_dqalloc(ip, uid, gid, prid, fl, ou, og)		(0)
+#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
+#define xfs_qm_vop_rename_dqattach(it)					(0)
+#define xfs_qm_vop_chown(tp, ip, old, new)				(NULL)
+#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl)			(0)
+#define xfs_qm_dqattach(ip, fl)						(0)
+#define xfs_qm_dqattach_locked(ip, fl)					(0)
+#define xfs_qm_dqdetach(ip)
+#define xfs_qm_dqrele(d)
+#define xfs_qm_statvfs(ip, s)
+#define xfs_qm_sync(mp, fl)						(0)
+#define xfs_qm_newmount(mp, a, b)					(0)
+#define xfs_qm_mount_quotas(mp)
+#define xfs_qm_unmount(mp)
+#define xfs_qm_unmount_quotas(mp)					(0)
+#endif /* CONFIG_XFS_QUOTA */
+
+#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
+	xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags)
+#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \
+	xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
 				f | XFS_QMOPT_RES_REGBLKS)
 
 extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
 extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
 
-extern struct xfs_qmops xfs_qmcore_xfs;
-
 #endif	/* __KERNEL__ */
-
 #endif	/* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 58f85e9..b81deea 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -166,7 +166,8 @@ xfs_rename(
 	/*
 	 * Attach the dquots to the inodes
 	 */
-	if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
+	error = xfs_qm_vop_rename_dqattach(inodes);
+	if (error) {
 		xfs_trans_cancel(tp, cancel_flags);
 		goto std_return;
 	}
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8570b82..fffabc0 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -297,7 +297,7 @@ xfs_trans_dup(
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
 
-	XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
+	xfs_trans_dup_dqinfo(tp, ntp);
 
 	atomic_inc(&tp->t_mountp->m_active_trans);
 	return ntp;
@@ -831,7 +831,7 @@ shut_us_down:
 		 * means is that we have some (non-persistent) quota
 		 * reservations that need to be unreserved.
 		 */
-		XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+		xfs_trans_unreserve_and_mod_dquots(tp);
 		if (tp->t_ticket) {
 			commit_lsn = xfs_log_done(mp, tp->t_ticket,
 							NULL, log_flags);
@@ -850,10 +850,9 @@ shut_us_down:
 	/*
 	 * If we need to update the superblock, then do it now.
 	 */
-	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
 		xfs_trans_apply_sb_deltas(tp);
-	}
-	XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
+	xfs_trans_apply_dquot_deltas(tp);
 
 	/*
 	 * Ask each log item how many log_vector entries it will
@@ -1058,7 +1057,7 @@ xfs_trans_uncommit(
 	}
 
 	xfs_trans_unreserve_and_mod_sb(tp);
-	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+	xfs_trans_unreserve_and_mod_dquots(tp);
 
 	xfs_trans_free_items(tp, flags);
 	xfs_trans_free_busy(tp);
@@ -1183,7 +1182,7 @@ xfs_trans_cancel(
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
-	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+	xfs_trans_unreserve_and_mod_dquots(tp);
 
 	if (tp->t_ticket) {
 		if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1213,7 +1212,7 @@ xfs_trans_free(
 	xfs_trans_t	*tp)
 {
 	atomic_dec(&tp->t_mountp->m_active_trans);
-	XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+	xfs_trans_free_dqinfo(tp);
 	kmem_zone_free(xfs_trans_zone, tp);
 }
 
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 79b9e5e..4d88616 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -166,7 +166,7 @@ xfs_dir_ialloc(
 			xfs_buf_relse(ialloc_context);
 			if (dqinfo) {
 				tp->t_dqinfo = dqinfo;
-				XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+				xfs_trans_free_dqinfo(tp);
 			}
 			*tpp = ntp;
 			*ipp = NULL;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 19cf90a..b56321b 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -118,7 +118,7 @@ xfs_setattr(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
+		code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
 					 qflags, &udqp, &gdqp);
 		if (code)
 			return code;
@@ -180,10 +180,11 @@ xfs_setattr(
 		 * Do a quota reservation only if uid/gid is actually
 		 * going to change.
 		 */
-		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
+		if (XFS_IS_QUOTA_RUNNING(mp) &&
+		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
 			ASSERT(tp);
-			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
 						capable(CAP_FOWNER) ?
 						XFS_QMOPT_FORCE_RES : 0);
 			if (code)	/* out of quota */
@@ -217,7 +218,7 @@ xfs_setattr(
 		/*
 		 * Make sure that the dquots are attached to the inode.
 		 */
-		code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+		code = xfs_qm_dqattach_locked(ip, 0);
 		if (code)
 			goto error_return;
 
@@ -351,21 +352,21 @@ xfs_setattr(
 		 * in the transaction.
 		 */
 		if (iuid != uid) {
-			if (XFS_IS_UQUOTA_ON(mp)) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
 				ASSERT(mask & ATTR_UID);
 				ASSERT(udqp);
-				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+				olddquot1 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_udquot, udqp);
 			}
 			ip->i_d.di_uid = uid;
 			inode->i_uid = uid;
 		}
 		if (igid != gid) {
-			if (XFS_IS_GQUOTA_ON(mp)) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
 				ASSERT(!XFS_IS_PQUOTA_ON(mp));
 				ASSERT(mask & ATTR_GID);
 				ASSERT(gdqp);
-				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+				olddquot2 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_gdquot, gdqp);
 			}
 			ip->i_d.di_gid = gid;
@@ -461,10 +462,10 @@ xfs_setattr(
 	/*
 	 * Release any dquot(s) the inode had kept before chown.
 	 */
-	XFS_QM_DQRELE(mp, olddquot1);
-	XFS_QM_DQRELE(mp, olddquot2);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(olddquot1);
+	xfs_qm_dqrele(olddquot2);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	if (code) {
 		return code;
@@ -482,8 +483,8 @@ xfs_setattr(
 	commit_flags |= XFS_TRANS_ABORT;
 	/* FALLTHROUGH */
  error_return:
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 	if (tp) {
 		xfs_trans_cancel(tp, commit_flags);
 	}
@@ -739,7 +740,8 @@ xfs_free_eofblocks(
 		/*
 		 * Attach the dquots to the inode up front.
 		 */
-		if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+		error = xfs_qm_dqattach(ip, 0);
+		if (error)
 			return error;
 
 		/*
@@ -1181,7 +1183,8 @@ xfs_inactive(
 
 	ASSERT(ip->i_d.di_nlink == 0);
 
-	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
 		return VN_INACTIVE_CACHE;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1307,7 +1310,7 @@ xfs_inactive(
 		/*
 		 * Credit the quota account(s). The inode is gone.
 		 */
-		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
 
 		/*
 		 * Just ignore errors at this point.  There is nothing we can
@@ -1323,11 +1326,11 @@ xfs_inactive(
 			xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
 				"xfs_trans_commit() returned error %d", error);
 	}
+
 	/*
 	 * Release the dquots held by inode, if any.
 	 */
-	XFS_QM_DQDETACH(mp, ip);
-
+	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
  out:
@@ -1427,8 +1430,7 @@ xfs_create(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(), current_fsgid(), prid,
+	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -1489,7 +1491,7 @@ xfs_create(
 	/*
 	 * Reserve disk quota and the inode.
 	 */
-	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1561,7 +1563,7 @@ xfs_create(
 	 * These ids of the inode couldn't have changed since the new
 	 * inode has been locked ever since it was created.
 	 */
-	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
 
 	/*
 	 * xfs_trans_commit normally decrements the vnode ref count
@@ -1580,8 +1582,8 @@ xfs_create(
 		goto out_dqrele;
 	}
 
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	*ipp = ip;
 
@@ -1602,8 +1604,8 @@ xfs_create(
  out_trans_cancel:
 	xfs_trans_cancel(tp, cancel_flags);
  out_dqrele:
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -1837,11 +1839,11 @@ xfs_remove(
 			return error;
 	}
 
-	error = XFS_QM_DQATTACH(mp, dp, 0);
+	error = xfs_qm_dqattach(dp, 0);
 	if (error)
 		goto std_return;
 
-	error = XFS_QM_DQATTACH(mp, ip, 0);
+	error = xfs_qm_dqattach(ip, 0);
 	if (error)
 		goto std_return;
 
@@ -2028,11 +2030,11 @@ xfs_link(
 
 	/* Return through std_return after this point. */
 
-	error = XFS_QM_DQATTACH(mp, sip, 0);
+	error = xfs_qm_dqattach(sip, 0);
 	if (error)
 		goto std_return;
 
-	error = XFS_QM_DQATTACH(mp, tdp, 0);
+	error = xfs_qm_dqattach(tdp, 0);
 	if (error)
 		goto std_return;
 
@@ -2205,8 +2207,7 @@ xfs_symlink(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(), current_fsgid(), prid,
+	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -2248,7 +2249,7 @@ xfs_symlink(
 	/*
 	 * Reserve disk quota : blocks and inode.
 	 */
-	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
 	if (error)
 		goto error_return;
 
@@ -2288,7 +2289,7 @@ xfs_symlink(
 	/*
 	 * Also attach the dquot(s) to it, if applicable.
 	 */
-	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
 
 	if (resblks)
 		resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -2376,8 +2377,8 @@ xfs_symlink(
 		goto error2;
 	}
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	/* Fall through to std_return with error = 0 or errno from
 	 * xfs_trans_commit	*/
@@ -2401,8 +2402,8 @@ std_return:
 	cancel_flags |= XFS_TRANS_ABORT;
  error_return:
 	xfs_trans_cancel(tp, cancel_flags);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -2541,7 +2542,8 @@ xfs_alloc_file_space(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
 		return error;
 
 	if (len <= 0)
@@ -2628,8 +2630,8 @@ retry:
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-						      qblocks, 0, quota_flag);
+		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
+						      0, quota_flag);
 		if (error)
 			goto error1;
 
@@ -2688,7 +2690,7 @@ dmapi_enospc_check:
 
 error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
-	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
 
 error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2827,7 +2829,8 @@ xfs_free_file_space(
 
 	xfs_itrace_entry(ip);
 
-	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
 		return error;
 
 	error = 0;
@@ -2953,9 +2956,9 @@ xfs_free_file_space(
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0,
-				XFS_QMOPT_RES_REGBLKS);
+		error = xfs_trans_reserve_quota(tp, mp,
+				ip->i_udquot, ip->i_gdquot,
+				resblks, 0, XFS_QMOPT_RES_REGBLKS);
 		if (error)
 			goto error1;
 
-- 
cgit v0.10.2


From 5a34d5cd096310133f9208db294021208a96660d Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Mon, 8 Jun 2009 15:35:03 +0200
Subject: xfs: split inode data writeback from xfs_sync_inodes_ag

In many cases we only want to sync inode data. Start spliting the inode sync
into data sync and inode sync by factoring out the inode data flush.

[hch: minor cleanups]

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b06b95c..7adc62d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -48,6 +48,35 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
+
+STATIC int
+xfs_sync_inode_data(
+	struct xfs_inode	*ip,
+	int			flags)
+{
+	struct inode		*inode = VFS_I(ip);
+	struct address_space *mapping = inode->i_mapping;
+	int			error = 0;
+
+	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+		goto out_wait;
+
+	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+		if (flags & SYNC_TRYLOCK)
+			goto out_wait;
+		xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	}
+
+	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+				0 : XFS_B_ASYNC, FI_NONE);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ out_wait:
+	if (flags & SYNC_IOWAIT)
+		xfs_ioend_wait(ip);
+	return error;
+}
+
 /*
  * Sync all the inodes in the given AG according to the
  * direction given by the flags.
@@ -123,27 +152,10 @@ xfs_sync_inodes_ag(
 		 * If we have to flush data or wait for I/O completion
 		 * we need to hold the iolock.
 		 */
-		if (flags & SYNC_DELWRI) {
-			if (VN_DIRTY(inode)) {
-				if (flags & SYNC_TRYLOCK) {
-					if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
-						lock_flags |= XFS_IOLOCK_SHARED;
-				} else {
-					xfs_ilock(ip, XFS_IOLOCK_SHARED);
-					lock_flags |= XFS_IOLOCK_SHARED;
-				}
-				if (lock_flags & XFS_IOLOCK_SHARED) {
-					error = xfs_flush_pages(ip, 0, -1,
-							(flags & SYNC_WAIT) ? 0
-								: XFS_B_ASYNC,
-							FI_NONE);
-				}
-			}
-			if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
-				xfs_ioend_wait(ip);
-		}
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		if (flags & SYNC_DELWRI)
+			error = xfs_sync_inode_data(ip, flags);
 
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
 		if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
 			if (flags & SYNC_WAIT) {
 				xfs_iflock(ip);
-- 
cgit v0.10.2


From 845b6d0cbbc2304e8a54ed4038272c55f85b2269 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:35:05 +0200
Subject: xfs: split inode flushing from xfs_sync_inodes_ag

In many cases we only want to sync inode metadata. Split out the inode
flushing into a separate helper to prepare factoring the inode sync code.

Based on a patch from Dave Chinner, but redone to keep the current behaviour
exactly and leave changes to the flushing logic to another patch.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 7adc62d..1712caa 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -77,6 +77,35 @@ xfs_sync_inode_data(
 	return error;
 }
 
+STATIC int
+xfs_sync_inode_attr(
+	struct xfs_inode	*ip,
+	int			flags)
+{
+	int			error = 0;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (xfs_inode_clean(ip))
+		goto out_unlock;
+	if (!xfs_iflock_nowait(ip)) {
+		if (!(flags & SYNC_WAIT))
+			goto out_unlock;
+		xfs_iflock(ip);
+	}
+
+	if (xfs_inode_clean(ip)) {
+		xfs_ifunlock(ip);
+		goto out_unlock;
+	}
+
+	error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
+			   XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
+
+ out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return error;
+}
+
 /*
  * Sync all the inodes in the given AG according to the
  * direction given by the flags.
@@ -96,7 +125,6 @@ xfs_sync_inodes_ag(
 	do {
 		struct inode	*inode;
 		xfs_inode_t	*ip = NULL;
-		int		lock_flags = XFS_ILOCK_SHARED;
 
 		/*
 		 * use a gang lookup to find the next inode in the tree
@@ -155,22 +183,10 @@ xfs_sync_inodes_ag(
 		if (flags & SYNC_DELWRI)
 			error = xfs_sync_inode_data(ip, flags);
 
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
-			if (flags & SYNC_WAIT) {
-				xfs_iflock(ip);
-				if (!xfs_inode_clean(ip))
-					error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
-				else
-					xfs_ifunlock(ip);
-			} else if (xfs_iflock_nowait(ip)) {
-				if (!xfs_inode_clean(ip))
-					error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
-				else
-					xfs_ifunlock(ip);
-			}
-		}
-		xfs_iput(ip, lock_flags);
+		if (flags & SYNC_ATTR)
+			error = xfs_sync_inode_attr(ip, flags);
+
+		IRELE(ip);
 
 		if (error)
 			last_error = error;
-- 
cgit v0.10.2


From 1da8eecab5f866b4f5be43adbaadf18e259a8cc5 Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Mon, 8 Jun 2009 15:35:07 +0200
Subject: xfs: factor out inode validation for sync

Separate the validation of inodes found by the radix
tree walk from the radix tree lookup.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 1712caa..c91d5b2 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -49,6 +49,39 @@
 #include <linux/freezer.h>
 
 
+/* must be called with pag_ici_lock held and releases it */
+STATIC int
+xfs_sync_inode_valid(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag)
+{
+	struct inode		*inode = VFS_I(ip);
+
+	/* nothing to sync during shutdown */
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		read_unlock(&pag->pag_ici_lock);
+		return EFSCORRUPTED;
+	}
+
+	/*
+	 * If we can't get a reference on the inode, it must be in reclaim.
+	 * Leave it for the reclaim code to flush. Also avoid inodes that
+	 * haven't been fully initialised.
+	 */
+	if (!igrab(inode)) {
+		read_unlock(&pag->pag_ici_lock);
+		return ENOENT;
+	}
+	read_unlock(&pag->pag_ici_lock);
+
+	if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+		IRELE(ip);
+		return ENOENT;
+	}
+
+	return 0;
+}
+
 STATIC int
 xfs_sync_inode_data(
 	struct xfs_inode	*ip,
@@ -123,7 +156,6 @@ xfs_sync_inodes_ag(
 	int		last_error = 0;
 
 	do {
-		struct inode	*inode;
 		xfs_inode_t	*ip = NULL;
 
 		/*
@@ -152,27 +184,10 @@ xfs_sync_inodes_ag(
 			break;
 		}
 
-		/* nothing to sync during shutdown */
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			read_unlock(&pag->pag_ici_lock);
-			return 0;
-		}
-
-		/*
-		 * If we can't get a reference on the inode, it must be
-		 * in reclaim. Leave it for the reclaim code to flush.
-		 */
-		inode = VFS_I(ip);
-		if (!igrab(inode)) {
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
-		read_unlock(&pag->pag_ici_lock);
-
-		/* avoid new or bad inodes */
-		if (is_bad_inode(inode) ||
-		    xfs_iflags_test(ip, XFS_INEW)) {
-			IRELE(ip);
+		error = xfs_sync_inode_valid(ip, pag);
+		if (error) {
+			if (error == EFSCORRUPTED)
+				return 0;
 			continue;
 		}
 
-- 
cgit v0.10.2


From abc1064742604e60a47a65fa3214dc1a84db093d Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Mon, 8 Jun 2009 15:35:12 +0200
Subject: xfs: remove unused parameter from xfs_reclaim_inodes

The noblock parameter of xfs_reclaim_inodes is only ever set to zero. Remove
it and all the conditional code that is never executed.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index c91d5b2..7eb9d9c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -385,7 +385,7 @@ xfs_quiesce_fs(
 	int	count = 0, pincount;
 
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
-	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+	xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 
 	/*
 	 * This loop must run at least twice.  The first instance of the loop
@@ -509,7 +509,7 @@ xfs_sync_worker(
 
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-		xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+		xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_BDFLUSH);
 		error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
@@ -703,7 +703,6 @@ STATIC void
 xfs_reclaim_inodes_ag(
 	xfs_mount_t	*mp,
 	int		ag,
-	int		noblock,
 	int		mode)
 {
 	xfs_inode_t	*ip = NULL;
@@ -749,25 +748,13 @@ restart:
 			continue;
 		}
 
-		if (noblock) {
-			if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
-				read_unlock(&pag->pag_ici_lock);
-				continue;
-			}
-			if (xfs_ipincount(ip) ||
-			    !xfs_iflock_nowait(ip)) {
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				read_unlock(&pag->pag_ici_lock);
-				continue;
-			}
-		}
 		read_unlock(&pag->pag_ici_lock);
 
 		/*
 		 * hmmm - this is an inode already in reclaim. Do
 		 * we even bother catching it here?
 		 */
-		if (xfs_reclaim_inode(ip, noblock, mode))
+		if (xfs_reclaim_inode(ip, 0, mode))
 			skipped++;
 	} while (nr_found);
 
@@ -782,7 +769,6 @@ restart:
 int
 xfs_reclaim_inodes(
 	xfs_mount_t	*mp,
-	int		 noblock,
 	int		mode)
 {
 	int		i;
@@ -790,7 +776,7 @@ xfs_reclaim_inodes(
 	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
 		if (!mp->m_perag[i].pag_ici_init)
 			continue;
-		xfs_reclaim_inodes_ag(mp, i, noblock, mode);
+		xfs_reclaim_inodes_ag(mp, i, mode);
 	}
 	return 0;
 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 308d5bf..d7b2b5f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,7 +48,7 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
 void xfs_flush_inodes(struct xfs_inode *ip);
 
 int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
-int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 
 void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
 void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b659db5..5c6f092 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1371,7 +1371,7 @@ xfs_unmountfs(
 	 * need to force the log first.
 	 */
 	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
-	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
+	xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);
 
 	xfs_qm_unmount(mp);
 
-- 
cgit v0.10.2


From 75f3cb1393133682958db6f157e1b6473e5a366b Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Mon, 8 Jun 2009 15:35:14 +0200
Subject: xfs: introduce a per-ag inode iterator

Given that we walk across the per-ag inode lists so often, it makes sense to
introduce an iterator for this.

Convert the sync and reclaim code to use this new iterator, quota code will
follow in the next patch.

Also change xfs_reclaim_inode to return -EGAIN instead of 1 for an inode
already under reclaim.  This simplifies the AG iterator and doesn't
matter for the only other caller.

[hch: merged the lookup and execute callbacks back into one to get the
 pag_ici_lock locking correct and simplify the code flow]

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 7eb9d9c..9798643 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -49,6 +49,123 @@
 #include <linux/freezer.h>
 
 
+STATIC xfs_inode_t *
+xfs_inode_ag_lookup(
+	struct xfs_mount	*mp,
+	struct xfs_perag	*pag,
+	uint32_t		*first_index,
+	int			tag)
+{
+	int			nr_found;
+	struct xfs_inode	*ip;
+
+	/*
+	 * use a gang lookup to find the next inode in the tree
+	 * as the tree is sparse and a gang lookup walks to find
+	 * the number of objects requested.
+	 */
+	read_lock(&pag->pag_ici_lock);
+	if (tag == XFS_ICI_NO_TAG) {
+		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+				(void **)&ip, *first_index, 1);
+	} else {
+		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+				(void **)&ip, *first_index, 1, tag);
+	}
+	if (!nr_found)
+		goto unlock;
+
+	/*
+	 * Update the index for the next lookup. Catch overflows
+	 * into the next AG range which can occur if we have inodes
+	 * in the last block of the AG and we are currently
+	 * pointing to the last inode.
+	 */
+	*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+	if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+		goto unlock;
+
+	return ip;
+
+unlock:
+	read_unlock(&pag->pag_ici_lock);
+	return NULL;
+}
+
+STATIC int
+xfs_inode_ag_walk(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		ag,
+	int			(*execute)(struct xfs_inode *ip,
+					   struct xfs_perag *pag, int flags),
+	int			flags,
+	int			tag)
+{
+	struct xfs_perag	*pag = &mp->m_perag[ag];
+	uint32_t		first_index;
+	int			last_error = 0;
+	int			skipped;
+
+restart:
+	skipped = 0;
+	first_index = 0;
+	do {
+		int		error = 0;
+		xfs_inode_t	*ip;
+
+		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
+		if (!ip)
+			break;
+
+		error = execute(ip, pag, flags);
+		if (error == EAGAIN) {
+			skipped++;
+			continue;
+		}
+		if (error)
+			last_error = error;
+		/*
+		 * bail out if the filesystem is corrupted.
+		 */
+		if (error == EFSCORRUPTED)
+			break;
+
+	} while (1);
+
+	if (skipped) {
+		delay(1);
+		goto restart;
+	}
+
+	xfs_put_perag(mp, pag);
+	return last_error;
+}
+
+STATIC int
+xfs_inode_ag_iterator(
+	struct xfs_mount	*mp,
+	int			(*execute)(struct xfs_inode *ip,
+					   struct xfs_perag *pag, int flags),
+	int			flags,
+	int			tag)
+{
+	int			error = 0;
+	int			last_error = 0;
+	xfs_agnumber_t		ag;
+
+	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+		if (!mp->m_perag[ag].pag_ici_init)
+			continue;
+		error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+		if (error) {
+			last_error = error;
+			if (error == EFSCORRUPTED)
+				break;
+		}
+	}
+	return XFS_ERROR(last_error);
+}
+
 /* must be called with pag_ici_lock held and releases it */
 STATIC int
 xfs_sync_inode_valid(
@@ -85,12 +202,17 @@ xfs_sync_inode_valid(
 STATIC int
 xfs_sync_inode_data(
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	int			flags)
 {
 	struct inode		*inode = VFS_I(ip);
 	struct address_space *mapping = inode->i_mapping;
 	int			error = 0;
 
+	error = xfs_sync_inode_valid(ip, pag);
+	if (error)
+		return error;
+
 	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		goto out_wait;
 
@@ -107,16 +229,22 @@ xfs_sync_inode_data(
  out_wait:
 	if (flags & SYNC_IOWAIT)
 		xfs_ioend_wait(ip);
+	IRELE(ip);
 	return error;
 }
 
 STATIC int
 xfs_sync_inode_attr(
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	int			flags)
 {
 	int			error = 0;
 
+	error = xfs_sync_inode_valid(ip, pag);
+	if (error)
+		return error;
+
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	if (xfs_inode_clean(ip))
 		goto out_unlock;
@@ -136,117 +264,33 @@ xfs_sync_inode_attr(
 
  out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	IRELE(ip);
 	return error;
 }
 
-/*
- * Sync all the inodes in the given AG according to the
- * direction given by the flags.
- */
-STATIC int
-xfs_sync_inodes_ag(
-	xfs_mount_t	*mp,
-	int		ag,
-	int		flags)
-{
-	xfs_perag_t	*pag = &mp->m_perag[ag];
-	int		nr_found;
-	uint32_t	first_index = 0;
-	int		error = 0;
-	int		last_error = 0;
-
-	do {
-		xfs_inode_t	*ip = NULL;
-
-		/*
-		 * use a gang lookup to find the next inode in the tree
-		 * as the tree is sparse and a gang lookup walks to find
-		 * the number of objects requested.
-		 */
-		read_lock(&pag->pag_ici_lock);
-		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-				(void**)&ip, first_index, 1);
-
-		if (!nr_found) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/*
-		 * Update the index for the next lookup. Catch overflows
-		 * into the next AG range which can occur if we have inodes
-		 * in the last block of the AG and we are currently
-		 * pointing to the last inode.
-		 */
-		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		error = xfs_sync_inode_valid(ip, pag);
-		if (error) {
-			if (error == EFSCORRUPTED)
-				return 0;
-			continue;
-		}
-
-		/*
-		 * If we have to flush data or wait for I/O completion
-		 * we need to hold the iolock.
-		 */
-		if (flags & SYNC_DELWRI)
-			error = xfs_sync_inode_data(ip, flags);
-
-		if (flags & SYNC_ATTR)
-			error = xfs_sync_inode_attr(ip, flags);
-
-		IRELE(ip);
-
-		if (error)
-			last_error = error;
-		/*
-		 * bail out if the filesystem is corrupted.
-		 */
-		if (error == EFSCORRUPTED)
-			return XFS_ERROR(error);
-
-	} while (nr_found);
-
-	return last_error;
-}
-
 int
 xfs_sync_inodes(
 	xfs_mount_t	*mp,
 	int		flags)
 {
-	int		error;
-	int		last_error;
-	int		i;
+	int		error = 0;
 	int		lflags = XFS_LOG_FORCE;
 
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return 0;
-	error = 0;
-	last_error = 0;
 
 	if (flags & SYNC_WAIT)
 		lflags |= XFS_LOG_SYNC;
 
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		if (!mp->m_perag[i].pag_ici_init)
-			continue;
-		error = xfs_sync_inodes_ag(mp, i, flags);
-		if (error)
-			last_error = error;
-		if (error == EFSCORRUPTED)
-			break;
-	}
 	if (flags & SYNC_DELWRI)
-		xfs_log_force(mp, 0, lflags);
+		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, XFS_ICI_NO_TAG);
 
-	return XFS_ERROR(last_error);
+	if (flags & SYNC_ATTR)
+		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, XFS_ICI_NO_TAG);
+
+	if (!error && (flags & SYNC_DELWRI))
+		xfs_log_force(mp, 0, lflags);
+	return XFS_ERROR(error);
 }
 
 STATIC int
@@ -613,7 +657,7 @@ xfs_reclaim_inode(
 			xfs_ifunlock(ip);
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
-		return 1;
+		return -EAGAIN;
 	}
 	__xfs_iflags_set(ip, XFS_IRECLAIM);
 	spin_unlock(&ip->i_flags_lock);
@@ -698,72 +742,20 @@ xfs_inode_clear_reclaim_tag(
 	xfs_put_perag(mp, pag);
 }
 
-
-STATIC void
-xfs_reclaim_inodes_ag(
-	xfs_mount_t	*mp,
-	int		ag,
-	int		mode)
+STATIC int
+xfs_reclaim_inode_now(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
 {
-	xfs_inode_t	*ip = NULL;
-	xfs_perag_t	*pag = &mp->m_perag[ag];
-	int		nr_found;
-	uint32_t	first_index;
-	int		skipped;
-
-restart:
-	first_index = 0;
-	skipped = 0;
-	do {
-		/*
-		 * use a gang lookup to find the next inode in the tree
-		 * as the tree is sparse and a gang lookup walks to find
-		 * the number of objects requested.
-		 */
-		read_lock(&pag->pag_ici_lock);
-		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
-					(void**)&ip, first_index, 1,
-					XFS_ICI_RECLAIM_TAG);
-
-		if (!nr_found) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/*
-		 * Update the index for the next lookup. Catch overflows
-		 * into the next AG range which can occur if we have inodes
-		 * in the last block of the AG and we are currently
-		 * pointing to the last inode.
-		 */
-		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/* ignore if already under reclaim */
-		if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
-
+	/* ignore if already under reclaim */
+	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
 		read_unlock(&pag->pag_ici_lock);
-
-		/*
-		 * hmmm - this is an inode already in reclaim. Do
-		 * we even bother catching it here?
-		 */
-		if (xfs_reclaim_inode(ip, 0, mode))
-			skipped++;
-	} while (nr_found);
-
-	if (skipped) {
-		delay(1);
-		goto restart;
+		return 0;
 	}
-	return;
+	read_unlock(&pag->pag_ici_lock);
 
+	return xfs_reclaim_inode(ip, 0, flags);
 }
 
 int
@@ -771,14 +763,6 @@ xfs_reclaim_inodes(
 	xfs_mount_t	*mp,
 	int		mode)
 {
-	int		i;
-
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		if (!mp->m_perag[i].pag_ici_init)
-			continue;
-		xfs_reclaim_inodes_ag(mp, i, mode);
-	}
-	return 0;
+	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
+					XFS_ICI_RECLAIM_TAG);
 }
-
-
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index c8641f7..f24b50b 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -212,6 +212,8 @@ typedef struct xfs_perag
 /*
  * tags for inode radix tree
  */
+#define XFS_ICI_NO_TAG		(-1)	/* special flag for an untagged lookup
+					   in xfs_inode_ag_iterator */
 #define XFS_ICI_RECLAIM_TAG	0	/* inode is to be reclaimed */
 
 #define	XFS_AG_MAXLEVELS(mp)		((mp)->m_ag_maxlevels)
-- 
cgit v0.10.2


From fe588ed32867b42e0d906db558ca92fd9f8b128e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:35:27 +0200
Subject: xfs: use generic inode iterator in xfs_qm_dqrele_all_inodes

Use xfs_inode_ag_iterator instead of opencoding the inode walk in the
quota code.  Mark xfs_inode_ag_iterator and xfs_sync_inode_valid non-static
to allow using them from the quota code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 9798643..a3d2e77 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -141,7 +141,7 @@ restart:
 	return last_error;
 }
 
-STATIC int
+int
 xfs_inode_ag_iterator(
 	struct xfs_mount	*mp,
 	int			(*execute)(struct xfs_inode *ip,
@@ -167,7 +167,7 @@ xfs_inode_ag_iterator(
 }
 
 /* must be called with pag_ici_lock held and releases it */
-STATIC int
+int
 xfs_sync_inode_valid(
 	struct xfs_inode	*ip,
 	struct xfs_perag	*pag)
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d7b2b5f..9bb1253 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,4 +54,10 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
 void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
 void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 				struct xfs_inode *ip);
+
+int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+	int flags, int tag);
+
 #endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c7b66f6..7126f85 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -847,105 +847,55 @@ xfs_qm_export_flags(
 }
 
 
-/*
- * Release all the dquots on the inodes in an AG.
- */
-STATIC void
-xfs_qm_dqrele_inodes_ag(
-	xfs_mount_t	*mp,
-	int		ag,
-	uint		flags)
+STATIC int
+xfs_dqrele_inode(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
 {
-	xfs_inode_t	*ip = NULL;
-	xfs_perag_t	*pag = &mp->m_perag[ag];
-	int		first_index = 0;
-	int		nr_found;
-
-	do {
-		/*
-		 * use a gang lookup to find the next inode in the tree
-		 * as the tree is sparse and a gang lookup walks to find
-		 * the number of objects requested.
-		 */
-		read_lock(&pag->pag_ici_lock);
-		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-				(void**)&ip, first_index, 1);
-
-		if (!nr_found) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/*
-		 * Update the index for the next lookup. Catch overflows
-		 * into the next AG range which can occur if we have inodes
-		 * in the last block of the AG and we are currently
-		 * pointing to the last inode.
-		 */
-		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/* skip quota inodes */
-		if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
-			ASSERT(ip->i_udquot == NULL);
-			ASSERT(ip->i_gdquot == NULL);
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
+	int			error;
 
-		/*
-		 * If we can't get a reference on the inode, it must be
-		 * in reclaim. Leave it for the reclaim code to flush.
-		 */
-		if (!igrab(VFS_I(ip))) {
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
+	/* skip quota inodes */
+	if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) {
+		ASSERT(ip->i_udquot == NULL);
+		ASSERT(ip->i_gdquot == NULL);
 		read_unlock(&pag->pag_ici_lock);
+		return 0;
+	}
 
-		/* avoid new inodes though we shouldn't find any here */
-		if (xfs_iflags_test(ip, XFS_INEW)) {
-			IRELE(ip);
-			continue;
-		}
+	error = xfs_sync_inode_valid(ip, pag);
+	if (error)
+		return error;
 
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
-			xfs_qm_dqrele(ip->i_udquot);
-			ip->i_udquot = NULL;
-		}
-		if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
-		    ip->i_gdquot) {
-			xfs_qm_dqrele(ip->i_gdquot);
-			ip->i_gdquot = NULL;
-		}
-		xfs_iput(ip, XFS_ILOCK_EXCL);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+		xfs_qm_dqrele(ip->i_udquot);
+		ip->i_udquot = NULL;
+	}
+	if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+		xfs_qm_dqrele(ip->i_gdquot);
+		ip->i_gdquot = NULL;
+	}
+	xfs_iput(ip, XFS_ILOCK_EXCL);
+	IRELE(ip);
 
-	} while (nr_found);
+	return 0;
 }
 
+
 /*
  * Go thru all the inodes in the file system, releasing their dquots.
+ *
  * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
+ * AFTER this, in the case of quotaoff.
  */
 void
 xfs_qm_dqrele_all_inodes(
 	struct xfs_mount *mp,
 	uint		 flags)
 {
-	int		i;
-
 	ASSERT(mp->m_quotainfo);
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		if (!mp->m_perag[i].pag_ici_init)
-			continue;
-		xfs_qm_dqrele_inodes_ag(mp, i, flags);
-	}
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
 }
 
 /*------------------------------------------------------------------------*/
-- 
cgit v0.10.2


From 075fe1028699f6a280545dfc2cfc5ac82d555c8c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:35:48 +0200
Subject: xfs: split xfs_sync_inodes

xfs_sync_inodes is used to write back either file data or inode metadata.
In general we always do these separately, except for one fishy case in
xfs_fs_put_super that does both.  So separate xfs_sync_inodes into
separate xfs_sync_data and xfs_sync_attr functions.  In xfs_fs_put_super
we first call the data sync and then the attr sync as that was the previous
order.  The moved log force in that path doesn't make a difference because
we will force the log again as part of the real unmount process.

The filesystem readonly checks are not performed by the new function but
instead moved into the callers, given that most callers alredy have it
further up in the stack.  Also add debug checks that we do not pass in
incorrect flags in the new xfs_sync_data and xfs_sync_attr function and
fix the one place that did pass in a wrong flag.

Also remove a comment mentioning xfs_sync_inodes that has been incorrect
for a while because we always take either the iolock or ilock in the
sync path these days.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 94d9a63..cb6e2cc 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -50,9 +50,11 @@ xfs_fs_quota_sync(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
 	if (!XFS_IS_QUOTA_RUNNING(mp))
 		return -ENOSYS;
-	return -xfs_sync_inodes(mp, SYNC_DELWRI);
+	return -xfs_sync_data(mp, 0);
 }
 
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 0d9b64b..d4e7ef8 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1071,7 +1071,18 @@ xfs_fs_put_super(
 	int			unmount_event_flags = 0;
 
 	xfs_syncd_stop(mp);
-	xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		/*
+		 * XXX(hch): this should be SYNC_WAIT.
+		 *
+		 * Or more likely not needed at all because the VFS is already
+		 * calling ->sync_fs after shutting down all filestem
+		 * operations and just before calling ->put_super.
+		 */
+		xfs_sync_data(mp, 0);
+		xfs_sync_attr(mp, 0);
+	}
 
 #ifdef HAVE_DMAPI
 	if (mp->m_flags & XFS_MOUNT_DMAPI) {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a3d2e77..c1a9a11 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -268,29 +268,42 @@ xfs_sync_inode_attr(
 	return error;
 }
 
+/*
+ * Write out pagecache data for the whole filesystem.
+ */
 int
-xfs_sync_inodes(
-	xfs_mount_t	*mp,
-	int		flags)
+xfs_sync_data(
+	struct xfs_mount	*mp,
+	int			flags)
 {
-	int		error = 0;
-	int		lflags = XFS_LOG_FORCE;
+	int			error;
 
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return 0;
+	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT|SYNC_IOWAIT)) == 0);
 
-	if (flags & SYNC_WAIT)
-		lflags |= XFS_LOG_SYNC;
+	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
+				      XFS_ICI_NO_TAG);
+	if (error)
+		return XFS_ERROR(error);
 
-	if (flags & SYNC_DELWRI)
-		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, XFS_ICI_NO_TAG);
+	xfs_log_force(mp, 0,
+		      (flags & SYNC_WAIT) ?
+		       XFS_LOG_FORCE | XFS_LOG_SYNC :
+		       XFS_LOG_FORCE);
+	return 0;
+}
 
-	if (flags & SYNC_ATTR)
-		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, XFS_ICI_NO_TAG);
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ */
+int
+xfs_sync_attr(
+	struct xfs_mount	*mp,
+	int			flags)
+{
+	ASSERT((flags & ~SYNC_WAIT) == 0);
 
-	if (!error && (flags & SYNC_DELWRI))
-		xfs_log_force(mp, 0, lflags);
-	return XFS_ERROR(error);
+	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
+				     XFS_ICI_NO_TAG);
 }
 
 STATIC int
@@ -404,12 +417,12 @@ xfs_quiesce_data(
 	int error;
 
 	/* push non-blocking */
-	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
+	xfs_sync_data(mp, 0);
 	xfs_qm_sync(mp, SYNC_BDFLUSH);
 	xfs_filestream_flush(mp);
 
 	/* push and block */
-	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
+	xfs_sync_data(mp, SYNC_WAIT|SYNC_IOWAIT);
 	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
@@ -438,7 +451,7 @@ xfs_quiesce_fs(
 	 * logged before we can write the unmount record.
 	 */
 	do {
-		xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
+		xfs_sync_attr(mp, SYNC_WAIT);
 		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
 		if (!pincount) {
 			delay(50);
@@ -521,8 +534,8 @@ xfs_flush_inodes_work(
 	void		*arg)
 {
 	struct inode	*inode = arg;
-	xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
-	xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
+	xfs_sync_data(mp, SYNC_TRYLOCK);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_IOWAIT);
 	iput(inode);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 9bb1253..26bfb5c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -29,8 +29,6 @@ typedef struct xfs_sync_work {
 	struct completion	*w_completion;
 } xfs_sync_work_t;
 
-#define SYNC_ATTR		0x0001	/* sync attributes */
-#define SYNC_DELWRI		0x0002	/* look at delayed writes */
 #define SYNC_WAIT		0x0004	/* wait for i/o to complete */
 #define SYNC_BDFLUSH		0x0008	/* BDFLUSH is calling -- don't block */
 #define SYNC_IOWAIT		0x0010  /* wait for all I/O to complete */
@@ -39,7 +37,8 @@ typedef struct xfs_sync_work {
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
 
-int xfs_sync_inodes(struct xfs_mount *mp, int flags);
+int xfs_sync_attr(struct xfs_mount *mp, int flags);
+int xfs_sync_data(struct xfs_mount *mp, int flags);
 int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
 
 int xfs_quiesce_data(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 6c87c8f..edf8bdf 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -542,10 +542,8 @@ xfs_filestream_associate(
 	 * waiting for the lock because someone else is waiting on the lock we
 	 * hold and we cannot drop that as we are in a transaction here.
 	 *
-	 * Lucky for us, this inversion is rarely a problem because it's a
-	 * directory inode that we are trying to lock here and that means the
-	 * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
-	 * used. i.e. freeze, remount-ro, quotasync or unmount.
+	 * Lucky for us, this inversion is not a problem because it's a
+	 * directory inode that we are trying to lock here.
 	 *
 	 * So, if we can't get the iolock without sleeping then just give up
 	 */
-- 
cgit v0.10.2


From b0710ccc6d9fa8fb908b5f6d1b0782a09d80e24f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:37:11 +0200
Subject: xfs: remove SYNC_IOWAIT

We want to wait for all I/O to finish when we do data integrity syncs.  So
there is no reason to keep SYNC_WAIT separate from SYNC_IOWAIT.  This
causes a little change in behaviour for the ENOSPC flushing code which now
does a second submission and wait of buffered I/O, but that should finish
ASAP as we already did an asynchronous writeout earlier.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index c1a9a11..32abd96 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -227,7 +227,7 @@ xfs_sync_inode_data(
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
  out_wait:
-	if (flags & SYNC_IOWAIT)
+	if (flags & SYNC_WAIT)
 		xfs_ioend_wait(ip);
 	IRELE(ip);
 	return error;
@@ -278,7 +278,7 @@ xfs_sync_data(
 {
 	int			error;
 
-	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT|SYNC_IOWAIT)) == 0);
+	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
 				      XFS_ICI_NO_TAG);
@@ -422,7 +422,7 @@ xfs_quiesce_data(
 	xfs_filestream_flush(mp);
 
 	/* push and block */
-	xfs_sync_data(mp, SYNC_WAIT|SYNC_IOWAIT);
+	xfs_sync_data(mp, SYNC_WAIT);
 	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
@@ -535,7 +535,7 @@ xfs_flush_inodes_work(
 {
 	struct inode	*inode = arg;
 	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_IOWAIT);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
 	iput(inode);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 26bfb5c..bda33a0 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -31,7 +31,6 @@ typedef struct xfs_sync_work {
 
 #define SYNC_WAIT		0x0004	/* wait for i/o to complete */
 #define SYNC_BDFLUSH		0x0008	/* BDFLUSH is calling -- don't block */
-#define SYNC_IOWAIT		0x0010  /* wait for all I/O to complete */
 #define SYNC_TRYLOCK		0x0020  /* only try to lock inodes */
 
 int xfs_syncd_init(struct xfs_mount *mp);
-- 
cgit v0.10.2


From 8b5403a6d772d340541cfb30a668fde119c40ac1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:37:16 +0200
Subject: xfs: remove SYNC_BDFLUSH

SYNC_BDFLUSH is a leftover from IRIX and rather misnamed for todays
code.  Make xfs_sync_fsdata and xfs_dq_sync use the SYNC_TRYLOCK flag
for not blocking on logs just as the inode sync code already does.

For xfs_sync_fsdata it's a trivial 1:1 replacement, but for xfs_qm_sync
I use the opportunity to decouple the non-blocking lock case from the
different flushing modes, similar to the inode sync code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 32abd96..b619d6b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -353,7 +353,7 @@ xfs_sync_fsdata(
 	 * If this is xfssyncd() then only sync the superblock if we can
 	 * lock it without sleeping and it is not pinned.
 	 */
-	if (flags & SYNC_BDFLUSH) {
+	if (flags & SYNC_TRYLOCK) {
 		ASSERT(!(flags & SYNC_WAIT));
 
 		bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
@@ -418,7 +418,7 @@ xfs_quiesce_data(
 
 	/* push non-blocking */
 	xfs_sync_data(mp, 0);
-	xfs_qm_sync(mp, SYNC_BDFLUSH);
+	xfs_qm_sync(mp, SYNC_TRYLOCK);
 	xfs_filestream_flush(mp);
 
 	/* push and block */
@@ -568,8 +568,8 @@ xfs_sync_worker(
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
 		xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 		/* dgc: errors ignored here */
-		error = xfs_qm_sync(mp, SYNC_BDFLUSH);
-		error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
+		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+		error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
 		if (xfs_log_need_covered(mp))
 			error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
 	}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index bda33a0..2a10301 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -29,9 +29,8 @@ typedef struct xfs_sync_work {
 	struct completion	*w_completion;
 } xfs_sync_work_t;
 
-#define SYNC_WAIT		0x0004	/* wait for i/o to complete */
-#define SYNC_BDFLUSH		0x0008	/* BDFLUSH is calling -- don't block */
-#define SYNC_TRYLOCK		0x0020  /* only try to lock inodes */
+#define SYNC_WAIT		0x0001	/* wait for i/o to complete */
+#define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
 
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index aa5d821..22b7c8d 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -905,11 +905,6 @@ xfs_qm_dqdetach(
 	}
 }
 
-/*
- * This is called to sync quotas. We can be told to use non-blocking
- * semantics by either the SYNC_BDFLUSH flag or the absence of the
- * SYNC_WAIT flag.
- */
 int
 xfs_qm_sync(
 	xfs_mount_t	*mp,
@@ -918,17 +913,13 @@ xfs_qm_sync(
 	int		recl, restarts;
 	xfs_dquot_t	*dqp;
 	uint		flush_flags;
-	boolean_t	nowait;
 	int		error;
 
 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
+	flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
 	restarts = 0;
-	/*
-	 * We won't block unless we are asked to.
-	 */
-	nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
 
   again:
 	xfs_qm_mplist_lock(mp);
@@ -948,18 +939,10 @@ xfs_qm_sync(
 		 * don't 'seem' to be dirty. ie. don't acquire dqlock.
 		 * This is very similar to what xfs_sync does with inodes.
 		 */
-		if (flags & SYNC_BDFLUSH) {
-			if (! XFS_DQ_IS_DIRTY(dqp))
+		if (flags & SYNC_TRYLOCK) {
+			if (!XFS_DQ_IS_DIRTY(dqp))
 				continue;
-		}
-
-		if (nowait) {
-			/*
-			 * Try to acquire the dquot lock. We are NOT out of
-			 * lock order, but we just don't want to wait for this
-			 * lock, unless somebody wanted us to.
-			 */
-			if (! xfs_qm_dqlock_nowait(dqp))
+			if (!xfs_qm_dqlock_nowait(dqp))
 				continue;
 		} else {
 			xfs_dqlock(dqp);
@@ -976,7 +959,7 @@ xfs_qm_sync(
 		/* XXX a sentinel would be better */
 		recl = XFS_QI_MPLRECLAIMS(mp);
 		if (!xfs_dqflock_nowait(dqp)) {
-			if (nowait) {
+			if (flags & SYNC_TRYLOCK) {
 				xfs_dqunlock(dqp);
 				continue;
 			}
@@ -994,7 +977,6 @@ xfs_qm_sync(
 		 * Let go of the mplist lock. We don't want to hold it
 		 * across a disk write
 		 */
-		flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
 		xfs_qm_mplist_unlock(mp);
 		xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
 		error = xfs_qm_dqflush(dqp, flush_flags);
-- 
cgit v0.10.2


From e7ed70eedccc78e79ce6da2155e9caf90aff4003 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Jun 2009 15:39:24 +0200
Subject: dma-debug: use pr_* instead of printk(KERN_* ...)

The pr_* macros are shorter than the old printk(KERN_ ...) variant.
Change the dma-debug code to use the new macros and save a few
unnecessary line breaks. If lines don't break the source code can also
be grepped more easily.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 9561825..24c4a2c 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -139,7 +139,7 @@ static inline void dump_entry_trace(struct dma_debug_entry *entry)
 {
 #ifdef CONFIG_STACKTRACE
 	if (entry) {
-		printk(KERN_WARNING "Mapped at:\n");
+		pr_warning("Mapped at:\n");
 		print_stack_trace(&entry->stacktrace, 0);
 	}
 #endif
@@ -377,8 +377,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
 	spin_lock_irqsave(&free_entries_lock, flags);
 
 	if (list_empty(&free_entries)) {
-		printk(KERN_ERR "DMA-API: debugging out of memory "
-				"- disabling\n");
+		pr_err("DMA-API: debugging out of memory - disabling\n");
 		global_disable = true;
 		goto out;
 	}
@@ -483,8 +482,7 @@ static int prealloc_memory(u32 num_entries)
 	num_free_entries = num_entries;
 	min_free_entries = num_entries;
 
-	printk(KERN_INFO "DMA-API: preallocated %d debug entries\n",
-			num_entries);
+	pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
 
 	return 0;
 
@@ -534,7 +532,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
 	 * disabled. Since copy_from_user can fault and may sleep we
 	 * need to copy to temporary buffer first
 	 */
-	len = min(count, NAME_MAX_LEN - 1);
+	len = min(count, (size_t)(NAME_MAX_LEN - 1));
 	if (copy_from_user(buf, userbuf, len))
 		return -EFAULT;
 
@@ -557,8 +555,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
 		 * switched off.
 		 */
 		if (current_driver_name[0])
-			printk(KERN_INFO "DMA-API: switching off dma-debug "
-					 "driver filter\n");
+			pr_info("DMA-API: switching off dma-debug driver filter\n");
 		current_driver_name[0] = 0;
 		current_driver = NULL;
 		goto out_unlock;
@@ -576,8 +573,8 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
 	current_driver_name[i] = 0;
 	current_driver = NULL;
 
-	printk(KERN_INFO "DMA-API: enable driver filter for driver [%s]\n",
-	       current_driver_name);
+	pr_info("DMA-API: enable driver filter for driver [%s]\n",
+		current_driver_name);
 
 out_unlock:
 	write_unlock_irqrestore(&driver_name_lock, flags);
@@ -594,7 +591,7 @@ static int dma_debug_fs_init(void)
 {
 	dma_debug_dent = debugfs_create_dir("dma-api", NULL);
 	if (!dma_debug_dent) {
-		printk(KERN_ERR "DMA-API: can not create debugfs directory\n");
+		pr_err("DMA-API: can not create debugfs directory\n");
 		return -ENOMEM;
 	}
 
@@ -693,7 +690,7 @@ void dma_debug_add_bus(struct bus_type *bus)
 
 	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
 	if (nb == NULL) {
-		printk(KERN_ERR "dma_debug_add_bus: out of memory\n");
+		pr_err("dma_debug_add_bus: out of memory\n");
 		return;
 	}
 
@@ -718,8 +715,7 @@ void dma_debug_init(u32 num_entries)
 	}
 
 	if (dma_debug_fs_init() != 0) {
-		printk(KERN_ERR "DMA-API: error creating debugfs entries "
-				"- disabling\n");
+		pr_err("DMA-API: error creating debugfs entries - disabling\n");
 		global_disable = true;
 
 		return;
@@ -729,8 +725,7 @@ void dma_debug_init(u32 num_entries)
 		num_entries = req_entries;
 
 	if (prealloc_memory(num_entries) != 0) {
-		printk(KERN_ERR "DMA-API: debugging out of memory error "
-				"- disabled\n");
+		pr_err("DMA-API: debugging out of memory error - disabled\n");
 		global_disable = true;
 
 		return;
@@ -738,7 +733,7 @@ void dma_debug_init(u32 num_entries)
 
 	nr_total_entries = num_free_entries;
 
-	printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
+	pr_info("DMA-API: debugging enabled by kernel config\n");
 }
 
 static __init int dma_debug_cmdline(char *str)
@@ -747,8 +742,7 @@ static __init int dma_debug_cmdline(char *str)
 		return -EINVAL;
 
 	if (strncmp(str, "off", 3) == 0) {
-		printk(KERN_INFO "DMA-API: debugging disabled on kernel "
-				 "command line\n");
+		pr_info("DMA-API: debugging disabled on kernel command line\n");
 		global_disable = true;
 	}
 
@@ -1239,8 +1233,8 @@ static int __init dma_debug_driver_setup(char *str)
 	}
 
 	if (current_driver_name[0])
-		printk(KERN_INFO "DMA-API: enable driver filter for "
-				 "driver [%s]\n", current_driver_name);
+		pr_info("DMA-API: enable driver filter for driver [%s]\n",
+			current_driver_name);
 
 
 	return 1;
-- 
cgit v0.10.2


From be81c6ea23b8b471141734ef4bc005f5127aaf43 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Jun 2009 15:46:19 +0200
Subject: dma-debug: disable/enable irqs only once in device_dma_allocations

There is no need to disable/enable irqs on each loop iteration. Just
disable irqs for the whole time the loop runs.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 24c4a2c..27b369d 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -649,15 +649,19 @@ static int device_dma_allocations(struct device *dev)
 	unsigned long flags;
 	int count = 0, i;
 
+	local_irq_save(flags);
+
 	for (i = 0; i < HASH_SIZE; ++i) {
-		spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
+		spin_lock(&dma_entry_hash[i].lock);
 		list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
 			if (entry->dev == dev)
 				count += 1;
 		}
-		spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
+		spin_unlock(&dma_entry_hash[i].lock);
 	}
 
+	local_irq_restore(flags);
+
 	return count;
 }
 
-- 
cgit v0.10.2


From 806d31d73843a018cb239fc16ba53d82f4d8dc33 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 15:50:42 +0200
Subject: ALSA: emu10k1 - Fix minimum periods for efx playback

EFX playback stream should have periods_min = 2 to avoid the buffer
position overflow (due to restrictions of the pcm-indirect helper).

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index 78f62fd..55b83ef 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -1736,7 +1736,7 @@ static struct snd_pcm_hardware snd_emu10k1_fx8010_playback =
 	.buffer_bytes_max =	(128*1024),
 	.period_bytes_min =	1024,
 	.period_bytes_max =	(128*1024),
-	.periods_min =		1,
+	.periods_min =		2,
 	.periods_max =		1024,
 	.fifo_size =		0,
 };
-- 
cgit v0.10.2


From 0bf841281e58d0b3cc9fe9dc4383df7694bde6bd Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Jun 2009 15:53:46 +0200
Subject: dma-debug: simplify logic in driver_filter()

This patch makes the driver_filter function more readable by
reorganizing the code. The removal of a code code block to an upper
indentation level makes hard-to-read line-wraps unnecessary.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 27b369d..ad65fc0 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -147,6 +147,10 @@ static inline void dump_entry_trace(struct dma_debug_entry *entry)
 
 static bool driver_filter(struct device *dev)
 {
+	struct device_driver *drv;
+	unsigned long flags;
+	bool ret;
+
 	/* driver filter off */
 	if (likely(!current_driver_name[0]))
 		return true;
@@ -155,32 +159,28 @@ static bool driver_filter(struct device *dev)
 	if (current_driver && dev->driver == current_driver)
 		return true;
 
-	/* driver filter on but not yet initialized */
-	if (!current_driver && current_driver_name[0]) {
-		struct device_driver *drv = get_driver(dev->driver);
-		unsigned long flags;
-		bool ret = false;
-
-		if (!drv)
-			return false;
-
-		/* lock to protect against change of current_driver_name */
-		read_lock_irqsave(&driver_name_lock, flags);
+	if (current_driver || !current_driver_name[0])
+		return false;
 
-		if (drv->name &&
-		    strncmp(current_driver_name, drv->name,
-			    NAME_MAX_LEN-1) == 0) {
-			current_driver = drv;
-			ret = true;
-		}
+	/* driver filter on but not yet initialized */
+	drv = get_driver(dev->driver);
+	if (!drv)
+		return false;
 
-		read_unlock_irqrestore(&driver_name_lock, flags);
-		put_driver(drv);
+	/* lock to protect against change of current_driver_name */
+	read_lock_irqsave(&driver_name_lock, flags);
 
-		return ret;
+	ret = false;
+	if (drv->name &&
+	    strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
+		current_driver = drv;
+		ret = true;
 	}
 
-	return false;
+	read_unlock_irqrestore(&driver_name_lock, flags);
+	put_driver(drv);
+
+	return ret;
 }
 
 #define err_printk(dev, entry, format, arg...) do {		\
-- 
cgit v0.10.2


From c00701101b82f2bc61dfc259748ec6e5288af6a9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 15:58:48 +0200
Subject: ALSA: pcm - A helper function to compose PCM stream name for debug
 prints

Use a common helper function for the PCM stream name displayed in
XRUN and buffer-pointer debug prints.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index adb306f..2288fa0 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -136,6 +136,16 @@ void snd_pcm_playback_silence(struct snd_pcm_substream *substream, snd_pcm_ufram
 			dump_stack();			\
 	} while (0)
 
+static void pcm_debug_name(struct snd_pcm_substream *substream,
+			   char *name, size_t len)
+{
+	snprintf(name, len, "pcmC%dD%d%c:%d",
+		 substream->pcm->card->number,
+		 substream->pcm->device,
+		 substream->stream ? 'c' : 'p',
+		 substream->number);
+}
+
 static void xrun(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -144,10 +154,9 @@ static void xrun(struct snd_pcm_substream *substream)
 		snd_pcm_gettime(runtime, (struct timespec *)&runtime->status->tstamp);
 	snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
 	if (xrun_debug(substream, 1)) {
-		snd_printd(KERN_DEBUG "XRUN: pcmC%dD%d%c\n",
-			   substream->pcm->card->number,
-			   substream->pcm->device,
-			   substream->stream ? 'c' : 'p');
+		char name[16];
+		pcm_debug_name(substream, name, sizeof(name));
+		snd_printd(KERN_DEBUG "XRUN: %s\n", name);
 		dump_stack_on_xrun(substream);
 	}
 }
@@ -163,9 +172,11 @@ snd_pcm_update_hw_ptr_pos(struct snd_pcm_substream *substream,
 		return pos; /* XRUN */
 	if (pos >= runtime->buffer_size) {
 		if (printk_ratelimit()) {
-			snd_printd(KERN_ERR  "BUG: stream = %i, pos = 0x%lx, "
+			char name[16];
+			pcm_debug_name(substream, name, sizeof(name));
+			snd_printd(KERN_ERR  "BUG: %s, pos = 0x%lx, "
 				   "buffer size = 0x%lx, period size = 0x%lx\n",
-				   substream->stream, pos, runtime->buffer_size,
+				   name, pos, runtime->buffer_size,
 				   runtime->period_size);
 		}
 		pos = 0;
-- 
cgit v0.10.2


From 64a8be74357477558183b43156c5536b642de134 Mon Sep 17 00:00:00 2001
From: David Heidelberger <d.okias@gmail.com>
Date: Mon, 8 Jun 2009 16:15:18 +0200
Subject: ALSA: hda - Add 7.1 support for MSI GX620

Added 7.1 support for MSI GX620 and jack quirk.

Reference: kernel bug#13430
	http://bugzilla.kernel.org/show_bug.cgi?id=13430

Signed-off-by: David Heidelberger <d.okias@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 54b0805..de8e10a 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -143,7 +143,8 @@ ALC883/888
   medion	Medion Laptops
   medion-md2	Medion MD2
   targa-dig	Targa/MSI
-  targa-2ch-dig	Targs/MSI with 2-channel
+  targa-2ch-dig	Targa/MSI with 2-channel
+  targa-8ch-dig Targa/MSI with 8-channel (MSI GX620)
   laptop-eapd   3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE)
   lenovo-101e	Lenovo 101E
   lenovo-nb0763	Lenovo NB0763
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1ab1b92..cfe4808 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -220,6 +220,7 @@ enum {
 	ALC883_6ST_DIG,
 	ALC883_TARGA_DIG,
 	ALC883_TARGA_2ch_DIG,
+	ALC883_TARGA_8ch_DIG,
 	ALC883_ACER,
 	ALC883_ACER_ASPIRE,
 	ALC888_ACER_ASPIRE_4930G,
@@ -2722,6 +2723,7 @@ static struct hda_verb alc880_pin_asus_init_verbs[] = {
 /* Enable GPIO mask and set output */
 #define alc880_gpio1_init_verbs	alc_gpio1_init_verbs
 #define alc880_gpio2_init_verbs	alc_gpio2_init_verbs
+#define alc880_gpio3_init_verbs	alc_gpio3_init_verbs
 
 /* Clevo m520g init */
 static struct hda_verb alc880_pin_clevo_init_verbs[] = {
@@ -7719,6 +7721,73 @@ static struct hda_channel_mode alc883_3ST_6ch_modes[3] = {
 	{ 6, alc883_3ST_ch6_init },
 };
 
+
+/*
+ * 2ch mode
+ */
+static struct hda_verb alc883_4ST_ch2_init[] = {
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PhIN_OUT },
+	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
+	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+	{ 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN },
+	{ 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+	{ } /* end */
+};
+
+/*
+ * 4ch mode
+ */
+static struct hda_verb alc883_4ST_ch4_init[] = {
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
+	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+	{ 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x1a, AC_VERB_SET_CONNECT_SEL, 0x01 },
+	{ } /* end */
+};
+
+/*
+ * 6ch mode
+ */
+static struct hda_verb alc883_4ST_ch6_init[] = {
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_CONNECT_SEL, 0x02 },
+	{ 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x1a, AC_VERB_SET_CONNECT_SEL, 0x01 },
+	{ } /* end */
+};
+
+/*
+ * 8ch mode
+ */
+static struct hda_verb alc883_4ST_ch8_init[] = {
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x17, AC_VERB_SET_CONNECT_SEL, 0x03 },
+	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x18, AC_VERB_SET_CONNECT_SEL, 0x02 },
+	{ 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+	{ 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+	{ 0x1a, AC_VERB_SET_CONNECT_SEL, 0x01 },
+	{ } /* end */
+};
+
+static struct hda_channel_mode alc883_4ST_8ch_modes[4] = {
+	{ 2, alc883_4ST_ch2_init },
+	{ 4, alc883_4ST_ch4_init },
+	{ 6, alc883_4ST_ch6_init },
+	{ 8, alc883_4ST_ch8_init },
+};
+
+
 /*
  * 2ch mode
  */
@@ -8355,14 +8424,24 @@ static struct hda_verb alc883_tagra_verbs[] = {
 	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
 	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
 
-	{0x18, AC_VERB_SET_CONNECT_SEL, 0x02}, /* mic/clfe */
-	{0x1a, AC_VERB_SET_CONNECT_SEL, 0x01}, /* line/surround */
-	{0x1b, AC_VERB_SET_CONNECT_SEL, 0x00}, /* HP */
+/* Connect Line-Out side jack (SPDIF) to Side */
+	{0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x17, AC_VERB_SET_CONNECT_SEL, 0x03},
+/* Connect Mic jack to CLFE */
+	{0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x18, AC_VERB_SET_CONNECT_SEL, 0x02},
+/* Connect Line-in jack to Surround */
+	{0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x1a, AC_VERB_SET_CONNECT_SEL, 0x01},
+/* Connect HP out jack to Front */
+	{0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+	{0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x1b, AC_VERB_SET_CONNECT_SEL, 0x00},
 
 	{0x14, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
-	{0x01, AC_VERB_SET_GPIO_MASK, 0x03},
-	{0x01, AC_VERB_SET_GPIO_DIRECTION, 0x03},
-	{0x01, AC_VERB_SET_GPIO_DATA, 0x03},
 
 	{ } /* end */
 };
@@ -8607,8 +8686,8 @@ static void alc883_lenovo_101e_ispeaker_automute(struct hda_codec *codec)
  	unsigned int present;
 	unsigned char bits;
 
- 	present = snd_hda_codec_read(codec, 0x14, 0,
-				     AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
+	present = snd_hda_codec_read(codec, 0x14, 0, AC_VERB_GET_PIN_SENSE, 0)
+		& AC_PINSENSE_PRESENCE;
 	bits = present ? HDA_AMP_MUTE : 0;
 	snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0,
 				 HDA_AMP_MUTE, bits);
@@ -8895,6 +8974,7 @@ static const char *alc883_models[ALC883_MODEL_LAST] = {
 	[ALC883_6ST_DIG]	= "6stack-dig",
 	[ALC883_TARGA_DIG]	= "targa-dig",
 	[ALC883_TARGA_2ch_DIG]	= "targa-2ch-dig",
+	[ALC883_TARGA_8ch_DIG]	= "targa-8ch-dig",
 	[ALC883_ACER]		= "acer",
 	[ALC883_ACER_ASPIRE]	= "acer-aspire",
 	[ALC888_ACER_ASPIRE_4930G]	= "acer-aspire-4930g",
@@ -8979,6 +9059,7 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = {
 	SND_PCI_QUIRK(0x1462, 0x4314, "MSI", ALC883_TARGA_DIG),
 	SND_PCI_QUIRK(0x1462, 0x4319, "MSI", ALC883_TARGA_DIG),
 	SND_PCI_QUIRK(0x1462, 0x4324, "MSI", ALC883_TARGA_DIG),
+	SND_PCI_QUIRK(0x1462, 0x6510, "MSI GX620", ALC883_TARGA_8ch_DIG),
 	SND_PCI_QUIRK(0x1462, 0x6668, "MSI", ALC883_6ST_DIG),
 	SND_PCI_QUIRK(0x1462, 0x7187, "MSI", ALC883_6ST_DIG),
 	SND_PCI_QUIRK(0x1462, 0x7250, "MSI", ALC883_6ST_DIG),
@@ -9108,6 +9189,24 @@ static struct alc_config_preset alc883_presets[] = {
 		.unsol_event = alc883_tagra_unsol_event,
 		.init_hook = alc883_tagra_init_hook,
 	},
+	[ALC883_TARGA_8ch_DIG] = {
+		.mixers = { alc883_base_mixer, alc883_chmode_mixer },
+		.init_verbs = { alc883_init_verbs, alc880_gpio3_init_verbs,
+				alc883_tagra_verbs },
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_adc_nids = ARRAY_SIZE(alc883_adc_nids_rev),
+		.adc_nids = alc883_adc_nids_rev,
+		.capsrc_nids = alc883_capsrc_nids_rev,
+		.dig_out_nid = ALC883_DIGOUT_NID,
+		.dig_in_nid = ALC883_DIGIN_NID,
+		.num_channel_mode = ARRAY_SIZE(alc883_4ST_8ch_modes),
+		.channel_mode = alc883_4ST_8ch_modes,
+		.need_dac_fix = 1,
+		.input_mux = &alc883_capture_source,
+		.unsol_event = alc883_tagra_unsol_event,
+		.init_hook = alc883_tagra_automute,
+	},
 	[ALC883_ACER] = {
 		.mixers = { alc883_base_mixer },
 		/* On TravelMate laptops, GPIO 0 enables the internal speaker
-- 
cgit v0.10.2


From f03ecf50534a81b06544c58a713076d59d54baf9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 16:38:17 +0200
Subject: ALSA: hda - Fix the previous tagra-8ch patch

- Fix a typo in the patch
- Adapted to follow the recent change for unsol event handling

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index cfe4808..337d2a5 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -7726,7 +7726,7 @@ static struct hda_channel_mode alc883_3ST_6ch_modes[3] = {
  * 2ch mode
  */
 static struct hda_verb alc883_4ST_ch2_init[] = {
-	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PhIN_OUT },
+	{ 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
 	{ 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
 	{ 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
 	{ 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
@@ -9205,7 +9205,7 @@ static struct alc_config_preset alc883_presets[] = {
 		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
 		.unsol_event = alc883_tagra_unsol_event,
-		.init_hook = alc883_tagra_automute,
+		.init_hook = alc883_tagra_init_hook,
 	},
 	[ALC883_ACER] = {
 		.mixers = { alc883_base_mixer },
-- 
cgit v0.10.2


From e36b80b658d471be5a8a40f00e2c7614524b86a2 Mon Sep 17 00:00:00 2001
From: Matthieu Castet <castet.matthieu@free.fr>
Date: Fri, 22 May 2009 22:25:04 +0200
Subject: SSB: BCM47xx: Export ssb_watchdog_timer_set

this patch export ssb_watchdog_timer_set to allow to use it in a Linux
watchdog driver.

Signed-off-by: Matthieu CASTET <castet.matthieu@free.fr>
Acked-by : Michael Buesch <mb@bu3sch.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/drivers/ssb/embedded.c b/drivers/ssb/embedded.c
index 7dc3a6b..a0e0d24 100644
--- a/drivers/ssb/embedded.c
+++ b/drivers/ssb/embedded.c
@@ -29,6 +29,7 @@ int ssb_watchdog_timer_set(struct ssb_bus *bus, u32 ticks)
 	}
 	return -ENODEV;
 }
+EXPORT_SYMBOL(ssb_watchdog_timer_set);
 
 u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask)
 {
-- 
cgit v0.10.2


From e082f188f774544bc2c2edf51176157503c98fe4 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 2 Jun 2009 19:05:28 +0100
Subject: MIPS: Sibyte: Honor CONFIG_CMDLINE

Original patch by Imre Kaloz <kaloz@openwrt.org>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/sibyte/cfe/setup.c b/arch/mips/sibyte/cfe/setup.c
index 3de30f7..eb5396c 100644
--- a/arch/mips/sibyte/cfe/setup.c
+++ b/arch/mips/sibyte/cfe/setup.c
@@ -288,13 +288,7 @@ void __init prom_init(void)
 	 */
 	cfe_cons_handle = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE);
 	if (cfe_getenv("LINUX_CMDLINE", arcs_cmdline, CL_SIZE) < 0) {
-		if (argc < 0) {
-			/*
-			 * It's OK for direct boot to not provide a
-			 *  command line
-			 */
-			strcpy(arcs_cmdline, "root=/dev/ram0 ");
-		} else {
+		if (argc >= 0) {
 			/* The loader should have set the command line */
 			/* too early for panic to do any good */
 			printk("LINUX_CMDLINE not defined in cfe.");
-- 
cgit v0.10.2


From c9d89d97f0d174b9154820dd5c6726d1c794cd99 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Date: Tue, 2 Jun 2009 23:15:10 +0900
Subject: MIPS: Kconfig: Remove "Support for" from Cavium system type

Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Acked-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 09b1287..28119e6 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -593,7 +593,7 @@ config WR_PPMC
 	  board, which is based on GT64120 bridge chip.
 
 config CAVIUM_OCTEON_SIMULATOR
-	bool "Support for the Cavium Networks Octeon Simulator"
+	bool "Cavium Networks Octeon Simulator"
 	select CEVT_R4K
 	select 64BIT_PHYS_ADDR
 	select DMA_COHERENT
@@ -607,7 +607,7 @@ config CAVIUM_OCTEON_SIMULATOR
 	  hardware.
 
 config CAVIUM_OCTEON_REFERENCE_BOARD
-	bool "Support for the Cavium Networks Octeon reference board"
+	bool "Cavium Networks Octeon reference board"
 	select CEVT_R4K
 	select 64BIT_PHYS_ADDR
 	select DMA_COHERENT
-- 
cgit v0.10.2


From e25bfc9243f2eab12a2ce92b7f4b8a2e3e6949a6 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Date: Tue, 2 Jun 2009 23:17:07 +0900
Subject: MIPS: Cobalt: PCI bus is always required to obtain the board ID

Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 28119e6..25f3b0a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -72,6 +72,7 @@ config MIPS_COBALT
 	select IRQ_CPU
 	select IRQ_GT641XX
 	select PCI_GT64XXX_PCI0
+	select PCI
 	select SYS_HAS_CPU_NEVADA
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_SUPPORTS_32BIT_KERNEL
-- 
cgit v0.10.2


From 3a553147eaad5d4de90ab1f695aa13ddbea684ec Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Thu, 4 Jun 2009 18:05:49 +0530
Subject: MIPS: ioctl.h: Fix headers_check warnings

Make ioctl.h compatible with asm-generic/ioctl.h and userspace

fix the following 'make headers_check' warning:

  usr/include/asm-mips/ioctl.h:64: extern's make no sense in userspace

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/include/asm/ioctl.h b/arch/mips/include/asm/ioctl.h
index 85067e2..9161634 100644
--- a/arch/mips/include/asm/ioctl.h
+++ b/arch/mips/include/asm/ioctl.h
@@ -60,12 +60,16 @@
 	 ((nr)   << _IOC_NRSHIFT) | \
 	 ((size) << _IOC_SIZESHIFT))
 
+#ifdef __KERNEL__
 /* provoke compile error for invalid uses of size argument */
 extern unsigned int __invalid_size_argument_for_IOC;
 #define _IOC_TYPECHECK(t) \
 	((sizeof(t) == sizeof(t[1]) && \
 	  sizeof(t) < (1 << _IOC_SIZEBITS)) ? \
 	  sizeof(t) : __invalid_size_argument_for_IOC)
+#else
+#define _IOC_TYPECHECK(t)	(sizeof(t))
+#endif
 
 /* used to create numbers */
 #define _IO(type, nr)		_IOC(_IOC_NONE, (type), (nr), 0)
-- 
cgit v0.10.2


From 5636919b5c909fee54a6ef5226475ecae012ad02 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Sat, 28 Feb 2009 09:44:28 +0000
Subject: MIPS: Outline udelay and fix a few issues.

Outlining fixes the issue were on certain CPUs such as the R10000 family
the delay loop would need an extra cycle if it overlaps a cacheline
boundary.

The rewrite also fixes build errors with GCC 4.4 which was changed in
way incompatible with the kernel's inline assembly.

Relying on pure C for computation of the delay value removes the need for
explicit.  The price we pay is a slight slowdown of the computation - to
be fixed on another day.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index 744cd8f..1260443 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -39,8 +39,8 @@ struct cache_desc {
 #define MIPS_CACHE_PINDEX	0x00000020	/* Physically indexed cache */
 
 struct cpuinfo_mips {
-	unsigned long		udelay_val;
-	unsigned long		asid_cache;
+	unsigned int		udelay_val;
+	unsigned int		asid_cache;
 
 	/*
 	 * Capability and feature descriptor structure for MIPS CPU
diff --git a/arch/mips/include/asm/delay.h b/arch/mips/include/asm/delay.h
index b0bccd2..a07e51b 100644
--- a/arch/mips/include/asm/delay.h
+++ b/arch/mips/include/asm/delay.h
@@ -11,94 +11,12 @@
 #ifndef _ASM_DELAY_H
 #define _ASM_DELAY_H
 
-#include <linux/param.h>
-#include <linux/smp.h>
+extern void __delay(unsigned int loops);
+extern void __ndelay(unsigned int ns);
+extern void __udelay(unsigned int us);
 
-#include <asm/compiler.h>
-#include <asm/war.h>
-
-static inline void __delay(unsigned long loops)
-{
-	if (sizeof(long) == 4)
-		__asm__ __volatile__ (
-		"	.set	noreorder				\n"
-		"	.align	3					\n"
-		"1:	bnez	%0, 1b					\n"
-		"	subu	%0, 1					\n"
-		"	.set	reorder					\n"
-		: "=r" (loops)
-		: "0" (loops));
-	else if (sizeof(long) == 8 && !DADDI_WAR)
-		__asm__ __volatile__ (
-		"	.set	noreorder				\n"
-		"	.align	3					\n"
-		"1:	bnez	%0, 1b					\n"
-		"	dsubu	%0, 1					\n"
-		"	.set	reorder					\n"
-		: "=r" (loops)
-		: "0" (loops));
-	else if (sizeof(long) == 8 && DADDI_WAR)
-		__asm__ __volatile__ (
-		"	.set	noreorder				\n"
-		"	.align	3					\n"
-		"1:	bnez	%0, 1b					\n"
-		"	dsubu	%0, %2					\n"
-		"	.set	reorder					\n"
-		: "=r" (loops)
-		: "0" (loops), "r" (1));
-}
-
-
-/*
- * Division by multiplication: you don't have to worry about
- * loss of precision.
- *
- * Use only for very small delays ( < 1 msec).  Should probably use a
- * lookup table, really, as the multiplications take much too long with
- * short delays.  This is a "reasonable" implementation, though (and the
- * first constant multiplications gets optimized away if the delay is
- * a constant)
- */
-
-static inline void __udelay(unsigned long usecs, unsigned long lpj)
-{
-	unsigned long hi, lo;
-
-	/*
-	 * The rates of 128 is rounded wrongly by the catchall case
-	 * for 64-bit.  Excessive precission?  Probably ...
-	 */
-#if defined(CONFIG_64BIT) && (HZ == 128)
-	usecs *= 0x0008637bd05af6c7UL;		/* 2**64 / (1000000 / HZ) */
-#elif defined(CONFIG_64BIT)
-	usecs *= (0x8000000000000000UL / (500000 / HZ));
-#else /* 32-bit junk follows here */
-	usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) +
-	                           0x80000000ULL) >> 32);
-#endif
-
-	if (sizeof(long) == 4)
-		__asm__("multu\t%2, %3"
-		: "=h" (usecs), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-	else if (sizeof(long) == 8 && !R4000_WAR)
-		__asm__("dmultu\t%2, %3"
-		: "=h" (usecs), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-	else if (sizeof(long) == 8 && R4000_WAR)
-		__asm__("dmultu\t%3, %4\n\tmfhi\t%0"
-		: "=r" (usecs), "=h" (hi), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-
-	__delay(usecs);
-}
-
-#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val
-
-#define udelay(usecs) __udelay((usecs), __udelay_val)
+#define ndelay(ns) __udelay(ns)
+#define udelay(us) __udelay(us)
 
 /* make sure "usecs *= ..." in udelay do not overflow. */
 #if HZ >= 1000
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 26760ca..e0a4ac1 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -42,7 +42,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	seq_printf(m, fmt, __cpu_name[n],
 	                           (version >> 4) & 0x0f, version & 0x0f,
 	                           (fp_vers >> 4) & 0x0f, fp_vers & 0x0f);
-	seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n",
+	seq_printf(m, "BogoMIPS\t\t: %u.%02u\n",
 	              cpu_data[n].udelay_val / (500000/HZ),
 	              (cpu_data[n].udelay_val / (5000/HZ)) % 100);
 	seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no");
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index c13c7ad..2adead5 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -2,8 +2,8 @@
 # Makefile for MIPS-specific library files..
 #
 
-lib-y	+= csum_partial.o memcpy.o memcpy-inatomic.o memset.o strlen_user.o \
-	   strncpy_user.o strnlen_user.o uncached.o
+lib-y	+= csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \
+	   strlen_user.o strncpy_user.o strnlen_user.o uncached.o
 
 obj-y			+= iomap.o
 obj-$(CONFIG_PCI)	+= iomap-pci.o
diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c
new file mode 100644
index 0000000..f69c6b5
--- /dev/null
+++ b/arch/mips/lib/delay.c
@@ -0,0 +1,56 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 by Waldorf Electronics
+ * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
+ */
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/smp.h>
+
+#include <asm/compiler.h>
+#include <asm/war.h>
+
+inline void __delay(unsigned int loops)
+{
+	__asm__ __volatile__ (
+	"	.set	noreorder				\n"
+	"	.align	3					\n"
+	"1:	bnez	%0, 1b					\n"
+	"	subu	%0, 1					\n"
+	"	.set	reorder					\n"
+	: "=r" (loops)
+	: "0" (loops));
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * Division by multiplication: you don't have to worry about
+ * loss of precision.
+ *
+ * Use only for very small delays ( < 1 msec).  Should probably use a
+ * lookup table, really, as the multiplications take much too long with
+ * short delays.  This is a "reasonable" implementation, though (and the
+ * first constant multiplications gets optimized away if the delay is
+ * a constant)
+ */
+
+void __udelay(unsigned long us)
+{
+	unsigned int lpj = current_cpu_data.udelay_val;
+
+	__delay((us * 0x000010c7 * HZ * lpj) >> 32);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long ns)
+{
+	unsigned int lpj = current_cpu_data.udelay_val;
+
+	__delay((us * 0x00000005 * HZ * lpj) >> 32);
+}
+EXPORT_SYMBOL(__ndelay);
-- 
cgit v0.10.2


From a0895162fbc1a4168c8cf29e1eb1bbc8c260a80a Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <mfuzzey@gmail.com>
Date: Sat, 6 Jun 2009 16:36:44 +0200
Subject: MXC : update i.MX21 clock support for USB host.

* Use correct clkdev style usb clock name
* Implement rate setting for USB clock
* Introduce _clk_generic_round_rate to factorize the (now 3) uses of rounding code.

Signed-off-by: Martin Fuzzey <mfuzzey@gmail.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/arch/arm/mach-mx2/clock_imx21.c b/arch/arm/mach-mx2/clock_imx21.c
index 999d013..fa2b292 100644
--- a/arch/arm/mach-mx2/clock_imx21.c
+++ b/arch/arm/mach-mx2/clock_imx21.c
@@ -48,6 +48,25 @@ static void _clk_disable(struct clk *clk)
 	__raw_writel(reg, clk->enable_reg);
 }
 
+static unsigned long _clk_generic_round_rate(struct clk *clk,
+			unsigned long rate,
+			u32 max_divisor)
+{
+	u32 div;
+	unsigned long parent_rate;
+
+	parent_rate = clk_get_rate(clk->parent);
+
+	div = parent_rate / rate;
+	if (parent_rate % rate)
+		div++;
+
+	if (div > max_divisor)
+		div = max_divisor;
+
+	return parent_rate / div;
+}
+
 static int _clk_spll_enable(struct clk *clk)
 {
 	u32 reg;
@@ -78,19 +97,7 @@ static void _clk_spll_disable(struct clk *clk)
 static unsigned long _clk_perclkx_round_rate(struct clk *clk,
 					     unsigned long rate)
 {
-	u32 div;
-	unsigned long parent_rate;
-
-	parent_rate = clk_get_rate(clk->parent);
-
-	div = parent_rate / rate;
-	if (parent_rate % rate)
-		div++;
-
-	if (div > 64)
-		div = 64;
-
-	return parent_rate / div;
+	return _clk_generic_round_rate(clk, rate, 64);
 }
 
 static int _clk_perclkx_set_rate(struct clk *clk, unsigned long rate)
@@ -130,6 +137,32 @@ static unsigned long _clk_usb_recalc(struct clk *clk)
 	return parent_rate / (usb_pdf + 1U);
 }
 
+static unsigned long _clk_usb_round_rate(struct clk *clk,
+					     unsigned long rate)
+{
+	return _clk_generic_round_rate(clk, rate, 8);
+}
+
+static int _clk_usb_set_rate(struct clk *clk, unsigned long rate)
+{
+	u32 reg;
+	u32 div;
+	unsigned long parent_rate;
+
+	parent_rate = clk_get_rate(clk->parent);
+
+	div = parent_rate / rate;
+	if (div > 8 || div < 1 || ((parent_rate / div) != rate))
+		return -EINVAL;
+	div--;
+
+	reg = CSCR() & ~CCM_CSCR_USB_MASK;
+	reg |= div << CCM_CSCR_USB_OFFSET;
+	__raw_writel(reg, CCM_CSCR);
+
+	return 0;
+}
+
 static unsigned long _clk_ssix_recalc(struct clk *clk, unsigned long pdf)
 {
 	unsigned long parent_rate;
@@ -595,11 +628,14 @@ static struct clk csi_clk[] = {
 static struct clk usb_clk[] = {
 	{
 		.parent = &spll_clk,
+		.secondary = &usb_clk[1],
 		.get_rate = _clk_usb_recalc,
 		.enable = _clk_enable,
 		.enable_reg = CCM_PCCR_USBOTG_REG,
 		.enable_shift = CCM_PCCR_USBOTG_OFFSET,
 		.disable = _clk_disable,
+		.round_rate = _clk_usb_round_rate,
+		.set_rate = _clk_usb_set_rate,
 	}, {
 		.parent = &hclk_clk,
 		.enable = _clk_enable,
@@ -768,18 +804,7 @@ static struct clk rtc_clk = {
 
 static unsigned long _clk_clko_round_rate(struct clk *clk, unsigned long rate)
 {
-	u32 div;
-	unsigned long parent_rate;
-
-	parent_rate = clk_get_rate(clk->parent);
-	div = parent_rate / rate;
-	if (parent_rate % rate)
-		div++;
-
-	if (div > 8)
-		div = 8;
-
-	return parent_rate / div;
+	return _clk_generic_round_rate(clk, rate, 8);
 }
 
 static int _clk_clko_set_rate(struct clk *clk, unsigned long rate)
@@ -921,7 +946,7 @@ static struct clk_lookup lookups[] __initdata = {
 	_REGISTER_CLOCK(NULL, "cspi3", cspi_clk[2])
 	_REGISTER_CLOCK("imx-fb.0", NULL, lcdc_clk[0])
 	_REGISTER_CLOCK(NULL, "csi", csi_clk[0])
-	_REGISTER_CLOCK(NULL, "usb", usb_clk[0])
+	_REGISTER_CLOCK("imx21-hcd.0", NULL, usb_clk[0])
 	_REGISTER_CLOCK(NULL, "ssi1", ssi_clk[0])
 	_REGISTER_CLOCK(NULL, "ssi2", ssi_clk[1])
 	_REGISTER_CLOCK("mxc_nand.0", NULL, nfc_clk)
-- 
cgit v0.10.2


From 9470195a9cd13e6d90221b8b5d897e9232da8d28 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 18:10:32 +0200
Subject: ALSA: ctxfi - Clean up probe routines

Clean up probe routines and model detection routines so that the driver
won't call and check the PCI subsystem id at each time.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 7898a37..002a70e 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -39,29 +39,40 @@
 			    | (0x10 << 16) \
 			    | ((IEC958_AES3_CON_FS_48000) << 24))
 
-static const struct ct_atc_chip_sub_details atc_sub_details[NUM_CTCARDS] = {
-	[CTSB0760] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB0760,
-		      .nm_model = "SB076x"},
-	[CTHENDRIX] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_HENDRIX,
-		      .nm_model = "Hendrix"},
-	[CTSB08801] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08801,
-		      .nm_model = "SB0880"},
-	[CTSB08802] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08802,
-		      .nm_model = "SB0880"},
-	[CTSB08803] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08803,
-		      .nm_model = "SB0880"}
+static struct snd_pci_quirk __devinitdata subsys_20k1_list[] = {
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0022, "SB055x", CTSB055X),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x002f, "SB055x", CTSB055X),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0029, "SB073x", CTSB073X),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0031, "SB073x", CTSB073X),
+	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_CREATIVE, 0x6000,
+			   PCI_SUBDEVICE_ID_CREATIVE_HENDRIX, "UAA", CTUAA),
+	SND_PCI_QUIRK_VENDOR(PCI_VENDOR_ID_CREATIVE,
+			     "Unknown", CT20K1_UNKNOWN),
+	{ } /* terminator */
 };
 
-static struct ct_atc_chip_details atc_chip_details[] = {
-	{.vendor = PCI_VENDOR_ID_CREATIVE,
-	 .device = PCI_DEVICE_ID_CREATIVE_20K1,
-	 .sub_details = NULL,
-	 .nm_card = "X-Fi 20k1"},
-	{.vendor = PCI_VENDOR_ID_CREATIVE,
-	 .device = PCI_DEVICE_ID_CREATIVE_20K2,
-	 .sub_details = atc_sub_details,
-	 .nm_card = "X-Fi 20k2"},
-	{} /* terminator */
+static struct snd_pci_quirk __devinitdata subsys_20k2_list[] = {
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB0760,
+		      "SB0760", CTSB0760),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08801,
+		      "SB0880", CTSB0880),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08802,
+		      "SB0880", CTSB0880),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08803,
+		      "SB0880", CTSB0880),
+	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_CREATIVE, 0x6000,
+			   PCI_SUBDEVICE_ID_CREATIVE_HENDRIX, "UAA", CTHENDRIX),
+	{ } /* terminator */
+};
+
+static const char *ct_subsys_name[NUM_CTCARDS] = {
+	[CTSB055X]	= "SB055x",
+	[CTSB073X]	= "SB073x",
+	[CTSB0760]	= "SB076x",
+	[CTUAA]		= "UAA",
+	[CT20K1_UNKNOWN] = "Unknown",
+	[CTHENDRIX]	= "Hendrix",
+	[CTSB0880]	= "SB0880",
 };
 
 static struct {
@@ -1208,62 +1219,39 @@ static int atc_dev_free(struct snd_device *dev)
 
 static int __devinit atc_identify_card(struct ct_atc *atc)
 {
-	u16 subsys;
-	u8 revision;
-	struct pci_dev *pci = atc->pci;
-	const struct ct_atc_chip_details *d;
-	enum CTCARDS i;
-
-	subsys = pci->subsystem_device;
-	revision = pci->revision;
-	atc->chip_details = NULL;
-	atc->model = NUM_CTCARDS;
-	for (d = atc_chip_details; d->vendor; d++) {
-		if (d->vendor != pci->vendor || d->device != pci->device)
-			continue;
+	const struct snd_pci_quirk *p;
+	const struct snd_pci_quirk *list;
 
-		if (NULL == d->sub_details) {
-			atc->chip_details = d;
-			break;
-		}
-		for (i = 0; i < NUM_CTCARDS; i++) {
-			if ((d->sub_details[i].subsys == subsys) ||
-			    (((subsys & 0x6000) == 0x6000) &&
-			    ((d->sub_details[i].subsys & 0x6000) == 0x6000))) {
-				atc->model = i;
-				break;
-			}
-		}
-		if (i >= NUM_CTCARDS)
-			continue;
-
-		atc->chip_details = d;
+	switch (atc->chip_type) {
+	case ATC20K1:
+		atc->chip_name = "20K1";
+		list = subsys_20k1_list;
+		break;
+	case ATC20K2:
+		atc->chip_name = "20K2";
+		list = subsys_20k2_list;
 		break;
-		/* not take revision into consideration now */
+	default:
+		return -ENOENT;
 	}
-	if (!d->vendor)
+	p = snd_pci_quirk_lookup(atc->pci, list);
+	if (!p)
 		return -ENOENT;
-
+	atc->model = p->value;
+	atc->model_name = ct_subsys_name[atc->model];
+	snd_printd("ctxfi: chip %s model %s (%04x:%04x) is found\n",
+		   atc->chip_name, atc->model_name,
+		   atc->pci->subsystem_vendor,
+		   atc->pci->subsystem_device);
 	return 0;
 }
 
 int __devinit ct_atc_create_alsa_devs(struct ct_atc *atc)
 {
 	enum CTALSADEVS i;
-	struct hw *hw = atc->hw;
 	int err;
 
-	switch (hw->get_chip_type(hw)) {
-	case ATC20K1:
-		alsa_dev_funcs[MIXER].public_name = "20K1";
-		break;
-	case ATC20K2:
-		alsa_dev_funcs[MIXER].public_name = "20K2";
-		break;
-	default:
-		alsa_dev_funcs[MIXER].public_name = "Unknown";
-		break;
-	}
+	alsa_dev_funcs[MIXER].public_name = atc->chip_name;
 
 	for (i = 0; i < NUM_CTALSADEVS; i++) {
 		if (NULL == alsa_dev_funcs[i].create)
@@ -1287,7 +1275,7 @@ static int __devinit atc_create_hw_devs(struct ct_atc *atc)
 	struct card_conf info = {0};
 	int i, err;
 
-	err = create_hw_obj(atc->pci, &hw);
+	err = create_hw_obj(atc->pci, atc->chip_type, atc->model, &hw);
 	if (err) {
 		printk(KERN_ERR "Failed to create hw obj!!!\n");
 		return err;
@@ -1328,7 +1316,6 @@ static int __devinit atc_get_resources(struct ct_atc *atc)
 	struct sum_desc sum_dsc = {0};
 	struct sum_mgr *sum_mgr;
 	int err, i;
-	unsigned short subsys_id;
 
 	atc->daios = kzalloc(sizeof(void *)*(DAIONUM), GFP_KERNEL);
 	if (NULL == atc->daios)
@@ -1359,13 +1346,10 @@ static int __devinit atc_get_resources(struct ct_atc *atc)
 		}
 		atc->n_daio++;
 	}
-	subsys_id = atc->pci->subsystem_device;
-	if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
-		/* SB073x cards */
+	if (atc->model == CTSB073X)
 		da_desc.type = SPDIFI1;
-	} else {
+	else
 		da_desc.type = SPDIFIO;
-	}
 	err = daio_mgr->get_daio(daio_mgr, &da_desc,
 				(struct daio **)&atc->daios[i]);
 	if (err) {
@@ -1555,7 +1539,8 @@ static struct ct_atc atc_preset __devinitdata = {
  */
 
 int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
-		  unsigned int rsr, unsigned int msr, struct ct_atc **ratc)
+			    unsigned int rsr, unsigned int msr,
+			    int chip_type, struct ct_atc **ratc)
 {
 	struct ct_atc *atc;
 	static struct snd_device_ops ops = {
@@ -1576,6 +1561,7 @@ int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 	atc->pci = pci;
 	atc->rsr = rsr;
 	atc->msr = msr;
+	atc->chip_type = chip_type;
 
 	spin_lock_init(&atc->atc_lock);
 
diff --git a/sound/pci/ctxfi/ctatc.h b/sound/pci/ctxfi/ctatc.h
index 04459aa..a033472 100644
--- a/sound/pci/ctxfi/ctatc.h
+++ b/sound/pci/ctxfi/ctatc.h
@@ -37,15 +37,6 @@ enum CTALSADEVS {		/* Types of alsa devices */
 	NUM_CTALSADEVS		/* This should always be the last */
 };
 
-enum CTCARDS {
-	CTSB0760,
-	CTHENDRIX,
-	CTSB08801,
-	CTSB08802,
-	CTSB08803,
-	NUM_CTCARDS		/* This should always be the last */
-};
-
 struct ct_atc_chip_sub_details {
 	u16 subsys;
 	const char *nm_model;
@@ -89,8 +80,10 @@ struct ct_atc {
 	unsigned int msr; /* master sample rate in rsr */
 	unsigned int pll_rate; /* current rate of Phase Lock Loop */
 
-	const struct ct_atc_chip_details *chip_details;
-	enum CTCARDS model;
+	int chip_type;
+	int model;
+	const char *chip_name;
+	const char *model_name;
 
 	struct ct_vm *vm; /* device virtual memory manager for this card */
 	int (*map_audio_buffer)(struct ct_atc *atc, struct ct_atc_pcm *apcm);
@@ -147,7 +140,7 @@ struct ct_atc {
 
 
 int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
-			    unsigned int rsr, unsigned int msr,
+			    unsigned int rsr, unsigned int msr, int chip_type,
 			    struct ct_atc **ratc);
 int __devinit ct_atc_create_alsa_devs(struct ct_atc *atc);
 
diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c
index befead4..082e35c 100644
--- a/sound/pci/ctxfi/ctdaio.c
+++ b/sound/pci/ctxfi/ctdaio.c
@@ -116,7 +116,7 @@ static struct rsc_ops daio_in_rsc_ops_20k2 = {
 
 static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw)
 {
-	switch (hw->get_chip_type(hw)) {
+	switch (hw->chip_type) {
 	case ATC20K1:
 		switch (type) {
 		case SPDIFOO:	return 0;
@@ -343,7 +343,7 @@ static int daio_rsc_init(struct daio *daio,
 	int err;
 	unsigned int idx_l, idx_r;
 
-	switch (((struct hw *)hw)->get_chip_type(hw)) {
+	switch (((struct hw *)hw)->chip_type) {
 	case ATC20K1:
 		idx_l = idx_20k1[desc->type].left;
 		idx_r = idx_20k1[desc->type].right;
@@ -367,7 +367,7 @@ static int daio_rsc_init(struct daio *daio,
 	if (desc->type <= DAIO_OUT_MAX) {
 		daio->rscl.ops = daio->rscr.ops = &daio_out_rsc_ops;
 	} else {
-		switch (((struct hw *)hw)->get_chip_type(hw)) {
+		switch (((struct hw *)hw)->chip_type) {
 		case ATC20K1:
 			daio->rscl.ops = daio->rscr.ops = &daio_in_rsc_ops_20k1;
 			break;
diff --git a/sound/pci/ctxfi/cthardware.c b/sound/pci/ctxfi/cthardware.c
index 5ec6813..8e64f48 100644
--- a/sound/pci/ctxfi/cthardware.c
+++ b/sound/pci/ctxfi/cthardware.c
@@ -20,34 +20,16 @@
 #include "cthw20k2.h"
 #include <linux/bug.h>
 
-static enum CHIPTYP __devinitdata get_chip_type(struct hw *hw)
-{
-	enum CHIPTYP type;
-
-	switch (hw->pci->device) {
-	case 0x0005:	/* 20k1 device */
-		type = ATC20K1;
-		break;
-	case 0x000B:	/* 20k2 device */
-		type = ATC20K2;
-		break;
-	default:
-		type = ATCNONE;
-		break;
-	}
-
-	return type;
-}
-
-int __devinit create_hw_obj(struct pci_dev *pci, struct hw **rhw)
+int __devinit create_hw_obj(struct pci_dev *pci, enum CHIPTYP chip_type,
+			    enum CTCARDS model, struct hw **rhw)
 {
 	int err;
 
-	switch (pci->device) {
-	case 0x0005:	/* 20k1 device */
+	switch (chip_type) {
+	case ATC20K1:
 		err = create_20k1_hw_obj(rhw);
 		break;
-	case 0x000B:	/* 20k2 device */
+	case ATC20K2:
 		err = create_20k2_hw_obj(rhw);
 		break;
 	default:
@@ -58,7 +40,8 @@ int __devinit create_hw_obj(struct pci_dev *pci, struct hw **rhw)
 		return err;
 
 	(*rhw)->pci = pci;
-	(*rhw)->get_chip_type = get_chip_type;
+	(*rhw)->chip_type = chip_type;
+	(*rhw)->model = model;
 
 	return 0;
 }
diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h
index 8f11644..4a8e04f 100644
--- a/sound/pci/ctxfi/cthardware.h
+++ b/sound/pci/ctxfi/cthardware.h
@@ -27,6 +27,19 @@ enum CHIPTYP {
 	ATCNONE
 };
 
+enum CTCARDS {
+	/* 20k1 models */
+	CTSB055X,
+	CTSB073X,
+	CTUAA,
+	CT20K1_UNKNOWN,
+	/* 20k2 models */
+	CTSB0760,
+	CTHENDRIX,
+	CTSB0880,
+	NUM_CTCARDS		/* This should always be the last */
+};
+
 /* Type of input source for ADC */
 enum ADCSRC{
 	ADC_MICIN,
@@ -48,7 +61,6 @@ struct hw {
 	int (*card_init)(struct hw *hw, struct card_conf *info);
 	int (*card_stop)(struct hw *hw);
 	int (*pll_init)(struct hw *hw, unsigned int rsr);
-	enum CHIPTYP (*get_chip_type)(struct hw *hw);
 	int (*is_adc_source_selected)(struct hw *hw, enum ADCSRC source);
 	int (*select_adc_source)(struct hw *hw, enum ADCSRC source);
 	int (*have_digit_io_switch)(struct hw *hw);
@@ -156,9 +168,13 @@ struct hw {
 	int irq;
 	unsigned long io_base;
 	unsigned long mem_base;
+
+	enum CHIPTYP chip_type;
+	enum CTCARDS model;
 };
 
-int create_hw_obj(struct pci_dev *pci, struct hw **rhw);
+int create_hw_obj(struct pci_dev *pci, enum CHIPTYP chip_type,
+		  enum CTCARDS model, struct hw **rhw);
 int destroy_hw_obj(struct hw *hw);
 
 unsigned int get_field(unsigned int data, unsigned int field);
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index 38b87b6..5d58650 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1432,11 +1432,9 @@ static int hw_dac_init(struct hw *hw, const struct dac_conf *info)
 {
 	u32 data;
 	u16 gpioorg;
-	u16 subsys_id;
 	unsigned int ret;
 
-	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
-	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
+	if (hw->model == CTSB055X) {
 		/* SB055x, unmute outputs */
 		gpioorg = (u16)hw_read_20kx(hw, GPIO);
 		gpioorg &= 0xffbf;	/* set GPIO6 to low */
@@ -1538,19 +1536,14 @@ static int is_adc_input_selected_hendrix(struct hw *hw, enum ADCSRC type)
 
 static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
 {
-	u16 subsys_id;
-
-	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
-	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
-		/* SB055x cards */
+	switch (hw->model) {
+	case CTSB055X:
 		return is_adc_input_selected_SB055x(hw, type);
-	} else if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
-		/* SB073x cards */
+	case CTSB073X:
 		return is_adc_input_selected_hendrix(hw, type);
-	} else if ((subsys_id & 0xf000) == 0x6000) {
-		/* Vista compatible cards */
+	case CTHENDRIX:
 		return is_adc_input_selected_hendrix(hw, type);
-	} else {
+	default:
 		return is_adc_input_selected_SBx(hw, type);
 	}
 }
@@ -1692,20 +1685,17 @@ adc_input_select_hendrix(struct hw *hw, enum ADCSRC type, unsigned char boost)
 
 static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
 {
-	u16 subsys_id;
-
-	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
-	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
-		/* SB055x cards */
-		return adc_input_select_SB055x(hw, type, (ADC_MICIN == type));
-	} else if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
-		/* SB073x cards */
-		return adc_input_select_hendrix(hw, type, (ADC_MICIN == type));
-	} else if ((subsys_id & 0xf000) == 0x6000) {
-		/* Vista compatible cards */
-		return adc_input_select_hendrix(hw, type, (ADC_MICIN == type));
-	} else {
-		return adc_input_select_SBx(hw, type, (ADC_MICIN == type));
+	int state = type == ADC_MICIN;
+
+	switch (hw->model) {
+	case CTSB055X:
+		return adc_input_select_SB055x(hw, type, state);
+	case CTSB073X:
+		return adc_input_select_hendrix(hw, type, state);
+	case CTHENDRIX:
+		return adc_input_select_hendrix(hw, type, state);
+	default:
+		return adc_input_select_SBx(hw, type, state);
 	}
 }
 
@@ -1781,28 +1771,16 @@ static int adc_init_SBx(struct hw *hw, int input, int mic20db)
 
 static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 {
-	int err;
-	u16 subsys_id;
-
-	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
-	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
-		/* Sb055x card */
-		err = adc_init_SB055x(hw, info->input, info->mic20db);
-	} else {
-		err = adc_init_SBx(hw, info->input, info->mic20db);
-	}
-
-	return err;
+	if (hw->model == CTSB055X)
+		return adc_init_SB055x(hw, info->input, info->mic20db);
+	else
+		return adc_init_SBx(hw, info->input, info->mic20db);
 }
 
 static int hw_have_digit_io_switch(struct hw *hw)
 {
-	u16 subsys_id;
-
-	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
 	/* SB073x and Vista compatible cards have no digit IO switch */
-	return !((subsys_id == 0x0029) || (subsys_id == 0x0031)
-				|| ((subsys_id & 0xf000) == 0x6000));
+	return !(hw->model == CTSB073X || hw->model == CTHENDRIX);
 }
 
 #define CTLBITS(a, b, c, d)	(((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
@@ -1918,7 +1896,6 @@ static int hw_card_start(struct hw *hw)
 {
 	int err;
 	struct pci_dev *pci = hw->pci;
-	u16 subsys_id;
 
 	err = pci_enable_device(pci);
 	if (err < 0)
@@ -1939,8 +1916,7 @@ static int hw_card_start(struct hw *hw)
 		goto error1;
 
 	/* Switch to X-Fi mode from UAA mode if neeeded */
-	pci_read_config_word(pci, PCI_SUBSYSTEM_ID, &subsys_id);
-	if ((0x5 == pci->device) && (0x6000 == (subsys_id & 0x6000))) {
+	if (hw->model == CTHENDRIX) {
 		err = uaa_to_xfi(pci);
 		if (err)
 			goto error2;
@@ -2004,7 +1980,6 @@ static int hw_card_init(struct hw *hw, struct card_conf *info)
 {
 	int err;
 	unsigned int gctl;
-	u16 subsys_id;
 	u32 data;
 	struct dac_conf dac_info = {0};
 	struct adc_conf adc_info = {0};
@@ -2044,19 +2019,20 @@ static int hw_card_init(struct hw *hw, struct card_conf *info)
 	hw_write_20kx(hw, SRCIP, 0);
 	mdelay(30);
 
-	pci_read_config_word(hw->pci, PCI_SUBSYSTEM_ID, &subsys_id);
 	/* Detect the card ID and configure GPIO accordingly. */
-	if ((subsys_id == 0x0022) || (subsys_id == 0x002F)) {
-		/* SB055x cards */
+	switch (hw->model) {
+	case CTSB055X:
 		hw_write_20kx(hw, GPIOCTL, 0x13fe);
-	} else if ((subsys_id == 0x0029) || (subsys_id == 0x0031)) {
-		/* SB073x cards */
+		break;
+	case CTSB073X:
 		hw_write_20kx(hw, GPIOCTL, 0x00e6);
-	} else if ((subsys_id & 0xf000) == 0x6000) {
-		/* Vista compatible cards */
+		break;
+	case CTHENDRIX: /* Vista compatible cards */
 		hw_write_20kx(hw, GPIOCTL, 0x00c2);
-	} else {
+		break;
+	default:
 		hw_write_20kx(hw, GPIOCTL, 0x01e6);
+		break;
 	}
 
 	trn_info.vm_pgt_phys = info->vm_pgt_phys;
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index 279dac6..2d3dd89 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -15,6 +15,7 @@
 #include <sound/core.h>
 #include <sound/initval.h>
 #include "ctatc.h"
+#include "cthardware.h"
 
 MODULE_AUTHOR("Creative Technology Ltd");
 MODULE_DESCRIPTION("X-Fi driver version 1.03");
@@ -41,8 +42,12 @@ MODULE_PARM_DESC(enable, "Enable Creative X-Fi driver");
 
 static struct pci_device_id ct_pci_dev_ids[] = {
 	/* only X-Fi is supported, so... */
-	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K1) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K2) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K1),
+	  .driver_data = ATC20K1,
+	},
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K2),
+	  .driver_data = ATC20K2,
+	},
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ct_pci_dev_ids);
@@ -79,7 +84,8 @@ ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 		       "1 and 2, Value 2 is assumed.\n");
 		multiple = 2;
 	}
-	err = ct_atc_create(card, pci, reference_rate, multiple, &atc);
+	err = ct_atc_create(card, pci, reference_rate, multiple,
+			    pci_id->driver_data, &atc);
 	if (err < 0)
 		goto error;
 
@@ -92,7 +98,8 @@ ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 
 	strcpy(card->driver, "SB-XFi");
 	strcpy(card->shortname, "Creative X-Fi");
-	strcpy(card->longname, "Creative ALSA Driver X-Fi");
+	snprintf(card->longname, sizeof(card->longname), "%s %s %s",
+		 card->shortname, atc->chip_name, atc->model_name);
 
 	err = snd_card_register(card);
 	if (err < 0)
-- 
cgit v0.10.2


From 5284c6b99ea017f73c09b50f34a637ff9d5d26a0 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 8 Jun 2009 12:31:00 +0100
Subject: pata_netcell: Fix typo

The previous patch submission had a I typo I didn't catch but Bartlomiej
noted. Guess this proves the point about any patch being risky late in an rc

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c
index 9a69809..f0d52f7 100644
--- a/drivers/ata/pata_netcell.c
+++ b/drivers/ata/pata_netcell.c
@@ -26,7 +26,7 @@ static unsigned int netcell_read_id(struct ata_device *adev,
 	unsigned int err_mask = ata_do_dev_read_id(adev, tf, id);
 	/* Firmware forgets to mark words 85-87 valid */
 	if (err_mask == 0)
-		id[ATA_ID_CSF_DEFAULT] |= 0x0400;
+		id[ATA_ID_CSF_DEFAULT] |= 0x4000;
 	return err_mask;
 }
 
-- 
cgit v0.10.2


From 3772a99175f5378b5001e8da364341a8b8226a4a Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:50:54 -0400
Subject: [SCSI] lpfc 8.3.2 : Reorganization for SLI4

Preps the organization of the driver so that the bottom half, which
interacts with the hardware, can share common code sequences for
attachment, detachment, initialization, teardown, etc with new hardware.

For very common code sections, which become specific to the interface
type, the driver uses an indirect function call. The function is set at
initialization. For less common sections, such as initialization, the
driver looks at the interface type and calls the routines relative to
the interface.

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 1105f9a..6c24c9a 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -23,6 +23,13 @@
 
 struct lpfc_sli2_slim;
 
+#define LPFC_PCI_DEV_LP		0x1
+#define LPFC_PCI_DEV_OC		0x2
+
+#define LPFC_SLI_REV2		2
+#define LPFC_SLI_REV3		3
+#define LPFC_SLI_REV4		4
+
 #define LPFC_MAX_TARGET		4096	/* max number of targets supported */
 #define LPFC_MAX_DISC_THREADS	64	/* max outstanding discovery els
 					   requests */
@@ -264,8 +271,8 @@ enum hba_state {
 };
 
 struct lpfc_vport {
-	struct list_head listentry;
 	struct lpfc_hba *phba;
+	struct list_head listentry;
 	uint8_t port_type;
 #define LPFC_PHYSICAL_PORT 1
 #define LPFC_NPIV_PORT  2
@@ -420,8 +427,66 @@ enum intr_type_t {
 };
 
 struct lpfc_hba {
+	/* SCSI interface function jump table entries */
+	int (*lpfc_new_scsi_buf)
+		(struct lpfc_vport *, int);
+	struct lpfc_scsi_buf * (*lpfc_get_scsi_buf)
+		(struct lpfc_hba *);
+	int (*lpfc_scsi_prep_dma_buf)
+		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+	void (*lpfc_scsi_unprep_dma_buf)
+		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+	void (*lpfc_release_scsi_buf)
+		(struct lpfc_hba *, struct lpfc_scsi_buf *);
+	void (*lpfc_rampdown_queue_depth)
+		(struct lpfc_hba *);
+	void (*lpfc_scsi_prep_cmnd)
+		(struct lpfc_vport *, struct lpfc_scsi_buf *,
+		 struct lpfc_nodelist *);
+	int (*lpfc_scsi_prep_task_mgmt_cmd)
+		(struct lpfc_vport *, struct lpfc_scsi_buf *,
+		 unsigned int, uint8_t);
+
+	/* IOCB interface function jump table entries */
+	int (*__lpfc_sli_issue_iocb)
+		(struct lpfc_hba *, uint32_t,
+		 struct lpfc_iocbq *, uint32_t);
+	void (*__lpfc_sli_release_iocbq)(struct lpfc_hba *,
+			 struct lpfc_iocbq *);
+	int (*lpfc_hba_down_post)(struct lpfc_hba *phba);
+
+
+	IOCB_t * (*lpfc_get_iocb_from_iocbq)
+		(struct lpfc_iocbq *);
+	void (*lpfc_scsi_cmd_iocb_cmpl)
+		(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *);
+
+	/* MBOX interface function jump table entries */
+	int (*lpfc_sli_issue_mbox)
+		(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
+	/* Slow-path IOCB process function jump table entries */
+	void (*lpfc_sli_handle_slow_ring_event)
+		(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+		 uint32_t mask);
+	/* INIT device interface function jump table entries */
+	int (*lpfc_sli_hbq_to_firmware)
+		(struct lpfc_hba *, uint32_t, struct hbq_dmabuf *);
+	int (*lpfc_sli_brdrestart)
+		(struct lpfc_hba *);
+	int (*lpfc_sli_brdready)
+		(struct lpfc_hba *, uint32_t);
+	void (*lpfc_handle_eratt)
+		(struct lpfc_hba *);
+	void (*lpfc_stop_port)
+		(struct lpfc_hba *);
+
+
+	/* SLI4 specific HBA data structure */
+	struct lpfc_sli4_hba sli4_hba;
+
 	struct lpfc_sli sli;
-	uint32_t sli_rev;		/* SLI2 or SLI3 */
+	uint8_t pci_dev_grp;	/* lpfc PCI dev group: 0x0, 0x1, 0x2,... */
+	uint32_t sli_rev;		/* SLI2, SLI3, or SLI4 */
 	uint32_t sli3_options;		/* Mask of enabled SLI3 options */
 #define LPFC_SLI3_HBQ_ENABLED		0x01
 #define LPFC_SLI3_NPIV_ENABLED		0x02
@@ -526,11 +591,12 @@ struct lpfc_hba {
 	unsigned long data_flags;
 
 	uint32_t hbq_in_use;		/* HBQs in use flag */
-	struct list_head hbqbuf_in_list;  /* in-fly hbq buffer list */
+	struct list_head rb_pend_list;  /* Received buffers to be processed */
 	uint32_t hbq_count;	        /* Count of configured HBQs */
 	struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies  */
 
 	unsigned long pci_bar0_map;     /* Physical address for PCI BAR0 */
+	unsigned long pci_bar1_map;     /* Physical address for PCI BAR1 */
 	unsigned long pci_bar2_map;     /* Physical address for PCI BAR2 */
 	void __iomem *slim_memmap_p;	/* Kernel memory mapped address for
 					   PCI BAR0 */
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 896c7b0..4164b93 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -267,8 +267,6 @@ lpfc_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
 	     uint32_t tmo, uint8_t retry)
 {
 	struct lpfc_hba  *phba = vport->phba;
-	struct lpfc_sli  *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	IOCB_t *icmd;
 	struct lpfc_iocbq *geniocb;
 	int rc;
@@ -331,7 +329,7 @@ lpfc_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
 	geniocb->drvrTimeout = icmd->ulpTimeout + LPFC_DRVR_TIMEOUT;
 	geniocb->vport = vport;
 	geniocb->retry = retry;
-	rc = lpfc_sli_issue_iocb(phba, pring, geniocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, geniocb, 0);
 
 	if (rc == IOCB_ERROR) {
 		lpfc_sli_release_iocbq(phba, geniocb);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 52be564..5dd6692 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -280,6 +280,8 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size)
 	struct lpfc_dmabuf *d_buf;
 	struct hbq_dmabuf *hbq_buf;
 
+	if (phba->sli_rev != 3)
+		return 0;
 	cnt = LPFC_HBQINFO_SIZE;
 	spin_lock_irq(&phba->hbalock);
 
@@ -489,12 +491,15 @@ lpfc_debugfs_dumpHostSlim_data(struct lpfc_hba *phba, char *buf, int size)
 				 pring->next_cmdidx, pring->local_getidx,
 				 pring->flag, pgpp->rspPutInx, pring->numRiocb);
 	}
-	word0 = readl(phba->HAregaddr);
-	word1 = readl(phba->CAregaddr);
-	word2 = readl(phba->HSregaddr);
-	word3 = readl(phba->HCregaddr);
-	len +=  snprintf(buf+len, size-len, "HA:%08x CA:%08x HS:%08x HC:%08x\n",
-	word0, word1, word2, word3);
+
+	if (phba->sli_rev <= LPFC_SLI_REV3) {
+		word0 = readl(phba->HAregaddr);
+		word1 = readl(phba->CAregaddr);
+		word2 = readl(phba->HSregaddr);
+		word3 = readl(phba->HCregaddr);
+		len +=  snprintf(buf+len, size-len, "HA:%08x CA:%08x HS:%08x "
+				 "HC:%08x\n", word0, word1, word2, word3);
+	}
 	spin_unlock_irq(&phba->hbalock);
 	return len;
 }
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index b8b34cf..8c5c3ae 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -84,7 +84,8 @@ lpfc_els_chk_latt(struct lpfc_vport *vport)
 	uint32_t ha_copy;
 
 	if (vport->port_state >= LPFC_VPORT_READY ||
-	    phba->link_state == LPFC_LINK_DOWN)
+	    phba->link_state == LPFC_LINK_DOWN ||
+	    phba->sli_rev > LPFC_SLI_REV3)
 		return 0;
 
 	/* Read the HBA Host Attention Register */
@@ -305,7 +306,7 @@ els_iocb_free_pcmb_exit:
  *   0 - successfully issued fabric registration login for @vport
  *   -ENXIO -- failed to issue fabric registration login for @vport
  **/
-static int
+int
 lpfc_issue_fabric_reglogin(struct lpfc_vport *vport)
 {
 	struct lpfc_hba  *phba = vport->phba;
@@ -345,8 +346,7 @@ lpfc_issue_fabric_reglogin(struct lpfc_vport *vport)
 		err = 4;
 		goto fail;
 	}
-	rc = lpfc_reg_login(phba, vport->vpi, Fabric_DID, (uint8_t *)sp, mbox,
-			    0);
+	rc = lpfc_reg_rpi(phba, vport->vpi, Fabric_DID, (uint8_t *)sp, mbox, 0);
 	if (rc) {
 		err = 5;
 		goto fail_free_mbox;
@@ -1350,14 +1350,12 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry)
 	IOCB_t *icmd;
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int ret;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 
 	ndlp = lpfc_findnode_did(vport, did);
 	if (ndlp && !NLP_CHK_NODE_ACT(ndlp))
@@ -1391,7 +1389,7 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry)
 
 	phba->fc_stat.elsXmitPLOGI++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi;
-	ret = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 
 	if (ret == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
@@ -1501,14 +1499,9 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	PRLI *npr;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
-	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 
-	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
-
 	cmdsize = (sizeof(uint32_t) + sizeof(PRLI));
 	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
 				     ndlp->nlp_DID, ELS_CMD_PRLI);
@@ -1550,7 +1543,8 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_PRLI_SND;
 	spin_unlock_irq(shost->host_lock);
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag &= ~NLP_PRLI_SND;
 		spin_unlock_irq(shost->host_lock);
@@ -1788,8 +1782,6 @@ lpfc_issue_els_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	ADISC *ap;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 
@@ -1822,7 +1814,8 @@ lpfc_issue_els_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_ADISC_SND;
 	spin_unlock_irq(shost->host_lock);
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag &= ~NLP_ADISC_SND;
 		spin_unlock_irq(shost->host_lock);
@@ -1937,15 +1930,10 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	struct lpfc_hba  *phba = vport->phba;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
-	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int rc;
 
-	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];
-
 	spin_lock_irq(shost->host_lock);
 	if (ndlp->nlp_flag & NLP_LOGO_SND) {
 		spin_unlock_irq(shost->host_lock);
@@ -1978,7 +1966,7 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_LOGO_SND;
 	spin_unlock_irq(shost->host_lock);
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 
 	if (rc == IOCB_ERROR) {
 		spin_lock_irq(shost->host_lock);
@@ -2058,14 +2046,12 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
 	struct lpfc_hba  *phba = vport->phba;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	struct lpfc_nodelist *ndlp;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 	cmdsize = (sizeof(uint32_t) + sizeof(SCR));
 
 	ndlp = lpfc_findnode_did(vport, nportid);
@@ -2108,7 +2094,8 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
 
 	phba->fc_stat.elsXmitSCR++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		/* The additional lpfc_nlp_put will cause the following
 		 * lpfc_els_free_iocb routine to trigger the rlease of
 		 * the node.
@@ -2152,7 +2139,6 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
 	struct lpfc_hba  *phba = vport->phba;
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	FARP *fp;
 	uint8_t *pcmd;
@@ -2162,7 +2148,6 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
 	struct lpfc_nodelist *ndlp;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 	cmdsize = (sizeof(uint32_t) + sizeof(FARP));
 
 	ndlp = lpfc_findnode_did(vport, nportid);
@@ -2219,7 +2204,8 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
 
 	phba->fc_stat.elsXmitFARPR++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		/* The additional lpfc_nlp_put will cause the following
 		 * lpfc_els_free_iocb routine to trigger the release of
 		 * the node.
@@ -2961,6 +2947,7 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 		 */
 		lpfc_nlp_not_used(ndlp);
 	}
+
 	return;
 }
 
@@ -3170,7 +3157,6 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag,
 	IOCB_t *icmd;
 	IOCB_t *oldcmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
@@ -3178,7 +3164,6 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag,
 	ELS_PKT *els_pkt_ptr;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 	oldcmd = &oldiocb->iocb;
 
 	switch (flag) {
@@ -3266,7 +3251,7 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag,
 	}
 
 	phba->fc_stat.elsXmitACC++;
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
@@ -3305,15 +3290,12 @@ lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError,
 	IOCB_t *icmd;
 	IOCB_t *oldcmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int rc;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
-
 	cmdsize = 2 * sizeof(uint32_t);
 	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
 				     ndlp->nlp_DID, ELS_CMD_LS_RJT);
@@ -3346,7 +3328,7 @@ lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError,
 
 	phba->fc_stat.elsXmitLSRJT++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
@@ -3379,8 +3361,6 @@ lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
 		       struct lpfc_nodelist *ndlp)
 {
 	struct lpfc_hba  *phba = vport->phba;
-	struct lpfc_sli  *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	ADISC *ap;
 	IOCB_t *icmd, *oldcmd;
 	struct lpfc_iocbq *elsiocb;
@@ -3422,7 +3402,7 @@ lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
 
 	phba->fc_stat.elsXmitACC++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
@@ -3459,14 +3439,12 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
 	IOCB_t *icmd;
 	IOCB_t *oldcmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int rc;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 
 	cmdsize = sizeof(uint32_t) + sizeof(PRLI);
 	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
@@ -3520,7 +3498,7 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
 	phba->fc_stat.elsXmitACC++;
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
 
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
@@ -3562,15 +3540,12 @@ lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format,
 	RNID *rn;
 	IOCB_t *icmd, *oldcmd;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli_ring *pring;
 	struct lpfc_sli *psli;
 	uint8_t *pcmd;
 	uint16_t cmdsize;
 	int rc;
 
 	psli = &phba->sli;
-	pring = &psli->ring[LPFC_ELS_RING];
-
 	cmdsize = sizeof(uint32_t) + sizeof(uint32_t)
 					+ (2 * sizeof(struct lpfc_name));
 	if (format)
@@ -3626,7 +3601,7 @@ lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format,
 	elsiocb->context1 = NULL;  /* Don't need ndlp for cmpl,
 				    * it could be freed */
 
-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0);
 	if (rc == IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
@@ -4440,8 +4415,6 @@ lpfc_els_rcv_lirr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 static void
 lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	MAILBOX_t *mb;
 	IOCB_t *icmd;
 	RPS_RSP *rps_rsp;
@@ -4507,7 +4480,7 @@ lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 			 ndlp->nlp_rpi);
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
 	phba->fc_stat.elsXmitACC++;
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR)
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) == IOCB_ERROR)
 		lpfc_els_free_iocb(phba, elsiocb);
 	return;
 }
@@ -4616,8 +4589,6 @@ lpfc_els_rsp_rpl_acc(struct lpfc_vport *vport, uint16_t cmdsize,
 	IOCB_t *icmd, *oldcmd;
 	RPL_RSP rpl_rsp;
 	struct lpfc_iocbq *elsiocb;
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
 	uint8_t *pcmd;
 
 	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
@@ -4654,7 +4625,8 @@ lpfc_els_rsp_rpl_acc(struct lpfc_vport *vport, uint16_t cmdsize,
 			 ndlp->nlp_rpi);
 	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
 	phba->fc_stat.elsXmitACC++;
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		lpfc_els_free_iocb(phba, elsiocb);
 		return 1;
 	}
@@ -6139,7 +6111,6 @@ lpfc_issue_els_npiv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
 	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 	struct lpfc_hba  *phba = vport->phba;
-	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
 	IOCB_t *icmd;
 	struct lpfc_iocbq *elsiocb;
 	uint8_t *pcmd;
@@ -6169,7 +6140,8 @@ lpfc_issue_els_npiv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_LOGO_SND;
 	spin_unlock_irq(shost->host_lock);
-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) ==
+	    IOCB_ERROR) {
 		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag &= ~NLP_LOGO_SND;
 		spin_unlock_irq(shost->host_lock);
@@ -6224,7 +6196,6 @@ lpfc_resume_fabric_iocbs(struct lpfc_hba *phba)
 	struct lpfc_iocbq *iocb;
 	unsigned long iflags;
 	int ret;
-	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
 	IOCB_t *cmd;
 
 repeat:
@@ -6248,7 +6219,7 @@ repeat:
 			"Fabric sched1:   ste:x%x",
 			iocb->vport->port_state, 0, 0);
 
-		ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0);
+		ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocb, 0);
 
 		if (ret == IOCB_ERROR) {
 			iocb->iocb_cmpl = iocb->fabric_iocb_cmpl;
@@ -6394,7 +6365,6 @@ static int
 lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb)
 {
 	unsigned long iflags;
-	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
 	int ready;
 	int ret;
 
@@ -6418,7 +6388,7 @@ lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb)
 			"Fabric sched2:   ste:x%x",
 			iocb->vport->port_state, 0, 0);
 
-		ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0);
+		ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocb, 0);
 
 		if (ret == IOCB_ERROR) {
 			iocb->iocb_cmpl = iocb->fabric_iocb_cmpl;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index e764ce0..25fc96c 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -555,23 +555,24 @@ lpfc_work_done(struct lpfc_hba *phba)
 		/*
 		 * Turn on Ring interrupts
 		 */
-		spin_lock_irq(&phba->hbalock);
-		control = readl(phba->HCregaddr);
-		if (!(control & (HC_R0INT_ENA << LPFC_ELS_RING))) {
-			lpfc_debugfs_slow_ring_trc(phba,
-				"WRK Enable ring: cntl:x%x hacopy:x%x",
-				control, ha_copy, 0);
-
-			control |= (HC_R0INT_ENA << LPFC_ELS_RING);
-			writel(control, phba->HCregaddr);
-			readl(phba->HCregaddr); /* flush */
-		}
-		else {
-			lpfc_debugfs_slow_ring_trc(phba,
-				"WRK Ring ok:     cntl:x%x hacopy:x%x",
-				control, ha_copy, 0);
+		if (phba->sli_rev <= LPFC_SLI_REV3) {
+			spin_lock_irq(&phba->hbalock);
+			control = readl(phba->HCregaddr);
+			if (!(control & (HC_R0INT_ENA << LPFC_ELS_RING))) {
+				lpfc_debugfs_slow_ring_trc(phba,
+					"WRK Enable ring: cntl:x%x hacopy:x%x",
+					control, ha_copy, 0);
+
+				control |= (HC_R0INT_ENA << LPFC_ELS_RING);
+				writel(control, phba->HCregaddr);
+				readl(phba->HCregaddr); /* flush */
+			} else {
+				lpfc_debugfs_slow_ring_trc(phba,
+					"WRK Ring ok:     cntl:x%x hacopy:x%x",
+					control, ha_copy, 0);
+			}
+			spin_unlock_irq(&phba->hbalock);
 		}
-		spin_unlock_irq(&phba->hbalock);
 	}
 	lpfc_work_list_done(phba);
 }
@@ -689,7 +690,7 @@ lpfc_port_link_failure(struct lpfc_vport *vport)
 	lpfc_can_disctmo(vport);
 }
 
-static void
+void
 lpfc_linkdown_port(struct lpfc_vport *vport)
 {
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
@@ -1147,10 +1148,12 @@ lpfc_enable_la(struct lpfc_hba *phba)
 	struct lpfc_sli *psli = &phba->sli;
 	spin_lock_irq(&phba->hbalock);
 	psli->sli_flag |= LPFC_PROCESS_LA;
-	control = readl(phba->HCregaddr);
-	control |= HC_LAINT_ENA;
-	writel(control, phba->HCregaddr);
-	readl(phba->HCregaddr); /* flush */
+	if (phba->sli_rev <= LPFC_SLI_REV3) {
+		control = readl(phba->HCregaddr);
+		control |= HC_LAINT_ENA;
+		writel(control, phba->HCregaddr);
+		readl(phba->HCregaddr); /* flush */
+	}
 	spin_unlock_irq(&phba->hbalock);
 }
 
@@ -2919,11 +2922,13 @@ restart_disc:
 		 * set port_state to PORT_READY if SLI2.
 		 * cmpl_reg_vpi will set port_state to READY for SLI3.
 		 */
-		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
-			lpfc_issue_reg_vpi(phba, vport);
-		else  {	/* NPIV Not enabled */
-			lpfc_issue_clear_la(phba, vport);
-			vport->port_state = LPFC_VPORT_READY;
+		if (phba->sli_rev < LPFC_SLI_REV4) {
+			if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
+				lpfc_issue_reg_vpi(phba, vport);
+			else  {	/* NPIV Not enabled */
+				lpfc_issue_clear_la(phba, vport);
+				vport->port_state = LPFC_VPORT_READY;
+			}
 		}
 
 		/* Setup and issue mailbox INITIALIZE LINK command */
@@ -2959,11 +2964,13 @@ restart_disc:
 		 * set port_state to PORT_READY if SLI2.
 		 * cmpl_reg_vpi will set port_state to READY for SLI3.
 		 */
-		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
-			lpfc_issue_reg_vpi(phba, vport);
-		else {	/* NPIV Not enabled */
-			lpfc_issue_clear_la(phba, vport);
-			vport->port_state = LPFC_VPORT_READY;
+		if (phba->sli_rev < LPFC_SLI_REV4) {
+			if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
+				lpfc_issue_reg_vpi(phba, vport);
+			else  {	/* NPIV Not enabled */
+				lpfc_issue_clear_la(phba, vport);
+				vport->port_state = LPFC_VPORT_READY;
+			}
 		}
 		break;
 
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 86d1bdc..3f06ce2 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -571,16 +571,20 @@ lpfc_hba_down_prep(struct lpfc_hba *phba)
 {
 	struct lpfc_vport **vports;
 	int i;
-	/* Disable interrupts */
-	writel(0, phba->HCregaddr);
-	readl(phba->HCregaddr); /* flush */
+
+	if (phba->sli_rev <= LPFC_SLI_REV3) {
+		/* Disable interrupts */
+		writel(0, phba->HCregaddr);
+		readl(phba->HCregaddr); /* flush */
+	}
 
 	if (phba->pport->load_flag & FC_UNLOADING)
 		lpfc_cleanup_discovery_resources(phba->pport);
 	else {
 		vports = lpfc_create_vport_work_array(phba);
 		if (vports != NULL)
-			for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++)
+			for (i = 0; i <= phba->max_vports &&
+				vports[i] != NULL; i++)
 				lpfc_cleanup_discovery_resources(vports[i]);
 		lpfc_destroy_vport_work_array(phba, vports);
 	}
@@ -588,7 +592,7 @@ lpfc_hba_down_prep(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_hba_down_post - Perform lpfc uninitialization after HBA reset
+ * lpfc_hba_down_post_s3 - Perform lpfc uninitialization after HBA reset
  * @phba: pointer to lpfc HBA data structure.
  *
  * This routine will do uninitialization after the HBA is reset when bring
@@ -598,8 +602,8 @@ lpfc_hba_down_prep(struct lpfc_hba *phba)
  *   0 - sucess.
  *   Any other value - error.
  **/
-int
-lpfc_hba_down_post(struct lpfc_hba *phba)
+static int
+lpfc_hba_down_post_s3(struct lpfc_hba *phba)
 {
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring *pring;
@@ -909,13 +913,30 @@ lpfc_handle_deferred_eratt(struct lpfc_hba *phba)
 	if ((!phba->work_hs) && (!(phba->pport->load_flag & FC_UNLOADING)))
 		phba->work_hs = old_host_status & ~HS_FFER1;
 
+	spin_lock_irq(&phba->hbalock);
 	phba->hba_flag &= ~DEFER_ERATT;
+	spin_unlock_irq(&phba->hbalock);
 	phba->work_status[0] = readl(phba->MBslimaddr + 0xa8);
 	phba->work_status[1] = readl(phba->MBslimaddr + 0xac);
 }
 
+static void
+lpfc_board_errevt_to_mgmt(struct lpfc_hba *phba)
+{
+	struct lpfc_board_event_header board_event;
+	struct Scsi_Host *shost;
+
+	board_event.event_type = FC_REG_BOARD_EVENT;
+	board_event.subcategory = LPFC_EVENT_PORTINTERR;
+	shost = lpfc_shost_from_vport(phba->pport);
+	fc_host_post_vendor_event(shost, fc_get_event_number(),
+				  sizeof(board_event),
+				  (char *) &board_event,
+				  LPFC_NL_VENDOR_ID);
+}
+
 /**
- * lpfc_handle_eratt - The HBA hardware error handler
+ * lpfc_handle_eratt_s3 - The SLI3 HBA hardware error handler
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine is invoked to handle the following HBA hardware error
@@ -924,8 +945,8 @@ lpfc_handle_deferred_eratt(struct lpfc_hba *phba)
  * 2 - DMA ring index out of range
  * 3 - Mailbox command came back as unknown
  **/
-void
-lpfc_handle_eratt(struct lpfc_hba *phba)
+static void
+lpfc_handle_eratt_s3(struct lpfc_hba *phba)
 {
 	struct lpfc_vport *vport = phba->pport;
 	struct lpfc_sli   *psli = &phba->sli;
@@ -934,24 +955,23 @@ lpfc_handle_eratt(struct lpfc_hba *phba)
 	unsigned long temperature;
 	struct temp_event temp_event_data;
 	struct Scsi_Host  *shost;
-	struct lpfc_board_event_header board_event;
 
 	/* If the pci channel is offline, ignore possible errors,
-	 * since we cannot communicate with the pci card anyway. */
-	if (pci_channel_offline(phba->pcidev))
+	 * since we cannot communicate with the pci card anyway.
+	 */
+	if (pci_channel_offline(phba->pcidev)) {
+		spin_lock_irq(&phba->hbalock);
+		phba->hba_flag &= ~DEFER_ERATT;
+		spin_unlock_irq(&phba->hbalock);
 		return;
+	}
+
 	/* If resets are disabled then leave the HBA alone and return */
 	if (!phba->cfg_enable_hba_reset)
 		return;
 
 	/* Send an internal error event to mgmt application */
-	board_event.event_type = FC_REG_BOARD_EVENT;
-	board_event.subcategory = LPFC_EVENT_PORTINTERR;
-	shost = lpfc_shost_from_vport(phba->pport);
-	fc_host_post_vendor_event(shost, fc_get_event_number(),
-				  sizeof(board_event),
-				  (char *) &board_event,
-				  LPFC_NL_VENDOR_ID);
+	lpfc_board_errevt_to_mgmt(phba);
 
 	if (phba->hba_flag & DEFER_ERATT)
 		lpfc_handle_deferred_eratt(phba);
@@ -1137,7 +1157,7 @@ lpfc_handle_latt_err_exit:
  *   0 - pointer to the VPD passed in is NULL
  *   1 - success
  **/
-static int
+int
 lpfc_parse_vpd(struct lpfc_hba *phba, uint8_t *vpd, int len)
 {
 	uint8_t lenlo, lenhi;
@@ -1533,7 +1553,8 @@ lpfc_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt)
 		icmd->ulpCommand = CMD_QUE_RING_BUF64_CN;
 		icmd->ulpLe = 1;
 
-		if (lpfc_sli_issue_iocb(phba, pring, iocb, 0) == IOCB_ERROR) {
+		if (lpfc_sli_issue_iocb(phba, pring->ringno, iocb, 0) ==
+		    IOCB_ERROR) {
 			lpfc_mbuf_free(phba, mp1->virt, mp1->phys);
 			kfree(mp1);
 			cnt++;
@@ -1761,7 +1782,6 @@ lpfc_cleanup(struct lpfc_vport *vport)
 	 * Lets wait for this to happen, if needed.
 	 */
 	while (!list_empty(&vport->fc_nodes)) {
-
 		if (i++ > 3000) {
 			lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY,
 				"0233 Nodelist not empty\n");
@@ -1782,7 +1802,6 @@ lpfc_cleanup(struct lpfc_vport *vport)
 		/* Wait for any activity on ndlps to settle */
 		msleep(10);
 	}
-	return;
 }
 
 /**
@@ -1803,22 +1822,36 @@ lpfc_stop_vport_timers(struct lpfc_vport *vport)
 }
 
 /**
- * lpfc_stop_phba_timers - Stop all the timers associated with an HBA
+ * lpfc_stop_hba_timers - Stop all the timers associated with an HBA
  * @phba: pointer to lpfc hba data structure.
  *
  * This routine stops all the timers associated with a HBA. This function is
  * invoked before either putting a HBA offline or unloading the driver.
  **/
-static void
-lpfc_stop_phba_timers(struct lpfc_hba *phba)
+void
+lpfc_stop_hba_timers(struct lpfc_hba *phba)
 {
-	del_timer_sync(&phba->fcp_poll_timer);
 	lpfc_stop_vport_timers(phba->pport);
 	del_timer_sync(&phba->sli.mbox_tmo);
 	del_timer_sync(&phba->fabric_block_timer);
-	phba->hb_outstanding = 0;
-	del_timer_sync(&phba->hb_tmofunc);
 	del_timer_sync(&phba->eratt_poll);
+	del_timer_sync(&phba->hb_tmofunc);
+	phba->hb_outstanding = 0;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		/* Stop any LightPulse device specific driver timers */
+		del_timer_sync(&phba->fcp_poll_timer);
+		break;
+	case LPFC_PCI_DEV_OC:
+		/* Stop any OneConnect device sepcific driver timers */
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0297 Invalid device group (x%x)\n",
+				phba->pci_dev_grp);
+		break;
+	}
 	return;
 }
 
@@ -2509,9 +2542,8 @@ lpfc_disable_msi(struct lpfc_hba *phba)
  *
  * This routine it invoked to log the currently used active interrupt mode
  * to the device.
- */
-static void
-lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode)
+ **/
+static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode)
 {
 	switch (intr_mode) {
 	case 0:
@@ -2534,293 +2566,380 @@ lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode)
 	return;
 }
 
-static void
-lpfc_stop_port(struct lpfc_hba *phba)
-{
-	/* Clear all interrupt enable conditions */
-	writel(0, phba->HCregaddr);
-	readl(phba->HCregaddr); /* flush */
-	/* Clear all pending interrupts */
-	writel(0xffffffff, phba->HAregaddr);
-	readl(phba->HAregaddr); /* flush */
-
-	/* Reset some HBA SLI setup states */
-	lpfc_stop_phba_timers(phba);
-	phba->pport->work_port_events = 0;
-
-	return;
-}
-
 /**
- * lpfc_enable_intr - Enable device interrupt
+ * lpfc_enable_pci_dev - Enable a generic PCI device.
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to enable device interrupt and associate driver's
- * interrupt handler(s) to interrupt vector(s). Depends on the interrupt
- * mode configured to the driver, the driver will try to fallback from the
- * configured interrupt mode to an interrupt mode which is supported by the
- * platform, kernel, and device in the order of: MSI-X -> MSI -> IRQ.
+ * This routine is invoked to enable the PCI device that is common to all
+ * PCI devices.
  *
  * Return codes
- *   0 - sucessful
- *   other values - error
+ * 	0 - sucessful
+ * 	other values - error
  **/
-static uint32_t
-lpfc_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
+static int
+lpfc_enable_pci_dev(struct lpfc_hba *phba)
 {
-	uint32_t intr_mode = LPFC_INTR_ERROR;
-	int retval;
+	struct pci_dev *pdev;
+	int bars;
 
-	if (cfg_mode == 2) {
-		/* Need to issue conf_port mbox cmd before conf_msi mbox cmd */
-		retval = lpfc_sli_config_port(phba, 3);
-		if (!retval) {
-			/* Now, try to enable MSI-X interrupt mode */
-			retval = lpfc_enable_msix(phba);
-			if (!retval) {
-				/* Indicate initialization to MSI-X mode */
-				phba->intr_type = MSIX;
-				intr_mode = 2;
-			}
-		}
-	}
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		goto out_error;
+	else
+		pdev = phba->pcidev;
+	/* Select PCI BARs */
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	/* Enable PCI device */
+	if (pci_enable_device_mem(pdev))
+		goto out_error;
+	/* Request PCI resource for the device */
+	if (pci_request_selected_regions(pdev, bars, LPFC_DRIVER_NAME))
+		goto out_disable_device;
+	/* Set up device as PCI master and save state for EEH */
+	pci_set_master(pdev);
+	pci_try_set_mwi(pdev);
+	pci_save_state(pdev);
 
-	/* Fallback to MSI if MSI-X initialization failed */
-	if (cfg_mode >= 1 && phba->intr_type == NONE) {
-		retval = lpfc_enable_msi(phba);
-		if (!retval) {
-			/* Indicate initialization to MSI mode */
-			phba->intr_type = MSI;
-			intr_mode = 1;
-		}
-	}
+	return 0;
 
-	/* Fallback to INTx if both MSI-X/MSI initalization failed */
-	if (phba->intr_type == NONE) {
-		retval = request_irq(phba->pcidev->irq, lpfc_intr_handler,
-				     IRQF_SHARED, LPFC_DRIVER_NAME, phba);
-		if (!retval) {
-			/* Indicate initialization to INTx mode */
-			phba->intr_type = INTx;
-			intr_mode = 0;
-		}
-	}
-	return intr_mode;
+out_disable_device:
+	pci_disable_device(pdev);
+out_error:
+	return -ENODEV;
 }
 
 /**
- * lpfc_disable_intr - Disable device interrupt
+ * lpfc_disable_pci_dev - Disable a generic PCI device.
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to disable device interrupt and disassociate the
- * driver's interrupt handler(s) from interrupt vector(s). Depending on the
- * interrupt mode, the driver will release the interrupt vector(s) for the
- * message signaled interrupt.
+ * This routine is invoked to disable the PCI device that is common to all
+ * PCI devices.
  **/
 static void
-lpfc_disable_intr(struct lpfc_hba *phba)
+lpfc_disable_pci_dev(struct lpfc_hba *phba)
 {
-	/* Disable the currently initialized interrupt mode */
-	if (phba->intr_type == MSIX)
-		lpfc_disable_msix(phba);
-	else if (phba->intr_type == MSI)
-		lpfc_disable_msi(phba);
-	else if (phba->intr_type == INTx)
-		free_irq(phba->pcidev->irq, phba);
+	struct pci_dev *pdev;
+	int bars;
 
-	/* Reset interrupt management states */
-	phba->intr_type = NONE;
-	phba->sli.slistat.sli_intr = 0;
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return;
+	else
+		pdev = phba->pcidev;
+	/* Select PCI BARs */
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	/* Release PCI resource and disable PCI device */
+	pci_release_selected_regions(pdev, bars);
+	pci_disable_device(pdev);
+	/* Null out PCI private reference to driver */
+	pci_set_drvdata(pdev, NULL);
 
 	return;
 }
 
 /**
- * lpfc_pci_probe_one - lpfc PCI probe func to register device to PCI subsystem
- * @pdev: pointer to PCI device
- * @pid: pointer to PCI device identifier
- *
- * This routine is to be registered to the kernel's PCI subsystem. When an
- * Emulex HBA is presented in PCI bus, the kernel PCI subsystem looks at
- * PCI device-specific information of the device and driver to see if the
- * driver state that it can support this kind of device. If the match is
- * successful, the driver core invokes this routine. If this routine
- * determines it can claim the HBA, it does all the initialization that it
- * needs to do to handle the HBA properly.
+ * lpfc_reset_hba - Reset a hba
+ * @phba: pointer to lpfc hba data structure.
  *
- * Return code
- *   0 - driver can claim the device
- *   negative value - driver can not claim the device
+ * This routine is invoked to reset a hba device. It brings the HBA
+ * offline, performs a board restart, and then brings the board back
+ * online. The lpfc_offline calls lpfc_sli_hba_down which will clean up
+ * on outstanding mailbox commands.
  **/
-static int __devinit
-lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
+void
+lpfc_reset_hba(struct lpfc_hba *phba)
 {
-	struct lpfc_vport *vport = NULL;
-	struct lpfc_hba   *phba;
-	struct lpfc_sli   *psli;
-	struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
-	struct Scsi_Host  *shost = NULL;
-	void *ptr;
-	unsigned long bar0map_len, bar2map_len;
-	int error = -ENODEV, retval;
-	int  i, hbq_count;
-	uint16_t iotag;
-	uint32_t cfg_mode, intr_mode;
-	int bars = pci_select_bars(pdev, IORESOURCE_MEM);
-	struct lpfc_adapter_event_header adapter_event;
-
-	if (pci_enable_device_mem(pdev))
-		goto out;
-	if (pci_request_selected_regions(pdev, bars, LPFC_DRIVER_NAME))
-		goto out_disable_device;
-
-	phba = kzalloc(sizeof (struct lpfc_hba), GFP_KERNEL);
-	if (!phba)
-		goto out_release_regions;
-
-	atomic_set(&phba->fast_event_count, 0);
-	spin_lock_init(&phba->hbalock);
-
-	/* Initialize ndlp management spinlock */
-	spin_lock_init(&phba->ndlp_lock);
-
-	phba->pcidev = pdev;
+	/* If resets are disabled then set error state and return. */
+	if (!phba->cfg_enable_hba_reset) {
+		phba->link_state = LPFC_HBA_ERROR;
+		return;
+	}
+	lpfc_offline_prep(phba);
+	lpfc_offline(phba);
+	lpfc_sli_brdrestart(phba);
+	lpfc_online(phba);
+	lpfc_unblock_mgmt_io(phba);
+}
 
-	/* Assign an unused board number */
-	if ((phba->brd_no = lpfc_get_instance()) < 0)
-		goto out_free_phba;
+/**
+ * lpfc_sli_driver_resource_setup - Setup driver internal resources for SLI3 dev.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources specific to
+ * support the SLI-3 HBA device it attached to.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli;
 
-	INIT_LIST_HEAD(&phba->port_list);
-	init_waitqueue_head(&phba->wait_4_mlo_m_q);
 	/*
-	 * Get all the module params for configuring this host and then
-	 * establish the host.
+	 * Initialize timers used by driver
 	 */
-	lpfc_get_cfgparam(phba);
-	phba->max_vpi = LPFC_MAX_VPI;
 
-	/* Initialize timers used by driver */
+	/* Heartbeat timer */
 	init_timer(&phba->hb_tmofunc);
 	phba->hb_tmofunc.function = lpfc_hb_timeout;
 	phba->hb_tmofunc.data = (unsigned long)phba;
 
 	psli = &phba->sli;
+	/* MBOX heartbeat timer */
 	init_timer(&psli->mbox_tmo);
 	psli->mbox_tmo.function = lpfc_mbox_timeout;
 	psli->mbox_tmo.data = (unsigned long) phba;
+	/* FCP polling mode timer */
 	init_timer(&phba->fcp_poll_timer);
 	phba->fcp_poll_timer.function = lpfc_poll_timeout;
 	phba->fcp_poll_timer.data = (unsigned long) phba;
+	/* Fabric block timer */
 	init_timer(&phba->fabric_block_timer);
 	phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
 	phba->fabric_block_timer.data = (unsigned long) phba;
+	/* EA polling mode timer */
 	init_timer(&phba->eratt_poll);
 	phba->eratt_poll.function = lpfc_poll_eratt;
 	phba->eratt_poll.data = (unsigned long) phba;
 
-	pci_set_master(pdev);
-	pci_save_state(pdev);
-	pci_try_set_mwi(pdev);
-
-	if (pci_set_dma_mask(phba->pcidev, DMA_BIT_MASK(64)) != 0)
-		if (pci_set_dma_mask(phba->pcidev, DMA_BIT_MASK(32)) != 0)
-			goto out_idr_remove;
+	/* Host attention work mask setup */
+	phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
+	phba->work_ha_mask |= (HA_RXMASK << (LPFC_ELS_RING * 4));
 
+	/* Get all the module params for configuring this host */
+	lpfc_get_cfgparam(phba);
 	/*
-	 * Get the bus address of Bar0 and Bar2 and the number of bytes
-	 * required by each mapping.
+	 * Since the sg_tablesize is module parameter, the sg_dma_buf_size
+	 * used to create the sg_dma_buf_pool must be dynamically calculated.
+	 * 2 segments are added since the IOCB needs a command and response bde.
 	 */
-	phba->pci_bar0_map = pci_resource_start(phba->pcidev, 0);
-	bar0map_len        = pci_resource_len(phba->pcidev, 0);
-
-	phba->pci_bar2_map = pci_resource_start(phba->pcidev, 2);
-	bar2map_len        = pci_resource_len(phba->pcidev, 2);
-
-	/* Map HBA SLIM to a kernel virtual address. */
-	phba->slim_memmap_p = ioremap(phba->pci_bar0_map, bar0map_len);
-	if (!phba->slim_memmap_p) {
-		error = -ENODEV;
-		dev_printk(KERN_ERR, &pdev->dev,
-			   "ioremap failed for SLIM memory.\n");
-		goto out_idr_remove;
-	}
-
-	/* Map HBA Control Registers to a kernel virtual address. */
-	phba->ctrl_regs_memmap_p = ioremap(phba->pci_bar2_map, bar2map_len);
-	if (!phba->ctrl_regs_memmap_p) {
-		error = -ENODEV;
-		dev_printk(KERN_ERR, &pdev->dev,
-			   "ioremap failed for HBA control registers.\n");
-		goto out_iounmap_slim;
+	phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+		sizeof(struct fcp_rsp) +
+			((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
+
+	if (phba->cfg_enable_bg) {
+		phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SEG_CNT;
+		phba->cfg_sg_dma_buf_size +=
+			phba->cfg_prot_sg_seg_cnt * sizeof(struct ulp_bde64);
 	}
 
-	/* Allocate memory for SLI-2 structures */
-	phba->slim2p.virt = dma_alloc_coherent(&phba->pcidev->dev,
-					       SLI2_SLIM_SIZE,
-					       &phba->slim2p.phys,
-					       GFP_KERNEL);
-	if (!phba->slim2p.virt)
-		goto out_iounmap;
+	/* Also reinitialize the host templates with new values. */
+	lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
 
-	memset(phba->slim2p.virt, 0, SLI2_SLIM_SIZE);
-	phba->mbox = phba->slim2p.virt + offsetof(struct lpfc_sli2_slim, mbx);
-	phba->pcb = (phba->slim2p.virt + offsetof(struct lpfc_sli2_slim, pcb));
-	phba->IOCBs = (phba->slim2p.virt +
-		       offsetof(struct lpfc_sli2_slim, IOCBs));
+	phba->max_vpi = LPFC_MAX_VPI;
+	/* This will be set to correct value after config_port mbox */
+	phba->max_vports = 0;
 
-	phba->hbqslimp.virt = dma_alloc_coherent(&phba->pcidev->dev,
-						 lpfc_sli_hbq_size(),
-						 &phba->hbqslimp.phys,
-						 GFP_KERNEL);
-	if (!phba->hbqslimp.virt)
-		goto out_free_slim;
+	/*
+	 * Initialize the SLI Layer to run with lpfc HBAs.
+	 */
+	lpfc_sli_setup(phba);
+	lpfc_sli_queue_setup(phba);
 
-	hbq_count = lpfc_sli_hbq_count();
-	ptr = phba->hbqslimp.virt;
-	for (i = 0; i < hbq_count; ++i) {
-		phba->hbqs[i].hbq_virt = ptr;
-		INIT_LIST_HEAD(&phba->hbqs[i].hbq_buffer_list);
-		ptr += (lpfc_hbq_defs[i]->entry_count *
-			sizeof(struct lpfc_hbq_entry));
-	}
-	phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_els_hbq_alloc;
-	phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer  = lpfc_els_hbq_free;
+	/* Allocate device driver memory */
+	if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ))
+		return -ENOMEM;
 
-	memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size());
+	return 0;
+}
 
-	INIT_LIST_HEAD(&phba->hbqbuf_in_list);
+/**
+ * lpfc_sli_driver_resource_unset - Unset drvr internal resources for SLI3 dev
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the driver internal resources set up
+ * specific for supporting the SLI-3 HBA device it attached to.
+ **/
+static void
+lpfc_sli_driver_resource_unset(struct lpfc_hba *phba)
+{
+	/* Free device driver memory allocated */
+	lpfc_mem_free_all(phba);
 
-	/* Initialize the SLI Layer to run with lpfc HBAs. */
-	lpfc_sli_setup(phba);
-	lpfc_sli_queue_setup(phba);
+	return;
+}
 
-	retval = lpfc_mem_alloc(phba);
-	if (retval) {
-		error = retval;
-		goto out_free_hbqslimp;
+/**
+ * lpfc_init_api_table_setup - Set up init api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the device INIT interface API function jump table
+ * in @phba struct.
+ *
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->lpfc_hba_down_post = lpfc_hba_down_post_s3;
+		phba->lpfc_handle_eratt = lpfc_handle_eratt_s3;
+		phba->lpfc_stop_port = lpfc_stop_port_s3;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1431 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	return 0;
+}
+
+/**
+ * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources before the
+ * device specific resource setup to support the HBA device it attached to.
+ *
+ * Return codes
+ *	0 - sucessful
+ *	other values - error
+ **/
+static int
+lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
+{
+	/*
+	 * Driver resources common to all SLI revisions
+	 */
+	atomic_set(&phba->fast_event_count, 0);
+	spin_lock_init(&phba->hbalock);
+
+	/* Initialize ndlp management spinlock */
+	spin_lock_init(&phba->ndlp_lock);
+
+	INIT_LIST_HEAD(&phba->port_list);
+	INIT_LIST_HEAD(&phba->work_list);
+	init_waitqueue_head(&phba->wait_4_mlo_m_q);
+
+	/* Initialize the wait queue head for the kernel thread */
+	init_waitqueue_head(&phba->work_waitq);
+
+	/* Initialize the scsi buffer list used by driver for scsi IO */
+	spin_lock_init(&phba->scsi_buf_list_lock);
+	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+
+	/* Initialize the fabric iocb list */
+	INIT_LIST_HEAD(&phba->fabric_iocb_list);
+
+	/* Initialize list to save ELS buffers */
+	INIT_LIST_HEAD(&phba->elsbuf);
+
+	/* Initialize FCF connection rec list */
+	INIT_LIST_HEAD(&phba->fcf_conn_rec_list);
+
+	return 0;
+}
+
+/**
+ * lpfc_setup_driver_resource_phase2 - Phase2 setup driver internal resources.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources after the
+ * device specific resource setup to support the HBA device it attached to.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba)
+{
+	int error;
+
+	/* Startup the kernel thread for this host adapter. */
+	phba->worker_thread = kthread_run(lpfc_do_work, phba,
+					  "lpfc_worker_%d", phba->brd_no);
+	if (IS_ERR(phba->worker_thread)) {
+		error = PTR_ERR(phba->worker_thread);
+		return error;
+	}
+
+	return 0;
+}
+
+/**
+ * lpfc_unset_driver_resource_phase2 - Phase2 unset driver internal resources.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the driver internal resources set up after
+ * the device specific resource setup for supporting the HBA device it
+ * attached to.
+ **/
+static void
+lpfc_unset_driver_resource_phase2(struct lpfc_hba *phba)
+{
+	/* Stop kernel worker thread */
+	kthread_stop(phba->worker_thread);
+}
+
+/**
+ * lpfc_free_iocb_list - Free iocb list.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to free the driver's IOCB list and memory.
+ **/
+static void
+lpfc_free_iocb_list(struct lpfc_hba *phba)
+{
+	struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
+
+	spin_lock_irq(&phba->hbalock);
+	list_for_each_entry_safe(iocbq_entry, iocbq_next,
+				 &phba->lpfc_iocb_list, list) {
+		list_del(&iocbq_entry->list);
+		kfree(iocbq_entry);
+		phba->total_iocbq_bufs--;
 	}
+	spin_unlock_irq(&phba->hbalock);
+
+	return;
+}
+
+/**
+ * lpfc_init_iocb_list - Allocate and initialize iocb list.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate and initizlize the driver's IOCB
+ * list and set up the IOCB tag array accordingly.
+ *
+ * Return codes
+ *	0 - sucessful
+ *	other values - error
+ **/
+static int
+lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count)
+{
+	struct lpfc_iocbq *iocbq_entry = NULL;
+	uint16_t iotag;
+	int i;
 
 	/* Initialize and populate the iocb list per host.  */
 	INIT_LIST_HEAD(&phba->lpfc_iocb_list);
-	for (i = 0; i < LPFC_IOCB_LIST_CNT; i++) {
+	for (i = 0; i < iocb_count; i++) {
 		iocbq_entry = kzalloc(sizeof(struct lpfc_iocbq), GFP_KERNEL);
 		if (iocbq_entry == NULL) {
 			printk(KERN_ERR "%s: only allocated %d iocbs of "
 				"expected %d count. Unloading driver.\n",
 				__func__, i, LPFC_IOCB_LIST_CNT);
-			error = -ENOMEM;
 			goto out_free_iocbq;
 		}
 
 		iotag = lpfc_sli_next_iotag(phba, iocbq_entry);
 		if (iotag == 0) {
-			kfree (iocbq_entry);
+			kfree(iocbq_entry);
 			printk(KERN_ERR "%s: failed to allocate IOTAG. "
-			       "Unloading driver.\n",
-				__func__);
-			error = -ENOMEM;
+				"Unloading driver.\n", __func__);
 			goto out_free_iocbq;
 		}
+		iocbq_entry->sli4_xritag = NO_XRI;
 
 		spin_lock_irq(&phba->hbalock);
 		list_add(&iocbq_entry->list, &phba->lpfc_iocb_list);
@@ -2828,71 +2947,799 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
 		spin_unlock_irq(&phba->hbalock);
 	}
 
-	/* Initialize HBA structure */
-	phba->fc_edtov = FF_DEF_EDTOV;
-	phba->fc_ratov = FF_DEF_RATOV;
-	phba->fc_altov = FF_DEF_ALTOV;
-	phba->fc_arbtov = FF_DEF_ARBTOV;
+	return 0;
 
-	INIT_LIST_HEAD(&phba->work_list);
-	phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
-	phba->work_ha_mask |= (HA_RXMASK << (LPFC_ELS_RING * 4));
+out_free_iocbq:
+	lpfc_free_iocb_list(phba);
 
-	/* Initialize the wait queue head for the kernel thread */
-	init_waitqueue_head(&phba->work_waitq);
+	return -ENOMEM;
+}
 
-	/* Startup the kernel thread for this host adapter. */
-	phba->worker_thread = kthread_run(lpfc_do_work, phba,
-				       "lpfc_worker_%d", phba->brd_no);
-	if (IS_ERR(phba->worker_thread)) {
-		error = PTR_ERR(phba->worker_thread);
-		goto out_free_iocbq;
+/**
+ * lpfc_hba_alloc - Allocate driver hba data structure for a device.
+ * @pdev: pointer to pci device data structure.
+ *
+ * This routine is invoked to allocate the driver hba data structure for an
+ * HBA device. If the allocation is successful, the phba reference to the
+ * PCI device data structure is set.
+ *
+ * Return codes
+ *      pointer to @phba - sucessful
+ *      NULL - error
+ **/
+static struct lpfc_hba *
+lpfc_hba_alloc(struct pci_dev *pdev)
+{
+	struct lpfc_hba *phba;
+
+	/* Allocate memory for HBA structure */
+	phba = kzalloc(sizeof(struct lpfc_hba), GFP_KERNEL);
+	if (!phba) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1417 Failed to allocate hba struct.\n");
+		return NULL;
 	}
 
-	/* Initialize the list of scsi buffers used by driver for scsi IO. */
-	spin_lock_init(&phba->scsi_buf_list_lock);
-	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+	/* Set reference to PCI device in HBA structure */
+	phba->pcidev = pdev;
 
-	/* Initialize list of fabric iocbs */
-	INIT_LIST_HEAD(&phba->fabric_iocb_list);
+	/* Assign an unused board number */
+	phba->brd_no = lpfc_get_instance();
+	if (phba->brd_no < 0) {
+		kfree(phba);
+		return NULL;
+	}
 
-	/* Initialize list to save ELS buffers */
-	INIT_LIST_HEAD(&phba->elsbuf);
+	return phba;
+}
+
+/**
+ * lpfc_hba_free - Free driver hba data structure with a device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to free the driver hba data structure with an
+ * HBA device.
+ **/
+static void
+lpfc_hba_free(struct lpfc_hba *phba)
+{
+	/* Release the driver assigned board number */
+	idr_remove(&lpfc_hba_index, phba->brd_no);
+
+	kfree(phba);
+	return;
+}
+
+/**
+ * lpfc_create_shost - Create hba physical port with associated scsi host.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to create HBA physical port and associate a SCSI
+ * host with it.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      other values - error
+ **/
+static int
+lpfc_create_shost(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport;
+	struct Scsi_Host  *shost;
+
+	/* Initialize HBA FC structure */
+	phba->fc_edtov = FF_DEF_EDTOV;
+	phba->fc_ratov = FF_DEF_RATOV;
+	phba->fc_altov = FF_DEF_ALTOV;
+	phba->fc_arbtov = FF_DEF_ARBTOV;
 
 	vport = lpfc_create_port(phba, phba->brd_no, &phba->pcidev->dev);
 	if (!vport)
-		goto out_kthread_stop;
+		return -ENODEV;
 
 	shost = lpfc_shost_from_vport(vport);
 	phba->pport = vport;
 	lpfc_debugfs_initialize(vport);
+	/* Put reference to SCSI host to driver's device private data */
+	pci_set_drvdata(phba->pcidev, shost);
 
-	pci_set_drvdata(pdev, shost);
+	return 0;
+}
 
-	phba->MBslimaddr = phba->slim_memmap_p;
-	phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET;
-	phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET;
-	phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET;
-	phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET;
+/**
+ * lpfc_destroy_shost - Destroy hba physical port with associated scsi host.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to destroy HBA physical port and the associated
+ * SCSI host.
+ **/
+static void
+lpfc_destroy_shost(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+
+	/* Destroy physical port that associated with the SCSI host */
+	destroy_port(vport);
+
+	return;
+}
+
+/**
+ * lpfc_setup_bg - Setup Block guard structures and debug areas.
+ * @phba: pointer to lpfc hba data structure.
+ * @shost: the shost to be used to detect Block guard settings.
+ *
+ * This routine sets up the local Block guard protocol settings for @shost.
+ * This routine also allocates memory for debugging bg buffers.
+ **/
+static void
+lpfc_setup_bg(struct lpfc_hba *phba, struct Scsi_Host *shost)
+{
+	int pagecnt = 10;
+	if (lpfc_prot_mask && lpfc_prot_guard) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"1478 Registering BlockGuard with the "
+				"SCSI layer\n");
+		scsi_host_set_prot(shost, lpfc_prot_mask);
+		scsi_host_set_guard(shost, lpfc_prot_guard);
+	}
+	if (!_dump_buf_data) {
+		while (pagecnt) {
+			spin_lock_init(&_dump_buf_lock);
+			_dump_buf_data =
+				(char *) __get_free_pages(GFP_KERNEL, pagecnt);
+			if (_dump_buf_data) {
+				printk(KERN_ERR "BLKGRD allocated %d pages for "
+				       "_dump_buf_data at 0x%p\n",
+				       (1 << pagecnt), _dump_buf_data);
+				_dump_buf_data_order = pagecnt;
+				memset(_dump_buf_data, 0,
+				       ((1 << PAGE_SHIFT) << pagecnt));
+				break;
+			} else
+				--pagecnt;
+		}
+		if (!_dump_buf_data_order)
+			printk(KERN_ERR "BLKGRD ERROR unable to allocate "
+			       "memory for hexdump\n");
+	} else
+		printk(KERN_ERR "BLKGRD already allocated _dump_buf_data=0x%p"
+		       "\n", _dump_buf_data);
+	if (!_dump_buf_dif) {
+		while (pagecnt) {
+			_dump_buf_dif =
+				(char *) __get_free_pages(GFP_KERNEL, pagecnt);
+			if (_dump_buf_dif) {
+				printk(KERN_ERR "BLKGRD allocated %d pages for "
+				       "_dump_buf_dif at 0x%p\n",
+				       (1 << pagecnt), _dump_buf_dif);
+				_dump_buf_dif_order = pagecnt;
+				memset(_dump_buf_dif, 0,
+				       ((1 << PAGE_SHIFT) << pagecnt));
+				break;
+			} else
+				--pagecnt;
+		}
+		if (!_dump_buf_dif_order)
+			printk(KERN_ERR "BLKGRD ERROR unable to allocate "
+			       "memory for hexdump\n");
+	} else
+		printk(KERN_ERR "BLKGRD already allocated _dump_buf_dif=0x%p\n",
+		       _dump_buf_dif);
+}
+
+/**
+ * lpfc_post_init_setup - Perform necessary device post initialization setup.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to perform all the necessary post initialization
+ * setup for the device.
+ **/
+static void
+lpfc_post_init_setup(struct lpfc_hba *phba)
+{
+	struct Scsi_Host  *shost;
+	struct lpfc_adapter_event_header adapter_event;
+
+	/* Get the default values for Model Name and Description */
+	lpfc_get_hba_model_desc(phba, phba->ModelName, phba->ModelDesc);
+
+	/*
+	 * hba setup may have changed the hba_queue_depth so we need to
+	 * adjust the value of can_queue.
+	 */
+	shost = pci_get_drvdata(phba->pcidev);
+	shost->can_queue = phba->cfg_hba_queue_depth - 10;
+	if (phba->sli3_options & LPFC_SLI3_BG_ENABLED)
+		lpfc_setup_bg(phba, shost);
+
+	lpfc_host_attrib_init(shost);
+
+	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
+		spin_lock_irq(shost->host_lock);
+		lpfc_poll_start_timer(phba);
+		spin_unlock_irq(shost->host_lock);
+	}
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"0428 Perform SCSI scan\n");
+	/* Send board arrival event to upper layer */
+	adapter_event.event_type = FC_REG_ADAPTER_EVENT;
+	adapter_event.subcategory = LPFC_EVENT_ARRIVAL;
+	fc_host_post_vendor_event(shost, fc_get_event_number(),
+				  sizeof(adapter_event),
+				  (char *) &adapter_event,
+				  LPFC_NL_VENDOR_ID);
+	return;
+}
+
+/**
+ * lpfc_sli_pci_mem_setup - Setup SLI3 HBA PCI memory space.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the PCI device memory space for device
+ * with SLI-3 interface spec.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli_pci_mem_setup(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+	unsigned long bar0map_len, bar2map_len;
+	int i, hbq_count;
+	void *ptr;
+	int error = -ENODEV;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return error;
+	else
+		pdev = phba->pcidev;
+
+	/* Set the device DMA mask size */
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0)
+		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)
+			return error;
+
+	/* Get the bus address of Bar0 and Bar2 and the number of bytes
+	 * required by each mapping.
+	 */
+	phba->pci_bar0_map = pci_resource_start(pdev, 0);
+	bar0map_len = pci_resource_len(pdev, 0);
+
+	phba->pci_bar2_map = pci_resource_start(pdev, 2);
+	bar2map_len = pci_resource_len(pdev, 2);
+
+	/* Map HBA SLIM to a kernel virtual address. */
+	phba->slim_memmap_p = ioremap(phba->pci_bar0_map, bar0map_len);
+	if (!phba->slim_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for SLIM memory.\n");
+		goto out;
+	}
+
+	/* Map HBA Control Registers to a kernel virtual address. */
+	phba->ctrl_regs_memmap_p = ioremap(phba->pci_bar2_map, bar2map_len);
+	if (!phba->ctrl_regs_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for HBA control registers.\n");
+		goto out_iounmap_slim;
+	}
+
+	/* Allocate memory for SLI-2 structures */
+	phba->slim2p.virt = dma_alloc_coherent(&pdev->dev,
+					       SLI2_SLIM_SIZE,
+					       &phba->slim2p.phys,
+					       GFP_KERNEL);
+	if (!phba->slim2p.virt)
+		goto out_iounmap;
+
+	memset(phba->slim2p.virt, 0, SLI2_SLIM_SIZE);
+	phba->mbox = phba->slim2p.virt + offsetof(struct lpfc_sli2_slim, mbx);
+	phba->pcb = (phba->slim2p.virt + offsetof(struct lpfc_sli2_slim, pcb));
+	phba->IOCBs = (phba->slim2p.virt +
+		       offsetof(struct lpfc_sli2_slim, IOCBs));
+
+	phba->hbqslimp.virt = dma_alloc_coherent(&pdev->dev,
+						 lpfc_sli_hbq_size(),
+						 &phba->hbqslimp.phys,
+						 GFP_KERNEL);
+	if (!phba->hbqslimp.virt)
+		goto out_free_slim;
+
+	hbq_count = lpfc_sli_hbq_count();
+	ptr = phba->hbqslimp.virt;
+	for (i = 0; i < hbq_count; ++i) {
+		phba->hbqs[i].hbq_virt = ptr;
+		INIT_LIST_HEAD(&phba->hbqs[i].hbq_buffer_list);
+		ptr += (lpfc_hbq_defs[i]->entry_count *
+			sizeof(struct lpfc_hbq_entry));
+	}
+	phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_els_hbq_alloc;
+	phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer = lpfc_els_hbq_free;
+
+	memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size());
+
+	INIT_LIST_HEAD(&phba->rb_pend_list);
+
+	phba->MBslimaddr = phba->slim_memmap_p;
+	phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET;
+	phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET;
+	phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET;
+	phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET;
+
+	return 0;
+
+out_free_slim:
+	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
+			  phba->slim2p.virt, phba->slim2p.phys);
+out_iounmap:
+	iounmap(phba->ctrl_regs_memmap_p);
+out_iounmap_slim:
+	iounmap(phba->slim_memmap_p);
+out:
+	return error;
+}
+
+/**
+ * lpfc_sli_pci_mem_unset - Unset SLI3 HBA PCI memory space.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the PCI device memory space for device
+ * with SLI-3 interface spec.
+ **/
+static void
+lpfc_sli_pci_mem_unset(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return;
+	else
+		pdev = phba->pcidev;
+
+	/* Free coherent DMA memory allocated */
+	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
+			  phba->hbqslimp.virt, phba->hbqslimp.phys);
+	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
+			  phba->slim2p.virt, phba->slim2p.phys);
+
+	/* I/O memory unmap */
+	iounmap(phba->ctrl_regs_memmap_p);
+	iounmap(phba->slim_memmap_p);
+
+	return;
+}
+
+/**
+ * lpfc_sli_enable_msix - Enable MSI-X interrupt mode on SLI-3 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the MSI-X interrupt vectors to device
+ * with SLI-3 interface specs. The kernel function pci_enable_msix() is
+ * called to enable the MSI-X vectors. Note that pci_enable_msix(), once
+ * invoked, enables either all or nothing, depending on the current
+ * availability of PCI vector resources. The device driver is responsible
+ * for calling the individual request_irq() to register each MSI-X vector
+ * with a interrupt handler, which is done in this function. Note that
+ * later when device is unloading, the driver should always call free_irq()
+ * on all MSI-X vectors it has done request_irq() on before calling
+ * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device
+ * will be left with MSI-X enabled and leaks its vectors.
+ *
+ * Return codes
+ *   0 - sucessful
+ *   other values - error
+ **/
+static int
+lpfc_sli_enable_msix(struct lpfc_hba *phba)
+{
+	int rc, i;
+	LPFC_MBOXQ_t *pmb;
+
+	/* Set up MSI-X multi-message vectors */
+	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
+		phba->msix_entries[i].entry = i;
+
+	/* Configure MSI-X capability structure */
+	rc = pci_enable_msix(phba->pcidev, phba->msix_entries,
+				ARRAY_SIZE(phba->msix_entries));
+	if (rc) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0420 PCI enable MSI-X failed (%d)\n", rc);
+		goto msi_fail_out;
+	}
+	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0477 MSI-X entry[%d]: vector=x%x "
+				"message=%d\n", i,
+				phba->msix_entries[i].vector,
+				phba->msix_entries[i].entry);
+	/*
+	 * Assign MSI-X vectors to interrupt handlers
+	 */
+
+	/* vector-0 is associated to slow-path handler */
+	rc = request_irq(phba->msix_entries[0].vector,
+			 &lpfc_sli_sp_intr_handler, IRQF_SHARED,
+			 LPFC_SP_DRIVER_HANDLER_NAME, phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0421 MSI-X slow-path request_irq failed "
+				"(%d)\n", rc);
+		goto msi_fail_out;
+	}
+
+	/* vector-1 is associated to fast-path handler */
+	rc = request_irq(phba->msix_entries[1].vector,
+			 &lpfc_sli_fp_intr_handler, IRQF_SHARED,
+			 LPFC_FP_DRIVER_HANDLER_NAME, phba);
+
+	if (rc) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0429 MSI-X fast-path request_irq failed "
+				"(%d)\n", rc);
+		goto irq_fail_out;
+	}
+
+	/*
+	 * Configure HBA MSI-X attention conditions to messages
+	 */
+	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+
+	if (!pmb) {
+		rc = -ENOMEM;
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0474 Unable to allocate memory for issuing "
+				"MBOX_CONFIG_MSI command\n");
+		goto mem_fail_out;
+	}
+	rc = lpfc_config_msi(phba, pmb);
+	if (rc)
+		goto mbx_fail_out;
+	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
+				"0351 Config MSI mailbox command failed, "
+				"mbxCmd x%x, mbxStatus x%x\n",
+				pmb->u.mb.mbxCommand, pmb->u.mb.mbxStatus);
+		goto mbx_fail_out;
+	}
+
+	/* Free memory allocated for mailbox command */
+	mempool_free(pmb, phba->mbox_mem_pool);
+	return rc;
+
+mbx_fail_out:
+	/* Free memory allocated for mailbox command */
+	mempool_free(pmb, phba->mbox_mem_pool);
+
+mem_fail_out:
+	/* free the irq already requested */
+	free_irq(phba->msix_entries[1].vector, phba);
+
+irq_fail_out:
+	/* free the irq already requested */
+	free_irq(phba->msix_entries[0].vector, phba);
+
+msi_fail_out:
+	/* Unconfigure MSI-X capability structure */
+	pci_disable_msix(phba->pcidev);
+	return rc;
+}
+
+/**
+ * lpfc_sli_disable_msix - Disable MSI-X interrupt mode on SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to release the MSI-X vectors and then disable the
+ * MSI-X interrupt mode to device with SLI-3 interface spec.
+ **/
+static void
+lpfc_sli_disable_msix(struct lpfc_hba *phba)
+{
+	int i;
+
+	/* Free up MSI-X multi-message vectors */
+	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
+		free_irq(phba->msix_entries[i].vector, phba);
+	/* Disable MSI-X */
+	pci_disable_msix(phba->pcidev);
+
+	return;
+}
+
+/**
+ * lpfc_sli_enable_msi - Enable MSI interrupt mode on SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the MSI interrupt mode to device with
+ * SLI-3 interface spec. The kernel function pci_enable_msi() is called to
+ * enable the MSI vector. The device driver is responsible for calling the
+ * request_irq() to register MSI vector with a interrupt the handler, which
+ * is done in this function.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ */
+static int
+lpfc_sli_enable_msi(struct lpfc_hba *phba)
+{
+	int rc;
+
+	rc = pci_enable_msi(phba->pcidev);
+	if (!rc)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0462 PCI enable MSI mode success.\n");
+	else {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0471 PCI enable MSI mode failed (%d)\n", rc);
+		return rc;
+	}
+
+	rc = request_irq(phba->pcidev->irq, lpfc_sli_intr_handler,
+			 IRQF_SHARED, LPFC_DRIVER_NAME, phba);
+	if (rc) {
+		pci_disable_msi(phba->pcidev);
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0478 MSI request_irq failed (%d)\n", rc);
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli_disable_msi - Disable MSI interrupt mode to SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable the MSI interrupt mode to device with
+ * SLI-3 interface spec. The driver calls free_irq() on MSI vector it has
+ * done request_irq() on before calling pci_disable_msi(). Failure to do so
+ * results in a BUG_ON() and a device will be left with MSI enabled and leaks
+ * its vector.
+ */
+static void
+lpfc_sli_disable_msi(struct lpfc_hba *phba)
+{
+	free_irq(phba->pcidev->irq, phba);
+	pci_disable_msi(phba->pcidev);
+	return;
+}
+
+/**
+ * lpfc_sli_enable_intr - Enable device interrupt to SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable device interrupt and associate driver's
+ * interrupt handler(s) to interrupt vector(s) to device with SLI-3 interface
+ * spec. Depends on the interrupt mode configured to the driver, the driver
+ * will try to fallback from the configured interrupt mode to an interrupt
+ * mode which is supported by the platform, kernel, and device in the order
+ * of:
+ * MSI-X -> MSI -> IRQ.
+ *
+ * Return codes
+ *   0 - sucessful
+ *   other values - error
+ **/
+static uint32_t
+lpfc_sli_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
+{
+	uint32_t intr_mode = LPFC_INTR_ERROR;
+	int retval;
+
+	if (cfg_mode == 2) {
+		/* Need to issue conf_port mbox cmd before conf_msi mbox cmd */
+		retval = lpfc_sli_config_port(phba, LPFC_SLI_REV3);
+		if (!retval) {
+			/* Now, try to enable MSI-X interrupt mode */
+			retval = lpfc_sli_enable_msix(phba);
+			if (!retval) {
+				/* Indicate initialization to MSI-X mode */
+				phba->intr_type = MSIX;
+				intr_mode = 2;
+			}
+		}
+	}
+
+	/* Fallback to MSI if MSI-X initialization failed */
+	if (cfg_mode >= 1 && phba->intr_type == NONE) {
+		retval = lpfc_sli_enable_msi(phba);
+		if (!retval) {
+			/* Indicate initialization to MSI mode */
+			phba->intr_type = MSI;
+			intr_mode = 1;
+		}
+	}
+
+	/* Fallback to INTx if both MSI-X/MSI initalization failed */
+	if (phba->intr_type == NONE) {
+		retval = request_irq(phba->pcidev->irq, lpfc_sli_intr_handler,
+				     IRQF_SHARED, LPFC_DRIVER_NAME, phba);
+		if (!retval) {
+			/* Indicate initialization to INTx mode */
+			phba->intr_type = INTx;
+			intr_mode = 0;
+		}
+	}
+	return intr_mode;
+}
+
+/**
+ * lpfc_sli_disable_intr - Disable device interrupt to SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable device interrupt and disassociate the
+ * driver's interrupt handler(s) from interrupt vector(s) to device with
+ * SLI-3 interface spec. Depending on the interrupt mode, the driver will
+ * release the interrupt vector(s) for the message signaled interrupt.
+ **/
+static void
+lpfc_sli_disable_intr(struct lpfc_hba *phba)
+{
+	/* Disable the currently initialized interrupt mode */
+	if (phba->intr_type == MSIX)
+		lpfc_sli_disable_msix(phba);
+	else if (phba->intr_type == MSI)
+		lpfc_sli_disable_msi(phba);
+	else if (phba->intr_type == INTx)
+		free_irq(phba->pcidev->irq, phba);
+
+	/* Reset interrupt management states */
+	phba->intr_type = NONE;
+	phba->sli.slistat.sli_intr = 0;
+
+	return;
+}
+
+/**
+ * lpfc_unset_hba - Unset SLI3 hba device initialization
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the HBA device initialization steps to
+ * a device with SLI-3 interface spec.
+ **/
+static void
+lpfc_unset_hba(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+
+	spin_lock_irq(shost->host_lock);
+	vport->load_flag |= FC_UNLOADING;
+	spin_unlock_irq(shost->host_lock);
+
+	lpfc_stop_hba_timers(phba);
+
+	phba->pport->work_port_events = 0;
+
+	lpfc_sli_hba_down(phba);
+
+	lpfc_sli_brdrestart(phba);
+
+	lpfc_sli_disable_intr(phba);
+
+	return;
+}
+
+/**
+ * lpfc_pci_probe_one_s3 - PCI probe func to reg SLI-3 device to PCI subsystem.
+ * @pdev: pointer to PCI device
+ * @pid: pointer to PCI device identifier
+ *
+ * This routine is to be called to attach a device with SLI-3 interface spec
+ * to the PCI subsystem. When an Emulex HBA with SLI-3 interface spec is
+ * presented on PCI bus, the kernel PCI subsystem looks at PCI device-specific
+ * information of the device and driver to see if the driver state that it can
+ * support this kind of device. If the match is successful, the driver core
+ * invokes this routine. If this routine determines it can claim the HBA, it
+ * does all the initialization that it needs to do to handle the HBA properly.
+ *
+ * Return code
+ * 	0 - driver can claim the device
+ * 	negative value - driver can not claim the device
+ **/
+static int __devinit
+lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
+{
+	struct lpfc_hba   *phba;
+	struct lpfc_vport *vport = NULL;
+	int error;
+	uint32_t cfg_mode, intr_mode;
+
+	/* Allocate memory for HBA structure */
+	phba = lpfc_hba_alloc(pdev);
+	if (!phba)
+		return -ENOMEM;
+
+	/* Perform generic PCI device enabling operation */
+	error = lpfc_enable_pci_dev(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1401 Failed to enable pci device.\n");
+		goto out_free_phba;
+	}
+
+	/* Set up SLI API function jump table for PCI-device group-0 HBAs */
+	error = lpfc_api_table_setup(phba, LPFC_PCI_DEV_LP);
+	if (error)
+		goto out_disable_pci_dev;
+
+	/* Set up SLI-3 specific device PCI memory space */
+	error = lpfc_sli_pci_mem_setup(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1402 Failed to set up pci memory space.\n");
+		goto out_disable_pci_dev;
+	}
+
+	/* Set up phase-1 common device driver resources */
+	error = lpfc_setup_driver_resource_phase1(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1403 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s3;
+	}
+
+	/* Set up SLI-3 specific device driver resources */
+	error = lpfc_sli_driver_resource_setup(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1404 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s3;
+	}
+
+	/* Initialize and populate the iocb list per host */
+	error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1405 Failed to initialize iocb list.\n");
+		goto out_unset_driver_resource_s3;
+	}
+
+	/* Set up common device driver resources */
+	error = lpfc_setup_driver_resource_phase2(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1406 Failed to set up driver resource.\n");
+		goto out_free_iocb_list;
+	}
+
+	/* Create SCSI host to the physical port */
+	error = lpfc_create_shost(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1407 Failed to create scsi host.\n");
+		goto out_unset_driver_resource;
+	}
 
 	/* Configure sysfs attributes */
-	if (lpfc_alloc_sysfs_attr(vport)) {
+	vport = phba->pport;
+	error = lpfc_alloc_sysfs_attr(vport);
+	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1476 Failed to allocate sysfs attr\n");
-		error = -ENOMEM;
-		goto out_destroy_port;
+		goto out_destroy_shost;
 	}
 
+	/* Now, trying to enable interrupt and bring up the device */
 	cfg_mode = phba->cfg_use_msi;
 	while (true) {
+		/* Put device to a known state before enabling interrupt */
+		lpfc_stop_port(phba);
 		/* Configure and enable interrupt */
-		intr_mode = lpfc_enable_intr(phba, cfg_mode);
+		intr_mode = lpfc_sli_enable_intr(phba, cfg_mode);
 		if (intr_mode == LPFC_INTR_ERROR) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"0426 Failed to enable interrupt.\n");
+					"0431 Failed to enable interrupt.\n");
+			error = -ENODEV;
 			goto out_free_sysfs_attr;
 		}
-		/* HBA SLI setup */
+		/* SLI-3 HBA setup */
 		if (lpfc_sli_hba_setup(phba)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 					"1477 Failed to set up hba\n");
@@ -2902,185 +3749,65 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
 
 		/* Wait 50ms for the interrupts of previous mailbox commands */
 		msleep(50);
-		/* Check active interrupts received */
-		if (phba->sli.slistat.sli_intr > LPFC_MSIX_VECTORS) {
+		/* Check active interrupts on message signaled interrupts */
+		if (intr_mode == 0 ||
+		    phba->sli.slistat.sli_intr > LPFC_MSIX_VECTORS) {
 			/* Log the current active interrupt mode */
 			phba->intr_mode = intr_mode;
 			lpfc_log_intr_mode(phba, intr_mode);
 			break;
 		} else {
 			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"0451 Configure interrupt mode (%d) "
+					"0447 Configure interrupt mode (%d) "
 					"failed active interrupt test.\n",
 					intr_mode);
-			if (intr_mode == 0) {
-				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-						"0479 Failed to enable "
-						"interrupt.\n");
-				error = -ENODEV;
-				goto out_remove_device;
-			}
-			/* Stop HBA SLI setups */
-			lpfc_stop_port(phba);
 			/* Disable the current interrupt mode */
-			lpfc_disable_intr(phba);
+			lpfc_sli_disable_intr(phba);
 			/* Try next level of interrupt mode */
 			cfg_mode = --intr_mode;
 		}
 	}
 
-	/*
-	 * hba setup may have changed the hba_queue_depth so we need to adjust
-	 * the value of can_queue.
-	 */
-	shost->can_queue = phba->cfg_hba_queue_depth - 10;
-	if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) {
-
-		if (lpfc_prot_mask && lpfc_prot_guard) {
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"1478 Registering BlockGuard with the "
-					"SCSI layer\n");
-
-			scsi_host_set_prot(shost, lpfc_prot_mask);
-			scsi_host_set_guard(shost, lpfc_prot_guard);
-		}
-	}
-
-	if (!_dump_buf_data) {
-		int pagecnt = 10;
-		while (pagecnt) {
-			spin_lock_init(&_dump_buf_lock);
-			_dump_buf_data =
-				(char *) __get_free_pages(GFP_KERNEL, pagecnt);
-			if (_dump_buf_data) {
-				printk(KERN_ERR "BLKGRD allocated %d pages for "
-						"_dump_buf_data at 0x%p\n",
-						(1 << pagecnt), _dump_buf_data);
-				_dump_buf_data_order = pagecnt;
-				memset(_dump_buf_data, 0, ((1 << PAGE_SHIFT)
-							   << pagecnt));
-				break;
-			} else {
-				--pagecnt;
-			}
-
-		}
-
-		if (!_dump_buf_data_order)
-			printk(KERN_ERR "BLKGRD ERROR unable to allocate "
-					"memory for hexdump\n");
-
-	} else {
-		printk(KERN_ERR "BLKGRD already allocated _dump_buf_data=0x%p"
-		       "\n", _dump_buf_data);
-	}
-
-
-	if (!_dump_buf_dif) {
-		int pagecnt = 10;
-		while (pagecnt) {
-			_dump_buf_dif =
-				(char *) __get_free_pages(GFP_KERNEL, pagecnt);
-			if (_dump_buf_dif) {
-				printk(KERN_ERR "BLKGRD allocated %d pages for "
-						"_dump_buf_dif at 0x%p\n",
-						(1 << pagecnt), _dump_buf_dif);
-				_dump_buf_dif_order = pagecnt;
-				memset(_dump_buf_dif, 0, ((1 << PAGE_SHIFT)
-							  << pagecnt));
-				break;
-			} else {
-				--pagecnt;
-			}
-
-		}
-
-		if (!_dump_buf_dif_order)
-			printk(KERN_ERR "BLKGRD ERROR unable to allocate "
-					"memory for hexdump\n");
-
-	} else {
-		printk(KERN_ERR "BLKGRD already allocated _dump_buf_dif=0x%p\n",
-				_dump_buf_dif);
-	}
-
-	lpfc_host_attrib_init(shost);
-
-	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
-		spin_lock_irq(shost->host_lock);
-		lpfc_poll_start_timer(phba);
-		spin_unlock_irq(shost->host_lock);
-	}
+	/* Perform post initialization setup */
+	lpfc_post_init_setup(phba);
 
-	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"0428 Perform SCSI scan\n");
-	/* Send board arrival event to upper layer */
-	adapter_event.event_type = FC_REG_ADAPTER_EVENT;
-	adapter_event.subcategory = LPFC_EVENT_ARRIVAL;
-	fc_host_post_vendor_event(shost, fc_get_event_number(),
-		sizeof(adapter_event),
-		(char *) &adapter_event,
-		LPFC_NL_VENDOR_ID);
+	/* Check if there are static vports to be created. */
+	lpfc_create_static_vport(phba);
 
 	return 0;
 
 out_remove_device:
-	spin_lock_irq(shost->host_lock);
-	vport->load_flag |= FC_UNLOADING;
-	spin_unlock_irq(shost->host_lock);
-	lpfc_stop_phba_timers(phba);
-	phba->pport->work_port_events = 0;
-	lpfc_disable_intr(phba);
-	lpfc_sli_hba_down(phba);
-	lpfc_sli_brdrestart(phba);
+	lpfc_unset_hba(phba);
 out_free_sysfs_attr:
 	lpfc_free_sysfs_attr(vport);
-out_destroy_port:
-	destroy_port(vport);
-out_kthread_stop:
-	kthread_stop(phba->worker_thread);
-out_free_iocbq:
-	list_for_each_entry_safe(iocbq_entry, iocbq_next,
-						&phba->lpfc_iocb_list, list) {
-		kfree(iocbq_entry);
-		phba->total_iocbq_bufs--;
-	}
-	lpfc_mem_free(phba);
-out_free_hbqslimp:
-	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
-			  phba->hbqslimp.virt, phba->hbqslimp.phys);
-out_free_slim:
-	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
-			  phba->slim2p.virt, phba->slim2p.phys);
-out_iounmap:
-	iounmap(phba->ctrl_regs_memmap_p);
-out_iounmap_slim:
-	iounmap(phba->slim_memmap_p);
-out_idr_remove:
-	idr_remove(&lpfc_hba_index, phba->brd_no);
+out_destroy_shost:
+	lpfc_destroy_shost(phba);
+out_unset_driver_resource:
+	lpfc_unset_driver_resource_phase2(phba);
+out_free_iocb_list:
+	lpfc_free_iocb_list(phba);
+out_unset_driver_resource_s3:
+	lpfc_sli_driver_resource_unset(phba);
+out_unset_pci_mem_s3:
+	lpfc_sli_pci_mem_unset(phba);
+out_disable_pci_dev:
+	lpfc_disable_pci_dev(phba);
 out_free_phba:
-	kfree(phba);
-out_release_regions:
-	pci_release_selected_regions(pdev, bars);
-out_disable_device:
-	pci_disable_device(pdev);
-out:
-	pci_set_drvdata(pdev, NULL);
-	if (shost)
-		scsi_host_put(shost);
+	lpfc_hba_free(phba);
 	return error;
 }
 
 /**
- * lpfc_pci_remove_one - lpfc PCI func to unregister device from PCI subsystem
+ * lpfc_pci_remove_one_s3 - PCI func to unreg SLI-3 device from PCI subsystem.
  * @pdev: pointer to PCI device
  *
- * This routine is to be registered to the kernel's PCI subsystem. When an
- * Emulex HBA is removed from PCI bus, it performs all the necessary cleanup
- * for the HBA device to be removed from the PCI subsystem properly.
+ * This routine is to be called to disattach a device with SLI-3 interface
+ * spec from PCI subsystem. When an Emulex HBA with SLI-3 interface spec is
+ * removed from PCI bus, it performs all the necessary cleanup for the HBA
+ * device to be removed from the PCI subsystem properly.
  **/
 static void __devexit
-lpfc_pci_remove_one(struct pci_dev *pdev)
+lpfc_pci_remove_one_s3(struct pci_dev *pdev)
 {
 	struct Scsi_Host  *shost = pci_get_drvdata(pdev);
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
@@ -3098,7 +3825,7 @@ lpfc_pci_remove_one(struct pci_dev *pdev)
 	/* Release all the vports against this physical port */
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for (i = 1; i <= phba->max_vpi && vports[i] != NULL; i++)
+		for (i = 1; i <= phba->max_vports && vports[i] != NULL; i++)
 			fc_vport_terminate(vports[i]->fc_vport);
 	lpfc_destroy_vport_work_array(phba, vports);
 
@@ -3120,7 +3847,7 @@ lpfc_pci_remove_one(struct pci_dev *pdev)
 	/* Final cleanup of txcmplq and reset the HBA */
 	lpfc_sli_brdrestart(phba);
 
-	lpfc_stop_phba_timers(phba);
+	lpfc_stop_hba_timers(phba);
 	spin_lock_irq(&phba->hbalock);
 	list_del_init(&vport->listentry);
 	spin_unlock_irq(&phba->hbalock);
@@ -3128,7 +3855,7 @@ lpfc_pci_remove_one(struct pci_dev *pdev)
 	lpfc_debugfs_terminate(vport);
 
 	/* Disable interrupt */
-	lpfc_disable_intr(phba);
+	lpfc_sli_disable_intr(phba);
 
 	pci_set_drvdata(pdev, NULL);
 	scsi_host_put(shost);
@@ -3138,7 +3865,7 @@ lpfc_pci_remove_one(struct pci_dev *pdev)
 	 * corresponding pools here.
 	 */
 	lpfc_scsi_free(phba);
-	lpfc_mem_free(phba);
+	lpfc_mem_free_all(phba);
 
 	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
 			  phba->hbqslimp.virt, phba->hbqslimp.phys);
@@ -3151,36 +3878,35 @@ lpfc_pci_remove_one(struct pci_dev *pdev)
 	iounmap(phba->ctrl_regs_memmap_p);
 	iounmap(phba->slim_memmap_p);
 
-	idr_remove(&lpfc_hba_index, phba->brd_no);
-
-	kfree(phba);
+	lpfc_hba_free(phba);
 
 	pci_release_selected_regions(pdev, bars);
 	pci_disable_device(pdev);
 }
 
 /**
- * lpfc_pci_suspend_one - lpfc PCI func to suspend device for power management
+ * lpfc_pci_suspend_one_s3 - PCI func to suspend SLI-3 device for power mgmnt
  * @pdev: pointer to PCI device
  * @msg: power management message
  *
- * This routine is to be registered to the kernel's PCI subsystem to support
- * system Power Management (PM). When PM invokes this method, it quiesces the
- * device by stopping the driver's worker thread for the device, turning off
- * device's interrupt and DMA, and bring the device offline. Note that as the
- * driver implements the minimum PM requirements to a power-aware driver's PM
- * support for suspend/resume -- all the possible PM messages (SUSPEND,
- * HIBERNATE, FREEZE) to the suspend() method call will be treated as SUSPEND
- * and the driver will fully reinitialize its device during resume() method
- * call, the driver will set device to PCI_D3hot state in PCI config space
- * instead of setting it according to the @msg provided by the PM.
+ * This routine is to be called from the kernel's PCI subsystem to support
+ * system Power Management (PM) to device with SLI-3 interface spec. When
+ * PM invokes this method, it quiesces the device by stopping the driver's
+ * worker thread for the device, turning off device's interrupt and DMA,
+ * and bring the device offline. Note that as the driver implements the
+ * minimum PM requirements to a power-aware driver's PM support for the
+ * suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE, FREEZE)
+ * to the suspend() method call will be treated as SUSPEND and the driver will
+ * fully reinitialize its device during resume() method call, the driver will
+ * set device to PCI_D3hot state in PCI config space instead of setting it
+ * according to the @msg provided by the PM.
  *
  * Return code
- *   0 - driver suspended the device
- *   Error otherwise
+ * 	0 - driver suspended the device
+ * 	Error otherwise
  **/
 static int
-lpfc_pci_suspend_one(struct pci_dev *pdev, pm_message_t msg)
+lpfc_pci_suspend_one_s3(struct pci_dev *pdev, pm_message_t msg)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3194,7 +3920,7 @@ lpfc_pci_suspend_one(struct pci_dev *pdev, pm_message_t msg)
 	kthread_stop(phba->worker_thread);
 
 	/* Disable interrupt from device */
-	lpfc_disable_intr(phba);
+	lpfc_sli_disable_intr(phba);
 
 	/* Save device state to PCI config space */
 	pci_save_state(pdev);
@@ -3204,25 +3930,26 @@ lpfc_pci_suspend_one(struct pci_dev *pdev, pm_message_t msg)
 }
 
 /**
- * lpfc_pci_resume_one - lpfc PCI func to resume device for power management
+ * lpfc_pci_resume_one_s3 - PCI func to resume SLI-3 device for power mgmnt
  * @pdev: pointer to PCI device
  *
- * This routine is to be registered to the kernel's PCI subsystem to support
- * system Power Management (PM). When PM invokes this method, it restores
- * the device's PCI config space state and fully reinitializes the device
- * and brings it online. Note that as the driver implements the minimum PM
- * requirements to a power-aware driver's PM for suspend/resume -- all
- * the possible PM messages (SUSPEND, HIBERNATE, FREEZE) to the suspend()
- * method call will be treated as SUSPEND and the driver will fully
- * reinitialize its device during resume() method call, the device will be
- * set to PCI_D0 directly in PCI config space before restoring the state.
+ * This routine is to be called from the kernel's PCI subsystem to support
+ * system Power Management (PM) to device with SLI-3 interface spec. When PM
+ * invokes this method, it restores the device's PCI config space state and
+ * fully reinitializes the device and brings it online. Note that as the
+ * driver implements the minimum PM requirements to a power-aware driver's
+ * PM for suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE,
+ * FREEZE) to the suspend() method call will be treated as SUSPEND and the
+ * driver will fully reinitialize its device during resume() method call,
+ * the device will be set to PCI_D0 directly in PCI config space before
+ * restoring the state.
  *
  * Return code
- *   0 - driver suspended the device
- *   Error otherwise
+ * 	0 - driver suspended the device
+ * 	Error otherwise
  **/
 static int
-lpfc_pci_resume_one(struct pci_dev *pdev)
+lpfc_pci_resume_one_s3(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3250,7 +3977,7 @@ lpfc_pci_resume_one(struct pci_dev *pdev)
 	}
 
 	/* Configure and enable interrupt */
-	intr_mode = lpfc_enable_intr(phba, phba->intr_mode);
+	intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
 	if (intr_mode == LPFC_INTR_ERROR) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0430 PM resume Failed to enable interrupt\n");
@@ -3269,23 +3996,24 @@ lpfc_pci_resume_one(struct pci_dev *pdev)
 }
 
 /**
- * lpfc_io_error_detected - Driver method for handling PCI I/O error detected
+ * lpfc_io_error_detected_s3 - Method for handling SLI-3 device PCI I/O error
  * @pdev: pointer to PCI device.
  * @state: the current PCI connection state.
  *
- * This routine is registered to the PCI subsystem for error handling. This
- * function is called by the PCI subsystem after a PCI bus error affecting
- * this device has been detected. When this function is invoked, it will
- * need to stop all the I/Os and interrupt(s) to the device. Once that is
- * done, it will return PCI_ERS_RESULT_NEED_RESET for the PCI subsystem to
- * perform proper recovery as desired.
+ * This routine is called from the PCI subsystem for I/O error handling to
+ * device with SLI-3 interface spec. This function is called by the PCI
+ * subsystem after a PCI bus error affecting this device has been detected.
+ * When this function is invoked, it will need to stop all the I/Os and
+ * interrupt(s) to the device. Once that is done, it will return
+ * PCI_ERS_RESULT_NEED_RESET for the PCI subsystem to perform proper recovery
+ * as desired.
  *
  * Return codes
- *   PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
- *   PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ * 	PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
  **/
-static pci_ers_result_t lpfc_io_error_detected(struct pci_dev *pdev,
-				pci_channel_state_t state)
+static pci_ers_result_t
+lpfc_io_error_detected_s3(struct pci_dev *pdev, pci_channel_state_t state)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3312,30 +4040,32 @@ static pci_ers_result_t lpfc_io_error_detected(struct pci_dev *pdev,
 	lpfc_sli_abort_iocb_ring(phba, pring);
 
 	/* Disable interrupt */
-	lpfc_disable_intr(phba);
+	lpfc_sli_disable_intr(phba);
 
 	/* Request a slot reset. */
 	return PCI_ERS_RESULT_NEED_RESET;
 }
 
 /**
- * lpfc_io_slot_reset - Restart a PCI device from scratch
+ * lpfc_io_slot_reset_s3 - Method for restarting PCI SLI-3 device from scratch.
  * @pdev: pointer to PCI device.
  *
- * This routine is registered to the PCI subsystem for error handling. This is
- * called after PCI bus has been reset to restart the PCI card from scratch,
- * as if from a cold-boot. During the PCI subsystem error recovery, after the
- * driver returns PCI_ERS_RESULT_NEED_RESET, the PCI subsystem will perform
- * proper error recovery and then call this routine before calling the .resume
- * method to recover the device. This function will initialize the HBA device,
- * enable the interrupt, but it will just put the HBA to offline state without
- * passing any I/O traffic.
+ * This routine is called from the PCI subsystem for error handling to
+ * device with SLI-3 interface spec. This is called after PCI bus has been
+ * reset to restart the PCI card from scratch, as if from a cold-boot.
+ * During the PCI subsystem error recovery, after driver returns
+ * PCI_ERS_RESULT_NEED_RESET, the PCI subsystem will perform proper error
+ * recovery and then call this routine before calling the .resume method
+ * to recover the device. This function will initialize the HBA device,
+ * enable the interrupt, but it will just put the HBA to offline state
+ * without passing any I/O traffic.
  *
  * Return codes
- *   PCI_ERS_RESULT_RECOVERED - the device has been recovered
- *   PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ * 	PCI_ERS_RESULT_RECOVERED - the device has been recovered
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
  */
-static pci_ers_result_t lpfc_io_slot_reset(struct pci_dev *pdev)
+static pci_ers_result_t
+lpfc_io_slot_reset_s3(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3354,11 +4084,11 @@ static pci_ers_result_t lpfc_io_slot_reset(struct pci_dev *pdev)
 		pci_set_master(pdev);
 
 	spin_lock_irq(&phba->hbalock);
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
 	/* Configure and enable interrupt */
-	intr_mode = lpfc_enable_intr(phba, phba->intr_mode);
+	intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
 	if (intr_mode == LPFC_INTR_ERROR) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0427 Cannot re-enable interrupt after "
@@ -3378,15 +4108,17 @@ static pci_ers_result_t lpfc_io_slot_reset(struct pci_dev *pdev)
 }
 
 /**
- * lpfc_io_resume - Resume PCI I/O operation
+ * lpfc_io_resume_s3 - Method for resuming PCI I/O operation on SLI-3 device.
  * @pdev: pointer to PCI device
  *
- * This routine is registered to the PCI subsystem for error handling. It is
- * called when kernel error recovery tells the lpfc driver that it is ok to
- * resume normal PCI operation after PCI bus error recovery. After this call,
- * traffic can start to flow from this device again.
+ * This routine is called from the PCI subsystem for error handling to device
+ * with SLI-3 interface spec. It is called when kernel error recovery tells
+ * the lpfc driver that it is ok to resume normal PCI operation after PCI bus
+ * error recovery. After this call, traffic can start to flow from this device
+ * again.
  */
-static void lpfc_io_resume(struct pci_dev *pdev)
+static void
+lpfc_io_resume_s3(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3394,6 +4126,235 @@ static void lpfc_io_resume(struct pci_dev *pdev)
 	lpfc_online(phba);
 }
 
+/**
+ * lpfc_pci_probe_one - lpfc PCI probe func to reg dev to PCI subsystem
+ * @pdev: pointer to PCI device
+ * @pid: pointer to PCI device identifier
+ *
+ * This routine is to be registered to the kernel's PCI subsystem. When an
+ * Emulex HBA device is presented on PCI bus, the kernel PCI subsystem looks
+ * at PCI device-specific information of the device and driver to see if the
+ * driver state that it can support this kind of device. If the match is
+ * successful, the driver core invokes this routine. This routine dispatches
+ * the action to the proper SLI-3 or SLI-4 device probing routine, which will
+ * do all the initialization that it needs to do to handle the HBA device
+ * properly.
+ *
+ * Return code
+ * 	0 - driver can claim the device
+ * 	negative value - driver can not claim the device
+ **/
+static int __devinit
+lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
+{
+	int rc;
+	uint16_t dev_id;
+
+	if (pci_read_config_word(pdev, PCI_DEVICE_ID, &dev_id))
+		return -ENODEV;
+
+	switch (dev_id) {
+	default:
+		rc = lpfc_pci_probe_one_s3(pdev, pid);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_pci_remove_one - lpfc PCI func to unreg dev from PCI subsystem
+ * @pdev: pointer to PCI device
+ *
+ * This routine is to be registered to the kernel's PCI subsystem. When an
+ * Emulex HBA is removed from PCI bus, the driver core invokes this routine.
+ * This routine dispatches the action to the proper SLI-3 or SLI-4 device
+ * remove routine, which will perform all the necessary cleanup for the
+ * device to be removed from the PCI subsystem properly.
+ **/
+static void __devexit
+lpfc_pci_remove_one(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		lpfc_pci_remove_one_s3(pdev);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1424 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return;
+}
+
+/**
+ * lpfc_pci_suspend_one - lpfc PCI func to suspend dev for power management
+ * @pdev: pointer to PCI device
+ * @msg: power management message
+ *
+ * This routine is to be registered to the kernel's PCI subsystem to support
+ * system Power Management (PM). When PM invokes this method, it dispatches
+ * the action to the proper SLI-3 or SLI-4 device suspend routine, which will
+ * suspend the device.
+ *
+ * Return code
+ * 	0 - driver suspended the device
+ * 	Error otherwise
+ **/
+static int
+lpfc_pci_suspend_one(struct pci_dev *pdev, pm_message_t msg)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	int rc = -ENODEV;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		rc = lpfc_pci_suspend_one_s3(pdev, msg);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1425 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_pci_resume_one - lpfc PCI func to resume dev for power management
+ * @pdev: pointer to PCI device
+ *
+ * This routine is to be registered to the kernel's PCI subsystem to support
+ * system Power Management (PM). When PM invokes this method, it dispatches
+ * the action to the proper SLI-3 or SLI-4 device resume routine, which will
+ * resume the device.
+ *
+ * Return code
+ * 	0 - driver suspended the device
+ * 	Error otherwise
+ **/
+static int
+lpfc_pci_resume_one(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	int rc = -ENODEV;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		rc = lpfc_pci_resume_one_s3(pdev);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1426 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_io_error_detected - lpfc method for handling PCI I/O error
+ * @pdev: pointer to PCI device.
+ * @state: the current PCI connection state.
+ *
+ * This routine is registered to the PCI subsystem for error handling. This
+ * function is called by the PCI subsystem after a PCI bus error affecting
+ * this device has been detected. When this routine is invoked, it dispatches
+ * the action to the proper SLI-3 or SLI-4 device error detected handling
+ * routine, which will perform the proper error detected operation.
+ *
+ * Return codes
+ * 	PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ **/
+static pci_ers_result_t
+lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		rc = lpfc_io_error_detected_s3(pdev, state);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1427 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_io_slot_reset - lpfc method for restart PCI dev from scratch
+ * @pdev: pointer to PCI device.
+ *
+ * This routine is registered to the PCI subsystem for error handling. This
+ * function is called after PCI bus has been reset to restart the PCI card
+ * from scratch, as if from a cold-boot. When this routine is invoked, it
+ * dispatches the action to the proper SLI-3 or SLI-4 device reset handling
+ * routine, which will perform the proper device reset.
+ *
+ * Return codes
+ * 	PCI_ERS_RESULT_RECOVERED - the device has been recovered
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ **/
+static pci_ers_result_t
+lpfc_io_slot_reset(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		rc = lpfc_io_slot_reset_s3(pdev);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1428 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_io_resume - lpfc method for resuming PCI I/O operation
+ * @pdev: pointer to PCI device
+ *
+ * This routine is registered to the PCI subsystem for error handling. It
+ * is called when kernel error recovery tells the lpfc driver that it is
+ * OK to resume normal PCI operation after PCI bus error recovery. When
+ * this routine is invoked, it dispatches the action to the proper SLI-3
+ * or SLI-4 device io_resume routine, which will resume the device operation.
+ **/
+static void
+lpfc_io_resume(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+
+	switch (phba->pci_dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		lpfc_io_resume_s3(pdev);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1429 Invalid PCI device group: 0x%x\n",
+				phba->pci_dev_grp);
+		break;
+	}
+	return;
+}
+
 static struct pci_device_id lpfc_id_table[] = {
 	{PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_VIPER,
 		PCI_ANY_ID, PCI_ANY_ID, },
@@ -3469,6 +4430,10 @@ static struct pci_device_id lpfc_id_table[] = {
 		PCI_ANY_ID, PCI_ANY_ID, },
 	{PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_PROTEUS_S,
 		PCI_ANY_ID, PCI_ANY_ID, },
+	{PCI_VENDOR_ID_SERVERENGINE, PCI_DEVICE_ID_TIGERSHARK,
+		PCI_ANY_ID, PCI_ANY_ID, },
+	{PCI_VENDOR_ID_SERVERENGINE, PCI_DEVICE_ID_TIGERSHARK_S,
+		PCI_ANY_ID, PCI_ANY_ID, },
 	{ 0 }
 };
 
@@ -3486,7 +4451,7 @@ static struct pci_driver lpfc_driver = {
 	.probe		= lpfc_pci_probe_one,
 	.remove		= __devexit_p(lpfc_pci_remove_one),
 	.suspend        = lpfc_pci_suspend_one,
-	.resume         = lpfc_pci_resume_one,
+	.resume		= lpfc_pci_resume_one,
 	.err_handler    = &lpfc_err_handler,
 };
 
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 167b66d..a226c05 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -438,22 +438,23 @@ lpfc_scsi_dev_block(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_new_scsi_buf - Scsi buffer allocator
+ * lpfc_new_scsi_buf_s3 - Scsi buffer allocator for HBA with SLI3 IF spec
  * @vport: The virtual port for which this call being executed.
+ * @num_to_allocate: The requested number of buffers to allocate.
  *
- * This routine allocates a scsi buffer, which contains all the necessary
- * information needed to initiate a SCSI I/O.  The non-DMAable buffer region
- * contains information to build the IOCB.  The DMAable region contains
- * memory for the FCP CMND, FCP RSP, and the initial BPL.  In addition to
- * allocating memory, the FCP CMND and FCP RSP BDEs are setup in the BPL
- * and the BPL BDE is setup in the IOCB.
+ * This routine allocates a scsi buffer for device with SLI-3 interface spec,
+ * the scsi buffer contains all the necessary information needed to initiate
+ * a SCSI I/O. The non-DMAable buffer region contains information to build
+ * the IOCB. The DMAable region contains memory for the FCP CMND, FCP RSP,
+ * and the initial BPL. In addition to allocating memory, the FCP CMND and
+ * FCP RSP BDEs are setup in the BPL and the BPL BDE is setup in the IOCB.
  *
  * Return codes:
- *   NULL - Error
- *   Pointer to lpfc_scsi_buf data structure - Success
+ *   int - number of scsi buffers that were allocated.
+ *   0 = failure, less than num_to_alloc is a partial failure.
  **/
-static struct lpfc_scsi_buf *
-lpfc_new_scsi_buf(struct lpfc_vport *vport)
+static int
+lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 {
 	struct lpfc_hba *phba = vport->phba;
 	struct lpfc_scsi_buf *psb;
@@ -463,107 +464,134 @@ lpfc_new_scsi_buf(struct lpfc_vport *vport)
 	dma_addr_t pdma_phys_fcp_rsp;
 	dma_addr_t pdma_phys_bpl;
 	uint16_t iotag;
+	int bcnt;
 
-	psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
-	if (!psb)
-		return NULL;
+	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
+		psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
+		if (!psb)
+			break;
 
-	/*
-	 * Get memory from the pci pool to map the virt space to pci bus space
-	 * for an I/O.  The DMA buffer includes space for the struct fcp_cmnd,
-	 * struct fcp_rsp and the number of bde's necessary to support the
-	 * sg_tablesize.
-	 */
-	psb->data = pci_pool_alloc(phba->lpfc_scsi_dma_buf_pool, GFP_KERNEL,
-							&psb->dma_handle);
-	if (!psb->data) {
-		kfree(psb);
-		return NULL;
-	}
-
-	/* Initialize virtual ptrs to dma_buf region. */
-	memset(psb->data, 0, phba->cfg_sg_dma_buf_size);
-
-	/* Allocate iotag for psb->cur_iocbq. */
-	iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq);
-	if (iotag == 0) {
-		pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
-			      psb->data, psb->dma_handle);
-		kfree (psb);
-		return NULL;
-	}
-	psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP;
-
-	psb->fcp_cmnd = psb->data;
-	psb->fcp_rsp = psb->data + sizeof(struct fcp_cmnd);
-	psb->fcp_bpl = psb->data + sizeof(struct fcp_cmnd) +
-							sizeof(struct fcp_rsp);
-
-	/* Initialize local short-hand pointers. */
-	bpl = psb->fcp_bpl;
-	pdma_phys_fcp_cmd = psb->dma_handle;
-	pdma_phys_fcp_rsp = psb->dma_handle + sizeof(struct fcp_cmnd);
-	pdma_phys_bpl = psb->dma_handle + sizeof(struct fcp_cmnd) +
+		/*
+		 * Get memory from the pci pool to map the virt space to pci
+		 * bus space for an I/O.  The DMA buffer includes space for the
+		 * struct fcp_cmnd, struct fcp_rsp and the number of bde's
+		 * necessary to support the sg_tablesize.
+		 */
+		psb->data = pci_pool_alloc(phba->lpfc_scsi_dma_buf_pool,
+					GFP_KERNEL, &psb->dma_handle);
+		if (!psb->data) {
+			kfree(psb);
+			break;
+		}
+
+		/* Initialize virtual ptrs to dma_buf region. */
+		memset(psb->data, 0, phba->cfg_sg_dma_buf_size);
+
+		/* Allocate iotag for psb->cur_iocbq. */
+		iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq);
+		if (iotag == 0) {
+			pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
+					psb->data, psb->dma_handle);
+			kfree(psb);
+			break;
+		}
+		psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP;
+
+		psb->fcp_cmnd = psb->data;
+		psb->fcp_rsp = psb->data + sizeof(struct fcp_cmnd);
+		psb->fcp_bpl = psb->data + sizeof(struct fcp_cmnd) +
 			sizeof(struct fcp_rsp);
 
-	/*
-	 * The first two bdes are the FCP_CMD and FCP_RSP.  The balance are sg
-	 * list bdes.  Initialize the first two and leave the rest for
-	 * queuecommand.
-	 */
-	bpl[0].addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys_fcp_cmd));
-	bpl[0].addrLow = le32_to_cpu(putPaddrLow(pdma_phys_fcp_cmd));
-	bpl[0].tus.f.bdeSize = sizeof(struct fcp_cmnd);
-	bpl[0].tus.f.bdeFlags = BUFF_TYPE_BDE_64;
-	bpl[0].tus.w = le32_to_cpu(bpl[0].tus.w);
-
-	/* Setup the physical region for the FCP RSP */
-	bpl[1].addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys_fcp_rsp));
-	bpl[1].addrLow = le32_to_cpu(putPaddrLow(pdma_phys_fcp_rsp));
-	bpl[1].tus.f.bdeSize = sizeof(struct fcp_rsp);
-	bpl[1].tus.f.bdeFlags = BUFF_TYPE_BDE_64;
-	bpl[1].tus.w = le32_to_cpu(bpl[1].tus.w);
+		/* Initialize local short-hand pointers. */
+		bpl = psb->fcp_bpl;
+		pdma_phys_fcp_cmd = psb->dma_handle;
+		pdma_phys_fcp_rsp = psb->dma_handle + sizeof(struct fcp_cmnd);
+		pdma_phys_bpl = psb->dma_handle + sizeof(struct fcp_cmnd) +
+			sizeof(struct fcp_rsp);
+
+		/*
+		 * The first two bdes are the FCP_CMD and FCP_RSP. The balance
+		 * are sg list bdes.  Initialize the first two and leave the
+		 * rest for queuecommand.
+		 */
+		bpl[0].addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys_fcp_cmd));
+		bpl[0].addrLow = le32_to_cpu(putPaddrLow(pdma_phys_fcp_cmd));
+		bpl[0].tus.f.bdeSize = sizeof(struct fcp_cmnd);
+		bpl[0].tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+		bpl[0].tus.w = le32_to_cpu(bpl[0].tus.w);
+
+		/* Setup the physical region for the FCP RSP */
+		bpl[1].addrHigh = le32_to_cpu(putPaddrHigh(pdma_phys_fcp_rsp));
+		bpl[1].addrLow = le32_to_cpu(putPaddrLow(pdma_phys_fcp_rsp));
+		bpl[1].tus.f.bdeSize = sizeof(struct fcp_rsp);
+		bpl[1].tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+		bpl[1].tus.w = le32_to_cpu(bpl[1].tus.w);
+
+		/*
+		 * Since the IOCB for the FCP I/O is built into this
+		 * lpfc_scsi_buf, initialize it with all known data now.
+		 */
+		iocb = &psb->cur_iocbq.iocb;
+		iocb->un.fcpi64.bdl.ulpIoTag32 = 0;
+		if ((phba->sli_rev == 3) &&
+				!(phba->sli3_options & LPFC_SLI3_BG_ENABLED)) {
+			/* fill in immediate fcp command BDE */
+			iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDE_IMMED;
+			iocb->un.fcpi64.bdl.bdeSize = sizeof(struct fcp_cmnd);
+			iocb->un.fcpi64.bdl.addrLow = offsetof(IOCB_t,
+					unsli3.fcp_ext.icd);
+			iocb->un.fcpi64.bdl.addrHigh = 0;
+			iocb->ulpBdeCount = 0;
+			iocb->ulpLe = 0;
+			/* fill in responce BDE */
+			iocb->unsli3.fcp_ext.rbde.tus.f.bdeFlags =
+							BUFF_TYPE_BDE_64;
+			iocb->unsli3.fcp_ext.rbde.tus.f.bdeSize =
+				sizeof(struct fcp_rsp);
+			iocb->unsli3.fcp_ext.rbde.addrLow =
+				putPaddrLow(pdma_phys_fcp_rsp);
+			iocb->unsli3.fcp_ext.rbde.addrHigh =
+				putPaddrHigh(pdma_phys_fcp_rsp);
+		} else {
+			iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BLP_64;
+			iocb->un.fcpi64.bdl.bdeSize =
+					(2 * sizeof(struct ulp_bde64));
+			iocb->un.fcpi64.bdl.addrLow =
+					putPaddrLow(pdma_phys_bpl);
+			iocb->un.fcpi64.bdl.addrHigh =
+					putPaddrHigh(pdma_phys_bpl);
+			iocb->ulpBdeCount = 1;
+			iocb->ulpLe = 1;
+		}
+		iocb->ulpClass = CLASS3;
+		psb->status = IOSTAT_SUCCESS;
 
-	/*
-	 * Since the IOCB for the FCP I/O is built into this lpfc_scsi_buf,
-	 * initialize it with all known data now.
-	 */
-	iocb = &psb->cur_iocbq.iocb;
-	iocb->un.fcpi64.bdl.ulpIoTag32 = 0;
-	if ((phba->sli_rev == 3) &&
-	    !(phba->sli3_options & LPFC_SLI3_BG_ENABLED)) {
-		/* fill in immediate fcp command BDE */
-		iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDE_IMMED;
-		iocb->un.fcpi64.bdl.bdeSize = sizeof(struct fcp_cmnd);
-		iocb->un.fcpi64.bdl.addrLow = offsetof(IOCB_t,
-						       unsli3.fcp_ext.icd);
-		iocb->un.fcpi64.bdl.addrHigh = 0;
-		iocb->ulpBdeCount = 0;
-		iocb->ulpLe = 0;
-		/* fill in responce BDE */
-		iocb->unsli3.fcp_ext.rbde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
-		iocb->unsli3.fcp_ext.rbde.tus.f.bdeSize =
-						sizeof(struct fcp_rsp);
-		iocb->unsli3.fcp_ext.rbde.addrLow =
-						putPaddrLow(pdma_phys_fcp_rsp);
-		iocb->unsli3.fcp_ext.rbde.addrHigh =
-						putPaddrHigh(pdma_phys_fcp_rsp);
-	} else {
-		iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BLP_64;
-		iocb->un.fcpi64.bdl.bdeSize = (2 * sizeof(struct ulp_bde64));
-		iocb->un.fcpi64.bdl.addrLow = putPaddrLow(pdma_phys_bpl);
-		iocb->un.fcpi64.bdl.addrHigh = putPaddrHigh(pdma_phys_bpl);
-		iocb->ulpBdeCount = 1;
-		iocb->ulpLe = 1;
 	}
-	iocb->ulpClass = CLASS3;
 
-	return psb;
+	return bcnt;
 }
 
 /**
- * lpfc_get_scsi_buf - Get a scsi buffer from lpfc_scsi_buf_list list of Hba
- * @phba: The Hba for which this call is being executed.
+ * lpfc_new_scsi_buf - Wrapper funciton for scsi buffer allocator
+ * @vport: The virtual port for which this call being executed.
+ * @num_to_allocate: The requested number of buffers to allocate.
+ *
+ * This routine wraps the actual SCSI buffer allocator function pointer from
+ * the lpfc_hba struct.
+ *
+ * Return codes:
+ *   int - number of scsi buffers that were allocated.
+ *   0 = failure, less than num_to_alloc is a partial failure.
+ **/
+static inline int
+lpfc_new_scsi_buf(struct lpfc_vport *vport, int num_to_alloc)
+{
+	return vport->phba->lpfc_new_scsi_buf(vport, num_to_alloc);
+}
+
+/**
+ * lpfc_get_scsi_buf - Get a scsi buffer from lpfc_scsi_buf_list of the HBA
+ * @phba: The HBA for which this call is being executed.
  *
  * This routine removes a scsi buffer from head of @phba lpfc_scsi_buf_list list
  * and returns to caller.
@@ -591,7 +619,7 @@ lpfc_get_scsi_buf(struct lpfc_hba * phba)
 }
 
 /**
- * lpfc_release_scsi_buf - Return a scsi buffer back to hba's lpfc_scsi_buf_list
+ * lpfc_release_scsi_buf - Return a scsi buffer back to hba scsi buf list
  * @phba: The Hba for which this call is being executed.
  * @psb: The scsi buffer which is being released.
  *
@@ -599,7 +627,7 @@ lpfc_get_scsi_buf(struct lpfc_hba * phba)
  * lpfc_scsi_buf_list list.
  **/
 static void
-lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 {
 	unsigned long iflag = 0;
 
@@ -610,21 +638,36 @@ lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 }
 
 /**
- * lpfc_scsi_prep_dma_buf - Routine to do DMA mapping for scsi buffer
+ * lpfc_release_scsi_buf: Return a scsi buffer back to hba scsi buf list.
+ * @phba: The Hba for which this call is being executed.
+ * @psb: The scsi buffer which is being released.
+ *
+ * This routine releases @psb scsi buffer by adding it to tail of @phba
+ * lpfc_scsi_buf_list list.
+ **/
+static void
+lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+{
+
+	phba->lpfc_release_scsi_buf(phba, psb);
+}
+
+/**
+ * lpfc_scsi_prep_dma_buf_s3 - DMA mapping for scsi buffer to SLI3 IF spec
  * @phba: The Hba for which this call is being executed.
  * @lpfc_cmd: The scsi buffer which is going to be mapped.
  *
  * This routine does the pci dma mapping for scatter-gather list of scsi cmnd
- * field of @lpfc_cmd. This routine scans through sg elements and format the
- * bdea. This routine also initializes all IOCB fields which are dependent on
- * scsi command request buffer.
+ * field of @lpfc_cmd for device with SLI-3 interface spec. This routine scans
+ * through sg elements and format the bdea. This routine also initializes all
+ * IOCB fields which are dependent on scsi command request buffer.
  *
  * Return codes:
  *   1 - Error
  *   0 - Success
  **/
 static int
-lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
 {
 	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
 	struct scatterlist *sgel = NULL;
@@ -1412,6 +1455,24 @@ out:
 }
 
 /**
+ * lpfc_scsi_prep_dma_buf - Wrapper function for DMA mapping of scsi buffer
+ * @phba: The Hba for which this call is being executed.
+ * @lpfc_cmd: The scsi buffer which is going to be mapped.
+ *
+ * This routine wraps the actual DMA mapping function pointer from the
+ * lpfc_hba struct.
+ *
+ * Return codes:
+ * 	1 - Error
+ * 	0 - Success
+ **/
+static inline int
+lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+{
+	return phba->lpfc_scsi_prep_dma_buf(phba, lpfc_cmd);
+}
+
+/**
  * lpfc_send_scsi_error_event - Posts an event when there is SCSI error
  * @phba: Pointer to hba context object.
  * @vport: Pointer to vport object.
@@ -1504,15 +1565,15 @@ lpfc_send_scsi_error_event(struct lpfc_hba *phba, struct lpfc_vport *vport,
 }
 
 /**
- * lpfc_scsi_unprep_dma_buf - Routine to un-map DMA mapping of scatter gather
- * @phba: The Hba for which this call is being executed.
+ * lpfc_scsi_unprep_dma_buf_s3 - Un-map DMA mapping of SG-list for SLI3 dev
+ * @phba: The HBA for which this call is being executed.
  * @psb: The scsi buffer which is going to be un-mapped.
  *
  * This routine does DMA un-mapping of scatter gather list of scsi command
- * field of @lpfc_cmd.
+ * field of @lpfc_cmd for device with SLI-3 interface spec.
  **/
 static void
-lpfc_scsi_unprep_dma_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb)
+lpfc_scsi_unprep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 {
 	/*
 	 * There are only two special cases to consider.  (1) the scsi command
@@ -1529,6 +1590,20 @@ lpfc_scsi_unprep_dma_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb)
 }
 
 /**
+ * lpfc_scsi_unprep_dma_buf - Wrapper function for unmap DMA mapping of SG-list
+ * @phba: The Hba for which this call is being executed.
+ * @psb: The scsi buffer which is going to be un-mapped.
+ *
+ * This routine does DMA un-mapping of scatter gather list of scsi command
+ * field of @lpfc_cmd for device with SLI-4 interface spec.
+ **/
+static void
+lpfc_scsi_unprep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+{
+	phba->lpfc_scsi_unprep_dma_buf(phba, psb);
+}
+
+/**
  * lpfc_handler_fcp_err - FCP response handler
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
@@ -1676,7 +1751,7 @@ lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
  * lpfc_scsi_cmd_iocb_cmpl - Scsi cmnd IOCB completion routine
  * @phba: The Hba for which this call is being executed.
  * @pIocbIn: The command IOCBQ for the scsi cmnd.
- * @pIocbOut: The response IOCBQ for the scsi cmnd .
+ * @pIocbOut: The response IOCBQ for the scsi cmnd.
  *
  * This routine assigns scsi command result by looking into response IOCB
  * status field appropriately. This routine handles QUEUE FULL condition as
@@ -1957,16 +2032,16 @@ lpfc_fcpcmd_to_iocb(uint8_t *data, struct fcp_cmnd *fcp_cmnd)
 }
 
 /**
- * lpfc_scsi_prep_cmnd -  Routine to convert scsi cmnd to FCP information unit
+ * lpfc_scsi_prep_cmnd_s3 - Convert scsi cmnd to FCP infor unit for SLI3 dev
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: The scsi command which needs to send.
  * @pnode: Pointer to lpfc_nodelist.
  *
  * This routine initializes fcp_cmnd and iocb data structure from scsi command
- * to transfer.
+ * to transfer for device with SLI3 interface spec.
  **/
 static void
-lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+lpfc_scsi_prep_cmnd_s3(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 		    struct lpfc_nodelist *pnode)
 {
 	struct lpfc_hba *phba = vport->phba;
@@ -2013,8 +2088,11 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 	if (scsi_sg_count(scsi_cmnd)) {
 		if (datadir == DMA_TO_DEVICE) {
 			iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR;
-			iocb_cmd->un.fcpi.fcpi_parm = 0;
-			iocb_cmd->ulpPU = 0;
+			if (phba->sli_rev < LPFC_SLI_REV4) {
+				iocb_cmd->un.fcpi.fcpi_parm = 0;
+				iocb_cmd->ulpPU = 0;
+			} else
+				iocb_cmd->ulpPU = PARM_READ_CHECK;
 			fcp_cmnd->fcpCntl3 = WRITE_DATA;
 			phba->fc4OutputRequests++;
 		} else {
@@ -2051,20 +2129,37 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 }
 
 /**
- * lpfc_scsi_prep_task_mgmt_cmnd - Convert scsi TM cmnd to FCP information unit
+ * lpfc_scsi_prep_cmnd - Wrapper func for convert scsi cmnd to FCP info unit
+ * @vport: The virtual port for which this call is being executed.
+ * @lpfc_cmd: The scsi command which needs to send.
+ * @pnode: Pointer to lpfc_nodelist.
+ *
+ * This routine wraps the actual convert SCSI cmnd function pointer from
+ * the lpfc_hba struct.
+ **/
+static inline void
+lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+		    struct lpfc_nodelist *pnode)
+{
+	vport->phba->lpfc_scsi_prep_cmnd(vport, lpfc_cmd, pnode);
+}
+
+/**
+ * lpfc_scsi_prep_task_mgmt_cmnd_s3 - Convert SLI3 scsi TM cmd to FCP info unit
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
  * @lun: Logical unit number.
  * @task_mgmt_cmd: SCSI task management command.
  *
- * This routine creates FCP information unit corresponding to @task_mgmt_cmd.
+ * This routine creates FCP information unit corresponding to @task_mgmt_cmd
+ * for device with SLI-3 interface spec.
  *
  * Return codes:
  *   0 - Error
  *   1 - Success
  **/
 static int
-lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport,
+lpfc_scsi_prep_task_mgmt_cmd_s3(struct lpfc_vport *vport,
 			     struct lpfc_scsi_buf *lpfc_cmd,
 			     unsigned int lun,
 			     uint8_t task_mgmt_cmd)
@@ -2114,6 +2209,67 @@ lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport,
 }
 
 /**
+ * lpfc_scsi_prep_task_mgmt_cmnd - Wrapper func convert scsi TM cmd to FCP info
+ * @vport: The virtual port for which this call is being executed.
+ * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
+ * @lun: Logical unit number.
+ * @task_mgmt_cmd: SCSI task management command.
+ *
+ * This routine wraps the actual convert SCSI TM to FCP information unit
+ * function pointer from the lpfc_hba struct.
+ *
+ * Return codes:
+ * 	0 - Error
+ * 	1 - Success
+ **/
+static inline int
+lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport,
+			     struct lpfc_scsi_buf *lpfc_cmd,
+			     unsigned int lun,
+			     uint8_t task_mgmt_cmd)
+{
+	struct lpfc_hba *phba = vport->phba;
+
+	return phba->lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun,
+						  task_mgmt_cmd);
+}
+
+/**
+ * lpfc_scsi_api_table_setup - Set up scsi api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the SCSI interface API function jump table in @phba
+ * struct.
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_scsi_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->lpfc_new_scsi_buf = lpfc_new_scsi_buf_s3;
+		phba->lpfc_scsi_prep_dma_buf = lpfc_scsi_prep_dma_buf_s3;
+		phba->lpfc_scsi_prep_cmnd = lpfc_scsi_prep_cmnd_s3;
+		phba->lpfc_scsi_unprep_dma_buf = lpfc_scsi_unprep_dma_buf_s3;
+		phba->lpfc_scsi_prep_task_mgmt_cmd =
+					lpfc_scsi_prep_task_mgmt_cmd_s3;
+		phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s3;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1418 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	phba->lpfc_get_scsi_buf = lpfc_get_scsi_buf;
+	phba->lpfc_rampdown_queue_depth = lpfc_rampdown_queue_depth;
+	return 0;
+}
+
+/**
  * lpfc_taskmgmt_def_cmpl - IOCB completion routine for task management command
  * @phba: The Hba for which this call is being executed.
  * @cmdiocbq: Pointer to lpfc_iocbq data structure.
@@ -2178,9 +2334,8 @@ lpfc_scsi_tgt_reset(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_vport *vport,
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
 			 "0702 Issue Target Reset to TGT %d Data: x%x x%x\n",
 			 tgt_id, rdata->pnode->nlp_rpi, rdata->pnode->nlp_flag);
-	status = lpfc_sli_issue_iocb_wait(phba,
-				       &phba->sli.ring[phba->sli.fcp_ring],
-				       iocbq, iocbqrsp, lpfc_cmd->timeout);
+	status = lpfc_sli_issue_iocb_wait(phba, LPFC_FCP_RING,
+					  iocbq, iocbqrsp, lpfc_cmd->timeout);
 	if (status != IOCB_SUCCESS) {
 		if (status == IOCB_TIMEDOUT) {
 			iocbq->iocb_cmpl = lpfc_tskmgmt_def_cmpl;
@@ -2305,7 +2460,6 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 	struct Scsi_Host  *shost = cmnd->device->host;
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
-	struct lpfc_sli   *psli = &phba->sli;
 	struct lpfc_rport_data *rdata = cmnd->device->hostdata;
 	struct lpfc_nodelist *ndlp = rdata->pnode;
 	struct lpfc_scsi_buf *lpfc_cmd;
@@ -2427,7 +2581,7 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 	lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
 
 	atomic_inc(&ndlp->cmd_pending);
-	err = lpfc_sli_issue_iocb(phba, &phba->sli.ring[psli->fcp_ring],
+	err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
 				  &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
 	if (err) {
 		atomic_dec(&ndlp->cmd_pending);
@@ -2490,7 +2644,6 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	struct Scsi_Host  *shost = cmnd->device->host;
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
-	struct lpfc_sli_ring *pring = &phba->sli.ring[phba->sli.fcp_ring];
 	struct lpfc_iocbq *iocb;
 	struct lpfc_iocbq *abtsiocb;
 	struct lpfc_scsi_buf *lpfc_cmd;
@@ -2531,7 +2684,10 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	icmd = &abtsiocb->iocb;
 	icmd->un.acxri.abortType = ABORT_TYPE_ABTS;
 	icmd->un.acxri.abortContextTag = cmd->ulpContext;
-	icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		icmd->un.acxri.abortIoTag = iocb->sli4_xritag;
+	else
+		icmd->un.acxri.abortIoTag = cmd->ulpIoTag;
 
 	icmd->ulpLe = 1;
 	icmd->ulpClass = cmd->ulpClass;
@@ -2542,7 +2698,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 
 	abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
 	abtsiocb->vport = vport;
-	if (lpfc_sli_issue_iocb(phba, pring, abtsiocb, 0) == IOCB_ERROR) {
+	if (lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, abtsiocb, 0) ==
+	    IOCB_ERROR) {
 		lpfc_sli_release_iocbq(phba, abtsiocb);
 		ret = FAILED;
 		goto out;
@@ -2668,8 +2825,7 @@ lpfc_device_reset_handler(struct scsi_cmnd *cmnd)
 			 "0703 Issue target reset to TGT %d LUN %d "
 			 "rpi x%x nlp_flag x%x\n", cmnd->device->id,
 			 cmnd->device->lun, pnode->nlp_rpi, pnode->nlp_flag);
-	status = lpfc_sli_issue_iocb_wait(phba,
-					  &phba->sli.ring[phba->sli.fcp_ring],
+	status = lpfc_sli_issue_iocb_wait(phba, LPFC_FCP_RING,
 					  iocbq, iocbqrsp, lpfc_cmd->timeout);
 	if (status == IOCB_TIMEDOUT) {
 		iocbq->iocb_cmpl = lpfc_tskmgmt_def_cmpl;
@@ -2825,11 +2981,10 @@ lpfc_slave_alloc(struct scsi_device *sdev)
 {
 	struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
-	struct lpfc_scsi_buf *scsi_buf = NULL;
 	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
-	uint32_t total = 0, i;
+	uint32_t total = 0;
 	uint32_t num_to_alloc = 0;
-	unsigned long flags;
+	int num_allocated = 0;
 
 	if (!rport || fc_remote_port_chkready(rport))
 		return -ENXIO;
@@ -2863,20 +3018,13 @@ lpfc_slave_alloc(struct scsi_device *sdev)
 				 (phba->cfg_hba_queue_depth - total));
 		num_to_alloc = phba->cfg_hba_queue_depth - total;
 	}
-
-	for (i = 0; i < num_to_alloc; i++) {
-		scsi_buf = lpfc_new_scsi_buf(vport);
-		if (!scsi_buf) {
-			lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
-					 "0706 Failed to allocate "
-					 "command buffer\n");
-			break;
-		}
-
-		spin_lock_irqsave(&phba->scsi_buf_list_lock, flags);
-		phba->total_scsi_bufs++;
-		list_add_tail(&scsi_buf->list, &phba->lpfc_scsi_buf_list);
-		spin_unlock_irqrestore(&phba->scsi_buf_list_lock, flags);
+	num_allocated = lpfc_new_scsi_buf(vport, num_to_alloc);
+	if (num_to_alloc != num_allocated) {
+			lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+				 "0708 Allocation request of %d "
+				 "command buffers did not succeed.  "
+				 "Allocated %d buffers.\n",
+				 num_to_alloc, num_allocated);
 	}
 	return 0;
 }
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index eb5c75c..e2d07d9 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -142,7 +142,7 @@ lpfc_sli_get_iocbq(struct lpfc_hba *phba)
 }
 
 /**
- * __lpfc_sli_release_iocbq - Release iocb to the iocb pool
+ * __lpfc_sli_release_iocbq_s3 - Release iocb to the iocb pool
  * @phba: Pointer to HBA context object.
  * @iocbq: Pointer to driver iocb object.
  *
@@ -152,7 +152,7 @@ lpfc_sli_get_iocbq(struct lpfc_hba *phba)
  * clears all other fields of the iocb object when it is freed.
  **/
 static void
-__lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+__lpfc_sli_release_iocbq_s3(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
 {
 	size_t start_clean = offsetof(struct lpfc_iocbq, iocb);
 
@@ -160,10 +160,27 @@ __lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
 	 * Clean all volatile data fields, preserve iotag and node struct.
 	 */
 	memset((char*)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);
+	iocbq->sli4_xritag = NO_XRI;
 	list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
 }
 
 /**
+ * __lpfc_sli_release_iocbq - Release iocb to the iocb pool
+ * @phba: Pointer to HBA context object.
+ * @iocbq: Pointer to driver iocb object.
+ *
+ * This function is called with hbalock held to release driver
+ * iocb object to the iocb pool. The iotag in the iocb object
+ * does not change for each use of the iocb object. This function
+ * clears all other fields of the iocb object when it is freed.
+ **/
+static void
+__lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+{
+	phba->__lpfc_sli_release_iocbq(phba, iocbq);
+}
+
+/**
  * lpfc_sli_release_iocbq - Release iocb to the iocb pool
  * @phba: Pointer to HBA context object.
  * @iocbq: Pointer to driver iocb object.
@@ -779,8 +796,8 @@ lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba)
 		phba->hbqs[i].buffer_count = 0;
 	}
 	/* Return all HBQ buffer that are in-fly */
-	list_for_each_entry_safe(dmabuf, next_dmabuf,
-			&phba->hbqbuf_in_list, list) {
+	list_for_each_entry_safe(dmabuf, next_dmabuf, &phba->rb_pend_list,
+				 list) {
 		hbq_buf = container_of(dmabuf, struct hbq_dmabuf, dbuf);
 		list_del(&hbq_buf->dbuf.list);
 		if (hbq_buf->tag == -1) {
@@ -814,10 +831,28 @@ lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba)
  * pointer to the hbq entry if it successfully post the buffer
  * else it will return NULL.
  **/
-static struct lpfc_hbq_entry *
+static int
 lpfc_sli_hbq_to_firmware(struct lpfc_hba *phba, uint32_t hbqno,
 			 struct hbq_dmabuf *hbq_buf)
 {
+	return phba->lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buf);
+}
+
+/**
+ * lpfc_sli_hbq_to_firmware_s3 - Post the hbq buffer to SLI3 firmware
+ * @phba: Pointer to HBA context object.
+ * @hbqno: HBQ number.
+ * @hbq_buf: Pointer to HBQ buffer.
+ *
+ * This function is called with the hbalock held to post a hbq buffer to the
+ * firmware. If the function finds an empty slot in the HBQ, it will post the
+ * buffer and place it on the hbq_buffer_list. The function will return zero if
+ * it successfully post the buffer else it will return an error.
+ **/
+static int
+lpfc_sli_hbq_to_firmware_s3(struct lpfc_hba *phba, uint32_t hbqno,
+			    struct hbq_dmabuf *hbq_buf)
+{
 	struct lpfc_hbq_entry *hbqe;
 	dma_addr_t physaddr = hbq_buf->dbuf.phys;
 
@@ -838,8 +873,9 @@ lpfc_sli_hbq_to_firmware(struct lpfc_hba *phba, uint32_t hbqno,
 				/* flush */
 		readl(phba->hbq_put + hbqno);
 		list_add_tail(&hbq_buf->dbuf.list, &hbqp->hbq_buffer_list);
-	}
-	return hbqe;
+		return 0;
+	} else
+		return -ENOMEM;
 }
 
 /* HBQ for ELS and CT traffic. */
@@ -914,7 +950,7 @@ lpfc_sli_hbqbuf_fill_hbqs(struct lpfc_hba *phba, uint32_t hbqno, uint32_t count)
 				 dbuf.list);
 		hbq_buffer->tag = (phba->hbqs[hbqno].buffer_count |
 				      (hbqno << 16));
-		if (lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer)) {
+		if (!lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer)) {
 			phba->hbqs[hbqno].buffer_count++;
 			posted++;
 		} else
@@ -965,6 +1001,25 @@ lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *phba, uint32_t qno)
 }
 
 /**
+ * lpfc_sli_hbqbuf_get - Remove the first hbq off of an hbq list
+ * @phba: Pointer to HBA context object.
+ * @hbqno: HBQ number.
+ *
+ * This function removes the first hbq buffer on an hbq list and returns a
+ * pointer to that buffer. If it finds no buffers on the list it returns NULL.
+ **/
+static struct hbq_dmabuf *
+lpfc_sli_hbqbuf_get(struct list_head *rb_list)
+{
+	struct lpfc_dmabuf *d_buf;
+
+	list_remove_head(rb_list, d_buf, struct lpfc_dmabuf, list);
+	if (!d_buf)
+		return NULL;
+	return container_of(d_buf, struct hbq_dmabuf, dbuf);
+}
+
+/**
  * lpfc_sli_hbqbuf_find - Find the hbq buffer associated with a tag
  * @phba: Pointer to HBA context object.
  * @tag: Tag of the hbq buffer.
@@ -985,12 +1040,15 @@ lpfc_sli_hbqbuf_find(struct lpfc_hba *phba, uint32_t tag)
 	if (hbqno >= LPFC_MAX_HBQS)
 		return NULL;
 
+	spin_lock_irq(&phba->hbalock);
 	list_for_each_entry(d_buf, &phba->hbqs[hbqno].hbq_buffer_list, list) {
 		hbq_buf = container_of(d_buf, struct hbq_dmabuf, dbuf);
 		if (hbq_buf->tag == tag) {
+			spin_unlock_irq(&phba->hbalock);
 			return hbq_buf;
 		}
 	}
+	spin_unlock_irq(&phba->hbalock);
 	lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_VPORT,
 			"1803 Bad hbq tag. Data: x%x x%x\n",
 			tag, phba->hbqs[tag >> 16].buffer_count);
@@ -1013,9 +1071,8 @@ lpfc_sli_free_hbq(struct lpfc_hba *phba, struct hbq_dmabuf *hbq_buffer)
 
 	if (hbq_buffer) {
 		hbqno = hbq_buffer->tag >> 16;
-		if (!lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer)) {
+		if (lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer))
 			(phba->hbqs[hbqno].hbq_free_buffer)(phba, hbq_buffer);
-		}
 	}
 }
 
@@ -1317,6 +1374,45 @@ lpfc_sli_get_buff(struct lpfc_hba *phba,
 	return &hbq_entry->dbuf;
 }
 
+/**
+ * lpfc_complete_unsol_iocb - Complete an unsolicited sequence
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @saveq: Pointer to the iocbq struct representing the sequence starting frame.
+ * @fch_r_ctl: the r_ctl for the first frame of the sequence.
+ * @fch_type: the type for the first frame of the sequence.
+ *
+ * This function is called with no lock held. This function uses the r_ctl and
+ * type of the received sequence to find the correct callback function to call
+ * to process the sequence.
+ **/
+static int
+lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+			 struct lpfc_iocbq *saveq, uint32_t fch_r_ctl,
+			 uint32_t fch_type)
+{
+	int i;
+
+	/* unSolicited Responses */
+	if (pring->prt[0].profile) {
+		if (pring->prt[0].lpfc_sli_rcv_unsol_event)
+			(pring->prt[0].lpfc_sli_rcv_unsol_event) (phba, pring,
+									saveq);
+		return 1;
+	}
+	/* We must search, based on rctl / type
+	   for the right routine */
+	for (i = 0; i < pring->num_mask; i++) {
+		if ((pring->prt[i].rctl == fch_r_ctl) &&
+		    (pring->prt[i].type == fch_type)) {
+			if (pring->prt[i].lpfc_sli_rcv_unsol_event)
+				(pring->prt[i].lpfc_sli_rcv_unsol_event)
+						(phba, pring, saveq);
+			return 1;
+		}
+	}
+	return 0;
+}
 
 /**
  * lpfc_sli_process_unsol_iocb - Unsolicited iocb handler
@@ -1339,7 +1435,7 @@ lpfc_sli_process_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	IOCB_t           * irsp;
 	WORD5            * w5p;
 	uint32_t           Rctl, Type;
-	uint32_t           match, i;
+	uint32_t           match;
 	struct lpfc_iocbq *iocbq;
 	struct lpfc_dmabuf *dmzbuf;
 
@@ -1482,35 +1578,12 @@ lpfc_sli_process_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		}
 	}
 
-	/* unSolicited Responses */
-	if (pring->prt[0].profile) {
-		if (pring->prt[0].lpfc_sli_rcv_unsol_event)
-			(pring->prt[0].lpfc_sli_rcv_unsol_event) (phba, pring,
-									saveq);
-		match = 1;
-	} else {
-		/* We must search, based on rctl / type
-		   for the right routine */
-		for (i = 0; i < pring->num_mask; i++) {
-			if ((pring->prt[i].rctl == Rctl)
-			    && (pring->prt[i].type == Type)) {
-				if (pring->prt[i].lpfc_sli_rcv_unsol_event)
-					(pring->prt[i].lpfc_sli_rcv_unsol_event)
-							(phba, pring, saveq);
-				match = 1;
-				break;
-			}
-		}
-	}
-	if (match == 0) {
-		/* Unexpected Rctl / Type received */
-		/* Ring <ringno> handler: unexpected
-		   Rctl <Rctl> Type <Type> received */
+	if (!lpfc_complete_unsol_iocb(phba, pring, saveq, Rctl, Type))
 		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
 				"0313 Ring %d handler: unexpected Rctl x%x "
 				"Type x%x received\n",
 				pring->ringno, Rctl, Type);
-	}
+
 	return 1;
 }
 
@@ -1552,6 +1625,37 @@ lpfc_sli_iocbq_lookup(struct lpfc_hba *phba,
 }
 
 /**
+ * lpfc_sli_iocbq_lookup_by_tag - Find command iocb for the iotag
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @iotag: IOCB tag.
+ *
+ * This function looks up the iocb_lookup table to get the command iocb
+ * corresponding to the given iotag. This function is called with the
+ * hbalock held.
+ * This function returns the command iocb object if it finds the command
+ * iocb else returns NULL.
+ **/
+static struct lpfc_iocbq *
+lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba,
+			     struct lpfc_sli_ring *pring, uint16_t iotag)
+{
+	struct lpfc_iocbq *cmd_iocb;
+
+	if (iotag != 0 && iotag <= phba->sli.last_iotag) {
+		cmd_iocb = phba->sli.iocbq_lookup[iotag];
+		list_del_init(&cmd_iocb->list);
+		pring->txcmplq_cnt--;
+		return cmd_iocb;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"0372 iotag x%x is out off range: max iotag (x%x)\n",
+			iotag, phba->sli.last_iotag);
+	return NULL;
+}
+
+/**
  * lpfc_sli_process_sol_iocb - process solicited iocb completion
  * @phba: Pointer to HBA context object.
  * @pring: Pointer to driver SLI ring object.
@@ -1954,7 +2058,7 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba,
 			if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
 				(irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
 				spin_unlock_irqrestore(&phba->hbalock, iflag);
-				lpfc_rampdown_queue_depth(phba);
+				phba->lpfc_rampdown_queue_depth(phba);
 				spin_lock_irqsave(&phba->hbalock, iflag);
 			}
 
@@ -2068,39 +2172,215 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba,
 }
 
 /**
- * lpfc_sli_handle_slow_ring_event - Handle ring events for non-FCP rings
+ * lpfc_sli_sp_handle_rspiocb - Handle slow-path response iocb
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @rspiocbp: Pointer to driver response IOCB object.
+ *
+ * This function is called from the worker thread when there is a slow-path
+ * response IOCB to process. This function chains all the response iocbs until
+ * seeing the iocb with the LE bit set. The function will call
+ * lpfc_sli_process_sol_iocb function if the response iocb indicates a
+ * completion of a command iocb. The function will call the
+ * lpfc_sli_process_unsol_iocb function if this is an unsolicited iocb.
+ * The function frees the resources or calls the completion handler if this
+ * iocb is an abort completion. The function returns NULL when the response
+ * iocb has the LE bit set and all the chained iocbs are processed, otherwise
+ * this function shall chain the iocb on to the iocb_continueq and return the
+ * response iocb passed in.
+ **/
+static struct lpfc_iocbq *
+lpfc_sli_sp_handle_rspiocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+			struct lpfc_iocbq *rspiocbp)
+{
+	struct lpfc_iocbq *saveq;
+	struct lpfc_iocbq *cmdiocbp;
+	struct lpfc_iocbq *next_iocb;
+	IOCB_t *irsp = NULL;
+	uint32_t free_saveq;
+	uint8_t iocb_cmd_type;
+	lpfc_iocb_type type;
+	unsigned long iflag;
+	int rc;
+
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	/* First add the response iocb to the countinueq list */
+	list_add_tail(&rspiocbp->list, &(pring->iocb_continueq));
+	pring->iocb_continueq_cnt++;
+
+	/* Now, determine whetehr the list is completed for processing */
+	irsp = &rspiocbp->iocb;
+	if (irsp->ulpLe) {
+		/*
+		 * By default, the driver expects to free all resources
+		 * associated with this iocb completion.
+		 */
+		free_saveq = 1;
+		saveq = list_get_first(&pring->iocb_continueq,
+				       struct lpfc_iocbq, list);
+		irsp = &(saveq->iocb);
+		list_del_init(&pring->iocb_continueq);
+		pring->iocb_continueq_cnt = 0;
+
+		pring->stats.iocb_rsp++;
+
+		/*
+		 * If resource errors reported from HBA, reduce
+		 * queuedepths of the SCSI device.
+		 */
+		if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+		    (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			phba->lpfc_rampdown_queue_depth(phba);
+			spin_lock_irqsave(&phba->hbalock, iflag);
+		}
+
+		if (irsp->ulpStatus) {
+			/* Rsp ring <ringno> error: IOCB */
+			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+					"0328 Rsp Ring %d error: "
+					"IOCB Data: "
+					"x%x x%x x%x x%x "
+					"x%x x%x x%x x%x "
+					"x%x x%x x%x x%x "
+					"x%x x%x x%x x%x\n",
+					pring->ringno,
+					irsp->un.ulpWord[0],
+					irsp->un.ulpWord[1],
+					irsp->un.ulpWord[2],
+					irsp->un.ulpWord[3],
+					irsp->un.ulpWord[4],
+					irsp->un.ulpWord[5],
+					*(((uint32_t *) irsp) + 6),
+					*(((uint32_t *) irsp) + 7),
+					*(((uint32_t *) irsp) + 8),
+					*(((uint32_t *) irsp) + 9),
+					*(((uint32_t *) irsp) + 10),
+					*(((uint32_t *) irsp) + 11),
+					*(((uint32_t *) irsp) + 12),
+					*(((uint32_t *) irsp) + 13),
+					*(((uint32_t *) irsp) + 14),
+					*(((uint32_t *) irsp) + 15));
+		}
+
+		/*
+		 * Fetch the IOCB command type and call the correct completion
+		 * routine. Solicited and Unsolicited IOCBs on the ELS ring
+		 * get freed back to the lpfc_iocb_list by the discovery
+		 * kernel thread.
+		 */
+		iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK;
+		type = lpfc_sli_iocb_cmd_type(iocb_cmd_type);
+		switch (type) {
+		case LPFC_SOL_IOCB:
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			rc = lpfc_sli_process_sol_iocb(phba, pring, saveq);
+			spin_lock_irqsave(&phba->hbalock, iflag);
+			break;
+
+		case LPFC_UNSOL_IOCB:
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			rc = lpfc_sli_process_unsol_iocb(phba, pring, saveq);
+			spin_lock_irqsave(&phba->hbalock, iflag);
+			if (!rc)
+				free_saveq = 0;
+			break;
+
+		case LPFC_ABORT_IOCB:
+			cmdiocbp = NULL;
+			if (irsp->ulpCommand != CMD_XRI_ABORTED_CX)
+				cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring,
+								 saveq);
+			if (cmdiocbp) {
+				/* Call the specified completion routine */
+				if (cmdiocbp->iocb_cmpl) {
+					spin_unlock_irqrestore(&phba->hbalock,
+							       iflag);
+					(cmdiocbp->iocb_cmpl)(phba, cmdiocbp,
+							      saveq);
+					spin_lock_irqsave(&phba->hbalock,
+							  iflag);
+				} else
+					__lpfc_sli_release_iocbq(phba,
+								 cmdiocbp);
+			}
+			break;
+
+		case LPFC_UNKNOWN_IOCB:
+			if (irsp->ulpCommand == CMD_ADAPTER_MSG) {
+				char adaptermsg[LPFC_MAX_ADPTMSG];
+				memset(adaptermsg, 0, LPFC_MAX_ADPTMSG);
+				memcpy(&adaptermsg[0], (uint8_t *)irsp,
+				       MAX_MSG_DATA);
+				dev_warn(&((phba->pcidev)->dev),
+					 "lpfc%d: %s\n",
+					 phba->brd_no, adaptermsg);
+			} else {
+				/* Unknown IOCB command */
+				lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+						"0335 Unknown IOCB "
+						"command Data: x%x "
+						"x%x x%x x%x\n",
+						irsp->ulpCommand,
+						irsp->ulpStatus,
+						irsp->ulpIoTag,
+						irsp->ulpContext);
+			}
+			break;
+		}
+
+		if (free_saveq) {
+			list_for_each_entry_safe(rspiocbp, next_iocb,
+						 &saveq->list, list) {
+				list_del(&rspiocbp->list);
+				__lpfc_sli_release_iocbq(phba, rspiocbp);
+			}
+			__lpfc_sli_release_iocbq(phba, saveq);
+		}
+		rspiocbp = NULL;
+	}
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+	return rspiocbp;
+}
+
+/**
+ * lpfc_sli_handle_slow_ring_event - Wrapper func for handling slow-path iocbs
  * @phba: Pointer to HBA context object.
  * @pring: Pointer to driver SLI ring object.
  * @mask: Host attention register mask for this ring.
  *
- * This function is called from the worker thread when there is a ring
- * event for non-fcp rings. The caller does not hold any lock .
- * The function processes each response iocb in the response ring until it
- * finds an iocb with LE bit set and chains all the iocbs upto the iocb with
- * LE bit set. The function will call lpfc_sli_process_sol_iocb function if the
- * response iocb indicates a completion of a command iocb. The function
- * will call lpfc_sli_process_unsol_iocb function if this is an unsolicited
- * iocb. The function frees the resources or calls the completion handler if
- * this iocb is an abort completion. The function returns 0 when the allocated
- * iocbs are not freed, otherwise returns 1.
+ * This routine wraps the actual slow_ring event process routine from the
+ * API jump table function pointer from the lpfc_hba struct.
  **/
-int
+void
 lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba,
 				struct lpfc_sli_ring *pring, uint32_t mask)
 {
+	phba->lpfc_sli_handle_slow_ring_event(phba, pring, mask);
+}
+
+/**
+ * lpfc_sli_handle_slow_ring_event_s3 - Handle SLI3 ring event for non-FCP rings
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @mask: Host attention register mask for this ring.
+ *
+ * This function is called from the worker thread when there is a ring event
+ * for non-fcp rings. The caller does not hold any lock. The function will
+ * remove each response iocb in the response ring and calls the handle
+ * response iocb routine (lpfc_sli_sp_handle_rspiocb) to process it.
+ **/
+static void
+lpfc_sli_handle_slow_ring_event_s3(struct lpfc_hba *phba,
+				   struct lpfc_sli_ring *pring, uint32_t mask)
+{
 	struct lpfc_pgp *pgp;
 	IOCB_t *entry;
 	IOCB_t *irsp = NULL;
 	struct lpfc_iocbq *rspiocbp = NULL;
-	struct lpfc_iocbq *next_iocb;
-	struct lpfc_iocbq *cmdiocbp;
-	struct lpfc_iocbq *saveq;
-	uint8_t iocb_cmd_type;
-	lpfc_iocb_type type;
-	uint32_t status, free_saveq;
 	uint32_t portRspPut, portRspMax;
-	int rc = 1;
 	unsigned long iflag;
+	uint32_t status;
 
 	pgp = &phba->port_gp[pring->ringno];
 	spin_lock_irqsave(&phba->hbalock, iflag);
@@ -2128,7 +2408,7 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba,
 		phba->work_hs = HS_FFER3;
 		lpfc_handle_eratt(phba);
 
-		return 1;
+		return;
 	}
 
 	rmb();
@@ -2173,138 +2453,10 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba,
 
 		writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx);
 
-		list_add_tail(&rspiocbp->list, &(pring->iocb_continueq));
-
-		pring->iocb_continueq_cnt++;
-		if (irsp->ulpLe) {
-			/*
-			 * By default, the driver expects to free all resources
-			 * associated with this iocb completion.
-			 */
-			free_saveq = 1;
-			saveq = list_get_first(&pring->iocb_continueq,
-					       struct lpfc_iocbq, list);
-			irsp = &(saveq->iocb);
-			list_del_init(&pring->iocb_continueq);
-			pring->iocb_continueq_cnt = 0;
-
-			pring->stats.iocb_rsp++;
-
-			/*
-			 * If resource errors reported from HBA, reduce
-			 * queuedepths of the SCSI device.
-			 */
-			if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
-			     (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
-				spin_unlock_irqrestore(&phba->hbalock, iflag);
-				lpfc_rampdown_queue_depth(phba);
-				spin_lock_irqsave(&phba->hbalock, iflag);
-			}
-
-			if (irsp->ulpStatus) {
-				/* Rsp ring <ringno> error: IOCB */
-				lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
-						"0328 Rsp Ring %d error: "
-						"IOCB Data: "
-						"x%x x%x x%x x%x "
-						"x%x x%x x%x x%x "
-						"x%x x%x x%x x%x "
-						"x%x x%x x%x x%x\n",
-						pring->ringno,
-						irsp->un.ulpWord[0],
-						irsp->un.ulpWord[1],
-						irsp->un.ulpWord[2],
-						irsp->un.ulpWord[3],
-						irsp->un.ulpWord[4],
-						irsp->un.ulpWord[5],
-						*(((uint32_t *) irsp) + 6),
-						*(((uint32_t *) irsp) + 7),
-						*(((uint32_t *) irsp) + 8),
-						*(((uint32_t *) irsp) + 9),
-						*(((uint32_t *) irsp) + 10),
-						*(((uint32_t *) irsp) + 11),
-						*(((uint32_t *) irsp) + 12),
-						*(((uint32_t *) irsp) + 13),
-						*(((uint32_t *) irsp) + 14),
-						*(((uint32_t *) irsp) + 15));
-			}
-
-			/*
-			 * Fetch the IOCB command type and call the correct
-			 * completion routine.  Solicited and Unsolicited
-			 * IOCBs on the ELS ring get freed back to the
-			 * lpfc_iocb_list by the discovery kernel thread.
-			 */
-			iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK;
-			type = lpfc_sli_iocb_cmd_type(iocb_cmd_type);
-			if (type == LPFC_SOL_IOCB) {
-				spin_unlock_irqrestore(&phba->hbalock, iflag);
-				rc = lpfc_sli_process_sol_iocb(phba, pring,
-							       saveq);
-				spin_lock_irqsave(&phba->hbalock, iflag);
-			} else if (type == LPFC_UNSOL_IOCB) {
-				spin_unlock_irqrestore(&phba->hbalock, iflag);
-				rc = lpfc_sli_process_unsol_iocb(phba, pring,
-								 saveq);
-				spin_lock_irqsave(&phba->hbalock, iflag);
-				if (!rc)
-					free_saveq = 0;
-			} else if (type == LPFC_ABORT_IOCB) {
-				if ((irsp->ulpCommand != CMD_XRI_ABORTED_CX) &&
-				    ((cmdiocbp =
-				      lpfc_sli_iocbq_lookup(phba, pring,
-							    saveq)))) {
-					/* Call the specified completion
-					   routine */
-					if (cmdiocbp->iocb_cmpl) {
-						spin_unlock_irqrestore(
-						       &phba->hbalock,
-						       iflag);
-						(cmdiocbp->iocb_cmpl) (phba,
-							     cmdiocbp, saveq);
-						spin_lock_irqsave(
-							  &phba->hbalock,
-							  iflag);
-					} else
-						__lpfc_sli_release_iocbq(phba,
-								      cmdiocbp);
-				}
-			} else if (type == LPFC_UNKNOWN_IOCB) {
-				if (irsp->ulpCommand == CMD_ADAPTER_MSG) {
-
-					char adaptermsg[LPFC_MAX_ADPTMSG];
-
-					memset(adaptermsg, 0,
-					       LPFC_MAX_ADPTMSG);
-					memcpy(&adaptermsg[0], (uint8_t *) irsp,
-					       MAX_MSG_DATA);
-					dev_warn(&((phba->pcidev)->dev),
-						 "lpfc%d: %s\n",
-						 phba->brd_no, adaptermsg);
-				} else {
-					/* Unknown IOCB command */
-					lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-							"0335 Unknown IOCB "
-							"command Data: x%x "
-							"x%x x%x x%x\n",
-							irsp->ulpCommand,
-							irsp->ulpStatus,
-							irsp->ulpIoTag,
-							irsp->ulpContext);
-				}
-			}
-
-			if (free_saveq) {
-				list_for_each_entry_safe(rspiocbp, next_iocb,
-							 &saveq->list, list) {
-					list_del(&rspiocbp->list);
-					__lpfc_sli_release_iocbq(phba,
-								 rspiocbp);
-				}
-				__lpfc_sli_release_iocbq(phba, saveq);
-			}
-			rspiocbp = NULL;
-		}
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		/* Handle the response IOCB */
+		rspiocbp = lpfc_sli_sp_handle_rspiocb(phba, pring, rspiocbp);
+		spin_lock_irqsave(&phba->hbalock, iflag);
 
 		/*
 		 * If the port response put pointer has not been updated, sync
@@ -2338,7 +2490,7 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba,
 	}
 
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
-	return rc;
+	return;
 }
 
 /**
@@ -2420,7 +2572,7 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_sli_brdready - Check for host status bits
+ * lpfc_sli_brdready_s3 - Check for sli3 host ready status
  * @phba: Pointer to HBA context object.
  * @mask: Bit mask to be checked.
  *
@@ -2432,8 +2584,8 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
  * function returns 1 when HBA fail to restart otherwise returns
  * zero.
  **/
-int
-lpfc_sli_brdready(struct lpfc_hba *phba, uint32_t mask)
+static int
+lpfc_sli_brdready_s3(struct lpfc_hba *phba, uint32_t mask)
 {
 	uint32_t status;
 	int i = 0;
@@ -2647,7 +2799,7 @@ lpfc_sli_brdkill(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_sli_brdreset - Reset the HBA
+ * lpfc_sli_brdreset - Reset a sli-2 or sli-3 HBA
  * @phba: Pointer to HBA context object.
  *
  * This function resets the HBA by writing HC_INITFF to the control
@@ -2683,7 +2835,8 @@ lpfc_sli_brdreset(struct lpfc_hba *phba)
 			      (cfg_value &
 			       ~(PCI_COMMAND_PARITY | PCI_COMMAND_SERR)));
 
-	psli->sli_flag &= ~(LPFC_SLI2_ACTIVE | LPFC_PROCESS_LA);
+	psli->sli_flag &= ~(LPFC_SLI_ACTIVE | LPFC_PROCESS_LA);
+
 	/* Now toggle INITFF bit in the Host Control Register */
 	writel(HC_INITFF, phba->HCregaddr);
 	mdelay(1);
@@ -3289,32 +3442,20 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 			"0345 Resetting board due to mailbox timeout\n");
-	/*
-	 * lpfc_offline calls lpfc_sli_hba_down which will clean up
-	 * on oustanding mailbox commands.
-	 */
-	/* If resets are disabled then set error state and return. */
-	if (!phba->cfg_enable_hba_reset) {
-		phba->link_state = LPFC_HBA_ERROR;
-		return;
-	}
-	lpfc_offline_prep(phba);
-	lpfc_offline(phba);
-	lpfc_sli_brdrestart(phba);
-	lpfc_online(phba);
-	lpfc_unblock_mgmt_io(phba);
-	return;
+
+	/* Reset the HBA device */
+	lpfc_reset_hba(phba);
 }
 
 /**
- * lpfc_sli_issue_mbox - Issue a mailbox command to firmware
+ * lpfc_sli_issue_mbox_s3 - Issue an SLI3 mailbox command to firmware
  * @phba: Pointer to HBA context object.
  * @pmbox: Pointer to mailbox object.
  * @flag: Flag indicating how the mailbox need to be processed.
  *
  * This function is called by discovery code and HBA management code
- * to submit a mailbox command to firmware. This function gets the
- * hbalock to protect the data structures.
+ * to submit a mailbox command to firmware with SLI-3 interface spec. This
+ * function gets the hbalock to protect the data structures.
  * The mailbox command can be submitted in polling mode, in which case
  * this function will wait in a polling loop for the completion of the
  * mailbox.
@@ -3332,8 +3473,9 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
  * return codes the caller owns the mailbox command after the return of
  * the function.
  **/
-int
-lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
+static int
+lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
+		       uint32_t flag)
 {
 	MAILBOX_t *mb;
 	struct lpfc_sli *psli = &phba->sli;
@@ -3349,6 +3491,10 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 	spin_lock_irqsave(&phba->hbalock, drvr_flag);
 	if (!pmbox) {
 		/* processing mbox queue from intr_handler */
+		if (unlikely(psli->sli_flag & LPFC_SLI_ASYNC_MBX_BLK)) {
+			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+			return MBX_SUCCESS;
+		}
 		processing_queue = 1;
 		phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
 		pmbox = lpfc_mbox_get(phba);
@@ -3365,7 +3511,7 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 			lpfc_printf_log(phba, KERN_ERR,
 					LOG_MBOX | LOG_VPORT,
 					"1806 Mbox x%x failed. No vport\n",
-					pmbox->mb.mbxCommand);
+					pmbox->u.mb.mbxCommand);
 			dump_stack();
 			goto out_not_finished;
 		}
@@ -3385,21 +3531,29 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 
 	psli = &phba->sli;
 
-	mb = &pmbox->mb;
+	mb = &pmbox->u.mb;
 	status = MBX_SUCCESS;
 
 	if (phba->link_state == LPFC_HBA_ERROR) {
 		spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 
 		/* Mbox command <mbxCommand> cannot issue */
-		LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):0311 Mailbox command x%x cannot "
+				"issue Data: x%x x%x\n",
+				pmbox->vport ? pmbox->vport->vpi : 0,
+				pmbox->u.mb.mbxCommand, psli->sli_flag, flag);
 		goto out_not_finished;
 	}
 
 	if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT &&
 	    !(readl(phba->HCregaddr) & HC_MBINT_ENA)) {
 		spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
-		LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2528 Mailbox command x%x cannot "
+				"issue Data: x%x x%x\n",
+				pmbox->vport ? pmbox->vport->vpi : 0,
+				pmbox->u.mb.mbxCommand, psli->sli_flag, flag);
 		goto out_not_finished;
 	}
 
@@ -3413,14 +3567,24 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 
 			/* Mbox command <mbxCommand> cannot issue */
-			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+					"(%d):2529 Mailbox command x%x "
+					"cannot issue Data: x%x x%x\n",
+					pmbox->vport ? pmbox->vport->vpi : 0,
+					pmbox->u.mb.mbxCommand,
+					psli->sli_flag, flag);
 			goto out_not_finished;
 		}
 
-		if (!(psli->sli_flag & LPFC_SLI2_ACTIVE)) {
+		if (!(psli->sli_flag & LPFC_SLI_ACTIVE)) {
 			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 			/* Mbox command <mbxCommand> cannot issue */
-			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+					"(%d):2530 Mailbox command x%x "
+					"cannot issue Data: x%x x%x\n",
+					pmbox->vport ? pmbox->vport->vpi : 0,
+					pmbox->u.mb.mbxCommand,
+					psli->sli_flag, flag);
 			goto out_not_finished;
 		}
 
@@ -3462,12 +3626,17 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 
 	/* If we are not polling, we MUST be in SLI2 mode */
 	if (flag != MBX_POLL) {
-		if (!(psli->sli_flag & LPFC_SLI2_ACTIVE) &&
+		if (!(psli->sli_flag & LPFC_SLI_ACTIVE) &&
 		    (mb->mbxCommand != MBX_KILL_BOARD)) {
 			psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
 			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 			/* Mbox command <mbxCommand> cannot issue */
-			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+					"(%d):2531 Mailbox command x%x "
+					"cannot issue Data: x%x x%x\n",
+					pmbox->vport ? pmbox->vport->vpi : 0,
+					pmbox->u.mb.mbxCommand,
+					psli->sli_flag, flag);
 			goto out_not_finished;
 		}
 		/* timeout active mbox command */
@@ -3506,7 +3675,7 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 	/* next set own bit for the adapter and copy over command word */
 	mb->mbxOwner = OWN_CHIP;
 
-	if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+	if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 		/* First copy command data to host SLIM area */
 		lpfc_sli_pcimem_bcopy(mb, phba->mbox, MAILBOX_CMD_SIZE);
 	} else {
@@ -3529,7 +3698,7 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 
 		if (mb->mbxCommand == MBX_CONFIG_PORT) {
 			/* switch over to host mailbox */
-			psli->sli_flag |= LPFC_SLI2_ACTIVE;
+			psli->sli_flag |= LPFC_SLI_ACTIVE;
 		}
 	}
 
@@ -3552,7 +3721,7 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 		writel(CA_MBATT, phba->CAregaddr);
 		readl(phba->CAregaddr); /* flush */
 
-		if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+		if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 			/* First read mbox status word */
 			word0 = *((uint32_t *)phba->mbox);
 			word0 = le32_to_cpu(word0);
@@ -3591,7 +3760,7 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 				spin_lock_irqsave(&phba->hbalock, drvr_flag);
 			}
 
-			if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+			if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 				/* First copy command data */
 				word0 = *((uint32_t *)phba->mbox);
 				word0 = le32_to_cpu(word0);
@@ -3604,7 +3773,7 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 					if (((slimword0 & OWN_CHIP) != OWN_CHIP)
 					    && slimmb->mbxStatus) {
 						psli->sli_flag &=
-						    ~LPFC_SLI2_ACTIVE;
+						    ~LPFC_SLI_ACTIVE;
 						word0 = slimword0;
 					}
 				}
@@ -3616,7 +3785,7 @@ lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
 			ha_copy = readl(phba->HAregaddr);
 		}
 
-		if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+		if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 			/* copy results back to user */
 			lpfc_sli_pcimem_bcopy(phba->mbox, mb, MAILBOX_CMD_SIZE);
 		} else {
@@ -3701,35 +3870,34 @@ lpfc_sli_next_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 }
 
 /**
- * __lpfc_sli_issue_iocb - Lockless version of lpfc_sli_issue_iocb
+ * __lpfc_sli_issue_iocb_s3 - SLI3 device lockless ver of lpfc_sli_issue_iocb
  * @phba: Pointer to HBA context object.
- * @pring: Pointer to driver SLI ring object.
+ * @ring_number: SLI ring number to issue iocb on.
  * @piocb: Pointer to command iocb.
  * @flag: Flag indicating if this command can be put into txq.
  *
- * __lpfc_sli_issue_iocb is used by other functions in the driver
- * to issue an iocb command to the HBA. If the PCI slot is recovering
- * from error state or if HBA is resetting or if LPFC_STOP_IOCB_EVENT
- * flag is turned on, the function returns IOCB_ERROR.
- * When the link is down, this function allows only iocbs for
- * posting buffers.
- * This function finds next available slot in the command ring and
- * posts the command to the available slot and writes the port
- * attention register to request HBA start processing new iocb.
- * If there is no slot available in the ring and
- * flag & SLI_IOCB_RET_IOCB is set, the new iocb is added to the
- * txq, otherwise the function returns IOCB_BUSY.
+ * __lpfc_sli_issue_iocb_s3 is used by other functions in the driver to issue
+ * an iocb command to an HBA with SLI-3 interface spec. If the PCI slot is
+ * recovering from error state, if HBA is resetting or if LPFC_STOP_IOCB_EVENT
+ * flag is turned on, the function returns IOCB_ERROR. When the link is down,
+ * this function allows only iocbs for posting buffers. This function finds
+ * next available slot in the command ring and posts the command to the
+ * available slot and writes the port attention register to request HBA start
+ * processing new iocb. If there is no slot available in the ring and
+ * flag & SLI_IOCB_RET_IOCB is set, the new iocb is added to the txq, otherwise
+ * the function returns IOCB_BUSY.
  *
- * This function is called with hbalock held.
- * The function will return success after it successfully submit the
- * iocb to firmware or after adding to the txq.
+ * This function is called with hbalock held. The function will return success
+ * after it successfully submit the iocb to firmware or after adding to the
+ * txq.
  **/
 static int
-__lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+__lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number,
 		    struct lpfc_iocbq *piocb, uint32_t flag)
 {
 	struct lpfc_iocbq *nextiocb;
 	IOCB_t *iocb;
+	struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number];
 
 	if (piocb->iocb_cmpl && (!piocb->vport) &&
 	   (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
@@ -3833,6 +4001,52 @@ __lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	return IOCB_BUSY;
 }
 
+/**
+ * __lpfc_sli_issue_iocb - Wrapper func of lockless version for issuing iocb
+ *
+ * This routine wraps the actual lockless version for issusing IOCB function
+ * pointer from the lpfc_hba struct.
+ *
+ * Return codes:
+ * 	IOCB_ERROR - Error
+ * 	IOCB_SUCCESS - Success
+ * 	IOCB_BUSY - Busy
+ **/
+static inline int
+__lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
+		struct lpfc_iocbq *piocb, uint32_t flag)
+{
+	return phba->__lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
+}
+
+/**
+ * lpfc_sli_api_table_setup - Set up sli api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the SLI interface API function jump table in @phba
+ * struct.
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->__lpfc_sli_issue_iocb = __lpfc_sli_issue_iocb_s3;
+		phba->__lpfc_sli_release_iocbq = __lpfc_sli_release_iocbq_s3;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1419 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	phba->lpfc_get_iocb_from_iocbq = lpfc_get_iocb_from_iocbq;
+	return 0;
+}
 
 /**
  * lpfc_sli_issue_iocb - Wrapper function for __lpfc_sli_issue_iocb
@@ -3848,14 +4062,14 @@ __lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
  * functions which do not hold hbalock.
  **/
 int
-lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
 		    struct lpfc_iocbq *piocb, uint32_t flag)
 {
 	unsigned long iflags;
 	int rc;
 
 	spin_lock_irqsave(&phba->hbalock, iflags);
-	rc = __lpfc_sli_issue_iocb(phba, pring, piocb, flag);
+	rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
 	spin_unlock_irqrestore(&phba->hbalock, iflags);
 
 	return rc;
@@ -5077,53 +5291,104 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq,
 }
 
 /**
- * lpfc_sli_flush_mbox_queue - mailbox queue cleanup function
+ * lpfc_sli_mbox_sys_shutdown - shutdown mailbox command sub-system
  * @phba: Pointer to HBA context.
  *
- * This function is called to cleanup any pending mailbox
- * objects in the driver queue before bringing the HBA offline.
- * This function is called while resetting the HBA.
- * The function is called without any lock held. The function
- * takes hbalock to update SLI data structure.
- * This function returns 1 when there is an active mailbox
- * command pending else returns 0.
+ * This function is called to shutdown the driver's mailbox sub-system.
+ * It first marks the mailbox sub-system is in a block state to prevent
+ * the asynchronous mailbox command from issued off the pending mailbox
+ * command queue. If the mailbox command sub-system shutdown is due to
+ * HBA error conditions such as EEH or ERATT, this routine shall invoke
+ * the mailbox sub-system flush routine to forcefully bring down the
+ * mailbox sub-system. Otherwise, if it is due to normal condition (such
+ * as with offline or HBA function reset), this routine will wait for the
+ * outstanding mailbox command to complete before invoking the mailbox
+ * sub-system flush routine to gracefully bring down mailbox sub-system.
  **/
-int
-lpfc_sli_flush_mbox_queue(struct lpfc_hba * phba)
+void
+lpfc_sli_mbox_sys_shutdown(struct lpfc_hba *phba)
 {
-	struct lpfc_vport *vport = phba->pport;
-	int i = 0;
-	uint32_t ha_copy;
+	struct lpfc_sli *psli = &phba->sli;
+	uint8_t actcmd = MBX_HEARTBEAT;
+	unsigned long timeout;
 
-	while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !vport->stopped) {
-		if (i++ > LPFC_MBOX_TMO * 1000)
-			return 1;
+	spin_lock_irq(&phba->hbalock);
+	psli->sli_flag |= LPFC_SLI_ASYNC_MBX_BLK;
+	spin_unlock_irq(&phba->hbalock);
 
-		/*
-		 * Call lpfc_sli_handle_mb_event only if a mailbox cmd
-		 * did finish. This way we won't get the misleading
-		 * "Stray Mailbox Interrupt" message.
-		 */
+	if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 		spin_lock_irq(&phba->hbalock);
-		ha_copy = phba->work_ha;
-		phba->work_ha &= ~HA_MBATT;
+		if (phba->sli.mbox_active)
+			actcmd = phba->sli.mbox_active->u.mb.mbxCommand;
 		spin_unlock_irq(&phba->hbalock);
+		/* Determine how long we might wait for the active mailbox
+		 * command to be gracefully completed by firmware.
+		 */
+		timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, actcmd) *
+					   1000) + jiffies;
+		while (phba->sli.mbox_active) {
+			/* Check active mailbox complete status every 2ms */
+			msleep(2);
+			if (time_after(jiffies, timeout))
+				/* Timeout, let the mailbox flush routine to
+				 * forcefully release active mailbox command
+				 */
+				break;
+		}
+	}
+	lpfc_sli_mbox_sys_flush(phba);
+}
 
-		if (ha_copy & HA_MBATT)
-			if (lpfc_sli_handle_mb_event(phba) == 0)
-				i = 0;
+/**
+ * lpfc_sli_eratt_read - read sli-3 error attention events
+ * @phba: Pointer to HBA context.
+ *
+ * This function is called to read the SLI3 device error attention registers
+ * for possible error attention events. The caller must hold the hostlock
+ * with spin_lock_irq().
+ *
+ * This fucntion returns 1 when there is Error Attention in the Host Attention
+ * Register and returns 0 otherwise.
+ **/
+static int
+lpfc_sli_eratt_read(struct lpfc_hba *phba)
+{
+	uint32_t ha_copy;
 
-		msleep(1);
-	}
+	/* Read chip Host Attention (HA) register */
+	ha_copy = readl(phba->HAregaddr);
+	if (ha_copy & HA_ERATT) {
+		/* Read host status register to retrieve error event */
+		lpfc_sli_read_hs(phba);
 
-	return (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) ? 1 : 0;
+		/* Check if there is a deferred error condition is active */
+		if ((HS_FFER1 & phba->work_hs) &&
+		    ((HS_FFER2 | HS_FFER3 | HS_FFER4 | HS_FFER5 |
+		     HS_FFER6 | HS_FFER7) & phba->work_hs)) {
+			spin_lock_irq(&phba->hbalock);
+			phba->hba_flag |= DEFER_ERATT;
+			spin_unlock_irq(&phba->hbalock);
+			/* Clear all interrupt enable conditions */
+			writel(0, phba->HCregaddr);
+			readl(phba->HCregaddr);
+		}
+
+		/* Set the driver HA work bitmap */
+		spin_lock_irq(&phba->hbalock);
+		phba->work_ha |= HA_ERATT;
+		/* Indicate polling handles this ERATT */
+		phba->hba_flag |= HBA_ERATT_HANDLED;
+		spin_unlock_irq(&phba->hbalock);
+		return 1;
+	}
+	return 0;
 }
 
 /**
  * lpfc_sli_check_eratt - check error attention events
  * @phba: Pointer to HBA context.
  *
- * This function is called form timer soft interrupt context to check HBA's
+ * This function is called from timer soft interrupt context to check HBA's
  * error attention register bit for error attention events.
  *
  * This fucntion returns 1 when there is Error Attention in the Host Attention
@@ -5134,10 +5399,6 @@ lpfc_sli_check_eratt(struct lpfc_hba *phba)
 {
 	uint32_t ha_copy;
 
-	/* If PCI channel is offline, don't process it */
-	if (unlikely(pci_channel_offline(phba->pcidev)))
-		return 0;
-
 	/* If somebody is waiting to handle an eratt, don't process it
 	 * here. The brdkill function will do this.
 	 */
@@ -5161,56 +5422,80 @@ lpfc_sli_check_eratt(struct lpfc_hba *phba)
 		return 0;
 	}
 
-	/* Read chip Host Attention (HA) register */
-	ha_copy = readl(phba->HAregaddr);
-	if (ha_copy & HA_ERATT) {
-		/* Read host status register to retrieve error event */
-		lpfc_sli_read_hs(phba);
-
-		/* Check if there is a deferred error condition is active */
-		if ((HS_FFER1 & phba->work_hs) &&
-			((HS_FFER2 | HS_FFER3 | HS_FFER4 | HS_FFER5 |
-			HS_FFER6 | HS_FFER7) & phba->work_hs)) {
-			phba->hba_flag |= DEFER_ERATT;
-			/* Clear all interrupt enable conditions */
-			writel(0, phba->HCregaddr);
-			readl(phba->HCregaddr);
-		}
-
-		/* Set the driver HA work bitmap */
-		phba->work_ha |= HA_ERATT;
-		/* Indicate polling handles this ERATT */
-		phba->hba_flag |= HBA_ERATT_HANDLED;
+	/* If PCI channel is offline, don't process it */
+	if (unlikely(pci_channel_offline(phba->pcidev))) {
 		spin_unlock_irq(&phba->hbalock);
-		return 1;
+		return 0;
+	}
+
+	switch (phba->sli_rev) {
+	case LPFC_SLI_REV2:
+	case LPFC_SLI_REV3:
+		/* Read chip Host Attention (HA) register */
+		ha_copy = lpfc_sli_eratt_read(phba);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0299 Invalid SLI revision (%d)\n",
+				phba->sli_rev);
+		ha_copy = 0;
+		break;
 	}
 	spin_unlock_irq(&phba->hbalock);
+
+	return ha_copy;
+}
+
+/**
+ * lpfc_intr_state_check - Check device state for interrupt handling
+ * @phba: Pointer to HBA context.
+ *
+ * This inline routine checks whether a device or its PCI slot is in a state
+ * that the interrupt should be handled.
+ *
+ * This function returns 0 if the device or the PCI slot is in a state that
+ * interrupt should be handled, otherwise -EIO.
+ */
+static inline int
+lpfc_intr_state_check(struct lpfc_hba *phba)
+{
+	/* If the pci channel is offline, ignore all the interrupts */
+	if (unlikely(pci_channel_offline(phba->pcidev)))
+		return -EIO;
+
+	/* Update device level interrupt statistics */
+	phba->sli.slistat.sli_intr++;
+
+	/* Ignore all interrupts during initialization. */
+	if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+		return -EIO;
+
 	return 0;
 }
 
 /**
- * lpfc_sp_intr_handler - The slow-path interrupt handler of lpfc driver
+ * lpfc_sli_sp_intr_handler - Slow-path interrupt handler to SLI-3 device
  * @irq: Interrupt number.
  * @dev_id: The device context pointer.
  *
  * This function is directly called from the PCI layer as an interrupt
- * service routine when the device is enabled with MSI-X multi-message
- * interrupt mode and there are slow-path events in the HBA. However,
- * when the device is enabled with either MSI or Pin-IRQ interrupt mode,
- * this function is called as part of the device-level interrupt handler.
- * When the PCI slot is in error recovery or the HBA is undergoing
- * initialization, the interrupt handler will not process the interrupt.
- * The link attention and ELS ring attention events are handled by the
- * worker thread. The interrupt handler signals the worker thread and
- * and returns for these events. This function is called without any
- * lock held. It gets the hbalock to access and update SLI data
+ * service routine when device with SLI-3 interface spec is enabled with
+ * MSI-X multi-message interrupt mode and there are slow-path events in
+ * the HBA. However, when the device is enabled with either MSI or Pin-IRQ
+ * interrupt mode, this function is called as part of the device-level
+ * interrupt handler. When the PCI slot is in error recovery or the HBA
+ * is undergoing initialization, the interrupt handler will not process
+ * the interrupt. The link attention and ELS ring attention events are
+ * handled by the worker thread. The interrupt handler signals the worker
+ * thread and returns for these events. This function is called without
+ * any lock held. It gets the hbalock to access and update SLI data
  * structures.
  *
  * This function returns IRQ_HANDLED when interrupt is handled else it
  * returns IRQ_NONE.
  **/
 irqreturn_t
-lpfc_sp_intr_handler(int irq, void *dev_id)
+lpfc_sli_sp_intr_handler(int irq, void *dev_id)
 {
 	struct lpfc_hba  *phba;
 	uint32_t ha_copy;
@@ -5240,13 +5525,8 @@ lpfc_sp_intr_handler(int irq, void *dev_id)
 	 * individual interrupt handler in MSI-X multi-message interrupt mode
 	 */
 	if (phba->intr_type == MSIX) {
-		/* If the pci channel is offline, ignore all the interrupts */
-		if (unlikely(pci_channel_offline(phba->pcidev)))
-			return IRQ_NONE;
-		/* Update device-level interrupt statistics */
-		phba->sli.slistat.sli_intr++;
-		/* Ignore all interrupts during initialization. */
-		if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+		/* Check device state for handling interrupt */
+		if (lpfc_intr_state_check(phba))
 			return IRQ_NONE;
 		/* Need to read HA REG for slow-path events */
 		spin_lock_irqsave(&phba->hbalock, iflag);
@@ -5271,7 +5551,7 @@ lpfc_sp_intr_handler(int irq, void *dev_id)
 		 * interrupt.
 		 */
 		if (unlikely(phba->hba_flag & DEFER_ERATT)) {
-			spin_unlock_irq(&phba->hbalock);
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
 			return IRQ_NONE;
 		}
 
@@ -5434,7 +5714,8 @@ lpfc_sp_intr_handler(int irq, void *dev_id)
 							LOG_MBOX | LOG_SLI,
 							"0350 rc should have"
 							"been MBX_BUSY");
-						goto send_current_mbox;
+						if (rc != MBX_NOT_FINISHED)
+							goto send_current_mbox;
 					}
 				}
 				spin_lock_irqsave(
@@ -5471,29 +5752,29 @@ send_current_mbox:
 	}
 	return IRQ_HANDLED;
 
-} /* lpfc_sp_intr_handler */
+} /* lpfc_sli_sp_intr_handler */
 
 /**
- * lpfc_fp_intr_handler - The fast-path interrupt handler of lpfc driver
+ * lpfc_sli_fp_intr_handler - Fast-path interrupt handler to SLI-3 device.
  * @irq: Interrupt number.
  * @dev_id: The device context pointer.
  *
  * This function is directly called from the PCI layer as an interrupt
- * service routine when the device is enabled with MSI-X multi-message
- * interrupt mode and there is a fast-path FCP IOCB ring event in the
- * HBA. However, when the device is enabled with either MSI or Pin-IRQ
- * interrupt mode, this function is called as part of the device-level
- * interrupt handler. When the PCI slot is in error recovery or the HBA
- * is undergoing initialization, the interrupt handler will not process
- * the interrupt. The SCSI FCP fast-path ring event are handled in the
- * intrrupt context. This function is called without any lock held. It
- * gets the hbalock to access and update SLI data structures.
+ * service routine when device with SLI-3 interface spec is enabled with
+ * MSI-X multi-message interrupt mode and there is a fast-path FCP IOCB
+ * ring event in the HBA. However, when the device is enabled with either
+ * MSI or Pin-IRQ interrupt mode, this function is called as part of the
+ * device-level interrupt handler. When the PCI slot is in error recovery
+ * or the HBA is undergoing initialization, the interrupt handler will not
+ * process the interrupt. The SCSI FCP fast-path ring event are handled in
+ * the intrrupt context. This function is called without any lock held.
+ * It gets the hbalock to access and update SLI data structures.
  *
  * This function returns IRQ_HANDLED when interrupt is handled else it
  * returns IRQ_NONE.
  **/
 irqreturn_t
-lpfc_fp_intr_handler(int irq, void *dev_id)
+lpfc_sli_fp_intr_handler(int irq, void *dev_id)
 {
 	struct lpfc_hba  *phba;
 	uint32_t ha_copy;
@@ -5513,13 +5794,8 @@ lpfc_fp_intr_handler(int irq, void *dev_id)
 	 * individual interrupt handler in MSI-X multi-message interrupt mode
 	 */
 	if (phba->intr_type == MSIX) {
-		/* If pci channel is offline, ignore all the interrupts */
-		if (unlikely(pci_channel_offline(phba->pcidev)))
-			return IRQ_NONE;
-		/* Update device-level interrupt statistics */
-		phba->sli.slistat.sli_intr++;
-		/* Ignore all interrupts during initialization. */
-		if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+		/* Check device state for handling interrupt */
+		if (lpfc_intr_state_check(phba))
 			return IRQ_NONE;
 		/* Need to read HA REG for FCP ring and other ring events */
 		ha_copy = readl(phba->HAregaddr);
@@ -5530,7 +5806,7 @@ lpfc_fp_intr_handler(int irq, void *dev_id)
 		 * any interrupt.
 		 */
 		if (unlikely(phba->hba_flag & DEFER_ERATT)) {
-			spin_unlock_irq(&phba->hbalock);
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
 			return IRQ_NONE;
 		}
 		writel((ha_copy & (HA_R0_CLR_MSK | HA_R1_CLR_MSK)),
@@ -5566,26 +5842,27 @@ lpfc_fp_intr_handler(int irq, void *dev_id)
 		}
 	}
 	return IRQ_HANDLED;
-}  /* lpfc_fp_intr_handler */
+}  /* lpfc_sli_fp_intr_handler */
 
 /**
- * lpfc_intr_handler - The device-level interrupt handler of lpfc driver
+ * lpfc_sli_intr_handler - Device-level interrupt handler to SLI-3 device
  * @irq: Interrupt number.
  * @dev_id: The device context pointer.
  *
- * This function is the device-level interrupt handler called from the PCI
- * layer when either MSI or Pin-IRQ interrupt mode is enabled and there is
- * an event in the HBA which requires driver attention. This function
- * invokes the slow-path interrupt attention handling function and fast-path
- * interrupt attention handling function in turn to process the relevant
- * HBA attention events. This function is called without any lock held. It
- * gets the hbalock to access and update SLI data structures.
+ * This function is the HBA device-level interrupt handler to device with
+ * SLI-3 interface spec, called from the PCI layer when either MSI or
+ * Pin-IRQ interrupt mode is enabled and there is an event in the HBA which
+ * requires driver attention. This function invokes the slow-path interrupt
+ * attention handling function and fast-path interrupt attention handling
+ * function in turn to process the relevant HBA attention events. This
+ * function is called without any lock held. It gets the hbalock to access
+ * and update SLI data structures.
  *
  * This function returns IRQ_HANDLED when interrupt is handled, else it
  * returns IRQ_NONE.
  **/
 irqreturn_t
-lpfc_intr_handler(int irq, void *dev_id)
+lpfc_sli_intr_handler(int irq, void *dev_id)
 {
 	struct lpfc_hba  *phba;
 	irqreturn_t sp_irq_rc, fp_irq_rc;
@@ -5600,15 +5877,8 @@ lpfc_intr_handler(int irq, void *dev_id)
 	if (unlikely(!phba))
 		return IRQ_NONE;
 
-	/* If the pci channel is offline, ignore all the interrupts. */
-	if (unlikely(pci_channel_offline(phba->pcidev)))
-		return IRQ_NONE;
-
-	/* Update device level interrupt statistics */
-	phba->sli.slistat.sli_intr++;
-
-	/* Ignore all interrupts during initialization. */
-	if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+	/* Check device state for handling interrupt */
+	if (lpfc_intr_state_check(phba))
 		return IRQ_NONE;
 
 	spin_lock(&phba->hbalock);
@@ -5650,7 +5920,7 @@ lpfc_intr_handler(int irq, void *dev_id)
 	status2 >>= (4*LPFC_ELS_RING);
 
 	if (status1 || (status2 & HA_RXMASK))
-		sp_irq_rc = lpfc_sp_intr_handler(irq, dev_id);
+		sp_irq_rc = lpfc_sli_sp_intr_handler(irq, dev_id);
 	else
 		sp_irq_rc = IRQ_NONE;
 
@@ -5670,10 +5940,10 @@ lpfc_intr_handler(int irq, void *dev_id)
 		status2 = 0;
 
 	if ((status1 & HA_RXMASK) || (status2 & HA_RXMASK))
-		fp_irq_rc = lpfc_fp_intr_handler(irq, dev_id);
+		fp_irq_rc = lpfc_sli_fp_intr_handler(irq, dev_id);
 	else
 		fp_irq_rc = IRQ_NONE;
 
 	/* Return device-level interrupt handling status */
 	return (sp_irq_rc == IRQ_HANDLED) ? sp_irq_rc : fp_irq_rc;
-}  /* lpfc_intr_handler */
+}  /* lpfc_sli_intr_handler */
-- 
cgit v0.10.2


From da0436e915a5c17ee79e72c1bf978a4ebb1cbf4d Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:51:39 -0400
Subject: [SCSI] lpfc 8.3.2 : Addition of SLI4 Interface - Base Support

Adds new hardware and interface definitions.

Adds new interface routines - utilizing the reorganized layout of the
driver. Adds SLI-4 specific functions for attachment, initialization,
teardown, etc.

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 6c24c9a..13ac108 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -105,9 +105,11 @@ struct lpfc_dma_pool {
 };
 
 struct hbq_dmabuf {
+	struct lpfc_dmabuf hbuf;
 	struct lpfc_dmabuf dbuf;
 	uint32_t size;
 	uint32_t tag;
+	struct lpfc_rcqe rcqe;
 };
 
 /* Priority bit.  Set value to exceed low water mark in lpfc_mem. */
@@ -141,7 +143,10 @@ typedef struct lpfc_vpd {
 	} rev;
 	struct {
 #ifdef __BIG_ENDIAN_BITFIELD
-		uint32_t rsvd2  :24;  /* Reserved                             */
+		uint32_t rsvd3  :19;  /* Reserved                             */
+		uint32_t cdss	: 1;  /* Configure Data Security SLI          */
+		uint32_t rsvd2	: 3;  /* Reserved                             */
+		uint32_t cbg	: 1;  /* Configure BlockGuard                 */
 		uint32_t cmv	: 1;  /* Configure Max VPIs                   */
 		uint32_t ccrp   : 1;  /* Config Command Ring Polling          */
 		uint32_t csah   : 1;  /* Configure Synchronous Abort Handling */
@@ -159,7 +164,10 @@ typedef struct lpfc_vpd {
 		uint32_t csah   : 1;  /* Configure Synchronous Abort Handling */
 		uint32_t ccrp   : 1;  /* Config Command Ring Polling          */
 		uint32_t cmv	: 1;  /* Configure Max VPIs                   */
-		uint32_t rsvd2  :24;  /* Reserved                             */
+		uint32_t cbg	: 1;  /* Configure BlockGuard                 */
+		uint32_t rsvd2	: 3;  /* Reserved                             */
+		uint32_t cdss	: 1;  /* Configure Data Security SLI          */
+		uint32_t rsvd3  :19;  /* Reserved                             */
 #endif
 	} sli3Feat;
 } lpfc_vpd_t;
@@ -280,6 +288,9 @@ struct lpfc_vport {
 	enum discovery_state port_state;
 
 	uint16_t vpi;
+	uint16_t vfi;
+	uint8_t vfi_state;
+#define LPFC_VFI_REGISTERED	0x1
 
 	uint32_t fc_flag;	/* FC flags */
 /* Several of these flags are HBA centric and should be moved to
@@ -392,6 +403,9 @@ struct lpfc_vport {
 #endif
 	uint8_t stat_data_enabled;
 	uint8_t stat_data_blocked;
+	struct list_head rcv_buffer_list;
+	uint32_t vport_flag;
+#define STATIC_VPORT	1
 };
 
 struct hbq_s {
@@ -494,6 +508,7 @@ struct lpfc_hba {
 #define LPFC_SLI3_CRP_ENABLED		0x08
 #define LPFC_SLI3_INB_ENABLED		0x10
 #define LPFC_SLI3_BG_ENABLED		0x20
+#define LPFC_SLI3_DSS_ENABLED		0x40
 	uint32_t iocb_cmd_size;
 	uint32_t iocb_rsp_size;
 
@@ -507,8 +522,13 @@ struct lpfc_hba {
 
 	uint32_t hba_flag;	/* hba generic flags */
 #define HBA_ERATT_HANDLED	0x1 /* This flag is set when eratt handled */
-
-#define DEFER_ERATT		0x4 /* Deferred error attention in progress */
+#define DEFER_ERATT		0x2 /* Deferred error attention in progress */
+#define HBA_FCOE_SUPPORT	0x4 /* HBA function supports FCOE */
+#define HBA_RECEIVE_BUFFER	0x8 /* Rcv buffer posted to worker thread */
+#define HBA_POST_RECEIVE_BUFFER 0x10 /* Rcv buffers need to be posted */
+#define FCP_XRI_ABORT_EVENT	0x20
+#define ELS_XRI_ABORT_EVENT	0x40
+#define ASYNC_EVENT		0x80
 	struct lpfc_dmabuf slim2p;
 
 	MAILBOX_t *mbox;
@@ -567,6 +587,9 @@ struct lpfc_hba {
 	uint32_t cfg_poll;
 	uint32_t cfg_poll_tmo;
 	uint32_t cfg_use_msi;
+	uint32_t cfg_fcp_imax;
+	uint32_t cfg_fcp_wq_count;
+	uint32_t cfg_fcp_eq_count;
 	uint32_t cfg_sg_seg_cnt;
 	uint32_t cfg_prot_sg_seg_cnt;
 	uint32_t cfg_sg_dma_buf_size;
@@ -576,6 +599,8 @@ struct lpfc_hba {
 	uint32_t cfg_enable_hba_reset;
 	uint32_t cfg_enable_hba_heartbeat;
 	uint32_t cfg_enable_bg;
+	uint32_t cfg_enable_fip;
+	uint32_t cfg_log_verbose;
 
 	lpfc_vpd_t vpd;		/* vital product data */
 
@@ -659,7 +684,8 @@ struct lpfc_hba {
 	/* pci_mem_pools */
 	struct pci_pool *lpfc_scsi_dma_buf_pool;
 	struct pci_pool *lpfc_mbuf_pool;
-	struct pci_pool *lpfc_hbq_pool;
+	struct pci_pool *lpfc_hrb_pool;	/* header receive buffer pool */
+	struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */
 	struct lpfc_dma_pool lpfc_mbuf_safety_pool;
 
 	mempool_t *mbox_mem_pool;
@@ -675,6 +701,14 @@ struct lpfc_hba {
 	struct lpfc_vport *pport;	/* physical lpfc_vport pointer */
 	uint16_t max_vpi;		/* Maximum virtual nports */
 #define LPFC_MAX_VPI 0xFFFF		/* Max number of VPI supported */
+	uint16_t max_vports;            /*
+					 * For IOV HBAs max_vpi can change
+					 * after a reset. max_vports is max
+					 * number of vports present. This can
+					 * be greater than max_vpi.
+					 */
+	uint16_t vpi_base;
+	uint16_t vfi_base;
 	unsigned long *vpi_bmask;	/* vpi allocation table */
 
 	/* Data structure used by fabric iocb scheduler */
@@ -733,6 +767,11 @@ struct lpfc_hba {
 /* Maximum number of events that can be outstanding at any time*/
 #define LPFC_MAX_EVT_COUNT 512
 	atomic_t fast_event_count;
+	struct lpfc_fcf fcf;
+	uint8_t fc_map[3];
+	uint8_t valid_vlan;
+	uint16_t vlan_id;
+	struct list_head fcf_conn_rec_list;
 };
 
 static inline struct Scsi_Host *
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index c14f0cb..82016fc 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -30,8 +30,10 @@
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -828,18 +830,37 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
 		return 0;
 	}
 
-	if (mrpi)
-		*mrpi = pmb->un.varRdConfig.max_rpi;
-	if (arpi)
-		*arpi = pmb->un.varRdConfig.avail_rpi;
-	if (mxri)
-		*mxri = pmb->un.varRdConfig.max_xri;
-	if (axri)
-		*axri = pmb->un.varRdConfig.avail_xri;
-	if (mvpi)
-		*mvpi = pmb->un.varRdConfig.max_vpi;
-	if (avpi)
-		*avpi = pmb->un.varRdConfig.avail_vpi;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		rd_config = &pmboxq->u.mqe.un.rd_config;
+		if (mrpi)
+			*mrpi = bf_get(lpfc_mbx_rd_conf_rpi_count, rd_config);
+		if (arpi)
+			*arpi = bf_get(lpfc_mbx_rd_conf_rpi_count, rd_config) -
+					phba->sli4_hba.max_cfg_param.rpi_used;
+		if (mxri)
+			*mxri = bf_get(lpfc_mbx_rd_conf_xri_count, rd_config);
+		if (axri)
+			*axri = bf_get(lpfc_mbx_rd_conf_xri_count, rd_config) -
+					phba->sli4_hba.max_cfg_param.xri_used;
+		if (mvpi)
+			*mvpi = bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config);
+		if (avpi)
+			*avpi = bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) -
+					phba->sli4_hba.max_cfg_param.vpi_used;
+	} else {
+		if (mrpi)
+			*mrpi = pmb->un.varRdConfig.max_rpi;
+		if (arpi)
+			*arpi = pmb->un.varRdConfig.avail_rpi;
+		if (mxri)
+			*mxri = pmb->un.varRdConfig.max_xri;
+		if (axri)
+			*axri = pmb->un.varRdConfig.avail_xri;
+		if (mvpi)
+			*mvpi = pmb->un.varRdConfig.max_vpi;
+		if (avpi)
+			*avpi = pmb->un.varRdConfig.avail_vpi;
+	}
 
 	mempool_free(pmboxq, phba->mbox_mem_pool);
 	return 1;
@@ -2844,15 +2865,39 @@ LPFC_ATTR_RW(poll_tmo, 10, 1, 255,
 /*
 # lpfc_use_msi: Use MSI (Message Signaled Interrupts) in systems that
 #		support this feature
-#       0  = MSI disabled
+#       0  = MSI disabled (default)
 #       1  = MSI enabled
-#       2  = MSI-X enabled (default)
-# Value range is [0,2]. Default value is 2.
+#       2  = MSI-X enabled
+# Value range is [0,2]. Default value is 0.
 */
-LPFC_ATTR_R(use_msi, 2, 0, 2, "Use Message Signaled Interrupts (1) or "
+LPFC_ATTR_R(use_msi, 0, 0, 2, "Use Message Signaled Interrupts (1) or "
 	    "MSI-X (2), if possible");
 
 /*
+# lpfc_fcp_imax: Set the maximum number of fast-path FCP interrupts per second
+#
+# Value range is [636,651042]. Default value is 10000.
+*/
+LPFC_ATTR_R(fcp_imax, LPFC_FP_DEF_IMAX, LPFC_MIM_IMAX, LPFC_DMULT_CONST,
+	    "Set the maximum number of fast-path FCP interrupts per second");
+
+/*
+# lpfc_fcp_wq_count: Set the number of fast-path FCP work queues
+#
+# Value range is [1,31]. Default value is 4.
+*/
+LPFC_ATTR_R(fcp_wq_count, LPFC_FP_WQN_DEF, LPFC_FP_WQN_MIN, LPFC_FP_WQN_MAX,
+	    "Set the number of fast-path FCP work queues, if possible");
+
+/*
+# lpfc_fcp_eq_count: Set the number of fast-path FCP event queues
+#
+# Value range is [1,7]. Default value is 1.
+*/
+LPFC_ATTR_R(fcp_eq_count, LPFC_FP_EQN_DEF, LPFC_FP_EQN_MIN, LPFC_FP_EQN_MAX,
+	    "Set the number of fast-path FCP event queues, if possible");
+
+/*
 # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware.
 #       0  = HBA resets disabled
 #       1  = HBA resets enabled (default)
@@ -2969,6 +3014,9 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_poll,
 	&dev_attr_lpfc_poll_tmo,
 	&dev_attr_lpfc_use_msi,
+	&dev_attr_lpfc_fcp_imax,
+	&dev_attr_lpfc_fcp_wq_count,
+	&dev_attr_lpfc_fcp_eq_count,
 	&dev_attr_lpfc_enable_bg,
 	&dev_attr_lpfc_soft_wwnn,
 	&dev_attr_lpfc_soft_wwpn,
@@ -4105,6 +4153,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
 	lpfc_enable_npiv_init(phba, lpfc_enable_npiv);
 	lpfc_use_msi_init(phba, lpfc_use_msi);
+	lpfc_fcp_imax_init(phba, lpfc_fcp_imax);
+	lpfc_fcp_wq_count_init(phba, lpfc_fcp_wq_count);
+	lpfc_fcp_eq_count_init(phba, lpfc_fcp_eq_count);
 	lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset);
 	lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat);
 	lpfc_enable_bg_init(phba, lpfc_enable_bg);
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index f88ce3f..3802e45 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -35,17 +35,19 @@ int lpfc_config_msi(struct lpfc_hba *, LPFC_MBOXQ_t *);
 int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *, int);
 void lpfc_read_config(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_read_lnk_stat(struct lpfc_hba *, LPFC_MBOXQ_t *);
-int lpfc_reg_login(struct lpfc_hba *, uint16_t, uint32_t, uint8_t *,
-		   LPFC_MBOXQ_t *, uint32_t);
+int lpfc_reg_rpi(struct lpfc_hba *, uint16_t, uint32_t, uint8_t *,
+		 LPFC_MBOXQ_t *, uint32_t);
 void lpfc_unreg_login(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
 void lpfc_unreg_did(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
-void lpfc_reg_vpi(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
+void lpfc_reg_vpi(struct lpfc_vport *, LPFC_MBOXQ_t *);
 void lpfc_unreg_vpi(struct lpfc_hba *, uint16_t, LPFC_MBOXQ_t *);
 void lpfc_init_link(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t, uint32_t);
+void lpfc_request_features(struct lpfc_hba *, struct lpfcMboxq *);
 
 struct lpfc_vport *lpfc_find_vport_by_did(struct lpfc_hba *, uint32_t);
 void lpfc_cleanup_rpis(struct lpfc_vport *, int);
 int lpfc_linkdown(struct lpfc_hba *);
+void lpfc_linkdown_port(struct lpfc_vport *);
 void lpfc_port_link_failure(struct lpfc_vport *);
 void lpfc_mbx_cmpl_read_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
 
@@ -54,6 +56,7 @@ void lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+void lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_enqueue_node(struct lpfc_vport *, struct lpfc_nodelist *);
 void lpfc_dequeue_node(struct lpfc_vport *, struct lpfc_nodelist *);
 struct lpfc_nodelist *lpfc_enable_node(struct lpfc_vport *,
@@ -149,15 +152,19 @@ int lpfc_online(struct lpfc_hba *);
 void lpfc_unblock_mgmt_io(struct lpfc_hba *);
 void lpfc_offline_prep(struct lpfc_hba *);
 void lpfc_offline(struct lpfc_hba *);
+void lpfc_reset_hba(struct lpfc_hba *);
 
 int lpfc_sli_setup(struct lpfc_hba *);
 int lpfc_sli_queue_setup(struct lpfc_hba *);
 
 void lpfc_handle_eratt(struct lpfc_hba *);
 void lpfc_handle_latt(struct lpfc_hba *);
-irqreturn_t lpfc_intr_handler(int, void *);
-irqreturn_t lpfc_sp_intr_handler(int, void *);
-irqreturn_t lpfc_fp_intr_handler(int, void *);
+irqreturn_t lpfc_sli_intr_handler(int, void *);
+irqreturn_t lpfc_sli_sp_intr_handler(int, void *);
+irqreturn_t lpfc_sli_fp_intr_handler(int, void *);
+irqreturn_t lpfc_sli4_intr_handler(int, void *);
+irqreturn_t lpfc_sli4_sp_intr_handler(int, void *);
+irqreturn_t lpfc_sli4_fp_intr_handler(int, void *);
 
 void lpfc_read_rev(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_config_ring(struct lpfc_hba *, int, LPFC_MBOXQ_t *);
@@ -165,16 +172,32 @@ void lpfc_config_port(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_kill_board(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbox_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
 LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *);
+void __lpfc_mbox_cmpl_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbox_cmpl_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
+int lpfc_mbox_cmd_check(struct lpfc_hba *, LPFC_MBOXQ_t *);
+int lpfc_mbox_dev_check(struct lpfc_hba *);
 int lpfc_mbox_tmo_val(struct lpfc_hba *, int);
+void lpfc_init_vfi(struct lpfcMboxq *, struct lpfc_vport *);
+void lpfc_reg_vfi(struct lpfcMboxq *, struct lpfc_vport *, dma_addr_t);
+void lpfc_init_vpi(struct lpfcMboxq *, uint16_t);
+void lpfc_unreg_vfi(struct lpfcMboxq *, uint16_t);
+void lpfc_reg_fcfi(struct lpfc_hba *, struct lpfcMboxq *);
+void lpfc_unreg_fcfi(struct lpfcMboxq *, uint16_t);
+void lpfc_resume_rpi(struct lpfcMboxq *, struct lpfc_nodelist *);
 
 void lpfc_config_hbq(struct lpfc_hba *, uint32_t, struct lpfc_hbq_init *,
 	uint32_t , LPFC_MBOXQ_t *);
 struct hbq_dmabuf *lpfc_els_hbq_alloc(struct lpfc_hba *);
 void lpfc_els_hbq_free(struct lpfc_hba *, struct hbq_dmabuf *);
+struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *);
+void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *);
+void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
+			uint16_t);
+void lpfc_unregister_unused_fcf(struct lpfc_hba *);
 
-int lpfc_mem_alloc(struct lpfc_hba *);
+int lpfc_mem_alloc(struct lpfc_hba *, int align);
 void lpfc_mem_free(struct lpfc_hba *);
+void lpfc_mem_free_all(struct lpfc_hba *);
 void lpfc_stop_vport_timers(struct lpfc_vport *);
 
 void lpfc_poll_timeout(unsigned long ptr);
@@ -198,12 +221,13 @@ int lpfc_sli_host_down(struct lpfc_vport *);
 int lpfc_sli_hba_down(struct lpfc_hba *);
 int lpfc_sli_issue_mbox(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
 int lpfc_sli_handle_mb_event(struct lpfc_hba *);
-int lpfc_sli_flush_mbox_queue(struct lpfc_hba *);
+void lpfc_sli_mbox_sys_shutdown(struct lpfc_hba *);
 int lpfc_sli_check_eratt(struct lpfc_hba *);
-int lpfc_sli_handle_slow_ring_event(struct lpfc_hba *,
+void lpfc_sli_handle_slow_ring_event(struct lpfc_hba *,
 				    struct lpfc_sli_ring *, uint32_t);
+int lpfc_sli4_handle_received_buffer(struct lpfc_hba *);
 void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
-int lpfc_sli_issue_iocb(struct lpfc_hba *, struct lpfc_sli_ring *,
+int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t,
 			struct lpfc_iocbq *, uint32_t);
 void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t);
 void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
@@ -237,7 +261,7 @@ struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_vport *,
 
 int lpfc_sli_issue_mbox_wait(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
 
-int lpfc_sli_issue_iocb_wait(struct lpfc_hba *, struct lpfc_sli_ring *,
+int lpfc_sli_issue_iocb_wait(struct lpfc_hba *, uint32_t,
 			     struct lpfc_iocbq *, struct lpfc_iocbq *,
 			     uint32_t);
 void lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *, struct lpfc_iocbq *,
@@ -254,6 +278,12 @@ void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *);
 const char* lpfc_info(struct Scsi_Host *);
 int lpfc_scan_finished(struct Scsi_Host *, unsigned long);
 
+int lpfc_init_api_table_setup(struct lpfc_hba *, uint8_t);
+int lpfc_sli_api_table_setup(struct lpfc_hba *, uint8_t);
+int lpfc_scsi_api_table_setup(struct lpfc_hba *, uint8_t);
+int lpfc_mbox_api_table_setup(struct lpfc_hba *, uint8_t);
+int lpfc_api_table_setup(struct lpfc_hba *, uint8_t);
+
 void lpfc_get_cfgparam(struct lpfc_hba *);
 void lpfc_get_vport_cfgparam(struct lpfc_vport *);
 int lpfc_alloc_sysfs_attr(struct lpfc_vport *);
@@ -314,8 +344,15 @@ lpfc_send_els_failure_event(struct lpfc_hba *, struct lpfc_iocbq *,
 				struct lpfc_iocbq *);
 struct lpfc_fast_path_event *lpfc_alloc_fast_evt(struct lpfc_hba *);
 void lpfc_free_fast_evt(struct lpfc_hba *, struct lpfc_fast_path_event *);
+void lpfc_create_static_vport(struct lpfc_hba *);
+void lpfc_stop_hba_timers(struct lpfc_hba *);
+void lpfc_stop_port(struct lpfc_hba *);
+void lpfc_parse_fcoe_conf(struct lpfc_hba *, uint8_t *, uint32_t);
+int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *, int);
+void lpfc_start_fdiscs(struct lpfc_hba *phba);
 
 #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code)
 #define HBA_EVENT_RSCN                   5
 #define HBA_EVENT_LINK_UP                2
 #define HBA_EVENT_LINK_DOWN              3
+
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 4164b93..5199078 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -32,8 +32,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 5dd6692..42ef258 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -33,8 +33,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 8c5c3ae..9fe36bf 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -28,8 +28,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -220,7 +222,7 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp,
 		icmd->un.elsreq64.myID = vport->fc_myDID;
 
 		/* For ELS_REQUEST64_CR, use the VPI by default */
-		icmd->ulpContext = vport->vpi;
+		icmd->ulpContext = vport->vpi + phba->vpi_base;
 		icmd->ulpCt_h = 0;
 		/* The CT field must be 0=INVALID_RPI for the ECHO cmd */
 		if (elscmd == ELS_CMD_ECHO)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 25fc96c..0fc6600 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -29,10 +29,12 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_scsi.h"
 #include "lpfc.h"
 #include "lpfc_logmsg.h"
@@ -491,6 +493,10 @@ lpfc_work_done(struct lpfc_hba *phba)
 	phba->work_ha = 0;
 	spin_unlock_irq(&phba->hbalock);
 
+	/* First, try to post the next mailbox command to SLI4 device */
+	if (phba->pci_dev_grp == LPFC_PCI_DEV_OC)
+		lpfc_sli4_post_async_mbox(phba);
+
 	if (ha_copy & HA_ERATT)
 		/* Handle the error attention event */
 		lpfc_handle_eratt(phba);
@@ -501,9 +507,27 @@ lpfc_work_done(struct lpfc_hba *phba)
 	if (ha_copy & HA_LATT)
 		lpfc_handle_latt(phba);
 
+	/* Process SLI4 events */
+	if (phba->pci_dev_grp == LPFC_PCI_DEV_OC) {
+		if (phba->hba_flag & FCP_XRI_ABORT_EVENT)
+			lpfc_sli4_fcp_xri_abort_event_proc(phba);
+		if (phba->hba_flag & ELS_XRI_ABORT_EVENT)
+			lpfc_sli4_els_xri_abort_event_proc(phba);
+		if (phba->hba_flag & ASYNC_EVENT)
+			lpfc_sli4_async_event_proc(phba);
+		if (phba->hba_flag & HBA_POST_RECEIVE_BUFFER) {
+			spin_lock_irq(&phba->hbalock);
+			phba->hba_flag &= ~HBA_POST_RECEIVE_BUFFER;
+			spin_unlock_irq(&phba->hbalock);
+			lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ);
+		}
+		if (phba->hba_flag & HBA_RECEIVE_BUFFER)
+			lpfc_sli4_handle_received_buffer(phba);
+	}
+
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi; i++) {
+		for (i = 0; i <= phba->max_vports; i++) {
 			/*
 			 * We could have no vports in array if unloading, so if
 			 * this happens then just use the pport
@@ -2556,7 +2580,8 @@ lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport)
 	 * clear_la then don't send it.
 	 */
 	if ((phba->link_state >= LPFC_CLEAR_LA) ||
-	    (vport->port_type != LPFC_PHYSICAL_PORT))
+	    (vport->port_type != LPFC_PHYSICAL_PORT) ||
+		(phba->sli_rev == LPFC_SLI_REV4))
 		return;
 
 			/* Link up discovery */
@@ -2585,7 +2610,7 @@ lpfc_issue_reg_vpi(struct lpfc_hba *phba, struct lpfc_vport *vport)
 
 	regvpimbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (regvpimbox) {
-		lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, regvpimbox);
+		lpfc_reg_vpi(vport, regvpimbox);
 		regvpimbox->mbox_cmpl = lpfc_mbx_cmpl_reg_vpi;
 		regvpimbox->vport = vport;
 		if (lpfc_sli_issue_mbox(phba, regvpimbox, MBX_NOWAIT)
@@ -2645,7 +2670,8 @@ lpfc_disc_start(struct lpfc_vport *vport)
 	 */
 	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
 	    !(vport->fc_flag & FC_PT2PT) &&
-	    !(vport->fc_flag & FC_RSCN_MODE)) {
+	    !(vport->fc_flag & FC_RSCN_MODE) &&
+	    (phba->sli_rev < LPFC_SLI_REV4)) {
 		lpfc_issue_reg_vpi(phba, vport);
 		return;
 	}
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 4168c7b..a9d64cf 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -471,6 +471,35 @@ struct serv_parm {	/* Structure is in Big Endian format */
 };
 
 /*
+ * Virtual Fabric Tagging Header
+ */
+struct fc_vft_header {
+	 uint32_t word0;
+#define fc_vft_hdr_r_ctl_SHIFT		24
+#define fc_vft_hdr_r_ctl_MASK		0xFF
+#define fc_vft_hdr_r_ctl_WORD		word0
+#define fc_vft_hdr_ver_SHIFT		22
+#define fc_vft_hdr_ver_MASK		0x3
+#define fc_vft_hdr_ver_WORD		word0
+#define fc_vft_hdr_type_SHIFT		18
+#define fc_vft_hdr_type_MASK		0xF
+#define fc_vft_hdr_type_WORD		word0
+#define fc_vft_hdr_e_SHIFT		16
+#define fc_vft_hdr_e_MASK		0x1
+#define fc_vft_hdr_e_WORD		word0
+#define fc_vft_hdr_priority_SHIFT	13
+#define fc_vft_hdr_priority_MASK	0x7
+#define fc_vft_hdr_priority_WORD	word0
+#define fc_vft_hdr_vf_id_SHIFT		1
+#define fc_vft_hdr_vf_id_MASK		0xFFF
+#define fc_vft_hdr_vf_id_WORD		word0
+	uint32_t word1;
+#define fc_vft_hdr_hopct_SHIFT		24
+#define fc_vft_hdr_hopct_MASK		0xFF
+#define fc_vft_hdr_hopct_WORD		word1
+};
+
+/*
  *  Extended Link Service LS_COMMAND codes (Payload Word 0)
  */
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -1152,6 +1181,9 @@ typedef struct {
 #define PCI_DEVICE_ID_HORNET        0xfe05
 #define PCI_DEVICE_ID_ZEPHYR_SCSP   0xfe11
 #define PCI_DEVICE_ID_ZEPHYR_DCSP   0xfe12
+#define PCI_VENDOR_ID_SERVERENGINE  0x19a2
+#define PCI_DEVICE_ID_TIGERSHARK    0x0704
+#define PCI_DEVICE_ID_TIGERSHARK_S  0x0705
 
 #define JEDEC_ID_ADDRESS            0x0080001c
 #define FIREFLY_JEDEC_ID            0x1ACC
@@ -1342,15 +1374,21 @@ typedef struct {		/* FireFly BIU registers */
 #define MBX_READ_LA64       0x95
 #define MBX_REG_VPI	    0x96
 #define MBX_UNREG_VPI	    0x97
-#define MBX_REG_VNPID	    0x96
-#define MBX_UNREG_VNPID	    0x97
 
 #define MBX_WRITE_WWN       0x98
 #define MBX_SET_DEBUG       0x99
 #define MBX_LOAD_EXP_ROM    0x9C
-
-#define MBX_MAX_CMDS        0x9D
+#define MBX_SLI4_CONFIG	    0x9B
+#define MBX_SLI4_REQ_FTRS   0x9D
+#define MBX_MAX_CMDS        0x9E
+#define MBX_RESUME_RPI      0x9E
 #define MBX_SLI2_CMD_MASK   0x80
+#define MBX_REG_VFI         0x9F
+#define MBX_REG_FCFI        0xA0
+#define MBX_UNREG_VFI       0xA1
+#define MBX_UNREG_FCFI	    0xA2
+#define MBX_INIT_VFI        0xA3
+#define MBX_INIT_VPI        0xA4
 
 /* IOCB Commands */
 
@@ -1440,6 +1478,16 @@ typedef struct {		/* FireFly BIU registers */
 #define CMD_IOCB_LOGENTRY_CN		0x94
 #define CMD_IOCB_LOGENTRY_ASYNC_CN	0x96
 
+/* Unhandled Data Security SLI Commands */
+#define DSSCMD_IWRITE64_CR 		0xD8
+#define DSSCMD_IWRITE64_CX		0xD9
+#define DSSCMD_IREAD64_CR		0xDA
+#define DSSCMD_IREAD64_CX		0xDB
+#define DSSCMD_INVALIDATE_DEK		0xDC
+#define DSSCMD_SET_KEK			0xDD
+#define DSSCMD_GET_KEK_ID		0xDE
+#define DSSCMD_GEN_XFER			0xDF
+
 #define CMD_MAX_IOCB_CMD        0xE6
 #define CMD_IOCB_MASK           0xff
 
@@ -1466,6 +1514,7 @@ typedef struct {		/* FireFly BIU registers */
 #define MBXERR_BAD_RCV_LENGTH       14
 #define MBXERR_DMA_ERROR            15
 #define MBXERR_ERROR                16
+#define MBXERR_LINK_DOWN            0x33
 #define MBX_NOT_FINISHED           255
 
 #define MBX_BUSY                   0xffffff /* Attempted cmd to busy Mailbox */
@@ -1504,32 +1553,6 @@ struct ulp_bde {
 #endif
 };
 
-struct ulp_bde64 {	/* SLI-2 */
-	union ULP_BDE_TUS {
-		uint32_t w;
-		struct {
-#ifdef __BIG_ENDIAN_BITFIELD
-			uint32_t bdeFlags:8;	/* BDE Flags 0 IS A SUPPORTED
-						   VALUE !! */
-			uint32_t bdeSize:24;	/* Size of buffer (in bytes) */
-#else	/*  __LITTLE_ENDIAN_BITFIELD */
-			uint32_t bdeSize:24;	/* Size of buffer (in bytes) */
-			uint32_t bdeFlags:8;	/* BDE Flags 0 IS A SUPPORTED
-						   VALUE !! */
-#endif
-#define BUFF_TYPE_BDE_64    0x00	/* BDE (Host_resident) */
-#define BUFF_TYPE_BDE_IMMED 0x01	/* Immediate Data BDE */
-#define BUFF_TYPE_BDE_64P   0x02	/* BDE (Port-resident) */
-#define BUFF_TYPE_BDE_64I   0x08	/* Input BDE (Host-resident) */
-#define BUFF_TYPE_BDE_64IP  0x0A	/* Input BDE (Port-resident) */
-#define BUFF_TYPE_BLP_64    0x40	/* BLP (Host-resident) */
-#define BUFF_TYPE_BLP_64P   0x42	/* BLP (Port-resident) */
-		} f;
-	} tus;
-	uint32_t addrLow;
-	uint32_t addrHigh;
-};
-
 typedef struct ULP_BDL {	/* SLI-2 */
 #ifdef __BIG_ENDIAN_BITFIELD
 	uint32_t bdeFlags:8;	/* BDL Flags */
@@ -2287,7 +2310,7 @@ typedef struct {
 	uint32_t rsvd3;
 	uint32_t rsvd4;
 	uint32_t rsvd5;
-	uint16_t rsvd6;
+	uint16_t vfi;
 	uint16_t vpi;
 #else	/*  __LITTLE_ENDIAN */
 	uint32_t rsvd1;
@@ -2297,7 +2320,7 @@ typedef struct {
 	uint32_t rsvd4;
 	uint32_t rsvd5;
 	uint16_t vpi;
-	uint16_t rsvd6;
+	uint16_t vfi;
 #endif
 } REG_VPI_VAR;
 
@@ -2457,7 +2480,7 @@ typedef struct {
 	uint32_t entry_index:16;
 #endif
 
-	uint32_t rsvd1;
+	uint32_t sli4_length;
 	uint32_t word_cnt;
 	uint32_t resp_offset;
 } DUMP_VAR;
@@ -2470,9 +2493,32 @@ typedef struct {
 #define  DMP_RSP_OFFSET          0x14   /* word 5 contains first word of rsp */
 #define  DMP_RSP_SIZE            0x6C   /* maximum of 27 words of rsp data */
 
+#define  DMP_REGION_VPORT	 0x16   /* VPort info region */
+#define  DMP_VPORT_REGION_SIZE	 0x200
+#define  DMP_MBOX_OFFSET_WORD	 0x5
+
+#define  DMP_REGION_FCOEPARAM	 0x17   /* fcoe param region */
+#define  DMP_FCOEPARAM_RGN_SIZE	 0x400
+
 #define  WAKE_UP_PARMS_REGION_ID    4
 #define  WAKE_UP_PARMS_WORD_SIZE   15
 
+struct vport_rec {
+	uint8_t wwpn[8];
+	uint8_t wwnn[8];
+};
+
+#define VPORT_INFO_SIG 0x32324752
+#define VPORT_INFO_REV_MASK 0xff
+#define VPORT_INFO_REV 0x1
+#define MAX_STATIC_VPORT_COUNT 16
+struct static_vport_info {
+	uint32_t 		signature;
+	uint32_t		rev;
+	struct vport_rec 	vport_list[MAX_STATIC_VPORT_COUNT];
+	uint32_t		resvd[66];
+};
+
 /* Option rom version structure */
 struct prog_id {
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -2697,7 +2743,9 @@ typedef struct {
 #endif
 
 #ifdef __BIG_ENDIAN_BITFIELD
-	uint32_t rsvd1     : 23;  /* Reserved                             */
+	uint32_t rsvd1     : 19;  /* Reserved                             */
+	uint32_t cdss      :  1;  /* Configure Data Security SLI          */
+	uint32_t rsvd2     :  3;  /* Reserved                             */
 	uint32_t cbg       :  1;  /* Configure BlockGuard                 */
 	uint32_t cmv       :  1;  /* Configure Max VPIs                   */
 	uint32_t ccrp      :  1;  /* Config Command Ring Polling          */
@@ -2717,10 +2765,14 @@ typedef struct {
 	uint32_t ccrp      :  1;  /* Config Command Ring Polling          */
 	uint32_t cmv	   :  1;  /* Configure Max VPIs                   */
 	uint32_t cbg       :  1;  /* Configure BlockGuard                 */
-	uint32_t rsvd1     : 23;  /* Reserved                             */
+	uint32_t rsvd2     :  3;  /* Reserved                             */
+	uint32_t cdss      :  1;  /* Configure Data Security SLI          */
+	uint32_t rsvd1     : 19;  /* Reserved                             */
 #endif
 #ifdef __BIG_ENDIAN_BITFIELD
-	uint32_t rsvd2     : 23;  /* Reserved                             */
+	uint32_t rsvd3     : 19;  /* Reserved                             */
+	uint32_t gdss      :  1;  /* Configure Data Security SLI          */
+	uint32_t rsvd4     :  3;  /* Reserved                             */
 	uint32_t gbg       :  1;  /* Grant BlockGuard                     */
 	uint32_t gmv	   :  1;  /* Grant Max VPIs                       */
 	uint32_t gcrp	   :  1;  /* Grant Command Ring Polling           */
@@ -2740,7 +2792,9 @@ typedef struct {
 	uint32_t gcrp	   :  1;  /* Grant Command Ring Polling           */
 	uint32_t gmv	   :  1;  /* Grant Max VPIs                       */
 	uint32_t gbg       :  1;  /* Grant BlockGuard                     */
-	uint32_t rsvd2     : 23;  /* Reserved                             */
+	uint32_t rsvd4     :  3;  /* Reserved                             */
+	uint32_t gdss      :  1;  /* Configure Data Security SLI          */
+	uint32_t rsvd3     : 19;  /* Reserved                             */
 #endif
 
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -2753,20 +2807,20 @@ typedef struct {
 
 #ifdef __BIG_ENDIAN_BITFIELD
 	uint32_t max_hbq   : 16;  /* Max HBQs Host expect to configure    */
-	uint32_t rsvd3     : 16;  /* Max HBQs Host expect to configure    */
+	uint32_t rsvd5     : 16;  /* Max HBQs Host expect to configure    */
 #else	/*  __LITTLE_ENDIAN */
-	uint32_t rsvd3     : 16;  /* Max HBQs Host expect to configure    */
+	uint32_t rsvd5     : 16;  /* Max HBQs Host expect to configure    */
 	uint32_t max_hbq   : 16;  /* Max HBQs Host expect to configure    */
 #endif
 
-	uint32_t rsvd4;           /* Reserved                             */
+	uint32_t rsvd6;           /* Reserved                             */
 
 #ifdef __BIG_ENDIAN_BITFIELD
-	uint32_t rsvd5      : 16;  /* Reserved                             */
+	uint32_t rsvd7      : 16;  /* Reserved                             */
 	uint32_t max_vpi    : 16;  /* Max number of virt N-Ports           */
 #else	/*  __LITTLE_ENDIAN */
 	uint32_t max_vpi    : 16;  /* Max number of virt N-Ports           */
-	uint32_t rsvd5      : 16;  /* Reserved                             */
+	uint32_t rsvd7      : 16;  /* Reserved                             */
 #endif
 
 } CONFIG_PORT_VAR;
@@ -3666,3 +3720,5 @@ lpfc_error_lost_link(IOCB_t *iocbp)
 #define MENLO_TIMEOUT 30
 #define SETVAR_MLOMNT 0x103107
 #define SETVAR_MLORST 0x103007
+
+#define BPL_ALIGN_SZ 8 /* 8 byte alignment for bpl and mbufs */
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
new file mode 100644
index 0000000..39c34b3
--- /dev/null
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -0,0 +1,2141 @@
+/*******************************************************************
+ * This file is part of the Emulex Linux Device Driver for         *
+ * Fibre Channel Host Bus Adapters.                                *
+ * Copyright (C) 2009 Emulex.  All rights reserved.                *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *******************************************************************/
+
+/* Macros to deal with bit fields. Each bit field must have 3 #defines
+ * associated with it (_SHIFT, _MASK, and _WORD).
+ * EG. For a bit field that is in the 7th bit of the "field4" field of a
+ * structure and is 2 bits in size the following #defines must exist:
+ *	struct temp {
+ *		uint32_t	field1;
+ *		uint32_t	field2;
+ *		uint32_t	field3;
+ *		uint32_t	field4;
+ *	#define example_bit_field_SHIFT		7
+ *	#define example_bit_field_MASK		0x03
+ *	#define example_bit_field_WORD		field4
+ *		uint32_t	field5;
+ *	};
+ * Then the macros below may be used to get or set the value of that field.
+ * EG. To get the value of the bit field from the above example:
+ *	struct temp t1;
+ *	value = bf_get(example_bit_field, &t1);
+ * And then to set that bit field:
+ *	bf_set(example_bit_field, &t1, 2);
+ * Or clear that bit field:
+ *	bf_set(example_bit_field, &t1, 0);
+ */
+#define bf_get(name, ptr) \
+	(((ptr)->name##_WORD >> name##_SHIFT) & name##_MASK)
+#define bf_set(name, ptr, value) \
+	((ptr)->name##_WORD = ((((value) & name##_MASK) << name##_SHIFT) | \
+		 ((ptr)->name##_WORD & ~(name##_MASK << name##_SHIFT))))
+
+struct dma_address {
+	uint32_t addr_lo;
+	uint32_t addr_hi;
+};
+
+#define LPFC_SLI4_BAR0		1
+#define LPFC_SLI4_BAR1		2
+#define LPFC_SLI4_BAR2		4
+
+#define LPFC_SLI4_MBX_EMBED	true
+#define LPFC_SLI4_MBX_NEMBED	false
+
+#define LPFC_SLI4_MB_WORD_COUNT		64
+#define LPFC_MAX_MQ_PAGE		8
+#define LPFC_MAX_WQ_PAGE		8
+#define LPFC_MAX_CQ_PAGE		4
+#define LPFC_MAX_EQ_PAGE		8
+
+#define LPFC_VIR_FUNC_MAX       32 /* Maximum number of virtual functions */
+#define LPFC_PCI_FUNC_MAX        5 /* Maximum number of PCI functions */
+#define LPFC_VFR_PAGE_SIZE	0x1000 /* 4KB BAR2 per-VF register page size */
+
+/* Define SLI4 Alignment requirements. */
+#define LPFC_ALIGN_16_BYTE	16
+#define LPFC_ALIGN_64_BYTE	64
+
+/* Define SLI4 specific definitions. */
+#define LPFC_MQ_CQE_BYTE_OFFSET	256
+#define LPFC_MBX_CMD_HDR_LENGTH 16
+#define LPFC_MBX_ERROR_RANGE	0x4000
+#define LPFC_BMBX_BIT1_ADDR_HI	0x2
+#define LPFC_BMBX_BIT1_ADDR_LO	0
+#define LPFC_RPI_HDR_COUNT	64
+#define LPFC_HDR_TEMPLATE_SIZE	4096
+#define LPFC_RPI_ALLOC_ERROR 	0xFFFF
+#define LPFC_FCF_RECORD_WD_CNT	132
+#define LPFC_ENTIRE_FCF_DATABASE 0
+#define LPFC_DFLT_FCF_INDEX	 0
+
+/* Virtual function numbers */
+#define LPFC_VF0		0
+#define LPFC_VF1		1
+#define LPFC_VF2		2
+#define LPFC_VF3		3
+#define LPFC_VF4		4
+#define LPFC_VF5		5
+#define LPFC_VF6		6
+#define LPFC_VF7		7
+#define LPFC_VF8		8
+#define LPFC_VF9		9
+#define LPFC_VF10		10
+#define LPFC_VF11		11
+#define LPFC_VF12		12
+#define LPFC_VF13		13
+#define LPFC_VF14		14
+#define LPFC_VF15		15
+#define LPFC_VF16		16
+#define LPFC_VF17		17
+#define LPFC_VF18		18
+#define LPFC_VF19		19
+#define LPFC_VF20		20
+#define LPFC_VF21		21
+#define LPFC_VF22		22
+#define LPFC_VF23		23
+#define LPFC_VF24		24
+#define LPFC_VF25		25
+#define LPFC_VF26		26
+#define LPFC_VF27		27
+#define LPFC_VF28		28
+#define LPFC_VF29		29
+#define LPFC_VF30		30
+#define LPFC_VF31		31
+
+/* PCI function numbers */
+#define LPFC_PCI_FUNC0		0
+#define LPFC_PCI_FUNC1		1
+#define LPFC_PCI_FUNC2		2
+#define LPFC_PCI_FUNC3		3
+#define LPFC_PCI_FUNC4		4
+
+/* Active interrupt test count */
+#define LPFC_ACT_INTR_CNT	4
+
+/* Delay Multiplier constant */
+#define LPFC_DMULT_CONST       651042
+#define LPFC_MIM_IMAX          636
+#define LPFC_FP_DEF_IMAX       10000
+#define LPFC_SP_DEF_IMAX       10000
+
+struct ulp_bde64 {
+	union ULP_BDE_TUS {
+		uint32_t w;
+		struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+			uint32_t bdeFlags:8;	/* BDE Flags 0 IS A SUPPORTED
+						   VALUE !! */
+			uint32_t bdeSize:24;	/* Size of buffer (in bytes) */
+#else	/*  __LITTLE_ENDIAN_BITFIELD */
+			uint32_t bdeSize:24;	/* Size of buffer (in bytes) */
+			uint32_t bdeFlags:8;	/* BDE Flags 0 IS A SUPPORTED
+						   VALUE !! */
+#endif
+#define BUFF_TYPE_BDE_64    0x00	/* BDE (Host_resident) */
+#define BUFF_TYPE_BDE_IMMED 0x01	/* Immediate Data BDE */
+#define BUFF_TYPE_BDE_64P   0x02	/* BDE (Port-resident) */
+#define BUFF_TYPE_BDE_64I   0x08	/* Input BDE (Host-resident) */
+#define BUFF_TYPE_BDE_64IP  0x0A	/* Input BDE (Port-resident) */
+#define BUFF_TYPE_BLP_64    0x40	/* BLP (Host-resident) */
+#define BUFF_TYPE_BLP_64P   0x42	/* BLP (Port-resident) */
+		} f;
+	} tus;
+	uint32_t addrLow;
+	uint32_t addrHigh;
+};
+
+struct lpfc_sli4_flags {
+	uint32_t word0;
+#define lpfc_fip_flag_SHIFT 0
+#define lpfc_fip_flag_MASK 0x00000001
+#define lpfc_fip_flag_WORD word0
+};
+
+/* event queue entry structure */
+struct lpfc_eqe {
+	uint32_t word0;
+#define lpfc_eqe_resource_id_SHIFT	16
+#define lpfc_eqe_resource_id_MASK	0x000000FF
+#define lpfc_eqe_resource_id_WORD	word0
+#define lpfc_eqe_minor_code_SHIFT	4
+#define lpfc_eqe_minor_code_MASK	0x00000FFF
+#define lpfc_eqe_minor_code_WORD	word0
+#define lpfc_eqe_major_code_SHIFT	1
+#define lpfc_eqe_major_code_MASK	0x00000007
+#define lpfc_eqe_major_code_WORD	word0
+#define lpfc_eqe_valid_SHIFT		0
+#define lpfc_eqe_valid_MASK		0x00000001
+#define lpfc_eqe_valid_WORD		word0
+};
+
+/* completion queue entry structure (common fields for all cqe types) */
+struct lpfc_cqe {
+	uint32_t reserved0;
+	uint32_t reserved1;
+	uint32_t reserved2;
+	uint32_t word3;
+#define lpfc_cqe_valid_SHIFT		31
+#define lpfc_cqe_valid_MASK		0x00000001
+#define lpfc_cqe_valid_WORD		word3
+#define lpfc_cqe_code_SHIFT		16
+#define lpfc_cqe_code_MASK		0x000000FF
+#define lpfc_cqe_code_WORD		word3
+};
+
+/* Completion Queue Entry Status Codes */
+#define CQE_STATUS_SUCCESS		0x0
+#define CQE_STATUS_FCP_RSP_FAILURE	0x1
+#define CQE_STATUS_REMOTE_STOP		0x2
+#define CQE_STATUS_LOCAL_REJECT		0x3
+#define CQE_STATUS_NPORT_RJT		0x4
+#define CQE_STATUS_FABRIC_RJT		0x5
+#define CQE_STATUS_NPORT_BSY		0x6
+#define CQE_STATUS_FABRIC_BSY		0x7
+#define CQE_STATUS_INTERMED_RSP		0x8
+#define CQE_STATUS_LS_RJT		0x9
+#define CQE_STATUS_CMD_REJECT		0xb
+#define CQE_STATUS_FCP_TGT_LENCHECK	0xc
+#define CQE_STATUS_NEED_BUFF_ENTRY	0xf
+
+/* Status returned by hardware (valid only if status = CQE_STATUS_SUCCESS). */
+#define CQE_HW_STATUS_NO_ERR		0x0
+#define CQE_HW_STATUS_UNDERRUN		0x1
+#define CQE_HW_STATUS_OVERRUN		0x2
+
+/* Completion Queue Entry Codes */
+#define CQE_CODE_COMPL_WQE		0x1
+#define CQE_CODE_RELEASE_WQE		0x2
+#define CQE_CODE_RECEIVE		0x4
+#define CQE_CODE_XRI_ABORTED		0x5
+
+/* completion queue entry for wqe completions */
+struct lpfc_wcqe_complete {
+	uint32_t word0;
+#define lpfc_wcqe_c_request_tag_SHIFT	16
+#define lpfc_wcqe_c_request_tag_MASK	0x0000FFFF
+#define lpfc_wcqe_c_request_tag_WORD	word0
+#define lpfc_wcqe_c_status_SHIFT	8
+#define lpfc_wcqe_c_status_MASK		0x000000FF
+#define lpfc_wcqe_c_status_WORD		word0
+#define lpfc_wcqe_c_hw_status_SHIFT	0
+#define lpfc_wcqe_c_hw_status_MASK	0x000000FF
+#define lpfc_wcqe_c_hw_status_WORD	word0
+	uint32_t total_data_placed;
+	uint32_t parameter;
+	uint32_t word3;
+#define lpfc_wcqe_c_valid_SHIFT		lpfc_cqe_valid_SHIFT
+#define lpfc_wcqe_c_valid_MASK		lpfc_cqe_valid_MASK
+#define lpfc_wcqe_c_valid_WORD		lpfc_cqe_valid_WORD
+#define lpfc_wcqe_c_xb_SHIFT		28
+#define lpfc_wcqe_c_xb_MASK		0x00000001
+#define lpfc_wcqe_c_xb_WORD		word3
+#define lpfc_wcqe_c_pv_SHIFT		27
+#define lpfc_wcqe_c_pv_MASK		0x00000001
+#define lpfc_wcqe_c_pv_WORD		word3
+#define lpfc_wcqe_c_priority_SHIFT	24
+#define lpfc_wcqe_c_priority_MASK		0x00000007
+#define lpfc_wcqe_c_priority_WORD		word3
+#define lpfc_wcqe_c_code_SHIFT		lpfc_cqe_code_SHIFT
+#define lpfc_wcqe_c_code_MASK		lpfc_cqe_code_MASK
+#define lpfc_wcqe_c_code_WORD		lpfc_cqe_code_WORD
+};
+
+/* completion queue entry for wqe release */
+struct lpfc_wcqe_release {
+	uint32_t reserved0;
+	uint32_t reserved1;
+	uint32_t word2;
+#define lpfc_wcqe_r_wq_id_SHIFT		16
+#define lpfc_wcqe_r_wq_id_MASK		0x0000FFFF
+#define lpfc_wcqe_r_wq_id_WORD		word2
+#define lpfc_wcqe_r_wqe_index_SHIFT	0
+#define lpfc_wcqe_r_wqe_index_MASK	0x0000FFFF
+#define lpfc_wcqe_r_wqe_index_WORD	word2
+	uint32_t word3;
+#define lpfc_wcqe_r_valid_SHIFT		lpfc_cqe_valid_SHIFT
+#define lpfc_wcqe_r_valid_MASK		lpfc_cqe_valid_MASK
+#define lpfc_wcqe_r_valid_WORD		lpfc_cqe_valid_WORD
+#define lpfc_wcqe_r_code_SHIFT		lpfc_cqe_code_SHIFT
+#define lpfc_wcqe_r_code_MASK		lpfc_cqe_code_MASK
+#define lpfc_wcqe_r_code_WORD		lpfc_cqe_code_WORD
+};
+
+struct sli4_wcqe_xri_aborted {
+	uint32_t word0;
+#define lpfc_wcqe_xa_status_SHIFT		8
+#define lpfc_wcqe_xa_status_MASK		0x000000FF
+#define lpfc_wcqe_xa_status_WORD		word0
+	uint32_t parameter;
+	uint32_t word2;
+#define lpfc_wcqe_xa_remote_xid_SHIFT	16
+#define lpfc_wcqe_xa_remote_xid_MASK	0x0000FFFF
+#define lpfc_wcqe_xa_remote_xid_WORD	word2
+#define lpfc_wcqe_xa_xri_SHIFT		0
+#define lpfc_wcqe_xa_xri_MASK		0x0000FFFF
+#define lpfc_wcqe_xa_xri_WORD		word2
+	uint32_t word3;
+#define lpfc_wcqe_xa_valid_SHIFT	lpfc_cqe_valid_SHIFT
+#define lpfc_wcqe_xa_valid_MASK		lpfc_cqe_valid_MASK
+#define lpfc_wcqe_xa_valid_WORD		lpfc_cqe_valid_WORD
+#define lpfc_wcqe_xa_ia_SHIFT		30
+#define lpfc_wcqe_xa_ia_MASK		0x00000001
+#define lpfc_wcqe_xa_ia_WORD		word3
+#define CQE_XRI_ABORTED_IA_REMOTE	0
+#define CQE_XRI_ABORTED_IA_LOCAL	1
+#define lpfc_wcqe_xa_br_SHIFT		29
+#define lpfc_wcqe_xa_br_MASK		0x00000001
+#define lpfc_wcqe_xa_br_WORD		word3
+#define CQE_XRI_ABORTED_BR_BA_ACC	0
+#define CQE_XRI_ABORTED_BR_BA_RJT	1
+#define lpfc_wcqe_xa_eo_SHIFT		28
+#define lpfc_wcqe_xa_eo_MASK		0x00000001
+#define lpfc_wcqe_xa_eo_WORD		word3
+#define CQE_XRI_ABORTED_EO_REMOTE	0
+#define CQE_XRI_ABORTED_EO_LOCAL	1
+#define lpfc_wcqe_xa_code_SHIFT		lpfc_cqe_code_SHIFT
+#define lpfc_wcqe_xa_code_MASK		lpfc_cqe_code_MASK
+#define lpfc_wcqe_xa_code_WORD		lpfc_cqe_code_WORD
+};
+
+/* completion queue entry structure for rqe completion */
+struct lpfc_rcqe {
+	uint32_t word0;
+#define lpfc_rcqe_bindex_SHIFT		16
+#define lpfc_rcqe_bindex_MASK		0x0000FFF
+#define lpfc_rcqe_bindex_WORD		word0
+#define lpfc_rcqe_status_SHIFT		8
+#define lpfc_rcqe_status_MASK		0x000000FF
+#define lpfc_rcqe_status_WORD		word0
+#define FC_STATUS_RQ_SUCCESS		0x10 /* Async receive successful */
+#define FC_STATUS_RQ_BUF_LEN_EXCEEDED 	0x11 /* payload truncated */
+#define FC_STATUS_INSUFF_BUF_NEED_BUF 	0x12 /* Insufficient buffers */
+#define FC_STATUS_INSUFF_BUF_FRM_DISC 	0x13 /* Frame Discard */
+	uint32_t reserved1;
+	uint32_t word2;
+#define lpfc_rcqe_length_SHIFT		16
+#define lpfc_rcqe_length_MASK		0x0000FFFF
+#define lpfc_rcqe_length_WORD		word2
+#define lpfc_rcqe_rq_id_SHIFT		6
+#define lpfc_rcqe_rq_id_MASK		0x000003FF
+#define lpfc_rcqe_rq_id_WORD		word2
+#define lpfc_rcqe_fcf_id_SHIFT		0
+#define lpfc_rcqe_fcf_id_MASK		0x0000003F
+#define lpfc_rcqe_fcf_id_WORD		word2
+	uint32_t word3;
+#define lpfc_rcqe_valid_SHIFT		lpfc_cqe_valid_SHIFT
+#define lpfc_rcqe_valid_MASK		lpfc_cqe_valid_MASK
+#define lpfc_rcqe_valid_WORD		lpfc_cqe_valid_WORD
+#define lpfc_rcqe_port_SHIFT		30
+#define lpfc_rcqe_port_MASK		0x00000001
+#define lpfc_rcqe_port_WORD		word3
+#define lpfc_rcqe_hdr_length_SHIFT	24
+#define lpfc_rcqe_hdr_length_MASK	0x0000001F
+#define lpfc_rcqe_hdr_length_WORD	word3
+#define lpfc_rcqe_code_SHIFT		lpfc_cqe_code_SHIFT
+#define lpfc_rcqe_code_MASK		lpfc_cqe_code_MASK
+#define lpfc_rcqe_code_WORD		lpfc_cqe_code_WORD
+#define lpfc_rcqe_eof_SHIFT		8
+#define lpfc_rcqe_eof_MASK		0x000000FF
+#define lpfc_rcqe_eof_WORD		word3
+#define FCOE_EOFn	0x41
+#define FCOE_EOFt	0x42
+#define FCOE_EOFni	0x49
+#define FCOE_EOFa	0x50
+#define lpfc_rcqe_sof_SHIFT		0
+#define lpfc_rcqe_sof_MASK		0x000000FF
+#define lpfc_rcqe_sof_WORD		word3
+#define FCOE_SOFi2	0x2d
+#define FCOE_SOFi3	0x2e
+#define FCOE_SOFn2	0x35
+#define FCOE_SOFn3	0x36
+};
+
+struct lpfc_wqe_generic{
+	struct ulp_bde64 bde;
+	uint32_t word3;
+	uint32_t word4;
+	uint32_t word5;
+	uint32_t word6;
+#define lpfc_wqe_gen_context_SHIFT	16
+#define lpfc_wqe_gen_context_MASK	0x0000FFFF
+#define lpfc_wqe_gen_context_WORD	word6
+#define lpfc_wqe_gen_xri_SHIFT		0
+#define lpfc_wqe_gen_xri_MASK		0x0000FFFF
+#define lpfc_wqe_gen_xri_WORD		word6
+	uint32_t word7;
+#define lpfc_wqe_gen_lnk_SHIFT		23
+#define lpfc_wqe_gen_lnk_MASK		0x00000001
+#define lpfc_wqe_gen_lnk_WORD		word7
+#define lpfc_wqe_gen_erp_SHIFT		22
+#define lpfc_wqe_gen_erp_MASK		0x00000001
+#define lpfc_wqe_gen_erp_WORD		word7
+#define lpfc_wqe_gen_pu_SHIFT		20
+#define lpfc_wqe_gen_pu_MASK		0x00000003
+#define lpfc_wqe_gen_pu_WORD		word7
+#define lpfc_wqe_gen_class_SHIFT	16
+#define lpfc_wqe_gen_class_MASK		0x00000007
+#define lpfc_wqe_gen_class_WORD		word7
+#define lpfc_wqe_gen_command_SHIFT	8
+#define lpfc_wqe_gen_command_MASK	0x000000FF
+#define lpfc_wqe_gen_command_WORD	word7
+#define lpfc_wqe_gen_status_SHIFT	4
+#define lpfc_wqe_gen_status_MASK	0x0000000F
+#define lpfc_wqe_gen_status_WORD	word7
+#define lpfc_wqe_gen_ct_SHIFT		2
+#define lpfc_wqe_gen_ct_MASK		0x00000007
+#define lpfc_wqe_gen_ct_WORD		word7
+	uint32_t abort_tag;
+	uint32_t word9;
+#define lpfc_wqe_gen_request_tag_SHIFT	0
+#define lpfc_wqe_gen_request_tag_MASK	0x0000FFFF
+#define lpfc_wqe_gen_request_tag_WORD	word9
+	uint32_t word10;
+#define lpfc_wqe_gen_ccp_SHIFT		24
+#define lpfc_wqe_gen_ccp_MASK		0x000000FF
+#define lpfc_wqe_gen_ccp_WORD		word10
+#define lpfc_wqe_gen_ccpe_SHIFT		23
+#define lpfc_wqe_gen_ccpe_MASK		0x00000001
+#define lpfc_wqe_gen_ccpe_WORD		word10
+#define lpfc_wqe_gen_pv_SHIFT		19
+#define lpfc_wqe_gen_pv_MASK		0x00000001
+#define lpfc_wqe_gen_pv_WORD		word10
+#define lpfc_wqe_gen_pri_SHIFT		16
+#define lpfc_wqe_gen_pri_MASK		0x00000007
+#define lpfc_wqe_gen_pri_WORD		word10
+	uint32_t word11;
+#define lpfc_wqe_gen_cq_id_SHIFT	16
+#define lpfc_wqe_gen_cq_id_MASK		0x000003FF
+#define lpfc_wqe_gen_cq_id_WORD		word11
+#define LPFC_WQE_CQ_ID_DEFAULT	0x3ff
+#define lpfc_wqe_gen_wqec_SHIFT		7
+#define lpfc_wqe_gen_wqec_MASK		0x00000001
+#define lpfc_wqe_gen_wqec_WORD		word11
+#define lpfc_wqe_gen_cmd_type_SHIFT	0
+#define lpfc_wqe_gen_cmd_type_MASK	0x0000000F
+#define lpfc_wqe_gen_cmd_type_WORD	word11
+	uint32_t payload[4];
+};
+
+struct lpfc_rqe {
+	uint32_t address_hi;
+	uint32_t address_lo;
+};
+
+/* buffer descriptors */
+struct lpfc_bde4 {
+	uint32_t addr_hi;
+	uint32_t addr_lo;
+	uint32_t word2;
+#define lpfc_bde4_last_SHIFT		31
+#define lpfc_bde4_last_MASK		0x00000001
+#define lpfc_bde4_last_WORD		word2
+#define lpfc_bde4_sge_offset_SHIFT	0
+#define lpfc_bde4_sge_offset_MASK	0x000003FF
+#define lpfc_bde4_sge_offset_WORD	word2
+	uint32_t word3;
+#define lpfc_bde4_length_SHIFT		0
+#define lpfc_bde4_length_MASK		0x000000FF
+#define lpfc_bde4_length_WORD		word3
+};
+
+struct lpfc_register {
+	uint32_t word0;
+};
+
+#define LPFC_UERR_STATUS_HI		0x00A4
+#define LPFC_UERR_STATUS_LO		0x00A0
+#define LPFC_ONLINE0			0x00B0
+#define LPFC_ONLINE1			0x00B4
+#define LPFC_SCRATCHPAD			0x0058
+
+/* BAR0 Registers */
+#define LPFC_HST_STATE			0x00AC
+#define lpfc_hst_state_perr_SHIFT	31
+#define lpfc_hst_state_perr_MASK	0x1
+#define lpfc_hst_state_perr_WORD	word0
+#define lpfc_hst_state_sfi_SHIFT	30
+#define lpfc_hst_state_sfi_MASK		0x1
+#define lpfc_hst_state_sfi_WORD		word0
+#define lpfc_hst_state_nip_SHIFT	29
+#define lpfc_hst_state_nip_MASK		0x1
+#define lpfc_hst_state_nip_WORD		word0
+#define lpfc_hst_state_ipc_SHIFT	28
+#define lpfc_hst_state_ipc_MASK		0x1
+#define lpfc_hst_state_ipc_WORD		word0
+#define lpfc_hst_state_xrom_SHIFT	27
+#define lpfc_hst_state_xrom_MASK	0x1
+#define lpfc_hst_state_xrom_WORD	word0
+#define lpfc_hst_state_dl_SHIFT		26
+#define lpfc_hst_state_dl_MASK		0x1
+#define lpfc_hst_state_dl_WORD		word0
+#define lpfc_hst_state_port_status_SHIFT	0
+#define lpfc_hst_state_port_status_MASK		0xFFFF
+#define lpfc_hst_state_port_status_WORD		word0
+
+#define LPFC_POST_STAGE_POWER_ON_RESET			0x0000
+#define LPFC_POST_STAGE_AWAITING_HOST_RDY		0x0001
+#define LPFC_POST_STAGE_HOST_RDY			0x0002
+#define LPFC_POST_STAGE_BE_RESET			0x0003
+#define LPFC_POST_STAGE_SEEPROM_CS_START		0x0100
+#define LPFC_POST_STAGE_SEEPROM_CS_DONE			0x0101
+#define LPFC_POST_STAGE_DDR_CONFIG_START		0x0200
+#define LPFC_POST_STAGE_DDR_CONFIG_DONE			0x0201
+#define LPFC_POST_STAGE_DDR_CALIBRATE_START		0x0300
+#define LPFC_POST_STAGE_DDR_CALIBRATE_DONE		0x0301
+#define LPFC_POST_STAGE_DDR_TEST_START			0x0400
+#define LPFC_POST_STAGE_DDR_TEST_DONE			0x0401
+#define LPFC_POST_STAGE_REDBOOT_INIT_START		0x0600
+#define LPFC_POST_STAGE_REDBOOT_INIT_DONE		0x0601
+#define LPFC_POST_STAGE_FW_IMAGE_LOAD_START		0x0700
+#define LPFC_POST_STAGE_FW_IMAGE_LOAD_DONE		0x0701
+#define LPFC_POST_STAGE_ARMFW_START			0x0800
+#define LPFC_POST_STAGE_DHCP_QUERY_START		0x0900
+#define LPFC_POST_STAGE_DHCP_QUERY_DONE			0x0901
+#define LPFC_POST_STAGE_BOOT_TARGET_DISCOVERY_START	0x0A00
+#define LPFC_POST_STAGE_BOOT_TARGET_DISCOVERY_DONE	0x0A01
+#define LPFC_POST_STAGE_RC_OPTION_SET			0x0B00
+#define LPFC_POST_STAGE_SWITCH_LINK			0x0B01
+#define LPFC_POST_STAGE_SEND_ICDS_MESSAGE		0x0B02
+#define LPFC_POST_STAGE_PERFROM_TFTP			0x0B03
+#define LPFC_POST_STAGE_PARSE_XML			0x0B04
+#define LPFC_POST_STAGE_DOWNLOAD_IMAGE			0x0B05
+#define LPFC_POST_STAGE_FLASH_IMAGE			0x0B06
+#define LPFC_POST_STAGE_RC_DONE				0x0B07
+#define LPFC_POST_STAGE_REBOOT_SYSTEM			0x0B08
+#define LPFC_POST_STAGE_MAC_ADDRESS			0x0C00
+#define LPFC_POST_STAGE_ARMFW_READY			0xC000
+#define LPFC_POST_STAGE_ARMFW_UE 			0xF000
+
+#define lpfc_scratchpad_slirev_SHIFT			4
+#define lpfc_scratchpad_slirev_MASK			0xF
+#define lpfc_scratchpad_slirev_WORD			word0
+#define lpfc_scratchpad_chiptype_SHIFT			8
+#define lpfc_scratchpad_chiptype_MASK			0xFF
+#define lpfc_scratchpad_chiptype_WORD			word0
+#define lpfc_scratchpad_featurelevel1_SHIFT		16
+#define lpfc_scratchpad_featurelevel1_MASK		0xFF
+#define lpfc_scratchpad_featurelevel1_WORD		word0
+#define lpfc_scratchpad_featurelevel2_SHIFT		24
+#define lpfc_scratchpad_featurelevel2_MASK		0xFF
+#define lpfc_scratchpad_featurelevel2_WORD		word0
+
+/* BAR1 Registers */
+#define LPFC_IMR_MASK_ALL	0xFFFFFFFF
+#define LPFC_ISCR_CLEAR_ALL	0xFFFFFFFF
+
+#define LPFC_HST_ISR0		0x0C18
+#define LPFC_HST_ISR1		0x0C1C
+#define LPFC_HST_ISR2		0x0C20
+#define LPFC_HST_ISR3		0x0C24
+#define LPFC_HST_ISR4		0x0C28
+
+#define LPFC_HST_IMR0		0x0C48
+#define LPFC_HST_IMR1		0x0C4C
+#define LPFC_HST_IMR2		0x0C50
+#define LPFC_HST_IMR3		0x0C54
+#define LPFC_HST_IMR4		0x0C58
+
+#define LPFC_HST_ISCR0		0x0C78
+#define LPFC_HST_ISCR1		0x0C7C
+#define LPFC_HST_ISCR2		0x0C80
+#define LPFC_HST_ISCR3		0x0C84
+#define LPFC_HST_ISCR4		0x0C88
+
+#define LPFC_SLI4_INTR0			BIT0
+#define LPFC_SLI4_INTR1			BIT1
+#define LPFC_SLI4_INTR2			BIT2
+#define LPFC_SLI4_INTR3			BIT3
+#define LPFC_SLI4_INTR4			BIT4
+#define LPFC_SLI4_INTR5			BIT5
+#define LPFC_SLI4_INTR6			BIT6
+#define LPFC_SLI4_INTR7			BIT7
+#define LPFC_SLI4_INTR8			BIT8
+#define LPFC_SLI4_INTR9			BIT9
+#define LPFC_SLI4_INTR10		BIT10
+#define LPFC_SLI4_INTR11		BIT11
+#define LPFC_SLI4_INTR12		BIT12
+#define LPFC_SLI4_INTR13		BIT13
+#define LPFC_SLI4_INTR14		BIT14
+#define LPFC_SLI4_INTR15		BIT15
+#define LPFC_SLI4_INTR16		BIT16
+#define LPFC_SLI4_INTR17		BIT17
+#define LPFC_SLI4_INTR18		BIT18
+#define LPFC_SLI4_INTR19		BIT19
+#define LPFC_SLI4_INTR20		BIT20
+#define LPFC_SLI4_INTR21		BIT21
+#define LPFC_SLI4_INTR22		BIT22
+#define LPFC_SLI4_INTR23		BIT23
+#define LPFC_SLI4_INTR24		BIT24
+#define LPFC_SLI4_INTR25		BIT25
+#define LPFC_SLI4_INTR26		BIT26
+#define LPFC_SLI4_INTR27		BIT27
+#define LPFC_SLI4_INTR28		BIT28
+#define LPFC_SLI4_INTR29		BIT29
+#define LPFC_SLI4_INTR30		BIT30
+#define LPFC_SLI4_INTR31		BIT31
+
+/* BAR2 Registers */
+#define LPFC_RQ_DOORBELL		0x00A0
+#define lpfc_rq_doorbell_num_posted_SHIFT	16
+#define lpfc_rq_doorbell_num_posted_MASK	0x3FFF
+#define lpfc_rq_doorbell_num_posted_WORD	word0
+#define LPFC_RQ_POST_BATCH		8	/* RQEs to post at one time */
+#define lpfc_rq_doorbell_id_SHIFT		0
+#define lpfc_rq_doorbell_id_MASK		0x03FF
+#define lpfc_rq_doorbell_id_WORD		word0
+
+#define LPFC_WQ_DOORBELL		0x0040
+#define lpfc_wq_doorbell_num_posted_SHIFT	24
+#define lpfc_wq_doorbell_num_posted_MASK	0x00FF
+#define lpfc_wq_doorbell_num_posted_WORD	word0
+#define lpfc_wq_doorbell_index_SHIFT		16
+#define lpfc_wq_doorbell_index_MASK		0x00FF
+#define lpfc_wq_doorbell_index_WORD		word0
+#define lpfc_wq_doorbell_id_SHIFT		0
+#define lpfc_wq_doorbell_id_MASK		0xFFFF
+#define lpfc_wq_doorbell_id_WORD		word0
+
+#define LPFC_EQCQ_DOORBELL		0x0120
+#define lpfc_eqcq_doorbell_arm_SHIFT		29
+#define lpfc_eqcq_doorbell_arm_MASK		0x0001
+#define lpfc_eqcq_doorbell_arm_WORD		word0
+#define lpfc_eqcq_doorbell_num_released_SHIFT	16
+#define lpfc_eqcq_doorbell_num_released_MASK	0x1FFF
+#define lpfc_eqcq_doorbell_num_released_WORD	word0
+#define lpfc_eqcq_doorbell_qt_SHIFT		10
+#define lpfc_eqcq_doorbell_qt_MASK		0x0001
+#define lpfc_eqcq_doorbell_qt_WORD		word0
+#define LPFC_QUEUE_TYPE_COMPLETION	0
+#define LPFC_QUEUE_TYPE_EVENT		1
+#define lpfc_eqcq_doorbell_eqci_SHIFT		9
+#define lpfc_eqcq_doorbell_eqci_MASK		0x0001
+#define lpfc_eqcq_doorbell_eqci_WORD		word0
+#define lpfc_eqcq_doorbell_cqid_SHIFT		0
+#define lpfc_eqcq_doorbell_cqid_MASK		0x03FF
+#define lpfc_eqcq_doorbell_cqid_WORD		word0
+#define lpfc_eqcq_doorbell_eqid_SHIFT		0
+#define lpfc_eqcq_doorbell_eqid_MASK		0x01FF
+#define lpfc_eqcq_doorbell_eqid_WORD		word0
+
+#define LPFC_BMBX			0x0160
+#define lpfc_bmbx_addr_SHIFT		2
+#define lpfc_bmbx_addr_MASK		0x3FFFFFFF
+#define lpfc_bmbx_addr_WORD		word0
+#define lpfc_bmbx_hi_SHIFT		1
+#define lpfc_bmbx_hi_MASK		0x0001
+#define lpfc_bmbx_hi_WORD		word0
+#define lpfc_bmbx_rdy_SHIFT		0
+#define lpfc_bmbx_rdy_MASK		0x0001
+#define lpfc_bmbx_rdy_WORD		word0
+
+#define LPFC_MQ_DOORBELL			0x0140
+#define lpfc_mq_doorbell_num_posted_SHIFT	16
+#define lpfc_mq_doorbell_num_posted_MASK	0x3FFF
+#define lpfc_mq_doorbell_num_posted_WORD	word0
+#define lpfc_mq_doorbell_id_SHIFT		0
+#define lpfc_mq_doorbell_id_MASK		0x03FF
+#define lpfc_mq_doorbell_id_WORD		word0
+
+struct lpfc_sli4_cfg_mhdr {
+	uint32_t word1;
+#define lpfc_mbox_hdr_emb_SHIFT		0
+#define lpfc_mbox_hdr_emb_MASK		0x00000001
+#define lpfc_mbox_hdr_emb_WORD		word1
+#define lpfc_mbox_hdr_sge_cnt_SHIFT	3
+#define lpfc_mbox_hdr_sge_cnt_MASK	0x0000001F
+#define lpfc_mbox_hdr_sge_cnt_WORD	word1
+	uint32_t payload_length;
+	uint32_t tag_lo;
+	uint32_t tag_hi;
+	uint32_t reserved5;
+};
+
+union lpfc_sli4_cfg_shdr {
+	struct {
+		uint32_t word6;
+#define lpfc_mbox_hdr_opcode_SHIFT		0
+#define lpfc_mbox_hdr_opcode_MASK		0x000000FF
+#define lpfc_mbox_hdr_opcode_WORD		word6
+#define lpfc_mbox_hdr_subsystem_SHIFT		8
+#define lpfc_mbox_hdr_subsystem_MASK		0x000000FF
+#define lpfc_mbox_hdr_subsystem_WORD		word6
+#define lpfc_mbox_hdr_port_number_SHIFT		16
+#define lpfc_mbox_hdr_port_number_MASK		0x000000FF
+#define lpfc_mbox_hdr_port_number_WORD		word6
+#define lpfc_mbox_hdr_domain_SHIFT		24
+#define lpfc_mbox_hdr_domain_MASK		0x000000FF
+#define lpfc_mbox_hdr_domain_WORD		word6
+		uint32_t timeout;
+		uint32_t request_length;
+		uint32_t reserved9;
+	} request;
+	struct {
+		uint32_t word6;
+#define lpfc_mbox_hdr_opcode_SHIFT		0
+#define lpfc_mbox_hdr_opcode_MASK		0x000000FF
+#define lpfc_mbox_hdr_opcode_WORD		word6
+#define lpfc_mbox_hdr_subsystem_SHIFT		8
+#define lpfc_mbox_hdr_subsystem_MASK		0x000000FF
+#define lpfc_mbox_hdr_subsystem_WORD		word6
+#define lpfc_mbox_hdr_domain_SHIFT		24
+#define lpfc_mbox_hdr_domain_MASK		0x000000FF
+#define lpfc_mbox_hdr_domain_WORD		word6
+		uint32_t word7;
+#define lpfc_mbox_hdr_status_SHIFT		0
+#define lpfc_mbox_hdr_status_MASK		0x000000FF
+#define lpfc_mbox_hdr_status_WORD		word7
+#define lpfc_mbox_hdr_add_status_SHIFT		8
+#define lpfc_mbox_hdr_add_status_MASK		0x000000FF
+#define lpfc_mbox_hdr_add_status_WORD		word7
+		uint32_t response_length;
+		uint32_t actual_response_length;
+	} response;
+};
+
+/* Mailbox structures */
+struct mbox_header {
+	struct lpfc_sli4_cfg_mhdr cfg_mhdr;
+	union  lpfc_sli4_cfg_shdr cfg_shdr;
+};
+
+/* Subsystem Definitions */
+#define LPFC_MBOX_SUBSYSTEM_COMMON	0x1
+#define LPFC_MBOX_SUBSYSTEM_FCOE	0xC
+
+/* Device Specific Definitions */
+
+/* The HOST ENDIAN defines are in Big Endian format. */
+#define HOST_ENDIAN_LOW_WORD0   0xFF3412FF
+#define HOST_ENDIAN_HIGH_WORD1	0xFF7856FF
+
+/* Common Opcodes */
+#define LPFC_MBOX_OPCODE_CQ_CREATE		0x0C
+#define LPFC_MBOX_OPCODE_EQ_CREATE		0x0D
+#define LPFC_MBOX_OPCODE_MQ_CREATE		0x15
+#define LPFC_MBOX_OPCODE_GET_CNTL_ATTRIBUTES	0x20
+#define LPFC_MBOX_OPCODE_NOP			0x21
+#define LPFC_MBOX_OPCODE_MQ_DESTROY		0x35
+#define LPFC_MBOX_OPCODE_CQ_DESTROY		0x36
+#define LPFC_MBOX_OPCODE_EQ_DESTROY		0x37
+#define LPFC_MBOX_OPCODE_FUNCTION_RESET		0x3D
+
+/* FCoE Opcodes */
+#define LPFC_MBOX_OPCODE_FCOE_WQ_CREATE			0x01
+#define LPFC_MBOX_OPCODE_FCOE_WQ_DESTROY		0x02
+#define LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES		0x03
+#define LPFC_MBOX_OPCODE_FCOE_REMOVE_SGL_PAGES		0x04
+#define LPFC_MBOX_OPCODE_FCOE_RQ_CREATE			0x05
+#define LPFC_MBOX_OPCODE_FCOE_RQ_DESTROY		0x06
+#define LPFC_MBOX_OPCODE_FCOE_READ_FCF_TABLE		0x08
+#define LPFC_MBOX_OPCODE_FCOE_ADD_FCF			0x09
+#define LPFC_MBOX_OPCODE_FCOE_DELETE_FCF		0x0A
+#define LPFC_MBOX_OPCODE_FCOE_POST_HDR_TEMPLATE		0x0B
+
+/* Mailbox command structures */
+struct eq_context {
+	uint32_t word0;
+#define lpfc_eq_context_size_SHIFT	31
+#define lpfc_eq_context_size_MASK	0x00000001
+#define lpfc_eq_context_size_WORD	word0
+#define LPFC_EQE_SIZE_4			0x0
+#define LPFC_EQE_SIZE_16		0x1
+#define lpfc_eq_context_valid_SHIFT	29
+#define lpfc_eq_context_valid_MASK	0x00000001
+#define lpfc_eq_context_valid_WORD	word0
+	uint32_t word1;
+#define lpfc_eq_context_count_SHIFT	26
+#define lpfc_eq_context_count_MASK	0x00000003
+#define lpfc_eq_context_count_WORD	word1
+#define LPFC_EQ_CNT_256		0x0
+#define LPFC_EQ_CNT_512		0x1
+#define LPFC_EQ_CNT_1024	0x2
+#define LPFC_EQ_CNT_2048	0x3
+#define LPFC_EQ_CNT_4096	0x4
+	uint32_t word2;
+#define lpfc_eq_context_delay_multi_SHIFT	13
+#define lpfc_eq_context_delay_multi_MASK	0x000003FF
+#define lpfc_eq_context_delay_multi_WORD	word2
+	uint32_t reserved3;
+};
+
+struct sgl_page_pairs {
+	uint32_t sgl_pg0_addr_lo;
+	uint32_t sgl_pg0_addr_hi;
+	uint32_t sgl_pg1_addr_lo;
+	uint32_t sgl_pg1_addr_hi;
+};
+
+struct lpfc_mbx_post_sgl_pages {
+	struct mbox_header header;
+	uint32_t word0;
+#define lpfc_post_sgl_pages_xri_SHIFT	0
+#define lpfc_post_sgl_pages_xri_MASK	0x0000FFFF
+#define lpfc_post_sgl_pages_xri_WORD	word0
+#define lpfc_post_sgl_pages_xricnt_SHIFT	16
+#define lpfc_post_sgl_pages_xricnt_MASK	0x0000FFFF
+#define lpfc_post_sgl_pages_xricnt_WORD	word0
+	struct sgl_page_pairs  sgl_pg_pairs[1];
+};
+
+/* word0 of page-1 struct shares the same SHIFT/MASK/WORD defines as above */
+struct lpfc_mbx_post_uembed_sgl_page1 {
+	union  lpfc_sli4_cfg_shdr cfg_shdr;
+	uint32_t word0;
+	struct sgl_page_pairs sgl_pg_pairs;
+};
+
+struct lpfc_mbx_sge {
+	uint32_t pa_lo;
+	uint32_t pa_hi;
+	uint32_t length;
+};
+
+struct lpfc_mbx_nembed_cmd {
+	struct lpfc_sli4_cfg_mhdr cfg_mhdr;
+#define LPFC_SLI4_MBX_SGE_MAX_PAGES	19
+	struct lpfc_mbx_sge sge[LPFC_SLI4_MBX_SGE_MAX_PAGES];
+};
+
+struct lpfc_mbx_nembed_sge_virt {
+	void *addr[LPFC_SLI4_MBX_SGE_MAX_PAGES];
+};
+
+struct lpfc_mbx_eq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_eq_create_num_pages_SHIFT	0
+#define lpfc_mbx_eq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_eq_create_num_pages_WORD	word0
+			struct eq_context context;
+			struct dma_address page[LPFC_MAX_EQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_eq_create_q_id_SHIFT	0
+#define lpfc_mbx_eq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_eq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_eq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_eq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_eq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_eq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_nop {
+	struct mbox_header header;
+	uint32_t context[2];
+};
+
+struct cq_context {
+	uint32_t word0;
+#define lpfc_cq_context_event_SHIFT	31
+#define lpfc_cq_context_event_MASK	0x00000001
+#define lpfc_cq_context_event_WORD	word0
+#define lpfc_cq_context_valid_SHIFT	29
+#define lpfc_cq_context_valid_MASK	0x00000001
+#define lpfc_cq_context_valid_WORD	word0
+#define lpfc_cq_context_count_SHIFT	27
+#define lpfc_cq_context_count_MASK	0x00000003
+#define lpfc_cq_context_count_WORD	word0
+#define LPFC_CQ_CNT_256		0x0
+#define LPFC_CQ_CNT_512		0x1
+#define LPFC_CQ_CNT_1024	0x2
+	uint32_t word1;
+#define lpfc_cq_eq_id_SHIFT		22
+#define lpfc_cq_eq_id_MASK		0x000000FF
+#define lpfc_cq_eq_id_WORD		word1
+	uint32_t reserved0;
+	uint32_t reserved1;
+};
+
+struct lpfc_mbx_cq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_cq_create_num_pages_SHIFT	0
+#define lpfc_mbx_cq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_cq_create_num_pages_WORD	word0
+			struct cq_context context;
+			struct dma_address page[LPFC_MAX_CQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_cq_create_q_id_SHIFT	0
+#define lpfc_mbx_cq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_cq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_cq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_cq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_cq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_cq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+struct wq_context {
+	uint32_t reserved0;
+	uint32_t reserved1;
+	uint32_t reserved2;
+	uint32_t reserved3;
+};
+
+struct lpfc_mbx_wq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_wq_create_num_pages_SHIFT	0
+#define lpfc_mbx_wq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_wq_create_num_pages_WORD	word0
+#define lpfc_mbx_wq_create_cq_id_SHIFT		16
+#define lpfc_mbx_wq_create_cq_id_MASK		0x0000FFFF
+#define lpfc_mbx_wq_create_cq_id_WORD		word0
+			struct dma_address page[LPFC_MAX_WQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_wq_create_q_id_SHIFT	0
+#define lpfc_mbx_wq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_wq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_wq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_wq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_wq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_wq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+#define LPFC_HDR_BUF_SIZE 128
+#define LPFC_DATA_BUF_SIZE 4096
+struct rq_context {
+	uint32_t word0;
+#define lpfc_rq_context_rq_size_SHIFT	16
+#define lpfc_rq_context_rq_size_MASK	0x0000000F
+#define lpfc_rq_context_rq_size_WORD	word0
+#define LPFC_RQ_RING_SIZE_512		9	/* 512 entries */
+#define LPFC_RQ_RING_SIZE_1024		10	/* 1024 entries */
+#define LPFC_RQ_RING_SIZE_2048		11	/* 2048 entries */
+#define LPFC_RQ_RING_SIZE_4096		12	/* 4096 entries */
+	uint32_t reserved1;
+	uint32_t word2;
+#define lpfc_rq_context_cq_id_SHIFT	16
+#define lpfc_rq_context_cq_id_MASK	0x000003FF
+#define lpfc_rq_context_cq_id_WORD	word2
+#define lpfc_rq_context_buf_size_SHIFT	0
+#define lpfc_rq_context_buf_size_MASK	0x0000FFFF
+#define lpfc_rq_context_buf_size_WORD	word2
+	uint32_t reserved3;
+};
+
+struct lpfc_mbx_rq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_rq_create_num_pages_SHIFT	0
+#define lpfc_mbx_rq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_rq_create_num_pages_WORD	word0
+			struct rq_context context;
+			struct dma_address page[LPFC_MAX_WQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_rq_create_q_id_SHIFT	0
+#define lpfc_mbx_rq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_rq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_rq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_rq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_rq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_rq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+struct mq_context {
+	uint32_t word0;
+#define lpfc_mq_context_cq_id_SHIFT	22
+#define lpfc_mq_context_cq_id_MASK	0x000003FF
+#define lpfc_mq_context_cq_id_WORD	word0
+#define lpfc_mq_context_count_SHIFT	16
+#define lpfc_mq_context_count_MASK	0x0000000F
+#define lpfc_mq_context_count_WORD	word0
+#define LPFC_MQ_CNT_16		0x5
+#define LPFC_MQ_CNT_32		0x6
+#define LPFC_MQ_CNT_64		0x7
+#define LPFC_MQ_CNT_128		0x8
+	uint32_t word1;
+#define lpfc_mq_context_valid_SHIFT	31
+#define lpfc_mq_context_valid_MASK	0x00000001
+#define lpfc_mq_context_valid_WORD	word1
+	uint32_t reserved2;
+	uint32_t reserved3;
+};
+
+struct lpfc_mbx_mq_create {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_mq_create_num_pages_SHIFT	0
+#define lpfc_mbx_mq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_mq_create_num_pages_WORD	word0
+			struct mq_context context;
+			struct dma_address page[LPFC_MAX_MQ_PAGE];
+		} request;
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_mq_create_q_id_SHIFT	0
+#define lpfc_mbx_mq_create_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_mq_create_q_id_WORD	word0
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_mq_destroy {
+	struct mbox_header header;
+	union {
+		struct {
+			uint32_t word0;
+#define lpfc_mbx_mq_destroy_q_id_SHIFT	0
+#define lpfc_mbx_mq_destroy_q_id_MASK	0x0000FFFF
+#define lpfc_mbx_mq_destroy_q_id_WORD	word0
+		} request;
+		struct {
+			uint32_t word0;
+		} response;
+	} u;
+};
+
+struct lpfc_mbx_post_hdr_tmpl {
+	struct mbox_header header;
+	uint32_t word10;
+#define lpfc_mbx_post_hdr_tmpl_rpi_offset_SHIFT  0
+#define lpfc_mbx_post_hdr_tmpl_rpi_offset_MASK   0x0000FFFF
+#define lpfc_mbx_post_hdr_tmpl_rpi_offset_WORD   word10
+#define lpfc_mbx_post_hdr_tmpl_page_cnt_SHIFT   16
+#define lpfc_mbx_post_hdr_tmpl_page_cnt_MASK    0x0000FFFF
+#define lpfc_mbx_post_hdr_tmpl_page_cnt_WORD    word10
+	uint32_t rpi_paddr_lo;
+	uint32_t rpi_paddr_hi;
+};
+
+struct sli4_sge {	/* SLI-4 */
+	uint32_t addr_hi;
+	uint32_t addr_lo;
+
+	uint32_t word2;
+#define lpfc_sli4_sge_offset_SHIFT	0 /* Offset of buffer - Not used*/
+#define lpfc_sli4_sge_offset_MASK	0x00FFFFFF
+#define lpfc_sli4_sge_offset_WORD	word2
+#define lpfc_sli4_sge_last_SHIFT	31 /* Last SEG in the SGL sets
+						this  flag !! */
+#define lpfc_sli4_sge_last_MASK		0x00000001
+#define lpfc_sli4_sge_last_WORD		word2
+	uint32_t word3;
+#define lpfc_sli4_sge_len_SHIFT		0
+#define lpfc_sli4_sge_len_MASK		0x0001FFFF
+#define lpfc_sli4_sge_len_WORD		word3
+};
+
+struct fcf_record {
+	uint32_t max_rcv_size;
+	uint32_t fka_adv_period;
+	uint32_t fip_priority;
+	uint32_t word3;
+#define lpfc_fcf_record_mac_0_SHIFT		0
+#define lpfc_fcf_record_mac_0_MASK		0x000000FF
+#define lpfc_fcf_record_mac_0_WORD		word3
+#define lpfc_fcf_record_mac_1_SHIFT		8
+#define lpfc_fcf_record_mac_1_MASK		0x000000FF
+#define lpfc_fcf_record_mac_1_WORD		word3
+#define lpfc_fcf_record_mac_2_SHIFT		16
+#define lpfc_fcf_record_mac_2_MASK		0x000000FF
+#define lpfc_fcf_record_mac_2_WORD		word3
+#define lpfc_fcf_record_mac_3_SHIFT		24
+#define lpfc_fcf_record_mac_3_MASK		0x000000FF
+#define lpfc_fcf_record_mac_3_WORD		word3
+	uint32_t word4;
+#define lpfc_fcf_record_mac_4_SHIFT		0
+#define lpfc_fcf_record_mac_4_MASK		0x000000FF
+#define lpfc_fcf_record_mac_4_WORD		word4
+#define lpfc_fcf_record_mac_5_SHIFT		8
+#define lpfc_fcf_record_mac_5_MASK		0x000000FF
+#define lpfc_fcf_record_mac_5_WORD		word4
+#define lpfc_fcf_record_fcf_avail_SHIFT		16
+#define lpfc_fcf_record_fcf_avail_MASK		0x000000FF
+#define lpfc_fcf_record_fc_avail_WORD		word4
+#define lpfc_fcf_record_mac_addr_prov_SHIFT	24
+#define lpfc_fcf_record_mac_addr_prov_MASK	0x000000FF
+#define lpfc_fcf_record_mac_addr_prov_WORD	word4
+#define LPFC_FCF_FPMA           1 	/* Fabric Provided MAC Address */
+#define LPFC_FCF_SPMA           2       /* Server Provided MAC Address */
+	uint32_t word5;
+#define lpfc_fcf_record_fab_name_0_SHIFT	0
+#define lpfc_fcf_record_fab_name_0_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_0_WORD		word5
+#define lpfc_fcf_record_fab_name_1_SHIFT	8
+#define lpfc_fcf_record_fab_name_1_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_1_WORD		word5
+#define lpfc_fcf_record_fab_name_2_SHIFT	16
+#define lpfc_fcf_record_fab_name_2_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_2_WORD		word5
+#define lpfc_fcf_record_fab_name_3_SHIFT	24
+#define lpfc_fcf_record_fab_name_3_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_3_WORD		word5
+	uint32_t word6;
+#define lpfc_fcf_record_fab_name_4_SHIFT	0
+#define lpfc_fcf_record_fab_name_4_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_4_WORD		word6
+#define lpfc_fcf_record_fab_name_5_SHIFT	8
+#define lpfc_fcf_record_fab_name_5_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_5_WORD		word6
+#define lpfc_fcf_record_fab_name_6_SHIFT	16
+#define lpfc_fcf_record_fab_name_6_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_6_WORD		word6
+#define lpfc_fcf_record_fab_name_7_SHIFT	24
+#define lpfc_fcf_record_fab_name_7_MASK		0x000000FF
+#define lpfc_fcf_record_fab_name_7_WORD		word6
+	uint32_t word7;
+#define lpfc_fcf_record_fc_map_0_SHIFT		0
+#define lpfc_fcf_record_fc_map_0_MASK		0x000000FF
+#define lpfc_fcf_record_fc_map_0_WORD		word7
+#define lpfc_fcf_record_fc_map_1_SHIFT		8
+#define lpfc_fcf_record_fc_map_1_MASK		0x000000FF
+#define lpfc_fcf_record_fc_map_1_WORD		word7
+#define lpfc_fcf_record_fc_map_2_SHIFT		16
+#define lpfc_fcf_record_fc_map_2_MASK		0x000000FF
+#define lpfc_fcf_record_fc_map_2_WORD		word7
+#define lpfc_fcf_record_fcf_valid_SHIFT		24
+#define lpfc_fcf_record_fcf_valid_MASK		0x000000FF
+#define lpfc_fcf_record_fcf_valid_WORD		word7
+	uint32_t word8;
+#define lpfc_fcf_record_fcf_index_SHIFT		0
+#define lpfc_fcf_record_fcf_index_MASK		0x0000FFFF
+#define lpfc_fcf_record_fcf_index_WORD		word8
+#define lpfc_fcf_record_fcf_state_SHIFT		16
+#define lpfc_fcf_record_fcf_state_MASK		0x0000FFFF
+#define lpfc_fcf_record_fcf_state_WORD		word8
+	uint8_t vlan_bitmap[512];
+};
+
+struct lpfc_mbx_read_fcf_tbl {
+	union lpfc_sli4_cfg_shdr cfg_shdr;
+	union {
+		struct {
+			uint32_t word10;
+#define lpfc_mbx_read_fcf_tbl_indx_SHIFT	0
+#define lpfc_mbx_read_fcf_tbl_indx_MASK		0x0000FFFF
+#define lpfc_mbx_read_fcf_tbl_indx_WORD		word10
+		} request;
+		struct {
+			uint32_t eventag;
+		} response;
+	} u;
+	uint32_t word11;
+#define lpfc_mbx_read_fcf_tbl_nxt_vindx_SHIFT	0
+#define lpfc_mbx_read_fcf_tbl_nxt_vindx_MASK	0x0000FFFF
+#define lpfc_mbx_read_fcf_tbl_nxt_vindx_WORD	word11
+};
+
+struct lpfc_mbx_add_fcf_tbl_entry {
+	union lpfc_sli4_cfg_shdr cfg_shdr;
+	uint32_t word10;
+#define lpfc_mbx_add_fcf_tbl_fcfi_SHIFT        0
+#define lpfc_mbx_add_fcf_tbl_fcfi_MASK         0x0000FFFF
+#define lpfc_mbx_add_fcf_tbl_fcfi_WORD         word10
+	struct lpfc_mbx_sge fcf_sge;
+};
+
+struct lpfc_mbx_del_fcf_tbl_entry {
+	struct mbox_header header;
+	uint32_t word10;
+#define lpfc_mbx_del_fcf_tbl_count_SHIFT	0
+#define lpfc_mbx_del_fcf_tbl_count_MASK		0x0000FFFF
+#define lpfc_mbx_del_fcf_tbl_count_WORD		word10
+#define lpfc_mbx_del_fcf_tbl_index_SHIFT	16
+#define lpfc_mbx_del_fcf_tbl_index_MASK		0x0000FFFF
+#define lpfc_mbx_del_fcf_tbl_index_WORD		word10
+};
+
+/* Status field for embedded SLI_CONFIG mailbox command */
+#define STATUS_SUCCESS					0x0
+#define STATUS_FAILED 					0x1
+#define STATUS_ILLEGAL_REQUEST				0x2
+#define STATUS_ILLEGAL_FIELD				0x3
+#define STATUS_INSUFFICIENT_BUFFER 			0x4
+#define STATUS_UNAUTHORIZED_REQUEST			0x5
+#define STATUS_FLASHROM_SAVE_FAILED			0x17
+#define STATUS_FLASHROM_RESTORE_FAILED			0x18
+#define STATUS_ICCBINDEX_ALLOC_FAILED			0x1a
+#define STATUS_IOCTLHANDLE_ALLOC_FAILED 		0x1b
+#define STATUS_INVALID_PHY_ADDR_FROM_OSM		0x1c
+#define STATUS_INVALID_PHY_ADDR_LEN_FROM_OSM		0x1d
+#define STATUS_ASSERT_FAILED				0x1e
+#define STATUS_INVALID_SESSION				0x1f
+#define STATUS_INVALID_CONNECTION			0x20
+#define STATUS_BTL_PATH_EXCEEDS_OSM_LIMIT		0x21
+#define STATUS_BTL_NO_FREE_SLOT_PATH			0x24
+#define STATUS_BTL_NO_FREE_SLOT_TGTID			0x25
+#define STATUS_OSM_DEVSLOT_NOT_FOUND			0x26
+#define STATUS_FLASHROM_READ_FAILED			0x27
+#define STATUS_POLL_IOCTL_TIMEOUT			0x28
+#define STATUS_ERROR_ACITMAIN				0x2a
+#define STATUS_REBOOT_REQUIRED				0x2c
+#define STATUS_FCF_IN_USE				0x3a
+
+struct lpfc_mbx_sli4_config {
+	struct mbox_header header;
+};
+
+struct lpfc_mbx_init_vfi {
+	uint32_t word1;
+#define lpfc_init_vfi_vr_SHIFT		31
+#define lpfc_init_vfi_vr_MASK		0x00000001
+#define lpfc_init_vfi_vr_WORD		word1
+#define lpfc_init_vfi_vt_SHIFT		30
+#define lpfc_init_vfi_vt_MASK		0x00000001
+#define lpfc_init_vfi_vt_WORD		word1
+#define lpfc_init_vfi_vf_SHIFT		29
+#define lpfc_init_vfi_vf_MASK		0x00000001
+#define lpfc_init_vfi_vf_WORD		word1
+#define lpfc_init_vfi_vfi_SHIFT		0
+#define lpfc_init_vfi_vfi_MASK		0x0000FFFF
+#define lpfc_init_vfi_vfi_WORD		word1
+	uint32_t word2;
+#define lpfc_init_vfi_fcfi_SHIFT	0
+#define lpfc_init_vfi_fcfi_MASK		0x0000FFFF
+#define lpfc_init_vfi_fcfi_WORD		word2
+	uint32_t word3;
+#define lpfc_init_vfi_pri_SHIFT		13
+#define lpfc_init_vfi_pri_MASK		0x00000007
+#define lpfc_init_vfi_pri_WORD		word3
+#define lpfc_init_vfi_vf_id_SHIFT	1
+#define lpfc_init_vfi_vf_id_MASK	0x00000FFF
+#define lpfc_init_vfi_vf_id_WORD	word3
+	uint32_t word4;
+#define lpfc_init_vfi_hop_count_SHIFT	24
+#define lpfc_init_vfi_hop_count_MASK	0x000000FF
+#define lpfc_init_vfi_hop_count_WORD	word4
+};
+
+struct lpfc_mbx_reg_vfi {
+	uint32_t word1;
+#define lpfc_reg_vfi_vp_SHIFT		28
+#define lpfc_reg_vfi_vp_MASK		0x00000001
+#define lpfc_reg_vfi_vp_WORD		word1
+#define lpfc_reg_vfi_vfi_SHIFT		0
+#define lpfc_reg_vfi_vfi_MASK		0x0000FFFF
+#define lpfc_reg_vfi_vfi_WORD		word1
+	uint32_t word2;
+#define lpfc_reg_vfi_vpi_SHIFT		16
+#define lpfc_reg_vfi_vpi_MASK		0x0000FFFF
+#define lpfc_reg_vfi_vpi_WORD		word2
+#define lpfc_reg_vfi_fcfi_SHIFT		0
+#define lpfc_reg_vfi_fcfi_MASK		0x0000FFFF
+#define lpfc_reg_vfi_fcfi_WORD		word2
+	uint32_t word3_rsvd;
+	uint32_t word4_rsvd;
+	struct ulp_bde64 bde;
+	uint32_t word8_rsvd;
+	uint32_t word9_rsvd;
+	uint32_t word10;
+#define lpfc_reg_vfi_nport_id_SHIFT		0
+#define lpfc_reg_vfi_nport_id_MASK		0x00FFFFFF
+#define lpfc_reg_vfi_nport_id_WORD		word10
+};
+
+struct lpfc_mbx_init_vpi {
+	uint32_t word1;
+#define lpfc_init_vpi_vfi_SHIFT		16
+#define lpfc_init_vpi_vfi_MASK		0x0000FFFF
+#define lpfc_init_vpi_vfi_WORD		word1
+#define lpfc_init_vpi_vpi_SHIFT		0
+#define lpfc_init_vpi_vpi_MASK		0x0000FFFF
+#define lpfc_init_vpi_vpi_WORD		word1
+};
+
+struct lpfc_mbx_read_vpi {
+	uint32_t word1_rsvd;
+	uint32_t word2;
+#define lpfc_mbx_read_vpi_vnportid_SHIFT	0
+#define lpfc_mbx_read_vpi_vnportid_MASK		0x00FFFFFF
+#define lpfc_mbx_read_vpi_vnportid_WORD		word2
+	uint32_t word3_rsvd;
+	uint32_t word4;
+#define lpfc_mbx_read_vpi_acq_alpa_SHIFT	0
+#define lpfc_mbx_read_vpi_acq_alpa_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_acq_alpa_WORD		word4
+#define lpfc_mbx_read_vpi_pb_SHIFT		15
+#define lpfc_mbx_read_vpi_pb_MASK		0x00000001
+#define lpfc_mbx_read_vpi_pb_WORD		word4
+#define lpfc_mbx_read_vpi_spec_alpa_SHIFT	16
+#define lpfc_mbx_read_vpi_spec_alpa_MASK	0x000000FF
+#define lpfc_mbx_read_vpi_spec_alpa_WORD	word4
+#define lpfc_mbx_read_vpi_ns_SHIFT		30
+#define lpfc_mbx_read_vpi_ns_MASK		0x00000001
+#define lpfc_mbx_read_vpi_ns_WORD		word4
+#define lpfc_mbx_read_vpi_hl_SHIFT		31
+#define lpfc_mbx_read_vpi_hl_MASK		0x00000001
+#define lpfc_mbx_read_vpi_hl_WORD		word4
+	uint32_t word5_rsvd;
+	uint32_t word6;
+#define lpfc_mbx_read_vpi_vpi_SHIFT		0
+#define lpfc_mbx_read_vpi_vpi_MASK		0x0000FFFF
+#define lpfc_mbx_read_vpi_vpi_WORD		word6
+	uint32_t word7;
+#define lpfc_mbx_read_vpi_mac_0_SHIFT		0
+#define lpfc_mbx_read_vpi_mac_0_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_0_WORD		word7
+#define lpfc_mbx_read_vpi_mac_1_SHIFT		8
+#define lpfc_mbx_read_vpi_mac_1_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_1_WORD		word7
+#define lpfc_mbx_read_vpi_mac_2_SHIFT		16
+#define lpfc_mbx_read_vpi_mac_2_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_2_WORD		word7
+#define lpfc_mbx_read_vpi_mac_3_SHIFT		24
+#define lpfc_mbx_read_vpi_mac_3_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_3_WORD		word7
+	uint32_t word8;
+#define lpfc_mbx_read_vpi_mac_4_SHIFT		0
+#define lpfc_mbx_read_vpi_mac_4_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_4_WORD		word8
+#define lpfc_mbx_read_vpi_mac_5_SHIFT		8
+#define lpfc_mbx_read_vpi_mac_5_MASK		0x000000FF
+#define lpfc_mbx_read_vpi_mac_5_WORD		word8
+#define lpfc_mbx_read_vpi_vlan_tag_SHIFT	16
+#define lpfc_mbx_read_vpi_vlan_tag_MASK		0x00000FFF
+#define lpfc_mbx_read_vpi_vlan_tag_WORD		word8
+#define lpfc_mbx_read_vpi_vv_SHIFT		28
+#define lpfc_mbx_read_vpi_vv_MASK		0x0000001
+#define lpfc_mbx_read_vpi_vv_WORD		word8
+};
+
+struct lpfc_mbx_unreg_vfi {
+	uint32_t word1_rsvd;
+	uint32_t word2;
+#define lpfc_unreg_vfi_vfi_SHIFT	0
+#define lpfc_unreg_vfi_vfi_MASK		0x0000FFFF
+#define lpfc_unreg_vfi_vfi_WORD		word2
+};
+
+struct lpfc_mbx_resume_rpi {
+	uint32_t word1;
+#define lpfc_resume_rpi_rpi_SHIFT	0
+#define lpfc_resume_rpi_rpi_MASK	0x0000FFFF
+#define lpfc_resume_rpi_rpi_WORD	word1
+	uint32_t event_tag;
+	uint32_t word3_rsvd;
+	uint32_t word4_rsvd;
+	uint32_t word5_rsvd;
+	uint32_t word6;
+#define lpfc_resume_rpi_vpi_SHIFT	0
+#define lpfc_resume_rpi_vpi_MASK	0x0000FFFF
+#define lpfc_resume_rpi_vpi_WORD	word6
+#define lpfc_resume_rpi_vfi_SHIFT	16
+#define lpfc_resume_rpi_vfi_MASK	0x0000FFFF
+#define lpfc_resume_rpi_vfi_WORD	word6
+};
+
+#define REG_FCF_INVALID_QID	0xFFFF
+struct lpfc_mbx_reg_fcfi {
+	uint32_t word1;
+#define lpfc_reg_fcfi_info_index_SHIFT	0
+#define lpfc_reg_fcfi_info_index_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_info_index_WORD	word1
+#define lpfc_reg_fcfi_fcfi_SHIFT	16
+#define lpfc_reg_fcfi_fcfi_MASK		0x0000FFFF
+#define lpfc_reg_fcfi_fcfi_WORD		word1
+	uint32_t word2;
+#define lpfc_reg_fcfi_rq_id1_SHIFT	0
+#define lpfc_reg_fcfi_rq_id1_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_rq_id1_WORD	word2
+#define lpfc_reg_fcfi_rq_id0_SHIFT	16
+#define lpfc_reg_fcfi_rq_id0_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_rq_id0_WORD	word2
+	uint32_t word3;
+#define lpfc_reg_fcfi_rq_id3_SHIFT	0
+#define lpfc_reg_fcfi_rq_id3_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_rq_id3_WORD	word3
+#define lpfc_reg_fcfi_rq_id2_SHIFT	16
+#define lpfc_reg_fcfi_rq_id2_MASK	0x0000FFFF
+#define lpfc_reg_fcfi_rq_id2_WORD	word3
+	uint32_t word4;
+#define lpfc_reg_fcfi_type_match0_SHIFT	24
+#define lpfc_reg_fcfi_type_match0_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_match0_WORD	word4
+#define lpfc_reg_fcfi_type_mask0_SHIFT	16
+#define lpfc_reg_fcfi_type_mask0_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_mask0_WORD	word4
+#define lpfc_reg_fcfi_rctl_match0_SHIFT	8
+#define lpfc_reg_fcfi_rctl_match0_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_match0_WORD	word4
+#define lpfc_reg_fcfi_rctl_mask0_SHIFT	0
+#define lpfc_reg_fcfi_rctl_mask0_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_mask0_WORD	word4
+	uint32_t word5;
+#define lpfc_reg_fcfi_type_match1_SHIFT	24
+#define lpfc_reg_fcfi_type_match1_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_match1_WORD	word5
+#define lpfc_reg_fcfi_type_mask1_SHIFT	16
+#define lpfc_reg_fcfi_type_mask1_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_mask1_WORD	word5
+#define lpfc_reg_fcfi_rctl_match1_SHIFT	8
+#define lpfc_reg_fcfi_rctl_match1_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_match1_WORD	word5
+#define lpfc_reg_fcfi_rctl_mask1_SHIFT	0
+#define lpfc_reg_fcfi_rctl_mask1_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_mask1_WORD	word5
+	uint32_t word6;
+#define lpfc_reg_fcfi_type_match2_SHIFT	24
+#define lpfc_reg_fcfi_type_match2_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_match2_WORD	word6
+#define lpfc_reg_fcfi_type_mask2_SHIFT	16
+#define lpfc_reg_fcfi_type_mask2_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_mask2_WORD	word6
+#define lpfc_reg_fcfi_rctl_match2_SHIFT	8
+#define lpfc_reg_fcfi_rctl_match2_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_match2_WORD	word6
+#define lpfc_reg_fcfi_rctl_mask2_SHIFT	0
+#define lpfc_reg_fcfi_rctl_mask2_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_mask2_WORD	word6
+	uint32_t word7;
+#define lpfc_reg_fcfi_type_match3_SHIFT	24
+#define lpfc_reg_fcfi_type_match3_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_match3_WORD	word7
+#define lpfc_reg_fcfi_type_mask3_SHIFT	16
+#define lpfc_reg_fcfi_type_mask3_MASK	0x000000FF
+#define lpfc_reg_fcfi_type_mask3_WORD	word7
+#define lpfc_reg_fcfi_rctl_match3_SHIFT	8
+#define lpfc_reg_fcfi_rctl_match3_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_match3_WORD	word7
+#define lpfc_reg_fcfi_rctl_mask3_SHIFT	0
+#define lpfc_reg_fcfi_rctl_mask3_MASK	0x000000FF
+#define lpfc_reg_fcfi_rctl_mask3_WORD	word7
+	uint32_t word8;
+#define lpfc_reg_fcfi_mam_SHIFT		13
+#define lpfc_reg_fcfi_mam_MASK		0x00000003
+#define lpfc_reg_fcfi_mam_WORD		word8
+#define LPFC_MAM_BOTH		0	/* Both SPMA and FPMA */
+#define LPFC_MAM_SPMA		1	/* Server Provided MAC Address */
+#define LPFC_MAM_FPMA		2	/* Fabric Provided MAC Address */
+#define lpfc_reg_fcfi_vv_SHIFT		12
+#define lpfc_reg_fcfi_vv_MASK		0x00000001
+#define lpfc_reg_fcfi_vv_WORD		word8
+#define lpfc_reg_fcfi_vlan_tag_SHIFT	0
+#define lpfc_reg_fcfi_vlan_tag_MASK	0x00000FFF
+#define lpfc_reg_fcfi_vlan_tag_WORD	word8
+};
+
+struct lpfc_mbx_unreg_fcfi {
+	uint32_t word1_rsv;
+	uint32_t word2;
+#define lpfc_unreg_fcfi_SHIFT		0
+#define lpfc_unreg_fcfi_MASK		0x0000FFFF
+#define lpfc_unreg_fcfi_WORD		word2
+};
+
+struct lpfc_mbx_read_rev {
+	uint32_t word1;
+#define lpfc_mbx_rd_rev_sli_lvl_SHIFT  		16
+#define lpfc_mbx_rd_rev_sli_lvl_MASK   		0x0000000F
+#define lpfc_mbx_rd_rev_sli_lvl_WORD   		word1
+#define lpfc_mbx_rd_rev_fcoe_SHIFT		20
+#define lpfc_mbx_rd_rev_fcoe_MASK		0x00000001
+#define lpfc_mbx_rd_rev_fcoe_WORD		word1
+#define lpfc_mbx_rd_rev_vpd_SHIFT		29
+#define lpfc_mbx_rd_rev_vpd_MASK		0x00000001
+#define lpfc_mbx_rd_rev_vpd_WORD		word1
+	uint32_t first_hw_rev;
+	uint32_t second_hw_rev;
+	uint32_t word4_rsvd;
+	uint32_t third_hw_rev;
+	uint32_t word6;
+#define lpfc_mbx_rd_rev_fcph_low_SHIFT		0
+#define lpfc_mbx_rd_rev_fcph_low_MASK		0x000000FF
+#define lpfc_mbx_rd_rev_fcph_low_WORD		word6
+#define lpfc_mbx_rd_rev_fcph_high_SHIFT		8
+#define lpfc_mbx_rd_rev_fcph_high_MASK		0x000000FF
+#define lpfc_mbx_rd_rev_fcph_high_WORD		word6
+#define lpfc_mbx_rd_rev_ftr_lvl_low_SHIFT	16
+#define lpfc_mbx_rd_rev_ftr_lvl_low_MASK	0x000000FF
+#define lpfc_mbx_rd_rev_ftr_lvl_low_WORD	word6
+#define lpfc_mbx_rd_rev_ftr_lvl_high_SHIFT	24
+#define lpfc_mbx_rd_rev_ftr_lvl_high_MASK	0x000000FF
+#define lpfc_mbx_rd_rev_ftr_lvl_high_WORD	word6
+	uint32_t word7_rsvd;
+	uint32_t fw_id_rev;
+	uint8_t  fw_name[16];
+	uint32_t ulp_fw_id_rev;
+	uint8_t  ulp_fw_name[16];
+	uint32_t word18_47_rsvd[30];
+	uint32_t word48;
+#define lpfc_mbx_rd_rev_avail_len_SHIFT		0
+#define lpfc_mbx_rd_rev_avail_len_MASK		0x00FFFFFF
+#define lpfc_mbx_rd_rev_avail_len_WORD		word48
+	uint32_t vpd_paddr_low;
+	uint32_t vpd_paddr_high;
+	uint32_t avail_vpd_len;
+	uint32_t rsvd_52_63[12];
+};
+
+struct lpfc_mbx_read_config {
+	uint32_t word1;
+#define lpfc_mbx_rd_conf_max_bbc_SHIFT		0
+#define lpfc_mbx_rd_conf_max_bbc_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_max_bbc_WORD		word1
+#define lpfc_mbx_rd_conf_init_bbc_SHIFT		8
+#define lpfc_mbx_rd_conf_init_bbc_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_init_bbc_WORD		word1
+	uint32_t word2;
+#define lpfc_mbx_rd_conf_nport_did_SHIFT	0
+#define lpfc_mbx_rd_conf_nport_did_MASK		0x00FFFFFF
+#define lpfc_mbx_rd_conf_nport_did_WORD		word2
+#define lpfc_mbx_rd_conf_topology_SHIFT		24
+#define lpfc_mbx_rd_conf_topology_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_topology_WORD		word2
+	uint32_t word3;
+#define lpfc_mbx_rd_conf_ao_SHIFT		0
+#define lpfc_mbx_rd_conf_ao_MASK		0x00000001
+#define lpfc_mbx_rd_conf_ao_WORD		word3
+#define lpfc_mbx_rd_conf_bb_scn_SHIFT		8
+#define lpfc_mbx_rd_conf_bb_scn_MASK		0x0000000F
+#define lpfc_mbx_rd_conf_bb_scn_WORD		word3
+#define lpfc_mbx_rd_conf_cbb_scn_SHIFT		12
+#define lpfc_mbx_rd_conf_cbb_scn_MASK		0x0000000F
+#define lpfc_mbx_rd_conf_cbb_scn_WORD		word3
+#define lpfc_mbx_rd_conf_mc_SHIFT		29
+#define lpfc_mbx_rd_conf_mc_MASK		0x00000001
+#define lpfc_mbx_rd_conf_mc_WORD		word3
+	uint32_t word4;
+#define lpfc_mbx_rd_conf_e_d_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_e_d_tov_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_e_d_tov_WORD		word4
+	uint32_t word5;
+#define lpfc_mbx_rd_conf_lp_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_lp_tov_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_lp_tov_WORD		word5
+	uint32_t word6;
+#define lpfc_mbx_rd_conf_r_a_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_r_a_tov_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_r_a_tov_WORD		word6
+	uint32_t word7;
+#define lpfc_mbx_rd_conf_r_t_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_r_t_tov_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_r_t_tov_WORD		word7
+	uint32_t word8;
+#define lpfc_mbx_rd_conf_al_tov_SHIFT		0
+#define lpfc_mbx_rd_conf_al_tov_MASK		0x0000000F
+#define lpfc_mbx_rd_conf_al_tov_WORD		word8
+	uint32_t word9;
+#define lpfc_mbx_rd_conf_lmt_SHIFT		0
+#define lpfc_mbx_rd_conf_lmt_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_lmt_WORD		word9
+	uint32_t word10;
+#define lpfc_mbx_rd_conf_max_alpa_SHIFT		0
+#define lpfc_mbx_rd_conf_max_alpa_MASK		0x000000FF
+#define lpfc_mbx_rd_conf_max_alpa_WORD		word10
+	uint32_t word11_rsvd;
+	uint32_t word12;
+#define lpfc_mbx_rd_conf_xri_base_SHIFT		0
+#define lpfc_mbx_rd_conf_xri_base_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_xri_base_WORD		word12
+#define lpfc_mbx_rd_conf_xri_count_SHIFT	16
+#define lpfc_mbx_rd_conf_xri_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_xri_count_WORD		word12
+	uint32_t word13;
+#define lpfc_mbx_rd_conf_rpi_base_SHIFT		0
+#define lpfc_mbx_rd_conf_rpi_base_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_rpi_base_WORD		word13
+#define lpfc_mbx_rd_conf_rpi_count_SHIFT	16
+#define lpfc_mbx_rd_conf_rpi_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_rpi_count_WORD		word13
+	uint32_t word14;
+#define lpfc_mbx_rd_conf_vpi_base_SHIFT		0
+#define lpfc_mbx_rd_conf_vpi_base_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_vpi_base_WORD		word14
+#define lpfc_mbx_rd_conf_vpi_count_SHIFT	16
+#define lpfc_mbx_rd_conf_vpi_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_vpi_count_WORD		word14
+	uint32_t word15;
+#define lpfc_mbx_rd_conf_vfi_base_SHIFT         0
+#define lpfc_mbx_rd_conf_vfi_base_MASK          0x0000FFFF
+#define lpfc_mbx_rd_conf_vfi_base_WORD          word15
+#define lpfc_mbx_rd_conf_vfi_count_SHIFT        16
+#define lpfc_mbx_rd_conf_vfi_count_MASK         0x0000FFFF
+#define lpfc_mbx_rd_conf_vfi_count_WORD         word15
+	uint32_t word16;
+#define lpfc_mbx_rd_conf_fcfi_base_SHIFT	0
+#define lpfc_mbx_rd_conf_fcfi_base_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_fcfi_base_WORD		word16
+#define lpfc_mbx_rd_conf_fcfi_count_SHIFT	16
+#define lpfc_mbx_rd_conf_fcfi_count_MASK	0x0000FFFF
+#define lpfc_mbx_rd_conf_fcfi_count_WORD	word16
+	uint32_t word17;
+#define lpfc_mbx_rd_conf_rq_count_SHIFT		0
+#define lpfc_mbx_rd_conf_rq_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_rq_count_WORD		word17
+#define lpfc_mbx_rd_conf_eq_count_SHIFT		16
+#define lpfc_mbx_rd_conf_eq_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_eq_count_WORD		word17
+	uint32_t word18;
+#define lpfc_mbx_rd_conf_wq_count_SHIFT		0
+#define lpfc_mbx_rd_conf_wq_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_wq_count_WORD		word18
+#define lpfc_mbx_rd_conf_cq_count_SHIFT		16
+#define lpfc_mbx_rd_conf_cq_count_MASK		0x0000FFFF
+#define lpfc_mbx_rd_conf_cq_count_WORD		word18
+};
+
+struct lpfc_mbx_request_features {
+	uint32_t word1;
+#define lpfc_mbx_rq_ftr_qry_SHIFT		0
+#define lpfc_mbx_rq_ftr_qry_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_qry_WORD		word1
+	uint32_t word2;
+#define lpfc_mbx_rq_ftr_rq_iaab_SHIFT		0
+#define lpfc_mbx_rq_ftr_rq_iaab_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_iaab_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_npiv_SHIFT		1
+#define lpfc_mbx_rq_ftr_rq_npiv_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_npiv_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_dif_SHIFT		2
+#define lpfc_mbx_rq_ftr_rq_dif_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_dif_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_vf_SHIFT		3
+#define lpfc_mbx_rq_ftr_rq_vf_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_vf_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_fcpi_SHIFT		4
+#define lpfc_mbx_rq_ftr_rq_fcpi_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_fcpi_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_fcpt_SHIFT		5
+#define lpfc_mbx_rq_ftr_rq_fcpt_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_fcpt_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_fcpc_SHIFT		6
+#define lpfc_mbx_rq_ftr_rq_fcpc_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_fcpc_WORD		word2
+#define lpfc_mbx_rq_ftr_rq_ifip_SHIFT		7
+#define lpfc_mbx_rq_ftr_rq_ifip_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rq_ifip_WORD		word2
+	uint32_t word3;
+#define lpfc_mbx_rq_ftr_rsp_iaab_SHIFT		0
+#define lpfc_mbx_rq_ftr_rsp_iaab_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_iaab_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_npiv_SHIFT		1
+#define lpfc_mbx_rq_ftr_rsp_npiv_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_npiv_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_dif_SHIFT		2
+#define lpfc_mbx_rq_ftr_rsp_dif_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_dif_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_vf_SHIFT		3
+#define lpfc_mbx_rq_ftr_rsp_vf__MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_vf_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_fcpi_SHIFT		4
+#define lpfc_mbx_rq_ftr_rsp_fcpi_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_fcpi_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_fcpt_SHIFT		5
+#define lpfc_mbx_rq_ftr_rsp_fcpt_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_fcpt_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_fcpc_SHIFT		6
+#define lpfc_mbx_rq_ftr_rsp_fcpc_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_fcpc_WORD		word3
+#define lpfc_mbx_rq_ftr_rsp_ifip_SHIFT		7
+#define lpfc_mbx_rq_ftr_rsp_ifip_MASK		0x00000001
+#define lpfc_mbx_rq_ftr_rsp_ifip_WORD		word3
+};
+
+/* Mailbox Completion Queue Error Messages */
+#define MB_CQE_STATUS_SUCCESS 			0x0
+#define MB_CQE_STATUS_INSUFFICIENT_PRIVILEGES	0x1
+#define MB_CQE_STATUS_INVALID_PARAMETER		0x2
+#define MB_CQE_STATUS_INSUFFICIENT_RESOURCES	0x3
+#define MB_CEQ_STATUS_QUEUE_FLUSHING		0x4
+#define MB_CQE_STATUS_DMA_FAILED		0x5
+
+/* mailbox queue entry structure */
+struct lpfc_mqe {
+	uint32_t word0;
+#define lpfc_mqe_status_SHIFT		16
+#define lpfc_mqe_status_MASK		0x0000FFFF
+#define lpfc_mqe_status_WORD		word0
+#define lpfc_mqe_command_SHIFT		8
+#define lpfc_mqe_command_MASK		0x000000FF
+#define lpfc_mqe_command_WORD		word0
+	union {
+		uint32_t mb_words[LPFC_SLI4_MB_WORD_COUNT - 1];
+		/* sli4 mailbox commands */
+		struct lpfc_mbx_sli4_config sli4_config;
+		struct lpfc_mbx_init_vfi init_vfi;
+		struct lpfc_mbx_reg_vfi reg_vfi;
+		struct lpfc_mbx_reg_vfi unreg_vfi;
+		struct lpfc_mbx_init_vpi init_vpi;
+		struct lpfc_mbx_resume_rpi resume_rpi;
+		struct lpfc_mbx_read_fcf_tbl read_fcf_tbl;
+		struct lpfc_mbx_add_fcf_tbl_entry add_fcf_entry;
+		struct lpfc_mbx_del_fcf_tbl_entry del_fcf_entry;
+		struct lpfc_mbx_reg_fcfi reg_fcfi;
+		struct lpfc_mbx_unreg_fcfi unreg_fcfi;
+		struct lpfc_mbx_mq_create mq_create;
+		struct lpfc_mbx_eq_create eq_create;
+		struct lpfc_mbx_cq_create cq_create;
+		struct lpfc_mbx_wq_create wq_create;
+		struct lpfc_mbx_rq_create rq_create;
+		struct lpfc_mbx_mq_destroy mq_destroy;
+		struct lpfc_mbx_eq_destroy eq_destroy;
+		struct lpfc_mbx_cq_destroy cq_destroy;
+		struct lpfc_mbx_wq_destroy wq_destroy;
+		struct lpfc_mbx_rq_destroy rq_destroy;
+		struct lpfc_mbx_post_sgl_pages post_sgl_pages;
+		struct lpfc_mbx_nembed_cmd nembed_cmd;
+		struct lpfc_mbx_read_rev read_rev;
+		struct lpfc_mbx_read_vpi read_vpi;
+		struct lpfc_mbx_read_config rd_config;
+		struct lpfc_mbx_request_features req_ftrs;
+		struct lpfc_mbx_post_hdr_tmpl hdr_tmpl;
+		struct lpfc_mbx_nop nop;
+	} un;
+};
+
+struct lpfc_mcqe {
+	uint32_t word0;
+#define lpfc_mcqe_status_SHIFT		0
+#define lpfc_mcqe_status_MASK		0x0000FFFF
+#define lpfc_mcqe_status_WORD		word0
+#define lpfc_mcqe_ext_status_SHIFT	16
+#define lpfc_mcqe_ext_status_MASK  	0x0000FFFF
+#define lpfc_mcqe_ext_status_WORD 	word0
+	uint32_t mcqe_tag0;
+	uint32_t mcqe_tag1;
+	uint32_t trailer;
+#define lpfc_trailer_valid_SHIFT	31
+#define lpfc_trailer_valid_MASK		0x00000001
+#define lpfc_trailer_valid_WORD		trailer
+#define lpfc_trailer_async_SHIFT	30
+#define lpfc_trailer_async_MASK		0x00000001
+#define lpfc_trailer_async_WORD		trailer
+#define lpfc_trailer_hpi_SHIFT		29
+#define lpfc_trailer_hpi_MASK		0x00000001
+#define lpfc_trailer_hpi_WORD		trailer
+#define lpfc_trailer_completed_SHIFT	28
+#define lpfc_trailer_completed_MASK	0x00000001
+#define lpfc_trailer_completed_WORD	trailer
+#define lpfc_trailer_consumed_SHIFT	27
+#define lpfc_trailer_consumed_MASK	0x00000001
+#define lpfc_trailer_consumed_WORD	trailer
+#define lpfc_trailer_type_SHIFT		16
+#define lpfc_trailer_type_MASK		0x000000FF
+#define lpfc_trailer_type_WORD		trailer
+#define lpfc_trailer_code_SHIFT		8
+#define lpfc_trailer_code_MASK		0x000000FF
+#define lpfc_trailer_code_WORD		trailer
+#define LPFC_TRAILER_CODE_LINK	0x1
+#define LPFC_TRAILER_CODE_FCOE	0x2
+#define LPFC_TRAILER_CODE_DCBX	0x3
+};
+
+struct lpfc_acqe_link {
+	uint32_t word0;
+#define lpfc_acqe_link_speed_SHIFT		24
+#define lpfc_acqe_link_speed_MASK		0x000000FF
+#define lpfc_acqe_link_speed_WORD		word0
+#define LPFC_ASYNC_LINK_SPEED_ZERO		0x0
+#define LPFC_ASYNC_LINK_SPEED_10MBPS		0x1
+#define LPFC_ASYNC_LINK_SPEED_100MBPS		0x2
+#define LPFC_ASYNC_LINK_SPEED_1GBPS		0x3
+#define LPFC_ASYNC_LINK_SPEED_10GBPS		0x4
+#define lpfc_acqe_link_duplex_SHIFT		16
+#define lpfc_acqe_link_duplex_MASK		0x000000FF
+#define lpfc_acqe_link_duplex_WORD		word0
+#define LPFC_ASYNC_LINK_DUPLEX_NONE		0x0
+#define LPFC_ASYNC_LINK_DUPLEX_HALF		0x1
+#define LPFC_ASYNC_LINK_DUPLEX_FULL		0x2
+#define lpfc_acqe_link_status_SHIFT		8
+#define lpfc_acqe_link_status_MASK		0x000000FF
+#define lpfc_acqe_link_status_WORD		word0
+#define LPFC_ASYNC_LINK_STATUS_DOWN		0x0
+#define LPFC_ASYNC_LINK_STATUS_UP		0x1
+#define LPFC_ASYNC_LINK_STATUS_LOGICAL_DOWN	0x2
+#define LPFC_ASYNC_LINK_STATUS_LOGICAL_UP	0x3
+#define lpfc_acqe_link_physical_SHIFT		0
+#define lpfc_acqe_link_physical_MASK		0x000000FF
+#define lpfc_acqe_link_physical_WORD		word0
+#define LPFC_ASYNC_LINK_PORT_A			0x0
+#define LPFC_ASYNC_LINK_PORT_B			0x1
+	uint32_t word1;
+#define lpfc_acqe_link_fault_SHIFT	0
+#define lpfc_acqe_link_fault_MASK	0x000000FF
+#define lpfc_acqe_link_fault_WORD	word1
+#define LPFC_ASYNC_LINK_FAULT_NONE	0x0
+#define LPFC_ASYNC_LINK_FAULT_LOCAL	0x1
+#define LPFC_ASYNC_LINK_FAULT_REMOTE	0x2
+	uint32_t event_tag;
+	uint32_t trailer;
+};
+
+struct lpfc_acqe_fcoe {
+	uint32_t fcf_index;
+	uint32_t word1;
+#define lpfc_acqe_fcoe_fcf_count_SHIFT		0
+#define lpfc_acqe_fcoe_fcf_count_MASK		0x0000FFFF
+#define lpfc_acqe_fcoe_fcf_count_WORD		word1
+#define lpfc_acqe_fcoe_event_type_SHIFT		16
+#define lpfc_acqe_fcoe_event_type_MASK		0x0000FFFF
+#define lpfc_acqe_fcoe_event_type_WORD		word1
+#define LPFC_FCOE_EVENT_TYPE_NEW_FCF		0x1
+#define LPFC_FCOE_EVENT_TYPE_FCF_TABLE_FULL	0x2
+#define LPFC_FCOE_EVENT_TYPE_FCF_DEAD		0x3
+	uint32_t event_tag;
+	uint32_t trailer;
+};
+
+struct lpfc_acqe_dcbx {
+	uint32_t tlv_ttl;
+	uint32_t reserved;
+	uint32_t event_tag;
+	uint32_t trailer;
+};
+
+/*
+ * Define the bootstrap mailbox (bmbx) region used to communicate
+ * mailbox command between the host and port. The mailbox consists
+ * of a payload area of 256 bytes and a completion queue of length
+ * 16 bytes.
+ */
+struct lpfc_bmbx_create {
+	struct lpfc_mqe mqe;
+	struct lpfc_mcqe mcqe;
+};
+
+#define SGL_ALIGN_SZ 64
+#define SGL_PAGE_SIZE 4096
+/* align SGL addr on a size boundary - adjust address up */
+#define NO_XRI ((uint16_t)-1)
+struct wqe_common {
+	uint32_t word6;
+#define wqe_xri_SHIFT         0
+#define wqe_xri_MASK          0x0000FFFF
+#define wqe_xri_WORD          word6
+#define wqe_ctxt_tag_SHIFT    16
+#define wqe_ctxt_tag_MASK     0x0000FFFF
+#define wqe_ctxt_tag_WORD     word6
+	uint32_t word7;
+#define wqe_ct_SHIFT          2
+#define wqe_ct_MASK           0x00000003
+#define wqe_ct_WORD           word7
+#define wqe_status_SHIFT      4
+#define wqe_status_MASK       0x0000000f
+#define wqe_status_WORD       word7
+#define wqe_cmnd_SHIFT        8
+#define wqe_cmnd_MASK         0x000000ff
+#define wqe_cmnd_WORD         word7
+#define wqe_class_SHIFT       16
+#define wqe_class_MASK        0x00000007
+#define wqe_class_WORD        word7
+#define wqe_pu_SHIFT          20
+#define wqe_pu_MASK           0x00000003
+#define wqe_pu_WORD           word7
+#define wqe_erp_SHIFT         22
+#define wqe_erp_MASK          0x00000001
+#define wqe_erp_WORD          word7
+#define wqe_lnk_SHIFT         23
+#define wqe_lnk_MASK          0x00000001
+#define wqe_lnk_WORD          word7
+#define wqe_tmo_SHIFT         24
+#define wqe_tmo_MASK          0x000000ff
+#define wqe_tmo_WORD          word7
+	uint32_t abort_tag; /* word 8 in WQE */
+	uint32_t word9;
+#define wqe_reqtag_SHIFT      0
+#define wqe_reqtag_MASK       0x0000FFFF
+#define wqe_reqtag_WORD       word9
+#define wqe_rcvoxid_SHIFT     16
+#define wqe_rcvoxid_MASK       0x0000FFFF
+#define wqe_rcvoxid_WORD       word9
+	uint32_t word10;
+#define wqe_pri_SHIFT         16
+#define wqe_pri_MASK          0x00000007
+#define wqe_pri_WORD          word10
+#define wqe_pv_SHIFT          19
+#define wqe_pv_MASK           0x00000001
+#define wqe_pv_WORD           word10
+#define wqe_xc_SHIFT          21
+#define wqe_xc_MASK           0x00000001
+#define wqe_xc_WORD           word10
+#define wqe_ccpe_SHIFT        23
+#define wqe_ccpe_MASK         0x00000001
+#define wqe_ccpe_WORD         word10
+#define wqe_ccp_SHIFT         24
+#define wqe_ccp_MASK         0x000000ff
+#define wqe_ccp_WORD         word10
+	uint32_t word11;
+#define wqe_cmd_type_SHIFT  0
+#define wqe_cmd_type_MASK   0x0000000f
+#define wqe_cmd_type_WORD   word11
+#define wqe_wqec_SHIFT      7
+#define wqe_wqec_MASK       0x00000001
+#define wqe_wqec_WORD       word11
+#define wqe_cqid_SHIFT      16
+#define wqe_cqid_MASK       0x000003ff
+#define wqe_cqid_WORD       word11
+};
+
+struct wqe_did {
+	uint32_t word5;
+#define wqe_els_did_SHIFT         0
+#define wqe_els_did_MASK          0x00FFFFFF
+#define wqe_els_did_WORD          word5
+#define wqe_xmit_bls_ar_SHIFT         30
+#define wqe_xmit_bls_ar_MASK          0x00000001
+#define wqe_xmit_bls_ar_WORD          word5
+#define wqe_xmit_bls_xo_SHIFT         31
+#define wqe_xmit_bls_xo_MASK          0x00000001
+#define wqe_xmit_bls_xo_WORD          word5
+};
+
+struct els_request64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t payload_len;
+	uint32_t word4;
+#define els_req64_sid_SHIFT         0
+#define els_req64_sid_MASK          0x00FFFFFF
+#define els_req64_sid_WORD          word4
+#define els_req64_sp_SHIFT          24
+#define els_req64_sp_MASK           0x00000001
+#define els_req64_sp_WORD           word4
+#define els_req64_vf_SHIFT          25
+#define els_req64_vf_MASK           0x00000001
+#define els_req64_vf_WORD           word4
+	struct wqe_did	wqe_dest;
+	struct wqe_common wqe_com; /* words 6-11 */
+	uint32_t word12;
+#define els_req64_vfid_SHIFT        1
+#define els_req64_vfid_MASK         0x00000FFF
+#define els_req64_vfid_WORD         word12
+#define els_req64_pri_SHIFT         13
+#define els_req64_pri_MASK          0x00000007
+#define els_req64_pri_WORD          word12
+	uint32_t word13;
+#define els_req64_hopcnt_SHIFT      24
+#define els_req64_hopcnt_MASK       0x000000ff
+#define els_req64_hopcnt_WORD       word13
+	uint32_t reserved[2];
+};
+
+struct xmit_els_rsp64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t rsvd3;
+	uint32_t rsvd4;
+	struct wqe_did	wqe_dest;
+	struct wqe_common wqe_com; /* words 6-11 */
+	uint32_t rsvd_12_15[4];
+};
+
+struct xmit_bls_rsp64_wqe {
+	uint32_t payload0;
+	uint32_t word1;
+#define xmit_bls_rsp64_rxid_SHIFT  0
+#define xmit_bls_rsp64_rxid_MASK   0x0000ffff
+#define xmit_bls_rsp64_rxid_WORD   word1
+#define xmit_bls_rsp64_oxid_SHIFT  16
+#define xmit_bls_rsp64_oxid_MASK   0x0000ffff
+#define xmit_bls_rsp64_oxid_WORD   word1
+	uint32_t word2;
+#define xmit_bls_rsp64_seqcntlo_SHIFT  0
+#define xmit_bls_rsp64_seqcntlo_MASK   0x0000ffff
+#define xmit_bls_rsp64_seqcntlo_WORD   word2
+#define xmit_bls_rsp64_seqcnthi_SHIFT  16
+#define xmit_bls_rsp64_seqcnthi_MASK   0x0000ffff
+#define xmit_bls_rsp64_seqcnthi_WORD   word2
+	uint32_t rsrvd3;
+	uint32_t rsrvd4;
+	struct wqe_did	wqe_dest;
+	struct wqe_common wqe_com; /* words 6-11 */
+	uint32_t rsvd_12_15[4];
+};
+struct wqe_rctl_dfctl {
+	uint32_t word5;
+#define wqe_si_SHIFT 2
+#define wqe_si_MASK  0x000000001
+#define wqe_si_WORD  word5
+#define wqe_la_SHIFT 3
+#define wqe_la_MASK  0x000000001
+#define wqe_la_WORD  word5
+#define wqe_ls_SHIFT 7
+#define wqe_ls_MASK  0x000000001
+#define wqe_ls_WORD  word5
+#define wqe_dfctl_SHIFT 8
+#define wqe_dfctl_MASK  0x0000000ff
+#define wqe_dfctl_WORD  word5
+#define wqe_type_SHIFT 16
+#define wqe_type_MASK  0x0000000ff
+#define wqe_type_WORD  word5
+#define wqe_rctl_SHIFT 24
+#define wqe_rctl_MASK  0x0000000ff
+#define wqe_rctl_WORD  word5
+};
+
+struct xmit_seq64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t paylaod_offset;
+	uint32_t relative_offset;
+	struct wqe_rctl_dfctl wge_ctl;
+	struct wqe_common wqe_com; /* words 6-11 */
+	/* Note: word10 different REVISIT */
+	uint32_t xmit_len;
+	uint32_t rsvd_12_15[3];
+};
+struct xmit_bcast64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t paylaod_len;
+	uint32_t rsvd4;
+	struct wqe_rctl_dfctl wge_ctl; /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];
+};
+
+struct gen_req64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t command_len;
+	uint32_t payload_len;
+	struct wqe_rctl_dfctl wge_ctl; /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];
+};
+
+struct create_xri_wqe {
+	uint32_t rsrvd[5];           /* words 0-4 */
+	struct wqe_did	wqe_dest;  /* word 5 */
+	struct wqe_common wqe_com; /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+#define T_REQUEST_TAG 3
+#define T_XRI_TAG 1
+
+struct abort_cmd_wqe {
+	uint32_t rsrvd[3];
+	uint32_t word3;
+#define	abort_cmd_ia_SHIFT  0
+#define	abort_cmd_ia_MASK  0x000000001
+#define	abort_cmd_ia_WORD  word3
+#define	abort_cmd_criteria_SHIFT  8
+#define	abort_cmd_criteria_MASK  0x0000000ff
+#define	abort_cmd_criteria_WORD  word3
+	uint32_t rsrvd4;
+	uint32_t rsrvd5;
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+struct fcp_iwrite64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t payload_len;
+	uint32_t total_xfer_len;
+	uint32_t initial_xfer_len;
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+struct fcp_iread64_wqe {
+	struct ulp_bde64 bde;
+	uint32_t payload_len;          /* word 3 */
+	uint32_t total_xfer_len;       /* word 4 */
+	uint32_t rsrvd5;               /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+struct fcp_icmnd64_wqe {
+	struct ulp_bde64 bde;	 /* words 0-2 */
+	uint32_t rsrvd[3];             /* words 3-5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t rsvd_12_15[4];         /* word 12-15 */
+};
+
+
+union lpfc_wqe {
+	uint32_t words[16];
+	struct lpfc_wqe_generic generic;
+	struct fcp_icmnd64_wqe fcp_icmd;
+	struct fcp_iread64_wqe fcp_iread;
+	struct fcp_iwrite64_wqe fcp_iwrite;
+	struct abort_cmd_wqe abort_cmd;
+	struct create_xri_wqe create_xri;
+	struct xmit_bcast64_wqe xmit_bcast64;
+	struct xmit_seq64_wqe xmit_sequence;
+	struct xmit_bls_rsp64_wqe xmit_bls_rsp;
+	struct xmit_els_rsp64_wqe xmit_els_rsp;
+	struct els_request64_wqe els_req;
+	struct gen_req64_wqe gen_req;
+};
+
+#define FCP_COMMAND 0x0
+#define FCP_COMMAND_DATA_OUT 0x1
+#define ELS_COMMAND_NON_FIP 0xC
+#define ELS_COMMAND_FIP 0xD
+#define OTHER_COMMAND 0x8
+
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 3f06ce2..e9e4a1d 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -34,8 +34,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -51,9 +53,23 @@ char *_dump_buf_dif;
 unsigned long _dump_buf_dif_order;
 spinlock_t _dump_buf_lock;
 
-static int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *, int);
 static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
 static int lpfc_post_rcv_buf(struct lpfc_hba *);
+static int lpfc_sli4_queue_create(struct lpfc_hba *);
+static void lpfc_sli4_queue_destroy(struct lpfc_hba *);
+static int lpfc_create_bootstrap_mbox(struct lpfc_hba *);
+static int lpfc_setup_endian_order(struct lpfc_hba *);
+static int lpfc_sli4_read_config(struct lpfc_hba *);
+static void lpfc_destroy_bootstrap_mbox(struct lpfc_hba *);
+static void lpfc_free_sgl_list(struct lpfc_hba *);
+static int lpfc_init_sgl_list(struct lpfc_hba *);
+static int lpfc_init_active_sgl_array(struct lpfc_hba *);
+static void lpfc_free_active_sgl(struct lpfc_hba *);
+static int lpfc_hba_down_post_s3(struct lpfc_hba *phba);
+static int lpfc_hba_down_post_s4(struct lpfc_hba *phba);
+static int lpfc_sli4_cq_event_pool_create(struct lpfc_hba *);
+static void lpfc_sli4_cq_event_pool_destroy(struct lpfc_hba *);
+static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
 static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -646,6 +662,77 @@ lpfc_hba_down_post_s3(struct lpfc_hba *phba)
 
 	return 0;
 }
+/**
+ * lpfc_hba_down_post_s4 - Perform lpfc uninitialization after HBA reset
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine will do uninitialization after the HBA is reset when bring
+ * down the SLI Layer.
+ *
+ * Return codes
+ *   0 - sucess.
+ *   Any other value - error.
+ **/
+static int
+lpfc_hba_down_post_s4(struct lpfc_hba *phba)
+{
+	struct lpfc_scsi_buf *psb, *psb_next;
+	LIST_HEAD(aborts);
+	int ret;
+	unsigned long iflag = 0;
+	ret = lpfc_hba_down_post_s3(phba);
+	if (ret)
+		return ret;
+	/* At this point in time the HBA is either reset or DOA. Either
+	 * way, nothing should be on lpfc_abts_els_sgl_list, it needs to be
+	 * on the lpfc_sgl_list so that it can either be freed if the
+	 * driver is unloading or reposted if the driver is restarting
+	 * the port.
+	 */
+	spin_lock_irq(&phba->hbalock);  /* required for lpfc_sgl_list and */
+					/* scsl_buf_list */
+	/* abts_sgl_list_lock required because worker thread uses this
+	 * list.
+	 */
+	spin_lock(&phba->sli4_hba.abts_sgl_list_lock);
+	list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list,
+			&phba->sli4_hba.lpfc_sgl_list);
+	spin_unlock(&phba->sli4_hba.abts_sgl_list_lock);
+	/* abts_scsi_buf_list_lock required because worker thread uses this
+	 * list.
+	 */
+	spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock);
+	list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list,
+			&aborts);
+	spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock);
+	spin_unlock_irq(&phba->hbalock);
+
+	list_for_each_entry_safe(psb, psb_next, &aborts, list) {
+		psb->pCmd = NULL;
+		psb->status = IOSTAT_SUCCESS;
+	}
+	spin_lock_irqsave(&phba->scsi_buf_list_lock, iflag);
+	list_splice(&aborts, &phba->lpfc_scsi_buf_list);
+	spin_unlock_irqrestore(&phba->scsi_buf_list_lock, iflag);
+	return 0;
+}
+
+/**
+ * lpfc_hba_down_post - Wrapper func for hba down post routine
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine wraps the actual SLI3 or SLI4 routine for performing
+ * uninitialization after the HBA is reset when bring down the SLI Layer.
+ *
+ * Return codes
+ *   0 - sucess.
+ *   Any other value - error.
+ **/
+int
+lpfc_hba_down_post(struct lpfc_hba *phba)
+{
+	return (*phba->lpfc_hba_down_post)(phba);
+}
 
 /**
  * lpfc_hb_timeout - The HBA-timer timeout handler
@@ -853,6 +940,25 @@ lpfc_offline_eratt(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_sli4_offline_eratt - Bring lpfc offline on SLI4 hardware error attention
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is called to bring a SLI4 HBA offline when HBA hardware error
+ * other than Port Error 6 has been detected.
+ **/
+static void
+lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
+{
+	lpfc_offline_prep(phba);
+	lpfc_offline(phba);
+	lpfc_sli4_brdreset(phba);
+	lpfc_hba_down_post(phba);
+	lpfc_sli4_post_status_check(phba);
+	lpfc_unblock_mgmt_io(phba);
+	phba->link_state = LPFC_HBA_ERROR;
+}
+
+/**
  * lpfc_handle_deferred_eratt - The HBA hardware deferred error handler
  * @phba: pointer to lpfc hba data structure.
  *
@@ -1057,6 +1163,65 @@ lpfc_handle_eratt_s3(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_handle_eratt_s4 - The SLI4 HBA hardware error handler
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to handle the SLI4 HBA hardware error attention
+ * conditions.
+ **/
+static void
+lpfc_handle_eratt_s4(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	uint32_t event_data;
+	struct Scsi_Host *shost;
+
+	/* If the pci channel is offline, ignore possible errors, since
+	 * we cannot communicate with the pci card anyway.
+	 */
+	if (pci_channel_offline(phba->pcidev))
+		return;
+	/* If resets are disabled then leave the HBA alone and return */
+	if (!phba->cfg_enable_hba_reset)
+		return;
+
+	/* Send an internal error event to mgmt application */
+	lpfc_board_errevt_to_mgmt(phba);
+
+	/* For now, the actual action for SLI4 device handling is not
+	 * specified yet, just treated it as adaptor hardware failure
+	 */
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"0143 SLI4 Adapter Hardware Error Data: x%x x%x\n",
+			phba->work_status[0], phba->work_status[1]);
+
+	event_data = FC_REG_DUMP_EVENT;
+	shost = lpfc_shost_from_vport(vport);
+	fc_host_post_vendor_event(shost, fc_get_event_number(),
+				  sizeof(event_data), (char *) &event_data,
+				  SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX);
+
+	lpfc_sli4_offline_eratt(phba);
+}
+
+/**
+ * lpfc_handle_eratt - Wrapper func for handling hba error attention
+ * @phba: pointer to lpfc HBA data structure.
+ *
+ * This routine wraps the actual SLI3 or SLI4 hba error attention handling
+ * routine from the API jump table function pointer from the lpfc_hba struct.
+ *
+ * Return codes
+ *   0 - sucess.
+ *   Any other value - error.
+ **/
+void
+lpfc_handle_eratt(struct lpfc_hba *phba)
+{
+	(*phba->lpfc_handle_eratt)(phba);
+}
+
+/**
  * lpfc_handle_latt - The HBA link event handler
  * @phba: pointer to lpfc hba data structure.
  *
@@ -1312,6 +1477,7 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp)
 	uint16_t dev_id = phba->pcidev->device;
 	int max_speed;
 	int GE = 0;
+	int oneConnect = 0; /* default is not a oneConnect */
 	struct {
 		char * name;
 		int    max_speed;
@@ -1457,6 +1623,14 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp)
 	case PCI_DEVICE_ID_PROTEUS_S:
 		m = (typeof(m)) {"LPemv12002-S", max_speed, "PCIe IOV"};
 		break;
+	case PCI_DEVICE_ID_TIGERSHARK:
+		oneConnect = 1;
+		m = (typeof(m)) {"OCe10100-F", max_speed, "PCIe"};
+		break;
+	case PCI_DEVICE_ID_TIGERSHARK_S:
+		oneConnect = 1;
+		m = (typeof(m)) {"OCe10100-F-S", max_speed, "PCIe"};
+		break;
 	default:
 		m = (typeof(m)){ NULL };
 		break;
@@ -1464,13 +1638,24 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp)
 
 	if (mdp && mdp[0] == '\0')
 		snprintf(mdp, 79,"%s", m.name);
-	if (descp && descp[0] == '\0')
-		snprintf(descp, 255,
-			"Emulex %s %d%s %s %s",
-			m.name, m.max_speed,
-			(GE) ? "GE" : "Gb",
-			m.bus,
-			(GE) ? "FCoE Adapter" : "Fibre Channel Adapter");
+	/* oneConnect hba requires special processing, they are all initiators
+	 * and we put the port number on the end
+	 */
+	if (descp && descp[0] == '\0') {
+		if (oneConnect)
+			snprintf(descp, 255,
+				"Emulex OneConnect %s, FCoE Initiator, Port %s",
+				m.name,
+				phba->Port);
+		else
+			snprintf(descp, 255,
+				"Emulex %s %d%s %s %s",
+				m.name, m.max_speed,
+				(GE) ? "GE" : "Gb",
+				m.bus,
+				(GE) ? "FCoE Adapter" :
+					"Fibre Channel Adapter");
+	}
 }
 
 /**
@@ -1911,14 +2096,21 @@ lpfc_online(struct lpfc_hba *phba)
 		return 1;
 	}
 
-	if (lpfc_sli_hba_setup(phba)) {	/* Initialize the HBA */
-		lpfc_unblock_mgmt_io(phba);
-		return 1;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		if (lpfc_sli4_hba_setup(phba)) { /* Initialize SLI4 HBA */
+			lpfc_unblock_mgmt_io(phba);
+			return 1;
+		}
+	} else {
+		if (lpfc_sli_hba_setup(phba)) {	/* Initialize SLI2/SLI3 HBA */
+			lpfc_unblock_mgmt_io(phba);
+			return 1;
+		}
 	}
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			struct Scsi_Host *shost;
 			shost = lpfc_shost_from_vport(vports[i]);
 			spin_lock_irq(shost->host_lock);
@@ -1980,11 +2172,12 @@ lpfc_offline_prep(struct lpfc_hba * phba)
 	/* Issue an unreg_login to all nodes on all vports */
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL) {
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			struct Scsi_Host *shost;
 
 			if (vports[i]->load_flag & FC_UNLOADING)
 				continue;
+			vports[i]->vfi_state &= ~LPFC_VFI_REGISTERED;
 			shost =	lpfc_shost_from_vport(vports[i]);
 			list_for_each_entry_safe(ndlp, next_ndlp,
 						 &vports[i]->fc_nodes,
@@ -2029,11 +2222,11 @@ lpfc_offline(struct lpfc_hba *phba)
 	if (phba->pport->fc_flag & FC_OFFLINE_MODE)
 		return;
 
-	/* stop all timers associated with this hba */
-	lpfc_stop_phba_timers(phba);
+	/* stop port and all timers associated with this hba */
+	lpfc_stop_port(phba);
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++)
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++)
 			lpfc_stop_vport_timers(vports[i]);
 	lpfc_destroy_vport_work_array(phba, vports);
 	lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
@@ -2046,7 +2239,7 @@ lpfc_offline(struct lpfc_hba *phba)
 	spin_unlock_irq(&phba->hbalock);
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			shost = lpfc_shost_from_vport(vports[i]);
 			spin_lock_irq(shost->host_lock);
 			vports[i]->work_port_events = 0;
@@ -2139,6 +2332,10 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 	shost->max_lun = vport->cfg_max_luns;
 	shost->this_id = -1;
 	shost->max_cmd_len = 16;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		shost->dma_boundary = LPFC_SLI4_MAX_SEGMENT_SIZE;
+		shost->sg_tablesize = phba->cfg_sg_seg_cnt;
+	}
 
 	/*
 	 * Set initial can_queue value since 0 is no longer supported and
@@ -2156,6 +2353,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 
 	/* Initialize all internally managed lists. */
 	INIT_LIST_HEAD(&vport->fc_nodes);
+	INIT_LIST_HEAD(&vport->rcv_buffer_list);
 	spin_lock_init(&vport->work_port_lock);
 
 	init_timer(&vport->fc_disctmo);
@@ -2347,192 +2545,501 @@ void lpfc_host_attrib_init(struct Scsi_Host *shost)
 }
 
 /**
- * lpfc_enable_msix - Enable MSI-X interrupt mode
+ * lpfc_stop_port_s3 - Stop SLI3 device port
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to enable the MSI-X interrupt vectors. The kernel
- * function pci_enable_msix() is called to enable the MSI-X vectors. Note that
- * pci_enable_msix(), once invoked, enables either all or nothing, depending
- * on the current availability of PCI vector resources. The device driver is
- * responsible for calling the individual request_irq() to register each MSI-X
- * vector with a interrupt handler, which is done in this function. Note that
- * later when device is unloading, the driver should always call free_irq()
- * on all MSI-X vectors it has done request_irq() on before calling
- * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device
- * will be left with MSI-X enabled and leaks its vectors.
- *
- * Return codes
- *   0 - sucessful
- *   other values - error
+ * This routine is invoked to stop an SLI3 device port, it stops the device
+ * from generating interrupts and stops the device driver's timers for the
+ * device.
  **/
-static int
-lpfc_enable_msix(struct lpfc_hba *phba)
+static void
+lpfc_stop_port_s3(struct lpfc_hba *phba)
 {
-	int rc, i;
-	LPFC_MBOXQ_t *pmb;
+	/* Clear all interrupt enable conditions */
+	writel(0, phba->HCregaddr);
+	readl(phba->HCregaddr); /* flush */
+	/* Clear all pending interrupts */
+	writel(0xffffffff, phba->HAregaddr);
+	readl(phba->HAregaddr); /* flush */
 
-	/* Set up MSI-X multi-message vectors */
-	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
-		phba->msix_entries[i].entry = i;
+	/* Reset some HBA SLI setup states */
+	lpfc_stop_hba_timers(phba);
+	phba->pport->work_port_events = 0;
+}
 
-	/* Configure MSI-X capability structure */
-	rc = pci_enable_msix(phba->pcidev, phba->msix_entries,
-				ARRAY_SIZE(phba->msix_entries));
-	if (rc) {
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0420 PCI enable MSI-X failed (%d)\n", rc);
-		goto msi_fail_out;
-	} else
-		for (i = 0; i < LPFC_MSIX_VECTORS; i++)
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"0477 MSI-X entry[%d]: vector=x%x "
-					"message=%d\n", i,
-					phba->msix_entries[i].vector,
-					phba->msix_entries[i].entry);
-	/*
-	 * Assign MSI-X vectors to interrupt handlers
-	 */
+/**
+ * lpfc_stop_port_s4 - Stop SLI4 device port
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to stop an SLI4 device port, it stops the device
+ * from generating interrupts and stops the device driver's timers for the
+ * device.
+ **/
+static void
+lpfc_stop_port_s4(struct lpfc_hba *phba)
+{
+	/* Reset some HBA SLI4 setup states */
+	lpfc_stop_hba_timers(phba);
+	phba->pport->work_port_events = 0;
+	phba->sli4_hba.intr_enable = 0;
+	/* Hard clear it for now, shall have more graceful way to wait later */
+	phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+}
 
-	/* vector-0 is associated to slow-path handler */
-	rc = request_irq(phba->msix_entries[0].vector, &lpfc_sp_intr_handler,
-			 IRQF_SHARED, LPFC_SP_DRIVER_HANDLER_NAME, phba);
-	if (rc) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"0421 MSI-X slow-path request_irq failed "
-				"(%d)\n", rc);
-		goto msi_fail_out;
-	}
+/**
+ * lpfc_stop_port - Wrapper function for stopping hba port
+ * @phba: Pointer to HBA context object.
+ *
+ * This routine wraps the actual SLI3 or SLI4 hba stop port routine from
+ * the API jump table function pointer from the lpfc_hba struct.
+ **/
+void
+lpfc_stop_port(struct lpfc_hba *phba)
+{
+	phba->lpfc_stop_port(phba);
+}
 
-	/* vector-1 is associated to fast-path handler */
-	rc = request_irq(phba->msix_entries[1].vector, &lpfc_fp_intr_handler,
-			 IRQF_SHARED, LPFC_FP_DRIVER_HANDLER_NAME, phba);
+/**
+ * lpfc_sli4_remove_dflt_fcf - Remove the driver default fcf record from the port.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to remove the driver default fcf record from
+ * the port.  This routine currently acts on FCF Index 0.
+ *
+ **/
+void
+lpfc_sli_remove_dflt_fcf(struct lpfc_hba *phba)
+{
+	int rc = 0;
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_mbx_del_fcf_tbl_entry *del_fcf_record;
+	uint32_t mbox_tmo, req_len;
+	uint32_t shdr_status, shdr_add_status;
 
-	if (rc) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"0429 MSI-X fast-path request_irq failed "
-				"(%d)\n", rc);
-		goto irq_fail_out;
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2020 Failed to allocate mbox for ADD_FCF cmd\n");
+		return;
 	}
 
+	req_len = sizeof(struct lpfc_mbx_del_fcf_tbl_entry) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr);
+	rc = lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_FCOE,
+			      LPFC_MBOX_OPCODE_FCOE_DELETE_FCF,
+			      req_len, LPFC_SLI4_MBX_EMBED);
 	/*
-	 * Configure HBA MSI-X attention conditions to messages
+	 * In phase 1, there is a single FCF index, 0.  In phase2, the driver
+	 * supports multiple FCF indices.
 	 */
-	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	del_fcf_record = &mboxq->u.mqe.un.del_fcf_entry;
+	bf_set(lpfc_mbx_del_fcf_tbl_count, del_fcf_record, 1);
+	bf_set(lpfc_mbx_del_fcf_tbl_index, del_fcf_record,
+	       phba->fcf.fcf_indx);
 
-	if (!pmb) {
-		rc = -ENOMEM;
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0474 Unable to allocate memory for issuing "
-				"MBOX_CONFIG_MSI command\n");
-		goto mem_fail_out;
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	else {
+		mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+		rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
 	}
-	rc = lpfc_config_msi(phba, pmb);
-	if (rc)
-		goto mbx_fail_out;
-	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
-	if (rc != MBX_SUCCESS) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
-				"0351 Config MSI mailbox command failed, "
-				"mbxCmd x%x, mbxStatus x%x\n",
-				pmb->mb.mbxCommand, pmb->mb.mbxStatus);
-		goto mbx_fail_out;
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr_status = bf_get(lpfc_mbox_hdr_status,
+			     &del_fcf_record->header.cfg_shdr.response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
+				 &del_fcf_record->header.cfg_shdr.response);
+	if (shdr_status || shdr_add_status || rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2516 DEL FCF of default FCF Index failed "
+				"mbx status x%x, status x%x add_status x%x\n",
+				rc, shdr_status, shdr_add_status);
 	}
-
-	/* Free memory allocated for mailbox command */
-	mempool_free(pmb, phba->mbox_mem_pool);
-	return rc;
-
-mbx_fail_out:
-	/* Free memory allocated for mailbox command */
-	mempool_free(pmb, phba->mbox_mem_pool);
-
-mem_fail_out:
-	/* free the irq already requested */
-	free_irq(phba->msix_entries[1].vector, phba);
-
-irq_fail_out:
-	/* free the irq already requested */
-	free_irq(phba->msix_entries[0].vector, phba);
-
-msi_fail_out:
-	/* Unconfigure MSI-X capability structure */
-	pci_disable_msix(phba->pcidev);
-	return rc;
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
 }
 
 /**
- * lpfc_disable_msix - Disable MSI-X interrupt mode
+ * lpfc_sli4_parse_latt_fault - Parse sli4 link-attention link fault code
  * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async link completion queue entry.
  *
- * This routine is invoked to release the MSI-X vectors and then disable the
- * MSI-X interrupt mode.
+ * This routine is to parse the SLI4 link-attention link fault code and
+ * translate it into the base driver's read link attention mailbox command
+ * status.
+ *
+ * Return: Link-attention status in terms of base driver's coding.
  **/
-static void
-lpfc_disable_msix(struct lpfc_hba *phba)
+static uint16_t
+lpfc_sli4_parse_latt_fault(struct lpfc_hba *phba,
+			   struct lpfc_acqe_link *acqe_link)
 {
-	int i;
+	uint16_t latt_fault;
 
-	/* Free up MSI-X multi-message vectors */
-	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
-		free_irq(phba->msix_entries[i].vector, phba);
-	/* Disable MSI-X */
-	pci_disable_msix(phba->pcidev);
+	switch (bf_get(lpfc_acqe_link_fault, acqe_link)) {
+	case LPFC_ASYNC_LINK_FAULT_NONE:
+	case LPFC_ASYNC_LINK_FAULT_LOCAL:
+	case LPFC_ASYNC_LINK_FAULT_REMOTE:
+		latt_fault = 0;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0398 Invalid link fault code: x%x\n",
+				bf_get(lpfc_acqe_link_fault, acqe_link));
+		latt_fault = MBXERR_ERROR;
+		break;
+	}
+	return latt_fault;
 }
 
 /**
- * lpfc_enable_msi - Enable MSI interrupt mode
+ * lpfc_sli4_parse_latt_type - Parse sli4 link attention type
  * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async link completion queue entry.
  *
- * This routine is invoked to enable the MSI interrupt mode. The kernel
- * function pci_enable_msi() is called to enable the MSI vector. The
- * device driver is responsible for calling the request_irq() to register
- * MSI vector with a interrupt the handler, which is done in this function.
+ * This routine is to parse the SLI4 link attention type and translate it
+ * into the base driver's link attention type coding.
  *
- * Return codes
- * 	0 - sucessful
- * 	other values - error
- */
-static int
-lpfc_enable_msi(struct lpfc_hba *phba)
+ * Return: Link attention type in terms of base driver's coding.
+ **/
+static uint8_t
+lpfc_sli4_parse_latt_type(struct lpfc_hba *phba,
+			  struct lpfc_acqe_link *acqe_link)
 {
-	int rc;
-
-	rc = pci_enable_msi(phba->pcidev);
-	if (!rc)
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0462 PCI enable MSI mode success.\n");
-	else {
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0471 PCI enable MSI mode failed (%d)\n", rc);
-		return rc;
-	}
+	uint8_t att_type;
 
-	rc = request_irq(phba->pcidev->irq, lpfc_intr_handler,
-			 IRQF_SHARED, LPFC_DRIVER_NAME, phba);
-	if (rc) {
-		pci_disable_msi(phba->pcidev);
-		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"0478 MSI request_irq failed (%d)\n", rc);
+	switch (bf_get(lpfc_acqe_link_status, acqe_link)) {
+	case LPFC_ASYNC_LINK_STATUS_DOWN:
+	case LPFC_ASYNC_LINK_STATUS_LOGICAL_DOWN:
+		att_type = AT_LINK_DOWN;
+		break;
+	case LPFC_ASYNC_LINK_STATUS_UP:
+		/* Ignore physical link up events - wait for logical link up */
+		att_type = AT_RESERVED;
+		break;
+	case LPFC_ASYNC_LINK_STATUS_LOGICAL_UP:
+		att_type = AT_LINK_UP;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0399 Invalid link attention type: x%x\n",
+				bf_get(lpfc_acqe_link_status, acqe_link));
+		att_type = AT_RESERVED;
+		break;
 	}
-	return rc;
+	return att_type;
 }
 
 /**
- * lpfc_disable_msi - Disable MSI interrupt mode
+ * lpfc_sli4_parse_latt_link_speed - Parse sli4 link-attention link speed
  * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async link completion queue entry.
  *
- * This routine is invoked to disable the MSI interrupt mode. The driver
- * calls free_irq() on MSI vector it has done request_irq() on before
- * calling pci_disable_msi(). Failure to do so results in a BUG_ON() and
- * a device will be left with MSI enabled and leaks its vector.
- */
-
-static void
-lpfc_disable_msi(struct lpfc_hba *phba)
+ * This routine is to parse the SLI4 link-attention link speed and translate
+ * it into the base driver's link-attention link speed coding.
+ *
+ * Return: Link-attention link speed in terms of base driver's coding.
+ **/
+static uint8_t
+lpfc_sli4_parse_latt_link_speed(struct lpfc_hba *phba,
+				struct lpfc_acqe_link *acqe_link)
 {
-	free_irq(phba->pcidev->irq, phba);
-	pci_disable_msi(phba->pcidev);
+	uint8_t link_speed;
+
+	switch (bf_get(lpfc_acqe_link_speed, acqe_link)) {
+	case LPFC_ASYNC_LINK_SPEED_ZERO:
+		link_speed = LA_UNKNW_LINK;
+		break;
+	case LPFC_ASYNC_LINK_SPEED_10MBPS:
+		link_speed = LA_UNKNW_LINK;
+		break;
+	case LPFC_ASYNC_LINK_SPEED_100MBPS:
+		link_speed = LA_UNKNW_LINK;
+		break;
+	case LPFC_ASYNC_LINK_SPEED_1GBPS:
+		link_speed = LA_1GHZ_LINK;
+		break;
+	case LPFC_ASYNC_LINK_SPEED_10GBPS:
+		link_speed = LA_10GHZ_LINK;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0483 Invalid link-attention link speed: x%x\n",
+				bf_get(lpfc_acqe_link_speed, acqe_link));
+		link_speed = LA_UNKNW_LINK;
+		break;
+	}
+	return link_speed;
+}
+
+/**
+ * lpfc_sli4_async_link_evt - Process the asynchronous link event
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async link completion queue entry.
+ *
+ * This routine is to handle the SLI4 asynchronous link event.
+ **/
+static void
+lpfc_sli4_async_link_evt(struct lpfc_hba *phba,
+			 struct lpfc_acqe_link *acqe_link)
+{
+	struct lpfc_dmabuf *mp;
+	LPFC_MBOXQ_t *pmb;
+	MAILBOX_t *mb;
+	READ_LA_VAR *la;
+	uint8_t att_type;
+
+	att_type = lpfc_sli4_parse_latt_type(phba, acqe_link);
+	if (att_type != AT_LINK_DOWN && att_type != AT_LINK_UP)
+		return;
+	pmb = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!pmb) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0395 The mboxq allocation failed\n");
+		return;
+	}
+	mp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!mp) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0396 The lpfc_dmabuf allocation failed\n");
+		goto out_free_pmb;
+	}
+	mp->virt = lpfc_mbuf_alloc(phba, 0, &mp->phys);
+	if (!mp->virt) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0397 The mbuf allocation failed\n");
+		goto out_free_dmabuf;
+	}
+
+	/* Cleanup any outstanding ELS commands */
+	lpfc_els_flush_all_cmd(phba);
+
+	/* Block ELS IOCBs until we have done process link event */
+	phba->sli.ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT;
+
+	/* Update link event statistics */
+	phba->sli.slistat.link_event++;
+
+	/* Create pseudo lpfc_handle_latt mailbox command from link ACQE */
+	lpfc_read_la(phba, pmb, mp);
+	pmb->vport = phba->pport;
+
+	/* Parse and translate status field */
+	mb = &pmb->u.mb;
+	mb->mbxStatus = lpfc_sli4_parse_latt_fault(phba, acqe_link);
+
+	/* Parse and translate link attention fields */
+	la = (READ_LA_VAR *) &pmb->u.mb.un.varReadLA;
+	la->eventTag = acqe_link->event_tag;
+	la->attType = att_type;
+	la->UlnkSpeed = lpfc_sli4_parse_latt_link_speed(phba, acqe_link);
+
+	/* Fake the the following irrelvant fields */
+	la->topology = TOPOLOGY_PT_PT;
+	la->granted_AL_PA = 0;
+	la->il = 0;
+	la->pb = 0;
+	la->fa = 0;
+	la->mm = 0;
+
+	/* Keep the link status for extra SLI4 state machine reference */
+	phba->sli4_hba.link_state.speed =
+				bf_get(lpfc_acqe_link_speed, acqe_link);
+	phba->sli4_hba.link_state.duplex =
+				bf_get(lpfc_acqe_link_duplex, acqe_link);
+	phba->sli4_hba.link_state.status =
+				bf_get(lpfc_acqe_link_status, acqe_link);
+	phba->sli4_hba.link_state.physical =
+				bf_get(lpfc_acqe_link_physical, acqe_link);
+	phba->sli4_hba.link_state.fault =
+				bf_get(lpfc_acqe_link_fault, acqe_link);
+
+	/* Invoke the lpfc_handle_latt mailbox command callback function */
+	lpfc_mbx_cmpl_read_la(phba, pmb);
+
 	return;
+
+out_free_dmabuf:
+	kfree(mp);
+out_free_pmb:
+	mempool_free(pmb, phba->mbox_mem_pool);
+}
+
+/**
+ * lpfc_sli4_async_fcoe_evt - Process the asynchronous fcoe event
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async fcoe completion queue entry.
+ *
+ * This routine is to handle the SLI4 asynchronous fcoe event.
+ **/
+static void
+lpfc_sli4_async_fcoe_evt(struct lpfc_hba *phba,
+			 struct lpfc_acqe_fcoe *acqe_fcoe)
+{
+	uint8_t event_type = bf_get(lpfc_acqe_fcoe_event_type, acqe_fcoe);
+	int rc;
+
+	switch (event_type) {
+	case LPFC_FCOE_EVENT_TYPE_NEW_FCF:
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+			"2546 New FCF found index 0x%x tag 0x%x \n",
+			acqe_fcoe->fcf_index,
+			acqe_fcoe->event_tag);
+		/*
+		 * If the current FCF is in discovered state,
+		 * do nothing.
+		 */
+		spin_lock_irq(&phba->hbalock);
+		if (phba->fcf.fcf_flag & FCF_DISCOVERED) {
+			spin_unlock_irq(&phba->hbalock);
+			break;
+		}
+		spin_unlock_irq(&phba->hbalock);
+
+		/* Read the FCF table and re-discover SAN. */
+		rc = lpfc_sli4_read_fcf_record(phba,
+			LPFC_FCOE_FCF_GET_FIRST);
+		if (rc)
+			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+				"2547 Read FCF record failed 0x%x\n",
+				rc);
+		break;
+
+	case LPFC_FCOE_EVENT_TYPE_FCF_TABLE_FULL:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2548 FCF Table full count 0x%x tag 0x%x \n",
+			bf_get(lpfc_acqe_fcoe_fcf_count, acqe_fcoe),
+			acqe_fcoe->event_tag);
+		break;
+
+	case LPFC_FCOE_EVENT_TYPE_FCF_DEAD:
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+			"2549 FCF disconnected fron network index 0x%x"
+			" tag 0x%x \n", acqe_fcoe->fcf_index,
+			acqe_fcoe->event_tag);
+		/* If the event is not for currently used fcf do nothing */
+		if (phba->fcf.fcf_indx != acqe_fcoe->fcf_index)
+			break;
+		/*
+		 * Currently, driver support only one FCF - so treat this as
+		 * a link down.
+		 */
+		lpfc_linkdown(phba);
+		/* Unregister FCF if no devices connected to it */
+		lpfc_unregister_unused_fcf(phba);
+		break;
+
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"0288 Unknown FCoE event type 0x%x event tag "
+			"0x%x\n", event_type, acqe_fcoe->event_tag);
+		break;
+	}
+}
+
+/**
+ * lpfc_sli4_async_dcbx_evt - Process the asynchronous dcbx event
+ * @phba: pointer to lpfc hba data structure.
+ * @acqe_link: pointer to the async dcbx completion queue entry.
+ *
+ * This routine is to handle the SLI4 asynchronous dcbx event.
+ **/
+static void
+lpfc_sli4_async_dcbx_evt(struct lpfc_hba *phba,
+			 struct lpfc_acqe_dcbx *acqe_dcbx)
+{
+	lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"0290 The SLI4 DCBX asynchronous event is not "
+			"handled yet\n");
+}
+
+/**
+ * lpfc_sli4_async_event_proc - Process all the pending asynchronous event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 asynchronous events.
+ **/
+void lpfc_sli4_async_event_proc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+
+	/* First, declare the async event has been handled */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~ASYNC_EVENT;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, handle all the async events */
+	while (!list_empty(&phba->sli4_hba.sp_asynce_work_queue)) {
+		/* Get the first event from the head of the event queue */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_asynce_work_queue,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+		/* Process the asynchronous event */
+		switch (bf_get(lpfc_trailer_code, &cq_event->cqe.mcqe_cmpl)) {
+		case LPFC_TRAILER_CODE_LINK:
+			lpfc_sli4_async_link_evt(phba,
+						 &cq_event->cqe.acqe_link);
+			break;
+		case LPFC_TRAILER_CODE_FCOE:
+			lpfc_sli4_async_fcoe_evt(phba,
+						 &cq_event->cqe.acqe_fcoe);
+			break;
+		case LPFC_TRAILER_CODE_DCBX:
+			lpfc_sli4_async_dcbx_evt(phba,
+						 &cq_event->cqe.acqe_dcbx);
+			break;
+		default:
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"1804 Invalid asynchrous event code: "
+					"x%x\n", bf_get(lpfc_trailer_code,
+					&cq_event->cqe.mcqe_cmpl));
+			break;
+		}
+		/* Free the completion event processed to the free pool */
+		lpfc_sli4_cq_event_release(phba, cq_event);
+	}
+}
+
+/**
+ * lpfc_api_table_setup - Set up per hba pci-device group func api jump table
+ * @phba: pointer to lpfc hba data structure.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine is invoked to set up the per HBA PCI-Device group function
+ * API jump table entries.
+ *
+ * Return: 0 if success, otherwise -ENODEV
+ **/
+int
+lpfc_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+	int rc;
+
+	/* Set up lpfc PCI-device group */
+	phba->pci_dev_grp = dev_grp;
+
+	/* The LPFC_PCI_DEV_OC uses SLI4 */
+	if (dev_grp == LPFC_PCI_DEV_OC)
+		phba->sli_rev = LPFC_SLI_REV4;
+
+	/* Set up device INIT API function jump table */
+	rc = lpfc_init_api_table_setup(phba, dev_grp);
+	if (rc)
+		return -ENODEV;
+	/* Set up SCSI API function jump table */
+	rc = lpfc_scsi_api_table_setup(phba, dev_grp);
+	if (rc)
+		return -ENODEV;
+	/* Set up SLI API function jump table */
+	rc = lpfc_sli_api_table_setup(phba, dev_grp);
+	if (rc)
+		return -ENODEV;
+	/* Set up MBOX API function jump table */
+	rc = lpfc_mbox_api_table_setup(phba, dev_grp);
+	if (rc)
+		return -ENODEV;
+
+	return 0;
 }
 
 /**
@@ -2764,102 +3271,393 @@ lpfc_sli_driver_resource_unset(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_init_api_table_setup - Set up init api fucntion jump table
- * @phba: The hba struct for which this call is being executed.
- * @dev_grp: The HBA PCI-Device group number.
- *
- * This routine sets up the device INIT interface API function jump table
- * in @phba struct.
- *
- * Returns: 0 - success, -ENODEV - failure.
- **/
-int
-lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
-{
-	switch (dev_grp) {
-	case LPFC_PCI_DEV_LP:
-		phba->lpfc_hba_down_post = lpfc_hba_down_post_s3;
-		phba->lpfc_handle_eratt = lpfc_handle_eratt_s3;
-		phba->lpfc_stop_port = lpfc_stop_port_s3;
-		break;
-	default:
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1431 Invalid HBA PCI-device group: 0x%x\n",
-				dev_grp);
-		return -ENODEV;
-		break;
-	}
-	return 0;
-}
-
-/**
- * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources.
+ * lpfc_sli4_driver_resource_setup - Setup drvr internal resources for SLI4 dev
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to set up the driver internal resources before the
- * device specific resource setup to support the HBA device it attached to.
+ * This routine is invoked to set up the driver internal resources specific to
+ * support the SLI-4 HBA device it attached to.
  *
  * Return codes
- *	0 - sucessful
- *	other values - error
+ * 	0 - sucessful
+ * 	other values - error
  **/
 static int
-lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
+lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 {
+	struct lpfc_sli *psli;
+	int rc;
+	int i, hbq_count;
+
+	/* Before proceed, wait for POST done and device ready */
+	rc = lpfc_sli4_post_status_check(phba);
+	if (rc)
+		return -ENODEV;
+
 	/*
-	 * Driver resources common to all SLI revisions
+	 * Initialize timers used by driver
 	 */
-	atomic_set(&phba->fast_event_count, 0);
-	spin_lock_init(&phba->hbalock);
 
-	/* Initialize ndlp management spinlock */
-	spin_lock_init(&phba->ndlp_lock);
+	/* Heartbeat timer */
+	init_timer(&phba->hb_tmofunc);
+	phba->hb_tmofunc.function = lpfc_hb_timeout;
+	phba->hb_tmofunc.data = (unsigned long)phba;
 
-	INIT_LIST_HEAD(&phba->port_list);
-	INIT_LIST_HEAD(&phba->work_list);
-	init_waitqueue_head(&phba->wait_4_mlo_m_q);
+	psli = &phba->sli;
+	/* MBOX heartbeat timer */
+	init_timer(&psli->mbox_tmo);
+	psli->mbox_tmo.function = lpfc_mbox_timeout;
+	psli->mbox_tmo.data = (unsigned long) phba;
+	/* Fabric block timer */
+	init_timer(&phba->fabric_block_timer);
+	phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
+	phba->fabric_block_timer.data = (unsigned long) phba;
+	/* EA polling mode timer */
+	init_timer(&phba->eratt_poll);
+	phba->eratt_poll.function = lpfc_poll_eratt;
+	phba->eratt_poll.data = (unsigned long) phba;
+	/*
+	 * We need to do a READ_CONFIG mailbox command here before
+	 * calling lpfc_get_cfgparam. For VFs this will report the
+	 * MAX_XRI, MAX_VPI, MAX_RPI, MAX_IOCB, and MAX_VFI settings.
+	 * All of the resources allocated
+	 * for this Port are tied to these values.
+	 */
+	/* Get all the module params for configuring this host */
+	lpfc_get_cfgparam(phba);
+	phba->max_vpi = LPFC_MAX_VPI;
+	/* This will be set to correct value after the read_config mbox */
+	phba->max_vports = 0;
 
-	/* Initialize the wait queue head for the kernel thread */
-	init_waitqueue_head(&phba->work_waitq);
+	/* Program the default value of vlan_id and fc_map */
+	phba->valid_vlan = 0;
+	phba->fc_map[0] = LPFC_FCOE_FCF_MAP0;
+	phba->fc_map[1] = LPFC_FCOE_FCF_MAP1;
+	phba->fc_map[2] = LPFC_FCOE_FCF_MAP2;
 
-	/* Initialize the scsi buffer list used by driver for scsi IO */
-	spin_lock_init(&phba->scsi_buf_list_lock);
-	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+	/*
+	 * Since the sg_tablesize is module parameter, the sg_dma_buf_size
+	 * used to create the sg_dma_buf_pool must be dynamically calculated.
+	 * 2 segments are added since the IOCB needs a command and response bde.
+	 * To insure that the scsi sgl does not cross a 4k page boundary only
+	 * sgl sizes of 1k, 2k, 4k, and 8k are supported.
+	 * Table of sgl sizes and seg_cnt:
+	 * sgl size, 	sg_seg_cnt	total seg
+	 * 1k		50		52
+	 * 2k		114		116
+	 * 4k		242		244
+	 * 8k		498		500
+	 * cmd(32) + rsp(160) + (52 * sizeof(sli4_sge)) = 1024
+	 * cmd(32) + rsp(160) + (116 * sizeof(sli4_sge)) = 2048
+	 * cmd(32) + rsp(160) + (244 * sizeof(sli4_sge)) = 4096
+	 * cmd(32) + rsp(160) + (500 * sizeof(sli4_sge)) = 8192
+	 */
+	if (phba->cfg_sg_seg_cnt <= LPFC_DEFAULT_SG_SEG_CNT)
+		phba->cfg_sg_seg_cnt = 50;
+	else if (phba->cfg_sg_seg_cnt <= 114)
+		phba->cfg_sg_seg_cnt = 114;
+	else if (phba->cfg_sg_seg_cnt <= 242)
+		phba->cfg_sg_seg_cnt = 242;
+	else
+		phba->cfg_sg_seg_cnt = 498;
 
-	/* Initialize the fabric iocb list */
-	INIT_LIST_HEAD(&phba->fabric_iocb_list);
+	phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd)
+					+ sizeof(struct fcp_rsp);
+	phba->cfg_sg_dma_buf_size +=
+		((phba->cfg_sg_seg_cnt + 2) * sizeof(struct sli4_sge));
 
-	/* Initialize list to save ELS buffers */
-	INIT_LIST_HEAD(&phba->elsbuf);
+	/* Initialize buffer queue management fields */
+	hbq_count = lpfc_sli_hbq_count();
+	for (i = 0; i < hbq_count; ++i)
+		INIT_LIST_HEAD(&phba->hbqs[i].hbq_buffer_list);
+	INIT_LIST_HEAD(&phba->rb_pend_list);
+	phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc;
+	phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer = lpfc_sli4_rb_free;
 
-	/* Initialize FCF connection rec list */
-	INIT_LIST_HEAD(&phba->fcf_conn_rec_list);
+	/*
+	 * Initialize the SLI Layer to run with lpfc SLI4 HBAs.
+	 */
+	/* Initialize the Abort scsi buffer list used by driver */
+	spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock);
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list);
+	/* This abort list used by worker thread */
+	spin_lock_init(&phba->sli4_hba.abts_sgl_list_lock);
 
-	return 0;
-}
+	/*
+	 * Initialize dirver internal slow-path work queues
+	 */
 
-/**
- * lpfc_setup_driver_resource_phase2 - Phase2 setup driver internal resources.
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked to set up the driver internal resources after the
- * device specific resource setup to support the HBA device it attached to.
- *
- * Return codes
- * 	0 - sucessful
- * 	other values - error
- **/
-static int
-lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba)
-{
-	int error;
+	/* Driver internel slow-path CQ Event pool */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_cqe_event_pool);
+	/* Response IOCB work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_rspiocb_work_queue);
+	/* Asynchronous event CQ Event work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_asynce_work_queue);
+	/* Fast-path XRI aborted CQ Event work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue);
+	/* Slow-path XRI aborted CQ Event work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_els_xri_aborted_work_queue);
+	/* Receive queue CQ Event work queue list */
+	INIT_LIST_HEAD(&phba->sli4_hba.sp_unsol_work_queue);
+
+	/* Initialize the driver internal SLI layer lists. */
+	lpfc_sli_setup(phba);
+	lpfc_sli_queue_setup(phba);
 
-	/* Startup the kernel thread for this host adapter. */
-	phba->worker_thread = kthread_run(lpfc_do_work, phba,
-					  "lpfc_worker_%d", phba->brd_no);
-	if (IS_ERR(phba->worker_thread)) {
-		error = PTR_ERR(phba->worker_thread);
-		return error;
+	/* Allocate device driver memory */
+	rc = lpfc_mem_alloc(phba, SGL_ALIGN_SZ);
+	if (rc)
+		return -ENOMEM;
+
+	/* Create the bootstrap mailbox command */
+	rc = lpfc_create_bootstrap_mbox(phba);
+	if (unlikely(rc))
+		goto out_free_mem;
+
+	/* Set up the host's endian order with the device. */
+	rc = lpfc_setup_endian_order(phba);
+	if (unlikely(rc))
+		goto out_free_bsmbx;
+
+	/* Set up the hba's configuration parameters. */
+	rc = lpfc_sli4_read_config(phba);
+	if (unlikely(rc))
+		goto out_free_bsmbx;
+
+	/* Perform a function reset */
+	rc = lpfc_pci_function_reset(phba);
+	if (unlikely(rc))
+		goto out_free_bsmbx;
+
+	/* Create all the SLI4 queues */
+	rc = lpfc_sli4_queue_create(phba);
+	if (rc)
+		goto out_free_bsmbx;
+
+	/* Create driver internal CQE event pool */
+	rc = lpfc_sli4_cq_event_pool_create(phba);
+	if (rc)
+		goto out_destroy_queue;
+
+	/* Initialize and populate the iocb list per host */
+	rc = lpfc_init_sgl_list(phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1400 Failed to initialize sgl list.\n");
+		goto out_destroy_cq_event_pool;
+	}
+	rc = lpfc_init_active_sgl_array(phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1430 Failed to initialize sgl list.\n");
+		goto out_free_sgl_list;
+	}
+
+	rc = lpfc_sli4_init_rpi_hdrs(phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1432 Failed to initialize rpi headers.\n");
+		goto out_free_active_sgl;
+	}
+
+	phba->sli4_hba.fcp_eq_hdl = kzalloc((sizeof(struct lpfc_fcp_eq_hdl) *
+				    phba->cfg_fcp_eq_count), GFP_KERNEL);
+	if (!phba->sli4_hba.fcp_eq_hdl) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2572 Failed allocate memory for fast-path "
+				"per-EQ handle array\n");
+		goto out_remove_rpi_hdrs;
+	}
+
+	phba->sli4_hba.msix_entries = kzalloc((sizeof(struct msix_entry) *
+				      phba->sli4_hba.cfg_eqn), GFP_KERNEL);
+	if (!phba->sli4_hba.msix_entries) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2573 Failed allocate memory for msi-x "
+				"interrupt vector entries\n");
+		goto out_free_fcp_eq_hdl;
+	}
+
+	return rc;
+
+out_free_fcp_eq_hdl:
+	kfree(phba->sli4_hba.fcp_eq_hdl);
+out_remove_rpi_hdrs:
+	lpfc_sli4_remove_rpi_hdrs(phba);
+out_free_active_sgl:
+	lpfc_free_active_sgl(phba);
+out_free_sgl_list:
+	lpfc_free_sgl_list(phba);
+out_destroy_cq_event_pool:
+	lpfc_sli4_cq_event_pool_destroy(phba);
+out_destroy_queue:
+	lpfc_sli4_queue_destroy(phba);
+out_free_bsmbx:
+	lpfc_destroy_bootstrap_mbox(phba);
+out_free_mem:
+	lpfc_mem_free(phba);
+	return rc;
+}
+
+/**
+ * lpfc_sli4_driver_resource_unset - Unset drvr internal resources for SLI4 dev
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the driver internal resources set up
+ * specific for supporting the SLI-4 HBA device it attached to.
+ **/
+static void
+lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
+{
+	struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
+
+	/* unregister default FCFI from the HBA */
+	lpfc_sli4_fcfi_unreg(phba, phba->fcf.fcfi);
+
+	/* Free the default FCR table */
+	lpfc_sli_remove_dflt_fcf(phba);
+
+	/* Free memory allocated for msi-x interrupt vector entries */
+	kfree(phba->sli4_hba.msix_entries);
+
+	/* Free memory allocated for fast-path work queue handles */
+	kfree(phba->sli4_hba.fcp_eq_hdl);
+
+	/* Free the allocated rpi headers. */
+	lpfc_sli4_remove_rpi_hdrs(phba);
+
+	/* Free the ELS sgl list */
+	lpfc_free_active_sgl(phba);
+	lpfc_free_sgl_list(phba);
+
+	/* Free the SCSI sgl management array */
+	kfree(phba->sli4_hba.lpfc_scsi_psb_array);
+
+	/* Free the SLI4 queues */
+	lpfc_sli4_queue_destroy(phba);
+
+	/* Free the completion queue EQ event pool */
+	lpfc_sli4_cq_event_release_all(phba);
+	lpfc_sli4_cq_event_pool_destroy(phba);
+
+	/* Reset SLI4 HBA FCoE function */
+	lpfc_pci_function_reset(phba);
+
+	/* Free the bsmbx region. */
+	lpfc_destroy_bootstrap_mbox(phba);
+
+	/* Free the SLI Layer memory with SLI4 HBAs */
+	lpfc_mem_free_all(phba);
+
+	/* Free the current connect table */
+	list_for_each_entry_safe(conn_entry, next_conn_entry,
+		&phba->fcf_conn_rec_list, list)
+		kfree(conn_entry);
+
+	return;
+}
+
+/**
+ * lpfc_init_api_table_setup - Set up init api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the device INIT interface API function jump table
+ * in @phba struct.
+ *
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->lpfc_hba_down_post = lpfc_hba_down_post_s3;
+		phba->lpfc_handle_eratt = lpfc_handle_eratt_s3;
+		phba->lpfc_stop_port = lpfc_stop_port_s3;
+		break;
+	case LPFC_PCI_DEV_OC:
+		phba->lpfc_hba_down_post = lpfc_hba_down_post_s4;
+		phba->lpfc_handle_eratt = lpfc_handle_eratt_s4;
+		phba->lpfc_stop_port = lpfc_stop_port_s4;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1431 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	return 0;
+}
+
+/**
+ * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources before the
+ * device specific resource setup to support the HBA device it attached to.
+ *
+ * Return codes
+ *	0 - sucessful
+ *	other values - error
+ **/
+static int
+lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
+{
+	/*
+	 * Driver resources common to all SLI revisions
+	 */
+	atomic_set(&phba->fast_event_count, 0);
+	spin_lock_init(&phba->hbalock);
+
+	/* Initialize ndlp management spinlock */
+	spin_lock_init(&phba->ndlp_lock);
+
+	INIT_LIST_HEAD(&phba->port_list);
+	INIT_LIST_HEAD(&phba->work_list);
+	init_waitqueue_head(&phba->wait_4_mlo_m_q);
+
+	/* Initialize the wait queue head for the kernel thread */
+	init_waitqueue_head(&phba->work_waitq);
+
+	/* Initialize the scsi buffer list used by driver for scsi IO */
+	spin_lock_init(&phba->scsi_buf_list_lock);
+	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+
+	/* Initialize the fabric iocb list */
+	INIT_LIST_HEAD(&phba->fabric_iocb_list);
+
+	/* Initialize list to save ELS buffers */
+	INIT_LIST_HEAD(&phba->elsbuf);
+
+	/* Initialize FCF connection rec list */
+	INIT_LIST_HEAD(&phba->fcf_conn_rec_list);
+
+	return 0;
+}
+
+/**
+ * lpfc_setup_driver_resource_phase2 - Phase2 setup driver internal resources.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the driver internal resources after the
+ * device specific resource setup to support the HBA device it attached to.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba)
+{
+	int error;
+
+	/* Startup the kernel thread for this host adapter. */
+	phba->worker_thread = kthread_run(lpfc_do_work, phba,
+					  "lpfc_worker_%d", phba->brd_no);
+	if (IS_ERR(phba->worker_thread)) {
+		error = PTR_ERR(phba->worker_thread);
+		return error;
 	}
 
 	return 0;
@@ -2956,92 +3754,432 @@ out_free_iocbq:
 }
 
 /**
- * lpfc_hba_alloc - Allocate driver hba data structure for a device.
- * @pdev: pointer to pci device data structure.
- *
- * This routine is invoked to allocate the driver hba data structure for an
- * HBA device. If the allocation is successful, the phba reference to the
- * PCI device data structure is set.
+ * lpfc_free_sgl_list - Free sgl list.
+ * @phba: pointer to lpfc hba data structure.
  *
- * Return codes
- *      pointer to @phba - sucessful
- *      NULL - error
+ * This routine is invoked to free the driver's sgl list and memory.
  **/
-static struct lpfc_hba *
-lpfc_hba_alloc(struct pci_dev *pdev)
+static void
+lpfc_free_sgl_list(struct lpfc_hba *phba)
 {
-	struct lpfc_hba *phba;
-
-	/* Allocate memory for HBA structure */
-	phba = kzalloc(sizeof(struct lpfc_hba), GFP_KERNEL);
-	if (!phba) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1417 Failed to allocate hba struct.\n");
-		return NULL;
-	}
+	struct lpfc_sglq *sglq_entry = NULL, *sglq_next = NULL;
+	LIST_HEAD(sglq_list);
+	int rc = 0;
 
-	/* Set reference to PCI device in HBA structure */
-	phba->pcidev = pdev;
+	spin_lock_irq(&phba->hbalock);
+	list_splice_init(&phba->sli4_hba.lpfc_sgl_list, &sglq_list);
+	spin_unlock_irq(&phba->hbalock);
 
-	/* Assign an unused board number */
-	phba->brd_no = lpfc_get_instance();
-	if (phba->brd_no < 0) {
-		kfree(phba);
-		return NULL;
+	list_for_each_entry_safe(sglq_entry, sglq_next,
+				 &sglq_list, list) {
+		list_del(&sglq_entry->list);
+		lpfc_mbuf_free(phba, sglq_entry->virt, sglq_entry->phys);
+		kfree(sglq_entry);
+		phba->sli4_hba.total_sglq_bufs--;
+	}
+	rc = lpfc_sli4_remove_all_sgl_pages(phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2005 Unable to deregister pages from HBA: %x", rc);
 	}
+	kfree(phba->sli4_hba.lpfc_els_sgl_array);
+}
 
-	return phba;
+/**
+ * lpfc_init_active_sgl_array - Allocate the buf to track active ELS XRIs.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate the driver's active sgl memory.
+ * This array will hold the sglq_entry's for active IOs.
+ **/
+static int
+lpfc_init_active_sgl_array(struct lpfc_hba *phba)
+{
+	int size;
+	size = sizeof(struct lpfc_sglq *);
+	size *= phba->sli4_hba.max_cfg_param.max_xri;
+
+	phba->sli4_hba.lpfc_sglq_active_list =
+		kzalloc(size, GFP_KERNEL);
+	if (!phba->sli4_hba.lpfc_sglq_active_list)
+		return -ENOMEM;
+	return 0;
 }
 
 /**
- * lpfc_hba_free - Free driver hba data structure with a device.
+ * lpfc_free_active_sgl - Free the buf that tracks active ELS XRIs.
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to free the driver hba data structure with an
- * HBA device.
+ * This routine is invoked to walk through the array of active sglq entries
+ * and free all of the resources.
+ * This is just a place holder for now.
  **/
 static void
-lpfc_hba_free(struct lpfc_hba *phba)
+lpfc_free_active_sgl(struct lpfc_hba *phba)
 {
-	/* Release the driver assigned board number */
-	idr_remove(&lpfc_hba_index, phba->brd_no);
-
-	kfree(phba);
-	return;
+	kfree(phba->sli4_hba.lpfc_sglq_active_list);
 }
 
 /**
- * lpfc_create_shost - Create hba physical port with associated scsi host.
+ * lpfc_init_sgl_list - Allocate and initialize sgl list.
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to create HBA physical port and associate a SCSI
- * host with it.
+ * This routine is invoked to allocate and initizlize the driver's sgl
+ * list and set up the sgl xritag tag array accordingly.
  *
  * Return codes
- *      0 - sucessful
- *      other values - error
+ *	0 - sucessful
+ *	other values - error
  **/
 static int
-lpfc_create_shost(struct lpfc_hba *phba)
+lpfc_init_sgl_list(struct lpfc_hba *phba)
 {
-	struct lpfc_vport *vport;
-	struct Scsi_Host  *shost;
+	struct lpfc_sglq *sglq_entry = NULL;
+	int i;
+	int els_xri_cnt;
+
+	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+				"2400 lpfc_init_sgl_list els %d.\n",
+				els_xri_cnt);
+	/* Initialize and populate the sglq list per host/VF. */
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_sgl_list);
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list);
+
+	/* Sanity check on XRI management */
+	if (phba->sli4_hba.max_cfg_param.max_xri <= els_xri_cnt) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2562 No room left for SCSI XRI allocation: "
+				"max_xri=%d, els_xri=%d\n",
+				phba->sli4_hba.max_cfg_param.max_xri,
+				els_xri_cnt);
+		return -ENOMEM;
+	}
 
-	/* Initialize HBA FC structure */
-	phba->fc_edtov = FF_DEF_EDTOV;
-	phba->fc_ratov = FF_DEF_RATOV;
-	phba->fc_altov = FF_DEF_ALTOV;
-	phba->fc_arbtov = FF_DEF_ARBTOV;
+	/* Allocate memory for the ELS XRI management array */
+	phba->sli4_hba.lpfc_els_sgl_array =
+			kzalloc((sizeof(struct lpfc_sglq *) * els_xri_cnt),
+			GFP_KERNEL);
 
-	vport = lpfc_create_port(phba, phba->brd_no, &phba->pcidev->dev);
-	if (!vport)
-		return -ENODEV;
+	if (!phba->sli4_hba.lpfc_els_sgl_array) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2401 Failed to allocate memory for ELS "
+				"XRI management array of size %d.\n",
+				els_xri_cnt);
+		return -ENOMEM;
+	}
 
-	shost = lpfc_shost_from_vport(vport);
-	phba->pport = vport;
-	lpfc_debugfs_initialize(vport);
-	/* Put reference to SCSI host to driver's device private data */
-	pci_set_drvdata(phba->pcidev, shost);
+	/* Keep the SCSI XRI into the XRI management array */
+	phba->sli4_hba.scsi_xri_max =
+			phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
+	phba->sli4_hba.scsi_xri_cnt = 0;
+
+	phba->sli4_hba.lpfc_scsi_psb_array =
+			kzalloc((sizeof(struct lpfc_scsi_buf *) *
+			phba->sli4_hba.scsi_xri_max), GFP_KERNEL);
+
+	if (!phba->sli4_hba.lpfc_scsi_psb_array) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2563 Failed to allocate memory for SCSI "
+				"XRI management array of size %d.\n",
+				phba->sli4_hba.scsi_xri_max);
+		kfree(phba->sli4_hba.lpfc_els_sgl_array);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < els_xri_cnt; i++) {
+		sglq_entry = kzalloc(sizeof(struct lpfc_sglq), GFP_KERNEL);
+		if (sglq_entry == NULL) {
+			printk(KERN_ERR "%s: only allocated %d sgls of "
+				"expected %d count. Unloading driver.\n",
+				__func__, i, els_xri_cnt);
+			goto out_free_mem;
+		}
+
+		sglq_entry->sli4_xritag = lpfc_sli4_next_xritag(phba);
+		if (sglq_entry->sli4_xritag == NO_XRI) {
+			kfree(sglq_entry);
+			printk(KERN_ERR "%s: failed to allocate XRI.\n"
+				"Unloading driver.\n", __func__);
+			goto out_free_mem;
+		}
+		sglq_entry->buff_type = GEN_BUFF_TYPE;
+		sglq_entry->virt = lpfc_mbuf_alloc(phba, 0, &sglq_entry->phys);
+		if (sglq_entry->virt == NULL) {
+			kfree(sglq_entry);
+			printk(KERN_ERR "%s: failed to allocate mbuf.\n"
+				"Unloading driver.\n", __func__);
+			goto out_free_mem;
+		}
+		sglq_entry->sgl = sglq_entry->virt;
+		memset(sglq_entry->sgl, 0, LPFC_BPL_SIZE);
+
+		/* The list order is used by later block SGL registraton */
+		spin_lock_irq(&phba->hbalock);
+		list_add_tail(&sglq_entry->list, &phba->sli4_hba.lpfc_sgl_list);
+		phba->sli4_hba.lpfc_els_sgl_array[i] = sglq_entry;
+		phba->sli4_hba.total_sglq_bufs++;
+		spin_unlock_irq(&phba->hbalock);
+	}
+	return 0;
+
+out_free_mem:
+	kfree(phba->sli4_hba.lpfc_scsi_psb_array);
+	lpfc_free_sgl_list(phba);
+	return -ENOMEM;
+}
+
+/**
+ * lpfc_sli4_init_rpi_hdrs - Post the rpi header memory region to the port
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to post rpi header templates to the
+ * HBA consistent with the SLI-4 interface spec.  This routine
+ * posts a PAGE_SIZE memory region to the port to hold up to
+ * PAGE_SIZE modulo 64 rpi context headers.
+ * No locks are held here because this is an initialization routine
+ * called only from probe or lpfc_online when interrupts are not
+ * enabled and the driver is reinitializing the device.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+int
+lpfc_sli4_init_rpi_hdrs(struct lpfc_hba *phba)
+{
+	int rc = 0;
+	int longs;
+	uint16_t rpi_count;
+	struct lpfc_rpi_hdr *rpi_hdr;
+
+	INIT_LIST_HEAD(&phba->sli4_hba.lpfc_rpi_hdr_list);
+
+	/*
+	 * Provision an rpi bitmask range for discovery. The total count
+	 * is the difference between max and base + 1.
+	 */
+	rpi_count = phba->sli4_hba.max_cfg_param.rpi_base +
+		    phba->sli4_hba.max_cfg_param.max_rpi - 1;
+
+	longs = ((rpi_count) + BITS_PER_LONG - 1) / BITS_PER_LONG;
+	phba->sli4_hba.rpi_bmask = kzalloc(longs * sizeof(unsigned long),
+					   GFP_KERNEL);
+	if (!phba->sli4_hba.rpi_bmask)
+		return -ENOMEM;
+
+	rpi_hdr = lpfc_sli4_create_rpi_hdr(phba);
+	if (!rpi_hdr) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0391 Error during rpi post operation\n");
+		lpfc_sli4_remove_rpis(phba);
+		rc = -ENODEV;
+	}
+
+	return rc;
+}
+
+/**
+ * lpfc_sli4_create_rpi_hdr - Allocate an rpi header memory region
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate a single 4KB memory region to
+ * support rpis and stores them in the phba.  This single region
+ * provides support for up to 64 rpis.  The region is used globally
+ * by the device.
+ *
+ * Returns:
+ *   A valid rpi hdr on success.
+ *   A NULL pointer on any failure.
+ **/
+struct lpfc_rpi_hdr *
+lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
+{
+	uint16_t rpi_limit, curr_rpi_range;
+	struct lpfc_dmabuf *dmabuf;
+	struct lpfc_rpi_hdr *rpi_hdr;
+
+	rpi_limit = phba->sli4_hba.max_cfg_param.rpi_base +
+		    phba->sli4_hba.max_cfg_param.max_rpi - 1;
+
+	spin_lock_irq(&phba->hbalock);
+	curr_rpi_range = phba->sli4_hba.next_rpi;
+	spin_unlock_irq(&phba->hbalock);
+
+	/*
+	 * The port has a limited number of rpis. The increment here
+	 * is LPFC_RPI_HDR_COUNT - 1 to account for the starting value
+	 * and to allow the full max_rpi range per port.
+	 */
+	if ((curr_rpi_range + (LPFC_RPI_HDR_COUNT - 1)) > rpi_limit)
+		return NULL;
+
+	/*
+	 * First allocate the protocol header region for the port.  The
+	 * port expects a 4KB DMA-mapped memory region that is 4K aligned.
+	 */
+	dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!dmabuf)
+		return NULL;
+
+	dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+					  LPFC_HDR_TEMPLATE_SIZE,
+					  &dmabuf->phys,
+					  GFP_KERNEL);
+	if (!dmabuf->virt) {
+		rpi_hdr = NULL;
+		goto err_free_dmabuf;
+	}
+
+	memset(dmabuf->virt, 0, LPFC_HDR_TEMPLATE_SIZE);
+	if (!IS_ALIGNED(dmabuf->phys, LPFC_HDR_TEMPLATE_SIZE)) {
+		rpi_hdr = NULL;
+		goto err_free_coherent;
+	}
+
+	/* Save the rpi header data for cleanup later. */
+	rpi_hdr = kzalloc(sizeof(struct lpfc_rpi_hdr), GFP_KERNEL);
+	if (!rpi_hdr)
+		goto err_free_coherent;
+
+	rpi_hdr->dmabuf = dmabuf;
+	rpi_hdr->len = LPFC_HDR_TEMPLATE_SIZE;
+	rpi_hdr->page_count = 1;
+	spin_lock_irq(&phba->hbalock);
+	rpi_hdr->start_rpi = phba->sli4_hba.next_rpi;
+	list_add_tail(&rpi_hdr->list, &phba->sli4_hba.lpfc_rpi_hdr_list);
+
+	/*
+	 * The next_rpi stores the next module-64 rpi value to post
+	 * in any subsequent rpi memory region postings.
+	 */
+	phba->sli4_hba.next_rpi += LPFC_RPI_HDR_COUNT;
+	spin_unlock_irq(&phba->hbalock);
+	return rpi_hdr;
+
+ err_free_coherent:
+	dma_free_coherent(&phba->pcidev->dev, LPFC_HDR_TEMPLATE_SIZE,
+			  dmabuf->virt, dmabuf->phys);
+ err_free_dmabuf:
+	kfree(dmabuf);
+	return NULL;
+}
+
+/**
+ * lpfc_sli4_remove_rpi_hdrs - Remove all rpi header memory regions
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to remove all memory resources allocated
+ * to support rpis. This routine presumes the caller has released all
+ * rpis consumed by fabric or port logins and is prepared to have
+ * the header pages removed.
+ **/
+void
+lpfc_sli4_remove_rpi_hdrs(struct lpfc_hba *phba)
+{
+	struct lpfc_rpi_hdr *rpi_hdr, *next_rpi_hdr;
+
+	list_for_each_entry_safe(rpi_hdr, next_rpi_hdr,
+				 &phba->sli4_hba.lpfc_rpi_hdr_list, list) {
+		list_del(&rpi_hdr->list);
+		dma_free_coherent(&phba->pcidev->dev, rpi_hdr->len,
+				  rpi_hdr->dmabuf->virt, rpi_hdr->dmabuf->phys);
+		kfree(rpi_hdr->dmabuf);
+		kfree(rpi_hdr);
+	}
+
+	phba->sli4_hba.next_rpi = phba->sli4_hba.max_cfg_param.rpi_base;
+	memset(phba->sli4_hba.rpi_bmask, 0, sizeof(*phba->sli4_hba.rpi_bmask));
+}
+
+/**
+ * lpfc_hba_alloc - Allocate driver hba data structure for a device.
+ * @pdev: pointer to pci device data structure.
+ *
+ * This routine is invoked to allocate the driver hba data structure for an
+ * HBA device. If the allocation is successful, the phba reference to the
+ * PCI device data structure is set.
+ *
+ * Return codes
+ *      pointer to @phba - sucessful
+ *      NULL - error
+ **/
+static struct lpfc_hba *
+lpfc_hba_alloc(struct pci_dev *pdev)
+{
+	struct lpfc_hba *phba;
+
+	/* Allocate memory for HBA structure */
+	phba = kzalloc(sizeof(struct lpfc_hba), GFP_KERNEL);
+	if (!phba) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1417 Failed to allocate hba struct.\n");
+		return NULL;
+	}
+
+	/* Set reference to PCI device in HBA structure */
+	phba->pcidev = pdev;
+
+	/* Assign an unused board number */
+	phba->brd_no = lpfc_get_instance();
+	if (phba->brd_no < 0) {
+		kfree(phba);
+		return NULL;
+	}
+
+	return phba;
+}
+
+/**
+ * lpfc_hba_free - Free driver hba data structure with a device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to free the driver hba data structure with an
+ * HBA device.
+ **/
+static void
+lpfc_hba_free(struct lpfc_hba *phba)
+{
+	/* Release the driver assigned board number */
+	idr_remove(&lpfc_hba_index, phba->brd_no);
+
+	kfree(phba);
+	return;
+}
+
+/**
+ * lpfc_create_shost - Create hba physical port with associated scsi host.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to create HBA physical port and associate a SCSI
+ * host with it.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      other values - error
+ **/
+static int
+lpfc_create_shost(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport;
+	struct Scsi_Host  *shost;
+
+	/* Initialize HBA FC structure */
+	phba->fc_edtov = FF_DEF_EDTOV;
+	phba->fc_ratov = FF_DEF_RATOV;
+	phba->fc_altov = FF_DEF_ALTOV;
+	phba->fc_arbtov = FF_DEF_ARBTOV;
+
+	vport = lpfc_create_port(phba, phba->brd_no, &phba->pcidev->dev);
+	if (!vport)
+		return -ENODEV;
+
+	shost = lpfc_shost_from_vport(vport);
+	phba->pport = vport;
+	lpfc_debugfs_initialize(vport);
+	/* Put reference to SCSI host to driver's device private data */
+	pci_set_drvdata(phba->pcidev, shost);
 
 	return 0;
 }
@@ -3316,340 +4454,2755 @@ lpfc_sli_pci_mem_unset(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_sli_enable_msix - Enable MSI-X interrupt mode on SLI-3 device
+ * lpfc_sli4_post_status_check - Wait for SLI4 POST done and check status
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to enable the MSI-X interrupt vectors to device
- * with SLI-3 interface specs. The kernel function pci_enable_msix() is
- * called to enable the MSI-X vectors. Note that pci_enable_msix(), once
- * invoked, enables either all or nothing, depending on the current
- * availability of PCI vector resources. The device driver is responsible
- * for calling the individual request_irq() to register each MSI-X vector
- * with a interrupt handler, which is done in this function. Note that
- * later when device is unloading, the driver should always call free_irq()
- * on all MSI-X vectors it has done request_irq() on before calling
- * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device
- * will be left with MSI-X enabled and leaks its vectors.
+ * This routine is invoked to wait for SLI4 device Power On Self Test (POST)
+ * done and check status.
  *
- * Return codes
- *   0 - sucessful
- *   other values - error
+ * Return 0 if successful, otherwise -ENODEV.
  **/
-static int
-lpfc_sli_enable_msix(struct lpfc_hba *phba)
+int
+lpfc_sli4_post_status_check(struct lpfc_hba *phba)
 {
-	int rc, i;
-	LPFC_MBOXQ_t *pmb;
+	struct lpfc_register sta_reg, uerrlo_reg, uerrhi_reg, scratchpad;
+	uint32_t onlnreg0, onlnreg1;
+	int i, port_error = -ENODEV;
 
-	/* Set up MSI-X multi-message vectors */
-	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
-		phba->msix_entries[i].entry = i;
+	if (!phba->sli4_hba.STAregaddr)
+		return -ENODEV;
 
-	/* Configure MSI-X capability structure */
-	rc = pci_enable_msix(phba->pcidev, phba->msix_entries,
-				ARRAY_SIZE(phba->msix_entries));
-	if (rc) {
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0420 PCI enable MSI-X failed (%d)\n", rc);
-		goto msi_fail_out;
+	/* With uncoverable error, log the error message and return error */
+	onlnreg0 = readl(phba->sli4_hba.ONLINE0regaddr);
+	onlnreg1 = readl(phba->sli4_hba.ONLINE1regaddr);
+	if ((onlnreg0 != LPFC_ONLINE_NERR) || (onlnreg1 != LPFC_ONLINE_NERR)) {
+		uerrlo_reg.word0 = readl(phba->sli4_hba.UERRLOregaddr);
+		uerrhi_reg.word0 = readl(phba->sli4_hba.UERRHIregaddr);
+		if (uerrlo_reg.word0 || uerrhi_reg.word0) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1422 HBA Unrecoverable error: "
+					"uerr_lo_reg=0x%x, uerr_hi_reg=0x%x, "
+					"online0_reg=0x%x, online1_reg=0x%x\n",
+					uerrlo_reg.word0, uerrhi_reg.word0,
+					onlnreg0, onlnreg1);
+		}
+		return -ENODEV;
 	}
-	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0477 MSI-X entry[%d]: vector=x%x "
-				"message=%d\n", i,
-				phba->msix_entries[i].vector,
-				phba->msix_entries[i].entry);
-	/*
-	 * Assign MSI-X vectors to interrupt handlers
-	 */
 
-	/* vector-0 is associated to slow-path handler */
-	rc = request_irq(phba->msix_entries[0].vector,
-			 &lpfc_sli_sp_intr_handler, IRQF_SHARED,
-			 LPFC_SP_DRIVER_HANDLER_NAME, phba);
-	if (rc) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"0421 MSI-X slow-path request_irq failed "
-				"(%d)\n", rc);
-		goto msi_fail_out;
+	/* Wait up to 30 seconds for the SLI Port POST done and ready */
+	for (i = 0; i < 3000; i++) {
+		sta_reg.word0 = readl(phba->sli4_hba.STAregaddr);
+		/* Encounter fatal POST error, break out */
+		if (bf_get(lpfc_hst_state_perr, &sta_reg)) {
+			port_error = -ENODEV;
+			break;
+		}
+		if (LPFC_POST_STAGE_ARMFW_READY ==
+		    bf_get(lpfc_hst_state_port_status, &sta_reg)) {
+			port_error = 0;
+			break;
+		}
+		msleep(10);
 	}
 
-	/* vector-1 is associated to fast-path handler */
-	rc = request_irq(phba->msix_entries[1].vector,
-			 &lpfc_sli_fp_intr_handler, IRQF_SHARED,
-			 LPFC_FP_DRIVER_HANDLER_NAME, phba);
+	if (port_error)
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"1408 Failure HBA POST Status: sta_reg=0x%x, "
+			"perr=x%x, sfi=x%x, nip=x%x, ipc=x%x, xrom=x%x, "
+			"dl=x%x, pstatus=x%x\n", sta_reg.word0,
+			bf_get(lpfc_hst_state_perr, &sta_reg),
+			bf_get(lpfc_hst_state_sfi, &sta_reg),
+			bf_get(lpfc_hst_state_nip, &sta_reg),
+			bf_get(lpfc_hst_state_ipc, &sta_reg),
+			bf_get(lpfc_hst_state_xrom, &sta_reg),
+			bf_get(lpfc_hst_state_dl, &sta_reg),
+			bf_get(lpfc_hst_state_port_status, &sta_reg));
+
+	/* Log device information */
+	scratchpad.word0 =  readl(phba->sli4_hba.SCRATCHPADregaddr);
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2534 Device Info: ChipType=0x%x, SliRev=0x%x, "
+			"FeatureL1=0x%x, FeatureL2=0x%x\n",
+			bf_get(lpfc_scratchpad_chiptype, &scratchpad),
+			bf_get(lpfc_scratchpad_slirev, &scratchpad),
+			bf_get(lpfc_scratchpad_featurelevel1, &scratchpad),
+			bf_get(lpfc_scratchpad_featurelevel2, &scratchpad));
+
+	return port_error;
+}
 
-	if (rc) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"0429 MSI-X fast-path request_irq failed "
-				"(%d)\n", rc);
-		goto irq_fail_out;
-	}
+/**
+ * lpfc_sli4_bar0_register_memmap - Set up SLI4 BAR0 register memory map.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up SLI4 BAR0 PCI config space register
+ * memory map.
+ **/
+static void
+lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba)
+{
+	phba->sli4_hba.UERRLOregaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_UERR_STATUS_LO;
+	phba->sli4_hba.UERRHIregaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_UERR_STATUS_HI;
+	phba->sli4_hba.ONLINE0regaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_ONLINE0;
+	phba->sli4_hba.ONLINE1regaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_ONLINE1;
+	phba->sli4_hba.SCRATCHPADregaddr = phba->sli4_hba.conf_regs_memmap_p +
+					LPFC_SCRATCHPAD;
+}
 
-	/*
-	 * Configure HBA MSI-X attention conditions to messages
-	 */
-	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+/**
+ * lpfc_sli4_bar1_register_memmap - Set up SLI4 BAR1 register memory map.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up SLI4 BAR1 control status register (CSR)
+ * memory map.
+ **/
+static void
+lpfc_sli4_bar1_register_memmap(struct lpfc_hba *phba)
+{
 
-	if (!pmb) {
-		rc = -ENOMEM;
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0474 Unable to allocate memory for issuing "
-				"MBOX_CONFIG_MSI command\n");
-		goto mem_fail_out;
-	}
-	rc = lpfc_config_msi(phba, pmb);
-	if (rc)
-		goto mbx_fail_out;
-	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
-	if (rc != MBX_SUCCESS) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
-				"0351 Config MSI mailbox command failed, "
-				"mbxCmd x%x, mbxStatus x%x\n",
-				pmb->u.mb.mbxCommand, pmb->u.mb.mbxStatus);
-		goto mbx_fail_out;
-	}
-
-	/* Free memory allocated for mailbox command */
-	mempool_free(pmb, phba->mbox_mem_pool);
-	return rc;
-
-mbx_fail_out:
-	/* Free memory allocated for mailbox command */
-	mempool_free(pmb, phba->mbox_mem_pool);
-
-mem_fail_out:
-	/* free the irq already requested */
-	free_irq(phba->msix_entries[1].vector, phba);
-
-irq_fail_out:
-	/* free the irq already requested */
-	free_irq(phba->msix_entries[0].vector, phba);
-
-msi_fail_out:
-	/* Unconfigure MSI-X capability structure */
-	pci_disable_msix(phba->pcidev);
-	return rc;
+	phba->sli4_hba.STAregaddr = phba->sli4_hba.ctrl_regs_memmap_p +
+				    LPFC_HST_STATE;
+	phba->sli4_hba.ISRregaddr = phba->sli4_hba.ctrl_regs_memmap_p +
+				    LPFC_HST_ISR0;
+	phba->sli4_hba.IMRregaddr = phba->sli4_hba.ctrl_regs_memmap_p +
+				    LPFC_HST_IMR0;
+	phba->sli4_hba.ISCRregaddr = phba->sli4_hba.ctrl_regs_memmap_p +
+				     LPFC_HST_ISCR0;
+	return;
 }
 
 /**
- * lpfc_sli_disable_msix - Disable MSI-X interrupt mode on SLI-3 device.
+ * lpfc_sli4_bar2_register_memmap - Set up SLI4 BAR2 register memory map.
  * @phba: pointer to lpfc hba data structure.
+ * @vf: virtual function number
  *
- * This routine is invoked to release the MSI-X vectors and then disable the
- * MSI-X interrupt mode to device with SLI-3 interface spec.
+ * This routine is invoked to set up SLI4 BAR2 doorbell register memory map
+ * based on the given viftual function number, @vf.
+ *
+ * Return 0 if successful, otherwise -ENODEV.
  **/
-static void
-lpfc_sli_disable_msix(struct lpfc_hba *phba)
+static int
+lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf)
 {
-	int i;
-
-	/* Free up MSI-X multi-message vectors */
-	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
-		free_irq(phba->msix_entries[i].vector, phba);
-	/* Disable MSI-X */
-	pci_disable_msix(phba->pcidev);
+	if (vf > LPFC_VIR_FUNC_MAX)
+		return -ENODEV;
 
-	return;
+	phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL);
+	phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL);
+	phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL);
+	phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_MQ_DOORBELL);
+	phba->sli4_hba.BMBXregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
+				vf * LPFC_VFR_PAGE_SIZE + LPFC_BMBX);
+	return 0;
 }
 
 /**
- * lpfc_sli_enable_msi - Enable MSI interrupt mode on SLI-3 device.
+ * lpfc_create_bootstrap_mbox - Create the bootstrap mailbox
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to enable the MSI interrupt mode to device with
- * SLI-3 interface spec. The kernel function pci_enable_msi() is called to
- * enable the MSI vector. The device driver is responsible for calling the
- * request_irq() to register MSI vector with a interrupt the handler, which
- * is done in this function.
+ * This routine is invoked to create the bootstrap mailbox
+ * region consistent with the SLI-4 interface spec.  This
+ * routine allocates all memory necessary to communicate
+ * mailbox commands to the port and sets up all alignment
+ * needs.  No locks are expected to be held when calling
+ * this routine.
  *
  * Return codes
  * 	0 - sucessful
- * 	other values - error
- */
+ * 	ENOMEM - could not allocated memory.
+ **/
 static int
-lpfc_sli_enable_msi(struct lpfc_hba *phba)
+lpfc_create_bootstrap_mbox(struct lpfc_hba *phba)
 {
-	int rc;
+	uint32_t bmbx_size;
+	struct lpfc_dmabuf *dmabuf;
+	struct dma_address *dma_address;
+	uint32_t pa_addr;
+	uint64_t phys_addr;
+
+	dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!dmabuf)
+		return -ENOMEM;
 
-	rc = pci_enable_msi(phba->pcidev);
-	if (!rc)
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0462 PCI enable MSI mode success.\n");
-	else {
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0471 PCI enable MSI mode failed (%d)\n", rc);
-		return rc;
+	/*
+	 * The bootstrap mailbox region is comprised of 2 parts
+	 * plus an alignment restriction of 16 bytes.
+	 */
+	bmbx_size = sizeof(struct lpfc_bmbx_create) + (LPFC_ALIGN_16_BYTE - 1);
+	dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+					  bmbx_size,
+					  &dmabuf->phys,
+					  GFP_KERNEL);
+	if (!dmabuf->virt) {
+		kfree(dmabuf);
+		return -ENOMEM;
 	}
+	memset(dmabuf->virt, 0, bmbx_size);
 
-	rc = request_irq(phba->pcidev->irq, lpfc_sli_intr_handler,
-			 IRQF_SHARED, LPFC_DRIVER_NAME, phba);
-	if (rc) {
-		pci_disable_msi(phba->pcidev);
-		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"0478 MSI request_irq failed (%d)\n", rc);
-	}
-	return rc;
+	/*
+	 * Initialize the bootstrap mailbox pointers now so that the register
+	 * operations are simple later.  The mailbox dma address is required
+	 * to be 16-byte aligned.  Also align the virtual memory as each
+	 * maibox is copied into the bmbx mailbox region before issuing the
+	 * command to the port.
+	 */
+	phba->sli4_hba.bmbx.dmabuf = dmabuf;
+	phba->sli4_hba.bmbx.bmbx_size = bmbx_size;
+
+	phba->sli4_hba.bmbx.avirt = PTR_ALIGN(dmabuf->virt,
+					      LPFC_ALIGN_16_BYTE);
+	phba->sli4_hba.bmbx.aphys = ALIGN(dmabuf->phys,
+					      LPFC_ALIGN_16_BYTE);
+
+	/*
+	 * Set the high and low physical addresses now.  The SLI4 alignment
+	 * requirement is 16 bytes and the mailbox is posted to the port
+	 * as two 30-bit addresses.  The other data is a bit marking whether
+	 * the 30-bit address is the high or low address.
+	 * Upcast bmbx aphys to 64bits so shift instruction compiles
+	 * clean on 32 bit machines.
+	 */
+	dma_address = &phba->sli4_hba.bmbx.dma_address;
+	phys_addr = (uint64_t)phba->sli4_hba.bmbx.aphys;
+	pa_addr = (uint32_t) ((phys_addr >> 34) & 0x3fffffff);
+	dma_address->addr_hi = (uint32_t) ((pa_addr << 2) |
+					   LPFC_BMBX_BIT1_ADDR_HI);
+
+	pa_addr = (uint32_t) ((phba->sli4_hba.bmbx.aphys >> 4) & 0x3fffffff);
+	dma_address->addr_lo = (uint32_t) ((pa_addr << 2) |
+					   LPFC_BMBX_BIT1_ADDR_LO);
+	return 0;
 }
 
 /**
- * lpfc_sli_disable_msi - Disable MSI interrupt mode to SLI-3 device.
+ * lpfc_destroy_bootstrap_mbox - Destroy all bootstrap mailbox resources
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to disable the MSI interrupt mode to device with
- * SLI-3 interface spec. The driver calls free_irq() on MSI vector it has
- * done request_irq() on before calling pci_disable_msi(). Failure to do so
- * results in a BUG_ON() and a device will be left with MSI enabled and leaks
- * its vector.
- */
+ * This routine is invoked to teardown the bootstrap mailbox
+ * region and release all host resources. This routine requires
+ * the caller to ensure all mailbox commands recovered, no
+ * additional mailbox comands are sent, and interrupts are disabled
+ * before calling this routine.
+ *
+ **/
 static void
-lpfc_sli_disable_msi(struct lpfc_hba *phba)
+lpfc_destroy_bootstrap_mbox(struct lpfc_hba *phba)
 {
-	free_irq(phba->pcidev->irq, phba);
-	pci_disable_msi(phba->pcidev);
-	return;
+	dma_free_coherent(&phba->pcidev->dev,
+			  phba->sli4_hba.bmbx.bmbx_size,
+			  phba->sli4_hba.bmbx.dmabuf->virt,
+			  phba->sli4_hba.bmbx.dmabuf->phys);
+
+	kfree(phba->sli4_hba.bmbx.dmabuf);
+	memset(&phba->sli4_hba.bmbx, 0, sizeof(struct lpfc_bmbx));
 }
 
 /**
- * lpfc_sli_enable_intr - Enable device interrupt to SLI-3 device.
+ * lpfc_sli4_read_config - Get the config parameters.
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to enable device interrupt and associate driver's
- * interrupt handler(s) to interrupt vector(s) to device with SLI-3 interface
- * spec. Depends on the interrupt mode configured to the driver, the driver
- * will try to fallback from the configured interrupt mode to an interrupt
- * mode which is supported by the platform, kernel, and device in the order
- * of:
- * MSI-X -> MSI -> IRQ.
+ * This routine is invoked to read the configuration parameters from the HBA.
+ * The configuration parameters are used to set the base and maximum values
+ * for RPI's XRI's VPI's VFI's and FCFIs. These values also affect the resource
+ * allocation for the port.
  *
  * Return codes
- *   0 - sucessful
- *   other values - error
+ * 	0 - sucessful
+ * 	ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
  **/
-static uint32_t
-lpfc_sli_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
+static int
+lpfc_sli4_read_config(struct lpfc_hba *phba)
 {
-	uint32_t intr_mode = LPFC_INTR_ERROR;
-	int retval;
+	LPFC_MBOXQ_t *pmb;
+	struct lpfc_mbx_read_config *rd_config;
+	uint32_t rc = 0;
 
-	if (cfg_mode == 2) {
-		/* Need to issue conf_port mbox cmd before conf_msi mbox cmd */
-		retval = lpfc_sli_config_port(phba, LPFC_SLI_REV3);
-		if (!retval) {
-			/* Now, try to enable MSI-X interrupt mode */
-			retval = lpfc_sli_enable_msix(phba);
-			if (!retval) {
-				/* Indicate initialization to MSI-X mode */
-				phba->intr_type = MSIX;
-				intr_mode = 2;
-			}
-		}
+	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!pmb) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2011 Unable to allocate memory for issuing "
+				"SLI_CONFIG_SPECIAL mailbox command\n");
+		return -ENOMEM;
 	}
 
-	/* Fallback to MSI if MSI-X initialization failed */
-	if (cfg_mode >= 1 && phba->intr_type == NONE) {
-		retval = lpfc_sli_enable_msi(phba);
-		if (!retval) {
-			/* Indicate initialization to MSI mode */
-			phba->intr_type = MSI;
-			intr_mode = 1;
-		}
-	}
+	lpfc_read_config(phba, pmb);
 
-	/* Fallback to INTx if both MSI-X/MSI initalization failed */
-	if (phba->intr_type == NONE) {
-		retval = request_irq(phba->pcidev->irq, lpfc_sli_intr_handler,
-				     IRQF_SHARED, LPFC_DRIVER_NAME, phba);
-		if (!retval) {
-			/* Indicate initialization to INTx mode */
-			phba->intr_type = INTx;
-			intr_mode = 0;
-		}
+	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2012 Mailbox failed , mbxCmd x%x "
+			"READ_CONFIG, mbxStatus x%x\n",
+			bf_get(lpfc_mqe_command, &pmb->u.mqe),
+			bf_get(lpfc_mqe_status, &pmb->u.mqe));
+		rc = -EIO;
+	} else {
+		rd_config = &pmb->u.mqe.un.rd_config;
+		phba->sli4_hba.max_cfg_param.max_xri =
+			bf_get(lpfc_mbx_rd_conf_xri_count, rd_config);
+		phba->sli4_hba.max_cfg_param.xri_base =
+			bf_get(lpfc_mbx_rd_conf_xri_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_vpi =
+			bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config);
+		phba->sli4_hba.max_cfg_param.vpi_base =
+			bf_get(lpfc_mbx_rd_conf_vpi_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_rpi =
+			bf_get(lpfc_mbx_rd_conf_rpi_count, rd_config);
+		phba->sli4_hba.max_cfg_param.rpi_base =
+			bf_get(lpfc_mbx_rd_conf_rpi_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_vfi =
+			bf_get(lpfc_mbx_rd_conf_vfi_count, rd_config);
+		phba->sli4_hba.max_cfg_param.vfi_base =
+			bf_get(lpfc_mbx_rd_conf_vfi_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_fcfi =
+			bf_get(lpfc_mbx_rd_conf_fcfi_count, rd_config);
+		phba->sli4_hba.max_cfg_param.fcfi_base =
+			bf_get(lpfc_mbx_rd_conf_fcfi_base, rd_config);
+		phba->sli4_hba.max_cfg_param.max_eq =
+			bf_get(lpfc_mbx_rd_conf_eq_count, rd_config);
+		phba->sli4_hba.max_cfg_param.max_rq =
+			bf_get(lpfc_mbx_rd_conf_rq_count, rd_config);
+		phba->sli4_hba.max_cfg_param.max_wq =
+			bf_get(lpfc_mbx_rd_conf_wq_count, rd_config);
+		phba->sli4_hba.max_cfg_param.max_cq =
+			bf_get(lpfc_mbx_rd_conf_cq_count, rd_config);
+		phba->lmt = bf_get(lpfc_mbx_rd_conf_lmt, rd_config);
+		phba->sli4_hba.next_xri = phba->sli4_hba.max_cfg_param.xri_base;
+		phba->vpi_base = phba->sli4_hba.max_cfg_param.vpi_base;
+		phba->vfi_base = phba->sli4_hba.max_cfg_param.vfi_base;
+		phba->sli4_hba.next_rpi = phba->sli4_hba.max_cfg_param.rpi_base;
+		phba->max_vpi = phba->sli4_hba.max_cfg_param.max_vpi;
+		phba->max_vports = phba->max_vpi;
+		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+				"2003 cfg params XRI(B:%d M:%d), "
+				"VPI(B:%d M:%d) "
+				"VFI(B:%d M:%d) "
+				"RPI(B:%d M:%d) "
+				"FCFI(B:%d M:%d)\n",
+				phba->sli4_hba.max_cfg_param.xri_base,
+				phba->sli4_hba.max_cfg_param.max_xri,
+				phba->sli4_hba.max_cfg_param.vpi_base,
+				phba->sli4_hba.max_cfg_param.max_vpi,
+				phba->sli4_hba.max_cfg_param.vfi_base,
+				phba->sli4_hba.max_cfg_param.max_vfi,
+				phba->sli4_hba.max_cfg_param.rpi_base,
+				phba->sli4_hba.max_cfg_param.max_rpi,
+				phba->sli4_hba.max_cfg_param.fcfi_base,
+				phba->sli4_hba.max_cfg_param.max_fcfi);
 	}
-	return intr_mode;
+	mempool_free(pmb, phba->mbox_mem_pool);
+
+	/* Reset the DFT_HBA_Q_DEPTH to the max xri  */
+	if (phba->cfg_hba_queue_depth > (phba->sli4_hba.max_cfg_param.max_xri))
+		phba->cfg_hba_queue_depth =
+				phba->sli4_hba.max_cfg_param.max_xri;
+	return rc;
 }
 
 /**
- * lpfc_sli_disable_intr - Disable device interrupt to SLI-3 device.
+ * lpfc_dev_endian_order_setup - Notify the port of the host's endian order.
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to disable device interrupt and disassociate the
- * driver's interrupt handler(s) from interrupt vector(s) to device with
- * SLI-3 interface spec. Depending on the interrupt mode, the driver will
- * release the interrupt vector(s) for the message signaled interrupt.
+ * This routine is invoked to setup the host-side endian order to the
+ * HBA consistent with the SLI-4 interface spec.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
  **/
-static void
-lpfc_sli_disable_intr(struct lpfc_hba *phba)
+static int
+lpfc_setup_endian_order(struct lpfc_hba *phba)
 {
-	/* Disable the currently initialized interrupt mode */
-	if (phba->intr_type == MSIX)
-		lpfc_sli_disable_msix(phba);
-	else if (phba->intr_type == MSI)
-		lpfc_sli_disable_msi(phba);
-	else if (phba->intr_type == INTx)
-		free_irq(phba->pcidev->irq, phba);
+	LPFC_MBOXQ_t *mboxq;
+	uint32_t rc = 0;
+	uint32_t endian_mb_data[2] = {HOST_ENDIAN_LOW_WORD0,
+				      HOST_ENDIAN_HIGH_WORD1};
 
-	/* Reset interrupt management states */
-	phba->intr_type = NONE;
-	phba->sli.slistat.sli_intr = 0;
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0492 Unable to allocate memory for issuing "
+				"SLI_CONFIG_SPECIAL mailbox command\n");
+		return -ENOMEM;
+	}
 
-	return;
+	/*
+	 * The SLI4_CONFIG_SPECIAL mailbox command requires the first two
+	 * words to contain special data values and no other data.
+	 */
+	memset(mboxq, 0, sizeof(LPFC_MBOXQ_t));
+	memcpy(&mboxq->u.mqe, &endian_mb_data, sizeof(endian_mb_data));
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0493 SLI_CONFIG_SPECIAL mailbox failed with "
+				"status x%x\n",
+				rc);
+		rc = -EIO;
+	}
+
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return rc;
 }
 
 /**
- * lpfc_unset_hba - Unset SLI3 hba device initialization
+ * lpfc_sli4_queue_create - Create all the SLI4 queues
  * @phba: pointer to lpfc hba data structure.
  *
- * This routine is invoked to unset the HBA device initialization steps to
- * a device with SLI-3 interface spec.
+ * This routine is invoked to allocate all the SLI4 queues for the FCoE HBA
+ * operation. For each SLI4 queue type, the parameters such as queue entry
+ * count (queue depth) shall be taken from the module parameter. For now,
+ * we just use some constant number as place holder.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
  **/
-static void
-lpfc_unset_hba(struct lpfc_hba *phba)
+static int
+lpfc_sli4_queue_create(struct lpfc_hba *phba)
 {
-	struct lpfc_vport *vport = phba->pport;
-	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+	struct lpfc_queue *qdesc;
+	int fcp_eqidx, fcp_cqidx, fcp_wqidx;
+	int cfg_fcp_wq_count;
+	int cfg_fcp_eq_count;
 
-	spin_lock_irq(shost->host_lock);
-	vport->load_flag |= FC_UNLOADING;
-	spin_unlock_irq(shost->host_lock);
+	/*
+	 * Sanity check for confiugred queue parameters against the run-time
+	 * device parameters
+	 */
 
-	lpfc_stop_hba_timers(phba);
+	/* Sanity check on FCP fast-path WQ parameters */
+	cfg_fcp_wq_count = phba->cfg_fcp_wq_count;
+	if (cfg_fcp_wq_count >
+	    (phba->sli4_hba.max_cfg_param.max_wq - LPFC_SP_WQN_DEF)) {
+		cfg_fcp_wq_count = phba->sli4_hba.max_cfg_param.max_wq -
+				   LPFC_SP_WQN_DEF;
+		if (cfg_fcp_wq_count < LPFC_FP_WQN_MIN) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"2581 Not enough WQs (%d) from "
+					"the pci function for supporting "
+					"FCP WQs (%d)\n",
+					phba->sli4_hba.max_cfg_param.max_wq,
+					phba->cfg_fcp_wq_count);
+			goto out_error;
+		}
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2582 Not enough WQs (%d) from the pci "
+				"function for supporting the requested "
+				"FCP WQs (%d), the actual FCP WQs can "
+				"be supported: %d\n",
+				phba->sli4_hba.max_cfg_param.max_wq,
+				phba->cfg_fcp_wq_count, cfg_fcp_wq_count);
+	}
+	/* The actual number of FCP work queues adopted */
+	phba->cfg_fcp_wq_count = cfg_fcp_wq_count;
+
+	/* Sanity check on FCP fast-path EQ parameters */
+	cfg_fcp_eq_count = phba->cfg_fcp_eq_count;
+	if (cfg_fcp_eq_count >
+	    (phba->sli4_hba.max_cfg_param.max_eq - LPFC_SP_EQN_DEF)) {
+		cfg_fcp_eq_count = phba->sli4_hba.max_cfg_param.max_eq -
+				   LPFC_SP_EQN_DEF;
+		if (cfg_fcp_eq_count < LPFC_FP_EQN_MIN) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"2574 Not enough EQs (%d) from the "
+					"pci function for supporting FCP "
+					"EQs (%d)\n",
+					phba->sli4_hba.max_cfg_param.max_eq,
+					phba->cfg_fcp_eq_count);
+			goto out_error;
+		}
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2575 Not enough EQs (%d) from the pci "
+				"function for supporting the requested "
+				"FCP EQs (%d), the actual FCP EQs can "
+				"be supported: %d\n",
+				phba->sli4_hba.max_cfg_param.max_eq,
+				phba->cfg_fcp_eq_count, cfg_fcp_eq_count);
+	}
+	/* It does not make sense to have more EQs than WQs */
+	if (cfg_fcp_eq_count > phba->cfg_fcp_wq_count) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2593 The number of FCP EQs (%d) is more "
+				"than the number of FCP WQs (%d), take "
+				"the number of FCP EQs same as than of "
+				"WQs (%d)\n", cfg_fcp_eq_count,
+				phba->cfg_fcp_wq_count,
+				phba->cfg_fcp_wq_count);
+		cfg_fcp_eq_count = phba->cfg_fcp_wq_count;
+	}
+	/* The actual number of FCP event queues adopted */
+	phba->cfg_fcp_eq_count = cfg_fcp_eq_count;
+	/* The overall number of event queues used */
+	phba->sli4_hba.cfg_eqn = phba->cfg_fcp_eq_count + LPFC_SP_EQN_DEF;
 
-	phba->pport->work_port_events = 0;
+	/*
+	 * Create Event Queues (EQs)
+	 */
 
-	lpfc_sli_hba_down(phba);
+	/* Get EQ depth from module parameter, fake the default for now */
+	phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B;
+	phba->sli4_hba.eq_ecount = LPFC_EQE_DEF_COUNT;
 
-	lpfc_sli_brdrestart(phba);
+	/* Create slow path event queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize,
+				      phba->sli4_hba.eq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0496 Failed allocate slow-path EQ\n");
+		goto out_error;
+	}
+	phba->sli4_hba.sp_eq = qdesc;
+
+	/* Create fast-path FCP Event Queue(s) */
+	phba->sli4_hba.fp_eq = kzalloc((sizeof(struct lpfc_queue *) *
+			       phba->cfg_fcp_eq_count), GFP_KERNEL);
+	if (!phba->sli4_hba.fp_eq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2576 Failed allocate memory for fast-path "
+				"EQ record array\n");
+		goto out_free_sp_eq;
+	}
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++) {
+		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize,
+					      phba->sli4_hba.eq_ecount);
+		if (!qdesc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0497 Failed allocate fast-path EQ\n");
+			goto out_free_fp_eq;
+		}
+		phba->sli4_hba.fp_eq[fcp_eqidx] = qdesc;
+	}
+
+	/*
+	 * Create Complete Queues (CQs)
+	 */
+
+	/* Get CQ depth from module parameter, fake the default for now */
+	phba->sli4_hba.cq_esize = LPFC_CQE_SIZE;
+	phba->sli4_hba.cq_ecount = LPFC_CQE_DEF_COUNT;
+
+	/* Create slow-path Mailbox Command Complete Queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
+				      phba->sli4_hba.cq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0500 Failed allocate slow-path mailbox CQ\n");
+		goto out_free_fp_eq;
+	}
+	phba->sli4_hba.mbx_cq = qdesc;
+
+	/* Create slow-path ELS Complete Queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
+				      phba->sli4_hba.cq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0501 Failed allocate slow-path ELS CQ\n");
+		goto out_free_mbx_cq;
+	}
+	phba->sli4_hba.els_cq = qdesc;
+
+	/* Create slow-path Unsolicited Receive Complete Queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
+				      phba->sli4_hba.cq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0502 Failed allocate slow-path USOL RX CQ\n");
+		goto out_free_els_cq;
+	}
+	phba->sli4_hba.rxq_cq = qdesc;
+
+	/* Create fast-path FCP Completion Queue(s), one-to-one with EQs */
+	phba->sli4_hba.fcp_cq = kzalloc((sizeof(struct lpfc_queue *) *
+				phba->cfg_fcp_eq_count), GFP_KERNEL);
+	if (!phba->sli4_hba.fcp_cq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2577 Failed allocate memory for fast-path "
+				"CQ record array\n");
+		goto out_free_rxq_cq;
+	}
+	for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_eq_count; fcp_cqidx++) {
+		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize,
+					      phba->sli4_hba.cq_ecount);
+		if (!qdesc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0499 Failed allocate fast-path FCP "
+					"CQ (%d)\n", fcp_cqidx);
+			goto out_free_fcp_cq;
+		}
+		phba->sli4_hba.fcp_cq[fcp_cqidx] = qdesc;
+	}
+
+	/* Create Mailbox Command Queue */
+	phba->sli4_hba.mq_esize = LPFC_MQE_SIZE;
+	phba->sli4_hba.mq_ecount = LPFC_MQE_DEF_COUNT;
+
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.mq_esize,
+				      phba->sli4_hba.mq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0505 Failed allocate slow-path MQ\n");
+		goto out_free_fcp_cq;
+	}
+	phba->sli4_hba.mbx_wq = qdesc;
+
+	/*
+	 * Create all the Work Queues (WQs)
+	 */
+	phba->sli4_hba.wq_esize = LPFC_WQE_SIZE;
+	phba->sli4_hba.wq_ecount = LPFC_WQE_DEF_COUNT;
+
+	/* Create slow-path ELS Work Queue */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize,
+				      phba->sli4_hba.wq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0504 Failed allocate slow-path ELS WQ\n");
+		goto out_free_mbx_wq;
+	}
+	phba->sli4_hba.els_wq = qdesc;
+
+	/* Create fast-path FCP Work Queue(s) */
+	phba->sli4_hba.fcp_wq = kzalloc((sizeof(struct lpfc_queue *) *
+				phba->cfg_fcp_wq_count), GFP_KERNEL);
+	if (!phba->sli4_hba.fcp_wq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2578 Failed allocate memory for fast-path "
+				"WQ record array\n");
+		goto out_free_els_wq;
+	}
+	for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_wq_count; fcp_wqidx++) {
+		qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize,
+					      phba->sli4_hba.wq_ecount);
+		if (!qdesc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0503 Failed allocate fast-path FCP "
+					"WQ (%d)\n", fcp_wqidx);
+			goto out_free_fcp_wq;
+		}
+		phba->sli4_hba.fcp_wq[fcp_wqidx] = qdesc;
+	}
+
+	/*
+	 * Create Receive Queue (RQ)
+	 */
+	phba->sli4_hba.rq_esize = LPFC_RQE_SIZE;
+	phba->sli4_hba.rq_ecount = LPFC_RQE_DEF_COUNT;
+
+	/* Create Receive Queue for header */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.rq_esize,
+				      phba->sli4_hba.rq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0506 Failed allocate receive HRQ\n");
+		goto out_free_fcp_wq;
+	}
+	phba->sli4_hba.hdr_rq = qdesc;
+
+	/* Create Receive Queue for data */
+	qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.rq_esize,
+				      phba->sli4_hba.rq_ecount);
+	if (!qdesc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0507 Failed allocate receive DRQ\n");
+		goto out_free_hdr_rq;
+	}
+	phba->sli4_hba.dat_rq = qdesc;
+
+	return 0;
+
+out_free_hdr_rq:
+	lpfc_sli4_queue_free(phba->sli4_hba.hdr_rq);
+	phba->sli4_hba.hdr_rq = NULL;
+out_free_fcp_wq:
+	for (--fcp_wqidx; fcp_wqidx >= 0; fcp_wqidx--) {
+		lpfc_sli4_queue_free(phba->sli4_hba.fcp_wq[fcp_wqidx]);
+		phba->sli4_hba.fcp_wq[fcp_wqidx] = NULL;
+	}
+	kfree(phba->sli4_hba.fcp_wq);
+out_free_els_wq:
+	lpfc_sli4_queue_free(phba->sli4_hba.els_wq);
+	phba->sli4_hba.els_wq = NULL;
+out_free_mbx_wq:
+	lpfc_sli4_queue_free(phba->sli4_hba.mbx_wq);
+	phba->sli4_hba.mbx_wq = NULL;
+out_free_fcp_cq:
+	for (--fcp_cqidx; fcp_cqidx >= 0; fcp_cqidx--) {
+		lpfc_sli4_queue_free(phba->sli4_hba.fcp_cq[fcp_cqidx]);
+		phba->sli4_hba.fcp_cq[fcp_cqidx] = NULL;
+	}
+	kfree(phba->sli4_hba.fcp_cq);
+out_free_rxq_cq:
+	lpfc_sli4_queue_free(phba->sli4_hba.rxq_cq);
+	phba->sli4_hba.rxq_cq = NULL;
+out_free_els_cq:
+	lpfc_sli4_queue_free(phba->sli4_hba.els_cq);
+	phba->sli4_hba.els_cq = NULL;
+out_free_mbx_cq:
+	lpfc_sli4_queue_free(phba->sli4_hba.mbx_cq);
+	phba->sli4_hba.mbx_cq = NULL;
+out_free_fp_eq:
+	for (--fcp_eqidx; fcp_eqidx >= 0; fcp_eqidx--) {
+		lpfc_sli4_queue_free(phba->sli4_hba.fp_eq[fcp_eqidx]);
+		phba->sli4_hba.fp_eq[fcp_eqidx] = NULL;
+	}
+	kfree(phba->sli4_hba.fp_eq);
+out_free_sp_eq:
+	lpfc_sli4_queue_free(phba->sli4_hba.sp_eq);
+	phba->sli4_hba.sp_eq = NULL;
+out_error:
+	return -ENOMEM;
+}
+
+/**
+ * lpfc_sli4_queue_destroy - Destroy all the SLI4 queues
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to release all the SLI4 queues with the FCoE HBA
+ * operation.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+static void
+lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
+{
+	int fcp_qidx;
+
+	/* Release mailbox command work queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.mbx_wq);
+	phba->sli4_hba.mbx_wq = NULL;
+
+	/* Release ELS work queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.els_wq);
+	phba->sli4_hba.els_wq = NULL;
+
+	/* Release FCP work queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_wq_count; fcp_qidx++)
+		lpfc_sli4_queue_free(phba->sli4_hba.fcp_wq[fcp_qidx]);
+	kfree(phba->sli4_hba.fcp_wq);
+	phba->sli4_hba.fcp_wq = NULL;
+
+	/* Release unsolicited receive queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.hdr_rq);
+	phba->sli4_hba.hdr_rq = NULL;
+	lpfc_sli4_queue_free(phba->sli4_hba.dat_rq);
+	phba->sli4_hba.dat_rq = NULL;
+
+	/* Release unsolicited receive complete queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.rxq_cq);
+	phba->sli4_hba.rxq_cq = NULL;
+
+	/* Release ELS complete queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.els_cq);
+	phba->sli4_hba.els_cq = NULL;
+
+	/* Release mailbox command complete queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.mbx_cq);
+	phba->sli4_hba.mbx_cq = NULL;
+
+	/* Release FCP response complete queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++)
+		lpfc_sli4_queue_free(phba->sli4_hba.fcp_cq[fcp_qidx]);
+	kfree(phba->sli4_hba.fcp_cq);
+	phba->sli4_hba.fcp_cq = NULL;
+
+	/* Release fast-path event queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++)
+		lpfc_sli4_queue_free(phba->sli4_hba.fp_eq[fcp_qidx]);
+	kfree(phba->sli4_hba.fp_eq);
+	phba->sli4_hba.fp_eq = NULL;
+
+	/* Release slow-path event queue */
+	lpfc_sli4_queue_free(phba->sli4_hba.sp_eq);
+	phba->sli4_hba.sp_eq = NULL;
+
+	return;
+}
+
+/**
+ * lpfc_sli4_queue_setup - Set up all the SLI4 queues
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up all the SLI4 queues for the FCoE HBA
+ * operation.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+int
+lpfc_sli4_queue_setup(struct lpfc_hba *phba)
+{
+	int rc = -ENOMEM;
+	int fcp_eqidx, fcp_cqidx, fcp_wqidx;
+	int fcp_cq_index = 0;
+
+	/*
+	 * Set up Event Queues (EQs)
+	 */
+
+	/* Set up slow-path event queue */
+	if (!phba->sli4_hba.sp_eq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0520 Slow-path EQ not allocated\n");
+		goto out_error;
+	}
+	rc = lpfc_eq_create(phba, phba->sli4_hba.sp_eq,
+			    LPFC_SP_DEF_IMAX);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0521 Failed setup of slow-path EQ: "
+				"rc = 0x%x\n", rc);
+		goto out_error;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2583 Slow-path EQ setup: queue-id=%d\n",
+			phba->sli4_hba.sp_eq->queue_id);
+
+	/* Set up fast-path event queue */
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++) {
+		if (!phba->sli4_hba.fp_eq[fcp_eqidx]) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0522 Fast-path EQ (%d) not "
+					"allocated\n", fcp_eqidx);
+			goto out_destroy_fp_eq;
+		}
+		rc = lpfc_eq_create(phba, phba->sli4_hba.fp_eq[fcp_eqidx],
+				    phba->cfg_fcp_imax);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0523 Failed setup of fast-path EQ "
+					"(%d), rc = 0x%x\n", fcp_eqidx, rc);
+			goto out_destroy_fp_eq;
+		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2584 Fast-path EQ setup: "
+				"queue[%d]-id=%d\n", fcp_eqidx,
+				phba->sli4_hba.fp_eq[fcp_eqidx]->queue_id);
+	}
+
+	/*
+	 * Set up Complete Queues (CQs)
+	 */
+
+	/* Set up slow-path MBOX Complete Queue as the first CQ */
+	if (!phba->sli4_hba.mbx_cq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0528 Mailbox CQ not allocated\n");
+		goto out_destroy_fp_eq;
+	}
+	rc = lpfc_cq_create(phba, phba->sli4_hba.mbx_cq, phba->sli4_hba.sp_eq,
+			    LPFC_MCQ, LPFC_MBOX);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0529 Failed setup of slow-path mailbox CQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_fp_eq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2585 MBX CQ setup: cq-id=%d, parent eq-id=%d\n",
+			phba->sli4_hba.mbx_cq->queue_id,
+			phba->sli4_hba.sp_eq->queue_id);
+
+	/* Set up slow-path ELS Complete Queue */
+	if (!phba->sli4_hba.els_cq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0530 ELS CQ not allocated\n");
+		goto out_destroy_mbx_cq;
+	}
+	rc = lpfc_cq_create(phba, phba->sli4_hba.els_cq, phba->sli4_hba.sp_eq,
+			    LPFC_WCQ, LPFC_ELS);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0531 Failed setup of slow-path ELS CQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_mbx_cq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2586 ELS CQ setup: cq-id=%d, parent eq-id=%d\n",
+			phba->sli4_hba.els_cq->queue_id,
+			phba->sli4_hba.sp_eq->queue_id);
+
+	/* Set up slow-path Unsolicited Receive Complete Queue */
+	if (!phba->sli4_hba.rxq_cq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0532 USOL RX CQ not allocated\n");
+		goto out_destroy_els_cq;
+	}
+	rc = lpfc_cq_create(phba, phba->sli4_hba.rxq_cq, phba->sli4_hba.sp_eq,
+			    LPFC_RCQ, LPFC_USOL);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0533 Failed setup of slow-path USOL RX CQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_els_cq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2587 USL CQ setup: cq-id=%d, parent eq-id=%d\n",
+			phba->sli4_hba.rxq_cq->queue_id,
+			phba->sli4_hba.sp_eq->queue_id);
+
+	/* Set up fast-path FCP Response Complete Queue */
+	for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_eq_count; fcp_cqidx++) {
+		if (!phba->sli4_hba.fcp_cq[fcp_cqidx]) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0526 Fast-path FCP CQ (%d) not "
+					"allocated\n", fcp_cqidx);
+			goto out_destroy_fcp_cq;
+		}
+		rc = lpfc_cq_create(phba, phba->sli4_hba.fcp_cq[fcp_cqidx],
+				    phba->sli4_hba.fp_eq[fcp_cqidx],
+				    LPFC_WCQ, LPFC_FCP);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0527 Failed setup of fast-path FCP "
+					"CQ (%d), rc = 0x%x\n", fcp_cqidx, rc);
+			goto out_destroy_fcp_cq;
+		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2588 FCP CQ setup: cq[%d]-id=%d, "
+				"parent eq[%d]-id=%d\n",
+				fcp_cqidx,
+				phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id,
+				fcp_cqidx,
+				phba->sli4_hba.fp_eq[fcp_cqidx]->queue_id);
+	}
+
+	/*
+	 * Set up all the Work Queues (WQs)
+	 */
+
+	/* Set up Mailbox Command Queue */
+	if (!phba->sli4_hba.mbx_wq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0538 Slow-path MQ not allocated\n");
+		goto out_destroy_fcp_cq;
+	}
+	rc = lpfc_mq_create(phba, phba->sli4_hba.mbx_wq,
+			    phba->sli4_hba.mbx_cq, LPFC_MBOX);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0539 Failed setup of slow-path MQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_fcp_cq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2589 MBX MQ setup: wq-id=%d, parent cq-id=%d\n",
+			phba->sli4_hba.mbx_wq->queue_id,
+			phba->sli4_hba.mbx_cq->queue_id);
+
+	/* Set up slow-path ELS Work Queue */
+	if (!phba->sli4_hba.els_wq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0536 Slow-path ELS WQ not allocated\n");
+		goto out_destroy_mbx_wq;
+	}
+	rc = lpfc_wq_create(phba, phba->sli4_hba.els_wq,
+			    phba->sli4_hba.els_cq, LPFC_ELS);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0537 Failed setup of slow-path ELS WQ: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_mbx_wq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2590 ELS WQ setup: wq-id=%d, parent cq-id=%d\n",
+			phba->sli4_hba.els_wq->queue_id,
+			phba->sli4_hba.els_cq->queue_id);
+
+	/* Set up fast-path FCP Work Queue */
+	for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_wq_count; fcp_wqidx++) {
+		if (!phba->sli4_hba.fcp_wq[fcp_wqidx]) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0534 Fast-path FCP WQ (%d) not "
+					"allocated\n", fcp_wqidx);
+			goto out_destroy_fcp_wq;
+		}
+		rc = lpfc_wq_create(phba, phba->sli4_hba.fcp_wq[fcp_wqidx],
+				    phba->sli4_hba.fcp_cq[fcp_cq_index],
+				    LPFC_FCP);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0535 Failed setup of fast-path FCP "
+					"WQ (%d), rc = 0x%x\n", fcp_wqidx, rc);
+			goto out_destroy_fcp_wq;
+		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2591 FCP WQ setup: wq[%d]-id=%d, "
+				"parent cq[%d]-id=%d\n",
+				fcp_wqidx,
+				phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id,
+				fcp_cq_index,
+				phba->sli4_hba.fcp_cq[fcp_cq_index]->queue_id);
+		/* Round robin FCP Work Queue's Completion Queue assignment */
+		fcp_cq_index = ((fcp_cq_index + 1) % phba->cfg_fcp_eq_count);
+	}
+
+	/*
+	 * Create Receive Queue (RQ)
+	 */
+	if (!phba->sli4_hba.hdr_rq || !phba->sli4_hba.dat_rq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0540 Receive Queue not allocated\n");
+		goto out_destroy_fcp_wq;
+	}
+	rc = lpfc_rq_create(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq,
+			    phba->sli4_hba.rxq_cq, LPFC_USOL);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0541 Failed setup of Receive Queue: "
+				"rc = 0x%x\n", rc);
+		goto out_destroy_fcp_wq;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"2592 USL RQ setup: hdr-rq-id=%d, dat-rq-id=%d "
+			"parent cq-id=%d\n",
+			phba->sli4_hba.hdr_rq->queue_id,
+			phba->sli4_hba.dat_rq->queue_id,
+			phba->sli4_hba.rxq_cq->queue_id);
+	return 0;
+
+out_destroy_fcp_wq:
+	for (--fcp_wqidx; fcp_wqidx >= 0; fcp_wqidx--)
+		lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[fcp_wqidx]);
+	lpfc_wq_destroy(phba, phba->sli4_hba.els_wq);
+out_destroy_mbx_wq:
+	lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq);
+out_destroy_fcp_cq:
+	for (--fcp_cqidx; fcp_cqidx >= 0; fcp_cqidx--)
+		lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[fcp_cqidx]);
+	lpfc_cq_destroy(phba, phba->sli4_hba.rxq_cq);
+out_destroy_els_cq:
+	lpfc_cq_destroy(phba, phba->sli4_hba.els_cq);
+out_destroy_mbx_cq:
+	lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq);
+out_destroy_fp_eq:
+	for (--fcp_eqidx; fcp_eqidx >= 0; fcp_eqidx--)
+		lpfc_eq_destroy(phba, phba->sli4_hba.fp_eq[fcp_eqidx]);
+	lpfc_eq_destroy(phba, phba->sli4_hba.sp_eq);
+out_error:
+	return rc;
+}
+
+/**
+ * lpfc_sli4_queue_unset - Unset all the SLI4 queues
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset all the SLI4 queues with the FCoE HBA
+ * operation.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+void
+lpfc_sli4_queue_unset(struct lpfc_hba *phba)
+{
+	int fcp_qidx;
+
+	/* Unset mailbox command work queue */
+	lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq);
+	/* Unset ELS work queue */
+	lpfc_wq_destroy(phba, phba->sli4_hba.els_wq);
+	/* Unset unsolicited receive queue */
+	lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq);
+	/* Unset FCP work queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_wq_count; fcp_qidx++)
+		lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[fcp_qidx]);
+	/* Unset mailbox command complete queue */
+	lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq);
+	/* Unset ELS complete queue */
+	lpfc_cq_destroy(phba, phba->sli4_hba.els_cq);
+	/* Unset unsolicited receive complete queue */
+	lpfc_cq_destroy(phba, phba->sli4_hba.rxq_cq);
+	/* Unset FCP response complete queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++)
+		lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[fcp_qidx]);
+	/* Unset fast-path event queue */
+	for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_eq_count; fcp_qidx++)
+		lpfc_eq_destroy(phba, phba->sli4_hba.fp_eq[fcp_qidx]);
+	/* Unset slow-path event queue */
+	lpfc_eq_destroy(phba, phba->sli4_hba.sp_eq);
+}
+
+/**
+ * lpfc_sli4_cq_event_pool_create - Create completion-queue event free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to allocate and set up a pool of completion queue
+ * events. The body of the completion queue event is a completion queue entry
+ * CQE. For now, this pool is used for the interrupt service routine to queue
+ * the following HBA completion queue events for the worker thread to process:
+ *   - Mailbox asynchronous events
+ *   - Receive queue completion unsolicited events
+ * Later, this can be used for all the slow-path events.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      -ENOMEM - No availble memory
+ **/
+static int
+lpfc_sli4_cq_event_pool_create(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+	int i;
+
+	for (i = 0; i < (4 * phba->sli4_hba.cq_ecount); i++) {
+		cq_event = kmalloc(sizeof(struct lpfc_cq_event), GFP_KERNEL);
+		if (!cq_event)
+			goto out_pool_create_fail;
+		list_add_tail(&cq_event->list,
+			      &phba->sli4_hba.sp_cqe_event_pool);
+	}
+	return 0;
+
+out_pool_create_fail:
+	lpfc_sli4_cq_event_pool_destroy(phba);
+	return -ENOMEM;
+}
+
+/**
+ * lpfc_sli4_cq_event_pool_destroy - Free completion-queue event free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to free the pool of completion queue events at
+ * driver unload time. Note that, it is the responsibility of the driver
+ * cleanup routine to free all the outstanding completion-queue events
+ * allocated from this pool back into the pool before invoking this routine
+ * to destroy the pool.
+ **/
+static void
+lpfc_sli4_cq_event_pool_destroy(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event, *next_cq_event;
+
+	list_for_each_entry_safe(cq_event, next_cq_event,
+				 &phba->sli4_hba.sp_cqe_event_pool, list) {
+		list_del(&cq_event->list);
+		kfree(cq_event);
+	}
+}
+
+/**
+ * __lpfc_sli4_cq_event_alloc - Allocate a completion-queue event from free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is the lock free version of the API invoked to allocate a
+ * completion-queue event from the free pool.
+ *
+ * Return: Pointer to the newly allocated completion-queue event if successful
+ *         NULL otherwise.
+ **/
+struct lpfc_cq_event *
+__lpfc_sli4_cq_event_alloc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event = NULL;
+
+	list_remove_head(&phba->sli4_hba.sp_cqe_event_pool, cq_event,
+			 struct lpfc_cq_event, list);
+	return cq_event;
+}
+
+/**
+ * lpfc_sli4_cq_event_alloc - Allocate a completion-queue event from free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is the lock version of the API invoked to allocate a
+ * completion-queue event from the free pool.
+ *
+ * Return: Pointer to the newly allocated completion-queue event if successful
+ *         NULL otherwise.
+ **/
+struct lpfc_cq_event *
+lpfc_sli4_cq_event_alloc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	cq_event = __lpfc_sli4_cq_event_alloc(phba);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	return cq_event;
+}
+
+/**
+ * __lpfc_sli4_cq_event_release - Release a completion-queue event to free pool
+ * @phba: pointer to lpfc hba data structure.
+ * @cq_event: pointer to the completion queue event to be freed.
+ *
+ * This routine is the lock free version of the API invoked to release a
+ * completion-queue event back into the free pool.
+ **/
+void
+__lpfc_sli4_cq_event_release(struct lpfc_hba *phba,
+			     struct lpfc_cq_event *cq_event)
+{
+	list_add_tail(&cq_event->list, &phba->sli4_hba.sp_cqe_event_pool);
+}
+
+/**
+ * lpfc_sli4_cq_event_release - Release a completion-queue event to free pool
+ * @phba: pointer to lpfc hba data structure.
+ * @cq_event: pointer to the completion queue event to be freed.
+ *
+ * This routine is the lock version of the API invoked to release a
+ * completion-queue event back into the free pool.
+ **/
+void
+lpfc_sli4_cq_event_release(struct lpfc_hba *phba,
+			   struct lpfc_cq_event *cq_event)
+{
+	unsigned long iflags;
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	__lpfc_sli4_cq_event_release(phba, cq_event);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+}
+
+/**
+ * lpfc_sli4_cq_event_release_all - Release all cq events to the free pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is to free all the pending completion-queue events to the
+ * back into the free pool for device reset.
+ **/
+static void
+lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba)
+{
+	LIST_HEAD(cqelist);
+	struct lpfc_cq_event *cqe;
+	unsigned long iflags;
+
+	/* Retrieve all the pending WCQEs from pending WCQE lists */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	/* Pending FCP XRI abort events */
+	list_splice_init(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue,
+			 &cqelist);
+	/* Pending ELS XRI abort events */
+	list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
+			 &cqelist);
+	/* Pending asynnc events */
+	list_splice_init(&phba->sli4_hba.sp_asynce_work_queue,
+			 &cqelist);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	while (!list_empty(&cqelist)) {
+		list_remove_head(&cqelist, cqe, struct lpfc_cq_event, list);
+		lpfc_sli4_cq_event_release(phba, cqe);
+	}
+}
+
+/**
+ * lpfc_pci_function_reset - Reset pci function.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to request a PCI function reset. It will destroys
+ * all resources assigned to the PCI function which originates this request.
+ *
+ * Return codes
+ *      0 - sucessful
+ *      ENOMEM - No availble memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+int
+lpfc_pci_function_reset(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mboxq;
+	uint32_t rc = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0494 Unable to allocate memory for issuing "
+				"SLI_FUNCTION_RESET mailbox command\n");
+		return -ENOMEM;
+	}
+
+	/* Set up PCI function reset SLI4_CONFIG mailbox-ioctl command */
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_FUNCTION_RESET, 0,
+			 LPFC_SLI4_MBX_EMBED);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0495 SLI_FUNCTION_RESET mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands
+ * @phba: pointer to lpfc hba data structure.
+ * @cnt: number of nop mailbox commands to send.
+ *
+ * This routine is invoked to send a number @cnt of NOP mailbox command and
+ * wait for each command to complete.
+ *
+ * Return: the number of NOP mailbox command completed.
+ **/
+static int
+lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt)
+{
+	LPFC_MBOXQ_t *mboxq;
+	int length, cmdsent;
+	uint32_t mbox_tmo;
+	uint32_t rc = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (cnt == 0) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2518 Requested to send 0 NOP mailbox cmd\n");
+		return cnt;
+	}
+
+	mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2519 Unable to allocate memory for issuing "
+				"NOP mailbox command\n");
+		return 0;
+	}
+
+	/* Set up NOP SLI4_CONFIG mailbox-ioctl command */
+	length = (sizeof(struct lpfc_mbx_nop) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_NOP, length, LPFC_SLI4_MBX_EMBED);
+
+	mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+	for (cmdsent = 0; cmdsent < cnt; cmdsent++) {
+		if (!phba->sli4_hba.intr_enable)
+			rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+		else
+			rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
+		if (rc == MBX_TIMEOUT)
+			break;
+		/* Check return status */
+		shdr = (union lpfc_sli4_cfg_shdr *)
+			&mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
+		shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+		shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
+					 &shdr->response);
+		if (shdr_status || shdr_add_status || rc) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+					"2520 NOP mailbox command failed "
+					"status x%x add_status x%x mbx "
+					"status x%x\n", shdr_status,
+					shdr_add_status, rc);
+			break;
+		}
+	}
+
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+
+	return cmdsent;
+}
+
+/**
+ * lpfc_sli4_fcfi_unreg - Unregister fcfi to device
+ * @phba: pointer to lpfc hba data structure.
+ * @fcfi: fcf index.
+ *
+ * This routine is invoked to unregister a FCFI from device.
+ **/
+void
+lpfc_sli4_fcfi_unreg(struct lpfc_hba *phba, uint16_t fcfi)
+{
+	LPFC_MBOXQ_t *mbox;
+	uint32_t mbox_tmo;
+	int rc;
+	unsigned long flags;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+
+	if (!mbox)
+		return;
+
+	lpfc_unreg_fcfi(mbox, fcfi);
+
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else {
+		mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+	}
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	if (rc != MBX_SUCCESS)
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2517 Unregister FCFI command failed "
+				"status %d, mbxStatus x%x\n", rc,
+				bf_get(lpfc_mqe_status, &mbox->u.mqe));
+	else {
+		spin_lock_irqsave(&phba->hbalock, flags);
+		/* Mark the FCFI is no longer registered */
+		phba->fcf.fcf_flag &=
+			~(FCF_AVAILABLE | FCF_REGISTERED | FCF_DISCOVERED);
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+	}
+}
+
+/**
+ * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to set up the PCI device memory space for device
+ * with SLI-4 interface spec.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+	unsigned long bar0map_len, bar1map_len, bar2map_len;
+	int error = -ENODEV;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return error;
+	else
+		pdev = phba->pcidev;
+
+	/* Set the device DMA mask size */
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) != 0)
+		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)
+			return error;
+
+	/* Get the bus address of SLI4 device Bar0, Bar1, and Bar2 and the
+	 * number of bytes required by each mapping. They are actually
+	 * mapping to the PCI BAR regions 1, 2, and 4 by the SLI4 device.
+	 */
+	phba->pci_bar0_map = pci_resource_start(pdev, LPFC_SLI4_BAR0);
+	bar0map_len = pci_resource_len(pdev, LPFC_SLI4_BAR0);
+
+	phba->pci_bar1_map = pci_resource_start(pdev, LPFC_SLI4_BAR1);
+	bar1map_len = pci_resource_len(pdev, LPFC_SLI4_BAR1);
+
+	phba->pci_bar2_map = pci_resource_start(pdev, LPFC_SLI4_BAR2);
+	bar2map_len = pci_resource_len(pdev, LPFC_SLI4_BAR2);
+
+	/* Map SLI4 PCI Config Space Register base to a kernel virtual addr */
+	phba->sli4_hba.conf_regs_memmap_p =
+				ioremap(phba->pci_bar0_map, bar0map_len);
+	if (!phba->sli4_hba.conf_regs_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for SLI4 PCI config registers.\n");
+		goto out;
+	}
+
+	/* Map SLI4 HBA Control Register base to a kernel virtual address. */
+	phba->sli4_hba.ctrl_regs_memmap_p =
+				ioremap(phba->pci_bar1_map, bar1map_len);
+	if (!phba->sli4_hba.ctrl_regs_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for SLI4 HBA control registers.\n");
+		goto out_iounmap_conf;
+	}
+
+	/* Map SLI4 HBA Doorbell Register base to a kernel virtual address. */
+	phba->sli4_hba.drbl_regs_memmap_p =
+				ioremap(phba->pci_bar2_map, bar2map_len);
+	if (!phba->sli4_hba.drbl_regs_memmap_p) {
+		dev_printk(KERN_ERR, &pdev->dev,
+			   "ioremap failed for SLI4 HBA doorbell registers.\n");
+		goto out_iounmap_ctrl;
+	}
+
+	/* Set up BAR0 PCI config space register memory map */
+	lpfc_sli4_bar0_register_memmap(phba);
+
+	/* Set up BAR1 register memory map */
+	lpfc_sli4_bar1_register_memmap(phba);
+
+	/* Set up BAR2 register memory map */
+	error = lpfc_sli4_bar2_register_memmap(phba, LPFC_VF0);
+	if (error)
+		goto out_iounmap_all;
+
+	return 0;
+
+out_iounmap_all:
+	iounmap(phba->sli4_hba.drbl_regs_memmap_p);
+out_iounmap_ctrl:
+	iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
+out_iounmap_conf:
+	iounmap(phba->sli4_hba.conf_regs_memmap_p);
+out:
+	return error;
+}
+
+/**
+ * lpfc_sli4_pci_mem_unset - Unset SLI4 HBA PCI memory space.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the PCI device memory space for device
+ * with SLI-4 interface spec.
+ **/
+static void
+lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba)
+{
+	struct pci_dev *pdev;
+
+	/* Obtain PCI device reference */
+	if (!phba->pcidev)
+		return;
+	else
+		pdev = phba->pcidev;
+
+	/* Free coherent DMA memory allocated */
+
+	/* Unmap I/O memory space */
+	iounmap(phba->sli4_hba.drbl_regs_memmap_p);
+	iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
+	iounmap(phba->sli4_hba.conf_regs_memmap_p);
+
+	return;
+}
+
+/**
+ * lpfc_sli_enable_msix - Enable MSI-X interrupt mode on SLI-3 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the MSI-X interrupt vectors to device
+ * with SLI-3 interface specs. The kernel function pci_enable_msix() is
+ * called to enable the MSI-X vectors. Note that pci_enable_msix(), once
+ * invoked, enables either all or nothing, depending on the current
+ * availability of PCI vector resources. The device driver is responsible
+ * for calling the individual request_irq() to register each MSI-X vector
+ * with a interrupt handler, which is done in this function. Note that
+ * later when device is unloading, the driver should always call free_irq()
+ * on all MSI-X vectors it has done request_irq() on before calling
+ * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device
+ * will be left with MSI-X enabled and leaks its vectors.
+ *
+ * Return codes
+ *   0 - sucessful
+ *   other values - error
+ **/
+static int
+lpfc_sli_enable_msix(struct lpfc_hba *phba)
+{
+	int rc, i;
+	LPFC_MBOXQ_t *pmb;
+
+	/* Set up MSI-X multi-message vectors */
+	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
+		phba->msix_entries[i].entry = i;
+
+	/* Configure MSI-X capability structure */
+	rc = pci_enable_msix(phba->pcidev, phba->msix_entries,
+				ARRAY_SIZE(phba->msix_entries));
+	if (rc) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0420 PCI enable MSI-X failed (%d)\n", rc);
+		goto msi_fail_out;
+	}
+	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0477 MSI-X entry[%d]: vector=x%x "
+				"message=%d\n", i,
+				phba->msix_entries[i].vector,
+				phba->msix_entries[i].entry);
+	/*
+	 * Assign MSI-X vectors to interrupt handlers
+	 */
+
+	/* vector-0 is associated to slow-path handler */
+	rc = request_irq(phba->msix_entries[0].vector,
+			 &lpfc_sli_sp_intr_handler, IRQF_SHARED,
+			 LPFC_SP_DRIVER_HANDLER_NAME, phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0421 MSI-X slow-path request_irq failed "
+				"(%d)\n", rc);
+		goto msi_fail_out;
+	}
+
+	/* vector-1 is associated to fast-path handler */
+	rc = request_irq(phba->msix_entries[1].vector,
+			 &lpfc_sli_fp_intr_handler, IRQF_SHARED,
+			 LPFC_FP_DRIVER_HANDLER_NAME, phba);
+
+	if (rc) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0429 MSI-X fast-path request_irq failed "
+				"(%d)\n", rc);
+		goto irq_fail_out;
+	}
+
+	/*
+	 * Configure HBA MSI-X attention conditions to messages
+	 */
+	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+
+	if (!pmb) {
+		rc = -ENOMEM;
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0474 Unable to allocate memory for issuing "
+				"MBOX_CONFIG_MSI command\n");
+		goto mem_fail_out;
+	}
+	rc = lpfc_config_msi(phba, pmb);
+	if (rc)
+		goto mbx_fail_out;
+	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
+				"0351 Config MSI mailbox command failed, "
+				"mbxCmd x%x, mbxStatus x%x\n",
+				pmb->u.mb.mbxCommand, pmb->u.mb.mbxStatus);
+		goto mbx_fail_out;
+	}
+
+	/* Free memory allocated for mailbox command */
+	mempool_free(pmb, phba->mbox_mem_pool);
+	return rc;
+
+mbx_fail_out:
+	/* Free memory allocated for mailbox command */
+	mempool_free(pmb, phba->mbox_mem_pool);
+
+mem_fail_out:
+	/* free the irq already requested */
+	free_irq(phba->msix_entries[1].vector, phba);
+
+irq_fail_out:
+	/* free the irq already requested */
+	free_irq(phba->msix_entries[0].vector, phba);
+
+msi_fail_out:
+	/* Unconfigure MSI-X capability structure */
+	pci_disable_msix(phba->pcidev);
+	return rc;
+}
+
+/**
+ * lpfc_sli_disable_msix - Disable MSI-X interrupt mode on SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to release the MSI-X vectors and then disable the
+ * MSI-X interrupt mode to device with SLI-3 interface spec.
+ **/
+static void
+lpfc_sli_disable_msix(struct lpfc_hba *phba)
+{
+	int i;
+
+	/* Free up MSI-X multi-message vectors */
+	for (i = 0; i < LPFC_MSIX_VECTORS; i++)
+		free_irq(phba->msix_entries[i].vector, phba);
+	/* Disable MSI-X */
+	pci_disable_msix(phba->pcidev);
+
+	return;
+}
+
+/**
+ * lpfc_sli_enable_msi - Enable MSI interrupt mode on SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the MSI interrupt mode to device with
+ * SLI-3 interface spec. The kernel function pci_enable_msi() is called to
+ * enable the MSI vector. The device driver is responsible for calling the
+ * request_irq() to register MSI vector with a interrupt the handler, which
+ * is done in this function.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ */
+static int
+lpfc_sli_enable_msi(struct lpfc_hba *phba)
+{
+	int rc;
+
+	rc = pci_enable_msi(phba->pcidev);
+	if (!rc)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0462 PCI enable MSI mode success.\n");
+	else {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0471 PCI enable MSI mode failed (%d)\n", rc);
+		return rc;
+	}
+
+	rc = request_irq(phba->pcidev->irq, lpfc_sli_intr_handler,
+			 IRQF_SHARED, LPFC_DRIVER_NAME, phba);
+	if (rc) {
+		pci_disable_msi(phba->pcidev);
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0478 MSI request_irq failed (%d)\n", rc);
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli_disable_msi - Disable MSI interrupt mode to SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable the MSI interrupt mode to device with
+ * SLI-3 interface spec. The driver calls free_irq() on MSI vector it has
+ * done request_irq() on before calling pci_disable_msi(). Failure to do so
+ * results in a BUG_ON() and a device will be left with MSI enabled and leaks
+ * its vector.
+ */
+static void
+lpfc_sli_disable_msi(struct lpfc_hba *phba)
+{
+	free_irq(phba->pcidev->irq, phba);
+	pci_disable_msi(phba->pcidev);
+	return;
+}
+
+/**
+ * lpfc_sli_enable_intr - Enable device interrupt to SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable device interrupt and associate driver's
+ * interrupt handler(s) to interrupt vector(s) to device with SLI-3 interface
+ * spec. Depends on the interrupt mode configured to the driver, the driver
+ * will try to fallback from the configured interrupt mode to an interrupt
+ * mode which is supported by the platform, kernel, and device in the order
+ * of:
+ * MSI-X -> MSI -> IRQ.
+ *
+ * Return codes
+ *   0 - sucessful
+ *   other values - error
+ **/
+static uint32_t
+lpfc_sli_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
+{
+	uint32_t intr_mode = LPFC_INTR_ERROR;
+	int retval;
+
+	if (cfg_mode == 2) {
+		/* Need to issue conf_port mbox cmd before conf_msi mbox cmd */
+		retval = lpfc_sli_config_port(phba, LPFC_SLI_REV3);
+		if (!retval) {
+			/* Now, try to enable MSI-X interrupt mode */
+			retval = lpfc_sli_enable_msix(phba);
+			if (!retval) {
+				/* Indicate initialization to MSI-X mode */
+				phba->intr_type = MSIX;
+				intr_mode = 2;
+			}
+		}
+	}
+
+	/* Fallback to MSI if MSI-X initialization failed */
+	if (cfg_mode >= 1 && phba->intr_type == NONE) {
+		retval = lpfc_sli_enable_msi(phba);
+		if (!retval) {
+			/* Indicate initialization to MSI mode */
+			phba->intr_type = MSI;
+			intr_mode = 1;
+		}
+	}
+
+	/* Fallback to INTx if both MSI-X/MSI initalization failed */
+	if (phba->intr_type == NONE) {
+		retval = request_irq(phba->pcidev->irq, lpfc_sli_intr_handler,
+				     IRQF_SHARED, LPFC_DRIVER_NAME, phba);
+		if (!retval) {
+			/* Indicate initialization to INTx mode */
+			phba->intr_type = INTx;
+			intr_mode = 0;
+		}
+	}
+	return intr_mode;
+}
+
+/**
+ * lpfc_sli_disable_intr - Disable device interrupt to SLI-3 device.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable device interrupt and disassociate the
+ * driver's interrupt handler(s) from interrupt vector(s) to device with
+ * SLI-3 interface spec. Depending on the interrupt mode, the driver will
+ * release the interrupt vector(s) for the message signaled interrupt.
+ **/
+static void
+lpfc_sli_disable_intr(struct lpfc_hba *phba)
+{
+	/* Disable the currently initialized interrupt mode */
+	if (phba->intr_type == MSIX)
+		lpfc_sli_disable_msix(phba);
+	else if (phba->intr_type == MSI)
+		lpfc_sli_disable_msi(phba);
+	else if (phba->intr_type == INTx)
+		free_irq(phba->pcidev->irq, phba);
+
+	/* Reset interrupt management states */
+	phba->intr_type = NONE;
+	phba->sli.slistat.sli_intr = 0;
+
+	return;
+}
+
+/**
+ * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the MSI-X interrupt vectors to device
+ * with SLI-4 interface spec. The kernel function pci_enable_msix() is called
+ * to enable the MSI-X vectors. Note that pci_enable_msix(), once invoked,
+ * enables either all or nothing, depending on the current availability of
+ * PCI vector resources. The device driver is responsible for calling the
+ * individual request_irq() to register each MSI-X vector with a interrupt
+ * handler, which is done in this function. Note that later when device is
+ * unloading, the driver should always call free_irq() on all MSI-X vectors
+ * it has done request_irq() on before calling pci_disable_msix(). Failure
+ * to do so results in a BUG_ON() and a device will be left with MSI-X
+ * enabled and leaks its vectors.
+ *
+ * Return codes
+ * 0 - sucessful
+ * other values - error
+ **/
+static int
+lpfc_sli4_enable_msix(struct lpfc_hba *phba)
+{
+	int rc, index;
+
+	/* Set up MSI-X multi-message vectors */
+	for (index = 0; index < phba->sli4_hba.cfg_eqn; index++)
+		phba->sli4_hba.msix_entries[index].entry = index;
+
+	/* Configure MSI-X capability structure */
+	rc = pci_enable_msix(phba->pcidev, phba->sli4_hba.msix_entries,
+			     phba->sli4_hba.cfg_eqn);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0484 PCI enable MSI-X failed (%d)\n", rc);
+		goto msi_fail_out;
+	}
+	/* Log MSI-X vector assignment */
+	for (index = 0; index < phba->sli4_hba.cfg_eqn; index++)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0489 MSI-X entry[%d]: vector=x%x "
+				"message=%d\n", index,
+				phba->sli4_hba.msix_entries[index].vector,
+				phba->sli4_hba.msix_entries[index].entry);
+	/*
+	 * Assign MSI-X vectors to interrupt handlers
+	 */
+
+	/* The first vector must associated to slow-path handler for MQ */
+	rc = request_irq(phba->sli4_hba.msix_entries[0].vector,
+			 &lpfc_sli4_sp_intr_handler, IRQF_SHARED,
+			 LPFC_SP_DRIVER_HANDLER_NAME, phba);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0485 MSI-X slow-path request_irq failed "
+				"(%d)\n", rc);
+		goto msi_fail_out;
+	}
+
+	/* The rest of the vector(s) are associated to fast-path handler(s) */
+	for (index = 1; index < phba->sli4_hba.cfg_eqn; index++) {
+		phba->sli4_hba.fcp_eq_hdl[index - 1].idx = index - 1;
+		phba->sli4_hba.fcp_eq_hdl[index - 1].phba = phba;
+		rc = request_irq(phba->sli4_hba.msix_entries[index].vector,
+				 &lpfc_sli4_fp_intr_handler, IRQF_SHARED,
+				 LPFC_FP_DRIVER_HANDLER_NAME,
+				 &phba->sli4_hba.fcp_eq_hdl[index - 1]);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+					"0486 MSI-X fast-path (%d) "
+					"request_irq failed (%d)\n", index, rc);
+			goto cfg_fail_out;
+		}
+	}
+
+	return rc;
+
+cfg_fail_out:
+	/* free the irq already requested */
+	for (--index; index >= 1; index--)
+		free_irq(phba->sli4_hba.msix_entries[index - 1].vector,
+			 &phba->sli4_hba.fcp_eq_hdl[index - 1]);
+
+	/* free the irq already requested */
+	free_irq(phba->sli4_hba.msix_entries[0].vector, phba);
+
+msi_fail_out:
+	/* Unconfigure MSI-X capability structure */
+	pci_disable_msix(phba->pcidev);
+	return rc;
+}
+
+/**
+ * lpfc_sli4_disable_msix - Disable MSI-X interrupt mode to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to release the MSI-X vectors and then disable the
+ * MSI-X interrupt mode to device with SLI-4 interface spec.
+ **/
+static void
+lpfc_sli4_disable_msix(struct lpfc_hba *phba)
+{
+	int index;
+
+	/* Free up MSI-X multi-message vectors */
+	free_irq(phba->sli4_hba.msix_entries[0].vector, phba);
+
+	for (index = 1; index < phba->sli4_hba.cfg_eqn; index++)
+		free_irq(phba->sli4_hba.msix_entries[index].vector,
+			 &phba->sli4_hba.fcp_eq_hdl[index - 1]);
+	/* Disable MSI-X */
+	pci_disable_msix(phba->pcidev);
+
+	return;
+}
+
+/**
+ * lpfc_sli4_enable_msi - Enable MSI interrupt mode to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable the MSI interrupt mode to device with
+ * SLI-4 interface spec. The kernel function pci_enable_msi() is called
+ * to enable the MSI vector. The device driver is responsible for calling
+ * the request_irq() to register MSI vector with a interrupt the handler,
+ * which is done in this function.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static int
+lpfc_sli4_enable_msi(struct lpfc_hba *phba)
+{
+	int rc, index;
+
+	rc = pci_enable_msi(phba->pcidev);
+	if (!rc)
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0487 PCI enable MSI mode success.\n");
+	else {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0488 PCI enable MSI mode failed (%d)\n", rc);
+		return rc;
+	}
+
+	rc = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler,
+			 IRQF_SHARED, LPFC_DRIVER_NAME, phba);
+	if (rc) {
+		pci_disable_msi(phba->pcidev);
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0490 MSI request_irq failed (%d)\n", rc);
+	}
+
+	for (index = 0; index < phba->cfg_fcp_eq_count; index++) {
+		phba->sli4_hba.fcp_eq_hdl[index].idx = index;
+		phba->sli4_hba.fcp_eq_hdl[index].phba = phba;
+	}
+
+	return rc;
+}
+
+/**
+ * lpfc_sli4_disable_msi - Disable MSI interrupt mode to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable the MSI interrupt mode to device with
+ * SLI-4 interface spec. The driver calls free_irq() on MSI vector it has
+ * done request_irq() on before calling pci_disable_msi(). Failure to do so
+ * results in a BUG_ON() and a device will be left with MSI enabled and leaks
+ * its vector.
+ **/
+static void
+lpfc_sli4_disable_msi(struct lpfc_hba *phba)
+{
+	free_irq(phba->pcidev->irq, phba);
+	pci_disable_msi(phba->pcidev);
+	return;
+}
+
+/**
+ * lpfc_sli4_enable_intr - Enable device interrupt to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to enable device interrupt and associate driver's
+ * interrupt handler(s) to interrupt vector(s) to device with SLI-4
+ * interface spec. Depends on the interrupt mode configured to the driver,
+ * the driver will try to fallback from the configured interrupt mode to an
+ * interrupt mode which is supported by the platform, kernel, and device in
+ * the order of:
+ * MSI-X -> MSI -> IRQ.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	other values - error
+ **/
+static uint32_t
+lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
+{
+	uint32_t intr_mode = LPFC_INTR_ERROR;
+	int retval, index;
+
+	if (cfg_mode == 2) {
+		/* Preparation before conf_msi mbox cmd */
+		retval = 0;
+		if (!retval) {
+			/* Now, try to enable MSI-X interrupt mode */
+			retval = lpfc_sli4_enable_msix(phba);
+			if (!retval) {
+				/* Indicate initialization to MSI-X mode */
+				phba->intr_type = MSIX;
+				intr_mode = 2;
+			}
+		}
+	}
+
+	/* Fallback to MSI if MSI-X initialization failed */
+	if (cfg_mode >= 1 && phba->intr_type == NONE) {
+		retval = lpfc_sli4_enable_msi(phba);
+		if (!retval) {
+			/* Indicate initialization to MSI mode */
+			phba->intr_type = MSI;
+			intr_mode = 1;
+		}
+	}
+
+	/* Fallback to INTx if both MSI-X/MSI initalization failed */
+	if (phba->intr_type == NONE) {
+		retval = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler,
+				     IRQF_SHARED, LPFC_DRIVER_NAME, phba);
+		if (!retval) {
+			/* Indicate initialization to INTx mode */
+			phba->intr_type = INTx;
+			intr_mode = 0;
+			for (index = 0; index < phba->cfg_fcp_eq_count;
+			     index++) {
+				phba->sli4_hba.fcp_eq_hdl[index].idx = index;
+				phba->sli4_hba.fcp_eq_hdl[index].phba = phba;
+			}
+		}
+	}
+	return intr_mode;
+}
+
+/**
+ * lpfc_sli4_disable_intr - Disable device interrupt to SLI-4 device
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to disable device interrupt and disassociate
+ * the driver's interrupt handler(s) from interrupt vector(s) to device
+ * with SLI-4 interface spec. Depending on the interrupt mode, the driver
+ * will release the interrupt vector(s) for the message signaled interrupt.
+ **/
+static void
+lpfc_sli4_disable_intr(struct lpfc_hba *phba)
+{
+	/* Disable the currently initialized interrupt mode */
+	if (phba->intr_type == MSIX)
+		lpfc_sli4_disable_msix(phba);
+	else if (phba->intr_type == MSI)
+		lpfc_sli4_disable_msi(phba);
+	else if (phba->intr_type == INTx)
+		free_irq(phba->pcidev->irq, phba);
+
+	/* Reset interrupt management states */
+	phba->intr_type = NONE;
+	phba->sli.slistat.sli_intr = 0;
+
+	return;
+}
+
+/**
+ * lpfc_unset_hba - Unset SLI3 hba device initialization
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the HBA device initialization steps to
+ * a device with SLI-3 interface spec.
+ **/
+static void
+lpfc_unset_hba(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+
+	spin_lock_irq(shost->host_lock);
+	vport->load_flag |= FC_UNLOADING;
+	spin_unlock_irq(shost->host_lock);
+
+	lpfc_stop_hba_timers(phba);
+
+	phba->pport->work_port_events = 0;
+
+	lpfc_sli_hba_down(phba);
+
+	lpfc_sli_brdrestart(phba);
+
+	lpfc_sli_disable_intr(phba);
+
+	return;
+}
+
+/**
+ * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to unset the HBA device initialization steps to
+ * a device with SLI-4 interface spec.
+ **/
+static void
+lpfc_sli4_unset_hba(struct lpfc_hba *phba)
+{
+	struct lpfc_vport *vport = phba->pport;
+	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+
+	spin_lock_irq(shost->host_lock);
+	vport->load_flag |= FC_UNLOADING;
+	spin_unlock_irq(shost->host_lock);
+
+	phba->pport->work_port_events = 0;
+
+	lpfc_sli4_hba_down(phba);
+
+	lpfc_sli4_disable_intr(phba);
+
+	return;
+}
+
+/**
+ * lpfc_sli4_hba_unset - Unset the fcoe hba
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called in the SLI4 code path to reset the HBA's FCoE
+ * function. The caller is not required to hold any lock. This routine
+ * issues PCI function reset mailbox command to reset the FCoE function.
+ * At the end of the function, it calls lpfc_hba_down_post function to
+ * free any pending commands.
+ **/
+static void
+lpfc_sli4_hba_unset(struct lpfc_hba *phba)
+{
+	int wait_cnt = 0;
+	LPFC_MBOXQ_t *mboxq;
+
+	lpfc_stop_hba_timers(phba);
+	phba->sli4_hba.intr_enable = 0;
+
+	/*
+	 * Gracefully wait out the potential current outstanding asynchronous
+	 * mailbox command.
+	 */
+
+	/* First, block any pending async mailbox command from posted */
+	spin_lock_irq(&phba->hbalock);
+	phba->sli.sli_flag |= LPFC_SLI_ASYNC_MBX_BLK;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, trying to wait it out if we can */
+	while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+		msleep(10);
+		if (++wait_cnt > LPFC_ACTIVE_MBOX_WAIT_CNT)
+			break;
+	}
+	/* Forcefully release the outstanding mailbox command if timed out */
+	if (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+		spin_lock_irq(&phba->hbalock);
+		mboxq = phba->sli.mbox_active;
+		mboxq->u.mb.mbxStatus = MBX_NOT_FINISHED;
+		__lpfc_mbox_cmpl_put(phba, mboxq);
+		phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+		phba->sli.mbox_active = NULL;
+		spin_unlock_irq(&phba->hbalock);
+	}
+
+	/* Tear down the queues in the HBA */
+	lpfc_sli4_queue_unset(phba);
+
+	/* Disable PCI subsystem interrupt */
+	lpfc_sli4_disable_intr(phba);
+
+	/* Stop kthread signal shall trigger work_done one more time */
+	kthread_stop(phba->worker_thread);
+
+	/* Stop the SLI4 device port */
+	phba->pport->work_port_events = 0;
+}
+
+/**
+ * lpfc_pci_probe_one_s3 - PCI probe func to reg SLI-3 device to PCI subsystem.
+ * @pdev: pointer to PCI device
+ * @pid: pointer to PCI device identifier
+ *
+ * This routine is to be called to attach a device with SLI-3 interface spec
+ * to the PCI subsystem. When an Emulex HBA with SLI-3 interface spec is
+ * presented on PCI bus, the kernel PCI subsystem looks at PCI device-specific
+ * information of the device and driver to see if the driver state that it can
+ * support this kind of device. If the match is successful, the driver core
+ * invokes this routine. If this routine determines it can claim the HBA, it
+ * does all the initialization that it needs to do to handle the HBA properly.
+ *
+ * Return code
+ * 	0 - driver can claim the device
+ * 	negative value - driver can not claim the device
+ **/
+static int __devinit
+lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
+{
+	struct lpfc_hba   *phba;
+	struct lpfc_vport *vport = NULL;
+	int error;
+	uint32_t cfg_mode, intr_mode;
+
+	/* Allocate memory for HBA structure */
+	phba = lpfc_hba_alloc(pdev);
+	if (!phba)
+		return -ENOMEM;
+
+	/* Perform generic PCI device enabling operation */
+	error = lpfc_enable_pci_dev(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1401 Failed to enable pci device.\n");
+		goto out_free_phba;
+	}
+
+	/* Set up SLI API function jump table for PCI-device group-0 HBAs */
+	error = lpfc_api_table_setup(phba, LPFC_PCI_DEV_LP);
+	if (error)
+		goto out_disable_pci_dev;
+
+	/* Set up SLI-3 specific device PCI memory space */
+	error = lpfc_sli_pci_mem_setup(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1402 Failed to set up pci memory space.\n");
+		goto out_disable_pci_dev;
+	}
+
+	/* Set up phase-1 common device driver resources */
+	error = lpfc_setup_driver_resource_phase1(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1403 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s3;
+	}
+
+	/* Set up SLI-3 specific device driver resources */
+	error = lpfc_sli_driver_resource_setup(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1404 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s3;
+	}
+
+	/* Initialize and populate the iocb list per host */
+	error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1405 Failed to initialize iocb list.\n");
+		goto out_unset_driver_resource_s3;
+	}
+
+	/* Set up common device driver resources */
+	error = lpfc_setup_driver_resource_phase2(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1406 Failed to set up driver resource.\n");
+		goto out_free_iocb_list;
+	}
+
+	/* Create SCSI host to the physical port */
+	error = lpfc_create_shost(phba);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1407 Failed to create scsi host.\n");
+		goto out_unset_driver_resource;
+	}
+
+	/* Configure sysfs attributes */
+	vport = phba->pport;
+	error = lpfc_alloc_sysfs_attr(vport);
+	if (error) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1476 Failed to allocate sysfs attr\n");
+		goto out_destroy_shost;
+	}
+
+	/* Now, trying to enable interrupt and bring up the device */
+	cfg_mode = phba->cfg_use_msi;
+	while (true) {
+		/* Put device to a known state before enabling interrupt */
+		lpfc_stop_port(phba);
+		/* Configure and enable interrupt */
+		intr_mode = lpfc_sli_enable_intr(phba, cfg_mode);
+		if (intr_mode == LPFC_INTR_ERROR) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"0431 Failed to enable interrupt.\n");
+			error = -ENODEV;
+			goto out_free_sysfs_attr;
+		}
+		/* SLI-3 HBA setup */
+		if (lpfc_sli_hba_setup(phba)) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1477 Failed to set up hba\n");
+			error = -ENODEV;
+			goto out_remove_device;
+		}
+
+		/* Wait 50ms for the interrupts of previous mailbox commands */
+		msleep(50);
+		/* Check active interrupts on message signaled interrupts */
+		if (intr_mode == 0 ||
+		    phba->sli.slistat.sli_intr > LPFC_MSIX_VECTORS) {
+			/* Log the current active interrupt mode */
+			phba->intr_mode = intr_mode;
+			lpfc_log_intr_mode(phba, intr_mode);
+			break;
+		} else {
+			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+					"0447 Configure interrupt mode (%d) "
+					"failed active interrupt test.\n",
+					intr_mode);
+			/* Disable the current interrupt mode */
+			lpfc_sli_disable_intr(phba);
+			/* Try next level of interrupt mode */
+			cfg_mode = --intr_mode;
+		}
+	}
+
+	/* Perform post initialization setup */
+	lpfc_post_init_setup(phba);
+
+	/* Check if there are static vports to be created. */
+	lpfc_create_static_vport(phba);
+
+	return 0;
+
+out_remove_device:
+	lpfc_unset_hba(phba);
+out_free_sysfs_attr:
+	lpfc_free_sysfs_attr(vport);
+out_destroy_shost:
+	lpfc_destroy_shost(phba);
+out_unset_driver_resource:
+	lpfc_unset_driver_resource_phase2(phba);
+out_free_iocb_list:
+	lpfc_free_iocb_list(phba);
+out_unset_driver_resource_s3:
+	lpfc_sli_driver_resource_unset(phba);
+out_unset_pci_mem_s3:
+	lpfc_sli_pci_mem_unset(phba);
+out_disable_pci_dev:
+	lpfc_disable_pci_dev(phba);
+out_free_phba:
+	lpfc_hba_free(phba);
+	return error;
+}
+
+/**
+ * lpfc_pci_remove_one_s3 - PCI func to unreg SLI-3 device from PCI subsystem.
+ * @pdev: pointer to PCI device
+ *
+ * This routine is to be called to disattach a device with SLI-3 interface
+ * spec from PCI subsystem. When an Emulex HBA with SLI-3 interface spec is
+ * removed from PCI bus, it performs all the necessary cleanup for the HBA
+ * device to be removed from the PCI subsystem properly.
+ **/
+static void __devexit
+lpfc_pci_remove_one_s3(struct pci_dev *pdev)
+{
+	struct Scsi_Host  *shost = pci_get_drvdata(pdev);
+	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
+	struct lpfc_vport **vports;
+	struct lpfc_hba   *phba = vport->phba;
+	int i;
+	int bars = pci_select_bars(pdev, IORESOURCE_MEM);
+
+	spin_lock_irq(&phba->hbalock);
+	vport->load_flag |= FC_UNLOADING;
+	spin_unlock_irq(&phba->hbalock);
+
+	lpfc_free_sysfs_attr(vport);
+
+	/* Release all the vports against this physical port */
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports != NULL)
+		for (i = 1; i <= phba->max_vports && vports[i] != NULL; i++)
+			fc_vport_terminate(vports[i]->fc_vport);
+	lpfc_destroy_vport_work_array(phba, vports);
+
+	/* Remove FC host and then SCSI host with the physical port */
+	fc_remove_host(shost);
+	scsi_remove_host(shost);
+	lpfc_cleanup(vport);
+
+	/*
+	 * Bring down the SLI Layer. This step disable all interrupts,
+	 * clears the rings, discards all mailbox commands, and resets
+	 * the HBA.
+	 */
+
+	/* HBA interrupt will be diabled after this call */
+	lpfc_sli_hba_down(phba);
+	/* Stop kthread signal shall trigger work_done one more time */
+	kthread_stop(phba->worker_thread);
+	/* Final cleanup of txcmplq and reset the HBA */
+	lpfc_sli_brdrestart(phba);
+
+	lpfc_stop_hba_timers(phba);
+	spin_lock_irq(&phba->hbalock);
+	list_del_init(&vport->listentry);
+	spin_unlock_irq(&phba->hbalock);
+
+	lpfc_debugfs_terminate(vport);
+
+	/* Disable interrupt */
+	lpfc_sli_disable_intr(phba);
+
+	pci_set_drvdata(pdev, NULL);
+	scsi_host_put(shost);
+
+	/*
+	 * Call scsi_free before mem_free since scsi bufs are released to their
+	 * corresponding pools here.
+	 */
+	lpfc_scsi_free(phba);
+	lpfc_mem_free_all(phba);
+
+	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
+			  phba->hbqslimp.virt, phba->hbqslimp.phys);
+
+	/* Free resources associated with SLI2 interface */
+	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
+			  phba->slim2p.virt, phba->slim2p.phys);
+
+	/* unmap adapter SLIM and Control Registers */
+	iounmap(phba->ctrl_regs_memmap_p);
+	iounmap(phba->slim_memmap_p);
+
+	lpfc_hba_free(phba);
+
+	pci_release_selected_regions(pdev, bars);
+	pci_disable_device(pdev);
+}
+
+/**
+ * lpfc_pci_suspend_one_s3 - PCI func to suspend SLI-3 device for power mgmnt
+ * @pdev: pointer to PCI device
+ * @msg: power management message
+ *
+ * This routine is to be called from the kernel's PCI subsystem to support
+ * system Power Management (PM) to device with SLI-3 interface spec. When
+ * PM invokes this method, it quiesces the device by stopping the driver's
+ * worker thread for the device, turning off device's interrupt and DMA,
+ * and bring the device offline. Note that as the driver implements the
+ * minimum PM requirements to a power-aware driver's PM support for the
+ * suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE, FREEZE)
+ * to the suspend() method call will be treated as SUSPEND and the driver will
+ * fully reinitialize its device during resume() method call, the driver will
+ * set device to PCI_D3hot state in PCI config space instead of setting it
+ * according to the @msg provided by the PM.
+ *
+ * Return code
+ * 	0 - driver suspended the device
+ * 	Error otherwise
+ **/
+static int
+lpfc_pci_suspend_one_s3(struct pci_dev *pdev, pm_message_t msg)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"0473 PCI device Power Management suspend.\n");
+
+	/* Bring down the device */
+	lpfc_offline_prep(phba);
+	lpfc_offline(phba);
+	kthread_stop(phba->worker_thread);
+
+	/* Disable interrupt from device */
+	lpfc_sli_disable_intr(phba);
+
+	/* Save device state to PCI config space */
+	pci_save_state(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
+
+	return 0;
+}
+
+/**
+ * lpfc_pci_resume_one_s3 - PCI func to resume SLI-3 device for power mgmnt
+ * @pdev: pointer to PCI device
+ *
+ * This routine is to be called from the kernel's PCI subsystem to support
+ * system Power Management (PM) to device with SLI-3 interface spec. When PM
+ * invokes this method, it restores the device's PCI config space state and
+ * fully reinitializes the device and brings it online. Note that as the
+ * driver implements the minimum PM requirements to a power-aware driver's
+ * PM for suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE,
+ * FREEZE) to the suspend() method call will be treated as SUSPEND and the
+ * driver will fully reinitialize its device during resume() method call,
+ * the device will be set to PCI_D0 directly in PCI config space before
+ * restoring the state.
+ *
+ * Return code
+ * 	0 - driver suspended the device
+ * 	Error otherwise
+ **/
+static int
+lpfc_pci_resume_one_s3(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	uint32_t intr_mode;
+	int error;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"0452 PCI device Power Management resume.\n");
+
+	/* Restore device state from PCI config space */
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	if (pdev->is_busmaster)
+		pci_set_master(pdev);
+
+	/* Startup the kernel thread for this host adapter. */
+	phba->worker_thread = kthread_run(lpfc_do_work, phba,
+					"lpfc_worker_%d", phba->brd_no);
+	if (IS_ERR(phba->worker_thread)) {
+		error = PTR_ERR(phba->worker_thread);
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0434 PM resume failed to start worker "
+				"thread: error=x%x.\n", error);
+		return error;
+	}
+
+	/* Configure and enable interrupt */
+	intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
+	if (intr_mode == LPFC_INTR_ERROR) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0430 PM resume Failed to enable interrupt\n");
+		return -EIO;
+	} else
+		phba->intr_mode = intr_mode;
+
+	/* Restart HBA and bring it online */
+	lpfc_sli_brdrestart(phba);
+	lpfc_online(phba);
+
+	/* Log the current active interrupt mode */
+	lpfc_log_intr_mode(phba, phba->intr_mode);
+
+	return 0;
+}
+
+/**
+ * lpfc_io_error_detected_s3 - Method for handling SLI-3 device PCI I/O error
+ * @pdev: pointer to PCI device.
+ * @state: the current PCI connection state.
+ *
+ * This routine is called from the PCI subsystem for I/O error handling to
+ * device with SLI-3 interface spec. This function is called by the PCI
+ * subsystem after a PCI bus error affecting this device has been detected.
+ * When this function is invoked, it will need to stop all the I/Os and
+ * interrupt(s) to the device. Once that is done, it will return
+ * PCI_ERS_RESULT_NEED_RESET for the PCI subsystem to perform proper recovery
+ * as desired.
+ *
+ * Return codes
+ * 	PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ **/
+static pci_ers_result_t
+lpfc_io_error_detected_s3(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_sli_ring  *pring;
+
+	if (state == pci_channel_io_perm_failure) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0472 PCI channel I/O permanent failure\n");
+		/* Block all SCSI devices' I/Os on the host */
+		lpfc_scsi_dev_block(phba);
+		/* Clean up all driver's outstanding SCSI I/Os */
+		lpfc_sli_flush_fcp_rings(phba);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_disable_device(pdev);
+	/*
+	 * There may be I/Os dropped by the firmware.
+	 * Error iocb (I/O) on txcmplq and let the SCSI layer
+	 * retry it after re-establishing link.
+	 */
+	pring = &psli->ring[psli->fcp_ring];
+	lpfc_sli_abort_iocb_ring(phba, pring);
+
+	/* Disable interrupt */
+	lpfc_sli_disable_intr(phba);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * lpfc_io_slot_reset_s3 - Method for restarting PCI SLI-3 device from scratch.
+ * @pdev: pointer to PCI device.
+ *
+ * This routine is called from the PCI subsystem for error handling to
+ * device with SLI-3 interface spec. This is called after PCI bus has been
+ * reset to restart the PCI card from scratch, as if from a cold-boot.
+ * During the PCI subsystem error recovery, after driver returns
+ * PCI_ERS_RESULT_NEED_RESET, the PCI subsystem will perform proper error
+ * recovery and then call this routine before calling the .resume method
+ * to recover the device. This function will initialize the HBA device,
+ * enable the interrupt, but it will just put the HBA to offline state
+ * without passing any I/O traffic.
+ *
+ * Return codes
+ * 	PCI_ERS_RESULT_RECOVERED - the device has been recovered
+ * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
+ */
+static pci_ers_result_t
+lpfc_io_slot_reset_s3(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+	struct lpfc_sli *psli = &phba->sli;
+	uint32_t intr_mode;
+
+	dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n");
+	if (pci_enable_device_mem(pdev)) {
+		printk(KERN_ERR "lpfc: Cannot re-enable "
+			"PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_restore_state(pdev);
+	if (pdev->is_busmaster)
+		pci_set_master(pdev);
+
+	spin_lock_irq(&phba->hbalock);
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Configure and enable interrupt */
+	intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
+	if (intr_mode == LPFC_INTR_ERROR) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0427 Cannot re-enable interrupt after "
+				"slot reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	} else
+		phba->intr_mode = intr_mode;
+
+	/* Take device offline; this will perform cleanup */
+	lpfc_offline(phba);
+	lpfc_sli_brdrestart(phba);
+
+	/* Log the current active interrupt mode */
+	lpfc_log_intr_mode(phba, phba->intr_mode);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * lpfc_io_resume_s3 - Method for resuming PCI I/O operation on SLI-3 device.
+ * @pdev: pointer to PCI device
+ *
+ * This routine is called from the PCI subsystem for error handling to device
+ * with SLI-3 interface spec. It is called when kernel error recovery tells
+ * the lpfc driver that it is ok to resume normal PCI operation after PCI bus
+ * error recovery. After this call, traffic can start to flow from this device
+ * again.
+ */
+static void
+lpfc_io_resume_s3(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
 
-	lpfc_sli_disable_intr(phba);
+	lpfc_online(phba);
+}
 
-	return;
+/**
+ * lpfc_sli4_get_els_iocb_cnt - Calculate the # of ELS IOCBs to reserve
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * returns the number of ELS/CT IOCBs to reserve
+ **/
+int
+lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *phba)
+{
+	int max_xri = phba->sli4_hba.max_cfg_param.max_xri;
+
+	if (max_xri <= 100)
+		return 4;
+	else if (max_xri <= 256)
+		return 8;
+	else if (max_xri <= 512)
+		return 16;
+	else if (max_xri <= 1024)
+		return 32;
+	else
+		return 48;
 }
 
 /**
- * lpfc_pci_probe_one_s3 - PCI probe func to reg SLI-3 device to PCI subsystem.
+ * lpfc_pci_probe_one_s4 - PCI probe func to reg SLI-4 device to PCI subsys
  * @pdev: pointer to PCI device
  * @pid: pointer to PCI device identifier
  *
- * This routine is to be called to attach a device with SLI-3 interface spec
- * to the PCI subsystem. When an Emulex HBA with SLI-3 interface spec is
+ * This routine is called from the kernel's PCI subsystem to device with
+ * SLI-4 interface spec. When an Emulex HBA with SLI-4 interface spec is
  * presented on PCI bus, the kernel PCI subsystem looks at PCI device-specific
- * information of the device and driver to see if the driver state that it can
- * support this kind of device. If the match is successful, the driver core
- * invokes this routine. If this routine determines it can claim the HBA, it
- * does all the initialization that it needs to do to handle the HBA properly.
+ * information of the device and driver to see if the driver state that it
+ * can support this kind of device. If the match is successful, the driver
+ * core invokes this routine. If this routine determines it can claim the HBA,
+ * it does all the initialization that it needs to do to handle the HBA
+ * properly.
  *
  * Return code
  * 	0 - driver can claim the device
  * 	negative value - driver can not claim the device
  **/
 static int __devinit
-lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
+lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 {
 	struct lpfc_hba   *phba;
 	struct lpfc_vport *vport = NULL;
 	int error;
 	uint32_t cfg_mode, intr_mode;
+	int mcnt;
 
 	/* Allocate memory for HBA structure */
 	phba = lpfc_hba_alloc(pdev);
@@ -3660,20 +7213,20 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
 	error = lpfc_enable_pci_dev(phba);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1401 Failed to enable pci device.\n");
+				"1409 Failed to enable pci device.\n");
 		goto out_free_phba;
 	}
 
-	/* Set up SLI API function jump table for PCI-device group-0 HBAs */
-	error = lpfc_api_table_setup(phba, LPFC_PCI_DEV_LP);
+	/* Set up SLI API function jump table for PCI-device group-1 HBAs */
+	error = lpfc_api_table_setup(phba, LPFC_PCI_DEV_OC);
 	if (error)
 		goto out_disable_pci_dev;
 
-	/* Set up SLI-3 specific device PCI memory space */
-	error = lpfc_sli_pci_mem_setup(phba);
+	/* Set up SLI-4 specific device PCI memory space */
+	error = lpfc_sli4_pci_mem_setup(phba);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1402 Failed to set up pci memory space.\n");
+				"1410 Failed to set up pci memory space.\n");
 		goto out_disable_pci_dev;
 	}
 
@@ -3681,31 +7234,32 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
 	error = lpfc_setup_driver_resource_phase1(phba);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1403 Failed to set up driver resource.\n");
-		goto out_unset_pci_mem_s3;
+				"1411 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s4;
 	}
 
-	/* Set up SLI-3 specific device driver resources */
-	error = lpfc_sli_driver_resource_setup(phba);
+	/* Set up SLI-4 Specific device driver resources */
+	error = lpfc_sli4_driver_resource_setup(phba);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1404 Failed to set up driver resource.\n");
-		goto out_unset_pci_mem_s3;
+				"1412 Failed to set up driver resource.\n");
+		goto out_unset_pci_mem_s4;
 	}
 
 	/* Initialize and populate the iocb list per host */
-	error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT);
+	error = lpfc_init_iocb_list(phba,
+			phba->sli4_hba.max_cfg_param.max_xri);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1405 Failed to initialize iocb list.\n");
-		goto out_unset_driver_resource_s3;
+				"1413 Failed to initialize iocb list.\n");
+		goto out_unset_driver_resource_s4;
 	}
 
 	/* Set up common device driver resources */
 	error = lpfc_setup_driver_resource_phase2(phba);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1406 Failed to set up driver resource.\n");
+				"1414 Failed to set up driver resource.\n");
 		goto out_free_iocb_list;
 	}
 
@@ -3713,7 +7267,7 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
 	error = lpfc_create_shost(phba);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1407 Failed to create scsi host.\n");
+				"1415 Failed to create scsi host.\n");
 		goto out_unset_driver_resource;
 	}
 
@@ -3722,7 +7276,7 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
 	error = lpfc_alloc_sysfs_attr(vport);
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1476 Failed to allocate sysfs attr\n");
+				"1416 Failed to allocate sysfs attr\n");
 		goto out_destroy_shost;
 	}
 
@@ -3732,52 +7286,51 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid)
 		/* Put device to a known state before enabling interrupt */
 		lpfc_stop_port(phba);
 		/* Configure and enable interrupt */
-		intr_mode = lpfc_sli_enable_intr(phba, cfg_mode);
+		intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode);
 		if (intr_mode == LPFC_INTR_ERROR) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"0431 Failed to enable interrupt.\n");
+					"0426 Failed to enable interrupt.\n");
 			error = -ENODEV;
 			goto out_free_sysfs_attr;
 		}
-		/* SLI-3 HBA setup */
-		if (lpfc_sli_hba_setup(phba)) {
+		/* Set up SLI-4 HBA */
+		if (lpfc_sli4_hba_setup(phba)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"1477 Failed to set up hba\n");
+					"1421 Failed to set up hba\n");
 			error = -ENODEV;
-			goto out_remove_device;
+			goto out_disable_intr;
 		}
 
-		/* Wait 50ms for the interrupts of previous mailbox commands */
-		msleep(50);
-		/* Check active interrupts on message signaled interrupts */
+		/* Send NOP mbx cmds for non-INTx mode active interrupt test */
+		if (intr_mode != 0)
+			mcnt = lpfc_sli4_send_nop_mbox_cmds(phba,
+							    LPFC_ACT_INTR_CNT);
+
+		/* Check active interrupts received only for MSI/MSI-X */
 		if (intr_mode == 0 ||
-		    phba->sli.slistat.sli_intr > LPFC_MSIX_VECTORS) {
+		    phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) {
 			/* Log the current active interrupt mode */
 			phba->intr_mode = intr_mode;
 			lpfc_log_intr_mode(phba, intr_mode);
 			break;
-		} else {
-			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"0447 Configure interrupt mode (%d) "
-					"failed active interrupt test.\n",
-					intr_mode);
-			/* Disable the current interrupt mode */
-			lpfc_sli_disable_intr(phba);
-			/* Try next level of interrupt mode */
-			cfg_mode = --intr_mode;
 		}
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"0451 Configure interrupt mode (%d) "
+				"failed active interrupt test.\n",
+				intr_mode);
+		/* Unset the preivous SLI-4 HBA setup */
+		lpfc_sli4_unset_hba(phba);
+		/* Try next level of interrupt mode */
+		cfg_mode = --intr_mode;
 	}
 
 	/* Perform post initialization setup */
 	lpfc_post_init_setup(phba);
 
-	/* Check if there are static vports to be created. */
-	lpfc_create_static_vport(phba);
-
 	return 0;
 
-out_remove_device:
-	lpfc_unset_hba(phba);
+out_disable_intr:
+	lpfc_sli4_disable_intr(phba);
 out_free_sysfs_attr:
 	lpfc_free_sysfs_attr(vport);
 out_destroy_shost:
@@ -3786,10 +7339,10 @@ out_unset_driver_resource:
 	lpfc_unset_driver_resource_phase2(phba);
 out_free_iocb_list:
 	lpfc_free_iocb_list(phba);
-out_unset_driver_resource_s3:
-	lpfc_sli_driver_resource_unset(phba);
-out_unset_pci_mem_s3:
-	lpfc_sli_pci_mem_unset(phba);
+out_unset_driver_resource_s4:
+	lpfc_sli4_driver_resource_unset(phba);
+out_unset_pci_mem_s4:
+	lpfc_sli4_pci_mem_unset(phba);
 out_disable_pci_dev:
 	lpfc_disable_pci_dev(phba);
 out_free_phba:
@@ -3798,28 +7351,29 @@ out_free_phba:
 }
 
 /**
- * lpfc_pci_remove_one_s3 - PCI func to unreg SLI-3 device from PCI subsystem.
+ * lpfc_pci_remove_one_s4 - PCI func to unreg SLI-4 device from PCI subsystem
  * @pdev: pointer to PCI device
  *
- * This routine is to be called to disattach a device with SLI-3 interface
- * spec from PCI subsystem. When an Emulex HBA with SLI-3 interface spec is
+ * This routine is called from the kernel's PCI subsystem to device with
+ * SLI-4 interface spec. When an Emulex HBA with SLI-4 interface spec is
  * removed from PCI bus, it performs all the necessary cleanup for the HBA
  * device to be removed from the PCI subsystem properly.
  **/
 static void __devexit
-lpfc_pci_remove_one_s3(struct pci_dev *pdev)
+lpfc_pci_remove_one_s4(struct pci_dev *pdev)
 {
-	struct Scsi_Host  *shost = pci_get_drvdata(pdev);
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_vport **vports;
-	struct lpfc_hba   *phba = vport->phba;
+	struct lpfc_hba *phba = vport->phba;
 	int i;
-	int bars = pci_select_bars(pdev, IORESOURCE_MEM);
 
+	/* Mark the device unloading flag */
 	spin_lock_irq(&phba->hbalock);
 	vport->load_flag |= FC_UNLOADING;
 	spin_unlock_irq(&phba->hbalock);
 
+	/* Free the HBA sysfs attributes */
 	lpfc_free_sysfs_attr(vport);
 
 	/* Release all the vports against this physical port */
@@ -3832,73 +7386,56 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)
 	/* Remove FC host and then SCSI host with the physical port */
 	fc_remove_host(shost);
 	scsi_remove_host(shost);
+
+	/* Perform cleanup on the physical port */
 	lpfc_cleanup(vport);
 
 	/*
-	 * Bring down the SLI Layer. This step disable all interrupts,
+	 * Bring down the SLI Layer. This step disables all interrupts,
 	 * clears the rings, discards all mailbox commands, and resets
-	 * the HBA.
+	 * the HBA FCoE function.
 	 */
+	lpfc_debugfs_terminate(vport);
+	lpfc_sli4_hba_unset(phba);
 
-	/* HBA interrupt will be diabled after this call */
-	lpfc_sli_hba_down(phba);
-	/* Stop kthread signal shall trigger work_done one more time */
-	kthread_stop(phba->worker_thread);
-	/* Final cleanup of txcmplq and reset the HBA */
-	lpfc_sli_brdrestart(phba);
-
-	lpfc_stop_hba_timers(phba);
 	spin_lock_irq(&phba->hbalock);
 	list_del_init(&vport->listentry);
 	spin_unlock_irq(&phba->hbalock);
 
-	lpfc_debugfs_terminate(vport);
-
-	/* Disable interrupt */
-	lpfc_sli_disable_intr(phba);
-
-	pci_set_drvdata(pdev, NULL);
-	scsi_host_put(shost);
-
-	/*
-	 * Call scsi_free before mem_free since scsi bufs are released to their
-	 * corresponding pools here.
+	/* Call scsi_free before lpfc_sli4_driver_resource_unset since scsi
+	 * buffers are released to their corresponding pools here.
 	 */
 	lpfc_scsi_free(phba);
-	lpfc_mem_free_all(phba);
-
-	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(),
-			  phba->hbqslimp.virt, phba->hbqslimp.phys);
+	lpfc_sli4_driver_resource_unset(phba);
 
-	/* Free resources associated with SLI2 interface */
-	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
-			  phba->slim2p.virt, phba->slim2p.phys);
+	/* Unmap adapter Control and Doorbell registers */
+	lpfc_sli4_pci_mem_unset(phba);
 
-	/* unmap adapter SLIM and Control Registers */
-	iounmap(phba->ctrl_regs_memmap_p);
-	iounmap(phba->slim_memmap_p);
+	/* Release PCI resources and disable device's PCI function */
+	scsi_host_put(shost);
+	lpfc_disable_pci_dev(phba);
 
+	/* Finally, free the driver's device data structure */
 	lpfc_hba_free(phba);
 
-	pci_release_selected_regions(pdev, bars);
-	pci_disable_device(pdev);
+	return;
 }
 
 /**
- * lpfc_pci_suspend_one_s3 - PCI func to suspend SLI-3 device for power mgmnt
+ * lpfc_pci_suspend_one_s4 - PCI func to suspend SLI-4 device for power mgmnt
  * @pdev: pointer to PCI device
  * @msg: power management message
  *
- * This routine is to be called from the kernel's PCI subsystem to support
- * system Power Management (PM) to device with SLI-3 interface spec. When
- * PM invokes this method, it quiesces the device by stopping the driver's
- * worker thread for the device, turning off device's interrupt and DMA,
- * and bring the device offline. Note that as the driver implements the
- * minimum PM requirements to a power-aware driver's PM support for the
- * suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE, FREEZE)
- * to the suspend() method call will be treated as SUSPEND and the driver will
- * fully reinitialize its device during resume() method call, the driver will
- * set device to PCI_D3hot state in PCI config space instead of setting it
+ * This routine is called from the kernel's PCI subsystem to support system
+ * Power Management (PM) to device with SLI-4 interface spec. When PM invokes
+ * this method, it quiesces the device by stopping the driver's worker
+ * thread for the device, turning off device's interrupt and DMA, and bring
+ * the device offline. Note that as the driver implements the minimum PM
+ * requirements to a power-aware driver's PM support for suspend/resume -- all
+ * the possible PM messages (SUSPEND, HIBERNATE, FREEZE) to the suspend()
+ * method call will be treated as SUSPEND and the driver will fully
+ * reinitialize its device during resume() method call, the driver will set
+ * device to PCI_D3hot state in PCI config space instead of setting it
  * according to the @msg provided by the PM.
  *
  * Return code
@@ -3906,13 +7443,13 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)
  * 	Error otherwise
  **/
 static int
-lpfc_pci_suspend_one_s3(struct pci_dev *pdev, pm_message_t msg)
+lpfc_pci_suspend_one_s4(struct pci_dev *pdev, pm_message_t msg)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"0473 PCI device Power Management suspend.\n");
+			"0298 PCI device Power Management suspend.\n");
 
 	/* Bring down the device */
 	lpfc_offline_prep(phba);
@@ -3920,7 +7457,7 @@ lpfc_pci_suspend_one_s3(struct pci_dev *pdev, pm_message_t msg)
 	kthread_stop(phba->worker_thread);
 
 	/* Disable interrupt from device */
-	lpfc_sli_disable_intr(phba);
+	lpfc_sli4_disable_intr(phba);
 
 	/* Save device state to PCI config space */
 	pci_save_state(pdev);
@@ -3930,26 +7467,26 @@ lpfc_pci_suspend_one_s3(struct pci_dev *pdev, pm_message_t msg)
 }
 
 /**
- * lpfc_pci_resume_one_s3 - PCI func to resume SLI-3 device for power mgmnt
+ * lpfc_pci_resume_one_s4 - PCI func to resume SLI-4 device for power mgmnt
  * @pdev: pointer to PCI device
  *
- * This routine is to be called from the kernel's PCI subsystem to support
- * system Power Management (PM) to device with SLI-3 interface spec. When PM
- * invokes this method, it restores the device's PCI config space state and
- * fully reinitializes the device and brings it online. Note that as the
- * driver implements the minimum PM requirements to a power-aware driver's
- * PM for suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE,
- * FREEZE) to the suspend() method call will be treated as SUSPEND and the
- * driver will fully reinitialize its device during resume() method call,
- * the device will be set to PCI_D0 directly in PCI config space before
- * restoring the state.
+ * This routine is called from the kernel's PCI subsystem to support system
+ * Power Management (PM) to device with SLI-4 interface spac. When PM invokes
+ * this method, it restores the device's PCI config space state and fully
+ * reinitializes the device and brings it online. Note that as the driver
+ * implements the minimum PM requirements to a power-aware driver's PM for
+ * suspend/resume -- all the possible PM messages (SUSPEND, HIBERNATE, FREEZE)
+ * to the suspend() method call will be treated as SUSPEND and the driver
+ * will fully reinitialize its device during resume() method call, the device
+ * will be set to PCI_D0 directly in PCI config space before restoring the
+ * state.
  *
  * Return code
  * 	0 - driver suspended the device
  * 	Error otherwise
  **/
 static int
-lpfc_pci_resume_one_s3(struct pci_dev *pdev)
+lpfc_pci_resume_one_s4(struct pci_dev *pdev)
 {
 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
 	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
@@ -3957,7 +7494,7 @@ lpfc_pci_resume_one_s3(struct pci_dev *pdev)
 	int error;
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"0452 PCI device Power Management resume.\n");
+			"0292 PCI device Power Management resume.\n");
 
 	/* Restore device state from PCI config space */
 	pci_set_power_state(pdev, PCI_D0);
@@ -3965,22 +7502,22 @@ lpfc_pci_resume_one_s3(struct pci_dev *pdev)
 	if (pdev->is_busmaster)
 		pci_set_master(pdev);
 
-	/* Startup the kernel thread for this host adapter. */
+	 /* Startup the kernel thread for this host adapter. */
 	phba->worker_thread = kthread_run(lpfc_do_work, phba,
 					"lpfc_worker_%d", phba->brd_no);
 	if (IS_ERR(phba->worker_thread)) {
 		error = PTR_ERR(phba->worker_thread);
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0434 PM resume failed to start worker "
+				"0293 PM resume failed to start worker "
 				"thread: error=x%x.\n", error);
 		return error;
 	}
 
 	/* Configure and enable interrupt */
-	intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
+	intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode);
 	if (intr_mode == LPFC_INTR_ERROR) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0430 PM resume Failed to enable interrupt\n");
+				"0294 PM resume Failed to enable interrupt\n");
 		return -EIO;
 	} else
 		phba->intr_mode = intr_mode;
@@ -3996,134 +7533,65 @@ lpfc_pci_resume_one_s3(struct pci_dev *pdev)
 }
 
 /**
- * lpfc_io_error_detected_s3 - Method for handling SLI-3 device PCI I/O error
+ * lpfc_io_error_detected_s4 - Method for handling PCI I/O error to SLI-4 device
  * @pdev: pointer to PCI device.
  * @state: the current PCI connection state.
  *
- * This routine is called from the PCI subsystem for I/O error handling to
- * device with SLI-3 interface spec. This function is called by the PCI
- * subsystem after a PCI bus error affecting this device has been detected.
- * When this function is invoked, it will need to stop all the I/Os and
- * interrupt(s) to the device. Once that is done, it will return
- * PCI_ERS_RESULT_NEED_RESET for the PCI subsystem to perform proper recovery
- * as desired.
+ * This routine is called from the PCI subsystem for error handling to device
+ * with SLI-4 interface spec. This function is called by the PCI subsystem
+ * after a PCI bus error affecting this device has been detected. When this
+ * function is invoked, it will need to stop all the I/Os and interrupt(s)
+ * to the device. Once that is done, it will return PCI_ERS_RESULT_NEED_RESET
+ * for the PCI subsystem to perform proper recovery as desired.
  *
  * Return codes
  * 	PCI_ERS_RESULT_NEED_RESET - need to reset before recovery
  * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
  **/
 static pci_ers_result_t
-lpfc_io_error_detected_s3(struct pci_dev *pdev, pci_channel_state_t state)
+lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
 {
-	struct Scsi_Host *shost = pci_get_drvdata(pdev);
-	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring  *pring;
-
-	if (state == pci_channel_io_perm_failure) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0472 PCI channel I/O permanent failure\n");
-		/* Block all SCSI devices' I/Os on the host */
-		lpfc_scsi_dev_block(phba);
-		/* Clean up all driver's outstanding SCSI I/Os */
-		lpfc_sli_flush_fcp_rings(phba);
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-
-	pci_disable_device(pdev);
-	/*
-	 * There may be I/Os dropped by the firmware.
-	 * Error iocb (I/O) on txcmplq and let the SCSI layer
-	 * retry it after re-establishing link.
-	 */
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
-
-	/* Disable interrupt */
-	lpfc_sli_disable_intr(phba);
-
-	/* Request a slot reset. */
 	return PCI_ERS_RESULT_NEED_RESET;
 }
 
 /**
- * lpfc_io_slot_reset_s3 - Method for restarting PCI SLI-3 device from scratch.
+ * lpfc_io_slot_reset_s4 - Method for restart PCI SLI-4 device from scratch
  * @pdev: pointer to PCI device.
  *
- * This routine is called from the PCI subsystem for error handling to
- * device with SLI-3 interface spec. This is called after PCI bus has been
- * reset to restart the PCI card from scratch, as if from a cold-boot.
- * During the PCI subsystem error recovery, after driver returns
+ * This routine is called from the PCI subsystem for error handling to device
+ * with SLI-4 interface spec. It is called after PCI bus has been reset to
+ * restart the PCI card from scratch, as if from a cold-boot. During the
+ * PCI subsystem error recovery, after the driver returns
  * PCI_ERS_RESULT_NEED_RESET, the PCI subsystem will perform proper error
- * recovery and then call this routine before calling the .resume method
- * to recover the device. This function will initialize the HBA device,
- * enable the interrupt, but it will just put the HBA to offline state
- * without passing any I/O traffic.
+ * recovery and then call this routine before calling the .resume method to
+ * recover the device. This function will initialize the HBA device, enable
+ * the interrupt, but it will just put the HBA to offline state without
+ * passing any I/O traffic.
  *
  * Return codes
  * 	PCI_ERS_RESULT_RECOVERED - the device has been recovered
  * 	PCI_ERS_RESULT_DISCONNECT - device could not be recovered
  */
 static pci_ers_result_t
-lpfc_io_slot_reset_s3(struct pci_dev *pdev)
+lpfc_io_slot_reset_s4(struct pci_dev *pdev)
 {
-	struct Scsi_Host *shost = pci_get_drvdata(pdev);
-	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
-	struct lpfc_sli *psli = &phba->sli;
-	uint32_t intr_mode;
-
-	dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n");
-	if (pci_enable_device_mem(pdev)) {
-		printk(KERN_ERR "lpfc: Cannot re-enable "
-			"PCI device after reset.\n");
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-
-	pci_restore_state(pdev);
-	if (pdev->is_busmaster)
-		pci_set_master(pdev);
-
-	spin_lock_irq(&phba->hbalock);
-	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
-	spin_unlock_irq(&phba->hbalock);
-
-	/* Configure and enable interrupt */
-	intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
-	if (intr_mode == LPFC_INTR_ERROR) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0427 Cannot re-enable interrupt after "
-				"slot reset.\n");
-		return PCI_ERS_RESULT_DISCONNECT;
-	} else
-		phba->intr_mode = intr_mode;
-
-	/* Take device offline; this will perform cleanup */
-	lpfc_offline(phba);
-	lpfc_sli_brdrestart(phba);
-
-	/* Log the current active interrupt mode */
-	lpfc_log_intr_mode(phba, phba->intr_mode);
-
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
 /**
- * lpfc_io_resume_s3 - Method for resuming PCI I/O operation on SLI-3 device.
+ * lpfc_io_resume_s4 - Method for resuming PCI I/O operation to SLI-4 device
  * @pdev: pointer to PCI device
  *
  * This routine is called from the PCI subsystem for error handling to device
- * with SLI-3 interface spec. It is called when kernel error recovery tells
+ * with SLI-4 interface spec. It is called when kernel error recovery tells
  * the lpfc driver that it is ok to resume normal PCI operation after PCI bus
  * error recovery. After this call, traffic can start to flow from this device
  * again.
- */
+ **/
 static void
-lpfc_io_resume_s3(struct pci_dev *pdev)
+lpfc_io_resume_s4(struct pci_dev *pdev)
 {
-	struct Scsi_Host *shost = pci_get_drvdata(pdev);
-	struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
-
-	lpfc_online(phba);
+	return;
 }
 
 /**
@@ -4154,6 +7622,10 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
 		return -ENODEV;
 
 	switch (dev_id) {
+	case PCI_DEVICE_ID_TIGERSHARK:
+	case PCI_DEVICE_ID_TIGERSHARK_S:
+		rc = lpfc_pci_probe_one_s4(pdev, pid);
+		break;
 	default:
 		rc = lpfc_pci_probe_one_s3(pdev, pid);
 		break;
@@ -4181,6 +7653,9 @@ lpfc_pci_remove_one(struct pci_dev *pdev)
 	case LPFC_PCI_DEV_LP:
 		lpfc_pci_remove_one_s3(pdev);
 		break;
+	case LPFC_PCI_DEV_OC:
+		lpfc_pci_remove_one_s4(pdev);
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1424 Invalid PCI device group: 0x%x\n",
@@ -4215,6 +7690,9 @@ lpfc_pci_suspend_one(struct pci_dev *pdev, pm_message_t msg)
 	case LPFC_PCI_DEV_LP:
 		rc = lpfc_pci_suspend_one_s3(pdev, msg);
 		break;
+	case LPFC_PCI_DEV_OC:
+		rc = lpfc_pci_suspend_one_s4(pdev, msg);
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1425 Invalid PCI device group: 0x%x\n",
@@ -4248,6 +7726,9 @@ lpfc_pci_resume_one(struct pci_dev *pdev)
 	case LPFC_PCI_DEV_LP:
 		rc = lpfc_pci_resume_one_s3(pdev);
 		break;
+	case LPFC_PCI_DEV_OC:
+		rc = lpfc_pci_resume_one_s4(pdev);
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1426 Invalid PCI device group: 0x%x\n",
@@ -4283,6 +7764,9 @@ lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 	case LPFC_PCI_DEV_LP:
 		rc = lpfc_io_error_detected_s3(pdev, state);
 		break;
+	case LPFC_PCI_DEV_OC:
+		rc = lpfc_io_error_detected_s4(pdev, state);
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1427 Invalid PCI device group: 0x%x\n",
@@ -4317,6 +7801,9 @@ lpfc_io_slot_reset(struct pci_dev *pdev)
 	case LPFC_PCI_DEV_LP:
 		rc = lpfc_io_slot_reset_s3(pdev);
 		break;
+	case LPFC_PCI_DEV_OC:
+		rc = lpfc_io_slot_reset_s4(pdev);
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1428 Invalid PCI device group: 0x%x\n",
@@ -4346,6 +7833,9 @@ lpfc_io_resume(struct pci_dev *pdev)
 	case LPFC_PCI_DEV_LP:
 		lpfc_io_resume_s3(pdev);
 		break;
+	case LPFC_PCI_DEV_OC:
+		lpfc_io_resume_s4(pdev);
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1429 Invalid PCI device group: 0x%x\n",
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index 134fc7f..7f5899b 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -28,8 +28,10 @@
 
 #include <scsi/scsi.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 35a9767..516f480 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -28,8 +28,10 @@
 
 #include <scsi/scsi.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -45,7 +47,7 @@
  * @phba: HBA to allocate pools for
  *
  * Description: Creates and allocates PCI pools lpfc_scsi_dma_buf_pool,
- * lpfc_mbuf_pool, lpfc_hbq_pool.  Creates and allocates kmalloc-backed mempools
+ * lpfc_mbuf_pool, lpfc_hrb_pool.  Creates and allocates kmalloc-backed mempools
  * for LPFC_MBOXQ_t and lpfc_nodelist.  Also allocates the VPI bitmask.
  *
  * Notes: Not interrupt-safe.  Must be called with no locks held.  If any
@@ -56,19 +58,30 @@
  *   -ENOMEM on failure (if any memory allocations fail)
  **/
 int
-lpfc_mem_alloc(struct lpfc_hba * phba)
+lpfc_mem_alloc(struct lpfc_hba *phba, int align)
 {
 	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
 	int longs;
 	int i;
 
-	phba->lpfc_scsi_dma_buf_pool = pci_pool_create("lpfc_scsi_dma_buf_pool",
-				phba->pcidev, phba->cfg_sg_dma_buf_size, 8, 0);
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		phba->lpfc_scsi_dma_buf_pool =
+			pci_pool_create("lpfc_scsi_dma_buf_pool",
+				phba->pcidev,
+				phba->cfg_sg_dma_buf_size,
+				phba->cfg_sg_dma_buf_size,
+				0);
+	else
+		phba->lpfc_scsi_dma_buf_pool =
+			pci_pool_create("lpfc_scsi_dma_buf_pool",
+				phba->pcidev, phba->cfg_sg_dma_buf_size,
+				align, 0);
 	if (!phba->lpfc_scsi_dma_buf_pool)
 		goto fail;
 
 	phba->lpfc_mbuf_pool = pci_pool_create("lpfc_mbuf_pool", phba->pcidev,
-							LPFC_BPL_SIZE, 8,0);
+							LPFC_BPL_SIZE,
+							align, 0);
 	if (!phba->lpfc_mbuf_pool)
 		goto fail_free_dma_buf_pool;
 
@@ -97,23 +110,31 @@ lpfc_mem_alloc(struct lpfc_hba * phba)
 						sizeof(struct lpfc_nodelist));
 	if (!phba->nlp_mem_pool)
 		goto fail_free_mbox_pool;
-
-	phba->lpfc_hbq_pool = pci_pool_create("lpfc_hbq_pool",phba->pcidev,
-					      LPFC_BPL_SIZE, 8, 0);
-	if (!phba->lpfc_hbq_pool)
+	phba->lpfc_hrb_pool = pci_pool_create("lpfc_hrb_pool",
+					      phba->pcidev,
+					      LPFC_HDR_BUF_SIZE, align, 0);
+	if (!phba->lpfc_hrb_pool)
 		goto fail_free_nlp_mem_pool;
+	phba->lpfc_drb_pool = pci_pool_create("lpfc_drb_pool",
+					      phba->pcidev,
+					      LPFC_DATA_BUF_SIZE, align, 0);
+	if (!phba->lpfc_drb_pool)
+		goto fail_free_hbq_pool;
 
 	/* vpi zero is reserved for the physical port so add 1 to max */
 	longs = ((phba->max_vpi + 1) + BITS_PER_LONG - 1) / BITS_PER_LONG;
 	phba->vpi_bmask = kzalloc(longs * sizeof(unsigned long), GFP_KERNEL);
 	if (!phba->vpi_bmask)
-		goto fail_free_hbq_pool;
+		goto fail_free_dbq_pool;
 
 	return 0;
 
+ fail_free_dbq_pool:
+	pci_pool_destroy(phba->lpfc_drb_pool);
+	phba->lpfc_drb_pool = NULL;
  fail_free_hbq_pool:
-	lpfc_sli_hbqbuf_free_all(phba);
-	pci_pool_destroy(phba->lpfc_hbq_pool);
+	pci_pool_destroy(phba->lpfc_hrb_pool);
+	phba->lpfc_hrb_pool = NULL;
  fail_free_nlp_mem_pool:
 	mempool_destroy(phba->nlp_mem_pool);
 	phba->nlp_mem_pool = NULL;
@@ -136,27 +157,73 @@ lpfc_mem_alloc(struct lpfc_hba * phba)
 }
 
 /**
- * lpfc_mem_free - Frees all PCI and memory allocated by lpfc_mem_alloc
+ * lpfc_mem_free - Frees memory allocated by lpfc_mem_alloc
  * @phba: HBA to free memory for
  *
- * Description: Frees PCI pools lpfc_scsi_dma_buf_pool, lpfc_mbuf_pool,
- * lpfc_hbq_pool.  Frees kmalloc-backed mempools for LPFC_MBOXQ_t and
- * lpfc_nodelist.  Also frees the VPI bitmask
+ * Description: Free the memory allocated by lpfc_mem_alloc routine. This
+ * routine is a the counterpart of lpfc_mem_alloc.
  *
  * Returns: None
  **/
 void
-lpfc_mem_free(struct lpfc_hba * phba)
+lpfc_mem_free(struct lpfc_hba *phba)
 {
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
-	LPFC_MBOXQ_t *mbox, *next_mbox;
-	struct lpfc_dmabuf   *mp;
 	int i;
+	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
 
+	/* Free VPI bitmask memory */
 	kfree(phba->vpi_bmask);
+
+	/* Free HBQ pools */
 	lpfc_sli_hbqbuf_free_all(phba);
+	pci_pool_destroy(phba->lpfc_drb_pool);
+	phba->lpfc_drb_pool = NULL;
+	pci_pool_destroy(phba->lpfc_hrb_pool);
+	phba->lpfc_hrb_pool = NULL;
+
+	/* Free NLP memory pool */
+	mempool_destroy(phba->nlp_mem_pool);
+	phba->nlp_mem_pool = NULL;
+
+	/* Free mbox memory pool */
+	mempool_destroy(phba->mbox_mem_pool);
+	phba->mbox_mem_pool = NULL;
+
+	/* Free MBUF memory pool */
+	for (i = 0; i < pool->current_count; i++)
+		pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
+			      pool->elements[i].phys);
+	kfree(pool->elements);
+
+	pci_pool_destroy(phba->lpfc_mbuf_pool);
+	phba->lpfc_mbuf_pool = NULL;
 
+	/* Free DMA buffer memory pool */
+	pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool);
+	phba->lpfc_scsi_dma_buf_pool = NULL;
+
+	return;
+}
+
+/**
+ * lpfc_mem_free_all - Frees all PCI and driver memory
+ * @phba: HBA to free memory for
+ *
+ * Description: Free memory from PCI and driver memory pools and also those
+ * used : lpfc_scsi_dma_buf_pool, lpfc_mbuf_pool, lpfc_hrb_pool. Frees
+ * kmalloc-backed mempools for LPFC_MBOXQ_t and lpfc_nodelist. Also frees
+ * the VPI bitmask.
+ *
+ * Returns: None
+ **/
+void
+lpfc_mem_free_all(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	LPFC_MBOXQ_t *mbox, *next_mbox;
+	struct lpfc_dmabuf   *mp;
+
+	/* Free memory used in mailbox queue back to mailbox memory pool */
 	list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) {
 		mp = (struct lpfc_dmabuf *) (mbox->context1);
 		if (mp) {
@@ -166,6 +233,7 @@ lpfc_mem_free(struct lpfc_hba * phba)
 		list_del(&mbox->list);
 		mempool_free(mbox, phba->mbox_mem_pool);
 	}
+	/* Free memory used in mailbox cmpl list back to mailbox memory pool */
 	list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) {
 		mp = (struct lpfc_dmabuf *) (mbox->context1);
 		if (mp) {
@@ -175,8 +243,10 @@ lpfc_mem_free(struct lpfc_hba * phba)
 		list_del(&mbox->list);
 		mempool_free(mbox, phba->mbox_mem_pool);
 	}
-
+	/* Free the active mailbox command back to the mailbox memory pool */
+	spin_lock_irq(&phba->hbalock);
 	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	spin_unlock_irq(&phba->hbalock);
 	if (psli->mbox_active) {
 		mbox = psli->mbox_active;
 		mp = (struct lpfc_dmabuf *) (mbox->context1);
@@ -188,27 +258,14 @@ lpfc_mem_free(struct lpfc_hba * phba)
 		psli->mbox_active = NULL;
 	}
 
-	for (i = 0; i < pool->current_count; i++)
-		pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
-						 pool->elements[i].phys);
-	kfree(pool->elements);
-
-	pci_pool_destroy(phba->lpfc_hbq_pool);
-	mempool_destroy(phba->nlp_mem_pool);
-	mempool_destroy(phba->mbox_mem_pool);
-
-	pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool);
-	pci_pool_destroy(phba->lpfc_mbuf_pool);
-
-	phba->lpfc_hbq_pool = NULL;
-	phba->nlp_mem_pool = NULL;
-	phba->mbox_mem_pool = NULL;
-	phba->lpfc_scsi_dma_buf_pool = NULL;
-	phba->lpfc_mbuf_pool = NULL;
+	/* Free and destroy all the allocated memory pools */
+	lpfc_mem_free(phba);
 
 	/* Free the iocb lookup array */
 	kfree(psli->iocbq_lookup);
 	psli->iocbq_lookup = NULL;
+
+	return;
 }
 
 /**
@@ -305,7 +362,7 @@ lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
  * lpfc_els_hbq_alloc - Allocate an HBQ buffer
  * @phba: HBA to allocate HBQ buffer for
  *
- * Description: Allocates a DMA-mapped HBQ buffer from the lpfc_hbq_pool PCI
+ * Description: Allocates a DMA-mapped HBQ buffer from the lpfc_hrb_pool PCI
  * pool along a non-DMA-mapped container for it.
  *
  * Notes: Not interrupt-safe.  Must be called with no locks held.
@@ -323,7 +380,7 @@ lpfc_els_hbq_alloc(struct lpfc_hba *phba)
 	if (!hbqbp)
 		return NULL;
 
-	hbqbp->dbuf.virt = pci_pool_alloc(phba->lpfc_hbq_pool, GFP_KERNEL,
+	hbqbp->dbuf.virt = pci_pool_alloc(phba->lpfc_hrb_pool, GFP_KERNEL,
 					  &hbqbp->dbuf.phys);
 	if (!hbqbp->dbuf.virt) {
 		kfree(hbqbp);
@@ -334,7 +391,7 @@ lpfc_els_hbq_alloc(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_mem_hbq_free - Frees an HBQ buffer allocated with lpfc_els_hbq_alloc
+ * lpfc_els_hbq_free - Frees an HBQ buffer allocated with lpfc_els_hbq_alloc
  * @phba: HBA buffer was allocated for
  * @hbqbp: HBQ container returned by lpfc_els_hbq_alloc
  *
@@ -348,12 +405,73 @@ lpfc_els_hbq_alloc(struct lpfc_hba *phba)
 void
 lpfc_els_hbq_free(struct lpfc_hba *phba, struct hbq_dmabuf *hbqbp)
 {
-	pci_pool_free(phba->lpfc_hbq_pool, hbqbp->dbuf.virt, hbqbp->dbuf.phys);
+	pci_pool_free(phba->lpfc_hrb_pool, hbqbp->dbuf.virt, hbqbp->dbuf.phys);
 	kfree(hbqbp);
 	return;
 }
 
 /**
+ * lpfc_sli4_rb_alloc - Allocate an SLI4 Receive buffer
+ * @phba: HBA to allocate a receive buffer for
+ *
+ * Description: Allocates a DMA-mapped receive buffer from the lpfc_hrb_pool PCI
+ * pool along a non-DMA-mapped container for it.
+ *
+ * Notes: Not interrupt-safe.  Must be called with no locks held.
+ *
+ * Returns:
+ *   pointer to HBQ on success
+ *   NULL on failure
+ **/
+struct hbq_dmabuf *
+lpfc_sli4_rb_alloc(struct lpfc_hba *phba)
+{
+	struct hbq_dmabuf *dma_buf;
+
+	dma_buf = kmalloc(sizeof(struct hbq_dmabuf), GFP_KERNEL);
+	if (!dma_buf)
+		return NULL;
+
+	dma_buf->hbuf.virt = pci_pool_alloc(phba->lpfc_hrb_pool, GFP_KERNEL,
+					    &dma_buf->hbuf.phys);
+	if (!dma_buf->hbuf.virt) {
+		kfree(dma_buf);
+		return NULL;
+	}
+	dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
+					    &dma_buf->dbuf.phys);
+	if (!dma_buf->dbuf.virt) {
+		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
+			      dma_buf->hbuf.phys);
+		kfree(dma_buf);
+		return NULL;
+	}
+	dma_buf->size = LPFC_BPL_SIZE;
+	return dma_buf;
+}
+
+/**
+ * lpfc_sli4_rb_free - Frees a receive buffer
+ * @phba: HBA buffer was allocated for
+ * @dmab: DMA Buffer container returned by lpfc_sli4_hbq_alloc
+ *
+ * Description: Frees both the container and the DMA-mapped buffers returned by
+ * lpfc_sli4_rb_alloc.
+ *
+ * Notes: Can be called with or without locks held.
+ *
+ * Returns: None
+ **/
+void
+lpfc_sli4_rb_free(struct lpfc_hba *phba, struct hbq_dmabuf *dmab)
+{
+	pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys);
+	pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys);
+	kfree(dmab);
+	return;
+}
+
+/**
  * lpfc_in_buf_free - Free a DMA buffer
  * @phba: HBA buffer is associated with
  * @mp: Buffer to free
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 08cdc77..6ba5a72 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -28,8 +28,10 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index a226c05..9af2db3 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -31,8 +31,10 @@
 #include <scsi/scsi_transport_fc.h>
 
 #include "lpfc_version.h"
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -57,6 +59,8 @@ static char *dif_op_str[] = {
 	"SCSI_PROT_READ_CONVERT",
 	"SCSI_PROT_WRITE_CONVERT"
 };
+static void
+lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb);
 
 static void
 lpfc_debug_save_data(struct scsi_cmnd *cmnd)
@@ -565,6 +569,8 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 		}
 		iocb->ulpClass = CLASS3;
 		psb->status = IOSTAT_SUCCESS;
+		/* Put it back into the SCSI buffer list */
+		lpfc_release_scsi_buf_s4(phba, psb);
 
 	}
 
@@ -572,6 +578,271 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc)
 }
 
 /**
+ * lpfc_sli4_fcp_xri_aborted - Fast-path process of fcp xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the fcp xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * FCP aborted xri.
+ **/
+void
+lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba,
+			  struct sli4_wcqe_xri_aborted *axri)
+{
+	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+	struct lpfc_scsi_buf *psb, *next_psb;
+	unsigned long iflag = 0;
+
+	spin_lock_irqsave(&phba->sli4_hba.abts_scsi_buf_list_lock, iflag);
+	list_for_each_entry_safe(psb, next_psb,
+		&phba->sli4_hba.lpfc_abts_scsi_buf_list, list) {
+		if (psb->cur_iocbq.sli4_xritag == xri) {
+			list_del(&psb->list);
+			psb->status = IOSTAT_SUCCESS;
+			spin_unlock_irqrestore(
+				&phba->sli4_hba.abts_scsi_buf_list_lock,
+				iflag);
+			lpfc_release_scsi_buf_s4(phba, psb);
+			return;
+		}
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.abts_scsi_buf_list_lock,
+				iflag);
+}
+
+/**
+ * lpfc_sli4_repost_scsi_sgl_list - Repsot the Scsi buffers sgl pages as block
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine walks the list of scsi buffers that have been allocated and
+ * repost them to the HBA by using SGL block post. This is needed after a
+ * pci_function_reset/warm_start or start. The lpfc_hba_down_post_s4 routine
+ * is responsible for moving all scsi buffers on the lpfc_abts_scsi_sgl_list
+ * to the lpfc_scsi_buf_list. If the repost fails, reject all scsi buffers.
+ *
+ * Returns: 0 = success, non-zero failure.
+ **/
+int
+lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *phba)
+{
+	struct lpfc_scsi_buf *psb;
+	int index, status, bcnt = 0, rcnt = 0, rc = 0;
+	LIST_HEAD(sblist);
+
+	for (index = 0; index < phba->sli4_hba.scsi_xri_cnt; index++) {
+		psb = phba->sli4_hba.lpfc_scsi_psb_array[index];
+		if (psb) {
+			/* Remove from SCSI buffer list */
+			list_del(&psb->list);
+			/* Add it to a local SCSI buffer list */
+			list_add_tail(&psb->list, &sblist);
+			if (++rcnt == LPFC_NEMBED_MBOX_SGL_CNT) {
+				bcnt = rcnt;
+				rcnt = 0;
+			}
+		} else
+			/* A hole present in the XRI array, need to skip */
+			bcnt = rcnt;
+
+		if (index == phba->sli4_hba.scsi_xri_cnt - 1)
+			/* End of XRI array for SCSI buffer, complete */
+			bcnt = rcnt;
+
+		/* Continue until collect up to a nembed page worth of sgls */
+		if (bcnt == 0)
+			continue;
+		/* Now, post the SCSI buffer list sgls as a block */
+		status = lpfc_sli4_post_scsi_sgl_block(phba, &sblist, bcnt);
+		/* Reset SCSI buffer count for next round of posting */
+		bcnt = 0;
+		while (!list_empty(&sblist)) {
+			list_remove_head(&sblist, psb, struct lpfc_scsi_buf,
+					 list);
+			if (status) {
+				/* Put this back on the abort scsi list */
+				psb->status = IOSTAT_LOCAL_REJECT;
+				psb->result = IOERR_ABORT_REQUESTED;
+				rc++;
+			} else
+				psb->status = IOSTAT_SUCCESS;
+			/* Put it back into the SCSI buffer list */
+			lpfc_release_scsi_buf_s4(phba, psb);
+		}
+	}
+	return rc;
+}
+
+/**
+ * lpfc_new_scsi_buf_s4 - Scsi buffer allocator for HBA with SLI4 IF spec
+ * @vport: The virtual port for which this call being executed.
+ * @num_to_allocate: The requested number of buffers to allocate.
+ *
+ * This routine allocates a scsi buffer for device with SLI-4 interface spec,
+ * the scsi buffer contains all the necessary information needed to initiate
+ * a SCSI I/O.
+ *
+ * Return codes:
+ *   int - number of scsi buffers that were allocated.
+ *   0 = failure, less than num_to_alloc is a partial failure.
+ **/
+static int
+lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc)
+{
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_scsi_buf *psb;
+	struct sli4_sge *sgl;
+	IOCB_t *iocb;
+	dma_addr_t pdma_phys_fcp_cmd;
+	dma_addr_t pdma_phys_fcp_rsp;
+	dma_addr_t pdma_phys_bpl, pdma_phys_bpl1;
+	uint16_t iotag, last_xritag = NO_XRI;
+	int status = 0, index;
+	int bcnt;
+	int non_sequential_xri = 0;
+	int rc = 0;
+	LIST_HEAD(sblist);
+
+	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
+		psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
+		if (!psb)
+			break;
+
+		/*
+		 * Get memory from the pci pool to map the virt space to pci bus
+		 * space for an I/O.  The DMA buffer includes space for the
+		 * struct fcp_cmnd, struct fcp_rsp and the number of bde's
+		 * necessary to support the sg_tablesize.
+		 */
+		psb->data = pci_pool_alloc(phba->lpfc_scsi_dma_buf_pool,
+						GFP_KERNEL, &psb->dma_handle);
+		if (!psb->data) {
+			kfree(psb);
+			break;
+		}
+
+		/* Initialize virtual ptrs to dma_buf region. */
+		memset(psb->data, 0, phba->cfg_sg_dma_buf_size);
+
+		/* Allocate iotag for psb->cur_iocbq. */
+		iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq);
+		if (iotag == 0) {
+			kfree(psb);
+			break;
+		}
+
+		psb->cur_iocbq.sli4_xritag = lpfc_sli4_next_xritag(phba);
+		if (psb->cur_iocbq.sli4_xritag == NO_XRI) {
+			pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
+			      psb->data, psb->dma_handle);
+			kfree(psb);
+			break;
+		}
+		if (last_xritag != NO_XRI
+			&& psb->cur_iocbq.sli4_xritag != (last_xritag+1)) {
+			non_sequential_xri = 1;
+		} else
+			list_add_tail(&psb->list, &sblist);
+		last_xritag = psb->cur_iocbq.sli4_xritag;
+
+		index = phba->sli4_hba.scsi_xri_cnt++;
+		psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP;
+
+		psb->fcp_bpl = psb->data;
+		psb->fcp_cmnd = (psb->data + phba->cfg_sg_dma_buf_size)
+			- (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+		psb->fcp_rsp = (struct fcp_rsp *)((uint8_t *)psb->fcp_cmnd +
+					sizeof(struct fcp_cmnd));
+
+		/* Initialize local short-hand pointers. */
+		sgl = (struct sli4_sge *)psb->fcp_bpl;
+		pdma_phys_bpl = psb->dma_handle;
+		pdma_phys_fcp_cmd =
+			(psb->dma_handle + phba->cfg_sg_dma_buf_size)
+			 - (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+		pdma_phys_fcp_rsp = pdma_phys_fcp_cmd + sizeof(struct fcp_cmnd);
+
+		/*
+		 * The first two bdes are the FCP_CMD and FCP_RSP.  The balance
+		 * are sg list bdes.  Initialize the first two and leave the
+		 * rest for queuecommand.
+		 */
+		sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_cmd));
+		sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_cmd));
+		bf_set(lpfc_sli4_sge_len, sgl, sizeof(struct fcp_cmnd));
+		bf_set(lpfc_sli4_sge_last, sgl, 0);
+		sgl->word2 = cpu_to_le32(sgl->word2);
+		sgl->word3 = cpu_to_le32(sgl->word3);
+		sgl++;
+
+		/* Setup the physical region for the FCP RSP */
+		sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_rsp));
+		sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_rsp));
+		bf_set(lpfc_sli4_sge_len, sgl, sizeof(struct fcp_rsp));
+		bf_set(lpfc_sli4_sge_last, sgl, 1);
+		sgl->word2 = cpu_to_le32(sgl->word2);
+		sgl->word3 = cpu_to_le32(sgl->word3);
+
+		/*
+		 * Since the IOCB for the FCP I/O is built into this
+		 * lpfc_scsi_buf, initialize it with all known data now.
+		 */
+		iocb = &psb->cur_iocbq.iocb;
+		iocb->un.fcpi64.bdl.ulpIoTag32 = 0;
+		iocb->un.fcpi64.bdl.bdeFlags = BUFF_TYPE_BDE_64;
+		/* setting the BLP size to 2 * sizeof BDE may not be correct.
+		 * We are setting the bpl to point to out sgl. An sgl's
+		 * entries are 16 bytes, a bpl entries are 12 bytes.
+		 */
+		iocb->un.fcpi64.bdl.bdeSize = sizeof(struct fcp_cmnd);
+		iocb->un.fcpi64.bdl.addrLow = putPaddrLow(pdma_phys_fcp_cmd);
+		iocb->un.fcpi64.bdl.addrHigh = putPaddrHigh(pdma_phys_fcp_cmd);
+		iocb->ulpBdeCount = 1;
+		iocb->ulpLe = 1;
+		iocb->ulpClass = CLASS3;
+		if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
+			pdma_phys_bpl1 = pdma_phys_bpl + SGL_PAGE_SIZE;
+		else
+			pdma_phys_bpl1 = 0;
+		psb->dma_phys_bpl = pdma_phys_bpl;
+		phba->sli4_hba.lpfc_scsi_psb_array[index] = psb;
+		if (non_sequential_xri) {
+			status = lpfc_sli4_post_sgl(phba, pdma_phys_bpl,
+						pdma_phys_bpl1,
+						psb->cur_iocbq.sli4_xritag);
+			if (status) {
+				/* Put this back on the abort scsi list */
+				psb->status = IOSTAT_LOCAL_REJECT;
+				psb->result = IOERR_ABORT_REQUESTED;
+				rc++;
+			} else
+				psb->status = IOSTAT_SUCCESS;
+			/* Put it back into the SCSI buffer list */
+			lpfc_release_scsi_buf_s4(phba, psb);
+			break;
+		}
+	}
+	if (bcnt) {
+		status = lpfc_sli4_post_scsi_sgl_block(phba, &sblist, bcnt);
+		/* Reset SCSI buffer count for next round of posting */
+		while (!list_empty(&sblist)) {
+			list_remove_head(&sblist, psb, struct lpfc_scsi_buf,
+				 list);
+			if (status) {
+				/* Put this back on the abort scsi list */
+				psb->status = IOSTAT_LOCAL_REJECT;
+				psb->result = IOERR_ABORT_REQUESTED;
+				rc++;
+			} else
+				psb->status = IOSTAT_SUCCESS;
+			/* Put it back into the SCSI buffer list */
+			lpfc_release_scsi_buf_s4(phba, psb);
+		}
+	}
+
+	return bcnt + non_sequential_xri - rc;
+}
+
+/**
  * lpfc_new_scsi_buf - Wrapper funciton for scsi buffer allocator
  * @vport: The virtual port for which this call being executed.
  * @num_to_allocate: The requested number of buffers to allocate.
@@ -638,6 +909,39 @@ lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 }
 
 /**
+ * lpfc_release_scsi_buf_s4: Return a scsi buffer back to hba scsi buf list.
+ * @phba: The Hba for which this call is being executed.
+ * @psb: The scsi buffer which is being released.
+ *
+ * This routine releases @psb scsi buffer by adding it to tail of @phba
+ * lpfc_scsi_buf_list list. For SLI4 XRI's are tied to the scsi buffer
+ * and cannot be reused for at least RA_TOV amount of time if it was
+ * aborted.
+ **/
+static void
+lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+{
+	unsigned long iflag = 0;
+
+	if (psb->status == IOSTAT_LOCAL_REJECT
+		&& psb->result == IOERR_ABORT_REQUESTED) {
+		spin_lock_irqsave(&phba->sli4_hba.abts_scsi_buf_list_lock,
+					iflag);
+		psb->pCmd = NULL;
+		list_add_tail(&psb->list,
+			&phba->sli4_hba.lpfc_abts_scsi_buf_list);
+		spin_unlock_irqrestore(&phba->sli4_hba.abts_scsi_buf_list_lock,
+					iflag);
+	} else {
+
+		spin_lock_irqsave(&phba->scsi_buf_list_lock, iflag);
+		psb->pCmd = NULL;
+		list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list);
+		spin_unlock_irqrestore(&phba->scsi_buf_list_lock, iflag);
+	}
+}
+
+/**
  * lpfc_release_scsi_buf: Return a scsi buffer back to hba scsi buf list.
  * @phba: The Hba for which this call is being executed.
  * @psb: The scsi buffer which is being released.
@@ -1455,6 +1759,115 @@ out:
 }
 
 /**
+ * lpfc_scsi_prep_dma_buf_s4 - DMA mapping for scsi buffer to SLI4 IF spec
+ * @phba: The Hba for which this call is being executed.
+ * @lpfc_cmd: The scsi buffer which is going to be mapped.
+ *
+ * This routine does the pci dma mapping for scatter-gather list of scsi cmnd
+ * field of @lpfc_cmd for device with SLI-4 interface spec.
+ *
+ * Return codes:
+ * 	1 - Error
+ * 	0 - Success
+ **/
+static int
+lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+{
+	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
+	struct scatterlist *sgel = NULL;
+	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
+	struct sli4_sge *sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
+	dma_addr_t physaddr;
+	uint32_t num_bde = 0;
+	uint32_t dma_len;
+	uint32_t dma_offset = 0;
+	int nseg;
+
+	/*
+	 * There are three possibilities here - use scatter-gather segment, use
+	 * the single mapping, or neither.  Start the lpfc command prep by
+	 * bumping the bpl beyond the fcp_cmnd and fcp_rsp regions to the first
+	 * data bde entry.
+	 */
+	if (scsi_sg_count(scsi_cmnd)) {
+		/*
+		 * The driver stores the segment count returned from pci_map_sg
+		 * because this a count of dma-mappings used to map the use_sg
+		 * pages.  They are not guaranteed to be the same for those
+		 * architectures that implement an IOMMU.
+		 */
+
+		nseg = scsi_dma_map(scsi_cmnd);
+		if (unlikely(!nseg))
+			return 1;
+		sgl += 1;
+		/* clear the last flag in the fcp_rsp map entry */
+		sgl->word2 = le32_to_cpu(sgl->word2);
+		bf_set(lpfc_sli4_sge_last, sgl, 0);
+		sgl->word2 = cpu_to_le32(sgl->word2);
+		sgl += 1;
+
+		lpfc_cmd->seg_cnt = nseg;
+		if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
+			printk(KERN_ERR "%s: Too many sg segments from "
+			       "dma_map_sg.  Config %d, seg_cnt %d\n",
+			       __func__, phba->cfg_sg_seg_cnt,
+			       lpfc_cmd->seg_cnt);
+			scsi_dma_unmap(scsi_cmnd);
+			return 1;
+		}
+
+		/*
+		 * The driver established a maximum scatter-gather segment count
+		 * during probe that limits the number of sg elements in any
+		 * single scsi command.  Just run through the seg_cnt and format
+		 * the sge's.
+		 * When using SLI-3 the driver will try to fit all the BDEs into
+		 * the IOCB. If it can't then the BDEs get added to a BPL as it
+		 * does for SLI-2 mode.
+		 */
+		scsi_for_each_sg(scsi_cmnd, sgel, nseg, num_bde) {
+			physaddr = sg_dma_address(sgel);
+			dma_len = sg_dma_len(sgel);
+			bf_set(lpfc_sli4_sge_len, sgl, sg_dma_len(sgel));
+			sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr));
+			sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
+			if ((num_bde + 1) == nseg)
+				bf_set(lpfc_sli4_sge_last, sgl, 1);
+			else
+				bf_set(lpfc_sli4_sge_last, sgl, 0);
+			bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
+			sgl->word2 = cpu_to_le32(sgl->word2);
+			sgl->word3 = cpu_to_le32(sgl->word3);
+			dma_offset += dma_len;
+			sgl++;
+		}
+	} else {
+		sgl += 1;
+		/* clear the last flag in the fcp_rsp map entry */
+		sgl->word2 = le32_to_cpu(sgl->word2);
+		bf_set(lpfc_sli4_sge_last, sgl, 1);
+		sgl->word2 = cpu_to_le32(sgl->word2);
+	}
+
+	/*
+	 * Finish initializing those IOCB fields that are dependent on the
+	 * scsi_cmnd request_buffer.  Note that for SLI-2 the bdeSize is
+	 * explicitly reinitialized.
+	 * all iocb memory resources are reused.
+	 */
+	fcp_cmnd->fcpDl = cpu_to_be32(scsi_bufflen(scsi_cmnd));
+
+	/*
+	 * Due to difference in data length between DIF/non-DIF paths,
+	 * we need to set word 4 of IOCB here
+	 */
+	iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd);
+	return 0;
+}
+
+/**
  * lpfc_scsi_prep_dma_buf - Wrapper function for DMA mapping of scsi buffer
  * @phba: The Hba for which this call is being executed.
  * @lpfc_cmd: The scsi buffer which is going to be mapped.
@@ -1590,6 +2003,22 @@ lpfc_scsi_unprep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
 }
 
 /**
+ * lpfc_scsi_unprep_dma_buf_s4 - Un-map DMA mapping of SG-list for SLI4 dev
+ * @phba: The Hba for which this call is being executed.
+ * @psb: The scsi buffer which is going to be un-mapped.
+ *
+ * This routine does DMA un-mapping of scatter gather list of scsi command
+ * field of @lpfc_cmd for device with SLI-4 interface spec. If we have to
+ * remove the sgl for this scsi buffer then we will do it here. For now
+ * we should be able to just call the sli3 unprep routine.
+ **/
+static void
+lpfc_scsi_unprep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+{
+	lpfc_scsi_unprep_dma_buf_s3(phba, psb);
+}
+
+/**
  * lpfc_scsi_unprep_dma_buf - Wrapper function for unmap DMA mapping of SG-list
  * @phba: The Hba for which this call is being executed.
  * @psb: The scsi buffer which is going to be un-mapped.
@@ -2129,6 +2558,29 @@ lpfc_scsi_prep_cmnd_s3(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 }
 
 /**
+ * lpfc_scsi_prep_cmnd_s4 - Convert scsi cmnd to FCP infor unit for SLI4 dev
+ * @vport: The virtual port for which this call is being executed.
+ * @lpfc_cmd: The scsi command which needs to send.
+ * @pnode: Pointer to lpfc_nodelist.
+ *
+ * This routine initializes fcp_cmnd and iocb data structure from scsi command
+ * to transfer for device with SLI4 interface spec.
+ **/
+static void
+lpfc_scsi_prep_cmnd_s4(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+		       struct lpfc_nodelist *pnode)
+{
+	/*
+	 * The prep cmnd routines do not touch the sgl or its
+	 * entries. We may not have to do anything different.
+	 * I will leave this function in place until we can
+	 * run some IO through the driver and determine if changes
+	 * are needed.
+	 */
+	return lpfc_scsi_prep_cmnd_s3(vport, lpfc_cmd, pnode);
+}
+
+/**
  * lpfc_scsi_prep_cmnd - Wrapper func for convert scsi cmnd to FCP info unit
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: The scsi command which needs to send.
@@ -2209,6 +2661,37 @@ lpfc_scsi_prep_task_mgmt_cmd_s3(struct lpfc_vport *vport,
 }
 
 /**
+ * lpfc_scsi_prep_task_mgmt_cmnd_s4 - Convert SLI4 scsi TM cmd to FCP info unit
+ * @vport: The virtual port for which this call is being executed.
+ * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
+ * @lun: Logical unit number.
+ * @task_mgmt_cmd: SCSI task management command.
+ *
+ * This routine creates FCP information unit corresponding to @task_mgmt_cmd
+ * for device with SLI-4 interface spec.
+ *
+ * Return codes:
+ * 	0 - Error
+ * 	1 - Success
+ **/
+static int
+lpfc_scsi_prep_task_mgmt_cmd_s4(struct lpfc_vport *vport,
+				struct lpfc_scsi_buf *lpfc_cmd,
+				unsigned int lun,
+				uint8_t task_mgmt_cmd)
+{
+	/*
+	 * The prep cmnd routines do not touch the sgl or its
+	 * entries. We may not have to do anything different.
+	 * I will leave this function in place until we can
+	 * run some IO through the driver and determine if changes
+	 * are needed.
+	 */
+	return lpfc_scsi_prep_task_mgmt_cmd_s3(vport, lpfc_cmd, lun,
+						task_mgmt_cmd);
+}
+
+/**
  * lpfc_scsi_prep_task_mgmt_cmnd - Wrapper func convert scsi TM cmd to FCP info
  * @vport: The virtual port for which this call is being executed.
  * @lpfc_cmd: Pointer to lpfc_scsi_buf data structure.
@@ -2257,6 +2740,15 @@ lpfc_scsi_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
 					lpfc_scsi_prep_task_mgmt_cmd_s3;
 		phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s3;
 		break;
+	case LPFC_PCI_DEV_OC:
+		phba->lpfc_new_scsi_buf = lpfc_new_scsi_buf_s4;
+		phba->lpfc_scsi_prep_dma_buf = lpfc_scsi_prep_dma_buf_s4;
+		phba->lpfc_scsi_prep_cmnd = lpfc_scsi_prep_cmnd_s4;
+		phba->lpfc_scsi_unprep_dma_buf = lpfc_scsi_unprep_dma_buf_s4;
+		phba->lpfc_scsi_prep_task_mgmt_cmd =
+					lpfc_scsi_prep_task_mgmt_cmd_s4;
+		phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s4;
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1418 Invalid HBA PCI-device group: 0x%x\n",
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index c7c440d..65dfc8b 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -140,6 +140,8 @@ struct lpfc_scsi_buf {
 	struct fcp_rsp *fcp_rsp;
 	struct ulp_bde64 *fcp_bpl;
 
+	dma_addr_t dma_phys_bpl;
+
 	/* cur_iocbq has phys of the dma-able buffer.
 	 * Iotag is in here
 	 */
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index e2d07d9..706bb22 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -29,9 +29,12 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
+#include <scsi/fc/fc_fs.h>
 
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -121,6 +124,76 @@ __lpfc_sli_get_iocbq(struct lpfc_hba *phba)
 }
 
 /**
+ * __lpfc_clear_active_sglq - Remove the active sglq for this XRI.
+ * @phba: Pointer to HBA context object.
+ * @xritag: XRI value.
+ *
+ * This function clears the sglq pointer from the array of acive
+ * sglq's. The xritag that is passed in is used to index into the
+ * array. Before the xritag can be used it needs to be adjusted
+ * by subtracting the xribase.
+ *
+ * Returns sglq ponter = success, NULL = Failure.
+ **/
+static struct lpfc_sglq *
+__lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xritag)
+{
+	uint16_t adj_xri;
+	struct lpfc_sglq *sglq;
+	adj_xri = xritag - phba->sli4_hba.max_cfg_param.xri_base;
+	if (adj_xri > phba->sli4_hba.max_cfg_param.max_xri)
+		return NULL;
+	sglq = phba->sli4_hba.lpfc_sglq_active_list[adj_xri];
+	phba->sli4_hba.lpfc_sglq_active_list[adj_xri] = NULL;
+	return sglq;
+}
+
+/**
+ * __lpfc_get_active_sglq - Get the active sglq for this XRI.
+ * @phba: Pointer to HBA context object.
+ * @xritag: XRI value.
+ *
+ * This function returns the sglq pointer from the array of acive
+ * sglq's. The xritag that is passed in is used to index into the
+ * array. Before the xritag can be used it needs to be adjusted
+ * by subtracting the xribase.
+ *
+ * Returns sglq ponter = success, NULL = Failure.
+ **/
+static struct lpfc_sglq *
+__lpfc_get_active_sglq(struct lpfc_hba *phba, uint16_t xritag)
+{
+	uint16_t adj_xri;
+	struct lpfc_sglq *sglq;
+	adj_xri = xritag - phba->sli4_hba.max_cfg_param.xri_base;
+	if (adj_xri > phba->sli4_hba.max_cfg_param.max_xri)
+		return NULL;
+	sglq =  phba->sli4_hba.lpfc_sglq_active_list[adj_xri];
+	return sglq;
+}
+
+/**
+ * __lpfc_sli_get_sglq - Allocates an iocb object from sgl pool
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called with hbalock held. This function
+ * Gets a new driver sglq object from the sglq list. If the
+ * list is not empty then it is successful, it returns pointer to the newly
+ * allocated sglq object else it returns NULL.
+ **/
+static struct lpfc_sglq *
+__lpfc_sli_get_sglq(struct lpfc_hba *phba)
+{
+	struct list_head *lpfc_sgl_list = &phba->sli4_hba.lpfc_sgl_list;
+	struct lpfc_sglq *sglq = NULL;
+	uint16_t adj_xri;
+	list_remove_head(lpfc_sgl_list, sglq, struct lpfc_sglq, list);
+	adj_xri = sglq->sli4_xritag - phba->sli4_hba.max_cfg_param.xri_base;
+	phba->sli4_hba.lpfc_sglq_active_list[adj_xri] = sglq;
+	return sglq;
+}
+
+/**
  * lpfc_sli_get_iocbq - Allocates an iocb object from iocb pool
  * @phba: Pointer to HBA context object.
  *
@@ -298,6 +371,14 @@ lpfc_sli_iocb_cmd_type(uint8_t iocb_cmnd)
 	case CMD_GEN_REQUEST64_CR:
 	case CMD_GEN_REQUEST64_CX:
 	case CMD_XMIT_ELS_RSP64_CX:
+	case DSSCMD_IWRITE64_CR:
+	case DSSCMD_IWRITE64_CX:
+	case DSSCMD_IREAD64_CR:
+	case DSSCMD_IREAD64_CX:
+	case DSSCMD_INVALIDATE_DEK:
+	case DSSCMD_SET_KEK:
+	case DSSCMD_GET_KEK_ID:
+	case DSSCMD_GEN_XFER:
 		type = LPFC_SOL_IOCB;
 		break;
 	case CMD_ABORT_XRI_CN:
@@ -2629,6 +2710,56 @@ lpfc_sli_brdready_s3(struct lpfc_hba *phba, uint32_t mask)
 	return retval;
 }
 
+/**
+ * lpfc_sli_brdready_s4 - Check for sli4 host ready status
+ * @phba: Pointer to HBA context object.
+ * @mask: Bit mask to be checked.
+ *
+ * This function checks the host status register to check if HBA is
+ * ready. This function will wait in a loop for the HBA to be ready
+ * If the HBA is not ready , the function will will reset the HBA PCI
+ * function again. The function returns 1 when HBA fail to be ready
+ * otherwise returns zero.
+ **/
+static int
+lpfc_sli_brdready_s4(struct lpfc_hba *phba, uint32_t mask)
+{
+	uint32_t status;
+	int retval = 0;
+
+	/* Read the HBA Host Status Register */
+	status = lpfc_sli4_post_status_check(phba);
+
+	if (status) {
+		phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+		lpfc_sli_brdrestart(phba);
+		status = lpfc_sli4_post_status_check(phba);
+	}
+
+	/* Check to see if any errors occurred during init */
+	if (status) {
+		phba->link_state = LPFC_HBA_ERROR;
+		retval = 1;
+	} else
+		phba->sli4_hba.intr_enable = 0;
+
+	return retval;
+}
+
+/**
+ * lpfc_sli_brdready - Wrapper func for checking the hba readyness
+ * @phba: Pointer to HBA context object.
+ * @mask: Bit mask to be checked.
+ *
+ * This routine wraps the actual SLI3 or SLI4 hba readyness check routine
+ * from the API jump table function pointer from the lpfc_hba struct.
+ **/
+int
+lpfc_sli_brdready(struct lpfc_hba *phba, uint32_t mask)
+{
+	return phba->lpfc_sli_brdready(phba, mask);
+}
+
 #define BARRIER_TEST_PATTERN (0xdeadbeef)
 
 /**
@@ -2863,7 +2994,66 @@ lpfc_sli_brdreset(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_sli_brdrestart - Restart the HBA
+ * lpfc_sli4_brdreset - Reset a sli-4 HBA
+ * @phba: Pointer to HBA context object.
+ *
+ * This function resets a SLI4 HBA. This function disables PCI layer parity
+ * checking during resets the device. The caller is not required to hold
+ * any locks.
+ *
+ * This function returns 0 always.
+ **/
+int
+lpfc_sli4_brdreset(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	uint16_t cfg_value;
+	uint8_t qindx;
+
+	/* Reset HBA */
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"0295 Reset HBA Data: x%x x%x\n",
+			phba->pport->port_state, psli->sli_flag);
+
+	/* perform board reset */
+	phba->fc_eventTag = 0;
+	phba->pport->fc_myDID = 0;
+	phba->pport->fc_prevDID = 0;
+
+	/* Turn off parity checking and serr during the physical reset */
+	pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
+	pci_write_config_word(phba->pcidev, PCI_COMMAND,
+			      (cfg_value &
+			      ~(PCI_COMMAND_PARITY | PCI_COMMAND_SERR)));
+
+	spin_lock_irq(&phba->hbalock);
+	psli->sli_flag &= ~(LPFC_PROCESS_LA);
+	phba->fcf.fcf_flag = 0;
+	/* Clean up the child queue list for the CQs */
+	list_del_init(&phba->sli4_hba.mbx_wq->list);
+	list_del_init(&phba->sli4_hba.els_wq->list);
+	list_del_init(&phba->sli4_hba.hdr_rq->list);
+	list_del_init(&phba->sli4_hba.dat_rq->list);
+	list_del_init(&phba->sli4_hba.mbx_cq->list);
+	list_del_init(&phba->sli4_hba.els_cq->list);
+	list_del_init(&phba->sli4_hba.rxq_cq->list);
+	for (qindx = 0; qindx < phba->cfg_fcp_wq_count; qindx++)
+		list_del_init(&phba->sli4_hba.fcp_wq[qindx]->list);
+	for (qindx = 0; qindx < phba->cfg_fcp_eq_count; qindx++)
+		list_del_init(&phba->sli4_hba.fcp_cq[qindx]->list);
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Now physically reset the device */
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"0389 Performing PCI function reset!\n");
+	/* Perform FCoE PCI function reset */
+	lpfc_pci_function_reset(phba);
+
+	return 0;
+}
+
+/**
+ * lpfc_sli_brdrestart_s3 - Restart a sli-3 hba
  * @phba: Pointer to HBA context object.
  *
  * This function is called in the SLI initialization code path to
@@ -2875,8 +3065,8 @@ lpfc_sli_brdreset(struct lpfc_hba *phba)
  * The function does not guarantee completion of MBX_RESTART mailbox
  * command before the return of this function.
  **/
-int
-lpfc_sli_brdrestart(struct lpfc_hba *phba)
+static int
+lpfc_sli_brdrestart_s3(struct lpfc_hba *phba)
 {
 	MAILBOX_t *mb;
 	struct lpfc_sli *psli;
@@ -2915,7 +3105,7 @@ lpfc_sli_brdrestart(struct lpfc_hba *phba)
 	lpfc_sli_brdreset(phba);
 	phba->pport->stopped = 0;
 	phba->link_state = LPFC_INIT_START;
-
+	phba->hba_flag = 0;
 	spin_unlock_irq(&phba->hbalock);
 
 	memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets));
@@ -2930,6 +3120,55 @@ lpfc_sli_brdrestart(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_sli_brdrestart_s4 - Restart the sli-4 hba
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called in the SLI initialization code path to restart
+ * a SLI4 HBA. The caller is not required to hold any lock.
+ * At the end of the function, it calls lpfc_hba_down_post function to
+ * free any pending commands.
+ **/
+static int
+lpfc_sli_brdrestart_s4(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+
+
+	/* Restart HBA */
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"0296 Restart HBA Data: x%x x%x\n",
+			phba->pport->port_state, psli->sli_flag);
+
+	lpfc_sli4_brdreset(phba);
+
+	spin_lock_irq(&phba->hbalock);
+	phba->pport->stopped = 0;
+	phba->link_state = LPFC_INIT_START;
+	phba->hba_flag = 0;
+	spin_unlock_irq(&phba->hbalock);
+
+	memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets));
+	psli->stats_start = get_seconds();
+
+	lpfc_hba_down_post(phba);
+
+	return 0;
+}
+
+/**
+ * lpfc_sli_brdrestart - Wrapper func for restarting hba
+ * @phba: Pointer to HBA context object.
+ *
+ * This routine wraps the actual SLI3 or SLI4 hba restart routine from the
+ * API jump table function pointer from the lpfc_hba struct.
+**/
+int
+lpfc_sli_brdrestart(struct lpfc_hba *phba)
+{
+	return phba->lpfc_sli_brdrestart(phba);
+}
+
+/**
  * lpfc_sli_chipset_init - Wait for the restart of the HBA after a restart
  * @phba: Pointer to HBA context object.
  *
@@ -3353,125 +3592,607 @@ lpfc_sli_hba_setup_error:
 	return rc;
 }
 
-
 /**
- * lpfc_mbox_timeout - Timeout call back function for mbox timer
- * @ptr: context object - pointer to hba structure.
- *
- * This is the callback function for mailbox timer. The mailbox
- * timer is armed when a new mailbox command is issued and the timer
- * is deleted when the mailbox complete. The function is called by
- * the kernel timer code when a mailbox does not complete within
- * expected time. This function wakes up the worker thread to
- * process the mailbox timeout and returns. All the processing is
- * done by the worker thread function lpfc_mbox_timeout_handler.
+ * lpfc_sli4_read_fcoe_params - Read fcoe params from conf region
+ * @phba: Pointer to HBA context object.
+ * @mboxq: mailbox pointer.
+ * This function issue a dump mailbox command to read config region
+ * 23 and parse the records in the region and populate driver
+ * data structure.
  **/
-void
-lpfc_mbox_timeout(unsigned long ptr)
+static int
+lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba,
+		LPFC_MBOXQ_t *mboxq)
 {
-	struct lpfc_hba  *phba = (struct lpfc_hba *) ptr;
-	unsigned long iflag;
-	uint32_t tmo_posted;
+	struct lpfc_dmabuf *mp;
+	struct lpfc_mqe *mqe;
+	uint32_t data_length;
+	int rc;
 
-	spin_lock_irqsave(&phba->pport->work_port_lock, iflag);
-	tmo_posted = phba->pport->work_port_events & WORKER_MBOX_TMO;
-	if (!tmo_posted)
-		phba->pport->work_port_events |= WORKER_MBOX_TMO;
-	spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag);
+	/* Program the default value of vlan_id and fc_map */
+	phba->valid_vlan = 0;
+	phba->fc_map[0] = LPFC_FCOE_FCF_MAP0;
+	phba->fc_map[1] = LPFC_FCOE_FCF_MAP1;
+	phba->fc_map[2] = LPFC_FCOE_FCF_MAP2;
 
-	if (!tmo_posted)
-		lpfc_worker_wake_up(phba);
-	return;
-}
+	mqe = &mboxq->u.mqe;
+	if (lpfc_dump_fcoe_param(phba, mboxq))
+		return -ENOMEM;
+
+	mp = (struct lpfc_dmabuf *) mboxq->context1;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):2571 Mailbox cmd x%x Status x%x "
+			"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x "
+			"x%x x%x x%x x%x x%x x%x x%x x%x x%x "
+			"CQ: x%x x%x x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0,
+			bf_get(lpfc_mqe_command, mqe),
+			bf_get(lpfc_mqe_status, mqe),
+			mqe->un.mb_words[0], mqe->un.mb_words[1],
+			mqe->un.mb_words[2], mqe->un.mb_words[3],
+			mqe->un.mb_words[4], mqe->un.mb_words[5],
+			mqe->un.mb_words[6], mqe->un.mb_words[7],
+			mqe->un.mb_words[8], mqe->un.mb_words[9],
+			mqe->un.mb_words[10], mqe->un.mb_words[11],
+			mqe->un.mb_words[12], mqe->un.mb_words[13],
+			mqe->un.mb_words[14], mqe->un.mb_words[15],
+			mqe->un.mb_words[16], mqe->un.mb_words[50],
+			mboxq->mcqe.word0,
+			mboxq->mcqe.mcqe_tag0, 	mboxq->mcqe.mcqe_tag1,
+			mboxq->mcqe.trailer);
+
+	if (rc) {
+		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+		kfree(mp);
+		return -EIO;
+	}
+	data_length = mqe->un.mb_words[5];
+	if (data_length > DMP_FCOEPARAM_RGN_SIZE)
+		return -EIO;
 
+	lpfc_parse_fcoe_conf(phba, mp->virt, data_length);
+	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+	kfree(mp);
+	return 0;
+}
 
 /**
- * lpfc_mbox_timeout_handler - Worker thread function to handle mailbox timeout
- * @phba: Pointer to HBA context object.
+ * lpfc_sli4_read_rev - Issue READ_REV and collect vpd data
+ * @phba: pointer to lpfc hba data structure.
+ * @mboxq: pointer to the LPFC_MBOXQ_t structure.
+ * @vpd: pointer to the memory to hold resulting port vpd data.
+ * @vpd_size: On input, the number of bytes allocated to @vpd.
+ *	      On output, the number of data bytes in @vpd.
  *
- * This function is called from worker thread when a mailbox command times out.
- * The caller is not required to hold any locks. This function will reset the
- * HBA and recover all the pending commands.
+ * This routine executes a READ_REV SLI4 mailbox command.  In
+ * addition, this routine gets the port vpd data.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - could not allocated memory.
  **/
-void
-lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
+static int
+lpfc_sli4_read_rev(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq,
+		    uint8_t *vpd, uint32_t *vpd_size)
 {
-	LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active;
-	MAILBOX_t *mb = &pmbox->mb;
-	struct lpfc_sli *psli = &phba->sli;
-	struct lpfc_sli_ring *pring;
+	int rc = 0;
+	uint32_t dma_size;
+	struct lpfc_dmabuf *dmabuf;
+	struct lpfc_mqe *mqe;
 
-	/* Check the pmbox pointer first.  There is a race condition
-	 * between the mbox timeout handler getting executed in the
-	 * worklist and the mailbox actually completing. When this
-	 * race condition occurs, the mbox_active will be NULL.
+	dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!dmabuf)
+		return -ENOMEM;
+
+	/*
+	 * Get a DMA buffer for the vpd data resulting from the READ_REV
+	 * mailbox command.
 	 */
-	spin_lock_irq(&phba->hbalock);
-	if (pmbox == NULL) {
-		lpfc_printf_log(phba, KERN_WARNING,
-				LOG_MBOX | LOG_SLI,
-				"0353 Active Mailbox cleared - mailbox timeout "
-				"exiting\n");
-		spin_unlock_irq(&phba->hbalock);
-		return;
+	dma_size = *vpd_size;
+	dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+					  dma_size,
+					  &dmabuf->phys,
+					  GFP_KERNEL);
+	if (!dmabuf->virt) {
+		kfree(dmabuf);
+		return -ENOMEM;
 	}
+	memset(dmabuf->virt, 0, dma_size);
 
-	/* Mbox cmd <mbxCommand> timeout */
-	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-			"0310 Mailbox command x%x timeout Data: x%x x%x x%p\n",
-			mb->mbxCommand,
-			phba->pport->port_state,
-			phba->sli.sli_flag,
-			phba->sli.mbox_active);
-	spin_unlock_irq(&phba->hbalock);
-
-	/* Setting state unknown so lpfc_sli_abort_iocb_ring
-	 * would get IOCB_ERROR from lpfc_sli_issue_iocb, allowing
-	 * it to fail all oustanding SCSI IO.
+	/*
+	 * The SLI4 implementation of READ_REV conflicts at word1,
+	 * bits 31:16 and SLI4 adds vpd functionality not present
+	 * in SLI3.  This code corrects the conflicts.
 	 */
-	spin_lock_irq(&phba->pport->work_port_lock);
-	phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
-	spin_unlock_irq(&phba->pport->work_port_lock);
-	spin_lock_irq(&phba->hbalock);
-	phba->link_state = LPFC_LINK_UNKNOWN;
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
-	spin_unlock_irq(&phba->hbalock);
+	lpfc_read_rev(phba, mboxq);
+	mqe = &mboxq->u.mqe;
+	mqe->un.read_rev.vpd_paddr_high = putPaddrHigh(dmabuf->phys);
+	mqe->un.read_rev.vpd_paddr_low = putPaddrLow(dmabuf->phys);
+	mqe->un.read_rev.word1 &= 0x0000FFFF;
+	bf_set(lpfc_mbx_rd_rev_vpd, &mqe->un.read_rev, 1);
+	bf_set(lpfc_mbx_rd_rev_avail_len, &mqe->un.read_rev, dma_size);
+
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	if (rc) {
+		dma_free_coherent(&phba->pcidev->dev, dma_size,
+				  dmabuf->virt, dmabuf->phys);
+		return -EIO;
+	}
 
-	pring = &psli->ring[psli->fcp_ring];
-	lpfc_sli_abort_iocb_ring(phba, pring);
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):0380 Mailbox cmd x%x Status x%x "
+			"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x "
+			"x%x x%x x%x x%x x%x x%x x%x x%x x%x "
+			"CQ: x%x x%x x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0,
+			bf_get(lpfc_mqe_command, mqe),
+			bf_get(lpfc_mqe_status, mqe),
+			mqe->un.mb_words[0], mqe->un.mb_words[1],
+			mqe->un.mb_words[2], mqe->un.mb_words[3],
+			mqe->un.mb_words[4], mqe->un.mb_words[5],
+			mqe->un.mb_words[6], mqe->un.mb_words[7],
+			mqe->un.mb_words[8], mqe->un.mb_words[9],
+			mqe->un.mb_words[10], mqe->un.mb_words[11],
+			mqe->un.mb_words[12], mqe->un.mb_words[13],
+			mqe->un.mb_words[14], mqe->un.mb_words[15],
+			mqe->un.mb_words[16], mqe->un.mb_words[50],
+			mboxq->mcqe.word0,
+			mboxq->mcqe.mcqe_tag0, 	mboxq->mcqe.mcqe_tag1,
+			mboxq->mcqe.trailer);
 
-	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-			"0345 Resetting board due to mailbox timeout\n");
+	/*
+	 * The available vpd length cannot be bigger than the
+	 * DMA buffer passed to the port.  Catch the less than
+	 * case and update the caller's size.
+	 */
+	if (mqe->un.read_rev.avail_vpd_len < *vpd_size)
+		*vpd_size = mqe->un.read_rev.avail_vpd_len;
 
-	/* Reset the HBA device */
-	lpfc_reset_hba(phba);
+	lpfc_sli_pcimem_bcopy(dmabuf->virt, vpd, *vpd_size);
+	dma_free_coherent(&phba->pcidev->dev, dma_size,
+			  dmabuf->virt, dmabuf->phys);
+	kfree(dmabuf);
+	return 0;
 }
 
 /**
- * lpfc_sli_issue_mbox_s3 - Issue an SLI3 mailbox command to firmware
- * @phba: Pointer to HBA context object.
- * @pmbox: Pointer to mailbox object.
- * @flag: Flag indicating how the mailbox need to be processed.
+ * lpfc_sli4_arm_cqeq_intr - Arm sli-4 device completion and event queues
+ * @phba: pointer to lpfc hba data structure.
  *
- * This function is called by discovery code and HBA management code
- * to submit a mailbox command to firmware with SLI-3 interface spec. This
- * function gets the hbalock to protect the data structures.
- * The mailbox command can be submitted in polling mode, in which case
- * this function will wait in a polling loop for the completion of the
- * mailbox.
- * If the mailbox is submitted in no_wait mode (not polling) the
- * function will submit the command and returns immediately without waiting
- * for the mailbox completion. The no_wait is supported only when HBA
- * is in SLI2/SLI3 mode - interrupts are enabled.
- * The SLI interface allows only one mailbox pending at a time. If the
- * mailbox is issued in polling mode and there is already a mailbox
- * pending, then the function will return an error. If the mailbox is issued
- * in NO_WAIT mode and there is a mailbox pending already, the function
- * will return MBX_BUSY after queuing the mailbox into mailbox queue.
- * The sli layer owns the mailbox object until the completion of mailbox
- * command if this function return MBX_BUSY or MBX_SUCCESS. For all other
- * return codes the caller owns the mailbox command after the return of
- * the function.
+ * This routine is called to explicitly arm the SLI4 device's completion and
+ * event queues
+ **/
+static void
+lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
+{
+	uint8_t fcp_eqidx;
+
+	lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM);
+	lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM);
+	lpfc_sli4_cq_release(phba->sli4_hba.rxq_cq, LPFC_QUEUE_REARM);
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++)
+		lpfc_sli4_cq_release(phba->sli4_hba.fcp_cq[fcp_eqidx],
+				     LPFC_QUEUE_REARM);
+	lpfc_sli4_eq_release(phba->sli4_hba.sp_eq, LPFC_QUEUE_REARM);
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++)
+		lpfc_sli4_eq_release(phba->sli4_hba.fp_eq[fcp_eqidx],
+				     LPFC_QUEUE_REARM);
+}
+
+/**
+ * lpfc_sli4_hba_setup - SLI4 device intialization PCI function
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is the main SLI4 device intialization PCI function. This
+ * function is called by the HBA intialization code, HBA reset code and
+ * HBA error attention handler code. Caller is not required to hold any
+ * locks.
+ **/
+int
+lpfc_sli4_hba_setup(struct lpfc_hba *phba)
+{
+	int rc;
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_mqe *mqe;
+	uint8_t *vpd;
+	uint32_t vpd_size;
+	uint32_t ftr_rsp = 0;
+	struct Scsi_Host *shost = lpfc_shost_from_vport(phba->pport);
+	struct lpfc_vport *vport = phba->pport;
+	struct lpfc_dmabuf *mp;
+
+	/* Perform a PCI function reset to start from clean */
+	rc = lpfc_pci_function_reset(phba);
+	if (unlikely(rc))
+		return -ENODEV;
+
+	/* Check the HBA Host Status Register for readyness */
+	rc = lpfc_sli4_post_status_check(phba);
+	if (unlikely(rc))
+		return -ENODEV;
+	else {
+		spin_lock_irq(&phba->hbalock);
+		phba->sli.sli_flag |= LPFC_SLI_ACTIVE;
+		spin_unlock_irq(&phba->hbalock);
+	}
+
+	/*
+	 * Allocate a single mailbox container for initializing the
+	 * port.
+	 */
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		return -ENOMEM;
+
+	/*
+	 * Continue initialization with default values even if driver failed
+	 * to read FCoE param config regions
+	 */
+	if (lpfc_sli4_read_fcoe_params(phba, mboxq))
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_INIT,
+			"2570 Failed to read FCoE parameters \n");
+
+	/* Issue READ_REV to collect vpd and FW information. */
+	vpd_size = PAGE_SIZE;
+	vpd = kzalloc(vpd_size, GFP_KERNEL);
+	if (!vpd) {
+		rc = -ENOMEM;
+		goto out_free_mbox;
+	}
+
+	rc = lpfc_sli4_read_rev(phba, mboxq, vpd, &vpd_size);
+	if (unlikely(rc))
+		goto out_free_vpd;
+
+	mqe = &mboxq->u.mqe;
+	if ((bf_get(lpfc_mbx_rd_rev_sli_lvl,
+		    &mqe->un.read_rev) != LPFC_SLI_REV4) ||
+	    (bf_get(lpfc_mbx_rd_rev_fcoe, &mqe->un.read_rev) == 0)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+			"0376 READ_REV Error. SLI Level %d "
+			"FCoE enabled %d\n",
+			bf_get(lpfc_mbx_rd_rev_sli_lvl, &mqe->un.read_rev),
+			bf_get(lpfc_mbx_rd_rev_fcoe, &mqe->un.read_rev));
+		rc = -EIO;
+		goto out_free_vpd;
+	}
+	/* Single threaded at this point, no need for lock */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag |= HBA_FCOE_SUPPORT;
+	spin_unlock_irq(&phba->hbalock);
+	/*
+	 * Evaluate the read rev and vpd data. Populate the driver
+	 * state with the results. If this routine fails, the failure
+	 * is not fatal as the driver will use generic values.
+	 */
+	rc = lpfc_parse_vpd(phba, vpd, vpd_size);
+	if (unlikely(!rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0377 Error %d parsing vpd. "
+				"Using defaults.\n", rc);
+		rc = 0;
+	}
+
+	/* By now, we should determine the SLI revision, hard code for now */
+	phba->sli_rev = LPFC_SLI_REV4;
+
+	/*
+	 * Discover the port's supported feature set and match it against the
+	 * hosts requests.
+	 */
+	lpfc_request_features(phba, mboxq);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	if (unlikely(rc)) {
+		rc = -EIO;
+		goto out_free_vpd;
+	}
+
+	/*
+	 * The port must support FCP initiator mode as this is the
+	 * only mode running in the host.
+	 */
+	if (!(bf_get(lpfc_mbx_rq_ftr_rsp_fcpi, &mqe->un.req_ftrs))) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+				"0378 No support for fcpi mode.\n");
+		ftr_rsp++;
+	}
+
+	/*
+	 * If the port cannot support the host's requested features
+	 * then turn off the global config parameters to disable the
+	 * feature in the driver.  This is not a fatal error.
+	 */
+	if ((phba->cfg_enable_bg) &&
+	    !(bf_get(lpfc_mbx_rq_ftr_rsp_dif, &mqe->un.req_ftrs)))
+		ftr_rsp++;
+
+	if (phba->max_vpi && phba->cfg_enable_npiv &&
+	    !(bf_get(lpfc_mbx_rq_ftr_rsp_npiv, &mqe->un.req_ftrs)))
+		ftr_rsp++;
+
+	if (ftr_rsp) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+				"0379 Feature Mismatch Data: x%08x %08x "
+				"x%x x%x x%x\n", mqe->un.req_ftrs.word2,
+				mqe->un.req_ftrs.word3, phba->cfg_enable_bg,
+				phba->cfg_enable_npiv, phba->max_vpi);
+		if (!(bf_get(lpfc_mbx_rq_ftr_rsp_dif, &mqe->un.req_ftrs)))
+			phba->cfg_enable_bg = 0;
+		if (!(bf_get(lpfc_mbx_rq_ftr_rsp_npiv, &mqe->un.req_ftrs)))
+			phba->cfg_enable_npiv = 0;
+	}
+
+	/* These SLI3 features are assumed in SLI4 */
+	spin_lock_irq(&phba->hbalock);
+	phba->sli3_options |= (LPFC_SLI3_NPIV_ENABLED | LPFC_SLI3_HBQ_ENABLED);
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Read the port's service parameters. */
+	lpfc_read_sparam(phba, mboxq, vport->vpi);
+	mboxq->vport = vport;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	mp = (struct lpfc_dmabuf *) mboxq->context1;
+	if (rc == MBX_SUCCESS) {
+		memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm));
+		rc = 0;
+	}
+
+	/*
+	 * This memory was allocated by the lpfc_read_sparam routine. Release
+	 * it to the mbuf pool.
+	 */
+	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+	kfree(mp);
+	mboxq->context1 = NULL;
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0382 READ_SPARAM command failed "
+				"status %d, mbxStatus x%x\n",
+				rc, bf_get(lpfc_mqe_status, mqe));
+		phba->link_state = LPFC_HBA_ERROR;
+		rc = -EIO;
+		goto out_free_vpd;
+	}
+
+	if (phba->cfg_soft_wwnn)
+		u64_to_wwn(phba->cfg_soft_wwnn,
+			   vport->fc_sparam.nodeName.u.wwn);
+	if (phba->cfg_soft_wwpn)
+		u64_to_wwn(phba->cfg_soft_wwpn,
+			   vport->fc_sparam.portName.u.wwn);
+	memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
+	       sizeof(struct lpfc_name));
+	memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
+	       sizeof(struct lpfc_name));
+
+	/* Update the fc_host data structures with new wwn. */
+	fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn);
+	fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn);
+
+	/* Register SGL pool to the device using non-embedded mailbox command */
+	rc = lpfc_sli4_post_sgl_list(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0582 Error %d during sgl post operation", rc);
+		rc = -ENODEV;
+		goto out_free_vpd;
+	}
+
+	/* Register SCSI SGL pool to the device */
+	rc = lpfc_sli4_repost_scsi_sgl_list(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_SLI,
+				"0383 Error %d during scsi sgl post opeation",
+				rc);
+		/* Some Scsi buffers were moved to the abort scsi list */
+		/* A pci function reset will repost them */
+		rc = -ENODEV;
+		goto out_free_vpd;
+	}
+
+	/* Post the rpi header region to the device. */
+	rc = lpfc_sli4_post_all_rpi_hdrs(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0393 Error %d during rpi post operation\n",
+				rc);
+		rc = -ENODEV;
+		goto out_free_vpd;
+	}
+	/* Temporary initialization of lpfc_fip_flag to non-fip */
+	bf_set(lpfc_fip_flag, &phba->sli4_hba.sli4_flags, 0);
+
+	/* Set up all the queues to the device */
+	rc = lpfc_sli4_queue_setup(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0381 Error %d during queue setup.\n ", rc);
+		goto out_stop_timers;
+	}
+
+	/* Arm the CQs and then EQs on device */
+	lpfc_sli4_arm_cqeq_intr(phba);
+
+	/* Indicate device interrupt mode */
+	phba->sli4_hba.intr_enable = 1;
+
+	/* Allow asynchronous mailbox command to go through */
+	spin_lock_irq(&phba->hbalock);
+	phba->sli.sli_flag &= ~LPFC_SLI_ASYNC_MBX_BLK;
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Post receive buffers to the device */
+	lpfc_sli4_rb_setup(phba);
+
+	/* Start the ELS watchdog timer */
+	/*
+	 * The driver for SLI4 is not yet ready to process timeouts
+	 * or interrupts.  Once it is, the comment bars can be removed.
+	 */
+	/* mod_timer(&vport->els_tmofunc,
+	 *           jiffies + HZ * (phba->fc_ratov*2)); */
+
+	/* Start heart beat timer */
+	mod_timer(&phba->hb_tmofunc,
+		  jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+	phba->hb_outstanding = 0;
+	phba->last_completion_time = jiffies;
+
+	/* Start error attention (ERATT) polling timer */
+	mod_timer(&phba->eratt_poll, jiffies + HZ * LPFC_ERATT_POLL_INTERVAL);
+
+	/*
+	 * The port is ready, set the host's link state to LINK_DOWN
+	 * in preparation for link interrupts.
+	 */
+	lpfc_init_link(phba, mboxq, phba->cfg_topology, phba->cfg_link_speed);
+	mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	lpfc_set_loopback_flag(phba);
+	/* Change driver state to LPFC_LINK_DOWN right before init link */
+	spin_lock_irq(&phba->hbalock);
+	phba->link_state = LPFC_LINK_DOWN;
+	spin_unlock_irq(&phba->hbalock);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (unlikely(rc != MBX_NOT_FINISHED)) {
+		kfree(vpd);
+		return 0;
+	} else
+		rc = -EIO;
+
+	/* Unset all the queues set up in this routine when error out */
+	if (rc)
+		lpfc_sli4_queue_unset(phba);
+
+out_stop_timers:
+	if (rc)
+		lpfc_stop_hba_timers(phba);
+out_free_vpd:
+	kfree(vpd);
+out_free_mbox:
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return rc;
+}
+
+/**
+ * lpfc_mbox_timeout - Timeout call back function for mbox timer
+ * @ptr: context object - pointer to hba structure.
+ *
+ * This is the callback function for mailbox timer. The mailbox
+ * timer is armed when a new mailbox command is issued and the timer
+ * is deleted when the mailbox complete. The function is called by
+ * the kernel timer code when a mailbox does not complete within
+ * expected time. This function wakes up the worker thread to
+ * process the mailbox timeout and returns. All the processing is
+ * done by the worker thread function lpfc_mbox_timeout_handler.
+ **/
+void
+lpfc_mbox_timeout(unsigned long ptr)
+{
+	struct lpfc_hba  *phba = (struct lpfc_hba *) ptr;
+	unsigned long iflag;
+	uint32_t tmo_posted;
+
+	spin_lock_irqsave(&phba->pport->work_port_lock, iflag);
+	tmo_posted = phba->pport->work_port_events & WORKER_MBOX_TMO;
+	if (!tmo_posted)
+		phba->pport->work_port_events |= WORKER_MBOX_TMO;
+	spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag);
+
+	if (!tmo_posted)
+		lpfc_worker_wake_up(phba);
+	return;
+}
+
+
+/**
+ * lpfc_mbox_timeout_handler - Worker thread function to handle mailbox timeout
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called from worker thread when a mailbox command times out.
+ * The caller is not required to hold any locks. This function will reset the
+ * HBA and recover all the pending commands.
+ **/
+void
+lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active;
+	MAILBOX_t *mb = &pmbox->mb;
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_sli_ring *pring;
+
+	/* Check the pmbox pointer first.  There is a race condition
+	 * between the mbox timeout handler getting executed in the
+	 * worklist and the mailbox actually completing. When this
+	 * race condition occurs, the mbox_active will be NULL.
+	 */
+	spin_lock_irq(&phba->hbalock);
+	if (pmbox == NULL) {
+		lpfc_printf_log(phba, KERN_WARNING,
+				LOG_MBOX | LOG_SLI,
+				"0353 Active Mailbox cleared - mailbox timeout "
+				"exiting\n");
+		spin_unlock_irq(&phba->hbalock);
+		return;
+	}
+
+	/* Mbox cmd <mbxCommand> timeout */
+	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+			"0310 Mailbox command x%x timeout Data: x%x x%x x%p\n",
+			mb->mbxCommand,
+			phba->pport->port_state,
+			phba->sli.sli_flag,
+			phba->sli.mbox_active);
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Setting state unknown so lpfc_sli_abort_iocb_ring
+	 * would get IOCB_ERROR from lpfc_sli_issue_iocb, allowing
+	 * it to fail all oustanding SCSI IO.
+	 */
+	spin_lock_irq(&phba->pport->work_port_lock);
+	phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
+	spin_unlock_irq(&phba->pport->work_port_lock);
+	spin_lock_irq(&phba->hbalock);
+	phba->link_state = LPFC_LINK_UNKNOWN;
+	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	spin_unlock_irq(&phba->hbalock);
+
+	pring = &psli->ring[psli->fcp_ring];
+	lpfc_sli_abort_iocb_ring(phba, pring);
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+			"0345 Resetting board due to mailbox timeout\n");
+
+	/* Reset the HBA device */
+	lpfc_reset_hba(phba);
+}
+
+/**
+ * lpfc_sli_issue_mbox_s3 - Issue an SLI3 mailbox command to firmware
+ * @phba: Pointer to HBA context object.
+ * @pmbox: Pointer to mailbox object.
+ * @flag: Flag indicating how the mailbox need to be processed.
+ *
+ * This function is called by discovery code and HBA management code
+ * to submit a mailbox command to firmware with SLI-3 interface spec. This
+ * function gets the hbalock to protect the data structures.
+ * The mailbox command can be submitted in polling mode, in which case
+ * this function will wait in a polling loop for the completion of the
+ * mailbox.
+ * If the mailbox is submitted in no_wait mode (not polling) the
+ * function will submit the command and returns immediately without waiting
+ * for the mailbox completion. The no_wait is supported only when HBA
+ * is in SLI2/SLI3 mode - interrupts are enabled.
+ * The SLI interface allows only one mailbox pending at a time. If the
+ * mailbox is issued in polling mode and there is already a mailbox
+ * pending, then the function will return an error. If the mailbox is issued
+ * in NO_WAIT mode and there is a mailbox pending already, the function
+ * will return MBX_BUSY after queuing the mailbox into mailbox queue.
+ * The sli layer owns the mailbox object until the completion of mailbox
+ * command if this function return MBX_BUSY or MBX_SUCCESS. For all other
+ * return codes the caller owns the mailbox command after the return of
+ * the function.
  **/
 static int
 lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
@@ -3812,13 +4533,420 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 
 out_not_finished:
 	if (processing_queue) {
-		pmbox->mb.mbxStatus = MBX_NOT_FINISHED;
+		pmbox->u.mb.mbxStatus = MBX_NOT_FINISHED;
 		lpfc_mbox_cmpl_put(phba, pmbox);
 	}
 	return MBX_NOT_FINISHED;
 }
 
 /**
+ * lpfc_sli4_post_sync_mbox - Post an SLI4 mailbox to the bootstrap mailbox
+ * @phba: Pointer to HBA context object.
+ * @mboxq: Pointer to mailbox object.
+ *
+ * The function posts a mailbox to the port.  The mailbox is expected
+ * to be comletely filled in and ready for the port to operate on it.
+ * This routine executes a synchronous completion operation on the
+ * mailbox by polling for its completion.
+ *
+ * The caller must not be holding any locks when calling this routine.
+ *
+ * Returns:
+ *	MBX_SUCCESS - mailbox posted successfully
+ *	Any of the MBX error values.
+ **/
+static int
+lpfc_sli4_post_sync_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	int rc = MBX_SUCCESS;
+	unsigned long iflag;
+	uint32_t db_ready;
+	uint32_t mcqe_status;
+	uint32_t mbx_cmnd;
+	unsigned long timeout;
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_mqe *mb = &mboxq->u.mqe;
+	struct lpfc_bmbx_create *mbox_rgn;
+	struct dma_address *dma_address;
+	struct lpfc_register bmbx_reg;
+
+	/*
+	 * Only one mailbox can be active to the bootstrap mailbox region
+	 * at a time and there is no queueing provided.
+	 */
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2532 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, MBX_POLL);
+		return MBXERR_ERROR;
+	}
+	/* The server grabs the token and owns it until release */
+	psli->sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+	phba->sli.mbox_active = mboxq;
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+	/*
+	 * Initialize the bootstrap memory region to avoid stale data areas
+	 * in the mailbox post.  Then copy the caller's mailbox contents to
+	 * the bmbx mailbox region.
+	 */
+	mbx_cmnd = bf_get(lpfc_mqe_command, mb);
+	memset(phba->sli4_hba.bmbx.avirt, 0, sizeof(struct lpfc_bmbx_create));
+	lpfc_sli_pcimem_bcopy(mb, phba->sli4_hba.bmbx.avirt,
+			      sizeof(struct lpfc_mqe));
+
+	/* Post the high mailbox dma address to the port and wait for ready. */
+	dma_address = &phba->sli4_hba.bmbx.dma_address;
+	writel(dma_address->addr_hi, phba->sli4_hba.BMBXregaddr);
+
+	timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mbx_cmnd)
+				   * 1000) + jiffies;
+	do {
+		bmbx_reg.word0 = readl(phba->sli4_hba.BMBXregaddr);
+		db_ready = bf_get(lpfc_bmbx_rdy, &bmbx_reg);
+		if (!db_ready)
+			msleep(2);
+
+		if (time_after(jiffies, timeout)) {
+			rc = MBXERR_ERROR;
+			goto exit;
+		}
+	} while (!db_ready);
+
+	/* Post the low mailbox dma address to the port. */
+	writel(dma_address->addr_lo, phba->sli4_hba.BMBXregaddr);
+	timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mbx_cmnd)
+				   * 1000) + jiffies;
+	do {
+		bmbx_reg.word0 = readl(phba->sli4_hba.BMBXregaddr);
+		db_ready = bf_get(lpfc_bmbx_rdy, &bmbx_reg);
+		if (!db_ready)
+			msleep(2);
+
+		if (time_after(jiffies, timeout)) {
+			rc = MBXERR_ERROR;
+			goto exit;
+		}
+	} while (!db_ready);
+
+	/*
+	 * Read the CQ to ensure the mailbox has completed.
+	 * If so, update the mailbox status so that the upper layers
+	 * can complete the request normally.
+	 */
+	lpfc_sli_pcimem_bcopy(phba->sli4_hba.bmbx.avirt, mb,
+			      sizeof(struct lpfc_mqe));
+	mbox_rgn = (struct lpfc_bmbx_create *) phba->sli4_hba.bmbx.avirt;
+	lpfc_sli_pcimem_bcopy(&mbox_rgn->mcqe, &mboxq->mcqe,
+			      sizeof(struct lpfc_mcqe));
+	mcqe_status = bf_get(lpfc_mcqe_status, &mbox_rgn->mcqe);
+
+	/* Prefix the mailbox status with range x4000 to note SLI4 status. */
+	if (mcqe_status != MB_CQE_STATUS_SUCCESS) {
+		bf_set(lpfc_mqe_status, mb, LPFC_MBX_ERROR_RANGE | mcqe_status);
+		rc = MBXERR_ERROR;
+	}
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):0356 Mailbox cmd x%x (x%x) Status x%x "
+			"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x x%x x%x"
+			" x%x x%x CQ: x%x x%x x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0,
+			mbx_cmnd, lpfc_sli4_mbox_opcode_get(phba, mboxq),
+			bf_get(lpfc_mqe_status, mb),
+			mb->un.mb_words[0], mb->un.mb_words[1],
+			mb->un.mb_words[2], mb->un.mb_words[3],
+			mb->un.mb_words[4], mb->un.mb_words[5],
+			mb->un.mb_words[6], mb->un.mb_words[7],
+			mb->un.mb_words[8], mb->un.mb_words[9],
+			mb->un.mb_words[10], mb->un.mb_words[11],
+			mb->un.mb_words[12], mboxq->mcqe.word0,
+			mboxq->mcqe.mcqe_tag0, 	mboxq->mcqe.mcqe_tag1,
+			mboxq->mcqe.trailer);
+exit:
+	/* We are holding the token, no needed for lock when release */
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	phba->sli.mbox_active = NULL;
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+	return rc;
+}
+
+/**
+ * lpfc_sli_issue_mbox_s4 - Issue an SLI4 mailbox command to firmware
+ * @phba: Pointer to HBA context object.
+ * @pmbox: Pointer to mailbox object.
+ * @flag: Flag indicating how the mailbox need to be processed.
+ *
+ * This function is called by discovery code and HBA management code to submit
+ * a mailbox command to firmware with SLI-4 interface spec.
+ *
+ * Return codes the caller owns the mailbox command after the return of the
+ * function.
+ **/
+static int
+lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq,
+		       uint32_t flag)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	unsigned long iflags;
+	int rc;
+
+	/* Detect polling mode and jump to a handler */
+	if (!phba->sli4_hba.intr_enable) {
+		if (flag == MBX_POLL)
+			rc = lpfc_sli4_post_sync_mbox(phba, mboxq);
+		else
+			rc = -EIO;
+		if (rc != MBX_SUCCESS)
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+					"(%d):2541 Mailbox command x%x "
+					"(x%x) cannot issue Data: x%x x%x\n",
+					mboxq->vport ? mboxq->vport->vpi : 0,
+					mboxq->u.mb.mbxCommand,
+					lpfc_sli4_mbox_opcode_get(phba, mboxq),
+					psli->sli_flag, flag);
+		return rc;
+	} else if (flag == MBX_POLL) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2542 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, flag);
+		return -EIO;
+	}
+
+	/* Now, interrupt mode asynchrous mailbox command */
+	rc = lpfc_mbox_cmd_check(phba, mboxq);
+	if (rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2543 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, flag);
+		goto out_not_finished;
+	}
+	rc = lpfc_mbox_dev_check(phba);
+	if (unlikely(rc)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2544 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, flag);
+		goto out_not_finished;
+	}
+
+	/* Put the mailbox command to the driver internal FIFO */
+	psli->slistat.mbox_busy++;
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	lpfc_mbox_put(phba, mboxq);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):0354 Mbox cmd issue - Enqueue Data: "
+			"x%x (x%x) x%x x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0xffffff,
+			bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+			lpfc_sli4_mbox_opcode_get(phba, mboxq),
+			phba->pport->port_state,
+			psli->sli_flag, MBX_NOWAIT);
+	/* Wake up worker thread to transport mailbox command from head */
+	lpfc_worker_wake_up(phba);
+
+	return MBX_BUSY;
+
+out_not_finished:
+	return MBX_NOT_FINISHED;
+}
+
+/**
+ * lpfc_sli4_post_async_mbox - Post an SLI4 mailbox command to device
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called by worker thread to send a mailbox command to
+ * SLI4 HBA firmware.
+ *
+ **/
+int
+lpfc_sli4_post_async_mbox(struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli = &phba->sli;
+	LPFC_MBOXQ_t *mboxq;
+	int rc = MBX_SUCCESS;
+	unsigned long iflags;
+	struct lpfc_mqe *mqe;
+	uint32_t mbx_cmnd;
+
+	/* Check interrupt mode before post async mailbox command */
+	if (unlikely(!phba->sli4_hba.intr_enable))
+		return MBX_NOT_FINISHED;
+
+	/* Check for mailbox command service token */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	if (unlikely(psli->sli_flag & LPFC_SLI_ASYNC_MBX_BLK)) {
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		return MBX_NOT_FINISHED;
+	}
+	if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		return MBX_NOT_FINISHED;
+	}
+	if (unlikely(phba->sli.mbox_active)) {
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"0384 There is pending active mailbox cmd\n");
+		return MBX_NOT_FINISHED;
+	}
+	/* Take the mailbox command service token */
+	psli->sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+
+	/* Get the next mailbox command from head of queue */
+	mboxq = lpfc_mbox_get(phba);
+
+	/* If no more mailbox command waiting for post, we're done */
+	if (!mboxq) {
+		psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		return MBX_SUCCESS;
+	}
+	phba->sli.mbox_active = mboxq;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	/* Check device readiness for posting mailbox command */
+	rc = lpfc_mbox_dev_check(phba);
+	if (unlikely(rc))
+		/* Driver clean routine will clean up pending mailbox */
+		goto out_not_finished;
+
+	/* Prepare the mbox command to be posted */
+	mqe = &mboxq->u.mqe;
+	mbx_cmnd = bf_get(lpfc_mqe_command, mqe);
+
+	/* Start timer for the mbox_tmo and log some mailbox post messages */
+	mod_timer(&psli->mbox_tmo, (jiffies +
+		  (HZ * lpfc_mbox_tmo_val(phba, mbx_cmnd))));
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
+			"(%d):0355 Mailbox cmd x%x (x%x) issue Data: "
+			"x%x x%x\n",
+			mboxq->vport ? mboxq->vport->vpi : 0, mbx_cmnd,
+			lpfc_sli4_mbox_opcode_get(phba, mboxq),
+			phba->pport->port_state, psli->sli_flag);
+
+	if (mbx_cmnd != MBX_HEARTBEAT) {
+		if (mboxq->vport) {
+			lpfc_debugfs_disc_trc(mboxq->vport,
+				LPFC_DISC_TRC_MBOX_VPORT,
+				"MBOX Send vport: cmd:x%x mb:x%x x%x",
+				mbx_cmnd, mqe->un.mb_words[0],
+				mqe->un.mb_words[1]);
+		} else {
+			lpfc_debugfs_disc_trc(phba->pport,
+				LPFC_DISC_TRC_MBOX,
+				"MBOX Send: cmd:x%x mb:x%x x%x",
+				mbx_cmnd, mqe->un.mb_words[0],
+				mqe->un.mb_words[1]);
+		}
+	}
+	psli->slistat.mbox_cmd++;
+
+	/* Post the mailbox command to the port */
+	rc = lpfc_sli4_mq_put(phba->sli4_hba.mbx_wq, mqe);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+				"(%d):2533 Mailbox command x%x (x%x) "
+				"cannot issue Data: x%x x%x\n",
+				mboxq->vport ? mboxq->vport->vpi : 0,
+				mboxq->u.mb.mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, mboxq),
+				psli->sli_flag, MBX_NOWAIT);
+		goto out_not_finished;
+	}
+
+	return rc;
+
+out_not_finished:
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	mboxq->u.mb.mbxStatus = MBX_NOT_FINISHED;
+	__lpfc_mbox_cmpl_put(phba, mboxq);
+	/* Release the token */
+	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	phba->sli.mbox_active = NULL;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	return MBX_NOT_FINISHED;
+}
+
+/**
+ * lpfc_sli_issue_mbox - Wrapper func for issuing mailbox command
+ * @phba: Pointer to HBA context object.
+ * @pmbox: Pointer to mailbox object.
+ * @flag: Flag indicating how the mailbox need to be processed.
+ *
+ * This routine wraps the actual SLI3 or SLI4 mailbox issuing routine from
+ * the API jump table function pointer from the lpfc_hba struct.
+ *
+ * Return codes the caller owns the mailbox command after the return of the
+ * function.
+ **/
+int
+lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
+{
+	return phba->lpfc_sli_issue_mbox(phba, pmbox, flag);
+}
+
+/**
+ * lpfc_mbox_api_table_setup - Set up mbox api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the mbox interface API function jump table in @phba
+ * struct.
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_mbox_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->lpfc_sli_issue_mbox = lpfc_sli_issue_mbox_s3;
+		phba->lpfc_sli_handle_slow_ring_event =
+				lpfc_sli_handle_slow_ring_event_s3;
+		phba->lpfc_sli_hbq_to_firmware = lpfc_sli_hbq_to_firmware_s3;
+		phba->lpfc_sli_brdrestart = lpfc_sli_brdrestart_s3;
+		phba->lpfc_sli_brdready = lpfc_sli_brdready_s3;
+		break;
+	case LPFC_PCI_DEV_OC:
+		phba->lpfc_sli_issue_mbox = lpfc_sli_issue_mbox_s4;
+		phba->lpfc_sli_handle_slow_ring_event =
+				lpfc_sli_handle_slow_ring_event_s4;
+		phba->lpfc_sli_hbq_to_firmware = lpfc_sli_hbq_to_firmware_s4;
+		phba->lpfc_sli_brdrestart = lpfc_sli_brdrestart_s4;
+		phba->lpfc_sli_brdready = lpfc_sli_brdready_s4;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1420 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	return 0;
+}
+
+/**
  * __lpfc_sli_ringtx_put - Add an iocb to the txq
  * @phba: Pointer to HBA context object.
  * @pring: Pointer to driver SLI ring object.
@@ -4501,28 +5629,42 @@ lpfc_sli_hba_down(struct lpfc_hba *phba)
 
 	/* Return any active mbox cmds */
 	del_timer_sync(&psli->mbox_tmo);
-	spin_lock_irqsave(&phba->hbalock, flags);
 
-	spin_lock(&phba->pport->work_port_lock);
+	spin_lock_irqsave(&phba->pport->work_port_lock, flags);
 	phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
-	spin_unlock(&phba->pport->work_port_lock);
+	spin_unlock_irqrestore(&phba->pport->work_port_lock, flags);
 
-	/* Return any pending or completed mbox cmds */
-	list_splice_init(&phba->sli.mboxq, &completions);
-	if (psli->mbox_active) {
-		list_add_tail(&psli->mbox_active->list, &completions);
-		psli->mbox_active = NULL;
-		psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
-	}
-	list_splice_init(&phba->sli.mboxq_cmpl, &completions);
-	spin_unlock_irqrestore(&phba->hbalock, flags);
+	return 1;
+}
+
+/**
+ * lpfc_sli4_hba_down - PCI function resource cleanup for the SLI4 HBA
+ * @phba: Pointer to HBA context object.
+ *
+ * This function cleans up all queues, iocb, buffers, mailbox commands while
+ * shutting down the SLI4 HBA FCoE function. This function is called with no
+ * lock held and always returns 1.
+ *
+ * This function does the following to cleanup driver FCoE function resources:
+ * - Free discovery resources for each virtual port
+ * - Cleanup any pending fabric iocbs
+ * - Iterate through the iocb txq and free each entry in the list.
+ * - Free up any buffer posted to the HBA.
+ * - Clean up all the queue entries: WQ, RQ, MQ, EQ, CQ, etc.
+ * - Free mailbox commands in the mailbox queue.
+ **/
+int
+lpfc_sli4_hba_down(struct lpfc_hba *phba)
+{
+	/* Stop the SLI4 device port */
+	lpfc_stop_port(phba);
+
+	/* Tear down the queues in the HBA */
+	lpfc_sli4_queue_unset(phba);
+
+	/* unregister default FCFI from the HBA */
+	lpfc_sli4_fcfi_unreg(phba, phba->fcf.fcfi);
 
-	while (!list_empty(&completions)) {
-		list_remove_head(&completions, pmb, LPFC_MBOXQ_t, list);
-		pmb->mb.mbxStatus = MBX_NOT_FINISHED;
-		if (pmb->mbox_cmpl)
-			pmb->mbox_cmpl(phba,pmb);
-	}
 	return 1;
 }
 
@@ -4853,7 +5995,10 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	iabt = &abtsiocbp->iocb;
 	iabt->un.acxri.abortType = ABORT_TYPE_ABTS;
 	iabt->un.acxri.abortContextTag = icmd->ulpContext;
-	iabt->un.acxri.abortIoTag = icmd->ulpIoTag;
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		iabt->un.acxri.abortIoTag = cmdiocb->sli4_xritag;
+	else
+		iabt->un.acxri.abortIoTag = icmd->ulpIoTag;
 	iabt->ulpLe = 1;
 	iabt->ulpClass = icmd->ulpClass;
 
@@ -4869,7 +6014,7 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 			 "abort cmd iotag x%x\n",
 			 iabt->un.acxri.abortContextTag,
 			 iabt->un.acxri.abortIoTag, abtsiocbp->iotag);
-	retval = __lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0);
+	retval = __lpfc_sli_issue_iocb(phba, pring->ringno, abtsiocbp, 0);
 
 	if (retval)
 		__lpfc_sli_release_iocbq(phba, abtsiocbp);
@@ -5052,7 +6197,10 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 		cmd = &iocbq->iocb;
 		abtsiocb->iocb.un.acxri.abortType = ABORT_TYPE_ABTS;
 		abtsiocb->iocb.un.acxri.abortContextTag = cmd->ulpContext;
-		abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag;
+		if (phba->sli_rev == LPFC_SLI_REV4)
+			abtsiocb->iocb.un.acxri.abortIoTag = iocbq->sli4_xritag;
+		else
+			abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag;
 		abtsiocb->iocb.ulpLe = 1;
 		abtsiocb->iocb.ulpClass = cmd->ulpClass;
 		abtsiocb->vport = phba->pport;
@@ -5064,7 +6212,8 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 
 		/* Setup callback routine and issue the command. */
 		abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
-		ret_val = lpfc_sli_issue_iocb(phba, pring, abtsiocb, 0);
+		ret_val = lpfc_sli_issue_iocb(phba, pring->ringno,
+					      abtsiocb, 0);
 		if (ret_val == IOCB_ERROR) {
 			lpfc_sli_release_iocbq(phba, abtsiocb);
 			errcnt++;
@@ -5145,7 +6294,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
  **/
 int
 lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba,
-			 struct lpfc_sli_ring *pring,
+			 uint32_t ring_number,
 			 struct lpfc_iocbq *piocb,
 			 struct lpfc_iocbq *prspiocbq,
 			 uint32_t timeout)
@@ -5176,7 +6325,7 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba,
 		readl(phba->HCregaddr); /* flush */
 	}
 
-	retval = lpfc_sli_issue_iocb(phba, pring, piocb, 0);
+	retval = lpfc_sli_issue_iocb(phba, ring_number, piocb, 0);
 	if (retval == IOCB_SUCCESS) {
 		timeout_req = timeout * HZ;
 		timeleft = wait_event_timeout(done_q,
@@ -5385,6 +6534,58 @@ lpfc_sli_eratt_read(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_sli4_eratt_read - read sli-4 error attention events
+ * @phba: Pointer to HBA context.
+ *
+ * This function is called to read the SLI4 device error attention registers
+ * for possible error attention events. The caller must hold the hostlock
+ * with spin_lock_irq().
+ *
+ * This fucntion returns 1 when there is Error Attention in the Host Attention
+ * Register and returns 0 otherwise.
+ **/
+static int
+lpfc_sli4_eratt_read(struct lpfc_hba *phba)
+{
+	uint32_t uerr_sta_hi, uerr_sta_lo;
+	uint32_t onlnreg0, onlnreg1;
+
+	/* For now, use the SLI4 device internal unrecoverable error
+	 * registers for error attention. This can be changed later.
+	 */
+	onlnreg0 = readl(phba->sli4_hba.ONLINE0regaddr);
+	onlnreg1 = readl(phba->sli4_hba.ONLINE1regaddr);
+	if ((onlnreg0 != LPFC_ONLINE_NERR) || (onlnreg1 != LPFC_ONLINE_NERR)) {
+		uerr_sta_lo = readl(phba->sli4_hba.UERRLOregaddr);
+		uerr_sta_hi = readl(phba->sli4_hba.UERRHIregaddr);
+		if (uerr_sta_lo || uerr_sta_hi) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1423 HBA Unrecoverable error: "
+					"uerr_lo_reg=0x%x, uerr_hi_reg=0x%x, "
+					"online0_reg=0x%x, online1_reg=0x%x\n",
+					uerr_sta_lo, uerr_sta_hi,
+					onlnreg0, onlnreg1);
+			/* TEMP: as the driver error recover logic is not
+			 * fully developed, we just log the error message
+			 * and the device error attention action is now
+			 * temporarily disabled.
+			 */
+			return 0;
+			phba->work_status[0] = uerr_sta_lo;
+			phba->work_status[1] = uerr_sta_hi;
+			spin_lock_irq(&phba->hbalock);
+			/* Set the driver HA work bitmap */
+			phba->work_ha |= HA_ERATT;
+			/* Indicate polling handles this ERATT */
+			phba->hba_flag |= HBA_ERATT_HANDLED;
+			spin_unlock_irq(&phba->hbalock);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/**
  * lpfc_sli_check_eratt - check error attention events
  * @phba: Pointer to HBA context.
  *
@@ -5434,6 +6635,10 @@ lpfc_sli_check_eratt(struct lpfc_hba *phba)
 		/* Read chip Host Attention (HA) register */
 		ha_copy = lpfc_sli_eratt_read(phba);
 		break;
+	case LPFC_SLI_REV4:
+		/* Read devcie Uncoverable Error (UERR) registers */
+		ha_copy = lpfc_sli4_eratt_read(phba);
+		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0299 Invalid SLI revision (%d)\n",
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 8839386..e6c88ee 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -29,13 +29,23 @@ typedef enum _lpfc_ctx_cmd {
 	LPFC_CTX_HOST
 } lpfc_ctx_cmd;
 
+/* This structure is used to carry the needed response IOCB states */
+struct lpfc_sli4_rspiocb_info {
+	uint8_t hw_status;
+	uint8_t bfield;
+#define LPFC_XB	0x1
+#define LPFC_PV	0x2
+	uint8_t priority;
+	uint8_t reserved;
+};
+
 /* This structure is used to handle IOCB requests / responses */
 struct lpfc_iocbq {
 	/* lpfc_iocbqs are used in double linked lists */
 	struct list_head list;
 	struct list_head clist;
 	uint16_t iotag;         /* pre-assigned IO tag */
-	uint16_t rsvd1;
+	uint16_t sli4_xritag;   /* pre-assigned XRI, (OXID) tag. */
 
 	IOCB_t iocb;		/* IOCB cmd */
 	uint8_t retry;		/* retry counter for IOCB cmd - if needed */
@@ -65,7 +75,7 @@ struct lpfc_iocbq {
 			   struct lpfc_iocbq *);
 	void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
 			   struct lpfc_iocbq *);
-
+	struct lpfc_sli4_rspiocb_info sli4_info;
 };
 
 #define SLI_IOCB_RET_IOCB      1	/* Return IOCB if cmd ring full */
@@ -81,14 +91,18 @@ struct lpfc_iocbq {
 typedef struct lpfcMboxq {
 	/* MBOXQs are used in single linked lists */
 	struct list_head list;	/* ptr to next mailbox command */
-	MAILBOX_t mb;		/* Mailbox cmd */
-	struct lpfc_vport *vport;/* virutal port pointer */
+	union {
+		MAILBOX_t mb;		/* Mailbox cmd */
+		struct lpfc_mqe mqe;
+	} u;
+	struct lpfc_vport *vport;/* virtual port pointer */
 	void *context1;		/* caller context information */
 	void *context2;		/* caller context information */
 
 	void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *);
 	uint8_t mbox_flag;
-
+	struct lpfc_mcqe mcqe;
+	struct lpfc_mbx_nembed_sge_virt *sge_array;
 } LPFC_MBOXQ_t;
 
 #define MBX_POLL        1	/* poll mailbox till command done, then
@@ -234,6 +248,7 @@ struct lpfc_sli {
 #define LPFC_PROCESS_LA           0x400	/* Able to process link attention */
 #define LPFC_BLOCK_MGMT_IO        0x800	/* Don't allow mgmt mbx or iocb cmds */
 #define LPFC_MENLO_MAINT          0x1000 /* need for menl fw download */
+#define LPFC_SLI_ASYNC_MBX_BLK    0x2000 /* Async mailbox is blocked */
 
 	struct lpfc_sli_ring ring[LPFC_MAX_RING];
 	int fcp_ring;		/* ring used for FCP initiator commands */
@@ -261,6 +276,8 @@ struct lpfc_sli {
 
 #define LPFC_MBOX_TMO           30	/* Sec tmo for outstanding mbox
 					   command */
+#define LPFC_MBOX_SLI4_CONFIG_TMO 60	/* Sec tmo for outstanding mbox
+					   command */
 #define LPFC_MBOX_TMO_FLASH_CMD 300     /* Sec tmo for outstanding FLASH write
 					 * or erase cmds. This is especially
 					 * long because of the potential of
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
new file mode 100644
index 0000000..5196b466
--- /dev/null
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -0,0 +1,467 @@
+/*******************************************************************
+ * This file is part of the Emulex Linux Device Driver for         *
+ * Fibre Channel Host Bus Adapters.                                *
+ * Copyright (C) 2009 Emulex.  All rights reserved.                *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *******************************************************************/
+
+#define LPFC_ACTIVE_MBOX_WAIT_CNT               100
+#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32
+#define LPFC_GET_QE_REL_INT			32
+#define LPFC_RPI_LOW_WATER_MARK			10
+/* Number of SGL entries can be posted in a 4KB nonembedded mbox command */
+#define LPFC_NEMBED_MBOX_SGL_CNT		254
+
+/* Multi-queue arrangement for fast-path FCP work queues */
+#define LPFC_FN_EQN_MAX       8
+#define LPFC_SP_EQN_DEF       1
+#define LPFC_FP_EQN_DEF       1
+#define LPFC_FP_EQN_MIN       1
+#define LPFC_FP_EQN_MAX       (LPFC_FN_EQN_MAX - LPFC_SP_EQN_DEF)
+
+#define LPFC_FN_WQN_MAX       32
+#define LPFC_SP_WQN_DEF       1
+#define LPFC_FP_WQN_DEF       4
+#define LPFC_FP_WQN_MIN       1
+#define LPFC_FP_WQN_MAX       (LPFC_FN_WQN_MAX - LPFC_SP_WQN_DEF)
+
+/*
+ * Provide the default FCF Record attributes used by the driver
+ * when nonFIP mode is configured and there is no other default
+ * FCF Record attributes.
+ */
+#define LPFC_FCOE_FCF_DEF_INDEX	0
+#define LPFC_FCOE_FCF_GET_FIRST	0xFFFF
+#define LPFC_FCOE_FCF_NEXT_NONE	0xFFFF
+
+/* First 3 bytes of default FCF MAC is specified by FC_MAP */
+#define LPFC_FCOE_FCF_MAC3	0xFF
+#define LPFC_FCOE_FCF_MAC4	0xFF
+#define LPFC_FCOE_FCF_MAC5	0xFE
+#define LPFC_FCOE_FCF_MAP0	0x0E
+#define LPFC_FCOE_FCF_MAP1	0xFC
+#define LPFC_FCOE_FCF_MAP2	0x00
+#define LPFC_FCOE_MAX_RCV_SIZE	0x5AC
+#define LPFC_FCOE_FKA_ADV_PER	0
+#define LPFC_FCOE_FIP_PRIORITY	0x80
+
+enum lpfc_sli4_queue_type {
+	LPFC_EQ,
+	LPFC_GCQ,
+	LPFC_MCQ,
+	LPFC_WCQ,
+	LPFC_RCQ,
+	LPFC_MQ,
+	LPFC_WQ,
+	LPFC_HRQ,
+	LPFC_DRQ
+};
+
+/* The queue sub-type defines the functional purpose of the queue */
+enum lpfc_sli4_queue_subtype {
+	LPFC_NONE,
+	LPFC_MBOX,
+	LPFC_FCP,
+	LPFC_ELS,
+	LPFC_USOL
+};
+
+union sli4_qe {
+	void *address;
+	struct lpfc_eqe *eqe;
+	struct lpfc_cqe *cqe;
+	struct lpfc_mcqe *mcqe;
+	struct lpfc_wcqe_complete *wcqe_complete;
+	struct lpfc_wcqe_release *wcqe_release;
+	struct sli4_wcqe_xri_aborted *wcqe_xri_aborted;
+	struct lpfc_rcqe_complete *rcqe_complete;
+	struct lpfc_mqe *mqe;
+	union  lpfc_wqe *wqe;
+	struct lpfc_rqe *rqe;
+};
+
+struct lpfc_queue {
+	struct list_head list;
+	enum lpfc_sli4_queue_type type;
+	enum lpfc_sli4_queue_subtype subtype;
+	struct lpfc_hba *phba;
+	struct list_head child_list;
+	uint32_t entry_count;	/* Number of entries to support on the queue */
+	uint32_t entry_size;	/* Size of each queue entry. */
+	uint32_t queue_id;	/* Queue ID assigned by the hardware */
+	struct list_head page_list;
+	uint32_t page_count;	/* Number of pages allocated for this queue */
+
+	uint32_t host_index;	/* The host's index for putting or getting */
+	uint32_t hba_index;	/* The last known hba index for get or put */
+	union sli4_qe qe[1];	/* array to index entries (must be last) */
+};
+
+struct lpfc_cq_event {
+	struct list_head list;
+	union {
+		struct lpfc_mcqe		mcqe_cmpl;
+		struct lpfc_acqe_link		acqe_link;
+		struct lpfc_acqe_fcoe		acqe_fcoe;
+		struct lpfc_acqe_dcbx		acqe_dcbx;
+		struct lpfc_rcqe		rcqe_cmpl;
+		struct sli4_wcqe_xri_aborted	wcqe_axri;
+	} cqe;
+};
+
+struct lpfc_sli4_link {
+	uint8_t speed;
+	uint8_t duplex;
+	uint8_t status;
+	uint8_t physical;
+	uint8_t fault;
+};
+
+struct lpfc_fcf {
+	uint8_t	 fabric_name[8];
+	uint8_t  mac_addr[6];
+	uint16_t fcf_indx;
+	uint16_t fcfi;
+	uint32_t fcf_flag;
+#define FCF_AVAILABLE	0x01 /* FCF available for discovery */
+#define FCF_REGISTERED	0x02 /* FCF registered with FW */
+#define FCF_DISCOVERED	0x04 /* FCF discovery started  */
+#define FCF_BOOT_ENABLE 0x08 /* Boot bios use this FCF */
+#define FCF_IN_USE	0x10 /* Atleast one discovery completed */
+#define FCF_VALID_VLAN	0x20 /* Use the vlan id specified */
+	uint32_t priority;
+	uint32_t addr_mode;
+	uint16_t vlan_id;
+};
+
+#define LPFC_REGION23_SIGNATURE "RG23"
+#define LPFC_REGION23_VERSION	1
+#define LPFC_REGION23_LAST_REC  0xff
+struct lpfc_fip_param_hdr {
+	uint8_t type;
+#define FCOE_PARAM_TYPE		0xA0
+	uint8_t length;
+#define FCOE_PARAM_LENGTH	2
+	uint8_t parm_version;
+#define FIPP_VERSION		0x01
+	uint8_t parm_flags;
+#define	lpfc_fip_param_hdr_fipp_mode_SHIFT	6
+#define	lpfc_fip_param_hdr_fipp_mode_MASK	0x3
+#define lpfc_fip_param_hdr_fipp_mode_WORD	parm_flags
+#define	FIPP_MODE_ON				0x2
+#define	FIPP_MODE_OFF				0x0
+#define FIPP_VLAN_VALID				0x1
+};
+
+struct lpfc_fcoe_params {
+	uint8_t fc_map[3];
+	uint8_t reserved1;
+	uint16_t vlan_tag;
+	uint8_t reserved[2];
+};
+
+struct lpfc_fcf_conn_hdr {
+	uint8_t type;
+#define FCOE_CONN_TBL_TYPE		0xA1
+	uint8_t length;   /* words */
+	uint8_t reserved[2];
+};
+
+struct lpfc_fcf_conn_rec {
+	uint16_t flags;
+#define	FCFCNCT_VALID		0x0001
+#define	FCFCNCT_BOOT		0x0002
+#define	FCFCNCT_PRIMARY		0x0004   /* if not set, Secondary */
+#define	FCFCNCT_FBNM_VALID	0x0008
+#define	FCFCNCT_SWNM_VALID	0x0010
+#define	FCFCNCT_VLAN_VALID	0x0020
+#define	FCFCNCT_AM_VALID	0x0040
+#define	FCFCNCT_AM_PREFERRED	0x0080   /* if not set, AM Required */
+#define	FCFCNCT_AM_SPMA		0x0100	 /* if not set, FPMA */
+
+	uint16_t vlan_tag;
+	uint8_t fabric_name[8];
+	uint8_t switch_name[8];
+};
+
+struct lpfc_fcf_conn_entry {
+	struct list_head list;
+	struct lpfc_fcf_conn_rec conn_rec;
+};
+
+/*
+ * Define the host's bootstrap mailbox.  This structure contains
+ * the member attributes needed to create, use, and destroy the
+ * bootstrap mailbox region.
+ *
+ * The macro definitions for the bmbx data structure are defined
+ * in lpfc_hw4.h with the register definition.
+ */
+struct lpfc_bmbx {
+	struct lpfc_dmabuf *dmabuf;
+	struct dma_address dma_address;
+	void *avirt;
+	dma_addr_t aphys;
+	uint32_t bmbx_size;
+};
+
+#define LPFC_EQE_SIZE LPFC_EQE_SIZE_4
+
+#define LPFC_EQE_SIZE_4B 	4
+#define LPFC_EQE_SIZE_16B	16
+#define LPFC_CQE_SIZE		16
+#define LPFC_WQE_SIZE		64
+#define LPFC_MQE_SIZE		256
+#define LPFC_RQE_SIZE		8
+
+#define LPFC_EQE_DEF_COUNT	1024
+#define LPFC_CQE_DEF_COUNT      256
+#define LPFC_WQE_DEF_COUNT      64
+#define LPFC_MQE_DEF_COUNT      16
+#define LPFC_RQE_DEF_COUNT	512
+
+#define LPFC_QUEUE_NOARM	false
+#define LPFC_QUEUE_REARM	true
+
+
+/*
+ * SLI4 CT field defines
+ */
+#define SLI4_CT_RPI 0
+#define SLI4_CT_VPI 1
+#define SLI4_CT_VFI 2
+#define SLI4_CT_FCFI 3
+
+#define LPFC_SLI4_MAX_SEGMENT_SIZE 0x10000
+
+/*
+ * SLI4 specific data structures
+ */
+struct lpfc_max_cfg_param {
+	uint16_t max_xri;
+	uint16_t xri_base;
+	uint16_t xri_used;
+	uint16_t max_rpi;
+	uint16_t rpi_base;
+	uint16_t rpi_used;
+	uint16_t max_vpi;
+	uint16_t vpi_base;
+	uint16_t vpi_used;
+	uint16_t max_vfi;
+	uint16_t vfi_base;
+	uint16_t vfi_used;
+	uint16_t max_fcfi;
+	uint16_t fcfi_base;
+	uint16_t fcfi_used;
+	uint16_t max_eq;
+	uint16_t max_rq;
+	uint16_t max_cq;
+	uint16_t max_wq;
+};
+
+struct lpfc_hba;
+/* SLI4 HBA multi-fcp queue handler struct */
+struct lpfc_fcp_eq_hdl {
+	uint32_t idx;
+	struct lpfc_hba *phba;
+};
+
+/* SLI4 HBA data structure entries */
+struct lpfc_sli4_hba {
+	void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
+					     PCI BAR0, config space registers */
+	void __iomem *ctrl_regs_memmap_p; /* Kernel memory mapped address for
+					     PCI BAR1, control registers */
+	void __iomem *drbl_regs_memmap_p; /* Kernel memory mapped address for
+					     PCI BAR2, doorbell registers */
+	/* BAR0 PCI config space register memory map */
+	void __iomem *UERRLOregaddr; /* Address to UERR_STATUS_LO register */
+	void __iomem *UERRHIregaddr; /* Address to UERR_STATUS_HI register */
+	void __iomem *ONLINE0regaddr; /* Address to components of internal UE */
+	void __iomem *ONLINE1regaddr; /* Address to components of internal UE */
+#define LPFC_ONLINE_NERR	0xFFFFFFFF
+	void __iomem *SCRATCHPADregaddr; /* Address to scratchpad register */
+	/* BAR1 FCoE function CSR register memory map */
+	void __iomem *STAregaddr;    /* Address to HST_STATE register */
+	void __iomem *ISRregaddr;    /* Address to HST_ISR register */
+	void __iomem *IMRregaddr;    /* Address to HST_IMR register */
+	void __iomem *ISCRregaddr;   /* Address to HST_ISCR register */
+	/* BAR2 VF-0 doorbell register memory map */
+	void __iomem *RQDBregaddr;   /* Address to RQ_DOORBELL register */
+	void __iomem *WQDBregaddr;   /* Address to WQ_DOORBELL register */
+	void __iomem *EQCQDBregaddr; /* Address to EQCQ_DOORBELL register */
+	void __iomem *MQDBregaddr;   /* Address to MQ_DOORBELL register */
+	void __iomem *BMBXregaddr;   /* Address to BootStrap MBX register */
+
+	struct msix_entry *msix_entries;
+	uint32_t cfg_eqn;
+	struct lpfc_fcp_eq_hdl *fcp_eq_hdl; /* FCP per-WQ handle */
+	/* Pointers to the constructed SLI4 queues */
+	struct lpfc_queue **fp_eq; /* Fast-path event queue */
+	struct lpfc_queue *sp_eq;  /* Slow-path event queue */
+	struct lpfc_queue **fcp_wq;/* Fast-path FCP work queue */
+	struct lpfc_queue *mbx_wq; /* Slow-path MBOX work queue */
+	struct lpfc_queue *els_wq; /* Slow-path ELS work queue */
+	struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */
+	struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */
+	struct lpfc_queue **fcp_cq;/* Fast-path FCP compl queue */
+	struct lpfc_queue *mbx_cq; /* Slow-path mailbox complete queue */
+	struct lpfc_queue *els_cq; /* Slow-path ELS response complete queue */
+	struct lpfc_queue *rxq_cq; /* Slow-path unsolicited complete queue */
+
+	/* Setup information for various queue parameters */
+	int eq_esize;
+	int eq_ecount;
+	int cq_esize;
+	int cq_ecount;
+	int wq_esize;
+	int wq_ecount;
+	int mq_esize;
+	int mq_ecount;
+	int rq_esize;
+	int rq_ecount;
+#define LPFC_SP_EQ_MAX_INTR_SEC         10000
+#define LPFC_FP_EQ_MAX_INTR_SEC         10000
+
+	uint32_t intr_enable;
+	struct lpfc_bmbx bmbx;
+	struct lpfc_max_cfg_param max_cfg_param;
+	uint16_t next_xri; /* last_xri - max_cfg_param.xri_base = used */
+	uint16_t next_rpi;
+	uint16_t scsi_xri_max;
+	uint16_t scsi_xri_cnt;
+	struct list_head lpfc_free_sgl_list;
+	struct list_head lpfc_sgl_list;
+	struct lpfc_sglq **lpfc_els_sgl_array;
+	struct list_head lpfc_abts_els_sgl_list;
+	struct lpfc_scsi_buf **lpfc_scsi_psb_array;
+	struct list_head lpfc_abts_scsi_buf_list;
+	uint32_t total_sglq_bufs;
+	struct lpfc_sglq **lpfc_sglq_active_list;
+	struct list_head lpfc_rpi_hdr_list;
+	unsigned long *rpi_bmask;
+	uint16_t rpi_count;
+	struct lpfc_sli4_flags sli4_flags;
+	struct list_head sp_rspiocb_work_queue;
+	struct list_head sp_cqe_event_pool;
+	struct list_head sp_asynce_work_queue;
+	struct list_head sp_fcp_xri_aborted_work_queue;
+	struct list_head sp_els_xri_aborted_work_queue;
+	struct list_head sp_unsol_work_queue;
+	struct lpfc_sli4_link link_state;
+	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
+	spinlock_t abts_sgl_list_lock; /* list of aborted els IOs */
+};
+
+enum lpfc_sge_type {
+	GEN_BUFF_TYPE,
+	SCSI_BUFF_TYPE
+};
+
+struct lpfc_sglq {
+	/* lpfc_sglqs are used in double linked lists */
+	struct list_head list;
+	struct list_head clist;
+	enum lpfc_sge_type buff_type; /* is this a scsi sgl */
+	uint16_t iotag;         /* pre-assigned IO tag */
+	uint16_t sli4_xritag;   /* pre-assigned XRI, (OXID) tag. */
+	struct sli4_sge *sgl;	/* pre-assigned SGL */
+	void *virt;		/* virtual address. */
+	dma_addr_t phys;	/* physical address */
+};
+
+struct lpfc_rpi_hdr {
+	struct list_head list;
+	uint32_t len;
+	struct lpfc_dmabuf *dmabuf;
+	uint32_t page_count;
+	uint32_t start_rpi;
+};
+
+/*
+ * SLI4 specific function prototypes
+ */
+int lpfc_pci_function_reset(struct lpfc_hba *);
+int lpfc_sli4_hba_setup(struct lpfc_hba *);
+int lpfc_sli4_hba_down(struct lpfc_hba *);
+int lpfc_sli4_config(struct lpfc_hba *, struct lpfcMboxq *, uint8_t,
+		     uint8_t, uint32_t, bool);
+void lpfc_sli4_mbox_cmd_free(struct lpfc_hba *, struct lpfcMboxq *);
+void lpfc_sli4_mbx_sge_set(struct lpfcMboxq *, uint32_t, dma_addr_t, uint32_t);
+void lpfc_sli4_mbx_sge_get(struct lpfcMboxq *, uint32_t,
+			   struct lpfc_mbx_sge *);
+
+void lpfc_sli4_hba_reset(struct lpfc_hba *);
+struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t,
+			uint32_t);
+void lpfc_sli4_queue_free(struct lpfc_queue *);
+uint32_t lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint16_t);
+uint32_t lpfc_cq_create(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_queue *, uint32_t, uint32_t);
+uint32_t lpfc_mq_create(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_queue *, uint32_t);
+uint32_t lpfc_wq_create(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_queue *, uint32_t);
+uint32_t lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_queue *, struct lpfc_queue *, uint32_t);
+uint32_t lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *);
+uint32_t lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *);
+uint32_t lpfc_mq_destroy(struct lpfc_hba *, struct lpfc_queue *);
+uint32_t lpfc_wq_destroy(struct lpfc_hba *, struct lpfc_queue *);
+uint32_t lpfc_rq_destroy(struct lpfc_hba *, struct lpfc_queue *,
+			 struct lpfc_queue *);
+int lpfc_sli4_queue_setup(struct lpfc_hba *);
+void lpfc_sli4_queue_unset(struct lpfc_hba *);
+int lpfc_sli4_post_sgl(struct lpfc_hba *, dma_addr_t, dma_addr_t, uint16_t);
+int lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *);
+int lpfc_sli4_remove_all_sgl_pages(struct lpfc_hba *);
+uint16_t lpfc_sli4_next_xritag(struct lpfc_hba *);
+int lpfc_sli4_post_async_mbox(struct lpfc_hba *);
+int lpfc_sli4_post_sgl_list(struct lpfc_hba *phba);
+int lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *, struct list_head *, int);
+struct lpfc_cq_event *__lpfc_sli4_cq_event_alloc(struct lpfc_hba *);
+struct lpfc_cq_event *lpfc_sli4_cq_event_alloc(struct lpfc_hba *);
+void __lpfc_sli4_cq_event_release(struct lpfc_hba *, struct lpfc_cq_event *);
+void lpfc_sli4_cq_event_release(struct lpfc_hba *, struct lpfc_cq_event *);
+int lpfc_sli4_init_rpi_hdrs(struct lpfc_hba *);
+int lpfc_sli4_post_rpi_hdr(struct lpfc_hba *, struct lpfc_rpi_hdr *);
+int lpfc_sli4_post_all_rpi_hdrs(struct lpfc_hba *);
+struct lpfc_rpi_hdr *lpfc_sli4_create_rpi_hdr(struct lpfc_hba *);
+void lpfc_sli4_remove_rpi_hdrs(struct lpfc_hba *);
+int lpfc_sli4_alloc_rpi(struct lpfc_hba *);
+void lpfc_sli4_free_rpi(struct lpfc_hba *, int);
+void lpfc_sli4_remove_rpis(struct lpfc_hba *);
+void lpfc_sli4_async_event_proc(struct lpfc_hba *);
+int lpfc_sli4_resume_rpi(struct lpfc_nodelist *);
+void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
+			       struct sli4_wcqe_xri_aborted *);
+void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
+			       struct sli4_wcqe_xri_aborted *);
+int lpfc_sli4_brdreset(struct lpfc_hba *);
+int lpfc_sli4_add_fcf_record(struct lpfc_hba *, struct fcf_record *);
+void lpfc_sli_remove_dflt_fcf(struct lpfc_hba *);
+int lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *);
+int lpfc_sli4_init_vpi(struct lpfc_hba *, uint16_t);
+uint32_t lpfc_sli4_cq_release(struct lpfc_queue *, bool);
+uint32_t lpfc_sli4_eq_release(struct lpfc_queue *, bool);
+void lpfc_sli4_fcfi_unreg(struct lpfc_hba *, uint16_t);
+int lpfc_sli4_read_fcf_record(struct lpfc_hba *, uint16_t);
+void lpfc_mbx_cmpl_read_fcf_record(struct lpfc_hba *, LPFC_MBOXQ_t *);
+int lpfc_sli4_post_status_check(struct lpfc_hba *);
+uint8_t lpfc_sli4_mbox_opcode_get(struct lpfc_hba *, struct lpfcMboxq *);
+
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index 917ad56..59e67f7 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -32,8 +32,10 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_fc.h>
+#include "lpfc_hw4.h"
 #include "lpfc_hw.h"
 #include "lpfc_sli.h"
+#include "lpfc_sli4.h"
 #include "lpfc_nl.h"
 #include "lpfc_disc.h"
 #include "lpfc_scsi.h"
@@ -89,6 +91,8 @@ lpfc_alloc_vpi(struct lpfc_hba *phba)
 		vpi = 0;
 	else
 		set_bit(vpi, phba->vpi_bmask);
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		phba->sli4_hba.max_cfg_param.vpi_used++;
 	spin_unlock_irq(&phba->hbalock);
 	return vpi;
 }
@@ -96,8 +100,12 @@ lpfc_alloc_vpi(struct lpfc_hba *phba)
 static void
 lpfc_free_vpi(struct lpfc_hba *phba, int vpi)
 {
+	if (vpi == 0)
+		return;
 	spin_lock_irq(&phba->hbalock);
 	clear_bit(vpi, phba->vpi_bmask);
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		phba->sli4_hba.max_cfg_param.vpi_used--;
 	spin_unlock_irq(&phba->hbalock);
 }
 
@@ -308,6 +316,21 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
 		goto error_out;
 	}
 
+	/*
+	 * In SLI4, the vpi must be activated before it can be used
+	 * by the port.
+	 */
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		rc = lpfc_sli4_init_vpi(phba, vpi);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
+					"1838 Failed to INIT_VPI on vpi %d "
+					"status %d\n", vpi, rc);
+			rc = VPORT_NORESOURCES;
+			lpfc_free_vpi(phba, vpi);
+			goto error_out;
+		}
+	}
 
 	/* Assign an unused board number */
 	if ((instance = lpfc_get_instance()) < 0) {
-- 
cgit v0.10.2


From 4f774513f7b3fe96648b8936f60f835e6ceaa88e Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:52:35 -0400
Subject: [SCSI] lpfc 8.3.2 : Addition of SLI4 Interface - Queues

Adds support for the new queues in the SLI-4 interface.  There are :
- Work Queues - host-to-adapter for fast-path traffic
- Mailbox Queues - host-to-adapter for control (slow-path)
- Buffer Queues - host-to-adapter for posting buffers for async receive
- Completion Queues - adapter-to-host for posting async events,
       completions for fast or slow patch work, receipt of async
       receive traffic
- Event Queues - tied to MSI-X vectors, binds completion queues with
       interrupts

These patches add the all the support code to tie into command submission
and response paths, updates the interrupt handling, etc.

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 706bb22..cf42ada 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -70,6 +70,350 @@ typedef enum _lpfc_iocb_type {
 	LPFC_ABORT_IOCB
 } lpfc_iocb_type;
 
+
+/* Provide function prototypes local to this module. */
+static int lpfc_sli_issue_mbox_s4(struct lpfc_hba *, LPFC_MBOXQ_t *,
+				  uint32_t);
+static int lpfc_sli4_read_rev(struct lpfc_hba *, LPFC_MBOXQ_t *,
+			    uint8_t *, uint32_t *);
+
+static IOCB_t *
+lpfc_get_iocb_from_iocbq(struct lpfc_iocbq *iocbq)
+{
+	return &iocbq->iocb;
+}
+
+/**
+ * lpfc_sli4_wq_put - Put a Work Queue Entry on an Work Queue
+ * @q: The Work Queue to operate on.
+ * @wqe: The work Queue Entry to put on the Work queue.
+ *
+ * This routine will copy the contents of @wqe to the next available entry on
+ * the @q. This function will then ring the Work Queue Doorbell to signal the
+ * HBA to start processing the Work Queue Entry. This function returns 0 if
+ * successful. If no entries are available on @q then this function will return
+ * -ENOMEM.
+ * The caller is expected to hold the hbalock when calling this routine.
+ **/
+static uint32_t
+lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe)
+{
+	union lpfc_wqe *temp_wqe = q->qe[q->host_index].wqe;
+	struct lpfc_register doorbell;
+	uint32_t host_index;
+
+	/* If the host has not yet processed the next entry then we are done */
+	if (((q->host_index + 1) % q->entry_count) == q->hba_index)
+		return -ENOMEM;
+	/* set consumption flag every once in a while */
+	if (!((q->host_index + 1) % LPFC_RELEASE_NOTIFICATION_INTERVAL))
+		bf_set(lpfc_wqe_gen_wqec, &wqe->generic, 1);
+
+	lpfc_sli_pcimem_bcopy(wqe, temp_wqe, q->entry_size);
+
+	/* Update the host index before invoking device */
+	host_index = q->host_index;
+	q->host_index = ((q->host_index + 1) % q->entry_count);
+
+	/* Ring Doorbell */
+	doorbell.word0 = 0;
+	bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1);
+	bf_set(lpfc_wq_doorbell_index, &doorbell, host_index);
+	bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id);
+	writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr);
+	readl(q->phba->sli4_hba.WQDBregaddr); /* Flush */
+
+	return 0;
+}
+
+/**
+ * lpfc_sli4_wq_release - Updates internal hba index for WQ
+ * @q: The Work Queue to operate on.
+ * @index: The index to advance the hba index to.
+ *
+ * This routine will update the HBA index of a queue to reflect consumption of
+ * Work Queue Entries by the HBA. When the HBA indicates that it has consumed
+ * an entry the host calls this function to update the queue's internal
+ * pointers. This routine returns the number of entries that were consumed by
+ * the HBA.
+ **/
+static uint32_t
+lpfc_sli4_wq_release(struct lpfc_queue *q, uint32_t index)
+{
+	uint32_t released = 0;
+
+	if (q->hba_index == index)
+		return 0;
+	do {
+		q->hba_index = ((q->hba_index + 1) % q->entry_count);
+		released++;
+	} while (q->hba_index != index);
+	return released;
+}
+
+/**
+ * lpfc_sli4_mq_put - Put a Mailbox Queue Entry on an Mailbox Queue
+ * @q: The Mailbox Queue to operate on.
+ * @wqe: The Mailbox Queue Entry to put on the Work queue.
+ *
+ * This routine will copy the contents of @mqe to the next available entry on
+ * the @q. This function will then ring the Work Queue Doorbell to signal the
+ * HBA to start processing the Work Queue Entry. This function returns 0 if
+ * successful. If no entries are available on @q then this function will return
+ * -ENOMEM.
+ * The caller is expected to hold the hbalock when calling this routine.
+ **/
+static uint32_t
+lpfc_sli4_mq_put(struct lpfc_queue *q, struct lpfc_mqe *mqe)
+{
+	struct lpfc_mqe *temp_mqe = q->qe[q->host_index].mqe;
+	struct lpfc_register doorbell;
+	uint32_t host_index;
+
+	/* If the host has not yet processed the next entry then we are done */
+	if (((q->host_index + 1) % q->entry_count) == q->hba_index)
+		return -ENOMEM;
+	lpfc_sli_pcimem_bcopy(mqe, temp_mqe, q->entry_size);
+	/* Save off the mailbox pointer for completion */
+	q->phba->mbox = (MAILBOX_t *)temp_mqe;
+
+	/* Update the host index before invoking device */
+	host_index = q->host_index;
+	q->host_index = ((q->host_index + 1) % q->entry_count);
+
+	/* Ring Doorbell */
+	doorbell.word0 = 0;
+	bf_set(lpfc_mq_doorbell_num_posted, &doorbell, 1);
+	bf_set(lpfc_mq_doorbell_id, &doorbell, q->queue_id);
+	writel(doorbell.word0, q->phba->sli4_hba.MQDBregaddr);
+	readl(q->phba->sli4_hba.MQDBregaddr); /* Flush */
+	return 0;
+}
+
+/**
+ * lpfc_sli4_mq_release - Updates internal hba index for MQ
+ * @q: The Mailbox Queue to operate on.
+ *
+ * This routine will update the HBA index of a queue to reflect consumption of
+ * a Mailbox Queue Entry by the HBA. When the HBA indicates that it has consumed
+ * an entry the host calls this function to update the queue's internal
+ * pointers. This routine returns the number of entries that were consumed by
+ * the HBA.
+ **/
+static uint32_t
+lpfc_sli4_mq_release(struct lpfc_queue *q)
+{
+	/* Clear the mailbox pointer for completion */
+	q->phba->mbox = NULL;
+	q->hba_index = ((q->hba_index + 1) % q->entry_count);
+	return 1;
+}
+
+/**
+ * lpfc_sli4_eq_get - Gets the next valid EQE from a EQ
+ * @q: The Event Queue to get the first valid EQE from
+ *
+ * This routine will get the first valid Event Queue Entry from @q, update
+ * the queue's internal hba index, and return the EQE. If no valid EQEs are in
+ * the Queue (no more work to do), or the Queue is full of EQEs that have been
+ * processed, but not popped back to the HBA then this routine will return NULL.
+ **/
+static struct lpfc_eqe *
+lpfc_sli4_eq_get(struct lpfc_queue *q)
+{
+	struct lpfc_eqe *eqe = q->qe[q->hba_index].eqe;
+
+	/* If the next EQE is not valid then we are done */
+	if (!bf_get(lpfc_eqe_valid, eqe))
+		return NULL;
+	/* If the host has not yet processed the next entry then we are done */
+	if (((q->hba_index + 1) % q->entry_count) == q->host_index)
+		return NULL;
+
+	q->hba_index = ((q->hba_index + 1) % q->entry_count);
+	return eqe;
+}
+
+/**
+ * lpfc_sli4_eq_release - Indicates the host has finished processing an EQ
+ * @q: The Event Queue that the host has completed processing for.
+ * @arm: Indicates whether the host wants to arms this CQ.
+ *
+ * This routine will mark all Event Queue Entries on @q, from the last
+ * known completed entry to the last entry that was processed, as completed
+ * by clearing the valid bit for each completion queue entry. Then it will
+ * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
+ * The internal host index in the @q will be updated by this routine to indicate
+ * that the host has finished processing the entries. The @arm parameter
+ * indicates that the queue should be rearmed when ringing the doorbell.
+ *
+ * This function will return the number of EQEs that were popped.
+ **/
+uint32_t
+lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
+{
+	uint32_t released = 0;
+	struct lpfc_eqe *temp_eqe;
+	struct lpfc_register doorbell;
+
+	/* while there are valid entries */
+	while (q->hba_index != q->host_index) {
+		temp_eqe = q->qe[q->host_index].eqe;
+		bf_set(lpfc_eqe_valid, temp_eqe, 0);
+		released++;
+		q->host_index = ((q->host_index + 1) % q->entry_count);
+	}
+	if (unlikely(released == 0 && !arm))
+		return 0;
+
+	/* ring doorbell for number popped */
+	doorbell.word0 = 0;
+	if (arm) {
+		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
+		bf_set(lpfc_eqcq_doorbell_eqci, &doorbell, 1);
+	}
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_EVENT);
+	bf_set(lpfc_eqcq_doorbell_eqid, &doorbell, q->queue_id);
+	writel(doorbell.word0, q->phba->sli4_hba.EQCQDBregaddr);
+	return released;
+}
+
+/**
+ * lpfc_sli4_cq_get - Gets the next valid CQE from a CQ
+ * @q: The Completion Queue to get the first valid CQE from
+ *
+ * This routine will get the first valid Completion Queue Entry from @q, update
+ * the queue's internal hba index, and return the CQE. If no valid CQEs are in
+ * the Queue (no more work to do), or the Queue is full of CQEs that have been
+ * processed, but not popped back to the HBA then this routine will return NULL.
+ **/
+static struct lpfc_cqe *
+lpfc_sli4_cq_get(struct lpfc_queue *q)
+{
+	struct lpfc_cqe *cqe;
+
+	/* If the next CQE is not valid then we are done */
+	if (!bf_get(lpfc_cqe_valid, q->qe[q->hba_index].cqe))
+		return NULL;
+	/* If the host has not yet processed the next entry then we are done */
+	if (((q->hba_index + 1) % q->entry_count) == q->host_index)
+		return NULL;
+
+	cqe = q->qe[q->hba_index].cqe;
+	q->hba_index = ((q->hba_index + 1) % q->entry_count);
+	return cqe;
+}
+
+/**
+ * lpfc_sli4_cq_release - Indicates the host has finished processing a CQ
+ * @q: The Completion Queue that the host has completed processing for.
+ * @arm: Indicates whether the host wants to arms this CQ.
+ *
+ * This routine will mark all Completion queue entries on @q, from the last
+ * known completed entry to the last entry that was processed, as completed
+ * by clearing the valid bit for each completion queue entry. Then it will
+ * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
+ * The internal host index in the @q will be updated by this routine to indicate
+ * that the host has finished processing the entries. The @arm parameter
+ * indicates that the queue should be rearmed when ringing the doorbell.
+ *
+ * This function will return the number of CQEs that were released.
+ **/
+uint32_t
+lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm)
+{
+	uint32_t released = 0;
+	struct lpfc_cqe *temp_qe;
+	struct lpfc_register doorbell;
+
+	/* while there are valid entries */
+	while (q->hba_index != q->host_index) {
+		temp_qe = q->qe[q->host_index].cqe;
+		bf_set(lpfc_cqe_valid, temp_qe, 0);
+		released++;
+		q->host_index = ((q->host_index + 1) % q->entry_count);
+	}
+	if (unlikely(released == 0 && !arm))
+		return 0;
+
+	/* ring doorbell for number popped */
+	doorbell.word0 = 0;
+	if (arm)
+		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_COMPLETION);
+	bf_set(lpfc_eqcq_doorbell_cqid, &doorbell, q->queue_id);
+	writel(doorbell.word0, q->phba->sli4_hba.EQCQDBregaddr);
+	return released;
+}
+
+/**
+ * lpfc_sli4_rq_put - Put a Receive Buffer Queue Entry on a Receive Queue
+ * @q: The Header Receive Queue to operate on.
+ * @wqe: The Receive Queue Entry to put on the Receive queue.
+ *
+ * This routine will copy the contents of @wqe to the next available entry on
+ * the @q. This function will then ring the Receive Queue Doorbell to signal the
+ * HBA to start processing the Receive Queue Entry. This function returns the
+ * index that the rqe was copied to if successful. If no entries are available
+ * on @q then this function will return -ENOMEM.
+ * The caller is expected to hold the hbalock when calling this routine.
+ **/
+static int
+lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
+		 struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe)
+{
+	struct lpfc_rqe *temp_hrqe = hq->qe[hq->host_index].rqe;
+	struct lpfc_rqe *temp_drqe = dq->qe[dq->host_index].rqe;
+	struct lpfc_register doorbell;
+	int put_index = hq->host_index;
+
+	if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
+		return -EINVAL;
+	if (hq->host_index != dq->host_index)
+		return -EINVAL;
+	/* If the host has not yet processed the next entry then we are done */
+	if (((hq->host_index + 1) % hq->entry_count) == hq->hba_index)
+		return -EBUSY;
+	lpfc_sli_pcimem_bcopy(hrqe, temp_hrqe, hq->entry_size);
+	lpfc_sli_pcimem_bcopy(drqe, temp_drqe, dq->entry_size);
+
+	/* Update the host index to point to the next slot */
+	hq->host_index = ((hq->host_index + 1) % hq->entry_count);
+	dq->host_index = ((dq->host_index + 1) % dq->entry_count);
+
+	/* Ring The Header Receive Queue Doorbell */
+	if (!(hq->host_index % LPFC_RQ_POST_BATCH)) {
+		doorbell.word0 = 0;
+		bf_set(lpfc_rq_doorbell_num_posted, &doorbell,
+		       LPFC_RQ_POST_BATCH);
+		bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id);
+		writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr);
+	}
+	return put_index;
+}
+
+/**
+ * lpfc_sli4_rq_release - Updates internal hba index for RQ
+ * @q: The Header Receive Queue to operate on.
+ *
+ * This routine will update the HBA index of a queue to reflect consumption of
+ * one Receive Queue Entry by the HBA. When the HBA indicates that it has
+ * consumed an entry the host calls this function to update the queue's
+ * internal pointers. This routine returns the number of entries that were
+ * consumed by the HBA.
+ **/
+static uint32_t
+lpfc_sli4_rq_release(struct lpfc_queue *hq, struct lpfc_queue *dq)
+{
+	if ((hq->type != LPFC_HRQ) || (dq->type != LPFC_DRQ))
+		return 0;
+	hq->hba_index = ((hq->hba_index + 1) % hq->entry_count);
+	dq->hba_index = ((dq->hba_index + 1) % dq->entry_count);
+	return 1;
+}
+
 /**
  * lpfc_cmd_iocb - Get next command iocb entry in the ring
  * @phba: Pointer to HBA context object.
@@ -215,6 +559,59 @@ lpfc_sli_get_iocbq(struct lpfc_hba *phba)
 }
 
 /**
+ * __lpfc_sli_release_iocbq_s4 - Release iocb to the iocb pool
+ * @phba: Pointer to HBA context object.
+ * @iocbq: Pointer to driver iocb object.
+ *
+ * This function is called with hbalock held to release driver
+ * iocb object to the iocb pool. The iotag in the iocb object
+ * does not change for each use of the iocb object. This function
+ * clears all other fields of the iocb object when it is freed.
+ * The sqlq structure that holds the xritag and phys and virtual
+ * mappings for the scatter gather list is retrieved from the
+ * active array of sglq. The get of the sglq pointer also clears
+ * the entry in the array. If the status of the IO indiactes that
+ * this IO was aborted then the sglq entry it put on the
+ * lpfc_abts_els_sgl_list until the CQ_ABORTED_XRI is received. If the
+ * IO has good status or fails for any other reason then the sglq
+ * entry is added to the free list (lpfc_sgl_list).
+ **/
+static void
+__lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+{
+	struct lpfc_sglq *sglq;
+	size_t start_clean = offsetof(struct lpfc_iocbq, iocb);
+	unsigned long iflag;
+
+	if (iocbq->sli4_xritag == NO_XRI)
+		sglq = NULL;
+	else
+		sglq = __lpfc_clear_active_sglq(phba, iocbq->sli4_xritag);
+	if (sglq)  {
+		if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED
+			|| ((iocbq->iocb.ulpStatus == IOSTAT_LOCAL_REJECT)
+			&& (iocbq->iocb.un.ulpWord[4]
+				== IOERR_SLI_ABORTED))) {
+			spin_lock_irqsave(&phba->sli4_hba.abts_sgl_list_lock,
+					iflag);
+			list_add(&sglq->list,
+				&phba->sli4_hba.lpfc_abts_els_sgl_list);
+			spin_unlock_irqrestore(
+				&phba->sli4_hba.abts_sgl_list_lock, iflag);
+		} else
+			list_add(&sglq->list, &phba->sli4_hba.lpfc_sgl_list);
+	}
+
+
+	/*
+	 * Clean all volatile data fields, preserve iotag and node struct.
+	 */
+	memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);
+	iocbq->sli4_xritag = NO_XRI;
+	list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
+}
+
+/**
  * __lpfc_sli_release_iocbq_s3 - Release iocb to the iocb pool
  * @phba: Pointer to HBA context object.
  * @iocbq: Pointer to driver iocb object.
@@ -959,6 +1356,37 @@ lpfc_sli_hbq_to_firmware_s3(struct lpfc_hba *phba, uint32_t hbqno,
 		return -ENOMEM;
 }
 
+/**
+ * lpfc_sli_hbq_to_firmware_s4 - Post the hbq buffer to SLI4 firmware
+ * @phba: Pointer to HBA context object.
+ * @hbqno: HBQ number.
+ * @hbq_buf: Pointer to HBQ buffer.
+ *
+ * This function is called with the hbalock held to post an RQE to the SLI4
+ * firmware. If able to post the RQE to the RQ it will queue the hbq entry to
+ * the hbq_buffer_list and return zero, otherwise it will return an error.
+ **/
+static int
+lpfc_sli_hbq_to_firmware_s4(struct lpfc_hba *phba, uint32_t hbqno,
+			    struct hbq_dmabuf *hbq_buf)
+{
+	int rc;
+	struct lpfc_rqe hrqe;
+	struct lpfc_rqe drqe;
+
+	hrqe.address_lo = putPaddrLow(hbq_buf->hbuf.phys);
+	hrqe.address_hi = putPaddrHigh(hbq_buf->hbuf.phys);
+	drqe.address_lo = putPaddrLow(hbq_buf->dbuf.phys);
+	drqe.address_hi = putPaddrHigh(hbq_buf->dbuf.phys);
+	rc = lpfc_sli4_rq_put(phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq,
+			      &hrqe, &drqe);
+	if (rc < 0)
+		return rc;
+	hbq_buf->tag = rc;
+	list_add_tail(&hbq_buf->dbuf.list, &phba->hbqs[hbqno].hbq_buffer_list);
+	return 0;
+}
+
 /* HBQ for ELS and CT traffic. */
 static struct lpfc_hbq_init lpfc_els_hbq = {
 	.rn = 1,
@@ -2575,6 +3003,36 @@ lpfc_sli_handle_slow_ring_event_s3(struct lpfc_hba *phba,
 }
 
 /**
+ * lpfc_sli_handle_slow_ring_event_s4 - Handle SLI4 slow-path els events
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @mask: Host attention register mask for this ring.
+ *
+ * This function is called from the worker thread when there is a pending
+ * ELS response iocb on the driver internal slow-path response iocb worker
+ * queue. The caller does not hold any lock. The function will remove each
+ * response iocb from the response worker queue and calls the handle
+ * response iocb routine (lpfc_sli_sp_handle_rspiocb) to process it.
+ **/
+static void
+lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba,
+				   struct lpfc_sli_ring *pring, uint32_t mask)
+{
+	struct lpfc_iocbq *irspiocbq;
+	unsigned long iflag;
+
+	while (!list_empty(&phba->sli4_hba.sp_rspiocb_work_queue)) {
+		/* Get the response iocb from the head of work queue */
+		spin_lock_irqsave(&phba->hbalock, iflag);
+		list_remove_head(&phba->sli4_hba.sp_rspiocb_work_queue,
+				 irspiocbq, struct lpfc_iocbq, list);
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		/* Process the response iocb */
+		lpfc_sli_sp_handle_rspiocb(phba, pring, irspiocbq);
+	}
+}
+
+/**
  * lpfc_sli_abort_iocb_ring - Abort all iocbs in the ring
  * @phba: Pointer to HBA context object.
  * @pring: Pointer to driver SLI ring object.
@@ -3376,6 +3834,26 @@ lpfc_sli_hbq_setup(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_sli4_rb_setup - Initialize and post RBs to HBA
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called during the SLI initialization to configure
+ * all the HBQs and post buffers to the HBQ. The caller is not
+ * required to hold any locks. This function will return zero if successful
+ * else it will return negative error code.
+ **/
+static int
+lpfc_sli4_rb_setup(struct lpfc_hba *phba)
+{
+	phba->hbq_in_use = 1;
+	phba->hbqs[0].entry_count = lpfc_hbq_defs[0]->entry_count;
+	phba->hbq_count = 1;
+	/* Initially populate or replenish the HBQs */
+	lpfc_sli_hbqbuf_init_hbqs(phba, 0);
+	return 0;
+}
+
+/**
  * lpfc_sli_config_port - Issue config port mailbox command
  * @phba: Pointer to HBA context object.
  * @sli_mode: sli mode - 2/3
@@ -5130,104 +5608,550 @@ __lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number,
 }
 
 /**
- * __lpfc_sli_issue_iocb - Wrapper func of lockless version for issuing iocb
- *
- * This routine wraps the actual lockless version for issusing IOCB function
- * pointer from the lpfc_hba struct.
- *
- * Return codes:
- * 	IOCB_ERROR - Error
- * 	IOCB_SUCCESS - Success
- * 	IOCB_BUSY - Busy
- **/
-static inline int
-__lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
-		struct lpfc_iocbq *piocb, uint32_t flag)
-{
-	return phba->__lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
-}
-
-/**
- * lpfc_sli_api_table_setup - Set up sli api fucntion jump table
- * @phba: The hba struct for which this call is being executed.
- * @dev_grp: The HBA PCI-Device group number.
- *
- * This routine sets up the SLI interface API function jump table in @phba
- * struct.
- * Returns: 0 - success, -ENODEV - failure.
- **/
-int
-lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+ * lpfc_sli4_bpl2sgl - Convert the bpl/bde to a sgl.
+ * @phba: Pointer to HBA context object.
+ * @piocb: Pointer to command iocb.
+ * @sglq: Pointer to the scatter gather queue object.
+ *
+ * This routine converts the bpl or bde that is in the IOCB
+ * to a sgl list for the sli4 hardware. The physical address
+ * of the bpl/bde is converted back to a virtual address.
+ * If the IOCB contains a BPL then the list of BDE's is
+ * converted to sli4_sge's. If the IOCB contains a single
+ * BDE then it is converted to a single sli_sge.
+ * The IOCB is still in cpu endianess so the contents of
+ * the bpl can be used without byte swapping.
+ *
+ * Returns valid XRI = Success, NO_XRI = Failure.
+**/
+static uint16_t
+lpfc_sli4_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
+		struct lpfc_sglq *sglq)
 {
+	uint16_t xritag = NO_XRI;
+	struct ulp_bde64 *bpl = NULL;
+	struct ulp_bde64 bde;
+	struct sli4_sge *sgl  = NULL;
+	IOCB_t *icmd;
+	int numBdes = 0;
+	int i = 0;
 
-	switch (dev_grp) {
-	case LPFC_PCI_DEV_LP:
-		phba->__lpfc_sli_issue_iocb = __lpfc_sli_issue_iocb_s3;
-		phba->__lpfc_sli_release_iocbq = __lpfc_sli_release_iocbq_s3;
-		break;
-	default:
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1419 Invalid HBA PCI-device group: 0x%x\n",
-				dev_grp);
-		return -ENODEV;
-		break;
-	}
-	phba->lpfc_get_iocb_from_iocbq = lpfc_get_iocb_from_iocbq;
-	return 0;
+	if (!piocbq || !sglq)
+		return xritag;
+
+	sgl  = (struct sli4_sge *)sglq->sgl;
+	icmd = &piocbq->iocb;
+	if (icmd->un.genreq64.bdl.bdeFlags == BUFF_TYPE_BLP_64) {
+		numBdes = icmd->un.genreq64.bdl.bdeSize /
+				sizeof(struct ulp_bde64);
+		/* The addrHigh and addrLow fields within the IOCB
+		 * have not been byteswapped yet so there is no
+		 * need to swap them back.
+		 */
+		bpl  = (struct ulp_bde64 *)
+			((struct lpfc_dmabuf *)piocbq->context3)->virt;
+
+		if (!bpl)
+			return xritag;
+
+		for (i = 0; i < numBdes; i++) {
+			/* Should already be byte swapped. */
+			sgl->addr_hi =  bpl->addrHigh;
+			sgl->addr_lo =  bpl->addrLow;
+			/* swap the size field back to the cpu so we
+			 * can assign it to the sgl.
+			 */
+			bde.tus.w  = le32_to_cpu(bpl->tus.w);
+			bf_set(lpfc_sli4_sge_len, sgl, bde.tus.f.bdeSize);
+			if ((i+1) == numBdes)
+				bf_set(lpfc_sli4_sge_last, sgl, 1);
+			else
+				bf_set(lpfc_sli4_sge_last, sgl, 0);
+			sgl->word2 = cpu_to_le32(sgl->word2);
+			sgl->word3 = cpu_to_le32(sgl->word3);
+			bpl++;
+			sgl++;
+		}
+	} else if (icmd->un.genreq64.bdl.bdeFlags == BUFF_TYPE_BDE_64) {
+			/* The addrHigh and addrLow fields of the BDE have not
+			 * been byteswapped yet so they need to be swapped
+			 * before putting them in the sgl.
+			 */
+			sgl->addr_hi =
+				cpu_to_le32(icmd->un.genreq64.bdl.addrHigh);
+			sgl->addr_lo =
+				cpu_to_le32(icmd->un.genreq64.bdl.addrLow);
+			bf_set(lpfc_sli4_sge_len, sgl,
+				icmd->un.genreq64.bdl.bdeSize);
+			bf_set(lpfc_sli4_sge_last, sgl, 1);
+			sgl->word2 = cpu_to_le32(sgl->word2);
+			sgl->word3 = cpu_to_le32(sgl->word3);
+	}
+	return sglq->sli4_xritag;
 }
 
 /**
- * lpfc_sli_issue_iocb - Wrapper function for __lpfc_sli_issue_iocb
+ * lpfc_sli4_scmd_to_wqidx_distr - scsi command to SLI4 WQ index distribution
  * @phba: Pointer to HBA context object.
- * @pring: Pointer to driver SLI ring object.
  * @piocb: Pointer to command iocb.
- * @flag: Flag indicating if this command can be put into txq.
  *
- * lpfc_sli_issue_iocb is a wrapper around __lpfc_sli_issue_iocb
- * function. This function gets the hbalock and calls
- * __lpfc_sli_issue_iocb function and will return the error returned
- * by __lpfc_sli_issue_iocb function. This wrapper is used by
- * functions which do not hold hbalock.
+ * This routine performs a round robin SCSI command to SLI4 FCP WQ index
+ * distribution.
+ *
+ * Return: index into SLI4 fast-path FCP queue index.
  **/
-int
-lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
-		    struct lpfc_iocbq *piocb, uint32_t flag)
+static uint32_t
+lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
 {
-	unsigned long iflags;
-	int rc;
-
-	spin_lock_irqsave(&phba->hbalock, iflags);
-	rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
-	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	static uint32_t fcp_qidx;
 
-	return rc;
+	return fcp_qidx++ % phba->cfg_fcp_wq_count;
 }
 
 /**
- * lpfc_extra_ring_setup - Extra ring setup function
+ * lpfc_sli_iocb2wqe - Convert the IOCB to a work queue entry.
  * @phba: Pointer to HBA context object.
+ * @piocb: Pointer to command iocb.
+ * @wqe: Pointer to the work queue entry.
  *
- * This function is called while driver attaches with the
- * HBA to setup the extra ring. The extra ring is used
- * only when driver needs to support target mode functionality
- * or IP over FC functionalities.
+ * This routine converts the iocb command to its Work Queue Entry
+ * equivalent. The wqe pointer should not have any fields set when
+ * this routine is called because it will memcpy over them.
+ * This routine does not set the CQ_ID or the WQEC bits in the
+ * wqe.
  *
- * This function is called with no lock held.
+ * Returns: 0 = Success, IOCB_ERROR = Failure.
  **/
 static int
-lpfc_extra_ring_setup( struct lpfc_hba *phba)
+lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
+		union lpfc_wqe *wqe)
 {
-	struct lpfc_sli *psli;
-	struct lpfc_sli_ring *pring;
+	uint32_t payload_len = 0;
+	uint8_t ct = 0;
+	uint32_t fip;
+	uint32_t abort_tag;
+	uint8_t command_type = ELS_COMMAND_NON_FIP;
+	uint8_t cmnd;
+	uint16_t xritag;
+	struct ulp_bde64 *bpl = NULL;
+
+	fip = bf_get(lpfc_fip_flag, &phba->sli4_hba.sli4_flags);
+	/* The fcp commands will set command type */
+	if ((!(iocbq->iocb_flag &  LPFC_IO_FCP)) && (!fip))
+		command_type = ELS_COMMAND_NON_FIP;
+	else if (!(iocbq->iocb_flag &  LPFC_IO_FCP))
+		command_type = ELS_COMMAND_FIP;
+	else if (iocbq->iocb_flag &  LPFC_IO_FCP)
+		command_type = FCP_COMMAND;
+	else {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2019 Invalid cmd 0x%x\n",
+			iocbq->iocb.ulpCommand);
+		return IOCB_ERROR;
+	}
+	/* Some of the fields are in the right position already */
+	memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe));
+	abort_tag = (uint32_t) iocbq->iotag;
+	xritag = iocbq->sli4_xritag;
+	wqe->words[7] = 0; /* The ct field has moved so reset */
+	/* words0-2 bpl convert bde */
+	if (iocbq->iocb.un.genreq64.bdl.bdeFlags == BUFF_TYPE_BLP_64) {
+		bpl  = (struct ulp_bde64 *)
+			((struct lpfc_dmabuf *)iocbq->context3)->virt;
+		if (!bpl)
+			return IOCB_ERROR;
 
-	psli = &phba->sli;
+		/* Should already be byte swapped. */
+		wqe->generic.bde.addrHigh =  le32_to_cpu(bpl->addrHigh);
+		wqe->generic.bde.addrLow =  le32_to_cpu(bpl->addrLow);
+		/* swap the size field back to the cpu so we
+		 * can assign it to the sgl.
+		 */
+		wqe->generic.bde.tus.w  = le32_to_cpu(bpl->tus.w);
+		payload_len = wqe->generic.bde.tus.f.bdeSize;
+	} else
+		payload_len = iocbq->iocb.un.fcpi64.bdl.bdeSize;
 
-	/* Adjust cmd/rsp ring iocb entries more evenly */
+	iocbq->iocb.ulpIoTag = iocbq->iotag;
+	cmnd = iocbq->iocb.ulpCommand;
 
-	/* Take some away from the FCP ring */
-	pring = &psli->ring[psli->fcp_ring];
-	pring->numCiocb -= SLI2_IOCB_CMD_R1XTRA_ENTRIES;
-	pring->numRiocb -= SLI2_IOCB_RSP_R1XTRA_ENTRIES;
+	switch (iocbq->iocb.ulpCommand) {
+	case CMD_ELS_REQUEST64_CR:
+		if (!iocbq->iocb.ulpLe) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2007 Only Limited Edition cmd Format"
+				" supported 0x%x\n",
+				iocbq->iocb.ulpCommand);
+			return IOCB_ERROR;
+		}
+		wqe->els_req.payload_len = payload_len;
+		/* Els_reguest64 has a TMO */
+		bf_set(wqe_tmo, &wqe->els_req.wqe_com,
+			iocbq->iocb.ulpTimeout);
+		/* Need a VF for word 4 set the vf bit*/
+		bf_set(els_req64_vf, &wqe->els_req, 0);
+		/* And a VFID for word 12 */
+		bf_set(els_req64_vfid, &wqe->els_req, 0);
+		/*
+		 * Set ct field to 3, indicates that the context_tag field
+		 * contains the FCFI and remote N_Port_ID is
+		 * in word 5.
+		 */
+
+		ct = ((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l);
+		bf_set(lpfc_wqe_gen_context, &wqe->generic,
+				iocbq->iocb.ulpContext);
+
+		if (iocbq->vport->fc_myDID != 0) {
+			bf_set(els_req64_sid, &wqe->els_req,
+				 iocbq->vport->fc_myDID);
+			bf_set(els_req64_sp, &wqe->els_req, 1);
+		}
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic, ct);
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, 0);
+		/* CCP CCPE PV PRI in word10 were set in the memcpy */
+	break;
+	case CMD_XMIT_SEQUENCE64_CR:
+		/* word3 iocb=io_tag32 wqe=payload_offset */
+		/* payload offset used for multilpe outstanding
+		 * sequences on the same exchange
+		 */
+		wqe->words[3] = 0;
+		/* word4 relative_offset memcpy */
+		/* word5 r_ctl/df_ctl memcpy */
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, 0);
+		wqe->xmit_sequence.xmit_len = payload_len;
+	break;
+	case CMD_XMIT_BCAST64_CN:
+		/* word3 iocb=iotag32 wqe=payload_len */
+		wqe->words[3] = 0; /* no definition for this in wqe */
+		/* word4 iocb=rsvd wqe=rsvd */
+		/* word5 iocb=rctl/type/df_ctl wqe=rctl/type/df_ctl memcpy */
+		/* word6 iocb=ctxt_tag/io_tag wqe=ctxt_tag/xri */
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic,
+			((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l));
+	break;
+	case CMD_FCP_IWRITE64_CR:
+		command_type = FCP_COMMAND_DATA_OUT;
+		/* The struct for wqe fcp_iwrite has 3 fields that are somewhat
+		 * confusing.
+		 * word3 is payload_len: byte offset to the sgl entry for the
+		 * fcp_command.
+		 * word4 is total xfer len, same as the IOCB->ulpParameter.
+		 * word5 is initial xfer len 0 = wait for xfer-ready
+		 */
+
+		/* Always wait for xfer-ready before sending data */
+		wqe->fcp_iwrite.initial_xfer_len = 0;
+		/* word 4 (xfer length) should have been set on the memcpy */
+
+	/* allow write to fall through to read */
+	case CMD_FCP_IREAD64_CR:
+		/* FCP_CMD is always the 1st sgl entry */
+		wqe->fcp_iread.payload_len =
+			payload_len + sizeof(struct fcp_rsp);
+
+		/* word 4 (xfer length) should have been set on the memcpy */
+
+		bf_set(lpfc_wqe_gen_erp, &wqe->generic,
+			iocbq->iocb.ulpFCP2Rcvy);
+		bf_set(lpfc_wqe_gen_lnk, &wqe->generic, iocbq->iocb.ulpXS);
+		/* The XC bit and the XS bit are similar. The driver never
+		 * tracked whether or not the exchange was previouslly open.
+		 * XC = Exchange create, 0 is create. 1 is already open.
+		 * XS = link cmd: 1 do not close the exchange after command.
+		 * XS = 0 close exchange when command completes.
+		 * The only time we would not set the XC bit is when the XS bit
+		 * is set and we are sending our 2nd or greater command on
+		 * this exchange.
+		 */
+
+	/* ALLOW read & write to fall through to ICMD64 */
+	case CMD_FCP_ICMND64_CR:
+		/* Always open the exchange */
+		bf_set(wqe_xc, &wqe->fcp_iread.wqe_com, 0);
+
+		wqe->words[10] &= 0xffff0000; /* zero out ebde count */
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, iocbq->iocb.ulpPU);
+	break;
+	case CMD_GEN_REQUEST64_CR:
+		/* word3 command length is described as byte offset to the
+		 * rsp_data. Would always be 16, sizeof(struct sli4_sge)
+		 * sgl[0] = cmnd
+		 * sgl[1] = rsp.
+		 *
+		 */
+		wqe->gen_req.command_len = payload_len;
+		/* Word4 parameter  copied in the memcpy */
+		/* Word5 [rctl, type, df_ctl, la] copied in memcpy */
+		/* word6 context tag copied in memcpy */
+		if (iocbq->iocb.ulpCt_h  || iocbq->iocb.ulpCt_l) {
+			ct = ((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l);
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2015 Invalid CT %x command 0x%x\n",
+				ct, iocbq->iocb.ulpCommand);
+			return IOCB_ERROR;
+		}
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic, 0);
+		bf_set(wqe_tmo, &wqe->gen_req.wqe_com,
+			iocbq->iocb.ulpTimeout);
+
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, iocbq->iocb.ulpPU);
+		command_type = OTHER_COMMAND;
+	break;
+	case CMD_XMIT_ELS_RSP64_CX:
+		/* words0-2 BDE memcpy */
+		/* word3 iocb=iotag32 wqe=rsvd */
+		wqe->words[3] = 0;
+		/* word4 iocb=did wge=rsvd. */
+		wqe->words[4] = 0;
+		/* word5 iocb=rsvd wge=did */
+		bf_set(wqe_els_did, &wqe->xmit_els_rsp.wqe_dest,
+			 iocbq->iocb.un.elsreq64.remoteID);
+
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic,
+			((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l));
+
+		bf_set(lpfc_wqe_gen_pu, &wqe->generic, iocbq->iocb.ulpPU);
+		bf_set(wqe_rcvoxid, &wqe->generic, iocbq->iocb.ulpContext);
+		if (!iocbq->iocb.ulpCt_h && iocbq->iocb.ulpCt_l)
+			bf_set(lpfc_wqe_gen_context, &wqe->generic,
+			       iocbq->vport->vpi + phba->vpi_base);
+		command_type = OTHER_COMMAND;
+	break;
+	case CMD_CLOSE_XRI_CN:
+	case CMD_ABORT_XRI_CN:
+	case CMD_ABORT_XRI_CX:
+		/* words 0-2 memcpy should be 0 rserved */
+		/* port will send abts */
+		if (iocbq->iocb.ulpCommand == CMD_CLOSE_XRI_CN)
+			/*
+			 * The link is down so the fw does not need to send abts
+			 * on the wire.
+			 */
+			bf_set(abort_cmd_ia, &wqe->abort_cmd, 1);
+		else
+			bf_set(abort_cmd_ia, &wqe->abort_cmd, 0);
+		bf_set(abort_cmd_criteria, &wqe->abort_cmd, T_XRI_TAG);
+		abort_tag = iocbq->iocb.un.acxri.abortIoTag;
+		wqe->words[5] = 0;
+		bf_set(lpfc_wqe_gen_ct, &wqe->generic,
+			((iocbq->iocb.ulpCt_h << 1) | iocbq->iocb.ulpCt_l));
+		abort_tag = iocbq->iocb.un.acxri.abortIoTag;
+		wqe->generic.abort_tag = abort_tag;
+		/*
+		 * The abort handler will send us CMD_ABORT_XRI_CN or
+		 * CMD_CLOSE_XRI_CN and the fw only accepts CMD_ABORT_XRI_CX
+		 */
+		bf_set(lpfc_wqe_gen_command, &wqe->generic, CMD_ABORT_XRI_CX);
+		cmnd = CMD_ABORT_XRI_CX;
+		command_type = OTHER_COMMAND;
+		xritag = 0;
+	break;
+	case CMD_XRI_ABORTED_CX:
+	case CMD_CREATE_XRI_CR: /* Do we expect to use this? */
+		/* words0-2 are all 0's no bde */
+		/* word3 and word4 are rsvrd */
+		wqe->words[3] = 0;
+		wqe->words[4] = 0;
+		/* word5 iocb=rsvd wge=did */
+		/* There is no remote port id in the IOCB? */
+		/* Let this fall through and fail */
+	case CMD_IOCB_FCP_IBIDIR64_CR: /* bidirectional xfer */
+	case CMD_FCP_TSEND64_CX: /* Target mode send xfer-ready */
+	case CMD_FCP_TRSP64_CX: /* Target mode rcv */
+	case CMD_FCP_AUTO_TRSP_CX: /* Auto target rsp */
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2014 Invalid command 0x%x\n",
+				iocbq->iocb.ulpCommand);
+		return IOCB_ERROR;
+	break;
+
+	}
+	bf_set(lpfc_wqe_gen_xri, &wqe->generic, xritag);
+	bf_set(lpfc_wqe_gen_request_tag, &wqe->generic, iocbq->iotag);
+	wqe->generic.abort_tag = abort_tag;
+	bf_set(lpfc_wqe_gen_cmd_type, &wqe->generic, command_type);
+	bf_set(lpfc_wqe_gen_command, &wqe->generic, cmnd);
+	bf_set(lpfc_wqe_gen_class, &wqe->generic, iocbq->iocb.ulpClass);
+	bf_set(lpfc_wqe_gen_cq_id, &wqe->generic, LPFC_WQE_CQ_ID_DEFAULT);
+
+	return 0;
+}
+
+/**
+ * __lpfc_sli_issue_iocb_s4 - SLI4 device lockless ver of lpfc_sli_issue_iocb
+ * @phba: Pointer to HBA context object.
+ * @ring_number: SLI ring number to issue iocb on.
+ * @piocb: Pointer to command iocb.
+ * @flag: Flag indicating if this command can be put into txq.
+ *
+ * __lpfc_sli_issue_iocb_s4 is used by other functions in the driver to issue
+ * an iocb command to an HBA with SLI-4 interface spec.
+ *
+ * This function is called with hbalock held. The function will return success
+ * after it successfully submit the iocb to firmware or after adding to the
+ * txq.
+ **/
+static int
+__lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
+			 struct lpfc_iocbq *piocb, uint32_t flag)
+{
+	struct lpfc_sglq *sglq;
+	uint16_t xritag;
+	union lpfc_wqe wqe;
+	struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number];
+	uint32_t fcp_wqidx;
+
+	if (piocb->sli4_xritag == NO_XRI) {
+		if (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN ||
+			piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)
+			sglq = NULL;
+		else {
+			sglq = __lpfc_sli_get_sglq(phba);
+			if (!sglq)
+				return IOCB_ERROR;
+			piocb->sli4_xritag = sglq->sli4_xritag;
+		}
+	} else if (piocb->iocb_flag &  LPFC_IO_FCP) {
+		sglq = NULL; /* These IO's already have an XRI and
+			      * a mapped sgl.
+			      */
+	} else {
+		/* This is a continuation of a commandi,(CX) so this
+		 * sglq is on the active list
+		 */
+		sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag);
+		if (!sglq)
+			return IOCB_ERROR;
+	}
+
+	if (sglq) {
+		xritag = lpfc_sli4_bpl2sgl(phba, piocb, sglq);
+		if (xritag != sglq->sli4_xritag)
+			return IOCB_ERROR;
+	}
+
+	if (lpfc_sli4_iocb2wqe(phba, piocb, &wqe))
+		return IOCB_ERROR;
+
+	if (piocb->iocb_flag &  LPFC_IO_FCP) {
+		fcp_wqidx = lpfc_sli4_scmd_to_wqidx_distr(phba, piocb);
+		if (lpfc_sli4_wq_put(phba->sli4_hba.fcp_wq[fcp_wqidx], &wqe))
+			return IOCB_ERROR;
+	} else {
+		if (lpfc_sli4_wq_put(phba->sli4_hba.els_wq, &wqe))
+			return IOCB_ERROR;
+	}
+	lpfc_sli_ringtxcmpl_put(phba, pring, piocb);
+
+	return 0;
+}
+
+/**
+ * __lpfc_sli_issue_iocb - Wrapper func of lockless version for issuing iocb
+ *
+ * This routine wraps the actual lockless version for issusing IOCB function
+ * pointer from the lpfc_hba struct.
+ *
+ * Return codes:
+ * 	IOCB_ERROR - Error
+ * 	IOCB_SUCCESS - Success
+ * 	IOCB_BUSY - Busy
+ **/
+static inline int
+__lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
+		struct lpfc_iocbq *piocb, uint32_t flag)
+{
+	return phba->__lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
+}
+
+/**
+ * lpfc_sli_api_table_setup - Set up sli api fucntion jump table
+ * @phba: The hba struct for which this call is being executed.
+ * @dev_grp: The HBA PCI-Device group number.
+ *
+ * This routine sets up the SLI interface API function jump table in @phba
+ * struct.
+ * Returns: 0 - success, -ENODEV - failure.
+ **/
+int
+lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
+{
+
+	switch (dev_grp) {
+	case LPFC_PCI_DEV_LP:
+		phba->__lpfc_sli_issue_iocb = __lpfc_sli_issue_iocb_s3;
+		phba->__lpfc_sli_release_iocbq = __lpfc_sli_release_iocbq_s3;
+		break;
+	case LPFC_PCI_DEV_OC:
+		phba->__lpfc_sli_issue_iocb = __lpfc_sli_issue_iocb_s4;
+		phba->__lpfc_sli_release_iocbq = __lpfc_sli_release_iocbq_s4;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1419 Invalid HBA PCI-device group: 0x%x\n",
+				dev_grp);
+		return -ENODEV;
+		break;
+	}
+	phba->lpfc_get_iocb_from_iocbq = lpfc_get_iocb_from_iocbq;
+	return 0;
+}
+
+/**
+ * lpfc_sli_issue_iocb - Wrapper function for __lpfc_sli_issue_iocb
+ * @phba: Pointer to HBA context object.
+ * @pring: Pointer to driver SLI ring object.
+ * @piocb: Pointer to command iocb.
+ * @flag: Flag indicating if this command can be put into txq.
+ *
+ * lpfc_sli_issue_iocb is a wrapper around __lpfc_sli_issue_iocb
+ * function. This function gets the hbalock and calls
+ * __lpfc_sli_issue_iocb function and will return the error returned
+ * by __lpfc_sli_issue_iocb function. This wrapper is used by
+ * functions which do not hold hbalock.
+ **/
+int
+lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
+		    struct lpfc_iocbq *piocb, uint32_t flag)
+{
+	unsigned long iflags;
+	int rc;
+
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	return rc;
+}
+
+/**
+ * lpfc_extra_ring_setup - Extra ring setup function
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called while driver attaches with the
+ * HBA to setup the extra ring. The extra ring is used
+ * only when driver needs to support target mode functionality
+ * or IP over FC functionalities.
+ *
+ * This function is called with no lock held.
+ **/
+static int
+lpfc_extra_ring_setup( struct lpfc_hba *phba)
+{
+	struct lpfc_sli *psli;
+	struct lpfc_sli_ring *pring;
+
+	psli = &phba->sli;
+
+	/* Adjust cmd/rsp ring iocb entries more evenly */
+
+	/* Take some away from the FCP ring */
+	pring = &psli->ring[psli->fcp_ring];
+	pring->numCiocb -= SLI2_IOCB_CMD_R1XTRA_ENTRIES;
+	pring->numRiocb -= SLI2_IOCB_RSP_R1XTRA_ENTRIES;
 	pring->numCiocb -= SLI2_IOCB_CMD_R3XTRA_ENTRIES;
 	pring->numRiocb -= SLI2_IOCB_RSP_R3XTRA_ENTRIES;
 
@@ -7152,3 +8076,2461 @@ lpfc_sli_intr_handler(int irq, void *dev_id)
 	/* Return device-level interrupt handling status */
 	return (sp_irq_rc == IRQ_HANDLED) ? sp_irq_rc : fp_irq_rc;
 }  /* lpfc_sli_intr_handler */
+
+/**
+ * lpfc_sli4_fcp_xri_abort_event_proc - Process fcp xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 FCP abort XRI events.
+ **/
+void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+
+	/* First, declare the fcp xri abort event has been handled */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~FCP_XRI_ABORT_EVENT;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, handle all the fcp xri abort events */
+	while (!list_empty(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue)) {
+		/* Get the first event from the head of the event queue */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+		/* Notify aborted XRI for FCP work queue */
+		lpfc_sli4_fcp_xri_aborted(phba, &cq_event->cqe.wcqe_axri);
+		/* Free the event processed back to the free pool */
+		lpfc_sli4_cq_event_release(phba, cq_event);
+	}
+}
+
+/**
+ * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 els abort xri events.
+ **/
+void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+	struct lpfc_cq_event *cq_event;
+
+	/* First, declare the els xri abort event has been handled */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~ELS_XRI_ABORT_EVENT;
+	spin_unlock_irq(&phba->hbalock);
+	/* Now, handle all the els xri abort events */
+	while (!list_empty(&phba->sli4_hba.sp_els_xri_aborted_work_queue)) {
+		/* Get the first event from the head of the event queue */
+		spin_lock_irq(&phba->hbalock);
+		list_remove_head(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
+				 cq_event, struct lpfc_cq_event, list);
+		spin_unlock_irq(&phba->hbalock);
+		/* Notify aborted XRI for ELS work queue */
+		lpfc_sli4_els_xri_aborted(phba, &cq_event->cqe.wcqe_axri);
+		/* Free the event processed back to the free pool */
+		lpfc_sli4_cq_event_release(phba, cq_event);
+	}
+}
+
+static void
+lpfc_sli4_iocb_param_transfer(struct lpfc_iocbq *pIocbIn,
+			      struct lpfc_iocbq *pIocbOut,
+			      struct lpfc_wcqe_complete *wcqe)
+{
+	size_t offset = offsetof(struct lpfc_iocbq, iocb);
+
+	memcpy((char *)pIocbIn + offset, (char *)pIocbOut + offset,
+	       sizeof(struct lpfc_iocbq) - offset);
+	memset(&pIocbIn->sli4_info, 0,
+	       sizeof(struct lpfc_sli4_rspiocb_info));
+	/* Map WCQE parameters into irspiocb parameters */
+	pIocbIn->iocb.ulpStatus = bf_get(lpfc_wcqe_c_status, wcqe);
+	if (pIocbOut->iocb_flag & LPFC_IO_FCP)
+		if (pIocbIn->iocb.ulpStatus == IOSTAT_FCP_RSP_ERROR)
+			pIocbIn->iocb.un.fcpi.fcpi_parm =
+					pIocbOut->iocb.un.fcpi.fcpi_parm -
+					wcqe->total_data_placed;
+		else
+			pIocbIn->iocb.un.ulpWord[4] = wcqe->parameter;
+	else
+		pIocbIn->iocb.un.ulpWord[4] = wcqe->parameter;
+	/* Load in additional WCQE parameters */
+	pIocbIn->sli4_info.hw_status = bf_get(lpfc_wcqe_c_hw_status, wcqe);
+	pIocbIn->sli4_info.bfield = 0;
+	if (bf_get(lpfc_wcqe_c_xb, wcqe))
+		pIocbIn->sli4_info.bfield |= LPFC_XB;
+	if (bf_get(lpfc_wcqe_c_pv, wcqe)) {
+		pIocbIn->sli4_info.bfield |= LPFC_PV;
+		pIocbIn->sli4_info.priority =
+					bf_get(lpfc_wcqe_c_priority, wcqe);
+	}
+}
+
+/**
+ * lpfc_sli4_sp_handle_els_wcqe - Handle els work-queue completion event
+ * @phba: Pointer to HBA context object.
+ * @wcqe: Pointer to work-queue completion queue entry.
+ *
+ * This routine handles an ELS work-queue completion event.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba,
+			     struct lpfc_wcqe_complete *wcqe)
+{
+	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+	struct lpfc_iocbq *cmdiocbq;
+	struct lpfc_iocbq *irspiocbq;
+	unsigned long iflags;
+	bool workposted = false;
+
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	pring->stats.iocb_event++;
+	/* Look up the ELS command IOCB and create pseudo response IOCB */
+	cmdiocbq = lpfc_sli_iocbq_lookup_by_tag(phba, pring,
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	if (unlikely(!cmdiocbq)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"0386 ELS complete with no corresponding "
+				"cmdiocb: iotag (%d)\n",
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+		return workposted;
+	}
+
+	/* Fake the irspiocbq and copy necessary response information */
+	irspiocbq = lpfc_sli_get_iocbq(phba);
+	if (!irspiocbq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0387 Failed to allocate an iocbq\n");
+		return workposted;
+	}
+	lpfc_sli4_iocb_param_transfer(irspiocbq, cmdiocbq, wcqe);
+
+	/* Add the irspiocb to the response IOCB work list */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	list_add_tail(&irspiocbq->list, &phba->sli4_hba.sp_rspiocb_work_queue);
+	/* Indicate ELS ring attention */
+	phba->work_ha |= (HA_R0ATT << (4*LPFC_ELS_RING));
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	workposted = true;
+
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_rel_wcqe - Handle slow-path WQ entry consumed event
+ * @phba: Pointer to HBA context object.
+ * @wcqe: Pointer to work-queue completion queue entry.
+ *
+ * This routine handles slow-path WQ entry comsumed event by invoking the
+ * proper WQ release routine to the slow-path WQ.
+ **/
+static void
+lpfc_sli4_sp_handle_rel_wcqe(struct lpfc_hba *phba,
+			     struct lpfc_wcqe_release *wcqe)
+{
+	/* Check for the slow-path ELS work queue */
+	if (bf_get(lpfc_wcqe_r_wq_id, wcqe) == phba->sli4_hba.els_wq->queue_id)
+		lpfc_sli4_wq_release(phba->sli4_hba.els_wq,
+				     bf_get(lpfc_wcqe_r_wqe_index, wcqe));
+	else
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"2579 Slow-path wqe consume event carries "
+				"miss-matched qid: wcqe-qid=x%x, sp-qid=x%x\n",
+				bf_get(lpfc_wcqe_r_wqe_index, wcqe),
+				phba->sli4_hba.els_wq->queue_id);
+}
+
+/**
+ * lpfc_sli4_sp_handle_abort_xri_wcqe - Handle a xri abort event
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to a WQ completion queue.
+ * @wcqe: Pointer to work-queue completion queue entry.
+ *
+ * This routine handles an XRI abort event.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
+				   struct lpfc_queue *cq,
+				   struct sli4_wcqe_xri_aborted *wcqe)
+{
+	bool workposted = false;
+	struct lpfc_cq_event *cq_event;
+	unsigned long iflags;
+
+	/* Allocate a new internal CQ_EVENT entry */
+	cq_event = lpfc_sli4_cq_event_alloc(phba);
+	if (!cq_event) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0602 Failed to allocate CQ_EVENT entry\n");
+		return false;
+	}
+
+	/* Move the CQE into the proper xri abort event list */
+	memcpy(&cq_event->cqe, wcqe, sizeof(struct sli4_wcqe_xri_aborted));
+	switch (cq->subtype) {
+	case LPFC_FCP:
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		list_add_tail(&cq_event->list,
+			      &phba->sli4_hba.sp_fcp_xri_aborted_work_queue);
+		/* Set the fcp xri abort event flag */
+		phba->hba_flag |= FCP_XRI_ABORT_EVENT;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
+	case LPFC_ELS:
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		list_add_tail(&cq_event->list,
+			      &phba->sli4_hba.sp_els_xri_aborted_work_queue);
+		/* Set the els xri abort event flag */
+		phba->hba_flag |= ELS_XRI_ABORT_EVENT;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0603 Invalid work queue CQE subtype (x%x)\n",
+				cq->subtype);
+		workposted = false;
+		break;
+	}
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_wcqe - Process a work-queue completion queue entry
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to the completion queue.
+ * @wcqe: Pointer to a completion queue entry.
+ *
+ * This routine process a slow-path work-queue completion queue entry.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			 struct lpfc_cqe *cqe)
+{
+	struct lpfc_wcqe_complete wcqe;
+	bool workposted = false;
+
+	/* Copy the work queue CQE and convert endian order if needed */
+	lpfc_sli_pcimem_bcopy(cqe, &wcqe, sizeof(struct lpfc_cqe));
+
+	/* Check and process for different type of WCQE and dispatch */
+	switch (bf_get(lpfc_wcqe_c_code, &wcqe)) {
+	case CQE_CODE_COMPL_WQE:
+		/* Process the WQ complete event */
+		workposted = lpfc_sli4_sp_handle_els_wcqe(phba,
+					(struct lpfc_wcqe_complete *)&wcqe);
+		break;
+	case CQE_CODE_RELEASE_WQE:
+		/* Process the WQ release event */
+		lpfc_sli4_sp_handle_rel_wcqe(phba,
+					(struct lpfc_wcqe_release *)&wcqe);
+		break;
+	case CQE_CODE_XRI_ABORTED:
+		/* Process the WQ XRI abort event */
+		workposted = lpfc_sli4_sp_handle_abort_xri_wcqe(phba, cq,
+					(struct sli4_wcqe_xri_aborted *)&wcqe);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0388 Not a valid WCQE code: x%x\n",
+				bf_get(lpfc_wcqe_c_code, &wcqe));
+		break;
+	}
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_rcqe - Process a receive-queue completion queue entry
+ * @phba: Pointer to HBA context object.
+ * @rcqe: Pointer to receive-queue completion queue entry.
+ *
+ * This routine process a receive-queue completion queue entry.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
+{
+	struct lpfc_rcqe rcqe;
+	bool workposted = false;
+	struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq;
+	struct lpfc_queue *drq = phba->sli4_hba.dat_rq;
+	struct hbq_dmabuf *dma_buf;
+	uint32_t status;
+	unsigned long iflags;
+
+	/* Copy the receive queue CQE and convert endian order if needed */
+	lpfc_sli_pcimem_bcopy(cqe, &rcqe, sizeof(struct lpfc_rcqe));
+	lpfc_sli4_rq_release(hrq, drq);
+	if (bf_get(lpfc_rcqe_code, &rcqe) != CQE_CODE_RECEIVE)
+		goto out;
+	if (bf_get(lpfc_rcqe_rq_id, &rcqe) != hrq->queue_id)
+		goto out;
+
+	status = bf_get(lpfc_rcqe_status, &rcqe);
+	switch (status) {
+	case FC_STATUS_RQ_BUF_LEN_EXCEEDED:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2537 Receive Frame Truncated!!\n");
+	case FC_STATUS_RQ_SUCCESS:
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		dma_buf = lpfc_sli_hbqbuf_get(&phba->hbqs[0].hbq_buffer_list);
+		if (!dma_buf) {
+			spin_unlock_irqrestore(&phba->hbalock, iflags);
+			goto out;
+		}
+		memcpy(&dma_buf->rcqe, &rcqe, sizeof(rcqe));
+		/* save off the frame for the word thread to process */
+		list_add_tail(&dma_buf->dbuf.list, &phba->rb_pend_list);
+		/* Frame received */
+		phba->hba_flag |= HBA_RECEIVE_BUFFER;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
+	case FC_STATUS_INSUFF_BUF_NEED_BUF:
+	case FC_STATUS_INSUFF_BUF_FRM_DISC:
+		/* Post more buffers if possible */
+		spin_lock_irqsave(&phba->hbalock, iflags);
+		phba->hba_flag |= HBA_POST_RECEIVE_BUFFER;
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		workposted = true;
+		break;
+	}
+out:
+	return workposted;
+
+}
+
+/**
+ * lpfc_sli4_sp_handle_eqe - Process a slow-path event queue entry
+ * @phba: Pointer to HBA context object.
+ * @eqe: Pointer to fast-path event queue entry.
+ *
+ * This routine process a event queue entry from the slow-path event queue.
+ * It will check the MajorCode and MinorCode to determine this is for a
+ * completion event on a completion queue, if not, an error shall be logged
+ * and just return. Otherwise, it will get to the corresponding completion
+ * queue and process all the entries on that completion queue, rearm the
+ * completion queue, and then return.
+ *
+ **/
+static void
+lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
+{
+	struct lpfc_queue *cq = NULL, *childq, *speq;
+	struct lpfc_cqe *cqe;
+	bool workposted = false;
+	int ecount = 0;
+	uint16_t cqid;
+
+	if (bf_get(lpfc_eqe_major_code, eqe) != 0 ||
+	    bf_get(lpfc_eqe_minor_code, eqe) != 0) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0359 Not a valid slow-path completion "
+				"event: majorcode=x%x, minorcode=x%x\n",
+				bf_get(lpfc_eqe_major_code, eqe),
+				bf_get(lpfc_eqe_minor_code, eqe));
+		return;
+	}
+
+	/* Get the reference to the corresponding CQ */
+	cqid = bf_get(lpfc_eqe_resource_id, eqe);
+
+	/* Search for completion queue pointer matching this cqid */
+	speq = phba->sli4_hba.sp_eq;
+	list_for_each_entry(childq, &speq->child_list, list) {
+		if (childq->queue_id == cqid) {
+			cq = childq;
+			break;
+		}
+	}
+	if (unlikely(!cq)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0365 Slow-path CQ identifier (%d) does "
+				"not exist\n", cqid);
+		return;
+	}
+
+	/* Process all the entries to the CQ */
+	switch (cq->type) {
+	case LPFC_MCQ:
+		while ((cqe = lpfc_sli4_cq_get(cq))) {
+			workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
+			if (!(++ecount % LPFC_GET_QE_REL_INT))
+				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+		}
+		break;
+	case LPFC_WCQ:
+		while ((cqe = lpfc_sli4_cq_get(cq))) {
+			workposted |= lpfc_sli4_sp_handle_wcqe(phba, cq, cqe);
+			if (!(++ecount % LPFC_GET_QE_REL_INT))
+				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+		}
+		break;
+	case LPFC_RCQ:
+		while ((cqe = lpfc_sli4_cq_get(cq))) {
+			workposted |= lpfc_sli4_sp_handle_rcqe(phba, cqe);
+			if (!(++ecount % LPFC_GET_QE_REL_INT))
+				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+		}
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0370 Invalid completion queue type (%d)\n",
+				cq->type);
+		return;
+	}
+
+	/* Catch the no cq entry condition, log an error */
+	if (unlikely(ecount == 0))
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0371 No entry from the CQ: identifier "
+				"(x%x), type (%d)\n", cq->queue_id, cq->type);
+
+	/* In any case, flash and re-arm the RCQ */
+	lpfc_sli4_cq_release(cq, LPFC_QUEUE_REARM);
+
+	/* wake up worker thread if there are works to be done */
+	if (workposted)
+		lpfc_worker_wake_up(phba);
+}
+
+/**
+ * lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
+ * @eqe: Pointer to fast-path completion queue entry.
+ *
+ * This routine process a fast-path work queue completion entry from fast-path
+ * event queue for FCP command response completion.
+ **/
+static void
+lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba,
+			     struct lpfc_wcqe_complete *wcqe)
+{
+	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_FCP_RING];
+	struct lpfc_iocbq *cmdiocbq;
+	struct lpfc_iocbq irspiocbq;
+	unsigned long iflags;
+
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	pring->stats.iocb_event++;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	/* Check for response status */
+	if (unlikely(bf_get(lpfc_wcqe_c_status, wcqe))) {
+		/* If resource errors reported from HBA, reduce queue
+		 * depth of the SCSI device.
+		 */
+		if ((bf_get(lpfc_wcqe_c_status, wcqe) ==
+		     IOSTAT_LOCAL_REJECT) &&
+		    (wcqe->parameter == IOERR_NO_RESOURCES)) {
+			phba->lpfc_rampdown_queue_depth(phba);
+		}
+		/* Log the error status */
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"0373 FCP complete error: status=x%x, "
+				"hw_status=x%x, total_data_specified=%d, "
+				"parameter=x%x, word3=x%x\n",
+				bf_get(lpfc_wcqe_c_status, wcqe),
+				bf_get(lpfc_wcqe_c_hw_status, wcqe),
+				wcqe->total_data_placed, wcqe->parameter,
+				wcqe->word3);
+	}
+
+	/* Look up the FCP command IOCB and create pseudo response IOCB */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	cmdiocbq = lpfc_sli_iocbq_lookup_by_tag(phba, pring,
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	if (unlikely(!cmdiocbq)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"0374 FCP complete with no corresponding "
+				"cmdiocb: iotag (%d)\n",
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+		return;
+	}
+	if (unlikely(!cmdiocbq->iocb_cmpl)) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"0375 FCP cmdiocb not callback function "
+				"iotag: (%d)\n",
+				bf_get(lpfc_wcqe_c_request_tag, wcqe));
+		return;
+	}
+
+	/* Fake the irspiocb and copy necessary response information */
+	lpfc_sli4_iocb_param_transfer(&irspiocbq, cmdiocbq, wcqe);
+
+	/* Pass the cmd_iocb and the rsp state to the upper layer */
+	(cmdiocbq->iocb_cmpl)(phba, cmdiocbq, &irspiocbq);
+}
+
+/**
+ * lpfc_sli4_fp_handle_rel_wcqe - Handle fast-path WQ entry consumed event
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to completion queue.
+ * @wcqe: Pointer to work-queue completion queue entry.
+ *
+ * This routine handles an fast-path WQ entry comsumed event by invoking the
+ * proper WQ release routine to the slow-path WQ.
+ **/
+static void
+lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			     struct lpfc_wcqe_release *wcqe)
+{
+	struct lpfc_queue *childwq;
+	bool wqid_matched = false;
+	uint16_t fcp_wqid;
+
+	/* Check for fast-path FCP work queue release */
+	fcp_wqid = bf_get(lpfc_wcqe_r_wq_id, wcqe);
+	list_for_each_entry(childwq, &cq->child_list, list) {
+		if (childwq->queue_id == fcp_wqid) {
+			lpfc_sli4_wq_release(childwq,
+					bf_get(lpfc_wcqe_r_wqe_index, wcqe));
+			wqid_matched = true;
+			break;
+		}
+	}
+	/* Report warning log message if no match found */
+	if (wqid_matched != true)
+		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+				"2580 Fast-path wqe consume event carries "
+				"miss-matched qid: wcqe-qid=x%x\n", fcp_wqid);
+}
+
+/**
+ * lpfc_sli4_fp_handle_wcqe - Process fast-path work queue completion entry
+ * @cq: Pointer to the completion queue.
+ * @eqe: Pointer to fast-path completion queue entry.
+ *
+ * This routine process a fast-path work queue completion entry from fast-path
+ * event queue for FCP command response completion.
+ **/
+static int
+lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			 struct lpfc_cqe *cqe)
+{
+	struct lpfc_wcqe_release wcqe;
+	bool workposted = false;
+
+	/* Copy the work queue CQE and convert endian order if needed */
+	lpfc_sli_pcimem_bcopy(cqe, &wcqe, sizeof(struct lpfc_cqe));
+
+	/* Check and process for different type of WCQE and dispatch */
+	switch (bf_get(lpfc_wcqe_c_code, &wcqe)) {
+	case CQE_CODE_COMPL_WQE:
+		/* Process the WQ complete event */
+		lpfc_sli4_fp_handle_fcp_wcqe(phba,
+				(struct lpfc_wcqe_complete *)&wcqe);
+		break;
+	case CQE_CODE_RELEASE_WQE:
+		/* Process the WQ release event */
+		lpfc_sli4_fp_handle_rel_wcqe(phba, cq,
+				(struct lpfc_wcqe_release *)&wcqe);
+		break;
+	case CQE_CODE_XRI_ABORTED:
+		/* Process the WQ XRI abort event */
+		workposted = lpfc_sli4_sp_handle_abort_xri_wcqe(phba, cq,
+				(struct sli4_wcqe_xri_aborted *)&wcqe);
+		break;
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0144 Not a valid WCQE code: x%x\n",
+				bf_get(lpfc_wcqe_c_code, &wcqe));
+		break;
+	}
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_fp_handle_eqe - Process a fast-path event queue entry
+ * @phba: Pointer to HBA context object.
+ * @eqe: Pointer to fast-path event queue entry.
+ *
+ * This routine process a event queue entry from the fast-path event queue.
+ * It will check the MajorCode and MinorCode to determine this is for a
+ * completion event on a completion queue, if not, an error shall be logged
+ * and just return. Otherwise, it will get to the corresponding completion
+ * queue and process all the entries on the completion queue, rearm the
+ * completion queue, and then return.
+ **/
+static void
+lpfc_sli4_fp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
+			uint32_t fcp_cqidx)
+{
+	struct lpfc_queue *cq;
+	struct lpfc_cqe *cqe;
+	bool workposted = false;
+	uint16_t cqid;
+	int ecount = 0;
+
+	if (unlikely(bf_get(lpfc_eqe_major_code, eqe) != 0) ||
+	    unlikely(bf_get(lpfc_eqe_minor_code, eqe) != 0)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0366 Not a valid fast-path completion "
+				"event: majorcode=x%x, minorcode=x%x\n",
+				bf_get(lpfc_eqe_major_code, eqe),
+				bf_get(lpfc_eqe_minor_code, eqe));
+		return;
+	}
+
+	cq = phba->sli4_hba.fcp_cq[fcp_cqidx];
+	if (unlikely(!cq)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0367 Fast-path completion queue does not "
+				"exist\n");
+		return;
+	}
+
+	/* Get the reference to the corresponding CQ */
+	cqid = bf_get(lpfc_eqe_resource_id, eqe);
+	if (unlikely(cqid != cq->queue_id)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0368 Miss-matched fast-path completion "
+				"queue identifier: eqcqid=%d, fcpcqid=%d\n",
+				cqid, cq->queue_id);
+		return;
+	}
+
+	/* Process all the entries to the CQ */
+	while ((cqe = lpfc_sli4_cq_get(cq))) {
+		workposted |= lpfc_sli4_fp_handle_wcqe(phba, cq, cqe);
+		if (!(++ecount % LPFC_GET_QE_REL_INT))
+			lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+	}
+
+	/* Catch the no cq entry condition */
+	if (unlikely(ecount == 0))
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0369 No entry from fast-path completion "
+				"queue fcpcqid=%d\n", cq->queue_id);
+
+	/* In any case, flash and re-arm the CQ */
+	lpfc_sli4_cq_release(cq, LPFC_QUEUE_REARM);
+
+	/* wake up worker thread if there are works to be done */
+	if (workposted)
+		lpfc_worker_wake_up(phba);
+}
+
+static void
+lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	struct lpfc_eqe *eqe;
+
+	/* walk all the EQ entries and drop on the floor */
+	while ((eqe = lpfc_sli4_eq_get(eq)))
+		;
+
+	/* Clear and re-arm the EQ */
+	lpfc_sli4_eq_release(eq, LPFC_QUEUE_REARM);
+}
+
+/**
+ * lpfc_sli4_sp_intr_handler - Slow-path interrupt handler to SLI-4 device
+ * @irq: Interrupt number.
+ * @dev_id: The device context pointer.
+ *
+ * This function is directly called from the PCI layer as an interrupt
+ * service routine when device with SLI-4 interface spec is enabled with
+ * MSI-X multi-message interrupt mode and there are slow-path events in
+ * the HBA. However, when the device is enabled with either MSI or Pin-IRQ
+ * interrupt mode, this function is called as part of the device-level
+ * interrupt handler. When the PCI slot is in error recovery or the HBA is
+ * undergoing initialization, the interrupt handler will not process the
+ * interrupt. The link attention and ELS ring attention events are handled
+ * by the worker thread. The interrupt handler signals the worker thread
+ * and returns for these events. This function is called without any lock
+ * held. It gets the hbalock to access and update SLI data structures.
+ *
+ * This function returns IRQ_HANDLED when interrupt is handled else it
+ * returns IRQ_NONE.
+ **/
+irqreturn_t
+lpfc_sli4_sp_intr_handler(int irq, void *dev_id)
+{
+	struct lpfc_hba *phba;
+	struct lpfc_queue *speq;
+	struct lpfc_eqe *eqe;
+	unsigned long iflag;
+	int ecount = 0;
+
+	/*
+	 * Get the driver's phba structure from the dev_id
+	 */
+	phba = (struct lpfc_hba *)dev_id;
+
+	if (unlikely(!phba))
+		return IRQ_NONE;
+
+	/* Get to the EQ struct associated with this vector */
+	speq = phba->sli4_hba.sp_eq;
+
+	/* Check device state for handling interrupt */
+	if (unlikely(lpfc_intr_state_check(phba))) {
+		/* Check again for link_state with lock held */
+		spin_lock_irqsave(&phba->hbalock, iflag);
+		if (phba->link_state < LPFC_LINK_DOWN)
+			/* Flush, clear interrupt, and rearm the EQ */
+			lpfc_sli4_eq_flush(phba, speq);
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		return IRQ_NONE;
+	}
+
+	/*
+	 * Process all the event on FCP slow-path EQ
+	 */
+	while ((eqe = lpfc_sli4_eq_get(speq))) {
+		lpfc_sli4_sp_handle_eqe(phba, eqe);
+		if (!(++ecount % LPFC_GET_QE_REL_INT))
+			lpfc_sli4_eq_release(speq, LPFC_QUEUE_NOARM);
+	}
+
+	/* Always clear and re-arm the slow-path EQ */
+	lpfc_sli4_eq_release(speq, LPFC_QUEUE_REARM);
+
+	/* Catch the no cq entry condition */
+	if (unlikely(ecount == 0)) {
+		if (phba->intr_type == MSIX)
+			/* MSI-X treated interrupt served as no EQ share INT */
+			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+					"0357 MSI-X interrupt with no EQE\n");
+		else
+			/* Non MSI-X treated on interrupt as EQ share INT */
+			return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+} /* lpfc_sli4_sp_intr_handler */
+
+/**
+ * lpfc_sli4_fp_intr_handler - Fast-path interrupt handler to SLI-4 device
+ * @irq: Interrupt number.
+ * @dev_id: The device context pointer.
+ *
+ * This function is directly called from the PCI layer as an interrupt
+ * service routine when device with SLI-4 interface spec is enabled with
+ * MSI-X multi-message interrupt mode and there is a fast-path FCP IOCB
+ * ring event in the HBA. However, when the device is enabled with either
+ * MSI or Pin-IRQ interrupt mode, this function is called as part of the
+ * device-level interrupt handler. When the PCI slot is in error recovery
+ * or the HBA is undergoing initialization, the interrupt handler will not
+ * process the interrupt. The SCSI FCP fast-path ring event are handled in
+ * the intrrupt context. This function is called without any lock held.
+ * It gets the hbalock to access and update SLI data structures. Note that,
+ * the FCP EQ to FCP CQ are one-to-one map such that the FCP EQ index is
+ * equal to that of FCP CQ index.
+ *
+ * This function returns IRQ_HANDLED when interrupt is handled else it
+ * returns IRQ_NONE.
+ **/
+irqreturn_t
+lpfc_sli4_fp_intr_handler(int irq, void *dev_id)
+{
+	struct lpfc_hba *phba;
+	struct lpfc_fcp_eq_hdl *fcp_eq_hdl;
+	struct lpfc_queue *fpeq;
+	struct lpfc_eqe *eqe;
+	unsigned long iflag;
+	int ecount = 0;
+	uint32_t fcp_eqidx;
+
+	/* Get the driver's phba structure from the dev_id */
+	fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id;
+	phba = fcp_eq_hdl->phba;
+	fcp_eqidx = fcp_eq_hdl->idx;
+
+	if (unlikely(!phba))
+		return IRQ_NONE;
+
+	/* Get to the EQ struct associated with this vector */
+	fpeq = phba->sli4_hba.fp_eq[fcp_eqidx];
+
+	/* Check device state for handling interrupt */
+	if (unlikely(lpfc_intr_state_check(phba))) {
+		/* Check again for link_state with lock held */
+		spin_lock_irqsave(&phba->hbalock, iflag);
+		if (phba->link_state < LPFC_LINK_DOWN)
+			/* Flush, clear interrupt, and rearm the EQ */
+			lpfc_sli4_eq_flush(phba, fpeq);
+		spin_unlock_irqrestore(&phba->hbalock, iflag);
+		return IRQ_NONE;
+	}
+
+	/*
+	 * Process all the event on FCP fast-path EQ
+	 */
+	while ((eqe = lpfc_sli4_eq_get(fpeq))) {
+		lpfc_sli4_fp_handle_eqe(phba, eqe, fcp_eqidx);
+		if (!(++ecount % LPFC_GET_QE_REL_INT))
+			lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM);
+	}
+
+	/* Always clear and re-arm the fast-path EQ */
+	lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+
+	if (unlikely(ecount == 0)) {
+		if (phba->intr_type == MSIX)
+			/* MSI-X treated interrupt served as no EQ share INT */
+			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+					"0358 MSI-X interrupt with no EQE\n");
+		else
+			/* Non MSI-X treated on interrupt as EQ share INT */
+			return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+} /* lpfc_sli4_fp_intr_handler */
+
+/**
+ * lpfc_sli4_intr_handler - Device-level interrupt handler for SLI-4 device
+ * @irq: Interrupt number.
+ * @dev_id: The device context pointer.
+ *
+ * This function is the device-level interrupt handler to device with SLI-4
+ * interface spec, called from the PCI layer when either MSI or Pin-IRQ
+ * interrupt mode is enabled and there is an event in the HBA which requires
+ * driver attention. This function invokes the slow-path interrupt attention
+ * handling function and fast-path interrupt attention handling function in
+ * turn to process the relevant HBA attention events. This function is called
+ * without any lock held. It gets the hbalock to access and update SLI data
+ * structures.
+ *
+ * This function returns IRQ_HANDLED when interrupt is handled, else it
+ * returns IRQ_NONE.
+ **/
+irqreturn_t
+lpfc_sli4_intr_handler(int irq, void *dev_id)
+{
+	struct lpfc_hba  *phba;
+	irqreturn_t sp_irq_rc, fp_irq_rc;
+	bool fp_handled = false;
+	uint32_t fcp_eqidx;
+
+	/* Get the driver's phba structure from the dev_id */
+	phba = (struct lpfc_hba *)dev_id;
+
+	if (unlikely(!phba))
+		return IRQ_NONE;
+
+	/*
+	 * Invokes slow-path host attention interrupt handling as appropriate.
+	 */
+	sp_irq_rc = lpfc_sli4_sp_intr_handler(irq, dev_id);
+
+	/*
+	 * Invoke fast-path host attention interrupt handling as appropriate.
+	 */
+	for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_eq_count; fcp_eqidx++) {
+		fp_irq_rc = lpfc_sli4_fp_intr_handler(irq,
+					&phba->sli4_hba.fcp_eq_hdl[fcp_eqidx]);
+		if (fp_irq_rc == IRQ_HANDLED)
+			fp_handled |= true;
+	}
+
+	return (fp_handled == true) ? IRQ_HANDLED : sp_irq_rc;
+} /* lpfc_sli4_intr_handler */
+
+/**
+ * lpfc_sli4_queue_free - free a queue structure and associated memory
+ * @queue: The queue structure to free.
+ *
+ * This function frees a queue structure and the DMAable memeory used for
+ * the host resident queue. This function must be called after destroying the
+ * queue on the HBA.
+ **/
+void
+lpfc_sli4_queue_free(struct lpfc_queue *queue)
+{
+	struct lpfc_dmabuf *dmabuf;
+
+	if (!queue)
+		return;
+
+	while (!list_empty(&queue->page_list)) {
+		list_remove_head(&queue->page_list, dmabuf, struct lpfc_dmabuf,
+				 list);
+		dma_free_coherent(&queue->phba->pcidev->dev, PAGE_SIZE,
+				  dmabuf->virt, dmabuf->phys);
+		kfree(dmabuf);
+	}
+	kfree(queue);
+	return;
+}
+
+/**
+ * lpfc_sli4_queue_alloc - Allocate and initialize a queue structure
+ * @phba: The HBA that this queue is being created on.
+ * @entry_size: The size of each queue entry for this queue.
+ * @entry count: The number of entries that this queue will handle.
+ *
+ * This function allocates a queue structure and the DMAable memory used for
+ * the host resident queue. This function must be called before creating the
+ * queue on the HBA.
+ **/
+struct lpfc_queue *
+lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size,
+		      uint32_t entry_count)
+{
+	struct lpfc_queue *queue;
+	struct lpfc_dmabuf *dmabuf;
+	int x, total_qe_count;
+	void *dma_pointer;
+
+
+	queue = kzalloc(sizeof(struct lpfc_queue) +
+			(sizeof(union sli4_qe) * entry_count), GFP_KERNEL);
+	if (!queue)
+		return NULL;
+	queue->page_count = (PAGE_ALIGN(entry_size * entry_count))/PAGE_SIZE;
+	INIT_LIST_HEAD(&queue->list);
+	INIT_LIST_HEAD(&queue->page_list);
+	INIT_LIST_HEAD(&queue->child_list);
+	for (x = 0, total_qe_count = 0; x < queue->page_count; x++) {
+		dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+		if (!dmabuf)
+			goto out_fail;
+		dmabuf->virt = dma_alloc_coherent(&phba->pcidev->dev,
+						  PAGE_SIZE, &dmabuf->phys,
+						  GFP_KERNEL);
+		if (!dmabuf->virt) {
+			kfree(dmabuf);
+			goto out_fail;
+		}
+		dmabuf->buffer_tag = x;
+		list_add_tail(&dmabuf->list, &queue->page_list);
+		/* initialize queue's entry array */
+		dma_pointer = dmabuf->virt;
+		for (; total_qe_count < entry_count &&
+		     dma_pointer < (PAGE_SIZE + dmabuf->virt);
+		     total_qe_count++, dma_pointer += entry_size) {
+			queue->qe[total_qe_count].address = dma_pointer;
+		}
+	}
+	queue->entry_size = entry_size;
+	queue->entry_count = entry_count;
+	queue->phba = phba;
+
+	return queue;
+out_fail:
+	lpfc_sli4_queue_free(queue);
+	return NULL;
+}
+
+/**
+ * lpfc_eq_create - Create an Event Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @eq: The queue structure to use to create the event queue.
+ * @imax: The maximum interrupt per second limit.
+ *
+ * This function creates an event queue, as detailed in @eq, on a port,
+ * described by @phba by sending an EQ_CREATE mailbox command to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @eq struct
+ * is used to get the entry count and entry size that are necessary to
+ * determine the number of pages to allocate and use for this queue. This
+ * function will send the EQ_CREATE mailbox command to the HBA to setup the
+ * event queue. This function is asynchronous and will wait for the mailbox
+ * command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint16_t imax)
+{
+	struct lpfc_mbx_eq_create *eq_create;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	struct lpfc_dmabuf *dmabuf;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+	uint16_t dmult;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_eq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_EQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	eq_create = &mbox->u.mqe.un.eq_create;
+	bf_set(lpfc_mbx_eq_create_num_pages, &eq_create->u.request,
+	       eq->page_count);
+	bf_set(lpfc_eq_context_size, &eq_create->u.request.context,
+	       LPFC_EQE_SIZE);
+	bf_set(lpfc_eq_context_valid, &eq_create->u.request.context, 1);
+	/* Calculate delay multiper from maximum interrupt per second */
+	dmult = LPFC_DMULT_CONST/imax - 1;
+	bf_set(lpfc_eq_context_delay_multi, &eq_create->u.request.context,
+	       dmult);
+	switch (eq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0360 Unsupported EQ count. (%d)\n",
+				eq->entry_count);
+		if (eq->entry_count < 256)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 256:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_256);
+		break;
+	case 512:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_512);
+		break;
+	case 1024:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_1024);
+		break;
+	case 2048:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_2048);
+		break;
+	case 4096:
+		bf_set(lpfc_eq_context_count, &eq_create->u.request.context,
+		       LPFC_EQ_CNT_4096);
+		break;
+	}
+	list_for_each_entry(dmabuf, &eq->page_list, list) {
+		eq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		eq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	mbox->vport = phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	mbox->context1 = NULL;
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *) &eq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2500 EQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	eq->type = LPFC_EQ;
+	eq->subtype = LPFC_NONE;
+	eq->queue_id = bf_get(lpfc_mbx_eq_create_q_id, &eq_create->u.response);
+	if (eq->queue_id == 0xFFFF)
+		status = -ENXIO;
+	eq->host_index = 0;
+	eq->hba_index = 0;
+
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_cq_create - Create a Completion Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @cq: The queue structure to use to create the completion queue.
+ * @eq: The event queue to bind this completion queue to.
+ *
+ * This function creates a completion queue, as detailed in @wq, on a port,
+ * described by @phba by sending a CQ_CREATE mailbox command to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @cq struct
+ * is used to get the entry count and entry size that are necessary to
+ * determine the number of pages to allocate and use for this queue. The @eq
+ * is used to indicate which event queue to bind this completion queue to. This
+ * function will send the CQ_CREATE mailbox command to the HBA to setup the
+ * completion queue. This function is asynchronous and will wait for the mailbox
+ * command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
+	       struct lpfc_queue *eq, uint32_t type, uint32_t subtype)
+{
+	struct lpfc_mbx_cq_create *cq_create;
+	struct lpfc_dmabuf *dmabuf;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_cq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_CQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	cq_create = &mbox->u.mqe.un.cq_create;
+	bf_set(lpfc_mbx_cq_create_num_pages, &cq_create->u.request,
+		    cq->page_count);
+	bf_set(lpfc_cq_context_event, &cq_create->u.request.context, 1);
+	bf_set(lpfc_cq_context_valid, &cq_create->u.request.context, 1);
+	bf_set(lpfc_cq_eq_id, &cq_create->u.request.context, eq->queue_id);
+	switch (cq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0361 Unsupported CQ count. (%d)\n",
+				cq->entry_count);
+		if (cq->entry_count < 256)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 256:
+		bf_set(lpfc_cq_context_count, &cq_create->u.request.context,
+		       LPFC_CQ_CNT_256);
+		break;
+	case 512:
+		bf_set(lpfc_cq_context_count, &cq_create->u.request.context,
+		       LPFC_CQ_CNT_512);
+		break;
+	case 1024:
+		bf_set(lpfc_cq_context_count, &cq_create->u.request.context,
+		       LPFC_CQ_CNT_1024);
+		break;
+	}
+	list_for_each_entry(dmabuf, &cq->page_list, list) {
+		cq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		cq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &cq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2501 CQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+		goto out;
+	}
+	cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
+	if (cq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	/* link the cq onto the parent eq child list */
+	list_add_tail(&cq->list, &eq->child_list);
+	/* Set up completion queue's type and subtype */
+	cq->type = type;
+	cq->subtype = subtype;
+	cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
+	cq->host_index = 0;
+	cq->hba_index = 0;
+out:
+
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_wq_create - Create a Work Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @wq: The queue structure to use to create the work queue.
+ * @cq: The completion queue to bind this work queue to.
+ * @subtype: The subtype of the work queue indicating its functionality.
+ *
+ * This function creates a work queue, as detailed in @wq, on a port, described
+ * by @phba by sending a WQ_CREATE mailbox command to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @wq struct
+ * is used to get the entry count and entry size that are necessary to
+ * determine the number of pages to allocate and use for this queue. The @cq
+ * is used to indicate which completion queue to bind this work queue to. This
+ * function will send the WQ_CREATE mailbox command to the HBA to setup the
+ * work queue. This function is asynchronous and will wait for the mailbox
+ * command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
+	       struct lpfc_queue *cq, uint32_t subtype)
+{
+	struct lpfc_mbx_wq_create *wq_create;
+	struct lpfc_dmabuf *dmabuf;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_wq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_WQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	wq_create = &mbox->u.mqe.un.wq_create;
+	bf_set(lpfc_mbx_wq_create_num_pages, &wq_create->u.request,
+		    wq->page_count);
+	bf_set(lpfc_mbx_wq_create_cq_id, &wq_create->u.request,
+		    cq->queue_id);
+	list_for_each_entry(dmabuf, &wq->page_list, list) {
+		wq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		wq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &wq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2503 WQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+		goto out;
+	}
+	wq->queue_id = bf_get(lpfc_mbx_wq_create_q_id, &wq_create->u.response);
+	if (wq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	wq->type = LPFC_WQ;
+	wq->subtype = subtype;
+	wq->host_index = 0;
+	wq->hba_index = 0;
+
+	/* link the wq onto the parent cq child list */
+	list_add_tail(&wq->list, &cq->child_list);
+out:
+	if (rc == MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_rq_create - Create a Receive Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @hrq: The queue structure to use to create the header receive queue.
+ * @drq: The queue structure to use to create the data receive queue.
+ * @cq: The completion queue to bind this work queue to.
+ *
+ * This function creates a receive buffer queue pair , as detailed in @hrq and
+ * @drq, on a port, described by @phba by sending a RQ_CREATE mailbox command
+ * to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @drq and @hrq
+ * struct is used to get the entry count that is necessary to determine the
+ * number of pages to use for this queue. The @cq is used to indicate which
+ * completion queue to bind received buffers that are posted to these queues to.
+ * This function will send the RQ_CREATE mailbox command to the HBA to setup the
+ * receive queue pair. This function is asynchronous and will wait for the
+ * mailbox command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+	       struct lpfc_queue *drq, struct lpfc_queue *cq, uint32_t subtype)
+{
+	struct lpfc_mbx_rq_create *rq_create;
+	struct lpfc_dmabuf *dmabuf;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (hrq->entry_count != drq->entry_count)
+		return -EINVAL;
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_rq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_RQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	rq_create = &mbox->u.mqe.un.rq_create;
+	switch (hrq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2535 Unsupported RQ count. (%d)\n",
+				hrq->entry_count);
+		if (hrq->entry_count < 512)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 512:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_512);
+		break;
+	case 1024:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_1024);
+		break;
+	case 2048:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_2048);
+		break;
+	case 4096:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_4096);
+		break;
+	}
+	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
+	       cq->queue_id);
+	bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request,
+	       hrq->page_count);
+	bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
+	       LPFC_HDR_BUF_SIZE);
+	list_for_each_entry(dmabuf, &hrq->page_list, list) {
+		rq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		rq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2504 RQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+		goto out;
+	}
+	hrq->queue_id = bf_get(lpfc_mbx_rq_create_q_id, &rq_create->u.response);
+	if (hrq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	hrq->type = LPFC_HRQ;
+	hrq->subtype = subtype;
+	hrq->host_index = 0;
+	hrq->hba_index = 0;
+
+	/* now create the data queue */
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_RQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	switch (drq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2536 Unsupported RQ count. (%d)\n",
+				drq->entry_count);
+		if (drq->entry_count < 512)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 512:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_512);
+		break;
+	case 1024:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_1024);
+		break;
+	case 2048:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_2048);
+		break;
+	case 4096:
+		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
+		       LPFC_RQ_RING_SIZE_4096);
+		break;
+	}
+	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
+	       cq->queue_id);
+	bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request,
+	       drq->page_count);
+	bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
+	       LPFC_DATA_BUF_SIZE);
+	list_for_each_entry(dmabuf, &drq->page_list, list) {
+		rq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		rq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		status = -ENXIO;
+		goto out;
+	}
+	drq->queue_id = bf_get(lpfc_mbx_rq_create_q_id, &rq_create->u.response);
+	if (drq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	drq->type = LPFC_DRQ;
+	drq->subtype = subtype;
+	drq->host_index = 0;
+	drq->hba_index = 0;
+
+	/* link the header and data RQs onto the parent cq child list */
+	list_add_tail(&hrq->list, &cq->child_list);
+	list_add_tail(&drq->list, &cq->child_list);
+
+out:
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_eq_destroy - Destroy an event Queue on the HBA
+ * @eq: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @eq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @eq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_eq_destroy(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!eq)
+		return -ENODEV;
+	mbox = mempool_alloc(eq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_eq_destroy) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_EQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_eq_destroy_q_id, &mbox->u.mqe.un.eq_destroy.u.request,
+	       eq->queue_id);
+	mbox->vport = eq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+
+	rc = lpfc_sli_issue_mbox(eq->phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.eq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2505 EQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+
+	/* Remove eq from any list */
+	list_del_init(&eq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, eq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_cq_destroy - Destroy a Completion Queue on the HBA
+ * @cq: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @cq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @cq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_cq_destroy(struct lpfc_hba *phba, struct lpfc_queue *cq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!cq)
+		return -ENODEV;
+	mbox = mempool_alloc(cq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_cq_destroy) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_CQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_cq_destroy_q_id, &mbox->u.mqe.un.cq_destroy.u.request,
+	       cq->queue_id);
+	mbox->vport = cq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	rc = lpfc_sli_issue_mbox(cq->phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.wq_create.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2506 CQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	/* Remove cq from any list */
+	list_del_init(&cq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, cq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_wq_destroy - Destroy a Work Queue on the HBA
+ * @wq: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @wq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @wq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_wq_destroy(struct lpfc_hba *phba, struct lpfc_queue *wq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!wq)
+		return -ENODEV;
+	mbox = mempool_alloc(wq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_wq_destroy) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_WQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_wq_destroy_q_id, &mbox->u.mqe.un.wq_destroy.u.request,
+	       wq->queue_id);
+	mbox->vport = wq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	rc = lpfc_sli_issue_mbox(wq->phba, mbox, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.wq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2508 WQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	/* Remove wq from any list */
+	list_del_init(&wq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, wq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_rq_destroy - Destroy a Receive Queue on the HBA
+ * @rq: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @rq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @rq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_rq_destroy(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+		struct lpfc_queue *drq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!hrq || !drq)
+		return -ENODEV;
+	mbox = mempool_alloc(hrq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_rq_destroy) -
+		  sizeof(struct mbox_header));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_RQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_rq_destroy_q_id, &mbox->u.mqe.un.rq_destroy.u.request,
+	       hrq->queue_id);
+	mbox->vport = hrq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	rc = lpfc_sli_issue_mbox(hrq->phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.rq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2509 RQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		if (rc != MBX_TIMEOUT)
+			mempool_free(mbox, hrq->phba->mbox_mem_pool);
+		return -ENXIO;
+	}
+	bf_set(lpfc_mbx_rq_destroy_q_id, &mbox->u.mqe.un.rq_destroy.u.request,
+	       drq->queue_id);
+	rc = lpfc_sli_issue_mbox(drq->phba, mbox, MBX_POLL);
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.rq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2510 RQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	list_del_init(&hrq->list);
+	list_del_init(&drq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, hrq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
+ * lpfc_sli4_post_sgl - Post scatter gather list for an XRI to HBA
+ * @phba: The virtual port for which this call being executed.
+ * @pdma_phys_addr0: Physical address of the 1st SGL page.
+ * @pdma_phys_addr1: Physical address of the 2nd SGL page.
+ * @xritag: the xritag that ties this io to the SGL pages.
+ *
+ * This routine will post the sgl pages for the IO that has the xritag
+ * that is in the iocbq structure. The xritag is assigned during iocbq
+ * creation and persists for as long as the driver is loaded.
+ * if the caller has fewer than 256 scatter gather segments to map then
+ * pdma_phys_addr1 should be 0.
+ * If the caller needs to map more than 256 scatter gather segment then
+ * pdma_phys_addr1 should be a valid physical address.
+ * physical address for SGLs must be 64 byte aligned.
+ * If you are going to map 2 SGL's then the first one must have 256 entries
+ * the second sgl can have between 1 and 256 entries.
+ *
+ * Return codes:
+ * 	0 - Success
+ * 	-ENXIO, -ENOMEM - Failure
+ **/
+int
+lpfc_sli4_post_sgl(struct lpfc_hba *phba,
+		dma_addr_t pdma_phys_addr0,
+		dma_addr_t pdma_phys_addr1,
+		uint16_t xritag)
+{
+	struct lpfc_mbx_post_sgl_pages *post_sgl_pages;
+	LPFC_MBOXQ_t *mbox;
+	int rc;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (xritag == NO_XRI) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0364 Invalid param:\n");
+		return -EINVAL;
+	}
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES,
+			sizeof(struct lpfc_mbx_post_sgl_pages) -
+			sizeof(struct mbox_header), LPFC_SLI4_MBX_EMBED);
+
+	post_sgl_pages = (struct lpfc_mbx_post_sgl_pages *)
+				&mbox->u.mqe.un.post_sgl_pages;
+	bf_set(lpfc_post_sgl_pages_xri, post_sgl_pages, xritag);
+	bf_set(lpfc_post_sgl_pages_xricnt, post_sgl_pages, 1);
+
+	post_sgl_pages->sgl_pg_pairs[0].sgl_pg0_addr_lo	=
+				cpu_to_le32(putPaddrLow(pdma_phys_addr0));
+	post_sgl_pages->sgl_pg_pairs[0].sgl_pg0_addr_hi =
+				cpu_to_le32(putPaddrHigh(pdma_phys_addr0));
+
+	post_sgl_pages->sgl_pg_pairs[0].sgl_pg1_addr_lo	=
+				cpu_to_le32(putPaddrLow(pdma_phys_addr1));
+	post_sgl_pages->sgl_pg_pairs[0].sgl_pg1_addr_hi =
+				cpu_to_le32(putPaddrHigh(pdma_phys_addr1));
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &post_sgl_pages->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2511 POST_SGL mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return 0;
+}
+/**
+ * lpfc_sli4_remove_all_sgl_pages - Post scatter gather list for an XRI to HBA
+ * @phba: The virtual port for which this call being executed.
+ *
+ * This routine will remove all of the sgl pages registered with the hba.
+ *
+ * Return codes:
+ * 	0 - Success
+ * 	-ENXIO, -ENOMEM - Failure
+ **/
+int
+lpfc_sli4_remove_all_sgl_pages(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			LPFC_MBOX_OPCODE_FCOE_REMOVE_SGL_PAGES, 0,
+			LPFC_SLI4_MBX_EMBED);
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.sli4_config.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2512 REMOVE_ALL_SGL_PAGES mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli4_next_xritag - Get an xritag for the io
+ * @phba: Pointer to HBA context object.
+ *
+ * This function gets an xritag for the iocb. If there is no unused xritag
+ * it will return 0xffff.
+ * The function returns the allocated xritag if successful, else returns zero.
+ * Zero is not a valid xritag.
+ * The caller is not required to hold any lock.
+ **/
+uint16_t
+lpfc_sli4_next_xritag(struct lpfc_hba *phba)
+{
+	uint16_t xritag;
+
+	spin_lock_irq(&phba->hbalock);
+	xritag = phba->sli4_hba.next_xri;
+	if ((xritag != (uint16_t) -1) && xritag <
+		(phba->sli4_hba.max_cfg_param.max_xri
+			+ phba->sli4_hba.max_cfg_param.xri_base)) {
+		phba->sli4_hba.next_xri++;
+		phba->sli4_hba.max_cfg_param.xri_used++;
+		spin_unlock_irq(&phba->hbalock);
+		return xritag;
+	}
+	spin_unlock_irq(&phba->hbalock);
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+			"2004 Failed to allocate XRI.last XRITAG is %d"
+			" Max XRI is %d, Used XRI is %d\n",
+			phba->sli4_hba.next_xri,
+			phba->sli4_hba.max_cfg_param.max_xri,
+			phba->sli4_hba.max_cfg_param.xri_used);
+	return -1;
+}
+
+/**
+ * lpfc_sli4_post_sgl_list - post a block of sgl list to the firmware.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to post a block of driver's sgl pages to the
+ * HBA using non-embedded mailbox command. No Lock is held. This routine
+ * is only called when the driver is loading and after all IO has been
+ * stopped.
+ **/
+int
+lpfc_sli4_post_sgl_list(struct lpfc_hba *phba)
+{
+	struct lpfc_sglq *sglq_entry;
+	struct lpfc_mbx_post_uembed_sgl_page1 *sgl;
+	struct sgl_page_pairs *sgl_pg_pairs;
+	void *viraddr;
+	LPFC_MBOXQ_t *mbox;
+	uint32_t reqlen, alloclen, pg_pairs;
+	uint32_t mbox_tmo;
+	uint16_t xritag_start = 0;
+	int els_xri_cnt, rc = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	/* The number of sgls to be posted */
+	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
+
+	reqlen = els_xri_cnt * sizeof(struct sgl_page_pairs) +
+		 sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t);
+	if (reqlen > PAGE_SIZE) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"2559 Block sgl registration required DMA "
+				"size (%d) great than a page\n", reqlen);
+		return -ENOMEM;
+	}
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2560 Failed to allocate mbox cmd memory\n");
+		return -ENOMEM;
+	}
+
+	/* Allocate DMA memory and set up the non-embedded mailbox command */
+	alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen,
+			 LPFC_SLI4_MBX_NEMBED);
+
+	if (alloclen < reqlen) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0285 Allocated DMA memory size (%d) is "
+				"less than the requested DMA memory "
+				"size (%d)\n", alloclen, reqlen);
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+		return -ENOMEM;
+	}
+
+	/* Get the first SGE entry from the non-embedded DMA memory */
+	if (unlikely(!mbox->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2525 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+		return -ENOMEM;
+	}
+	viraddr = mbox->sge_array->addr[0];
+
+	/* Set up the SGL pages in the non-embedded DMA pages */
+	sgl = (struct lpfc_mbx_post_uembed_sgl_page1 *)viraddr;
+	sgl_pg_pairs = &sgl->sgl_pg_pairs;
+
+	for (pg_pairs = 0; pg_pairs < els_xri_cnt; pg_pairs++) {
+		sglq_entry = phba->sli4_hba.lpfc_els_sgl_array[pg_pairs];
+		/* Set up the sge entry */
+		sgl_pg_pairs->sgl_pg0_addr_lo =
+				cpu_to_le32(putPaddrLow(sglq_entry->phys));
+		sgl_pg_pairs->sgl_pg0_addr_hi =
+				cpu_to_le32(putPaddrHigh(sglq_entry->phys));
+		sgl_pg_pairs->sgl_pg1_addr_lo =
+				cpu_to_le32(putPaddrLow(0));
+		sgl_pg_pairs->sgl_pg1_addr_hi =
+				cpu_to_le32(putPaddrHigh(0));
+		/* Keep the first xritag on the list */
+		if (pg_pairs == 0)
+			xritag_start = sglq_entry->sli4_xritag;
+		sgl_pg_pairs++;
+	}
+	bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start);
+	pg_pairs = (pg_pairs > 0) ? (pg_pairs - 1) : pg_pairs;
+	bf_set(lpfc_post_sgl_pages_xricnt, sgl, pg_pairs);
+	/* Perform endian conversion if necessary */
+	sgl->word0 = cpu_to_le32(sgl->word0);
+
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else {
+		mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+	}
+	shdr = (union lpfc_sli4_cfg_shdr *) &sgl->cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2513 POST_SGL_BLOCK mailbox command failed "
+				"status x%x add_status x%x mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli4_post_scsi_sgl_block - post a block of scsi sgl list to firmware
+ * @phba: pointer to lpfc hba data structure.
+ * @sblist: pointer to scsi buffer list.
+ * @count: number of scsi buffers on the list.
+ *
+ * This routine is invoked to post a block of @count scsi sgl pages from a
+ * SCSI buffer list @sblist to the HBA using non-embedded mailbox command.
+ * No Lock is held.
+ *
+ **/
+int
+lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba, struct list_head *sblist,
+			      int cnt)
+{
+	struct lpfc_scsi_buf *psb;
+	struct lpfc_mbx_post_uembed_sgl_page1 *sgl;
+	struct sgl_page_pairs *sgl_pg_pairs;
+	void *viraddr;
+	LPFC_MBOXQ_t *mbox;
+	uint32_t reqlen, alloclen, pg_pairs;
+	uint32_t mbox_tmo;
+	uint16_t xritag_start = 0;
+	int rc = 0;
+	uint32_t shdr_status, shdr_add_status;
+	dma_addr_t pdma_phys_bpl1;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	/* Calculate the requested length of the dma memory */
+	reqlen = cnt * sizeof(struct sgl_page_pairs) +
+		 sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t);
+	if (reqlen > PAGE_SIZE) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0217 Block sgl registration required DMA "
+				"size (%d) great than a page\n", reqlen);
+		return -ENOMEM;
+	}
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0283 Failed to allocate mbox cmd memory\n");
+		return -ENOMEM;
+	}
+
+	/* Allocate DMA memory and set up the non-embedded mailbox command */
+	alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
+				LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen,
+				LPFC_SLI4_MBX_NEMBED);
+
+	if (alloclen < reqlen) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2561 Allocated DMA memory size (%d) is "
+				"less than the requested DMA memory "
+				"size (%d)\n", alloclen, reqlen);
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+		return -ENOMEM;
+	}
+
+	/* Get the first SGE entry from the non-embedded DMA memory */
+	if (unlikely(!mbox->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2565 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+		return -ENOMEM;
+	}
+	viraddr = mbox->sge_array->addr[0];
+
+	/* Set up the SGL pages in the non-embedded DMA pages */
+	sgl = (struct lpfc_mbx_post_uembed_sgl_page1 *)viraddr;
+	sgl_pg_pairs = &sgl->sgl_pg_pairs;
+
+	pg_pairs = 0;
+	list_for_each_entry(psb, sblist, list) {
+		/* Set up the sge entry */
+		sgl_pg_pairs->sgl_pg0_addr_lo =
+			cpu_to_le32(putPaddrLow(psb->dma_phys_bpl));
+		sgl_pg_pairs->sgl_pg0_addr_hi =
+			cpu_to_le32(putPaddrHigh(psb->dma_phys_bpl));
+		if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
+			pdma_phys_bpl1 = psb->dma_phys_bpl + SGL_PAGE_SIZE;
+		else
+			pdma_phys_bpl1 = 0;
+		sgl_pg_pairs->sgl_pg1_addr_lo =
+			cpu_to_le32(putPaddrLow(pdma_phys_bpl1));
+		sgl_pg_pairs->sgl_pg1_addr_hi =
+			cpu_to_le32(putPaddrHigh(pdma_phys_bpl1));
+		/* Keep the first xritag on the list */
+		if (pg_pairs == 0)
+			xritag_start = psb->cur_iocbq.sli4_xritag;
+		sgl_pg_pairs++;
+		pg_pairs++;
+	}
+	bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start);
+	bf_set(lpfc_post_sgl_pages_xricnt, sgl, pg_pairs);
+	/* Perform endian conversion if necessary */
+	sgl->word0 = cpu_to_le32(sgl->word0);
+
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	else {
+		mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+	}
+	shdr = (union lpfc_sli4_cfg_shdr *) &sgl->cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		lpfc_sli4_mbox_cmd_free(phba, mbox);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2564 POST_SGL_BLOCK mailbox command failed "
+				"status x%x add_status x%x mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_fc_frame_check - Check that this frame is a valid frame to handle
+ * @phba: pointer to lpfc_hba struct that the frame was received on
+ * @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
+ *
+ * This function checks the fields in the @fc_hdr to see if the FC frame is a
+ * valid type of frame that the LPFC driver will handle. This function will
+ * return a zero if the frame is a valid frame or a non zero value when the
+ * frame does not pass the check.
+ **/
+static int
+lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
+{
+	char *rctl_names[] = FC_RCTL_NAMES_INIT;
+	char *type_names[] = FC_TYPE_NAMES_INIT;
+	struct fc_vft_header *fc_vft_hdr;
+
+	switch (fc_hdr->fh_r_ctl) {
+	case FC_RCTL_DD_UNCAT:		/* uncategorized information */
+	case FC_RCTL_DD_SOL_DATA:	/* solicited data */
+	case FC_RCTL_DD_UNSOL_CTL:	/* unsolicited control */
+	case FC_RCTL_DD_SOL_CTL:	/* solicited control or reply */
+	case FC_RCTL_DD_UNSOL_DATA:	/* unsolicited data */
+	case FC_RCTL_DD_DATA_DESC:	/* data descriptor */
+	case FC_RCTL_DD_UNSOL_CMD:	/* unsolicited command */
+	case FC_RCTL_DD_CMD_STATUS:	/* command status */
+	case FC_RCTL_ELS_REQ:	/* extended link services request */
+	case FC_RCTL_ELS_REP:	/* extended link services reply */
+	case FC_RCTL_ELS4_REQ:	/* FC-4 ELS request */
+	case FC_RCTL_ELS4_REP:	/* FC-4 ELS reply */
+	case FC_RCTL_BA_NOP:  	/* basic link service NOP */
+	case FC_RCTL_BA_ABTS: 	/* basic link service abort */
+	case FC_RCTL_BA_RMC: 	/* remove connection */
+	case FC_RCTL_BA_ACC:	/* basic accept */
+	case FC_RCTL_BA_RJT:	/* basic reject */
+	case FC_RCTL_BA_PRMT:
+	case FC_RCTL_ACK_1:	/* acknowledge_1 */
+	case FC_RCTL_ACK_0:	/* acknowledge_0 */
+	case FC_RCTL_P_RJT:	/* port reject */
+	case FC_RCTL_F_RJT:	/* fabric reject */
+	case FC_RCTL_P_BSY:	/* port busy */
+	case FC_RCTL_F_BSY:	/* fabric busy to data frame */
+	case FC_RCTL_F_BSYL:	/* fabric busy to link control frame */
+	case FC_RCTL_LCR:	/* link credit reset */
+	case FC_RCTL_END:	/* end */
+		break;
+	case FC_RCTL_VFTH:	/* Virtual Fabric tagging Header */
+		fc_vft_hdr = (struct fc_vft_header *)fc_hdr;
+		fc_hdr = &((struct fc_frame_header *)fc_vft_hdr)[1];
+		return lpfc_fc_frame_check(phba, fc_hdr);
+	default:
+		goto drop;
+	}
+	switch (fc_hdr->fh_type) {
+	case FC_TYPE_BLS:
+	case FC_TYPE_ELS:
+	case FC_TYPE_FCP:
+	case FC_TYPE_CT:
+		break;
+	case FC_TYPE_IP:
+	case FC_TYPE_ILS:
+	default:
+		goto drop;
+	}
+	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+			"2538 Received frame rctl:%s type:%s\n",
+			rctl_names[fc_hdr->fh_r_ctl],
+			type_names[fc_hdr->fh_type]);
+	return 0;
+drop:
+	lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
+			"2539 Dropped frame rctl:%s type:%s\n",
+			rctl_names[fc_hdr->fh_r_ctl],
+			type_names[fc_hdr->fh_type]);
+	return 1;
+}
+
+/**
+ * lpfc_fc_hdr_get_vfi - Get the VFI from an FC frame
+ * @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
+ *
+ * This function processes the FC header to retrieve the VFI from the VF
+ * header, if one exists. This function will return the VFI if one exists
+ * or 0 if no VSAN Header exists.
+ **/
+static uint32_t
+lpfc_fc_hdr_get_vfi(struct fc_frame_header *fc_hdr)
+{
+	struct fc_vft_header *fc_vft_hdr = (struct fc_vft_header *)fc_hdr;
+
+	if (fc_hdr->fh_r_ctl != FC_RCTL_VFTH)
+		return 0;
+	return bf_get(fc_vft_hdr_vf_id, fc_vft_hdr);
+}
+
+/**
+ * lpfc_fc_frame_to_vport - Finds the vport that a frame is destined to
+ * @phba: Pointer to the HBA structure to search for the vport on
+ * @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
+ * @fcfi: The FC Fabric ID that the frame came from
+ *
+ * This function searches the @phba for a vport that matches the content of the
+ * @fc_hdr passed in and the @fcfi. This function uses the @fc_hdr to fetch the
+ * VFI, if the Virtual Fabric Tagging Header exists, and the DID. This function
+ * returns the matching vport pointer or NULL if unable to match frame to a
+ * vport.
+ **/
+static struct lpfc_vport *
+lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr,
+		       uint16_t fcfi)
+{
+	struct lpfc_vport **vports;
+	struct lpfc_vport *vport = NULL;
+	int i;
+	uint32_t did = (fc_hdr->fh_d_id[0] << 16 |
+			fc_hdr->fh_d_id[1] << 8 |
+			fc_hdr->fh_d_id[2]);
+
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports != NULL)
+		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+			if (phba->fcf.fcfi == fcfi &&
+			    vports[i]->vfi == lpfc_fc_hdr_get_vfi(fc_hdr) &&
+			    vports[i]->fc_myDID == did) {
+				vport = vports[i];
+				break;
+			}
+		}
+	lpfc_destroy_vport_work_array(phba, vports);
+	return vport;
+}
+
+/**
+ * lpfc_fc_frame_add - Adds a frame to the vport's list of received sequences
+ * @dmabuf: pointer to a dmabuf that describes the hdr and data of the FC frame
+ *
+ * This function searches through the existing incomplete sequences that have
+ * been sent to this @vport. If the frame matches one of the incomplete
+ * sequences then the dbuf in the @dmabuf is added to the list of frames that
+ * make up that sequence. If no sequence is found that matches this frame then
+ * the function will add the hbuf in the @dmabuf to the @vport's rcv_buffer_list
+ * This function returns a pointer to the first dmabuf in the sequence list that
+ * the frame was linked to.
+ **/
+static struct hbq_dmabuf *
+lpfc_fc_frame_add(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf)
+{
+	struct fc_frame_header *new_hdr;
+	struct fc_frame_header *temp_hdr;
+	struct lpfc_dmabuf *d_buf;
+	struct lpfc_dmabuf *h_buf;
+	struct hbq_dmabuf *seq_dmabuf = NULL;
+	struct hbq_dmabuf *temp_dmabuf = NULL;
+
+	new_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+	/* Use the hdr_buf to find the sequence that this frame belongs to */
+	list_for_each_entry(h_buf, &vport->rcv_buffer_list, list) {
+		temp_hdr = (struct fc_frame_header *)h_buf->virt;
+		if ((temp_hdr->fh_seq_id != new_hdr->fh_seq_id) ||
+		    (temp_hdr->fh_ox_id != new_hdr->fh_ox_id) ||
+		    (memcmp(&temp_hdr->fh_s_id, &new_hdr->fh_s_id, 3)))
+			continue;
+		/* found a pending sequence that matches this frame */
+		seq_dmabuf = container_of(h_buf, struct hbq_dmabuf, hbuf);
+		break;
+	}
+	if (!seq_dmabuf) {
+		/*
+		 * This indicates first frame received for this sequence.
+		 * Queue the buffer on the vport's rcv_buffer_list.
+		 */
+		list_add_tail(&dmabuf->hbuf.list, &vport->rcv_buffer_list);
+		return dmabuf;
+	}
+	temp_hdr = seq_dmabuf->hbuf.virt;
+	if (new_hdr->fh_seq_cnt < temp_hdr->fh_seq_cnt) {
+		list_add(&seq_dmabuf->dbuf.list, &dmabuf->dbuf.list);
+		return dmabuf;
+	}
+	/* find the correct place in the sequence to insert this frame */
+	list_for_each_entry_reverse(d_buf, &seq_dmabuf->dbuf.list, list) {
+		temp_dmabuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
+		temp_hdr = (struct fc_frame_header *)temp_dmabuf->hbuf.virt;
+		/*
+		 * If the frame's sequence count is greater than the frame on
+		 * the list then insert the frame right after this frame
+		 */
+		if (new_hdr->fh_seq_cnt > temp_hdr->fh_seq_cnt) {
+			list_add(&dmabuf->dbuf.list, &temp_dmabuf->dbuf.list);
+			return seq_dmabuf;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * lpfc_seq_complete - Indicates if a sequence is complete
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
+ *
+ * This function checks the sequence, starting with the frame described by
+ * @dmabuf, to see if all the frames associated with this sequence are present.
+ * the frames associated with this sequence are linked to the @dmabuf using the
+ * dbuf list. This function looks for two major things. 1) That the first frame
+ * has a sequence count of zero. 2) There is a frame with last frame of sequence
+ * set. 3) That there are no holes in the sequence count. The function will
+ * return 1 when the sequence is complete, otherwise it will return 0.
+ **/
+static int
+lpfc_seq_complete(struct hbq_dmabuf *dmabuf)
+{
+	struct fc_frame_header *hdr;
+	struct lpfc_dmabuf *d_buf;
+	struct hbq_dmabuf *seq_dmabuf;
+	uint32_t fctl;
+	int seq_count = 0;
+
+	hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+	/* make sure first fame of sequence has a sequence count of zero */
+	if (hdr->fh_seq_cnt != seq_count)
+		return 0;
+	fctl = (hdr->fh_f_ctl[0] << 16 |
+		hdr->fh_f_ctl[1] << 8 |
+		hdr->fh_f_ctl[2]);
+	/* If last frame of sequence we can return success. */
+	if (fctl & FC_FC_END_SEQ)
+		return 1;
+	list_for_each_entry(d_buf, &dmabuf->dbuf.list, list) {
+		seq_dmabuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
+		hdr = (struct fc_frame_header *)seq_dmabuf->hbuf.virt;
+		/* If there is a hole in the sequence count then fail. */
+		if (++seq_count != hdr->fh_seq_cnt)
+			return 0;
+		fctl = (hdr->fh_f_ctl[0] << 16 |
+			hdr->fh_f_ctl[1] << 8 |
+			hdr->fh_f_ctl[2]);
+		/* If last frame of sequence we can return success. */
+		if (fctl & FC_FC_END_SEQ)
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * lpfc_prep_seq - Prep sequence for ULP processing
+ * @vport: Pointer to the vport on which this sequence was received
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
+ *
+ * This function takes a sequence, described by a list of frames, and creates
+ * a list of iocbq structures to describe the sequence. This iocbq list will be
+ * used to issue to the generic unsolicited sequence handler. This routine
+ * returns a pointer to the first iocbq in the list. If the function is unable
+ * to allocate an iocbq then it throw out the received frames that were not
+ * able to be described and return a pointer to the first iocbq. If unable to
+ * allocate any iocbqs (including the first) this function will return NULL.
+ **/
+static struct lpfc_iocbq *
+lpfc_prep_seq(struct lpfc_vport *vport, struct hbq_dmabuf *seq_dmabuf)
+{
+	struct lpfc_dmabuf *d_buf, *n_buf;
+	struct lpfc_iocbq *first_iocbq, *iocbq;
+	struct fc_frame_header *fc_hdr;
+	uint32_t sid;
+
+	fc_hdr = (struct fc_frame_header *)seq_dmabuf->hbuf.virt;
+	/* remove from receive buffer list */
+	list_del_init(&seq_dmabuf->hbuf.list);
+	/* get the Remote Port's SID */
+	sid = (fc_hdr->fh_s_id[0] << 16 |
+	       fc_hdr->fh_s_id[1] << 8 |
+	       fc_hdr->fh_s_id[2]);
+	/* Get an iocbq struct to fill in. */
+	first_iocbq = lpfc_sli_get_iocbq(vport->phba);
+	if (first_iocbq) {
+		/* Initialize the first IOCB. */
+		first_iocbq->iocb.ulpStatus = IOSTAT_SUCCESS;
+		first_iocbq->iocb.ulpCommand = CMD_IOCB_RCV_SEQ64_CX;
+		first_iocbq->iocb.ulpContext = be16_to_cpu(fc_hdr->fh_ox_id);
+		first_iocbq->iocb.unsli3.rcvsli3.vpi =
+					vport->vpi + vport->phba->vpi_base;
+		/* put the first buffer into the first IOCBq */
+		first_iocbq->context2 = &seq_dmabuf->dbuf;
+		first_iocbq->context3 = NULL;
+		first_iocbq->iocb.ulpBdeCount = 1;
+		first_iocbq->iocb.un.cont64[0].tus.f.bdeSize =
+							LPFC_DATA_BUF_SIZE;
+		first_iocbq->iocb.un.rcvels.remoteID = sid;
+	}
+	iocbq = first_iocbq;
+	/*
+	 * Each IOCBq can have two Buffers assigned, so go through the list
+	 * of buffers for this sequence and save two buffers in each IOCBq
+	 */
+	list_for_each_entry_safe(d_buf, n_buf, &seq_dmabuf->dbuf.list, list) {
+		if (!iocbq) {
+			lpfc_in_buf_free(vport->phba, d_buf);
+			continue;
+		}
+		if (!iocbq->context3) {
+			iocbq->context3 = d_buf;
+			iocbq->iocb.ulpBdeCount++;
+			iocbq->iocb.unsli3.rcvsli3.bde2.tus.f.bdeSize =
+							LPFC_DATA_BUF_SIZE;
+		} else {
+			iocbq = lpfc_sli_get_iocbq(vport->phba);
+			if (!iocbq) {
+				if (first_iocbq) {
+					first_iocbq->iocb.ulpStatus =
+							IOSTAT_FCP_RSP_ERROR;
+					first_iocbq->iocb.un.ulpWord[4] =
+							IOERR_NO_RESOURCES;
+				}
+				lpfc_in_buf_free(vport->phba, d_buf);
+				continue;
+			}
+			iocbq->context2 = d_buf;
+			iocbq->context3 = NULL;
+			iocbq->iocb.ulpBdeCount = 1;
+			iocbq->iocb.un.cont64[0].tus.f.bdeSize =
+							LPFC_DATA_BUF_SIZE;
+			iocbq->iocb.un.rcvels.remoteID = sid;
+			list_add_tail(&iocbq->list, &first_iocbq->list);
+		}
+	}
+	return first_iocbq;
+}
+
+/**
+ * lpfc_sli4_handle_received_buffer - Handle received buffers from firmware
+ * @phba: Pointer to HBA context object.
+ *
+ * This function is called with no lock held. This function processes all
+ * the received buffers and gives it to upper layers when a received buffer
+ * indicates that it is the final frame in the sequence. The interrupt
+ * service routine processes received buffers at interrupt contexts and adds
+ * received dma buffers to the rb_pend_list queue and signals the worker thread.
+ * Worker thread calls lpfc_sli4_handle_received_buffer, which will call the
+ * appropriate receive function when the final frame in a sequence is received.
+ **/
+int
+lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba)
+{
+	LIST_HEAD(cmplq);
+	struct hbq_dmabuf *dmabuf, *seq_dmabuf;
+	struct fc_frame_header *fc_hdr;
+	struct lpfc_vport *vport;
+	uint32_t fcfi;
+	struct lpfc_iocbq *iocbq;
+
+	/* Clear hba flag and get all received buffers into the cmplq */
+	spin_lock_irq(&phba->hbalock);
+	phba->hba_flag &= ~HBA_RECEIVE_BUFFER;
+	list_splice_init(&phba->rb_pend_list, &cmplq);
+	spin_unlock_irq(&phba->hbalock);
+
+	/* Process each received buffer */
+	while ((dmabuf = lpfc_sli_hbqbuf_get(&cmplq)) != NULL) {
+		fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+		/* check to see if this a valid type of frame */
+		if (lpfc_fc_frame_check(phba, fc_hdr)) {
+			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+			continue;
+		}
+		fcfi = bf_get(lpfc_rcqe_fcf_id, &dmabuf->rcqe);
+		vport = lpfc_fc_frame_to_vport(phba, fc_hdr, fcfi);
+		if (!vport) {
+			/* throw out the frame */
+			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+			continue;
+		}
+		/* Link this frame */
+		seq_dmabuf = lpfc_fc_frame_add(vport, dmabuf);
+		if (!seq_dmabuf) {
+			/* unable to add frame to vport - throw it out */
+			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+			continue;
+		}
+		/* If not last frame in sequence continue processing frames. */
+		if (!lpfc_seq_complete(seq_dmabuf)) {
+			/*
+			 * When saving off frames post a new one and mark this
+			 * frame to be freed when it is finished.
+			 **/
+			lpfc_sli_hbqbuf_fill_hbqs(phba, LPFC_ELS_HBQ, 1);
+			dmabuf->tag = -1;
+			continue;
+		}
+		fc_hdr = (struct fc_frame_header *)seq_dmabuf->hbuf.virt;
+		iocbq = lpfc_prep_seq(vport, seq_dmabuf);
+		if (!lpfc_complete_unsol_iocb(phba,
+					      &phba->sli.ring[LPFC_ELS_RING],
+					      iocbq, fc_hdr->fh_r_ctl,
+					      fc_hdr->fh_type))
+			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+					"2540 Ring %d handler: unexpected Rctl "
+					"x%x Type x%x received\n",
+					LPFC_ELS_RING,
+					fc_hdr->fh_r_ctl, fc_hdr->fh_type);
+	};
+	return 0;
+}
-- 
cgit v0.10.2


From 04c684968487eb4f98728363a97b8da48f3bb958 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:52:52 -0400
Subject: [SCSI] lpfc 8.3.2 : Addition of SLI4 Interface - Mailbox handling

The mailbox commands themselves are the same, or very similar to
their SLI3 counterparts. This patch genericizes mailbox command
handling and adds support for the new SLI4 mailbox queue.

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 82016fc..463104d 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -507,12 +507,14 @@ lpfc_issue_lip(struct Scsi_Host *shost)
 		return -ENOMEM;
 
 	memset((void *)pmboxq, 0, sizeof (LPFC_MBOXQ_t));
-	pmboxq->mb.mbxCommand = MBX_DOWN_LINK;
-	pmboxq->mb.mbxOwner = OWN_HOST;
+	pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK;
+	pmboxq->u.mb.mbxOwner = OWN_HOST;
 
 	mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2);
 
-	if ((mbxstatus == MBX_SUCCESS) && (pmboxq->mb.mbxStatus == 0)) {
+	if ((mbxstatus == MBX_SUCCESS) &&
+	    (pmboxq->u.mb.mbxStatus == 0 ||
+	     pmboxq->u.mb.mbxStatus == MBXERR_LINK_DOWN)) {
 		memset((void *)pmboxq, 0, sizeof (LPFC_MBOXQ_t));
 		lpfc_init_link(phba, pmboxq, phba->cfg_topology,
 			       phba->cfg_link_speed);
@@ -791,7 +793,8 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
 		  uint32_t *mrpi, uint32_t *arpi,
 		  uint32_t *mvpi, uint32_t *avpi)
 {
-	struct lpfc_sli   *psli = &phba->sli;
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_mbx_read_config *rd_config;
 	LPFC_MBOXQ_t *pmboxq;
 	MAILBOX_t *pmb;
 	int rc = 0;
@@ -813,7 +816,7 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
 		return 0;
 	memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t));
 
-	pmb = &pmboxq->mb;
+	pmb = &pmboxq->u.mb;
 	pmb->mbxCommand = MBX_READ_CONFIG;
 	pmb->mbxOwner = OWN_HOST;
 	pmboxq->context1 = NULL;
@@ -3247,7 +3250,7 @@ sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr,
 		}
 	}
 
-	memcpy((uint8_t *) & phba->sysfs_mbox.mbox->mb + off,
+	memcpy((uint8_t *) &phba->sysfs_mbox.mbox->u.mb + off,
 	       buf, count);
 
 	phba->sysfs_mbox.offset = off + count;
@@ -3289,6 +3292,7 @@ sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
 	int rc;
+	MAILBOX_t *pmb;
 
 	if (off > MAILBOX_CMD_SIZE)
 		return -ERANGE;
@@ -3313,8 +3317,8 @@ sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 	if (off == 0 &&
 	    phba->sysfs_mbox.state  == SMBOX_WRITING &&
 	    phba->sysfs_mbox.offset >= 2 * sizeof(uint32_t)) {
-
-		switch (phba->sysfs_mbox.mbox->mb.mbxCommand) {
+		pmb = &phba->sysfs_mbox.mbox->u.mb;
+		switch (pmb->mbxCommand) {
 			/* Offline only */
 		case MBX_INIT_LINK:
 		case MBX_DOWN_LINK:
@@ -3331,7 +3335,7 @@ sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 			if (!(vport->fc_flag & FC_OFFLINE_MODE)) {
 				printk(KERN_WARNING "mbox_read:Command 0x%x "
 				       "is illegal in on-line state\n",
-				       phba->sysfs_mbox.mbox->mb.mbxCommand);
+				       pmb->mbxCommand);
 				sysfs_mbox_idle(phba);
 				spin_unlock_irq(&phba->hbalock);
 				return -EPERM;
@@ -3367,13 +3371,13 @@ sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 		case MBX_CONFIG_PORT:
 		case MBX_RUN_BIU_DIAG:
 			printk(KERN_WARNING "mbox_read: Illegal Command 0x%x\n",
-			       phba->sysfs_mbox.mbox->mb.mbxCommand);
+			       pmb->mbxCommand);
 			sysfs_mbox_idle(phba);
 			spin_unlock_irq(&phba->hbalock);
 			return -EPERM;
 		default:
 			printk(KERN_WARNING "mbox_read: Unknown Command 0x%x\n",
-			       phba->sysfs_mbox.mbox->mb.mbxCommand);
+			       pmb->mbxCommand);
 			sysfs_mbox_idle(phba);
 			spin_unlock_irq(&phba->hbalock);
 			return -EPERM;
@@ -3383,14 +3387,14 @@ sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 		 * or RESTART mailbox commands until the HBA is restarted.
 		 */
 		if (phba->pport->stopped &&
-		    phba->sysfs_mbox.mbox->mb.mbxCommand != MBX_DUMP_MEMORY &&
-		    phba->sysfs_mbox.mbox->mb.mbxCommand != MBX_RESTART &&
-		    phba->sysfs_mbox.mbox->mb.mbxCommand != MBX_WRITE_VPARMS &&
-		    phba->sysfs_mbox.mbox->mb.mbxCommand != MBX_WRITE_WWN)
+		    pmb->mbxCommand != MBX_DUMP_MEMORY &&
+		    pmb->mbxCommand != MBX_RESTART &&
+		    pmb->mbxCommand != MBX_WRITE_VPARMS &&
+		    pmb->mbxCommand != MBX_WRITE_WWN)
 			lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
 					"1259 mbox: Issued mailbox cmd "
 					"0x%x while in stopped state.\n",
-					phba->sysfs_mbox.mbox->mb.mbxCommand);
+					pmb->mbxCommand);
 
 		phba->sysfs_mbox.mbox->vport = vport;
 
@@ -3416,8 +3420,7 @@ sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 			spin_unlock_irq(&phba->hbalock);
 			rc = lpfc_sli_issue_mbox_wait (phba,
 						       phba->sysfs_mbox.mbox,
-				lpfc_mbox_tmo_val(phba,
-				    phba->sysfs_mbox.mbox->mb.mbxCommand) * HZ);
+				lpfc_mbox_tmo_val(phba, pmb->mbxCommand) * HZ);
 			spin_lock_irq(&phba->hbalock);
 		}
 
@@ -3439,7 +3442,7 @@ sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 		return -EAGAIN;
 	}
 
-	memcpy(buf, (uint8_t *) & phba->sysfs_mbox.mbox->mb + off, count);
+	memcpy(buf, (uint8_t *) &pmb + off, count);
 
 	phba->sysfs_mbox.offset = off + count;
 
@@ -3711,14 +3714,14 @@ lpfc_get_stats(struct Scsi_Host *shost)
 		return NULL;
 	memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t));
 
-	pmb = &pmboxq->mb;
+	pmb = &pmboxq->u.mb;
 	pmb->mbxCommand = MBX_READ_STATUS;
 	pmb->mbxOwner = OWN_HOST;
 	pmboxq->context1 = NULL;
 	pmboxq->vport = vport;
 
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+		(!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -3817,7 +3820,7 @@ lpfc_reset_stats(struct Scsi_Host *shost)
 		return;
 	memset(pmboxq, 0, sizeof(LPFC_MBOXQ_t));
 
-	pmb = &pmboxq->mb;
+	pmb = &pmboxq->u.mb;
 	pmb->mbxCommand = MBX_READ_STATUS;
 	pmb->mbxOwner = OWN_HOST;
 	pmb->un.varWords[0] = 0x1; /* reset request */
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 3802e45..e0f1cd4 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -209,6 +209,7 @@ void lpfc_sli_release_iocbq(struct lpfc_hba *, struct lpfc_iocbq *);
 uint16_t lpfc_sli_next_iotag(struct lpfc_hba *, struct lpfc_iocbq *);
 void lpfc_sli_cancel_iocbs(struct lpfc_hba *, struct list_head *, uint32_t,
 			   uint32_t);
+void lpfc_sli_wake_mbox_wait(struct lpfc_hba *, LPFC_MBOXQ_t *);
 
 void lpfc_reset_barrier(struct lpfc_hba * phba);
 int lpfc_sli_brdready(struct lpfc_hba *, uint32_t);
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 9fe36bf..2c034a5 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -4277,7 +4277,7 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 			lpfc_init_link(phba, mbox,
 				       phba->cfg_topology,
 				       phba->cfg_link_speed);
-			mbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
+			mbox->u.mb.un.varInitLnk.lipsr_AL_PA = 0;
 			mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
 			mbox->vport = vport;
 			rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
@@ -4426,7 +4426,7 @@ lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	uint16_t xri, status;
 	uint32_t cmdsize;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 
 	ndlp = (struct lpfc_nodelist *) pmb->context2;
 	xri = (uint16_t) ((unsigned long)(pmb->context1));
@@ -5755,7 +5755,7 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 
 	spin_lock_irq(shost->host_lock);
 	vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 0fc6600..2270d9a 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -879,7 +879,7 @@ lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 	struct lpfc_sli   *psli = &phba->sli;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	uint32_t control;
 
 	/* Since we don't do discovery right now, turn these off here */
@@ -942,7 +942,7 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	struct lpfc_vport *vport = pmb->vport;
 
-	if (pmb->mb.mbxStatus)
+	if (pmb->u.mb.mbxStatus)
 		goto out;
 
 	mempool_free(pmb, phba->mbox_mem_pool);
@@ -970,7 +970,7 @@ out:
 	lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
 			 "0306 CONFIG_LINK mbxStatus error x%x "
 			 "HBA state x%x\n",
-			 pmb->mb.mbxStatus, vport->port_state);
+			 pmb->u.mb.mbxStatus, vport->port_state);
 	mempool_free(pmb, phba->mbox_mem_pool);
 
 	lpfc_linkdown(phba);
@@ -1202,7 +1202,7 @@ lpfc_mbx_cmpl_read_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 	READ_LA_VAR *la;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
 
 	/* Unblock ELS traffic */
@@ -1217,7 +1217,7 @@ lpfc_mbx_cmpl_read_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 		goto lpfc_mbx_cmpl_read_la_free_mbuf;
 	}
 
-	la = (READ_LA_VAR *) & pmb->mb.un.varReadLA;
+	la = (READ_LA_VAR *) &pmb->u.mb.un.varReadLA;
 
 	memcpy(&phba->alpa_map[0], mp->virt, 128);
 
@@ -1355,7 +1355,7 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 static void
 lpfc_mbx_cmpl_unreg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
 
@@ -1408,7 +1408,7 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	struct lpfc_vport *vport = pmb->vport;
 	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 
 	switch (mb->mbxStatus) {
 	case 0x0011:
@@ -2279,7 +2279,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 
 	/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
 	if ((mb = phba->sli.mbox_active)) {
-		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+		if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
 			mb->context2 = NULL;
 			mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
@@ -2288,7 +2288,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 
 	spin_lock_irq(&phba->hbalock);
 	list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
-		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+		if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
 		    (ndlp == (struct lpfc_nodelist *) mb->context2)) {
 			mp = (struct lpfc_dmabuf *) (mb->context1);
 			if (mp) {
@@ -2970,7 +2970,7 @@ restart_disc:
 		lpfc_linkdown(phba);
 		lpfc_init_link(phba, initlinkmbox, phba->cfg_topology,
 			       phba->cfg_link_speed);
-		initlinkmbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
+		initlinkmbox->u.mb.un.varInitLnk.lipsr_AL_PA = 0;
 		initlinkmbox->vport = vport;
 		initlinkmbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
 		rc = lpfc_sli_issue_mbox(phba, initlinkmbox, MBX_NOWAIT);
@@ -3069,7 +3069,7 @@ restart_disc:
 void
 lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf   *mp = (struct lpfc_dmabuf *) (pmb->context1);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
 	struct lpfc_vport    *vport = pmb->vport;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index e9e4a1d..ff821bb 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -108,7 +108,7 @@ lpfc_config_port_prep(struct lpfc_hba *phba)
 		return -ENOMEM;
 	}
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	phba->link_state = LPFC_INIT_MBX_CMDS;
 
 	if (lpfc_is_LC_HBA(phba->pcidev->device)) {
@@ -221,6 +221,11 @@ lpfc_config_port_prep(struct lpfc_hba *phba)
 					mb->mbxCommand, mb->mbxStatus);
 			mb->un.varDmp.word_cnt = 0;
 		}
+		/* dump mem may return a zero when finished or we got a
+		 * mailbox error, either way we are done.
+		 */
+		if (mb->un.varDmp.word_cnt == 0)
+			break;
 		if (mb->un.varDmp.word_cnt > DMP_VPD_SIZE - offset)
 			mb->un.varDmp.word_cnt = DMP_VPD_SIZE - offset;
 		lpfc_sli_pcimem_bcopy(((uint8_t *)mb) + DMP_RSP_OFFSET,
@@ -249,7 +254,7 @@ out_free_mbox:
 static void
 lpfc_config_async_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
 {
-	if (pmboxq->mb.mbxStatus == MBX_SUCCESS)
+	if (pmboxq->u.mb.mbxStatus == MBX_SUCCESS)
 		phba->temp_sensor_support = 1;
 	else
 		phba->temp_sensor_support = 0;
@@ -276,7 +281,7 @@ lpfc_dump_wakeup_param_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
 	/* character array used for decoding dist type. */
 	char dist_char[] = "nabx";
 
-	if (pmboxq->mb.mbxStatus != MBX_SUCCESS) {
+	if (pmboxq->u.mb.mbxStatus != MBX_SUCCESS) {
 		mempool_free(pmboxq, phba->mbox_mem_pool);
 		return;
 	}
@@ -284,7 +289,7 @@ lpfc_dump_wakeup_param_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
 	prg = (struct prog_id *) &prog_id_word;
 
 	/* word 7 contain option rom version */
-	prog_id_word = pmboxq->mb.un.varWords[7];
+	prog_id_word = pmboxq->u.mb.un.varWords[7];
 
 	/* Decode the Option rom version word to a readable string */
 	if (prg->dist < 4)
@@ -341,7 +346,7 @@ lpfc_config_port_post(struct lpfc_hba *phba)
 		phba->link_state = LPFC_HBA_ERROR;
 		return -ENOMEM;
 	}
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 
 	/* Get login parameters for NID.  */
 	lpfc_read_sparam(phba, pmb, 0);
@@ -476,17 +481,18 @@ lpfc_config_port_post(struct lpfc_hba *phba)
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
 					"0352 Config MSI mailbox command "
 					"failed, mbxCmd x%x, mbxStatus x%x\n",
-					pmb->mb.mbxCommand, pmb->mb.mbxStatus);
+					pmb->u.mb.mbxCommand,
+					pmb->u.mb.mbxStatus);
 			mempool_free(pmb, phba->mbox_mem_pool);
 			return -EIO;
 		}
 	}
 
+	spin_lock_irq(&phba->hbalock);
 	/* Initialize ERATT handling flag */
 	phba->hba_flag &= ~HBA_ERATT_HANDLED;
 
 	/* Enable appropriate host interrupts */
-	spin_lock_irq(&phba->hbalock);
 	status = readl(phba->HCregaddr);
 	status |= HC_MBINT_ENA | HC_ERINT_ENA | HC_LAINT_ENA;
 	if (psli->num_rings > 0)
@@ -2201,7 +2207,7 @@ lpfc_offline_prep(struct lpfc_hba * phba)
 	}
 	lpfc_destroy_vport_work_array(phba, vports);
 
-	lpfc_sli_flush_mbox_queue(phba);
+	lpfc_sli_mbox_sys_shutdown(phba);
 }
 
 /**
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index 7f5899b..6aeb1c6 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -60,7 +60,7 @@ lpfc_dump_mem(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb, uint16_t offset)
 	MAILBOX_t *mb;
 	void *ctx;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	ctx = pmb->context2;
 
 	/* Setup to dump VPD region */
@@ -92,7 +92,7 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	MAILBOX_t *mb;
 	void *ctx;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	/* Save context so that we can restore after memset */
 	ctx = pmb->context2;
 
@@ -127,7 +127,7 @@ lpfc_read_nv(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 	mb->mbxCommand = MBX_READ_NV;
 	mb->mbxOwner = OWN_HOST;
@@ -153,7 +153,7 @@ lpfc_config_async(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb,
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 	mb->mbxCommand = MBX_ASYNCEVT_ENABLE;
 	mb->un.varCfgAsyncEvent.ring = ring;
@@ -179,7 +179,7 @@ lpfc_heart_beat(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 	mb->mbxCommand = MBX_HEARTBEAT;
 	mb->mbxOwner = OWN_HOST;
@@ -213,7 +213,7 @@ lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb, struct lpfc_dmabuf *mp)
 	struct lpfc_sli *psli;
 
 	psli = &phba->sli;
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	INIT_LIST_HEAD(&mp->list);
@@ -250,7 +250,7 @@ lpfc_clear_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->un.varClearLA.eventTag = phba->fc_eventTag;
@@ -277,7 +277,7 @@ void
 lpfc_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
 	struct lpfc_vport  *vport = phba->pport;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	/* NEW_FEATURE
@@ -323,7 +323,7 @@ lpfc_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 int
 lpfc_config_msi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	uint32_t attentionConditions[2];
 
 	/* Sanity check */
@@ -407,7 +407,7 @@ lpfc_init_link(struct lpfc_hba * phba,
 	struct lpfc_sli *psli;
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	psli = &phba->sli;
@@ -494,7 +494,7 @@ lpfc_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, int vpi)
 	struct lpfc_sli *psli;
 
 	psli = &phba->sli;
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->mbxOwner = OWN_HOST;
@@ -517,7 +517,7 @@ lpfc_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, int vpi)
 	mb->un.varRdSparm.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm);
 	mb->un.varRdSparm.un.sp64.addrHigh = putPaddrHigh(mp->phys);
 	mb->un.varRdSparm.un.sp64.addrLow = putPaddrLow(mp->phys);
-	mb->un.varRdSparm.vpi = vpi;
+	mb->un.varRdSparm.vpi = vpi + phba->vpi_base;
 
 	/* save address for completion */
 	pmb->context1 = mp;
@@ -546,10 +546,12 @@ lpfc_unreg_did(struct lpfc_hba * phba, uint16_t vpi, uint32_t did,
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->un.varUnregDID.did = did;
+	if (vpi != 0xffff)
+		vpi += phba->vpi_base;
 	mb->un.varUnregDID.vpi = vpi;
 
 	mb->mbxCommand = MBX_UNREG_D_ID;
@@ -575,7 +577,7 @@ lpfc_read_config(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->mbxCommand = MBX_READ_CONFIG;
@@ -600,7 +602,7 @@ lpfc_read_lnk_stat(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->mbxCommand = MBX_READ_LNK_STAT;
@@ -609,7 +611,7 @@ lpfc_read_lnk_stat(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 }
 
 /**
- * lpfc_reg_login - Prepare a mailbox command for registering remote login
+ * lpfc_reg_rpi - Prepare a mailbox command for registering remote login
  * @phba: pointer to lpfc hba data structure.
  * @vpi: virtual N_Port identifier.
  * @did: remote port identifier.
@@ -633,17 +635,23 @@ lpfc_read_lnk_stat(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
  *    1 - DMA memory allocation failed
  **/
 int
-lpfc_reg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t did,
+lpfc_reg_rpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t did,
 	       uint8_t *param, LPFC_MBOXQ_t *pmb, uint32_t flag)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	uint8_t *sparam;
 	struct lpfc_dmabuf *mp;
 
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->un.varRegLogin.rpi = 0;
-	mb->un.varRegLogin.vpi = vpi;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		mb->un.varRegLogin.rpi = lpfc_sli4_alloc_rpi(phba);
+		if (mb->un.varRegLogin.rpi == LPFC_RPI_ALLOC_ERROR)
+			return 1;
+	}
+
+	mb->un.varRegLogin.vpi = vpi + phba->vpi_base;
 	mb->un.varRegLogin.did = did;
 	mb->un.varWords[30] = flag;	/* Set flag to issue action on cmpl */
 
@@ -699,15 +707,16 @@ lpfc_unreg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t rpi,
 {
 	MAILBOX_t *mb;
 
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
 	mb->un.varUnregLogin.rpi = (uint16_t) rpi;
 	mb->un.varUnregLogin.rsvd1 = 0;
-	mb->un.varUnregLogin.vpi = vpi;
+	mb->un.varUnregLogin.vpi = vpi + phba->vpi_base;
 
 	mb->mbxCommand = MBX_UNREG_LOGIN;
 	mb->mbxOwner = OWN_HOST;
+
 	return;
 }
 
@@ -727,15 +736,15 @@ lpfc_unreg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t rpi,
  * This routine prepares the mailbox command for registering a virtual N_Port.
  **/
 void
-lpfc_reg_vpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t sid,
-	     LPFC_MBOXQ_t *pmb)
+lpfc_reg_vpi(struct lpfc_vport *vport, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
-	mb->un.varRegVpi.vpi = vpi;
-	mb->un.varRegVpi.sid = sid;
+	mb->un.varRegVpi.vpi = vport->vpi + vport->phba->vpi_base;
+	mb->un.varRegVpi.sid = vport->fc_myDID;
+	mb->un.varRegVpi.vfi = vport->vfi + vport->phba->vfi_base;
 
 	mb->mbxCommand = MBX_REG_VPI;
 	mb->mbxOwner = OWN_HOST;
@@ -762,10 +771,10 @@ lpfc_reg_vpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t sid,
 void
 lpfc_unreg_vpi(struct lpfc_hba *phba, uint16_t vpi, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 
-	mb->un.varUnregVpi.vpi = vpi;
+	mb->un.varUnregVpi.vpi = vpi + phba->vpi_base;
 
 	mb->mbxCommand = MBX_UNREG_VPI;
 	mb->mbxOwner = OWN_HOST;
@@ -854,7 +863,7 @@ lpfc_config_pcb_setup(struct lpfc_hba * phba)
 void
 lpfc_read_rev(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
 	mb->un.varRdRev.cv = 1;
 	mb->un.varRdRev.v3req = 1; /* Request SLI3 info */
@@ -947,7 +956,7 @@ lpfc_config_hbq(struct lpfc_hba *phba, uint32_t id,
 		uint32_t hbq_entry_index, LPFC_MBOXQ_t *pmb)
 {
 	int i;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct config_hbq_var *hbqmb = &mb->un.varCfgHbq;
 
 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
@@ -1022,7 +1031,7 @@ void
 lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb)
 {
 	int i;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_sli *psli;
 	struct lpfc_sli_ring *pring;
 
@@ -1077,7 +1086,7 @@ void
 lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	MAILBOX_t __iomem *mb_slim = (MAILBOX_t __iomem *) phba->MBslimaddr;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	dma_addr_t pdma_addr;
 	uint32_t bar_low, bar_high;
 	size_t offset;
@@ -1101,21 +1110,22 @@ lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 
 	/* If HBA supports SLI=3 ask for it */
 
-	if (phba->sli_rev == 3 && phba->vpd.sli3Feat.cerbm) {
+	if (phba->sli_rev == LPFC_SLI_REV3 && phba->vpd.sli3Feat.cerbm) {
 		if (phba->cfg_enable_bg)
 			mb->un.varCfgPort.cbg = 1; /* configure BlockGuard */
+		mb->un.varCfgPort.cdss = 1; /* Configure Security */
 		mb->un.varCfgPort.cerbm = 1; /* Request HBQs */
 		mb->un.varCfgPort.ccrp = 1; /* Command Ring Polling */
 		mb->un.varCfgPort.cinb = 1; /* Interrupt Notification Block */
 		mb->un.varCfgPort.max_hbq = lpfc_sli_hbq_count();
 		if (phba->max_vpi && phba->cfg_enable_npiv &&
 		    phba->vpd.sli3Feat.cmv) {
-			mb->un.varCfgPort.max_vpi = phba->max_vpi;
+			mb->un.varCfgPort.max_vpi = LPFC_MAX_VPI;
 			mb->un.varCfgPort.cmv = 1;
 		} else
 			mb->un.varCfgPort.max_vpi = phba->max_vpi = 0;
 	} else
-		phba->sli_rev = 2;
+		phba->sli_rev = LPFC_SLI_REV2;
 	mb->un.varCfgPort.sli_mode = phba->sli_rev;
 
 	/* Now setup pcb */
@@ -1247,7 +1257,7 @@ lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 void
 lpfc_kill_board(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 
 	memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
 	mb->mbxCommand = MBX_KILL_BOARD;
@@ -1307,29 +1317,98 @@ lpfc_mbox_get(struct lpfc_hba * phba)
 }
 
 /**
+ * __lpfc_mbox_cmpl_put - Put mailbox cmd into mailbox cmd complete list
+ * @phba: pointer to lpfc hba data structure.
+ * @mbq: pointer to the driver internal queue element for mailbox command.
+ *
+ * This routine put the completed mailbox command into the mailbox command
+ * complete list. This is the unlocked version of the routine. The mailbox
+ * complete list is used by the driver worker thread to process mailbox
+ * complete callback functions outside the driver interrupt handler.
+ **/
+void
+__lpfc_mbox_cmpl_put(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbq)
+{
+	list_add_tail(&mbq->list, &phba->sli.mboxq_cmpl);
+}
+
+/**
  * lpfc_mbox_cmpl_put - Put mailbox command into mailbox command complete list
  * @phba: pointer to lpfc hba data structure.
  * @mbq: pointer to the driver internal queue element for mailbox command.
  *
  * This routine put the completed mailbox command into the mailbox command
- * complete list. This routine is called from driver interrupt handler
- * context.The mailbox complete list is used by the driver worker thread
- * to process mailbox complete callback functions outside the driver interrupt
- * handler.
+ * complete list. This is the locked version of the routine. The mailbox
+ * complete list is used by the driver worker thread to process mailbox
+ * complete callback functions outside the driver interrupt handler.
  **/
 void
-lpfc_mbox_cmpl_put(struct lpfc_hba * phba, LPFC_MBOXQ_t * mbq)
+lpfc_mbox_cmpl_put(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbq)
 {
 	unsigned long iflag;
 
 	/* This function expects to be called from interrupt context */
 	spin_lock_irqsave(&phba->hbalock, iflag);
-	list_add_tail(&mbq->list, &phba->sli.mboxq_cmpl);
+	__lpfc_mbox_cmpl_put(phba, mbq);
 	spin_unlock_irqrestore(&phba->hbalock, iflag);
 	return;
 }
 
 /**
+ * lpfc_mbox_cmd_check - Check the validality of a mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ * @mboxq: pointer to the driver internal queue element for mailbox command.
+ *
+ * This routine is to check whether a mailbox command is valid to be issued.
+ * This check will be performed by both the mailbox issue API when a client
+ * is to issue a mailbox command to the mailbox transport.
+ *
+ * Return 0 - pass the check, -ENODEV - fail the check
+ **/
+int
+lpfc_mbox_cmd_check(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	/* Mailbox command that have a completion handler must also have a
+	 * vport specified.
+	 */
+	if (mboxq->mbox_cmpl && mboxq->mbox_cmpl != lpfc_sli_def_mbox_cmpl &&
+	    mboxq->mbox_cmpl != lpfc_sli_wake_mbox_wait) {
+		if (!mboxq->vport) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT,
+					"1814 Mbox x%x failed, no vport\n",
+					mboxq->u.mb.mbxCommand);
+			dump_stack();
+			return -ENODEV;
+		}
+	}
+	return 0;
+}
+
+/**
+ * lpfc_mbox_dev_check - Check the device state for issuing a mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is to check whether the HBA device is ready for posting a
+ * mailbox command. It is used by the mailbox transport API at the time the
+ * to post a mailbox command to the device.
+ *
+ * Return 0 - pass the check, -ENODEV - fail the check
+ **/
+int
+lpfc_mbox_dev_check(struct lpfc_hba *phba)
+{
+	/* If the PCI channel is in offline state, do not issue mbox */
+	if (unlikely(pci_channel_offline(phba->pcidev)))
+		return -ENODEV;
+
+	/* If the HBA is in error state, do not issue mbox */
+	if (phba->link_state == LPFC_HBA_ERROR)
+		return -ENODEV;
+
+	return 0;
+}
+
+/**
  * lpfc_mbox_tmo_val - Retrieve mailbox command timeout value
  * @phba: pointer to lpfc hba data structure.
  * @cmd: mailbox command code.
@@ -1352,6 +1431,475 @@ lpfc_mbox_tmo_val(struct lpfc_hba *phba, int cmd)
 	case MBX_WRITE_WWN:     /* 0x98 */
 	case MBX_LOAD_EXP_ROM:	/* 0x9C */
 		return LPFC_MBOX_TMO_FLASH_CMD;
+	case MBX_SLI4_CONFIG:	/* 0x9b */
+		return LPFC_MBOX_SLI4_CONFIG_TMO;
 	}
 	return LPFC_MBOX_TMO;
 }
+
+/**
+ * lpfc_sli4_mbx_sge_set - Set a sge entry in non-embedded mailbox command
+ * @mbox: pointer to lpfc mbox command.
+ * @sgentry: sge entry index.
+ * @phyaddr: physical address for the sge
+ * @length: Length of the sge.
+ *
+ * This routine sets up an entry in the non-embedded mailbox command at the sge
+ * index location.
+ **/
+void
+lpfc_sli4_mbx_sge_set(struct lpfcMboxq *mbox, uint32_t sgentry,
+		      dma_addr_t phyaddr, uint32_t length)
+{
+	struct lpfc_mbx_nembed_cmd *nembed_sge;
+
+	nembed_sge = (struct lpfc_mbx_nembed_cmd *)
+				&mbox->u.mqe.un.nembed_cmd;
+	nembed_sge->sge[sgentry].pa_lo = putPaddrLow(phyaddr);
+	nembed_sge->sge[sgentry].pa_hi = putPaddrHigh(phyaddr);
+	nembed_sge->sge[sgentry].length = length;
+}
+
+/**
+ * lpfc_sli4_mbx_sge_get - Get a sge entry from non-embedded mailbox command
+ * @mbox: pointer to lpfc mbox command.
+ * @sgentry: sge entry index.
+ *
+ * This routine gets an entry from the non-embedded mailbox command at the sge
+ * index location.
+ **/
+void
+lpfc_sli4_mbx_sge_get(struct lpfcMboxq *mbox, uint32_t sgentry,
+		      struct lpfc_mbx_sge *sge)
+{
+	struct lpfc_mbx_nembed_cmd *nembed_sge;
+
+	nembed_sge = (struct lpfc_mbx_nembed_cmd *)
+				&mbox->u.mqe.un.nembed_cmd;
+	sge->pa_lo = nembed_sge->sge[sgentry].pa_lo;
+	sge->pa_hi = nembed_sge->sge[sgentry].pa_hi;
+	sge->length = nembed_sge->sge[sgentry].length;
+}
+
+/**
+ * lpfc_sli4_mbox_cmd_free - Free a sli4 mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ * @mbox: pointer to lpfc mbox command.
+ *
+ * This routine frees SLI4 specific mailbox command for sending IOCTL command.
+ **/
+void
+lpfc_sli4_mbox_cmd_free(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
+{
+	struct lpfc_mbx_sli4_config *sli4_cfg;
+	struct lpfc_mbx_sge sge;
+	dma_addr_t phyaddr;
+	uint32_t sgecount, sgentry;
+
+	sli4_cfg = &mbox->u.mqe.un.sli4_config;
+
+	/* For embedded mbox command, just free the mbox command */
+	if (bf_get(lpfc_mbox_hdr_emb, &sli4_cfg->header.cfg_mhdr)) {
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return;
+	}
+
+	/* For non-embedded mbox command, we need to free the pages first */
+	sgecount = bf_get(lpfc_mbox_hdr_sge_cnt, &sli4_cfg->header.cfg_mhdr);
+	/* There is nothing we can do if there is no sge address array */
+	if (unlikely(!mbox->sge_array)) {
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return;
+	}
+	/* Each non-embedded DMA memory was allocated in the length of a page */
+	for (sgentry = 0; sgentry < sgecount; sgentry++) {
+		lpfc_sli4_mbx_sge_get(mbox, sgentry, &sge);
+		phyaddr = getPaddr(sge.pa_hi, sge.pa_lo);
+		dma_free_coherent(&phba->pcidev->dev, PAGE_SIZE,
+				  mbox->sge_array->addr[sgentry], phyaddr);
+	}
+	/* Free the sge address array memory */
+	kfree(mbox->sge_array);
+	/* Finally, free the mailbox command itself */
+	mempool_free(mbox, phba->mbox_mem_pool);
+}
+
+/**
+ * lpfc_sli4_config - Initialize the  SLI4 Config Mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ * @mbox: pointer to lpfc mbox command.
+ * @subsystem: The sli4 config sub mailbox subsystem.
+ * @opcode: The sli4 config sub mailbox command opcode.
+ * @length: Length of the sli4 config mailbox command.
+ *
+ * This routine sets up the header fields of SLI4 specific mailbox command
+ * for sending IOCTL command.
+ *
+ * Return: the actual length of the mbox command allocated (mostly useful
+ *         for none embedded mailbox command).
+ **/
+int
+lpfc_sli4_config(struct lpfc_hba *phba, struct lpfcMboxq *mbox,
+		 uint8_t subsystem, uint8_t opcode, uint32_t length, bool emb)
+{
+	struct lpfc_mbx_sli4_config *sli4_config;
+	union lpfc_sli4_cfg_shdr *cfg_shdr = NULL;
+	uint32_t alloc_len;
+	uint32_t resid_len;
+	uint32_t pagen, pcount;
+	void *viraddr;
+	dma_addr_t phyaddr;
+
+	/* Set up SLI4 mailbox command header fields */
+	memset(mbox, 0, sizeof(*mbox));
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_SLI4_CONFIG);
+
+	/* Set up SLI4 ioctl command header fields */
+	sli4_config = &mbox->u.mqe.un.sli4_config;
+
+	/* Setup for the embedded mbox command */
+	if (emb) {
+		/* Set up main header fields */
+		bf_set(lpfc_mbox_hdr_emb, &sli4_config->header.cfg_mhdr, 1);
+		sli4_config->header.cfg_mhdr.payload_length =
+					LPFC_MBX_CMD_HDR_LENGTH + length;
+		/* Set up sub-header fields following main header */
+		bf_set(lpfc_mbox_hdr_opcode,
+			&sli4_config->header.cfg_shdr.request, opcode);
+		bf_set(lpfc_mbox_hdr_subsystem,
+			&sli4_config->header.cfg_shdr.request, subsystem);
+		sli4_config->header.cfg_shdr.request.request_length = length;
+		return length;
+	}
+
+	/* Setup for the none-embedded mbox command */
+	pcount = (PAGE_ALIGN(length))/PAGE_SIZE;
+	pcount = (pcount > LPFC_SLI4_MBX_SGE_MAX_PAGES) ?
+				LPFC_SLI4_MBX_SGE_MAX_PAGES : pcount;
+	/* Allocate record for keeping SGE virtual addresses */
+	mbox->sge_array = kmalloc(sizeof(struct lpfc_mbx_nembed_sge_virt),
+				  GFP_KERNEL);
+	if (!mbox->sge_array)
+		return 0;
+
+	for (pagen = 0, alloc_len = 0; pagen < pcount; pagen++) {
+		/* The DMA memory is always allocated in the length of a
+		 * page even though the last SGE might not fill up to a
+		 * page, this is used as a priori size of PAGE_SIZE for
+		 * the later DMA memory free.
+		 */
+		viraddr = dma_alloc_coherent(&phba->pcidev->dev, PAGE_SIZE,
+					     &phyaddr, GFP_KERNEL);
+		/* In case of malloc fails, proceed with whatever we have */
+		if (!viraddr)
+			break;
+		mbox->sge_array->addr[pagen] = viraddr;
+		/* Keep the first page for later sub-header construction */
+		if (pagen == 0)
+			cfg_shdr = (union lpfc_sli4_cfg_shdr *)viraddr;
+		resid_len = length - alloc_len;
+		if (resid_len > PAGE_SIZE) {
+			lpfc_sli4_mbx_sge_set(mbox, pagen, phyaddr,
+					      PAGE_SIZE);
+			alloc_len += PAGE_SIZE;
+		} else {
+			lpfc_sli4_mbx_sge_set(mbox, pagen, phyaddr,
+					      resid_len);
+			alloc_len = length;
+		}
+	}
+
+	/* Set up main header fields in mailbox command */
+	sli4_config->header.cfg_mhdr.payload_length = alloc_len;
+	bf_set(lpfc_mbox_hdr_sge_cnt, &sli4_config->header.cfg_mhdr, pagen);
+
+	/* Set up sub-header fields into the first page */
+	if (pagen > 0) {
+		bf_set(lpfc_mbox_hdr_opcode, &cfg_shdr->request, opcode);
+		bf_set(lpfc_mbox_hdr_subsystem, &cfg_shdr->request, subsystem);
+		cfg_shdr->request.request_length =
+				alloc_len - sizeof(union  lpfc_sli4_cfg_shdr);
+	}
+	/* The sub-header is in DMA memory, which needs endian converstion */
+	lpfc_sli_pcimem_bcopy(cfg_shdr, cfg_shdr,
+			      sizeof(union  lpfc_sli4_cfg_shdr));
+
+	return alloc_len;
+}
+
+/**
+ * lpfc_sli4_mbox_opcode_get - Get the opcode from a sli4 mailbox command
+ * @phba: pointer to lpfc hba data structure.
+ * @mbox: pointer to lpfc mbox command.
+ *
+ * This routine gets the opcode from a SLI4 specific mailbox command for
+ * sending IOCTL command. If the mailbox command is not MBX_SLI4_CONFIG
+ * (0x9B) or if the IOCTL sub-header is not present, opcode 0x0 shall be
+ * returned.
+ **/
+uint8_t
+lpfc_sli4_mbox_opcode_get(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
+{
+	struct lpfc_mbx_sli4_config *sli4_cfg;
+	union lpfc_sli4_cfg_shdr *cfg_shdr;
+
+	if (mbox->u.mb.mbxCommand != MBX_SLI4_CONFIG)
+		return 0;
+	sli4_cfg = &mbox->u.mqe.un.sli4_config;
+
+	/* For embedded mbox command, get opcode from embedded sub-header*/
+	if (bf_get(lpfc_mbox_hdr_emb, &sli4_cfg->header.cfg_mhdr)) {
+		cfg_shdr = &mbox->u.mqe.un.sli4_config.header.cfg_shdr;
+		return bf_get(lpfc_mbox_hdr_opcode, &cfg_shdr->request);
+	}
+
+	/* For non-embedded mbox command, get opcode from first dma page */
+	if (unlikely(!mbox->sge_array))
+		return 0;
+	cfg_shdr = (union lpfc_sli4_cfg_shdr *)mbox->sge_array->addr[0];
+	return bf_get(lpfc_mbox_hdr_opcode, &cfg_shdr->request);
+}
+
+/**
+ * lpfc_request_features: Configure SLI4 REQUEST_FEATURES mailbox
+ * @mboxq: pointer to lpfc mbox command.
+ *
+ * This routine sets up the mailbox for an SLI4 REQUEST_FEATURES
+ * mailbox command.
+ **/
+void
+lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq)
+{
+	/* Set up SLI4 mailbox command header fields */
+	memset(mboxq, 0, sizeof(LPFC_MBOXQ_t));
+	bf_set(lpfc_mqe_command, &mboxq->u.mqe, MBX_SLI4_REQ_FTRS);
+
+	/* Set up host requested features. */
+	bf_set(lpfc_mbx_rq_ftr_rq_fcpi, &mboxq->u.mqe.un.req_ftrs, 1);
+
+	/* Virtual fabrics and FIPs are not supported yet. */
+	bf_set(lpfc_mbx_rq_ftr_rq_ifip, &mboxq->u.mqe.un.req_ftrs, 0);
+
+	/* Enable DIF (block guard) only if configured to do so. */
+	if (phba->cfg_enable_bg)
+		bf_set(lpfc_mbx_rq_ftr_rq_dif, &mboxq->u.mqe.un.req_ftrs, 1);
+
+	/* Enable NPIV only if configured to do so. */
+	if (phba->max_vpi && phba->cfg_enable_npiv)
+		bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1);
+
+	return;
+}
+
+/**
+ * lpfc_init_vfi - Initialize the INIT_VFI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @vport: Vport associated with the VF.
+ *
+ * This routine initializes @mbox to all zeros and then fills in the mailbox
+ * fields from @vport. INIT_VFI configures virtual fabrics identified by VFI
+ * in the context of an FCF. The driver issues this command to setup a VFI
+ * before issuing a FLOGI to login to the VSAN. The driver should also issue a
+ * REG_VFI after a successful VSAN login.
+ **/
+void
+lpfc_init_vfi(struct lpfcMboxq *mbox, struct lpfc_vport *vport)
+{
+	struct lpfc_mbx_init_vfi *init_vfi;
+
+	memset(mbox, 0, sizeof(*mbox));
+	init_vfi = &mbox->u.mqe.un.init_vfi;
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_INIT_VFI);
+	bf_set(lpfc_init_vfi_vr, init_vfi, 1);
+	bf_set(lpfc_init_vfi_vt, init_vfi, 1);
+	bf_set(lpfc_init_vfi_vfi, init_vfi, vport->vfi + vport->phba->vfi_base);
+	bf_set(lpfc_init_vfi_fcfi, init_vfi, vport->phba->fcf.fcfi);
+}
+
+/**
+ * lpfc_reg_vfi - Initialize the REG_VFI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @vport: vport associated with the VF.
+ * @phys: BDE DMA bus address used to send the service parameters to the HBA.
+ *
+ * This routine initializes @mbox to all zeros and then fills in the mailbox
+ * fields from @vport, and uses @buf as a DMAable buffer to send the vport's
+ * fc service parameters to the HBA for this VFI. REG_VFI configures virtual
+ * fabrics identified by VFI in the context of an FCF.
+ **/
+void
+lpfc_reg_vfi(struct lpfcMboxq *mbox, struct lpfc_vport *vport, dma_addr_t phys)
+{
+	struct lpfc_mbx_reg_vfi *reg_vfi;
+
+	memset(mbox, 0, sizeof(*mbox));
+	reg_vfi = &mbox->u.mqe.un.reg_vfi;
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_REG_VFI);
+	bf_set(lpfc_reg_vfi_vp, reg_vfi, 1);
+	bf_set(lpfc_reg_vfi_vfi, reg_vfi, vport->vfi + vport->phba->vfi_base);
+	bf_set(lpfc_reg_vfi_fcfi, reg_vfi, vport->phba->fcf.fcfi);
+	bf_set(lpfc_reg_vfi_vpi, reg_vfi, vport->vpi + vport->phba->vpi_base);
+	reg_vfi->bde.addrHigh = putPaddrHigh(phys);
+	reg_vfi->bde.addrLow = putPaddrLow(phys);
+	reg_vfi->bde.tus.f.bdeSize = sizeof(vport->fc_sparam);
+	reg_vfi->bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+	bf_set(lpfc_reg_vfi_nport_id, reg_vfi, vport->fc_myDID);
+}
+
+/**
+ * lpfc_init_vpi - Initialize the INIT_VPI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @vpi: VPI to be initialized.
+ *
+ * The INIT_VPI mailbox command supports virtual N_Ports. The driver uses the
+ * command to activate a virtual N_Port. The HBA assigns a MAC address to use
+ * with the virtual N Port.  The SLI Host issues this command before issuing a
+ * FDISC to connect to the Fabric. The SLI Host should issue a REG_VPI after a
+ * successful virtual NPort login.
+ **/
+void
+lpfc_init_vpi(struct lpfcMboxq *mbox, uint16_t vpi)
+{
+	memset(mbox, 0, sizeof(*mbox));
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_INIT_VPI);
+	bf_set(lpfc_init_vpi_vpi, &mbox->u.mqe.un.init_vpi, vpi);
+}
+
+/**
+ * lpfc_unreg_vfi - Initialize the UNREG_VFI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @vfi: VFI to be unregistered.
+ *
+ * The UNREG_VFI mailbox command causes the SLI Host to put a virtual fabric
+ * (logical NPort) into the inactive state. The SLI Host must have logged out
+ * and unregistered all remote N_Ports to abort any activity on the virtual
+ * fabric. The SLI Port posts the mailbox response after marking the virtual
+ * fabric inactive.
+ **/
+void
+lpfc_unreg_vfi(struct lpfcMboxq *mbox, uint16_t vfi)
+{
+	memset(mbox, 0, sizeof(*mbox));
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_UNREG_VFI);
+	bf_set(lpfc_unreg_vfi_vfi, &mbox->u.mqe.un.unreg_vfi, vfi);
+}
+
+/**
+ * lpfc_dump_fcoe_param - Dump config region 23 to get FCoe parameters.
+ * @phba: pointer to the hba structure containing.
+ * @mbox: pointer to lpfc mbox command to initialize.
+ *
+ * This function create a SLI4 dump mailbox command to dump FCoE
+ * parameters stored in region 23.
+ **/
+int
+lpfc_dump_fcoe_param(struct lpfc_hba *phba,
+		struct lpfcMboxq *mbox)
+{
+	struct lpfc_dmabuf *mp = NULL;
+	MAILBOX_t *mb;
+
+	memset(mbox, 0, sizeof(*mbox));
+	mb = &mbox->u.mb;
+
+	mp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (mp)
+		mp->virt = lpfc_mbuf_alloc(phba, 0, &mp->phys);
+
+	if (!mp || !mp->virt) {
+		kfree(mp);
+		/* dump_fcoe_param failed to allocate memory */
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
+			"2569 lpfc_dump_fcoe_param: memory"
+			" allocation failed \n");
+		return 1;
+	}
+
+	memset(mp->virt, 0, LPFC_BPL_SIZE);
+	INIT_LIST_HEAD(&mp->list);
+
+	/* save address for completion */
+	mbox->context1 = (uint8_t *) mp;
+
+	mb->mbxCommand = MBX_DUMP_MEMORY;
+	mb->un.varDmp.type = DMP_NV_PARAMS;
+	mb->un.varDmp.region_id = DMP_REGION_FCOEPARAM;
+	mb->un.varDmp.sli4_length = DMP_FCOEPARAM_RGN_SIZE;
+	mb->un.varWords[3] = putPaddrLow(mp->phys);
+	mb->un.varWords[4] = putPaddrHigh(mp->phys);
+	return 0;
+}
+
+/**
+ * lpfc_reg_fcfi - Initialize the REG_FCFI mailbox command
+ * @phba: pointer to the hba structure containing the FCF index and RQ ID.
+ * @mbox: pointer to lpfc mbox command to initialize.
+ *
+ * The REG_FCFI mailbox command supports Fibre Channel Forwarders (FCFs). The
+ * SLI Host uses the command to activate an FCF after it has acquired FCF
+ * information via a READ_FCF mailbox command. This mailbox command also is used
+ * to indicate where received unsolicited frames from this FCF will be sent. By
+ * default this routine will set up the FCF to forward all unsolicited frames
+ * the the RQ ID passed in the @phba. This can be overridden by the caller for
+ * more complicated setups.
+ **/
+void
+lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
+{
+	struct lpfc_mbx_reg_fcfi *reg_fcfi;
+
+	memset(mbox, 0, sizeof(*mbox));
+	reg_fcfi = &mbox->u.mqe.un.reg_fcfi;
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_REG_FCFI);
+	bf_set(lpfc_reg_fcfi_rq_id0, reg_fcfi, phba->sli4_hba.hdr_rq->queue_id);
+	bf_set(lpfc_reg_fcfi_rq_id1, reg_fcfi, REG_FCF_INVALID_QID);
+	bf_set(lpfc_reg_fcfi_rq_id2, reg_fcfi, REG_FCF_INVALID_QID);
+	bf_set(lpfc_reg_fcfi_rq_id3, reg_fcfi, REG_FCF_INVALID_QID);
+	bf_set(lpfc_reg_fcfi_info_index, reg_fcfi, phba->fcf.fcf_indx);
+	/* reg_fcf addr mode is bit wise inverted value of fcf addr_mode */
+	bf_set(lpfc_reg_fcfi_mam, reg_fcfi,
+		(~phba->fcf.addr_mode) & 0x3);
+	if (phba->fcf.fcf_flag & FCF_VALID_VLAN) {
+		bf_set(lpfc_reg_fcfi_vv, reg_fcfi, 1);
+		bf_set(lpfc_reg_fcfi_vlan_tag, reg_fcfi, phba->fcf.vlan_id);
+	}
+}
+
+/**
+ * lpfc_unreg_fcfi - Initialize the UNREG_FCFI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @fcfi: FCFI to be unregistered.
+ *
+ * The UNREG_FCFI mailbox command supports Fibre Channel Forwarders (FCFs).
+ * The SLI Host uses the command to inactivate an FCFI.
+ **/
+void
+lpfc_unreg_fcfi(struct lpfcMboxq *mbox, uint16_t fcfi)
+{
+	memset(mbox, 0, sizeof(*mbox));
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_UNREG_FCFI);
+	bf_set(lpfc_unreg_fcfi, &mbox->u.mqe.un.unreg_fcfi, fcfi);
+}
+
+/**
+ * lpfc_resume_rpi - Initialize the RESUME_RPI mailbox command
+ * @mbox: pointer to lpfc mbox command to initialize.
+ * @ndlp: The nodelist structure that describes the RPI to resume.
+ *
+ * The RESUME_RPI mailbox command is used to restart I/O to an RPI after a
+ * link event.
+ **/
+void
+lpfc_resume_rpi(struct lpfcMboxq *mbox, struct lpfc_nodelist *ndlp)
+{
+	struct lpfc_mbx_resume_rpi *resume_rpi;
+
+	memset(mbox, 0, sizeof(*mbox));
+	resume_rpi = &mbox->u.mqe.un.resume_rpi;
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_RESUME_RPI);
+	bf_set(lpfc_resume_rpi_rpi, resume_rpi, ndlp->nlp_rpi);
+	bf_set(lpfc_resume_rpi_vpi, resume_rpi,
+	       ndlp->vport->vpi + ndlp->vport->phba->vpi_base);
+	bf_set(lpfc_resume_rpi_vfi, resume_rpi,
+	       ndlp->vport->vfi + ndlp->vport->phba->vfi_base);
+}
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 6ba5a72..6efe459 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1192,7 +1192,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
 
 	/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
 	if ((mb = phba->sli.mbox_active)) {
-		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+		if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
 			lpfc_nlp_put(ndlp);
 			mb->context2 = NULL;
@@ -1202,7 +1202,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
 
 	spin_lock_irq(&phba->hbalock);
 	list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
-		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+		if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
 			mp = (struct lpfc_dmabuf *) (mb->context1);
 			if (mp) {
@@ -1253,7 +1253,7 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
 {
 	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 	LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	uint32_t did  = mb->un.varWords[1];
 
 	if (mb->mbxStatus) {
@@ -1880,11 +1880,12 @@ lpfc_cmpl_reglogin_npr_node(struct lpfc_vport *vport,
 			    void *arg, uint32_t evt)
 {
 	LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg;
-	MAILBOX_t    *mb = &pmb->mb;
+	MAILBOX_t    *mb = &pmb->u.mb;
 
-	if (!mb->mbxStatus)
+	if (!mb->mbxStatus) {
 		ndlp->nlp_rpi = mb->un.varWords[0];
-	else {
+		ndlp->nlp_flag |= NLP_RPI_VALID;
+	} else {
 		if (ndlp->nlp_flag & NLP_NODEV_REMOVE) {
 			lpfc_drop_node(vport, ndlp);
 			return NLP_STE_FREED_NODE;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index cf42ada..b53af99 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -43,24 +43,7 @@
 #include "lpfc_logmsg.h"
 #include "lpfc_compat.h"
 #include "lpfc_debugfs.h"
-
-/*
- * Define macro to log: Mailbox command x%x cannot issue Data
- * This allows multiple uses of lpfc_msgBlk0311
- * w/o perturbing log msg utility.
- */
-#define LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) \
-			lpfc_printf_log(phba, \
-				KERN_INFO, \
-				LOG_MBOX | LOG_SLI, \
-				"(%d):0311 Mailbox command x%x cannot " \
-				"issue Data: x%x x%x x%x\n", \
-				pmbox->vport ? pmbox->vport->vpi : 0, \
-				pmbox->mb.mbxCommand,		\
-				phba->pport->port_state,	\
-				psli->sli_flag,	\
-				flag)
-
+#include "lpfc_vport.h"
 
 /* There are only four IOCB completion types. */
 typedef enum _lpfc_iocb_type {
@@ -843,7 +826,7 @@ lpfc_sli_ring_map(struct lpfc_hba *phba)
 	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (!pmb)
 		return -ENOMEM;
-	pmbox = &pmb->mb;
+	pmbox = &pmb->u.mb;
 	phba->link_state = LPFC_INIT_MBX_CMDS;
 	for (i = 0; i < psli->num_rings; i++) {
 		lpfc_config_ring(phba, i, pmb);
@@ -1652,6 +1635,15 @@ lpfc_sli_chk_mbx_command(uint8_t mbxCommand)
 	case MBX_HEARTBEAT:
 	case MBX_PORT_CAPABILITIES:
 	case MBX_PORT_IOV_CONTROL:
+	case MBX_SLI4_CONFIG:
+	case MBX_SLI4_REQ_FTRS:
+	case MBX_REG_FCFI:
+	case MBX_UNREG_FCFI:
+	case MBX_REG_VFI:
+	case MBX_UNREG_VFI:
+	case MBX_INIT_VPI:
+	case MBX_INIT_VFI:
+	case MBX_RESUME_RPI:
 		ret = mbxCommand;
 		break;
 	default:
@@ -1672,7 +1664,7 @@ lpfc_sli_chk_mbx_command(uint8_t mbxCommand)
  * will wake up thread waiting on the wait queue pointed by context1
  * of the mailbox.
  **/
-static void
+void
 lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
 {
 	wait_queue_head_t *pdone_q;
@@ -1706,7 +1698,7 @@ void
 lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	struct lpfc_dmabuf *mp;
-	uint16_t rpi;
+	uint16_t rpi, vpi;
 	int rc;
 
 	mp = (struct lpfc_dmabuf *) (pmb->context1);
@@ -1716,24 +1708,30 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 		kfree(mp);
 	}
 
+	if ((pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) &&
+	    (phba->sli_rev == LPFC_SLI_REV4))
+		lpfc_sli4_free_rpi(phba, pmb->u.mb.un.varUnregLogin.rpi);
+
 	/*
 	 * If a REG_LOGIN succeeded  after node is destroyed or node
 	 * is in re-discovery driver need to cleanup the RPI.
 	 */
 	if (!(phba->pport->load_flag & FC_UNLOADING) &&
-	    pmb->mb.mbxCommand == MBX_REG_LOGIN64 &&
-	    !pmb->mb.mbxStatus) {
-
-		rpi = pmb->mb.un.varWords[0];
-		lpfc_unreg_login(phba, pmb->mb.un.varRegLogin.vpi, rpi, pmb);
+	    pmb->u.mb.mbxCommand == MBX_REG_LOGIN64 &&
+	    !pmb->u.mb.mbxStatus) {
+		rpi = pmb->u.mb.un.varWords[0];
+		vpi = pmb->u.mb.un.varRegLogin.vpi - phba->vpi_base;
+		lpfc_unreg_login(phba, vpi, rpi, pmb);
 		pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
 		rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
 		if (rc != MBX_NOT_FINISHED)
 			return;
 	}
 
-	mempool_free(pmb, phba->mbox_mem_pool);
-	return;
+	if (bf_get(lpfc_mqe_command, &pmb->u.mqe) == MBX_SLI4_CONFIG)
+		lpfc_sli4_mbox_cmd_free(phba, pmb);
+	else
+		mempool_free(pmb, phba->mbox_mem_pool);
 }
 
 /**
@@ -1770,7 +1768,7 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba)
 		if (pmb == NULL)
 			break;
 
-		pmbox = &pmb->mb;
+		pmbox = &pmb->u.mb;
 
 		if (pmbox->mbxCommand != MBX_HEARTBEAT) {
 			if (pmb->vport) {
@@ -1799,9 +1797,10 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba)
 			/* Unknow mailbox command compl */
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 					"(%d):0323 Unknown Mailbox command "
-					"%x Cmpl\n",
+					"x%x (x%x) Cmpl\n",
 					pmb->vport ? pmb->vport->vpi : 0,
-					pmbox->mbxCommand);
+					pmbox->mbxCommand,
+					lpfc_sli4_mbox_opcode_get(phba, pmb));
 			phba->link_state = LPFC_HBA_ERROR;
 			phba->work_hs = HS_FFER3;
 			lpfc_handle_eratt(phba);
@@ -1816,29 +1815,29 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba)
 						LOG_MBOX | LOG_SLI,
 						"(%d):0305 Mbox cmd cmpl "
 						"error - RETRYing Data: x%x "
-						"x%x x%x x%x\n",
+						"(x%x) x%x x%x x%x\n",
 						pmb->vport ? pmb->vport->vpi :0,
 						pmbox->mbxCommand,
+						lpfc_sli4_mbox_opcode_get(phba,
+									  pmb),
 						pmbox->mbxStatus,
 						pmbox->un.varWords[0],
 						pmb->vport->port_state);
 				pmbox->mbxStatus = 0;
 				pmbox->mbxOwner = OWN_HOST;
-				spin_lock_irq(&phba->hbalock);
-				phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
-				spin_unlock_irq(&phba->hbalock);
 				rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
-				if (rc == MBX_SUCCESS)
+				if (rc != MBX_NOT_FINISHED)
 					continue;
 			}
 		}
 
 		/* Mailbox cmd <cmd> Cmpl <cmpl> */
 		lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
-				"(%d):0307 Mailbox cmd x%x Cmpl x%p "
+				"(%d):0307 Mailbox cmd x%x (x%x) Cmpl x%p "
 				"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x\n",
 				pmb->vport ? pmb->vport->vpi : 0,
 				pmbox->mbxCommand,
+				lpfc_sli4_mbox_opcode_get(phba, pmb),
 				pmb->mbox_cmpl,
 				*((uint32_t *) pmbox),
 				pmbox->un.varWords[0],
@@ -3377,10 +3376,10 @@ lpfc_sli_brdkill(struct lpfc_hba *phba)
 	}
 	spin_lock_irq(&phba->hbalock);
 	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	psli->mbox_active = NULL;
 	phba->link_flag &= ~LS_IGNORE_ERATT;
 	spin_unlock_irq(&phba->hbalock);
 
-	psli->mbox_active = NULL;
 	lpfc_hba_down_post(phba);
 	phba->link_state = LPFC_HBA_ERROR;
 
@@ -3790,7 +3789,7 @@ lpfc_sli_hbq_setup(struct lpfc_hba *phba)
 	if (!pmb)
 		return -ENOMEM;
 
-	pmbox = &pmb->mb;
+	pmbox = &pmb->u.mb;
 
 	/* Initialize the struct lpfc_sli_hbq structure for each hbq */
 	phba->link_state = LPFC_INIT_MBX_CMDS;
@@ -3917,33 +3916,43 @@ lpfc_sli_config_port(struct lpfc_hba *phba, int sli_mode)
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"0442 Adapter failed to init, mbxCmd x%x "
 				"CONFIG_PORT, mbxStatus x%x Data: x%x\n",
-				pmb->mb.mbxCommand, pmb->mb.mbxStatus, 0);
+				pmb->u.mb.mbxCommand, pmb->u.mb.mbxStatus, 0);
 			spin_lock_irq(&phba->hbalock);
-			phba->sli.sli_flag &= ~LPFC_SLI2_ACTIVE;
+			phba->sli.sli_flag &= ~LPFC_SLI_ACTIVE;
 			spin_unlock_irq(&phba->hbalock);
 			rc = -ENXIO;
-		} else
+		} else {
+			/* Allow asynchronous mailbox command to go through */
+			spin_lock_irq(&phba->hbalock);
+			phba->sli.sli_flag &= ~LPFC_SLI_ASYNC_MBX_BLK;
+			spin_unlock_irq(&phba->hbalock);
 			done = 1;
+		}
 	}
 	if (!done) {
 		rc = -EINVAL;
 		goto do_prep_failed;
 	}
-	if (pmb->mb.un.varCfgPort.sli_mode == 3) {
-		if (!pmb->mb.un.varCfgPort.cMA) {
+	if (pmb->u.mb.un.varCfgPort.sli_mode == 3) {
+		if (!pmb->u.mb.un.varCfgPort.cMA) {
 			rc = -ENXIO;
 			goto do_prep_failed;
 		}
-		if (phba->max_vpi && pmb->mb.un.varCfgPort.gmv) {
+		if (phba->max_vpi && pmb->u.mb.un.varCfgPort.gmv) {
 			phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED;
-			phba->max_vpi = pmb->mb.un.varCfgPort.max_vpi;
+			phba->max_vpi = pmb->u.mb.un.varCfgPort.max_vpi;
+			phba->max_vports = (phba->max_vpi > phba->max_vports) ?
+				phba->max_vpi : phba->max_vports;
+
 		} else
 			phba->max_vpi = 0;
-		if (pmb->mb.un.varCfgPort.gerbm)
+		if (pmb->u.mb.un.varCfgPort.gdss)
+			phba->sli3_options |= LPFC_SLI3_DSS_ENABLED;
+		if (pmb->u.mb.un.varCfgPort.gerbm)
 			phba->sli3_options |= LPFC_SLI3_HBQ_ENABLED;
-		if (pmb->mb.un.varCfgPort.gcrp)
+		if (pmb->u.mb.un.varCfgPort.gcrp)
 			phba->sli3_options |= LPFC_SLI3_CRP_ENABLED;
-		if (pmb->mb.un.varCfgPort.ginb) {
+		if (pmb->u.mb.un.varCfgPort.ginb) {
 			phba->sli3_options |= LPFC_SLI3_INB_ENABLED;
 			phba->hbq_get = phba->mbox->us.s3_inb_pgp.hbq_get;
 			phba->port_gp = phba->mbox->us.s3_inb_pgp.port;
@@ -3959,7 +3968,7 @@ lpfc_sli_config_port(struct lpfc_hba *phba, int sli_mode)
 		}
 
 		if (phba->cfg_enable_bg) {
-			if (pmb->mb.un.varCfgPort.gbg)
+			if (pmb->u.mb.un.varCfgPort.gbg)
 				phba->sli3_options |= LPFC_SLI3_BG_ENABLED;
 			else
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -4054,8 +4063,9 @@ lpfc_sli_hba_setup(struct lpfc_hba *phba)
 		if (rc)
 			goto lpfc_sli_hba_setup_error;
 	}
-
+	spin_lock_irq(&phba->hbalock);
 	phba->sli.sli_flag |= LPFC_PROCESS_LA;
+	spin_unlock_irq(&phba->hbalock);
 
 	rc = lpfc_config_port_post(phba);
 	if (rc)
@@ -4596,7 +4606,7 @@ void
 lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
 {
 	LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active;
-	MAILBOX_t *mb = &pmbox->mb;
+	MAILBOX_t *mb = &pmbox->u.mb;
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring *pring;
 
@@ -6414,6 +6424,52 @@ lpfc_sli_queue_setup(struct lpfc_hba *phba)
 }
 
 /**
+ * lpfc_sli_mbox_sys_flush - Flush mailbox command sub-system
+ * @phba: Pointer to HBA context object.
+ *
+ * This routine flushes the mailbox command subsystem. It will unconditionally
+ * flush all the mailbox commands in the three possible stages in the mailbox
+ * command sub-system: pending mailbox command queue; the outstanding mailbox
+ * command; and completed mailbox command queue. It is caller's responsibility
+ * to make sure that the driver is in the proper state to flush the mailbox
+ * command sub-system. Namely, the posting of mailbox commands into the
+ * pending mailbox command queue from the various clients must be stopped;
+ * either the HBA is in a state that it will never works on the outstanding
+ * mailbox command (such as in EEH or ERATT conditions) or the outstanding
+ * mailbox command has been completed.
+ **/
+static void
+lpfc_sli_mbox_sys_flush(struct lpfc_hba *phba)
+{
+	LIST_HEAD(completions);
+	struct lpfc_sli *psli = &phba->sli;
+	LPFC_MBOXQ_t *pmb;
+	unsigned long iflag;
+
+	/* Flush all the mailbox commands in the mbox system */
+	spin_lock_irqsave(&phba->hbalock, iflag);
+	/* The pending mailbox command queue */
+	list_splice_init(&phba->sli.mboxq, &completions);
+	/* The outstanding active mailbox command */
+	if (psli->mbox_active) {
+		list_add_tail(&psli->mbox_active->list, &completions);
+		psli->mbox_active = NULL;
+		psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	}
+	/* The completed mailbox command queue */
+	list_splice_init(&phba->sli.mboxq_cmpl, &completions);
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+	/* Return all flushed mailbox commands with MBX_NOT_FINISHED status */
+	while (!list_empty(&completions)) {
+		list_remove_head(&completions, pmb, LPFC_MBOXQ_t, list);
+		pmb->u.mb.mbxStatus = MBX_NOT_FINISHED;
+		if (pmb->mbox_cmpl)
+			pmb->mbox_cmpl(phba, pmb);
+	}
+}
+
+/**
  * lpfc_sli_host_down - Vport cleanup function
  * @vport: Pointer to virtual port object.
  *
@@ -6506,9 +6562,11 @@ lpfc_sli_hba_down(struct lpfc_hba *phba)
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring *pring;
 	struct lpfc_dmabuf *buf_ptr;
-	LPFC_MBOXQ_t *pmb;
-	int i;
 	unsigned long flags = 0;
+	int i;
+
+	/* Shutdown the mailbox command sub-system */
+	lpfc_sli_mbox_sys_shutdown(phba);
 
 	lpfc_hba_down_prep(phba);
 
@@ -7773,7 +7831,7 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id)
 
 		if ((work_ha_copy & HA_MBATT) && (phba->sli.mbox_active)) {
 			pmb = phba->sli.mbox_active;
-			pmbox = &pmb->mb;
+			pmbox = &pmb->u.mb;
 			mbox = phba->mbox;
 			vport = pmb->vport;
 
@@ -8170,6 +8228,183 @@ lpfc_sli4_iocb_param_transfer(struct lpfc_iocbq *pIocbIn,
 }
 
 /**
+ * lpfc_sli4_sp_handle_async_event - Handle an asynchroous event
+ * @phba: Pointer to HBA context object.
+ * @cqe: Pointer to mailbox completion queue entry.
+ *
+ * This routine process a mailbox completion queue entry with asynchrous
+ * event.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_async_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe)
+{
+	struct lpfc_cq_event *cq_event;
+	unsigned long iflags;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"0392 Async Event: word0:x%x, word1:x%x, "
+			"word2:x%x, word3:x%x\n", mcqe->word0,
+			mcqe->mcqe_tag0, mcqe->mcqe_tag1, mcqe->trailer);
+
+	/* Allocate a new internal CQ_EVENT entry */
+	cq_event = lpfc_sli4_cq_event_alloc(phba);
+	if (!cq_event) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0394 Failed to allocate CQ_EVENT entry\n");
+		return false;
+	}
+
+	/* Move the CQE into an asynchronous event entry */
+	memcpy(&cq_event->cqe, mcqe, sizeof(struct lpfc_mcqe));
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	list_add_tail(&cq_event->list, &phba->sli4_hba.sp_asynce_work_queue);
+	/* Set the async event flag */
+	phba->hba_flag |= ASYNC_EVENT;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+
+	return true;
+}
+
+/**
+ * lpfc_sli4_sp_handle_mbox_event - Handle a mailbox completion event
+ * @phba: Pointer to HBA context object.
+ * @cqe: Pointer to mailbox completion queue entry.
+ *
+ * This routine process a mailbox completion queue entry with mailbox
+ * completion event.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe)
+{
+	uint32_t mcqe_status;
+	MAILBOX_t *mbox, *pmbox;
+	struct lpfc_mqe *mqe;
+	struct lpfc_vport *vport;
+	struct lpfc_nodelist *ndlp;
+	struct lpfc_dmabuf *mp;
+	unsigned long iflags;
+	LPFC_MBOXQ_t *pmb;
+	bool workposted = false;
+	int rc;
+
+	/* If not a mailbox complete MCQE, out by checking mailbox consume */
+	if (!bf_get(lpfc_trailer_completed, mcqe))
+		goto out_no_mqe_complete;
+
+	/* Get the reference to the active mbox command */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	pmb = phba->sli.mbox_active;
+	if (unlikely(!pmb)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"1832 No pending MBOX command to handle\n");
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
+		goto out_no_mqe_complete;
+	}
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	mqe = &pmb->u.mqe;
+	pmbox = (MAILBOX_t *)&pmb->u.mqe;
+	mbox = phba->mbox;
+	vport = pmb->vport;
+
+	/* Reset heartbeat timer */
+	phba->last_completion_time = jiffies;
+	del_timer(&phba->sli.mbox_tmo);
+
+	/* Move mbox data to caller's mailbox region, do endian swapping */
+	if (pmb->mbox_cmpl && mbox)
+		lpfc_sli_pcimem_bcopy(mbox, mqe, sizeof(struct lpfc_mqe));
+	/* Set the mailbox status with SLI4 range 0x4000 */
+	mcqe_status = bf_get(lpfc_mcqe_status, mcqe);
+	if (mcqe_status != MB_CQE_STATUS_SUCCESS)
+		bf_set(lpfc_mqe_status, mqe,
+		       (LPFC_MBX_ERROR_RANGE | mcqe_status));
+
+	if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) {
+		pmb->mbox_flag &= ~LPFC_MBX_IMED_UNREG;
+		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_MBOX_VPORT,
+				      "MBOX dflt rpi: status:x%x rpi:x%x",
+				      mcqe_status,
+				      pmbox->un.varWords[0], 0);
+		if (mcqe_status == MB_CQE_STATUS_SUCCESS) {
+			mp = (struct lpfc_dmabuf *)(pmb->context1);
+			ndlp = (struct lpfc_nodelist *)pmb->context2;
+			/* Reg_LOGIN of dflt RPI was successful. Now lets get
+			 * RID of the PPI using the same mbox buffer.
+			 */
+			lpfc_unreg_login(phba, vport->vpi,
+					 pmbox->un.varWords[0], pmb);
+			pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;
+			pmb->context1 = mp;
+			pmb->context2 = ndlp;
+			pmb->vport = vport;
+			rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+			if (rc != MBX_BUSY)
+				lpfc_printf_log(phba, KERN_ERR, LOG_MBOX |
+						LOG_SLI, "0385 rc should "
+						"have been MBX_BUSY\n");
+			if (rc != MBX_NOT_FINISHED)
+				goto send_current_mbox;
+		}
+	}
+	spin_lock_irqsave(&phba->pport->work_port_lock, iflags);
+	phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
+	spin_unlock_irqrestore(&phba->pport->work_port_lock, iflags);
+
+	/* There is mailbox completion work to do */
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	__lpfc_mbox_cmpl_put(phba, pmb);
+	phba->work_ha |= HA_MBATT;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	workposted = true;
+
+send_current_mbox:
+	spin_lock_irqsave(&phba->hbalock, iflags);
+	/* Release the mailbox command posting token */
+	phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	/* Setting active mailbox pointer need to be in sync to flag clear */
+	phba->sli.mbox_active = NULL;
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
+	/* Wake up worker thread to post the next pending mailbox command */
+	lpfc_worker_wake_up(phba);
+out_no_mqe_complete:
+	if (bf_get(lpfc_trailer_consumed, mcqe))
+		lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq);
+	return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_handle_mcqe - Process a mailbox completion queue entry
+ * @phba: Pointer to HBA context object.
+ * @cqe: Pointer to mailbox completion queue entry.
+ *
+ * This routine process a mailbox completion queue entry, it invokes the
+ * proper mailbox complete handling or asynchrous event handling routine
+ * according to the MCQE's async bit.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
+ **/
+static bool
+lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
+{
+	struct lpfc_mcqe mcqe;
+	bool workposted;
+
+	/* Copy the mailbox MCQE and convert endian order as needed */
+	lpfc_sli_pcimem_bcopy(cqe, &mcqe, sizeof(struct lpfc_mcqe));
+
+	/* Invoke the proper event handling routine */
+	if (!bf_get(lpfc_trailer_async, &mcqe))
+		workposted = lpfc_sli4_sp_handle_mbox_event(phba, &mcqe);
+	else
+		workposted = lpfc_sli4_sp_handle_async_event(phba, &mcqe);
+	return workposted;
+}
+
+/**
  * lpfc_sli4_sp_handle_els_wcqe - Handle els work-queue completion event
  * @phba: Pointer to HBA context object.
  * @wcqe: Pointer to work-queue completion queue entry.
@@ -9247,6 +9482,112 @@ out:
 }
 
 /**
+ * lpfc_mq_create - Create a mailbox Queue on the HBA
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @mq: The queue structure to use to create the mailbox queue.
+ *
+ * This function creates a mailbox queue, as detailed in @mq, on a port,
+ * described by @phba by sending a MQ_CREATE mailbox command to the HBA.
+ *
+ * The @phba struct is used to send mailbox command to HBA. The @cq struct
+ * is used to get the entry count and entry size that are necessary to
+ * determine the number of pages to allocate and use for this queue. This
+ * function will send the MQ_CREATE mailbox command to the HBA to setup the
+ * mailbox queue. This function is asynchronous and will wait for the mailbox
+ * command to finish before continuing.
+ *
+ * On success this function will return a zero. If unable to allocate enough
+ * memory this function will return ENOMEM. If the queue create mailbox command
+ * fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
+	       struct lpfc_queue *cq, uint32_t subtype)
+{
+	struct lpfc_mbx_mq_create *mq_create;
+	struct lpfc_dmabuf *dmabuf;
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_mq_create) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_MQ_CREATE,
+			 length, LPFC_SLI4_MBX_EMBED);
+	mq_create = &mbox->u.mqe.un.mq_create;
+	bf_set(lpfc_mbx_mq_create_num_pages, &mq_create->u.request,
+		    mq->page_count);
+	bf_set(lpfc_mq_context_cq_id, &mq_create->u.request.context,
+		    cq->queue_id);
+	bf_set(lpfc_mq_context_valid, &mq_create->u.request.context, 1);
+	switch (mq->entry_count) {
+	default:
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0362 Unsupported MQ count. (%d)\n",
+				mq->entry_count);
+		if (mq->entry_count < 16)
+			return -EINVAL;
+		/* otherwise default to smallest count (drop through) */
+	case 16:
+		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
+		       LPFC_MQ_CNT_16);
+		break;
+	case 32:
+		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
+		       LPFC_MQ_CNT_32);
+		break;
+	case 64:
+		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
+		       LPFC_MQ_CNT_64);
+		break;
+	case 128:
+		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
+		       LPFC_MQ_CNT_128);
+		break;
+	}
+	list_for_each_entry(dmabuf, &mq->page_list, list) {
+		mq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
+					putPaddrLow(dmabuf->phys);
+		mq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
+					putPaddrHigh(dmabuf->phys);
+	}
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) &mq_create->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2502 MQ_CREATE mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+		goto out;
+	}
+	mq->queue_id = bf_get(lpfc_mbx_mq_create_q_id, &mq_create->u.response);
+	if (mq->queue_id == 0xFFFF) {
+		status = -ENXIO;
+		goto out;
+	}
+	mq->type = LPFC_MQ;
+	mq->subtype = subtype;
+	mq->host_index = 0;
+	mq->hba_index = 0;
+
+	/* link the mq onto the parent cq child list */
+	list_add_tail(&mq->list, &cq->child_list);
+out:
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, phba->mbox_mem_pool);
+	return status;
+}
+
+/**
  * lpfc_wq_create - Create a Work Queue on the HBA
  * @phba: HBA structure that indicates port to create a queue on.
  * @wq: The queue structure to use to create the work queue.
@@ -9615,6 +9956,60 @@ lpfc_cq_destroy(struct lpfc_hba *phba, struct lpfc_queue *cq)
 }
 
 /**
+ * lpfc_mq_destroy - Destroy a Mailbox Queue on the HBA
+ * @qm: The queue structure associated with the queue to destroy.
+ *
+ * This function destroys a queue, as detailed in @mq by sending an mailbox
+ * command, specific to the type of queue, to the HBA.
+ *
+ * The @mq struct is used to get the queue ID of the queue to destroy.
+ *
+ * On success this function will return a zero. If the queue destroy mailbox
+ * command fails this function will return ENXIO.
+ **/
+uint32_t
+lpfc_mq_destroy(struct lpfc_hba *phba, struct lpfc_queue *mq)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc, length, status = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	if (!mq)
+		return -ENODEV;
+	mbox = mempool_alloc(mq->phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	length = (sizeof(struct lpfc_mbx_mq_destroy) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_MQ_DESTROY,
+			 length, LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_mq_destroy_q_id, &mbox->u.mqe.un.mq_destroy.u.request,
+	       mq->queue_id);
+	mbox->vport = mq->phba->pport;
+	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	rc = lpfc_sli_issue_mbox(mq->phba, mbox, MBX_POLL);
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *)
+		&mbox->u.mqe.un.mq_destroy.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2507 MQ_DESTROY mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		status = -ENXIO;
+	}
+	/* Remove mq from any list */
+	list_del_init(&mq->list);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mbox, mq->phba->mbox_mem_pool);
+	return status;
+}
+
+/**
  * lpfc_wq_destroy - Destroy a Work Queue on the HBA
  * @wq: The queue structure associated with the queue to destroy.
  *
-- 
cgit v0.10.2


From 6fb120a7ed882aae9636545142a51cf3182a3ace Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:52:59 -0400
Subject: [SCSI] lpfc 8.3.2 : Addition of SLI4 Interface - FCOE Discovery
 support

SLI4 supports both FC and FCOE, with some extended topology objects.
This patch adss support for the objects, and updates the disovery
engines for their use.

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 463104d..270a4c6 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -2924,6 +2924,14 @@ LPFC_ATTR_R(enable_hba_heartbeat, 1, 0, 1, "Enable HBA Heartbeat.");
 */
 LPFC_ATTR_R(enable_bg, 0, 0, 1, "Enable BlockGuard Support");
 
+/*
+# lpfc_enable_fip: When set, FIP is required to start discovery. If not
+# set, the driver will add an FCF record manually if the port has no
+# FCF records available and start discovery.
+# Value range is [0,1]. Default value is 1 (enabled)
+*/
+LPFC_ATTR_RW(enable_fip, 0, 0, 1, "Enable FIP Discovery");
+
 
 /*
 # lpfc_prot_mask: i
@@ -2990,6 +2998,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_peer_port_login,
 	&dev_attr_lpfc_nodev_tmo,
 	&dev_attr_lpfc_devloss_tmo,
+	&dev_attr_lpfc_enable_fip,
 	&dev_attr_lpfc_fcp_class,
 	&dev_attr_lpfc_use_adisc,
 	&dev_attr_lpfc_ack0,
@@ -3042,6 +3051,7 @@ struct device_attribute *lpfc_vport_attrs[] = {
 	&dev_attr_lpfc_lun_queue_depth,
 	&dev_attr_lpfc_nodev_tmo,
 	&dev_attr_lpfc_devloss_tmo,
+	&dev_attr_lpfc_enable_fip,
 	&dev_attr_lpfc_hba_queue_depth,
 	&dev_attr_lpfc_peer_port_login,
 	&dev_attr_lpfc_restrict_login,
@@ -4167,26 +4177,10 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 	phba->cfg_soft_wwpn = 0L;
 	lpfc_sg_seg_cnt_init(phba, lpfc_sg_seg_cnt);
 	lpfc_prot_sg_seg_cnt_init(phba, lpfc_prot_sg_seg_cnt);
-	/*
-	 * Since the sg_tablesize is module parameter, the sg_dma_buf_size
-	 * used to create the sg_dma_buf_pool must be dynamically calculated.
-	 * 2 segments are added since the IOCB needs a command and response bde.
-	 */
-	phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
-			sizeof(struct fcp_rsp) +
-			((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
-
-	if (phba->cfg_enable_bg) {
-		phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SEG_CNT;
-		phba->cfg_sg_dma_buf_size +=
-			phba->cfg_prot_sg_seg_cnt * sizeof(struct ulp_bde64);
-	}
-
-	/* Also reinitialize the host templates with new values. */
-	lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
-	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
-
 	lpfc_hba_queue_depth_init(phba, lpfc_hba_queue_depth);
+	lpfc_enable_fip_init(phba, lpfc_enable_fip);
+	lpfc_hba_log_verbose_init(phba, lpfc_log_verbose);
+
 	return;
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index e0f1cd4..d2a9229 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -23,6 +23,8 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *);
 struct fc_rport;
 void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
 void lpfc_dump_wakeup_param(struct lpfc_hba *, LPFC_MBOXQ_t *);
+void lpfc_dump_static_vport(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
+int lpfc_dump_fcoe_param(struct lpfc_hba *, struct lpfcMboxq *);
 void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_config_async(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
 
@@ -108,6 +110,7 @@ int lpfc_issue_els_adisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
 int lpfc_issue_els_logo(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
 int lpfc_issue_els_npiv_logo(struct lpfc_vport *, struct lpfc_nodelist *);
 int lpfc_issue_els_scr(struct lpfc_vport *, uint32_t, uint8_t);
+int lpfc_issue_fabric_reglogin(struct lpfc_vport *);
 int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
 int lpfc_ct_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
 int lpfc_els_rsp_acc(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *,
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 2c034a5..a3b56d7 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -388,6 +388,75 @@ fail:
 }
 
 /**
+ * lpfc_issue_reg_vfi - Register VFI for this vport's fabric login
+ * @vport: pointer to a host virtual N_Port data structure.
+ *
+ * This routine issues a REG_VFI mailbox for the vfi, vpi, fcfi triplet for
+ * the @vport. This mailbox command is necessary for FCoE only.
+ *
+ * Return code
+ *   0 - successfully issued REG_VFI for @vport
+ *   A failure code otherwise.
+ **/
+static int
+lpfc_issue_reg_vfi(struct lpfc_vport *vport)
+{
+	struct lpfc_hba  *phba = vport->phba;
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_nodelist *ndlp;
+	struct serv_parm *sp;
+	struct lpfc_dmabuf *dmabuf;
+	int rc = 0;
+
+	sp = &phba->fc_fabparam;
+	ndlp = lpfc_findnode_did(vport, Fabric_DID);
+	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
+		rc = -ENODEV;
+		goto fail;
+	}
+
+	dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (!dmabuf) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	dmabuf->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &dmabuf->phys);
+	if (!dmabuf->virt) {
+		rc = -ENOMEM;
+		goto fail_free_dmabuf;
+	}
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		rc = -ENOMEM;
+		goto fail_free_coherent;
+	}
+	vport->port_state = LPFC_FABRIC_CFG_LINK;
+	memcpy(dmabuf->virt, &phba->fc_fabparam, sizeof(vport->fc_sparam));
+	lpfc_reg_vfi(mboxq, vport, dmabuf->phys);
+	mboxq->mbox_cmpl = lpfc_mbx_cmpl_reg_vfi;
+	mboxq->vport = vport;
+	mboxq->context1 = dmabuf;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		rc = -ENXIO;
+		goto fail_free_mbox;
+	}
+	return 0;
+
+fail_free_mbox:
+	mempool_free(mboxq, phba->mbox_mem_pool);
+fail_free_coherent:
+	lpfc_mbuf_free(phba, dmabuf->virt, dmabuf->phys);
+fail_free_dmabuf:
+	kfree(dmabuf);
+fail:
+	lpfc_vport_set_state(vport, FC_VPORT_FAILED);
+	lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+		"0289 Issue Register VFI failed: Err %d\n", rc);
+	return rc;
+}
+
+/**
  * lpfc_cmpl_els_flogi_fabric - Completion function for flogi to a fabric port
  * @vport: pointer to a host virtual N_Port data structure.
  * @ndlp: pointer to a node-list data structure.
@@ -499,17 +568,24 @@ lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 		}
 	}
 
-	lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE);
-
-	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED &&
-	    vport->fc_flag & FC_VPORT_NEEDS_REG_VPI) {
-		lpfc_register_new_vport(phba, vport, ndlp);
-		return 0;
+	if (phba->sli_rev < LPFC_SLI_REV4) {
+		lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE);
+		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED &&
+		    vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)
+			lpfc_register_new_vport(phba, vport, ndlp);
+		else
+			lpfc_issue_fabric_reglogin(vport);
+	} else {
+		ndlp->nlp_type |= NLP_FABRIC;
+		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+		if (vport->vfi_state & LPFC_VFI_REGISTERED) {
+			lpfc_start_fdiscs(phba);
+			lpfc_do_scr_ns_plogi(phba, vport);
+		} else
+			lpfc_issue_reg_vfi(vport);
 	}
-	lpfc_issue_fabric_reglogin(vport);
 	return 0;
 }
-
 /**
  * lpfc_cmpl_els_flogi_nport - Completion function for flogi to an N_Port
  * @vport: pointer to a host virtual N_Port data structure.
@@ -817,9 +893,14 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	if (sp->cmn.fcphHigh < FC_PH3)
 		sp->cmn.fcphHigh = FC_PH3;
 
-	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
+	if  (phba->sli_rev == LPFC_SLI_REV4) {
+		elsiocb->iocb.ulpCt_h = ((SLI4_CT_FCFI >> 1) & 1);
+		elsiocb->iocb.ulpCt_l = (SLI4_CT_FCFI & 1);
+		/* FLOGI needs to be 3 for WQE FCFI */
+		/* Set the fcfi to the fcfi we registered with */
+		elsiocb->iocb.ulpContext = phba->fcf.fcfi;
+	} else if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
 		sp->cmn.request_multiple_Nport = 1;
-
 		/* For FLOGI, Let FLOGI rsp set the NPortID for VPI 0 */
 		icmd->ulpCt_h = 1;
 		icmd->ulpCt_l = 0;
@@ -932,6 +1013,8 @@ lpfc_initial_flogi(struct lpfc_vport *vport)
 		if (!ndlp)
 			return 0;
 		lpfc_nlp_init(vport, ndlp, Fabric_DID);
+		/* Set the node type */
+		ndlp->nlp_type |= NLP_FABRIC;
 		/* Put ndlp onto node list */
 		lpfc_enqueue_node(vport, ndlp);
 	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
@@ -1604,7 +1687,8 @@ lpfc_adisc_done(struct lpfc_vport *vport)
 	 * and continue discovery.
 	 */
 	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
-	    !(vport->fc_flag & FC_RSCN_MODE)) {
+	    !(vport->fc_flag & FC_RSCN_MODE) &&
+	    (phba->sli_rev < LPFC_SLI_REV4)) {
 		lpfc_issue_reg_vpi(phba, vport);
 		return;
 	}
@@ -2937,6 +3021,14 @@ lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
 
+	/*
+	 * This routine is used to register and unregister in previous SLI
+	 * modes.
+	 */
+	if ((pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) &&
+	    (phba->sli_rev == LPFC_SLI_REV4))
+		lpfc_sli4_free_rpi(phba, pmb->u.mb.un.varUnregLogin.rpi);
+
 	pmb->context1 = NULL;
 	lpfc_mbuf_free(phba, mp->virt, mp->phys);
 	kfree(mp);
@@ -3816,7 +3908,9 @@ lpfc_rscn_payload_check(struct lpfc_vport *vport, uint32_t did)
 			payload_len -= sizeof(uint32_t);
 			switch (rscn_did.un.b.resv & RSCN_ADDRESS_FORMAT_MASK) {
 			case RSCN_ADDRESS_FORMAT_PORT:
-				if (ns_did.un.word == rscn_did.un.word)
+				if ((ns_did.un.b.domain == rscn_did.un.b.domain)
+				    && (ns_did.un.b.area == rscn_did.un.b.area)
+				    && (ns_did.un.b.id == rscn_did.un.b.id))
 					goto return_did_out;
 				break;
 			case RSCN_ADDRESS_FORMAT_AREA:
@@ -4857,7 +4951,10 @@ lpfc_els_rcv_fan(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 		} else {
 			/* FAN verified - skip FLOGI */
 			vport->fc_myDID = vport->fc_prevDID;
-			lpfc_issue_fabric_reglogin(vport);
+			if (phba->sli_rev < LPFC_SLI_REV4)
+				lpfc_issue_fabric_reglogin(vport);
+			else
+				lpfc_issue_reg_vfi(vport);
 		}
 	}
 	return 0;
@@ -5540,11 +5637,10 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 
 dropit:
 	if (vport && !(vport->load_flag & FC_UNLOADING))
-		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
-			"(%d):0111 Dropping received ELS cmd "
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+			"0111 Dropping received ELS cmd "
 			"Data: x%x x%x x%x\n",
-			vport->vpi, icmd->ulpStatus,
-			icmd->un.ulpWord[4], icmd->ulpTimeout);
+			icmd->ulpStatus, icmd->un.ulpWord[4], icmd->ulpTimeout);
 	phba->fc_stat.elsRcvDrop++;
 }
 
@@ -5620,10 +5716,9 @@ lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	     icmd->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) {
 		if (icmd->unsli3.rcvsli3.vpi == 0xffff)
 			vport = phba->pport;
-		else {
-			uint16_t vpi = icmd->unsli3.rcvsli3.vpi;
-			vport = lpfc_find_vport_by_vpid(phba, vpi);
-		}
+		else
+			vport = lpfc_find_vport_by_vpid(phba,
+				icmd->unsli3.rcvsli3.vpi - phba->vpi_base);
 	}
 	/* If there are no BDEs associated
 	 * with this IOCB, there is nothing to do.
@@ -5792,7 +5887,10 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 
 	} else {
 		if (vport == phba->pport)
-			lpfc_issue_fabric_reglogin(vport);
+			if (phba->sli_rev < LPFC_SLI_REV4)
+				lpfc_issue_fabric_reglogin(vport);
+			else
+				lpfc_issue_reg_vfi(vport);
 		else
 			lpfc_do_scr_ns_plogi(phba, vport);
 	}
@@ -5824,7 +5922,7 @@ lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport,
 
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (mbox) {
-		lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, mbox);
+		lpfc_reg_vpi(vport, mbox);
 		mbox->vport = vport;
 		mbox->context2 = lpfc_nlp_get(ndlp);
 		mbox->mbox_cmpl = lpfc_cmpl_reg_new_vport;
@@ -6496,3 +6594,38 @@ void lpfc_fabric_abort_hba(struct lpfc_hba *phba)
 	lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT,
 			      IOERR_SLI_ABORTED);
 }
+
+/**
+ * lpfc_sli4_els_xri_aborted - Slow-path process of els xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the els xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 slow-path
+ * ELS aborted xri.
+ **/
+void
+lpfc_sli4_els_xri_aborted(struct lpfc_hba *phba,
+			  struct sli4_wcqe_xri_aborted *axri)
+{
+	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+	struct lpfc_sglq *sglq_entry = NULL, *sglq_next = NULL;
+	unsigned long iflag = 0;
+
+	spin_lock_irqsave(&phba->sli4_hba.abts_sgl_list_lock, iflag);
+	list_for_each_entry_safe(sglq_entry, sglq_next,
+			&phba->sli4_hba.lpfc_abts_els_sgl_list, list) {
+		if (sglq_entry->sli4_xritag == xri) {
+			list_del(&sglq_entry->list);
+			spin_unlock_irqrestore(
+					&phba->sli4_hba.abts_sgl_list_lock,
+					 iflag);
+			spin_lock_irqsave(&phba->hbalock, iflag);
+
+			list_add_tail(&sglq_entry->list,
+				&phba->sli4_hba.lpfc_sgl_list);
+			spin_unlock_irqrestore(&phba->hbalock, iflag);
+			return;
+		}
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.abts_sgl_list_lock, iflag);
+}
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 2270d9a..9bd7a89 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -275,6 +275,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
 	    !(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
 	    (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE))
 		lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
+
+	lpfc_unregister_unused_fcf(phba);
 }
 
 /**
@@ -297,10 +299,11 @@ lpfc_alloc_fast_evt(struct lpfc_hba *phba) {
 
 	ret = kzalloc(sizeof(struct lpfc_fast_path_event),
 			GFP_ATOMIC);
-	if (ret)
+	if (ret) {
 		atomic_inc(&phba->fast_event_count);
-	INIT_LIST_HEAD(&ret->work_evt.evt_listp);
-	ret->work_evt.evt = LPFC_EVT_FASTPATH_MGMT_EVT;
+		INIT_LIST_HEAD(&ret->work_evt.evt_listp);
+		ret->work_evt.evt = LPFC_EVT_FASTPATH_MGMT_EVT;
+	}
 	return ret;
 }
 
@@ -741,6 +744,7 @@ lpfc_linkdown(struct lpfc_hba *phba)
 	if (phba->link_state == LPFC_LINK_DOWN)
 		return 0;
 	spin_lock_irq(&phba->hbalock);
+	phba->fcf.fcf_flag &= ~(FCF_AVAILABLE | FCF_DISCOVERED);
 	if (phba->link_state > LPFC_LINK_DOWN) {
 		phba->link_state = LPFC_LINK_DOWN;
 		phba->pport->fc_flag &= ~FC_LBIT;
@@ -748,7 +752,7 @@ lpfc_linkdown(struct lpfc_hba *phba)
 	spin_unlock_irq(&phba->hbalock);
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			/* Issue a LINK DOWN event to all nodes */
 			lpfc_linkdown_port(vports[i]);
 		}
@@ -858,10 +862,11 @@ lpfc_linkup(struct lpfc_hba *phba)
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++)
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++)
 			lpfc_linkup_port(vports[i]);
 	lpfc_destroy_vport_work_array(phba, vports);
-	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
+	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
+	    (phba->sli_rev < LPFC_SLI_REV4))
 		lpfc_issue_clear_la(phba, phba->pport);
 
 	return 0;
@@ -984,9 +989,592 @@ out:
 }
 
 static void
+lpfc_mbx_cmpl_reg_fcfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_vport *vport = mboxq->vport;
+	unsigned long flags;
+
+	if (mboxq->u.mb.mbxStatus) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
+			 "2017 REG_FCFI mbxStatus error x%x "
+			 "HBA state x%x\n",
+			 mboxq->u.mb.mbxStatus, vport->port_state);
+		mempool_free(mboxq, phba->mbox_mem_pool);
+		return;
+	}
+
+	/* Start FCoE discovery by sending a FLOGI. */
+	phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi, &mboxq->u.mqe.un.reg_fcfi);
+	/* Set the FCFI registered flag */
+	spin_lock_irqsave(&phba->hbalock, flags);
+	phba->fcf.fcf_flag |= FCF_REGISTERED;
+	spin_unlock_irqrestore(&phba->hbalock, flags);
+	if (vport->port_state != LPFC_FLOGI) {
+		spin_lock_irqsave(&phba->hbalock, flags);
+		phba->fcf.fcf_flag |= (FCF_DISCOVERED | FCF_IN_USE);
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		lpfc_initial_flogi(vport);
+	}
+
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return;
+}
+
+/**
+ * lpfc_fab_name_match - Check if the fcf fabric name match.
+ * @fab_name: pointer to fabric name.
+ * @new_fcf_record: pointer to fcf record.
+ *
+ * This routine compare the fcf record's fabric name with provided
+ * fabric name. If the fabric name are identical this function
+ * returns 1 else return 0.
+ **/
+static uint32_t
+lpfc_fab_name_match(uint8_t *fab_name, struct fcf_record *new_fcf_record)
+{
+	if ((fab_name[0] ==
+		bf_get(lpfc_fcf_record_fab_name_0, new_fcf_record)) &&
+	    (fab_name[1] ==
+		bf_get(lpfc_fcf_record_fab_name_1, new_fcf_record)) &&
+	    (fab_name[2] ==
+		bf_get(lpfc_fcf_record_fab_name_2, new_fcf_record)) &&
+	    (fab_name[3] ==
+		bf_get(lpfc_fcf_record_fab_name_3, new_fcf_record)) &&
+	    (fab_name[4] ==
+		bf_get(lpfc_fcf_record_fab_name_4, new_fcf_record)) &&
+	    (fab_name[5] ==
+		bf_get(lpfc_fcf_record_fab_name_5, new_fcf_record)) &&
+	    (fab_name[6] ==
+		bf_get(lpfc_fcf_record_fab_name_6, new_fcf_record)) &&
+	    (fab_name[7] ==
+		bf_get(lpfc_fcf_record_fab_name_7, new_fcf_record)))
+		return 1;
+	else
+		return 0;
+}
+
+/**
+ * lpfc_mac_addr_match - Check if the fcf mac address match.
+ * @phba: pointer to lpfc hba data structure.
+ * @new_fcf_record: pointer to fcf record.
+ *
+ * This routine compare the fcf record's mac address with HBA's
+ * FCF mac address. If the mac addresses are identical this function
+ * returns 1 else return 0.
+ **/
+static uint32_t
+lpfc_mac_addr_match(struct lpfc_hba *phba, struct fcf_record *new_fcf_record)
+{
+	if ((phba->fcf.mac_addr[0] ==
+		bf_get(lpfc_fcf_record_mac_0, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[1] ==
+		bf_get(lpfc_fcf_record_mac_1, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[2] ==
+		bf_get(lpfc_fcf_record_mac_2, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[3] ==
+		bf_get(lpfc_fcf_record_mac_3, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[4] ==
+		bf_get(lpfc_fcf_record_mac_4, new_fcf_record)) &&
+	    (phba->fcf.mac_addr[5] ==
+		bf_get(lpfc_fcf_record_mac_5, new_fcf_record)))
+		return 1;
+	else
+		return 0;
+}
+
+/**
+ * lpfc_copy_fcf_record - Copy fcf information to lpfc_hba.
+ * @phba: pointer to lpfc hba data structure.
+ * @new_fcf_record: pointer to fcf record.
+ *
+ * This routine copies the FCF information from the FCF
+ * record to lpfc_hba data structure.
+ **/
+static void
+lpfc_copy_fcf_record(struct lpfc_hba *phba, struct fcf_record *new_fcf_record)
+{
+	phba->fcf.fabric_name[0] =
+		bf_get(lpfc_fcf_record_fab_name_0, new_fcf_record);
+	phba->fcf.fabric_name[1] =
+		bf_get(lpfc_fcf_record_fab_name_1, new_fcf_record);
+	phba->fcf.fabric_name[2] =
+		bf_get(lpfc_fcf_record_fab_name_2, new_fcf_record);
+	phba->fcf.fabric_name[3] =
+		bf_get(lpfc_fcf_record_fab_name_3, new_fcf_record);
+	phba->fcf.fabric_name[4] =
+		bf_get(lpfc_fcf_record_fab_name_4, new_fcf_record);
+	phba->fcf.fabric_name[5] =
+		bf_get(lpfc_fcf_record_fab_name_5, new_fcf_record);
+	phba->fcf.fabric_name[6] =
+		bf_get(lpfc_fcf_record_fab_name_6, new_fcf_record);
+	phba->fcf.fabric_name[7] =
+		bf_get(lpfc_fcf_record_fab_name_7, new_fcf_record);
+	phba->fcf.mac_addr[0] =
+		bf_get(lpfc_fcf_record_mac_0, new_fcf_record);
+	phba->fcf.mac_addr[1] =
+		bf_get(lpfc_fcf_record_mac_1, new_fcf_record);
+	phba->fcf.mac_addr[2] =
+		bf_get(lpfc_fcf_record_mac_2, new_fcf_record);
+	phba->fcf.mac_addr[3] =
+		bf_get(lpfc_fcf_record_mac_3, new_fcf_record);
+	phba->fcf.mac_addr[4] =
+		bf_get(lpfc_fcf_record_mac_4, new_fcf_record);
+	phba->fcf.mac_addr[5] =
+		bf_get(lpfc_fcf_record_mac_5, new_fcf_record);
+	phba->fcf.fcf_indx = bf_get(lpfc_fcf_record_fcf_index, new_fcf_record);
+	phba->fcf.priority = new_fcf_record->fip_priority;
+}
+
+/**
+ * lpfc_register_fcf - Register the FCF with hba.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine issues a register fcfi mailbox command to register
+ * the fcf with HBA.
+ **/
+static void
+lpfc_register_fcf(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *fcf_mbxq;
+	int rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&phba->hbalock, flags);
+
+	/* If the FCF is not availabe do nothing. */
+	if (!(phba->fcf.fcf_flag & FCF_AVAILABLE)) {
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		return;
+	}
+
+	/* The FCF is already registered, start discovery */
+	if (phba->fcf.fcf_flag & FCF_REGISTERED) {
+		phba->fcf.fcf_flag |= (FCF_DISCOVERED | FCF_IN_USE);
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		if (phba->pport->port_state != LPFC_FLOGI)
+			lpfc_initial_flogi(phba->pport);
+		return;
+	}
+	spin_unlock_irqrestore(&phba->hbalock, flags);
+
+	fcf_mbxq = mempool_alloc(phba->mbox_mem_pool,
+		GFP_KERNEL);
+	if (!fcf_mbxq)
+		return;
+
+	lpfc_reg_fcfi(phba, fcf_mbxq);
+	fcf_mbxq->vport = phba->pport;
+	fcf_mbxq->mbox_cmpl = lpfc_mbx_cmpl_reg_fcfi;
+	rc = lpfc_sli_issue_mbox(phba, fcf_mbxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED)
+		mempool_free(fcf_mbxq, phba->mbox_mem_pool);
+
+	return;
+}
+
+/**
+ * lpfc_match_fcf_conn_list - Check if the FCF record can be used for discovery.
+ * @phba: pointer to lpfc hba data structure.
+ * @new_fcf_record: pointer to fcf record.
+ * @boot_flag: Indicates if this record used by boot bios.
+ * @addr_mode: The address mode to be used by this FCF
+ *
+ * This routine compare the fcf record with connect list obtained from the
+ * config region to decide if this FCF can be used for SAN discovery. It returns
+ * 1 if this record can be used for SAN discovery else return zero. If this FCF
+ * record can be used for SAN discovery, the boot_flag will indicate if this FCF
+ * is used by boot bios and addr_mode will indicate the addressing mode to be
+ * used for this FCF when the function returns.
+ * If the FCF record need to be used with a particular vlan id, the vlan is
+ * set in the vlan_id on return of the function. If not VLAN tagging need to
+ * be used with the FCF vlan_id will be set to 0xFFFF;
+ **/
+static int
+lpfc_match_fcf_conn_list(struct lpfc_hba *phba,
+			struct fcf_record *new_fcf_record,
+			uint32_t *boot_flag, uint32_t *addr_mode,
+			uint16_t *vlan_id)
+{
+	struct lpfc_fcf_conn_entry *conn_entry;
+
+	if (!phba->cfg_enable_fip) {
+		*boot_flag = 0;
+		*addr_mode = bf_get(lpfc_fcf_record_mac_addr_prov,
+				new_fcf_record);
+		if (phba->valid_vlan)
+			*vlan_id = phba->vlan_id;
+		else
+			*vlan_id = 0xFFFF;
+		return 1;
+	}
+
+	/*
+	 * If there are no FCF connection table entry, driver connect to all
+	 * FCFs.
+	 */
+	if (list_empty(&phba->fcf_conn_rec_list)) {
+		*boot_flag = 0;
+		*addr_mode = bf_get(lpfc_fcf_record_mac_addr_prov,
+			new_fcf_record);
+		*vlan_id = 0xFFFF;
+		return 1;
+	}
+
+	list_for_each_entry(conn_entry, &phba->fcf_conn_rec_list, list) {
+		if (!(conn_entry->conn_rec.flags & FCFCNCT_VALID))
+			continue;
+
+		if ((conn_entry->conn_rec.flags & FCFCNCT_FBNM_VALID) &&
+			!lpfc_fab_name_match(conn_entry->conn_rec.fabric_name,
+				new_fcf_record))
+			continue;
+
+		if (conn_entry->conn_rec.flags & FCFCNCT_VLAN_VALID) {
+			/*
+			 * If the vlan bit map does not have the bit set for the
+			 * vlan id to be used, then it is not a match.
+			 */
+			if (!(new_fcf_record->vlan_bitmap
+				[conn_entry->conn_rec.vlan_tag / 8] &
+				(1 << (conn_entry->conn_rec.vlan_tag % 8))))
+				continue;
+		}
+
+		/*
+		 * Check if the connection record specifies a required
+		 * addressing mode.
+		 */
+		if ((conn_entry->conn_rec.flags & FCFCNCT_AM_VALID) &&
+			!(conn_entry->conn_rec.flags & FCFCNCT_AM_PREFERRED)) {
+
+			/*
+			 * If SPMA required but FCF not support this continue.
+			 */
+			if ((conn_entry->conn_rec.flags & FCFCNCT_AM_SPMA) &&
+				!(bf_get(lpfc_fcf_record_mac_addr_prov,
+					new_fcf_record) & LPFC_FCF_SPMA))
+				continue;
+
+			/*
+			 * If FPMA required but FCF not support this continue.
+			 */
+			if (!(conn_entry->conn_rec.flags & FCFCNCT_AM_SPMA) &&
+				!(bf_get(lpfc_fcf_record_mac_addr_prov,
+				new_fcf_record) & LPFC_FCF_FPMA))
+				continue;
+		}
+
+		/*
+		 * This fcf record matches filtering criteria.
+		 */
+		if (conn_entry->conn_rec.flags & FCFCNCT_BOOT)
+			*boot_flag = 1;
+		else
+			*boot_flag = 0;
+
+		*addr_mode = bf_get(lpfc_fcf_record_mac_addr_prov,
+				new_fcf_record);
+		/*
+		 * If the user specified a required address mode, assign that
+		 * address mode
+		 */
+		if ((conn_entry->conn_rec.flags & FCFCNCT_AM_VALID) &&
+			(!(conn_entry->conn_rec.flags & FCFCNCT_AM_PREFERRED)))
+			*addr_mode = (conn_entry->conn_rec.flags &
+				FCFCNCT_AM_SPMA) ?
+				LPFC_FCF_SPMA : LPFC_FCF_FPMA;
+		/*
+		 * If the user specified a prefered address mode, use the
+		 * addr mode only if FCF support the addr_mode.
+		 */
+		else if ((conn_entry->conn_rec.flags & FCFCNCT_AM_VALID) &&
+			(conn_entry->conn_rec.flags & FCFCNCT_AM_PREFERRED) &&
+			(conn_entry->conn_rec.flags & FCFCNCT_AM_SPMA) &&
+			(*addr_mode & LPFC_FCF_SPMA))
+				*addr_mode = LPFC_FCF_SPMA;
+		else if ((conn_entry->conn_rec.flags & FCFCNCT_AM_VALID) &&
+			(conn_entry->conn_rec.flags & FCFCNCT_AM_PREFERRED) &&
+			!(conn_entry->conn_rec.flags & FCFCNCT_AM_SPMA) &&
+			(*addr_mode & LPFC_FCF_FPMA))
+				*addr_mode = LPFC_FCF_FPMA;
+		/*
+		 * If user did not specify any addressing mode, use FPMA if
+		 * possible else use SPMA.
+		 */
+		else if (*addr_mode & LPFC_FCF_FPMA)
+			*addr_mode = LPFC_FCF_FPMA;
+
+		if (conn_entry->conn_rec.flags & FCFCNCT_VLAN_VALID)
+			*vlan_id = conn_entry->conn_rec.vlan_tag;
+		else
+			*vlan_id = 0xFFFF;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * lpfc_mbx_cmpl_read_fcf_record - Completion handler for read_fcf mbox.
+ * @phba: pointer to lpfc hba data structure.
+ * @mboxq: pointer to mailbox object.
+ *
+ * This function iterate through all the fcf records available in
+ * HBA and choose the optimal FCF record for discovery. After finding
+ * the FCF for discovery it register the FCF record and kick start
+ * discovery.
+ * If FCF_IN_USE flag is set in currently used FCF, the routine try to
+ * use a FCF record which match fabric name and mac address of the
+ * currently used FCF record.
+ * If the driver support only one FCF, it will try to use the FCF record
+ * used by BOOT_BIOS.
+ */
+void
+lpfc_mbx_cmpl_read_fcf_record(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	void *virt_addr;
+	dma_addr_t phys_addr;
+	uint8_t *bytep;
+	struct lpfc_mbx_sge sge;
+	struct lpfc_mbx_read_fcf_tbl *read_fcf;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+	struct fcf_record *new_fcf_record;
+	int rc;
+	uint32_t boot_flag, addr_mode;
+	uint32_t next_fcf_index;
+	unsigned long flags;
+	uint16_t vlan_id;
+
+	/* Get the first SGE entry from the non-embedded DMA memory. This
+	 * routine only uses a single SGE.
+	 */
+	lpfc_sli4_mbx_sge_get(mboxq, 0, &sge);
+	phys_addr = getPaddr(sge.pa_hi, sge.pa_lo);
+	if (unlikely(!mboxq->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2524 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		goto out;
+	}
+	virt_addr = mboxq->sge_array->addr[0];
+
+	shdr = (union lpfc_sli4_cfg_shdr *)virt_addr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
+				 &shdr->response);
+	/*
+	 * The FCF Record was read and there is no reason for the driver
+	 * to maintain the FCF record data or memory. Instead, just need
+	 * to book keeping the FCFIs can be used.
+	 */
+	if (shdr_status || shdr_add_status) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2521 READ_FCF_RECORD mailbox failed "
+				"with status x%x add_status x%x, mbx\n",
+				shdr_status, shdr_add_status);
+		goto out;
+	}
+	/* Interpreting the returned information of FCF records */
+	read_fcf = (struct lpfc_mbx_read_fcf_tbl *)virt_addr;
+	lpfc_sli_pcimem_bcopy(read_fcf, read_fcf,
+			      sizeof(struct lpfc_mbx_read_fcf_tbl));
+	next_fcf_index = bf_get(lpfc_mbx_read_fcf_tbl_nxt_vindx, read_fcf);
+
+	new_fcf_record = (struct fcf_record *)(virt_addr +
+			  sizeof(struct lpfc_mbx_read_fcf_tbl));
+	lpfc_sli_pcimem_bcopy(new_fcf_record, new_fcf_record,
+			      sizeof(struct fcf_record));
+	bytep = virt_addr + sizeof(union lpfc_sli4_cfg_shdr);
+
+	rc = lpfc_match_fcf_conn_list(phba, new_fcf_record,
+				      &boot_flag, &addr_mode,
+					&vlan_id);
+	/*
+	 * If the fcf record does not match with connect list entries
+	 * read the next entry.
+	 */
+	if (!rc)
+		goto read_next_fcf;
+	/*
+	 * If this is not the first FCF discovery of the HBA, use last
+	 * FCF record for the discovery.
+	 */
+	spin_lock_irqsave(&phba->hbalock, flags);
+	if (phba->fcf.fcf_flag & FCF_IN_USE) {
+		if (lpfc_fab_name_match(phba->fcf.fabric_name,
+			new_fcf_record) &&
+		    lpfc_mac_addr_match(phba, new_fcf_record)) {
+			phba->fcf.fcf_flag |= FCF_AVAILABLE;
+			spin_unlock_irqrestore(&phba->hbalock, flags);
+			goto out;
+		}
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		goto read_next_fcf;
+	}
+	if (phba->fcf.fcf_flag & FCF_AVAILABLE) {
+		/*
+		 * If the current FCF record does not have boot flag
+		 * set and new fcf record has boot flag set, use the
+		 * new fcf record.
+		 */
+		if (boot_flag && !(phba->fcf.fcf_flag & FCF_BOOT_ENABLE)) {
+			/* Use this FCF record */
+			lpfc_copy_fcf_record(phba, new_fcf_record);
+			phba->fcf.addr_mode = addr_mode;
+			phba->fcf.fcf_flag |= FCF_BOOT_ENABLE;
+			if (vlan_id != 0xFFFF) {
+				phba->fcf.fcf_flag |= FCF_VALID_VLAN;
+				phba->fcf.vlan_id = vlan_id;
+			}
+			spin_unlock_irqrestore(&phba->hbalock, flags);
+			goto read_next_fcf;
+		}
+		/*
+		 * If the current FCF record has boot flag set and the
+		 * new FCF record does not have boot flag, read the next
+		 * FCF record.
+		 */
+		if (!boot_flag && (phba->fcf.fcf_flag & FCF_BOOT_ENABLE)) {
+			spin_unlock_irqrestore(&phba->hbalock, flags);
+			goto read_next_fcf;
+		}
+		/*
+		 * If there is a record with lower priority value for
+		 * the current FCF, use that record.
+		 */
+		if (lpfc_fab_name_match(phba->fcf.fabric_name, new_fcf_record)
+			&& (new_fcf_record->fip_priority <
+				phba->fcf.priority)) {
+			/* Use this FCF record */
+			lpfc_copy_fcf_record(phba, new_fcf_record);
+			phba->fcf.addr_mode = addr_mode;
+			if (vlan_id != 0xFFFF) {
+				phba->fcf.fcf_flag |= FCF_VALID_VLAN;
+				phba->fcf.vlan_id = vlan_id;
+			}
+			spin_unlock_irqrestore(&phba->hbalock, flags);
+			goto read_next_fcf;
+		}
+		spin_unlock_irqrestore(&phba->hbalock, flags);
+		goto read_next_fcf;
+	}
+	/*
+	 * This is the first available FCF record, use this
+	 * record.
+	 */
+	lpfc_copy_fcf_record(phba, new_fcf_record);
+	phba->fcf.addr_mode = addr_mode;
+	if (boot_flag)
+		phba->fcf.fcf_flag |= FCF_BOOT_ENABLE;
+	phba->fcf.fcf_flag |= FCF_AVAILABLE;
+	if (vlan_id != 0xFFFF) {
+		phba->fcf.fcf_flag |= FCF_VALID_VLAN;
+		phba->fcf.vlan_id = vlan_id;
+	}
+	spin_unlock_irqrestore(&phba->hbalock, flags);
+	goto read_next_fcf;
+
+read_next_fcf:
+	lpfc_sli4_mbox_cmd_free(phba, mboxq);
+	if (next_fcf_index == LPFC_FCOE_FCF_NEXT_NONE || next_fcf_index == 0)
+		lpfc_register_fcf(phba);
+	else
+		lpfc_sli4_read_fcf_record(phba, next_fcf_index);
+	return;
+
+out:
+	lpfc_sli4_mbox_cmd_free(phba, mboxq);
+	lpfc_register_fcf(phba);
+
+	return;
+}
+
+/**
+ * lpfc_start_fdiscs - send fdiscs for each vports on this port.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This function loops through the list of vports on the @phba and issues an
+ * FDISC if possible.
+ */
+void
+lpfc_start_fdiscs(struct lpfc_hba *phba)
+{
+	struct lpfc_vport **vports;
+	int i;
+
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports != NULL) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+			if (vports[i]->port_type == LPFC_PHYSICAL_PORT)
+				continue;
+			/* There are no vpi for this vport */
+			if (vports[i]->vpi > phba->max_vpi) {
+				lpfc_vport_set_state(vports[i],
+						     FC_VPORT_FAILED);
+				continue;
+			}
+			if (phba->fc_topology == TOPOLOGY_LOOP) {
+				lpfc_vport_set_state(vports[i],
+						     FC_VPORT_LINKDOWN);
+				continue;
+			}
+			if (phba->link_flag & LS_NPIV_FAB_SUPPORTED)
+				lpfc_initial_fdisc(vports[i]);
+			else {
+				lpfc_vport_set_state(vports[i],
+						     FC_VPORT_NO_FABRIC_SUPP);
+				lpfc_printf_vlog(vports[i], KERN_ERR,
+						 LOG_ELS,
+						 "0259 No NPIV "
+						 "Fabric support\n");
+			}
+		}
+	}
+	lpfc_destroy_vport_work_array(phba, vports);
+}
+
+void
+lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_dmabuf *dmabuf = mboxq->context1;
+	struct lpfc_vport *vport = mboxq->vport;
+
+	if (mboxq->u.mb.mbxStatus) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
+			 "2018 REG_VFI mbxStatus error x%x "
+			 "HBA state x%x\n",
+			 mboxq->u.mb.mbxStatus, vport->port_state);
+		if (phba->fc_topology == TOPOLOGY_LOOP) {
+			/* FLOGI failed, use loop map to make discovery list */
+			lpfc_disc_list_loopmap(vport);
+			/* Start discovery */
+			lpfc_disc_start(vport);
+			goto fail_free_mem;
+		}
+		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
+		goto fail_free_mem;
+	}
+	/* Mark the vport has registered with its VFI */
+	vport->vfi_state |= LPFC_VFI_REGISTERED;
+
+	if (vport->port_state == LPFC_FABRIC_CFG_LINK) {
+		lpfc_start_fdiscs(phba);
+		lpfc_do_scr_ns_plogi(phba, vport);
+	}
+
+fail_free_mem:
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	lpfc_mbuf_free(phba, dmabuf->virt, dmabuf->phys);
+	kfree(dmabuf);
+	return;
+}
+
+static void
 lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1;
 	struct lpfc_vport  *vport = pmb->vport;
 
@@ -1037,13 +1625,13 @@ static void
 lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la)
 {
 	struct lpfc_vport *vport = phba->pport;
-	LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox;
+	LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox = NULL;
 	int i;
 	struct lpfc_dmabuf *mp;
 	int rc;
+	struct fcf_record *fcf_record;
 
 	sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
-	cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 
 	spin_lock_irq(&phba->hbalock);
 	switch (la->UlnkSpeed) {
@@ -1140,22 +1728,66 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la)
 			lpfc_mbuf_free(phba, mp->virt, mp->phys);
 			kfree(mp);
 			mempool_free(sparam_mbox, phba->mbox_mem_pool);
-			if (cfglink_mbox)
-				mempool_free(cfglink_mbox, phba->mbox_mem_pool);
 			goto out;
 		}
 	}
 
-	if (cfglink_mbox) {
+	if (!(phba->hba_flag & HBA_FCOE_SUPPORT)) {
+		cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+		if (!cfglink_mbox)
+			goto out;
 		vport->port_state = LPFC_LOCAL_CFG_LINK;
 		lpfc_config_link(phba, cfglink_mbox);
 		cfglink_mbox->vport = vport;
 		cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link;
 		rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT);
-		if (rc != MBX_NOT_FINISHED)
-			return;
-		mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+		if (rc == MBX_NOT_FINISHED) {
+			mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+			goto out;
+		}
+	} else {
+		/*
+		 * Add the driver's default FCF record at FCF index 0 now. This
+		 * is phase 1 implementation that support FCF index 0 and driver
+		 * defaults.
+		 */
+		if (phba->cfg_enable_fip == 0) {
+			fcf_record = kzalloc(sizeof(struct fcf_record),
+					GFP_KERNEL);
+			if (unlikely(!fcf_record)) {
+				lpfc_printf_log(phba, KERN_ERR,
+					LOG_MBOX | LOG_SLI,
+					"2554 Could not allocate memmory for "
+					"fcf record\n");
+				rc = -ENODEV;
+				goto out;
+			}
+
+			lpfc_sli4_build_dflt_fcf_record(phba, fcf_record,
+						LPFC_FCOE_FCF_DEF_INDEX);
+			rc = lpfc_sli4_add_fcf_record(phba, fcf_record);
+			if (unlikely(rc)) {
+				lpfc_printf_log(phba, KERN_ERR,
+					LOG_MBOX | LOG_SLI,
+					"2013 Could not manually add FCF "
+					"record 0, status %d\n", rc);
+				rc = -ENODEV;
+				kfree(fcf_record);
+				goto out;
+			}
+			kfree(fcf_record);
+		}
+		/*
+		 * The driver is expected to do FIP/FCF. Call the port
+		 * and get the FCF Table.
+		 */
+		rc = lpfc_sli4_read_fcf_record(phba,
+					LPFC_FCOE_FCF_GET_FIRST);
+		if (rc)
+			goto out;
 	}
+
+	return;
 out:
 	lpfc_vport_set_state(vport, FC_VPORT_FAILED);
 	lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
@@ -1186,6 +1818,7 @@ lpfc_mbx_issue_link_down(struct lpfc_hba *phba)
 {
 	lpfc_linkdown(phba);
 	lpfc_enable_la(phba);
+	lpfc_unregister_unused_fcf(phba);
 	/* turn on Link Attention interrupts - no CLEAR_LA needed */
 }
 
@@ -3330,3 +3963,395 @@ lpfc_nlp_not_used(struct lpfc_nodelist *ndlp)
 			return 1;
 	return 0;
 }
+
+/**
+ * lpfc_fcf_inuse - Check if FCF can be unregistered.
+ * @phba: Pointer to hba context object.
+ *
+ * This function iterate through all FC nodes associated
+ * will all vports to check if there is any node with
+ * fc_rports associated with it. If there is an fc_rport
+ * associated with the node, then the node is either in
+ * discovered state or its devloss_timer is pending.
+ */
+static int
+lpfc_fcf_inuse(struct lpfc_hba *phba)
+{
+	struct lpfc_vport **vports;
+	int i, ret = 0;
+	struct lpfc_nodelist *ndlp;
+	struct Scsi_Host  *shost;
+
+	vports = lpfc_create_vport_work_array(phba);
+
+	for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+		shost = lpfc_shost_from_vport(vports[i]);
+		spin_lock_irq(shost->host_lock);
+		list_for_each_entry(ndlp, &vports[i]->fc_nodes, nlp_listp) {
+			if (NLP_CHK_NODE_ACT(ndlp) && ndlp->rport &&
+			  (ndlp->rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
+				ret = 1;
+				spin_unlock_irq(shost->host_lock);
+				goto out;
+			}
+		}
+		spin_unlock_irq(shost->host_lock);
+	}
+out:
+	lpfc_destroy_vport_work_array(phba, vports);
+	return ret;
+}
+
+/**
+ * lpfc_unregister_vfi_cmpl - Completion handler for unreg vfi.
+ * @phba: Pointer to hba context object.
+ * @mboxq: Pointer to mailbox object.
+ *
+ * This function frees memory associated with the mailbox command.
+ */
+static void
+lpfc_unregister_vfi_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_vport *vport = mboxq->vport;
+
+	if (mboxq->u.mb.mbxStatus) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2555 UNREG_VFI mbxStatus error x%x "
+			"HBA state x%x\n",
+			mboxq->u.mb.mbxStatus, vport->port_state);
+	}
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return;
+}
+
+/**
+ * lpfc_unregister_fcfi_cmpl - Completion handler for unreg fcfi.
+ * @phba: Pointer to hba context object.
+ * @mboxq: Pointer to mailbox object.
+ *
+ * This function frees memory associated with the mailbox command.
+ */
+static void
+lpfc_unregister_fcfi_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	struct lpfc_vport *vport = mboxq->vport;
+
+	if (mboxq->u.mb.mbxStatus) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2550 UNREG_FCFI mbxStatus error x%x "
+			"HBA state x%x\n",
+			mboxq->u.mb.mbxStatus, vport->port_state);
+	}
+	mempool_free(mboxq, phba->mbox_mem_pool);
+	return;
+}
+
+/**
+ * lpfc_unregister_unused_fcf - Unregister FCF if all devices are disconnected.
+ * @phba: Pointer to hba context object.
+ *
+ * This function check if there are any connected remote port for the FCF and
+ * if all the devices are disconnected, this function unregister FCFI.
+ * This function also tries to use another FCF for discovery.
+ */
+void
+lpfc_unregister_unused_fcf(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *mbox;
+	int rc;
+	struct lpfc_vport **vports;
+	int i;
+
+	spin_lock_irq(&phba->hbalock);
+	/*
+	 * If HBA is not running in FIP mode or
+	 * If HBA does not support FCoE or
+	 * If FCF is not registered.
+	 * do nothing.
+	 */
+	if (!(phba->hba_flag & HBA_FCOE_SUPPORT) ||
+		!(phba->fcf.fcf_flag & FCF_REGISTERED) ||
+		(phba->cfg_enable_fip == 0)) {
+		spin_unlock_irq(&phba->hbalock);
+		return;
+	}
+	spin_unlock_irq(&phba->hbalock);
+
+	if (lpfc_fcf_inuse(phba))
+		return;
+
+
+	/* Unregister VPIs */
+	vports = lpfc_create_vport_work_array(phba);
+	if (vports &&
+		(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED))
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
+			lpfc_mbx_unreg_vpi(vports[i]);
+			vports[i]->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+			vports[i]->vfi_state &= ~LPFC_VFI_REGISTERED;
+		}
+	lpfc_destroy_vport_work_array(phba, vports);
+
+	/* Unregister VFI */
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2556 UNREG_VFI mbox allocation failed"
+			"HBA state x%x\n",
+			phba->pport->port_state);
+		return;
+	}
+
+	lpfc_unreg_vfi(mbox, phba->pport->vfi);
+	mbox->vport = phba->pport;
+	mbox->mbox_cmpl = lpfc_unregister_vfi_cmpl;
+
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2557 UNREG_VFI issue mbox failed rc x%x "
+			"HBA state x%x\n",
+			rc, phba->pport->port_state);
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return;
+	}
+
+	/* Unregister FCF */
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2551 UNREG_FCFI mbox allocation failed"
+			"HBA state x%x\n",
+			phba->pport->port_state);
+		return;
+	}
+
+	lpfc_unreg_fcfi(mbox, phba->fcf.fcfi);
+	mbox->vport = phba->pport;
+	mbox->mbox_cmpl = lpfc_unregister_fcfi_cmpl;
+	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
+
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2552 UNREG_FCFI issue mbox failed rc x%x "
+			"HBA state x%x\n",
+			rc, phba->pport->port_state);
+		mempool_free(mbox, phba->mbox_mem_pool);
+		return;
+	}
+
+	spin_lock_irq(&phba->hbalock);
+	phba->fcf.fcf_flag &= ~(FCF_AVAILABLE | FCF_REGISTERED |
+		FCF_DISCOVERED | FCF_BOOT_ENABLE | FCF_IN_USE |
+		FCF_VALID_VLAN);
+	spin_unlock_irq(&phba->hbalock);
+
+	/*
+	 * If driver is not unloading, check if there is any other
+	 * FCF record that can be used for discovery.
+	 */
+	if ((phba->pport->load_flag & FC_UNLOADING) ||
+		(phba->link_state < LPFC_LINK_UP))
+		return;
+
+	rc = lpfc_sli4_read_fcf_record(phba, LPFC_FCOE_FCF_GET_FIRST);
+
+	if (rc)
+		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY|LOG_MBOX,
+			"2553 lpfc_unregister_unused_fcf failed to read FCF"
+			" record HBA state x%x\n",
+			phba->pport->port_state);
+}
+
+/**
+ * lpfc_read_fcf_conn_tbl - Create driver FCF connection table.
+ * @phba: Pointer to hba context object.
+ * @buff: Buffer containing the FCF connection table as in the config
+ *         region.
+ * This function create driver data structure for the FCF connection
+ * record table read from config region 23.
+ */
+static void
+lpfc_read_fcf_conn_tbl(struct lpfc_hba *phba,
+	uint8_t *buff)
+{
+	struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
+	struct lpfc_fcf_conn_hdr *conn_hdr;
+	struct lpfc_fcf_conn_rec *conn_rec;
+	uint32_t record_count;
+	int i;
+
+	/* Free the current connect table */
+	list_for_each_entry_safe(conn_entry, next_conn_entry,
+		&phba->fcf_conn_rec_list, list)
+		kfree(conn_entry);
+
+	conn_hdr = (struct lpfc_fcf_conn_hdr *) buff;
+	record_count = conn_hdr->length * sizeof(uint32_t)/
+		sizeof(struct lpfc_fcf_conn_rec);
+
+	conn_rec = (struct lpfc_fcf_conn_rec *)
+		(buff + sizeof(struct lpfc_fcf_conn_hdr));
+
+	for (i = 0; i < record_count; i++) {
+		if (!(conn_rec[i].flags & FCFCNCT_VALID))
+			continue;
+		conn_entry = kzalloc(sizeof(struct lpfc_fcf_conn_entry),
+			GFP_KERNEL);
+		if (!conn_entry) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2566 Failed to allocate connection"
+				" table entry\n");
+			return;
+		}
+
+		memcpy(&conn_entry->conn_rec, &conn_rec[i],
+			sizeof(struct lpfc_fcf_conn_rec));
+		conn_entry->conn_rec.vlan_tag =
+			le16_to_cpu(conn_entry->conn_rec.vlan_tag) & 0xFFF;
+		conn_entry->conn_rec.flags =
+			le16_to_cpu(conn_entry->conn_rec.flags);
+		list_add_tail(&conn_entry->list,
+			&phba->fcf_conn_rec_list);
+	}
+}
+
+/**
+ * lpfc_read_fcoe_param - Read FCoe parameters from conf region..
+ * @phba: Pointer to hba context object.
+ * @buff: Buffer containing the FCoE parameter data structure.
+ *
+ *  This function update driver data structure with config
+ *  parameters read from config region 23.
+ */
+static void
+lpfc_read_fcoe_param(struct lpfc_hba *phba,
+			uint8_t *buff)
+{
+	struct lpfc_fip_param_hdr *fcoe_param_hdr;
+	struct lpfc_fcoe_params *fcoe_param;
+
+	fcoe_param_hdr = (struct lpfc_fip_param_hdr *)
+		buff;
+	fcoe_param = (struct lpfc_fcoe_params *)
+		buff + sizeof(struct lpfc_fip_param_hdr);
+
+	if ((fcoe_param_hdr->parm_version != FIPP_VERSION) ||
+		(fcoe_param_hdr->length != FCOE_PARAM_LENGTH))
+		return;
+
+	if (bf_get(lpfc_fip_param_hdr_fipp_mode, fcoe_param_hdr) ==
+			FIPP_MODE_ON)
+		phba->cfg_enable_fip = 1;
+
+	if (bf_get(lpfc_fip_param_hdr_fipp_mode, fcoe_param_hdr) ==
+		FIPP_MODE_OFF)
+		phba->cfg_enable_fip = 0;
+
+	if (fcoe_param_hdr->parm_flags & FIPP_VLAN_VALID) {
+		phba->valid_vlan = 1;
+		phba->vlan_id = le16_to_cpu(fcoe_param->vlan_tag) &
+			0xFFF;
+	}
+
+	phba->fc_map[0] = fcoe_param->fc_map[0];
+	phba->fc_map[1] = fcoe_param->fc_map[1];
+	phba->fc_map[2] = fcoe_param->fc_map[2];
+	return;
+}
+
+/**
+ * lpfc_get_rec_conf23 - Get a record type in config region data.
+ * @buff: Buffer containing config region 23 data.
+ * @size: Size of the data buffer.
+ * @rec_type: Record type to be searched.
+ *
+ * This function searches config region data to find the begining
+ * of the record specified by record_type. If record found, this
+ * function return pointer to the record else return NULL.
+ */
+static uint8_t *
+lpfc_get_rec_conf23(uint8_t *buff, uint32_t size, uint8_t rec_type)
+{
+	uint32_t offset = 0, rec_length;
+
+	if ((buff[0] == LPFC_REGION23_LAST_REC) ||
+		(size < sizeof(uint32_t)))
+		return NULL;
+
+	rec_length = buff[offset + 1];
+
+	/*
+	 * One TLV record has one word header and number of data words
+	 * specified in the rec_length field of the record header.
+	 */
+	while ((offset + rec_length * sizeof(uint32_t) + sizeof(uint32_t))
+		<= size) {
+		if (buff[offset] == rec_type)
+			return &buff[offset];
+
+		if (buff[offset] == LPFC_REGION23_LAST_REC)
+			return NULL;
+
+		offset += rec_length * sizeof(uint32_t) + sizeof(uint32_t);
+		rec_length = buff[offset + 1];
+	}
+	return NULL;
+}
+
+/**
+ * lpfc_parse_fcoe_conf - Parse FCoE config data read from config region 23.
+ * @phba: Pointer to lpfc_hba data structure.
+ * @buff: Buffer containing config region 23 data.
+ * @size: Size of the data buffer.
+ *
+ * This fuction parse the FCoE config parameters in config region 23 and
+ * populate driver data structure with the parameters.
+ */
+void
+lpfc_parse_fcoe_conf(struct lpfc_hba *phba,
+		uint8_t *buff,
+		uint32_t size)
+{
+	uint32_t offset = 0, rec_length;
+	uint8_t *rec_ptr;
+
+	/*
+	 * If data size is less than 2 words signature and version cannot be
+	 * verified.
+	 */
+	if (size < 2*sizeof(uint32_t))
+		return;
+
+	/* Check the region signature first */
+	if (memcmp(buff, LPFC_REGION23_SIGNATURE, 4)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2567 Config region 23 has bad signature\n");
+		return;
+	}
+
+	offset += 4;
+
+	/* Check the data structure version */
+	if (buff[offset] != LPFC_REGION23_VERSION) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2568 Config region 23 has bad version\n");
+		return;
+	}
+	offset += 4;
+
+	rec_length = buff[offset + 1];
+
+	/* Read FCoE param record */
+	rec_ptr = lpfc_get_rec_conf23(&buff[offset],
+			size - offset, FCOE_PARAM_TYPE);
+	if (rec_ptr)
+		lpfc_read_fcoe_param(phba, rec_ptr);
+
+	/* Read FCF connection table */
+	rec_ptr = lpfc_get_rec_conf23(&buff[offset],
+		size - offset, FCOE_CONN_TBL_TYPE);
+	if (rec_ptr)
+		lpfc_read_fcf_conn_tbl(phba, rec_ptr);
+
+}
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 6efe459..2c7eba6 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -363,7 +363,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 	if (!mbox)
 		goto out;
 
-	rc = lpfc_reg_login(phba, vport->vpi, icmd->un.rcvels.remoteID,
+	rc = lpfc_reg_rpi(phba, vport->vpi, icmd->un.rcvels.remoteID,
 			    (uint8_t *) sp, mbox, 0);
 	if (rc) {
 		mempool_free(mbox, phba->mbox_mem_pool);
@@ -497,11 +497,19 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 		lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL);
 	else
 		lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL);
+	if ((ndlp->nlp_type & NLP_FABRIC) &&
+		vport->port_type == LPFC_NPIV_PORT) {
+		lpfc_linkdown_port(vport);
+		mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+		spin_lock_irq(shost->host_lock);
+		ndlp->nlp_flag |= NLP_DELAY_TMO;
+		spin_unlock_irq(shost->host_lock);
 
-	if ((!(ndlp->nlp_type & NLP_FABRIC) &&
-	     ((ndlp->nlp_type & NLP_FCP_TARGET) ||
-	      !(ndlp->nlp_type & NLP_FCP_INITIATOR))) ||
-	    (ndlp->nlp_state == NLP_STE_ADISC_ISSUE)) {
+		ndlp->nlp_last_elscmd = ELS_CMD_FDISC;
+	} else if ((!(ndlp->nlp_type & NLP_FABRIC) &&
+		((ndlp->nlp_type & NLP_FCP_TARGET) ||
+		!(ndlp->nlp_type & NLP_FCP_INITIATOR))) ||
+		(ndlp->nlp_state == NLP_STE_ADISC_ISSUE)) {
 		/* Only try to re-login if this is NOT a Fabric Node */
 		mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
 		spin_lock_irq(shost->host_lock);
@@ -569,7 +577,7 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
 	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
-	if (!ndlp->nlp_rpi) {
+	if (!(ndlp->nlp_flag & NLP_RPI_VALID)) {
 		ndlp->nlp_flag &= ~NLP_NPR_ADISC;
 		return 0;
 	}
@@ -859,7 +867,7 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport,
 
 	lpfc_unreg_rpi(vport, ndlp);
 
-	if (lpfc_reg_login(phba, vport->vpi, irsp->un.elsreq64.remoteID,
+	if (lpfc_reg_rpi(phba, vport->vpi, irsp->un.elsreq64.remoteID,
 			   (uint8_t *) sp, mbox, 0) == 0) {
 		switch (ndlp->nlp_DID) {
 		case NameServer_DID:
@@ -1070,6 +1078,7 @@ lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport,
 	struct lpfc_iocbq *cmdiocb, *rspiocb;
 	IOCB_t *irsp;
 	ADISC *ap;
+	int rc;
 
 	cmdiocb = (struct lpfc_iocbq *) arg;
 	rspiocb = cmdiocb->context_un.rsp_iocb;
@@ -1095,6 +1104,15 @@ lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport,
 		return ndlp->nlp_state;
 	}
 
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		rc = lpfc_sli4_resume_rpi(ndlp);
+		if (rc) {
+			/* Stay in state and retry. */
+			ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+			return ndlp->nlp_state;
+		}
+	}
+
 	if (ndlp->nlp_type & NLP_FCP_TARGET) {
 		ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
 		lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE);
@@ -1102,6 +1120,7 @@ lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport,
 		ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
 		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 	}
+
 	return ndlp->nlp_state;
 }
 
@@ -1285,6 +1304,7 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
 	}
 
 	ndlp->nlp_rpi = mb->un.varWords[0];
+	ndlp->nlp_flag |= NLP_RPI_VALID;
 
 	/* Only if we are not a fabric nport do we issue PRLI */
 	if (!(ndlp->nlp_type & NLP_FABRIC)) {
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index b53af99..9645d32 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -10929,3 +10929,520 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba)
 	};
 	return 0;
 }
+
+/**
+ * lpfc_sli4_post_all_rpi_hdrs - Post the rpi header memory region to the port
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to post rpi header templates to the
+ * HBA consistent with the SLI-4 interface spec.  This routine
+ * posts a PAGE_SIZE memory region to the port to hold up to
+ * PAGE_SIZE modulo 64 rpi context headers.
+ *
+ * This routine does not require any locks.  It's usage is expected
+ * to be driver load or reset recovery when the driver is
+ * sequential.
+ *
+ * Return codes
+ * 	0 - sucessful
+ *      EIO - The mailbox failed to complete successfully.
+ * 	When this error occurs, the driver is not guaranteed
+ *	to have any rpi regions posted to the device and
+ *	must either attempt to repost the regions or take a
+ *	fatal error.
+ **/
+int
+lpfc_sli4_post_all_rpi_hdrs(struct lpfc_hba *phba)
+{
+	struct lpfc_rpi_hdr *rpi_page;
+	uint32_t rc = 0;
+
+	/* Post all rpi memory regions to the port. */
+	list_for_each_entry(rpi_page, &phba->sli4_hba.lpfc_rpi_hdr_list, list) {
+		rc = lpfc_sli4_post_rpi_hdr(phba, rpi_page);
+		if (rc != MBX_SUCCESS) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"2008 Error %d posting all rpi "
+					"headers\n", rc);
+			rc = -EIO;
+			break;
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * lpfc_sli4_post_rpi_hdr - Post an rpi header memory region to the port
+ * @phba: pointer to lpfc hba data structure.
+ * @rpi_page:  pointer to the rpi memory region.
+ *
+ * This routine is invoked to post a single rpi header to the
+ * HBA consistent with the SLI-4 interface spec.  This memory region
+ * maps up to 64 rpi context regions.
+ *
+ * Return codes
+ * 	0 - sucessful
+ * 	ENOMEM - No available memory
+ *      EIO - The mailbox failed to complete successfully.
+ **/
+int
+lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page)
+{
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_mbx_post_hdr_tmpl *hdr_tmpl;
+	uint32_t rc = 0;
+	uint32_t mbox_tmo;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+
+	/* The port is notified of the header region via a mailbox command. */
+	mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2001 Unable to allocate memory for issuing "
+				"SLI_CONFIG_SPECIAL mailbox command\n");
+		return -ENOMEM;
+	}
+
+	/* Post all rpi memory regions to the port. */
+	hdr_tmpl = &mboxq->u.mqe.un.hdr_tmpl;
+	mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_SLI4_CONFIG);
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_POST_HDR_TEMPLATE,
+			 sizeof(struct lpfc_mbx_post_hdr_tmpl) -
+			 sizeof(struct mbox_header), LPFC_SLI4_MBX_EMBED);
+	bf_set(lpfc_mbx_post_hdr_tmpl_page_cnt,
+	       hdr_tmpl, rpi_page->page_count);
+	bf_set(lpfc_mbx_post_hdr_tmpl_rpi_offset, hdr_tmpl,
+	       rpi_page->start_rpi);
+	hdr_tmpl->rpi_paddr_lo = putPaddrLow(rpi_page->dmabuf->phys);
+	hdr_tmpl->rpi_paddr_hi = putPaddrHigh(rpi_page->dmabuf->phys);
+	if (!phba->sli4_hba.intr_enable)
+		rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+	else
+		rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
+	shdr = (union lpfc_sli4_cfg_shdr *) &hdr_tmpl->header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2514 POST_RPI_HDR mailbox failed with "
+				"status x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		rc = -ENXIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_sli4_alloc_rpi - Get an available rpi in the device's range
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to post rpi header templates to the
+ * HBA consistent with the SLI-4 interface spec.  This routine
+ * posts a PAGE_SIZE memory region to the port to hold up to
+ * PAGE_SIZE modulo 64 rpi context headers.
+ *
+ * Returns
+ * 	A nonzero rpi defined as rpi_base <= rpi < max_rpi if sucessful
+ * 	LPFC_RPI_ALLOC_ERROR if no rpis are available.
+ **/
+int
+lpfc_sli4_alloc_rpi(struct lpfc_hba *phba)
+{
+	int rpi;
+	uint16_t max_rpi, rpi_base, rpi_limit;
+	uint16_t rpi_remaining;
+	struct lpfc_rpi_hdr *rpi_hdr;
+
+	max_rpi = phba->sli4_hba.max_cfg_param.max_rpi;
+	rpi_base = phba->sli4_hba.max_cfg_param.rpi_base;
+	rpi_limit = phba->sli4_hba.next_rpi;
+
+	/*
+	 * The valid rpi range is not guaranteed to be zero-based.  Start
+	 * the search at the rpi_base as reported by the port.
+	 */
+	spin_lock_irq(&phba->hbalock);
+	rpi = find_next_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit, rpi_base);
+	if (rpi >= rpi_limit || rpi < rpi_base)
+		rpi = LPFC_RPI_ALLOC_ERROR;
+	else {
+		set_bit(rpi, phba->sli4_hba.rpi_bmask);
+		phba->sli4_hba.max_cfg_param.rpi_used++;
+		phba->sli4_hba.rpi_count++;
+	}
+
+	/*
+	 * Don't try to allocate more rpi header regions if the device limit
+	 * on available rpis max has been exhausted.
+	 */
+	if ((rpi == LPFC_RPI_ALLOC_ERROR) &&
+	    (phba->sli4_hba.rpi_count >= max_rpi)) {
+		spin_unlock_irq(&phba->hbalock);
+		return rpi;
+	}
+
+	/*
+	 * If the driver is running low on rpi resources, allocate another
+	 * page now.  Note that the next_rpi value is used because
+	 * it represents how many are actually in use whereas max_rpi notes
+	 * how many are supported max by the device.
+	 */
+	rpi_remaining = phba->sli4_hba.next_rpi - rpi_base -
+		phba->sli4_hba.rpi_count;
+	spin_unlock_irq(&phba->hbalock);
+	if (rpi_remaining < LPFC_RPI_LOW_WATER_MARK) {
+		rpi_hdr = lpfc_sli4_create_rpi_hdr(phba);
+		if (!rpi_hdr) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"2002 Error Could not grow rpi "
+					"count\n");
+		} else {
+			lpfc_sli4_post_rpi_hdr(phba, rpi_hdr);
+		}
+	}
+
+	return rpi;
+}
+
+/**
+ * lpfc_sli4_free_rpi - Release an rpi for reuse.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to release an rpi to the pool of
+ * available rpis maintained by the driver.
+ **/
+void
+lpfc_sli4_free_rpi(struct lpfc_hba *phba, int rpi)
+{
+	spin_lock_irq(&phba->hbalock);
+	clear_bit(rpi, phba->sli4_hba.rpi_bmask);
+	phba->sli4_hba.rpi_count--;
+	phba->sli4_hba.max_cfg_param.rpi_used--;
+	spin_unlock_irq(&phba->hbalock);
+}
+
+/**
+ * lpfc_sli4_remove_rpis - Remove the rpi bitmask region
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to remove the memory region that
+ * provided rpi via a bitmask.
+ **/
+void
+lpfc_sli4_remove_rpis(struct lpfc_hba *phba)
+{
+	kfree(phba->sli4_hba.rpi_bmask);
+}
+
+/**
+ * lpfc_sli4_resume_rpi - Remove the rpi bitmask region
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked to remove the memory region that
+ * provided rpi via a bitmask.
+ **/
+int
+lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp)
+{
+	LPFC_MBOXQ_t *mboxq;
+	struct lpfc_hba *phba = ndlp->phba;
+	int rc;
+
+	/* The port is notified of the header region via a mailbox command. */
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		return -ENOMEM;
+
+	/* Post all rpi memory regions to the port. */
+	lpfc_resume_rpi(mboxq, ndlp);
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2010 Resume RPI Mailbox failed "
+				"status %d, mbxStatus x%x\n", rc,
+				bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+		mempool_free(mboxq, phba->mbox_mem_pool);
+		return -EIO;
+	}
+	return 0;
+}
+
+/**
+ * lpfc_sli4_init_vpi - Initialize a vpi with the port
+ * @phba: pointer to lpfc hba data structure.
+ * @vpi: vpi value to activate with the port.
+ *
+ * This routine is invoked to activate a vpi with the
+ * port when the host intends to use vports with a
+ * nonzero vpi.
+ *
+ * Returns:
+ *    0 success
+ *    -Evalue otherwise
+ **/
+int
+lpfc_sli4_init_vpi(struct lpfc_hba *phba, uint16_t vpi)
+{
+	LPFC_MBOXQ_t *mboxq;
+	int rc = 0;
+	uint32_t mbox_tmo;
+
+	if (vpi == 0)
+		return -EINVAL;
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq)
+		return -ENOMEM;
+	lpfc_init_vpi(mboxq, vpi);
+	mbox_tmo = lpfc_mbox_tmo_val(phba, MBX_INIT_VPI);
+	rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
+	if (rc != MBX_SUCCESS) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"2022 INIT VPI Mailbox failed "
+				"status %d, mbxStatus x%x\n", rc,
+				bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+		rc = -EIO;
+	}
+	return rc;
+}
+
+/**
+ * lpfc_mbx_cmpl_add_fcf_record - add fcf mbox completion handler.
+ * @phba: pointer to lpfc hba data structure.
+ * @mboxq: Pointer to mailbox object.
+ *
+ * This routine is invoked to manually add a single FCF record. The caller
+ * must pass a completely initialized FCF_Record.  This routine takes
+ * care of the nonembedded mailbox operations.
+ **/
+static void
+lpfc_mbx_cmpl_add_fcf_record(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
+{
+	void *virt_addr;
+	union lpfc_sli4_cfg_shdr *shdr;
+	uint32_t shdr_status, shdr_add_status;
+
+	virt_addr = mboxq->sge_array->addr[0];
+	/* The IOCTL status is embedded in the mailbox subheader. */
+	shdr = (union lpfc_sli4_cfg_shdr *) virt_addr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+
+	if ((shdr_status || shdr_add_status) &&
+		(shdr_status != STATUS_FCF_IN_USE))
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2558 ADD_FCF_RECORD mailbox failed with "
+			"status x%x add_status x%x\n",
+			shdr_status, shdr_add_status);
+
+	lpfc_sli4_mbox_cmd_free(phba, mboxq);
+}
+
+/**
+ * lpfc_sli4_add_fcf_record - Manually add an FCF Record.
+ * @phba: pointer to lpfc hba data structure.
+ * @fcf_record:  pointer to the initialized fcf record to add.
+ *
+ * This routine is invoked to manually add a single FCF record. The caller
+ * must pass a completely initialized FCF_Record.  This routine takes
+ * care of the nonembedded mailbox operations.
+ **/
+int
+lpfc_sli4_add_fcf_record(struct lpfc_hba *phba, struct fcf_record *fcf_record)
+{
+	int rc = 0;
+	LPFC_MBOXQ_t *mboxq;
+	uint8_t *bytep;
+	void *virt_addr;
+	dma_addr_t phys_addr;
+	struct lpfc_mbx_sge sge;
+	uint32_t alloc_len, req_len;
+	uint32_t fcfindex;
+
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2009 Failed to allocate mbox for ADD_FCF cmd\n");
+		return -ENOMEM;
+	}
+
+	req_len = sizeof(struct fcf_record) + sizeof(union lpfc_sli4_cfg_shdr) +
+		  sizeof(uint32_t);
+
+	/* Allocate DMA memory and set up the non-embedded mailbox command */
+	alloc_len = lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_FCOE,
+				     LPFC_MBOX_OPCODE_FCOE_ADD_FCF,
+				     req_len, LPFC_SLI4_MBX_NEMBED);
+	if (alloc_len < req_len) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2523 Allocated DMA memory size (x%x) is "
+			"less than the requested DMA memory "
+			"size (x%x)\n", alloc_len, req_len);
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Get the first SGE entry from the non-embedded DMA memory.  This
+	 * routine only uses a single SGE.
+	 */
+	lpfc_sli4_mbx_sge_get(mboxq, 0, &sge);
+	phys_addr = getPaddr(sge.pa_hi, sge.pa_lo);
+	if (unlikely(!mboxq->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2526 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		return -ENOMEM;
+	}
+	virt_addr = mboxq->sge_array->addr[0];
+	/*
+	 * Configure the FCF record for FCFI 0.  This is the driver's
+	 * hardcoded default and gets used in nonFIP mode.
+	 */
+	fcfindex = bf_get(lpfc_fcf_record_fcf_index, fcf_record);
+	bytep = virt_addr + sizeof(union lpfc_sli4_cfg_shdr);
+	lpfc_sli_pcimem_bcopy(&fcfindex, bytep, sizeof(uint32_t));
+
+	/*
+	 * Copy the fcf_index and the FCF Record Data. The data starts after
+	 * the FCoE header plus word10. The data copy needs to be endian
+	 * correct.
+	 */
+	bytep += sizeof(uint32_t);
+	lpfc_sli_pcimem_bcopy(fcf_record, bytep, sizeof(struct fcf_record));
+	mboxq->vport = phba->pport;
+	mboxq->mbox_cmpl = lpfc_mbx_cmpl_add_fcf_record;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"2515 ADD_FCF_RECORD mailbox failed with "
+			"status 0x%x\n", rc);
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		rc = -EIO;
+	} else
+		rc = 0;
+
+	return rc;
+}
+
+/**
+ * lpfc_sli4_build_dflt_fcf_record - Build the driver's default FCF Record.
+ * @phba: pointer to lpfc hba data structure.
+ * @fcf_record:  pointer to the fcf record to write the default data.
+ * @fcf_index: FCF table entry index.
+ *
+ * This routine is invoked to build the driver's default FCF record.  The
+ * values used are hardcoded.  This routine handles memory initialization.
+ *
+ **/
+void
+lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *phba,
+				struct fcf_record *fcf_record,
+				uint16_t fcf_index)
+{
+	memset(fcf_record, 0, sizeof(struct fcf_record));
+	fcf_record->max_rcv_size = LPFC_FCOE_MAX_RCV_SIZE;
+	fcf_record->fka_adv_period = LPFC_FCOE_FKA_ADV_PER;
+	fcf_record->fip_priority = LPFC_FCOE_FIP_PRIORITY;
+	bf_set(lpfc_fcf_record_mac_0, fcf_record, phba->fc_map[0]);
+	bf_set(lpfc_fcf_record_mac_1, fcf_record, phba->fc_map[1]);
+	bf_set(lpfc_fcf_record_mac_2, fcf_record, phba->fc_map[2]);
+	bf_set(lpfc_fcf_record_mac_3, fcf_record, LPFC_FCOE_FCF_MAC3);
+	bf_set(lpfc_fcf_record_mac_4, fcf_record, LPFC_FCOE_FCF_MAC4);
+	bf_set(lpfc_fcf_record_mac_5, fcf_record, LPFC_FCOE_FCF_MAC5);
+	bf_set(lpfc_fcf_record_fc_map_0, fcf_record, phba->fc_map[0]);
+	bf_set(lpfc_fcf_record_fc_map_1, fcf_record, phba->fc_map[1]);
+	bf_set(lpfc_fcf_record_fc_map_2, fcf_record, phba->fc_map[2]);
+	bf_set(lpfc_fcf_record_fcf_valid, fcf_record, 1);
+	bf_set(lpfc_fcf_record_fcf_index, fcf_record, fcf_index);
+	bf_set(lpfc_fcf_record_mac_addr_prov, fcf_record,
+		LPFC_FCF_FPMA | LPFC_FCF_SPMA);
+	/* Set the VLAN bit map */
+	if (phba->valid_vlan) {
+		fcf_record->vlan_bitmap[phba->vlan_id / 8]
+			= 1 << (phba->vlan_id % 8);
+	}
+}
+
+/**
+ * lpfc_sli4_read_fcf_record - Read the driver's default FCF Record.
+ * @phba: pointer to lpfc hba data structure.
+ * @fcf_index: FCF table entry offset.
+ *
+ * This routine is invoked to read up to @fcf_num of FCF record from the
+ * device starting with the given @fcf_index.
+ **/
+int
+lpfc_sli4_read_fcf_record(struct lpfc_hba *phba, uint16_t fcf_index)
+{
+	int rc = 0, error;
+	LPFC_MBOXQ_t *mboxq;
+	void *virt_addr;
+	dma_addr_t phys_addr;
+	uint8_t *bytep;
+	struct lpfc_mbx_sge sge;
+	uint32_t alloc_len, req_len;
+	struct lpfc_mbx_read_fcf_tbl *read_fcf;
+
+	mboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"2000 Failed to allocate mbox for "
+				"READ_FCF cmd\n");
+		return -ENOMEM;
+	}
+
+	req_len = sizeof(struct fcf_record) +
+		  sizeof(union lpfc_sli4_cfg_shdr) + 2 * sizeof(uint32_t);
+
+	/* Set up READ_FCF SLI4_CONFIG mailbox-ioctl command */
+	alloc_len = lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_FCOE,
+			 LPFC_MBOX_OPCODE_FCOE_READ_FCF_TABLE, req_len,
+			 LPFC_SLI4_MBX_NEMBED);
+
+	if (alloc_len < req_len) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0291 Allocated DMA memory size (x%x) is "
+				"less than the requested DMA memory "
+				"size (x%x)\n", alloc_len, req_len);
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		return -ENOMEM;
+	}
+
+	/* Get the first SGE entry from the non-embedded DMA memory. This
+	 * routine only uses a single SGE.
+	 */
+	lpfc_sli4_mbx_sge_get(mboxq, 0, &sge);
+	phys_addr = getPaddr(sge.pa_hi, sge.pa_lo);
+	if (unlikely(!mboxq->sge_array)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+				"2527 Failed to get the non-embedded SGE "
+				"virtual address\n");
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		return -ENOMEM;
+	}
+	virt_addr = mboxq->sge_array->addr[0];
+	read_fcf = (struct lpfc_mbx_read_fcf_tbl *)virt_addr;
+
+	/* Set up command fields */
+	bf_set(lpfc_mbx_read_fcf_tbl_indx, &read_fcf->u.request, fcf_index);
+	/* Perform necessary endian conversion */
+	bytep = virt_addr + sizeof(union lpfc_sli4_cfg_shdr);
+	lpfc_sli_pcimem_bcopy(bytep, bytep, sizeof(uint32_t));
+	mboxq->vport = phba->pport;
+	mboxq->mbox_cmpl = lpfc_mbx_cmpl_read_fcf_record;
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_NOWAIT);
+	if (rc == MBX_NOT_FINISHED) {
+		lpfc_sli4_mbox_cmd_free(phba, mboxq);
+		error = -EIO;
+	} else
+		error = 0;
+	return error;
+}
-- 
cgit v0.10.2


From d8e93df13c8f7bde45a7756944aab528c58df4cf Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:53:05 -0400
Subject: [SCSI] lpfc 8.3.2 : Update of copyrights

Update of copyrights on modified files

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 13ac108..5405698 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 270a4c6..f032d8b 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 5199078..38c1851 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 42ef258..2b02b1f 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2007-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2007-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index a3b56d7..6bdeb14 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 9bd7a89..d507a58 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index a9d64cf..02aa016 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index ff821bb..8c30f57 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h
index 1aa8570..db1ba22 100644
--- a/drivers/scsi/lpfc/lpfc_logmsg.h
+++ b/drivers/scsi/lpfc/lpfc_logmsg.h
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index 6aeb1c6..29fe5c1 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 516f480..e198c91 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 2c7eba6..09f659f 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1,7 +1,7 @@
  /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 9af2db3..1a7659c 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 9645d32..a53c267 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2008 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
-- 
cgit v0.10.2


From f4b4c68f74dcd5da03df851090cad28ad4e8d7cc Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:53:12 -0400
Subject: [SCSI] lpfc 8.3.2 : Miscellaneous Changes

Miscellaneous Changes:
- Convert from SLI2_ACTIVE flag to more correct SLI_ACTIVE (generic) flag
- Reposition log verbose messaging definitions
- Update naming for vpi object name from vport slang name
- Handle deferred error attention condition
- Add 10G link support
- Small bug fixup

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index f032d8b..46e032a 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -805,7 +805,7 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
 	 */
 	if (phba->link_state < LPFC_LINK_DOWN ||
 	    !phba->mbox_mem_pool ||
-	    (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0)
+	    (phba->sli.sli_flag & LPFC_SLI_ACTIVE) == 0)
 		return 0;
 
 	if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
@@ -822,7 +822,7 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
 	pmboxq->context1 = NULL;
 
 	if ((phba->pport->fc_flag & FC_OFFLINE_MODE) ||
-		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+		(!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = MBX_NOT_FINISHED;
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -2045,22 +2045,9 @@ static DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR,
 # lpfc_log_verbose: Only turn this flag on if you are willing to risk being
 # deluged with LOTS of information.
 # You can set a bit mask to record specific types of verbose messages:
-#
-# LOG_ELS                       0x1        ELS events
-# LOG_DISCOVERY                 0x2        Link discovery events
-# LOG_MBOX                      0x4        Mailbox events
-# LOG_INIT                      0x8        Initialization events
-# LOG_LINK_EVENT                0x10       Link events
-# LOG_FCP                       0x40       FCP traffic history
-# LOG_NODE                      0x80       Node table events
-# LOG_BG                        0x200      BlockBuard events
-# LOG_MISC                      0x400      Miscellaneous events
-# LOG_SLI                       0x800      SLI events
-# LOG_FCP_ERROR                 0x1000     Only log FCP errors
-# LOG_LIBDFC                    0x2000     LIBDFC events
-# LOG_ALL_MSG                   0xffff     LOG all messages
+# See lpfc_logmsh.h for definitions.
 */
-LPFC_VPORT_ATTR_HEX_RW(log_verbose, 0x0, 0x0, 0xffff,
+LPFC_VPORT_ATTR_HEX_RW(log_verbose, 0x0, 0x0, 0xffffffff,
 		       "Verbose logging bit-mask");
 
 /*
@@ -2365,7 +2352,7 @@ lpfc_stat_data_ctrl_store(struct device *dev, struct device_attribute *attr,
 		if (vports == NULL)
 			return -ENOMEM;
 
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			v_shost = lpfc_shost_from_vport(vports[i]);
 			spin_lock_irq(v_shost->host_lock);
 			/* Block and reset data collection */
@@ -2380,7 +2367,7 @@ lpfc_stat_data_ctrl_store(struct device *dev, struct device_attribute *attr,
 		phba->bucket_base = base;
 		phba->bucket_step = step;
 
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			v_shost = lpfc_shost_from_vport(vports[i]);
 
 			/* Unblock data collection */
@@ -2397,7 +2384,7 @@ lpfc_stat_data_ctrl_store(struct device *dev, struct device_attribute *attr,
 		if (vports == NULL)
 			return -ENOMEM;
 
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			v_shost = lpfc_shost_from_vport(vports[i]);
 			spin_lock_irq(shost->host_lock);
 			vports[i]->stat_data_blocked = 1;
@@ -3418,7 +3405,7 @@ sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 		}
 
 		if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-		    (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE))){
+		    (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE))) {
 
 			spin_unlock_irq(&phba->hbalock);
 			rc = lpfc_sli_issue_mbox (phba,
@@ -3646,6 +3633,9 @@ lpfc_get_host_speed(struct Scsi_Host *shost)
 			case LA_8GHZ_LINK:
 				fc_host_speed(shost) = FC_PORTSPEED_8GBIT;
 			break;
+			case LA_10GHZ_LINK:
+				fc_host_speed(shost) = FC_PORTSPEED_10GBIT;
+			break;
 			default:
 				fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
 			break;
@@ -3713,7 +3703,7 @@ lpfc_get_stats(struct Scsi_Host *shost)
 	 */
 	if (phba->link_state < LPFC_LINK_DOWN ||
 	    !phba->mbox_mem_pool ||
-	    (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0)
+	    (phba->sli.sli_flag & LPFC_SLI_ACTIVE) == 0)
 		return NULL;
 
 	if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
@@ -3756,7 +3746,7 @@ lpfc_get_stats(struct Scsi_Host *shost)
 	pmboxq->vport = vport;
 
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-	    (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+	    (!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -3838,7 +3828,7 @@ lpfc_reset_stats(struct Scsi_Host *shost)
 	pmboxq->vport = vport;
 
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+		(!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -3856,7 +3846,7 @@ lpfc_reset_stats(struct Scsi_Host *shost)
 	pmboxq->vport = vport;
 
 	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-	    (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+	    (!(psli->sli_flag & LPFC_SLI_ACTIVE)))
 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
 	else
 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -4023,6 +4013,21 @@ lpfc_set_vport_symbolic_name(struct fc_vport *fc_vport)
 		lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0);
 }
 
+/**
+ * lpfc_hba_log_verbose_init - Set hba's log verbose level
+ * @phba: Pointer to lpfc_hba struct.
+ *
+ * This function is called by the lpfc_get_cfgparam() routine to set the
+ * module lpfc_log_verbose into the @phba cfg_log_verbose for use with
+ * log messsage according to the module's lpfc_log_verbose parameter setting
+ * before hba port or vport created.
+ **/
+static void
+lpfc_hba_log_verbose_init(struct lpfc_hba *phba, uint32_t verbose)
+{
+	phba->cfg_log_verbose = verbose;
+}
+
 struct fc_function_template lpfc_transport_functions = {
 	/* fixed attributes the driver supports */
 	.show_host_node_name = 1,
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 38c1851..1dbccfd 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -1578,6 +1578,9 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode)
 				case LA_8GHZ_LINK:
 					ae->un.PortSpeed = HBA_PORTSPEED_8GBIT;
 				break;
+				case LA_10GHZ_LINK:
+					ae->un.PortSpeed = HBA_PORTSPEED_10GBIT;
+				break;
 				default:
 					ae->un.PortSpeed =
 						HBA_PORTSPEED_UNKNOWN;
@@ -1730,7 +1733,7 @@ lpfc_decode_firmware_rev(struct lpfc_hba *phba, char *fwrevision, int flag)
 	uint8_t *fwname;
 
 	if (vp->rev.rBit) {
-		if (psli->sli_flag & LPFC_SLI2_ACTIVE)
+		if (psli->sli_flag & LPFC_SLI_ACTIVE)
 			rev = vp->rev.sli2FwRev;
 		else
 			rev = vp->rev.sli1FwRev;
@@ -1756,7 +1759,7 @@ lpfc_decode_firmware_rev(struct lpfc_hba *phba, char *fwrevision, int flag)
 		}
 		b4 = (rev & 0x0000000f);
 
-		if (psli->sli_flag & LPFC_SLI2_ACTIVE)
+		if (psli->sli_flag & LPFC_SLI_ACTIVE)
 			fwname = vp->rev.sli2FwName;
 		else
 			fwname = vp->rev.sli1FwName;
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index ffd1089..1142070 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -135,6 +135,7 @@ struct lpfc_nodelist {
 #define NLP_NODEV_REMOVE   0x08000000	/* Defer removal till discovery ends */
 #define NLP_TARGET_REMOVE  0x10000000   /* Target remove in process */
 #define NLP_SC_REQ         0x20000000	/* Target requires authentication */
+#define NLP_RPI_VALID      0x80000000	/* nlp_rpi is valid */
 
 /* ndlp usage management macros */
 #define NLP_CHK_NODE_ACT(ndlp)		(((ndlp)->nlp_usg_map \
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index d507a58..126323a 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1647,6 +1647,9 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la)
 	case LA_8GHZ_LINK:
 		phba->fc_linkspeed = LA_8GHZ_LINK;
 		break;
+	case LA_10GHZ_LINK:
+		phba->fc_linkspeed = LA_10GHZ_LINK;
+		break;
 	default:
 		phba->fc_linkspeed = LA_UNKNW_LINK;
 		break;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 8c30f57..65cd3fe 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -906,7 +906,7 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 					"taking this port offline.\n");
 
 			spin_lock_irq(&phba->hbalock);
-			psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+			psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 			spin_unlock_irq(&phba->hbalock);
 
 			lpfc_offline_prep(phba);
@@ -931,13 +931,15 @@ lpfc_offline_eratt(struct lpfc_hba *phba)
 	struct lpfc_sli   *psli = &phba->sli;
 
 	spin_lock_irq(&phba->hbalock);
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 	lpfc_offline_prep(phba);
 
 	lpfc_offline(phba);
 	lpfc_reset_barrier(phba);
+	spin_lock_irq(&phba->hbalock);
 	lpfc_sli_brdreset(phba);
+	spin_unlock_irq(&phba->hbalock);
 	lpfc_hba_down_post(phba);
 	lpfc_sli_brdready(phba, HS_MBRDY);
 	lpfc_unblock_mgmt_io(phba);
@@ -980,6 +982,16 @@ lpfc_handle_deferred_eratt(struct lpfc_hba *phba)
 	struct lpfc_sli_ring  *pring;
 	struct lpfc_sli *psli = &phba->sli;
 
+	/* If the pci channel is offline, ignore possible errors,
+	 * since we cannot communicate with the pci card anyway.
+	 */
+	if (pci_channel_offline(phba->pcidev)) {
+		spin_lock_irq(&phba->hbalock);
+		phba->hba_flag &= ~DEFER_ERATT;
+		spin_unlock_irq(&phba->hbalock);
+		return;
+	}
+
 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 		"0479 Deferred Adapter Hardware Error "
 		"Data: x%x x%x x%x\n",
@@ -987,7 +999,7 @@ lpfc_handle_deferred_eratt(struct lpfc_hba *phba)
 		phba->work_status[0], phba->work_status[1]);
 
 	spin_lock_irq(&phba->hbalock);
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
 
@@ -1097,7 +1109,7 @@ lpfc_handle_eratt_s3(struct lpfc_hba *phba)
 				phba->work_status[0], phba->work_status[1]);
 
 		spin_lock_irq(&phba->hbalock);
-		psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+		psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 		spin_unlock_irq(&phba->hbalock);
 
 		/*
diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h
index db1ba22..954ba57 100644
--- a/drivers/scsi/lpfc/lpfc_logmsg.h
+++ b/drivers/scsi/lpfc/lpfc_logmsg.h
@@ -18,33 +18,39 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LOG_ELS                       0x1	/* ELS events */
-#define LOG_DISCOVERY                 0x2	/* Link discovery events */
-#define LOG_MBOX                      0x4	/* Mailbox events */
-#define LOG_INIT                      0x8	/* Initialization events */
-#define LOG_LINK_EVENT                0x10	/* Link events */
-#define LOG_IP                        0x20	/* IP traffic history */
-#define LOG_FCP                       0x40	/* FCP traffic history */
-#define LOG_NODE                      0x80	/* Node table events */
-#define LOG_TEMP                      0x100	/* Temperature sensor events */
-#define LOG_BG			      0x200	/* BlockGuard events */
-#define LOG_MISC                      0x400	/* Miscellaneous events */
-#define LOG_SLI                       0x800	/* SLI events */
-#define LOG_FCP_ERROR                 0x1000	/* log errors, not underruns */
-#define LOG_LIBDFC                    0x2000	/* Libdfc events */
-#define LOG_VPORT                     0x4000	/* NPIV events */
-#define LOG_ALL_MSG                   0xffff	/* LOG all messages */
+#define LOG_ELS		0x00000001	/* ELS events */
+#define LOG_DISCOVERY	0x00000002	/* Link discovery events */
+#define LOG_MBOX	0x00000004	/* Mailbox events */
+#define LOG_INIT	0x00000008	/* Initialization events */
+#define LOG_LINK_EVENT	0x00000010	/* Link events */
+#define LOG_IP		0x00000020	/* IP traffic history */
+#define LOG_FCP		0x00000040	/* FCP traffic history */
+#define LOG_NODE	0x00000080	/* Node table events */
+#define LOG_TEMP	0x00000100	/* Temperature sensor events */
+#define LOG_BG		0x00000200	/* BlockGuard events */
+#define LOG_MISC	0x00000400	/* Miscellaneous events */
+#define LOG_SLI		0x00000800	/* SLI events */
+#define LOG_FCP_ERROR	0x00001000	/* log errors, not underruns */
+#define LOG_LIBDFC	0x00002000	/* Libdfc events */
+#define LOG_VPORT	0x00004000	/* NPIV events */
+#define LOF_SECURITY	0x00008000	/* Security events */
+#define LOG_EVENT	0x00010000	/* CT,TEMP,DUMP, logging */
+#define LOG_ALL_MSG	0xffffffff	/* LOG all messages */
 
 #define lpfc_printf_vlog(vport, level, mask, fmt, arg...) \
-	do { \
-	{ if (((mask) &(vport)->cfg_log_verbose) || (level[1] <= '3')) \
+do { \
+	{ if (((mask) & (vport)->cfg_log_verbose) || (level[1] <= '3')) \
 		dev_printk(level, &((vport)->phba->pcidev)->dev, "%d:(%d):" \
 			   fmt, (vport)->phba->brd_no, vport->vpi, ##arg); } \
-	} while (0)
+} while (0)
 
 #define lpfc_printf_log(phba, level, mask, fmt, arg...) \
-	do { \
-	{ if (((mask) &(phba)->pport->cfg_log_verbose) || (level[1] <= '3')) \
+do { \
+	{ uint32_t log_verbose = (phba)->pport ? \
+				 (phba)->pport->cfg_log_verbose : \
+				 (phba)->cfg_log_verbose; \
+	  if (((mask) & log_verbose) || (level[1] <= '3')) \
 		dev_printk(level, &((phba)->pcidev)->dev, "%d:" \
-			   fmt, phba->brd_no, ##arg); } \
-	} while (0)
+			   fmt, phba->brd_no, ##arg); \
+	} \
+} while (0)
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index a53c267..ff04daf 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -3272,7 +3272,7 @@ void lpfc_reset_barrier(struct lpfc_hba *phba)
 		mdelay(1);
 
 	if (readl(resp_buf + 1) != ~(BARRIER_TEST_PATTERN)) {
-		if (phba->sli.sli_flag & LPFC_SLI2_ACTIVE ||
+		if (phba->sli.sli_flag & LPFC_SLI_ACTIVE ||
 		    phba->pport->stopped)
 			goto restore_hc;
 		else
@@ -3353,7 +3353,9 @@ lpfc_sli_brdkill(struct lpfc_hba *phba)
 		return 1;
 	}
 
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	spin_lock_irq(&phba->hbalock);
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
+	spin_unlock_irq(&phba->hbalock);
 
 	mempool_free(pmb, phba->mbox_mem_pool);
 
@@ -4643,7 +4645,7 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
 	spin_unlock_irq(&phba->pport->work_port_lock);
 	spin_lock_irq(&phba->hbalock);
 	phba->link_state = LPFC_LINK_UNKNOWN;
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~LPFC_SLI_ACTIVE;
 	spin_unlock_irq(&phba->hbalock);
 
 	pring = &psli->ring[psli->fcp_ring];
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index e6c88ee..7d37eb7 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -244,7 +244,7 @@ struct lpfc_sli {
 
 	/* Additional sli_flags */
 #define LPFC_SLI_MBOX_ACTIVE      0x100	/* HBA mailbox is currently active */
-#define LPFC_SLI2_ACTIVE          0x200	/* SLI2 overlay in firmware is active */
+#define LPFC_SLI_ACTIVE           0x200	/* SLI in firmware is active */
 #define LPFC_PROCESS_LA           0x400	/* Able to process link attention */
 #define LPFC_BLOCK_MGMT_IO        0x800	/* Don't allow mgmt mbx or iocb cmds */
 #define LPFC_MENLO_MAINT          0x1000 /* need for menl fw download */
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index 59e67f7..a415ec0 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -121,7 +121,7 @@ lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport)
 	if (!pmb) {
 		return -ENOMEM;
 	}
-	mb = &pmb->mb;
+	mb = &pmb->u.mb;
 
 	lpfc_read_sparam(phba, pmb, vport->vpi);
 	/*
-- 
cgit v0.10.2


From 21e9a0a5fbd2b7cb3ae29f6d491a30bc0e688422 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:53:21 -0400
Subject: [SCSI] lpfc 8.3.2 : Persistent Vport Support

Add support for persistent vport definitions at creation at boot time

Also includes a few misc fixes for:
- conversion to vpi name from vport slang name
- couple of small mailbox references
- some additional discovery mods

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 46e032a..d73e677 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -2277,6 +2277,36 @@ lpfc_param_init(topology, 0, 0, 6)
 static DEVICE_ATTR(lpfc_topology, S_IRUGO | S_IWUSR,
 		lpfc_topology_show, lpfc_topology_store);
 
+/**
+ * lpfc_static_vport_show: Read callback function for
+ *   lpfc_static_vport sysfs file.
+ * @dev: Pointer to class device object.
+ * @attr: device attribute structure.
+ * @buf: Data buffer.
+ *
+ * This function is the read call back function for
+ * lpfc_static_vport sysfs file. The lpfc_static_vport
+ * sysfs file report the mageability of the vport.
+ **/
+static ssize_t
+lpfc_static_vport_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct Scsi_Host  *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
+	if (vport->vport_flag & STATIC_VPORT)
+		sprintf(buf, "1\n");
+	else
+		sprintf(buf, "0\n");
+
+	return strlen(buf);
+}
+
+/*
+ * Sysfs attribute to control the statistical data collection.
+ */
+static DEVICE_ATTR(lpfc_static_vport, S_IRUGO,
+		   lpfc_static_vport_show, NULL);
 
 /**
  * lpfc_stat_data_ctrl_store - write call back for lpfc_stat_data_ctrl sysfs file
@@ -3051,6 +3081,7 @@ struct device_attribute *lpfc_vport_attrs[] = {
 	&dev_attr_lpfc_enable_da_id,
 	&dev_attr_lpfc_max_scsicmpl_time,
 	&dev_attr_lpfc_stat_data_ctrl,
+	&dev_attr_lpfc_static_vport,
 	NULL,
 };
 
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 126323a..35c41ae 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -2079,6 +2079,128 @@ out:
 	return;
 }
 
+/**
+ * lpfc_create_static_vport - Read HBA config region to create static vports.
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine issue a DUMP mailbox command for config region 22 to get
+ * the list of static vports to be created. The function create vports
+ * based on the information returned from the HBA.
+ **/
+void
+lpfc_create_static_vport(struct lpfc_hba *phba)
+{
+	LPFC_MBOXQ_t *pmb = NULL;
+	MAILBOX_t *mb;
+	struct static_vport_info *vport_info;
+	int rc, i;
+	struct fc_vport_identifiers vport_id;
+	struct fc_vport *new_fc_vport;
+	struct Scsi_Host *shost;
+	struct lpfc_vport *vport;
+	uint16_t offset = 0;
+	uint8_t *vport_buff;
+
+	pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!pmb) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0542 lpfc_create_static_vport failed to"
+				" allocate mailbox memory\n");
+		return;
+	}
+
+	mb = &pmb->u.mb;
+
+	vport_info = kzalloc(sizeof(struct static_vport_info), GFP_KERNEL);
+	if (!vport_info) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0543 lpfc_create_static_vport failed to"
+				" allocate vport_info\n");
+		mempool_free(pmb, phba->mbox_mem_pool);
+		return;
+	}
+
+	vport_buff = (uint8_t *) vport_info;
+	do {
+		lpfc_dump_static_vport(phba, pmb, offset);
+		pmb->vport = phba->pport;
+		rc = lpfc_sli_issue_mbox_wait(phba, pmb, LPFC_MBOX_TMO);
+
+		if ((rc != MBX_SUCCESS) || mb->mbxStatus) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0544 lpfc_create_static_vport failed to"
+				" issue dump mailbox command ret 0x%x "
+				"status 0x%x\n",
+				rc, mb->mbxStatus);
+			goto out;
+		}
+
+		if (mb->un.varDmp.word_cnt >
+			sizeof(struct static_vport_info) - offset)
+			mb->un.varDmp.word_cnt =
+			sizeof(struct static_vport_info) - offset;
+
+		lpfc_sli_pcimem_bcopy(((uint8_t *)mb) + DMP_RSP_OFFSET,
+			vport_buff + offset,
+			mb->un.varDmp.word_cnt);
+		offset += mb->un.varDmp.word_cnt;
+
+	} while (mb->un.varDmp.word_cnt &&
+		offset < sizeof(struct static_vport_info));
+
+
+	if ((le32_to_cpu(vport_info->signature) != VPORT_INFO_SIG) ||
+		((le32_to_cpu(vport_info->rev) & VPORT_INFO_REV_MASK)
+			!= VPORT_INFO_REV)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"0545 lpfc_create_static_vport bad"
+			" information header 0x%x 0x%x\n",
+			le32_to_cpu(vport_info->signature),
+			le32_to_cpu(vport_info->rev) & VPORT_INFO_REV_MASK);
+
+		goto out;
+	}
+
+	shost = lpfc_shost_from_vport(phba->pport);
+
+	for (i = 0; i < MAX_STATIC_VPORT_COUNT; i++) {
+		memset(&vport_id, 0, sizeof(vport_id));
+		vport_id.port_name = wwn_to_u64(vport_info->vport_list[i].wwpn);
+		vport_id.node_name = wwn_to_u64(vport_info->vport_list[i].wwnn);
+		if (!vport_id.port_name || !vport_id.node_name)
+			continue;
+
+		vport_id.roles = FC_PORT_ROLE_FCP_INITIATOR;
+		vport_id.vport_type = FC_PORTTYPE_NPIV;
+		vport_id.disable = false;
+		new_fc_vport = fc_vport_create(shost, 0, &vport_id);
+
+		if (!new_fc_vport) {
+			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+				"0546 lpfc_create_static_vport failed to"
+				" create vport \n");
+			continue;
+		}
+
+		vport = *(struct lpfc_vport **)new_fc_vport->dd_data;
+		vport->vport_flag |= STATIC_VPORT;
+	}
+
+out:
+	/*
+	 * If this is timed out command, setting NULL to context2 tell SLI
+	 * layer not to use this buffer.
+	 */
+	spin_lock_irq(&phba->hbalock);
+	pmb->context2 = NULL;
+	spin_unlock_irq(&phba->hbalock);
+	kfree(vport_info);
+	if (rc != MBX_TIMEOUT)
+		mempool_free(pmb, phba->mbox_mem_pool);
+
+	return;
+}
+
 /*
  * This routine handles processing a Fabric REG_LOGIN mailbox
  * command upon completion. It is setup in the LPFC_MBOXQ
@@ -2089,16 +2211,17 @@ void
 lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	struct lpfc_vport *vport = pmb->vport;
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
 	struct lpfc_nodelist *ndlp;
-	struct lpfc_vport **vports;
-	int i;
 
 	ndlp = (struct lpfc_nodelist *) pmb->context2;
 	pmb->context1 = NULL;
 	pmb->context2 = NULL;
 	if (mb->mbxStatus) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
+				 "0258 Register Fabric login error: 0x%x\n",
+				 mb->mbxStatus);
 		lpfc_mbuf_free(phba, mp->virt, mp->phys);
 		kfree(mp);
 		mempool_free(pmb, phba->mbox_mem_pool);
@@ -2117,9 +2240,6 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 		}
 
 		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_MBOX,
-				 "0258 Register Fabric login error: 0x%x\n",
-				 mb->mbxStatus);
 		/* Decrement the reference count to ndlp after the reference
 		 * to the ndlp are done.
 		 */
@@ -2128,34 +2248,12 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	}
 
 	ndlp->nlp_rpi = mb->un.varWords[0];
+	ndlp->nlp_flag |= NLP_RPI_VALID;
 	ndlp->nlp_type |= NLP_FABRIC;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 
 	if (vport->port_state == LPFC_FABRIC_CFG_LINK) {
-		vports = lpfc_create_vport_work_array(phba);
-		if (vports != NULL)
-			for(i = 0;
-			    i <= phba->max_vpi && vports[i] != NULL;
-			    i++) {
-				if (vports[i]->port_type == LPFC_PHYSICAL_PORT)
-					continue;
-				if (phba->fc_topology == TOPOLOGY_LOOP) {
-					lpfc_vport_set_state(vports[i],
-							FC_VPORT_LINKDOWN);
-					continue;
-				}
-				if (phba->link_flag & LS_NPIV_FAB_SUPPORTED)
-					lpfc_initial_fdisc(vports[i]);
-				else {
-					lpfc_vport_set_state(vports[i],
-						FC_VPORT_NO_FABRIC_SUPP);
-					lpfc_printf_vlog(vport, KERN_ERR,
-							 LOG_ELS,
-							"0259 No NPIV "
-							"Fabric support\n");
-				}
-			}
-		lpfc_destroy_vport_work_array(phba, vports);
+		lpfc_start_fdiscs(phba);
 		lpfc_do_scr_ns_plogi(phba, vport);
 	}
 
@@ -2179,13 +2277,16 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 void
 lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
-	MAILBOX_t *mb = &pmb->mb;
+	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
 	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
 	struct lpfc_vport *vport = pmb->vport;
 
 	if (mb->mbxStatus) {
 out:
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+				 "0260 Register NameServer error: 0x%x\n",
+				 mb->mbxStatus);
 		/* decrement the node reference count held for this
 		 * callback function.
 		 */
@@ -2209,15 +2310,13 @@ out:
 			return;
 		}
 		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
-		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
-				 "0260 Register NameServer error: 0x%x\n",
-				 mb->mbxStatus);
 		return;
 	}
 
 	pmb->context1 = NULL;
 
 	ndlp->nlp_rpi = mb->un.varWords[0];
+	ndlp->nlp_flag |= NLP_RPI_VALID;
 	ndlp->nlp_type |= NLP_FABRIC;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 
@@ -2718,7 +2817,7 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba,
 	if (pring->ringno == LPFC_ELS_RING) {
 		switch (icmd->ulpCommand) {
 		case CMD_GEN_REQUEST64_CR:
-			if (icmd->ulpContext == (volatile ushort)ndlp->nlp_rpi)
+			if (iocb->context_un.ndlp == ndlp)
 				return 1;
 		case CMD_ELS_REQUEST64_CR:
 			if (icmd->un.elsreq64.remoteID == ndlp->nlp_DID)
@@ -2765,7 +2864,7 @@ lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 	 */
 	psli = &phba->sli;
 	rpi = ndlp->nlp_rpi;
-	if (rpi) {
+	if (ndlp->nlp_flag & NLP_RPI_VALID) {
 		/* Now process each ring */
 		for (i = 0; i < psli->num_rings; i++) {
 			pring = &psli->ring[i];
@@ -2813,7 +2912,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	LPFC_MBOXQ_t    *mbox;
 	int rc;
 
-	if (ndlp->nlp_rpi) {
+	if (ndlp->nlp_flag & NLP_RPI_VALID) {
 		mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 		if (mbox) {
 			lpfc_unreg_login(phba, vport->vpi, ndlp->nlp_rpi, mbox);
@@ -2825,6 +2924,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 		}
 		lpfc_no_rpi(phba, ndlp);
 		ndlp->nlp_rpi = 0;
+		ndlp->nlp_flag &= ~NLP_RPI_VALID;
 		return 1;
 	}
 	return 0;
@@ -2972,13 +3072,14 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 	int rc;
 
 	lpfc_cancel_retry_delay_tmo(vport, ndlp);
-	if (ndlp->nlp_flag & NLP_DEFER_RM && !ndlp->nlp_rpi) {
+	if ((ndlp->nlp_flag & NLP_DEFER_RM) &&
+	    !(ndlp->nlp_flag & NLP_RPI_VALID)) {
 		/* For this case we need to cleanup the default rpi
 		 * allocated by the firmware.
 		 */
 		if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))
 			!= NULL) {
-			rc = lpfc_reg_login(phba, vport->vpi, ndlp->nlp_DID,
+			rc = lpfc_reg_rpi(phba, vport->vpi, ndlp->nlp_DID,
 			    (uint8_t *) &vport->fc_sparam, mbox, 0);
 			if (rc) {
 				mempool_free(mbox, phba->mbox_mem_pool);
@@ -3713,6 +3814,7 @@ lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	pmb->context1 = NULL;
 
 	ndlp->nlp_rpi = mb->un.varWords[0];
+	ndlp->nlp_flag |= NLP_RPI_VALID;
 	ndlp->nlp_type |= NLP_FABRIC;
 	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
 
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 65cd3fe..2f5907f 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -385,6 +385,7 @@ lpfc_config_port_post(struct lpfc_hba *phba)
 	/* Update the fc_host data structures with new wwn. */
 	fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn);
 	fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn);
+	fc_host_max_npiv_vports(shost) = phba->max_vpi;
 
 	/* If no serial number in VPD data, use low 6 bytes of WWNN */
 	/* This should be consolidated into parse_vpd ? - mr */
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index 29fe5c1..b9b451c 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -41,6 +41,44 @@
 #include "lpfc_compat.h"
 
 /**
+ * lpfc_dump_static_vport - Dump HBA's static vport information.
+ * @phba: pointer to lpfc hba data structure.
+ * @pmb: pointer to the driver internal queue element for mailbox command.
+ * @offset: offset for dumping vport info.
+ *
+ * The dump mailbox command provides a method for the device driver to obtain
+ * various types of information from the HBA device.
+ *
+ * This routine prepares the mailbox command for dumping list of static
+ * vports to be created.
+ **/
+void
+lpfc_dump_static_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb,
+		uint16_t offset)
+{
+	MAILBOX_t *mb;
+	void *ctx;
+
+	mb = &pmb->u.mb;
+	ctx = pmb->context2;
+
+	/* Setup to dump vport info region */
+	memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
+	mb->mbxCommand = MBX_DUMP_MEMORY;
+	mb->un.varDmp.cv = 1;
+	mb->un.varDmp.type = DMP_NV_PARAMS;
+	mb->un.varDmp.entry_index = offset;
+	mb->un.varDmp.region_id = DMP_REGION_VPORT;
+	mb->un.varDmp.word_cnt = DMP_RSP_SIZE/sizeof(uint32_t);
+	mb->un.varDmp.co = 0;
+	mb->un.varDmp.resp_offset = 0;
+	pmb->context2 = ctx;
+	mb->mbxOwner = OWN_HOST;
+
+	return;
+}
+
+/**
  * lpfc_dump_mem - Prepare a mailbox command for retrieving HBA's VPD memory
  * @phba: pointer to lpfc hba data structure.
  * @pmb: pointer to the driver internal queue element for mailbox command.
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 1a7659c..ccbde41c 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -329,7 +329,7 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba)
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			shost = lpfc_shost_from_vport(vports[i]);
 			shost_for_each_device(sdev, shost) {
 				new_queue_depth =
@@ -383,7 +383,7 @@ lpfc_ramp_up_queue_handler(struct lpfc_hba *phba)
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for(i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			shost = lpfc_shost_from_vport(vports[i]);
 			shost_for_each_device(sdev, shost) {
 				if (vports[i]->cfg_lun_queue_depth <=
@@ -431,7 +431,7 @@ lpfc_scsi_dev_block(struct lpfc_hba *phba)
 
 	vports = lpfc_create_vport_work_array(phba);
 	if (vports != NULL)
-		for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) {
+		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			shost = lpfc_shost_from_vport(vports[i]);
 			shost_for_each_device(sdev, shost) {
 				rport = starget_to_rport(scsi_target(sdev));
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index a415ec0..a6313ee 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -251,23 +251,22 @@ static void lpfc_discovery_wait(struct lpfc_vport *vport)
 		    (vport->fc_flag & wait_flags)  ||
 		    ((vport->port_state > LPFC_VPORT_FAILED) &&
 		     (vport->port_state < LPFC_VPORT_READY))) {
-			lpfc_printf_log(phba, KERN_INFO, LOG_VPORT,
+			lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT,
 					"1833 Vport discovery quiesce Wait:"
-					" vpi x%x state x%x fc_flags x%x"
+					" state x%x fc_flags x%x"
 					" num_nodes x%x, waiting 1000 msecs"
 					" total wait msecs x%x\n",
-					vport->vpi, vport->port_state,
-					vport->fc_flag, vport->num_disc_nodes,
+					vport->port_state, vport->fc_flag,
+					vport->num_disc_nodes,
 					jiffies_to_msecs(jiffies - start_time));
 			msleep(1000);
 		} else {
 			/* Base case.  Wait variants satisfied.  Break out */
-			lpfc_printf_log(phba, KERN_INFO, LOG_VPORT,
+			lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT,
 					 "1834 Vport discovery quiesced:"
-					 " vpi x%x state x%x fc_flags x%x"
+					 " state x%x fc_flags x%x"
 					 " wait msecs x%x\n",
-					 vport->vpi, vport->port_state,
-					 vport->fc_flag,
+					 vport->port_state, vport->fc_flag,
 					 jiffies_to_msecs(jiffies
 						- start_time));
 			break;
@@ -275,12 +274,10 @@ static void lpfc_discovery_wait(struct lpfc_vport *vport)
 	}
 
 	if (time_after(jiffies, wait_time_max))
-		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_VPORT,
 				"1835 Vport discovery quiesce failed:"
-				" vpi x%x state x%x fc_flags x%x"
-				" wait msecs x%x\n",
-				vport->vpi, vport->port_state,
-				vport->fc_flag,
+				" state x%x fc_flags x%x wait msecs x%x\n",
+				vport->port_state, vport->fc_flag,
 				jiffies_to_msecs(jiffies - start_time));
 }
 
@@ -558,6 +555,16 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
 				 "physical host\n");
 		return VPORT_ERROR;
 	}
+
+	/* If the vport is a static vport fail the deletion. */
+	if ((vport->vport_flag & STATIC_VPORT) &&
+		!(phba->pport->load_flag & FC_UNLOADING)) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_VPORT,
+				 "1837 vport_delete failed: Cannot delete "
+				 "static vport.\n");
+		return VPORT_ERROR;
+	}
+
 	/*
 	 * If we are not unloading the driver then prevent the vport_delete
 	 * from happening until after this vport's discovery is finished.
@@ -733,7 +740,7 @@ lpfc_create_vport_work_array(struct lpfc_hba *phba)
 	struct lpfc_vport *port_iterator;
 	struct lpfc_vport **vports;
 	int index = 0;
-	vports = kzalloc((phba->max_vpi + 1) * sizeof(struct lpfc_vport *),
+	vports = kzalloc((phba->max_vports + 1) * sizeof(struct lpfc_vport *),
 			 GFP_KERNEL);
 	if (vports == NULL)
 		return NULL;
@@ -757,7 +764,7 @@ lpfc_destroy_vport_work_array(struct lpfc_hba *phba, struct lpfc_vport **vports)
 	int i;
 	if (vports == NULL)
 		return;
-	for (i=0; vports[i] != NULL && i <= phba->max_vpi; i++)
+	for (i = 0; vports[i] != NULL && i <= phba->max_vports; i++)
 		scsi_host_put(lpfc_shost_from_vport(vports[i]));
 	kfree(vports);
 }
-- 
cgit v0.10.2


From 53331aa1c721336b661567e4c0aacc04ab9725d8 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Fri, 22 May 2009 14:53:27 -0400
Subject: [SCSI] lpfc 8.3.2 : Update the lpfc driver version to 8.3.2

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index e599519..6b8a148 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.1"
+#define LPFC_DRIVER_VERSION "8.3.2"
 
 #define LPFC_DRIVER_NAME		"lpfc"
 #define LPFC_SP_DRIVER_HANDLER_NAME	"lpfc:sp"
-- 
cgit v0.10.2


From 09521d2e3edd0bf02b66e5b8c13f1559f2d6958a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 18:29:38 +0200
Subject: ALSA: ctxfi - Fix wrong model id for UAA

CTUAA should be checked instead of CTHENDRIX.  The latter is for 20k2 chip.
Also, fixed the detection of UAA/HENDRIX models by fixing the mask bits.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 002a70e..4e25b24 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -44,8 +44,8 @@ static struct snd_pci_quirk __devinitdata subsys_20k1_list[] = {
 	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x002f, "SB055x", CTSB055X),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0029, "SB073x", CTSB073X),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0031, "SB073x", CTSB073X),
-	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_CREATIVE, 0x6000,
-			   PCI_SUBDEVICE_ID_CREATIVE_HENDRIX, "UAA", CTUAA),
+	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_CREATIVE, 0xf000, 0x6000,
+			   "UAA", CTUAA),
 	SND_PCI_QUIRK_VENDOR(PCI_VENDOR_ID_CREATIVE,
 			     "Unknown", CT20K1_UNKNOWN),
 	{ } /* terminator */
@@ -60,8 +60,9 @@ static struct snd_pci_quirk __devinitdata subsys_20k2_list[] = {
 		      "SB0880", CTSB0880),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08803,
 		      "SB0880", CTSB0880),
-	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_CREATIVE, 0x6000,
-			   PCI_SUBDEVICE_ID_CREATIVE_HENDRIX, "UAA", CTHENDRIX),
+	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_CREATIVE, 0xf000,
+			   PCI_SUBDEVICE_ID_CREATIVE_HENDRIX, "HENDRIX",
+			   CTHENDRIX),
 	{ } /* terminator */
 };
 
diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c
index 5d58650..cb69d9d 100644
--- a/sound/pci/ctxfi/cthw20k1.c
+++ b/sound/pci/ctxfi/cthw20k1.c
@@ -1541,7 +1541,7 @@ static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type)
 		return is_adc_input_selected_SB055x(hw, type);
 	case CTSB073X:
 		return is_adc_input_selected_hendrix(hw, type);
-	case CTHENDRIX:
+	case CTUAA:
 		return is_adc_input_selected_hendrix(hw, type);
 	default:
 		return is_adc_input_selected_SBx(hw, type);
@@ -1692,7 +1692,7 @@ static int hw_adc_input_select(struct hw *hw, enum ADCSRC type)
 		return adc_input_select_SB055x(hw, type, state);
 	case CTSB073X:
 		return adc_input_select_hendrix(hw, type, state);
-	case CTHENDRIX:
+	case CTUAA:
 		return adc_input_select_hendrix(hw, type, state);
 	default:
 		return adc_input_select_SBx(hw, type, state);
@@ -1780,7 +1780,7 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info)
 static int hw_have_digit_io_switch(struct hw *hw)
 {
 	/* SB073x and Vista compatible cards have no digit IO switch */
-	return !(hw->model == CTSB073X || hw->model == CTHENDRIX);
+	return !(hw->model == CTSB073X || hw->model == CTUAA);
 }
 
 #define CTLBITS(a, b, c, d)	(((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
@@ -1916,7 +1916,7 @@ static int hw_card_start(struct hw *hw)
 		goto error1;
 
 	/* Switch to X-Fi mode from UAA mode if neeeded */
-	if (hw->model == CTHENDRIX) {
+	if (hw->model == CTUAA) {
 		err = uaa_to_xfi(pci);
 		if (err)
 			goto error2;
@@ -2027,7 +2027,7 @@ static int hw_card_init(struct hw *hw, struct card_conf *info)
 	case CTSB073X:
 		hw_write_20kx(hw, GPIOCTL, 0x00e6);
 		break;
-	case CTHENDRIX: /* Vista compatible cards */
+	case CTUAA:
 		hw_write_20kx(hw, GPIOCTL, 0x00c2);
 		break;
 	default:
-- 
cgit v0.10.2


From 9aee2286071c23c535fe9928eec1a26e0bcf256d Mon Sep 17 00:00:00 2001
From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Date: Mon, 8 Jun 2009 12:41:35 -0400
Subject: ext4: fix dx_map_entry to support 256k directory blocks

The dx_map_entry structure doesn't support over 64KB block size by
current usage of its member("offs"). Because "offs" treats an offset
of copies of the ext4_dir_entry_2 structure as is. This member size is
16 bits. But real offset for over 64KB(256KB) block size needs 18
bits. However, real offset keeps 4 byte boundary, so lower 2 bits is
not used.

Therefore, we do the following to fix this limitation:
For "store":
	we divide the real offset by 4 and then store this result to "offs"
	member.
For "use":
	we multiply "offs" member by 4 and then use this result
	as real offset.

Signed-off-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f2bc160..07eb664 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -749,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
 			ext4fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
-			map_tail->offs = (u16) ((char *) de - base);
+			map_tail->offs = ((char *) de - base)>>2;
 			map_tail->size = le16_to_cpu(de->rec_len);
 			count++;
 			cond_resched();
@@ -1147,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
 	unsigned rec_len = 0;
 
 	while (count--) {
-		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
+		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
+						(from + (map->offs<<2));
 		rec_len = EXT4_DIR_REC_LEN(de->name_len);
 		memcpy (to, de, rec_len);
 		((struct ext4_dir_entry_2 *) to)->rec_len =
-- 
cgit v0.10.2


From c4ed3f04ba9defe22aa729d1646f970f791c03d7 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Mon, 8 Jun 2009 10:44:05 -0500
Subject: x86, UV: Fix macros for multiple coherency domains

Fix bug in the SGI UV macros that support systems with multiple
coherency domains.  The macros used for referencing global MMR
(chipset registers) are failing to correctly "or" the NASID
(node identifier) bits that reside above M+N. These high bits
are supplied automatically by the chipset for memory accesses
coming from the processor socket.

However, the bits must be present for references to the special
global MMR space used to map chipset registers. (See uv_hub.h
for more details ...)

The bug results in references to invalid/incorrect nodes.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: <stable@kernel.org>
LKML-Reference: <20090608154405.GA16395@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index d3a98ea..341070f 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -133,6 +133,7 @@ struct uv_scir_s {
 struct uv_hub_info_s {
 	unsigned long		global_mmr_base;
 	unsigned long		gpa_mask;
+	unsigned int		gnode_extra;
 	unsigned long		gnode_upper;
 	unsigned long		lowmem_remap_top;
 	unsigned long		lowmem_remap_base;
@@ -159,7 +160,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
  * 		p -  PNODE (local part of nsids, right shifted 1)
  */
 #define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
-#define UV_PNODE_TO_NASID(p)		(((p) << 1) | uv_hub_info->gnode_upper)
+#define UV_PNODE_TO_GNODE(p)		((p) |uv_hub_info->gnode_extra)
+#define UV_PNODE_TO_NASID(p)		(UV_PNODE_TO_GNODE(p) << 1)
 
 #define UV_LOCAL_MMR_BASE		0xf4000000UL
 #define UV_GLOBAL_MMR32_BASE		0xf8000000UL
@@ -173,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define UV_GLOBAL_MMR32_PNODE_BITS(p)	((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
 
 #define UV_GLOBAL_MMR64_PNODE_BITS(p)					\
-	((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+	((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
 
 #define UV_APIC_PNODE_SHIFT	6
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 2bda693..39f2af4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -562,7 +562,7 @@ void __init uv_system_init(void)
 	union uvh_node_id_u node_id;
 	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
 	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
-	int max_pnode = 0;
+	int gnode_extra, max_pnode = 0;
 	unsigned long mmr_base, present, paddr;
 	unsigned short pnode_mask;
 
@@ -574,6 +574,13 @@ void __init uv_system_init(void)
 	mmr_base =
 	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
 	    ~UV_MMR_ENABLE;
+	pnode_mask = (1 << n_val) - 1;
+	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+	gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
+	gnode_upper = ((unsigned long)gnode_extra  << m_val);
+	printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
+			n_val, m_val, gnode_upper, gnode_extra);
+
 	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
 
 	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
@@ -607,11 +614,6 @@ void __init uv_system_init(void)
 		}
 	}
 
-	pnode_mask = (1 << n_val) - 1;
-	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
-	gnode_upper = (((unsigned long)node_id.s.node_id) &
-		       ~((1 << n_val) - 1)) << m_val;
-
 	uv_bios_init();
 	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
 			    &sn_coherency_id, &sn_region_size);
@@ -634,6 +636,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
 		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+		uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
 		uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
 		uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
-- 
cgit v0.10.2


From e50b6befae9ea91c2d190fb78ff1e06a0b950add Mon Sep 17 00:00:00 2001
From: Rabeeh Khoury <rabeeh@marvell.com>
Date: Tue, 24 Mar 2009 16:10:15 +0200
Subject: [ARM] Kirkwood: CPU idle driver

The patch adds support for Kirkwood cpu idle.
Two idle states are defined:
1. Wait-for-interrupt (replacing default kirkwood wfi)
2. Wait-for-interrupt and DDR self refresh

Signed-off-by: Rabeeh Khoury <rabeeh@marvell.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/configs/kirkwood_defconfig b/arch/arm/configs/kirkwood_defconfig
index dcf8153..bde56f4 100644
--- a/arch/arm/configs/kirkwood_defconfig
+++ b/arch/arm/configs/kirkwood_defconfig
@@ -270,7 +270,9 @@ CONFIG_CMDLINE=""
 #
 # CPU Power Management
 #
-# CONFIG_CPU_IDLE is not set
+CONFIG_CPU_IDLE=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPU_IDLE_GOV_MENU=y
 
 #
 # Floating point emulation
diff --git a/arch/arm/mach-kirkwood/Makefile b/arch/arm/mach-kirkwood/Makefile
index 8f03c9b..f21f35d 100644
--- a/arch/arm/mach-kirkwood/Makefile
+++ b/arch/arm/mach-kirkwood/Makefile
@@ -5,3 +5,5 @@ obj-$(CONFIG_MACH_RD88F6192_NAS)	+= rd88f6192-nas-setup.o
 obj-$(CONFIG_MACH_RD88F6281)		+= rd88f6281-setup.o
 obj-$(CONFIG_MACH_SHEEVAPLUG)		+= sheevaplug-setup.o
 obj-$(CONFIG_MACH_TS219)		+= ts219-setup.o
+
+obj-$(CONFIG_CPU_IDLE)			+= cpuidle.o
diff --git a/arch/arm/mach-kirkwood/cpuidle.c b/arch/arm/mach-kirkwood/cpuidle.c
new file mode 100644
index 0000000..f68d33f
--- /dev/null
+++ b/arch/arm/mach-kirkwood/cpuidle.c
@@ -0,0 +1,96 @@
+/*
+ * arch/arm/mach-kirkwood/cpuidle.c
+ *
+ * CPU idle Marvell Kirkwood SoCs
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * The cpu idle uses wait-for-interrupt and DDR self refresh in order
+ * to implement two idle states -
+ * #1 wait-for-interrupt
+ * #2 wait-for-interrupt and DDR self refresh
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/cpuidle.h>
+#include <linux/io.h>
+#include <asm/proc-fns.h>
+#include <mach/kirkwood.h>
+
+#define KIRKWOOD_MAX_STATES	2
+
+static struct cpuidle_driver kirkwood_idle_driver = {
+	.name =         "kirkwood_idle",
+	.owner =        THIS_MODULE,
+};
+
+static DEFINE_PER_CPU(struct cpuidle_device, kirkwood_cpuidle_device);
+
+/* Actual code that puts the SoC in different idle states */
+static int kirkwood_enter_idle(struct cpuidle_device *dev,
+			       struct cpuidle_state *state)
+{
+	struct timeval before, after;
+	int idle_time;
+
+	local_irq_disable();
+	do_gettimeofday(&before);
+	if (state == &dev->states[0])
+		/* Wait for interrupt state */
+		cpu_do_idle();
+	else if (state == &dev->states[1]) {
+		/*
+		 * Following write will put DDR in self refresh.
+		 * Note that we have 256 cycles before DDR puts it
+		 * self in self-refresh, so the wait-for-interrupt
+		 * call afterwards won't get the DDR from self refresh
+		 * mode.
+		 */
+		writel(0x7, DDR_OPERATION_BASE);
+		cpu_do_idle();
+	}
+	do_gettimeofday(&after);
+	local_irq_enable();
+	idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC +
+			(after.tv_usec - before.tv_usec);
+	return idle_time;
+}
+
+/* Initialize CPU idle by registering the idle states */
+static int kirkwood_init_cpuidle(void)
+{
+	struct cpuidle_device *device;
+
+	cpuidle_register_driver(&kirkwood_idle_driver);
+
+	device = &per_cpu(kirkwood_cpuidle_device, smp_processor_id());
+	device->state_count = KIRKWOOD_MAX_STATES;
+
+	/* Wait for interrupt state */
+	device->states[0].enter = kirkwood_enter_idle;
+	device->states[0].exit_latency = 1;
+	device->states[0].target_residency = 10000;
+	device->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
+	strcpy(device->states[0].name, "WFI");
+	strcpy(device->states[0].desc, "Wait for interrupt");
+
+	/* Wait for interrupt and DDR self refresh state */
+	device->states[1].enter = kirkwood_enter_idle;
+	device->states[1].exit_latency = 10;
+	device->states[1].target_residency = 10000;
+	device->states[1].flags = CPUIDLE_FLAG_TIME_VALID;
+	strcpy(device->states[1].name, "DDR SR");
+	strcpy(device->states[1].desc, "WFI and DDR Self Refresh");
+
+	if (cpuidle_register_device(device)) {
+		printk(KERN_ERR "kirkwood_init_cpuidle: Failed registering\n");
+		return -EIO;
+	}
+	return 0;
+}
+
+device_initcall(kirkwood_init_cpuidle);
diff --git a/arch/arm/mach-kirkwood/include/mach/kirkwood.h b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
index b3e1395..f20ff64 100644
--- a/arch/arm/mach-kirkwood/include/mach/kirkwood.h
+++ b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
@@ -48,6 +48,7 @@
  */
 #define DDR_VIRT_BASE		(KIRKWOOD_REGS_VIRT_BASE | 0x00000)
 #define  DDR_WINDOW_CPU_BASE	(DDR_VIRT_BASE | 0x1500)
+#define DDR_OPERATION_BASE	(DDR_VIRT_BASE | 0x1418)
 
 #define DEV_BUS_PHYS_BASE	(KIRKWOOD_REGS_PHYS_BASE | 0x10000)
 #define DEV_BUS_VIRT_BASE	(KIRKWOOD_REGS_VIRT_BASE | 0x10000)
-- 
cgit v0.10.2


From 8a3269fc21cc4405d80b362139c078cf655a505a Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Tue, 12 May 2009 10:30:41 -0700
Subject: [ARM] orion: sched_clock implementation for orion platforms

sched_clock implementation for orion platform. Its realized using
free-running clocksource timer, which provides a resolution of 7.5ns
(depending on tclk). It's derived from PXA's sched_clock implementation.

[ nico: renamed orion2ns to tclk2ns, fixed max value in the comment ]

Signed-off-by: Stefan Agner <stefan.agner@yahoo.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index de8a001..b856cec 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -17,6 +17,9 @@
 #include <linux/irq.h>
 #include <asm/mach/time.h>
 #include <mach/bridge-regs.h>
+#include <mach/hardware.h>
+#include <linux/sched.h>
+#include <linux/cnt32_to_63.h>
 
 /*
  * Number of timer ticks per jiffy.
@@ -39,6 +42,36 @@ static u32 ticks_per_jiffy;
 
 
 /*
+ * Orion's sched_clock implementation. It has a resolution of
+ * at least 7.5ns (133MHz TCLK) and a maximum value of 834 days.
+ */
+#define TCLK2NS_SCALE_FACTOR 8
+
+static unsigned long tclk2ns_scale;
+
+static void __init set_tclk2ns_scale(unsigned long tclk)
+{
+	unsigned long long v = NSEC_PER_SEC;
+	v <<= TCLK2NS_SCALE_FACTOR;
+	v += tclk/2;
+	do_div(v, tclk);
+	/*
+	 * We want an even value to automatically clear the top bit
+	 * returned by cnt32_to_63() without an additional run time
+	 * instruction. So if the LSB is 1 then round it up.
+	 */
+	if (v & 1)
+		v++;
+	tclk2ns_scale = v;
+}
+
+unsigned long long sched_clock(void)
+{
+	unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL));
+	return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR;
+}
+
+/*
  * Clocksource handling.
  */
 static cycle_t orion_clksrc_read(struct clocksource *cs)
@@ -176,6 +209,10 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 
 	ticks_per_jiffy = (tclk + HZ/2) / HZ;
 
+	/*
+	 * Set scale for sched_clock
+	 */
+	set_tclk2ns_scale(tclk);
 
 	/*
 	 * Setup free-running clocksource timer (interrupts
-- 
cgit v0.10.2


From a399e3fa795afa058e4485b25c498e0c5a860428 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 15 May 2009 00:42:36 -0400
Subject: [ARM] orion: make sure sched_clock() usage of cnt32_to_63() is safe

With a TCLK = 200MHz, the half period of the hardware timer is roughly
10 seconds. Because cnt32_to_63() must be called at least once per
half period of the base hardware counter, it is a bit risky to rely
solely on scheduling to generate frequent enough calls. Let's use a
kernel timer to ensure this.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index b856cec..715a301 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -12,14 +12,15 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cnt32_to_63.h>
+#include <linux/timer.h>
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <asm/mach/time.h>
 #include <mach/bridge-regs.h>
 #include <mach/hardware.h>
-#include <linux/sched.h>
-#include <linux/cnt32_to_63.h>
 
 /*
  * Number of timer ticks per jiffy.
@@ -44,14 +45,36 @@ static u32 ticks_per_jiffy;
 /*
  * Orion's sched_clock implementation. It has a resolution of
  * at least 7.5ns (133MHz TCLK) and a maximum value of 834 days.
+ *
+ * Because the hardware timer period is quite short (21 secs if
+ * 200MHz TCLK) and because cnt32_to_63() needs to be called at
+ * least once per half period to work properly, a kernel timer is
+ * set up to ensure this requirement is always met.
  */
 #define TCLK2NS_SCALE_FACTOR 8
 
 static unsigned long tclk2ns_scale;
 
-static void __init set_tclk2ns_scale(unsigned long tclk)
+unsigned long long sched_clock(void)
+{
+	unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL));
+	return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR;
+}
+
+static struct timer_list cnt32_to_63_keepwarm_timer;
+
+static void cnt32_to_63_keepwarm(unsigned long data)
+{
+	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
+	(void) sched_clock();
+}
+
+static void __init setup_sched_clock(unsigned long tclk)
 {
-	unsigned long long v = NSEC_PER_SEC;
+	unsigned long long v;
+	unsigned long data;
+
+	v = NSEC_PER_SEC;
 	v <<= TCLK2NS_SCALE_FACTOR;
 	v += tclk/2;
 	do_div(v, tclk);
@@ -63,12 +86,10 @@ static void __init set_tclk2ns_scale(unsigned long tclk)
 	if (v & 1)
 		v++;
 	tclk2ns_scale = v;
-}
 
-unsigned long long sched_clock(void)
-{
-	unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL));
-	return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR;
+	data = (0xffffffffUL / tclk / 2 - 2) * HZ;
+	setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data);
+	mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
 }
 
 /*
@@ -210,9 +231,9 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 	ticks_per_jiffy = (tclk + HZ/2) / HZ;
 
 	/*
-	 * Set scale for sched_clock
+	 * Set scale and timer for sched_clock
 	 */
-	set_tclk2ns_scale(tclk);
+	setup_sched_clock(tclk);
 
 	/*
 	 * Setup free-running clocksource timer (interrupts
@@ -227,7 +248,6 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 	orion_clksrc.mult = clocksource_hz2mult(tclk, orion_clksrc.shift);
 	clocksource_register(&orion_clksrc);
 
-
 	/*
 	 * Setup clockevent timer (interrupt-driven.)
 	 */
-- 
cgit v0.10.2


From 91af7bb2f48e14892c5c961bca1fae5c7886532e Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Sun, 19 Oct 2008 02:38:25 +0200
Subject: [ARM] Kirkwood: add Marvell 88F6281 GTW GE board support

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>

diff --git a/arch/arm/configs/kirkwood_defconfig b/arch/arm/configs/kirkwood_defconfig
index bde56f4..0a1abb9 100644
--- a/arch/arm/configs/kirkwood_defconfig
+++ b/arch/arm/configs/kirkwood_defconfig
@@ -182,6 +182,7 @@ CONFIG_ARCH_KIRKWOOD=y
 CONFIG_MACH_DB88F6281_BP=y
 CONFIG_MACH_RD88F6192_NAS=y
 CONFIG_MACH_RD88F6281=y
+CONFIG_MACH_MV88F6281GTW_GE=y
 CONFIG_MACH_SHEEVAPLUG=y
 CONFIG_MACH_TS219=y
 CONFIG_PLAT_ORION=y
diff --git a/arch/arm/mach-kirkwood/Kconfig b/arch/arm/mach-kirkwood/Kconfig
index b5421cc..25100f7 100644
--- a/arch/arm/mach-kirkwood/Kconfig
+++ b/arch/arm/mach-kirkwood/Kconfig
@@ -20,6 +20,12 @@ config MACH_RD88F6281
 	  Say 'Y' here if you want your kernel to support the
 	  Marvell RD-88F6281 Reference Board.
 
+config MACH_MV88F6281GTW_GE
+	bool "Marvell 88F6281 GTW GE Board"
+	help
+	  Say 'Y' here if you want your kernel to support the
+	  Marvell 88F6281 GTW GE Board.
+
 config MACH_SHEEVAPLUG
 	bool "Marvell SheevaPlug Reference Board"
 	help
diff --git a/arch/arm/mach-kirkwood/Makefile b/arch/arm/mach-kirkwood/Makefile
index f21f35d..9dd680e 100644
--- a/arch/arm/mach-kirkwood/Makefile
+++ b/arch/arm/mach-kirkwood/Makefile
@@ -3,6 +3,7 @@ obj-y				+= common.o addr-map.o irq.o pcie.o mpp.o
 obj-$(CONFIG_MACH_DB88F6281_BP)		+= db88f6281-bp-setup.o
 obj-$(CONFIG_MACH_RD88F6192_NAS)	+= rd88f6192-nas-setup.o
 obj-$(CONFIG_MACH_RD88F6281)		+= rd88f6281-setup.o
+obj-$(CONFIG_MACH_MV88F6281GTW_GE)	+= mv88f6281gtw_ge-setup.o
 obj-$(CONFIG_MACH_SHEEVAPLUG)		+= sheevaplug-setup.o
 obj-$(CONFIG_MACH_TS219)		+= ts219-setup.o
 
diff --git a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
new file mode 100644
index 0000000..4fb03f4
--- /dev/null
+++ b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
@@ -0,0 +1,99 @@
+/*
+ * arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
+ *
+ * Marvell 88F6281 GTW GE Board Setup
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/mtd/physmap.h>
+#include <linux/timer.h>
+#include <linux/mv643xx_eth.h>
+#include <linux/ethtool.h>
+#include <linux/spi/flash.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/orion_spi.h>
+#include <net/dsa.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/pci.h>
+#include <mach/kirkwood.h>
+#include "common.h"
+
+static struct mv643xx_eth_platform_data mv88f6281gtw_ge_ge00_data = {
+	.phy_addr	= MV643XX_ETH_PHY_NONE,
+	.speed		= SPEED_1000,
+	.duplex		= DUPLEX_FULL,
+};
+
+static struct dsa_chip_data mv88f6281gtw_ge_switch_chip_data = {
+	.port_names[0]	= "lan1",
+	.port_names[1]	= "lan2",
+	.port_names[2]	= "lan3",
+	.port_names[3]	= "lan4",
+	.port_names[4]	= "wan",
+	.port_names[5]	= "cpu",
+};
+
+static struct dsa_platform_data mv88f6281gtw_ge_switch_plat_data = {
+	.nr_chips	= 1,
+	.chip		= &mv88f6281gtw_ge_switch_chip_data,
+};
+
+static const struct flash_platform_data mv88f6281gtw_ge_spi_slave_data = {
+	.type		= "mx25l12805d",
+};
+
+static struct spi_board_info __initdata mv88f6281gtw_ge_spi_slave_info[] = {
+	{
+		.modalias	= "m25p80",
+		.platform_data	= &mv88f6281gtw_ge_spi_slave_data,
+		.irq		= -1,
+		.max_speed_hz	= 50000000,
+		.bus_num	= 0,
+		.chip_select	= 0,
+	},
+};
+
+static void __init mv88f6281gtw_ge_init(void)
+{
+	/*
+	 * Basic setup. Needs to be called early.
+	 */
+	kirkwood_init();
+
+	kirkwood_ehci_init();
+	kirkwood_ge00_init(&mv88f6281gtw_ge_ge00_data);
+	kirkwood_ge00_switch_init(&mv88f6281gtw_ge_switch_plat_data, NO_IRQ);
+	spi_register_board_info(mv88f6281gtw_ge_spi_slave_info,
+				ARRAY_SIZE(mv88f6281gtw_ge_spi_slave_info));
+	kirkwood_spi_init();
+	kirkwood_uart0_init();
+}
+
+static int __init mv88f6281gtw_ge_pci_init(void)
+{
+	if (machine_is_mv88f6281gtw_ge())
+		kirkwood_pcie_init();
+
+	return 0;
+}
+subsys_initcall(mv88f6281gtw_ge_pci_init);
+
+MACHINE_START(MV88F6281GTW_GE, "Marvell 88F6281 GTW GE Board")
+	/* Maintainer: Lennert Buytenhek <buytenh@marvell.com> */
+	.phys_io	= KIRKWOOD_REGS_PHYS_BASE,
+	.io_pg_offst	= ((KIRKWOOD_REGS_VIRT_BASE) >> 18) & 0xfffc,
+	.boot_params	= 0x00000100,
+	.init_machine	= mv88f6281gtw_ge_init,
+	.map_io		= kirkwood_map_io,
+	.init_irq	= kirkwood_init_irq,
+	.timer		= &kirkwood_timer,
+MACHINE_END
-- 
cgit v0.10.2


From 96e7d211b46ce838ceca5d9734d6e166cfafdef4 Mon Sep 17 00:00:00 2001
From: Siddarth Gore <gores@marvell.com>
Date: Tue, 5 May 2009 14:52:09 +0530
Subject: [ARM] Kirkwood: enable gpio leds/buttons for the mv88f6281gtw_ge
 board

Signed-off-by: Siddarth Gore <gores@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>

diff --git a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
index 4fb03f4..0358f45 100644
--- a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
+++ b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
@@ -17,6 +17,10 @@
 #include <linux/timer.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ethtool.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
 #include <linux/spi/flash.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/orion_spi.h>
@@ -26,6 +30,7 @@
 #include <asm/mach/pci.h>
 #include <mach/kirkwood.h>
 #include "common.h"
+#include "mpp.h"
 
 static struct mv643xx_eth_platform_data mv88f6281gtw_ge_ge00_data = {
 	.phy_addr	= MV643XX_ETH_PHY_NONE,
@@ -62,12 +67,79 @@ static struct spi_board_info __initdata mv88f6281gtw_ge_spi_slave_info[] = {
 	},
 };
 
+static struct gpio_keys_button mv88f6281gtw_ge_button_pins[] = {
+	{
+		.code		= KEY_RESTART,
+		.gpio		= 47,
+		.desc		= "SWR Button",
+		.active_low	= 1,
+	}, {
+		.code		= KEY_F1,
+		.gpio		= 46,
+		.desc		= "WPS Button(F1)",
+		.active_low	= 1,
+	},
+};
+
+static struct gpio_keys_platform_data mv88f6281gtw_ge_button_data = {
+	.buttons	= mv88f6281gtw_ge_button_pins,
+	.nbuttons	= ARRAY_SIZE(mv88f6281gtw_ge_button_pins),
+};
+
+static struct platform_device mv88f6281gtw_ge_buttons = {
+	.name		= "gpio-keys",
+	.id		= -1,
+	.num_resources	= 0,
+	.dev		= {
+		.platform_data	= &mv88f6281gtw_ge_button_data,
+	},
+};
+
+static struct gpio_led mv88f6281gtw_ge_led_pins[] = {
+	{
+		.name		= "gtw:green:Status",
+		.gpio		= 20,
+		.active_low	= 0,
+	}, {
+		.name		= "gtw:red:Status",
+		.gpio		= 21,
+		.active_low	= 0,
+	}, {
+		.name		= "gtw:green:USB",
+		.gpio		= 12,
+		.active_low	= 0,
+	},
+};
+
+static struct gpio_led_platform_data mv88f6281gtw_ge_led_data = {
+	.leds		= mv88f6281gtw_ge_led_pins,
+	.num_leds	= ARRAY_SIZE(mv88f6281gtw_ge_led_pins),
+};
+
+static struct platform_device mv88f6281gtw_ge_leds = {
+	.name	= "leds-gpio",
+	.id	= -1,
+	.dev	= {
+		.platform_data	= &mv88f6281gtw_ge_led_data,
+	},
+};
+
+static unsigned int mv88f6281gtw_ge_mpp_config[] __initdata = {
+	MPP12_GPO,	/* Status#_USB pin  */
+	MPP20_GPIO,	/* Status#_GLED pin */
+	MPP21_GPIO,	/* Status#_RLED pin */
+	MPP46_GPIO,	/* WPS_Switch pin   */
+	MPP47_GPIO,	/* SW_Init pin      */
+	0
+};
+
 static void __init mv88f6281gtw_ge_init(void)
 {
 	/*
 	 * Basic setup. Needs to be called early.
 	 */
 	kirkwood_init();
+	kirkwood_mpp_conf(mv88f6281gtw_ge_mpp_config);
 
 	kirkwood_ehci_init();
 	kirkwood_ge00_init(&mv88f6281gtw_ge_ge00_data);
@@ -76,6 +148,8 @@ static void __init mv88f6281gtw_ge_init(void)
 				ARRAY_SIZE(mv88f6281gtw_ge_spi_slave_info));
 	kirkwood_spi_init();
 	kirkwood_uart0_init();
+	platform_device_register(&mv88f6281gtw_ge_leds);
+	platform_device_register(&mv88f6281gtw_ge_buttons);
 }
 
 static int __init mv88f6281gtw_ge_pci_init(void)
-- 
cgit v0.10.2


From 797b2c80e8e111bc9673fc5e6c67ac03e8b7c7ef Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 26 May 2009 22:06:25 -0400
Subject: [ARM] Kirkwood: only map peripheral register space once

Just like commit 1419468ab548, let's save some TLB entries by making
ioremap() return pointers into the boot-time Kirkwood peripheral
iotable mapping whenever someone tries to ioremap any part of the Kirkwood
peripheral register space.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/include/mach/io.h b/arch/arm/mach-kirkwood/include/mach/io.h
index be07be0..a643a84 100644
--- a/arch/arm/mach-kirkwood/include/mach/io.h
+++ b/arch/arm/mach-kirkwood/include/mach/io.h
@@ -19,6 +19,31 @@ static inline void __iomem *__io(unsigned long addr)
 					+ KIRKWOOD_PCIE_IO_VIRT_BASE);
 }
 
+static inline void __iomem *
+__arch_ioremap(unsigned long paddr, size_t size, unsigned int mtype)
+{
+	void __iomem *retval;
+	unsigned long offs = paddr - KIRKWOOD_REGS_PHYS_BASE;
+	if (mtype == MT_DEVICE && size && offs < KIRKWOOD_REGS_SIZE &&
+	    size <= KIRKWOOD_REGS_SIZE && offs + size <= KIRKWOOD_REGS_SIZE) {
+		retval = (void __iomem *)KIRKWOOD_REGS_VIRT_BASE + offs;
+	} else {
+		retval = __arm_ioremap(paddr, size, mtype);
+	}
+
+	return retval;
+}
+
+static inline void
+__arch_iounmap(void __iomem *addr)
+{
+	if (addr < (void __iomem *)KIRKWOOD_REGS_VIRT_BASE ||
+	    addr >= (void __iomem *)(KIRKWOOD_REGS_VIRT_BASE + KIRKWOOD_REGS_SIZE))
+		__iounmap(addr);
+}
+
+#define __arch_ioremap(p, s, m)	__arch_ioremap(p, s, m)
+#define __arch_iounmap(a)	__arch_iounmap(a)
 #define __io(a)			__io(a)
 #define __mem_pci(a)		(a)
 
-- 
cgit v0.10.2


From f14081e109e7074be3052f5410641161b6479abb Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 29 May 2009 22:29:01 -0400
Subject: [ARM] Kirkwood: comment type fix

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/sheevaplug-setup.c b/arch/arm/mach-kirkwood/sheevaplug-setup.c
index 831e4a5..dcc448c 100644
--- a/arch/arm/mach-kirkwood/sheevaplug-setup.c
+++ b/arch/arm/mach-kirkwood/sheevaplug-setup.c
@@ -71,7 +71,7 @@ static struct mv643xx_eth_platform_data sheevaplug_ge00_data = {
 };
 
 static struct mvsdio_platform_data sheevaplug_mvsdio_data = {
-	// unfortunately the CD signal has not been connected */
+	/* unfortunately the CD signal has not been connected */
 };
 
 static struct gpio_led sheevaplug_led_pins[] = {
-- 
cgit v0.10.2


From a88656553d18c324554855fccc730c9644048111 Mon Sep 17 00:00:00 2001
From: Erik Benada <erikbenada@yahoo.ca>
Date: Thu, 28 May 2009 17:08:55 -0700
Subject: [ARM] orion: convert gpio to use gpiolib

Signed-off-by: Erik Benada <erikbenada@yahoo.ca>

[ nico: fix locking, additional cleanups ]

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9d02cdb..c6225ea 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -398,6 +398,7 @@ config ARCH_KIRKWOOD
 	select CPU_FEROCEON
 	select PCI
 	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select PLAT_ORION
@@ -441,6 +442,7 @@ config ARCH_MV78XX0
 	select CPU_FEROCEON
 	select PCI
 	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select PLAT_ORION
@@ -465,6 +467,7 @@ config ARCH_ORION5X
 	select CPU_FEROCEON
 	select PCI
 	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select PLAT_ORION
diff --git a/arch/arm/mach-kirkwood/mpp.c b/arch/arm/mach-kirkwood/mpp.c
index 63c4493..a5900f6 100644
--- a/arch/arm/mach-kirkwood/mpp.c
+++ b/arch/arm/mach-kirkwood/mpp.c
@@ -48,6 +48,9 @@ void __init kirkwood_mpp_conf(unsigned int *mpp_list)
 	if (!variant_mask)
 		return;
 
+	/* Initialize gpiolib. */
+	orion_gpio_init();
+
 	printk(KERN_DEBUG "initial MPP regs:");
 	for (i = 0; i < MPP_NR_REGS; i++) {
 		mpp_ctrl[i] = readl(MPP_CTRL(i));
diff --git a/arch/arm/mach-mv78xx0/irq.c b/arch/arm/mach-mv78xx0/irq.c
index f289b0e..22b4ff8 100644
--- a/arch/arm/mach-mv78xx0/irq.c
+++ b/arch/arm/mach-mv78xx0/irq.c
@@ -28,6 +28,9 @@ void __init mv78xx0_init_irq(void)
 {
 	int i;
 
+	/* Initialize gpiolib. */
+	orion_gpio_init();
+
 	orion_irq_init(0, (void __iomem *)(IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF));
 	orion_irq_init(32, (void __iomem *)(IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF));
 	orion_irq_init(64, (void __iomem *)(IRQ_VIRT_BASE + IRQ_MASK_ERR_OFF));
diff --git a/arch/arm/mach-orion5x/mpp.c b/arch/arm/mach-orion5x/mpp.c
index e23a3f9..bc4c3b9 100644
--- a/arch/arm/mach-orion5x/mpp.c
+++ b/arch/arm/mach-orion5x/mpp.c
@@ -124,6 +124,9 @@ void __init orion5x_mpp_conf(struct orion5x_mpp_mode *mode)
 	u32 mpp_8_15_ctrl = readl(MPP_8_15_CTRL);
 	u32 mpp_16_19_ctrl = readl(MPP_16_19_CTRL);
 
+	/* Initialize gpiolib. */
+	orion_gpio_init();
+
 	while (mode->mpp >= 0) {
 		u32 *reg;
 		int num_type;
diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c
index 32eb9e3..e814803 100644
--- a/arch/arm/plat-orion/gpio.c
+++ b/arch/arm/plat-orion/gpio.c
@@ -15,10 +15,9 @@
 #include <linux/spinlock.h>
 #include <linux/bitops.h>
 #include <linux/io.h>
-#include <asm/gpio.h>
+#include <linux/gpio.h>
 
 static DEFINE_SPINLOCK(gpio_lock);
-static const char *gpio_label[GPIO_MAX];  /* non null for allocated GPIOs */
 static unsigned long gpio_valid_input[BITS_TO_LONGS(GPIO_MAX)];
 static unsigned long gpio_valid_output[BITS_TO_LONGS(GPIO_MAX)];
 
@@ -46,82 +45,54 @@ static void __set_level(unsigned pin, int high)
 	writel(u, GPIO_OUT(pin));
 }
 
-
-/*
- * GENERIC_GPIO primitives.
- */
-int gpio_direction_input(unsigned pin)
+static inline void __set_blinking(unsigned pin, int blink)
 {
-	unsigned long flags;
-
-	if (pin >= GPIO_MAX || !test_bit(pin, gpio_valid_input)) {
-		pr_debug("%s: invalid GPIO %d\n", __func__, pin);
-		return -EINVAL;
-	}
-
-	spin_lock_irqsave(&gpio_lock, flags);
-
-	/*
-	 * Some callers might not have used gpio_request(),
-	 * so flag this pin as requested now.
-	 */
-	if (gpio_label[pin] == NULL)
-		gpio_label[pin] = "?";
+	u32 u;
 
-	/*
-	 * Configure GPIO direction.
-	 */
-	__set_direction(pin, 1);
+	u = readl(GPIO_BLINK_EN(pin));
+	if (blink)
+		u |= 1 << (pin & 31);
+	else
+		u &= ~(1 << (pin & 31));
+	writel(u, GPIO_BLINK_EN(pin));
+}
 
-	spin_unlock_irqrestore(&gpio_lock, flags);
+static inline int orion_gpio_is_valid(unsigned pin, int mode)
+{
+	if (pin < GPIO_MAX) {
+		if ((mode & GPIO_INPUT_OK) && !test_bit(pin, gpio_valid_input))
+			goto err_out;
+		if ((mode & GPIO_OUTPUT_OK) && !test_bit(pin, gpio_valid_output))
+			goto err_out;
+		return true;
+	}
 
-	return 0;
+err_out:
+	pr_debug("%s: invalid GPIO %d\n", __func__, pin);
+	return false;
 }
-EXPORT_SYMBOL(gpio_direction_input);
 
-int gpio_direction_output(unsigned pin, int value)
+/*
+ * GENERIC_GPIO primitives.
+ */
+static int orion_gpio_direction_input(struct gpio_chip *chip, unsigned pin)
 {
 	unsigned long flags;
-	u32 u;
 
-	if (pin >= GPIO_MAX || !test_bit(pin, gpio_valid_output)) {
-		pr_debug("%s: invalid GPIO %d\n", __func__, pin);
+	if (!orion_gpio_is_valid(pin, GPIO_INPUT_OK))
 		return -EINVAL;
-	}
 
 	spin_lock_irqsave(&gpio_lock, flags);
 
-	/*
-	 * Some callers might not have used gpio_request(),
-	 * so flag this pin as requested now.
-	 */
-	if (gpio_label[pin] == NULL)
-		gpio_label[pin] = "?";
-
-	/*
-	 * Disable blinking.
-	 */
-	u = readl(GPIO_BLINK_EN(pin));
-	u &= ~(1 << (pin & 31));
-	writel(u, GPIO_BLINK_EN(pin));
-
-	/*
-	 * Configure GPIO output value.
-	 */
-	__set_level(pin, value);
-
-	/*
-	 * Configure GPIO direction.
-	 */
-	__set_direction(pin, 0);
+	/* Configure GPIO direction. */
+	__set_direction(pin, 1);
 
 	spin_unlock_irqrestore(&gpio_lock, flags);
 
 	return 0;
 }
-EXPORT_SYMBOL(gpio_direction_output);
 
-int gpio_get_value(unsigned pin)
+static int orion_gpio_get_value(struct gpio_chip *chip, unsigned pin)
 {
 	int val;
 
@@ -132,83 +103,75 @@ int gpio_get_value(unsigned pin)
 
 	return (val >> (pin & 31)) & 1;
 }
-EXPORT_SYMBOL(gpio_get_value);
 
-void gpio_set_value(unsigned pin, int value)
+static int orion_gpio_direction_output(struct gpio_chip *chip, unsigned pin,
+	int value)
 {
 	unsigned long flags;
-	u32 u;
+
+	if (!orion_gpio_is_valid(pin, GPIO_OUTPUT_OK))
+		return -EINVAL;
 
 	spin_lock_irqsave(&gpio_lock, flags);
 
-	/*
-	 * Disable blinking.
-	 */
-	u = readl(GPIO_BLINK_EN(pin));
-	u &= ~(1 << (pin & 31));
-	writel(u, GPIO_BLINK_EN(pin));
+	/* Disable blinking. */
+	__set_blinking(pin, 0);
 
-	/*
-	 * Configure GPIO output value.
-	 */
+	/* Configure GPIO output value. */
 	__set_level(pin, value);
 
+	/* Configure GPIO direction. */
+	__set_direction(pin, 0);
+
 	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	return 0;
 }
-EXPORT_SYMBOL(gpio_set_value);
 
-int gpio_request(unsigned pin, const char *label)
+static void orion_gpio_set_value(struct gpio_chip *chip, unsigned pin,
+	int value)
 {
 	unsigned long flags;
-	int ret;
-
-	if (pin >= GPIO_MAX ||
-	    !(test_bit(pin, gpio_valid_input) ||
-	      test_bit(pin, gpio_valid_output))) {
-		pr_debug("%s: invalid GPIO %d\n", __func__, pin);
-		return -EINVAL;
-	}
 
 	spin_lock_irqsave(&gpio_lock, flags);
-	if (gpio_label[pin] == NULL) {
-		gpio_label[pin] = label ? label : "?";
-		ret = 0;
-	} else {
-		pr_debug("%s: GPIO %d already used as %s\n",
-			 __func__, pin, gpio_label[pin]);
-		ret = -EBUSY;
-	}
-	spin_unlock_irqrestore(&gpio_lock, flags);
 
-	return ret;
+	/* Configure GPIO output value. */
+	__set_level(pin, value);
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
 }
-EXPORT_SYMBOL(gpio_request);
 
-void gpio_free(unsigned pin)
+static int orion_gpio_request(struct gpio_chip *chip, unsigned pin)
 {
-	if (pin >= GPIO_MAX ||
-	    !(test_bit(pin, gpio_valid_input) ||
-	      test_bit(pin, gpio_valid_output))) {
-		pr_debug("%s: invalid GPIO %d\n", __func__, pin);
-		return;
-	}
-
-	if (gpio_label[pin] == NULL)
-		pr_warning("%s: GPIO %d already freed\n", __func__, pin);
-	else
-		gpio_label[pin] = NULL;
+	if (orion_gpio_is_valid(pin, GPIO_INPUT_OK) ||
+	    orion_gpio_is_valid(pin, GPIO_OUTPUT_OK))
+		return 0;
+	return -EINVAL;
 }
-EXPORT_SYMBOL(gpio_free);
 
+static struct gpio_chip orion_gpiochip = {
+	.label			= "orion_gpio",
+	.direction_input	= orion_gpio_direction_input,
+	.get			= orion_gpio_get_value,
+	.direction_output	= orion_gpio_direction_output,
+	.set			= orion_gpio_set_value,
+	.request		= orion_gpio_request,
+	.base			= 0,
+	.ngpio			= GPIO_MAX,
+	.can_sleep		= 0,
+};
+
+void __init orion_gpio_init(void)
+{
+	gpiochip_add(&orion_gpiochip);
+}
 
 /*
  * Orion-specific GPIO API extensions.
  */
 void __init orion_gpio_set_unused(unsigned pin)
 {
-	/*
-	 * Configure as output, drive low.
-	 */
+	/* Configure as output, drive low. */
 	__set_level(pin, 0);
 	__set_direction(pin, 0);
 }
@@ -230,21 +193,14 @@ void __init orion_gpio_set_valid(unsigned pin, int mode)
 void orion_gpio_set_blink(unsigned pin, int blink)
 {
 	unsigned long flags;
-	u32 u;
 
 	spin_lock_irqsave(&gpio_lock, flags);
 
-	/*
-	 * Set output value to zero.
-	 */
+	/* Set output value to zero. */
 	__set_level(pin, 0);
 
-	u = readl(GPIO_BLINK_EN(pin));
-	if (blink)
-		u |= 1 << (pin & 31);
-	else
-		u &= ~(1 << (pin & 31));
-	writel(u, GPIO_BLINK_EN(pin));
+	/* Set blinking. */
+	__set_blinking(pin, blink);
 
 	spin_unlock_irqrestore(&gpio_lock, flags);
 }
@@ -368,7 +324,7 @@ static int gpio_irq_set_type(u32 irq, u32 type)
 }
 
 struct irq_chip orion_gpio_irq_chip = {
-	.name		= "orion_gpio",
+	.name		= "orion_gpio_irq",
 	.ack		= gpio_irq_ack,
 	.mask		= gpio_irq_mask,
 	.unmask		= gpio_irq_unmask,
diff --git a/arch/arm/plat-orion/include/plat/gpio.h b/arch/arm/plat-orion/include/plat/gpio.h
index 33f6c6a..9646a94 100644
--- a/arch/arm/plat-orion/include/plat/gpio.h
+++ b/arch/arm/plat-orion/include/plat/gpio.h
@@ -14,12 +14,9 @@
 /*
  * GENERIC_GPIO primitives.
  */
-int gpio_request(unsigned pin, const char *label);
-void gpio_free(unsigned pin);
-int gpio_direction_input(unsigned pin);
-int gpio_direction_output(unsigned pin, int value);
-int gpio_get_value(unsigned pin);
-void gpio_set_value(unsigned pin, int value);
+#define gpio_get_value  __gpio_get_value
+#define gpio_set_value  __gpio_set_value
+#define gpio_cansleep   __gpio_cansleep
 
 /*
  * Orion-specific GPIO API extensions.
@@ -27,11 +24,13 @@ void gpio_set_value(unsigned pin, int value);
 void orion_gpio_set_unused(unsigned pin);
 void orion_gpio_set_blink(unsigned pin, int blink);
 
-#define GPIO_BIDI_OK		(1 << 0)
-#define GPIO_INPUT_OK		(1 << 1)
-#define GPIO_OUTPUT_OK		(1 << 2)
+#define GPIO_INPUT_OK		(1 << 0)
+#define GPIO_OUTPUT_OK		(1 << 1)
 void orion_gpio_set_valid(unsigned pin, int mode);
 
+/* Initialize gpiolib. */
+void __init orion_gpio_init(void);
+
 /*
  * GPIO interrupt handling.
  */
-- 
cgit v0.10.2


From fb7b2d3f0dda3fbd711c191fd3b9496653e81ac7 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 1 Jun 2009 15:36:36 -0400
Subject: [ARM] Kirkwood: rationalize NAND setup a bit

Common resource and platform device structures are moved to common.c
and only the partition table and chip delay remains a per board
parameter.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index be1ca28..6e3eb1a 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -16,6 +16,7 @@
 #include <linux/mv643xx_eth.h>
 #include <linux/mv643xx_i2c.h>
 #include <linux/ata_platform.h>
+#include <linux/mtd/nand.h>
 #include <linux/spi/orion_spi.h>
 #include <net/dsa.h>
 #include <asm/page.h>
@@ -258,6 +259,42 @@ void __init kirkwood_ge00_switch_init(struct dsa_platform_data *d, int irq)
 
 
 /*****************************************************************************
+ * NAND flash
+ ****************************************************************************/
+static struct resource kirkwood_nand_resource = {
+	.flags		= IORESOURCE_MEM,
+	.start		= KIRKWOOD_NAND_MEM_PHYS_BASE,
+	.end		= KIRKWOOD_NAND_MEM_PHYS_BASE +
+				KIRKWOOD_NAND_MEM_SIZE - 1,
+};
+
+static struct orion_nand_data kirkwood_nand_data = {
+	.cle		= 0,
+	.ale		= 1,
+	.width		= 8,
+};
+
+static struct platform_device kirkwood_nand_flash = {
+	.name		= "orion_nand",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &kirkwood_nand_data,
+	},
+	.resource	= &kirkwood_nand_resource,
+	.num_resources	= 1,
+};
+
+void __init kirkwood_nand_init(struct mtd_partition *parts, int nr_parts,
+			       int chip_delay)
+{
+	kirkwood_nand_data.parts = parts;
+	kirkwood_nand_data.nr_parts = nr_parts;
+	kirkwood_nand_data.chip_delay = chip_delay;
+	platform_device_register(&kirkwood_nand_flash);
+}
+
+
+/*****************************************************************************
  * SoC RTC
  ****************************************************************************/
 static struct resource kirkwood_rtc_resource = {
diff --git a/arch/arm/mach-kirkwood/common.h b/arch/arm/mach-kirkwood/common.h
index 6ee8840..9de5256 100644
--- a/arch/arm/mach-kirkwood/common.h
+++ b/arch/arm/mach-kirkwood/common.h
@@ -15,6 +15,7 @@ struct dsa_platform_data;
 struct mv643xx_eth_platform_data;
 struct mv_sata_platform_data;
 struct mvsdio_platform_data;
+struct mtd_partition;
 
 /*
  * Basic Kirkwood init functions used early by machine-setup.
@@ -40,9 +41,11 @@ void kirkwood_spi_init(void);
 void kirkwood_i2c_init(void);
 void kirkwood_uart0_init(void);
 void kirkwood_uart1_init(void);
+void kirkwood_nand_init(struct mtd_partition *parts, int nr_parts, int delay);
 
 extern int kirkwood_tclk;
 extern struct sys_timer kirkwood_timer;
 
+#define ARRAY_AND_SIZE(x)	(x), ARRAY_SIZE(x)
 
 #endif
diff --git a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
index 5505d58..39bdf4b 100644
--- a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
+++ b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
@@ -11,14 +11,12 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/ata_platform.h>
 #include <linux/mv643xx_eth.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <mach/kirkwood.h>
-#include <plat/orion_nand.h>
 #include <plat/mvsdio.h>
 #include "common.h"
 #include "mpp.h"
@@ -39,32 +37,6 @@ static struct mtd_partition db88f6281_nand_parts[] = {
 	},
 };
 
-static struct resource db88f6281_nand_resource = {
-	.flags		= IORESOURCE_MEM,
-	.start		= KIRKWOOD_NAND_MEM_PHYS_BASE,
-	.end		= KIRKWOOD_NAND_MEM_PHYS_BASE +
-			  KIRKWOOD_NAND_MEM_SIZE - 1,
-};
-
-static struct orion_nand_data db88f6281_nand_data = {
-	.parts		= db88f6281_nand_parts,
-	.nr_parts	= ARRAY_SIZE(db88f6281_nand_parts),
-	.cle		= 0,
-	.ale		= 1,
-	.width		= 8,
-	.chip_delay	= 25,
-};
-
-static struct platform_device db88f6281_nand_flash = {
-	.name		= "orion_nand",
-	.id		= -1,
-	.dev		= {
-		.platform_data	= &db88f6281_nand_data,
-	},
-	.resource	= &db88f6281_nand_resource,
-	.num_resources	= 1,
-};
-
 static struct mv643xx_eth_platform_data db88f6281_ge00_data = {
 	.phy_addr	= MV643XX_ETH_PHY_ADDR(8),
 };
@@ -92,13 +64,12 @@ static void __init db88f6281_init(void)
 	kirkwood_init();
 	kirkwood_mpp_conf(db88f6281_mpp_config);
 
+	kirkwood_nand_init(ARRAY_AND_SIZE(db88f6281_nand_parts), 25);
 	kirkwood_ehci_init();
 	kirkwood_ge00_init(&db88f6281_ge00_data);
 	kirkwood_sata_init(&db88f6281_sata_data);
 	kirkwood_uart0_init();
 	kirkwood_sdio_init(&db88f6281_mvsdio_data);
-	
-	platform_device_register(&db88f6281_nand_flash);
 }
 
 static int __init db88f6281_pci_init(void)
diff --git a/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c b/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
index 2f0e4ef..8bf4153 100644
--- a/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
+++ b/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
@@ -11,8 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/partitions.h>
 #include <linux/ata_platform.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/spi/flash.h>
diff --git a/arch/arm/mach-kirkwood/rd88f6281-setup.c b/arch/arm/mach-kirkwood/rd88f6281-setup.c
index 31e996d..31708dd 100644
--- a/arch/arm/mach-kirkwood/rd88f6281-setup.c
+++ b/arch/arm/mach-kirkwood/rd88f6281-setup.c
@@ -12,7 +12,6 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/irq.h>
-#include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/ata_platform.h>
 #include <linux/mv643xx_eth.h>
@@ -22,7 +21,6 @@
 #include <asm/mach/arch.h>
 #include <mach/kirkwood.h>
 #include <plat/mvsdio.h>
-#include <plat/orion_nand.h>
 #include "common.h"
 #include "mpp.h"
 
@@ -42,32 +40,6 @@ static struct mtd_partition rd88f6281_nand_parts[] = {
 	},
 };
 
-static struct resource rd88f6281_nand_resource = {
-	.flags		= IORESOURCE_MEM,
-	.start		= KIRKWOOD_NAND_MEM_PHYS_BASE,
-	.end		= KIRKWOOD_NAND_MEM_PHYS_BASE +
-			  KIRKWOOD_NAND_MEM_SIZE - 1,
-};
-
-static struct orion_nand_data rd88f6281_nand_data = {
-	.parts		= rd88f6281_nand_parts,
-	.nr_parts	= ARRAY_SIZE(rd88f6281_nand_parts),
-	.cle		= 0,
-	.ale		= 1,
-	.width		= 8,
-	.chip_delay	= 25,
-};
-
-static struct platform_device rd88f6281_nand_flash = {
-	.name		= "orion_nand",
-	.id		= -1,
-	.dev		= {
-		.platform_data	= &rd88f6281_nand_data,
-	},
-	.resource	= &rd88f6281_nand_resource,
-	.num_resources	= 1,
-};
-
 static struct mv643xx_eth_platform_data rd88f6281_ge00_data = {
 	.phy_addr	= MV643XX_ETH_PHY_NONE,
 	.speed		= SPEED_1000,
@@ -114,6 +86,7 @@ static void __init rd88f6281_init(void)
 	kirkwood_init();
 	kirkwood_mpp_conf(rd88f6281_mpp_config);
 
+	kirkwood_nand_init(ARRAY_AND_SIZE(rd88f6281_nand_parts), 25);
 	kirkwood_ehci_init();
 
 	kirkwood_ge00_init(&rd88f6281_ge00_data);
@@ -129,8 +102,6 @@ static void __init rd88f6281_init(void)
 	kirkwood_sata_init(&rd88f6281_sata_data);
 	kirkwood_sdio_init(&rd88f6281_mvsdio_data);
 	kirkwood_uart0_init();
-
-	platform_device_register(&rd88f6281_nand_flash);
 }
 
 static int __init rd88f6281_pci_init(void)
diff --git a/arch/arm/mach-kirkwood/sheevaplug-setup.c b/arch/arm/mach-kirkwood/sheevaplug-setup.c
index dcc448c..c7319ee 100644
--- a/arch/arm/mach-kirkwood/sheevaplug-setup.c
+++ b/arch/arm/mach-kirkwood/sheevaplug-setup.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/gpio.h>
@@ -20,7 +19,6 @@
 #include <asm/mach/arch.h>
 #include <mach/kirkwood.h>
 #include <plat/mvsdio.h>
-#include <plat/orion_nand.h>
 #include "common.h"
 #include "mpp.h"
 
@@ -40,32 +38,6 @@ static struct mtd_partition sheevaplug_nand_parts[] = {
 	},
 };
 
-static struct resource sheevaplug_nand_resource = {
-	.flags		= IORESOURCE_MEM,
-	.start		= KIRKWOOD_NAND_MEM_PHYS_BASE,
-	.end		= KIRKWOOD_NAND_MEM_PHYS_BASE +
-			  KIRKWOOD_NAND_MEM_SIZE - 1,
-};
-
-static struct orion_nand_data sheevaplug_nand_data = {
-	.parts		= sheevaplug_nand_parts,
-	.nr_parts	= ARRAY_SIZE(sheevaplug_nand_parts),
-	.cle		= 0,
-	.ale		= 1,
-	.width		= 8,
-	.chip_delay	= 25,
-};
-
-static struct platform_device sheevaplug_nand_flash = {
-	.name		= "orion_nand",
-	.id		= -1,
-	.dev		= {
-		.platform_data	= &sheevaplug_nand_data,
-	},
-	.resource	= &sheevaplug_nand_resource,
-	.num_resources	= 1,
-};
-
 static struct mv643xx_eth_platform_data sheevaplug_ge00_data = {
 	.phy_addr	= MV643XX_ETH_PHY_ADDR(0),
 };
@@ -111,6 +83,7 @@ static void __init sheevaplug_init(void)
 	kirkwood_mpp_conf(sheevaplug_mpp_config);
 
 	kirkwood_uart0_init();
+	kirkwood_nand_init(ARRAY_AND_SIZE(sheevaplug_nand_parts), 25);
 
 	if (gpio_request(29, "USB Power Enable") != 0 ||
 	    gpio_direction_output(29, 1) != 0)
@@ -120,7 +93,6 @@ static void __init sheevaplug_init(void)
 	kirkwood_ge00_init(&sheevaplug_ge00_data);
 	kirkwood_sdio_init(&sheevaplug_mvsdio_data);
 
-	platform_device_register(&sheevaplug_nand_flash);
 	platform_device_register(&sheevaplug_leds);
 }
 
-- 
cgit v0.10.2


From e8b2b7ba1200de8e50788e358e2d945a0c0fcfad Mon Sep 17 00:00:00 2001
From: Rabeeh Khoury <rabeeh@marvell.com>
Date: Sun, 22 Mar 2009 17:30:32 +0200
Subject: [ARM] Kirkwood: clock gating for unused peripherals

To save power:

1. Enabling clock gating of unused peripherals

2. PLL and PHY of the units are also disabled (when possible.

Signed-off-by: Rabeeh Khoury <rabeeh@marvell.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 6e3eb1a..d127731 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -55,6 +55,13 @@ void __init kirkwood_map_io(void)
 	iotable_init(kirkwood_io_desc, ARRAY_SIZE(kirkwood_io_desc));
 }
 
+/*
+ * Default clock control bits.  Any bit _not_ set in this variable
+ * will be cleared from the hardware after platform devices have been
+ * registered.  Some reserved bits must be set to 1.
+ */
+unsigned int kirkwood_clk_ctrl = CGC_DUNIT | CGC_RESERVED;
+	
 
 /*****************************************************************************
  * EHCI
@@ -96,6 +103,7 @@ static struct platform_device kirkwood_ehci = {
 
 void __init kirkwood_ehci_init(void)
 {
+	kirkwood_clk_ctrl |= CGC_USB0;
 	platform_device_register(&kirkwood_ehci);
 }
 
@@ -152,6 +160,7 @@ static struct platform_device kirkwood_ge00 = {
 
 void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
+	kirkwood_clk_ctrl |= CGC_GE0;
 	eth_data->shared = &kirkwood_ge00_shared;
 	kirkwood_ge00.dev.platform_data = eth_data;
 
@@ -213,6 +222,7 @@ static struct platform_device kirkwood_ge01 = {
 
 void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data)
 {
+	kirkwood_clk_ctrl |= CGC_GE1;
 	eth_data->shared = &kirkwood_ge01_shared;
 	kirkwood_ge01.dev.platform_data = eth_data;
 
@@ -287,6 +297,7 @@ static struct platform_device kirkwood_nand_flash = {
 void __init kirkwood_nand_init(struct mtd_partition *parts, int nr_parts,
 			       int chip_delay)
 {
+	kirkwood_clk_ctrl |= CGC_RUNIT;
 	kirkwood_nand_data.parts = parts;
 	kirkwood_nand_data.nr_parts = nr_parts;
 	kirkwood_nand_data.chip_delay = chip_delay;
@@ -338,6 +349,9 @@ static struct platform_device kirkwood_sata = {
 
 void __init kirkwood_sata_init(struct mv_sata_platform_data *sata_data)
 {
+	kirkwood_clk_ctrl |= CGC_SATA0;
+	if (sata_data->n_ports > 1)
+		kirkwood_clk_ctrl |= CGC_SATA1;
 	sata_data->dram = &kirkwood_mbus_dram_info;
 	kirkwood_sata.dev.platform_data = sata_data;
 	platform_device_register(&kirkwood_sata);
@@ -383,6 +397,7 @@ void __init kirkwood_sdio_init(struct mvsdio_platform_data *mvsdio_data)
 	else
 		mvsdio_data->clock = 200000000;
 	mvsdio_data->dram = &kirkwood_mbus_dram_info;
+	kirkwood_clk_ctrl |= CGC_SDIO;
 	kirkwood_sdio.dev.platform_data = mvsdio_data;
 	platform_device_register(&kirkwood_sdio);
 }
@@ -414,6 +429,7 @@ static struct platform_device kirkwood_spi = {
 
 void __init kirkwood_spi_init()
 {
+	kirkwood_clk_ctrl |= CGC_RUNIT;
 	platform_device_register(&kirkwood_spi);
 }
 
@@ -634,6 +650,7 @@ static struct platform_device kirkwood_xor01_channel = {
 
 static void __init kirkwood_xor0_init(void)
 {
+	kirkwood_clk_ctrl |= CGC_XOR0;
 	platform_device_register(&kirkwood_xor0_shared);
 
 	/*
@@ -732,6 +749,7 @@ static struct platform_device kirkwood_xor11_channel = {
 
 static void __init kirkwood_xor1_init(void)
 {
+	kirkwood_clk_ctrl |= CGC_XOR1;
 	platform_device_register(&kirkwood_xor1_shared);
 
 	/*
@@ -844,3 +862,44 @@ void __init kirkwood_init(void)
 	kirkwood_xor0_init();
 	kirkwood_xor1_init();
 }
+
+static int __init kirkwood_clock_gate(void)
+{
+	unsigned int curr = readl(CLOCK_GATING_CTRL);
+
+	printk(KERN_DEBUG "Gating clock of unused units\n");
+	printk(KERN_DEBUG "before: 0x%08x\n", curr);
+
+	/* Make sure those units are accessible */
+	writel(curr | CGC_SATA0 | CGC_SATA1 | CGC_PEX0, CLOCK_GATING_CTRL);
+
+	/* For SATA: first shutdown the phy */
+	if (!(kirkwood_clk_ctrl & CGC_SATA0)) {
+		/* Disable PLL and IVREF */
+		writel(readl(SATA0_PHY_MODE_2) & ~0xf, SATA0_PHY_MODE_2);
+		/* Disable PHY */
+		writel(readl(SATA0_IF_CTRL) | 0x200, SATA0_IF_CTRL);
+	}
+	if (!(kirkwood_clk_ctrl & CGC_SATA1)) {
+		/* Disable PLL and IVREF */
+		writel(readl(SATA1_PHY_MODE_2) & ~0xf, SATA1_PHY_MODE_2);
+		/* Disable PHY */
+		writel(readl(SATA1_IF_CTRL) | 0x200, SATA1_IF_CTRL);
+	}
+	
+	/* For PCIe: first shutdown the phy */
+	if (!(kirkwood_clk_ctrl & CGC_PEX0)) {
+		writel(readl(PCIE_LINK_CTRL) | 0x10, PCIE_LINK_CTRL);
+		while (1)
+			if (readl(PCIE_STATUS) & 0x1)
+				break;
+		writel(readl(PCIE_LINK_CTRL) & ~0x10, PCIE_LINK_CTRL);
+	}
+
+	/* Now gate clock the required units */
+	writel(kirkwood_clk_ctrl, CLOCK_GATING_CTRL);
+	printk(KERN_DEBUG " after: 0x%08x\n", readl(CLOCK_GATING_CTRL));
+
+	return 0;
+}
+late_initcall(kirkwood_clock_gate);
diff --git a/arch/arm/mach-kirkwood/include/mach/bridge-regs.h b/arch/arm/mach-kirkwood/include/mach/bridge-regs.h
index 4f7029f..00d96ab 100644
--- a/arch/arm/mach-kirkwood/include/mach/bridge-regs.h
+++ b/arch/arm/mach-kirkwood/include/mach/bridge-regs.h
@@ -39,4 +39,22 @@
 #define L2_CONFIG_REG		(BRIDGE_VIRT_BASE | 0x0128)
 #define L2_WRITETHROUGH		0x00000010
 
+#define CLOCK_GATING_CTRL	(BRIDGE_VIRT_BASE | 0x11c)
+#define CGC_GE0			(1 << 0)
+#define CGC_PEX0		(1 << 2)
+#define CGC_USB0		(1 << 3)
+#define CGC_SDIO		(1 << 4)
+#define CGC_TSU			(1 << 5)
+#define CGC_DUNIT		(1 << 6)
+#define CGC_RUNIT		(1 << 7)
+#define CGC_XOR0		(1 << 8)
+#define CGC_AUDIO		(1 << 9)
+#define CGC_SATA0		(1 << 14)
+#define CGC_SATA1		(1 << 15)
+#define CGC_XOR1		(1 << 16)
+#define CGC_CRYPTO		(1 << 17)
+#define CGC_GE1			(1 << 19)
+#define CGC_TDM			(1 << 20)
+#define CGC_RESERVED		((1 << 18) | (0x6 << 21))
+
 #endif
diff --git a/arch/arm/mach-kirkwood/include/mach/kirkwood.h b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
index f20ff64..f49a29b 100644
--- a/arch/arm/mach-kirkwood/include/mach/kirkwood.h
+++ b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
@@ -65,6 +65,8 @@
 #define BRIDGE_VIRT_BASE	(KIRKWOOD_REGS_VIRT_BASE | 0x20000)
 
 #define PCIE_VIRT_BASE		(KIRKWOOD_REGS_VIRT_BASE | 0x40000)
+#define PCIE_LINK_CTRL		(PCIE_VIRT_BASE | 0x70)
+#define PCIE_STATUS		(PCIE_VIRT_BASE | 0x1a04)
 
 #define USB_PHYS_BASE		(KIRKWOOD_REGS_PHYS_BASE | 0x50000)
 
@@ -81,6 +83,11 @@
 #define GE01_PHYS_BASE		(KIRKWOOD_REGS_PHYS_BASE | 0x74000)
 
 #define SATA_PHYS_BASE		(KIRKWOOD_REGS_PHYS_BASE | 0x80000)
+#define SATA_VIRT_BASE		(KIRKWOOD_REGS_VIRT_BASE | 0x80000)
+#define SATA0_IF_CTRL		(SATA_VIRT_BASE | 0x2050)
+#define SATA0_PHY_MODE_2	(SATA_VIRT_BASE | 0x2330)
+#define SATA1_IF_CTRL		(SATA_VIRT_BASE | 0x4050)
+#define SATA1_PHY_MODE_2	(SATA_VIRT_BASE | 0x4330)
 
 #define SDIO_PHYS_BASE		(KIRKWOOD_REGS_PHYS_BASE | 0x90000)
 
diff --git a/arch/arm/mach-kirkwood/pcie.c b/arch/arm/mach-kirkwood/pcie.c
index 73fccac..d90b9aa 100644
--- a/arch/arm/mach-kirkwood/pcie.c
+++ b/arch/arm/mach-kirkwood/pcie.c
@@ -14,6 +14,7 @@
 #include <asm/irq.h>
 #include <asm/mach/pci.h>
 #include <plat/pcie.h>
+#include <mach/bridge-regs.h>
 #include "common.h"
 
 
@@ -95,6 +96,7 @@ static struct pci_ops pcie_ops = {
 static int kirkwood_pcie_setup(int nr, struct pci_sys_data *sys)
 {
 	struct resource *res;
+	extern unsigned int kirkwood_clk_ctrl;
 
 	/*
 	 * Generic PCIe unit setup.
@@ -133,6 +135,8 @@ static int kirkwood_pcie_setup(int nr, struct pci_sys_data *sys)
 	sys->resource[2] = NULL;
 	sys->io_offset = 0;
 
+	kirkwood_clk_ctrl |= CGC_PEX0;
+
 	return 1;
 }
 
-- 
cgit v0.10.2


From 6462c6160af557c310d5941f4700ea2c7f6c67b2 Mon Sep 17 00:00:00 2001
From: Thomas Reitmayr <treitmayr@devbase.at>
Date: Mon, 1 Jun 2009 13:38:33 +0200
Subject: [ARM] orion5x: Change names of defines for Reset-Out-Mask register

The name of the define for the Reset-Out-Mask register as well as its
bit for the watchdog reset are changed to match the names used for
Kirkwood (which in turn match the processor specification more
closely). There is no functional change.

This patch prepares for adding orion5x_wdt as a platform device to
Kirkwood.

Signed-off-by: Thomas Reitmayr <treitmayr@devbase.at>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-orion5x/include/mach/bridge-regs.h b/arch/arm/mach-orion5x/include/mach/bridge-regs.h
index be896e5..5c9744c 100644
--- a/arch/arm/mach-orion5x/include/mach/bridge-regs.h
+++ b/arch/arm/mach-orion5x/include/mach/bridge-regs.h
@@ -17,8 +17,8 @@
 
 #define CPU_CTRL		(ORION5X_BRIDGE_VIRT_BASE | 0x104)
 
-#define CPU_RESET_MASK		(ORION5X_BRIDGE_VIRT_BASE | 0x108)
-#define WDT_RESET		0x0002
+#define RSTOUTn_MASK		(ORION5X_BRIDGE_VIRT_BASE | 0x108)
+#define WDT_RESET_OUT_EN	0x0002
 
 #define CPU_SOFT_RESET		(ORION5X_BRIDGE_VIRT_BASE | 0x10c)
 
diff --git a/arch/arm/mach-orion5x/include/mach/system.h b/arch/arm/mach-orion5x/include/mach/system.h
index e912490..60e734c 100644
--- a/arch/arm/mach-orion5x/include/mach/system.h
+++ b/arch/arm/mach-orion5x/include/mach/system.h
@@ -23,7 +23,7 @@ static inline void arch_reset(char mode, const char *cmd)
 	/*
 	 * Enable and issue soft reset
 	 */
-	orion5x_setbits(CPU_RESET_MASK, (1 << 2));
+	orion5x_setbits(RSTOUTn_MASK, (1 << 2));
 	orion5x_setbits(CPU_SOFT_RESET, 1);
 }
 
diff --git a/arch/arm/mach-orion5x/mss2-setup.c b/arch/arm/mach-orion5x/mss2-setup.c
index 41e6d50..61c086b 100644
--- a/arch/arm/mach-orion5x/mss2-setup.c
+++ b/arch/arm/mach-orion5x/mss2-setup.c
@@ -181,9 +181,9 @@ static void mss2_power_off(void)
 	/*
 	 * Enable and issue soft reset
 	 */
-	reg = readl(CPU_RESET_MASK);
+	reg = readl(RSTOUTn_MASK);
 	reg |= 1 << 2;
-	writel(reg, CPU_RESET_MASK);
+	writel(reg, RSTOUTn_MASK);
 
 	reg = readl(CPU_SOFT_RESET);
 	reg |= 1;
diff --git a/drivers/watchdog/orion5x_wdt.c b/drivers/watchdog/orion5x_wdt.c
index 2cde568..d2dc976 100644
--- a/drivers/watchdog/orion5x_wdt.c
+++ b/drivers/watchdog/orion5x_wdt.c
@@ -73,9 +73,9 @@ static void orion5x_wdt_enable(void)
 	writel(reg, TIMER_CTRL);
 
 	/* Enable reset on watchdog */
-	reg = readl(CPU_RESET_MASK);
-	reg |= WDT_RESET;
-	writel(reg, CPU_RESET_MASK);
+	reg = readl(RSTOUTn_MASK);
+	reg |= WDT_RESET_OUT_EN;
+	writel(reg, RSTOUTn_MASK);
 
 	spin_unlock(&wdt_lock);
 }
@@ -87,9 +87,9 @@ static void orion5x_wdt_disable(void)
 	spin_lock(&wdt_lock);
 
 	/* Disable reset on watchdog */
-	reg = readl(CPU_RESET_MASK);
-	reg &= ~WDT_RESET;
-	writel(reg, CPU_RESET_MASK);
+	reg = readl(RSTOUTn_MASK);
+	reg &= ~WDT_RESET_OUT_EN;
+	writel(reg, RSTOUTn_MASK);
 
 	/* Disable watchdog timer */
 	reg = readl(TIMER_CTRL);
-- 
cgit v0.10.2


From 054bd3f053de54c81b20df11f354476389826e61 Mon Sep 17 00:00:00 2001
From: Thomas Reitmayr <treitmayr@devbase.at>
Date: Mon, 1 Jun 2009 13:38:34 +0200
Subject: [ARM] Kirkwood: Add the watchdog timer as a platform device.

The Kirkwood architecture uses the same watchdog device as the Orion
architecture. This patch adds orion5x_wdt as a platform device for
Kirkwood.

Signed-off-by: Thomas Reitmayr <treitmayr@devbase.at>
Tested-by: Martin Michlmayr <tbm@cyrius.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index d127731..a053184 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -30,6 +30,7 @@
 #include <plat/mvsdio.h>
 #include <plat/mv_xor.h>
 #include <plat/orion_nand.h>
+#include <plat/orion5x_wdt.h>
 #include <plat/time.h>
 #include "common.h"
 
@@ -768,6 +769,29 @@ static void __init kirkwood_xor1_init(void)
 
 
 /*****************************************************************************
+ * Watchdog
+ ****************************************************************************/
+static struct orion5x_wdt_platform_data kirkwood_wdt_data = {
+	.tclk		= 0,
+};
+
+static struct platform_device kirkwood_wdt_device = {
+	.name		= "orion5x_wdt",
+	.id		= -1,
+	.dev		= {
+		.platform_data	= &kirkwood_wdt_data,
+	},
+	.num_resources	= 0,
+};
+
+static void __init kirkwood_wdt_init(void)
+{
+	kirkwood_wdt_data.tclk = kirkwood_tclk;
+	platform_device_register(&kirkwood_wdt_device);
+}
+
+
+/*****************************************************************************
  * Time handling
  ****************************************************************************/
 int kirkwood_tclk;
@@ -859,6 +883,7 @@ void __init kirkwood_init(void)
 
 	/* internal devices that every board has */
 	kirkwood_rtc_init();
+	kirkwood_wdt_init();
 	kirkwood_xor0_init();
 	kirkwood_xor1_init();
 }
diff --git a/arch/arm/mach-kirkwood/include/mach/bridge-regs.h b/arch/arm/mach-kirkwood/include/mach/bridge-regs.h
index 00d96ab..9e80d92 100644
--- a/arch/arm/mach-kirkwood/include/mach/bridge-regs.h
+++ b/arch/arm/mach-kirkwood/include/mach/bridge-regs.h
@@ -17,12 +17,15 @@
 #define CPU_RESET		0x00000002
 
 #define RSTOUTn_MASK		(BRIDGE_VIRT_BASE | 0x0108)
+#define WDT_RESET_OUT_EN	0x00000002
 #define SOFT_RESET_OUT_EN	0x00000004
 
 #define SYSTEM_SOFT_RESET	(BRIDGE_VIRT_BASE | 0x010c)
 #define SOFT_RESET		0x00000001
 
 #define BRIDGE_CAUSE		(BRIDGE_VIRT_BASE | 0x0110)
+#define WDT_INT_REQ		0x0008
+
 #define BRIDGE_MASK		(BRIDGE_VIRT_BASE | 0x0114)
 #define BRIDGE_INT_TIMER0	0x0002
 #define BRIDGE_INT_TIMER1	0x0004
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 5eb8f21..1e983b1 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -233,10 +233,10 @@ config DAVINCI_WATCHDOG
 
 config ORION5X_WATCHDOG
 	tristate "Orion5x watchdog"
-	depends on ARCH_ORION5X
+	depends on ARCH_ORION5X || ARCH_KIRKWOOD
 	help
 	  Say Y here if to include support for the watchdog timer
-	  in the Orion5x ARM SoCs.
+	  in the Orion5x and Kirkwood ARM SoCs.
 	  To compile this driver as a module, choose M here: the
 	  module will be called orion5x_wdt.
 
-- 
cgit v0.10.2


From 3b937a7dbddbedd9457b33fcc8fa369c0c229c6e Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 1 Jun 2009 13:56:02 -0400
Subject: [ARM] Orion/Kirkwood: rename orion5x_wdt to orion_wdt

The Orion watchdog driver is also used on Kirkwood.

Convention is to use orion5x for stuff specific to 88F5xxx Orion chips
and simply "orion" for shared stuff across SoCs including Kirkwood.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index a053184..3da32b1 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -30,7 +30,7 @@
 #include <plat/mvsdio.h>
 #include <plat/mv_xor.h>
 #include <plat/orion_nand.h>
-#include <plat/orion5x_wdt.h>
+#include <plat/orion_wdt.h>
 #include <plat/time.h>
 #include "common.h"
 
@@ -771,12 +771,12 @@ static void __init kirkwood_xor1_init(void)
 /*****************************************************************************
  * Watchdog
  ****************************************************************************/
-static struct orion5x_wdt_platform_data kirkwood_wdt_data = {
+static struct orion_wdt_platform_data kirkwood_wdt_data = {
 	.tclk		= 0,
 };
 
 static struct platform_device kirkwood_wdt_device = {
-	.name		= "orion5x_wdt",
+	.name		= "orion_wdt",
 	.id		= -1,
 	.dev		= {
 		.platform_data	= &kirkwood_wdt_data,
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index b1c7778..c3e2bea 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -31,7 +31,7 @@
 #include <plat/ehci-orion.h>
 #include <plat/mv_xor.h>
 #include <plat/orion_nand.h>
-#include <plat/orion5x_wdt.h>
+#include <plat/orion_wdt.h>
 #include <plat/time.h>
 #include "common.h"
 
@@ -540,12 +540,12 @@ void __init orion5x_xor_init(void)
 /*****************************************************************************
  * Watchdog
  ****************************************************************************/
-static struct orion5x_wdt_platform_data orion5x_wdt_data = {
+static struct orion_wdt_platform_data orion5x_wdt_data = {
 	.tclk			= 0,
 };
 
 static struct platform_device orion5x_wdt_device = {
-	.name		= "orion5x_wdt",
+	.name		= "orion_wdt",
 	.id		= -1,
 	.dev		= {
 		.platform_data	= &orion5x_wdt_data,
diff --git a/arch/arm/plat-orion/include/plat/orion5x_wdt.h b/arch/arm/plat-orion/include/plat/orion5x_wdt.h
deleted file mode 100644
index 3c9cf6a3..0000000
--- a/arch/arm/plat-orion/include/plat/orion5x_wdt.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * arch/arm/plat-orion/include/plat/orion5x_wdt.h
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __PLAT_ORION5X_WDT_H
-#define __PLAT_ORION5X_WDT_H
-
-struct orion5x_wdt_platform_data {
-	u32	tclk;		/* no <linux/clk.h> support yet */
-};
-
-
-#endif
-
diff --git a/arch/arm/plat-orion/include/plat/orion_wdt.h b/arch/arm/plat-orion/include/plat/orion_wdt.h
new file mode 100644
index 0000000..665c362
--- /dev/null
+++ b/arch/arm/plat-orion/include/plat/orion_wdt.h
@@ -0,0 +1,18 @@
+/*
+ * arch/arm/plat-orion/include/plat/orion_wdt.h
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __PLAT_ORION_WDT_H
+#define __PLAT_ORION_WDT_H
+
+struct orion_wdt_platform_data {
+	u32	tclk;		/* no <linux/clk.h> support yet */
+};
+
+
+#endif
+
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 1e983b1..5744cac 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -231,14 +231,14 @@ config DAVINCI_WATCHDOG
 	  NOTE: once enabled, this timer cannot be disabled.
 	  Say N if you are unsure.
 
-config ORION5X_WATCHDOG
-	tristate "Orion5x watchdog"
+config ORION_WATCHDOG
+	tristate "Orion watchdog"
 	depends on ARCH_ORION5X || ARCH_KIRKWOOD
 	help
 	  Say Y here if to include support for the watchdog timer
-	  in the Orion5x and Kirkwood ARM SoCs.
+	  in the Marvell Orion5x and Kirkwood ARM SoCs.
 	  To compile this driver as a module, choose M here: the
-	  module will be called orion5x_wdt.
+	  module will be called orion_wdt.
 
 # AVR32 Architecture
 
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 7f8c56b..c3afa14 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_EP93XX_WATCHDOG) += ep93xx_wdt.o
 obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o
 obj-$(CONFIG_IOP_WATCHDOG) += iop_wdt.o
 obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o
-obj-$(CONFIG_ORION5X_WATCHDOG) += orion5x_wdt.o
+obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o
 
 # AVR32 Architecture
 obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
diff --git a/drivers/watchdog/orion5x_wdt.c b/drivers/watchdog/orion5x_wdt.c
deleted file mode 100644
index d2dc976..0000000
--- a/drivers/watchdog/orion5x_wdt.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * drivers/watchdog/orion5x_wdt.c
- *
- * Watchdog driver for Orion5x processors
- *
- * Author: Sylver Bruneau <sylver.bruneau@googlemail.com>
- *
- * This file is licensed under  the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-#include <linux/platform_device.h>
-#include <linux/watchdog.h>
-#include <linux/init.h>
-#include <linux/uaccess.h>
-#include <linux/io.h>
-#include <linux/spinlock.h>
-#include <mach/bridge-regs.h>
-#include <plat/orion5x_wdt.h>
-
-/*
- * Watchdog timer block registers.
- */
-#define TIMER_CTRL		(TIMER_VIRT_BASE + 0x0000)
-#define  WDT_EN			0x0010
-#define WDT_VAL			(TIMER_VIRT_BASE + 0x0024)
-
-#define WDT_MAX_CYCLE_COUNT	0xffffffff
-#define WDT_IN_USE		0
-#define WDT_OK_TO_CLOSE		1
-
-static int nowayout = WATCHDOG_NOWAYOUT;
-static int heartbeat = -1;		/* module parameter (seconds) */
-static unsigned int wdt_max_duration;	/* (seconds) */
-static unsigned int wdt_tclk;
-static unsigned long wdt_status;
-static spinlock_t wdt_lock;
-
-static void orion5x_wdt_ping(void)
-{
-	spin_lock(&wdt_lock);
-
-	/* Reload watchdog duration */
-	writel(wdt_tclk * heartbeat, WDT_VAL);
-
-	spin_unlock(&wdt_lock);
-}
-
-static void orion5x_wdt_enable(void)
-{
-	u32 reg;
-
-	spin_lock(&wdt_lock);
-
-	/* Set watchdog duration */
-	writel(wdt_tclk * heartbeat, WDT_VAL);
-
-	/* Clear watchdog timer interrupt */
-	reg = readl(BRIDGE_CAUSE);
-	reg &= ~WDT_INT_REQ;
-	writel(reg, BRIDGE_CAUSE);
-
-	/* Enable watchdog timer */
-	reg = readl(TIMER_CTRL);
-	reg |= WDT_EN;
-	writel(reg, TIMER_CTRL);
-
-	/* Enable reset on watchdog */
-	reg = readl(RSTOUTn_MASK);
-	reg |= WDT_RESET_OUT_EN;
-	writel(reg, RSTOUTn_MASK);
-
-	spin_unlock(&wdt_lock);
-}
-
-static void orion5x_wdt_disable(void)
-{
-	u32 reg;
-
-	spin_lock(&wdt_lock);
-
-	/* Disable reset on watchdog */
-	reg = readl(RSTOUTn_MASK);
-	reg &= ~WDT_RESET_OUT_EN;
-	writel(reg, RSTOUTn_MASK);
-
-	/* Disable watchdog timer */
-	reg = readl(TIMER_CTRL);
-	reg &= ~WDT_EN;
-	writel(reg, TIMER_CTRL);
-
-	spin_unlock(&wdt_lock);
-}
-
-static int orion5x_wdt_get_timeleft(int *time_left)
-{
-	spin_lock(&wdt_lock);
-	*time_left = readl(WDT_VAL) / wdt_tclk;
-	spin_unlock(&wdt_lock);
-	return 0;
-}
-
-static int orion5x_wdt_open(struct inode *inode, struct file *file)
-{
-	if (test_and_set_bit(WDT_IN_USE, &wdt_status))
-		return -EBUSY;
-	clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
-	orion5x_wdt_enable();
-	return nonseekable_open(inode, file);
-}
-
-static ssize_t orion5x_wdt_write(struct file *file, const char *data,
-					size_t len, loff_t *ppos)
-{
-	if (len) {
-		if (!nowayout) {
-			size_t i;
-
-			clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
-			for (i = 0; i != len; i++) {
-				char c;
-
-				if (get_user(c, data + i))
-					return -EFAULT;
-				if (c == 'V')
-					set_bit(WDT_OK_TO_CLOSE, &wdt_status);
-			}
-		}
-		orion5x_wdt_ping();
-	}
-	return len;
-}
-
-static int orion5x_wdt_settimeout(int new_time)
-{
-	if ((new_time <= 0) || (new_time > wdt_max_duration))
-		return -EINVAL;
-
-	/* Set new watchdog time to be used when
-	 * orion5x_wdt_enable() or orion5x_wdt_ping() is called. */
-	heartbeat = new_time;
-	return 0;
-}
-
-static const struct watchdog_info ident = {
-	.options	= WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT |
-			  WDIOF_KEEPALIVEPING,
-	.identity	= "Orion5x Watchdog",
-};
-
-static long orion5x_wdt_ioctl(struct file *file, unsigned int cmd,
-				unsigned long arg)
-{
-	int ret = -ENOTTY;
-	int time;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		ret = copy_to_user((struct watchdog_info *)arg, &ident,
-				   sizeof(ident)) ? -EFAULT : 0;
-		break;
-
-	case WDIOC_GETSTATUS:
-	case WDIOC_GETBOOTSTATUS:
-		ret = put_user(0, (int *)arg);
-		break;
-
-	case WDIOC_KEEPALIVE:
-		orion5x_wdt_ping();
-		ret = 0;
-		break;
-
-	case WDIOC_SETTIMEOUT:
-		ret = get_user(time, (int *)arg);
-		if (ret)
-			break;
-
-		if (orion5x_wdt_settimeout(time)) {
-			ret = -EINVAL;
-			break;
-		}
-		orion5x_wdt_ping();
-		/* Fall through */
-
-	case WDIOC_GETTIMEOUT:
-		ret = put_user(heartbeat, (int *)arg);
-		break;
-
-	case WDIOC_GETTIMELEFT:
-		if (orion5x_wdt_get_timeleft(&time)) {
-			ret = -EINVAL;
-			break;
-		}
-		ret = put_user(time, (int *)arg);
-		break;
-	}
-	return ret;
-}
-
-static int orion5x_wdt_release(struct inode *inode, struct file *file)
-{
-	if (test_bit(WDT_OK_TO_CLOSE, &wdt_status))
-		orion5x_wdt_disable();
-	else
-		printk(KERN_CRIT "WATCHDOG: Device closed unexpectedly - "
-					"timer will not stop\n");
-	clear_bit(WDT_IN_USE, &wdt_status);
-	clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
-
-	return 0;
-}
-
-
-static const struct file_operations orion5x_wdt_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.write		= orion5x_wdt_write,
-	.unlocked_ioctl	= orion5x_wdt_ioctl,
-	.open		= orion5x_wdt_open,
-	.release	= orion5x_wdt_release,
-};
-
-static struct miscdevice orion5x_wdt_miscdev = {
-	.minor		= WATCHDOG_MINOR,
-	.name		= "watchdog",
-	.fops		= &orion5x_wdt_fops,
-};
-
-static int __devinit orion5x_wdt_probe(struct platform_device *pdev)
-{
-	struct orion5x_wdt_platform_data *pdata = pdev->dev.platform_data;
-	int ret;
-
-	if (pdata) {
-		wdt_tclk = pdata->tclk;
-	} else {
-		printk(KERN_ERR "Orion5x Watchdog misses platform data\n");
-		return -ENODEV;
-	}
-
-	if (orion5x_wdt_miscdev.parent)
-		return -EBUSY;
-	orion5x_wdt_miscdev.parent = &pdev->dev;
-
-	wdt_max_duration = WDT_MAX_CYCLE_COUNT / wdt_tclk;
-	if (orion5x_wdt_settimeout(heartbeat))
-		heartbeat = wdt_max_duration;
-
-	ret = misc_register(&orion5x_wdt_miscdev);
-	if (ret)
-		return ret;
-
-	printk(KERN_INFO "Orion5x Watchdog Timer: Initial timeout %d sec%s\n",
-				heartbeat, nowayout ? ", nowayout" : "");
-	return 0;
-}
-
-static int __devexit orion5x_wdt_remove(struct platform_device *pdev)
-{
-	int ret;
-
-	if (test_bit(WDT_IN_USE, &wdt_status)) {
-		orion5x_wdt_disable();
-		clear_bit(WDT_IN_USE, &wdt_status);
-	}
-
-	ret = misc_deregister(&orion5x_wdt_miscdev);
-	if (!ret)
-		orion5x_wdt_miscdev.parent = NULL;
-
-	return ret;
-}
-
-static void orion5x_wdt_shutdown(struct platform_device *pdev)
-{
-	if (test_bit(WDT_IN_USE, &wdt_status))
-		orion5x_wdt_disable();
-}
-
-static struct platform_driver orion5x_wdt_driver = {
-	.probe		= orion5x_wdt_probe,
-	.remove		= __devexit_p(orion5x_wdt_remove),
-	.shutdown	= orion5x_wdt_shutdown,
-	.driver		= {
-		.owner	= THIS_MODULE,
-		.name	= "orion5x_wdt",
-	},
-};
-
-static int __init orion5x_wdt_init(void)
-{
-	spin_lock_init(&wdt_lock);
-	return platform_driver_register(&orion5x_wdt_driver);
-}
-
-static void __exit orion5x_wdt_exit(void)
-{
-	platform_driver_unregister(&orion5x_wdt_driver);
-}
-
-module_init(orion5x_wdt_init);
-module_exit(orion5x_wdt_exit);
-
-MODULE_AUTHOR("Sylver Bruneau <sylver.bruneau@googlemail.com>");
-MODULE_DESCRIPTION("Orion5x Processor Watchdog");
-
-module_param(heartbeat, int, 0);
-MODULE_PARM_DESC(heartbeat, "Initial watchdog heartbeat in seconds");
-
-module_param(nowayout, int, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
-				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
new file mode 100644
index 0000000..2d9fb96
--- /dev/null
+++ b/drivers/watchdog/orion_wdt.c
@@ -0,0 +1,322 @@
+/*
+ * drivers/watchdog/orion_wdt.c
+ *
+ * Watchdog driver for Orion/Kirkwood processors
+ *
+ * Author: Sylver Bruneau <sylver.bruneau@googlemail.com>
+ *
+ * This file is licensed under  the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+#include <linux/init.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <mach/bridge-regs.h>
+#include <plat/orion_wdt.h>
+
+/*
+ * Watchdog timer block registers.
+ */
+#define TIMER_CTRL		(TIMER_VIRT_BASE + 0x0000)
+#define  WDT_EN			0x0010
+#define WDT_VAL			(TIMER_VIRT_BASE + 0x0024)
+
+#define WDT_MAX_CYCLE_COUNT	0xffffffff
+#define WDT_IN_USE		0
+#define WDT_OK_TO_CLOSE		1
+
+static int nowayout = WATCHDOG_NOWAYOUT;
+static int heartbeat = -1;		/* module parameter (seconds) */
+static unsigned int wdt_max_duration;	/* (seconds) */
+static unsigned int wdt_tclk;
+static unsigned long wdt_status;
+static spinlock_t wdt_lock;
+
+static void orion_wdt_ping(void)
+{
+	spin_lock(&wdt_lock);
+
+	/* Reload watchdog duration */
+	writel(wdt_tclk * heartbeat, WDT_VAL);
+
+	spin_unlock(&wdt_lock);
+}
+
+static void orion_wdt_enable(void)
+{
+	u32 reg;
+
+	spin_lock(&wdt_lock);
+
+	/* Set watchdog duration */
+	writel(wdt_tclk * heartbeat, WDT_VAL);
+
+	/* Clear watchdog timer interrupt */
+	reg = readl(BRIDGE_CAUSE);
+	reg &= ~WDT_INT_REQ;
+	writel(reg, BRIDGE_CAUSE);
+
+	/* Enable watchdog timer */
+	reg = readl(TIMER_CTRL);
+	reg |= WDT_EN;
+	writel(reg, TIMER_CTRL);
+
+	/* Enable reset on watchdog */
+	reg = readl(RSTOUTn_MASK);
+	reg |= WDT_RESET_OUT_EN;
+	writel(reg, RSTOUTn_MASK);
+
+	spin_unlock(&wdt_lock);
+}
+
+static void orion_wdt_disable(void)
+{
+	u32 reg;
+
+	spin_lock(&wdt_lock);
+
+	/* Disable reset on watchdog */
+	reg = readl(RSTOUTn_MASK);
+	reg &= ~WDT_RESET_OUT_EN;
+	writel(reg, RSTOUTn_MASK);
+
+	/* Disable watchdog timer */
+	reg = readl(TIMER_CTRL);
+	reg &= ~WDT_EN;
+	writel(reg, TIMER_CTRL);
+
+	spin_unlock(&wdt_lock);
+}
+
+static int orion_wdt_get_timeleft(int *time_left)
+{
+	spin_lock(&wdt_lock);
+	*time_left = readl(WDT_VAL) / wdt_tclk;
+	spin_unlock(&wdt_lock);
+	return 0;
+}
+
+static int orion_wdt_open(struct inode *inode, struct file *file)
+{
+	if (test_and_set_bit(WDT_IN_USE, &wdt_status))
+		return -EBUSY;
+	clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
+	orion_wdt_enable();
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t orion_wdt_write(struct file *file, const char *data,
+					size_t len, loff_t *ppos)
+{
+	if (len) {
+		if (!nowayout) {
+			size_t i;
+
+			clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
+			for (i = 0; i != len; i++) {
+				char c;
+
+				if (get_user(c, data + i))
+					return -EFAULT;
+				if (c == 'V')
+					set_bit(WDT_OK_TO_CLOSE, &wdt_status);
+			}
+		}
+		orion_wdt_ping();
+	}
+	return len;
+}
+
+static int orion_wdt_settimeout(int new_time)
+{
+	if ((new_time <= 0) || (new_time > wdt_max_duration))
+		return -EINVAL;
+
+	/* Set new watchdog time to be used when
+	 * orion_wdt_enable() or orion_wdt_ping() is called. */
+	heartbeat = new_time;
+	return 0;
+}
+
+static const struct watchdog_info ident = {
+	.options	= WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT |
+			  WDIOF_KEEPALIVEPING,
+	.identity	= "Orion Watchdog",
+};
+
+static long orion_wdt_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	int ret = -ENOTTY;
+	int time;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		ret = copy_to_user((struct watchdog_info *)arg, &ident,
+				   sizeof(ident)) ? -EFAULT : 0;
+		break;
+
+	case WDIOC_GETSTATUS:
+	case WDIOC_GETBOOTSTATUS:
+		ret = put_user(0, (int *)arg);
+		break;
+
+	case WDIOC_KEEPALIVE:
+		orion_wdt_ping();
+		ret = 0;
+		break;
+
+	case WDIOC_SETTIMEOUT:
+		ret = get_user(time, (int *)arg);
+		if (ret)
+			break;
+
+		if (orion_wdt_settimeout(time)) {
+			ret = -EINVAL;
+			break;
+		}
+		orion_wdt_ping();
+		/* Fall through */
+
+	case WDIOC_GETTIMEOUT:
+		ret = put_user(heartbeat, (int *)arg);
+		break;
+
+	case WDIOC_GETTIMELEFT:
+		if (orion_wdt_get_timeleft(&time)) {
+			ret = -EINVAL;
+			break;
+		}
+		ret = put_user(time, (int *)arg);
+		break;
+	}
+	return ret;
+}
+
+static int orion_wdt_release(struct inode *inode, struct file *file)
+{
+	if (test_bit(WDT_OK_TO_CLOSE, &wdt_status))
+		orion_wdt_disable();
+	else
+		printk(KERN_CRIT "WATCHDOG: Device closed unexpectedly - "
+					"timer will not stop\n");
+	clear_bit(WDT_IN_USE, &wdt_status);
+	clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
+
+	return 0;
+}
+
+
+static const struct file_operations orion_wdt_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.write		= orion_wdt_write,
+	.unlocked_ioctl	= orion_wdt_ioctl,
+	.open		= orion_wdt_open,
+	.release	= orion_wdt_release,
+};
+
+static struct miscdevice orion_wdt_miscdev = {
+	.minor		= WATCHDOG_MINOR,
+	.name		= "watchdog",
+	.fops		= &orion_wdt_fops,
+};
+
+static int __devinit orion_wdt_probe(struct platform_device *pdev)
+{
+	struct orion_wdt_platform_data *pdata = pdev->dev.platform_data;
+	int ret;
+
+	if (pdata) {
+		wdt_tclk = pdata->tclk;
+	} else {
+		printk(KERN_ERR "Orion Watchdog misses platform data\n");
+		return -ENODEV;
+	}
+
+	if (orion_wdt_miscdev.parent)
+		return -EBUSY;
+	orion_wdt_miscdev.parent = &pdev->dev;
+
+	wdt_max_duration = WDT_MAX_CYCLE_COUNT / wdt_tclk;
+	if (orion_wdt_settimeout(heartbeat))
+		heartbeat = wdt_max_duration;
+
+	ret = misc_register(&orion_wdt_miscdev);
+	if (ret)
+		return ret;
+
+	printk(KERN_INFO "Orion Watchdog Timer: Initial timeout %d sec%s\n",
+				heartbeat, nowayout ? ", nowayout" : "");
+	return 0;
+}
+
+static int __devexit orion_wdt_remove(struct platform_device *pdev)
+{
+	int ret;
+
+	if (test_bit(WDT_IN_USE, &wdt_status)) {
+		orion_wdt_disable();
+		clear_bit(WDT_IN_USE, &wdt_status);
+	}
+
+	ret = misc_deregister(&orion_wdt_miscdev);
+	if (!ret)
+		orion_wdt_miscdev.parent = NULL;
+
+	return ret;
+}
+
+static void orion_wdt_shutdown(struct platform_device *pdev)
+{
+	if (test_bit(WDT_IN_USE, &wdt_status))
+		orion_wdt_disable();
+}
+
+static struct platform_driver orion_wdt_driver = {
+	.probe		= orion_wdt_probe,
+	.remove		= __devexit_p(orion_wdt_remove),
+	.shutdown	= orion_wdt_shutdown,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "orion_wdt",
+	},
+};
+
+static int __init orion_wdt_init(void)
+{
+	spin_lock_init(&wdt_lock);
+	return platform_driver_register(&orion_wdt_driver);
+}
+
+static void __exit orion_wdt_exit(void)
+{
+	platform_driver_unregister(&orion_wdt_driver);
+}
+
+module_init(orion_wdt_init);
+module_exit(orion_wdt_exit);
+
+MODULE_AUTHOR("Sylver Bruneau <sylver.bruneau@googlemail.com>");
+MODULE_DESCRIPTION("Orion Processor Watchdog");
+
+module_param(heartbeat, int, 0);
+MODULE_PARM_DESC(heartbeat, "Initial watchdog heartbeat in seconds");
+
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
-- 
cgit v0.10.2


From 9ffbe87370403d6d5c4010f5b0f692f9d0715776 Mon Sep 17 00:00:00 2001
From: Imre Kaloz <kaloz@openwrt.org>
Date: Tue, 2 Jun 2009 14:31:43 +0200
Subject: [ARM] orion5x: WNR854T switch support

This patch adds support for the switch found on the Netgear
WNR854T router.

Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-orion5x/wnr854t-setup.c b/arch/arm/mach-orion5x/wnr854t-setup.c
index 7ddc22c..6920821 100644
--- a/arch/arm/mach-orion5x/wnr854t-setup.c
+++ b/arch/arm/mach-orion5x/wnr854t-setup.c
@@ -15,6 +15,7 @@
 #include <linux/mtd/physmap.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ethtool.h>
+#include <net/dsa.h>
 #include <asm/mach-types.h>
 #include <asm/gpio.h>
 #include <asm/mach/arch.h>
@@ -97,6 +98,20 @@ static struct mv643xx_eth_platform_data wnr854t_eth_data = {
 	.duplex		= DUPLEX_FULL,
 };
 
+static struct dsa_chip_data wnr854t_switch_chip_data = {
+	.port_names[0] = "lan3",
+	.port_names[1] = "lan4",
+	.port_names[2] = "wan",
+	.port_names[3] = "cpu",
+	.port_names[5] = "lan1",
+	.port_names[7] = "lan2",
+};
+
+static struct dsa_platform_data wnr854t_switch_plat_data = {
+	.nr_chips	= 1,
+	.chip		= &wnr854t_switch_chip_data,
+};
+
 static void __init wnr854t_init(void)
 {
 	/*
@@ -110,6 +125,7 @@ static void __init wnr854t_init(void)
 	 * Configure peripherals.
 	 */
 	orion5x_eth_init(&wnr854t_eth_data);
+	orion5x_eth_switch_init(&wnr854t_switch_plat_data, NO_IRQ);
 	orion5x_uart0_init();
 
 	orion5x_setup_dev_boot_win(WNR854T_NOR_BOOT_BASE,
-- 
cgit v0.10.2


From 3a8f744169ebcb0064c46a755d9e3e27233f048a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Thu, 7 May 2009 22:59:24 +0200
Subject: [ARM] orion5x: add sram support for crypto

The security accelerator which can act as a puppet player for the crypto
engine requires its commands in the sram. This patch adds support for the
phys mapping and creates a platform device for the actual driver.

[ nico: renamed device name from "mv,orion5x-crypto" to "mv_crypto"
  so to match the module name and be more generic for Kirkwood use ]

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-orion5x/addr-map.c b/arch/arm/mach-orion5x/addr-map.c
index c14d121..6f3f77d 100644
--- a/arch/arm/mach-orion5x/addr-map.c
+++ b/arch/arm/mach-orion5x/addr-map.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/mbus.h>
 #include <linux/io.h>
+#include <linux/errno.h>
 #include <mach/hardware.h>
 #include "common.h"
 
@@ -44,6 +45,7 @@
 #define TARGET_DEV_BUS		1
 #define TARGET_PCI		3
 #define TARGET_PCIE		4
+#define TARGET_SRAM		9
 #define ATTR_PCIE_MEM		0x59
 #define ATTR_PCIE_IO		0x51
 #define ATTR_PCIE_WA		0x79
@@ -53,6 +55,7 @@
 #define ATTR_DEV_CS1		0x1d
 #define ATTR_DEV_CS2		0x1b
 #define ATTR_DEV_BOOT		0xf
+#define ATTR_SRAM		0x0
 
 /*
  * Helpers to get DDR bank info
@@ -87,13 +90,13 @@ static int __init orion5x_cpu_win_can_remap(int win)
 	return 0;
 }
 
-static void __init setup_cpu_win(int win, u32 base, u32 size,
+static int __init setup_cpu_win(int win, u32 base, u32 size,
 				 u8 target, u8 attr, int remap)
 {
 	if (win >= 8) {
 		printk(KERN_ERR "setup_cpu_win: trying to allocate "
 				"window %d\n", win);
-		return;
+		return -ENOSPC;
 	}
 
 	writel(base & 0xffff0000, CPU_WIN_BASE(win));
@@ -107,6 +110,7 @@ static void __init setup_cpu_win(int win, u32 base, u32 size,
 		writel(remap & 0xffff0000, CPU_WIN_REMAP_LO(win));
 		writel(0, CPU_WIN_REMAP_HI(win));
 	}
+	return 0;
 }
 
 void __init orion5x_setup_cpu_mbus_bridge(void)
@@ -193,3 +197,9 @@ void __init orion5x_setup_pcie_wa_win(u32 base, u32 size)
 	setup_cpu_win(win_alloc_count++, base, size,
 		      TARGET_PCIE, ATTR_PCIE_WA, -1);
 }
+
+int __init orion5x_setup_sram_win(void)
+{
+	return setup_cpu_win(win_alloc_count, ORION5X_SRAM_PHYS_BASE,
+			ORION5X_SRAM_SIZE, TARGET_SRAM, ATTR_SRAM, -1);
+}
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index c3e2bea..eafcc49 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -536,6 +536,42 @@ void __init orion5x_xor_init(void)
 	platform_device_register(&orion5x_xor1_channel);
 }
 
+static struct resource orion5x_crypto_res[] = {
+	{
+		.name   = "regs",
+		.start  = ORION5X_CRYPTO_PHYS_BASE,
+		.end    = ORION5X_CRYPTO_PHYS_BASE + 0xffff,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name   = "sram",
+		.start  = ORION5X_SRAM_PHYS_BASE,
+		.end    = ORION5X_SRAM_PHYS_BASE + SZ_8K - 1,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name   = "crypto interrupt",
+		.start  = IRQ_ORION5X_CESA,
+		.end    = IRQ_ORION5X_CESA,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device orion5x_crypto_device = {
+	.name           = "mv_crypto",
+	.id             = -1,
+	.num_resources  = ARRAY_SIZE(orion5x_crypto_res),
+	.resource       = orion5x_crypto_res,
+};
+
+int __init orion5x_crypto_init(void)
+{
+	int ret;
+
+	ret = orion5x_setup_sram_win();
+	if (ret)
+		return ret;
+
+	return platform_device_register(&orion5x_crypto_device);
+}
 
 /*****************************************************************************
  * Watchdog
diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h
index 798b9a5..de483e8 100644
--- a/arch/arm/mach-orion5x/common.h
+++ b/arch/arm/mach-orion5x/common.h
@@ -26,6 +26,7 @@ void orion5x_setup_dev0_win(u32 base, u32 size);
 void orion5x_setup_dev1_win(u32 base, u32 size);
 void orion5x_setup_dev2_win(u32 base, u32 size);
 void orion5x_setup_pcie_wa_win(u32 base, u32 size);
+int orion5x_setup_sram_win(void);
 
 void orion5x_ehci0_init(void);
 void orion5x_ehci1_init(void);
@@ -37,6 +38,7 @@ void orion5x_spi_init(void);
 void orion5x_uart0_init(void);
 void orion5x_uart1_init(void);
 void orion5x_xor_init(void);
+int orion5x_crypto_init(void);
 
 /*
  * PCIe/PCI functions.
diff --git a/arch/arm/mach-orion5x/include/mach/orion5x.h b/arch/arm/mach-orion5x/include/mach/orion5x.h
index 377a773..2d87665 100644
--- a/arch/arm/mach-orion5x/include/mach/orion5x.h
+++ b/arch/arm/mach-orion5x/include/mach/orion5x.h
@@ -24,6 +24,7 @@
  * f1000000	on-chip peripheral registers
  * f2000000	PCIe I/O space
  * f2100000	PCI I/O space
+ * f2200000	SRAM dedicated for the crypto unit
  * f4000000	device bus mappings (boot)
  * fa000000	device bus mappings (cs0)
  * fa800000	device bus mappings (cs2)
@@ -49,6 +50,9 @@
 #define ORION5X_PCI_IO_BUS_BASE		0x00100000
 #define ORION5X_PCI_IO_SIZE		SZ_1M
 
+#define ORION5X_SRAM_PHYS_BASE		(0xf2200000)
+#define ORION5X_SRAM_SIZE		SZ_8K
+
 /* Relevant only for Orion-1/Orion-NAS */
 #define ORION5X_PCIE_WA_PHYS_BASE	0xf0000000
 #define ORION5X_PCIE_WA_VIRT_BASE	0xfe000000
@@ -94,6 +98,8 @@
 #define ORION5X_SATA_PHYS_BASE		(ORION5X_REGS_PHYS_BASE | 0x80000)
 #define ORION5X_SATA_VIRT_BASE		(ORION5X_REGS_VIRT_BASE | 0x80000)
 
+#define ORION5X_CRYPTO_PHYS_BASE	(ORION5X_REGS_PHYS_BASE | 0x90000)
+
 #define ORION5X_USB1_PHYS_BASE		(ORION5X_REGS_PHYS_BASE | 0xa0000)
 #define ORION5X_USB1_VIRT_BASE		(ORION5X_REGS_VIRT_BASE | 0xa0000)
 
-- 
cgit v0.10.2


From fc63b7239a9c939f38450620f773d057b037976b Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 2 Jun 2009 21:51:14 -0400
Subject: [ARM] Kirkwood: let's use real size for resources

We don't have to define resources to the minimal physical window size
as setup_cpu_win() will cope with smaller sizes already.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/include/mach/kirkwood.h b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
index f49a29b..00be8c5 100644
--- a/arch/arm/mach-kirkwood/include/mach/kirkwood.h
+++ b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
@@ -27,9 +27,7 @@
  */
 
 #define KIRKWOOD_NAND_MEM_PHYS_BASE	0xf3000000
-#define KIRKWOOD_NAND_MEM_SIZE		SZ_64K /* 1K is sufficient, but 64K
-						* is the minimal window size
-						*/
+#define KIRKWOOD_NAND_MEM_SIZE		SZ_1K
 
 #define KIRKWOOD_PCIE_IO_PHYS_BASE	0xf2000000
 #define KIRKWOOD_PCIE_IO_VIRT_BASE	0xfef00000
-- 
cgit v0.10.2


From c1191b0e3b5fc080e0b09859024f39c4a8c5d4d5 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 2 Jun 2009 21:43:45 -0400
Subject: [ARM] Kirkwood: create a mapping for the Security Accelerator SRAM

Always creating the physical mapping should do no harm, so let's remove
the interface that was provided for its optional creation and make the
mapping static.

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/include/asm/sizes.h b/arch/arm/include/asm/sizes.h
index ada93a8..4fc1565 100644
--- a/arch/arm/include/asm/sizes.h
+++ b/arch/arm/include/asm/sizes.h
@@ -29,6 +29,7 @@
 #define SZ_512				0x00000200
 
 #define SZ_1K                           0x00000400
+#define SZ_2K                           0x00000800
 #define SZ_4K                           0x00001000
 #define SZ_8K                           0x00002000
 #define SZ_16K                          0x00004000
diff --git a/arch/arm/mach-kirkwood/addr-map.c b/arch/arm/mach-kirkwood/addr-map.c
index 5db4f0b..1da5d1c 100644
--- a/arch/arm/mach-kirkwood/addr-map.c
+++ b/arch/arm/mach-kirkwood/addr-map.c
@@ -20,6 +20,7 @@
  */
 #define TARGET_DDR		0
 #define TARGET_DEV_BUS		1
+#define TARGET_SRAM		3
 #define TARGET_PCIE		4
 #define ATTR_DEV_SPI_ROM	0x1e
 #define ATTR_DEV_BOOT		0x1d
@@ -30,6 +31,7 @@
 #define ATTR_DEV_CS0		0x3e
 #define ATTR_PCIE_IO		0xe0
 #define ATTR_PCIE_MEM		0xe8
+#define ATTR_SRAM		0x01
 
 /*
  * Helpers to get DDR bank info
@@ -48,7 +50,6 @@
 
 
 struct mbus_dram_target_info kirkwood_mbus_dram_info;
-static int __initdata win_alloc_count;
 
 static int __init cpu_win_can_remap(int win)
 {
@@ -112,7 +113,11 @@ void __init kirkwood_setup_cpu_mbus(void)
 	setup_cpu_win(2, KIRKWOOD_NAND_MEM_PHYS_BASE, KIRKWOOD_NAND_MEM_SIZE,
 		      TARGET_DEV_BUS, ATTR_DEV_NAND, -1);
 
-	win_alloc_count = 3;
+	/*
+	 * Setup window for SRAM.
+	 */
+	setup_cpu_win(3, KIRKWOOD_SRAM_PHYS_BASE, KIRKWOOD_SRAM_SIZE,
+		      TARGET_SRAM, ATTR_SRAM, -1);
 
 	/*
 	 * Setup MBUS dram target info.
@@ -140,8 +145,3 @@ void __init kirkwood_setup_cpu_mbus(void)
 	}
 	kirkwood_mbus_dram_info.num_cs = cs;
 }
-
-void __init kirkwood_setup_sram_win(u32 base, u32 size)
-{
-	setup_cpu_win(win_alloc_count++, base, size, 0x03, 0x00, -1);
-}
diff --git a/arch/arm/mach-kirkwood/common.h b/arch/arm/mach-kirkwood/common.h
index 9de5256..d7de434 100644
--- a/arch/arm/mach-kirkwood/common.h
+++ b/arch/arm/mach-kirkwood/common.h
@@ -26,7 +26,6 @@ void kirkwood_init_irq(void);
 
 extern struct mbus_dram_target_info kirkwood_mbus_dram_info;
 void kirkwood_setup_cpu_mbus(void);
-void kirkwood_setup_sram_win(u32 base, u32 size);
 
 void kirkwood_pcie_id(u32 *dev, u32 *rev);
 
diff --git a/arch/arm/mach-kirkwood/include/mach/kirkwood.h b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
index 00be8c5..2172224 100644
--- a/arch/arm/mach-kirkwood/include/mach/kirkwood.h
+++ b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
@@ -20,12 +20,16 @@
  * f1000000	on-chip peripheral registers
  * f2000000	PCIe I/O space
  * f3000000	NAND controller address window
+ * f4000000	Security Accelerator SRAM
  *
  * virt		phys		size
  * fee00000	f1000000	1M	on-chip peripheral registers
  * fef00000	f2000000	1M	PCIe I/O space
  */
 
+#define KIRKWOOD_SRAM_PHYS_BASE		0xf4000000
+#define KIRKWOOD_SRAM_SIZE		SZ_2K
+
 #define KIRKWOOD_NAND_MEM_PHYS_BASE	0xf3000000
 #define KIRKWOOD_NAND_MEM_SIZE		SZ_1K
 
-- 
cgit v0.10.2


From ae5c8c83735f5fcb09b380944e4854a383006998 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Wed, 3 Jun 2009 15:24:36 -0400
Subject: [ARM] Kirkwood: platform device registration for the crypto engine

Signed-off-by: Nicolas Pitre <nico@marvell.com>

diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 3da32b1..0f69198 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -561,6 +561,43 @@ void __init kirkwood_uart1_init(void)
 
 
 /*****************************************************************************
+ * Cryptographic Engines and Security Accelerator (CESA)
+ ****************************************************************************/
+
+static struct resource kirkwood_crypto_res[] = {
+	{
+		.name   = "regs",
+		.start  = CRYPTO_PHYS_BASE,
+		.end    = CRYPTO_PHYS_BASE + 0xffff,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name   = "sram",
+		.start  = KIRKWOOD_SRAM_PHYS_BASE,
+		.end    = KIRKWOOD_SRAM_PHYS_BASE + KIRKWOOD_SRAM_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name   = "crypto interrupt",
+		.start  = IRQ_KIRKWOOD_CRYPTO,
+		.end    = IRQ_KIRKWOOD_CRYPTO,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device kirkwood_crypto_device = {
+	.name           = "mv_crypto",
+	.id             = -1,
+	.num_resources  = ARRAY_SIZE(kirkwood_crypto_res),
+	.resource       = kirkwood_crypto_res,
+};
+
+void __init kirkwood_crypto_init(void)
+{
+	kirkwood_clk_ctrl |= CGC_CRYPTO;
+	platform_device_register(&kirkwood_crypto_device);
+}
+
+
+/*****************************************************************************
  * XOR
  ****************************************************************************/
 static struct mv_xor_platform_shared_data kirkwood_xor_shared_data = {
@@ -886,6 +923,7 @@ void __init kirkwood_init(void)
 	kirkwood_wdt_init();
 	kirkwood_xor0_init();
 	kirkwood_xor1_init();
+	kirkwood_crypto_init();
 }
 
 static int __init kirkwood_clock_gate(void)
diff --git a/arch/arm/mach-kirkwood/include/mach/kirkwood.h b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
index 2172224..07af858 100644
--- a/arch/arm/mach-kirkwood/include/mach/kirkwood.h
+++ b/arch/arm/mach-kirkwood/include/mach/kirkwood.h
@@ -66,6 +66,8 @@
 
 #define BRIDGE_VIRT_BASE	(KIRKWOOD_REGS_VIRT_BASE | 0x20000)
 
+#define CRYPTO_PHYS_BASE	(KIRKWOOD_REGS_PHYS_BASE | 0x30000)
+
 #define PCIE_VIRT_BASE		(KIRKWOOD_REGS_VIRT_BASE | 0x40000)
 #define PCIE_LINK_CTRL		(PCIE_VIRT_BASE | 0x70)
 #define PCIE_STATUS		(PCIE_VIRT_BASE | 0x1a04)
-- 
cgit v0.10.2


From 92a43793a908f395dc687e6c5fc90d63f999d6d5 Mon Sep 17 00:00:00 2001
From: Dan Allongo <gongo2k1@gmail.com>
Date: Mon, 8 Jun 2009 11:21:52 -0400
Subject: ALSA: usb - Add boot quirk for C-Media 6206 USB Audio

Added boot quirk for C-Media CM6206 device in snd_usb_audio_probe.
The function snd_usb_cm6206_boot_quirk sets up six internal 16-bit
registers in order to initialize the device. Values for the registers
came from sniffing USB traffic under Windows since only four of the six
are documented in the datasheet for CM106 and some reserved bits were
also being set.

[Minor coding-style fixes by tiwai]

Signed-off-by: Dan Allongo <gongo2k1@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index a6b8848..e871217 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -3279,6 +3279,25 @@ static int snd_usb_cm106_boot_quirk(struct usb_device *dev)
 	return snd_usb_cm106_write_int_reg(dev, 2, 0x8004);
 }
 
+/*
+ * C-Media CM6206 is based on CM106 with two additional
+ * registers that are not documented in the data sheet.
+ * Values here are chosen based on sniffing USB traffic
+ * under Windows.
+ */
+static int snd_usb_cm6206_boot_quirk(struct usb_device *dev)
+{
+	int err, reg;
+	int val[] = {0x200c, 0x3000, 0xf800, 0x143f, 0x0000, 0x3000};
+
+	for (reg = 0; reg < ARRAY_SIZE(val); reg++) {
+		err = snd_usb_cm106_write_int_reg(dev, reg, val[reg]);
+		if (err < 0)
+			return err;
+	}
+
+	return err;
+}
 
 /*
  * Setup quirks
@@ -3565,6 +3584,12 @@ static void *snd_usb_audio_probe(struct usb_device *dev,
 			goto __err_val;
 	}
 
+	/* C-Media CM6206 / CM106-Like Sound Device */
+	if (id == USB_ID(0x0d8c, 0x0102)) {
+		if (snd_usb_cm6206_boot_quirk(dev) < 0)
+			goto __err_val;
+	}
+
 	/*
 	 * found a config.  now register to ALSA
 	 */
-- 
cgit v0.10.2


From c9690998ef48ffefeccb91c70a7739eebdea57f9 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Mon, 8 Jun 2009 19:09:39 +0200
Subject: x86: memtest: remove 64-bit division

Using gcc 3.3.5 a "make allmodconfig" + "CONFIG_KVM=n"
triggers a build error:

 arch/x86/mm/built-in.o(.init.text+0x43f7): In function `__change_page_attr':
 arch/x86/mm/pageattr.c:114: undefined reference to `__udivdi3'
 make: *** [.tmp_vmlinux1] Error 1

The culprit turned out to be a division in arch/x86/mm/memtest.c
For more info see this thread:

  http://marc.info/?l=linux-kernel&m=124416232620683

The patch entirely removes the division that caused the build
error.

[ Impact: build fix with certain GCC versions ]

Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: xiyou.wangcong@gmail.com
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: <stable@kernel.org>
LKML-Reference: <20090608170939.GB12431@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 605c8be..c0bedcd 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -40,23 +40,23 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
 
 static void __init memtest(u64 pattern, u64 start_phys, u64 size)
 {
-	u64 i, count;
-	u64 *start;
+	u64 *p;
+	void *start, *end;
 	u64 start_bad, last_bad;
 	u64 start_phys_aligned;
 	size_t incr;
 
 	incr = sizeof(pattern);
 	start_phys_aligned = ALIGN(start_phys, incr);
-	count = (size - (start_phys_aligned - start_phys))/incr;
 	start = __va(start_phys_aligned);
+	end = start + size - (start_phys_aligned - start_phys);
 	start_bad = 0;
 	last_bad = 0;
 
-	for (i = 0; i < count; i++)
-		start[i] = pattern;
-	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
-		if (*start == pattern)
+	for (p = start; p < end; p++)
+		*p = pattern;
+	for (p = start; p < end; p++, start_phys_aligned += incr) {
+		if (*p == pattern)
 			continue;
 		if (start_phys_aligned == last_bad + incr) {
 			last_bad += incr;
-- 
cgit v0.10.2


From 477e608c03eb2f561a23994bee38a32a9fd3357d Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 27 Apr 2009 20:54:22 +0200
Subject: [SCSI] fix documentation for two functions

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 166417a..2de5f3a 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1225,8 +1225,8 @@ EXPORT_SYMBOL(__scsi_device_lookup_by_target);
  * @starget:	SCSI target pointer
  * @lun:	SCSI Logical Unit Number
  *
- * Description: Looks up the scsi_device with the specified @channel, @id, @lun
- * for a given host.  The returned scsi_device has an additional reference that
+ * Description: Looks up the scsi_device with the specified @lun for a given
+ * @starget.  The returned scsi_device has an additional reference that
  * needs to be released with scsi_device_put once you're done with it.
  **/
 struct scsi_device *scsi_device_lookup_by_target(struct scsi_target *starget,
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 0c2c73b..a95d2ba 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -641,9 +641,9 @@ EXPORT_SYMBOL(scsi_eh_prep_cmnd);
 /**
  * scsi_eh_restore_cmnd  - Restore a scsi command info as part of error recory
  * @scmd:       SCSI command structure to restore
- * @ses:        saved information from a coresponding call to scsi_prep_eh_cmnd
+ * @ses:        saved information from a coresponding call to scsi_eh_prep_cmnd
  *
- * Undo any damage done by above scsi_prep_eh_cmnd().
+ * Undo any damage done by above scsi_eh_prep_cmnd().
  */
 void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses)
 {
-- 
cgit v0.10.2


From 91bc31fb3bae4e55832c7c39d4f9c193285e6ab2 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 17 May 2009 09:30:48 -0500
Subject: [SCSI] fix up scsi_eh_lock_door()

The Documentation is incorrect (we removed some functions referred to), and
none of the bug warnings now apply.  Additionally remove the spurious check on
the return from blk_get_request() which can't fail if __GFP_WAIT is passed in.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a95d2ba..a168935 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1451,28 +1451,21 @@ static void eh_lock_door_done(struct request *req, int uptodate)
  * @sdev:	SCSI device to prevent medium removal
  *
  * Locking:
- * 	We must be called from process context; scsi_allocate_request()
- * 	may sleep.
+ * 	We must be called from process context.
  *
  * Notes:
  * 	We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
  * 	head of the devices request queue, and continue.
- *
- * Bugs:
- * 	scsi_allocate_request() may sleep waiting for existing requests to
- * 	be processed.  However, since we haven't kicked off any request
- * 	processing for this host, this may deadlock.
- *
- *	If scsi_allocate_request() fails for what ever reason, we
- *	completely forget to lock the door.
  */
 static void scsi_eh_lock_door(struct scsi_device *sdev)
 {
 	struct request *req;
 
+	/*
+	 * blk_get_request with GFP_KERNEL (__GFP_WAIT) sleeps until a
+	 * request becomes available
+	 */
 	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
-	if (!req)
-		return;
 
 	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;
 	req->cmd[1] = 0;
-- 
cgit v0.10.2


From 601e7638254c118fca135af9b1a9f35061420f62 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Tue, 26 May 2009 20:35:48 +0000
Subject: [SCSI] sd: fix bug in SCSI async probing

The async split up of probing in sd.c created a potential failure case where
something goes wrong with device_add(), but which we don't recover properly.
Since, in general, asynchronous error handling is hard, move the device_add()
into the asynchronous path (it should be fast) and make sure all the deferred
processing cannot fail.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 8404423..d8e1d15 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1902,24 +1902,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
 	index = sdkp->index;
 	dev = &sdp->sdev_gendev;
 
-	if (!sdp->request_queue->rq_timeout) {
-		if (sdp->type != TYPE_MOD)
-			blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
-		else
-			blk_queue_rq_timeout(sdp->request_queue,
-					     SD_MOD_TIMEOUT);
-	}
-
-	device_initialize(&sdkp->dev);
-	sdkp->dev.parent = &sdp->sdev_gendev;
-	sdkp->dev.class = &sd_disk_class;
-	dev_set_name(&sdkp->dev, dev_name(&sdp->sdev_gendev));
-
-	if (device_add(&sdkp->dev))
-		goto out_free_index;
-
-	get_device(&sdp->sdev_gendev);
-
 	if (index < SD_MAX_DISKS) {
 		gd->major = sd_major((index & 0xf0) >> 4);
 		gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
@@ -1954,11 +1936,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
 
 	sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
 		  sdp->removable ? "removable " : "");
-
-	return;
-
- out_free_index:
-	ida_remove(&sd_index_ida, index);
 }
 
 /**
@@ -2026,6 +2003,24 @@ static int sd_probe(struct device *dev)
 	sdkp->openers = 0;
 	sdkp->previous_state = 1;
 
+	if (!sdp->request_queue->rq_timeout) {
+		if (sdp->type != TYPE_MOD)
+			blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
+		else
+			blk_queue_rq_timeout(sdp->request_queue,
+					     SD_MOD_TIMEOUT);
+	}
+
+	device_initialize(&sdkp->dev);
+	sdkp->dev.parent = &sdp->sdev_gendev;
+	sdkp->dev.class = &sd_disk_class;
+	dev_set_name(&sdkp->dev, dev_name(&sdp->sdev_gendev));
+
+	if (device_add(&sdkp->dev))
+		goto out_free_index;
+
+	get_device(&sdp->sdev_gendev);
+
 	async_schedule(sd_probe_async, sdkp);
 
 	return 0;
@@ -2055,8 +2050,10 @@ static int sd_probe(struct device *dev)
  **/
 static int sd_remove(struct device *dev)
 {
-	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	struct scsi_disk *sdkp;
 
+	async_synchronize_full();
+	sdkp = dev_get_drvdata(dev);
 	device_del(&sdkp->dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
-- 
cgit v0.10.2


From 4a2837d4fcaf8a2c2ad61523287073d0c14b9ed0 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:22 -0500
Subject: [SCSI] ibmvfc: Fix invalid error response handling

Fix an obvious bug in processing error responses for SCSI commands
which can result in successful responses being incorrectly returned
with DID_ERROR.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index ea4abee..879c511 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -275,7 +275,7 @@ static int ibmvfc_get_err_result(struct ibmvfc_cmd *vfc_cmd)
 	int fc_rsp_len = rsp->fcp_rsp_len;
 
 	if ((rsp->flags & FCP_RSP_LEN_VALID) &&
-	    ((!fc_rsp_len && fc_rsp_len != 4 && fc_rsp_len != 8) ||
+	    ((fc_rsp_len && fc_rsp_len != 4 && fc_rsp_len != 8) ||
 	     rsp->data.info.rsp_code))
 		return DID_ERROR << 16;
 
-- 
cgit v0.10.2


From 7270b9bde5f382e730e1ef69d6c1b34d388df2b0 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:24 -0500
Subject: [SCSI] ibmvfc: Fixup GFP flags for target allocations

Since target allocations can occur while resetting the virtual adapter,
we shouldn't be using GFP_KERNEL for them as it could hang. Switch to
use GFP_NOIO.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 879c511..c450a34 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3420,7 +3420,7 @@ static int ibmvfc_alloc_target(struct ibmvfc_host *vhost, u64 scsi_id)
 	}
 	spin_unlock_irqrestore(vhost->host->host_lock, flags);
 
-	tgt = mempool_alloc(vhost->tgt_pool, GFP_KERNEL);
+	tgt = mempool_alloc(vhost->tgt_pool, GFP_NOIO);
 	if (!tgt) {
 		dev_err(vhost->dev, "Target allocation failure for scsi id %08llx\n",
 			scsi_id);
-- 
cgit v0.10.2


From 85e2399e925e0afa04dd6e185a910bdd3dc4626b Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:25 -0500
Subject: [SCSI] ibmvfc: Use DEVICE_ATTR macro

Use DEVICE_ATTR macro for defining device sysfs attributes.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index c450a34..26abed2 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -2434,14 +2434,6 @@ static ssize_t ibmvfc_show_host_partition_name(struct device *dev,
 			vhost->login_buf->resp.partition_name);
 }
 
-static struct device_attribute ibmvfc_host_partition_name = {
-	.attr = {
-		.name = "partition_name",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_partition_name,
-};
-
 static ssize_t ibmvfc_show_host_device_name(struct device *dev,
 					    struct device_attribute *attr, char *buf)
 {
@@ -2452,14 +2444,6 @@ static ssize_t ibmvfc_show_host_device_name(struct device *dev,
 			vhost->login_buf->resp.device_name);
 }
 
-static struct device_attribute ibmvfc_host_device_name = {
-	.attr = {
-		.name = "device_name",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_device_name,
-};
-
 static ssize_t ibmvfc_show_host_loc_code(struct device *dev,
 					 struct device_attribute *attr, char *buf)
 {
@@ -2470,14 +2454,6 @@ static ssize_t ibmvfc_show_host_loc_code(struct device *dev,
 			vhost->login_buf->resp.port_loc_code);
 }
 
-static struct device_attribute ibmvfc_host_loc_code = {
-	.attr = {
-		.name = "port_loc_code",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_loc_code,
-};
-
 static ssize_t ibmvfc_show_host_drc_name(struct device *dev,
 					 struct device_attribute *attr, char *buf)
 {
@@ -2488,14 +2464,6 @@ static ssize_t ibmvfc_show_host_drc_name(struct device *dev,
 			vhost->login_buf->resp.drc_name);
 }
 
-static struct device_attribute ibmvfc_host_drc_name = {
-	.attr = {
-		.name = "drc_name",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_drc_name,
-};
-
 static ssize_t ibmvfc_show_host_npiv_version(struct device *dev,
 					     struct device_attribute *attr, char *buf)
 {
@@ -2504,14 +2472,6 @@ static ssize_t ibmvfc_show_host_npiv_version(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", vhost->login_buf->resp.version);
 }
 
-static struct device_attribute ibmvfc_host_npiv_version = {
-	.attr = {
-		.name = "npiv_version",
-		.mode = S_IRUGO,
-	},
-	.show = ibmvfc_show_host_npiv_version,
-};
-
 /**
  * ibmvfc_show_log_level - Show the adapter's error logging level
  * @dev:	class device struct
@@ -2556,14 +2516,13 @@ static ssize_t ibmvfc_store_log_level(struct device *dev,
 	return strlen(buf);
 }
 
-static struct device_attribute ibmvfc_log_level_attr = {
-	.attr = {
-		.name =		"log_level",
-		.mode =		S_IRUGO | S_IWUSR,
-	},
-	.show = ibmvfc_show_log_level,
-	.store = ibmvfc_store_log_level
-};
+static DEVICE_ATTR(partition_name, S_IRUGO, ibmvfc_show_host_partition_name, NULL);
+static DEVICE_ATTR(device_name, S_IRUGO, ibmvfc_show_host_device_name, NULL);
+static DEVICE_ATTR(port_loc_code, S_IRUGO, ibmvfc_show_host_loc_code, NULL);
+static DEVICE_ATTR(drc_name, S_IRUGO, ibmvfc_show_host_drc_name, NULL);
+static DEVICE_ATTR(npiv_version, S_IRUGO, ibmvfc_show_host_npiv_version, NULL);
+static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR,
+		   ibmvfc_show_log_level, ibmvfc_store_log_level);
 
 #ifdef CONFIG_SCSI_IBMVFC_TRACE
 /**
@@ -2612,12 +2571,12 @@ static struct bin_attribute ibmvfc_trace_attr = {
 #endif
 
 static struct device_attribute *ibmvfc_attrs[] = {
-	&ibmvfc_host_partition_name,
-	&ibmvfc_host_device_name,
-	&ibmvfc_host_loc_code,
-	&ibmvfc_host_drc_name,
-	&ibmvfc_host_npiv_version,
-	&ibmvfc_log_level_attr,
+	&dev_attr_partition_name,
+	&dev_attr_device_name,
+	&dev_attr_port_loc_code,
+	&dev_attr_drc_name,
+	&dev_attr_npiv_version,
+	&dev_attr_log_level,
 	NULL
 };
 
-- 
cgit v0.10.2


From 7d0e462247241b8ec2d377306203b58c7f423553 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:26 -0500
Subject: [SCSI] ibmvfc: Reduce error logging noise

The ibmvfc driver currently logs errors during discovery for several
transient fabric errors, which generally get retried. If retries
do not work, we see multiple errors in the log. If retries do work,
we see errors in the log which may be confusing since the retry worked.
This patch enhances the discovery time error logging to only log errors
for command failures during discovery if all allowed retries have been
used up. The existing behavior of logging all failures can be restored
by setting the hosts log_level to a value of 3 or greater.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 26abed2..182c8e7 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -842,9 +842,13 @@ static void ibmvfc_reset_host(struct ibmvfc_host *vhost)
  * ibmvfc_retry_host_init - Retry host initialization if allowed
  * @vhost:	ibmvfc host struct
  *
+ * Returns: 1 if init will be retried / 0 if not
+ *
  **/
-static void ibmvfc_retry_host_init(struct ibmvfc_host *vhost)
+static int ibmvfc_retry_host_init(struct ibmvfc_host *vhost)
 {
+	int retry = 0;
+
 	if (vhost->action == IBMVFC_HOST_ACTION_INIT_WAIT) {
 		vhost->delay_init = 1;
 		if (++vhost->init_retries > IBMVFC_MAX_HOST_INIT_RETRIES) {
@@ -853,11 +857,14 @@ static void ibmvfc_retry_host_init(struct ibmvfc_host *vhost)
 			ibmvfc_link_down(vhost, IBMVFC_HOST_OFFLINE);
 		} else if (vhost->init_retries == IBMVFC_MAX_HOST_INIT_RETRIES)
 			__ibmvfc_reset_host(vhost);
-		else
+		else {
 			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
+			retry = 1;
+		}
 	}
 
 	wake_up(&vhost->work_wait_q);
+	return retry;
 }
 
 /**
@@ -2733,15 +2740,19 @@ static void ibmvfc_init_tgt(struct ibmvfc_target *tgt,
  * @tgt:		ibmvfc target struct
  * @job_step:	initialization job step
  *
+ * Returns: 1 if step will be retried / 0 if not
+ *
  **/
-static void ibmvfc_retry_tgt_init(struct ibmvfc_target *tgt,
+static int ibmvfc_retry_tgt_init(struct ibmvfc_target *tgt,
 				  void (*job_step) (struct ibmvfc_target *))
 {
 	if (++tgt->init_retries > IBMVFC_MAX_TGT_INIT_RETRIES) {
 		ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		wake_up(&tgt->vhost->work_wait_q);
+		return 0;
 	} else
 		ibmvfc_init_tgt(tgt, job_step);
+	return 1;
 }
 
 /* Defined in FC-LS */
@@ -2790,7 +2801,7 @@ static void ibmvfc_tgt_prli_done(struct ibmvfc_event *evt)
 	struct ibmvfc_process_login *rsp = &evt->xfer_iu->prli;
 	struct ibmvfc_prli_svc_parms *parms = &rsp->parms;
 	u32 status = rsp->common.status;
-	int index;
+	int index, level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	vhost->discovery_threads--;
 	ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
@@ -2826,13 +2837,14 @@ static void ibmvfc_tgt_prli_done(struct ibmvfc_event *evt)
 		break;
 	case IBMVFC_MAD_FAILED:
 	default:
-		tgt_err(tgt, "Process Login failed: %s (%x:%x) rc=0x%02X\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error),
-			rsp->status, rsp->error, status);
 		if (ibmvfc_retry_cmd(rsp->status, rsp->error))
-			ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_prli);
+			level += ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_prli);
 		else
 			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+
+		tgt_log(tgt, level, "Process Login failed: %s (%x:%x) rc=0x%02X\n",
+			ibmvfc_get_cmd_error(rsp->status, rsp->error),
+			rsp->status, rsp->error, status);
 		break;
 	};
 
@@ -2891,6 +2903,7 @@ static void ibmvfc_tgt_plogi_done(struct ibmvfc_event *evt)
 	struct ibmvfc_host *vhost = evt->vhost;
 	struct ibmvfc_port_login *rsp = &evt->xfer_iu->plogi;
 	u32 status = rsp->common.status;
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	vhost->discovery_threads--;
 	ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
@@ -2919,15 +2932,15 @@ static void ibmvfc_tgt_plogi_done(struct ibmvfc_event *evt)
 		break;
 	case IBMVFC_MAD_FAILED:
 	default:
-		tgt_err(tgt, "Port Login failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error,
-			ibmvfc_get_fc_type(rsp->fc_type), rsp->fc_type,
-			ibmvfc_get_ls_explain(rsp->fc_explain), rsp->fc_explain, status);
-
 		if (ibmvfc_retry_cmd(rsp->status, rsp->error))
-			ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_plogi);
+			level += ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_plogi);
 		else
 			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+
+		tgt_log(tgt, level, "Port Login failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
+			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error,
+			ibmvfc_get_fc_type(rsp->fc_type), rsp->fc_type,
+			ibmvfc_get_ls_explain(rsp->fc_explain), rsp->fc_explain, status);
 		break;
 	};
 
@@ -3281,6 +3294,7 @@ static void ibmvfc_tgt_query_target_done(struct ibmvfc_event *evt)
 	struct ibmvfc_host *vhost = evt->vhost;
 	struct ibmvfc_query_tgt *rsp = &evt->xfer_iu->query_tgt;
 	u32 status = rsp->common.status;
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	vhost->discovery_threads--;
 	ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
@@ -3300,19 +3314,19 @@ static void ibmvfc_tgt_query_target_done(struct ibmvfc_event *evt)
 		break;
 	case IBMVFC_MAD_FAILED:
 	default:
-		tgt_err(tgt, "Query Target failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error,
-			ibmvfc_get_fc_type(rsp->fc_type), rsp->fc_type,
-			ibmvfc_get_gs_explain(rsp->fc_explain), rsp->fc_explain, status);
-
 		if ((rsp->status & IBMVFC_FABRIC_MAPPED) == IBMVFC_FABRIC_MAPPED &&
 		    rsp->error == IBMVFC_UNABLE_TO_PERFORM_REQ &&
 		    rsp->fc_explain == IBMVFC_PORT_NAME_NOT_REG)
 			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		else if (ibmvfc_retry_cmd(rsp->status, rsp->error))
-			ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_query_target);
+			level += ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_query_target);
 		else
 			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+
+		tgt_log(tgt, level, "Query Target failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
+			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error,
+			ibmvfc_get_fc_type(rsp->fc_type), rsp->fc_type,
+			ibmvfc_get_gs_explain(rsp->fc_explain), rsp->fc_explain, status);
 		break;
 	};
 
@@ -3431,6 +3445,7 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt)
 	struct ibmvfc_host *vhost = evt->vhost;
 	struct ibmvfc_discover_targets *rsp = &evt->xfer_iu->discover_targets;
 	u32 mad_status = rsp->common.status;
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	switch (mad_status) {
 	case IBMVFC_MAD_SUCCESS:
@@ -3439,9 +3454,9 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt)
 		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_ALLOC_TGTS);
 		break;
 	case IBMVFC_MAD_FAILED:
-		dev_err(vhost->dev, "Discover Targets failed: %s (%x:%x)\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error);
-		ibmvfc_retry_host_init(vhost);
+		level += ibmvfc_retry_host_init(vhost);
+		ibmvfc_log(vhost, level, "Discover Targets failed: %s (%x:%x)\n",
+			   ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error);
 		break;
 	case IBMVFC_MAD_DRIVER_FAILED:
 		break;
@@ -3493,18 +3508,19 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
 	u32 mad_status = evt->xfer_iu->npiv_login.common.status;
 	struct ibmvfc_npiv_login_resp *rsp = &vhost->login_buf->resp;
 	unsigned int npiv_max_sectors;
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
 	switch (mad_status) {
 	case IBMVFC_MAD_SUCCESS:
 		ibmvfc_free_event(evt);
 		break;
 	case IBMVFC_MAD_FAILED:
-		dev_err(vhost->dev, "NPIV Login failed: %s (%x:%x)\n",
-			ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error);
 		if (ibmvfc_retry_cmd(rsp->status, rsp->error))
-			ibmvfc_retry_host_init(vhost);
+			level += ibmvfc_retry_host_init(vhost);
 		else
 			ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+		ibmvfc_log(vhost, level, "NPIV Login failed: %s (%x:%x)\n",
+			   ibmvfc_get_cmd_error(rsp->status, rsp->error), rsp->status, rsp->error);
 		ibmvfc_free_event(evt);
 		return;
 	case IBMVFC_MAD_CRQ_ERROR:
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index ca1dcf7..4dac356 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -707,6 +707,12 @@ struct ibmvfc_host {
 #define tgt_err(t, fmt, ...)		\
 	dev_err((t)->vhost->dev, "%llX: " fmt, (t)->scsi_id, ##__VA_ARGS__)
 
+#define tgt_log(t, level, fmt, ...) \
+	do { \
+		if ((t)->vhost->log_level >= level) \
+			tgt_err(t, fmt, ##__VA_ARGS__); \
+	} while (0)
+
 #define ibmvfc_dbg(vhost, ...) \
 	DBG_CMD(dev_info((vhost)->dev, ##__VA_ARGS__))
 
-- 
cgit v0.10.2


From 43c8da907ccc656935d1085701f4db83385d8a59 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:28 -0500
Subject: [SCSI] ibmvfc: Fix deadlock in EH

Fixes the following deadlock scenario shown below. We currently allow
queuecommand to send commands when the ibmvfc workqueue is scanning for
new rports, so we should also allow EH to function at this time as well.

scsi_eh_3     D 0000000000000000 12304  1279      2
Call Trace:
[c0000002f7257730] [c0000002f72577e0] 0xc0000002f72577e0 (unreliable)
[c0000002f7257900] [c0000000000118f4] .__switch_to+0x158/0x1a0
[c0000002f72579a0] [c0000000004f8b40] .schedule+0x8d4/0x9dc
[c0000002f7257b60] [c0000000004f8f08] .schedule_timeout+0xa8/0xe8
[c0000002f7257c50] [d0000000001d23e0] .ibmvfc_wait_while_resetting+0xe4/0x140 [ibmvfc]
[c0000002f7257d20] [d0000000001d3984] .ibmvfc_eh_abort_handler+0x60/0xe4 [ibmvfc]
[c0000002f7257dc0] [d000000000366714] .scsi_error_handler+0x38c/0x674 [scsi_mod]
[c0000002f7257f00] [c0000000000a7470] .kthread+0x78/0xc4
[c0000002f7257f90] [c000000000029b8c] .kernel_thread+0x4c/0x68
ibmvfc_3      D 0000000000000000 12432  1280      2
Call Trace:
[c0000002f7253540] [c0000002f72535f0] 0xc0000002f72535f0 (unreliable)
[c0000002f7253710] [c0000000000118f4] .__switch_to+0x158/0x1a0
[c0000002f72537b0] [c0000000004f8b40] .schedule+0x8d4/0x9dc
[c0000002f7253970] [c0000000004f8e98] .schedule_timeout+0x38/0xe8
[c0000002f7253a60] [c0000000004f80cc] .wait_for_common+0x138/0x220
[c0000002f7253b40] [c0000000000a2784] .flush_cpu_workqueue+0xac/0xcc
[c0000002f7253c10] [c0000000000a2960] .flush_workqueue+0x58/0xa0
[c0000002f7253ca0] [d0000000000827fc] .fc_flush_work+0x4c/0x64 [scsi_transport_fc]
[c0000002f7253d20] [d000000000082db4] .fc_remote_port_add+0x48/0x6c4 [scsi_transport_fc]
[c0000002f7253dd0] [d0000000001d7d04] .ibmvfc_work+0x820/0xa7c [ibmvfc]
[c0000002f7253f00] [c0000000000a7470] .kthread+0x78/0xc4
[c0000002f7253f90] [c000000000029b8c] .kernel_thread+0x4c/0x68
fc_wq_3       D 0000000000000000 10720  1283      2
Call Trace:
[c0000002f559ac30] [c0000002f559ace0] 0xc0000002f559ace0 (unreliable)
[c0000002f559ae00] [c0000000000118f4] .__switch_to+0x158/0x1a0
[c0000002f559aea0] [c0000000004f8b40] .schedule+0x8d4/0x9dc
[c0000002f559b060] [c0000000004f8e98] .schedule_timeout+0x38/0xe8
[c0000002f559b150] [c0000000004f80cc] .wait_for_common+0x138/0x220
[c0000002f559b230] [c0000000002721c4] .blk_execute_rq+0xb4/0x100
[c0000002f559b360] [d00000000036a1f8] .scsi_execute+0x118/0x194 [scsi_mod]
[c0000002f559b420] [d00000000036a32c] .scsi_execute_req+0xb8/0x124 [scsi_mod]
[c0000002f559b500] [d0000000000c1330] .sd_sync_cache+0x8c/0x108 [sd_mod]
[c0000002f559b5e0] [d0000000000c15b4] .sd_shutdown+0x9c/0x158 [sd_mod]
[c0000002f559b660] [d0000000000c16d0] .sd_remove+0x60/0xb4 [sd_mod]
[c0000002f559b700] [c000000000392ecc] .__device_release_driver+0xd0/0x118
[c0000002f559b7a0] [c000000000393080] .device_release_driver+0x30/0x54
[c0000002f559b830] [c000000000392108] .bus_remove_device+0x128/0x16c
[c0000002f559b8d0] [c00000000038f94c] .device_del+0x158/0x234
[c0000002f559b960] [d00000000036f078] .__scsi_remove_device+0x5c/0xd4 [scsi_mod]
[c0000002f559b9f0] [d00000000036f124] .scsi_remove_device+0x34/0x58 [scsi_mod]
[c0000002f559ba80] [d00000000036f204] .__scsi_remove_target+0xb4/0x120 [scsi_mod]
[c0000002f559bb10] [d00000000036f338] .__remove_child+0x2c/0x44 [scsi_mod]
[c0000002f559bb90] [c00000000038f11c] .device_for_each_child+0x54/0xb4
[c0000002f559bc50] [d00000000036f2e0] .scsi_remove_target+0x70/0x9c [scsi_mod]
[c0000002f559bce0] [d000000000083454] .fc_starget_delete+0x24/0x3c [scsi_transport_fc]
[c0000002f559bd70] [c0000000000a2368] .run_workqueue+0x118/0x208
[c0000002f559be30] [c0000000000a2580] .worker_thread+0x128/0x154
[c0000002f559bf00] [c0000000000a7470] .kthread+0x78/0xc4
[c0000002f559bf90] [c000000000029b8c] .kernel_thread+0x4c/0x68

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 182c8e7..76ae266 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -431,6 +431,8 @@ static void ibmvfc_set_tgt_action(struct ibmvfc_target *tgt,
 	case IBMVFC_TGT_ACTION_DEL_RPORT:
 		break;
 	default:
+		if (action == IBMVFC_TGT_ACTION_DEL_RPORT)
+			tgt->add_rport = 0;
 		tgt->action = action;
 		break;
 	}
@@ -483,7 +485,7 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
 		switch (vhost->action) {
 		case IBMVFC_HOST_ACTION_INIT_WAIT:
 		case IBMVFC_HOST_ACTION_NONE:
-		case IBMVFC_HOST_ACTION_TGT_ADD:
+		case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
 			vhost->action = action;
 			break;
 		default:
@@ -498,7 +500,6 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
 	case IBMVFC_HOST_ACTION_TGT_DEL:
 	case IBMVFC_HOST_ACTION_QUERY_TGTS:
 	case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
-	case IBMVFC_HOST_ACTION_TGT_ADD:
 	case IBMVFC_HOST_ACTION_NONE:
 	default:
 		vhost->action = action;
@@ -2306,7 +2307,7 @@ static int ibmvfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
 		done = 1;
 	}
 
-	if (vhost->state != IBMVFC_NO_CRQ && vhost->action == IBMVFC_HOST_ACTION_NONE)
+	if (vhost->scan_complete)
 		done = 1;
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	return done;
@@ -2820,7 +2821,7 @@ static void ibmvfc_tgt_prli_done(struct ibmvfc_event *evt)
 						tgt->ids.roles |= FC_PORT_ROLE_FCP_TARGET;
 					if (parms->service_parms & IBMVFC_PRLI_INITIATOR_FUNC)
 						tgt->ids.roles |= FC_PORT_ROLE_FCP_INITIATOR;
-					ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_ADD_RPORT);
+					tgt->add_rport = 1;
 				} else
 					ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 			} else if (prli_rsp[index].retry)
@@ -3660,7 +3661,6 @@ static int __ibmvfc_work_to_do(struct ibmvfc_host *vhost)
 		return 1;
 	case IBMVFC_HOST_ACTION_INIT:
 	case IBMVFC_HOST_ACTION_ALLOC_TGTS:
-	case IBMVFC_HOST_ACTION_TGT_ADD:
 	case IBMVFC_HOST_ACTION_TGT_DEL:
 	case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
 	case IBMVFC_HOST_ACTION_QUERY:
@@ -3715,25 +3715,26 @@ static void ibmvfc_log_ae(struct ibmvfc_host *vhost, int events)
 static void ibmvfc_tgt_add_rport(struct ibmvfc_target *tgt)
 {
 	struct ibmvfc_host *vhost = tgt->vhost;
-	struct fc_rport *rport = tgt->rport;
+	struct fc_rport *rport;
 	unsigned long flags;
 
-	if (rport) {
-		tgt_dbg(tgt, "Setting rport roles\n");
-		fc_remote_port_rolechg(rport, tgt->ids.roles);
-		spin_lock_irqsave(vhost->host->host_lock, flags);
-		ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
+	tgt_dbg(tgt, "Adding rport\n");
+	rport = fc_remote_port_add(vhost->host, 0, &tgt->ids);
+	spin_lock_irqsave(vhost->host->host_lock, flags);
+
+	if (rport && tgt->action == IBMVFC_TGT_ACTION_DEL_RPORT) {
+		tgt_dbg(tgt, "Deleting rport\n");
+		list_del(&tgt->queue);
 		spin_unlock_irqrestore(vhost->host->host_lock, flags);
+		fc_remote_port_delete(rport);
+		del_timer_sync(&tgt->timer);
+		kref_put(&tgt->kref, ibmvfc_release_tgt);
 		return;
 	}
 
-	tgt_dbg(tgt, "Adding rport\n");
-	rport = fc_remote_port_add(vhost->host, 0, &tgt->ids);
-	spin_lock_irqsave(vhost->host->host_lock, flags);
-	tgt->rport = rport;
-	ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
 	if (rport) {
 		tgt_dbg(tgt, "rport add succeeded\n");
+		tgt->rport = rport;
 		rport->maxframe_size = tgt->service_parms.common.bb_rcv_sz & 0x0fff;
 		rport->supported_classes = 0;
 		tgt->target_id = rport->scsi_target_id;
@@ -3811,11 +3812,21 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
 
 		if (vhost->state == IBMVFC_INITIALIZING) {
 			if (vhost->action == IBMVFC_HOST_ACTION_TGT_DEL_FAILED) {
-				ibmvfc_set_host_state(vhost, IBMVFC_ACTIVE);
-				ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_ADD);
-				vhost->init_retries = 0;
-				spin_unlock_irqrestore(vhost->host->host_lock, flags);
-				scsi_unblock_requests(vhost->host);
+				if (vhost->reinit) {
+					vhost->reinit = 0;
+					scsi_block_requests(vhost->host);
+					ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
+					spin_unlock_irqrestore(vhost->host->host_lock, flags);
+				} else {
+					ibmvfc_set_host_state(vhost, IBMVFC_ACTIVE);
+					ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
+					wake_up(&vhost->init_wait_q);
+					schedule_work(&vhost->rport_add_work_q);
+					vhost->init_retries = 0;
+					spin_unlock_irqrestore(vhost->host->host_lock, flags);
+					scsi_unblock_requests(vhost->host);
+				}
+
 				return;
 			} else {
 				ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
@@ -3846,24 +3857,6 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
 		if (!ibmvfc_dev_init_to_do(vhost))
 			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_DEL_FAILED);
 		break;
-	case IBMVFC_HOST_ACTION_TGT_ADD:
-		list_for_each_entry(tgt, &vhost->targets, queue) {
-			if (tgt->action == IBMVFC_TGT_ACTION_ADD_RPORT) {
-				spin_unlock_irqrestore(vhost->host->host_lock, flags);
-				ibmvfc_tgt_add_rport(tgt);
-				return;
-			}
-		}
-
-		if (vhost->reinit && !ibmvfc_set_host_state(vhost, IBMVFC_INITIALIZING)) {
-			vhost->reinit = 0;
-			scsi_block_requests(vhost->host);
-			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
-		} else {
-			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
-			wake_up(&vhost->init_wait_q);
-		}
-		break;
 	default:
 		break;
 	};
@@ -4093,6 +4086,56 @@ nomem:
 }
 
 /**
+ * ibmvfc_rport_add_thread - Worker thread for rport adds
+ * @work:	work struct
+ *
+ **/
+static void ibmvfc_rport_add_thread(struct work_struct *work)
+{
+	struct ibmvfc_host *vhost = container_of(work, struct ibmvfc_host,
+						 rport_add_work_q);
+	struct ibmvfc_target *tgt;
+	struct fc_rport *rport;
+	unsigned long flags;
+	int did_work;
+
+	ENTER;
+	spin_lock_irqsave(vhost->host->host_lock, flags);
+	do {
+		did_work = 0;
+		if (vhost->state != IBMVFC_ACTIVE)
+			break;
+
+		list_for_each_entry(tgt, &vhost->targets, queue) {
+			if (tgt->add_rport) {
+				did_work = 1;
+				tgt->add_rport = 0;
+				kref_get(&tgt->kref);
+				rport = tgt->rport;
+				if (!rport) {
+					spin_unlock_irqrestore(vhost->host->host_lock, flags);
+					ibmvfc_tgt_add_rport(tgt);
+				} else if (get_device(&rport->dev)) {
+					spin_unlock_irqrestore(vhost->host->host_lock, flags);
+					tgt_dbg(tgt, "Setting rport roles\n");
+					fc_remote_port_rolechg(rport, tgt->ids.roles);
+					put_device(&rport->dev);
+				}
+
+				kref_put(&tgt->kref, ibmvfc_release_tgt);
+				spin_lock_irqsave(vhost->host->host_lock, flags);
+				break;
+			}
+		}
+	} while(did_work);
+
+	if (vhost->state == IBMVFC_ACTIVE)
+		vhost->scan_complete = 1;
+	spin_unlock_irqrestore(vhost->host->host_lock, flags);
+	LEAVE;
+}
+
+/**
  * ibmvfc_probe - Adapter hot plug add entry point
  * @vdev:	vio device struct
  * @id:	vio device id struct
@@ -4135,6 +4178,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	strcpy(vhost->partition_name, "UNKNOWN");
 	init_waitqueue_head(&vhost->work_wait_q);
 	init_waitqueue_head(&vhost->init_wait_q);
+	INIT_WORK(&vhost->rport_add_work_q, ibmvfc_rport_add_thread);
 
 	if ((rc = ibmvfc_alloc_mem(vhost)))
 		goto free_scsi_host;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 4dac356..3a6a725 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -575,7 +575,6 @@ enum ibmvfc_target_action {
 	IBMVFC_TGT_ACTION_NONE = 0,
 	IBMVFC_TGT_ACTION_INIT,
 	IBMVFC_TGT_ACTION_INIT_WAIT,
-	IBMVFC_TGT_ACTION_ADD_RPORT,
 	IBMVFC_TGT_ACTION_DEL_RPORT,
 };
 
@@ -588,6 +587,7 @@ struct ibmvfc_target {
 	int target_id;
 	enum ibmvfc_target_action action;
 	int need_login;
+	int add_rport;
 	int init_retries;
 	u32 cancel_key;
 	struct ibmvfc_service_parms service_parms;
@@ -635,7 +635,6 @@ enum ibmvfc_host_action {
 	IBMVFC_HOST_ACTION_ALLOC_TGTS,
 	IBMVFC_HOST_ACTION_TGT_INIT,
 	IBMVFC_HOST_ACTION_TGT_DEL_FAILED,
-	IBMVFC_HOST_ACTION_TGT_ADD,
 };
 
 enum ibmvfc_host_state {
@@ -682,6 +681,7 @@ struct ibmvfc_host {
 	int client_migrated;
 	int reinit;
 	int delay_init;
+	int scan_complete;
 	int events_to_log;
 #define IBMVFC_AE_LINKUP	0x0001
 #define IBMVFC_AE_LINKDOWN	0x0002
@@ -692,6 +692,7 @@ struct ibmvfc_host {
 	void (*job_step) (struct ibmvfc_host *);
 	struct task_struct *work_thread;
 	struct tasklet_struct tasklet;
+	struct work_struct rport_add_work_q;
 	wait_queue_head_t init_wait_q;
 	wait_queue_head_t work_wait_q;
 };
-- 
cgit v0.10.2


From 79111d0899a122fa3cf0a2921292c3e6a25452d0 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:29 -0500
Subject: [SCSI] ibmvfc: Add support for NPIV Logout

This patch adds support for a new command supported by the Virtual I/O
Server, NPIV Logout. The command will abort all outstanding commands
and log out of the fabric. Currently, the only way to do this is
by breaking the CRQ, which can take a fairly long time when lots of
commands are outstanding. The NPIV Logout commands provides a mechanism
to accomplish virtually the same function, but is much faster.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 76ae266..6eb3b75 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -143,6 +143,7 @@ static void ibmvfc_npiv_login(struct ibmvfc_host *);
 static void ibmvfc_tgt_send_prli(struct ibmvfc_target *);
 static void ibmvfc_tgt_send_plogi(struct ibmvfc_target *);
 static void ibmvfc_tgt_query_target(struct ibmvfc_target *);
+static void ibmvfc_npiv_logout(struct ibmvfc_host *);
 
 static const char *unknown_error = "unknown error";
 
@@ -477,6 +478,10 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
 		if (vhost->action == IBMVFC_HOST_ACTION_INIT_WAIT)
 			vhost->action = action;
 		break;
+	case IBMVFC_HOST_ACTION_LOGO_WAIT:
+		if (vhost->action == IBMVFC_HOST_ACTION_LOGO)
+			vhost->action = action;
+		break;
 	case IBMVFC_HOST_ACTION_INIT_WAIT:
 		if (vhost->action == IBMVFC_HOST_ACTION_INIT)
 			vhost->action = action;
@@ -496,6 +501,7 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
 		if (vhost->action == IBMVFC_HOST_ACTION_ALLOC_TGTS)
 			vhost->action = action;
 		break;
+	case IBMVFC_HOST_ACTION_LOGO:
 	case IBMVFC_HOST_ACTION_INIT:
 	case IBMVFC_HOST_ACTION_TGT_DEL:
 	case IBMVFC_HOST_ACTION_QUERY_TGTS:
@@ -647,6 +653,7 @@ static void ibmvfc_release_crq_queue(struct ibmvfc_host *vhost)
 	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
 	vhost->state = IBMVFC_NO_CRQ;
+	vhost->logged_in = 0;
 	dma_unmap_single(vhost->dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
 	free_page((unsigned long)crq->msgs);
 }
@@ -693,6 +700,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
 	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
 	vhost->state = IBMVFC_NO_CRQ;
+	vhost->logged_in = 0;
 	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
 
 	/* Clean out the queue */
@@ -808,10 +816,10 @@ static void ibmvfc_purge_requests(struct ibmvfc_host *vhost, int error_code)
 }
 
 /**
- * __ibmvfc_reset_host - Reset the connection to the server (no locking)
+ * ibmvfc_hard_reset_host - Reset the connection to the server by breaking the CRQ
  * @vhost:	struct ibmvfc host to reset
  **/
-static void __ibmvfc_reset_host(struct ibmvfc_host *vhost)
+static void ibmvfc_hard_reset_host(struct ibmvfc_host *vhost)
 {
 	int rc;
 
@@ -827,9 +835,25 @@ static void __ibmvfc_reset_host(struct ibmvfc_host *vhost)
 }
 
 /**
- * ibmvfc_reset_host - Reset the connection to the server
+ * __ibmvfc_reset_host - Reset the connection to the server (no locking)
  * @vhost:	struct ibmvfc host to reset
  **/
+static void __ibmvfc_reset_host(struct ibmvfc_host *vhost)
+{
+	if (vhost->logged_in && vhost->action != IBMVFC_HOST_ACTION_LOGO_WAIT &&
+	    !ibmvfc_set_host_state(vhost, IBMVFC_INITIALIZING)) {
+		scsi_block_requests(vhost->host);
+		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_LOGO);
+		vhost->job_step = ibmvfc_npiv_logout;
+		wake_up(&vhost->work_wait_q);
+	} else
+		ibmvfc_hard_reset_host(vhost);
+}
+
+/**
+ * ibmvfc_reset_host - Reset the connection to the server
+ * @vhost:	ibmvfc host struct
+ **/
 static void ibmvfc_reset_host(struct ibmvfc_host *vhost)
 {
 	unsigned long flags;
@@ -2230,6 +2254,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
 		return;
 	case IBMVFC_CRQ_XPORT_EVENT:
 		vhost->state = IBMVFC_NO_CRQ;
+		vhost->logged_in = 0;
 		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
 		if (crq->format == IBMVFC_PARTITION_MIGRATED) {
 			/* We need to re-setup the interpartition connection */
@@ -3554,6 +3579,7 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
 		return;
 	}
 
+	vhost->logged_in = 1;
 	npiv_max_sectors = min((uint)(rsp->max_dma_len >> 9), IBMVFC_MAX_SECTORS);
 	dev_info(vhost->dev, "Host partition: %s, device: %s %s %s max sectors %u\n",
 		 rsp->partition_name, rsp->device_name, rsp->port_loc_code,
@@ -3612,6 +3638,65 @@ static void ibmvfc_npiv_login(struct ibmvfc_host *vhost)
 };
 
 /**
+ * ibmvfc_npiv_logout_done - Completion handler for NPIV Logout
+ * @vhost:		ibmvfc host struct
+ *
+ **/
+static void ibmvfc_npiv_logout_done(struct ibmvfc_event *evt)
+{
+	struct ibmvfc_host *vhost = evt->vhost;
+	u32 mad_status = evt->xfer_iu->npiv_logout.common.status;
+
+	ibmvfc_free_event(evt);
+
+	switch (mad_status) {
+	case IBMVFC_MAD_SUCCESS:
+		if (list_empty(&vhost->sent) &&
+		    vhost->action == IBMVFC_HOST_ACTION_LOGO_WAIT) {
+			ibmvfc_init_host(vhost, 0);
+			return;
+		}
+		break;
+	case IBMVFC_MAD_FAILED:
+	case IBMVFC_MAD_NOT_SUPPORTED:
+	case IBMVFC_MAD_CRQ_ERROR:
+	case IBMVFC_MAD_DRIVER_FAILED:
+	default:
+		ibmvfc_dbg(vhost, "NPIV Logout failed. 0x%X\n", mad_status);
+		break;
+	}
+
+	ibmvfc_hard_reset_host(vhost);
+}
+
+/**
+ * ibmvfc_npiv_logout - Issue an NPIV Logout
+ * @vhost:		ibmvfc host struct
+ *
+ **/
+static void ibmvfc_npiv_logout(struct ibmvfc_host *vhost)
+{
+	struct ibmvfc_npiv_logout_mad *mad;
+	struct ibmvfc_event *evt;
+
+	evt = ibmvfc_get_event(vhost);
+	ibmvfc_init_event(evt, ibmvfc_npiv_logout_done, IBMVFC_MAD_FORMAT);
+
+	mad = &evt->iu.npiv_logout;
+	memset(mad, 0, sizeof(*mad));
+	mad->common.version = 1;
+	mad->common.opcode = IBMVFC_NPIV_LOGOUT;
+	mad->common.length = sizeof(struct ibmvfc_npiv_logout_mad);
+
+	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_LOGO_WAIT);
+
+	if (!ibmvfc_send_event(evt, vhost, default_timeout))
+		ibmvfc_dbg(vhost, "Sent NPIV logout\n");
+	else
+		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+}
+
+/**
  * ibmvfc_dev_init_to_do - Is there target initialization work to do?
  * @vhost:		ibmvfc host struct
  *
@@ -3647,6 +3732,7 @@ static int __ibmvfc_work_to_do(struct ibmvfc_host *vhost)
 	switch (vhost->action) {
 	case IBMVFC_HOST_ACTION_NONE:
 	case IBMVFC_HOST_ACTION_INIT_WAIT:
+	case IBMVFC_HOST_ACTION_LOGO_WAIT:
 		return 0;
 	case IBMVFC_HOST_ACTION_TGT_INIT:
 	case IBMVFC_HOST_ACTION_QUERY_TGTS:
@@ -3659,6 +3745,7 @@ static int __ibmvfc_work_to_do(struct ibmvfc_host *vhost)
 			if (tgt->action == IBMVFC_TGT_ACTION_INIT_WAIT)
 				return 0;
 		return 1;
+	case IBMVFC_HOST_ACTION_LOGO:
 	case IBMVFC_HOST_ACTION_INIT:
 	case IBMVFC_HOST_ACTION_ALLOC_TGTS:
 	case IBMVFC_HOST_ACTION_TGT_DEL:
@@ -3765,8 +3852,12 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
 	vhost->events_to_log = 0;
 	switch (vhost->action) {
 	case IBMVFC_HOST_ACTION_NONE:
+	case IBMVFC_HOST_ACTION_LOGO_WAIT:
 	case IBMVFC_HOST_ACTION_INIT_WAIT:
 		break;
+	case IBMVFC_HOST_ACTION_LOGO:
+		vhost->job_step(vhost);
+		break;
 	case IBMVFC_HOST_ACTION_INIT:
 		BUG_ON(vhost->state != IBMVFC_INITIALIZING);
 		if (vhost->delay_init) {
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 3a6a725..6adaad8 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -57,9 +57,10 @@
  * Ensure we have resources for ERP and initialization:
  * 1 for ERP
  * 1 for initialization
+ * 1 for NPIV Logout
  * 2 for each discovery thread
  */
-#define IBMVFC_NUM_INTERNAL_REQ	(1 + 1 + (disc_threads * 2))
+#define IBMVFC_NUM_INTERNAL_REQ	(1 + 1 + 1 + (disc_threads * 2))
 
 #define IBMVFC_MAD_SUCCESS		0x00
 #define IBMVFC_MAD_NOT_SUPPORTED	0xF1
@@ -127,6 +128,7 @@ enum ibmvfc_mad_types {
 	IBMVFC_IMPLICIT_LOGOUT	= 0x0040,
 	IBMVFC_PASSTHRU		= 0x0200,
 	IBMVFC_TMF_MAD		= 0x0100,
+	IBMVFC_NPIV_LOGOUT	= 0x0800,
 };
 
 struct ibmvfc_mad_common {
@@ -143,6 +145,10 @@ struct ibmvfc_npiv_login_mad {
 	struct srp_direct_buf buffer;
 }__attribute__((packed, aligned (8)));
 
+struct ibmvfc_npiv_logout_mad {
+	struct ibmvfc_mad_common common;
+}__attribute__((packed, aligned (8)));
+
 #define IBMVFC_MAX_NAME 256
 
 struct ibmvfc_npiv_login {
@@ -561,6 +567,7 @@ struct ibmvfc_async_crq_queue {
 union ibmvfc_iu {
 	struct ibmvfc_mad_common mad_common;
 	struct ibmvfc_npiv_login_mad npiv_login;
+	struct ibmvfc_npiv_logout_mad npiv_logout;
 	struct ibmvfc_discover_targets discover_targets;
 	struct ibmvfc_port_login plogi;
 	struct ibmvfc_process_login prli;
@@ -627,6 +634,8 @@ struct ibmvfc_event_pool {
 
 enum ibmvfc_host_action {
 	IBMVFC_HOST_ACTION_NONE = 0,
+	IBMVFC_HOST_ACTION_LOGO,
+	IBMVFC_HOST_ACTION_LOGO_WAIT,
 	IBMVFC_HOST_ACTION_INIT,
 	IBMVFC_HOST_ACTION_INIT_WAIT,
 	IBMVFC_HOST_ACTION_QUERY,
@@ -682,6 +691,7 @@ struct ibmvfc_host {
 	int reinit;
 	int delay_init;
 	int scan_complete;
+	int logged_in;
 	int events_to_log;
 #define IBMVFC_AE_LINKUP	0x0001
 #define IBMVFC_AE_LINKDOWN	0x0002
-- 
cgit v0.10.2


From 497f9c504f76e7a751cd370604e1c8521743746d Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:30 -0500
Subject: [SCSI] ibmvfc: Add flush on halt support

The virtual I/O server controlling the NPIV adapter associated with
a virtual fibre channel adapter can send a HALT event to the client.
When this occurs, the client can no longer send commands until a RESUME
is received. By adding support for flush on halt, we will get all of
our outstanding commands flushed back before the Virtual I/O server
enters the halt state, eliminating potential command timeouts for
outstanding commands which might occur if we did not support this feature.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 6eb3b75..04d97d9 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -110,7 +110,7 @@ static const struct {
 	{ IBMVFC_FABRIC_MAPPED, IBMVFC_XPORT_DEAD, DID_ERROR, 0, 1, "transport dead" },
 	{ IBMVFC_FABRIC_MAPPED, IBMVFC_CONFIG_ERROR, DID_ERROR, 1, 1, "configuration error" },
 	{ IBMVFC_FABRIC_MAPPED, IBMVFC_NAME_SERVER_FAIL, DID_ERROR, 1, 1, "name server failure" },
-	{ IBMVFC_FABRIC_MAPPED, IBMVFC_LINK_HALTED, DID_REQUEUE, 0, 0, "link halted" },
+	{ IBMVFC_FABRIC_MAPPED, IBMVFC_LINK_HALTED, DID_REQUEUE, 1, 0, "link halted" },
 	{ IBMVFC_FABRIC_MAPPED, IBMVFC_XPORT_GENERAL, DID_OK, 1, 0, "general transport error" },
 
 	{ IBMVFC_VIOS_FAILURE, IBMVFC_CRQ_FAILURE, DID_REQUEUE, 1, 1, "CRQ failure" },
@@ -1169,8 +1169,9 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
 	login_info->partition_num = vhost->partition_number;
 	login_info->vfc_frame_version = 1;
 	login_info->fcp_version = 3;
+	login_info->flags = IBMVFC_FLUSH_ON_HALT;
 	if (vhost->client_migrated)
-		login_info->flags = IBMVFC_CLIENT_MIGRATED;
+		login_info->flags |= IBMVFC_CLIENT_MIGRATED;
 
 	login_info->max_cmds = max_requests + IBMVFC_NUM_INTERNAL_REQ;
 	login_info->capabilities = IBMVFC_CAN_MIGRATE;
@@ -2185,8 +2186,25 @@ static void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
 		   " node_name: %llx\n", desc, crq->scsi_id, crq->wwpn, crq->node_name);
 
 	switch (crq->event) {
-	case IBMVFC_AE_LINK_UP:
 	case IBMVFC_AE_RESUME:
+		switch (crq->link_state) {
+		case IBMVFC_AE_LS_LINK_DOWN:
+			ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+			break;
+		case IBMVFC_AE_LS_LINK_DEAD:
+			ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+			break;
+		case IBMVFC_AE_LS_LINK_UP:
+		case IBMVFC_AE_LS_LINK_BOUNCED:
+		default:
+			vhost->events_to_log |= IBMVFC_AE_LINKUP;
+			vhost->delay_init = 1;
+			__ibmvfc_reset_host(vhost);
+			break;
+		};
+
+		break;
+	case IBMVFC_AE_LINK_UP:
 		vhost->events_to_log |= IBMVFC_AE_LINKUP;
 		vhost->delay_init = 1;
 		__ibmvfc_reset_host(vhost);
@@ -2505,6 +2523,14 @@ static ssize_t ibmvfc_show_host_npiv_version(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", vhost->login_buf->resp.version);
 }
 
+static ssize_t ibmvfc_show_host_capabilities(struct device *dev,
+					     struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ibmvfc_host *vhost = shost_priv(shost);
+	return snprintf(buf, PAGE_SIZE, "%llx\n", vhost->login_buf->resp.capabilities);
+}
+
 /**
  * ibmvfc_show_log_level - Show the adapter's error logging level
  * @dev:	class device struct
@@ -2554,6 +2580,7 @@ static DEVICE_ATTR(device_name, S_IRUGO, ibmvfc_show_host_device_name, NULL);
 static DEVICE_ATTR(port_loc_code, S_IRUGO, ibmvfc_show_host_loc_code, NULL);
 static DEVICE_ATTR(drc_name, S_IRUGO, ibmvfc_show_host_drc_name, NULL);
 static DEVICE_ATTR(npiv_version, S_IRUGO, ibmvfc_show_host_npiv_version, NULL);
+static DEVICE_ATTR(capabilities, S_IRUGO, ibmvfc_show_host_capabilities, NULL);
 static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR,
 		   ibmvfc_show_log_level, ibmvfc_store_log_level);
 
@@ -2609,6 +2636,7 @@ static struct device_attribute *ibmvfc_attrs[] = {
 	&dev_attr_port_loc_code,
 	&dev_attr_drc_name,
 	&dev_attr_npiv_version,
+	&dev_attr_capabilities,
 	&dev_attr_log_level,
 	NULL
 };
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 6adaad8..cf26380 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -207,7 +207,8 @@ struct ibmvfc_npiv_login_resp {
 #define IBMVFC_NATIVE_FC		0x01
 #define IBMVFC_CAN_FLUSH_ON_HALT	0x08
 	u32 reserved;
-	u64 capabilites;
+	u64 capabilities;
+#define IBMVFC_CAN_FLUSH_ON_HALT	0x08
 	u32 max_cmds;
 	u32 scsi_id_sz;
 	u64 max_dma_len;
@@ -547,9 +548,17 @@ struct ibmvfc_crq_queue {
 	dma_addr_t msg_token;
 };
 
+enum ibmvfc_ae_link_state {
+	IBMVFC_AE_LS_LINK_UP		= 0x01,
+	IBMVFC_AE_LS_LINK_BOUNCED	= 0x02,
+	IBMVFC_AE_LS_LINK_DOWN		= 0x04,
+	IBMVFC_AE_LS_LINK_DEAD		= 0x08,
+};
+
 struct ibmvfc_async_crq {
 	volatile u8 valid;
-	u8 pad[3];
+	u8 link_state;
+	u8 pad[2];
 	u32 pad2;
 	volatile u64 event;
 	volatile u64 scsi_id;
-- 
cgit v0.10.2


From 5e47167b6be0ca24cbb04fb71ea611ab7c089aff Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:32 -0500
Subject: [SCSI] ibmvfc: Improve device rediscovery

For certain scenarios during device rediscovery, we detect we need
to log back into a target. Currently we do just that - PLOGI/PRLI
back into the target. Change the code to delete and add the target
from the FC transport layer as well, to ensure we handle any cases
where the target may have changed.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 04d97d9..da23325 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -583,7 +583,7 @@ static void ibmvfc_init_host(struct ibmvfc_host *vhost, int relogin)
 		}
 
 		list_for_each_entry(tgt, &vhost->targets, queue)
-			tgt->need_login = 1;
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		scsi_block_requests(vhost->host);
 		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
 		vhost->job_step = ibmvfc_npiv_login;
@@ -3155,13 +3155,13 @@ static void ibmvfc_tgt_adisc_done(struct ibmvfc_event *evt)
 	case IBMVFC_MAD_SUCCESS:
 		tgt_dbg(tgt, "ADISC succeeded\n");
 		if (ibmvfc_adisc_needs_plogi(mad, tgt))
-			tgt->need_login = 1;
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		break;
 	case IBMVFC_MAD_DRIVER_FAILED:
 		break;
 	case IBMVFC_MAD_FAILED:
 	default:
-		tgt->need_login = 1;
+		ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
 		fc_reason = (mad->fc_iu.response[1] & 0x00ff0000) >> 16;
 		fc_explain = (mad->fc_iu.response[1] & 0x0000ff00) >> 8;
 		tgt_info(tgt, "ADISC failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
-- 
cgit v0.10.2


From 6d29cc56bead73b6f386cf43333708579deb5eed Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:33 -0500
Subject: [SCSI] ibmvfc: Improve LOGO/PRLO ELS handling

There are several scenarios where the ibmvfc driver needs to
try to log back into a target on the fabric. Today when these events
occur, we simply go through re-discovery for all attached targets,
assuming that either the query of the name server or an ADISC will
indicate we might need to log back into the target, which doesn't
work for all scenarios. Fix this by taking note of the affected target(s)
in these conditions and ensuring we try to PLOGI back into the target.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index da23325..b4b805e 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1485,6 +1485,27 @@ static void ibmvfc_log_error(struct ibmvfc_event *evt)
 }
 
 /**
+ * ibmvfc_relogin - Log back into the specified device
+ * @sdev:	scsi device struct
+ *
+ **/
+static void ibmvfc_relogin(struct scsi_device *sdev)
+{
+	struct ibmvfc_host *vhost = shost_priv(sdev->host);
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+	struct ibmvfc_target *tgt;
+
+	list_for_each_entry(tgt, &vhost->targets, queue) {
+		if (rport == tgt->rport) {
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+			break;
+		}
+	}
+
+	ibmvfc_reinit_host(vhost);
+}
+
+/**
  * ibmvfc_scsi_done - Handle responses from commands
  * @evt:	ibmvfc event to be handled
  *
@@ -1516,7 +1537,7 @@ static void ibmvfc_scsi_done(struct ibmvfc_event *evt)
 			if ((rsp->flags & FCP_SNS_LEN_VALID) && rsp->fcp_sense_len && rsp_len <= 8)
 				memcpy(cmnd->sense_buffer, rsp->data.sense + rsp_len, sense_len);
 			if ((vfc_cmd->status & IBMVFC_VIOS_FAILURE) && (vfc_cmd->error == IBMVFC_PLOGI_REQUIRED))
-				ibmvfc_reinit_host(evt->vhost);
+				ibmvfc_relogin(cmnd->device);
 
 			if (!cmnd->result && (!scsi_get_resid(cmnd) || (rsp->flags & FCP_RESID_OVER)))
 				cmnd->result = (DID_ERROR << 16);
@@ -2181,6 +2202,7 @@ static void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
 				struct ibmvfc_host *vhost)
 {
 	const char *desc = ibmvfc_get_ae_desc(crq->event);
+	struct ibmvfc_target *tgt;
 
 	ibmvfc_log(vhost, 3, "%s event received. scsi_id: %llx, wwpn: %llx,"
 		   " node_name: %llx\n", desc, crq->scsi_id, crq->wwpn, crq->node_name);
@@ -2218,9 +2240,23 @@ static void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
 	case IBMVFC_AE_SCN_NPORT:
 	case IBMVFC_AE_SCN_GROUP:
 		vhost->events_to_log |= IBMVFC_AE_RSCN;
+		ibmvfc_reinit_host(vhost);
+		break;
 	case IBMVFC_AE_ELS_LOGO:
 	case IBMVFC_AE_ELS_PRLO:
 	case IBMVFC_AE_ELS_PLOGI:
+		list_for_each_entry(tgt, &vhost->targets, queue) {
+			if (!crq->scsi_id && !crq->wwpn && !crq->node_name)
+				break;
+			if (crq->scsi_id && tgt->scsi_id != crq->scsi_id)
+				continue;
+			if (crq->wwpn && tgt->ids.port_name != crq->wwpn)
+				continue;
+			if (crq->node_name && tgt->ids.node_name != crq->node_name)
+				continue;
+			ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+		}
+
 		ibmvfc_reinit_host(vhost);
 		break;
 	case IBMVFC_AE_LINK_DOWN:
-- 
cgit v0.10.2


From cbbf58f2e2cb73dbed660fdc7b741a010d6bdbef Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Thu, 28 May 2009 16:17:34 -0500
Subject: [SCSI] ibmvfc: Driver version 1.0.6

Bump driver version

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index cf26380..c2668d7 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -29,8 +29,8 @@
 #include "viosrp.h"
 
 #define IBMVFC_NAME	"ibmvfc"
-#define IBMVFC_DRIVER_VERSION		"1.0.5"
-#define IBMVFC_DRIVER_DATE		"(March 19, 2009)"
+#define IBMVFC_DRIVER_VERSION		"1.0.6"
+#define IBMVFC_DRIVER_DATE		"(May 28, 2009)"
 
 #define IBMVFC_DEFAULT_TIMEOUT	60
 #define IBMVFC_ADISC_CANCEL_TIMEOUT	45
-- 
cgit v0.10.2


From f00a3328bf9ecff46abd68a421693ba71cd16fc8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 30 May 2009 13:40:04 +1000
Subject: [SCSI] cxgb3i: Include net/dst.h for struct dst_cache

This driver needs dst_cache->dev so it should include net/dst.h
to ensure that it builds.  While net/tcp.h probably includes it
already, we shouldn't rely on that since there is no guarantee
that this won't change in future.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
index 04a4374..74369a3 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
@@ -13,6 +13,7 @@
 
 #include <linux/inet.h>
 #include <linux/crypto.h>
+#include <net/dst.h>
 #include <net/tcp.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
-- 
cgit v0.10.2


From 5f48f70ecef25df93e122985272ff647f5653836 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Wed, 6 May 2009 10:52:12 -0700
Subject: [SCSI] libfcoe: fip: fix non-FIP-mode FLOGI state after reset.

When a reset is sent using fcoeadm on a non-FIP mode NIC,
there's no link flap, so the fcoe_ctlr stays in non-FIP mode.

In that case, FIP wasn't setting the flogi_oxid or map_dest flag,
causing the FLOGI to be sent with the both wrong source MAC and
the wrong destination MAC address, causing it to fail.

This leads to a non-functioning HBA until a link flap or
instance delete/create.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index 62ba0f3..a7ecafb 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -447,14 +447,10 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	u16 old_xid;
 	u8 op;
 
-	if (fip->state == FIP_ST_NON_FIP)
-		return 0;
-
 	fh = (struct fc_frame_header *)skb->data;
 	op = *(u8 *)(fh + 1);
 
-	switch (op) {
-	case ELS_FLOGI:
+	if (op == ELS_FLOGI) {
 		old_xid = fip->flogi_oxid;
 		fip->flogi_oxid = ntohs(fh->fh_ox_id);
 		if (fip->state == FIP_ST_AUTO) {
@@ -466,6 +462,15 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
 			fip->map_dest = 1;
 			return 0;
 		}
+		if (fip->state == FIP_ST_NON_FIP)
+			fip->map_dest = 1;
+	}
+
+	if (fip->state == FIP_ST_NON_FIP)
+		return 0;
+
+	switch (op) {
+	case ELS_FLOGI:
 		op = FIP_DT_FLOGI;
 		break;
 	case ELS_FDISC:
-- 
cgit v0.10.2


From 0f4915398a4233cdbfc4e9bf4436323546945b3f Mon Sep 17 00:00:00 2001
From: Chris Leech <christopher.leech@intel.com>
Date: Wed, 6 May 2009 10:52:18 -0700
Subject: [SCSI] fcoe: use ETH_P_FIP for skb->protocol of FIP frames

FIP frames should leave the fcoe layer with skb->protocol set to
ETH_P_FIP, not ETH_P_802_3.

Signed-off-by: Chris Leech <christopher.leech@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index a7ecafb..9294118 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -213,7 +213,7 @@ static void fcoe_ctlr_solicit(struct fcoe_ctlr *fip, struct fcoe_fcf *fcf)
 	sol->desc.size.fd_size = htons(fcoe_size);
 
 	skb_put(skb, sizeof(*sol));
-	skb->protocol = htons(ETH_P_802_3);
+	skb->protocol = htons(ETH_P_FIP);
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	fip->send(fip, skb);
@@ -365,7 +365,7 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, int ports, u8 *sa)
 	}
 
 	skb_put(skb, len);
-	skb->protocol = htons(ETH_P_802_3);
+	skb->protocol = htons(ETH_P_FIP);
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	fip->send(fip, skb);
@@ -424,7 +424,7 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip,
 	if (dtype != ELS_FLOGI)
 		memcpy(mac->fd_mac, fip->data_src_addr, ETH_ALEN);
 
-	skb->protocol = htons(ETH_P_802_3);
+	skb->protocol = htons(ETH_P_FIP);
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	return 0;
-- 
cgit v0.10.2


From d5e6054a0a097527b3920a8a0aefe7f830c014fd Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 6 May 2009 10:52:23 -0700
Subject: [SCSI] libfc: use DID_ERROR when we have internall aborted command

If we aborted a command, because it timed out we should not use
DID_ABORT. It will fail the command right away back to the upper
layer. We want to use something that indicated that the problem
did not complete normally, but it was not a fatal problem.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 521f996..ad8b747 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -1896,7 +1896,7 @@ static void fc_io_compl(struct fc_fcp_pkt *fsp)
 		sc_cmd->result = (DID_ERROR << 16) | fsp->cdb_status;
 		break;
 	case FC_CMD_ABORTED:
-		sc_cmd->result = (DID_ABORT << 16) | fsp->io_status;
+		sc_cmd->result = (DID_ERROR << 16) | fsp->io_status;
 		break;
 	case FC_CMD_TIME_OUT:
 		sc_cmd->result = (DID_BUS_BUSY << 16) | fsp->io_status;
-- 
cgit v0.10.2


From 30121d14f503dac056ee7f68d99eb5d548899b59 Mon Sep 17 00:00:00 2001
From: Steve Ma <steve.ma@intel.com>
Date: Wed, 6 May 2009 10:52:29 -0700
Subject: [SCSI] libfc: Check if exchange is completed when receiving a
 sequence

When a sequence is received in response to an exchange we issued previously,
we should check to see if the exchange has completed. If yes, the sequence
should be discarded. Since the exchange might be still in the completion
process, it should be untouched.

Signed-off-by: Steve Ma <steve.ma@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 992af05..7af9bce 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -1159,6 +1159,10 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp)
 		atomic_inc(&mp->stats.xid_not_found);
 		goto out;
 	}
+	if (ep->esb_stat & ESB_ST_COMPLETE) {
+		atomic_inc(&mp->stats.xid_not_found);
+		goto out;
+	}
 	if (ep->rxid == FC_XID_UNKNOWN)
 		ep->rxid = ntohs(fh->fh_rx_id);
 	if (ep->sid != 0 && ep->sid != ntoh24(fh->fh_d_id)) {
-- 
cgit v0.10.2


From 4bb6b5153313269b4b328f4f5ddc558c45c50713 Mon Sep 17 00:00:00 2001
From: Vasu Dev <vasu.dev@intel.com>
Date: Wed, 6 May 2009 10:52:34 -0700
Subject: [SCSI] fcoe: reduces lock cost when adding a new skb to
 fcoe_pending_queue

Currently fcoe_pending_queue.lock held twice for every new skb
adding to this queue when already least one pkt is pending in this
queue and that is not uncommon once skb pkts starts getting queued
here upon fcoe_start_io => dev_queue_xmit failure.

This patch moves most fcoe_pending_queue logic to fcoe_check_wait_queue
function, this new logic grabs fcoe_pending_queue.lock only once to
add a new skb instead twice as used to be.

I think after this patch call flow around fcoe_check_wait_queue
calling in fcoe_xmit is bit simplified with modified
fcoe_check_wait_queue function taking care of adding and
removing pending skb in one function.

Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 6acb777..30eba75 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -71,7 +71,7 @@ static struct fc_lport *fcoe_hostlist_lookup(const struct net_device *);
 static int fcoe_hostlist_add(const struct fc_lport *);
 static int fcoe_hostlist_remove(const struct fc_lport *);
 
-static int fcoe_check_wait_queue(struct fc_lport *);
+static void fcoe_check_wait_queue(struct fc_lport *, struct sk_buff *);
 static int fcoe_device_notification(struct notifier_block *, ulong, void *);
 static void fcoe_dev_setup(void);
 static void fcoe_dev_cleanup(void);
@@ -989,7 +989,7 @@ u32 fcoe_fc_crc(struct fc_frame *fp)
  */
 int fcoe_xmit(struct fc_lport *lp, struct fc_frame *fp)
 {
-	int wlen, rc = 0;
+	int wlen;
 	u32 crc;
 	struct ethhdr *eh;
 	struct fcoe_crc_eof *cp;
@@ -1108,18 +1108,9 @@ int fcoe_xmit(struct fc_lport *lp, struct fc_frame *fp)
 	/* send down to lld */
 	fr_dev(fp) = lp;
 	if (fc->fcoe_pending_queue.qlen)
-		rc = fcoe_check_wait_queue(lp);
-
-	if (rc == 0)
-		rc = fcoe_start_io(skb);
-
-	if (rc) {
-		spin_lock_bh(&fc->fcoe_pending_queue.lock);
-		__skb_queue_tail(&fc->fcoe_pending_queue, skb);
-		spin_unlock_bh(&fc->fcoe_pending_queue.lock);
-		if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH)
-			lp->qfull = 1;
-	}
+		fcoe_check_wait_queue(lp, skb);
+	else if (fcoe_start_io(skb))
+		fcoe_check_wait_queue(lp, skb);
 
 	return 0;
 }
@@ -1285,7 +1276,7 @@ void fcoe_watchdog(ulong vp)
 	read_lock(&fcoe_hostlist_lock);
 	list_for_each_entry(fc, &fcoe_hostlist, list) {
 		if (fc->ctlr.lp)
-			fcoe_check_wait_queue(fc->ctlr.lp);
+			fcoe_check_wait_queue(fc->ctlr.lp, NULL);
 	}
 	read_unlock(&fcoe_hostlist_lock);
 
@@ -1306,16 +1297,17 @@ void fcoe_watchdog(ulong vp)
  * The wait_queue is used when the skb transmit fails. skb will go
  * in the wait_queue which will be emptied by the timer function or
  * by the next skb transmit.
- *
- * Returns: 0 for success
  */
-static int fcoe_check_wait_queue(struct fc_lport *lp)
+static void fcoe_check_wait_queue(struct fc_lport *lp, struct sk_buff *skb)
 {
 	struct fcoe_softc *fc = lport_priv(lp);
-	struct sk_buff *skb;
-	int rc = -1;
+	int rc;
 
 	spin_lock_bh(&fc->fcoe_pending_queue.lock);
+
+	if (skb)
+		__skb_queue_tail(&fc->fcoe_pending_queue, skb);
+
 	if (fc->fcoe_pending_queue_active)
 		goto out;
 	fc->fcoe_pending_queue_active = 1;
@@ -1342,10 +1334,11 @@ static int fcoe_check_wait_queue(struct fc_lport *lp)
 	if (fc->fcoe_pending_queue.qlen < FCOE_LOW_QUEUE_DEPTH)
 		lp->qfull = 0;
 	fc->fcoe_pending_queue_active = 0;
-	rc = fc->fcoe_pending_queue.qlen;
 out:
+	if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH)
+		lp->qfull = 1;
 	spin_unlock_bh(&fc->fcoe_pending_queue.lock);
-	return rc;
+	return;
 }
 
 /**
-- 
cgit v0.10.2


From 1047f22108bd9bfedefd3ff014cb56691dfbaa3f Mon Sep 17 00:00:00 2001
From: Vasu Dev <vasu.dev@intel.com>
Date: Wed, 6 May 2009 10:52:40 -0700
Subject: [SCSI] fcoe: removes fcoe_watchdog

Removes periodic fcoe_watchdog timer used across all fcoe interface
maintained in fcoe_hostlist instead added new fcoe_queue_timer
per fcoe interface.

Added timer is armed only when some pending skb need to be flushed
as oppose to periodic 1 second fcoe_watchdog, since now
fcoe_queue_timer is used on demand thus set this to 2 jiffies.

Now fcoe_queue_timer is much simple than fcoe_watchdog using lock to
process all fcoe interface from fcoe_hostlist.

I noticed +ve performance result with using 2 jiffies timer as
this helps flushing fcoe_pending_queue quickly.

Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 30eba75..6e7a700 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -54,7 +54,6 @@ MODULE_LICENSE("GPL v2");
 /* fcoe host list */
 LIST_HEAD(fcoe_hostlist);
 DEFINE_RWLOCK(fcoe_hostlist_lock);
-DEFINE_TIMER(fcoe_timer, NULL, 0, 0);
 DEFINE_PER_CPU(struct fcoe_percpu_s, fcoe_percpu);
 
 /* Function Prototypes */
@@ -168,6 +167,18 @@ static int fcoe_lport_config(struct fc_lport *lp)
 }
 
 /**
+ * fcoe_queue_timer() - fcoe queue timer
+ * @lp: the fc_lport pointer
+ *
+ * Calls fcoe_check_wait_queue on timeout
+ *
+ */
+static void fcoe_queue_timer(ulong lp)
+{
+	fcoe_check_wait_queue((struct fc_lport *)lp, NULL);
+}
+
+/**
  * fcoe_netdev_config() - Set up netdev for SW FCoE
  * @lp : ptr to the fc_lport
  * @netdev : ptr to the associated netdevice struct
@@ -237,6 +248,7 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev)
 	}
 	skb_queue_head_init(&fc->fcoe_pending_queue);
 	fc->fcoe_pending_queue_active = 0;
+	setup_timer(&fc->timer, fcoe_queue_timer, (unsigned long)lp);
 
 	/* setup Source Mac Address */
 	memcpy(fc->ctlr.ctl_src_addr, fc->real_dev->dev_addr,
@@ -387,6 +399,9 @@ static int fcoe_if_destroy(struct net_device *netdev)
 	/* Free existing skbs */
 	fcoe_clean_pending_queue(lp);
 
+	/* Stop the timer */
+	del_timer_sync(&fc->timer);
+
 	/* Free memory used by statistical counters */
 	fc_lport_free_stats(lp);
 
@@ -1260,32 +1275,6 @@ int fcoe_percpu_receive_thread(void *arg)
 }
 
 /**
- * fcoe_watchdog() - fcoe timer callback
- * @vp:
- *
- * This checks the pending queue length for fcoe and set lport qfull
- * if the FCOE_MAX_QUEUE_DEPTH is reached. This is done for all fc_lport on the
- * fcoe_hostlist.
- *
- * Returns: 0 for success
- */
-void fcoe_watchdog(ulong vp)
-{
-	struct fcoe_softc *fc;
-
-	read_lock(&fcoe_hostlist_lock);
-	list_for_each_entry(fc, &fcoe_hostlist, list) {
-		if (fc->ctlr.lp)
-			fcoe_check_wait_queue(fc->ctlr.lp, NULL);
-	}
-	read_unlock(&fcoe_hostlist_lock);
-
-	fcoe_timer.expires = jiffies + (1 * HZ);
-	add_timer(&fcoe_timer);
-}
-
-
-/**
  * fcoe_check_wait_queue() - attempt to clear the transmit backlog
  * @lp: the fc_lport
  *
@@ -1333,6 +1322,8 @@ static void fcoe_check_wait_queue(struct fc_lport *lp, struct sk_buff *skb)
 
 	if (fc->fcoe_pending_queue.qlen < FCOE_LOW_QUEUE_DEPTH)
 		lp->qfull = 0;
+	if (fc->fcoe_pending_queue.qlen && !timer_pending(&fc->timer))
+		mod_timer(&fc->timer, jiffies + 2);
 	fc->fcoe_pending_queue_active = 0;
 out:
 	if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH)
@@ -1809,10 +1800,6 @@ static int __init fcoe_init(void)
 	/* Setup link change notification */
 	fcoe_dev_setup();
 
-	setup_timer(&fcoe_timer, fcoe_watchdog, 0);
-
-	mod_timer(&fcoe_timer, jiffies + (10 * HZ));
-
 	fcoe_if_init();
 
 	return 0;
@@ -1838,9 +1825,6 @@ static void __exit fcoe_exit(void)
 
 	fcoe_dev_cleanup();
 
-	/* Stop the timer */
-	del_timer_sync(&fcoe_timer);
-
 	/* releases the associated fcoe hosts */
 	list_for_each_entry_safe(fc, tmp, &fcoe_hostlist, list)
 		fcoe_if_destroy(fc->real_dev);
diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h
index 917aae8..a1eb8c1 100644
--- a/drivers/scsi/fcoe/fcoe.h
+++ b/drivers/scsi/fcoe/fcoe.h
@@ -61,6 +61,7 @@ struct fcoe_softc {
 	struct packet_type  fip_packet_type;
 	struct sk_buff_head fcoe_pending_queue;
 	u8	fcoe_pending_queue_active;
+	struct timer_list timer;		/* queue timer */
 	struct fcoe_ctlr ctlr;
 };
 
-- 
cgit v0.10.2


From 4e57e1cbbd1435b523b9cedb949728e9fdcfb5d4 Mon Sep 17 00:00:00 2001
From: Vasu Dev <vasu.dev@intel.com>
Date: Wed, 6 May 2009 10:52:46 -0700
Subject: [SCSI] fcoe: removes reserving memory for vlan_ethdr on tx path

This is not required as VLAN header is added by device
interface driver, this was causing bad FC_CRC in FCoE pkts when
using VLAN interface.

Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 6e7a700..e606b48 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1037,8 +1037,7 @@ int fcoe_xmit(struct fc_lport *lp, struct fc_frame *fp)
 	sof = fr_sof(fp);
 	eof = fr_eof(fp);
 
-	elen = (fc->real_dev->priv_flags & IFF_802_1Q_VLAN) ?
-		sizeof(struct vlan_ethhdr) : sizeof(struct ethhdr);
+	elen = sizeof(struct ethhdr);
 	hlen = sizeof(struct fcoe_hdr);
 	tlen = sizeof(struct fcoe_crc_eof);
 	wlen = (skb->len - tlen + sizeof(crc)) / FCOE_WORD_TO_BYTE;
-- 
cgit v0.10.2


From 226c7ffe74474257b4b87bd38ae8ba0030cf65e2 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Wed, 6 May 2009 10:52:51 -0700
Subject: [SCSI] net, libfcoe: Add the FCoE Initialization Protocol ethertype

FIP is the FCoE Initialization Protocol and this patch
adds the protocol ethertype to the kernel's list of
ethertypes.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index cfe4fe1..60e8934 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -79,6 +79,7 @@
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 #define ETH_P_FCOE	0x8906		/* Fibre Channel over Ethernet  */
+#define ETH_P_FIP	0x8914		/* FCoE Initialization Protocol */
 #define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
 
 /*
diff --git a/include/scsi/fc/fc_fip.h b/include/scsi/fc/fc_fip.h
index 0627a9a..3d138c1 100644
--- a/include/scsi/fc/fc_fip.h
+++ b/include/scsi/fc/fc_fip.h
@@ -22,13 +22,6 @@
  * http://www.t11.org/ftp/t11/pub/fc/bb-5/08-543v1.pdf
  */
 
-/*
- * The FIP ethertype eventually goes in net/if_ether.h.
- */
-#ifndef ETH_P_FIP
-#define ETH_P_FIP	0x8914	/* FIP Ethertype */
-#endif
-
 #define FIP_DEF_PRI	128	/* default selection priority */
 #define FIP_DEF_FC_MAP	0x0efc00 /* default FCoE MAP (MAC OUI) value */
 #define FIP_DEF_FKA	8000	/* default FCF keep-alive/advert period (mS) */
-- 
cgit v0.10.2


From 3af968e066d593bc4dacc021715f3e95ddf0996f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 8 Jun 2009 12:31:53 -0700
Subject: async: Fix lack of boot-time console due to insufficient
 synchronization

Our async work synchronization was broken by "async: make sure
independent async domains can't accidentally entangle" (commit
d5a877e8dd409d8c702986d06485c374b705d340), because it would report
the wrong lowest active async ID when there was both running and
pending async work.

This caused things like no being able to read the root filesystem,
resulting in missing console devices and inability to run 'init',
causing a boot-time panic.

This fixes it by properly returning the lowest pending async ID: if
there is any running async work, that will have a lower ID than any
pending work, and we should _not_ look at the pending work list.

There were alternative patches from Jaswinder and James, but this one
also cleans up the code by removing the pointless 'ret' variable and
the unnecesary testing for an empty list around 'for_each_entry()' (if
the list is empty, the for_each_entry() thing just won't execute).

Fixes-bug: http://bugzilla.kernel.org/show_bug.cgi?id=13474
Reported-and-tested-by: Chris Clayton <chris2553@googlemail.com>
Cc: Jaswinder Singh Rajput <jaswinder@kernel.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/async.c b/kernel/async.c
index 5054030..27235f5 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -92,23 +92,18 @@ extern int initcall_debug;
 static async_cookie_t  __lowest_in_progress(struct list_head *running)
 {
 	struct async_entry *entry;
-	async_cookie_t ret = next_cookie; /* begin with "infinity" value */
 
 	if (!list_empty(running)) {
 		entry = list_first_entry(running,
 			struct async_entry, list);
-		ret = entry->cookie;
+		return entry->cookie;
 	}
 
-	if (!list_empty(&async_pending)) {
-		list_for_each_entry(entry, &async_pending, list)
-			if (entry->running == running) {
-				ret = entry->cookie;
-				break;
-			}
-	}
+	list_for_each_entry(entry, &async_pending, list)
+		if (entry->running == running)
+			return entry->cookie;
 
-	return ret;
+	return next_cookie;	/* "infinity" value */
 }
 
 static async_cookie_t  lowest_in_progress(struct list_head *running)
-- 
cgit v0.10.2


From 04145f2bfbc3ac4dbb71fa085b82380444d50b4f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 8 Jun 2009 21:31:50 +0200
Subject: ALSA: ctxfi - Add use_system_timer module option

Added use_system_timer module option to force to use the system timer
instead of emu20k1 timer irq for debugging.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
index ec869a4..779c6c3 100644
--- a/sound/pci/ctxfi/cttimer.c
+++ b/sound/pci/ctxfi/cttimer.c
@@ -8,12 +8,17 @@
 
 #include <linux/slab.h>
 #include <linux/math64.h>
+#include <linux/moduleparam.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include "ctatc.h"
 #include "cthardware.h"
 #include "cttimer.h"
 
+static int use_system_timer;
+MODULE_PARM_DESC(use_system_timer, "Foce to use system-timer");
+module_param(use_system_timer, bool, S_IRUGO);
+
 struct ct_timer_ops {
 	void (*init)(struct ct_timer_instance *);
 	void (*prepare)(struct ct_timer_instance *);
@@ -390,8 +395,6 @@ void ct_timer_instance_free(struct ct_timer_instance *ti)
  * timer manager
  */
 
-#define USE_SYSTEM_TIMER	0
-
 static void ct_timer_interrupt(void *data, unsigned int status)
 {
 	struct ct_timer *timer = data;
@@ -415,7 +418,7 @@ struct ct_timer *ct_timer_new(struct ct_atc *atc)
 	INIT_LIST_HEAD(&atimer->running_head);
 	atimer->atc = atc;
 	hw = atc->hw;
-	if (!USE_SYSTEM_TIMER && hw->set_timer_irq) {
+	if (!use_system_timer && hw->set_timer_irq) {
 		snd_printd(KERN_INFO "ctxfi: Use xfi-native timer\n");
 		atimer->ops = &ct_xfitimer_ops;
 		hw->irq_callback_data = atimer;
-- 
cgit v0.10.2


From 7f774025171f626fc1a6a97781967c84a869d277 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:12 -0700
Subject: [SCSI] qla2xxx: Export negotiated fabric-parameters for application
 support.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index fc30f8e..524ceb9 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1115,6 +1115,15 @@ qla2x00_vn_port_mac_address_show(struct device *dev,
 	    vha->fcoe_vn_port_mac[1], vha->fcoe_vn_port_mac[0]);
 }
 
+static ssize_t
+qla2x00_fabric_param_show(struct device *dev, struct device_attribute *attr,
+    char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vha->hw->switch_cap);
+}
+
 static DEVICE_ATTR(driver_version, S_IRUGO, qla2x00_drvr_version_show, NULL);
 static DEVICE_ATTR(fw_version, S_IRUGO, qla2x00_fw_version_show, NULL);
 static DEVICE_ATTR(serial_num, S_IRUGO, qla2x00_serial_num_show, NULL);
@@ -1146,6 +1155,7 @@ static DEVICE_ATTR(flash_block_size, S_IRUGO, qla2x00_flash_block_size_show,
 static DEVICE_ATTR(vlan_id, S_IRUGO, qla2x00_vlan_id_show, NULL);
 static DEVICE_ATTR(vn_port_mac_address, S_IRUGO,
 		   qla2x00_vn_port_mac_address_show, NULL);
+static DEVICE_ATTR(fabric_param, S_IRUGO, qla2x00_fabric_param_show, NULL);
 
 struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_driver_version,
@@ -1170,6 +1180,7 @@ struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_flash_block_size,
 	&dev_attr_vlan_id,
 	&dev_attr_vn_port_mac_address,
+	&dev_attr_fabric_param,
 	NULL,
 };
 
-- 
cgit v0.10.2


From ce0423f4a23317d0166addd7d6fcc4a0fa95e751 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:13 -0700
Subject: [SCSI] qla2xxx: Export XGMAC statistics on supported ISPs.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 524ceb9..e8c1c9e 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -692,6 +692,58 @@ static struct bin_attribute sysfs_edc_status_attr = {
 	.read = qla2x00_sysfs_read_edc_status,
 };
 
+static ssize_t
+qla2x00_sysfs_read_xgmac_stats(struct kobject *kobj,
+		       struct bin_attribute *bin_attr,
+		       char *buf, loff_t off, size_t count)
+{
+	struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+	    struct device, kobj)));
+	struct qla_hw_data *ha = vha->hw;
+	int rval;
+	uint16_t actual_size;
+
+	if (!capable(CAP_SYS_ADMIN) || off != 0 || count > XGMAC_DATA_SIZE)
+		return 0;
+
+	if (ha->xgmac_data)
+		goto do_read;
+
+	ha->xgmac_data = dma_alloc_coherent(&ha->pdev->dev, XGMAC_DATA_SIZE,
+	    &ha->xgmac_data_dma, GFP_KERNEL);
+	if (!ha->xgmac_data) {
+		qla_printk(KERN_WARNING, ha,
+		    "Unable to allocate memory for XGMAC read-data.\n");
+		return 0;
+	}
+
+do_read:
+	actual_size = 0;
+	memset(ha->xgmac_data, 0, XGMAC_DATA_SIZE);
+
+	rval = qla2x00_get_xgmac_stats(vha, ha->xgmac_data_dma,
+	    XGMAC_DATA_SIZE, &actual_size);
+	if (rval != QLA_SUCCESS) {
+		qla_printk(KERN_WARNING, ha,
+		    "Unable to read XGMAC data (%x).\n", rval);
+		count = 0;
+	}
+
+	count = actual_size > count ? count: actual_size;
+	memcpy(buf, ha->xgmac_data, count);
+
+	return count;
+}
+
+static struct bin_attribute sysfs_xgmac_stats_attr = {
+	.attr = {
+		.name = "xgmac_stats",
+		.mode = S_IRUSR,
+	},
+	.size = 0,
+	.read = qla2x00_sysfs_read_xgmac_stats,
+};
+
 static struct sysfs_entry {
 	char *name;
 	struct bin_attribute *attr;
@@ -706,6 +758,7 @@ static struct sysfs_entry {
 	{ "reset", &sysfs_reset_attr, },
 	{ "edc", &sysfs_edc_attr, 2 },
 	{ "edc_status", &sysfs_edc_status_attr, 2 },
+	{ "xgmac_stats", &sysfs_xgmac_stats_attr, 3 },
 	{ NULL },
 };
 
@@ -721,6 +774,8 @@ qla2x00_alloc_sysfs_attr(scsi_qla_host_t *vha)
 			continue;
 		if (iter->is4GBp_only == 2 && !IS_QLA25XX(vha->hw))
 			continue;
+		if (iter->is4GBp_only == 3 && !IS_QLA81XX(vha->hw))
+			continue;
 
 		ret = sysfs_create_bin_file(&host->shost_gendev.kobj,
 		    iter->attr);
@@ -743,6 +798,8 @@ qla2x00_free_sysfs_attr(scsi_qla_host_t *vha)
 			continue;
 		if (iter->is4GBp_only == 2 && !IS_QLA25XX(ha))
 			continue;
+		if (iter->is4GBp_only == 3 && !IS_QLA81XX(ha))
+			continue;
 
 		sysfs_remove_bin_file(&host->shost_gendev.kobj,
 		    iter->attr);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 721bae9..da941be 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2397,6 +2397,10 @@ struct qla_hw_data {
 	dma_addr_t	edc_data_dma;
 	uint16_t	edc_data_len;
 
+#define XGMAC_DATA_SIZE	PAGE_SIZE
+	void		*xgmac_data;
+	dma_addr_t	xgmac_data_dma;
+
 	struct task_struct	*dpc_thread;
 	uint8_t dpc_active;                  /* DPC routine is active */
 
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index f389f3d..80ab46c 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -1405,6 +1405,7 @@ struct access_chip_rsp_84xx {
 #define MBC_IDC_ACK		0x101
 #define MBC_RESTART_MPI_FW	0x3d
 #define MBC_FLASH_ACCESS_CTRL	0x3e	/* Control flash access. */
+#define MBC_GET_XGMAC_STATS	0x7a
 
 /* Flash access control option field bit definitions */
 #define FAC_OPT_FORCE_SEMAPHORE		BIT_15
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index f17d525..66ba399 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -293,6 +293,9 @@ qla81xx_fac_do_write_enable(scsi_qla_host_t *, int);
 extern int
 qla81xx_fac_erase_sector(scsi_qla_host_t *, uint32_t, uint32_t);
 
+extern int
+qla2x00_get_xgmac_stats(scsi_qla_host_t *, dma_addr_t, uint16_t, uint16_t *);
+
 /*
  * Global Function Prototypes in qla_isr.c source file.
  */
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 7d0eeec..2497fe4 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3462,3 +3462,41 @@ qla2x00_write_edc(scsi_qla_host_t *vha, uint16_t dev, uint16_t adr,
 
 	return rval;
 }
+
+int
+qla2x00_get_xgmac_stats(scsi_qla_host_t *vha, dma_addr_t stats_dma,
+    uint16_t size_in_bytes, uint16_t *actual_size)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_QLA81XX(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
+
+	mcp->mb[0] = MBC_GET_XGMAC_STATS;
+	mcp->mb[2] = MSW(stats_dma);
+	mcp->mb[3] = LSW(stats_dma);
+	mcp->mb[6] = MSW(MSD(stats_dma));
+	mcp->mb[7] = LSW(MSD(stats_dma));
+	mcp->mb[8] = size_in_bytes >> 2;
+	mcp->out_mb = MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0;
+	mcp->in_mb = MBX_2|MBX_1|MBX_0;
+	mcp->tov = MBX_TOV_SECONDS;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=0x%x "
+		    "mb[1]=0x%x mb[2]=0x%x.\n", __func__, vha->host_no, rval,
+		    mcp->mb[0], mcp->mb[1], mcp->mb[2]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
+
+		*actual_size = mcp->mb[2] << 2;
+	}
+
+	return rval;
+}
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index f4f5355..642e976 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -2434,6 +2434,10 @@ qla2x00_mem_free(struct qla_hw_data *ha)
 		vfree(ha->fw_dump);
 	}
 
+	if (ha->xgmac_data)
+		dma_free_coherent(&ha->pdev->dev, XGMAC_DATA_SIZE,
+		    ha->xgmac_data, ha->xgmac_data_dma);
+
 	if (ha->sns_cmd)
 		dma_free_coherent(&ha->pdev->dev, sizeof(struct sns_cmd_pkt),
 		ha->sns_cmd, ha->sns_cmd_dma);
-- 
cgit v0.10.2


From 11bbc1d896637c1d83b11cc3b97ed3d6d2076c63 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:14 -0700
Subject: [SCSI] qla2xxx: Export TLV data on supported ISPs.

Firmware currently provides PB and PGF TLVs.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index e8c1c9e..9aa00f2 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -744,6 +744,57 @@ static struct bin_attribute sysfs_xgmac_stats_attr = {
 	.read = qla2x00_sysfs_read_xgmac_stats,
 };
 
+static ssize_t
+qla2x00_sysfs_read_dcbx_tlv(struct kobject *kobj,
+		       struct bin_attribute *bin_attr,
+		       char *buf, loff_t off, size_t count)
+{
+	struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+	    struct device, kobj)));
+	struct qla_hw_data *ha = vha->hw;
+	int rval;
+	uint16_t actual_size;
+
+	if (!capable(CAP_SYS_ADMIN) || off != 0 || count > DCBX_TLV_DATA_SIZE)
+		return 0;
+
+	if (ha->dcbx_tlv)
+		goto do_read;
+
+	ha->dcbx_tlv = dma_alloc_coherent(&ha->pdev->dev, DCBX_TLV_DATA_SIZE,
+	    &ha->dcbx_tlv_dma, GFP_KERNEL);
+	if (!ha->dcbx_tlv) {
+		qla_printk(KERN_WARNING, ha,
+		    "Unable to allocate memory for DCBX TLV read-data.\n");
+		return 0;
+	}
+
+do_read:
+	actual_size = 0;
+	memset(ha->dcbx_tlv, 0, DCBX_TLV_DATA_SIZE);
+
+	rval = qla2x00_get_dcbx_params(vha, ha->dcbx_tlv_dma,
+	    DCBX_TLV_DATA_SIZE);
+	if (rval != QLA_SUCCESS) {
+		qla_printk(KERN_WARNING, ha,
+		    "Unable to read DCBX TLV data (%x).\n", rval);
+		count = 0;
+	}
+
+	memcpy(buf, ha->dcbx_tlv, count);
+
+	return count;
+}
+
+static struct bin_attribute sysfs_dcbx_tlv_attr = {
+	.attr = {
+		.name = "dcbx_tlv",
+		.mode = S_IRUSR,
+	},
+	.size = 0,
+	.read = qla2x00_sysfs_read_dcbx_tlv,
+};
+
 static struct sysfs_entry {
 	char *name;
 	struct bin_attribute *attr;
@@ -759,6 +810,7 @@ static struct sysfs_entry {
 	{ "edc", &sysfs_edc_attr, 2 },
 	{ "edc_status", &sysfs_edc_status_attr, 2 },
 	{ "xgmac_stats", &sysfs_xgmac_stats_attr, 3 },
+	{ "dcbx_tlv", &sysfs_dcbx_tlv_attr, 3 },
 	{ NULL },
 };
 
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index da941be..bb6bfd7 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2401,6 +2401,10 @@ struct qla_hw_data {
 	void		*xgmac_data;
 	dma_addr_t	xgmac_data_dma;
 
+#define DCBX_TLV_DATA_SIZE PAGE_SIZE
+	void		*dcbx_tlv;
+	dma_addr_t	dcbx_tlv_dma;
+
 	struct task_struct	*dpc_thread;
 	uint8_t dpc_active;                  /* DPC routine is active */
 
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 80ab46c..152d16c 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -1406,6 +1406,7 @@ struct access_chip_rsp_84xx {
 #define MBC_RESTART_MPI_FW	0x3d
 #define MBC_FLASH_ACCESS_CTRL	0x3e	/* Control flash access. */
 #define MBC_GET_XGMAC_STATS	0x7a
+#define MBC_GET_DCBX_PARAMS	0x51
 
 /* Flash access control option field bit definitions */
 #define FAC_OPT_FORCE_SEMAPHORE		BIT_15
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 66ba399..fbf9972 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -296,6 +296,9 @@ qla81xx_fac_erase_sector(scsi_qla_host_t *, uint32_t, uint32_t);
 extern int
 qla2x00_get_xgmac_stats(scsi_qla_host_t *, dma_addr_t, uint16_t, uint16_t *);
 
+extern int
+qla2x00_get_dcbx_params(scsi_qla_host_t *, dma_addr_t, uint16_t);
+
 /*
  * Global Function Prototypes in qla_isr.c source file.
  */
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 2497fe4..e0fee48 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3500,3 +3500,40 @@ qla2x00_get_xgmac_stats(scsi_qla_host_t *vha, dma_addr_t stats_dma,
 
 	return rval;
 }
+
+int
+qla2x00_get_dcbx_params(scsi_qla_host_t *vha, dma_addr_t tlv_dma,
+    uint16_t size)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_QLA81XX(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
+
+	mcp->mb[0] = MBC_GET_DCBX_PARAMS;
+	mcp->mb[1] = 0;
+	mcp->mb[2] = MSW(tlv_dma);
+	mcp->mb[3] = LSW(tlv_dma);
+	mcp->mb[6] = MSW(MSD(tlv_dma));
+	mcp->mb[7] = LSW(MSD(tlv_dma));
+	mcp->mb[8] = size;
+	mcp->out_mb = MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_2|MBX_1|MBX_0;
+	mcp->tov = MBX_TOV_SECONDS;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=0x%x "
+		    "mb[1]=0x%x mb[2]=0x%x.\n", __func__, vha->host_no, rval,
+		    mcp->mb[0], mcp->mb[1], mcp->mb[2]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
+	}
+
+	return rval;
+}
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 642e976..7415ead 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -2434,6 +2434,10 @@ qla2x00_mem_free(struct qla_hw_data *ha)
 		vfree(ha->fw_dump);
 	}
 
+	if (ha->dcbx_tlv)
+		dma_free_coherent(&ha->pdev->dev, DCBX_TLV_DATA_SIZE,
+		    ha->dcbx_tlv, ha->dcbx_tlv_dma);
+
 	if (ha->xgmac_data)
 		dma_free_coherent(&ha->pdev->dev, XGMAC_DATA_SIZE,
 		    ha->xgmac_data, ha->xgmac_data_dma);
-- 
cgit v0.10.2


From 1b91a2e6712393cffc33eeff5cf857f7d5b62fed Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:15 -0700
Subject: [SCSI] qla2xxx: Correct logic-bug in set-model-info().

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 33e9248..a7abc10 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1538,7 +1538,7 @@ qla2x00_set_model_info(scsi_qla_host_t *vha, uint8_t *model, size_t len,
 	char *st, *en;
 	uint16_t index;
 	struct qla_hw_data *ha = vha->hw;
-	int use_tbl = !IS_QLA25XX(ha) && IS_QLA81XX(ha);
+	int use_tbl = !IS_QLA25XX(ha) && !IS_QLA81XX(ha);
 
 	if (memcmp(model, BINZERO, len) != 0) {
 		strncpy(ha->model_number, model, len);
-- 
cgit v0.10.2


From 40859ae5f13534624cc35a05179b4f93ecbf531a Mon Sep 17 00:00:00 2001
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:16 -0700
Subject: [SCSI] qla2xxx: Correct queue-creation bug when driver loaded in QoS
 mode.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 9aa00f2..2bd017f 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1651,13 +1651,13 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 	qla24xx_vport_disable(fc_vport, disable);
 
 	ret = 0;
-	if (ha->cur_vport_count <= ha->flex_port_count || ql2xmultique_tag
-		|| ha->max_req_queues == 1 || !ha->npiv_info)
+	if (ql2xmaxqueues == 1 || ql2xmultique_tag || !ha->npiv_info)
 		goto vport_queue;
 	/* Create a request queue in QoS mode for the vport */
-	for (cnt = ha->flex_port_count; cnt < ha->nvram_npiv_size; cnt++) {
-		if (ha->npiv_info[cnt].port_name == vha->port_name &&
-			ha->npiv_info[cnt].node_name == vha->node_name) {
+	for (cnt = 0; cnt < ha->nvram_npiv_size; cnt++) {
+		if (memcmp(ha->npiv_info[cnt].port_name, vha->port_name, 8) == 0
+			&& memcmp(ha->npiv_info[cnt].node_name, vha->node_name,
+			8) == 0) {
 			qos = ha->npiv_info[cnt].q_qos;
 			break;
 		}
@@ -1671,8 +1671,8 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 			vha->vp_idx);
 		else
 			DEBUG2(qla_printk(KERN_INFO, ha,
-			"Request Que:%d created for vp_idx:%d\n",
-			ret, vha->vp_idx));
+			"Request Que:%d (QoS: %d) created for vp_idx:%d\n",
+			ret, qos, vha->vp_idx));
 	}
 
 vport_queue:
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index bb6bfd7..4e846ae 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2545,7 +2545,6 @@ struct qla_hw_data {
 	uint16_t        num_vsans;      /* number of vsan created */
 	uint16_t        max_npiv_vports;        /* 63 or 125 per topoloty */
 	int             cur_vport_count;
-	uint16_t	flex_port_count;
 
 	struct qla_chip_state_84xx *cs84xx;
 	struct qla_statistics qla_stats;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 7415ead..181ed97 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1853,6 +1853,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		ha->init_cb_size = sizeof(struct mid_init_cb_81xx);
 		ha->gid_list_info_size = 8;
 		ha->optrom_size = OPTROM_SIZE_81XX;
+		ha->nvram_npiv_size = QLA_MAX_VPORTS_QLA25XX;
 		ha->isp_ops = &qla81xx_isp_ops;
 		ha->flash_conf_off = FARX_ACCESS_FLASH_CONF_81XX;
 		ha->flash_data_off = FARX_ACCESS_FLASH_DATA_81XX;
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 22f97eb..e239203 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -920,12 +920,13 @@ qla2xxx_flash_npiv_conf(scsi_qla_host_t *vha)
 
 	entry = data + sizeof(struct qla_npiv_header);
 	cnt = le16_to_cpu(hdr.entries);
-	ha->flex_port_count = cnt;
 	for (i = 0; cnt; cnt--, entry++, i++) {
 		uint16_t flags;
 		struct fc_vport_identifiers vid;
 		struct fc_vport *vport;
 
+		memcpy(&ha->npiv_info[i], entry, sizeof(struct qla_npiv_entry));
+
 		flags = le16_to_cpu(entry->flags);
 		if (flags == 0xffff)
 			continue;
@@ -939,9 +940,7 @@ qla2xxx_flash_npiv_conf(scsi_qla_host_t *vha)
 		vid.port_name = wwn_to_u64(entry->port_name);
 		vid.node_name = wwn_to_u64(entry->node_name);
 
-		memcpy(&ha->npiv_info[i], entry, sizeof(struct qla_npiv_entry));
-
-		DEBUG2(qla_printk(KERN_DEBUG, ha, "NPIV[%02x]: wwpn=%llx "
+		DEBUG2(qla_printk(KERN_INFO, ha, "NPIV[%02x]: wwpn=%llx "
 			"wwnn=%llx vf_id=0x%x Q_qos=0x%x F_qos=0x%x.\n", cnt,
 			vid.port_name, vid.node_name, le16_to_cpu(entry->vf_id),
 			entry->q_qos, entry->f_qos));
@@ -957,7 +956,6 @@ qla2xxx_flash_npiv_conf(scsi_qla_host_t *vha)
 	}
 done:
 	kfree(data);
-	ha->npiv_info = NULL;
 }
 
 static int
-- 
cgit v0.10.2


From cbc8eb67da11a4972834f61fe4729f4c037a17c9 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:17 -0700
Subject: [SCSI] qla2xxx: Fallback to 'golden-firmware' operation on supported
 ISPs.

In case the onboard firmware is unable to be read or loaded for
operation, attempt to fallback to a limited-operational firmware
image stored in a different flash region.  This will allow a user
to reflash and correct a board with proper operational firmware.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 4e846ae..88ddae0 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2223,6 +2223,7 @@ struct qla_hw_data {
 		uint32_t	fac_supported		:1;
 		uint32_t	chip_reset_done		:1;
 		uint32_t	port0			:1;
+		uint32_t	running_gold_fw		:1;
 	} flags;
 
 	/* This spinlock is used to protect "io transactions", you must
@@ -2523,6 +2524,7 @@ struct qla_hw_data {
 	uint32_t        flt_region_vpd;
 	uint32_t        flt_region_nvram;
 	uint32_t        flt_region_npiv_conf;
+	uint32_t	flt_region_gold_fw;
 
 	/* Needed for BEACON */
 	uint16_t        beacon_blink_led;
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 152d16c..9e56d4a 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -1241,6 +1241,7 @@ struct qla_flt_header {
 #define FLT_REG_HW_EVENT_1	0x1f
 #define FLT_REG_NPIV_CONF_0	0x29
 #define FLT_REG_NPIV_CONF_1	0x2a
+#define FLT_REG_GOLD_FW		0x2f
 
 struct qla_flt_region {
 	uint32_t code;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index a7abc10..34e6508 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3806,11 +3806,11 @@ qla24xx_nvram_config(scsi_qla_host_t *vha)
 }
 
 static int
-qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr)
+qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr,
+    uint32_t faddr)
 {
 	int	rval = QLA_SUCCESS;
 	int	segments, fragment;
-	uint32_t faddr;
 	uint32_t *dcode, dlen;
 	uint32_t risc_addr;
 	uint32_t risc_size;
@@ -3819,12 +3819,11 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr)
 	struct req_que *req = ha->req_q_map[0];
 
 	qla_printk(KERN_INFO, ha,
-	    "FW: Loading from flash (%x)...\n", ha->flt_region_fw);
+	    "FW: Loading from flash (%x)...\n", faddr);
 
 	rval = QLA_SUCCESS;
 
 	segments = FA_RISC_CODE_SEGMENTS;
-	faddr = ha->flt_region_fw;
 	dcode = (uint32_t *)req->ring;
 	*srisc_addr = 0;
 
@@ -4124,27 +4123,45 @@ qla24xx_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr)
 	if (rval == QLA_SUCCESS)
 		return rval;
 
-	return qla24xx_load_risc_flash(vha, srisc_addr);
+	return qla24xx_load_risc_flash(vha, srisc_addr,
+	    vha->hw->flt_region_fw);
 }
 
 int
 qla81xx_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr)
 {
 	int rval;
+	struct qla_hw_data *ha = vha->hw;
 
 	if (ql2xfwloadbin == 2)
-		return qla24xx_load_risc(vha, srisc_addr);
+		goto try_blob_fw;
 
 	/*
 	 * FW Load priority:
 	 * 1) Firmware residing in flash.
 	 * 2) Firmware via request-firmware interface (.bin file).
+	 * 3) Golden-Firmware residing in flash -- limited operation.
 	 */
-	rval = qla24xx_load_risc_flash(vha, srisc_addr);
+	rval = qla24xx_load_risc_flash(vha, srisc_addr, ha->flt_region_fw);
 	if (rval == QLA_SUCCESS)
 		return rval;
 
-	return qla24xx_load_risc_blob(vha, srisc_addr);
+try_blob_fw:
+	rval = qla24xx_load_risc_blob(vha, srisc_addr);
+	if (rval == QLA_SUCCESS || !ha->flt_region_gold_fw)
+		return rval;
+
+	qla_printk(KERN_ERR, ha,
+	    "FW: Attempting to fallback to golden firmware...\n");
+	rval = qla24xx_load_risc_flash(vha, srisc_addr, ha->flt_region_gold_fw);
+	if (rval != QLA_SUCCESS)
+		return rval;
+
+	qla_printk(KERN_ERR, ha,
+	    "FW: Please update operational firmware...\n");
+	ha->flags.running_gold_fw = 1;
+
+	return rval;
 }
 
 void
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 181ed97..128b3d5 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1690,6 +1690,9 @@ qla2xxx_scan_start(struct Scsi_Host *shost)
 {
 	scsi_qla_host_t *vha = shost_priv(shost);
 
+	if (vha->hw->flags.running_gold_fw)
+		return;
+
 	set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
 	set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
 	set_bit(RSCN_UPDATE, &vha->dpc_flags);
@@ -1962,6 +1965,9 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 				"Can't create queues, falling back to single"
 				" queue mode\n");
 
+	if (ha->flags.running_gold_fw)
+		goto skip_dpc;
+
 	/*
 	 * Startup the kernel thread for this host adapter
 	 */
@@ -1974,6 +1980,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto probe_failed;
 	}
 
+skip_dpc:
 	list_add_tail(&base_vha->list, &ha->vp_list);
 	base_vha->host->irq = ha->pdev->irq;
 
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index e239203..6260505 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -728,6 +728,9 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr)
 			if (!ha->flags.port0)
 				ha->flt_region_npiv_conf = start;
 			break;
+		case FLT_REG_GOLD_FW:
+			ha->flt_region_gold_fw = start;
+			break;
 		}
 	}
 	goto done;
-- 
cgit v0.10.2


From 94b3aa47ac1ea0aa31b3f59ad121cdf55e038594 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:18 -0700
Subject: [SCSI] qla2xxx: Use 'proper' DID_* status code for dropped-frame
 scenarios.

The SCSI-midlayer's fast-fail codes consider an DID_ERROR status
as a driver-error and the failed I/O would then be retried in the
midlayer without being fast-failed to dm-multipath.  DID_BUS_BUSY
status returns would induce unneeded path-failures events being
propagated to the DM/MD.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index eb35d20..c8d0a17 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1211,7 +1211,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 			/*
 			 * If RISC reports underrun and target does not report
 			 * it then we must have a lost frame, so tell upper
-			 * layer to retry it by reporting a bus busy.
+			 * layer to retry it by reporting an error.
 			 */
 			if (!(scsi_status & SS_RESIDUAL_UNDER)) {
 				DEBUG2(printk("scsi(%ld:%d:%d:%d) Dropped "
@@ -1221,7 +1221,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 					cp->device->id, cp->device->lun, resid,
 					scsi_bufflen(cp)));
 
-				cp->result = DID_BUS_BUSY << 16;
+				cp->result = DID_ERROR << 16;
 				break;
 			}
 
-- 
cgit v0.10.2


From 59e0b8b088031b3b751f0608f797f2581f49a827 Mon Sep 17 00:00:00 2001
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:19 -0700
Subject: [SCSI] qla2xxx: Correct NULL pointer bug in cpu affinity mode.

This patch fixes a NULL pointer bug that occurs when IO is being
carried out on a vport for which the cpu affinity mode is turned on.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 2bd017f..74e6970 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1595,6 +1595,7 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 	struct qla_hw_data *ha = base_vha->hw;
 	uint16_t options = 0;
 	int	cnt;
+	struct req_que *req = ha->req_q_map[0];
 
 	ret = qla24xx_vport_create_req_sanity_check(fc_vport);
 	if (ret) {
@@ -1650,14 +1651,16 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 
 	qla24xx_vport_disable(fc_vport, disable);
 
-	ret = 0;
-	if (ql2xmaxqueues == 1 || ql2xmultique_tag || !ha->npiv_info)
+	if (ql2xmultique_tag) {
+		req = ha->req_q_map[1];
+		goto vport_queue;
+	} else if (ql2xmaxqueues == 1 || !ha->npiv_info)
 		goto vport_queue;
 	/* Create a request queue in QoS mode for the vport */
 	for (cnt = 0; cnt < ha->nvram_npiv_size; cnt++) {
 		if (memcmp(ha->npiv_info[cnt].port_name, vha->port_name, 8) == 0
 			&& memcmp(ha->npiv_info[cnt].node_name, vha->node_name,
-			8) == 0) {
+					8) == 0) {
 			qos = ha->npiv_info[cnt].q_qos;
 			break;
 		}
@@ -1669,14 +1672,16 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
 			qla_printk(KERN_WARNING, ha,
 			"Can't create request queue for vp_idx:%d\n",
 			vha->vp_idx);
-		else
+		else {
 			DEBUG2(qla_printk(KERN_INFO, ha,
 			"Request Que:%d (QoS: %d) created for vp_idx:%d\n",
 			ret, qos, vha->vp_idx));
+			req = ha->req_q_map[ret];
+		}
 	}
 
 vport_queue:
-	vha->req = ha->req_q_map[ret];
+	vha->req = req;
 	return 0;
 
 vport_create_failed_2:
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index b4c6010..13396be 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -15,7 +15,7 @@ static request_t *qla2x00_req_pkt(struct scsi_qla_host *, struct req_que *,
 							struct rsp_que *rsp);
 static void qla2x00_isp_cmd(struct scsi_qla_host *, struct req_que *);
 
-static void qla25xx_set_que(srb_t *, struct req_que **, struct rsp_que **);
+static void qla25xx_set_que(srb_t *, struct rsp_que **);
 /**
  * qla2x00_get_cmd_direction() - Determine control_flag data direction.
  * @cmd: SCSI command
@@ -722,7 +722,8 @@ qla24xx_start_scsi(srb_t *sp)
 	/* Setup device pointers. */
 	ret = 0;
 
-	qla25xx_set_que(sp, &req, &rsp);
+	qla25xx_set_que(sp, &rsp);
+	req = vha->req;
 
 	/* So we know we haven't pci_map'ed anything yet */
 	tot_dsds = 0;
@@ -845,20 +846,15 @@ queuing_error:
 	return QLA_FUNCTION_FAILED;
 }
 
-static void qla25xx_set_que(srb_t *sp, struct req_que **req,
-	struct rsp_que **rsp)
+static void qla25xx_set_que(srb_t *sp, struct rsp_que **rsp)
 {
 	struct scsi_cmnd *cmd = sp->cmd;
-	struct scsi_qla_host *vha = sp->fcport->vha;
 	struct qla_hw_data *ha = sp->fcport->vha->hw;
 	int affinity = cmd->request->cpu;
 
 	if (ql2xmultique_tag && affinity >= 0 &&
-		affinity < ha->max_rsp_queues - 1) {
+		affinity < ha->max_rsp_queues - 1)
 		*rsp = ha->rsp_q_map[affinity + 1];
-		*req = ha->req_q_map[1];
-	} else {
-		*req = vha->req;
+	 else
 		*rsp = ha->rsp_q_map[0];
-	}
 }
-- 
cgit v0.10.2


From ca9e9c3eb118d0cb9dc2e5232f6f2dcaa4b7a5e0 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:20 -0700
Subject: [SCSI] qla2xxx: Check status of qla2x00_get_fw_version() call.

Unlike earlier ISPs, recent ISPs (ISP81xx) can in fact fail this
mailbox command.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index fbf9972..1ef18cc 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -148,7 +148,7 @@ qla2x00_dump_ram(scsi_qla_host_t *, dma_addr_t, uint32_t, uint32_t);
 extern int
 qla2x00_execute_fw(scsi_qla_host_t *, uint32_t);
 
-extern void
+extern int
 qla2x00_get_fw_version(scsi_qla_host_t *, uint16_t *, uint16_t *, uint16_t *,
     uint16_t *, uint32_t *, uint8_t *, uint32_t *, uint8_t *);
 
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 34e6508..415fbf6 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -929,13 +929,16 @@ qla2x00_setup_chip(scsi_qla_host_t *vha)
 			/* Retrieve firmware information. */
 			if (rval == QLA_SUCCESS) {
 				fw_major_version = ha->fw_major_version;
-				qla2x00_get_fw_version(vha,
+				rval = qla2x00_get_fw_version(vha,
 				    &ha->fw_major_version,
 				    &ha->fw_minor_version,
 				    &ha->fw_subminor_version,
 				    &ha->fw_attributes, &ha->fw_memory_size,
 				    ha->mpi_version, &ha->mpi_capabilities,
 				    ha->phy_version);
+				if (rval != QLA_SUCCESS)
+					goto failed;
+
 				ha->flags.npiv_supported = 0;
 				if (IS_QLA2XXX_MIDTYPE(ha) &&
 					 (ha->fw_attributes & BIT_2)) {
@@ -987,7 +990,7 @@ qla2x00_setup_chip(scsi_qla_host_t *vha)
 			    ha->fw_subminor_version);
 		}
 	}
-
+failed:
 	if (rval) {
 		DEBUG2_3(printk("scsi(%ld): Setup chip **** FAILED ****.\n",
 		    vha->host_no));
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index e0fee48..b32eb69 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -408,7 +408,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr)
  * Context:
  *	Kernel context.
  */
-void
+int
 qla2x00_get_fw_version(scsi_qla_host_t *vha, uint16_t *major, uint16_t *minor,
     uint16_t *subminor, uint16_t *attributes, uint32_t *memory, uint8_t *mpi,
     uint32_t *mpi_caps, uint8_t *phy)
@@ -427,6 +427,8 @@ qla2x00_get_fw_version(scsi_qla_host_t *vha, uint16_t *major, uint16_t *minor,
 	mcp->flags = 0;
 	mcp->tov = MBX_TOV_SECONDS;
 	rval = qla2x00_mailbox_command(vha, mcp);
+	if (rval != QLA_SUCCESS)
+		goto failed;
 
 	/* Return mailbox data. */
 	*major = mcp->mb[1];
@@ -446,7 +448,7 @@ qla2x00_get_fw_version(scsi_qla_host_t *vha, uint16_t *major, uint16_t *minor,
 		phy[1] = mcp->mb[9] >> 8;
 		phy[2] = mcp->mb[9] & 0xff;
 	}
-
+failed:
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("%s(%ld): failed=%x.\n", __func__,
@@ -455,6 +457,7 @@ qla2x00_get_fw_version(scsi_qla_host_t *vha, uint16_t *major, uint16_t *minor,
 		/*EMPTY*/
 		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
 	}
+	return rval;
 }
 
 /*
-- 
cgit v0.10.2


From f4658b6ccc9d54b28b89004accc989db185b6a2e Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:21 -0700
Subject: [SCSI] qla2xxx: Mark a port's state as needing-rediscovery during
 link disruptions.

With RSCN states not being kept across qla2x00_configure_loop()
invocations, loop-resync distruptions during fabric-discovery may
cause ports to remain in a lost state.  Force state
renegotiation during a follow-on configure-loop iteration.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 415fbf6..d145de0 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2064,8 +2064,10 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
 	if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) {
 		if (test_bit(LOCAL_LOOP_UPDATE, &save_flags))
 			set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
-		if (test_bit(RSCN_UPDATE, &save_flags))
+		if (test_bit(RSCN_UPDATE, &save_flags)) {
 			set_bit(RSCN_UPDATE, &vha->dpc_flags);
+			vha->flags.rscn_queue_overflow = 1;
+		}
 	}
 
 	return (rval);
-- 
cgit v0.10.2


From 9f8fddeef2264a0315032b0aa2ee0052dad90076 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:22 -0700
Subject: [SCSI] qla2xxx: Add 10Gb iiDMA support.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 3dbb9e7..917534b 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -1879,6 +1879,9 @@ qla2x00_gpsc(scsi_qla_host_t *vha, sw_info_t *list)
 			case BIT_13:
 				list[i].fp_speed = PORT_SPEED_4GB;
 				break;
+			case BIT_12:
+				list[i].fp_speed = PORT_SPEED_10GB;
+				break;
 			case BIT_11:
 				list[i].fp_speed = PORT_SPEED_8GB;
 				break;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index d145de0..cb4d952 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2248,7 +2248,8 @@ static void
 qla2x00_iidma_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
 {
 #define LS_UNKNOWN      2
-	static char *link_speeds[5] = { "1", "2", "?", "4", "8" };
+	static char *link_speeds[] = { "1", "2", "?", "4", "8", "10" };
+	char *link_speed;
 	int rval;
 	uint16_t mb[6];
 	struct qla_hw_data *ha = vha->hw;
@@ -2271,10 +2272,15 @@ qla2x00_iidma_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
 		    fcport->port_name[6], fcport->port_name[7], rval,
 		    fcport->fp_speed, mb[0], mb[1]));
 	} else {
+		link_speed = link_speeds[LS_UNKNOWN];
+		if (fcport->fp_speed < 5)
+			link_speed = link_speeds[fcport->fp_speed];
+		else if (fcport->fp_speed == 0x13)
+			link_speed = link_speeds[5];
 		DEBUG2(qla_printk(KERN_INFO, ha,
 		    "iIDMA adjusted to %s GB/s on "
 		    "%02x%02x%02x%02x%02x%02x%02x%02x.\n",
-		    link_speeds[fcport->fp_speed], fcport->port_name[0],
+		    link_speed, fcport->port_name[0],
 		    fcport->port_name[1], fcport->port_name[2],
 		    fcport->port_name[3], fcport->port_name[4],
 		    fcport->port_name[5], fcport->port_name[6],
-- 
cgit v0.10.2


From 81eb9b4985b9cf965f9c05f4679a77fae2a85fe5 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:23 -0700
Subject: [SCSI] qla2xxx: Add notification message when an NPIV fails to
 acquire a port-id.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index b32eb69..df919c0 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -2753,8 +2753,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 		if (vp_idx == 0)
 			return;
 
-		if (MSB(stat) == 1)
+		if (MSB(stat) == 1) {
+			DEBUG2(printk("scsi(%ld): Could not acquire ID for "
+			    "VP[%d].\n", vha->host_no, vp_idx));
 			return;
+		}
 
 		list_for_each_entry_safe(vp, tvp, &ha->vp_list, list)
 			if (vp_idx == vp->vp_idx)
-- 
cgit v0.10.2


From eeebcc922326a2ea0302937b425a0d1471cbd6a7 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:24 -0700
Subject: [SCSI] qla2xxx: Fallback enode-mac should not be a multicast address.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index cb4d952..4649b2a 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -4406,7 +4406,7 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
 		nv->max_luns_per_target = __constant_cpu_to_le16(128);
 		nv->port_down_retry_count = __constant_cpu_to_le16(30);
 		nv->link_down_timeout = __constant_cpu_to_le16(30);
-		nv->enode_mac[0] = 0x01;
+		nv->enode_mac[0] = 0x00;
 		nv->enode_mac[1] = 0x02;
 		nv->enode_mac[2] = 0x03;
 		nv->enode_mac[3] = 0x04;
-- 
cgit v0.10.2


From e8233ca40bfe7b9dade6cefc984e305516c4eceb Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:25 -0700
Subject: [SCSI] qla2xxx: Avoid redundant RISC reset during
 (re)-initialization.

ISP24xx and above ISPs perform a RISC reset in
qla24xx_reset_chip(), which is called prior to
qla24xx_chip_diag().

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 4649b2a..46bd085 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -730,9 +730,6 @@ qla24xx_chip_diag(scsi_qla_host_t *vha)
 	struct qla_hw_data *ha = vha->hw;
 	struct req_que *req = ha->req_q_map[0];
 
-	/* Perform RISC reset. */
-	qla24xx_reset_risc(vha);
-
 	ha->fw_transfer_size = REQUEST_ENTRY_SIZE * req->length;
 
 	rval = qla2x00_mbx_reg_test(vha);
-- 
cgit v0.10.2


From aed10881129c52f0e5dc1c96ac706b5ce7708a13 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:26 -0700
Subject: [SCSI] qla2xxx: Query supported RISC registers bits in determining a
 paused-state.

ISP24xx and above must query the host-status register, not HCCR.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 68671a2..4a990f4 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -149,11 +149,9 @@ qla24xx_pause_risc(struct device_reg_24xx __iomem *reg)
 	int rval = QLA_SUCCESS;
 	uint32_t cnt;
 
-	if (RD_REG_DWORD(&reg->hccr) & HCCRX_RISC_PAUSE)
-		return rval;
-
 	WRT_REG_DWORD(&reg->hccr, HCCRX_SET_RISC_PAUSE);
-	for (cnt = 30000; (RD_REG_DWORD(&reg->hccr) & HCCRX_RISC_PAUSE) == 0 &&
+	for (cnt = 30000;
+	    ((RD_REG_DWORD(&reg->host_status) & HSRX_RISC_PAUSED) == 0) &&
 	    rval == QLA_SUCCESS; cnt--) {
 		if (cnt)
 			udelay(100);
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 9e56d4a..dfde2dd 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -878,7 +878,6 @@ struct device_reg_24xx {
 					/* HCCR statuses. */
 #define HCCRX_HOST_INT		BIT_6	/* Host to RISC interrupt bit. */
 #define HCCRX_RISC_RESET	BIT_5	/* RISC Reset mode bit. */
-#define HCCRX_RISC_PAUSE	BIT_4	/* RISC Pause mode bit. */
 					/* HCCR commands. */
 					/* NOOP. */
 #define HCCRX_NOOP		0x00000000
-- 
cgit v0.10.2


From 6805c1504eb4cfd4a31c05ed88fdeb56228eb3ba Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:27 -0700
Subject: [SCSI] qla2xxx: Avoid explicit LOGO during driver host tear-down.

As firmware will ultimately terminate (stop) and port
states-cleared.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 74e6970..cbdafb0 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1465,7 +1465,8 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
 	 * At this point all fcport's software-states are cleared.  Perform any
 	 * final cleanup of firmware resources (PCBs and XCBs).
 	 */
-	if (fcport->loop_id != FC_NO_LOOP_ID)
+	if (fcport->loop_id != FC_NO_LOOP_ID &&
+	    !test_bit(UNLOADING, &fcport->vha->dpc_flags))
 		fcport->vha->hw->isp_ops->fabric_logout(fcport->vha,
 			fcport->loop_id, fcport->d_id.b.domain,
 			fcport->d_id.b.area, fcport->d_id.b.al_pa);
-- 
cgit v0.10.2


From f999f4c1961fe5399fd66c95860cc2d5d67e591e Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:28 -0700
Subject: [SCSI] qla2xxx: Reduce lock-contention during do-work processing.

Queued work processing will now be serialized with its own
lower-priority spinlock.  This also simplifies the work-queue
interface for future work-queue consumers.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 88ddae0..00aa48d 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2561,6 +2561,8 @@ typedef struct scsi_qla_host {
 	struct list_head list;
 	struct list_head vp_fcports;	/* list of fcports */
 	struct list_head work_list;
+	spinlock_t work_lock;
+
 	/* Commonly used flags and state information. */
 	struct Scsi_Host *host;
 	unsigned long	host_no;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 128b3d5..dcf0116 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -2533,6 +2533,8 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
 	INIT_LIST_HEAD(&vha->work_list);
 	INIT_LIST_HEAD(&vha->list);
 
+	spin_lock_init(&vha->work_lock);
+
 	sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no);
 	return vha;
 
@@ -2541,13 +2543,11 @@ fail:
 }
 
 static struct qla_work_evt *
-qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type,
-    int locked)
+qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type)
 {
 	struct qla_work_evt *e;
 
-	e = kzalloc(sizeof(struct qla_work_evt), locked ? GFP_ATOMIC:
-	    GFP_KERNEL);
+	e = kzalloc(sizeof(struct qla_work_evt), GFP_ATOMIC);
 	if (!e)
 		return NULL;
 
@@ -2558,17 +2558,15 @@ qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type,
 }
 
 static int
-qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e, int locked)
+qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e)
 {
-	unsigned long uninitialized_var(flags);
-	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags;
 
-	if (!locked)
-		spin_lock_irqsave(&ha->hardware_lock, flags);
+	spin_lock_irqsave(&vha->work_lock, flags);
 	list_add_tail(&e->list, &vha->work_list);
+	spin_unlock_irqrestore(&vha->work_lock, flags);
 	qla2xxx_wake_dpc(vha);
-	if (!locked)
-		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
 	return QLA_SUCCESS;
 }
 
@@ -2578,13 +2576,13 @@ qla2x00_post_aen_work(struct scsi_qla_host *vha, enum fc_host_event_code code,
 {
 	struct qla_work_evt *e;
 
-	e = qla2x00_alloc_work(vha, QLA_EVT_AEN, 1);
+	e = qla2x00_alloc_work(vha, QLA_EVT_AEN);
 	if (!e)
 		return QLA_FUNCTION_FAILED;
 
 	e->u.aen.code = code;
 	e->u.aen.data = data;
-	return qla2x00_post_work(vha, e, 1);
+	return qla2x00_post_work(vha, e);
 }
 
 int
@@ -2592,25 +2590,27 @@ qla2x00_post_idc_ack_work(struct scsi_qla_host *vha, uint16_t *mb)
 {
 	struct qla_work_evt *e;
 
-	e = qla2x00_alloc_work(vha, QLA_EVT_IDC_ACK, 1);
+	e = qla2x00_alloc_work(vha, QLA_EVT_IDC_ACK);
 	if (!e)
 		return QLA_FUNCTION_FAILED;
 
 	memcpy(e->u.idc_ack.mb, mb, QLA_IDC_ACK_REGS * sizeof(uint16_t));
-	return qla2x00_post_work(vha, e, 1);
+	return qla2x00_post_work(vha, e);
 }
 
 static void
 qla2x00_do_work(struct scsi_qla_host *vha)
 {
-	struct qla_work_evt *e;
-	struct qla_hw_data *ha = vha->hw;
+	struct qla_work_evt *e, *tmp;
+	unsigned long flags;
+	LIST_HEAD(work);
 
-	spin_lock_irq(&ha->hardware_lock);
-	while (!list_empty(&vha->work_list)) {
-		e = list_entry(vha->work_list.next, struct qla_work_evt, list);
+	spin_lock_irqsave(&vha->work_lock, flags);
+	list_splice_init(&vha->work_list, &work);
+	spin_unlock_irqrestore(&vha->work_lock, flags);
+
+	list_for_each_entry_safe(e, tmp, &work, list) {
 		list_del_init(&e->list);
-		spin_unlock_irq(&ha->hardware_lock);
 
 		switch (e->type) {
 		case QLA_EVT_AEN:
@@ -2623,10 +2623,9 @@ qla2x00_do_work(struct scsi_qla_host *vha)
 		}
 		if (e->flags & QLA_EVT_FLAG_FREE)
 			kfree(e);
-		spin_lock_irq(&ha->hardware_lock);
 	}
-	spin_unlock_irq(&ha->hardware_lock);
 }
+
 /* Relogins all the fcports of a vport
  * Context: dpc thread
  */
-- 
cgit v0.10.2


From 656e89122a737b60cebc7b8fcb669faf0e7bc905 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:29 -0700
Subject: [SCSI] qla2xxx: Export additional firmware-states for application
 support.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index cbdafb0..0f87962 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1233,6 +1233,22 @@ qla2x00_fabric_param_show(struct device *dev, struct device_attribute *attr,
 	return snprintf(buf, PAGE_SIZE, "%d\n", vha->hw->switch_cap);
 }
 
+static ssize_t
+qla2x00_fw_state_show(struct device *dev, struct device_attribute *attr,
+    char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+	int rval;
+	uint16_t state[5];
+
+	rval = qla2x00_get_firmware_state(vha, state);
+	if (rval != QLA_SUCCESS)
+		memset(state, -1, sizeof(state));
+
+	return snprintf(buf, PAGE_SIZE, "0x%x 0x%x 0x%x 0x%x 0x%x\n", state[0],
+	    state[1], state[2], state[3], state[4]);
+}
+
 static DEVICE_ATTR(driver_version, S_IRUGO, qla2x00_drvr_version_show, NULL);
 static DEVICE_ATTR(fw_version, S_IRUGO, qla2x00_fw_version_show, NULL);
 static DEVICE_ATTR(serial_num, S_IRUGO, qla2x00_serial_num_show, NULL);
@@ -1265,6 +1281,7 @@ static DEVICE_ATTR(vlan_id, S_IRUGO, qla2x00_vlan_id_show, NULL);
 static DEVICE_ATTR(vn_port_mac_address, S_IRUGO,
 		   qla2x00_vn_port_mac_address_show, NULL);
 static DEVICE_ATTR(fabric_param, S_IRUGO, qla2x00_fabric_param_show, NULL);
+static DEVICE_ATTR(fw_state, S_IRUGO, qla2x00_fw_state_show, NULL);
 
 struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_driver_version,
@@ -1290,6 +1307,7 @@ struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_vlan_id,
 	&dev_attr_vn_port_mac_address,
 	&dev_attr_fabric_param,
+	&dev_attr_fw_state,
 	NULL,
 };
 
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 46bd085..36cea22 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1304,7 +1304,7 @@ qla2x00_fw_ready(scsi_qla_host_t *vha)
 	unsigned long	wtime, mtime, cs84xx_time;
 	uint16_t	min_wait;	/* Minimum wait time if loop is down */
 	uint16_t	wait_time;	/* Wait time if loop is coming ready */
-	uint16_t	state[3];
+	uint16_t	state[5];
 	struct qla_hw_data *ha = vha->hw;
 
 	rval = QLA_SUCCESS;
@@ -1403,8 +1403,9 @@ qla2x00_fw_ready(scsi_qla_host_t *vha)
 		    vha->host_no, state[0], jiffies));
 	} while (1);
 
-	DEBUG(printk("scsi(%ld): fw_state=%x curr time=%lx.\n",
-	    vha->host_no, state[0], jiffies));
+	DEBUG(printk("scsi(%ld): fw_state=%x (%x, %x, %x, %x) curr time=%lx.\n",
+	    vha->host_no, state[0], state[1], state[2], state[3], state[4],
+	    jiffies));
 
 	if (rval) {
 		DEBUG2_3(printk("scsi(%ld): Firmware ready **** FAILED ****.\n",
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index df919c0..f215578 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -1267,7 +1267,7 @@ qla2x00_get_firmware_state(scsi_qla_host_t *vha, uint16_t *states)
 
 	mcp->mb[0] = MBC_GET_FIRMWARE_STATE;
 	mcp->out_mb = MBX_0;
-	mcp->in_mb = MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_5|MBX_4|MBX_3|MBX_2|MBX_1|MBX_0;
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
 	rval = qla2x00_mailbox_command(vha, mcp);
@@ -1276,6 +1276,8 @@ qla2x00_get_firmware_state(scsi_qla_host_t *vha, uint16_t *states)
 	states[0] = mcp->mb[1];
 	states[1] = mcp->mb[2];
 	states[2] = mcp->mb[3];
+	states[3] = mcp->mb[4];
+	states[4] = mcp->mb[5];
 
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
-- 
cgit v0.10.2


From 18e7555a38eaadb757353c4b76667e07166ad26c Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:30 -0700
Subject: [SCSI] qla2xxx: Synchronize MPI settings after a PE Reset.

Ensure MPS remains in synchronization across all NIC/FCoE
functions after a reset.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 1ef18cc..65b12d8 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -299,6 +299,12 @@ qla2x00_get_xgmac_stats(scsi_qla_host_t *, dma_addr_t, uint16_t, uint16_t *);
 extern int
 qla2x00_get_dcbx_params(scsi_qla_host_t *, dma_addr_t, uint16_t);
 
+extern int
+qla2x00_read_ram_word(scsi_qla_host_t *, uint32_t, uint32_t *);
+
+extern int
+qla2x00_write_ram_word(scsi_qla_host_t *, uint32_t, uint32_t);
+
 /*
  * Global Function Prototypes in qla_isr.c source file.
  */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 36cea22..2620261 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -886,6 +886,56 @@ cont_alloc:
 	    htonl(offsetof(struct qla2xxx_fw_dump, isp));
 }
 
+static int
+qla81xx_mpi_sync(scsi_qla_host_t *vha)
+{
+#define MPS_MASK	0xe0
+	int rval;
+	uint16_t dc;
+	uint32_t dw;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!IS_QLA81XX(vha->hw))
+		return QLA_SUCCESS;
+
+	rval = qla2x00_write_ram_word(vha, 0x7c00, 1);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2(qla_printk(KERN_WARNING, ha,
+		    "Sync-MPI: Unable to acquire semaphore.\n"));
+		goto done;
+	}
+
+	pci_read_config_word(vha->hw->pdev, 0x54, &dc);
+	rval = qla2x00_read_ram_word(vha, 0x7a15, &dw);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2(qla_printk(KERN_WARNING, ha,
+		    "Sync-MPI: Unable to read sync.\n"));
+		goto done_release;
+	}
+
+	dc &= MPS_MASK;
+	if (dc == (dw & MPS_MASK))
+		goto done_release;
+
+	dw &= ~MPS_MASK;
+	dw |= dc;
+	rval = qla2x00_write_ram_word(vha, 0x7a15, dw);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2(qla_printk(KERN_WARNING, ha,
+		    "Sync-MPI: Unable to gain sync.\n"));
+	}
+
+done_release:
+	rval = qla2x00_write_ram_word(vha, 0x7c00, 0);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2(qla_printk(KERN_WARNING, ha,
+		    "Sync-MPI: Unable to release semaphore.\n"));
+	}
+
+done:
+	return rval;
+}
+
 /**
  * qla2x00_setup_chip() - Load and start RISC firmware.
  * @ha: HA context
@@ -910,6 +960,8 @@ qla2x00_setup_chip(scsi_qla_host_t *vha)
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	}
 
+	qla81xx_mpi_sync(vha);
+
 	/* Load firmware sequences */
 	rval = ha->isp_ops->load_risc(vha, &srisc_address);
 	if (rval == QLA_SUCCESS) {
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index f215578..451ece0 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3545,3 +3545,66 @@ qla2x00_get_dcbx_params(scsi_qla_host_t *vha, dma_addr_t tlv_dma,
 
 	return rval;
 }
+
+int
+qla2x00_read_ram_word(scsi_qla_host_t *vha, uint32_t risc_addr, uint32_t *data)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_FWI2_CAPABLE(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
+
+	mcp->mb[0] = MBC_READ_RAM_EXTENDED;
+	mcp->mb[1] = LSW(risc_addr);
+	mcp->mb[8] = MSW(risc_addr);
+	mcp->out_mb = MBX_8|MBX_1|MBX_0;
+	mcp->in_mb = MBX_3|MBX_2|MBX_0;
+	mcp->tov = 30;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=%x.\n", __func__,
+		    vha->host_no, rval, mcp->mb[0]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
+		*data = mcp->mb[3] << 16 | mcp->mb[2];
+	}
+
+	return rval;
+}
+
+int
+qla2x00_write_ram_word(scsi_qla_host_t *vha, uint32_t risc_addr, uint32_t data)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_FWI2_CAPABLE(vha->hw))
+                return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
+
+	mcp->mb[0] = MBC_WRITE_RAM_WORD_EXTENDED;
+	mcp->mb[1] = LSW(risc_addr);
+	mcp->mb[2] = LSW(data);
+	mcp->mb[3] = MSW(data);
+	mcp->mb[8] = MSW(risc_addr);
+	mcp->out_mb = MBX_8|MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_0;
+	mcp->tov = 30;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=%x.\n", __func__,
+		    vha->host_no, rval, mcp->mb[0]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, vha->host_no));
+	}
+
+	return rval;
+}
-- 
cgit v0.10.2


From 714df9399b3d2c0a7484e8cfb7c9cb100b0b7f19 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 3 Jun 2009 09:55:31 -0700
Subject: [SCSI] qla2xxx: Update version number to 8.03.01-k3.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index a1094e7..b63feaf 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.03.01-k2"
+#define QLA2XXX_VERSION      "8.03.01-k3"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	3
-- 
cgit v0.10.2


From 9d01e4cd7eb4a70b04cf5a5b4f79c99e8e3e3edc Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Mon, 8 Jun 2009 22:03:03 +0200
Subject: ide-tape: fix proc warning

ide_tape_chrdev_get() was missing an ide_device_get() refcount increment
which lead to the following warning:

[  278.147906] ------------[ cut here ]------------
[  278.152685] WARNING: at fs/proc/generic.c:847 remove_proc_entry+0x199/0x1b8()
[  278.160070] Hardware name: P4I45PE    1.00
[  278.160076] remove_proc_entry: removing non-empty directory 'ide0/hdb', leaking at least 'name'
[  278.160080] Modules linked in: rtc intel_agp pcspkr thermal processor thermal_sys parport_pc parport agpgart button
[  278.160100] Pid: 2312, comm: mt Not tainted 2.6.30-rc2 #3
[  278.160105] Call Trace:
[  278.160117]  [<c012141d>] warn_slowpath+0x71/0xa0
[  278.160126]  [<c035f219>] ? _spin_unlock_irqrestore+0x29/0x2c
[  278.160132]  [<c011c686>] ? try_to_wake_up+0x1b6/0x1c0
[  278.160141]  [<c011c69b>] ? default_wake_function+0xb/0xd
[  278.160149]  [<c0177ead>] ? pollwake+0x4a/0x55
[  278.160156]  [<c035f240>] ? _spin_unlock+0x24/0x26
[  278.160163]  [<c0165d38>] ? add_partial+0x44/0x49
[  278.160169]  [<c01669e8>] ? __slab_free+0xba/0x29c
[  278.160177]  [<c01a13d8>] ? sysfs_delete_inode+0x0/0x3c
[  278.160184]  [<c019ca92>] remove_proc_entry+0x199/0x1b8
[  278.160191]  [<c01a297e>] ? remove_dir+0x27/0x2e
[  278.160199]  [<c025f3ab>] ide_proc_unregister_device+0x40/0x4c
[  278.160207]  [<c02599cd>] drive_release_dev+0x14/0x47
[  278.160214]  [<c0250538>] device_release+0x35/0x5a
[  278.160221]  [<c01f8bed>] kobject_release+0x40/0x50
[  278.160226]  [<c01f8bad>] ? kobject_release+0x0/0x50
[  278.160232]  [<c01f96ac>] kref_put+0x3c/0x4a
[  278.160238]  [<c01f8b29>] kobject_put+0x37/0x3c
[  278.160243]  [<c025020c>] put_device+0xf/0x11
[  278.160249]  [<c025789f>] ide_device_put+0x2d/0x30
[  278.160255]  [<c02658da>] ide_tape_put+0x24/0x32
[  278.160261]  [<c0266e0c>] idetape_chrdev_release+0x17f/0x18e
[  278.160269]  [<c016c4f5>] __fput+0xca/0x175
[  278.160275]  [<c016c5b9>] fput+0x19/0x1b
[  278.160280]  [<c0169d19>] filp_close+0x51/0x5b
[  278.160286]  [<c0169d96>] sys_close+0x73/0xad
[  278.160293]  [<c0102a61>] syscall_call+0x7/0xb
[  278.160298] ---[ end trace f16d907ea1f89336 ]---

Instead of trivially fixing it by adding the missing call,
ide_tape_chrdev_get() and ide_tape_get() were merged into one function
since both were almost identical. The only difference was that
ide_tape_chrdev_get() was accessing the ide-tape reference through the
idetape_devs[] array of minors instead of through the gendisk.

Accomodate that by adding two additional parameters to ide_tape_get() to
annotate the call site and invoke the proper behavior.

As a result, remove ide_tape_chrdev_get().

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 055f52e..51ea59e 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -240,18 +240,27 @@ static struct class *idetape_sysfs_class;
 
 static void ide_tape_release(struct device *);
 
-static struct ide_tape_obj *ide_tape_get(struct gendisk *disk)
+static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
+
+static struct ide_tape_obj *ide_tape_get(struct gendisk *disk, bool cdev,
+					 unsigned int i)
 {
 	struct ide_tape_obj *tape = NULL;
 
 	mutex_lock(&idetape_ref_mutex);
-	tape = ide_drv_g(disk, ide_tape_obj);
+
+	if (cdev)
+		tape = idetape_devs[i];
+	else
+		tape = ide_drv_g(disk, ide_tape_obj);
+
 	if (tape) {
 		if (ide_device_get(tape->drive))
 			tape = NULL;
 		else
 			get_device(&tape->dev);
 	}
+
 	mutex_unlock(&idetape_ref_mutex);
 	return tape;
 }
@@ -267,24 +276,6 @@ static void ide_tape_put(struct ide_tape_obj *tape)
 }
 
 /*
- * The variables below are used for the character device interface. Additional
- * state variables are defined in our ide_drive_t structure.
- */
-static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
-
-static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i)
-{
-	struct ide_tape_obj *tape = NULL;
-
-	mutex_lock(&idetape_ref_mutex);
-	tape = idetape_devs[i];
-	if (tape)
-		get_device(&tape->dev);
-	mutex_unlock(&idetape_ref_mutex);
-	return tape;
-}
-
-/*
  * called on each failed packet command retry to analyze the request sense. We
  * currently do not utilize this information.
  */
@@ -1495,7 +1486,7 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
 		return -ENXIO;
 
 	lock_kernel();
-	tape = ide_tape_chrdev_get(i);
+	tape = ide_tape_get(NULL, true, i);
 	if (!tape) {
 		unlock_kernel();
 		return -ENXIO;
@@ -1916,7 +1907,7 @@ static const struct file_operations idetape_fops = {
 
 static int idetape_open(struct block_device *bdev, fmode_t mode)
 {
-	struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk);
+	struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk, false, 0);
 
 	if (!tape)
 		return -ENXIO;
-- 
cgit v0.10.2


From 75c2d7d71a85d02594da07d5d2ad587451b64b02 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Mon, 8 Jun 2009 22:03:03 +0200
Subject: sl82c105: add printk() logging facility

Add missing printk() logging facility in sl82c105_dma_lost_irq().

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index b0a4606..0924abf 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -10,7 +10,7 @@
  * with the timing registers setup.
  *  -- Benjamin Herrenschmidt (01/11/03) benh@kernel.crashing.org
  *
- * Copyright (C) 2006-2007 MontaVista Software, Inc. <source@mvista.com>
+ * Copyright (C) 2006-2007,2009 MontaVista Software, Inc. <source@mvista.com>
  * Copyright (C)      2007 Bartlomiej Zolnierkiewicz
  */
 
@@ -146,14 +146,15 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive)
 	u32 val, mask		= hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
 	u8 dma_cmd;
 
-	printk("sl82c105: lost IRQ, resetting host\n");
+	printk(KERN_WARNING "sl82c105: lost IRQ, resetting host\n");
 
 	/*
 	 * Check the raw interrupt from the drive.
 	 */
 	pci_read_config_dword(dev, 0x40, &val);
 	if (val & mask)
-		printk("sl82c105: drive was requesting IRQ, but host lost it\n");
+		printk(KERN_INFO "sl82c105: drive was requesting IRQ, "
+		       "but host lost it\n");
 
 	/*
 	 * Was DMA enabled?  If so, disable it - we're resetting the
@@ -162,7 +163,7 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive)
 	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
 	if (dma_cmd & 1) {
 		outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD);
-		printk("sl82c105: DMA was enabled\n");
+		printk(KERN_INFO "sl82c105: DMA was enabled\n");
 	}
 
 	sl82c105_reset_host(dev);
-- 
cgit v0.10.2


From a20b2a44eca52818ef52a94959480b7e6ea2f528 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 8 Jun 2009 22:07:28 +0200
Subject: ide: skip probe if there are no devices on the port (v2)

In ide_probe_port() skip probe if ide_port_wait_ready() returns -ENODEV
and print error message instead of debug one if it returns -EBUSY.

v2:
Fix the default 'rc' value.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 28f95cb4..f9c2fb7 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -702,8 +702,14 @@ static int ide_probe_port(ide_hwif_t *hwif)
 	if (irqd)
 		disable_irq(hwif->irq);
 
-	if (ide_port_wait_ready(hwif) == -EBUSY)
-		printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name);
+	rc = ide_port_wait_ready(hwif);
+	if (rc == -ENODEV) {
+		printk(KERN_INFO "%s: no devices on the port\n", hwif->name);
+		goto out;
+	} else if (rc == -EBUSY)
+		printk(KERN_ERR "%s: not ready before the probe\n", hwif->name);
+	else
+		rc = -ENODEV;
 
 	/*
 	 * Second drive should only exist if first drive was found,
@@ -714,7 +720,7 @@ static int ide_probe_port(ide_hwif_t *hwif)
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
 			rc = 0;
 	}
-
+out:
 	/*
 	 * Use cached IRQ number. It might be (and is...) changed by probe
 	 * code above
-- 
cgit v0.10.2


From f86748e91a14bd6cc49477560f33ed5d59896e89 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 8 Jun 2009 22:33:10 +0200
Subject: perf_counter, x86: Implement generalized cache event types, add AMD
 support

Fill in amd_hw_cache_event_id[] with the AMD CPU specific events,
for family 0x0f, 0x10 and 0x11.

There's apparently no distinction between load and store events, so
we only fill in the load events.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 0339d19..93af821 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -389,6 +389,97 @@ static u64 intel_pmu_raw_event(u64 event)
 	return event & CORE_EVNTSEL_MASK;
 }
 
+static const u64 amd_0f_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L2  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
+		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
+		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
 /*
  * AMD Performance Monitor K7 and later.
  */
@@ -1345,6 +1436,17 @@ static int intel_pmu_init(void)
 static int amd_pmu_init(void)
 {
 	x86_pmu = amd_pmu;
+
+	switch (boot_cpu_data.x86) {
+	case 0x0f:
+	case 0x10:
+	case 0x11:
+		memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		pr_cont("AMD Family 0f/10/11 events, ");
+		break;
+	}
 	return 0;
 }
 
-- 
cgit v0.10.2


From 820a644211bc1ac7715333abdb0f0b9ea4fbb549 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 8 Jun 2009 19:10:25 +0200
Subject: perf_counter, x86: Clean up hw_cache_event ids copies

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 93af821..56001fe 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1409,23 +1409,20 @@ static int intel_pmu_init(void)
 	switch (boot_cpu_data.x86_model) {
 	case 17:
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
-		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
-			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+		       sizeof(hw_cache_event_ids));
 
 		pr_cont("Core2 events, ");
 		break;
 	default:
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
-		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
-			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+		       sizeof(hw_cache_event_ids));
 
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28:
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
-		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
-			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+		       sizeof(hw_cache_event_ids));
 
 		pr_cont("Atom events, ");
 		break;
-- 
cgit v0.10.2


From dab5855b12411334355ba21349a06700e4ae7a3b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 8 Jun 2009 21:11:57 +0300
Subject: perf_counter: Add mmap event hooks to mprotect()

Some JIT compilers allocate memory for generated code with
posix_memalign() + mprotect() so we need to hook into mprotect()
to make sure 'perf' is aware that we're executing code in
anonymous memory.

[ penberg@cs.helsinki.fi: move the hook to sys_mprotect() ]
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
LKML-Reference: <Pine.LNX.4.64.0906082111030.12407@melkki.cs.Helsinki.FI>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 258197b..d80311b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -23,6 +23,7 @@
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
+#include <linux/perf_counter.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -299,6 +300,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
 		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
 		if (error)
 			goto out;
+		perf_counter_mmap(vma);
 		nstart = tmp;
 
 		if (nstart < prev->vm_end)
-- 
cgit v0.10.2


From 80d496be89ed7dede5abee5c057634e80a31c82d Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Mon, 8 Jun 2009 21:12:48 +0300
Subject: perf report: Add support for profiling JIT generated code

This patch adds support for profiling JIT generated code to 'perf
report'. A JIT compiler is required to generate a "/tmp/perf-$PID.map"
symbols map that is parsed when looking and displaying symbols.

Thanks to Peter Zijlstra for his help with this patch!

Example "perf report" output with the Jato JIT:

 #
 # (40311 samples)
 #
 # Overhead           Command  Shared Object              Symbol
 # ........  ................  .........................  ......
 #
     97.80%              jato  /tmp/perf-11915.map        [.] Fibonacci.fib(I)I
      0.56%              jato  00000000b7fa023b           0x000000b7fa023b
      0.45%              jato  /tmp/perf-11915.map        [.] Fibonacci.main([Ljava/lang/String;)V
      0.38%              jato  [kernel]                   [k] get_page_from_freelist
      0.06%              jato  [kernel]                   [k] kunmap_atomic
      0.05%              jato  ./jato                     [.] utf8Hash
      0.04%              jato  ./jato                     [.] executeJava
      0.04%              jato  ./jato                     [.] defineClass

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: a.p.zijlstra@chello.nl
Cc: acme@redhat.com
LKML-Reference: <Pine.LNX.4.64.0906082111590.12407@melkki.cs.Helsinki.FI>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f053a74..61d8718 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -209,6 +209,11 @@ static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
 	return ip;
 }
 
+static inline int is_anon_memory(const char *filename)
+{
+     return strcmp(filename, "//anon") == 0;
+}
+
 static struct map *map__new(struct mmap_event *event)
 {
 	struct map *self = malloc(sizeof(*self));
@@ -216,6 +221,7 @@ static struct map *map__new(struct mmap_event *event)
 	if (self != NULL) {
 		const char *filename = event->filename;
 		char newfilename[PATH_MAX];
+		int anon;
 
 		if (cwd) {
 			int n = strcommon(filename);
@@ -227,6 +233,13 @@ static struct map *map__new(struct mmap_event *event)
 			}
 		}
 
+		anon = is_anon_memory(filename);
+
+		if (anon) {
+			snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", event->pid);
+			filename = newfilename;
+		}
+
 		self->start = event->start;
 		self->end   = event->start + event->len;
 		self->pgoff = event->pgoff;
@@ -235,7 +248,7 @@ static struct map *map__new(struct mmap_event *event)
 		if (self->dso == NULL)
 			goto out_delete;
 
-		if (self->dso == vdso)
+		if (self->dso == vdso || anon)
 			self->map_ip = vdso__map_ip;
 		else
 			self->map_ip = map__map_ip;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 158588c..32dd47d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -220,6 +220,68 @@ out_failure:
 	return -1;
 }
 
+static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verbose)
+{
+	char *line = NULL;
+	size_t n;
+	FILE *file;
+	int nr_syms = 0;
+
+	file = fopen(self->name, "r");
+	if (file == NULL)
+		goto out_failure;
+
+	while (!feof(file)) {
+		__u64 start, size;
+		struct symbol *sym;
+		int line_len, len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		line[--line_len] = '\0'; /* \n */
+
+		len = hex2u64(line, &start);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		len += hex2u64(line + len, &size);
+
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		sym = symbol__new(start, size, line + len,
+				  self->sym_priv_size, start, verbose);
+
+		if (sym == NULL)
+			goto out_delete_line;
+
+		if (filter && filter(self, sym))
+			symbol__delete(sym, self->sym_priv_size);
+		else {
+			dso__insert_symbol(self, sym);
+			nr_syms++;
+		}
+	}
+
+	free(line);
+	fclose(file);
+
+	return nr_syms;
+
+out_delete_line:
+	free(line);
+out_failure:
+	return -1;
+}
+
 /**
  * elf_symtab__for_each_symbol - iterate thru all the symbols
  *
@@ -507,6 +569,9 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
 	if (!name)
 		return -1;
 
+	if (strncmp(self->name, "/tmp/perf-", 10) == 0)
+		return dso__load_perf_map(self, filter, verbose);
+
 more:
 	do {
 		switch (variant) {
-- 
cgit v0.10.2


From aefcf37b82886260d8540c9fb815e613c8977e06 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 8 Jun 2009 23:15:28 +0200
Subject: perf_counter tools: Standardize color printing

The rule is:

 - high overhead: red
 -  mid overhead: green
 -  low overhead: normal (white/black)

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 0e23fe9..3334a8b 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -1085,8 +1085,9 @@ parse_line(FILE *file, struct symbol *sym, uint64_t start, uint64_t len)
 			percent = 100.0 * hits / sym->hist_sum;
 
 		/*
-		 * We color high-overhead entries in red, low-overhead
-		 * entries in green - and keep the middle ground normal:
+		 * We color high-overhead entries in red, mid-overhead
+		 * entries in green - and keep the low overhead places
+		 * normal:
 		 */
 		if (percent >= 5.0)
 			color = PERF_COLOR_RED;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 61d8718..0b18cb9 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -693,13 +693,16 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 		char *color = PERF_COLOR_NORMAL;
 
 		/*
-		 * We color high-overhead entries in red, low-overhead
-		 * entries in green - and keep the middle ground normal:
+		 * We color high-overhead entries in red, mid-overhead
+		 * entries in green - and keep the low overhead places
+		 * normal:
 		 */
-		if (percent >= 5.0)
+		if (percent >= 5.0) {
 			color = PERF_COLOR_RED;
-		if (percent < 0.5)
-			color = PERF_COLOR_GREEN;
+		} else {
+			if (percent >= 0.5)
+				color = PERF_COLOR_GREEN;
+		}
 
 		ret = color_fprintf(fp, color, "   %6.2f%%",
 				(self->count * 100.0) / total_samples);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index be1698f..8ba2480 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -248,13 +248,16 @@ static void print_sym_table(void)
 					 sum_ksamples));
 
 		/*
-		 * We color high-overhead entries in red, low-overhead
-		 * entries in green - and keep the middle ground normal:
+		 * We color high-overhead entries in red, mid-overhead
+		 * entries in green - and keep the low overhead places
+		 * normal:
 		 */
-		if (pcnt >= 5.0)
+		if (pcnt >= 5.0) {
 			color = PERF_COLOR_RED;
-		if (pcnt < 0.5)
-			color = PERF_COLOR_GREEN;
+		} else {
+			if (pcnt >= 0.5)
+				color = PERF_COLOR_GREEN;
+		}
 
 		if (nr_counters == 1)
 			printf("%20.2f - ", syme->weight);
-- 
cgit v0.10.2


From 1f8a6a10fb9437eac3f516ea4324a19087872f30 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 8 Jun 2009 18:18:39 +0200
Subject: ring-buffer: pass in lockdep class key for reader_lock

On Sun, 7 Jun 2009, Ingo Molnar wrote:
> Testing tracer sched_switch: <6>Starting ring buffer hammer
> PASSED
> Testing tracer sysprof: PASSED
> Testing tracer function: PASSED
> Testing tracer irqsoff:
> =============================================
> PASSED
> Testing tracer preemptoff: PASSED
> Testing tracer preemptirqsoff: [ INFO: possible recursive locking detected ]
> PASSED
> Testing tracer branch: 2.6.30-rc8-tip-01972-ge5b9078-dirty #5760
> ---------------------------------------------
> rb_consumer/431 is trying to acquire lock:
>  (&cpu_buffer->reader_lock){......}, at: [<c109eef7>] ring_buffer_reset_cpu+0x37/0x70
>
> but task is already holding lock:
>  (&cpu_buffer->reader_lock){......}, at: [<c10a019e>] ring_buffer_consume+0x7e/0xc0
>
> other info that might help us debug this:
> 1 lock held by rb_consumer/431:
>  #0:  (&cpu_buffer->reader_lock){......}, at: [<c10a019e>] ring_buffer_consume+0x7e/0xc0

The ring buffer is a generic structure, and can be used outside of
ftrace. If ftrace traces within the use of the ring buffer, it can produce
false positives with lockdep.

This patch passes in a static lock key into the allocation of the ring
buffer, so that different ring buffers will have their own lock class.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1244477919.13761.9042.camel@twins>

[ store key in ring buffer descriptor ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index f134582..8670f15 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -105,7 +105,19 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
  * size is in bytes for each per CPU buffer.
  */
 struct ring_buffer *
-ring_buffer_alloc(unsigned long size, unsigned flags);
+__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key);
+
+/*
+ * Because the ring buffer is generic, if other users of the ring buffer get
+ * traced by ftrace, it can produce lockdep warnings. We need to keep each
+ * ring buffer's lock class separate.
+ */
+#define ring_buffer_alloc(size, flags)			\
+({							\
+	static struct lock_class_key __key;		\
+	__ring_buffer_alloc((size), (flags), &__key);	\
+})
+
 void ring_buffer_free(struct ring_buffer *buffer);
 
 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7102d7a..22878b0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -426,6 +426,8 @@ struct ring_buffer {
 	atomic_t			record_disabled;
 	cpumask_var_t			cpumask;
 
+	struct lock_class_key		*reader_lock_key;
+
 	struct mutex			mutex;
 
 	struct ring_buffer_per_cpu	**buffers;
@@ -565,6 +567,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
+	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -635,7 +638,8 @@ static int rb_cpu_notify(struct notifier_block *self,
  * when the buffer wraps. If this flag is not set, the buffer will
  * drop data when the tail hits the head.
  */
-struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+					struct lock_class_key *key)
 {
 	struct ring_buffer *buffer;
 	int bsize;
@@ -658,6 +662,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
+	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
 	if (buffer->pages == 1)
@@ -715,7 +720,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	kfree(buffer);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
-- 
cgit v0.10.2


From fbc56f0801f58041a4372a030933bac076b46aad Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Mon, 8 Jun 2009 16:19:01 -0500
Subject: [SCSI] ibmvscsi: Add 16 byte CDB support

Adds support for 16 byte CDBs to the ibmvscsi driver.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 8d3925f..9804b42 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -1684,6 +1684,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	host->max_lun = 8;
 	host->max_id = max_id;
 	host->max_channel = max_channel;
+	host->max_cmd_len = 16;
 
 	if (scsi_add_host(hostdata->host, hostdata->dev))
 		goto add_host_failed;
-- 
cgit v0.10.2


From e1a5ce5b88d06344caec0c71b4ee33e7296358dd Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Mon, 8 Jun 2009 16:19:03 -0500
Subject: [SCSI] ibmvscsi: Add specific timeouts for operations

Previously we had one timeout that was used for all types of operations.
This adds specific timeout values for different operations (init, login,
adapter info MAD, abort task, and LUN reset).

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 9804b42..6038c04 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -87,7 +87,11 @@
  */
 static int max_id = 64;
 static int max_channel = 3;
-static int init_timeout = 5;
+static int init_timeout = 300;
+static int login_timeout = 60;
+static int info_timeout = 30;
+static int abort_timeout = 60;
+static int reset_timeout = 60;
 static int max_requests = IBMVSCSI_MAX_REQUESTS_DEFAULT;
 static int max_events = IBMVSCSI_MAX_REQUESTS_DEFAULT + 2;
 
@@ -849,7 +853,7 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 	init_event_struct(evt_struct,
 			  adapter_info_rsp,
 			  VIOSRP_MAD_FORMAT,
-			  init_timeout);
+			  info_timeout);
 	
 	req = &evt_struct->iu.mad.adapter_info;
 	memset(req, 0x00, sizeof(*req));
@@ -871,7 +875,7 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 	}
 	
 	spin_lock_irqsave(hostdata->host->host_lock, flags);
-	if (ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2)) {
+	if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2)) {
 		dev_err(hostdata->dev, "couldn't send ADAPTER_INFO_REQ!\n");
 		dma_unmap_single(hostdata->dev,
 				 addr,
@@ -944,7 +948,7 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 	init_event_struct(evt_struct,
 			  login_rsp,
 			  VIOSRP_SRP_FORMAT,
-			  init_timeout);
+			  login_timeout);
 
 	login = &evt_struct->iu.srp.login_req;
 	memset(login, 0x00, sizeof(struct srp_login_req));
@@ -959,7 +963,7 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 	 */
 	atomic_set(&hostdata->request_limit, 0);
 
-	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2);
+	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	dev_info(hostdata->dev, "sent SRP login\n");
 	return rc;
@@ -1026,7 +1030,7 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd)
 		init_event_struct(evt,
 				  sync_completion,
 				  VIOSRP_SRP_FORMAT,
-				  init_timeout);
+				  abort_timeout);
 
 		tsk_mgmt = &evt->iu.srp.tsk_mgmt;
 	
@@ -1040,7 +1044,7 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd)
 		evt->sync_srp = &srp_rsp;
 
 		init_completion(&evt->comp);
-		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, abort_timeout * 2);
 
 		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
 			break;
@@ -1149,7 +1153,7 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd)
 		init_event_struct(evt,
 				  sync_completion,
 				  VIOSRP_SRP_FORMAT,
-				  init_timeout);
+				  reset_timeout);
 
 		tsk_mgmt = &evt->iu.srp.tsk_mgmt;
 
@@ -1162,7 +1166,7 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd)
 		evt->sync_srp = &srp_rsp;
 
 		init_completion(&evt->comp);
-		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, reset_timeout * 2);
 
 		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
 			break;
@@ -1394,7 +1398,7 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
 	init_event_struct(evt_struct,
 			  sync_completion,
 			  VIOSRP_MAD_FORMAT,
-			  init_timeout);
+			  info_timeout);
 
 	host_config = &evt_struct->iu.mad.host_config;
 
@@ -1416,7 +1420,7 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
 
 	init_completion(&evt_struct->comp);
 	spin_lock_irqsave(hostdata->host->host_lock, flags);
-	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2);
+	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 	if (rc == 0)
 		wait_for_completion(&evt_struct->comp);
@@ -1441,7 +1445,7 @@ static int ibmvscsi_slave_configure(struct scsi_device *sdev)
 	spin_lock_irqsave(shost->host_lock, lock_flags);
 	if (sdev->type == TYPE_DISK) {
 		sdev->allow_restart = 1;
-		blk_queue_rq_timeout(sdev->request_queue, 60 * HZ);
+		blk_queue_rq_timeout(sdev->request_queue, 120 * HZ);
 	}
 	scsi_adjust_queue_depth(sdev, 0, shost->cmd_per_lun);
 	spin_unlock_irqrestore(shost->host_lock, lock_flags);
-- 
cgit v0.10.2


From 3507e13fcba6b97501891a410ec8ef9f1f188620 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Mon, 8 Jun 2009 16:19:04 -0500
Subject: [SCSI] ibmvscsi: Send adapter info before login

The ibmvscsi driver currently sends the SRP Login before sending the Adapter
Info MAD, which can result in commands getting sent to the virtual adapter
before we are ready for them. This results in a slight window where the target
devices may not behave as expected. Change the order and close the window.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 6038c04..2ed46b8 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -785,6 +785,83 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
 /* ------------------------------------------------------------
  * Routines for driver initialization
  */
+
+/**
+ * login_rsp: - Handle response to SRP login request
+ * @evt_struct:	srp_event_struct with the response
+ *
+ * Used as a "done" callback by when sending srp_login. Gets called
+ * by ibmvscsi_handle_crq()
+*/
+static void login_rsp(struct srp_event_struct *evt_struct)
+{
+	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
+	switch (evt_struct->xfer_iu->srp.login_rsp.opcode) {
+	case SRP_LOGIN_RSP:	/* it worked! */
+		break;
+	case SRP_LOGIN_REJ:	/* refused! */
+		dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
+			 evt_struct->xfer_iu->srp.login_rej.reason);
+		/* Login failed.  */
+		atomic_set(&hostdata->request_limit, -1);
+		return;
+	default:
+		dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
+			evt_struct->xfer_iu->srp.login_rsp.opcode);
+		/* Login failed.  */
+		atomic_set(&hostdata->request_limit, -1);
+		return;
+	}
+
+	dev_info(hostdata->dev, "SRP_LOGIN succeeded\n");
+
+	/* Now we know what the real request-limit is.
+	 * This value is set rather than added to request_limit because
+	 * request_limit could have been set to -1 by this client.
+	 */
+	atomic_set(&hostdata->request_limit,
+		   evt_struct->xfer_iu->srp.login_rsp.req_lim_delta);
+
+	/* If we had any pending I/Os, kick them */
+	scsi_unblock_requests(hostdata->host);
+}
+
+/**
+ * send_srp_login: - Sends the srp login
+ * @hostdata:	ibmvscsi_host_data of host
+ *
+ * Returns zero if successful.
+*/
+static int send_srp_login(struct ibmvscsi_host_data *hostdata)
+{
+	int rc;
+	unsigned long flags;
+	struct srp_login_req *login;
+	struct srp_event_struct *evt_struct = get_event_struct(&hostdata->pool);
+
+	BUG_ON(!evt_struct);
+	init_event_struct(evt_struct, login_rsp,
+			  VIOSRP_SRP_FORMAT, login_timeout);
+
+	login = &evt_struct->iu.srp.login_req;
+	memset(login, 0, sizeof(*login));
+	login->opcode = SRP_LOGIN_REQ;
+	login->req_it_iu_len = sizeof(union srp_iu);
+	login->req_buf_fmt = SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT;
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	/* Start out with a request limit of 0, since this is negotiated in
+	 * the login request we are just sending and login requests always
+	 * get sent by the driver regardless of request_limit.
+	 */
+	atomic_set(&hostdata->request_limit, 0);
+
+	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+	dev_info(hostdata->dev, "sent SRP login\n");
+	return rc;
+};
+
 /**
  * adapter_info_rsp: - Handle response to MAD adapter info request
  * @evt_struct:	srp_event_struct with the response
@@ -825,6 +902,8 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct)
 			hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
 		}
 	}
+
+	send_srp_login(hostdata);
 }
 
 /**
@@ -844,11 +923,7 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 	dma_addr_t addr;
 
 	evt_struct = get_event_struct(&hostdata->pool);
-	if (!evt_struct) {
-		dev_err(hostdata->dev,
-			"couldn't allocate an event for ADAPTER_INFO_REQ!\n");
-		return;
-	}
+	BUG_ON(!evt_struct);
 
 	init_event_struct(evt_struct,
 			  adapter_info_rsp,
@@ -886,90 +961,15 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 };
 
 /**
- * login_rsp: - Handle response to SRP login request
- * @evt_struct:	srp_event_struct with the response
+ * init_adapter: Start virtual adapter initialization sequence
  *
- * Used as a "done" callback by when sending srp_login. Gets called
- * by ibmvscsi_handle_crq()
-*/
-static void login_rsp(struct srp_event_struct *evt_struct)
+ */
+static void init_adapter(struct ibmvscsi_host_data *hostdata)
 {
-	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
-	switch (evt_struct->xfer_iu->srp.login_rsp.opcode) {
-	case SRP_LOGIN_RSP:	/* it worked! */
-		break;
-	case SRP_LOGIN_REJ:	/* refused! */
-		dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
-			 evt_struct->xfer_iu->srp.login_rej.reason);
-		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
-		return;
-	default:
-		dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
-			evt_struct->xfer_iu->srp.login_rsp.opcode);
-		/* Login failed.  */
-		atomic_set(&hostdata->request_limit, -1);
-		return;
-	}
-
-	dev_info(hostdata->dev, "SRP_LOGIN succeeded\n");
-
-	/* Now we know what the real request-limit is.
-	 * This value is set rather than added to request_limit because
-	 * request_limit could have been set to -1 by this client.
-	 */
-	atomic_set(&hostdata->request_limit,
-		   evt_struct->xfer_iu->srp.login_rsp.req_lim_delta);
-
-	/* If we had any pending I/Os, kick them */
-	scsi_unblock_requests(hostdata->host);
-
 	send_mad_adapter_info(hostdata);
-	return;
 }
 
 /**
- * send_srp_login: - Sends the srp login
- * @hostdata:	ibmvscsi_host_data of host
- * 
- * Returns zero if successful.
-*/
-static int send_srp_login(struct ibmvscsi_host_data *hostdata)
-{
-	int rc;
-	unsigned long flags;
-	struct srp_login_req *login;
-	struct srp_event_struct *evt_struct = get_event_struct(&hostdata->pool);
-	if (!evt_struct) {
-		dev_err(hostdata->dev, "couldn't allocate an event for login req!\n");
-		return FAILED;
-	}
-
-	init_event_struct(evt_struct,
-			  login_rsp,
-			  VIOSRP_SRP_FORMAT,
-			  login_timeout);
-
-	login = &evt_struct->iu.srp.login_req;
-	memset(login, 0x00, sizeof(struct srp_login_req));
-	login->opcode = SRP_LOGIN_REQ;
-	login->req_it_iu_len = sizeof(union srp_iu);
-	login->req_buf_fmt = SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT;
-	
-	spin_lock_irqsave(hostdata->host->host_lock, flags);
-	/* Start out with a request limit of 0, since this is negotiated in
-	 * the login request we are just sending and login requests always
-	 * get sent by the driver regardless of request_limit.
-	 */
-	atomic_set(&hostdata->request_limit, 0);
-
-	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
-	dev_info(hostdata->dev, "sent SRP login\n");
-	return rc;
-};
-
-/**
  * sync_completion: Signal that a synchronous command has completed
  * Note that after returning from this call, the evt_struct is freed.
  * the caller waiting on this completion shouldn't touch the evt_struct
@@ -1282,7 +1282,7 @@ void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 			if ((rc = ibmvscsi_ops->send_crq(hostdata,
 							 0xC002000000000000LL, 0)) == 0) {
 				/* Now login */
-				send_srp_login(hostdata);
+				init_adapter(hostdata);
 			} else {
 				dev_err(hostdata->dev, "Unable to send init rsp. rc=%ld\n", rc);
 			}
@@ -1292,7 +1292,7 @@ void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 			dev_info(hostdata->dev, "partner initialization complete\n");
 
 			/* Now login */
-			send_srp_login(hostdata);
+			init_adapter(hostdata);
 			break;
 		default:
 			dev_err(hostdata->dev, "unknown crq message type: %d\n", crq->format);
-- 
cgit v0.10.2


From c1988e3123751fd425fbae99d5c1776608e965a9 Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Mon, 8 Jun 2009 16:19:07 -0500
Subject: [SCSI] ibmvscsi: Enable fast fail feature

A new mode of error reporting, fast fail, has been added to the VIOS
which allows failover to happen more quickly.

If this new fast fail mode is enabled on the VIOS and the vSCSI client
supports the mode, the VIOS will not return MEDIUM error on path failures,
but rather return VIOSRP_ADAPTER_FAIL in the crq response, which
ibmvscsi will translate to DID_ERROR.

This new mode can be enabled for single path configurations as well,
so it is the new default error reporting mode. A module parameter is
provided to disable this new behavior on the off chance it causes a
problem on some old VIOS version.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 2ed46b8..822fbc3 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -94,6 +94,7 @@ static int abort_timeout = 60;
 static int reset_timeout = 60;
 static int max_requests = IBMVSCSI_MAX_REQUESTS_DEFAULT;
 static int max_events = IBMVSCSI_MAX_REQUESTS_DEFAULT + 2;
+static int fast_fail = 1;
 
 static struct scsi_transport_template *ibmvscsi_transport_template;
 
@@ -114,6 +115,8 @@ module_param_named(init_timeout, init_timeout, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(init_timeout, "Initialization timeout in seconds");
 module_param_named(max_requests, max_requests, int, S_IRUGO);
 MODULE_PARM_DESC(max_requests, "Maximum requests for this adapter");
+module_param_named(fast_fail, fast_fail, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(fast_fail, "Enable fast fail. [Default=1]");
 
 /* ------------------------------------------------------------
  * Routines for the event pool and event structs
@@ -863,6 +866,60 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 };
 
 /**
+ * fast_fail_rsp: - Handle response to MAD enable fast fail
+ * @evt_struct:	srp_event_struct with the response
+ *
+ * Used as a "done" callback by when sending enable fast fail. Gets called
+ * by ibmvscsi_handle_crq()
+ */
+static void fast_fail_rsp(struct srp_event_struct *evt_struct)
+{
+	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
+	u8 status = evt_struct->xfer_iu->mad.fast_fail.common.status;
+
+	if (status == VIOSRP_MAD_NOT_SUPPORTED)
+		dev_err(hostdata->dev, "fast_fail not supported in server\n");
+	else if (status == VIOSRP_MAD_FAILED)
+		dev_err(hostdata->dev, "fast_fail request failed\n");
+	else if (status != VIOSRP_MAD_SUCCESS)
+		dev_err(hostdata->dev, "error 0x%X enabling fast_fail\n", status);
+
+	send_srp_login(hostdata);
+}
+
+/**
+ * init_host - Start host initialization
+ * @hostdata:	ibmvscsi_host_data of host
+ *
+ * Returns zero if successful.
+ */
+static int enable_fast_fail(struct ibmvscsi_host_data *hostdata)
+{
+	int rc;
+	unsigned long flags;
+	struct viosrp_fast_fail *fast_fail_mad;
+	struct srp_event_struct *evt_struct;
+
+	if (!fast_fail)
+		return send_srp_login(hostdata);
+
+	evt_struct = get_event_struct(&hostdata->pool);
+	BUG_ON(!evt_struct);
+
+	init_event_struct(evt_struct, fast_fail_rsp, VIOSRP_MAD_FORMAT, info_timeout);
+
+	fast_fail_mad = &evt_struct->iu.mad.fast_fail;
+	memset(fast_fail_mad, 0, sizeof(*fast_fail_mad));
+	fast_fail_mad->common.type = VIOSRP_ENABLE_FAST_FAIL;
+	fast_fail_mad->common.length = sizeof(*fast_fail_mad);
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2);
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+	return rc;
+}
+
+/**
  * adapter_info_rsp: - Handle response to MAD adapter info request
  * @evt_struct:	srp_event_struct with the response
  *
@@ -903,7 +960,7 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct)
 		}
 	}
 
-	send_srp_login(hostdata);
+	enable_fast_fail(hostdata);
 }
 
 /**
diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h
index 2046045..f5a9c26 100644
--- a/drivers/scsi/ibmvscsi/viosrp.h
+++ b/drivers/scsi/ibmvscsi/viosrp.h
@@ -86,7 +86,14 @@ enum viosrp_mad_types {
 	VIOSRP_EMPTY_IU_TYPE = 0x01,
 	VIOSRP_ERROR_LOG_TYPE = 0x02,
 	VIOSRP_ADAPTER_INFO_TYPE = 0x03,
-	VIOSRP_HOST_CONFIG_TYPE = 0x04
+	VIOSRP_HOST_CONFIG_TYPE = 0x04,
+	VIOSRP_ENABLE_FAST_FAIL = 0x08,
+};
+
+enum viosrp_mad_status {
+	VIOSRP_MAD_SUCCESS = 0x00,
+	VIOSRP_MAD_NOT_SUPPORTED = 0xF1,
+	VIOSRP_MAD_FAILED = 0xF7,
 };
 
 /* 
@@ -127,11 +134,16 @@ struct viosrp_host_config {
 	u64 buffer;
 };
 
+struct viosrp_fast_fail {
+	struct mad_common common;
+};
+
 union mad_iu {
 	struct viosrp_empty_iu empty_iu;
 	struct viosrp_error_log error_log;
 	struct viosrp_adapter_info adapter_info;
 	struct viosrp_host_config host_config;
+	struct viosrp_fast_fail fast_fail;
 };
 
 union viosrp_iu {
-- 
cgit v0.10.2


From 126c5cc37e682e7c5ae96754994b1cb50c2d0cb5 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Mon, 8 Jun 2009 16:19:08 -0500
Subject: [SCSI] ibmvscsi: Add support for capabilities MAD

Add support to ibmvscsi for the capabilities MAD. This command gets sent
to the Virtual I/O server prior to login in order to communicate client
capabilities. Additionally it returns information regarding capabilities
that the server supports. The two main capabilities communicated in this
MAD are related to partition migration and client reserve. Client reserve
allows for SCSI-2 reservations to be sent to virtual disks which are backed
by physical LUNs and will result in the reservation being sent to the
physical LUN.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 822fbc3..11d2602 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -70,6 +70,7 @@
 #include <linux/moduleparam.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
+#include <linux/of.h>
 #include <asm/firmware.h>
 #include <asm/vio.h>
 #include <asm/firmware.h>
@@ -95,6 +96,7 @@ static int reset_timeout = 60;
 static int max_requests = IBMVSCSI_MAX_REQUESTS_DEFAULT;
 static int max_events = IBMVSCSI_MAX_REQUESTS_DEFAULT + 2;
 static int fast_fail = 1;
+static int client_reserve = 1;
 
 static struct scsi_transport_template *ibmvscsi_transport_template;
 
@@ -117,6 +119,8 @@ module_param_named(max_requests, max_requests, int, S_IRUGO);
 MODULE_PARM_DESC(max_requests, "Maximum requests for this adapter");
 module_param_named(fast_fail, fast_fail, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(fast_fail, "Enable fast fail. [Default=1]");
+module_param_named(client_reserve, client_reserve, int, S_IRUGO );
+MODULE_PARM_DESC(client_reserve, "Attempt client managed reserve/release");
 
 /* ------------------------------------------------------------
  * Routines for the event pool and event structs
@@ -790,6 +794,53 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
  */
 
 /**
+ * map_persist_bufs: - Pre-map persistent data for adapter logins
+ * @hostdata:   ibmvscsi_host_data of host
+ *
+ * Map the capabilities and adapter info DMA buffers to avoid runtime failures.
+ * Return 1 on error, 0 on success.
+ */
+static int map_persist_bufs(struct ibmvscsi_host_data *hostdata)
+{
+
+	hostdata->caps_addr = dma_map_single(hostdata->dev, &hostdata->caps,
+					     sizeof(hostdata->caps), DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(hostdata->dev, hostdata->caps_addr)) {
+		dev_err(hostdata->dev, "Unable to map capabilities buffer!\n");
+		return 1;
+	}
+
+	hostdata->adapter_info_addr = dma_map_single(hostdata->dev,
+						     &hostdata->madapter_info,
+						     sizeof(hostdata->madapter_info),
+						     DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(hostdata->dev, hostdata->adapter_info_addr)) {
+		dev_err(hostdata->dev, "Unable to map adapter info buffer!\n");
+		dma_unmap_single(hostdata->dev, hostdata->caps_addr,
+				 sizeof(hostdata->caps), DMA_BIDIRECTIONAL);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * unmap_persist_bufs: - Unmap persistent data needed for adapter logins
+ * @hostdata:   ibmvscsi_host_data of host
+ *
+ * Unmap the capabilities and adapter info DMA buffers
+ */
+static void unmap_persist_bufs(struct ibmvscsi_host_data *hostdata)
+{
+	dma_unmap_single(hostdata->dev, hostdata->caps_addr,
+			 sizeof(hostdata->caps), DMA_BIDIRECTIONAL);
+
+	dma_unmap_single(hostdata->dev, hostdata->adapter_info_addr,
+			 sizeof(hostdata->madapter_info), DMA_BIDIRECTIONAL);
+}
+
+/**
  * login_rsp: - Handle response to SRP login request
  * @evt_struct:	srp_event_struct with the response
  *
@@ -817,6 +868,7 @@ static void login_rsp(struct srp_event_struct *evt_struct)
 	}
 
 	dev_info(hostdata->dev, "SRP_LOGIN succeeded\n");
+	hostdata->client_migrated = 0;
 
 	/* Now we know what the real request-limit is.
 	 * This value is set rather than added to request_limit because
@@ -866,6 +918,93 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
 };
 
 /**
+ * capabilities_rsp: - Handle response to MAD adapter capabilities request
+ * @evt_struct:	srp_event_struct with the response
+ *
+ * Used as a "done" callback by when sending adapter_info.
+ */
+static void capabilities_rsp(struct srp_event_struct *evt_struct)
+{
+	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
+
+	if (evt_struct->xfer_iu->mad.capabilities.common.status) {
+		dev_err(hostdata->dev, "error 0x%X getting capabilities info\n",
+			evt_struct->xfer_iu->mad.capabilities.common.status);
+	} else {
+		if (hostdata->caps.migration.common.server_support != SERVER_SUPPORTS_CAP)
+			dev_info(hostdata->dev, "Partition migration not supported\n");
+
+		if (client_reserve) {
+			if (hostdata->caps.reserve.common.server_support ==
+			    SERVER_SUPPORTS_CAP)
+				dev_info(hostdata->dev, "Client reserve enabled\n");
+			else
+				dev_info(hostdata->dev, "Client reserve not supported\n");
+		}
+	}
+
+	send_srp_login(hostdata);
+}
+
+/**
+ * send_mad_capabilities: - Sends the mad capabilities request
+ *      and stores the result so it can be retrieved with
+ * @hostdata:	ibmvscsi_host_data of host
+ */
+static void send_mad_capabilities(struct ibmvscsi_host_data *hostdata)
+{
+	struct viosrp_capabilities *req;
+	struct srp_event_struct *evt_struct;
+	unsigned long flags;
+	struct device_node *of_node = hostdata->dev->archdata.of_node;
+	const char *location;
+
+	evt_struct = get_event_struct(&hostdata->pool);
+	BUG_ON(!evt_struct);
+
+	init_event_struct(evt_struct, capabilities_rsp,
+			  VIOSRP_MAD_FORMAT, info_timeout);
+
+	req = &evt_struct->iu.mad.capabilities;
+	memset(req, 0, sizeof(*req));
+
+	hostdata->caps.flags = CAP_LIST_SUPPORTED;
+	if (hostdata->client_migrated)
+		hostdata->caps.flags |= CLIENT_MIGRATED;
+
+	strncpy(hostdata->caps.name, dev_name(&hostdata->host->shost_gendev),
+		sizeof(hostdata->caps.name));
+	hostdata->caps.name[sizeof(hostdata->caps.name) - 1] = '\0';
+
+	location = of_get_property(of_node, "ibm,loc-code", NULL);
+	location = location ? location : dev_name(hostdata->dev);
+	strncpy(hostdata->caps.loc, location, sizeof(hostdata->caps.loc));
+	hostdata->caps.loc[sizeof(hostdata->caps.loc) - 1] = '\0';
+
+	req->common.type = VIOSRP_CAPABILITIES_TYPE;
+	req->buffer = hostdata->caps_addr;
+
+	hostdata->caps.migration.common.cap_type = MIGRATION_CAPABILITIES;
+	hostdata->caps.migration.common.length = sizeof(hostdata->caps.migration);
+	hostdata->caps.migration.common.server_support = SERVER_SUPPORTS_CAP;
+	hostdata->caps.migration.ecl = 1;
+
+	if (client_reserve) {
+		hostdata->caps.reserve.common.cap_type = RESERVATION_CAPABILITIES;
+		hostdata->caps.reserve.common.length = sizeof(hostdata->caps.reserve);
+		hostdata->caps.reserve.common.server_support = SERVER_SUPPORTS_CAP;
+		hostdata->caps.reserve.type = CLIENT_RESERVE_SCSI_2;
+		req->common.length = sizeof(hostdata->caps);
+	} else
+		req->common.length = sizeof(hostdata->caps) - sizeof(hostdata->caps.reserve);
+
+	spin_lock_irqsave(hostdata->host->host_lock, flags);
+	if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2))
+		dev_err(hostdata->dev, "couldn't send CAPABILITIES_REQ!\n");
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+};
+
+/**
  * fast_fail_rsp: - Handle response to MAD enable fast fail
  * @evt_struct:	srp_event_struct with the response
  *
@@ -884,7 +1023,7 @@ static void fast_fail_rsp(struct srp_event_struct *evt_struct)
 	else if (status != VIOSRP_MAD_SUCCESS)
 		dev_err(hostdata->dev, "error 0x%X enabling fast_fail\n", status);
 
-	send_srp_login(hostdata);
+	send_mad_capabilities(hostdata);
 }
 
 /**
@@ -900,8 +1039,10 @@ static int enable_fast_fail(struct ibmvscsi_host_data *hostdata)
 	struct viosrp_fast_fail *fast_fail_mad;
 	struct srp_event_struct *evt_struct;
 
-	if (!fast_fail)
-		return send_srp_login(hostdata);
+	if (!fast_fail) {
+		send_mad_capabilities(hostdata);
+		return 0;
+	}
 
 	evt_struct = get_event_struct(&hostdata->pool);
 	BUG_ON(!evt_struct);
@@ -929,10 +1070,6 @@ static int enable_fast_fail(struct ibmvscsi_host_data *hostdata)
 static void adapter_info_rsp(struct srp_event_struct *evt_struct)
 {
 	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
-	dma_unmap_single(hostdata->dev,
-			 evt_struct->iu.mad.adapter_info.buffer,
-			 evt_struct->iu.mad.adapter_info.common.length,
-			 DMA_BIDIRECTIONAL);
 
 	if (evt_struct->xfer_iu->mad.adapter_info.common.status) {
 		dev_err(hostdata->dev, "error %d getting adapter info\n",
@@ -977,7 +1114,6 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 	struct viosrp_adapter_info *req;
 	struct srp_event_struct *evt_struct;
 	unsigned long flags;
-	dma_addr_t addr;
 
 	evt_struct = get_event_struct(&hostdata->pool);
 	BUG_ON(!evt_struct);
@@ -992,28 +1128,11 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 	
 	req->common.type = VIOSRP_ADAPTER_INFO_TYPE;
 	req->common.length = sizeof(hostdata->madapter_info);
-	req->buffer = addr = dma_map_single(hostdata->dev,
-					    &hostdata->madapter_info,
-					    sizeof(hostdata->madapter_info),
-					    DMA_BIDIRECTIONAL);
+	req->buffer = hostdata->adapter_info_addr;
 
-	if (dma_mapping_error(hostdata->dev, req->buffer)) {
-		if (!firmware_has_feature(FW_FEATURE_CMO))
-			dev_err(hostdata->dev,
-			        "Unable to map request_buffer for "
-			        "adapter_info!\n");
-		free_event_struct(&hostdata->pool, evt_struct);
-		return;
-	}
-	
 	spin_lock_irqsave(hostdata->host->host_lock, flags);
-	if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2)) {
+	if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2))
 		dev_err(hostdata->dev, "couldn't send ADAPTER_INFO_REQ!\n");
-		dma_unmap_single(hostdata->dev,
-				 addr,
-				 sizeof(hostdata->madapter_info),
-				 DMA_BIDIRECTIONAL);
-	}
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 };
 
@@ -1361,6 +1480,7 @@ void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 		if (crq->format == 0x06) {
 			/* We need to re-setup the interpartition connection */
 			dev_info(hostdata->dev, "Re-enabling adapter!\n");
+			hostdata->client_migrated = 1;
 			purge_requests(hostdata, DID_REQUEUE);
 			if ((ibmvscsi_ops->reenable_crq_queue(&hostdata->queue,
 							      hostdata)) ||
@@ -1529,6 +1649,46 @@ static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
 /* ------------------------------------------------------------
  * sysfs attributes
  */
+static ssize_t show_host_vhost_loc(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ibmvscsi_host_data *hostdata = shost_priv(shost);
+	int len;
+
+	len = snprintf(buf, sizeof(hostdata->caps.loc), "%s\n",
+		       hostdata->caps.loc);
+	return len;
+}
+
+static struct device_attribute ibmvscsi_host_vhost_loc = {
+	.attr = {
+		 .name = "vhost_loc",
+		 .mode = S_IRUGO,
+		 },
+	.show = show_host_vhost_loc,
+};
+
+static ssize_t show_host_vhost_name(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ibmvscsi_host_data *hostdata = shost_priv(shost);
+	int len;
+
+	len = snprintf(buf, sizeof(hostdata->caps.name), "%s\n",
+		       hostdata->caps.name);
+	return len;
+}
+
+static struct device_attribute ibmvscsi_host_vhost_name = {
+	.attr = {
+		 .name = "vhost_name",
+		 .mode = S_IRUGO,
+		 },
+	.show = show_host_vhost_name,
+};
+
 static ssize_t show_host_srp_version(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
@@ -1652,6 +1812,8 @@ static struct device_attribute ibmvscsi_host_config = {
 };
 
 static struct device_attribute *ibmvscsi_attrs[] = {
+	&ibmvscsi_host_vhost_loc,
+	&ibmvscsi_host_vhost_name,
 	&ibmvscsi_host_srp_version,
 	&ibmvscsi_host_partition_name,
 	&ibmvscsi_host_partition_number,
@@ -1732,6 +1894,11 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	atomic_set(&hostdata->request_limit, -1);
 	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
+	if (map_persist_bufs(hostdata)) {
+		dev_err(&vdev->dev, "couldn't map persistent buffers\n");
+		goto persist_bufs_failed;
+	}
+
 	rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_events);
 	if (rc != 0 && rc != H_RESOURCE) {
 		dev_err(&vdev->dev, "couldn't initialize crq. rc=%d\n", rc);
@@ -1792,6 +1959,8 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
       init_pool_failed:
 	ibmvscsi_ops->release_crq_queue(&hostdata->queue, hostdata, max_events);
       init_crq_failed:
+	unmap_persist_bufs(hostdata);
+      persist_bufs_failed:
 	scsi_host_put(host);
       scsi_host_alloc_failed:
 	return -1;
@@ -1800,6 +1969,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 static int ibmvscsi_remove(struct vio_dev *vdev)
 {
 	struct ibmvscsi_host_data *hostdata = vdev->dev.driver_data;
+	unmap_persist_bufs(hostdata);
 	release_event_pool(&hostdata->pool, hostdata);
 	ibmvscsi_ops->release_crq_queue(&hostdata->queue, hostdata,
 					max_events);
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h
index 2d4339d..7642530 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.h
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.h
@@ -90,6 +90,7 @@ struct event_pool {
 /* all driver data associated with a host adapter */
 struct ibmvscsi_host_data {
 	atomic_t request_limit;
+	int client_migrated;
 	struct device *dev;
 	struct event_pool pool;
 	struct crq_queue queue;
@@ -97,6 +98,9 @@ struct ibmvscsi_host_data {
 	struct list_head sent;
 	struct Scsi_Host *host;
 	struct mad_adapter_info_data madapter_info;
+	struct capabilities caps;
+	dma_addr_t caps_addr;
+	dma_addr_t adapter_info_addr;
 };
 
 /* routines for managing a command/response queue */
diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h
index f5a9c26..2cd735d 100644
--- a/drivers/scsi/ibmvscsi/viosrp.h
+++ b/drivers/scsi/ibmvscsi/viosrp.h
@@ -37,6 +37,7 @@
 
 #define SRP_VERSION "16.a"
 #define SRP_MAX_IU_LEN	256
+#define SRP_MAX_LOC_LEN 32
 
 union srp_iu {
 	struct srp_login_req login_req;
@@ -87,6 +88,7 @@ enum viosrp_mad_types {
 	VIOSRP_ERROR_LOG_TYPE = 0x02,
 	VIOSRP_ADAPTER_INFO_TYPE = 0x03,
 	VIOSRP_HOST_CONFIG_TYPE = 0x04,
+	VIOSRP_CAPABILITIES_TYPE = 0x05,
 	VIOSRP_ENABLE_FAST_FAIL = 0x08,
 };
 
@@ -96,6 +98,28 @@ enum viosrp_mad_status {
 	VIOSRP_MAD_FAILED = 0xF7,
 };
 
+enum viosrp_capability_type {
+	MIGRATION_CAPABILITIES = 0x01,
+	RESERVATION_CAPABILITIES = 0x02,
+};
+
+enum viosrp_capability_support {
+	SERVER_DOES_NOT_SUPPORTS_CAP = 0x0,
+	SERVER_SUPPORTS_CAP = 0x01,
+	SERVER_CAP_DATA = 0x02,
+};
+
+enum viosrp_reserve_type {
+	CLIENT_RESERVE_SCSI_2 = 0x01,
+};
+
+enum viosrp_capability_flag {
+	CLIENT_MIGRATED = 0x01,
+	CLIENT_RECONNECT = 0x02,
+	CAP_LIST_SUPPORTED = 0x04,
+	CAP_LIST_DATA = 0x08,
+};
+
 /* 
  * Common MAD header
  */
@@ -138,12 +162,42 @@ struct viosrp_fast_fail {
 	struct mad_common common;
 };
 
+struct viosrp_capabilities {
+	struct mad_common common;
+	u64 buffer;
+};
+
+struct mad_capability_common {
+	u32 cap_type;
+	u16 length;
+	u16 server_support;
+};
+
+struct mad_reserve_cap {
+	struct mad_capability_common common;
+	u32 type;
+};
+
+struct mad_migration_cap {
+	struct mad_capability_common common;
+	u32 ecl;
+};
+
+struct capabilities{
+	u32 flags;
+	char name[SRP_MAX_LOC_LEN];
+	char loc[SRP_MAX_LOC_LEN];
+	struct mad_migration_cap migration;
+	struct mad_reserve_cap reserve;
+};
+
 union mad_iu {
 	struct viosrp_empty_iu empty_iu;
 	struct viosrp_error_log error_log;
 	struct viosrp_adapter_info adapter_info;
 	struct viosrp_host_config host_config;
 	struct viosrp_fast_fail fast_fail;
+	struct viosrp_capabilities capabilities;
 };
 
 union viosrp_iu {
-- 
cgit v0.10.2


From 5bf1692f65c12a8aa359dc883468284ffc3c4587 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Fri, 5 Jun 2009 14:44:58 +0900
Subject: TOMOYO: Remove unused field.

TOMOYO 2.2.0 is not using total_len field of "struct tomoyo_path_info".

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index 6d25612..a44f655 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -428,7 +428,6 @@ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr)
 	const char *name = ptr->name;
 	const int len = strlen(name);
 
-	ptr->total_len = len;
 	ptr->const_len = tomoyo_const_part_length(name);
 	ptr->is_dir = len && (name[len - 1] == '/');
 	ptr->is_patterned = (ptr->const_len < len);
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 678f4ff..d8b9504 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -35,7 +35,6 @@ struct tomoyo_page_buffer {
 struct tomoyo_path_info {
 	const char *name;
 	u32 hash;          /* = full_name_hash(name, strlen(name)) */
-	u16 total_len;     /* = strlen(name)                       */
 	u16 const_len;     /* = tomoyo_const_part_length(name)     */
 	bool is_dir;       /* = tomoyo_strendswith(name, "/")      */
 	bool is_patterned; /* = tomoyo_path_contains_pattern(name) */
-- 
cgit v0.10.2


From c3fa109a5894077d1eaf8731ea741a15dd117b3c Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Mon, 8 Jun 2009 12:37:39 +0900
Subject: TOMOYO: Add description of lists and structures.

This patch adds some descriptions of lists and structures.
This patch contains no code changes.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index a44f655..fdd1f4b 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -28,7 +28,13 @@ static const char *tomoyo_mode_2[4] = {
 	"disabled", "enabled", "enabled", "enabled"
 };
 
-/* Table for profile. */
+/*
+ * tomoyo_control_array is a static data which contains
+ *
+ *  (1) functionality name used by /sys/kernel/security/tomoyo/profile .
+ *  (2) initial values for "struct tomoyo_profile".
+ *  (3) max values for "struct tomoyo_profile".
+ */
 static struct {
 	const char *keyword;
 	unsigned int current_value;
@@ -39,7 +45,13 @@ static struct {
 	[TOMOYO_VERBOSE]          = { "TOMOYO_VERBOSE",      1,       1 },
 };
 
-/* Profile table. Memory is allocated as needed. */
+/*
+ * tomoyo_profile is a structure which is used for holding the mode of access
+ * controls. TOMOYO has 4 modes: disabled, learning, permissive, enforcing.
+ * An administrator can define up to 256 profiles.
+ * The ->profile of "struct tomoyo_domain_info" is used for remembering
+ * the profile's number (0 - 255) assigned to that domain.
+ */
 static struct tomoyo_profile {
 	unsigned int value[TOMOYO_MAX_CONTROL_INDEX];
 	const struct tomoyo_path_info *comment;
@@ -1006,7 +1018,19 @@ static int tomoyo_read_profile(struct tomoyo_io_buffer *head)
 	return 0;
 }
 
-/* Structure for policy manager. */
+/*
+ * tomoyo_policy_manager_entry is a structure which is used for holding list of
+ * domainnames or programs which are permitted to modify configuration via
+ * /sys/kernel/security/tomoyo/ interface.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_policy_manager_list .
+ *  (2) "manager" is a domainname or a program's pathname.
+ *  (3) "is_domain" is a bool which is true if "manager" is a domainname, false
+ *      otherwise.
+ *  (4) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_policy_manager_entry {
 	struct list_head list;
 	/* A path to program or a domainname. */
@@ -1015,7 +1039,36 @@ struct tomoyo_policy_manager_entry {
 	bool is_deleted; /* True if this entry is deleted. */
 };
 
-/* The list for "struct tomoyo_policy_manager_entry". */
+/*
+ * tomoyo_policy_manager_list is used for holding list of domainnames or
+ * programs which are permitted to modify configuration via
+ * /sys/kernel/security/tomoyo/ interface.
+ *
+ * An entry is added by
+ *
+ * # echo '<kernel> /sbin/mingetty /bin/login /bin/bash' > \
+ *                                        /sys/kernel/security/tomoyo/manager
+ *  (if you want to specify by a domainname)
+ *
+ *  or
+ *
+ * # echo '/usr/lib/ccs/editpolicy' > /sys/kernel/security/tomoyo/manager
+ *  (if you want to specify by a program's location)
+ *
+ * and is deleted by
+ *
+ * # echo 'delete <kernel> /sbin/mingetty /bin/login /bin/bash' > \
+ *                                        /sys/kernel/security/tomoyo/manager
+ *
+ *  or
+ *
+ * # echo 'delete /usr/lib/ccs/editpolicy' > \
+ *                                        /sys/kernel/security/tomoyo/manager
+ *
+ * and all entries are retrieved by
+ *
+ * # cat /sys/kernel/security/tomoyo/manager
+ */
 static LIST_HEAD(tomoyo_policy_manager_list);
 static DECLARE_RWSEM(tomoyo_policy_manager_list_lock);
 
@@ -2124,7 +2177,13 @@ static ssize_t tomoyo_write(struct file *file, const char __user *buf,
 	return tomoyo_write_control(file, buf, count);
 }
 
-/* Operations for /sys/kernel/security/tomoyo/ interface. */
+/*
+ * tomoyo_operations is a "struct file_operations" which is used for handling
+ * /sys/kernel/security/tomoyo/ interface.
+ *
+ * Some files under /sys/kernel/security/tomoyo/ directory accept open(O_RDWR).
+ * See tomoyo_io_buffer for internals.
+ */
 static const struct file_operations tomoyo_operations = {
 	.open    = tomoyo_open,
 	.release = tomoyo_release,
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index d8b9504..6d6ba09 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -26,12 +26,40 @@
 struct dentry;
 struct vfsmount;
 
-/* Temporary buffer for holding pathnames. */
+/*
+ * tomoyo_page_buffer is a structure which is used for holding a pathname
+ * obtained from "struct dentry" and "struct vfsmount" pair.
+ * As of now, it is 4096 bytes. If users complain that 4096 bytes is too small
+ * (because TOMOYO escapes non ASCII printable characters using \ooo format),
+ * we will make the buffer larger.
+ */
 struct tomoyo_page_buffer {
 	char buffer[4096];
 };
 
-/* Structure for holding a token. */
+/*
+ * tomoyo_path_info is a structure which is used for holding a string data
+ * used by TOMOYO.
+ * This structure has several fields for supporting pattern matching.
+ *
+ * (1) "name" is the '\0' terminated string data.
+ * (2) "hash" is full_name_hash(name, strlen(name)).
+ *     This allows tomoyo_pathcmp() to compare by hash before actually compare
+ *     using strcmp().
+ * (3) "const_len" is the length of the initial segment of "name" which
+ *     consists entirely of non wildcard characters. In other words, the length
+ *     which we can compare two strings using strncmp().
+ * (4) "is_dir" is a bool which is true if "name" ends with "/",
+ *     false otherwise.
+ *     TOMOYO distinguishes directory and non-directory. A directory ends with
+ *     "/" and non-directory does not end with "/".
+ * (5) "is_patterned" is a bool which is true if "name" contains wildcard
+ *     characters, false otherwise. This allows TOMOYO to use "hash" and
+ *     strcmp() for string comparison if "is_patterned" is false.
+ * (6) "depth" is calculated using the number of "/" characters in "name".
+ *     This allows TOMOYO to avoid comparing two pathnames which never match
+ *     (e.g. whether "/var/www/html/index.html" matches "/tmp/sh-thd-\$").
+ */
 struct tomoyo_path_info {
 	const char *name;
 	u32 hash;          /* = full_name_hash(name, strlen(name)) */
@@ -50,7 +78,20 @@ struct tomoyo_path_info {
  */
 #define TOMOYO_MAX_PATHNAME_LEN 4000
 
-/* Structure for holding requested pathname. */
+/*
+ * tomoyo_path_info_with_data is a structure which is used for holding a
+ * pathname obtained from "struct dentry" and "struct vfsmount" pair.
+ *
+ * "struct tomoyo_path_info_with_data" consists of "struct tomoyo_path_info"
+ * and buffer for the pathname, while "struct tomoyo_page_buffer" consists of
+ * buffer for the pathname only.
+ *
+ * "struct tomoyo_path_info_with_data" is intended to allow TOMOYO to release
+ * both "struct tomoyo_path_info" and buffer for the pathname by single kfree()
+ * so that we don't need to return two pointers to the caller. If the caller
+ * puts "struct tomoyo_path_info" on stack memory, we will be able to remove
+ * "struct tomoyo_path_info_with_data".
+ */
 struct tomoyo_path_info_with_data {
 	/* Keep "head" first, for this pointer is passed to tomoyo_free(). */
 	struct tomoyo_path_info head;
@@ -60,7 +101,15 @@ struct tomoyo_path_info_with_data {
 };
 
 /*
- * Common header for holding ACL entries.
+ * tomoyo_acl_info is a structure which is used for holding
+ *
+ *  (1) "list" which is linked to the ->acl_info_list of
+ *      "struct tomoyo_domain_info"
+ *  (2) "type" which tells
+ *      (a) type & 0x7F : type of the entry (either
+ *          "struct tomoyo_single_path_acl_record" or
+ *          "struct tomoyo_double_path_acl_record")
+ *      (b) type & 0x80 : whether the entry is marked as "deleted".
  *
  * Packing "struct tomoyo_acl_info" allows
  * "struct tomoyo_single_path_acl_record" to embed "u16" and
@@ -80,7 +129,28 @@ struct tomoyo_acl_info {
 /* This ACL entry is deleted.           */
 #define TOMOYO_ACL_DELETED        0x80
 
-/* Structure for domain information. */
+/*
+ * tomoyo_domain_info is a structure which is used for holding permissions
+ * (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_domain_list .
+ *  (2) "acl_info_list" which is linked to "struct tomoyo_acl_info".
+ *  (3) "domainname" which holds the name of the domain.
+ *  (4) "profile" which remembers profile number assigned to this domain.
+ *  (5) "is_deleted" is a bool which is true if this domain is marked as
+ *      "deleted", false otherwise.
+ *  (6) "quota_warned" is a bool which is used for suppressing warning message
+ *      when learning mode learned too much entries.
+ *  (7) "flags" which remembers this domain's attributes.
+ *
+ * A domain's lifecycle is an analogy of files on / directory.
+ * Multiple domains with the same domainname cannot be created (as with
+ * creating files with the same filename fails with -EEXIST).
+ * If a process reached a domain, that process can reside in that domain after
+ * that domain is marked as "deleted" (as with a process can access an already
+ * open()ed file after that file was unlink()ed).
+ */
 struct tomoyo_domain_info {
 	struct list_head list;
 	struct list_head acl_info_list;
@@ -107,10 +177,18 @@ struct tomoyo_domain_info {
 #define TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED        2
 
 /*
- * Structure for "allow_read/write", "allow_execute", "allow_read",
- * "allow_write", "allow_create", "allow_unlink", "allow_mkdir", "allow_rmdir",
- * "allow_mkfifo", "allow_mksock", "allow_mkblock", "allow_mkchar",
- * "allow_truncate", "allow_symlink" and "allow_rewrite" directive.
+ * tomoyo_single_path_acl_record is a structure which is used for holding an
+ * entry with one pathname operation (e.g. open(), mkdir()).
+ * It has following fields.
+ *
+ *  (1) "head" which is a "struct tomoyo_acl_info".
+ *  (2) "perm" which is a bitmask of permitted operations.
+ *  (3) "filename" is the pathname.
+ *
+ * Directives held by this structure are "allow_read/write", "allow_execute",
+ * "allow_read", "allow_write", "allow_create", "allow_unlink", "allow_mkdir",
+ * "allow_rmdir", "allow_mkfifo", "allow_mksock", "allow_mkblock",
+ * "allow_mkchar", "allow_truncate", "allow_symlink" and "allow_rewrite".
  */
 struct tomoyo_single_path_acl_record {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_SINGLE_PATH_ACL */
@@ -119,7 +197,18 @@ struct tomoyo_single_path_acl_record {
 	const struct tomoyo_path_info *filename;
 };
 
-/* Structure for "allow_rename" and "allow_link" directive. */
+/*
+ * tomoyo_double_path_acl_record is a structure which is used for holding an
+ * entry with two pathnames operation (i.e. link() and rename()).
+ * It has following fields.
+ *
+ *  (1) "head" which is a "struct tomoyo_acl_info".
+ *  (2) "perm" which is a bitmask of permitted operations.
+ *  (3) "filename1" is the source/old pathname.
+ *  (4) "filename2" is the destination/new pathname.
+ *
+ * Directives held by this structure are "allow_rename" and "allow_link".
+ */
 struct tomoyo_double_path_acl_record {
 	struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_DOUBLE_PATH_ACL */
 	u8 perm;
@@ -152,7 +241,29 @@ struct tomoyo_double_path_acl_record {
 #define TOMOYO_VERBOSE                       2
 #define TOMOYO_MAX_CONTROL_INDEX             3
 
-/* Structure for reading/writing policy via securityfs interfaces. */
+/*
+ * tomoyo_io_buffer is a structure which is used for reading and modifying
+ * configuration via /sys/kernel/security/tomoyo/ interface.
+ * It has many fields. ->read_var1 , ->read_var2 , ->write_var1 are used as
+ * cursors.
+ *
+ * Since the content of /sys/kernel/security/tomoyo/domain_policy is a list of
+ * "struct tomoyo_domain_info" entries and each "struct tomoyo_domain_info"
+ * entry has a list of "struct tomoyo_acl_info", we need two cursors when
+ * reading (one is for traversing tomoyo_domain_list and the other is for
+ * traversing "struct tomoyo_acl_info"->acl_info_list ).
+ *
+ * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
+ * "select ", TOMOYO seeks the cursor ->read_var1 and ->write_var1 to the
+ * domain with the domainname specified by the rest of that line (NULL is set
+ * if seek failed).
+ * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
+ * "delete ", TOMOYO deletes an entry or a domain specified by the rest of that
+ * line (->write_var1 is set to NULL if a domain was deleted).
+ * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with
+ * neither "select " nor "delete ", an entry or a domain specified by that line
+ * is appended.
+ */
 struct tomoyo_io_buffer {
 	int (*read) (struct tomoyo_io_buffer *);
 	int (*write) (struct tomoyo_io_buffer *);
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index eb75401..1d8b169 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -19,11 +19,63 @@
 /* The initial domain. */
 struct tomoyo_domain_info tomoyo_kernel_domain;
 
-/* The list for "struct tomoyo_domain_info". */
+/*
+ * tomoyo_domain_list is used for holding list of domains.
+ * The ->acl_info_list of "struct tomoyo_domain_info" is used for holding
+ * permissions (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
+ *
+ * An entry is added by
+ *
+ * # ( echo "<kernel>"; echo "allow_execute /sbin/init" ) > \
+ *                                  /sys/kernel/security/tomoyo/domain_policy
+ *
+ * and is deleted by
+ *
+ * # ( echo "<kernel>"; echo "delete allow_execute /sbin/init" ) > \
+ *                                  /sys/kernel/security/tomoyo/domain_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # cat /sys/kernel/security/tomoyo/domain_policy
+ *
+ * A domain is added by
+ *
+ * # echo "<kernel>" > /sys/kernel/security/tomoyo/domain_policy
+ *
+ * and is deleted by
+ *
+ * # echo "delete <kernel>" > /sys/kernel/security/tomoyo/domain_policy
+ *
+ * and all domains are retrieved by
+ *
+ * # grep '^<kernel>' /sys/kernel/security/tomoyo/domain_policy
+ *
+ * Normally, a domainname is monotonically getting longer because a domainname
+ * which the process will belong to if an execve() operation succeeds is
+ * defined as a concatenation of "current domainname" + "pathname passed to
+ * execve()".
+ * See tomoyo_domain_initializer_list and tomoyo_domain_keeper_list for
+ * exceptions.
+ */
 LIST_HEAD(tomoyo_domain_list);
 DECLARE_RWSEM(tomoyo_domain_list_lock);
 
-/* Structure for "initialize_domain" and "no_initialize_domain" keyword. */
+/*
+ * tomoyo_domain_initializer_entry is a structure which is used for holding
+ * "initialize_domain" and "no_initialize_domain" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_domain_initializer_list .
+ *  (2) "domainname" which is "a domainname" or "the last component of a
+ *      domainname". This field is NULL if "from" clause is not specified.
+ *  (3) "program" which is a program's pathname.
+ *  (4) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ *  (5) "is_not" is a bool which is true if "no_initialize_domain", false
+ *      otherwise.
+ *  (6) "is_last_name" is a bool which is true if "domainname" is "the last
+ *      component of a domainname", false otherwise.
+ */
 struct tomoyo_domain_initializer_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *domainname;    /* This may be NULL */
@@ -34,7 +86,23 @@ struct tomoyo_domain_initializer_entry {
 	bool is_last_name;
 };
 
-/* Structure for "keep_domain" and "no_keep_domain" keyword. */
+/*
+ * tomoyo_domain_keeper_entry is a structure which is used for holding
+ * "keep_domain" and "no_keep_domain" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_domain_keeper_list .
+ *  (2) "domainname" which is "a domainname" or "the last component of a
+ *      domainname".
+ *  (3) "program" which is a program's pathname.
+ *      This field is NULL if "from" clause is not specified.
+ *  (4) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ *  (5) "is_not" is a bool which is true if "no_initialize_domain", false
+ *      otherwise.
+ *  (6) "is_last_name" is a bool which is true if "domainname" is "the last
+ *      component of a domainname", false otherwise.
+ */
 struct tomoyo_domain_keeper_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *domainname;
@@ -45,7 +113,16 @@ struct tomoyo_domain_keeper_entry {
 	bool is_last_name;
 };
 
-/* Structure for "alias" keyword. */
+/*
+ * tomoyo_alias_entry is a structure which is used for holding "alias" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_alias_list .
+ *  (2) "original_name" which is a dereferenced pathname.
+ *  (3) "aliased_name" which is a symlink's pathname.
+ *  (4) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_alias_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *original_name;
@@ -92,7 +169,42 @@ const char *tomoyo_get_last_name(const struct tomoyo_domain_info *domain)
 	return cp0;
 }
 
-/* The list for "struct tomoyo_domain_initializer_entry". */
+/*
+ * tomoyo_domain_initializer_list is used for holding list of programs which
+ * triggers reinitialization of domainname. Normally, a domainname is
+ * monotonically getting longer. But sometimes, we restart daemon programs.
+ * It would be convenient for us that "a daemon started upon system boot" and
+ * "the daemon restarted from console" belong to the same domain. Thus, TOMOYO
+ * provides a way to shorten domainnames.
+ *
+ * An entry is added by
+ *
+ * # echo 'initialize_domain /usr/sbin/httpd' > \
+ *                               /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete initialize_domain /usr/sbin/httpd' > \
+ *                               /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^initialize_domain /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, /usr/sbin/httpd will belong to
+ * "<kernel> /usr/sbin/httpd" domain.
+ *
+ * You may specify a domainname using "from" keyword.
+ * "initialize_domain /usr/sbin/httpd from <kernel> /etc/rc.d/init.d/httpd"
+ * will cause "/usr/sbin/httpd" executed from "<kernel> /etc/rc.d/init.d/httpd"
+ * domain to belong to "<kernel> /usr/sbin/httpd" domain.
+ *
+ * You may add "no_" prefix to "initialize_domain".
+ * "initialize_domain /usr/sbin/httpd" and
+ * "no_initialize_domain /usr/sbin/httpd from <kernel> /etc/rc.d/init.d/httpd"
+ * will cause "/usr/sbin/httpd" to belong to "<kernel> /usr/sbin/httpd" domain
+ * unless executed from "<kernel> /etc/rc.d/init.d/httpd" domain.
+ */
 static LIST_HEAD(tomoyo_domain_initializer_list);
 static DECLARE_RWSEM(tomoyo_domain_initializer_list_lock);
 
@@ -268,7 +380,44 @@ static bool tomoyo_is_domain_initializer(const struct tomoyo_path_info *
 	return flag;
 }
 
-/* The list for "struct tomoyo_domain_keeper_entry". */
+/*
+ * tomoyo_domain_keeper_list is used for holding list of domainnames which
+ * suppresses domain transition. Normally, a domainname is monotonically
+ * getting longer. But sometimes, we want to suppress domain transition.
+ * It would be convenient for us that programs executed from a login session
+ * belong to the same domain. Thus, TOMOYO provides a way to suppress domain
+ * transition.
+ *
+ * An entry is added by
+ *
+ * # echo 'keep_domain <kernel> /usr/sbin/sshd /bin/bash' > \
+ *                              /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete keep_domain <kernel> /usr/sbin/sshd /bin/bash' > \
+ *                              /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^keep_domain /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, any process which belongs to
+ * "<kernel> /usr/sbin/sshd /bin/bash" domain will remain in that domain,
+ * unless explicitly specified by "initialize_domain" or "no_keep_domain".
+ *
+ * You may specify a program using "from" keyword.
+ * "keep_domain /bin/pwd from <kernel> /usr/sbin/sshd /bin/bash"
+ * will cause "/bin/pwd" executed from "<kernel> /usr/sbin/sshd /bin/bash"
+ * domain to remain in "<kernel> /usr/sbin/sshd /bin/bash" domain.
+ *
+ * You may add "no_" prefix to "keep_domain".
+ * "keep_domain <kernel> /usr/sbin/sshd /bin/bash" and
+ * "no_keep_domain /usr/bin/passwd from <kernel> /usr/sbin/sshd /bin/bash" will
+ * cause "/usr/bin/passwd" to belong to
+ * "<kernel> /usr/sbin/sshd /bin/bash /usr/bin/passwd" domain, unless
+ * explicitly specified by "initialize_domain".
+ */
 static LIST_HEAD(tomoyo_domain_keeper_list);
 static DECLARE_RWSEM(tomoyo_domain_keeper_list_lock);
 
@@ -437,7 +586,36 @@ static bool tomoyo_is_domain_keeper(const struct tomoyo_path_info *domainname,
 	return flag;
 }
 
-/* The list for "struct tomoyo_alias_entry". */
+/*
+ * tomoyo_alias_list is used for holding list of symlink's pathnames which are
+ * allowed to be passed to an execve() request. Normally, the domainname which
+ * the current process will belong to after execve() succeeds is calculated
+ * using dereferenced pathnames. But some programs behave differently depending
+ * on the name passed to argv[0]. For busybox, calculating domainname using
+ * dereferenced pathnames will cause all programs in the busybox to belong to
+ * the same domain. Thus, TOMOYO provides a way to allow use of symlink's
+ * pathname for checking execve()'s permission and calculating domainname which
+ * the current process will belong to after execve() succeeds.
+ *
+ * An entry is added by
+ *
+ * # echo 'alias /bin/busybox /bin/cat' > \
+ *                            /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete alias /bin/busybox /bin/cat' > \
+ *                            /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^alias /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, if /bin/cat is a symlink to /bin/busybox and execution
+ * of /bin/cat is requested, permission is checked for /bin/cat rather than
+ * /bin/busybox and domainname which the current process will belong to after
+ * execve() succeeds is calculated using /bin/cat rather than /bin/busybox .
+ */
 static LIST_HEAD(tomoyo_alias_list);
 static DECLARE_RWSEM(tomoyo_alias_list_lock);
 
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index ab0cd35..5ae3a57 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -14,21 +14,50 @@
 #include "realpath.h"
 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
 
-/* Structure for "allow_read" keyword. */
+/*
+ * tomoyo_globally_readable_file_entry is a structure which is used for holding
+ * "allow_read" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_globally_readable_list .
+ *  (2) "filename" is a pathname which is allowed to open(O_RDONLY).
+ *  (3) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_globally_readable_file_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *filename;
 	bool is_deleted;
 };
 
-/* Structure for "file_pattern" keyword. */
+/*
+ * tomoyo_pattern_entry is a structure which is used for holding
+ * "tomoyo_pattern_list" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_pattern_list .
+ *  (2) "pattern" is a pathname pattern which is used for converting pathnames
+ *      to pathname patterns during learning mode.
+ *  (3) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_pattern_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *pattern;
 	bool is_deleted;
 };
 
-/* Structure for "deny_rewrite" keyword. */
+/*
+ * tomoyo_no_rewrite_entry is a structure which is used for holding
+ * "deny_rewrite" entries.
+ * It has following fields.
+ *
+ *  (1) "list" which is linked to tomoyo_no_rewrite_list .
+ *  (2) "pattern" is a pathname which is by default not permitted to modify
+ *      already existing content.
+ *  (3) "is_deleted" is a bool which is true if marked as deleted, false
+ *      otherwise.
+ */
 struct tomoyo_no_rewrite_entry {
 	struct list_head list;
 	const struct tomoyo_path_info *pattern;
@@ -141,7 +170,31 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename,
 					 struct tomoyo_domain_info *
 					 const domain, const bool is_delete);
 
-/* The list for "struct tomoyo_globally_readable_file_entry". */
+/*
+ * tomoyo_globally_readable_list is used for holding list of pathnames which
+ * are by default allowed to be open()ed for reading by any process.
+ *
+ * An entry is added by
+ *
+ * # echo 'allow_read /lib/libc-2.5.so' > \
+ *                               /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete allow_read /lib/libc-2.5.so' > \
+ *                               /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^allow_read /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, any process is allowed to
+ * open("/lib/libc-2.5.so", O_RDONLY).
+ * One exception is, if the domain which current process belongs to is marked
+ * as "ignore_global_allow_read", current process can't do so unless explicitly
+ * given "allow_read /lib/libc-2.5.so" to the domain which current process
+ * belongs to.
+ */
 static LIST_HEAD(tomoyo_globally_readable_list);
 static DECLARE_RWSEM(tomoyo_globally_readable_list_lock);
 
@@ -256,7 +309,35 @@ bool tomoyo_read_globally_readable_policy(struct tomoyo_io_buffer *head)
 	return done;
 }
 
-/* The list for "struct tomoyo_pattern_entry". */
+/* tomoyo_pattern_list is used for holding list of pathnames which are used for
+ * converting pathnames to pathname patterns during learning mode.
+ *
+ * An entry is added by
+ *
+ * # echo 'file_pattern /proc/\$/mounts' > \
+ *                             /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete file_pattern /proc/\$/mounts' > \
+ *                             /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^file_pattern /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, if a process which belongs to a domain which is in
+ * learning mode requested open("/proc/1/mounts", O_RDONLY),
+ * "allow_read /proc/\$/mounts" is automatically added to the domain which that
+ * process belongs to.
+ *
+ * It is not a desirable behavior that we have to use /proc/\$/ instead of
+ * /proc/self/ when current process needs to access only current process's
+ * information. As of now, LSM version of TOMOYO is using __d_path() for
+ * calculating pathname. Non LSM version of TOMOYO is using its own function
+ * which pretends as if /proc/self/ is not a symlink; so that we can forbid
+ * current process from accessing other process's information.
+ */
 static LIST_HEAD(tomoyo_pattern_list);
 static DECLARE_RWSEM(tomoyo_pattern_list_lock);
 
@@ -377,7 +458,35 @@ bool tomoyo_read_file_pattern(struct tomoyo_io_buffer *head)
 	return done;
 }
 
-/* The list for "struct tomoyo_no_rewrite_entry". */
+/*
+ * tomoyo_no_rewrite_list is used for holding list of pathnames which are by
+ * default forbidden to modify already written content of a file.
+ *
+ * An entry is added by
+ *
+ * # echo 'deny_rewrite /var/log/messages' > \
+ *                              /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and is deleted by
+ *
+ * # echo 'delete deny_rewrite /var/log/messages' > \
+ *                              /sys/kernel/security/tomoyo/exception_policy
+ *
+ * and all entries are retrieved by
+ *
+ * # grep ^deny_rewrite /sys/kernel/security/tomoyo/exception_policy
+ *
+ * In the example above, if a process requested to rewrite /var/log/messages ,
+ * the process can't rewrite unless the domain which that process belongs to
+ * has "allow_rewrite /var/log/messages" entry.
+ *
+ * It is not a desirable behavior that we have to add "\040(deleted)" suffix
+ * when we want to allow rewriting already unlink()ed file. As of now,
+ * LSM version of TOMOYO is using __d_path() for calculating pathname.
+ * Non LSM version of TOMOYO is using its own function which doesn't append
+ * " (deleted)" suffix if the file is already unlink()ed; so that we don't
+ * need to worry whether the file is already unlink()ed or not.
+ */
 static LIST_HEAD(tomoyo_no_rewrite_list);
 static DECLARE_RWSEM(tomoyo_no_rewrite_list_lock);
 
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 3948f6b..5f2e332 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -265,7 +265,16 @@ static unsigned int tomoyo_quota_for_savename;
  */
 #define TOMOYO_MAX_HASH 256
 
-/* Structure for string data. */
+/*
+ * tomoyo_name_entry is a structure which is used for linking
+ * "struct tomoyo_path_info" into tomoyo_name_list .
+ *
+ * Since tomoyo_name_list manages a list of strings which are shared by
+ * multiple processes (whereas "struct tomoyo_path_info" inside
+ * "struct tomoyo_path_info_with_data" is not shared), a reference counter will
+ * be added to "struct tomoyo_name_entry" rather than "struct tomoyo_path_info"
+ * when TOMOYO starts supporting garbage collector.
+ */
 struct tomoyo_name_entry {
 	struct list_head list;
 	struct tomoyo_path_info entry;
@@ -279,10 +288,10 @@ struct tomoyo_free_memory_block_list {
 };
 
 /*
- * The list for "struct tomoyo_name_entry".
- *
- * This list is updated only inside tomoyo_save_name(), thus
- * no global mutex exists.
+ * tomoyo_name_list is used for holding string data used by TOMOYO.
+ * Since same string data is likely used for multiple times (e.g.
+ * "/lib/libc-2.5.so"), TOMOYO shares string data in the form of
+ * "const struct tomoyo_path_info *".
  */
 static struct list_head tomoyo_name_list[TOMOYO_MAX_HASH];
 
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index e42be5c..3194d09 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -262,6 +262,10 @@ static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
 	return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags);
 }
 
+/*
+ * tomoyo_security_ops is a "struct security_operations" which is used for
+ * registering TOMOYO.
+ */
 static struct security_operations tomoyo_security_ops = {
 	.name                = "tomoyo",
 	.cred_prepare        = tomoyo_cred_prepare,
-- 
cgit v0.10.2


From dbb66c4be020b01dc2f3d7c609ddb0e09d2c0af7 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 9 Jun 2009 05:47:10 +0200
Subject: block: needs to set the residual length of a bidi request

Tejun's "block: set rq->resid_len to blk_rq_bytes() on issue" patch
seems to be incomplete; It doesn't set rq->resid_len to blk_rq_bytes()
for a bidi request (req->next_rq). As a result, all bidi users are
broken.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 7ae83a1..03c5a64 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1846,6 +1846,9 @@ void blk_start_request(struct request *req)
 	 * resid_len to full count and add the timeout handler.
 	 */
 	req->resid_len = blk_rq_bytes(req);
+	if (unlikely(blk_bidi_rq(req)))
+		req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
+
 	blk_add_timer(req);
 }
 EXPORT_SYMBOL(blk_start_request);
-- 
cgit v0.10.2


From 40df6ae4277a67e97aa0a8bd8e293fdbb00e5623 Mon Sep 17 00:00:00 2001
From: "scameron@beardog.cca.cpqcorp.net" <scameron@beardog.cca.cpqcorp.net>
Date: Mon, 8 Jun 2009 15:59:38 -0500
Subject: cciss: Use schedule_timeout_uninterruptible in SCSI error handling
 code

Use schedule_timeout_uninterruptible instead of schedule_timeout in the
scsi error handling code when waiting between TUR polls since we are not
interested in nor want to be interrupted by signals.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 2edfc9b..134fdf2 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -1608,7 +1608,7 @@ static int wait_for_device_to_become_ready(ctlr_info_t *h,
 		/* Wait for a bit.  do this first, because if we send
 		 * the TUR right away, the reset will just abort it.
 		 */
-		schedule_timeout_interruptible(waittime);
+		schedule_timeout_uninterruptible(waittime);
 		count++;
 
 		/* Increase wait time with each try, up to a point. */
-- 
cgit v0.10.2


From 5390cfc3fea49d015ae1eed8551c0bf00489b50e Mon Sep 17 00:00:00 2001
From: "scameron@beardog.cca.cpqcorp.net" <scameron@beardog.cca.cpqcorp.net>
Date: Mon, 8 Jun 2009 16:01:11 -0500
Subject: cciss: factor out core of sendcmd_withirq() for use by SCSI error
 handling code

Factor the core of sendcmd_withirq out to provide a simpler interface
which provides access to full error information.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e7d0095..74fc85a 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2278,114 +2278,123 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_
 	return status;
 }
 
-static int sendcmd_withirq(__u8 cmd,
-			   int ctlr,
-			   void *buff,
-			   size_t size,
-			   unsigned int use_unit_num,
-			   unsigned int log_unit, __u8 page_code, int cmd_type)
+static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c)
 {
-	ctlr_info_t *h = hba[ctlr];
-	CommandList_struct *c;
+	DECLARE_COMPLETION_ONSTACK(wait);
 	u64bit buff_dma_handle;
 	unsigned long flags;
-	int return_status;
-	DECLARE_COMPLETION_ONSTACK(wait);
+	int return_status = IO_OK;
 
-	if ((c = cmd_alloc(h, 0)) == NULL)
-		return -ENOMEM;
-	return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
-				 log_unit, page_code, NULL, cmd_type);
-	if (return_status != IO_OK) {
-		cmd_free(h, c, 0);
-		return return_status;
-	}
-      resend_cmd2:
+resend_cmd2:
 	c->waiting = &wait;
-
 	/* Put the request on the tail of the queue and send it */
-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
 	addQ(&h->reqQ, c);
 	h->Qdepth++;
 	start_io(h);
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
 
 	wait_for_completion(&wait);
 
-	if (c->err_info->CommandStatus != 0) {	/* an error has occurred */
-		switch (c->err_info->CommandStatus) {
-		case CMD_TARGET_STATUS:
-			printk(KERN_WARNING "cciss: cmd %p has "
-			       " completed with errors\n", c);
-			if (c->err_info->ScsiStatus) {
-				printk(KERN_WARNING "cciss: cmd %p "
-				       "has SCSI Status = %x\n",
-				       c, c->err_info->ScsiStatus);
-			}
+	if (c->err_info->CommandStatus == 0)
+		goto command_done;
 
-			break;
-		case CMD_DATA_UNDERRUN:
-		case CMD_DATA_OVERRUN:
-			/* expected for inquire and report lun commands */
-			break;
-		case CMD_INVALID:
-			printk(KERN_WARNING "cciss: Cmd %p is "
-			       "reported invalid\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_PROTOCOL_ERR:
-			printk(KERN_WARNING "cciss: cmd %p has "
-			       "protocol error \n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_HARDWARE_ERR:
-			printk(KERN_WARNING "cciss: cmd %p had "
-			       " hardware error\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_CONNECTION_LOST:
-			printk(KERN_WARNING "cciss: cmd %p had "
-			       "connection lost\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_ABORTED:
-			printk(KERN_WARNING "cciss: cmd %p was "
-			       "aborted\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_ABORT_FAILED:
-			printk(KERN_WARNING "cciss: cmd %p reports "
-			       "abort failed\n", c);
-			return_status = IO_ERROR;
-			break;
-		case CMD_UNSOLICITED_ABORT:
+	switch (c->err_info->CommandStatus) {
+	case CMD_TARGET_STATUS:
+		printk(KERN_WARNING "cciss: cmd 0x%02x "
+		"has completed with errors\n", c->Request.CDB[0]);
+		if (c->err_info->ScsiStatus) {
+			printk(KERN_WARNING "cciss: cmd 0x%02x "
+			       "has SCSI Status = %x\n",
+			       c->Request.CDB[0], c->err_info->ScsiStatus);
+		}
+		break;
+	case CMD_DATA_UNDERRUN:
+	case CMD_DATA_OVERRUN:
+		/* expected for inquiry and report lun commands */
+		break;
+	case CMD_INVALID:
+		printk(KERN_WARNING "cciss: Cmd 0x%02x is "
+		       "reported invalid\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_PROTOCOL_ERR:
+		printk(KERN_WARNING "cciss: cmd 0x%02x has "
+		       "protocol error \n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_HARDWARE_ERR:
+		printk(KERN_WARNING "cciss: cmd 0x%02x had "
+		       " hardware error\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_CONNECTION_LOST:
+		printk(KERN_WARNING "cciss: cmd 0x%02x had "
+		       "connection lost\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_ABORTED:
+		printk(KERN_WARNING "cciss: cmd 0x%02x was "
+		       "aborted\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_ABORT_FAILED:
+		printk(KERN_WARNING "cciss: cmd 0x%02x reports "
+		       "abort failed\n", c->Request.CDB[0]);
+		return_status = IO_ERROR;
+		break;
+	case CMD_UNSOLICITED_ABORT:
+		printk(KERN_WARNING
+		       "cciss%d: unsolicited abort 0x%02x\n", h->ctlr,
+			c->Request.CDB[0]);
+		if (c->retry_count < MAX_CMD_RETRIES) {
 			printk(KERN_WARNING
-			       "cciss%d: unsolicited abort %p\n", ctlr, c);
-			if (c->retry_count < MAX_CMD_RETRIES) {
-				printk(KERN_WARNING
-				       "cciss%d: retrying %p\n", ctlr, c);
-				c->retry_count++;
-				/* erase the old error information */
-				memset(c->err_info, 0,
-				       sizeof(ErrorInfo_struct));
-				return_status = IO_OK;
-				INIT_COMPLETION(wait);
-				goto resend_cmd2;
-			}
-			return_status = IO_ERROR;
-			break;
-		default:
-			printk(KERN_WARNING "cciss: cmd %p returned "
-			       "unknown status %x\n", c,
-			       c->err_info->CommandStatus);
-			return_status = IO_ERROR;
+			       "cciss%d: retrying 0x%02x\n", h->ctlr,
+				c->Request.CDB[0]);
+			c->retry_count++;
+			/* erase the old error information */
+			memset(c->err_info, 0,
+			       sizeof(ErrorInfo_struct));
+			return_status = IO_OK;
+			INIT_COMPLETION(wait);
+			goto resend_cmd2;
 		}
+		return_status = IO_ERROR;
+		break;
+	default:
+		printk(KERN_WARNING "cciss: cmd 0x%02x returned "
+		       "unknown status %x\n", c->Request.CDB[0],
+		       c->err_info->CommandStatus);
+		return_status = IO_ERROR;
 	}
+
+command_done:
 	/* unlock the buffers from DMA */
 	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
 	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
 	pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
 			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
+	return return_status;
+}
+
+static int sendcmd_withirq(__u8 cmd,
+			   int ctlr,
+			   void *buff,
+			   size_t size,
+			   unsigned int use_unit_num,
+			   unsigned int log_unit, __u8 page_code, int cmd_type)
+{
+	ctlr_info_t *h = hba[ctlr];
+	CommandList_struct *c;
+	int return_status;
+
+	c = cmd_alloc(h, 0);
+	if (!c)
+		return -ENOMEM;
+	return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
+				 log_unit, page_code, NULL, cmd_type);
+	if (return_status == IO_OK)
+		return_status = sendcmd_withirq_core(h, c);
 	cmd_free(h, c, 0);
 	return return_status;
 }
-- 
cgit v0.10.2


From b57695fe131b13d3f2460cfeb9175cff673ed337 Mon Sep 17 00:00:00 2001
From: "scameron@beardog.cca.cpqcorp.net" <scameron@beardog.cca.cpqcorp.net>
Date: Mon, 8 Jun 2009 16:02:17 -0500
Subject: cciss: simplify interface of sendcmd() and sendcmd_withirq()

Simplify interfaces of sendcmd() and sendcmd_withirq() so that they
provide only one way to address commands instead of three ways.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 74fc85a..885ea1e 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -180,11 +180,10 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
 					   __u32);
 static void start_io(ctlr_info_t *h);
 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
-		   unsigned int use_unit_num, unsigned int log_unit,
 		   __u8 page_code, unsigned char *scsi3addr, int cmd_type);
 static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
-			   unsigned int use_unit_num, unsigned int log_unit,
-			   __u8 page_code, int cmd_type);
+			__u8 page_code, unsigned char scsi3addr[],
+			int cmd_type);
 
 static void fail_all_cmds(unsigned long ctlr);
 static int scan_thread(void *data);
@@ -1520,6 +1519,15 @@ static void cciss_softirq_done(struct request *rq)
 	spin_unlock_irqrestore(&h->lock, flags);
 }
 
+static void log_unit_to_scsi3addr(ctlr_info_t *h, unsigned char scsi3addr[],
+	uint32_t log_unit)
+{
+	log_unit = h->drv[log_unit].LunID & 0x03fff;
+	memset(&scsi3addr[4], 0, 4);
+	memcpy(&scsi3addr[0], &log_unit, 4);
+	scsi3addr[3] |= 0x40;
+}
+
 /* This function gets the SCSI vendor, model, and revision of a logical drive
  * via the inquiry page 0.  Model, vendor, and rev are set to empty strings if
  * they cannot be read.
@@ -1529,6 +1537,7 @@ static void cciss_get_device_descr(int ctlr, int logvol, int withirq,
 {
 	int rc;
 	InquiryData_struct *inq_buf;
+	unsigned char scsi3addr[8];
 
 	*vendor = '\0';
 	*model = '\0';
@@ -1538,14 +1547,15 @@ static void cciss_get_device_descr(int ctlr, int logvol, int withirq,
 	if (!inq_buf)
 		return;
 
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq)
 		rc = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buf,
-				     sizeof(InquiryData_struct), 1, logvol,
-				     0, TYPE_CMD);
+			     sizeof(InquiryData_struct), 0,
+				scsi3addr, TYPE_CMD);
 	else
 		rc = sendcmd(CISS_INQUIRY, ctlr, inq_buf,
-			     sizeof(InquiryData_struct), 1, logvol, 0, NULL,
-			     TYPE_CMD);
+			     sizeof(InquiryData_struct), 0,
+				scsi3addr, TYPE_CMD);
 	if (rc == IO_OK) {
 		memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
 		vendor[VENDOR_LEN] = '\0';
@@ -1570,6 +1580,7 @@ static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
 #define PAGE_83_INQ_BYTES 64
 	int rc;
 	unsigned char *buf;
+	unsigned char scsi3addr[8];
 
 	if (buflen > 16)
 		buflen = 16;
@@ -1578,12 +1589,13 @@ static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
 	if (!buf)
 		return;
 	memset(serial_no, 0, buflen);
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq)
 		rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
-			PAGE_83_INQ_BYTES, 1, logvol, 0x83, TYPE_CMD);
+			PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
 	else
 		rc = sendcmd(CISS_INQUIRY, ctlr, buf,
-			PAGE_83_INQ_BYTES, 1, logvol, 0x83, NULL, TYPE_CMD);
+			PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
 	if (rc == IO_OK)
 		memcpy(serial_no, &buf[8], buflen);
 	kfree(buf);
@@ -1902,8 +1914,8 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
 		goto mem_msg;
 
 	return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
-				      sizeof(ReportLunData_struct), 0,
-				      0, 0, TYPE_CMD);
+				      sizeof(ReportLunData_struct),
+				      0, CTLR_LUNID, TYPE_CMD);
 
 	if (return_code == IO_OK)
 		listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
@@ -2112,11 +2124,9 @@ static int deregister_disk(ctlr_info_t *h, int drv_index,
 	return 0;
 }
 
-static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,	/* 0: address the controller,
-															   1: address logical volume log_unit,
-															   2: periph device address is scsi3addr */
-		    unsigned int log_unit, __u8 page_code,
-		    unsigned char *scsi3addr, int cmd_type)
+static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
+		size_t size, __u8 page_code, unsigned char *scsi3addr,
+		int cmd_type)
 {
 	ctlr_info_t *h = hba[ctlr];
 	u64bit buff_dma_handle;
@@ -2132,27 +2142,12 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_
 		c->Header.SGTotal = 0;
 	}
 	c->Header.Tag.lower = c->busaddr;
+	memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
 
 	c->Request.Type.Type = cmd_type;
 	if (cmd_type == TYPE_CMD) {
 		switch (cmd) {
 		case CISS_INQUIRY:
-			/* If the logical unit number is 0 then, this is going
-			   to controller so It's a physical command
-			   mode = 0 target = 0.  So we have nothing to write.
-			   otherwise, if use_unit_num == 1,
-			   mode = 1(volume set addressing) target = LUNID
-			   otherwise, if use_unit_num == 2,
-			   mode = 0(periph dev addr) target = scsi3addr */
-			if (use_unit_num == 1) {
-				c->Header.LUN.LogDev.VolId =
-				    h->drv[log_unit].LunID;
-				c->Header.LUN.LogDev.Mode = 1;
-			} else if (use_unit_num == 2) {
-				memcpy(c->Header.LUN.LunAddrBytes, scsi3addr,
-				       8);
-				c->Header.LUN.LogDev.Mode = 0;
-			}
 			/* are we trying to read a vital product page */
 			if (page_code != 0) {
 				c->Request.CDB[1] = 0x01;
@@ -2182,8 +2177,6 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_
 			break;
 
 		case CCISS_READ_CAPACITY:
-			c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
-			c->Header.LUN.LogDev.Mode = 1;
 			c->Request.CDBLen = 10;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_READ;
@@ -2191,8 +2184,6 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_
 			c->Request.CDB[0] = cmd;
 			break;
 		case CCISS_READ_CAPACITY_16:
-			c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
-			c->Header.LUN.LogDev.Mode = 1;
 			c->Request.CDBLen = 16;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_READ;
@@ -2215,7 +2206,6 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_
 			c->Request.CDB[6] = BMIC_CACHE_FLUSH;
 			break;
 		case TEST_UNIT_READY:
-			memcpy(c->Header. LUN.LunAddrBytes, scsi3addr, 8);
 			c->Request.CDBLen = 6;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_NONE;
@@ -2239,7 +2229,6 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_
 			memcpy(&c->Request.CDB[4], buff, 8);
 			break;
 		case 1:	/* RESET message */
-			memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
 			c->Request.CDBLen = 16;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_NONE;
@@ -2307,6 +2296,9 @@ resend_cmd2:
 			printk(KERN_WARNING "cciss: cmd 0x%02x "
 			       "has SCSI Status = %x\n",
 			       c->Request.CDB[0], c->err_info->ScsiStatus);
+			if (c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION)
+				printk(KERN_WARNING "sense key = 0x%02x\n",
+					0xf & c->err_info->SenseInfo[2]);
 		}
 		break;
 	case CMD_DATA_UNDERRUN:
@@ -2377,12 +2369,9 @@ command_done:
 	return return_status;
 }
 
-static int sendcmd_withirq(__u8 cmd,
-			   int ctlr,
-			   void *buff,
-			   size_t size,
-			   unsigned int use_unit_num,
-			   unsigned int log_unit, __u8 page_code, int cmd_type)
+static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
+			   __u8 page_code, unsigned char scsi3addr[],
+			int cmd_type)
 {
 	ctlr_info_t *h = hba[ctlr];
 	CommandList_struct *c;
@@ -2391,8 +2380,8 @@ static int sendcmd_withirq(__u8 cmd,
 	c = cmd_alloc(h, 0);
 	if (!c)
 		return -ENOMEM;
-	return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
-				 log_unit, page_code, NULL, cmd_type);
+	return_status = fill_cmd(c, cmd, ctlr, buff, size, page_code,
+		scsi3addr, cmd_type);
 	if (return_status == IO_OK)
 		return_status = sendcmd_withirq_core(h, c);
 	cmd_free(h, c, 0);
@@ -2407,15 +2396,17 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
 {
 	int return_code;
 	unsigned long t;
+	unsigned char scsi3addr[8];
 
 	memset(inq_buff, 0, sizeof(InquiryData_struct));
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq)
 		return_code = sendcmd_withirq(CISS_INQUIRY, ctlr,
-					      inq_buff, sizeof(*inq_buff), 1,
-					      logvol, 0xC1, TYPE_CMD);
+					      inq_buff, sizeof(*inq_buff),
+					      0xC1, scsi3addr, TYPE_CMD);
 	else
 		return_code = sendcmd(CISS_INQUIRY, ctlr, inq_buff,
-				      sizeof(*inq_buff), 1, logvol, 0xC1, NULL,
+				      sizeof(*inq_buff), 0xC1, scsi3addr,
 				      TYPE_CMD);
 	if (return_code == IO_OK) {
 		if (inq_buff->data_byte[8] == 0xFF) {
@@ -2456,6 +2447,7 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
 {
 	ReadCapdata_struct *buf;
 	int return_code;
+	unsigned char scsi3addr[8];
 
 	buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
 	if (!buf) {
@@ -2463,14 +2455,15 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
 		return;
 	}
 
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq)
 		return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
 				ctlr, buf, sizeof(ReadCapdata_struct),
-					1, logvol, 0, TYPE_CMD);
+					0, scsi3addr, TYPE_CMD);
 	else
 		return_code = sendcmd(CCISS_READ_CAPACITY,
 				ctlr, buf, sizeof(ReadCapdata_struct),
-					1, logvol, 0, NULL, TYPE_CMD);
+					0, scsi3addr, TYPE_CMD);
 	if (return_code == IO_OK) {
 		*total_size = be32_to_cpu(*(__be32 *) buf->total_size);
 		*block_size = be32_to_cpu(*(__be32 *) buf->block_size);
@@ -2490,6 +2483,7 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
 {
 	ReadCapdata_struct_16 *buf;
 	int return_code;
+	unsigned char scsi3addr[8];
 
 	buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
 	if (!buf) {
@@ -2497,15 +2491,16 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
 		return;
 	}
 
+	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
 	if (withirq) {
 		return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
 			ctlr, buf, sizeof(ReadCapdata_struct_16),
-				1, logvol, 0, TYPE_CMD);
+				0, scsi3addr, TYPE_CMD);
 	}
 	else {
 		return_code = sendcmd(CCISS_READ_CAPACITY_16,
 			ctlr, buf, sizeof(ReadCapdata_struct_16),
-				1, logvol, 0, NULL, TYPE_CMD);
+				0, scsi3addr, TYPE_CMD);
 	}
 	if (return_code == IO_OK) {
 		*total_size = be64_to_cpu(*(__be64 *) buf->total_size);
@@ -2760,10 +2755,6 @@ cleanup1:
  * Used at init time, and during SCSI error recovery.
  */
 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
-	unsigned int use_unit_num,/* 0: address the controller,
-				     1: address logical volume log_unit,
-				     2: periph device address is scsi3addr */
-	unsigned int log_unit,
 	__u8 page_code, unsigned char *scsi3addr, int cmd_type)
 {
 	CommandList_struct *c;
@@ -2774,8 +2765,8 @@ static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
 		printk(KERN_WARNING "cciss: unable to get memory");
 		return IO_ERROR;
 	}
-	status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
-			  log_unit, page_code, scsi3addr, cmd_type);
+	status = fill_cmd(c, cmd, ctlr, buff, size, page_code,
+		scsi3addr, cmd_type);
 	if (status == IO_OK)
 		status = sendcmd_core(hba[ctlr], c);
 	cmd_free(hba[ctlr], c, 1);
@@ -4076,7 +4067,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	}
 
 	return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff,
-		sizeof(InquiryData_struct), 0, 0 , 0, TYPE_CMD);
+		sizeof(InquiryData_struct), 0, CTLR_LUNID, TYPE_CMD);
 	if (return_code == IO_OK) {
 		hba[i]->firm_ver[0] = inq_buff->data_byte[32];
 		hba[i]->firm_ver[1] = inq_buff->data_byte[33];
@@ -4157,8 +4148,8 @@ static void cciss_shutdown(struct pci_dev *pdev)
 	/* sendcmd will turn off interrupt, and send the flush...
 	 * To write all data in the battery backed cache to disks */
 	memset(flush_buf, 0, 4);
-	return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
-			      TYPE_CMD);
+	return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0,
+		CTLR_LUNID, TYPE_CMD);
 	if (return_code == IO_OK) {
 		printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
 	} else {
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 40b1b92..cd665b0 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -217,6 +217,8 @@ typedef union _LUNAddr_struct {
   LogDevAddr_struct  LogDev;
 } LUNAddr_struct;
 
+#define CTLR_LUNID "\0\0\0\0\0\0\0\0"
+
 typedef struct _CommandListHeader_struct {
   BYTE              ReplyQueue;
   BYTE              SGList;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 134fdf2..007ab8a 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -44,26 +44,9 @@
 #define CCISS_ABORT_MSG 0x00
 #define CCISS_RESET_MSG 0x01
 
-/* some prototypes... */ 
-static int sendcmd(
-	__u8	cmd,
-	int	ctlr,
-	void	*buff,
-	size_t	size,
-	unsigned int use_unit_num, /* 0: address the controller,
-				      1: address logical volume log_unit, 
-				      2: address is in scsi3addr */
-	unsigned int log_unit,
-	__u8	page_code,
-	unsigned char *scsi3addr,
-	int cmd_type);
-
 static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
 	size_t size,
-	unsigned int use_unit_num, /* 0: address the controller,
-				      1: address logical volume log_unit,
-				      2: periph device address is scsi3addr */
-	unsigned int log_unit, __u8 page_code, unsigned char *scsi3addr,
+	__u8 page_code, unsigned char *scsi3addr,
 	int cmd_type);
 
 static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c);
@@ -1616,7 +1599,7 @@ static int wait_for_device_to_become_ready(ctlr_info_t *h,
 			waittime = waittime * 2;
 
 		/* Send the Test Unit Ready */
-		rc = fill_cmd(c, TEST_UNIT_READY, h->ctlr, NULL, 0, 0, 0, 0,
+		rc = fill_cmd(c, TEST_UNIT_READY, h->ctlr, NULL, 0, 0,
 			lunaddr, TYPE_CMD);
 		if (rc == 0) {
 			rc = sendcmd_core(h, c);
@@ -1680,7 +1663,7 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
 		return FAILED;
 	memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
 	/* send a reset to the SCSI LUN which the command was sent to */
-	rc = sendcmd(CCISS_RESET_MSG, ctlr, NULL, 0, 2, 0, 0, lunaddr,
+	rc = sendcmd(CCISS_RESET_MSG, ctlr, NULL, 0, 0, lunaddr,
 		TYPE_MSG);
 	/* sendcmd turned off interrupts on the board, turn 'em back on. */
 	(*c)->access.set_intr_mask(*c, CCISS_INTR_ON);
@@ -1708,8 +1691,7 @@ static int  cciss_eh_abort_handler(struct scsi_cmnd *scsicmd)
 	cmd_to_abort = (CommandList_struct *) scsicmd->host_scribble;
 	if (cmd_to_abort == NULL) /* paranoia */
 		return FAILED;
-	rc = sendcmd(CCISS_ABORT_MSG, ctlr, &cmd_to_abort->Header.Tag, 
-		0, 2, 0, 0, 
+	rc = sendcmd(CCISS_ABORT_MSG, ctlr, &cmd_to_abort->Header.Tag, 0, 0,
 		(unsigned char *) &cmd_to_abort->Header.LUN.LunAddrBytes[0], 
 		TYPE_MSG);
 	/* sendcmd turned off interrupts on the board, turn 'em back on. */
-- 
cgit v0.10.2


From 3c2ab40296894d1f7ad9714550fdf9b96d4e9ee6 Mon Sep 17 00:00:00 2001
From: "scameron@beardog.cca.cpqcorp.net" <scameron@beardog.cca.cpqcorp.net>
Date: Mon, 8 Jun 2009 16:04:35 -0500
Subject: cciss: factor out fix target status processing code from sendcmd
 functions

Factor out code to process target status of completed commands in sendcmd()
and sendcmd_withirq_core(), and fix problem that bad target status was ignored in
sendcmd_withirq_core.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 885ea1e..2d12883 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2267,6 +2267,31 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
 	return status;
 }
 
+static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
+{
+	switch (c->err_info->ScsiStatus) {
+	case SAM_STAT_GOOD:
+		return IO_OK;
+	case SAM_STAT_CHECK_CONDITION:
+		switch (0xf & c->err_info->SenseInfo[2]) {
+		case 0: return IO_OK; /* no sense */
+		case 1: return IO_OK; /* recovered error */
+		default:
+			printk(KERN_WARNING "cciss%d: cmd 0x%02x "
+				"check condition, sense key = 0x%02x\n",
+				h->ctlr, c->Request.CDB[0],
+				c->err_info->SenseInfo[2]);
+		}
+		break;
+	default:
+		printk(KERN_WARNING "cciss%d: cmd 0x%02x"
+			"scsi status = 0x%02x\n", h->ctlr,
+			c->Request.CDB[0], c->err_info->ScsiStatus);
+		break;
+	}
+	return IO_ERROR;
+}
+
 static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
@@ -2290,16 +2315,7 @@ resend_cmd2:
 
 	switch (c->err_info->CommandStatus) {
 	case CMD_TARGET_STATUS:
-		printk(KERN_WARNING "cciss: cmd 0x%02x "
-		"has completed with errors\n", c->Request.CDB[0]);
-		if (c->err_info->ScsiStatus) {
-			printk(KERN_WARNING "cciss: cmd 0x%02x "
-			       "has SCSI Status = %x\n",
-			       c->Request.CDB[0], c->err_info->ScsiStatus);
-			if (c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION)
-				printk(KERN_WARNING "sense key = 0x%02x\n",
-					0xf & c->err_info->SenseInfo[2]);
-		}
+		return_status = check_target_status(h, c);
 		break;
 	case CMD_DATA_UNDERRUN:
 	case CMD_DATA_OVERRUN:
@@ -2710,33 +2726,29 @@ resend_cmd1:
 			printk(KERN_WARNING "cciss%d: retried %p too many "
 				"times\n", h->ctlr, c);
 			status = IO_ERROR;
-			goto cleanup1;
+			break;
 		}
 
 		if (c->err_info->CommandStatus == CMD_UNABORTABLE) {
 			printk(KERN_WARNING "cciss%d: command could not be "
 				"aborted.\n", h->ctlr);
 			status = IO_ERROR;
-			goto cleanup1;
+			break;
 		}
 
-		printk(KERN_WARNING "cciss%d: sendcmd error\n", h->ctlr);
-		printk(KERN_WARNING "cmd = 0x%02x, CommandStatus = 0x%02x\n",
-			c->Request.CDB[0], c->err_info->CommandStatus);
 		if (c->err_info->CommandStatus == CMD_TARGET_STATUS) {
-			printk(KERN_WARNING "Target status = 0x%02x\n",
-			c->err_info->ScsiStatus);
-			if (c->err_info->ScsiStatus == 2) /* chk cond */
-				printk(KERN_WARNING "Sense key = 0x%02x\n",
-					0xf & c->err_info->SenseInfo[2]);
+			status = check_target_status(h, c);
+			break;
 		}
 
+		printk(KERN_WARNING "cciss%d: sendcmd error\n", h->ctlr);
+		printk(KERN_WARNING "cmd = 0x%02x, CommandStatus = 0x%02x\n",
+			c->Request.CDB[0], c->err_info->CommandStatus);
 		status = IO_ERROR;
-		goto cleanup1;
+		break;
 
 	} while (1);
 
-cleanup1:
 	/* unlock the data buffer from DMA */
 	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
 	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
-- 
cgit v0.10.2


From 789a424ad1352b335960e7c56494d0410577fa61 Mon Sep 17 00:00:00 2001
From: "scameron@beardog.cca.cpqcorp.net" <scameron@beardog.cca.cpqcorp.net>
Date: Mon, 8 Jun 2009 16:05:56 -0500
Subject: cciss: separate error processing and command retrying code in
 sendcmd_withirq_core()

Separate the error processing from sendcmd_withirq_core from the code
which retries commands.  The rationale for this is that the SCSI error
handling code can then be made to use sendcmd_withirq_core, but avoid
retrying commands.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 2d12883..7001707 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2292,26 +2292,12 @@ static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
 	return IO_ERROR;
 }
 
-static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c)
+static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
-	u64bit buff_dma_handle;
-	unsigned long flags;
 	int return_status = IO_OK;
 
-resend_cmd2:
-	c->waiting = &wait;
-	/* Put the request on the tail of the queue and send it */
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
-	addQ(&h->reqQ, c);
-	h->Qdepth++;
-	start_io(h);
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
-
-	wait_for_completion(&wait);
-
-	if (c->err_info->CommandStatus == 0)
-		goto command_done;
+	if (c->err_info->CommandStatus == CMD_SUCCESS)
+		return IO_OK;
 
 	switch (c->err_info->CommandStatus) {
 	case CMD_TARGET_STATUS:
@@ -2322,7 +2308,7 @@ resend_cmd2:
 		/* expected for inquiry and report lun commands */
 		break;
 	case CMD_INVALID:
-		printk(KERN_WARNING "cciss: Cmd 0x%02x is "
+		printk(KERN_WARNING "cciss: cmd 0x%02x is "
 		       "reported invalid\n", c->Request.CDB[0]);
 		return_status = IO_ERROR;
 		break;
@@ -2355,19 +2341,7 @@ resend_cmd2:
 		printk(KERN_WARNING
 		       "cciss%d: unsolicited abort 0x%02x\n", h->ctlr,
 			c->Request.CDB[0]);
-		if (c->retry_count < MAX_CMD_RETRIES) {
-			printk(KERN_WARNING
-			       "cciss%d: retrying 0x%02x\n", h->ctlr,
-				c->Request.CDB[0]);
-			c->retry_count++;
-			/* erase the old error information */
-			memset(c->err_info, 0,
-			       sizeof(ErrorInfo_struct));
-			return_status = IO_OK;
-			INIT_COMPLETION(wait);
-			goto resend_cmd2;
-		}
-		return_status = IO_ERROR;
+		return_status = IO_NEEDS_RETRY;
 		break;
 	default:
 		printk(KERN_WARNING "cciss: cmd 0x%02x returned "
@@ -2375,6 +2349,44 @@ resend_cmd2:
 		       c->err_info->CommandStatus);
 		return_status = IO_ERROR;
 	}
+	return return_status;
+}
+
+static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
+	int attempt_retry)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+	u64bit buff_dma_handle;
+	unsigned long flags;
+	int return_status = IO_OK;
+
+resend_cmd2:
+	c->waiting = &wait;
+	/* Put the request on the tail of the queue and send it */
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	addQ(&h->reqQ, c);
+	h->Qdepth++;
+	start_io(h);
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+
+	wait_for_completion(&wait);
+
+	if (c->err_info->CommandStatus == 0 || !attempt_retry)
+		goto command_done;
+
+	return_status = process_sendcmd_error(h, c);
+
+	if (return_status == IO_NEEDS_RETRY &&
+		c->retry_count < MAX_CMD_RETRIES) {
+		printk(KERN_WARNING "cciss%d: retrying 0x%02x\n", h->ctlr,
+			c->Request.CDB[0]);
+		c->retry_count++;
+		/* erase the old error information */
+		memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+		return_status = IO_OK;
+		INIT_COMPLETION(wait);
+		goto resend_cmd2;
+	}
 
 command_done:
 	/* unlock the buffers from DMA */
@@ -2399,7 +2411,8 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
 	return_status = fill_cmd(c, cmd, ctlr, buff, size, page_code,
 		scsi3addr, cmd_type);
 	if (return_status == IO_OK)
-		return_status = sendcmd_withirq_core(h, c);
+		return_status = sendcmd_withirq_core(h, c, 1);
+
 	cmd_free(h, c, 0);
 	return return_status;
 }
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index dd1926d..25cd58e 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -11,6 +11,7 @@
 
 #define IO_OK		0
 #define IO_ERROR	1
+#define IO_NEEDS_RETRY  3
 
 #define VENDOR_LEN	8
 #define MODEL_LEN	16
-- 
cgit v0.10.2


From 85cc61ae41084cb6d8ecc6c9e01ac4563005c8ac Mon Sep 17 00:00:00 2001
From: "scameron@beardog.cca.cpqcorp.net" <scameron@beardog.cca.cpqcorp.net>
Date: Mon, 8 Jun 2009 16:07:45 -0500
Subject: cciss: change SCSI error handling routines to work with interrupts
 enabled.

Change cciss scsi error handling routines to work with interrupts enabled.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 7001707..9a9db6d 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -184,6 +184,9 @@ static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
 static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
 			__u8 page_code, unsigned char scsi3addr[],
 			int cmd_type);
+static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
+	int attempt_retry);
+static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
 
 static void fail_all_cmds(unsigned long ctlr);
 static int scan_thread(void *data);
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 007ab8a..d2eb489 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -49,8 +49,6 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
 	__u8 page_code, unsigned char *scsi3addr,
 	int cmd_type);
 
-static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c);
-
 static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool);
 static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool);
 
@@ -1601,11 +1599,10 @@ static int wait_for_device_to_become_ready(ctlr_info_t *h,
 		/* Send the Test Unit Ready */
 		rc = fill_cmd(c, TEST_UNIT_READY, h->ctlr, NULL, 0, 0,
 			lunaddr, TYPE_CMD);
-		if (rc == 0) {
-			rc = sendcmd_core(h, c);
-			/* sendcmd turned off interrupts, turn 'em back on. */
-			h->access.set_intr_mask(h, CCISS_INTR_ON);
-		}
+		if (rc == 0)
+			rc = sendcmd_withirq_core(h, c, 0);
+
+		(void) process_sendcmd_error(h, c);
 
 		if (rc == 0 && c->err_info->CommandStatus == CMD_SUCCESS)
 			break;
@@ -1663,10 +1660,8 @@ static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
 		return FAILED;
 	memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
 	/* send a reset to the SCSI LUN which the command was sent to */
-	rc = sendcmd(CCISS_RESET_MSG, ctlr, NULL, 0, 0, lunaddr,
+	rc = sendcmd_withirq(CCISS_RESET_MSG, ctlr, NULL, 0, 0, lunaddr,
 		TYPE_MSG);
-	/* sendcmd turned off interrupts on the board, turn 'em back on. */
-	(*c)->access.set_intr_mask(*c, CCISS_INTR_ON);
 	if (rc == 0 && wait_for_device_to_become_ready(*c, lunaddr) == 0)
 		return SUCCESS;
 	printk(KERN_WARNING "cciss%d: resetting device failed.\n", ctlr);
@@ -1677,6 +1672,7 @@ static int  cciss_eh_abort_handler(struct scsi_cmnd *scsicmd)
 {
 	int rc;
 	CommandList_struct *cmd_to_abort;
+	unsigned char lunaddr[8];
 	ctlr_info_t **c;
 	int ctlr;
 
@@ -1691,11 +1687,9 @@ static int  cciss_eh_abort_handler(struct scsi_cmnd *scsicmd)
 	cmd_to_abort = (CommandList_struct *) scsicmd->host_scribble;
 	if (cmd_to_abort == NULL) /* paranoia */
 		return FAILED;
-	rc = sendcmd(CCISS_ABORT_MSG, ctlr, &cmd_to_abort->Header.Tag, 0, 0,
-		(unsigned char *) &cmd_to_abort->Header.LUN.LunAddrBytes[0], 
-		TYPE_MSG);
-	/* sendcmd turned off interrupts on the board, turn 'em back on. */
-	(*c)->access.set_intr_mask(*c, CCISS_INTR_ON);
+	memcpy(lunaddr, &cmd_to_abort->Header.LUN.LunAddrBytes[0], 8);
+	rc = sendcmd_withirq(CCISS_ABORT_MSG, ctlr, &cmd_to_abort->Header.Tag,
+		0, 0, lunaddr, TYPE_MSG);
 	if (rc == 0)
 		return SUCCESS;
 	return FAILED;
-- 
cgit v0.10.2


From 72f9f1324fc4cd450c92e4600a710231b0445c75 Mon Sep 17 00:00:00 2001
From: "scameron@beardog.cca.cpqcorp.net" <scameron@beardog.cca.cpqcorp.net>
Date: Mon, 8 Jun 2009 16:09:32 -0500
Subject: cciss: Remove no longer needed sendcmd reject processing code

Now that the cciss SCSI error handling routines operate with interrupts
enabled, we no longer need to maintain the list of command completions that
sendcmd() might inadvertantly scoop up, since now it only runs at driver init
time, and there won't be any other commands for it to scoop up.  So we
can remove that list and the code that adds to it and processes it.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9a9db6d..b22cec9 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2612,51 +2612,6 @@ static unsigned long pollcomplete(int ctlr)
 	return 1;
 }
 
-static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete)
-{
-	/* We get in here if sendcmd() is polling for completions
-	   and gets some command back that it wasn't expecting --
-	   something other than that which it just sent down.
-	   Ordinarily, that shouldn't happen, but it can happen when
-	   the scsi tape stuff gets into error handling mode, and
-	   starts using sendcmd() to try to abort commands and
-	   reset tape drives.  In that case, sendcmd may pick up
-	   completions of commands that were sent to logical drives
-	   through the block i/o system, or cciss ioctls completing, etc.
-	   In that case, we need to save those completions for later
-	   processing by the interrupt handler.
-	 */
-
-#ifdef CONFIG_CISS_SCSI_TAPE
-	struct sendcmd_reject_list *srl = &hba[ctlr]->scsi_rejects;
-
-	/* If it's not the scsi tape stuff doing error handling, (abort */
-	/* or reset) then we don't expect anything weird. */
-	if (cmd != CCISS_RESET_MSG && cmd != CCISS_ABORT_MSG) {
-#endif
-		printk(KERN_WARNING "cciss cciss%d: SendCmd "
-		       "Invalid command list address returned! (%lx)\n",
-		       ctlr, complete);
-		/* not much we can do. */
-#ifdef CONFIG_CISS_SCSI_TAPE
-		return 1;
-	}
-
-	/* We've sent down an abort or reset, but something else
-	   has completed */
-	if (srl->ncompletions >= (hba[ctlr]->nr_cmds + 2)) {
-		/* Uh oh.  No room to save it for later... */
-		printk(KERN_WARNING "cciss%d: Sendcmd: Invalid command addr, "
-		       "reject list overflow, command lost!\n", ctlr);
-		return 1;
-	}
-	/* Save it for later */
-	srl->complete[srl->ncompletions] = complete;
-	srl->ncompletions++;
-#endif
-	return 0;
-}
-
 /* Send command c to controller h and poll for it to complete.
  * Turns interrupts off on the board.  Used at driver init time
  * and during SCSI error recovery.
@@ -2701,11 +2656,10 @@ resend_cmd1:
 			break;
 		}
 
-		/* If it's not the cmd we're looking for, save it for later */
+		/* Make sure it's the command we're expecting. */
 		if ((complete & ~CISS_ERROR_BIT) != c->busaddr) {
-			if (add_sendcmd_reject(c->Request.CDB[0],
-				h->ctlr, complete) != 0)
-				BUG(); /* we are hosed if we get here. */
+			printk(KERN_WARNING "cciss%d: Unexpected command "
+				"completion.\n", h->ctlr);
 			continue;
 		}
 
@@ -2770,11 +2724,6 @@ resend_cmd1:
 	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
 	pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
 			 c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
-#ifdef CONFIG_CISS_SCSI_TAPE
-	/* if we saved some commands for later, process them now. */
-	if (h->scsi_rejects.ncompletions > 0)
-		do_cciss_intr(0, h);
-#endif
 	return status;
 }
 
@@ -3195,44 +3144,18 @@ startio:
 
 static inline unsigned long get_next_completion(ctlr_info_t *h)
 {
-#ifdef CONFIG_CISS_SCSI_TAPE
-	/* Any rejects from sendcmd() lying around? Process them first */
-	if (h->scsi_rejects.ncompletions == 0)
-		return h->access.command_completed(h);
-	else {
-		struct sendcmd_reject_list *srl;
-		int n;
-		srl = &h->scsi_rejects;
-		n = --srl->ncompletions;
-		/* printk("cciss%d: processing saved reject\n", h->ctlr); */
-		printk("p");
-		return srl->complete[n];
-	}
-#else
 	return h->access.command_completed(h);
-#endif
 }
 
 static inline int interrupt_pending(ctlr_info_t *h)
 {
-#ifdef CONFIG_CISS_SCSI_TAPE
-	return (h->access.intr_pending(h)
-		|| (h->scsi_rejects.ncompletions > 0));
-#else
 	return h->access.intr_pending(h);
-#endif
 }
 
 static inline long interrupt_not_for_us(ctlr_info_t *h)
 {
-#ifdef CONFIG_CISS_SCSI_TAPE
-	return (((h->access.intr_pending(h) == 0) ||
-		 (h->interrupts_enabled == 0))
-		&& (h->scsi_rejects.ncompletions == 0));
-#else
 	return (((h->access.intr_pending(h) == 0) ||
 		 (h->interrupts_enabled == 0)));
-#endif
 }
 
 static irqreturn_t do_cciss_intr(int irq, void *dev_id)
@@ -4054,15 +3977,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 		printk(KERN_ERR "cciss: out of memory");
 		goto clean4;
 	}
-#ifdef CONFIG_CISS_SCSI_TAPE
-	hba[i]->scsi_rejects.complete =
-	    kmalloc(sizeof(hba[i]->scsi_rejects.complete[0]) *
-		    (hba[i]->nr_cmds + 5), GFP_KERNEL);
-	if (hba[i]->scsi_rejects.complete == NULL) {
-		printk(KERN_ERR "cciss: out of memory");
-		goto clean4;
-	}
-#endif
 	spin_lock_init(&hba[i]->lock);
 
 	/* Initialize the pdev driver private data.
@@ -4122,9 +4036,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 
 clean4:
 	kfree(inq_buff);
-#ifdef CONFIG_CISS_SCSI_TAPE
-	kfree(hba[i]->scsi_rejects.complete);
-#endif
 	kfree(hba[i]->cmd_pool_bits);
 	if (hba[i]->cmd_pool)
 		pci_free_consistent(hba[i]->pdev,
@@ -4242,9 +4153,6 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 	pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
 			    hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
 	kfree(hba[i]->cmd_pool_bits);
-#ifdef CONFIG_CISS_SCSI_TAPE
-	kfree(hba[i]->scsi_rejects.complete);
-#endif
 	/*
 	 * Deliberately omit pci_disable_device(): it does something nasty to
 	 * Smart Array controllers that pci_enable_device does not undo
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 25cd58e..06a5db2 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -53,14 +53,6 @@ typedef struct _drive_info_struct
 	char rev[REV_LEN + 1];       /* SCSI revision string */
 } drive_info_struct;
 
-#ifdef CONFIG_CISS_SCSI_TAPE
-
-struct sendcmd_reject_list {
-	int ncompletions;
-	unsigned long *complete; /* array of NR_CMDS tags */
-};
-
-#endif
 struct ctlr_info 
 {
 	int	ctlr;
@@ -128,7 +120,6 @@ struct ctlr_info
 	void *scsi_ctlr; /* ptr to structure containing scsi related stuff */
 	/* list of block side commands the scsi error handling sucked up */
 	/* and saved for later processing */
-	struct sendcmd_reject_list scsi_rejects;
 #endif
 	unsigned char alive;
 	struct completion *rescan_wait;
-- 
cgit v0.10.2


From 3969251b80a7b143d01576780073fe0cc9ef6253 Mon Sep 17 00:00:00 2001
From: "scameron@beardog.cca.cpqcorp.net" <scameron@beardog.cca.cpqcorp.net>
Date: Mon, 8 Jun 2009 16:10:57 -0500
Subject: cciss: decode unit attention in SCSI error handling code

Make SCSI reset error handler decode unit attention ASC
and after a target reset wait for a unit attention that indicates
a reset occurred rather than just for any old unit attention.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index d2eb489..3315268 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -1604,16 +1604,25 @@ static int wait_for_device_to_become_ready(ctlr_info_t *h,
 
 		(void) process_sendcmd_error(h, c);
 
-		if (rc == 0 && c->err_info->CommandStatus == CMD_SUCCESS)
-			break;
+		if (rc != 0)
+			goto retry_tur;
 
-		if (rc == 0 &&
-			c->err_info->CommandStatus == CMD_TARGET_STATUS &&
-			c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION &&
-			(c->err_info->SenseInfo[2] == NO_SENSE ||
-			c->err_info->SenseInfo[2] == UNIT_ATTENTION))
+		if (c->err_info->CommandStatus == CMD_SUCCESS)
 			break;
 
+		if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+			c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION) {
+			if (c->err_info->SenseInfo[2] == NO_SENSE)
+				break;
+			if (c->err_info->SenseInfo[2] == UNIT_ATTENTION) {
+				unsigned char asc;
+				asc = c->err_info->SenseInfo[12];
+				check_for_unit_attention(h, c);
+				if (asc == POWER_OR_RESET)
+					break;
+			}
+		}
+retry_tur:
 		printk(KERN_WARNING "cciss%d: Waiting %d secs "
 			"for device to become ready.\n",
 			h->ctlr, waittime / HZ);
-- 
cgit v0.10.2


From 9df1bb9b516daeece159ab7fb262d01a0359247c Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 9 Jun 2009 06:22:57 +0200
Subject: Revert "block: Fix bounce limit setting in DM"

This reverts commit a05c0205ba031c01bba33a21bf0a35920eb64833.

DM doesn't need to access the bounce_pfn directly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 9acd0b7..8d33934 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -194,23 +194,6 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
- * blk_queue_bounce_pfn - set the bounce buffer limit for queue
- * @q: the request queue for the device
- * @pfn: max address
- *
- * Description:
- *    This function is similar to blk_queue_bounce_limit except it
- *    neither changes allocation flags, nor does it set up the ISA DMA
- *    pool. This function should only be used by stacking drivers.
- *    Hardware drivers should use blk_queue_bounce_limit instead.
- */
-void blk_queue_bounce_pfn(struct request_queue *q, u64 pfn)
-{
-	q->limits.bounce_pfn = pfn;
-}
-EXPORT_SYMBOL(blk_queue_bounce_pfn);
-
-/**
  * blk_queue_max_sectors - set max sectors for a request for this queue
  * @q:  the request queue for the device
  * @max_sectors:  max sectors in the usual 512b unit
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 3ca1604..e9a73bb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -920,7 +920,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	blk_queue_max_segment_size(q, t->limits.max_segment_size);
 	blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
 	blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
-	blk_queue_bounce_pfn(q, t->limits.bounce_pfn);
+	blk_queue_bounce_limit(q, t->limits.bounce_pfn);
 
 	if (t->limits.no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 989aa17..5e740a1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -910,7 +910,6 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
-extern void blk_queue_bounce_pfn(struct request_queue *, u64);
 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
-- 
cgit v0.10.2


From 77634f33d4078542cf1087995cced0ffdae25aa2 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 9 Jun 2009 06:23:22 +0200
Subject: block: Add missing bounce_pfn stacking and fix comments

DM no longer needs to set limits explicitly when calling blk_stack_limits.
Let the latter automatically deal with bounce_pfn scaling.

Fix kerneldoc variable names.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8d33934..1c4df9b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -445,7 +445,7 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t:	the stacking driver limits (top)
- * @bdev:  the underlying queue limits (bottom)
+ * @b:  the underlying queue limits (bottom)
  * @offset:  offset to beginning of data within component device
  *
  * Description:
@@ -458,6 +458,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 {
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
 
 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
 					    b->seg_boundary_mask);
@@ -504,7 +505,7 @@ EXPORT_SYMBOL(blk_stack_limits);
 
 /**
  * disk_stack_limits - adjust queue limits for stacked drivers
- * @t:	MD/DM gendisk (top)
+ * @disk:  MD/DM gendisk (top)
  * @bdev:  the underlying block device (bottom)
  * @offset:  offset to beginning of data within component device
  *
-- 
cgit v0.10.2


From f001a70cdc61c01452d42e8b32fd7c7842ef62d5 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 9 Jun 2009 14:30:31 +1000
Subject: md/raid5: use conf->raid_disks in preference to mddev->raid_disk

mddev->raid_disks can be changed and any time by a request from
user-space.  It is a suggestion as to what number of raid_disks is
desired.

conf->raid_disks can only be changed by the raid5 module with suitable
locks in place.  It is a statement as to the current number of
raid_disks.

There are two places where the latter should be used, but the former
is used.  This can lead to a crash when reshaping an array.

This patch changes to mddev-> to conf->

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5d400ae..75469e6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3288,7 +3288,7 @@ static void unplug_slaves(mddev_t *mddev)
 	int i;
 
 	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks; i++) {
+	for (i = 0; i < conf->raid_disks; i++) {
 		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
 			struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
@@ -4034,7 +4034,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 	 * We don't need to check the 'failed' flag as when that gets set,
 	 * recovery aborts.
 	 */
-	for (i=0; i<mddev->raid_disks; i++)
+	for (i = 0; i < conf->raid_disks; i++)
 		if (conf->disks[i].rdev == NULL)
 			still_degraded = 1;
 
-- 
cgit v0.10.2


From a8c906ca3f63d05f0d25490cf82276f73c6fe095 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 9 Jun 2009 14:39:59 +1000
Subject: md/raid5 - avoid deadlocks in get_active_stripe during reshape

md has functionality to 'quiesce' and array so that all pending
IO completed and no new IO starts.  This is used to achieve a
stable state before making internal changes.

Currently this quiescing applies equally to normal IO, resync
IO, and reshape IO.
However there is a problem with applying it to reshape IO.
Reshape can have multiple 'stripe_heads' that must be active together.
If the quiesce come between allocating the first and the last of
such a collection, then we deadlock, as the last will not be allocated
until the quiesce is lifted, the quiesce will not be lifted until the
first (which has been allocated) gets used, and that first cannot be
used until the last is allocated.

It is not necessary to inhibit reshape IO when a quiesce is
requested.  Those places in the code that require a full quiesce will
ensure the reshape thread is not running at all.

So allow reshape requests to get access to new stripe_heads without
being blocked by a 'quiesce'.

This only affects in-place reshapes (i.e. where the array does not
grow or shrink) and these are only newly supported.  So this patch is
not needed in earlier kernels.

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 75469e6..59f2ec0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q);
 
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
-		  int previous, int noblock)
+		  int previous, int noblock, int noquiesce)
 {
 	struct stripe_head *sh;
 
@@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
 
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
-				    conf->quiesce == 0,
+				    conf->quiesce == 0 || noquiesce,
 				    conf->device_lock, /* nothing */);
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
@@ -2671,7 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 			sector_t bn = compute_blocknr(sh, i, 1);
 			sector_t s = raid5_compute_sector(conf, bn, 0,
 							  &dd_idx, NULL);
-			sh2 = get_active_stripe(conf, s, 0, 1);
+			sh2 = get_active_stripe(conf, s, 0, 1, 1);
 			if (sh2 == NULL)
 				/* so far only the early blocks of this stripe
 				 * have been requested.  When later blocks
@@ -2944,7 +2944,7 @@ static bool handle_stripe5(struct stripe_head *sh)
 	/* Finish reconstruct operations initiated by the expansion process */
 	if (sh->reconstruct_state == reconstruct_state_result) {
 		struct stripe_head *sh2
-			= get_active_stripe(conf, sh->sector, 1, 1);
+			= get_active_stripe(conf, sh->sector, 1, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
 			/* sh cannot be written until sh2 has been read.
 			 * so arrange for sh to be delayed a little
@@ -3189,7 +3189,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 
 	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
 		struct stripe_head *sh2
-			= get_active_stripe(conf, sh->sector, 1, 1);
+			= get_active_stripe(conf, sh->sector, 1, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
 			/* sh cannot be written until sh2 has been read.
 			 * so arrange for sh to be delayed a little
@@ -3675,7 +3675,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
 			(unsigned long long)logical_sector);
 
 		sh = get_active_stripe(conf, new_sector, previous,
-				       (bi->bi_rw&RWA_MASK));
+				       (bi->bi_rw&RWA_MASK), 0);
 		if (sh) {
 			if (unlikely(previous)) {
 				/* expansion might have moved on while waiting for a
@@ -3873,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
 		int j;
 		int skipped = 0;
-		sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
+		sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
 		set_bit(STRIPE_EXPANDING, &sh->state);
 		atomic_inc(&conf->reshape_stripes);
 		/* If any of this stripe is beyond the end of the old
@@ -3922,7 +3922,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 	if (last_sector >= mddev->dev_sectors)
 		last_sector = mddev->dev_sectors - 1;
 	while (first_sector <= last_sector) {
-		sh = get_active_stripe(conf, first_sector, 1, 0);
+		sh = get_active_stripe(conf, first_sector, 1, 0, 1);
 		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 		set_bit(STRIPE_HANDLE, &sh->state);
 		release_stripe(sh);
@@ -4022,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 
 	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 
-	sh = get_active_stripe(conf, sector_nr, 0, 1);
+	sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
 	if (sh == NULL) {
-		sh = get_active_stripe(conf, sector_nr, 0, 0);
+		sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
 		/* make sure we don't swamp the stripe cache if someone else
 		 * is trying to get access
 		 */
@@ -4086,7 +4086,7 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 			/* already done this stripe */
 			continue;
 
-		sh = get_active_stripe(conf, sector, 0, 1);
+		sh = get_active_stripe(conf, sector, 0, 1, 0);
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
-- 
cgit v0.10.2


From c399f3be82e1249221a7f410855ab1aa747fa82b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 9 Jun 2009 08:16:20 +0200
Subject: ALSA: ctxfi - Add missing start check in atc_pcm_playback_start()

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 4e25b24..799eb98 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -392,6 +392,10 @@ static int atc_pcm_playback_start(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	unsigned int max_cisz;
 	struct src *src = apcm->src;
 
+	if (apcm->started)
+		return 0;
+	apcm->started = 1;
+
 	max_cisz = src->multi * src->rsc.msr;
 	max_cisz = 0x80 * (max_cisz < 8 ? max_cisz : 8);
 
-- 
cgit v0.10.2


From 5242bc7613311aa1a3d5ed41e9cf81015b65563f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 9 Jun 2009 08:17:14 +0200
Subject: ALSA: ctxfi - Check the presence of SRC instance in PCM pointer
 callbacks

The SRC instances may not exist when PCM pointer callback is called at
the state before initialization is finished.  Add the NULL check just
to be sure.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 799eb98..e54006e 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -445,6 +445,8 @@ atc_pcm_playback_position(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	u32 size, max_cisz;
 	int position;
 
+	if (!src)
+		return 0;
 	position = src->ops->get_ca(src);
 
 	size = apcm->vm_block->size;
@@ -782,6 +784,8 @@ atc_pcm_capture_position(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 {
 	struct src *src = apcm->src;
 
+	if (!src)
+		return 0;
 	return src->ops->get_ca(src) - apcm->vm_block->addr;
 }
 
-- 
cgit v0.10.2


From a5990dc5b96f537618b0f057c8723a6a0b0cdc74 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 9 Jun 2009 08:19:02 +0200
Subject: ALSA: ctxfi - Clear PCM resources at hw_params and hw_free

Currently the PCM resources are allocated only once and ever in prepare
callback, assuming that the PCM parameters are never changed.  But it's
not true.

This patch adds the call of atc->pcm_release_resources() at hw_params
and hw_free callbacks to assure that the PCM setup is done correctly
for each h/w parameter changes.

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index e54006e..80fb2ba 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -523,7 +523,7 @@ atc_pcm_capture_get_resources(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 	struct src_node_conf_t src_node_conf[2] = {{0} };
 
 	/* first release old resources */
-	atc->pcm_release_resources(atc, apcm);
+	atc_pcm_release_resources(atc, apcm);
 
 	/* The numbers of converting SRCs and SRCIMPs should be determined
 	 * by pitch value. */
@@ -802,7 +802,7 @@ static int spdif_passthru_playback_get_resources(struct ct_atc *atc,
 	unsigned int pitch, rsr = atc->pll_rate;
 
 	/* first release old resources */
-	atc->pcm_release_resources(atc, apcm);
+	atc_pcm_release_resources(atc, apcm);
 
 	/* Get SRC resource */
 	desc.multi = apcm->substream->runtime->channels;
diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c
index 870fa17..9e5c0c4 100644
--- a/sound/pci/ctxfi/ctpcm.c
+++ b/sound/pci/ctxfi/ctpcm.c
@@ -176,12 +176,26 @@ static int ct_pcm_playback_close(struct snd_pcm_substream *substream)
 static int ct_pcm_hw_params(struct snd_pcm_substream *substream,
 				     struct snd_pcm_hw_params *hw_params)
 {
-	return snd_pcm_lib_malloc_pages(substream,
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct ct_atc_pcm *apcm = substream->runtime->private_data;
+	int err;
+
+	err = snd_pcm_lib_malloc_pages(substream,
 					params_buffer_bytes(hw_params));
+	if (err < 0)
+		return err;
+	/* clear previous resources */
+	atc->pcm_release_resources(atc, apcm);
+	return err;
 }
 
 static int ct_pcm_hw_free(struct snd_pcm_substream *substream)
 {
+	struct ct_atc *atc = snd_pcm_substream_chip(substream);
+	struct ct_atc_pcm *apcm = substream->runtime->private_data;
+
+	/* clear previous resources */
+	atc->pcm_release_resources(atc, apcm);
 	/* Free snd-allocated pages */
 	return snd_pcm_lib_free_pages(substream);
 }
-- 
cgit v0.10.2


From 0e6e0271a210817e202c8a4bfffbde3e3c0616d1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 9 Jun 2009 16:32:22 +1000
Subject: md/raid5: fix bug in reshape code when chunk_size decreases.

Now that we support changing the chunksize, we calculate
"reshape_sectors" to be the max of number of sectors in old
and new chunk size.
However there is one please where we still use 'chunksize'
rather than 'reshape_sectors'.
This causes a reshape that reduces the size of chunks to freeze.

Signed-off-by: NeilBrown <neilb@suse.de>

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 59f2ec0..bb37fb1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3916,7 +3916,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
 		raid5_compute_sector(conf, stripe_addr*(new_data_disks),
 				     1, &dd_idx, NULL);
 	last_sector =
-		raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
+		raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
 					    *(new_data_disks) - 1),
 				     1, &dd_idx, NULL);
 	if (last_sector >= mddev->dev_sectors)
-- 
cgit v0.10.2


From 367cd31ee0cbc948fe3b83960b1dbf931e2eaa90 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Tue, 28 Apr 2009 20:51:52 +0530
Subject: ARM: OMAP4: SMP: Add OMAP4430 SMP board files

This patch adds SMP platform files support for OMAP4430SDP. TI's OMAP4430
SOC is based on ARM Cortex-A9 SMP architecture. It's a dual core SOC
with GIC used for interrupt handling and SCU for cache coherency.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>

diff --git a/arch/arm/mach-omap2/omap-headsmp.S b/arch/arm/mach-omap2/omap-headsmp.S
new file mode 100644
index 0000000..4afadba
--- /dev/null
+++ b/arch/arm/mach-omap2/omap-headsmp.S
@@ -0,0 +1,46 @@
+/*
+ * Secondary CPU startup routine source file.
+ *
+ * Copyright (C) 2009 Texas Instruments, Inc.
+ *
+ * Author:
+ *      Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * Interface functions needed for the SMP. This file is based on arm
+ * realview smp platform.
+ * Copyright (c) 2003 ARM Limited.
+ *
+ * This program is free software,you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+/* Physical address needed since MMU not enabled yet on secondary core */
+#define OMAP4_AUX_CORE_BOOT1_PA			0x48281804
+
+	__INIT
+
+/*
+ * OMAP4 specific entry point for secondary CPU to jump from ROM
+ * code.  This routine also provides a holding flag into which
+ * secondary core is held until we're ready for it to initialise.
+ * The primary core will update the this flag using a hardware
+ * register AuxCoreBoot1.
+ */
+ENTRY(omap_secondary_startup)
+	mrc	p15, 0, r0, c0, c0, 5
+	and	r0, r0, #0x0f
+hold:	ldr	r1, =OMAP4_AUX_CORE_BOOT1_PA	@ read from AuxCoreBoot1
+	ldr	r2, [r1]
+	cmp	r2, r0
+	bne	hold
+
+	/*
+	 * we've been released from the cpu_release,secondary_stack
+	 * should now contain the SVC stack for this core
+	 */
+	b	secondary_startup
+
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
new file mode 100644
index 0000000..8fe8d23
--- /dev/null
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -0,0 +1,178 @@
+/*
+ * OMAP4 SMP source file. It contains platform specific fucntions
+ * needed for the linux smp kernel.
+ *
+ * Copyright (C) 2009 Texas Instruments, Inc.
+ *
+ * Author:
+ *      Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * Platform file needed for the OMAP4 SMP. This file is based on arm
+ * realview smp platform.
+ * * Copyright (c) 2002 ARM Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/jiffies.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+
+#include <asm/localtimer.h>
+#include <asm/smp_scu.h>
+#include <mach/hardware.h>
+
+/* Registers used for communicating startup information */
+#define OMAP4_AUXCOREBOOT_REG0		(OMAP44XX_VA_WKUPGEN_BASE + 0x800)
+#define OMAP4_AUXCOREBOOT_REG1		(OMAP44XX_VA_WKUPGEN_BASE + 0x804)
+
+/* SCU base address */
+static void __iomem *scu_base = OMAP44XX_VA_SCU_BASE;
+
+/*
+ * Use SCU config register to count number of cores
+ */
+static inline unsigned int get_core_count(void)
+{
+	if (scu_base)
+		return scu_get_core_count(scu_base);
+	return 1;
+}
+
+static DEFINE_SPINLOCK(boot_lock);
+
+void __cpuinit platform_secondary_init(unsigned int cpu)
+{
+	trace_hardirqs_off();
+
+	/*
+	 * If any interrupts are already enabled for the primary
+	 * core (e.g. timer irq), then they will not have been enabled
+	 * for us: do so
+	 */
+
+	gic_cpu_init(0, IO_ADDRESS(OMAP44XX_GIC_CPU_BASE));
+
+	/*
+	 * Synchronise with the boot thread.
+	 */
+	spin_lock(&boot_lock);
+	spin_unlock(&boot_lock);
+}
+
+int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	unsigned long timeout;
+
+	/*
+	 * Set synchronisation state between this boot processor
+	 * and the secondary one
+	 */
+	spin_lock(&boot_lock);
+
+	/*
+	 * Update the AuxCoreBoot1 with boot state for secondary core.
+	 * omap_secondary_startup() routine will hold the secondary core till
+	 * the AuxCoreBoot1 register is updated with cpu state
+	 * A barrier is added to ensure that write buffer is drained
+	 */
+	__raw_writel(cpu, OMAP4_AUXCOREBOOT_REG1);
+	smp_wmb();
+
+	timeout = jiffies + (1 * HZ);
+	while (time_before(jiffies, timeout))
+		;
+
+	/*
+	 * Now the secondary core is starting up let it run its
+	 * calibrations, then wait for it to finish
+	 */
+	spin_unlock(&boot_lock);
+
+	return 0;
+}
+
+static void __init wakeup_secondary(void)
+{
+	/*
+	 * Write the address of secondary startup routine into the
+	 * AuxCoreBoot0 where ROM code will jump and start executing
+	 * on secondary core once out of WFE
+	 * A barrier is added to ensure that write buffer is drained
+	 */
+	__raw_writel(virt_to_phys(omap_secondary_startup),	   \
+					OMAP4_AUXCOREBOOT_REG0);
+	smp_wmb();
+
+	/*
+	 * Send a 'sev' to wake the secondary core from WFE.
+	 */
+	set_event();
+	mb();
+}
+
+/*
+ * Initialise the CPU possible map early - this describes the CPUs
+ * which may be present or become present in the system.
+ */
+void __init smp_init_cpus(void)
+{
+	unsigned int i, ncores = get_core_count();
+
+	for (i = 0; i < ncores; i++)
+		set_cpu_possible(i, true);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int ncores = get_core_count();
+	unsigned int cpu = smp_processor_id();
+	int i;
+
+	/* sanity check */
+	if (ncores == 0) {
+		printk(KERN_ERR
+		       "OMAP4: strange core count of 0? Default to 1\n");
+		ncores = 1;
+	}
+
+	if (ncores > NR_CPUS) {
+		printk(KERN_WARNING
+		       "OMAP4: no. of cores (%d) greater than configured "
+		       "maximum of %d - clipping\n",
+		       ncores, NR_CPUS);
+		ncores = NR_CPUS;
+	}
+	smp_store_cpu_info(cpu);
+
+	/*
+	 * are we trying to boot more cores than exist?
+	 */
+	if (max_cpus > ncores)
+		max_cpus = ncores;
+
+	/*
+	 * Initialise the present map, which describes the set of CPUs
+	 * actually populated at the present time.
+	 */
+	for (i = 0; i < max_cpus; i++)
+		set_cpu_present(i, true);
+
+	if (max_cpus > 1) {
+		/*
+		 * Enable the local timer or broadcast device for the
+		 * boot CPU, but only if we have more than one CPU.
+		 */
+		percpu_timer_setup();
+
+		/*
+		 * Initialise the SCU and wake up the secondary core using
+		 * wakeup_secondary().
+		 */
+		scu_enable(scu_base);
+		wakeup_secondary();
+	}
+}
diff --git a/arch/arm/plat-omap/include/mach/smp.h b/arch/arm/plat-omap/include/mach/smp.h
new file mode 100644
index 0000000..dcaa8fd
--- /dev/null
+++ b/arch/arm/plat-omap/include/mach/smp.h
@@ -0,0 +1,51 @@
+/*
+ * OMAP4 machine specific smp.h
+ *
+ * Copyright (C) 2009 Texas Instruments, Inc.
+ *
+ * Author:
+ *	Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * Interface functions needed for the SMP. This file is based on arm
+ * realview smp platform.
+ * Copyright (c) 2003 ARM Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef OMAP_ARCH_SMP_H
+#define OMAP_ARCH_SMP_H
+
+#include <asm/hardware/gic.h>
+
+/*
+ * set_event() is used to wake up secondary core from wfe using sev. ROM
+ * code puts the second core into wfe(standby).
+ *
+ */
+#define set_event()	__asm__ __volatile__ ("sev" : : : "memory")
+
+/* Needed for secondary core boot */
+extern void omap_secondary_startup(void);
+
+/*
+ * We use Soft IRQ1 as the IPI
+ */
+static inline void smp_cross_call(const struct cpumask *mask)
+{
+	gic_raise_softirq(mask, 1);
+}
+
+/*
+ * Read MPIDR: Multiprocessor affinity register
+ */
+#define hard_smp_processor_id()			\
+	({						\
+		unsigned int cpunum;			\
+		__asm__("mrc p15, 0, %0, c0, c0, 5"	\
+			: "=r" (cpunum));		\
+		cpunum &= 0x0F;				\
+	})
+
+#endif
-- 
cgit v0.10.2


From 39e1d4c18f34190c739f765ae56bfaa9cbbc6fdb Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Tue, 28 Apr 2009 20:52:00 +0530
Subject: ARM: OMAP4: SMP: Add mpu timer support for OMAP4430

This patch adds SMP platform specific parts for local(mpu) timer support
for OMAP4430 platform. Each Cortex-a9 core has it's own local timer in the
MPU domain. These timers are not in wakeup domain.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>

diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 2ce474a..97eeeeb 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -38,6 +38,7 @@
 
 #include <asm/mach/time.h>
 #include <mach/dmtimer.h>
+#include <asm/localtimer.h>
 
 /* MAX_GPTIMER_ID: number of GPTIMERs on the chip */
 #define MAX_GPTIMER_ID		12
@@ -229,6 +230,9 @@ static void __init omap2_gp_clocksource_init(void)
 
 static void __init omap2_gp_timer_init(void)
 {
+#ifdef CONFIG_LOCAL_TIMERS
+	twd_base = IO_ADDRESS(OMAP44XX_LOCAL_TWD_BASE);
+#endif
 	omap_dm_timer_init();
 
 	omap2_gp_clockevent_init();
diff --git a/arch/arm/mach-omap2/timer-mpu.c b/arch/arm/mach-omap2/timer-mpu.c
new file mode 100644
index 0000000..c1a650a
--- /dev/null
+++ b/arch/arm/mach-omap2/timer-mpu.c
@@ -0,0 +1,34 @@
+/*
+ * The MPU local timer source file. In OMAP4, both cortex-a9 cores have
+ * own timer in it's MPU domain. These timers will be driving the
+ * linux kernel SMP tick framework when active. These timers are not
+ * part of the wake up domain.
+ *
+ * Copyright (C) 2009 Texas Instruments, Inc.
+ *
+ * Author:
+ *      Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This file is based on arm realview smp platform file.
+ * Copyright (C) 2002 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/clockchips.h>
+#include <asm/irq.h>
+#include <asm/smp_twd.h>
+#include <asm/localtimer.h>
+
+/*
+ * Setup the local clock events for a CPU.
+ */
+void __cpuinit local_timer_setup(struct clock_event_device *evt)
+{
+	evt->irq = INT_44XX_LOCALTIMER_IRQ;
+	twd_timer_setup(evt);
+}
+
diff --git a/arch/arm/plat-omap/include/mach/entry-macro.S b/arch/arm/plat-omap/include/mach/entry-macro.S
index 00f45c0..56426ed 100644
--- a/arch/arm/plat-omap/include/mach/entry-macro.S
+++ b/arch/arm/plat-omap/include/mach/entry-macro.S
@@ -136,6 +136,34 @@
 		cmpne   \irqnr, \tmp
 		cmpcs   \irqnr, \irqnr
 		.endm
+
+		/* We assume that irqstat (the raw value of the IRQ acknowledge
+		 * register) is preserved from the macro above.
+		 * If there is an IPI, we immediately signal end of interrupt
+		 * on the controller, since this requires the original irqstat
+		 * value which we won't easily be able to recreate later.
+		 */
+
+		.macro test_for_ipi, irqnr, irqstat, base, tmp
+		bic	\irqnr, \irqstat, #0x1c00
+		cmp	\irqnr, #16
+		it	cc
+		strcc	\irqstat, [\base, #GIC_CPU_EOI]
+		it	cs
+		cmpcs	\irqnr, \irqnr
+		.endm
+
+		/* As above, this assumes that irqstat and base are preserved */
+
+		.macro test_for_ltirq, irqnr, irqstat, base, tmp
+		bic	\irqnr, \irqstat, #0x1c00
+		mov 	\tmp, #0
+		cmp	\irqnr, #29
+		itt	eq
+		moveq	\tmp, #1
+		streq	\irqstat, [\base, #GIC_CPU_EOI]
+		cmp	\tmp, #0
+		.endm
 #endif
 
 		.macro	irq_prio_table
diff --git a/arch/arm/plat-omap/include/mach/irqs.h b/arch/arm/plat-omap/include/mach/irqs.h
index 8015fe2..fb7cb77 100644
--- a/arch/arm/plat-omap/include/mach/irqs.h
+++ b/arch/arm/plat-omap/include/mach/irqs.h
@@ -427,6 +427,8 @@
 
 
 #define IRQ_GIC_START		32
+#define INT_44XX_LOCALTIMER_IRQ	29
+#define INT_44XX_LOCALWDT_IRQ	30
 
 #define INT_44XX_BENCH_MPU_EMUL	(3 + IRQ_GIC_START)
 #define INT_44XX_SSM_ABORT_IRQ	(6 + IRQ_GIC_START)
-- 
cgit v0.10.2


From 934f8be7b1020ad899bdba62842a5383e4c72aa8 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Tue, 28 Apr 2009 20:52:05 +0530
Subject: ARM: OMAP4: SMP: Enable SMP support for OMAP4430

This patch enables SMP on OMAP4430 SDP platform.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 28248b5..f42cf3a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -843,10 +843,10 @@ source "kernel/time/Kconfig"
 
 config SMP
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP)
+	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP ||ARCH_OMAP4)
 	depends on GENERIC_CLOCKEVENTS
 	select USE_GENERIC_SMP_HELPERS
-	select HAVE_ARM_SCU if ARCH_REALVIEW
+	select HAVE_ARM_SCU if (ARCH_REALVIEW || ARCH_OMAP4)
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
@@ -914,9 +914,9 @@ config HOTPLUG_CPU
 
 config LOCAL_TIMERS
 	bool "Use local timer interrupts"
-	depends on SMP && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || REALVIEW_EB_A9MP)
+	depends on SMP && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || REALVIEW_EB_A9MP || ARCH_OMAP4)
 	default y
-	select HAVE_ARM_TWD if ARCH_REALVIEW
+	select HAVE_ARM_TWD if (ARCH_REALVIEW || ARCH_OMAP4)
 	help
 	  Enable support for local timers on SMP platforms, rather then the
 	  legacy IPI broadcast method.  Local timers allows the system
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 6226e64..735bae5 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -14,6 +14,10 @@ obj-$(CONFIG_ARCH_OMAP3) += $(omap-2-3-common) $(prcm-common) $(clock-common)
 
 obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o
 
+# SMP support ONLY available for OMAP4
+obj-$(CONFIG_SMP)			+= omap-smp.o omap-headsmp.o
+obj-$(CONFIG_LOCAL_TIMERS)		+= timer-mpu.o
+
 # Functions loaded to SRAM
 obj-$(CONFIG_ARCH_OMAP2420)		+= sram242x.o
 obj-$(CONFIG_ARCH_OMAP2430)		+= sram243x.o
-- 
cgit v0.10.2


From b5495610669984af931055e49c1c1d2957950335 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Tue, 28 Apr 2009 20:52:05 +0530
Subject: ARM: OMAP4: SMP: Update defconfig for OMAP4430

This patch updates omap_4430sdp_defconfig to add SMP and LOCAL_TIMER
support for OMAP4430 SDP platform.
Additionally the defconfig is made in sync with 2.6.30-rc7

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>

diff --git a/arch/arm/configs/omap_4430sdp_defconfig b/arch/arm/configs/omap_4430sdp_defconfig
index 67a3a77..23e43ea 100644
--- a/arch/arm/configs/omap_4430sdp_defconfig
+++ b/arch/arm/configs/omap_4430sdp_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29
-# Fri April 19 19:58:24 20089
+# Linux kernel version: 2.6.30-rc7
+# Tue Jun  9 12:36:23 2009
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
@@ -9,17 +9,13 @@ CONFIG_GENERIC_GPIO=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_MMU=y
-# CONFIG_NO_IOPORT is not set
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_STACKTRACE_SUPPORT=y
-CONFIG_HAVE_LATENCYTOP_SUPPORT=y
 CONFIG_LOCKDEP_SUPPORT=y
 CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 CONFIG_HARDIRQS_SW_RESEND=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
@@ -30,7 +26,7 @@ CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 # General setup
 #
 CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
@@ -62,6 +58,9 @@ CONFIG_SYSFS_DEPRECATED_V2=y
 # CONFIG_NAMESPACES is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -71,6 +70,7 @@ CONFIG_UID16=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -84,16 +84,20 @@ CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
 CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLUB_DEBUG=y
 CONFIG_COMPAT_BRK=y
 # CONFIG_SLAB is not set
-# CONFIG_SLUB is not set
+CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_HAVE_CLK=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -104,9 +108,9 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -133,12 +137,13 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
 # CONFIG_ARCH_VERSATILE is not set
 # CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
 # CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_MXC is not set
 # CONFIG_ARCH_NETX is not set
 # CONFIG_ARCH_H720X is not set
-# CONFIG_ARCH_IMX is not set
 # CONFIG_ARCH_IOP13XX is not set
 # CONFIG_ARCH_IOP32X is not set
 # CONFIG_ARCH_IOP33X is not set
@@ -147,14 +152,16 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
 # CONFIG_ARCH_IXP4XX is not set
 # CONFIG_ARCH_L7200 is not set
 # CONFIG_ARCH_KIRKWOOD is not set
-# CONFIG_ARCH_KS8695 is not set
-# CONFIG_ARCH_NS9XXX is not set
 # CONFIG_ARCH_LOKI is not set
 # CONFIG_ARCH_MV78XX0 is not set
-# CONFIG_ARCH_MXC is not set
 # CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_MMP is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_W90X900 is not set
 # CONFIG_ARCH_PNX4008 is not set
 # CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_MSM is not set
 # CONFIG_ARCH_RPC is not set
 # CONFIG_ARCH_SA1100 is not set
 # CONFIG_ARCH_S3C2410 is not set
@@ -163,8 +170,6 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
 # CONFIG_ARCH_LH7A40X is not set
 # CONFIG_ARCH_DAVINCI is not set
 CONFIG_ARCH_OMAP=y
-# CONFIG_ARCH_MSM is not set
-# CONFIG_ARCH_W90X900 is not set
 
 #
 # TI OMAP Implementations
@@ -178,8 +183,6 @@ CONFIG_ARCH_OMAP4=y
 # OMAP Feature Selections
 #
 # CONFIG_OMAP_RESET_CLOCKS is not set
-# CONFIG_OMAP_COMPONENT_VERSION is not set
-# CONFIG_OMAP_GPIO_SWITCH is not set
 # CONFIG_OMAP_MUX is not set
 # CONFIG_OMAP_MCBSP is not set
 # CONFIG_OMAP_MBOX_FWK is not set
@@ -191,8 +194,6 @@ CONFIG_OMAP_LL_DEBUG_UART1=y
 # CONFIG_OMAP_LL_DEBUG_UART2 is not set
 # CONFIG_OMAP_LL_DEBUG_UART3 is not set
 
-
-
 #
 # OMAP Board Type
 #
@@ -224,7 +225,9 @@ CONFIG_CPU_CP15_MMU=y
 CONFIG_CPU_DCACHE_DISABLE=y
 # CONFIG_CPU_BPREDICT_DISABLE is not set
 CONFIG_HAS_TLS_REG=y
-# CONFIG_OUTER_CACHE is not set
+# CONFIG_ARM_ERRATA_430973 is not set
+# CONFIG_ARM_ERRATA_458693 is not set
+# CONFIG_ARM_ERRATA_460075 is not set
 CONFIG_ARM_GIC=y
 
 #
@@ -240,17 +243,23 @@ CONFIG_ARM_GIC=y
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_SMP=y
+CONFIG_HAVE_ARM_SCU=y
+CONFIG_HAVE_ARM_TWD=y
 CONFIG_VMSPLIT_3G=y
 # CONFIG_VMSPLIT_2G is not set
 # CONFIG_VMSPLIT_1G is not set
 CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_LOCAL_TIMERS=y
 # CONFIG_PREEMPT is not set
 CONFIG_HZ=128
 CONFIG_AEABI=y
 # CONFIG_OABI_COMPAT is not set
-CONFIG_ARCH_FLATMEM_HAS_HOLES=y
 # CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
 # CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+# CONFIG_HIGHMEM is not set
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
 # CONFIG_DISCONTIGMEM_MANUAL is not set
@@ -263,6 +272,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_ZONE_DMA_FLAG=0
 CONFIG_VIRT_TO_BUS=y
 # CONFIG_UNEVICTABLE_LRU is not set
+CONFIG_HAVE_MLOCK=y
 # CONFIG_LEDS is not set
 CONFIG_ALIGNMENT_TRAP=y
 
@@ -288,9 +298,6 @@ CONFIG_CMDLINE="root=/dev/ram0 rw mem=128M console=ttyS0,115200n8 initrd=0x81600
 #
 # At least one emulation must be selected
 #
-CONFIG_FPE_NWFPE=y
-# CONFIG_FPE_NWFPE_XP is not set
-# CONFIG_FPE_FASTFPE is not set
 CONFIG_VFP=y
 CONFIG_VFPv3=y
 # CONFIG_NEON is not set
@@ -299,7 +306,6 @@ CONFIG_VFPv3=y
 # Userspace binary formats
 #
 CONFIG_BINFMT_ELF=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_HAVE_AOUT=y
 # CONFIG_BINFMT_AOUT is not set
 CONFIG_BINFMT_MISC=y
@@ -415,6 +421,7 @@ CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 # CONFIG_R3964 is not set
 # CONFIG_RAW_DRIVER is not set
 # CONFIG_TCG_TPM is not set
@@ -509,22 +516,19 @@ CONFIG_DUMMY_CONSOLE=y
 CONFIG_RTC_LIB=y
 # CONFIG_RTC_CLASS is not set
 # CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_REGULATOR is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 
 #
-# CBUS support
-#
-# CONFIG_CBUS is not set
-
-#
 # File systems
 #
 CONFIG_EXT2_FS=y
 # CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 # CONFIG_EXT3_FS_XATTR is not set
 # CONFIG_EXT4_FS is not set
 CONFIG_JBD=y
@@ -548,6 +552,11 @@ CONFIG_QUOTACTL=y
 # CONFIG_FUSE_FS is not set
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -568,6 +577,7 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 #
 CONFIG_PROC_FS=y
 CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 # CONFIG_TMPFS_POSIX_ACL is not set
@@ -591,6 +601,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -666,11 +677,37 @@ CONFIG_MAGIC_SYSRQ=y
 # CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+CONFIG_SCHED_DEBUG=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_WRITECOUNT is not set
 # CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
 CONFIG_FRAME_POINTER=y
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
@@ -678,8 +715,9 @@ CONFIG_FRAME_POINTER=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_LATENCYTOP is not set
+# CONFIG_PAGE_POISONING is not set
 CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -688,17 +726,21 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_STACK_TRACER is not set
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
+# CONFIG_ARM_UNWIND is not set
 # CONFIG_DEBUG_USER is not set
 # CONFIG_DEBUG_ERRORS is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_ICEDCC is not set
+# CONFIG_DEBUG_LL is not set
 
 #
 # Security options
@@ -712,11 +754,21 @@ CONFIG_CRYPTO=y
 #
 # Crypto core or helper
 #
+# CONFIG_CRYPTO_FIPS is not set
 CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
 CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 # CONFIG_CRYPTO_AUTHENC is not set
 # CONFIG_CRYPTO_TEST is not set
@@ -785,8 +837,15 @@ CONFIG_CRYPTO_DES=y
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
@@ -800,7 +859,8 @@ CONFIG_CRC_T10DIF=y
 CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 CONFIG_LIBCRC32C=y
-CONFIG_PLIST=y
+CONFIG_ZLIB_INFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
-- 
cgit v0.10.2


From 29150078d7a1758df8c7a6cd2ec066ac65e1fab9 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 9 Jun 2009 10:54:18 +0200
Subject: amd-iommu: remove BUS_NOTIFY_BOUND_DRIVER handling

Handling this event causes device assignment in KVM to fail because the
device gets re-attached as soon as the pci-stub registers as the driver
for the device.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 8510e90..8187260 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1145,17 +1145,6 @@ static int device_change_notifier(struct notifier_block *nb,
 			  "to a non-dma-ops domain\n", dev_name(dev));
 
 	switch (action) {
-	case BUS_NOTIFY_BOUND_DRIVER:
-		if (domain)
-			goto out;
-		dma_domain = find_protection_domain(devid);
-		if (!dma_domain)
-			dma_domain = iommu->default_dom;
-		attach_device(iommu, &dma_domain->domain, devid);
-		DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
-			    "%d for device %s\n",
-			    dma_domain->domain.id, dev_name(dev));
-		break;
 	case BUS_NOTIFY_UNBOUND_DRIVER:
 		if (!domain)
 			goto out;
-- 
cgit v0.10.2


From 71ff3bca2f70264effe8cbbdd5bc10cf6be5f2f0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Jun 2009 13:47:33 -0700
Subject: amd-iommu: detach device explicitly before attaching it to a new
 domain

This fixes a bug with a device that could not be assigned to a KVM guest
because it is still assigned to a dma_ops protection domain.

[chrisw: simply remove WARN_ON(), will always fire since dev->driver
will be pci-sub]

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 8187260..772e910 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -2073,7 +2073,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 
 	old_domain = domain_for_device(devid);
 	if (old_domain)
-		return -EBUSY;
+		detach_device(old_domain, devid);
 
 	attach_device(iommu, domain, devid);
 
-- 
cgit v0.10.2


From 151060ac13144208bd7601d17e4c92c59b98072f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 14 Apr 2009 10:54:54 +0900
Subject: CUSE: implement CUSE - Character device in Userspace

CUSE enables implementing character devices in userspace.  With recent
additions of ioctl and poll support, FUSE already has most of what's
necessary to implement character devices.  All CUSE has to do is
bonding all those components - FUSE, chardev and the driver model -
nicely.

When client opens /dev/cuse, kernel starts conversation with
CUSE_INIT.  The client tells CUSE which device it wants to create.  As
the previous patch made fuse_file usable without associated
fuse_inode, CUSE doesn't create super block or inodes.  It attaches
fuse_file to cdev file->private_data during open and set ff->fi to
NULL.  The rest of the operation is almost identical to FUSE direct IO
case.

Each CUSE device has a corresponding directory /sys/class/cuse/DEVNAME
(which is symlink to /sys/devices/virtual/class/DEVNAME if
SYSFS_DEPRECATED is turned off) which hosts "waiting" and "abort"
among other things.  Those two files have the same meaning as the FUSE
control files.

The only notable lacking feature compared to in-kernel implementation
is mmap support.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

diff --git a/fs/Kconfig b/fs/Kconfig
index 9f7270f..525da2e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,6 +62,16 @@ source "fs/autofs/Kconfig"
 source "fs/autofs4/Kconfig"
 source "fs/fuse/Kconfig"
 
+config CUSE
+	tristate "Character device in Userpace support"
+	depends on FUSE_FS
+	help
+	  This FUSE extension allows character devices to be
+	  implemented in userspace.
+
+	  If you want to develop or use userspace character device
+	  based on CUSE, answer Y or M.
+
 config GENERIC_ACL
 	bool
 	select FS_POSIX_ACL
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 7243706..e95eeb4 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -3,5 +3,6 @@
 #
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
+obj-$(CONFIG_CUSE) += cuse.o
 
 fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
new file mode 100644
index 0000000..de792dc
--- /dev/null
+++ b/fs/fuse/cuse.c
@@ -0,0 +1,610 @@
+/*
+ * CUSE: Character device in Userspace
+ *
+ * Copyright (C) 2008-2009  SUSE Linux Products GmbH
+ * Copyright (C) 2008-2009  Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * CUSE enables character devices to be implemented from userland much
+ * like FUSE allows filesystems.  On initialization /dev/cuse is
+ * created.  By opening the file and replying to the CUSE_INIT request
+ * userland CUSE server can create a character device.  After that the
+ * operation is very similar to FUSE.
+ *
+ * A CUSE instance involves the following objects.
+ *
+ * cuse_conn	: contains fuse_conn and serves as bonding structure
+ * channel	: file handle connected to the userland CUSE server
+ * cdev		: the implemented character device
+ * dev		: generic device for cdev
+ *
+ * Note that 'channel' is what 'dev' is in FUSE.  As CUSE deals with
+ * devices, it's called 'channel' to reduce confusion.
+ *
+ * channel determines when the character device dies.  When channel is
+ * closed, everything begins to destruct.  The cuse_conn is taken off
+ * the lookup table preventing further access from cdev, cdev and
+ * generic device are removed and the base reference of cuse_conn is
+ * put.
+ *
+ * On each open, the matching cuse_conn is looked up and if found an
+ * additional reference is taken which is released when the file is
+ * closed.
+ */
+
+#include <linux/fuse.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/magic.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+
+#include "fuse_i.h"
+
+#define CUSE_CONNTBL_LEN	64
+
+struct cuse_conn {
+	struct list_head	list;	/* linked on cuse_conntbl */
+	struct fuse_conn	fc;	/* fuse connection */
+	struct cdev		*cdev;	/* associated character device */
+	struct device		*dev;	/* device representing @cdev */
+
+	/* init parameters, set once during initialization */
+	bool			unrestricted_ioctl;
+};
+
+static DEFINE_SPINLOCK(cuse_lock);		/* protects cuse_conntbl */
+static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
+static struct class *cuse_class;
+
+static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
+{
+	return container_of(fc, struct cuse_conn, fc);
+}
+
+static struct list_head *cuse_conntbl_head(dev_t devt)
+{
+	return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN];
+}
+
+
+/**************************************************************************
+ * CUSE frontend operations
+ *
+ * These are file operations for the character device.
+ *
+ * On open, CUSE opens a file from the FUSE mnt and stores it to
+ * private_data of the open file.  All other ops call FUSE ops on the
+ * FUSE file.
+ */
+
+static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
+			 loff_t *ppos)
+{
+	loff_t pos = 0;
+
+	return fuse_direct_io(file, buf, count, &pos, 0);
+}
+
+static ssize_t cuse_write(struct file *file, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	loff_t pos = 0;
+	/*
+	 * No locking or generic_write_checks(), the server is
+	 * responsible for locking and sanity checks.
+	 */
+	return fuse_direct_io(file, buf, count, &pos, 1);
+}
+
+static int cuse_open(struct inode *inode, struct file *file)
+{
+	dev_t devt = inode->i_cdev->dev;
+	struct cuse_conn *cc = NULL, *pos;
+	int rc;
+
+	/* look up and get the connection */
+	spin_lock(&cuse_lock);
+	list_for_each_entry(pos, cuse_conntbl_head(devt), list)
+		if (pos->dev->devt == devt) {
+			fuse_conn_get(&pos->fc);
+			cc = pos;
+			break;
+		}
+	spin_unlock(&cuse_lock);
+
+	/* dead? */
+	if (!cc)
+		return -ENODEV;
+
+	/*
+	 * Generic permission check is already done against the chrdev
+	 * file, proceed to open.
+	 */
+	rc = fuse_do_open(&cc->fc, 0, file, 0);
+	if (rc)
+		fuse_conn_put(&cc->fc);
+	return rc;
+}
+
+static int cuse_release(struct inode *inode, struct file *file)
+{
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+
+	fuse_sync_release(ff, file->f_flags);
+	fuse_conn_put(fc);
+
+	return 0;
+}
+
+static long cuse_file_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	unsigned int flags = 0;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	unsigned int flags = FUSE_IOCTL_COMPAT;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static const struct file_operations cuse_frontend_fops = {
+	.owner			= THIS_MODULE,
+	.read			= cuse_read,
+	.write			= cuse_write,
+	.open			= cuse_open,
+	.release		= cuse_release,
+	.unlocked_ioctl		= cuse_file_ioctl,
+	.compat_ioctl		= cuse_file_compat_ioctl,
+	.poll			= fuse_file_poll,
+};
+
+
+/**************************************************************************
+ * CUSE channel initialization and destruction
+ */
+
+struct cuse_devinfo {
+	const char		*name;
+};
+
+/**
+ * cuse_parse_one - parse one key=value pair
+ * @pp: i/o parameter for the current position
+ * @end: points to one past the end of the packed string
+ * @keyp: out parameter for key
+ * @valp: out parameter for value
+ *
+ * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends
+ * at @end - 1.  This function parses one pair and set *@keyp to the
+ * start of the key and *@valp to the start of the value.  Note that
+ * the original string is modified such that the key string is
+ * terminated with '\0'.  *@pp is updated to point to the next string.
+ *
+ * RETURNS:
+ * 1 on successful parse, 0 on EOF, -errno on failure.
+ */
+static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
+{
+	char *p = *pp;
+	char *key, *val;
+
+	while (p < end && *p == '\0')
+		p++;
+	if (p == end)
+		return 0;
+
+	if (end[-1] != '\0') {
+		printk(KERN_ERR "CUSE: info not properly terminated\n");
+		return -EINVAL;
+	}
+
+	key = val = p;
+	p += strlen(p);
+
+	if (valp) {
+		strsep(&val, "=");
+		if (!val)
+			val = key + strlen(key);
+		key = strstrip(key);
+		val = strstrip(val);
+	} else
+		key = strstrip(key);
+
+	if (!strlen(key)) {
+		printk(KERN_ERR "CUSE: zero length info key specified\n");
+		return -EINVAL;
+	}
+
+	*pp = p;
+	*keyp = key;
+	if (valp)
+		*valp = val;
+
+	return 1;
+}
+
+/**
+ * cuse_parse_dev_info - parse device info
+ * @p: device info string
+ * @len: length of device info string
+ * @devinfo: out parameter for parsed device info
+ *
+ * Parse @p to extract device info and store it into @devinfo.  String
+ * pointed to by @p is modified by parsing and @devinfo points into
+ * them, so @p shouldn't be freed while @devinfo is in use.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
+{
+	char *end = p + len;
+	char *key, *val;
+	int rc;
+
+	while (true) {
+		rc = cuse_parse_one(&p, end, &key, &val);
+		if (rc < 0)
+			return rc;
+		if (!rc)
+			break;
+		if (strcmp(key, "DEVNAME") == 0)
+			devinfo->name = val;
+		else
+			printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
+			       key);
+	}
+
+	if (!devinfo->name || !strlen(devinfo->name)) {
+		printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void cuse_gendev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+/**
+ * cuse_process_init_reply - finish initializing CUSE channel
+ *
+ * This function creates the character device and sets up all the
+ * required data structures for it.  Please read the comment at the
+ * top of this file for high level overview.
+ */
+static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	struct cuse_conn *cc = fc_to_cc(fc);
+	struct cuse_init_out *arg = &req->misc.cuse_init_out;
+	struct page *page = req->pages[0];
+	struct cuse_devinfo devinfo = { };
+	struct device *dev;
+	struct cdev *cdev;
+	dev_t devt;
+	int rc;
+
+	if (req->out.h.error ||
+	    arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+		goto err;
+	}
+
+	fc->minor = arg->minor;
+	fc->max_read = max_t(unsigned, arg->max_read, 4096);
+	fc->max_write = max_t(unsigned, arg->max_write, 4096);
+
+	/* parse init reply */
+	cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
+
+	rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+				&devinfo);
+	if (rc)
+		goto err;
+
+	/* determine and reserve devt */
+	devt = MKDEV(arg->dev_major, arg->dev_minor);
+	if (!MAJOR(devt))
+		rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
+	else
+		rc = register_chrdev_region(devt, 1, devinfo.name);
+	if (rc) {
+		printk(KERN_ERR "CUSE: failed to register chrdev region\n");
+		goto err;
+	}
+
+	/* devt determined, create device */
+	rc = -ENOMEM;
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		goto err_region;
+
+	device_initialize(dev);
+	dev_set_uevent_suppress(dev, 1);
+	dev->class = cuse_class;
+	dev->devt = devt;
+	dev->release = cuse_gendev_release;
+	dev_set_drvdata(dev, cc);
+	dev_set_name(dev, "%s", devinfo.name);
+
+	rc = device_add(dev);
+	if (rc)
+		goto err_device;
+
+	/* register cdev */
+	rc = -ENOMEM;
+	cdev = cdev_alloc();
+	if (!cdev)
+		goto err_device;
+
+	cdev->owner = THIS_MODULE;
+	cdev->ops = &cuse_frontend_fops;
+
+	rc = cdev_add(cdev, devt, 1);
+	if (rc)
+		goto err_cdev;
+
+	cc->dev = dev;
+	cc->cdev = cdev;
+
+	/* make the device available */
+	spin_lock(&cuse_lock);
+	list_add(&cc->list, cuse_conntbl_head(devt));
+	spin_unlock(&cuse_lock);
+
+	/* announce device availability */
+	dev_set_uevent_suppress(dev, 0);
+	kobject_uevent(&dev->kobj, KOBJ_ADD);
+out:
+	__free_page(page);
+	return;
+
+err_cdev:
+	cdev_del(cdev);
+err_device:
+	put_device(dev);
+err_region:
+	unregister_chrdev_region(devt, 1);
+err:
+	fc->conn_error = 1;
+	goto out;
+}
+
+static int cuse_send_init(struct cuse_conn *cc)
+{
+	int rc;
+	struct fuse_req *req;
+	struct page *page;
+	struct fuse_conn *fc = &cc->fc;
+	struct cuse_init_in *arg;
+
+	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req)) {
+		rc = PTR_ERR(req);
+		goto err;
+	}
+
+	rc = -ENOMEM;
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		goto err_put_req;
+
+	arg = &req->misc.cuse_init_in;
+	arg->major = FUSE_KERNEL_VERSION;
+	arg->minor = FUSE_KERNEL_MINOR_VERSION;
+	arg->flags |= CUSE_UNRESTRICTED_IOCTL;
+	req->in.h.opcode = CUSE_INIT;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct cuse_init_in);
+	req->in.args[0].value = arg;
+	req->out.numargs = 2;
+	req->out.args[0].size = sizeof(struct cuse_init_out);
+	req->out.args[0].value = &req->misc.cuse_init_out;
+	req->out.args[1].size = CUSE_INIT_INFO_MAX;
+	req->out.argvar = 1;
+	req->out.argpages = 1;
+	req->pages[0] = page;
+	req->num_pages = 1;
+	req->end = cuse_process_init_reply;
+	fuse_request_send_background(fc, req);
+
+	return 0;
+
+err_put_req:
+	fuse_put_request(fc, req);
+err:
+	return rc;
+}
+
+static void cuse_fc_release(struct fuse_conn *fc)
+{
+	struct cuse_conn *cc = fc_to_cc(fc);
+	kfree(cc);
+}
+
+/**
+ * cuse_channel_open - open method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being opened
+ *
+ * Userland CUSE server can create a CUSE device by opening /dev/cuse
+ * and replying to the initilaization request kernel sends.  This
+ * function is responsible for handling CUSE device initialization.
+ * Because the fd opened by this function is used during
+ * initialization, this function only creates cuse_conn and sends
+ * init.  The rest is delegated to a kthread.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_open(struct inode *inode, struct file *file)
+{
+	struct cuse_conn *cc;
+	int rc;
+
+	/* set up cuse_conn */
+	cc = kzalloc(sizeof(*cc), GFP_KERNEL);
+	if (!cc)
+		return -ENOMEM;
+
+	fuse_conn_init(&cc->fc);
+
+	INIT_LIST_HEAD(&cc->list);
+	cc->fc.release = cuse_fc_release;
+
+	cc->fc.connected = 1;
+	cc->fc.blocked = 0;
+	rc = cuse_send_init(cc);
+	if (rc) {
+		fuse_conn_put(&cc->fc);
+		return rc;
+	}
+	file->private_data = &cc->fc;	/* channel owns base reference to cc */
+
+	return 0;
+}
+
+/**
+ * cuse_channel_release - release method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being closed
+ *
+ * Disconnect the channel, deregister CUSE device and initiate
+ * destruction by putting the default reference.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_release(struct inode *inode, struct file *file)
+{
+	struct cuse_conn *cc = fc_to_cc(file->private_data);
+	int rc;
+
+	/* remove from the conntbl, no more access from this point on */
+	spin_lock(&cuse_lock);
+	list_del_init(&cc->list);
+	spin_unlock(&cuse_lock);
+
+	/* remove device */
+	if (cc->dev)
+		device_unregister(cc->dev);
+	if (cc->cdev) {
+		unregister_chrdev_region(cc->cdev->dev, 1);
+		cdev_del(cc->cdev);
+	}
+
+	/* kill connection and shutdown channel */
+	fuse_conn_kill(&cc->fc);
+	rc = fuse_dev_release(inode, file);	/* puts the base reference */
+
+	return rc;
+}
+
+static struct file_operations cuse_channel_fops; /* initialized during init */
+
+
+/**************************************************************************
+ * Misc stuff and module initializatiion
+ *
+ * CUSE exports the same set of attributes to sysfs as fusectl.
+ */
+
+static ssize_t cuse_class_waiting_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct cuse_conn *cc = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
+}
+
+static ssize_t cuse_class_abort_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct cuse_conn *cc = dev_get_drvdata(dev);
+
+	fuse_abort_conn(&cc->fc);
+	return count;
+}
+
+static struct device_attribute cuse_class_dev_attrs[] = {
+	__ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
+	__ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
+	{ }
+};
+
+static struct miscdevice cuse_miscdev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "cuse",
+	.fops		= &cuse_channel_fops,
+};
+
+static int __init cuse_init(void)
+{
+	int i, rc;
+
+	/* init conntbl */
+	for (i = 0; i < CUSE_CONNTBL_LEN; i++)
+		INIT_LIST_HEAD(&cuse_conntbl[i]);
+
+	/* inherit and extend fuse_dev_operations */
+	cuse_channel_fops		= fuse_dev_operations;
+	cuse_channel_fops.owner		= THIS_MODULE;
+	cuse_channel_fops.open		= cuse_channel_open;
+	cuse_channel_fops.release	= cuse_channel_release;
+
+	cuse_class = class_create(THIS_MODULE, "cuse");
+	if (IS_ERR(cuse_class))
+		return PTR_ERR(cuse_class);
+
+	cuse_class->dev_attrs = cuse_class_dev_attrs;
+
+	rc = misc_register(&cuse_miscdev);
+	if (rc) {
+		class_destroy(cuse_class);
+		return rc;
+	}
+
+	return 0;
+}
+
+static void __exit cuse_exit(void)
+{
+	misc_deregister(&cuse_miscdev);
+	class_destroy(cuse_class);
+}
+
+module_init(cuse_init);
+module_exit(cuse_exit);
+
+MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
+MODULE_DESCRIPTION("Character device in Userspace");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 162e5de..d41ed59 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -121,6 +121,13 @@ struct fuse_file_lock {
 #define FUSE_BIG_WRITES		(1 << 5)
 
 /**
+ * CUSE INIT request/reply flags
+ *
+ * CUSE_UNRESTRICTED_IOCTL:  use unrestricted ioctl
+ */
+#define CUSE_UNRESTRICTED_IOCTL	(1 << 0)
+
+/**
  * Release flags
  */
 #define FUSE_RELEASE_FLUSH	(1 << 0)
@@ -210,6 +217,9 @@ enum fuse_opcode {
 	FUSE_DESTROY       = 38,
 	FUSE_IOCTL         = 39,
 	FUSE_POLL          = 40,
+
+	/* CUSE specific operations */
+	CUSE_INIT          = 4096,
 };
 
 enum fuse_notify_code {
@@ -401,6 +411,27 @@ struct fuse_init_out {
 	__u32	max_write;
 };
 
+#define CUSE_INIT_INFO_MAX 4096
+
+struct cuse_init_in {
+	__u32	major;
+	__u32	minor;
+	__u32	unused;
+	__u32	flags;
+};
+
+struct cuse_init_out {
+	__u32	major;
+	__u32	minor;
+	__u32	unused;
+	__u32	flags;
+	__u32	max_read;
+	__u32	max_write;
+	__u32	dev_major;		/* chardev major */
+	__u32	dev_minor;		/* chardev minor */
+	__u32	spare[10];
+};
+
 struct fuse_interrupt_in {
 	__u64	unique;
 };
-- 
cgit v0.10.2


From e9a22a13c71986851e931bdfa054f68839ff8576 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 9 Jun 2009 12:00:37 +0200
Subject: amd-iommu: remove unnecessary "AMD IOMMU: " prefix

That prefix is already included in the DUMP_printk macro. So there is no
need to repeat it in the format string.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 772e910..1c60554 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1266,9 +1266,8 @@ static int get_device_resources(struct device *dev,
 			dma_dom = (*iommu)->default_dom;
 		*domain = &dma_dom->domain;
 		attach_device(*iommu, *domain, *bdf);
-		DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
-				"%d for device %s\n",
-				(*domain)->id, dev_name(dev));
+		DUMP_printk("Using protection domain %d for device %s\n",
+			    (*domain)->id, dev_name(dev));
 	}
 
 	if (domain_for_device(_bdf) == NULL)
-- 
cgit v0.10.2


From fdd7b4c3302c93f6833e338903ea77245eb510b4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Jun 2009 04:01:02 -0700
Subject: r8169: fix crash when large packets are received

Michael Tokarev reported receiving a large packet could crash
a machine with RTL8169 NIC.
( original thread at http://lkml.org/lkml/2009/6/8/192 )

Problem is this driver tells that NIC frames up to 16383 bytes
can be received but provides skb to rx ring allocated with
smaller sizes (1536 bytes in case standard 1500 bytes MTU is used)

When a frame larger than what was allocated by driver is received,
dma transfert can occurs past the end of buffer and corrupt
kernel memory.

Fix is to tell to NIC what is the maximum size a frame can be.

This bug is very old, (before git introduction, linux-2.6.10), and
should be backported to stable versions.

Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 8247a94..3b19e0c 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -66,7 +66,6 @@ static const int multicast_filter_limit = 32;
 #define RX_DMA_BURST	6	/* Maximum PCI burst, '6' is 1024 */
 #define TX_DMA_BURST	6	/* Maximum PCI burst, '6' is 1024 */
 #define EarlyTxThld	0x3F	/* 0x3F means NO early transmit */
-#define RxPacketMaxSize	0x3FE8	/* 16K - 1 - ETH_HLEN - VLAN - CRC... */
 #define SafeMtu		0x1c20	/* ... actually life sucks beyond ~7k */
 #define InterFrameGap	0x03	/* 3 means InterFrameGap = the shortest one */
 
@@ -2357,10 +2356,10 @@ static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
 	return cmd;
 }
 
-static void rtl_set_rx_max_size(void __iomem *ioaddr)
+static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
 {
 	/* Low hurts. Let's disable the filtering. */
-	RTL_W16(RxMaxSize, 16383);
+	RTL_W16(RxMaxSize, rx_buf_sz);
 }
 
 static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
@@ -2407,7 +2406,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	if ((tp->mac_version == RTL_GIGA_MAC_VER_01) ||
 	    (tp->mac_version == RTL_GIGA_MAC_VER_02) ||
@@ -2668,7 +2667,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
 
@@ -2846,7 +2845,7 @@ static void rtl_hw_start_8101(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr);
+	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
 	tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
 
-- 
cgit v0.10.2


From 52ea3a56a3268bc2a5a7c75e98c81463004e38ef Mon Sep 17 00:00:00 2001
From: Minoru Usui <usui@mxm.nes.nec.co.jp>
Date: Tue, 9 Jun 2009 04:03:09 -0700
Subject: cls_cgroup: Fix oops when user send improperly 'tc filter add'
 request

I found a bug in cls_cgroup_change() in cls_cgroup.c.
cls_cgroup_change() expected tca[TCA_OPTIONS] was set from user space properly,
but tc in iproute2-2.6.29-1 (which I used) didn't set it.

In the current source code of tc in git, it set tca[TCA_OPTIONS].

  git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git

If we always use a newest iproute2 in git when we use cls_cgroup,
we don't face this oops probably.
But I think, kernel shouldn't panic regardless of use program's behaviour.

Signed-off-by: Minoru Usui <usui@mxm.nes.nec.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index cc29b44..e5becb9 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -167,6 +167,9 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
 	struct tcf_exts e;
 	int err;
 
+	if (!tca[TCA_OPTIONS])
+		return -EINVAL;
+
 	if (head == NULL) {
 		if (!handle)
 			return -EINVAL;
-- 
cgit v0.10.2


From 1d589bb16b825b3a7b4edd34d997f1f1f953033d Mon Sep 17 00:00:00 2001
From: john cooper <john.cooper@redhat.com>
Date: Tue, 9 Jun 2009 14:41:40 +0200
Subject: Add serial number support for virtio_blk, V4a

This patch extracts the opaque data from pci i/o
region 0 via the added VIRTIO_BLK_F_IDENTIFY
field.  By convention this data takes the form of
that returned by an ATA IDENTIFY DEVICE command,
however the driver (except for structure size)
makes no interpretation of the data.  The structure
data is copied wholesale to userspace via a
HDIO_GET_IDENTITY ioctl command (eg: hdparm -i <dev>).

Signed-off-by: john cooper <john.cooper@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c4845b1..c0facaa 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -171,11 +171,43 @@ static void do_virtblk_request(struct request_queue *q)
 		vblk->vq->vq_ops->kick(vblk->vq);
 }
 
+/* return ATA identify data
+ */
+static int virtblk_identify(struct gendisk *disk, void *argp)
+{
+	struct virtio_blk *vblk = disk->private_data;
+	void *opaque;
+	int err = -ENOMEM;
+
+	opaque = kmalloc(VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
+	if (!opaque)
+		goto out;
+
+	err = virtio_config_buf(vblk->vdev, VIRTIO_BLK_F_IDENTIFY,
+		offsetof(struct virtio_blk_config, identify), opaque,
+		VIRTIO_BLK_ID_BYTES);
+
+	if (err)
+		goto out_kfree;
+
+	if (copy_to_user(argp, opaque, VIRTIO_BLK_ID_BYTES))
+		err = -EFAULT;
+
+out_kfree:
+	kfree(opaque);
+out:
+	return err;
+}
+
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct virtio_blk *vblk = disk->private_data;
+	void __user *argp = (void __user *)data;
+
+	if (cmd == HDIO_GET_IDENTITY)
+		return virtblk_identify(disk, argp);
 
 	/*
 	 * Only allow the generic SCSI ioctls if the host can support it.
@@ -183,8 +215,7 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
 		return -ENOIOCTLCMD;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
-			      (void __user *)data);
+	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
 }
 
 /* We provide getgeo only to please some old bootloader/partitioning tools */
@@ -390,7 +421,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
-	VIRTIO_BLK_F_SCSI,
+	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 4dbcbc1..be7d255 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -16,6 +16,9 @@
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
+#define VIRTIO_BLK_F_IDENTIFY	8	/* ATA IDENTIFY supported */
+
+#define VIRTIO_BLK_ID_BYTES	(sizeof(__u16[256]))	/* IDENTIFY DATA */
 
 struct virtio_blk_config
 {
@@ -33,6 +36,7 @@ struct virtio_blk_config
 	} geometry;
 	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
 	__u32 blk_size;
+	__u8 identify[VIRTIO_BLK_ID_BYTES];
 } __attribute__((packed));
 
 /* These two define direction. */
-- 
cgit v0.10.2


From 0281b5dc0350cbf6dd21ed558a33cccce77abc02 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 6 Jun 2009 14:50:36 -0700
Subject: cpumask: introduce zalloc_cpumask_var

So can get cpumask_var with cpumask_clear

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9f31538..c5ac87c 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1022,6 +1022,8 @@ typedef struct cpumask *cpumask_var_t;
 
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
+bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
+bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
 void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
 void free_cpumask_var(cpumask_var_t mask);
 void free_bootmem_cpumask_var(cpumask_var_t mask);
@@ -1040,6 +1042,19 @@ static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
 	return true;
 }
 
+static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+	cpumask_clear(*mask);
+	return true;
+}
+
+static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+					  int node)
+{
+	cpumask_clear(*mask);
+	return true;
+}
+
 static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
 }
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 1f71b97..eb23aaa 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -119,6 +119,12 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 }
 EXPORT_SYMBOL(alloc_cpumask_var_node);
 
+bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
+{
+	return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node);
+}
+EXPORT_SYMBOL(zalloc_cpumask_var_node);
+
 /**
  * alloc_cpumask_var - allocate a struct cpumask
  * @mask: pointer to cpumask_var_t where the cpumask is returned
@@ -135,6 +141,12 @@ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 }
 EXPORT_SYMBOL(alloc_cpumask_var);
 
+bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+	return alloc_cpumask_var(mask, flags | __GFP_ZERO);
+}
+EXPORT_SYMBOL(zalloc_cpumask_var);
+
 /**
  * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena.
  * @mask: pointer to cpumask_var_t where the cpumask is returned
-- 
cgit v0.10.2


From eaa958402ea40851097d051f52ba1bb7a885efe9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 6 Jun 2009 14:51:36 -0700
Subject: cpumask: alloc zeroed cpumask for static cpumask_var_ts

These are defined as static cpumask_var_t so if MAXSMP is not used,
they are cleared already.  Avoid surprises when MAXSMP is enabled.

Signed-off-by: Yinghai Lu <yinghai.lu@kernel.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 54b6de2..752e8c6 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -550,7 +550,7 @@ static int __init acpi_cpufreq_early_init(void)
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
-		if (!alloc_cpumask_var_node(
+		if (!zalloc_cpumask_var_node(
 			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
 			GFP_KERNEL, cpu_to_node(i))) {
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index a8363e5..d47c775 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -322,7 +322,7 @@ static int powernow_acpi_init(void)
 		goto err0;
 	}
 
-	if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+	if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
 								GFP_KERNEL)) {
 		retval = -ENOMEM;
 		goto err05;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 35dc8fb..cf52215 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -887,7 +887,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
-	if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
 		printk(KERN_ERR PFX
 				"unable to alloc powernow_k8_data cpumask\n");
 		ret_val = -ENOMEM;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index c9f1fdc..55c831e 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -471,7 +471,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 
 	if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
 		return -ENOMEM;
-	if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
+	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
 		free_cpumask_var(saved_mask);
 		return -ENOMEM;
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 6fb0b35..09dd1d4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -1163,7 +1163,7 @@ static __init int mce_init_device(void)
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+	zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
 
 	err = mce_init_banks();
 	if (err)
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index ed0c337..8c7b03b 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -832,7 +832,7 @@ static int __init uv_bau_init(void)
 		return 0;
 
 	for_each_possible_cpu(cur_cpu)
-		alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
+		zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
 				       GFP_KERNEL, cpu_to_node(cur_cpu));
 
 	uv_bau_retry_limit = 1;
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 45ad328..23f0fb8 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -844,7 +844,7 @@ static int acpi_processor_add(struct acpi_device *device)
 	if (!pr)
 		return -ENOMEM;
 
-	if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
 		kfree(pr);
 		return -ENOMEM;
 	}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 47d2ad0..6e2ec0b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -808,7 +808,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 		ret = -ENOMEM;
 		goto nomem_out;
 	}
-	if (!alloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
 		free_cpumask_var(policy->cpus);
 		kfree(policy);
 		ret = -ENOMEM;
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index cdd3c89..344712a 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -165,7 +165,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
 		vec->count = 0;
 		if (bootmem)
 			alloc_bootmem_cpumask_var(&vec->mask);
-		else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
+		else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
 			goto cleanup;
 	}
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f2c66f8..9bf0d2a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1591,7 +1591,7 @@ static inline void init_sched_rt_class(void)
 	unsigned int i;
 
 	for_each_possible_cpu(i)
-		alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
+		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
 					GFP_KERNEL, cpu_to_node(i));
 }
 #endif /* CONFIG_SMP */
diff --git a/kernel/smp.c b/kernel/smp.c
index 858baac..ad63d85 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+		if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
 				cpu_to_node(cpu)))
 			return NOTIFY_BAD;
 		break;
-- 
cgit v0.10.2


From 8437a617708d014d6f220df201a24960e00d57b1 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sat, 6 Jun 2009 14:52:35 -0700
Subject: kvm: fix kvm reboot crash when MAXSMP is used

one system was found there is crash during reboot then kvm/MAXSMP
Sending all processes the KILL signal...                              done
Please stand by while rebooting the system...
[ 1721.856538] md: stopping all md devices.
[ 1722.852139] kvm: exiting hardware virtualization
[ 1722.854601] BUG: unable to handle kernel NULL pointer dereference at (null)
[ 1722.872219] IP: [<ffffffff8102c6b6>] hardware_disable+0x4c/0xb4
[ 1722.877955] PGD 0
[ 1722.880042] Oops: 0000 [#1] SMP
[ 1722.892548] last sysfs file: /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/host0/target0:2:0/0:2:0:0/vendor
[ 1722.900977] CPU 9
[ 1722.912606] Modules linked in:
[ 1722.914226] Pid: 0, comm: swapper Not tainted 2.6.30-rc7-tip-01843-g2305324-dirty #299 ...
[ 1722.932589] RIP: 0010:[<ffffffff8102c6b6>]  [<ffffffff8102c6b6>] hardware_disable+0x4c/0xb4
[ 1722.942709] RSP: 0018:ffffc900010b6ed8  EFLAGS: 00010046
[ 1722.956121] RAX: 0000000000000000 RBX: ffffc9000e253140 RCX: 0000000000000009
[ 1722.972202] RDX: 000000000000b020 RSI: ffffc900010c3220 RDI: ffffffffffffd790
[ 1722.977399] RBP: ffffc900010b6f08 R08: 0000000000000000 R09: 0000000000000000
[ 1722.995149] R10: 00000000000004b8 R11: 966912b6c78fddbd R12: 0000000000000009
[ 1723.011551] R13: 000000000000b020 R14: 0000000000000009 R15: 0000000000000000
[ 1723.019898] FS:  0000000000000000(0000) GS:ffffc900010b3000(0000) knlGS:0000000000000000
[ 1723.034389] CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
[ 1723.041164] CR2: 0000000000000000 CR3: 0000000001001000 CR4: 00000000000006e0
[ 1723.056192] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1723.072546] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1723.080562] Process swapper (pid: 0, threadinfo ffff88107e464000, task ffff88047e5a2550)
[ 1723.096144] Stack:
[ 1723.099071]  0000000000000046 ffffc9000e253168 966912b6c78fddbd ffffc9000e253140
[ 1723.115471]  ffff880c7d4304d0 ffffc9000e253168 ffffc900010b6f28 ffffffff81011022
[ 1723.132428]  ffffc900010b6f48 966912b6c78fddbd ffffc900010b6f48 ffffffff8100b83b
[ 1723.141973] Call Trace:
[ 1723.142981]  <IRQ> <0> [<ffffffff81011022>] kvm_arch_hardware_disable+0x26/0x3c
[ 1723.158153]  [<ffffffff8100b83b>] hardware_disable+0x3f/0x55
[ 1723.172168]  [<ffffffff810b95f6>] generic_smp_call_function_interrupt+0x76/0x13c
[ 1723.178836]  [<ffffffff8104cbea>] smp_call_function_interrupt+0x3a/0x5e
[ 1723.194689]  [<ffffffff81035bf3>] call_function_interrupt+0x13/0x20
[ 1723.199750]  <EOI> <0> [<ffffffff814ad3b4>] ? acpi_idle_enter_c1+0xd3/0xf4
[ 1723.217508]  [<ffffffff814ad3ae>] ? acpi_idle_enter_c1+0xcd/0xf4
[ 1723.232172]  [<ffffffff814ad4bc>] ? acpi_idle_enter_bm+0xe7/0x2ce
[ 1723.235141]  [<ffffffff81a8d93f>] ? __atomic_notifier_call_chain+0x0/0xac
[ 1723.253381]  [<ffffffff818c3dff>] ? menu_select+0x58/0xd2
[ 1723.258179]  [<ffffffff818c2c9d>] ? cpuidle_idle_call+0xa4/0xf3
[ 1723.272828]  [<ffffffff81034085>] ? cpu_idle+0xb8/0x101
[ 1723.277085]  [<ffffffff81a80163>] ? start_secondary+0x1bc/0x1d7
[ 1723.293708] Code: b0 00 00 65 48 8b 04 25 28 00 00 00 48 89 45 e0 31 c0 48 8b 04 cd 30 ee 27 82 49 89 cc 49 89 d5 48 8b 04 10 48 8d b8 90 d7 ff ff <48> 8b 87 70 28 00 00 48 8d 98 90 d7 ff ff eb 16 e8 e9 fe ff ff
[ 1723.335524] RIP  [<ffffffff8102c6b6>] hardware_disable+0x4c/0xb4
[ 1723.342076]  RSP <ffffc900010b6ed8>
[ 1723.352021] CR2: 0000000000000000
[ 1723.354348] ---[ end trace e2aec53dae150aa1 ]---

it turns out that we need clear cpus_hardware_enabled in that case.

Reported-and-tested-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4293528..4d0dd39 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2301,7 +2301,7 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	bad_pfn = page_to_pfn(bad_page);
 
-	if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
 	}
-- 
cgit v0.10.2


From c1d4c41f2fdfe66dea957b76d005affba3e56b26 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 9 Jun 2009 15:17:37 +0200
Subject: bsg: setting rq->bio to NULL

Due to commit 1cd96c242a829d52f7a5ae98f554ca9775429685 ("block: WARN
in __blk_put_request() for potential bio leak"), BSG SMP requests get
the false warnings:

WARNING: at block/blk-core.c:1068 __blk_put_request+0x52/0xc0()

This sets rq->bio to NULL to avoid that false warnings.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/bsg.c b/block/bsg.c
index 206060e..dd81be4 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -315,6 +315,7 @@ out:
 	blk_put_request(rq);
 	if (next_rq) {
 		blk_rq_unmap_user(next_rq->bio);
+		next_rq->bio = NULL;
 		blk_put_request(next_rq);
 	}
 	return ERR_PTR(ret);
@@ -448,6 +449,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 		hdr->dout_resid = rq->data_len;
 		hdr->din_resid = rq->next_rq->data_len;
 		blk_rq_unmap_user(bidi_bio);
+		rq->next_rq->bio = NULL;
 		blk_put_request(rq->next_rq);
 	} else if (rq_data_dir(rq) == READ)
 		hdr->din_resid = rq->data_len;
@@ -466,6 +468,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	blk_rq_unmap_user(bio);
 	if (rq->cmd != rq->__cmd)
 		kfree(rq->cmd);
+	rq->bio = NULL;
 	blk_put_request(rq);
 
 	return ret;
-- 
cgit v0.10.2


From 42937e81a82b6bbc51a309c83da140b3a7ca5945 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Mon, 8 Jun 2009 15:55:09 +0200
Subject: x86: Detect use of extended APIC ID for AMD CPUs

Booting a 32-bit kernel on Magny-Cours results in the following panic:

  ...
  Using APIC driver default
  ...
  Overriding APIC driver with bigsmp
  ...
  Getting VERSION: 80050010
  Getting VERSION: 80050010
  Getting ID: 10000000
  Getting ID: ef000000
  Getting LVT0: 700
  Getting LVT1: 10000
  Kernel panic - not syncing: Boot APIC ID in local APIC unexpected (16 vs 0)
  Pid: 1, comm: swapper Not tainted 2.6.30-rcX #2
  Call Trace:
   [<c05194da>] ? panic+0x38/0xd3
   [<c0743102>] ? native_smp_prepare_cpus+0x259/0x31f
   [<c073b19d>] ? kernel_init+0x3e/0x141
   [<c073b15f>] ? kernel_init+0x0/0x141
   [<c020325f>] ? kernel_thread_helper+0x7/0x10

The reason is that default_get_apic_id handled extension of local APIC
ID field just in case of XAPIC.

Thus for this AMD CPU, default_get_apic_id() returns 0 and
bigsmp_get_apic_id() returns 16 which leads to the respective kernel
panic.

This patch introduces a Linux specific feature flag to indicate
support for extended APIC id (8 bits instead of 4 bits width) and sets
the flag on AMD CPUs if applicable.

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: <stable@kernel.org>
LKML-Reference: <20090608135509.GA12431@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 42f2f83..9b2c049 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -410,7 +410,7 @@ static inline unsigned default_get_apic_id(unsigned long x)
 {
 	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
 
-	if (APIC_XAPIC(ver))
+	if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID))
 		return (x >> 24) & 0xFF;
 	else
 		return (x >> 24) & 0x0F;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb83b1c..78dee4f 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -94,6 +94,7 @@
 #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7e4a459..0802e15 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -6,6 +6,7 @@
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
+#include <asm/pci-direct.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/numa_64.h>
@@ -351,6 +352,15 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 		    (c->x86_model == 8 && c->x86_mask >= 8))
 			set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+	/* check CPU config space for extended APIC ID */
+	if (c->x86 >= 0xf) {
+		unsigned int val;
+		val = read_pci_config(0, 24, 0, 0x68);
+		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+	}
+#endif
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
-- 
cgit v0.10.2


From 0eab928221bac8895a0b494a16a8810002bd8645 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 9 Jun 2009 09:54:40 -0400
Subject: ext4: Don't treat a truncation of a zero-length file as
 replace-via-truncate

If a non-existent file is opened via O_WRONLY|O_CREAT|O_TRUNC, there's
no need to treat this as a true file truncation, so we shouldn't
activate the replace-via-truncate hueristic.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2c10d34..875db94 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4127,7 +4127,8 @@ void ext4_truncate(struct inode *inode)
 	if (!ext4_can_truncate(inode))
 		return;
 
-	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
+	if (ei->i_disksize && inode->i_size == 0 &&
+	    !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
 		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
 
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-- 
cgit v0.10.2


From fecc8ac8496fce96069724f54daba8e7078b0082 Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Tue, 9 Jun 2009 21:15:53 +0800
Subject: perf_counter, x86: Correct some event and umask values for Intel
 processors

Correct some event and UMASK values according to Intel SDM,
in the Nehalem and Atom tables.

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090609131553.GA12489@ywang-moblin2.bj.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 56001fe..40978aa 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -119,7 +119,7 @@ static const u64 nehalem_hw_cache_event_ids
  },
  [ C(L1I ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0480, /* L1I.READS                    */
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
 		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
 	},
 	[ C(OP_WRITE) ] = {
@@ -162,7 +162,7 @@ static const u64 nehalem_hw_cache_event_ids
  [ C(ITLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISS_RETIRED            */
+		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = -1,
@@ -291,7 +291,7 @@ static const u64 atom_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2241, /* L1D_CACHE.ST               */
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
 		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_PREFETCH) ] = {
@@ -301,8 +301,8 @@ static const u64 atom_hw_cache_event_ids
  },
  [ C(L1I ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
-		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = -1,
@@ -329,11 +329,11 @@ static const u64 atom_hw_cache_event_ids
  },
  [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
 		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
 		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
 	},
 	[ C(OP_PREFETCH) ] = {
-- 
cgit v0.10.2


From 0b8c3d5ab000c22889af7f9409799a6cdc31a2b2 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <cebbert@redhat.com>
Date: Tue, 9 Jun 2009 10:40:50 -0400
Subject: x86: Clear TS in irq_ts_save() when in an atomic section

The dynamic FPU context allocation changes caused the padlock driver
to generate the below warning. Fix it by masking TS when doing padlock
encryption operations in an atomic section.

This solves:

BUG: sleeping function called from invalid context at mm/slub.c:1602
in_atomic(): 1, irqs_disabled(): 0, pid: 82, name: cryptomgr_test
Pid: 82, comm: cryptomgr_test Not tainted 2.6.29.4-168.test7.fc11.x86_64 #1
Call Trace:
[<ffffffff8103ff16>] __might_sleep+0x10b/0x110
[<ffffffff810cd3b2>] kmem_cache_alloc+0x37/0xf1
[<ffffffff81018505>] init_fpu+0x49/0x8a
[<ffffffff81012a83>] math_state_restore+0x3e/0xbc
[<ffffffff813ac6d0>] do_device_not_available+0x9/0xb
[<ffffffff810123ab>] device_not_available+0x1b/0x20
[<ffffffffa001c066>] ? aes_crypt+0x66/0x74 [padlock_aes]
[<ffffffff8119a51a>] ? blkcipher_walk_next+0x257/0x2e0
[<ffffffff8119a731>] ? blkcipher_walk_first+0x18e/0x19d
[<ffffffffa001c1fe>] aes_encrypt+0x9d/0xe5 [padlock_aes]
[<ffffffffa0027253>] crypt+0x6b/0x114 [xts]
[<ffffffffa001c161>] ? aes_encrypt+0x0/0xe5 [padlock_aes]
[<ffffffffa001c161>] ? aes_encrypt+0x0/0xe5 [padlock_aes]
[<ffffffffa0027390>] encrypt+0x49/0x4b [xts]
[<ffffffff81199acc>] async_encrypt+0x3c/0x3e
[<ffffffff8119dafc>] test_skcipher+0x1da/0x658
[<ffffffff811979c3>] ? crypto_spawn_tfm+0x8e/0xb1
[<ffffffff8119672d>] ? __crypto_alloc_tfm+0x11b/0x15f
[<ffffffff811979c3>] ? crypto_spawn_tfm+0x8e/0xb1
[<ffffffff81199dbe>] ? skcipher_geniv_init+0x2b/0x47
[<ffffffff8119a905>] ? async_chainiv_init+0x5c/0x61
[<ffffffff8119dfdd>] alg_test_skcipher+0x63/0x9b
[<ffffffff8119e1bc>] alg_test+0x12d/0x175
[<ffffffff8119c488>] cryptomgr_test+0x38/0x54
[<ffffffff8119c450>] ? cryptomgr_test+0x0/0x54
[<ffffffff8105c6c9>] kthread+0x4d/0x78
[<ffffffff8101264a>] child_rip+0xa/0x20
[<ffffffff81011f67>] ? restore_args+0x0/0x30
[<ffffffff8105c67c>] ? kthread+0x0/0x78
[<ffffffff81012640>] ? child_rip+0x0/0x20

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <20090609104050.50158cfe@dhcp-100-2-144.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 71c9e51..4aab52f 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -305,18 +305,18 @@ static inline void kernel_fpu_end(void)
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context aswell. To prevent these kernel instructions
- * in interrupt context interact wrongly with other user/kernel fpu usage, we
+ * get used from interrupt context as well. To prevent these kernel instructions
+ * in interrupt context interacting wrongly with other user/kernel fpu usage, we
  * should use them only in the context of irq_ts_save/restore()
  */
 static inline int irq_ts_save(void)
 {
 	/*
-	 * If we are in process context, we are ok to take a spurious DNA fault.
-	 * Otherwise, doing clts() in process context require pre-emption to
-	 * be disabled or some heavy lifting like kernel_fpu_begin()
+	 * If in process context and not atomic, we can take a spurious DNA fault.
+	 * Otherwise, doing clts() in process context requires disabling preemption
+	 * or some heavy lifting like kernel_fpu_begin()
 	 */
-	if (!in_interrupt())
+	if (!in_atomic())
 		return 0;
 
 	if (read_cr0() & X86_CR0_TS) {
-- 
cgit v0.10.2


From 43514774ff40c4fbe0cbbd3d8293a359f1a9fe71 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 8 Jun 2009 18:14:41 -0700
Subject: [SCSI] iscsi class: Add new NETLINK_ISCSI messages for cnic/bnx2i
 driver.

Add ISCSI_NETLINK messages for iSCSI NICs to get information such as
path from userspace.  Original iscsid messages are now always sent as
multicast to group 1.  The new messages are sent to group 2.

The multicast changes were made by Mike Christie.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: Benjamin Li <benli@broadcom.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index d69a53a..f3e6646 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -37,7 +37,6 @@
 #define ISCSI_TRANSPORT_VERSION "2.0-870"
 
 struct iscsi_internal {
-	int daemon_pid;
 	struct scsi_transport_template t;
 	struct iscsi_transport *iscsi_transport;
 	struct list_head list;
@@ -938,23 +937,9 @@ iscsi_if_transport_lookup(struct iscsi_transport *tt)
 }
 
 static int
-iscsi_broadcast_skb(struct sk_buff *skb, gfp_t gfp)
+iscsi_multicast_skb(struct sk_buff *skb, uint32_t group, gfp_t gfp)
 {
-	return netlink_broadcast(nls, skb, 0, 1, gfp);
-}
-
-static int
-iscsi_unicast_skb(struct sk_buff *skb, int pid)
-{
-	int rc;
-
-	rc = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
-	if (rc < 0) {
-		printk(KERN_ERR "iscsi: can not unicast skb (%d)\n", rc);
-		return rc;
-	}
-
-	return 0;
+	return nlmsg_multicast(nls, skb, 0, group, gfp);
 }
 
 int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
@@ -980,7 +965,7 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	memset(ev, 0, sizeof(*ev));
 	ev->transport_handle = iscsi_handle(conn->transport);
@@ -991,10 +976,45 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 	memcpy(pdu, hdr, sizeof(struct iscsi_hdr));
 	memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size);
 
-	return iscsi_unicast_skb(skb, priv->daemon_pid);
+	return iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC);
 }
 EXPORT_SYMBOL_GPL(iscsi_recv_pdu);
 
+int iscsi_offload_mesg(struct Scsi_Host *shost,
+		       struct iscsi_transport *transport, uint32_t type,
+		       char *data, uint16_t data_size)
+{
+	struct nlmsghdr	*nlh;
+	struct sk_buff *skb;
+	struct iscsi_uevent *ev;
+	int len = NLMSG_SPACE(sizeof(*ev) + data_size);
+
+	skb = alloc_skb(len, GFP_NOIO);
+	if (!skb) {
+		printk(KERN_ERR "can not deliver iscsi offload message:OOM\n");
+		return -ENOMEM;
+	}
+
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
+	ev = NLMSG_DATA(nlh);
+	memset(ev, 0, sizeof(*ev));
+	ev->type = type;
+	ev->transport_handle = iscsi_handle(transport);
+	switch (type) {
+	case ISCSI_KEVENT_PATH_REQ:
+		ev->r.req_path.host_no = shost->host_no;
+		break;
+	case ISCSI_KEVENT_IF_DOWN:
+		ev->r.notify_if_down.host_no = shost->host_no;
+		break;
+	}
+
+	memcpy((char *)ev + sizeof(*ev), data, data_size);
+
+	return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_NOIO);
+}
+EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
+
 void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 {
 	struct nlmsghdr	*nlh;
@@ -1014,7 +1034,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 		return;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	ev->transport_handle = iscsi_handle(conn->transport);
 	ev->type = ISCSI_KEVENT_CONN_ERROR;
@@ -1022,7 +1042,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 	ev->r.connerror.cid = conn->cid;
 	ev->r.connerror.sid = iscsi_conn_get_sid(conn);
 
-	iscsi_broadcast_skb(skb, GFP_ATOMIC);
+	iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC);
 
 	iscsi_cls_conn_printk(KERN_INFO, conn, "detected conn error (%d)\n",
 			      error);
@@ -1030,8 +1050,8 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 EXPORT_SYMBOL_GPL(iscsi_conn_error_event);
 
 static int
-iscsi_if_send_reply(int pid, int seq, int type, int done, int multi,
-		      void *payload, int size)
+iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi,
+		    void *payload, int size)
 {
 	struct sk_buff	*skb;
 	struct nlmsghdr	*nlh;
@@ -1045,10 +1065,10 @@ iscsi_if_send_reply(int pid, int seq, int type, int done, int multi,
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, t, (len - sizeof(*nlh)), 0);
 	nlh->nlmsg_flags = flags;
 	memcpy(NLMSG_DATA(nlh), payload, size);
-	return iscsi_unicast_skb(skb, pid);
+	return iscsi_multicast_skb(skb, group, GFP_ATOMIC);
 }
 
 static int
@@ -1085,7 +1105,7 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh)
 			return -ENOMEM;
 		}
 
-		nlhstat = __nlmsg_put(skbstat, priv->daemon_pid, 0, 0,
+		nlhstat = __nlmsg_put(skbstat, 0, 0, 0,
 				      (len - sizeof(*nlhstat)), 0);
 		evstat = NLMSG_DATA(nlhstat);
 		memset(evstat, 0, sizeof(*evstat));
@@ -1109,7 +1129,8 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh)
 		skb_trim(skbstat, NLMSG_ALIGN(actual_size));
 		nlhstat->nlmsg_len = actual_size;
 
-		err = iscsi_unicast_skb(skbstat, priv->daemon_pid);
+		err = iscsi_multicast_skb(skbstat, ISCSI_NL_GRP_ISCSID,
+					  GFP_ATOMIC);
 	} while (err < 0 && err != -ECONNREFUSED);
 
 	return err;
@@ -1143,7 +1164,7 @@ int iscsi_session_event(struct iscsi_cls_session *session,
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	ev->transport_handle = iscsi_handle(session->transport);
 
@@ -1172,7 +1193,7 @@ int iscsi_session_event(struct iscsi_cls_session *session,
 	 * this will occur if the daemon is not up, so we just warn
 	 * the user and when the daemon is restarted it will handle it
 	 */
-	rc = iscsi_broadcast_skb(skb, GFP_KERNEL);
+	rc = iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_KERNEL);
 	if (rc == -ESRCH)
 		iscsi_cls_session_printk(KERN_ERR, session,
 					 "Cannot notify userspace of session "
@@ -1393,7 +1414,31 @@ iscsi_set_host_param(struct iscsi_transport *transport,
 }
 
 static int
-iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev)
+{
+	struct Scsi_Host *shost;
+	struct iscsi_path *params;
+	int err;
+
+	if (!transport->set_path)
+		return -ENOSYS;
+
+	shost = scsi_host_lookup(ev->u.set_path.host_no);
+	if (!shost) {
+		printk(KERN_ERR "set path could not find host no %u\n",
+		       ev->u.set_path.host_no);
+		return -ENODEV;
+	}
+
+	params = (struct iscsi_path *)((char *)ev + sizeof(*ev));
+	err = transport->set_path(shost, params);
+
+	scsi_host_put(shost);
+	return err;
+}
+
+static int
+iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 {
 	int err = 0;
 	struct iscsi_uevent *ev = NLMSG_DATA(nlh);
@@ -1403,6 +1448,11 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct iscsi_cls_conn *conn;
 	struct iscsi_endpoint *ep = NULL;
 
+	if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE)
+		*group = ISCSI_NL_GRP_UIP;
+	else
+		*group = ISCSI_NL_GRP_ISCSID;
+
 	priv = iscsi_if_transport_lookup(iscsi_ptr(ev->transport_handle));
 	if (!priv)
 		return -EINVAL;
@@ -1411,8 +1461,6 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (!try_module_get(transport->owner))
 		return -EINVAL;
 
-	priv->daemon_pid = NETLINK_CREDS(skb)->pid;
-
 	switch (nlh->nlmsg_type) {
 	case ISCSI_UEVENT_CREATE_SESSION:
 		err = iscsi_if_create_session(priv, ep, ev,
@@ -1506,6 +1554,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case ISCSI_UEVENT_SET_HOST_PARAM:
 		err = iscsi_set_host_param(transport, ev);
 		break;
+	case ISCSI_UEVENT_PATH_UPDATE:
+		err = iscsi_set_path(transport, ev);
+		break;
 	default:
 		err = -ENOSYS;
 		break;
@@ -1528,6 +1579,7 @@ iscsi_if_rx(struct sk_buff *skb)
 		uint32_t rlen;
 		struct nlmsghdr	*nlh;
 		struct iscsi_uevent *ev;
+		uint32_t group;
 
 		nlh = nlmsg_hdr(skb);
 		if (nlh->nlmsg_len < sizeof(*nlh) ||
@@ -1540,7 +1592,7 @@ iscsi_if_rx(struct sk_buff *skb)
 		if (rlen > skb->len)
 			rlen = skb->len;
 
-		err = iscsi_if_recv_msg(skb, nlh);
+		err = iscsi_if_recv_msg(skb, nlh, &group);
 		if (err) {
 			ev->type = ISCSI_KEVENT_IF_ERROR;
 			ev->iferror = err;
@@ -1554,8 +1606,7 @@ iscsi_if_rx(struct sk_buff *skb)
 			 */
 			if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
 				break;
-			err = iscsi_if_send_reply(
-				NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
+			err = iscsi_if_send_reply(group, nlh->nlmsg_seq,
 				nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
 		} while (err < 0 && err != -ECONNREFUSED);
 		skb_pull(skb, rlen);
@@ -1803,7 +1854,6 @@ iscsi_register_transport(struct iscsi_transport *tt)
 	if (!priv)
 		return NULL;
 	INIT_LIST_HEAD(&priv->list);
-	priv->daemon_pid = -1;
 	priv->iscsi_transport = tt;
 	priv->t.user_scan = iscsi_user_scan;
 	priv->t.create_work_queue = 1;
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 2c1a4af..4426f00 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -22,6 +22,11 @@
 #define ISCSI_IF_H
 
 #include <scsi/iscsi_proto.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#define ISCSI_NL_GRP_ISCSID	1
+#define ISCSI_NL_GRP_UIP	2
 
 #define UEVENT_BASE			10
 #define KEVENT_BASE			100
@@ -53,6 +58,8 @@ enum iscsi_uevent_e {
 	ISCSI_UEVENT_CREATE_BOUND_SESSION		= UEVENT_BASE + 18,
 	ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST	= UEVENT_BASE + 19,
 
+	ISCSI_UEVENT_PATH_UPDATE	= UEVENT_BASE + 20,
+
 	/* up events */
 	ISCSI_KEVENT_RECV_PDU		= KEVENT_BASE + 1,
 	ISCSI_KEVENT_CONN_ERROR		= KEVENT_BASE + 2,
@@ -60,6 +67,9 @@ enum iscsi_uevent_e {
 	ISCSI_KEVENT_DESTROY_SESSION	= KEVENT_BASE + 4,
 	ISCSI_KEVENT_UNBIND_SESSION	= KEVENT_BASE + 5,
 	ISCSI_KEVENT_CREATE_SESSION	= KEVENT_BASE + 6,
+
+	ISCSI_KEVENT_PATH_REQ		= KEVENT_BASE + 7,
+	ISCSI_KEVENT_IF_DOWN		= KEVENT_BASE + 8,
 };
 
 enum iscsi_tgt_dscvr {
@@ -159,6 +169,9 @@ struct iscsi_uevent {
 			uint32_t	param; /* enum iscsi_host_param */
 			uint32_t	len;
 		} set_host_param;
+		struct msg_set_path {
+			uint32_t	host_no;
+		} set_path;
 	} u;
 	union {
 		/* messages k -> u */
@@ -192,10 +205,39 @@ struct iscsi_uevent {
 		struct msg_transport_connect_ret {
 			uint64_t	handle;
 		} ep_connect_ret;
+		struct msg_req_path {
+			uint32_t	host_no;
+		} req_path;
+		struct msg_notify_if_down {
+			uint32_t	host_no;
+		} notify_if_down;
 	} r;
 } __attribute__ ((aligned (sizeof(uint64_t))));
 
 /*
+ * To keep the struct iscsi_uevent size the same for userspace code
+ * compatibility, the main structure for ISCSI_UEVENT_PATH_UPDATE and
+ * ISCSI_KEVENT_PATH_REQ is defined separately and comes after the
+ * struct iscsi_uevent in the NETLINK_ISCSI message.
+ */
+struct iscsi_path {
+	uint64_t	handle;
+	uint8_t		mac_addr[6];
+	uint8_t		mac_addr_old[6];
+	uint32_t	ip_addr_len;	/* 4 or 16 */
+	union {
+		struct in_addr	v4_addr;
+		struct in6_addr	v6_addr;
+	} src;
+	union {
+		struct in_addr	v4_addr;
+		struct in6_addr	v6_addr;
+	} dst;
+	uint16_t	vlan_id;
+	uint16_t	pmtu;
+} __attribute__ ((aligned (sizeof(uint64_t))));
+
+/*
  * Common error codes
  */
 enum iscsi_err {
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 8cb7a31..349c7f3 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -133,6 +133,7 @@ struct iscsi_transport {
 	void (*ep_disconnect) (struct iscsi_endpoint *ep);
 	int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type,
 			  uint32_t enable, struct sockaddr *dst_addr);
+	int (*set_path) (struct Scsi_Host *shost, struct iscsi_path *params);
 };
 
 /*
@@ -149,6 +150,10 @@ extern void iscsi_conn_error_event(struct iscsi_cls_conn *conn,
 extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 			  char *data, uint32_t data_size);
 
+extern int iscsi_offload_mesg(struct Scsi_Host *shost,
+			      struct iscsi_transport *transport, uint32_t type,
+			      char *data, uint16_t data_size);
+
 struct iscsi_cls_conn {
 	struct list_head conn_list;	/* item in connlist */
 	void *dd_data;			/* LLD private data */
-- 
cgit v0.10.2


From 4edd473f208cff77ce1f7ef26d5a41f31fa198e0 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 8 Jun 2009 18:14:42 -0700
Subject: [SCSI] bnx2: Add support for CNIC driver.

Add interface and functions to support a new CNIC driver to drive
the Broadcom bnx2 hardware for iSCSI offload.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index b0cb29d..3f5fcb0 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -49,6 +49,10 @@
 #include <linux/firmware.h>
 #include <linux/log2.h>
 
+#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+#define BCM_CNIC 1
+#include "cnic_if.h"
+#endif
 #include "bnx2.h"
 #include "bnx2_fw.h"
 
@@ -315,6 +319,158 @@ bnx2_ctx_wr(struct bnx2 *bp, u32 cid_addr, u32 offset, u32 val)
 	spin_unlock_bh(&bp->indirect_lock);
 }
 
+#ifdef BCM_CNIC
+static int
+bnx2_drv_ctl(struct net_device *dev, struct drv_ctl_info *info)
+{
+	struct bnx2 *bp = netdev_priv(dev);
+	struct drv_ctl_io *io = &info->data.io;
+
+	switch (info->cmd) {
+	case DRV_CTL_IO_WR_CMD:
+		bnx2_reg_wr_ind(bp, io->offset, io->data);
+		break;
+	case DRV_CTL_IO_RD_CMD:
+		io->data = bnx2_reg_rd_ind(bp, io->offset);
+		break;
+	case DRV_CTL_CTX_WR_CMD:
+		bnx2_ctx_wr(bp, io->cid_addr, io->offset, io->data);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void bnx2_setup_cnic_irq_info(struct bnx2 *bp)
+{
+	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+	struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
+	int sb_id;
+
+	if (bp->flags & BNX2_FLAG_USING_MSIX) {
+		cp->drv_state |= CNIC_DRV_STATE_USING_MSIX;
+		bnapi->cnic_present = 0;
+		sb_id = bp->irq_nvecs;
+		cp->irq_arr[0].irq_flags |= CNIC_IRQ_FL_MSIX;
+	} else {
+		cp->drv_state &= ~CNIC_DRV_STATE_USING_MSIX;
+		bnapi->cnic_tag = bnapi->last_status_idx;
+		bnapi->cnic_present = 1;
+		sb_id = 0;
+		cp->irq_arr[0].irq_flags &= ~CNIC_IRQ_FL_MSIX;
+	}
+
+	cp->irq_arr[0].vector = bp->irq_tbl[sb_id].vector;
+	cp->irq_arr[0].status_blk = (void *)
+		((unsigned long) bnapi->status_blk.msi +
+		(BNX2_SBLK_MSIX_ALIGN_SIZE * sb_id));
+	cp->irq_arr[0].status_blk_num = sb_id;
+	cp->num_irq = 1;
+}
+
+static int bnx2_register_cnic(struct net_device *dev, struct cnic_ops *ops,
+			      void *data)
+{
+	struct bnx2 *bp = netdev_priv(dev);
+	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+	if (ops == NULL)
+		return -EINVAL;
+
+	if (cp->drv_state & CNIC_DRV_STATE_REGD)
+		return -EBUSY;
+
+	bp->cnic_data = data;
+	rcu_assign_pointer(bp->cnic_ops, ops);
+
+	cp->num_irq = 0;
+	cp->drv_state = CNIC_DRV_STATE_REGD;
+
+	bnx2_setup_cnic_irq_info(bp);
+
+	return 0;
+}
+
+static int bnx2_unregister_cnic(struct net_device *dev)
+{
+	struct bnx2 *bp = netdev_priv(dev);
+	struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
+	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+	cp->drv_state = 0;
+	bnapi->cnic_present = 0;
+	rcu_assign_pointer(bp->cnic_ops, NULL);
+	synchronize_rcu();
+	return 0;
+}
+
+struct cnic_eth_dev *bnx2_cnic_probe(struct net_device *dev)
+{
+	struct bnx2 *bp = netdev_priv(dev);
+	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
+
+	cp->drv_owner = THIS_MODULE;
+	cp->chip_id = bp->chip_id;
+	cp->pdev = bp->pdev;
+	cp->io_base = bp->regview;
+	cp->drv_ctl = bnx2_drv_ctl;
+	cp->drv_register_cnic = bnx2_register_cnic;
+	cp->drv_unregister_cnic = bnx2_unregister_cnic;
+
+	return cp;
+}
+EXPORT_SYMBOL(bnx2_cnic_probe);
+
+static void
+bnx2_cnic_stop(struct bnx2 *bp)
+{
+	struct cnic_ops *c_ops;
+	struct cnic_ctl_info info;
+
+	rcu_read_lock();
+	c_ops = rcu_dereference(bp->cnic_ops);
+	if (c_ops) {
+		info.cmd = CNIC_CTL_STOP_CMD;
+		c_ops->cnic_ctl(bp->cnic_data, &info);
+	}
+	rcu_read_unlock();
+}
+
+static void
+bnx2_cnic_start(struct bnx2 *bp)
+{
+	struct cnic_ops *c_ops;
+	struct cnic_ctl_info info;
+
+	rcu_read_lock();
+	c_ops = rcu_dereference(bp->cnic_ops);
+	if (c_ops) {
+		if (!(bp->flags & BNX2_FLAG_USING_MSIX)) {
+			struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
+
+			bnapi->cnic_tag = bnapi->last_status_idx;
+		}
+		info.cmd = CNIC_CTL_START_CMD;
+		c_ops->cnic_ctl(bp->cnic_data, &info);
+	}
+	rcu_read_unlock();
+}
+
+#else
+
+static void
+bnx2_cnic_stop(struct bnx2 *bp)
+{
+}
+
+static void
+bnx2_cnic_start(struct bnx2 *bp)
+{
+}
+
+#endif
+
 static int
 bnx2_read_phy(struct bnx2 *bp, u32 reg, u32 *val)
 {
@@ -488,6 +644,7 @@ bnx2_napi_enable(struct bnx2 *bp)
 static void
 bnx2_netif_stop(struct bnx2 *bp)
 {
+	bnx2_cnic_stop(bp);
 	bnx2_disable_int_sync(bp);
 	if (netif_running(bp->dev)) {
 		bnx2_napi_disable(bp);
@@ -504,6 +661,7 @@ bnx2_netif_start(struct bnx2 *bp)
 			netif_tx_wake_all_queues(bp->dev);
 			bnx2_napi_enable(bp);
 			bnx2_enable_int(bp);
+			bnx2_cnic_start(bp);
 		}
 	}
 }
@@ -3164,6 +3322,11 @@ bnx2_has_work(struct bnx2_napi *bnapi)
 	if (bnx2_has_fast_work(bnapi))
 		return 1;
 
+#ifdef BCM_CNIC
+	if (bnapi->cnic_present && (bnapi->cnic_tag != sblk->status_idx))
+		return 1;
+#endif
+
 	if ((sblk->status_attn_bits & STATUS_ATTN_EVENTS) !=
 	    (sblk->status_attn_bits_ack & STATUS_ATTN_EVENTS))
 		return 1;
@@ -3193,6 +3356,23 @@ bnx2_chk_missed_msi(struct bnx2 *bp)
 	bp->idle_chk_status_idx = bnapi->last_status_idx;
 }
 
+#ifdef BCM_CNIC
+static void bnx2_poll_cnic(struct bnx2 *bp, struct bnx2_napi *bnapi)
+{
+	struct cnic_ops *c_ops;
+
+	if (!bnapi->cnic_present)
+		return;
+
+	rcu_read_lock();
+	c_ops = rcu_dereference(bp->cnic_ops);
+	if (c_ops)
+		bnapi->cnic_tag = c_ops->cnic_handler(bp->cnic_data,
+						      bnapi->status_blk.msi);
+	rcu_read_unlock();
+}
+#endif
+
 static void bnx2_poll_link(struct bnx2 *bp, struct bnx2_napi *bnapi)
 {
 	struct status_block *sblk = bnapi->status_blk.msi;
@@ -3267,6 +3447,10 @@ static int bnx2_poll(struct napi_struct *napi, int budget)
 
 		work_done = bnx2_poll_work(bp, bnapi, work_done, budget);
 
+#ifdef BCM_CNIC
+		bnx2_poll_cnic(bp, bnapi);
+#endif
+
 		/* bnapi->last_status_idx is used below to tell the hw how
 		 * much work has been processed, so we must read it before
 		 * checking for more work.
@@ -4632,8 +4816,11 @@ bnx2_init_chip(struct bnx2 *bp)
 	val = REG_RD(bp, BNX2_MQ_CONFIG);
 	val &= ~BNX2_MQ_CONFIG_KNL_BYP_BLK_SIZE;
 	val |= BNX2_MQ_CONFIG_KNL_BYP_BLK_SIZE_256;
-	if (CHIP_ID(bp) == CHIP_ID_5709_A0 || CHIP_ID(bp) == CHIP_ID_5709_A1)
-		val |= BNX2_MQ_CONFIG_HALT_DIS;
+	if (CHIP_NUM(bp) == CHIP_NUM_5709) {
+		val |= BNX2_MQ_CONFIG_BIN_MQ_MODE;
+		if (CHIP_REV(bp) == CHIP_REV_Ax)
+			val |= BNX2_MQ_CONFIG_HALT_DIS;
+	}
 
 	REG_WR(bp, BNX2_MQ_CONFIG, val);
 
@@ -7471,7 +7658,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 	INIT_WORK(&bp->reset_task, bnx2_reset_task);
 
 	dev->base_addr = dev->mem_start = pci_resource_start(pdev, 0);
-	mem_len = MB_GET_CID_ADDR(TX_TSS_CID + TX_MAX_TSS_RINGS);
+	mem_len = MB_GET_CID_ADDR(TX_TSS_CID + TX_MAX_TSS_RINGS + 1);
 	dev->mem_end = dev->mem_start + mem_len;
 	dev->irq = pdev->irq;
 
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index 5b570e1..a1ff739 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -361,6 +361,9 @@ struct l2_fhdr {
 #define BNX2_L2CTX_CTX_TYPE_CTX_BD_CHN_TYPE_VALUE	 (1<<28)
 
 #define BNX2_L2CTX_HOST_BDIDX				0x00000004
+#define BNX2_L2CTX_STATUSB_NUM_SHIFT			 16
+#define BNX2_L2CTX_STATUSB_NUM(sb_id)			 \
+	(((sb_id) > 0) ? (((sb_id) + 7) << BNX2_L2CTX_STATUSB_NUM_SHIFT) : 0)
 #define BNX2_L2CTX_HOST_BSEQ				0x00000008
 #define BNX2_L2CTX_NX_BSEQ				0x0000000c
 #define BNX2_L2CTX_NX_BDHADDR_HI			0x00000010
@@ -5900,6 +5903,7 @@ struct l2_fhdr {
 #define BNX2_RXP_FTQ_CTL_CUR_DEPTH			 (0x3ffL<<22)
 
 #define BNX2_RXP_SCRATCH				0x000e0000
+#define BNX2_RXP_SCRATCH_RXP_FLOOD			 0x000e0024
 #define BNX2_RXP_SCRATCH_RSS_TBL_SZ			 0x000e0038
 #define BNX2_RXP_SCRATCH_RSS_TBL			 0x000e003c
 #define BNX2_RXP_SCRATCH_RSS_TBL_MAX_ENTRIES		 128
@@ -6678,6 +6682,11 @@ struct bnx2_napi {
 	u32 			last_status_idx;
 	u32			int_num;
 
+#ifdef BCM_CNIC
+	u32			cnic_tag;
+	int			cnic_present;
+#endif
+
 	struct bnx2_rx_ring_info	rx_ring;
 	struct bnx2_tx_ring_info	tx_ring;
 };
@@ -6727,6 +6736,11 @@ struct bnx2 {
 	int		tx_ring_size;
 	u32		tx_wake_thresh;
 
+#ifdef BCM_CNIC
+	struct cnic_ops		*cnic_ops;
+	void			*cnic_data;
+#endif
+
 	/* End of fields used in the performance code paths. */
 
 	unsigned int		current_interval;
@@ -6885,6 +6899,10 @@ struct bnx2 {
 
 	u32			idle_chk_status_idx;
 
+#ifdef BCM_CNIC
+	struct cnic_eth_dev	cnic_eth_dev;
+#endif
+
 	const struct firmware	*mips_firmware;
 	const struct firmware	*rv2p_firmware;
 };
-- 
cgit v0.10.2


From a463696039f7097ce87c21db3cf5c16cdcb3850d Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 8 Jun 2009 18:14:43 -0700
Subject: [SCSI] cnic: Add new Broadcom CNIC driver.

The CNIC driver controls BNX2 hardware rings and resources used by
iSCSI.  Most hardware resources for iSCSI are separate from those
used for ethernet networking.

iSCSI uses a separate MAC address and IP address.  The CNIC driver
creates a UIO interface to handle the non-offloaded packets such as
ARP, etc in userspace.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 214a92d..f3c4a3b 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2264,6 +2264,17 @@ config BNX2
 	  To compile this driver as a module, choose M here: the module
 	  will be called bnx2.  This is recommended.
 
+config CNIC
+	tristate "Broadcom CNIC support"
+	depends on BNX2
+	depends on UIO
+	help
+	  This driver supports offload features of Broadcom NetXtremeII
+	  gigabit Ethernet cards.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called cnic.  This is recommended.
+
 config SPIDER_NET
 	tristate "Spider Gigabit Ethernet driver"
 	depends on PCI && (PPC_IBM_CELL_BLADE || PPC_CELLEB)
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 1fc4602..e6f1f8c 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -73,6 +73,7 @@ obj-$(CONFIG_STNIC) += stnic.o 8390.o
 obj-$(CONFIG_FEALNX) += fealnx.o
 obj-$(CONFIG_TIGON3) += tg3.o
 obj-$(CONFIG_BNX2) += bnx2.o
+obj-$(CONFIG_CNIC) += cnic.o
 obj-$(CONFIG_BNX2X) += bnx2x.o
 bnx2x-objs := bnx2x_main.o bnx2x_link.o
 spidernet-y += spider_net.o spider_net_ethtool.o
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
new file mode 100644
index 0000000..8d74037
--- /dev/null
+++ b/drivers/net/cnic.c
@@ -0,0 +1,2711 @@
+/* cnic.c: Broadcom CNIC core network driver.
+ *
+ * Copyright (c) 2006-2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Original skeleton written by: John(Zongxi) Chen (zongxi@broadcom.com)
+ * Modified and maintained by: Michael Chan <mchan@broadcom.com>
+ */
+
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/uio_driver.h>
+#include <linux/in.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#define BCM_VLAN 1
+#endif
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <scsi/iscsi_if.h>
+
+#include "cnic_if.h"
+#include "bnx2.h"
+#include "cnic.h"
+#include "cnic_defs.h"
+
+#define DRV_MODULE_NAME		"cnic"
+#define PFX DRV_MODULE_NAME	": "
+
+static char version[] __devinitdata =
+	"Broadcom NetXtreme II CNIC Driver " DRV_MODULE_NAME " v" CNIC_MODULE_VERSION " (" CNIC_MODULE_RELDATE ")\n";
+
+MODULE_AUTHOR("Michael Chan <mchan@broadcom.com> and John(Zongxi) "
+	      "Chen (zongxi@broadcom.com");
+MODULE_DESCRIPTION("Broadcom NetXtreme II CNIC Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(CNIC_MODULE_VERSION);
+
+static LIST_HEAD(cnic_dev_list);
+static DEFINE_RWLOCK(cnic_dev_lock);
+static DEFINE_MUTEX(cnic_lock);
+
+static struct cnic_ulp_ops *cnic_ulp_tbl[MAX_CNIC_ULP_TYPE];
+
+static int cnic_service_bnx2(void *, void *);
+static int cnic_ctl(void *, struct cnic_ctl_info *);
+
+static struct cnic_ops cnic_bnx2_ops = {
+	.cnic_owner	= THIS_MODULE,
+	.cnic_handler	= cnic_service_bnx2,
+	.cnic_ctl	= cnic_ctl,
+};
+
+static void cnic_shutdown_bnx2_rx_ring(struct cnic_dev *);
+static void cnic_init_bnx2_tx_ring(struct cnic_dev *);
+static void cnic_init_bnx2_rx_ring(struct cnic_dev *);
+static int cnic_cm_set_pg(struct cnic_sock *);
+
+static int cnic_uio_open(struct uio_info *uinfo, struct inode *inode)
+{
+	struct cnic_dev *dev = uinfo->priv;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (cp->uio_dev != -1)
+		return -EBUSY;
+
+	cp->uio_dev = iminor(inode);
+
+	cnic_shutdown_bnx2_rx_ring(dev);
+
+	cnic_init_bnx2_tx_ring(dev);
+	cnic_init_bnx2_rx_ring(dev);
+
+	return 0;
+}
+
+static int cnic_uio_close(struct uio_info *uinfo, struct inode *inode)
+{
+	struct cnic_dev *dev = uinfo->priv;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	cp->uio_dev = -1;
+	return 0;
+}
+
+static inline void cnic_hold(struct cnic_dev *dev)
+{
+	atomic_inc(&dev->ref_count);
+}
+
+static inline void cnic_put(struct cnic_dev *dev)
+{
+	atomic_dec(&dev->ref_count);
+}
+
+static inline void csk_hold(struct cnic_sock *csk)
+{
+	atomic_inc(&csk->ref_count);
+}
+
+static inline void csk_put(struct cnic_sock *csk)
+{
+	atomic_dec(&csk->ref_count);
+}
+
+static struct cnic_dev *cnic_from_netdev(struct net_device *netdev)
+{
+	struct cnic_dev *cdev;
+
+	read_lock(&cnic_dev_lock);
+	list_for_each_entry(cdev, &cnic_dev_list, list) {
+		if (netdev == cdev->netdev) {
+			cnic_hold(cdev);
+			read_unlock(&cnic_dev_lock);
+			return cdev;
+		}
+	}
+	read_unlock(&cnic_dev_lock);
+	return NULL;
+}
+
+static void cnic_ctx_wr(struct cnic_dev *dev, u32 cid_addr, u32 off, u32 val)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct drv_ctl_info info;
+	struct drv_ctl_io *io = &info.data.io;
+
+	info.cmd = DRV_CTL_CTX_WR_CMD;
+	io->cid_addr = cid_addr;
+	io->offset = off;
+	io->data = val;
+	ethdev->drv_ctl(dev->netdev, &info);
+}
+
+static void cnic_reg_wr_ind(struct cnic_dev *dev, u32 off, u32 val)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct drv_ctl_info info;
+	struct drv_ctl_io *io = &info.data.io;
+
+	info.cmd = DRV_CTL_IO_WR_CMD;
+	io->offset = off;
+	io->data = val;
+	ethdev->drv_ctl(dev->netdev, &info);
+}
+
+static u32 cnic_reg_rd_ind(struct cnic_dev *dev, u32 off)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct drv_ctl_info info;
+	struct drv_ctl_io *io = &info.data.io;
+
+	info.cmd = DRV_CTL_IO_RD_CMD;
+	io->offset = off;
+	ethdev->drv_ctl(dev->netdev, &info);
+	return io->data;
+}
+
+static int cnic_in_use(struct cnic_sock *csk)
+{
+	return test_bit(SK_F_INUSE, &csk->flags);
+}
+
+static void cnic_kwq_completion(struct cnic_dev *dev, u32 count)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct drv_ctl_info info;
+
+	info.cmd = DRV_CTL_COMPLETION_CMD;
+	info.data.comp.comp_count = count;
+	ethdev->drv_ctl(dev->netdev, &info);
+}
+
+static int cnic_send_nlmsg(struct cnic_local *cp, u32 type,
+			   struct cnic_sock *csk)
+{
+	struct iscsi_path path_req;
+	char *buf = NULL;
+	u16 len = 0;
+	u32 msg_type = ISCSI_KEVENT_IF_DOWN;
+	struct cnic_ulp_ops *ulp_ops;
+
+	if (cp->uio_dev == -1)
+		return -ENODEV;
+
+	if (csk) {
+		len = sizeof(path_req);
+		buf = (char *) &path_req;
+		memset(&path_req, 0, len);
+
+		msg_type = ISCSI_KEVENT_PATH_REQ;
+		path_req.handle = (u64) csk->l5_cid;
+		if (test_bit(SK_F_IPV6, &csk->flags)) {
+			memcpy(&path_req.dst.v6_addr, &csk->dst_ip[0],
+			       sizeof(struct in6_addr));
+			path_req.ip_addr_len = 16;
+		} else {
+			memcpy(&path_req.dst.v4_addr, &csk->dst_ip[0],
+			       sizeof(struct in_addr));
+			path_req.ip_addr_len = 4;
+		}
+		path_req.vlan_id = csk->vlan_id;
+		path_req.pmtu = csk->mtu;
+	}
+
+	rcu_read_lock();
+	ulp_ops = rcu_dereference(cp->ulp_ops[CNIC_ULP_ISCSI]);
+	if (ulp_ops)
+		ulp_ops->iscsi_nl_send_msg(cp->dev, msg_type, buf, len);
+	rcu_read_unlock();
+	return 0;
+}
+
+static int cnic_iscsi_nl_msg_recv(struct cnic_dev *dev, u32 msg_type,
+				  char *buf, u16 len)
+{
+	int rc = -EINVAL;
+
+	switch (msg_type) {
+	case ISCSI_UEVENT_PATH_UPDATE: {
+		struct cnic_local *cp;
+		u32 l5_cid;
+		struct cnic_sock *csk;
+		struct iscsi_path *path_resp;
+
+		if (len < sizeof(*path_resp))
+			break;
+
+		path_resp = (struct iscsi_path *) buf;
+		cp = dev->cnic_priv;
+		l5_cid = (u32) path_resp->handle;
+		if (l5_cid >= MAX_CM_SK_TBL_SZ)
+			break;
+
+		csk = &cp->csk_tbl[l5_cid];
+		csk_hold(csk);
+		if (cnic_in_use(csk)) {
+			memcpy(csk->ha, path_resp->mac_addr, 6);
+			if (test_bit(SK_F_IPV6, &csk->flags))
+				memcpy(&csk->src_ip[0], &path_resp->src.v6_addr,
+				       sizeof(struct in6_addr));
+			else
+				memcpy(&csk->src_ip[0], &path_resp->src.v4_addr,
+				       sizeof(struct in_addr));
+			if (is_valid_ether_addr(csk->ha))
+				cnic_cm_set_pg(csk);
+		}
+		csk_put(csk);
+		rc = 0;
+	}
+	}
+
+	return rc;
+}
+
+static int cnic_offld_prep(struct cnic_sock *csk)
+{
+	if (test_and_set_bit(SK_F_OFFLD_SCHED, &csk->flags))
+		return 0;
+
+	if (!test_bit(SK_F_CONNECT_START, &csk->flags)) {
+		clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int cnic_close_prep(struct cnic_sock *csk)
+{
+	clear_bit(SK_F_CONNECT_START, &csk->flags);
+	smp_mb__after_clear_bit();
+
+	if (test_and_clear_bit(SK_F_OFFLD_COMPLETE, &csk->flags)) {
+		while (test_and_set_bit(SK_F_OFFLD_SCHED, &csk->flags))
+			msleep(1);
+
+		return 1;
+	}
+	return 0;
+}
+
+static int cnic_abort_prep(struct cnic_sock *csk)
+{
+	clear_bit(SK_F_CONNECT_START, &csk->flags);
+	smp_mb__after_clear_bit();
+
+	while (test_and_set_bit(SK_F_OFFLD_SCHED, &csk->flags))
+		msleep(1);
+
+	if (test_and_clear_bit(SK_F_OFFLD_COMPLETE, &csk->flags)) {
+		csk->state = L4_KCQE_OPCODE_VALUE_RESET_COMP;
+		return 1;
+	}
+
+	return 0;
+}
+
+int cnic_register_driver(int ulp_type, struct cnic_ulp_ops *ulp_ops)
+{
+	struct cnic_dev *dev;
+
+	if (ulp_type >= MAX_CNIC_ULP_TYPE) {
+		printk(KERN_ERR PFX "cnic_register_driver: Bad type %d\n",
+		       ulp_type);
+		return -EINVAL;
+	}
+	mutex_lock(&cnic_lock);
+	if (cnic_ulp_tbl[ulp_type]) {
+		printk(KERN_ERR PFX "cnic_register_driver: Type %d has already "
+				    "been registered\n", ulp_type);
+		mutex_unlock(&cnic_lock);
+		return -EBUSY;
+	}
+
+	read_lock(&cnic_dev_lock);
+	list_for_each_entry(dev, &cnic_dev_list, list) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		clear_bit(ULP_F_INIT, &cp->ulp_flags[ulp_type]);
+	}
+	read_unlock(&cnic_dev_lock);
+
+	rcu_assign_pointer(cnic_ulp_tbl[ulp_type], ulp_ops);
+	mutex_unlock(&cnic_lock);
+
+	/* Prevent race conditions with netdev_event */
+	rtnl_lock();
+	read_lock(&cnic_dev_lock);
+	list_for_each_entry(dev, &cnic_dev_list, list) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		if (!test_and_set_bit(ULP_F_INIT, &cp->ulp_flags[ulp_type]))
+			ulp_ops->cnic_init(dev);
+	}
+	read_unlock(&cnic_dev_lock);
+	rtnl_unlock();
+
+	return 0;
+}
+
+int cnic_unregister_driver(int ulp_type)
+{
+	struct cnic_dev *dev;
+
+	if (ulp_type >= MAX_CNIC_ULP_TYPE) {
+		printk(KERN_ERR PFX "cnic_unregister_driver: Bad type %d\n",
+		       ulp_type);
+		return -EINVAL;
+	}
+	mutex_lock(&cnic_lock);
+	if (!cnic_ulp_tbl[ulp_type]) {
+		printk(KERN_ERR PFX "cnic_unregister_driver: Type %d has not "
+				    "been registered\n", ulp_type);
+		goto out_unlock;
+	}
+	read_lock(&cnic_dev_lock);
+	list_for_each_entry(dev, &cnic_dev_list, list) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		if (rcu_dereference(cp->ulp_ops[ulp_type])) {
+			printk(KERN_ERR PFX "cnic_unregister_driver: Type %d "
+			       "still has devices registered\n", ulp_type);
+			read_unlock(&cnic_dev_lock);
+			goto out_unlock;
+		}
+	}
+	read_unlock(&cnic_dev_lock);
+
+	rcu_assign_pointer(cnic_ulp_tbl[ulp_type], NULL);
+
+	mutex_unlock(&cnic_lock);
+	synchronize_rcu();
+	return 0;
+
+out_unlock:
+	mutex_unlock(&cnic_lock);
+	return -EINVAL;
+}
+
+static int cnic_start_hw(struct cnic_dev *);
+static void cnic_stop_hw(struct cnic_dev *);
+
+static int cnic_register_device(struct cnic_dev *dev, int ulp_type,
+				void *ulp_ctx)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_ulp_ops *ulp_ops;
+
+	if (ulp_type >= MAX_CNIC_ULP_TYPE) {
+		printk(KERN_ERR PFX "cnic_register_device: Bad type %d\n",
+		       ulp_type);
+		return -EINVAL;
+	}
+	mutex_lock(&cnic_lock);
+	if (cnic_ulp_tbl[ulp_type] == NULL) {
+		printk(KERN_ERR PFX "cnic_register_device: Driver with type %d "
+				    "has not been registered\n", ulp_type);
+		mutex_unlock(&cnic_lock);
+		return -EAGAIN;
+	}
+	if (rcu_dereference(cp->ulp_ops[ulp_type])) {
+		printk(KERN_ERR PFX "cnic_register_device: Type %d has already "
+		       "been registered to this device\n", ulp_type);
+		mutex_unlock(&cnic_lock);
+		return -EBUSY;
+	}
+
+	clear_bit(ULP_F_START, &cp->ulp_flags[ulp_type]);
+	cp->ulp_handle[ulp_type] = ulp_ctx;
+	ulp_ops = cnic_ulp_tbl[ulp_type];
+	rcu_assign_pointer(cp->ulp_ops[ulp_type], ulp_ops);
+	cnic_hold(dev);
+
+	if (test_bit(CNIC_F_CNIC_UP, &dev->flags))
+		if (!test_and_set_bit(ULP_F_START, &cp->ulp_flags[ulp_type]))
+			ulp_ops->cnic_start(cp->ulp_handle[ulp_type]);
+
+	mutex_unlock(&cnic_lock);
+
+	return 0;
+
+}
+EXPORT_SYMBOL(cnic_register_driver);
+
+static int cnic_unregister_device(struct cnic_dev *dev, int ulp_type)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	if (ulp_type >= MAX_CNIC_ULP_TYPE) {
+		printk(KERN_ERR PFX "cnic_unregister_device: Bad type %d\n",
+		       ulp_type);
+		return -EINVAL;
+	}
+	mutex_lock(&cnic_lock);
+	if (rcu_dereference(cp->ulp_ops[ulp_type])) {
+		rcu_assign_pointer(cp->ulp_ops[ulp_type], NULL);
+		cnic_put(dev);
+	} else {
+		printk(KERN_ERR PFX "cnic_unregister_device: device not "
+		       "registered to this ulp type %d\n", ulp_type);
+		mutex_unlock(&cnic_lock);
+		return -EINVAL;
+	}
+	mutex_unlock(&cnic_lock);
+
+	synchronize_rcu();
+
+	return 0;
+}
+EXPORT_SYMBOL(cnic_unregister_driver);
+
+static int cnic_init_id_tbl(struct cnic_id_tbl *id_tbl, u32 size, u32 start_id)
+{
+	id_tbl->start = start_id;
+	id_tbl->max = size;
+	id_tbl->next = 0;
+	spin_lock_init(&id_tbl->lock);
+	id_tbl->table = kzalloc(DIV_ROUND_UP(size, 32) * 4, GFP_KERNEL);
+	if (!id_tbl->table)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void cnic_free_id_tbl(struct cnic_id_tbl *id_tbl)
+{
+	kfree(id_tbl->table);
+	id_tbl->table = NULL;
+}
+
+static int cnic_alloc_id(struct cnic_id_tbl *id_tbl, u32 id)
+{
+	int ret = -1;
+
+	id -= id_tbl->start;
+	if (id >= id_tbl->max)
+		return ret;
+
+	spin_lock(&id_tbl->lock);
+	if (!test_bit(id, id_tbl->table)) {
+		set_bit(id, id_tbl->table);
+		ret = 0;
+	}
+	spin_unlock(&id_tbl->lock);
+	return ret;
+}
+
+/* Returns -1 if not successful */
+static u32 cnic_alloc_new_id(struct cnic_id_tbl *id_tbl)
+{
+	u32 id;
+
+	spin_lock(&id_tbl->lock);
+	id = find_next_zero_bit(id_tbl->table, id_tbl->max, id_tbl->next);
+	if (id >= id_tbl->max) {
+		id = -1;
+		if (id_tbl->next != 0) {
+			id = find_first_zero_bit(id_tbl->table, id_tbl->next);
+			if (id >= id_tbl->next)
+				id = -1;
+		}
+	}
+
+	if (id < id_tbl->max) {
+		set_bit(id, id_tbl->table);
+		id_tbl->next = (id + 1) & (id_tbl->max - 1);
+		id += id_tbl->start;
+	}
+
+	spin_unlock(&id_tbl->lock);
+
+	return id;
+}
+
+static void cnic_free_id(struct cnic_id_tbl *id_tbl, u32 id)
+{
+	if (id == -1)
+		return;
+
+	id -= id_tbl->start;
+	if (id >= id_tbl->max)
+		return;
+
+	clear_bit(id, id_tbl->table);
+}
+
+static void cnic_free_dma(struct cnic_dev *dev, struct cnic_dma *dma)
+{
+	int i;
+
+	if (!dma->pg_arr)
+		return;
+
+	for (i = 0; i < dma->num_pages; i++) {
+		if (dma->pg_arr[i]) {
+			pci_free_consistent(dev->pcidev, BCM_PAGE_SIZE,
+					    dma->pg_arr[i], dma->pg_map_arr[i]);
+			dma->pg_arr[i] = NULL;
+		}
+	}
+	if (dma->pgtbl) {
+		pci_free_consistent(dev->pcidev, dma->pgtbl_size,
+				    dma->pgtbl, dma->pgtbl_map);
+		dma->pgtbl = NULL;
+	}
+	kfree(dma->pg_arr);
+	dma->pg_arr = NULL;
+	dma->num_pages = 0;
+}
+
+static void cnic_setup_page_tbl(struct cnic_dev *dev, struct cnic_dma *dma)
+{
+	int i;
+	u32 *page_table = dma->pgtbl;
+
+	for (i = 0; i < dma->num_pages; i++) {
+		/* Each entry needs to be in big endian format. */
+		*page_table = (u32) ((u64) dma->pg_map_arr[i] >> 32);
+		page_table++;
+		*page_table = (u32) dma->pg_map_arr[i];
+		page_table++;
+	}
+}
+
+static int cnic_alloc_dma(struct cnic_dev *dev, struct cnic_dma *dma,
+			  int pages, int use_pg_tbl)
+{
+	int i, size;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	size = pages * (sizeof(void *) + sizeof(dma_addr_t));
+	dma->pg_arr = kzalloc(size, GFP_ATOMIC);
+	if (dma->pg_arr == NULL)
+		return -ENOMEM;
+
+	dma->pg_map_arr = (dma_addr_t *) (dma->pg_arr + pages);
+	dma->num_pages = pages;
+
+	for (i = 0; i < pages; i++) {
+		dma->pg_arr[i] = pci_alloc_consistent(dev->pcidev,
+						      BCM_PAGE_SIZE,
+						      &dma->pg_map_arr[i]);
+		if (dma->pg_arr[i] == NULL)
+			goto error;
+	}
+	if (!use_pg_tbl)
+		return 0;
+
+	dma->pgtbl_size = ((pages * 8) + BCM_PAGE_SIZE - 1) &
+			  ~(BCM_PAGE_SIZE - 1);
+	dma->pgtbl = pci_alloc_consistent(dev->pcidev, dma->pgtbl_size,
+					  &dma->pgtbl_map);
+	if (dma->pgtbl == NULL)
+		goto error;
+
+	cp->setup_pgtbl(dev, dma);
+
+	return 0;
+
+error:
+	cnic_free_dma(dev, dma);
+	return -ENOMEM;
+}
+
+static void cnic_free_resc(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int i = 0;
+
+	if (cp->cnic_uinfo) {
+		cnic_send_nlmsg(cp, ISCSI_KEVENT_IF_DOWN, NULL);
+		while (cp->uio_dev != -1 && i < 15) {
+			msleep(100);
+			i++;
+		}
+		uio_unregister_device(cp->cnic_uinfo);
+		kfree(cp->cnic_uinfo);
+		cp->cnic_uinfo = NULL;
+	}
+
+	if (cp->l2_buf) {
+		pci_free_consistent(dev->pcidev, cp->l2_buf_size,
+				    cp->l2_buf, cp->l2_buf_map);
+		cp->l2_buf = NULL;
+	}
+
+	if (cp->l2_ring) {
+		pci_free_consistent(dev->pcidev, cp->l2_ring_size,
+				    cp->l2_ring, cp->l2_ring_map);
+		cp->l2_ring = NULL;
+	}
+
+	for (i = 0; i < cp->ctx_blks; i++) {
+		if (cp->ctx_arr[i].ctx) {
+			pci_free_consistent(dev->pcidev, cp->ctx_blk_size,
+					    cp->ctx_arr[i].ctx,
+					    cp->ctx_arr[i].mapping);
+			cp->ctx_arr[i].ctx = NULL;
+		}
+	}
+	kfree(cp->ctx_arr);
+	cp->ctx_arr = NULL;
+	cp->ctx_blks = 0;
+
+	cnic_free_dma(dev, &cp->gbl_buf_info);
+	cnic_free_dma(dev, &cp->conn_buf_info);
+	cnic_free_dma(dev, &cp->kwq_info);
+	cnic_free_dma(dev, &cp->kcq_info);
+	kfree(cp->iscsi_tbl);
+	cp->iscsi_tbl = NULL;
+	kfree(cp->ctx_tbl);
+	cp->ctx_tbl = NULL;
+
+	cnic_free_id_tbl(&cp->cid_tbl);
+}
+
+static int cnic_alloc_context(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	if (CHIP_NUM(cp) == CHIP_NUM_5709) {
+		int i, k, arr_size;
+
+		cp->ctx_blk_size = BCM_PAGE_SIZE;
+		cp->cids_per_blk = BCM_PAGE_SIZE / 128;
+		arr_size = BNX2_MAX_CID / cp->cids_per_blk *
+			   sizeof(struct cnic_ctx);
+		cp->ctx_arr = kzalloc(arr_size, GFP_KERNEL);
+		if (cp->ctx_arr == NULL)
+			return -ENOMEM;
+
+		k = 0;
+		for (i = 0; i < 2; i++) {
+			u32 j, reg, off, lo, hi;
+
+			if (i == 0)
+				off = BNX2_PG_CTX_MAP;
+			else
+				off = BNX2_ISCSI_CTX_MAP;
+
+			reg = cnic_reg_rd_ind(dev, off);
+			lo = reg >> 16;
+			hi = reg & 0xffff;
+			for (j = lo; j < hi; j += cp->cids_per_blk, k++)
+				cp->ctx_arr[k].cid = j;
+		}
+
+		cp->ctx_blks = k;
+		if (cp->ctx_blks >= (BNX2_MAX_CID / cp->cids_per_blk)) {
+			cp->ctx_blks = 0;
+			return -ENOMEM;
+		}
+
+		for (i = 0; i < cp->ctx_blks; i++) {
+			cp->ctx_arr[i].ctx =
+				pci_alloc_consistent(dev->pcidev, BCM_PAGE_SIZE,
+						     &cp->ctx_arr[i].mapping);
+			if (cp->ctx_arr[i].ctx == NULL)
+				return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+static int cnic_alloc_bnx2_resc(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct uio_info *uinfo;
+	int ret;
+
+	ret = cnic_alloc_dma(dev, &cp->kwq_info, KWQ_PAGE_CNT, 1);
+	if (ret)
+		goto error;
+	cp->kwq = (struct kwqe **) cp->kwq_info.pg_arr;
+
+	ret = cnic_alloc_dma(dev, &cp->kcq_info, KCQ_PAGE_CNT, 1);
+	if (ret)
+		goto error;
+	cp->kcq = (struct kcqe **) cp->kcq_info.pg_arr;
+
+	ret = cnic_alloc_context(dev);
+	if (ret)
+		goto error;
+
+	cp->l2_ring_size = 2 * BCM_PAGE_SIZE;
+	cp->l2_ring = pci_alloc_consistent(dev->pcidev, cp->l2_ring_size,
+					   &cp->l2_ring_map);
+	if (!cp->l2_ring)
+		goto error;
+
+	cp->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
+	cp->l2_buf_size = PAGE_ALIGN(cp->l2_buf_size);
+	cp->l2_buf = pci_alloc_consistent(dev->pcidev, cp->l2_buf_size,
+					   &cp->l2_buf_map);
+	if (!cp->l2_buf)
+		goto error;
+
+	uinfo = kzalloc(sizeof(*uinfo), GFP_ATOMIC);
+	if (!uinfo)
+		goto error;
+
+	uinfo->mem[0].addr = dev->netdev->base_addr;
+	uinfo->mem[0].internal_addr = dev->regview;
+	uinfo->mem[0].size = dev->netdev->mem_end - dev->netdev->mem_start;
+	uinfo->mem[0].memtype = UIO_MEM_PHYS;
+
+	uinfo->mem[1].addr = (unsigned long) cp->status_blk & PAGE_MASK;
+	if (cp->ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX)
+		uinfo->mem[1].size = BNX2_SBLK_MSIX_ALIGN_SIZE * 9;
+	else
+		uinfo->mem[1].size = BNX2_SBLK_MSIX_ALIGN_SIZE;
+	uinfo->mem[1].memtype = UIO_MEM_LOGICAL;
+
+	uinfo->mem[2].addr = (unsigned long) cp->l2_ring;
+	uinfo->mem[2].size = cp->l2_ring_size;
+	uinfo->mem[2].memtype = UIO_MEM_LOGICAL;
+
+	uinfo->mem[3].addr = (unsigned long) cp->l2_buf;
+	uinfo->mem[3].size = cp->l2_buf_size;
+	uinfo->mem[3].memtype = UIO_MEM_LOGICAL;
+
+	uinfo->name = "bnx2_cnic";
+	uinfo->version = CNIC_MODULE_VERSION;
+	uinfo->irq = UIO_IRQ_CUSTOM;
+
+	uinfo->open = cnic_uio_open;
+	uinfo->release = cnic_uio_close;
+
+	uinfo->priv = dev;
+
+	ret = uio_register_device(&dev->pcidev->dev, uinfo);
+	if (ret) {
+		kfree(uinfo);
+		goto error;
+	}
+
+	cp->cnic_uinfo = uinfo;
+
+	return 0;
+
+error:
+	cnic_free_resc(dev);
+	return ret;
+}
+
+static inline u32 cnic_kwq_avail(struct cnic_local *cp)
+{
+	return cp->max_kwq_idx -
+		((cp->kwq_prod_idx - cp->kwq_con_idx) & cp->max_kwq_idx);
+}
+
+static int cnic_submit_bnx2_kwqes(struct cnic_dev *dev, struct kwqe *wqes[],
+				  u32 num_wqes)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct kwqe *prod_qe;
+	u16 prod, sw_prod, i;
+
+	if (!test_bit(CNIC_F_CNIC_UP, &dev->flags))
+		return -EAGAIN;		/* bnx2 is down */
+
+	spin_lock_bh(&cp->cnic_ulp_lock);
+	if (num_wqes > cnic_kwq_avail(cp) &&
+	    !(cp->cnic_local_flags & CNIC_LCL_FL_KWQ_INIT)) {
+		spin_unlock_bh(&cp->cnic_ulp_lock);
+		return -EAGAIN;
+	}
+
+	cp->cnic_local_flags &= ~CNIC_LCL_FL_KWQ_INIT;
+
+	prod = cp->kwq_prod_idx;
+	sw_prod = prod & MAX_KWQ_IDX;
+	for (i = 0; i < num_wqes; i++) {
+		prod_qe = &cp->kwq[KWQ_PG(sw_prod)][KWQ_IDX(sw_prod)];
+		memcpy(prod_qe, wqes[i], sizeof(struct kwqe));
+		prod++;
+		sw_prod = prod & MAX_KWQ_IDX;
+	}
+	cp->kwq_prod_idx = prod;
+
+	CNIC_WR16(dev, cp->kwq_io_addr, cp->kwq_prod_idx);
+
+	spin_unlock_bh(&cp->cnic_ulp_lock);
+	return 0;
+}
+
+static void service_kcqes(struct cnic_dev *dev, int num_cqes)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int i, j;
+
+	i = 0;
+	j = 1;
+	while (num_cqes) {
+		struct cnic_ulp_ops *ulp_ops;
+		int ulp_type;
+		u32 kcqe_op_flag = cp->completed_kcq[i]->kcqe_op_flag;
+		u32 kcqe_layer = kcqe_op_flag & KCQE_FLAGS_LAYER_MASK;
+
+		if (unlikely(kcqe_op_flag & KCQE_RAMROD_COMPLETION))
+			cnic_kwq_completion(dev, 1);
+
+		while (j < num_cqes) {
+			u32 next_op = cp->completed_kcq[i + j]->kcqe_op_flag;
+
+			if ((next_op & KCQE_FLAGS_LAYER_MASK) != kcqe_layer)
+				break;
+
+			if (unlikely(next_op & KCQE_RAMROD_COMPLETION))
+				cnic_kwq_completion(dev, 1);
+			j++;
+		}
+
+		if (kcqe_layer == KCQE_FLAGS_LAYER_MASK_L5_RDMA)
+			ulp_type = CNIC_ULP_RDMA;
+		else if (kcqe_layer == KCQE_FLAGS_LAYER_MASK_L5_ISCSI)
+			ulp_type = CNIC_ULP_ISCSI;
+		else if (kcqe_layer == KCQE_FLAGS_LAYER_MASK_L4)
+			ulp_type = CNIC_ULP_L4;
+		else if (kcqe_layer == KCQE_FLAGS_LAYER_MASK_L2)
+			goto end;
+		else {
+			printk(KERN_ERR PFX "%s: Unknown type of KCQE(0x%x)\n",
+			       dev->netdev->name, kcqe_op_flag);
+			goto end;
+		}
+
+		rcu_read_lock();
+		ulp_ops = rcu_dereference(cp->ulp_ops[ulp_type]);
+		if (likely(ulp_ops)) {
+			ulp_ops->indicate_kcqes(cp->ulp_handle[ulp_type],
+						  cp->completed_kcq + i, j);
+		}
+		rcu_read_unlock();
+end:
+		num_cqes -= j;
+		i += j;
+		j = 1;
+	}
+	return;
+}
+
+static u16 cnic_bnx2_next_idx(u16 idx)
+{
+	return idx + 1;
+}
+
+static u16 cnic_bnx2_hw_idx(u16 idx)
+{
+	return idx;
+}
+
+static int cnic_get_kcqes(struct cnic_dev *dev, u16 hw_prod, u16 *sw_prod)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	u16 i, ri, last;
+	struct kcqe *kcqe;
+	int kcqe_cnt = 0, last_cnt = 0;
+
+	i = ri = last = *sw_prod;
+	ri &= MAX_KCQ_IDX;
+
+	while ((i != hw_prod) && (kcqe_cnt < MAX_COMPLETED_KCQE)) {
+		kcqe = &cp->kcq[KCQ_PG(ri)][KCQ_IDX(ri)];
+		cp->completed_kcq[kcqe_cnt++] = kcqe;
+		i = cp->next_idx(i);
+		ri = i & MAX_KCQ_IDX;
+		if (likely(!(kcqe->kcqe_op_flag & KCQE_FLAGS_NEXT))) {
+			last_cnt = kcqe_cnt;
+			last = i;
+		}
+	}
+
+	*sw_prod = last;
+	return last_cnt;
+}
+
+static void cnic_chk_bnx2_pkt_rings(struct cnic_local *cp)
+{
+	u16 rx_cons = *cp->rx_cons_ptr;
+	u16 tx_cons = *cp->tx_cons_ptr;
+
+	if (cp->tx_cons != tx_cons || cp->rx_cons != rx_cons) {
+		cp->tx_cons = tx_cons;
+		cp->rx_cons = rx_cons;
+		uio_event_notify(cp->cnic_uinfo);
+	}
+}
+
+static int cnic_service_bnx2(void *data, void *status_blk)
+{
+	struct cnic_dev *dev = data;
+	struct status_block *sblk = status_blk;
+	struct cnic_local *cp = dev->cnic_priv;
+	u32 status_idx = sblk->status_idx;
+	u16 hw_prod, sw_prod;
+	int kcqe_cnt;
+
+	if (unlikely(!test_bit(CNIC_F_CNIC_UP, &dev->flags)))
+		return status_idx;
+
+	cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
+
+	hw_prod = sblk->status_completion_producer_index;
+	sw_prod = cp->kcq_prod_idx;
+	while (sw_prod != hw_prod) {
+		kcqe_cnt = cnic_get_kcqes(dev, hw_prod, &sw_prod);
+		if (kcqe_cnt == 0)
+			goto done;
+
+		service_kcqes(dev, kcqe_cnt);
+
+		/* Tell compiler that status_blk fields can change. */
+		barrier();
+		if (status_idx != sblk->status_idx) {
+			status_idx = sblk->status_idx;
+			cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
+			hw_prod = sblk->status_completion_producer_index;
+		} else
+			break;
+	}
+
+done:
+	CNIC_WR16(dev, cp->kcq_io_addr, sw_prod);
+
+	cp->kcq_prod_idx = sw_prod;
+
+	cnic_chk_bnx2_pkt_rings(cp);
+	return status_idx;
+}
+
+static void cnic_service_bnx2_msix(unsigned long data)
+{
+	struct cnic_dev *dev = (struct cnic_dev *) data;
+	struct cnic_local *cp = dev->cnic_priv;
+	struct status_block_msix *status_blk = cp->bnx2_status_blk;
+	u32 status_idx = status_blk->status_idx;
+	u16 hw_prod, sw_prod;
+	int kcqe_cnt;
+
+	cp->kwq_con_idx = status_blk->status_cmd_consumer_index;
+
+	hw_prod = status_blk->status_completion_producer_index;
+	sw_prod = cp->kcq_prod_idx;
+	while (sw_prod != hw_prod) {
+		kcqe_cnt = cnic_get_kcqes(dev, hw_prod, &sw_prod);
+		if (kcqe_cnt == 0)
+			goto done;
+
+		service_kcqes(dev, kcqe_cnt);
+
+		/* Tell compiler that status_blk fields can change. */
+		barrier();
+		if (status_idx != status_blk->status_idx) {
+			status_idx = status_blk->status_idx;
+			cp->kwq_con_idx = status_blk->status_cmd_consumer_index;
+			hw_prod = status_blk->status_completion_producer_index;
+		} else
+			break;
+	}
+
+done:
+	CNIC_WR16(dev, cp->kcq_io_addr, sw_prod);
+	cp->kcq_prod_idx = sw_prod;
+
+	cnic_chk_bnx2_pkt_rings(cp);
+
+	cp->last_status_idx = status_idx;
+	CNIC_WR(dev, BNX2_PCICFG_INT_ACK_CMD, cp->int_num |
+		BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | cp->last_status_idx);
+}
+
+static irqreturn_t cnic_irq(int irq, void *dev_instance)
+{
+	struct cnic_dev *dev = dev_instance;
+	struct cnic_local *cp = dev->cnic_priv;
+	u16 prod = cp->kcq_prod_idx & MAX_KCQ_IDX;
+
+	if (cp->ack_int)
+		cp->ack_int(dev);
+
+	prefetch(cp->status_blk);
+	prefetch(&cp->kcq[KCQ_PG(prod)][KCQ_IDX(prod)]);
+
+	if (likely(test_bit(CNIC_F_CNIC_UP, &dev->flags)))
+		tasklet_schedule(&cp->cnic_irq_task);
+
+	return IRQ_HANDLED;
+}
+
+static void cnic_ulp_stop(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int if_type;
+
+	rcu_read_lock();
+	for (if_type = 0; if_type < MAX_CNIC_ULP_TYPE; if_type++) {
+		struct cnic_ulp_ops *ulp_ops;
+
+		ulp_ops = rcu_dereference(cp->ulp_ops[if_type]);
+		if (!ulp_ops)
+			continue;
+
+		if (test_and_clear_bit(ULP_F_START, &cp->ulp_flags[if_type]))
+			ulp_ops->cnic_stop(cp->ulp_handle[if_type]);
+	}
+	rcu_read_unlock();
+}
+
+static void cnic_ulp_start(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int if_type;
+
+	rcu_read_lock();
+	for (if_type = 0; if_type < MAX_CNIC_ULP_TYPE; if_type++) {
+		struct cnic_ulp_ops *ulp_ops;
+
+		ulp_ops = rcu_dereference(cp->ulp_ops[if_type]);
+		if (!ulp_ops || !ulp_ops->cnic_start)
+			continue;
+
+		if (!test_and_set_bit(ULP_F_START, &cp->ulp_flags[if_type]))
+			ulp_ops->cnic_start(cp->ulp_handle[if_type]);
+	}
+	rcu_read_unlock();
+}
+
+static int cnic_ctl(void *data, struct cnic_ctl_info *info)
+{
+	struct cnic_dev *dev = data;
+
+	switch (info->cmd) {
+	case CNIC_CTL_STOP_CMD:
+		cnic_hold(dev);
+		mutex_lock(&cnic_lock);
+
+		cnic_ulp_stop(dev);
+		cnic_stop_hw(dev);
+
+		mutex_unlock(&cnic_lock);
+		cnic_put(dev);
+		break;
+	case CNIC_CTL_START_CMD:
+		cnic_hold(dev);
+		mutex_lock(&cnic_lock);
+
+		if (!cnic_start_hw(dev))
+			cnic_ulp_start(dev);
+
+		mutex_unlock(&cnic_lock);
+		cnic_put(dev);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void cnic_ulp_init(struct cnic_dev *dev)
+{
+	int i;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	rcu_read_lock();
+	for (i = 0; i < MAX_CNIC_ULP_TYPE_EXT; i++) {
+		struct cnic_ulp_ops *ulp_ops;
+
+		ulp_ops = rcu_dereference(cnic_ulp_tbl[i]);
+		if (!ulp_ops || !ulp_ops->cnic_init)
+			continue;
+
+		if (!test_and_set_bit(ULP_F_INIT, &cp->ulp_flags[i]))
+			ulp_ops->cnic_init(dev);
+
+	}
+	rcu_read_unlock();
+}
+
+static void cnic_ulp_exit(struct cnic_dev *dev)
+{
+	int i;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	rcu_read_lock();
+	for (i = 0; i < MAX_CNIC_ULP_TYPE_EXT; i++) {
+		struct cnic_ulp_ops *ulp_ops;
+
+		ulp_ops = rcu_dereference(cnic_ulp_tbl[i]);
+		if (!ulp_ops || !ulp_ops->cnic_exit)
+			continue;
+
+		if (test_and_clear_bit(ULP_F_INIT, &cp->ulp_flags[i]))
+			ulp_ops->cnic_exit(dev);
+
+	}
+	rcu_read_unlock();
+}
+
+static int cnic_cm_offload_pg(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_offload_pg *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_offload_pg *) &csk->kwqe1;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->op_code = L4_KWQE_OPCODE_VALUE_OFFLOAD_PG;
+	l4kwqe->flags =
+		L4_LAYER_CODE << L4_KWQ_OFFLOAD_PG_LAYER_CODE_SHIFT;
+	l4kwqe->l2hdr_nbytes = ETH_HLEN;
+
+	l4kwqe->da0 = csk->ha[0];
+	l4kwqe->da1 = csk->ha[1];
+	l4kwqe->da2 = csk->ha[2];
+	l4kwqe->da3 = csk->ha[3];
+	l4kwqe->da4 = csk->ha[4];
+	l4kwqe->da5 = csk->ha[5];
+
+	l4kwqe->sa0 = dev->mac_addr[0];
+	l4kwqe->sa1 = dev->mac_addr[1];
+	l4kwqe->sa2 = dev->mac_addr[2];
+	l4kwqe->sa3 = dev->mac_addr[3];
+	l4kwqe->sa4 = dev->mac_addr[4];
+	l4kwqe->sa5 = dev->mac_addr[5];
+
+	l4kwqe->etype = ETH_P_IP;
+	l4kwqe->ipid_count = DEF_IPID_COUNT;
+	l4kwqe->host_opaque = csk->l5_cid;
+
+	if (csk->vlan_id) {
+		l4kwqe->pg_flags |= L4_KWQ_OFFLOAD_PG_VLAN_TAGGING;
+		l4kwqe->vlan_tag = csk->vlan_id;
+		l4kwqe->l2hdr_nbytes += 4;
+	}
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_update_pg(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_update_pg *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_update_pg *) &csk->kwqe1;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->opcode = L4_KWQE_OPCODE_VALUE_UPDATE_PG;
+	l4kwqe->flags =
+		L4_LAYER_CODE << L4_KWQ_UPDATE_PG_LAYER_CODE_SHIFT;
+	l4kwqe->pg_cid = csk->pg_cid;
+
+	l4kwqe->da0 = csk->ha[0];
+	l4kwqe->da1 = csk->ha[1];
+	l4kwqe->da2 = csk->ha[2];
+	l4kwqe->da3 = csk->ha[3];
+	l4kwqe->da4 = csk->ha[4];
+	l4kwqe->da5 = csk->ha[5];
+
+	l4kwqe->pg_host_opaque = csk->l5_cid;
+	l4kwqe->pg_valids = L4_KWQ_UPDATE_PG_VALIDS_DA;
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_upload_pg(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_upload *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_upload *) &csk->kwqe1;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->opcode = L4_KWQE_OPCODE_VALUE_UPLOAD_PG;
+	l4kwqe->flags =
+		L4_LAYER_CODE << L4_KWQ_UPLOAD_LAYER_CODE_SHIFT;
+	l4kwqe->cid = csk->pg_cid;
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_conn_req(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_connect_req1 *l4kwqe1;
+	struct l4_kwq_connect_req2 *l4kwqe2;
+	struct l4_kwq_connect_req3 *l4kwqe3;
+	struct kwqe *wqes[3];
+	u8 tcp_flags = 0;
+	int num_wqes = 2;
+
+	l4kwqe1 = (struct l4_kwq_connect_req1 *) &csk->kwqe1;
+	l4kwqe2 = (struct l4_kwq_connect_req2 *) &csk->kwqe2;
+	l4kwqe3 = (struct l4_kwq_connect_req3 *) &csk->kwqe3;
+	memset(l4kwqe1, 0, sizeof(*l4kwqe1));
+	memset(l4kwqe2, 0, sizeof(*l4kwqe2));
+	memset(l4kwqe3, 0, sizeof(*l4kwqe3));
+
+	l4kwqe3->op_code = L4_KWQE_OPCODE_VALUE_CONNECT3;
+	l4kwqe3->flags =
+		L4_LAYER_CODE << L4_KWQ_CONNECT_REQ3_LAYER_CODE_SHIFT;
+	l4kwqe3->ka_timeout = csk->ka_timeout;
+	l4kwqe3->ka_interval = csk->ka_interval;
+	l4kwqe3->ka_max_probe_count = csk->ka_max_probe_count;
+	l4kwqe3->tos = csk->tos;
+	l4kwqe3->ttl = csk->ttl;
+	l4kwqe3->snd_seq_scale = csk->snd_seq_scale;
+	l4kwqe3->pmtu = csk->mtu;
+	l4kwqe3->rcv_buf = csk->rcv_buf;
+	l4kwqe3->snd_buf = csk->snd_buf;
+	l4kwqe3->seed = csk->seed;
+
+	wqes[0] = (struct kwqe *) l4kwqe1;
+	if (test_bit(SK_F_IPV6, &csk->flags)) {
+		wqes[1] = (struct kwqe *) l4kwqe2;
+		wqes[2] = (struct kwqe *) l4kwqe3;
+		num_wqes = 3;
+
+		l4kwqe1->conn_flags = L4_KWQ_CONNECT_REQ1_IP_V6;
+		l4kwqe2->op_code = L4_KWQE_OPCODE_VALUE_CONNECT2;
+		l4kwqe2->flags =
+			L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT |
+			L4_LAYER_CODE << L4_KWQ_CONNECT_REQ2_LAYER_CODE_SHIFT;
+		l4kwqe2->src_ip_v6_2 = be32_to_cpu(csk->src_ip[1]);
+		l4kwqe2->src_ip_v6_3 = be32_to_cpu(csk->src_ip[2]);
+		l4kwqe2->src_ip_v6_4 = be32_to_cpu(csk->src_ip[3]);
+		l4kwqe2->dst_ip_v6_2 = be32_to_cpu(csk->dst_ip[1]);
+		l4kwqe2->dst_ip_v6_3 = be32_to_cpu(csk->dst_ip[2]);
+		l4kwqe2->dst_ip_v6_4 = be32_to_cpu(csk->dst_ip[3]);
+		l4kwqe3->mss = l4kwqe3->pmtu - sizeof(struct ipv6hdr) -
+			       sizeof(struct tcphdr);
+	} else {
+		wqes[1] = (struct kwqe *) l4kwqe3;
+		l4kwqe3->mss = l4kwqe3->pmtu - sizeof(struct iphdr) -
+			       sizeof(struct tcphdr);
+	}
+
+	l4kwqe1->op_code = L4_KWQE_OPCODE_VALUE_CONNECT1;
+	l4kwqe1->flags =
+		(L4_LAYER_CODE << L4_KWQ_CONNECT_REQ1_LAYER_CODE_SHIFT) |
+		 L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT;
+	l4kwqe1->cid = csk->cid;
+	l4kwqe1->pg_cid = csk->pg_cid;
+	l4kwqe1->src_ip = be32_to_cpu(csk->src_ip[0]);
+	l4kwqe1->dst_ip = be32_to_cpu(csk->dst_ip[0]);
+	l4kwqe1->src_port = be16_to_cpu(csk->src_port);
+	l4kwqe1->dst_port = be16_to_cpu(csk->dst_port);
+	if (csk->tcp_flags & SK_TCP_NO_DELAY_ACK)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK;
+	if (csk->tcp_flags & SK_TCP_KEEP_ALIVE)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_KEEP_ALIVE;
+	if (csk->tcp_flags & SK_TCP_NAGLE)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE;
+	if (csk->tcp_flags & SK_TCP_TIMESTAMP)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_TIME_STAMP;
+	if (csk->tcp_flags & SK_TCP_SACK)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_SACK;
+	if (csk->tcp_flags & SK_TCP_SEG_SCALING)
+		tcp_flags |= L4_KWQ_CONNECT_REQ1_SEG_SCALING;
+
+	l4kwqe1->tcp_flags = tcp_flags;
+
+	return dev->submit_kwqes(dev, wqes, num_wqes);
+}
+
+static int cnic_cm_close_req(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_close_req *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_close_req *) &csk->kwqe2;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->op_code = L4_KWQE_OPCODE_VALUE_CLOSE;
+	l4kwqe->flags = L4_LAYER_CODE << L4_KWQ_CLOSE_REQ_LAYER_CODE_SHIFT;
+	l4kwqe->cid = csk->cid;
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_abort_req(struct cnic_sock *csk)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct l4_kwq_reset_req *l4kwqe;
+	struct kwqe *wqes[1];
+
+	l4kwqe = (struct l4_kwq_reset_req *) &csk->kwqe2;
+	memset(l4kwqe, 0, sizeof(*l4kwqe));
+	wqes[0] = (struct kwqe *) l4kwqe;
+
+	l4kwqe->op_code = L4_KWQE_OPCODE_VALUE_RESET;
+	l4kwqe->flags = L4_LAYER_CODE << L4_KWQ_RESET_REQ_LAYER_CODE_SHIFT;
+	l4kwqe->cid = csk->cid;
+
+	return dev->submit_kwqes(dev, wqes, 1);
+}
+
+static int cnic_cm_create(struct cnic_dev *dev, int ulp_type, u32 cid,
+			  u32 l5_cid, struct cnic_sock **csk, void *context)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_sock *csk1;
+
+	if (l5_cid >= MAX_CM_SK_TBL_SZ)
+		return -EINVAL;
+
+	csk1 = &cp->csk_tbl[l5_cid];
+	if (atomic_read(&csk1->ref_count))
+		return -EAGAIN;
+
+	if (test_and_set_bit(SK_F_INUSE, &csk1->flags))
+		return -EBUSY;
+
+	csk1->dev = dev;
+	csk1->cid = cid;
+	csk1->l5_cid = l5_cid;
+	csk1->ulp_type = ulp_type;
+	csk1->context = context;
+
+	csk1->ka_timeout = DEF_KA_TIMEOUT;
+	csk1->ka_interval = DEF_KA_INTERVAL;
+	csk1->ka_max_probe_count = DEF_KA_MAX_PROBE_COUNT;
+	csk1->tos = DEF_TOS;
+	csk1->ttl = DEF_TTL;
+	csk1->snd_seq_scale = DEF_SND_SEQ_SCALE;
+	csk1->rcv_buf = DEF_RCV_BUF;
+	csk1->snd_buf = DEF_SND_BUF;
+	csk1->seed = DEF_SEED;
+
+	*csk = csk1;
+	return 0;
+}
+
+static void cnic_cm_cleanup(struct cnic_sock *csk)
+{
+	if (csk->src_port) {
+		struct cnic_dev *dev = csk->dev;
+		struct cnic_local *cp = dev->cnic_priv;
+
+		cnic_free_id(&cp->csk_port_tbl, csk->src_port);
+		csk->src_port = 0;
+	}
+}
+
+static void cnic_close_conn(struct cnic_sock *csk)
+{
+	if (test_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags)) {
+		cnic_cm_upload_pg(csk);
+		clear_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags);
+	}
+	cnic_cm_cleanup(csk);
+}
+
+static int cnic_cm_destroy(struct cnic_sock *csk)
+{
+	if (!cnic_in_use(csk))
+		return -EINVAL;
+
+	csk_hold(csk);
+	clear_bit(SK_F_INUSE, &csk->flags);
+	smp_mb__after_clear_bit();
+	while (atomic_read(&csk->ref_count) != 1)
+		msleep(1);
+	cnic_cm_cleanup(csk);
+
+	csk->flags = 0;
+	csk_put(csk);
+	return 0;
+}
+
+static inline u16 cnic_get_vlan(struct net_device *dev,
+				struct net_device **vlan_dev)
+{
+	if (dev->priv_flags & IFF_802_1Q_VLAN) {
+		*vlan_dev = vlan_dev_real_dev(dev);
+		return vlan_dev_vlan_id(dev);
+	}
+	*vlan_dev = dev;
+	return 0;
+}
+
+static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
+			     struct dst_entry **dst)
+{
+	struct flowi fl;
+	int err;
+	struct rtable *rt;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.nl_u.ip4_u.daddr = dst_addr->sin_addr.s_addr;
+
+	err = ip_route_output_key(&init_net, &rt, &fl);
+	if (!err)
+		*dst = &rt->u.dst;
+	return err;
+}
+
+static int cnic_get_v6_route(struct sockaddr_in6 *dst_addr,
+			     struct dst_entry **dst)
+{
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	ipv6_addr_copy(&fl.fl6_dst, &dst_addr->sin6_addr);
+	if (ipv6_addr_type(&fl.fl6_dst) & IPV6_ADDR_LINKLOCAL)
+		fl.oif = dst_addr->sin6_scope_id;
+
+	*dst = ip6_route_output(&init_net, NULL, &fl);
+	if (*dst)
+		return 0;
+#endif
+
+	return -ENETUNREACH;
+}
+
+static struct cnic_dev *cnic_cm_select_dev(struct sockaddr_in *dst_addr,
+					   int ulp_type)
+{
+	struct cnic_dev *dev = NULL;
+	struct dst_entry *dst;
+	struct net_device *netdev = NULL;
+	int err = -ENETUNREACH;
+
+	if (dst_addr->sin_family == AF_INET)
+		err = cnic_get_v4_route(dst_addr, &dst);
+	else if (dst_addr->sin_family == AF_INET6) {
+		struct sockaddr_in6 *dst_addr6 =
+			(struct sockaddr_in6 *) dst_addr;
+
+		err = cnic_get_v6_route(dst_addr6, &dst);
+	} else
+		return NULL;
+
+	if (err)
+		return NULL;
+
+	if (!dst->dev)
+		goto done;
+
+	cnic_get_vlan(dst->dev, &netdev);
+
+	dev = cnic_from_netdev(netdev);
+
+done:
+	dst_release(dst);
+	if (dev)
+		cnic_put(dev);
+	return dev;
+}
+
+static int cnic_resolve_addr(struct cnic_sock *csk, struct cnic_sockaddr *saddr)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	return cnic_send_nlmsg(cp, ISCSI_KEVENT_PATH_REQ, csk);
+}
+
+static int cnic_get_route(struct cnic_sock *csk, struct cnic_sockaddr *saddr)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct cnic_local *cp = dev->cnic_priv;
+	int is_v6, err, rc = -ENETUNREACH;
+	struct dst_entry *dst;
+	struct net_device *realdev;
+	u32 local_port;
+
+	if (saddr->local.v6.sin6_family == AF_INET6 &&
+	    saddr->remote.v6.sin6_family == AF_INET6)
+		is_v6 = 1;
+	else if (saddr->local.v4.sin_family == AF_INET &&
+		 saddr->remote.v4.sin_family == AF_INET)
+		is_v6 = 0;
+	else
+		return -EINVAL;
+
+	clear_bit(SK_F_IPV6, &csk->flags);
+
+	if (is_v6) {
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		set_bit(SK_F_IPV6, &csk->flags);
+		err = cnic_get_v6_route(&saddr->remote.v6, &dst);
+		if (err)
+			return err;
+
+		if (!dst || dst->error || !dst->dev)
+			goto err_out;
+
+		memcpy(&csk->dst_ip[0], &saddr->remote.v6.sin6_addr,
+		       sizeof(struct in6_addr));
+		csk->dst_port = saddr->remote.v6.sin6_port;
+		local_port = saddr->local.v6.sin6_port;
+#else
+		return rc;
+#endif
+
+	} else {
+		err = cnic_get_v4_route(&saddr->remote.v4, &dst);
+		if (err)
+			return err;
+
+		if (!dst || dst->error || !dst->dev)
+			goto err_out;
+
+		csk->dst_ip[0] = saddr->remote.v4.sin_addr.s_addr;
+		csk->dst_port = saddr->remote.v4.sin_port;
+		local_port = saddr->local.v4.sin_port;
+	}
+
+	csk->vlan_id = cnic_get_vlan(dst->dev, &realdev);
+	if (realdev != dev->netdev)
+		goto err_out;
+
+	if (local_port >= CNIC_LOCAL_PORT_MIN &&
+	    local_port < CNIC_LOCAL_PORT_MAX) {
+		if (cnic_alloc_id(&cp->csk_port_tbl, local_port))
+			local_port = 0;
+	} else
+		local_port = 0;
+
+	if (!local_port) {
+		local_port = cnic_alloc_new_id(&cp->csk_port_tbl);
+		if (local_port == -1) {
+			rc = -ENOMEM;
+			goto err_out;
+		}
+	}
+	csk->src_port = local_port;
+
+	csk->mtu = dst_mtu(dst);
+	rc = 0;
+
+err_out:
+	dst_release(dst);
+	return rc;
+}
+
+static void cnic_init_csk_state(struct cnic_sock *csk)
+{
+	csk->state = 0;
+	clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
+	clear_bit(SK_F_CLOSING, &csk->flags);
+}
+
+static int cnic_cm_connect(struct cnic_sock *csk, struct cnic_sockaddr *saddr)
+{
+	int err = 0;
+
+	if (!cnic_in_use(csk))
+		return -EINVAL;
+
+	if (test_and_set_bit(SK_F_CONNECT_START, &csk->flags))
+		return -EINVAL;
+
+	cnic_init_csk_state(csk);
+
+	err = cnic_get_route(csk, saddr);
+	if (err)
+		goto err_out;
+
+	err = cnic_resolve_addr(csk, saddr);
+	if (!err)
+		return 0;
+
+err_out:
+	clear_bit(SK_F_CONNECT_START, &csk->flags);
+	return err;
+}
+
+static int cnic_cm_abort(struct cnic_sock *csk)
+{
+	struct cnic_local *cp = csk->dev->cnic_priv;
+	u32 opcode;
+
+	if (!cnic_in_use(csk))
+		return -EINVAL;
+
+	if (cnic_abort_prep(csk))
+		return cnic_cm_abort_req(csk);
+
+	/* Getting here means that we haven't started connect, or
+	 * connect was not successful.
+	 */
+
+	csk->state = L4_KCQE_OPCODE_VALUE_RESET_COMP;
+	if (test_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags))
+		opcode = csk->state;
+	else
+		opcode = L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD;
+	cp->close_conn(csk, opcode);
+
+	return 0;
+}
+
+static int cnic_cm_close(struct cnic_sock *csk)
+{
+	if (!cnic_in_use(csk))
+		return -EINVAL;
+
+	if (cnic_close_prep(csk)) {
+		csk->state = L4_KCQE_OPCODE_VALUE_CLOSE_COMP;
+		return cnic_cm_close_req(csk);
+	}
+	return 0;
+}
+
+static void cnic_cm_upcall(struct cnic_local *cp, struct cnic_sock *csk,
+			   u8 opcode)
+{
+	struct cnic_ulp_ops *ulp_ops;
+	int ulp_type = csk->ulp_type;
+
+	rcu_read_lock();
+	ulp_ops = rcu_dereference(cp->ulp_ops[ulp_type]);
+	if (ulp_ops) {
+		if (opcode == L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE)
+			ulp_ops->cm_connect_complete(csk);
+		else if (opcode == L4_KCQE_OPCODE_VALUE_CLOSE_COMP)
+			ulp_ops->cm_close_complete(csk);
+		else if (opcode == L4_KCQE_OPCODE_VALUE_RESET_RECEIVED)
+			ulp_ops->cm_remote_abort(csk);
+		else if (opcode == L4_KCQE_OPCODE_VALUE_RESET_COMP)
+			ulp_ops->cm_abort_complete(csk);
+		else if (opcode == L4_KCQE_OPCODE_VALUE_CLOSE_RECEIVED)
+			ulp_ops->cm_remote_close(csk);
+	}
+	rcu_read_unlock();
+}
+
+static int cnic_cm_set_pg(struct cnic_sock *csk)
+{
+	if (cnic_offld_prep(csk)) {
+		if (test_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags))
+			cnic_cm_update_pg(csk);
+		else
+			cnic_cm_offload_pg(csk);
+	}
+	return 0;
+}
+
+static void cnic_cm_process_offld_pg(struct cnic_dev *dev, struct l4_kcq *kcqe)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	u32 l5_cid = kcqe->pg_host_opaque;
+	u8 opcode = kcqe->op_code;
+	struct cnic_sock *csk = &cp->csk_tbl[l5_cid];
+
+	csk_hold(csk);
+	if (!cnic_in_use(csk))
+		goto done;
+
+	if (opcode == L4_KCQE_OPCODE_VALUE_UPDATE_PG) {
+		clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
+		goto done;
+	}
+	csk->pg_cid = kcqe->pg_cid;
+	set_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags);
+	cnic_cm_conn_req(csk);
+
+done:
+	csk_put(csk);
+}
+
+static void cnic_cm_process_kcqe(struct cnic_dev *dev, struct kcqe *kcqe)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct l4_kcq *l4kcqe = (struct l4_kcq *) kcqe;
+	u8 opcode = l4kcqe->op_code;
+	u32 l5_cid;
+	struct cnic_sock *csk;
+
+	if (opcode == L4_KCQE_OPCODE_VALUE_OFFLOAD_PG ||
+	    opcode == L4_KCQE_OPCODE_VALUE_UPDATE_PG) {
+		cnic_cm_process_offld_pg(dev, l4kcqe);
+		return;
+	}
+
+	l5_cid = l4kcqe->conn_id;
+	if (opcode & 0x80)
+		l5_cid = l4kcqe->cid;
+	if (l5_cid >= MAX_CM_SK_TBL_SZ)
+		return;
+
+	csk = &cp->csk_tbl[l5_cid];
+	csk_hold(csk);
+
+	if (!cnic_in_use(csk)) {
+		csk_put(csk);
+		return;
+	}
+
+	switch (opcode) {
+	case L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE:
+		if (l4kcqe->status == 0)
+			set_bit(SK_F_OFFLD_COMPLETE, &csk->flags);
+
+		smp_mb__before_clear_bit();
+		clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
+		cnic_cm_upcall(cp, csk, opcode);
+		break;
+
+	case L4_KCQE_OPCODE_VALUE_RESET_RECEIVED:
+		if (test_and_clear_bit(SK_F_OFFLD_COMPLETE, &csk->flags))
+			csk->state = opcode;
+		/* fall through */
+	case L4_KCQE_OPCODE_VALUE_CLOSE_COMP:
+	case L4_KCQE_OPCODE_VALUE_RESET_COMP:
+		cp->close_conn(csk, opcode);
+		break;
+
+	case L4_KCQE_OPCODE_VALUE_CLOSE_RECEIVED:
+		cnic_cm_upcall(cp, csk, opcode);
+		break;
+	}
+	csk_put(csk);
+}
+
+static void cnic_cm_indicate_kcqe(void *data, struct kcqe *kcqe[], u32 num)
+{
+	struct cnic_dev *dev = data;
+	int i;
+
+	for (i = 0; i < num; i++)
+		cnic_cm_process_kcqe(dev, kcqe[i]);
+}
+
+static struct cnic_ulp_ops cm_ulp_ops = {
+	.indicate_kcqes		= cnic_cm_indicate_kcqe,
+};
+
+static void cnic_cm_free_mem(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	kfree(cp->csk_tbl);
+	cp->csk_tbl = NULL;
+	cnic_free_id_tbl(&cp->csk_port_tbl);
+}
+
+static int cnic_cm_alloc_mem(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+
+	cp->csk_tbl = kzalloc(sizeof(struct cnic_sock) * MAX_CM_SK_TBL_SZ,
+			      GFP_KERNEL);
+	if (!cp->csk_tbl)
+		return -ENOMEM;
+
+	if (cnic_init_id_tbl(&cp->csk_port_tbl, CNIC_LOCAL_PORT_RANGE,
+			     CNIC_LOCAL_PORT_MIN)) {
+		cnic_cm_free_mem(dev);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int cnic_ready_to_close(struct cnic_sock *csk, u32 opcode)
+{
+	if ((opcode == csk->state) ||
+	    (opcode == L4_KCQE_OPCODE_VALUE_RESET_RECEIVED &&
+	     csk->state == L4_KCQE_OPCODE_VALUE_CLOSE_COMP)) {
+		if (!test_and_set_bit(SK_F_CLOSING, &csk->flags))
+			return 1;
+	}
+	return 0;
+}
+
+static void cnic_close_bnx2_conn(struct cnic_sock *csk, u32 opcode)
+{
+	struct cnic_dev *dev = csk->dev;
+	struct cnic_local *cp = dev->cnic_priv;
+
+	clear_bit(SK_F_CONNECT_START, &csk->flags);
+	if (cnic_ready_to_close(csk, opcode)) {
+		cnic_close_conn(csk);
+		cnic_cm_upcall(cp, csk, opcode);
+	}
+}
+
+static void cnic_cm_stop_bnx2_hw(struct cnic_dev *dev)
+{
+}
+
+static int cnic_cm_init_bnx2_hw(struct cnic_dev *dev)
+{
+	u32 seed;
+
+	get_random_bytes(&seed, 4);
+	cnic_ctx_wr(dev, 45, 0, seed);
+	return 0;
+}
+
+static int cnic_cm_open(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int err;
+
+	err = cnic_cm_alloc_mem(dev);
+	if (err)
+		return err;
+
+	err = cp->start_cm(dev);
+
+	if (err)
+		goto err_out;
+
+	dev->cm_create = cnic_cm_create;
+	dev->cm_destroy = cnic_cm_destroy;
+	dev->cm_connect = cnic_cm_connect;
+	dev->cm_abort = cnic_cm_abort;
+	dev->cm_close = cnic_cm_close;
+	dev->cm_select_dev = cnic_cm_select_dev;
+
+	cp->ulp_handle[CNIC_ULP_L4] = dev;
+	rcu_assign_pointer(cp->ulp_ops[CNIC_ULP_L4], &cm_ulp_ops);
+	return 0;
+
+err_out:
+	cnic_cm_free_mem(dev);
+	return err;
+}
+
+static int cnic_cm_shutdown(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int i;
+
+	cp->stop_cm(dev);
+
+	if (!cp->csk_tbl)
+		return 0;
+
+	for (i = 0; i < MAX_CM_SK_TBL_SZ; i++) {
+		struct cnic_sock *csk = &cp->csk_tbl[i];
+
+		clear_bit(SK_F_INUSE, &csk->flags);
+		cnic_cm_cleanup(csk);
+	}
+	cnic_cm_free_mem(dev);
+
+	return 0;
+}
+
+static void cnic_init_context(struct cnic_dev *dev, u32 cid)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	u32 cid_addr;
+	int i;
+
+	if (CHIP_NUM(cp) == CHIP_NUM_5709)
+		return;
+
+	cid_addr = GET_CID_ADDR(cid);
+
+	for (i = 0; i < CTX_SIZE; i += 4)
+		cnic_ctx_wr(dev, cid_addr, i, 0);
+}
+
+static int cnic_setup_5709_context(struct cnic_dev *dev, int valid)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	int ret = 0, i;
+	u32 valid_bit = valid ? BNX2_CTX_HOST_PAGE_TBL_DATA0_VALID : 0;
+
+	if (CHIP_NUM(cp) != CHIP_NUM_5709)
+		return 0;
+
+	for (i = 0; i < cp->ctx_blks; i++) {
+		int j;
+		u32 idx = cp->ctx_arr[i].cid / cp->cids_per_blk;
+		u32 val;
+
+		memset(cp->ctx_arr[i].ctx, 0, BCM_PAGE_SIZE);
+
+		CNIC_WR(dev, BNX2_CTX_HOST_PAGE_TBL_DATA0,
+			(cp->ctx_arr[i].mapping & 0xffffffff) | valid_bit);
+		CNIC_WR(dev, BNX2_CTX_HOST_PAGE_TBL_DATA1,
+			(u64) cp->ctx_arr[i].mapping >> 32);
+		CNIC_WR(dev, BNX2_CTX_HOST_PAGE_TBL_CTRL, idx |
+			BNX2_CTX_HOST_PAGE_TBL_CTRL_WRITE_REQ);
+		for (j = 0; j < 10; j++) {
+
+			val = CNIC_RD(dev, BNX2_CTX_HOST_PAGE_TBL_CTRL);
+			if (!(val & BNX2_CTX_HOST_PAGE_TBL_CTRL_WRITE_REQ))
+				break;
+			udelay(5);
+		}
+		if (val & BNX2_CTX_HOST_PAGE_TBL_CTRL_WRITE_REQ) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+	return ret;
+}
+
+static void cnic_free_irq(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		cp->disable_int_sync(dev);
+		tasklet_disable(&cp->cnic_irq_task);
+		free_irq(ethdev->irq_arr[0].vector, dev);
+	}
+}
+
+static int cnic_init_bnx2_irq(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		int err, i = 0;
+		int sblk_num = cp->status_blk_num;
+		u32 base = ((sblk_num - 1) * BNX2_HC_SB_CONFIG_SIZE) +
+			   BNX2_HC_SB_CONFIG_1;
+
+		CNIC_WR(dev, base, BNX2_HC_SB_CONFIG_1_ONE_SHOT);
+
+		CNIC_WR(dev, base + BNX2_HC_COMP_PROD_TRIP_OFF, (2 << 16) | 8);
+		CNIC_WR(dev, base + BNX2_HC_COM_TICKS_OFF, (64 << 16) | 220);
+		CNIC_WR(dev, base + BNX2_HC_CMD_TICKS_OFF, (64 << 16) | 220);
+
+		cp->bnx2_status_blk = cp->status_blk;
+		cp->last_status_idx = cp->bnx2_status_blk->status_idx;
+		tasklet_init(&cp->cnic_irq_task, &cnic_service_bnx2_msix,
+			     (unsigned long) dev);
+		err = request_irq(ethdev->irq_arr[0].vector, cnic_irq, 0,
+				  "cnic", dev);
+		if (err) {
+			tasklet_disable(&cp->cnic_irq_task);
+			return err;
+		}
+		while (cp->bnx2_status_blk->status_completion_producer_index &&
+		       i < 10) {
+			CNIC_WR(dev, BNX2_HC_COALESCE_NOW,
+				1 << (11 + sblk_num));
+			udelay(10);
+			i++;
+			barrier();
+		}
+		if (cp->bnx2_status_blk->status_completion_producer_index) {
+			cnic_free_irq(dev);
+			goto failed;
+		}
+
+	} else {
+		struct status_block *sblk = cp->status_blk;
+		u32 hc_cmd = CNIC_RD(dev, BNX2_HC_COMMAND);
+		int i = 0;
+
+		while (sblk->status_completion_producer_index && i < 10) {
+			CNIC_WR(dev, BNX2_HC_COMMAND,
+				hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
+			udelay(10);
+			i++;
+			barrier();
+		}
+		if (sblk->status_completion_producer_index)
+			goto failed;
+
+	}
+	return 0;
+
+failed:
+	printk(KERN_ERR PFX "%s: " "KCQ index not resetting to 0.\n",
+	       dev->netdev->name);
+	return -EBUSY;
+}
+
+static void cnic_enable_bnx2_int(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	if (!(ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX))
+		return;
+
+	CNIC_WR(dev, BNX2_PCICFG_INT_ACK_CMD, cp->int_num |
+		BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | cp->last_status_idx);
+}
+
+static void cnic_disable_bnx2_int_sync(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	if (!(ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX))
+		return;
+
+	CNIC_WR(dev, BNX2_PCICFG_INT_ACK_CMD, cp->int_num |
+		BNX2_PCICFG_INT_ACK_CMD_MASK_INT);
+	CNIC_RD(dev, BNX2_PCICFG_INT_ACK_CMD);
+	synchronize_irq(ethdev->irq_arr[0].vector);
+}
+
+static void cnic_init_bnx2_tx_ring(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	u32 cid_addr, tx_cid, sb_id;
+	u32 val, offset0, offset1, offset2, offset3;
+	int i;
+	struct tx_bd *txbd;
+	dma_addr_t buf_map;
+	struct status_block *s_blk = cp->status_blk;
+
+	sb_id = cp->status_blk_num;
+	tx_cid = 20;
+	cnic_init_context(dev, tx_cid);
+	cnic_init_context(dev, tx_cid + 1);
+	cp->tx_cons_ptr = &s_blk->status_tx_quick_consumer_index2;
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		struct status_block_msix *sblk = cp->status_blk;
+
+		tx_cid = TX_TSS_CID + sb_id - 1;
+		cnic_init_context(dev, tx_cid);
+		CNIC_WR(dev, BNX2_TSCH_TSS_CFG, (sb_id << 24) |
+			(TX_TSS_CID << 7));
+		cp->tx_cons_ptr = &sblk->status_tx_quick_consumer_index;
+	}
+	cp->tx_cons = *cp->tx_cons_ptr;
+
+	cid_addr = GET_CID_ADDR(tx_cid);
+	if (CHIP_NUM(cp) == CHIP_NUM_5709) {
+		u32 cid_addr2 = GET_CID_ADDR(tx_cid + 4) + 0x40;
+
+		for (i = 0; i < PHY_CTX_SIZE; i += 4)
+			cnic_ctx_wr(dev, cid_addr2, i, 0);
+
+		offset0 = BNX2_L2CTX_TYPE_XI;
+		offset1 = BNX2_L2CTX_CMD_TYPE_XI;
+		offset2 = BNX2_L2CTX_TBDR_BHADDR_HI_XI;
+		offset3 = BNX2_L2CTX_TBDR_BHADDR_LO_XI;
+	} else {
+		offset0 = BNX2_L2CTX_TYPE;
+		offset1 = BNX2_L2CTX_CMD_TYPE;
+		offset2 = BNX2_L2CTX_TBDR_BHADDR_HI;
+		offset3 = BNX2_L2CTX_TBDR_BHADDR_LO;
+	}
+	val = BNX2_L2CTX_TYPE_TYPE_L2 | BNX2_L2CTX_TYPE_SIZE_L2;
+	cnic_ctx_wr(dev, cid_addr, offset0, val);
+
+	val = BNX2_L2CTX_CMD_TYPE_TYPE_L2 | (8 << 16);
+	cnic_ctx_wr(dev, cid_addr, offset1, val);
+
+	txbd = (struct tx_bd *) cp->l2_ring;
+
+	buf_map = cp->l2_buf_map;
+	for (i = 0; i < MAX_TX_DESC_CNT; i++, txbd++) {
+		txbd->tx_bd_haddr_hi = (u64) buf_map >> 32;
+		txbd->tx_bd_haddr_lo = (u64) buf_map & 0xffffffff;
+	}
+	val = (u64) cp->l2_ring_map >> 32;
+	cnic_ctx_wr(dev, cid_addr, offset2, val);
+	txbd->tx_bd_haddr_hi = val;
+
+	val = (u64) cp->l2_ring_map & 0xffffffff;
+	cnic_ctx_wr(dev, cid_addr, offset3, val);
+	txbd->tx_bd_haddr_lo = val;
+}
+
+static void cnic_init_bnx2_rx_ring(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	u32 cid_addr, sb_id, val, coal_reg, coal_val;
+	int i;
+	struct rx_bd *rxbd;
+	struct status_block *s_blk = cp->status_blk;
+
+	sb_id = cp->status_blk_num;
+	cnic_init_context(dev, 2);
+	cp->rx_cons_ptr = &s_blk->status_rx_quick_consumer_index2;
+	coal_reg = BNX2_HC_COMMAND;
+	coal_val = CNIC_RD(dev, coal_reg);
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		struct status_block_msix *sblk = cp->status_blk;
+
+		cp->rx_cons_ptr = &sblk->status_rx_quick_consumer_index;
+		coal_reg = BNX2_HC_COALESCE_NOW;
+		coal_val = 1 << (11 + sb_id);
+	}
+	i = 0;
+	while (!(*cp->rx_cons_ptr != 0) && i < 10) {
+		CNIC_WR(dev, coal_reg, coal_val);
+		udelay(10);
+		i++;
+		barrier();
+	}
+	cp->rx_cons = *cp->rx_cons_ptr;
+
+	cid_addr = GET_CID_ADDR(2);
+	val = BNX2_L2CTX_CTX_TYPE_CTX_BD_CHN_TYPE_VALUE |
+	      BNX2_L2CTX_CTX_TYPE_SIZE_L2 | (0x02 << 8);
+	cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_CTX_TYPE, val);
+
+	if (sb_id == 0)
+		val = 2 << BNX2_L2CTX_STATUSB_NUM_SHIFT;
+	else
+		val = BNX2_L2CTX_STATUSB_NUM(sb_id);
+	cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_HOST_BDIDX, val);
+
+	rxbd = (struct rx_bd *) (cp->l2_ring + BCM_PAGE_SIZE);
+	for (i = 0; i < MAX_RX_DESC_CNT; i++, rxbd++) {
+		dma_addr_t buf_map;
+		int n = (i % cp->l2_rx_ring_size) + 1;
+
+		buf_map = cp->l2_buf_map + (n * cp->l2_single_buf_size);
+		rxbd->rx_bd_len = cp->l2_single_buf_size;
+		rxbd->rx_bd_flags = RX_BD_FLAGS_START | RX_BD_FLAGS_END;
+		rxbd->rx_bd_haddr_hi = (u64) buf_map >> 32;
+		rxbd->rx_bd_haddr_lo = (u64) buf_map & 0xffffffff;
+	}
+	val = (u64) (cp->l2_ring_map + BCM_PAGE_SIZE) >> 32;
+	cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_NX_BDHADDR_HI, val);
+	rxbd->rx_bd_haddr_hi = val;
+
+	val = (u64) (cp->l2_ring_map + BCM_PAGE_SIZE) & 0xffffffff;
+	cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_NX_BDHADDR_LO, val);
+	rxbd->rx_bd_haddr_lo = val;
+
+	val = cnic_reg_rd_ind(dev, BNX2_RXP_SCRATCH_RXP_FLOOD);
+	cnic_reg_wr_ind(dev, BNX2_RXP_SCRATCH_RXP_FLOOD, val | (1 << 2));
+}
+
+static void cnic_shutdown_bnx2_rx_ring(struct cnic_dev *dev)
+{
+	struct kwqe *wqes[1], l2kwqe;
+
+	memset(&l2kwqe, 0, sizeof(l2kwqe));
+	wqes[0] = &l2kwqe;
+	l2kwqe.kwqe_op_flag = (L2_LAYER_CODE << KWQE_FLAGS_LAYER_SHIFT) |
+			      (L2_KWQE_OPCODE_VALUE_FLUSH <<
+			       KWQE_OPCODE_SHIFT) | 2;
+	dev->submit_kwqes(dev, wqes, 1);
+}
+
+static void cnic_set_bnx2_mac(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	u32 val;
+
+	val = cp->func << 2;
+
+	cp->shmem_base = cnic_reg_rd_ind(dev, BNX2_SHM_HDR_ADDR_0 + val);
+
+	val = cnic_reg_rd_ind(dev, cp->shmem_base +
+			      BNX2_PORT_HW_CFG_ISCSI_MAC_UPPER);
+	dev->mac_addr[0] = (u8) (val >> 8);
+	dev->mac_addr[1] = (u8) val;
+
+	CNIC_WR(dev, BNX2_EMAC_MAC_MATCH4, val);
+
+	val = cnic_reg_rd_ind(dev, cp->shmem_base +
+			      BNX2_PORT_HW_CFG_ISCSI_MAC_LOWER);
+	dev->mac_addr[2] = (u8) (val >> 24);
+	dev->mac_addr[3] = (u8) (val >> 16);
+	dev->mac_addr[4] = (u8) (val >> 8);
+	dev->mac_addr[5] = (u8) val;
+
+	CNIC_WR(dev, BNX2_EMAC_MAC_MATCH5, val);
+
+	val = 4 | BNX2_RPM_SORT_USER2_BC_EN;
+	if (CHIP_NUM(cp) != CHIP_NUM_5709)
+		val |= BNX2_RPM_SORT_USER2_PROM_VLAN;
+
+	CNIC_WR(dev, BNX2_RPM_SORT_USER2, 0x0);
+	CNIC_WR(dev, BNX2_RPM_SORT_USER2, val);
+	CNIC_WR(dev, BNX2_RPM_SORT_USER2, val | BNX2_RPM_SORT_USER2_ENA);
+}
+
+static int cnic_start_bnx2_hw(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	struct status_block *sblk = cp->status_blk;
+	u32 val;
+	int err;
+
+	cnic_set_bnx2_mac(dev);
+
+	val = CNIC_RD(dev, BNX2_MQ_CONFIG);
+	val &= ~BNX2_MQ_CONFIG_KNL_BYP_BLK_SIZE;
+	if (BCM_PAGE_BITS > 12)
+		val |= (12 - 8)  << 4;
+	else
+		val |= (BCM_PAGE_BITS - 8)  << 4;
+
+	CNIC_WR(dev, BNX2_MQ_CONFIG, val);
+
+	CNIC_WR(dev, BNX2_HC_COMP_PROD_TRIP, (2 << 16) | 8);
+	CNIC_WR(dev, BNX2_HC_COM_TICKS, (64 << 16) | 220);
+	CNIC_WR(dev, BNX2_HC_CMD_TICKS, (64 << 16) | 220);
+
+	err = cnic_setup_5709_context(dev, 1);
+	if (err)
+		return err;
+
+	cnic_init_context(dev, KWQ_CID);
+	cnic_init_context(dev, KCQ_CID);
+
+	cp->kwq_cid_addr = GET_CID_ADDR(KWQ_CID);
+	cp->kwq_io_addr = MB_GET_CID_ADDR(KWQ_CID) + L5_KRNLQ_HOST_QIDX;
+
+	cp->max_kwq_idx = MAX_KWQ_IDX;
+	cp->kwq_prod_idx = 0;
+	cp->kwq_con_idx = 0;
+	cp->cnic_local_flags |= CNIC_LCL_FL_KWQ_INIT;
+
+	if (CHIP_NUM(cp) == CHIP_NUM_5706 || CHIP_NUM(cp) == CHIP_NUM_5708)
+		cp->kwq_con_idx_ptr = &sblk->status_rx_quick_consumer_index15;
+	else
+		cp->kwq_con_idx_ptr = &sblk->status_cmd_consumer_index;
+
+	/* Initialize the kernel work queue context. */
+	val = KRNLQ_TYPE_TYPE_KRNLQ | KRNLQ_SIZE_TYPE_SIZE |
+	      (BCM_PAGE_BITS - 8) | KRNLQ_FLAGS_QE_SELF_SEQ;
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_TYPE, val);
+
+	val = (BCM_PAGE_SIZE / sizeof(struct kwqe) - 1) << 16;
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_QE_SELF_SEQ_MAX, val);
+
+	val = ((BCM_PAGE_SIZE / sizeof(struct kwqe)) << 16) | KWQ_PAGE_CNT;
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_PGTBL_NPAGES, val);
+
+	val = (u32) ((u64) cp->kwq_info.pgtbl_map >> 32);
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_PGTBL_HADDR_HI, val);
+
+	val = (u32) cp->kwq_info.pgtbl_map;
+	cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_PGTBL_HADDR_LO, val);
+
+	cp->kcq_cid_addr = GET_CID_ADDR(KCQ_CID);
+	cp->kcq_io_addr = MB_GET_CID_ADDR(KCQ_CID) + L5_KRNLQ_HOST_QIDX;
+
+	cp->kcq_prod_idx = 0;
+
+	/* Initialize the kernel complete queue context. */
+	val = KRNLQ_TYPE_TYPE_KRNLQ | KRNLQ_SIZE_TYPE_SIZE |
+	      (BCM_PAGE_BITS - 8) | KRNLQ_FLAGS_QE_SELF_SEQ;
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_TYPE, val);
+
+	val = (BCM_PAGE_SIZE / sizeof(struct kcqe) - 1) << 16;
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_QE_SELF_SEQ_MAX, val);
+
+	val = ((BCM_PAGE_SIZE / sizeof(struct kcqe)) << 16) | KCQ_PAGE_CNT;
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_PGTBL_NPAGES, val);
+
+	val = (u32) ((u64) cp->kcq_info.pgtbl_map >> 32);
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_PGTBL_HADDR_HI, val);
+
+	val = (u32) cp->kcq_info.pgtbl_map;
+	cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_PGTBL_HADDR_LO, val);
+
+	cp->int_num = 0;
+	if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) {
+		u32 sb_id = cp->status_blk_num;
+		u32 sb = BNX2_L2CTX_STATUSB_NUM(sb_id);
+
+		cp->int_num = sb_id << BNX2_PCICFG_INT_ACK_CMD_INT_NUM_SHIFT;
+		cnic_ctx_wr(dev, cp->kwq_cid_addr, L5_KRNLQ_HOST_QIDX, sb);
+		cnic_ctx_wr(dev, cp->kcq_cid_addr, L5_KRNLQ_HOST_QIDX, sb);
+	}
+
+	/* Enable Commnad Scheduler notification when we write to the
+	 * host producer index of the kernel contexts. */
+	CNIC_WR(dev, BNX2_MQ_KNL_CMD_MASK1, 2);
+
+	/* Enable Command Scheduler notification when we write to either
+	 * the Send Queue or Receive Queue producer indexes of the kernel
+	 * bypass contexts. */
+	CNIC_WR(dev, BNX2_MQ_KNL_BYP_CMD_MASK1, 7);
+	CNIC_WR(dev, BNX2_MQ_KNL_BYP_WRITE_MASK1, 7);
+
+	/* Notify COM when the driver post an application buffer. */
+	CNIC_WR(dev, BNX2_MQ_KNL_RX_V2P_MASK2, 0x2000);
+
+	/* Set the CP and COM doorbells.  These two processors polls the
+	 * doorbell for a non zero value before running.  This must be done
+	 * after setting up the kernel queue contexts. */
+	cnic_reg_wr_ind(dev, BNX2_CP_SCRATCH + 0x20, 1);
+	cnic_reg_wr_ind(dev, BNX2_COM_SCRATCH + 0x20, 1);
+
+	cnic_init_bnx2_tx_ring(dev);
+	cnic_init_bnx2_rx_ring(dev);
+
+	err = cnic_init_bnx2_irq(dev);
+	if (err) {
+		printk(KERN_ERR PFX "%s: cnic_init_irq failed\n",
+		       dev->netdev->name);
+		cnic_reg_wr_ind(dev, BNX2_CP_SCRATCH + 0x20, 0);
+		cnic_reg_wr_ind(dev, BNX2_COM_SCRATCH + 0x20, 0);
+		return err;
+	}
+
+	return 0;
+}
+
+static int cnic_start_hw(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+	int err;
+
+	if (test_bit(CNIC_F_CNIC_UP, &dev->flags))
+		return -EALREADY;
+
+	err = ethdev->drv_register_cnic(dev->netdev, cp->cnic_ops, dev);
+	if (err) {
+		printk(KERN_ERR PFX "%s: register_cnic failed\n",
+		       dev->netdev->name);
+		goto err2;
+	}
+
+	dev->regview = ethdev->io_base;
+	cp->chip_id = ethdev->chip_id;
+	pci_dev_get(dev->pcidev);
+	cp->func = PCI_FUNC(dev->pcidev->devfn);
+	cp->status_blk = ethdev->irq_arr[0].status_blk;
+	cp->status_blk_num = ethdev->irq_arr[0].status_blk_num;
+
+	err = cp->alloc_resc(dev);
+	if (err) {
+		printk(KERN_ERR PFX "%s: allocate resource failure\n",
+		       dev->netdev->name);
+		goto err1;
+	}
+
+	err = cp->start_hw(dev);
+	if (err)
+		goto err1;
+
+	err = cnic_cm_open(dev);
+	if (err)
+		goto err1;
+
+	set_bit(CNIC_F_CNIC_UP, &dev->flags);
+
+	cp->enable_int(dev);
+
+	return 0;
+
+err1:
+	ethdev->drv_unregister_cnic(dev->netdev);
+	cp->free_resc(dev);
+	pci_dev_put(dev->pcidev);
+err2:
+	return err;
+}
+
+static void cnic_stop_bnx2_hw(struct cnic_dev *dev)
+{
+	struct cnic_local *cp = dev->cnic_priv;
+	struct cnic_eth_dev *ethdev = cp->ethdev;
+
+	cnic_disable_bnx2_int_sync(dev);
+
+	cnic_reg_wr_ind(dev, BNX2_CP_SCRATCH + 0x20, 0);
+	cnic_reg_wr_ind(dev, BNX2_COM_SCRATCH + 0x20, 0);
+
+	cnic_init_context(dev, KWQ_CID);
+	cnic_init_context(dev, KCQ_CID);
+
+	cnic_setup_5709_context(dev, 0);
+	cnic_free_irq(dev);
+
+	ethdev->drv_unregister_cnic(dev->netdev);
+
+	cnic_free_resc(dev);
+}
+
+static void cnic_stop_hw(struct cnic_dev *dev)
+{
+	if (test_bit(CNIC_F_CNIC_UP, &dev->flags)) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		clear_bit(CNIC_F_CNIC_UP, &dev->flags);
+		rcu_assign_pointer(cp->ulp_ops[CNIC_ULP_L4], NULL);
+		synchronize_rcu();
+		cnic_cm_shutdown(dev);
+		cp->stop_hw(dev);
+		pci_dev_put(dev->pcidev);
+	}
+}
+
+static void cnic_free_dev(struct cnic_dev *dev)
+{
+	int i = 0;
+
+	while ((atomic_read(&dev->ref_count) != 0) && i < 10) {
+		msleep(100);
+		i++;
+	}
+	if (atomic_read(&dev->ref_count) != 0)
+		printk(KERN_ERR PFX "%s: Failed waiting for ref count to go"
+				    " to zero.\n", dev->netdev->name);
+
+	printk(KERN_INFO PFX "Removed CNIC device: %s\n", dev->netdev->name);
+	dev_put(dev->netdev);
+	kfree(dev);
+}
+
+static struct cnic_dev *cnic_alloc_dev(struct net_device *dev,
+				       struct pci_dev *pdev)
+{
+	struct cnic_dev *cdev;
+	struct cnic_local *cp;
+	int alloc_size;
+
+	alloc_size = sizeof(struct cnic_dev) + sizeof(struct cnic_local);
+
+	cdev = kzalloc(alloc_size , GFP_KERNEL);
+	if (cdev == NULL) {
+		printk(KERN_ERR PFX "%s: allocate dev struct failure\n",
+		       dev->name);
+		return NULL;
+	}
+
+	cdev->netdev = dev;
+	cdev->cnic_priv = (char *)cdev + sizeof(struct cnic_dev);
+	cdev->register_device = cnic_register_device;
+	cdev->unregister_device = cnic_unregister_device;
+	cdev->iscsi_nl_msg_recv = cnic_iscsi_nl_msg_recv;
+
+	cp = cdev->cnic_priv;
+	cp->dev = cdev;
+	cp->uio_dev = -1;
+	cp->l2_single_buf_size = 0x400;
+	cp->l2_rx_ring_size = 3;
+
+	spin_lock_init(&cp->cnic_ulp_lock);
+
+	printk(KERN_INFO PFX "Added CNIC device: %s\n", dev->name);
+
+	return cdev;
+}
+
+static struct cnic_dev *init_bnx2_cnic(struct net_device *dev)
+{
+	struct pci_dev *pdev;
+	struct cnic_dev *cdev;
+	struct cnic_local *cp;
+	struct cnic_eth_dev *ethdev = NULL;
+	struct cnic_eth_dev *(*probe)(void *) = NULL;
+
+	probe = __symbol_get("bnx2_cnic_probe");
+	if (probe) {
+		ethdev = (*probe)(dev);
+		symbol_put_addr(probe);
+	}
+	if (!ethdev)
+		return NULL;
+
+	pdev = ethdev->pdev;
+	if (!pdev)
+		return NULL;
+
+	dev_hold(dev);
+	pci_dev_get(pdev);
+	if (pdev->device == PCI_DEVICE_ID_NX2_5709 ||
+	    pdev->device == PCI_DEVICE_ID_NX2_5709S) {
+		u8 rev;
+
+		pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
+		if (rev < 0x10) {
+			pci_dev_put(pdev);
+			goto cnic_err;
+		}
+	}
+	pci_dev_put(pdev);
+
+	cdev = cnic_alloc_dev(dev, pdev);
+	if (cdev == NULL)
+		goto cnic_err;
+
+	set_bit(CNIC_F_BNX2_CLASS, &cdev->flags);
+	cdev->submit_kwqes = cnic_submit_bnx2_kwqes;
+
+	cp = cdev->cnic_priv;
+	cp->ethdev = ethdev;
+	cdev->pcidev = pdev;
+
+	cp->cnic_ops = &cnic_bnx2_ops;
+	cp->start_hw = cnic_start_bnx2_hw;
+	cp->stop_hw = cnic_stop_bnx2_hw;
+	cp->setup_pgtbl = cnic_setup_page_tbl;
+	cp->alloc_resc = cnic_alloc_bnx2_resc;
+	cp->free_resc = cnic_free_resc;
+	cp->start_cm = cnic_cm_init_bnx2_hw;
+	cp->stop_cm = cnic_cm_stop_bnx2_hw;
+	cp->enable_int = cnic_enable_bnx2_int;
+	cp->disable_int_sync = cnic_disable_bnx2_int_sync;
+	cp->close_conn = cnic_close_bnx2_conn;
+	cp->next_idx = cnic_bnx2_next_idx;
+	cp->hw_idx = cnic_bnx2_hw_idx;
+	return cdev;
+
+cnic_err:
+	dev_put(dev);
+	return NULL;
+}
+
+static struct cnic_dev *is_cnic_dev(struct net_device *dev)
+{
+	struct ethtool_drvinfo drvinfo;
+	struct cnic_dev *cdev = NULL;
+
+	if (dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) {
+		memset(&drvinfo, 0, sizeof(drvinfo));
+		dev->ethtool_ops->get_drvinfo(dev, &drvinfo);
+
+		if (!strcmp(drvinfo.driver, "bnx2"))
+			cdev = init_bnx2_cnic(dev);
+		if (cdev) {
+			write_lock(&cnic_dev_lock);
+			list_add(&cdev->list, &cnic_dev_list);
+			write_unlock(&cnic_dev_lock);
+		}
+	}
+	return cdev;
+}
+
+/**
+ * netdev event handler
+ */
+static int cnic_netdev_event(struct notifier_block *this, unsigned long event,
+							 void *ptr)
+{
+	struct net_device *netdev = ptr;
+	struct cnic_dev *dev;
+	int if_type;
+	int new_dev = 0;
+
+	dev = cnic_from_netdev(netdev);
+
+	if (!dev && (event == NETDEV_REGISTER || event == NETDEV_UP)) {
+		/* Check for the hot-plug device */
+		dev = is_cnic_dev(netdev);
+		if (dev) {
+			new_dev = 1;
+			cnic_hold(dev);
+		}
+	}
+	if (dev) {
+		struct cnic_local *cp = dev->cnic_priv;
+
+		if (new_dev)
+			cnic_ulp_init(dev);
+		else if (event == NETDEV_UNREGISTER)
+			cnic_ulp_exit(dev);
+		else if (event == NETDEV_UP) {
+			mutex_lock(&cnic_lock);
+			if (!cnic_start_hw(dev))
+				cnic_ulp_start(dev);
+			mutex_unlock(&cnic_lock);
+		}
+
+		rcu_read_lock();
+		for (if_type = 0; if_type < MAX_CNIC_ULP_TYPE; if_type++) {
+			struct cnic_ulp_ops *ulp_ops;
+			void *ctx;
+
+			ulp_ops = rcu_dereference(cp->ulp_ops[if_type]);
+			if (!ulp_ops || !ulp_ops->indicate_netevent)
+				continue;
+
+			ctx = cp->ulp_handle[if_type];
+
+			ulp_ops->indicate_netevent(ctx, event);
+		}
+		rcu_read_unlock();
+
+		if (event == NETDEV_GOING_DOWN) {
+			mutex_lock(&cnic_lock);
+			cnic_ulp_stop(dev);
+			cnic_stop_hw(dev);
+			mutex_unlock(&cnic_lock);
+		} else if (event == NETDEV_UNREGISTER) {
+			write_lock(&cnic_dev_lock);
+			list_del_init(&dev->list);
+			write_unlock(&cnic_dev_lock);
+
+			cnic_put(dev);
+			cnic_free_dev(dev);
+			goto done;
+		}
+		cnic_put(dev);
+	}
+done:
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cnic_netdev_notifier = {
+	.notifier_call = cnic_netdev_event
+};
+
+static void cnic_release(void)
+{
+	struct cnic_dev *dev;
+
+	while (!list_empty(&cnic_dev_list)) {
+		dev = list_entry(cnic_dev_list.next, struct cnic_dev, list);
+		if (test_bit(CNIC_F_CNIC_UP, &dev->flags)) {
+			cnic_ulp_stop(dev);
+			cnic_stop_hw(dev);
+		}
+
+		cnic_ulp_exit(dev);
+		list_del_init(&dev->list);
+		cnic_free_dev(dev);
+	}
+}
+
+static int __init cnic_init(void)
+{
+	int rc = 0;
+
+	printk(KERN_INFO "%s", version);
+
+	rc = register_netdevice_notifier(&cnic_netdev_notifier);
+	if (rc) {
+		cnic_release();
+		return rc;
+	}
+
+	return 0;
+}
+
+static void __exit cnic_exit(void)
+{
+	unregister_netdevice_notifier(&cnic_netdev_notifier);
+	cnic_release();
+	return;
+}
+
+module_init(cnic_init);
+module_exit(cnic_exit);
diff --git a/drivers/net/cnic.h b/drivers/net/cnic.h
new file mode 100644
index 0000000..5192d4a
--- /dev/null
+++ b/drivers/net/cnic.h
@@ -0,0 +1,299 @@
+/* cnic.h: Broadcom CNIC core network driver.
+ *
+ * Copyright (c) 2006-2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ */
+
+
+#ifndef CNIC_H
+#define CNIC_H
+
+#define KWQ_PAGE_CNT	4
+#define KCQ_PAGE_CNT	16
+
+#define KWQ_CID 		24
+#define KCQ_CID 		25
+
+/*
+ *	krnlq_context definition
+ */
+#define L5_KRNLQ_FLAGS	0x00000000
+#define L5_KRNLQ_SIZE	0x00000000
+#define L5_KRNLQ_TYPE	0x00000000
+#define KRNLQ_FLAGS_PG_SZ					(0xf<<0)
+#define KRNLQ_FLAGS_PG_SZ_256					(0<<0)
+#define KRNLQ_FLAGS_PG_SZ_512					(1<<0)
+#define KRNLQ_FLAGS_PG_SZ_1K					(2<<0)
+#define KRNLQ_FLAGS_PG_SZ_2K					(3<<0)
+#define KRNLQ_FLAGS_PG_SZ_4K					(4<<0)
+#define KRNLQ_FLAGS_PG_SZ_8K					(5<<0)
+#define KRNLQ_FLAGS_PG_SZ_16K					(6<<0)
+#define KRNLQ_FLAGS_PG_SZ_32K					(7<<0)
+#define KRNLQ_FLAGS_PG_SZ_64K					(8<<0)
+#define KRNLQ_FLAGS_PG_SZ_128K					(9<<0)
+#define KRNLQ_FLAGS_PG_SZ_256K					(10<<0)
+#define KRNLQ_FLAGS_PG_SZ_512K					(11<<0)
+#define KRNLQ_FLAGS_PG_SZ_1M					(12<<0)
+#define KRNLQ_FLAGS_PG_SZ_2M					(13<<0)
+#define KRNLQ_FLAGS_QE_SELF_SEQ					(1<<15)
+#define KRNLQ_SIZE_TYPE_SIZE	((((0x28 + 0x1f) & ~0x1f) / 0x20) << 16)
+#define KRNLQ_TYPE_TYPE						(0xf<<28)
+#define KRNLQ_TYPE_TYPE_EMPTY					(0<<28)
+#define KRNLQ_TYPE_TYPE_KRNLQ					(6<<28)
+
+#define L5_KRNLQ_HOST_QIDX		0x00000004
+#define L5_KRNLQ_HOST_FW_QIDX		0x00000008
+#define L5_KRNLQ_NX_QE_SELF_SEQ 	0x0000000c
+#define L5_KRNLQ_QE_SELF_SEQ_MAX	0x0000000c
+#define L5_KRNLQ_NX_QE_HADDR_HI 	0x00000010
+#define L5_KRNLQ_NX_QE_HADDR_LO 	0x00000014
+#define L5_KRNLQ_PGTBL_PGIDX		0x00000018
+#define L5_KRNLQ_NX_PG_QIDX 		0x00000018
+#define L5_KRNLQ_PGTBL_NPAGES		0x0000001c
+#define L5_KRNLQ_QIDX_INCR		0x0000001c
+#define L5_KRNLQ_PGTBL_HADDR_HI 	0x00000020
+#define L5_KRNLQ_PGTBL_HADDR_LO 	0x00000024
+
+#define BNX2_PG_CTX_MAP			0x1a0034
+#define BNX2_ISCSI_CTX_MAP		0x1a0074
+
+struct cnic_redirect_entry {
+	struct dst_entry *old_dst;
+	struct dst_entry *new_dst;
+};
+
+#define MAX_COMPLETED_KCQE	64
+
+#define MAX_CNIC_L5_CONTEXT	256
+
+#define MAX_CM_SK_TBL_SZ	MAX_CNIC_L5_CONTEXT
+
+#define MAX_ISCSI_TBL_SZ	256
+
+#define CNIC_LOCAL_PORT_MIN	60000
+#define CNIC_LOCAL_PORT_MAX	61000
+#define CNIC_LOCAL_PORT_RANGE	(CNIC_LOCAL_PORT_MAX - CNIC_LOCAL_PORT_MIN)
+
+#define KWQE_CNT (BCM_PAGE_SIZE / sizeof(struct kwqe))
+#define KCQE_CNT (BCM_PAGE_SIZE / sizeof(struct kcqe))
+#define MAX_KWQE_CNT (KWQE_CNT - 1)
+#define MAX_KCQE_CNT (KCQE_CNT - 1)
+
+#define MAX_KWQ_IDX	((KWQ_PAGE_CNT * KWQE_CNT) - 1)
+#define MAX_KCQ_IDX	((KCQ_PAGE_CNT * KCQE_CNT) - 1)
+
+#define KWQ_PG(x) (((x) & ~MAX_KWQE_CNT) >> (BCM_PAGE_BITS - 5))
+#define KWQ_IDX(x) ((x) & MAX_KWQE_CNT)
+
+#define KCQ_PG(x) (((x) & ~MAX_KCQE_CNT) >> (BCM_PAGE_BITS - 5))
+#define KCQ_IDX(x) ((x) & MAX_KCQE_CNT)
+
+#define BNX2X_NEXT_KCQE(x) (((x) & (MAX_KCQE_CNT - 1)) ==		\
+		(MAX_KCQE_CNT - 1)) ?					\
+		(x) + 2 : (x) + 1
+
+#define BNX2X_KWQ_DATA_PG(cp, x) ((x) / (cp)->kwq_16_data_pp)
+#define BNX2X_KWQ_DATA_IDX(cp, x) ((x) % (cp)->kwq_16_data_pp)
+#define BNX2X_KWQ_DATA(cp, x)						\
+	&(cp)->kwq_16_data[BNX2X_KWQ_DATA_PG(cp, x)][BNX2X_KWQ_DATA_IDX(cp, x)]
+
+#define DEF_IPID_COUNT		0xc001
+
+#define DEF_KA_TIMEOUT		10000
+#define DEF_KA_INTERVAL		300000
+#define DEF_KA_MAX_PROBE_COUNT	3
+#define DEF_TOS			0
+#define DEF_TTL			0xfe
+#define DEF_SND_SEQ_SCALE	0
+#define DEF_RCV_BUF		0xffff
+#define DEF_SND_BUF		0xffff
+#define DEF_SEED		0
+#define DEF_MAX_RT_TIME		500
+#define DEF_MAX_DA_COUNT	2
+#define DEF_SWS_TIMER		1000
+#define DEF_MAX_CWND		0xffff
+
+struct cnic_ctx {
+	u32		cid;
+	void		*ctx;
+	dma_addr_t	mapping;
+};
+
+#define BNX2_MAX_CID		0x2000
+
+struct cnic_dma {
+	int		num_pages;
+	void		**pg_arr;
+	dma_addr_t	*pg_map_arr;
+	int		pgtbl_size;
+	u32		*pgtbl;
+	dma_addr_t	pgtbl_map;
+};
+
+struct cnic_id_tbl {
+	spinlock_t	lock;
+	u32		start;
+	u32		max;
+	u32		next;
+	unsigned long	*table;
+};
+
+#define CNIC_KWQ16_DATA_SIZE	128
+
+struct kwqe_16_data {
+	u8	data[CNIC_KWQ16_DATA_SIZE];
+};
+
+struct cnic_iscsi {
+	struct cnic_dma		task_array_info;
+	struct cnic_dma		r2tq_info;
+	struct cnic_dma		hq_info;
+};
+
+struct cnic_context {
+	u32			cid;
+	struct kwqe_16_data	*kwqe_data;
+	dma_addr_t		kwqe_data_mapping;
+	wait_queue_head_t	waitq;
+	int			wait_cond;
+	unsigned long		timestamp;
+	u32			ctx_flags;
+#define	CTX_FL_OFFLD_START	0x00000001
+	u8			ulp_proto_id;
+	union {
+		struct cnic_iscsi	*iscsi;
+	} proto;
+};
+
+struct cnic_local {
+
+	spinlock_t cnic_ulp_lock;
+	void *ulp_handle[MAX_CNIC_ULP_TYPE];
+	unsigned long ulp_flags[MAX_CNIC_ULP_TYPE];
+#define ULP_F_INIT	0
+#define ULP_F_START	1
+	struct cnic_ulp_ops *ulp_ops[MAX_CNIC_ULP_TYPE];
+
+	/* protected by ulp_lock */
+	u32 cnic_local_flags;
+#define	CNIC_LCL_FL_KWQ_INIT	0x00000001
+
+	struct cnic_dev *dev;
+
+	struct cnic_eth_dev *ethdev;
+
+	void		*l2_ring;
+	dma_addr_t	l2_ring_map;
+	int		l2_ring_size;
+	int		l2_rx_ring_size;
+
+	void		*l2_buf;
+	dma_addr_t	l2_buf_map;
+	int		l2_buf_size;
+	int		l2_single_buf_size;
+
+	u16		*rx_cons_ptr;
+	u16		*tx_cons_ptr;
+	u16		rx_cons;
+	u16		tx_cons;
+
+	u32 kwq_cid_addr;
+	u32 kcq_cid_addr;
+
+	struct cnic_dma		kwq_info;
+	struct kwqe		**kwq;
+
+	struct cnic_dma		kwq_16_data_info;
+
+	u16		max_kwq_idx;
+
+	u16		kwq_prod_idx;
+	u32		kwq_io_addr;
+
+	u16		*kwq_con_idx_ptr;
+	u16		kwq_con_idx;
+
+	struct cnic_dma	kcq_info;
+	struct kcqe	**kcq;
+
+	u16		kcq_prod_idx;
+	u32		kcq_io_addr;
+
+	void				*status_blk;
+	struct status_block_msix	*bnx2_status_blk;
+	struct host_status_block	*bnx2x_status_blk;
+
+	u32				status_blk_num;
+	u32				int_num;
+	u32				last_status_idx;
+	struct tasklet_struct		cnic_irq_task;
+
+	struct kcqe		*completed_kcq[MAX_COMPLETED_KCQE];
+
+	struct cnic_sock	*csk_tbl;
+	struct cnic_id_tbl	csk_port_tbl;
+
+	struct cnic_dma		conn_buf_info;
+	struct cnic_dma		gbl_buf_info;
+
+	struct cnic_iscsi	*iscsi_tbl;
+	struct cnic_context	*ctx_tbl;
+	struct cnic_id_tbl	cid_tbl;
+	int			max_iscsi_conn;
+	atomic_t		iscsi_conn;
+
+	/* per connection parameters */
+	int			num_iscsi_tasks;
+	int			num_ccells;
+	int			task_array_size;
+	int			r2tq_size;
+	int			hq_size;
+	int			num_cqs;
+
+	struct cnic_ctx		*ctx_arr;
+	int			ctx_blks;
+	int			ctx_blk_size;
+	int			cids_per_blk;
+
+	u32			chip_id;
+	int			func;
+	u32			shmem_base;
+
+	u32			uio_dev;
+	struct uio_info		*cnic_uinfo;
+
+	struct cnic_ops		*cnic_ops;
+	int			(*start_hw)(struct cnic_dev *);
+	void			(*stop_hw)(struct cnic_dev *);
+	void			(*setup_pgtbl)(struct cnic_dev *,
+					       struct cnic_dma *);
+	int			(*alloc_resc)(struct cnic_dev *);
+	void			(*free_resc)(struct cnic_dev *);
+	int			(*start_cm)(struct cnic_dev *);
+	void			(*stop_cm)(struct cnic_dev *);
+	void			(*enable_int)(struct cnic_dev *);
+	void			(*disable_int_sync)(struct cnic_dev *);
+	void			(*ack_int)(struct cnic_dev *);
+	void			(*close_conn)(struct cnic_sock *, u32 opcode);
+	u16			(*next_idx)(u16);
+	u16			(*hw_idx)(u16);
+};
+
+struct bnx2x_bd_chain_next {
+	u32	addr_lo;
+	u32	addr_hi;
+	u8	reserved[8];
+};
+
+#define ISCSI_RAMROD_CMD_ID_UPDATE_CONN		(ISCSI_KCQE_OPCODE_UPDATE_CONN)
+#define ISCSI_RAMROD_CMD_ID_INIT		(ISCSI_KCQE_OPCODE_INIT)
+
+#define CDU_REGION_NUMBER_XCM_AG 2
+#define CDU_REGION_NUMBER_UCM_AG 4
+
+#endif
+
diff --git a/drivers/net/cnic_defs.h b/drivers/net/cnic_defs.h
new file mode 100644
index 0000000..cee80f6
--- /dev/null
+++ b/drivers/net/cnic_defs.h
@@ -0,0 +1,580 @@
+
+/* cnic.c: Broadcom CNIC core network driver.
+ *
+ * Copyright (c) 2006-2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ */
+
+#ifndef CNIC_DEFS_H
+#define CNIC_DEFS_H
+
+/* KWQ (kernel work queue) request op codes */
+#define L2_KWQE_OPCODE_VALUE_FLUSH                  (4)
+
+#define L4_KWQE_OPCODE_VALUE_CONNECT1               (50)
+#define L4_KWQE_OPCODE_VALUE_CONNECT2               (51)
+#define L4_KWQE_OPCODE_VALUE_CONNECT3               (52)
+#define L4_KWQE_OPCODE_VALUE_RESET                  (53)
+#define L4_KWQE_OPCODE_VALUE_CLOSE                  (54)
+#define L4_KWQE_OPCODE_VALUE_UPDATE_SECRET          (60)
+#define L4_KWQE_OPCODE_VALUE_INIT_ULP               (61)
+
+#define L4_KWQE_OPCODE_VALUE_OFFLOAD_PG             (1)
+#define L4_KWQE_OPCODE_VALUE_UPDATE_PG              (9)
+#define L4_KWQE_OPCODE_VALUE_UPLOAD_PG              (14)
+
+#define L5CM_RAMROD_CMD_ID_BASE			(0x80)
+#define L5CM_RAMROD_CMD_ID_TCP_CONNECT		(L5CM_RAMROD_CMD_ID_BASE + 3)
+#define L5CM_RAMROD_CMD_ID_CLOSE		(L5CM_RAMROD_CMD_ID_BASE + 12)
+#define L5CM_RAMROD_CMD_ID_ABORT		(L5CM_RAMROD_CMD_ID_BASE + 13)
+#define L5CM_RAMROD_CMD_ID_SEARCHER_DELETE	(L5CM_RAMROD_CMD_ID_BASE + 14)
+#define L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD	(L5CM_RAMROD_CMD_ID_BASE + 15)
+
+/* KCQ (kernel completion queue) response op codes */
+#define L4_KCQE_OPCODE_VALUE_CLOSE_COMP             (53)
+#define L4_KCQE_OPCODE_VALUE_RESET_COMP             (54)
+#define L4_KCQE_OPCODE_VALUE_FW_TCP_UPDATE          (55)
+#define L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE       (56)
+#define L4_KCQE_OPCODE_VALUE_RESET_RECEIVED         (57)
+#define L4_KCQE_OPCODE_VALUE_CLOSE_RECEIVED         (58)
+#define L4_KCQE_OPCODE_VALUE_INIT_ULP               (61)
+
+#define L4_KCQE_OPCODE_VALUE_OFFLOAD_PG             (1)
+#define L4_KCQE_OPCODE_VALUE_UPDATE_PG              (9)
+#define L4_KCQE_OPCODE_VALUE_UPLOAD_PG              (14)
+
+/* KCQ (kernel completion queue) completion status */
+#define L4_KCQE_COMPLETION_STATUS_SUCCESS		    (0)
+#define L4_KCQE_COMPLETION_STATUS_TIMEOUT        (0x93)
+
+#define L4_LAYER_CODE (4)
+#define L2_LAYER_CODE (2)
+
+/*
+ * L4 KCQ CQE
+ */
+struct l4_kcq {
+	u32 cid;
+	u32 pg_cid;
+	u32 conn_id;
+	u32 pg_host_opaque;
+#if defined(__BIG_ENDIAN)
+	u16 status;
+	u16 reserved1;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved1;
+	u16 status;
+#endif
+	u32 reserved2[2];
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KCQ_RESERVED3 (0x7<<0)
+#define L4_KCQ_RESERVED3_SHIFT 0
+#define L4_KCQ_RAMROD_COMPLETION (0x1<<3) /* Everest only */
+#define L4_KCQ_RAMROD_COMPLETION_SHIFT 3
+#define L4_KCQ_LAYER_CODE (0x7<<4)
+#define L4_KCQ_LAYER_CODE_SHIFT 4
+#define L4_KCQ_RESERVED4 (0x1<<7)
+#define L4_KCQ_RESERVED4_SHIFT 7
+	u8 op_code;
+	u16 qe_self_seq;
+#elif defined(__LITTLE_ENDIAN)
+	u16 qe_self_seq;
+	u8 op_code;
+	u8 flags;
+#define L4_KCQ_RESERVED3 (0xF<<0)
+#define L4_KCQ_RESERVED3_SHIFT 0
+#define L4_KCQ_RAMROD_COMPLETION (0x1<<3) /* Everest only */
+#define L4_KCQ_RAMROD_COMPLETION_SHIFT 3
+#define L4_KCQ_LAYER_CODE (0x7<<4)
+#define L4_KCQ_LAYER_CODE_SHIFT 4
+#define L4_KCQ_RESERVED4 (0x1<<7)
+#define L4_KCQ_RESERVED4_SHIFT 7
+#endif
+};
+
+
+/*
+ * L4 KCQ CQE PG upload
+ */
+struct l4_kcq_upload_pg {
+	u32 pg_cid;
+#if defined(__BIG_ENDIAN)
+	u16 pg_status;
+	u16 pg_ipid_count;
+#elif defined(__LITTLE_ENDIAN)
+	u16 pg_ipid_count;
+	u16 pg_status;
+#endif
+	u32 reserved1[5];
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KCQ_UPLOAD_PG_RESERVED3 (0xF<<0)
+#define L4_KCQ_UPLOAD_PG_RESERVED3_SHIFT 0
+#define L4_KCQ_UPLOAD_PG_LAYER_CODE (0x7<<4)
+#define L4_KCQ_UPLOAD_PG_LAYER_CODE_SHIFT 4
+#define L4_KCQ_UPLOAD_PG_RESERVED4 (0x1<<7)
+#define L4_KCQ_UPLOAD_PG_RESERVED4_SHIFT 7
+	u8 op_code;
+	u16 qe_self_seq;
+#elif defined(__LITTLE_ENDIAN)
+	u16 qe_self_seq;
+	u8 op_code;
+	u8 flags;
+#define L4_KCQ_UPLOAD_PG_RESERVED3 (0xF<<0)
+#define L4_KCQ_UPLOAD_PG_RESERVED3_SHIFT 0
+#define L4_KCQ_UPLOAD_PG_LAYER_CODE (0x7<<4)
+#define L4_KCQ_UPLOAD_PG_LAYER_CODE_SHIFT 4
+#define L4_KCQ_UPLOAD_PG_RESERVED4 (0x1<<7)
+#define L4_KCQ_UPLOAD_PG_RESERVED4_SHIFT 7
+#endif
+};
+
+
+/*
+ * Gracefully close the connection request
+ */
+struct l4_kwq_close_req {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_CLOSE_REQ_RESERVED1 (0xF<<0)
+#define L4_KWQ_CLOSE_REQ_RESERVED1_SHIFT 0
+#define L4_KWQ_CLOSE_REQ_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CLOSE_REQ_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CLOSE_REQ_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CLOSE_REQ_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_CLOSE_REQ_RESERVED1 (0xF<<0)
+#define L4_KWQ_CLOSE_REQ_RESERVED1_SHIFT 0
+#define L4_KWQ_CLOSE_REQ_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CLOSE_REQ_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CLOSE_REQ_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CLOSE_REQ_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 cid;
+	u32 reserved2[6];
+};
+
+
+/*
+ * The first request to be passed in order to establish connection in option2
+ */
+struct l4_kwq_connect_req1 {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ1_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ1_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ1_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ1_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ1_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u8 reserved0;
+	u8 conn_flags;
+#define L4_KWQ_CONNECT_REQ1_IS_PG_HOST_OPAQUE (0x1<<0)
+#define L4_KWQ_CONNECT_REQ1_IS_PG_HOST_OPAQUE_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_IP_V6 (0x1<<1)
+#define L4_KWQ_CONNECT_REQ1_IP_V6_SHIFT 1
+#define L4_KWQ_CONNECT_REQ1_PASSIVE_FLAG (0x1<<2)
+#define L4_KWQ_CONNECT_REQ1_PASSIVE_FLAG_SHIFT 2
+#define L4_KWQ_CONNECT_REQ1_RSRV (0x1F<<3)
+#define L4_KWQ_CONNECT_REQ1_RSRV_SHIFT 3
+#elif defined(__LITTLE_ENDIAN)
+	u8 conn_flags;
+#define L4_KWQ_CONNECT_REQ1_IS_PG_HOST_OPAQUE (0x1<<0)
+#define L4_KWQ_CONNECT_REQ1_IS_PG_HOST_OPAQUE_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_IP_V6 (0x1<<1)
+#define L4_KWQ_CONNECT_REQ1_IP_V6_SHIFT 1
+#define L4_KWQ_CONNECT_REQ1_PASSIVE_FLAG (0x1<<2)
+#define L4_KWQ_CONNECT_REQ1_PASSIVE_FLAG_SHIFT 2
+#define L4_KWQ_CONNECT_REQ1_RSRV (0x1F<<3)
+#define L4_KWQ_CONNECT_REQ1_RSRV_SHIFT 3
+	u8 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ1_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ1_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ1_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ1_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ1_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 cid;
+	u32 pg_cid;
+	u32 src_ip;
+	u32 dst_ip;
+#if defined(__BIG_ENDIAN)
+	u16 dst_port;
+	u16 src_port;
+#elif defined(__LITTLE_ENDIAN)
+	u16 src_port;
+	u16 dst_port;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 rsrv1[3];
+	u8 tcp_flags;
+#define L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK (0x1<<0)
+#define L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_KEEP_ALIVE (0x1<<1)
+#define L4_KWQ_CONNECT_REQ1_KEEP_ALIVE_SHIFT 1
+#define L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE (0x1<<2)
+#define L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE_SHIFT 2
+#define L4_KWQ_CONNECT_REQ1_TIME_STAMP (0x1<<3)
+#define L4_KWQ_CONNECT_REQ1_TIME_STAMP_SHIFT 3
+#define L4_KWQ_CONNECT_REQ1_SACK (0x1<<4)
+#define L4_KWQ_CONNECT_REQ1_SACK_SHIFT 4
+#define L4_KWQ_CONNECT_REQ1_SEG_SCALING (0x1<<5)
+#define L4_KWQ_CONNECT_REQ1_SEG_SCALING_SHIFT 5
+#define L4_KWQ_CONNECT_REQ1_RESERVED2 (0x3<<6)
+#define L4_KWQ_CONNECT_REQ1_RESERVED2_SHIFT 6
+#elif defined(__LITTLE_ENDIAN)
+	u8 tcp_flags;
+#define L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK (0x1<<0)
+#define L4_KWQ_CONNECT_REQ1_NO_DELAY_ACK_SHIFT 0
+#define L4_KWQ_CONNECT_REQ1_KEEP_ALIVE (0x1<<1)
+#define L4_KWQ_CONNECT_REQ1_KEEP_ALIVE_SHIFT 1
+#define L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE (0x1<<2)
+#define L4_KWQ_CONNECT_REQ1_NAGLE_ENABLE_SHIFT 2
+#define L4_KWQ_CONNECT_REQ1_TIME_STAMP (0x1<<3)
+#define L4_KWQ_CONNECT_REQ1_TIME_STAMP_SHIFT 3
+#define L4_KWQ_CONNECT_REQ1_SACK (0x1<<4)
+#define L4_KWQ_CONNECT_REQ1_SACK_SHIFT 4
+#define L4_KWQ_CONNECT_REQ1_SEG_SCALING (0x1<<5)
+#define L4_KWQ_CONNECT_REQ1_SEG_SCALING_SHIFT 5
+#define L4_KWQ_CONNECT_REQ1_RESERVED2 (0x3<<6)
+#define L4_KWQ_CONNECT_REQ1_RESERVED2_SHIFT 6
+	u8 rsrv1[3];
+#endif
+	u32 rsrv2;
+};
+
+
+/*
+ * The second ( optional )request to be passed in order to establish
+ * connection in option2 - for IPv6 only
+ */
+struct l4_kwq_connect_req2 {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ2_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ2_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ2_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ2_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u8 reserved0;
+	u8 rsrv;
+#elif defined(__LITTLE_ENDIAN)
+	u8 rsrv;
+	u8 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ2_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ2_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ2_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ2_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ2_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 reserved2;
+	u32 src_ip_v6_2;
+	u32 src_ip_v6_3;
+	u32 src_ip_v6_4;
+	u32 dst_ip_v6_2;
+	u32 dst_ip_v6_3;
+	u32 dst_ip_v6_4;
+};
+
+
+/*
+ * The third ( and last )request to be passed in order to establish
+ * connection in option2
+ */
+struct l4_kwq_connect_req3 {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ3_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ3_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ3_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ3_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_CONNECT_REQ3_RESERVED1 (0xF<<0)
+#define L4_KWQ_CONNECT_REQ3_RESERVED1_SHIFT 0
+#define L4_KWQ_CONNECT_REQ3_LAYER_CODE (0x7<<4)
+#define L4_KWQ_CONNECT_REQ3_LAYER_CODE_SHIFT 4
+#define L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_CONNECT_REQ3_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 ka_timeout;
+	u32 ka_interval ;
+#if defined(__BIG_ENDIAN)
+	u8 snd_seq_scale;
+	u8 ttl;
+	u8 tos;
+	u8 ka_max_probe_count;
+#elif defined(__LITTLE_ENDIAN)
+	u8 ka_max_probe_count;
+	u8 tos;
+	u8 ttl;
+	u8 snd_seq_scale;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 pmtu;
+	u16 mss;
+#elif defined(__LITTLE_ENDIAN)
+	u16 mss;
+	u16 pmtu;
+#endif
+	u32 rcv_buf;
+	u32 snd_buf;
+	u32 seed;
+};
+
+
+/*
+ * a KWQE request to offload a PG connection
+ */
+struct l4_kwq_offload_pg {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_OFFLOAD_PG_RESERVED1 (0xF<<0)
+#define L4_KWQ_OFFLOAD_PG_RESERVED1_SHIFT 0
+#define L4_KWQ_OFFLOAD_PG_LAYER_CODE (0x7<<4)
+#define L4_KWQ_OFFLOAD_PG_LAYER_CODE_SHIFT 4
+#define L4_KWQ_OFFLOAD_PG_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_OFFLOAD_PG_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_OFFLOAD_PG_RESERVED1 (0xF<<0)
+#define L4_KWQ_OFFLOAD_PG_RESERVED1_SHIFT 0
+#define L4_KWQ_OFFLOAD_PG_LAYER_CODE (0x7<<4)
+#define L4_KWQ_OFFLOAD_PG_LAYER_CODE_SHIFT 4
+#define L4_KWQ_OFFLOAD_PG_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_OFFLOAD_PG_LINKED_WITH_NEXT_SHIFT 7
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 l2hdr_nbytes;
+	u8 pg_flags;
+#define L4_KWQ_OFFLOAD_PG_SNAP_ENCAP (0x1<<0)
+#define L4_KWQ_OFFLOAD_PG_SNAP_ENCAP_SHIFT 0
+#define L4_KWQ_OFFLOAD_PG_VLAN_TAGGING (0x1<<1)
+#define L4_KWQ_OFFLOAD_PG_VLAN_TAGGING_SHIFT 1
+#define L4_KWQ_OFFLOAD_PG_RESERVED2 (0x3F<<2)
+#define L4_KWQ_OFFLOAD_PG_RESERVED2_SHIFT 2
+	u8 da0;
+	u8 da1;
+#elif defined(__LITTLE_ENDIAN)
+	u8 da1;
+	u8 da0;
+	u8 pg_flags;
+#define L4_KWQ_OFFLOAD_PG_SNAP_ENCAP (0x1<<0)
+#define L4_KWQ_OFFLOAD_PG_SNAP_ENCAP_SHIFT 0
+#define L4_KWQ_OFFLOAD_PG_VLAN_TAGGING (0x1<<1)
+#define L4_KWQ_OFFLOAD_PG_VLAN_TAGGING_SHIFT 1
+#define L4_KWQ_OFFLOAD_PG_RESERVED2 (0x3F<<2)
+#define L4_KWQ_OFFLOAD_PG_RESERVED2_SHIFT 2
+	u8 l2hdr_nbytes;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 da2;
+	u8 da3;
+	u8 da4;
+	u8 da5;
+#elif defined(__LITTLE_ENDIAN)
+	u8 da5;
+	u8 da4;
+	u8 da3;
+	u8 da2;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 sa0;
+	u8 sa1;
+	u8 sa2;
+	u8 sa3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 sa3;
+	u8 sa2;
+	u8 sa1;
+	u8 sa0;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 sa4;
+	u8 sa5;
+	u16 etype;
+#elif defined(__LITTLE_ENDIAN)
+	u16 etype;
+	u8 sa5;
+	u8 sa4;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 vlan_tag;
+	u16 ipid_start;
+#elif defined(__LITTLE_ENDIAN)
+	u16 ipid_start;
+	u16 vlan_tag;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 ipid_count;
+	u16 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved3;
+	u16 ipid_count;
+#endif
+	u32 host_opaque;
+};
+
+
+/*
+ * Abortively close the connection request
+ */
+struct l4_kwq_reset_req {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_RESET_REQ_RESERVED1 (0xF<<0)
+#define L4_KWQ_RESET_REQ_RESERVED1_SHIFT 0
+#define L4_KWQ_RESET_REQ_LAYER_CODE (0x7<<4)
+#define L4_KWQ_RESET_REQ_LAYER_CODE_SHIFT 4
+#define L4_KWQ_RESET_REQ_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_RESET_REQ_LINKED_WITH_NEXT_SHIFT 7
+	u8 op_code;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_code;
+	u8 flags;
+#define L4_KWQ_RESET_REQ_RESERVED1 (0xF<<0)
+#define L4_KWQ_RESET_REQ_RESERVED1_SHIFT 0
+#define L4_KWQ_RESET_REQ_LAYER_CODE (0x7<<4)
+#define L4_KWQ_RESET_REQ_LAYER_CODE_SHIFT 4
+#define L4_KWQ_RESET_REQ_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_RESET_REQ_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 cid;
+	u32 reserved2[6];
+};
+
+
+/*
+ * a KWQE request to update a PG connection
+ */
+struct l4_kwq_update_pg {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_UPDATE_PG_RESERVED1 (0xF<<0)
+#define L4_KWQ_UPDATE_PG_RESERVED1_SHIFT 0
+#define L4_KWQ_UPDATE_PG_LAYER_CODE (0x7<<4)
+#define L4_KWQ_UPDATE_PG_LAYER_CODE_SHIFT 4
+#define L4_KWQ_UPDATE_PG_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_UPDATE_PG_LINKED_WITH_NEXT_SHIFT 7
+	u8 opcode;
+	u16 oper16;
+#elif defined(__LITTLE_ENDIAN)
+	u16 oper16;
+	u8 opcode;
+	u8 flags;
+#define L4_KWQ_UPDATE_PG_RESERVED1 (0xF<<0)
+#define L4_KWQ_UPDATE_PG_RESERVED1_SHIFT 0
+#define L4_KWQ_UPDATE_PG_LAYER_CODE (0x7<<4)
+#define L4_KWQ_UPDATE_PG_LAYER_CODE_SHIFT 4
+#define L4_KWQ_UPDATE_PG_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_UPDATE_PG_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 pg_cid;
+	u32 pg_host_opaque;
+#if defined(__BIG_ENDIAN)
+	u8 pg_valids;
+#define L4_KWQ_UPDATE_PG_VALIDS_IPID_COUNT (0x1<<0)
+#define L4_KWQ_UPDATE_PG_VALIDS_IPID_COUNT_SHIFT 0
+#define L4_KWQ_UPDATE_PG_VALIDS_DA (0x1<<1)
+#define L4_KWQ_UPDATE_PG_VALIDS_DA_SHIFT 1
+#define L4_KWQ_UPDATE_PG_RESERVERD2 (0x3F<<2)
+#define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2
+	u8 pg_unused_a;
+	u16 pg_ipid_count;
+#elif defined(__LITTLE_ENDIAN)
+	u16 pg_ipid_count;
+	u8 pg_unused_a;
+	u8 pg_valids;
+#define L4_KWQ_UPDATE_PG_VALIDS_IPID_COUNT (0x1<<0)
+#define L4_KWQ_UPDATE_PG_VALIDS_IPID_COUNT_SHIFT 0
+#define L4_KWQ_UPDATE_PG_VALIDS_DA (0x1<<1)
+#define L4_KWQ_UPDATE_PG_VALIDS_DA_SHIFT 1
+#define L4_KWQ_UPDATE_PG_RESERVERD2 (0x3F<<2)
+#define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 reserverd3;
+	u8 da0;
+	u8 da1;
+#elif defined(__LITTLE_ENDIAN)
+	u8 da1;
+	u8 da0;
+	u16 reserverd3;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 da2;
+	u8 da3;
+	u8 da4;
+	u8 da5;
+#elif defined(__LITTLE_ENDIAN)
+	u8 da5;
+	u8 da4;
+	u8 da3;
+	u8 da2;
+#endif
+	u32 reserved4;
+	u32 reserved5;
+};
+
+
+/*
+ * a KWQE request to upload a PG or L4 context
+ */
+struct l4_kwq_upload {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define L4_KWQ_UPLOAD_RESERVED1 (0xF<<0)
+#define L4_KWQ_UPLOAD_RESERVED1_SHIFT 0
+#define L4_KWQ_UPLOAD_LAYER_CODE (0x7<<4)
+#define L4_KWQ_UPLOAD_LAYER_CODE_SHIFT 4
+#define L4_KWQ_UPLOAD_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_UPLOAD_LINKED_WITH_NEXT_SHIFT 7
+	u8 opcode;
+	u16 oper16;
+#elif defined(__LITTLE_ENDIAN)
+	u16 oper16;
+	u8 opcode;
+	u8 flags;
+#define L4_KWQ_UPLOAD_RESERVED1 (0xF<<0)
+#define L4_KWQ_UPLOAD_RESERVED1_SHIFT 0
+#define L4_KWQ_UPLOAD_LAYER_CODE (0x7<<4)
+#define L4_KWQ_UPLOAD_LAYER_CODE_SHIFT 4
+#define L4_KWQ_UPLOAD_LINKED_WITH_NEXT (0x1<<7)
+#define L4_KWQ_UPLOAD_LINKED_WITH_NEXT_SHIFT 7
+#endif
+	u32 cid;
+	u32 reserved2[6];
+};
+
+#endif /* CNIC_DEFS_H */
diff --git a/drivers/net/cnic_if.h b/drivers/net/cnic_if.h
new file mode 100644
index 0000000..0638096
--- /dev/null
+++ b/drivers/net/cnic_if.h
@@ -0,0 +1,299 @@
+/* cnic_if.h: Broadcom CNIC core network driver.
+ *
+ * Copyright (c) 2006 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ */
+
+
+#ifndef CNIC_IF_H
+#define CNIC_IF_H
+
+#define CNIC_MODULE_VERSION	"2.0.0"
+#define CNIC_MODULE_RELDATE	"May 21, 2009"
+
+#define CNIC_ULP_RDMA		0
+#define CNIC_ULP_ISCSI		1
+#define CNIC_ULP_L4		2
+#define MAX_CNIC_ULP_TYPE_EXT	2
+#define MAX_CNIC_ULP_TYPE	3
+
+struct kwqe {
+	u32 kwqe_op_flag;
+
+#define KWQE_OPCODE_MASK	0x00ff0000
+#define KWQE_OPCODE_SHIFT	16
+#define KWQE_FLAGS_LAYER_SHIFT	28
+#define KWQE_OPCODE(x)		((x & KWQE_OPCODE_MASK) >> KWQE_OPCODE_SHIFT)
+
+	u32 kwqe_info0;
+	u32 kwqe_info1;
+	u32 kwqe_info2;
+	u32 kwqe_info3;
+	u32 kwqe_info4;
+	u32 kwqe_info5;
+	u32 kwqe_info6;
+};
+
+struct kwqe_16 {
+	u32 kwqe_info0;
+	u32 kwqe_info1;
+	u32 kwqe_info2;
+	u32 kwqe_info3;
+};
+
+struct kcqe {
+	u32 kcqe_info0;
+	u32 kcqe_info1;
+	u32 kcqe_info2;
+	u32 kcqe_info3;
+	u32 kcqe_info4;
+	u32 kcqe_info5;
+	u32 kcqe_info6;
+	u32 kcqe_op_flag;
+		#define KCQE_RAMROD_COMPLETION		(0x1<<27) /* Everest */
+		#define KCQE_FLAGS_LAYER_MASK		(0x7<<28)
+		#define KCQE_FLAGS_LAYER_MASK_MISC	(0<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L2	(2<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L3	(3<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L4	(4<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L5_RDMA	(5<<28)
+		#define KCQE_FLAGS_LAYER_MASK_L5_ISCSI	(6<<28)
+		#define KCQE_FLAGS_NEXT 		(1<<31)
+		#define KCQE_FLAGS_OPCODE_MASK		(0xff<<16)
+		#define KCQE_FLAGS_OPCODE_SHIFT		(16)
+		#define KCQE_OPCODE(op)			\
+		(((op) & KCQE_FLAGS_OPCODE_MASK) >> KCQE_FLAGS_OPCODE_SHIFT)
+};
+
+#define MAX_CNIC_CTL_DATA	64
+#define MAX_DRV_CTL_DATA	64
+
+#define CNIC_CTL_STOP_CMD		1
+#define CNIC_CTL_START_CMD		2
+#define CNIC_CTL_COMPLETION_CMD		3
+
+#define DRV_CTL_IO_WR_CMD		0x101
+#define DRV_CTL_IO_RD_CMD		0x102
+#define DRV_CTL_CTX_WR_CMD		0x103
+#define DRV_CTL_CTXTBL_WR_CMD		0x104
+#define DRV_CTL_COMPLETION_CMD		0x105
+
+struct cnic_ctl_completion {
+	u32	cid;
+};
+
+struct drv_ctl_completion {
+	u32	comp_count;
+};
+
+struct cnic_ctl_info {
+	int	cmd;
+	union {
+		struct cnic_ctl_completion comp;
+		char bytes[MAX_CNIC_CTL_DATA];
+	} data;
+};
+
+struct drv_ctl_io {
+	u32		cid_addr;
+	u32		offset;
+	u32		data;
+	dma_addr_t	dma_addr;
+};
+
+struct drv_ctl_info {
+	int	cmd;
+	union {
+		struct drv_ctl_completion comp;
+		struct drv_ctl_io io;
+		char bytes[MAX_DRV_CTL_DATA];
+	} data;
+};
+
+struct cnic_ops {
+	struct module	*cnic_owner;
+	/* Calls to these functions are protected by RCU.  When
+	 * unregistering, we wait for any calls to complete before
+	 * continuing.
+	 */
+	int		(*cnic_handler)(void *, void *);
+	int		(*cnic_ctl)(void *, struct cnic_ctl_info *);
+};
+
+#define MAX_CNIC_VEC	8
+
+struct cnic_irq {
+	unsigned int	vector;
+	void		*status_blk;
+	u32		status_blk_num;
+	u32		irq_flags;
+#define CNIC_IRQ_FL_MSIX		0x00000001
+};
+
+struct cnic_eth_dev {
+	struct module	*drv_owner;
+	u32		drv_state;
+#define CNIC_DRV_STATE_REGD		0x00000001
+#define CNIC_DRV_STATE_USING_MSIX	0x00000002
+	u32		chip_id;
+	u32		max_kwqe_pending;
+	struct pci_dev	*pdev;
+	void __iomem	*io_base;
+
+	u32		ctx_tbl_offset;
+	u32		ctx_tbl_len;
+	int		ctx_blk_size;
+	u32		starting_cid;
+	u32		max_iscsi_conn;
+	u32		max_fcoe_conn;
+	u32		max_rdma_conn;
+	u32		reserved0[2];
+
+	int		num_irq;
+	struct cnic_irq	irq_arr[MAX_CNIC_VEC];
+	int		(*drv_register_cnic)(struct net_device *,
+					     struct cnic_ops *, void *);
+	int		(*drv_unregister_cnic)(struct net_device *);
+	int		(*drv_submit_kwqes_32)(struct net_device *,
+					       struct kwqe *[], u32);
+	int		(*drv_submit_kwqes_16)(struct net_device *,
+					       struct kwqe_16 *[], u32);
+	int		(*drv_ctl)(struct net_device *, struct drv_ctl_info *);
+	unsigned long	reserved1[2];
+};
+
+struct cnic_sockaddr {
+	union {
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} local;
+	union {
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} remote;
+};
+
+struct cnic_sock {
+	struct cnic_dev *dev;
+	void	*context;
+	u32	src_ip[4];
+	u32	dst_ip[4];
+	u16	src_port;
+	u16	dst_port;
+	u16	vlan_id;
+	unsigned char old_ha[6];
+	unsigned char ha[6];
+	u32	mtu;
+	u32	cid;
+	u32	l5_cid;
+	u32	pg_cid;
+	int	ulp_type;
+
+	u32	ka_timeout;
+	u32	ka_interval;
+	u8	ka_max_probe_count;
+	u8	tos;
+	u8	ttl;
+	u8	snd_seq_scale;
+	u32	rcv_buf;
+	u32	snd_buf;
+	u32	seed;
+
+	unsigned long	tcp_flags;
+#define SK_TCP_NO_DELAY_ACK	0x1
+#define SK_TCP_KEEP_ALIVE	0x2
+#define SK_TCP_NAGLE		0x4
+#define SK_TCP_TIMESTAMP	0x8
+#define SK_TCP_SACK		0x10
+#define SK_TCP_SEG_SCALING	0x20
+	unsigned long	flags;
+#define SK_F_INUSE		0
+#define SK_F_OFFLD_COMPLETE	1
+#define SK_F_OFFLD_SCHED	2
+#define SK_F_PG_OFFLD_COMPLETE	3
+#define SK_F_CONNECT_START	4
+#define SK_F_IPV6		5
+#define SK_F_CLOSING		7
+
+	atomic_t ref_count;
+	u32 state;
+	struct kwqe kwqe1;
+	struct kwqe kwqe2;
+	struct kwqe kwqe3;
+};
+
+struct cnic_dev {
+	struct net_device	*netdev;
+	struct pci_dev		*pcidev;
+	void __iomem		*regview;
+	struct list_head	list;
+
+	int (*register_device)(struct cnic_dev *dev, int ulp_type,
+			       void *ulp_ctx);
+	int (*unregister_device)(struct cnic_dev *dev, int ulp_type);
+	int (*submit_kwqes)(struct cnic_dev *dev, struct kwqe *wqes[],
+				u32 num_wqes);
+	int (*submit_kwqes_16)(struct cnic_dev *dev, struct kwqe_16 *wqes[],
+				u32 num_wqes);
+
+	int (*cm_create)(struct cnic_dev *, int, u32, u32, struct cnic_sock **,
+			 void *);
+	int (*cm_destroy)(struct cnic_sock *);
+	int (*cm_connect)(struct cnic_sock *, struct cnic_sockaddr *);
+	int (*cm_abort)(struct cnic_sock *);
+	int (*cm_close)(struct cnic_sock *);
+	struct cnic_dev *(*cm_select_dev)(struct sockaddr_in *, int ulp_type);
+	int (*iscsi_nl_msg_recv)(struct cnic_dev *dev, u32 msg_type,
+				 char *data, u16 data_size);
+	unsigned long	flags;
+#define CNIC_F_CNIC_UP		1
+#define CNIC_F_BNX2_CLASS	3
+#define CNIC_F_BNX2X_CLASS	4
+	atomic_t	ref_count;
+	u8		mac_addr[6];
+
+	int		max_iscsi_conn;
+	int		max_fcoe_conn;
+	int		max_rdma_conn;
+
+	void		*cnic_priv;
+};
+
+#define CNIC_WR(dev, off, val)		writel(val, dev->regview + off)
+#define CNIC_WR16(dev, off, val)	writew(val, dev->regview + off)
+#define CNIC_WR8(dev, off, val)		writeb(val, dev->regview + off)
+#define CNIC_RD(dev, off)		readl(dev->regview + off)
+#define CNIC_RD16(dev, off)		readw(dev->regview + off)
+
+struct cnic_ulp_ops {
+	/* Calls to these functions are protected by RCU.  When
+	 * unregistering, we wait for any calls to complete before
+	 * continuing.
+	 */
+
+	void (*cnic_init)(struct cnic_dev *dev);
+	void (*cnic_exit)(struct cnic_dev *dev);
+	void (*cnic_start)(void *ulp_ctx);
+	void (*cnic_stop)(void *ulp_ctx);
+	void (*indicate_kcqes)(void *ulp_ctx, struct kcqe *cqes[],
+				u32 num_cqes);
+	void (*indicate_netevent)(void *ulp_ctx, unsigned long event);
+	void (*cm_connect_complete)(struct cnic_sock *);
+	void (*cm_close_complete)(struct cnic_sock *);
+	void (*cm_abort_complete)(struct cnic_sock *);
+	void (*cm_remote_close)(struct cnic_sock *);
+	void (*cm_remote_abort)(struct cnic_sock *);
+	void (*iscsi_nl_send_msg)(struct cnic_dev *dev, u32 msg_type,
+				  char *data, u16 data_size);
+	struct module *owner;
+};
+
+extern int cnic_register_driver(int ulp_type, struct cnic_ulp_ops *ulp_ops);
+
+extern int cnic_unregister_driver(int ulp_type);
+
+#endif
-- 
cgit v0.10.2


From cf4e6363859d30f24f8cd3e8930dbff399cc3550 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 8 Jun 2009 18:14:44 -0700
Subject: [SCSI] bnx2i: Add bnx2i iSCSI driver.

New iSCSI driver for Broadcom BNX2 devices.  The driver interfaces with
the CNIC driver to access the hardware.

Signed-off-by: Anil Veerabhadrappa <anilgv@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 759e150..6a19ed9 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -354,6 +354,7 @@ config ISCSI_TCP
 	 http://open-iscsi.org
 
 source "drivers/scsi/cxgb3i/Kconfig"
+source "drivers/scsi/bnx2i/Kconfig"
 
 config SGIWD93_SCSI
 	tristate "SGI WD93C93 SCSI Driver"
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 8795c30..25429ea 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -129,6 +129,7 @@ obj-$(CONFIG_SCSI_STEX)		+= stex.o
 obj-$(CONFIG_SCSI_MVSAS)	+= mvsas/
 obj-$(CONFIG_PS3_ROM)		+= ps3rom.o
 obj-$(CONFIG_SCSI_CXGB3_ISCSI)	+= libiscsi.o libiscsi_tcp.o cxgb3i/
+obj-$(CONFIG_SCSI_BNX2_ISCSI)	+= libiscsi.o bnx2i/
 
 obj-$(CONFIG_ARM)		+= arm/
 
diff --git a/drivers/scsi/bnx2i/57xx_iscsi_constants.h b/drivers/scsi/bnx2i/57xx_iscsi_constants.h
new file mode 100644
index 0000000..2fceb19
--- /dev/null
+++ b/drivers/scsi/bnx2i/57xx_iscsi_constants.h
@@ -0,0 +1,155 @@
+/* 57xx_iscsi_constants.h: Broadcom NetXtreme II iSCSI HSI
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+#ifndef __57XX_ISCSI_CONSTANTS_H_
+#define __57XX_ISCSI_CONSTANTS_H_
+
+/**
+* This file defines HSI constants for the iSCSI flows
+*/
+
+/* iSCSI request op codes */
+#define ISCSI_OPCODE_CLEANUP_REQUEST    (7)
+
+/* iSCSI response/messages op codes */
+#define ISCSI_OPCODE_CLEANUP_RESPONSE 		(0x27)
+#define ISCSI_OPCODE_NOPOUT_LOCAL_COMPLETION    (0)
+
+/* iSCSI task types */
+#define ISCSI_TASK_TYPE_READ    (0)
+#define ISCSI_TASK_TYPE_WRITE   (1)
+#define ISCSI_TASK_TYPE_MPATH   (2)
+
+/* initial CQ sequence numbers */
+#define ISCSI_INITIAL_SN    (1)
+
+/* KWQ (kernel work queue) layer codes */
+#define ISCSI_KWQE_LAYER_CODE   (6)
+
+/* KWQ (kernel work queue) request op codes */
+#define ISCSI_KWQE_OPCODE_OFFLOAD_CONN1 (0)
+#define ISCSI_KWQE_OPCODE_OFFLOAD_CONN2 (1)
+#define ISCSI_KWQE_OPCODE_UPDATE_CONN   (2)
+#define ISCSI_KWQE_OPCODE_DESTROY_CONN  (3)
+#define ISCSI_KWQE_OPCODE_INIT1         (4)
+#define ISCSI_KWQE_OPCODE_INIT2         (5)
+
+/* KCQ (kernel completion queue) response op codes */
+#define ISCSI_KCQE_OPCODE_OFFLOAD_CONN  (0x10)
+#define ISCSI_KCQE_OPCODE_UPDATE_CONN   (0x12)
+#define ISCSI_KCQE_OPCODE_DESTROY_CONN  (0x13)
+#define ISCSI_KCQE_OPCODE_INIT          (0x14)
+#define ISCSI_KCQE_OPCODE_FW_CLEAN_TASK	(0x15)
+#define ISCSI_KCQE_OPCODE_TCP_RESET     (0x16)
+#define ISCSI_KCQE_OPCODE_TCP_SYN       (0x17)
+#define ISCSI_KCQE_OPCODE_TCP_FIN       (0X18)
+#define ISCSI_KCQE_OPCODE_TCP_ERROR     (0x19)
+#define ISCSI_KCQE_OPCODE_CQ_EVENT_NOTIFICATION (0x20)
+#define ISCSI_KCQE_OPCODE_ISCSI_ERROR   (0x21)
+
+/* KCQ (kernel completion queue) completion status */
+#define ISCSI_KCQE_COMPLETION_STATUS_SUCCESS                            (0x0)
+#define ISCSI_KCQE_COMPLETION_STATUS_INVALID_OPCODE                     (0x1)
+#define ISCSI_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAILURE                  (0x2)
+#define ISCSI_KCQE_COMPLETION_STATUS_CTX_FREE_FAILURE                   (0x3)
+#define ISCSI_KCQE_COMPLETION_STATUS_NIC_ERROR                          (0x4)
+
+#define ISCSI_KCQE_COMPLETION_STATUS_HDR_DIG_ERR                        (0x5)
+#define ISCSI_KCQE_COMPLETION_STATUS_DATA_DIG_ERR                       (0x6)
+
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_UNEXPECTED_OPCODE     (0xa)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_OPCODE                (0xb)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_AHS_LEN               (0xc)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_ITT                   (0xd)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_STATSN                (0xe)
+
+/* Response */
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_EXP_DATASN            (0xf)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T              (0x10)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATA_SEG_LEN_IS_ZERO  (0x2c)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATA_SEG_LEN_TOO_BIG  (0x2d)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_0                 (0x11)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_1                 (0x12)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_2                 (0x13)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_3                 (0x14)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_4                 (0x15)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_5                 (0x16)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_6                 (0x17)
+
+/* Data-In */
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REMAIN_RCV_LEN        (0x18)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_MAX_RCV_PDU_LEN       (0x19)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_F_BIT_ZERO            (0x1a)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_NOT_RSRV          (0x1b)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATASN                (0x1c)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REMAIN_BURST_LEN      (0x1d)
+
+/* R2T */
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_BUFFER_OFF            (0x1f)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN                   (0x20)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_R2TSN                 (0x21)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_0 (0x22)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_1 (0x23)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T_EXCEED       (0x24)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_IS_RSRV           (0x25)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_MAX_BURST_LEN         (0x26)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATA_SEG_LEN_NOT_ZERO (0x27)
+
+/* TMF */
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REJECT_PDU_LEN        (0x28)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_ASYNC_PDU_LEN         (0x29)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_NOPIN_PDU_LEN         (0x2a)
+#define ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T_IN_CLEANUP   (0x2b)
+
+/* IP/TCP processing errors: */
+#define ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_IP_FRAGMENT               (0x40)
+#define ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_IP_OPTIONS                (0x41)
+#define ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_URGENT_FLAG               (0x42)
+#define ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_MAX_RTRANS                (0x43)
+
+/* iSCSI licensing errors */
+/* general iSCSI license not installed */
+#define ISCSI_KCQE_COMPLETION_STATUS_ISCSI_NOT_SUPPORTED                (0x50)
+/* additional LOM specific iSCSI license not installed */
+#define ISCSI_KCQE_COMPLETION_STATUS_LOM_ISCSI_NOT_ENABLED              (0x51)
+
+/* SQ/RQ/CQ DB structure sizes */
+#define ISCSI_SQ_DB_SIZE    (16)
+#define ISCSI_RQ_DB_SIZE    (16)
+#define ISCSI_CQ_DB_SIZE    (80)
+
+#define ISCSI_SQN_TO_NOTIFY_NOT_VALID                                   0xFFFF
+
+/* Page size codes (for flags field in connection offload request) */
+#define ISCSI_PAGE_SIZE_256     (0)
+#define ISCSI_PAGE_SIZE_512     (1)
+#define ISCSI_PAGE_SIZE_1K      (2)
+#define ISCSI_PAGE_SIZE_2K      (3)
+#define ISCSI_PAGE_SIZE_4K      (4)
+#define ISCSI_PAGE_SIZE_8K      (5)
+#define ISCSI_PAGE_SIZE_16K     (6)
+#define ISCSI_PAGE_SIZE_32K     (7)
+#define ISCSI_PAGE_SIZE_64K     (8)
+#define ISCSI_PAGE_SIZE_128K    (9)
+#define ISCSI_PAGE_SIZE_256K    (10)
+#define ISCSI_PAGE_SIZE_512K    (11)
+#define ISCSI_PAGE_SIZE_1M      (12)
+#define ISCSI_PAGE_SIZE_2M      (13)
+#define ISCSI_PAGE_SIZE_4M      (14)
+#define ISCSI_PAGE_SIZE_8M      (15)
+
+/* Iscsi PDU related defines */
+#define ISCSI_HEADER_SIZE   (48)
+#define ISCSI_DIGEST_SHIFT  (2)
+#define ISCSI_DIGEST_SIZE   (4)
+
+#define B577XX_ISCSI_CONNECTION_TYPE    3
+
+#endif /*__57XX_ISCSI_CONSTANTS_H_ */
diff --git a/drivers/scsi/bnx2i/57xx_iscsi_hsi.h b/drivers/scsi/bnx2i/57xx_iscsi_hsi.h
new file mode 100644
index 0000000..36af1af
--- /dev/null
+++ b/drivers/scsi/bnx2i/57xx_iscsi_hsi.h
@@ -0,0 +1,1509 @@
+/* 57xx_iscsi_hsi.h: Broadcom NetXtreme II iSCSI HSI.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+#ifndef __57XX_ISCSI_HSI_LINUX_LE__
+#define __57XX_ISCSI_HSI_LINUX_LE__
+
+/*
+ * iSCSI Async CQE
+ */
+struct bnx2i_async_msg {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 reserved2;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved3[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved5;
+	u8 err_code;
+	u8 reserved4;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved4;
+	u8 err_code;
+	u16 reserved5;
+#endif
+	u32 reserved6;
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u8 async_event;
+	u8 async_vcode;
+	u16 param1;
+#elif defined(__LITTLE_ENDIAN)
+	u16 param1;
+	u8 async_vcode;
+	u8 async_event;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 param2;
+	u16 param3;
+#elif defined(__LITTLE_ENDIAN)
+	u16 param3;
+	u16 param2;
+#endif
+	u32 reserved7[3];
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI Buffer Descriptor (BD)
+ */
+struct iscsi_bd {
+	u32 buffer_addr_hi;
+	u32 buffer_addr_lo;
+#if defined(__BIG_ENDIAN)
+	u16 reserved0;
+	u16 buffer_length;
+#elif defined(__LITTLE_ENDIAN)
+	u16 buffer_length;
+	u16 reserved0;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 reserved3;
+	u16 flags;
+#define ISCSI_BD_RESERVED1 (0x3F<<0)
+#define ISCSI_BD_RESERVED1_SHIFT 0
+#define ISCSI_BD_LAST_IN_BD_CHAIN (0x1<<6)
+#define ISCSI_BD_LAST_IN_BD_CHAIN_SHIFT 6
+#define ISCSI_BD_FIRST_IN_BD_CHAIN (0x1<<7)
+#define ISCSI_BD_FIRST_IN_BD_CHAIN_SHIFT 7
+#define ISCSI_BD_RESERVED2 (0xFF<<8)
+#define ISCSI_BD_RESERVED2_SHIFT 8
+#elif defined(__LITTLE_ENDIAN)
+	u16 flags;
+#define ISCSI_BD_RESERVED1 (0x3F<<0)
+#define ISCSI_BD_RESERVED1_SHIFT 0
+#define ISCSI_BD_LAST_IN_BD_CHAIN (0x1<<6)
+#define ISCSI_BD_LAST_IN_BD_CHAIN_SHIFT 6
+#define ISCSI_BD_FIRST_IN_BD_CHAIN (0x1<<7)
+#define ISCSI_BD_FIRST_IN_BD_CHAIN_SHIFT 7
+#define ISCSI_BD_RESERVED2 (0xFF<<8)
+#define ISCSI_BD_RESERVED2_SHIFT 8
+	u16 reserved3;
+#endif
+};
+
+
+/*
+ * iSCSI Cleanup SQ WQE
+ */
+struct bnx2i_cleanup_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 reserved2[3];
+#if defined(__BIG_ENDIAN)
+	u16 reserved3;
+	u16 itt;
+#define ISCSI_CLEANUP_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_CLEANUP_REQUEST_INDEX_SHIFT 0
+#define ISCSI_CLEANUP_REQUEST_TYPE (0x3<<14)
+#define ISCSI_CLEANUP_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_CLEANUP_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_CLEANUP_REQUEST_INDEX_SHIFT 0
+#define ISCSI_CLEANUP_REQUEST_TYPE (0x3<<14)
+#define ISCSI_CLEANUP_REQUEST_TYPE_SHIFT 14
+	u16 reserved3;
+#endif
+	u32 reserved4[10];
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved6;
+	u16 reserved5;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved5;
+	u8 reserved6;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * iSCSI Cleanup CQE
+ */
+struct bnx2i_cleanup_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 status;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 status;
+	u8 op_code;
+#endif
+	u32 reserved1[3];
+	u32 reserved2[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5[7];
+#if defined(__BIG_ENDIAN)
+	u16 reserved6;
+	u16 itt;
+#define ISCSI_CLEANUP_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_CLEANUP_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_CLEANUP_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_CLEANUP_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_CLEANUP_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_CLEANUP_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_CLEANUP_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_CLEANUP_RESPONSE_TYPE_SHIFT 14
+	u16 reserved6;
+#endif
+	u32 cq_req_sn;
+};
+
+
+/*
+ * SCSI read/write SQ WQE
+ */
+struct bnx2i_cmd_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_CMD_REQUEST_TASK_ATTR (0x7<<0)
+#define ISCSI_CMD_REQUEST_TASK_ATTR_SHIFT 0
+#define ISCSI_CMD_REQUEST_RESERVED1 (0x3<<3)
+#define ISCSI_CMD_REQUEST_RESERVED1_SHIFT 3
+#define ISCSI_CMD_REQUEST_WRITE (0x1<<5)
+#define ISCSI_CMD_REQUEST_WRITE_SHIFT 5
+#define ISCSI_CMD_REQUEST_READ (0x1<<6)
+#define ISCSI_CMD_REQUEST_READ_SHIFT 6
+#define ISCSI_CMD_REQUEST_FINAL (0x1<<7)
+#define ISCSI_CMD_REQUEST_FINAL_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_CMD_REQUEST_TASK_ATTR (0x7<<0)
+#define ISCSI_CMD_REQUEST_TASK_ATTR_SHIFT 0
+#define ISCSI_CMD_REQUEST_RESERVED1 (0x3<<3)
+#define ISCSI_CMD_REQUEST_RESERVED1_SHIFT 3
+#define ISCSI_CMD_REQUEST_WRITE (0x1<<5)
+#define ISCSI_CMD_REQUEST_WRITE_SHIFT 5
+#define ISCSI_CMD_REQUEST_READ (0x1<<6)
+#define ISCSI_CMD_REQUEST_READ_SHIFT 6
+#define ISCSI_CMD_REQUEST_FINAL (0x1<<7)
+#define ISCSI_CMD_REQUEST_FINAL_SHIFT 7
+	u8 op_code;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 ud_buffer_offset;
+	u16 sd_buffer_offset;
+#elif defined(__LITTLE_ENDIAN)
+	u16 sd_buffer_offset;
+	u16 ud_buffer_offset;
+#endif
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved2;
+	u16 itt;
+#define ISCSI_CMD_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_CMD_REQUEST_INDEX_SHIFT 0
+#define ISCSI_CMD_REQUEST_TYPE (0x3<<14)
+#define ISCSI_CMD_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_CMD_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_CMD_REQUEST_INDEX_SHIFT 0
+#define ISCSI_CMD_REQUEST_TYPE (0x3<<14)
+#define ISCSI_CMD_REQUEST_TYPE_SHIFT 14
+	u16 reserved2;
+#endif
+	u32 total_data_transfer_length;
+	u32 cmd_sn;
+	u32 reserved3;
+	u32 cdb[4];
+	u32 zero_fill;
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 sd_start_bd_index;
+	u8 ud_start_bd_index;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 ud_start_bd_index;
+	u8 sd_start_bd_index;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * task statistics for write response
+ */
+struct bnx2i_write_resp_task_stat {
+	u32 num_data_ins;
+};
+
+/*
+ * task statistics for read response
+ */
+struct bnx2i_read_resp_task_stat {
+#if defined(__BIG_ENDIAN)
+	u16 num_data_outs;
+	u16 num_r2ts;
+#elif defined(__LITTLE_ENDIAN)
+	u16 num_r2ts;
+	u16 num_data_outs;
+#endif
+};
+
+/*
+ * task statistics for iSCSI cmd response
+ */
+union bnx2i_cmd_resp_task_stat {
+	struct bnx2i_write_resp_task_stat write_stat;
+	struct bnx2i_read_resp_task_stat read_stat;
+};
+
+/*
+ * SCSI Command CQE
+ */
+struct bnx2i_cmd_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 response_flags;
+#define ISCSI_CMD_RESPONSE_RESERVED0 (0x1<<0)
+#define ISCSI_CMD_RESPONSE_RESERVED0_SHIFT 0
+#define ISCSI_CMD_RESPONSE_RESIDUAL_UNDERFLOW (0x1<<1)
+#define ISCSI_CMD_RESPONSE_RESIDUAL_UNDERFLOW_SHIFT 1
+#define ISCSI_CMD_RESPONSE_RESIDUAL_OVERFLOW (0x1<<2)
+#define ISCSI_CMD_RESPONSE_RESIDUAL_OVERFLOW_SHIFT 2
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_UNDERFLOW (0x1<<3)
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_UNDERFLOW_SHIFT 3
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_OVERFLOW (0x1<<4)
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_OVERFLOW_SHIFT 4
+#define ISCSI_CMD_RESPONSE_RESERVED1 (0x7<<5)
+#define ISCSI_CMD_RESPONSE_RESERVED1_SHIFT 5
+	u8 response;
+	u8 status;
+#elif defined(__LITTLE_ENDIAN)
+	u8 status;
+	u8 response;
+	u8 response_flags;
+#define ISCSI_CMD_RESPONSE_RESERVED0 (0x1<<0)
+#define ISCSI_CMD_RESPONSE_RESERVED0_SHIFT 0
+#define ISCSI_CMD_RESPONSE_RESIDUAL_UNDERFLOW (0x1<<1)
+#define ISCSI_CMD_RESPONSE_RESIDUAL_UNDERFLOW_SHIFT 1
+#define ISCSI_CMD_RESPONSE_RESIDUAL_OVERFLOW (0x1<<2)
+#define ISCSI_CMD_RESPONSE_RESIDUAL_OVERFLOW_SHIFT 2
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_UNDERFLOW (0x1<<3)
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_UNDERFLOW_SHIFT 3
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_OVERFLOW (0x1<<4)
+#define ISCSI_CMD_RESPONSE_BR_RESIDUAL_OVERFLOW_SHIFT 4
+#define ISCSI_CMD_RESPONSE_RESERVED1 (0x7<<5)
+#define ISCSI_CMD_RESPONSE_RESERVED1_SHIFT 5
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved2;
+	u32 residual_count;
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5[5];
+	union bnx2i_cmd_resp_task_stat task_stat;
+	u32 reserved6;
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u16 itt;
+#define ISCSI_CMD_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_CMD_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_CMD_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_CMD_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_CMD_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_CMD_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_CMD_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_CMD_RESPONSE_TYPE_SHIFT 14
+	u16 reserved7;
+#endif
+	u32 cq_req_sn;
+};
+
+
+
+/*
+ * firmware middle-path request SQ WQE
+ */
+struct bnx2i_fw_mp_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+	u16 hdr_opaque1;
+#elif defined(__LITTLE_ENDIAN)
+	u16 hdr_opaque1;
+	u8 op_attr;
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 hdr_opaque2[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved0;
+	u16 itt;
+#define ISCSI_FW_MP_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_FW_MP_REQUEST_INDEX_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_TYPE (0x3<<14)
+#define ISCSI_FW_MP_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_FW_MP_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_FW_MP_REQUEST_INDEX_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_TYPE (0x3<<14)
+#define ISCSI_FW_MP_REQUEST_TYPE_SHIFT 14
+	u16 reserved0;
+#endif
+	u32 hdr_opaque3[4];
+	u32 resp_bd_list_addr_lo;
+	u32 resp_bd_list_addr_hi;
+	u32 resp_buffer;
+#define ISCSI_FW_MP_REQUEST_RESP_BUFFER_LENGTH (0xFFFFFF<<0)
+#define ISCSI_FW_MP_REQUEST_RESP_BUFFER_LENGTH_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_NUM_RESP_BDS (0xFF<<24)
+#define ISCSI_FW_MP_REQUEST_NUM_RESP_BDS_SHIFT 24
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 reserved3;
+	u8 flags;
+#define ISCSI_FW_MP_REQUEST_RESERVED1 (0x1<<0)
+#define ISCSI_FW_MP_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_LOCAL_COMPLETION (0x1<<1)
+#define ISCSI_FW_MP_REQUEST_LOCAL_COMPLETION_SHIFT 1
+#define ISCSI_FW_MP_REQUEST_UPDATE_EXP_STAT_SN (0x1<<2)
+#define ISCSI_FW_MP_REQUEST_UPDATE_EXP_STAT_SN_SHIFT 2
+#define ISCSI_FW_MP_REQUEST_RESERVED2 (0x1F<<3)
+#define ISCSI_FW_MP_REQUEST_RESERVED2_SHIFT 3
+#elif defined(__LITTLE_ENDIAN)
+	u8 flags;
+#define ISCSI_FW_MP_REQUEST_RESERVED1 (0x1<<0)
+#define ISCSI_FW_MP_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_FW_MP_REQUEST_LOCAL_COMPLETION (0x1<<1)
+#define ISCSI_FW_MP_REQUEST_LOCAL_COMPLETION_SHIFT 1
+#define ISCSI_FW_MP_REQUEST_UPDATE_EXP_STAT_SN (0x1<<2)
+#define ISCSI_FW_MP_REQUEST_UPDATE_EXP_STAT_SN_SHIFT 2
+#define ISCSI_FW_MP_REQUEST_RESERVED2 (0x1F<<3)
+#define ISCSI_FW_MP_REQUEST_RESERVED2_SHIFT 3
+	u8 reserved3;
+	u16 reserved4;
+#endif
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved6;
+	u8 reserved5;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved5;
+	u8 reserved6;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * firmware response - CQE: used only by firmware
+ */
+struct bnx2i_fw_response {
+	u32 hdr_dword1[2];
+	u32 hdr_exp_cmd_sn;
+	u32 hdr_max_cmd_sn;
+	u32 hdr_ttt;
+	u32 hdr_res_cnt;
+	u32 cqe_flags;
+#define ISCSI_FW_RESPONSE_RESERVED2 (0xFF<<0)
+#define ISCSI_FW_RESPONSE_RESERVED2_SHIFT 0
+#define ISCSI_FW_RESPONSE_ERR_CODE (0xFF<<8)
+#define ISCSI_FW_RESPONSE_ERR_CODE_SHIFT 8
+#define ISCSI_FW_RESPONSE_RESERVED3 (0xFFFF<<16)
+#define ISCSI_FW_RESPONSE_RESERVED3_SHIFT 16
+	u32 stat_sn;
+	u32 hdr_dword2[2];
+	u32 hdr_dword3[2];
+	u32 task_stat;
+	u32 reserved0;
+	u32 hdr_itt;
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI KCQ CQE parameters
+ */
+union iscsi_kcqe_params {
+	u32 reserved0[4];
+};
+
+/*
+ * iSCSI KCQ CQE
+ */
+struct iscsi_kcqe {
+	u32 iscsi_conn_id;
+	u32 completion_status;
+	u32 iscsi_conn_context_id;
+	union iscsi_kcqe_params params;
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define ISCSI_KCQE_RESERVED0 (0xF<<0)
+#define ISCSI_KCQE_RESERVED0_SHIFT 0
+#define ISCSI_KCQE_LAYER_CODE (0x7<<4)
+#define ISCSI_KCQE_LAYER_CODE_SHIFT 4
+#define ISCSI_KCQE_RESERVED1 (0x1<<7)
+#define ISCSI_KCQE_RESERVED1_SHIFT 7
+	u8 op_code;
+	u16 qe_self_seq;
+#elif defined(__LITTLE_ENDIAN)
+	u16 qe_self_seq;
+	u8 op_code;
+	u8 flags;
+#define ISCSI_KCQE_RESERVED0 (0xF<<0)
+#define ISCSI_KCQE_RESERVED0_SHIFT 0
+#define ISCSI_KCQE_LAYER_CODE (0x7<<4)
+#define ISCSI_KCQE_LAYER_CODE_SHIFT 4
+#define ISCSI_KCQE_RESERVED1 (0x1<<7)
+#define ISCSI_KCQE_RESERVED1_SHIFT 7
+#endif
+};
+
+
+
+/*
+ * iSCSI KWQE header
+ */
+struct iscsi_kwqe_header {
+#if defined(__BIG_ENDIAN)
+	u8 flags;
+#define ISCSI_KWQE_HEADER_RESERVED0 (0xF<<0)
+#define ISCSI_KWQE_HEADER_RESERVED0_SHIFT 0
+#define ISCSI_KWQE_HEADER_LAYER_CODE (0x7<<4)
+#define ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT 4
+#define ISCSI_KWQE_HEADER_RESERVED1 (0x1<<7)
+#define ISCSI_KWQE_HEADER_RESERVED1_SHIFT 7
+	u8 op_code;
+#elif defined(__LITTLE_ENDIAN)
+	u8 op_code;
+	u8 flags;
+#define ISCSI_KWQE_HEADER_RESERVED0 (0xF<<0)
+#define ISCSI_KWQE_HEADER_RESERVED0_SHIFT 0
+#define ISCSI_KWQE_HEADER_LAYER_CODE (0x7<<4)
+#define ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT 4
+#define ISCSI_KWQE_HEADER_RESERVED1 (0x1<<7)
+#define ISCSI_KWQE_HEADER_RESERVED1_SHIFT 7
+#endif
+};
+
+/*
+ * iSCSI firmware init request 1
+ */
+struct iscsi_kwqe_init1 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u8 reserved0;
+	u8 num_cqs;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_cqs;
+	u8 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 dummy_buffer_addr_lo;
+	u32 dummy_buffer_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u16 num_ccells_per_conn;
+	u16 num_tasks_per_conn;
+#elif defined(__LITTLE_ENDIAN)
+	u16 num_tasks_per_conn;
+	u16 num_ccells_per_conn;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 sq_wqes_per_page;
+	u16 sq_num_wqes;
+#elif defined(__LITTLE_ENDIAN)
+	u16 sq_num_wqes;
+	u16 sq_wqes_per_page;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 cq_log_wqes_per_page;
+	u8 flags;
+#define ISCSI_KWQE_INIT1_PAGE_SIZE (0xF<<0)
+#define ISCSI_KWQE_INIT1_PAGE_SIZE_SHIFT 0
+#define ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE (0x1<<4)
+#define ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE_SHIFT 4
+#define ISCSI_KWQE_INIT1_KEEP_ALIVE_ENABLE (0x1<<5)
+#define ISCSI_KWQE_INIT1_KEEP_ALIVE_ENABLE_SHIFT 5
+#define ISCSI_KWQE_INIT1_RESERVED1 (0x3<<6)
+#define ISCSI_KWQE_INIT1_RESERVED1_SHIFT 6
+	u16 cq_num_wqes;
+#elif defined(__LITTLE_ENDIAN)
+	u16 cq_num_wqes;
+	u8 flags;
+#define ISCSI_KWQE_INIT1_PAGE_SIZE (0xF<<0)
+#define ISCSI_KWQE_INIT1_PAGE_SIZE_SHIFT 0
+#define ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE (0x1<<4)
+#define ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE_SHIFT 4
+#define ISCSI_KWQE_INIT1_KEEP_ALIVE_ENABLE (0x1<<5)
+#define ISCSI_KWQE_INIT1_KEEP_ALIVE_ENABLE_SHIFT 5
+#define ISCSI_KWQE_INIT1_RESERVED1 (0x3<<6)
+#define ISCSI_KWQE_INIT1_RESERVED1_SHIFT 6
+	u8 cq_log_wqes_per_page;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 cq_num_pages;
+	u16 sq_num_pages;
+#elif defined(__LITTLE_ENDIAN)
+	u16 sq_num_pages;
+	u16 cq_num_pages;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 rq_buffer_size;
+	u16 rq_num_wqes;
+#elif defined(__LITTLE_ENDIAN)
+	u16 rq_num_wqes;
+	u16 rq_buffer_size;
+#endif
+};
+
+/*
+ * iSCSI firmware init request 2
+ */
+struct iscsi_kwqe_init2 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 max_cq_sqn;
+#elif defined(__LITTLE_ENDIAN)
+	u16 max_cq_sqn;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 error_bit_map[2];
+	u32 reserved1[5];
+};
+
+/*
+ * Initial iSCSI connection offload request 1
+ */
+struct iscsi_kwqe_conn_offload1 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 iscsi_conn_id;
+#elif defined(__LITTLE_ENDIAN)
+	u16 iscsi_conn_id;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 sq_page_table_addr_lo;
+	u32 sq_page_table_addr_hi;
+	u32 cq_page_table_addr_lo;
+	u32 cq_page_table_addr_hi;
+	u32 reserved0[3];
+};
+
+/*
+ * iSCSI Page Table Entry (PTE)
+ */
+struct iscsi_pte {
+	u32 hi;
+	u32 lo;
+};
+
+/*
+ * Initial iSCSI connection offload request 2
+ */
+struct iscsi_kwqe_conn_offload2 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 rq_page_table_addr_lo;
+	u32 rq_page_table_addr_hi;
+	struct iscsi_pte sq_first_pte;
+	struct iscsi_pte cq_first_pte;
+	u32 num_additional_wqes;
+};
+
+
+/*
+ * Initial iSCSI connection offload request 3
+ */
+struct iscsi_kwqe_conn_offload3 {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 reserved1;
+	struct iscsi_pte qp_first_pte[3];
+};
+
+
+/*
+ * iSCSI connection update request
+ */
+struct iscsi_kwqe_conn_update {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 session_error_recovery_level;
+	u8 max_outstanding_r2ts;
+	u8 reserved2;
+	u8 conn_flags;
+#define ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST (0x1<<0)
+#define ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST_SHIFT 0
+#define ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST (0x1<<1)
+#define ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST_SHIFT 1
+#define ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T (0x1<<2)
+#define ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T_SHIFT 2
+#define ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA (0x1<<3)
+#define ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA_SHIFT 3
+#define ISCSI_KWQE_CONN_UPDATE_RESERVED1 (0xF<<4)
+#define ISCSI_KWQE_CONN_UPDATE_RESERVED1_SHIFT 4
+#elif defined(__LITTLE_ENDIAN)
+	u8 conn_flags;
+#define ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST (0x1<<0)
+#define ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST_SHIFT 0
+#define ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST (0x1<<1)
+#define ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST_SHIFT 1
+#define ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T (0x1<<2)
+#define ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T_SHIFT 2
+#define ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA (0x1<<3)
+#define ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA_SHIFT 3
+#define ISCSI_KWQE_CONN_UPDATE_RESERVED1 (0xF<<4)
+#define ISCSI_KWQE_CONN_UPDATE_RESERVED1_SHIFT 4
+	u8 reserved2;
+	u8 max_outstanding_r2ts;
+	u8 session_error_recovery_level;
+#endif
+	u32 context_id;
+	u32 max_send_pdu_length;
+	u32 max_recv_pdu_length;
+	u32 first_burst_length;
+	u32 max_burst_length;
+	u32 exp_stat_sn;
+};
+
+/*
+ * iSCSI destroy connection request
+ */
+struct iscsi_kwqe_conn_destroy {
+#if defined(__BIG_ENDIAN)
+	struct iscsi_kwqe_header hdr;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	struct iscsi_kwqe_header hdr;
+#endif
+	u32 context_id;
+	u32 reserved1[6];
+};
+
+/*
+ * iSCSI KWQ WQE
+ */
+union iscsi_kwqe {
+	struct iscsi_kwqe_init1 init1;
+	struct iscsi_kwqe_init2 init2;
+	struct iscsi_kwqe_conn_offload1 conn_offload1;
+	struct iscsi_kwqe_conn_offload2 conn_offload2;
+	struct iscsi_kwqe_conn_update conn_update;
+	struct iscsi_kwqe_conn_destroy conn_destroy;
+};
+
+/*
+ * iSCSI Login SQ WQE
+ */
+struct bnx2i_login_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_LOGIN_REQUEST_NEXT_STAGE (0x3<<0)
+#define ISCSI_LOGIN_REQUEST_NEXT_STAGE_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_CURRENT_STAGE (0x3<<2)
+#define ISCSI_LOGIN_REQUEST_CURRENT_STAGE_SHIFT 2
+#define ISCSI_LOGIN_REQUEST_RESERVED0 (0x3<<4)
+#define ISCSI_LOGIN_REQUEST_RESERVED0_SHIFT 4
+#define ISCSI_LOGIN_REQUEST_CONT (0x1<<6)
+#define ISCSI_LOGIN_REQUEST_CONT_SHIFT 6
+#define ISCSI_LOGIN_REQUEST_TRANSIT (0x1<<7)
+#define ISCSI_LOGIN_REQUEST_TRANSIT_SHIFT 7
+	u8 version_max;
+	u8 version_min;
+#elif defined(__LITTLE_ENDIAN)
+	u8 version_min;
+	u8 version_max;
+	u8 op_attr;
+#define ISCSI_LOGIN_REQUEST_NEXT_STAGE (0x3<<0)
+#define ISCSI_LOGIN_REQUEST_NEXT_STAGE_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_CURRENT_STAGE (0x3<<2)
+#define ISCSI_LOGIN_REQUEST_CURRENT_STAGE_SHIFT 2
+#define ISCSI_LOGIN_REQUEST_RESERVED0 (0x3<<4)
+#define ISCSI_LOGIN_REQUEST_RESERVED0_SHIFT 4
+#define ISCSI_LOGIN_REQUEST_CONT (0x1<<6)
+#define ISCSI_LOGIN_REQUEST_CONT_SHIFT 6
+#define ISCSI_LOGIN_REQUEST_TRANSIT (0x1<<7)
+#define ISCSI_LOGIN_REQUEST_TRANSIT_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 isid_lo;
+#if defined(__BIG_ENDIAN)
+	u16 isid_hi;
+	u16 tsih;
+#elif defined(__LITTLE_ENDIAN)
+	u16 tsih;
+	u16 isid_hi;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 reserved2;
+	u16 itt;
+#define ISCSI_LOGIN_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_LOGIN_REQUEST_INDEX_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_TYPE (0x3<<14)
+#define ISCSI_LOGIN_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_LOGIN_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_LOGIN_REQUEST_INDEX_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_TYPE (0x3<<14)
+#define ISCSI_LOGIN_REQUEST_TYPE_SHIFT 14
+	u16 reserved2;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 cid;
+	u16 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved3;
+	u16 cid;
+#endif
+	u32 cmd_sn;
+	u32 exp_stat_sn;
+	u32 reserved4;
+	u32 resp_bd_list_addr_lo;
+	u32 resp_bd_list_addr_hi;
+	u32 resp_buffer;
+#define ISCSI_LOGIN_REQUEST_RESP_BUFFER_LENGTH (0xFFFFFF<<0)
+#define ISCSI_LOGIN_REQUEST_RESP_BUFFER_LENGTH_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_NUM_RESP_BDS (0xFF<<24)
+#define ISCSI_LOGIN_REQUEST_NUM_RESP_BDS_SHIFT 24
+#if defined(__BIG_ENDIAN)
+	u16 reserved8;
+	u8 reserved7;
+	u8 flags;
+#define ISCSI_LOGIN_REQUEST_RESERVED5 (0x3<<0)
+#define ISCSI_LOGIN_REQUEST_RESERVED5_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_UPDATE_EXP_STAT_SN (0x1<<2)
+#define ISCSI_LOGIN_REQUEST_UPDATE_EXP_STAT_SN_SHIFT 2
+#define ISCSI_LOGIN_REQUEST_RESERVED6 (0x1F<<3)
+#define ISCSI_LOGIN_REQUEST_RESERVED6_SHIFT 3
+#elif defined(__LITTLE_ENDIAN)
+	u8 flags;
+#define ISCSI_LOGIN_REQUEST_RESERVED5 (0x3<<0)
+#define ISCSI_LOGIN_REQUEST_RESERVED5_SHIFT 0
+#define ISCSI_LOGIN_REQUEST_UPDATE_EXP_STAT_SN (0x1<<2)
+#define ISCSI_LOGIN_REQUEST_UPDATE_EXP_STAT_SN_SHIFT 2
+#define ISCSI_LOGIN_REQUEST_RESERVED6 (0x1F<<3)
+#define ISCSI_LOGIN_REQUEST_RESERVED6_SHIFT 3
+	u8 reserved7;
+	u16 reserved8;
+#endif
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved10;
+	u8 reserved9;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved9;
+	u8 reserved10;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * iSCSI Login CQE
+ */
+struct bnx2i_login_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 response_flags;
+#define ISCSI_LOGIN_RESPONSE_NEXT_STAGE (0x3<<0)
+#define ISCSI_LOGIN_RESPONSE_NEXT_STAGE_SHIFT 0
+#define ISCSI_LOGIN_RESPONSE_CURRENT_STAGE (0x3<<2)
+#define ISCSI_LOGIN_RESPONSE_CURRENT_STAGE_SHIFT 2
+#define ISCSI_LOGIN_RESPONSE_RESERVED0 (0x3<<4)
+#define ISCSI_LOGIN_RESPONSE_RESERVED0_SHIFT 4
+#define ISCSI_LOGIN_RESPONSE_CONT (0x1<<6)
+#define ISCSI_LOGIN_RESPONSE_CONT_SHIFT 6
+#define ISCSI_LOGIN_RESPONSE_TRANSIT (0x1<<7)
+#define ISCSI_LOGIN_RESPONSE_TRANSIT_SHIFT 7
+	u8 version_max;
+	u8 version_active;
+#elif defined(__LITTLE_ENDIAN)
+	u8 version_active;
+	u8 version_max;
+	u8 response_flags;
+#define ISCSI_LOGIN_RESPONSE_NEXT_STAGE (0x3<<0)
+#define ISCSI_LOGIN_RESPONSE_NEXT_STAGE_SHIFT 0
+#define ISCSI_LOGIN_RESPONSE_CURRENT_STAGE (0x3<<2)
+#define ISCSI_LOGIN_RESPONSE_CURRENT_STAGE_SHIFT 2
+#define ISCSI_LOGIN_RESPONSE_RESERVED0 (0x3<<4)
+#define ISCSI_LOGIN_RESPONSE_RESERVED0_SHIFT 4
+#define ISCSI_LOGIN_RESPONSE_CONT (0x1<<6)
+#define ISCSI_LOGIN_RESPONSE_CONT_SHIFT 6
+#define ISCSI_LOGIN_RESPONSE_TRANSIT (0x1<<7)
+#define ISCSI_LOGIN_RESPONSE_TRANSIT_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved1[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved3;
+	u8 err_code;
+	u8 reserved2;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved2;
+	u8 err_code;
+	u16 reserved3;
+#endif
+	u32 stat_sn;
+	u32 isid_lo;
+#if defined(__BIG_ENDIAN)
+	u16 isid_hi;
+	u16 tsih;
+#elif defined(__LITTLE_ENDIAN)
+	u16 tsih;
+	u16 isid_hi;
+#endif
+#if defined(__BIG_ENDIAN)
+	u8 status_class;
+	u8 status_detail;
+	u16 reserved4;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved4;
+	u8 status_detail;
+	u8 status_class;
+#endif
+	u32 reserved5[3];
+#if defined(__BIG_ENDIAN)
+	u16 reserved6;
+	u16 itt;
+#define ISCSI_LOGIN_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_LOGIN_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_LOGIN_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_LOGIN_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_LOGIN_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_LOGIN_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_LOGIN_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_LOGIN_RESPONSE_TYPE_SHIFT 14
+	u16 reserved6;
+#endif
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI Logout SQ WQE
+ */
+struct bnx2i_logout_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_LOGOUT_REQUEST_REASON (0x7F<<0)
+#define ISCSI_LOGOUT_REQUEST_REASON_SHIFT 0
+#define ISCSI_LOGOUT_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_LOGOUT_REQUEST_ALWAYS_ONE_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_LOGOUT_REQUEST_REASON (0x7F<<0)
+#define ISCSI_LOGOUT_REQUEST_REASON_SHIFT 0
+#define ISCSI_LOGOUT_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_LOGOUT_REQUEST_ALWAYS_ONE_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 reserved1[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved2;
+	u16 itt;
+#define ISCSI_LOGOUT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_LOGOUT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_LOGOUT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_LOGOUT_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_LOGOUT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_LOGOUT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_LOGOUT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_LOGOUT_REQUEST_TYPE_SHIFT 14
+	u16 reserved2;
+#endif
+#if defined(__BIG_ENDIAN)
+	u16 cid;
+	u16 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved3;
+	u16 cid;
+#endif
+	u32 cmd_sn;
+	u32 reserved4[5];
+	u32 zero_fill;
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved6;
+	u8 reserved5;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved5;
+	u8 reserved6;
+	u8 cq_index;
+#endif
+};
+
+
+/*
+ * iSCSI Logout CQE
+ */
+struct bnx2i_logout_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u8 response;
+	u8 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved0;
+	u8 response;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 reserved2;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved3[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved5;
+	u8 err_code;
+	u8 reserved4;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved4;
+	u8 err_code;
+	u16 reserved5;
+#endif
+	u32 reserved6[3];
+#if defined(__BIG_ENDIAN)
+	u16 time_to_wait;
+	u16 time_to_retain;
+#elif defined(__LITTLE_ENDIAN)
+	u16 time_to_retain;
+	u16 time_to_wait;
+#endif
+	u32 reserved7[3];
+#if defined(__BIG_ENDIAN)
+	u16 reserved8;
+	u16 itt;
+#define ISCSI_LOGOUT_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_LOGOUT_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_LOGOUT_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_LOGOUT_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_LOGOUT_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_LOGOUT_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_LOGOUT_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_LOGOUT_RESPONSE_TYPE_SHIFT 14
+	u16 reserved8;
+#endif
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI Nop-In CQE
+ */
+struct bnx2i_nop_in_msg {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 ttt;
+	u32 reserved2;
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5;
+	u32 lun[2];
+	u32 reserved6[4];
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u16 itt;
+#define ISCSI_NOP_IN_MSG_INDEX (0x3FFF<<0)
+#define ISCSI_NOP_IN_MSG_INDEX_SHIFT 0
+#define ISCSI_NOP_IN_MSG_TYPE (0x3<<14)
+#define ISCSI_NOP_IN_MSG_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_NOP_IN_MSG_INDEX (0x3FFF<<0)
+#define ISCSI_NOP_IN_MSG_INDEX_SHIFT 0
+#define ISCSI_NOP_IN_MSG_TYPE (0x3<<14)
+#define ISCSI_NOP_IN_MSG_TYPE_SHIFT 14
+	u16 reserved7;
+#endif
+	u32 cq_req_sn;
+};
+
+
+/*
+ * iSCSI NOP-OUT SQ WQE
+ */
+struct bnx2i_nop_out_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_NOP_OUT_REQUEST_RESERVED1 (0x7F<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_NOP_OUT_REQUEST_ALWAYS_ONE_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_NOP_OUT_REQUEST_RESERVED1 (0x7F<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_NOP_OUT_REQUEST_ALWAYS_ONE_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved2;
+	u16 itt;
+#define ISCSI_NOP_OUT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_NOP_OUT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_NOP_OUT_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_NOP_OUT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_NOP_OUT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_NOP_OUT_REQUEST_TYPE_SHIFT 14
+	u16 reserved2;
+#endif
+	u32 ttt;
+	u32 cmd_sn;
+	u32 reserved3[2];
+	u32 resp_bd_list_addr_lo;
+	u32 resp_bd_list_addr_hi;
+	u32 resp_buffer;
+#define ISCSI_NOP_OUT_REQUEST_RESP_BUFFER_LENGTH (0xFFFFFF<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESP_BUFFER_LENGTH_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_NUM_RESP_BDS (0xFF<<24)
+#define ISCSI_NOP_OUT_REQUEST_NUM_RESP_BDS_SHIFT 24
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u8 reserved6;
+	u8 flags;
+#define ISCSI_NOP_OUT_REQUEST_RESERVED4 (0x1<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESERVED4_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION (0x1<<1)
+#define ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION_SHIFT 1
+#define ISCSI_NOP_OUT_REQUEST_ZERO_FILL (0x3F<<2)
+#define ISCSI_NOP_OUT_REQUEST_ZERO_FILL_SHIFT 2
+#elif defined(__LITTLE_ENDIAN)
+	u8 flags;
+#define ISCSI_NOP_OUT_REQUEST_RESERVED4 (0x1<<0)
+#define ISCSI_NOP_OUT_REQUEST_RESERVED4_SHIFT 0
+#define ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION (0x1<<1)
+#define ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION_SHIFT 1
+#define ISCSI_NOP_OUT_REQUEST_ZERO_FILL (0x3F<<2)
+#define ISCSI_NOP_OUT_REQUEST_ZERO_FILL_SHIFT 2
+	u8 reserved6;
+	u16 reserved7;
+#endif
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved9;
+	u8 reserved8;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved8;
+	u8 reserved9;
+	u8 cq_index;
+#endif
+};
+
+/*
+ * iSCSI Reject CQE
+ */
+struct bnx2i_reject_msg {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u8 reason;
+	u8 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved0;
+	u8 reason;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved2[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5[8];
+	u32 cq_req_sn;
+};
+
+/*
+ * bnx2i iSCSI TMF SQ WQE
+ */
+struct bnx2i_tmf_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_TMF_REQUEST_FUNCTION (0x7F<<0)
+#define ISCSI_TMF_REQUEST_FUNCTION_SHIFT 0
+#define ISCSI_TMF_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_TMF_REQUEST_ALWAYS_ONE_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_TMF_REQUEST_FUNCTION (0x7F<<0)
+#define ISCSI_TMF_REQUEST_FUNCTION_SHIFT 0
+#define ISCSI_TMF_REQUEST_ALWAYS_ONE (0x1<<7)
+#define ISCSI_TMF_REQUEST_ALWAYS_ONE_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved1;
+	u16 itt;
+#define ISCSI_TMF_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_TMF_REQUEST_INDEX_SHIFT 0
+#define ISCSI_TMF_REQUEST_TYPE (0x3<<14)
+#define ISCSI_TMF_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_TMF_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_TMF_REQUEST_INDEX_SHIFT 0
+#define ISCSI_TMF_REQUEST_TYPE (0x3<<14)
+#define ISCSI_TMF_REQUEST_TYPE_SHIFT 14
+	u16 reserved1;
+#endif
+	u32 ref_itt;
+	u32 cmd_sn;
+	u32 reserved2;
+	u32 ref_cmd_sn;
+	u32 reserved3[3];
+	u32 zero_fill;
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved5;
+	u8 reserved4;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved4;
+	u8 reserved5;
+	u8 cq_index;
+#endif
+};
+
+/*
+ * iSCSI Text SQ WQE
+ */
+struct bnx2i_text_request {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 op_attr;
+#define ISCSI_TEXT_REQUEST_RESERVED1 (0x3F<<0)
+#define ISCSI_TEXT_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_TEXT_REQUEST_CONT (0x1<<6)
+#define ISCSI_TEXT_REQUEST_CONT_SHIFT 6
+#define ISCSI_TEXT_REQUEST_FINAL (0x1<<7)
+#define ISCSI_TEXT_REQUEST_FINAL_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 op_attr;
+#define ISCSI_TEXT_REQUEST_RESERVED1 (0x3F<<0)
+#define ISCSI_TEXT_REQUEST_RESERVED1_SHIFT 0
+#define ISCSI_TEXT_REQUEST_CONT (0x1<<6)
+#define ISCSI_TEXT_REQUEST_CONT_SHIFT 6
+#define ISCSI_TEXT_REQUEST_FINAL (0x1<<7)
+#define ISCSI_TEXT_REQUEST_FINAL_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 lun[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved3;
+	u16 itt;
+#define ISCSI_TEXT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_TEXT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_TEXT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_TEXT_REQUEST_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_TEXT_REQUEST_INDEX (0x3FFF<<0)
+#define ISCSI_TEXT_REQUEST_INDEX_SHIFT 0
+#define ISCSI_TEXT_REQUEST_TYPE (0x3<<14)
+#define ISCSI_TEXT_REQUEST_TYPE_SHIFT 14
+	u16 reserved3;
+#endif
+	u32 ttt;
+	u32 cmd_sn;
+	u32 reserved4[2];
+	u32 resp_bd_list_addr_lo;
+	u32 resp_bd_list_addr_hi;
+	u32 resp_buffer;
+#define ISCSI_TEXT_REQUEST_RESP_BUFFER_LENGTH (0xFFFFFF<<0)
+#define ISCSI_TEXT_REQUEST_RESP_BUFFER_LENGTH_SHIFT 0
+#define ISCSI_TEXT_REQUEST_NUM_RESP_BDS (0xFF<<24)
+#define ISCSI_TEXT_REQUEST_NUM_RESP_BDS_SHIFT 24
+	u32 zero_fill;
+	u32 bd_list_addr_lo;
+	u32 bd_list_addr_hi;
+#if defined(__BIG_ENDIAN)
+	u8 cq_index;
+	u8 reserved7;
+	u8 reserved6;
+	u8 num_bds;
+#elif defined(__LITTLE_ENDIAN)
+	u8 num_bds;
+	u8 reserved6;
+	u8 reserved7;
+	u8 cq_index;
+#endif
+};
+
+/*
+ * iSCSI SQ WQE
+ */
+union iscsi_request {
+	struct bnx2i_cmd_request cmd;
+	struct bnx2i_tmf_request tmf;
+	struct bnx2i_nop_out_request nop_out;
+	struct bnx2i_login_request login_req;
+	struct bnx2i_text_request text;
+	struct bnx2i_logout_request logout_req;
+	struct bnx2i_cleanup_request cleanup;
+};
+
+
+/*
+ * iSCSI TMF CQE
+ */
+struct bnx2i_tmf_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 reserved1;
+	u8 response;
+	u8 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved0;
+	u8 response;
+	u8 reserved1;
+	u8 op_code;
+#endif
+	u32 reserved2;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 reserved3[2];
+#if defined(__BIG_ENDIAN)
+	u16 reserved5;
+	u8 err_code;
+	u8 reserved4;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved4;
+	u8 err_code;
+	u16 reserved5;
+#endif
+	u32 reserved6[7];
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u16 itt;
+#define ISCSI_TMF_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_TMF_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_TMF_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_TMF_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_TMF_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_TMF_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_TMF_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_TMF_RESPONSE_TYPE_SHIFT 14
+	u16 reserved7;
+#endif
+	u32 cq_req_sn;
+};
+
+/*
+ * iSCSI Text CQE
+ */
+struct bnx2i_text_response {
+#if defined(__BIG_ENDIAN)
+	u8 op_code;
+	u8 response_flags;
+#define ISCSI_TEXT_RESPONSE_RESERVED1 (0x3F<<0)
+#define ISCSI_TEXT_RESPONSE_RESERVED1_SHIFT 0
+#define ISCSI_TEXT_RESPONSE_CONT (0x1<<6)
+#define ISCSI_TEXT_RESPONSE_CONT_SHIFT 6
+#define ISCSI_TEXT_RESPONSE_FINAL (0x1<<7)
+#define ISCSI_TEXT_RESPONSE_FINAL_SHIFT 7
+	u16 reserved0;
+#elif defined(__LITTLE_ENDIAN)
+	u16 reserved0;
+	u8 response_flags;
+#define ISCSI_TEXT_RESPONSE_RESERVED1 (0x3F<<0)
+#define ISCSI_TEXT_RESPONSE_RESERVED1_SHIFT 0
+#define ISCSI_TEXT_RESPONSE_CONT (0x1<<6)
+#define ISCSI_TEXT_RESPONSE_CONT_SHIFT 6
+#define ISCSI_TEXT_RESPONSE_FINAL (0x1<<7)
+#define ISCSI_TEXT_RESPONSE_FINAL_SHIFT 7
+	u8 op_code;
+#endif
+	u32 data_length;
+	u32 exp_cmd_sn;
+	u32 max_cmd_sn;
+	u32 ttt;
+	u32 reserved2;
+#if defined(__BIG_ENDIAN)
+	u16 reserved4;
+	u8 err_code;
+	u8 reserved3;
+#elif defined(__LITTLE_ENDIAN)
+	u8 reserved3;
+	u8 err_code;
+	u16 reserved4;
+#endif
+	u32 reserved5;
+	u32 lun[2];
+	u32 reserved6[4];
+#if defined(__BIG_ENDIAN)
+	u16 reserved7;
+	u16 itt;
+#define ISCSI_TEXT_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_TEXT_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_TEXT_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_TEXT_RESPONSE_TYPE_SHIFT 14
+#elif defined(__LITTLE_ENDIAN)
+	u16 itt;
+#define ISCSI_TEXT_RESPONSE_INDEX (0x3FFF<<0)
+#define ISCSI_TEXT_RESPONSE_INDEX_SHIFT 0
+#define ISCSI_TEXT_RESPONSE_TYPE (0x3<<14)
+#define ISCSI_TEXT_RESPONSE_TYPE_SHIFT 14
+	u16 reserved7;
+#endif
+	u32 cq_req_sn;
+};
+
+/*
+ * iSCSI CQE
+ */
+union iscsi_response {
+	struct bnx2i_cmd_response cmd;
+	struct bnx2i_tmf_response tmf;
+	struct bnx2i_login_response login_resp;
+	struct bnx2i_text_response text;
+	struct bnx2i_logout_response logout_resp;
+	struct bnx2i_cleanup_response cleanup;
+	struct bnx2i_reject_msg reject;
+	struct bnx2i_async_msg async;
+	struct bnx2i_nop_in_msg nop_in;
+};
+
+#endif /* __57XX_ISCSI_HSI_LINUX_LE__ */
diff --git a/drivers/scsi/bnx2i/Kconfig b/drivers/scsi/bnx2i/Kconfig
new file mode 100644
index 0000000..820d428
--- /dev/null
+++ b/drivers/scsi/bnx2i/Kconfig
@@ -0,0 +1,7 @@
+config SCSI_BNX2_ISCSI
+	tristate "Broadcom NetXtreme II iSCSI support"
+	select SCSI_ISCSI_ATTRS
+	select CNIC
+	---help---
+	This driver supports iSCSI offload for the Broadcom NetXtreme II
+	devices.
diff --git a/drivers/scsi/bnx2i/Makefile b/drivers/scsi/bnx2i/Makefile
new file mode 100644
index 0000000..b5802bd
--- /dev/null
+++ b/drivers/scsi/bnx2i/Makefile
@@ -0,0 +1,3 @@
+bnx2i-y := bnx2i_init.o bnx2i_hwi.o bnx2i_iscsi.o bnx2i_sysfs.o
+
+obj-$(CONFIG_SCSI_BNX2_ISCSI) += bnx2i.o
diff --git a/drivers/scsi/bnx2i/bnx2i.h b/drivers/scsi/bnx2i/bnx2i.h
new file mode 100644
index 0000000..d7576f2
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i.h
@@ -0,0 +1,771 @@
+/* bnx2i.h: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#ifndef _BNX2I_H_
+#define _BNX2I_H_
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/kfifo.h>
+#include <linux/netdevice.h>
+#include <linux/completion.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi.h>
+#include <scsi/iscsi_proto.h>
+#include <scsi/libiscsi.h>
+#include <scsi/scsi_transport_iscsi.h>
+
+#include "../../net/cnic_if.h"
+#include "57xx_iscsi_hsi.h"
+#include "57xx_iscsi_constants.h"
+
+#define BNX2_ISCSI_DRIVER_NAME		"bnx2i"
+
+#define BNX2I_MAX_ADAPTERS		8
+
+#define ISCSI_MAX_CONNS_PER_HBA		128
+#define ISCSI_MAX_SESS_PER_HBA		ISCSI_MAX_CONNS_PER_HBA
+#define ISCSI_MAX_CMDS_PER_SESS		128
+
+/* Total active commands across all connections supported by devices */
+#define ISCSI_MAX_CMDS_PER_HBA_5708	(28 * (ISCSI_MAX_CMDS_PER_SESS - 1))
+#define ISCSI_MAX_CMDS_PER_HBA_5709	(128 * (ISCSI_MAX_CMDS_PER_SESS - 1))
+#define ISCSI_MAX_CMDS_PER_HBA_57710	(256 * (ISCSI_MAX_CMDS_PER_SESS - 1))
+
+#define ISCSI_MAX_BDS_PER_CMD		32
+
+#define MAX_PAGES_PER_CTRL_STRUCT_POOL	8
+#define BNX2I_RESERVED_SLOW_PATH_CMD_SLOTS	4
+
+/* 5706/08 hardware has limit on maximum buffer size per BD it can handle */
+#define MAX_BD_LENGTH			65535
+#define BD_SPLIT_SIZE			32768
+
+/* min, max & default values for SQ/RQ/CQ size, configurable via' modparam */
+#define BNX2I_SQ_WQES_MIN 		16
+#define BNX2I_570X_SQ_WQES_MAX 		128
+#define BNX2I_5770X_SQ_WQES_MAX 	512
+#define BNX2I_570X_SQ_WQES_DEFAULT 	128
+#define BNX2I_5770X_SQ_WQES_DEFAULT 	256
+
+#define BNX2I_570X_CQ_WQES_MAX 		128
+#define BNX2I_5770X_CQ_WQES_MAX 	512
+
+#define BNX2I_RQ_WQES_MIN 		16
+#define BNX2I_RQ_WQES_MAX 		32
+#define BNX2I_RQ_WQES_DEFAULT 		16
+
+/* CCELLs per conn */
+#define BNX2I_CCELLS_MIN		16
+#define BNX2I_CCELLS_MAX		96
+#define BNX2I_CCELLS_DEFAULT		64
+
+#define ITT_INVALID_SIGNATURE		0xFFFF
+
+#define ISCSI_CMD_CLEANUP_TIMEOUT	100
+
+#define BNX2I_CONN_CTX_BUF_SIZE		16384
+
+#define BNX2I_SQ_WQE_SIZE		64
+#define BNX2I_RQ_WQE_SIZE		256
+#define BNX2I_CQE_SIZE			64
+
+#define MB_KERNEL_CTX_SHIFT		8
+#define MB_KERNEL_CTX_SIZE		(1 << MB_KERNEL_CTX_SHIFT)
+
+#define CTX_SHIFT			7
+#define GET_CID_NUM(cid_addr)		((cid_addr) >> CTX_SHIFT)
+
+#define CTX_OFFSET 			0x10000
+#define MAX_CID_CNT			0x4000
+
+/* 5709 context registers */
+#define BNX2_MQ_CONFIG2			0x00003d00
+#define BNX2_MQ_CONFIG2_CONT_SZ		(0x7L<<4)
+#define BNX2_MQ_CONFIG2_FIRST_L4L5	(0x1fL<<8)
+
+/* 57710's BAR2 is mapped to doorbell registers */
+#define BNX2X_DOORBELL_PCI_BAR		2
+#define BNX2X_MAX_CQS			8
+
+#define CNIC_ARM_CQE			1
+#define CNIC_DISARM_CQE			0
+
+#define REG_RD(__hba, offset)				\
+		readl(__hba->regview + offset)
+#define REG_WR(__hba, offset, val)			\
+		writel(val, __hba->regview + offset)
+
+
+/**
+ * struct generic_pdu_resc - login pdu resource structure
+ *
+ * @req_buf:            driver buffer used to stage payload associated with
+ *                      the login request
+ * @req_dma_addr:       dma address for iscsi login request payload buffer
+ * @req_buf_size:       actual login request payload length
+ * @req_wr_ptr:         pointer into login request buffer when next data is
+ *                      to be written
+ * @resp_hdr:           iscsi header where iscsi login response header is to
+ *                      be recreated
+ * @resp_buf:           buffer to stage login response payload
+ * @resp_dma_addr:      login response payload buffer dma address
+ * @resp_buf_size:      login response paylod length
+ * @resp_wr_ptr:        pointer into login response buffer when next data is
+ *                      to be written
+ * @req_bd_tbl:         iscsi login request payload BD table
+ * @req_bd_dma:         login request BD table dma address
+ * @resp_bd_tbl:        iscsi login response payload BD table
+ * @resp_bd_dma:        login request BD table dma address
+ *
+ * following structure defines buffer info for generic pdus such as iSCSI Login,
+ *	Logout and NOP
+ */
+struct generic_pdu_resc {
+	char *req_buf;
+	dma_addr_t req_dma_addr;
+	u32 req_buf_size;
+	char *req_wr_ptr;
+	struct iscsi_hdr resp_hdr;
+	char *resp_buf;
+	dma_addr_t resp_dma_addr;
+	u32 resp_buf_size;
+	char *resp_wr_ptr;
+	char *req_bd_tbl;
+	dma_addr_t req_bd_dma;
+	char *resp_bd_tbl;
+	dma_addr_t resp_bd_dma;
+};
+
+
+/**
+ * struct bd_resc_page - tracks DMA'able memory allocated for BD tables
+ *
+ * @link:               list head to link elements
+ * @max_ptrs:           maximun pointers that can be stored in this page
+ * @num_valid:          number of pointer valid in this page
+ * @page:               base addess for page pointer array
+ *
+ * structure to track DMA'able memory allocated for command BD tables
+ */
+struct bd_resc_page {
+	struct list_head link;
+	u32 max_ptrs;
+	u32 num_valid;
+	void *page[1];
+};
+
+
+/**
+ * struct io_bdt - I/O buffer destricptor table
+ *
+ * @bd_tbl:             BD table's virtual address
+ * @bd_tbl_dma:         BD table's dma address
+ * @bd_valid:           num valid BD entries
+ *
+ * IO BD table
+ */
+struct io_bdt {
+	struct iscsi_bd *bd_tbl;
+	dma_addr_t bd_tbl_dma;
+	u16 bd_valid;
+};
+
+
+/**
+ * bnx2i_cmd - iscsi command structure
+ *
+ * @scsi_cmd:           SCSI-ML task pointer corresponding to this iscsi cmd
+ * @sg:                 SG list
+ * @io_tbl:             buffer descriptor (BD) table
+ * @bd_tbl_dma:         buffer descriptor (BD) table's dma address
+ */
+struct bnx2i_cmd {
+	struct iscsi_hdr hdr;
+	struct bnx2i_conn *conn;
+	struct scsi_cmnd *scsi_cmd;
+	struct scatterlist *sg;
+	struct io_bdt io_tbl;
+	dma_addr_t bd_tbl_dma;
+	struct bnx2i_cmd_request req;
+};
+
+
+/**
+ * struct bnx2i_conn - iscsi connection structure
+ *
+ * @cls_conn:              pointer to iscsi cls conn
+ * @hba:                   adapter structure pointer
+ * @iscsi_conn_cid:        iscsi conn id
+ * @fw_cid:                firmware iscsi context id
+ * @ep:                    endpoint structure pointer
+ * @gen_pdu:               login/nopout/logout pdu resources
+ * @violation_notified:    bit mask used to track iscsi error/warning messages
+ *                         already printed out
+ *
+ * iSCSI connection structure
+ */
+struct bnx2i_conn {
+	struct iscsi_cls_conn *cls_conn;
+	struct bnx2i_hba *hba;
+	struct completion cmd_cleanup_cmpl;
+	int is_bound;
+
+	u32 iscsi_conn_cid;
+#define BNX2I_CID_RESERVED	0x5AFF
+	u32 fw_cid;
+
+	struct timer_list poll_timer;
+	/*
+	 * Queue Pair (QP) related structure elements.
+	 */
+	struct bnx2i_endpoint *ep;
+
+	/*
+	 * Buffer for login negotiation process
+	 */
+	struct generic_pdu_resc gen_pdu;
+	u64 violation_notified;
+};
+
+
+
+/**
+ * struct iscsi_cid_queue - Per adapter iscsi cid queue
+ *
+ * @cid_que_base:           queue base memory
+ * @cid_que:                queue memory pointer
+ * @cid_q_prod_idx:         produce index
+ * @cid_q_cons_idx:         consumer index
+ * @cid_q_max_idx:          max index. used to detect wrap around condition
+ * @cid_free_cnt:           queue size
+ * @conn_cid_tbl:           iscsi cid to conn structure mapping table
+ *
+ * Per adapter iSCSI CID Queue
+ */
+struct iscsi_cid_queue {
+	void *cid_que_base;
+	u32 *cid_que;
+	u32 cid_q_prod_idx;
+	u32 cid_q_cons_idx;
+	u32 cid_q_max_idx;
+	u32 cid_free_cnt;
+	struct bnx2i_conn **conn_cid_tbl;
+};
+
+/**
+ * struct bnx2i_hba - bnx2i adapter structure
+ *
+ * @link:                  list head to link elements
+ * @cnic:                  pointer to cnic device
+ * @pcidev:                pointer to pci dev
+ * @netdev:                pointer to netdev structure
+ * @regview:               mapped PCI register space
+ * @age:                   age, incremented by every recovery
+ * @cnic_dev_type:         cnic device type, 5706/5708/5709/57710
+ * @mail_queue_access:     mailbox queue access mode, applicable to 5709 only
+ * @reg_with_cnic:         indicates whether the device is register with CNIC
+ * @adapter_state:         adapter state, UP, GOING_DOWN, LINK_DOWN
+ * @mtu_supported:         Ethernet MTU supported
+ * @shost:                 scsi host pointer
+ * @max_sqes:              SQ size
+ * @max_rqes:              RQ size
+ * @max_cqes:              CQ size
+ * @num_ccell:             number of command cells per connection
+ * @ofld_conns_active:     active connection list
+ * @max_active_conns:      max offload connections supported by this device
+ * @cid_que:               iscsi cid queue
+ * @ep_rdwr_lock:          read / write lock to synchronize various ep lists
+ * @ep_ofld_list:          connection list for pending offload completion
+ * @ep_destroy_list:       connection list for pending offload completion
+ * @mp_bd_tbl:             BD table to be used with middle path requests
+ * @mp_bd_dma:             DMA address of 'mp_bd_tbl' memory buffer
+ * @dummy_buffer:          Dummy buffer to be used with zero length scsicmd reqs
+ * @dummy_buf_dma:         DMA address of 'dummy_buffer' memory buffer
+ * @lock:              	   lock to synchonize access to hba structure
+ * @pci_did:               PCI device ID
+ * @pci_vid:               PCI vendor ID
+ * @pci_sdid:              PCI subsystem device ID
+ * @pci_svid:              PCI subsystem vendor ID
+ * @pci_func:              PCI function number in system pci tree
+ * @pci_devno:             PCI device number in system pci tree
+ * @num_wqe_sent:          statistic counter, total wqe's sent
+ * @num_cqe_rcvd:          statistic counter, total cqe's received
+ * @num_intr_claimed:      statistic counter, total interrupts claimed
+ * @link_changed_count:    statistic counter, num of link change notifications
+ *                         received
+ * @ipaddr_changed_count:  statistic counter, num times IP address changed while
+ *                         at least one connection is offloaded
+ * @num_sess_opened:       statistic counter, total num sessions opened
+ * @num_conn_opened:       statistic counter, total num conns opened on this hba
+ * @ctx_ccell_tasks:       captures number of ccells and tasks supported by
+ *                         currently offloaded connection, used to decode
+ *                         context memory
+ *
+ * Adapter Data Structure
+ */
+struct bnx2i_hba {
+	struct list_head link;
+	struct cnic_dev *cnic;
+	struct pci_dev *pcidev;
+	struct net_device *netdev;
+	void __iomem *regview;
+
+	u32 age;
+	unsigned long cnic_dev_type;
+		#define BNX2I_NX2_DEV_5706		0x0
+		#define BNX2I_NX2_DEV_5708		0x1
+		#define BNX2I_NX2_DEV_5709		0x2
+		#define BNX2I_NX2_DEV_57710		0x3
+	u32 mail_queue_access;
+		#define BNX2I_MQ_KERNEL_MODE		0x0
+		#define BNX2I_MQ_KERNEL_BYPASS_MODE	0x1
+		#define BNX2I_MQ_BIN_MODE		0x2
+	unsigned long  reg_with_cnic;
+		#define BNX2I_CNIC_REGISTERED		1
+
+	unsigned long  adapter_state;
+		#define ADAPTER_STATE_UP		0
+		#define ADAPTER_STATE_GOING_DOWN	1
+		#define ADAPTER_STATE_LINK_DOWN		2
+		#define ADAPTER_STATE_INIT_FAILED	31
+	unsigned int mtu_supported;
+		#define BNX2I_MAX_MTU_SUPPORTED		1500
+
+	struct Scsi_Host *shost;
+
+	u32 max_sqes;
+	u32 max_rqes;
+	u32 max_cqes;
+	u32 num_ccell;
+
+	int ofld_conns_active;
+
+	int max_active_conns;
+	struct iscsi_cid_queue cid_que;
+
+	rwlock_t ep_rdwr_lock;
+	struct list_head ep_ofld_list;
+	struct list_head ep_destroy_list;
+
+	/*
+	 * BD table to be used with MP (Middle Path requests.
+	 */
+	char *mp_bd_tbl;
+	dma_addr_t mp_bd_dma;
+	char *dummy_buffer;
+	dma_addr_t dummy_buf_dma;
+
+	spinlock_t lock;	/* protects hba structure access */
+	struct mutex net_dev_lock;/* sync net device access */
+
+	/*
+	 * PCI related info.
+	 */
+	u16 pci_did;
+	u16 pci_vid;
+	u16 pci_sdid;
+	u16 pci_svid;
+	u16 pci_func;
+	u16 pci_devno;
+
+	/*
+	 * Following are a bunch of statistics useful during development
+	 * and later stage for score boarding.
+	 */
+	u32 num_wqe_sent;
+	u32 num_cqe_rcvd;
+	u32 num_intr_claimed;
+	u32 link_changed_count;
+	u32 ipaddr_changed_count;
+	u32 num_sess_opened;
+	u32 num_conn_opened;
+	unsigned int ctx_ccell_tasks;
+};
+
+
+/*******************************************************************************
+ * 	QP [ SQ / RQ / CQ ] info.
+ ******************************************************************************/
+
+/*
+ * SQ/RQ/CQ generic structure definition
+ */
+struct 	sqe {
+	u8 sqe_byte[BNX2I_SQ_WQE_SIZE];
+};
+
+struct 	rqe {
+	u8 rqe_byte[BNX2I_RQ_WQE_SIZE];
+};
+
+struct 	cqe {
+	u8 cqe_byte[BNX2I_CQE_SIZE];
+};
+
+
+enum {
+#if defined(__LITTLE_ENDIAN)
+	CNIC_EVENT_COAL_INDEX	= 0x0,
+	CNIC_SEND_DOORBELL	= 0x4,
+	CNIC_EVENT_CQ_ARM	= 0x7,
+	CNIC_RECV_DOORBELL	= 0x8
+#elif defined(__BIG_ENDIAN)
+	CNIC_EVENT_COAL_INDEX	= 0x2,
+	CNIC_SEND_DOORBELL	= 0x6,
+	CNIC_EVENT_CQ_ARM	= 0x4,
+	CNIC_RECV_DOORBELL	= 0xa
+#endif
+};
+
+
+/*
+ * CQ DB
+ */
+struct bnx2x_iscsi_cq_pend_cmpl {
+	/* CQ producer, updated by Ustorm */
+	u16 ustrom_prod;
+	/* CQ pending completion counter */
+	u16 pend_cntr;
+};
+
+
+struct bnx2i_5771x_cq_db {
+	struct bnx2x_iscsi_cq_pend_cmpl qp_pend_cmpl[BNX2X_MAX_CQS];
+	/* CQ pending completion ITT array */
+	u16 itt[BNX2X_MAX_CQS];
+	/* Cstorm CQ sequence to notify array, updated by driver */;
+	u16 sqn[BNX2X_MAX_CQS];
+	u32 reserved[4] /* 16 byte allignment */;
+};
+
+
+struct bnx2i_5771x_sq_rq_db {
+	u16 prod_idx;
+	u8 reserved0[14]; /* Pad structure size to 16 bytes */
+};
+
+
+struct bnx2i_5771x_dbell_hdr {
+	u8 header;
+	/* 1 for rx doorbell, 0 for tx doorbell */
+#define B577XX_DOORBELL_HDR_RX				(0x1<<0)
+#define B577XX_DOORBELL_HDR_RX_SHIFT			0
+	/* 0 for normal doorbell, 1 for advertise wnd doorbell */
+#define B577XX_DOORBELL_HDR_DB_TYPE			(0x1<<1)
+#define B577XX_DOORBELL_HDR_DB_TYPE_SHIFT		1
+	/* rdma tx only: DPM transaction size specifier (64/128/256/512B) */
+#define B577XX_DOORBELL_HDR_DPM_SIZE			(0x3<<2)
+#define B577XX_DOORBELL_HDR_DPM_SIZE_SHIFT		2
+	/* connection type */
+#define B577XX_DOORBELL_HDR_CONN_TYPE			(0xF<<4)
+#define B577XX_DOORBELL_HDR_CONN_TYPE_SHIFT		4
+};
+
+struct bnx2i_5771x_dbell {
+	struct bnx2i_5771x_dbell_hdr dbell;
+	u8 pad[3];
+
+};
+
+/**
+ * struct qp_info - QP (share queue region) atrributes structure
+ *
+ * @ctx_base:           ioremapped pci register base to access doorbell register
+ *                      pertaining to this offloaded connection
+ * @sq_virt:            virtual address of send queue (SQ) region
+ * @sq_phys:            DMA address of SQ memory region
+ * @sq_mem_size:        SQ size
+ * @sq_prod_qe:         SQ producer entry pointer
+ * @sq_cons_qe:         SQ consumer entry pointer
+ * @sq_first_qe:        virtaul address of first entry in SQ
+ * @sq_last_qe:         virtaul address of last entry in SQ
+ * @sq_prod_idx:        SQ producer index
+ * @sq_cons_idx:        SQ consumer index
+ * @sqe_left:           number sq entry left
+ * @sq_pgtbl_virt:      page table describing buffer consituting SQ region
+ * @sq_pgtbl_phys:      dma address of 'sq_pgtbl_virt'
+ * @sq_pgtbl_size:      SQ page table size
+ * @cq_virt:            virtual address of completion queue (CQ) region
+ * @cq_phys:            DMA address of RQ memory region
+ * @cq_mem_size:        CQ size
+ * @cq_prod_qe:         CQ producer entry pointer
+ * @cq_cons_qe:         CQ consumer entry pointer
+ * @cq_first_qe:        virtaul address of first entry in CQ
+ * @cq_last_qe:         virtaul address of last entry in CQ
+ * @cq_prod_idx:        CQ producer index
+ * @cq_cons_idx:        CQ consumer index
+ * @cqe_left:           number cq entry left
+ * @cqe_size:           size of each CQ entry
+ * @cqe_exp_seq_sn:     next expected CQE sequence number
+ * @cq_pgtbl_virt:      page table describing buffer consituting CQ region
+ * @cq_pgtbl_phys:      dma address of 'cq_pgtbl_virt'
+ * @cq_pgtbl_size:    	CQ page table size
+ * @rq_virt:            virtual address of receive queue (RQ) region
+ * @rq_phys:            DMA address of RQ memory region
+ * @rq_mem_size:        RQ size
+ * @rq_prod_qe:         RQ producer entry pointer
+ * @rq_cons_qe:         RQ consumer entry pointer
+ * @rq_first_qe:        virtaul address of first entry in RQ
+ * @rq_last_qe:         virtaul address of last entry in RQ
+ * @rq_prod_idx:        RQ producer index
+ * @rq_cons_idx:        RQ consumer index
+ * @rqe_left:           number rq entry left
+ * @rq_pgtbl_virt:      page table describing buffer consituting RQ region
+ * @rq_pgtbl_phys:      dma address of 'rq_pgtbl_virt'
+ * @rq_pgtbl_size:      RQ page table size
+ *
+ * queue pair (QP) is a per connection shared data structure which is used
+ *	to send work requests (SQ), receive completion notifications (CQ)
+ *	and receive asynchoronous / scsi sense info (RQ). 'qp_info' structure
+ *	below holds queue memory, consumer/producer indexes and page table
+ *	information
+ */
+struct qp_info {
+	void __iomem *ctx_base;
+#define DPM_TRIGER_TYPE			0x40
+
+#define BNX2I_570x_QUE_DB_SIZE		0
+#define BNX2I_5771x_QUE_DB_SIZE		16
+	struct sqe *sq_virt;
+	dma_addr_t sq_phys;
+	u32 sq_mem_size;
+
+	struct sqe *sq_prod_qe;
+	struct sqe *sq_cons_qe;
+	struct sqe *sq_first_qe;
+	struct sqe *sq_last_qe;
+	u16 sq_prod_idx;
+	u16 sq_cons_idx;
+	u32 sqe_left;
+
+	void *sq_pgtbl_virt;
+	dma_addr_t sq_pgtbl_phys;
+	u32 sq_pgtbl_size;	/* set to PAGE_SIZE for 5708 & 5709 */
+
+	struct cqe *cq_virt;
+	dma_addr_t cq_phys;
+	u32 cq_mem_size;
+
+	struct cqe *cq_prod_qe;
+	struct cqe *cq_cons_qe;
+	struct cqe *cq_first_qe;
+	struct cqe *cq_last_qe;
+	u16 cq_prod_idx;
+	u16 cq_cons_idx;
+	u32 cqe_left;
+	u32 cqe_size;
+	u32 cqe_exp_seq_sn;
+
+	void *cq_pgtbl_virt;
+	dma_addr_t cq_pgtbl_phys;
+	u32 cq_pgtbl_size;	/* set to PAGE_SIZE for 5708 & 5709 */
+
+	struct rqe *rq_virt;
+	dma_addr_t rq_phys;
+	u32 rq_mem_size;
+
+	struct rqe *rq_prod_qe;
+	struct rqe *rq_cons_qe;
+	struct rqe *rq_first_qe;
+	struct rqe *rq_last_qe;
+	u16 rq_prod_idx;
+	u16 rq_cons_idx;
+	u32 rqe_left;
+
+	void *rq_pgtbl_virt;
+	dma_addr_t rq_pgtbl_phys;
+	u32 rq_pgtbl_size;	/* set to PAGE_SIZE for 5708 & 5709 */
+};
+
+
+
+/*
+ * CID handles
+ */
+struct ep_handles {
+	u32 fw_cid;
+	u32 drv_iscsi_cid;
+	u16 pg_cid;
+	u16 rsvd;
+};
+
+
+enum {
+	EP_STATE_IDLE                   = 0x0,
+	EP_STATE_PG_OFLD_START          = 0x1,
+	EP_STATE_PG_OFLD_COMPL          = 0x2,
+	EP_STATE_OFLD_START             = 0x4,
+	EP_STATE_OFLD_COMPL             = 0x8,
+	EP_STATE_CONNECT_START          = 0x10,
+	EP_STATE_CONNECT_COMPL          = 0x20,
+	EP_STATE_ULP_UPDATE_START       = 0x40,
+	EP_STATE_ULP_UPDATE_COMPL       = 0x80,
+	EP_STATE_DISCONN_START          = 0x100,
+	EP_STATE_DISCONN_COMPL          = 0x200,
+	EP_STATE_CLEANUP_START          = 0x400,
+	EP_STATE_CLEANUP_CMPL           = 0x800,
+	EP_STATE_TCP_FIN_RCVD           = 0x1000,
+	EP_STATE_TCP_RST_RCVD           = 0x2000,
+	EP_STATE_PG_OFLD_FAILED         = 0x1000000,
+	EP_STATE_ULP_UPDATE_FAILED      = 0x2000000,
+	EP_STATE_CLEANUP_FAILED         = 0x4000000,
+	EP_STATE_OFLD_FAILED            = 0x8000000,
+	EP_STATE_CONNECT_FAILED         = 0x10000000,
+	EP_STATE_DISCONN_TIMEDOUT       = 0x20000000,
+};
+
+/**
+ * struct bnx2i_endpoint - representation of tcp connection in NX2 world
+ *
+ * @link:               list head to link elements
+ * @hba:                adapter to which this connection belongs
+ * @conn:               iscsi connection this EP is linked to
+ * @sess:               iscsi session this EP is linked to
+ * @cm_sk:              cnic sock struct
+ * @hba_age:            age to detect if 'iscsid' issues ep_disconnect()
+ *                      after HBA reset is completed by bnx2i/cnic/bnx2
+ *                      modules
+ * @state:              tracks offload connection state machine
+ * @teardown_mode:      indicates if conn teardown is abortive or orderly
+ * @qp:                 QP information
+ * @ids:                contains chip allocated *context id* & driver assigned
+ *                      *iscsi cid*
+ * @ofld_timer:         offload timer to detect timeout
+ * @ofld_wait:          wait queue
+ *
+ * Endpoint Structure - equivalent of tcp socket structure
+ */
+struct bnx2i_endpoint {
+	struct list_head link;
+	struct bnx2i_hba *hba;
+	struct bnx2i_conn *conn;
+	struct cnic_sock *cm_sk;
+	u32 hba_age;
+	u32 state;
+	unsigned long timestamp;
+	int num_active_cmds;
+
+	struct qp_info qp;
+	struct ep_handles ids;
+		#define ep_iscsi_cid	ids.drv_iscsi_cid
+		#define ep_cid		ids.fw_cid
+		#define ep_pg_cid	ids.pg_cid
+	struct timer_list ofld_timer;
+	wait_queue_head_t ofld_wait;
+};
+
+
+
+/* Global variables */
+extern unsigned int error_mask1, error_mask2;
+extern u64 iscsi_error_mask;
+extern unsigned int en_tcp_dack;
+extern unsigned int event_coal_div;
+
+extern struct scsi_transport_template *bnx2i_scsi_xport_template;
+extern struct iscsi_transport bnx2i_iscsi_transport;
+extern struct cnic_ulp_ops bnx2i_cnic_cb;
+
+extern unsigned int sq_size;
+extern unsigned int rq_size;
+
+extern struct device_attribute *bnx2i_dev_attributes[];
+
+
+
+/*
+ * Function Prototypes
+ */
+extern void bnx2i_identify_device(struct bnx2i_hba *hba);
+extern void bnx2i_register_device(struct bnx2i_hba *hba);
+
+extern void bnx2i_ulp_init(struct cnic_dev *dev);
+extern void bnx2i_ulp_exit(struct cnic_dev *dev);
+extern void bnx2i_start(void *handle);
+extern void bnx2i_stop(void *handle);
+extern void bnx2i_reg_dev_all(void);
+extern void bnx2i_unreg_dev_all(void);
+extern struct bnx2i_hba *get_adapter_list_head(void);
+
+struct bnx2i_conn *bnx2i_get_conn_from_id(struct bnx2i_hba *hba,
+					  u16 iscsi_cid);
+
+int bnx2i_alloc_ep_pool(void);
+void bnx2i_release_ep_pool(void);
+struct bnx2i_endpoint *bnx2i_ep_ofld_list_next(struct bnx2i_hba *hba);
+struct bnx2i_endpoint *bnx2i_ep_destroy_list_next(struct bnx2i_hba *hba);
+
+struct bnx2i_hba *bnx2i_find_hba_for_cnic(struct cnic_dev *cnic);
+
+struct bnx2i_hba *bnx2i_alloc_hba(struct cnic_dev *cnic);
+void bnx2i_free_hba(struct bnx2i_hba *hba);
+
+void bnx2i_get_rq_buf(struct bnx2i_conn *conn, char *ptr, int len);
+void bnx2i_put_rq_buf(struct bnx2i_conn *conn, int count);
+
+void bnx2i_iscsi_unmap_sg_list(struct bnx2i_cmd *cmd);
+
+void bnx2i_drop_session(struct iscsi_cls_session *session);
+
+extern int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba);
+extern int bnx2i_send_iscsi_login(struct bnx2i_conn *conn,
+				  struct iscsi_task *mtask);
+extern int bnx2i_send_iscsi_tmf(struct bnx2i_conn *conn,
+				  struct iscsi_task *mtask);
+extern int bnx2i_send_iscsi_scsicmd(struct bnx2i_conn *conn,
+				    struct bnx2i_cmd *cmnd);
+extern int bnx2i_send_iscsi_nopout(struct bnx2i_conn *conn,
+				   struct iscsi_task *mtask, u32 ttt,
+				   char *datap, int data_len, int unsol);
+extern int bnx2i_send_iscsi_logout(struct bnx2i_conn *conn,
+				   struct iscsi_task *mtask);
+extern void bnx2i_send_cmd_cleanup_req(struct bnx2i_hba *hba,
+				       struct bnx2i_cmd *cmd);
+extern void bnx2i_send_conn_ofld_req(struct bnx2i_hba *hba,
+				     struct bnx2i_endpoint *ep);
+extern void bnx2i_update_iscsi_conn(struct iscsi_conn *conn);
+extern void bnx2i_send_conn_destroy(struct bnx2i_hba *hba,
+				    struct bnx2i_endpoint *ep);
+
+extern int bnx2i_alloc_qp_resc(struct bnx2i_hba *hba,
+			       struct bnx2i_endpoint *ep);
+extern void bnx2i_free_qp_resc(struct bnx2i_hba *hba,
+			       struct bnx2i_endpoint *ep);
+extern void bnx2i_ep_ofld_timer(unsigned long data);
+extern struct bnx2i_endpoint *bnx2i_find_ep_in_ofld_list(
+		struct bnx2i_hba *hba, u32 iscsi_cid);
+extern struct bnx2i_endpoint *bnx2i_find_ep_in_destroy_list(
+		struct bnx2i_hba *hba, u32 iscsi_cid);
+
+extern int bnx2i_map_ep_dbell_regs(struct bnx2i_endpoint *ep);
+extern void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action);
+
+/* Debug related function prototypes */
+extern void bnx2i_print_pend_cmd_queue(struct bnx2i_conn *conn);
+extern void bnx2i_print_active_cmd_queue(struct bnx2i_conn *conn);
+extern void bnx2i_print_xmit_pdu_queue(struct bnx2i_conn *conn);
+extern void bnx2i_print_recv_state(struct bnx2i_conn *conn);
+
+#endif
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
new file mode 100644
index 0000000..906cef5
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -0,0 +1,2405 @@
+/* bnx2i_hwi.c: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#include <scsi/scsi_tcq.h>
+#include <scsi/libiscsi.h>
+#include "bnx2i.h"
+
+/**
+ * bnx2i_get_cid_num - get cid from ep
+ * @ep: 	endpoint pointer
+ *
+ * Only applicable to 57710 family of devices
+ */
+static u32 bnx2i_get_cid_num(struct bnx2i_endpoint *ep)
+{
+	u32 cid;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
+		cid = ep->ep_cid;
+	else
+		cid = GET_CID_NUM(ep->ep_cid);
+	return cid;
+}
+
+
+/**
+ * bnx2i_adjust_qp_size - Adjust SQ/RQ/CQ size for 57710 device type
+ * @hba: 		Adapter for which adjustments is to be made
+ *
+ * Only applicable to 57710 family of devices
+ */
+static void bnx2i_adjust_qp_size(struct bnx2i_hba *hba)
+{
+	u32 num_elements_per_pg;
+
+	if (test_bit(BNX2I_NX2_DEV_5706, &hba->cnic_dev_type) ||
+	    test_bit(BNX2I_NX2_DEV_5708, &hba->cnic_dev_type) ||
+	    test_bit(BNX2I_NX2_DEV_5709, &hba->cnic_dev_type)) {
+		if (!is_power_of_2(hba->max_sqes))
+			hba->max_sqes = rounddown_pow_of_two(hba->max_sqes);
+
+		if (!is_power_of_2(hba->max_rqes))
+			hba->max_rqes = rounddown_pow_of_two(hba->max_rqes);
+	}
+
+	/* Adjust each queue size if the user selection does not
+	 * yield integral num of page buffers
+	 */
+	/* adjust SQ */
+	num_elements_per_pg = PAGE_SIZE / BNX2I_SQ_WQE_SIZE;
+	if (hba->max_sqes < num_elements_per_pg)
+		hba->max_sqes = num_elements_per_pg;
+	else if (hba->max_sqes % num_elements_per_pg)
+		hba->max_sqes = (hba->max_sqes + num_elements_per_pg - 1) &
+				 ~(num_elements_per_pg - 1);
+
+	/* adjust CQ */
+	num_elements_per_pg = PAGE_SIZE / BNX2I_CQE_SIZE;
+	if (hba->max_cqes < num_elements_per_pg)
+		hba->max_cqes = num_elements_per_pg;
+	else if (hba->max_cqes % num_elements_per_pg)
+		hba->max_cqes = (hba->max_cqes + num_elements_per_pg - 1) &
+				 ~(num_elements_per_pg - 1);
+
+	/* adjust RQ */
+	num_elements_per_pg = PAGE_SIZE / BNX2I_RQ_WQE_SIZE;
+	if (hba->max_rqes < num_elements_per_pg)
+		hba->max_rqes = num_elements_per_pg;
+	else if (hba->max_rqes % num_elements_per_pg)
+		hba->max_rqes = (hba->max_rqes + num_elements_per_pg - 1) &
+				 ~(num_elements_per_pg - 1);
+}
+
+
+/**
+ * bnx2i_get_link_state - get network interface link state
+ * @hba:	adapter instance pointer
+ *
+ * updates adapter structure flag based on netdev state
+ */
+static void bnx2i_get_link_state(struct bnx2i_hba *hba)
+{
+	if (test_bit(__LINK_STATE_NOCARRIER, &hba->netdev->state))
+		set_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state);
+	else
+		clear_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state);
+}
+
+
+/**
+ * bnx2i_iscsi_license_error - displays iscsi license related error message
+ * @hba:		adapter instance pointer
+ * @error_code:		error classification
+ *
+ * Puts out an error log when driver is unable to offload iscsi connection
+ *	due to license restrictions
+ */
+static void bnx2i_iscsi_license_error(struct bnx2i_hba *hba, u32 error_code)
+{
+	if (error_code == ISCSI_KCQE_COMPLETION_STATUS_ISCSI_NOT_SUPPORTED)
+		/* iSCSI offload not supported on this device */
+		printk(KERN_ERR "bnx2i: iSCSI not supported, dev=%s\n",
+				hba->netdev->name);
+	if (error_code == ISCSI_KCQE_COMPLETION_STATUS_LOM_ISCSI_NOT_ENABLED)
+		/* iSCSI offload not supported on this LOM device */
+		printk(KERN_ERR "bnx2i: LOM is not enable to "
+				"offload iSCSI connections, dev=%s\n",
+				hba->netdev->name);
+	set_bit(ADAPTER_STATE_INIT_FAILED, &hba->adapter_state);
+}
+
+
+/**
+ * bnx2i_arm_cq_event_coalescing - arms CQ to enable EQ notification
+ * @ep:		endpoint (transport indentifier) structure
+ * @action:	action, ARM or DISARM. For now only ARM_CQE is used
+ *
+ * Arm'ing CQ will enable chip to generate global EQ events inorder to interrupt
+ *	the driver. EQ event is generated CQ index is hit or at least 1 CQ is
+ *	outstanding and on chip timer expires
+ */
+void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
+{
+	struct bnx2i_5771x_cq_db *cq_db;
+	u16 cq_index;
+
+	if (!test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
+		return;
+
+	if (action == CNIC_ARM_CQE) {
+		cq_index = ep->qp.cqe_exp_seq_sn +
+			   ep->num_active_cmds / event_coal_div;
+		cq_index %= (ep->qp.cqe_size * 2 + 1);
+		if (!cq_index) {
+			cq_index = 1;
+			cq_db = (struct bnx2i_5771x_cq_db *)
+					ep->qp.cq_pgtbl_virt;
+			cq_db->sqn[0] = cq_index;
+		}
+	}
+}
+
+
+/**
+ * bnx2i_get_rq_buf - copy RQ buffer contents to driver buffer
+ * @conn:		iscsi connection on which RQ event occured
+ * @ptr:		driver buffer to which RQ buffer contents is to
+ *			be copied
+ * @len:		length of valid data inside RQ buf
+ *
+ * Copies RQ buffer contents from shared (DMA'able) memory region to
+ *	driver buffer. RQ is used to DMA unsolicitated iscsi pdu's and
+ *	scsi sense info
+ */
+void bnx2i_get_rq_buf(struct bnx2i_conn *bnx2i_conn, char *ptr, int len)
+{
+	if (!bnx2i_conn->ep->qp.rqe_left)
+		return;
+
+	bnx2i_conn->ep->qp.rqe_left--;
+	memcpy(ptr, (u8 *) bnx2i_conn->ep->qp.rq_cons_qe, len);
+	if (bnx2i_conn->ep->qp.rq_cons_qe == bnx2i_conn->ep->qp.rq_last_qe) {
+		bnx2i_conn->ep->qp.rq_cons_qe = bnx2i_conn->ep->qp.rq_first_qe;
+		bnx2i_conn->ep->qp.rq_cons_idx = 0;
+	} else {
+		bnx2i_conn->ep->qp.rq_cons_qe++;
+		bnx2i_conn->ep->qp.rq_cons_idx++;
+	}
+}
+
+
+static void bnx2i_ring_577xx_doorbell(struct bnx2i_conn *conn)
+{
+	struct bnx2i_5771x_dbell dbell;
+	u32 msg;
+
+	memset(&dbell, 0, sizeof(dbell));
+	dbell.dbell.header = (B577XX_ISCSI_CONNECTION_TYPE <<
+			      B577XX_DOORBELL_HDR_CONN_TYPE_SHIFT);
+	msg = *((u32 *)&dbell);
+	/* TODO : get doorbell register mapping */
+	writel(cpu_to_le32(msg), conn->ep->qp.ctx_base);
+}
+
+
+/**
+ * bnx2i_put_rq_buf - Replenish RQ buffer, if required ring on chip doorbell
+ * @conn:	iscsi connection on which event to post
+ * @count:	number of RQ buffer being posted to chip
+ *
+ * No need to ring hardware doorbell for 57710 family of devices
+ */
+void bnx2i_put_rq_buf(struct bnx2i_conn *bnx2i_conn, int count)
+{
+	struct bnx2i_5771x_sq_rq_db *rq_db;
+	u16 hi_bit = (bnx2i_conn->ep->qp.rq_prod_idx & 0x8000);
+	struct bnx2i_endpoint *ep = bnx2i_conn->ep;
+
+	ep->qp.rqe_left += count;
+	ep->qp.rq_prod_idx &= 0x7FFF;
+	ep->qp.rq_prod_idx += count;
+
+	if (ep->qp.rq_prod_idx > bnx2i_conn->hba->max_rqes) {
+		ep->qp.rq_prod_idx %= bnx2i_conn->hba->max_rqes;
+		if (!hi_bit)
+			ep->qp.rq_prod_idx |= 0x8000;
+	} else
+		ep->qp.rq_prod_idx |= hi_bit;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
+		rq_db = (struct bnx2i_5771x_sq_rq_db *) ep->qp.rq_pgtbl_virt;
+		rq_db->prod_idx = ep->qp.rq_prod_idx;
+		/* no need to ring hardware doorbell for 57710 */
+	} else {
+		writew(ep->qp.rq_prod_idx,
+		       ep->qp.ctx_base + CNIC_RECV_DOORBELL);
+	}
+	mmiowb();
+}
+
+
+/**
+ * bnx2i_ring_sq_dbell - Ring SQ doorbell to wake-up the processing engine
+ * @conn: 		iscsi connection to which new SQ entries belong
+ * @count: 		number of SQ WQEs to post
+ *
+ * SQ DB is updated in host memory and TX Doorbell is rung for 57710 family
+ *	of devices. For 5706/5708/5709 new SQ WQE count is written into the
+ *	doorbell register
+ */
+static void bnx2i_ring_sq_dbell(struct bnx2i_conn *bnx2i_conn, int count)
+{
+	struct bnx2i_5771x_sq_rq_db *sq_db;
+	struct bnx2i_endpoint *ep = bnx2i_conn->ep;
+
+	ep->num_active_cmds++;
+	wmb();	/* flush SQ WQE memory before the doorbell is rung */
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
+		sq_db = (struct bnx2i_5771x_sq_rq_db *) ep->qp.sq_pgtbl_virt;
+		sq_db->prod_idx = ep->qp.sq_prod_idx;
+		bnx2i_ring_577xx_doorbell(bnx2i_conn);
+	} else
+		writew(count, ep->qp.ctx_base + CNIC_SEND_DOORBELL);
+
+	mmiowb(); /* flush posted PCI writes */
+}
+
+
+/**
+ * bnx2i_ring_dbell_update_sq_params - update SQ driver parameters
+ * @conn:	iscsi connection to which new SQ entries belong
+ * @count:	number of SQ WQEs to post
+ *
+ * this routine will update SQ driver parameters and ring the doorbell
+ */
+static void bnx2i_ring_dbell_update_sq_params(struct bnx2i_conn *bnx2i_conn,
+					      int count)
+{
+	int tmp_cnt;
+
+	if (count == 1) {
+		if (bnx2i_conn->ep->qp.sq_prod_qe ==
+		    bnx2i_conn->ep->qp.sq_last_qe)
+			bnx2i_conn->ep->qp.sq_prod_qe =
+						bnx2i_conn->ep->qp.sq_first_qe;
+		else
+			bnx2i_conn->ep->qp.sq_prod_qe++;
+	} else {
+		if ((bnx2i_conn->ep->qp.sq_prod_qe + count) <=
+		    bnx2i_conn->ep->qp.sq_last_qe)
+			bnx2i_conn->ep->qp.sq_prod_qe += count;
+		else {
+			tmp_cnt = bnx2i_conn->ep->qp.sq_last_qe -
+				bnx2i_conn->ep->qp.sq_prod_qe;
+			bnx2i_conn->ep->qp.sq_prod_qe =
+				&bnx2i_conn->ep->qp.sq_first_qe[count -
+								(tmp_cnt + 1)];
+		}
+	}
+	bnx2i_conn->ep->qp.sq_prod_idx += count;
+	/* Ring the doorbell */
+	bnx2i_ring_sq_dbell(bnx2i_conn, bnx2i_conn->ep->qp.sq_prod_idx);
+}
+
+
+/**
+ * bnx2i_send_iscsi_login - post iSCSI login request MP WQE to hardware
+ * @conn:	iscsi connection
+ * @cmd:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepare and post an iSCSI Login request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_login(struct bnx2i_conn *bnx2i_conn,
+			   struct iscsi_task *task)
+{
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct bnx2i_login_request *login_wqe;
+	struct iscsi_login *login_hdr;
+	u32 dword;
+
+	bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
+	login_hdr = (struct iscsi_login *)task->hdr;
+	login_wqe = (struct bnx2i_login_request *)
+						bnx2i_conn->ep->qp.sq_prod_qe;
+
+	login_wqe->op_code = login_hdr->opcode;
+	login_wqe->op_attr = login_hdr->flags;
+	login_wqe->version_max = login_hdr->max_version;
+	login_wqe->version_min = login_hdr->min_version;
+	login_wqe->data_length = ntoh24(login_hdr->dlength);
+	login_wqe->isid_lo = *((u32 *) login_hdr->isid);
+	login_wqe->isid_hi = *((u16 *) login_hdr->isid + 2);
+	login_wqe->tsih = login_hdr->tsih;
+	login_wqe->itt = task->itt |
+		(ISCSI_TASK_TYPE_MPATH << ISCSI_LOGIN_REQUEST_TYPE_SHIFT);
+	login_wqe->cid = login_hdr->cid;
+
+	login_wqe->cmd_sn = be32_to_cpu(login_hdr->cmdsn);
+	login_wqe->exp_stat_sn = be32_to_cpu(login_hdr->exp_statsn);
+
+	login_wqe->resp_bd_list_addr_lo = (u32) bnx2i_conn->gen_pdu.resp_bd_dma;
+	login_wqe->resp_bd_list_addr_hi =
+		(u32) ((u64) bnx2i_conn->gen_pdu.resp_bd_dma >> 32);
+
+	dword = ((1 << ISCSI_LOGIN_REQUEST_NUM_RESP_BDS_SHIFT) |
+		 (bnx2i_conn->gen_pdu.resp_buf_size <<
+		  ISCSI_LOGIN_REQUEST_RESP_BUFFER_LENGTH_SHIFT));
+	login_wqe->resp_buffer = dword;
+	login_wqe->flags = 0;
+	login_wqe->bd_list_addr_lo = (u32) bnx2i_conn->gen_pdu.req_bd_dma;
+	login_wqe->bd_list_addr_hi =
+		(u32) ((u64) bnx2i_conn->gen_pdu.req_bd_dma >> 32);
+	login_wqe->num_bds = 1;
+	login_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+/**
+ * bnx2i_send_iscsi_tmf - post iSCSI task management request MP WQE to hardware
+ * @conn:	iscsi connection
+ * @mtask:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepare and post an iSCSI Login request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_tmf(struct bnx2i_conn *bnx2i_conn,
+			 struct iscsi_task *mtask)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_tm *tmfabort_hdr;
+	struct scsi_cmnd *ref_sc;
+	struct iscsi_task *ctask;
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct bnx2i_tmf_request *tmfabort_wqe;
+	u32 dword;
+
+	bnx2i_cmd = (struct bnx2i_cmd *)mtask->dd_data;
+	tmfabort_hdr = (struct iscsi_tm *)mtask->hdr;
+	tmfabort_wqe = (struct bnx2i_tmf_request *)
+						bnx2i_conn->ep->qp.sq_prod_qe;
+
+	tmfabort_wqe->op_code = tmfabort_hdr->opcode;
+	tmfabort_wqe->op_attr = 0;
+	tmfabort_wqe->op_attr =
+		ISCSI_TMF_REQUEST_ALWAYS_ONE | ISCSI_TM_FUNC_ABORT_TASK;
+	tmfabort_wqe->lun[0] = be32_to_cpu(tmfabort_hdr->lun[0]);
+	tmfabort_wqe->lun[1] = be32_to_cpu(tmfabort_hdr->lun[1]);
+
+	tmfabort_wqe->itt = (mtask->itt | (ISCSI_TASK_TYPE_MPATH << 14));
+	tmfabort_wqe->reserved2 = 0;
+	tmfabort_wqe->cmd_sn = be32_to_cpu(tmfabort_hdr->cmdsn);
+
+	ctask = iscsi_itt_to_task(conn, tmfabort_hdr->rtt);
+	if (!ctask || ctask->sc)
+		/*
+		 * the iscsi layer must have completed the cmd while this
+		 * was starting up.
+		 */
+		return 0;
+	ref_sc = ctask->sc;
+
+	if (ref_sc->sc_data_direction == DMA_TO_DEVICE)
+		dword = (ISCSI_TASK_TYPE_WRITE << ISCSI_CMD_REQUEST_TYPE_SHIFT);
+	else
+		dword = (ISCSI_TASK_TYPE_READ << ISCSI_CMD_REQUEST_TYPE_SHIFT);
+	tmfabort_wqe->ref_itt = (dword | tmfabort_hdr->rtt);
+	tmfabort_wqe->ref_cmd_sn = be32_to_cpu(tmfabort_hdr->refcmdsn);
+
+	tmfabort_wqe->bd_list_addr_lo = (u32) bnx2i_conn->hba->mp_bd_dma;
+	tmfabort_wqe->bd_list_addr_hi = (u32)
+				((u64) bnx2i_conn->hba->mp_bd_dma >> 32);
+	tmfabort_wqe->num_bds = 1;
+	tmfabort_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+/**
+ * bnx2i_send_iscsi_scsicmd - post iSCSI scsicmd request WQE to hardware
+ * @conn:	iscsi connection
+ * @cmd:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepare and post an iSCSI SCSI-CMD request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_scsicmd(struct bnx2i_conn *bnx2i_conn,
+			     struct bnx2i_cmd *cmd)
+{
+	struct bnx2i_cmd_request *scsi_cmd_wqe;
+
+	scsi_cmd_wqe = (struct bnx2i_cmd_request *)
+						bnx2i_conn->ep->qp.sq_prod_qe;
+	memcpy(scsi_cmd_wqe, &cmd->req, sizeof(struct bnx2i_cmd_request));
+	scsi_cmd_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+/**
+ * bnx2i_send_iscsi_nopout - post iSCSI NOPOUT request WQE to hardware
+ * @conn:		iscsi connection
+ * @cmd:		driver command structure which is requesting
+ *			a WQE to sent to chip for further processing
+ * @ttt:		TTT to be used when building pdu header
+ * @datap:		payload buffer pointer
+ * @data_len:		payload data length
+ * @unsol:		indicated whether nopout pdu is unsolicited pdu or
+ *			in response to target's NOPIN w/ TTT != FFFFFFFF
+ *
+ * prepare and post a nopout request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_nopout(struct bnx2i_conn *bnx2i_conn,
+			    struct iscsi_task *task, u32 ttt,
+			    char *datap, int data_len, int unsol)
+{
+	struct bnx2i_endpoint *ep = bnx2i_conn->ep;
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct bnx2i_nop_out_request *nopout_wqe;
+	struct iscsi_nopout *nopout_hdr;
+
+	bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
+	nopout_hdr = (struct iscsi_nopout *)task->hdr;
+	nopout_wqe = (struct bnx2i_nop_out_request *)ep->qp.sq_prod_qe;
+	nopout_wqe->op_code = nopout_hdr->opcode;
+	nopout_wqe->op_attr = ISCSI_FLAG_CMD_FINAL;
+	memcpy(nopout_wqe->lun, nopout_hdr->lun, 8);
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
+		u32 tmp = nopout_hdr->lun[0];
+		/* 57710 requires LUN field to be swapped */
+		nopout_hdr->lun[0] = nopout_hdr->lun[1];
+		nopout_hdr->lun[1] = tmp;
+	}
+
+	nopout_wqe->itt = ((u16)task->itt |
+			   (ISCSI_TASK_TYPE_MPATH <<
+			    ISCSI_TMF_REQUEST_TYPE_SHIFT));
+	nopout_wqe->ttt = ttt;
+	nopout_wqe->flags = 0;
+	if (!unsol)
+		nopout_wqe->flags = ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION;
+	else if (nopout_hdr->itt == RESERVED_ITT)
+		nopout_wqe->flags = ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION;
+
+	nopout_wqe->cmd_sn = be32_to_cpu(nopout_hdr->cmdsn);
+	nopout_wqe->data_length = data_len;
+	if (data_len) {
+		/* handle payload data, not required in first release */
+		printk(KERN_ALERT "NOPOUT: WARNING!! payload len != 0\n");
+	} else {
+		nopout_wqe->bd_list_addr_lo = (u32)
+					bnx2i_conn->hba->mp_bd_dma;
+		nopout_wqe->bd_list_addr_hi =
+			(u32) ((u64) bnx2i_conn->hba->mp_bd_dma >> 32);
+		nopout_wqe->num_bds = 1;
+	}
+	nopout_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+
+/**
+ * bnx2i_send_iscsi_logout - post iSCSI logout request WQE to hardware
+ * @conn:	iscsi connection
+ * @cmd:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepare and post logout request WQE to CNIC firmware
+ */
+int bnx2i_send_iscsi_logout(struct bnx2i_conn *bnx2i_conn,
+			    struct iscsi_task *task)
+{
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct bnx2i_logout_request *logout_wqe;
+	struct iscsi_logout *logout_hdr;
+
+	bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
+	logout_hdr = (struct iscsi_logout *)task->hdr;
+
+	logout_wqe = (struct bnx2i_logout_request *)
+						bnx2i_conn->ep->qp.sq_prod_qe;
+	memset(logout_wqe, 0x00, sizeof(struct bnx2i_logout_request));
+
+	logout_wqe->op_code = logout_hdr->opcode;
+	logout_wqe->cmd_sn = be32_to_cpu(logout_hdr->cmdsn);
+	logout_wqe->op_attr =
+			logout_hdr->flags | ISCSI_LOGOUT_REQUEST_ALWAYS_ONE;
+	logout_wqe->itt = ((u16)task->itt |
+			   (ISCSI_TASK_TYPE_MPATH <<
+			    ISCSI_LOGOUT_REQUEST_TYPE_SHIFT));
+	logout_wqe->data_length = 0;
+	logout_wqe->cid = 0;
+
+	logout_wqe->bd_list_addr_lo = (u32) bnx2i_conn->hba->mp_bd_dma;
+	logout_wqe->bd_list_addr_hi = (u32)
+				((u64) bnx2i_conn->hba->mp_bd_dma >> 32);
+	logout_wqe->num_bds = 1;
+	logout_wqe->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(bnx2i_conn, 1);
+	return 0;
+}
+
+
+/**
+ * bnx2i_update_iscsi_conn - post iSCSI logout request WQE to hardware
+ * @conn:	iscsi connection which requires iscsi parameter update
+ *
+ * sends down iSCSI Conn Update request to move iSCSI conn to FFP
+ */
+void bnx2i_update_iscsi_conn(struct iscsi_conn *conn)
+{
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct bnx2i_hba *hba = bnx2i_conn->hba;
+	struct kwqe *kwqe_arr[2];
+	struct iscsi_kwqe_conn_update *update_wqe;
+	struct iscsi_kwqe_conn_update conn_update_kwqe;
+
+	update_wqe = &conn_update_kwqe;
+
+	update_wqe->hdr.op_code = ISCSI_KWQE_OPCODE_UPDATE_CONN;
+	update_wqe->hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	/* 5771x requires conn context id to be passed as is */
+	if (test_bit(BNX2I_NX2_DEV_57710, &bnx2i_conn->ep->hba->cnic_dev_type))
+		update_wqe->context_id = bnx2i_conn->ep->ep_cid;
+	else
+		update_wqe->context_id = (bnx2i_conn->ep->ep_cid >> 7);
+	update_wqe->conn_flags = 0;
+	if (conn->hdrdgst_en)
+		update_wqe->conn_flags |= ISCSI_KWQE_CONN_UPDATE_HEADER_DIGEST;
+	if (conn->datadgst_en)
+		update_wqe->conn_flags |= ISCSI_KWQE_CONN_UPDATE_DATA_DIGEST;
+	if (conn->session->initial_r2t_en)
+		update_wqe->conn_flags |= ISCSI_KWQE_CONN_UPDATE_INITIAL_R2T;
+	if (conn->session->imm_data_en)
+		update_wqe->conn_flags |= ISCSI_KWQE_CONN_UPDATE_IMMEDIATE_DATA;
+
+	update_wqe->max_send_pdu_length = conn->max_xmit_dlength;
+	update_wqe->max_recv_pdu_length = conn->max_recv_dlength;
+	update_wqe->first_burst_length = conn->session->first_burst;
+	update_wqe->max_burst_length = conn->session->max_burst;
+	update_wqe->exp_stat_sn = conn->exp_statsn;
+	update_wqe->max_outstanding_r2ts = conn->session->max_r2t;
+	update_wqe->session_error_recovery_level = conn->session->erl;
+	iscsi_conn_printk(KERN_ALERT, conn,
+			  "bnx2i: conn update - MBL 0x%x FBL 0x%x"
+			  "MRDSL_I 0x%x MRDSL_T 0x%x \n",
+			  update_wqe->max_burst_length,
+			  update_wqe->first_burst_length,
+			  update_wqe->max_recv_pdu_length,
+			  update_wqe->max_send_pdu_length);
+
+	kwqe_arr[0] = (struct kwqe *) update_wqe;
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, 1);
+}
+
+
+/**
+ * bnx2i_ep_ofld_timer - post iSCSI logout request WQE to hardware
+ * @data:	endpoint (transport handle) structure pointer
+ *
+ * routine to handle connection offload/destroy request timeout
+ */
+void bnx2i_ep_ofld_timer(unsigned long data)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) data;
+
+	if (ep->state == EP_STATE_OFLD_START) {
+		printk(KERN_ALERT "ofld_timer: CONN_OFLD timeout\n");
+		ep->state = EP_STATE_OFLD_FAILED;
+	} else if (ep->state == EP_STATE_DISCONN_START) {
+		printk(KERN_ALERT "ofld_timer: CONN_DISCON timeout\n");
+		ep->state = EP_STATE_DISCONN_TIMEDOUT;
+	} else if (ep->state == EP_STATE_CLEANUP_START) {
+		printk(KERN_ALERT "ofld_timer: CONN_CLEANUP timeout\n");
+		ep->state = EP_STATE_CLEANUP_FAILED;
+	}
+
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+static int bnx2i_power_of2(u32 val)
+{
+	u32 power = 0;
+	if (val & (val - 1))
+		return power;
+	val--;
+	while (val) {
+		val = val >> 1;
+		power++;
+	}
+	return power;
+}
+
+
+/**
+ * bnx2i_send_cmd_cleanup_req - send iscsi cmd context clean-up request
+ * @hba:	adapter structure pointer
+ * @cmd:	driver command structure which is requesting
+ *		a WQE to sent to chip for further processing
+ *
+ * prepares and posts CONN_OFLD_REQ1/2 KWQE
+ */
+void bnx2i_send_cmd_cleanup_req(struct bnx2i_hba *hba, struct bnx2i_cmd *cmd)
+{
+	struct bnx2i_cleanup_request *cmd_cleanup;
+
+	cmd_cleanup =
+		(struct bnx2i_cleanup_request *)cmd->conn->ep->qp.sq_prod_qe;
+	memset(cmd_cleanup, 0x00, sizeof(struct bnx2i_cleanup_request));
+
+	cmd_cleanup->op_code = ISCSI_OPCODE_CLEANUP_REQUEST;
+	cmd_cleanup->itt = cmd->req.itt;
+	cmd_cleanup->cq_index = 0; /* CQ# used for completion, 5771x only */
+
+	bnx2i_ring_dbell_update_sq_params(cmd->conn, 1);
+}
+
+
+/**
+ * bnx2i_send_conn_destroy - initiates iscsi connection teardown process
+ * @hba:	adapter structure pointer
+ * @ep:		endpoint (transport indentifier) structure
+ *
+ * this routine prepares and posts CONN_OFLD_REQ1/2 KWQE to initiate
+ * 	iscsi connection context clean-up process
+ */
+void bnx2i_send_conn_destroy(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
+{
+	struct kwqe *kwqe_arr[2];
+	struct iscsi_kwqe_conn_destroy conn_cleanup;
+
+	memset(&conn_cleanup, 0x00, sizeof(struct iscsi_kwqe_conn_destroy));
+
+	conn_cleanup.hdr.op_code = ISCSI_KWQE_OPCODE_DESTROY_CONN;
+	conn_cleanup.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+	/* 5771x requires conn context id to be passed as is */
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
+		conn_cleanup.context_id = ep->ep_cid;
+	else
+		conn_cleanup.context_id = (ep->ep_cid >> 7);
+
+	conn_cleanup.reserved0 = (u16)ep->ep_iscsi_cid;
+
+	kwqe_arr[0] = (struct kwqe *) &conn_cleanup;
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, 1);
+}
+
+
+/**
+ * bnx2i_570x_send_conn_ofld_req - initiates iscsi conn context setup process
+ * @hba: 		adapter structure pointer
+ * @ep: 		endpoint (transport indentifier) structure
+ *
+ * 5706/5708/5709 specific - prepares and posts CONN_OFLD_REQ1/2 KWQE
+ */
+static void bnx2i_570x_send_conn_ofld_req(struct bnx2i_hba *hba,
+					  struct bnx2i_endpoint *ep)
+{
+	struct kwqe *kwqe_arr[2];
+	struct iscsi_kwqe_conn_offload1 ofld_req1;
+	struct iscsi_kwqe_conn_offload2 ofld_req2;
+	dma_addr_t dma_addr;
+	int num_kwqes = 2;
+	u32 *ptbl;
+
+	ofld_req1.hdr.op_code = ISCSI_KWQE_OPCODE_OFFLOAD_CONN1;
+	ofld_req1.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	ofld_req1.iscsi_conn_id = (u16) ep->ep_iscsi_cid;
+
+	dma_addr = ep->qp.sq_pgtbl_phys;
+	ofld_req1.sq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req1.sq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	dma_addr = ep->qp.cq_pgtbl_phys;
+	ofld_req1.cq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req1.cq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	ofld_req2.hdr.op_code = ISCSI_KWQE_OPCODE_OFFLOAD_CONN2;
+	ofld_req2.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	dma_addr = ep->qp.rq_pgtbl_phys;
+	ofld_req2.rq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req2.rq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	ptbl = (u32 *) ep->qp.sq_pgtbl_virt;
+
+	ofld_req2.sq_first_pte.hi = *ptbl++;
+	ofld_req2.sq_first_pte.lo = *ptbl;
+
+	ptbl = (u32 *) ep->qp.cq_pgtbl_virt;
+	ofld_req2.cq_first_pte.hi = *ptbl++;
+	ofld_req2.cq_first_pte.lo = *ptbl;
+
+	kwqe_arr[0] = (struct kwqe *) &ofld_req1;
+	kwqe_arr[1] = (struct kwqe *) &ofld_req2;
+	ofld_req2.num_additional_wqes = 0;
+
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, num_kwqes);
+}
+
+
+/**
+ * bnx2i_5771x_send_conn_ofld_req - initiates iscsi connection context creation
+ * @hba: 		adapter structure pointer
+ * @ep: 		endpoint (transport indentifier) structure
+ *
+ * 57710 specific - prepares and posts CONN_OFLD_REQ1/2 KWQE
+ */
+static void bnx2i_5771x_send_conn_ofld_req(struct bnx2i_hba *hba,
+					   struct bnx2i_endpoint *ep)
+{
+	struct kwqe *kwqe_arr[5];
+	struct iscsi_kwqe_conn_offload1 ofld_req1;
+	struct iscsi_kwqe_conn_offload2 ofld_req2;
+	struct iscsi_kwqe_conn_offload3 ofld_req3[1];
+	dma_addr_t dma_addr;
+	int num_kwqes = 2;
+	u32 *ptbl;
+
+	ofld_req1.hdr.op_code = ISCSI_KWQE_OPCODE_OFFLOAD_CONN1;
+	ofld_req1.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	ofld_req1.iscsi_conn_id = (u16) ep->ep_iscsi_cid;
+
+	dma_addr = ep->qp.sq_pgtbl_phys + ISCSI_SQ_DB_SIZE;
+	ofld_req1.sq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req1.sq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	dma_addr = ep->qp.cq_pgtbl_phys + ISCSI_CQ_DB_SIZE;
+	ofld_req1.cq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req1.cq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	ofld_req2.hdr.op_code = ISCSI_KWQE_OPCODE_OFFLOAD_CONN2;
+	ofld_req2.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	dma_addr = ep->qp.rq_pgtbl_phys + ISCSI_RQ_DB_SIZE;
+	ofld_req2.rq_page_table_addr_lo = (u32) dma_addr;
+	ofld_req2.rq_page_table_addr_hi = (u32) ((u64) dma_addr >> 32);
+
+	ptbl = (u32 *)((u8 *)ep->qp.sq_pgtbl_virt + ISCSI_SQ_DB_SIZE);
+	ofld_req2.sq_first_pte.hi = *ptbl++;
+	ofld_req2.sq_first_pte.lo = *ptbl;
+
+	ptbl = (u32 *)((u8 *)ep->qp.cq_pgtbl_virt + ISCSI_CQ_DB_SIZE);
+	ofld_req2.cq_first_pte.hi = *ptbl++;
+	ofld_req2.cq_first_pte.lo = *ptbl;
+
+	kwqe_arr[0] = (struct kwqe *) &ofld_req1;
+	kwqe_arr[1] = (struct kwqe *) &ofld_req2;
+
+	ofld_req2.num_additional_wqes = 1;
+	memset(ofld_req3, 0x00, sizeof(ofld_req3[0]));
+	ptbl = (u32 *)((u8 *)ep->qp.rq_pgtbl_virt + ISCSI_RQ_DB_SIZE);
+	ofld_req3[0].qp_first_pte[0].hi = *ptbl++;
+	ofld_req3[0].qp_first_pte[0].lo = *ptbl;
+
+	kwqe_arr[2] = (struct kwqe *) ofld_req3;
+	/* need if we decide to go with multiple KCQE's per conn */
+	num_kwqes += 1;
+
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, num_kwqes);
+}
+
+/**
+ * bnx2i_send_conn_ofld_req - initiates iscsi connection context setup process
+ *
+ * @hba: 		adapter structure pointer
+ * @ep: 		endpoint (transport indentifier) structure
+ *
+ * this routine prepares and posts CONN_OFLD_REQ1/2 KWQE
+ */
+void bnx2i_send_conn_ofld_req(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
+{
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type))
+		bnx2i_5771x_send_conn_ofld_req(hba, ep);
+	else
+		bnx2i_570x_send_conn_ofld_req(hba, ep);
+}
+
+
+/**
+ * setup_qp_page_tables - iscsi QP page table setup function
+ * @ep:		endpoint (transport indentifier) structure
+ *
+ * Sets up page tables for SQ/RQ/CQ, 1G/sec (5706/5708/5709) devices requires
+ * 	64-bit address in big endian format. Whereas 10G/sec (57710) requires
+ * 	PT in little endian format
+ */
+static void setup_qp_page_tables(struct bnx2i_endpoint *ep)
+{
+	int num_pages;
+	u32 *ptbl;
+	dma_addr_t page;
+	int cnic_dev_10g;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
+		cnic_dev_10g = 1;
+	else
+		cnic_dev_10g = 0;
+
+	/* SQ page table */
+	memset(ep->qp.sq_pgtbl_virt, 0, ep->qp.sq_pgtbl_size);
+	num_pages = ep->qp.sq_mem_size / PAGE_SIZE;
+	page = ep->qp.sq_phys;
+
+	if (cnic_dev_10g)
+		ptbl = (u32 *)((u8 *)ep->qp.sq_pgtbl_virt + ISCSI_SQ_DB_SIZE);
+	else
+		ptbl = (u32 *) ep->qp.sq_pgtbl_virt;
+	while (num_pages--) {
+		if (cnic_dev_10g) {
+			/* PTE is written in little endian format for 57710 */
+			*ptbl = (u32) page;
+			ptbl++;
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			page += PAGE_SIZE;
+		} else {
+			/* PTE is written in big endian format for
+			 * 5706/5708/5709 devices */
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			*ptbl = (u32) page;
+			ptbl++;
+			page += PAGE_SIZE;
+		}
+	}
+
+	/* RQ page table */
+	memset(ep->qp.rq_pgtbl_virt, 0, ep->qp.rq_pgtbl_size);
+	num_pages = ep->qp.rq_mem_size / PAGE_SIZE;
+	page = ep->qp.rq_phys;
+
+	if (cnic_dev_10g)
+		ptbl = (u32 *)((u8 *)ep->qp.rq_pgtbl_virt + ISCSI_RQ_DB_SIZE);
+	else
+		ptbl = (u32 *) ep->qp.rq_pgtbl_virt;
+	while (num_pages--) {
+		if (cnic_dev_10g) {
+			/* PTE is written in little endian format for 57710 */
+			*ptbl = (u32) page;
+			ptbl++;
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			page += PAGE_SIZE;
+		} else {
+			/* PTE is written in big endian format for
+			 * 5706/5708/5709 devices */
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			*ptbl = (u32) page;
+			ptbl++;
+			page += PAGE_SIZE;
+		}
+	}
+
+	/* CQ page table */
+	memset(ep->qp.cq_pgtbl_virt, 0, ep->qp.cq_pgtbl_size);
+	num_pages = ep->qp.cq_mem_size / PAGE_SIZE;
+	page = ep->qp.cq_phys;
+
+	if (cnic_dev_10g)
+		ptbl = (u32 *)((u8 *)ep->qp.cq_pgtbl_virt + ISCSI_CQ_DB_SIZE);
+	else
+		ptbl = (u32 *) ep->qp.cq_pgtbl_virt;
+	while (num_pages--) {
+		if (cnic_dev_10g) {
+			/* PTE is written in little endian format for 57710 */
+			*ptbl = (u32) page;
+			ptbl++;
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			page += PAGE_SIZE;
+		} else {
+			/* PTE is written in big endian format for
+			 * 5706/5708/5709 devices */
+			*ptbl = (u32) ((u64) page >> 32);
+			ptbl++;
+			*ptbl = (u32) page;
+			ptbl++;
+			page += PAGE_SIZE;
+		}
+	}
+}
+
+
+/**
+ * bnx2i_alloc_qp_resc - allocates required resources for QP.
+ * @hba:	adapter structure pointer
+ * @ep:		endpoint (transport indentifier) structure
+ *
+ * Allocate QP (transport layer for iSCSI connection) resources, DMA'able
+ *	memory for SQ/RQ/CQ and page tables. EP structure elements such
+ *	as producer/consumer indexes/pointers, queue sizes and page table
+ *	contents are setup
+ */
+int bnx2i_alloc_qp_resc(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
+{
+	struct bnx2i_5771x_cq_db *cq_db;
+
+	ep->hba = hba;
+	ep->conn = NULL;
+	ep->ep_cid = ep->ep_iscsi_cid = ep->ep_pg_cid = 0;
+
+	/* Allocate page table memory for SQ which is page aligned */
+	ep->qp.sq_mem_size = hba->max_sqes * BNX2I_SQ_WQE_SIZE;
+	ep->qp.sq_mem_size =
+		(ep->qp.sq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+	ep->qp.sq_pgtbl_size =
+		(ep->qp.sq_mem_size / PAGE_SIZE) * sizeof(void *);
+	ep->qp.sq_pgtbl_size =
+		(ep->qp.sq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+
+	ep->qp.sq_pgtbl_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.sq_pgtbl_size,
+				   &ep->qp.sq_pgtbl_phys, GFP_KERNEL);
+	if (!ep->qp.sq_pgtbl_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc SQ PT mem (%d)\n",
+				  ep->qp.sq_pgtbl_size);
+		goto mem_alloc_err;
+	}
+
+	/* Allocate memory area for actual SQ element */
+	ep->qp.sq_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.sq_mem_size,
+				   &ep->qp.sq_phys, GFP_KERNEL);
+	if (!ep->qp.sq_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc SQ BD memory %d\n",
+				  ep->qp.sq_mem_size);
+		goto mem_alloc_err;
+	}
+
+	memset(ep->qp.sq_virt, 0x00, ep->qp.sq_mem_size);
+	ep->qp.sq_first_qe = ep->qp.sq_virt;
+	ep->qp.sq_prod_qe = ep->qp.sq_first_qe;
+	ep->qp.sq_cons_qe = ep->qp.sq_first_qe;
+	ep->qp.sq_last_qe = &ep->qp.sq_first_qe[hba->max_sqes - 1];
+	ep->qp.sq_prod_idx = 0;
+	ep->qp.sq_cons_idx = 0;
+	ep->qp.sqe_left = hba->max_sqes;
+
+	/* Allocate page table memory for CQ which is page aligned */
+	ep->qp.cq_mem_size = hba->max_cqes * BNX2I_CQE_SIZE;
+	ep->qp.cq_mem_size =
+		(ep->qp.cq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+	ep->qp.cq_pgtbl_size =
+		(ep->qp.cq_mem_size / PAGE_SIZE) * sizeof(void *);
+	ep->qp.cq_pgtbl_size =
+		(ep->qp.cq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+
+	ep->qp.cq_pgtbl_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.cq_pgtbl_size,
+				   &ep->qp.cq_pgtbl_phys, GFP_KERNEL);
+	if (!ep->qp.cq_pgtbl_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc CQ PT memory %d\n",
+				  ep->qp.cq_pgtbl_size);
+		goto mem_alloc_err;
+	}
+
+	/* Allocate memory area for actual CQ element */
+	ep->qp.cq_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.cq_mem_size,
+				   &ep->qp.cq_phys, GFP_KERNEL);
+	if (!ep->qp.cq_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc CQ BD memory %d\n",
+				  ep->qp.cq_mem_size);
+		goto mem_alloc_err;
+	}
+	memset(ep->qp.cq_virt, 0x00, ep->qp.cq_mem_size);
+
+	ep->qp.cq_first_qe = ep->qp.cq_virt;
+	ep->qp.cq_prod_qe = ep->qp.cq_first_qe;
+	ep->qp.cq_cons_qe = ep->qp.cq_first_qe;
+	ep->qp.cq_last_qe = &ep->qp.cq_first_qe[hba->max_cqes - 1];
+	ep->qp.cq_prod_idx = 0;
+	ep->qp.cq_cons_idx = 0;
+	ep->qp.cqe_left = hba->max_cqes;
+	ep->qp.cqe_exp_seq_sn = ISCSI_INITIAL_SN;
+	ep->qp.cqe_size = hba->max_cqes;
+
+	/* Invalidate all EQ CQE index, req only for 57710 */
+	cq_db = (struct bnx2i_5771x_cq_db *) ep->qp.cq_pgtbl_virt;
+	memset(cq_db->sqn, 0xFF, sizeof(cq_db->sqn[0]) * BNX2X_MAX_CQS);
+
+	/* Allocate page table memory for RQ which is page aligned */
+	ep->qp.rq_mem_size = hba->max_rqes * BNX2I_RQ_WQE_SIZE;
+	ep->qp.rq_mem_size =
+		(ep->qp.rq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+	ep->qp.rq_pgtbl_size =
+		(ep->qp.rq_mem_size / PAGE_SIZE) * sizeof(void *);
+	ep->qp.rq_pgtbl_size =
+		(ep->qp.rq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+
+	ep->qp.rq_pgtbl_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.rq_pgtbl_size,
+				   &ep->qp.rq_pgtbl_phys, GFP_KERNEL);
+	if (!ep->qp.rq_pgtbl_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc RQ PT mem %d\n",
+				  ep->qp.rq_pgtbl_size);
+		goto mem_alloc_err;
+	}
+
+	/* Allocate memory area for actual RQ element */
+	ep->qp.rq_virt =
+		dma_alloc_coherent(&hba->pcidev->dev, ep->qp.rq_mem_size,
+				   &ep->qp.rq_phys, GFP_KERNEL);
+	if (!ep->qp.rq_virt) {
+		printk(KERN_ALERT "bnx2i: unable to alloc RQ BD memory %d\n",
+				  ep->qp.rq_mem_size);
+		goto mem_alloc_err;
+	}
+
+	ep->qp.rq_first_qe = ep->qp.rq_virt;
+	ep->qp.rq_prod_qe = ep->qp.rq_first_qe;
+	ep->qp.rq_cons_qe = ep->qp.rq_first_qe;
+	ep->qp.rq_last_qe = &ep->qp.rq_first_qe[hba->max_rqes - 1];
+	ep->qp.rq_prod_idx = 0x8000;
+	ep->qp.rq_cons_idx = 0;
+	ep->qp.rqe_left = hba->max_rqes;
+
+	setup_qp_page_tables(ep);
+
+	return 0;
+
+mem_alloc_err:
+	bnx2i_free_qp_resc(hba, ep);
+	return -ENOMEM;
+}
+
+
+
+/**
+ * bnx2i_free_qp_resc - free memory resources held by QP
+ * @hba:	adapter structure pointer
+ * @ep:	endpoint (transport indentifier) structure
+ *
+ * Free QP resources - SQ/RQ/CQ memory and page tables.
+ */
+void bnx2i_free_qp_resc(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep)
+{
+	if (ep->qp.ctx_base) {
+		iounmap(ep->qp.ctx_base);
+		ep->qp.ctx_base = NULL;
+	}
+	/* Free SQ mem */
+	if (ep->qp.sq_pgtbl_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.sq_pgtbl_size,
+				  ep->qp.sq_pgtbl_virt, ep->qp.sq_pgtbl_phys);
+		ep->qp.sq_pgtbl_virt = NULL;
+		ep->qp.sq_pgtbl_phys = 0;
+	}
+	if (ep->qp.sq_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.sq_mem_size,
+				  ep->qp.sq_virt, ep->qp.sq_phys);
+		ep->qp.sq_virt = NULL;
+		ep->qp.sq_phys = 0;
+	}
+
+	/* Free RQ mem */
+	if (ep->qp.rq_pgtbl_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.rq_pgtbl_size,
+				  ep->qp.rq_pgtbl_virt, ep->qp.rq_pgtbl_phys);
+		ep->qp.rq_pgtbl_virt = NULL;
+		ep->qp.rq_pgtbl_phys = 0;
+	}
+	if (ep->qp.rq_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.rq_mem_size,
+				  ep->qp.rq_virt, ep->qp.rq_phys);
+		ep->qp.rq_virt = NULL;
+		ep->qp.rq_phys = 0;
+	}
+
+	/* Free CQ mem */
+	if (ep->qp.cq_pgtbl_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.cq_pgtbl_size,
+				  ep->qp.cq_pgtbl_virt, ep->qp.cq_pgtbl_phys);
+		ep->qp.cq_pgtbl_virt = NULL;
+		ep->qp.cq_pgtbl_phys = 0;
+	}
+	if (ep->qp.cq_virt) {
+		dma_free_coherent(&hba->pcidev->dev, ep->qp.cq_mem_size,
+				  ep->qp.cq_virt, ep->qp.cq_phys);
+		ep->qp.cq_virt = NULL;
+		ep->qp.cq_phys = 0;
+	}
+}
+
+
+/**
+ * bnx2i_send_fw_iscsi_init_msg - initiates initial handshake with iscsi f/w
+ * @hba:	adapter structure pointer
+ *
+ * Send down iscsi_init KWQEs which initiates the initial handshake with the f/w
+ * 	This results in iSCSi support validation and on-chip context manager
+ * 	initialization.  Firmware completes this handshake with a CQE carrying
+ * 	the result of iscsi support validation. Parameter carried by
+ * 	iscsi init request determines the number of offloaded connection and
+ * 	tolerance level for iscsi protocol violation this hba/chip can support
+ */
+int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba)
+{
+	struct kwqe *kwqe_arr[3];
+	struct iscsi_kwqe_init1 iscsi_init;
+	struct iscsi_kwqe_init2 iscsi_init2;
+	int rc = 0;
+	u64 mask64;
+
+	bnx2i_adjust_qp_size(hba);
+
+	iscsi_init.flags =
+		ISCSI_PAGE_SIZE_4K << ISCSI_KWQE_INIT1_PAGE_SIZE_SHIFT;
+	if (en_tcp_dack)
+		iscsi_init.flags |= ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE;
+	iscsi_init.reserved0 = 0;
+	iscsi_init.num_cqs = 1;
+	iscsi_init.hdr.op_code = ISCSI_KWQE_OPCODE_INIT1;
+	iscsi_init.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+
+	iscsi_init.dummy_buffer_addr_lo = (u32) hba->dummy_buf_dma;
+	iscsi_init.dummy_buffer_addr_hi =
+		(u32) ((u64) hba->dummy_buf_dma >> 32);
+
+	hba->ctx_ccell_tasks =
+			((hba->num_ccell & 0xFFFF) | (hba->max_sqes << 16));
+	iscsi_init.num_ccells_per_conn = hba->num_ccell;
+	iscsi_init.num_tasks_per_conn = hba->max_sqes;
+	iscsi_init.sq_wqes_per_page = PAGE_SIZE / BNX2I_SQ_WQE_SIZE;
+	iscsi_init.sq_num_wqes = hba->max_sqes;
+	iscsi_init.cq_log_wqes_per_page =
+		(u8) bnx2i_power_of2(PAGE_SIZE / BNX2I_CQE_SIZE);
+	iscsi_init.cq_num_wqes = hba->max_cqes;
+	iscsi_init.cq_num_pages = (hba->max_cqes * BNX2I_CQE_SIZE +
+				   (PAGE_SIZE - 1)) / PAGE_SIZE;
+	iscsi_init.sq_num_pages = (hba->max_sqes * BNX2I_SQ_WQE_SIZE +
+				   (PAGE_SIZE - 1)) / PAGE_SIZE;
+	iscsi_init.rq_buffer_size = BNX2I_RQ_WQE_SIZE;
+	iscsi_init.rq_num_wqes = hba->max_rqes;
+
+
+	iscsi_init2.hdr.op_code = ISCSI_KWQE_OPCODE_INIT2;
+	iscsi_init2.hdr.flags =
+		(ISCSI_KWQE_LAYER_CODE << ISCSI_KWQE_HEADER_LAYER_CODE_SHIFT);
+	iscsi_init2.max_cq_sqn = hba->max_cqes * 2 + 1;
+	mask64 = 0x0ULL;
+	mask64 |= (
+		/* CISCO MDS */
+		(1UL <<
+		  ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_NOT_RSRV) |
+		/* HP MSA1510i */
+		(1UL <<
+		  ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_EXP_DATASN) |
+		/* EMC */
+		(1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN));
+	if (error_mask1)
+		iscsi_init2.error_bit_map[0] = error_mask1;
+	else
+		iscsi_init2.error_bit_map[0] = (u32) mask64;
+
+	if (error_mask2)
+		iscsi_init2.error_bit_map[1] = error_mask2;
+	else
+		iscsi_init2.error_bit_map[1] = (u32) (mask64 >> 32);
+
+	iscsi_error_mask = mask64;
+
+	kwqe_arr[0] = (struct kwqe *) &iscsi_init;
+	kwqe_arr[1] = (struct kwqe *) &iscsi_init2;
+
+	if (hba->cnic && hba->cnic->submit_kwqes)
+		rc = hba->cnic->submit_kwqes(hba->cnic, kwqe_arr, 2);
+	return rc;
+}
+
+
+/**
+ * bnx2i_process_scsi_cmd_resp - this function handles scsi cmd completion.
+ * @conn:	iscsi connection
+ * @cqe:	pointer to newly DMA'ed CQE entry for processing
+ *
+ * process SCSI CMD Response CQE & complete the request to SCSI-ML
+ */
+static int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
+				       struct bnx2i_conn *bnx2i_conn,
+				       struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct bnx2i_cmd_response *resp_cqe;
+	struct bnx2i_cmd *bnx2i_cmd;
+	struct iscsi_task *task;
+	struct iscsi_cmd_rsp *hdr;
+	u32 datalen = 0;
+
+	resp_cqe = (struct bnx2i_cmd_response *)cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 resp_cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
+	if (!task)
+		goto fail;
+
+	bnx2i_cmd = task->dd_data;
+
+	if (bnx2i_cmd->req.op_attr & ISCSI_CMD_REQUEST_READ) {
+		conn->datain_pdus_cnt +=
+			resp_cqe->task_stat.read_stat.num_data_outs;
+		conn->rxdata_octets +=
+			bnx2i_cmd->req.total_data_transfer_length;
+	} else {
+		conn->dataout_pdus_cnt +=
+			resp_cqe->task_stat.read_stat.num_data_outs;
+		conn->r2t_pdus_cnt +=
+			resp_cqe->task_stat.read_stat.num_r2ts;
+		conn->txdata_octets +=
+			bnx2i_cmd->req.total_data_transfer_length;
+	}
+	bnx2i_iscsi_unmap_sg_list(bnx2i_cmd);
+
+	hdr = (struct iscsi_cmd_rsp *)task->hdr;
+	resp_cqe = (struct bnx2i_cmd_response *)cqe;
+	hdr->opcode = resp_cqe->op_code;
+	hdr->max_cmdsn = cpu_to_be32(resp_cqe->max_cmd_sn);
+	hdr->exp_cmdsn = cpu_to_be32(resp_cqe->exp_cmd_sn);
+	hdr->response = resp_cqe->response;
+	hdr->cmd_status = resp_cqe->status;
+	hdr->flags = resp_cqe->response_flags;
+	hdr->residual_count = cpu_to_be32(resp_cqe->residual_count);
+
+	if (resp_cqe->op_code == ISCSI_OP_SCSI_DATA_IN)
+		goto done;
+
+	if (resp_cqe->status == SAM_STAT_CHECK_CONDITION) {
+		datalen = resp_cqe->data_length;
+		if (datalen < 2)
+			goto done;
+
+		if (datalen > BNX2I_RQ_WQE_SIZE) {
+			iscsi_conn_printk(KERN_ERR, conn,
+					  "sense data len %d > RQ sz\n",
+					  datalen);
+			datalen = BNX2I_RQ_WQE_SIZE;
+		} else if (datalen > ISCSI_DEF_MAX_RECV_SEG_LEN) {
+			iscsi_conn_printk(KERN_ERR, conn,
+					  "sense data len %d > conn data\n",
+					  datalen);
+			datalen = ISCSI_DEF_MAX_RECV_SEG_LEN;
+		}
+
+		bnx2i_get_rq_buf(bnx2i_cmd->conn, conn->data, datalen);
+		bnx2i_put_rq_buf(bnx2i_cmd->conn, 1);
+	}
+
+done:
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr,
+			     conn->data, datalen);
+fail:
+	spin_unlock(&session->lock);
+	return 0;
+}
+
+
+/**
+ * bnx2i_process_login_resp - this function handles iscsi login response
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process Login Response CQE & complete it to open-iscsi user daemon
+ */
+static int bnx2i_process_login_resp(struct iscsi_session *session,
+				    struct bnx2i_conn *bnx2i_conn,
+				    struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+	struct bnx2i_login_response *login;
+	struct iscsi_login_rsp *resp_hdr;
+	int pld_len;
+	int pad_len;
+
+	login = (struct bnx2i_login_response *) cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 login->itt & ISCSI_LOGIN_RESPONSE_INDEX);
+	if (!task)
+		goto done;
+
+	resp_hdr = (struct iscsi_login_rsp *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
+	resp_hdr->opcode = login->op_code;
+	resp_hdr->flags = login->response_flags;
+	resp_hdr->max_version = login->version_max;
+	resp_hdr->active_version = login->version_active;;
+	resp_hdr->hlength = 0;
+
+	hton24(resp_hdr->dlength, login->data_length);
+	memcpy(resp_hdr->isid, &login->isid_lo, 6);
+	resp_hdr->tsih = cpu_to_be16(login->tsih);
+	resp_hdr->itt = task->hdr->itt;
+	resp_hdr->statsn = cpu_to_be32(login->stat_sn);
+	resp_hdr->exp_cmdsn = cpu_to_be32(login->exp_cmd_sn);
+	resp_hdr->max_cmdsn = cpu_to_be32(login->max_cmd_sn);
+	resp_hdr->status_class = login->status_class;
+	resp_hdr->status_detail = login->status_detail;
+	pld_len = login->data_length;
+	bnx2i_conn->gen_pdu.resp_wr_ptr =
+					bnx2i_conn->gen_pdu.resp_buf + pld_len;
+
+	pad_len = 0;
+	if (pld_len & 0x3)
+		pad_len = 4 - (pld_len % 4);
+
+	if (pad_len) {
+		int i = 0;
+		for (i = 0; i < pad_len; i++) {
+			bnx2i_conn->gen_pdu.resp_wr_ptr[0] = 0;
+			bnx2i_conn->gen_pdu.resp_wr_ptr++;
+		}
+	}
+
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr,
+		bnx2i_conn->gen_pdu.resp_buf,
+		bnx2i_conn->gen_pdu.resp_wr_ptr - bnx2i_conn->gen_pdu.resp_buf);
+done:
+	spin_unlock(&session->lock);
+	return 0;
+}
+
+/**
+ * bnx2i_process_tmf_resp - this function handles iscsi TMF response
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI TMF Response CQE and wake up the driver eh thread.
+ */
+static int bnx2i_process_tmf_resp(struct iscsi_session *session,
+				  struct bnx2i_conn *bnx2i_conn,
+				  struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+	struct bnx2i_tmf_response *tmf_cqe;
+	struct iscsi_tm_rsp *resp_hdr;
+
+	tmf_cqe = (struct bnx2i_tmf_response *)cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 tmf_cqe->itt & ISCSI_TMF_RESPONSE_INDEX);
+	if (!task)
+		goto done;
+
+	resp_hdr = (struct iscsi_tm_rsp *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
+	resp_hdr->opcode = tmf_cqe->op_code;
+	resp_hdr->max_cmdsn = cpu_to_be32(tmf_cqe->max_cmd_sn);
+	resp_hdr->exp_cmdsn = cpu_to_be32(tmf_cqe->exp_cmd_sn);
+	resp_hdr->itt = task->hdr->itt;
+	resp_hdr->response = tmf_cqe->response;
+
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr, NULL, 0);
+done:
+	spin_unlock(&session->lock);
+	return 0;
+}
+
+/**
+ * bnx2i_process_logout_resp - this function handles iscsi logout response
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI Logout Response CQE & make function call to
+ * notify the user daemon.
+ */
+static int bnx2i_process_logout_resp(struct iscsi_session *session,
+				     struct bnx2i_conn *bnx2i_conn,
+				     struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+	struct bnx2i_logout_response *logout;
+	struct iscsi_logout_rsp *resp_hdr;
+
+	logout = (struct bnx2i_logout_response *) cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 logout->itt & ISCSI_LOGOUT_RESPONSE_INDEX);
+	if (!task)
+		goto done;
+
+	resp_hdr = (struct iscsi_logout_rsp *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
+	resp_hdr->opcode = logout->op_code;
+	resp_hdr->flags = logout->response;
+	resp_hdr->hlength = 0;
+
+	resp_hdr->itt = task->hdr->itt;
+	resp_hdr->statsn = task->hdr->exp_statsn;
+	resp_hdr->exp_cmdsn = cpu_to_be32(logout->exp_cmd_sn);
+	resp_hdr->max_cmdsn = cpu_to_be32(logout->max_cmd_sn);
+
+	resp_hdr->t2wait = cpu_to_be32(logout->time_to_wait);
+	resp_hdr->t2retain = cpu_to_be32(logout->time_to_retain);
+
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr, NULL, 0);
+done:
+	spin_unlock(&session->lock);
+	return 0;
+}
+
+/**
+ * bnx2i_process_nopin_local_cmpl - this function handles iscsi nopin CQE
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI NOPIN local completion CQE, frees IIT and command structures
+ */
+static void bnx2i_process_nopin_local_cmpl(struct iscsi_session *session,
+					   struct bnx2i_conn *bnx2i_conn,
+					   struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct bnx2i_nop_in_msg *nop_in;
+	struct iscsi_task *task;
+
+	nop_in = (struct bnx2i_nop_in_msg *)cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+				 nop_in->itt & ISCSI_NOP_IN_MSG_INDEX);
+	if (task)
+		iscsi_put_task(task);
+	spin_unlock(&session->lock);
+}
+
+/**
+ * bnx2i_unsol_pdu_adjust_rq - makes adjustments to RQ after unsol pdu is recvd
+ * @conn:	iscsi connection
+ *
+ * Firmware advances RQ producer index for every unsolicited PDU even if
+ *	payload data length is '0'. This function makes corresponding
+ *	adjustments on the driver side to match this f/w behavior
+ */
+static void bnx2i_unsol_pdu_adjust_rq(struct bnx2i_conn *bnx2i_conn)
+{
+	char dummy_rq_data[2];
+	bnx2i_get_rq_buf(bnx2i_conn, dummy_rq_data, 1);
+	bnx2i_put_rq_buf(bnx2i_conn, 1);
+}
+
+
+/**
+ * bnx2i_process_nopin_mesg - this function handles iscsi nopin CQE
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI target's proactive iSCSI NOPIN request
+ */
+static int bnx2i_process_nopin_mesg(struct iscsi_session *session,
+				     struct bnx2i_conn *bnx2i_conn,
+				     struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+	struct bnx2i_nop_in_msg *nop_in;
+	struct iscsi_nopin *hdr;
+	u32 itt;
+	int tgt_async_nop = 0;
+
+	nop_in = (struct bnx2i_nop_in_msg *)cqe;
+	itt = nop_in->itt & ISCSI_NOP_IN_MSG_INDEX;
+
+	spin_lock(&session->lock);
+	hdr = (struct iscsi_nopin *)&bnx2i_conn->gen_pdu.resp_hdr;
+	memset(hdr, 0, sizeof(struct iscsi_hdr));
+	hdr->opcode = nop_in->op_code;
+	hdr->max_cmdsn = cpu_to_be32(nop_in->max_cmd_sn);
+	hdr->exp_cmdsn = cpu_to_be32(nop_in->exp_cmd_sn);
+	hdr->ttt = cpu_to_be32(nop_in->ttt);
+
+	if (itt == (u16) RESERVED_ITT) {
+		bnx2i_unsol_pdu_adjust_rq(bnx2i_conn);
+		hdr->itt = RESERVED_ITT;
+		tgt_async_nop = 1;
+		goto done;
+	}
+
+	/* this is a response to one of our nop-outs */
+	task = iscsi_itt_to_task(conn, itt);
+	if (task) {
+		hdr->flags = ISCSI_FLAG_CMD_FINAL;
+		hdr->itt = task->hdr->itt;
+		hdr->ttt = cpu_to_be32(nop_in->ttt);
+		memcpy(hdr->lun, nop_in->lun, 8);
+	}
+done:
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr, NULL, 0);
+	spin_unlock(&session->lock);
+
+	return tgt_async_nop;
+}
+
+
+/**
+ * bnx2i_process_async_mesg - this function handles iscsi async message
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI ASYNC Message
+ */
+static void bnx2i_process_async_mesg(struct iscsi_session *session,
+				     struct bnx2i_conn *bnx2i_conn,
+				     struct cqe *cqe)
+{
+	struct bnx2i_async_msg *async_cqe;
+	struct iscsi_async *resp_hdr;
+	u8 async_event;
+
+	bnx2i_unsol_pdu_adjust_rq(bnx2i_conn);
+
+	async_cqe = (struct bnx2i_async_msg *)cqe;
+	async_event = async_cqe->async_event;
+
+	if (async_event == ISCSI_ASYNC_MSG_SCSI_EVENT) {
+		iscsi_conn_printk(KERN_ALERT, bnx2i_conn->cls_conn->dd_data,
+				  "async: scsi events not supported\n");
+		return;
+	}
+
+	spin_lock(&session->lock);
+	resp_hdr = (struct iscsi_async *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
+	resp_hdr->opcode = async_cqe->op_code;
+	resp_hdr->flags = 0x80;
+
+	memcpy(resp_hdr->lun, async_cqe->lun, 8);
+	resp_hdr->exp_cmdsn = cpu_to_be32(async_cqe->exp_cmd_sn);
+	resp_hdr->max_cmdsn = cpu_to_be32(async_cqe->max_cmd_sn);
+
+	resp_hdr->async_event = async_cqe->async_event;
+	resp_hdr->async_vcode = async_cqe->async_vcode;
+
+	resp_hdr->param1 = cpu_to_be16(async_cqe->param1);
+	resp_hdr->param2 = cpu_to_be16(async_cqe->param2);
+	resp_hdr->param3 = cpu_to_be16(async_cqe->param3);
+
+	__iscsi_complete_pdu(bnx2i_conn->cls_conn->dd_data,
+			     (struct iscsi_hdr *)resp_hdr, NULL, 0);
+	spin_unlock(&session->lock);
+}
+
+
+/**
+ * bnx2i_process_reject_mesg - process iscsi reject pdu
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process iSCSI REJECT message
+ */
+static void bnx2i_process_reject_mesg(struct iscsi_session *session,
+				      struct bnx2i_conn *bnx2i_conn,
+				      struct cqe *cqe)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct bnx2i_reject_msg *reject;
+	struct iscsi_reject *hdr;
+
+	reject = (struct bnx2i_reject_msg *) cqe;
+	if (reject->data_length) {
+		bnx2i_get_rq_buf(bnx2i_conn, conn->data, reject->data_length);
+		bnx2i_put_rq_buf(bnx2i_conn, 1);
+	} else
+		bnx2i_unsol_pdu_adjust_rq(bnx2i_conn);
+
+	spin_lock(&session->lock);
+	hdr = (struct iscsi_reject *) &bnx2i_conn->gen_pdu.resp_hdr;
+	memset(hdr, 0, sizeof(struct iscsi_hdr));
+	hdr->opcode = reject->op_code;
+	hdr->reason = reject->reason;
+	hton24(hdr->dlength, reject->data_length);
+	hdr->max_cmdsn = cpu_to_be32(reject->max_cmd_sn);
+	hdr->exp_cmdsn = cpu_to_be32(reject->exp_cmd_sn);
+	hdr->ffffffff = cpu_to_be32(RESERVED_ITT);
+	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr, conn->data,
+			     reject->data_length);
+	spin_unlock(&session->lock);
+}
+
+/**
+ * bnx2i_process_cmd_cleanup_resp - process scsi command clean-up completion
+ * @session:		iscsi session pointer
+ * @bnx2i_conn:		iscsi connection pointer
+ * @cqe:		pointer to newly DMA'ed CQE entry for processing
+ *
+ * process command cleanup response CQE during conn shutdown or error recovery
+ */
+static void bnx2i_process_cmd_cleanup_resp(struct iscsi_session *session,
+					   struct bnx2i_conn *bnx2i_conn,
+					   struct cqe *cqe)
+{
+	struct bnx2i_cleanup_response *cmd_clean_rsp;
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_task *task;
+
+	cmd_clean_rsp = (struct bnx2i_cleanup_response *)cqe;
+	spin_lock(&session->lock);
+	task = iscsi_itt_to_task(conn,
+			cmd_clean_rsp->itt & ISCSI_CLEANUP_RESPONSE_INDEX);
+	if (!task)
+		printk(KERN_ALERT "bnx2i: cmd clean ITT %x not active\n",
+			cmd_clean_rsp->itt & ISCSI_CLEANUP_RESPONSE_INDEX);
+	spin_unlock(&session->lock);
+	complete(&bnx2i_conn->cmd_cleanup_cmpl);
+}
+
+
+
+/**
+ * bnx2i_process_new_cqes - process newly DMA'ed CQE's
+ * @bnx2i_conn:		iscsi connection
+ *
+ * this function is called by generic KCQ handler to process all pending CQE's
+ */
+static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
+{
+	struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
+	struct iscsi_session *session = conn->session;
+	struct qp_info *qp = &bnx2i_conn->ep->qp;
+	struct bnx2i_nop_in_msg *nopin;
+	int tgt_async_msg;
+
+	while (1) {
+		nopin = (struct bnx2i_nop_in_msg *) qp->cq_cons_qe;
+		if (nopin->cq_req_sn != qp->cqe_exp_seq_sn)
+			break;
+
+		if (unlikely(test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx)))
+			break;
+
+		tgt_async_msg = 0;
+
+		switch (nopin->op_code) {
+		case ISCSI_OP_SCSI_CMD_RSP:
+		case ISCSI_OP_SCSI_DATA_IN:
+			bnx2i_process_scsi_cmd_resp(session, bnx2i_conn,
+						    qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_LOGIN_RSP:
+			bnx2i_process_login_resp(session, bnx2i_conn,
+						 qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_SCSI_TMFUNC_RSP:
+			bnx2i_process_tmf_resp(session, bnx2i_conn,
+					       qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_LOGOUT_RSP:
+			bnx2i_process_logout_resp(session, bnx2i_conn,
+						  qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_NOOP_IN:
+			if (bnx2i_process_nopin_mesg(session, bnx2i_conn,
+						     qp->cq_cons_qe))
+				tgt_async_msg = 1;
+			break;
+		case ISCSI_OPCODE_NOPOUT_LOCAL_COMPLETION:
+			bnx2i_process_nopin_local_cmpl(session, bnx2i_conn,
+						       qp->cq_cons_qe);
+			break;
+		case ISCSI_OP_ASYNC_EVENT:
+			bnx2i_process_async_mesg(session, bnx2i_conn,
+						 qp->cq_cons_qe);
+			tgt_async_msg = 1;
+			break;
+		case ISCSI_OP_REJECT:
+			bnx2i_process_reject_mesg(session, bnx2i_conn,
+						  qp->cq_cons_qe);
+			break;
+		case ISCSI_OPCODE_CLEANUP_RESPONSE:
+			bnx2i_process_cmd_cleanup_resp(session, bnx2i_conn,
+						       qp->cq_cons_qe);
+			break;
+		default:
+			printk(KERN_ALERT "bnx2i: unknown opcode 0x%x\n",
+					  nopin->op_code);
+		}
+
+		if (!tgt_async_msg)
+			bnx2i_conn->ep->num_active_cmds--;
+
+		/* clear out in production version only, till beta keep opcode
+		 * field intact, will be helpful in debugging (context dump)
+		 * nopin->op_code = 0;
+		 */
+		qp->cqe_exp_seq_sn++;
+		if (qp->cqe_exp_seq_sn == (qp->cqe_size * 2 + 1))
+			qp->cqe_exp_seq_sn = ISCSI_INITIAL_SN;
+
+		if (qp->cq_cons_qe == qp->cq_last_qe) {
+			qp->cq_cons_qe = qp->cq_first_qe;
+			qp->cq_cons_idx = 0;
+		} else {
+			qp->cq_cons_qe++;
+			qp->cq_cons_idx++;
+		}
+	}
+	bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE);
+}
+
+/**
+ * bnx2i_fastpath_notification - process global event queue (KCQ)
+ * @hba:		adapter structure pointer
+ * @new_cqe_kcqe:	pointer to newly DMA'ed KCQE entry
+ *
+ * Fast path event notification handler, KCQ entry carries context id
+ *	of the connection that has 1 or more pending CQ entries
+ */
+static void bnx2i_fastpath_notification(struct bnx2i_hba *hba,
+					struct iscsi_kcqe *new_cqe_kcqe)
+{
+	struct bnx2i_conn *conn;
+	u32 iscsi_cid;
+
+	iscsi_cid = new_cqe_kcqe->iscsi_conn_id;
+	conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
+
+	if (!conn) {
+		printk(KERN_ALERT "cid #%x not valid\n", iscsi_cid);
+		return;
+	}
+	if (!conn->ep) {
+		printk(KERN_ALERT "cid #%x - ep not bound\n", iscsi_cid);
+		return;
+	}
+
+	bnx2i_process_new_cqes(conn);
+}
+
+
+/**
+ * bnx2i_process_update_conn_cmpl - process iscsi conn update completion KCQE
+ * @hba:		adapter structure pointer
+ * @update_kcqe:	kcqe pointer
+ *
+ * CONN_UPDATE completion handler, this completes iSCSI connection FFP migration
+ */
+static void bnx2i_process_update_conn_cmpl(struct bnx2i_hba *hba,
+					   struct iscsi_kcqe *update_kcqe)
+{
+	struct bnx2i_conn *conn;
+	u32 iscsi_cid;
+
+	iscsi_cid = update_kcqe->iscsi_conn_id;
+	conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
+
+	if (!conn) {
+		printk(KERN_ALERT "conn_update: cid %x not valid\n", iscsi_cid);
+		return;
+	}
+	if (!conn->ep) {
+		printk(KERN_ALERT "cid %x does not have ep bound\n", iscsi_cid);
+		return;
+	}
+
+	if (update_kcqe->completion_status) {
+		printk(KERN_ALERT "request failed cid %x\n", iscsi_cid);
+		conn->ep->state = EP_STATE_ULP_UPDATE_FAILED;
+	} else
+		conn->ep->state = EP_STATE_ULP_UPDATE_COMPL;
+
+	wake_up_interruptible(&conn->ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_recovery_que_add_conn - add connection to recovery queue
+ * @hba:		adapter structure pointer
+ * @bnx2i_conn:		iscsi connection
+ *
+ * Add connection to recovery queue and schedule adapter eh worker
+ */
+static void bnx2i_recovery_que_add_conn(struct bnx2i_hba *hba,
+					struct bnx2i_conn *bnx2i_conn)
+{
+	iscsi_conn_failure(bnx2i_conn->cls_conn->dd_data,
+			   ISCSI_ERR_CONN_FAILED);
+}
+
+
+/**
+ * bnx2i_process_tcp_error - process error notification on a given connection
+ *
+ * @hba: 		adapter structure pointer
+ * @tcp_err: 		tcp error kcqe pointer
+ *
+ * handles tcp level error notifications from FW.
+ */
+static void bnx2i_process_tcp_error(struct bnx2i_hba *hba,
+				    struct iscsi_kcqe *tcp_err)
+{
+	struct bnx2i_conn *bnx2i_conn;
+	u32 iscsi_cid;
+
+	iscsi_cid = tcp_err->iscsi_conn_id;
+	bnx2i_conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
+
+	if (!bnx2i_conn) {
+		printk(KERN_ALERT "bnx2i - cid 0x%x not valid\n", iscsi_cid);
+		return;
+	}
+
+	printk(KERN_ALERT "bnx2i - cid 0x%x had TCP errors, error code 0x%x\n",
+			  iscsi_cid, tcp_err->completion_status);
+	bnx2i_recovery_que_add_conn(bnx2i_conn->hba, bnx2i_conn);
+}
+
+
+/**
+ * bnx2i_process_iscsi_error - process error notification on a given connection
+ * @hba:		adapter structure pointer
+ * @iscsi_err:		iscsi error kcqe pointer
+ *
+ * handles iscsi error notifications from the FW. Firmware based in initial
+ *	handshake classifies iscsi protocol / TCP rfc violation into either
+ *	warning or error indications. If indication is of "Error" type, driver
+ *	will initiate session recovery for that connection/session. For
+ *	"Warning" type indication, driver will put out a system log message
+ *	(there will be only one message for each type for the life of the
+ *	session, this is to avoid un-necessarily overloading the system)
+ */
+static void bnx2i_process_iscsi_error(struct bnx2i_hba *hba,
+				      struct iscsi_kcqe *iscsi_err)
+{
+	struct bnx2i_conn *bnx2i_conn;
+	u32 iscsi_cid;
+	char warn_notice[] = "iscsi_warning";
+	char error_notice[] = "iscsi_error";
+	char additional_notice[64];
+	char *message;
+	int need_recovery;
+	u64 err_mask64;
+
+	iscsi_cid = iscsi_err->iscsi_conn_id;
+	bnx2i_conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
+	if (!bnx2i_conn) {
+		printk(KERN_ALERT "bnx2i - cid 0x%x not valid\n", iscsi_cid);
+		return;
+	}
+
+	err_mask64 = (0x1ULL << iscsi_err->completion_status);
+
+	if (err_mask64 & iscsi_error_mask) {
+		need_recovery = 0;
+		message = warn_notice;
+	} else {
+		need_recovery = 1;
+		message = error_notice;
+	}
+
+	switch (iscsi_err->completion_status) {
+	case ISCSI_KCQE_COMPLETION_STATUS_HDR_DIG_ERR:
+		strcpy(additional_notice, "hdr digest err");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_DATA_DIG_ERR:
+		strcpy(additional_notice, "data digest err");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_OPCODE:
+		strcpy(additional_notice, "wrong opcode rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_AHS_LEN:
+		strcpy(additional_notice, "AHS len > 0 rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_ITT:
+		strcpy(additional_notice, "invalid ITT rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_STATSN:
+		strcpy(additional_notice, "wrong StatSN rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_EXP_DATASN:
+		strcpy(additional_notice, "wrong DataSN rcvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T:
+		strcpy(additional_notice, "pend R2T violation");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_0:
+		strcpy(additional_notice, "ERL0, UO");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_1:
+		strcpy(additional_notice, "ERL0, U1");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_2:
+		strcpy(additional_notice, "ERL0, U2");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_3:
+		strcpy(additional_notice, "ERL0, U3");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_4:
+		strcpy(additional_notice, "ERL0, U4");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_5:
+		strcpy(additional_notice, "ERL0, U5");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_O_U_6:
+		strcpy(additional_notice, "ERL0, U6");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REMAIN_RCV_LEN:
+		strcpy(additional_notice, "invalid resi len");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_MAX_RCV_PDU_LEN:
+		strcpy(additional_notice, "MRDSL violation");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_F_BIT_ZERO:
+		strcpy(additional_notice, "F-bit not set");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_NOT_RSRV:
+		strcpy(additional_notice, "invalid TTT");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATASN:
+		strcpy(additional_notice, "invalid DataSN");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REMAIN_BURST_LEN:
+		strcpy(additional_notice, "burst len violation");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_BUFFER_OFF:
+		strcpy(additional_notice, "buf offset violation");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN:
+		strcpy(additional_notice, "invalid LUN field");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_R2TSN:
+		strcpy(additional_notice, "invalid R2TSN field");
+		break;
+#define BNX2I_ERR_DESIRED_DATA_TRNS_LEN_0 	\
+	ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_0
+	case BNX2I_ERR_DESIRED_DATA_TRNS_LEN_0:
+		strcpy(additional_notice, "invalid cmd len1");
+		break;
+#define BNX2I_ERR_DESIRED_DATA_TRNS_LEN_1 	\
+	ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DESIRED_DATA_TRNS_LEN_1
+	case BNX2I_ERR_DESIRED_DATA_TRNS_LEN_1:
+		strcpy(additional_notice, "invalid cmd len2");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T_EXCEED:
+		strcpy(additional_notice,
+		       "pend r2t exceeds MaxOutstandingR2T value");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_TTT_IS_RSRV:
+		strcpy(additional_notice, "TTT is rsvd");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_MAX_BURST_LEN:
+		strcpy(additional_notice, "MBL violation");
+		break;
+#define BNX2I_ERR_DATA_SEG_LEN_NOT_ZERO 	\
+	ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_DATA_SEG_LEN_NOT_ZERO
+	case BNX2I_ERR_DATA_SEG_LEN_NOT_ZERO:
+		strcpy(additional_notice, "data seg len != 0");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_REJECT_PDU_LEN:
+		strcpy(additional_notice, "reject pdu len error");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_ASYNC_PDU_LEN:
+		strcpy(additional_notice, "async pdu len error");
+		break;
+	case ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_NOPIN_PDU_LEN:
+		strcpy(additional_notice, "nopin pdu len error");
+		break;
+#define BNX2_ERR_PEND_R2T_IN_CLEANUP			\
+	ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_PEND_R2T_IN_CLEANUP
+	case BNX2_ERR_PEND_R2T_IN_CLEANUP:
+		strcpy(additional_notice, "pend r2t in cleanup");
+		break;
+
+	case ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_IP_FRAGMENT:
+		strcpy(additional_notice, "IP fragments rcvd");
+		break;
+	case ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_IP_OPTIONS:
+		strcpy(additional_notice, "IP options error");
+		break;
+	case ISCI_KCQE_COMPLETION_STATUS_TCP_ERROR_URGENT_FLAG:
+		strcpy(additional_notice, "urgent flag error");
+		break;
+	default:
+		printk(KERN_ALERT "iscsi_err - unknown err %x\n",
+				  iscsi_err->completion_status);
+	}
+
+	if (need_recovery) {
+		iscsi_conn_printk(KERN_ALERT,
+				  bnx2i_conn->cls_conn->dd_data,
+				  "bnx2i: %s - %s\n",
+				  message, additional_notice);
+
+		iscsi_conn_printk(KERN_ALERT,
+				  bnx2i_conn->cls_conn->dd_data,
+				  "conn_err - hostno %d conn %p, "
+				  "iscsi_cid %x cid %x\n",
+				  bnx2i_conn->hba->shost->host_no,
+				  bnx2i_conn, bnx2i_conn->ep->ep_iscsi_cid,
+				  bnx2i_conn->ep->ep_cid);
+		bnx2i_recovery_que_add_conn(bnx2i_conn->hba, bnx2i_conn);
+	} else
+		if (!test_and_set_bit(iscsi_err->completion_status,
+				      (void *) &bnx2i_conn->violation_notified))
+			iscsi_conn_printk(KERN_ALERT,
+					  bnx2i_conn->cls_conn->dd_data,
+					  "bnx2i: %s - %s\n",
+					  message, additional_notice);
+}
+
+
+/**
+ * bnx2i_process_conn_destroy_cmpl - process iscsi conn destroy completion
+ * @hba:		adapter structure pointer
+ * @conn_destroy:	conn destroy kcqe pointer
+ *
+ * handles connection destroy completion request.
+ */
+static void bnx2i_process_conn_destroy_cmpl(struct bnx2i_hba *hba,
+					    struct iscsi_kcqe *conn_destroy)
+{
+	struct bnx2i_endpoint *ep;
+
+	ep = bnx2i_find_ep_in_destroy_list(hba, conn_destroy->iscsi_conn_id);
+	if (!ep) {
+		printk(KERN_ALERT "bnx2i_conn_destroy_cmpl: no pending "
+				  "offload request, unexpected complection\n");
+		return;
+	}
+
+	if (hba != ep->hba) {
+		printk(KERN_ALERT "conn destroy- error hba mis-match\n");
+		return;
+	}
+
+	if (conn_destroy->completion_status) {
+		printk(KERN_ALERT "conn_destroy_cmpl: op failed\n");
+		ep->state = EP_STATE_CLEANUP_FAILED;
+	} else
+		ep->state = EP_STATE_CLEANUP_CMPL;
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_process_ofld_cmpl - process initial iscsi conn offload completion
+ * @hba:		adapter structure pointer
+ * @ofld_kcqe:		conn offload kcqe pointer
+ *
+ * handles initial connection offload completion, ep_connect() thread is
+ *	woken-up to continue with LLP connect process
+ */
+static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba,
+				    struct iscsi_kcqe *ofld_kcqe)
+{
+	u32 cid_addr;
+	struct bnx2i_endpoint *ep;
+	u32 cid_num;
+
+	ep = bnx2i_find_ep_in_ofld_list(hba, ofld_kcqe->iscsi_conn_id);
+	if (!ep) {
+		printk(KERN_ALERT "ofld_cmpl: no pend offload request\n");
+		return;
+	}
+
+	if (hba != ep->hba) {
+		printk(KERN_ALERT "ofld_cmpl: error hba mis-match\n");
+		return;
+	}
+
+	if (ofld_kcqe->completion_status) {
+		if (ofld_kcqe->completion_status ==
+		    ISCSI_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAILURE)
+			printk(KERN_ALERT "bnx2i: unable to allocate"
+					  " iSCSI context resources\n");
+		ep->state = EP_STATE_OFLD_FAILED;
+	} else {
+		ep->state = EP_STATE_OFLD_COMPL;
+		cid_addr = ofld_kcqe->iscsi_conn_context_id;
+		cid_num = bnx2i_get_cid_num(ep);
+		ep->ep_cid = cid_addr;
+		ep->qp.ctx_base = NULL;
+	}
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+/**
+ * bnx2i_indicate_kcqe - process iscsi conn update completion KCQE
+ * @hba:		adapter structure pointer
+ * @update_kcqe:	kcqe pointer
+ *
+ * Generic KCQ event handler/dispatcher
+ */
+static void bnx2i_indicate_kcqe(void *context, struct kcqe *kcqe[],
+				u32 num_cqe)
+{
+	struct bnx2i_hba *hba = context;
+	int i = 0;
+	struct iscsi_kcqe *ikcqe = NULL;
+
+	while (i < num_cqe) {
+		ikcqe = (struct iscsi_kcqe *) kcqe[i++];
+
+		if (ikcqe->op_code ==
+		    ISCSI_KCQE_OPCODE_CQ_EVENT_NOTIFICATION)
+			bnx2i_fastpath_notification(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_OFFLOAD_CONN)
+			bnx2i_process_ofld_cmpl(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_UPDATE_CONN)
+			bnx2i_process_update_conn_cmpl(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_INIT) {
+			if (ikcqe->completion_status !=
+			    ISCSI_KCQE_COMPLETION_STATUS_SUCCESS)
+				bnx2i_iscsi_license_error(hba, ikcqe->\
+							  completion_status);
+			else {
+				set_bit(ADAPTER_STATE_UP, &hba->adapter_state);
+				bnx2i_get_link_state(hba);
+				printk(KERN_INFO "bnx2i [%.2x:%.2x.%.2x]: "
+						 "ISCSI_INIT passed\n",
+						 (u8)hba->pcidev->bus->number,
+						 hba->pci_devno,
+						 (u8)hba->pci_func);
+
+
+			}
+		} else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_DESTROY_CONN)
+			bnx2i_process_conn_destroy_cmpl(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_ISCSI_ERROR)
+			bnx2i_process_iscsi_error(hba, ikcqe);
+		else if (ikcqe->op_code == ISCSI_KCQE_OPCODE_TCP_ERROR)
+			bnx2i_process_tcp_error(hba, ikcqe);
+		else
+			printk(KERN_ALERT "bnx2i: unknown opcode 0x%x\n",
+					  ikcqe->op_code);
+	}
+}
+
+
+/**
+ * bnx2i_indicate_netevent - Generic netdev event handler
+ * @context:	adapter structure pointer
+ * @event:	event type
+ *
+ * Handles four netdev events, NETDEV_UP, NETDEV_DOWN,
+ *	NETDEV_GOING_DOWN and NETDEV_CHANGE
+ */
+static void bnx2i_indicate_netevent(void *context, unsigned long event)
+{
+	struct bnx2i_hba *hba = context;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (!test_bit(ADAPTER_STATE_UP, &hba->adapter_state))
+			bnx2i_send_fw_iscsi_init_msg(hba);
+		break;
+	case NETDEV_DOWN:
+		clear_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state);
+		clear_bit(ADAPTER_STATE_UP, &hba->adapter_state);
+		break;
+	case NETDEV_GOING_DOWN:
+		set_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state);
+		iscsi_host_for_each_session(hba->shost,
+					    bnx2i_drop_session);
+		break;
+	case NETDEV_CHANGE:
+		bnx2i_get_link_state(hba);
+		break;
+	default:
+		;
+	}
+}
+
+
+/**
+ * bnx2i_cm_connect_cmpl - process iscsi conn establishment completion
+ * @cm_sk: 		cnic sock structure pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to
+ *	indicate completion of option-2 TCP connect request.
+ */
+static void bnx2i_cm_connect_cmpl(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	if (test_bit(ADAPTER_STATE_GOING_DOWN, &ep->hba->adapter_state))
+		ep->state = EP_STATE_CONNECT_FAILED;
+	else if (test_bit(SK_F_OFFLD_COMPLETE, &cm_sk->flags))
+		ep->state = EP_STATE_CONNECT_COMPL;
+	else
+		ep->state = EP_STATE_CONNECT_FAILED;
+
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_cm_close_cmpl - process tcp conn close completion
+ * @cm_sk:	cnic sock structure pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to
+ *	indicate completion of option-2 graceful TCP connect shutdown
+ */
+static void bnx2i_cm_close_cmpl(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	ep->state = EP_STATE_DISCONN_COMPL;
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_cm_abort_cmpl - process abortive tcp conn teardown completion
+ * @cm_sk:	cnic sock structure pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to
+ *	indicate completion of option-2 abortive TCP connect termination
+ */
+static void bnx2i_cm_abort_cmpl(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	ep->state = EP_STATE_DISCONN_COMPL;
+	wake_up_interruptible(&ep->ofld_wait);
+}
+
+
+/**
+ * bnx2i_cm_remote_close - process received TCP FIN
+ * @hba:		adapter structure pointer
+ * @update_kcqe:	kcqe pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to indicate
+ *	async TCP events such as FIN
+ */
+static void bnx2i_cm_remote_close(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	ep->state = EP_STATE_TCP_FIN_RCVD;
+	if (ep->conn)
+		bnx2i_recovery_que_add_conn(ep->hba, ep->conn);
+}
+
+/**
+ * bnx2i_cm_remote_abort - process TCP RST and start conn cleanup
+ * @hba:		adapter structure pointer
+ * @update_kcqe:	kcqe pointer
+ *
+ * function callback exported via bnx2i - cnic driver interface to
+ *	indicate async TCP events (RST) sent by the peer.
+ */
+static void bnx2i_cm_remote_abort(struct cnic_sock *cm_sk)
+{
+	struct bnx2i_endpoint *ep = (struct bnx2i_endpoint *) cm_sk->context;
+
+	ep->state = EP_STATE_TCP_RST_RCVD;
+	if (ep->conn)
+		bnx2i_recovery_que_add_conn(ep->hba, ep->conn);
+}
+
+
+static void bnx2i_send_nl_mesg(struct cnic_dev *dev, u32 msg_type,
+			       char *buf, u16 buflen)
+{
+	struct bnx2i_hba *hba;
+
+	hba = bnx2i_find_hba_for_cnic(dev);
+	if (!hba)
+		return;
+
+	if (iscsi_offload_mesg(hba->shost, &bnx2i_iscsi_transport,
+				   msg_type, buf, buflen))
+		printk(KERN_ALERT "bnx2i: private nl message send error\n");
+
+}
+
+
+/**
+ * bnx2i_cnic_cb - global template of bnx2i - cnic driver interface structure
+ *			carrying callback function pointers
+ *
+ */
+struct cnic_ulp_ops bnx2i_cnic_cb = {
+	.cnic_init = bnx2i_ulp_init,
+	.cnic_exit = bnx2i_ulp_exit,
+	.cnic_start = bnx2i_start,
+	.cnic_stop = bnx2i_stop,
+	.indicate_kcqes = bnx2i_indicate_kcqe,
+	.indicate_netevent = bnx2i_indicate_netevent,
+	.cm_connect_complete = bnx2i_cm_connect_cmpl,
+	.cm_close_complete = bnx2i_cm_close_cmpl,
+	.cm_abort_complete = bnx2i_cm_abort_cmpl,
+	.cm_remote_close = bnx2i_cm_remote_close,
+	.cm_remote_abort = bnx2i_cm_remote_abort,
+	.iscsi_nl_send_msg = bnx2i_send_nl_mesg,
+	.owner = THIS_MODULE
+};
+
+
+/**
+ * bnx2i_map_ep_dbell_regs - map connection doorbell registers
+ * @ep: bnx2i endpoint
+ *
+ * maps connection's SQ and RQ doorbell registers, 5706/5708/5709 hosts these
+ *	register in BAR #0. Whereas in 57710 these register are accessed by
+ *	mapping BAR #1
+ */
+int bnx2i_map_ep_dbell_regs(struct bnx2i_endpoint *ep)
+{
+	u32 cid_num;
+	u32 reg_off;
+	u32 first_l4l5;
+	u32 ctx_sz;
+	u32 config2;
+	resource_size_t reg_base;
+
+	cid_num = bnx2i_get_cid_num(ep);
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
+		reg_base = pci_resource_start(ep->hba->pcidev,
+					      BNX2X_DOORBELL_PCI_BAR);
+		reg_off = PAGE_SIZE * (cid_num & 0x1FFFF) + DPM_TRIGER_TYPE;
+		ep->qp.ctx_base = ioremap_nocache(reg_base + reg_off, 4);
+		goto arm_cq;
+	}
+
+	reg_base = ep->hba->netdev->base_addr;
+	if ((test_bit(BNX2I_NX2_DEV_5709, &ep->hba->cnic_dev_type)) &&
+	    (ep->hba->mail_queue_access == BNX2I_MQ_BIN_MODE)) {
+		config2 = REG_RD(ep->hba, BNX2_MQ_CONFIG2);
+		first_l4l5 = config2 & BNX2_MQ_CONFIG2_FIRST_L4L5;
+		ctx_sz = (config2 & BNX2_MQ_CONFIG2_CONT_SZ) >> 3;
+		if (ctx_sz)
+			reg_off = CTX_OFFSET + MAX_CID_CNT * MB_KERNEL_CTX_SIZE
+				  + PAGE_SIZE *
+				  (((cid_num - first_l4l5) / ctx_sz) + 256);
+		else
+			reg_off = CTX_OFFSET + (MB_KERNEL_CTX_SIZE * cid_num);
+	} else
+		/* 5709 device in normal node and 5706/5708 devices */
+		reg_off = CTX_OFFSET + (MB_KERNEL_CTX_SIZE * cid_num);
+
+	ep->qp.ctx_base = ioremap_nocache(reg_base + reg_off,
+					  MB_KERNEL_CTX_SIZE);
+	if (!ep->qp.ctx_base)
+		return -ENOMEM;
+
+arm_cq:
+	bnx2i_arm_cq_event_coalescing(ep, CNIC_ARM_CQE);
+	return 0;
+}
diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
new file mode 100644
index 0000000..ae4b2d5
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -0,0 +1,438 @@
+/* bnx2i.c: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#include "bnx2i.h"
+
+static struct list_head adapter_list = LIST_HEAD_INIT(adapter_list);
+static u32 adapter_count;
+static int bnx2i_reg_device;
+
+#define DRV_MODULE_NAME		"bnx2i"
+#define DRV_MODULE_VERSION	"2.0.1d"
+#define DRV_MODULE_RELDATE	"Mar 25, 2009"
+
+static char version[] __devinitdata =
+		"Broadcom NetXtreme II iSCSI Driver " DRV_MODULE_NAME \
+		" v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+
+
+MODULE_AUTHOR("Anil Veerabhadrappa <anilgv@broadcom.com>");
+MODULE_DESCRIPTION("Broadcom NetXtreme II BCM5706/5708/5709 iSCSI Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+static DEFINE_RWLOCK(bnx2i_dev_lock);
+
+unsigned int event_coal_div = 1;
+module_param(event_coal_div, int, 0664);
+MODULE_PARM_DESC(event_coal_div, "Event Coalescing Divide Factor");
+
+unsigned int en_tcp_dack = 1;
+module_param(en_tcp_dack, int, 0664);
+MODULE_PARM_DESC(en_tcp_dack, "Enable TCP Delayed ACK");
+
+unsigned int error_mask1 = 0x00;
+module_param(error_mask1, int, 0664);
+MODULE_PARM_DESC(error_mask1, "Config FW iSCSI Error Mask #1");
+
+unsigned int error_mask2 = 0x00;
+module_param(error_mask2, int, 0664);
+MODULE_PARM_DESC(error_mask2, "Config FW iSCSI Error Mask #2");
+
+unsigned int sq_size;
+module_param(sq_size, int, 0664);
+MODULE_PARM_DESC(sq_size, "Configure SQ size");
+
+unsigned int rq_size = BNX2I_RQ_WQES_DEFAULT;
+module_param(rq_size, int, 0664);
+MODULE_PARM_DESC(rq_size, "Configure RQ size");
+
+u64 iscsi_error_mask = 0x00;
+
+static void bnx2i_unreg_one_device(struct bnx2i_hba *hba) ;
+
+
+/**
+ * bnx2i_identify_device - identifies NetXtreme II device type
+ * @hba: 		Adapter structure pointer
+ *
+ * This function identifies the NX2 device type and sets appropriate
+ *	queue mailbox register access method, 5709 requires driver to
+ *	access MBOX regs using *bin* mode
+ */
+void bnx2i_identify_device(struct bnx2i_hba *hba)
+{
+	hba->cnic_dev_type = 0;
+	if ((hba->pci_did == PCI_DEVICE_ID_NX2_5706) ||
+	    (hba->pci_did == PCI_DEVICE_ID_NX2_5706S))
+		set_bit(BNX2I_NX2_DEV_5706, &hba->cnic_dev_type);
+	else if ((hba->pci_did == PCI_DEVICE_ID_NX2_5708) ||
+	    (hba->pci_did == PCI_DEVICE_ID_NX2_5708S))
+		set_bit(BNX2I_NX2_DEV_5708, &hba->cnic_dev_type);
+	else if ((hba->pci_did == PCI_DEVICE_ID_NX2_5709) ||
+	    (hba->pci_did == PCI_DEVICE_ID_NX2_5709S)) {
+		set_bit(BNX2I_NX2_DEV_5709, &hba->cnic_dev_type);
+		hba->mail_queue_access = BNX2I_MQ_BIN_MODE;
+	} else if (hba->pci_did == PCI_DEVICE_ID_NX2_57710 ||
+		   hba->pci_did == PCI_DEVICE_ID_NX2_57711)
+		set_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type);
+}
+
+
+/**
+ * get_adapter_list_head - returns head of adapter list
+ */
+struct bnx2i_hba *get_adapter_list_head(void)
+{
+	struct bnx2i_hba *hba = NULL;
+	struct bnx2i_hba *tmp_hba;
+
+	if (!adapter_count)
+		goto hba_not_found;
+
+	read_lock(&bnx2i_dev_lock);
+	list_for_each_entry(tmp_hba, &adapter_list, link) {
+		if (tmp_hba->cnic && tmp_hba->cnic->cm_select_dev) {
+			hba = tmp_hba;
+			break;
+		}
+	}
+	read_unlock(&bnx2i_dev_lock);
+hba_not_found:
+	return hba;
+}
+
+
+/**
+ * bnx2i_find_hba_for_cnic - maps cnic device instance to bnx2i adapter instance
+ * @cnic:	pointer to cnic device instance
+ *
+ */
+struct bnx2i_hba *bnx2i_find_hba_for_cnic(struct cnic_dev *cnic)
+{
+	struct bnx2i_hba *hba, *temp;
+
+	read_lock(&bnx2i_dev_lock);
+	list_for_each_entry_safe(hba, temp, &adapter_list, link) {
+		if (hba->cnic == cnic) {
+			read_unlock(&bnx2i_dev_lock);
+			return hba;
+		}
+	}
+	read_unlock(&bnx2i_dev_lock);
+	return NULL;
+}
+
+
+/**
+ * bnx2i_start - cnic callback to initialize & start adapter instance
+ * @handle:	transparent handle pointing to adapter structure
+ *
+ * This function maps adapter structure to pcidev structure and initiates
+ *	firmware handshake to enable/initialize on chip iscsi components
+ * 	This bnx2i - cnic interface api callback is issued after following
+ *	2 conditions are met -
+ *	  a) underlying network interface is up (marked by event 'NETDEV_UP'
+ *		from netdev
+ *	  b) bnx2i adapter instance is registered
+ */
+void bnx2i_start(void *handle)
+{
+#define BNX2I_INIT_POLL_TIME	(1000 / HZ)
+	struct bnx2i_hba *hba = handle;
+	int i = HZ;
+
+	bnx2i_send_fw_iscsi_init_msg(hba);
+	while (!test_bit(ADAPTER_STATE_UP, &hba->adapter_state) && i--)
+		msleep(BNX2I_INIT_POLL_TIME);
+}
+
+
+/**
+ * bnx2i_stop - cnic callback to shutdown adapter instance
+ * @handle:	transparent handle pointing to adapter structure
+ *
+ * driver checks if adapter is already in shutdown mode, if not start
+ *	the shutdown process
+ */
+void bnx2i_stop(void *handle)
+{
+	struct bnx2i_hba *hba = handle;
+
+	/* check if cleanup happened in GOING_DOWN context */
+	clear_bit(ADAPTER_STATE_UP, &hba->adapter_state);
+	if (!test_and_clear_bit(ADAPTER_STATE_GOING_DOWN,
+				&hba->adapter_state))
+		iscsi_host_for_each_session(hba->shost,
+					    bnx2i_drop_session);
+}
+
+/**
+ * bnx2i_register_device - register bnx2i adapter instance with the cnic driver
+ * @hba:	Adapter instance to register
+ *
+ * registers bnx2i adapter instance with the cnic driver while holding the
+ *	adapter structure lock
+ */
+void bnx2i_register_device(struct bnx2i_hba *hba)
+{
+	if (test_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state) ||
+	    test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		return;
+	}
+
+	hba->cnic->register_device(hba->cnic, CNIC_ULP_ISCSI, hba);
+
+	spin_lock(&hba->lock);
+	bnx2i_reg_device++;
+	spin_unlock(&hba->lock);
+
+	set_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+}
+
+
+/**
+ * bnx2i_reg_dev_all - registers all adapter instances with the cnic driver
+ *
+ * registers all bnx2i adapter instances with the cnic driver while holding
+ *	the global resource lock
+ */
+void bnx2i_reg_dev_all(void)
+{
+	struct bnx2i_hba *hba, *temp;
+
+	read_lock(&bnx2i_dev_lock);
+	list_for_each_entry_safe(hba, temp, &adapter_list, link)
+		bnx2i_register_device(hba);
+	read_unlock(&bnx2i_dev_lock);
+}
+
+
+/**
+ * bnx2i_unreg_one_device - unregister adapter instance with the cnic driver
+ * @hba:	Adapter instance to unregister
+ *
+ * registers bnx2i adapter instance with the cnic driver while holding
+ *	the adapter structure lock
+ */
+static void bnx2i_unreg_one_device(struct bnx2i_hba *hba)
+{
+	if (hba->ofld_conns_active ||
+	    !test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic) ||
+	    test_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state))
+		return;
+
+	hba->cnic->unregister_device(hba->cnic, CNIC_ULP_ISCSI);
+
+	spin_lock(&hba->lock);
+	bnx2i_reg_device--;
+	spin_unlock(&hba->lock);
+
+	/* ep_disconnect could come before NETDEV_DOWN, driver won't
+	 * see NETDEV_DOWN as it already unregistered itself.
+	 */
+	hba->adapter_state = 0;
+	clear_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+}
+
+/**
+ * bnx2i_unreg_dev_all - unregisters all bnx2i instances with the cnic driver
+ *
+ * unregisters all bnx2i adapter instances with the cnic driver while holding
+ *	the global resource lock
+ */
+void bnx2i_unreg_dev_all(void)
+{
+	struct bnx2i_hba *hba, *temp;
+
+	read_lock(&bnx2i_dev_lock);
+	list_for_each_entry_safe(hba, temp, &adapter_list, link)
+		bnx2i_unreg_one_device(hba);
+	read_unlock(&bnx2i_dev_lock);
+}
+
+
+/**
+ * bnx2i_init_one - initialize an adapter instance and allocate memory resources
+ * @hba:	bnx2i adapter instance
+ * @cnic:	cnic device handle
+ *
+ * Global resource lock and host adapter lock is held during critical sections
+ *	below. This routine is called from cnic_register_driver() context and
+ *	work horse thread which does majority of device specific initialization
+ */
+static int bnx2i_init_one(struct bnx2i_hba *hba, struct cnic_dev *cnic)
+{
+	int rc;
+
+	read_lock(&bnx2i_dev_lock);
+	if (bnx2i_reg_device &&
+	    !test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		rc = cnic->register_device(cnic, CNIC_ULP_ISCSI, hba);
+		if (rc)		/* duplicate registration */
+			printk(KERN_ERR "bnx2i- dev reg failed\n");
+
+		spin_lock(&hba->lock);
+		bnx2i_reg_device++;
+		hba->age++;
+		spin_unlock(&hba->lock);
+
+		set_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+	}
+	read_unlock(&bnx2i_dev_lock);
+
+	write_lock(&bnx2i_dev_lock);
+	list_add_tail(&hba->link, &adapter_list);
+	adapter_count++;
+	write_unlock(&bnx2i_dev_lock);
+	return 0;
+}
+
+
+/**
+ * bnx2i_ulp_init - initialize an adapter instance
+ * @dev:	cnic device handle
+ *
+ * Called from cnic_register_driver() context to initialize all enumerated
+ *	cnic devices. This routine allocate adapter structure and other
+ *	device specific resources.
+ */
+void bnx2i_ulp_init(struct cnic_dev *dev)
+{
+	struct bnx2i_hba *hba;
+
+	/* Allocate a HBA structure for this device */
+	hba = bnx2i_alloc_hba(dev);
+	if (!hba) {
+		printk(KERN_ERR "bnx2i init: hba initialization failed\n");
+		return;
+	}
+
+	/* Get PCI related information and update hba struct members */
+	clear_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+	if (bnx2i_init_one(hba, dev)) {
+		printk(KERN_ERR "bnx2i - hba %p init failed\n", hba);
+		bnx2i_free_hba(hba);
+	} else
+		hba->cnic = dev;
+}
+
+
+/**
+ * bnx2i_ulp_exit - shuts down adapter instance and frees all resources
+ * @dev:	cnic device handle
+ *
+ */
+void bnx2i_ulp_exit(struct cnic_dev *dev)
+{
+	struct bnx2i_hba *hba;
+
+	hba = bnx2i_find_hba_for_cnic(dev);
+	if (!hba) {
+		printk(KERN_INFO "bnx2i_ulp_exit: hba not "
+				 "found, dev 0x%p\n", dev);
+		return;
+	}
+	write_lock(&bnx2i_dev_lock);
+	list_del_init(&hba->link);
+	adapter_count--;
+
+	if (test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		hba->cnic->unregister_device(hba->cnic, CNIC_ULP_ISCSI);
+		clear_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+
+		spin_lock(&hba->lock);
+		bnx2i_reg_device--;
+		spin_unlock(&hba->lock);
+	}
+	write_unlock(&bnx2i_dev_lock);
+
+	bnx2i_free_hba(hba);
+}
+
+
+/**
+ * bnx2i_mod_init - module init entry point
+ *
+ * initialize any driver wide global data structures such as endpoint pool,
+ *	tcp port manager/queue, sysfs. finally driver will register itself
+ *	with the cnic module
+ */
+static int __init bnx2i_mod_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "%s", version);
+
+	if (!is_power_of_2(sq_size))
+		sq_size = roundup_pow_of_two(sq_size);
+
+	bnx2i_scsi_xport_template =
+			iscsi_register_transport(&bnx2i_iscsi_transport);
+	if (!bnx2i_scsi_xport_template) {
+		printk(KERN_ERR "Could not register bnx2i transport.\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = cnic_register_driver(CNIC_ULP_ISCSI, &bnx2i_cnic_cb);
+	if (err) {
+		printk(KERN_ERR "Could not register bnx2i cnic driver.\n");
+		goto unreg_xport;
+	}
+
+	return 0;
+
+unreg_xport:
+	iscsi_unregister_transport(&bnx2i_iscsi_transport);
+out:
+	return err;
+}
+
+
+/**
+ * bnx2i_mod_exit - module cleanup/exit entry point
+ *
+ * Global resource lock and host adapter lock is held during critical sections
+ *	in this function. Driver will browse through the adapter list, cleans-up
+ *	each instance, unregisters iscsi transport name and finally driver will
+ *	unregister itself with the cnic module
+ */
+static void __exit bnx2i_mod_exit(void)
+{
+	struct bnx2i_hba *hba;
+
+	write_lock(&bnx2i_dev_lock);
+	while (!list_empty(&adapter_list)) {
+		hba = list_entry(adapter_list.next, struct bnx2i_hba, link);
+		list_del(&hba->link);
+		adapter_count--;
+
+		if (test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+			hba->cnic->unregister_device(hba->cnic, CNIC_ULP_ISCSI);
+			clear_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic);
+			bnx2i_reg_device--;
+		}
+
+		write_unlock(&bnx2i_dev_lock);
+		bnx2i_free_hba(hba);
+		write_lock(&bnx2i_dev_lock);
+	}
+	write_unlock(&bnx2i_dev_lock);
+
+	iscsi_unregister_transport(&bnx2i_iscsi_transport);
+	cnic_unregister_driver(CNIC_ULP_ISCSI);
+}
+
+module_init(bnx2i_mod_init);
+module_exit(bnx2i_mod_exit);
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
new file mode 100644
index 0000000..f741219
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -0,0 +1,2064 @@
+/*
+ * bnx2i_iscsi.c: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2006 - 2009 Broadcom Corporation
+ * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#include <scsi/scsi_tcq.h>
+#include <scsi/libiscsi.h>
+#include "bnx2i.h"
+
+struct scsi_transport_template *bnx2i_scsi_xport_template;
+struct iscsi_transport bnx2i_iscsi_transport;
+static struct scsi_host_template bnx2i_host_template;
+
+/*
+ * Global endpoint resource info
+ */
+static DEFINE_SPINLOCK(bnx2i_resc_lock); /* protects global resources */
+
+
+static int bnx2i_adapter_ready(struct bnx2i_hba *hba)
+{
+	int retval = 0;
+
+	if (!hba || !test_bit(ADAPTER_STATE_UP, &hba->adapter_state) ||
+	    test_bit(ADAPTER_STATE_GOING_DOWN, &hba->adapter_state) ||
+	    test_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state))
+		retval = -EPERM;
+	return retval;
+}
+
+/**
+ * bnx2i_get_write_cmd_bd_idx - identifies various BD bookmarks
+ * @cmd:		iscsi cmd struct pointer
+ * @buf_off:		absolute buffer offset
+ * @start_bd_off:	u32 pointer to return the offset within the BD
+ *			indicated by 'start_bd_idx' on which 'buf_off' falls
+ * @start_bd_idx:	index of the BD on which 'buf_off' falls
+ *
+ * identifies & marks various bd info for scsi command's imm data,
+ * unsolicited data and the first solicited data seq.
+ */
+static void bnx2i_get_write_cmd_bd_idx(struct bnx2i_cmd *cmd, u32 buf_off,
+				       u32 *start_bd_off, u32 *start_bd_idx)
+{
+	struct iscsi_bd *bd_tbl = cmd->io_tbl.bd_tbl;
+	u32 cur_offset = 0;
+	u32 cur_bd_idx = 0;
+
+	if (buf_off) {
+		while (buf_off >= (cur_offset + bd_tbl->buffer_length)) {
+			cur_offset += bd_tbl->buffer_length;
+			cur_bd_idx++;
+			bd_tbl++;
+		}
+	}
+
+	*start_bd_off = buf_off - cur_offset;
+	*start_bd_idx = cur_bd_idx;
+}
+
+/**
+ * bnx2i_setup_write_cmd_bd_info - sets up BD various information
+ * @task:	transport layer's cmd struct pointer
+ *
+ * identifies & marks various bd info for scsi command's immediate data,
+ * unsolicited data and first solicited data seq which includes BD start
+ * index & BD buf off. his function takes into account iscsi parameter such
+ * as immediate data and unsolicited data is support on this connection.
+ */
+static void bnx2i_setup_write_cmd_bd_info(struct iscsi_task *task)
+{
+	struct bnx2i_cmd *cmd = task->dd_data;
+	u32 start_bd_offset;
+	u32 start_bd_idx;
+	u32 buffer_offset = 0;
+	u32 cmd_len = cmd->req.total_data_transfer_length;
+
+	/* if ImmediateData is turned off & IntialR2T is turned on,
+	 * there will be no immediate or unsolicited data, just return.
+	 */
+	if (!iscsi_task_has_unsol_data(task) && !task->imm_count)
+		return;
+
+	/* Immediate data */
+	buffer_offset += task->imm_count;
+	if (task->imm_count == cmd_len)
+		return;
+
+	if (iscsi_task_has_unsol_data(task)) {
+		bnx2i_get_write_cmd_bd_idx(cmd, buffer_offset,
+					   &start_bd_offset, &start_bd_idx);
+		cmd->req.ud_buffer_offset = start_bd_offset;
+		cmd->req.ud_start_bd_index = start_bd_idx;
+		buffer_offset += task->unsol_r2t.data_length;
+	}
+
+	if (buffer_offset != cmd_len) {
+		bnx2i_get_write_cmd_bd_idx(cmd, buffer_offset,
+					   &start_bd_offset, &start_bd_idx);
+		if ((start_bd_offset > task->conn->session->first_burst) ||
+		    (start_bd_idx > scsi_sg_count(cmd->scsi_cmd))) {
+			int i = 0;
+
+			iscsi_conn_printk(KERN_ALERT, task->conn,
+					  "bnx2i- error, buf offset 0x%x "
+					  "bd_valid %d use_sg %d\n",
+					  buffer_offset, cmd->io_tbl.bd_valid,
+					  scsi_sg_count(cmd->scsi_cmd));
+			for (i = 0; i < cmd->io_tbl.bd_valid; i++)
+				iscsi_conn_printk(KERN_ALERT, task->conn,
+						  "bnx2i err, bd[%d]: len %x\n",
+						  i, cmd->io_tbl.bd_tbl[i].\
+						  buffer_length);
+		}
+		cmd->req.sd_buffer_offset = start_bd_offset;
+		cmd->req.sd_start_bd_index = start_bd_idx;
+	}
+}
+
+
+
+/**
+ * bnx2i_map_scsi_sg - maps IO buffer and prepares the BD table
+ * @hba:	adapter instance
+ * @cmd:	iscsi cmd struct pointer
+ *
+ * map SG list
+ */
+static int bnx2i_map_scsi_sg(struct bnx2i_hba *hba, struct bnx2i_cmd *cmd)
+{
+	struct scsi_cmnd *sc = cmd->scsi_cmd;
+	struct iscsi_bd *bd = cmd->io_tbl.bd_tbl;
+	struct scatterlist *sg;
+	int byte_count = 0;
+	int bd_count = 0;
+	int sg_count;
+	int sg_len;
+	u64 addr;
+	int i;
+
+	BUG_ON(scsi_sg_count(sc) > ISCSI_MAX_BDS_PER_CMD);
+
+	sg_count = scsi_dma_map(sc);
+
+	scsi_for_each_sg(sc, sg, sg_count, i) {
+		sg_len = sg_dma_len(sg);
+		addr = (u64) sg_dma_address(sg);
+		bd[bd_count].buffer_addr_lo = addr & 0xffffffff;
+		bd[bd_count].buffer_addr_hi = addr >> 32;
+		bd[bd_count].buffer_length = sg_len;
+		bd[bd_count].flags = 0;
+		if (bd_count == 0)
+			bd[bd_count].flags = ISCSI_BD_FIRST_IN_BD_CHAIN;
+
+		byte_count += sg_len;
+		bd_count++;
+	}
+
+	if (bd_count)
+		bd[bd_count - 1].flags |= ISCSI_BD_LAST_IN_BD_CHAIN;
+
+	BUG_ON(byte_count != scsi_bufflen(sc));
+	return bd_count;
+}
+
+/**
+ * bnx2i_iscsi_map_sg_list - maps SG list
+ * @cmd:	iscsi cmd struct pointer
+ *
+ * creates BD list table for the command
+ */
+static void bnx2i_iscsi_map_sg_list(struct bnx2i_cmd *cmd)
+{
+	int bd_count;
+
+	bd_count  = bnx2i_map_scsi_sg(cmd->conn->hba, cmd);
+	if (!bd_count) {
+		struct iscsi_bd *bd = cmd->io_tbl.bd_tbl;
+
+		bd[0].buffer_addr_lo = bd[0].buffer_addr_hi = 0;
+		bd[0].buffer_length = bd[0].flags = 0;
+	}
+	cmd->io_tbl.bd_valid = bd_count;
+}
+
+
+/**
+ * bnx2i_iscsi_unmap_sg_list - unmaps SG list
+ * @cmd:	iscsi cmd struct pointer
+ *
+ * unmap IO buffers and invalidate the BD table
+ */
+void bnx2i_iscsi_unmap_sg_list(struct bnx2i_cmd *cmd)
+{
+	struct scsi_cmnd *sc = cmd->scsi_cmd;
+
+	if (cmd->io_tbl.bd_valid && sc) {
+		scsi_dma_unmap(sc);
+		cmd->io_tbl.bd_valid = 0;
+	}
+}
+
+static void bnx2i_setup_cmd_wqe_template(struct bnx2i_cmd *cmd)
+{
+	memset(&cmd->req, 0x00, sizeof(cmd->req));
+	cmd->req.op_code = 0xFF;
+	cmd->req.bd_list_addr_lo = (u32) cmd->io_tbl.bd_tbl_dma;
+	cmd->req.bd_list_addr_hi =
+		(u32) ((u64) cmd->io_tbl.bd_tbl_dma >> 32);
+
+}
+
+
+/**
+ * bnx2i_bind_conn_to_iscsi_cid - bind conn structure to 'iscsi_cid'
+ * @hba:	pointer to adapter instance
+ * @conn:	pointer to iscsi connection
+ * @iscsi_cid:	iscsi context ID, range 0 - (MAX_CONN - 1)
+ *
+ * update iscsi cid table entry with connection pointer. This enables
+ *	driver to quickly get hold of connection structure pointer in
+ *	completion/interrupt thread using iscsi context ID
+ */
+static int bnx2i_bind_conn_to_iscsi_cid(struct bnx2i_hba *hba,
+					struct bnx2i_conn *bnx2i_conn,
+					u32 iscsi_cid)
+{
+	if (hba && hba->cid_que.conn_cid_tbl[iscsi_cid]) {
+		iscsi_conn_printk(KERN_ALERT, bnx2i_conn->cls_conn->dd_data,
+				 "conn bind - entry #%d not free\n", iscsi_cid);
+		return -EBUSY;
+	}
+
+	hba->cid_que.conn_cid_tbl[iscsi_cid] = bnx2i_conn;
+	return 0;
+}
+
+
+/**
+ * bnx2i_get_conn_from_id - maps an iscsi cid to corresponding conn ptr
+ * @hba:	pointer to adapter instance
+ * @iscsi_cid:	iscsi context ID, range 0 - (MAX_CONN - 1)
+ */
+struct bnx2i_conn *bnx2i_get_conn_from_id(struct bnx2i_hba *hba,
+					  u16 iscsi_cid)
+{
+	if (!hba->cid_que.conn_cid_tbl) {
+		printk(KERN_ERR "bnx2i: ERROR - missing conn<->cid table\n");
+		return NULL;
+
+	} else if (iscsi_cid >= hba->max_active_conns) {
+		printk(KERN_ERR "bnx2i: wrong cid #%d\n", iscsi_cid);
+		return NULL;
+	}
+	return hba->cid_que.conn_cid_tbl[iscsi_cid];
+}
+
+
+/**
+ * bnx2i_alloc_iscsi_cid - allocates a iscsi_cid from free pool
+ * @hba:	pointer to adapter instance
+ */
+static u32 bnx2i_alloc_iscsi_cid(struct bnx2i_hba *hba)
+{
+	int idx;
+
+	if (!hba->cid_que.cid_free_cnt)
+		return -1;
+
+	idx = hba->cid_que.cid_q_cons_idx;
+	hba->cid_que.cid_q_cons_idx++;
+	if (hba->cid_que.cid_q_cons_idx == hba->cid_que.cid_q_max_idx)
+		hba->cid_que.cid_q_cons_idx = 0;
+
+	hba->cid_que.cid_free_cnt--;
+	return hba->cid_que.cid_que[idx];
+}
+
+
+/**
+ * bnx2i_free_iscsi_cid - returns tcp port to free list
+ * @hba: 		pointer to adapter instance
+ * @iscsi_cid:		iscsi context ID to free
+ */
+static void bnx2i_free_iscsi_cid(struct bnx2i_hba *hba, u16 iscsi_cid)
+{
+	int idx;
+
+	if (iscsi_cid == (u16) -1)
+		return;
+
+	hba->cid_que.cid_free_cnt++;
+
+	idx = hba->cid_que.cid_q_prod_idx;
+	hba->cid_que.cid_que[idx] = iscsi_cid;
+	hba->cid_que.conn_cid_tbl[iscsi_cid] = NULL;
+	hba->cid_que.cid_q_prod_idx++;
+	if (hba->cid_que.cid_q_prod_idx == hba->cid_que.cid_q_max_idx)
+		hba->cid_que.cid_q_prod_idx = 0;
+}
+
+
+/**
+ * bnx2i_setup_free_cid_que - sets up free iscsi cid queue
+ * @hba:	pointer to adapter instance
+ *
+ * allocates memory for iscsi cid queue & 'cid - conn ptr' mapping table,
+ * 	and initialize table attributes
+ */
+static int bnx2i_setup_free_cid_que(struct bnx2i_hba *hba)
+{
+	int mem_size;
+	int i;
+
+	mem_size = hba->max_active_conns * sizeof(u32);
+	mem_size = (mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+
+	hba->cid_que.cid_que_base = kmalloc(mem_size, GFP_KERNEL);
+	if (!hba->cid_que.cid_que_base)
+		return -ENOMEM;
+
+	mem_size = hba->max_active_conns * sizeof(struct bnx2i_conn *);
+	mem_size = (mem_size + (PAGE_SIZE - 1)) & PAGE_MASK;
+	hba->cid_que.conn_cid_tbl = kmalloc(mem_size, GFP_KERNEL);
+	if (!hba->cid_que.conn_cid_tbl) {
+		kfree(hba->cid_que.cid_que_base);
+		hba->cid_que.cid_que_base = NULL;
+		return -ENOMEM;
+	}
+
+	hba->cid_que.cid_que = (u32 *)hba->cid_que.cid_que_base;
+	hba->cid_que.cid_q_prod_idx = 0;
+	hba->cid_que.cid_q_cons_idx = 0;
+	hba->cid_que.cid_q_max_idx = hba->max_active_conns;
+	hba->cid_que.cid_free_cnt = hba->max_active_conns;
+
+	for (i = 0; i < hba->max_active_conns; i++) {
+		hba->cid_que.cid_que[i] = i;
+		hba->cid_que.conn_cid_tbl[i] = NULL;
+	}
+	return 0;
+}
+
+
+/**
+ * bnx2i_release_free_cid_que - releases 'iscsi_cid' queue resources
+ * @hba:	pointer to adapter instance
+ */
+static void bnx2i_release_free_cid_que(struct bnx2i_hba *hba)
+{
+	kfree(hba->cid_que.cid_que_base);
+	hba->cid_que.cid_que_base = NULL;
+
+	kfree(hba->cid_que.conn_cid_tbl);
+	hba->cid_que.conn_cid_tbl = NULL;
+}
+
+
+/**
+ * bnx2i_alloc_ep - allocates ep structure from global pool
+ * @hba:	pointer to adapter instance
+ *
+ * routine allocates a free endpoint structure from global pool and
+ *	a tcp port to be used for this connection.  Global resource lock,
+ *	'bnx2i_resc_lock' is held while accessing shared global data structures
+ */
+static struct iscsi_endpoint *bnx2i_alloc_ep(struct bnx2i_hba *hba)
+{
+	struct iscsi_endpoint *ep;
+	struct bnx2i_endpoint *bnx2i_ep;
+
+	ep = iscsi_create_endpoint(sizeof(*bnx2i_ep));
+	if (!ep) {
+		printk(KERN_ERR "bnx2i: Could not allocate ep\n");
+		return NULL;
+	}
+
+	bnx2i_ep = ep->dd_data;
+	INIT_LIST_HEAD(&bnx2i_ep->link);
+	bnx2i_ep->state = EP_STATE_IDLE;
+	bnx2i_ep->hba = hba;
+	bnx2i_ep->hba_age = hba->age;
+	hba->ofld_conns_active++;
+	init_waitqueue_head(&bnx2i_ep->ofld_wait);
+	return ep;
+}
+
+
+/**
+ * bnx2i_free_ep - free endpoint
+ * @ep:		pointer to iscsi endpoint structure
+ */
+static void bnx2i_free_ep(struct iscsi_endpoint *ep)
+{
+	struct bnx2i_endpoint *bnx2i_ep = ep->dd_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnx2i_resc_lock, flags);
+	bnx2i_ep->state = EP_STATE_IDLE;
+	bnx2i_ep->hba->ofld_conns_active--;
+
+	bnx2i_free_iscsi_cid(bnx2i_ep->hba, bnx2i_ep->ep_iscsi_cid);
+	if (bnx2i_ep->conn) {
+		bnx2i_ep->conn->ep = NULL;
+		bnx2i_ep->conn = NULL;
+	}
+
+	bnx2i_ep->hba = NULL;
+	spin_unlock_irqrestore(&bnx2i_resc_lock, flags);
+	iscsi_destroy_endpoint(ep);
+}
+
+
+/**
+ * bnx2i_alloc_bdt - allocates buffer descriptor (BD) table for the command
+ * @hba:	adapter instance pointer
+ * @session:	iscsi session pointer
+ * @cmd:	iscsi command structure
+ */
+static int bnx2i_alloc_bdt(struct bnx2i_hba *hba, struct iscsi_session *session,
+			   struct bnx2i_cmd *cmd)
+{
+	struct io_bdt *io = &cmd->io_tbl;
+	struct iscsi_bd *bd;
+
+	io->bd_tbl = dma_alloc_coherent(&hba->pcidev->dev,
+					ISCSI_MAX_BDS_PER_CMD * sizeof(*bd),
+					&io->bd_tbl_dma, GFP_KERNEL);
+	if (!io->bd_tbl) {
+		iscsi_session_printk(KERN_ERR, session, "Could not "
+				     "allocate bdt.\n");
+		return -ENOMEM;
+	}
+	io->bd_valid = 0;
+	return 0;
+}
+
+/**
+ * bnx2i_destroy_cmd_pool - destroys iscsi command pool and release BD table
+ * @hba:	adapter instance pointer
+ * @session:	iscsi session pointer
+ * @cmd:	iscsi command structure
+ */
+static void bnx2i_destroy_cmd_pool(struct bnx2i_hba *hba,
+				   struct iscsi_session *session)
+{
+	int i;
+
+	for (i = 0; i < session->cmds_max; i++) {
+		struct iscsi_task *task = session->cmds[i];
+		struct bnx2i_cmd *cmd = task->dd_data;
+
+		if (cmd->io_tbl.bd_tbl)
+			dma_free_coherent(&hba->pcidev->dev,
+					  ISCSI_MAX_BDS_PER_CMD *
+					  sizeof(struct iscsi_bd),
+					  cmd->io_tbl.bd_tbl,
+					  cmd->io_tbl.bd_tbl_dma);
+	}
+
+}
+
+
+/**
+ * bnx2i_setup_cmd_pool - sets up iscsi command pool for the session
+ * @hba:	adapter instance pointer
+ * @session:	iscsi session pointer
+ */
+static int bnx2i_setup_cmd_pool(struct bnx2i_hba *hba,
+				struct iscsi_session *session)
+{
+	int i;
+
+	for (i = 0; i < session->cmds_max; i++) {
+		struct iscsi_task *task = session->cmds[i];
+		struct bnx2i_cmd *cmd = task->dd_data;
+
+		/* Anil */
+		task->hdr = &cmd->hdr;
+		task->hdr_max = sizeof(struct iscsi_hdr);
+
+		if (bnx2i_alloc_bdt(hba, session, cmd))
+			goto free_bdts;
+	}
+
+	return 0;
+
+free_bdts:
+	bnx2i_destroy_cmd_pool(hba, session);
+	return -ENOMEM;
+}
+
+
+/**
+ * bnx2i_setup_mp_bdt - allocate BD table resources
+ * @hba:	pointer to adapter structure
+ *
+ * Allocate memory for dummy buffer and associated BD
+ * table to be used by middle path (MP) requests
+ */
+static int bnx2i_setup_mp_bdt(struct bnx2i_hba *hba)
+{
+	int rc = 0;
+	struct iscsi_bd *mp_bdt;
+	u64 addr;
+
+	hba->mp_bd_tbl = dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE,
+					    &hba->mp_bd_dma, GFP_KERNEL);
+	if (!hba->mp_bd_tbl) {
+		printk(KERN_ERR "unable to allocate Middle Path BDT\n");
+		rc = -1;
+		goto out;
+	}
+
+	hba->dummy_buffer = dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE,
+					       &hba->dummy_buf_dma, GFP_KERNEL);
+	if (!hba->dummy_buffer) {
+		printk(KERN_ERR "unable to alloc Middle Path Dummy Buffer\n");
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  hba->mp_bd_tbl, hba->mp_bd_dma);
+		hba->mp_bd_tbl = NULL;
+		rc = -1;
+		goto out;
+	}
+
+	mp_bdt = (struct iscsi_bd *) hba->mp_bd_tbl;
+	addr = (unsigned long) hba->dummy_buf_dma;
+	mp_bdt->buffer_addr_lo = addr & 0xffffffff;
+	mp_bdt->buffer_addr_hi = addr >> 32;
+	mp_bdt->buffer_length = PAGE_SIZE;
+	mp_bdt->flags = ISCSI_BD_LAST_IN_BD_CHAIN |
+			ISCSI_BD_FIRST_IN_BD_CHAIN;
+out:
+	return rc;
+}
+
+
+/**
+ * bnx2i_free_mp_bdt - releases ITT back to free pool
+ * @hba:	pointer to adapter instance
+ *
+ * free MP dummy buffer and associated BD table
+ */
+static void bnx2i_free_mp_bdt(struct bnx2i_hba *hba)
+{
+	if (hba->mp_bd_tbl) {
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  hba->mp_bd_tbl, hba->mp_bd_dma);
+		hba->mp_bd_tbl = NULL;
+	}
+	if (hba->dummy_buffer) {
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  hba->dummy_buffer, hba->dummy_buf_dma);
+		hba->dummy_buffer = NULL;
+	}
+		return;
+}
+
+/**
+ * bnx2i_drop_session - notifies iscsid of connection error.
+ * @hba:	adapter instance pointer
+ * @session:	iscsi session pointer
+ *
+ * This notifies iscsid that there is a error, so it can initiate
+ * recovery.
+ *
+ * This relies on caller using the iscsi class iterator so the object
+ * is refcounted and does not disapper from under us.
+ */
+void bnx2i_drop_session(struct iscsi_cls_session *cls_session)
+{
+	iscsi_session_failure(cls_session->dd_data, ISCSI_ERR_CONN_FAILED);
+}
+
+/**
+ * bnx2i_ep_destroy_list_add - add an entry to EP destroy list
+ * @hba:	pointer to adapter instance
+ * @ep:		pointer to endpoint (transport indentifier) structure
+ *
+ * EP destroy queue manager
+ */
+static int bnx2i_ep_destroy_list_add(struct bnx2i_hba *hba,
+				     struct bnx2i_endpoint *ep)
+{
+	write_lock_bh(&hba->ep_rdwr_lock);
+	list_add_tail(&ep->link, &hba->ep_destroy_list);
+	write_unlock_bh(&hba->ep_rdwr_lock);
+	return 0;
+}
+
+/**
+ * bnx2i_ep_destroy_list_del - add an entry to EP destroy list
+ *
+ * @hba: 		pointer to adapter instance
+ * @ep: 		pointer to endpoint (transport indentifier) structure
+ *
+ * EP destroy queue manager
+ */
+static int bnx2i_ep_destroy_list_del(struct bnx2i_hba *hba,
+				     struct bnx2i_endpoint *ep)
+{
+	write_lock_bh(&hba->ep_rdwr_lock);
+	list_del_init(&ep->link);
+	write_unlock_bh(&hba->ep_rdwr_lock);
+
+	return 0;
+}
+
+/**
+ * bnx2i_ep_ofld_list_add - add an entry to ep offload pending list
+ * @hba:	pointer to adapter instance
+ * @ep:		pointer to endpoint (transport indentifier) structure
+ *
+ * pending conn offload completion queue manager
+ */
+static int bnx2i_ep_ofld_list_add(struct bnx2i_hba *hba,
+				  struct bnx2i_endpoint *ep)
+{
+	write_lock_bh(&hba->ep_rdwr_lock);
+	list_add_tail(&ep->link, &hba->ep_ofld_list);
+	write_unlock_bh(&hba->ep_rdwr_lock);
+	return 0;
+}
+
+/**
+ * bnx2i_ep_ofld_list_del - add an entry to ep offload pending list
+ * @hba: 		pointer to adapter instance
+ * @ep: 		pointer to endpoint (transport indentifier) structure
+ *
+ * pending conn offload completion queue manager
+ */
+static int bnx2i_ep_ofld_list_del(struct bnx2i_hba *hba,
+				  struct bnx2i_endpoint *ep)
+{
+	write_lock_bh(&hba->ep_rdwr_lock);
+	list_del_init(&ep->link);
+	write_unlock_bh(&hba->ep_rdwr_lock);
+	return 0;
+}
+
+
+/**
+ * bnx2i_find_ep_in_ofld_list - find iscsi_cid in pending list of endpoints
+ *
+ * @hba: 		pointer to adapter instance
+ * @iscsi_cid:		iscsi context ID to find
+ *
+ */
+struct bnx2i_endpoint *
+bnx2i_find_ep_in_ofld_list(struct bnx2i_hba *hba, u32 iscsi_cid)
+{
+	struct list_head *list;
+	struct list_head *tmp;
+	struct bnx2i_endpoint *ep;
+
+	read_lock_bh(&hba->ep_rdwr_lock);
+	list_for_each_safe(list, tmp, &hba->ep_ofld_list) {
+		ep = (struct bnx2i_endpoint *)list;
+
+		if (ep->ep_iscsi_cid == iscsi_cid)
+			break;
+		ep = NULL;
+	}
+	read_unlock_bh(&hba->ep_rdwr_lock);
+
+	if (!ep)
+		printk(KERN_ERR "l5 cid %d not found\n", iscsi_cid);
+	return ep;
+}
+
+
+/**
+ * bnx2i_find_ep_in_destroy_list - find iscsi_cid in destroy list
+ * @hba: 		pointer to adapter instance
+ * @iscsi_cid:		iscsi context ID to find
+ *
+ */
+struct bnx2i_endpoint *
+bnx2i_find_ep_in_destroy_list(struct bnx2i_hba *hba, u32 iscsi_cid)
+{
+	struct list_head *list;
+	struct list_head *tmp;
+	struct bnx2i_endpoint *ep;
+
+	read_lock_bh(&hba->ep_rdwr_lock);
+	list_for_each_safe(list, tmp, &hba->ep_destroy_list) {
+		ep = (struct bnx2i_endpoint *)list;
+
+		if (ep->ep_iscsi_cid == iscsi_cid)
+			break;
+		ep = NULL;
+	}
+	read_unlock_bh(&hba->ep_rdwr_lock);
+
+	if (!ep)
+		printk(KERN_ERR "l5 cid %d not found\n", iscsi_cid);
+
+	return ep;
+}
+
+/**
+ * bnx2i_setup_host_queue_size - assigns shost->can_queue param
+ * @hba:	pointer to adapter instance
+ * @shost:	scsi host pointer
+ *
+ * Initializes 'can_queue' parameter based on how many outstanding commands
+ * 	the device can handle. Each device 5708/5709/57710 has different
+ *	capabilities
+ */
+static void bnx2i_setup_host_queue_size(struct bnx2i_hba *hba,
+					struct Scsi_Host *shost)
+{
+	if (test_bit(BNX2I_NX2_DEV_5708, &hba->cnic_dev_type))
+		shost->can_queue = ISCSI_MAX_CMDS_PER_HBA_5708;
+	else if (test_bit(BNX2I_NX2_DEV_5709, &hba->cnic_dev_type))
+		shost->can_queue = ISCSI_MAX_CMDS_PER_HBA_5709;
+	else if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type))
+		shost->can_queue = ISCSI_MAX_CMDS_PER_HBA_57710;
+	else
+		shost->can_queue = ISCSI_MAX_CMDS_PER_HBA_5708;
+}
+
+
+/**
+ * bnx2i_alloc_hba - allocate and init adapter instance
+ * @cnic:	cnic device pointer
+ *
+ * allocate & initialize adapter structure and call other
+ *	support routines to do per adapter initialization
+ */
+struct bnx2i_hba *bnx2i_alloc_hba(struct cnic_dev *cnic)
+{
+	struct Scsi_Host *shost;
+	struct bnx2i_hba *hba;
+
+	shost = iscsi_host_alloc(&bnx2i_host_template, sizeof(*hba), 0);
+	if (!shost)
+		return NULL;
+	shost->dma_boundary = cnic->pcidev->dma_mask;
+	shost->transportt = bnx2i_scsi_xport_template;
+	shost->max_id = ISCSI_MAX_CONNS_PER_HBA;
+	shost->max_channel = 0;
+	shost->max_lun = 512;
+	shost->max_cmd_len = 16;
+
+	hba = iscsi_host_priv(shost);
+	hba->shost = shost;
+	hba->netdev = cnic->netdev;
+	/* Get PCI related information and update hba struct members */
+	hba->pcidev = cnic->pcidev;
+	pci_dev_get(hba->pcidev);
+	hba->pci_did = hba->pcidev->device;
+	hba->pci_vid = hba->pcidev->vendor;
+	hba->pci_sdid = hba->pcidev->subsystem_device;
+	hba->pci_svid = hba->pcidev->subsystem_vendor;
+	hba->pci_func = PCI_FUNC(hba->pcidev->devfn);
+	hba->pci_devno = PCI_SLOT(hba->pcidev->devfn);
+	bnx2i_identify_device(hba);
+
+	bnx2i_identify_device(hba);
+	bnx2i_setup_host_queue_size(hba, shost);
+
+	if (test_bit(BNX2I_NX2_DEV_5709, &hba->cnic_dev_type)) {
+		hba->regview = ioremap_nocache(hba->netdev->base_addr,
+					       BNX2_MQ_CONFIG2);
+		if (!hba->regview)
+			goto ioreg_map_err;
+	} else if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type)) {
+		hba->regview = ioremap_nocache(hba->netdev->base_addr, 4096);
+		if (!hba->regview)
+			goto ioreg_map_err;
+	}
+
+	if (bnx2i_setup_mp_bdt(hba))
+		goto mp_bdt_mem_err;
+
+	INIT_LIST_HEAD(&hba->ep_ofld_list);
+	INIT_LIST_HEAD(&hba->ep_destroy_list);
+	rwlock_init(&hba->ep_rdwr_lock);
+
+	hba->mtu_supported = BNX2I_MAX_MTU_SUPPORTED;
+
+	/* different values for 5708/5709/57710 */
+	hba->max_active_conns = ISCSI_MAX_CONNS_PER_HBA;
+
+	if (bnx2i_setup_free_cid_que(hba))
+		goto cid_que_err;
+
+	/* SQ/RQ/CQ size can be changed via sysfx interface */
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type)) {
+		if (sq_size && sq_size <= BNX2I_5770X_SQ_WQES_MAX)
+			hba->max_sqes = sq_size;
+		else
+			hba->max_sqes = BNX2I_5770X_SQ_WQES_DEFAULT;
+	} else {	/* 5706/5708/5709 */
+		if (sq_size && sq_size <= BNX2I_570X_SQ_WQES_MAX)
+			hba->max_sqes = sq_size;
+		else
+			hba->max_sqes = BNX2I_570X_SQ_WQES_DEFAULT;
+	}
+
+	hba->max_rqes = rq_size;
+	hba->max_cqes = hba->max_sqes + rq_size;
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type)) {
+		if (hba->max_cqes > BNX2I_5770X_CQ_WQES_MAX)
+			hba->max_cqes = BNX2I_5770X_CQ_WQES_MAX;
+	} else if (hba->max_cqes > BNX2I_570X_CQ_WQES_MAX)
+		hba->max_cqes = BNX2I_570X_CQ_WQES_MAX;
+
+	hba->num_ccell = hba->max_sqes / 2;
+
+	spin_lock_init(&hba->lock);
+	mutex_init(&hba->net_dev_lock);
+
+	if (iscsi_host_add(shost, &hba->pcidev->dev))
+		goto free_dump_mem;
+	return hba;
+
+free_dump_mem:
+	bnx2i_release_free_cid_que(hba);
+cid_que_err:
+	bnx2i_free_mp_bdt(hba);
+mp_bdt_mem_err:
+	if (hba->regview) {
+		iounmap(hba->regview);
+		hba->regview = NULL;
+	}
+ioreg_map_err:
+	pci_dev_put(hba->pcidev);
+	scsi_host_put(shost);
+	return NULL;
+}
+
+/**
+ * bnx2i_free_hba- releases hba structure and resources held by the adapter
+ * @hba:	pointer to adapter instance
+ *
+ * free adapter structure and call various cleanup routines.
+ */
+void bnx2i_free_hba(struct bnx2i_hba *hba)
+{
+	struct Scsi_Host *shost = hba->shost;
+
+	iscsi_host_remove(shost);
+	INIT_LIST_HEAD(&hba->ep_ofld_list);
+	INIT_LIST_HEAD(&hba->ep_destroy_list);
+	pci_dev_put(hba->pcidev);
+
+	if (hba->regview) {
+		iounmap(hba->regview);
+		hba->regview = NULL;
+	}
+	bnx2i_free_mp_bdt(hba);
+	bnx2i_release_free_cid_que(hba);
+	iscsi_host_free(shost);
+}
+
+/**
+ * bnx2i_conn_free_login_resources - free DMA resources used for login process
+ * @hba:		pointer to adapter instance
+ * @bnx2i_conn:		iscsi connection pointer
+ *
+ * Login related resources, mostly BDT & payload DMA memory is freed
+ */
+static void bnx2i_conn_free_login_resources(struct bnx2i_hba *hba,
+					    struct bnx2i_conn *bnx2i_conn)
+{
+	if (bnx2i_conn->gen_pdu.resp_bd_tbl) {
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  bnx2i_conn->gen_pdu.resp_bd_tbl,
+				  bnx2i_conn->gen_pdu.resp_bd_dma);
+		bnx2i_conn->gen_pdu.resp_bd_tbl = NULL;
+	}
+
+	if (bnx2i_conn->gen_pdu.req_bd_tbl) {
+		dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				  bnx2i_conn->gen_pdu.req_bd_tbl,
+				  bnx2i_conn->gen_pdu.req_bd_dma);
+		bnx2i_conn->gen_pdu.req_bd_tbl = NULL;
+	}
+
+	if (bnx2i_conn->gen_pdu.resp_buf) {
+		dma_free_coherent(&hba->pcidev->dev,
+				  ISCSI_DEF_MAX_RECV_SEG_LEN,
+				  bnx2i_conn->gen_pdu.resp_buf,
+				  bnx2i_conn->gen_pdu.resp_dma_addr);
+		bnx2i_conn->gen_pdu.resp_buf = NULL;
+	}
+
+	if (bnx2i_conn->gen_pdu.req_buf) {
+		dma_free_coherent(&hba->pcidev->dev,
+				  ISCSI_DEF_MAX_RECV_SEG_LEN,
+				  bnx2i_conn->gen_pdu.req_buf,
+				  bnx2i_conn->gen_pdu.req_dma_addr);
+		bnx2i_conn->gen_pdu.req_buf = NULL;
+	}
+}
+
+/**
+ * bnx2i_conn_alloc_login_resources - alloc DMA resources for login/nop.
+ * @hba:		pointer to adapter instance
+ * @bnx2i_conn:		iscsi connection pointer
+ *
+ * Mgmt task DNA resources are allocated in this routine.
+ */
+static int bnx2i_conn_alloc_login_resources(struct bnx2i_hba *hba,
+					    struct bnx2i_conn *bnx2i_conn)
+{
+	/* Allocate memory for login request/response buffers */
+	bnx2i_conn->gen_pdu.req_buf =
+		dma_alloc_coherent(&hba->pcidev->dev,
+				   ISCSI_DEF_MAX_RECV_SEG_LEN,
+				   &bnx2i_conn->gen_pdu.req_dma_addr,
+				   GFP_KERNEL);
+	if (bnx2i_conn->gen_pdu.req_buf == NULL)
+		goto login_req_buf_failure;
+
+	bnx2i_conn->gen_pdu.req_buf_size = 0;
+	bnx2i_conn->gen_pdu.req_wr_ptr = bnx2i_conn->gen_pdu.req_buf;
+
+	bnx2i_conn->gen_pdu.resp_buf =
+		dma_alloc_coherent(&hba->pcidev->dev,
+				   ISCSI_DEF_MAX_RECV_SEG_LEN,
+				   &bnx2i_conn->gen_pdu.resp_dma_addr,
+				   GFP_KERNEL);
+	if (bnx2i_conn->gen_pdu.resp_buf == NULL)
+		goto login_resp_buf_failure;
+
+	bnx2i_conn->gen_pdu.resp_buf_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
+	bnx2i_conn->gen_pdu.resp_wr_ptr = bnx2i_conn->gen_pdu.resp_buf;
+
+	bnx2i_conn->gen_pdu.req_bd_tbl =
+		dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				   &bnx2i_conn->gen_pdu.req_bd_dma, GFP_KERNEL);
+	if (bnx2i_conn->gen_pdu.req_bd_tbl == NULL)
+		goto login_req_bd_tbl_failure;
+
+	bnx2i_conn->gen_pdu.resp_bd_tbl =
+		dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE,
+				   &bnx2i_conn->gen_pdu.resp_bd_dma,
+				   GFP_KERNEL);
+	if (bnx2i_conn->gen_pdu.resp_bd_tbl == NULL)
+		goto login_resp_bd_tbl_failure;
+
+	return 0;
+
+login_resp_bd_tbl_failure:
+	dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE,
+			  bnx2i_conn->gen_pdu.req_bd_tbl,
+			  bnx2i_conn->gen_pdu.req_bd_dma);
+	bnx2i_conn->gen_pdu.req_bd_tbl = NULL;
+
+login_req_bd_tbl_failure:
+	dma_free_coherent(&hba->pcidev->dev, ISCSI_DEF_MAX_RECV_SEG_LEN,
+			  bnx2i_conn->gen_pdu.resp_buf,
+			  bnx2i_conn->gen_pdu.resp_dma_addr);
+	bnx2i_conn->gen_pdu.resp_buf = NULL;
+login_resp_buf_failure:
+	dma_free_coherent(&hba->pcidev->dev, ISCSI_DEF_MAX_RECV_SEG_LEN,
+			  bnx2i_conn->gen_pdu.req_buf,
+			  bnx2i_conn->gen_pdu.req_dma_addr);
+	bnx2i_conn->gen_pdu.req_buf = NULL;
+login_req_buf_failure:
+	iscsi_conn_printk(KERN_ERR, bnx2i_conn->cls_conn->dd_data,
+			  "login resource alloc failed!!\n");
+	return -ENOMEM;
+
+}
+
+
+/**
+ * bnx2i_iscsi_prep_generic_pdu_bd - prepares BD table.
+ * @bnx2i_conn:		iscsi connection pointer
+ *
+ * Allocates buffers and BD tables before shipping requests to cnic
+ *	for PDUs prepared by 'iscsid' daemon
+ */
+static void bnx2i_iscsi_prep_generic_pdu_bd(struct bnx2i_conn *bnx2i_conn)
+{
+	struct iscsi_bd *bd_tbl;
+
+	bd_tbl = (struct iscsi_bd *) bnx2i_conn->gen_pdu.req_bd_tbl;
+
+	bd_tbl->buffer_addr_hi =
+		(u32) ((u64) bnx2i_conn->gen_pdu.req_dma_addr >> 32);
+	bd_tbl->buffer_addr_lo = (u32) bnx2i_conn->gen_pdu.req_dma_addr;
+	bd_tbl->buffer_length = bnx2i_conn->gen_pdu.req_wr_ptr -
+				bnx2i_conn->gen_pdu.req_buf;
+	bd_tbl->reserved0 = 0;
+	bd_tbl->flags = ISCSI_BD_LAST_IN_BD_CHAIN |
+			ISCSI_BD_FIRST_IN_BD_CHAIN;
+
+	bd_tbl = (struct iscsi_bd  *) bnx2i_conn->gen_pdu.resp_bd_tbl;
+	bd_tbl->buffer_addr_hi = (u64) bnx2i_conn->gen_pdu.resp_dma_addr >> 32;
+	bd_tbl->buffer_addr_lo = (u32) bnx2i_conn->gen_pdu.resp_dma_addr;
+	bd_tbl->buffer_length = ISCSI_DEF_MAX_RECV_SEG_LEN;
+	bd_tbl->reserved0 = 0;
+	bd_tbl->flags = ISCSI_BD_LAST_IN_BD_CHAIN |
+			ISCSI_BD_FIRST_IN_BD_CHAIN;
+}
+
+
+/**
+ * bnx2i_iscsi_send_generic_request - called to send mgmt tasks.
+ * @task:	transport layer task pointer
+ *
+ * called to transmit PDUs prepared by the 'iscsid' daemon. iSCSI login,
+ *	Nop-out and Logout requests flow through this path.
+ */
+static int bnx2i_iscsi_send_generic_request(struct iscsi_task *task)
+{
+	struct bnx2i_cmd *cmd = task->dd_data;
+	struct bnx2i_conn *bnx2i_conn = cmd->conn;
+	int rc = 0;
+	char *buf;
+	int data_len;
+
+	bnx2i_iscsi_prep_generic_pdu_bd(bnx2i_conn);
+	switch (task->hdr->opcode & ISCSI_OPCODE_MASK) {
+	case ISCSI_OP_LOGIN:
+		bnx2i_send_iscsi_login(bnx2i_conn, task);
+		break;
+	case ISCSI_OP_NOOP_OUT:
+		data_len = bnx2i_conn->gen_pdu.req_buf_size;
+		buf = bnx2i_conn->gen_pdu.req_buf;
+		if (data_len)
+			rc = bnx2i_send_iscsi_nopout(bnx2i_conn, task,
+						     RESERVED_ITT,
+						     buf, data_len, 1);
+		else
+			rc = bnx2i_send_iscsi_nopout(bnx2i_conn, task,
+						     RESERVED_ITT,
+						     NULL, 0, 1);
+		break;
+	case ISCSI_OP_LOGOUT:
+		rc = bnx2i_send_iscsi_logout(bnx2i_conn, task);
+		break;
+	case ISCSI_OP_SCSI_TMFUNC:
+		rc = bnx2i_send_iscsi_tmf(bnx2i_conn, task);
+		break;
+	default:
+		iscsi_conn_printk(KERN_ALERT, bnx2i_conn->cls_conn->dd_data,
+				  "send_gen: unsupported op 0x%x\n",
+				  task->hdr->opcode);
+	}
+	return rc;
+}
+
+
+/**********************************************************************
+ *		SCSI-ML Interface
+ **********************************************************************/
+
+/**
+ * bnx2i_cpy_scsi_cdb - copies LUN & CDB fields in required format to sq wqe
+ * @sc:		SCSI-ML command pointer
+ * @cmd:	iscsi cmd pointer
+ */
+static void bnx2i_cpy_scsi_cdb(struct scsi_cmnd *sc, struct bnx2i_cmd *cmd)
+{
+	u32 dword;
+	int lpcnt;
+	u8 *srcp;
+	u32 *dstp;
+	u32 scsi_lun[2];
+
+	int_to_scsilun(sc->device->lun, (struct scsi_lun *) scsi_lun);
+	cmd->req.lun[0] = be32_to_cpu(scsi_lun[0]);
+	cmd->req.lun[1] = be32_to_cpu(scsi_lun[1]);
+
+	lpcnt = cmd->scsi_cmd->cmd_len / sizeof(dword);
+	srcp = (u8 *) sc->cmnd;
+	dstp = (u32 *) cmd->req.cdb;
+	while (lpcnt--) {
+		memcpy(&dword, (const void *) srcp, 4);
+		*dstp = cpu_to_be32(dword);
+		srcp += 4;
+		dstp++;
+	}
+	if (sc->cmd_len & 0x3) {
+		dword = (u32) srcp[0] | ((u32) srcp[1] << 8);
+		*dstp = cpu_to_be32(dword);
+	}
+}
+
+static void bnx2i_cleanup_task(struct iscsi_task *task)
+{
+	struct iscsi_conn *conn = task->conn;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct bnx2i_hba *hba = bnx2i_conn->hba;
+
+	/*
+	 * mgmt task or cmd was never sent to us to transmit.
+	 */
+	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+		return;
+	/*
+	 * need to clean-up task context to claim dma buffers
+	 */
+	if (task->state == ISCSI_TASK_ABRT_TMF) {
+		bnx2i_send_cmd_cleanup_req(hba, task->dd_data);
+
+		spin_unlock_bh(&conn->session->lock);
+		wait_for_completion_timeout(&bnx2i_conn->cmd_cleanup_cmpl,
+				msecs_to_jiffies(ISCSI_CMD_CLEANUP_TIMEOUT));
+		spin_lock_bh(&conn->session->lock);
+	}
+	bnx2i_iscsi_unmap_sg_list(task->dd_data);
+}
+
+/**
+ * bnx2i_mtask_xmit - transmit mtask to chip for further processing
+ * @conn:	transport layer conn structure pointer
+ * @task:	transport layer command structure pointer
+ */
+static int
+bnx2i_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task)
+{
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct bnx2i_cmd *cmd = task->dd_data;
+
+	memset(bnx2i_conn->gen_pdu.req_buf, 0, ISCSI_DEF_MAX_RECV_SEG_LEN);
+
+	bnx2i_setup_cmd_wqe_template(cmd);
+	bnx2i_conn->gen_pdu.req_buf_size = task->data_count;
+	if (task->data_count) {
+		memcpy(bnx2i_conn->gen_pdu.req_buf, task->data,
+		       task->data_count);
+		bnx2i_conn->gen_pdu.req_wr_ptr =
+			bnx2i_conn->gen_pdu.req_buf + task->data_count;
+	}
+	cmd->conn = conn->dd_data;
+	cmd->scsi_cmd = NULL;
+	return bnx2i_iscsi_send_generic_request(task);
+}
+
+/**
+ * bnx2i_task_xmit - transmit iscsi command to chip for further processing
+ * @task:	transport layer command structure pointer
+ *
+ * maps SG buffers and send request to chip/firmware in the form of SQ WQE
+ */
+static int bnx2i_task_xmit(struct iscsi_task *task)
+{
+	struct iscsi_conn *conn = task->conn;
+	struct iscsi_session *session = conn->session;
+	struct Scsi_Host *shost = iscsi_session_to_shost(session->cls_session);
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct scsi_cmnd *sc = task->sc;
+	struct bnx2i_cmd *cmd = task->dd_data;
+	struct iscsi_cmd *hdr = (struct iscsi_cmd *) task->hdr;
+
+	if (test_bit(ADAPTER_STATE_LINK_DOWN, &hba->adapter_state))
+		return -ENOTCONN;
+
+	if (!bnx2i_conn->is_bound)
+		return -ENOTCONN;
+
+	/*
+	 * If there is no scsi_cmnd this must be a mgmt task
+	 */
+	if (!sc)
+		return bnx2i_mtask_xmit(conn, task);
+
+	bnx2i_setup_cmd_wqe_template(cmd);
+	cmd->req.op_code = ISCSI_OP_SCSI_CMD;
+	cmd->conn = bnx2i_conn;
+	cmd->scsi_cmd = sc;
+	cmd->req.total_data_transfer_length = scsi_bufflen(sc);
+	cmd->req.cmd_sn = be32_to_cpu(hdr->cmdsn);
+
+	bnx2i_iscsi_map_sg_list(cmd);
+	bnx2i_cpy_scsi_cdb(sc, cmd);
+
+	cmd->req.op_attr = ISCSI_ATTR_SIMPLE;
+	if (sc->sc_data_direction == DMA_TO_DEVICE) {
+		cmd->req.op_attr |= ISCSI_CMD_REQUEST_WRITE;
+		cmd->req.itt = task->itt |
+			(ISCSI_TASK_TYPE_WRITE << ISCSI_CMD_REQUEST_TYPE_SHIFT);
+		bnx2i_setup_write_cmd_bd_info(task);
+	} else {
+		if (scsi_bufflen(sc))
+			cmd->req.op_attr |= ISCSI_CMD_REQUEST_READ;
+		cmd->req.itt = task->itt |
+			(ISCSI_TASK_TYPE_READ << ISCSI_CMD_REQUEST_TYPE_SHIFT);
+	}
+
+	cmd->req.num_bds = cmd->io_tbl.bd_valid;
+	if (!cmd->io_tbl.bd_valid) {
+		cmd->req.bd_list_addr_lo = (u32) hba->mp_bd_dma;
+		cmd->req.bd_list_addr_hi = (u32) ((u64) hba->mp_bd_dma >> 32);
+		cmd->req.num_bds = 1;
+	}
+
+	bnx2i_send_iscsi_scsicmd(bnx2i_conn, cmd);
+	return 0;
+}
+
+/**
+ * bnx2i_session_create - create a new iscsi session
+ * @cmds_max:		max commands supported
+ * @qdepth:		scsi queue depth to support
+ * @initial_cmdsn:	initial iscsi CMDSN to be used for this session
+ *
+ * Creates a new iSCSI session instance on given device.
+ */
+static struct iscsi_cls_session *
+bnx2i_session_create(struct iscsi_endpoint *ep,
+		     uint16_t cmds_max, uint16_t qdepth,
+		     uint32_t initial_cmdsn)
+{
+	struct Scsi_Host *shost;
+	struct iscsi_cls_session *cls_session;
+	struct bnx2i_hba *hba;
+	struct bnx2i_endpoint *bnx2i_ep;
+
+	if (!ep) {
+		printk(KERN_ERR "bnx2i: missing ep.\n");
+		return NULL;
+	}
+
+	bnx2i_ep = ep->dd_data;
+	shost = bnx2i_ep->hba->shost;
+	hba = iscsi_host_priv(shost);
+	if (bnx2i_adapter_ready(hba))
+		return NULL;
+
+	/*
+	 * user can override hw limit as long as it is within
+	 * the min/max.
+	 */
+	if (cmds_max > hba->max_sqes)
+		cmds_max = hba->max_sqes;
+	else if (cmds_max < BNX2I_SQ_WQES_MIN)
+		cmds_max = BNX2I_SQ_WQES_MIN;
+
+	cls_session = iscsi_session_setup(&bnx2i_iscsi_transport, shost,
+					  cmds_max, sizeof(struct bnx2i_cmd),
+					  initial_cmdsn, ISCSI_MAX_TARGET);
+	if (!cls_session)
+		return NULL;
+
+	if (bnx2i_setup_cmd_pool(hba, cls_session->dd_data))
+		goto session_teardown;
+	return cls_session;
+
+session_teardown:
+	iscsi_session_teardown(cls_session);
+	return NULL;
+}
+
+
+/**
+ * bnx2i_session_destroy - destroys iscsi session
+ * @cls_session:	pointer to iscsi cls session
+ *
+ * Destroys previously created iSCSI session instance and releases
+ *	all resources held by it
+ */
+static void bnx2i_session_destroy(struct iscsi_cls_session *cls_session)
+{
+	struct iscsi_session *session = cls_session->dd_data;
+	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+
+	bnx2i_destroy_cmd_pool(hba, session);
+	iscsi_session_teardown(cls_session);
+}
+
+
+/**
+ * bnx2i_conn_create - create iscsi connection instance
+ * @cls_session:	pointer to iscsi cls session
+ * @cid:		iscsi cid as per rfc (not NX2's CID terminology)
+ *
+ * Creates a new iSCSI connection instance for a given session
+ */
+static struct iscsi_cls_conn *
+bnx2i_conn_create(struct iscsi_cls_session *cls_session, uint32_t cid)
+{
+	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	struct bnx2i_conn *bnx2i_conn;
+	struct iscsi_cls_conn *cls_conn;
+	struct iscsi_conn *conn;
+
+	cls_conn = iscsi_conn_setup(cls_session, sizeof(*bnx2i_conn),
+				    cid);
+	if (!cls_conn)
+		return NULL;
+	conn = cls_conn->dd_data;
+
+	bnx2i_conn = conn->dd_data;
+	bnx2i_conn->cls_conn = cls_conn;
+	bnx2i_conn->hba = hba;
+	/* 'ep' ptr will be assigned in bind() call */
+	bnx2i_conn->ep = NULL;
+	init_completion(&bnx2i_conn->cmd_cleanup_cmpl);
+
+	if (bnx2i_conn_alloc_login_resources(hba, bnx2i_conn)) {
+		iscsi_conn_printk(KERN_ALERT, conn,
+				  "conn_new: login resc alloc failed!!\n");
+		goto free_conn;
+	}
+
+	return cls_conn;
+
+free_conn:
+	iscsi_conn_teardown(cls_conn);
+	return NULL;
+}
+
+/**
+ * bnx2i_conn_bind - binds iscsi sess, conn and ep objects together
+ * @cls_session:	pointer to iscsi cls session
+ * @cls_conn:		pointer to iscsi cls conn
+ * @transport_fd:	64-bit EP handle
+ * @is_leading:		leading connection on this session?
+ *
+ * Binds together iSCSI session instance, iSCSI connection instance
+ *	and the TCP connection. This routine returns error code if
+ *	TCP connection does not belong on the device iSCSI sess/conn
+ *	is bound
+ */
+static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session,
+			   struct iscsi_cls_conn *cls_conn,
+			   uint64_t transport_fd, int is_leading)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	struct bnx2i_endpoint *bnx2i_ep;
+	struct iscsi_endpoint *ep;
+	int ret_code;
+
+	ep = iscsi_lookup_endpoint(transport_fd);
+	if (!ep)
+		return -EINVAL;
+
+	bnx2i_ep = ep->dd_data;
+	if ((bnx2i_ep->state == EP_STATE_TCP_FIN_RCVD) ||
+	    (bnx2i_ep->state == EP_STATE_TCP_RST_RCVD))
+		/* Peer disconnect via' FIN or RST */
+		return -EINVAL;
+
+	if (iscsi_conn_bind(cls_session, cls_conn, is_leading))
+		return -EINVAL;
+
+	if (bnx2i_ep->hba != hba) {
+		/* Error - TCP connection does not belong to this device
+		 */
+		iscsi_conn_printk(KERN_ALERT, cls_conn->dd_data,
+				  "conn bind, ep=0x%p (%s) does not",
+				  bnx2i_ep, bnx2i_ep->hba->netdev->name);
+		iscsi_conn_printk(KERN_ALERT, cls_conn->dd_data,
+				  "belong to hba (%s)\n",
+				  hba->netdev->name);
+		return -EEXIST;
+	}
+
+	bnx2i_ep->conn = bnx2i_conn;
+	bnx2i_conn->ep = bnx2i_ep;
+	bnx2i_conn->iscsi_conn_cid = bnx2i_ep->ep_iscsi_cid;
+	bnx2i_conn->fw_cid = bnx2i_ep->ep_cid;
+	bnx2i_conn->is_bound = 1;
+
+	ret_code = bnx2i_bind_conn_to_iscsi_cid(hba, bnx2i_conn,
+						bnx2i_ep->ep_iscsi_cid);
+
+	/* 5706/5708/5709 FW takes RQ as full when initiated, but for 57710
+	 * driver needs to explicitly replenish RQ index during setup.
+	 */
+	if (test_bit(BNX2I_NX2_DEV_57710, &bnx2i_ep->hba->cnic_dev_type))
+		bnx2i_put_rq_buf(bnx2i_conn, 0);
+
+	bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE);
+	return ret_code;
+}
+
+
+/**
+ * bnx2i_conn_destroy - destroy iscsi connection instance & release resources
+ * @cls_conn:	pointer to iscsi cls conn
+ *
+ * Destroy an iSCSI connection instance and release memory resources held by
+ *	this connection
+ */
+static void bnx2i_conn_destroy(struct iscsi_cls_conn *cls_conn)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	struct Scsi_Host *shost;
+	struct bnx2i_hba *hba;
+
+	shost = iscsi_session_to_shost(iscsi_conn_to_session(cls_conn));
+	hba = iscsi_host_priv(shost);
+
+	bnx2i_conn_free_login_resources(hba, bnx2i_conn);
+	iscsi_conn_teardown(cls_conn);
+}
+
+
+/**
+ * bnx2i_conn_get_param - return iscsi connection parameter to caller
+ * @cls_conn:	pointer to iscsi cls conn
+ * @param:	parameter type identifier
+ * @buf: 	buffer pointer
+ *
+ * returns iSCSI connection parameters
+ */
+static int bnx2i_conn_get_param(struct iscsi_cls_conn *cls_conn,
+				enum iscsi_param param, char *buf)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+	int len = 0;
+
+	switch (param) {
+	case ISCSI_PARAM_CONN_PORT:
+		if (bnx2i_conn->ep)
+			len = sprintf(buf, "%hu\n",
+				      bnx2i_conn->ep->cm_sk->dst_port);
+		break;
+	case ISCSI_PARAM_CONN_ADDRESS:
+		if (bnx2i_conn->ep)
+			len = sprintf(buf, NIPQUAD_FMT "\n",
+				      NIPQUAD(bnx2i_conn->ep->cm_sk->dst_ip));
+		break;
+	default:
+		return iscsi_conn_get_param(cls_conn, param, buf);
+	}
+
+	return len;
+}
+
+/**
+ * bnx2i_host_get_param - returns host (adapter) related parameters
+ * @shost:	scsi host pointer
+ * @param:	parameter type identifier
+ * @buf:	buffer pointer
+ */
+static int bnx2i_host_get_param(struct Scsi_Host *shost,
+				enum iscsi_host_param param, char *buf)
+{
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	int len = 0;
+
+	switch (param) {
+	case ISCSI_HOST_PARAM_HWADDRESS:
+		len = sysfs_format_mac(buf, hba->cnic->mac_addr, 6);
+		break;
+	case ISCSI_HOST_PARAM_NETDEV_NAME:
+		len = sprintf(buf, "%s\n", hba->netdev->name);
+		break;
+	default:
+		return iscsi_host_get_param(shost, param, buf);
+	}
+	return len;
+}
+
+/**
+ * bnx2i_conn_start - completes iscsi connection migration to FFP
+ * @cls_conn:	pointer to iscsi cls conn
+ *
+ * last call in FFP migration to handover iscsi conn to the driver
+ */
+static int bnx2i_conn_start(struct iscsi_cls_conn *cls_conn)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct bnx2i_conn *bnx2i_conn = conn->dd_data;
+
+	bnx2i_conn->ep->state = EP_STATE_ULP_UPDATE_START;
+	bnx2i_update_iscsi_conn(conn);
+
+	/*
+	 * this should normally not sleep for a long time so it should
+	 * not disrupt the caller.
+	 */
+	bnx2i_conn->ep->ofld_timer.expires = 1 * HZ + jiffies;
+	bnx2i_conn->ep->ofld_timer.function = bnx2i_ep_ofld_timer;
+	bnx2i_conn->ep->ofld_timer.data = (unsigned long) bnx2i_conn->ep;
+	add_timer(&bnx2i_conn->ep->ofld_timer);
+	/* update iSCSI context for this conn, wait for CNIC to complete */
+	wait_event_interruptible(bnx2i_conn->ep->ofld_wait,
+			bnx2i_conn->ep->state != EP_STATE_ULP_UPDATE_START);
+
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&bnx2i_conn->ep->ofld_timer);
+
+	iscsi_conn_start(cls_conn);
+	return 0;
+}
+
+
+/**
+ * bnx2i_conn_get_stats - returns iSCSI stats
+ * @cls_conn:	pointer to iscsi cls conn
+ * @stats:	pointer to iscsi statistic struct
+ */
+static void bnx2i_conn_get_stats(struct iscsi_cls_conn *cls_conn,
+				 struct iscsi_stats *stats)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+
+	stats->txdata_octets = conn->txdata_octets;
+	stats->rxdata_octets = conn->rxdata_octets;
+	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
+	stats->dataout_pdus = conn->dataout_pdus_cnt;
+	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
+	stats->datain_pdus = conn->datain_pdus_cnt;
+	stats->r2t_pdus = conn->r2t_pdus_cnt;
+	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
+	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
+	stats->custom_length = 3;
+	strcpy(stats->custom[2].desc, "eh_abort_cnt");
+	stats->custom[2].value = conn->eh_abort_cnt;
+	stats->digest_err = 0;
+	stats->timeout_err = 0;
+	stats->custom_length = 0;
+}
+
+
+/**
+ * bnx2i_check_route - checks if target IP route belongs to one of NX2 devices
+ * @dst_addr:	target IP address
+ *
+ * check if route resolves to BNX2 device
+ */
+static struct bnx2i_hba *bnx2i_check_route(struct sockaddr *dst_addr)
+{
+	struct sockaddr_in *desti = (struct sockaddr_in *) dst_addr;
+	struct bnx2i_hba *hba;
+	struct cnic_dev *cnic = NULL;
+
+	bnx2i_reg_dev_all();
+
+	hba = get_adapter_list_head();
+	if (hba && hba->cnic)
+		cnic = hba->cnic->cm_select_dev(desti, CNIC_ULP_ISCSI);
+	if (!cnic) {
+		printk(KERN_ALERT "bnx2i: no route,"
+		       "can't connect using cnic\n");
+		goto no_nx2_route;
+	}
+	hba = bnx2i_find_hba_for_cnic(cnic);
+	if (!hba)
+		goto no_nx2_route;
+
+	if (bnx2i_adapter_ready(hba)) {
+		printk(KERN_ALERT "bnx2i: check route, hba not found\n");
+		goto no_nx2_route;
+	}
+	if (hba->netdev->mtu > hba->mtu_supported) {
+		printk(KERN_ALERT "bnx2i: %s network i/f mtu is set to %d\n",
+				  hba->netdev->name, hba->netdev->mtu);
+		printk(KERN_ALERT "bnx2i: iSCSI HBA can support mtu of %d\n",
+				  hba->mtu_supported);
+		goto no_nx2_route;
+	}
+	return hba;
+no_nx2_route:
+	return NULL;
+}
+
+
+/**
+ * bnx2i_tear_down_conn - tear down iscsi/tcp connection and free resources
+ * @hba:	pointer to adapter instance
+ * @ep:		endpoint (transport indentifier) structure
+ *
+ * destroys cm_sock structure and on chip iscsi context
+ */
+static int bnx2i_tear_down_conn(struct bnx2i_hba *hba,
+				 struct bnx2i_endpoint *ep)
+{
+	if (test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic))
+		hba->cnic->cm_destroy(ep->cm_sk);
+
+	if (test_bit(ADAPTER_STATE_GOING_DOWN, &ep->hba->adapter_state))
+		ep->state = EP_STATE_DISCONN_COMPL;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type) &&
+	    ep->state == EP_STATE_DISCONN_TIMEDOUT) {
+		printk(KERN_ALERT "bnx2i - ERROR - please submit GRC Dump,"
+				  " NW/PCIe trace, driver msgs to developers"
+				  " for analysis\n");
+		return 1;
+	}
+
+	ep->state = EP_STATE_CLEANUP_START;
+	init_timer(&ep->ofld_timer);
+	ep->ofld_timer.expires = 10*HZ + jiffies;
+	ep->ofld_timer.function = bnx2i_ep_ofld_timer;
+	ep->ofld_timer.data = (unsigned long) ep;
+	add_timer(&ep->ofld_timer);
+
+	bnx2i_ep_destroy_list_add(hba, ep);
+
+	/* destroy iSCSI context, wait for it to complete */
+	bnx2i_send_conn_destroy(hba, ep);
+	wait_event_interruptible(ep->ofld_wait,
+				 (ep->state != EP_STATE_CLEANUP_START));
+
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&ep->ofld_timer);
+
+	bnx2i_ep_destroy_list_del(hba, ep);
+
+	if (ep->state != EP_STATE_CLEANUP_CMPL)
+		/* should never happen */
+		printk(KERN_ALERT "bnx2i - conn destroy failed\n");
+
+	return 0;
+}
+
+
+/**
+ * bnx2i_ep_connect - establish TCP connection to target portal
+ * @shost:		scsi host
+ * @dst_addr:		target IP address
+ * @non_blocking:	blocking or non-blocking call
+ *
+ * this routine initiates the TCP/IP connection by invoking Option-2 i/f
+ *	with l5_core and the CNIC. This is a multi-step process of resolving
+ *	route to target, create a iscsi connection context, handshaking with
+ *	CNIC module to create/initialize the socket struct and finally
+ *	sending down option-2 request to complete TCP 3-way handshake
+ */
+static struct iscsi_endpoint *bnx2i_ep_connect(struct Scsi_Host *shost,
+					       struct sockaddr *dst_addr,
+					       int non_blocking)
+{
+	u32 iscsi_cid = BNX2I_CID_RESERVED;
+	struct sockaddr_in *desti = (struct sockaddr_in *) dst_addr;
+	struct sockaddr_in6 *desti6;
+	struct bnx2i_endpoint *bnx2i_ep;
+	struct bnx2i_hba *hba;
+	struct cnic_dev *cnic;
+	struct cnic_sockaddr saddr;
+	struct iscsi_endpoint *ep;
+	int rc = 0;
+
+	if (shost)
+		/* driver is given scsi host to work with */
+		hba = iscsi_host_priv(shost);
+	else
+		/*
+		 * check if the given destination can be reached through
+		 * a iscsi capable NetXtreme2 device
+		 */
+		hba = bnx2i_check_route(dst_addr);
+	if (!hba) {
+		rc = -ENOMEM;
+		goto check_busy;
+	}
+
+	cnic = hba->cnic;
+	ep = bnx2i_alloc_ep(hba);
+	if (!ep) {
+		rc = -ENOMEM;
+		goto check_busy;
+	}
+	bnx2i_ep = ep->dd_data;
+
+	mutex_lock(&hba->net_dev_lock);
+	if (bnx2i_adapter_ready(hba)) {
+		rc = -EPERM;
+		goto net_if_down;
+	}
+
+	bnx2i_ep->state = EP_STATE_IDLE;
+	bnx2i_ep->ep_iscsi_cid = (u16) -1;
+	bnx2i_ep->num_active_cmds = 0;
+	iscsi_cid = bnx2i_alloc_iscsi_cid(hba);
+	if (iscsi_cid == -1) {
+		printk(KERN_ALERT "alloc_ep: unable to allocate iscsi cid\n");
+		rc = -ENOMEM;
+		goto iscsi_cid_err;
+	}
+	bnx2i_ep->hba_age = hba->age;
+
+	rc = bnx2i_alloc_qp_resc(hba, bnx2i_ep);
+	if (rc != 0) {
+		printk(KERN_ALERT "bnx2i: ep_conn, alloc QP resc error\n");
+		rc = -ENOMEM;
+		goto qp_resc_err;
+	}
+
+	bnx2i_ep->ep_iscsi_cid = (u16)iscsi_cid;
+	bnx2i_ep->state = EP_STATE_OFLD_START;
+	bnx2i_ep_ofld_list_add(hba, bnx2i_ep);
+
+	init_timer(&bnx2i_ep->ofld_timer);
+	bnx2i_ep->ofld_timer.expires = 2 * HZ + jiffies;
+	bnx2i_ep->ofld_timer.function = bnx2i_ep_ofld_timer;
+	bnx2i_ep->ofld_timer.data = (unsigned long) bnx2i_ep;
+	add_timer(&bnx2i_ep->ofld_timer);
+
+	bnx2i_send_conn_ofld_req(hba, bnx2i_ep);
+
+	/* Wait for CNIC hardware to setup conn context and return 'cid' */
+	wait_event_interruptible(bnx2i_ep->ofld_wait,
+				 bnx2i_ep->state != EP_STATE_OFLD_START);
+
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&bnx2i_ep->ofld_timer);
+
+	bnx2i_ep_ofld_list_del(hba, bnx2i_ep);
+
+	if (bnx2i_ep->state != EP_STATE_OFLD_COMPL) {
+		rc = -ENOSPC;
+		goto conn_failed;
+	}
+
+	rc = cnic->cm_create(cnic, CNIC_ULP_ISCSI, bnx2i_ep->ep_cid,
+			     iscsi_cid, &bnx2i_ep->cm_sk, bnx2i_ep);
+	if (rc) {
+		rc = -EINVAL;
+		goto conn_failed;
+	}
+
+	bnx2i_ep->cm_sk->rcv_buf = 256 * 1024;
+	bnx2i_ep->cm_sk->snd_buf = 256 * 1024;
+	clear_bit(SK_TCP_TIMESTAMP, &bnx2i_ep->cm_sk->tcp_flags);
+
+	memset(&saddr, 0, sizeof(saddr));
+	if (dst_addr->sa_family == AF_INET) {
+		desti = (struct sockaddr_in *) dst_addr;
+		saddr.remote.v4 = *desti;
+		saddr.local.v4.sin_family = desti->sin_family;
+	} else if (dst_addr->sa_family == AF_INET6) {
+		desti6 = (struct sockaddr_in6 *) dst_addr;
+		saddr.remote.v6 = *desti6;
+		saddr.local.v6.sin6_family = desti6->sin6_family;
+	}
+
+	bnx2i_ep->timestamp = jiffies;
+	bnx2i_ep->state = EP_STATE_CONNECT_START;
+	if (!test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		rc = -EINVAL;
+		goto conn_failed;
+	} else
+		rc = cnic->cm_connect(bnx2i_ep->cm_sk, &saddr);
+
+	if (rc)
+		goto release_ep;
+
+	if (bnx2i_map_ep_dbell_regs(bnx2i_ep))
+		goto release_ep;
+	mutex_unlock(&hba->net_dev_lock);
+	return ep;
+
+release_ep:
+	if (bnx2i_tear_down_conn(hba, bnx2i_ep)) {
+		mutex_unlock(&hba->net_dev_lock);
+		return ERR_PTR(rc);
+	}
+conn_failed:
+net_if_down:
+iscsi_cid_err:
+	bnx2i_free_qp_resc(hba, bnx2i_ep);
+qp_resc_err:
+	bnx2i_free_ep(ep);
+	mutex_unlock(&hba->net_dev_lock);
+check_busy:
+	bnx2i_unreg_dev_all();
+	return ERR_PTR(rc);
+}
+
+
+/**
+ * bnx2i_ep_poll - polls for TCP connection establishement
+ * @ep:			TCP connection (endpoint) handle
+ * @timeout_ms:		timeout value in milli secs
+ *
+ * polls for TCP connect request to complete
+ */
+static int bnx2i_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
+{
+	struct bnx2i_endpoint *bnx2i_ep;
+	int rc = 0;
+
+	bnx2i_ep = ep->dd_data;
+	if ((bnx2i_ep->state == EP_STATE_IDLE) ||
+	    (bnx2i_ep->state == EP_STATE_CONNECT_FAILED) ||
+	    (bnx2i_ep->state == EP_STATE_OFLD_FAILED))
+		return -1;
+	if (bnx2i_ep->state == EP_STATE_CONNECT_COMPL)
+		return 1;
+
+	rc = wait_event_interruptible_timeout(bnx2i_ep->ofld_wait,
+					      ((bnx2i_ep->state ==
+						EP_STATE_OFLD_FAILED) ||
+					       (bnx2i_ep->state ==
+						EP_STATE_CONNECT_FAILED) ||
+					       (bnx2i_ep->state ==
+						EP_STATE_CONNECT_COMPL)),
+					      msecs_to_jiffies(timeout_ms));
+	if (!rc || (bnx2i_ep->state == EP_STATE_OFLD_FAILED))
+		rc = -1;
+
+	if (rc > 0)
+		return 1;
+	else if (!rc)
+		return 0;	/* timeout */
+	else
+		return rc;
+}
+
+
+/**
+ * bnx2i_ep_tcp_conn_active - check EP state transition
+ * @ep:		endpoint pointer
+ *
+ * check if underlying TCP connection is active
+ */
+static int bnx2i_ep_tcp_conn_active(struct bnx2i_endpoint *bnx2i_ep)
+{
+	int ret;
+	int cnic_dev_10g = 0;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &bnx2i_ep->hba->cnic_dev_type))
+		cnic_dev_10g = 1;
+
+	switch (bnx2i_ep->state) {
+	case EP_STATE_CONNECT_START:
+	case EP_STATE_CLEANUP_FAILED:
+	case EP_STATE_OFLD_FAILED:
+	case EP_STATE_DISCONN_TIMEDOUT:
+		ret = 0;
+		break;
+	case EP_STATE_CONNECT_COMPL:
+	case EP_STATE_ULP_UPDATE_START:
+	case EP_STATE_ULP_UPDATE_COMPL:
+	case EP_STATE_TCP_FIN_RCVD:
+	case EP_STATE_ULP_UPDATE_FAILED:
+		ret = 1;
+		break;
+	case EP_STATE_TCP_RST_RCVD:
+		ret = 0;
+		break;
+	case EP_STATE_CONNECT_FAILED:
+		if (cnic_dev_10g)
+			ret = 1;
+		else
+			ret = 0;
+		break;
+	default:
+		ret = 0;
+	}
+
+	return ret;
+}
+
+
+/**
+ * bnx2i_ep_disconnect - executes TCP connection teardown process
+ * @ep:		TCP connection (endpoint) handle
+ *
+ * executes  TCP connection teardown process
+ */
+static void bnx2i_ep_disconnect(struct iscsi_endpoint *ep)
+{
+	struct bnx2i_endpoint *bnx2i_ep;
+	struct bnx2i_conn *bnx2i_conn = NULL;
+	struct iscsi_session *session = NULL;
+	struct iscsi_conn *conn;
+	struct cnic_dev *cnic;
+	struct bnx2i_hba *hba;
+
+	bnx2i_ep = ep->dd_data;
+
+	/* driver should not attempt connection cleanup untill TCP_CONNECT
+	 * completes either successfully or fails. Timeout is 9-secs, so
+	 * wait for it to complete
+	 */
+	while ((bnx2i_ep->state == EP_STATE_CONNECT_START) &&
+		!time_after(jiffies, bnx2i_ep->timestamp + (12 * HZ)))
+		msleep(250);
+
+	if (bnx2i_ep->conn) {
+		bnx2i_conn = bnx2i_ep->conn;
+		conn = bnx2i_conn->cls_conn->dd_data;
+		session = conn->session;
+
+		spin_lock_bh(&session->lock);
+		bnx2i_conn->is_bound = 0;
+		spin_unlock_bh(&session->lock);
+	}
+
+	hba = bnx2i_ep->hba;
+	if (bnx2i_ep->state == EP_STATE_IDLE)
+		goto return_bnx2i_ep;
+	cnic = hba->cnic;
+
+	mutex_lock(&hba->net_dev_lock);
+
+	if (!test_bit(ADAPTER_STATE_UP, &hba->adapter_state))
+		goto free_resc;
+	if (bnx2i_ep->hba_age != hba->age)
+		goto free_resc;
+
+	if (!bnx2i_ep_tcp_conn_active(bnx2i_ep))
+		goto destory_conn;
+
+	bnx2i_ep->state = EP_STATE_DISCONN_START;
+
+	init_timer(&bnx2i_ep->ofld_timer);
+	bnx2i_ep->ofld_timer.expires = 10*HZ + jiffies;
+	bnx2i_ep->ofld_timer.function = bnx2i_ep_ofld_timer;
+	bnx2i_ep->ofld_timer.data = (unsigned long) bnx2i_ep;
+	add_timer(&bnx2i_ep->ofld_timer);
+
+	if (test_bit(BNX2I_CNIC_REGISTERED, &hba->reg_with_cnic)) {
+		int close = 0;
+
+		if (session) {
+			spin_lock_bh(&session->lock);
+			if (session->state == ISCSI_STATE_LOGGING_OUT)
+				close = 1;
+			spin_unlock_bh(&session->lock);
+		}
+		if (close)
+			cnic->cm_close(bnx2i_ep->cm_sk);
+		else
+			cnic->cm_abort(bnx2i_ep->cm_sk);
+	} else
+		goto free_resc;
+
+	/* wait for option-2 conn teardown */
+	wait_event_interruptible(bnx2i_ep->ofld_wait,
+				 bnx2i_ep->state != EP_STATE_DISCONN_START);
+
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&bnx2i_ep->ofld_timer);
+
+destory_conn:
+	if (bnx2i_tear_down_conn(hba, bnx2i_ep)) {
+		mutex_unlock(&hba->net_dev_lock);
+		return;
+	}
+free_resc:
+	mutex_unlock(&hba->net_dev_lock);
+	bnx2i_free_qp_resc(hba, bnx2i_ep);
+return_bnx2i_ep:
+	if (bnx2i_conn)
+		bnx2i_conn->ep = NULL;
+
+	bnx2i_free_ep(ep);
+
+	if (!hba->ofld_conns_active)
+		bnx2i_unreg_dev_all();
+}
+
+
+/**
+ * bnx2i_nl_set_path - ISCSI_UEVENT_PATH_UPDATE user message handler
+ * @buf:	pointer to buffer containing iscsi path message
+ *
+ */
+static int bnx2i_nl_set_path(struct Scsi_Host *shost, struct iscsi_path *params)
+{
+	struct bnx2i_hba *hba = iscsi_host_priv(shost);
+	char *buf = (char *) params;
+	u16 len = sizeof(*params);
+
+	/* handled by cnic driver */
+	hba->cnic->iscsi_nl_msg_recv(hba->cnic, ISCSI_UEVENT_PATH_UPDATE, buf,
+				     len);
+
+	return 0;
+}
+
+
+/*
+ * 'Scsi_Host_Template' structure and 'iscsi_tranport' structure template
+ * used while registering with the scsi host and iSCSI transport module.
+ */
+static struct scsi_host_template bnx2i_host_template = {
+	.module			= THIS_MODULE,
+	.name			= "Broadcom Offload iSCSI Initiator",
+	.proc_name		= "bnx2i",
+	.queuecommand		= iscsi_queuecommand,
+	.eh_abort_handler	= iscsi_eh_abort,
+	.eh_device_reset_handler = iscsi_eh_device_reset,
+	.eh_target_reset_handler = iscsi_eh_target_reset,
+	.can_queue		= 1024,
+	.max_sectors		= 127,
+	.cmd_per_lun		= 32,
+	.this_id		= -1,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.sg_tablesize		= ISCSI_MAX_BDS_PER_CMD,
+	.shost_attrs		= bnx2i_dev_attributes,
+};
+
+struct iscsi_transport bnx2i_iscsi_transport = {
+	.owner			= THIS_MODULE,
+	.name			= "bnx2i",
+	.caps			= CAP_RECOVERY_L0 | CAP_HDRDGST |
+				  CAP_MULTI_R2T | CAP_DATADGST |
+				  CAP_DATA_PATH_OFFLOAD,
+	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
+				  ISCSI_MAX_XMIT_DLENGTH |
+				  ISCSI_HDRDGST_EN |
+				  ISCSI_DATADGST_EN |
+				  ISCSI_INITIAL_R2T_EN |
+				  ISCSI_MAX_R2T |
+				  ISCSI_IMM_DATA_EN |
+				  ISCSI_FIRST_BURST |
+				  ISCSI_MAX_BURST |
+				  ISCSI_PDU_INORDER_EN |
+				  ISCSI_DATASEQ_INORDER_EN |
+				  ISCSI_ERL |
+				  ISCSI_CONN_PORT |
+				  ISCSI_CONN_ADDRESS |
+				  ISCSI_EXP_STATSN |
+				  ISCSI_PERSISTENT_PORT |
+				  ISCSI_PERSISTENT_ADDRESS |
+				  ISCSI_TARGET_NAME | ISCSI_TPGT |
+				  ISCSI_USERNAME | ISCSI_PASSWORD |
+				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
+				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
+				  ISCSI_LU_RESET_TMO |
+				  ISCSI_PING_TMO | ISCSI_RECV_TMO |
+				  ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
+	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_NETDEV_NAME,
+	.create_session		= bnx2i_session_create,
+	.destroy_session	= bnx2i_session_destroy,
+	.create_conn		= bnx2i_conn_create,
+	.bind_conn		= bnx2i_conn_bind,
+	.destroy_conn		= bnx2i_conn_destroy,
+	.set_param		= iscsi_set_param,
+	.get_conn_param		= bnx2i_conn_get_param,
+	.get_session_param	= iscsi_session_get_param,
+	.get_host_param		= bnx2i_host_get_param,
+	.start_conn		= bnx2i_conn_start,
+	.stop_conn		= iscsi_conn_stop,
+	.send_pdu		= iscsi_conn_send_pdu,
+	.xmit_task		= bnx2i_task_xmit,
+	.get_stats		= bnx2i_conn_get_stats,
+	/* TCP connect - disconnect - option-2 interface calls */
+	.ep_connect		= bnx2i_ep_connect,
+	.ep_poll		= bnx2i_ep_poll,
+	.ep_disconnect		= bnx2i_ep_disconnect,
+	.set_path		= bnx2i_nl_set_path,
+	/* Error recovery timeout call */
+	.session_recovery_timedout = iscsi_session_recovery_timedout,
+	.cleanup_task		= bnx2i_cleanup_task,
+};
diff --git a/drivers/scsi/bnx2i/bnx2i_sysfs.c b/drivers/scsi/bnx2i/bnx2i_sysfs.c
new file mode 100644
index 0000000..96426b7
--- /dev/null
+++ b/drivers/scsi/bnx2i/bnx2i_sysfs.c
@@ -0,0 +1,142 @@
+/* bnx2i_sysfs.c: Broadcom NetXtreme II iSCSI driver.
+ *
+ * Copyright (c) 2004 - 2009 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Anil Veerabhadrappa (anilgv@broadcom.com)
+ */
+
+#include "bnx2i.h"
+
+/**
+ * bnx2i_dev_to_hba - maps dev pointer to adapter struct
+ * @dev:	device pointer
+ *
+ * Map device to hba structure
+ */
+static inline struct bnx2i_hba *bnx2i_dev_to_hba(struct device *dev)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	return iscsi_host_priv(shost);
+}
+
+
+/**
+ * bnx2i_show_sq_info - return(s currently configured send queue (SQ) size
+ * @dev:	device pointer
+ * @buf:	buffer to return current SQ size parameter
+ *
+ * Returns current SQ size parameter, this paramater determines the number
+ * outstanding iSCSI commands supported on a connection
+ */
+static ssize_t bnx2i_show_sq_info(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct bnx2i_hba *hba = bnx2i_dev_to_hba(dev);
+
+	return sprintf(buf, "0x%x\n", hba->max_sqes);
+}
+
+
+/**
+ * bnx2i_set_sq_info - update send queue (SQ) size parameter
+ * @dev:	device pointer
+ * @buf:	buffer to return current SQ size parameter
+ * @count:	parameter buffer size
+ *
+ * Interface for user to change shared queue size allocated for each conn
+ * Must be within SQ limits and a power of 2. For the latter this is needed
+ * because of how libiscsi preallocates tasks.
+ */
+static ssize_t bnx2i_set_sq_info(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct bnx2i_hba *hba = bnx2i_dev_to_hba(dev);
+	u32 val;
+	int max_sq_size;
+
+	if (hba->ofld_conns_active)
+		goto skip_config;
+
+	if (test_bit(BNX2I_NX2_DEV_57710, &hba->cnic_dev_type))
+		max_sq_size = BNX2I_5770X_SQ_WQES_MAX;
+	else
+		max_sq_size = BNX2I_570X_SQ_WQES_MAX;
+
+	if (sscanf(buf, " 0x%x ", &val) > 0) {
+		if ((val >= BNX2I_SQ_WQES_MIN) && (val <= max_sq_size) &&
+		    (is_power_of_2(val)))
+			hba->max_sqes = val;
+	}
+
+	return count;
+
+skip_config:
+	printk(KERN_ERR "bnx2i: device busy, cannot change SQ size\n");
+	return 0;
+}
+
+
+/**
+ * bnx2i_show_ccell_info - returns command cell (HQ) size
+ * @dev:	device pointer
+ * @buf:	buffer to return current SQ size parameter
+ *
+ * returns per-connection TCP history queue size parameter
+ */
+static ssize_t bnx2i_show_ccell_info(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct bnx2i_hba *hba = bnx2i_dev_to_hba(dev);
+
+	return sprintf(buf, "0x%x\n", hba->num_ccell);
+}
+
+
+/**
+ * bnx2i_get_link_state - set command cell (HQ) size
+ * @dev:	device pointer
+ * @buf:	buffer to return current SQ size parameter
+ * @count:	parameter buffer size
+ *
+ * updates per-connection TCP history queue size parameter
+ */
+static ssize_t bnx2i_set_ccell_info(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	u32 val;
+	struct bnx2i_hba *hba = bnx2i_dev_to_hba(dev);
+
+	if (hba->ofld_conns_active)
+		goto skip_config;
+
+	if (sscanf(buf, " 0x%x ", &val) > 0) {
+		if ((val >= BNX2I_CCELLS_MIN) &&
+		    (val <= BNX2I_CCELLS_MAX)) {
+			hba->num_ccell = val;
+		}
+	}
+
+	return count;
+
+skip_config:
+	printk(KERN_ERR "bnx2i: device busy, cannot change CCELL size\n");
+	return 0;
+}
+
+
+static DEVICE_ATTR(sq_size, S_IRUGO | S_IWUSR,
+		   bnx2i_show_sq_info, bnx2i_set_sq_info);
+static DEVICE_ATTR(num_ccell, S_IRUGO | S_IWUSR,
+		   bnx2i_show_ccell_info, bnx2i_set_ccell_info);
+
+struct device_attribute *bnx2i_dev_attributes[] = {
+	&dev_attr_sq_size,
+	&dev_attr_num_ccell,
+	NULL
+};
-- 
cgit v0.10.2


From 238ddbb98c327a7392ced5ae65216c55969749ea Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Tue, 9 Jun 2009 13:44:02 +0100
Subject: [SCSI] gdth: fix overlapping snprintf users

Closes-bug: http://bugzilla.kernel.org/show_bug.cgi?id=13438
Closes-bug: http://bugzilla.kernel.org/show_bug.cgi?id=13437
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c
index 59349a3..1258da3 100644
--- a/drivers/scsi/gdth_proc.c
+++ b/drivers/scsi/gdth_proc.c
@@ -152,6 +152,7 @@ static int gdth_get_info(char *buffer,char **start,off_t offset,int length,
                          struct Scsi_Host *host, gdth_ha_str *ha)
 {
     int size = 0,len = 0;
+    int hlen;
     off_t begin = 0,pos = 0;
     int id, i, j, k, sec, flag;
     int no_mdrv = 0, drv_no, is_mirr;
@@ -192,11 +193,11 @@ static int gdth_get_info(char *buffer,char **start,off_t offset,int length,
     if (reserve_list[0] == 0xff)
         strcpy(hrec, "--");
     else {
-        sprintf(hrec, "%d", reserve_list[0]);
+        hlen = sprintf(hrec, "%d", reserve_list[0]);
         for (i = 1;  i < MAX_RES_ARGS; i++) {
             if (reserve_list[i] == 0xff) 
                 break;
-            sprintf(hrec,"%s,%d", hrec, reserve_list[i]);
+            hlen += snprintf(hrec + hlen , 161 - hlen, ",%d", reserve_list[i]);
         }
     }
     size = sprintf(buffer+len,
-- 
cgit v0.10.2


From f57a8a1911342265e7acdc190333c4e9235a6632 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 5 Jun 2009 14:11:30 -0400
Subject: ring-buffer: fix ret in rb_add_time_stamp

The update of ret got mistakenly added to the if statement of
rb_try_to_discard. The variable ret should be 1 on commit and zero
otherwise.

[ Impact: fix compiler warning and real bug ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 22878b0..2e642b2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1433,8 +1433,8 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 			/* Darn, this is just wasted space */
 			event->time_delta = 0;
 			event->array[0] = 0;
-			ret = 0;
 		}
+		ret = 0;
 	}
 
 	*delta = 0;
-- 
cgit v0.10.2


From 55782138e47d9baf2f7d3a7af9e7cf42adf72c56 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 9 Jun 2009 13:43:05 +0800
Subject: tracing/events: convert block trace points to TRACE_EVENT()

TRACE_EVENT is a more generic way to define tracepoints. Doing so adds
these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions
  ...

Cons:

  - no dev_t info for the output of plug, unplug_timer and unplug_io events.
    no dev_t info for getrq and sleeprq events if bio == NULL.
    no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL.

    This is mainly because we can't get the deivce from a request queue.
    But this may change in the future.

  - A packet command is converted to a string in TP_assign, not TP_print.
    While blktrace do the convertion just before output.

    Since pc requests should be rather rare, this is not a big issue.

  - In blktrace, an event can have 2 different print formats, but a TRACE_EVENT
    has a unique format, which means we have some unused data in a trace entry.

    The overhead is minimized by using __dynamic_array() instead of __array().

I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing:

      dd                   dd + ioctl blktrace       dd + TRACE_EVENT (splice)
1     7.36s, 42.7 MB/s     7.50s, 42.0 MB/s          7.41s, 42.5 MB/s
2     7.43s, 42.3 MB/s     7.48s, 42.1 MB/s          7.43s, 42.4 MB/s
3     7.38s, 42.6 MB/s     7.45s, 42.2 MB/s          7.41s, 42.5 MB/s

So the overhead of tracing is very small, and no regression when using
those trace events vs blktrace.

And the binary output of TRACE_EVENT is much smaller than blktrace:

 # ls -l -h
 -rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0
 -rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1
 -rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out

Following are some comparisons between TRACE_EVENT and blktrace:

plug:
  kjournald-480   [000]   303.084981: block_plug: [kjournald]
  kjournald-480   [000]   303.084981:   8,0    P   N [kjournald]

unplug_io:
  kblockd/0-118   [000]   300.052973: block_unplug_io: [kblockd/0] 1
  kblockd/0-118   [000]   300.052974:   8,0    U   N [kblockd/0] 1

remap:
  kjournald-480   [000]   303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384
  kjournald-480   [000]   303.085043:   8,0    A   W 102736992 + 8 <- (8,8) 33384

bio_backmerge:
  kjournald-480   [000]   303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald]
  kjournald-480   [000]   303.085086:   8,0    M   W 102737032 + 8 [kjournald]

getrq:
  kjournald-480   [000]   303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald]
  kjournald-480   [000]   303.084975:   8,0    G   W 102736984 + 8 [kjournald]

  bash-2066  [001]  1072.953770:   8,0    G   N [bash]
  bash-2066  [001]  1072.953773: block_getrq: 0,0 N 0 + 0 [bash]

rq_complete:
  konsole-2065  [001]   300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0]
  konsole-2065  [001]   300.053191:   8,0    C   W 103669040 + 16 [0]

  ksoftirqd/1-7   [001]  1072.953811:   8,0    C   N (5a 00 08 00 00 00 00 00 24 00) [0]
  ksoftirqd/1-7   [001]  1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0]

rq_insert:
  kjournald-480   [000]   303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald]
  kjournald-480   [000]   303.084986:   8,0    I   W 102736984 + 8 [kjournald]

Changelog from v2 -> v3:

- use the newly introduced __dynamic_array().

Changelog from v1 -> v2:

- use __string() instead of __array() to minimize the memory required
  to store hex dump of rq->cmd().

- support large pc requests.

- add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT.

- some cleanups.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index 1306de9..9475bf9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,22 +28,14 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
-#include <trace/block.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/block.h>
 
 #include "blk.h"
 
-DEFINE_TRACE(block_plug);
-DEFINE_TRACE(block_unplug_io);
-DEFINE_TRACE(block_unplug_timer);
-DEFINE_TRACE(block_getrq);
-DEFINE_TRACE(block_sleeprq);
-DEFINE_TRACE(block_rq_requeue);
-DEFINE_TRACE(block_bio_backmerge);
-DEFINE_TRACE(block_bio_frontmerge);
-DEFINE_TRACE(block_bio_queue);
-DEFINE_TRACE(block_rq_complete);
-DEFINE_TRACE(block_remap);	/* Also used in drivers/md/dm.c */
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
 
diff --git a/block/elevator.c b/block/elevator.c
index 7073a90..e220f0c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,17 +33,16 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
-DEFINE_TRACE(block_rq_abort);
-
 /*
  * Merge hash stuff.
  */
@@ -55,9 +54,6 @@ static const int elv_hash_shift = 6;
 #define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
-DEFINE_TRACE(block_rq_insert);
-DEFINE_TRACE(block_rq_issue);
-
 /*
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e2ee4a7..3fd8b1e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,7 +20,8 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
+
+#include <trace/events/block.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -53,8 +54,6 @@ struct dm_target_io {
 	union map_info info;
 };
 
-DEFINE_TRACE(block_bio_complete);
-
 /*
  * For request-based dm.
  * One of these is allocated per request.
diff --git a/fs/bio.c b/fs/bio.c
index 9871164..740699c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,10 +26,9 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
-DEFINE_TRACE(block_split);
+#include <trace/events/block.h>
 
 /*
  * Test patch to inline a certain number of bi_io_vec's inside the bio
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 82b4636..c7ec31d 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -218,5 +218,18 @@ static inline int blk_trace_init_sysfs(struct device *dev)
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
+#ifdef CONFIG_EVENT_TRACING
+
+static inline int blk_cmd_buf_len(struct request *rq)
+{
+	return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
+}
+
+extern void blk_dump_cmd(char *buf, struct request *rq);
+extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
+extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
+
+#endif /* CONFIG_EVENT_TRACING */
+
 #endif /* __KERNEL__ */
 #endif
diff --git a/include/trace/block.h b/include/trace/block.h
deleted file mode 100644
index 5b12efa..0000000
--- a/include/trace/block.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _TRACE_BLOCK_H
-#define _TRACE_BLOCK_H
-
-#include <linux/blkdev.h>
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(block_rq_abort,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_insert,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_issue,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_requeue,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_complete,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_bio_bounce,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_complete,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_backmerge,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_frontmerge,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_queue,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_getrq,
-	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-	      TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_sleeprq,
-	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-	      TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_plug,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_timer,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_io,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_split,
-	TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
-	      TP_ARGS(q, bio, pdu));
-
-DECLARE_TRACE(block_remap,
-	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t from),
-	      TP_ARGS(q, bio, dev, from));
-
-#endif
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
new file mode 100644
index 0000000..a99d1e5
--- /dev/null
+++ b/include/trace/events/block.h
@@ -0,0 +1,483 @@
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+TRACE_EVENT(block_rq_abort,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_insert,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned int,	bytes			)
+		__array(  char,		rwbs,	6		)
+		__array(  char,         comm,   TASK_COMM_LEN   )
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->bytes     = blk_pc_request(rq) ? rq->data_len : 0;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __entry->bytes, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_issue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned int,	bytes			)
+		__array(  char,		rwbs,	6		)
+		__array(  char,		comm,   TASK_COMM_LEN   )
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->bytes     = blk_pc_request(rq) ? rq->data_len : 0;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __entry->bytes, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_requeue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors	   = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_complete,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->errors)
+);
+TRACE_EVENT(block_bio_bounce,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_complete,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned,	nr_sector	)
+		__field( int,		error		)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->error)
+);
+
+TRACE_EVENT(block_bio_backmerge,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_frontmerge,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_queue,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_getrq,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+	TP_ARGS(q, bio, rw),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+        ),
+
+	TP_fast_assign(
+		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
+		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
+		blk_fill_rwbs(__entry->rwbs,
+			      bio ? bio->bi_rw : 0, __entry->nr_sector);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+        ),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_sleeprq,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+	TP_ARGS(q, bio, rw),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
+		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
+		blk_fill_rwbs(__entry->rwbs,
+			    bio ? bio->bi_rw : 0, __entry->nr_sector);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_plug,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s]", __entry->comm)
+);
+
+TRACE_EVENT(block_unplug_timer,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__field( int,		nr_rq			)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_unplug_io,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__field( int,		nr_rq			)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_split,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio,
+		 unsigned int new_sector),
+
+	TP_ARGS(q, bio, new_sector),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev				)
+		__field( sector_t,	sector				)
+		__field( sector_t,	new_sector			)
+		__array( char,		rwbs,		6		)
+		__array( char,		comm,		TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->new_sector	= new_sector;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu / %llu [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->new_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_remap,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+		 sector_t from),
+
+	TP_ARGS(q, bio, dev, from),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned int,	nr_sector	)
+		__field( dev_t,		old_dev		)
+		__field( sector_t,	old_sector	)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		__entry->old_dev	= dev;
+		__entry->old_sector	= from;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+	),
+
+	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector,
+		  MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+		  __entry->old_sector)
+);
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 06b8585..844164d 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -45,7 +45,10 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
-obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+ifeq ($(CONFIG_BLOCK),y)
+obj-$(CONFIG_EVENT_TRACING) += blktrace.o
+endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e3abf55..7bd6a98 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,10 +23,14 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/time.h>
-#include <trace/block.h>
 #include <linux/uaccess.h>
+
+#include <trace/events/block.h>
+
 #include "trace_output.h"
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+
 static unsigned int blktrace_seq __read_mostly = 1;
 
 static struct trace_array *blk_tr;
@@ -1658,3 +1662,75 @@ int blk_trace_init_sysfs(struct device *dev)
 	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
 }
 
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#ifdef CONFIG_EVENT_TRACING
+
+void blk_dump_cmd(char *buf, struct request *rq)
+{
+	int i, end;
+	int len = rq->cmd_len;
+	unsigned char *cmd = rq->cmd;
+
+	if (!blk_pc_request(rq)) {
+		buf[0] = '\0';
+		return;
+	}
+
+	for (end = len - 1; end >= 0; end--)
+		if (cmd[end])
+			break;
+	end++;
+
+	for (i = 0; i < len; i++) {
+		buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
+		if (i == end && end != len - 1) {
+			sprintf(buf, " ..");
+			break;
+		}
+	}
+}
+
+void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
+{
+	int i = 0;
+
+	if (rw & WRITE)
+		rwbs[i++] = 'W';
+	else if (rw & 1 << BIO_RW_DISCARD)
+		rwbs[i++] = 'D';
+	else if (bytes)
+		rwbs[i++] = 'R';
+	else
+		rwbs[i++] = 'N';
+
+	if (rw & 1 << BIO_RW_AHEAD)
+		rwbs[i++] = 'A';
+	if (rw & 1 << BIO_RW_BARRIER)
+		rwbs[i++] = 'B';
+	if (rw & 1 << BIO_RW_SYNCIO)
+		rwbs[i++] = 'S';
+	if (rw & 1 << BIO_RW_META)
+		rwbs[i++] = 'M';
+
+	rwbs[i] = '\0';
+}
+
+void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
+{
+	int rw = rq->cmd_flags & 0x03;
+	int bytes;
+
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
+	if (blk_pc_request(rq))
+		bytes = rq->data_len;
+	else
+		bytes = rq->hard_nr_sectors << 9;
+
+	blk_fill_rwbs(rwbs, rw, bytes);
+}
+
+#endif /* CONFIG_EVENT_TRACING */
+
diff --git a/mm/bounce.c b/mm/bounce.c
index e590272..65f5e17 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,16 +14,15 @@
 #include <linux/hash.h>
 #include <linux/highmem.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/block.h>
+
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
 static mempool_t *page_pool, *isa_page_pool;
 
-DEFINE_TRACE(block_bio_bounce);
-
 #ifdef CONFIG_HIGHMEM
 static __init int init_emergency_pool(void)
 {
-- 
cgit v0.10.2


From d765898970f35acef960581f678b9da9d5c779fa Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes at virtuousgeek.org>
Date: Fri, 5 Jun 2009 14:41:29 +0000
Subject: drm/i915: enable MCHBAR if needed

Using the new PNP resource checking code, this patch allows the i915
driver to allocate MCHBAR space if needed and use the BAR to determine
current memory settings.

[apw@canonical.com: moved to the new generic PNP resource interface]
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

failure to update-index after git-am --reject to hand-apply

Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index db81f55..6a47145 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -150,6 +150,8 @@ typedef struct drm_i915_private {
 	drm_local_map_t hws_map;
 	struct drm_gem_object *hws_obj;
 
+	struct resource mch_res;
+
 	unsigned int cpp;
 	int back_offset;
 	int front_offset;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 07d976b..9a05cad 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -25,6 +25,8 @@
  *
  */
 
+#include <linux/acpi.h>
+#include <linux/pnp.h>
 #include "linux/string.h"
 #include "linux/bitops.h"
 #include "drmP.h"
@@ -81,6 +83,143 @@
  * to match what the GPU expects.
  */
 
+#define MCHBAR_I915 0x44
+#define MCHBAR_I965 0x48
+#define MCHBAR_SIZE (4*4096)
+
+#define DEVEN_REG 0x54
+#define   DEVEN_MCHBAR_EN (1 << 28)
+
+/* Allocate space for the MCH regs if needed, return nonzero on error */
+static int
+intel_alloc_mchbar_resource(struct drm_device *dev)
+{
+	struct pci_dev *bridge_dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	int reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
+	u32 temp_lo, temp_hi = 0;
+	u64 mchbar_addr;
+	int ret = 0;
+
+	bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
+	if (!bridge_dev) {
+		DRM_DEBUG("no bridge dev?!\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (IS_I965G(dev))
+		pci_read_config_dword(bridge_dev, reg + 4, &temp_hi);
+	pci_read_config_dword(bridge_dev, reg, &temp_lo);
+	mchbar_addr = ((u64)temp_hi << 32) | temp_lo;
+
+	/* If ACPI doesn't have it, assume we need to allocate it ourselves */
+	if (mchbar_addr &&
+	    pnp_range_reserved(mchbar_addr, mchbar_addr + MCHBAR_SIZE)) {
+		ret = 0;
+		goto out_put;
+	}
+
+	/* Get some space for it */
+	ret = pci_bus_alloc_resource(bridge_dev->bus, &dev_priv->mch_res,
+				     MCHBAR_SIZE, MCHBAR_SIZE,
+				     PCIBIOS_MIN_MEM,
+				     0,   pcibios_align_resource,
+				     bridge_dev);
+	if (ret) {
+		DRM_DEBUG("failed bus alloc: %d\n", ret);
+		dev_priv->mch_res.start = 0;
+		goto out_put;
+	}
+
+	if (IS_I965G(dev))
+		pci_write_config_dword(bridge_dev, reg + 4,
+				       upper_32_bits(dev_priv->mch_res.start));
+
+	pci_write_config_dword(bridge_dev, reg,
+			       lower_32_bits(dev_priv->mch_res.start));
+out_put:
+	pci_dev_put(bridge_dev);
+out:
+	return ret;
+}
+
+/* Setup MCHBAR if possible, return true if we should disable it again */
+static bool
+intel_setup_mchbar(struct drm_device *dev)
+{
+	struct pci_dev *bridge_dev;
+	int mchbar_reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
+	u32 temp;
+	bool need_disable = false, enabled;
+
+	bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
+	if (!bridge_dev) {
+		DRM_DEBUG("no bridge dev?!\n");
+		goto out;
+	}
+
+	if (IS_I915G(dev) || IS_I915GM(dev)) {
+		pci_read_config_dword(bridge_dev, DEVEN_REG, &temp);
+		enabled = !!(temp & DEVEN_MCHBAR_EN);
+	} else {
+		pci_read_config_dword(bridge_dev, mchbar_reg, &temp);
+		enabled = temp & 1;
+	}
+
+	/* If it's already enabled, don't have to do anything */
+	if (enabled)
+		goto out_put;
+
+	if (intel_alloc_mchbar_resource(dev))
+		goto out_put;
+
+	need_disable = true;
+
+	/* Space is allocated or reserved, so enable it. */
+	if (IS_I915G(dev) || IS_I915GM(dev)) {
+		pci_write_config_dword(bridge_dev, DEVEN_REG,
+				       temp | DEVEN_MCHBAR_EN);
+	} else {
+		pci_read_config_dword(bridge_dev, mchbar_reg, &temp);
+		pci_write_config_dword(bridge_dev, mchbar_reg, temp | 1);
+	}
+out_put:
+	pci_dev_put(bridge_dev);
+out:
+	return need_disable;
+}
+
+static void
+intel_teardown_mchbar(struct drm_device *dev, bool disable)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct pci_dev *bridge_dev;
+	int mchbar_reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
+	u32 temp;
+
+	bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
+	if (!bridge_dev) {
+		DRM_DEBUG("no bridge dev?!\n");
+		return;
+	}
+
+	if (disable) {
+		if (IS_I915G(dev) || IS_I915GM(dev)) {
+			pci_read_config_dword(bridge_dev, DEVEN_REG, &temp);
+			temp &= ~DEVEN_MCHBAR_EN;
+			pci_write_config_dword(bridge_dev, DEVEN_REG, temp);
+		} else {
+			pci_read_config_dword(bridge_dev, mchbar_reg, &temp);
+			temp &= ~1;
+			pci_write_config_dword(bridge_dev, mchbar_reg, temp);
+		}
+	}
+
+	if (dev_priv->mch_res.start)
+		release_resource(&dev_priv->mch_res);
+}
+
 /**
  * Detects bit 6 swizzling of address lookup between IGD access and CPU
  * access through main memory.
@@ -91,6 +230,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+	bool need_disable;
 
 	if (!IS_I9XX(dev)) {
 		/* As far as we know, the 865 doesn't have these bit 6
@@ -101,6 +241,9 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	} else if (IS_MOBILE(dev)) {
 		uint32_t dcc;
 
+		/* Try to make sure MCHBAR is enabled before poking at it */
+		need_disable = intel_setup_mchbar(dev);
+
 		/* On mobile 9xx chipsets, channel interleave by the CPU is
 		 * determined by DCC.  For single-channel, neither the CPU
 		 * nor the GPU do swizzling.  For dual channel interleaved,
@@ -140,6 +283,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 		}
+
+		intel_teardown_mchbar(dev, need_disable);
 	} else {
 		/* The 965, G33, and newer, have a very flexible memory
 		 * configuration.  It will enable dual-channel mode
-- 
cgit v0.10.2


From 036a4a7d9272582fc7370359515d807393e2f728 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Mon, 8 Jun 2009 14:40:19 +0800
Subject: drm/i915: handle interrupt on new chipset

Update interrupt handling methods for IGDNG with new registers
for display and graphics interrupt functions. As we won't use
irq-based vblank sync in dri2, so display interrupt on new chip
will be used for hotplug only in future.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 68e882c..6bc716d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1162,7 +1162,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	dev->driver->get_vblank_counter = i915_get_vblank_counter;
 	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
-	if (IS_G4X(dev)) {
+	if (IS_G4X(dev) || IS_IGDNG(dev)) {
 		dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */
 		dev->driver->get_vblank_counter = gm45_get_vblank_counter;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6a47145..8ef6bce 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -167,6 +167,11 @@ typedef struct drm_i915_private {
 	/** Cached value of IMR to avoid reads in updating the bitfield */
 	u32 irq_mask_reg;
 	u32 pipestat[2];
+	/** splitted irq regs for graphics and display engine on IGDNG,
+	    irq_mask_reg is still used for display irq. */
+	u32 gt_irq_mask_reg;
+	u32 gt_irq_enable_reg;
+	u32 de_irq_enable_reg;
 
 	u32 hotplug_supported_mask;
 	struct work_struct hotplug_work;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3fbd8a0..38e0f83 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1714,7 +1714,10 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 	BUG_ON(seqno == 0);
 
 	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
-		ier = I915_READ(IER);
+		if (IS_IGDNG(dev))
+			ier = I915_READ(DEIER) | I915_READ(GTIER);
+		else
+			ier = I915_READ(IER);
 		if (!ier) {
 			DRM_ERROR("something (likely vbetool) disabled "
 				  "interrupts, re-enabling\n");
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 701d680..b86b7b7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -58,6 +58,47 @@
 					 DRM_I915_VBLANK_PIPE_B)
 
 void
+igdng_enable_graphics_irq(drm_i915_private_t *dev_priv, u32 mask)
+{
+	if ((dev_priv->gt_irq_mask_reg & mask) != 0) {
+		dev_priv->gt_irq_mask_reg &= ~mask;
+		I915_WRITE(GTIMR, dev_priv->gt_irq_mask_reg);
+		(void) I915_READ(GTIMR);
+	}
+}
+
+static inline void
+igdng_disable_graphics_irq(drm_i915_private_t *dev_priv, u32 mask)
+{
+	if ((dev_priv->gt_irq_mask_reg & mask) != mask) {
+		dev_priv->gt_irq_mask_reg |= mask;
+		I915_WRITE(GTIMR, dev_priv->gt_irq_mask_reg);
+		(void) I915_READ(GTIMR);
+	}
+}
+
+/* For display hotplug interrupt */
+void
+igdng_enable_display_irq(drm_i915_private_t *dev_priv, u32 mask)
+{
+	if ((dev_priv->irq_mask_reg & mask) != 0) {
+		dev_priv->irq_mask_reg &= ~mask;
+		I915_WRITE(DEIMR, dev_priv->irq_mask_reg);
+		(void) I915_READ(DEIMR);
+	}
+}
+
+static inline void
+igdng_disable_display_irq(drm_i915_private_t *dev_priv, u32 mask)
+{
+	if ((dev_priv->irq_mask_reg & mask) != mask) {
+		dev_priv->irq_mask_reg |= mask;
+		I915_WRITE(DEIMR, dev_priv->irq_mask_reg);
+		(void) I915_READ(DEIMR);
+	}
+}
+
+void
 i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
 {
 	if ((dev_priv->irq_mask_reg & mask) != 0) {
@@ -196,6 +237,47 @@ static void i915_hotplug_work_func(struct work_struct *work)
 	drm_sysfs_hotplug_event(dev);
 }
 
+irqreturn_t igdng_irq_handler(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	int ret = IRQ_NONE;
+	u32 de_iir, gt_iir;
+	u32 new_de_iir, new_gt_iir;
+	struct drm_i915_master_private *master_priv;
+
+	de_iir = I915_READ(DEIIR);
+	gt_iir = I915_READ(GTIIR);
+
+	for (;;) {
+		if (de_iir == 0 && gt_iir == 0)
+			break;
+
+		ret = IRQ_HANDLED;
+
+		I915_WRITE(DEIIR, de_iir);
+		new_de_iir = I915_READ(DEIIR);
+		I915_WRITE(GTIIR, gt_iir);
+		new_gt_iir = I915_READ(GTIIR);
+
+		if (dev->primary->master) {
+			master_priv = dev->primary->master->driver_priv;
+			if (master_priv->sarea_priv)
+				master_priv->sarea_priv->last_dispatch =
+					READ_BREADCRUMB(dev_priv);
+		}
+
+		if (gt_iir & GT_USER_INTERRUPT) {
+			dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+			DRM_WAKEUP(&dev_priv->irq_queue);
+		}
+
+		de_iir = new_de_iir;
+		gt_iir = new_gt_iir;
+	}
+
+	return ret;
+}
+
 irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
@@ -212,6 +294,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 
 	atomic_inc(&dev_priv->irq_received);
 
+	if (IS_IGDNG(dev))
+		return igdng_irq_handler(dev);
+
 	iir = I915_READ(IIR);
 
 	if (IS_I965G(dev)) {
@@ -349,8 +434,12 @@ void i915_user_irq_get(struct drm_device *dev)
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
-	if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1))
-		i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
+	if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1)) {
+		if (IS_IGDNG(dev))
+			igdng_enable_graphics_irq(dev_priv, GT_USER_INTERRUPT);
+		else
+			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
+	}
 	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
 }
 
@@ -361,8 +450,12 @@ void i915_user_irq_put(struct drm_device *dev)
 
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
 	BUG_ON(dev->irq_enabled && dev_priv->user_irq_refcount <= 0);
-	if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0))
-		i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
+	if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0)) {
+		if (IS_IGDNG(dev))
+			igdng_disable_graphics_irq(dev_priv, GT_USER_INTERRUPT);
+		else
+			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
+	}
 	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
 }
 
@@ -455,6 +548,9 @@ int i915_enable_vblank(struct drm_device *dev, int pipe)
 	if (!(pipeconf & PIPEACONF_ENABLE))
 		return -EINVAL;
 
+	if (IS_IGDNG(dev))
+		return 0;
+
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
 	if (IS_I965G(dev))
 		i915_enable_pipestat(dev_priv, pipe,
@@ -474,6 +570,9 @@ void i915_disable_vblank(struct drm_device *dev, int pipe)
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
 
+	if (IS_IGDNG(dev))
+		return;
+
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
 	i915_disable_pipestat(dev_priv, pipe,
 			      PIPE_VBLANK_INTERRUPT_ENABLE |
@@ -547,12 +646,65 @@ int i915_vblank_swap(struct drm_device *dev, void *data,
 
 /* drm_dma.h hooks
 */
+static void igdng_irq_preinstall(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+
+	I915_WRITE(HWSTAM, 0xeffe);
+
+	/* XXX hotplug from PCH */
+
+	I915_WRITE(DEIMR, 0xffffffff);
+	I915_WRITE(DEIER, 0x0);
+	(void) I915_READ(DEIER);
+
+	/* and GT */
+	I915_WRITE(GTIMR, 0xffffffff);
+	I915_WRITE(GTIER, 0x0);
+	(void) I915_READ(GTIER);
+}
+
+static int igdng_irq_postinstall(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	/* enable kind of interrupts always enabled */
+	u32 display_mask = DE_MASTER_IRQ_CONTROL /*| DE_PCH_EVENT */;
+	u32 render_mask = GT_USER_INTERRUPT;
+
+	dev_priv->irq_mask_reg = ~display_mask;
+	dev_priv->de_irq_enable_reg = display_mask;
+
+	/* should always can generate irq */
+	I915_WRITE(DEIIR, I915_READ(DEIIR));
+	I915_WRITE(DEIMR, dev_priv->irq_mask_reg);
+	I915_WRITE(DEIER, dev_priv->de_irq_enable_reg);
+	(void) I915_READ(DEIER);
+
+	/* user interrupt should be enabled, but masked initial */
+	dev_priv->gt_irq_mask_reg = 0xffffffff;
+	dev_priv->gt_irq_enable_reg = render_mask;
+
+	I915_WRITE(GTIIR, I915_READ(GTIIR));
+	I915_WRITE(GTIMR, dev_priv->gt_irq_mask_reg);
+	I915_WRITE(GTIER, dev_priv->gt_irq_enable_reg);
+	(void) I915_READ(GTIER);
+
+	return 0;
+}
+
 void i915_driver_irq_preinstall(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
 	atomic_set(&dev_priv->irq_received, 0);
 
+	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
+
+	if (IS_IGDNG(dev)) {
+		igdng_irq_preinstall(dev);
+		return;
+	}
+
 	if (I915_HAS_HOTPLUG(dev)) {
 		I915_WRITE(PORT_HOTPLUG_EN, 0);
 		I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
@@ -564,7 +716,6 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
 	I915_WRITE(IMR, 0xffffffff);
 	I915_WRITE(IER, 0x0);
 	(void) I915_READ(IER);
-	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
 }
 
 int i915_driver_irq_postinstall(struct drm_device *dev)
@@ -572,8 +723,13 @@ int i915_driver_irq_postinstall(struct drm_device *dev)
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	u32 enable_mask = I915_INTERRUPT_ENABLE_FIX | I915_INTERRUPT_ENABLE_VAR;
 
+	DRM_INIT_WAITQUEUE(&dev_priv->irq_queue);
+
 	dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B;
 
+	if (IS_IGDNG(dev))
+		return igdng_irq_postinstall(dev);
+
 	/* Unmask the interrupts that we always want on. */
 	dev_priv->irq_mask_reg = ~I915_INTERRUPT_ENABLE_FIX;
 
@@ -613,11 +769,24 @@ int i915_driver_irq_postinstall(struct drm_device *dev)
 	(void) I915_READ(IER);
 
 	opregion_enable_asle(dev);
-	DRM_INIT_WAITQUEUE(&dev_priv->irq_queue);
 
 	return 0;
 }
 
+static void igdng_irq_uninstall(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	I915_WRITE(HWSTAM, 0xffffffff);
+
+	I915_WRITE(DEIMR, 0xffffffff);
+	I915_WRITE(DEIER, 0x0);
+	I915_WRITE(DEIIR, I915_READ(DEIIR));
+
+	I915_WRITE(GTIMR, 0xffffffff);
+	I915_WRITE(GTIER, 0x0);
+	I915_WRITE(GTIIR, I915_READ(GTIIR));
+}
+
 void i915_driver_irq_uninstall(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -627,6 +796,11 @@ void i915_driver_irq_uninstall(struct drm_device * dev)
 
 	dev_priv->vblank_pipe = 0;
 
+	if (IS_IGDNG(dev)) {
+		igdng_irq_uninstall(dev);
+		return;
+	}
+
 	if (I915_HAS_HOTPLUG(dev)) {
 		I915_WRITE(PORT_HOTPLUG_EN, 0);
 		I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
-- 
cgit v0.10.2


From 80a538e49daddbf3bf783f3464e91bd3181957b2 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Mon, 8 Jun 2009 14:40:20 +0800
Subject: drm/i915: Enable probe on new chipset

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index fc55db7..8b4c80c 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -536,4 +536,6 @@
 	{0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+	{0x8086, 0x0042, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+	{0x8086, 0x0046, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0, 0, 0}
-- 
cgit v0.10.2


From 6556d1df88fe68f9836beeb43342a336691cb67c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 9 Jun 2009 14:04:26 -0400
Subject: tracing: fix the block trace points print size

The sector field is either u64 or unsigned long depending on
the arch. This patch casts the sector to unsigned long long to
prevent the printf warnings.

[ Impact: remove compile warnings ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index a99d1e5..53effd4 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -37,7 +37,8 @@ TRACE_EVENT(block_rq_abort,
 	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->errors)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
 );
 
 TRACE_EVENT(block_rq_insert,
@@ -71,7 +72,8 @@ TRACE_EVENT(block_rq_insert,
 	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __entry->bytes, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_rq_issue,
@@ -105,7 +107,8 @@ TRACE_EVENT(block_rq_issue,
 	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __entry->bytes, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_rq_requeue,
@@ -137,7 +140,8 @@ TRACE_EVENT(block_rq_requeue,
 	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->errors)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
 );
 
 TRACE_EVENT(block_rq_complete,
@@ -169,7 +173,8 @@ TRACE_EVENT(block_rq_complete,
 	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->errors)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
 );
 TRACE_EVENT(block_bio_bounce,
 
@@ -195,7 +200,8 @@ TRACE_EVENT(block_bio_bounce,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_bio_complete,
@@ -221,7 +227,8 @@ TRACE_EVENT(block_bio_complete,
 
 	TP_printk("%d,%d %s %llu + %u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->error)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->error)
 );
 
 TRACE_EVENT(block_bio_backmerge,
@@ -248,7 +255,8 @@ TRACE_EVENT(block_bio_backmerge,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_bio_frontmerge,
@@ -275,7 +283,8 @@ TRACE_EVENT(block_bio_frontmerge,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_bio_queue,
@@ -302,7 +311,8 @@ TRACE_EVENT(block_bio_queue,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_getrq,
@@ -330,7 +340,8 @@ TRACE_EVENT(block_getrq,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_sleeprq,
@@ -358,7 +369,8 @@ TRACE_EVENT(block_sleeprq,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_plug,
@@ -441,7 +453,9 @@ TRACE_EVENT(block_split,
 
 	TP_printk("%d,%d %s %llu / %llu [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->new_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  (unsigned long long)__entry->new_sector,
+		  __entry->comm)
 );
 
 TRACE_EVENT(block_remap,
@@ -471,9 +485,10 @@ TRACE_EVENT(block_remap,
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector,
 		  MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
-		  __entry->old_sector)
+		  (unsigned long long)__entry->old_sector)
 );
 
 #endif /* _TRACE_BLOCK_H */
-- 
cgit v0.10.2


From fa0864b26b4bfa1dd4bb78eeffbc1f398cb56425 Mon Sep 17 00:00:00 2001
From: Michael Cousin <mika.cousin@gmail.com>
Date: Fri, 5 Jun 2009 21:16:22 +0200
Subject: drm/i915: Skip lvds with Aopen i945GTt-VFA

Signed-off-by: Michael Cousin <mika.cousin@gmail.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index e4ca6a3..a7ae9f4 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -493,6 +493,13 @@ static const struct dmi_system_id __initdata intel_no_lvds[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "i965GMx-IF"),
 		},
 	},
+	{
+		.callback = intel_no_lvds_dmi_callback,
+		.ident = "Aopen i945GTt-VFA",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_VERSION, "AO00001JW"),
+		},
+	},
 
 	{ }	/* terminating entry */
 };
-- 
cgit v0.10.2


From 725c624a58a10ef90a2ff889e122158fabf36147 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 8 Jun 2009 19:09:45 -0400
Subject: tracing: add trace_seq_vprint interface

The code to update the print formats for events requires a vprintf
format in the trace_seq. This patch adds that interface.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index ba9627f..c68bccb 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -27,6 +27,8 @@ trace_seq_init(struct trace_seq *s)
 #ifdef CONFIG_TRACING
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+	__attribute__ ((format (printf, 2, 0)));
 extern int
 trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
 extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 425725c..c05aff4 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -100,6 +100,38 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 }
 EXPORT_SYMBOL_GPL(trace_seq_printf);
 
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (!len)
+		return 0;
+
+	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+	/* If we can't write it all, don't bother writing anything */
+	if (ret >= len)
+		return 0;
+
+	s->len += ret;
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(trace_seq_vprintf);
+
 int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
 	int len = (PAGE_SIZE - 1) - s->len;
-- 
cgit v0.10.2


From af6c159885537eb6582a61156ccfb73e83c0478d Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <jacmet@sunsite.dk>
Date: Sun, 15 Feb 2009 22:15:16 +0100
Subject: kconfig: handle comment entries within choice/endchoice

Implement support for comment entries within choice groups. Comment entries
are displayed visually distinct from normal configs, and selecting them is
a no-op.

Signed-off-by: Peter Korsgaard <jacmet@sunsite.dk>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c
index b2a878c..bcc6f19 100644
--- a/scripts/kconfig/lxdialog/checklist.c
+++ b/scripts/kconfig/lxdialog/checklist.c
@@ -41,7 +41,8 @@ static void print_item(WINDOW * win, int choice, int selected)
 	wmove(win, choice, check_x);
 	wattrset(win, selected ? dlg.check_selected.atr
 		 : dlg.check.atr);
-	wprintw(win, "(%c)", item_is_tag('X') ? 'X' : ' ');
+	if (!item_is_tag(':'))
+		wprintw(win, "(%c)", item_is_tag('X') ? 'X' : ' ');
 
 	wattrset(win, selected ? dlg.tag_selected.atr : dlg.tag.atr);
 	mvwaddch(win, choice, item_x, item_str()[0]);
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 6841e95..3bcacb4 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -732,7 +732,12 @@ static void conf_choice(struct menu *menu)
 		for (child = menu->list; child; child = child->next) {
 			if (!menu_is_visible(child))
 				continue;
-			item_make("%s", _(menu_get_prompt(child)));
+			if (child->sym)
+				item_make("%s", _(menu_get_prompt(child)));
+			else {
+				item_make("*** %s ***", _(menu_get_prompt(child)));
+				item_set_tag(':');
+			}
 			item_set_data(child);
 			if (child->sym == active)
 				item_set_selected(1);
@@ -748,6 +753,9 @@ static void conf_choice(struct menu *menu)
 		case 0:
 			if (selected) {
 				child = item_data();
+				if (!child->sym)
+					break;
+
 				sym_set_tristate_value(child->sym, yes);
 			}
 			return;
-- 
cgit v0.10.2


From 4f66199b4b372a530333d0f25aeb5d80362241ed Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Thu, 23 Apr 2009 01:08:31 +0200
Subject: kbuild, deb-pkg: minor general improvements in builddeb script

Minor coding style improvements and typo fix in leading comment.

Signed-off-by: Frans Pop <elendil@planet.nl>
Cc: Andres Salomon <dilinger@debian.org>
Acked-by: maximilian attems <max@stro.at>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 1264b8e..eff7f9d 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -4,7 +4,7 @@
 # Copyright 2003 Wichert Akkerman <wichert@wiggy.net>
 #
 # Simple script to generate a deb package for a Linux kernel. All the
-# complexity of what to do with a kernel after it is installer or removed
+# complexity of what to do with a kernel after it is installed or removed
 # is left to other scripts and packages: they can install scripts in the
 # /etc/kernel/{pre,post}{inst,rm}.d/ directories that will be called on
 # package install and removal.
@@ -13,13 +13,13 @@ set -e
 
 # Some variables and settings used throughout the script
 version=$KERNELRELEASE
-revision=`cat .version`
+revision=$(cat .version)
 tmpdir="$objtree/debian/tmp"
 fwdir="$objtree/debian/fwtmp"
 packagename=linux-$version
 fwpackagename=linux-firmware-image
 
-if [ "$ARCH" == "um" ] ; then
+if [ "$ARCH" = "um" ] ; then
 	packagename=user-mode-linux-$version
 fi
 
@@ -27,12 +27,12 @@ fi
 rm -rf "$tmpdir" "$fwdir"
 mkdir -p "$tmpdir/DEBIAN" "$tmpdir/lib" "$tmpdir/boot"
 mkdir -p "$fwdir/DEBIAN" "$fwdir/lib"
-if [ "$ARCH" == "um" ] ; then
+if [ "$ARCH" = "um" ] ; then
 	mkdir -p "$tmpdir/usr/lib/uml/modules/$version" "$tmpdir/usr/share/doc/$packagename" "$tmpdir/usr/bin"
 fi
 
 # Build and install the kernel
-if [ "$ARCH" == "um" ] ; then
+if [ "$ARCH" = "um" ] ; then
 	$MAKE linux
 	cp System.map "$tmpdir/usr/lib/uml/modules/$version/System.map"
 	cp .config "$tmpdir/usr/share/doc/$packagename/config"
@@ -46,7 +46,7 @@ fi
 
 if grep -q '^CONFIG_MODULES=y' .config ; then
 	INSTALL_MOD_PATH="$tmpdir" make KBUILD_SRC= modules_install
-	if [ "$ARCH" == "um" ] ; then
+	if [ "$ARCH" = "um" ] ; then
 		mv "$tmpdir/lib/modules/$version"/* "$tmpdir/usr/lib/uml/modules/$version/"
 		rmdir "$tmpdir/lib/modules/$version"
 	fi
@@ -77,9 +77,8 @@ linux ($version-$revision) unstable; urgency=low
 EOF
 
 # Generate a control file
-if [ "$ARCH" == "um" ]; then
-
-cat <<EOF > debian/control
+if [ "$ARCH" = "um" ]; then
+	cat <<EOF > debian/control
 Source: linux
 Section: base
 Priority: optional
@@ -101,7 +100,7 @@ Description: User Mode Linux kernel, version $version
 EOF
 
 else
-cat <<EOF > debian/control
+	cat <<EOF > debian/control
 Source: linux
 Section: base
 Priority: optional
@@ -116,6 +115,7 @@ Description: Linux kernel, version $version
  This package contains the Linux kernel, modules and corresponding other
  files version $version
 EOF
+
 fi
 
 # Fix some ownership and permissions
@@ -143,4 +143,3 @@ dpkg-gencontrol -isp -p$packagename
 dpkg --build "$tmpdir" ..
 
 exit 0
-
-- 
cgit v0.10.2


From 3e2ab2563a599c3d3fd03952c056af09fc03b74a Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Thu, 23 Apr 2009 01:08:44 +0200
Subject: kbuild, deb-pkg: refactor code to reduce duplication

Factor out code to build package into separate function and
only write "source" section for the debian/control file once.

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: maximilian attems <max@stro.at>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index eff7f9d..feebd69 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -11,6 +11,18 @@
 
 set -e
 
+create_package() {
+	local pname="$1" pdir="$2"
+
+	# Fix ownership and permissions
+	chown -R root:root "$pdir"
+	chmod -R go-w "$pdir"
+
+	# Create the package
+	dpkg-gencontrol -isp -p$pname -P"$pdir"
+	dpkg --build "$pdir" ..
+}
+
 # Some variables and settings used throughout the script
 version=$KERNELRELEASE
 revision=$(cat .version)
@@ -77,13 +89,16 @@ linux ($version-$revision) unstable; urgency=low
 EOF
 
 # Generate a control file
-if [ "$ARCH" = "um" ]; then
-	cat <<EOF > debian/control
+cat <<EOF > debian/control
 Source: linux
 Section: base
 Priority: optional
 Maintainer: $name
 Standards-Version: 3.6.1
+EOF
+
+if [ "$ARCH" = "um" ]; then
+	cat <<EOF >> debian/control
 
 Package: $packagename
 Provides: kernel-image-$version, linux-image-$version
@@ -100,12 +115,7 @@ Description: User Mode Linux kernel, version $version
 EOF
 
 else
-	cat <<EOF > debian/control
-Source: linux
-Section: base
-Priority: optional
-Maintainer: $name
-Standards-Version: 3.6.1
+	cat <<EOF >> debian/control
 
 Package: $packagename
 Provides: kernel-image-$version, linux-image-$version
@@ -118,10 +128,6 @@ EOF
 
 fi
 
-# Fix some ownership and permissions
-chown -R root:root "$tmpdir"
-chmod -R go-w "$tmpdir"
-
 # Do we have firmware? Move it out of the way and build it into a package.
 if [ -e "$tmpdir/lib/firmware" ]; then
 	mv "$tmpdir/lib/firmware" "$fwdir/lib/"
@@ -134,12 +140,9 @@ Description: Linux kernel firmware, version $version
  This package contains firmware from the Linux kernel, version $version
 EOF
 
-	dpkg-gencontrol -isp -p$fwpackagename -P"$fwdir"
-	dpkg --build "$fwdir" ..
+	create_package "$fwpackagename" "$fwdir"
 fi
 
-# Perform the final magic
-dpkg-gencontrol -isp -p$packagename
-dpkg --build "$tmpdir" ..
+create_package "$packagename" "$tmpdir"
 
 exit 0
-- 
cgit v0.10.2


From a89b433bddea41a743d7937c87be5290ef4f1057 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Thu, 23 Apr 2009 01:09:04 +0200
Subject: kbuild, deb-pkg: fix 'file not found' error when building .deb
 package for arm

Not all architectures prepend the $(boot) path in $(KBUILD_IMAGE).
Allow for that fact in the builddeb script. Example is arm.

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: maximilian attems <max@stro.at>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index feebd69..4876ff3 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -53,7 +53,10 @@ if [ "$ARCH" = "um" ] ; then
 else 
 	cp System.map "$tmpdir/boot/System.map-$version"
 	cp .config "$tmpdir/boot/config-$version"
-	cp $KBUILD_IMAGE "$tmpdir/boot/vmlinuz-$version"
+	# Not all arches include the boot path in KBUILD_IMAGE
+	if ! cp $KBUILD_IMAGE "$tmpdir/boot/vmlinuz-$version"; then
+		cp arch/$ARCH/boot/$KBUILD_IMAGE "$tmpdir/boot/vmlinuz-$version"
+	fi
 fi
 
 if grep -q '^CONFIG_MODULES=y' .config ; then
-- 
cgit v0.10.2


From 4964451a321cfd6a05ea32af1317345c53b3ecb7 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Thu, 23 Apr 2009 01:09:25 +0200
Subject: kbuild, deb-pkg: pass Debian maintainer script parameters to
 packaging hook scripts

The Debian packaging scripts created by the deb-pkg target do not pass
on the standard Debian maintainer script parameters to hook scripts,
which means that those scripts cannot tell whether they are being called
during e.g. install vs. upgrade, or removal vs. purge of the package.

As there are several variantions in how hook scripts are called from
kernel packages, we pass the parameters in the environment variable
DEB_MAINT_PARAMS rather than as extra arguments.

Bump version of builddep script to 1.3.

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: maximilian attems <max@stro.at>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 4876ff3..5eecbbe 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -1,6 +1,6 @@
 #!/bin/sh
 #
-# builddeb 1.2
+# builddeb 1.3
 # Copyright 2003 Wichert Akkerman <wichert@wiggy.net>
 #
 # Simple script to generate a deb package for a Linux kernel. All the
@@ -75,6 +75,9 @@ for script in postinst postrm preinst prerm ; do
 
 set -e
 
+# Pass maintainer script parameters to hook scripts
+export DEB_MAINT_PARAMS="\$@"
+
 test -d /etc/kernel/$script.d && run-parts --arg="$version" /etc/kernel/$script.d
 exit 0
 EOF
-- 
cgit v0.10.2


From c72c75db86cf9f53c4c0df6724c0cf06db017652 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Thu, 23 Apr 2009 01:09:44 +0200
Subject: kbuild, deb-pkg: allow to specify a custom revision for .deb packages

Allow to specify a custom revision for the generated .deb by
exporting the environment variable KDEB_PKGVERSION.

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: maximilian attems <max@stro.at>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 5eecbbe..5868c0f 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -26,6 +26,11 @@ create_package() {
 # Some variables and settings used throughout the script
 version=$KERNELRELEASE
 revision=$(cat .version)
+if [ -n "$KDEB_PKGVERSION" ]; then
+	packageversion=$KDEB_PKGVERSION
+else
+	packageversion=$version-$revision
+fi
 tmpdir="$objtree/debian/tmp"
 fwdir="$objtree/debian/fwtmp"
 packagename=linux-$version
@@ -87,7 +92,7 @@ done
 name="Kernel Compiler <$(id -nu)@$(hostname -f)>"
 # Generate a simple changelog template
 cat <<EOF > debian/changelog
-linux ($version-$revision) unstable; urgency=low
+linux ($packageversion) unstable; urgency=low
 
   * A standard release
 
-- 
cgit v0.10.2


From fe233cb6bfd36fcf5a36bbde7fa116d8ab5f4301 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Thu, 23 Apr 2009 01:10:10 +0200
Subject: kbuild, deb-pkg: allow alternative hook scripts directory in .deb
 packages

Hook scripts in the default directory /etc/kernel are also executed by
official Debian kernel packages as well as kernel packages created using
make-kpkg. Allow to specify an alternative hook scripts directory by
exporting the environment variable KDEB_HOOKDIR.

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: maximilian attems <max@stro.at>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 5868c0f..c01f812 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -6,8 +6,9 @@
 # Simple script to generate a deb package for a Linux kernel. All the
 # complexity of what to do with a kernel after it is installed or removed
 # is left to other scripts and packages: they can install scripts in the
-# /etc/kernel/{pre,post}{inst,rm}.d/ directories that will be called on
-# package install and removal.
+# /etc/kernel/{pre,post}{inst,rm}.d/ directories (or an alternative location
+# specified in KDEB_HOOKDIR) that will be called on package install and
+# removal.
 
 set -e
 
@@ -73,8 +74,11 @@ if grep -q '^CONFIG_MODULES=y' .config ; then
 fi
 
 # Install the maintainer scripts
+# Note: hook scripts under /etc/kernel are also executed by official Debian
+# kernel packages, as well as kernel packages built using make-kpkg
+debhookdir=${KDEB_HOOKDIR:-/etc/kernel}
 for script in postinst postrm preinst prerm ; do
-	mkdir -p "$tmpdir/etc/kernel/$script.d"
+	mkdir -p "$tmpdir$debhookdir/$script.d"
 	cat <<EOF > "$tmpdir/DEBIAN/$script"
 #!/bin/sh
 
@@ -83,7 +87,7 @@ set -e
 # Pass maintainer script parameters to hook scripts
 export DEB_MAINT_PARAMS="\$@"
 
-test -d /etc/kernel/$script.d && run-parts --arg="$version" /etc/kernel/$script.d
+test -d $debhookdir/$script.d && run-parts --arg="$version" $debhookdir/$script.d
 exit 0
 EOF
 	chmod 755 "$tmpdir/DEBIAN/$script"
-- 
cgit v0.10.2


From a83ca2777f7f28e365de7c87903bad5e928f87ad Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Thu, 23 Apr 2009 01:10:25 +0200
Subject: kbuild, deb-pkg: improve changelog entry and package descriptions

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: maximilian attems <max@stro.at>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index c01f812..0449147 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -98,7 +98,7 @@ name="Kernel Compiler <$(id -nu)@$(hostname -f)>"
 cat <<EOF > debian/changelog
 linux ($packageversion) unstable; urgency=low
 
-  * A standard release
+  * Custom built Linux kernel.
 
  -- $name  $(date -R)
 EOF
@@ -126,7 +126,7 @@ Description: User Mode Linux kernel, version $version
  many other things.
  .
  This package contains the Linux kernel, modules and corresponding other
- files version $version
+ files, version: $version.
 EOF
 
 else
@@ -138,7 +138,7 @@ Suggests: $fwpackagename
 Architecture: any
 Description: Linux kernel, version $version
  This package contains the Linux kernel, modules and corresponding other
- files version $version
+ files, version: $version.
 EOF
 
 fi
@@ -152,7 +152,7 @@ if [ -e "$tmpdir/lib/firmware" ]; then
 Package: $fwpackagename
 Architecture: all
 Description: Linux kernel firmware, version $version
- This package contains firmware from the Linux kernel, version $version
+ This package contains firmware from the Linux kernel, version $version.
 EOF
 
 	create_package "$fwpackagename" "$fwdir"
-- 
cgit v0.10.2


From 9461f666e42f2412e134a49e90ffd4a3340dfc0a Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Fri, 24 Apr 2009 19:08:24 +0200
Subject: kbuild, deb-pkg: generate debian/copyright file

On Thursday 23 April 2009, Frans Pop wrote:
Add a basic debian/copyright to the binary packages.

Based on an earlier patch from Maximilian Attems.

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: maximilian attems <max@stro.at>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 0449147..122becc 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -15,6 +15,8 @@ set -e
 create_package() {
 	local pname="$1" pdir="$2"
 
+	cp debian/copyright "$pdir/usr/share/doc/$pname/"
+
 	# Fix ownership and permissions
 	chown -R root:root "$pdir"
 	chmod -R go-w "$pdir"
@@ -43,10 +45,10 @@ fi
 
 # Setup the directory structure
 rm -rf "$tmpdir" "$fwdir"
-mkdir -p "$tmpdir/DEBIAN" "$tmpdir/lib" "$tmpdir/boot"
-mkdir -p "$fwdir/DEBIAN" "$fwdir/lib"
+mkdir -p "$tmpdir/DEBIAN" "$tmpdir/lib" "$tmpdir/boot" "$tmpdir/usr/share/doc/$packagename"
+mkdir -p "$fwdir/DEBIAN" "$fwdir/lib" "$fwdir/usr/share/doc/$fwpackagename"
 if [ "$ARCH" = "um" ] ; then
-	mkdir -p "$tmpdir/usr/lib/uml/modules/$version" "$tmpdir/usr/share/doc/$packagename" "$tmpdir/usr/bin"
+	mkdir -p "$tmpdir/usr/lib/uml/modules/$version" "$tmpdir/usr/bin"
 fi
 
 # Build and install the kernel
@@ -103,6 +105,26 @@ linux ($packageversion) unstable; urgency=low
  -- $name  $(date -R)
 EOF
 
+# Generate copyright file
+cat <<EOF > debian/copyright
+This is a packacked upstream version of the Linux kernel.
+
+The sources may be found at most Linux ftp sites, including:
+ftp://ftp.kernel.org/pub/linux/kernel
+
+Copyright: 1991 - 2009 Linus Torvalds and others.
+
+The git repository for mainline kernel development is at:
+git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; version 2 dated June, 1991.
+
+On Debian GNU/Linux systems, the complete text of the GNU General Public
+License version 2 can be found in \`/usr/share/common-licenses/GPL-2'.
+EOF
+
 # Generate a control file
 cat <<EOF > debian/control
 Source: linux
-- 
cgit v0.10.2


From edec611db0474e90503d46428e4f196d5e30c091 Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Thu, 23 Apr 2009 01:11:20 +0200
Subject: kbuild, deb-pkg: improve maintainer identification

Try harder to find email and maintainer name.
Debian's own devscripts all use DEBEMAIL or DEBFULLNAME prior to an
eventual EMAIL or NAME environment variable. Match their logic.

"Anonymous" sounds nicer then "Kernel Compiler" if no name is found.

Signed-off-by: maximilian attems <max@stro.at>
Signed-off-by: Frans Pop <elendil@planet.nl>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 122becc..9d464fd 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -95,14 +95,30 @@ EOF
 	chmod 755 "$tmpdir/DEBIAN/$script"
 done
 
-name="Kernel Compiler <$(id -nu)@$(hostname -f)>"
+# Try to determine maintainer and email values
+if [ -n "$DEBEMAIL" ]; then
+       email=$DEBEMAIL
+elif [ -n "$EMAIL" ]; then
+       email=$EMAIL
+else
+       email=$(id -nu)@$(hostname -f)
+fi
+if [ -n "$DEBFULLNAME" ]; then
+       name=$DEBFULLNAME
+elif [ -n "$NAME" ]; then
+       name=$NAME
+else
+       name="Anonymous"
+fi
+maintainer="$name <$email>"
+
 # Generate a simple changelog template
 cat <<EOF > debian/changelog
 linux ($packageversion) unstable; urgency=low
 
   * Custom built Linux kernel.
 
- -- $name  $(date -R)
+ -- $maintainer  $(date -R)
 EOF
 
 # Generate copyright file
@@ -130,7 +146,7 @@ cat <<EOF > debian/control
 Source: linux
 Section: base
 Priority: optional
-Maintainer: $name
+Maintainer: $maintainer
 Standards-Version: 3.6.1
 EOF
 
-- 
cgit v0.10.2


From 4bf4cd4939975c2f6375443212109dbb8c4e8731 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Thu, 23 Apr 2009 01:11:43 +0200
Subject: kbuild, deb-pkg: improve Source field

The Source: field is defined as the source package in the package
archive from which a binary packages are built. As deb-pkg does not
generate a source package, we should avoid to use any existing source
packages here.

Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: maximilian attems <max@stro.at>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 9d464fd..6d3b7e5 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -114,7 +114,7 @@ maintainer="$name <$email>"
 
 # Generate a simple changelog template
 cat <<EOF > debian/changelog
-linux ($packageversion) unstable; urgency=low
+linux-upstream ($packageversion) unstable; urgency=low
 
   * Custom built Linux kernel.
 
@@ -143,7 +143,7 @@ EOF
 
 # Generate a control file
 cat <<EOF > debian/control
-Source: linux
+Source: linux-upstream
 Section: base
 Priority: optional
 Maintainer: $maintainer
-- 
cgit v0.10.2


From f7a2c31f1e4e622056c8a14260937e87f1277867 Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Thu, 23 Apr 2009 01:12:01 +0200
Subject: kbuild, deb-pkg: fix generated package name

The binary package that make deb-pkg creates is a linux-image.
To be fixed may also be the addition of $DEB_ARCH.

Signed-off-by: maximilian attems <max@stro.at>
Signed-off-by: Frans Pop <elendil@planet.nl>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 6d3b7e5..1ade570 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -36,7 +36,7 @@ else
 fi
 tmpdir="$objtree/debian/tmp"
 fwdir="$objtree/debian/fwtmp"
-packagename=linux-$version
+packagename=linux-image-$version
 fwpackagename=linux-firmware-image
 
 if [ "$ARCH" = "um" ] ; then
-- 
cgit v0.10.2


From 8ebc2fe9c5e502a0f34be498b0732bb3b897b12d Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Thu, 23 Apr 2009 01:12:21 +0200
Subject: kbuild, deb-pkg: fix Provides field

kernel-image naming has been dropped for the Lenny release
and was only transitional for Etch.

As it builds modules it provides linux-modules-$version.

Signed-off-by: maximilian attems <max@stro.at>
Signed-off-by: Frans Pop <elendil@planet.nl>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 1ade570..e1dd189 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -154,7 +154,7 @@ if [ "$ARCH" = "um" ]; then
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Provides: kernel-image-$version, linux-image-$version
+Provides: linux-image, linux-image-2.6, linux-modules-$version
 Architecture: any
 Description: User Mode Linux kernel, version $version
  User-mode Linux is a port of the Linux kernel to its own system call
@@ -171,7 +171,7 @@ else
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Provides: kernel-image-$version, linux-image-$version
+Provides: linux-image, linux-image-2.6, linux-modules-$version
 Suggests: $fwpackagename
 Architecture: any
 Description: Linux kernel, version $version
-- 
cgit v0.10.2


From 54c3355658bb66bbe20aef9b6297d72ac85571f4 Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Thu, 23 Apr 2009 01:12:41 +0200
Subject: kbuild, deb-pkg: fix Section field

Section "base" has been removed, the base is defined by Priority field.

For Squeeze the section should be "kernel", but as that's not yet
supported for Sarge and Etch we stay with admin for now.

Signed-off-by: maximilian attems <max@stro.at>
Signed-off-by: Frans Pop <elendil@planet.nl>
Cc: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index e1dd189..9708d26 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -144,7 +144,7 @@ EOF
 # Generate a control file
 cat <<EOF > debian/control
 Source: linux-upstream
-Section: base
+Section: admin
 Priority: optional
 Maintainer: $maintainer
 Standards-Version: 3.6.1
-- 
cgit v0.10.2


From 6d9923219c6cd1df360f8823253717623f3ad348 Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Thu, 23 Apr 2009 01:12:58 +0200
Subject: kbuild, deb-pkg: bump standards version

Latest Debian policy is 3.8.1.
Even if we are not yet compliant to it strive for the latest.

Signed-off-by: maximilian attems <max@stro.at>
Signed-off-by: Frans Pop <elendil@planet.nl>
Acked-by: Andres Salomon <dilinger@debian.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 9708d26..01c2d13 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -147,7 +147,7 @@ Source: linux-upstream
 Section: admin
 Priority: optional
 Maintainer: $maintainer
-Standards-Version: 3.6.1
+Standards-Version: 3.8.1
 EOF
 
 if [ "$ARCH" = "um" ]; then
-- 
cgit v0.10.2


From fd6c3a8dc44329d3aff9a578b5120982f63711ee Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 12 Mar 2009 10:58:33 +0000
Subject: initconst adjustments

- add .init.rodata to INIT_DATA, and group all initconst flavors
  together
- move strings generated from __setup_param() into .init.rodata
- add .*init.rodata to modpost's sets of init sections
- make modpost warn about references between meminit and cpuinit
  as well as memexit and cpuexit sections (as CPU and memory
  hotplug are independently selectable features)

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 89853bc..3edb114 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -336,10 +336,11 @@
 #define INIT_DATA							\
 	*(.init.data)							\
 	DEV_DISCARD(init.data)						\
-	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.data)						\
-	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.data)						\
+	*(.init.rodata)							\
+	DEV_DISCARD(init.rodata)					\
+	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.rodata)
 
 #define INIT_TEXT							\
diff --git a/include/linux/init.h b/include/linux/init.h
index 0e06c17..9f70c9f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -225,7 +225,8 @@ struct obs_kernel_param {
  * obs_kernel_param "array" too far apart in .init.setup.
  */
 #define __setup_param(str, unique_id, fn, early)			\
-	static char __setup_str_##unique_id[] __initdata __aligned(1) = str; \
+	static const char __setup_str_##unique_id[] __initconst	\
+		__aligned(1) = str; \
 	static struct obs_kernel_param __setup_##unique_id	\
 		__used __section(.init.setup)			\
 		__attribute__((aligned((sizeof(long)))))	\
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 161b784..94e71ef 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -763,6 +763,8 @@ static void check_section(const char *modname, struct elf_info *elf,
 
 
 #define ALL_INIT_DATA_SECTIONS \
+	".init.setup$", ".init.rodata$", \
+	".devinit.rodata$", ".cpuinit.rodata$", ".meminit.rodata$" \
 	".init.data$", ".devinit.data$", ".cpuinit.data$", ".meminit.data$"
 #define ALL_EXIT_DATA_SECTIONS \
 	".exit.data$", ".devexit.data$", ".cpuexit.data$", ".memexit.data$"
@@ -772,21 +774,23 @@ static void check_section(const char *modname, struct elf_info *elf,
 #define ALL_EXIT_TEXT_SECTIONS \
 	".exit.text$", ".devexit.text$", ".cpuexit.text$", ".memexit.text$"
 
-#define ALL_INIT_SECTIONS ALL_INIT_DATA_SECTIONS, ALL_INIT_TEXT_SECTIONS
-#define ALL_EXIT_SECTIONS ALL_EXIT_DATA_SECTIONS, ALL_EXIT_TEXT_SECTIONS
+#define ALL_INIT_SECTIONS INIT_SECTIONS, DEV_INIT_SECTIONS, \
+	CPU_INIT_SECTIONS, MEM_INIT_SECTIONS
+#define ALL_EXIT_SECTIONS EXIT_SECTIONS, DEV_EXIT_SECTIONS, \
+	CPU_EXIT_SECTIONS, MEM_EXIT_SECTIONS
 
 #define DATA_SECTIONS ".data$", ".data.rel$"
 #define TEXT_SECTIONS ".text$"
 
-#define INIT_SECTIONS      ".init.data$", ".init.text$"
-#define DEV_INIT_SECTIONS  ".devinit.data$", ".devinit.text$"
-#define CPU_INIT_SECTIONS  ".cpuinit.data$", ".cpuinit.text$"
-#define MEM_INIT_SECTIONS  ".meminit.data$", ".meminit.text$"
+#define INIT_SECTIONS      ".init.*"
+#define DEV_INIT_SECTIONS  ".devinit.*"
+#define CPU_INIT_SECTIONS  ".cpuinit.*"
+#define MEM_INIT_SECTIONS  ".meminit.*"
 
-#define EXIT_SECTIONS      ".exit.data$", ".exit.text$"
-#define DEV_EXIT_SECTIONS  ".devexit.data$", ".devexit.text$"
-#define CPU_EXIT_SECTIONS  ".cpuexit.data$", ".cpuexit.text$"
-#define MEM_EXIT_SECTIONS  ".memexit.data$", ".memexit.text$"
+#define EXIT_SECTIONS      ".exit.*"
+#define DEV_EXIT_SECTIONS  ".devexit.*"
+#define CPU_EXIT_SECTIONS  ".cpuexit.*"
+#define MEM_EXIT_SECTIONS  ".memexit.*"
 
 /* init data sections */
 static const char *init_data_sections[] = { ALL_INIT_DATA_SECTIONS, NULL };
@@ -869,12 +873,36 @@ const struct sectioncheck sectioncheck[] = {
 	.tosec   = { INIT_SECTIONS, NULL },
 	.mismatch = XXXINIT_TO_INIT,
 },
+/* Do not reference cpuinit code/data from meminit code/data */
+{
+	.fromsec = { MEM_INIT_SECTIONS, NULL },
+	.tosec   = { CPU_INIT_SECTIONS, NULL },
+	.mismatch = XXXINIT_TO_INIT,
+},
+/* Do not reference meminit code/data from cpuinit code/data */
+{
+	.fromsec = { CPU_INIT_SECTIONS, NULL },
+	.tosec   = { MEM_INIT_SECTIONS, NULL },
+	.mismatch = XXXINIT_TO_INIT,
+},
 /* Do not reference exit code/data from devexit/cpuexit/memexit code/data */
 {
 	.fromsec = { DEV_EXIT_SECTIONS, CPU_EXIT_SECTIONS, MEM_EXIT_SECTIONS, NULL },
 	.tosec   = { EXIT_SECTIONS, NULL },
 	.mismatch = XXXEXIT_TO_EXIT,
 },
+/* Do not reference cpuexit code/data from memexit code/data */
+{
+	.fromsec = { MEM_EXIT_SECTIONS, NULL },
+	.tosec   = { CPU_EXIT_SECTIONS, NULL },
+	.mismatch = XXXEXIT_TO_EXIT,
+},
+/* Do not reference memexit code/data from cpuexit code/data */
+{
+	.fromsec = { CPU_EXIT_SECTIONS, NULL },
+	.tosec   = { MEM_EXIT_SECTIONS, NULL },
+	.mismatch = XXXEXIT_TO_EXIT,
+},
 /* Do not use exit code/data from init code */
 {
 	.fromsec = { ALL_INIT_SECTIONS, NULL },
-- 
cgit v0.10.2


From cefdff4b4d5ba3f9b7eee2c1c1827553794cb465 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Thu, 4 Jun 2009 22:12:09 -0400
Subject: kbuild: clean up scripts/headers.sh

'drop' variable is unused.

'ppc' and 'sparc64' directories don't exist in arch/,
and I think their headers can be well exported now, so
just remove them.

Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/headers.sh b/scripts/headers.sh
index d33426f..0308ecc 100755
--- a/scripts/headers.sh
+++ b/scripts/headers.sh
@@ -15,19 +15,12 @@ do_command()
 	fi
 }
 
-# Do not try this architecture
-drop="generic um ppc sparc64 cris"
-
 archs=$(ls ${srctree}/arch)
 
 for arch in ${archs}; do
 	case ${arch} in
 	um)        # no userspace export
 		;;
-	ppc)       # headers exported by powerpc
-		;;
-	sparc64)   # headers exported by sparc
-		;;
 	cris)      # headers export are known broken
 		;;
 	*)
-- 
cgit v0.10.2


From 2d51005c27a12e43d672debbbb918e7f74987f96 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sun, 31 May 2009 18:05:34 +0200
Subject: kbuild: allow docproc invocation from external

- getcwd returns path without a slash at the end, add the slash
- add KBUILD_SRC env support, so that we can specify path for
  kernel (to know where scripts/kernel-doc resides) and SRCTREE
  (for searching files referenced in .tmpl) separately

[v2]
- use KBUILD_SRC instead of a newly introduced environment variable

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c
index 35bdc68..4c9523e 100644
--- a/scripts/basic/docproc.c
+++ b/scripts/basic/docproc.c
@@ -69,7 +69,7 @@ FILELINE * docsection;
 #define NOFUNCTION    "-nofunction"
 #define NODOCSECTIONS "-no-doc-sections"
 
-char *srctree;
+static char *srctree, *kernsrctree;
 
 void usage (void)
 {
@@ -77,7 +77,8 @@ void usage (void)
 	fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n");
 	fprintf(stderr, "doc: frontend when generating kernel documentation\n");
 	fprintf(stderr, "depend: generate list of files referenced within file\n");
-	fprintf(stderr, "Environment variable SRCTREE: absolute path to kernel source tree.\n");
+	fprintf(stderr, "Environment variable SRCTREE: absolute path to sources.\n");
+	fprintf(stderr, "                     KBUILD_SRC: absolute path to kernel source tree.\n");
 }
 
 /*
@@ -96,8 +97,8 @@ void exec_kernel_doc(char **svec)
 			exit(1);
 		case  0:
 			memset(real_filename, 0, sizeof(real_filename));
-			strncat(real_filename, srctree, PATH_MAX);
-			strncat(real_filename, KERNELDOCPATH KERNELDOC,
+			strncat(real_filename, kernsrctree, PATH_MAX);
+			strncat(real_filename, "/" KERNELDOCPATH KERNELDOC,
 					PATH_MAX - strlen(real_filename));
 			execvp(real_filename, svec);
 			fprintf(stderr, "exec ");
@@ -178,6 +179,7 @@ void find_export_symbols(char * filename)
 		char real_filename[PATH_MAX + 1];
 		memset(real_filename, 0, sizeof(real_filename));
 		strncat(real_filename, srctree, PATH_MAX);
+		strncat(real_filename, "/", PATH_MAX - strlen(real_filename));
 		strncat(real_filename, filename,
 				PATH_MAX - strlen(real_filename));
 		sym = add_new_file(filename);
@@ -382,6 +384,9 @@ int main(int argc, char *argv[])
 	srctree = getenv("SRCTREE");
 	if (!srctree)
 		srctree = getcwd(NULL, 0);
+	kernsrctree = getenv("KBUILD_SRC");
+	if (!kernsrctree)
+		kernsrctree = srctree;
 	if (argc != 3) {
 		usage();
 		exit(1);
-- 
cgit v0.10.2


From 3e56f08bffe9e3e2b936eb73bd51d8800d1b42c2 Mon Sep 17 00:00:00 2001
From: David VomLehn <dvomlehn@cisco.com>
Date: Sat, 30 May 2009 18:13:32 -0700
Subject: kbuild/Documentation: Incorrect makefile syntax in example

There is an error in the make syntax for one of the kbuild examples

Signed-off-by: David VomLehn <dvomlehn@cisco.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index b1096da..0767cf6 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -275,7 +275,7 @@ following files:
 
 		KERNELDIR := /lib/modules/`uname -r`/build
 		all::
-			$(MAKE) -C $KERNELDIR M=`pwd` $@
+			$(MAKE) -C $(KERNELDIR) M=`pwd` $@
 
 		# Module specific targets
 		genbin:
-- 
cgit v0.10.2


From b8b0618cf6fab3bd5b1da8c72f4b29847d81ac42 Mon Sep 17 00:00:00 2001
From: Cheng Renquan <crq@kernel.org>
Date: Tue, 26 May 2009 16:03:07 +0800
Subject: kbuild: remove extra ifdef/endif of top Makefile

The GNU make's origin function know undefined variable well,
so the outer ifdef/endif conditional checking is unneeded.

From `info make` documentation, origin will return

  `undefined'
     if VARIABLE was never defined.
  `command line'
     if VARIABLE was defined on the command line.
   ...

Therefore, $(origin V) will get a value anyway, killing ifdef/endif is
viable and safe.

Furthermore, I've checked the minimal requirements from
Documentation/Changes is GNU make 3.79.1, and that version of GNU make
has support of origin function well already, so now it's safe to kill
the outer conditional checking, without upgrading the minimal
requirements.

Signed-off-by: Cheng Renquan <crq@kernel.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Makefile b/Makefile
index 1065154..0c8ee91 100644
--- a/Makefile
+++ b/Makefile
@@ -35,10 +35,8 @@ MAKEFLAGS += -rR --no-print-directory
 # To put more focus on warnings, be less verbose as default
 # Use 'make V=1' to see the full commands
 
-ifdef V
-  ifeq ("$(origin V)", "command line")
-    KBUILD_VERBOSE = $(V)
-  endif
+ifeq ("$(origin V)", "command line")
+  KBUILD_VERBOSE = $(V)
 endif
 ifndef KBUILD_VERBOSE
   KBUILD_VERBOSE = 0
@@ -54,10 +52,8 @@ endif
 # See the file "Documentation/sparse.txt" for more details, including
 # where to get the "sparse" utility.
 
-ifdef C
-  ifeq ("$(origin C)", "command line")
-    KBUILD_CHECKSRC = $(C)
-  endif
+ifeq ("$(origin C)", "command line")
+  KBUILD_CHECKSRC = $(C)
 endif
 ifndef KBUILD_CHECKSRC
   KBUILD_CHECKSRC = 0
@@ -69,12 +65,10 @@ endif
 ifdef SUBDIRS
   KBUILD_EXTMOD ?= $(SUBDIRS)
 endif
-ifdef M
-  ifeq ("$(origin M)", "command line")
-    KBUILD_EXTMOD := $(M)
-  endif
-endif
 
+ifeq ("$(origin M)", "command line")
+  KBUILD_EXTMOD := $(M)
+endif
 
 # kbuild supports saving output files in a separate directory.
 # To locate output files in a separate directory two syntaxes are supported.
@@ -98,10 +92,8 @@ ifeq ($(KBUILD_SRC),)
 
 # OK, Make called in directory where kernel src resides
 # Do we want to locate output files in a separate directory?
-ifdef O
-  ifeq ("$(origin O)", "command line")
-    KBUILD_OUTPUT := $(O)
-  endif
+ifeq ("$(origin O)", "command line")
+  KBUILD_OUTPUT := $(O)
 endif
 
 # That's our default target when none is given on the command line
-- 
cgit v0.10.2


From 6f26e5e412ddc23e1d698390243e39802410ab15 Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:45 +0200
Subject: kconfig: fix typo "mconfig" to "menuconfig" in a comment

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index fa8c2dd..47e3f2e 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -104,7 +104,7 @@ HOST_EXTRACFLAGS += -DLOCALE
 # ===========================================================================
 # Shared Makefile for the various kconfig executables:
 # conf:	  Used for defconfig, oldconfig and related targets
-# mconf:  Used for the mconfig target.
+# mconf:  Used for the menuconfig target
 #         Utilizes the lxdialog package
 # qconf:  Used for the xconfig target
 #         Based on QT which needs to be installed to compile it
-- 
cgit v0.10.2


From 590a5857291e88c00a1be53fb373e8bf82f86284 Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:46 +0200
Subject: kconfig: add a note about the deps to the 'silentoldconfig' help

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/README b/README
index d6c6c74..7a078ff 100644
--- a/README
+++ b/README
@@ -174,6 +174,7 @@ CONFIGURING the kernel:
 	"make silentoldconfig"
 			   Like above, but avoids cluttering the screen
 			   with questions already answered.
+			   Additionally updates the dependencies.
 	"make defconfig"   Create a ./.config file by using the default
 			   symbol values from arch/$ARCH/defconfig.
 	"make allyesconfig"
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 47e3f2e..5ddf8be 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -83,7 +83,7 @@ help:
 	@echo  '  xconfig	  - Update current config utilising a QT based front-end'
 	@echo  '  gconfig	  - Update current config utilising a GTK based front-end'
 	@echo  '  oldconfig	  - Update current config utilising a provided .config as base'
-	@echo  '  silentoldconfig - Same as oldconfig, but quietly'
+	@echo  '  silentoldconfig - Same as oldconfig, but quietly, additionally update deps'
 	@echo  '  randconfig	  - New config with random answer to all options'
 	@echo  '  defconfig	  - New config with default answer to all options'
 	@echo  '  allmodconfig	  - New config selecting modules when possible'
-- 
cgit v0.10.2


From 98f540d31ba0d3598b52177e194dde0bc498352d Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:47 +0200
Subject: kconfig: resort the documentation of the environment variables

All the KCONFIG_ environment variables were previously located in a
section "Environment variables in 'menuconfig'", but neither are they
restricted to 'menuconfig' nor are they all used by 'menuconfig'.

Introduce the following three sections for these variables:
  * Environment variables for '*config'
  * Environment variables for '{allyes/allmod/allno/rand}config'
  * Environment variables for 'silentoldconfig'

Furthermore this puts MENUCONFIG_MODE next to MENUCONFIG_COLOR into a
common section "User interface options for 'menuconfig'".

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt
index 26a7c0a..849b5e5 100644
--- a/Documentation/kbuild/kconfig.txt
+++ b/Documentation/kbuild/kconfig.txt
@@ -35,48 +35,26 @@ new .config files to see the differences:
 
 (Yes, we need something better here.)
 
-
-======================================================================
-menuconfig
---------------------------------------------------
-
-SEARCHING for CONFIG symbols
-
-Searching in menuconfig:
-
-	The Search function searches for kernel configuration symbol
-	names, so you have to know something close to what you are
-	looking for.
-
-	Example:
-		/hotplug
-		This lists all config symbols that contain "hotplug",
-		e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG.
-
-	For search help, enter / followed TAB-TAB-TAB (to highlight
-	<Help>) and Enter.  This will tell you that you can also use
-	regular expressions (regexes) in the search string, so if you
-	are not interested in MEMORY_HOTPLUG, you could try
-
-		/^hotplug
-
-
 ______________________________________________________________________
-Color Themes for 'menuconfig'
+Environment variables for '*config'
 
-It is possible to select different color themes using the variable
-MENUCONFIG_COLOR.  To select a theme use:
+KCONFIG_CONFIG
+--------------------------------------------------
+This environment variable can be used to specify a default kernel config
+file name to override the default name of ".config".
 
-	make MENUCONFIG_COLOR=<theme> menuconfig
+KCONFIG_OVERWRITECONFIG
+--------------------------------------------------
+If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
+break symlinks when .config is a symlink to somewhere else.
 
-Available themes are:
-  mono       => selects colors suitable for monochrome displays
-  blackbg    => selects a color scheme with black background
-  classic    => theme with blue background. The classic look
-  bluetitle  => a LCD friendly version of classic. (default)
+KCONFIG_NOTIMESTAMP
+--------------------------------------------------
+If this environment variable exists and is non-null, the timestamp line
+in generated .config files is omitted.
 
 ______________________________________________________________________
-Environment variables in 'menuconfig'
+Environment variables for '{allyes/allmod/allno/rand}config'
 
 KCONFIG_ALLCONFIG
 --------------------------------------------------
@@ -95,8 +73,7 @@ values.
 This enables you to create "miniature" config (miniconfig) or custom
 config files containing just the config symbols that you are interested
 in.  Then the kernel config system generates the full .config file,
-including dependencies of your miniconfig file, based on the miniconfig
-file.
+including symbols of your miniconfig file.
 
 This 'KCONFIG_ALLCONFIG' file is a config file which contains
 (usually a subset of all) preset config symbols.  These variable
@@ -113,26 +90,14 @@ These examples will disable most options (allnoconfig) but enable or
 disable the options that are explicitly listed in the specified
 mini-config files.
 
+______________________________________________________________________
+Environment variables for 'silentoldconfig'
+
 KCONFIG_NOSILENTUPDATE
 --------------------------------------------------
 If this variable has a non-blank value, it prevents silent kernel
 config udpates (requires explicit updates).
 
-KCONFIG_CONFIG
---------------------------------------------------
-This environment variable can be used to specify a default kernel config
-file name to override the default name of ".config".
-
-KCONFIG_OVERWRITECONFIG
---------------------------------------------------
-If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
-break symlinks when .config is a symlink to somewhere else.
-
-KCONFIG_NOTIMESTAMP
---------------------------------------------------
-If this environment variable exists and is non-null, the timestamp line
-in generated .config files is omitted.
-
 KCONFIG_AUTOCONFIG
 --------------------------------------------------
 This environment variable can be set to specify the path & name of the
@@ -143,15 +108,54 @@ KCONFIG_AUTOHEADER
 This environment variable can be set to specify the path & name of the
 "autoconf.h" (header) file.  Its default value is "include/linux/autoconf.h".
 
+
+======================================================================
+menuconfig
+--------------------------------------------------
+
+SEARCHING for CONFIG symbols
+
+Searching in menuconfig:
+
+	The Search function searches for kernel configuration symbol
+	names, so you have to know something close to what you are
+	looking for.
+
+	Example:
+		/hotplug
+		This lists all config symbols that contain "hotplug",
+		e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG.
+
+	For search help, enter / followed TAB-TAB-TAB (to highlight
+	<Help>) and Enter.  This will tell you that you can also use
+	regular expressions (regexes) in the search string, so if you
+	are not interested in MEMORY_HOTPLUG, you could try
+
+		/^hotplug
+
 ______________________________________________________________________
-menuconfig User Interface Options
-----------------------------------------------------------------------
+User interface options for 'menuconfig'
+
+MENUCONFIG_COLOR
+--------------------------------------------------
+It is possible to select different color themes using the variable
+MENUCONFIG_COLOR.  To select a theme use:
+
+	make MENUCONFIG_COLOR=<theme> menuconfig
+
+Available themes are:
+  mono       => selects colors suitable for monochrome displays
+  blackbg    => selects a color scheme with black background
+  classic    => theme with blue background. The classic look
+  bluetitle  => a LCD friendly version of classic. (default)
+
 MENUCONFIG_MODE
 --------------------------------------------------
 This mode shows all sub-menus in one large tree.
 
 Example:
-	MENUCONFIG_MODE=single_menu make menuconfig
+	make MENUCONFIG_MODE=single_menu menuconfig
+
 
 ======================================================================
 xconfig
-- 
cgit v0.10.2


From c26dd719a5b9d94d12211f2d101bd7dffb7f9f1f Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:48 +0200
Subject: gitignore: ignore Kconfig i18n files

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore
index b49584c..6a36a76 100644
--- a/scripts/kconfig/.gitignore
+++ b/scripts/kconfig/.gitignore
@@ -8,6 +8,9 @@ lex.*.c
 zconf.hash.c
 *.moc
 lkc_defs.h
+gconf.glade.h
+*.pot
+*.mo
 
 #
 # configuration programs
-- 
cgit v0.10.2


From 98403a91389d11edd954e8f89dcffc3db525657f Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:49 +0200
Subject: kconfig qconf: fix -Wall compiler warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These compiler warnings occure when adding -Wall to HOSTCXXFLAGS in
/Makefile

scripts/kconfig/qconf.h: In constructor ‘ConfigInfoView::ConfigInfoView(QWidget*, const char*)’:
scripts/kconfig/qconf.h:274: warning: ‘ConfigInfoView::menu’ will be initialized after
scripts/kconfig/qconf.h:273: warning:   ‘symbol* ConfigInfoView::sym’
scripts/kconfig/qconf.cc:922: warning:   when initialized here

scripts/kconfig/qconf.cc: In member function ‘void ConfigMainWindow::setMenuLink(menu*)’:
scripts/kconfig/qconf.cc:1498: warning: enumeration value ‘menuMode’ not handled in switch
scripts/kconfig/qconf.cc:1498: warning: enumeration value ‘listMode’ not handled in switch

scripts/kconfig/qconf.cc: In member function ‘void ConfigMainWindow::saveSettings()’:
scripts/kconfig/qconf.cc:1664: warning: enumeration value ‘menuMode’ not handled in switch
scripts/kconfig/qconf.cc:1664: warning: enumeration value ‘listMode’ not handled in switch

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index 5d0fd38..2bd6ed0 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -920,7 +920,7 @@ void ConfigView::updateListAll(void)
 }
 
 ConfigInfoView::ConfigInfoView(QWidget* parent, const char *name)
-	: Parent(parent, name), menu(0), sym(0)
+	: Parent(parent, name), sym(0), menu(0)
 {
 	if (name) {
 		configSettings->beginGroup(name);
@@ -1524,6 +1524,8 @@ void ConfigMainWindow::setMenuLink(struct menu *menu)
 	case fullMode:
 		list = configList;
 		break;
+	default:
+		break;
 	}
 
 	if (list) {
@@ -1673,6 +1675,9 @@ void ConfigMainWindow::saveSettings(void)
 	case fullMode :
 		entry = "full";
 		break;
+
+	default:
+		break;
 	}
 	configSettings->writeEntry("/listMode", entry);
 
-- 
cgit v0.10.2


From 7298b936017859fce4906e38f485c131520fe857 Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:50 +0200
Subject: kconfig qconf: fix namespace for Horizontal and Vertical enum values

They were used as QSplitter::Horizontal resp. QSplitter::Vertical, but
are defined in the 'Qt' namespace.

Fixes the following compiler errors after a quick conversion with 'qt3to4',
which occured with g++ 3.4.6 and 4.1.2, but not anymore with 4.3.2.

scripts/kconfig/qconf.cc: In constructor 'ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow*, const char*)':
scripts/kconfig/qconf.cc:1213: error: 'Vertical' is not a member of 'QSplitter'

scripts/kconfig/qconf.cc: In constructor 'ConfigMainWindow::ConfigMainWindow()':
scripts/kconfig/qconf.cc:1304: error: 'Horizontal' is not a member of 'QSplitter'
scripts/kconfig/qconf.cc:1311: error: 'Vertical' is not a member of 'QSplitter'

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index 2bd6ed0..7433dac 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -1199,7 +1199,7 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow* parent, const char *nam
 	layout1->addLayout(layout2);
 
 	split = new QSplitter(this);
-	split->setOrientation(QSplitter::Vertical);
+	split->setOrientation(Qt::Vertical);
 	list = new ConfigView(split, name);
 	list->list->mode = listMode;
 	info = new ConfigInfoView(split, name);
@@ -1290,14 +1290,14 @@ ConfigMainWindow::ConfigMainWindow(void)
 		move(x, y);
 
 	split1 = new QSplitter(this);
-	split1->setOrientation(QSplitter::Horizontal);
+	split1->setOrientation(Qt::Horizontal);
 	setCentralWidget(split1);
 
 	menuView = new ConfigView(split1, "menu");
 	menuList = menuView->list;
 
 	split2 = new QSplitter(split1);
-	split2->setOrientation(QSplitter::Vertical);
+	split2->setOrientation(Qt::Vertical);
 
 	// create config tree
 	configView = new ConfigView(split2, "config");
-- 
cgit v0.10.2


From fbb86374445d97072dd994f1a4adf023bfd1e86e Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:51 +0200
Subject: kconfig qconf: add namespace for use of Key_ enum values

They are defined in the 'Qt' namespace.

Fixes the following compiler errors after a quick conversion with 'qt3to4',
which occured with g++ 3.4.6 and 4.1.2, but not anymore with 4.3.2.

scripts/kconfig/qconf.cc: In member function 'virtual void ConfigLineEdit::keyPressEvent(QKeyEvent*)':
scripts/kconfig/qconf.cc:311: error: 'Key_Escape' was not declared in this scope
scripts/kconfig/qconf.cc:313: error: 'Key_Return' was not declared in this scope
scripts/kconfig/qconf.cc:314: error: 'Key_Enter' was not declared in this scope

scripts/kconfig/qconf.cc: In member function 'virtual void ConfigList::keyPressEvent(QKeyEvent*)':
scripts/kconfig/qconf.cc:653: error: 'Key_Escape' was not declared in this scope
scripts/kconfig/qconf.cc:666: error: 'Key_Return' was not declared in this scope
scripts/kconfig/qconf.cc:667: error: 'Key_Enter' was not declared in this scope
scripts/kconfig/qconf.cc:681: error: 'Key_Space' was not declared in this scope
scripts/kconfig/qconf.cc:684: error: 'Key_N' was not declared in this scope
scripts/kconfig/qconf.cc:687: error: 'Key_M' was not declared in this scope
scripts/kconfig/qconf.cc:690: error: 'Key_Y' was not declared in this scope

scripts/kconfig/qconf.cc: In constructor 'ConfigMainWindow::ConfigMainWindow()':
scripts/kconfig/qconf.cc:1329: error: 'CTRL' was not declared in this scope
scripts/kconfig/qconf.cc:1329: error: 'Key_Q' was not declared in this scope
scripts/kconfig/qconf.cc:1331: error: 'Key_L' was not declared in this scope
scripts/kconfig/qconf.cc:1333: error: 'Key_S' was not declared in this scope
scripts/kconfig/qconf.cc:1340: error: 'Key_F' was not declared in this scope

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index 7433dac..afae613 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -297,10 +297,10 @@ void ConfigLineEdit::show(ConfigItem* i)
 void ConfigLineEdit::keyPressEvent(QKeyEvent* e)
 {
 	switch (e->key()) {
-	case Key_Escape:
+	case Qt::Key_Escape:
 		break;
-	case Key_Return:
-	case Key_Enter:
+	case Qt::Key_Return:
+	case Qt::Key_Enter:
 		sym_set_string_value(item->menu->sym, text().latin1());
 		parent()->updateList(item);
 		break;
@@ -639,7 +639,7 @@ void ConfigList::keyPressEvent(QKeyEvent* ev)
 	struct menu *menu;
 	enum prop_type type;
 
-	if (ev->key() == Key_Escape && mode != fullMode && mode != listMode) {
+	if (ev->key() == Qt::Key_Escape && mode != fullMode && mode != listMode) {
 		emit parentSelected();
 		ev->accept();
 		return;
@@ -652,8 +652,8 @@ void ConfigList::keyPressEvent(QKeyEvent* ev)
 	item = (ConfigItem*)i;
 
 	switch (ev->key()) {
-	case Key_Return:
-	case Key_Enter:
+	case Qt::Key_Return:
+	case Qt::Key_Enter:
 		if (item->goParent) {
 			emit parentSelected();
 			break;
@@ -667,16 +667,16 @@ void ConfigList::keyPressEvent(QKeyEvent* ev)
 			emit menuSelected(menu);
 			break;
 		}
-	case Key_Space:
+	case Qt::Key_Space:
 		changeValue(item);
 		break;
-	case Key_N:
+	case Qt::Key_N:
 		setValue(item, no);
 		break;
-	case Key_M:
+	case Qt::Key_M:
 		setValue(item, mod);
 		break;
-	case Key_Y:
+	case Qt::Key_Y:
 		setValue(item, yes);
 		break;
 	default:
@@ -1315,18 +1315,18 @@ ConfigMainWindow::ConfigMainWindow(void)
 	backAction = new QAction("Back", QPixmap(xpm_back), _("Back"), 0, this);
 	  connect(backAction, SIGNAL(activated()), SLOT(goBack()));
 	  backAction->setEnabled(FALSE);
-	QAction *quitAction = new QAction("Quit", _("&Quit"), CTRL+Key_Q, this);
+	QAction *quitAction = new QAction("Quit", _("&Quit"), Qt::CTRL + Qt::Key_Q, this);
 	  connect(quitAction, SIGNAL(activated()), SLOT(close()));
-	QAction *loadAction = new QAction("Load", QPixmap(xpm_load), _("&Load"), CTRL+Key_L, this);
+	QAction *loadAction = new QAction("Load", QPixmap(xpm_load), _("&Load"), Qt::CTRL + Qt::Key_L, this);
 	  connect(loadAction, SIGNAL(activated()), SLOT(loadConfig()));
-	saveAction = new QAction("Save", QPixmap(xpm_save), _("&Save"), CTRL+Key_S, this);
+	saveAction = new QAction("Save", QPixmap(xpm_save), _("&Save"), Qt::CTRL + Qt::Key_S, this);
 	  connect(saveAction, SIGNAL(activated()), SLOT(saveConfig()));
 	conf_set_changed_callback(conf_changed);
 	// Set saveAction's initial state
 	conf_changed();
 	QAction *saveAsAction = new QAction("Save As...", _("Save &As..."), 0, this);
 	  connect(saveAsAction, SIGNAL(activated()), SLOT(saveConfigAs()));
-	QAction *searchAction = new QAction("Find", _("&Find"), CTRL+Key_F, this);
+	QAction *searchAction = new QAction("Find", _("&Find"), Qt::CTRL + Qt::Key_F, this);
 	  connect(searchAction, SIGNAL(activated()), SLOT(searchConfig()));
 	QAction *singleViewAction = new QAction("Single View", QPixmap(xpm_single_view), _("Single View"), 0, this);
 	  connect(singleViewAction, SIGNAL(activated()), SLOT(showSingleView()));
-- 
cgit v0.10.2


From 8d90c97e46930d01e9394bceb6276c7175136bdc Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:52 +0200
Subject: kconfig qconf: fix the type of the desktop widget

QApplication::desktop() returns a pointer to QDesktopWidget, not to
QWidget.

Fixes the following compiler error after a quick conversion with 'qt3to4',
which occured with g++ 3.4.6 and 4.1.2, but not anymore with 4.3.2.

scripts/kconfig/qconf.cc: In constructor 'ConfigMainWindow::ConfigMainWindow()':
scripts/kconfig/qconf.cc:1289: error: cannot convert 'QDesktopWidget*' to 'QWidget*' in initialization

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index afae613..19811fc 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -5,6 +5,7 @@
 
 #include <qapplication.h>
 #include <qmainwindow.h>
+#include <qdesktopwidget.h>
 #include <qtoolbar.h>
 #include <qlayout.h>
 #include <qvbox.h>
@@ -1275,7 +1276,7 @@ ConfigMainWindow::ConfigMainWindow(void)
 	int x, y, width, height;
 	char title[256];
 
-	QWidget *d = configApp->desktop();
+	QDesktopWidget *d = configApp->desktop();
 	snprintf(title, sizeof(title), _("Linux Kernel v%s Configuration"),
 		getenv("KERNELVERSION"));
 	setCaption(title);
-- 
cgit v0.10.2


From 284026cdfb5a899e558dcb7a36aefaf54a78c094 Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:53 +0200
Subject: kconfig: do not hardcode ".config" filename

Rather than hardcoding ".config" use conf_get_configname(), which also
respects the environment variable KCONFIG_CONFIG.

This fixes "make silentoldconfig" when KCONFIG_CONFIG is used and also
suggests the given filename for "Load" and "Save as" in qconf.

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index d190092..3baaaec 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -498,14 +498,15 @@ int main(int ac, char **av)
 	conf_parse(name);
 	//zconfdump(stdout);
 	if (sync_kconfig) {
-		if (stat(".config", &tmpstat)) {
+		name = conf_get_configname();
+		if (stat(name, &tmpstat)) {
 			fprintf(stderr, _("***\n"
 				"*** You have not yet configured your kernel!\n"
-				"*** (missing kernel .config file)\n"
+				"*** (missing kernel config file \"%s\")\n"
 				"***\n"
 				"*** Please run some configurator (e.g. \"make oldconfig\" or\n"
 				"*** \"make menuconfig\" or \"make xconfig\").\n"
-				"***\n"));
+				"***\n"), name);
 			exit(1);
 		}
 	}
diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index 19811fc..ce7d508 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -1448,7 +1448,7 @@ ConfigMainWindow::ConfigMainWindow(void)
 
 void ConfigMainWindow::loadConfig(void)
 {
-	QString s = QFileDialog::getOpenFileName(".config", NULL, this);
+	QString s = QFileDialog::getOpenFileName(conf_get_configname(), NULL, this);
 	if (s.isNull())
 		return;
 	if (conf_read(QFile::encodeName(s)))
@@ -1464,7 +1464,7 @@ void ConfigMainWindow::saveConfig(void)
 
 void ConfigMainWindow::saveConfigAs(void)
 {
-	QString s = QFileDialog::getSaveFileName(".config", NULL, this);
+	QString s = QFileDialog::getSaveFileName(conf_get_configname(), NULL, this);
 	if (s.isNull())
 		return;
 	if (conf_write(QFile::encodeName(s)))
-- 
cgit v0.10.2


From 12122f62324e7c6837ee8b0fa8d257ce7ebcfc6f Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Mon, 18 May 2009 01:36:54 +0200
Subject: kconfig: do not hardcode "include/config/auto.conf" filename

Regardless of KCONFIG_AUTOCONFIG, the filename written as a Make target
into "include/config/auto.conf.cmd" was always the default one.

Of course this doesn't make it work for the Kernel kbuild system, since
there the filename is hardcoded at several places in the Makefiles.

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 273d738..a04da34 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -41,6 +41,13 @@ const char *conf_get_configname(void)
 	return name ? name : ".config";
 }
 
+const char *conf_get_autoconfig_name(void)
+{
+	char *name = getenv("KCONFIG_AUTOCONFIG");
+
+	return name ? name : "include/config/auto.conf";
+}
+
 static char *conf_expand_value(const char *in)
 {
 	struct symbol *sym;
@@ -555,15 +562,14 @@ int conf_write(const char *name)
 
 int conf_split_config(void)
 {
-	char *name, path[128];
+	const char *name;
+	char path[128];
 	char *s, *d, c;
 	struct symbol *sym;
 	struct stat sb;
 	int res, i, fd;
 
-	name = getenv("KCONFIG_AUTOCONFIG");
-	if (!name)
-		name = "include/config/auto.conf";
+	name = conf_get_autoconfig_name();
 	conf_read_simple(name, S_DEF_AUTO);
 
 	if (chdir("include/config"))
@@ -670,7 +676,7 @@ int conf_write_autoconf(void)
 {
 	struct symbol *sym;
 	const char *str;
-	char *name;
+	const char *name;
 	FILE *out, *out_h;
 	time_t now;
 	int i, l;
@@ -773,9 +779,7 @@ int conf_write_autoconf(void)
 		name = "include/linux/autoconf.h";
 	if (rename(".tmpconfig.h", name))
 		return 1;
-	name = getenv("KCONFIG_AUTOCONFIG");
-	if (!name)
-		name = "include/config/auto.conf";
+	name = conf_get_autoconfig_name();
 	/*
 	 * This must be the last step, kbuild has a dependency on auto.conf
 	 * and this marks the successful completion of the previous steps.
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index 4a9af6f..f379b0b 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -74,6 +74,7 @@ char *zconf_curname(void);
 
 /* confdata.c */
 const char *conf_get_configname(void);
+const char *conf_get_autoconfig_name(void);
 char *conf_get_default_confname(void);
 void sym_set_change_count(int count);
 void sym_add_change_count(int count);
diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c
index 3cc9f93..b6b2a46a 100644
--- a/scripts/kconfig/util.c
+++ b/scripts/kconfig/util.c
@@ -46,8 +46,8 @@ int file_write_dep(const char *name)
 		else
 			fprintf(out, "\t%s\n", file->name);
 	}
-	fprintf(out, "\ninclude/config/auto.conf: \\\n"
-		     "\t$(deps_config)\n\n");
+	fprintf(out, "\n%s: \\\n"
+		     "\t$(deps_config)\n\n", conf_get_autoconfig_name());
 
 	expr_list_for_each_sym(sym_env_list, e, sym) {
 		struct property *prop;
@@ -61,7 +61,7 @@ int file_write_dep(const char *name)
 		if (!value)
 			value = "";
 		fprintf(out, "ifneq \"$(%s)\" \"%s\"\n", env_sym->name, value);
-		fprintf(out, "include/config/auto.conf: FORCE\n");
+		fprintf(out, "%s: FORCE\n", conf_get_autoconfig_name());
 		fprintf(out, "endif\n");
 	}
 
-- 
cgit v0.10.2


From ad6ccfad6f759a5d657dabe2071a8f2a503fcc84 Mon Sep 17 00:00:00 2001
From: Manish Katiyar <mkatiyar@gmail.com>
Date: Tue, 12 May 2009 13:43:35 -0700
Subject: kernel/kallsyms.c: replace deprecated __initcall with device_initcall
 and fix whitespace

Fix coding style whitespace issues and replace __initcall with
device_initcall.  Fixed multi-line comments as per coding style.

Errors as reported by checkpatch.pl :-
Before:
total: 14 errors, 14 warnings, 487 lines checked
After :
total: 0 errors, 8 warnings, 507 lines checked

Compile tested binary verified as :-
Before:
 text    data     bss     dec     hex filename
 2405       4       0    2409     969 kernel/kallsyms.o
After :
 text     data     bss     dec     hex filename
 2405       4       0    2409     969 kernel/kallsyms.o

Signed-off-by: Manish Katiyar <mkatiyar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 374faf9..3a29dbe 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -30,12 +30,16 @@
 #define all_var 0
 #endif
 
-/* These will be re-linked against their real values during the second link stage */
+/*
+ * These will be re-linked against their real values
+ * during the second link stage.
+ */
 extern const unsigned long kallsyms_addresses[] __attribute__((weak));
 extern const u8 kallsyms_names[] __attribute__((weak));
 
-/* tell the compiler that the count isn't in the small data section if the arch
- * has one (eg: FRV)
+/*
+ * Tell the compiler that the count isn't in the small data section if the arch
+ * has one (eg: FRV).
  */
 extern const unsigned long kallsyms_num_syms
 __attribute__((weak, section(".rodata")));
@@ -75,31 +79,37 @@ static int is_ksym_addr(unsigned long addr)
 	return is_kernel_text(addr) || is_kernel_inittext(addr);
 }
 
-/* expand a compressed symbol data into the resulting uncompressed string,
-   given the offset to where the symbol is in the compressed stream */
+/*
+ * Expand a compressed symbol data into the resulting uncompressed string,
+ * given the offset to where the symbol is in the compressed stream.
+ */
 static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
 {
 	int len, skipped_first = 0;
 	const u8 *tptr, *data;
 
-	/* get the compressed symbol length from the first symbol byte */
+	/* Get the compressed symbol length from the first symbol byte. */
 	data = &kallsyms_names[off];
 	len = *data;
 	data++;
 
-	/* update the offset to return the offset for the next symbol on
-	 * the compressed stream */
+	/*
+	 * Update the offset to return the offset for the next symbol on
+	 * the compressed stream.
+	 */
 	off += len + 1;
 
-	/* for every byte on the compressed symbol data, copy the table
-	   entry for that byte */
-	while(len) {
-		tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
+	/*
+	 * For every byte on the compressed symbol data, copy the table
+	 * entry for that byte.
+	 */
+	while (len) {
+		tptr = &kallsyms_token_table[kallsyms_token_index[*data]];
 		data++;
 		len--;
 
 		while (*tptr) {
-			if(skipped_first) {
+			if (skipped_first) {
 				*result = *tptr;
 				result++;
 			} else
@@ -110,36 +120,46 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
 
 	*result = '\0';
 
-	/* return to offset to the next symbol */
+	/* Return to offset to the next symbol. */
 	return off;
 }
 
-/* get symbol type information. This is encoded as a single char at the
- * begining of the symbol name */
+/*
+ * Get symbol type information. This is encoded as a single char at the
+ * beginning of the symbol name.
+ */
 static char kallsyms_get_symbol_type(unsigned int off)
 {
-	/* get just the first code, look it up in the token table, and return the
-	 * first char from this token */
-	return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
+	/*
+	 * Get just the first code, look it up in the token table,
+	 * and return the first char from this token.
+	 */
+	return kallsyms_token_table[kallsyms_token_index[kallsyms_names[off + 1]]];
 }
 
 
-/* find the offset on the compressed stream given and index in the
- * kallsyms array */
+/*
+ * Find the offset on the compressed stream given and index in the
+ * kallsyms array.
+ */
 static unsigned int get_symbol_offset(unsigned long pos)
 {
 	const u8 *name;
 	int i;
 
-	/* use the closest marker we have. We have markers every 256 positions,
-	 * so that should be close enough */
-	name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
+	/*
+	 * Use the closest marker we have. We have markers every 256 positions,
+	 * so that should be close enough.
+	 */
+	name = &kallsyms_names[kallsyms_markers[pos >> 8]];
 
-	/* sequentially scan all the symbols up to the point we're searching for.
-	 * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
-	 * just need to add the len to the current pointer for every symbol we
-	 * wish to skip */
-	for(i = 0; i < (pos&0xFF); i++)
+	/*
+	 * Sequentially scan all the symbols up to the point we're searching
+	 * for. Every symbol is stored in a [<len>][<len> bytes of data] format,
+	 * so we just need to add the len to the current pointer for every
+	 * symbol we wish to skip.
+	 */
+	for (i = 0; i < (pos & 0xFF); i++)
 		name = name + (*name) + 1;
 
 	return name - kallsyms_names;
@@ -190,7 +210,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
 	/* This kernel should never had been booted. */
 	BUG_ON(!kallsyms_addresses);
 
-	/* do a binary search on the sorted kallsyms_addresses array */
+	/* Do a binary search on the sorted kallsyms_addresses array. */
 	low = 0;
 	high = kallsyms_num_syms;
 
@@ -203,15 +223,15 @@ static unsigned long get_symbol_pos(unsigned long addr,
 	}
 
 	/*
-	 * search for the first aliased symbol. Aliased
-	 * symbols are symbols with the same address
+	 * Search for the first aliased symbol. Aliased
+	 * symbols are symbols with the same address.
 	 */
 	while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
 		--low;
 
 	symbol_start = kallsyms_addresses[low];
 
-	/* Search for next non-aliased symbol */
+	/* Search for next non-aliased symbol. */
 	for (i = low + 1; i < kallsyms_num_syms; i++) {
 		if (kallsyms_addresses[i] > symbol_start) {
 			symbol_end = kallsyms_addresses[i];
@@ -219,7 +239,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
 		}
 	}
 
-	/* if we found no next symbol, we use the end of the section */
+	/* If we found no next symbol, we use the end of the section. */
 	if (!symbol_end) {
 		if (is_kernel_inittext(addr))
 			symbol_end = (unsigned long)_einittext;
@@ -252,10 +272,10 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
 
 /*
  * Lookup an address
- * - modname is set to NULL if it's in the kernel
- * - we guarantee that the returned name is valid until we reschedule even if
- *   it resides in a module
- * - we also guarantee that modname will be valid until rescheduled
+ * - modname is set to NULL if it's in the kernel.
+ * - We guarantee that the returned name is valid until we reschedule even if.
+ *   It resides in a module.
+ * - We also guarantee that modname will be valid until rescheduled.
  */
 const char *kallsyms_lookup(unsigned long addr,
 			    unsigned long *symbolsize,
@@ -276,7 +296,7 @@ const char *kallsyms_lookup(unsigned long addr,
 		return namebuf;
 	}
 
-	/* see if it's in a module */
+	/* See if it's in a module. */
 	return module_address_lookup(addr, symbolsize, offset, modname,
 				     namebuf);
 }
@@ -294,7 +314,7 @@ int lookup_symbol_name(unsigned long addr, char *symname)
 		kallsyms_expand_symbol(get_symbol_offset(pos), symname);
 		return 0;
 	}
-	/* see if it's in a module */
+	/* See if it's in a module. */
 	return lookup_module_symbol_name(addr, symname);
 }
 
@@ -313,7 +333,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 		modname[0] = '\0';
 		return 0;
 	}
-	/* see if it's in a module */
+	/* See if it's in a module. */
 	return lookup_module_symbol_attrs(addr, size, offset, modname, name);
 }
 
@@ -342,6 +362,7 @@ int sprint_symbol(char *buffer, unsigned long address)
 
 	return len;
 }
+EXPORT_SYMBOL_GPL(sprint_symbol);
 
 /* Look up a kernel symbol and print it to the kernel messages. */
 void __print_symbol(const char *fmt, unsigned long address)
@@ -352,13 +373,13 @@ void __print_symbol(const char *fmt, unsigned long address)
 
 	printk(fmt, buffer);
 }
+EXPORT_SYMBOL(__print_symbol);
 
 /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
-struct kallsym_iter
-{
+struct kallsym_iter {
 	loff_t pos;
 	unsigned long value;
-	unsigned int nameoff; /* If iterating in core kernel symbols */
+	unsigned int nameoff; /* If iterating in core kernel symbols. */
 	char type;
 	char name[KSYM_NAME_LEN];
 	char module_name[MODULE_NAME_LEN];
@@ -404,7 +425,7 @@ static int update_iter(struct kallsym_iter *iter, loff_t pos)
 		iter->pos = pos;
 		return get_ksymbol_mod(iter);
 	}
-	
+
 	/* If we're not on the desired position, reset to new position. */
 	if (pos != iter->pos)
 		reset_iter(iter, pos);
@@ -439,23 +460,25 @@ static int s_show(struct seq_file *m, void *p)
 {
 	struct kallsym_iter *iter = m->private;
 
-	/* Some debugging symbols have no name.  Ignore them. */ 
+	/* Some debugging symbols have no name.  Ignore them. */
 	if (!iter->name[0])
 		return 0;
 
 	if (iter->module_name[0]) {
 		char type;
 
-		/* Label it "global" if it is exported,
-		 * "local" if not exported. */
+		/*
+		 * Label it "global" if it is exported,
+		 * "local" if not exported.
+		 */
 		type = iter->exported ? toupper(iter->type) :
 					tolower(iter->type);
 		seq_printf(m, "%0*lx %c %s\t[%s]\n",
-			   (int)(2*sizeof(void*)),
+			   (int)(2 * sizeof(void *)),
 			   iter->value, type, iter->name, iter->module_name);
 	} else
 		seq_printf(m, "%0*lx %c %s\n",
-			   (int)(2*sizeof(void*)),
+			   (int)(2 * sizeof(void *)),
 			   iter->value, iter->type, iter->name);
 	return 0;
 }
@@ -469,9 +492,11 @@ static const struct seq_operations kallsyms_op = {
 
 static int kallsyms_open(struct inode *inode, struct file *file)
 {
-	/* We keep iterator in m->private, since normal case is to
+	/*
+	 * We keep iterator in m->private, since normal case is to
 	 * s_start from where we left off, so we avoid doing
-	 * using get_symbol_offset for every symbol */
+	 * using get_symbol_offset for every symbol.
+	 */
 	struct kallsym_iter *iter;
 	int ret;
 
@@ -500,7 +525,4 @@ static int __init kallsyms_init(void)
 	proc_create("kallsyms", 0444, NULL, &kallsyms_operations);
 	return 0;
 }
-__initcall(kallsyms_init);
-
-EXPORT_SYMBOL(__print_symbol);
-EXPORT_SYMBOL_GPL(sprint_symbol);
+device_initcall(kallsyms_init);
-- 
cgit v0.10.2


From 1581c1cede3b180b77aa08024c6ca406252cc21f Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Tue, 12 May 2009 13:43:36 -0700
Subject: scripts/headers_check.pl: correct RE in header CONFIG leak check

Correct the regular expression in scripts/headers_check.pl to include '_'
as a valid character in the class; otherwise, the check will report a
"leaked" symbol of CONFIG_A_B_C as merely CONFIG_A.

This patch will make no difference whatsoever in the current kernel tree
as the call to the perl routine that does that check is currently
commented out:

                &check_include();
                &check_asm_types();
                &check_sizetypes();
                &check_prototypes();
                # Dropped for now. Too much noise &check_config();

However, I noticed that problem when I was building the yum downloadable
kernel source rpm for fedora 11 (beta), which *does* run that check, and
that's where the problem became obvious.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
index 56f90a4..4414c43 100644
--- a/scripts/headers_check.pl
+++ b/scripts/headers_check.pl
@@ -70,7 +70,7 @@ sub check_prototypes
 
 sub check_config
 {
-	if ($line =~ m/[^a-zA-Z0-9_]+CONFIG_([a-zA-Z0-9]+)[^a-zA-Z0-9]/) {
+	if ($line =~ m/[^a-zA-Z0-9_]+CONFIG_([a-zA-Z0-9_]+)[^a-zA-Z0-9_]/) {
 		printf STDERR "$filename:$lineno: leaks CONFIG_$1 to userspace where it is not valid\n";
 	}
 }
-- 
cgit v0.10.2


From 8b8b76c045beb85c1c2b28e07eff6599a1e95cf1 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sat, 6 Jun 2009 00:18:05 +0200
Subject: kbuild: add hint about __refdata to modpost

As requested by Guennadi Liakhovetski

Cc: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 94e71ef..4522948 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1196,7 +1196,7 @@ static void report_sec_mismatch(const char *modname, enum mismatch mismatch,
 		"The variable %s references\n"
 		"the %s %s%s%s\n"
 		"If the reference is valid then annotate the\n"
-		"variable with __init* (see linux/init.h) "
+		"variable with __init* or __refdata (see linux/init.h) "
 		"or name the variable:\n",
 		fromsym, to, sec2annotation(tosec), tosym, to_p);
 		while (*s)
-- 
cgit v0.10.2


From a9eb522375abc79c2bf23940a067bc0046276236 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 5 Jun 2009 15:02:47 -0700
Subject: menu: fix embedded menu presentation

The STRIP_ASM_SYMS kconfig symbol mucks up the embedded menu because
STRIP_ASM_SYMS is in the middle of the embedded menu items but it does not
depend on EMBEDDED.  Move it to beyond the end of the embedded menu so
that the menu is presented correctly.

Or if STRIP_ASM_SYMS should depend on EMBEDDED, that can also be fixed.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/init/Kconfig b/init/Kconfig
index 7be4d38..4389cee 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -808,14 +808,6 @@ config KALLSYMS_EXTRA_PASS
 	   you wait for kallsyms to be fixed.
 
 
-config STRIP_ASM_SYMS
-	bool "Strip assembler-generated symbols during link"
-	default n
-	help
-	  Strip internal assembler-generated symbols during a link (symbols
-	  that look like '.Lxxx') so they don't pollute the output of
-	  get_wchan() and suchlike.
-
 config HOTPLUG
 	bool "Support for hot-pluggable devices" if EMBEDDED
 	default y
@@ -961,6 +953,14 @@ config SLUB_DEBUG
 	  SLUB sysfs support. /sys/slab will not exist and there will be
 	  no support for cache validation etc.
 
+config STRIP_ASM_SYMS
+	bool "Strip assembler-generated symbols during link"
+	default n
+	help
+	  Strip internal assembler-generated symbols during a link (symbols
+	  that look like '.Lxxx') so they don't pollute the output of
+	  get_wchan() and suchlike.
+
 config COMPAT_BRK
 	bool "Disable heap randomization"
 	default y
-- 
cgit v0.10.2


From bdefe35d40c6bb5859dadee26c2e19bf58540f1d Mon Sep 17 00:00:00 2001
From: dann frazier <dannf@debian.org>
Date: Fri, 5 Jun 2009 15:02:49 -0700
Subject: Remove bashisms from scripts

The '-e' option to echo and brace expansion are not guaranteed to be supported
by a POSIX-compliant /bin/sh (e.g. dash)

Signed-off-by: dann frazier <dannf@debian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/gcc-version.sh b/scripts/gcc-version.sh
index cc767b3..debecb5 100644
--- a/scripts/gcc-version.sh
+++ b/scripts/gcc-version.sh
@@ -18,7 +18,7 @@ compiler="$*"
 
 if [ ${#compiler} -eq 0 ]; then
 	echo "Error: No compiler specified."
-	echo -e "Usage:\n\t$0 <gcc-command>"
+	printf "Usage:\n\t$0 <gcc-command>\n"
 	exit 1
 fi
 
diff --git a/scripts/ver_linux b/scripts/ver_linux
index dbb3037..7de36df 100755
--- a/scripts/ver_linux
+++ b/scripts/ver_linux
@@ -65,7 +65,7 @@ sed -n -e '/^.*\/libc-\([^/]*\)\.so$/{s//\1/;p;q}' < /proc/self/maps
 ldd -v > /dev/null 2>&1 && ldd -v || ldd --version |head -n 1 | awk \
 'NR==1{print "Dynamic linker (ldd)  ", $NF}'
 
-ls -l /usr/lib/lib{g,stdc}++.so  2>/dev/null | awk -F. \
+ls -l /usr/lib/libg++.so /usr/lib/libstdc++.so  2>/dev/null | awk -F. \
        '{print "Linux C++ Library      " $4"."$5"."$6}'
 
 ps --version 2>&1 | grep version | awk \
-- 
cgit v0.10.2


From 35763e854f8aa2d579236a4670c32c078d143b3a Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 26 May 2009 19:37:54 -0400
Subject: ignore *.patch files

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/.gitignore b/.gitignore
index 51bd99d..3433d7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@
 *.elf
 *.bin
 *.gz
+*.patch
 
 #
 # Top-level generic files
-- 
cgit v0.10.2


From 7211b8b9028854eee0d03ee6231440b7fb492521 Mon Sep 17 00:00:00 2001
From: Sergei Poselenov <sposelenov@emcraft.com>
Date: Fri, 5 Jun 2009 16:11:09 +0400
Subject: kbuild: fix "Argument list too long" error for "make headers_check",

I'm trying to install kernel headers to build a cross-toolchain, but got
the following:

make ARCH=arm
INSTALL_HDR_PATH=/work/psl/eldk-builds/arm-2009-04-21/work/var/tmp/crosstool-0.43-3-root/usr/crosstool/gcc-4.2.2-glibc-20070515T2025-eldk/
+arm-linux-gnueabi/arm-linux-gnueabi/
headers_check
...
  CHECK   include/linux/raid (2 files)
  CHECK   include/linux/spi (1 files)
  CHECK   include/linux/sunrpc (1 files)
  CHECK   include/linux/tc_act (6 files)
  CHECK   include/linux/tc_ematch (4 files)
  CHECK   include/linux/usb (8 files)
make[2]: execvp: /bin/sh: Argument list too long
make[2]: ***
[/work/psl/eldk-builds/arm-2009-04-21/work/var/tmp/crosstool-0.43-3-root/usr/crosstool/gcc-4.2.2-glibc-20070515T2025-eldk/arm-linux-gnueab
+i/arm-linux-gnueabi//include/linux/.check]
Error 127
make[1]: *** [linux] Error 2
make: *** [headers_check] Error 2
->

Introduce use of xargs to fix this.

Signed-off-by: Sergei Poselenov <sposelenov@emcraft.com>
Cc: Wolfgang Denk <wd@denx.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 095cfc8..0fcd838 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -54,8 +54,12 @@ quiet_cmd_remove = REMOVE  $(unwanted)
       cmd_remove = rm -f $(unwanted-file)
 
 quiet_cmd_check = CHECK   $(printdir) ($(words $(all-files)) files)
-      cmd_check = $(PERL) $< $(INSTALL_HDR_PATH)/include $(SRCARCH) \
-                  $(addprefix $(install)/, $(all-files));           \
+# Headers list can be pretty long, xargs helps to avoid
+# the "Argument list too long" error.
+      cmd_check = for f in $(all-files); do                          \
+                  echo "$(install)/$${f}"; done                      \
+                  | xargs                                            \
+                  $(PERL) $< $(INSTALL_HDR_PATH)/include $(SRCARCH); \
 	          touch $@
 
 PHONY += __headersinst __headerscheck
-- 
cgit v0.10.2


From 67b7ebe091cd92fd69f732da3170720d79c4e632 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Thu, 4 Jun 2009 22:12:01 -0400
Subject: kbuild/headers_check: refine extern check

'extern' checking information is not clear, refine it.
Plus, fix a comment.

Signed-off-by: WANG Cong <amwang@redhat.com>
[sam: redid the extern error message]
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl
index 4414c43..db1dd7a 100644
--- a/scripts/headers_check.pl
+++ b/scripts/headers_check.pl
@@ -2,7 +2,7 @@
 #
 # headers_check.pl execute a number of trivial consistency checks
 #
-# Usage: headers_check.pl dir [files...]
+# Usage: headers_check.pl dir arch [files...]
 # dir:   dir to look for included files
 # arch:  architecture
 # files: list of files to check
@@ -37,7 +37,7 @@ foreach my $file (@files) {
 		&check_include();
 		&check_asm_types();
 		&check_sizetypes();
-		&check_prototypes();
+		&check_declarations();
 		# Dropped for now. Too much noise &check_config();
 	}
 	close FH;
@@ -61,10 +61,12 @@ sub check_include
 	}
 }
 
-sub check_prototypes
+sub check_declarations
 {
-	if ($line =~ m/^\s*extern\b/) {
-		printf STDERR "$filename:$lineno: extern's make no sense in userspace\n";
+	if ($line =~m/^\s*extern\b/) {
+		printf STDERR "$filename:$lineno: " .
+		              "userspace cannot call function or variable " .
+		              "defined in the kernel\n";
 	}
 }
 
-- 
cgit v0.10.2


From eedc9d83eaab2d35fb9dd1ec25b765dec964e26c Mon Sep 17 00:00:00 2001
From: Russell King <rmk+lkml@arm.linux.org.uk>
Date: Sat, 6 Jun 2009 22:47:11 +0100
Subject: kbuild: fix headers_exports with boolean expression

When we had code like this in a header unifdef failed to
deduct that the expression was always false - and we had code exported
that was not intended for userspace.

#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
  int a;
#endif

This commit implment support in unidef which allows it to work out if
an #if expression always evaluates true or false for symbols which
are being undefined/always defined.

The patch is slightly more complicated than I'd hoped because unifdef
needs to see lines fully evaluated - doing otherwise causes it to
mark the line as "dirty" and copy it over no matter what.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/unifdef.c b/scripts/unifdef.c
index 05a31a6..30d459f 100644
--- a/scripts/unifdef.c
+++ b/scripts/unifdef.c
@@ -678,8 +678,10 @@ eval_unary(const struct ops *ops, int *valp, const char **cpp)
 	if (*cp == '!') {
 		debug("eval%d !", ops - eval_ops);
 		cp++;
-		if (eval_unary(ops, valp, &cp) == LT_IF)
+		if (eval_unary(ops, valp, &cp) == LT_IF) {
+			*cpp = cp;
 			return (LT_IF);
+		}
 		*valp = !*valp;
 	} else if (*cp == '(') {
 		cp++;
@@ -700,13 +702,16 @@ eval_unary(const struct ops *ops, int *valp, const char **cpp)
 			return (LT_IF);
 		cp = skipcomment(cp);
 		sym = findsym(cp);
-		if (sym < 0)
-			return (LT_IF);
-		*valp = (value[sym] != NULL);
 		cp = skipsym(cp);
 		cp = skipcomment(cp);
 		if (*cp++ != ')')
 			return (LT_IF);
+		if (sym >= 0)
+			*valp = (value[sym] != NULL);
+		else {
+			*cpp = cp;
+			return (LT_IF);
+		}
 		keepthis = false;
 	} else if (!endsym(*cp)) {
 		debug("eval%d symbol", ops - eval_ops);
@@ -741,11 +746,11 @@ eval_table(const struct ops *ops, int *valp, const char **cpp)
 	const struct op *op;
 	const char *cp;
 	int val;
+	Linetype lhs, rhs;
 
 	debug("eval%d", ops - eval_ops);
 	cp = *cpp;
-	if (ops->inner(ops+1, valp, &cp) == LT_IF)
-		return (LT_IF);
+	lhs = ops->inner(ops+1, valp, &cp);
 	for (;;) {
 		cp = skipcomment(cp);
 		for (op = ops->op; op->str != NULL; op++)
@@ -755,14 +760,32 @@ eval_table(const struct ops *ops, int *valp, const char **cpp)
 			break;
 		cp += strlen(op->str);
 		debug("eval%d %s", ops - eval_ops, op->str);
-		if (ops->inner(ops+1, &val, &cp) == LT_IF)
-			return (LT_IF);
-		*valp = op->fn(*valp, val);
+		rhs = ops->inner(ops+1, &val, &cp);
+		if (op->fn == op_and && (lhs == LT_FALSE || rhs == LT_FALSE)) {
+			debug("eval%d: and always false", ops - eval_ops);
+			if (lhs == LT_IF)
+				*valp = val;
+			lhs = LT_FALSE;
+			continue;
+		}
+		if (op->fn == op_or && (lhs == LT_TRUE || rhs == LT_TRUE)) {
+			debug("eval%d: or always true", ops - eval_ops);
+			if (lhs == LT_IF)
+				*valp = val;
+			lhs = LT_TRUE;
+			continue;
+		}
+		if (rhs == LT_IF)
+			lhs = LT_IF;
+		if (lhs != LT_IF)
+			*valp = op->fn(*valp, val);
 	}
 
 	*cpp = cp;
 	debug("eval%d = %d", ops - eval_ops, *valp);
-	return (*valp ? LT_TRUE : LT_FALSE);
+	if (lhs != LT_IF)
+		lhs = (*valp ? LT_TRUE : LT_FALSE);
+	return lhs;
 }
 
 /*
@@ -773,12 +796,15 @@ eval_table(const struct ops *ops, int *valp, const char **cpp)
 static Linetype
 ifeval(const char **cpp)
 {
+	const char *cp = *cpp;
 	int ret;
 	int val;
 
 	debug("eval %s", *cpp);
 	keepthis = killconsts ? false : true;
-	ret = eval_table(eval_ops, &val, cpp);
+	ret = eval_table(eval_ops, &val, &cp);
+	if (ret != LT_IF)
+		*cpp = cp;
 	debug("eval = %d", val);
 	return (keepthis ? LT_IF : ret);
 }
-- 
cgit v0.10.2


From 83d60795157c83389e6aaa0532d5e19afa976a24 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 6 Jun 2009 09:45:57 +0100
Subject: drm/i915: Sanity check execbuffer arguments before touching state.

By sending a broken execbuffer (its length was not suitably aligned) I
triggered an operation upon a freed object. The invalid alignment was
discovered after updating the write_domain on the object but before the
object was placed on the active queue. So during the unwind process
following the error, the now freed object attempts to flush its
non-existent, but outstanding, GPU writes causing this use-after-free.

[drm:i915_dispatch_gem_execbuffer] *ERROR* alignment
[drm:i915_gem_execbuffer] *ERROR* dispatch failed -22
WARNING: at lib/kref.c:43 warn_slowpath_null+0x10/0x15()
Modules linked in:
Pid: 4552, comm: lt-csi-drm Not tainted 2.6.30-rc6 #423
Call Trace:
 [<c0119ef3>] warn_slowpath_fmt+0x57/0x6d
 [<c014de24>] ? get_pageblock_migratetype+0x18/0x1e
 [<c014e8fd>] ? free_hot_page+0xa/0xc
 [<c014e915>] ? __free_pages+0x16/0x1f
 [<c0153ebf>] ? shmem_truncate_range+0x63e/0x656
 [<c015fb2f>] ? slob_page_alloc+0x146/0x1c8
 [<c0119f19>] warn_slowpath_null+0x10/0x15
 [<c01f55f2>] kref_get+0x1b/0x21
 [<c02605db>] i915_gem_object_move_to_active+0x1f/0x56
 [<c0261302>] i915_add_request+0x156/0x19a
 [<c026136e>] i915_gem_object_flush_gpu_write_domain+0x28/0x3f
 [<c0261eca>] i915_gem_object_unbind+0x4a/0x124
 [<c0261fd7>] i915_gem_free_object+0x33/0x9b
 [<c0250d6b>] drm_gem_object_free+0x28/0x4a
 [<c0250d43>] ? drm_gem_object_free+0x0/0x4a
 [<c01f55ce>] kref_put+0x38/0x41
 [<c0250cbf>] drm_gem_object_unreference+0x11/0x13
 [<c0250d06>] drm_gem_object_handle_unreference+0x1e/0x21
 [<c0250d13>] drm_gem_object_release_handle+0xa/0xe
 [<c01f3e6b>] idr_for_each+0x5f/0x98
 [<c0250d09>] ? drm_gem_object_release_handle+0x0/0xe
 [<c0250daf>] drm_gem_release+0x22/0x34
 [<c025046f>] drm_release+0x1e8/0x3c4
 [<c0162d25>] __fput+0xaf/0x146
 [<c0162dce>] fput+0x12/0x14
 [<c01605ef>] filp_close+0x48/0x52
 [<c011b182>] put_files_struct+0x57/0x9b
 [<c011b1e4>] exit_files+0x1e/0x20
 [<c011c6b6>] do_exit+0x16d/0x511
 [<c03704ab>] ? __schedule+0x3d4/0x3e5
 [<c0103f0d>] ? handle_irq+0xd/0x69
 [<c011caa7>] do_group_exit+0x4d/0x73
 [<c011cae0>] sys_exit_group+0x13/0x17
 [<c010268c>] sysenter_do_call+0x12/0x2b

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 38e0f83..ac22668 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3050,20 +3050,12 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int nbox = exec->num_cliprects;
 	int i = 0, count;
-	uint32_t	exec_start, exec_len;
+	uint32_t exec_start, exec_len;
 	RING_LOCALS;
 
 	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
 	exec_len = (uint32_t) exec->batch_len;
 
-	if ((exec_start | exec_len) & 0x7) {
-		DRM_ERROR("alignment\n");
-		return -EINVAL;
-	}
-
-	if (!exec_start)
-		return -EINVAL;
-
 	count = nbox ? nbox : 1;
 
 	for (i = 0; i < count; i++) {
@@ -3211,6 +3203,24 @@ err:
 	return ret;
 }
 
+static int
+i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec,
+			   uint64_t exec_offset)
+{
+	uint32_t exec_start, exec_len;
+
+	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
+	exec_len = (uint32_t) exec->batch_len;
+
+	if ((exec_start | exec_len) & 0x7)
+		return -EINVAL;
+
+	if (!exec_start)
+		return -EINVAL;
+
+	return 0;
+}
+
 int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file_priv)
@@ -3362,6 +3372,14 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
 	batch_obj->pending_write_domain = 0;
 
+	/* Sanity check the batch buffer, prior to moving objects */
+	exec_offset = exec_list[args->buffer_count - 1].offset;
+	ret = i915_gem_check_execbuffer (args, exec_offset);
+	if (ret != 0) {
+		DRM_ERROR("execbuf with invalid offset/length\n");
+		goto err;
+	}
+
 	i915_verify_inactive(dev, __FILE__, __LINE__);
 
 	/* Zero the global flush/invalidate flags. These
@@ -3410,8 +3428,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	}
 #endif
 
-	exec_offset = exec_list[args->buffer_count - 1].offset;
-
 #if WATCH_EXEC
 	i915_gem_dump_object(batch_obj,
 			      args->batch_len,
-- 
cgit v0.10.2


From 5f26a2c7ad6eba97141e8372f3def282f934b169 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 6 Jun 2009 09:45:58 +0100
Subject: drm/i915: OR in the COMMAND read domain for the batch buffer.

The batch buffer may be shared with another read buffer, so we should not
ignore any previously set domains, but just or in the command domain (and
check that the buffer is not writable).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ac22668..2d705e8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3369,8 +3369,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
 	/* Set the pending read domains for the batch buffer to COMMAND */
 	batch_obj = object_list[args->buffer_count-1];
-	batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-	batch_obj->pending_write_domain = 0;
+	if (batch_obj->pending_write_domain) {
+		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
 	/* Sanity check the batch buffer, prior to moving objects */
 	exec_offset = exec_list[args->buffer_count - 1].offset;
-- 
cgit v0.10.2


From ef53dae8658cf0e93d380983824a661067948d87 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 7 Jun 2009 20:46:37 +0200
Subject: Improve vmlinux.lds.h support for arch specific linker scripts

To support alingment of the individual architecture specific linker scripts
provide a set of general definitions in vmlinux.lds.h

With these definitions applied the diverse linekr scripts can be reduced
in line count and their readability are improved - IMO.

A sample linker script is included to give the preferred
order of the sections for the architectures that do not
have any special requirments.

These definitions are also a first step towards eventual
support for -ffunction-sections.
The definitions makes it much easier to do a global
renaming of section names - but the main purpose is
to clean up the linker scripts.

Tim Aboot has provided a lot of inputs to improve
the definitions - all faults are mine.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@mit.edu>

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3edb114..fba4223 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -1,4 +1,58 @@
-#include <linux/section-names.h>
+/*
+ * Helper macros to support writing architecture specific
+ * linker scripts.
+ *
+ * A minimal linker scripts has following content:
+ * [This is a sample, architectures may have special requiriements]
+ *
+ * OUTPUT_FORMAT(...)
+ * OUTPUT_ARCH(...)
+ * ENTRY(...)
+ * SECTIONS
+ * {
+ *	. = START;
+ *	__init_begin = .;
+ *	HEAD_SECTION
+ *	INIT_TEXT_SECTION(PAGE_SIZE)
+ *	INIT_DATA_SECTION(...)
+ *	PERCPU(PAGE_SIZE)
+ *	__init_end = .;
+ *
+ *	_stext = .;
+ *	TEXT_SECTION = 0
+ *	_etext = .;
+ *
+ *      _sdata = .;
+ *	RO_DATA_SECTION(PAGE_SIZE)
+ *	RW_DATA_SECTION(...)
+ *	_edata = .;
+ *
+ *	EXCEPTION_TABLE(...)
+ *	NOTES
+ *
+ *	__bss_start = .;
+ *	BSS_SECTION(0, 0)
+ *	__bss_stop = .;
+ *	_end = .;
+ *
+ *	/DISCARD/ : {
+ *		EXIT_TEXT
+ *		EXIT_DATA
+ *		*(.exitcall.exit)
+ *	}
+ *	STABS_DEBUG
+ *	DWARF_DEBUG
+ * }
+ *
+ * [__init_begin, __init_end] is the init section that may be freed after init
+ * [_stext, _etext] is the text section
+ * [_sdata, _edata] is the data section
+ *
+ * Some of the included output section have their own set of constants.
+ * Examples are: [__initramfs_start, __initramfs_end] for initramfs and
+ *               [__nosave_begin, __nosave_end] for the nosave data
+ */
+ #include <linux/section-names.h>
 
 #ifndef LOAD_OFFSET
 #define LOAD_OFFSET 0
@@ -116,7 +170,36 @@
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()
 
-#define RO_DATA(align)							\
+/*
+ * Data section helpers
+ */
+#define NOSAVE_DATA							\
+	. = ALIGN(PAGE_SIZE);						\
+	VMLINUX_SYMBOL(__nosave_begin) = .;				\
+	*(.data.nosave)							\
+	. = ALIGN(PAGE_SIZE);						\
+	VMLINUX_SYMBOL(__nosave_end) = .;
+
+#define PAGE_ALIGNED_DATA(page_align)					\
+	. = ALIGN(page_align);						\
+	*(.data.page_aligned)
+
+#define READ_MOSTLY_DATA(align)						\
+	. = ALIGN(align);						\
+	*(.data.read_mostly)
+
+#define CACHELINE_ALIGNED_DATA(align)					\
+	. = ALIGN(align);						\
+	*(.data.cacheline_aligned)
+
+#define INIT_TASK(align)						\
+	. = ALIGN(align);						\
+	*(.data.init_task)
+
+/*
+ * Read only Data
+ */
+#define RO_DATA_SECTION(align)						\
 	. = ALIGN((align));						\
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
 		VMLINUX_SYMBOL(__start_rodata) = .;			\
@@ -270,9 +353,10 @@
 	}								\
 	. = ALIGN((align));
 
-/* RODATA provided for backward compatibility.
+/* RODATA & RO_DATA provided for backward compatibility.
  * All archs are supposed to use RO_DATA() */
-#define RODATA RO_DATA(4096)
+#define RODATA          RO_DATA_SECTION(4096)
+#define RO_DATA(align)  RO_DATA_SECTION(align)
 
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
@@ -332,6 +416,31 @@
 /* Section used for early init (in .S files) */
 #define HEAD_TEXT  *(HEAD_TEXT_SECTION)
 
+#define HEAD_SECTION							\
+	.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {		\
+		HEAD_TEXT						\
+	}
+
+/*
+ * Exception table
+ */
+#define EXCEPTION_TABLE(align)						\
+	. = ALIGN(align);						\
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__start___ex_table) = .;			\
+		*(__ex_table)						\
+		VMLINUX_SYMBOL(__stop___ex_table) = .;			\
+	}
+
+/*
+ * Init task
+ */
+#define INIT_TASK_DATA(align)						\
+	. = ALIGN(align);						\
+	.data.init_task : {						\
+		INIT_TASK						\
+	}
+
 /* init and exit section handling */
 #define INIT_DATA							\
 	*(.init.data)							\
@@ -364,9 +473,32 @@
 	CPU_DISCARD(exit.text)						\
 	MEM_DISCARD(exit.text)
 
-		/* DWARF debug sections.
-		Symbols in the DWARF debugging sections are relative to
-		the beginning of the section so we begin them at 0.  */
+/*
+ * bss (Block Started by Symbol) - uninitialized data
+ * zeroed during startup
+ */
+#define SBSS								\
+	.sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {				\
+		*(.sbss)						\
+		*(.scommon)						\
+	}
+
+#define BSS(bss_align)							\
+	. = ALIGN(bss_align);						\
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {				\
+		VMLINUX_SYMBOL(__bss_start) = .;			\
+		*(.bss.page_aligned)					\
+		*(.dynbss)						\
+		*(.bss)							\
+		*(COMMON)						\
+		VMLINUX_SYMBOL(__bss_stop) = .;				\
+	}
+
+/*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to
+ * the beginning of the section so we begin them at 0.
+ */
 #define DWARF_DEBUG							\
 		/* DWARF 1 */						\
 		.debug          0 : { *(.debug) }			\
@@ -433,6 +565,12 @@
 		VMLINUX_SYMBOL(__stop_notes) = .;			\
 	}
 
+#define INIT_SETUP(initsetup_align)					\
+		. = ALIGN(initsetup_align);				\
+		VMLINUX_SYMBOL(__setup_start) = .;			\
+		*(.init.setup)						\
+		VMLINUX_SYMBOL(__setup_end) = .;
+
 #define INITCALLS							\
 	*(.initcallearly.init)						\
 	VMLINUX_SYMBOL(__early_initcall_end) = .;			\
@@ -454,6 +592,31 @@
   	*(.initcall7.init)						\
   	*(.initcall7s.init)
 
+#define INIT_CALLS							\
+		VMLINUX_SYMBOL(__initcall_start) = .;			\
+		INITCALLS						\
+		VMLINUX_SYMBOL(__initcall_end) = .;
+
+#define CON_INITCALL							\
+		VMLINUX_SYMBOL(__con_initcall_start) = .;		\
+		*(.con_initcall.init)					\
+		VMLINUX_SYMBOL(__con_initcall_end) = .;
+
+#define SECURITY_INITCALL						\
+		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
+		*(.security_initcall.init)				\
+		VMLINUX_SYMBOL(__security_initcall_end) = .;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+#define INIT_RAM_FS							\
+	. = ALIGN(PAGE_SIZE);						\
+	VMLINUX_SYMBOL(__initramfs_start) = .;				\
+	*(.init.ramfs)							\
+	VMLINUX_SYMBOL(__initramfs_end) = .;
+#else
+#define INITRAMFS
+#endif
+
 /**
  * PERCPU_VADDR - define output section for percpu area
  * @vaddr: explicit base address (optional)
@@ -510,3 +673,57 @@
 		*(.data.percpu.shared_aligned)				\
 		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
 	}
+
+
+/*
+ * Definition of the high level *_SECTION macros
+ * They will fit only a subset of the architectures
+ */
+
+
+/*
+ * Writeable data.
+ * All sections are combined in a single .data section.
+ * The sections following CONSTRUCTORS are arranged so their
+ * typical alignment matches.
+ * A cacheline is typical/always less than a PAGE_SIZE so
+ * the sections that has this restriction (or similar)
+ * is located before the ones requiring PAGE_SIZE alignment.
+ * NOSAVE_DATA starts and ends with a PAGE_SIZE alignment which
+ * matches the requirment of PAGE_ALIGNED_DATA.
+ *
+/* use 0 as page_align if page_aligned data is not used */
+#define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask)	\
+	. = ALIGN(PAGE_SIZE);						\
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {				\
+		INIT_TASK(inittask)					\
+		CACHELINE_ALIGNED_DATA(cacheline)			\
+		READ_MOSTLY_DATA(cacheline)				\
+		DATA_DATA						\
+		CONSTRUCTORS						\
+		NOSAVE_DATA(nosave)					\
+		PAGE_ALIGNED_DATA(pagealigned)				\
+	}
+
+#define INIT_TEXT_SECTION(inittext_align)				\
+	. = ALIGN(inittext_align);					\
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(_sinittext) = .;				\
+		INIT_TEXT						\
+		VMLINUX_SYMBOL(_einittext) = .;				\
+	}
+
+#define INIT_DATA_SECTION(initsetup_align)				\
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {		\
+		INIT_DATA						\
+		INIT_SETUP(initsetup_align)				\
+		INIT_CALLS						\
+		CON_INITCALL						\
+		SECURITY_INITCALL					\
+		INIT_RAM_FS						\
+	}
+
+#define BSS_SECTION(sbss_align, bss_align)				\
+	SBSS								\
+	BSS(bss_align)							\
+	. = ALIGN(4);							\
-- 
cgit v0.10.2


From b9d97328e27b9272ed2ff2ad18de61aa1bf12af8 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 9 Jun 2009 08:50:38 -0700
Subject: kernel-doc: cleanup perl script

Various cleanups of scripts/kernel-doc:
- don't use **/ as an ending kernel-doc block since it's not preferred;
- typos/spellos
- add whitespace around ==, after comma, & around . operator;

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 3208a3a..5c58478 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -5,7 +5,7 @@ use strict;
 ## Copyright (c) 1998 Michael Zucchi, All Rights Reserved        ##
 ## Copyright (C) 2000, 1  Tim Waugh <twaugh@redhat.com>          ##
 ## Copyright (C) 2001  Simon Huggins                             ##
-## Copyright (C) 2005-2008  Randy Dunlap                         ##
+## Copyright (C) 2005-2009  Randy Dunlap                         ##
 ## 								 ##
 ## #define enhancements by Armin Kuster <akuster@mvista.com>	 ##
 ## Copyright (c) 2000 MontaVista Software, Inc.			 ##
@@ -85,7 +85,7 @@ use strict;
 #
 # /**
 #  * my_function
-#  **/
+#  */
 #
 # If the Description: header tag is omitted, then there must be a blank line
 # after the last parameter specification.
@@ -105,7 +105,7 @@ use strict;
 #  */
 # etc.
 #
-# Beside functions you can also write documentation for structs, unions,
+# Besides functions you can also write documentation for structs, unions,
 # enums and typedefs. Instead of the function name you must write the name
 # of the declaration;  the struct/union/enum/typedef must always precede
 # the name. Nesting of declarations is not supported.
@@ -223,7 +223,7 @@ sub usage {
 }
 
 # read arguments
-if ($#ARGV==-1) {
+if ($#ARGV == -1) {
     usage();
 }
 
@@ -240,12 +240,12 @@ my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
   " " . ((localtime)[5]+1900);
 
 # Essentially these are globals
-# They probably want to be tidied up made more localised or summat.
-# CAVEAT EMPTOR!  Some of the others I localised may not want to be which
+# They probably want to be tidied up, made more localised or something.
+# CAVEAT EMPTOR!  Some of the others I localised may not want to be, which
 # could cause "use of undefined value" or other bugs.
-my ($function, %function_table,%parametertypes,$declaration_purpose);
-my ($type,$declaration_name,$return_type);
-my ($newsection,$newcontents,$prototype,$filelist, $brcount, %source_map);
+my ($function, %function_table, %parametertypes, $declaration_purpose);
+my ($type, $declaration_name, $return_type);
+my ($newsection, $newcontents, $prototype, $filelist, $brcount, %source_map);
 
 if (defined($ENV{'KBUILD_VERBOSE'})) {
 	$verbose = "$ENV{'KBUILD_VERBOSE'}";
@@ -279,10 +279,10 @@ my $doc_special = "\@\%\$\&";
 my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start.
 my $doc_end = '\*/';
 my $doc_com = '\s*\*\s*';
-my $doc_decl = $doc_com.'(\w+)';
-my $doc_sect = $doc_com.'(['.$doc_special.']?[\w\s]+):(.*)';
-my $doc_content = $doc_com.'(.*)';
-my $doc_block = $doc_com.'DOC:\s*(.*)?';
+my $doc_decl = $doc_com . '(\w+)';
+my $doc_sect = $doc_com . '([' . $doc_special . ']?[\w\s]+):(.*)';
+my $doc_content = $doc_com . '(.*)';
+my $doc_block = $doc_com . 'DOC:\s*(.*)?';
 
 my %constants;
 my %parameterdescs;
@@ -485,12 +485,12 @@ sub output_enum_html(%) {
     my %args = %{$_[0]};
     my ($parameter);
     my $count;
-    print "<h2>enum ".$args{'enum'}."</h2>\n";
+    print "<h2>enum " . $args{'enum'} . "</h2>\n";
 
-    print "<b>enum ".$args{'enum'}."</b> {<br>\n";
+    print "<b>enum " . $args{'enum'} . "</b> {<br>\n";
     $count = 0;
     foreach $parameter (@{$args{'parameterlist'}}) {
-	print " <b>".$parameter."</b>";
+	print " <b>" . $parameter . "</b>";
 	if ($count != $#{$args{'parameterlist'}}) {
 	    $count++;
 	    print ",\n";
@@ -502,7 +502,7 @@ sub output_enum_html(%) {
     print "<h3>Constants</h3>\n";
     print "<dl>\n";
     foreach $parameter (@{$args{'parameterlist'}}) {
-	print "<dt><b>".$parameter."</b>\n";
+	print "<dt><b>" . $parameter . "</b>\n";
 	print "<dd>";
 	output_highlight($args{'parameterdescs'}{$parameter});
     }
@@ -516,9 +516,9 @@ sub output_typedef_html(%) {
     my %args = %{$_[0]};
     my ($parameter);
     my $count;
-    print "<h2>typedef ".$args{'typedef'}."</h2>\n";
+    print "<h2>typedef " . $args{'typedef'} . "</h2>\n";
 
-    print "<b>typedef ".$args{'typedef'}."</b>\n";
+    print "<b>typedef " . $args{'typedef'} . "</b>\n";
     output_section_html(@_);
     print "<hr>\n";
 }
@@ -528,8 +528,8 @@ sub output_struct_html(%) {
     my %args = %{$_[0]};
     my ($parameter);
 
-    print "<h2>".$args{'type'}." ".$args{'struct'}. " - " .$args{'purpose'}."</h2>\n";
-    print "<b>".$args{'type'}." ".$args{'struct'}."</b> {<br>\n";
+    print "<h2>" . $args{'type'} . " " . $args{'struct'} . " - " . $args{'purpose'} . "</h2>\n";
+    print "<b>" . $args{'type'} . " " . $args{'struct'} . "</b> {<br>\n";
     foreach $parameter (@{$args{'parameterlist'}}) {
 	if ($parameter =~ /^#/) {
 		print "$parameter<br>\n";
@@ -561,7 +561,7 @@ sub output_struct_html(%) {
 	$parameter_name =~ s/\[.*//;
 
 	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
-	print "<dt><b>".$parameter."</b>\n";
+	print "<dt><b>" . $parameter . "</b>\n";
 	print "<dd>";
 	output_highlight($args{'parameterdescs'}{$parameter_name});
     }
@@ -576,9 +576,9 @@ sub output_function_html(%) {
     my ($parameter, $section);
     my $count;
 
-    print "<h2>" .$args{'function'}." - ".$args{'purpose'}."</h2>\n";
-    print "<i>".$args{'functiontype'}."</i>\n";
-    print "<b>".$args{'function'}."</b>\n";
+    print "<h2>" . $args{'function'} . " - " . $args{'purpose'} . "</h2>\n";
+    print "<i>" . $args{'functiontype'} . "</i>\n";
+    print "<b>" . $args{'function'} . "</b>\n";
     print "(";
     $count = 0;
     foreach $parameter (@{$args{'parameterlist'}}) {
@@ -587,7 +587,7 @@ sub output_function_html(%) {
 	    # pointer-to-function
 	    print "<i>$1</i><b>$parameter</b>) <i>($2)</i>";
 	} else {
-	    print "<i>".$type."</i> <b>".$parameter."</b>";
+	    print "<i>" . $type . "</i> <b>" . $parameter . "</b>";
 	}
 	if ($count != $#{$args{'parameterlist'}}) {
 	    $count++;
@@ -603,7 +603,7 @@ sub output_function_html(%) {
 	$parameter_name =~ s/\[.*//;
 
 	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
-	print "<dt><b>".$parameter."</b>\n";
+	print "<dt><b>" . $parameter . "</b>\n";
 	print "<dd>";
 	output_highlight($args{'parameterdescs'}{$parameter_name});
     }
@@ -657,7 +657,7 @@ sub output_function_xml(%) {
     my $count;
     my $id;
 
-    $id = "API-".$args{'function'};
+    $id = "API-" . $args{'function'};
     $id =~ s/[^A-Za-z0-9]/-/g;
 
     print "<refentry id=\"$id\">\n";
@@ -667,12 +667,12 @@ sub output_function_xml(%) {
     print " <date>$man_date</date>\n";
     print "</refentryinfo>\n";
     print "<refmeta>\n";
-    print " <refentrytitle><phrase>".$args{'function'}."</phrase></refentrytitle>\n";
+    print " <refentrytitle><phrase>" . $args{'function'} . "</phrase></refentrytitle>\n";
     print " <manvolnum>9</manvolnum>\n";
     print " <refmiscinfo class=\"version\">" . $kernelversion . "</refmiscinfo>\n";
     print "</refmeta>\n";
     print "<refnamediv>\n";
-    print " <refname>".$args{'function'}."</refname>\n";
+    print " <refname>" . $args{'function'} . "</refname>\n";
     print " <refpurpose>\n";
     print "  ";
     output_highlight ($args{'purpose'});
@@ -682,8 +682,8 @@ sub output_function_xml(%) {
     print "<refsynopsisdiv>\n";
     print " <title>Synopsis</title>\n";
     print "  <funcsynopsis><funcprototype>\n";
-    print "   <funcdef>".$args{'functiontype'}." ";
-    print "<function>".$args{'function'}." </function></funcdef>\n";
+    print "   <funcdef>" . $args{'functiontype'} . " ";
+    print "<function>" . $args{'function'} . " </function></funcdef>\n";
 
     $count = 0;
     if ($#{$args{'parameterlist'}} >= 0) {
@@ -694,7 +694,7 @@ sub output_function_xml(%) {
 		print "   <paramdef>$1<parameter>$parameter</parameter>)\n";
 		print "     <funcparams>$2</funcparams></paramdef>\n";
 	    } else {
-		print "   <paramdef>".$type;
+		print "   <paramdef>" . $type;
 		print " <parameter>$parameter</parameter></paramdef>\n";
 	    }
 	}
@@ -734,7 +734,7 @@ sub output_struct_xml(%) {
     my ($parameter, $section);
     my $id;
 
-    $id = "API-struct-".$args{'struct'};
+    $id = "API-struct-" . $args{'struct'};
     $id =~ s/[^A-Za-z0-9]/-/g;
 
     print "<refentry id=\"$id\">\n";
@@ -744,12 +744,12 @@ sub output_struct_xml(%) {
     print " <date>$man_date</date>\n";
     print "</refentryinfo>\n";
     print "<refmeta>\n";
-    print " <refentrytitle><phrase>".$args{'type'}." ".$args{'struct'}."</phrase></refentrytitle>\n";
+    print " <refentrytitle><phrase>" . $args{'type'} . " " . $args{'struct'} . "</phrase></refentrytitle>\n";
     print " <manvolnum>9</manvolnum>\n";
     print " <refmiscinfo class=\"version\">" . $kernelversion . "</refmiscinfo>\n";
     print "</refmeta>\n";
     print "<refnamediv>\n";
-    print " <refname>".$args{'type'}." ".$args{'struct'}."</refname>\n";
+    print " <refname>" . $args{'type'} . " " . $args{'struct'} . "</refname>\n";
     print " <refpurpose>\n";
     print "  ";
     output_highlight ($args{'purpose'});
@@ -759,7 +759,7 @@ sub output_struct_xml(%) {
     print "<refsynopsisdiv>\n";
     print " <title>Synopsis</title>\n";
     print "  <programlisting>\n";
-    print $args{'type'}." ".$args{'struct'}." {\n";
+    print $args{'type'} . " " . $args{'struct'} . " {\n";
     foreach $parameter (@{$args{'parameterlist'}}) {
 	if ($parameter =~ /^#/) {
 	    print "$parameter\n";
@@ -779,7 +779,7 @@ sub output_struct_xml(%) {
 	    # bitfield
 	    print "  $1 $parameter$2;\n";
 	} else {
-	    print "  ".$type." ".$parameter.";\n";
+	    print "  " . $type . " " . $parameter . ";\n";
 	}
     }
     print "};";
@@ -824,7 +824,7 @@ sub output_enum_xml(%) {
     my $count;
     my $id;
 
-    $id = "API-enum-".$args{'enum'};
+    $id = "API-enum-" . $args{'enum'};
     $id =~ s/[^A-Za-z0-9]/-/g;
 
     print "<refentry id=\"$id\">\n";
@@ -834,12 +834,12 @@ sub output_enum_xml(%) {
     print " <date>$man_date</date>\n";
     print "</refentryinfo>\n";
     print "<refmeta>\n";
-    print " <refentrytitle><phrase>enum ".$args{'enum'}."</phrase></refentrytitle>\n";
+    print " <refentrytitle><phrase>enum " . $args{'enum'} . "</phrase></refentrytitle>\n";
     print " <manvolnum>9</manvolnum>\n";
     print " <refmiscinfo class=\"version\">" . $kernelversion . "</refmiscinfo>\n";
     print "</refmeta>\n";
     print "<refnamediv>\n";
-    print " <refname>enum ".$args{'enum'}."</refname>\n";
+    print " <refname>enum " . $args{'enum'} . "</refname>\n";
     print " <refpurpose>\n";
     print "  ";
     output_highlight ($args{'purpose'});
@@ -849,7 +849,7 @@ sub output_enum_xml(%) {
     print "<refsynopsisdiv>\n";
     print " <title>Synopsis</title>\n";
     print "  <programlisting>\n";
-    print "enum ".$args{'enum'}." {\n";
+    print "enum " . $args{'enum'} . " {\n";
     $count = 0;
     foreach $parameter (@{$args{'parameterlist'}}) {
 	print "  $parameter";
@@ -891,7 +891,7 @@ sub output_typedef_xml(%) {
     my ($parameter, $section);
     my $id;
 
-    $id = "API-typedef-".$args{'typedef'};
+    $id = "API-typedef-" . $args{'typedef'};
     $id =~ s/[^A-Za-z0-9]/-/g;
 
     print "<refentry id=\"$id\">\n";
@@ -901,11 +901,11 @@ sub output_typedef_xml(%) {
     print " <date>$man_date</date>\n";
     print "</refentryinfo>\n";
     print "<refmeta>\n";
-    print " <refentrytitle><phrase>typedef ".$args{'typedef'}."</phrase></refentrytitle>\n";
+    print " <refentrytitle><phrase>typedef " . $args{'typedef'} . "</phrase></refentrytitle>\n";
     print " <manvolnum>9</manvolnum>\n";
     print "</refmeta>\n";
     print "<refnamediv>\n";
-    print " <refname>typedef ".$args{'typedef'}."</refname>\n";
+    print " <refname>typedef " . $args{'typedef'} . "</refname>\n";
     print " <refpurpose>\n";
     print "  ";
     output_highlight ($args{'purpose'});
@@ -914,7 +914,7 @@ sub output_typedef_xml(%) {
 
     print "<refsynopsisdiv>\n";
     print " <title>Synopsis</title>\n";
-    print "  <synopsis>typedef ".$args{'typedef'}.";</synopsis>\n";
+    print "  <synopsis>typedef " . $args{'typedef'} . ";</synopsis>\n";
     print "</refsynopsisdiv>\n";
 
     output_section_xml(@_);
@@ -963,15 +963,15 @@ sub output_function_gnome {
     my $count;
     my $id;
 
-    $id = $args{'module'}."-".$args{'function'};
+    $id = $args{'module'} . "-" . $args{'function'};
     $id =~ s/[^A-Za-z0-9]/-/g;
 
     print "<sect2>\n";
-    print " <title id=\"$id\">".$args{'function'}."</title>\n";
+    print " <title id=\"$id\">" . $args{'function'} . "</title>\n";
 
     print "  <funcsynopsis>\n";
-    print "   <funcdef>".$args{'functiontype'}." ";
-    print "<function>".$args{'function'}." ";
+    print "   <funcdef>" . $args{'functiontype'} . " ";
+    print "<function>" . $args{'function'} . " ";
     print "</function></funcdef>\n";
 
     $count = 0;
@@ -983,7 +983,7 @@ sub output_function_gnome {
 		print "   <paramdef>$1 <parameter>$parameter</parameter>)\n";
 		print "     <funcparams>$2</funcparams></paramdef>\n";
 	    } else {
-		print "   <paramdef>".$type;
+		print "   <paramdef>" . $type;
 		print " <parameter>$parameter</parameter></paramdef>\n";
 	    }
 	}
@@ -1043,13 +1043,13 @@ sub output_function_man(%) {
     print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n";
 
     print ".SH NAME\n";
-    print $args{'function'}." \\- ".$args{'purpose'}."\n";
+    print $args{'function'} . " \\- " . $args{'purpose'} . "\n";
 
     print ".SH SYNOPSIS\n";
     if ($args{'functiontype'} ne "") {
-	print ".B \"".$args{'functiontype'}."\" ".$args{'function'}."\n";
+	print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n";
     } else {
-	print ".B \"".$args{'function'}."\n";
+	print ".B \"" . $args{'function'} . "\n";
     }
     $count = 0;
     my $parenth = "(";
@@ -1061,10 +1061,10 @@ sub output_function_man(%) {
 	$type = $args{'parametertypes'}{$parameter};
 	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
 	    # pointer-to-function
-	    print ".BI \"".$parenth.$1."\" ".$parameter." \") (".$2.")".$post."\"\n";
+	    print ".BI \"" . $parenth . $1 . "\" " . $parameter . " \") (" . $2 . ")" . $post . "\"\n";
 	} else {
 	    $type =~ s/([^\*])$/$1 /;
-	    print ".BI \"".$parenth.$type."\" ".$parameter." \"".$post."\"\n";
+	    print ".BI \"" . $parenth . $type . "\" " . $parameter . " \"" . $post . "\"\n";
 	}
 	$count++;
 	$parenth = "";
@@ -1075,7 +1075,7 @@ sub output_function_man(%) {
 	my $parameter_name = $parameter;
 	$parameter_name =~ s/\[.*//;
 
-	print ".IP \"".$parameter."\" 12\n";
+	print ".IP \"" . $parameter . "\" 12\n";
 	output_highlight($args{'parameterdescs'}{$parameter_name});
     }
     foreach $section (@{$args{'sectionlist'}}) {
@@ -1094,10 +1094,10 @@ sub output_enum_man(%) {
     print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n";
 
     print ".SH NAME\n";
-    print "enum ".$args{'enum'}." \\- ".$args{'purpose'}."\n";
+    print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n";
 
     print ".SH SYNOPSIS\n";
-    print "enum ".$args{'enum'}." {\n";
+    print "enum " . $args{'enum'} . " {\n";
     $count = 0;
     foreach my $parameter (@{$args{'parameterlist'}}) {
 	print ".br\n.BI \"    $parameter\"\n";
@@ -1116,7 +1116,7 @@ sub output_enum_man(%) {
 	my $parameter_name = $parameter;
 	$parameter_name =~ s/\[.*//;
 
-	print ".IP \"".$parameter."\" 12\n";
+	print ".IP \"" . $parameter . "\" 12\n";
 	output_highlight($args{'parameterdescs'}{$parameter_name});
     }
     foreach $section (@{$args{'sectionlist'}}) {
@@ -1131,13 +1131,13 @@ sub output_struct_man(%) {
     my %args = %{$_[0]};
     my ($parameter, $section);
 
-    print ".TH \"$args{'module'}\" 9 \"".$args{'type'}." ".$args{'struct'}."\" \"$man_date\" \"API Manual\" LINUX\n";
+    print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n";
 
     print ".SH NAME\n";
-    print $args{'type'}." ".$args{'struct'}." \\- ".$args{'purpose'}."\n";
+    print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n";
 
     print ".SH SYNOPSIS\n";
-    print $args{'type'}." ".$args{'struct'}." {\n.br\n";
+    print $args{'type'} . " " . $args{'struct'} . " {\n.br\n";
 
     foreach my $parameter (@{$args{'parameterlist'}}) {
 	if ($parameter =~ /^#/) {
@@ -1151,13 +1151,13 @@ sub output_struct_man(%) {
 	$type = $args{'parametertypes'}{$parameter};
 	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
 	    # pointer-to-function
-	    print ".BI \"    ".$1."\" ".$parameter." \") (".$2.")"."\"\n;\n";
+	    print ".BI \"    " . $1 . "\" " . $parameter . " \") (" . $2 . ")" . "\"\n;\n";
 	} elsif ($type =~ m/^(.*?)\s*(:.*)/) {
 	    # bitfield
-	    print ".BI \"    ".$1."\ \" ".$parameter.$2." \""."\"\n;\n";
+	    print ".BI \"    " . $1 . "\ \" " . $parameter . $2 . " \"" . "\"\n;\n";
 	} else {
 	    $type =~ s/([^\*])$/$1 /;
-	    print ".BI \"    ".$type."\" ".$parameter." \""."\"\n;\n";
+	    print ".BI \"    " . $type . "\" " . $parameter . " \"" . "\"\n;\n";
 	}
 	print "\n.br\n";
     }
@@ -1171,7 +1171,7 @@ sub output_struct_man(%) {
 	$parameter_name =~ s/\[.*//;
 
 	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
-	print ".IP \"".$parameter."\" 12\n";
+	print ".IP \"" . $parameter . "\" 12\n";
 	output_highlight($args{'parameterdescs'}{$parameter_name});
     }
     foreach $section (@{$args{'sectionlist'}}) {
@@ -1189,7 +1189,7 @@ sub output_typedef_man(%) {
     print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n";
 
     print ".SH NAME\n";
-    print "typedef ".$args{'typedef'}." \\- ".$args{'purpose'}."\n";
+    print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n";
 
     foreach $section (@{$args{'sectionlist'}}) {
 	print ".SH \"$section\"\n";
@@ -1218,13 +1218,13 @@ sub output_function_text(%) {
     my $start;
 
     print "Name:\n\n";
-    print $args{'function'}." - ".$args{'purpose'}."\n";
+    print $args{'function'} . " - " . $args{'purpose'} . "\n";
 
     print "\nSynopsis:\n\n";
     if ($args{'functiontype'} ne "") {
-	$start = $args{'functiontype'}." ".$args{'function'}." (";
+	$start = $args{'functiontype'} . " " . $args{'function'} . " (";
     } else {
-	$start = $args{'function'}." (";
+	$start = $args{'function'} . " (";
     }
     print $start;
 
@@ -1233,9 +1233,9 @@ sub output_function_text(%) {
 	$type = $args{'parametertypes'}{$parameter};
 	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
 	    # pointer-to-function
-	    print $1.$parameter.") (".$2;
+	    print $1 . $parameter . ") (" . $2;
 	} else {
-	    print $type." ".$parameter;
+	    print $type . " " . $parameter;
 	}
 	if ($count != $#{$args{'parameterlist'}}) {
 	    $count++;
@@ -1251,7 +1251,7 @@ sub output_function_text(%) {
 	my $parameter_name = $parameter;
 	$parameter_name =~ s/\[.*//;
 
-	print $parameter."\n\t".$args{'parameterdescs'}{$parameter_name}."\n";
+	print $parameter . "\n\t" . $args{'parameterdescs'}{$parameter_name} . "\n";
     }
     output_section_text(@_);
 }
@@ -1276,8 +1276,8 @@ sub output_enum_text(%) {
     my $count;
     print "Enum:\n\n";
 
-    print "enum ".$args{'enum'}." - ".$args{'purpose'}."\n\n";
-    print "enum ".$args{'enum'}." {\n";
+    print "enum " . $args{'enum'} . " - " . $args{'purpose'} . "\n\n";
+    print "enum " . $args{'enum'} . " {\n";
     $count = 0;
     foreach $parameter (@{$args{'parameterlist'}}) {
 	print "\t$parameter";
@@ -1292,7 +1292,7 @@ sub output_enum_text(%) {
     print "Constants:\n\n";
     foreach $parameter (@{$args{'parameterlist'}}) {
 	print "$parameter\n\t";
-	print $args{'parameterdescs'}{$parameter}."\n";
+	print $args{'parameterdescs'}{$parameter} . "\n";
     }
 
     output_section_text(@_);
@@ -1305,7 +1305,7 @@ sub output_typedef_text(%) {
     my $count;
     print "Typedef:\n\n";
 
-    print "typedef ".$args{'typedef'}." - ".$args{'purpose'}."\n";
+    print "typedef " . $args{'typedef'} . " - " . $args{'purpose'} . "\n";
     output_section_text(@_);
 }
 
@@ -1314,8 +1314,8 @@ sub output_struct_text(%) {
     my %args = %{$_[0]};
     my ($parameter);
 
-    print $args{'type'}." ".$args{'struct'}." - ".$args{'purpose'}."\n\n";
-    print $args{'type'}." ".$args{'struct'}." {\n";
+    print $args{'type'} . " " . $args{'struct'} . " - " . $args{'purpose'} . "\n\n";
+    print $args{'type'} . " " . $args{'struct'} . " {\n";
     foreach $parameter (@{$args{'parameterlist'}}) {
 	if ($parameter =~ /^#/) {
 	    print "$parameter\n";
@@ -1334,7 +1334,7 @@ sub output_struct_text(%) {
 	    # bitfield
 	    print "\t$1 $parameter$2;\n";
 	} else {
-	    print "\t".$type." ".$parameter.";\n";
+	    print "\t" . $type . " " . $parameter . ";\n";
 	}
     }
     print "};\n\n";
@@ -1348,7 +1348,7 @@ sub output_struct_text(%) {
 
 	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
 	print "$parameter\n\t";
-	print $args{'parameterdescs'}{$parameter_name}."\n";
+	print $args{'parameterdescs'}{$parameter_name} . "\n";
     }
     print "\n";
     output_section_text(@_);
@@ -1387,7 +1387,7 @@ sub output_declaration {
 # generic output function - calls the right one based on current output mode.
 sub output_blockhead {
     no strict 'refs';
-    my $func = "output_blockhead_".$output_mode;
+    my $func = "output_blockhead_" . $output_mode;
     &$func(@_);
     $section_counter++;
 }
@@ -1398,7 +1398,7 @@ sub output_blockhead {
 sub dump_declaration($$) {
     no strict 'refs';
     my ($prototype, $file) = @_;
-    my $func = "dump_".$decl_type;
+    my $func = "dump_" . $decl_type;
     &$func(@_);
 }
 
@@ -1645,7 +1645,7 @@ sub push_parameter($$$) {
 		    "or member '$param' not " .
 		    "described in '$declaration_name'\n";
 	    }
-	    print STDERR "Warning(${file}:$.):".
+	    print STDERR "Warning(${file}:$.):" .
 			 " No description found for parameter '$param'\n";
 	    ++$warnings;
 	}
@@ -1907,7 +1907,7 @@ sub process_state3_type($$) {
 	    ($2 eq '{') && $brcount++;
 	    ($2 eq '}') && $brcount--;
 	    if (($2 eq ';') && ($brcount == 0)) {
-		dump_declaration($prototype,$file);
+		dump_declaration($prototype, $file);
 		reset_state();
 		last;
 	    }
@@ -2084,7 +2084,7 @@ sub process_file($) {
 		    $section = $section_default;
 		    $contents = "";
 		} else {
-		    $contents .= $1."\n";
+		    $contents .= $1 . "\n";
 		}
 	    } else {
 		# i dont know - bad line?  ignore.
-- 
cgit v0.10.2


From 110bf2b764eb6026b868d84499263cb24b1bcc8d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 9 Jun 2009 17:29:07 -0400
Subject: tracing: add protection around module events unload

When reading the trace buffer, there is a race that when a module
is unloaded it removes events that is stilled referenced in the buffers.
This patch adds the protection around the unloading of the events
from modules and the reading of the trace buffers.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6c81f9c..aa08be6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1050,12 +1050,13 @@ static void trace_module_remove_events(struct module *mod)
 	struct ftrace_event_call *call, *p;
 	bool found = false;
 
+	down_write(&trace_event_mutex);
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		if (call->mod == mod) {
 			found = true;
 			ftrace_event_enable_disable(call, 0);
 			if (call->event)
-				unregister_ftrace_event(call->event);
+				__unregister_ftrace_event(call->event);
 			debugfs_remove_recursive(call->dir);
 			list_del(&call->list);
 			trace_destroy_fields(call);
@@ -1079,6 +1080,7 @@ static void trace_module_remove_events(struct module *mod)
 	 */
 	if (found)
 		tracing_reset_current_online_cpus();
+	up_write(&trace_event_mutex);
 }
 
 static int trace_module_notify(struct notifier_block *self,
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index c05aff4..7938f3a 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -14,7 +14,7 @@
 /* must be a power of 2 */
 #define EVENT_HASHSIZE	128
 
-static DECLARE_RWSEM(trace_event_mutex);
+DECLARE_RWSEM(trace_event_mutex);
 
 DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
 EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
@@ -702,6 +702,16 @@ int register_ftrace_event(struct trace_event *event)
 }
 EXPORT_SYMBOL_GPL(register_ftrace_event);
 
+/*
+ * Used by module code with the trace_event_mutex held for write.
+ */
+int __unregister_ftrace_event(struct trace_event *event)
+{
+	hlist_del(&event->node);
+	list_del(&event->list);
+	return 0;
+}
+
 /**
  * unregister_ftrace_event - remove a no longer used event
  * @event: the event to remove
@@ -709,8 +719,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_event);
 int unregister_ftrace_event(struct trace_event *event)
 {
 	down_write(&trace_event_mutex);
-	hlist_del(&event->node);
-	list_del(&event->list);
+	__unregister_ftrace_event(event);
 	up_write(&trace_event_mutex);
 
 	return 0;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index ac240e7..d38bec4 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -27,6 +27,10 @@ extern struct trace_event *ftrace_find_event(int type);
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
 					 int flags);
 
+/* used by module unregistering */
+extern int __unregister_ftrace_event(struct trace_event *event);
+extern struct rw_semaphore trace_event_mutex;
+
 #define MAX_MEMHEX_BYTES	8
 #define HEX_CHARS		(MAX_MEMHEX_BYTES*2 + 1)
 
-- 
cgit v0.10.2


From 1f803ee5cea67d2387aeedb4b07e645a743729de Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 6 Jun 2009 09:45:59 +0100
Subject: drm/i915: Call drm_vblank_post_modeset() on error paths.

Ensure that the drm_vblank_pre_modeset() is always balanced by
drm_vblank_post_modeset() within intel_crtc_mode_set().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c5c4582..a87eeff 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1598,6 +1598,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk, &clock);
 	if (!ok) {
 		DRM_ERROR("Couldn't find PLL settings for mode!\n");
+		drm_vblank_post_modeset(dev, pipe);
 		return -EINVAL;
 	}
 
@@ -1858,12 +1859,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 
 	/* Flush the plane changes */
 	ret = intel_pipe_set_base(crtc, x, y, old_fb);
-	if (ret != 0)
-	    return ret;
-
 	drm_vblank_post_modeset(dev, pipe);
 
-	return 0;
+	return ret;
 }
 
 /** Loads the palette/gamma unit for the CRTC with the prepared values */
-- 
cgit v0.10.2


From b1ce786cb85280490ca3c29a62ddf8608826b414 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 6 Jun 2009 09:46:00 +0100
Subject: drm/i915: no need to hold mutex for object lookup

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2d705e8..744bf98 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3701,15 +3701,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	struct drm_gem_object *obj;
 	struct drm_i915_gem_object *obj_priv;
 
-	mutex_lock(&dev->struct_mutex);
 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
 	if (obj == NULL) {
 		DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
 			  args->handle);
-		mutex_unlock(&dev->struct_mutex);
 		return -EBADF;
 	}
 
+	mutex_lock(&dev->struct_mutex);
 	/* Update the active list for the hardware's current position.
 	 * Otherwise this only updates on a delayed timer or when irqs are
 	 * actually unmasked, and our working set ends up being larger than
-- 
cgit v0.10.2


From 21d509e339565c82887733c02465bb7f5866c8f5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 6 Jun 2009 09:46:02 +0100
Subject: drm/i915: use I915_GEM_GPU_DOMAINS

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 744bf98..cf5dc08 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -989,10 +989,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		return -ENODEV;
 
 	/* Only handle setting domains to types used by the CPU. */
-	if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+	if (write_domain & I915_GEM_GPU_DOMAINS)
 		return -EINVAL;
 
-	if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+	if (read_domains & I915_GEM_GPU_DOMAINS)
 		return -EINVAL;
 
 	/* Having something in the write domain implies it's in the read
@@ -1769,8 +1769,7 @@ i915_gem_flush(struct drm_device *dev,
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
 		drm_agp_chipset_flush(dev);
 
-	if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
-						     I915_GEM_DOMAIN_GTT)) {
+	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
 		/*
 		 * read/write caches:
 		 *
@@ -2424,8 +2423,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
 	 * a GPU cache
 	 */
-	BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-	BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
+	BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 
 	return 0;
 }
@@ -3568,8 +3567,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
 		atomic_inc(&dev->pin_count);
 		atomic_add(obj->size, &dev->pin_memory);
 		if (!obj_priv->active &&
-		    (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
-					   I915_GEM_DOMAIN_GTT)) == 0 &&
+		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
 		    !list_empty(&obj_priv->list))
 			list_del_init(&obj_priv->list);
 	}
@@ -3596,8 +3594,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj)
 	 */
 	if (obj_priv->pin_count == 0) {
 		if (!obj_priv->active &&
-		    (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
-					   I915_GEM_DOMAIN_GTT)) == 0)
+		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
 			list_move_tail(&obj_priv->list,
 				       &dev_priv->mm.inactive_list);
 		atomic_dec(&dev->pin_count);
@@ -3847,9 +3844,8 @@ i915_gem_idle(struct drm_device *dev)
 
 	/* Flush the GPU along with all non-CPU write domains
 	 */
-	i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
-		       ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-	seqno = i915_add_request(dev, NULL, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
 
 	if (seqno == 0) {
 		mutex_unlock(&dev->struct_mutex);
-- 
cgit v0.10.2


From 2939e1f5331455d17a4a704dd6210e1474002545 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 6 Jun 2009 09:46:03 +0100
Subject: drm/i915: NOMEM->NOSPC

To differentiate between encountering an out-of-memory error with running
out of space in the aperture, use ENOSPC for the later.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cf5dc08..c0ae6bb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2005,7 +2005,7 @@ i915_gem_evict_something(struct drm_device *dev)
 		/* If we didn't do any of the above, there's nothing to be done
 		 * and we just can't fit it in.
 		 */
-		return -ENOMEM;
+		return -ENOSPC;
 	}
 	return ret;
 }
@@ -2020,7 +2020,7 @@ i915_gem_evict_everything(struct drm_device *dev)
 		if (ret != 0)
 			break;
 	}
-	if (ret == -ENOMEM)
+	if (ret == -ENOSPC)
 		return 0;
 	return ret;
 }
@@ -2229,7 +2229,7 @@ try_again:
 		loff_t offset;
 
 		if (avail == 0)
-			return -ENOMEM;
+			return -ENOSPC;
 
 		for (i = dev_priv->fence_reg_start;
 		     i < dev_priv->num_fence_regs; i++) {
@@ -2378,7 +2378,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 		spin_unlock(&dev_priv->mm.active_list_lock);
 		if (lists_empty) {
 			DRM_ERROR("GTT full, but LRU list empty\n");
-			return -ENOMEM;
+			return -ENOSPC;
 		}
 
 		ret = i915_gem_evict_something(dev);
@@ -3349,7 +3349,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 			break;
 
 		/* error other than GTT full, or we've already tried again */
-		if (ret != -ENOMEM || pin_tries >= 1) {
+		if (ret != -ENOSPC || pin_tries >= 1) {
 			if (ret != -ERESTARTSYS)
 				DRM_ERROR("Failed to pin buffers %d\n", ret);
 			goto err;
-- 
cgit v0.10.2


From 03d6069912babc07a3da20e715dd6a5dc8f0f867 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Fri, 5 Jun 2009 18:19:56 -0700
Subject: drm/i915: Hook connector to encoder during load detection (fixes
 tv/vga detect)

With the DRM-driven DPMS code, encoders are considered idle unless a
connector is hooked to them, so mode setting is skipped. This makes load
detection fail as none of the hardware is enabled.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eric Anholt <eric@anholt.net>

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a87eeff..b32a51f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2136,6 +2136,7 @@ struct drm_crtc *intel_get_load_detect_pipe(struct intel_output *intel_output,
 	}
 
 	encoder->crtc = crtc;
+	intel_output->base.encoder = encoder;
 	intel_output->load_detect_temp = true;
 
 	intel_crtc = to_intel_crtc(crtc);
@@ -2171,6 +2172,7 @@ void intel_release_load_detect_pipe(struct intel_output *intel_output, int dpms_
 
 	if (intel_output->load_detect_temp) {
 		encoder->crtc = NULL;
+		intel_output->base.encoder = NULL;
 		intel_output->load_detect_temp = false;
 		crtc->enabled = drm_helper_crtc_in_use(crtc);
 		drm_helper_disable_unused_functions(dev);
-- 
cgit v0.10.2


From 14d0f0b063f5363984dd305a792854f9c23e9e97 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:37:04 +0530
Subject: [SCSI] mpt fusion: Fixing 1078 data corruption issue for 36GB memory
 region

The reason for this change is there is a data corruption when four different
physical memory regions in the 36GB to 37GB region are
accessed. This is only affecting 1078.

The solution is we need to use different addressing when filling in
the scatter gather table for the effected memory regions.  So instead
of snooping on all four different memory holes, we treat any physical
addresses in the 36GB address with the same algorithm.

The fix is explained below
1) Ensure that the message frames are NOT located in the trouble
region. There is no remapping available for message frames, they must
be allocated outside the problem region.
2) Ensure that Sense buffers are NOT in the trouble region. There is
no remapping available.
3) Walk through the SGE entries and if any are inside the trouble region
   then they need to be remapped as discussed below.
	1) Set the Local Address bit in the SGE Flags field.
  	MPI_SGE_FLAGS_LOCAL_ADDRESS
  	2) Ensure we are using 64-bit SGEs
  	3) Set MSb (Bit 63) of the 64-bit address, this will indicate buffer
	location is Host Memory.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 5d496a9..a663692 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -998,7 +998,7 @@ mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
- *	mpt_add_sge - Place a simple SGE at address pAddr.
+ *	mpt_add_sge - Place a simple 32 bit SGE at address pAddr.
  *	@pAddr: virtual address for SGE
  *	@flagslength: SGE flags and data transfer length
  *	@dma_addr: Physical address
@@ -1006,23 +1006,117 @@ mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
  *	This routine places a MPT request frame back on the MPT adapter's
  *	FreeQ.
  */
-void
-mpt_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr)
+static void
+mpt_add_sge(void *pAddr, u32 flagslength, dma_addr_t dma_addr)
 {
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		SGESimple64_t *pSge = (SGESimple64_t *) pAddr;
+	SGESimple32_t *pSge = (SGESimple32_t *) pAddr;
+	pSge->FlagsLength = cpu_to_le32(flagslength);
+	pSge->Address = cpu_to_le32(dma_addr);
+}
+
+/**
+ *	mpt_add_sge_64bit - Place a simple 64 bit SGE at address pAddr.
+ *	@pAddr: virtual address for SGE
+ *	@flagslength: SGE flags and data transfer length
+ *	@dma_addr: Physical address
+ *
+ *	This routine places a MPT request frame back on the MPT adapter's
+ *	FreeQ.
+ **/
+static void
+mpt_add_sge_64bit(void *pAddr, u32 flagslength, dma_addr_t dma_addr)
+{
+	SGESimple64_t *pSge = (SGESimple64_t *) pAddr;
+	pSge->Address.Low = cpu_to_le32
+			(lower_32_bits((unsigned long)(dma_addr)));
+	pSge->Address.High = cpu_to_le32
+			(upper_32_bits((unsigned long)dma_addr));
+	pSge->FlagsLength = cpu_to_le32
+			((flagslength | MPT_SGE_FLAGS_64_BIT_ADDRESSING));
+}
+
+/**
+ *	mpt_add_sge_64bit_1078 - Place a simple 64 bit SGE at address pAddr
+ *	(1078 workaround).
+ *	@pAddr: virtual address for SGE
+ *	@flagslength: SGE flags and data transfer length
+ *	@dma_addr: Physical address
+ *
+ *	This routine places a MPT request frame back on the MPT adapter's
+ *	FreeQ.
+ **/
+static void
+mpt_add_sge_64bit_1078(void *pAddr, u32 flagslength, dma_addr_t dma_addr)
+{
+	SGESimple64_t *pSge = (SGESimple64_t *) pAddr;
+	u32 tmp;
+
+	pSge->Address.Low = cpu_to_le32
+			(lower_32_bits((unsigned long)(dma_addr)));
+	tmp = (u32)(upper_32_bits((unsigned long)dma_addr));
+
+	/*
+	 * 1078 errata workaround for the 36GB limitation
+	 */
+	if ((((u64)dma_addr + MPI_SGE_LENGTH(flagslength)) >> 32)  == 9) {
+		flagslength |=
+		    MPI_SGE_SET_FLAGS(MPI_SGE_FLAGS_LOCAL_ADDRESS);
+		tmp |= (1<<31);
+		if (mpt_debug_level & MPT_DEBUG_36GB_MEM)
+			printk(KERN_DEBUG "1078 P0M2 addressing for "
+			    "addr = 0x%llx len = %d\n",
+			    (unsigned long long)dma_addr,
+			    MPI_SGE_LENGTH(flagslength));
+	}
+
+	pSge->Address.High = cpu_to_le32(tmp);
+	pSge->FlagsLength = cpu_to_le32(
+		(flagslength | MPT_SGE_FLAGS_64_BIT_ADDRESSING));
+}
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/**
+ *	mpt_add_chain - Place a 32 bit chain SGE at address pAddr.
+ *	@pAddr: virtual address for SGE
+ *	@next: nextChainOffset value (u32's)
+ *	@length: length of next SGL segment
+ *	@dma_addr: Physical address
+ *
+ */
+static void
+mpt_add_chain(void *pAddr, u8 next, u16 length, dma_addr_t dma_addr)
+{
+		SGEChain32_t *pChain = (SGEChain32_t *) pAddr;
+		pChain->Length = cpu_to_le16(length);
+		pChain->Flags = MPI_SGE_FLAGS_CHAIN_ELEMENT;
+		pChain->NextChainOffset = next;
+		pChain->Address = cpu_to_le32(dma_addr);
+}
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/**
+ *	mpt_add_chain_64bit - Place a 64 bit chain SGE at address pAddr.
+ *	@pAddr: virtual address for SGE
+ *	@next: nextChainOffset value (u32's)
+ *	@length: length of next SGL segment
+ *	@dma_addr: Physical address
+ *
+ */
+static void
+mpt_add_chain_64bit(void *pAddr, u8 next, u16 length, dma_addr_t dma_addr)
+{
+		SGEChain64_t *pChain = (SGEChain64_t *) pAddr;
 		u32 tmp = dma_addr & 0xFFFFFFFF;
 
-		pSge->FlagsLength = cpu_to_le32(flagslength);
-		pSge->Address.Low = cpu_to_le32(tmp);
-		tmp = (u32) ((u64)dma_addr >> 32);
-		pSge->Address.High = cpu_to_le32(tmp);
+		pChain->Length = cpu_to_le16(length);
+		pChain->Flags = (MPI_SGE_FLAGS_CHAIN_ELEMENT |
+				 MPI_SGE_FLAGS_64_BIT_ADDRESSING);
 
-	} else {
-		SGESimple32_t *pSge = (SGESimple32_t *) pAddr;
-		pSge->FlagsLength = cpu_to_le32(flagslength);
-		pSge->Address = cpu_to_le32(dma_addr);
-	}
+		pChain->NextChainOffset = next;
+
+		pChain->Address.Low = cpu_to_le32(tmp);
+		tmp = (u32)(upper_32_bits((unsigned long)dma_addr));
+		pChain->Address.High = cpu_to_le32(tmp);
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -1225,7 +1319,7 @@ mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init)
 	}
 	flags_length = flags_length << MPI_SGE_FLAGS_SHIFT;
 	flags_length |= ioc->HostPageBuffer_sz;
-	mpt_add_sge(psge, flags_length, ioc->HostPageBuffer_dma);
+	ioc->add_sge(psge, flags_length, ioc->HostPageBuffer_dma);
 	ioc->facts.HostPageBufferSGE = ioc_init->HostPageBufferSGE;
 
 return 0;
@@ -1534,21 +1628,42 @@ mpt_mapresources(MPT_ADAPTER *ioc)
 
 	pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
 
-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
-	    && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
-		    ": 64 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
-		    ioc->name));
-	} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
-	    && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
-		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
-		    ": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
-		    ioc->name));
+	if (sizeof(dma_addr_t) > 4) {
+		const uint64_t required_mask = dma_get_required_mask
+		    (&pdev->dev);
+		if (required_mask > DMA_BIT_MASK(32)
+			&& !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
+			&& !pci_set_consistent_dma_mask(pdev,
+						 DMA_BIT_MASK(64))) {
+			ioc->dma_mask = DMA_BIT_MASK(64);
+			dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+				": 64 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
+				ioc->name));
+		} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
+			&& !pci_set_consistent_dma_mask(pdev,
+						DMA_BIT_MASK(32))) {
+			ioc->dma_mask = DMA_BIT_MASK(32);
+			dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+				": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
+				ioc->name));
+		} else {
+			printk(MYIOC_s_WARN_FMT "no suitable DMA mask for %s\n",
+			    ioc->name, pci_name(pdev));
+			return r;
+		}
 	} else {
-		printk(MYIOC_s_WARN_FMT "no suitable DMA mask for %s\n",
-		    ioc->name, pci_name(pdev));
-		pci_release_selected_regions(pdev, ioc->bars);
-		return r;
+		if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
+			&& !pci_set_consistent_dma_mask(pdev,
+						DMA_BIT_MASK(32))) {
+			ioc->dma_mask = DMA_BIT_MASK(32);
+			dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+				": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n",
+				ioc->name));
+		} else {
+			printk(MYIOC_s_WARN_FMT "no suitable DMA mask for %s\n",
+			    ioc->name, pci_name(pdev));
+			return r;
+		}
 	}
 
 	mem_phys = msize = 0;
@@ -1650,6 +1765,23 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 		return r;
 	}
 
+	/*
+	 * Setting up proper handlers for scatter gather handling
+	 */
+	if (ioc->dma_mask == DMA_BIT_MASK(64)) {
+		if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1078)
+			ioc->add_sge = &mpt_add_sge_64bit_1078;
+		else
+			ioc->add_sge = &mpt_add_sge_64bit;
+		ioc->add_chain = &mpt_add_chain_64bit;
+		ioc->sg_addr_size = 8;
+	} else {
+		ioc->add_sge = &mpt_add_sge;
+		ioc->add_chain = &mpt_add_chain;
+		ioc->sg_addr_size = 4;
+	}
+	ioc->SGE_size = sizeof(u32) + ioc->sg_addr_size;
+
 	ioc->alloc_total = sizeof(MPT_ADAPTER);
 	ioc->req_sz = MPT_DEFAULT_FRAME_SIZE;		/* avoid div by zero! */
 	ioc->reply_sz = MPT_REPLY_FRAME_SIZE;
@@ -1994,6 +2126,21 @@ mpt_resume(struct pci_dev *pdev)
 	if (err)
 		return err;
 
+	if (ioc->dma_mask == DMA_BIT_MASK(64)) {
+		if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1078)
+			ioc->add_sge = &mpt_add_sge_64bit_1078;
+		else
+			ioc->add_sge = &mpt_add_sge_64bit;
+		ioc->add_chain = &mpt_add_chain_64bit;
+		ioc->sg_addr_size = 8;
+	} else {
+
+		ioc->add_sge = &mpt_add_sge;
+		ioc->add_chain = &mpt_add_chain;
+		ioc->sg_addr_size = 4;
+	}
+	ioc->SGE_size = sizeof(u32) + ioc->sg_addr_size;
+
 	printk(MYIOC_s_INFO_FMT "pci-resume: ioc-state=0x%x,doorbell=0x%x\n",
 	    ioc->name, (mpt_GetIocState(ioc, 1) >> MPI_IOC_STATE_SHIFT),
 	    CHIPREG_READ32(&ioc->chip->Doorbell));
@@ -3325,11 +3472,10 @@ mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag)
 	FWUpload_t		*prequest;
 	FWUploadReply_t		*preply;
 	FWUploadTCSGE_t		*ptcsge;
-	int			 sgeoffset;
 	u32			 flagsLength;
 	int			 ii, sz, reply_sz;
 	int			 cmdStatus;
-
+	int			request_size;
 	/* If the image size is 0, we are done.
 	 */
 	if ((sz = ioc->facts.FWImageSize) == 0)
@@ -3364,18 +3510,17 @@ mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag)
 	ptcsge->ImageSize = cpu_to_le32(sz);
 	ptcsge++;
 
-	sgeoffset = sizeof(FWUpload_t) - sizeof(SGE_MPI_UNION) + sizeof(FWUploadTCSGE_t);
-
 	flagsLength = MPT_SGE_FLAGS_SSIMPLE_READ | sz;
-	mpt_add_sge((char *)ptcsge, flagsLength, ioc->cached_fw_dma);
-
-	sgeoffset += sizeof(u32) + sizeof(dma_addr_t);
-	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT ": Sending FW Upload (req @ %p) sgeoffset=%d \n",
-	    ioc->name, prequest, sgeoffset));
+	ioc->add_sge((char *)ptcsge, flagsLength, ioc->cached_fw_dma);
+	request_size = offsetof(FWUpload_t, SGL) + sizeof(FWUploadTCSGE_t) +
+	    ioc->SGE_size;
+	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending FW Upload "
+	    " (req @ %p) fw_size=%d mf_request_size=%d\n", ioc->name, prequest,
+	    ioc->facts.FWImageSize, request_size));
 	DBG_DUMP_FW_REQUEST_FRAME(ioc, (u32 *)prequest);
 
-	ii = mpt_handshake_req_reply_wait(ioc, sgeoffset, (u32*)prequest,
-				reply_sz, (u16*)preply, 65 /*seconds*/, sleepFlag);
+	ii = mpt_handshake_req_reply_wait(ioc, request_size, (u32 *)prequest,
+	    reply_sz, (u16 *)preply, 65 /*seconds*/, sleepFlag);
 
 	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT ": FW Upload completed rc=%x \n", ioc->name, ii));
 
@@ -4090,18 +4235,18 @@ initChainBuffers(MPT_ADAPTER *ioc)
 	 * num_sge = num sge in request frame + last chain buffer
 	 * scale = num sge per chain buffer if no chain element
 	 */
-	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
-	if (sizeof(dma_addr_t) == sizeof(u64))
-		num_sge =  scale + (ioc->req_sz - 60) / (sizeof(dma_addr_t) + sizeof(u32));
+	scale = ioc->req_sz / ioc->SGE_size;
+	if (ioc->sg_addr_size == sizeof(u64))
+		num_sge =  scale + (ioc->req_sz - 60) / ioc->SGE_size;
 	else
-		num_sge =  1+ scale + (ioc->req_sz - 64) / (sizeof(dma_addr_t) + sizeof(u32));
+		num_sge =  1 + scale + (ioc->req_sz - 64) / ioc->SGE_size;
 
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		numSGE = (scale - 1) * (ioc->facts.MaxChainDepth-1) + scale +
-			(ioc->req_sz - 60) / (sizeof(dma_addr_t) + sizeof(u32));
+			(ioc->req_sz - 60) / ioc->SGE_size;
 	} else {
-		numSGE = 1 + (scale - 1) * (ioc->facts.MaxChainDepth-1) + scale +
-			(ioc->req_sz - 64) / (sizeof(dma_addr_t) + sizeof(u32));
+		numSGE = 1 + (scale - 1) * (ioc->facts.MaxChainDepth-1) +
+		    scale + (ioc->req_sz - 64) / ioc->SGE_size;
 	}
 	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "num_sge=%d numSGE=%d\n",
 		ioc->name, num_sge, numSGE));
@@ -4161,12 +4306,42 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
 	dma_addr_t alloc_dma;
 	u8 *mem;
 	int i, reply_sz, sz, total_size, num_chain;
+	u64	dma_mask;
+
+	dma_mask = 0;
 
 	/*  Prime reply FIFO...  */
 
 	if (ioc->reply_frames == NULL) {
 		if ( (num_chain = initChainBuffers(ioc)) < 0)
 			return -1;
+		/*
+		 * 1078 errata workaround for the 36GB limitation
+		 */
+		if (ioc->pcidev->device == MPI_MANUFACTPAGE_DEVID_SAS1078 &&
+		    ioc->dma_mask > DMA_35BIT_MASK) {
+			if (!pci_set_dma_mask(ioc->pcidev, DMA_BIT_MASK(32))
+			    && !pci_set_consistent_dma_mask(ioc->pcidev,
+			    DMA_BIT_MASK(32))) {
+				dma_mask = DMA_35BIT_MASK;
+				d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				    "setting 35 bit addressing for "
+				    "Request/Reply/Chain and Sense Buffers\n",
+				    ioc->name));
+			} else {
+				/*Reseting DMA mask to 64 bit*/
+				pci_set_dma_mask(ioc->pcidev,
+					DMA_BIT_MASK(64));
+				pci_set_consistent_dma_mask(ioc->pcidev,
+					DMA_BIT_MASK(64));
+
+				printk(MYIOC_s_ERR_FMT
+				    "failed setting 35 bit addressing for "
+				    "Request/Reply/Chain and Sense Buffers\n",
+				    ioc->name);
+				return -1;
+			}
+		}
 
 		total_size = reply_sz = (ioc->reply_sz * ioc->reply_depth);
 		dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "ReplyBuffer sz=%d bytes, ReplyDepth=%d\n",
@@ -4305,6 +4480,12 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
 		alloc_dma += ioc->reply_sz;
 	}
 
+	if (dma_mask == DMA_35BIT_MASK && !pci_set_dma_mask(ioc->pcidev,
+	    ioc->dma_mask) && !pci_set_consistent_dma_mask(ioc->pcidev,
+	    ioc->dma_mask))
+		d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "restoring 64 bit addressing\n", ioc->name));
+
 	return 0;
 
 out_fail:
@@ -4324,6 +4505,13 @@ out_fail:
 				ioc->sense_buf_pool, ioc->sense_buf_pool_dma);
 		ioc->sense_buf_pool = NULL;
 	}
+
+	if (dma_mask == DMA_35BIT_MASK && !pci_set_dma_mask(ioc->pcidev,
+	    DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(ioc->pcidev,
+	    DMA_BIT_MASK(64)))
+		d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "restoring 64 bit addressing\n", ioc->name));
+
 	return -1;
 }
 
@@ -5926,7 +6114,7 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
 			ioc->name, pReq->Header.PageType, pReq->Header.PageNumber, pReq->Action));
 	}
 
-	mpt_add_sge((char *)&pReq->PageBufferSGE, flagsLength, pCfg->physAddr);
+	ioc->add_sge((char *)&pReq->PageBufferSGE, flagsLength, pCfg->physAddr);
 
 	/* Append pCfg pointer to end of mf
 	 */
@@ -7613,7 +7801,6 @@ EXPORT_SYMBOL(mpt_get_msg_frame);
 EXPORT_SYMBOL(mpt_put_msg_frame);
 EXPORT_SYMBOL(mpt_put_msg_frame_hi_pri);
 EXPORT_SYMBOL(mpt_free_msg_frame);
-EXPORT_SYMBOL(mpt_add_sge);
 EXPORT_SYMBOL(mpt_send_handshake_request);
 EXPORT_SYMBOL(mpt_verify_adapter);
 EXPORT_SYMBOL(mpt_GetIocState);
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index b3e981d..4a60676 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -76,8 +76,8 @@
 #define COPYRIGHT	"Copyright (c) 1999-2008 " MODULEAUTHOR
 #endif
 
-#define MPT_LINUX_VERSION_COMMON	"3.04.07"
-#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.07"
+#define MPT_LINUX_VERSION_COMMON	"3.04.08"
+#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.08"
 #define WHAT_MAGIC_STRING		"@" "(" "#" ")"
 
 #define show_mptmod_ver(s,ver)  \
@@ -134,6 +134,7 @@
 
 #define MPT_COALESCING_TIMEOUT		0x10
 
+
 /*
  * SCSI transfer rate defines.
  */
@@ -564,6 +565,10 @@ struct mptfc_rport_info
 	u8		flags;
 };
 
+typedef void (*MPT_ADD_SGE)(void *pAddr, u32 flagslength, dma_addr_t dma_addr);
+typedef void (*MPT_ADD_CHAIN)(void *pAddr, u8 next, u16 length,
+		dma_addr_t dma_addr);
+
 /*
  *  Adapter Structure - pci_dev specific. Maximum: MPT_MAX_ADAPTERS
  */
@@ -600,6 +605,10 @@ typedef struct _MPT_ADAPTER
 	int			 reply_depth;	/* Num Allocated reply frames */
 	int			 reply_sz;	/* Reply frame size */
 	int			 num_chain;	/* Number of chain buffers */
+	MPT_ADD_SGE              add_sge;       /* Pointer to add_sge
+						   function */
+	MPT_ADD_CHAIN		 add_chain;	/* Pointer to add_chain
+						   function */
 		/* Pool of buffers for chaining. ReqToChain
 		 * and ChainToChain track index of chain buffers.
 		 * ChainBuffer (DMA) virt/phys addresses.
@@ -711,12 +720,15 @@ typedef struct _MPT_ADAPTER
 	struct workqueue_struct *fc_rescan_work_q;
 	struct scsi_cmnd	**ScsiLookup;
 	spinlock_t		  scsi_lookup_lock;
-
+	u64			dma_mask;
 	char			 reset_work_q_name[20];
 	struct workqueue_struct *reset_work_q;
 	struct delayed_work	 fault_reset_work;
 	spinlock_t		 fault_reset_work_lock;
 
+	u8			sg_addr_size;
+	u8			SGE_size;
+
 } MPT_ADAPTER;
 
 /*
@@ -753,13 +765,14 @@ typedef struct _mpt_sge {
 	dma_addr_t	Address;
 } MptSge_t;
 
-#define mpt_addr_size() \
-	((sizeof(dma_addr_t) == sizeof(u64)) ? MPI_SGE_FLAGS_64_BIT_ADDRESSING : \
-		MPI_SGE_FLAGS_32_BIT_ADDRESSING)
 
-#define mpt_msg_flags() \
-	((sizeof(dma_addr_t) == sizeof(u64)) ? MPI_SCSIIO_MSGFLGS_SENSE_WIDTH_64 : \
-		MPI_SCSIIO_MSGFLGS_SENSE_WIDTH_32)
+#define mpt_msg_flags(ioc) \
+	(ioc->sg_addr_size == sizeof(u64)) ?		\
+	MPI_SCSIIO_MSGFLGS_SENSE_WIDTH_64 : 		\
+	MPI_SCSIIO_MSGFLGS_SENSE_WIDTH_32
+
+#define MPT_SGE_FLAGS_64_BIT_ADDRESSING \
+	(MPI_SGE_FLAGS_64_BIT_ADDRESSING << MPI_SGE_FLAGS_SHIFT)
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /*
@@ -909,7 +922,6 @@ extern MPT_FRAME_HDR	*mpt_get_msg_frame(u8 cb_idx, MPT_ADAPTER *ioc);
 extern void	 mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
 extern void	 mpt_put_msg_frame(u8 cb_idx, MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
 extern void	 mpt_put_msg_frame_hi_pri(u8 cb_idx, MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
-extern void	 mpt_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr);
 
 extern int	 mpt_send_handshake_request(u8 cb_idx, MPT_ADAPTER *ioc, int reqBytes, u32 *req, int sleepFlag);
 extern int	 mpt_verify_adapter(int iocid, MPT_ADAPTER **iocpp);
@@ -959,7 +971,6 @@ extern int mpt_fwfault_debug;
 #define MPT_SGE_FLAGS_END_OF_BUFFER		(0x40000000)
 #define MPT_SGE_FLAGS_LOCAL_ADDRESS		(0x08000000)
 #define MPT_SGE_FLAGS_DIRECTION			(0x04000000)
-#define MPT_SGE_FLAGS_ADDRESSING		(mpt_addr_size() << MPI_SGE_FLAGS_SHIFT)
 #define MPT_SGE_FLAGS_END_OF_LIST		(0x01000000)
 
 #define MPT_SGE_FLAGS_TRANSACTION_ELEMENT	(0x00000000)
@@ -972,14 +983,12 @@ extern int mpt_fwfault_debug;
 	 MPT_SGE_FLAGS_END_OF_BUFFER |	\
 	 MPT_SGE_FLAGS_END_OF_LIST |	\
 	 MPT_SGE_FLAGS_SIMPLE_ELEMENT |	\
-	 MPT_SGE_FLAGS_ADDRESSING | \
 	 MPT_TRANSFER_IOC_TO_HOST)
 #define MPT_SGE_FLAGS_SSIMPLE_WRITE \
 	(MPT_SGE_FLAGS_LAST_ELEMENT |	\
 	 MPT_SGE_FLAGS_END_OF_BUFFER |	\
 	 MPT_SGE_FLAGS_END_OF_LIST |	\
 	 MPT_SGE_FLAGS_SIMPLE_ELEMENT |	\
-	 MPT_SGE_FLAGS_ADDRESSING | \
 	 MPT_TRANSFER_HOST_TO_IOC)
 
 /*}-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index c638171..bece386 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -841,8 +841,9 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen)
 	 *	96		8
 	 *	64		4
 	 */
-	maxfrags = (iocp->req_sz - sizeof(MPIHeader_t) - sizeof(FWDownloadTCSGE_t))
-			/ (sizeof(dma_addr_t) + sizeof(u32));
+	maxfrags = (iocp->req_sz - sizeof(MPIHeader_t) -
+			sizeof(FWDownloadTCSGE_t))
+			/ iocp->SGE_size;
 	if (numfrags > maxfrags) {
 		ret = -EMLINK;
 		goto fwdl_out;
@@ -870,7 +871,7 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen)
 		if (nib == 0 || nib == 3) {
 			;
 		} else if (sgIn->Address) {
-			mpt_add_sge(sgOut, sgIn->FlagsLength, sgIn->Address);
+			iocp->add_sge(sgOut, sgIn->FlagsLength, sgIn->Address);
 			n++;
 			if (copy_from_user(bl->kptr, ufwbuf+fw_bytes_copied, bl->len)) {
 				printk(MYIOC_s_ERR_FMT "%s@%d::_ioctl_fwdl - "
@@ -882,7 +883,7 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen)
 		}
 		sgIn++;
 		bl++;
-		sgOut += (sizeof(dma_addr_t) + sizeof(u32));
+		sgOut += iocp->SGE_size;
 	}
 
 	DBG_DUMP_FW_DOWNLOAD(iocp, (u32 *)mf, numfrags);
@@ -1003,7 +1004,7 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags,
 	 *
 	 */
 	sgl = sglbuf;
-	sg_spill = ((ioc->req_sz - sge_offset)/(sizeof(dma_addr_t) + sizeof(u32))) - 1;
+	sg_spill = ((ioc->req_sz - sge_offset)/ioc->SGE_size) - 1;
 	while (bytes_allocd < bytes) {
 		this_alloc = min(alloc_sz, bytes-bytes_allocd);
 		buflist[buflist_ent].len = this_alloc;
@@ -1024,8 +1025,9 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags,
 			dma_addr_t dma_addr;
 
 			bytes_allocd += this_alloc;
-			sgl->FlagsLength = (0x10000000|MPT_SGE_FLAGS_ADDRESSING|sgdir|this_alloc);
-			dma_addr = pci_map_single(ioc->pcidev, buflist[buflist_ent].kptr, this_alloc, dir);
+			sgl->FlagsLength = (0x10000000|sgdir|this_alloc);
+			dma_addr = pci_map_single(ioc->pcidev,
+				buflist[buflist_ent].kptr, this_alloc, dir);
 			sgl->Address = dma_addr;
 
 			fragcnt++;
@@ -1799,9 +1801,9 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 	 */
 	sz = karg.dataSgeOffset * 4;
 	if (karg.dataInSize > 0)
-		sz += sizeof(dma_addr_t) + sizeof(u32);
+		sz += ioc->SGE_size;
 	if (karg.dataOutSize > 0)
-		sz += sizeof(dma_addr_t) + sizeof(u32);
+		sz += ioc->SGE_size;
 
 	if (sz > ioc->req_sz) {
 		printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
@@ -1893,7 +1895,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 			}
 
 			pScsiReq->MsgFlags &= ~MPI_SCSIIO_MSGFLGS_SENSE_WIDTH;
-			pScsiReq->MsgFlags |= mpt_msg_flags();
+			pScsiReq->MsgFlags |= mpt_msg_flags(ioc);
 
 
 			/* verify that app has not requested
@@ -1979,7 +1981,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 			int dataSize;
 
 			pScsiReq->MsgFlags &= ~MPI_SCSIIO_MSGFLGS_SENSE_WIDTH;
-			pScsiReq->MsgFlags |= mpt_msg_flags();
+			pScsiReq->MsgFlags |= mpt_msg_flags(ioc);
 
 
 			/* verify that app has not requested
@@ -2123,8 +2125,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 			if (karg.dataInSize > 0) {
 				flagsLength = ( MPI_SGE_FLAGS_SIMPLE_ELEMENT |
 						MPI_SGE_FLAGS_END_OF_BUFFER |
-						MPI_SGE_FLAGS_DIRECTION |
-						mpt_addr_size() )
+						MPI_SGE_FLAGS_DIRECTION)
 						<< MPI_SGE_FLAGS_SHIFT;
 			} else {
 				flagsLength = MPT_SGE_FLAGS_SSIMPLE_WRITE;
@@ -2141,8 +2142,8 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 				/* Set up this SGE.
 				 * Copy to MF and to sglbuf
 				 */
-				mpt_add_sge(psge, flagsLength, dma_addr_out);
-				psge += (sizeof(u32) + sizeof(dma_addr_t));
+				ioc->add_sge(psge, flagsLength, dma_addr_out);
+				psge += ioc->SGE_size;
 
 				/* Copy user data to kernel space.
 				 */
@@ -2175,13 +2176,13 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 				/* Set up this SGE
 				 * Copy to MF and to sglbuf
 				 */
-				mpt_add_sge(psge, flagsLength, dma_addr_in);
+				ioc->add_sge(psge, flagsLength, dma_addr_in);
 			}
 		}
 	} else  {
 		/* Add a NULL SGE
 		 */
-		mpt_add_sge(psge, flagsLength, (dma_addr_t) -1);
+		ioc->add_sge(psge, flagsLength, (dma_addr_t) -1);
 	}
 
 	ioc->ioctl->wait_done = 0;
@@ -2498,7 +2499,7 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size)
 	pbuf = pci_alloc_consistent(ioc->pcidev, 4, &buf_dma);
 	if (!pbuf)
 		goto out;
-	mpt_add_sge((char *)&IstwiRWRequest->SGL,
+	ioc->add_sge((char *)&IstwiRWRequest->SGL,
 	    (MPT_SGE_FLAGS_SSIMPLE_READ|4), buf_dma);
 
 	ioc->ioctl->wait_done = 0;
diff --git a/drivers/message/fusion/mptdebug.h b/drivers/message/fusion/mptdebug.h
index 510b9f4..28e4788 100644
--- a/drivers/message/fusion/mptdebug.h
+++ b/drivers/message/fusion/mptdebug.h
@@ -58,6 +58,7 @@
 #define MPT_DEBUG_FC			0x00080000
 #define MPT_DEBUG_SAS			0x00100000
 #define MPT_DEBUG_SAS_WIDE		0x00200000
+#define MPT_DEBUG_36GB_MEM              0x00400000
 
 /*
  * CONFIG_FUSION_LOGGING - enabled in Kconfig
@@ -135,6 +136,8 @@
 #define dsaswideprintk(IOC, CMD)		\
 	MPT_CHECK_LOGGING(IOC, CMD, MPT_DEBUG_SAS_WIDE)
 
+#define d36memprintk(IOC, CMD)		\
+	MPT_CHECK_LOGGING(IOC, CMD, MPT_DEBUG_36GB_MEM)
 
 
 /*
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index c3c24fd..da16b47 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -1251,17 +1251,15 @@ mptfc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * A slightly different algorithm is required for
 	 * 64bit SGEs.
 	 */
-	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	scale = ioc->req_sz/ioc->SGE_size;
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		numSGE = (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 60) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 60) / ioc->SGE_size;
 	} else {
 		numSGE = 1 + (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 64) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 64) / ioc->SGE_size;
 	}
 
 	if (numSGE < sh->sg_tablesize) {
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index a9019f0..b162f7a 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1319,15 +1319,15 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	/* request */
 	flagsLength = (MPI_SGE_FLAGS_SIMPLE_ELEMENT |
 		       MPI_SGE_FLAGS_END_OF_BUFFER |
-		       MPI_SGE_FLAGS_DIRECTION |
-		       mpt_addr_size()) << MPI_SGE_FLAGS_SHIFT;
+		       MPI_SGE_FLAGS_DIRECTION)
+		       << MPI_SGE_FLAGS_SHIFT;
 	flagsLength |= (req->data_len - 4);
 
 	dma_addr_out = pci_map_single(ioc->pcidev, bio_data(req->bio),
 				      req->data_len, PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_out)
 		goto put_mf;
-	mpt_add_sge(psge, flagsLength, dma_addr_out);
+	ioc->add_sge(psge, flagsLength, dma_addr_out);
 	psge += (sizeof(u32) + sizeof(dma_addr_t));
 
 	/* response */
@@ -1337,7 +1337,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 				      rsp->data_len, PCI_DMA_BIDIRECTIONAL);
 	if (!dma_addr_in)
 		goto unmap;
-	mpt_add_sge(psge, flagsLength, dma_addr_in);
+	ioc->add_sge(psge, flagsLength, dma_addr_in);
 
 	mpt_put_msg_frame(mptsasMgmtCtx, ioc, mf);
 
@@ -3211,17 +3211,15 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * A slightly different algorithm is required for
 	 * 64bit SGEs.
 	 */
-	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	scale = ioc->req_sz/ioc->SGE_size;
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		numSGE = (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 60) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 60) / ioc->SGE_size;
 	} else {
 		numSGE = 1 + (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 64) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 64) / ioc->SGE_size;
 	}
 
 	if (numSGE < sh->sg_tablesize) {
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index e62c6bc..8c08c73 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -113,69 +113,6 @@ int 		mptscsih_resume(struct pci_dev *pdev);
 
 #define SNS_LEN(scp)	SCSI_SENSE_BUFFERSIZE
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_add_sge - Place a simple SGE at address pAddr.
- *	@pAddr: virtual address for SGE
- *	@flagslength: SGE flags and data transfer length
- *	@dma_addr: Physical address
- *
- *	This routine places a MPT request frame back on the MPT adapter's
- *	FreeQ.
- */
-static inline void
-mptscsih_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr)
-{
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		SGESimple64_t *pSge = (SGESimple64_t *) pAddr;
-		u32 tmp = dma_addr & 0xFFFFFFFF;
-
-		pSge->FlagsLength = cpu_to_le32(flagslength);
-		pSge->Address.Low = cpu_to_le32(tmp);
-		tmp = (u32) ((u64)dma_addr >> 32);
-		pSge->Address.High = cpu_to_le32(tmp);
-
-	} else {
-		SGESimple32_t *pSge = (SGESimple32_t *) pAddr;
-		pSge->FlagsLength = cpu_to_le32(flagslength);
-		pSge->Address = cpu_to_le32(dma_addr);
-	}
-} /* mptscsih_add_sge() */
-
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_add_chain - Place a chain SGE at address pAddr.
- *	@pAddr: virtual address for SGE
- *	@next: nextChainOffset value (u32's)
- *	@length: length of next SGL segment
- *	@dma_addr: Physical address
- *
- *	This routine places a MPT request frame back on the MPT adapter's
- *	FreeQ.
- */
-static inline void
-mptscsih_add_chain(char *pAddr, u8 next, u16 length, dma_addr_t dma_addr)
-{
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		SGEChain64_t *pChain = (SGEChain64_t *) pAddr;
-		u32 tmp = dma_addr & 0xFFFFFFFF;
-
-		pChain->Length = cpu_to_le16(length);
-		pChain->Flags = MPI_SGE_FLAGS_CHAIN_ELEMENT | mpt_addr_size();
-
-		pChain->NextChainOffset = next;
-
-		pChain->Address.Low = cpu_to_le32(tmp);
-		tmp = (u32) ((u64)dma_addr >> 32);
-		pChain->Address.High = cpu_to_le32(tmp);
-	} else {
-		SGEChain32_t *pChain = (SGEChain32_t *) pAddr;
-		pChain->Length = cpu_to_le16(length);
-		pChain->Flags = MPI_SGE_FLAGS_CHAIN_ELEMENT | mpt_addr_size();
-		pChain->NextChainOffset = next;
-		pChain->Address = cpu_to_le32(dma_addr);
-	}
-} /* mptscsih_add_chain() */
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /*
@@ -281,10 +218,10 @@ mptscsih_AddSGE(MPT_ADAPTER *ioc, struct scsi_cmnd *SCpnt,
 	 */
 
 nextSGEset:
-	numSgeSlots = ((frm_sz - sgeOffset) / (sizeof(u32) + sizeof(dma_addr_t)) );
+	numSgeSlots = ((frm_sz - sgeOffset) / ioc->SGE_size);
 	numSgeThisFrame = (sges_left < numSgeSlots) ? sges_left : numSgeSlots;
 
-	sgflags = MPT_SGE_FLAGS_SIMPLE_ELEMENT | MPT_SGE_FLAGS_ADDRESSING | sgdir;
+	sgflags = MPT_SGE_FLAGS_SIMPLE_ELEMENT | sgdir;
 
 	/* Get first (num - 1) SG elements
 	 * Skip any SG entries with a length of 0
@@ -299,11 +236,11 @@ nextSGEset:
 		}
 
 		v2 = sg_dma_address(sg);
-		mptscsih_add_sge(psge, sgflags | thisxfer, v2);
+		ioc->add_sge(psge, sgflags | thisxfer, v2);
 
 		sg = sg_next(sg);	/* Get next SG element from the OS */
-		psge += (sizeof(u32) + sizeof(dma_addr_t));
-		sgeOffset += (sizeof(u32) + sizeof(dma_addr_t));
+		psge += ioc->SGE_size;
+		sgeOffset += ioc->SGE_size;
 		sg_done++;
 	}
 
@@ -320,12 +257,8 @@ nextSGEset:
 		thisxfer = sg_dma_len(sg);
 
 		v2 = sg_dma_address(sg);
-		mptscsih_add_sge(psge, sgflags | thisxfer, v2);
-		/*
-		sg = sg_next(sg);
-		psge += (sizeof(u32) + sizeof(dma_addr_t));
-		*/
-		sgeOffset += (sizeof(u32) + sizeof(dma_addr_t));
+		ioc->add_sge(psge, sgflags | thisxfer, v2);
+		sgeOffset += ioc->SGE_size;
 		sg_done++;
 
 		if (chainSge) {
@@ -334,7 +267,8 @@ nextSGEset:
 			 * Update the chain element
 			 * Offset and Length fields.
 			 */
-			mptscsih_add_chain((char *)chainSge, 0, sgeOffset, ioc->ChainBufferDMA + chain_dma_off);
+			ioc->add_chain((char *)chainSge, 0, sgeOffset,
+				ioc->ChainBufferDMA + chain_dma_off);
 		} else {
 			/* The current buffer is the original MF
 			 * and there is no Chain buffer.
@@ -367,7 +301,7 @@ nextSGEset:
 		 * set properly).
 		 */
 		if (sg_done) {
-			u32 *ptmp = (u32 *) (psge - (sizeof(u32) + sizeof(dma_addr_t)));
+			u32 *ptmp = (u32 *) (psge - ioc->SGE_size);
 			sgflags = le32_to_cpu(*ptmp);
 			sgflags |= MPT_SGE_FLAGS_LAST_ELEMENT;
 			*ptmp = cpu_to_le32(sgflags);
@@ -381,8 +315,9 @@ nextSGEset:
 			 * Old chain element is now complete.
 			 */
 			u8 nextChain = (u8) (sgeOffset >> 2);
-			sgeOffset += (sizeof(u32) + sizeof(dma_addr_t));
-			mptscsih_add_chain((char *)chainSge, nextChain, sgeOffset, ioc->ChainBufferDMA + chain_dma_off);
+			sgeOffset += ioc->SGE_size;
+			ioc->add_chain((char *)chainSge, nextChain, sgeOffset,
+					 ioc->ChainBufferDMA + chain_dma_off);
 		} else {
 			/* The original MF buffer requires a chain buffer -
 			 * set the offset.
@@ -1422,7 +1357,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	pScsiReq->CDBLength = SCpnt->cmd_len;
 	pScsiReq->SenseBufferLength = MPT_SENSE_BUFFER_SIZE;
 	pScsiReq->Reserved = 0;
-	pScsiReq->MsgFlags = mpt_msg_flags();
+	pScsiReq->MsgFlags = mpt_msg_flags(ioc);
 	int_to_scsilun(SCpnt->device->lun, (struct scsi_lun *)pScsiReq->LUN);
 	pScsiReq->Control = cpu_to_le32(scsictl);
 
@@ -1448,7 +1383,8 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	 */
 	if (datalen == 0) {
 		/* Add a NULL SGE */
-		mptscsih_add_sge((char *)&pScsiReq->SGL, MPT_SGE_FLAGS_SSIMPLE_READ | 0,
+		ioc->add_sge((char *)&pScsiReq->SGL,
+			MPT_SGE_FLAGS_SSIMPLE_READ | 0,
 			(dma_addr_t) -1);
 	} else {
 		/* Add a 32 or 64 bit SGE */
@@ -3172,7 +3108,7 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 
 	pScsiReq->Reserved = 0;
 
-	pScsiReq->MsgFlags = mpt_msg_flags();
+	pScsiReq->MsgFlags = mpt_msg_flags(ioc);
 	/* MsgContext set in mpt_get_msg_fram call  */
 
 	int_to_scsilun(io->lun, (struct scsi_lun *)pScsiReq->LUN);
@@ -3199,11 +3135,11 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 			ioc->name, cmd, io->channel, io->id, io->lun));
 
 	if (dir == MPI_SCSIIO_CONTROL_READ) {
-		mpt_add_sge((char *) &pScsiReq->SGL,
+		ioc->add_sge((char *) &pScsiReq->SGL,
 			MPT_SGE_FLAGS_SSIMPLE_READ | io->size,
 			io->data_dma);
 	} else {
-		mpt_add_sge((char *) &pScsiReq->SGL,
+		ioc->add_sge((char *) &pScsiReq->SGL,
 			MPT_SGE_FLAGS_SSIMPLE_WRITE | io->size,
 			io->data_dma);
 	}
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 6162014..643a3c6 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -300,7 +300,7 @@ mptspi_writeIOCPage4(MPT_SCSI_HOST *hd, u8 channel , u8 id)
 	flagsLength = MPT_SGE_FLAGS_SSIMPLE_WRITE |
 		(IOCPage4Ptr->Header.PageLength + ii) * 4;
 
-	mpt_add_sge((char *)&pReq->PageBufferSGE, flagsLength, dataDma);
+	ioc->add_sge((char *)&pReq->PageBufferSGE, flagsLength, dataDma);
 
 	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		"writeIOCPage4: MaxSEP=%d ActiveSEP=%d id=%d bus=%d\n",
@@ -643,7 +643,7 @@ mptscsih_quiesce_raid(MPT_SCSI_HOST *hd, int quiesce, u8 channel, u8 id)
 	pReq->Reserved2 = 0;
 	pReq->ActionDataWord = 0; /* Reserved for this action */
 
-	mpt_add_sge((char *)&pReq->ActionDataSGE,
+	ioc->add_sge((char *)&pReq->ActionDataSGE,
 		MPT_SGE_FLAGS_SSIMPLE_READ | 0, (dma_addr_t) -1);
 
 	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "RAID Volume action=%x channel=%d id=%d\n",
@@ -1423,17 +1423,15 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * A slightly different algorithm is required for
 	 * 64bit SGEs.
 	 */
-	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	scale = ioc->req_sz/ioc->SGE_size;
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		numSGE = (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 60) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 60) / ioc->SGE_size;
 	} else {
 		numSGE = 1 + (scale - 1) *
 		  (ioc->facts.MaxChainDepth-1) + scale +
-		  (ioc->req_sz - 64) / (sizeof(dma_addr_t) +
-		  sizeof(u32));
+		  (ioc->req_sz - 64) / ioc->SGE_size;
 	}
 
 	if (numSGE < sh->sg_tablesize) {
-- 
cgit v0.10.2


From 7b5a65b9e649dad9cf9c6d282df4162843070351 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:38:14 +0530
Subject: [SCSI] mpt fusion: Added support for MPT discovery completion check

sas_discovery_quiesce_io flag is used to control IO start/resume functionality.
IO will be stoped while doing discovery of topology. Once discovery is completed
It will resume IO. Resending patch including James review.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index a663692..54326e0 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -277,6 +277,56 @@ mpt_get_cb_idx(MPT_DRIVER_CLASS dclass)
 }
 
 /**
+ * mpt_is_discovery_complete - determine if discovery has completed
+ * @ioc: per adatper instance
+ *
+ * Returns 1 when discovery completed, else zero.
+ */
+static int
+mpt_is_discovery_complete(MPT_ADAPTER *ioc)
+{
+	ConfigExtendedPageHeader_t hdr;
+	CONFIGPARMS cfg;
+	SasIOUnitPage0_t *buffer;
+	dma_addr_t dma_handle;
+	int rc = 0;
+
+	memset(&hdr, 0, sizeof(ConfigExtendedPageHeader_t));
+	memset(&cfg, 0, sizeof(CONFIGPARMS));
+	hdr.PageVersion = MPI_SASIOUNITPAGE0_PAGEVERSION;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
+	hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_IO_UNIT;
+	cfg.cfghdr.ehdr = &hdr;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+
+	if ((mpt_config(ioc, &cfg)))
+		goto out;
+	if (!hdr.ExtPageLength)
+		goto out;
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+	    &dma_handle);
+	if (!buffer)
+		goto out;
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	if ((mpt_config(ioc, &cfg)))
+		goto out_free_consistent;
+
+	if (!(buffer->PhyData[0].PortFlags &
+	    MPI_SAS_IOUNIT0_PORT_FLAGS_DISCOVERY_IN_PROGRESS))
+		rc = 1;
+
+ out_free_consistent:
+	pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+	    buffer, dma_handle);
+ out:
+	return rc;
+}
+
+/**
  *	mpt_fault_reset_work - work performed on workq after ioc fault
  *	@work: input argument, used to derive ioc
  *
@@ -307,6 +357,12 @@ mpt_fault_reset_work(struct work_struct *work)
 			printk(MYIOC_s_WARN_FMT "IOC is in FAULT state after "
 			    "reset (%04xh)\n", ioc->name, ioc_raw_state &
 			    MPI_DOORBELL_DATA_MASK);
+	} else if (ioc->bus_type == SAS && ioc->sas_discovery_quiesce_io) {
+		if ((mpt_is_discovery_complete(ioc))) {
+			devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "clearing "
+			    "discovery_quiesce_io flag\n", ioc->name));
+			ioc->sas_discovery_quiesce_io = 0;
+		}
 	}
 
  out:
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 4a60676..8cd0a16 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -706,6 +706,7 @@ typedef struct _MPT_ADAPTER
 	struct mutex		 sas_discovery_mutex;
 	u8			 sas_discovery_runtime;
 	u8			 sas_discovery_ignore_events;
+	u8			 sas_discovery_quiesce_io;
 	int			 sas_index; /* index refrencing */
 	MPT_SAS_MGMT		 sas_mgmt;
 	struct work_struct	 sas_persist_task;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index b162f7a..40dbaaa 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1008,6 +1008,8 @@ mptsas_slave_alloc(struct scsi_device *sdev)
 static int
 mptsas_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
+	MPT_SCSI_HOST	*hd;
+	MPT_ADAPTER	*ioc;
 	VirtDevice	*vdevice = SCpnt->device->hostdata;
 
 	if (!vdevice || !vdevice->vtarget || vdevice->vtarget->deleted) {
@@ -1016,6 +1018,12 @@ mptsas_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 		return 0;
 	}
 
+	hd = shost_priv(SCpnt->device->host);
+	ioc = hd->ioc;
+
+	if (ioc->sas_discovery_quiesce_io)
+		return SCSI_MLQUEUE_HOST_BUSY;
+
 //	scsi_print_command(SCpnt);
 
 	return mptscsih_qcmd(SCpnt,done);
@@ -3009,6 +3017,7 @@ mptsas_send_discovery_event(MPT_ADAPTER *ioc,
 	EVENT_DATA_SAS_DISCOVERY *discovery_data)
 {
 	struct mptsas_discovery_event *ev;
+	u32 discovery_status;
 
 	/*
 	 * DiscoveryStatus
@@ -3017,7 +3026,9 @@ mptsas_send_discovery_event(MPT_ADAPTER *ioc,
 	 * kicks off discovery, and return to zero
 	 * once its completed.
 	 */
-	if (discovery_data->DiscoveryStatus)
+	discovery_status = le32_to_cpu(discovery_data->DiscoveryStatus);
+	ioc->sas_discovery_quiesce_io = discovery_status ? 1 : 0;
+	if (discovery_status)
 		return;
 
 	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
@@ -3299,12 +3310,22 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return error;
 }
 
+void
+mptsas_shutdown(struct pci_dev *pdev)
+{
+	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
+
+	ioc->sas_discovery_quiesce_io = 0;
+}
+
 static void __devexit mptsas_remove(struct pci_dev *pdev)
 {
 	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
 	struct mptsas_portinfo *p, *n;
 	int i;
 
+	mptsas_shutdown(pdev);
+
 	ioc->sas_discovery_ignore_events = 1;
 	sas_remove_host(ioc->sh);
 
@@ -3342,7 +3363,7 @@ static struct pci_driver mptsas_driver = {
 	.id_table	= mptsas_pci_table,
 	.probe		= mptsas_probe,
 	.remove		= __devexit_p(mptsas_remove),
-	.shutdown	= mptscsih_shutdown,
+	.shutdown	= mptsas_shutdown,
 #ifdef CONFIG_PM
 	.suspend	= mptscsih_suspend,
 	.resume		= mptscsih_resume,
-- 
cgit v0.10.2


From fd76175a7d3abf4d14df17f5f4c7e68b466b455d Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:39:06 +0530
Subject: [SCSI] mpt fusion: Optimized SendEvent notification Using Doorbell
 instead FIFO

SendEventNotification was handled through FIFO, now it is using doorbell to
communicate with hardware. Added Sleep Flag as an extra argument to support
Can-Sleep feature.  Resending patch including compilation error fix reviewed
by Grant Grundler.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 54326e0..0d2fb0e 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -192,7 +192,8 @@ static void 	mpt_read_ioc_pg_1(MPT_ADAPTER *ioc);
 static void 	mpt_read_ioc_pg_4(MPT_ADAPTER *ioc);
 static void	mpt_timer_expired(unsigned long data);
 static void	mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc);
-static int	SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch);
+static int	SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch,
+	int sleepFlag);
 static int	SendEventAck(MPT_ADAPTER *ioc, EventNotificationReply_t *evnp);
 static int	mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_value, int sleepFlag);
 static int	mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init);
@@ -208,7 +209,8 @@ static int	procmpt_iocinfo_read(char *buf, char **start, off_t offset,
 static void	mpt_get_fw_exp_ver(char *buf, MPT_ADAPTER *ioc);
 
 //int		mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag);
-static int	ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *evReply, int *evHandlers);
+static int	ProcessEventNotification(MPT_ADAPTER *ioc,
+		EventNotificationReply_t *evReply, int *evHandlers);
 static void	mpt_iocstatus_info(MPT_ADAPTER *ioc, u32 ioc_status, MPT_FRAME_HDR *mf);
 static void	mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info);
 static void	mpt_spi_log_info(MPT_ADAPTER *ioc, u32 log_info);
@@ -2472,28 +2474,36 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 		}
 	}
 
+	/*  Enable MPT base driver management of EventNotification
+	 *  and EventAck handling.
+	 */
+	if ((ret == 0) && (!ioc->facts.EventState)) {
+		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+			"SendEventNotification\n",
+		    ioc->name));
+		ret = SendEventNotification(ioc, 1, sleepFlag);	/* 1=Enable */
+	}
+
+	if (ioc->alt_ioc && alt_ioc_ready && !ioc->alt_ioc->facts.EventState)
+		rc = SendEventNotification(ioc->alt_ioc, 1, sleepFlag);
+
 	if (ret == 0) {
 		/* Enable! (reply interrupt) */
 		CHIPREG_WRITE32(&ioc->chip->IntMask, MPI_HIM_DIM);
 		ioc->active = 1;
 	}
-
-	if (reset_alt_ioc_active && ioc->alt_ioc) {
-		/* (re)Enable alt-IOC! (reply interrupt) */
-		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT "alt_ioc reply irq re-enabled\n",
-		    ioc->alt_ioc->name));
-		CHIPREG_WRITE32(&ioc->alt_ioc->chip->IntMask, MPI_HIM_DIM);
-		ioc->alt_ioc->active = 1;
+	if (rc == 0) {	/* alt ioc */
+		if (reset_alt_ioc_active && ioc->alt_ioc) {
+			/* (re)Enable alt-IOC! (reply interrupt) */
+			dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "alt-ioc"
+				"reply irq re-enabled\n",
+				ioc->alt_ioc->name));
+			CHIPREG_WRITE32(&ioc->alt_ioc->chip->IntMask,
+				MPI_HIM_DIM);
+			ioc->alt_ioc->active = 1;
+		}
 	}
 
-	/*  Enable MPT base driver management of EventNotification
-	 *  and EventAck handling.
-	 */
-	if ((ret == 0) && (!ioc->facts.EventState))
-		(void) SendEventNotification(ioc, 1);	/* 1=Enable EventNotification */
-
-	if (ioc->alt_ioc && alt_ioc_ready && !ioc->alt_ioc->facts.EventState)
-		(void) SendEventNotification(ioc->alt_ioc, 1);	/* 1=Enable EventNotification */
 
 	/*	Add additional "reason" check before call to GetLanConfigPages
 	 *	(combined with GetIoUnitPage2 call).  This prevents a somewhat
@@ -6019,30 +6029,28 @@ mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc)
  *	SendEventNotification - Send EventNotification (on or off) request to adapter
  *	@ioc: Pointer to MPT_ADAPTER structure
  *	@EvSwitch: Event switch flags
+ *	@sleepFlag: Specifies whether the process can sleep
  */
 static int
-SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch)
+SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch, int sleepFlag)
 {
-	EventNotification_t	*evnp;
+	EventNotification_t	evn;
+	MPIDefaultReply_t	reply_buf;
 
-	evnp = (EventNotification_t *) mpt_get_msg_frame(mpt_base_index, ioc);
-	if (evnp == NULL) {
-		devtverboseprintk(ioc, printk(MYIOC_s_WARN_FMT "Unable to allocate event request frame!\n",
-				ioc->name));
-		return 0;
-	}
-	memset(evnp, 0, sizeof(*evnp));
-
-	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending EventNotification (%d) request %p\n", ioc->name, EvSwitch, evnp));
+	memset(&evn, 0, sizeof(EventNotification_t));
+	memset(&reply_buf, 0, sizeof(MPIDefaultReply_t));
 
-	evnp->Function = MPI_FUNCTION_EVENT_NOTIFICATION;
-	evnp->ChainOffset = 0;
-	evnp->MsgFlags = 0;
-	evnp->Switch = EvSwitch;
+	evn.Function = MPI_FUNCTION_EVENT_NOTIFICATION;
+	evn.Switch = EvSwitch;
+	evn.MsgContext = cpu_to_le32(mpt_base_index << 16);
 
-	mpt_put_msg_frame(mpt_base_index, ioc, (MPT_FRAME_HDR *)evnp);
+	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "Sending EventNotification (%d) request %p\n",
+	    ioc->name, EvSwitch, &evn));
 
-	return 0;
+	return mpt_handshake_req_reply_wait(ioc, sizeof(EventNotification_t),
+	    (u32 *)&evn, sizeof(MPIDefaultReply_t), (u16 *)&reply_buf, 30,
+	    sleepFlag);
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-- 
cgit v0.10.2


From f0f09d3b3f06900d64971625d6753dea0623ed45 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:40:57 +0530
Subject: [SCSI] mpt fusion: config path optimized, completion queue is used

1) 	Previously we had mutliple #defines to use same values.
	Now those #defines are optimized.
	MPT_IOCTL_STATUS_* is removed and  MPT_MGMT_STATUS_* are new
	#defines.
2.)	config path is optimized.
	Instead of wait Queue and timer, using completion Q.
3.)	mpt_timer_expired is not used.

[jejb: elide patch to eliminate mpt_timer_expired]
Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 0d2fb0e..e63a626 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -146,7 +146,6 @@ static MPT_EVHANDLER		 MptEvHandlers[MPT_MAX_PROTOCOL_DRIVERS];
 static MPT_RESETHANDLER		 MptResetHandlers[MPT_MAX_PROTOCOL_DRIVERS];
 static struct mpt_pci_driver 	*MptDeviceDriverHandlers[MPT_MAX_PROTOCOL_DRIVERS];
 
-static DECLARE_WAIT_QUEUE_HEAD(mpt_waitq);
 
 /*
  *  Driver Callback Index's
@@ -159,7 +158,8 @@ static u8 last_drv_idx;
  *  Forward protos...
  */
 static irqreturn_t mpt_interrupt(int irq, void *bus_id);
-static int	mpt_base_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply);
+static int	mptbase_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
+		MPT_FRAME_HDR *reply);
 static int	mpt_handshake_req_reply_wait(MPT_ADAPTER *ioc, int reqBytes,
 			u32 *req, int replyBytes, u16 *u16reply, int maxwait,
 			int sleepFlag);
@@ -190,7 +190,6 @@ static int	mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum);
 static int	mpt_readScsiDevicePageHeaders(MPT_ADAPTER *ioc, int portnum);
 static void 	mpt_read_ioc_pg_1(MPT_ADAPTER *ioc);
 static void 	mpt_read_ioc_pg_4(MPT_ADAPTER *ioc);
-static void	mpt_timer_expired(unsigned long data);
 static void	mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc);
 static int	SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch,
 	int sleepFlag);
@@ -559,9 +558,9 @@ mpt_interrupt(int irq, void *bus_id)
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
- *	mpt_base_reply - MPT base driver's callback routine
+ *	mptbase_reply - MPT base driver's callback routine
  *	@ioc: Pointer to MPT_ADAPTER structure
- *	@mf: Pointer to original MPT request frame
+ *	@req: Pointer to original MPT request frame
  *	@reply: Pointer to MPT reply frame (NULL if TurboReply)
  *
  *	MPT base driver's callback routine; all base driver
@@ -572,122 +571,49 @@ mpt_interrupt(int irq, void *bus_id)
  *	should be freed, or 0 if it shouldn't.
  */
 static int
-mpt_base_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *reply)
+mptbase_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
 {
+	EventNotificationReply_t *pEventReply;
+	u8 event;
+	int evHandlers;
 	int freereq = 1;
-	u8 func;
-
-	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_base_reply() called\n", ioc->name));
-#ifdef CONFIG_FUSION_LOGGING
-	if ((ioc->debug_level & MPT_DEBUG_MSG_FRAME) &&
-			!(reply->u.hdr.MsgFlags & MPI_MSGFLAGS_CONTINUATION_REPLY)) {
-		dmfprintk(ioc, printk(MYIOC_s_INFO_FMT ": Original request frame (@%p) header\n",
-		    ioc->name, mf));
-		DBG_DUMP_REQUEST_FRAME_HDR(ioc, (u32 *)mf);
-	}
-#endif
-
-	func = reply->u.hdr.Function;
-	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_base_reply, Function=%02Xh\n",
-			ioc->name, func));
-
-	if (func == MPI_FUNCTION_EVENT_NOTIFICATION) {
-		EventNotificationReply_t *pEvReply = (EventNotificationReply_t *) reply;
-		int evHandlers = 0;
-		int results;
-
-		results = ProcessEventNotification(ioc, pEvReply, &evHandlers);
-		if (results != evHandlers) {
-			/* CHECKME! Any special handling needed here? */
-			devtverboseprintk(ioc, printk(MYIOC_s_WARN_FMT "Called %d event handlers, sum results = %d\n",
-					ioc->name, evHandlers, results));
-		}
 
-		/*
-		 *	Hmmm...  It seems that EventNotificationReply is an exception
-		 *	to the rule of one reply per request.
-		 */
-		if (pEvReply->MsgFlags & MPI_MSGFLAGS_CONTINUATION_REPLY) {
+	switch (reply->u.hdr.Function) {
+	case MPI_FUNCTION_EVENT_NOTIFICATION:
+		pEventReply = (EventNotificationReply_t *)reply;
+		evHandlers = 0;
+		ProcessEventNotification(ioc, pEventReply, &evHandlers);
+		event = le32_to_cpu(pEventReply->Event) & 0xFF;
+		if (pEventReply->MsgFlags & MPI_MSGFLAGS_CONTINUATION_REPLY)
 			freereq = 0;
-		} else {
-			devtverboseprintk(ioc, printk(MYIOC_s_WARN_FMT "EVENT_NOTIFICATION reply %p returns Request frame\n",
-				ioc->name, pEvReply));
-		}
-
-#ifdef CONFIG_PROC_FS
-//		LogEvent(ioc, pEvReply);
-#endif
-
-	} else if (func == MPI_FUNCTION_EVENT_ACK) {
-		dprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_base_reply, EventAck reply received\n",
-				ioc->name));
-	} else if (func == MPI_FUNCTION_CONFIG) {
-		CONFIGPARMS *pCfg;
-		unsigned long flags;
-
-		dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "config_complete (mf=%p,mr=%p)\n",
-				ioc->name, mf, reply));
-
-		pCfg = * ((CONFIGPARMS **)((u8 *) mf + ioc->req_sz - sizeof(void *)));
-
-		if (pCfg) {
-			/* disable timer and remove from linked list */
-			del_timer(&pCfg->timer);
-
-			spin_lock_irqsave(&ioc->FreeQlock, flags);
-			list_del(&pCfg->linkage);
-			spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-			/*
-			 *	If IOC Status is SUCCESS, save the header
-			 *	and set the status code to GOOD.
-			 */
-			pCfg->status = MPT_CONFIG_ERROR;
-			if (reply) {
-				ConfigReply_t	*pReply = (ConfigReply_t *)reply;
-				u16		 status;
-
-				status = le16_to_cpu(pReply->IOCStatus) & MPI_IOCSTATUS_MASK;
-				dcprintk(ioc, printk(MYIOC_s_NOTE_FMT "  IOCStatus=%04xh, IOCLogInfo=%08xh\n",
-				     ioc->name, status, le32_to_cpu(pReply->IOCLogInfo)));
-
-				pCfg->status = status;
-				if (status == MPI_IOCSTATUS_SUCCESS) {
-					if ((pReply->Header.PageType &
-					    MPI_CONFIG_PAGETYPE_MASK) ==
-					    MPI_CONFIG_PAGETYPE_EXTENDED) {
-						pCfg->cfghdr.ehdr->ExtPageLength =
-						    le16_to_cpu(pReply->ExtPageLength);
-						pCfg->cfghdr.ehdr->ExtPageType =
-						    pReply->ExtPageType;
-					}
-					pCfg->cfghdr.hdr->PageVersion = pReply->Header.PageVersion;
-
-					/* If this is a regular header, save PageLength. */
-					/* LMP Do this better so not using a reserved field! */
-					pCfg->cfghdr.hdr->PageLength = pReply->Header.PageLength;
-					pCfg->cfghdr.hdr->PageNumber = pReply->Header.PageNumber;
-					pCfg->cfghdr.hdr->PageType = pReply->Header.PageType;
-				}
-			}
-
-			/*
-			 *	Wake up the original calling thread
-			 */
-			pCfg->wait_done = 1;
-			wake_up(&mpt_waitq);
+		if (event != MPI_EVENT_EVENT_CHANGE)
+			break;
+	case MPI_FUNCTION_CONFIG:
+	case MPI_FUNCTION_SAS_IO_UNIT_CONTROL:
+		ioc->mptbase_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
+		if (reply) {
+			ioc->mptbase_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+			memcpy(ioc->mptbase_cmds.reply, reply,
+			    min(MPT_DEFAULT_FRAME_SIZE,
+				4 * reply->u.reply.MsgLength));
 		}
-	} else if (func == MPI_FUNCTION_SAS_IO_UNIT_CONTROL) {
-		/* we should be always getting a reply frame */
-		memcpy(ioc->persist_reply_frame, reply,
-		    min(MPT_DEFAULT_FRAME_SIZE,
-		    4*reply->u.reply.MsgLength));
-		del_timer(&ioc->persist_timer);
-		ioc->persist_wait_done = 1;
-		wake_up(&mpt_waitq);
-	} else {
-		printk(MYIOC_s_ERR_FMT "Unexpected msg function (=%02Xh) reply received!\n",
-				ioc->name, func);
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->mptbase_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+			complete(&ioc->mptbase_cmds.done);
+		} else
+			freereq = 0;
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_FREE_MF)
+			freereq = 1;
+		break;
+	case MPI_FUNCTION_EVENT_ACK:
+		devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "EventAck reply received\n", ioc->name));
+		break;
+	default:
+		printk(MYIOC_s_ERR_FMT
+		    "Unexpected msg function (=%02Xh) reply received!\n",
+		    ioc->name, reply->u.hdr.Function);
+		break;
 	}
 
 	/*
@@ -1849,6 +1775,9 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	spin_lock_init(&ioc->diagLock);
 	spin_lock_init(&ioc->initializing_hba_lock);
 
+	mutex_init(&ioc->mptbase_cmds.mutex);
+	init_completion(&ioc->mptbase_cmds.done);
+
 	/* Initialize the event logging.
 	 */
 	ioc->eventTypes = 0;	/* None */
@@ -1866,10 +1795,6 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	 */
 	memset(&ioc->spi_data, 0, sizeof(SpiCfgData));
 
-	/* Initialize the running configQ head.
-	 */
-	INIT_LIST_HEAD(&ioc->configQ);
-
 	/* Initialize the fc rport list head.
 	 */
 	INIT_LIST_HEAD(&ioc->fc_rports);
@@ -5013,7 +4938,14 @@ mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode)
 	SasIoUnitControlReply_t		*sasIoUnitCntrReply;
 	MPT_FRAME_HDR			*mf = NULL;
 	MPIHeader_t			*mpi_hdr;
+	int				ret = 0;
+	unsigned long 	 		timeleft;
+
+	mutex_lock(&ioc->mptbase_cmds.mutex);
 
+	/* init the internal cmd struct */
+	memset(ioc->mptbase_cmds.reply, 0 , MPT_DEFAULT_FRAME_SIZE);
+	INITIALIZE_MGMT_STATUS(ioc->mptbase_cmds.status)
 
 	/* insure garbage is not sent to fw */
 	switch(persist_opcode) {
@@ -5023,17 +4955,19 @@ mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode)
 		break;
 
 	default:
-		return -1;
-		break;
+		ret = -1;
+		goto out;
 	}
 
-	printk("%s: persist_opcode=%x\n",__func__, persist_opcode);
+	printk(KERN_DEBUG  "%s: persist_opcode=%x\n",
+		__func__, persist_opcode);
 
 	/* Get a MF for this command.
 	 */
 	if ((mf = mpt_get_msg_frame(mpt_base_index, ioc)) == NULL) {
-		printk("%s: no msg frames!\n",__func__);
-		return -1;
+		printk(KERN_DEBUG "%s: no msg frames!\n", __func__);
+		ret = -1;
+		goto out;
         }
 
 	mpi_hdr = (MPIHeader_t *) mf;
@@ -5043,27 +4977,42 @@ mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode)
 	sasIoUnitCntrReq->MsgContext = mpi_hdr->MsgContext;
 	sasIoUnitCntrReq->Operation = persist_opcode;
 
-	init_timer(&ioc->persist_timer);
-	ioc->persist_timer.data = (unsigned long) ioc;
-	ioc->persist_timer.function = mpt_timer_expired;
-	ioc->persist_timer.expires = jiffies + HZ*10 /* 10 sec */;
-	ioc->persist_wait_done=0;
-	add_timer(&ioc->persist_timer);
 	mpt_put_msg_frame(mpt_base_index, ioc, mf);
-	wait_event(mpt_waitq, ioc->persist_wait_done);
+	timeleft = wait_for_completion_timeout(&ioc->mptbase_cmds.done, 10*HZ);
+	if (!(ioc->mptbase_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = -ETIME;
+		printk(KERN_DEBUG "%s: failed\n", __func__);
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		if (!timeleft) {
+			printk(KERN_DEBUG "%s: Issuing Reset from %s!!\n",
+			    ioc->name, __func__);
+			mpt_HardResetHandler(ioc, CAN_SLEEP);
+			mpt_free_msg_frame(ioc, mf);
+		}
+		goto out;
+	}
+
+	if (!(ioc->mptbase_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		ret = -1;
+		goto out;
+	}
 
 	sasIoUnitCntrReply =
-	    (SasIoUnitControlReply_t *)ioc->persist_reply_frame;
+	    (SasIoUnitControlReply_t *)ioc->mptbase_cmds.reply;
 	if (le16_to_cpu(sasIoUnitCntrReply->IOCStatus) != MPI_IOCSTATUS_SUCCESS) {
-		printk("%s: IOCStatus=0x%X IOCLogInfo=0x%X\n",
-		    __func__,
-		    sasIoUnitCntrReply->IOCStatus,
+		printk(KERN_DEBUG "%s: IOCStatus=0x%X IOCLogInfo=0x%X\n",
+		    __func__, sasIoUnitCntrReply->IOCStatus,
 		    sasIoUnitCntrReply->IOCLogInfo);
-		return -1;
-	}
+		printk(KERN_DEBUG "%s: failed\n", __func__);
+		ret = -1;
+	} else
+		printk(KERN_DEBUG "%s: success\n", __func__);
+ out:
 
-	printk("%s: success\n",__func__);
-	return 0;
+	CLEAR_MGMT_STATUS(ioc->mptbase_cmds.status)
+	mutex_unlock(&ioc->mptbase_cmds.mutex);
+	return ret;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -6066,7 +6015,7 @@ SendEventAck(MPT_ADAPTER *ioc, EventNotificationReply_t *evnp)
 
 	if ((pAck = (EventAck_t *) mpt_get_msg_frame(mpt_base_index, ioc)) == NULL) {
 		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, no msg frames!!\n",
-		    ioc->name,__func__));
+		    ioc->name, __func__));
 		return -1;
 	}
 
@@ -6103,12 +6052,18 @@ int
 mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
 {
 	Config_t	*pReq;
+	ConfigReply_t	*pReply;
 	ConfigExtendedPageHeader_t  *pExtHdr = NULL;
 	MPT_FRAME_HDR	*mf;
-	unsigned long	 flags;
-	int		 ii, rc;
+	int		 ii;
 	int		 flagsLength;
+	long		 timeout;
+	int		 ret;
+	u8		 page_type = 0, extend_page;
+	unsigned long 	 timeleft;
 	int		 in_isr;
+	u8		 issue_hard_reset = 0;
+	u8		 retry_count = 0;
 
 	/*	Prevent calling wait_event() (below), if caller happens
 	 *	to be in ISR context, because that is fatal!
@@ -6120,13 +6075,31 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
 		return -EPERM;
 	}
 
+	/* don't send if no chance of success */
+	if (!ioc->active ||
+	    mpt_GetIocState(ioc, 1) != MPI_IOC_STATE_OPERATIONAL) {
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: ioc not operational, %d, %xh\n",
+		    ioc->name, __func__, ioc->active,
+		    mpt_GetIocState(ioc, 0)));
+		return -EFAULT;
+	}
+
+ retry_config:
+	mutex_lock(&ioc->mptbase_cmds.mutex);
+	/* init the internal cmd struct */
+	memset(ioc->mptbase_cmds.reply, 0 , MPT_DEFAULT_FRAME_SIZE);
+	INITIALIZE_MGMT_STATUS(ioc->mptbase_cmds.status)
+
 	/* Get and Populate a free Frame
 	 */
 	if ((mf = mpt_get_msg_frame(mpt_base_index, ioc)) == NULL) {
-		dcprintk(ioc, printk(MYIOC_s_WARN_FMT "mpt_config: no msg frames!\n",
-				ioc->name));
-		return -EAGAIN;
+		dcprintk(ioc, printk(MYIOC_s_WARN_FMT
+		"mpt_config: no msg frames!\n", ioc->name));
+		ret = -EAGAIN;
+		goto out;
 	}
+
 	pReq = (Config_t *)mf;
 	pReq->Action = pCfg->action;
 	pReq->Reserved = 0;
@@ -6152,7 +6125,9 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
 		pReq->ExtPageType = pExtHdr->ExtPageType;
 		pReq->Header.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
 
-		/* Page Length must be treated as a reserved field for the extended header. */
+		/* Page Length must be treated as a reserved field for the
+		 * extended header.
+		 */
 		pReq->Header.PageLength = 0;
 	}
 
@@ -6165,78 +6140,91 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
 	else
 		flagsLength = MPT_SGE_FLAGS_SSIMPLE_READ;
 
-	if ((pCfg->cfghdr.hdr->PageType & MPI_CONFIG_PAGETYPE_MASK) == MPI_CONFIG_PAGETYPE_EXTENDED) {
+	if ((pCfg->cfghdr.hdr->PageType & MPI_CONFIG_PAGETYPE_MASK) ==
+	    MPI_CONFIG_PAGETYPE_EXTENDED) {
 		flagsLength |= pExtHdr->ExtPageLength * 4;
-
-		dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending Config request type %d, page %d and action %d\n",
-			ioc->name, pReq->ExtPageType, pReq->Header.PageNumber, pReq->Action));
-	}
-	else {
+		page_type = pReq->ExtPageType;
+		extend_page = 1;
+	} else {
 		flagsLength |= pCfg->cfghdr.hdr->PageLength * 4;
-
-		dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending Config request type %d, page %d and action %d\n",
-			ioc->name, pReq->Header.PageType, pReq->Header.PageNumber, pReq->Action));
+		page_type = pReq->Header.PageType;
+		extend_page = 0;
 	}
 
-	ioc->add_sge((char *)&pReq->PageBufferSGE, flagsLength, pCfg->physAddr);
-
-	/* Append pCfg pointer to end of mf
-	 */
-	*((void **) (((u8 *) mf) + (ioc->req_sz - sizeof(void *)))) =  (void *) pCfg;
-
-	/* Initalize the timer
-	 */
-	init_timer_on_stack(&pCfg->timer);
-	pCfg->timer.data = (unsigned long) ioc;
-	pCfg->timer.function = mpt_timer_expired;
-	pCfg->wait_done = 0;
-
-	/* Set the timer; ensure 10 second minimum */
-	if (pCfg->timeout < 10)
-		pCfg->timer.expires = jiffies + HZ*10;
-	else
-		pCfg->timer.expires = jiffies + HZ*pCfg->timeout;
-
-	/* Add to end of Q, set timer and then issue this command */
-	spin_lock_irqsave(&ioc->FreeQlock, flags);
-	list_add_tail(&pCfg->linkage, &ioc->configQ);
-	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
+	dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "Sending Config request type 0x%x, page 0x%x and action %d\n",
+	    ioc->name, page_type, pReq->Header.PageNumber, pReq->Action));
 
-	add_timer(&pCfg->timer);
+	ioc->add_sge((char *)&pReq->PageBufferSGE, flagsLength, pCfg->physAddr);
+	timeout = (pCfg->timeout < 15) ? HZ*15 : HZ*pCfg->timeout;
 	mpt_put_msg_frame(mpt_base_index, ioc, mf);
-	wait_event(mpt_waitq, pCfg->wait_done);
-
-	/* mf has been freed - do not access */
+	timeleft = wait_for_completion_timeout(&ioc->mptbase_cmds.done,
+		timeout);
+	if (!(ioc->mptbase_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = -ETIME;
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "Failed Sending Config request type 0x%x, page 0x%x,"
+		    " action %d, status %xh, time left %ld\n\n",
+			ioc->name, page_type, pReq->Header.PageNumber,
+			pReq->Action, ioc->mptbase_cmds.status, timeleft));
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		if (!timeleft)
+			issue_hard_reset = 1;
+		goto out;
+	}
 
-	rc = pCfg->status;
+	if (!(ioc->mptbase_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		ret = -1;
+		goto out;
+	}
+	pReply = (ConfigReply_t	*)ioc->mptbase_cmds.reply;
+	ret = le16_to_cpu(pReply->IOCStatus) & MPI_IOCSTATUS_MASK;
+	if (ret == MPI_IOCSTATUS_SUCCESS) {
+		if (extend_page) {
+			pCfg->cfghdr.ehdr->ExtPageLength =
+			    le16_to_cpu(pReply->ExtPageLength);
+			pCfg->cfghdr.ehdr->ExtPageType =
+			    pReply->ExtPageType;
+		}
+		pCfg->cfghdr.hdr->PageVersion = pReply->Header.PageVersion;
+		pCfg->cfghdr.hdr->PageLength = pReply->Header.PageLength;
+		pCfg->cfghdr.hdr->PageNumber = pReply->Header.PageNumber;
+		pCfg->cfghdr.hdr->PageType = pReply->Header.PageType;
 
-	return rc;
-}
+	}
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mpt_timer_expired - Callback for timer process.
- *	Used only internal config functionality.
- *	@data: Pointer to MPT_SCSI_HOST recast as an unsigned long
- */
-static void
-mpt_timer_expired(unsigned long data)
-{
-	MPT_ADAPTER *ioc = (MPT_ADAPTER *) data;
+	if (retry_count)
+		printk(MYIOC_s_INFO_FMT "Retry completed "
+		    "ret=0x%x timeleft=%ld\n",
+		    ioc->name, ret, timeleft);
 
-	dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_timer_expired! \n", ioc->name));
+	dcprintk(ioc, printk(KERN_DEBUG "IOCStatus=%04xh, IOCLogInfo=%08xh\n",
+	     ret, le32_to_cpu(pReply->IOCLogInfo)));
 
-	/* Perform a FW reload */
-	if (mpt_HardResetHandler(ioc, NO_SLEEP) < 0)
-		printk(MYIOC_s_WARN_FMT "Firmware Reload FAILED!\n", ioc->name);
+out:
 
-	/* No more processing.
-	 * Hard reset clean-up will wake up
-	 * process and free all resources.
-	 */
-	dcprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mpt_timer_expired complete!\n", ioc->name));
+	CLEAR_MGMT_STATUS(ioc->mptbase_cmds.status)
+	mutex_unlock(&ioc->mptbase_cmds.mutex);
+	if (issue_hard_reset) {
+		issue_hard_reset = 0;
+		printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+		    ioc->name, __func__);
+		mpt_HardResetHandler(ioc, CAN_SLEEP);
+		mpt_free_msg_frame(ioc, mf);
+		/* attempt one retry for a timed out command */
+		if (!retry_count) {
+			printk(MYIOC_s_INFO_FMT
+			    "Attempting Retry Config request"
+			    " type 0x%x, page 0x%x,"
+			    " action %d\n", ioc->name, page_type,
+			    pCfg->cfghdr.hdr->PageNumber, pCfg->action);
+			retry_count++;
+			goto retry_config;
+		}
+	}
+	return ret;
 
-	return;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -6250,41 +6238,27 @@ mpt_timer_expired(unsigned long data)
 static int
 mpt_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
-	CONFIGPARMS *pCfg;
-	unsigned long flags;
-
-	dprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-	    ": IOC %s_reset routed to MPT base driver!\n",
-	    ioc->name, reset_phase==MPT_IOC_SETUP_RESET ? "setup" : (
-	    reset_phase==MPT_IOC_PRE_RESET ? "pre" : "post")));
-
-	if (reset_phase == MPT_IOC_SETUP_RESET) {
-		;
-	} else if (reset_phase == MPT_IOC_PRE_RESET) {
-		/* If the internal config Q is not empty -
-		 * delete timer. MF resources will be freed when
-		 * the FIFO's are primed.
-		 */
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		list_for_each_entry(pCfg, &ioc->configQ, linkage)
-			del_timer(&pCfg->timer);
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-	} else {
-		CONFIGPARMS *pNext;
-
-		/* Search the configQ for internal commands.
-		 * Flush the Q, and wake up all suspended threads.
-		 */
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		list_for_each_entry_safe(pCfg, pNext, &ioc->configQ, linkage) {
-			list_del(&pCfg->linkage);
-
-			pCfg->status = MPT_CONFIG_ERROR;
-			pCfg->wait_done = 1;
-			wake_up(&mpt_waitq);
+	switch (reset_phase) {
+	case MPT_IOC_SETUP_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_PRE_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_POST_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_POST_RESET\n",  ioc->name, __func__));
+/* wake up mptbase_cmds */
+		if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->mptbase_cmds.status |=
+			    MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->mptbase_cmds.done);
 		}
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
+		break;
+	default:
+		break;
 	}
 
 	return 1;		/* currently means nothing really */
@@ -7901,7 +7875,7 @@ fusion_init(void)
 	/*  Register ourselves (mptbase) in order to facilitate
 	 *  EventNotification handling.
 	 */
-	mpt_base_index = mpt_register(mpt_base_reply, MPTBASE_DRIVER);
+	mpt_base_index = mpt_register(mptbase_reply, MPTBASE_DRIVER);
 
 	/* Register for hard reset handling callbacks.
 	 */
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 8cd0a16..41273ff 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -432,14 +432,6 @@ do { \
  *	IOCTL structure and associated defines
  */
 
-#define MPT_IOCTL_STATUS_DID_IOCRESET	0x01	/* IOC Reset occurred on the current*/
-#define MPT_IOCTL_STATUS_RF_VALID	0x02	/* The Reply Frame is VALID */
-#define MPT_IOCTL_STATUS_TIMER_ACTIVE	0x04	/* The timer is running */
-#define MPT_IOCTL_STATUS_SENSE_VALID	0x08	/* Sense data is valid */
-#define MPT_IOCTL_STATUS_COMMAND_GOOD	0x10	/* Command Status GOOD */
-#define MPT_IOCTL_STATUS_TMTIMER_ACTIVE	0x20	/* The TM timer is running */
-#define MPT_IOCTL_STATUS_TM_FAILED	0x40	/* User TM request failed */
-
 #define MPTCTL_RESET_OK			0x01	/* Issue Bus Reset */
 
 typedef struct _MPT_IOCTL {
@@ -454,16 +446,27 @@ typedef struct _MPT_IOCTL {
 	struct mutex		 ioctl_mutex;
 } MPT_IOCTL;
 
-#define MPT_SAS_MGMT_STATUS_RF_VALID	0x02	/* The Reply Frame is VALID */
-#define MPT_SAS_MGMT_STATUS_COMMAND_GOOD	0x10	/* Command Status GOOD */
-#define MPT_SAS_MGMT_STATUS_TM_FAILED	0x40	/* User TM request failed */
-
-typedef struct _MPT_SAS_MGMT {
+#define MPT_MGMT_STATUS_RF_VALID	0x01	/* The Reply Frame is VALID */
+#define MPT_MGMT_STATUS_COMMAND_GOOD	0x02	/* Command Status GOOD */
+#define MPT_MGMT_STATUS_PENDING		0x04	/* command is pending */
+#define MPT_MGMT_STATUS_DID_IOCRESET	0x08	/* IOC Reset occurred
+						   on the current*/
+#define MPT_MGMT_STATUS_SENSE_VALID	0x10	/* valid sense info */
+#define MPT_MGMT_STATUS_TIMER_ACTIVE	0x20	/* obsolete */
+#define MPT_MGMT_STATUS_FREE_MF		0x40	/* free the mf from
+						   complete routine */
+
+#define INITIALIZE_MGMT_STATUS(status) \
+	status = MPT_MGMT_STATUS_PENDING;
+#define CLEAR_MGMT_STATUS(status) \
+	status = 0;
+
+typedef struct _MPT_MGMT {
 	struct mutex		 mutex;
 	struct completion	 done;
 	u8			 reply[MPT_DEFAULT_FRAME_SIZE]; /* reply frame data */
 	u8			 status;	/* current command status */
-}MPT_SAS_MGMT;
+} MPT_MGMT;
 
 /*
  *  Event Structure and define
@@ -661,7 +664,6 @@ typedef struct _MPT_ADAPTER
 	struct _mpt_ioctl_events *events;	/* pointer to event log */
 	u8			*cached_fw;	/* Pointer to FW */
 	dma_addr_t	 	cached_fw_dma;
-	struct list_head	 configQ;	/* linked list of config. requests */
 	int			 hs_reply_idx;
 #ifndef MFCNT
 	u32			 pad0;
@@ -674,9 +676,6 @@ typedef struct _MPT_ADAPTER
 	IOCFactsReply_t		 facts;
 	PortFactsReply_t	 pfacts[2];
 	FCPortPage0_t		 fc_port_page0[2];
-	struct timer_list	 persist_timer;	/* persist table timer */
-	int			 persist_wait_done; /* persist completion flag */
-	u8			 persist_reply_frame[MPT_DEFAULT_FRAME_SIZE]; /* persist reply */
 	LANPage0_t		 lan_cnfg_page0;
 	LANPage1_t		 lan_cnfg_page1;
 
@@ -708,7 +707,8 @@ typedef struct _MPT_ADAPTER
 	u8			 sas_discovery_ignore_events;
 	u8			 sas_discovery_quiesce_io;
 	int			 sas_index; /* index refrencing */
-	MPT_SAS_MGMT		 sas_mgmt;
+	MPT_MGMT		 sas_mgmt;
+	MPT_MGMT		 mptbase_cmds; /* for sending config pages */
 	struct work_struct	 sas_persist_task;
 
 	struct work_struct	 fc_setup_reset_work;
@@ -884,21 +884,16 @@ struct scsi_cmnd;
  * Generic structure passed to the base mpt_config function.
  */
 typedef struct _x_config_parms {
-	struct list_head	 linkage;	/* linked list */
-	struct timer_list	 timer;		/* timer function for this request  */
 	union {
 		ConfigExtendedPageHeader_t	*ehdr;
 		ConfigPageHeader_t	*hdr;
 	} cfghdr;
 	dma_addr_t		 physAddr;
-	int			 wait_done;	/* wait for this request */
 	u32			 pageAddr;	/* properly formatted */
+	u16			 status;
 	u8			 action;
 	u8			 dir;
 	u8			 timeout;	/* seconds */
-	u8			 pad1;
-	u16			 status;
-	u16			 pad2;
 } CONFIGPARMS;
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index bece386..22b75cb 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -221,7 +221,7 @@ mptctl_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
 			dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_reply() NULL Reply "
 				"Function=%x!\n", ioc->name, cmd));
 
-			ioc->ioctl->status |= MPT_IOCTL_STATUS_COMMAND_GOOD;
+			ioc->ioctl->status |= MPT_MGMT_STATUS_COMMAND_GOOD;
 			ioc->ioctl->reset &= ~MPTCTL_RESET_OK;
 
 			/* We are done, issue wake up
@@ -237,14 +237,14 @@ mptctl_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
 		 */
 		memcpy(ioc->ioctl->ReplyFrame, reply,
 			min(ioc->reply_sz, 4*reply->u.reply.MsgLength));
-		ioc->ioctl->status |= MPT_IOCTL_STATUS_RF_VALID;
+		ioc->ioctl->status |= MPT_MGMT_STATUS_RF_VALID;
 
 		/* Set the command status to GOOD if IOC Status is GOOD
 		 * OR if SCSI I/O cmd and data underrun or recovered error.
 		 */
 		iocStatus = le16_to_cpu(reply->u.reply.IOCStatus) & MPI_IOCSTATUS_MASK;
 		if (iocStatus  == MPI_IOCSTATUS_SUCCESS)
-			ioc->ioctl->status |= MPT_IOCTL_STATUS_COMMAND_GOOD;
+			ioc->ioctl->status |= MPT_MGMT_STATUS_COMMAND_GOOD;
 
 		if (iocStatus || reply->u.reply.IOCLogInfo)
 			dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "\tiocstatus (0x%04X), "
@@ -268,7 +268,8 @@ mptctl_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
 
 			if ((iocStatus == MPI_IOCSTATUS_SCSI_DATA_UNDERRUN) ||
 			(iocStatus == MPI_IOCSTATUS_SCSI_RECOVERED_ERROR)) {
-			ioc->ioctl->status |= MPT_IOCTL_STATUS_COMMAND_GOOD;
+				ioc->ioctl->status |=
+					MPT_MGMT_STATUS_COMMAND_GOOD;
 			}
 		}
 
@@ -284,7 +285,7 @@ mptctl_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
 			    ((u8 *)ioc->sense_buf_pool +
 			     (req_index * MPT_SENSE_BUFFER_ALLOC));
 			memcpy(ioc->ioctl->sense, sense_data, sz);
-			ioc->ioctl->status |= MPT_IOCTL_STATUS_SENSE_VALID;
+			ioc->ioctl->status |= MPT_MGMT_STATUS_SENSE_VALID;
 		}
 
 		if (cmd == MPI_FUNCTION_SCSI_TASK_MGMT)
@@ -483,10 +484,10 @@ mptctl_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 
 	switch(reset_phase) {
 	case MPT_IOC_SETUP_RESET:
-		ioctl->status |= MPT_IOCTL_STATUS_DID_IOCRESET;
+		ioctl->status |= MPT_MGMT_STATUS_DID_IOCRESET;
 		break;
 	case MPT_IOC_POST_RESET:
-		ioctl->status &= ~MPT_IOCTL_STATUS_DID_IOCRESET;
+		ioctl->status &= ~MPT_MGMT_STATUS_DID_IOCRESET;
 		break;
 	case MPT_IOC_PRE_RESET:
 	default:
@@ -1791,7 +1792,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 			"No memory available during driver init.\n",
 				__FILE__, __LINE__);
 		return -ENOMEM;
-	} else if (ioc->ioctl->status & MPT_IOCTL_STATUS_DID_IOCRESET) {
+	} else if (ioc->ioctl->status & MPT_MGMT_STATUS_DID_IOCRESET) {
 		printk(KERN_ERR MYNAM "%s@%d::mptctl_do_mpt_command - "
 			"Busy with IOC Reset \n", __FILE__, __LINE__);
 		return -EBUSY;
@@ -2231,7 +2232,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 	/* If a valid reply frame, copy to the user.
 	 * Offset 2: reply length in U32's
 	 */
-	if (ioc->ioctl->status & MPT_IOCTL_STATUS_RF_VALID) {
+	if (ioc->ioctl->status & MPT_MGMT_STATUS_RF_VALID) {
 		if (karg.maxReplyBytes < ioc->reply_sz) {
 			 sz = min(karg.maxReplyBytes, 4*ioc->ioctl->ReplyFrame[2]);
 		} else {
@@ -2253,7 +2254,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 
 	/* If valid sense data, copy to user.
 	 */
-	if (ioc->ioctl->status & MPT_IOCTL_STATUS_SENSE_VALID) {
+	if (ioc->ioctl->status & MPT_MGMT_STATUS_SENSE_VALID) {
 		sz = min(karg.maxSenseBytes, MPT_SENSE_BUFFER_SIZE);
 		if (sz > 0) {
 			if (copy_to_user(karg.senseDataPtr, ioc->ioctl->sense, sz)) {
@@ -2270,7 +2271,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 	/* If the overall status is _GOOD and data in, copy data
 	 * to user.
 	 */
-	if ((ioc->ioctl->status & MPT_IOCTL_STATUS_COMMAND_GOOD) &&
+	if ((ioc->ioctl->status & MPT_MGMT_STATUS_COMMAND_GOOD) &&
 				(karg.dataInSize > 0) && (bufIn.kptr)) {
 
 		if (copy_to_user(karg.dataInBufPtr,
@@ -2285,9 +2286,9 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 
 done_free_mem:
 
-	ioc->ioctl->status &= ~(MPT_IOCTL_STATUS_COMMAND_GOOD |
-		MPT_IOCTL_STATUS_SENSE_VALID |
-		MPT_IOCTL_STATUS_RF_VALID );
+	ioc->ioctl->status &= ~(MPT_MGMT_STATUS_COMMAND_GOOD |
+		MPT_MGMT_STATUS_SENSE_VALID |
+		MPT_MGMT_STATUS_RF_VALID);
 
 	/* Free the allocated memory.
 	 */
@@ -2527,7 +2528,7 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size)
 	 *   bays have drives in them
 	 * pbuf[3] = Checksum (0x100 = (byte0 + byte2 + byte3)
 	 */
-	if (ioc->ioctl->status & MPT_IOCTL_STATUS_RF_VALID)
+	if (ioc->ioctl->status & MPT_MGMT_STATUS_RF_VALID)
 		karg.rsvd = *(u32 *)pbuf;
 
  out:
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 40dbaaa..dc23adf 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1122,9 +1122,9 @@ static int mptsas_get_linkerrors(struct sas_phy *phy)
 static int mptsas_mgmt_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
 		MPT_FRAME_HDR *reply)
 {
-	ioc->sas_mgmt.status |= MPT_SAS_MGMT_STATUS_COMMAND_GOOD;
+	ioc->sas_mgmt.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
 	if (reply != NULL) {
-		ioc->sas_mgmt.status |= MPT_SAS_MGMT_STATUS_RF_VALID;
+		ioc->sas_mgmt.status |= MPT_MGMT_STATUS_RF_VALID;
 		memcpy(ioc->sas_mgmt.reply, reply,
 		    min(ioc->reply_sz, 4 * reply->u.reply.MsgLength));
 	}
@@ -1182,7 +1182,7 @@ static int mptsas_phy_reset(struct sas_phy *phy, int hard_reset)
 
 	/* a reply frame is expected */
 	if ((ioc->sas_mgmt.status &
-	    MPT_IOCTL_STATUS_RF_VALID) == 0) {
+	    MPT_MGMT_STATUS_RF_VALID) == 0) {
 		error = -ENXIO;
 		goto out_unlock;
 	}
@@ -1359,7 +1359,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	}
 	mf = NULL;
 
-	if (ioc->sas_mgmt.status & MPT_IOCTL_STATUS_RF_VALID) {
+	if (ioc->sas_mgmt.status & MPT_MGMT_STATUS_RF_VALID) {
 		SmpPassthroughReply_t *smprep;
 
 		smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply;
-- 
cgit v0.10.2


From 37c60f374a855974c27bd30d5662a8fa5e933792 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:44:06 +0530
Subject: [SCSI] mpt fusion: rewrite of all internal generated functions

Rewrite of all internal generated functions that issue commands to firmware,
porting them to be single threaded using the generic MPT_MGMT
struct. Implemented using completion Queue.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index e63a626..d8d5231 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -1775,6 +1775,8 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	spin_lock_init(&ioc->diagLock);
 	spin_lock_init(&ioc->initializing_hba_lock);
 
+	mutex_init(&ioc->internal_cmds.mutex);
+	init_completion(&ioc->internal_cmds.done);
 	mutex_init(&ioc->mptbase_cmds.mutex);
 	init_completion(&ioc->mptbase_cmds.done);
 
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 41273ff..4d77256 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -465,7 +465,9 @@ typedef struct _MPT_MGMT {
 	struct mutex		 mutex;
 	struct completion	 done;
 	u8			 reply[MPT_DEFAULT_FRAME_SIZE]; /* reply frame data */
+	u8			 sense[MPT_SENSE_BUFFER_ALLOC];
 	u8			 status;	/* current command status */
+	int			 completion_code;
 } MPT_MGMT;
 
 /*
@@ -709,6 +711,7 @@ typedef struct _MPT_ADAPTER
 	int			 sas_index; /* index refrencing */
 	MPT_MGMT		 sas_mgmt;
 	MPT_MGMT		 mptbase_cmds; /* for sending config pages */
+	MPT_MGMT		 internal_cmds;
 	struct work_struct	 sas_persist_task;
 
 	struct work_struct	 fc_setup_reset_work;
@@ -863,8 +866,6 @@ typedef struct _MPT_SCSI_HOST {
 	unsigned long		  timeouts;		/* cmd timeouts */
 	ushort			  sel_timeout[MPT_MAX_FC_DEVICES];
 	char 			  *info_kbuf;
-	wait_queue_head_t	  scandv_waitq;
-	int			  scandv_wait_done;
 	long			  last_queue_full;
 	u16			  tm_iocstatus;
 	u16			  spi_pending;
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index da16b47..d093871 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -1310,8 +1310,6 @@ mptfc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	hd->timer.data = (unsigned long) hd;
 	hd->timer.function = mptscsih_timer_expired;
 
-	init_waitqueue_head(&hd->scandv_waitq);
-	hd->scandv_wait_done = 0;
 	hd->last_queue_full = 0;
 
 	sh->transportt = mptfc_transport_template;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index dc23adf..16c4232 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -3282,8 +3282,6 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	ioc->sas_data.ptClear = mpt_pt_clear;
 
-	init_waitqueue_head(&hd->scandv_waitq);
-	hd->scandv_wait_done = 0;
 	hd->last_queue_full = 0;
 	INIT_LIST_HEAD(&hd->target_reset_list);
 	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 8c08c73..3517325 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -100,6 +100,8 @@ static int	mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id,
 int		mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset);
 int		mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply);
 
+static int	mptscsih_get_completion_code(MPT_ADAPTER *ioc,
+		MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply);
 int		mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r);
 static int	mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *iocmd);
 static void	mptscsih_synchronize_cache(MPT_SCSI_HOST *hd, VirtDevice *vdevice);
@@ -2571,94 +2573,35 @@ int
 mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
 	MPT_SCSI_HOST	*hd;
-	unsigned long	 flags;
-
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-	    ": IOC %s_reset routed to SCSI host driver!\n",
-	    ioc->name, reset_phase==MPT_IOC_SETUP_RESET ? "setup" : (
-	    reset_phase==MPT_IOC_PRE_RESET ? "pre" : "post")));
 
-	/* If a FW reload request arrives after base installed but
-	 * before all scsi hosts have been attached, then an alt_ioc
-	 * may have a NULL sh pointer.
-	 */
 	if (ioc->sh == NULL || shost_priv(ioc->sh) == NULL)
 		return 0;
-	else
-		hd = shost_priv(ioc->sh);
-
-	if (reset_phase == MPT_IOC_SETUP_RESET) {
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Setup-Diag Reset\n", ioc->name));
 
-		/* Clean Up:
-		 * 1. Set Hard Reset Pending Flag
-		 * All new commands go to doneQ
-		 */
+	hd = shost_priv(ioc->sh);
+	switch (reset_phase) {
+	case MPT_IOC_SETUP_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
 		hd->resetPending = 1;
-
-	} else if (reset_phase == MPT_IOC_PRE_RESET) {
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Pre-Diag Reset\n", ioc->name));
-
-		/* 2. Flush running commands
-		 *	Clean ScsiLookup (and associated memory)
-		 *	AND clean mytaskQ
-		 */
-
-		/* 2b. Reply to OS all known outstanding I/O commands.
-		 */
-		mptscsih_flush_running_cmds(hd);
-
-		/* 2c. If there was an internal command that
-		 * has not completed, configuration or io request,
-		 * free these resources.
-		 */
-		if (hd->cmdPtr) {
-			del_timer(&hd->timer);
-			mpt_free_msg_frame(ioc, hd->cmdPtr);
-		}
-
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Pre-Reset complete.\n", ioc->name));
-
-	} else {
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Post-Diag Reset\n", ioc->name));
-
-		/* Once a FW reload begins, all new OS commands are
-		 * redirected to the doneQ w/ a reset status.
-		 * Init all control structures.
-		 */
-
-		/* 2. Chain Buffer initialization
-		 */
-
-		/* 4. Renegotiate to all devices, if SPI
-		 */
-
-		/* 5. Enable new commands to be posted
-		 */
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		hd->tmPending = 0;
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
+		break;
+	case MPT_IOC_PRE_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
 		hd->resetPending = 0;
-		hd->tmState = TM_STATE_NONE;
-
-		/* 6. If there was an internal command,
-		 * wake this process up.
-		 */
-		if (hd->cmdPtr) {
-			/*
-			 * Wake up the original calling thread
-			 */
-			hd->pLocal = &hd->localReply;
-			hd->pLocal->completion = MPT_SCANDV_DID_RESET;
-			hd->scandv_wait_done = 1;
-			wake_up(&hd->scandv_waitq);
-			hd->cmdPtr = NULL;
+		mptscsih_flush_running_cmds(hd);
+		break;
+	case MPT_IOC_POST_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_POST_RESET\n", ioc->name, __func__));
+		if (ioc->internal_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->internal_cmds.status |=
+				MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->internal_cmds.done);
 		}
-
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Post-Reset complete.\n", ioc->name));
-
+		break;
+	default:
+		break;
 	}
-
 	return 1;		/* currently means nothing really */
 }
 
@@ -2669,8 +2612,9 @@ mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 	MPT_SCSI_HOST *hd;
 	u8 event = le32_to_cpu(pEvReply->Event) & 0xFF;
 
-	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT "MPT event (=%02Xh) routed to SCSI host driver!\n",
-			ioc->name, event));
+	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		"MPT event (=%02Xh) routed to SCSI host driver!\n",
+		ioc->name, event));
 
 	if (ioc->sh == NULL ||
 		((hd = shost_priv(ioc->sh)) == NULL))
@@ -2711,8 +2655,9 @@ mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 	case MPI_EVENT_STATE_CHANGE:			/* 02 */
 	case MPI_EVENT_EVENT_CHANGE:			/* 0A */
 	default:
-		dprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": Ignoring event (=%02Xh)\n",
-		    ioc->name, event));
+		dprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			": Ignoring event (=%02Xh)\n",
+			ioc->name, event));
 		break;
 	}
 
@@ -2745,153 +2690,44 @@ mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
  *	Used ONLY for DV and other internal commands.
  */
 int
-mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
+mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
+				MPT_FRAME_HDR *reply)
 {
-	MPT_SCSI_HOST	*hd;
 	SCSIIORequest_t *pReq;
-	int		 completionCode;
+	SCSIIOReply_t	*pReply;
+	u8		 cmd;
 	u16		 req_idx;
+	u8	*sense_data;
+	int		 sz;
 
-	hd = shost_priv(ioc->sh);
-
-	if ((mf == NULL) ||
-	    (mf >= MPT_INDEX_2_MFPTR(ioc, ioc->req_depth))) {
-		printk(MYIOC_s_ERR_FMT
-			"ScanDvComplete, %s req frame ptr! (=%p)\n",
-				ioc->name, mf?"BAD":"NULL", (void *) mf);
-		goto wakeup;
-	}
-
-	del_timer(&hd->timer);
-	req_idx = le16_to_cpu(mf->u.frame.hwhdr.msgctxu.fld.req_idx);
-	mptscsih_set_scsi_lookup(ioc, req_idx, NULL);
-	pReq = (SCSIIORequest_t *) mf;
+	ioc->internal_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
+	ioc->internal_cmds.completion_code = MPT_SCANDV_GOOD;
+	if (!reply)
+		goto out;
 
-	if (mf != hd->cmdPtr) {
-		printk(MYIOC_s_WARN_FMT "ScanDvComplete (mf=%p, cmdPtr=%p, idx=%d)\n",
-				ioc->name, (void *)mf, (void *) hd->cmdPtr, req_idx);
+	pReply = (SCSIIOReply_t *) reply;
+	pReq = (SCSIIORequest_t *) req;
+	ioc->internal_cmds.completion_code =
+	    mptscsih_get_completion_code(ioc, req, reply);
+	ioc->internal_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+	memcpy(ioc->internal_cmds.reply, reply,
+	    min(MPT_DEFAULT_FRAME_SIZE, 4 * reply->u.reply.MsgLength));
+	cmd = reply->u.hdr.Function;
+	if (((cmd == MPI_FUNCTION_SCSI_IO_REQUEST) ||
+	    (cmd == MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) &&
+	    (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_VALID)) {
+		req_idx = le16_to_cpu(req->u.frame.hwhdr.msgctxu.fld.req_idx);
+		sense_data = ((u8 *)ioc->sense_buf_pool +
+		    (req_idx * MPT_SENSE_BUFFER_ALLOC));
+		sz = min_t(int, pReq->SenseBufferLength,
+		    MPT_SENSE_BUFFER_ALLOC);
+		memcpy(ioc->internal_cmds.sense, sense_data, sz);
 	}
-	hd->cmdPtr = NULL;
-
-	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "ScanDvComplete (mf=%p,mr=%p,idx=%d)\n",
-			ioc->name, mf, mr, req_idx));
-
-	hd->pLocal = &hd->localReply;
-	hd->pLocal->scsiStatus = 0;
-
-	/* If target struct exists, clear sense valid flag.
-	 */
-	if (mr == NULL) {
-		completionCode = MPT_SCANDV_GOOD;
-	} else {
-		SCSIIOReply_t	*pReply;
-		u16		 status;
-		u8		 scsi_status;
-
-		pReply = (SCSIIOReply_t *) mr;
-
-		status = le16_to_cpu(pReply->IOCStatus) & MPI_IOCSTATUS_MASK;
-		scsi_status = pReply->SCSIStatus;
-
-
-		switch(status) {
-
-		case MPI_IOCSTATUS_SCSI_DEVICE_NOT_THERE:	/* 0x0043 */
-			completionCode = MPT_SCANDV_SELECTION_TIMEOUT;
-			break;
-
-		case MPI_IOCSTATUS_SCSI_IO_DATA_ERROR:		/* 0x0046 */
-		case MPI_IOCSTATUS_SCSI_TASK_TERMINATED:	/* 0x0048 */
-		case MPI_IOCSTATUS_SCSI_IOC_TERMINATED:		/* 0x004B */
-		case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
-			completionCode = MPT_SCANDV_DID_RESET;
-			break;
-
-		case MPI_IOCSTATUS_SCSI_DATA_UNDERRUN:		/* 0x0045 */
-		case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
-		case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
-			if (pReply->Function == MPI_FUNCTION_CONFIG) {
-				ConfigReply_t *pr = (ConfigReply_t *)mr;
-				completionCode = MPT_SCANDV_GOOD;
-				hd->pLocal->header.PageVersion = pr->Header.PageVersion;
-				hd->pLocal->header.PageLength = pr->Header.PageLength;
-				hd->pLocal->header.PageNumber = pr->Header.PageNumber;
-				hd->pLocal->header.PageType = pr->Header.PageType;
-
-			} else if (pReply->Function == MPI_FUNCTION_RAID_ACTION) {
-				/* If the RAID Volume request is successful,
-				 * return GOOD, else indicate that
-				 * some type of error occurred.
-				 */
-				MpiRaidActionReply_t	*pr = (MpiRaidActionReply_t *)mr;
-				if (le16_to_cpu(pr->ActionStatus) == MPI_RAID_ACTION_ASTATUS_SUCCESS)
-					completionCode = MPT_SCANDV_GOOD;
-				else
-					completionCode = MPT_SCANDV_SOME_ERROR;
-				memcpy(hd->pLocal->sense, pr, sizeof(hd->pLocal->sense));
-
-			} else if (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_VALID) {
-				u8		*sense_data;
-				int		 sz;
-
-				/* save sense data in global structure
-				 */
-				completionCode = MPT_SCANDV_SENSE;
-				hd->pLocal->scsiStatus = scsi_status;
-				sense_data = ((u8 *)ioc->sense_buf_pool +
-					(req_idx * MPT_SENSE_BUFFER_ALLOC));
-
-				sz = min_t(int, pReq->SenseBufferLength,
-							SCSI_STD_SENSE_BYTES);
-				memcpy(hd->pLocal->sense, sense_data, sz);
-
-				ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "  Check Condition, sense ptr %p\n",
-				    ioc->name, sense_data));
-			} else if (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_FAILED) {
-				if (pReq->CDB[0] == INQUIRY)
-					completionCode = MPT_SCANDV_ISSUE_SENSE;
-				else
-					completionCode = MPT_SCANDV_DID_RESET;
-			}
-			else if (pReply->SCSIState & MPI_SCSI_STATE_NO_SCSI_STATUS)
-				completionCode = MPT_SCANDV_DID_RESET;
-			else if (pReply->SCSIState & MPI_SCSI_STATE_TERMINATED)
-				completionCode = MPT_SCANDV_DID_RESET;
-			else {
-				completionCode = MPT_SCANDV_GOOD;
-				hd->pLocal->scsiStatus = scsi_status;
-			}
-			break;
-
-		case MPI_IOCSTATUS_SCSI_PROTOCOL_ERROR:		/* 0x0047 */
-			if (pReply->SCSIState & MPI_SCSI_STATE_TERMINATED)
-				completionCode = MPT_SCANDV_DID_RESET;
-			else
-				completionCode = MPT_SCANDV_SOME_ERROR;
-			break;
-
-		default:
-			completionCode = MPT_SCANDV_SOME_ERROR;
-			break;
-
-		}	/* switch(status) */
-
-	} /* end of address reply case */
-
-	hd->pLocal->completion = completionCode;
-
-	/* MF and RF are freed in mpt_interrupt
-	 */
-wakeup:
-	/* Free Chain buffers (will never chain) in scan or dv */
-	//mptscsih_freeChainBuffers(ioc, req_idx);
-
-	/*
-	 * Wake up the original calling thread
-	 */
-	hd->scandv_wait_done = 1;
-	wake_up(&hd->scandv_waitq);
-
+ out:
+	if (!(ioc->internal_cmds.status & MPT_MGMT_STATUS_PENDING))
+		return 0;
+	ioc->internal_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+	complete(&ioc->internal_cmds.done);
 	return 1;
 }
 
@@ -2940,6 +2776,95 @@ mptscsih_timer_expired(unsigned long data)
 	return;
 }
 
+/**
+ *	mptscsih_get_completion_code -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@reply:
+ *	@cmd:
+ *
+ **/
+static int
+mptscsih_get_completion_code(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
+				MPT_FRAME_HDR *reply)
+{
+	SCSIIOReply_t	*pReply;
+	MpiRaidActionReply_t *pr;
+	u8		 scsi_status;
+	u16		 status;
+	int		 completion_code;
+
+	pReply = (SCSIIOReply_t *)reply;
+	status = le16_to_cpu(pReply->IOCStatus) & MPI_IOCSTATUS_MASK;
+	scsi_status = pReply->SCSIStatus;
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "IOCStatus=%04xh, SCSIState=%02xh, SCSIStatus=%02xh,"
+	    "IOCLogInfo=%08xh\n", ioc->name, status, pReply->SCSIState,
+	    scsi_status, le32_to_cpu(pReply->IOCLogInfo)));
+
+	switch (status) {
+
+	case MPI_IOCSTATUS_SCSI_DEVICE_NOT_THERE:	/* 0x0043 */
+		completion_code = MPT_SCANDV_SELECTION_TIMEOUT;
+		break;
+
+	case MPI_IOCSTATUS_SCSI_IO_DATA_ERROR:		/* 0x0046 */
+	case MPI_IOCSTATUS_SCSI_TASK_TERMINATED:	/* 0x0048 */
+	case MPI_IOCSTATUS_SCSI_IOC_TERMINATED:		/* 0x004B */
+	case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
+		completion_code = MPT_SCANDV_DID_RESET;
+		break;
+
+	case MPI_IOCSTATUS_BUSY:
+	case MPI_IOCSTATUS_INSUFFICIENT_RESOURCES:
+		completion_code = MPT_SCANDV_BUSY;
+		break;
+
+	case MPI_IOCSTATUS_SCSI_DATA_UNDERRUN:		/* 0x0045 */
+	case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
+	case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
+		if (pReply->Function == MPI_FUNCTION_CONFIG) {
+			completion_code = MPT_SCANDV_GOOD;
+		} else if (pReply->Function == MPI_FUNCTION_RAID_ACTION) {
+			pr = (MpiRaidActionReply_t *)reply;
+			if (le16_to_cpu(pr->ActionStatus) ==
+				MPI_RAID_ACTION_ASTATUS_SUCCESS)
+				completion_code = MPT_SCANDV_GOOD;
+			else
+				completion_code = MPT_SCANDV_SOME_ERROR;
+		} else if (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_VALID)
+			completion_code = MPT_SCANDV_SENSE;
+		else if (pReply->SCSIState & MPI_SCSI_STATE_AUTOSENSE_FAILED) {
+			if (req->u.scsireq.CDB[0] == INQUIRY)
+				completion_code = MPT_SCANDV_ISSUE_SENSE;
+			else
+				completion_code = MPT_SCANDV_DID_RESET;
+		} else if (pReply->SCSIState & MPI_SCSI_STATE_NO_SCSI_STATUS)
+			completion_code = MPT_SCANDV_DID_RESET;
+		else if (pReply->SCSIState & MPI_SCSI_STATE_TERMINATED)
+			completion_code = MPT_SCANDV_DID_RESET;
+		else if (scsi_status == MPI_SCSI_STATUS_BUSY)
+			completion_code = MPT_SCANDV_BUSY;
+		else
+			completion_code = MPT_SCANDV_GOOD;
+		break;
+
+	case MPI_IOCSTATUS_SCSI_PROTOCOL_ERROR:		/* 0x0047 */
+		if (pReply->SCSIState & MPI_SCSI_STATE_TERMINATED)
+			completion_code = MPT_SCANDV_DID_RESET;
+		else
+			completion_code = MPT_SCANDV_SOME_ERROR;
+		break;
+	default:
+		completion_code = MPT_SCANDV_SOME_ERROR;
+		break;
+
+	}	/* switch(status) */
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "  completionCode set to %08xh\n", ioc->name, completion_code));
+	return completion_code;
+}
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
@@ -2966,22 +2891,17 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 {
 	MPT_FRAME_HDR	*mf;
 	SCSIIORequest_t	*pScsiReq;
-	SCSIIORequest_t	 ReqCopy;
 	int		 my_idx, ii, dir;
-	int		 rc, cmdTimeout;
-	int		in_isr;
+	int		 timeout;
 	char		 cmdLen;
 	char		 CDB[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	char		 cmd = io->cmd;
-	MPT_ADAPTER 	*ioc = hd->ioc;
-
-	in_isr = in_interrupt();
-	if (in_isr) {
-		dprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Internal SCSI IO request not allowed in ISR context!\n",
-					ioc->name));
-		return -EPERM;
-	}
+	u8		 cmd = io->cmd;
+	MPT_ADAPTER *ioc = hd->ioc;
+	int		 ret = 0;
+	unsigned long	 timeleft;
+	unsigned long	 flags;
 
+	mutex_lock(&ioc->internal_cmds.mutex);
 
 	/* Set command specific information
 	 */
@@ -2991,13 +2911,13 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
 		CDB[4] = io->size;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case TEST_UNIT_READY:
 		cmdLen = 6;
 		dir = MPI_SCSIIO_CONTROL_READ;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case START_STOP:
@@ -3005,7 +2925,7 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
 		CDB[4] = 1;	/*Spin up the disk */
-		cmdTimeout = 15;
+		timeout = 15;
 		break;
 
 	case REQUEST_SENSE:
@@ -3013,7 +2933,7 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 		CDB[0] = cmd;
 		CDB[4] = io->size;
 		dir = MPI_SCSIIO_CONTROL_READ;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case READ_BUFFER:
@@ -3032,7 +2952,7 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 		CDB[6] = (io->size >> 16) & 0xFF;
 		CDB[7] = (io->size >>  8) & 0xFF;
 		CDB[8] = io->size & 0xFF;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case WRITE_BUFFER:
@@ -3047,21 +2967,21 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 		CDB[6] = (io->size >> 16) & 0xFF;
 		CDB[7] = (io->size >>  8) & 0xFF;
 		CDB[8] = io->size & 0xFF;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case RESERVE:
 		cmdLen = 6;
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case RELEASE:
 		cmdLen = 6;
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	case SYNCHRONIZE_CACHE:
@@ -3069,20 +2989,23 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 		dir = MPI_SCSIIO_CONTROL_READ;
 		CDB[0] = cmd;
 //		CDB[1] = 0x02;	/* set immediate bit */
-		cmdTimeout = 10;
+		timeout = 10;
 		break;
 
 	default:
 		/* Error Case */
-		return -EFAULT;
+		ret = -EFAULT;
+		goto out;
 	}
 
 	/* Get and Populate a free Frame
+	 * MsgContext set in mpt_get_msg_frame call
 	 */
 	if ((mf = mpt_get_msg_frame(ioc->InternalCtx, ioc)) == NULL) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "No msg frames!\n",
-		    ioc->name));
-		return -EBUSY;
+		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s: No msg frames!\n",
+		    ioc->name, __func__));
+		ret = MPT_SCANDV_BUSY;
+		goto out;
 	}
 
 	pScsiReq = (SCSIIORequest_t *) mf;
@@ -3120,74 +3043,58 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 
 	if (cmd == REQUEST_SENSE) {
 		pScsiReq->Control = cpu_to_le32(dir | MPI_SCSIIO_CONTROL_UNTAGGED);
-		ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Untagged! 0x%2x\n",
-			ioc->name, cmd));
+		devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: Untagged! 0x%02x\n", ioc->name, __func__, cmd));
 	}
 
-	for (ii=0; ii < 16; ii++)
+	for (ii = 0; ii < 16; ii++)
 		pScsiReq->CDB[ii] = CDB[ii];
 
 	pScsiReq->DataLength = cpu_to_le32(io->size);
 	pScsiReq->SenseBufferLowAddr = cpu_to_le32(ioc->sense_buf_low_dma
 					   + (my_idx * MPT_SENSE_BUFFER_ALLOC));
 
-	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Sending Command 0x%x for (%d:%d:%d)\n",
-			ioc->name, cmd, io->channel, io->id, io->lun));
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "%s: Sending Command 0x%02x for fw_channel=%d fw_id=%d lun=%d\n",
+	    ioc->name, __func__, cmd, io->channel, io->id, io->lun));
 
-	if (dir == MPI_SCSIIO_CONTROL_READ) {
+	if (dir == MPI_SCSIIO_CONTROL_READ)
 		ioc->add_sge((char *) &pScsiReq->SGL,
-			MPT_SGE_FLAGS_SSIMPLE_READ | io->size,
-			io->data_dma);
-	} else {
+		    MPT_SGE_FLAGS_SSIMPLE_READ | io->size, io->data_dma);
+	else
 		ioc->add_sge((char *) &pScsiReq->SGL,
-			MPT_SGE_FLAGS_SSIMPLE_WRITE | io->size,
-			io->data_dma);
-	}
-
-	/* The ISR will free the request frame, but we need
-	 * the information to initialize the target. Duplicate.
-	 */
-	memcpy(&ReqCopy, pScsiReq, sizeof(SCSIIORequest_t));
-
-	/* Issue this command after:
-	 *	finish init
-	 *	add timer
-	 * Wait until the reply has been received
-	 *  ScsiScanDvCtx callback function will
-	 *	set hd->pLocal;
-	 *	set scandv_wait_done and call wake_up
-	 */
-	hd->pLocal = NULL;
-	hd->timer.expires = jiffies + HZ*cmdTimeout;
-	hd->scandv_wait_done = 0;
+		    MPT_SGE_FLAGS_SSIMPLE_WRITE | io->size, io->data_dma);
 
-	/* Save cmd pointer, for resource free if timeout or
-	 * FW reload occurs
-	 */
-	hd->cmdPtr = mf;
-
-	add_timer(&hd->timer);
+	INITIALIZE_MGMT_STATUS(ioc->internal_cmds.status)
 	mpt_put_msg_frame(ioc->InternalCtx, ioc, mf);
-	wait_event(hd->scandv_waitq, hd->scandv_wait_done);
-
-	if (hd->pLocal) {
-		rc = hd->pLocal->completion;
-		hd->pLocal->skip = 0;
-
-		/* Always set fatal error codes in some cases.
-		 */
-		if (rc == MPT_SCANDV_SELECTION_TIMEOUT)
-			rc = -ENXIO;
-		else if (rc == MPT_SCANDV_SOME_ERROR)
-			rc =  -rc;
-	} else {
-		rc = -EFAULT;
-		/* This should never happen. */
-		ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "_do_cmd: Null pLocal!!!\n",
-				ioc->name));
+	timeleft = wait_for_completion_timeout(&ioc->internal_cmds.done,
+	    timeout*HZ);
+	if (!(ioc->internal_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = MPT_SCANDV_DID_RESET;
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: TIMED OUT for cmd=0x%02x\n", ioc->name, __func__,
+		    cmd));
+		if (ioc->internal_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) {
+			mpt_free_msg_frame(ioc, mf);
+			goto out;
+		}
+		if (!timeleft) {
+			printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+			    ioc->name, __func__);
+			mpt_HardResetHandler(ioc, CAN_SLEEP);
+			mpt_free_msg_frame(ioc, mf);
+		}
+		goto out;
 	}
 
-	return rc;
+	ret = ioc->internal_cmds.completion_code;
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: success, rc=0x%02x\n",
+			ioc->name, __func__, ret));
+
+ out:
+	CLEAR_MGMT_STATUS(ioc->internal_cmds.status)
+	mutex_unlock(&ioc->internal_cmds.mutex);
+	return ret;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -3427,6 +3334,7 @@ struct device_attribute *mptscsih_host_attrs[] = {
 	&dev_attr_debug_level,
 	NULL,
 };
+
 EXPORT_SYMBOL(mptscsih_host_attrs);
 
 EXPORT_SYMBOL(mptscsih_remove);
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 319aa30..0b103a2 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -60,6 +60,7 @@
 #define MPT_SCANDV_SELECTION_TIMEOUT	(0x00000008)
 #define MPT_SCANDV_ISSUE_SENSE		(0x00000010)
 #define MPT_SCANDV_FALLBACK		(0x00000020)
+#define MPT_SCANDV_BUSY			(0x00000040)
 
 #define MPT_SCANDV_MAX_RETRIES		(10)
 
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 643a3c6..8f46fdf 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -614,19 +614,24 @@ static void mptspi_read_parameters(struct scsi_target *starget)
 	spi_width(starget) = (nego & MPI_SCSIDEVPAGE0_NP_WIDE) ? 1 : 0;
 }
 
-static int
+int
 mptscsih_quiesce_raid(MPT_SCSI_HOST *hd, int quiesce, u8 channel, u8 id)
 {
+	MPT_ADAPTER	*ioc = hd->ioc;
 	MpiRaidActionRequest_t	*pReq;
 	MPT_FRAME_HDR		*mf;
-	MPT_ADAPTER *ioc = hd->ioc;
+	int			ret;
+	unsigned long 	 	timeleft;
+
+	mutex_lock(&ioc->internal_cmds.mutex);
 
 	/* Get and Populate a free Frame
 	 */
 	if ((mf = mpt_get_msg_frame(ioc->InternalCtx, ioc)) == NULL) {
-		ddvprintk(ioc, printk(MYIOC_s_WARN_FMT "_do_raid: no msg frames!\n",
-					ioc->name));
-		return -EAGAIN;
+		dfailprintk(hd->ioc, printk(MYIOC_s_WARN_FMT
+			"%s: no msg frames!\n", ioc->name, __func__));
+		ret = -EAGAIN;
+		goto out;
 	}
 	pReq = (MpiRaidActionRequest_t *)mf;
 	if (quiesce)
@@ -649,23 +654,30 @@ mptscsih_quiesce_raid(MPT_SCSI_HOST *hd, int quiesce, u8 channel, u8 id)
 	ddvprintk(ioc, printk(MYIOC_s_DEBUG_FMT "RAID Volume action=%x channel=%d id=%d\n",
 			ioc->name, pReq->Action, channel, id));
 
-	hd->pLocal = NULL;
-	hd->timer.expires = jiffies + HZ*10; /* 10 second timeout */
-	hd->scandv_wait_done = 0;
-
-	/* Save cmd pointer, for resource free if timeout or
-	 * FW reload occurs
-	 */
-	hd->cmdPtr = mf;
-
-	add_timer(&hd->timer);
+	INITIALIZE_MGMT_STATUS(ioc->internal_cmds.status)
 	mpt_put_msg_frame(ioc->InternalCtx, ioc, mf);
-	wait_event(hd->scandv_waitq, hd->scandv_wait_done);
+	timeleft = wait_for_completion_timeout(&ioc->internal_cmds.done, 10*HZ);
+	if (!(ioc->internal_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = -ETIME;
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: TIMED OUT!\n",
+		    ioc->name, __func__));
+		if (ioc->internal_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		if (!timeleft) {
+			printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+			    ioc->name, __func__);
+			mpt_HardResetHandler(ioc, CAN_SLEEP);
+			mpt_free_msg_frame(ioc, mf);
+		}
+		goto out;
+	}
 
-	if ((hd->pLocal == NULL) || (hd->pLocal->completion != 0))
-		return -1;
+	ret = ioc->internal_cmds.completion_code;
 
-	return 0;
+ out:
+	CLEAR_MGMT_STATUS(ioc->internal_cmds.status)
+	mutex_unlock(&ioc->internal_cmds.mutex);
+	return ret;
 }
 
 static void mptspi_dv_device(struct _MPT_SCSI_HOST *hd,
@@ -1491,8 +1503,6 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		mpt_saf_te));
 	ioc->spi_data.noQas = 0;
 
-	init_waitqueue_head(&hd->scandv_waitq);
-	hd->scandv_wait_done = 0;
 	hd->last_queue_full = 0;
 	hd->spi_pending = 0;
 
-- 
cgit v0.10.2


From 1ba9ab2eb2c53df52e498779e14cf4e5ea77b0ad Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:44:48 +0530
Subject: [SCSI] mpt fusion: rewrite taskmgmt request and completion routines

1.)	 rewrite taskmanagement request and completion routines, making them
single threaded and using the generic MPT_MGMT struct, deleting
mptscsih_TMHandler, replacing with single request TM handler
mptscsih_IssueTaskMgmt, and killing the watchdog timer functions.
2.) cleanup ioc_reset callback handlers, introducing wrappers for
synchronizing error recovery (mpt_set_taskmgmt_in_progress_flag,
mpt_clear_taskmgmt_in_progress_flag), as the fusion firmware only handles
one task management request at a time

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index d8d5231..af862bf 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -341,7 +341,7 @@ mpt_fault_reset_work(struct work_struct *work)
 	int		 rc;
 	unsigned long	 flags;
 
-	if (ioc->diagPending || !ioc->active)
+	if (ioc->ioc_reset_in_progress || !ioc->active)
 		goto out;
 
 	ioc_raw_state = mpt_GetIocState(ioc, 0);
@@ -1771,14 +1771,15 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	ioc->reply_sz = MPT_REPLY_FRAME_SIZE;
 
 	ioc->pcidev = pdev;
-	ioc->diagPending = 0;
-	spin_lock_init(&ioc->diagLock);
 	spin_lock_init(&ioc->initializing_hba_lock);
 
+	spin_lock_init(&ioc->taskmgmt_lock);
 	mutex_init(&ioc->internal_cmds.mutex);
 	init_completion(&ioc->internal_cmds.done);
 	mutex_init(&ioc->mptbase_cmds.mutex);
 	init_completion(&ioc->mptbase_cmds.done);
+	mutex_init(&ioc->taskmgmt_cmds.mutex);
+	init_completion(&ioc->taskmgmt_cmds.done);
 
 	/* Initialize the event logging.
 	 */
@@ -6572,6 +6573,53 @@ mpt_print_ioc_summary(MPT_ADAPTER *ioc, char *buffer, int *size, int len, int sh
 
 	*size = y;
 }
+/**
+ *	mpt_set_taskmgmt_in_progress_flag - set flags associated with task managment
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *
+ *	Returns 0 for SUCCESS or -1 if FAILED.
+ *
+ *	If -1 is return, then it was not possible to set the flags
+ **/
+int
+mpt_set_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc)
+{
+	unsigned long	 flags;
+	int		 retval;
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress || ioc->taskmgmt_in_progress ||
+	    (ioc->alt_ioc && ioc->alt_ioc->taskmgmt_in_progress)) {
+		retval = -1;
+		goto out;
+	}
+	retval = 0;
+	ioc->taskmgmt_in_progress = 1;
+	if (ioc->alt_ioc)
+		ioc->alt_ioc->taskmgmt_in_progress = 1;
+ out:
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+	return retval;
+}
+EXPORT_SYMBOL(mpt_set_taskmgmt_in_progress_flag);
+
+/**
+ *	mpt_clear_taskmgmt_in_progress_flag - clear flags associated with task managment
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *
+ **/
+void
+mpt_clear_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc)
+{
+	unsigned long	 flags;
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	ioc->taskmgmt_in_progress = 0;
+	if (ioc->alt_ioc)
+		ioc->alt_ioc->taskmgmt_in_progress = 0;
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+}
+EXPORT_SYMBOL(mpt_clear_taskmgmt_in_progress_flag);
 
 
 /**
@@ -6638,14 +6686,15 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
 	/* Reset the adapter. Prevent more than 1 call to
 	 * mpt_do_ioc_recovery at any instant in time.
 	 */
-	spin_lock_irqsave(&ioc->diagLock, flags);
-	if ((ioc->diagPending) || (ioc->alt_ioc && ioc->alt_ioc->diagPending)){
-		spin_unlock_irqrestore(&ioc->diagLock, flags);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 		return 0;
-	} else {
-		ioc->diagPending = 1;
 	}
-	spin_unlock_irqrestore(&ioc->diagLock, flags);
+	ioc->ioc_reset_in_progress = 1;
+	if (ioc->alt_ioc)
+		ioc->alt_ioc->ioc_reset_in_progress = 1;
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
 	/* FIXME: If do_ioc_recovery fails, repeat....
 	 */
@@ -6680,11 +6729,14 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
 	if (ioc->alt_ioc)
 		ioc->alt_ioc->reload_fw = 0;
 
-	spin_lock_irqsave(&ioc->diagLock, flags);
-	ioc->diagPending = 0;
-	if (ioc->alt_ioc)
-		ioc->alt_ioc->diagPending = 0;
-	spin_unlock_irqrestore(&ioc->diagLock, flags);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	ioc->ioc_reset_in_progress = 0;
+	ioc->taskmgmt_in_progress = 0;
+	if (ioc->alt_ioc) {
+		ioc->alt_ioc->ioc_reset_in_progress = 0;
+		ioc->alt_ioc->taskmgmt_in_progress = 0;
+	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
 	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "HardResetHandler rc = %d!\n", ioc->name, rc));
 
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 4d77256..2129aff 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -657,8 +657,6 @@ typedef struct _MPT_ADAPTER
 	MPT_IOCTL		*ioctl;		/* ioctl data pointer */
 	struct proc_dir_entry	*ioc_dentry;
 	struct _MPT_ADAPTER	*alt_ioc;	/* ptr to 929 bound adapter port */
-	spinlock_t		 diagLock;	/* diagnostic reset lock */
-	int			 diagPending;
 	u32			 biosVersion;	/* BIOS version from IO Unit Page 2 */
 	int			 eventTypes;	/* Event logging parameters */
 	int			 eventContext;	/* Next event context */
@@ -712,6 +710,10 @@ typedef struct _MPT_ADAPTER
 	MPT_MGMT		 sas_mgmt;
 	MPT_MGMT		 mptbase_cmds; /* for sending config pages */
 	MPT_MGMT		 internal_cmds;
+	MPT_MGMT		 taskmgmt_cmds;
+	spinlock_t		 taskmgmt_lock; /* diagnostic reset lock */
+	int			 taskmgmt_in_progress;
+	u8			 ioc_reset_in_progress;
 	struct work_struct	 sas_persist_task;
 
 	struct work_struct	 fc_setup_reset_work;
@@ -931,6 +933,8 @@ extern void	 mpt_free_fw_memory(MPT_ADAPTER *ioc);
 extern int	 mpt_findImVolumes(MPT_ADAPTER *ioc);
 extern int	 mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode);
 extern int	 mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, pRaidPhysDiskPage0_t phys_disk);
+extern int	 mpt_set_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc);
+extern void	 mpt_clear_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc);
 extern void     mpt_halt_firmware(MPT_ADAPTER *ioc);
 
 
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 3517325..2463731 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -92,20 +92,24 @@ static int	mptscsih_AddSGE(MPT_ADAPTER *ioc, struct scsi_cmnd *SCpnt,
 				 SCSIIORequest_t *pReq, int req_idx);
 static void	mptscsih_freeChainBuffers(MPT_ADAPTER *ioc, int req_idx);
 static void	mptscsih_copy_sense_data(struct scsi_cmnd *sc, MPT_SCSI_HOST *hd, MPT_FRAME_HDR *mf, SCSIIOReply_t *pScsiReply);
-static int	mptscsih_tm_pending_wait(MPT_SCSI_HOST * hd);
-static int	mptscsih_tm_wait_for_completion(MPT_SCSI_HOST * hd, ulong timeout );
 
-static int	mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, int ctx2abort, ulong timeout);
+int	mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id,
+		int lun, int ctx2abort, ulong timeout);
 
 int		mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset);
 int		mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply);
 
+static void
+mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code);
 static int	mptscsih_get_completion_code(MPT_ADAPTER *ioc,
 		MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply);
 int		mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *r);
 static int	mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *iocmd);
 static void	mptscsih_synchronize_cache(MPT_SCSI_HOST *hd, VirtDevice *vdevice);
 
+static int
+mptscsih_taskmgmt_reply(MPT_ADAPTER *ioc, u8 type,
+				SCSITaskMgmtReply_t *pScsiTmReply);
 void 		mptscsih_remove(struct pci_dev *);
 void 		mptscsih_shutdown(struct pci_dev *);
 #ifdef CONFIG_PM
@@ -1466,8 +1470,8 @@ mptscsih_freeChainBuffers(MPT_ADAPTER *ioc, int req_idx)
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
- *	mptscsih_TMHandler - Generic handler for SCSI Task Management.
- *	@hd: Pointer to MPT SCSI HOST structure
+ *	mptscsih_IssueTaskMgmt - Generic send Task Management function.
+ *	@hd: Pointer to MPT_SCSI_HOST structure
  *	@type: Task Management type
  *	@channel: channel number for task management
  *	@id: Logical Target ID for reset (if appropriate)
@@ -1475,145 +1479,68 @@ mptscsih_freeChainBuffers(MPT_ADAPTER *ioc, int req_idx)
  *	@ctx2abort: Context for the task to be aborted (if appropriate)
  *	@timeout: timeout for task management control
  *
- *	Fall through to mpt_HardResetHandler if: not operational, too many
- *	failed TM requests or handshake failure.
+ *	Remark: _HardResetHandler can be invoked from an interrupt thread (timer)
+ *	or a non-interrupt thread.  In the former, must not call schedule().
  *
- *	Remark: Currently invoked from a non-interrupt thread (_bh).
+ *	Not all fields are meaningfull for all task types.
  *
- *	Note: With old EH code, at most 1 SCSI TaskMgmt function per IOC
- *	will be active.
+ *	Returns 0 for SUCCESS, or FAILED.
  *
- *	Returns 0 for SUCCESS, or %FAILED.
  **/
 int
-mptscsih_TMHandler(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, int ctx2abort, ulong timeout)
+mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun,
+	int ctx2abort, ulong timeout)
 {
-	MPT_ADAPTER	*ioc;
-	int		 rc = -1;
+	MPT_FRAME_HDR	*mf;
+	SCSITaskMgmt_t	*pScsiTm;
+	int		 ii;
+	int		 retval;
+	MPT_ADAPTER 	*ioc = hd->ioc;
+	unsigned long	 timeleft;
+	u8		 issue_hard_reset;
 	u32		 ioc_raw_state;
-	unsigned long	 flags;
-
-	ioc = hd->ioc;
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TMHandler Entered!\n", ioc->name));
-
-	// SJR - CHECKME - Can we avoid this here?
-	// (mpt_HardResetHandler has this check...)
-	spin_lock_irqsave(&ioc->diagLock, flags);
-	if ((ioc->diagPending) || (ioc->alt_ioc && ioc->alt_ioc->diagPending)) {
-		spin_unlock_irqrestore(&ioc->diagLock, flags);
-		return FAILED;
-	}
-	spin_unlock_irqrestore(&ioc->diagLock, flags);
-
-	/*  Wait a fixed amount of time for the TM pending flag to be cleared.
-	 *  If we time out and not bus reset, then we return a FAILED status
-	 *  to the caller.
-	 *  The call to mptscsih_tm_pending_wait() will set the pending flag
-	 *  if we are
-	 *  successful. Otherwise, reload the FW.
-	 */
-	if (mptscsih_tm_pending_wait(hd) == FAILED) {
-		if (type == MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK) {
-			dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TMHandler abort: "
-			   "Timed out waiting for last TM (%d) to complete! \n",
-			   ioc->name, hd->tmPending));
-			return FAILED;
-		} else if (type == MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET) {
-			dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TMHandler target "
-				"reset: Timed out waiting for last TM (%d) "
-				"to complete! \n", ioc->name,
-				hd->tmPending));
-			return FAILED;
-		} else if (type == MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS) {
-			dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TMHandler bus reset: "
-			   "Timed out waiting for last TM (%d) to complete! \n",
-			  ioc->name, hd->tmPending));
-			return FAILED;
-		}
-	} else {
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		hd->tmPending |=  (1 << type);
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-	}
+	unsigned long	 time_count;
 
+	issue_hard_reset = 0;
 	ioc_raw_state = mpt_GetIocState(ioc, 0);
 
 	if ((ioc_raw_state & MPI_IOC_STATE_MASK) != MPI_IOC_STATE_OPERATIONAL) {
 		printk(MYIOC_s_WARN_FMT
-			"TM Handler for type=%x: IOC Not operational (0x%x)!\n",
+			"TaskMgmt type=%x: IOC Not operational (0x%x)!\n",
 			ioc->name, type, ioc_raw_state);
-		printk(MYIOC_s_WARN_FMT " Issuing HardReset!!\n", ioc->name);
+		printk(MYIOC_s_WARN_FMT "Issuing HardReset from %s!!\n",
+		    ioc->name, __func__);
 		if (mpt_HardResetHandler(ioc, CAN_SLEEP) < 0)
-			printk(MYIOC_s_WARN_FMT "TMHandler: HardReset "
+			printk(MYIOC_s_WARN_FMT "TaskMgmt HardReset "
 			    "FAILED!!\n", ioc->name);
-		return FAILED;
+		return 0;
 	}
 
 	if (ioc_raw_state & MPI_DOORBELL_ACTIVE) {
 		printk(MYIOC_s_WARN_FMT
-			"TM Handler for type=%x: ioc_state: "
+			"TaskMgmt type=%x: ioc_state: "
 			"DOORBELL_ACTIVE (0x%x)!\n",
 			ioc->name, type, ioc_raw_state);
 		return FAILED;
 	}
 
-	/* Isse the Task Mgmt request.
-	 */
-	if (hd->hard_resets < -1)
-		hd->hard_resets++;
-
-	rc = mptscsih_IssueTaskMgmt(hd, type, channel, id, lun,
-	    ctx2abort, timeout);
-	if (rc)
-		printk(MYIOC_s_INFO_FMT "Issue of TaskMgmt failed!\n",
-		       ioc->name);
-	else
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Issue of TaskMgmt Successful!\n",
-			   ioc->name));
-
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-			"TMHandler rc = %d!\n", ioc->name, rc));
-
-	return rc;
-}
-
-
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_IssueTaskMgmt - Generic send Task Management function.
- *	@hd: Pointer to MPT_SCSI_HOST structure
- *	@type: Task Management type
- *	@channel: channel number for task management
- *	@id: Logical Target ID for reset (if appropriate)
- *	@lun: Logical Unit for reset (if appropriate)
- *	@ctx2abort: Context for the task to be aborted (if appropriate)
- *	@timeout: timeout for task management control
- *
- *	Remark: _HardResetHandler can be invoked from an interrupt thread (timer)
- *	or a non-interrupt thread.  In the former, must not call schedule().
- *
- *	Not all fields are meaningfull for all task types.
- *
- *	Returns 0 for SUCCESS, or FAILED.
- *
- **/
-static int
-mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, int ctx2abort, ulong timeout)
-{
-	MPT_FRAME_HDR	*mf;
-	SCSITaskMgmt_t	*pScsiTm;
-	int		 ii;
-	int		 retval;
-	MPT_ADAPTER 	*ioc = hd->ioc;
+	mutex_lock(&ioc->taskmgmt_cmds.mutex);
+	if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0) {
+		mf = NULL;
+		retval = FAILED;
+		goto out;
+	}
 
 	/* Return Fail to calling function if no message frames available.
 	 */
 	if ((mf = mpt_get_msg_frame(ioc->TaskCtx, ioc)) == NULL) {
-		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "IssueTaskMgmt, no msg frames!!\n",
-				ioc->name));
-		return FAILED;
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"TaskMgmt no msg frames!!\n", ioc->name));
+		retval = FAILED;
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		goto out;
 	}
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "IssueTaskMgmt request @ %p\n",
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request (mf=%p)\n",
 			ioc->name, mf));
 
 	/* Format the Request
@@ -1637,11 +1564,14 @@ mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, i
 
 	pScsiTm->TaskMsgContext = ctx2abort;
 
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "IssueTaskMgmt: ctx2abort (0x%08x) "
-		"type=%d\n", ioc->name, ctx2abort, type));
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt: ctx2abort (0x%08x) "
+		"task_type = 0x%02X, timeout = %ld\n", ioc->name, ctx2abort,
+		type, timeout));
 
 	DBG_DUMP_TM_REQUEST_FRAME(ioc, (u32 *)pScsiTm);
 
+	INITIALIZE_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	time_count = jiffies;
 	if ((ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) &&
 	    (ioc->facts.MsgVersion >= MPI_VERSION_01_05))
 		mpt_put_msg_frame_hi_pri(ioc->TaskCtx, ioc, mf);
@@ -1649,47 +1579,50 @@ mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, i
 		retval = mpt_send_handshake_request(ioc->TaskCtx, ioc,
 			sizeof(SCSITaskMgmt_t), (u32*)pScsiTm, CAN_SLEEP);
 		if (retval) {
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "send_handshake FAILED!"
-			" (hd %p, ioc %p, mf %p, rc=%d) \n", ioc->name, hd,
-			ioc, mf, retval));
-			goto fail_out;
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+				"TaskMgmt handshake FAILED!(mf=%p, rc=%d) \n",
+				ioc->name, mf, retval));
+			mpt_free_msg_frame(ioc, mf);
+			mpt_clear_taskmgmt_in_progress_flag(ioc);
+			goto out;
 		}
 	}
 
-	if(mptscsih_tm_wait_for_completion(hd, timeout) == FAILED) {
-		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "task management request TIMED OUT!"
-			" (hd %p, ioc %p, mf %p) \n", ioc->name, hd,
-			ioc, mf));
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Calling HardReset! \n",
-			 ioc->name));
-		retval = mpt_HardResetHandler(ioc, CAN_SLEEP);
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "rc=%d \n",
-			 ioc->name, retval));
-		goto fail_out;
+	timeleft = wait_for_completion_timeout(&ioc->taskmgmt_cmds.done,
+		timeout*HZ);
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		retval = FAILED;
+		dtmprintk(ioc, printk(MYIOC_s_ERR_FMT
+		    "TaskMgmt TIMED OUT!(mf=%p)\n", ioc->name, mf));
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		issue_hard_reset = 1;
+		goto out;
 	}
 
-	/*
-	 * Handle success case, see if theres a non-zero ioc_status.
-	 */
-	if (hd->tm_iocstatus == MPI_IOCSTATUS_SUCCESS ||
-	   hd->tm_iocstatus == MPI_IOCSTATUS_SCSI_TASK_TERMINATED ||
-	   hd->tm_iocstatus == MPI_IOCSTATUS_SCSI_IOC_TERMINATED)
-		retval = 0;
-	else
-		retval = FAILED;
+	retval = mptscsih_taskmgmt_reply(ioc, type,
+	    (SCSITaskMgmtReply_t *) ioc->taskmgmt_cmds.reply);
 
-	return retval;
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "TaskMgmt completed (%d seconds)\n",
+	    ioc->name, jiffies_to_msecs(jiffies - time_count)/1000));
 
- fail_out:
+ out:
 
-	/*
-	 * Free task management mf, and corresponding tm flags
-	 */
-	mpt_free_msg_frame(ioc, mf);
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
-	return FAILED;
+	CLEAR_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	if (issue_hard_reset) {
+		printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+			ioc->name, __func__);
+		retval = mpt_HardResetHandler(ioc, CAN_SLEEP);
+		mpt_free_msg_frame(ioc, mf);
+	}
+
+	retval = (retval == 0) ? 0 : FAILED;
+	mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+	return retval;
 }
+EXPORT_SYMBOL(mptscsih_IssueTaskMgmt);
 
 static int
 mptscsih_get_tm_timeout(MPT_ADAPTER *ioc)
@@ -1799,9 +1732,11 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 
 	hd->abortSCpnt = SCpnt;
 
-	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
-	    vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun,
-	    ctx2abort, mptscsih_get_tm_timeout(ioc));
+	retval = mptscsih_IssueTaskMgmt(hd,
+			 MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
+			 vdevice->vtarget->channel,
+			 vdevice->vtarget->id, vdevice->lun,
+			 ctx2abort, mptscsih_get_tm_timeout(ioc));
 
 	if (SCPNT_TO_LOOKUP_IDX(ioc, SCpnt) == scpnt_idx &&
 	    SCpnt->serial_number == sn)
@@ -1865,9 +1800,11 @@ mptscsih_dev_reset(struct scsi_cmnd * SCpnt)
 		goto out;
 	}
 
-	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
-	    vdevice->vtarget->channel, vdevice->vtarget->id, 0, 0,
-	    mptscsih_get_tm_timeout(ioc));
+	retval = mptscsih_IssueTaskMgmt(hd,
+				MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
+				vdevice->vtarget->channel,
+				vdevice->vtarget->id, 0, 0,
+				mptscsih_get_tm_timeout(ioc));
 
  out:
 	printk (MYIOC_s_INFO_FMT "target reset: %s (sc=%p)\n",
@@ -1914,8 +1851,10 @@ mptscsih_bus_reset(struct scsi_cmnd * SCpnt)
 		hd->timeouts++;
 
 	vdevice = SCpnt->device->hostdata;
-	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
-	    vdevice->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(ioc));
+	retval = mptscsih_IssueTaskMgmt(hd,
+					MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
+					vdevice->vtarget->channel, 0, 0, 0,
+					mptscsih_get_tm_timeout(ioc));
 
 	printk(MYIOC_s_INFO_FMT "bus reset: %s (sc=%p)\n",
 	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
@@ -1976,65 +1915,55 @@ mptscsih_host_reset(struct scsi_cmnd *SCpnt)
 	return retval;
 }
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_tm_pending_wait - wait for pending task management request to complete
- *	@hd: Pointer to MPT host structure.
- *
- *	Returns {SUCCESS,FAILED}.
- */
 static int
-mptscsih_tm_pending_wait(MPT_SCSI_HOST * hd)
+mptscsih_taskmgmt_reply(MPT_ADAPTER *ioc, u8 type,
+	SCSITaskMgmtReply_t *pScsiTmReply)
 {
-	unsigned long  flags;
-	int            loop_count = 4 * 10;  /* Wait 10 seconds */
-	int            status = FAILED;
-	MPT_ADAPTER	*ioc = hd->ioc;
+	u16			 iocstatus;
+	u32			 termination_count;
+	int			 retval;
 
-	do {
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		if (hd->tmState == TM_STATE_NONE) {
-			hd->tmState = TM_STATE_IN_PROGRESS;
-			hd->tmPending = 1;
-			spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-			status = SUCCESS;
-			break;
-		}
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-		msleep(250);
-	} while (--loop_count);
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		retval = FAILED;
+		goto out;
+	}
 
-	return status;
-}
+	DBG_DUMP_TM_REPLY_FRAME(ioc, (u32 *)pScsiTmReply);
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-/**
- *	mptscsih_tm_wait_for_completion - wait for completion of TM task
- *	@hd: Pointer to MPT host structure.
- *	@timeout: timeout value
- *
- *	Returns {SUCCESS,FAILED}.
- */
-static int
-mptscsih_tm_wait_for_completion(MPT_SCSI_HOST * hd, ulong timeout )
-{
-	unsigned long  flags;
-	int            loop_count = 4 * timeout;
-	int            status = FAILED;
-	MPT_ADAPTER	*ioc = hd->ioc;
+	iocstatus = le16_to_cpu(pScsiTmReply->IOCStatus) & MPI_IOCSTATUS_MASK;
+	termination_count = le32_to_cpu(pScsiTmReply->TerminationCount);
 
-	do {
-		spin_lock_irqsave(&ioc->FreeQlock, flags);
-		if(hd->tmPending == 0) {
-			status = SUCCESS;
-			spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-			break;
-		}
-		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-		msleep(250);
-	} while (--loop_count);
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "TaskMgmt fw_channel = %d, fw_id = %d, task_type = 0x%02X,\n"
+	    "\tiocstatus = 0x%04X, loginfo = 0x%08X, response_code = 0x%02X,\n"
+	    "\tterm_cmnds = %d\n", ioc->name, pScsiTmReply->Bus,
+	    pScsiTmReply->TargetID, type, le16_to_cpu(pScsiTmReply->IOCStatus),
+	    le32_to_cpu(pScsiTmReply->IOCLogInfo), pScsiTmReply->ResponseCode,
+	    termination_count));
+
+	if (ioc->facts.MsgVersion >= MPI_VERSION_01_05 &&
+	    pScsiTmReply->ResponseCode)
+		mptscsih_taskmgmt_response_code(ioc,
+		    pScsiTmReply->ResponseCode);
 
-	return status;
+	if (iocstatus == MPI_IOCSTATUS_SUCCESS) {
+		retval = 0;
+		goto out;
+	}
+
+	retval = FAILED;
+	if (type == MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK) {
+		if (termination_count == 1)
+			retval = 0;
+		goto out;
+	}
+
+	if (iocstatus == MPI_IOCSTATUS_SCSI_TASK_TERMINATED ||
+	   iocstatus == MPI_IOCSTATUS_SCSI_IOC_TERMINATED)
+		retval = 0;
+
+ out:
+	return retval;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -2088,97 +2017,28 @@ mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code)
  *	Returns 1 indicating alloc'd request frame ptr should be freed.
  **/
 int
-mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
+mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf,
+	MPT_FRAME_HDR *mr)
 {
-	SCSITaskMgmtReply_t	*pScsiTmReply;
-	SCSITaskMgmt_t		*pScsiTmReq;
-	MPT_SCSI_HOST		*hd;
-	unsigned long		 flags;
-	u16			 iocstatus;
-	u8			 tmType;
-	u32			 termination_count;
-
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt completed (mf=%p,mr=%p)\n",
-	    ioc->name, mf, mr));
-	if (!ioc->sh) {
-		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT
-		    "TaskMgmt Complete: NULL Scsi Host Ptr\n", ioc->name));
-		return 1;
-	}
-
-	if (mr == NULL) {
-		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT
-		    "ERROR! TaskMgmt Reply: NULL Request %p\n", ioc->name, mf));
-		return 1;
-	}
-
-	hd = shost_priv(ioc->sh);
-	pScsiTmReply = (SCSITaskMgmtReply_t*)mr;
-	pScsiTmReq = (SCSITaskMgmt_t*)mf;
-	tmType = pScsiTmReq->TaskType;
-	iocstatus = le16_to_cpu(pScsiTmReply->IOCStatus) & MPI_IOCSTATUS_MASK;
-	termination_count = le32_to_cpu(pScsiTmReply->TerminationCount);
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		"TaskMgmt completed (mf=%p, mr=%p)\n", ioc->name, mf, mr));
 
-	if (ioc->facts.MsgVersion >= MPI_VERSION_01_05 &&
-	    pScsiTmReply->ResponseCode)
-		mptscsih_taskmgmt_response_code(ioc,
-		    pScsiTmReply->ResponseCode);
-	DBG_DUMP_TM_REPLY_FRAME(ioc, (u32 *)pScsiTmReply);
+	ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
 
-#ifdef CONFIG_FUSION_LOGGING
-	if ((ioc->debug_level & MPT_DEBUG_REPLY) ||
-				(ioc->debug_level & MPT_DEBUG_TM ))
-		printk("%s: ha=%d [%d:%d:0] task_type=0x%02X "
-			"iocstatus=0x%04X\n\tloginfo=0x%08X response_code=0x%02X "
-			"term_cmnds=%d\n", __func__, ioc->id, pScsiTmReply->Bus,
-			 pScsiTmReply->TargetID, pScsiTmReq->TaskType,
-			le16_to_cpu(pScsiTmReply->IOCStatus),
-			le32_to_cpu(pScsiTmReply->IOCLogInfo),pScsiTmReply->ResponseCode,
-			le32_to_cpu(pScsiTmReply->TerminationCount));
-#endif
-	if (!iocstatus) {
-		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT " TaskMgmt SUCCESS\n", ioc->name));
-			hd->abortSCpnt = NULL;
+	if (!mr)
 		goto out;
-	}
-
-	/* Error?  (anything non-zero?) */
-
-	/* clear flags and continue.
-	 */
-	switch (tmType) {
-
-	case MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK:
-		if (termination_count == 1)
-			iocstatus = MPI_IOCSTATUS_SCSI_TASK_TERMINATED;
-		hd->abortSCpnt = NULL;
-		break;
-
-	case MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS:
-
-		/* If an internal command is present
-		 * or the TM failed - reload the FW.
-		 * FC FW may respond FAILED to an ABORT
-		 */
-		if (iocstatus == MPI_IOCSTATUS_SCSI_TASK_MGMT_FAILED ||
-		    hd->cmdPtr)
-			if (mpt_HardResetHandler(ioc, NO_SLEEP) < 0)
-				printk(MYIOC_s_WARN_FMT " Firmware Reload FAILED!!\n", ioc->name);
-		break;
-
-	case MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET:
-	default:
-		break;
-	}
 
+	ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+	memcpy(ioc->taskmgmt_cmds.reply, mr,
+	    min(MPT_DEFAULT_FRAME_SIZE, 4 * mr->u.reply.MsgLength));
  out:
-	spin_lock_irqsave(&ioc->FreeQlock, flags);
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
-	hd->tm_iocstatus = iocstatus;
-	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-	return 1;
+	if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_PENDING) {
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		ioc->taskmgmt_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+		complete(&ioc->taskmgmt_cmds.done);
+		return 1;
+	}
+	return 0;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -2901,6 +2761,16 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io)
 	unsigned long	 timeleft;
 	unsigned long	 flags;
 
+	/* don't send internal command during diag reset */
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"%s: busy with host reset\n", ioc->name, __func__));
+		return MPT_SCANDV_BUSY;
+	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+
 	mutex_lock(&ioc->internal_cmds.mutex);
 
 	/* Set command specific information
@@ -3360,6 +3230,5 @@ EXPORT_SYMBOL(mptscsih_event_process);
 EXPORT_SYMBOL(mptscsih_ioc_reset);
 EXPORT_SYMBOL(mptscsih_change_queue_depth);
 EXPORT_SYMBOL(mptscsih_timer_expired);
-EXPORT_SYMBOL(mptscsih_TMHandler);
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 0b103a2..6ac5d4a 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -113,6 +113,8 @@ extern int mptscsih_resume(struct pci_dev *pdev);
 extern int mptscsih_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset, int length, int func);
 extern const char * mptscsih_info(struct Scsi_Host *SChost);
 extern int mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *));
+extern int mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel,
+	u8 id, int lun, int ctx2abort, ulong timeout);
 extern void mptscsih_slave_destroy(struct scsi_device *device);
 extern int mptscsih_slave_configure(struct scsi_device *device);
 extern int mptscsih_abort(struct scsi_cmnd * SCpnt);
@@ -127,7 +129,6 @@ extern int mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pE
 extern int mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset);
 extern int mptscsih_change_queue_depth(struct scsi_device *sdev, int qdepth);
 extern void mptscsih_timer_expired(unsigned long data);
-extern int mptscsih_TMHandler(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, int ctx2abort, ulong timeout);
 extern u8 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern int mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern struct device_attribute *mptscsih_host_attrs[];
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 8f46fdf..e94c76d 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -1522,7 +1522,7 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * issue internal bus reset
 	 */
 	if (ioc->spi_data.bus_reset)
-		mptscsih_TMHandler(hd,
+		mptscsih_IssueTaskMgmt(hd,
 		    MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
 		    0, 0, 0, 0, 5);
 
-- 
cgit v0.10.2


From e7deff3374cc5951f914dcb3c66bc1b8de8a084e Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:46:07 +0530
Subject: [SCSI] mpt fusion: Adding DeviceResetCtx for internal Device reset
 frame

1.) 	Added taskmgmt_quiesce_io flag in IOC and removed resetPending from
	_MPT_SCSI_HOST struct.
2.)	Reset from Scsi mid layer and internal Reset are seperate context.
	Adding DeviceResetCtx for internal Device reset frame.
	mptsas_taskmgmt_complete is optimized as part of implementation.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index af862bf..ae203ec 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -6243,6 +6243,7 @@ mpt_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
 	switch (reset_phase) {
 	case MPT_IOC_SETUP_RESET:
+		ioc->taskmgmt_quiesce_io = 1;
 		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
 		break;
@@ -6595,8 +6596,11 @@ mpt_set_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc)
 	}
 	retval = 0;
 	ioc->taskmgmt_in_progress = 1;
-	if (ioc->alt_ioc)
+	ioc->taskmgmt_quiesce_io = 1;
+	if (ioc->alt_ioc) {
 		ioc->alt_ioc->taskmgmt_in_progress = 1;
+		ioc->alt_ioc->taskmgmt_quiesce_io = 1;
+	}
  out:
 	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 	return retval;
@@ -6615,8 +6619,11 @@ mpt_clear_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc)
 
 	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
 	ioc->taskmgmt_in_progress = 0;
-	if (ioc->alt_ioc)
+	ioc->taskmgmt_quiesce_io = 0;
+	if (ioc->alt_ioc) {
 		ioc->alt_ioc->taskmgmt_in_progress = 0;
+		ioc->alt_ioc->taskmgmt_quiesce_io = 0;
+	}
 	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 }
 EXPORT_SYMBOL(mpt_clear_taskmgmt_in_progress_flag);
@@ -6731,9 +6738,11 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
 
 	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
 	ioc->ioc_reset_in_progress = 0;
+	ioc->taskmgmt_quiesce_io = 0;
 	ioc->taskmgmt_in_progress = 0;
 	if (ioc->alt_ioc) {
 		ioc->alt_ioc->ioc_reset_in_progress = 0;
+		ioc->alt_ioc->taskmgmt_quiesce_io = 0;
 		ioc->alt_ioc->taskmgmt_in_progress = 0;
 	}
 	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 2129aff..a0bf7d8 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -713,6 +713,7 @@ typedef struct _MPT_ADAPTER
 	MPT_MGMT		 taskmgmt_cmds;
 	spinlock_t		 taskmgmt_lock; /* diagnostic reset lock */
 	int			 taskmgmt_in_progress;
+	u8			 taskmgmt_quiesce_io;
 	u8			 ioc_reset_in_progress;
 	struct work_struct	 sas_persist_task;
 
@@ -855,7 +856,6 @@ typedef struct _MPT_SCSI_HOST {
 		 * OS callbacks. freeQ is the free pool.
 		 */
 	u8			  tmPending;
-	u8			  resetPending;
 	u8			  negoNvram;		/* DV disabled, nego NVRAM */
 	u8			  pad1;
 	u8                        tmState;
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index d093871..a53b332 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -1292,7 +1292,6 @@ mptfc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 */
 	hd->tmPending = 0;
 	hd->tmState = TM_STATE_NONE;
-	hd->resetPending = 0;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 16c4232..3efa728 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -93,6 +93,7 @@ static u8	mptsasDoneCtx = MPT_MAX_PROTOCOL_DRIVERS;
 static u8	mptsasTaskCtx = MPT_MAX_PROTOCOL_DRIVERS;
 static u8	mptsasInternalCtx = MPT_MAX_PROTOCOL_DRIVERS; /* Used only for internal commands */
 static u8	mptsasMgmtCtx = MPT_MAX_PROTOCOL_DRIVERS;
+static u8	mptsasDeviceResetCtx = MPT_MAX_PROTOCOL_DRIVERS;
 
 static void mptsas_hotplug_work(struct work_struct *work);
 
@@ -523,10 +524,12 @@ mptsas_find_vtarget(MPT_ADAPTER *ioc, u8 channel, u8 id)
 	VirtTarget 			*vtarget = NULL;
 
 	shost_for_each_device(sdev, ioc->sh) {
-		if ((vdevice = sdev->hostdata) == NULL)
+		vdevice = sdev->hostdata;
+		if ((vdevice == NULL) ||
+			(vdevice->vtarget == NULL))
 			continue;
 		if (vdevice->vtarget->id == id &&
-		    vdevice->vtarget->channel == channel)
+			vdevice->vtarget->channel == channel)
 			vtarget = vdevice->vtarget;
 	}
 	return vtarget;
@@ -551,9 +554,11 @@ mptsas_target_reset(MPT_ADAPTER *ioc, u8 channel, u8 id)
 	MPT_FRAME_HDR	*mf;
 	SCSITaskMgmt_t	*pScsiTm;
 
-	if ((mf = mpt_get_msg_frame(ioc->TaskCtx, ioc)) == NULL) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, no msg frames @%d!!\n",
-		    ioc->name,__func__, __LINE__));
+	mf = mpt_get_msg_frame(mptsasDeviceResetCtx, ioc);
+	if (mf == NULL) {
+		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"%s, no msg frames @%d!!\n",
+			ioc->name, __func__, __LINE__));
 		return 0;
 	}
 
@@ -569,7 +574,7 @@ mptsas_target_reset(MPT_ADAPTER *ioc, u8 channel, u8 id)
 
 	DBG_DUMP_TM_REQUEST_FRAME(ioc, (u32 *)mf);
 
-	mpt_put_msg_frame_hi_pri(ioc->TaskCtx, ioc, mf);
+	mpt_put_msg_frame_hi_pri(mptsasDeviceResetCtx, ioc, mf);
 
 	return 1;
 }
@@ -605,8 +610,9 @@ mptsas_target_reset_queue(MPT_ADAPTER *ioc,
 	target_reset_list = kzalloc(sizeof(*target_reset_list),
 	    GFP_ATOMIC);
 	if (!target_reset_list) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, failed to allocate mem @%d..!!\n",
-		    ioc->name,__func__, __LINE__));
+		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"%s, failed to allocate mem @%d..!!\n",
+			ioc->name, __func__, __LINE__));
 		return;
 	}
 
@@ -614,55 +620,94 @@ mptsas_target_reset_queue(MPT_ADAPTER *ioc,
 		sizeof(*sas_event_data));
 	list_add_tail(&target_reset_list->list, &hd->target_reset_list);
 
-	if (hd->resetPending)
-		return;
+	target_reset_list->time_count = jiffies;
 
 	if (mptsas_target_reset(ioc, channel, id)) {
 		target_reset_list->target_reset_issued = 1;
-		hd->resetPending = 1;
 	}
 }
 
 /**
- * mptsas_dev_reset_complete
- *
- * Completion for TARGET_RESET after NOT_RESPONDING_EVENT,
- * enable work queue to finish off removing device from upper layers.
- * then send next TARGET_RESET in the queue.
- *
- * @ioc
+ *	mptsas_taskmgmt_complete - Completion for TARGET_RESET after
+ *	NOT_RESPONDING_EVENT, enable work queue to finish off removing device
+ *	from upper layers. then send next TARGET_RESET in the queue.
+ *	@ioc: Pointer to MPT_ADAPTER structure
  *
  **/
-static void
-mptsas_dev_reset_complete(MPT_ADAPTER *ioc)
+static int
+mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 {
 	MPT_SCSI_HOST	*hd = shost_priv(ioc->sh);
         struct list_head *head = &hd->target_reset_list;
-	struct mptsas_target_reset_event *target_reset_list;
 	struct mptsas_hotplug_event *ev;
 	EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data;
 	u8		id, channel;
 	__le64		sas_address;
+	struct mptsas_target_reset_event	*target_reset_list;
+	SCSITaskMgmtReply_t *pScsiTmReply;
+
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt completed: "
+	    "(mf = %p, mr = %p)\n", ioc->name, mf, mr));
+
+	pScsiTmReply = (SCSITaskMgmtReply_t *)mr;
+	if (pScsiTmReply) {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "\tTaskMgmt completed: fw_channel = %d, fw_id = %d,\n"
+		    "\ttask_type = 0x%02X, iocstatus = 0x%04X "
+		    "loginfo = 0x%08X,\n\tresponse_code = 0x%02X, "
+		    "term_cmnds = %d\n", ioc->name,
+		    pScsiTmReply->Bus, pScsiTmReply->TargetID,
+		    pScsiTmReply->TaskType,
+		    le16_to_cpu(pScsiTmReply->IOCStatus),
+		    le32_to_cpu(pScsiTmReply->IOCLogInfo),
+		    pScsiTmReply->ResponseCode,
+		    le32_to_cpu(pScsiTmReply->TerminationCount)));
+
+		if (pScsiTmReply->ResponseCode)
+			mptscsih_taskmgmt_response_code(ioc,
+			pScsiTmReply->ResponseCode);
+	}
+
+	if (pScsiTmReply && (pScsiTmReply->TaskType ==
+	    MPI_SCSITASKMGMT_TASKTYPE_QUERY_TASK || pScsiTmReply->TaskType ==
+	     MPI_SCSITASKMGMT_TASKTYPE_ABRT_TASK_SET)) {
+		ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
+		ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+		memcpy(ioc->taskmgmt_cmds.reply, mr,
+		    min(MPT_DEFAULT_FRAME_SIZE, 4 * mr->u.reply.MsgLength));
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->taskmgmt_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+			complete(&ioc->taskmgmt_cmds.done);
+			return 1;
+		}
+		return 0;
+	}
+
+	mpt_clear_taskmgmt_in_progress_flag(ioc);
 
 	if (list_empty(head))
-		return;
+		return 1;
+
+	target_reset_list = list_entry(head->next,
+	    struct mptsas_target_reset_event, list);
 
-	target_reset_list = list_entry(head->next, struct mptsas_target_reset_event, list);
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "TaskMgmt: completed (%d seconds)\n",
+	    ioc->name, jiffies_to_msecs(jiffies -
+	    target_reset_list->time_count)/1000));
 
 	sas_event_data = &target_reset_list->sas_event_data;
-	id = sas_event_data->TargetID;
-	channel = sas_event_data->Bus;
-	hd->resetPending = 0;
+	id = pScsiTmReply->TargetID;
+	channel = pScsiTmReply->Bus;
+	target_reset_list->time_count = jiffies;
 
 	/*
 	 * retry target reset
 	 */
 	if (!target_reset_list->target_reset_issued) {
-		if (mptsas_target_reset(ioc, channel, id)) {
+		if (mptsas_target_reset(ioc, channel, id))
 			target_reset_list->target_reset_issued = 1;
-			hd->resetPending = 1;
-		}
-		return;
+		return 1;
 	}
 
 	/*
@@ -674,7 +719,7 @@ mptsas_dev_reset_complete(MPT_ADAPTER *ioc)
 	if (!ev) {
 		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, failed to allocate mem @%d..!!\n",
 		    ioc->name,__func__, __LINE__));
-		return;
+		return 0;
 	}
 
 	INIT_WORK(&ev->work, mptsas_hotplug_work);
@@ -693,40 +738,26 @@ mptsas_dev_reset_complete(MPT_ADAPTER *ioc)
 	schedule_work(&ev->work);
 	kfree(target_reset_list);
 
+
 	/*
 	 * issue target reset to next device in the queue
 	 */
 
 	head = &hd->target_reset_list;
 	if (list_empty(head))
-		return;
+		return 1;
 
 	target_reset_list = list_entry(head->next, struct mptsas_target_reset_event,
 	    list);
 
-	sas_event_data = &target_reset_list->sas_event_data;
-	id = sas_event_data->TargetID;
-	channel = sas_event_data->Bus;
+	id = target_reset_list->sas_event_data.TargetID;
+	channel = target_reset_list->sas_event_data.Bus;
+	target_reset_list->time_count = jiffies;
 
-	if (mptsas_target_reset(ioc, channel, id)) {
+	if (mptsas_target_reset(ioc, channel, id))
 		target_reset_list->target_reset_issued = 1;
-		hd->resetPending = 1;
-	}
-}
 
-/**
- * mptsas_taskmgmt_complete
- *
- * @ioc
- * @mf
- * @mr
- *
- **/
-static int
-mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
-{
-	mptsas_dev_reset_complete(ioc);
-	return mptscsih_taskmgmt_complete(ioc, mf, mr);
+	return 1;
 }
 
 /**
@@ -3262,7 +3293,6 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 */
 	hd->tmPending = 0;
 	hd->tmState = TM_STATE_NONE;
-	hd->resetPending = 0;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
@@ -3381,10 +3411,12 @@ mptsas_init(void)
 		return -ENODEV;
 
 	mptsasDoneCtx = mpt_register(mptscsih_io_done, MPTSAS_DRIVER);
-	mptsasTaskCtx = mpt_register(mptsas_taskmgmt_complete, MPTSAS_DRIVER);
+	mptsasTaskCtx = mpt_register(mptscsih_taskmgmt_complete, MPTSAS_DRIVER);
 	mptsasInternalCtx =
 		mpt_register(mptscsih_scandv_complete, MPTSAS_DRIVER);
 	mptsasMgmtCtx = mpt_register(mptsas_mgmt_done, MPTSAS_DRIVER);
+	mptsasDeviceResetCtx =
+		mpt_register(mptsas_taskmgmt_complete, MPTSAS_DRIVER);
 
 	mpt_event_register(mptsasDoneCtx, mptsas_event_process);
 	mpt_reset_register(mptsasDoneCtx, mptsas_ioc_reset);
@@ -3409,6 +3441,7 @@ mptsas_exit(void)
 	mpt_deregister(mptsasInternalCtx);
 	mpt_deregister(mptsasTaskCtx);
 	mpt_deregister(mptsasDoneCtx);
+	mpt_deregister(mptsasDeviceResetCtx);
 }
 
 module_init(mptsas_init);
diff --git a/drivers/message/fusion/mptsas.h b/drivers/message/fusion/mptsas.h
index 2b544e0..bf528a5 100644
--- a/drivers/message/fusion/mptsas.h
+++ b/drivers/message/fusion/mptsas.h
@@ -53,6 +53,7 @@ struct mptsas_target_reset_event {
 	struct list_head 	list;
 	EVENT_DATA_SAS_DEVICE_STATUS_CHANGE sas_event_data;
 	u8	target_reset_issued;
+	unsigned long	 time_count;
 };
 
 enum mptsas_hotplug_action {
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 2463731..a6a2bbd 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -99,7 +99,7 @@ int	mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id,
 int		mptscsih_ioc_reset(MPT_ADAPTER *ioc, int post_reset);
 int		mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply);
 
-static void
+void
 mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code);
 static int	mptscsih_get_completion_code(MPT_ADAPTER *ioc,
 		MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply);
@@ -1304,7 +1304,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "qcmd: SCpnt=%p, done()=%p\n",
 		ioc->name, SCpnt, done));
 
-	if (hd->resetPending) {
+	if (ioc->taskmgmt_quiesce_io) {
 		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT "qcmd: SCpnt=%p timeout + 60HZ\n",
 			ioc->name, SCpnt));
 		return SCSI_MLQUEUE_HOST_BUSY;
@@ -1709,11 +1709,6 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 		goto out;
 	}
 
-	if (hd->resetPending) {
-		retval = FAILED;
-		goto out;
-	}
-
 	if (hd->timeouts < -1)
 		hd->timeouts++;
 
@@ -1782,11 +1777,6 @@ mptscsih_dev_reset(struct scsi_cmnd * SCpnt)
 	       ioc->name, SCpnt);
 	scsi_print_command(SCpnt);
 
-	if (hd->resetPending) {
-		retval = FAILED;
-		goto out;
-	}
-
 	vdevice = SCpnt->device->hostdata;
 	if (!vdevice || !vdevice->vtarget) {
 		retval = 0;
@@ -1967,7 +1957,7 @@ mptscsih_taskmgmt_reply(MPT_ADAPTER *ioc, u8 type,
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-static void
+void
 mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code)
 {
 	char *desc;
@@ -2001,6 +1991,7 @@ mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code)
 	printk(MYIOC_s_INFO_FMT "Response Code(0x%08x): F/W: %s\n",
 		ioc->name, response_code, desc);
 }
+EXPORT_SYMBOL(mptscsih_taskmgmt_response_code);
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
@@ -2442,12 +2433,10 @@ mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 	case MPT_IOC_SETUP_RESET:
 		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
-		hd->resetPending = 1;
 		break;
 	case MPT_IOC_PRE_RESET:
 		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
-		hd->resetPending = 0;
 		mptscsih_flush_running_cmds(hd);
 		break;
 	case MPT_IOC_POST_RESET:
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 6ac5d4a..91e9e9f 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -132,3 +132,4 @@ extern void mptscsih_timer_expired(unsigned long data);
 extern u8 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern int mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern struct device_attribute *mptscsih_host_attrs[];
+extern void mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code);
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index e94c76d..8b94074 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -1476,7 +1476,6 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 */
 	hd->tmPending = 0;
 	hd->tmState = TM_STATE_NONE;
-	hd->resetPending = 0;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
-- 
cgit v0.10.2


From ea2a788de4ce5ebab09276e25443f55592af2335 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:46:50 +0530
Subject: [SCSI] mpt fusion: rewrite of ioctl_cmds internal generated function

1) rewrite of ioctl_cmds internal generated function that issue commands to
firmware, porting them to be single threaded using the generic MPT_MGMT
struct. All wait Queues are replace by completion Queue.
2) added seperate callback handler for ioctl task managment
(mptctl_taskmgmt_reply), to handle command that timeout
3) rewrite mptctl_bus_reset

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index a0bf7d8..24d6012 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -434,18 +434,6 @@ do { \
 
 #define MPTCTL_RESET_OK			0x01	/* Issue Bus Reset */
 
-typedef struct _MPT_IOCTL {
-	struct _MPT_ADAPTER	*ioc;
-	u8			 ReplyFrame[MPT_DEFAULT_FRAME_SIZE];	/* reply frame data */
-	u8			 sense[MPT_SENSE_BUFFER_ALLOC];
-	int			 wait_done;	/* wake-up value for this ioc */
-	u8			 rsvd;
-	u8			 status;	/* current command status */
-	u8			 reset;		/* 1 if bus reset allowed */
-	u8			 id;		/* target for reset */
-	struct mutex		 ioctl_mutex;
-} MPT_IOCTL;
-
 #define MPT_MGMT_STATUS_RF_VALID	0x01	/* The Reply Frame is VALID */
 #define MPT_MGMT_STATUS_COMMAND_GOOD	0x02	/* Command Status GOOD */
 #define MPT_MGMT_STATUS_PENDING		0x04	/* command is pending */
@@ -460,6 +448,10 @@ typedef struct _MPT_IOCTL {
 	status = MPT_MGMT_STATUS_PENDING;
 #define CLEAR_MGMT_STATUS(status) \
 	status = 0;
+#define CLEAR_MGMT_PENDING_STATUS(status) \
+	status &= ~MPT_MGMT_STATUS_PENDING;
+#define SET_MGMT_MSG_CONTEXT(msg_context, value) \
+	msg_context = value;
 
 typedef struct _MPT_MGMT {
 	struct mutex		 mutex;
@@ -468,6 +460,7 @@ typedef struct _MPT_MGMT {
 	u8			 sense[MPT_SENSE_BUFFER_ALLOC];
 	u8			 status;	/* current command status */
 	int			 completion_code;
+	u32			 msg_context;
 } MPT_MGMT;
 
 /*
@@ -654,7 +647,6 @@ typedef struct _MPT_ADAPTER
 	RaidCfgData		raid_data;	/* Raid config. data */
 	SasCfgData		sas_data;	/* Sas config. data */
 	FcCfgData		fc_data;	/* Fc config. data */
-	MPT_IOCTL		*ioctl;		/* ioctl data pointer */
 	struct proc_dir_entry	*ioc_dentry;
 	struct _MPT_ADAPTER	*alt_ioc;	/* ptr to 929 bound adapter port */
 	u32			 biosVersion;	/* BIOS version from IO Unit Page 2 */
@@ -711,6 +703,7 @@ typedef struct _MPT_ADAPTER
 	MPT_MGMT		 mptbase_cmds; /* for sending config pages */
 	MPT_MGMT		 internal_cmds;
 	MPT_MGMT		 taskmgmt_cmds;
+	MPT_MGMT		 ioctl_cmds;
 	spinlock_t		 taskmgmt_lock; /* diagnostic reset lock */
 	int			 taskmgmt_in_progress;
 	u8			 taskmgmt_quiesce_io;
@@ -855,10 +848,8 @@ typedef struct _MPT_SCSI_HOST {
 		/* Pool of memory for holding SCpnts before doing
 		 * OS callbacks. freeQ is the free pool.
 		 */
-	u8			  tmPending;
 	u8			  negoNvram;		/* DV disabled, nego NVRAM */
 	u8			  pad1;
-	u8                        tmState;
 	u8			  rsvd[2];
 	MPT_FRAME_HDR		 *cmdPtr;		/* Ptr to nonOS request */
 	struct scsi_cmnd	 *abortSCpnt;
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index 22b75cb..ab62013 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -84,6 +84,7 @@ MODULE_VERSION(my_VERSION);
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 
 static u8 mptctl_id = MPT_MAX_PROTOCOL_DRIVERS;
+static u8 mptctl_taskmgmt_id = MPT_MAX_PROTOCOL_DRIVERS;
 
 static DECLARE_WAIT_QUEUE_HEAD ( mptctl_wait );
 
@@ -127,10 +128,7 @@ static MptSge_t *kbuf_alloc_2_sgl(int bytes, u32 dir, int sge_offset, int *frags
 		struct buflist **blp, dma_addr_t *sglbuf_dma, MPT_ADAPTER *ioc);
 static void kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma,
 		struct buflist *buflist, MPT_ADAPTER *ioc);
-static void mptctl_timeout_expired (MPT_IOCTL *ioctl);
-static int  mptctl_bus_reset(MPT_IOCTL *ioctl);
-static int mptctl_set_tm_flags(MPT_SCSI_HOST *hd);
-static void mptctl_free_tm_flags(MPT_ADAPTER *ioc);
+static int mptctl_bus_reset(MPT_ADAPTER *ioc, u8 function);
 
 /*
  * Reset Handler cleanup function
@@ -183,10 +181,10 @@ mptctl_syscall_down(MPT_ADAPTER *ioc, int nonblock)
 	int rc = 0;
 
 	if (nonblock) {
-		if (!mutex_trylock(&ioc->ioctl->ioctl_mutex))
+		if (!mutex_trylock(&ioc->ioctl_cmds.mutex))
 			rc = -EAGAIN;
 	} else {
-		if (mutex_lock_interruptible(&ioc->ioctl->ioctl_mutex))
+		if (mutex_lock_interruptible(&ioc->ioctl_cmds.mutex))
 			rc = -ERESTARTSYS;
 	}
 	return rc;
@@ -202,100 +200,78 @@ mptctl_syscall_down(MPT_ADAPTER *ioc, int nonblock)
 static int
 mptctl_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
 {
-	char *sense_data;
-	int sz, req_index;
-	u16 iocStatus;
-	u8 cmd;
+	char	*sense_data;
+	int	req_index;
+	int	sz;
 
-	if (req)
-		 cmd = req->u.hdr.Function;
-	else
-		return 1;
-	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "\tcompleting mpi function (0x%02X), req=%p, "
-	    "reply=%p\n", ioc->name,  req->u.hdr.Function, req, reply));
-
-	if (ioc->ioctl) {
-
-		if (reply==NULL) {
-
-			dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_reply() NULL Reply "
-				"Function=%x!\n", ioc->name, cmd));
+	if (!req)
+		return 0;
 
-			ioc->ioctl->status |= MPT_MGMT_STATUS_COMMAND_GOOD;
-			ioc->ioctl->reset &= ~MPTCTL_RESET_OK;
+	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "completing mpi function "
+	    "(0x%02X), req=%p, reply=%p\n", ioc->name,  req->u.hdr.Function,
+	    req, reply));
 
-			/* We are done, issue wake up
-	 		*/
-			ioc->ioctl->wait_done = 1;
-			wake_up (&mptctl_wait);
-			return 1;
+	/*
+	 * Handling continuation of the same reply. Processing the first
+	 * reply, and eating the other replys that come later.
+	 */
+	if (ioc->ioctl_cmds.msg_context != req->u.hdr.MsgContext)
+		goto out_continuation;
 
-		}
+	ioc->ioctl_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
 
-		/* Copy the reply frame (which much exist
-		 * for non-SCSI I/O) to the IOC structure.
-		 */
-		memcpy(ioc->ioctl->ReplyFrame, reply,
-			min(ioc->reply_sz, 4*reply->u.reply.MsgLength));
-		ioc->ioctl->status |= MPT_MGMT_STATUS_RF_VALID;
+	if (!reply)
+		goto out;
 
-		/* Set the command status to GOOD if IOC Status is GOOD
-		 * OR if SCSI I/O cmd and data underrun or recovered error.
-		 */
-		iocStatus = le16_to_cpu(reply->u.reply.IOCStatus) & MPI_IOCSTATUS_MASK;
-		if (iocStatus  == MPI_IOCSTATUS_SUCCESS)
-			ioc->ioctl->status |= MPT_MGMT_STATUS_COMMAND_GOOD;
-
-		if (iocStatus || reply->u.reply.IOCLogInfo)
-			dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "\tiocstatus (0x%04X), "
-				"loginfo (0x%08X)\n", ioc->name,
-				iocStatus,
-				le32_to_cpu(reply->u.reply.IOCLogInfo)));
-
-		if ((cmd == MPI_FUNCTION_SCSI_IO_REQUEST) ||
-			(cmd == MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) {
-
-			if (reply->u.sreply.SCSIStatus || reply->u.sreply.SCSIState)
-				dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-					"\tscsi_status (0x%02x), scsi_state (0x%02x), "
-					"tag = (0x%04x), transfer_count (0x%08x)\n", ioc->name,
-					reply->u.sreply.SCSIStatus,
-					reply->u.sreply.SCSIState,
-					le16_to_cpu(reply->u.sreply.TaskTag),
-					le32_to_cpu(reply->u.sreply.TransferCount)));
-
-			ioc->ioctl->reset &= ~MPTCTL_RESET_OK;
-
-			if ((iocStatus == MPI_IOCSTATUS_SCSI_DATA_UNDERRUN) ||
-			(iocStatus == MPI_IOCSTATUS_SCSI_RECOVERED_ERROR)) {
-				ioc->ioctl->status |=
-					MPT_MGMT_STATUS_COMMAND_GOOD;
-			}
-		}
+	ioc->ioctl_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+	sz = min(ioc->reply_sz, 4*reply->u.reply.MsgLength);
+	memcpy(ioc->ioctl_cmds.reply, reply, sz);
 
-		/* Copy the sense data - if present
-		 */
-		if ((cmd == MPI_FUNCTION_SCSI_IO_REQUEST) &&
-			(reply->u.sreply.SCSIState &
-			 MPI_SCSI_STATE_AUTOSENSE_VALID)){
+	if (reply->u.reply.IOCStatus || reply->u.reply.IOCLogInfo)
+		dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "iocstatus (0x%04X), loginfo (0x%08X)\n", ioc->name,
+		    le16_to_cpu(reply->u.reply.IOCStatus),
+		    le32_to_cpu(reply->u.reply.IOCLogInfo)));
+
+	if ((req->u.hdr.Function == MPI_FUNCTION_SCSI_IO_REQUEST) ||
+		(req->u.hdr.Function ==
+		 MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) {
+
+		if (reply->u.sreply.SCSIStatus || reply->u.sreply.SCSIState)
+			dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"scsi_status (0x%02x), scsi_state (0x%02x), "
+			"tag = (0x%04x), transfer_count (0x%08x)\n", ioc->name,
+			reply->u.sreply.SCSIStatus,
+			reply->u.sreply.SCSIState,
+			le16_to_cpu(reply->u.sreply.TaskTag),
+			le32_to_cpu(reply->u.sreply.TransferCount)));
+
+		if (reply->u.sreply.SCSIState &
+			MPI_SCSI_STATE_AUTOSENSE_VALID) {
 			sz = req->u.scsireq.SenseBufferLength;
 			req_index =
 			    le16_to_cpu(req->u.frame.hwhdr.msgctxu.fld.req_idx);
-			sense_data =
-			    ((u8 *)ioc->sense_buf_pool +
+			sense_data = ((u8 *)ioc->sense_buf_pool +
 			     (req_index * MPT_SENSE_BUFFER_ALLOC));
-			memcpy(ioc->ioctl->sense, sense_data, sz);
-			ioc->ioctl->status |= MPT_MGMT_STATUS_SENSE_VALID;
+			memcpy(ioc->ioctl_cmds.sense, sense_data, sz);
+			ioc->ioctl_cmds.status |= MPT_MGMT_STATUS_SENSE_VALID;
 		}
+	}
 
-		if (cmd == MPI_FUNCTION_SCSI_TASK_MGMT)
-			mptctl_free_tm_flags(ioc);
-
-		/* We are done, issue wake up
-		 */
-		ioc->ioctl->wait_done = 1;
-		wake_up (&mptctl_wait);
+ out:
+	/* We are done, issue wake up
+	 */
+	if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_PENDING) {
+		if (req->u.hdr.Function == MPI_FUNCTION_SCSI_TASK_MGMT)
+			mpt_clear_taskmgmt_in_progress_flag(ioc);
+		ioc->ioctl_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+		complete(&ioc->ioctl_cmds.done);
 	}
+
+ out_continuation:
+	if (reply && (reply->u.reply.MsgFlags &
+	    MPI_MSGFLAGS_CONTINUATION_REPLY))
+		return 0;
 	return 1;
 }
 
@@ -305,30 +281,66 @@ mptctl_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req, MPT_FRAME_HDR *reply)
  * Expecting an interrupt, however timed out.
  *
  */
-static void mptctl_timeout_expired (MPT_IOCTL *ioctl)
+static void
+mptctl_timeout_expired(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
 {
-	int rc = 1;
+	unsigned long flags;
 
-	if (ioctl == NULL)
-		return;
-	dctlprintk(ioctl->ioc,
-		   printk(MYIOC_s_DEBUG_FMT ": Timeout Expired! Host %d\n",
-		   ioctl->ioc->name, ioctl->ioc->id));
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": %s\n",
+		ioc->name, __func__));
 
-	ioctl->wait_done = 0;
-	if (ioctl->reset & MPTCTL_RESET_OK)
-		rc = mptctl_bus_reset(ioctl);
+	if (mpt_fwfault_debug)
+		mpt_halt_firmware(ioc);
 
-	if (rc) {
-		/* Issue a reset for this device.
-		 * The IOC is not responding.
-		 */
-		dctlprintk(ioctl->ioc, printk(MYIOC_s_DEBUG_FMT "Calling HardReset! \n",
-			 ioctl->ioc->name));
-		mpt_HardResetHandler(ioctl->ioc, CAN_SLEEP);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+		CLEAR_MGMT_PENDING_STATUS(ioc->ioctl_cmds.status)
+		mpt_free_msg_frame(ioc, mf);
+		return;
 	}
-	return;
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+
 
+	if (!mptctl_bus_reset(ioc, mf->u.hdr.Function))
+		return;
+
+	/* Issue a reset for this device.
+	 * The IOC is not responding.
+	 */
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Calling HardReset! \n",
+		 ioc->name));
+	CLEAR_MGMT_PENDING_STATUS(ioc->ioctl_cmds.status)
+	mpt_HardResetHandler(ioc, CAN_SLEEP);
+	mpt_free_msg_frame(ioc, mf);
+}
+
+static int
+mptctl_taskmgmt_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
+{
+	if (!mf)
+		return 0;
+
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		"TaskMgmt completed (mf=%p, mr=%p)\n",
+		ioc->name, mf, mr));
+
+	ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_COMMAND_GOOD;
+
+	if (!mr)
+		goto out;
+
+	ioc->taskmgmt_cmds.status |= MPT_MGMT_STATUS_RF_VALID;
+	memcpy(ioc->taskmgmt_cmds.reply, mr,
+	    min(MPT_DEFAULT_FRAME_SIZE, 4 * mr->u.reply.MsgLength));
+ out:
+	if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_PENDING) {
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		ioc->taskmgmt_cmds.status &= ~MPT_MGMT_STATUS_PENDING;
+		complete(&ioc->taskmgmt_cmds.done);
+		return 1;
+	}
+	return 0;
 }
 
 /* mptctl_bus_reset
@@ -336,133 +348,150 @@ static void mptctl_timeout_expired (MPT_IOCTL *ioctl)
  * Bus reset code.
  *
  */
-static int mptctl_bus_reset(MPT_IOCTL *ioctl)
+static int mptctl_bus_reset(MPT_ADAPTER *ioc, u8 function)
 {
 	MPT_FRAME_HDR	*mf;
 	SCSITaskMgmt_t	*pScsiTm;
-	MPT_SCSI_HOST	*hd;
+	SCSITaskMgmtReply_t *pScsiTmReply;
 	int		 ii;
-	int		 retval=0;
-
-
-	ioctl->reset &= ~MPTCTL_RESET_OK;
-
-	if (ioctl->ioc->sh == NULL)
+	int		 retval;
+	unsigned long	 timeout;
+	unsigned long	 time_count;
+	u16		 iocstatus;
+
+	/* bus reset is only good for SCSI IO, RAID PASSTHRU */
+	if (!(function == MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH) ||
+	    (function == MPI_FUNCTION_SCSI_IO_REQUEST)) {
+		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"TaskMgmt, not SCSI_IO!!\n", ioc->name));
 		return -EPERM;
+	}
 
-	hd = shost_priv(ioctl->ioc->sh);
-	if (hd == NULL)
+	mutex_lock(&ioc->taskmgmt_cmds.mutex);
+	if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0) {
+		mutex_unlock(&ioc->taskmgmt_cmds.mutex);
 		return -EPERM;
+	}
 
-	/* Single threading ....
-	 */
-	if (mptctl_set_tm_flags(hd) != 0)
-		return -EPERM;
+	retval = 0;
 
 	/* Send request
 	 */
-	if ((mf = mpt_get_msg_frame(mptctl_id, ioctl->ioc)) == NULL) {
-		dtmprintk(ioctl->ioc, printk(MYIOC_s_DEBUG_FMT "IssueTaskMgmt, no msg frames!!\n",
-				ioctl->ioc->name));
-
-		mptctl_free_tm_flags(ioctl->ioc);
-		return -ENOMEM;
+	mf = mpt_get_msg_frame(mptctl_taskmgmt_id, ioc);
+	if (mf == NULL) {
+		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"TaskMgmt, no msg frames!!\n", ioc->name));
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		retval = -ENOMEM;
+		goto mptctl_bus_reset_done;
 	}
 
-	dtmprintk(ioctl->ioc, printk(MYIOC_s_DEBUG_FMT "IssueTaskMgmt request @ %p\n",
-			ioctl->ioc->name, mf));
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request (mf=%p)\n",
+		ioc->name, mf));
 
 	pScsiTm = (SCSITaskMgmt_t *) mf;
-	pScsiTm->TargetID = ioctl->id;
-	pScsiTm->Bus = hd->port;	/* 0 */
-	pScsiTm->ChainOffset = 0;
+	memset(pScsiTm, 0, sizeof(SCSITaskMgmt_t));
 	pScsiTm->Function = MPI_FUNCTION_SCSI_TASK_MGMT;
-	pScsiTm->Reserved = 0;
 	pScsiTm->TaskType = MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS;
-	pScsiTm->Reserved1 = 0;
 	pScsiTm->MsgFlags = MPI_SCSITASKMGMT_MSGFLAGS_LIPRESET_RESET_OPTION;
-
+	pScsiTm->TargetID = 0;
+	pScsiTm->Bus = 0;
+	pScsiTm->ChainOffset = 0;
+	pScsiTm->Reserved = 0;
+	pScsiTm->Reserved1 = 0;
+	pScsiTm->TaskMsgContext = 0;
 	for (ii= 0; ii < 8; ii++)
 		pScsiTm->LUN[ii] = 0;
-
 	for (ii=0; ii < 7; ii++)
 		pScsiTm->Reserved2[ii] = 0;
 
-	pScsiTm->TaskMsgContext = 0;
-	dtmprintk(ioctl->ioc, printk(MYIOC_s_DEBUG_FMT
-		"mptctl_bus_reset: issued.\n", ioctl->ioc->name));
-
-	DBG_DUMP_TM_REQUEST_FRAME(ioctl->ioc, (u32 *)mf);
+	switch (ioc->bus_type) {
+	case FC:
+		timeout = 40;
+		break;
+	case SAS:
+		timeout = 30;
+		break;
+	case SPI:
+	default:
+		timeout = 2;
+		break;
+	}
 
-	ioctl->wait_done=0;
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		"TaskMgmt type=%d timeout=%ld\n",
+		ioc->name, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS, timeout));
 
-	if ((ioctl->ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) &&
-	    (ioctl->ioc->facts.MsgVersion >= MPI_VERSION_01_05))
-		mpt_put_msg_frame_hi_pri(mptctl_id, ioctl->ioc, mf);
+	INITIALIZE_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	CLEAR_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	time_count = jiffies;
+	if ((ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) &&
+	    (ioc->facts.MsgVersion >= MPI_VERSION_01_05))
+		mpt_put_msg_frame_hi_pri(mptctl_taskmgmt_id, ioc, mf);
 	else {
-		retval = mpt_send_handshake_request(mptctl_id, ioctl->ioc,
-			sizeof(SCSITaskMgmt_t), (u32*)pScsiTm, CAN_SLEEP);
+		retval = mpt_send_handshake_request(mptctl_taskmgmt_id, ioc,
+		    sizeof(SCSITaskMgmt_t), (u32 *)pScsiTm, CAN_SLEEP);
 		if (retval != 0) {
-			dfailprintk(ioctl->ioc, printk(MYIOC_s_ERR_FMT "_send_handshake FAILED!"
-				" (hd %p, ioc %p, mf %p) \n", hd->ioc->name, hd,
-				hd->ioc, mf));
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+				"TaskMgmt send_handshake FAILED!"
+				" (ioc %p, mf %p, rc=%d) \n", ioc->name,
+				ioc, mf, retval));
+			mpt_clear_taskmgmt_in_progress_flag(ioc);
 			goto mptctl_bus_reset_done;
 		}
 	}
 
 	/* Now wait for the command to complete */
-	ii = wait_event_timeout(mptctl_wait,
-	     ioctl->wait_done == 1,
-	     HZ*5 /* 5 second timeout */);
+	ii = wait_for_completion_timeout(&ioc->taskmgmt_cmds.done, timeout*HZ);
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "TaskMgmt failed\n", ioc->name));
+		mpt_free_msg_frame(ioc, mf);
+		mpt_clear_taskmgmt_in_progress_flag(ioc);
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			retval = 0;
+		else
+			retval = -1; /* return failure */
+		goto mptctl_bus_reset_done;
+	}
 
-	if(ii <=0 && (ioctl->wait_done != 1 ))  {
-		mpt_free_msg_frame(hd->ioc, mf);
-		ioctl->wait_done = 0;
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "TaskMgmt failed\n", ioc->name));
+		retval = -1; /* return failure */
+		goto mptctl_bus_reset_done;
+	}
+
+	pScsiTmReply = (SCSITaskMgmtReply_t *) ioc->taskmgmt_cmds.reply;
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "TaskMgmt fw_channel = %d, fw_id = %d, task_type=0x%02X, "
+	    "iocstatus=0x%04X\n\tloginfo=0x%08X, response_code=0x%02X, "
+	    "term_cmnds=%d\n", ioc->name, pScsiTmReply->Bus,
+	    pScsiTmReply->TargetID, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
+	    le16_to_cpu(pScsiTmReply->IOCStatus),
+	    le32_to_cpu(pScsiTmReply->IOCLogInfo),
+	    pScsiTmReply->ResponseCode,
+	    le32_to_cpu(pScsiTmReply->TerminationCount)));
+
+	iocstatus = le16_to_cpu(pScsiTmReply->IOCStatus) & MPI_IOCSTATUS_MASK;
+
+	if (iocstatus == MPI_IOCSTATUS_SCSI_TASK_TERMINATED ||
+	   iocstatus == MPI_IOCSTATUS_SCSI_IOC_TERMINATED ||
+	   iocstatus == MPI_IOCSTATUS_SUCCESS)
+		retval = 0;
+	else {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "TaskMgmt failed\n", ioc->name));
 		retval = -1; /* return failure */
 	}
 
-mptctl_bus_reset_done:
 
-	mptctl_free_tm_flags(ioctl->ioc);
+ mptctl_bus_reset_done:
+	mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+	CLEAR_MGMT_STATUS(ioc->taskmgmt_cmds.status)
 	return retval;
 }
 
-static int
-mptctl_set_tm_flags(MPT_SCSI_HOST *hd) {
-	unsigned long flags;
-
-	spin_lock_irqsave(&hd->ioc->FreeQlock, flags);
-
-	if (hd->tmState == TM_STATE_NONE) {
-		hd->tmState = TM_STATE_IN_PROGRESS;
-		hd->tmPending = 1;
-		spin_unlock_irqrestore(&hd->ioc->FreeQlock, flags);
-	} else {
-		spin_unlock_irqrestore(&hd->ioc->FreeQlock, flags);
-		return -EBUSY;
-	}
-
-	return 0;
-}
-
-static void
-mptctl_free_tm_flags(MPT_ADAPTER *ioc)
-{
-	MPT_SCSI_HOST * hd;
-	unsigned long flags;
-
-	hd = shost_priv(ioc->sh);
-	if (hd == NULL)
-		return;
-
-	spin_lock_irqsave(&ioc->FreeQlock, flags);
-
-	hd->tmState = TM_STATE_NONE;
-	hd->tmPending = 0;
-	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
-
-	return;
-}
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /* mptctl_ioc_reset
@@ -474,22 +503,23 @@ mptctl_free_tm_flags(MPT_ADAPTER *ioc)
 static int
 mptctl_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
-	MPT_IOCTL *ioctl = ioc->ioctl;
-	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "IOC %s_reset routed to IOCTL driver!\n", ioc->name,
-		reset_phase==MPT_IOC_SETUP_RESET ? "setup" : (
-		reset_phase==MPT_IOC_PRE_RESET ? "pre" : "post")));
-
-	if(ioctl == NULL)
-		return 1;
-
 	switch(reset_phase) {
 	case MPT_IOC_SETUP_RESET:
-		ioctl->status |= MPT_MGMT_STATUS_DID_IOCRESET;
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_PRE_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
 		break;
 	case MPT_IOC_POST_RESET:
-		ioctl->status &= ~MPT_MGMT_STATUS_DID_IOCRESET;
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_POST_RESET\n", ioc->name, __func__));
+		if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->ioctl_cmds.status |= MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->ioctl_cmds.done);
+		}
 		break;
-	case MPT_IOC_PRE_RESET:
 	default:
 		break;
 	}
@@ -643,7 +673,7 @@ __mptctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	else
 		ret = -EINVAL;
 
-	mutex_unlock(&iocp->ioctl->ioctl_mutex);
+	mutex_unlock(&iocp->ioctl_cmds.mutex);
 
 	return ret;
 }
@@ -759,6 +789,7 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen)
 	int			 sge_offset = 0;
 	u16			 iocstat;
 	pFWDownloadReply_t	 ReplyMsg = NULL;
+	unsigned long		 timeleft;
 
 	if (mpt_verify_adapter(ioc, &iocp) < 0) {
 		printk(KERN_DEBUG MYNAM "ioctl_fwdl - ioc%d not found!\n",
@@ -893,16 +924,30 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen)
 	 * Finally, perform firmware download.
 	 */
 	ReplyMsg = NULL;
+	SET_MGMT_MSG_CONTEXT(iocp->ioctl_cmds.msg_context, dlmsg->MsgContext);
+	INITIALIZE_MGMT_STATUS(iocp->ioctl_cmds.status)
 	mpt_put_msg_frame(mptctl_id, iocp, mf);
 
 	/* Now wait for the command to complete */
-	ret = wait_event_timeout(mptctl_wait,
-	     iocp->ioctl->wait_done == 1,
-	     HZ*60);
+retry_wait:
+	timeleft = wait_for_completion_timeout(&iocp->ioctl_cmds.done, HZ*60);
+	if (!(iocp->ioctl_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		ret = -ETIME;
+		printk(MYIOC_s_WARN_FMT "%s: failed\n", iocp->name, __func__);
+		if (iocp->ioctl_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) {
+			mpt_free_msg_frame(iocp, mf);
+			goto fwdl_out;
+		}
+		if (!timeleft)
+			mptctl_timeout_expired(iocp, mf);
+		else
+			goto retry_wait;
+		goto fwdl_out;
+	}
 
-	if(ret <=0 && (iocp->ioctl->wait_done != 1 )) {
-	/* Now we need to reset the board */
-		mptctl_timeout_expired(iocp->ioctl);
+	if (!(iocp->ioctl_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		printk(MYIOC_s_WARN_FMT "%s: failed\n", iocp->name, __func__);
+		mpt_free_msg_frame(iocp, mf);
 		ret = -ENODATA;
 		goto fwdl_out;
 	}
@@ -910,7 +955,7 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen)
 	if (sgl)
 		kfree_sgl(sgl, sgl_dma, buflist, iocp);
 
-	ReplyMsg = (pFWDownloadReply_t)iocp->ioctl->ReplyFrame;
+	ReplyMsg = (pFWDownloadReply_t)iocp->ioctl_cmds.reply;
 	iocstat = le16_to_cpu(ReplyMsg->IOCStatus) & MPI_IOCSTATUS_MASK;
 	if (iocstat == MPI_IOCSTATUS_SUCCESS) {
 		printk(MYIOC_s_INFO_FMT "F/W update successfull!\n", iocp->name);
@@ -934,6 +979,9 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen)
 	return 0;
 
 fwdl_out:
+
+	CLEAR_MGMT_STATUS(iocp->ioctl_cmds.status);
+	SET_MGMT_MSG_CONTEXT(iocp->ioctl_cmds.msg_context, 0);
         kfree_sgl(sgl, sgl_dma, buflist, iocp);
 	return ret;
 }
@@ -1774,7 +1822,10 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 	int		msgContext;
 	u16		req_idx;
 	ulong 		timeout;
+	unsigned long	timeleft;
 	struct scsi_device *sdev;
+	unsigned long	 flags;
+	u8		 function;
 
 	/* bufIn and bufOut are used for user to kernel space transfers
 	 */
@@ -1787,16 +1838,15 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 				__FILE__, __LINE__, iocnum);
 		return -ENODEV;
 	}
-	if (!ioc->ioctl) {
-		printk(KERN_ERR MYNAM "%s@%d::mptctl_do_mpt_command - "
-			"No memory available during driver init.\n",
-				__FILE__, __LINE__);
-		return -ENOMEM;
-	} else if (ioc->ioctl->status & MPT_MGMT_STATUS_DID_IOCRESET) {
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 		printk(KERN_ERR MYNAM "%s@%d::mptctl_do_mpt_command - "
-			"Busy with IOC Reset \n", __FILE__, __LINE__);
+			"Busy with diagnostic reset\n", __FILE__, __LINE__);
 		return -EBUSY;
 	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
 	/* Verify that the final request frame will not be too large.
 	 */
@@ -1830,10 +1880,12 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 		printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
 			"Unable to read MF from mpt_ioctl_command struct @ %p\n",
 			ioc->name, __FILE__, __LINE__, mfPtr);
+		function = -1;
 		rc = -EFAULT;
 		goto done_free_mem;
 	}
 	hdr->MsgContext = cpu_to_le32(msgContext);
+	function = hdr->Function;
 
 
 	/* Verify that this request is allowed.
@@ -1841,7 +1893,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 	dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "sending mpi function (0x%02X), req=%p\n",
 	    ioc->name, hdr->Function, mf));
 
-	switch (hdr->Function) {
+	switch (function) {
 	case MPI_FUNCTION_IOC_FACTS:
 	case MPI_FUNCTION_PORT_FACTS:
 		karg.dataOutSize  = karg.dataInSize = 0;
@@ -1938,8 +1990,6 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 			pScsiReq->Control = cpu_to_le32(scsidir | qtag);
 			pScsiReq->DataLength = cpu_to_le32(dataSize);
 
-			ioc->ioctl->reset = MPTCTL_RESET_OK;
-			ioc->ioctl->id = pScsiReq->TargetID;
 
 		} else {
 			printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
@@ -2017,8 +2067,6 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 			pScsiReq->Control = cpu_to_le32(scsidir | qtag);
 			pScsiReq->DataLength = cpu_to_le32(dataSize);
 
-			ioc->ioctl->reset = MPTCTL_RESET_OK;
-			ioc->ioctl->id = pScsiReq->TargetID;
 		} else {
 			printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
 				"SCSI driver is not loaded. \n",
@@ -2029,20 +2077,17 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 		break;
 
 	case MPI_FUNCTION_SCSI_TASK_MGMT:
-		{
-			MPT_SCSI_HOST *hd = NULL;
-			if ((ioc->sh == NULL) || ((hd = shost_priv(ioc->sh)) == NULL)) {
-				printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
-					"SCSI driver not loaded or SCSI host not found. \n",
-					ioc->name, __FILE__, __LINE__);
-				rc = -EFAULT;
-				goto done_free_mem;
-			} else if (mptctl_set_tm_flags(hd) != 0) {
-				rc = -EPERM;
-				goto done_free_mem;
-			}
-		}
+	{
+		SCSITaskMgmt_t	*pScsiTm;
+		pScsiTm = (SCSITaskMgmt_t *)mf;
+		dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"\tTaskType=0x%x MsgFlags=0x%x "
+			"TaskMsgContext=0x%x id=%d channel=%d\n",
+			ioc->name, pScsiTm->TaskType, le32_to_cpu
+			(pScsiTm->TaskMsgContext), pScsiTm->MsgFlags,
+			pScsiTm->TargetID, pScsiTm->Bus));
 		break;
+	}
 
 	case MPI_FUNCTION_IOC_INIT:
 		{
@@ -2186,9 +2231,16 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 		ioc->add_sge(psge, flagsLength, (dma_addr_t) -1);
 	}
 
-	ioc->ioctl->wait_done = 0;
+	SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, hdr->MsgContext);
+	INITIALIZE_MGMT_STATUS(ioc->ioctl_cmds.status)
 	if (hdr->Function == MPI_FUNCTION_SCSI_TASK_MGMT) {
 
+		mutex_lock(&ioc->taskmgmt_cmds.mutex);
+		if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0) {
+			mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+			goto done_free_mem;
+		}
+
 		DBG_DUMP_TM_REQUEST_FRAME(ioc, (u32 *)mf);
 
 		if ((ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) &&
@@ -2199,10 +2251,11 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 				sizeof(SCSITaskMgmt_t), (u32*)mf, CAN_SLEEP);
 			if (rc != 0) {
 				dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				    "_send_handshake FAILED! (ioc %p, mf %p)\n",
+				    "send_handshake FAILED! (ioc %p, mf %p)\n",
 				    ioc->name, ioc, mf));
-				mptctl_free_tm_flags(ioc);
+				mpt_clear_taskmgmt_in_progress_flag(ioc);
 				rc = -ENODATA;
+				mutex_unlock(&ioc->taskmgmt_cmds.mutex);
 				goto done_free_mem;
 			}
 		}
@@ -2212,36 +2265,47 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 
 	/* Now wait for the command to complete */
 	timeout = (karg.timeout > 0) ? karg.timeout : MPT_IOCTL_DEFAULT_TIMEOUT;
-	timeout = wait_event_timeout(mptctl_wait,
-	     ioc->ioctl->wait_done == 1,
-	     HZ*timeout);
-
-	if(timeout <=0 && (ioc->ioctl->wait_done != 1 )) {
-	/* Now we need to reset the board */
-
-		if (hdr->Function == MPI_FUNCTION_SCSI_TASK_MGMT)
-			mptctl_free_tm_flags(ioc);
-
-		mptctl_timeout_expired(ioc->ioctl);
-		rc = -ENODATA;
+retry_wait:
+	timeleft = wait_for_completion_timeout(&ioc->ioctl_cmds.done,
+				HZ*timeout);
+	if (!(ioc->ioctl_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		rc = -ETIME;
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "%s: TIMED OUT!\n",
+		    ioc->name, __func__));
+		if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) {
+			if (function == MPI_FUNCTION_SCSI_TASK_MGMT)
+				mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+			goto done_free_mem;
+		}
+		if (!timeleft) {
+			if (function == MPI_FUNCTION_SCSI_TASK_MGMT)
+				mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+			mptctl_timeout_expired(ioc, mf);
+			mf = NULL;
+		} else
+			goto retry_wait;
 		goto done_free_mem;
 	}
 
+	if (function == MPI_FUNCTION_SCSI_TASK_MGMT)
+		mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+
+
 	mf = NULL;
 
 	/* If a valid reply frame, copy to the user.
 	 * Offset 2: reply length in U32's
 	 */
-	if (ioc->ioctl->status & MPT_MGMT_STATUS_RF_VALID) {
+	if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_RF_VALID) {
 		if (karg.maxReplyBytes < ioc->reply_sz) {
-			 sz = min(karg.maxReplyBytes, 4*ioc->ioctl->ReplyFrame[2]);
+			sz = min(karg.maxReplyBytes,
+				4*ioc->ioctl_cmds.reply[2]);
 		} else {
-			 sz = min(ioc->reply_sz, 4*ioc->ioctl->ReplyFrame[2]);
+			 sz = min(ioc->reply_sz, 4*ioc->ioctl_cmds.reply[2]);
 		}
-
 		if (sz > 0) {
 			if (copy_to_user(karg.replyFrameBufPtr,
-				 &ioc->ioctl->ReplyFrame, sz)){
+				 ioc->ioctl_cmds.reply, sz)){
 				 printk(MYIOC_s_ERR_FMT
 				     "%s@%d::mptctl_do_mpt_command - "
 				 "Unable to write out reply frame %p\n",
@@ -2254,10 +2318,11 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 
 	/* If valid sense data, copy to user.
 	 */
-	if (ioc->ioctl->status & MPT_MGMT_STATUS_SENSE_VALID) {
+	if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_SENSE_VALID) {
 		sz = min(karg.maxSenseBytes, MPT_SENSE_BUFFER_SIZE);
 		if (sz > 0) {
-			if (copy_to_user(karg.senseDataPtr, ioc->ioctl->sense, sz)) {
+			if (copy_to_user(karg.senseDataPtr,
+				ioc->ioctl_cmds.sense, sz)) {
 				printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_do_mpt_command - "
 				"Unable to write sense data to user %p\n",
 				ioc->name, __FILE__, __LINE__,
@@ -2271,7 +2336,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 	/* If the overall status is _GOOD and data in, copy data
 	 * to user.
 	 */
-	if ((ioc->ioctl->status & MPT_MGMT_STATUS_COMMAND_GOOD) &&
+	if ((ioc->ioctl_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD) &&
 				(karg.dataInSize > 0) && (bufIn.kptr)) {
 
 		if (copy_to_user(karg.dataInBufPtr,
@@ -2286,9 +2351,8 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr)
 
 done_free_mem:
 
-	ioc->ioctl->status &= ~(MPT_MGMT_STATUS_COMMAND_GOOD |
-		MPT_MGMT_STATUS_SENSE_VALID |
-		MPT_MGMT_STATUS_RF_VALID);
+	CLEAR_MGMT_STATUS(ioc->ioctl_cmds.status)
+	SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, 0);
 
 	/* Free the allocated memory.
 	 */
@@ -2338,6 +2402,8 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size)
 	ToolboxIstwiReadWriteRequest_t	*IstwiRWRequest;
 	MPT_FRAME_HDR		*mf = NULL;
 	MPIHeader_t		*mpi_hdr;
+	unsigned long		timeleft;
+	int			retval;
 
 	/* Reset long to int. Should affect IA64 and SPARC only
 	 */
@@ -2478,8 +2544,8 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size)
 	 * Gather ISTWI(Industry Standard Two Wire Interface) Data
 	 */
 	if ((mf = mpt_get_msg_frame(mptctl_id, ioc)) == NULL) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, no msg frames!!\n",
-		    ioc->name,__func__));
+		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT
+			"%s, no msg frames!!\n", ioc->name, __func__));
 		goto out;
 	}
 
@@ -2503,19 +2569,26 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size)
 	ioc->add_sge((char *)&IstwiRWRequest->SGL,
 	    (MPT_SGE_FLAGS_SSIMPLE_READ|4), buf_dma);
 
-	ioc->ioctl->wait_done = 0;
+	retval = 0;
+	SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context,
+				IstwiRWRequest->MsgContext);
+	INITIALIZE_MGMT_STATUS(ioc->ioctl_cmds.status)
 	mpt_put_msg_frame(mptctl_id, ioc, mf);
 
-	rc = wait_event_timeout(mptctl_wait,
-	     ioc->ioctl->wait_done == 1,
-	     HZ*MPT_IOCTL_DEFAULT_TIMEOUT /* 10 sec */);
-
-	if(rc <=0 && (ioc->ioctl->wait_done != 1 )) {
-		/*
-		 * Now we need to reset the board
-		 */
-		mpt_free_msg_frame(ioc, mf);
-		mptctl_timeout_expired(ioc->ioctl);
+retry_wait:
+	timeleft = wait_for_completion_timeout(&ioc->ioctl_cmds.done,
+			HZ*MPT_IOCTL_DEFAULT_TIMEOUT);
+	if (!(ioc->ioctl_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		retval = -ETIME;
+		printk(MYIOC_s_WARN_FMT "%s: failed\n", ioc->name, __func__);
+		if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) {
+			mpt_free_msg_frame(ioc, mf);
+			goto out;
+		}
+		if (!timeleft)
+			mptctl_timeout_expired(ioc, mf);
+		else
+			goto retry_wait;
 		goto out;
 	}
 
@@ -2528,10 +2601,13 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size)
 	 *   bays have drives in them
 	 * pbuf[3] = Checksum (0x100 = (byte0 + byte2 + byte3)
 	 */
-	if (ioc->ioctl->status & MPT_MGMT_STATUS_RF_VALID)
+	if (ioc->ioctl_cmds.status & MPT_MGMT_STATUS_RF_VALID)
 		karg.rsvd = *(u32 *)pbuf;
 
  out:
+	CLEAR_MGMT_STATUS(ioc->ioctl_cmds.status)
+	SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, 0);
+
 	if (pbuf)
 		pci_free_consistent(ioc->pcidev, 4, pbuf, buf_dma);
 
@@ -2755,7 +2831,7 @@ compat_mptfwxfer_ioctl(struct file *filp, unsigned int cmd,
 
 	ret = mptctl_do_fw_download(kfw.iocnum, kfw.bufp, kfw.fwlen);
 
-	mutex_unlock(&iocp->ioctl->ioctl_mutex);
+	mutex_unlock(&iocp->ioctl_cmds.mutex);
 
 	return ret;
 }
@@ -2809,7 +2885,7 @@ compat_mpt_command(struct file *filp, unsigned int cmd,
 	 */
 	ret = mptctl_do_mpt_command (karg, &uarg->MF);
 
-	mutex_unlock(&iocp->ioctl->ioctl_mutex);
+	mutex_unlock(&iocp->ioctl_cmds.mutex);
 
 	return ret;
 }
@@ -2861,21 +2937,10 @@ static long compat_mpctl_ioctl(struct file *f, unsigned int cmd, unsigned long a
 static int
 mptctl_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	MPT_IOCTL *mem;
 	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
 
-	/*
-	 * Allocate and inite a MPT_IOCTL structure
-	*/
-	mem = kzalloc(sizeof(MPT_IOCTL), GFP_KERNEL);
-	if (!mem) {
-		mptctl_remove(pdev);
-		return -ENOMEM;
-	}
-
-	ioc->ioctl = mem;
-	ioc->ioctl->ioc = ioc;
-	mutex_init(&ioc->ioctl->ioctl_mutex);
+	mutex_init(&ioc->ioctl_cmds.mutex);
+	init_completion(&ioc->ioctl_cmds.done);
 	return 0;
 }
 
@@ -2889,9 +2954,6 @@ mptctl_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 static void
 mptctl_remove(struct pci_dev *pdev)
 {
-	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
-
-	kfree ( ioc->ioctl );
 }
 
 static struct mpt_pci_driver mptctl_driver = {
@@ -2931,6 +2993,7 @@ static int __init mptctl_init(void)
 		goto out_fail;
 	}
 
+	mptctl_taskmgmt_id = mpt_register(mptctl_taskmgmt_reply, MPTCTL_DRIVER);
 	mpt_reset_register(mptctl_id, mptctl_ioc_reset);
 	mpt_event_register(mptctl_id, mptctl_event_process);
 
@@ -2955,6 +3018,7 @@ static void mptctl_exit(void)
 
 	/* De-register callback handler from base module */
 	mpt_deregister(mptctl_id);
+	mpt_reset_deregister(mptctl_taskmgmt_id);
 
         mpt_device_driver_deregister(MPTCTL_DRIVER);
 
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index a53b332..e61df13 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -1290,8 +1290,6 @@ mptfc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* Clear the TM flags
 	 */
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 3efa728..eb6b10e 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -553,15 +553,21 @@ mptsas_target_reset(MPT_ADAPTER *ioc, u8 channel, u8 id)
 {
 	MPT_FRAME_HDR	*mf;
 	SCSITaskMgmt_t	*pScsiTm;
+	if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0)
+		return 0;
+
 
 	mf = mpt_get_msg_frame(mptsasDeviceResetCtx, ioc);
 	if (mf == NULL) {
 		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT
-			"%s, no msg frames @%d!!\n",
-			ioc->name, __func__, __LINE__));
-		return 0;
+			"%s, no msg frames @%d!!\n", ioc->name,
+			__func__, __LINE__));
+		goto out_fail;
 	}
 
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request (mf=%p)\n",
+		ioc->name, mf));
+
 	/* Format the Request
 	 */
 	pScsiTm = (SCSITaskMgmt_t *) mf;
@@ -574,9 +580,18 @@ mptsas_target_reset(MPT_ADAPTER *ioc, u8 channel, u8 id)
 
 	DBG_DUMP_TM_REQUEST_FRAME(ioc, (u32 *)mf);
 
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	   "TaskMgmt type=%d (sas device delete) fw_channel = %d fw_id = %d)\n",
+	   ioc->name, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, channel, id));
+
 	mpt_put_msg_frame_hi_pri(mptsasDeviceResetCtx, ioc, mf);
 
 	return 1;
+
+ out_fail:
+
+	mpt_clear_taskmgmt_in_progress_flag(ioc);
+	return 0;
 }
 
 /**
@@ -719,9 +734,12 @@ mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 	if (!ev) {
 		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, failed to allocate mem @%d..!!\n",
 		    ioc->name,__func__, __LINE__));
-		return 0;
+		goto out_fail;
 	}
 
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request (mf=%p)\n",
+		ioc->name, mf));
+
 	INIT_WORK(&ev->work, mptsas_hotplug_work);
 	ev->ioc = ioc;
 	ev->handle = le16_to_cpu(sas_event_data->DevHandle);
@@ -734,10 +752,19 @@ mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 	    sizeof(__le64));
 	ev->sas_address = le64_to_cpu(sas_address);
 	ev->device_info = le32_to_cpu(sas_event_data->DeviceInfo);
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	   "TaskMgmt type=%d (sas device delete) fw_channel = %d fw_id = %d)\n",
+	   ioc->name, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, channel, id));
+
 	ev->event_type = MPTSAS_DEL_DEVICE;
 	schedule_work(&ev->work);
 	kfree(target_reset_list);
 
+ out_fail:
+
+	mpt_clear_taskmgmt_in_progress_flag(ioc);
+	return 0;
+
 
 	/*
 	 * issue target reset to next device in the queue
@@ -3291,8 +3318,6 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* Clear the TM flags
 	 */
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index a6a2bbd..477f6f8 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -1895,8 +1895,6 @@ mptscsih_host_reset(struct scsi_cmnd *SCpnt)
 		 *  NONE.
 		 */
 		retval = 0;
-		hd->tmPending = 0;
-		hd->tmState = TM_STATE_NONE;
 	}
 
 	printk(MYIOC_s_INFO_FMT "host reset: %s (sc=%p)\n",
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 8b94074..c5b808f 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -1474,8 +1474,6 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* Clear the TM flags
 	 */
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
 	hd->abortSCpnt = NULL;
 
 	/* Clear the pointer used to store
-- 
cgit v0.10.2


From 3eb0822c6740c5564c37a2fe56449cdb4f3d800c Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:47:26 +0530
Subject: [SCSI] mpt fusion: Firmware event implementation using seperate
 WorkQueue

Now Firmware events are handled by firmware event queue.
Previously it was handled in interrupt context/WorkQueue of Linux.
Firmware Event handling is restructured and optimized.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index ae203ec..d67b263 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -1931,6 +1931,11 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	 */
 	mpt_detect_bound_ports(ioc, pdev);
 
+	INIT_LIST_HEAD(&ioc->fw_event_list);
+	spin_lock_init(&ioc->fw_event_lock);
+	snprintf(ioc->fw_event_q_name, 20, "mpt/%d", ioc->id);
+	ioc->fw_event_q = create_singlethread_workqueue(ioc->fw_event_q_name);
+
 	if ((r = mpt_do_ioc_recovery(ioc, MPT_HOSTEVENT_IOC_BRINGUP,
 	    CAN_SLEEP)) != 0){
 		printk(MYIOC_s_ERR_FMT "didn't initialize properly! (%d)\n",
@@ -2010,6 +2015,11 @@ mpt_detach(struct pci_dev *pdev)
 	cancel_delayed_work(&ioc->fault_reset_work);
 	destroy_workqueue(wq);
 
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	wq = ioc->fw_event_q;
+	ioc->fw_event_q = NULL;
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+	destroy_workqueue(wq);
 
 	sprintf(pname, MPT_PROCFS_MPTBASEDIR "/%s/summary", ioc->name);
 	remove_proc_entry(pname, NULL);
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 24d6012..b6efc64 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -694,9 +694,18 @@ typedef struct _MPT_ADAPTER
 	struct net_device	*netdev;
 	struct list_head	 sas_topology;
 	struct mutex		 sas_topology_mutex;
+
+	struct workqueue_struct	*fw_event_q;
+	struct list_head	 fw_event_list;
+	spinlock_t		 fw_event_lock;
+	u8			 fw_events_off; /* if '1', then ignore events */
+	char 			 fw_event_q_name[20];
+
 	struct mutex		 sas_discovery_mutex;
 	u8			 sas_discovery_runtime;
 	u8			 sas_discovery_ignore_events;
+	struct list_head	 sas_device_info_list;
+	struct mutex		 sas_device_info_mutex;
 	u8			 sas_discovery_quiesce_io;
 	int			 sas_index; /* index refrencing */
 	MPT_MGMT		 sas_mgmt;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index eb6b10e..22a027e 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -95,7 +95,24 @@ static u8	mptsasInternalCtx = MPT_MAX_PROTOCOL_DRIVERS; /* Used only for interna
 static u8	mptsasMgmtCtx = MPT_MAX_PROTOCOL_DRIVERS;
 static u8	mptsasDeviceResetCtx = MPT_MAX_PROTOCOL_DRIVERS;
 
-static void mptsas_hotplug_work(struct work_struct *work);
+static void mptsas_firmware_event_work(struct work_struct *work);
+static void mptsas_send_sas_event(struct fw_event_work *fw_event);
+static void mptsas_send_raid_event(struct fw_event_work *fw_event);
+static void mptsas_send_ir2_event(struct fw_event_work *fw_event);
+static void mptsas_parse_device_info(struct sas_identify *identify,
+		struct mptsas_devinfo *device_info);
+static inline void mptsas_set_rphy(MPT_ADAPTER *ioc,
+		struct mptsas_phyinfo *phy_info, struct sas_rphy *rphy);
+static struct mptsas_phyinfo	*mptsas_find_phyinfo_by_sas_address
+		(MPT_ADAPTER *ioc, u64 sas_address);
+static int mptsas_sas_device_pg0(MPT_ADAPTER *ioc,
+	struct mptsas_devinfo *device_info, u32 form, u32 form_specific);
+static int mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc,
+	struct mptsas_enclosure *enclosure, u32 form, u32 form_specific);
+static int mptsas_add_end_device(MPT_ADAPTER *ioc,
+	struct mptsas_phyinfo *phy_info);
+static void mptsas_del_end_device(MPT_ADAPTER *ioc,
+	struct mptsas_phyinfo *phy_info);
 
 static void mptsas_print_phy_data(MPT_ADAPTER *ioc,
 					MPI_SAS_IO_UNIT0_PHY_DATA *phy_data)
@@ -219,6 +236,100 @@ static void mptsas_print_expander_pg1(MPT_ADAPTER *ioc, SasExpanderPage1_t *pg1)
 	    le16_to_cpu(pg1->AttachedDevHandle)));
 }
 
+/* inhibit sas firmware event handling */
+static void
+mptsas_fw_event_off(MPT_ADAPTER *ioc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	ioc->fw_events_off = 1;
+	ioc->sas_discovery_quiesce_io = 0;
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+
+}
+
+/* enable sas firmware event handling */
+static void
+mptsas_fw_event_on(MPT_ADAPTER *ioc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	ioc->fw_events_off = 0;
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+}
+
+/* queue a sas firmware event */
+static void
+mptsas_add_fw_event(MPT_ADAPTER *ioc, struct fw_event_work *fw_event,
+    unsigned long delay)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	list_add_tail(&fw_event->list, &ioc->fw_event_list);
+	INIT_DELAYED_WORK(&fw_event->work, mptsas_firmware_event_work);
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: add (fw_event=0x%p)\n",
+	    ioc->name, __func__, fw_event));
+	queue_delayed_work(ioc->fw_event_q, &fw_event->work,
+	    delay);
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+}
+
+/* free memory assoicated to a sas firmware event */
+static void
+mptsas_free_fw_event(MPT_ADAPTER *ioc, struct fw_event_work *fw_event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: kfree (fw_event=0x%p)\n",
+	    ioc->name, __func__, fw_event));
+	list_del(&fw_event->list);
+	kfree(fw_event);
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+}
+
+/* walk the firmware event queue, and either stop or wait for
+ * outstanding events to complete */
+static void
+mptsas_cleanup_fw_event_q(MPT_ADAPTER *ioc)
+{
+	struct fw_event_work *fw_event, *next;
+	struct mptsas_target_reset_event *target_reset_list, *n;
+	u8	flush_q;
+	MPT_SCSI_HOST	*hd = shost_priv(ioc->sh);
+
+	/* flush the target_reset_list */
+	if (!list_empty(&hd->target_reset_list)) {
+		list_for_each_entry_safe(target_reset_list, n,
+		    &hd->target_reset_list, list) {
+			dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			    "%s: removing target reset for id=%d\n",
+			    ioc->name, __func__,
+			   target_reset_list->sas_event_data.TargetID));
+			list_del(&target_reset_list->list);
+			kfree(target_reset_list);
+		}
+	}
+
+	if (list_empty(&ioc->fw_event_list) ||
+	     !ioc->fw_event_q || in_interrupt())
+		return;
+
+	flush_q = 0;
+	list_for_each_entry_safe(fw_event, next, &ioc->fw_event_list, list) {
+		if (cancel_delayed_work(&fw_event->work))
+			mptsas_free_fw_event(ioc, fw_event);
+		else
+			flush_q = 1;
+	}
+	if (flush_q)
+		flush_workqueue(ioc->fw_event_q);
+}
+
+
 static inline MPT_ADAPTER *phy_to_ioc(struct sas_phy *phy)
 {
 	struct Scsi_Host *shost = dev_to_shost(phy->dev.parent);
@@ -309,6 +420,7 @@ mptsas_port_delete(MPT_ADAPTER *ioc, struct mptsas_portinfo_details * port_detai
 		if(phy_info->port_details != port_details)
 			continue;
 		memset(&phy_info->attached, 0, sizeof(struct mptsas_devinfo));
+		mptsas_set_rphy(ioc, phy_info, NULL);
 		phy_info->port_details = NULL;
 	}
 	kfree(port_details);
@@ -380,6 +492,157 @@ starget)
 		phy_info->port_details->starget = starget;
 }
 
+/**
+ *	mptsas_add_device_component -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@channel: fw mapped id's
+ *	@id:
+ *	@sas_address:
+ *	@device_info:
+ *
+ **/
+static void
+mptsas_add_device_component(MPT_ADAPTER *ioc, u8 channel, u8 id,
+	u64 sas_address, u32 device_info, u16 slot, u64 enclosure_logical_id)
+{
+	struct mptsas_device_info	*sas_info, *next;
+	struct scsi_device	*sdev;
+	struct scsi_target	*starget;
+	struct sas_rphy	*rphy;
+
+	/*
+	 * Delete all matching devices out of the list
+	 */
+	mutex_lock(&ioc->sas_device_info_mutex);
+	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
+	    list) {
+		if ((sas_info->sas_address == sas_address ||
+			(sas_info->fw.channel == channel &&
+			sas_info->fw.id == id))) {
+			list_del(&sas_info->list);
+			kfree(sas_info);
+		}
+	}
+
+	sas_info = kzalloc(sizeof(struct mptsas_device_info), GFP_KERNEL);
+	if (!sas_info)
+		goto out;
+
+	/*
+	 * Set Firmware mapping
+	 */
+	sas_info->fw.id = id;
+	sas_info->fw.channel = channel;
+
+	sas_info->sas_address = sas_address;
+	sas_info->device_info = device_info;
+	sas_info->slot = slot;
+	sas_info->enclosure_logical_id = enclosure_logical_id;
+	INIT_LIST_HEAD(&sas_info->list);
+	list_add_tail(&sas_info->list, &ioc->sas_device_info_list);
+
+	/*
+	 * Set OS mapping
+	 */
+	shost_for_each_device(sdev, ioc->sh) {
+		starget = scsi_target(sdev);
+		rphy = dev_to_rphy(starget->dev.parent);
+		if (rphy->identify.sas_address == sas_address) {
+			sas_info->os.id = starget->id;
+			sas_info->os.channel = starget->channel;
+		}
+	}
+
+ out:
+	mutex_unlock(&ioc->sas_device_info_mutex);
+	return;
+}
+
+/**
+ *	mptsas_add_device_component_by_fw -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@channel:  fw mapped id's
+ *	@id:
+ *
+ **/
+static void
+mptsas_add_device_component_by_fw(MPT_ADAPTER *ioc, u8 channel, u8 id)
+{
+	struct mptsas_devinfo sas_device;
+	struct mptsas_enclosure enclosure_info;
+	int rc;
+
+	rc = mptsas_sas_device_pg0(ioc, &sas_device,
+	    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
+	     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+	    (channel << 8) + id);
+	if (rc)
+		return;
+
+	memset(&enclosure_info, 0, sizeof(struct mptsas_enclosure));
+	mptsas_sas_enclosure_pg0(ioc, &enclosure_info,
+	    (MPI_SAS_ENCLOS_PGAD_FORM_HANDLE <<
+	     MPI_SAS_ENCLOS_PGAD_FORM_SHIFT),
+	     sas_device.handle_enclosure);
+
+	mptsas_add_device_component(ioc, sas_device.channel,
+	    sas_device.id, sas_device.sas_address, sas_device.device_info,
+	    sas_device.slot, enclosure_info.enclosure_logical_id);
+}
+
+/**
+ *	mptsas_add_device_component_starget -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@starget:
+ *
+ **/
+static void
+mptsas_add_device_component_starget(MPT_ADAPTER *ioc,
+	struct scsi_target *starget)
+{
+	VirtTarget	*vtarget;
+	struct sas_rphy	*rphy;
+	struct mptsas_phyinfo	*phy_info = NULL;
+	struct mptsas_enclosure	enclosure_info;
+
+	rphy = dev_to_rphy(starget->dev.parent);
+	vtarget = starget->hostdata;
+	phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+			rphy->identify.sas_address);
+	if (!phy_info)
+		return;
+
+	memset(&enclosure_info, 0, sizeof(struct mptsas_enclosure));
+	mptsas_sas_enclosure_pg0(ioc, &enclosure_info,
+		(MPI_SAS_ENCLOS_PGAD_FORM_HANDLE <<
+		MPI_SAS_ENCLOS_PGAD_FORM_SHIFT),
+		phy_info->attached.handle_enclosure);
+
+	mptsas_add_device_component(ioc, phy_info->attached.channel,
+		phy_info->attached.id, phy_info->attached.sas_address,
+		phy_info->attached.device_info,
+		phy_info->attached.slot, enclosure_info.enclosure_logical_id);
+}
+
+/**
+ *	mptsas_del_device_components - Cleaning the list
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *
+ **/
+static void
+mptsas_del_device_components(MPT_ADAPTER *ioc)
+{
+	struct mptsas_device_info	*sas_info, *next;
+
+	mutex_lock(&ioc->sas_device_info_mutex);
+	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
+		list) {
+		list_del(&sas_info->list);
+		kfree(sas_info);
+	}
+	mutex_unlock(&ioc->sas_device_info_mutex);
+}
+
 
 /*
  * mptsas_setup_wide_ports
@@ -535,6 +798,29 @@ mptsas_find_vtarget(MPT_ADAPTER *ioc, u8 channel, u8 id)
 	return vtarget;
 }
 
+static void
+mptsas_queue_device_delete(MPT_ADAPTER *ioc,
+	MpiEventDataSasDeviceStatusChange_t *sas_event_data)
+{
+	struct fw_event_work *fw_event;
+	int sz;
+
+	sz = offsetof(struct fw_event_work, event_data) +
+	    sizeof(MpiEventDataSasDeviceStatusChange_t);
+	fw_event = kzalloc(sz, GFP_ATOMIC);
+	if (!fw_event) {
+		printk(MYIOC_s_WARN_FMT "%s: failed at (line=%d)\n",
+		    ioc->name, __func__, __LINE__);
+		return;
+	}
+	memcpy(fw_event->event_data, sas_event_data,
+	    sizeof(MpiEventDataSasDeviceStatusChange_t));
+	fw_event->event = MPI_EVENT_SAS_DEVICE_STATUS_CHANGE;
+	fw_event->ioc = ioc;
+	mptsas_add_fw_event(ioc, fw_event, msecs_to_jiffies(1));
+}
+
+
 /**
  * mptsas_target_reset
  *
@@ -654,10 +940,8 @@ mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 {
 	MPT_SCSI_HOST	*hd = shost_priv(ioc->sh);
         struct list_head *head = &hd->target_reset_list;
-	struct mptsas_hotplug_event *ev;
 	EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data;
 	u8		id, channel;
-	__le64		sas_address;
 	struct mptsas_target_reset_event	*target_reset_list;
 	SCSITaskMgmtReply_t *pScsiTmReply;
 
@@ -729,41 +1013,9 @@ mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 	 * enable work queue to remove device from upper layers
 	 */
 	list_del(&target_reset_list->list);
-
-	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev) {
-		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT "%s, failed to allocate mem @%d..!!\n",
-		    ioc->name,__func__, __LINE__));
-		goto out_fail;
-	}
-
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request (mf=%p)\n",
-		ioc->name, mf));
-
-	INIT_WORK(&ev->work, mptsas_hotplug_work);
-	ev->ioc = ioc;
-	ev->handle = le16_to_cpu(sas_event_data->DevHandle);
-	ev->parent_handle =
-	    le16_to_cpu(sas_event_data->ParentDevHandle);
-	ev->channel = channel;
-	ev->id =id;
-	ev->phy_id = sas_event_data->PhyNum;
-	memcpy(&sas_address, &sas_event_data->SASAddress,
-	    sizeof(__le64));
-	ev->sas_address = le64_to_cpu(sas_address);
-	ev->device_info = le32_to_cpu(sas_event_data->DeviceInfo);
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-	   "TaskMgmt type=%d (sas device delete) fw_channel = %d fw_id = %d)\n",
-	   ioc->name, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, channel, id));
-
-	ev->event_type = MPTSAS_DEL_DEVICE;
-	schedule_work(&ev->work);
-	kfree(target_reset_list);
-
- out_fail:
-
-	mpt_clear_taskmgmt_in_progress_flag(ioc);
-	return 0;
+	if ((mptsas_find_vtarget(ioc, channel, id)) && !ioc->fw_events_off)
+		mptsas_queue_device_delete(ioc,
+			&target_reset_list->sas_event_data);
 
 
 	/*
@@ -798,37 +1050,58 @@ static int
 mptsas_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
 	MPT_SCSI_HOST	*hd;
-	struct mptsas_target_reset_event *target_reset_list, *n;
 	int rc;
 
 	rc = mptscsih_ioc_reset(ioc, reset_phase);
+	if ((ioc->bus_type != SAS) || (!rc))
+		return rc;
 
-	if (ioc->bus_type != SAS)
-		goto out;
-
-	if (reset_phase != MPT_IOC_POST_RESET)
-		goto out;
-
-	if (!ioc->sh || !ioc->sh->hostdata)
-		goto out;
 	hd = shost_priv(ioc->sh);
 	if (!hd->ioc)
 		goto out;
 
-	if (list_empty(&hd->target_reset_list))
-		goto out;
-
-	/* flush the target_reset_list */
-	list_for_each_entry_safe(target_reset_list, n,
-	    &hd->target_reset_list, list) {
-		list_del(&target_reset_list->list);
-		kfree(target_reset_list);
+	switch (reset_phase) {
+	case MPT_IOC_SETUP_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_SETUP_RESET\n", ioc->name, __func__));
+		mptsas_fw_event_off(ioc);
+		break;
+	case MPT_IOC_PRE_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_PRE_RESET\n", ioc->name, __func__));
+		break;
+	case MPT_IOC_POST_RESET:
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: MPT_IOC_POST_RESET\n", ioc->name, __func__));
+		if (ioc->sas_mgmt.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->sas_mgmt.status |= MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->sas_mgmt.done);
+		}
+		mptsas_cleanup_fw_event_q(ioc);
+		mptsas_fw_event_on(ioc);
+		break;
+	default:
+		break;
 	}
 
  out:
 	return rc;
 }
 
+
+/**
+ * enum device_state -
+ * @DEVICE_RETRY: need to retry the TUR
+ * @DEVICE_ERROR: TUR return error, don't add device
+ * @DEVICE_READY: device can be added
+ *
+ */
+enum device_state{
+	DEVICE_RETRY,
+	DEVICE_ERROR,
+	DEVICE_READY,
+};
+
 static int
 mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure,
 		u32 form, u32 form_specific)
@@ -894,15 +1167,268 @@ mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure,
 	return error;
 }
 
+/**
+ *	mptsas_add_end_device - report a new end device to sas transport layer
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@phy_info: decribes attached device
+ *
+ *	return (0) success (1) failure
+ *
+ **/
+static int
+mptsas_add_end_device(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info)
+{
+	struct sas_rphy *rphy;
+	struct sas_port *port;
+	struct sas_identify identify;
+	char *ds = NULL;
+	u8 fw_id;
+
+	if (!phy_info) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: exit at line=%d\n", ioc->name,
+			 __func__, __LINE__));
+		return 1;
+	}
+
+	fw_id = phy_info->attached.id;
+
+	if (mptsas_get_rphy(phy_info)) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return 2;
+	}
+
+	port = mptsas_get_port(phy_info);
+	if (!port) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return 3;
+	}
+
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_SSP_TARGET)
+		ds = "ssp";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_STP_TARGET)
+		ds = "stp";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_SATA_DEVICE)
+		ds = "sata";
+
+	printk(MYIOC_s_INFO_FMT "attaching %s device: fw_channel %d, fw_id %d,"
+	    " phy %d, sas_addr 0x%llx\n", ioc->name, ds,
+	    phy_info->attached.channel, phy_info->attached.id,
+	    phy_info->attached.phy_id, (unsigned long long)
+	    phy_info->attached.sas_address);
+
+	mptsas_parse_device_info(&identify, &phy_info->attached);
+	rphy = sas_end_device_alloc(port);
+	if (!rphy) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return 5; /* non-fatal: an rphy can be added later */
+	}
+
+	rphy->identify = identify;
+	if (sas_rphy_add(rphy)) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		sas_rphy_free(rphy);
+		return 6;
+	}
+	mptsas_set_rphy(ioc, phy_info, rphy);
+	return 0;
+}
+
+/**
+ *	mptsas_del_end_device - report a deleted end device to sas transport
+ *	layer
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@phy_info: decribes attached device
+ *
+ **/
+static void
+mptsas_del_end_device(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info)
+{
+	struct sas_rphy *rphy;
+	struct sas_port *port;
+	struct mptsas_portinfo *port_info;
+	struct mptsas_phyinfo *phy_info_parent;
+	int i;
+	char *ds = NULL;
+	u8 fw_id;
+	u64 sas_address;
+
+	if (!phy_info)
+		return;
+
+	fw_id = phy_info->attached.id;
+	sas_address = phy_info->attached.sas_address;
+
+	if (!phy_info->port_details) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return;
+	}
+	rphy = mptsas_get_rphy(phy_info);
+	if (!rphy) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return;
+	}
+
+	if (phy_info->attached.device_info & MPI_SAS_DEVICE_INFO_SSP_INITIATOR
+		|| phy_info->attached.device_info
+			& MPI_SAS_DEVICE_INFO_SMP_INITIATOR
+		|| phy_info->attached.device_info
+			& MPI_SAS_DEVICE_INFO_STP_INITIATOR)
+		ds = "initiator";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_SSP_TARGET)
+		ds = "ssp";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_STP_TARGET)
+		ds = "stp";
+	if (phy_info->attached.device_info &
+	    MPI_SAS_DEVICE_INFO_SATA_DEVICE)
+		ds = "sata";
+
+	dev_printk(KERN_DEBUG, &rphy->dev, MYIOC_s_FMT
+	    "removing %s device: fw_channel %d, fw_id %d, phy %d,"
+	    "sas_addr 0x%llx\n", ioc->name, ds, phy_info->attached.channel,
+	    phy_info->attached.id, phy_info->attached.phy_id,
+	    (unsigned long long) sas_address);
+
+	port = mptsas_get_port(phy_info);
+	if (!port) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, fw_id, __LINE__));
+		return;
+	}
+	port_info = phy_info->portinfo;
+	phy_info_parent = port_info->phy_info;
+	for (i = 0; i < port_info->num_phys; i++, phy_info_parent++) {
+		if (!phy_info_parent->phy)
+			continue;
+		if (phy_info_parent->attached.sas_address !=
+		    sas_address)
+			continue;
+		dev_printk(KERN_DEBUG, &phy_info_parent->phy->dev,
+		    MYIOC_s_FMT "delete phy %d, phy-obj (0x%p)\n",
+		    ioc->name, phy_info_parent->phy_id,
+		    phy_info_parent->phy);
+		sas_port_delete_phy(port, phy_info_parent->phy);
+	}
+
+	dev_printk(KERN_DEBUG, &port->dev, MYIOC_s_FMT
+	    "delete port %d, sas_addr (0x%llx)\n", ioc->name,
+	     port->port_identifier, (unsigned long long)sas_address);
+	sas_port_delete(port);
+	mptsas_set_port(ioc, phy_info, NULL);
+	mptsas_port_delete(ioc, phy_info->port_details);
+}
+
+struct mptsas_phyinfo *
+mptsas_refreshing_device_handles(MPT_ADAPTER *ioc,
+	struct mptsas_devinfo *sas_device)
+{
+	struct mptsas_phyinfo *phy_info;
+	struct mptsas_portinfo *port_info;
+	int i;
+
+	phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+	    sas_device->sas_address);
+	if (!phy_info)
+		goto out;
+	port_info = phy_info->portinfo;
+	if (!port_info)
+		goto out;
+	mutex_lock(&ioc->sas_topology_mutex);
+	for (i = 0; i < port_info->num_phys; i++) {
+		if (port_info->phy_info[i].attached.sas_address !=
+			sas_device->sas_address)
+			continue;
+		port_info->phy_info[i].attached.channel = sas_device->channel;
+		port_info->phy_info[i].attached.id = sas_device->id;
+		port_info->phy_info[i].attached.sas_address =
+		    sas_device->sas_address;
+		port_info->phy_info[i].attached.handle = sas_device->handle;
+		port_info->phy_info[i].attached.handle_parent =
+		    sas_device->handle_parent;
+		port_info->phy_info[i].attached.handle_enclosure =
+		    sas_device->handle_enclosure;
+	}
+	mutex_unlock(&ioc->sas_topology_mutex);
+ out:
+	return phy_info;
+}
+
+/**
+ * mptsas_firmware_event_work - work thread for processing fw events
+ * @work: work queue payload containing info describing the event
+ * Context: user
+ *
+ */
+static void
+mptsas_firmware_event_work(struct work_struct *work)
+{
+	struct fw_event_work *fw_event =
+		container_of(work, struct fw_event_work, work.work);
+	MPT_ADAPTER *ioc = fw_event->ioc;
+
+
+	/* events handling turned off during host reset */
+	if (ioc->fw_events_off) {
+		mptsas_free_fw_event(ioc, fw_event);
+		return;
+	}
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: fw_event=(0x%p), "
+	    "event = (0x%02x)\n", ioc->name, __func__, fw_event,
+	    (fw_event->event & 0xFF)));
+
+	switch (fw_event->event) {
+	case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE:
+		mptsas_send_sas_event(fw_event);
+		break;
+	case MPI_EVENT_INTEGRATED_RAID:
+		mptsas_send_raid_event(fw_event);
+		break;
+	case MPI_EVENT_IR2:
+		mptsas_send_ir2_event(fw_event);
+		break;
+	case MPI_EVENT_PERSISTENT_TABLE_FULL:
+		mptbase_sas_persist_operation(ioc,
+		    MPI_SAS_OP_CLEAR_NOT_PRESENT);
+		mptsas_free_fw_event(ioc, fw_event);
+		break;
+	}
+}
+
+
+
 static int
 mptsas_slave_configure(struct scsi_device *sdev)
 {
+	struct Scsi_Host	*host = sdev->host;
+	MPT_SCSI_HOST	*hd = shost_priv(host);
+	MPT_ADAPTER	*ioc = hd->ioc;
 
 	if (sdev->channel == MPTSAS_RAID_CHANNEL)
 		goto out;
 
 	sas_read_port_mode_page(sdev);
 
+	mptsas_add_device_component_starget(ioc, scsi_target(sdev));
+
  out:
 	return mptscsih_slave_configure(sdev);
 }
@@ -984,11 +1510,15 @@ mptsas_target_destroy(struct scsi_target *starget)
 	struct sas_rphy		*rphy;
 	struct mptsas_portinfo	*p;
 	int 			 i;
-	MPT_ADAPTER *ioc = hd->ioc;
+	MPT_ADAPTER	*ioc = hd->ioc;
+	VirtTarget	*vtarget;
 
 	if (!starget->hostdata)
 		return;
 
+	vtarget = starget->hostdata;
+
+
 	if (starget->channel == MPTSAS_RAID_CHANNEL)
 		goto out;
 
@@ -998,12 +1528,21 @@ mptsas_target_destroy(struct scsi_target *starget)
 			if (p->phy_info[i].attached.sas_address !=
 					rphy->identify.sas_address)
 				continue;
+
+			starget_printk(KERN_INFO, starget, MYIOC_s_FMT
+			"delete device: fw_channel %d, fw_id %d, phy %d, "
+			"sas_addr 0x%llx\n", ioc->name,
+			p->phy_info[i].attached.channel,
+			p->phy_info[i].attached.id,
+			p->phy_info[i].attached.phy_id, (unsigned long long)
+			p->phy_info[i].attached.sas_address);
+
 			mptsas_set_starget(&p->phy_info[i], NULL);
-			goto out;
 		}
 	}
 
  out:
+	vtarget->starget = NULL;
 	kfree(starget->hostdata);
 	starget->hostdata = NULL;
 }
@@ -2471,6 +3010,7 @@ mptsas_discovery_work(struct work_struct *work)
 	kfree(ev);
 }
 
+
 static struct mptsas_phyinfo *
 mptsas_find_phyinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
 {
@@ -2495,30 +3035,6 @@ mptsas_find_phyinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
 	return phy_info;
 }
 
-static struct mptsas_phyinfo *
-mptsas_find_phyinfo_by_target(MPT_ADAPTER *ioc, u8 channel, u8 id)
-{
-	struct mptsas_portinfo *port_info;
-	struct mptsas_phyinfo *phy_info = NULL;
-	int i;
-
-	mutex_lock(&ioc->sas_topology_mutex);
-	list_for_each_entry(port_info, &ioc->sas_topology, list) {
-		for (i = 0; i < port_info->num_phys; i++) {
-			if (!mptsas_is_end_device(
-				&port_info->phy_info[i].attached))
-				continue;
-			if (port_info->phy_info[i].attached.id != id)
-				continue;
-			if (port_info->phy_info[i].attached.channel != channel)
-				continue;
-			phy_info = &port_info->phy_info[i];
-			break;
-		}
-	}
-	mutex_unlock(&ioc->sas_topology_mutex);
-	return phy_info;
-}
 
 static struct mptsas_phyinfo *
 mptsas_find_phyinfo_by_phys_disk_num(MPT_ADAPTER *ioc, u8 channel, u8 id)
@@ -2547,17 +3063,6 @@ mptsas_find_phyinfo_by_phys_disk_num(MPT_ADAPTER *ioc, u8 channel, u8 id)
 	return phy_info;
 }
 
-/*
- * Work queue thread to clear the persitency table
- */
-static void
-mptsas_persist_clear_table(struct work_struct *work)
-{
-	MPT_ADAPTER *ioc = container_of(work, MPT_ADAPTER, sas_persist_task);
-
-	mptbase_sas_persist_operation(ioc, MPI_SAS_OP_CLEAR_NOT_PRESENT);
-}
-
 static void
 mptsas_reprobe_lun(struct scsi_device *sdev, void *data)
 {
@@ -2583,7 +3088,8 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id)
 	pRaidVolumePage0_t		buffer = NULL;
 	RaidPhysDiskPage0_t 		phys_disk;
 	int				i;
-	struct mptsas_hotplug_event 	*ev;
+	struct mptsas_phyinfo	*phy_info;
+	struct mptsas_devinfo		sas_device;
 
 	memset(&cfg, 0 , sizeof(CONFIGPARMS));
 	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
@@ -2623,20 +3129,16 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id)
 		    buffer->PhysDisk[i].PhysDiskNum, &phys_disk) != 0)
 			continue;
 
-		ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-		if (!ev) {
-			printk(MYIOC_s_WARN_FMT "mptsas: lost hotplug event\n", ioc->name);
-			goto out;
-		}
+		if (mptsas_sas_device_pg0(ioc, &sas_device,
+		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
+		     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+			(phys_disk.PhysDiskBus << 8) +
+			phys_disk.PhysDiskID))
+			continue;
 
-		INIT_WORK(&ev->work, mptsas_hotplug_work);
-		ev->ioc = ioc;
-		ev->id = phys_disk.PhysDiskID;
-		ev->channel = phys_disk.PhysDiskBus;
-		ev->phys_disk_num_valid = 1;
-		ev->phys_disk_num = phys_disk.PhysDiskNum;
-		ev->event_type = MPTSAS_ADD_DEVICE;
-		schedule_work(&ev->work);
+		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+		    sas_device.sas_address);
+		mptsas_add_end_device(ioc, phy_info);
 	}
 
  out:
@@ -2648,417 +3150,385 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id)
  * Work queue thread to handle SAS hotplug events
  */
 static void
-mptsas_hotplug_work(struct work_struct *work)
+mptsas_hotplug_work(MPT_ADAPTER *ioc, struct fw_event_work *fw_event,
+    struct mptsas_hotplug_event *hot_plug_info)
 {
-	struct mptsas_hotplug_event *ev =
-		container_of(work, struct mptsas_hotplug_event, work);
-
-	MPT_ADAPTER *ioc = ev->ioc;
 	struct mptsas_phyinfo *phy_info;
-	struct sas_rphy *rphy;
-	struct sas_port *port;
-	struct scsi_device *sdev;
 	struct scsi_target * starget;
-	struct sas_identify identify;
-	char *ds = NULL;
 	struct mptsas_devinfo sas_device;
 	VirtTarget *vtarget;
-	VirtDevice *vdevice;
+	int i;
 
-	mutex_lock(&ioc->sas_discovery_mutex);
-	switch (ev->event_type) {
-	case MPTSAS_DEL_DEVICE:
+	switch (hot_plug_info->event_type) {
 
-		phy_info = NULL;
-		if (ev->phys_disk_num_valid) {
-			if (ev->hidden_raid_component){
-				if (mptsas_sas_device_pg0(ioc, &sas_device,
-				    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
-				     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-				    (ev->channel << 8) + ev->id)) {
-					dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-					"%s: exit at line=%d\n", ioc->name,
-						__func__, __LINE__));
-					break;
-				}
-				phy_info = mptsas_find_phyinfo_by_sas_address(
-				    ioc, sas_device.sas_address);
-			}else
-				phy_info = mptsas_find_phyinfo_by_phys_disk_num(
-				    ioc, ev->channel, ev->phys_disk_num);
+	case MPTSAS_ADD_PHYSDISK:
+
+		if (!ioc->raid_data.pIocPg2)
+			break;
+
+		for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++) {
+			if (ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID ==
+			    hot_plug_info->id) {
+				printk(MYIOC_s_WARN_FMT "firmware bug: unable "
+				    "to add hidden disk - target_id matchs "
+				    "volume_id\n", ioc->name);
+				mptsas_free_fw_event(ioc, fw_event);
+				return;
+			}
 		}
+		mpt_findImVolumes(ioc);
 
+	case MPTSAS_ADD_DEVICE:
+		memset(&sas_device, 0, sizeof(struct mptsas_devinfo));
+		mptsas_sas_device_pg0(ioc, &sas_device,
+		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
+		    MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+		    (hot_plug_info->channel << 8) +
+		    hot_plug_info->id);
+
+		if (!sas_device.handle)
+			return;
+
+		phy_info = mptsas_refreshing_device_handles(ioc, &sas_device);
 		if (!phy_info)
-			phy_info = mptsas_find_phyinfo_by_target(ioc,
-			    ev->channel, ev->id);
+			break;
 
-		/*
-		 * Sanity checks, for non-existing phys and remote rphys.
-		 */
-		if (!phy_info){
+		if (mptsas_get_rphy(phy_info))
+			break;
+
+		mptsas_add_end_device(ioc, phy_info);
+		break;
+
+	case MPTSAS_DEL_DEVICE:
+		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+		    hot_plug_info->sas_address);
+		mptsas_del_end_device(ioc, phy_info);
+		break;
+
+	case MPTSAS_DEL_PHYSDISK:
+
+		mpt_findImVolumes(ioc);
+
+		phy_info = mptsas_find_phyinfo_by_phys_disk_num(
+				ioc, hot_plug_info->channel,
+				hot_plug_info->phys_disk_num);
+		mptsas_del_end_device(ioc, phy_info);
+		break;
+
+	case MPTSAS_ADD_PHYSDISK_REPROBE:
+
+		if (mptsas_sas_device_pg0(ioc, &sas_device,
+		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
+		     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+		    (hot_plug_info->channel << 8) + hot_plug_info->id)) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-				__func__, __LINE__));
+			"%s: fw_id=%d exit at line=%d\n", ioc->name,
+				 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
-		if (!phy_info->port_details) {
+
+		phy_info = mptsas_find_phyinfo_by_sas_address(
+		    ioc, sas_device.sas_address);
+
+		if (!phy_info) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
+				"%s: fw_id=%d exit at line=%d\n", ioc->name,
+				 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
-		rphy = mptsas_get_rphy(phy_info);
-		if (!rphy) {
+
+		starget = mptsas_get_starget(phy_info);
+		if (!starget) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
+				"%s: fw_id=%d exit at line=%d\n", ioc->name,
+				 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
 
-		port = mptsas_get_port(phy_info);
-		if (!port) {
+		vtarget = starget->hostdata;
+		if (!vtarget) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
+				"%s: fw_id=%d exit at line=%d\n", ioc->name,
+				 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
 
-		starget = mptsas_get_starget(phy_info);
-		if (starget) {
-			vtarget = starget->hostdata;
-
-			if (!vtarget) {
-				dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-					"%s: exit at line=%d\n", ioc->name,
-					__func__, __LINE__));
-				break;
-			}
+		mpt_findImVolumes(ioc);
 
-			/*
-			 * Handling  RAID components
-			 */
-			if (ev->phys_disk_num_valid &&
-			    ev->hidden_raid_component) {
-				printk(MYIOC_s_INFO_FMT
-				    "RAID Hidding: channel=%d, id=%d, "
-				    "physdsk %d \n", ioc->name, ev->channel,
-				    ev->id, ev->phys_disk_num);
-				vtarget->id = ev->phys_disk_num;
-				vtarget->tflags |=
-				    MPT_TARGET_FLAGS_RAID_COMPONENT;
-				mptsas_reprobe_target(starget, 1);
-				phy_info->attached.phys_disk_num =
-				    ev->phys_disk_num;
-			break;
-			}
-		}
+		starget_printk(KERN_INFO, starget, MYIOC_s_FMT "RAID Hidding: "
+		    "fw_channel=%d, fw_id=%d, physdsk %d, sas_addr 0x%llx\n",
+		    ioc->name, hot_plug_info->channel, hot_plug_info->id,
+		    hot_plug_info->phys_disk_num, (unsigned long long)
+		    sas_device.sas_address);
 
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_SSP_TARGET)
-			ds = "ssp";
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_STP_TARGET)
-			ds = "stp";
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_SATA_DEVICE)
-			ds = "sata";
-
-		printk(MYIOC_s_INFO_FMT
-		       "removing %s device, channel %d, id %d, phy %d\n",
-		       ioc->name, ds, ev->channel, ev->id, phy_info->phy_id);
-		dev_printk(KERN_DEBUG, &port->dev, MYIOC_s_FMT
-		    "delete port (%d)\n", ioc->name, port->port_identifier);
-		sas_port_delete(port);
-		mptsas_port_delete(ioc, phy_info->port_details);
+		vtarget->id = hot_plug_info->phys_disk_num;
+		vtarget->tflags |= MPT_TARGET_FLAGS_RAID_COMPONENT;
+		phy_info->attached.phys_disk_num = hot_plug_info->phys_disk_num;
+		mptsas_reprobe_target(starget, 1);
 		break;
-	case MPTSAS_ADD_DEVICE:
 
-		if (ev->phys_disk_num_valid)
-			mpt_findImVolumes(ioc);
+	case MPTSAS_DEL_PHYSDISK_REPROBE:
 
-		/*
-		 * Refresh sas device pg0 data
-		 */
 		if (mptsas_sas_device_pg0(ioc, &sas_device,
 		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
 		     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-			(ev->channel << 8) + ev->id)) {
+			(hot_plug_info->channel << 8) + hot_plug_info->id)) {
 				dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-					"%s: exit at line=%d\n", ioc->name,
-					__func__, __LINE__));
+				    "%s: fw_id=%d exit at line=%d\n",
+				    ioc->name, __func__,
+				    hot_plug_info->id, __LINE__));
 			break;
 		}
 
-		__mptsas_discovery_work(ioc);
-
 		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
 				sas_device.sas_address);
-
-		if (!phy_info || !phy_info->port_details) {
+		if (!phy_info) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
+			    "%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
 
 		starget = mptsas_get_starget(phy_info);
-		if (starget && (!ev->hidden_raid_component)){
-
-			vtarget = starget->hostdata;
-
-			if (!vtarget) {
-				dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				    "%s: exit at line=%d\n", ioc->name,
-				    __func__, __LINE__));
-				break;
-			}
-			/*
-			 * Handling  RAID components
-			 */
-			if (vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) {
-				printk(MYIOC_s_INFO_FMT
-				    "RAID Exposing: channel=%d, id=%d, "
-				    "physdsk %d \n", ioc->name, ev->channel,
-				    ev->id, ev->phys_disk_num);
-				vtarget->tflags &=
-				    ~MPT_TARGET_FLAGS_RAID_COMPONENT;
-				vtarget->id = ev->id;
-				mptsas_reprobe_target(starget, 0);
-				phy_info->attached.phys_disk_num = ~0;
-			}
+		if (!starget) {
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			    "%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
 
-		if (mptsas_get_rphy(phy_info)) {
+		vtarget = starget->hostdata;
+		if (!vtarget) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			if (ev->channel) printk("%d\n", __LINE__);
+			    "%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
 
-		port = mptsas_get_port(phy_info);
-		if (!port) {
+		if (!(vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT)) {
 			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
+			    "%s: fw_id=%d exit at line=%d\n", ioc->name,
+			 __func__, hot_plug_info->id, __LINE__));
 			break;
 		}
-		memcpy(&phy_info->attached, &sas_device,
-		    sizeof(struct mptsas_devinfo));
-
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_SSP_TARGET)
-			ds = "ssp";
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_STP_TARGET)
-			ds = "stp";
-		if (phy_info->attached.device_info &
-		    MPI_SAS_DEVICE_INFO_SATA_DEVICE)
-			ds = "sata";
-
-		printk(MYIOC_s_INFO_FMT
-		       "attaching %s device, channel %d, id %d, phy %d\n",
-		       ioc->name, ds, ev->channel, ev->id, ev->phy_id);
 
-		mptsas_parse_device_info(&identify, &phy_info->attached);
-		rphy = sas_end_device_alloc(port);
-		if (!rphy) {
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			break; /* non-fatal: an rphy can be added later */
-		}
+		mpt_findImVolumes(ioc);
 
-		rphy->identify = identify;
-		if (sas_rphy_add(rphy)) {
-			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
-				"%s: exit at line=%d\n", ioc->name,
-			       	__func__, __LINE__));
-			sas_rphy_free(rphy);
-			break;
-		}
-		mptsas_set_rphy(ioc, phy_info, rphy);
+		starget_printk(KERN_INFO, starget, MYIOC_s_FMT "RAID Exposing:"
+		    " fw_channel=%d, fw_id=%d, physdsk %d, sas_addr 0x%llx\n",
+		    ioc->name, hot_plug_info->channel, hot_plug_info->id,
+		    hot_plug_info->phys_disk_num, (unsigned long long)
+		    sas_device.sas_address);
+
+		vtarget->tflags &= ~MPT_TARGET_FLAGS_RAID_COMPONENT;
+		vtarget->id = hot_plug_info->id;
+		phy_info->attached.phys_disk_num = ~0;
+		mptsas_reprobe_target(starget, 0);
+		mptsas_add_device_component_by_fw(ioc,
+		    hot_plug_info->channel, hot_plug_info->id);
 		break;
+
 	case MPTSAS_ADD_RAID:
-		sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL,
-		    ev->id, 0);
-		if (sdev) {
-			scsi_device_put(sdev);
-			break;
-		}
-		printk(MYIOC_s_INFO_FMT
-		       "attaching raid volume, channel %d, id %d\n",
-		       ioc->name, MPTSAS_RAID_CHANNEL, ev->id);
-		scsi_add_device(ioc->sh, MPTSAS_RAID_CHANNEL, ev->id, 0);
+
 		mpt_findImVolumes(ioc);
+		printk(MYIOC_s_INFO_FMT "attaching raid volume, channel %d, "
+		    "id %d\n", ioc->name, MPTSAS_RAID_CHANNEL,
+		    hot_plug_info->id);
+		scsi_add_device(ioc->sh, MPTSAS_RAID_CHANNEL,
+		    hot_plug_info->id, 0);
 		break;
+
 	case MPTSAS_DEL_RAID:
-		sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL,
-		    ev->id, 0);
-		if (!sdev)
-			break;
-		printk(MYIOC_s_INFO_FMT
-		       "removing raid volume, channel %d, id %d\n",
-		       ioc->name, MPTSAS_RAID_CHANNEL, ev->id);
-		vdevice = sdev->hostdata;
-		scsi_remove_device(sdev);
-		scsi_device_put(sdev);
+
 		mpt_findImVolumes(ioc);
+		printk(MYIOC_s_INFO_FMT "removing raid volume, channel %d, "
+		    "id %d\n", ioc->name, MPTSAS_RAID_CHANNEL,
+		    hot_plug_info->id);
+		scsi_remove_device(hot_plug_info->sdev);
+		scsi_device_put(hot_plug_info->sdev);
 		break;
+
 	case MPTSAS_ADD_INACTIVE_VOLUME:
+
+		mpt_findImVolumes(ioc);
 		mptsas_adding_inactive_raid_components(ioc,
-		    ev->channel, ev->id);
+		    hot_plug_info->channel, hot_plug_info->id);
 		break;
-	case MPTSAS_IGNORE_EVENT:
+
 	default:
 		break;
 	}
 
-	mutex_unlock(&ioc->sas_discovery_mutex);
-	kfree(ev);
+	mptsas_free_fw_event(ioc, fw_event);
 }
 
 static void
-mptsas_send_sas_event(MPT_ADAPTER *ioc,
-		EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data)
+mptsas_send_sas_event(struct fw_event_work *fw_event)
 {
-	struct mptsas_hotplug_event *ev;
-	u32 device_info = le32_to_cpu(sas_event_data->DeviceInfo);
-	__le64 sas_address;
+	MPT_ADAPTER *ioc;
+	struct mptsas_hotplug_event hot_plug_info;
+	EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data;
+	u32 device_info;
+	u64 sas_address;
+
+	ioc = fw_event->ioc;
+	sas_event_data = (EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)
+	    fw_event->event_data;
+	device_info = le32_to_cpu(sas_event_data->DeviceInfo);
 
 	if ((device_info &
-	     (MPI_SAS_DEVICE_INFO_SSP_TARGET |
-	      MPI_SAS_DEVICE_INFO_STP_TARGET |
-	      MPI_SAS_DEVICE_INFO_SATA_DEVICE )) == 0)
+		(MPI_SAS_DEVICE_INFO_SSP_TARGET |
+		MPI_SAS_DEVICE_INFO_STP_TARGET |
+		MPI_SAS_DEVICE_INFO_SATA_DEVICE)) == 0) {
+		mptsas_free_fw_event(ioc, fw_event);
+		return;
+	}
+
+	if (sas_event_data->ReasonCode ==
+		MPI_EVENT_SAS_DEV_STAT_RC_NO_PERSIST_ADDED) {
+		mptbase_sas_persist_operation(ioc,
+		MPI_SAS_OP_CLEAR_NOT_PRESENT);
+		mptsas_free_fw_event(ioc, fw_event);
 		return;
+	}
 
 	switch (sas_event_data->ReasonCode) {
 	case MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING:
-
-		mptsas_target_reset_queue(ioc, sas_event_data);
-		break;
-
 	case MPI_EVENT_SAS_DEV_STAT_RC_ADDED:
-		ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-		if (!ev) {
-			printk(MYIOC_s_WARN_FMT "lost hotplug event\n", ioc->name);
-			break;
-		}
-
-		INIT_WORK(&ev->work, mptsas_hotplug_work);
-		ev->ioc = ioc;
-		ev->handle = le16_to_cpu(sas_event_data->DevHandle);
-		ev->parent_handle =
-		    le16_to_cpu(sas_event_data->ParentDevHandle);
-		ev->channel = sas_event_data->Bus;
-		ev->id = sas_event_data->TargetID;
-		ev->phy_id = sas_event_data->PhyNum;
+		memset(&hot_plug_info, 0, sizeof(struct mptsas_hotplug_event));
+		hot_plug_info.handle = le16_to_cpu(sas_event_data->DevHandle);
+		hot_plug_info.channel = sas_event_data->Bus;
+		hot_plug_info.id = sas_event_data->TargetID;
+		hot_plug_info.phy_id = sas_event_data->PhyNum;
 		memcpy(&sas_address, &sas_event_data->SASAddress,
-		    sizeof(__le64));
-		ev->sas_address = le64_to_cpu(sas_address);
-		ev->device_info = device_info;
-
+		    sizeof(u64));
+		hot_plug_info.sas_address = le64_to_cpu(sas_address);
+		hot_plug_info.device_info = device_info;
 		if (sas_event_data->ReasonCode &
 		    MPI_EVENT_SAS_DEV_STAT_RC_ADDED)
-			ev->event_type = MPTSAS_ADD_DEVICE;
+			hot_plug_info.event_type = MPTSAS_ADD_DEVICE;
 		else
-			ev->event_type = MPTSAS_DEL_DEVICE;
-		schedule_work(&ev->work);
+			hot_plug_info.event_type = MPTSAS_DEL_DEVICE;
+		mptsas_hotplug_work(ioc, fw_event, &hot_plug_info);
 		break;
+
 	case MPI_EVENT_SAS_DEV_STAT_RC_NO_PERSIST_ADDED:
-	/*
-	 * Persistent table is full.
-	 */
-		INIT_WORK(&ioc->sas_persist_task,
-		    mptsas_persist_clear_table);
-		schedule_work(&ioc->sas_persist_task);
+		mptbase_sas_persist_operation(ioc,
+		    MPI_SAS_OP_CLEAR_NOT_PRESENT);
+		mptsas_free_fw_event(ioc, fw_event);
 		break;
-	/*
-	 * TODO, handle other events
-	 */
+
 	case MPI_EVENT_SAS_DEV_STAT_RC_SMART_DATA:
-	case MPI_EVENT_SAS_DEV_STAT_RC_UNSUPPORTED:
+	/* TODO */
 	case MPI_EVENT_SAS_DEV_STAT_RC_INTERNAL_DEVICE_RESET:
-	case MPI_EVENT_SAS_DEV_STAT_RC_TASK_ABORT_INTERNAL:
-	case MPI_EVENT_SAS_DEV_STAT_RC_ABORT_TASK_SET_INTERNAL:
-	case MPI_EVENT_SAS_DEV_STAT_RC_CLEAR_TASK_SET_INTERNAL:
-	case MPI_EVENT_SAS_DEV_STAT_RC_QUERY_TASK_INTERNAL:
+	/* TODO */
 	default:
+		mptsas_free_fw_event(ioc, fw_event);
 		break;
 	}
 }
+
 static void
-mptsas_send_raid_event(MPT_ADAPTER *ioc,
-		EVENT_DATA_RAID *raid_event_data)
+mptsas_send_raid_event(struct fw_event_work *fw_event)
 {
-	struct mptsas_hotplug_event *ev;
-	int status = le32_to_cpu(raid_event_data->SettingsStatus);
-	int state = (status >> 8) & 0xff;
-
-	if (ioc->bus_type != SAS)
-		return;
-
-	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev) {
-		printk(MYIOC_s_WARN_FMT "lost hotplug event\n", ioc->name);
-		return;
+	MPT_ADAPTER *ioc;
+	EVENT_DATA_RAID *raid_event_data;
+	struct mptsas_hotplug_event hot_plug_info;
+	int status;
+	int state;
+	struct scsi_device *sdev = NULL;
+	VirtDevice *vdevice = NULL;
+	RaidPhysDiskPage0_t phys_disk;
+
+	ioc = fw_event->ioc;
+	raid_event_data = (EVENT_DATA_RAID *)fw_event->event_data;
+	status = le32_to_cpu(raid_event_data->SettingsStatus);
+	state = (status >> 8) & 0xff;
+
+	memset(&hot_plug_info, 0, sizeof(struct mptsas_hotplug_event));
+	hot_plug_info.id = raid_event_data->VolumeID;
+	hot_plug_info.channel = raid_event_data->VolumeBus;
+	hot_plug_info.phys_disk_num = raid_event_data->PhysDiskNum;
+
+	if (raid_event_data->ReasonCode == MPI_EVENT_RAID_RC_VOLUME_DELETED ||
+	    raid_event_data->ReasonCode == MPI_EVENT_RAID_RC_VOLUME_CREATED ||
+	    raid_event_data->ReasonCode ==
+	    MPI_EVENT_RAID_RC_VOLUME_STATUS_CHANGED) {
+		sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL,
+		    hot_plug_info.id, 0);
+		hot_plug_info.sdev = sdev;
+		if (sdev)
+			vdevice = sdev->hostdata;
 	}
 
-	INIT_WORK(&ev->work, mptsas_hotplug_work);
-	ev->ioc = ioc;
-	ev->id = raid_event_data->VolumeID;
-	ev->channel = raid_event_data->VolumeBus;
-	ev->event_type = MPTSAS_IGNORE_EVENT;
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Entering %s: "
+	    "ReasonCode=%02x\n", ioc->name, __func__,
+	    raid_event_data->ReasonCode));
 
 	switch (raid_event_data->ReasonCode) {
 	case MPI_EVENT_RAID_RC_PHYSDISK_DELETED:
-		ev->phys_disk_num_valid = 1;
-		ev->phys_disk_num = raid_event_data->PhysDiskNum;
-		ev->event_type = MPTSAS_ADD_DEVICE;
+		hot_plug_info.event_type = MPTSAS_DEL_PHYSDISK_REPROBE;
 		break;
 	case MPI_EVENT_RAID_RC_PHYSDISK_CREATED:
-		ev->phys_disk_num_valid = 1;
-		ev->phys_disk_num = raid_event_data->PhysDiskNum;
-		ev->hidden_raid_component = 1;
-		ev->event_type = MPTSAS_DEL_DEVICE;
+		hot_plug_info.event_type = MPTSAS_ADD_PHYSDISK_REPROBE;
 		break;
 	case MPI_EVENT_RAID_RC_PHYSDISK_STATUS_CHANGED:
 		switch (state) {
 		case MPI_PD_STATE_ONLINE:
 		case MPI_PD_STATE_NOT_COMPATIBLE:
-			ev->phys_disk_num_valid = 1;
-			ev->phys_disk_num = raid_event_data->PhysDiskNum;
-			ev->hidden_raid_component = 1;
-			ev->event_type = MPTSAS_ADD_DEVICE;
+			mpt_raid_phys_disk_pg0(ioc,
+			    raid_event_data->PhysDiskNum, &phys_disk);
+			hot_plug_info.id = phys_disk.PhysDiskID;
+			hot_plug_info.channel = phys_disk.PhysDiskBus;
+			hot_plug_info.event_type = MPTSAS_ADD_PHYSDISK;
 			break;
+		case MPI_PD_STATE_FAILED:
 		case MPI_PD_STATE_MISSING:
 		case MPI_PD_STATE_OFFLINE_AT_HOST_REQUEST:
 		case MPI_PD_STATE_FAILED_AT_HOST_REQUEST:
 		case MPI_PD_STATE_OFFLINE_FOR_ANOTHER_REASON:
-			ev->phys_disk_num_valid = 1;
-			ev->phys_disk_num = raid_event_data->PhysDiskNum;
-			ev->event_type = MPTSAS_DEL_DEVICE;
+			hot_plug_info.event_type = MPTSAS_DEL_PHYSDISK;
 			break;
 		default:
 			break;
 		}
 		break;
 	case MPI_EVENT_RAID_RC_VOLUME_DELETED:
-		ev->event_type = MPTSAS_DEL_RAID;
+		if (!sdev)
+			break;
+		vdevice->vtarget->deleted = 1; /* block IO */
+		hot_plug_info.event_type = MPTSAS_DEL_RAID;
 		break;
 	case MPI_EVENT_RAID_RC_VOLUME_CREATED:
-		ev->event_type = MPTSAS_ADD_RAID;
+		if (sdev) {
+			scsi_device_put(sdev);
+			break;
+		}
+		hot_plug_info.event_type = MPTSAS_ADD_RAID;
 		break;
 	case MPI_EVENT_RAID_RC_VOLUME_STATUS_CHANGED:
+		if (!(status & MPI_RAIDVOL0_STATUS_FLAG_ENABLED)) {
+			if (!sdev)
+				break;
+			vdevice->vtarget->deleted = 1; /* block IO */
+			hot_plug_info.event_type = MPTSAS_DEL_RAID;
+			break;
+		}
 		switch (state) {
 		case MPI_RAIDVOL0_STATUS_STATE_FAILED:
 		case MPI_RAIDVOL0_STATUS_STATE_MISSING:
-			ev->event_type = MPTSAS_DEL_RAID;
+			if (!sdev)
+				break;
+			vdevice->vtarget->deleted = 1; /* block IO */
+			hot_plug_info.event_type = MPTSAS_DEL_RAID;
 			break;
 		case MPI_RAIDVOL0_STATUS_STATE_OPTIMAL:
 		case MPI_RAIDVOL0_STATUS_STATE_DEGRADED:
-			ev->event_type = MPTSAS_ADD_RAID;
+			if (sdev) {
+				scsi_device_put(sdev);
+				break;
+			}
+			hot_plug_info.event_type = MPTSAS_ADD_RAID;
 			break;
 		default:
 			break;
@@ -3067,7 +3537,11 @@ mptsas_send_raid_event(MPT_ADAPTER *ioc,
 	default:
 		break;
 	}
-	schedule_work(&ev->work);
+
+	if (hot_plug_info.event_type != MPTSAS_IGNORE_EVENT)
+		mptsas_hotplug_work(ioc, fw_event, &hot_plug_info);
+	else
+		mptsas_free_fw_event(ioc, fw_event);
 }
 
 static void
@@ -3106,76 +3580,88 @@ mptsas_send_discovery_event(MPT_ADAPTER *ioc,
  *
  */
 static void
-mptsas_send_ir2_event(MPT_ADAPTER *ioc, PTR_MPI_EVENT_DATA_IR2 ir2_data)
+mptsas_send_ir2_event(struct fw_event_work *fw_event)
 {
-	struct mptsas_hotplug_event *ev;
-
-	if (ir2_data->ReasonCode !=
-	    MPI_EVENT_IR2_RC_FOREIGN_CFG_DETECTED)
-		return;
-
-	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev)
+	MPT_ADAPTER	*ioc;
+	struct mptsas_hotplug_event hot_plug_info;
+	MPI_EVENT_DATA_IR2	*ir2_data;
+	u8 reasonCode;
+
+	ioc = fw_event->ioc;
+	ir2_data = (MPI_EVENT_DATA_IR2 *)fw_event->event_data;
+	reasonCode = ir2_data->ReasonCode;
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Entering %s: "
+	    "ReasonCode=%02x\n", ioc->name, __func__, reasonCode));
+
+	memset(&hot_plug_info, 0, sizeof(struct mptsas_hotplug_event));
+	hot_plug_info.id = ir2_data->TargetID;
+	hot_plug_info.channel = ir2_data->Bus;
+	switch (reasonCode) {
+	case MPI_EVENT_IR2_RC_FOREIGN_CFG_DETECTED:
+		hot_plug_info.event_type = MPTSAS_ADD_INACTIVE_VOLUME;
+		break;
+	default:
+		mptsas_free_fw_event(ioc, fw_event);
 		return;
-
-	INIT_WORK(&ev->work, mptsas_hotplug_work);
-	ev->ioc = ioc;
-	ev->id = ir2_data->TargetID;
-	ev->channel = ir2_data->Bus;
-	ev->event_type = MPTSAS_ADD_INACTIVE_VOLUME;
-
-	schedule_work(&ev->work);
-};
+	}
+	mptsas_hotplug_work(ioc, fw_event, &hot_plug_info);
+}
 
 static int
 mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 {
-	int rc=1;
-	u8 event = le32_to_cpu(reply->Event) & 0xFF;
-
-	if (!ioc->sh)
-		goto out;
+	u32 event = le32_to_cpu(reply->Event);
+	int sz, event_data_sz;
+	struct fw_event_work *fw_event;
+	unsigned long delay;
 
-	/*
-	 * sas_discovery_ignore_events
-	 *
-	 * This flag is to prevent anymore processing of
-	 * sas events once mptsas_remove function is called.
-	 */
-	if (ioc->sas_discovery_ignore_events) {
-		rc = mptscsih_event_process(ioc, reply);
-		goto out;
-	}
+	/* events turned off due to host reset or driver unloading */
+	if (ioc->fw_events_off)
+		return 0;
 
+	delay = msecs_to_jiffies(1);
 	switch (event) {
 	case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE:
-		mptsas_send_sas_event(ioc,
-			(EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)reply->Data);
-		break;
-	case MPI_EVENT_INTEGRATED_RAID:
-		mptsas_send_raid_event(ioc,
-			(EVENT_DATA_RAID *)reply->Data);
-		break;
-	case MPI_EVENT_PERSISTENT_TABLE_FULL:
-		INIT_WORK(&ioc->sas_persist_task,
-		    mptsas_persist_clear_table);
-		schedule_work(&ioc->sas_persist_task);
+	{
+		EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data =
+		    (EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)reply->Data;
+
+		if (sas_event_data->ReasonCode ==
+		    MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING) {
+			mptsas_target_reset_queue(ioc, sas_event_data);
+			return 0;
+		}
 		break;
-	 case MPI_EVENT_SAS_DISCOVERY:
+	}
+	case MPI_EVENT_SAS_DISCOVERY:
 		mptsas_send_discovery_event(ioc,
 			(EVENT_DATA_SAS_DISCOVERY *)reply->Data);
 		break;
+	case MPI_EVENT_INTEGRATED_RAID:
+	case MPI_EVENT_PERSISTENT_TABLE_FULL:
 	case MPI_EVENT_IR2:
-		mptsas_send_ir2_event(ioc,
-		    (PTR_MPI_EVENT_DATA_IR2)reply->Data);
+	case MPI_EVENT_SAS_PHY_LINK_STATUS:
+	case MPI_EVENT_QUEUE_FULL:
 		break;
 	default:
-		rc = mptscsih_event_process(ioc, reply);
-		break;
+		return 0;
 	}
- out:
 
-	return rc;
+	event_data_sz = ((reply->MsgLength * 4) -
+	    offsetof(EventNotificationReply_t, Data));
+	sz = offsetof(struct fw_event_work, event_data) + event_data_sz;
+	fw_event = kzalloc(sz, GFP_ATOMIC);
+	if (!fw_event) {
+		printk(MYIOC_s_WARN_FMT "%s: failed at (line=%d)\n", ioc->name,
+		 __func__, __LINE__);
+		return 0;
+	}
+	memcpy(fw_event->event_data, reply->Data, event_data_sz);
+	fw_event->event = event;
+	fw_event->ioc = ioc;
+	mptsas_add_fw_event(ioc, fw_event, delay);
+	return 0;
 }
 
 static int
@@ -3197,6 +3683,7 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return r;
 
 	ioc = pci_get_drvdata(pdev);
+	mptsas_fw_event_off(ioc);
 	ioc->DoneCtx = mptsasDoneCtx;
 	ioc->TaskCtx = mptsasTaskCtx;
 	ioc->InternalCtx = mptsasInternalCtx;
@@ -3339,6 +3826,9 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	hd->last_queue_full = 0;
 	INIT_LIST_HEAD(&hd->target_reset_list);
+	INIT_LIST_HEAD(&ioc->sas_device_info_list);
+	mutex_init(&ioc->sas_device_info_mutex);
+
 	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
 
 	if (ioc->sas_data.ptClear==1) {
@@ -3354,7 +3844,7 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	mptsas_scan_sas_topology(ioc);
-
+	mptsas_fw_event_on(ioc);
 	return 0;
 
  out_mptsas_probe:
@@ -3368,7 +3858,8 @@ mptsas_shutdown(struct pci_dev *pdev)
 {
 	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
 
-	ioc->sas_discovery_quiesce_io = 0;
+	mptsas_fw_event_off(ioc);
+	mptsas_cleanup_fw_event_q(ioc);
 }
 
 static void __devexit mptsas_remove(struct pci_dev *pdev)
@@ -3379,6 +3870,8 @@ static void __devexit mptsas_remove(struct pci_dev *pdev)
 
 	mptsas_shutdown(pdev);
 
+	mptsas_del_device_components(ioc);
+
 	ioc->sas_discovery_ignore_events = 1;
 	sas_remove_host(ioc->sh);
 
@@ -3387,6 +3880,7 @@ static void __devexit mptsas_remove(struct pci_dev *pdev)
 		list_del(&p->list);
 		for (i = 0 ; i < p->num_phys ; i++)
 			mptsas_port_delete(ioc, p->phy_info[i].port_details);
+
 		kfree(p->phy_info);
 		kfree(p);
 	}
diff --git a/drivers/message/fusion/mptsas.h b/drivers/message/fusion/mptsas.h
index bf528a5..9e0885a 100644
--- a/drivers/message/fusion/mptsas.h
+++ b/drivers/message/fusion/mptsas.h
@@ -61,12 +61,30 @@ enum mptsas_hotplug_action {
 	MPTSAS_DEL_DEVICE,
 	MPTSAS_ADD_RAID,
 	MPTSAS_DEL_RAID,
+	MPTSAS_ADD_PHYSDISK,
+	MPTSAS_ADD_PHYSDISK_REPROBE,
+	MPTSAS_DEL_PHYSDISK,
+	MPTSAS_DEL_PHYSDISK_REPROBE,
 	MPTSAS_ADD_INACTIVE_VOLUME,
 	MPTSAS_IGNORE_EVENT,
 };
 
+struct mptsas_mapping{
+	u8			id;
+	u8			channel;
+};
+
+struct mptsas_device_info {
+	struct list_head 	list;
+	struct mptsas_mapping	os;	/* operating system mapping*/
+	struct mptsas_mapping	fw;	/* firmware mapping */
+	u64			sas_address;
+	u32			device_info; /* specific bits for devices */
+	u16			slot;		/* enclosure slot id */
+	u64			enclosure_logical_id; /*enclosure address */
+};
+
 struct mptsas_hotplug_event {
-	struct work_struct	work;
 	MPT_ADAPTER		*ioc;
 	enum mptsas_hotplug_action event_type;
 	u64			sas_address;
@@ -74,11 +92,18 @@ struct mptsas_hotplug_event {
 	u8			id;
 	u32			device_info;
 	u16			handle;
-	u16			parent_handle;
 	u8			phy_id;
-	u8			phys_disk_num_valid;	/* hrc (hidden raid component) */
 	u8			phys_disk_num;		/* hrc - unique index*/
-	u8			hidden_raid_component;	/* hrc - don't expose*/
+	struct scsi_device	*sdev;
+};
+
+struct fw_event_work {
+	struct list_head 	list;
+	struct delayed_work	 work;
+	MPT_ADAPTER	*ioc;
+	u32			event;
+	u8			retries;
+	u8			event_data[1];
 };
 
 struct mptsas_discovery_event {
-- 
cgit v0.10.2


From f9c34022eae9c76465dc2ec8805b9905e171ef40 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:49:36 +0530
Subject: [SCSI] mpt fusion: SAS topology scan changes, expander events

SAS topology scan is restructured. HBA firmware is generating more
events. Expander Events are added, Link status events are also added with
respect to SAS topology scan optimization.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index b6efc64..8d1aadb 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -704,6 +704,11 @@ typedef struct _MPT_ADAPTER
 	struct mutex		 sas_discovery_mutex;
 	u8			 sas_discovery_runtime;
 	u8			 sas_discovery_ignore_events;
+
+	/* port_info object for the host */
+	struct mptsas_portinfo	*hba_port_info;
+	u64			 hba_port_sas_addr;
+	u16			 hba_port_num_phy;
 	struct list_head	 sas_device_info_list;
 	struct mutex		 sas_device_info_mutex;
 	u8			 sas_discovery_quiesce_io;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 22a027e..7f2f76f 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -113,6 +113,12 @@ static int mptsas_add_end_device(MPT_ADAPTER *ioc,
 	struct mptsas_phyinfo *phy_info);
 static void mptsas_del_end_device(MPT_ADAPTER *ioc,
 	struct mptsas_phyinfo *phy_info);
+static void mptsas_send_link_status_event(struct fw_event_work *fw_event);
+static struct mptsas_portinfo	*mptsas_find_portinfo_by_sas_address
+		(MPT_ADAPTER *ioc, u64 sas_address);
+static void mptsas_expander_delete(MPT_ADAPTER *ioc,
+		struct mptsas_portinfo *port_info, u8 force);
+static void mptsas_send_expander_event(struct fw_event_work *fw_event);
 
 static void mptsas_print_phy_data(MPT_ADAPTER *ioc,
 					MPI_SAS_IO_UNIT0_PHY_DATA *phy_data)
@@ -342,20 +348,6 @@ static inline MPT_ADAPTER *rphy_to_ioc(struct sas_rphy *rphy)
 	return ((MPT_SCSI_HOST *)shost->hostdata)->ioc;
 }
 
-static struct mptsas_portinfo *
-mptsas_get_hba_portinfo(MPT_ADAPTER *ioc)
-{
-	struct list_head	*head = &ioc->sas_topology;
-	struct mptsas_portinfo	*pi = NULL;
-
-	/* always the first entry on sas_topology list */
-
-	if (!list_empty(head))
-		pi = list_entry(head->next, struct mptsas_portinfo, list);
-
-	return pi;
-}
-
 /*
  * mptsas_find_portinfo_by_handle
  *
@@ -377,6 +369,38 @@ mptsas_find_portinfo_by_handle(MPT_ADAPTER *ioc, u16 handle)
 	return rc;
 }
 
+/**
+ *	mptsas_find_portinfo_by_sas_address -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@handle:
+ *
+ *	This function should be called with the sas_topology_mutex already held
+ *
+ **/
+static struct mptsas_portinfo *
+mptsas_find_portinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
+{
+	struct mptsas_portinfo *port_info, *rc = NULL;
+	int i;
+
+	if (sas_address >= ioc->hba_port_sas_addr &&
+	    sas_address < (ioc->hba_port_sas_addr +
+	    ioc->hba_port_num_phy))
+		return ioc->hba_port_info;
+
+	mutex_lock(&ioc->sas_topology_mutex);
+	list_for_each_entry(port_info, &ioc->sas_topology, list)
+		for (i = 0; i < port_info->num_phys; i++)
+			if (port_info->phy_info[i].identify.sas_address ==
+			    sas_address) {
+				rc = port_info;
+				goto out;
+			}
+ out:
+	mutex_unlock(&ioc->sas_topology_mutex);
+	return rc;
+}
+
 /*
  * Returns true if there is a scsi end device
  */
@@ -940,7 +964,6 @@ mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 {
 	MPT_SCSI_HOST	*hd = shost_priv(ioc->sh);
         struct list_head *head = &hd->target_reset_list;
-	EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data;
 	u8		id, channel;
 	struct mptsas_target_reset_event	*target_reset_list;
 	SCSITaskMgmtReply_t *pScsiTmReply;
@@ -995,7 +1018,6 @@ mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 	    ioc->name, jiffies_to_msecs(jiffies -
 	    target_reset_list->time_count)/1000));
 
-	sas_event_data = &target_reset_list->sas_event_data;
 	id = pScsiTmReply->TargetID;
 	channel = pScsiTmReply->Bus;
 	target_reset_list->time_count = jiffies;
@@ -1410,6 +1432,12 @@ mptsas_firmware_event_work(struct work_struct *work)
 		    MPI_SAS_OP_CLEAR_NOT_PRESENT);
 		mptsas_free_fw_event(ioc, fw_event);
 		break;
+	case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE:
+		mptsas_send_expander_event(fw_event);
+		break;
+	case MPI_EVENT_SAS_PHY_LINK_STATUS:
+		mptsas_send_link_status_event(fw_event);
+		break;
 	}
 }
 
@@ -1909,7 +1937,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		struct mptsas_portinfo *port_info;
 
 		mutex_lock(&ioc->sas_topology_mutex);
-		port_info = mptsas_get_hba_portinfo(ioc);
+		port_info = ioc->hba_port_info;
 		if (port_info && port_info->phy_info)
 			sas_address =
 				port_info->phy_info[0].phy->identify.sas_address;
@@ -2646,9 +2674,7 @@ static int mptsas_probe_one_phy(struct device *dev,
 			struct mptsas_portinfo *port_info;
 			int i;
 
-			mutex_lock(&ioc->sas_topology_mutex);
-			port_info = mptsas_get_hba_portinfo(ioc);
-			mutex_unlock(&ioc->sas_topology_mutex);
+			port_info = ioc->hba_port_info;
 
 			for (i = 0; i < port_info->num_phys; i++)
 				if (port_info->phy_info[i].identify.sas_address ==
@@ -2707,7 +2733,7 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc)
 	struct mptsas_portinfo *port_info, *hba;
 	int error = -ENOMEM, i;
 
-	hba = kzalloc(sizeof(*port_info), GFP_KERNEL);
+	hba = kzalloc(sizeof(struct mptsas_portinfo), GFP_KERNEL);
 	if (! hba)
 		goto out;
 
@@ -2717,9 +2743,10 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc)
 
 	mptsas_sas_io_unit_pg1(ioc);
 	mutex_lock(&ioc->sas_topology_mutex);
-	port_info = mptsas_get_hba_portinfo(ioc);
+	port_info = ioc->hba_port_info;
 	if (!port_info) {
-		port_info = hba;
+		ioc->hba_port_info = port_info = hba;
+		ioc->hba_port_num_phy = port_info->num_phys;
 		list_add_tail(&port_info->list, &ioc->sas_topology);
 	} else {
 		for (i = 0; i < hba->num_phys; i++) {
@@ -2735,15 +2762,22 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc)
 		hba = NULL;
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
+#if defined(CPQ_CIM)
+	ioc->num_ports = port_info->num_phys;
+#endif
 	for (i = 0; i < port_info->num_phys; i++) {
 		mptsas_sas_phy_pg0(ioc, &port_info->phy_info[i],
 			(MPI_SAS_PHY_PGAD_FORM_PHY_NUMBER <<
 			 MPI_SAS_PHY_PGAD_FORM_SHIFT), i);
-
+		port_info->phy_info[i].identify.handle =
+		    port_info->phy_info[i].handle;
 		mptsas_sas_device_pg0(ioc, &port_info->phy_info[i].identify,
 			(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
 			 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-			 port_info->phy_info[i].handle);
+			 port_info->phy_info[i].identify.handle);
+		if (!ioc->hba_port_sas_addr)
+			ioc->hba_port_sas_addr =
+			    port_info->phy_info[i].identify.sas_address;
 		port_info->phy_info[i].identify.phy_id =
 		    port_info->phy_info[i].phy_id = i;
 		if (port_info->phy_info[i].attached.handle)
@@ -2768,248 +2802,497 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc)
 	return error;
 }
 
-static int
-mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle)
+static void
+mptsas_expander_refresh(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
 {
-	struct mptsas_portinfo *port_info, *p, *ex;
-	struct device *parent;
-	struct sas_rphy *rphy;
-	int error = -ENOMEM, i, j;
-
-	ex = kzalloc(sizeof(*port_info), GFP_KERNEL);
-	if (!ex)
-		goto out;
-
-	error = mptsas_sas_expander_pg0(ioc, ex,
-	    (MPI_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE <<
-	     MPI_SAS_EXPAND_PGAD_FORM_SHIFT), *handle);
-	if (error)
-		goto out_free_port_info;
-
-	*handle = ex->phy_info[0].handle;
-
-	mutex_lock(&ioc->sas_topology_mutex);
-	port_info = mptsas_find_portinfo_by_handle(ioc, *handle);
-	if (!port_info) {
-		port_info = ex;
-		list_add_tail(&port_info->list, &ioc->sas_topology);
-	} else {
-		for (i = 0; i < ex->num_phys; i++) {
-			port_info->phy_info[i].handle =
-				ex->phy_info[i].handle;
-			port_info->phy_info[i].port_id =
-				ex->phy_info[i].port_id;
-		}
-		kfree(ex->phy_info);
-		kfree(ex);
-		ex = NULL;
-	}
-	mutex_unlock(&ioc->sas_topology_mutex);
+	struct mptsas_portinfo *parent;
+	struct device *parent_dev;
+	struct sas_rphy	*rphy;
+	int		i;
+	u64		sas_address; /* expander sas address */
+	u32		handle;
 
+	handle = port_info->phy_info[0].handle;
+	sas_address = port_info->phy_info[0].identify.sas_address;
 	for (i = 0; i < port_info->num_phys; i++) {
 		mptsas_sas_expander_pg1(ioc, &port_info->phy_info[i],
-			(MPI_SAS_EXPAND_PGAD_FORM_HANDLE_PHY_NUM <<
-			 MPI_SAS_EXPAND_PGAD_FORM_SHIFT), (i << 16) + *handle);
+		    (MPI_SAS_EXPAND_PGAD_FORM_HANDLE_PHY_NUM <<
+		    MPI_SAS_EXPAND_PGAD_FORM_SHIFT), (i << 16) + handle);
 
-		if (port_info->phy_info[i].identify.handle) {
-			mptsas_sas_device_pg0(ioc,
-				&port_info->phy_info[i].identify,
-				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
-				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-				port_info->phy_info[i].identify.handle);
-			port_info->phy_info[i].identify.phy_id =
-			    port_info->phy_info[i].phy_id;
-		}
+		mptsas_sas_device_pg0(ioc,
+		    &port_info->phy_info[i].identify,
+		    (MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
+		    MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+		    port_info->phy_info[i].identify.handle);
+		port_info->phy_info[i].identify.phy_id =
+		    port_info->phy_info[i].phy_id;
 
 		if (port_info->phy_info[i].attached.handle) {
 			mptsas_sas_device_pg0(ioc,
-				&port_info->phy_info[i].attached,
-				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
-				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-				port_info->phy_info[i].attached.handle);
+			    &port_info->phy_info[i].attached,
+			    (MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
+			     MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+			    port_info->phy_info[i].attached.handle);
 			port_info->phy_info[i].attached.phy_id =
 			    port_info->phy_info[i].phy_id;
 		}
 	}
 
-	parent = &ioc->sh->shost_gendev;
-	for (i = 0; i < port_info->num_phys; i++) {
-		mutex_lock(&ioc->sas_topology_mutex);
-		list_for_each_entry(p, &ioc->sas_topology, list) {
-			for (j = 0; j < p->num_phys; j++) {
-				if (port_info->phy_info[i].identify.handle !=
-						p->phy_info[j].attached.handle)
-					continue;
-				rphy = mptsas_get_rphy(&p->phy_info[j]);
-				parent = &rphy->dev;
-			}
-		}
+	mutex_lock(&ioc->sas_topology_mutex);
+	parent = mptsas_find_portinfo_by_handle(ioc,
+	    port_info->phy_info[0].identify.handle_parent);
+	if (!parent) {
 		mutex_unlock(&ioc->sas_topology_mutex);
+		return;
+	}
+	for (i = 0, parent_dev = NULL; i < parent->num_phys && !parent_dev;
+	    i++) {
+		if (parent->phy_info[i].attached.sas_address == sas_address) {
+			rphy = mptsas_get_rphy(&parent->phy_info[i]);
+			parent_dev = &rphy->dev;
+		}
 	}
+	mutex_unlock(&ioc->sas_topology_mutex);
 
 	mptsas_setup_wide_ports(ioc, port_info);
-
 	for (i = 0; i < port_info->num_phys; i++, ioc->sas_index++)
-		mptsas_probe_one_phy(parent, &port_info->phy_info[i],
+		mptsas_probe_one_phy(parent_dev, &port_info->phy_info[i],
 		    ioc->sas_index, 0);
+}
 
-	return 0;
+static void
+mptsas_expander_event_add(MPT_ADAPTER *ioc,
+    MpiEventDataSasExpanderStatusChange_t *expander_data)
+{
+	struct mptsas_portinfo *port_info;
+	int i;
+	__le64 sas_address;
 
- out_free_port_info:
-	if (ex) {
-		kfree(ex->phy_info);
-		kfree(ex);
+	port_info = kzalloc(sizeof(struct mptsas_portinfo), GFP_KERNEL);
+	if (!port_info)
+		BUG();
+	port_info->num_phys = (expander_data->NumPhys) ?
+	    expander_data->NumPhys : 1;
+	port_info->phy_info = kcalloc(port_info->num_phys,
+	    sizeof(struct mptsas_phyinfo), GFP_KERNEL);
+	if (!port_info->phy_info)
+		BUG();
+	memcpy(&sas_address, &expander_data->SASAddress, sizeof(__le64));
+	for (i = 0; i < port_info->num_phys; i++) {
+		port_info->phy_info[i].portinfo = port_info;
+		port_info->phy_info[i].handle =
+		    le16_to_cpu(expander_data->DevHandle);
+		port_info->phy_info[i].identify.sas_address =
+		    le64_to_cpu(sas_address);
+		port_info->phy_info[i].identify.handle_parent =
+		    le16_to_cpu(expander_data->ParentDevHandle);
 	}
- out:
-	return error;
+
+	mutex_lock(&ioc->sas_topology_mutex);
+	list_add_tail(&port_info->list, &ioc->sas_topology);
+	mutex_unlock(&ioc->sas_topology_mutex);
+
+	printk(MYIOC_s_INFO_FMT "add expander: num_phys %d, "
+	    "sas_addr (0x%llx)\n", ioc->name, port_info->num_phys,
+	    (unsigned long long)sas_address);
+
+	mptsas_expander_refresh(ioc, port_info);
 }
 
-/*
- * mptsas_delete_expander_phys
- *
- *
- * This will traverse topology, and remove expanders
- * that are no longer present
- */
+/**
+ * mptsas_delete_expander_siblings - remove siblings attached to expander
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @parent: the parent port_info object
+ * @expander: the expander port_info object
+ **/
 static void
-mptsas_delete_expander_phys(MPT_ADAPTER *ioc)
+mptsas_delete_expander_siblings(MPT_ADAPTER *ioc, struct mptsas_portinfo
+    *parent, struct mptsas_portinfo *expander)
 {
-	struct mptsas_portinfo buffer;
-	struct mptsas_portinfo *port_info, *n, *parent;
 	struct mptsas_phyinfo *phy_info;
-	struct sas_port * port;
+	struct mptsas_portinfo *port_info;
+	struct sas_rphy *rphy;
 	int i;
-	u64	expander_sas_address;
 
-	mutex_lock(&ioc->sas_topology_mutex);
-	list_for_each_entry_safe(port_info, n, &ioc->sas_topology, list) {
+	phy_info = expander->phy_info;
+	for (i = 0; i < expander->num_phys; i++, phy_info++) {
+		rphy = mptsas_get_rphy(phy_info);
+		if (!rphy)
+			continue;
+		if (rphy->identify.device_type == SAS_END_DEVICE)
+			mptsas_del_end_device(ioc, phy_info);
+	}
 
-		if (!(port_info->phy_info[0].identify.device_info &
-		    MPI_SAS_DEVICE_INFO_SMP_TARGET))
+	phy_info = expander->phy_info;
+	for (i = 0; i < expander->num_phys; i++, phy_info++) {
+		rphy = mptsas_get_rphy(phy_info);
+		if (!rphy)
 			continue;
+		if (rphy->identify.device_type ==
+		    MPI_SAS_DEVICE_INFO_EDGE_EXPANDER ||
+		    rphy->identify.device_type ==
+		    MPI_SAS_DEVICE_INFO_FANOUT_EXPANDER) {
+			port_info = mptsas_find_portinfo_by_sas_address(ioc,
+			    rphy->identify.sas_address);
+			if (!port_info)
+				continue;
+			if (port_info == parent) /* backlink rphy */
+				continue;
+			/*
+			Delete this expander even if the expdevpage is exists
+			because the parent expander is already deleted
+			*/
+			mptsas_expander_delete(ioc, port_info, 1);
+		}
+	}
+}
 
-		if (mptsas_sas_expander_pg0(ioc, &buffer,
-		     (MPI_SAS_EXPAND_PGAD_FORM_HANDLE <<
-		     MPI_SAS_EXPAND_PGAD_FORM_SHIFT),
-		     port_info->phy_info[0].handle)) {
 
-			/*
-			 * Obtain the port_info instance to the parent port
-			 */
-			parent = mptsas_find_portinfo_by_handle(ioc,
-			    port_info->phy_info[0].identify.handle_parent);
+/**
+ *	mptsas_expander_delete - remove this expander
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@port_info: expander port_info struct
+ *	@force: Flag to forcefully delete the expander
+ *
+ **/
 
-			if (!parent)
-				goto next_port;
+static void mptsas_expander_delete(MPT_ADAPTER *ioc,
+		struct mptsas_portinfo *port_info, u8 force)
+{
 
-			expander_sas_address =
-				port_info->phy_info[0].identify.sas_address;
+	struct mptsas_portinfo *parent;
+	int		i;
+	u64		expander_sas_address;
+	struct mptsas_phyinfo *phy_info;
+	struct mptsas_portinfo buffer;
+	struct mptsas_portinfo_details *port_details;
+	struct sas_port *port;
 
-			/*
-			 * Delete rphys in the parent that point
-			 * to this expander.  The transport layer will
-			 * cleanup all the children.
-			 */
-			phy_info = parent->phy_info;
-			for (i = 0; i < parent->num_phys; i++, phy_info++) {
-				port = mptsas_get_port(phy_info);
-				if (!port)
-					continue;
-				if (phy_info->attached.sas_address !=
-					expander_sas_address)
-					continue;
-				dsaswideprintk(ioc,
-				    dev_printk(KERN_DEBUG, &port->dev,
-				    MYIOC_s_FMT "delete port (%d)\n", ioc->name,
-				    port->port_identifier));
-				sas_port_delete(port);
-				mptsas_port_delete(ioc, phy_info->port_details);
-			}
- next_port:
+	if (!port_info)
+		return;
 
-			phy_info = port_info->phy_info;
-			for (i = 0; i < port_info->num_phys; i++, phy_info++)
-				mptsas_port_delete(ioc, phy_info->port_details);
+	/* see if expander is still there before deleting */
+	mptsas_sas_expander_pg0(ioc, &buffer,
+	    (MPI_SAS_EXPAND_PGAD_FORM_HANDLE <<
+	    MPI_SAS_EXPAND_PGAD_FORM_SHIFT),
+	    port_info->phy_info[0].identify.handle);
 
-			list_del(&port_info->list);
-			kfree(port_info->phy_info);
-			kfree(port_info);
-		}
-		/*
-		* Free this memory allocated from inside
-		* mptsas_sas_expander_pg0
-		*/
+	if (buffer.num_phys) {
 		kfree(buffer.phy_info);
+		if (!force)
+			return;
 	}
-	mutex_unlock(&ioc->sas_topology_mutex);
+
+
+	/*
+	 * Obtain the port_info instance to the parent port
+	 */
+	port_details = NULL;
+	expander_sas_address =
+	    port_info->phy_info[0].identify.sas_address;
+	parent = mptsas_find_portinfo_by_handle(ioc,
+	    port_info->phy_info[0].identify.handle_parent);
+	mptsas_delete_expander_siblings(ioc, parent, port_info);
+	if (!parent)
+		goto out;
+
+	/*
+	 * Delete rphys in the parent that point
+	 * to this expander.
+	 */
+	phy_info = parent->phy_info;
+	port = NULL;
+	for (i = 0; i < parent->num_phys; i++, phy_info++) {
+		if (!phy_info->phy)
+			continue;
+		if (phy_info->attached.sas_address !=
+		    expander_sas_address)
+			continue;
+		if (!port) {
+			port = mptsas_get_port(phy_info);
+			port_details = phy_info->port_details;
+		}
+		dev_printk(KERN_DEBUG, &phy_info->phy->dev,
+		    MYIOC_s_FMT "delete phy %d, phy-obj (0x%p)\n", ioc->name,
+		    phy_info->phy_id, phy_info->phy);
+		sas_port_delete_phy(port, phy_info->phy);
+	}
+	if (port) {
+		dev_printk(KERN_DEBUG, &port->dev,
+		    MYIOC_s_FMT "delete port %d, sas_addr (0x%llx)\n",
+		    ioc->name, port->port_identifier,
+		    (unsigned long long)expander_sas_address);
+		sas_port_delete(port);
+		mptsas_port_delete(ioc, port_details);
+	}
+ out:
+
+	printk(MYIOC_s_INFO_FMT "delete expander: num_phys %d, "
+	    "sas_addr (0x%llx)\n",  ioc->name, port_info->num_phys,
+	    (unsigned long long)expander_sas_address);
+
+	/*
+	 * free link
+	 */
+	list_del(&port_info->list);
+	kfree(port_info->phy_info);
+	kfree(port_info);
 }
 
-/*
- * Start of day discovery
+
+/**
+ * mptsas_send_expander_event - expanders events
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @expander_data: event data
+ *
+ *
+ * This function handles adding, removing, and refreshing
+ * device handles within the expander objects.
  */
 static void
-mptsas_scan_sas_topology(MPT_ADAPTER *ioc)
+mptsas_send_expander_event(struct fw_event_work *fw_event)
 {
-	u32 handle = 0xFFFF;
+	MPT_ADAPTER *ioc;
+	MpiEventDataSasExpanderStatusChange_t *expander_data;
+	struct mptsas_portinfo *port_info;
+	__le64 sas_address;
 	int i;
 
-	mutex_lock(&ioc->sas_discovery_mutex);
-	mptsas_probe_hba_phys(ioc);
-	while (!mptsas_probe_expander_phys(ioc, &handle))
-		;
-	/*
-	  Reporting RAID volumes.
-	*/
-	if (!ioc->ir_firmware)
-		goto out;
-	if (!ioc->raid_data.pIocPg2)
-		goto out;
-	if (!ioc->raid_data.pIocPg2->NumActiveVolumes)
-		goto out;
-	for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++) {
-		scsi_add_device(ioc->sh, MPTSAS_RAID_CHANNEL,
-		    ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID, 0);
+	ioc = fw_event->ioc;
+	expander_data = (MpiEventDataSasExpanderStatusChange_t *)
+	    fw_event->event_data;
+	memcpy(&sas_address, &expander_data->SASAddress, sizeof(__le64));
+	port_info = mptsas_find_portinfo_by_sas_address(ioc, sas_address);
+
+	if (expander_data->ReasonCode == MPI_EVENT_SAS_EXP_RC_ADDED) {
+		if (port_info) {
+			for (i = 0; i < port_info->num_phys; i++) {
+				port_info->phy_info[i].portinfo = port_info;
+				port_info->phy_info[i].handle =
+				    le16_to_cpu(expander_data->DevHandle);
+				port_info->phy_info[i].identify.sas_address =
+				    le64_to_cpu(sas_address);
+				port_info->phy_info[i].identify.handle_parent =
+				    le16_to_cpu(expander_data->ParentDevHandle);
+			}
+			mptsas_expander_refresh(ioc, port_info);
+		} else if (!port_info && expander_data->NumPhys)
+			mptsas_expander_event_add(ioc, expander_data);
+	} else if (expander_data->ReasonCode ==
+	    MPI_EVENT_SAS_EXP_RC_NOT_RESPONDING)
+		mptsas_expander_delete(ioc, port_info, 0);
+
+	mptsas_free_fw_event(ioc, fw_event);
+}
+
+
+/**
+ * mptsas_expander_add -
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @handle:
+ *
+ */
+struct mptsas_portinfo *
+mptsas_expander_add(MPT_ADAPTER *ioc, u16 handle)
+{
+	struct mptsas_portinfo buffer, *port_info;
+	int i;
+
+	if ((mptsas_sas_expander_pg0(ioc, &buffer,
+	    (MPI_SAS_EXPAND_PGAD_FORM_HANDLE <<
+	    MPI_SAS_EXPAND_PGAD_FORM_SHIFT), handle)))
+		return NULL;
+
+	port_info = kzalloc(sizeof(struct mptsas_portinfo), GFP_ATOMIC);
+	if (!port_info) {
+		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+		"%s: exit at line=%d\n", ioc->name,
+		__func__, __LINE__));
+		return NULL;
+	}
+	port_info->num_phys = buffer.num_phys;
+	port_info->phy_info = buffer.phy_info;
+	for (i = 0; i < port_info->num_phys; i++)
+		port_info->phy_info[i].portinfo = port_info;
+	mutex_lock(&ioc->sas_topology_mutex);
+	list_add_tail(&port_info->list, &ioc->sas_topology);
+	mutex_unlock(&ioc->sas_topology_mutex);
+	printk(MYIOC_s_INFO_FMT "add expander: num_phys %d, "
+	    "sas_addr (0x%llx)\n", ioc->name, port_info->num_phys,
+	    (unsigned long long)buffer.phy_info[0].identify.sas_address);
+	mptsas_expander_refresh(ioc, port_info);
+	return port_info;
+}
+
+static void
+mptsas_send_link_status_event(struct fw_event_work *fw_event)
+{
+	MPT_ADAPTER *ioc;
+	MpiEventDataSasPhyLinkStatus_t *link_data;
+	struct mptsas_portinfo *port_info;
+	struct mptsas_phyinfo *phy_info = NULL;
+	__le64 sas_address;
+	u8 phy_num;
+	u8 link_rate;
+
+	ioc = fw_event->ioc;
+	link_data = (MpiEventDataSasPhyLinkStatus_t *)fw_event->event_data;
+
+	memcpy(&sas_address, &link_data->SASAddress, sizeof(__le64));
+	sas_address = le64_to_cpu(sas_address);
+	link_rate = link_data->LinkRates >> 4;
+	phy_num = link_data->PhyNum;
+
+	port_info = mptsas_find_portinfo_by_sas_address(ioc, sas_address);
+	if (port_info) {
+		phy_info = &port_info->phy_info[phy_num];
+		if (phy_info)
+			phy_info->negotiated_link_rate = link_rate;
+	}
+
+	if (link_rate == MPI_SAS_IOUNIT0_RATE_1_5 ||
+	    link_rate == MPI_SAS_IOUNIT0_RATE_3_0) {
+
+		if (!port_info)
+			goto out;
+
+		if (port_info == ioc->hba_port_info)
+			mptsas_probe_hba_phys(ioc);
+		else
+			mptsas_expander_refresh(ioc, port_info);
+	} else if (phy_info && phy_info->phy) {
+		if (link_rate ==  MPI_SAS_IOUNIT0_RATE_PHY_DISABLED)
+			phy_info->phy->negotiated_linkrate =
+			    SAS_PHY_DISABLED;
+		else if (link_rate ==
+		    MPI_SAS_IOUNIT0_RATE_FAILED_SPEED_NEGOTIATION)
+			phy_info->phy->negotiated_linkrate =
+			    SAS_LINK_RATE_FAILED;
+		else
+			phy_info->phy->negotiated_linkrate =
+			    SAS_LINK_RATE_UNKNOWN;
 	}
  out:
-	mutex_unlock(&ioc->sas_discovery_mutex);
+	mptsas_free_fw_event(ioc, fw_event);
 }
 
-/*
- * Work queue thread to handle Runtime discovery
- * Mere purpose is the hot add/delete of expanders
- *(Mutex UNLOCKED)
- */
+/**
+ *	mptsas_probe_expanders - adding expanders
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *
+ **/
 static void
-__mptsas_discovery_work(MPT_ADAPTER *ioc)
+mptsas_probe_expanders(MPT_ADAPTER *ioc)
 {
-	u32 handle = 0xFFFF;
+	struct mptsas_portinfo buffer, *port_info;
+	u32 			handle;
+	int i;
 
-	ioc->sas_discovery_runtime=1;
-	mptsas_delete_expander_phys(ioc);
-	mptsas_probe_hba_phys(ioc);
-	while (!mptsas_probe_expander_phys(ioc, &handle))
-		;
-	ioc->sas_discovery_runtime=0;
+	handle = 0xFFFF;
+	while (!mptsas_sas_expander_pg0(ioc, &buffer,
+	    (MPI_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE <<
+	     MPI_SAS_EXPAND_PGAD_FORM_SHIFT), handle)) {
+
+		handle = buffer.phy_info[0].handle;
+		port_info = mptsas_find_portinfo_by_sas_address(ioc,
+		    buffer.phy_info[0].identify.sas_address);
+
+		if (port_info) {
+			/* refreshing handles */
+			for (i = 0; i < buffer.num_phys; i++) {
+				port_info->phy_info[i].handle = handle;
+				port_info->phy_info[i].identify.handle_parent =
+				    buffer.phy_info[0].identify.handle_parent;
+			}
+			mptsas_expander_refresh(ioc, port_info);
+			kfree(buffer.phy_info);
+			continue;
+		}
+
+		port_info = kzalloc(sizeof(struct mptsas_portinfo), GFP_KERNEL);
+		if (!port_info) {
+			dfailprintk(ioc, printk(MYIOC_s_ERR_FMT
+			"%s: exit at line=%d\n", ioc->name,
+			__func__, __LINE__));
+			return;
+		}
+		port_info->num_phys = buffer.num_phys;
+		port_info->phy_info = buffer.phy_info;
+		for (i = 0; i < port_info->num_phys; i++)
+			port_info->phy_info[i].portinfo = port_info;
+		mutex_lock(&ioc->sas_topology_mutex);
+		list_add_tail(&port_info->list, &ioc->sas_topology);
+		mutex_unlock(&ioc->sas_topology_mutex);
+		printk(MYIOC_s_INFO_FMT "add expander: num_phys %d, "
+		    "sas_addr (0x%llx)\n", ioc->name, port_info->num_phys,
+	    (unsigned long long)buffer.phy_info[0].identify.sas_address);
+		mptsas_expander_refresh(ioc, port_info);
+	}
+}
+
+static void
+mptsas_probe_devices(MPT_ADAPTER *ioc)
+{
+	u16 handle;
+	struct mptsas_devinfo sas_device;
+	struct mptsas_phyinfo *phy_info;
+
+	handle = 0xFFFF;
+	while (!(mptsas_sas_device_pg0(ioc, &sas_device,
+	    MPI_SAS_DEVICE_PGAD_FORM_GET_NEXT_HANDLE, handle))) {
+
+		handle = sas_device.handle;
+
+		if ((sas_device.device_info &
+		     (MPI_SAS_DEVICE_INFO_SSP_TARGET |
+		      MPI_SAS_DEVICE_INFO_STP_TARGET |
+		      MPI_SAS_DEVICE_INFO_SATA_DEVICE)) == 0)
+			continue;
+
+		phy_info = mptsas_refreshing_device_handles(ioc, &sas_device);
+		if (!phy_info)
+			continue;
+
+		if (mptsas_get_rphy(phy_info))
+			continue;
+
+		mptsas_add_end_device(ioc, phy_info);
+	}
 }
 
 /*
- * Work queue thread to handle Runtime discovery
- * Mere purpose is the hot add/delete of expanders
- *(Mutex LOCKED)
+ * Start of day discovery
  */
 static void
-mptsas_discovery_work(struct work_struct *work)
+mptsas_scan_sas_topology(MPT_ADAPTER *ioc)
 {
-	struct mptsas_discovery_event *ev =
-		container_of(work, struct mptsas_discovery_event, work);
-	MPT_ADAPTER *ioc = ev->ioc;
+	struct scsi_device *sdev;
+	int i;
 
-	mutex_lock(&ioc->sas_discovery_mutex);
-	__mptsas_discovery_work(ioc);
-	mutex_unlock(&ioc->sas_discovery_mutex);
-	kfree(ev);
-}
+	mptsas_probe_hba_phys(ioc);
+	mptsas_probe_expanders(ioc);
+	mptsas_probe_devices(ioc);
 
+	/*
+	  Reporting RAID volumes.
+	*/
+	if (!ioc->ir_firmware || !ioc->raid_data.pIocPg2 ||
+	    !ioc->raid_data.pIocPg2->NumActiveVolumes)
+		return;
+	for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++) {
+		sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL,
+		    ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID, 0);
+		if (sdev) {
+			scsi_device_put(sdev);
+			continue;
+		}
+		printk(MYIOC_s_INFO_FMT "attaching raid volume, channel %d, "
+		    "id %d\n", ioc->name, MPTSAS_RAID_CHANNEL,
+		    ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID);
+		scsi_add_device(ioc->sh, MPTSAS_RAID_CHANNEL,
+		    ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID, 0);
+	}
+}
 
 static struct mptsas_phyinfo *
 mptsas_find_phyinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
@@ -3544,33 +3827,6 @@ mptsas_send_raid_event(struct fw_event_work *fw_event)
 		mptsas_free_fw_event(ioc, fw_event);
 }
 
-static void
-mptsas_send_discovery_event(MPT_ADAPTER *ioc,
-	EVENT_DATA_SAS_DISCOVERY *discovery_data)
-{
-	struct mptsas_discovery_event *ev;
-	u32 discovery_status;
-
-	/*
-	 * DiscoveryStatus
-	 *
-	 * This flag will be non-zero when firmware
-	 * kicks off discovery, and return to zero
-	 * once its completed.
-	 */
-	discovery_status = le32_to_cpu(discovery_data->DiscoveryStatus);
-	ioc->sas_discovery_quiesce_io = discovery_status ? 1 : 0;
-	if (discovery_status)
-		return;
-
-	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev)
-		return;
-	INIT_WORK(&ev->work, mptsas_discovery_work);
-	ev->ioc = ioc;
-	schedule_work(&ev->work);
-};
-
 /*
  * mptsas_send_ir2_event - handle exposing hidden disk when
  * an inactive raid volume is added
@@ -3634,10 +3890,28 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 		}
 		break;
 	}
-	case MPI_EVENT_SAS_DISCOVERY:
-		mptsas_send_discovery_event(ioc,
-			(EVENT_DATA_SAS_DISCOVERY *)reply->Data);
+	case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE:
+	{
+		MpiEventDataSasExpanderStatusChange_t *expander_data =
+		    (MpiEventDataSasExpanderStatusChange_t *)reply->Data;
+
+
+		if (expander_data->ReasonCode ==
+		    MPI_EVENT_SAS_EXP_RC_NOT_RESPONDING &&
+		    ioc->device_missing_delay)
+			delay = HZ * ioc->device_missing_delay;
 		break;
+	}
+	case MPI_EVENT_SAS_DISCOVERY:
+	{
+		u32 discovery_status;
+		EventDataSasDiscovery_t *discovery_data =
+		    (EventDataSasDiscovery_t *)reply->Data;
+
+		discovery_status = le32_to_cpu(discovery_data->DiscoveryStatus);
+		ioc->sas_discovery_quiesce_io = discovery_status ? 1 : 0;
+		return 0;
+	}
 	case MPI_EVENT_INTEGRATED_RAID:
 	case MPI_EVENT_PERSISTENT_TABLE_FULL:
 	case MPI_EVENT_IR2:
@@ -3885,7 +4159,7 @@ static void __devexit mptsas_remove(struct pci_dev *pdev)
 		kfree(p);
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
-
+	ioc->hba_port_info = NULL;
 	mptscsih_remove(pdev);
 }
 
-- 
cgit v0.10.2


From eedf92b99806aeff18dc21c9dfb28999ce374413 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:51:32 +0530
Subject: [SCSI] mpt fusion: Rescan SAS topology added

1.) SAS topology Rescan is added. If Firmware is doing Reset and we get
Device add interrupt from Firmware, we will not receive it as part of Reset
is going ON. After Reset we will do special Rescan of SAS topology.
2.) Driver version changed from 3.04.08 to 3.04.09.

Added proper lock/unlock in mptsas_not_responding_devices() as per James'
comment.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 8d1aadb..11fc8f3 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -76,8 +76,8 @@
 #define COPYRIGHT	"Copyright (c) 1999-2008 " MODULEAUTHOR
 #endif
 
-#define MPT_LINUX_VERSION_COMMON	"3.04.08"
-#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.08"
+#define MPT_LINUX_VERSION_COMMON	"3.04.09"
+#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.09"
 #define WHAT_MAGIC_STRING		"@" "(" "#" ")"
 
 #define show_mptmod_ver(s,ver)  \
@@ -711,6 +711,7 @@ typedef struct _MPT_ADAPTER
 	u16			 hba_port_num_phy;
 	struct list_head	 sas_device_info_list;
 	struct mutex		 sas_device_info_mutex;
+	u8			 old_sas_discovery_protocal;
 	u8			 sas_discovery_quiesce_io;
 	int			 sas_index; /* index refrencing */
 	MPT_MGMT		 sas_mgmt;
@@ -741,6 +742,7 @@ typedef struct _MPT_ADAPTER
 	spinlock_t		 fault_reset_work_lock;
 
 	u8			sg_addr_size;
+	u8			in_rescan;
 	u8			SGE_size;
 
 } MPT_ADAPTER;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 7f2f76f..6aa9126 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -119,6 +119,8 @@ static struct mptsas_portinfo	*mptsas_find_portinfo_by_sas_address
 static void mptsas_expander_delete(MPT_ADAPTER *ioc,
 		struct mptsas_portinfo *port_info, u8 force);
 static void mptsas_send_expander_event(struct fw_event_work *fw_event);
+static void mptsas_not_responding_devices(MPT_ADAPTER *ioc);
+static void mptsas_scan_sas_topology(MPT_ADAPTER *ioc);
 
 static void mptsas_print_phy_data(MPT_ADAPTER *ioc,
 					MPI_SAS_IO_UNIT0_PHY_DATA *phy_data)
@@ -844,6 +846,24 @@ mptsas_queue_device_delete(MPT_ADAPTER *ioc,
 	mptsas_add_fw_event(ioc, fw_event, msecs_to_jiffies(1));
 }
 
+static void
+mptsas_queue_rescan(MPT_ADAPTER *ioc)
+{
+	struct fw_event_work *fw_event;
+	int sz;
+
+	sz = offsetof(struct fw_event_work, event_data);
+	fw_event = kzalloc(sz, GFP_ATOMIC);
+	if (!fw_event) {
+		printk(MYIOC_s_WARN_FMT "%s: failed at (line=%d)\n",
+		    ioc->name, __func__, __LINE__);
+		return;
+	}
+	fw_event->event = -1;
+	fw_event->ioc = ioc;
+	mptsas_add_fw_event(ioc, fw_event, msecs_to_jiffies(1));
+}
+
 
 /**
  * mptsas_target_reset
@@ -1100,6 +1120,7 @@ mptsas_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 			complete(&ioc->sas_mgmt.done);
 		}
 		mptsas_cleanup_fw_event_q(ioc);
+		mptsas_queue_rescan(ioc);
 		mptsas_fw_event_on(ioc);
 		break;
 	default:
@@ -1406,6 +1427,23 @@ mptsas_firmware_event_work(struct work_struct *work)
 		container_of(work, struct fw_event_work, work.work);
 	MPT_ADAPTER *ioc = fw_event->ioc;
 
+	/* special rescan topology handling */
+	if (fw_event->event == -1) {
+		if (ioc->in_rescan) {
+			devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				"%s: rescan ignored as it is in progress\n",
+				ioc->name, __func__));
+			return;
+		}
+		devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: rescan after "
+		    "reset\n", ioc->name, __func__));
+		ioc->in_rescan = 1;
+		mptsas_not_responding_devices(ioc);
+		mptsas_scan_sas_topology(ioc);
+		ioc->in_rescan = 0;
+		mptsas_free_fw_event(ioc, fw_event);
+		return;
+	}
 
 	/* events handling turned off during host reset */
 	if (ioc->fw_events_off) {
@@ -3153,8 +3191,15 @@ mptsas_send_link_status_event(struct fw_event_work *fw_event)
 	if (link_rate == MPI_SAS_IOUNIT0_RATE_1_5 ||
 	    link_rate == MPI_SAS_IOUNIT0_RATE_3_0) {
 
-		if (!port_info)
+		if (!port_info) {
+			if (ioc->old_sas_discovery_protocal) {
+				port_info = mptsas_expander_add(ioc,
+					le16_to_cpu(link_data->DevHandle));
+				if (port_info)
+					goto out;
+			}
 			goto out;
+		}
 
 		if (port_info == ioc->hba_port_info)
 			mptsas_probe_hba_phys(ioc);
@@ -3176,6 +3221,121 @@ mptsas_send_link_status_event(struct fw_event_work *fw_event)
 	mptsas_free_fw_event(ioc, fw_event);
 }
 
+static void
+mptsas_not_responding_devices(MPT_ADAPTER *ioc)
+{
+	struct mptsas_portinfo buffer, *port_info;
+	struct mptsas_device_info	*sas_info;
+	struct mptsas_devinfo sas_device;
+	u32	handle;
+	VirtTarget *vtarget = NULL;
+	struct mptsas_phyinfo *phy_info;
+	u8 found_expander;
+	int retval, retry_count;
+	unsigned long flags;
+
+	mpt_findImVolumes(ioc);
+
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		   "%s: exiting due to a parallel reset \n", ioc->name,
+		    __func__));
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+
+	/* devices, logical volumes */
+	mutex_lock(&ioc->sas_device_info_mutex);
+ redo_device_scan:
+	list_for_each_entry(sas_info, &ioc->sas_device_info_list, list) {
+		sas_device.handle = 0;
+		retry_count = 0;
+retry_page:
+		retval = mptsas_sas_device_pg0(ioc, &sas_device,
+				(MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID
+				<< MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+				(sas_info->fw.channel << 8) +
+				sas_info->fw.id);
+
+		if (sas_device.handle)
+			continue;
+		if (retval == -EBUSY) {
+			spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+			if (ioc->ioc_reset_in_progress) {
+				dfailprintk(ioc,
+				    printk(MYIOC_s_DEBUG_FMT
+				    "%s: exiting due to reset\n",
+				    ioc->name, __func__));
+				spin_unlock_irqrestore
+				    (&ioc->taskmgmt_lock, flags);
+				mutex_unlock(&ioc->sas_device_info_mutex);
+				return;
+			}
+			spin_unlock_irqrestore(&ioc->taskmgmt_lock,
+			flags);
+		}
+
+		if (retval && (retval != -ENODEV)) {
+			if (retry_count < 10) {
+				retry_count++;
+				goto retry_page;
+			} else {
+				devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				"%s: Config page retry exceeded retry "
+				"count deleting device 0x%llx\n",
+				ioc->name, __func__,
+				sas_info->sas_address));
+			}
+		}
+
+		/* delete device */
+		vtarget = mptsas_find_vtarget(ioc,
+				sas_info->fw.channel, sas_info->fw.id);
+		if (vtarget)
+			vtarget->deleted = 1;
+		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+		    sas_info->sas_address);
+		if (phy_info) {
+			mptsas_del_end_device(ioc, phy_info);
+			goto redo_device_scan;
+		}
+	}
+	mutex_unlock(&ioc->sas_device_info_mutex);
+
+	/* expanders */
+	mutex_lock(&ioc->sas_topology_mutex);
+ redo_expander_scan:
+	list_for_each_entry(port_info, &ioc->sas_topology, list) {
+
+		if (port_info->phy_info &&
+		    (!(port_info->phy_info[0].identify.device_info &
+		    MPI_SAS_DEVICE_INFO_SMP_TARGET)))
+			continue;
+		found_expander = 0;
+		handle = 0xFFFF;
+		while (!mptsas_sas_expander_pg0(ioc, &buffer,
+		    (MPI_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE <<
+		     MPI_SAS_EXPAND_PGAD_FORM_SHIFT), handle) &&
+		    !found_expander) {
+
+			handle = buffer.phy_info[0].handle;
+			if (buffer.phy_info[0].identify.sas_address ==
+			    port_info->phy_info[0].identify.sas_address) {
+				found_expander = 1;
+			}
+			kfree(buffer.phy_info);
+		}
+
+		if (!found_expander) {
+			mptsas_expander_delete(ioc, port_info, 0);
+			goto redo_expander_scan;
+		}
+	}
+	mutex_lock(&ioc->sas_topology_mutex);
+}
+
 /**
  *	mptsas_probe_expanders - adding expanders
  *	@ioc: Pointer to MPT_ADAPTER structure
@@ -3895,6 +4055,8 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 		MpiEventDataSasExpanderStatusChange_t *expander_data =
 		    (MpiEventDataSasExpanderStatusChange_t *)reply->Data;
 
+		if (ioc->old_sas_discovery_protocal)
+			return 0;
 
 		if (expander_data->ReasonCode ==
 		    MPI_EVENT_SAS_EXP_RC_NOT_RESPONDING &&
@@ -3910,6 +4072,8 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 
 		discovery_status = le32_to_cpu(discovery_data->DiscoveryStatus);
 		ioc->sas_discovery_quiesce_io = discovery_status ? 1 : 0;
+		if (ioc->old_sas_discovery_protocal && !discovery_status)
+			mptsas_queue_rescan(ioc);
 		return 0;
 	}
 	case MPI_EVENT_INTEGRATED_RAID:
@@ -4117,6 +4281,9 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto out_mptsas_probe;
 	}
 
+	/* older firmware doesn't support expander events */
+	if ((ioc->facts.HeaderVersion >> 8) < 0xE)
+		ioc->old_sas_discovery_protocal = 1;
 	mptsas_scan_sas_topology(ioc);
 	mptsas_fw_event_on(ioc);
 	return 0;
-- 
cgit v0.10.2


From 2f187862e579f1f5e883188cab6bd867cb60387f Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:52:37 +0530
Subject: [SCSI] mpt fusion: Code Cleanup patch

Resending patch considering Grants G's code review.

Main goal to submit this patch is code cleaup.
1. Better driver debug prints and code indentation.
2. fault_reset_work_lock is not used anywhere. driver is using taskmgmt_lock
instead of fault_reset_work_lock.
3. setting pci_set_drvdata properly.
4. Ingore config request when IOC is in reset state.( ioc_reset_in_progress
is set).
5. Init/clear managment frame proprely.(INITIALIZE_MGMT_STATUS and
CLEAR_MGMT_STATUS)

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index d67b263..8f04d37 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -207,7 +207,6 @@ static int	procmpt_iocinfo_read(char *buf, char **start, off_t offset,
 #endif
 static void	mpt_get_fw_exp_ver(char *buf, MPT_ADAPTER *ioc);
 
-//int		mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag);
 static int	ProcessEventNotification(MPT_ADAPTER *ioc,
 		EventNotificationReply_t *evReply, int *evHandlers);
 static void	mpt_iocstatus_info(MPT_ADAPTER *ioc, u32 ioc_status, MPT_FRAME_HDR *mf);
@@ -374,11 +373,11 @@ mpt_fault_reset_work(struct work_struct *work)
 		ioc = ioc->alt_ioc;
 
 	/* rearm the timer */
-	spin_lock_irqsave(&ioc->fault_reset_work_lock, flags);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
 	if (ioc->reset_work_q)
 		queue_delayed_work(ioc->reset_work_q, &ioc->fault_reset_work,
 			msecs_to_jiffies(MPT_POLLING_INTERVAL));
-	spin_unlock_irqrestore(&ioc->fault_reset_work_lock, flags);
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 }
 
 
@@ -972,11 +971,15 @@ mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
 
 	/*  Put Request back on FreeQ!  */
 	spin_lock_irqsave(&ioc->FreeQlock, flags);
-	mf->u.frame.linkage.arg1 = 0xdeadbeaf; /* signature to know if this mf is freed */
+	if (cpu_to_le32(mf->u.frame.linkage.arg1) == 0xdeadbeaf)
+		goto out;
+	/* signature to know if this mf is freed */
+	mf->u.frame.linkage.arg1 = cpu_to_le32(0xdeadbeaf);
 	list_add_tail(&mf->u.frame.linkage.list, &ioc->FreeQ);
 #ifdef MFCNT
 	ioc->mfcnt--;
 #endif
+ out:
 	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
 }
 
@@ -1731,6 +1734,7 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	ioc->id = mpt_ids++;
 	sprintf(ioc->name, "ioc%d", ioc->id);
+	dinitprintk(ioc, printk(KERN_WARNING MYNAM ": mpt_adapter_install\n"));
 
 	/*
 	 * set initial debug level
@@ -1771,7 +1775,6 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	ioc->reply_sz = MPT_REPLY_FRAME_SIZE;
 
 	ioc->pcidev = pdev;
-	spin_lock_init(&ioc->initializing_hba_lock);
 
 	spin_lock_init(&ioc->taskmgmt_lock);
 	mutex_init(&ioc->internal_cmds.mutex);
@@ -1792,6 +1795,7 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	ioc->mfcnt = 0;
 #endif
 
+	ioc->sh = NULL;
 	ioc->cached_fw = NULL;
 
 	/* Initilize SCSI Config Data structure
@@ -1808,9 +1812,8 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* Initialize workqueue */
 	INIT_DELAYED_WORK(&ioc->fault_reset_work, mpt_fault_reset_work);
-	spin_lock_init(&ioc->fault_reset_work_lock);
 
-	snprintf(ioc->reset_work_q_name, sizeof(ioc->reset_work_q_name),
+	snprintf(ioc->reset_work_q_name, MPT_KOBJ_NAME_LEN,
 		 "mpt_poll_%d", ioc->id);
 	ioc->reset_work_q =
 		create_singlethread_workqueue(ioc->reset_work_q_name);
@@ -1885,11 +1888,14 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	case MPI_MANUFACTPAGE_DEVID_SAS1064:
 	case MPI_MANUFACTPAGE_DEVID_SAS1068:
 		ioc->errata_flag_1064 = 1;
+		ioc->bus_type = SAS;
+		break;
 
 	case MPI_MANUFACTPAGE_DEVID_SAS1064E:
 	case MPI_MANUFACTPAGE_DEVID_SAS1068E:
 	case MPI_MANUFACTPAGE_DEVID_SAS1078:
 		ioc->bus_type = SAS;
+		break;
 	}
 
 
@@ -1933,7 +1939,7 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	INIT_LIST_HEAD(&ioc->fw_event_list);
 	spin_lock_init(&ioc->fw_event_lock);
-	snprintf(ioc->fw_event_q_name, 20, "mpt/%d", ioc->id);
+	snprintf(ioc->fw_event_q_name, MPT_KOBJ_NAME_LEN, "mpt/%d", ioc->id);
 	ioc->fw_event_q = create_singlethread_workqueue(ioc->fw_event_q_name);
 
 	if ((r = mpt_do_ioc_recovery(ioc, MPT_HOSTEVENT_IOC_BRINGUP,
@@ -2008,10 +2014,10 @@ mpt_detach(struct pci_dev *pdev)
 	/*
 	 * Stop polling ioc for fault condition
 	 */
-	spin_lock_irqsave(&ioc->fault_reset_work_lock, flags);
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
 	wq = ioc->reset_work_q;
 	ioc->reset_work_q = NULL;
-	spin_unlock_irqrestore(&ioc->fault_reset_work_lock, flags);
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 	cancel_delayed_work(&ioc->fault_reset_work);
 	destroy_workqueue(wq);
 
@@ -2234,12 +2240,16 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 	ioc->active = 0;
 
 	if (ioc->alt_ioc) {
-		if (ioc->alt_ioc->active)
+		if (ioc->alt_ioc->active ||
+		    reason == MPT_HOSTEVENT_IOC_RECOVER) {
 			reset_alt_ioc_active = 1;
-
-		/* Disable alt-IOC's reply interrupts (and FreeQ) for a bit ... */
-		CHIPREG_WRITE32(&ioc->alt_ioc->chip->IntMask, 0xFFFFFFFF);
-		ioc->alt_ioc->active = 0;
+			/* Disable alt-IOC's reply interrupts
+			 *  (and FreeQ) for a bit
+			 **/
+			CHIPREG_WRITE32(&ioc->alt_ioc->chip->IntMask,
+				0xFFFFFFFF);
+			ioc->alt_ioc->active = 0;
+		}
 	}
 
 	hard = 1;
@@ -2260,9 +2270,11 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 			}
 
 		} else {
-			printk(MYIOC_s_WARN_FMT "NOT READY!\n", ioc->name);
+			printk(MYIOC_s_WARN_FMT
+			    "NOT READY WARNING!\n", ioc->name);
 		}
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	/* hard_reset_done = 0 if a soft reset was performed
@@ -2272,7 +2284,9 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 		if ((rc = MakeIocReady(ioc->alt_ioc, 0, sleepFlag)) == 0)
 			alt_ioc_ready = 1;
 		else
-			printk(MYIOC_s_WARN_FMT "alt_ioc not ready!\n", ioc->alt_ioc->name);
+			printk(MYIOC_s_WARN_FMT
+			    ": alt-ioc Not ready WARNING!\n",
+			    ioc->alt_ioc->name);
 	}
 
 	for (ii=0; ii<5; ii++) {
@@ -2293,7 +2307,8 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 	if (alt_ioc_ready) {
 		if ((rc = GetIocFacts(ioc->alt_ioc, sleepFlag, reason)) != 0) {
 			dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-			    "Initial Alt IocFacts failed rc=%x\n", ioc->name, rc));
+			    "Initial Alt IocFacts failed rc=%x\n",
+			    ioc->name, rc));
 			/* Retry - alt IOC was initialized once
 			 */
 			rc = GetIocFacts(ioc->alt_ioc, sleepFlag, reason);
@@ -2337,16 +2352,20 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 			    IRQF_SHARED, ioc->name, ioc);
 			if (rc < 0) {
 				printk(MYIOC_s_ERR_FMT "Unable to allocate "
-				    "interrupt %d!\n", ioc->name, ioc->pcidev->irq);
+				    "interrupt %d!\n",
+				    ioc->name, ioc->pcidev->irq);
 				if (ioc->msi_enable)
 					pci_disable_msi(ioc->pcidev);
-				return -EBUSY;
+				ret = -EBUSY;
+				goto out;
 			}
 			irq_allocated = 1;
 			ioc->pci_irq = ioc->pcidev->irq;
 			pci_set_master(ioc->pcidev);		/* ?? */
-			dprintk(ioc, printk(MYIOC_s_INFO_FMT "installed at interrupt "
-			    "%d\n", ioc->name, ioc->pcidev->irq));
+			pci_set_drvdata(ioc->pcidev, ioc);
+			dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+			    "installed at interrupt %d\n", ioc->name,
+			    ioc->pcidev->irq));
 		}
 	}
 
@@ -2355,17 +2374,22 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 	 * init as upper addresses are needed for init.
 	 * If fails, continue with alt-ioc processing
 	 */
+	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT "PrimeIocFifos\n",
+	    ioc->name));
 	if ((ret == 0) && ((rc = PrimeIocFifos(ioc)) != 0))
 		ret = -3;
 
 	/* May need to check/upload firmware & data here!
 	 * If fails, continue with alt-ioc processing
 	 */
+	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT "SendIocInit\n",
+	    ioc->name));
 	if ((ret == 0) && ((rc = SendIocInit(ioc, sleepFlag)) != 0))
 		ret = -4;
 // NEW!
 	if (alt_ioc_ready && ((rc = PrimeIocFifos(ioc->alt_ioc)) != 0)) {
-		printk(MYIOC_s_WARN_FMT ": alt_ioc (%d) FIFO mgmt alloc!\n",
+		printk(MYIOC_s_WARN_FMT
+		    ": alt-ioc (%d) FIFO mgmt alloc WARNING!\n",
 		    ioc->alt_ioc->name, rc);
 		alt_ioc_ready = 0;
 		reset_alt_ioc_active = 0;
@@ -2375,8 +2399,9 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 		if ((rc = SendIocInit(ioc->alt_ioc, sleepFlag)) != 0) {
 			alt_ioc_ready = 0;
 			reset_alt_ioc_active = 0;
-			printk(MYIOC_s_WARN_FMT "alt_ioc (%d) init failure!\n",
-			    ioc->alt_ioc->name, rc);
+			printk(MYIOC_s_WARN_FMT
+				": alt-ioc: (%d) init failure WARNING!\n",
+					ioc->alt_ioc->name, rc);
 		}
 	}
 
@@ -2457,8 +2482,9 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 		mutex_init(&ioc->raid_data.inactive_list_mutex);
 		INIT_LIST_HEAD(&ioc->raid_data.inactive_list);
 
-		if (ioc->bus_type == SAS) {
+		switch (ioc->bus_type) {
 
+		case SAS:
 			/* clear persistency table */
 			if(ioc->facts.IOCExceptions &
 			    MPI_IOCFACTS_EXCEPT_PERSISTENT_TABLE_FULL) {
@@ -2472,8 +2498,15 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 			 */
 			mpt_findImVolumes(ioc);
 
-		} else if (ioc->bus_type == FC) {
-			if ((ioc->pfacts[0].ProtocolFlags & MPI_PORTFACTS_PROTOCOL_LAN) &&
+			/* Check, and possibly reset, the coalescing value
+			 */
+			mpt_read_ioc_pg_1(ioc);
+
+			break;
+
+		case FC:
+			if ((ioc->pfacts[0].ProtocolFlags &
+				MPI_PORTFACTS_PROTOCOL_LAN) &&
 			    (ioc->lan_cnfg_page0.Header.PageLength == 0)) {
 				/*
 				 *  Pre-fetch the ports LAN MAC address!
@@ -2482,11 +2515,14 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 				(void) GetLanConfigPages(ioc);
 				a = (u8*)&ioc->lan_cnfg_page1.HardwareAddressLow;
 				dprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-				    "LanAddr = %02X:%02X:%02X:%02X:%02X:%02X\n",
-				    ioc->name, a[5], a[4], a[3], a[2], a[1], a[0]));
-
+					"LanAddr = %02X:%02X:%02X"
+					":%02X:%02X:%02X\n",
+					ioc->name, a[5], a[4],
+					a[3], a[2], a[1], a[0]));
 			}
-		} else {
+			break;
+
+		case SPI:
 			/* Get NVRAM and adapter maximums from SPP 0 and 2
 			 */
 			mpt_GetScsiPortSettings(ioc, 0);
@@ -2505,6 +2541,8 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 			mpt_read_ioc_pg_1(ioc);
 
 			mpt_read_ioc_pg_4(ioc);
+
+			break;
 		}
 
 		GetIoUnitPage2(ioc);
@@ -2586,16 +2624,20 @@ mpt_detect_bound_ports(MPT_ADAPTER *ioc, struct pci_dev *pdev)
 		if (_pcidev == peer) {
 			/* Paranoia checks */
 			if (ioc->alt_ioc != NULL) {
-				printk(MYIOC_s_WARN_FMT "Oops, already bound to %s!\n",
-					ioc->name, ioc->alt_ioc->name);
+				printk(MYIOC_s_WARN_FMT
+				    "Oops, already bound (%s <==> %s)!\n",
+				    ioc->name, ioc->name, ioc->alt_ioc->name);
 				break;
 			} else if (ioc_srch->alt_ioc != NULL) {
-				printk(MYIOC_s_WARN_FMT "Oops, already bound to %s!\n",
-					ioc_srch->name, ioc_srch->alt_ioc->name);
+				printk(MYIOC_s_WARN_FMT
+				    "Oops, already bound (%s <==> %s)!\n",
+				    ioc_srch->name, ioc_srch->name,
+				    ioc_srch->alt_ioc->name);
 				break;
 			}
-			dprintk(ioc, printk(MYIOC_s_INFO_FMT "FOUND! binding to %s\n",
-				ioc->name, ioc_srch->name));
+			dprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				"FOUND! binding %s <==> %s\n",
+				ioc->name, ioc->name, ioc_srch->name));
 			ioc_srch->alt_ioc = ioc;
 			ioc->alt_ioc = ioc_srch;
 		}
@@ -2615,8 +2657,8 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
 	int ret;
 
 	if (ioc->cached_fw != NULL) {
-		ddlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: Pushing FW onto "
-		    "adapter\n", __func__, ioc->name));
+		ddlprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"%s: Pushing FW onto adapter\n", __func__, ioc->name));
 		if ((ret = mpt_downloadboot(ioc, (MpiFwHeader_t *)
 		    ioc->cached_fw, CAN_SLEEP)) < 0) {
 			printk(MYIOC_s_WARN_FMT
@@ -2626,10 +2668,13 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
 	}
 
 	/* Disable adapter interrupts! */
+	synchronize_irq(ioc->pcidev->irq);
 	CHIPREG_WRITE32(&ioc->chip->IntMask, 0xFFFFFFFF);
 	ioc->active = 0;
+
 	/* Clear any lingering interrupt */
 	CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
+	CHIPREG_READ32(&ioc->chip->IntStatus);
 
 	if (ioc->alloc != NULL) {
 		sz = ioc->alloc_sz;
@@ -2689,19 +2734,22 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
 		if((ret = mpt_host_page_access_control(ioc,
 		    MPI_DB_HPBAC_FREE_BUFFER, NO_SLEEP)) != 0) {
 			printk(MYIOC_s_ERR_FMT
-			   "host page buffers free failed (%d)!\n",
-			    ioc->name, ret);
+			   ": %s: host page buffers free failed (%d)!\n",
+			    ioc->name, __func__, ret);
 		}
-		dexitprintk(ioc, printk(MYIOC_s_INFO_FMT "HostPageBuffer free  @ %p, sz=%d bytes\n",
-		 	ioc->name, ioc->HostPageBuffer, ioc->HostPageBuffer_sz));
+		dexitprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"HostPageBuffer free  @ %p, sz=%d bytes\n",
+			ioc->name, ioc->HostPageBuffer,
+			ioc->HostPageBuffer_sz));
 		pci_free_consistent(ioc->pcidev, ioc->HostPageBuffer_sz,
 		    ioc->HostPageBuffer, ioc->HostPageBuffer_dma);
 		ioc->HostPageBuffer = NULL;
 		ioc->HostPageBuffer_sz = 0;
 		ioc->alloc_total -= ioc->HostPageBuffer_sz;
 	}
-}
 
+	pci_set_drvdata(ioc->pcidev, NULL);
+}
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
  *	mpt_adapter_dispose - Free all resources associated with an MPT adapter
@@ -2841,8 +2889,12 @@ MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag)
 	}
 
 	/* Is it already READY? */
-	if (!statefault && (ioc_state & MPI_IOC_STATE_MASK) == MPI_IOC_STATE_READY)
+	if (!statefault &&
+	    ((ioc_state & MPI_IOC_STATE_MASK) == MPI_IOC_STATE_READY)) {
+		dinitprintk(ioc, printk(MYIOC_s_INFO_FMT
+		    "IOC is in READY state\n", ioc->name));
 		return 0;
+	}
 
 	/*
 	 *	Check to see if IOC is in FAULT state.
@@ -2915,8 +2967,9 @@ MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag)
 
 		ii++; cntdn--;
 		if (!cntdn) {
-			printk(MYIOC_s_ERR_FMT "Wait IOC_READY state timeout(%d)!\n",
-					ioc->name, (int)((ii+5)/HZ));
+			printk(MYIOC_s_ERR_FMT
+				"Wait IOC_READY state (0x%x) timeout(%d)!\n",
+				ioc->name, ioc_state, (int)((ii+5)/HZ));
 			return -ETIME;
 		}
 
@@ -2929,9 +2982,8 @@ MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag)
 	}
 
 	if (statefault < 3) {
-		printk(MYIOC_s_INFO_FMT "Recovered from %s\n",
-				ioc->name,
-				statefault==1 ? "stuck handshake" : "IOC FAULT");
+		printk(MYIOC_s_INFO_FMT "Recovered from %s\n", ioc->name,
+			statefault == 1 ? "stuck handshake" : "IOC FAULT");
 	}
 
 	return hard_reset_done;
@@ -2984,8 +3036,9 @@ GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason)
 
 	/* IOC *must* NOT be in RESET state! */
 	if (ioc->last_state == MPI_IOC_STATE_RESET) {
-		printk(MYIOC_s_ERR_FMT "Can't get IOCFacts NOT READY! (%08x)\n",
-		    ioc->name, ioc->last_state );
+		printk(KERN_ERR MYNAM
+		    ": ERROR - Can't get IOCFacts, %s NOT READY! (%08x)\n",
+		    ioc->name, ioc->last_state);
 		return -44;
 	}
 
@@ -3047,7 +3100,7 @@ GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason)
 		 *	Old: u16{Major(4),Minor(4),SubMinor(8)}
 		 *	New: u32{Major(8),Minor(8),Unit(8),Dev(8)}
 		 */
-		if (facts->MsgVersion < 0x0102) {
+		if (facts->MsgVersion < MPI_VERSION_01_02) {
 			/*
 			 *	Handle old FC f/w style, convert to new...
 			 */
@@ -3059,9 +3112,11 @@ GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason)
 			facts->FWVersion.Word = le32_to_cpu(facts->FWVersion.Word);
 
 		facts->ProductID = le16_to_cpu(facts->ProductID);
+
 		if ((ioc->facts.ProductID & MPI_FW_HEADER_PID_PROD_MASK)
 		    > MPI_FW_HEADER_PID_PROD_TARGET_SCSI)
 			ioc->ir_firmware = 1;
+
 		facts->CurrentHostMfaHighAddr =
 				le32_to_cpu(facts->CurrentHostMfaHighAddr);
 		facts->GlobalCredits = le16_to_cpu(facts->GlobalCredits);
@@ -3077,7 +3132,7 @@ GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason)
 		 * to 14 in MPI-1.01.0x.
 		 */
 		if (facts->MsgLength >= (offsetof(IOCFactsReply_t,FWImageSize) + 7)/4 &&
-		    facts->MsgVersion > 0x0100) {
+		    facts->MsgVersion > MPI_VERSION_01_00) {
 			facts->FWImageSize = le32_to_cpu(facts->FWImageSize);
 		}
 
@@ -3259,6 +3314,7 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag)
 
 	ioc_init.MaxDevices = (U8)ioc->devices_per_bus;
 	ioc_init.MaxBuses = (U8)ioc->number_of_buses;
+
 	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "facts.MsgVersion=%x\n",
 		   ioc->name, ioc->facts.MsgVersion));
 	if (ioc->facts.MsgVersion >= MPI_VERSION_01_05) {
@@ -3273,7 +3329,7 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag)
 	}
 	ioc_init.ReplyFrameSize = cpu_to_le16(ioc->reply_sz);	/* in BYTES */
 
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	if (ioc->sg_addr_size == sizeof(u64)) {
 		/* Save the upper 32-bits of the request
 		 * (reply) and sense buffers.
 		 */
@@ -3526,29 +3582,29 @@ mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag)
 	ii = mpt_handshake_req_reply_wait(ioc, request_size, (u32 *)prequest,
 	    reply_sz, (u16 *)preply, 65 /*seconds*/, sleepFlag);
 
-	dinitprintk(ioc, printk(MYIOC_s_INFO_FMT ": FW Upload completed rc=%x \n", ioc->name, ii));
+	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "FW Upload completed "
+	    "rc=%x \n", ioc->name, ii));
 
 	cmdStatus = -EFAULT;
 	if (ii == 0) {
 		/* Handshake transfer was complete and successful.
 		 * Check the Reply Frame.
 		 */
-		int status, transfer_sz;
-		status = le16_to_cpu(preply->IOCStatus);
-		if (status == MPI_IOCSTATUS_SUCCESS) {
-			transfer_sz = le32_to_cpu(preply->ActualImageSize);
-			if (transfer_sz == sz)
+		int status;
+		status = le16_to_cpu(preply->IOCStatus) &
+				MPI_IOCSTATUS_MASK;
+		if (status == MPI_IOCSTATUS_SUCCESS &&
+		    ioc->facts.FWImageSize ==
+		    le32_to_cpu(preply->ActualImageSize))
 				cmdStatus = 0;
-		}
 	}
 	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": do_upload cmdStatus=%d \n",
 			ioc->name, cmdStatus));
 
 
 	if (cmdStatus) {
-
-		ddlprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": fw upload failed, freeing image \n",
-			ioc->name));
+		ddlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "fw upload failed, "
+		    "freeing image \n", ioc->name));
 		mpt_free_fw_memory(ioc);
 	}
 	kfree(prequest);
@@ -3872,6 +3928,10 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag)
 	CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
 
 	if (ioc->pcidev->device == MPI_MANUFACTPAGE_DEVID_SAS1078) {
+
+		if (!ignore)
+			return 0;
+
 		drsprintk(ioc, printk(MYIOC_s_WARN_FMT "%s: Doorbell=%p; 1078 reset "
 			"address=%p\n",  ioc->name, __func__,
 			&ioc->chip->Doorbell, &ioc->chip->Reset_1078));
@@ -3889,6 +3949,7 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag)
 				"looking for READY STATE: doorbell=%x"
 			        " count=%d\n",
 				ioc->name, doorbell, count));
+
 			if (doorbell == MPI_IOC_STATE_READY) {
 				return 1;
 			}
@@ -4039,6 +4100,10 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag)
 				doorbell = CHIPREG_READ32(&ioc->chip->Doorbell);
 				doorbell &= MPI_IOC_STATE_MASK;
 
+				drsprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+				    "looking for READY STATE: doorbell=%x"
+				    " count=%d\n", ioc->name, doorbell, count));
+
 				if (doorbell == MPI_IOC_STATE_READY) {
 					break;
 				}
@@ -4050,6 +4115,11 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag)
 					mdelay (1000);
 				}
 			}
+
+			if (doorbell != MPI_IOC_STATE_READY)
+				printk(MYIOC_s_ERR_FMT "Failed to come READY "
+				    "after reset! IocState=%x", ioc->name,
+				    doorbell);
 		}
 	}
 
@@ -4168,8 +4238,9 @@ SendIocReset(MPT_ADAPTER *ioc, u8 reset_type, int sleepFlag)
 			if (sleepFlag != CAN_SLEEP)
 				count *= 10;
 
-			printk(MYIOC_s_ERR_FMT "Wait IOC_READY state timeout(%d)!\n",
-			    ioc->name, (int)((count+5)/HZ));
+			printk(MYIOC_s_ERR_FMT
+			    "Wait IOC_READY state (0x%x) timeout(%d)!\n",
+			    ioc->name, state, (int)((count+5)/HZ));
 			return -ETIME;
 		}
 
@@ -4255,8 +4326,13 @@ initChainBuffers(MPT_ADAPTER *ioc)
 	dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "num_sge=%d numSGE=%d\n",
 		ioc->name, num_sge, numSGE));
 
-	if ( numSGE > MPT_SCSI_SG_DEPTH	)
-		numSGE = MPT_SCSI_SG_DEPTH;
+	if (ioc->bus_type == FC) {
+		if (numSGE > MPT_SCSI_FC_SG_DEPTH)
+			numSGE = MPT_SCSI_FC_SG_DEPTH;
+	} else {
+		if (numSGE > MPT_SCSI_SG_DEPTH)
+			numSGE = MPT_SCSI_SG_DEPTH;
+	}
 
 	num_chain = 1;
 	while (numSGE - num_sge > 0) {
@@ -4493,6 +4569,7 @@ PrimeIocFifos(MPT_ADAPTER *ioc)
 	return 0;
 
 out_fail:
+
 	if (ioc->alloc != NULL) {
 		sz = ioc->alloc_sz;
 		pci_free_consistent(ioc->pcidev,
@@ -5610,17 +5687,20 @@ mpt_inactive_raid_volumes(MPT_ADAPTER *ioc, u8 channel, u8 id)
  *	-ENOMEM if pci_alloc failed
  **/
 int
-mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, pRaidPhysDiskPage0_t phys_disk)
+mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num,
+			RaidPhysDiskPage0_t *phys_disk)
 {
-	CONFIGPARMS		 	cfg;
-	ConfigPageHeader_t	 	hdr;
+	CONFIGPARMS			cfg;
+	ConfigPageHeader_t		hdr;
 	dma_addr_t			dma_handle;
 	pRaidPhysDiskPage0_t		buffer = NULL;
 	int				rc;
 
 	memset(&cfg, 0 , sizeof(CONFIGPARMS));
 	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
+	memset(phys_disk, 0, sizeof(RaidPhysDiskPage0_t));
 
+	hdr.PageVersion = MPI_RAIDPHYSDISKPAGE0_PAGEVERSION;
 	hdr.PageType = MPI_CONFIG_PAGETYPE_RAID_PHYSDISK;
 	cfg.cfghdr.hdr = &hdr;
 	cfg.physAddr = -1;
@@ -6074,7 +6154,8 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
 	int		 ret;
 	u8		 page_type = 0, extend_page;
 	unsigned long 	 timeleft;
-	int		 in_isr;
+	unsigned long	 flags;
+    int		 in_isr;
 	u8		 issue_hard_reset = 0;
 	u8		 retry_count = 0;
 
@@ -6086,7 +6167,17 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
 		dcprintk(ioc, printk(MYIOC_s_WARN_FMT "Config request not allowed in ISR context!\n",
 				ioc->name));
 		return -EPERM;
+    }
+
+	/* don't send a config page during diag reset */
+	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+	if (ioc->ioc_reset_in_progress) {
+		dfailprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "%s: busy with host reset\n", ioc->name, __func__));
+		spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+		return -EBUSY;
 	}
+	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
 	/* don't send if no chance of success */
 	if (!ioc->active ||
@@ -6270,6 +6361,12 @@ mpt_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 			    MPT_MGMT_STATUS_DID_IOCRESET;
 			complete(&ioc->mptbase_cmds.done);
 		}
+/* wake up taskmgmt_cmds */
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_PENDING) {
+			ioc->taskmgmt_cmds.status |=
+				MPT_MGMT_STATUS_DID_IOCRESET;
+			complete(&ioc->taskmgmt_cmds.done);
+		}
 		break;
 	default:
 		break;
@@ -6690,7 +6787,9 @@ int
 mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
 {
 	int		 rc;
+	u8	 cb_idx;
 	unsigned long	 flags;
+	unsigned long	 time_count;
 
 	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "HardResetHandler Entered!\n", ioc->name));
 #ifdef MFCNT
@@ -6721,30 +6820,24 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
 	 * Prevents timeouts occurring during a diagnostic reset...very bad.
 	 * For all other protocol drivers, this is a no-op.
 	 */
-	{
-		u8	 cb_idx;
-		int	 r = 0;
-
-		for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) {
-			if (MptResetHandlers[cb_idx]) {
-				dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Calling IOC reset_setup handler #%d\n",
-						ioc->name, cb_idx));
-				r += mpt_signal_reset(cb_idx, ioc, MPT_IOC_SETUP_RESET);
-				if (ioc->alt_ioc) {
-					dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Calling alt-%s setup reset handler #%d\n",
-							ioc->name, ioc->alt_ioc->name, cb_idx));
-					r += mpt_signal_reset(cb_idx, ioc->alt_ioc, MPT_IOC_SETUP_RESET);
-				}
-			}
+	for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) {
+		if (MptResetHandlers[cb_idx]) {
+			mpt_signal_reset(cb_idx, ioc, MPT_IOC_SETUP_RESET);
+			if (ioc->alt_ioc)
+				mpt_signal_reset(cb_idx, ioc->alt_ioc,
+					MPT_IOC_SETUP_RESET);
 		}
 	}
 
-	if ((rc = mpt_do_ioc_recovery(ioc, MPT_HOSTEVENT_IOC_RECOVER, sleepFlag)) != 0) {
-		printk(MYIOC_s_WARN_FMT "Cannot recover rc = %d!\n", ioc->name, rc);
+	time_count = jiffies;
+	rc = mpt_do_ioc_recovery(ioc, MPT_HOSTEVENT_IOC_RECOVER, sleepFlag);
+	if (rc != 0) {
+		printk(KERN_WARNING MYNAM
+		    ": WARNING - (%d) Cannot recover %s\n", rc, ioc->name);
+	} else {
+		if (ioc->hard_resets < -1)
+			ioc->hard_resets++;
 	}
-	ioc->reload_fw = 0;
-	if (ioc->alt_ioc)
-		ioc->alt_ioc->reload_fw = 0;
 
 	spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
 	ioc->ioc_reset_in_progress = 0;
@@ -6757,16 +6850,27 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
 	}
 	spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
 
-	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "HardResetHandler rc = %d!\n", ioc->name, rc));
+	dtmprintk(ioc,
+	    printk(MYIOC_s_DEBUG_FMT
+		"HardResetHandler: completed (%d seconds): %s\n", ioc->name,
+		jiffies_to_msecs(jiffies - time_count)/1000, ((rc == 0) ?
+		"SUCCESS" : "FAILED")));
 
 	return rc;
 }
 
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+#ifdef CONFIG_FUSION_LOGGING
 static void
-EventDescriptionStr(u8 event, u32 evData0, char *evStr)
+mpt_display_event_info(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply)
 {
 	char *ds = NULL;
+	u32 evData0;
+	int ii;
+	u8 event;
+	char *evStr = ioc->evStr;
+
+	event = le32_to_cpu(pEventReply->Event) & 0xFF;
+	evData0 = le32_to_cpu(pEventReply->Data[0]);
 
 	switch(event) {
 	case MPI_EVENT_NONE:
@@ -6800,9 +6904,9 @@ EventDescriptionStr(u8 event, u32 evData0, char *evStr)
 		if (evData0 == MPI_EVENT_LOOP_STATE_CHANGE_LIP)
 			ds = "Loop State(LIP) Change";
 		else if (evData0 == MPI_EVENT_LOOP_STATE_CHANGE_LPE)
-			ds = "Loop State(LPE) Change";		/* ??? */
+			ds = "Loop State(LPE) Change";
 		else
-			ds = "Loop State(LPB) Change";		/* ??? */
+			ds = "Loop State(LPB) Change";
 		break;
 	case MPI_EVENT_LOGOUT:
 		ds = "Logout";
@@ -7002,28 +7106,53 @@ EventDescriptionStr(u8 event, u32 evData0, char *evStr)
 	}
 	case MPI_EVENT_IR2:
 	{
+		u8 id = (u8)(evData0);
+		u8 channel = (u8)(evData0 >> 8);
+		u8 phys_num = (u8)(evData0 >> 24);
 		u8 ReasonCode = (u8)(evData0 >> 16);
+
 		switch (ReasonCode) {
 		case MPI_EVENT_IR2_RC_LD_STATE_CHANGED:
-			ds = "IR2: LD State Changed";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: LD State Changed: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_PD_STATE_CHANGED:
-			ds = "IR2: PD State Changed";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: PD State Changed "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_BAD_BLOCK_TABLE_FULL:
-			ds = "IR2: Bad Block Table Full";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Bad Block Table Full: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_PD_INSERTED:
-			ds = "IR2: PD Inserted";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: PD Inserted: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_PD_REMOVED:
-			ds = "IR2: PD Removed";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: PD Removed: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_FOREIGN_CFG_DETECTED:
-			ds = "IR2: Foreign CFG Detected";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Foreign CFG Detected: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		case MPI_EVENT_IR2_RC_REBUILD_MEDIUM_ERROR:
-			ds = "IR2: Rebuild Medium Error";
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Rebuild Medium Error: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
 			break;
 		default:
 			ds = "IR2";
@@ -7059,13 +7188,18 @@ EventDescriptionStr(u8 event, u32 evData0, char *evStr)
 	case MPI_EVENT_SAS_INIT_DEVICE_STATUS_CHANGE:
 	{
 		u8 reason = (u8)(evData0);
-		u8 port_num = (u8)(evData0 >> 8);
-		u16 handle = le16_to_cpu(evData0 >> 16);
 
-		snprintf(evStr, EVENT_DESCR_STR_SZ,
-		    "SAS Initiator Device Status Change: reason=0x%02x "
-		    "port=%d handle=0x%04x",
-		    reason, port_num, handle);
+		switch (reason) {
+		case MPI_EVENT_SAS_INIT_RC_ADDED:
+			ds = "SAS Initiator Status Change: Added";
+			break;
+		case MPI_EVENT_SAS_INIT_RC_REMOVED:
+			ds = "SAS Initiator Status Change: Deleted";
+			break;
+		default:
+			ds = "SAS Initiator Status Change";
+			break;
+		}
 		break;
 	}
 
@@ -7113,6 +7247,24 @@ EventDescriptionStr(u8 event, u32 evData0, char *evStr)
 		break;
 	}
 
+	case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE:
+	{
+		u8 reason = (u8)(evData0);
+
+		switch (reason) {
+		case MPI_EVENT_SAS_EXP_RC_ADDED:
+			ds = "Expander Status Change: Added";
+			break;
+		case MPI_EVENT_SAS_EXP_RC_NOT_RESPONDING:
+			ds = "Expander Status Change: Deleted";
+			break;
+		default:
+			ds = "Expander Status Change";
+			break;
+		}
+		break;
+	}
+
 	/*
 	 *  MPT base "custom" events may be added here...
 	 */
@@ -7122,8 +7274,20 @@ EventDescriptionStr(u8 event, u32 evData0, char *evStr)
 	}
 	if (ds)
 		strncpy(evStr, ds, EVENT_DESCR_STR_SZ);
-}
 
+
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "MPT event:(%02Xh) : %s\n",
+	    ioc->name, event, evStr));
+
+	devtverboseprintk(ioc, printk(KERN_DEBUG MYNAM
+	    ": Event data:\n"));
+	for (ii = 0; ii < le16_to_cpu(pEventReply->EventDataLength); ii++)
+		devtverboseprintk(ioc, printk(" %08x",
+		    le32_to_cpu(pEventReply->Data[ii])));
+	devtverboseprintk(ioc, printk(KERN_DEBUG "\n"));
+}
+#endif
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
  *	ProcessEventNotification - Route EventNotificationReply to all event handlers
@@ -7140,37 +7304,24 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply
 {
 	u16 evDataLen;
 	u32 evData0 = 0;
-//	u32 evCtx;
 	int ii;
 	u8 cb_idx;
 	int r = 0;
 	int handlers = 0;
-	char evStr[EVENT_DESCR_STR_SZ];
 	u8 event;
 
 	/*
 	 *  Do platform normalization of values
 	 */
 	event = le32_to_cpu(pEventReply->Event) & 0xFF;
-//	evCtx = le32_to_cpu(pEventReply->EventContext);
 	evDataLen = le16_to_cpu(pEventReply->EventDataLength);
 	if (evDataLen) {
 		evData0 = le32_to_cpu(pEventReply->Data[0]);
 	}
 
-	EventDescriptionStr(event, evData0, evStr);
-	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "MPT event:(%02Xh) : %s\n",
-			ioc->name,
-			event,
-			evStr));
-
 #ifdef CONFIG_FUSION_LOGGING
-	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-	    ": Event data:\n", ioc->name));
-	for (ii = 0; ii < evDataLen; ii++)
-		devtverboseprintk(ioc, printk(" %08x",
-		    le32_to_cpu(pEventReply->Data[ii])));
-	devtverboseprintk(ioc, printk("\n"));
+	if (evDataLen)
+		mpt_display_event_info(ioc, pEventReply);
 #endif
 
 	/*
@@ -7225,8 +7376,9 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply
 	 */
 	for (cb_idx = MPT_MAX_PROTOCOL_DRIVERS-1; cb_idx; cb_idx--) {
 		if (MptEvHandlers[cb_idx]) {
-			devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT "Routing Event to event handler #%d\n",
-					ioc->name, cb_idx));
+			devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			    "Routing Event to event handler #%d\n",
+			    ioc->name, cb_idx));
 			r += (*(MptEvHandlers[cb_idx]))(ioc, pEventReply);
 			handlers++;
 		}
@@ -7310,8 +7462,6 @@ mpt_spi_log_info(MPT_ADAPTER *ioc, u32 log_info)
 	switch (info) {
 	case 0x00010000:
 		desc = "bug! MID not found";
-		if (ioc->reload_fw == 0)
-			ioc->reload_fw++;
 		break;
 
 	case 0x00020000:
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 11fc8f3..91499d1 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -76,7 +76,7 @@
 #define COPYRIGHT	"Copyright (c) 1999-2008 " MODULEAUTHOR
 #endif
 
-#define MPT_LINUX_VERSION_COMMON	"3.04.09"
+#define MPT_LINUX_VERSION_COMMON	"3.04.10"
 #define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.09"
 #define WHAT_MAGIC_STRING		"@" "(" "#" ")"
 
@@ -104,6 +104,7 @@
 #endif
 
 #define MPT_NAME_LENGTH			32
+#define MPT_KOBJ_NAME_LEN		20
 
 #define MPT_PROCFS_MPTBASEDIR		"mpt"
 						/* chg it to "driver/fusion" ? */
@@ -162,10 +163,10 @@
 /*
  * Set the MAX_SGE value based on user input.
  */
-#ifdef  CONFIG_FUSION_MAX_SGE
-#if     CONFIG_FUSION_MAX_SGE  < 16
+#ifdef CONFIG_FUSION_MAX_SGE
+#if CONFIG_FUSION_MAX_SGE  < 16
 #define MPT_SCSI_SG_DEPTH	16
-#elif   CONFIG_FUSION_MAX_SGE  > 128
+#elif CONFIG_FUSION_MAX_SGE  > 128
 #define MPT_SCSI_SG_DEPTH	128
 #else
 #define MPT_SCSI_SG_DEPTH	CONFIG_FUSION_MAX_SGE
@@ -174,6 +175,18 @@
 #define MPT_SCSI_SG_DEPTH	40
 #endif
 
+#ifdef CONFIG_FUSION_MAX_FC_SGE
+#if CONFIG_FUSION_MAX_FC_SGE  < 16
+#define MPT_SCSI_FC_SG_DEPTH	16
+#elif CONFIG_FUSION_MAX_FC_SGE  > 256
+#define MPT_SCSI_FC_SG_DEPTH	256
+#else
+#define MPT_SCSI_FC_SG_DEPTH	CONFIG_FUSION_MAX_FC_SGE
+#endif
+#else
+#define MPT_SCSI_FC_SG_DEPTH	40
+#endif
+
 /* debug print string length used for events and iocstatus */
 # define EVENT_DESCR_STR_SZ             100
 
@@ -576,6 +589,10 @@ typedef struct _MPT_ADAPTER
 	int			 pci_irq;	/* This irq           */
 	char			 name[MPT_NAME_LENGTH];	/* "iocN"             */
 	char			 prod_name[MPT_NAME_LENGTH];	/* "LSIFC9x9"         */
+#ifdef CONFIG_FUSION_LOGGING
+	/* used in mpt_display_event_info */
+	char			 evStr[EVENT_DESCR_STR_SZ];
+#endif
 	char			 board_name[16];
 	char			 board_assembly[16];
 	char			 board_tracer[16];
@@ -682,14 +699,11 @@ typedef struct _MPT_ADAPTER
 	int			 aen_event_read_flag; /* flag to indicate event log was read*/
 	u8			 FirstWhoInit;
 	u8			 upload_fw;	/* If set, do a fw upload */
-	u8			 reload_fw;	/* Force a FW Reload on next reset */
 	u8			 NBShiftFactor;  /* NB Shift Factor based on Block Size (Facts)  */
 	u8			 pad1[4];
 	u8			 DoneCtx;
 	u8			 TaskCtx;
 	u8			 InternalCtx;
-	spinlock_t		 initializing_hba_lock;
-	int 	 		 initializing_hba_lock_flag;
 	struct list_head	 list;
 	struct net_device	*netdev;
 	struct list_head	 sas_topology;
@@ -699,7 +713,7 @@ typedef struct _MPT_ADAPTER
 	struct list_head	 fw_event_list;
 	spinlock_t		 fw_event_lock;
 	u8			 fw_events_off; /* if '1', then ignore events */
-	char 			 fw_event_q_name[20];
+	char 			 fw_event_q_name[MPT_KOBJ_NAME_LEN];
 
 	struct mutex		 sas_discovery_mutex;
 	u8			 sas_discovery_runtime;
@@ -731,15 +745,22 @@ typedef struct _MPT_ADAPTER
 	u8			 fc_link_speed[2];
 	spinlock_t		 fc_rescan_work_lock;
 	struct work_struct	 fc_rescan_work;
-	char			 fc_rescan_work_q_name[20];
+	char			 fc_rescan_work_q_name[MPT_KOBJ_NAME_LEN];
 	struct workqueue_struct *fc_rescan_work_q;
+
+	/* driver forced bus resets count */
+	unsigned long		  hard_resets;
+	/* fw/external bus resets count */
+	unsigned long		  soft_resets;
+	/* cmd timeouts */
+	unsigned long		  timeouts;
+
 	struct scsi_cmnd	**ScsiLookup;
 	spinlock_t		  scsi_lookup_lock;
 	u64			dma_mask;
-	char			 reset_work_q_name[20];
+	char			 reset_work_q_name[MPT_KOBJ_NAME_LEN];
 	struct workqueue_struct *reset_work_q;
 	struct delayed_work	 fault_reset_work;
-	spinlock_t		 fault_reset_work_lock;
 
 	u8			sg_addr_size;
 	u8			in_rescan;
@@ -870,9 +891,6 @@ typedef struct _MPT_SCSI_HOST {
 	MPT_FRAME_HDR		 *cmdPtr;		/* Ptr to nonOS request */
 	struct scsi_cmnd	 *abortSCpnt;
 	MPT_LOCAL_REPLY		  localReply;		/* internal cmd reply struct */
-	unsigned long		  hard_resets;		/* driver forced bus resets count */
-	unsigned long		  soft_resets;		/* fw/external bus resets count */
-	unsigned long		  timeouts;		/* cmd timeouts */
 	ushort			  sel_timeout[MPT_MAX_FC_DEVICES];
 	char 			  *info_kbuf;
 	long			  last_queue_full;
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index ab62013..9b2e219 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -2534,9 +2534,9 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size)
 		MPT_SCSI_HOST *hd =  shost_priv(ioc->sh);
 
 		if (hd && (cim_rev == 1)) {
-			karg.hard_resets = hd->hard_resets;
-			karg.soft_resets = hd->soft_resets;
-			karg.timeouts = hd->timeouts;
+			karg.hard_resets = ioc->hard_resets;
+			karg.soft_resets = ioc->soft_resets;
+			karg.timeouts = ioc->timeouts;
 		}
 	}
 
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 6aa9126..da22141 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -724,8 +724,8 @@ mptsas_setup_wide_ports(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
 		 * Forming a port
 		 */
 		if (!port_details) {
-			port_details = kzalloc(sizeof(*port_details),
-				GFP_KERNEL);
+			port_details = kzalloc(sizeof(struct
+				mptsas_portinfo_details), GFP_KERNEL);
 			if (!port_details)
 				goto out;
 			port_details->num_phys = 1;
@@ -952,7 +952,7 @@ mptsas_target_reset_queue(MPT_ADAPTER *ioc,
 
 	vtarget->deleted = 1; /* block IO */
 
-	target_reset_list = kzalloc(sizeof(*target_reset_list),
+	target_reset_list = kzalloc(sizeof(struct mptsas_target_reset_event),
 	    GFP_ATOMIC);
 	if (!target_reset_list) {
 		dfailprintk(ioc, printk(MYIOC_s_WARN_FMT
@@ -1791,8 +1791,13 @@ static int mptsas_mgmt_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *req,
 		memcpy(ioc->sas_mgmt.reply, reply,
 		    min(ioc->reply_sz, 4 * reply->u.reply.MsgLength));
 	}
-	complete(&ioc->sas_mgmt.done);
-	return 1;
+
+	if (ioc->sas_mgmt.status & MPT_MGMT_STATUS_PENDING) {
+		ioc->sas_mgmt.status &= ~MPT_MGMT_STATUS_PENDING;
+		complete(&ioc->sas_mgmt.done);
+		return 1;
+	}
+	return 0;
 }
 
 static int mptsas_phy_reset(struct sas_phy *phy, int hard_reset)
@@ -1831,6 +1836,7 @@ static int mptsas_phy_reset(struct sas_phy *phy, int hard_reset)
 		MPI_SAS_OP_PHY_HARD_RESET : MPI_SAS_OP_PHY_LINK_RESET;
 	req->PhyNum = phy->identify.phy_identifier;
 
+	INITIALIZE_MGMT_STATUS(ioc->sas_mgmt.status)
 	mpt_put_msg_frame(mptsasMgmtCtx, ioc, mf);
 
 	timeleft = wait_for_completion_timeout(&ioc->sas_mgmt.done,
@@ -1862,6 +1868,7 @@ static int mptsas_phy_reset(struct sas_phy *phy, int hard_reset)
 	error = 0;
 
  out_unlock:
+	CLEAR_MGMT_STATUS(ioc->sas_mgmt.status)
 	mutex_unlock(&ioc->sas_mgmt.mutex);
  out:
 	return error;
@@ -1999,10 +2006,15 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 	if (!dma_addr_out)
 		goto put_mf;
 	ioc->add_sge(psge, flagsLength, dma_addr_out);
-	psge += (sizeof(u32) + sizeof(dma_addr_t));
+	psge += ioc->SGE_size;
 
 	/* response */
-	flagsLength = MPT_SGE_FLAGS_SSIMPLE_READ;
+	flagsLength = MPI_SGE_FLAGS_SIMPLE_ELEMENT |
+		MPI_SGE_FLAGS_SYSTEM_ADDRESS |
+		MPI_SGE_FLAGS_IOC_TO_HOST |
+		MPI_SGE_FLAGS_END_OF_BUFFER;
+
+	flagsLength = flagsLength << MPI_SGE_FLAGS_SHIFT;
 	flagsLength |= rsp->data_len + 4;
 	dma_addr_in =  pci_map_single(ioc->pcidev, bio_data(rsp->bio),
 				      rsp->data_len, PCI_DMA_BIDIRECTIONAL);
@@ -2010,6 +2022,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		goto unmap;
 	ioc->add_sge(psge, flagsLength, dma_addr_in);
 
+	INITIALIZE_MGMT_STATUS(ioc->sas_mgmt.status)
 	mpt_put_msg_frame(mptsasMgmtCtx, ioc, mf);
 
 	timeleft = wait_for_completion_timeout(&ioc->sas_mgmt.done, 10 * HZ);
@@ -2031,7 +2044,8 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		req->data_len = 0;
 		rsp->data_len -= smprep->ResponseDataLength;
 	} else {
-		printk(MYIOC_s_ERR_FMT "%s: smp passthru reply failed to be returned\n",
+		printk(MYIOC_s_ERR_FMT
+		    "%s: smp passthru reply failed to be returned\n",
 		    ioc->name, __func__);
 		ret = -ENXIO;
 	}
@@ -2046,6 +2060,7 @@ put_mf:
 	if (mf)
 		mpt_free_msg_frame(ioc, mf);
 out_unlock:
+	CLEAR_MGMT_STATUS(ioc->sas_mgmt.status)
 	mutex_unlock(&ioc->sas_mgmt.mutex);
 out:
 	return ret;
@@ -2109,7 +2124,7 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
 
 	port_info->num_phys = buffer->NumPhys;
 	port_info->phy_info = kcalloc(port_info->num_phys,
-		sizeof(*port_info->phy_info),GFP_KERNEL);
+		sizeof(struct mptsas_phyinfo), GFP_KERNEL);
 	if (!port_info->phy_info) {
 		error = -ENOMEM;
 		goto out_free_consistent;
@@ -2271,10 +2286,6 @@ mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info,
 	__le64 sas_address;
 	int error=0;
 
-	if (ioc->sas_discovery_runtime &&
-		mptsas_is_end_device(device_info))
-			goto out;
-
 	hdr.PageVersion = MPI_SASDEVICE0_PAGEVERSION;
 	hdr.ExtPageLength = 0;
 	hdr.PageNumber = 0;
@@ -2315,6 +2326,7 @@ mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info,
 
 	mptsas_print_device_pg0(ioc, buffer);
 
+	memset(device_info, 0, sizeof(struct mptsas_devinfo));
 	device_info->handle = le16_to_cpu(buffer->DevHandle);
 	device_info->handle_parent = le16_to_cpu(buffer->ParentDevHandle);
 	device_info->handle_enclosure =
@@ -2346,7 +2358,9 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
 	SasExpanderPage0_t *buffer;
 	dma_addr_t dma_handle;
 	int i, error;
+	__le64 sas_address;
 
+	memset(port_info, 0, sizeof(struct mptsas_portinfo));
 	hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION;
 	hdr.ExtPageLength = 0;
 	hdr.PageNumber = 0;
@@ -2392,18 +2406,23 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
 	}
 
 	/* save config data */
-	port_info->num_phys = buffer->NumPhys;
+	port_info->num_phys = (buffer->NumPhys) ? buffer->NumPhys : 1;
 	port_info->phy_info = kcalloc(port_info->num_phys,
-		sizeof(*port_info->phy_info),GFP_KERNEL);
+		sizeof(struct mptsas_phyinfo), GFP_KERNEL);
 	if (!port_info->phy_info) {
 		error = -ENOMEM;
 		goto out_free_consistent;
 	}
 
+	memcpy(&sas_address, &buffer->SASAddress, sizeof(__le64));
 	for (i = 0; i < port_info->num_phys; i++) {
 		port_info->phy_info[i].portinfo = port_info;
 		port_info->phy_info[i].handle =
 		    le16_to_cpu(buffer->DevHandle);
+		port_info->phy_info[i].identify.sas_address =
+		    le64_to_cpu(sas_address);
+		port_info->phy_info[i].identify.handle_parent =
+		    le16_to_cpu(buffer->ParentDevHandle);
 	}
 
  out_free_consistent:
@@ -2423,11 +2442,7 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
 	dma_addr_t dma_handle;
 	int error=0;
 
-	if (ioc->sas_discovery_runtime &&
-		mptsas_is_end_device(&phy_info->attached))
-			goto out;
-
-	hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION;
+	hdr.PageVersion = MPI_SASEXPANDER1_PAGEVERSION;
 	hdr.ExtPageLength = 0;
 	hdr.PageNumber = 1;
 	hdr.Reserved1 = 0;
@@ -2462,6 +2477,12 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
 	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
 
 	error = mpt_config(ioc, &cfg);
+
+	if (error == MPI_IOCSTATUS_CONFIG_INVALID_PAGE) {
+		error = -ENODEV;
+		goto out;
+	}
+
 	if (error)
 		goto out_free_consistent;
 
@@ -2681,16 +2702,21 @@ static int mptsas_probe_one_phy(struct device *dev,
 				goto out;
 			}
 			mptsas_set_port(ioc, phy_info, port);
-			dsaswideprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-			    "sas_port_alloc: port=%p dev=%p port_id=%d\n",
-			    ioc->name, port, dev, port->port_identifier));
+			devtprintk(ioc, dev_printk(KERN_DEBUG, &port->dev,
+			    MYIOC_s_FMT "add port %d, sas_addr (0x%llx)\n",
+			    ioc->name, port->port_identifier,
+			    (unsigned long long)phy_info->
+			    attached.sas_address));
 		}
-		dsaswideprintk(ioc, printk(MYIOC_s_DEBUG_FMT "sas_port_add_phy: phy_id=%d\n",
-		    ioc->name, phy_info->phy_id));
+		dsaswideprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+			"sas_port_add_phy: phy_id=%d\n",
+			ioc->name, phy_info->phy_id));
 		sas_port_add_phy(port, phy_info->phy);
 		phy_info->sas_port_add_phy = 0;
+		devtprintk(ioc, dev_printk(KERN_DEBUG, &phy_info->phy->dev,
+		    MYIOC_s_FMT "add phy %d, phy-obj (0x%p)\n", ioc->name,
+		     phy_info->phy_id, phy_info->phy));
 	}
-
 	if (!mptsas_get_rphy(phy_info) && port && !port->rphy) {
 
 		struct sas_rphy *rphy;
@@ -2703,9 +2729,10 @@ static int mptsas_probe_one_phy(struct device *dev,
 		 * the adding/removing of devices that occur
 		 * after start of day.
 		 */
-		if (ioc->sas_discovery_runtime &&
-			mptsas_is_end_device(&phy_info->attached))
-				goto out;
+		if (mptsas_is_end_device(&phy_info->attached) &&
+		    phy_info->attached.handle_parent) {
+			goto out;
+		}
 
 		mptsas_parse_device_info(&identify, &phy_info->attached);
 		if (scsi_is_host_device(parent)) {
@@ -3420,9 +3447,12 @@ mptsas_probe_devices(MPT_ADAPTER *ioc)
 	}
 }
 
-/*
- * Start of day discovery
- */
+/**
+ *	mptsas_scan_sas_topology -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@sas_address:
+ *
+ **/
 static void
 mptsas_scan_sas_topology(MPT_ADAPTER *ioc)
 {
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 477f6f8..6424dcb 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -80,7 +80,6 @@ MODULE_VERSION(my_VERSION);
 /*
  *  Other private/forward protos...
  */
-static struct scsi_cmnd * mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i);
 static struct scsi_cmnd * mptscsih_getclear_scsi_lookup(MPT_ADAPTER *ioc, int i);
 static void	mptscsih_set_scsi_lookup(MPT_ADAPTER *ioc, int i, struct scsi_cmnd *scmd);
 static int	SCPNT_TO_LOOKUP_IDX(MPT_ADAPTER *ioc, struct scsi_cmnd *scmd);
@@ -236,7 +235,8 @@ nextSGEset:
 	for (ii=0; ii < (numSgeThisFrame-1); ii++) {
 		thisxfer = sg_dma_len(sg);
 		if (thisxfer == 0) {
-			sg = sg_next(sg); /* Get next SG element from the OS */
+			/* Get next SG element from the OS */
+			sg = sg_next(sg);
 			sg_done++;
 			continue;
 		}
@@ -244,7 +244,8 @@ nextSGEset:
 		v2 = sg_dma_address(sg);
 		ioc->add_sge(psge, sgflags | thisxfer, v2);
 
-		sg = sg_next(sg);	/* Get next SG element from the OS */
+		/* Get next SG element from the OS */
+		sg = sg_next(sg);
 		psge += ioc->SGE_size;
 		sgeOffset += ioc->SGE_size;
 		sg_done++;
@@ -533,14 +534,15 @@ mptscsih_info_scsiio(MPT_ADAPTER *ioc, struct scsi_cmnd *sc, SCSIIOReply_t * pSc
 	}
 
 	scsi_print_command(sc);
-	printk(MYIOC_s_DEBUG_FMT "\tfw_channel = %d, fw_id = %d\n",
-	    ioc->name, pScsiReply->Bus, pScsiReply->TargetID);
+	printk(MYIOC_s_DEBUG_FMT "\tfw_channel = %d, fw_id = %d, lun = %d\n",
+	    ioc->name, pScsiReply->Bus, pScsiReply->TargetID, sc->device->lun);
 	printk(MYIOC_s_DEBUG_FMT "\trequest_len = %d, underflow = %d, "
 	    "resid = %d\n", ioc->name, scsi_bufflen(sc), sc->underflow,
 	    scsi_get_resid(sc));
 	printk(MYIOC_s_DEBUG_FMT "\ttag = %d, transfer_count = %d, "
 	    "sc->result = %08X\n", ioc->name, le16_to_cpu(pScsiReply->TaskTag),
 	    le32_to_cpu(pScsiReply->TransferCount), sc->result);
+
 	printk(MYIOC_s_DEBUG_FMT "\tiocstatus = %s (0x%04x), "
 	    "scsi_status = %s (0x%02x), scsi_state = (0x%02x)\n",
 	    ioc->name, desc, ioc_status, desc1, pScsiReply->SCSIStatus,
@@ -595,16 +597,14 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 	req_idx = le16_to_cpu(mf->u.frame.hwhdr.msgctxu.fld.req_idx);
 	req_idx_MR = (mr != NULL) ?
 	    le16_to_cpu(mr->u.frame.hwhdr.msgctxu.fld.req_idx) : req_idx;
+
+	/* Special case, where already freed message frame is received from
+	 * Firmware. It happens with Resetting IOC.
+	 * Return immediately. Do not care
+	 */
 	if ((req_idx != req_idx_MR) ||
-	    (mf->u.frame.linkage.arg1 == 0xdeadbeaf)) {
-		printk(MYIOC_s_ERR_FMT "Received a mf that was already freed\n",
-		    ioc->name);
-		printk (MYIOC_s_ERR_FMT
-		    "req_idx=%x req_idx_MR=%x mf=%p mr=%p sc=%p\n",
-		    ioc->name, req_idx, req_idx_MR, mf, mr,
-		    mptscsih_get_scsi_lookup(ioc, req_idx_MR));
+	    (le32_to_cpu(mf->u.frame.linkage.arg1) == 0xdeadbeaf))
 		return 0;
-	}
 
 	sc = mptscsih_getclear_scsi_lookup(ioc, req_idx);
 	if (sc == NULL) {
@@ -751,12 +751,16 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 			 */
 
 		case MPI_IOCSTATUS_SCSI_TASK_TERMINATED:	/* 0x0048 */
-		case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
 			/* Linux handles an unsolicited DID_RESET better
 			 * than an unsolicited DID_ABORT.
 			 */
 			sc->result = DID_RESET << 16;
 
+		case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
+			if (ioc->bus_type == FC)
+				sc->result = DID_ERROR << 16;
+			else
+				sc->result = DID_RESET << 16;
 			break;
 
 		case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH:	/* 0x0049 */
@@ -933,9 +937,9 @@ mptscsih_flush_running_cmds(MPT_SCSI_HOST *hd)
 		scsi_dma_unmap(sc);
 		sc->result = DID_RESET << 16;
 		sc->host_scribble = NULL;
-		sdev_printk(KERN_INFO, sc->device, MYIOC_s_FMT
-		    "completing cmds: fw_channel %d, fw_id %d, sc=%p,"
-		    " mf = %p, idx=%x\n", ioc->name, channel, id, sc, mf, ii);
+		dtmprintk(ioc, sdev_printk(KERN_INFO, sc->device, MYIOC_s_FMT
+		    "completing cmds: fw_channel %d, fw_id %d, sc=%p, mf = %p, "
+		    "idx=%x\n", ioc->name, channel, id, sc, mf, ii));
 		sc->scsi_done(sc);
 	}
 }
@@ -994,9 +998,11 @@ mptscsih_search_running_cmds(MPT_SCSI_HOST *hd, VirtDevice *vdevice)
 			scsi_dma_unmap(sc);
 			sc->host_scribble = NULL;
 			sc->result = DID_NO_CONNECT << 16;
-			sdev_printk(KERN_INFO, sc->device, MYIOC_s_FMT "completing cmds: fw_channel %d,"
-			   "fw_id %d, sc=%p, mf = %p, idx=%x\n", ioc->name, vdevice->vtarget->channel,
-			   vdevice->vtarget->id, sc, mf, ii);
+			dtmprintk(ioc, sdev_printk(KERN_INFO, sc->device,
+			   MYIOC_s_FMT "completing cmds: fw_channel %d, "
+			   "fw_id %d, sc=%p, mf = %p, idx=%x\n", ioc->name,
+			   vdevice->vtarget->channel, vdevice->vtarget->id,
+			   sc, mf, ii));
 			sc->scsi_done(sc);
 			spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 		}
@@ -1287,7 +1293,6 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	MPT_FRAME_HDR		*mf;
 	SCSIIORequest_t		*pScsiReq;
 	VirtDevice		*vdevice = SCpnt->device->hostdata;
-	int	 lun;
 	u32	 datalen;
 	u32	 scsictl;
 	u32	 scsidir;
@@ -1298,7 +1303,6 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 
 	hd = shost_priv(SCpnt->device->host);
 	ioc = hd->ioc;
-	lun = SCpnt->device->lun;
 	SCpnt->scsi_done = done;
 
 	dmfprintk(ioc, printk(MYIOC_s_DEBUG_FMT "qcmd: SCpnt=%p, done()=%p\n",
@@ -1709,8 +1713,8 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 		goto out;
 	}
 
-	if (hd->timeouts < -1)
-		hd->timeouts++;
+	if (ioc->timeouts < -1)
+		ioc->timeouts++;
 
 	if (mpt_fwfault_debug)
 		mpt_halt_firmware(ioc);
@@ -1734,17 +1738,23 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 			 ctx2abort, mptscsih_get_tm_timeout(ioc));
 
 	if (SCPNT_TO_LOOKUP_IDX(ioc, SCpnt) == scpnt_idx &&
-	    SCpnt->serial_number == sn)
+	    SCpnt->serial_number == sn) {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "task abort: command still in active list! (sc=%p)\n",
+		    ioc->name, SCpnt));
 		retval = FAILED;
+	} else {
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "task abort: command cleared from active list! (sc=%p)\n",
+		    ioc->name, SCpnt));
+		retval = SUCCESS;
+	}
 
  out:
 	printk(MYIOC_s_INFO_FMT "task abort: %s (sc=%p)\n",
-	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
+	    ioc->name, ((retval == SUCCESS) ? "SUCCESS" : "FAILED"), SCpnt);
 
-	if (retval == 0)
-		return SUCCESS;
-	else
-		return FAILED;
+	return retval;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -1779,7 +1789,7 @@ mptscsih_dev_reset(struct scsi_cmnd * SCpnt)
 
 	vdevice = SCpnt->device->hostdata;
 	if (!vdevice || !vdevice->vtarget) {
-		retval = 0;
+		retval = SUCCESS;
 		goto out;
 	}
 
@@ -1837,10 +1847,12 @@ mptscsih_bus_reset(struct scsi_cmnd * SCpnt)
 	       ioc->name, SCpnt);
 	scsi_print_command(SCpnt);
 
-	if (hd->timeouts < -1)
-		hd->timeouts++;
+	if (ioc->timeouts < -1)
+		ioc->timeouts++;
 
 	vdevice = SCpnt->device->hostdata;
+	if (!vdevice || !vdevice->vtarget)
+		return SUCCESS;
 	retval = mptscsih_IssueTaskMgmt(hd,
 					MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
 					vdevice->vtarget->channel, 0, 0, 0,
@@ -1868,8 +1880,9 @@ int
 mptscsih_host_reset(struct scsi_cmnd *SCpnt)
 {
 	MPT_SCSI_HOST *  hd;
-	int              retval;
+	int              status = SUCCESS;
 	MPT_ADAPTER	*ioc;
+	int		retval;
 
 	/*  If we can't locate the host to reset, then we failed. */
 	if ((hd = shost_priv(SCpnt->device->host)) == NULL){
@@ -1888,19 +1901,16 @@ mptscsih_host_reset(struct scsi_cmnd *SCpnt)
 	/*  If our attempts to reset the host failed, then return a failed
 	 *  status.  The host will be taken off line by the SCSI mid-layer.
 	 */
-	if (mpt_HardResetHandler(ioc, CAN_SLEEP) < 0) {
-		retval = FAILED;
-	} else {
-		/*  Make sure TM pending is cleared and TM state is set to
-		 *  NONE.
-		 */
-		retval = 0;
-	}
+    retval = mpt_HardResetHandler(ioc, CAN_SLEEP);
+	if (retval < 0)
+		status = FAILED;
+	else
+		status = SUCCESS;
 
 	printk(MYIOC_s_INFO_FMT "host reset: %s (sc=%p)\n",
 	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
 
-	return retval;
+	return status;
 }
 
 static int
@@ -2244,7 +2254,6 @@ mptscsih_slave_configure(struct scsi_device *sdev)
 		    sdev->ppr, sdev->inquiry_len));
 
 	vdevice->configured_lun = 1;
-	mptscsih_change_queue_depth(sdev, MPT_SCSI_CMD_PER_DEV_HIGH);
 
 	dsprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		"Queue depth=%d, tflags=%x\n",
@@ -2256,6 +2265,7 @@ mptscsih_slave_configure(struct scsi_device *sdev)
 		    ioc->name, vtarget->negoFlags, vtarget->maxOffset,
 		    vtarget->minSyncFactor));
 
+	mptscsih_change_queue_depth(sdev, MPT_SCSI_CMD_PER_DEV_HIGH);
 	dsprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		"tagged %d, simple %d, ordered %d\n",
 		ioc->name,sdev->tagged_supported, sdev->simple_tags,
@@ -2328,36 +2338,17 @@ mptscsih_copy_sense_data(struct scsi_cmnd *sc, MPT_SCSI_HOST *hd, MPT_FRAME_HDR
 	}
 }
 
-/**
- * mptscsih_get_scsi_lookup
- * @ioc: Pointer to MPT_ADAPTER structure
- * @i: index into the array
- *
- * retrieves scmd entry from ScsiLookup[] array list
- *
- * Returns the scsi_cmd pointer
- **/
-static struct scsi_cmnd *
-mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i)
-{
-	unsigned long	flags;
-	struct scsi_cmnd *scmd;
-
-	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
-	scmd = ioc->ScsiLookup[i];
-	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
-
-	return scmd;
-}
 
 /**
  * mptscsih_getclear_scsi_lookup
- * @ioc: Pointer to MPT_ADAPTER structure
- * @i: index into the array
  *
  * retrieves and clears scmd entry from ScsiLookup[] array list
  *
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @i: index into the array
+ *
  * Returns the scsi_cmd pointer
+ *
  **/
 static struct scsi_cmnd *
 mptscsih_getclear_scsi_lookup(MPT_ADAPTER *ioc, int i)
@@ -2456,57 +2447,16 @@ mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 int
 mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 {
-	MPT_SCSI_HOST *hd;
 	u8 event = le32_to_cpu(pEvReply->Event) & 0xFF;
 
 	devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT
 		"MPT event (=%02Xh) routed to SCSI host driver!\n",
 		ioc->name, event));
 
-	if (ioc->sh == NULL ||
-		((hd = shost_priv(ioc->sh)) == NULL))
-		return 1;
-
-	switch (event) {
-	case MPI_EVENT_UNIT_ATTENTION:			/* 03 */
-		/* FIXME! */
-		break;
-	case MPI_EVENT_IOC_BUS_RESET:			/* 04 */
-	case MPI_EVENT_EXT_BUS_RESET:			/* 05 */
-		if (hd && (ioc->bus_type == SPI) && (hd->soft_resets < -1))
-			hd->soft_resets++;
-		break;
-	case MPI_EVENT_LOGOUT:				/* 09 */
-		/* FIXME! */
-		break;
-
-	case MPI_EVENT_RESCAN:				/* 06 */
-		break;
-
-		/*
-		 *  CHECKME! Don't think we need to do
-		 *  anything for these, but...
-		 */
-	case MPI_EVENT_LINK_STATUS_CHANGE:		/* 07 */
-	case MPI_EVENT_LOOP_STATE_CHANGE:		/* 08 */
-		/*
-		 *  CHECKME!  Falling thru...
-		 */
-		break;
-
-	case MPI_EVENT_INTEGRATED_RAID:			/* 0B */
-		break;
-
-	case MPI_EVENT_NONE:				/* 00 */
-	case MPI_EVENT_LOG_DATA:			/* 01 */
-	case MPI_EVENT_STATE_CHANGE:			/* 02 */
-	case MPI_EVENT_EVENT_CHANGE:			/* 0A */
-	default:
-		dprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-			": Ignoring event (=%02Xh)\n",
-			ioc->name, event));
-		break;
-	}
+	if ((event == MPI_EVENT_IOC_BUS_RESET ||
+	    event == MPI_EVENT_EXT_BUS_RESET) &&
+	    (ioc->bus_type == SPI) && (ioc->soft_resets < -1))
+			ioc->soft_resets++;
 
 	return 1;		/* currently means nothing really */
 }
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 91e9e9f..5727395 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -90,6 +90,7 @@
 
 #endif
 
+
 typedef struct _internal_cmd {
 	char		*data;		/* data pointer */
 	dma_addr_t	data_dma;	/* data dma address */
-- 
cgit v0.10.2


From 71278192a887d7da3e768809c6fe9979d172ff23 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:53:14 +0530
Subject: [SCSI] mpt fusion: Put IOC into ready state if it not already in
 ready state

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 8f04d37..9f6b315 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -2667,6 +2667,22 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
 		}
 	}
 
+	/*
+	 * Put the controller into ready state (if its not already)
+	 */
+	if (mpt_GetIocState(ioc, 1) != MPI_IOC_STATE_READY) {
+		if (!SendIocReset(ioc, MPI_FUNCTION_IOC_MESSAGE_UNIT_RESET,
+		    CAN_SLEEP)) {
+			if (mpt_GetIocState(ioc, 1) != MPI_IOC_STATE_READY)
+				printk(MYIOC_s_ERR_FMT "%s:  IOC msg unit "
+				    "reset failed to put ioc in ready state!\n",
+				    ioc->name, __func__);
+		} else
+			printk(MYIOC_s_ERR_FMT "%s:  IOC msg unit reset "
+			    "failed!\n", ioc->name, __func__);
+	}
+
+
 	/* Disable adapter interrupts! */
 	synchronize_irq(ioc->pcidev->irq);
 	CHIPREG_WRITE32(&ioc->chip->IntMask, 0xFFFFFFFF);
-- 
cgit v0.10.2


From a7938b0bb3b458fe0723608be3db6c4ed8d79a8c Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:53:56 +0530
Subject: [SCSI] mpt fusion: RAID device handling and Dual port Raid support is
 added

1. Handle integrated Raid device(Add/Delete) and error condition and check
   related to Raid device. is_logical_volume will represent logical volume
   device.
2. Raid device dual port support is added. Main functions to support this
   feature are mpt_raid_phys_disk_get_num_paths and mpt_raid_phys_disk_pg1.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 9f6b315..44b9315 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -5763,6 +5763,161 @@ mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num,
 }
 
 /**
+ *	mpt_raid_phys_disk_get_num_paths - returns number paths associated to this phys_num
+ *	@ioc: Pointer to a Adapter Structure
+ *	@phys_disk_num: io unit unique phys disk num generated by the ioc
+ *
+ *	Return:
+ *	returns number paths
+ **/
+int
+mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc, u8 phys_disk_num)
+{
+	CONFIGPARMS		 	cfg;
+	ConfigPageHeader_t	 	hdr;
+	dma_addr_t			dma_handle;
+	pRaidPhysDiskPage1_t		buffer = NULL;
+	int				rc;
+
+	memset(&cfg, 0 , sizeof(CONFIGPARMS));
+	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
+
+	hdr.PageVersion = MPI_RAIDPHYSDISKPAGE1_PAGEVERSION;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_RAID_PHYSDISK;
+	hdr.PageNumber = 1;
+	cfg.cfghdr.hdr = &hdr;
+	cfg.physAddr = -1;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+
+	if (mpt_config(ioc, &cfg) != 0) {
+		rc = 0;
+		goto out;
+	}
+
+	if (!hdr.PageLength) {
+		rc = 0;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
+	    &dma_handle);
+
+	if (!buffer) {
+		rc = 0;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+	cfg.pageAddr = phys_disk_num;
+
+	if (mpt_config(ioc, &cfg) != 0) {
+		rc = 0;
+		goto out;
+	}
+
+	rc = buffer->NumPhysDiskPaths;
+ out:
+
+	if (buffer)
+		pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
+		    dma_handle);
+
+	return rc;
+}
+EXPORT_SYMBOL(mpt_raid_phys_disk_get_num_paths);
+
+/**
+ *	mpt_raid_phys_disk_pg1 - returns phys disk page 1
+ *	@ioc: Pointer to a Adapter Structure
+ *	@phys_disk_num: io unit unique phys disk num generated by the ioc
+ *	@phys_disk: requested payload data returned
+ *
+ *	Return:
+ *	0 on success
+ *	-EFAULT if read of config page header fails or data pointer not NULL
+ *	-ENOMEM if pci_alloc failed
+ **/
+int
+mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num,
+		RaidPhysDiskPage1_t *phys_disk)
+{
+	CONFIGPARMS		 	cfg;
+	ConfigPageHeader_t	 	hdr;
+	dma_addr_t			dma_handle;
+	pRaidPhysDiskPage1_t		buffer = NULL;
+	int				rc;
+	int				i;
+	__le64				sas_address;
+
+	memset(&cfg, 0 , sizeof(CONFIGPARMS));
+	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
+	rc = 0;
+
+	hdr.PageVersion = MPI_RAIDPHYSDISKPAGE1_PAGEVERSION;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_RAID_PHYSDISK;
+	hdr.PageNumber = 1;
+	cfg.cfghdr.hdr = &hdr;
+	cfg.physAddr = -1;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+
+	if (mpt_config(ioc, &cfg) != 0) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	if (!hdr.PageLength) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
+	    &dma_handle);
+
+	if (!buffer) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+	cfg.pageAddr = phys_disk_num;
+
+	if (mpt_config(ioc, &cfg) != 0) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	phys_disk->NumPhysDiskPaths = buffer->NumPhysDiskPaths;
+	phys_disk->PhysDiskNum = phys_disk_num;
+	for (i = 0; i < phys_disk->NumPhysDiskPaths; i++) {
+		phys_disk->Path[i].PhysDiskID = buffer->Path[i].PhysDiskID;
+		phys_disk->Path[i].PhysDiskBus = buffer->Path[i].PhysDiskBus;
+		phys_disk->Path[i].OwnerIdentifier =
+				buffer->Path[i].OwnerIdentifier;
+		phys_disk->Path[i].Flags = le16_to_cpu(buffer->Path[i].Flags);
+		memcpy(&sas_address, &buffer->Path[i].WWID, sizeof(__le64));
+		sas_address = le64_to_cpu(sas_address);
+		memcpy(&phys_disk->Path[i].WWID, &sas_address, sizeof(__le64));
+		memcpy(&sas_address,
+				&buffer->Path[i].OwnerWWID, sizeof(__le64));
+		sas_address = le64_to_cpu(sas_address);
+		memcpy(&phys_disk->Path[i].OwnerWWID,
+				&sas_address, sizeof(__le64));
+	}
+
+ out:
+
+	if (buffer)
+		pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
+		    dma_handle);
+
+	return rc;
+}
+EXPORT_SYMBOL(mpt_raid_phys_disk_pg1);
+
+
+/**
  *	mpt_findImVolumes - Identify IDs of hidden disks and RAID Volumes
  *	@ioc: Pointer to a Adapter Strucutre
  *
@@ -7170,6 +7325,18 @@ mpt_display_event_info(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply)
 			    "id=%d channel=%d phys_num=%d",
 			    id, channel, phys_num);
 			break;
+		case MPI_EVENT_IR2_RC_DUAL_PORT_ADDED:
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Dual Port Added: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
+			break;
+		case MPI_EVENT_IR2_RC_DUAL_PORT_REMOVED:
+			snprintf(evStr, EVENT_DESCR_STR_SZ,
+			    "IR2: Dual Port Removed: "
+			    "id=%d channel=%d phys_num=%d",
+			    id, channel, phys_num);
+			break;
 		default:
 			ds = "IR2";
 		break;
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 91499d1..4f3d4c3 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -958,6 +958,10 @@ extern void	 mpt_free_fw_memory(MPT_ADAPTER *ioc);
 extern int	 mpt_findImVolumes(MPT_ADAPTER *ioc);
 extern int	 mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode);
 extern int	 mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, pRaidPhysDiskPage0_t phys_disk);
+extern int	mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num,
+		pRaidPhysDiskPage1_t phys_disk);
+extern int	mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc,
+		u8 phys_disk_num);
 extern int	 mpt_set_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc);
 extern void	 mpt_clear_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc);
 extern void     mpt_halt_firmware(MPT_ADAPTER *ioc);
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index da22141..7215823 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -121,6 +121,7 @@ static void mptsas_expander_delete(MPT_ADAPTER *ioc,
 static void mptsas_send_expander_event(struct fw_event_work *fw_event);
 static void mptsas_not_responding_devices(MPT_ADAPTER *ioc);
 static void mptsas_scan_sas_topology(MPT_ADAPTER *ioc);
+static void mptsas_volume_delete(MPT_ADAPTER *ioc, u8 id);
 
 static void mptsas_print_phy_data(MPT_ADAPTER *ioc,
 					MPI_SAS_IO_UNIT0_PHY_DATA *phy_data)
@@ -542,9 +543,10 @@ mptsas_add_device_component(MPT_ADAPTER *ioc, u8 channel, u8 id,
 	mutex_lock(&ioc->sas_device_info_mutex);
 	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
 	    list) {
-		if ((sas_info->sas_address == sas_address ||
-			(sas_info->fw.channel == channel &&
-			sas_info->fw.id == id))) {
+		if (!sas_info->is_logical_volume &&
+		    (sas_info->sas_address == sas_address ||
+		    (sas_info->fw.channel == channel &&
+		     sas_info->fw.id == id))) {
 			list_del(&sas_info->list);
 			kfree(sas_info);
 		}
@@ -617,6 +619,100 @@ mptsas_add_device_component_by_fw(MPT_ADAPTER *ioc, u8 channel, u8 id)
 }
 
 /**
+ *	mptsas_add_device_component_starget_ir - Handle Integrated RAID, adding
+ *	each individual device to list
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@channel: fw mapped id's
+ *	@id:
+ *
+ **/
+static void
+mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc,
+		struct scsi_target *starget)
+{
+	CONFIGPARMS			cfg;
+	ConfigPageHeader_t		hdr;
+	dma_addr_t			dma_handle;
+	pRaidVolumePage0_t		buffer = NULL;
+	int				i;
+	RaidPhysDiskPage0_t 		phys_disk;
+	struct mptsas_device_info	*sas_info, *next;
+
+	memset(&cfg, 0 , sizeof(CONFIGPARMS));
+	memset(&hdr, 0 , sizeof(ConfigPageHeader_t));
+	hdr.PageType = MPI_CONFIG_PAGETYPE_RAID_VOLUME;
+	/* assumption that all volumes on channel = 0 */
+	cfg.pageAddr = starget->id;
+	cfg.cfghdr.hdr = &hdr;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+	cfg.timeout = 10;
+
+	if (mpt_config(ioc, &cfg) != 0)
+		goto out;
+
+	if (!hdr.PageLength)
+		goto out;
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4,
+	    &dma_handle);
+
+	if (!buffer)
+		goto out;
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	if (mpt_config(ioc, &cfg) != 0)
+		goto out;
+
+	if (!buffer->NumPhysDisks)
+		goto out;
+
+	/*
+	 * Adding entry for hidden components
+	 */
+	for (i = 0; i < buffer->NumPhysDisks; i++) {
+
+		if (mpt_raid_phys_disk_pg0(ioc,
+		    buffer->PhysDisk[i].PhysDiskNum, &phys_disk) != 0)
+			continue;
+
+		mptsas_add_device_component_by_fw(ioc, phys_disk.PhysDiskBus,
+		    phys_disk.PhysDiskID);
+
+	}
+
+	/*
+	 * Delete all matching devices out of the list
+	 */
+	mutex_lock(&ioc->sas_device_info_mutex);
+	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
+	    list) {
+		if (sas_info->is_logical_volume && sas_info->fw.id ==
+		    starget->id) {
+			list_del(&sas_info->list);
+			kfree(sas_info);
+		}
+	}
+
+	sas_info = kzalloc(sizeof(struct mptsas_device_info), GFP_KERNEL);
+	if (sas_info) {
+		sas_info->fw.id = starget->id;
+		sas_info->os.id = starget->id;
+		sas_info->os.channel = starget->channel;
+		sas_info->is_logical_volume = 1;
+		INIT_LIST_HEAD(&sas_info->list);
+		list_add_tail(&sas_info->list, &ioc->sas_device_info_list);
+	}
+	mutex_unlock(&ioc->sas_device_info_mutex);
+
+ out:
+	if (buffer)
+		pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer,
+		    dma_handle);
+}
+
+/**
  *	mptsas_add_device_component_starget -
  *	@ioc: Pointer to MPT_ADAPTER structure
  *	@starget:
@@ -817,6 +913,10 @@ mptsas_find_vtarget(MPT_ADAPTER *ioc, u8 channel, u8 id)
 		if ((vdevice == NULL) ||
 			(vdevice->vtarget == NULL))
 			continue;
+		if ((vdevice->vtarget->tflags &
+		    MPT_TARGET_FLAGS_RAID_COMPONENT ||
+		    vdevice->vtarget->raidVolume))
+			continue;
 		if (vdevice->vtarget->id == id &&
 			vdevice->vtarget->channel == channel)
 			vtarget = vdevice->vtarget;
@@ -1487,9 +1587,21 @@ mptsas_slave_configure(struct scsi_device *sdev)
 	struct Scsi_Host	*host = sdev->host;
 	MPT_SCSI_HOST	*hd = shost_priv(host);
 	MPT_ADAPTER	*ioc = hd->ioc;
+	VirtDevice	*vdevice = sdev->hostdata;
 
-	if (sdev->channel == MPTSAS_RAID_CHANNEL)
+	if (vdevice->vtarget->deleted) {
+		sdev_printk(KERN_INFO, sdev, "clearing deleted flag\n");
+		vdevice->vtarget->deleted = 0;
+	}
+
+	/*
+	 * RAID volumes placed beyond the last expected port.
+	 * Ignore sending sas mode pages in that case..
+	 */
+	if (sdev->channel == MPTSAS_RAID_CHANNEL) {
+		mptsas_add_device_component_starget_ir(ioc, scsi_target(sdev));
 		goto out;
+	}
 
 	sas_read_port_mode_page(sdev);
 
@@ -1525,9 +1637,18 @@ mptsas_target_alloc(struct scsi_target *starget)
 	 * RAID volumes placed beyond the last expected port.
 	 */
 	if (starget->channel == MPTSAS_RAID_CHANNEL) {
-		for (i=0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++)
-			if (id == ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID)
-				channel = ioc->raid_data.pIocPg2->RaidVolume[i].VolumeBus;
+		if (!ioc->raid_data.pIocPg2) {
+			kfree(vtarget);
+			return -ENXIO;
+		}
+		for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++) {
+			if (id == ioc->raid_data.pIocPg2->
+					RaidVolume[i].VolumeID) {
+				channel = ioc->raid_data.pIocPg2->
+					RaidVolume[i].VolumeBus;
+			}
+		}
+		vtarget->raidVolume = 1;
 		goto out;
 	}
 
@@ -3277,59 +3398,66 @@ mptsas_not_responding_devices(MPT_ADAPTER *ioc)
 	mutex_lock(&ioc->sas_device_info_mutex);
  redo_device_scan:
 	list_for_each_entry(sas_info, &ioc->sas_device_info_list, list) {
-		sas_device.handle = 0;
-		retry_count = 0;
+		if (!sas_info->is_logical_volume) {
+			sas_device.handle = 0;
+			retry_count = 0;
 retry_page:
-		retval = mptsas_sas_device_pg0(ioc, &sas_device,
+			retval = mptsas_sas_device_pg0(ioc, &sas_device,
 				(MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID
 				<< MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
 				(sas_info->fw.channel << 8) +
 				sas_info->fw.id);
 
-		if (sas_device.handle)
-			continue;
-		if (retval == -EBUSY) {
-			spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
-			if (ioc->ioc_reset_in_progress) {
-				dfailprintk(ioc,
-				    printk(MYIOC_s_DEBUG_FMT
-				    "%s: exiting due to reset\n",
-				    ioc->name, __func__));
-				spin_unlock_irqrestore
-				    (&ioc->taskmgmt_lock, flags);
-				mutex_unlock(&ioc->sas_device_info_mutex);
-				return;
+			if (sas_device.handle)
+				continue;
+			if (retval == -EBUSY) {
+				spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+				if (ioc->ioc_reset_in_progress) {
+					dfailprintk(ioc,
+					printk(MYIOC_s_DEBUG_FMT
+					"%s: exiting due to reset\n",
+					ioc->name, __func__));
+					spin_unlock_irqrestore
+					(&ioc->taskmgmt_lock, flags);
+					mutex_unlock(&ioc->
+					sas_device_info_mutex);
+					return;
+				}
+				spin_unlock_irqrestore(&ioc->taskmgmt_lock,
+				flags);
 			}
-			spin_unlock_irqrestore(&ioc->taskmgmt_lock,
-			flags);
-		}
 
-		if (retval && (retval != -ENODEV)) {
-			if (retry_count < 10) {
-				retry_count++;
-				goto retry_page;
-			} else {
-				devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
-				"%s: Config page retry exceeded retry "
-				"count deleting device 0x%llx\n",
-				ioc->name, __func__,
-				sas_info->sas_address));
+			if (retval && (retval != -ENODEV)) {
+				if (retry_count < 10) {
+					retry_count++;
+					goto retry_page;
+				} else {
+					devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+					"%s: Config page retry exceeded retry "
+					"count deleting device 0x%llx\n",
+					ioc->name, __func__,
+					sas_info->sas_address));
+				}
 			}
-		}
 
-		/* delete device */
-		vtarget = mptsas_find_vtarget(ioc,
+			/* delete device */
+			vtarget = mptsas_find_vtarget(ioc,
 				sas_info->fw.channel, sas_info->fw.id);
-		if (vtarget)
-			vtarget->deleted = 1;
-		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
-		    sas_info->sas_address);
-		if (phy_info) {
-			mptsas_del_end_device(ioc, phy_info);
-			goto redo_device_scan;
-		}
+
+			if (vtarget)
+				vtarget->deleted = 1;
+
+			phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+					sas_info->sas_address);
+
+			if (phy_info) {
+				mptsas_del_end_device(ioc, phy_info);
+				goto redo_device_scan;
+			}
+		} else
+			mptsas_volume_delete(ioc, sas_info->fw.id);
 	}
-	mutex_unlock(&ioc->sas_device_info_mutex);
+	mutex_lock(&ioc->sas_device_info_mutex);
 
 	/* expanders */
 	mutex_lock(&ioc->sas_topology_mutex);
@@ -3508,28 +3636,74 @@ mptsas_find_phyinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
 	return phy_info;
 }
 
-
+/**
+ *	mptsas_find_phyinfo_by_phys_disk_num -
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@phys_disk_num:
+ *	@channel:
+ *	@id:
+ *
+ **/
 static struct mptsas_phyinfo *
-mptsas_find_phyinfo_by_phys_disk_num(MPT_ADAPTER *ioc, u8 channel, u8 id)
+mptsas_find_phyinfo_by_phys_disk_num(MPT_ADAPTER *ioc, u8 phys_disk_num,
+	u8 channel, u8 id)
 {
-	struct mptsas_portinfo *port_info;
 	struct mptsas_phyinfo *phy_info = NULL;
+	struct mptsas_portinfo *port_info;
+	RaidPhysDiskPage1_t *phys_disk = NULL;
+	int num_paths;
+	u64 sas_address = 0;
 	int i;
 
+	phy_info = NULL;
+	if (!ioc->raid_data.pIocPg3)
+		return NULL;
+	/* dual port support */
+	num_paths = mpt_raid_phys_disk_get_num_paths(ioc, phys_disk_num);
+	if (!num_paths)
+		goto out;
+	phys_disk = kzalloc(offsetof(RaidPhysDiskPage1_t, Path) +
+	   (num_paths * sizeof(RAID_PHYS_DISK1_PATH)), GFP_KERNEL);
+	if (!phys_disk)
+		goto out;
+	mpt_raid_phys_disk_pg1(ioc, phys_disk_num, phys_disk);
+	for (i = 0; i < num_paths; i++) {
+		if ((phys_disk->Path[i].Flags & 1) != 0)
+			/* entry no longer valid */
+			continue;
+		if ((id == phys_disk->Path[i].PhysDiskID) &&
+		    (channel == phys_disk->Path[i].PhysDiskBus)) {
+			memcpy(&sas_address, &phys_disk->Path[i].WWID,
+				sizeof(u64));
+			phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+					sas_address);
+			goto out;
+		}
+	}
+
+ out:
+	kfree(phys_disk);
+	if (phy_info)
+		return phy_info;
+
+	/*
+	 * Extra code to handle RAID0 case, where the sas_address is not updated
+	 * in phys_disk_page_1 when hotswapped
+	 */
 	mutex_lock(&ioc->sas_topology_mutex);
 	list_for_each_entry(port_info, &ioc->sas_topology, list) {
-		for (i = 0; i < port_info->num_phys; i++) {
+		for (i = 0; i < port_info->num_phys && !phy_info; i++) {
 			if (!mptsas_is_end_device(
 				&port_info->phy_info[i].attached))
 				continue;
 			if (port_info->phy_info[i].attached.phys_disk_num == ~0)
 				continue;
-			if (port_info->phy_info[i].attached.phys_disk_num != id)
-				continue;
-			if (port_info->phy_info[i].attached.channel != channel)
-				continue;
-			phy_info = &port_info->phy_info[i];
-			break;
+			if ((port_info->phy_info[i].attached.phys_disk_num ==
+			    phys_disk_num) &&
+			    (port_info->phy_info[i].attached.id == id) &&
+			    (port_info->phy_info[i].attached.channel ==
+			     channel))
+				phy_info = &port_info->phy_info[i];
 		}
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
@@ -3683,8 +3857,9 @@ mptsas_hotplug_work(MPT_ADAPTER *ioc, struct fw_event_work *fw_event,
 		mpt_findImVolumes(ioc);
 
 		phy_info = mptsas_find_phyinfo_by_phys_disk_num(
-				ioc, hot_plug_info->channel,
-				hot_plug_info->phys_disk_num);
+				ioc, hot_plug_info->phys_disk_num,
+				hot_plug_info->channel,
+				hot_plug_info->id);
 		mptsas_del_end_device(ioc, phy_info);
 		break;
 
@@ -4032,6 +4207,7 @@ mptsas_send_ir2_event(struct fw_event_work *fw_event)
 	struct mptsas_hotplug_event hot_plug_info;
 	MPI_EVENT_DATA_IR2	*ir2_data;
 	u8 reasonCode;
+	RaidPhysDiskPage0_t phys_disk;
 
 	ioc = fw_event->ioc;
 	ir2_data = (MPI_EVENT_DATA_IR2 *)fw_event->event_data;
@@ -4047,6 +4223,17 @@ mptsas_send_ir2_event(struct fw_event_work *fw_event)
 	case MPI_EVENT_IR2_RC_FOREIGN_CFG_DETECTED:
 		hot_plug_info.event_type = MPTSAS_ADD_INACTIVE_VOLUME;
 		break;
+	case MPI_EVENT_IR2_RC_DUAL_PORT_REMOVED:
+		hot_plug_info.phys_disk_num = ir2_data->PhysDiskNum;
+		hot_plug_info.event_type = MPTSAS_DEL_PHYSDISK;
+		break;
+	case MPI_EVENT_IR2_RC_DUAL_PORT_ADDED:
+		hot_plug_info.phys_disk_num = ir2_data->PhysDiskNum;
+		mpt_raid_phys_disk_pg0(ioc,
+		    ir2_data->PhysDiskNum, &phys_disk);
+		hot_plug_info.id = phys_disk.PhysDiskID;
+		hot_plug_info.event_type = MPTSAS_ADD_PHYSDISK;
+		break;
 	default:
 		mptsas_free_fw_event(ioc, fw_event);
 		return;
@@ -4132,6 +4319,31 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 	return 0;
 }
 
+/* Delete a volume when no longer listed in ioc pg2
+ */
+static void mptsas_volume_delete(MPT_ADAPTER *ioc, u8 id)
+{
+	struct scsi_device *sdev;
+	int i;
+
+	sdev = scsi_device_lookup(ioc->sh, MPTSAS_RAID_CHANNEL, id, 0);
+	if (!sdev)
+		return;
+	if (!ioc->raid_data.pIocPg2)
+		goto out;
+	if (!ioc->raid_data.pIocPg2->NumActiveVolumes)
+		goto out;
+	for (i = 0; i < ioc->raid_data.pIocPg2->NumActiveVolumes; i++)
+		if (ioc->raid_data.pIocPg2->RaidVolume[i].VolumeID == id)
+			goto release_sdev;
+ out:
+	printk(MYIOC_s_INFO_FMT "removing raid volume, channel %d, "
+	    "id %d\n", ioc->name, MPTSAS_RAID_CHANNEL, id);
+	scsi_remove_device(sdev);
+ release_sdev:
+	scsi_device_put(sdev);
+}
+
 static int
 mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
diff --git a/drivers/message/fusion/mptsas.h b/drivers/message/fusion/mptsas.h
index 9e0885a..57258b6 100644
--- a/drivers/message/fusion/mptsas.h
+++ b/drivers/message/fusion/mptsas.h
@@ -82,6 +82,7 @@ struct mptsas_device_info {
 	u32			device_info; /* specific bits for devices */
 	u16			slot;		/* enclosure slot id */
 	u64			enclosure_logical_id; /*enclosure address */
+	u8			is_logical_volume; /* is this logical volume */
 };
 
 struct mptsas_hotplug_event {
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 6424dcb..cf1aba1 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -2087,8 +2087,10 @@ int
 mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id)
 {
 	struct inactive_raid_component_info *component_info;
-	int i;
+	int i, j;
+	RaidPhysDiskPage1_t *phys_disk;
 	int rc = 0;
+	int num_paths;
 
 	if (!ioc->raid_data.pIocPg3)
 		goto out;
@@ -2100,6 +2102,45 @@ mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id)
 		}
 	}
 
+	if (ioc->bus_type != SAS)
+		goto out;
+
+	/*
+	 * Check if dual path
+	 */
+	for (i = 0; i < ioc->raid_data.pIocPg3->NumPhysDisks; i++) {
+		num_paths = mpt_raid_phys_disk_get_num_paths(ioc,
+		    ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskNum);
+		if (num_paths < 2)
+			continue;
+		phys_disk = kzalloc(offsetof(RaidPhysDiskPage1_t, Path) +
+		   (num_paths * sizeof(RAID_PHYS_DISK1_PATH)), GFP_KERNEL);
+		if (!phys_disk)
+			continue;
+		if ((mpt_raid_phys_disk_pg1(ioc,
+		    ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskNum,
+		    phys_disk))) {
+			kfree(phys_disk);
+			continue;
+		}
+		for (j = 0; j < num_paths; j++) {
+			if ((phys_disk->Path[j].Flags &
+			    MPI_RAID_PHYSDISK1_FLAG_INVALID))
+				continue;
+			if ((phys_disk->Path[j].Flags &
+			    MPI_RAID_PHYSDISK1_FLAG_BROKEN))
+				continue;
+			if ((id == phys_disk->Path[j].PhysDiskID) &&
+			    (channel == phys_disk->Path[j].PhysDiskBus)) {
+				rc = 1;
+				kfree(phys_disk);
+				goto out;
+			}
+		}
+		kfree(phys_disk);
+	}
+
+
 	/*
 	 * Check inactive list for matching phys disks
 	 */
@@ -2124,8 +2165,10 @@ u8
 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id)
 {
 	struct inactive_raid_component_info *component_info;
-	int i;
+	int i, j;
+	RaidPhysDiskPage1_t *phys_disk;
 	int rc = -ENXIO;
+	int num_paths;
 
 	if (!ioc->raid_data.pIocPg3)
 		goto out;
@@ -2137,6 +2180,44 @@ mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id)
 		}
 	}
 
+	if (ioc->bus_type != SAS)
+		goto out;
+
+	/*
+	 * Check if dual path
+	 */
+	for (i = 0; i < ioc->raid_data.pIocPg3->NumPhysDisks; i++) {
+		num_paths = mpt_raid_phys_disk_get_num_paths(ioc,
+		    ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskNum);
+		if (num_paths < 2)
+			continue;
+		phys_disk = kzalloc(offsetof(RaidPhysDiskPage1_t, Path) +
+		   (num_paths * sizeof(RAID_PHYS_DISK1_PATH)), GFP_KERNEL);
+		if (!phys_disk)
+			continue;
+		if ((mpt_raid_phys_disk_pg1(ioc,
+		    ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskNum,
+		    phys_disk))) {
+			kfree(phys_disk);
+			continue;
+		}
+		for (j = 0; j < num_paths; j++) {
+			if ((phys_disk->Path[j].Flags &
+			    MPI_RAID_PHYSDISK1_FLAG_INVALID))
+				continue;
+			if ((phys_disk->Path[j].Flags &
+			    MPI_RAID_PHYSDISK1_FLAG_BROKEN))
+				continue;
+			if ((id == phys_disk->Path[j].PhysDiskID) &&
+			    (channel == phys_disk->Path[j].PhysDiskBus)) {
+				rc = phys_disk->PhysDiskNum;
+				kfree(phys_disk);
+				goto out;
+			}
+		}
+		kfree(phys_disk);
+	}
+
 	/*
 	 * Check inactive list for matching phys disks
 	 */
-- 
cgit v0.10.2


From 57e985136bfafdfcd72c4c7d91115955d225677e Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:55:09 +0530
Subject: [SCSI] mpt fusion: Queue full event handling

FW will report Queue full event to Driver and driver will handle this queue
full event to SCSI Mid layer.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 7215823..10a12d8 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -121,6 +121,7 @@ static void mptsas_expander_delete(MPT_ADAPTER *ioc,
 static void mptsas_send_expander_event(struct fw_event_work *fw_event);
 static void mptsas_not_responding_devices(MPT_ADAPTER *ioc);
 static void mptsas_scan_sas_topology(MPT_ADAPTER *ioc);
+static void mptsas_handle_queue_full_event(struct fw_event_work *fw_event);
 static void mptsas_volume_delete(MPT_ADAPTER *ioc, u8 id);
 
 static void mptsas_print_phy_data(MPT_ADAPTER *ioc,
@@ -680,6 +681,18 @@ mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc,
 		mptsas_add_device_component_by_fw(ioc, phys_disk.PhysDiskBus,
 		    phys_disk.PhysDiskID);
 
+		mutex_lock(&ioc->sas_device_info_mutex);
+		list_for_each_entry(sas_info, &ioc->sas_device_info_list,
+		    list) {
+			if (!sas_info->is_logical_volume &&
+			    (sas_info->fw.channel == phys_disk.PhysDiskBus &&
+			    sas_info->fw.id == phys_disk.PhysDiskID)) {
+				sas_info->is_hidden_raid_component = 1;
+				sas_info->volume_id = starget->id;
+			}
+		}
+		mutex_unlock(&ioc->sas_device_info_mutex);
+
 	}
 
 	/*
@@ -747,6 +760,29 @@ mptsas_add_device_component_starget(MPT_ADAPTER *ioc,
 }
 
 /**
+ *	mptsas_del_device_component_by_os - Once a device has been removed, we
+ *	mark the entry in the list as being cached
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@channel: os mapped id's
+ *	@id:
+ *
+ **/
+static void
+mptsas_del_device_component_by_os(MPT_ADAPTER *ioc, u8 channel, u8 id)
+{
+	struct mptsas_device_info	*sas_info, *next;
+
+	/*
+	 * Set is_cached flag
+	 */
+	list_for_each_entry_safe(sas_info, next, &ioc->sas_device_info_list,
+		list) {
+		if (sas_info->os.channel == channel && sas_info->os.id == id)
+			sas_info->is_cached = 1;
+	}
+}
+
+/**
  *	mptsas_del_device_components - Cleaning the list
  *	@ioc: Pointer to MPT_ADAPTER structure
  *
@@ -1576,6 +1612,9 @@ mptsas_firmware_event_work(struct work_struct *work)
 	case MPI_EVENT_SAS_PHY_LINK_STATUS:
 		mptsas_send_link_status_event(fw_event);
 		break;
+	case MPI_EVENT_QUEUE_FULL:
+		mptsas_handle_queue_full_event(fw_event);
+		break;
 	}
 }
 
@@ -1705,6 +1744,9 @@ mptsas_target_destroy(struct scsi_target *starget)
 
 	vtarget = starget->hostdata;
 
+	mptsas_del_device_component_by_os(ioc, starget->channel,
+	    starget->id);
+
 
 	if (starget->channel == MPTSAS_RAID_CHANNEL)
 		goto out;
@@ -3398,6 +3440,8 @@ mptsas_not_responding_devices(MPT_ADAPTER *ioc)
 	mutex_lock(&ioc->sas_device_info_mutex);
  redo_device_scan:
 	list_for_each_entry(sas_info, &ioc->sas_device_info_list, list) {
+		if (sas_info->is_cached)
+			continue;
 		if (!sas_info->is_logical_volume) {
 			sas_device.handle = 0;
 			retry_count = 0;
@@ -3612,6 +3656,95 @@ mptsas_scan_sas_topology(MPT_ADAPTER *ioc)
 	}
 }
 
+
+static void
+mptsas_handle_queue_full_event(struct fw_event_work *fw_event)
+{
+	MPT_ADAPTER *ioc;
+	EventDataQueueFull_t *qfull_data;
+	struct mptsas_device_info *sas_info;
+	struct scsi_device	*sdev;
+	int depth;
+	int id = -1;
+	int channel = -1;
+	int fw_id, fw_channel;
+	u16 current_depth;
+
+
+	ioc = fw_event->ioc;
+	qfull_data = (EventDataQueueFull_t *)fw_event->event_data;
+	fw_id = qfull_data->TargetID;
+	fw_channel = qfull_data->Bus;
+	current_depth = le16_to_cpu(qfull_data->CurrentDepth);
+
+	/* if hidden raid component, look for the volume id */
+	mutex_lock(&ioc->sas_device_info_mutex);
+	if (mptscsih_is_phys_disk(ioc, fw_channel, fw_id)) {
+		list_for_each_entry(sas_info, &ioc->sas_device_info_list,
+		    list) {
+			if (sas_info->is_cached ||
+			    sas_info->is_logical_volume)
+				continue;
+			if (sas_info->is_hidden_raid_component &&
+			    (sas_info->fw.channel == fw_channel &&
+			    sas_info->fw.id == fw_id)) {
+				id = sas_info->volume_id;
+				channel = MPTSAS_RAID_CHANNEL;
+				goto out;
+			}
+		}
+	} else {
+		list_for_each_entry(sas_info, &ioc->sas_device_info_list,
+		    list) {
+			if (sas_info->is_cached ||
+			    sas_info->is_hidden_raid_component ||
+			    sas_info->is_logical_volume)
+				continue;
+			if (sas_info->fw.channel == fw_channel &&
+			    sas_info->fw.id == fw_id) {
+				id = sas_info->os.id;
+				channel = sas_info->os.channel;
+				goto out;
+			}
+		}
+
+	}
+
+ out:
+	mutex_unlock(&ioc->sas_device_info_mutex);
+
+	if (id != -1) {
+		shost_for_each_device(sdev, ioc->sh) {
+			if (sdev->id == id && sdev->channel == channel) {
+				if (current_depth > sdev->queue_depth) {
+					sdev_printk(KERN_INFO, sdev,
+					    "strange observation, the queue "
+					    "depth is (%d) meanwhile fw queue "
+					    "depth (%d)\n", sdev->queue_depth,
+					    current_depth);
+					continue;
+				}
+				depth = scsi_track_queue_full(sdev,
+				    current_depth - 1);
+				if (depth > 0)
+					sdev_printk(KERN_INFO, sdev,
+					"Queue depth reduced to (%d)\n",
+					   depth);
+				else if (depth < 0)
+					sdev_printk(KERN_INFO, sdev,
+					"Tagged Command Queueing is being "
+					"disabled\n");
+				else if (depth == 0)
+					sdev_printk(KERN_INFO, sdev,
+					"Queue depth not changed yet\n");
+			}
+		}
+	}
+
+	mptsas_free_fw_event(ioc, fw_event);
+}
+
+
 static struct mptsas_phyinfo *
 mptsas_find_phyinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
 {
diff --git a/drivers/message/fusion/mptsas.h b/drivers/message/fusion/mptsas.h
index 57258b6..953c2bf 100644
--- a/drivers/message/fusion/mptsas.h
+++ b/drivers/message/fusion/mptsas.h
@@ -83,6 +83,12 @@ struct mptsas_device_info {
 	u16			slot;		/* enclosure slot id */
 	u64			enclosure_logical_id; /*enclosure address */
 	u8			is_logical_volume; /* is this logical volume */
+	/* this belongs to volume */
+	u8			is_hidden_raid_component;
+	/* this valid when is_hidden_raid_component set */
+	u8			volume_id;
+	/* cached data for a removed device */
+	u8			is_cached;
 };
 
 struct mptsas_hotplug_event {
-- 
cgit v0.10.2


From db7051b2984d2c7d44b6178ad4c523500dff7f7c Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Fri, 29 May 2009 16:56:59 +0530
Subject: [SCSI] mpt fusion: Added support for Broadcast primitives Event
 handling

Firmware is able to handle Broadcast primitives, but upstream driver does not
have support for broadcast primitive handling. Now this patch is mainly to
support broadcast primitives.

Signed-off-by: Kashyap Desai <kadesai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 4f3d4c3..1c8514d 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -758,6 +758,7 @@ typedef struct _MPT_ADAPTER
 	struct scsi_cmnd	**ScsiLookup;
 	spinlock_t		  scsi_lookup_lock;
 	u64			dma_mask;
+	u32			  broadcast_aen_busy;
 	char			 reset_work_q_name[MPT_KOBJ_NAME_LEN];
 	struct workqueue_struct *reset_work_q;
 	struct delayed_work	 fault_reset_work;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 10a12d8..88a1a6d 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -121,6 +121,7 @@ static void mptsas_expander_delete(MPT_ADAPTER *ioc,
 static void mptsas_send_expander_event(struct fw_event_work *fw_event);
 static void mptsas_not_responding_devices(MPT_ADAPTER *ioc);
 static void mptsas_scan_sas_topology(MPT_ADAPTER *ioc);
+static void mptsas_broadcast_primative_work(struct fw_event_work *fw_event);
 static void mptsas_handle_queue_full_event(struct fw_event_work *fw_event);
 static void mptsas_volume_delete(MPT_ADAPTER *ioc, u8 id);
 
@@ -287,6 +288,21 @@ mptsas_add_fw_event(MPT_ADAPTER *ioc, struct fw_event_work *fw_event,
 	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
 }
 
+/* requeue a sas firmware event */
+static void
+mptsas_requeue_fw_event(MPT_ADAPTER *ioc, struct fw_event_work *fw_event,
+    unsigned long delay)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT "%s: reschedule task "
+	    "(fw_event=0x%p)\n", ioc->name, __func__, fw_event));
+	fw_event->retries++;
+	queue_delayed_work(ioc->fw_event_q, &fw_event->work,
+	    msecs_to_jiffies(delay));
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+}
+
 /* free memory assoicated to a sas firmware event */
 static void
 mptsas_free_fw_event(MPT_ADAPTER *ioc, struct fw_event_work *fw_event)
@@ -1606,6 +1622,9 @@ mptsas_firmware_event_work(struct work_struct *work)
 		    MPI_SAS_OP_CLEAR_NOT_PRESENT);
 		mptsas_free_fw_event(ioc, fw_event);
 		break;
+	case MPI_EVENT_SAS_BROADCAST_PRIMITIVE:
+		mptsas_broadcast_primative_work(fw_event);
+		break;
 	case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE:
 		mptsas_send_expander_event(fw_event);
 		break;
@@ -4325,6 +4344,182 @@ mptsas_send_raid_event(struct fw_event_work *fw_event)
 		mptsas_free_fw_event(ioc, fw_event);
 }
 
+/**
+ *	mptsas_issue_tm - send mptsas internal tm request
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@type: Task Management type
+ *	@channel: channel number for task management
+ *	@id: Logical Target ID for reset (if appropriate)
+ *	@lun: Logical unit for reset (if appropriate)
+ *	@task_context: Context for the task to be aborted
+ *	@timeout: timeout for task management control
+ *
+ *	return 0 on success and -1 on failure:
+ *
+ */
+static int
+mptsas_issue_tm(MPT_ADAPTER *ioc, u8 type, u8 channel, u8 id, u64 lun,
+	int task_context, ulong timeout, u8 *issue_reset)
+{
+	MPT_FRAME_HDR	*mf;
+	SCSITaskMgmt_t	*pScsiTm;
+	int		 retval;
+	unsigned long	 timeleft;
+
+	*issue_reset = 0;
+	mf = mpt_get_msg_frame(mptsasDeviceResetCtx, ioc);
+	if (mf == NULL) {
+		retval = -1; /* return failure */
+		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT "TaskMgmt request: no "
+		    "msg frames!!\n", ioc->name));
+		goto out;
+	}
+
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt request: mr = %p, "
+	    "task_type = 0x%02X,\n\t timeout = %ld, fw_channel = %d, "
+	    "fw_id = %d, lun = %lld,\n\t task_context = 0x%x\n", ioc->name, mf,
+	     type, timeout, channel, id, (unsigned long long)lun,
+	     task_context));
+
+	pScsiTm = (SCSITaskMgmt_t *) mf;
+	memset(pScsiTm, 0, sizeof(SCSITaskMgmt_t));
+	pScsiTm->Function = MPI_FUNCTION_SCSI_TASK_MGMT;
+	pScsiTm->TaskType = type;
+	pScsiTm->MsgFlags = 0;
+	pScsiTm->TargetID = id;
+	pScsiTm->Bus = channel;
+	pScsiTm->ChainOffset = 0;
+	pScsiTm->Reserved = 0;
+	pScsiTm->Reserved1 = 0;
+	pScsiTm->TaskMsgContext = task_context;
+	int_to_scsilun(lun, (struct scsi_lun *)pScsiTm->LUN);
+
+	INITIALIZE_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	CLEAR_MGMT_STATUS(ioc->internal_cmds.status)
+	retval = 0;
+	mpt_put_msg_frame_hi_pri(mptsasDeviceResetCtx, ioc, mf);
+
+	/* Now wait for the command to complete */
+	timeleft = wait_for_completion_timeout(&ioc->taskmgmt_cmds.done,
+	    timeout*HZ);
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_COMMAND_GOOD)) {
+		retval = -1; /* return failure */
+		dtmprintk(ioc, printk(MYIOC_s_ERR_FMT
+		    "TaskMgmt request: TIMED OUT!(mr=%p)\n", ioc->name, mf));
+		mpt_free_msg_frame(ioc, mf);
+		if (ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
+			goto out;
+		*issue_reset = 1;
+		goto out;
+	}
+
+	if (!(ioc->taskmgmt_cmds.status & MPT_MGMT_STATUS_RF_VALID)) {
+		retval = -1; /* return failure */
+		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+		    "TaskMgmt request: failed with no reply\n", ioc->name));
+		goto out;
+	}
+
+ out:
+	CLEAR_MGMT_STATUS(ioc->taskmgmt_cmds.status)
+	return retval;
+}
+
+/**
+ *	mptsas_broadcast_primative_work - Handle broadcast primitives
+ *	@work: work queue payload containing info describing the event
+ *
+ *	this will be handled in workqueue context.
+ */
+static void
+mptsas_broadcast_primative_work(struct fw_event_work *fw_event)
+{
+	MPT_ADAPTER *ioc = fw_event->ioc;
+	MPT_FRAME_HDR	*mf;
+	VirtDevice	*vdevice;
+	int			ii;
+	struct scsi_cmnd	*sc;
+	SCSITaskMgmtReply_t	*pScsiTmReply;
+	u8			issue_reset;
+	int			task_context;
+	u8			channel, id;
+	int			 lun;
+	u32			 termination_count;
+	u32			 query_count;
+
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "%s - enter\n", ioc->name, __func__));
+
+	mutex_lock(&ioc->taskmgmt_cmds.mutex);
+	if (mpt_set_taskmgmt_in_progress_flag(ioc) != 0) {
+		mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+		mptsas_requeue_fw_event(ioc, fw_event, 1000);
+		return;
+	}
+
+	issue_reset = 0;
+	termination_count = 0;
+	query_count = 0;
+	mpt_findImVolumes(ioc);
+	pScsiTmReply = (SCSITaskMgmtReply_t *) ioc->taskmgmt_cmds.reply;
+
+	for (ii = 0; ii < ioc->req_depth; ii++) {
+		if (ioc->fw_events_off)
+			goto out;
+		sc = mptscsih_get_scsi_lookup(ioc, ii);
+		if (!sc)
+			continue;
+		mf = MPT_INDEX_2_MFPTR(ioc, ii);
+		if (!mf)
+			continue;
+		task_context = mf->u.frame.hwhdr.msgctxu.MsgContext;
+		vdevice = sc->device->hostdata;
+		if (!vdevice || !vdevice->vtarget)
+			continue;
+		if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT)
+			continue; /* skip hidden raid components */
+		if (vdevice->vtarget->raidVolume)
+			continue; /* skip hidden raid components */
+		channel = vdevice->vtarget->channel;
+		id = vdevice->vtarget->id;
+		lun = vdevice->lun;
+		if (mptsas_issue_tm(ioc, MPI_SCSITASKMGMT_TASKTYPE_QUERY_TASK,
+		    channel, id, (u64)lun, task_context, 30, &issue_reset))
+			goto out;
+		query_count++;
+		termination_count +=
+		    le32_to_cpu(pScsiTmReply->TerminationCount);
+		if ((pScsiTmReply->IOCStatus == MPI_IOCSTATUS_SUCCESS) &&
+		    (pScsiTmReply->ResponseCode ==
+		    MPI_SCSITASKMGMT_RSP_TM_SUCCEEDED ||
+		    pScsiTmReply->ResponseCode ==
+		    MPI_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC))
+			continue;
+		if (mptsas_issue_tm(ioc,
+		    MPI_SCSITASKMGMT_TASKTYPE_ABRT_TASK_SET,
+		    channel, id, (u64)lun, 0, 30, &issue_reset))
+			goto out;
+		termination_count +=
+		    le32_to_cpu(pScsiTmReply->TerminationCount);
+	}
+
+ out:
+	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT
+	    "%s - exit, query_count = %d termination_count = %d\n",
+	    ioc->name, __func__, query_count, termination_count));
+
+	ioc->broadcast_aen_busy = 0;
+	mpt_clear_taskmgmt_in_progress_flag(ioc);
+	mutex_unlock(&ioc->taskmgmt_cmds.mutex);
+
+	if (issue_reset) {
+		printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!!\n",
+		    ioc->name, __func__);
+		mpt_HardResetHandler(ioc, CAN_SLEEP);
+	}
+	mptsas_free_fw_event(ioc, fw_event);
+}
+
 /*
  * mptsas_send_ir2_event - handle exposing hidden disk when
  * an inactive raid volume is added
@@ -4388,6 +4583,18 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 
 	delay = msecs_to_jiffies(1);
 	switch (event) {
+	case MPI_EVENT_SAS_BROADCAST_PRIMITIVE:
+	{
+		EVENT_DATA_SAS_BROADCAST_PRIMITIVE *broadcast_event_data =
+		    (EVENT_DATA_SAS_BROADCAST_PRIMITIVE *)reply->Data;
+		if (broadcast_event_data->Primitive !=
+		    MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT)
+			return 0;
+		if (ioc->broadcast_aen_busy)
+			return 0;
+		ioc->broadcast_aen_busy = 1;
+		break;
+	}
 	case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE:
 	{
 		EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data =
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index cf1aba1..9668120 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -80,6 +80,7 @@ MODULE_VERSION(my_VERSION);
 /*
  *  Other private/forward protos...
  */
+struct scsi_cmnd	*mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i);
 static struct scsi_cmnd * mptscsih_getclear_scsi_lookup(MPT_ADAPTER *ioc, int i);
 static void	mptscsih_set_scsi_lookup(MPT_ADAPTER *ioc, int i, struct scsi_cmnd *scmd);
 static int	SCPNT_TO_LOOKUP_IDX(MPT_ADAPTER *ioc, struct scsi_cmnd *scmd);
@@ -2419,6 +2420,26 @@ mptscsih_copy_sense_data(struct scsi_cmnd *sc, MPT_SCSI_HOST *hd, MPT_FRAME_HDR
 	}
 }
 
+/**
+ * mptscsih_get_scsi_lookup - retrieves scmd entry
+ * @ioc: Pointer to MPT_ADAPTER structure
+ * @i: index into the array
+ *
+ * Returns the scsi_cmd pointer
+ */
+struct scsi_cmnd *
+mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i)
+{
+	unsigned long	flags;
+	struct scsi_cmnd *scmd;
+
+	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+	scmd = ioc->ScsiLookup[i];
+	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+
+	return scmd;
+}
+EXPORT_SYMBOL(mptscsih_get_scsi_lookup);
 
 /**
  * mptscsih_getclear_scsi_lookup
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 5727395..eb3f677 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -133,4 +133,5 @@ extern void mptscsih_timer_expired(unsigned long data);
 extern u8 mptscsih_raid_id_to_num(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern int mptscsih_is_phys_disk(MPT_ADAPTER *ioc, u8 channel, u8 id);
 extern struct device_attribute *mptscsih_host_attrs[];
+extern struct scsi_cmnd	*mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i);
 extern void mptscsih_taskmgmt_response_code(MPT_ADAPTER *ioc, u8 response_code);
-- 
cgit v0.10.2


From fc847ab4318cd6ab6c231739ad51d2502d19a87a Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Tue, 9 Jun 2009 23:01:01 +0000
Subject: [SCSI] mpt fusion: fix up doc book comments

Several of the doc book in the previous patches had incorrect multi-line short
function descriptors.  Fixed it all to be the correct single line descriptor.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 88a1a6d..14c490a 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -636,8 +636,7 @@ mptsas_add_device_component_by_fw(MPT_ADAPTER *ioc, u8 channel, u8 id)
 }
 
 /**
- *	mptsas_add_device_component_starget_ir - Handle Integrated RAID, adding
- *	each individual device to list
+ *	mptsas_add_device_component_starget_ir - Handle Integrated RAID, adding each individual device to list
  *	@ioc: Pointer to MPT_ADAPTER structure
  *	@channel: fw mapped id's
  *	@id:
@@ -776,8 +775,7 @@ mptsas_add_device_component_starget(MPT_ADAPTER *ioc,
 }
 
 /**
- *	mptsas_del_device_component_by_os - Once a device has been removed, we
- *	mark the entry in the list as being cached
+ *	mptsas_del_device_component_by_os - Once a device has been removed, we mark the entry in the list as being cached
  *	@ioc: Pointer to MPT_ADAPTER structure
  *	@channel: os mapped id's
  *	@id:
@@ -1125,11 +1123,12 @@ mptsas_target_reset_queue(MPT_ADAPTER *ioc,
 }
 
 /**
- *	mptsas_taskmgmt_complete - Completion for TARGET_RESET after
- *	NOT_RESPONDING_EVENT, enable work queue to finish off removing device
- *	from upper layers. then send next TARGET_RESET in the queue.
+ *	mptsas_taskmgmt_complete - complete SAS task management function
  *	@ioc: Pointer to MPT_ADAPTER structure
  *
+ *	Completion for TARGET_RESET after NOT_RESPONDING_EVENT, enable work
+ *	queue to finish off removing device from upper layers. then send next
+ *	TARGET_RESET in the queue.
  **/
 static int
 mptsas_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
@@ -1441,8 +1440,7 @@ mptsas_add_end_device(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info)
 }
 
 /**
- *	mptsas_del_end_device - report a deleted end device to sas transport
- *	layer
+ *	mptsas_del_end_device - report a deleted end device to sas transport layer
  *	@ioc: Pointer to MPT_ADAPTER structure
  *	@phy_info: decribes attached device
  *
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 9668120..024e830 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -2442,10 +2442,7 @@ mptscsih_get_scsi_lookup(MPT_ADAPTER *ioc, int i)
 EXPORT_SYMBOL(mptscsih_get_scsi_lookup);
 
 /**
- * mptscsih_getclear_scsi_lookup
- *
- * retrieves and clears scmd entry from ScsiLookup[] array list
- *
+ * mptscsih_getclear_scsi_lookup -  retrieves and clears scmd entry from ScsiLookup[] array list
  * @ioc: Pointer to MPT_ADAPTER structure
  * @i: index into the array
  *
-- 
cgit v0.10.2


From 35f2c2f6f6ae13ef23c4f68e6d3073753077ca43 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 9 Jun 2009 17:48:56 +0900
Subject: nommu: Provide mmap_min_addr definition.

With the "security: use mmap_min_addr indepedently of security models"
change, mmap_min_addr is used in common areas, which susbsequently blows
up the nommu build. This stubs in the definition in the nommu case as
well.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

--

 mm/nommu.c |    3 +++
 1 file changed, 3 insertions(+)
Signed-off-by: James Morris <jmorris@namei.org>

diff --git a/mm/nommu.c b/mm/nommu.c
index b571ef7..2fd2ad5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -69,6 +69,9 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 int heap_stack_gap = 0;
 
+/* amount of vm to protect from userspace access */
+unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+
 atomic_long_t mmap_pages_allocated;
 
 EXPORT_SYMBOL(mem_map);
-- 
cgit v0.10.2


From 586c7e6a280580fd94b662bf486f9bb31098d14b Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 9 Jun 2009 16:26:23 -0700
Subject: shm: fix unused warnings on nommu

The massive nommu update (8feae131) resulted in these warnings:
ipc/shm.c: In function `sys_shmdt':
ipc/shm.c:974: warning: unused variable `size'
ipc/shm.c:972: warning: unused variable `next'

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/ipc/shm.c b/ipc/shm.c
index faa46da..4259716 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -969,10 +969,13 @@ SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma, *next;
+	struct vm_area_struct *vma;
 	unsigned long addr = (unsigned long)shmaddr;
-	loff_t size = 0;
 	int retval = -EINVAL;
+#ifdef CONFIG_MMU
+	loff_t size = 0;
+	struct vm_area_struct *next;
+#endif
 
 	if (addr & ~PAGE_MASK)
 		return retval;
-- 
cgit v0.10.2


From 463aea1a1c49f1a7d4b50656dfd6c8bb33358b1b Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 9 Jun 2009 16:26:24 -0700
Subject: autofs4: remove hashed check in validate_wait()

The recent ->lookup() deadlock correction required the directory inode
mutex to be dropped while waiting for expire completion.  We were
concerned about side effects from this change and one has been identified.

I saw several error messages.

They cause autofs to become quite confused and don't really point to the
actual problem.

Things like:

handle_packet_missing_direct:1376: can't find map entry for (43,1827932)

which is usually totally fatal (although in this case it wouldn't be
except that I treat is as such because it normally is).

do_mount_direct: direct trigger not valid or already mounted
/test/nested/g3c/s1/ss1

which is recoverable, however if this problem is at play it can cause
autofs to become quite confused as to the dependencies in the mount tree
because mount triggers end up mounted multiple times.  It's hard to
accurately check for this over mounting case and automount shouldn't need
to if the kernel module is doing its job.

There was one other message, similar in consequence of this last one but I
can't locate a log example just now.

When checking if a mount has already completed prior to adding a new mount
request to the wait queue we check if the dentry is hashed and, if so, if
it is a mount point.  But, if a mount successfully completed while we
slept on the wait queue mutex the dentry must exist for the mount to have
completed so the test is not really needed.

Mounts can also be done on top of a global root dentry, so for the above
case, where a mount request completes and the wait queue entry has already
been removed, the hashed test returning false can cause an incorrect
callback to the daemon.  Also, d_mountpoint() is not sufficient to check
if a mount has completed for the multi-mount case when we don't have a
real mount at the base of the tree.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index eeb2468..2341375 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -297,20 +297,14 @@ static int validate_request(struct autofs_wait_queue **wait,
 	 */
 	if (notify == NFY_MOUNT) {
 		/*
-		 * If the dentry isn't hashed just go ahead and try the
-		 * mount again with a new wait (not much else we can do).
-		*/
-		if (!d_unhashed(dentry)) {
-			/*
-			 * But if the dentry is hashed, that means that we
-			 * got here through the revalidate path.  Thus, we
-			 * need to check if the dentry has been mounted
-			 * while we waited on the wq_mutex. If it has,
-			 * simply return success.
-			 */
-			if (d_mountpoint(dentry))
-				return 0;
-		}
+		 * If the dentry was successfully mounted while we slept
+		 * on the wait queue mutex we can return success. If it
+		 * isn't mounted (doesn't have submounts for the case of
+		 * a multi-mount with no mount at it's base) we can
+		 * continue on and create a new request.
+		 */
+		if (have_submounts(dentry))
+			return 0;
 	}
 
 	return 1;
-- 
cgit v0.10.2


From a61d90d75d0f9e86432c45b496b4b0fbf0fd03dc Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 9 Jun 2009 16:26:26 -0700
Subject: jbd: fix race in buffer processing in commit code

In commit code, we scan buffers attached to a transaction.  During this
scan, we sometimes have to drop j_list_lock and then we recheck whether
the journal buffer head didn't get freed by journal_try_to_free_buffers().
 But checking for buffer_jbd(bh) isn't enough because a new journal head
could get attached to our buffer head.  So add a check whether the journal
head remained the same and whether it's still at the same transaction and
list.

This is a nasty bug and can cause problems like memory corruption (use after
free) or trigger various assertions in JBD code (observed).

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: <stable@kernel.org>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 06560c5..618e21c 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -241,7 +241,7 @@ write_out_data:
 			spin_lock(&journal->j_list_lock);
 		}
 		/* Someone already cleaned up the buffer? */
-		if (!buffer_jbd(bh)
+		if (!buffer_jbd(bh) || bh2jh(bh) != jh
 			|| jh->b_transaction != commit_transaction
 			|| jh->b_jlist != BJ_SyncData) {
 			jbd_unlock_bh_state(bh);
@@ -478,7 +478,9 @@ void journal_commit_transaction(journal_t *journal)
 			spin_lock(&journal->j_list_lock);
 			continue;
 		}
-		if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+		if (buffer_jbd(bh) && bh2jh(bh) == jh &&
+		    jh->b_transaction == commit_transaction &&
+		    jh->b_jlist == BJ_Locked) {
 			__journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
 			journal_remove_journal_head(bh);
-- 
cgit v0.10.2


From 96050dfb25966612008dcea7d342e91fa01e993c Mon Sep 17 00:00:00 2001
From: Peter Botha <peterb@goldcircle.co.za>
Date: Tue, 9 Jun 2009 17:16:32 -0700
Subject: char: mxser, fix ISA board lookup

There's a bug in the mxser kernel module that still appears in the
2.6.29.4 kernel.

mxser_get_ISA_conf takes a ioaddress as its first argument, by passing the
not of the ioaddr, you're effectively passing 0 which means it won't be
able to talk to an ISA card.  I have tested this, and removing the !
fixes the problem.

Cc: "Peter Botha" <peterb@goldcircle.co.za>
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index a420e8d..13f8871 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -2711,7 +2711,7 @@ static int __init mxser_module_init(void)
 			continue;
 
 		brd = &mxser_boards[m];
-		retval = mxser_get_ISA_conf(!ioaddr[b], brd);
+		retval = mxser_get_ISA_conf(ioaddr[b], brd);
 		if (retval <= 0) {
 			brd->info = NULL;
 			continue;
-- 
cgit v0.10.2


From 07a2039b8eb0af4ff464efd3dfd95de5c02648c6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 9 Jun 2009 20:05:27 -0700
Subject: Linux 2.6.30


diff --git a/Makefile b/Makefile
index 1065154..03373bb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 30
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
-- 
cgit v0.10.2


From 2b83868723d090078ac0e2120e06a1cc94dbaef0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 9 Jun 2009 20:40:25 -0700
Subject: Make /dev/zero reads interruptible by signals

This helps with bad latencies for large reads from /dev/zero, but might
conceivably break some application that "knows" that a read of /dev/zero
cannot return early.  So do this early in the merge window to give us
maximal test coverage, even if the patch is totally trivial.

Obviously, no well-behaved application should ever depend on the read
being uninterruptible, but hey, bugs happen.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 65e12bc..f96d0be 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -694,9 +694,8 @@ static ssize_t read_zero(struct file * file, char __user * buf,
 		written += chunk - unwritten;
 		if (unwritten)
 			break;
-		/* Consider changing this to just 'signal_pending()' with lots of testing */
-		if (fatal_signal_pending(current))
-			return written ? written : -EINTR;
+		if (signal_pending(current))
+			return written ? written : -ERESTARTSYS;
 		buf += chunk;
 		count -= chunk;
 		cond_resched();
-- 
cgit v0.10.2


From c1d0d32a603ed06377f404adf2c538de33bb3634 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 10 Jun 2009 09:48:33 +0300
Subject: sh: plug vsyscall dir in to archclean.

The vsyscall targets are presently not cleaned up, so just handle it in
the archclean rule.

Reported-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 0fe35cf..75d049b0 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -205,6 +205,7 @@ archprepare: maketools
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)$(MAKE) $(clean)=arch/sh/kernel/vsyscall
 
 define archhelp
 	@echo '* zImage 	           - Compressed kernel image'
-- 
cgit v0.10.2


From 40bc9a27e00d6c8c7e4dc2865c02d7402a950472 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 10 Jun 2009 09:09:40 +0100
Subject: GFS2: Fix cache coherency between truncate and O_DIRECT read

If a page was partially zeroed as the result of a truncate, then it was
not being correctly marked dirty. This resulted in the deleted data
reappearing if the file was read back via direct I/O.

Reported-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1153a07..3297635 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1012,7 +1012,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
 	zero_user(page, offset, length);
-
+	mark_buffer_dirty(bh);
 unlock:
 	unlock_page(page);
 	page_cache_release(page);
-- 
cgit v0.10.2


From fa047e4f6fa63a6e9d0ae4d7749538830d14a343 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 10 Jun 2009 10:25:56 +0200
Subject: HID: fix inverted wheel for bluetooth version of apple mighty mouse

Bluetooth version of Apple Mighty mouse (0x05ac/0x030c) doesn't, according to
multiple reports on linux-input@, need the same quirk as the USB version of
this mouse (0x05ac/0x0304) does.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index acbce57..303ccce 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -436,10 +436,6 @@ static const struct hid_device_id apple_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 
-	/* Apple wireless Mighty Mouse */
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, 0x030c),
-		.driver_data = APPLE_MIGHTYMOUSE | APPLE_INVERT_HWHEEL },
-
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, apple_devices);
-- 
cgit v0.10.2


From 3e7c73e9b15eab73e9cf72daf3931925da8afcff Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 24 Feb 2009 21:46:19 +0200
Subject: KVM: VMX: Don't use highmem pages for the msr and pio bitmaps

Highmem pages are a pain, and saving three lowmem pages on i386 isn't worth
the extra code.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bb48133..b20c9e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -111,9 +111,9 @@ static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
 
-static struct page *vmx_io_bitmap_a;
-static struct page *vmx_io_bitmap_b;
-static struct page *vmx_msr_bitmap;
+static unsigned long *vmx_io_bitmap_a;
+static unsigned long *vmx_io_bitmap_b;
+static unsigned long *vmx_msr_bitmap;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -2082,9 +2082,9 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
+static void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 {
-	void *va;
+	int f = sizeof(unsigned long);
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -2094,16 +2094,14 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
 	 * have the write-low and read-high bitmap offsets the wrong way round.
 	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
 	 */
-	va = kmap(msr_bitmap);
 	if (msr <= 0x1fff) {
-		__clear_bit(msr, va + 0x000); /* read-low */
-		__clear_bit(msr, va + 0x800); /* write-low */
+		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
+		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
 	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
 		msr &= 0x1fff;
-		__clear_bit(msr, va + 0x400); /* read-high */
-		__clear_bit(msr, va + 0xc00); /* write-high */
+		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
+		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
 	}
-	kunmap(msr_bitmap);
 }
 
 /*
@@ -2121,11 +2119,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	u32 exec_control;
 
 	/* I/O */
-	vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
-	vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
+	vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
+	vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
 	if (cpu_has_vmx_msr_bitmap())
-		vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
+		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap));
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
@@ -3695,20 +3693,19 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	void *va;
 	int r;
 
-	vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_io_bitmap_a)
 		return -ENOMEM;
 
-	vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_io_bitmap_b) {
 		r = -ENOMEM;
 		goto out;
 	}
 
-	vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	vmx_msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_msr_bitmap) {
 		r = -ENOMEM;
 		goto out1;
@@ -3718,18 +3715,12 @@ static int __init vmx_init(void)
 	 * Allow direct access to the PC debug port (it is often used for I/O
 	 * delays, but the vmexits simply slow things down).
 	 */
-	va = kmap(vmx_io_bitmap_a);
-	memset(va, 0xff, PAGE_SIZE);
-	clear_bit(0x80, va);
-	kunmap(vmx_io_bitmap_a);
+	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+	clear_bit(0x80, vmx_io_bitmap_a);
 
-	va = kmap(vmx_io_bitmap_b);
-	memset(va, 0xff, PAGE_SIZE);
-	kunmap(vmx_io_bitmap_b);
+	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
 
-	va = kmap(vmx_msr_bitmap);
-	memset(va, 0xff, PAGE_SIZE);
-	kunmap(vmx_msr_bitmap);
+	memset(vmx_msr_bitmap, 0xff, PAGE_SIZE);
 
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
@@ -3762,19 +3753,19 @@ static int __init vmx_init(void)
 	return 0;
 
 out2:
-	__free_page(vmx_msr_bitmap);
+	free_page((unsigned long)vmx_msr_bitmap);
 out1:
-	__free_page(vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_b);
 out:
-	__free_page(vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_io_bitmap_a);
 	return r;
 }
 
 static void __exit vmx_exit(void)
 {
-	__free_page(vmx_msr_bitmap);
-	__free_page(vmx_io_bitmap_b);
-	__free_page(vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_msr_bitmap);
+	free_page((unsigned long)vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_a);
 
 	kvm_exit();
 }
-- 
cgit v0.10.2


From 5897297bc228fc3c85fdc421fd5c487f9a99821a Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 24 Feb 2009 22:26:47 +0200
Subject: KVM: VMX: Don't intercept MSR_KERNEL_GS_BASE

Windows 2008 accesses this MSR often on context switch intensive workloads;
since we run in guest context with the guest MSR value loaded (so swapgs can
work correctly), we can simply disable interception of rdmsr/wrmsr for this
MSR.

A complication occurs since in legacy mode, we run with the host MSR value
loaded. In this case we enable interception.  This means we need two MSR
bitmaps, one for legacy mode and one for long mode.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b20c9e4..b5eae7a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -113,7 +113,8 @@ static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
 
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap;
+static unsigned long *vmx_msr_bitmap_legacy;
+static unsigned long *vmx_msr_bitmap_longmode;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -812,6 +813,7 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
 	int save_nmsrs;
+	unsigned long *msr_bitmap;
 
 	vmx_load_host_state(vmx);
 	save_nmsrs = 0;
@@ -847,6 +849,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 		__find_msr_index(vmx, MSR_KERNEL_GS_BASE);
 #endif
 	vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
+
+	if (cpu_has_vmx_msr_bitmap()) {
+		if (is_long_mode(&vmx->vcpu))
+			msr_bitmap = vmx_msr_bitmap_longmode;
+		else
+			msr_bitmap = vmx_msr_bitmap_legacy;
+
+		vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+	}
 }
 
 /*
@@ -2082,7 +2093,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 {
 	int f = sizeof(unsigned long);
 
@@ -2104,6 +2115,13 @@ static void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 	}
 }
 
+static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
+{
+	if (!longmode_only)
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
+}
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -2123,7 +2141,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
 
 	if (cpu_has_vmx_msr_bitmap())
-		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap));
+		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
@@ -3705,12 +3723,18 @@ static int __init vmx_init(void)
 		goto out;
 	}
 
-	vmx_msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap) {
+	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy) {
 		r = -ENOMEM;
 		goto out1;
 	}
 
+	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode) {
+		r = -ENOMEM;
+		goto out2;
+	}
+
 	/*
 	 * Allow direct access to the PC debug port (it is often used for I/O
 	 * delays, but the vmexits simply slow things down).
@@ -3720,19 +3744,21 @@ static int __init vmx_init(void)
 
 	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
 
-	memset(vmx_msr_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
 
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out2;
+		goto out3;
 
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE);
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE);
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS);
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
-	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
+	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
 	if (vm_need_ept()) {
 		bypass_guest_pf = 0;
@@ -3752,8 +3778,10 @@ static int __init vmx_init(void)
 
 	return 0;
 
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
 out2:
-	free_page((unsigned long)vmx_msr_bitmap);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
 out1:
 	free_page((unsigned long)vmx_io_bitmap_b);
 out:
@@ -3763,7 +3791,8 @@ out:
 
 static void __exit vmx_exit(void)
 {
-	free_page((unsigned long)vmx_msr_bitmap);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
 	free_page((unsigned long)vmx_io_bitmap_b);
 	free_page((unsigned long)vmx_io_bitmap_a);
 
-- 
cgit v0.10.2


From cf9e4e15e8f6306b2559979269ead7c02e6b2b95 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 11 Feb 2009 16:03:36 +0800
Subject: KVM: Split IOAPIC structure

Prepared for reuse ioapic_redir_entry for MSI.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2b8318c..b84aca3 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -40,4 +40,21 @@ typedef unsigned long  hfn_t;
 
 typedef hfn_t pfn_t;
 
+union kvm_ioapic_redirect_entry {
+	u64 bits;
+	struct {
+		u8 vector;
+		u8 delivery_mode:3;
+		u8 dest_mode:1;
+		u8 delivery_status:1;
+		u8 polarity:1;
+		u8 remote_irr:1;
+		u8 trig_mode:1;
+		u8 mask:1;
+		u8 reserve:7;
+		u8 reserved[4];
+		u8 dest_id;
+	} fields;
+};
+
 #endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c3b99de..8128013 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
-	union ioapic_redir_entry *pent;
+	union kvm_ioapic_redirect_entry *pent;
 	int injected = -1;
 
 	pent = &ioapic->redirtbl[idx];
@@ -284,7 +284,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
 	u32 old_irr = ioapic->irr;
 	u32 mask = 1 << irq;
-	union ioapic_redir_entry entry;
+	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
@@ -305,7 +305,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
 				    int trigger_mode)
 {
-	union ioapic_redir_entry *ent;
+	union kvm_ioapic_redirect_entry *ent;
 
 	ent = &ioapic->redirtbl[pin];
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index a34bd5e..008ec87 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -40,22 +40,7 @@ struct kvm_ioapic {
 	u32 id;
 	u32 irr;
 	u32 pad;
-	union ioapic_redir_entry {
-		u64 bits;
-		struct {
-			u8 vector;
-			u8 delivery_mode:3;
-			u8 dest_mode:1;
-			u8 delivery_status:1;
-			u8 polarity:1;
-			u8 remote_irr:1;
-			u8 trig_mode:1;
-			u8 mask:1;
-			u8 reserve:7;
-			u8 reserved[4];
-			u8 dest_id;
-		} fields;
-	} redirtbl[IOAPIC_NUM_PINS];
+	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
-- 
cgit v0.10.2


From 116191b69b608d0f1513e3abe71d6a46800f2bd6 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 11 Feb 2009 16:03:37 +0800
Subject: KVM: Unify the delivery of IOAPIC and MSI interrupts

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 894a56e..1a2f98f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -352,6 +352,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
+void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+				   union kvm_ioapic_redirect_entry *entry,
+				   unsigned long *deliver_bitmask);
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8128013..883fd0d 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -203,79 +203,56 @@ u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
-	u8 dest = ioapic->redirtbl[irq].fields.dest_id;
-	u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
-	u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
-	u8 vector = ioapic->redirtbl[irq].fields.vector;
-	u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
-	u32 deliver_bitmask;
+	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
+	unsigned long deliver_bitmask;
 	struct kvm_vcpu *vcpu;
 	int vcpu_id, r = -1;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
-		     dest, dest_mode, delivery_mode, vector, trig_mode);
+		     entry.fields.dest, entry.fields.dest_mode,
+		     entry.fields.delivery_mode, entry.fields.vector,
+		     entry.fields.trig_mode);
 
-	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
-							  dest_mode);
+	kvm_get_intr_delivery_bitmask(ioapic, &entry, &deliver_bitmask);
 	if (!deliver_bitmask) {
 		ioapic_debug("no target on destination\n");
 		return 0;
 	}
 
-	switch (delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
-				deliver_bitmask);
+	/* Always delivery PIT interrupt to vcpu 0 */
 #ifdef CONFIG_X86
-		if (irq == 0)
-			vcpu = ioapic->kvm->vcpus[0];
+	if (irq == 0)
+		deliver_bitmask = 1;
 #endif
-		if (vcpu != NULL)
-			r = ioapic_inj_irq(ioapic, vcpu, vector,
-				       trig_mode, delivery_mode);
-		else
-			ioapic_debug("null lowest prio vcpu: "
-				     "mask=%x vector=%x delivery_mode=%x\n",
-				     deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
-		break;
-	case IOAPIC_FIXED:
-#ifdef CONFIG_X86
-		if (irq == 0)
-			deliver_bitmask = 1;
-#endif
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
+
+	for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+		if (!(deliver_bitmask & (1 << vcpu_id)))
+			continue;
+		deliver_bitmask &= ~(1 << vcpu_id);
+		vcpu = ioapic->kvm->vcpus[vcpu_id];
+		if (vcpu) {
+			if (entry.fields.delivery_mode ==
+					IOAPIC_LOWEST_PRIORITY ||
+			    entry.fields.delivery_mode == IOAPIC_FIXED) {
 				if (r < 0)
 					r = 0;
-				r += ioapic_inj_irq(ioapic, vcpu, vector,
-					       trig_mode, delivery_mode);
-			}
-		}
-		break;
-	case IOAPIC_NMI:
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
-				ioapic_inj_nmi(vcpu);
+				r += ioapic_inj_irq(ioapic, vcpu,
+						    entry.fields.vector,
+						    entry.fields.trig_mode,
+						    entry.fields.delivery_mode);
+			} else if (entry.fields.delivery_mode == IOAPIC_NMI) {
 				r = 1;
-			}
-			else
-				ioapic_debug("NMI to vcpu %d failed\n",
-						vcpu->vcpu_id);
-		}
-		break;
-	default:
-		printk(KERN_WARNING "Unsupported delivery mode %d\n",
-		       delivery_mode);
-		break;
+				ioapic_inj_nmi(vcpu);
+			} else
+				ioapic_debug("unsupported delivery mode %x!\n",
+					     entry.fields.delivery_mode);
+		} else
+			ioapic_debug("null destination vcpu: "
+				     "mask=%x vector=%x delivery_mode=%x\n",
+				     entry.fields.deliver_bitmask,
+				     entry.fields.vector,
+				     entry.fields.delivery_mode);
 	}
 	return r;
 }
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 864ac54..aec7a0d 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -43,53 +43,74 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
+void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+				   union kvm_ioapic_redirect_entry *entry,
+				   unsigned long *deliver_bitmask)
+{
+	struct kvm_vcpu *vcpu;
+
+	*deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+				entry->fields.dest_id, entry->fields.dest_mode);
+	switch (entry->fields.delivery_mode) {
+	case IOAPIC_LOWEST_PRIORITY:
+		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm,
+				entry->fields.vector, *deliver_bitmask);
+		*deliver_bitmask = 1 << vcpu->vcpu_id;
+		break;
+	case IOAPIC_FIXED:
+	case IOAPIC_NMI:
+		break;
+	default:
+		if (printk_ratelimit())
+			printk(KERN_INFO "kvm: unsupported delivery mode %d\n",
+				entry->fields.delivery_mode);
+		*deliver_bitmask = 0;
+	}
+}
+
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		       struct kvm *kvm, int level)
 {
 	int vcpu_id, r = -1;
 	struct kvm_vcpu *vcpu;
 	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
-	int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK)
-			>> MSI_ADDR_DEST_ID_SHIFT;
-	int vector = (e->msi.data & MSI_DATA_VECTOR_MASK)
-			>> MSI_DATA_VECTOR_SHIFT;
-	int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
-				(unsigned long *)&e->msi.address_lo);
-	int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
-				(unsigned long *)&e->msi.data);
-	int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
-				(unsigned long *)&e->msi.data);
-	u32 deliver_bitmask;
+	union kvm_ioapic_redirect_entry entry;
+	unsigned long deliver_bitmask;
 
 	BUG_ON(!ioapic);
 
-	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
-				dest_id, dest_mode);
-	/* IOAPIC delivery mode value is the same as MSI here */
-	switch (delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
-				deliver_bitmask);
-		if (vcpu != NULL)
-			r = kvm_apic_set_irq(vcpu, vector, trig_mode);
-		else
-			printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
-		break;
-	case IOAPIC_FIXED:
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
-				if (r < 0)
-					r = 0;
-				r += kvm_apic_set_irq(vcpu, vector, trig_mode);
-			}
+	entry.bits = 0;
+	entry.fields.dest_id = (e->msi.address_lo &
+			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+	entry.fields.vector = (e->msi.data &
+			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+	entry.fields.dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+			(unsigned long *)&e->msi.address_lo);
+	entry.fields.trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+			(unsigned long *)&e->msi.data);
+	entry.fields.delivery_mode = test_bit(
+			MSI_DATA_DELIVERY_MODE_SHIFT,
+			(unsigned long *)&e->msi.data);
+
+	/* TODO Deal with RH bit of MSI message address */
+
+	kvm_get_intr_delivery_bitmask(ioapic, &entry, &deliver_bitmask);
+
+	if (!deliver_bitmask) {
+		printk(KERN_WARNING "kvm: no destination for MSI delivery!");
+		return -1;
+	}
+	for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+		if (!(deliver_bitmask & (1 << vcpu_id)))
+			continue;
+		deliver_bitmask &= ~(1 << vcpu_id);
+		vcpu = ioapic->kvm->vcpus[vcpu_id];
+		if (vcpu) {
+			if (r < 0)
+				r = 0;
+			r += kvm_apic_set_irq(vcpu, entry.fields.vector,
+					      entry.fields.trig_mode);
 		}
-		break;
-	default:
-		break;
 	}
 	return r;
 }
-- 
cgit v0.10.2


From e5871be0f5d6847bc9585c997acb1b917c168f03 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 11 Feb 2009 16:03:38 +0800
Subject: KVM: Change API of kvm_ioapic_get_delivery_bitmask

In order to use with bit ops.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 883fd0d..3b53712 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -161,22 +161,22 @@ static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
 	kvm_vcpu_kick(vcpu);
 }
 
-u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-				    u8 dest_mode)
+void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+				     u8 dest_mode, unsigned long *mask)
 {
-	u32 mask = 0;
 	int i;
 	struct kvm *kvm = ioapic->kvm;
 	struct kvm_vcpu *vcpu;
 
 	ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
 
+	*mask = 0;
 	if (dest_mode == 0) {	/* Physical mode. */
 		if (dest == 0xFF) {	/* Broadcast. */
 			for (i = 0; i < KVM_MAX_VCPUS; ++i)
 				if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
-					mask |= 1 << i;
-			return mask;
+					*mask |= 1 << i;
+			return;
 		}
 		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 			vcpu = kvm->vcpus[i];
@@ -184,7 +184,7 @@ u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 				continue;
 			if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
 				if (vcpu->arch.apic)
-					mask = 1 << i;
+					*mask = 1 << i;
 				break;
 			}
 		}
@@ -195,10 +195,9 @@ u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 				continue;
 			if (vcpu->arch.apic &&
 			    kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
-				mask |= 1 << vcpu->vcpu_id;
+				*mask |= 1 << vcpu->vcpu_id;
 		}
-	ioapic_debug("mask %x\n", mask);
-	return mask;
+	ioapic_debug("mask %x\n", *mask);
 }
 
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 008ec87..f395798 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -70,7 +70,7 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-				u8 dest_mode);
+void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+				     u8 dest_mode, unsigned long *mask);
 
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index aec7a0d..e8ff89c 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -49,8 +49,9 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 {
 	struct kvm_vcpu *vcpu;
 
-	*deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
-				entry->fields.dest_id, entry->fields.dest_mode);
+	kvm_ioapic_get_delivery_bitmask(ioapic, entry->fields.dest_id,
+					entry->fields.dest_mode,
+					deliver_bitmask);
 	switch (entry->fields.delivery_mode) {
 	case IOAPIC_LOWEST_PRIORITY:
 		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm,
-- 
cgit v0.10.2


From 110c2faeba1f1994bcb1de55b9c31f4147dbfdb6 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 11 Feb 2009 16:03:39 +0800
Subject: KVM: Update intr delivery func to accept unsigned long* bitmap

Would be used with bit ops, and would be easily extended if KVM_MAX_VCPUS is
increased.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f0b67f2..6aa8d20 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -409,7 +409,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 }
 
 static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
-				       unsigned long bitmap)
+				       unsigned long *bitmap)
 {
 	int last;
 	int next;
@@ -421,7 +421,7 @@ static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
 	do {
 		if (++next == KVM_MAX_VCPUS)
 			next = 0;
-		if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
+		if (kvm->vcpus[next] == NULL || !test_bit(next, bitmap))
 			continue;
 		apic = kvm->vcpus[next]->arch.apic;
 		if (apic && apic_enabled(apic))
@@ -437,7 +437,7 @@ static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
 }
 
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-		unsigned long bitmap)
+		unsigned long *bitmap)
 {
 	struct kvm_lapic *apic;
 
@@ -508,7 +508,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	}
 
 	if (delivery_mode == APIC_DM_LOWEST) {
-		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
+		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, &lpr_map);
 		if (target != NULL)
 			__apic_accept_irq(target->arch.apic, delivery_mode,
 					  vector, level, trig_mode);
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index f395798..7275f87 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -65,7 +65,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 }
 
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-				       unsigned long bitmap);
+				       unsigned long *bitmap);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e8ff89c..d4421cd 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -55,7 +55,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 	switch (entry->fields.delivery_mode) {
 	case IOAPIC_LOWEST_PRIORITY:
 		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm,
-				entry->fields.vector, *deliver_bitmask);
+				entry->fields.vector, deliver_bitmask);
 		*deliver_bitmask = 1 << vcpu->vcpu_id;
 		break;
 	case IOAPIC_FIXED:
-- 
cgit v0.10.2


From bfd349d073b2838a6a031f057d25e266619b7093 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 11 Feb 2009 16:03:40 +0800
Subject: KVM: bit ops for deliver_bitmap

It's also convenient when we extend KVM supported vcpu number in the future.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6aa8d20..afc59b2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -483,9 +483,10 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
 	struct kvm_vcpu *target;
 	struct kvm_vcpu *vcpu;
-	unsigned long lpr_map = 0;
+	DECLARE_BITMAP(lpr_map, KVM_MAX_VCPUS);
 	int i;
 
+	bitmap_zero(lpr_map, KVM_MAX_VCPUS);
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
 		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
@@ -500,7 +501,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		if (vcpu->arch.apic &&
 		    apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
 			if (delivery_mode == APIC_DM_LOWEST)
-				set_bit(vcpu->vcpu_id, &lpr_map);
+				__set_bit(vcpu->vcpu_id, lpr_map);
 			else
 				__apic_accept_irq(vcpu->arch.apic, delivery_mode,
 						  vector, level, trig_mode);
@@ -508,7 +509,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	}
 
 	if (delivery_mode == APIC_DM_LOWEST) {
-		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, &lpr_map);
+		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
 		if (target != NULL)
 			__apic_accept_irq(target->arch.apic, delivery_mode,
 					  vector, level, trig_mode);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 3b53712..7c2cb2b 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -203,7 +203,7 @@ void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
 	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
-	unsigned long deliver_bitmask;
+	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
 	struct kvm_vcpu *vcpu;
 	int vcpu_id, r = -1;
 
@@ -213,22 +213,24 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 		     entry.fields.delivery_mode, entry.fields.vector,
 		     entry.fields.trig_mode);
 
-	kvm_get_intr_delivery_bitmask(ioapic, &entry, &deliver_bitmask);
-	if (!deliver_bitmask) {
-		ioapic_debug("no target on destination\n");
-		return 0;
-	}
+	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
 
 	/* Always delivery PIT interrupt to vcpu 0 */
 #ifdef CONFIG_X86
 	if (irq == 0)
-		deliver_bitmask = 1;
+		__set_bit(0, deliver_bitmask);
+	else
 #endif
+		kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask);
+
+	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
+		ioapic_debug("no target on destination\n");
+		return 0;
+	}
 
-	for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-		if (!(deliver_bitmask & (1 << vcpu_id)))
-			continue;
-		deliver_bitmask &= ~(1 << vcpu_id);
+	while ((vcpu_id = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
+			< KVM_MAX_VCPUS) {
+		__clear_bit(vcpu_id, deliver_bitmask);
 		vcpu = ioapic->kvm->vcpus[vcpu_id];
 		if (vcpu) {
 			if (entry.fields.delivery_mode ==
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index d4421cd..d165e05 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -56,7 +56,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 	case IOAPIC_LOWEST_PRIORITY:
 		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm,
 				entry->fields.vector, deliver_bitmask);
-		*deliver_bitmask = 1 << vcpu->vcpu_id;
+		__set_bit(vcpu->vcpu_id, deliver_bitmask);
 		break;
 	case IOAPIC_FIXED:
 	case IOAPIC_NMI:
@@ -76,10 +76,12 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	struct kvm_vcpu *vcpu;
 	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
 	union kvm_ioapic_redirect_entry entry;
-	unsigned long deliver_bitmask;
+	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
 
 	BUG_ON(!ioapic);
 
+	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
+
 	entry.bits = 0;
 	entry.fields.dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
@@ -95,16 +97,15 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 
 	/* TODO Deal with RH bit of MSI message address */
 
-	kvm_get_intr_delivery_bitmask(ioapic, &entry, &deliver_bitmask);
+	kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask);
 
-	if (!deliver_bitmask) {
+	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
 		printk(KERN_WARNING "kvm: no destination for MSI delivery!");
 		return -1;
 	}
-	for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-		if (!(deliver_bitmask & (1 << vcpu_id)))
-			continue;
-		deliver_bitmask &= ~(1 << vcpu_id);
+	while ((vcpu_id = find_first_bit(deliver_bitmask,
+					KVM_MAX_VCPUS)) < KVM_MAX_VCPUS) {
+		__clear_bit(vcpu_id, deliver_bitmask);
 		vcpu = ioapic->kvm->vcpus[vcpu_id];
 		if (vcpu) {
 			if (r < 0)
-- 
cgit v0.10.2


From c1e01514296e8a4a43ff0c88dcff635cb90feb5f Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 25 Feb 2009 17:22:26 +0800
Subject: KVM: Ioctls for init MSI-X entry

Introduce KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY two ioctls.

This two ioctls are used by userspace to specific guest device MSI-X entry
number and correlate MSI-X entry with GSI during the initialization stage.

MSI-X should be well initialzed before enabling.

Don't support change MSI-X entry number for now.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8cc1379..78cdee8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -487,6 +487,10 @@ struct kvm_irq_routing {
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
 #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
 				     struct kvm_assigned_pci_dev)
+#define KVM_ASSIGN_SET_MSIX_NR \
+			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
+#define KVM_ASSIGN_SET_MSIX_ENTRY \
+			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 
 /*
  * ioctls for vcpu fds
@@ -607,4 +611,18 @@ struct kvm_assigned_irq {
 #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
 
+struct kvm_assigned_msix_nr {
+	__u32 assigned_dev_id;
+	__u16 entry_nr;
+	__u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV		512
+struct kvm_assigned_msix_entry {
+	__u32 assigned_dev_id;
+	__u32 gsi;
+	__u16 entry; /* The index of entry in the MSI-X table */
+	__u16 padding[3];
+};
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1a2f98f..432edc2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -319,6 +319,12 @@ struct kvm_irq_ack_notifier {
 	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
+struct kvm_guest_msix_entry {
+	u32 vector;
+	u16 entry;
+	u16 flags;
+};
+
 struct kvm_assigned_dev_kernel {
 	struct kvm_irq_ack_notifier ack_notifier;
 	struct work_struct interrupt_work;
@@ -326,13 +332,17 @@ struct kvm_assigned_dev_kernel {
 	int assigned_dev_id;
 	int host_busnr;
 	int host_devfn;
+	unsigned int entries_nr;
 	int host_irq;
 	bool host_irq_disabled;
+	struct msix_entry *host_msix_entries;
 	int guest_irq;
+	struct kvm_guest_msix_entry *guest_msix_entries;
 #define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
 #define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
 #define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
+#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	int flags;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d0dd39..1ceb969 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1593,6 +1593,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
 	return 0;
 }
 
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+				    struct kvm_assigned_msix_nr *entry_nr)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry_nr->assigned_dev_id);
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_nr_out;
+	}
+
+	if (adev->entries_nr == 0) {
+		adev->entries_nr = entry_nr->entry_nr;
+		if (adev->entries_nr == 0 ||
+		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+			r = -EINVAL;
+			goto msix_nr_out;
+		}
+
+		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->host_msix_entries) {
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+		adev->guest_msix_entries = kzalloc(
+				sizeof(struct kvm_guest_msix_entry) *
+				entry_nr->entry_nr, GFP_KERNEL);
+		if (!adev->guest_msix_entries) {
+			kfree(adev->host_msix_entries);
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+	} else /* Not allowed set MSI-X number twice */
+		r = -EINVAL;
+msix_nr_out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+				       struct kvm_assigned_msix_entry *entry)
+{
+	int r = 0, i;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->assigned_dev_id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_entry_out;
+	}
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].vector == 0 ||
+		    adev->guest_msix_entries[i].entry == entry->entry) {
+			adev->guest_msix_entries[i].entry = entry->entry;
+			adev->guest_msix_entries[i].vector = entry->gsi;
+			adev->host_msix_entries[i].entry = entry->entry;
+			break;
+		}
+	if (i == adev->entries_nr) {
+		r = -ENOSPC;
+		goto msix_entry_out;
+	}
+
+msix_entry_out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+#endif
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -1917,7 +1999,29 @@ static long kvm_vm_ioctl(struct file *filp,
 		vfree(entries);
 		break;
 	}
+#ifdef __KVM_HAVE_MSIX
+	case KVM_ASSIGN_SET_MSIX_NR: {
+		struct kvm_assigned_msix_nr entry_nr;
+		r = -EFAULT;
+		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_SET_MSIX_ENTRY: {
+		struct kvm_assigned_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
+#endif /* KVM_CAP_IRQ_ROUTING */
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
-- 
cgit v0.10.2


From 2350bd1f62c8706c22b8e58c3bfff10806c0a31b Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 25 Feb 2009 17:22:27 +0800
Subject: KVM: Add MSI-X interrupt injection logic

We have to handle more than one interrupt with one handler for MSI-X. Avi
suggested to use a flag to indicate the pending. So here is it.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 432edc2..3832243 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -319,6 +319,7 @@ struct kvm_irq_ack_notifier {
 	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
+#define KVM_ASSIGNED_MSIX_PENDING		0x1
 struct kvm_guest_msix_entry {
 	u32 vector;
 	u16 entry;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1ceb969..8bd44d6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,25 +95,69 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
 	return NULL;
 }
 
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+				    *assigned_dev, int irq)
+{
+	int i, index;
+	struct msix_entry *host_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+
+	index = -1;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == host_msix_entries[i].vector) {
+			index = i;
+			break;
+		}
+	if (index < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+
+	return index;
+}
+
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
+	struct kvm *kvm;
+	int irq, i;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
+	kvm = assigned_dev->kvm;
 
 	/* This is taken to safely inject irq inside the guest. When
 	 * the interrupt injection (or the ioapic code) uses a
 	 * finer-grained lock, update this
 	 */
-	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
-
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
-		enable_irq(assigned_dev->host_irq);
-		assigned_dev->host_irq_disabled = false;
+	mutex_lock(&kvm->lock);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		struct kvm_guest_msix_entry *guest_entries =
+			assigned_dev->guest_msix_entries;
+		for (i = 0; i < assigned_dev->entries_nr; i++) {
+			if (!(guest_entries[i].flags &
+					KVM_ASSIGNED_MSIX_PENDING))
+				continue;
+			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+			kvm_set_irq(assigned_dev->kvm,
+				    assigned_dev->irq_source_id,
+				    guest_entries[i].vector, 1);
+			irq = assigned_dev->host_msix_entries[i].vector;
+			if (irq != 0)
+				enable_irq(irq);
+			assigned_dev->host_irq_disabled = false;
+		}
+	} else {
+		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+		if (assigned_dev->irq_requested_type &
+				KVM_ASSIGNED_DEV_GUEST_MSI) {
+			enable_irq(assigned_dev->host_irq);
+			assigned_dev->host_irq_disabled = false;
+		}
 	}
+
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
@@ -122,6 +166,14 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
+	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
+		int index = find_index_from_host_irq(assigned_dev, irq);
+		if (index < 0)
+			return IRQ_HANDLED;
+		assigned_dev->guest_msix_entries[index].flags |=
+			KVM_ASSIGNED_MSIX_PENDING;
+	}
+
 	schedule_work(&assigned_dev->interrupt_work);
 
 	disable_irq_nosync(irq);
-- 
cgit v0.10.2


From d510d6cc653bc4b3094ea73afe12600d0ab445b3 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 25 Feb 2009 17:22:28 +0800
Subject: KVM: Enable MSI-X for KVM assigned device

This patch finally enable MSI-X.

What we need for MSI-X:
1. Intercept one page in MMIO region of device. So that we can get guest desired
MSI-X table and set up the real one. Now this have been done by guest, and
transfer to kernel using ioctl KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY.

2. Information for incoming interrupt. Now one device can have more than one
interrupt, and they are all handled by one workqueue structure. So we need to
identify them. The previous patch enable gsi_msg_pending_bitmap get this done.

3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X
message address/data. We used same entry number for the host and guest here, so
that it's easy to find the correlated guest gsi.

What we lack for now:
1. The PCI spec said nothing can existed with MSI-X table in the same page of
MMIO region, except pending bits. The patch ignore pending bits as the first
step (so they are always 0 - no pending).

2. The PCI spec allowed to change MSI-X table dynamically. That means, the OS
can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch
didn't support this, and Linux also don't work in this way.

3. The patch didn't implement MSI-X mask all and mask single entry. I would
implement the former in driver/pci/msi.c later. And for single entry, userspace
should have reposibility to handle it.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index dc3f6cf..125be8b 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -16,6 +16,7 @@
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_MSIX
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 78cdee8..640835e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -409,6 +409,9 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_DEASSIGNMENT 27
 #endif
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_DEVICE_MSIX 28
+#endif
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 
@@ -611,6 +614,11 @@ struct kvm_assigned_irq {
 #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
 
+#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION  (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\
+					KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX  (1 << 1)
+#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX    (1 << 2)
+
 struct kvm_assigned_msix_nr {
 	__u32 assigned_dev_id;
 	__u16 entry_nr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8bd44d6..3bed827 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -236,13 +236,33 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	 * now, the kvm state is still legal for probably we also have to wait
 	 * interrupt_work done.
 	 */
-	disable_irq_nosync(assigned_dev->host_irq);
-	cancel_work_sync(&assigned_dev->interrupt_work);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		int i;
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			disable_irq_nosync(assigned_dev->
+					   host_msix_entries[i].vector);
+
+		cancel_work_sync(&assigned_dev->interrupt_work);
 
-	free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			free_irq(assigned_dev->host_msix_entries[i].vector,
+				 (void *)assigned_dev);
 
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		pci_disable_msi(assigned_dev->dev);
+		assigned_dev->entries_nr = 0;
+		kfree(assigned_dev->host_msix_entries);
+		kfree(assigned_dev->guest_msix_entries);
+		pci_disable_msix(assigned_dev->dev);
+	} else {
+		/* Deal with MSI and INTx */
+		disable_irq_nosync(assigned_dev->host_irq);
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+		if (assigned_dev->irq_requested_type &
+				KVM_ASSIGNED_DEV_HOST_MSI)
+			pci_disable_msi(assigned_dev->dev);
+	}
 
 	assigned_dev->irq_requested_type = 0;
 }
@@ -373,6 +393,60 @@ static int assigned_device_update_msi(struct kvm *kvm,
 }
 #endif
 
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_update_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *adev,
+			struct kvm_assigned_irq *airq)
+{
+	/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+	int i, r;
+
+	adev->ack_notifier.gsi = -1;
+
+	if (irqchip_in_kernel(kvm)) {
+		if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
+			return -ENOTTY;
+
+		if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
+			/* Guest disable MSI-X */
+			kvm_free_assigned_irq(kvm, adev);
+			if (msi2intx) {
+				pci_enable_msi(adev->dev);
+				if (adev->dev->msi_enabled)
+					return assigned_device_update_msi(kvm,
+							adev, airq);
+			}
+			return assigned_device_update_intx(kvm, adev, airq);
+		}
+
+		/* host_msix_entries and guest_msix_entries should have been
+		 * initialized */
+		if (adev->entries_nr == 0)
+			return -EINVAL;
+
+		kvm_free_assigned_irq(kvm, adev);
+
+		r = pci_enable_msix(adev->dev, adev->host_msix_entries,
+				    adev->entries_nr);
+		if (r)
+			return r;
+
+		for (i = 0; i < adev->entries_nr; i++) {
+			r = request_irq((adev->host_msix_entries + i)->vector,
+					kvm_assigned_dev_intr, 0,
+					"kvm_assigned_msix_device",
+					(void *)adev);
+			if (r)
+				return r;
+		}
+	}
+
+	adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+
+	return 0;
+}
+#endif
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 				   struct kvm_assigned_irq
 				   *assigned_irq)
@@ -417,12 +491,24 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		}
 	}
 
-	if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
+	if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
+		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
+	else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
 		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
 		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
 
 	changed_flags = assigned_irq->flags ^ current_flags;
 
+#ifdef __KVM_HAVE_MSIX
+	if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
+		r = assigned_device_update_msix(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to execute "
+					"MSI-X action!\n");
+			goto out_release;
+		}
+	} else
+#endif
 	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
 	    (msi2intx && match->dev->msi_enabled)) {
 #ifdef CONFIG_X86
-- 
cgit v0.10.2


From 2dea4c84bc936731668b5a7a9fba5b436a422668 Mon Sep 17 00:00:00 2001
From: "Matt T. Yourst" <yourst@users.sourceforge.net>
Date: Tue, 24 Feb 2009 15:28:00 -0300
Subject: KVM: x86: silence preempt warning on kvm_write_guest_time

This issue just appeared in kvm-84 when running on 2.6.28.7 (x86-64)
with PREEMPT enabled.

We're getting syslog warnings like this many (but not all) times qemu
tells KVM to run the VCPU:

BUG: using smp_processor_id() in preemptible [00000000] code:
qemu-system-x86/28938
caller is kvm_arch_vcpu_ioctl_run+0x5d1/0xc70 [kvm]
Pid: 28938, comm: qemu-system-x86 2.6.28.7-mtyrel-64bit
Call Trace:
debug_smp_processor_id+0xf7/0x100
kvm_arch_vcpu_ioctl_run+0x5d1/0xc70 [kvm]
? __wake_up+0x4e/0x70
? wake_futex+0x27/0x40
kvm_vcpu_ioctl+0x2e9/0x5a0 [kvm]
enqueue_hrtimer+0x8a/0x110
_spin_unlock_irqrestore+0x27/0x50
vfs_ioctl+0x31/0xa0
do_vfs_ioctl+0x74/0x480
sys_futex+0xb4/0x140
sys_ioctl+0x99/0xa0
system_call_fastpath+0x16/0x1b

As it turns out, the call trace is messed up due to gcc's inlining, but
I isolated the problem anyway: kvm_write_guest_time() is being used in a
non-thread-safe manner on preemptable kernels.

Basically kvm_write_guest_time()'s body needs to be surrounded by
preempt_disable() and preempt_enable(), since the kernel won't let us
query any per-CPU data (indirectly using smp_processor_id()) without
preemption disabled. The attached patch fixes this issue by disabling
preemption inside kvm_write_guest_time().

[marcelo: surround only __get_cpu_var calls since the warning
is harmless]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3944e91..43e049a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -634,10 +634,12 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	if ((!vcpu->time_page))
 		return;
 
+	preempt_disable();
 	if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
 		kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
 		vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	}
+	preempt_enable();
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-- 
cgit v0.10.2


From a90ede7b17d122acd58e6e1ff911be9dcf5263cc Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 11 Feb 2009 22:45:42 -0200
Subject: KVM: x86: paravirt skip pit-through-ioapic boot check

Skip the test which checks if the PIT is properly routed when
using the IOAPIC, aimed at buggy hardware.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 33019dd..057173d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -27,6 +27,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/hardirq.h>
+#include <asm/timer.h>
 
 #define MMU_QUEUE_SIZE 1024
 
@@ -230,6 +231,9 @@ static void paravirt_ops_setup(void)
 		pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
 		pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
 	}
+#ifdef CONFIG_X86_IO_APIC
+	no_timer_check = 1;
+#endif
 }
 
 void __init kvm_guest_init(void)
-- 
cgit v0.10.2


From b95b51d580bff9376850eef29d34c3aa08c26db7 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 26 Feb 2009 13:55:33 +0100
Subject: KVM: declare ioapic functions only on affected hardware

Since "KVM: Unify the delivery of IOAPIC and MSI interrupts"
I get the following warnings:

  CC [M]  arch/s390/kvm/kvm-s390.o
In file included from arch/s390/kvm/kvm-s390.c:22:
include/linux/kvm_host.h:357: warning: 'struct kvm_ioapic' declared inside parameter list
include/linux/kvm_host.h:357: warning: its scope is only this definition or declaration, which is probably not what you want

This patch limits IOAPIC functions for architectures that have one.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3832243..3b91ec9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -363,9 +363,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
+#ifdef __KVM_HAVE_IOAPIC
 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask);
+#endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
-- 
cgit v0.10.2


From 5a05d54554f19a128306eca7f7f5ed31f7d7eeb9 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 23 Feb 2009 10:57:39 -0300
Subject: KVM: PIT: remove unused scheduled variable

Unused.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c13bb92..3eddae6 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -208,7 +208,6 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 		wake_up_interruptible(&vcpu0->wq);
 
 	hrtimer_add_expires_ns(&pt->timer, pt->period);
-	pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
 	if (pt->period)
 		ps->channels[0].count_load_time = ktime_get();
 
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 6acbe4b..521accf 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -7,7 +7,6 @@ struct kvm_kpit_timer {
 	struct hrtimer timer;
 	int irq;
 	s64 period; /* unit: ns */
-	s64 scheduled;
 	atomic_t pending;
 	bool reinject;
 };
-- 
cgit v0.10.2


From fd66842370e32f3bbe429677280a326c07e508c1 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 23 Feb 2009 10:57:40 -0300
Subject: KVM: PIT: remove usage of count_load_time for channel 0

We can infer elapsed time from hrtimer_expires_remaining.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 3eddae6..09ae841 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -98,6 +98,32 @@ static int pit_get_gate(struct kvm *kvm, int channel)
 	return kvm->arch.vpit->pit_state.channels[channel].gate;
 }
 
+static s64 __kpit_elapsed(struct kvm *kvm)
+{
+	s64 elapsed;
+	ktime_t remaining;
+	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+	remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
+	if (ktime_to_ns(remaining) < 0)
+		remaining = ktime_set(0, 0);
+
+	elapsed = ps->pit_timer.period;
+	if (ktime_to_ns(remaining) <= ps->pit_timer.period)
+		elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
+
+	return elapsed;
+}
+
+static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c,
+			int channel)
+{
+	if (channel == 0)
+		return __kpit_elapsed(kvm);
+
+	return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+}
+
 static int pit_get_count(struct kvm *kvm, int channel)
 {
 	struct kvm_kpit_channel_state *c =
@@ -107,7 +133,7 @@ static int pit_get_count(struct kvm *kvm, int channel)
 
 	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
 
-	t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+	t = kpit_elapsed(kvm, c, channel);
 	d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
 
 	switch (c->mode) {
@@ -137,7 +163,7 @@ static int pit_get_out(struct kvm *kvm, int channel)
 
 	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
 
-	t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+	t = kpit_elapsed(kvm, c, channel);
 	d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
 
 	switch (c->mode) {
@@ -208,8 +234,6 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 		wake_up_interruptible(&vcpu0->wq);
 
 	hrtimer_add_expires_ns(&pt->timer, pt->period);
-	if (pt->period)
-		ps->channels[0].count_load_time = ktime_get();
 
 	return (pt->period == 0 ? 0 : 1);
 }
@@ -305,11 +329,12 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	if (val == 0)
 		val = 0x10000;
 
-	ps->channels[channel].count_load_time = ktime_get();
 	ps->channels[channel].count = val;
 
-	if (channel != 0)
+	if (channel != 0) {
+		ps->channels[channel].count_load_time = ktime_get();
 		return;
+	}
 
 	/* Two types of timer
 	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
-- 
cgit v0.10.2


From d3c7b77d1a6e7a0a27035a7ba723a3455317883e Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 23 Feb 2009 10:57:41 -0300
Subject: KVM: unify part of generic timer handling

Hide the internals of vcpu awakening / injection from the in-kernel
emulated timers. This makes future changes in this logic easier and
decreases the distance to more generic timer handling.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d3ec292..b43c4ef 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,7 +14,7 @@ endif
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o
+	i8254.o timer.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 09ae841..4e2e3f2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -219,25 +219,6 @@ static void pit_latch_status(struct kvm *kvm, int channel)
 	}
 }
 
-static int __pit_timer_fn(struct kvm_kpit_state *ps)
-{
-	struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
-	struct kvm_kpit_timer *pt = &ps->pit_timer;
-
-	if (!atomic_inc_and_test(&pt->pending))
-		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-
-	if (!pt->reinject)
-		atomic_set(&pt->pending, 1);
-
-	if (vcpu0 && waitqueue_active(&vcpu0->wq))
-		wake_up_interruptible(&vcpu0->wq);
-
-	hrtimer_add_expires_ns(&pt->timer, pt->period);
-
-	return (pt->period == 0 ? 0 : 1);
-}
-
 int pit_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
@@ -258,21 +239,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 	spin_unlock(&ps->inject_lock);
 }
 
-static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
-{
-	struct kvm_kpit_state *ps;
-	int restart_timer = 0;
-
-	ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
-
-	restart_timer = __pit_timer_fn(ps);
-
-	if (restart_timer)
-		return HRTIMER_RESTART;
-	else
-		return HRTIMER_NORESTART;
-}
-
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
@@ -286,15 +252,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
-static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+static void destroy_pit_timer(struct kvm_timer *pt)
 {
 	pr_debug("pit: execute del timer!\n");
 	hrtimer_cancel(&pt->timer);
 }
 
+static bool kpit_is_periodic(struct kvm_timer *ktimer)
+{
+	struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
+						 pit_timer);
+	return ps->is_periodic;
+}
+
+struct kvm_timer_ops kpit_ops = {
+	.is_periodic = kpit_is_periodic,
+};
+
 static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 {
-	struct kvm_kpit_timer *pt = &ps->pit_timer;
+	struct kvm_timer *pt = &ps->pit_timer;
 	s64 interval;
 
 	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@ -304,7 +281,13 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 	/* TODO The new value only affected after the retriggered */
 	hrtimer_cancel(&pt->timer);
 	pt->period = (is_period == 0) ? 0 : interval;
-	pt->timer.function = pit_timer_fn;
+	ps->is_periodic = is_period;
+
+	pt->timer.function = kvm_timer_fn;
+	pt->t_ops = &kpit_ops;
+	pt->kvm = ps->pit->kvm;
+	pt->vcpu_id = 0;
+
 	atomic_set(&pt->pending, 0);
 	ps->irq_ack = 1;
 
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 521accf..bbd863f 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -3,14 +3,6 @@
 
 #include "iodev.h"
 
-struct kvm_kpit_timer {
-	struct hrtimer timer;
-	int irq;
-	s64 period; /* unit: ns */
-	atomic_t pending;
-	bool reinject;
-};
-
 struct kvm_kpit_channel_state {
 	u32 count; /* can be 65536 */
 	u16 latched_count;
@@ -29,7 +21,8 @@ struct kvm_kpit_channel_state {
 
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
-	struct kvm_kpit_timer pit_timer;
+	struct kvm_timer pit_timer;
+	bool is_periodic;
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h
new file mode 100644
index 0000000..26bd6ba
--- /dev/null
+++ b/arch/x86/kvm/kvm_timer.h
@@ -0,0 +1,18 @@
+
+struct kvm_timer {
+	struct hrtimer timer;
+	s64 period; 				/* unit: ns */
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm_timer_ops *t_ops;
+	struct kvm *kvm;
+	int vcpu_id;
+};
+
+struct kvm_timer_ops {
+        bool (*is_periodic)(struct kvm_timer *);
+};
+
+
+enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
+
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afc59b2..27ca43e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -528,12 +528,13 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	if (apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
-	remaining = hrtimer_expires_remaining(&apic->timer.dev);
+	remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer);
 	if (ktime_to_ns(remaining) < 0)
 		remaining = ktime_set(0, 0);
 
-	ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
-	tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
+	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
+	tmcct = div64_u64(ns,
+			 (APIC_BUS_CYCLE_NS * apic->divide_count));
 
 	return tmcct;
 }
@@ -620,25 +621,25 @@ static void update_divide_count(struct kvm_lapic *apic)
 	tdcr = apic_get_reg(apic, APIC_TDCR);
 	tmp1 = tdcr & 0xf;
 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
-	apic->timer.divide_count = 0x1 << (tmp2 & 0x7);
+	apic->divide_count = 0x1 << (tmp2 & 0x7);
 
 	apic_debug("timer divide count is 0x%x\n",
-				   apic->timer.divide_count);
+				   apic->lapic_timer.divide_count);
 }
 
 static void start_apic_timer(struct kvm_lapic *apic)
 {
-	ktime_t now = apic->timer.dev.base->get_time();
+	ktime_t now = apic->lapic_timer.timer.base->get_time();
 
-	apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
-		    APIC_BUS_CYCLE_NS * apic->timer.divide_count;
-	atomic_set(&apic->timer.pending, 0);
+	apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
+		    APIC_BUS_CYCLE_NS * apic->divide_count;
+	atomic_set(&apic->lapic_timer.pending, 0);
 
-	if (!apic->timer.period)
+	if (!apic->lapic_timer.period)
 		return;
 
-	hrtimer_start(&apic->timer.dev,
-		      ktime_add_ns(now, apic->timer.period),
+	hrtimer_start(&apic->lapic_timer.timer,
+		      ktime_add_ns(now, apic->lapic_timer.period),
 		      HRTIMER_MODE_ABS);
 
 	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
@@ -647,9 +648,9 @@ static void start_apic_timer(struct kvm_lapic *apic)
 			   "expire @ 0x%016" PRIx64 ".\n", __func__,
 			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
 			   apic_get_reg(apic, APIC_TMICT),
-			   apic->timer.period,
+			   apic->lapic_timer.period,
 			   ktime_to_ns(ktime_add_ns(now,
-					apic->timer.period)));
+					apic->lapic_timer.period)));
 }
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@@ -731,7 +732,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
 					     lvt_val | APIC_LVT_MASKED);
 			}
-			atomic_set(&apic->timer.pending, 0);
+			atomic_set(&apic->lapic_timer.pending, 0);
 
 		}
 		break;
@@ -763,7 +764,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 
 	case APIC_TMICT:
-		hrtimer_cancel(&apic->timer.dev);
+		hrtimer_cancel(&apic->lapic_timer.timer);
 		apic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
 		return;
@@ -803,7 +804,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.apic)
 		return;
 
-	hrtimer_cancel(&vcpu->arch.apic->timer.dev);
+	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
 
 	if (vcpu->arch.apic->regs_page)
 		__free_page(vcpu->arch.apic->regs_page);
@@ -881,7 +882,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	ASSERT(apic != NULL);
 
 	/* Stop the timer in case it's a reset to an active apic */
-	hrtimer_cancel(&apic->timer.dev);
+	hrtimer_cancel(&apic->lapic_timer.timer);
 
 	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
@@ -906,7 +907,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
 	update_divide_count(apic);
-	atomic_set(&apic->timer.pending, 0);
+	atomic_set(&apic->lapic_timer.pending, 0);
 	if (vcpu->vcpu_id == 0)
 		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
@@ -937,22 +938,11 @@ EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
  *----------------------------------------------------------------------
  */
 
-/* TODO: make sure __apic_timer_fn runs in current pCPU */
-static int __apic_timer_fn(struct kvm_lapic *apic)
+static bool lapic_is_periodic(struct kvm_timer *ktimer)
 {
-	int result = 0;
-	wait_queue_head_t *q = &apic->vcpu->wq;
-
-	if(!atomic_inc_and_test(&apic->timer.pending))
-		set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-	if (apic_lvtt_period(apic)) {
-		result = 1;
-		hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period);
-	}
-	return result;
+	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
+					      lapic_timer);
+	return apic_lvtt_period(apic);
 }
 
 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -960,7 +950,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *lapic = vcpu->arch.apic;
 
 	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
-		return atomic_read(&lapic->timer.pending);
+		return atomic_read(&lapic->lapic_timer.pending);
 
 	return 0;
 }
@@ -987,20 +977,9 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
-static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
-{
-	struct kvm_lapic *apic;
-	int restart_timer = 0;
-
-	apic = container_of(data, struct kvm_lapic, timer.dev);
-
-	restart_timer = __apic_timer_fn(apic);
-
-	if (restart_timer)
-		return HRTIMER_RESTART;
-	else
-		return HRTIMER_NORESTART;
-}
+struct kvm_timer_ops lapic_timer_ops = {
+	.is_periodic = lapic_is_periodic,
+};
 
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
@@ -1025,8 +1004,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	memset(apic->regs, 0, PAGE_SIZE);
 	apic->vcpu = vcpu;
 
-	hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	apic->timer.dev.function = apic_timer_fn;
+	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_ABS);
+	apic->lapic_timer.timer.function = kvm_timer_fn;
+	apic->lapic_timer.t_ops = &lapic_timer_ops;
+	apic->lapic_timer.kvm = vcpu->kvm;
+	apic->lapic_timer.vcpu_id = vcpu->vcpu_id;
+
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
 
@@ -1079,9 +1063,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && atomic_read(&apic->timer.pending) > 0) {
+	if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
 		if (kvm_apic_local_deliver(apic, APIC_LVTT))
-			atomic_dec(&apic->timer.pending);
+			atomic_dec(&apic->lapic_timer.pending);
 	}
 }
 
@@ -1107,7 +1091,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 			     MSR_IA32_APICBASE_BASE;
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
 	apic_update_ppr(apic);
-	hrtimer_cancel(&apic->timer.dev);
+	hrtimer_cancel(&apic->lapic_timer.timer);
 	update_divide_count(apic);
 	start_apic_timer(apic);
 }
@@ -1120,7 +1104,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 	if (!apic)
 		return;
 
-	timer = &apic->timer.dev;
+	timer = &apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 45ab6ee..2fc0d3c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -2,18 +2,15 @@
 #define __KVM_X86_LAPIC_H
 
 #include "iodev.h"
+#include "kvm_timer.h"
 
 #include <linux/kvm_host.h>
 
 struct kvm_lapic {
 	unsigned long base_address;
 	struct kvm_io_device dev;
-	struct {
-		atomic_t pending;
-		s64 period;	/* unit: ns */
-		u32 divide_count;
-		struct hrtimer dev;
-	} timer;
+	struct kvm_timer lapic_timer;
+	u32 divide_count;
 	struct kvm_vcpu *vcpu;
 	struct page *regs_page;
 	void *regs;
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
new file mode 100644
index 0000000..86dbac0
--- /dev/null
+++ b/arch/x86/kvm/timer.c
@@ -0,0 +1,46 @@
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/hrtimer.h>
+#include <asm/atomic.h>
+#include "kvm_timer.h"
+
+static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
+{
+	int restart_timer = 0;
+	wait_queue_head_t *q = &vcpu->wq;
+
+	/* FIXME: this code should not know anything about vcpus */
+	if (!atomic_inc_and_test(&ktimer->pending))
+		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+
+	if (!ktimer->reinject)
+		atomic_set(&ktimer->pending, 1);
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+
+	if (ktimer->t_ops->is_periodic(ktimer)) {
+		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+		restart_timer = 1;
+	}
+
+	return restart_timer;
+}
+
+enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
+{
+	int restart_timer;
+	struct kvm_vcpu *vcpu;
+	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+
+	vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
+	if (!vcpu)
+		return HRTIMER_NORESTART;
+
+	restart_timer = __kvm_timer_fn(vcpu, ktimer);
+	if (restart_timer)
+		return HRTIMER_RESTART;
+	else
+		return HRTIMER_NORESTART;
+}
+
-- 
cgit v0.10.2


From 3f5e06f8799adca3e7e1bbafe1cd780a3e69f604 Mon Sep 17 00:00:00 2001
From: Yang Zhang <yang.zhang@intel.com>
Date: Mon, 2 Mar 2009 22:06:41 -0500
Subject: KVM: ia64: fix compilation error in kvm_get_lowest_prio_vcpu

Modify the arg of kvm_get_lowest_prio_vcpu().
Make it consistent with its declaration.

Signed-off-by: Yang Zhang <yang.zhang@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d20a5db..774f0d7 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1837,7 +1837,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 }
 
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-				       unsigned long bitmap)
+				       unsigned long *bitmap)
 {
 	struct kvm_vcpu *lvcpu = kvm->vcpus[0];
 	int i;
-- 
cgit v0.10.2


From 74a3a8f152053394a016518cc2f2fee216897fa4 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 4 Mar 2009 13:33:02 +0800
Subject: KVM: Merge kvm_ioapic_get_delivery_bitmask into
 kvm_get_intr_delivery_bitmask

Gleb fixed bitmap ops usage in kvm_ioapic_get_delivery_bitmask.

Sheng merged two functions, as well as fixed several issues in
kvm_get_intr_delivery_bitmask
1. deliver_bitmask is a bitmap rather than a unsigned long intereger.
2. Lowest priority target bitmap wrong calculated by mistake.
3. Prevent potential NULL reference.
4. Declaration in include/kvm_host.h caused powerpc compilation warning.
5. Add warning for guest broadcast interrupt with lowest priority delivery mode.
6. Removed duplicate bitmap clean up in caller of kvm_get_intr_delivery_bitmask.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 7c2cb2b..ea268a8 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -161,45 +161,6 @@ static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
 	kvm_vcpu_kick(vcpu);
 }
 
-void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-				     u8 dest_mode, unsigned long *mask)
-{
-	int i;
-	struct kvm *kvm = ioapic->kvm;
-	struct kvm_vcpu *vcpu;
-
-	ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
-
-	*mask = 0;
-	if (dest_mode == 0) {	/* Physical mode. */
-		if (dest == 0xFF) {	/* Broadcast. */
-			for (i = 0; i < KVM_MAX_VCPUS; ++i)
-				if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
-					*mask |= 1 << i;
-			return;
-		}
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
-			if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
-				if (vcpu->arch.apic)
-					*mask = 1 << i;
-				break;
-			}
-		}
-	} else if (dest != 0)	/* Logical mode, MDA non-zero. */
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
-			if (vcpu->arch.apic &&
-			    kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
-				*mask |= 1 << vcpu->vcpu_id;
-		}
-	ioapic_debug("mask %x\n", *mask);
-}
-
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
 	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
@@ -213,13 +174,12 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 		     entry.fields.delivery_mode, entry.fields.vector,
 		     entry.fields.trig_mode);
 
-	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
-
 	/* Always delivery PIT interrupt to vcpu 0 */
 #ifdef CONFIG_X86
-	if (irq == 0)
+	if (irq == 0) {
+                bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
 		__set_bit(0, deliver_bitmask);
-	else
+        } else
 #endif
 		kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask);
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7275f87..c8032ab 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -70,7 +70,8 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-				     u8 dest_mode, unsigned long *mask);
+void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+				   union kvm_ioapic_redirect_entry *entry,
+				   unsigned long *deliver_bitmask);
 
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index d165e05..1c6ff6d 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -47,15 +47,54 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask)
 {
+	int i;
+	struct kvm *kvm = ioapic->kvm;
 	struct kvm_vcpu *vcpu;
 
-	kvm_ioapic_get_delivery_bitmask(ioapic, entry->fields.dest_id,
-					entry->fields.dest_mode,
-					deliver_bitmask);
+	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
+
+	if (entry->fields.dest_mode == 0) {	/* Physical mode. */
+		if (entry->fields.dest_id == 0xFF) {	/* Broadcast. */
+			for (i = 0; i < KVM_MAX_VCPUS; ++i)
+				if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
+					__set_bit(i, deliver_bitmask);
+			/* Lowest priority shouldn't combine with broadcast */
+			if (entry->fields.delivery_mode ==
+			    IOAPIC_LOWEST_PRIORITY && printk_ratelimit())
+				printk(KERN_INFO "kvm: apic: phys broadcast "
+						  "and lowest prio\n");
+			return;
+		}
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (!vcpu)
+				continue;
+			if (kvm_apic_match_physical_addr(vcpu->arch.apic,
+					entry->fields.dest_id)) {
+				if (vcpu->arch.apic)
+					__set_bit(i, deliver_bitmask);
+				break;
+			}
+		}
+	} else if (entry->fields.dest_id != 0) /* Logical mode, MDA non-zero. */
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (!vcpu)
+				continue;
+			if (vcpu->arch.apic &&
+			    kvm_apic_match_logical_addr(vcpu->arch.apic,
+					entry->fields.dest_id))
+				__set_bit(i, deliver_bitmask);
+		}
+
 	switch (entry->fields.delivery_mode) {
 	case IOAPIC_LOWEST_PRIORITY:
+		/* Select one in deliver_bitmask */
 		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm,
 				entry->fields.vector, deliver_bitmask);
+		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
+		if (!vcpu)
+			return;
 		__set_bit(vcpu->vcpu_id, deliver_bitmask);
 		break;
 	case IOAPIC_FIXED:
@@ -65,7 +104,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 		if (printk_ratelimit())
 			printk(KERN_INFO "kvm: unsupported delivery mode %d\n",
 				entry->fields.delivery_mode);
-		*deliver_bitmask = 0;
+		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
 	}
 }
 
@@ -80,8 +119,6 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 
 	BUG_ON(!ioapic);
 
-	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
-
 	entry.bits = 0;
 	entry.fields.dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-- 
cgit v0.10.2


From f5a1e9f89504f57b2b45645a7239dc8a8ddb0f4c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 5 Mar 2009 13:12:29 +0100
Subject: KVM: MMU: remove call to kvm_mmu_pte_write from walk_addr

There is no reason to update the shadow pte here because the guest pte
is only changed to dirty state.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6bd7020..855eb71 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -209,7 +209,6 @@ walk:
 		if (ret)
 			goto walk;
 		pte |= PT_DIRTY_MASK;
-		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
 		walker->ptes[walker->level - 1] = pte;
 	}
 
-- 
cgit v0.10.2


From 6da7e3f643cf7099965d75fda8606b9d3a8650b9 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 5 Mar 2009 16:34:44 +0200
Subject: KVM: APIC: kvm_apic_set_irq deliver all kinds of interrupts

Get rid of ioapic_inj_irq() and ioapic_inj_nmi() functions.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 4542651..5608488 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -585,7 +585,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 void kvm_sal_emul(struct kvm_vcpu *vcpu);
 
-static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
 #endif /* __ASSEMBLY__*/
 
 #endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 774f0d7..99d6d17 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -292,13 +292,13 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
 {
 	switch (dm) {
 	case SAPIC_FIXED:
-		kvm_apic_set_irq(vcpu, vector, 0);
+		kvm_apic_set_irq(vcpu, vector, dm, 0);
 		break;
 	case SAPIC_NMI:
-		kvm_apic_set_irq(vcpu, 2, 0);
+		kvm_apic_set_irq(vcpu, 2, dm, 0);
 		break;
 	case SAPIC_EXTINT:
-		kvm_apic_set_irq(vcpu, 0, 0);
+		kvm_apic_set_irq(vcpu, 0, dm, 0);
 		break;
 	case SAPIC_INIT:
 	case SAPIC_PMI:
@@ -1813,7 +1813,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 	put_cpu();
 }
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig)
 {
 
 	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index 6d6cbcb..cbcfaa6 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -20,6 +20,6 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
 
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 27ca43e..a42f968 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -196,20 +196,30 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
+			     int vector, int level, int trig_mode);
+
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
+	int lapic_dmode;
 
-	if (!apic_test_and_set_irr(vec, apic)) {
-		/* a new pending irq is set in IRR */
-		if (trig)
-			apic_set_vector(vec, apic->regs + APIC_TMR);
-		else
-			apic_clear_vector(vec, apic->regs + APIC_TMR);
-		kvm_vcpu_kick(apic->vcpu);
-		return 1;
+	switch (dmode) {
+	case IOAPIC_LOWEST_PRIORITY:
+		lapic_dmode = APIC_DM_LOWEST;
+		break;
+	case IOAPIC_FIXED:
+		lapic_dmode = APIC_DM_FIXED;
+		break;
+	case IOAPIC_NMI:
+		lapic_dmode = APIC_DM_NMI;
+		break;
+	default:
+		printk(KERN_DEBUG"Ignoring delivery mode %d\n", dmode);
+		return 0;
+		break;
 	}
-	return 0;
+	return __apic_accept_irq(apic, lapic_dmode, vec, 1, trig);
 }
 
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
@@ -327,7 +337,7 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			     int vector, int level, int trig_mode)
 {
-	int orig_irr, result = 0;
+	int result = 0;
 	struct kvm_vcpu *vcpu = apic->vcpu;
 
 	switch (delivery_mode) {
@@ -337,10 +347,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (unlikely(!apic_enabled(apic)))
 			break;
 
-		orig_irr = apic_test_and_set_irr(vector, apic);
-		if (orig_irr && trig_mode) {
-			apic_debug("level trig mode repeatedly for vector %d",
-				   vector);
+		result = !apic_test_and_set_irr(vector, apic);
+		if (!result) {
+			if (trig_mode)
+				apic_debug("level trig mode repeatedly for "
+						"vector %d", vector);
 			break;
 		}
 
@@ -349,10 +360,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			apic_set_vector(vector, apic->regs + APIC_TMR);
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
-
 		kvm_vcpu_kick(vcpu);
-
-		result = (orig_irr == 0);
 		break;
 
 	case APIC_DM_REMRD:
@@ -364,12 +372,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		break;
 
 	case APIC_DM_NMI:
+		result = 1;
 		kvm_inject_nmi(vcpu);
 		kvm_vcpu_kick(vcpu);
 		break;
 
 	case APIC_DM_INIT:
 		if (level) {
+			result = 1;
 			if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
 				printk(KERN_DEBUG
 				       "INIT on a runnable vcpu %d\n",
@@ -386,6 +396,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
 			   vcpu->vcpu_id, vector);
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+			result = 1;
 			vcpu->arch.sipi_vector = vector;
 			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 			kvm_vcpu_kick(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2fc0d3c..1b0e3c0 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,7 +31,7 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ea268a8..d4a7948 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -142,25 +142,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	}
 }
 
-static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
-			   struct kvm_vcpu *vcpu,
-			   u8 vector, u8 trig_mode, u8 delivery_mode)
-{
-	ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
-		     delivery_mode);
-
-	ASSERT((delivery_mode == IOAPIC_FIXED) ||
-	       (delivery_mode == IOAPIC_LOWEST_PRIORITY));
-
-	return kvm_apic_set_irq(vcpu, vector, trig_mode);
-}
-
-static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
-{
-	kvm_inject_nmi(vcpu);
-	kvm_vcpu_kick(vcpu);
-}
-
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
 	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
@@ -193,21 +174,12 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 		__clear_bit(vcpu_id, deliver_bitmask);
 		vcpu = ioapic->kvm->vcpus[vcpu_id];
 		if (vcpu) {
-			if (entry.fields.delivery_mode ==
-					IOAPIC_LOWEST_PRIORITY ||
-			    entry.fields.delivery_mode == IOAPIC_FIXED) {
-				if (r < 0)
-					r = 0;
-				r += ioapic_inj_irq(ioapic, vcpu,
-						    entry.fields.vector,
-						    entry.fields.trig_mode,
-						    entry.fields.delivery_mode);
-			} else if (entry.fields.delivery_mode == IOAPIC_NMI) {
-				r = 1;
-				ioapic_inj_nmi(vcpu);
-			} else
-				ioapic_debug("unsupported delivery mode %x!\n",
-					     entry.fields.delivery_mode);
+			if (r < 0)
+				r = 0;
+			r += kvm_apic_set_irq(vcpu,
+					entry.fields.vector,
+					entry.fields.trig_mode,
+					entry.fields.delivery_mode);
 		} else
 			ioapic_debug("null destination vcpu: "
 				     "mask=%x vector=%x delivery_mode=%x\n",
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 1c6ff6d..325c668 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -148,6 +148,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 			if (r < 0)
 				r = 0;
 			r += kvm_apic_set_irq(vcpu, entry.fields.vector,
+					      entry.fields.dest_mode,
 					      entry.fields.trig_mode);
 		}
 	}
-- 
cgit v0.10.2


From a53c17d21c46a752f5ac6695376481bc27865b04 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 5 Mar 2009 16:34:49 +0200
Subject: KVM: ioapic/msi interrupt delivery consolidation

ioapic_deliver() and kvm_set_msi() have code duplication. Move
the code into ioapic_deliver_entry() function and call it from
both places.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3b91ec9..ec9d078 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -364,7 +364,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
 #ifdef __KVM_HAVE_IOAPIC
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask);
 #endif
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index d4a7948..b71c044 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -142,54 +142,57 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	}
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e)
 {
-	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
 	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
-	struct kvm_vcpu *vcpu;
-	int vcpu_id, r = -1;
+	int i, r = -1;
 
-	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-		     "vector=%x trig_mode=%x\n",
-		     entry.fields.dest, entry.fields.dest_mode,
-		     entry.fields.delivery_mode, entry.fields.vector,
-		     entry.fields.trig_mode);
-
-	/* Always delivery PIT interrupt to vcpu 0 */
-#ifdef CONFIG_X86
-	if (irq == 0) {
-                bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
-		__set_bit(0, deliver_bitmask);
-        } else
-#endif
-		kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask);
+	kvm_get_intr_delivery_bitmask(kvm, e, deliver_bitmask);
 
 	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
 		ioapic_debug("no target on destination\n");
-		return 0;
+		return r;
 	}
 
-	while ((vcpu_id = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
+	while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
 			< KVM_MAX_VCPUS) {
-		__clear_bit(vcpu_id, deliver_bitmask);
-		vcpu = ioapic->kvm->vcpus[vcpu_id];
+		struct kvm_vcpu *vcpu = kvm->vcpus[i];
+		__clear_bit(i, deliver_bitmask);
 		if (vcpu) {
 			if (r < 0)
 				r = 0;
-			r += kvm_apic_set_irq(vcpu,
-					entry.fields.vector,
-					entry.fields.trig_mode,
-					entry.fields.delivery_mode);
+			r += kvm_apic_set_irq(vcpu, e->fields.vector,
+					e->fields.delivery_mode,
+					e->fields.trig_mode);
 		} else
 			ioapic_debug("null destination vcpu: "
 				     "mask=%x vector=%x delivery_mode=%x\n",
-				     entry.fields.deliver_bitmask,
-				     entry.fields.vector,
-				     entry.fields.delivery_mode);
+				     e->fields.deliver_bitmask,
+				     e->fields.vector, e->fields.delivery_mode);
 	}
 	return r;
 }
 
+static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+{
+	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
+
+	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+		     "vector=%x trig_mode=%x\n",
+		     entry.fields.dest, entry.fields.dest_mode,
+		     entry.fields.delivery_mode, entry.fields.vector,
+		     entry.fields.trig_mode);
+
+#ifdef CONFIG_X86
+	/* Always delivery PIT interrupt to vcpu 0 */
+	if (irq == 0) {
+		entry.fields.dest_mode = 0; /* Physical mode. */
+		entry.fields.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
+	}
+#endif
+	return ioapic_deliver_entry(ioapic->kvm, &entry);
+}
+
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
 	u32 old_irr = ioapic->irr;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index c8032ab..bedeea5 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -70,8 +70,8 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask);
-
+int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e);
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 325c668..35397a5 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -43,12 +43,11 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask)
 {
 	int i;
-	struct kvm *kvm = ioapic->kvm;
 	struct kvm_vcpu *vcpu;
 
 	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
@@ -90,7 +89,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 	switch (entry->fields.delivery_mode) {
 	case IOAPIC_LOWEST_PRIORITY:
 		/* Select one in deliver_bitmask */
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm,
+		vcpu = kvm_get_lowest_prio_vcpu(kvm,
 				entry->fields.vector, deliver_bitmask);
 		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
 		if (!vcpu)
@@ -111,13 +110,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		       struct kvm *kvm, int level)
 {
-	int vcpu_id, r = -1;
-	struct kvm_vcpu *vcpu;
-	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
 	union kvm_ioapic_redirect_entry entry;
-	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
-
-	BUG_ON(!ioapic);
 
 	entry.bits = 0;
 	entry.fields.dest_id = (e->msi.address_lo &
@@ -133,26 +126,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 			(unsigned long *)&e->msi.data);
 
 	/* TODO Deal with RH bit of MSI message address */
-
-	kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask);
-
-	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
-		printk(KERN_WARNING "kvm: no destination for MSI delivery!");
-		return -1;
-	}
-	while ((vcpu_id = find_first_bit(deliver_bitmask,
-					KVM_MAX_VCPUS)) < KVM_MAX_VCPUS) {
-		__clear_bit(vcpu_id, deliver_bitmask);
-		vcpu = ioapic->kvm->vcpus[vcpu_id];
-		if (vcpu) {
-			if (r < 0)
-				r = 0;
-			r += kvm_apic_set_irq(vcpu, entry.fields.vector,
-					      entry.fields.dest_mode,
-					      entry.fields.trig_mode);
-		}
-	}
-	return r;
+	return ioapic_deliver_entry(kvm, &entry);
 }
 
 /* This should be called with the kvm->lock mutex held
-- 
cgit v0.10.2


From 343f94fe4d16ec898da77720c03da9e09f8523d2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 5 Mar 2009 16:34:54 +0200
Subject: KVM: consolidate ioapic/ipi interrupt delivery logic

Use kvm_apic_match_dest() in kvm_get_intr_delivery_bitmask() instead
of duplicating the same code. Use kvm_get_intr_delivery_bitmask() in
apic_send_ipi() to figure out ipi destination instead of reimplementing
the logic.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 99d6d17..8eea9cb 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1852,6 +1852,14 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
 	return lvcpu;
 }
 
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode)
+{
+	return (dest_mode == 0) ?
+		kvm_apic_match_physical_addr(target, dest) :
+		kvm_apic_match_logical_addr(target, dest);
+}
+
 static int find_highest_bits(int *dat)
 {
 	u32  bits, bitnum;
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index cbcfaa6..31602e7 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -20,6 +20,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode);
+bool kvm_apic_present(struct kvm_vcpu *vcpu);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
 
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a42f968..998862a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -260,7 +260,7 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
 {
-	return kvm_apic_id(apic) == dest;
+	return dest == 0xff || kvm_apic_id(apic) == dest;
 }
 
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
@@ -289,37 +289,34 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 	return result;
 }
 
-static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, int dest, int dest_mode)
 {
 	int result = 0;
 	struct kvm_lapic *target = vcpu->arch.apic;
 
 	apic_debug("target %p, source %p, dest 0x%x, "
-		   "dest_mode 0x%x, short_hand 0x%x",
+		   "dest_mode 0x%x, short_hand 0x%x\n",
 		   target, source, dest, dest_mode, short_hand);
 
 	ASSERT(!target);
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
-		if (dest_mode == 0) {
+		if (dest_mode == 0)
 			/* Physical mode. */
-			if ((dest == 0xFF) || (dest == kvm_apic_id(target)))
-				result = 1;
-		} else
+			result = kvm_apic_match_physical_addr(target, dest);
+		else
 			/* Logical mode. */
 			result = kvm_apic_match_logical_addr(target, dest);
 		break;
 	case APIC_DEST_SELF:
-		if (target == source)
-			result = 1;
+		result = (target == source);
 		break;
 	case APIC_DEST_ALLINC:
 		result = 1;
 		break;
 	case APIC_DEST_ALLBUT:
-		if (target != source)
-			result = 1;
+		result = (target != source);
 		break;
 	default:
 		printk(KERN_WARNING "Bad dest shorthand value %x\n",
@@ -492,38 +489,26 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
 	unsigned int vector = icr_low & APIC_VECTOR_MASK;
 
-	struct kvm_vcpu *target;
-	struct kvm_vcpu *vcpu;
-	DECLARE_BITMAP(lpr_map, KVM_MAX_VCPUS);
+	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
 	int i;
 
-	bitmap_zero(lpr_map, KVM_MAX_VCPUS);
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
 		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
 		   icr_high, icr_low, short_hand, dest,
 		   trig_mode, level, dest_mode, delivery_mode, vector);
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
-		vcpu = apic->vcpu->kvm->vcpus[i];
-		if (!vcpu)
-			continue;
-
-		if (vcpu->arch.apic &&
-		    apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
-			if (delivery_mode == APIC_DM_LOWEST)
-				__set_bit(vcpu->vcpu_id, lpr_map);
-			else
-				__apic_accept_irq(vcpu->arch.apic, delivery_mode,
-						  vector, level, trig_mode);
-		}
-	}
-
-	if (delivery_mode == APIC_DM_LOWEST) {
-		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
-		if (target != NULL)
-			__apic_accept_irq(target->arch.apic, delivery_mode,
-					  vector, level, trig_mode);
+	kvm_get_intr_delivery_bitmask(apic->vcpu->kvm, apic, dest, dest_mode,
+			delivery_mode == APIC_DM_LOWEST, short_hand,
+			deliver_bitmask);
+
+	while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
+			< KVM_MAX_VCPUS) {
+		struct kvm_vcpu *vcpu = apic->vcpu->kvm->vcpus[i];
+		__clear_bit(i, deliver_bitmask);
+		if (vcpu)
+			__apic_accept_irq(vcpu->arch.apic, delivery_mode,
+					vector, level, trig_mode);
 	}
 }
 
@@ -930,16 +915,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reset);
 
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+bool kvm_apic_present(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
-	int ret = 0;
-
-	if (!apic)
-		return 0;
-	ret = apic_enabled(apic);
+	return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
+}
 
-	return ret;
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+{
+	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 1b0e3c0..b66dc14 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -37,6 +37,8 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
+bool kvm_apic_present(struct kvm_vcpu *vcpu);
+bool kvm_lapic_present(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec9d078..fb60f31 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -363,11 +363,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
-#ifdef __KVM_HAVE_IOAPIC
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask);
-#endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index b71c044..43969bb 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -147,7 +147,10 @@ int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e)
 	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
 	int i, r = -1;
 
-	kvm_get_intr_delivery_bitmask(kvm, e, deliver_bitmask);
+	kvm_get_intr_delivery_bitmask(kvm, NULL, e->fields.dest_id,
+			e->fields.dest_mode,
+			e->fields.delivery_mode == IOAPIC_LOWEST_PRIORITY,
+			0, deliver_bitmask);
 
 	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
 		ioapic_debug("no target on destination\n");
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index bedeea5..d996c7a 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -65,13 +65,15 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 }
 
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-				       unsigned long *bitmap);
+		unsigned long *bitmap);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask);
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
+		int dest_id, int dest_mode, bool low_prio, int short_hand,
+		unsigned long *deliver_bitmask);
 int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e);
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 35397a5..e43701c 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -43,67 +43,35 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask)
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
+		int dest_id, int dest_mode, bool low_prio, int short_hand,
+		unsigned long *deliver_bitmask)
 {
 	int i;
 	struct kvm_vcpu *vcpu;
 
+	if (dest_mode == 0 && dest_id == 0xff && low_prio)
+		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+
 	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
+	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		vcpu = kvm->vcpus[i];
 
-	if (entry->fields.dest_mode == 0) {	/* Physical mode. */
-		if (entry->fields.dest_id == 0xFF) {	/* Broadcast. */
-			for (i = 0; i < KVM_MAX_VCPUS; ++i)
-				if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
-					__set_bit(i, deliver_bitmask);
-			/* Lowest priority shouldn't combine with broadcast */
-			if (entry->fields.delivery_mode ==
-			    IOAPIC_LOWEST_PRIORITY && printk_ratelimit())
-				printk(KERN_INFO "kvm: apic: phys broadcast "
-						  "and lowest prio\n");
-			return;
-		}
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
-			if (kvm_apic_match_physical_addr(vcpu->arch.apic,
-					entry->fields.dest_id)) {
-				if (vcpu->arch.apic)
-					__set_bit(i, deliver_bitmask);
-				break;
-			}
-		}
-	} else if (entry->fields.dest_id != 0) /* Logical mode, MDA non-zero. */
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
-			if (vcpu->arch.apic &&
-			    kvm_apic_match_logical_addr(vcpu->arch.apic,
-					entry->fields.dest_id))
-				__set_bit(i, deliver_bitmask);
-		}
+		if (!vcpu || !kvm_apic_present(vcpu))
+			continue;
 
-	switch (entry->fields.delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		/* Select one in deliver_bitmask */
-		vcpu = kvm_get_lowest_prio_vcpu(kvm,
-				entry->fields.vector, deliver_bitmask);
-		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
-		if (!vcpu)
-			return;
-		__set_bit(vcpu->vcpu_id, deliver_bitmask);
-		break;
-	case IOAPIC_FIXED:
-	case IOAPIC_NMI:
-		break;
-	default:
-		if (printk_ratelimit())
-			printk(KERN_INFO "kvm: unsupported delivery mode %d\n",
-				entry->fields.delivery_mode);
+		if (!kvm_apic_match_dest(vcpu, src, short_hand, dest_id,
+					dest_mode))
+			continue;
+
+		__set_bit(i, deliver_bitmask);
+	}
+
+	if (low_prio) {
+		vcpu = kvm_get_lowest_prio_vcpu(kvm, 0, deliver_bitmask);
 		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
+		if (vcpu)
+			__set_bit(vcpu->vcpu_id, deliver_bitmask);
 	}
 }
 
-- 
cgit v0.10.2


From e1035715ef8d3171e29f9c6aee6f40d57b3fead5 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 5 Mar 2009 16:34:59 +0200
Subject: KVM: change the way how lowest priority vcpu is calculated

The new way does not require additional loop over vcpus to calculate
the one with lowest priority as one is chosen during delivery bitmap
construction.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 8eea9cb..1887a93 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1836,20 +1836,9 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 	return 0;
 }
 
-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-				       unsigned long *bitmap)
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
-	struct kvm_vcpu *lvcpu = kvm->vcpus[0];
-	int i;
-
-	for (i = 1; i < kvm->arch.online_vcpus; i++) {
-		if (!kvm->vcpus[i])
-			continue;
-		if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
-			lvcpu = kvm->vcpus[i];
-	}
-
-	return lvcpu;
+	return vcpu1->arch.xtp - vcpu2->arch.xtp;
 }
 
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index 31602e7..e42109e 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -22,6 +22,7 @@ int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 bool kvm_apic_present(struct kvm_vcpu *vcpu);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f0faf58..4627627 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,6 +286,7 @@ struct kvm_vcpu_arch {
 	u64 shadow_efer;
 	u64 apic_base;
 	struct kvm_lapic *apic;    /* kernel irqchip context */
+	int32_t apic_arb_prio;
 	int mp_state;
 	int sipi_vector;
 	u64 ia32_misc_enable_msr;
@@ -400,7 +401,6 @@ struct kvm_arch{
 	struct hlist_head irq_ack_notifier_list;
 	int vapics_in_nmi_mode;
 
-	int round_robin_prev_vcpu;
 	unsigned int tss_addr;
 	struct page *apic_access_page;
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 998862a..814466f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -338,8 +338,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	struct kvm_vcpu *vcpu = apic->vcpu;
 
 	switch (delivery_mode) {
-	case APIC_DM_FIXED:
 	case APIC_DM_LOWEST:
+		vcpu->arch.apic_arb_prio++;
+	case APIC_DM_FIXED:
 		/* FIXME add logic for vcpu on reset */
 		if (unlikely(!apic_enabled(apic)))
 			break;
@@ -416,43 +417,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	return result;
 }
 
-static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
-				       unsigned long *bitmap)
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
-	int last;
-	int next;
-	struct kvm_lapic *apic = NULL;
-
-	last = kvm->arch.round_robin_prev_vcpu;
-	next = last;
-
-	do {
-		if (++next == KVM_MAX_VCPUS)
-			next = 0;
-		if (kvm->vcpus[next] == NULL || !test_bit(next, bitmap))
-			continue;
-		apic = kvm->vcpus[next]->arch.apic;
-		if (apic && apic_enabled(apic))
-			break;
-		apic = NULL;
-	} while (next != last);
-	kvm->arch.round_robin_prev_vcpu = next;
-
-	if (!apic)
-		printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
-
-	return apic;
-}
-
-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-		unsigned long *bitmap)
-{
-	struct kvm_lapic *apic;
-
-	apic = kvm_apic_round_robin(kvm, vector, bitmap);
-	if (apic)
-		return apic->vcpu;
-	return NULL;
+	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 }
 
 static void apic_set_eoi(struct kvm_lapic *apic)
@@ -908,6 +875,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
 
+	vcpu->arch.apic_arb_prio = 0;
+
 	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
 		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
 		   vcpu, kvm_apic_id(apic),
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index d996c7a..e7bc92d 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -64,10 +64,9 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 	return kvm->arch.vioapic;
 }
 
-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-		unsigned long *bitmap);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e43701c..f5e059b 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -47,7 +47,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
 		int dest_id, int dest_mode, bool low_prio, int short_hand,
 		unsigned long *deliver_bitmask)
 {
-	int i;
+	int i, lowest = -1;
 	struct kvm_vcpu *vcpu;
 
 	if (dest_mode == 0 && dest_id == 0xff && low_prio)
@@ -64,15 +64,18 @@ void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
 					dest_mode))
 			continue;
 
-		__set_bit(i, deliver_bitmask);
+		if (!low_prio) {
+			__set_bit(i, deliver_bitmask);
+		} else {
+			if (lowest < 0)
+				lowest = i;
+			if (kvm_apic_compare_prio(vcpu, kvm->vcpus[lowest]) < 0)
+				lowest = i;
+		}
 	}
 
-	if (low_prio) {
-		vcpu = kvm_get_lowest_prio_vcpu(kvm, 0, deliver_bitmask);
-		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
-		if (vcpu)
-			__set_bit(vcpu->vcpu_id, deliver_bitmask);
-	}
+	if (lowest != -1)
+		__set_bit(lowest, deliver_bitmask);
 }
 
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-- 
cgit v0.10.2


From 58c2dde17d6eb6c8c0566e52d184aa16755d890f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 5 Mar 2009 16:35:04 +0200
Subject: KVM: APIC: get rid of deliver_bitmask

Deliver interrupt during destination matching loop.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 1887a93..acf43ec 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -283,6 +283,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 }
 
+static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (!test_and_set_bit(vector, &vpd->irr[0])) {
+		vcpu->arch.irq_new_pending = 1;
+		kvm_vcpu_kick(vcpu);
+		return 1;
+	}
+	return 0;
+}
+
 /*
  *  offset: address offset to IPI space.
  *  value:  deliver value.
@@ -292,20 +304,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
 {
 	switch (dm) {
 	case SAPIC_FIXED:
-		kvm_apic_set_irq(vcpu, vector, dm, 0);
 		break;
 	case SAPIC_NMI:
-		kvm_apic_set_irq(vcpu, 2, dm, 0);
+		vector = 2;
 		break;
 	case SAPIC_EXTINT:
-		kvm_apic_set_irq(vcpu, 0, dm, 0);
+		vector = 0;
 		break;
 	case SAPIC_INIT:
 	case SAPIC_PMI:
 	default:
 		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-		break;
+		return;
 	}
+	__apic_accept_irq(vcpu, vector);
 }
 
 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
@@ -1813,17 +1825,9 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 	put_cpu();
 }
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
-
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (!test_and_set_bit(vec, &vpd->irr[0])) {
-		vcpu->arch.irq_new_pending = 1;
-		kvm_vcpu_kick(vcpu);
-		return 1;
-	}
-	return 0;
+	return __apic_accept_irq(vcpu, irq->vector);
 }
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
@@ -1844,6 +1848,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, int dest, int dest_mode)
 {
+	struct kvm_lapic *target = vcpu->arch.apic;
 	return (dest_mode == 0) ?
 		kvm_apic_match_physical_addr(target, dest) :
 		kvm_apic_match_logical_addr(target, dest);
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index e42109e..ee541ce 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -23,7 +23,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-bool kvm_apic_present(struct kvm_vcpu *vcpu);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+#define kvm_apic_present(x) (true)
 
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 814466f..dd934d2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -199,27 +199,12 @@ EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			     int vector, int level, int trig_mode);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	int lapic_dmode;
 
-	switch (dmode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		lapic_dmode = APIC_DM_LOWEST;
-		break;
-	case IOAPIC_FIXED:
-		lapic_dmode = APIC_DM_FIXED;
-		break;
-	case IOAPIC_NMI:
-		lapic_dmode = APIC_DM_NMI;
-		break;
-	default:
-		printk(KERN_DEBUG"Ignoring delivery mode %d\n", dmode);
-		return 0;
-		break;
-	}
-	return __apic_accept_irq(apic, lapic_dmode, vec, 1, trig);
+	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
+			irq->level, irq->trig_mode);
 }
 
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
@@ -447,36 +432,24 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 {
 	u32 icr_low = apic_get_reg(apic, APIC_ICR);
 	u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+	struct kvm_lapic_irq irq;
 
-	unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
-	unsigned int short_hand = icr_low & APIC_SHORT_MASK;
-	unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
-	unsigned int level = icr_low & APIC_INT_ASSERT;
-	unsigned int dest_mode = icr_low & APIC_DEST_MASK;
-	unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
-	unsigned int vector = icr_low & APIC_VECTOR_MASK;
-
-	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
-	int i;
+	irq.vector = icr_low & APIC_VECTOR_MASK;
+	irq.delivery_mode = icr_low & APIC_MODE_MASK;
+	irq.dest_mode = icr_low & APIC_DEST_MASK;
+	irq.level = icr_low & APIC_INT_ASSERT;
+	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
+	irq.shorthand = icr_low & APIC_SHORT_MASK;
+	irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
 
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
 		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
-		   icr_high, icr_low, short_hand, dest,
-		   trig_mode, level, dest_mode, delivery_mode, vector);
-
-	kvm_get_intr_delivery_bitmask(apic->vcpu->kvm, apic, dest, dest_mode,
-			delivery_mode == APIC_DM_LOWEST, short_hand,
-			deliver_bitmask);
-
-	while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
-			< KVM_MAX_VCPUS) {
-		struct kvm_vcpu *vcpu = apic->vcpu->kvm->vcpus[i];
-		__clear_bit(i, deliver_bitmask);
-		if (vcpu)
-			__apic_accept_irq(vcpu->arch.apic, delivery_mode,
-					vector, level, trig_mode);
-	}
+		   icr_high, icr_low, irq.shorthand, irq.dest,
+		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
+		   irq.vector);
+
+	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index b66dc14..a587f83 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,14 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
 bool kvm_apic_present(struct kvm_vcpu *vcpu);
-bool kvm_lapic_present(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b84aca3..fb46efb 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -57,4 +57,14 @@ union kvm_ioapic_redirect_entry {
 	} fields;
 };
 
+struct kvm_lapic_irq {
+	u32 vector;
+	u32 delivery_mode;
+	u32 dest_mode;
+	u32 level;
+	u32 trig_mode;
+	u32 shorthand;
+	u32 dest_id;
+};
+
 #endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 43969bb..1eddae9 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -142,58 +142,33 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	}
 }
 
-int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e)
-{
-	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
-	int i, r = -1;
-
-	kvm_get_intr_delivery_bitmask(kvm, NULL, e->fields.dest_id,
-			e->fields.dest_mode,
-			e->fields.delivery_mode == IOAPIC_LOWEST_PRIORITY,
-			0, deliver_bitmask);
-
-	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
-		ioapic_debug("no target on destination\n");
-		return r;
-	}
-
-	while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
-			< KVM_MAX_VCPUS) {
-		struct kvm_vcpu *vcpu = kvm->vcpus[i];
-		__clear_bit(i, deliver_bitmask);
-		if (vcpu) {
-			if (r < 0)
-				r = 0;
-			r += kvm_apic_set_irq(vcpu, e->fields.vector,
-					e->fields.delivery_mode,
-					e->fields.trig_mode);
-		} else
-			ioapic_debug("null destination vcpu: "
-				     "mask=%x vector=%x delivery_mode=%x\n",
-				     e->fields.deliver_bitmask,
-				     e->fields.vector, e->fields.delivery_mode);
-	}
-	return r;
-}
-
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
-	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
+	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
+	struct kvm_lapic_irq irqe;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
-		     entry.fields.dest, entry.fields.dest_mode,
-		     entry.fields.delivery_mode, entry.fields.vector,
-		     entry.fields.trig_mode);
+		     entry->fields.dest, entry->fields.dest_mode,
+		     entry->fields.delivery_mode, entry->fields.vector,
+		     entry->fields.trig_mode);
+
+	irqe.dest_id = entry->fields.dest_id;
+	irqe.vector = entry->fields.vector;
+	irqe.dest_mode = entry->fields.dest_mode;
+	irqe.trig_mode = entry->fields.trig_mode;
+	irqe.delivery_mode = entry->fields.delivery_mode << 8;
+	irqe.level = 1;
+	irqe.shorthand = 0;
 
 #ifdef CONFIG_X86
 	/* Always delivery PIT interrupt to vcpu 0 */
 	if (irq == 0) {
-		entry.fields.dest_mode = 0; /* Physical mode. */
-		entry.fields.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
+		irqe.dest_mode = 0; /* Physical mode. */
+		irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
 	}
 #endif
-	return ioapic_deliver_entry(ioapic->kvm, &entry);
+	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index e7bc92d..7080b71 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -71,8 +71,6 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
-		int dest_id, int dest_mode, bool low_prio, int short_hand,
-		unsigned long *deliver_bitmask);
-int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e);
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq);
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index f5e059b..4fa1f60 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -22,6 +22,9 @@
 #include <linux/kvm_host.h>
 
 #include <asm/msidef.h>
+#ifdef CONFIG_IA64
+#include <asm/iosapic.h>
+#endif
 
 #include "irq.h"
 
@@ -43,61 +46,71 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
-		int dest_id, int dest_mode, bool low_prio, int short_hand,
-		unsigned long *deliver_bitmask)
+inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
 {
-	int i, lowest = -1;
-	struct kvm_vcpu *vcpu;
+#ifdef CONFIG_IA64
+	return irq->delivery_mode ==
+		(IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+#else
+	return irq->delivery_mode == APIC_DM_LOWEST;
+#endif
+}
 
-	if (dest_mode == 0 && dest_id == 0xff && low_prio)
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq)
+{
+	int i, r = -1;
+	struct kvm_vcpu *vcpu, *lowest = NULL;
+
+	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
+			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
 
-	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
 	for (i = 0; i < KVM_MAX_VCPUS; i++) {
 		vcpu = kvm->vcpus[i];
 
 		if (!vcpu || !kvm_apic_present(vcpu))
 			continue;
 
-		if (!kvm_apic_match_dest(vcpu, src, short_hand, dest_id,
-					dest_mode))
+		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
+					irq->dest_id, irq->dest_mode))
 			continue;
 
-		if (!low_prio) {
-			__set_bit(i, deliver_bitmask);
+		if (!kvm_is_dm_lowest_prio(irq)) {
+			if (r < 0)
+				r = 0;
+			r += kvm_apic_set_irq(vcpu, irq);
 		} else {
-			if (lowest < 0)
-				lowest = i;
-			if (kvm_apic_compare_prio(vcpu, kvm->vcpus[lowest]) < 0)
-				lowest = i;
+			if (!lowest)
+				lowest = vcpu;
+			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+				lowest = vcpu;
 		}
 	}
 
-	if (lowest != -1)
-		__set_bit(lowest, deliver_bitmask);
+	if (lowest)
+		r = kvm_apic_set_irq(lowest, irq);
+
+	return r;
 }
 
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		       struct kvm *kvm, int level)
 {
-	union kvm_ioapic_redirect_entry entry;
+	struct kvm_lapic_irq irq;
 
-	entry.bits = 0;
-	entry.fields.dest_id = (e->msi.address_lo &
+	irq.dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-	entry.fields.vector = (e->msi.data &
+	irq.vector = (e->msi.data &
 			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
-	entry.fields.dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
-			(unsigned long *)&e->msi.address_lo);
-	entry.fields.trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
-			(unsigned long *)&e->msi.data);
-	entry.fields.delivery_mode = test_bit(
-			MSI_DATA_DELIVERY_MODE_SHIFT,
-			(unsigned long *)&e->msi.data);
+	irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+	irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+	irq.delivery_mode = e->msi.data & 0x700;
+	irq.level = 1;
+	irq.shorthand = 0;
 
 	/* TODO Deal with RH bit of MSI message address */
-	return ioapic_deliver_entry(kvm, &entry);
+	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
 /* This should be called with the kvm->lock mutex held
-- 
cgit v0.10.2


From 0c72ea7fb8a39b4bba071b19f5f835af5b5e538a Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Wed, 25 Feb 2009 10:38:52 -0600
Subject: KVM: ia64: Map in SN2 RTC registers to the VMM module

On SN2, map in the SN2 RTC registers to the VMM module, needed for ITC
emulation.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 5608488..1243995 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -371,6 +371,7 @@ struct kvm_vcpu_arch {
 	int last_run_cpu;
 	int vmm_tr_slot;
 	int vm_tr_slot;
+	int sn_rtc_tr_slot;
 
 #define KVM_MP_STATE_RUNNABLE          0
 #define KVM_MP_STATE_UNINITIALIZED     1
@@ -465,6 +466,7 @@ struct kvm_arch {
 	unsigned long	vmm_init_rr;
 
 	int		online_vcpus;
+	int		is_sn2;
 
 	struct kvm_ioapic *vioapic;
 	struct kvm_vm_stat stat;
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7a9bff4..0a9cc73 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -146,6 +146,8 @@
 #define PAGE_GATE	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
 #define PAGE_KERNEL	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX)
 #define PAGE_KERNELRX	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+#define PAGE_KERNEL_UC	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX | \
+				 _PAGE_MA_UC)
 
 # ifndef __ASSEMBLY__
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index acf43ec..14a3fab 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -41,6 +41,9 @@
 #include <asm/div64.h>
 #include <asm/tlb.h>
 #include <asm/elf.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
 
 #include "misc.h"
 #include "vti.h"
@@ -119,8 +122,7 @@ void kvm_arch_hardware_enable(void *garbage)
 	unsigned long saved_psr;
 	int slot;
 
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-				PAGE_KERNEL));
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
 	local_irq_save(saved_psr);
 	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
 	local_irq_restore(saved_psr);
@@ -425,6 +427,23 @@ static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
+{
+	unsigned long pte, rtc_phys_addr, map_addr;
+	int slot;
+
+	map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
+	rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
+	pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
+	slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
+	vcpu->arch.sn_rtc_tr_slot = slot;
+	if (slot < 0) {
+		printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
+		slot = 0;
+	}
+	return slot;
+}
+
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
 
@@ -563,18 +582,29 @@ static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
 	if (r < 0)
 		goto out;
 	vcpu->arch.vm_tr_slot = r;
+
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (kvm->arch.is_sn2) {
+		r = kvm_sn2_setup_mappings(vcpu);
+		if (r < 0)
+			goto out;
+	}
+#endif
+
 	r = 0;
 out:
 	return r;
-
 }
 
 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
 {
-
+	struct kvm *kvm = vcpu->kvm;
 	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
 	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (kvm->arch.is_sn2)
+		ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
+#endif
 }
 
 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
@@ -800,6 +830,9 @@ struct  kvm *kvm_arch_create_vm(void)
 
 	if (IS_ERR(kvm))
 		return ERR_PTR(-ENOMEM);
+
+	kvm->arch.is_sn2 = ia64_platform_is("sn2");
+
 	kvm_init_vm(kvm);
 
 	kvm->arch.online_vcpus = 0;
-- 
cgit v0.10.2


From c6c9fcdf0fff7da77c088be11e3c4d0181b36941 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Wed, 25 Feb 2009 10:38:53 -0600
Subject: KVM: ia64: Create inline function kvm_get_itc() to centralize ITC
 reading.

Move all reading of special register 'AR_ITC' into two functions, one
in the kernel and one in the VMM module. When running on SN2, base the
result on the RTC rather the system ITC, as the ITC isn't
synchronized.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 14a3fab..ebbd995 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -68,6 +68,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
+static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	if (vcpu->kvm->arch.is_sn2)
+		return rtc_time();
+	else
+#endif
+		return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
 static void kvm_flush_icache(unsigned long start, unsigned long len)
 {
 	int l;
@@ -457,7 +467,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 
 	if (irqchip_in_kernel(vcpu->kvm)) {
 
-		vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+		vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
 
 		if (time_after(vcpu_now_itc, vpd->itm)) {
 			vcpu->arch.timer_check = 1;
@@ -929,7 +939,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	RESTORE_REGS(saved_gp);
 
 	vcpu->arch.irq_new_pending = 1;
-	vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
+	vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
 	set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
 	vcpu_put(vcpu);
@@ -1210,7 +1220,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		regs->cr_iip = PALE_RESET_ENTRY;
 
 		/*Initialize itc offset for vcpus*/
-		itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
+		itc_offset = 0UL - kvm_get_itc(vcpu);
 		for (i = 0; i < kvm->arch.online_vcpus; i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
@@ -1472,7 +1482,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	}
 	for (i = 0; i < 4; i++)
 		regs->insvc[i] = vcpu->arch.insvc[i];
-	regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
+	regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
 	SAVE_REGS(xtp);
 	SAVE_REGS(metaphysical_rr0);
 	SAVE_REGS(metaphysical_rr4);
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index a18ee17..a2c6c15 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -788,13 +788,29 @@ void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
 		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
 }
 
+/*
+ * The Altix RTC is mapped specially here for the vmm module
+ */
+#define SN_RTC_BASE	(u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
+static long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+	struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
+
+	if (kvm->arch.is_sn2)
+		return (*SN_RTC_BASE);
+	else
+#endif
+		return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
 /************************************************************************
  * lsapic timer
  ***********************************************************************/
 u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
 {
 	unsigned long guest_itc;
-	guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
+	guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
 
 	if (guest_itc >= VMX(vcpu, last_itc)) {
 		VMX(vcpu, last_itc) = guest_itc;
@@ -809,7 +825,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 	struct kvm_vcpu *v;
 	struct kvm *kvm;
 	int i;
-	long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
+	long itc_offset = val - kvm_get_itc(vcpu);
 	unsigned long vitv = VCPU(vcpu, itv);
 
 	kvm = (struct kvm *)KVM_VM_BASE;
-- 
cgit v0.10.2


From ce17c643738bebcacf8d19d8cab7dd3eb96f9f32 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Wed, 25 Feb 2009 10:38:54 -0600
Subject: KVM: ia64: SN2 adjust emulated ITC frequency to match RTC frequency

On SN2 do not pass down the real ITC frequency, but rather patch the
values to match the SN2 RTC frequency.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index a8ae52e..e4b8231 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -21,6 +21,9 @@
 
 #include <linux/kvm_host.h>
 #include <linux/smp.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>
 
 #include "vti.h"
 #include "misc.h"
@@ -188,12 +191,35 @@ static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
 	return result;
 }
 
-static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+/*
+ * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
+ * RTC is used instead. This function patches the ratios from SAL
+ * to match the RTC before providing them to the guest.
+ */
+static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
 {
+	struct pal_freq_ratio *ratio;
+	unsigned long sal_freq, sal_drift, factor;
+
+	result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+					    &sal_freq, &sal_drift);
+	ratio = (struct pal_freq_ratio *)&result->v2;
+	factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
+		sn_rtc_cycles_per_second;
+
+	ratio->num = 3;
+	ratio->den = factor;
+}
 
+static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+{
 	struct ia64_pal_retval result;
 
 	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+
+	if (vcpu->kvm->arch.is_sn2)
+		sn2_patch_itc_freq_ratios(&result);
+
 	return result;
 }
 
-- 
cgit v0.10.2


From 0b5d7a2ccb98f0403b1969295429724af9dc9d8e Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Wed, 25 Feb 2009 10:38:55 -0600
Subject: KVM: ia64: Drop in SN2 replacement of fast path ITC emulation fault
 handler

Copy in SN2 RTC based ITC emulation for fast exit. The two versions
have the same size, so a dropin is simpler than patching the branch
instruction to hit the SN2 version.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 1243995..589536f 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -580,6 +580,8 @@ struct kvm_vmm_info{
 	kvm_vmm_entry 	*vmm_entry;
 	kvm_tramp_entry *tramp_entry;
 	unsigned long 	vmm_ivt;
+	unsigned long	patch_mov_ar;
+	unsigned long	patch_mov_ar_sn2;
 };
 
 int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index ebbd995..4623a90 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1671,8 +1671,37 @@ out:
 	return 0;
 }
 
+
+/*
+ * On SN2, the ITC isn't stable, so copy in fast path code to use the
+ * SN2 RTC, replacing the ITC based default verion.
+ */
+static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
+			  struct module *module)
+{
+	unsigned long new_ar, new_ar_sn2;
+	unsigned long module_base;
+
+	if (!ia64_platform_is("sn2"))
+		return;
+
+	module_base = (unsigned long)module->module_core;
+
+	new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
+	new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
+
+	printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
+	       "as source\n");
+
+	/*
+	 * Copy the SN2 version of mov_ar into place. They are both
+	 * the same size, so 6 bundles is sufficient (6 * 0x10).
+	 */
+	memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
+}
+
 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-						struct module *module)
+			    struct module *module)
 {
 	unsigned long module_base;
 	unsigned long vmm_size;
@@ -1694,6 +1723,7 @@ static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
 		return -EFAULT;
 
 	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+	kvm_patch_vmm(vmm_info, module);
 	kvm_flush_icache(kvm_vmm_base, vmm_size);
 
 	/*Recalculate kvm_vmm_info based on new VMM*/
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
index 32254ce..f793be3 100644
--- a/arch/ia64/kvm/optvfault.S
+++ b/arch/ia64/kvm/optvfault.S
@@ -11,6 +11,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/processor.h>
+#include <asm/kvm_host.h>
 
 #include "vti.h"
 #include "asm-offsets.h"
@@ -140,6 +141,35 @@ GLOBAL_ENTRY(kvm_asm_mov_from_ar)
 	;;
 END(kvm_asm_mov_from_ar)
 
+/*
+ * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
+ * clock as it's source for emulating the ITC. This version will be
+ * copied on top of the original version if the host is determined to
+ * be an SN2.
+ */
+GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
+	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+	movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
+
+	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+	extr.u r17=r25,6,7
+	mov r24=b0
+	;;
+	ld8 r18=[r18]
+	ld8 r19=[r19]
+	addl r20=@gprel(asm_mov_to_reg),gp
+	;;
+	add r19=r19,r18
+	shladd r17=r17,4,r20
+	;;
+	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+	st8 [r16] = r19
+	mov b0=r17
+	br.sptk.few b0
+	;;
+END(kvm_asm_mov_from_ar_sn2)
+
+
 
 // mov r1=rr[r3]
 GLOBAL_ENTRY(kvm_asm_mov_from_rr)
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 9eee5c0..f4b4c89 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -30,15 +30,19 @@ MODULE_AUTHOR("Intel");
 MODULE_LICENSE("GPL");
 
 extern char kvm_ia64_ivt;
+extern char kvm_asm_mov_from_ar;
+extern char kvm_asm_mov_from_ar_sn2;
 extern fpswa_interface_t *vmm_fpswa_interface;
 
 long vmm_sanity = 1;
 
 struct kvm_vmm_info vmm_info = {
-	.module	     = THIS_MODULE,
-	.vmm_entry   = vmm_entry,
-	.tramp_entry = vmm_trampoline,
-	.vmm_ivt     = (unsigned long)&kvm_ia64_ivt,
+	.module			= THIS_MODULE,
+	.vmm_entry		= vmm_entry,
+	.tramp_entry		= vmm_trampoline,
+	.vmm_ivt		= (unsigned long)&kvm_ia64_ivt,
+	.patch_mov_ar		= (unsigned long)&kvm_asm_mov_from_ar,
+	.patch_mov_ar_sn2	= (unsigned long)&kvm_asm_mov_from_ar_sn2,
 };
 
 static int __init  kvm_vmm_init(void)
-- 
cgit v0.10.2


From 386eb6e8b3caface8a0514da70a47c05cabb5b96 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Tue, 10 Mar 2009 22:51:09 +0100
Subject: KVM: make 'lapic_timer_ops' and 'kpit_ops' static

Fix this sparse warnings:
  arch/x86/kvm/lapic.c:916:22: warning: symbol 'lapic_timer_ops' was not declared. Should it be static?
  arch/x86/kvm/i8254.c:268:22: warning: symbol 'kpit_ops' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4e2e3f2..cf09bb6 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -265,7 +265,7 @@ static bool kpit_is_periodic(struct kvm_timer *ktimer)
 	return ps->is_periodic;
 }
 
-struct kvm_timer_ops kpit_ops = {
+static struct kvm_timer_ops kpit_ops = {
 	.is_periodic = kpit_is_periodic,
 };
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index dd934d2..4d76bb6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -913,7 +913,7 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
-struct kvm_timer_ops lapic_timer_ops = {
+static struct kvm_timer_ops lapic_timer_ops = {
 	.is_periodic = lapic_is_periodic,
 };
 
-- 
cgit v0.10.2


From e56d532f20c890a06bbe7cd479f4201e3a03cd73 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 12 Mar 2009 21:45:39 +0800
Subject: KVM: Device assignment framework rework

After discussion with Marcelo, we decided to rework device assignment framework
together. The old problems are kernel logic is unnecessary complex. So Marcelo
suggest to split it into a more elegant way:

1. Split host IRQ assign and guest IRQ assign. And userspace determine the
combination. Also discard msi2intx parameter, userspace can specific
KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_INTX in assigned_irq->flags to
enable MSI to INTx convertion.

2. Split assign IRQ and deassign IRQ. Import two new ioctls:
KVM_ASSIGN_DEV_IRQ and KVM_DEASSIGN_DEV_IRQ.

This patch also fixed the reversed _IOR vs _IOW in definition(by deprecated the
old interface).

[avi: replace homemade bitcount() by hweight_long()]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43e049a..41123fc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_ASSIGN_DEV_IRQ:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 640835e..644e3a9 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -412,6 +412,7 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_MSIX
 #define KVM_CAP_DEVICE_MSIX 28
 #endif
+#define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 
@@ -485,8 +486,10 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
 				   struct kvm_assigned_pci_dev)
 #define KVM_SET_GSI_ROUTING       _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
+/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
+#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
 #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
 				     struct kvm_assigned_pci_dev)
@@ -494,6 +497,7 @@ struct kvm_irq_routing {
 			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
 			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
+#define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 
 /*
  * ioctls for vcpu fds
@@ -584,6 +588,8 @@ struct kvm_debug_guest {
 #define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
 #define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 struct kvm_assigned_pci_dev {
 	__u32 assigned_dev_id;
 	__u32 busnr;
@@ -594,6 +600,17 @@ struct kvm_assigned_pci_dev {
 	};
 };
 
+#define KVM_DEV_IRQ_HOST_INTX    (1 << 0)
+#define KVM_DEV_IRQ_HOST_MSI     (1 << 1)
+#define KVM_DEV_IRQ_HOST_MSIX    (1 << 2)
+
+#define KVM_DEV_IRQ_GUEST_INTX   (1 << 8)
+#define KVM_DEV_IRQ_GUEST_MSI    (1 << 9)
+#define KVM_DEV_IRQ_GUEST_MSIX   (1 << 10)
+
+#define KVM_DEV_IRQ_HOST_MASK	 0x00ff
+#define KVM_DEV_IRQ_GUEST_MASK   0xff00
+
 struct kvm_assigned_irq {
 	__u32 assigned_dev_id;
 	__u32 host_irq;
@@ -609,15 +626,6 @@ struct kvm_assigned_irq {
 	};
 };
 
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
-
-#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
-
-#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION  (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\
-					KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX  (1 << 1)
-#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX    (1 << 2)
 
 struct kvm_assigned_msix_nr {
 	__u32 assigned_dev_id;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fb60f31..40e49ed 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -339,11 +339,6 @@ struct kvm_assigned_dev_kernel {
 	struct msix_entry *host_msix_entries;
 	int guest_irq;
 	struct kvm_guest_msix_entry *guest_msix_entries;
-#define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
-#define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
-#define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
-#define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
-#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	int flags;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3bed827..792fb7f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -60,9 +61,6 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static int msi2intx = 1;
-module_param(msi2intx, bool, 0);
-
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -132,7 +130,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&kvm->lock);
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
 			assigned_dev->guest_msix_entries;
 		for (i = 0; i < assigned_dev->entries_nr; i++) {
@@ -152,7 +150,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
 			    assigned_dev->guest_irq, 1);
 		if (assigned_dev->irq_requested_type &
-				KVM_ASSIGNED_DEV_GUEST_MSI) {
+				KVM_DEV_IRQ_GUEST_MSI) {
 			enable_irq(assigned_dev->host_irq);
 			assigned_dev->host_irq_disabled = false;
 		}
@@ -166,7 +164,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
-	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int index = find_index_from_host_irq(assigned_dev, irq);
 		if (index < 0)
 			return IRQ_HANDLED;
@@ -204,22 +202,22 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	}
 }
 
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void kvm_free_assigned_irq(struct kvm *kvm,
-				  struct kvm_assigned_dev_kernel *assigned_dev)
+static void deassign_guest_irq(struct kvm *kvm,
+			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	if (!irqchip_in_kernel(kvm))
-		return;
-
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 	assigned_dev->irq_source_id = -1;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
 
-	if (!assigned_dev->irq_requested_type)
-		return;
-
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+			      struct kvm_assigned_dev_kernel *assigned_dev)
+{
 	/*
 	 * In kvm_free_device_irq, cancel_work_sync return true if:
 	 * 1. work is scheduled, and then cancelled.
@@ -236,7 +234,7 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	 * now, the kvm state is still legal for probably we also have to wait
 	 * interrupt_work done.
 	 */
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int i;
 		for (i = 0; i < assigned_dev->entries_nr; i++)
 			disable_irq_nosync(assigned_dev->
@@ -259,14 +257,41 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
-		if (assigned_dev->irq_requested_type &
-				KVM_ASSIGNED_DEV_HOST_MSI)
+		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
 			pci_disable_msi(assigned_dev->dev);
 	}
 
-	assigned_dev->irq_requested_type = 0;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
+}
+
+static int kvm_deassign_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *assigned_dev,
+			    unsigned long irq_requested_type)
+{
+	unsigned long guest_irq_type, host_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return -EINVAL;
+	/* no irq assignment to deassign */
+	if (!assigned_dev->irq_requested_type)
+		return -ENXIO;
+
+	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+	if (host_irq_type)
+		deassign_host_irq(kvm, assigned_dev);
+	if (guest_irq_type)
+		deassign_guest_irq(kvm, assigned_dev);
+
+	return 0;
 }
 
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
 
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
@@ -298,256 +323,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
 	}
 }
 
-static int assigned_device_update_intx(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	adev->guest_irq = airq->guest_irq;
-	adev->ack_notifier.gsi = airq->guest_irq;
-
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
-		return 0;
-
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx &&
-		    (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
-			free_irq(adev->host_irq, (void *)adev);
-			pci_disable_msi(adev->dev);
-		}
+	dev->host_irq = dev->dev->irq;
+	/* Even though this is PCI, we don't want to use shared
+	 * interrupts. Sharing host devices with guest-assigned devices
+	 * on the same interrupt line is not a happy situation: there
+	 * are going to be long delays in accepting, acking, etc.
+	 */
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
+			0, "kvm_assigned_intx_device", (void *)dev))
+		return -EIO;
+	return 0;
+}
 
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+					   struct kvm_assigned_dev_kernel *dev)
+{
+	int r;
 
-		if (airq->host_irq)
-			adev->host_irq = airq->host_irq;
-		else
-			adev->host_irq = adev->dev->irq;
+	if (!dev->dev->msi_enabled) {
+		r = pci_enable_msi(dev->dev);
+		if (r)
+			return r;
+	}
 
-		/* Even though this is PCI, we don't want to use shared
-		 * interrupts. Sharing host devices with guest-assigned devices
-		 * on the same interrupt line is not a happy situation: there
-		 * are going to be long delays in accepting, acking, etc.
-		 */
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
-				0, "kvm_assigned_intx_device", (void *)adev))
-			return -EIO;
+	dev->host_irq = dev->dev->irq;
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
+			"kvm_assigned_msi_device", (void *)dev)) {
+		pci_disable_msi(dev->dev);
+		return -EIO;
 	}
 
-	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
-				   KVM_ASSIGNED_DEV_HOST_INTX;
 	return 0;
 }
+#endif
 
-#ifdef CONFIG_X86
-static int assigned_device_update_msi(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	int r;
+	int i, r = -EINVAL;
 
-	adev->guest_irq = airq->guest_irq;
-	if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
-		/* x86 don't care upper address of guest msi message addr */
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->ack_notifier.gsi = -1;
-	} else if (msi2intx) {
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->ack_notifier.gsi = airq->guest_irq;
-	} else {
-		/*
-		 * Guest require to disable device MSI, we disable MSI and
-		 * re-enable INTx by default again. Notice it's only for
-		 * non-msi2intx.
-		 */
-		assigned_device_update_intx(kvm, adev, airq);
-		return 0;
-	}
+	/* host_msix_entries and guest_msix_entries should have been
+	 * initialized */
+	if (dev->entries_nr == 0)
+		return r;
 
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		return 0;
+	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	if (r)
+		return r;
 
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx) {
-			if (adev->irq_requested_type &
-					KVM_ASSIGNED_DEV_HOST_INTX)
-				free_irq(adev->host_irq, (void *)adev);
+	for (i = 0; i < dev->entries_nr; i++) {
+		r = request_irq(dev->host_msix_entries[i].vector,
+				kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msix_device",
+				(void *)dev);
+		/* FIXME: free requested_irq's on failure */
+		if (r)
+			return r;
+	}
 
-			r = pci_enable_msi(adev->dev);
-			if (r)
-				return r;
-		}
+	return 0;
+}
 
-		adev->host_irq = adev->dev->irq;
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
-				"kvm_assigned_msi_device", (void *)adev))
-			return -EIO;
-	}
+#endif
 
-	if (!msi2intx)
-		adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = irq->guest_irq;
+	return 0;
+}
 
-	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
 	return 0;
 }
 #endif
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+			   struct kvm_assigned_dev_kernel *dev,
+			   __u32 host_irq_type)
+{
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+		return r;
 
+	switch (host_irq_type) {
+	case KVM_DEV_IRQ_HOST_INTX:
+		r = assigned_device_enable_host_intx(kvm, dev);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_HOST_MSI:
+		r = assigned_device_enable_host_msi(kvm, dev);
+		break;
+#endif
 #ifdef __KVM_HAVE_MSIX
-static int assigned_device_update_msix(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
-{
-	/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-	int i, r;
-
-	adev->ack_notifier.gsi = -1;
-
-	if (irqchip_in_kernel(kvm)) {
-		if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
-			return -ENOTTY;
-
-		if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
-			/* Guest disable MSI-X */
-			kvm_free_assigned_irq(kvm, adev);
-			if (msi2intx) {
-				pci_enable_msi(adev->dev);
-				if (adev->dev->msi_enabled)
-					return assigned_device_update_msi(kvm,
-							adev, airq);
-			}
-			return assigned_device_update_intx(kvm, adev, airq);
-		}
+	case KVM_DEV_IRQ_HOST_MSIX:
+		r = assigned_device_enable_host_msix(kvm, dev);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
 
-		/* host_msix_entries and guest_msix_entries should have been
-		 * initialized */
-		if (adev->entries_nr == 0)
-			return -EINVAL;
+	if (!r)
+		dev->irq_requested_type |= host_irq_type;
 
-		kvm_free_assigned_irq(kvm, adev);
+	return r;
+}
 
-		r = pci_enable_msix(adev->dev, adev->host_msix_entries,
-				    adev->entries_nr);
-		if (r)
-			return r;
+static int assign_guest_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *dev,
+			    struct kvm_assigned_irq *irq,
+			    unsigned long guest_irq_type)
+{
+	int id;
+	int r = -EEXIST;
 
-		for (i = 0; i < adev->entries_nr; i++) {
-			r = request_irq((adev->host_msix_entries + i)->vector,
-					kvm_assigned_dev_intr, 0,
-					"kvm_assigned_msix_device",
-					(void *)adev);
-			if (r)
-				return r;
-		}
+	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+		return r;
+
+	id = kvm_request_irq_source_id(kvm);
+	if (id < 0)
+		return id;
+
+	dev->irq_source_id = id;
+
+	switch (guest_irq_type) {
+	case KVM_DEV_IRQ_GUEST_INTX:
+		r = assigned_device_enable_guest_intx(kvm, dev, irq);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_GUEST_MSI:
+		r = assigned_device_enable_guest_msi(kvm, dev, irq);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_GUEST_MSIX:
+		r = assigned_device_enable_guest_msix(kvm, dev, irq);
+		break;
+#endif
+	default:
+		r = -EINVAL;
 	}
 
-	adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+	if (!r) {
+		dev->irq_requested_type |= guest_irq_type;
+		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+	} else
+		kvm_free_irq_source_id(kvm, dev->irq_source_id);
 
-	return 0;
+	return r;
 }
-#endif
 
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-				   struct kvm_assigned_irq
-				   *assigned_irq)
+				   struct kvm_assigned_irq *assigned_irq)
 {
-	int r = 0;
+	int r = -EINVAL;
 	struct kvm_assigned_dev_kernel *match;
-	u32 current_flags = 0, changed_flags;
+	unsigned long host_irq_type, guest_irq_type;
 
-	mutex_lock(&kvm->lock);
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
 
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	mutex_lock(&kvm->lock);
+	r = -ENODEV;
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_irq->assigned_dev_id);
-	if (!match) {
-		mutex_unlock(&kvm->lock);
-		return -EINVAL;
-	}
-
-	if (!match->irq_requested_type) {
-		INIT_WORK(&match->interrupt_work,
-				kvm_assigned_dev_interrupt_work_handler);
-		if (irqchip_in_kernel(kvm)) {
-			/* Register ack nofitier */
-			match->ack_notifier.gsi = -1;
-			match->ack_notifier.irq_acked =
-					kvm_assigned_dev_ack_irq;
-			kvm_register_irq_ack_notifier(kvm,
-					&match->ack_notifier);
-
-			/* Request IRQ source ID */
-			r = kvm_request_irq_source_id(kvm);
-			if (r < 0)
-				goto out_release;
-			else
-				match->irq_source_id = r;
-
-#ifdef CONFIG_X86
-			/* Determine host device irq type, we can know the
-			 * result from dev->msi_enabled */
-			if (msi2intx)
-				pci_enable_msi(match->dev);
-#endif
-		}
-	}
+	if (!match)
+		goto out;
 
-	if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
-		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
-	else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
-		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
-		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
 
-	changed_flags = assigned_irq->flags ^ current_flags;
+	r = -EINVAL;
+	/* can only assign one type at a time */
+	if (hweight_long(host_irq_type) > 1)
+		goto out;
+	if (hweight_long(guest_irq_type) > 1)
+		goto out;
+	if (host_irq_type == 0 && guest_irq_type == 0)
+		goto out;
 
-#ifdef __KVM_HAVE_MSIX
-	if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
-		r = assigned_device_update_msix(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to execute "
-					"MSI-X action!\n");
-			goto out_release;
-		}
-	} else
-#endif
-	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
-	    (msi2intx && match->dev->msi_enabled)) {
-#ifdef CONFIG_X86
-		r = assigned_device_update_msi(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"MSI device!\n");
-			goto out_release;
-		}
-#else
-		r = -ENOTTY;
-#endif
-	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
-		/* Host device IRQ 0 means don't support INTx */
-		if (!msi2intx) {
-			printk(KERN_WARNING
-			       "kvm: wait device to enable MSI!\n");
-			r = 0;
-		} else {
-			printk(KERN_WARNING
-			       "kvm: failed to enable MSI device!\n");
-			r = -ENOTTY;
-			goto out_release;
-		}
-	} else {
-		/* Non-sharing INTx mode */
-		r = assigned_device_update_intx(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"INTx device!\n");
-			goto out_release;
-		}
-	}
+	r = 0;
+	if (host_irq_type)
+		r = assign_host_irq(kvm, match, host_irq_type);
+	if (r)
+		goto out;
 
+	if (guest_irq_type)
+		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
 	mutex_unlock(&kvm->lock);
 	return r;
-out_release:
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+					 struct kvm_assigned_irq
+					 *assigned_irq)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
+out:
 	mutex_unlock(&kvm->lock);
-	kvm_free_assigned_device(kvm, match);
 	return r;
 }
 
@@ -565,7 +578,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      assigned_dev->assigned_dev_id);
 	if (match) {
 		/* device already assigned */
-		r = -EINVAL;
+		r = -EEXIST;
 		goto out;
 	}
 
@@ -604,6 +617,9 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->dev = dev;
 	match->irq_source_id = -1;
 	match->kvm = kvm;
+	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
@@ -2084,6 +2100,11 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_ASSIGN_IRQ: {
+		r = -EOPNOTSUPP;
+		break;
+	}
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+	case KVM_ASSIGN_DEV_IRQ: {
 		struct kvm_assigned_irq assigned_irq;
 
 		r = -EFAULT;
@@ -2094,6 +2115,18 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+	case KVM_DEASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
 #endif
 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
 	case KVM_DEASSIGN_PCI_DEVICE: {
@@ -2596,9 +2629,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
-#ifndef CONFIG_X86
-	msi2intx = 0;
-#endif
 
 	return 0;
 
-- 
cgit v0.10.2


From f00be0cae4e6ad0a8c7be381c6d9be3586800b3e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 19 Mar 2009 12:20:36 +0200
Subject: KVM: MMU: do not free active mmu pages in free_mmu_pages()

free_mmu_pages() should only undo what alloc_mmu_pages() does.
Free mmu pages from the generic VM destruction function, kvm_destroy_vm().

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 32cf11e..8aac67c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2638,14 +2638,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
-	struct kvm_mmu_page *sp;
-
-	while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
-		sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
-				  struct kvm_mmu_page, link);
-		kvm_mmu_zap_page(vcpu->kvm, sp);
-		cond_resched();
-	}
 	free_page((unsigned long)vcpu->arch.mmu.pae_root);
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 792fb7f..934dd1c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1032,6 +1032,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #endif
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+#else
+	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
 	mmdrop(mm);
-- 
cgit v0.10.2


From 7fe29e0faacb650d31b9e9f538203a157bec821d Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Fri, 20 Mar 2009 12:39:00 +0530
Subject: KVM: x86: Ignore reads to EVNTSEL MSRs

We ignore writes to the performance counters and performance event
selector registers already. Kaspersky antivirus reads the eventsel
MSR causing it to crash with the current behaviour.

Return 0 as data when the eventsel registers are read to stop the
crash.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 41123fc..c0ae5e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -895,6 +895,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
 	case MSR_VM_HSAVE_PA:
+	case MSR_P6_EVNTSEL0:
+	case MSR_P6_EVNTSEL1:
 		data = 0;
 		break;
 	case MSR_MTRRcap:
-- 
cgit v0.10.2


From 61c50edfcd40be9126579f9cec68c789b6089998 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 22 Mar 2009 12:37:05 +0200
Subject: KVM: SVM: Remove duplicate code in svm_do_inject_vector()

svm_do_inject_vector() reimplements pop_irq().

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1f8510c..5b35ebd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2346,15 +2346,7 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
 
 static void svm_do_inject_vector(struct vcpu_svm *svm)
 {
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-	int word_index = __ffs(vcpu->arch.irq_summary);
-	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
-	if (!vcpu->arch.irq_pending[word_index])
-		clear_bit(word_index, &vcpu->arch.irq_summary);
-	svm_inject_irq(svm, irq);
+	svm_inject_irq(svm, pop_irq(&svm->vcpu));
 }
 
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-- 
cgit v0.10.2


From fe4c7b1914ac46af751d256f5a20c2e12dcbaaae Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 23 Mar 2009 11:23:18 +0200
Subject: KVM: reuse (pop|push)_irq from svm.c in vmx.c

The prioritized bit vector manipulation functions are useful in both vmx and
svm.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5b35ebd..aa528db 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -19,6 +19,7 @@
 #include "irq.h"
 #include "mmu.h"
 #include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -132,24 +133,6 @@ static inline u32 svm_has(u32 feat)
 	return svm_features & feat;
 }
 
-static inline u8 pop_irq(struct kvm_vcpu *vcpu)
-{
-	int word_index = __ffs(vcpu->arch.irq_summary);
-	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
-	if (!vcpu->arch.irq_pending[word_index])
-		clear_bit(word_index, &vcpu->arch.irq_summary);
-	return irq;
-}
-
-static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
-{
-	set_bit(irq, vcpu->arch.irq_pending);
-	set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
-}
-
 static inline void clgi(void)
 {
 	asm volatile (__ex(SVM_CLGI));
@@ -1116,7 +1099,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	if (!irqchip_in_kernel(kvm) &&
 	    is_external_interrupt(exit_int_info)) {
 		event_injection = true;
-		push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
+		kvm_push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
 	}
 
 	fault_address  = svm->vmcb->control.exit_info_2;
@@ -2336,7 +2319,7 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
 	if ((control->int_ctl & V_IRQ_MASK)
 	    && !irqchip_in_kernel(svm->vcpu.kvm)) {
 		control->int_ctl &= ~V_IRQ_MASK;
-		push_irq(&svm->vcpu, control->int_vector);
+		kvm_push_irq(&svm->vcpu, control->int_vector);
 	}
 
 	svm->vcpu.arch.interrupt_window_open =
@@ -2346,7 +2329,7 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
 
 static void svm_do_inject_vector(struct vcpu_svm *svm)
 {
-	svm_inject_irq(svm, pop_irq(&svm->vcpu));
+	svm_inject_irq(svm, kvm_pop_irq(&svm->vcpu));
 }
 
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b5eae7a..2c0a2ed 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2489,18 +2489,6 @@ static void vmx_update_window_states(struct kvm_vcpu *vcpu)
 				 GUEST_INTR_STATE_MOV_SS)));
 }
 
-static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
-{
-	int word_index = __ffs(vcpu->arch.irq_summary);
-	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
-	if (!vcpu->arch.irq_pending[word_index])
-		clear_bit(word_index, &vcpu->arch.irq_summary);
-	kvm_queue_interrupt(vcpu, irq);
-}
-
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
@@ -2534,7 +2522,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-			kvm_do_inject_irq(vcpu);
+			kvm_queue_interrupt(vcpu, kvm_pop_irq(vcpu));
 
 		if (vcpu->arch.interrupt.pending)
 			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
@@ -2619,8 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
 		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-		set_bit(irq, vcpu->arch.irq_pending);
-		set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
+		kvm_push_irq(vcpu, irq);
 	}
 
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 6a4be78..2ab6791 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -19,4 +19,22 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
 	vcpu->arch.interrupt.pending = false;
 }
 
+static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
+{
+	int word_index = __ffs(vcpu->arch.irq_summary);
+	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
+	int irq = word_index * BITS_PER_LONG + bit_index;
+
+	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
+	if (!vcpu->arch.irq_pending[word_index])
+		clear_bit(word_index, &vcpu->arch.irq_summary);
+	return irq;
+}
+
+static inline void kvm_push_irq(struct kvm_vcpu *vcpu, u8 irq)
+{
+        set_bit(irq, vcpu->arch.irq_pending);
+        set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
+}
+
 #endif
-- 
cgit v0.10.2


From c1f8bc04c6f8576553dc87abe7562e868433a19f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 23 Mar 2009 15:41:17 +0200
Subject: KVM: VMX: Make module parameters readable

Useful to see how the module was loaded.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2c0a2ed..469787ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -39,19 +39,19 @@ MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
 static int bypass_guest_pf = 1;
-module_param(bypass_guest_pf, bool, 0);
+module_param(bypass_guest_pf, bool, S_IRUGO);
 
 static int enable_vpid = 1;
-module_param(enable_vpid, bool, 0);
+module_param(enable_vpid, bool, 0444);
 
 static int flexpriority_enabled = 1;
-module_param(flexpriority_enabled, bool, 0);
+module_param(flexpriority_enabled, bool, S_IRUGO);
 
 static int enable_ept = 1;
-module_param(enable_ept, bool, 0);
+module_param(enable_ept, bool, S_IRUGO);
 
 static int emulate_invalid_guest_state = 0;
-module_param(emulate_invalid_guest_state, bool, 0);
+module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
 struct vmcs {
 	u32 revision_id;
-- 
cgit v0.10.2


From 6062d012ed23c29672bb0f93ebcfb8e556def726 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 23 Mar 2009 17:35:17 +0200
Subject: KVM: VMX: Rename kvm_handle_exit() to vmx_handle_exit()

It is a static vmx-specific function.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 469787ce..85f4fd5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3162,7 +3162,7 @@ static const int kvm_vmx_max_exit_handlers =
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
  */
-static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3681,7 +3681,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.tlb_flush = vmx_flush_tlb,
 
 	.run = vmx_vcpu_run,
-	.handle_exit = kvm_handle_exit,
+	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
 	.patch_hypercall = vmx_patch_hypercall,
 	.get_irq = vmx_get_irq,
-- 
cgit v0.10.2


From 736caefe1511d9d1116ed4ffb0ea95b7368beb1f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 23 Mar 2009 17:39:48 +0200
Subject: KVM: VMX: Simplify module parameter names

Instead of 'enable_vpid=1', use a simple 'vpid=1'.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 85f4fd5..a69ba6b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -42,13 +42,13 @@ static int bypass_guest_pf = 1;
 module_param(bypass_guest_pf, bool, S_IRUGO);
 
 static int enable_vpid = 1;
-module_param(enable_vpid, bool, 0444);
+module_param_named(vpid, enable_vpid, bool, 0444);
 
 static int flexpriority_enabled = 1;
-module_param(flexpriority_enabled, bool, S_IRUGO);
+module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
 
 static int enable_ept = 1;
-module_param(enable_ept, bool, S_IRUGO);
+module_param_named(ept, enable_ept, bool, S_IRUGO);
 
 static int emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
-- 
cgit v0.10.2


From 4462d21a615dfd0c0f672c10832a011d6f280d5a Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 23 Mar 2009 17:53:37 +0200
Subject: KVM: VMX: Annotate module parameters as __read_mostly

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a69ba6b..f4b6c4b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -38,19 +38,19 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static int bypass_guest_pf = 1;
+static int __read_mostly bypass_guest_pf = 1;
 module_param(bypass_guest_pf, bool, S_IRUGO);
 
-static int enable_vpid = 1;
+static int __read_mostly enable_vpid = 1;
 module_param_named(vpid, enable_vpid, bool, 0444);
 
-static int flexpriority_enabled = 1;
+static int __read_mostly flexpriority_enabled = 1;
 module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
 
-static int enable_ept = 1;
+static int __read_mostly enable_ept = 1;
 module_param_named(ept, enable_ept, bool, S_IRUGO);
 
-static int emulate_invalid_guest_state = 0;
+static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
 struct vmcs {
-- 
cgit v0.10.2


From 919818abc2ca0721f1cd296fbc24601d9044f993 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 23 Mar 2009 18:01:29 +0200
Subject: KVM: VMX: Zero the vpid module parameter if vpid is not supported

This allows reading back how the hardware is configured.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f4b6c4b..9b97c8e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1202,6 +1202,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		      vmx_capability.ept, vmx_capability.vpid);
 	}
 
+	if (!cpu_has_vmx_vpid())
+		enable_vpid = 0;
+
 	min = 0;
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
@@ -2082,7 +2085,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
 	int vpid;
 
 	vmx->vpid = 0;
-	if (!enable_vpid || !cpu_has_vmx_vpid())
+	if (!enable_vpid)
 		return;
 	spin_lock(&vmx_vpid_lock);
 	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-- 
cgit v0.10.2


From 575ff2dcb25608d53737d1126ee0e7e4d6f11752 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 23 Mar 2009 18:25:15 +0200
Subject: KVM: VMX: Zero ept module parameter if ept is not present

Allows reading back hardware capability.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9b97c8e..2f65120 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -265,7 +265,7 @@ static inline int cpu_has_vmx_ept(void)
 
 static inline int vm_need_ept(void)
 {
-	return (cpu_has_vmx_ept() && enable_ept);
+	return enable_ept;
 }
 
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
@@ -1205,6 +1205,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (!cpu_has_vmx_vpid())
 		enable_vpid = 0;
 
+	if (!cpu_has_vmx_ept())
+		enable_ept = 0;
+
 	min = 0;
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
-- 
cgit v0.10.2


From 089d034e0c4538d2436512fa64782b91008d4a7c Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 23 Mar 2009 18:26:32 +0200
Subject: KVM: VMX: Fold vm_need_ept() into callers

Trivial.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2f65120..da6461d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -263,11 +263,6 @@ static inline int cpu_has_vmx_ept(void)
 		SECONDARY_EXEC_ENABLE_EPT);
 }
 
-static inline int vm_need_ept(void)
-{
-	return enable_ept;
-}
-
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
 	return ((cpu_has_vmx_virtualize_apic_accesses()) &&
@@ -382,7 +377,7 @@ static inline void ept_sync_global(void)
 
 static inline void ept_sync_context(u64 eptp)
 {
-	if (vm_need_ept()) {
+	if (enable_ept) {
 		if (cpu_has_vmx_invept_context())
 			__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
 		else
@@ -392,7 +387,7 @@ static inline void ept_sync_context(u64 eptp)
 
 static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
 {
-	if (vm_need_ept()) {
+	if (enable_ept) {
 		if (cpu_has_vmx_invept_individual_addr())
 			__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
 					eptp, gpa);
@@ -491,7 +486,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	}
 	if (vcpu->arch.rmode.active)
 		eb = ~0;
-	if (vm_need_ept())
+	if (enable_ept)
 		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -1502,7 +1497,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	vpid_sync_vcpu_all(to_vmx(vcpu));
-	if (vm_need_ept())
+	if (enable_ept)
 		ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
 }
 
@@ -1587,7 +1582,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	}
 #endif
 
-	if (vm_need_ept())
+	if (enable_ept)
 		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
 	vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -1616,7 +1611,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	u64 eptp;
 
 	guest_cr3 = cr3;
-	if (vm_need_ept()) {
+	if (enable_ept) {
 		eptp = construct_eptp(cr3);
 		vmcs_write64(EPT_POINTER, eptp);
 		ept_sync_context(eptp);
@@ -1637,7 +1632,7 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
 	vcpu->arch.cr4 = cr4;
-	if (vm_need_ept())
+	if (enable_ept)
 		ept_update_paging_mode_cr4(&hw_cr4, vcpu);
 
 	vmcs_writel(CR4_READ_SHADOW, cr4);
@@ -1999,7 +1994,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
 	pfn_t identity_map_pfn;
 	u32 tmp;
 
-	if (!vm_need_ept())
+	if (!enable_ept)
 		return 1;
 	if (unlikely(!kvm->arch.ept_identity_pagetable)) {
 		printk(KERN_ERR "EPT: identity-mapping pagetable "
@@ -2163,7 +2158,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 				CPU_BASED_CR8_LOAD_EXITING;
 #endif
 	}
-	if (!vm_need_ept())
+	if (!enable_ept)
 		exec_control |= CPU_BASED_CR3_STORE_EXITING |
 				CPU_BASED_CR3_LOAD_EXITING  |
 				CPU_BASED_INVLPG_EXITING;
@@ -2176,7 +2171,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 				~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 		if (vmx->vpid == 0)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
-		if (!vm_need_ept())
+		if (!enable_ept)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
@@ -2637,7 +2632,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
 		/* EPT won't cause page fault directly */
-		if (vm_need_ept())
+		if (enable_ept)
 			BUG();
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
@@ -3187,7 +3182,7 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
-	if (vm_need_ept() && is_paging(vcpu)) {
+	if (enable_ept && is_paging(vcpu)) {
 		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
 		ept_load_pdptrs(vcpu);
 	}
@@ -3602,7 +3597,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		if (alloc_apic_access_page(kvm) != 0)
 			goto free_vmcs;
 
-	if (vm_need_ept())
+	if (enable_ept)
 		if (alloc_identity_pagetable(kvm) != 0)
 			goto free_vmcs;
 
@@ -3753,7 +3748,7 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 
-	if (vm_need_ept()) {
+	if (enable_ept) {
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK);
-- 
cgit v0.10.2


From 09cec754885f900f6aab23801878c0cd217ee1d6 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 23 Mar 2009 15:11:44 +0200
Subject: KVM: Timer event should not unconditionally unhalt vcpu.

Currently timer events are processed before entering guest mode. Move it
to main vcpu event loop since timer events should be processed even while
vcpu is halted.  Timer may cause interrupt/nmi to be injected and only then
vcpu will be unhalted.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 4623a90..d2a90fd 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -488,10 +488,10 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 		hrtimer_cancel(p_ht);
 		vcpu->arch.ht_active = 0;
 
-		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
+				kvm_cpu_has_pending_timer(vcpu))
 			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-				vcpu->arch.mp_state =
-					KVM_MP_STATE_RUNNABLE;
+				vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
 		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
 			return -EINTR;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c0ae5e6..8fca7a4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3133,9 +3133,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
-	kvm_inject_pending_timer_irqs(vcpu);
-
 	preempt_disable();
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -3235,6 +3232,7 @@ out:
 	return r;
 }
 
+
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -3261,29 +3259,42 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			kvm_vcpu_block(vcpu);
 			down_read(&vcpu->kvm->slots_lock);
 			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
-				if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+			{
+				switch(vcpu->arch.mp_state) {
+				case KVM_MP_STATE_HALTED:
 					vcpu->arch.mp_state =
-							KVM_MP_STATE_RUNNABLE;
-			if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-				r = -EINTR;
+						KVM_MP_STATE_RUNNABLE;
+				case KVM_MP_STATE_RUNNABLE:
+					break;
+				case KVM_MP_STATE_SIPI_RECEIVED:
+				default:
+					r = -EINTR;
+					break;
+				}
+			}
 		}
 
-		if (r > 0) {
-			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-				r = -EINTR;
-				kvm_run->exit_reason = KVM_EXIT_INTR;
-				++vcpu->stat.request_irq_exits;
-			}
-			if (signal_pending(current)) {
-				r = -EINTR;
-				kvm_run->exit_reason = KVM_EXIT_INTR;
-				++vcpu->stat.signal_exits;
-			}
-			if (need_resched()) {
-				up_read(&vcpu->kvm->slots_lock);
-				kvm_resched(vcpu);
-				down_read(&vcpu->kvm->slots_lock);
-			}
+		if (r <= 0)
+			break;
+
+		clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+		if (kvm_cpu_has_pending_timer(vcpu))
+			kvm_inject_pending_timer_irqs(vcpu);
+
+		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+			r = -EINTR;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
+			++vcpu->stat.request_irq_exits;
+		}
+		if (signal_pending(current)) {
+			r = -EINTR;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
+			++vcpu->stat.signal_exits;
+		}
+		if (need_resched()) {
+			up_read(&vcpu->kvm->slots_lock);
+			kvm_resched(vcpu);
+			down_read(&vcpu->kvm->slots_lock);
 		}
 	}
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 934dd1c..a1a4272 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1611,11 +1611,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_cpu_has_interrupt(vcpu) ||
-		    kvm_cpu_has_pending_timer(vcpu) ||
-		    kvm_arch_vcpu_runnable(vcpu)) {
+				kvm_arch_vcpu_runnable(vcpu)) {
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
 		}
+		if (kvm_cpu_has_pending_timer(vcpu))
+			break;
 		if (signal_pending(current))
 			break;
 
-- 
cgit v0.10.2


From 78646121e9a2fcf7977cc15966420e572a450bc3 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 23 Mar 2009 12:12:11 +0200
Subject: KVM: Fix interrupt unhalting a vcpu when it shouldn't

kvm_vcpu_block() unhalts vpu on an interrupt/timer without checking
if interrupt window is actually opened.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d2a90fd..3bf0a34 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1963,6 +1963,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.timer_fired;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9057335..2cf915e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 	return !!(v->arch.pending_exceptions);
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
 	return !(v->arch.msr & MSR_WE);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0189356..4ed4c3a 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -318,6 +318,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	return rc;
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4627627..8351c4d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -521,7 +521,7 @@ struct kvm_x86_ops {
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
-
+	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	int (*get_mt_mask_shift)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index aa528db..de74104 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2270,6 +2270,15 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
 }
 
+static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+	return (vmcb->save.rflags & X86_EFLAGS_IF) &&
+		!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+		(svm->vcpu.arch.hflags & HF_GIF_MASK);
+}
+
 static void svm_intr_assist(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2649,6 +2658,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.exception_injected = svm_exception_injected,
 	.inject_pending_irq = svm_intr_assist,
 	.inject_pending_vectors = do_interrupt_requests,
+	.interrupt_allowed = svm_interrupt_allowed,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index da6461d..b9e06b0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2490,6 +2490,12 @@ static void vmx_update_window_states(struct kvm_vcpu *vcpu)
 				 GUEST_INTR_STATE_MOV_SS)));
 }
 
+static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	vmx_update_window_states(vcpu);
+	return vcpu->arch.interrupt_window_open;
+}
+
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
@@ -3691,7 +3697,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.exception_injected = vmx_exception_injected,
 	.inject_pending_irq = vmx_intr_assist,
 	.inject_pending_vectors = do_interrupt_requests,
-
+	.interrupt_allowed = vmx_interrupt_allowed,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
 	.get_mt_mask_shift = vmx_get_mt_mask_shift,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8fca7a4..5bbcad3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4475,3 +4475,8 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
 	put_cpu();
 }
+
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->interrupt_allowed(vcpu);
+}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 40e49ed..72d5684 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 
 void kvm_free_physmem(struct kvm *kvm);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a1a4272..63d5fa2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1610,7 +1610,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_cpu_has_interrupt(vcpu) ||
+		if ((kvm_arch_interrupt_allowed(vcpu) &&
+					kvm_cpu_has_interrupt(vcpu)) ||
 				kvm_arch_vcpu_runnable(vcpu)) {
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
-- 
cgit v0.10.2


From 7d433b9f942606f66da8ef68b8baecd2915c5627 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 24 Mar 2009 14:27:47 +0200
Subject: KVM: VMX: Make flexpriority module parameter reflect hardware
 capability

If the hardware does not support flexpriority, zero the module parameter.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b9e06b0..37ae13d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -237,9 +237,7 @@ static inline int cpu_has_secondary_exec_ctrls(void)
 
 static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 {
-	return flexpriority_enabled
-		&& (vmcs_config.cpu_based_2nd_exec_ctrl &
-		    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+	return flexpriority_enabled;
 }
 
 static inline int cpu_has_vmx_invept_individual_addr(void)
@@ -1203,6 +1201,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (!cpu_has_vmx_ept())
 		enable_ept = 0;
 
+	if (!(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+		flexpriority_enabled = 0;
+
 	min = 0;
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
-- 
cgit v0.10.2


From f9c617f61127615dd054f3f159213bdd12451cab Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 25 Mar 2009 10:08:52 +0800
Subject: KVM: VMX: Correct wrong vmcs field sizes

EXIT_QUALIFICATION and GUEST_LINEAR_ADDRESS are natural width, not 64-bit.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 37ae13d..aba41ae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2991,7 +2991,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	kvm_mmu_invlpg(vcpu, exit_qualification);
 	skip_emulated_instruction(vcpu);
@@ -3007,11 +3007,11 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u64 exit_qualification;
+	unsigned long exit_qualification;
 	enum emulation_result er;
 	unsigned long offset;
 
-	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	offset = exit_qualification & 0xffful;
 
 	er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3062,11 +3062,11 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	u64 exit_qualification;
+	unsigned long exit_qualification;
 	gpa_t gpa;
 	int gla_validity;
 
-	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	if (exit_qualification & (1 << 6)) {
 		printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
@@ -3078,7 +3078,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
 		printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
 			(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
-			(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
+			vmcs_readl(GUEST_LINEAR_ADDRESS));
 		printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
 			(long unsigned int)exit_qualification);
 		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-- 
cgit v0.10.2


From a8b876b1a469cb364fee16ba3aef01613a1231cc Mon Sep 17 00:00:00 2001
From: Eddie Dong <eddie.dong@intel.com>
Date: Thu, 26 Mar 2009 15:28:40 +0800
Subject: KVM: MMU: Fix comment in page_fault()

The original one is for the code before refactoring.

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 855eb71..eae9499 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -379,7 +379,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		return r;
 
 	/*
-	 * Look up the shadow pte for the faulting address.
+	 * Look up the guest pte for the faulting address.
 	 */
 	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
 			     fetch_fault);
-- 
cgit v0.10.2


From 362c1055e58ecd25a9393c520ab263c80b147497 Mon Sep 17 00:00:00 2001
From: Yang Zhang <yang.zhang@intel.com>
Date: Mon, 23 Mar 2009 03:31:04 -0400
Subject: KVM: ia64: enable external interrupt in vmm

Currently, the interrupt enable bit is cleared when in
the vmm.  This patch sets the bit and the external interrupts can
be dealt with when in the vmm.  This improves the I/O performance.

Signed-off-by: Yang Zhang <yang.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index b1dc809..a8f84da 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -652,20 +652,25 @@ void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
 		unsigned long isr, unsigned long iim)
 {
 	struct kvm_vcpu *v = current_vcpu;
+	long psr;
 
 	if (ia64_psr(regs)->cpl == 0) {
 		/* Allow hypercalls only when cpl = 0.  */
 		if (iim == DOMN_PAL_REQUEST) {
+			local_irq_save(psr);
 			set_pal_call_data(v);
 			vmm_transition(v);
 			get_pal_call_result(v);
 			vcpu_increment_iip(v);
+			local_irq_restore(psr);
 			return;
 		} else if (iim == DOMN_SAL_REQUEST) {
+			local_irq_save(psr);
 			set_sal_call_data(v);
 			vmm_transition(v);
 			get_sal_call_result(v);
 			vcpu_increment_iip(v);
+			local_irq_restore(psr);
 			return;
 		}
 	}
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 3ef1a01..40920c6 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -95,7 +95,7 @@ GLOBAL_ENTRY(kvm_vmm_panic)
 	;;
 	srlz.i    // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -249,7 +249,7 @@ ENTRY(kvm_break_fault)
 	;;
 	srlz.i         // guarantee that interruption collection is on
 	;;
-	//(p15)ssm psr.i               // restore psr.i
+	(p15)ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -439,7 +439,7 @@ kvm_dispatch_vexirq:
 	;;
 	srlz.i // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	adds r3=8,r2                // set up second base pointer
 	;;
 	KVM_SAVE_REST
@@ -819,7 +819,7 @@ ENTRY(kvm_dtlb_miss_dispatch)
 	;;
 	srlz.i     // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
 	;;
 	KVM_SAVE_REST
@@ -842,7 +842,7 @@ ENTRY(kvm_itlb_miss_dispatch)
 	;;
 	srlz.i   // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -871,7 +871,7 @@ ENTRY(kvm_dispatch_reflection)
 	;;
 	srlz.i   // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -898,7 +898,7 @@ ENTRY(kvm_dispatch_virtualization_fault)
 	;;
 	srlz.i    // guarantee that interruption collection is on
 	;;
-	//(p15) ssm psr.i               // restore psr.i
+	(p15) ssm psr.i               // restore psr.i
 	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
 	;;
 	KVM_SAVE_REST
@@ -920,7 +920,7 @@ ENTRY(kvm_dispatch_interrupt)
 	;;
 	srlz.i
 	;;
-	//(p15) ssm psr.i
+	(p15) ssm psr.i
 	addl r14=@gprel(ia64_leave_hypervisor),gp
 	;;
 	KVM_SAVE_REST
@@ -1333,7 +1333,7 @@ hostret =   r24
 	;;
 (p7)    srlz.i
 	;;
-//(p6)    ssm psr.i
+(p6)    ssm psr.i
 	;;
 	mov rp=rpsave
 	mov ar.pfs=pfssave
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 2c2501f..4290a42 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -254,7 +254,8 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte)
 			"(p7) st8 [%2]=r9;;"
 			"ssm psr.ic;;"
 			"srlz.d;;"
-			/* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
+			"ssm psr.i;;"
+			"srlz.d;;"
 			: "=r"(ret) : "r"(iha), "r"(pte):"memory");
 
 	return ret;
-- 
cgit v0.10.2


From 82725b20e22fb85377f61a16f6d0d5cfc28b45d3 Mon Sep 17 00:00:00 2001
From: "Dong, Eddie" <eddie.dong@intel.com>
Date: Mon, 30 Mar 2009 16:21:08 +0800
Subject: KVM: MMU: Emulate #PF error code of reserved bits violation

Detect, indicate, and propagate page faults where reserved bits are set.
Take care to handle the different paging modes, each of which has different
sets of reserved bits.

[avi: fix pte reserved bits for efer.nxe=0]

Signed-off-by: Eddie Dong <eddie.dong@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8351c4d..548b97d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 
 	u64 *pae_root;
+	u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8aac67c..b2c8e28 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
 	shadow_trap_nonpresent_pte = trap_pte;
@@ -2151,6 +2157,14 @@ static void paging_free(struct kvm_vcpu *vcpu)
 	nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int bit7;
+
+	bit7 = (gpte >> 7) & 1;
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2159,6 +2173,55 @@ static void paging_free(struct kvm_vcpu *vcpu)
 #include "paging_tmpl.h"
 #undef PTTYPE
 
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+	switch (level) {
+	case PT32_ROOT_LEVEL:
+		/* no rsvd bits for 2 level 4K page table entries */
+		context->rsvd_bits_mask[0][1] = 0;
+		context->rsvd_bits_mask[0][0] = 0;
+		if (is_cpuid_PSE36())
+			/* 36bits PSE 4MB page */
+			context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+		else
+			/* 32 bits PSE 4MB page */
+			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+		context->rsvd_bits_mask[1][0] = ~0ull;
+		break;
+	case PT32E_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62);		/* PDE */
+		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62); 	/* PTE */
+		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(13, 20);		/* large page */
+		context->rsvd_bits_mask[1][0] = ~0ull;
+		break;
+	case PT64_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51);
+		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+		context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
+		context->rsvd_bits_mask[1][0] = ~0ull;
+		break;
+	}
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
@@ -2179,6 +2242,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
 	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2186,6 +2250,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2201,6 +2266,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
@@ -2221,12 +2287,15 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = nonpaging_gva_to_gpa;
 		context->root_level = 0;
 	} else if (is_long_mode(vcpu)) {
+		reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 		context->root_level = PT64_ROOT_LEVEL;
 	} else if (is_pae(vcpu)) {
+		reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 		context->root_level = PT32E_ROOT_LEVEL;
 	} else {
+		reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
 		context->gva_to_gpa = paging32_gva_to_gpa;
 		context->root_level = PT32_ROOT_LEVEL;
 	}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index eae9499..09782a9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
+	int rsvd_fault = 0;
 
 	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -157,6 +158,10 @@ walk:
 		if (!is_present_pte(pte))
 			goto not_present;
 
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
+		if (rsvd_fault)
+			goto access_error;
+
 		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
@@ -232,6 +237,8 @@ err:
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (rsvd_fault)
+		walker->error_code |= PFERR_RSVD_MASK;
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5bbcad3..df86668 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3017,6 +3017,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 	return best;
 }
 
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+	return 36;
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	u32 function, index;
-- 
cgit v0.10.2


From 9645bb56b31a1b70ab9e470387b5264cafc04aa9 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 31 Mar 2009 11:31:54 +0300
Subject: KVM: MMU: Use different shadows when EFER.NXE changes

A pte that is shadowed when the guest EFER.NXE=1 is not valid when
EFER.NXE=0; if bit 63 is set, the pte should cause a fault, and since the
shadow EFER always has NX enabled, this won't happen.

Fix by using a different shadow page table for different EFER.NXE bits.  This
allows vcpus to run correctly with different values of EFER.NXE, and for
transitions on this bit to be handled correctly without requiring a full
flush.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 548b97d..3fc4623 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -185,6 +185,7 @@ union kvm_mmu_page_role {
 		unsigned access:3;
 		unsigned invalid:1;
 		unsigned cr4_pge:1;
+		unsigned nxe:1;
 	};
 };
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index df86668..007fadd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -523,6 +523,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 	efer |= vcpu->arch.shadow_efer & EFER_LMA;
 
 	vcpu->arch.shadow_efer = efer;
+
+	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
+	kvm_mmu_reset_context(vcpu);
 }
 
 void kvm_enable_efer_bits(u64 mask)
-- 
cgit v0.10.2


From 7a6ce84c74a3064f6b6dda9e2d55c509cd4e78e4 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 31 Mar 2009 16:47:44 +0800
Subject: KVM: remove pointless conditional before kfree() in lapic
 initialization

Remove pointless conditional before kfree().

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 007fadd..7fe83fe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1592,8 +1592,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = -EINVAL;
 	}
 out:
-	if (lapic)
-		kfree(lapic);
+	kfree(lapic);
 	return r;
 }
 
-- 
cgit v0.10.2


From 045471563df4b8723202a66ae54d58788b0f8e88 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 1 Apr 2009 15:52:31 +0800
Subject: KVM: VMX: Clean up Flex Priority related

And clean paranthes on returns.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aba41ae..1caa1fc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -216,61 +216,69 @@ static inline int is_external_interrupt(u32 intr_info)
 
 static inline int cpu_has_vmx_msr_bitmap(void)
 {
-	return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS);
+	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
 }
 
 static inline int cpu_has_vmx_tpr_shadow(void)
 {
-	return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
+	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
 }
 
 static inline int vm_need_tpr_shadow(struct kvm *kvm)
 {
-	return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
+	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
 }
 
 static inline int cpu_has_secondary_exec_ctrls(void)
 {
-	return (vmcs_config.cpu_based_exec_ctrl &
-		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+	return vmcs_config.cpu_based_exec_ctrl &
+		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 }
 
 static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 {
-	return flexpriority_enabled;
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+}
+
+static inline bool cpu_has_vmx_flexpriority(void)
+{
+	return cpu_has_vmx_tpr_shadow() &&
+		cpu_has_vmx_virtualize_apic_accesses();
 }
 
 static inline int cpu_has_vmx_invept_individual_addr(void)
 {
-	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT));
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
 }
 
 static inline int cpu_has_vmx_invept_context(void)
 {
-	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT));
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT);
 }
 
 static inline int cpu_has_vmx_invept_global(void)
 {
-	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT));
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT);
 }
 
 static inline int cpu_has_vmx_ept(void)
 {
-	return (vmcs_config.cpu_based_2nd_exec_ctrl &
-		SECONDARY_EXEC_ENABLE_EPT);
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_EPT;
 }
 
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
-	return ((cpu_has_vmx_virtualize_apic_accesses()) &&
-		(irqchip_in_kernel(kvm)));
+	return flexpriority_enabled &&
+		(cpu_has_vmx_virtualize_apic_accesses()) &&
+		(irqchip_in_kernel(kvm));
 }
 
 static inline int cpu_has_vmx_vpid(void)
 {
-	return (vmcs_config.cpu_based_2nd_exec_ctrl &
-		SECONDARY_EXEC_ENABLE_VPID);
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_VPID;
 }
 
 static inline int cpu_has_virtual_nmis(void)
@@ -278,6 +286,11 @@ static inline int cpu_has_virtual_nmis(void)
 	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
 }
 
+static inline bool report_flexpriority(void)
+{
+	return flexpriority_enabled;
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -1201,7 +1214,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (!cpu_has_vmx_ept())
 		enable_ept = 0;
 
-	if (!(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;
 
 	min = 0;
@@ -3655,7 +3668,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_processor_compatibility = vmx_check_processor_compat,
 	.hardware_enable = hardware_enable,
 	.hardware_disable = hardware_disable,
-	.cpu_has_accelerated_tpr = cpu_has_vmx_virtualize_apic_accesses,
+	.cpu_has_accelerated_tpr = report_flexpriority,
 
 	.vcpu_create = vmx_create_vcpu,
 	.vcpu_free = vmx_free_vcpu,
-- 
cgit v0.10.2


From 93ba03c2e2aba23b042cc15eef83b7a66d3ac17a Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 1 Apr 2009 15:52:32 +0800
Subject: KVM: VMX: Fix feature testing

The testing of feature is too early now, before vmcs_config complete initialization.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1caa1fc..7d7b0d6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1208,15 +1208,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		      vmx_capability.ept, vmx_capability.vpid);
 	}
 
-	if (!cpu_has_vmx_vpid())
-		enable_vpid = 0;
-
-	if (!cpu_has_vmx_ept())
-		enable_ept = 0;
-
-	if (!cpu_has_vmx_flexpriority())
-		flexpriority_enabled = 0;
-
 	min = 0;
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
@@ -1320,6 +1311,15 @@ static __init int hardware_setup(void)
 	if (boot_cpu_has(X86_FEATURE_NX))
 		kvm_enable_efer_bits(EFER_NX);
 
+	if (!cpu_has_vmx_vpid())
+		enable_vpid = 0;
+
+	if (!cpu_has_vmx_ept())
+		enable_ept = 0;
+
+	if (!cpu_has_vmx_flexpriority())
+		flexpriority_enabled = 0;
+
 	return alloc_kvm_area();
 }
 
-- 
cgit v0.10.2


From 20c466b56168ddccf034c136510d73e4a0e18605 Mon Sep 17 00:00:00 2001
From: "Dong, Eddie" <eddie.dong@intel.com>
Date: Tue, 31 Mar 2009 23:03:45 +0800
Subject: KVM: Use rsvd_bits_mask in load_pdptrs()

Also remove bit 5-6 from rsvd_bits_mask per latest SDM.

Signed-off-by: Eddie Dong <Eddie.Dong@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b2c8e28..da3ad3c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -225,11 +225,6 @@ static int is_nx(struct kvm_vcpu *vcpu)
 	return vcpu->arch.shadow_efer & EFER_NX;
 }
 
-static int is_present_pte(unsigned long pte)
-{
-	return pte & PT_PRESENT_MASK;
-}
-
 static int is_shadow_present_pte(u64 pte)
 {
 	return pte != shadow_trap_nonpresent_pte
@@ -2195,6 +2190,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		context->rsvd_bits_mask[1][0] = ~0ull;
 		break;
 	case PT32E_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][2] =
+			rsvd_bits(maxphyaddr, 63) |
+			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);		/* PDE */
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index eaab214..3494a2f 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+static inline int is_present_pte(unsigned long pte)
+{
+	return pte & PT_PRESENT_MASK;
+}
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7fe83fe..70ee81e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -234,7 +234,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 		goto out;
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+		if (is_present_pte(pdpte[i]) &&
+		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}
-- 
cgit v0.10.2


From 7b4a25cb296e2a73d2e87a4af65361d45d450a27 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 30 Mar 2009 16:03:08 +0300
Subject: KVM: VMX: Fix handling of a fault during NMI unblocked due to IRET

Bit 12 is undefined in any of the following cases:
 If the VM exit sets the valid bit in the IDT-vectoring information field.
 If the VM exit is due to a double fault.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7d7b0d6..631f9b7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3272,36 +3272,41 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
-	u32 idt_vectoring_info;
+	u32 idt_vectoring_info = vmx->idt_vectoring_info;
 	bool unblock_nmi;
 	u8 vector;
 	int type;
 	bool idtv_info_valid;
 	u32 error;
 
+	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 	if (cpu_has_virtual_nmis()) {
 		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
 		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
 		/*
-		 * SDM 3: 25.7.1.2
+		 * SDM 3: 27.7.1.2 (September 2008)
 		 * Re-set bit "block by NMI" before VM entry if vmexit caused by
 		 * a guest IRET fault.
+		 * SDM 3: 23.2.2 (September 2008)
+		 * Bit 12 is undefined in any of the following cases:
+		 *  If the VM exit sets the valid bit in the IDT-vectoring
+		 *   information field.
+		 *  If the VM exit is due to a double fault.
 		 */
-		if (unblock_nmi && vector != DF_VECTOR)
+		if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+		    vector != DF_VECTOR && !idtv_info_valid)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
 	} else if (unlikely(vmx->soft_vnmi_blocked))
 		vmx->vnmi_blocked_time +=
 			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 
-	idt_vectoring_info = vmx->idt_vectoring_info;
-	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
 	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
 	if (vmx->vcpu.arch.nmi_injected) {
 		/*
-		 * SDM 3: 25.7.1.2
+		 * SDM 3: 27.7.1.2 (September 2008)
 		 * Clear bit "block by NMI" before VM entry if a NMI delivery
 		 * faulted.
 		 */
-- 
cgit v0.10.2


From 37b96e988053c4dd21811b0408a12f8f60b4d0c8 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 30 Mar 2009 16:03:13 +0300
Subject: KVM: VMX: Rewrite vmx_complete_interrupt()'s twisted maze of if()
 statements

...with a more straightforward switch().

Also fix a bug when NMI could be dropped on exit. Although this should
never happen in practice, since NMIs can only be injected, never triggered
internally by the guest like exceptions.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 631f9b7..577aa95 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3277,7 +3277,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	u8 vector;
 	int type;
 	bool idtv_info_valid;
-	u32 error;
 
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
@@ -3302,34 +3301,42 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		vmx->vnmi_blocked_time +=
 			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 
+	vmx->vcpu.arch.nmi_injected = false;
+	kvm_clear_exception_queue(&vmx->vcpu);
+	kvm_clear_interrupt_queue(&vmx->vcpu);
+
+	if (!idtv_info_valid)
+		return;
+
 	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
 	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
-	if (vmx->vcpu.arch.nmi_injected) {
+
+	switch(type) {
+	case INTR_TYPE_NMI_INTR:
+		vmx->vcpu.arch.nmi_injected = true;
 		/*
 		 * SDM 3: 27.7.1.2 (September 2008)
-		 * Clear bit "block by NMI" before VM entry if a NMI delivery
-		 * faulted.
+		 * Clear bit "block by NMI" before VM entry if a NMI
+		 * delivery faulted.
 		 */
-		if (idtv_info_valid && type == INTR_TYPE_NMI_INTR)
-			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-					GUEST_INTR_STATE_NMI);
-		else
-			vmx->vcpu.arch.nmi_injected = false;
-	}
-	kvm_clear_exception_queue(&vmx->vcpu);
-	if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
-				type == INTR_TYPE_SOFT_EXCEPTION)) {
+		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_NMI);
+		break;
+	case INTR_TYPE_HARD_EXCEPTION:
+	case INTR_TYPE_SOFT_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
-			error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
-			kvm_queue_exception_e(&vmx->vcpu, vector, error);
+			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
+			kvm_queue_exception_e(&vmx->vcpu, vector, err);
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
 		vmx->idt_vectoring_info = 0;
-	}
-	kvm_clear_interrupt_queue(&vmx->vcpu);
-	if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
+		break;
+	case INTR_TYPE_EXT_INTR:
 		kvm_queue_interrupt(&vmx->vcpu, vector);
 		vmx->idt_vectoring_info = 0;
+		break;
+	default:
+		break;
 	}
 }
 
-- 
cgit v0.10.2


From 8843419048e500f8f38df555bca1bf7948804b7f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 30 Mar 2009 16:03:18 +0300
Subject: KVM: VMX: Do not zero idt_vectoring_info in
 vmx_complete_interrupts().

We will need it later in task_switch().
Code in handle_exception() is dead. is_external_interrupt(vect_info)
will always be false since idt_vectoring_info is zeroed in
vmx_complete_interrupts().

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 577aa95..e4ad9d3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2626,11 +2626,6 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
 		       "intr info 0x%x\n", __func__, vect_info, intr_info);
 
-	if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
-		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-		kvm_push_irq(vcpu, irq);
-	}
-
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
@@ -3329,11 +3324,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 			kvm_queue_exception_e(&vmx->vcpu, vector, err);
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
-		vmx->idt_vectoring_info = 0;
 		break;
 	case INTR_TYPE_EXT_INTR:
 		kvm_queue_interrupt(&vmx->vcpu, vector);
-		vmx->idt_vectoring_info = 0;
 		break;
 	default:
 		break;
-- 
cgit v0.10.2


From b237ac37a149e8b56436fabf093532483bff13b0 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 30 Mar 2009 16:03:24 +0300
Subject: KVM: Fix task switch back link handling.

Back link is written to a wrong TSS now.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 70ee81e..adcf738 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3717,7 +3717,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
 	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
 	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
 	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
-	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
 }
 
 static int load_state_from_tss32(struct kvm_vcpu *vcpu,
@@ -3814,8 +3813,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
 }
 
 static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
-		       u32 old_tss_base,
-		       struct desc_struct *nseg_desc)
+			      u16 old_tss_sel, u32 old_tss_base,
+			      struct desc_struct *nseg_desc)
 {
 	struct tss_segment_16 tss_segment_16;
 	int ret = 0;
@@ -3834,6 +3833,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
 			   &tss_segment_16, sizeof tss_segment_16))
 		goto out;
 
+	if (old_tss_sel != 0xffff) {
+		tss_segment_16.prev_task_link = old_tss_sel;
+
+		if (kvm_write_guest(vcpu->kvm,
+				    get_tss_base_addr(vcpu, nseg_desc),
+				    &tss_segment_16.prev_task_link,
+				    sizeof tss_segment_16.prev_task_link))
+			goto out;
+	}
+
 	if (load_state_from_tss16(vcpu, &tss_segment_16))
 		goto out;
 
@@ -3843,7 +3852,7 @@ out:
 }
 
 static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
-		       u32 old_tss_base,
+		       u16 old_tss_sel, u32 old_tss_base,
 		       struct desc_struct *nseg_desc)
 {
 	struct tss_segment_32 tss_segment_32;
@@ -3863,6 +3872,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
 			   &tss_segment_32, sizeof tss_segment_32))
 		goto out;
 
+	if (old_tss_sel != 0xffff) {
+		tss_segment_32.prev_task_link = old_tss_sel;
+
+		if (kvm_write_guest(vcpu->kvm,
+				    get_tss_base_addr(vcpu, nseg_desc),
+				    &tss_segment_32.prev_task_link,
+				    sizeof tss_segment_32.prev_task_link))
+			goto out;
+	}
+
 	if (load_state_from_tss32(vcpu, &tss_segment_32))
 		goto out;
 
@@ -3918,12 +3937,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
 
+	/* set back link to prev task only if NT bit is set in eflags
+	   note that old_tss_sel is not used afetr this point */
+	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
+		old_tss_sel = 0xffff;
+
 	if (nseg_desc.type & 8)
-		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
-					 &nseg_desc);
+		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
+					 old_tss_base, &nseg_desc);
 	else
-		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
-					 &nseg_desc);
+		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
+					 old_tss_base, &nseg_desc);
 
 	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
 		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
-- 
cgit v0.10.2


From 64a7ec066813443440bfc9f60a9e76a47cfa6b2b Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 30 Mar 2009 16:03:29 +0300
Subject: KVM: Fix unneeded instruction skipping during task switching.

There is no need to skip instruction if the reason for a task switch
is a task gate in IDT and access to it is caused by an external even.
The problem  is currently solved only for VMX since there is no reliable
way to skip an instruction in SVM. We should emulate it instead.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 82ada75..85574b7 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb {
 #define SVM_EVTINJ_VALID_ERR (1 << 11)
 
 #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
 
 #define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
 #define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index de74104..bba67b7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1825,17 +1825,28 @@ static int task_switch_interception(struct vcpu_svm *svm,
 				    struct kvm_run *kvm_run)
 {
 	u16 tss_selector;
+	int reason;
+	int int_type = svm->vmcb->control.exit_int_info &
+		SVM_EXITINTINFO_TYPE_MASK;
 
 	tss_selector = (u16)svm->vmcb->control.exit_info_1;
+
 	if (svm->vmcb->control.exit_info_2 &
 	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
-		return kvm_task_switch(&svm->vcpu, tss_selector,
-				       TASK_SWITCH_IRET);
-	if (svm->vmcb->control.exit_info_2 &
-	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
-		return kvm_task_switch(&svm->vcpu, tss_selector,
-				       TASK_SWITCH_JMP);
-	return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
+		reason = TASK_SWITCH_IRET;
+	else if (svm->vmcb->control.exit_info_2 &
+		 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
+		reason = TASK_SWITCH_JMP;
+	else if (svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID)
+		reason = TASK_SWITCH_GATE;
+	else
+		reason = TASK_SWITCH_CALL;
+
+
+	if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT)
+		skip_emulated_instruction(&svm->vcpu);
+
+	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
 
 static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e4ad9d3..c6997c0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3038,22 +3038,40 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qualification;
 	u16 tss_selector;
-	int reason;
+	int reason, type, idt_v;
+
+	idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
+	type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	reason = (u32)exit_qualification >> 30;
-	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
-	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
-	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
-	    == INTR_TYPE_NMI_INTR) {
-		vcpu->arch.nmi_injected = false;
-		if (cpu_has_virtual_nmis())
-			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-				      GUEST_INTR_STATE_NMI);
+	if (reason == TASK_SWITCH_GATE && idt_v) {
+		switch (type) {
+		case INTR_TYPE_NMI_INTR:
+			vcpu->arch.nmi_injected = false;
+			if (cpu_has_virtual_nmis())
+				vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+					      GUEST_INTR_STATE_NMI);
+			break;
+		case INTR_TYPE_EXT_INTR:
+			kvm_clear_interrupt_queue(vcpu);
+			break;
+		case INTR_TYPE_HARD_EXCEPTION:
+		case INTR_TYPE_SOFT_EXCEPTION:
+			kvm_clear_exception_queue(vcpu);
+			break;
+		default:
+			break;
+		}
 	}
 	tss_selector = exit_qualification;
 
+	if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
+		       type != INTR_TYPE_EXT_INTR &&
+		       type != INTR_TYPE_NMI_INTR))
+		skip_emulated_instruction(vcpu);
+
 	if (!kvm_task_switch(vcpu, tss_selector, reason))
 		return 0;
 
@@ -3306,7 +3324,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
 	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
 
-	switch(type) {
+	switch (type) {
 	case INTR_TYPE_NMI_INTR:
 		vmx->vcpu.arch.nmi_injected = true;
 		/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index adcf738..bb04f11 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3935,7 +3935,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
 	}
 
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	/* set back link to prev task only if NT bit is set in eflags
+	   note that old_tss_sel is not used afetr this point */
+	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
+		old_tss_sel = 0xffff;
 
 	/* set back link to prev task only if NT bit is set in eflags
 	   note that old_tss_sel is not used afetr this point */
-- 
cgit v0.10.2


From 4c26b4cd6ff6c3f7534f2aea9615a561c372ed05 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 2 Apr 2009 10:28:37 +0800
Subject: KVM: MMU: Discard reserved bits checking on PDE bit 7-8

1. It's related to a Linux kernel bug which fixed by Ingo on
07a66d7c53a538e1a9759954a82bb6c07365eff9. The original code exists for quite a
long time, and it would convert a PDE for large page into a normal PDE. But it
fail to fit normal PDE well.  With the code before Ingo's fix, the kernel would
fall reserved bit checking with bit 8 - the remaining global bit of PTE. So the
kernel would receive a double-fault.

2. After discussion, we decide to discard PDE bit 7-8 reserved checking for now.
For this marked as reserved in SDM, but didn't checked by the processor in
fact...

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index da3ad3c..b582add 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2194,7 +2194,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 			rsvd_bits(maxphyaddr, 63) |
 			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 62);		/* PDE */
+			rsvd_bits(maxphyaddr, 62);	/* PDE */
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62); 	/* PTE */
 		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
@@ -2208,13 +2208,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
 		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+			rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
 		context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
 		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
+			rsvd_bits(maxphyaddr, 51) |
+			rsvd_bits(13, 20);		/* large page */
 		context->rsvd_bits_mask[1][0] = ~0ull;
 		break;
 	}
-- 
cgit v0.10.2


From 2d033196541959d91802d5a62e63771448101557 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 2 Apr 2009 15:51:46 +0300
Subject: KVM: x86 emulator: fix call near emulation

The length of pushed on to the stack return address depends on operand
size not address size.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ca91749..d7c9f6f 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1792,7 +1792,6 @@ special_insn:
 		}
 		c->src.val = (unsigned long) c->eip;
 		jmp_rel(c, rel);
-		c->op_bytes = c->ad_bytes;
 		emulate_push(ctxt);
 		break;
 	}
-- 
cgit v0.10.2


From 2906e79f21a559e49611e1e188c4993cd45d9ce1 Mon Sep 17 00:00:00 2001
From: "Zhang, Xiantao" <xiantao.zhang@intel.com>
Date: Thu, 9 Apr 2009 21:37:28 +0800
Subject: KVM: ia64: make kvm depend on CONFIG_MODULES.

Since kvm-intel modue can't be built-in, make kvm depend on
CONFIG_MODULES.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 0a2d6b8..64d5209 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -23,7 +23,7 @@ if VIRTUALIZATION
 
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on HAVE_KVM && EXPERIMENTAL
+	depends on HAVE_KVM && MODULES && EXPERIMENTAL
 	# for device assignment:
 	depends on PCI
 	select PREEMPT_NOTIFIERS
-- 
cgit v0.10.2


From ede2ccc51742059d356d419260460cbbf3e36273 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 8 Apr 2009 13:14:19 -0300
Subject: KVM: PIT: fix count read and mode 0 handling

Commit 46ee278652f4cbd51013471b64c7897ba9bcd1b1 causes Solaris 10
to hang on boot.

Assuming that PIT counter reads should return 0 for an expired timer
is wrong: when it is active, the counter never stops (see comment on
__kpit_elapsed).

Also arm a one shot timer for mode 0.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index cf09bb6..4d6f0d2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -104,13 +104,18 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	ktime_t remaining;
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
 
+	/*
+	 * The Counter does not stop when it reaches zero. In
+	 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
+	 * the highest count, either FFFF hex for binary counting
+	 * or 9999 for BCD counting, and continues counting.
+	 * Modes 2 and 3 are periodic; the Counter reloads
+	 * itself with the initial count and continues counting
+	 * from there.
+	 */
 	remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
-	if (ktime_to_ns(remaining) < 0)
-		remaining = ktime_set(0, 0);
-
-	elapsed = ps->pit_timer.period;
-	if (ktime_to_ns(remaining) <= ps->pit_timer.period)
-		elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
+	elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
+	elapsed = mod_64(elapsed, ps->pit_timer.period);
 
 	return elapsed;
 }
@@ -280,7 +285,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 
 	/* TODO The new value only affected after the retriggered */
 	hrtimer_cancel(&pt->timer);
-	pt->period = (is_period == 0) ? 0 : interval;
+	pt->period = interval;
 	ps->is_periodic = is_period;
 
 	pt->timer.function = kvm_timer_fn;
@@ -304,10 +309,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
 
 	/*
-	 * Though spec said the state of 8254 is undefined after power-up,
-	 * seems some tricky OS like Windows XP depends on IRQ0 interrupt
-	 * when booting up.
-	 * So here setting initialize rate for it, and not a specific number
+	 * The largest possible initial count is 0; this is equivalent
+	 * to 216 for binary counting and 104 for BCD counting.
 	 */
 	if (val == 0)
 		val = 0x10000;
@@ -322,6 +325,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	/* Two types of timer
 	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
 	switch (ps->channels[0].mode) {
+	case 0:
 	case 1:
         /* FIXME: enhance mode 4 precision */
 	case 4:
-- 
cgit v0.10.2


From 2f8b9ee14eb439008e0c5131116ea6baa40dba50 Mon Sep 17 00:00:00 2001
From: nathan binkert <nate@binkert.org>
Date: Fri, 27 Mar 2009 21:53:05 -0700
Subject: KVM: Make kvm header C++ friendly

Two things needed fixing: 1) g++ does not allow a named structure type
within an anonymous union and 2) Avoid name clash between two padding
fields within the same struct by giving them different names as is
done elsewhere in the header.

Signed-off-by: Nathan Binkert <nate@binkert.org>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 644e3a9..3db5d8d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -119,7 +119,7 @@ struct kvm_run {
 			__u32 error_code;
 		} ex;
 		/* KVM_EXIT_IO */
-		struct kvm_io {
+		struct {
 #define KVM_EXIT_IO_IN  0
 #define KVM_EXIT_IO_OUT 1
 			__u8 direction;
@@ -224,10 +224,10 @@ struct kvm_interrupt {
 /* for KVM_GET_DIRTY_LOG */
 struct kvm_dirty_log {
 	__u32 slot;
-	__u32 padding;
+	__u32 padding1;
 	union {
 		void __user *dirty_bitmap; /* one bit per page */
-		__u64 padding;
+		__u64 padding2;
 	};
 };
 
-- 
cgit v0.10.2


From c2d0ee46e6e633a3c23ecbcb9b03ad731906cd79 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Sun, 5 Apr 2009 14:54:47 -0300
Subject: KVM: MMU: remove global page optimization logic

Complexity to fix it not worthwhile the gains, as discussed
in http://article.gmane.org/gmane.comp.emulators.kvm.devel/28649.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3fc4623..0e3a7c6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -213,7 +213,6 @@ struct kvm_mmu_page {
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
-	bool global;
 	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
@@ -395,7 +394,6 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
-	struct list_head oos_global_pages;
 	struct iommu_domain *iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
@@ -425,7 +423,6 @@ struct kvm_vm_stat {
 	u32 mmu_recycled;
 	u32 mmu_cache_miss;
 	u32 mmu_unsync;
-	u32 mmu_unsync_global;
 	u32 remote_tlb_flush;
 	u32 lpages;
 };
@@ -640,7 +637,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b582add..b39ec62 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1075,18 +1075,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 	return NULL;
 }
 
-static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	list_del(&sp->oos_link);
-	--kvm->stat.mmu_unsync_global;
-}
-
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	WARN_ON(!sp->unsync);
 	sp->unsync = 0;
-	if (sp->global)
-		kvm_unlink_unsync_global(kvm, sp);
 	--kvm->stat.mmu_unsync;
 }
 
@@ -1249,7 +1241,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
 	sp->gfn = gfn;
 	sp->role = role;
-	sp->global = 0;
 	hlist_add_head(&sp->hash_link, bucket);
 	if (!direct) {
 		if (rmap_write_protect(vcpu->kvm, gfn))
@@ -1647,11 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	++vcpu->kvm->stat.mmu_unsync;
 	sp->unsync = 1;
 
-	if (sp->global) {
-		list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
-		++vcpu->kvm->stat.mmu_unsync_global;
-	} else
-		kvm_mmu_mark_parents_unsync(vcpu, sp);
+	kvm_mmu_mark_parents_unsync(vcpu, sp);
 
 	mmu_convert_notrap(sp);
 	return 0;
@@ -1678,21 +1665,12 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    int global, gfn_t gfn, pfn_t pfn, bool speculative,
+		    gfn_t gfn, pfn_t pfn, bool speculative,
 		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
 	u64 mt_mask = shadow_mt_mask;
-	struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
-
-	if (!global && sp->global) {
-		sp->global = 0;
-		if (sp->unsync) {
-			kvm_unlink_unsync_global(vcpu->kvm, sp);
-			kvm_mmu_mark_parents_unsync(vcpu, sp);
-		}
-	}
 
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
@@ -1766,8 +1744,8 @@ set_pte:
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, int largepage, int global,
-			 gfn_t gfn, pfn_t pfn, bool speculative)
+			 int *ptwrite, int largepage, gfn_t gfn,
+			 pfn_t pfn, bool speculative)
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1796,7 +1774,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			was_rmapped = 1;
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, global, gfn, pfn, speculative, true)) {
+		      dirty, largepage, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1844,7 +1822,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 		    || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 				     0, write, 1, &pt_write,
-				     largepage, 0, gfn, pfn, false);
+				     largepage, gfn, pfn, false);
 			++vcpu->stat.pf_fixed;
 			break;
 		}
@@ -2015,15 +1993,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void mmu_sync_global(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_mmu_page *sp, *n;
-
-	list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
-		kvm_sync_page(vcpu, sp);
-}
-
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2031,13 +2000,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
-void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
-{
-	spin_lock(&vcpu->kvm->mmu_lock);
-	mmu_sync_global(vcpu);
-	spin_unlock(&vcpu->kvm->mmu_lock);
-}
-
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
 {
 	return vaddr;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 09782a9..258e459 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -268,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, largepage,
-		     gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
-		     pfn, true);
+		     gpte_to_gfn(gpte), pfn, true);
 }
 
 /*
@@ -303,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 				     user_fault, write_fault,
 				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
 				     ptwrite, largepage,
-				     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
 				     gw->gfn, pfn, false);
 			break;
 		}
@@ -592,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
-			 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
+			 is_dirty_pte(gpte), 0, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bb04f11..b5ac1b7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -108,7 +108,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
 	{ "mmu_unsync", VM_STAT(mmu_unsync) },
-	{ "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
@@ -322,7 +321,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	kvm_x86_ops->set_cr0(vcpu, cr0);
 	vcpu->arch.cr0 = cr0;
 
-	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 	return;
 }
@@ -371,7 +369,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	kvm_x86_ops->set_cr4(vcpu, cr4);
 	vcpu->arch.cr4 = cr4;
 	vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
-	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -4364,7 +4361,6 @@ struct  kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
-	INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-- 
cgit v0.10.2


From a5f868bd45a64ce1f502d228723a5b6c357790cd Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:14 +0300
Subject: KVM: x86 emulator: Add decoding of 16bit second immediate argument

Such as segment number in lcall/ljmp

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d7c9f6f..c015063 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -76,6 +76,7 @@
 #define Src2CL      (1<<29)
 #define Src2ImmByte (2<<29)
 #define Src2One     (3<<29)
+#define Src2Imm16   (4<<29)
 #define Src2Mask    (7<<29)
 
 enum {
@@ -1072,6 +1073,12 @@ done_prefixes:
 		c->src2.bytes = 1;
 		c->src2.val = insn_fetch(u8, 1, c->eip);
 		break;
+	case Src2Imm16:
+		c->src2.type = OP_IMM;
+		c->src2.ptr = (unsigned long *)c->eip;
+		c->src2.bytes = 2;
+		c->src2.val = insn_fetch(u16, 2, c->eip);
+		break;
 	case Src2One:
 		c->src2.bytes = 1;
 		c->src2.val = 1;
-- 
cgit v0.10.2


From 0654169e7309f2f68ec4bea9257b14b05ea94d7d Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:20 +0300
Subject: KVM: x86 emulator: Add lcall decoding

No emulation yet.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index c015063..71b4bee 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -154,7 +154,8 @@ static u32 opcode_table[256] = {
 	/* 0x90 - 0x97 */
 	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
 	/* 0x98 - 0x9F */
-	0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
+	0, 0, SrcImm | Src2Imm16, 0,
+	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
 	/* 0xA0 - 0xA7 */
 	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
 	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
-- 
cgit v0.10.2


From 782b877c8073a9ef307ad6638ee472b8336b2b85 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:25 +0300
Subject: KVM: x86 emulator: Complete ljmp decoding at decode stage

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 71b4bee..8779cf2 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -193,7 +193,7 @@ static u32 opcode_table[256] = {
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	/* 0xE8 - 0xEF */
 	ImplicitOps | Stack, SrcImm | ImplicitOps,
-	ImplicitOps, SrcImmByte | ImplicitOps,
+	SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	/* 0xF0 - 0xF7 */
@@ -1805,30 +1805,15 @@ special_insn:
 	}
 	case 0xe9: /* jmp rel */
 		goto jmp;
-	case 0xea: /* jmp far */ {
-		uint32_t eip;
-		uint16_t sel;
-
-		switch (c->op_bytes) {
-		case 2:
-			eip = insn_fetch(u16, 2, c->eip);
-			break;
-		case 4:
-			eip = insn_fetch(u32, 4, c->eip);
-			break;
-		default:
-			DPRINTF("jmp far: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
-		sel = insn_fetch(u16, 2, c->eip);
-		if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) {
+	case 0xea: /* jmp far */
+		if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
+					VCPU_SREG_CS) < 0) {
 			DPRINTF("jmp far: Failed to load CS descriptor\n");
 			goto cannot_emulate;
 		}
 
-		c->eip = eip;
+		c->eip = c->src.val;
 		break;
-	}
 	case 0xeb:
 	      jmp:		/* jmp rel short */
 		jmp_rel(c, c->src.val);
-- 
cgit v0.10.2


From b2833e3cdebfe3ea4d0d1d3ce4d2ff1c42a4f8f4 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:30 +0300
Subject: KVM: x86 emulator: Complete short/near jcc decoding in decode stage

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 8779cf2..14b8ee2 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -136,11 +136,11 @@ static u32 opcode_table[256] = {
 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
 	/* 0x70 - 0x77 */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
+	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
 	/* 0x78 - 0x7F */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
+	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
 	/* 0x80 - 0x87 */
 	Group | Group1_80, Group | Group1_81,
 	Group | Group1_82, Group | Group1_83,
@@ -232,10 +232,8 @@ static u32 twobyte_table[256] = {
 	/* 0x70 - 0x7F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x80 - 0x8F */
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
+	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xA0 - 0xA7 */
@@ -1539,13 +1537,10 @@ special_insn:
 			return -1;
 		}
 		return 0;
-	case 0x70 ... 0x7f: /* jcc (short) */ {
-		int rel = insn_fetch(s8, 1, c->eip);
-
+	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(c->b, ctxt->eflags))
-			jmp_rel(c, rel);
+			jmp_rel(c, c->src.val);
 		break;
-	}
 	case 0x80 ... 0x83:	/* Grp1 */
 		switch (c->modrm_reg) {
 		case 0:
@@ -2031,28 +2026,11 @@ twobyte_insn:
 		if (!test_cc(c->b, ctxt->eflags))
 			c->dst.type = OP_NONE; /* no writeback */
 		break;
-	case 0x80 ... 0x8f: /* jnz rel, etc*/ {
-		long int rel;
-
-		switch (c->op_bytes) {
-		case 2:
-			rel = insn_fetch(s16, 2, c->eip);
-			break;
-		case 4:
-			rel = insn_fetch(s32, 4, c->eip);
-			break;
-		case 8:
-			rel = insn_fetch(s64, 8, c->eip);
-			break;
-		default:
-			DPRINTF("jnz: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
+	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(c->b, ctxt->eflags))
-			jmp_rel(c, rel);
+			jmp_rel(c, c->src.val);
 		c->dst.type = OP_NONE;
 		break;
-	}
 	case 0xa3:
 	      bt:		/* bt */
 		c->dst.type = OP_NONE;
-- 
cgit v0.10.2


From d53c4777b3a3e5031710d0664851d1309325884b Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:36 +0300
Subject: KVM: x86 emulator: Complete decoding of call near in decode stage

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 14b8ee2..4a9cd4c 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -192,7 +192,7 @@ static u32 opcode_table[256] = {
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	/* 0xE8 - 0xEF */
-	ImplicitOps | Stack, SrcImm | ImplicitOps,
+	SrcImm | Stack, SrcImm | ImplicitOps,
 	SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
@@ -1781,18 +1781,7 @@ special_insn:
 		io_dir_in = 0;
 		goto do_io;
 	case 0xe8: /* call (near) */ {
-		long int rel;
-		switch (c->op_bytes) {
-		case 2:
-			rel = insn_fetch(s16, 2, c->eip);
-			break;
-		case 4:
-			rel = insn_fetch(s32, 4, c->eip);
-			break;
-		default:
-			DPRINTF("Call: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
+		long int rel = c->src.val;
 		c->src.val = (unsigned long) c->eip;
 		jmp_rel(c, rel);
 		emulate_push(ctxt);
-- 
cgit v0.10.2


From 341de7e3728ade102eaadf56af404f4ce865a73d Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:41 +0300
Subject: KVM: x86 emulator: Add unsigned byte immediate decode

Extend "Source operand type" opcode description field to 4 bites
to accommodate new option.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 4a9cd4c..0988a13 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -59,13 +59,14 @@
 #define SrcImm      (5<<4)	/* Immediate operand. */
 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
 #define SrcOne      (7<<4)	/* Implied '1' */
-#define SrcMask     (7<<4)
+#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
+#define SrcMask     (0xf<<4)
 /* Generic ModRM decode. */
-#define ModRM       (1<<7)
+#define ModRM       (1<<8)
 /* Destination is only written; never read. */
-#define Mov         (1<<8)
-#define BitOp       (1<<9)
-#define MemAbs      (1<<10)      /* Memory operand is absolute displacement */
+#define Mov         (1<<9)
+#define BitOp       (1<<10)
+#define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
 #define String      (1<<12)     /* String instruction (rep capable) */
 #define Stack       (1<<13)     /* Stack instruction (push/pop) */
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
@@ -1044,10 +1045,14 @@ done_prefixes:
 		}
 		break;
 	case SrcImmByte:
+	case SrcImmUByte:
 		c->src.type = OP_IMM;
 		c->src.ptr = (unsigned long *)c->eip;
 		c->src.bytes = 1;
-		c->src.val = insn_fetch(s8, 1, c->eip);
+		if ((c->d & SrcMask) == SrcImmByte)
+			c->src.val = insn_fetch(s8, 1, c->eip);
+		else
+			c->src.val = insn_fetch(u8, 1, c->eip);
 		break;
 	case SrcOne:
 		c->src.bytes = 1;
-- 
cgit v0.10.2


From 84ce66a6865192567172f08ab5269aa380fa6ead Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:46 +0300
Subject: KVM: x86 emulator: Completely decode in/out at decoding stage

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 0988a13..c2f55ca 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -190,8 +190,8 @@ static u32 opcode_table[256] = {
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xE0 - 0xE7 */
 	0, 0, 0, 0,
-	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
-	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
+	ByteOp | SrcImmUByte, SrcImmUByte,
+	ByteOp | SrcImmUByte, SrcImmUByte,
 	/* 0xE8 - 0xEF */
 	SrcImm | Stack, SrcImm | ImplicitOps,
 	SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
@@ -1777,12 +1777,12 @@ special_insn:
 		break;
 	case 0xe4: 	/* inb */
 	case 0xe5: 	/* in */
-		port = insn_fetch(u8, 1, c->eip);
+		port = c->src.val;
 		io_dir_in = 1;
 		goto do_io;
 	case 0xe6: /* outb */
 	case 0xe7: /* out */
-		port = insn_fetch(u8, 1, c->eip);
+		port = c->src.val;
 		io_dir_in = 0;
 		goto do_io;
 	case 0xe8: /* call (near) */ {
-- 
cgit v0.10.2


From e637b8238ad1783ebdd113bd34cd6982dec1006d Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:52 +0300
Subject: KVM: x86 emulator: Decode soft interrupt instructions

Do not emulate them yet.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index c2f55ca..d2664fc 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -181,7 +181,8 @@ static u32 opcode_table[256] = {
 	0, ImplicitOps | Stack, 0, 0,
 	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
 	/* 0xC8 - 0xCF */
-	0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
+	0, 0, 0, ImplicitOps | Stack,
+	ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
 	/* 0xD0 - 0xD7 */
 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
-- 
cgit v0.10.2


From ba8afb6b0a2c7e06da760ffe5d078245058619b5 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:36:57 +0300
Subject: KVM: x86 emulator: Add new mode of instruction emulation: skip

In the new mode instruction is decoded, but not executed. The EIP
is moved to point after the instruction.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0e3a7c6..cb306cf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -562,6 +562,7 @@ enum emulation_result {
 
 #define EMULTYPE_NO_DECODE	    (1 << 0)
 #define EMULTYPE_TRAP_UD	    (1 << 1)
+#define EMULTYPE_SKIP		    (1 << 2)
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
 			unsigned long cr2, u16 error_code, int emulation_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5ac1b7..8beccaa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2413,6 +2413,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 		}
 	}
 
+	if (emulation_type & EMULTYPE_SKIP) {
+		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
+		return EMULATE_DONE;
+	}
+
 	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 
 	if (vcpu->arch.pio.string)
-- 
cgit v0.10.2


From 8317c298eab14cc20e2de5289743ff0d4c5a6b30 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Apr 2009 13:37:02 +0300
Subject: KVM: SVM: Skip instruction on a task switch only when appropriate

If a task switch was initiated because off a task gate in IDT and IDT
was accessed because of an external even the instruction should not
be skipped.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index bba67b7..8fc6eea 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1828,6 +1828,7 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	int reason;
 	int int_type = svm->vmcb->control.exit_int_info &
 		SVM_EXITINTINFO_TYPE_MASK;
+	int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
 
 	tss_selector = (u16)svm->vmcb->control.exit_info_1;
 
@@ -1843,8 +1844,14 @@ static int task_switch_interception(struct vcpu_svm *svm,
 		reason = TASK_SWITCH_CALL;
 
 
-	if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT)
-		skip_emulated_instruction(&svm->vcpu);
+	if (reason != TASK_SWITCH_GATE ||
+	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
+	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
+	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
+		if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0,
+					EMULTYPE_SKIP) != EMULATE_DONE)
+			return 0;
+	}
 
 	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
-- 
cgit v0.10.2


From 463656c0007ddccee78db383eeb9e6eac75ccb7f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 12 Apr 2009 15:49:07 +0300
Subject: KVM: Replace kvmclock open-coded get_cpu_var() with the real thing

Suggested by Ingo Molnar.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8beccaa..ffbb2c8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -631,16 +631,17 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	unsigned long flags;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
 	void *shared_kaddr;
+	unsigned long this_tsc_khz;
 
 	if ((!vcpu->time_page))
 		return;
 
-	preempt_disable();
-	if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
-		kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
-		vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
+	this_tsc_khz = get_cpu_var(cpu_tsc_khz);
+	if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
+		kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = this_tsc_khz;
 	}
-	preempt_enable();
+	put_cpu_var(cpu_tsc_khz);
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-- 
cgit v0.10.2


From c6b60c6921381130e5288b19f5fdf81152230b37 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Thu, 16 Apr 2009 10:43:48 +0200
Subject: KVM: ia64: Don't hold slots_lock in guest mode

Reorder locking to avoid holding the slots_lock when entering
the guest.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by : Xiantao Zhang<xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 3bf0a34..f127fb7 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -632,34 +632,22 @@ static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
 	vti_set_rr6(vcpu->arch.vmm_rr);
 	return kvm_insert_vmm_mapping(vcpu);
 }
+
 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
 {
 	kvm_purge_vmm_mapping(vcpu);
 	vti_set_rr6(vcpu->arch.host_rr6);
 }
 
-static int  vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	union context *host_ctx, *guest_ctx;
 	int r;
 
-	/*Get host and guest context with guest address space.*/
-	host_ctx = kvm_get_host_context(vcpu);
-	guest_ctx = kvm_get_guest_context(vcpu);
-
-	r = kvm_vcpu_pre_transition(vcpu);
-	if (r < 0)
-		goto out;
-	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-	kvm_vcpu_post_transition(vcpu);
-	r = 0;
-out:
-	return r;
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	int r;
+	/*
+	 * down_read() may sleep and return with interrupts enabled
+	 */
+	down_read(&vcpu->kvm->slots_lock);
 
 again:
 	if (signal_pending(current)) {
@@ -668,23 +656,28 @@ again:
 		goto out;
 	}
 
-	/*
-	 * down_read() may sleep and return with interrupts enabled
-	 */
-	down_read(&vcpu->kvm->slots_lock);
-
 	preempt_disable();
 	local_irq_disable();
 
+	/*Get host and guest context with guest address space.*/
+	host_ctx = kvm_get_host_context(vcpu);
+	guest_ctx = kvm_get_guest_context(vcpu);
+
 	vcpu->guest_mode = 1;
+
+	r = kvm_vcpu_pre_transition(vcpu);
+	if (r < 0)
+		goto vcpu_run_fail;
+
+	up_read(&vcpu->kvm->slots_lock);
 	kvm_guest_enter();
-	r = vti_vcpu_run(vcpu, kvm_run);
-	if (r < 0) {
-		local_irq_enable();
-		preempt_enable();
-		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-		goto out;
-	}
+
+	/*
+	 * Transition to the guest
+	 */
+	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+
+	kvm_vcpu_post_transition(vcpu);
 
 	vcpu->arch.launched = 1;
 	vcpu->guest_mode = 0;
@@ -698,9 +691,10 @@ again:
 	 */
 	barrier();
 	kvm_guest_exit();
-	up_read(&vcpu->kvm->slots_lock);
 	preempt_enable();
 
+	down_read(&vcpu->kvm->slots_lock);
+
 	r = kvm_handle_exit(kvm_run, vcpu);
 
 	if (r > 0) {
@@ -709,12 +703,20 @@ again:
 	}
 
 out:
+	up_read(&vcpu->kvm->slots_lock);
 	if (r > 0) {
 		kvm_resched(vcpu);
+		down_read(&vcpu->kvm->slots_lock);
 		goto again;
 	}
 
 	return r;
+
+vcpu_run_fail:
+	local_irq_enable();
+	preempt_enable();
+	kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+	goto out;
 }
 
 static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From 59839dfff5eabca01cc4e20b45797a60a80af8cb Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 16 Apr 2009 08:30:44 -0300
Subject: KVM: x86: check for cr3 validity in ioctl_set_sregs

Matt T. Yourst notes that kvm_arch_vcpu_ioctl_set_sregs lacks validity
checking for the new cr3 value:

"Userspace callers of KVM_SET_SREGS can pass a bogus value of cr3 to
the kernel. This will trigger a NULL pointer access in gfn_to_rmap()
when userspace next tries to call KVM_RUN on the affected VCPU and kvm
attempts to activate the new non-existent page table root.

This happens since kvm only validates that cr3 points to a valid guest
physical memory page when code *inside* the guest sets cr3. However, kvm
currently trusts the userspace caller (e.g. QEMU) on the host machine to
always supply a valid page table root, rather than properly validating
it along with the rest of the reloaded guest state."

http://sourceforge.net/tracker/?func=detail&atid=893831&aid=2687641&group_id=180599

Check for a valid cr3 address in kvm_arch_vcpu_ioctl_set_sregs, triple
fault in case of failure.

Cc: stable@kernel.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ffbb2c8..2bad49b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3993,7 +3993,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 	vcpu->arch.cr2 = sregs->cr2;
 	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
-	vcpu->arch.cr3 = sregs->cr3;
+
+	down_read(&vcpu->kvm->slots_lock);
+	if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
+		vcpu->arch.cr3 = sregs->cr3;
+	else
+		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+	up_read(&vcpu->kvm->slots_lock);
 
 	kvm_set_cr8(vcpu, sregs->cr8);
 
-- 
cgit v0.10.2


From 64f6afbd4c3317eb6845d770aa345afd3f120471 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Thu, 16 Apr 2009 17:59:16 +0800
Subject: KVM: ia64: Flush all TLBs once guest's memory mapping changes.

Flush all vcpu's TLB entries once changes guest's memory mapping.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index f127fb7..9addca6 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1631,6 +1631,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
+	kvm_flush_remote_tlbs(kvm);
 }
 
 long kvm_arch_dev_ioctl(struct file *filp,
-- 
cgit v0.10.2


From f9b647adda0e821538b6d80e52873d861ffb1799 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Thu, 16 Apr 2009 11:24:58 +0200
Subject: KVM: ia64: remove empty function vti_vcpu_load()

vti_vcpu_load() doesn't do anything, so lets get rid of it.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by : Xiantao Zhang<xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 9addca6..7263171 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1100,10 +1100,6 @@ static void kvm_free_vmm_area(void)
 	}
 }
 
-static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 static int vti_init_vpd(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -1348,7 +1344,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->kvm = kvm;
 
 	cpu = get_cpu();
-	vti_vcpu_load(vcpu, cpu);
 	r = vti_vcpu_setup(vcpu, id);
 	put_cpu();
 
-- 
cgit v0.10.2


From 457459c3c738dfb37226ba116ba301140da0d1fb Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Thu, 16 Apr 2009 16:08:29 +0200
Subject: KVM: ia64: restore irq state before calling kvm_vcpu_init

Make sure to restore the psr after calling kvm_insert_vmm_mapping()
which calls ia64_itr_entry() as it disables local interrupts and
kvm_vcpu_init() may sleep.

Avoids a warning from the lock debugging code.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by : Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 7263171..5b868db 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1290,6 +1290,7 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
 
 	local_irq_save(psr);
 	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
 	if (r)
 		goto fail;
 	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
@@ -1307,13 +1308,11 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
 		goto uninit;
 
 	kvm_purge_vmm_mapping(vcpu);
-	local_irq_restore(psr);
 
 	return 0;
 uninit:
 	kvm_vcpu_uninit(vcpu);
 fail:
-	local_irq_restore(psr);
 	return r;
 }
 
-- 
cgit v0.10.2


From 4d13c3b04f14a9a72ffcdd082acc243e7e56b4e0 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Thu, 16 Apr 2009 16:53:13 +0200
Subject: KVM: ia64: preserve int status through call to kvm_insert_vmm_mapping

Preserve interrupt status around call to kvm_insert_vmm_mappin()
in kvm_vcpu_pre_transition().

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5b868db..cf5a193 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -619,6 +619,8 @@ static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
 
 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
 {
+	unsigned long psr;
+	int r;
 	int cpu = smp_processor_id();
 
 	if (vcpu->arch.last_run_cpu != cpu ||
@@ -630,7 +632,10 @@ static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
 	vti_set_rr6(vcpu->arch.vmm_rr);
-	return kvm_insert_vmm_mapping(vcpu);
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+	return r;
 }
 
 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From 43890ae8bcc5fa88ef3061613efe041b866fbc5a Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Fri, 17 Apr 2009 16:43:27 +0200
Subject: KVM: ia64: ia64 vcpu_reset() do not call kmalloc() with irqs disabled

Restore local irq enabled state before calling kvm_arch_vcpu_init(),
which calls kmalloc(GFP_KERNEL).

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index cf5a193..be4413e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -2001,6 +2001,7 @@ static int vcpu_reset(struct kvm_vcpu *vcpu)
 	long psr;
 	local_irq_save(psr);
 	r = kvm_insert_vmm_mapping(vcpu);
+	local_irq_restore(psr);
 	if (r)
 		goto fail;
 
@@ -2013,7 +2014,6 @@ static int vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_purge_vmm_mapping(vcpu);
 	r = 0;
 fail:
-	local_irq_restore(psr);
 	return r;
 }
 
-- 
cgit v0.10.2


From 3438253926822a6bf8487b4f7d82f26a2c0b2388 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@web.de>
Date: Sat, 25 Apr 2009 12:43:21 +0200
Subject: KVM: MMU: Fix auditing code

Fix build breakage of hpa lookup in audit_mappings_page. Moreover, make
this function robust against shadow_notrap_nonpresent_pte entries.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b39ec62..3592aea 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3029,11 +3029,13 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 				       " in nonleaf level: levels %d gva %lx"
 				       " level %d pte %llx\n", audit_msg,
 				       vcpu->arch.mmu.root_level, va, level, ent);
-
-			audit_mappings_page(vcpu, ent, va, level - 1);
+			else
+				audit_mappings_page(vcpu, ent, va, level - 1);
 		} else {
 			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
-			hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
+			gfn_t gfn = gpa >> PAGE_SHIFT;
+			pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
+			hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
 
 			if (is_shadow_present_pte(ent)
 			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
-- 
cgit v0.10.2


From 8061823a25218174f30c3dd943989e1d72f7d06e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:44:56 +0300
Subject: KVM: Make kvm_cpu_(has|get)_interrupt() work for userspace irqchip
 too

At the vector level, kernel and userspace irqchip are fairly similar.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index cf17ed5..11c2757 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -24,6 +24,7 @@
 
 #include "irq.h"
 #include "i8254.h"
+#include "x86.h"
 
 /*
  * check if there are pending timer events
@@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
 	struct kvm_pic *s;
 
+	if (!irqchip_in_kernel(v->kvm))
+		return v->arch.irq_summary;
+
 	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
 		if (kvm_apic_accept_pic_intr(v)) {
 			s = pic_irqchip(v->kvm);	/* PIC */
@@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 	struct kvm_pic *s;
 	int vector;
 
+	if (!irqchip_in_kernel(v->kvm))
+		return kvm_pop_irq(v);
+
 	vector = kvm_get_apic_interrupt(v);	/* APIC */
 	if (vector == -1) {
 		if (kvm_apic_accept_pic_intr(v)) {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8fc6eea..6eef6d2 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2091,8 +2091,9 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
 	 */
-	if (kvm_run->request_interrupt_window &&
-	    !svm->vcpu.arch.irq_summary) {
+	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
+	    kvm_run->request_interrupt_window &&
+	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
 		++svm->vcpu.stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@@ -2373,7 +2374,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
 		 (svm->vcpu.arch.hflags & HF_GIF_MASK));
 
-	if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
+	if (svm->vcpu.arch.interrupt_window_open &&
+	    kvm_cpu_has_interrupt(&svm->vcpu))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
 		 */
@@ -2383,7 +2385,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	 * Interrupts blocked.  Wait for unblock.
 	 */
 	if (!svm->vcpu.arch.interrupt_window_open &&
-	    (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
+	    (kvm_cpu_has_interrupt(&svm->vcpu) ||
+	     kvm_run->request_interrupt_window))
 		svm_set_vintr(svm);
 	else
 		svm_clear_vintr(svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c6997c0..b3292c1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2535,21 +2535,20 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		vmx_inject_nmi(vcpu);
 		if (vcpu->arch.nmi_pending)
 			enable_nmi_window(vcpu);
-		else if (vcpu->arch.irq_summary
-			 || kvm_run->request_interrupt_window)
+		else if (kvm_cpu_has_interrupt(vcpu) ||
+			 kvm_run->request_interrupt_window)
 			enable_irq_window(vcpu);
 		return;
 	}
 
 	if (vcpu->arch.interrupt_window_open) {
-		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-			kvm_queue_interrupt(vcpu, kvm_pop_irq(vcpu));
+		if (kvm_cpu_has_interrupt(vcpu) && !vcpu->arch.interrupt.pending)
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 
 		if (vcpu->arch.interrupt.pending)
 			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-	}
-	if (!vcpu->arch.interrupt_window_open &&
-	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
+	} else if(kvm_cpu_has_interrupt(vcpu) ||
+		  kvm_run->request_interrupt_window)
 		enable_irq_window(vcpu);
 }
 
@@ -2976,8 +2975,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
 	 */
-	if (kvm_run->request_interrupt_window &&
-	    !vcpu->arch.irq_summary) {
+	if (!irqchip_in_kernel(vcpu->kvm) &&
+	    kvm_run->request_interrupt_window &&
+	    !kvm_cpu_has_interrupt(vcpu)) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2bad49b..4c2eb7c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3069,7 +3069,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->arch.irq_summary &&
+	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->arch.interrupt_window_open &&
 		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
@@ -3086,7 +3086,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 	else
 		kvm_run->ready_for_interrupt_injection =
 					(vcpu->arch.interrupt_window_open &&
-					 vcpu->arch.irq_summary == 0);
+					 !kvm_cpu_has_interrupt(vcpu));
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From 863e8e658ee9ac6e5931b295eb7428456e450a0f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:44:57 +0300
Subject: KVM: VMX: Consolidate userspace and kernel interrupt injection for
 VMX

Use the same callback to inject irq/nmi events no matter what irqchip is
in use. Only from VMX for now.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cb306cf..5edae35 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -517,7 +517,7 @@ struct kvm_x86_ops {
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
 	bool (*exception_injected)(struct kvm_vcpu *vcpu);
-	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
+	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6eef6d2..f2933ab 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2298,7 +2298,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 		(svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
-static void svm_intr_assist(struct kvm_vcpu *vcpu)
+static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b3292c1..06252f7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2510,48 +2510,6 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 	return vcpu->arch.interrupt_window_open;
 }
 
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-				       struct kvm_run *kvm_run)
-{
-	vmx_update_window_states(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.interrupt.pending) {
-			enable_nmi_window(vcpu);
-		} else if (vcpu->arch.nmi_window_open) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-		} else {
-			enable_nmi_window(vcpu);
-			return;
-		}
-	}
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending)
-			enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu) ||
-			 kvm_run->request_interrupt_window)
-			enable_irq_window(vcpu);
-		return;
-	}
-
-	if (vcpu->arch.interrupt_window_open) {
-		if (kvm_cpu_has_interrupt(vcpu) && !vcpu->arch.interrupt.pending)
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-
-		if (vcpu->arch.interrupt.pending)
-			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-	} else if(kvm_cpu_has_interrupt(vcpu) ||
-		  kvm_run->request_interrupt_window)
-		enable_irq_window(vcpu);
-}
-
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
@@ -3351,8 +3309,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	}
 }
 
-static void vmx_intr_assist(struct kvm_vcpu *vcpu)
+static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
+
 	update_tpr_threshold(vcpu);
 
 	vmx_update_window_states(vcpu);
@@ -3373,25 +3334,25 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 			return;
 		}
 	}
+
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending)
-			enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu))
-			enable_irq_window(vcpu);
-		return;
+		goto out;
 	}
+
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
 		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-		else
-			enable_irq_window(vcpu);
 	}
-	if (vcpu->arch.interrupt.pending) {
+
+	if (vcpu->arch.interrupt.pending)
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		if (kvm_cpu_has_interrupt(vcpu))
-			enable_irq_window(vcpu);
-	}
+
+out:
+	if (vcpu->arch.nmi_pending)
+		enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		enable_irq_window(vcpu);
 }
 
 /*
@@ -3733,7 +3694,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.queue_exception = vmx_queue_exception,
 	.exception_injected = vmx_exception_injected,
 	.inject_pending_irq = vmx_intr_assist,
-	.inject_pending_vectors = do_interrupt_requests,
+	.inject_pending_vectors = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4c2eb7c..a84c96a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3173,7 +3173,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else if (irqchip_in_kernel(vcpu->kvm))
-		kvm_x86_ops->inject_pending_irq(vcpu);
+		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
 	else
 		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
 
-- 
cgit v0.10.2


From 1f21e79aaced0a041e9399346960ce26ae0f5a4e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:44:58 +0300
Subject: KVM: VMX: Cleanup vmx_intr_assist()

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06252f7..9eb518f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3309,6 +3309,34 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	}
 }
 
+static void vmx_intr_inject(struct kvm_vcpu *vcpu)
+{
+	/* try to reinject previous events if any */
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		return;
+	}
+
+	if (vcpu->arch.interrupt.pending) {
+		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+		return;
+	}
+
+	/* try to inject new event if pending */
+	if (vcpu->arch.nmi_pending) {
+		if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+			vmx_inject_nmi(vcpu);
+		}
+	} else if (kvm_cpu_has_interrupt(vcpu)) {
+		if (vcpu->arch.interrupt_window_open) {
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
+			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+		}
+	}
+}
+
 static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
@@ -3323,32 +3351,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 				GUEST_INTR_STATE_STI |
 				GUEST_INTR_STATE_MOV_SS);
 
-	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.interrupt.pending) {
-			enable_nmi_window(vcpu);
-		} else if (vcpu->arch.nmi_window_open) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-		} else {
-			enable_nmi_window(vcpu);
-			return;
-		}
-	}
-
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		goto out;
-	}
-
-	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open)
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-	}
-
-	if (vcpu->arch.interrupt.pending)
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	vmx_intr_inject(vcpu);
 
-out:
+	/* enable NMI/IRQ window open exits if needed */
 	if (vcpu->arch.nmi_pending)
 		enable_nmi_window(vcpu);
 	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-- 
cgit v0.10.2


From 5df56646472c42495dd2412c8d8aa72e59efe79a Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:44:59 +0300
Subject: KVM: Use kvm_arch_interrupt_allowed() instead of checking
 interrupt_window_open directly

kvm_arch_interrupt_allowed() also checks IF so drop the check.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a84c96a..ae6250b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3071,8 +3071,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 {
 	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
 		kvm_run->request_interrupt_window &&
-		vcpu->arch.interrupt_window_open &&
-		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
+		kvm_arch_interrupt_allowed(vcpu));
 }
 
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
@@ -3085,7 +3084,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 		kvm_run->ready_for_interrupt_injection = 1;
 	else
 		kvm_run->ready_for_interrupt_injection =
-					(vcpu->arch.interrupt_window_open &&
+					(kvm_arch_interrupt_allowed(vcpu) &&
 					 !kvm_cpu_has_interrupt(vcpu));
 }
 
-- 
cgit v0.10.2


From 9222be18f76b4410b4da0d06e1cc21079e64b3ec Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 23 Apr 2009 17:14:37 +0300
Subject: KVM: SVM: Coalesce userspace/kernel irqchip interrupt injection logic

Start to use interrupt/exception queues like VMX does.
This also fix the bug that if exit was caused by a guest
internal exception access to IDT the exception was not
reinjected.

Use EVENTINJ to inject interrupts.  Use VINT only for detecting when IRQ
windows is open again.  EVENTINJ ensures
the interrupt is injected immediately and not delayed.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f2933ab..a80ffaa 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -70,7 +70,6 @@ module_param(npt, int, S_IRUGO);
 static int nested = 0;
 module_param(nested, int, S_IRUGO);
 
-static void kvm_reput_irq(struct vcpu_svm *svm);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 
 static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
@@ -199,9 +198,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 
 static bool svm_exception_injected(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
+	return false;
 }
 
 static int is_external_interrupt(u32 info)
@@ -978,12 +975,9 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 
 static int svm_get_irq(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_svm *svm = to_svm(vcpu);
-	u32 exit_int_info = svm->vmcb->control.exit_int_info;
-
-	if (is_external_interrupt(exit_int_info))
-		return exit_int_info & SVM_EVTINJ_VEC_MASK;
-	return -1;
+	if (!vcpu->arch.interrupt.pending)
+		return -1;
+	return vcpu->arch.interrupt.nr;
 }
 
 static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1090,17 +1084,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 
 static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 exit_int_info = svm->vmcb->control.exit_int_info;
-	struct kvm *kvm = svm->vcpu.kvm;
 	u64 fault_address;
 	u32 error_code;
-	bool event_injection = false;
-
-	if (!irqchip_in_kernel(kvm) &&
-	    is_external_interrupt(exit_int_info)) {
-		event_injection = true;
-		kvm_push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
-	}
 
 	fault_address  = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
@@ -1120,9 +1105,11 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	 */
 	if (npt_enabled)
 		svm_flush_tlb(&svm->vcpu);
-
-	if (!npt_enabled && event_injection)
-		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
+	else {
+		if (svm->vcpu.arch.interrupt.pending ||
+				svm->vcpu.arch.exception.pending)
+			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
+	}
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
@@ -2196,7 +2183,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		}
 	}
 
-	kvm_reput_irq(svm);
 
 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2259,13 +2245,19 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
 }
 
+static void svm_queue_irq(struct vcpu_svm *svm, unsigned nr)
+{
+	svm->vmcb->control.event_inj = nr |
+		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
+}
+
 static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	nested_svm_intr(svm);
 
-	svm_inject_irq(svm, irq);
+	svm_queue_irq(svm, irq);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -2298,98 +2290,47 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 		(svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
-static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb *vmcb = svm->vmcb;
-	int intr_vector = -1;
-
-	if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
-	    ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
-		intr_vector = vmcb->control.exit_int_info &
-			      SVM_EVTINJ_VEC_MASK;
-		vmcb->control.exit_int_info = 0;
-		svm_inject_irq(svm, intr_vector);
-		goto out;
-	}
-
-	if (vmcb->control.int_ctl & V_IRQ_MASK)
-		goto out;
-
-	if (!kvm_cpu_has_interrupt(vcpu))
-		goto out;
-
-	if (nested_svm_intr(svm))
-		goto out;
-
-	if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
-		goto out;
-
-	if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
-	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
-	    (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
-		/* unable to deliver irq, set pending irq */
-		svm_set_vintr(svm);
-		svm_inject_irq(svm, 0x0);
-		goto out;
-	}
-	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
-	intr_vector = kvm_cpu_get_interrupt(vcpu);
-	svm_inject_irq(svm, intr_vector);
-out:
-	update_cr8_intercept(vcpu);
+	svm_set_vintr(to_svm(vcpu));
+	svm_inject_irq(to_svm(vcpu), 0x0);
 }
 
-static void kvm_reput_irq(struct vcpu_svm *svm)
+static void svm_intr_inject(struct kvm_vcpu *vcpu)
 {
-	struct vmcb_control_area *control = &svm->vmcb->control;
-
-	if ((control->int_ctl & V_IRQ_MASK)
-	    && !irqchip_in_kernel(svm->vcpu.kvm)) {
-		control->int_ctl &= ~V_IRQ_MASK;
-		kvm_push_irq(&svm->vcpu, control->int_vector);
+	/* try to reinject previous events if any */
+	if (vcpu->arch.interrupt.pending) {
+		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
+		return;
 	}
 
-	svm->vcpu.arch.interrupt_window_open =
-		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		 (svm->vcpu.arch.hflags & HF_GIF_MASK);
-}
-
-static void svm_do_inject_vector(struct vcpu_svm *svm)
-{
-	svm_inject_irq(svm, kvm_pop_irq(&svm->vcpu));
+	/* try to inject new event if pending */
+	if (kvm_cpu_has_interrupt(vcpu)) {
+		if (vcpu->arch.interrupt_window_open) {
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
+			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
+		}
+	}
 }
 
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-				       struct kvm_run *kvm_run)
+static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb_control_area *control = &svm->vmcb->control;
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
 
 	if (nested_svm_intr(svm))
-		return;
+		goto out;
 
-	svm->vcpu.arch.interrupt_window_open =
-		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
-		 (svm->vcpu.arch.hflags & HF_GIF_MASK));
+	svm->vcpu.arch.interrupt_window_open = svm_interrupt_allowed(vcpu);
 
-	if (svm->vcpu.arch.interrupt_window_open &&
-	    kvm_cpu_has_interrupt(&svm->vcpu))
-		/*
-		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
-		 */
-		svm_do_inject_vector(svm);
+	svm_intr_inject(vcpu);
 
-	/*
-	 * Interrupts blocked.  Wait for unblock.
-	 */
-	if (!svm->vcpu.arch.interrupt_window_open &&
-	    (kvm_cpu_has_interrupt(&svm->vcpu) ||
-	     kvm_run->request_interrupt_window))
-		svm_set_vintr(svm);
-	else
-		svm_clear_vintr(svm);
+	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		enable_irq_window(vcpu);
+
+out:
+	update_cr8_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2429,6 +2370,46 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
 	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
 }
 
+static void svm_complete_interrupts(struct vcpu_svm *svm)
+{
+	u8 vector;
+	int type;
+	u32 exitintinfo = svm->vmcb->control.exit_int_info;
+
+	svm->vcpu.arch.nmi_injected = false;
+	kvm_clear_exception_queue(&svm->vcpu);
+	kvm_clear_interrupt_queue(&svm->vcpu);
+
+	if (!(exitintinfo & SVM_EXITINTINFO_VALID))
+		return;
+
+	vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
+	type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
+
+	switch (type) {
+	case SVM_EXITINTINFO_TYPE_NMI:
+		svm->vcpu.arch.nmi_injected = true;
+		break;
+	case SVM_EXITINTINFO_TYPE_EXEPT:
+		/* In case of software exception do not reinject an exception
+		   vector, but re-execute and instruction instead */
+		if (vector == BP_VECTOR || vector == OF_VECTOR)
+			break;
+		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
+			u32 err = svm->vmcb->control.exit_int_info_err;
+			kvm_queue_exception_e(&svm->vcpu, vector, err);
+
+		} else
+			kvm_queue_exception(&svm->vcpu, vector);
+		break;
+	case SVM_EXITINTINFO_TYPE_INTR:
+		kvm_queue_interrupt(&svm->vcpu, vector);
+		break;
+	default:
+		break;
+	}
+}
+
 #ifdef CONFIG_X86_64
 #define R "r"
 #else
@@ -2557,6 +2538,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	sync_cr8_to_lapic(vcpu);
 
 	svm->next_rip = 0;
+
+	svm_complete_interrupts(svm);
 }
 
 #undef R
@@ -2678,7 +2661,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.queue_exception = svm_queue_exception,
 	.exception_injected = svm_exception_injected,
 	.inject_pending_irq = svm_intr_assist,
-	.inject_pending_vectors = do_interrupt_requests,
+	.inject_pending_vectors = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
 
 	.set_tss_addr = svm_set_tss_addr,
-- 
cgit v0.10.2


From 1cb948ae86f3d95cce58fac51d00766825f5f783 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:02 +0300
Subject: KVM: Remove exception_injected() callback.

It always return false for VMX/SVM now.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5edae35..ea3741e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -516,7 +516,6 @@ struct kvm_x86_ops {
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
-	bool (*exception_injected)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a80ffaa..8fa5a0e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -196,11 +196,6 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	svm->vmcb->control.event_inj_err = error_code;
 }
 
-static bool svm_exception_injected(struct kvm_vcpu *vcpu)
-{
-	return false;
-}
-
 static int is_external_interrupt(u32 info)
 {
 	info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
@@ -2659,7 +2654,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
 	.queue_exception = svm_queue_exception,
-	.exception_injected = svm_exception_injected,
 	.inject_pending_irq = svm_intr_assist,
 	.inject_pending_vectors = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9eb518f..3186fcf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -789,11 +789,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 }
 
-static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
-{
-	return false;
-}
-
 /*
  * Swap MSR entry in host/guest MSR entry array.
  */
@@ -3697,7 +3692,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.get_irq = vmx_get_irq,
 	.set_irq = vmx_inject_irq,
 	.queue_exception = vmx_queue_exception,
-	.exception_injected = vmx_exception_injected,
 	.inject_pending_irq = vmx_intr_assist,
 	.inject_pending_vectors = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae6250b..b81970b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3237,8 +3237,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		profile_hit(KVM_PROFILING, (void *)rip);
 	}
 
-	if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
-		vcpu->arch.exception.pending = false;
 
 	kvm_lapic_sync_from_vapic(vcpu);
 
-- 
cgit v0.10.2


From 1d6ed0cb95a2f0839e1a31f1971dc37cd60c258a Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:03 +0300
Subject: KVM: Remove inject_pending_vectors() callback

It is the same as inject_pending_irq() for VMX/SVM now.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ea3741e..aa5a54e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -517,8 +517,6 @@ struct kvm_x86_ops {
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
-	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
-				       struct kvm_run *run);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8fa5a0e..2905475 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2655,7 +2655,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.set_irq = svm_set_irq,
 	.queue_exception = svm_queue_exception,
 	.inject_pending_irq = svm_intr_assist,
-	.inject_pending_vectors = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
 
 	.set_tss_addr = svm_set_tss_addr,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3186fcf..9162b4c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3693,7 +3693,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.set_irq = vmx_inject_irq,
 	.queue_exception = vmx_queue_exception,
 	.inject_pending_irq = vmx_intr_assist,
-	.inject_pending_vectors = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b81970b..0890df9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3171,10 +3171,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
-	else if (irqchip_in_kernel(vcpu->kvm))
-		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
 	else
-		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
+		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
 
 	kvm_lapic_sync_to_vapic(vcpu);
 
-- 
cgit v0.10.2


From 115666dfc733a6749d99bcf3b2fe3fa253218b36 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:04 +0300
Subject: KVM: Remove kvm_push_irq()

No longer used.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2ab6791..39350b2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -30,11 +30,4 @@ static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
 		clear_bit(word_index, &vcpu->arch.irq_summary);
 	return irq;
 }
-
-static inline void kvm_push_irq(struct kvm_vcpu *vcpu, u8 irq)
-{
-        set_bit(irq, vcpu->arch.irq_pending);
-        set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
-}
-
 #endif
-- 
cgit v0.10.2


From 615d5193055880d44db92b72403b7549251ac2a6 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:05 +0300
Subject: KVM: sync_lapic_to_cr8() should always sync cr8 to V_TPR

Even if IRQ chip is in userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2905475..143818e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2348,7 +2348,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
 
 	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
 		int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
-		kvm_lapic_set_tpr(vcpu, cr8);
+		kvm_set_cr8(vcpu, cr8);
 	}
 }
 
@@ -2357,9 +2357,6 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 cr8;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return;
-
 	cr8 = kvm_get_cr8(vcpu);
 	svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
 	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
-- 
cgit v0.10.2


From 0a5fff192388d2a74aa9ab5e0d394b745df9f225 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:06 +0300
Subject: KVM: Do not report TPR write to userspace if new value bigger or
 equal to a previous one.

Saves many exits to userspace in a case of IRQ chip in userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 143818e..e283a63 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1862,9 +1862,13 @@ static int emulate_on_interception(struct vcpu_svm *svm,
 
 static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
+	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
+	/* instruction emulation calls kvm_set_cr8() */
 	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
 	if (irqchip_in_kernel(svm->vcpu.kvm))
 		return 1;
+	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
+		return 1;
 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9162b4c..51f804c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2724,13 +2724,18 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
 			skip_emulated_instruction(vcpu);
 			return 1;
-		case 8:
-			kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg));
-			skip_emulated_instruction(vcpu);
-			if (irqchip_in_kernel(vcpu->kvm))
-				return 1;
-			kvm_run->exit_reason = KVM_EXIT_SET_TPR;
-			return 0;
+		case 8: {
+				u8 cr8_prev = kvm_get_cr8(vcpu);
+				u8 cr8 = kvm_register_read(vcpu, reg);
+				kvm_set_cr8(vcpu, cr8);
+				skip_emulated_instruction(vcpu);
+				if (irqchip_in_kernel(vcpu->kvm))
+					return 1;
+				if (cr8_prev <= cr8)
+					return 1;
+				kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+				return 0;
+			}
 		};
 		break;
 	case 2: /* clts */
-- 
cgit v0.10.2


From c4282df98ae0993983924c00ed76428a6609d68b Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:07 +0300
Subject: KVM: Get rid of arch.interrupt_window_open & arch.nmi_window_open

They are recalculated before each use anyway.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa5a54e..53533ea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -266,7 +266,6 @@ struct kvm_mmu {
 
 struct kvm_vcpu_arch {
 	u64 host_tsc;
-	int interrupt_window_open;
 	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
 	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 	/*
@@ -360,7 +359,6 @@ struct kvm_vcpu_arch {
 
 	bool nmi_pending;
 	bool nmi_injected;
-	bool nmi_window_open;
 
 	struct mtrr_state_type mtrr_state;
 	u32 pat;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e283a63..0f53439 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -216,8 +216,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 
 	kvm_rip_write(vcpu, svm->next_rip);
 	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
-
-	vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static int has_svm(void)
@@ -2305,7 +2303,7 @@ static void svm_intr_inject(struct kvm_vcpu *vcpu)
 
 	/* try to inject new event if pending */
 	if (kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open) {
+		if (svm_interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
 		}
@@ -2321,8 +2319,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (nested_svm_intr(svm))
 		goto out;
 
-	svm->vcpu.arch.interrupt_window_open = svm_interrupt_allowed(vcpu);
-
 	svm_intr_inject(vcpu);
 
 	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51f804c..116eac0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -753,7 +753,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	if (interruptibility & 3)
 		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
 			     interruptibility & ~3);
-	vcpu->arch.interrupt_window_open = 1;
 }
 
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -2482,27 +2481,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
-static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-
-	vcpu->arch.nmi_window_open =
-		!(guest_intr & (GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS |
-				GUEST_INTR_STATE_NMI));
 	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
-		vcpu->arch.nmi_window_open = 0;
+		return 0;
 
-	vcpu->arch.interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 !(guest_intr & (GUEST_INTR_STATE_STI |
-				 GUEST_INTR_STATE_MOV_SS)));
+	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_NMI));
 }
 
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	vmx_update_window_states(vcpu);
-	return vcpu->arch.interrupt_window_open;
+	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
 }
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -3194,9 +3187,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		       __func__, vectoring_info, exit_reason);
 
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
-		if (vcpu->arch.interrupt_window_open) {
+		if (vmx_interrupt_allowed(vcpu)) {
 			vmx->soft_vnmi_blocked = 0;
-			vcpu->arch.nmi_window_open = 1;
 		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
 			   vcpu->arch.nmi_pending) {
 			/*
@@ -3209,7 +3201,6 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			       "state on VCPU %d after 1 s timeout\n",
 			       __func__, vcpu->vcpu_id);
 			vmx->soft_vnmi_blocked = 0;
-			vmx->vcpu.arch.nmi_window_open = 1;
 		}
 	}
 
@@ -3324,13 +3315,13 @@ static void vmx_intr_inject(struct kvm_vcpu *vcpu)
 
 	/* try to inject new event if pending */
 	if (vcpu->arch.nmi_pending) {
-		if (vcpu->arch.nmi_window_open) {
+		if (vmx_nmi_allowed(vcpu)) {
 			vcpu->arch.nmi_pending = false;
 			vcpu->arch.nmi_injected = true;
 			vmx_inject_nmi(vcpu);
 		}
 	} else if (kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open) {
+		if (vmx_interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
 		}
@@ -3344,8 +3335,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	update_tpr_threshold(vcpu);
 
-	vmx_update_window_states(vcpu);
-
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
 				GUEST_INTR_STATE_STI |
@@ -3518,8 +3507,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
 
-	vmx_update_window_states(vcpu);
-
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
 
-- 
cgit v0.10.2


From 95ba82731374eb1c2af4dd442526c4b314f0e8b6 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:08 +0300
Subject: KVM: SVM: Add NMI injection support

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 53533ea..dd9ecd3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -512,10 +512,15 @@ struct kvm_x86_ops {
 				unsigned char *hypercall_addr);
 	int (*get_irq)(struct kvm_vcpu *vcpu);
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
-	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
+	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+	void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	int (*get_mt_mask_shift)(void);
@@ -763,6 +768,7 @@ enum {
 #define HF_GIF_MASK		(1 << 0)
 #define HF_HIF_MASK		(1 << 1)
 #define HF_VINTR_MASK		(1 << 2)
+#define HF_NMI_MASK		(1 << 3)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0f53439..1807288 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1843,6 +1843,14 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	++svm->vcpu.stat.nmi_window_exits;
+	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+	return 1;
+}
+
 static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
@@ -1863,8 +1871,10 @@ static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
 	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
-	if (irqchip_in_kernel(svm->vcpu.kvm))
+	if (irqchip_in_kernel(svm->vcpu.kvm)) {
+		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
 		return 1;
+	}
 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 		return 1;
 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
@@ -2120,6 +2130,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
 	/* [SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception, */
 	[SVM_EXIT_CPUID]			= cpuid_interception,
+	[SVM_EXIT_IRET]                         = iret_interception,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
@@ -2227,6 +2238,21 @@ static void pre_svm_run(struct vcpu_svm *svm)
 		new_asid(svm, svm_data);
 }
 
+static void svm_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+}
+
+static void svm_inject_nmi(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+	vcpu->arch.hflags |= HF_NMI_MASK;
+	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	++vcpu->stat.nmi_injections;
+}
 
 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
@@ -2242,8 +2268,10 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
 }
 
-static void svm_queue_irq(struct vcpu_svm *svm, unsigned nr)
+static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+
 	svm->vmcb->control.event_inj = nr |
 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
@@ -2254,28 +2282,26 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 
 	nested_svm_intr(svm);
 
-	svm_queue_irq(svm, irq);
+	svm_queue_irq(vcpu, irq);
 }
 
-static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb *vmcb = svm->vmcb;
-	int max_irr, tpr;
 
-	if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
+	if (irr == -1)
 		return;
 
-	vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
-
-	max_irr = kvm_lapic_find_highest_irr(vcpu);
-	if (max_irr == -1)
-		return;
-
-	tpr = kvm_lapic_get_cr8(vcpu) << 4;
+	if (tpr >= irr)
+		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+}
 
-	if (tpr >= (max_irr & 0xf0))
-		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+	return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
@@ -2293,39 +2319,12 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 	svm_inject_irq(to_svm(vcpu), 0x0);
 }
 
-static void svm_intr_inject(struct kvm_vcpu *vcpu)
-{
-	/* try to reinject previous events if any */
-	if (vcpu->arch.interrupt.pending) {
-		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
-		return;
-	}
-
-	/* try to inject new event if pending */
-	if (kvm_cpu_has_interrupt(vcpu)) {
-		if (svm_interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
-		}
-	}
-}
-
-static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
 
-	if (nested_svm_intr(svm))
-		goto out;
-
-	svm_intr_inject(vcpu);
-
-	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
 		enable_irq_window(vcpu);
-
-out:
-	update_cr8_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2650,9 +2649,14 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.patch_hypercall = svm_patch_hypercall,
 	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
+	.set_nmi = svm_inject_nmi,
 	.queue_exception = svm_queue_exception,
-	.inject_pending_irq = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
+	.nmi_allowed = svm_nmi_allowed,
+	.enable_nmi_window = enable_nmi_window,
+	.enable_irq_window = enable_irq_window,
+	.update_cr8_intercept = update_cr8_intercept,
+	.drop_interrupt_shadow = svm_drop_interrupt_shadow,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 116eac0..bad2413 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1314,6 +1314,9 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;
 
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+
 	return alloc_kvm_area();
 }
 
@@ -2404,6 +2407,12 @@ out:
 	return ret;
 }
 
+void vmx_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
+{
+	vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+			GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
+}
+
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
@@ -3214,21 +3223,14 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void update_tpr_threshold(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
-	int max_irr, tpr;
-
-	if (!vm_need_tpr_shadow(vcpu->kvm))
-		return;
-
-	if (!kvm_lapic_enabled(vcpu) ||
-	    ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) {
+	if (irr == -1 || tpr < irr) {
 		vmcs_write32(TPR_THRESHOLD, 0);
 		return;
 	}
 
-	tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4;
-	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
+	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
@@ -3300,55 +3302,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	}
 }
 
-static void vmx_intr_inject(struct kvm_vcpu *vcpu)
-{
-	/* try to reinject previous events if any */
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		return;
-	}
-
-	if (vcpu->arch.interrupt.pending) {
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		return;
-	}
-
-	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending) {
-		if (vmx_nmi_allowed(vcpu)) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-			vmx_inject_nmi(vcpu);
-		}
-	} else if (kvm_cpu_has_interrupt(vcpu)) {
-		if (vmx_interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		}
-	}
-}
-
-static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
-
-	update_tpr_threshold(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	vmx_intr_inject(vcpu);
-
-	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-		enable_irq_window(vcpu);
-}
-
 /*
  * Failure to inject an interrupt should give us the information
  * in IDT_VECTORING_INFO_FIELD.  However, if the failure occurs
@@ -3683,9 +3636,15 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.patch_hypercall = vmx_patch_hypercall,
 	.get_irq = vmx_get_irq,
 	.set_irq = vmx_inject_irq,
+	.set_nmi = vmx_inject_nmi,
 	.queue_exception = vmx_queue_exception,
-	.inject_pending_irq = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
+	.nmi_allowed = vmx_nmi_allowed,
+	.enable_nmi_window = enable_nmi_window,
+	.enable_irq_window = enable_irq_window,
+	.update_cr8_intercept = update_cr8_intercept,
+	.drop_interrupt_shadow = vmx_drop_interrupt_shadow,
+
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
 	.get_mt_mask_shift = vmx_get_mt_mask_shift,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0890df9..96e995c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3114,6 +3114,68 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	up_read(&vcpu->kvm->slots_lock);
 }
 
+static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+{
+	int max_irr, tpr;
+
+	if (!kvm_x86_ops->update_cr8_intercept)
+		return;
+
+	max_irr = kvm_lapic_find_highest_irr(vcpu);
+
+	if (max_irr != -1)
+		max_irr >>= 4;
+
+	tpr = kvm_lapic_get_cr8(vcpu);
+
+	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
+}
+
+static void inject_irq(struct kvm_vcpu *vcpu)
+{
+	/* try to reinject previous events if any */
+	if (vcpu->arch.nmi_injected) {
+		kvm_x86_ops->set_nmi(vcpu);
+		return;
+	}
+
+	if (vcpu->arch.interrupt.pending) {
+		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+		return;
+	}
+
+	/* try to inject new event if pending */
+	if (vcpu->arch.nmi_pending) {
+		if (kvm_x86_ops->nmi_allowed(vcpu)) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+			kvm_x86_ops->set_nmi(vcpu);
+		}
+	} else if (kvm_cpu_has_interrupt(vcpu)) {
+		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
+			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+		}
+	}
+}
+
+static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		kvm_x86_ops->drop_interrupt_shadow(vcpu);
+
+	inject_irq(vcpu);
+
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		kvm_x86_ops->enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		kvm_x86_ops->enable_irq_window(vcpu);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -3172,9 +3234,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else
-		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
+		inject_pending_irq(vcpu, kvm_run);
 
-	kvm_lapic_sync_to_vapic(vcpu);
+	if (kvm_lapic_enabled(vcpu)) {
+		if (!vcpu->arch.apic->vapic_addr)
+			update_cr8_intercept(vcpu);
+		else
+			kvm_lapic_sync_to_vapic(vcpu);
+	}
 
 	up_read(&vcpu->kvm->slots_lock);
 
-- 
cgit v0.10.2


From 16d7a191170f0ca48c2c3277017b3e6d275e0711 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:10 +0300
Subject: KVM: Fix userspace IRQ chip migration

Re-put pending IRQ vector into interrupt_bitmap before migration.
Otherwise it will be lost if migration happens in the wrong time.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 96e995c..6391721 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3566,17 +3566,17 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->efer = vcpu->arch.shadow_efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
 
-	if (irqchip_in_kernel(vcpu->kvm)) {
+	if (irqchip_in_kernel(vcpu->kvm))
 		memset(sregs->interrupt_bitmap, 0,
 		       sizeof sregs->interrupt_bitmap);
-		pending_vec = kvm_x86_ops->get_irq(vcpu);
-		if (pending_vec >= 0)
-			set_bit(pending_vec,
-				(unsigned long *)sregs->interrupt_bitmap);
-	} else
+	else
 		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
 		       sizeof sregs->interrupt_bitmap);
 
+	pending_vec = kvm_x86_ops->get_irq(vcpu);
+	if (pending_vec >= 0)
+		set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap);
+
 	vcpu_put(vcpu);
 
 	return 0;
-- 
cgit v0.10.2


From 14d0bc1f7c8226d5088e7182c3b53e0c7e91d1af Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 21 Apr 2009 17:45:11 +0300
Subject: KVM: Get rid of get_irq() callback

It just returns pending IRQ vector from the queue for VMX/SVM.
Get IRQ directly from the queue before migration and put it back
after.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd9ecd3..3e94d05 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -510,7 +510,6 @@ struct kvm_x86_ops {
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
-	int (*get_irq)(struct kvm_vcpu *vcpu);
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1807288..d96a6d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -966,13 +966,6 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 	return 0;
 }
 
-static int svm_get_irq(struct kvm_vcpu *vcpu)
-{
-	if (!vcpu->arch.interrupt.pending)
-		return -1;
-	return vcpu->arch.interrupt.nr;
-}
-
 static void load_host_msrs(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
@@ -2647,7 +2640,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.handle_exit = handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
 	.patch_hypercall = svm_patch_hypercall,
-	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
 	.set_nmi = svm_inject_nmi,
 	.queue_exception = svm_queue_exception,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bad2413..25be53a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1045,13 +1045,6 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 	return 0;
 }
 
-static int vmx_get_irq(struct kvm_vcpu *vcpu)
-{
-	if (!vcpu->arch.interrupt.pending)
-		return -1;
-	return vcpu->arch.interrupt.nr;
-}
-
 static __init int cpu_has_kvm_support(void)
 {
 	return cpu_has_vmx();
@@ -3634,7 +3627,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
 	.patch_hypercall = vmx_patch_hypercall,
-	.get_irq = vmx_get_irq,
 	.set_irq = vmx_inject_irq,
 	.set_nmi = vmx_inject_nmi,
 	.queue_exception = vmx_queue_exception,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6391721..0f3e04b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3536,7 +3536,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
 	struct descriptor_table dt;
-	int pending_vec;
 
 	vcpu_load(vcpu);
 
@@ -3573,9 +3572,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
 		       sizeof sregs->interrupt_bitmap);
 
-	pending_vec = kvm_x86_ops->get_irq(vcpu);
-	if (pending_vec >= 0)
-		set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap);
+	if (vcpu->arch.interrupt.pending)
+		set_bit(vcpu->arch.interrupt.nr,
+			(unsigned long *)sregs->interrupt_bitmap);
 
 	vcpu_put(vcpu);
 
@@ -4097,9 +4096,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 			max_bits);
 		/* Only pending external irq is handled here */
 		if (pending_vec < max_bits) {
-			kvm_x86_ops->set_irq(vcpu, pending_vec);
-			pr_debug("Set back pending irq %d\n",
-				 pending_vec);
+			kvm_queue_interrupt(vcpu, pending_vec);
+			pr_debug("Set back pending irq %d\n", pending_vec);
 		}
 		kvm_pic_clear_isr_ack(vcpu->kvm);
 	}
-- 
cgit v0.10.2


From 9b62e5b10ff0f98346bcbe4a4fe3a0ca8fa7be30 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Tue, 7 Apr 2009 23:58:56 +0000
Subject: KVM: Wake up waitqueue before calling get_cpu()

This moves the get_cpu() call down to be called after we wake up the
waiters. Therefore the waitqueue locks can safely be rt mutex.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Sven-Thorsten Dietrich <sven@thebigcorporation.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0f3e04b..e271371 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4569,7 +4569,7 @@ static void vcpu_kick_intr(void *info)
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
 	int ipi_pcpu = vcpu->cpu;
-	int cpu = get_cpu();
+	int cpu;
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
@@ -4579,6 +4579,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 	 * We may be called synchronously with irqs disabled in guest mode,
 	 * So need not to call smp_call_function_single() in that case.
 	 */
+	cpu = get_cpu();
 	if (vcpu->guest_mode && vcpu->cpu != cpu)
 		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
 	put_cpu();
-- 
cgit v0.10.2


From 4b12f0de33a64dfc624b2480f55b674f7fa23ef2 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 27 Apr 2009 20:35:42 +0800
Subject: KVM: Replace get_mt_mask_shift with get_mt_mask

Shadow_mt_mask is out of date, now it have only been used as a flag to indicate
if TDP enabled. Get rid of it and use tdp_enabled instead.

Also put memory type logical in kvm_x86_ops->get_mt_mask().

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3e94d05..8a6f6b6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -522,7 +522,7 @@ struct kvm_x86_ops {
 	void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
-	int (*get_mt_mask_shift)(void);
+	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -536,7 +536,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 void kvm_mmu_set_base_ptes(u64 base_pte);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -550,6 +550,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3592aea..bc614f9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -178,7 +178,6 @@ static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
-static u64 __read_mostly shadow_mt_mask;
 
 static inline u64 rsvd_bits(int s, int e)
 {
@@ -199,14 +198,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
 EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
-	shadow_mt_mask = mt_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -1608,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
 	return mtrr_state->def_type;
 }
 
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	u8 mtrr;
 
@@ -1618,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 		mtrr = MTRR_TYPE_WRBACK;
 	return mtrr;
 }
+EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
 
 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -1670,7 +1669,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 {
 	u64 spte;
 	int ret = 0;
-	u64 mt_mask = shadow_mt_mask;
 
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
@@ -1690,16 +1688,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
-	if (mt_mask) {
-		if (!kvm_is_mmio_pfn(pfn)) {
-			mt_mask = get_memory_type(vcpu, gfn) <<
-				kvm_x86_ops->get_mt_mask_shift();
-			mt_mask |= VMX_EPT_IGMT_BIT;
-		} else
-			mt_mask = MTRR_TYPE_UNCACHABLE <<
-				kvm_x86_ops->get_mt_mask_shift();
-		spte |= mt_mask;
-	}
+	if (tdp_enabled)
+		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
+			kvm_is_mmio_pfn(pfn));
 
 	spte |= (u64)pfn << PAGE_SHIFT;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d96a6d3..6350378 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2589,7 +2589,7 @@ static int get_npt_level(void)
 #endif
 }
 
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
 	return 0;
 }
@@ -2652,7 +2652,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
-	.get_mt_mask_shift = svm_get_mt_mask_shift,
+	.get_mt_mask = svm_get_mt_mask,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 25be53a..59b080c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3577,9 +3577,17 @@ static int get_ept_level(void)
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
-static int vmx_get_mt_mask_shift(void)
+static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
-	return VMX_EPT_MT_EPTE_SHIFT;
+	u64 ret;
+
+	if (is_mmio)
+		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+	else
+		ret = (kvm_get_guest_memory_type(vcpu, gfn) <<
+			VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IGMT_BIT;
+
+	return ret;
 }
 
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -3639,7 +3647,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
-	.get_mt_mask_shift = vmx_get_mt_mask_shift,
+	.get_mt_mask = vmx_get_mt_mask,
 };
 
 static int __init vmx_init(void)
@@ -3698,8 +3706,7 @@ static int __init vmx_init(void)
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
-				VMX_EPT_EXECUTABLE_MASK,
-				VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+				VMX_EPT_EXECUTABLE_MASK);
 		kvm_enable_tdp();
 	} else
 		kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e271371..dd05682 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2772,7 +2772,7 @@ int kvm_arch_init(void *opaque)
 	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
 	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+			PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
 	for_each_possible_cpu(cpu)
 		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
-- 
cgit v0.10.2


From 522c68c4416de3cd3e11a9ff10d58e776a69ae1e Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 27 Apr 2009 20:35:43 +0800
Subject: KVM: Enable snooping control for supported hardware

Memory aliases with different memory type is a problem for guest. For the guest
without assigned device, the memory type of guest memory would always been the
same as host(WB); but for the assigned device, some part of memory may be used
as DMA and then set to uncacheable memory type(UC/WC), which would be a conflict of
host memory type then be a potential issue.

Snooping control can guarantee the cache correctness of memory go through the
DMA engine of VT-d.

[avi: fix build on ia64]

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 589536f..5f43697 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -474,6 +474,7 @@ struct kvm_arch {
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
+	int iommu_flags;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8a6f6b6..253d8f6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -393,6 +393,7 @@ struct kvm_arch{
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
+	int iommu_flags;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 59b080c..e8a5649 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3581,11 +3581,26 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
 	u64 ret;
 
+	/* For VT-d and EPT combination
+	 * 1. MMIO: always map as UC
+	 * 2. EPT with VT-d:
+	 *   a. VT-d without snooping control feature: can't guarantee the
+	 *	result, try to trust guest.
+	 *   b. VT-d with snooping control feature: snooping control feature of
+	 *	VT-d engine can guarantee the cache correctness. Just set it
+	 *	to WB to keep consistent with host. So the same as item 3.
+	 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+	 *    consistent with host MTRR
+	 */
 	if (is_mmio)
 		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+	else if (vcpu->kvm->arch.iommu_domain &&
+		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+		ret = kvm_get_guest_memory_type(vcpu, gfn) <<
+		      VMX_EPT_MT_EPTE_SHIFT;
 	else
-		ret = (kvm_get_guest_memory_type(vcpu, gfn) <<
-			VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IGMT_BIT;
+		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
+			| VMX_EPT_IGMT_BIT;
 
 	return ret;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 72d5684..bdce8e1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -367,6 +367,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
+/* For vcpu->arch.iommu_flags */
+#define KVM_IOMMU_CACHE_COHERENCY	0x1
+
 #ifdef CONFIG_IOMMU_API
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
 			unsigned long npages);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c40375..1514758 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
+	int flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
+	flags = IOMMU_READ | IOMMU_WRITE;
+	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+		flags |= IOMMU_CACHE;
+
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
-				    PAGE_SIZE,
-				    IOMMU_READ | IOMMU_WRITE);
+				    PAGE_SIZE, flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
 {
 	struct pci_dev *pdev = NULL;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	int r;
+	int r, last_flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
 		return r;
 	}
 
+	last_flags = kvm->arch.iommu_flags;
+	if (iommu_domain_has_cap(kvm->arch.iommu_domain,
+				 IOMMU_CAP_CACHE_COHERENCY))
+		kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+
+	/* Check if need to update IOMMU page table for guest memory */
+	if ((last_flags ^ kvm->arch.iommu_flags) ==
+			KVM_IOMMU_CACHE_COHERENCY) {
+		kvm_iommu_unmap_memslots(kvm);
+		r = kvm_iommu_map_memslots(kvm);
+		if (r)
+			goto out_unmap;
+	}
+
 	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
 
 	return 0;
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
 }
 
 int kvm_deassign_device(struct kvm *kvm,
-- 
cgit v0.10.2


From 8e1c18157d8772e65247ac55525a05a26b83fffe Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 29 Apr 2009 11:09:04 +0800
Subject: KVM: VMX: Disable VMX when system shutdown

Intel TXT(Trusted Execution Technology) required VMX off for all cpu to work
when system shutdown.

CC: Joseph Cihula <joseph.cihula@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 63d5fa2..29c0afb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2391,15 +2391,15 @@ EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		      void *v)
 {
-	if (val == SYS_RESTART) {
-		/*
-		 * Some (well, at least mine) BIOSes hang on reboot if
-		 * in vmx root mode.
-		 */
-		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-		kvm_rebooting = true;
-		on_each_cpu(hardware_disable, NULL, 1);
-	}
+	/*
+	 * Some (well, at least mine) BIOSes hang on reboot if
+	 * in vmx root mode.
+	 *
+	 * And Intel TXT required VMX off for all cpu when system shutdown.
+	 */
+	printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+	kvm_rebooting = true;
+	on_each_cpu(hardware_disable, NULL, 1);
 	return NOTIFY_OK;
 }
 
-- 
cgit v0.10.2


From efbc100c20f9dc4a219a2a92763d68e4064109f2 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Fri, 1 May 2009 14:15:43 -0700
Subject: KVM: Trivial format fix in setup_routing_entry()

Remove extra tab.

Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 4fa1f60..a8bd466 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -271,7 +271,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			delta = 8;
 			break;
 		case KVM_IRQCHIP_IOAPIC:
-				e->set = kvm_set_ioapic_irq;
+			e->set = kvm_set_ioapic_irq;
 			break;
 		default:
 			goto out;
-- 
cgit v0.10.2


From 9b5843ddd20557b77c73ddbcc814a8c816bf3d3a Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@redhat.com>
Date: Wed, 29 Apr 2009 17:29:09 -0400
Subject: KVM: fix apic_debug instances

Apparently nobody turned this on in a while...
setting apic_debug to something compilable, generates
some errors. This patch fixes it.

Signed-off-by: Glauber Costa <glommer@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4d76bb6..ae99d83 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -445,7 +445,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
 		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
-		   icr_high, icr_low, irq.shorthand, irq.dest,
+		   icr_high, icr_low, irq.shorthand, irq.dest_id,
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
@@ -560,7 +560,7 @@ static void update_divide_count(struct kvm_lapic *apic)
 	apic->divide_count = 0x1 << (tmp2 & 0x7);
 
 	apic_debug("timer divide count is 0x%x\n",
-				   apic->lapic_timer.divide_count);
+				   apic->divide_count);
 }
 
 static void start_apic_timer(struct kvm_lapic *apic)
-- 
cgit v0.10.2


From b586eb0253083795e58dcbe76665410d4676dc08 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@amd.com>
Date: Tue, 28 Apr 2009 12:45:43 +0200
Subject: KVM: SVM: Fix cross vendor migration issue in segment segment
 descriptor

On AMD CPUs sometimes the DB bit in the stack segment
descriptor is left as 1, although the whole segment has
been made unusable. Clear it here to pass an Intel VMX
entry check when cross vendor migrating.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6350378..e3ea982 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -803,6 +803,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 		if (!var->unusable)
 			var->type |= 0x1;
 		break;
+	case VCPU_SREG_SS:
+		/* On AMD CPUs sometimes the DB bit in the segment
+		 * descriptor is left as 1, although the whole segment has
+		 * been made unusable. Clear it here to pass an Intel VMX
+		 * entry check when cross vendor migrating.
+		 */
+		if (var->unusable)
+			var->db = 0;
+		break;
 	}
 }
 
-- 
cgit v0.10.2


From fe8e7f83de3f0979f13f57992cb33e4a8041272d Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 23 Apr 2009 17:03:48 +0300
Subject: KVM: SVM: Don't reinject event that caused a task switch

If a task switch caused by an event remove it from the event queue.
VMX already does that.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e3ea982..f994c6d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1811,6 +1811,10 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	int int_type = svm->vmcb->control.exit_int_info &
 		SVM_EXITINTINFO_TYPE_MASK;
 	int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
+	uint32_t type =
+		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
+	uint32_t idt_v =
+		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
 
 	tss_selector = (u16)svm->vmcb->control.exit_info_1;
 
@@ -1820,11 +1824,26 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	else if (svm->vmcb->control.exit_info_2 &
 		 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
 		reason = TASK_SWITCH_JMP;
-	else if (svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID)
+	else if (idt_v)
 		reason = TASK_SWITCH_GATE;
 	else
 		reason = TASK_SWITCH_CALL;
 
+	if (reason == TASK_SWITCH_GATE) {
+		switch (type) {
+		case SVM_EXITINTINFO_TYPE_NMI:
+			svm->vcpu.arch.nmi_injected = false;
+			break;
+		case SVM_EXITINTINFO_TYPE_EXEPT:
+			kvm_clear_exception_queue(&svm->vcpu);
+			break;
+		case SVM_EXITINTINFO_TYPE_INTR:
+			kvm_clear_interrupt_queue(&svm->vcpu);
+			break;
+		default:
+			break;
+		}
+	}
 
 	if (reason != TASK_SWITCH_GATE ||
 	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
@@ -2203,7 +2222,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
 	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
-	    exit_code != SVM_EXIT_NPF)
+	    exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
 		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
 		       "exit_code 0x%x\n",
 		       __func__, svm->vmcb->control.exit_int_info,
-- 
cgit v0.10.2


From d6a8c875f35a6e1b3fb3f21e93eabb183b1f39ee Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 20 Apr 2009 18:10:07 +0200
Subject: KVM: Drop request_nmi from stats

The stats entry request_nmi is no longer used as the related user space
interface was dropped. So clean it up.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 253d8f6..ab7de4a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -441,7 +441,6 @@ struct kvm_vcpu_stat {
 	u32 halt_exits;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
-	u32 request_nmi_exits;
 	u32 irq_exits;
 	u32 host_state_reload;
 	u32 efer_reload;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dd05682..a0faa48 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -91,7 +91,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
-	{ "request_nmi", VCPU_STAT(request_nmi_exits) },
 	{ "irq_exits", VCPU_STAT(irq_exits) },
 	{ "host_state_reload", VCPU_STAT(host_state_reload) },
 	{ "efer_reload", VCPU_STAT(efer_reload) },
-- 
cgit v0.10.2


From 8d753f369bd28fff1706ffe9fb9fea4fd88cf85b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 10 May 2009 11:41:39 +0300
Subject: KVM: Fix cpuid feature misreporting

MTRR, PAT, MCE, and MCA are all supported (to some extent) but not reported.
Vista requires these features, so if userspace relies on kernel cpuid
reporting, it loses support for Vista.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a0faa48..da3e65b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1254,9 +1254,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
 		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
 		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
+		bit(X86_FEATURE_MCE) |
 		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
-		bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
-		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
+		bit(X86_FEATURE_SEP) | bit(X86_FEATURE_MTRR) |
+		bit(X86_FEATURE_PGE) | bit(X86_FEATURE_MCA) |
+		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PAT) |
+		bit(X86_FEATURE_PSE36) |
 		bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
 		bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
 		bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
-- 
cgit v0.10.2


From 7faa4ee1c777a5f8e8373430cfd9cb6172aa3503 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 10 May 2009 13:55:35 +0300
Subject: KVM: Add AMD cpuid bit: cr8_legacy, abm, misaligned sse, sse4, 3dnow
 prefetch

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index da3e65b..95de3d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1247,44 +1247,47 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	entry->flags = 0;
 }
 
+#define F(x) bit(X86_FEATURE_##x)
+
 static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			 u32 index, int *nent, int maxnent)
 {
-	const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) |
-		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
-		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
-		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
-		bit(X86_FEATURE_MCE) |
-		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
-		bit(X86_FEATURE_SEP) | bit(X86_FEATURE_MTRR) |
-		bit(X86_FEATURE_PGE) | bit(X86_FEATURE_MCA) |
-		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PAT) |
-		bit(X86_FEATURE_PSE36) |
-		bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
-		bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
-		bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
-	const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
-		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
-		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
-		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
-		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
-		bit(X86_FEATURE_PGE) |
-		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
-		bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
-		bit(X86_FEATURE_SYSCALL) |
-		(is_efer_nx() ? bit(X86_FEATURE_NX) : 0) |
+	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
 #ifdef CONFIG_X86_64
-		bit(X86_FEATURE_LM) |
+	unsigned f_lm = F(LM);
+#else
+	unsigned f_lm = 0;
 #endif
-		bit(X86_FEATURE_FXSR_OPT) |
-		bit(X86_FEATURE_MMXEXT) |
-		bit(X86_FEATURE_3DNOWEXT) |
-		bit(X86_FEATURE_3DNOW);
-	const u32 kvm_supported_word3_x86_features =
-		bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
+
+	/* cpuid 1.edx */
+	const u32 kvm_supported_word0_x86_features =
+		F(FPU) | F(VME) | F(DE) | F(PSE) |
+		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
+		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
+		0 /* Reserved, DS, ACPI */ | F(MMX) |
+		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
+		0 /* HTT, TM, Reserved, PBE */;
+	/* cpuid 0x80000001.edx */
+	const u32 kvm_supported_word1_x86_features =
+		F(FPU) | F(VME) | F(DE) | F(PSE) |
+		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
+		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+		F(PAT) | F(PSE36) | 0 /* Reserved */ |
+		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+		F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ |
+		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
+	/* cpuid 1.ecx */
+	const u32 kvm_supported_word4_x86_features =
+		F(XMM3) | F(CX16);
+	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =
-		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
-		bit(X86_FEATURE_SVM);
+		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
+		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
+		F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
+		0 /* SKINIT */ | 0 /* WDT */;
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
@@ -1297,7 +1300,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		break;
 	case 1:
 		entry->edx &= kvm_supported_word0_x86_features;
-		entry->ecx &= kvm_supported_word3_x86_features;
+		entry->ecx &= kvm_supported_word4_x86_features;
 		break;
 	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
 	 * may return different values. This forces us to get_cpu() before
@@ -1359,6 +1362,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	put_cpu();
 }
 
+#undef F
+
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				     struct kvm_cpuid_entry2 __user *entries)
 {
-- 
cgit v0.10.2


From 069ebaa4644521e8e80b6849ace4dee53f93f55e Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 10 May 2009 14:37:56 +0300
Subject: x86: Add cpu features MOVBE and POPCNT

Add cpu feature bit support for the MOVBE and POPCNT instructions.

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb83b1c..9c63bf3 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -115,6 +115,8 @@
 #define X86_FEATURE_XMM4_1	(4*32+19) /* "sse4_1" SSE-4.1 */
 #define X86_FEATURE_XMM4_2	(4*32+20) /* "sse4_2" SSE-4.2 */
 #define X86_FEATURE_X2APIC	(4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE	(4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT      (4*32+23) /* POPCNT instruction */
 #define X86_FEATURE_AES		(4*32+25) /* AES instructions */
 #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
-- 
cgit v0.10.2


From d149c731e4f71982247a14409951259f36271dd7 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 10 May 2009 14:41:56 +0300
Subject: KVM: Update cpuid 1.ecx reporting

Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 95de3d3..7592770 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1281,7 +1281,13 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
 	/* cpuid 1.ecx */
 	const u32 kvm_supported_word4_x86_features =
-		F(XMM3) | F(CX16);
+		F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
+		0 /* DS-CPL, VMX, SMX, EST */ |
+		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
+		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
+		0 /* Reserved, DCA */ | F(XMM4_1) |
+		F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) |
+		0 /* Reserved, XSAVE, OSXSAVE */;
 	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =
 		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
-- 
cgit v0.10.2


From 32f8840064d88cc3f6e85203aec7b6b57bebcb97 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 7 May 2009 17:55:12 -0300
Subject: KVM: use smp_send_reschedule in kvm_vcpu_kick

KVM uses a function call IPI to cause the exit of a guest running on a
physical cpu. For virtual interrupt notification there is no need to
wait on IPI receival, or to execute any function.

This is exactly what the reschedule IPI does, without the overhead
of function IPI. So use it instead of smp_call_function_single in
kvm_vcpu_kick.

Also change the "guest_mode" variable to a bit in vcpu->requests, and
use that to collapse multiple IPI's that would be issued between the
first one and zeroing of guest mode.

This allows kvm_vcpu_kick to called with interrupts disabled.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index acc4d19..b448197 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = {
 	.name =		"IPI"
 };
 
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
 static struct irqaction resched_irqaction = {
 	.handler =	dummy_handler,
 	.flags =	IRQF_DISABLED,
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index be4413e..80c57b0 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -668,7 +668,7 @@ again:
 	host_ctx = kvm_get_host_context(vcpu);
 	guest_ctx = kvm_get_guest_context(vcpu);
 
-	vcpu->guest_mode = 1;
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
 
 	r = kvm_vcpu_pre_transition(vcpu);
 	if (r < 0)
@@ -685,7 +685,7 @@ again:
 	kvm_vcpu_post_transition(vcpu);
 
 	vcpu->arch.launched = 1;
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	/*
@@ -1879,24 +1879,18 @@ void kvm_arch_hardware_unsetup(void)
 {
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu = get_cpu();
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-	if (vcpu->guest_mode && cpu != ipi_pcpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+	me = get_cpu();
+	if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 13f33ea..3b2e55e 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	/*
+	 * KVM uses this interrupt to force a cpu out of guest mode
+	 */
 }
 
 void smp_call_function_interrupt(struct pt_regs *regs)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7592770..3c4c327 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3230,6 +3230,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	local_irq_disable();
 
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
+	smp_mb__after_clear_bit();
+
 	if (vcpu->requests || need_resched() || signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
@@ -3237,13 +3240,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
-	vcpu->guest_mode = 1;
-	/*
-	 * Make sure that guest_mode assignment won't happen after
-	 * testing the pending IRQ vector bitmap.
-	 */
-	smp_wmb();
-
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else
@@ -3288,7 +3284,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	set_debugreg(vcpu->arch.host_dr6, 6);
 	set_debugreg(vcpu->arch.host_dr7, 7);
 
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	++vcpu->stat.exits;
@@ -4571,30 +4567,20 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	       || vcpu->arch.nmi_pending;
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu;
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
 		++vcpu->stat.halt_wakeup;
 	}
-	/*
-	 * We may be called synchronously with irqs disabled in guest mode,
-	 * So need not to call smp_call_function_single() in that case.
-	 */
-	cpu = get_cpu();
-	if (vcpu->guest_mode && vcpu->cpu != cpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+
+	me = get_cpu();
+	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bdce8e1..1618162 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
 #define KVM_REQ_KVMCLOCK_UPDATE    8
+#define KVM_REQ_KICK               9
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
@@ -72,7 +73,6 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	int   cpu;
 	struct kvm_run *run;
-	int guest_mode;
 	unsigned long requests;
 	unsigned long guest_debug;
 	int fpu_active;
-- 
cgit v0.10.2


From 547de29e5b1662deb05b5f90917902dc0e9ac182 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 7 May 2009 17:55:13 -0300
Subject: KVM: protect assigned dev workqueue, int handler and irq acker

kvm_assigned_dev_ack_irq is vulnerable to a race condition with the
interrupt handler function. It does:

        if (dev->host_irq_disabled) {
                enable_irq(dev->host_irq);
                dev->host_irq_disabled = false;
        }

If an interrupt triggers before the host->dev_irq_disabled assignment,
it will disable the interrupt and set dev->host_irq_disabled to true.

On return to kvm_assigned_dev_ack_irq, dev->host_irq_disabled is set to
false, and the next kvm_assigned_dev_ack_irq call will fail to reenable
it.

Other than that, having the interrupt handler and work handlers run in
parallel sounds like asking for trouble (could not spot any obvious
problem, but better not have to, its fragile).

CC: sheng.yang@intel.com
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1618162..aacc544 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -345,6 +345,7 @@ struct kvm_assigned_dev_kernel {
 	int flags;
 	struct pci_dev *dev;
 	struct kvm *kvm;
+	spinlock_t assigned_dev_lock;
 };
 
 struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 29c0afb..687d113 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -42,6 +42,7 @@
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/bitops.h>
+#include <linux/spinlock.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -130,6 +131,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&kvm->lock);
+	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
 			assigned_dev->guest_msix_entries;
@@ -156,18 +158,21 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 		}
 	}
 
+	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
+	unsigned long flags;
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
+	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int index = find_index_from_host_irq(assigned_dev, irq);
 		if (index < 0)
-			return IRQ_HANDLED;
+			goto out;
 		assigned_dev->guest_msix_entries[index].flags |=
 			KVM_ASSIGNED_MSIX_PENDING;
 	}
@@ -177,6 +182,8 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 	disable_irq_nosync(irq);
 	assigned_dev->host_irq_disabled = true;
 
+out:
+	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 	return IRQ_HANDLED;
 }
 
@@ -184,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_assigned_dev_kernel *dev;
+	unsigned long flags;
 
 	if (kian->gsi == -1)
 		return;
@@ -196,10 +204,12 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	/* The guest irq may be shared so this ack may be
 	 * from another device.
 	 */
+	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
 	if (dev->host_irq_disabled) {
 		enable_irq(dev->host_irq);
 		dev->host_irq_disabled = false;
 	}
+	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
 }
 
 static void deassign_guest_irq(struct kvm *kvm,
@@ -615,6 +625,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
 	match->dev = dev;
+	spin_lock_init(&match->assigned_dev_lock);
 	match->irq_source_id = -1;
 	match->kvm = kvm;
 	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-- 
cgit v0.10.2


From 2809f5d2c4cfad171167b131bb2a21ab65eba40f Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@redhat.com>
Date: Tue, 12 May 2009 16:21:05 -0400
Subject: KVM: Replace ->drop_interrupt_shadow() by ->set_interrupt_shadow()

This patch replaces drop_interrupt_shadow with the more
general set_interrupt_shadow, that can either drop or raise
it, depending on its parameter.  It also adds ->get_interrupt_shadow()
for future use.

Signed-off-by: Glauber Costa <glommer@redhat.com>
CC: H. Peter Anvin <hpa@zytor.com>
CC: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ab7de4a..16d1481 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -508,6 +508,8 @@ struct kvm_x86_ops {
 	void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
+	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
@@ -519,7 +521,6 @@ struct kvm_x86_ops {
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-	void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 6a15973..be40d6e 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -143,6 +143,9 @@ struct decode_cache {
 	struct fetch_cache fetch;
 };
 
+#define X86_SHADOW_INT_MOV_SS  1
+#define X86_SHADOW_INT_STI     2
+
 struct x86_emulate_ctxt {
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f994c6d..8b5ffbd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -202,6 +202,27 @@ static int is_external_interrupt(u32 info)
 	return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
 }
 
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u32 ret = 0;
+
+	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+		ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
+	return ret & mask;
+}
+
+static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (mask == 0)
+		svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+	else
+		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
+
+}
+
 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -215,7 +236,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 		       __func__, kvm_rip_read(vcpu), svm->next_rip);
 
 	kvm_rip_write(vcpu, svm->next_rip);
-	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+	svm_set_interrupt_shadow(vcpu, 0);
 }
 
 static int has_svm(void)
@@ -2259,12 +2280,6 @@ static void pre_svm_run(struct vcpu_svm *svm)
 		new_asid(svm, svm_data);
 }
 
-static void svm_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
-}
-
 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2667,6 +2682,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.run = svm_vcpu_run,
 	.handle_exit = handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
+	.set_interrupt_shadow = svm_set_interrupt_shadow,
+	.get_interrupt_shadow = svm_get_interrupt_shadow,
 	.patch_hypercall = svm_patch_hypercall,
 	.set_irq = svm_set_irq,
 	.set_nmi = svm_inject_nmi,
@@ -2676,7 +2693,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
-	.drop_interrupt_shadow = svm_drop_interrupt_shadow,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e8a5649..f3ab27b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -736,23 +736,45 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
 
+static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	int ret = 0;
+
+	if (interruptibility & GUEST_INTR_STATE_STI)
+		ret |= X86_SHADOW_INT_STI;
+	if (interruptibility & GUEST_INTR_STATE_MOV_SS)
+		ret |= X86_SHADOW_INT_MOV_SS;
+
+	return ret & mask;
+}
+
+static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+	u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	u32 interruptibility = interruptibility_old;
+
+	interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
+
+	if (mask & X86_SHADOW_INT_MOV_SS)
+		interruptibility |= GUEST_INTR_STATE_MOV_SS;
+	if (mask & X86_SHADOW_INT_STI)
+		interruptibility |= GUEST_INTR_STATE_STI;
+
+	if ((interruptibility != interruptibility_old))
+		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
+}
+
 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	unsigned long rip;
-	u32 interruptibility;
 
 	rip = kvm_rip_read(vcpu);
 	rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	kvm_rip_write(vcpu, rip);
 
-	/*
-	 * We emulated an instruction, so temporary interrupt blocking
-	 * should be removed, if set.
-	 */
-	interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	if (interruptibility & 3)
-		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
-			     interruptibility & ~3);
+	/* skipping an emulated instruction also counts */
+	vmx_set_interrupt_shadow(vcpu, 0);
 }
 
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -2400,12 +2422,6 @@ out:
 	return ret;
 }
 
-void vmx_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
-{
-	vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-			GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
-}
-
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
@@ -3649,6 +3665,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.run = vmx_vcpu_run,
 	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
+	.set_interrupt_shadow = vmx_set_interrupt_shadow,
+	.get_interrupt_shadow = vmx_get_interrupt_shadow,
 	.patch_hypercall = vmx_patch_hypercall,
 	.set_irq = vmx_inject_irq,
 	.set_nmi = vmx_inject_nmi,
@@ -3658,7 +3676,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
-	.drop_interrupt_shadow = vmx_drop_interrupt_shadow,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3c4c327..7475b02 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3178,7 +3178,7 @@ static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		kvm_run->request_interrupt_window;
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		kvm_x86_ops->drop_interrupt_shadow(vcpu);
+		kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
 
 	inject_irq(vcpu);
 
-- 
cgit v0.10.2


From 310b5d306c1aee7ebe32f702c0e33e7988d50646 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@redhat.com>
Date: Tue, 12 May 2009 16:21:06 -0400
Subject: KVM: Deal with interrupt shadow state for emulated instructions

We currently unblock shadow interrupt state when we skip an instruction,
but failing to do so when we actually emulate one. This blocks interrupts
in key instruction blocks, in particular sti; hlt; sequences

If the instruction emulated is an sti, we have to block shadow interrupts.
The same goes for mov ss. pop ss also needs it, but we don't currently
emulate it.

Without this patch, I cannot boot gpxe option roms at vmx machines.
This is described at https://bugzilla.redhat.com/show_bug.cgi?id=494469

Signed-off-by: Glauber Costa <glommer@redhat.com>
CC: H. Peter Anvin <hpa@zytor.com>
CC: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index be40d6e..b7ed2c4 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -155,6 +155,9 @@ struct x86_emulate_ctxt {
 	int mode;
 	u32 cs_base;
 
+	/* interruptibility state, as a result of execution of STI or MOV SS */
+	int interruptibility;
+
 	/* decode cache */
 	struct decode_cache decode;
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7475b02..48f744f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2379,7 +2379,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 			u16 error_code,
 			int emulation_type)
 {
-	int r;
+	int r, shadow_mask;
 	struct decode_cache *c;
 
 	kvm_clear_exception_queue(vcpu);
@@ -2433,6 +2433,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 	}
 
 	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
+	shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
+
+	if (r == 0)
+		kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
 
 	if (vcpu->arch.pio.string)
 		return EMULATE_DO_MMIO;
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d2664fc..c1b6c23 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1361,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
 	return 0;
 }
 
+void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
+{
+	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
+	/*
+	 * an sti; sti; sequence only disable interrupts for the first
+	 * instruction. So, if the last instruction, be it emulated or
+	 * not, left the system with the INT_STI flag enabled, it
+	 * means that the last instruction is an sti. We should not
+	 * leave the flag on in this case. The same goes for mov ss
+	 */
+	if (!(int_shadow & mask))
+		ctxt->interruptibility = mask;
+}
+
 int
 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
@@ -1372,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	int io_dir_in;
 	int rc = 0;
 
+	ctxt->interruptibility = 0;
+
 	/* Shadow copy of register state. Committed on successful emulation.
 	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
 	 * modify them.
@@ -1618,6 +1634,9 @@ special_insn:
 		int err;
 
 		sel = c->src.val;
+		if (c->modrm_reg == VCPU_SREG_SS)
+			toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
+
 		if (c->modrm_reg <= 5) {
 			type_bits = (c->modrm_reg == 1) ? 9 : 1;
 			err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
@@ -1847,6 +1866,7 @@ special_insn:
 		c->dst.type = OP_NONE;	/* Disable writeback. */
 		break;
 	case 0xfb: /* sti */
+		toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
 		ctxt->eflags |= X86_EFLAGS_IF;
 		c->dst.type = OP_NONE;	/* Disable writeback. */
 		break;
-- 
cgit v0.10.2


From 7c8a83b75a38a807d37f5a4398eca2a42c8cf513 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 12 May 2009 18:55:43 -0300
Subject: KVM: MMU: protect kvm_mmu_change_mmu_pages with mmu_lock

kvm_handle_hva, called by MMU notifiers, manipulates mmu data only with
the protection of mmu_lock.

Update kvm_mmu_change_mmu_pages callers to take mmu_lock, thus protecting
against kvm_handle_hva.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index bc614f9..3ce60ad 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2723,7 +2723,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 {
 	struct kvm_mmu_page *sp;
 
-	spin_lock(&kvm->mmu_lock);
 	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
 		int i;
 		u64 *pt;
@@ -2738,7 +2737,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 				pt[i] &= ~PT_WRITABLE_MASK;
 	}
 	kvm_flush_remote_tlbs(kvm);
-	spin_unlock(&kvm->mmu_lock);
 }
 
 void kvm_mmu_zap_all(struct kvm *kvm)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 48f744f..d2a4eca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1625,10 +1625,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 		return -EINVAL;
 
 	down_write(&kvm->slots_lock);
+	spin_lock(&kvm->mmu_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
+	spin_unlock(&kvm->mmu_lock);
 	up_write(&kvm->slots_lock);
 	return 0;
 }
@@ -1804,7 +1806,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
+		spin_lock(&kvm->mmu_lock);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		spin_unlock(&kvm->mmu_lock);
 		kvm_flush_remote_tlbs(kvm);
 		memslot = &kvm->memslots[log->slot];
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
@@ -4548,12 +4552,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 		}
 	}
 
+	spin_lock(&kvm->mmu_lock);
 	if (!kvm->arch.n_requested_mmu_pages) {
 		unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
 	}
 
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+	spin_unlock(&kvm->mmu_lock);
 	kvm_flush_remote_tlbs(kvm);
 
 	return 0;
-- 
cgit v0.10.2


From b43b1901ad282aeb74161837fb403927102687a1 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 12 May 2009 18:55:44 -0300
Subject: KVM: take mmu_lock when updating a deleted slot

kvm_handle_hva relies on mmu_lock protection to safely access
the memslot structures.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 687d113..5fed9bf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1210,8 +1210,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_free_physmem_slot(&old, npages ? &new : NULL);
 	/* Slot deletion case: we have to update the current slot */
+	spin_lock(&kvm->mmu_lock);
 	if (!npages)
 		*memslot = old;
+	spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
 	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
-- 
cgit v0.10.2


From 8986ecc0ef58c96eec48d8502c048f3ab67fd8e2 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 12 May 2009 18:55:45 -0300
Subject: KVM: x86: check for cr3 validity in mmu_alloc_roots

Verify the cr3 address stored in vcpu->arch.cr3 points to an existant
memslot. If not, inject a triple fault.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3ce60ad..5c3d6e8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1912,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
 
-static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
+{
+	int ret = 0;
+
+	if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
+		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+		ret = 1;
+	}
+
+	return ret;
+}
+
+static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
 	gfn_t root_gfn;
@@ -1927,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		ASSERT(!VALID_PAGE(root));
 		if (tdp_enabled)
 			direct = 1;
+		if (mmu_check_root(vcpu, root_gfn))
+			return 1;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
 				      PT64_ROOT_LEVEL, direct,
 				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.root_hpa = root;
-		return;
+		return 0;
 	}
 	direct = !is_paging(vcpu);
 	if (tdp_enabled)
@@ -1950,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 			root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
 		} else if (vcpu->arch.mmu.root_level == 0)
 			root_gfn = 0;
+		if (mmu_check_root(vcpu, root_gfn))
+			return 1;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 				      PT32_ROOT_LEVEL, direct,
 				      ACC_ALL, NULL);
@@ -1958,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
 	}
 	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+	return 0;
 }
 
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -1976,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
-		if (root) {
+		if (root && VALID_PAGE(root)) {
 			root &= PT64_BASE_ADDR_MASK;
 			sp = page_header(root);
 			mmu_sync_children(vcpu, sp);
@@ -2311,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 		goto out;
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
-	mmu_alloc_roots(vcpu);
+	r = mmu_alloc_roots(vcpu);
 	mmu_sync_roots(vcpu);
 	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (r)
+		goto out;
 	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
 	kvm_mmu_flush_tlb(vcpu);
 out:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d2a4eca..3244437 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4568,6 +4568,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 	kvm_mmu_zap_all(kvm);
+	kvm_reload_remote_mmus(kvm);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From 58f8ac279a8c46eb0a3193edd521ed2e41c4f914 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Tue, 12 May 2009 13:44:06 -0700
Subject: KVM: Expand on "help" info to specify kvm intel and amd module names

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a58504e..8600a09 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -50,6 +50,9 @@ config KVM_INTEL
 	  Provides support for KVM on Intel processors equipped with the VT
 	  extensions.
 
+	  To compile this as a module, choose M here: the module
+	  will be called kvm-intel.
+
 config KVM_AMD
 	tristate "KVM for AMD processors support"
 	depends on KVM
@@ -57,6 +60,9 @@ config KVM_AMD
 	  Provides support for KVM on AMD processors equipped with the AMD-V
 	  (SVM) extensions.
 
+	  To compile this as a module, choose M here: the module
+	  will be called kvm-amd.
+
 config KVM_TRACE
 	bool "KVM trace support"
 	depends on KVM && SYSFS
-- 
cgit v0.10.2


From 2668dab794272f0898491acaf1e77e9a005abc0f Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Tue, 12 May 2009 17:21:48 +0200
Subject: KVM: s390: Fix memory slot versus run - v3

This patch fixes an incorrectness in the kvm backend for s390.
In case virtual cpus are being created before the corresponding
memory slot is being registered, we need to update the sie
control blocks for the virtual cpus.

*updates in v3*
In consideration of the s390 memslot constraints locking was changed
to trylock. These locks should never be held, as vcpu's can't run without
the single memslot we just assign when running this code. To ensure this
never deadlocks in case other code changes the code uses trylocks and bail
out if it can't get all locks.

Additionally most of the discussed special conditions for s390 like
only one memslot and no user_alloc are now checked for validity in
kvm_arch_set_memory_region.

Reported-by: Mijo Safradin <mijo@linux.vnet.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f4d56e9..86567e1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -657,6 +657,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot old,
 				int user_alloc)
 {
+	int i;
+
 	/* A few sanity checks. We can have exactly one memory slot which has
 	   to start at guest virtual zero and which has to be located at a
 	   page boundary in userland and which has to end at a page boundary.
@@ -664,7 +666,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	   vmas. It is okay to mmap() and munmap() stuff in this slot after
 	   doing this call at any time */
 
-	if (mem->slot)
+	if (mem->slot || kvm->arch.guest_memsize)
 		return -EINVAL;
 
 	if (mem->guest_phys_addr)
@@ -676,15 +678,39 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	if (mem->memory_size & (PAGE_SIZE - 1))
 		return -EINVAL;
 
+	if (!user_alloc)
+		return -EINVAL;
+
+	/* lock all vcpus */
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (!kvm->vcpus[i])
+			continue;
+		if (!mutex_trylock(&kvm->vcpus[i]->mutex))
+			goto fail_out;
+	}
+
 	kvm->arch.guest_origin = mem->userspace_addr;
 	kvm->arch.guest_memsize = mem->memory_size;
 
-	/* FIXME: we do want to interrupt running CPUs and update their memory
-	   configuration now to avoid race conditions. But hey, changing the
-	   memory layout while virtual CPUs are running is usually bad
-	   programming practice. */
+	/* update sie control blocks, and unlock all vcpus */
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i]) {
+			kvm->vcpus[i]->arch.sie_block->gmsor =
+				kvm->arch.guest_origin;
+			kvm->vcpus[i]->arch.sie_block->gmslm =
+				kvm->arch.guest_memsize +
+				kvm->arch.guest_origin +
+				VIRTIODESCSPACE - 1ul;
+			mutex_unlock(&kvm->vcpus[i]->mutex);
+		}
+	}
 
 	return 0;
+
+fail_out:
+	for (; i >= 0; i--)
+		mutex_unlock(&kvm->vcpus[i]->mutex);
+	return -EINVAL;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
-- 
cgit v0.10.2


From ca8723023f25c9a70d76cbd6101f8fb4ffec2fa0 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 12 May 2009 17:21:49 +0200
Subject: KVM: s390: use hrtimer for clock wakeup from idle - v2

This patch reworks the s390 clock comparator wakeup to hrtimer. The clock
comparator is a per-cpu value that is compared against the TOD clock. If
ckc <= TOD an external interrupt 1004 is triggered. Since the clock comparator
and the TOD clock have a much higher resolution than jiffies we should use
hrtimers to trigger the wakeup. This speeds up guest nanosleep for small
values.

Since hrtimers callbacks run in hard-irq context, I added a tasklet to do
the actual work with enabled interrupts.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 54ea39f..a27d0d5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,6 +13,8 @@
 
 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <asm/debug.h>
 #include <asm/cpuid.h>
@@ -210,7 +212,8 @@ struct kvm_vcpu_arch {
 	s390_fp_regs      guest_fpregs;
 	unsigned int      guest_acrs[NUM_ACRS];
 	struct kvm_s390_local_interrupt local_int;
-	struct timer_list ckc_timer;
+	struct hrtimer    ckc_timer;
+	struct tasklet_struct tasklet;
 	union  {
 		cpuid_t	  cpu_id;
 		u64	  stidp_data;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4ed4c3a..a48830f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -12,6 +12,8 @@
 
 #include <asm/lowcore.h>
 #include <asm/uaccess.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/signal.h>
 #include "kvm-s390.h"
@@ -361,12 +363,10 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
-	sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+	sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9;
 
-	vcpu->arch.ckc_timer.expires = jiffies + sltime;
-
-	add_timer(&vcpu->arch.ckc_timer);
-	VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime);
+	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
+	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
 	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
 	spin_lock_bh(&vcpu->arch.local_int.lock);
@@ -389,21 +389,34 @@ no_timer:
 	remove_wait_queue(&vcpu->wq, &wait);
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
 	spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
-	del_timer(&vcpu->arch.ckc_timer);
+	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
 	return 0;
 }
 
-void kvm_s390_idle_wakeup(unsigned long data)
+void kvm_s390_tasklet(unsigned long parm)
 {
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
 
-	spin_lock_bh(&vcpu->arch.local_int.lock);
+	spin_lock(&vcpu->arch.local_int.lock);
 	vcpu->arch.local_int.timer_due = 1;
 	if (waitqueue_active(&vcpu->arch.local_int.wq))
 		wake_up_interruptible(&vcpu->arch.local_int.wq);
-	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	spin_unlock(&vcpu->arch.local_int.lock);
 }
 
+/*
+ * low level hrtimer wake routine. Because this runs in hardirq context
+ * we schedule a tasklet to do the real work.
+ */
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+	tasklet_schedule(&vcpu->arch.tasklet);
+
+	return HRTIMER_NORESTART;
+}
 
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 86567e1..dc3d068 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/hrtimer.h>
 #include <linux/init.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
@@ -283,8 +284,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
 	vcpu->arch.sie_block->ecb   = 2;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
-	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
-		 (unsigned long) vcpu);
+	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
+		     (unsigned long) vcpu);
+	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 	get_cpu_id(&vcpu->arch.cpu_id);
 	vcpu->arch.cpu_id.version = 0xff;
 	return 0;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 00bbe69..748fee8 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -14,6 +14,7 @@
 #ifndef ARCH_S390_KVM_S390_H
 #define ARCH_S390_KVM_S390_H
 
+#include <linux/hrtimer.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
@@ -41,7 +42,8 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
 }
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
-void kvm_s390_idle_wakeup(unsigned long data);
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
+void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 int kvm_s390_inject_vm(struct kvm *kvm,
 		struct kvm_s390_interrupt *s390int);
-- 
cgit v0.10.2


From b037a4f34ec51b6c8ccb352a04056c04a4bfc269 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 12 May 2009 17:21:50 +0200
Subject: KVM: s390: optimize float int lock: spin_lock_bh --> spin_lock

The floating interrupt lock is only taken in process context. We can
replace all spin_lock_bh with standard spin_lock calls.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a48830f..f04f530 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -301,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	}
 
 	if ((!rc) && atomic_read(&fi->active)) {
-		spin_lock_bh(&fi->lock);
+		spin_lock(&fi->lock);
 		list_for_each_entry(inti, &fi->list, list)
 			if (__interrupt_is_deliverable(vcpu, inti)) {
 				rc = 1;
 				break;
 			}
-		spin_unlock_bh(&fi->lock);
+		spin_unlock(&fi->lock);
 	}
 
 	if ((!rc) && (vcpu->arch.sie_block->ckc <
@@ -368,7 +368,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
 	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
-	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+	spin_lock(&vcpu->arch.local_int.float_int->lock);
 	spin_lock_bh(&vcpu->arch.local_int.lock);
 	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
 	while (list_empty(&vcpu->arch.local_int.list) &&
@@ -377,18 +377,18 @@ no_timer:
 		!signal_pending(current)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		spin_unlock_bh(&vcpu->arch.local_int.lock);
-		spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_unlock(&vcpu->arch.local_int.float_int->lock);
 		vcpu_put(vcpu);
 		schedule();
 		vcpu_load(vcpu);
-		spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_lock(&vcpu->arch.local_int.float_int->lock);
 		spin_lock_bh(&vcpu->arch.local_int.lock);
 	}
 	__unset_cpu_idle(vcpu);
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&vcpu->wq, &wait);
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
-	spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+	spin_unlock(&vcpu->arch.local_int.float_int->lock);
 	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
 	return 0;
 }
@@ -455,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 	if (atomic_read(&fi->active)) {
 		do {
 			deliver = 0;
-			spin_lock_bh(&fi->lock);
+			spin_lock(&fi->lock);
 			list_for_each_entry_safe(inti, n, &fi->list, list) {
 				if (__interrupt_is_deliverable(vcpu, inti)) {
 					list_del(&inti->list);
@@ -466,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 			}
 			if (list_empty(&fi->list))
 				atomic_set(&fi->active, 0);
-			spin_unlock_bh(&fi->lock);
+			spin_unlock(&fi->lock);
 			if (deliver) {
 				__do_deliver_interrupt(vcpu, inti);
 				kfree(inti);
@@ -531,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 
 	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	list_add_tail(&inti->list, &fi->list);
 	atomic_set(&fi->active, 1);
 	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
@@ -548,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	if (waitqueue_active(&li->wq))
 		wake_up_interruptible(&li->wq);
 	spin_unlock_bh(&li->lock);
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 	mutex_unlock(&kvm->lock);
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index dc3d068..36c654d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -318,11 +318,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	spin_lock_init(&vcpu->arch.local_int.lock);
 	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
-	spin_lock_bh(&kvm->arch.float_int.lock);
+	spin_lock(&kvm->arch.float_int.lock);
 	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
 	init_waitqueue_head(&vcpu->arch.local_int.wq);
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
-	spin_unlock_bh(&kvm->arch.float_int.lock);
+	spin_unlock(&kvm->arch.float_int.lock);
 
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 4b88834..93ecd06 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -204,11 +204,11 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
 	int cpus = 0;
 	int n;
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	for (n = 0; n < KVM_MAX_VCPUS; n++)
 		if (fi->local_int[n])
 			cpus++;
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 
 	/* deal with other level 3 hypervisors */
 	if (stsi(mem, 3, 2, 2) == -ENOSYS)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index f27dbed..3667883 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -52,7 +52,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
 	if (cpu_addr >= KVM_MAX_VCPUS)
 		return 3; /* not operational */
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	if (fi->local_int[cpu_addr] == NULL)
 		rc = 3; /* not operational */
 	else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
@@ -64,7 +64,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
 		*reg |= SIGP_STAT_STOPPED;
 		rc = 1; /* status stored */
 	}
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 
 	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
 	return rc;
@@ -86,7 +86,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
 
 	inti->type = KVM_S390_INT_EMERGENCY;
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	li = fi->local_int[cpu_addr];
 	if (li == NULL) {
 		rc = 3; /* not operational */
@@ -102,7 +102,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
 	spin_unlock_bh(&li->lock);
 	rc = 0; /* order accepted */
 unlock:
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 	VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
 	return rc;
 }
@@ -123,7 +123,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
 
 	inti->type = KVM_S390_SIGP_STOP;
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	li = fi->local_int[cpu_addr];
 	if (li == NULL) {
 		rc = 3; /* not operational */
@@ -142,7 +142,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
 	spin_unlock_bh(&li->lock);
 	rc = 0; /* order accepted */
 unlock:
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
 	return rc;
 }
@@ -188,7 +188,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 	if (!inti)
 		return 2; /* busy */
 
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
 	li = fi->local_int[cpu_addr];
 
 	if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
@@ -220,7 +220,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 out_li:
 	spin_unlock_bh(&li->lock);
 out_fi:
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
 	return rc;
 }
 
-- 
cgit v0.10.2


From abf4a71ed95ff29d696bf04633958b2068ed2e0b Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Tue, 12 May 2009 17:21:51 +0200
Subject: KVM: s390: Unlink vcpu on destroy - v2

This patch makes sure we do unlink a vcpu's sie control block
from the system control area in kvm_arch_vcpu_destroy. This
prevents illegal accesses to the sie control block from other
virtual cpus after free.

Reported-by: Mijo Safradin <mijo@linux.vnet.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 36c654d..628494a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -196,6 +196,10 @@ out_nokvm:
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+	if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+		(__u64) vcpu->arch.sie_block)
+		vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+	smp_mb();
 	free_page((unsigned long)(vcpu->arch.sie_block));
 	kvm_vcpu_uninit(vcpu);
 	kfree(vcpu);
@@ -310,8 +314,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu->arch.sie_block->icpua = id;
 	BUG_ON(!kvm->arch.sca);
-	BUG_ON(kvm->arch.sca->cpu[id].sda);
-	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	if (!kvm->arch.sca->cpu[id].sda)
+		kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	else
+		BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
 	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
 	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 
-- 
cgit v0.10.2


From 3edbcff9bfe2ed632e518b3cfe807d062cee8269 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Tue, 12 May 2009 17:21:52 +0200
Subject: KVM: s390: Sanity check on validity intercept

This patch adds a sanity check for the content of the guest
prefix register content before faulting in the cpu lowcore
that it refers to. The guest might end up in an endless loop
where SIE complains about missing lowcore with incorrect
content of the prefix register without this fix.

Reported-by: Mijo Safradin <mijo@linux.vnet.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 9d19803..98997cc 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 static int handle_validity(struct kvm_vcpu *vcpu)
 {
 	int viwhy = vcpu->arch.sie_block->ipb >> 16;
+	int rc;
+
 	vcpu->stat.exit_validity++;
-	if (viwhy == 0x37) {
-		fault_in_pages_writeable((char __user *)
-					 vcpu->kvm->arch.guest_origin +
-					 vcpu->arch.sie_block->prefix,
-					 PAGE_SIZE);
-		return 0;
-	}
-	VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
-		   viwhy);
-	return -ENOTSUPP;
+	if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
+		<= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
+		rc = fault_in_pages_writeable((char __user *)
+			 vcpu->kvm->arch.guest_origin +
+			 vcpu->arch.sie_block->prefix,
+			 2*PAGE_SIZE);
+		if (rc)
+			/* user will receive sigsegv, exit to user */
+			rc = -ENOTSUPP;
+	} else
+		rc = -ENOTSUPP;
+
+	if (rc)
+		VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+			   viwhy);
+	return rc;
 }
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From 51e4d5ab28c75d819b3840da11559ad5c1429135 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Tue, 12 May 2009 17:21:53 +0200
Subject: KVM: s390: Verify memory in kvm run

This check verifies that the guest we're trying to run in KVM_RUN
has some memory assigned to it. It enters an endless exception
loop if this is not the case.

Reported-by: Mijo Safradin <mijo@linux.vnet.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 628494a..10bccd1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -487,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	vcpu_load(vcpu);
 
+	/* verify, that memory has been registered */
+	if (!vcpu->kvm->arch.guest_memsize) {
+		vcpu_put(vcpu);
+		return -EINVAL;
+	}
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
-- 
cgit v0.10.2


From 3298b75c880d6f0fd70750233c0f3e71a72a5bfb Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:46 +0300
Subject: KVM: Unprotect a page if #PF happens during NMI injection.

It is done for exception and interrupt already.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8b5ffbd..ac3d5ba 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1122,8 +1122,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	if (npt_enabled)
 		svm_flush_tlb(&svm->vcpu);
 	else {
-		if (svm->vcpu.arch.interrupt.pending ||
-				svm->vcpu.arch.exception.pending)
+		if (kvm_event_needs_reinjection(&svm->vcpu))
 			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
 	}
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f3ab27b..8981654 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2615,7 +2615,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 			    (u32)((u64)cr2 >> 32), handler);
-		if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
+		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 39350b2..21203d4 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -30,4 +30,10 @@ static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
 		clear_bit(word_index, &vcpu->arch.irq_summary);
 	return irq;
 }
+
+static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
+		vcpu->arch.nmi_injected;
+}
 #endif
-- 
cgit v0.10.2


From fa9726b0733461781933ab7180aca45e46d0a891 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:47 +0300
Subject: KVM: Do not allow interrupt injection from userspace if there is a
 pending event.

The exception will immediately close the interrupt window.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3244437..96413f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3105,8 +3105,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 		kvm_run->ready_for_interrupt_injection = 1;
 	else
 		kvm_run->ready_for_interrupt_injection =
-					(kvm_arch_interrupt_allowed(vcpu) &&
-					 !kvm_cpu_has_interrupt(vcpu));
+			kvm_arch_interrupt_allowed(vcpu) &&
+			!kvm_cpu_has_interrupt(vcpu) &&
+			!kvm_event_needs_reinjection(vcpu);
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From 923c61bbc6413e87e5f6b0bae663d202a8de0537 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:48 +0300
Subject: KVM: Remove irq_pending bitmap

Only one interrupt vector can be injected from userspace irqchip at
any given time so no need to store it in a bitmap. Put it into interrupt
queue directly.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 16d1481..977a785 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -266,8 +266,6 @@ struct kvm_mmu {
 
 struct kvm_vcpu_arch {
 	u64 host_tsc;
-	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 	/*
 	 * rip and regs accesses must go through
 	 * kvm_{register,rip}_{read,write} functions.
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 11c2757..96dfbb6 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -50,7 +50,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 	struct kvm_pic *s;
 
 	if (!irqchip_in_kernel(v->kvm))
-		return v->arch.irq_summary;
+		return v->arch.interrupt.pending;
 
 	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
 		if (kvm_apic_accept_pic_intr(v)) {
@@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 	int vector;
 
 	if (!irqchip_in_kernel(v->kvm))
-		return kvm_pop_irq(v);
+		return v->arch.interrupt.nr;
 
 	vector = kvm_get_apic_interrupt(v);	/* APIC */
 	if (vector == -1) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 96413f4..54eec35 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1441,8 +1441,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 		return -ENXIO;
 	vcpu_load(vcpu);
 
-	set_bit(irq->irq, vcpu->arch.irq_pending);
-	set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
+	kvm_queue_interrupt(vcpu, irq->irq);
 
 	vcpu_put(vcpu);
 
@@ -3583,12 +3582,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->efer = vcpu->arch.shadow_efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
 
-	if (irqchip_in_kernel(vcpu->kvm))
-		memset(sregs->interrupt_bitmap, 0,
-		       sizeof sregs->interrupt_bitmap);
-	else
-		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
-		       sizeof sregs->interrupt_bitmap);
+	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
 
 	if (vcpu->arch.interrupt.pending)
 		set_bit(vcpu->arch.interrupt.nr,
@@ -4058,7 +4052,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
 	int mmu_reset_needed = 0;
-	int i, pending_vec, max_bits;
+	int pending_vec, max_bits;
 	struct descriptor_table dt;
 
 	vcpu_load(vcpu);
@@ -4100,24 +4094,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	if (!irqchip_in_kernel(vcpu->kvm)) {
-		memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
-		       sizeof vcpu->arch.irq_pending);
-		vcpu->arch.irq_summary = 0;
-		for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
-			if (vcpu->arch.irq_pending[i])
-				__set_bit(i, &vcpu->arch.irq_summary);
-	} else {
-		max_bits = (sizeof sregs->interrupt_bitmap) << 3;
-		pending_vec = find_first_bit(
-			(const unsigned long *)sregs->interrupt_bitmap,
-			max_bits);
-		/* Only pending external irq is handled here */
-		if (pending_vec < max_bits) {
-			kvm_queue_interrupt(vcpu, pending_vec);
-			pr_debug("Set back pending irq %d\n", pending_vec);
-		}
-		kvm_pic_clear_isr_ack(vcpu->kvm);
+	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
+	pending_vec = find_first_bit(
+		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
+	if (pending_vec < max_bits) {
+		kvm_queue_interrupt(vcpu, pending_vec);
+		pr_debug("Set back pending irq %d\n", pending_vec);
+		if (irqchip_in_kernel(vcpu->kvm))
+			kvm_pic_clear_isr_ack(vcpu->kvm);
 	}
 
 	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 21203d4..c1f1a8c 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -19,18 +19,6 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
 	vcpu->arch.interrupt.pending = false;
 }
 
-static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
-{
-	int word_index = __ffs(vcpu->arch.irq_summary);
-	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
-	if (!vcpu->arch.irq_pending[word_index])
-		clear_bit(word_index, &vcpu->arch.irq_summary);
-	return irq;
-}
-
 static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
-- 
cgit v0.10.2


From f629cf8485c9e1063fd8b915fa3bde80917400a1 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:49 +0300
Subject: KVM: skip_emulated_instruction() decode instruction if size is not
 known

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ac3d5ba..1315ce0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -228,7 +228,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		printk(KERN_DEBUG "%s: NOP\n", __func__);
+		if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+				EMULATE_DONE)
+			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
 	}
 	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
@@ -1868,11 +1870,8 @@ static int task_switch_interception(struct vcpu_svm *svm,
 	if (reason != TASK_SWITCH_GATE ||
 	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
 	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
-	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
-		if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0,
-					EMULTYPE_SKIP) != EMULATE_DONE)
-			return 0;
-	}
+	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
+		skip_emulated_instruction(&svm->vcpu);
 
 	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
-- 
cgit v0.10.2


From 66fd3f7f901f29a557a473af595bf11b270b9ac2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:50 +0300
Subject: KVM: Do not re-execute INTn instruction.

Re-inject event instead. This is what Intel suggest. Also use correct
instruction length when re-injecting soft fault/interrupt.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 977a785..1d6c3f7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
 	struct kvm_pio_request pio;
 	void *pio_data;
 
+	u8 event_exit_inst_len;
+
 	struct kvm_queued_exception {
 		bool pending;
 		bool has_error_code;
@@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
 
 	struct kvm_queued_interrupt {
 		bool pending;
+		bool soft;
 		u8 nr;
 	} interrupt;
 
@@ -510,7 +513,7 @@ struct kvm_x86_ops {
 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
-	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*set_irq)(struct kvm_vcpu *vcpu);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1315ce0..377c4f1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2310,13 +2310,13 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
-static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
+static void svm_set_irq(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	nested_svm_intr(svm);
 
-	svm_queue_irq(vcpu, irq);
+	svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -2418,7 +2418,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 	case SVM_EXITINTINFO_TYPE_EXEPT:
 		/* In case of software exception do not reinject an exception
 		   vector, but re-execute and instruction instead */
-		if (vector == BP_VECTOR || vector == OF_VECTOR)
+		if (kvm_exception_is_soft(vector))
 			break;
 		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
 			u32 err = svm->vmcb->control.exit_int_info_err;
@@ -2428,7 +2428,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 			kvm_queue_exception(&svm->vcpu, vector);
 		break;
 	case SVM_EXITINTINFO_TYPE_INTR:
-		kvm_queue_interrupt(&svm->vcpu, vector);
+		kvm_queue_interrupt(&svm->vcpu, vector, false);
 		break;
 	default:
 		break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8981654..29b49f0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -801,8 +801,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 		return;
 	}
 
-	if (nr == BP_VECTOR || nr == OF_VECTOR) {
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+	if (kvm_exception_is_soft(nr)) {
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
 		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
 	} else
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
@@ -2445,9 +2446,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
-static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
+static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	uint32_t intr;
+	int irq = vcpu->arch.interrupt.nr;
 
 	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
 
@@ -2462,8 +2465,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
 		return;
 	}
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+	intr = irq | INTR_INFO_VALID_MASK;
+	if (vcpu->arch.interrupt.soft) {
+		intr |= INTR_TYPE_SOFT_INTR;
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
+	} else
+		intr |= INTR_TYPE_EXT_INTR;
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -3024,6 +3033,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 					      GUEST_INTR_STATE_NMI);
 			break;
 		case INTR_TYPE_EXT_INTR:
+		case INTR_TYPE_SOFT_INTR:
 			kvm_clear_interrupt_queue(vcpu);
 			break;
 		case INTR_TYPE_HARD_EXCEPTION:
@@ -3295,16 +3305,24 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
 				GUEST_INTR_STATE_NMI);
 		break;
-	case INTR_TYPE_HARD_EXCEPTION:
 	case INTR_TYPE_SOFT_EXCEPTION:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+		/* fall through */
+	case INTR_TYPE_HARD_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
 			kvm_queue_exception_e(&vmx->vcpu, vector, err);
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
 		break;
+	case INTR_TYPE_SOFT_INTR:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+		/* fall through */
 	case INTR_TYPE_EXT_INTR:
-		kvm_queue_interrupt(&vmx->vcpu, vector);
+		kvm_queue_interrupt(&vmx->vcpu, vector,
+			type == INTR_TYPE_SOFT_INTR);
 		break;
 	default:
 		break;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 54eec35..73cfe87 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1441,7 +1441,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 		return -ENXIO;
 	vcpu_load(vcpu);
 
-	kvm_queue_interrupt(vcpu, irq->irq);
+	kvm_queue_interrupt(vcpu, irq->irq, false);
 
 	vcpu_put(vcpu);
 
@@ -3161,7 +3161,7 @@ static void inject_irq(struct kvm_vcpu *vcpu)
 	}
 
 	if (vcpu->arch.interrupt.pending) {
-		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+		kvm_x86_ops->set_irq(vcpu);
 		return;
 	}
 
@@ -3174,8 +3174,9 @@ static void inject_irq(struct kvm_vcpu *vcpu)
 		}
 	} else if (kvm_cpu_has_interrupt(vcpu)) {
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
+					    false);
+			kvm_x86_ops->set_irq(vcpu);
 		}
 	}
 }
@@ -4098,7 +4099,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	pending_vec = find_first_bit(
 		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
 	if (pending_vec < max_bits) {
-		kvm_queue_interrupt(vcpu, pending_vec);
+		kvm_queue_interrupt(vcpu, pending_vec, false);
 		pr_debug("Set back pending irq %d\n", pending_vec);
 		if (irqchip_in_kernel(vcpu->kvm))
 			kvm_pic_clear_isr_ack(vcpu->kvm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c1f1a8c..4c8e10a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
 	vcpu->arch.exception.pending = false;
 }
 
-static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
+static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
+	bool soft)
 {
 	vcpu->arch.interrupt.pending = true;
+	vcpu->arch.interrupt.soft = soft;
 	vcpu->arch.interrupt.nr = vector;
 }
 
@@ -24,4 +26,9 @@ static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
 		vcpu->arch.nmi_injected;
 }
+
+static inline bool kvm_exception_is_soft(unsigned int nr)
+{
+	return (nr == BP_VECTOR) || (nr == OF_VECTOR);
+}
 #endif
-- 
cgit v0.10.2


From 6a8b1d13121f8226783987dc7ddd861ee2245410 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:51 +0300
Subject: KVM: Always request IRQ/NMI window if an interrupt is pending

Currently they are not requested if there is pending exception.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 73cfe87..199426c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3152,8 +3152,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
-static void inject_irq(struct kvm_vcpu *vcpu)
+static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
+
 	/* try to reinject previous events if any */
 	if (vcpu->arch.nmi_injected) {
 		kvm_x86_ops->set_nmi(vcpu);
@@ -3181,26 +3184,11 @@ static void inject_irq(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-
-	inject_irq(vcpu);
-
-	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
-		kvm_x86_ops->enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-		kvm_x86_ops->enable_irq_window(vcpu);
-}
-
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
 
 	if (vcpu->requests)
 		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3254,6 +3242,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	else
 		inject_pending_irq(vcpu, kvm_run);
 
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		kvm_x86_ops->enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		kvm_x86_ops->enable_irq_window(vcpu);
+
 	if (kvm_lapic_enabled(vcpu)) {
 		if (!vcpu->arch.apic->vapic_addr)
 			update_cr8_intercept(vcpu);
-- 
cgit v0.10.2


From 44c11430b52cbad0a467bc023a802d122dfd285c Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:52 +0300
Subject: KVM: inject NMI after IRET from a previous NMI, not before.

If NMI is received during handling of another NMI it should be injected
immediately after IRET from previous NMI handler, but SVM intercept IRET
before instruction execution so we can't inject pending NMI at this
point and there is not way to request exit when NMI window opens. This
patch fix SVM code to open NMI window after IRET by single stepping over
IRET instruction.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1d6c3f7..8212943 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
 	unsigned int time_offset;
 	struct page *time_page;
 
+	bool singlestep; /* guest is single stepped by KVM */
 	bool nmi_pending;
 	bool nmi_injected;
 
@@ -771,6 +772,7 @@ enum {
 #define HF_HIF_MASK		(1 << 1)
 #define HF_VINTR_MASK		(1 << 2)
 #define HF_NMI_MASK		(1 << 3)
+#define HF_IRET_MASK		(1 << 4)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 377c4f1..71510e0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -965,15 +965,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void update_db_intercept(struct kvm_vcpu *vcpu)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+
+	if (vcpu->arch.singlestep)
+		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
+
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
 		if (vcpu->guest_debug &
 		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -984,6 +985,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 				1 << BP_VECTOR;
 	} else
 		vcpu->guest_debug = 0;
+}
+
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+{
+	int old_debug = vcpu->guest_debug;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	vcpu->guest_debug = dbg->control;
+
+	update_db_intercept(vcpu);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
@@ -1133,14 +1144,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (!(svm->vcpu.guest_debug &
-	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
+		!svm->vcpu.arch.singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
-	kvm_run->exit_reason = KVM_EXIT_DEBUG;
-	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
-	kvm_run->debug.arch.exception = DB_VECTOR;
-	return 0;
+
+	if (svm->vcpu.arch.singlestep) {
+		svm->vcpu.arch.singlestep = false;
+		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
+			svm->vmcb->save.rflags &=
+				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+		update_db_intercept(&svm->vcpu);
+	}
+
+	if (svm->vcpu.guest_debug &
+	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		kvm_run->debug.arch.pc =
+			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		return 0;
+	}
+
+	return 1;
 }
 
 static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1887,7 +1914,7 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	++svm->vcpu.stat.nmi_window_exits;
 	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
-	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+	svm->vcpu.arch.hflags |= HF_IRET_MASK;
 	return 1;
 }
 
@@ -2357,8 +2384,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
-		enable_irq_window(vcpu);
+	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
+	    == HF_NMI_MASK)
+		return; /* IRET will cause a vm exit */
+
+	/* Something prevents NMI from been injected. Single step over
+	   possible problem (IRET or exception injection or interrupt
+	   shadow) */
+	vcpu->arch.singlestep = true;
+	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+	update_db_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2401,6 +2436,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 	int type;
 	u32 exitintinfo = svm->vmcb->control.exit_int_info;
 
+	if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+
 	svm->vcpu.arch.nmi_injected = false;
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
-- 
cgit v0.10.2


From 36752c9b91f75aa3ff0f214a89f13d806cb2f61f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:53 +0300
Subject: KVM: Do not migrate pending software interrupts.

INTn will be re-executed after migration. If we wanted to migrate
pending software interrupt we would need to migrate interrupt type
and instruction length too, but we do not have all required info on
SVM, so SVM->VMX migration would need to re-execute INTn anyway. To
make it simple never migrate pending soft interrupt.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 199426c..beb806b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3579,7 +3579,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
 
-	if (vcpu->arch.interrupt.pending)
+	if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
 		set_bit(vcpu->arch.interrupt.nr,
 			(unsigned long *)sregs->interrupt_bitmap);
 
-- 
cgit v0.10.2


From 8db3baa2db34035b2ddb7d0e8b186eb92a056532 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:54 +0300
Subject: KVM: Disable CR8 intercept if tpr patching is active

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index beb806b..249540f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3142,7 +3142,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	if (!kvm_x86_ops->update_cr8_intercept)
 		return;
 
-	max_irr = kvm_lapic_find_highest_irr(vcpu);
+	if (!vcpu->arch.apic->vapic_addr)
+		max_irr = kvm_lapic_find_highest_irr(vcpu);
+	else
+		max_irr = -1;
 
 	if (max_irr != -1)
 		max_irr >>= 4;
@@ -3249,10 +3252,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		kvm_x86_ops->enable_irq_window(vcpu);
 
 	if (kvm_lapic_enabled(vcpu)) {
-		if (!vcpu->arch.apic->vapic_addr)
-			update_cr8_intercept(vcpu);
-		else
-			kvm_lapic_sync_to_vapic(vcpu);
+		update_cr8_intercept(vcpu);
+		kvm_lapic_sync_to_vapic(vcpu);
 	}
 
 	up_read(&vcpu->kvm->slots_lock);
-- 
cgit v0.10.2


From 20f65983e30f222e5383f77206e3f571d1d64610 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 11 May 2009 13:35:55 +0300
Subject: KVM: Move "exit due to NMI" handling into vmx_complete_interrupts()

To save us one reading of VM_EXIT_INTR_INFO.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 29b49f0..fe2ce2b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3261,8 +3261,17 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	int type;
 	bool idtv_info_valid;
 
-	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	/* We need to handle NMIs before interrupts are enabled */
+	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
+	    (exit_intr_info & INTR_INFO_VALID_MASK)) {
+		KVMTRACE_0D(NMI, &vmx->vcpu, handler);
+		asm("int $2");
+	}
+
+	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+
 	if (cpu_has_virtual_nmis()) {
 		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
 		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
@@ -3363,7 +3372,6 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 intr_info;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
@@ -3490,15 +3498,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
 
-	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
-	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-	    (intr_info & INTR_INFO_VALID_MASK)) {
-		KVMTRACE_0D(NMI, vcpu, handler);
-		asm("int $2");
-	}
-
 	vmx_complete_interrupts(vmx);
 }
 
-- 
cgit v0.10.2


From 56b237e31abf4d6dbc6e2a0214049b9a23be4883 Mon Sep 17 00:00:00 2001
From: Nitin A Kamble <nitin.a.kamble@intel.com>
Date: Thu, 4 Jun 2009 15:04:08 -0700
Subject: KVM: VMX: Rename rmode.active to rmode.vm86_active

That way the interpretation of rmode.active becomes more clear with
unrestricted guest code.

Signed-off-by: Nitin A Kamble <nitin.a.kamble@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8212943..eabdc1c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -335,7 +335,7 @@ struct kvm_vcpu_arch {
 	} interrupt;
 
 	struct {
-		int active;
+		int vm86_active;
 		u8 save_iopl;
 		struct kvm_save_segment {
 			u16 selector;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fe2ce2b..c379a34 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -495,7 +495,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
 			eb |= 1u << BP_VECTOR;
 	}
-	if (vcpu->arch.rmode.active)
+	if (vcpu->arch.rmode.vm86_active)
 		eb = ~0;
 	if (enable_ept)
 		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
@@ -731,7 +731,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-	if (vcpu->arch.rmode.active)
+	if (vcpu->arch.rmode.vm86_active)
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
@@ -788,7 +788,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
 	}
 
-	if (vcpu->arch.rmode.active) {
+	if (vcpu->arch.rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = nr;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -1363,7 +1363,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	vmx->emulation_required = 1;
-	vcpu->arch.rmode.active = 0;
+	vcpu->arch.rmode.vm86_active = 0;
 
 	vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
 	vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit);
@@ -1425,7 +1425,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	vmx->emulation_required = 1;
-	vcpu->arch.rmode.active = 1;
+	vcpu->arch.rmode.vm86_active = 1;
 
 	vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1594,10 +1594,10 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	vmx_fpu_deactivate(vcpu);
 
-	if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
+	if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE))
 		enter_pmode(vcpu);
 
-	if (!vcpu->arch.rmode.active && !(cr0 & X86_CR0_PE))
+	if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE))
 		enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
@@ -1655,7 +1655,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
+	unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ?
 		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
 	vcpu->arch.cr4 = cr4;
@@ -1738,7 +1738,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
-	if (vcpu->arch.rmode.active && seg == VCPU_SREG_TR) {
+	if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) {
 		vcpu->arch.rmode.tr.selector = var->selector;
 		vcpu->arch.rmode.tr.base = var->base;
 		vcpu->arch.rmode.tr.limit = var->limit;
@@ -1748,7 +1748,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	vmcs_writel(sf->base, var->base);
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
-	if (vcpu->arch.rmode.active && var->s) {
+	if (vcpu->arch.rmode.vm86_active && var->s) {
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
@@ -2317,7 +2317,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	vmx->vcpu.arch.rmode.active = 0;
+	vmx->vcpu.arch.rmode.vm86_active = 0;
 
 	vmx->soft_vnmi_blocked = 0;
 
@@ -2455,7 +2455,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
 
 	++vcpu->stat.irq_injections;
-	if (vcpu->arch.rmode.active) {
+	if (vcpu->arch.rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = irq;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -2493,7 +2493,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 	}
 
 	++vcpu->stat.nmi_injections;
-	if (vcpu->arch.rmode.active) {
+	if (vcpu->arch.rmode.vm86_active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = NMI_VECTOR;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
@@ -2629,7 +2629,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
 
-	if (vcpu->arch.rmode.active &&
+	if (vcpu->arch.rmode.vm86_active &&
 	    handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
 								error_code)) {
 		if (vcpu->arch.halt_request) {
-- 
cgit v0.10.2


From a0861c02a981c943573478ea13b29b1fb958ee5b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 8 Jun 2009 17:37:09 +0800
Subject: KVM: Add VT-x machine check support

VT-x needs an explicit MC vector intercept to handle machine checks in the
hyper visor.

It also has a special option to catch machine checks that happen
during VT entry.

Do these interceptions and forward them to the Linux machine check
handler. Make it always look like user space is interrupted because
the machine check handler treats kernel/user space differently.

Thanks to Jiang Yunhong for help and testing.

Cc: stable@kernel.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 498f944..11be5ad 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -247,6 +247,7 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MCE_DURING_VMENTRY	 41
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS         44
 #define EXIT_REASON_EPT_VIOLATION       48
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 09dd1d4..289cc48 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -420,6 +420,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
  out2:
 	atomic_dec(&mce_entry);
 }
+EXPORT_SYMBOL_GPL(do_machine_check);
 
 #ifdef CONFIG_X86_MCE_INTEL
 /***
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c379a34..32d6ae8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -32,6 +32,7 @@
 #include <asm/desc.h>
 #include <asm/vmx.h>
 #include <asm/virtext.h>
+#include <asm/mce.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
@@ -97,6 +98,7 @@ struct vcpu_vmx {
 	int soft_vnmi_blocked;
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
+	u32 exit_reason;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -214,6 +216,13 @@ static inline int is_external_interrupt(u32 intr_info)
 		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
+static inline int is_machine_check(u32 intr_info)
+{
+	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
+			     INTR_INFO_VALID_MASK)) ==
+		(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
+}
+
 static inline int cpu_has_vmx_msr_bitmap(void)
 {
 	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
@@ -485,7 +494,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
 
-	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
+	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
 	if (!vcpu->fpu_active)
 		eb |= 1u << NM_VECTOR;
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -2582,6 +2591,31 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+/*
+ * Trigger machine check on the host. We assume all the MSRs are already set up
+ * by the CPU and that we still run on the same CPU as the MCE occurred on.
+ * We pass a fake environment to the machine check handler because we want
+ * the guest to be always treated like user space, no matter what context
+ * it used internally.
+ */
+static void kvm_machine_check(void)
+{
+#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
+	struct pt_regs regs = {
+		.cs = 3, /* Fake ring 3 no matter what the guest ran on */
+		.flags = X86_EFLAGS_IF,
+	};
+
+	do_machine_check(&regs, 0);
+#endif
+}
+
+static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	/* already handled by vcpu_run */
+	return 1;
+}
+
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2593,6 +2627,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vect_info = vmx->idt_vectoring_info;
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
+	if (is_machine_check(intr_info))
+		return handle_machine_check(vcpu, kvm_run);
+
 	if ((vect_info & VECTORING_INFO_VALID_MASK) &&
 						!is_page_fault(intr_info))
 		printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
@@ -3166,6 +3203,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
+	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -3177,8 +3215,8 @@ static const int kvm_vmx_max_exit_handlers =
  */
 static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-	u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 exit_reason = vmx->exit_reason;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
@@ -3263,6 +3301,14 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 
 	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
+	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
+
+	/* Handle machine checks before interrupts are enabled */
+	if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
+	    || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
+		&& is_machine_check(exit_intr_info)))
+		kvm_machine_check();
+
 	/* We need to handle NMIs before interrupts are enabled */
 	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
 	    (exit_intr_info & INTR_INFO_VALID_MASK)) {
-- 
cgit v0.10.2


From 003dec8913d6bebb4ecc989ec04a235cf38f5ea9 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 10 Jun 2009 10:31:45 +0100
Subject: GFS2: Merge gfs2_get_sb into gfs2_get_sb_meta

These don't need to be separate functions.

Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 9da161c..f234aba 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1284,9 +1284,11 @@ static int set_meta_super(struct super_block *s, void *ptr)
 	return -EINVAL;
 }
 
-static struct super_block *get_gfs2_sb(const char *dev_name)
+static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *data, struct vfsmount *mnt)
 {
 	struct super_block *s;
+	struct gfs2_sbd *sdp;
 	struct path path;
 	int error;
 
@@ -1294,21 +1296,11 @@ static struct super_block *get_gfs2_sb(const char *dev_name)
 	if (error) {
 		printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
 		       dev_name, error);
-		return ERR_PTR(-ENOENT);
+		return error;
 	}
 	s = sget(&gfs2_fs_type, test_meta_super, set_meta_super,
 		 path.dentry->d_inode->i_sb->s_bdev);
 	path_put(&path);
-	return s;
-}
-
-static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
-			    const char *dev_name, void *data, struct vfsmount *mnt)
-{
-	struct super_block *s;
-	struct gfs2_sbd *sdp;
-
-	s = get_gfs2_sb(dev_name);
 	if (IS_ERR(s)) {
 		printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
 		return PTR_ERR(s);
-- 
cgit v0.10.2


From 04dce7d9d429ea5ea04e9432d1726c930f4d67da Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 10 Jun 2009 16:59:46 +1000
Subject: spinlock: Add missing __raw_spin_lock_flags() stub for UP

This was only defined with CONFIG_DEBUG_SPINLOCK set, but some
obscure arch/powerpc code wants it always.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 938234c..d4841ed 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -60,6 +60,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 #define __raw_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched.c and kernel_lock.c: */
 # define __raw_spin_lock(lock)		do { (void)(lock); } while (0)
+# define __raw_spin_lock_flags(lock, flags)	do { (void)(lock); } while (0)
 # define __raw_spin_unlock(lock)	do { (void)(lock); } while (0)
 # define __raw_spin_trylock(lock)	({ (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
-- 
cgit v0.10.2


From f1db457ce6e2f63cb01022f58c0c023838958bd1 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 10 Jun 2009 10:06:24 +0800
Subject: tracing/events: convert block trace points to TRACE_EVENT(), fix
 !CONFIG_BLOCK

Fix building failures when CONFIG_BLOCK == n.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2F1520.8020003@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index c7ec31d..7e4350e 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -218,7 +218,7 @@ static inline int blk_trace_init_sysfs(struct device *dev)
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
-#ifdef CONFIG_EVENT_TRACING
+#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
 
 static inline int blk_cmd_buf_len(struct request *rq)
 {
@@ -229,7 +229,7 @@ extern void blk_dump_cmd(char *buf, struct request *rq);
 extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
 extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
 
-#endif /* CONFIG_EVENT_TRACING */
+#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
 
 #endif /* __KERNEL__ */
 #endif
-- 
cgit v0.10.2


From 6bc1096d7ab3621b3ffcf06616d1f4e0325d903d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 22 May 2009 12:12:01 +0200
Subject: x86: MSR: add a struct representation of an MSR

Add a struct representing a 64bit MSR pair consisting of a low and high
register part and convert msr_info to use it. Also, rename msr-on-cpu.c
to msr.c.

Side note: Put the cpumask.h include in __KERNEL__ space thus fixing an
allmodconfig build failure in the headers_check target.

CC: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 638bf62..5e12132 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -12,6 +12,17 @@
 
 #include <asm/asm.h>
 #include <asm/errno.h>
+#include <asm/cpumask.h>
+
+struct msr {
+	union {
+		struct {
+			u32 l;
+			u32 h;
+		};
+		u64 q;
+	};
+};
 
 static inline unsigned long long native_read_tscp(unsigned int *aux)
 {
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 55e11aa..f9d3563 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,7 +2,7 @@
 # Makefile for x86 specific library files.
 #
 
-obj-$(CONFIG_SMP) := msr-on-cpu.o
+obj-$(CONFIG_SMP) := msr.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
deleted file mode 100644
index 321cf72..0000000
--- a/arch/x86/lib/msr-on-cpu.c
+++ /dev/null
@@ -1,97 +0,0 @@
-#include <linux/module.h>
-#include <linux/preempt.h>
-#include <linux/smp.h>
-#include <asm/msr.h>
-
-struct msr_info {
-	u32 msr_no;
-	u32 l, h;
-	int err;
-};
-
-static void __rdmsr_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	rdmsr(rv->msr_no, rv->l, rv->h);
-}
-
-static void __wrmsr_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	wrmsr(rv->msr_no, rv->l, rv->h);
-}
-
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	int err;
-	struct msr_info rv;
-
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
-	*l = rv.l;
-	*h = rv.h;
-
-	return err;
-}
-
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	int err;
-	struct msr_info rv;
-
-	rv.msr_no = msr_no;
-	rv.l = l;
-	rv.h = h;
-	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-
-	return err;
-}
-
-/* These "safe" variants are slower and should be used when the target MSR
-   may not actually exist. */
-static void __rdmsr_safe_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
-}
-
-static void __wrmsr_safe_on_cpu(void *info)
-{
-	struct msr_info *rv = info;
-
-	rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h);
-}
-
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	int err;
-	struct msr_info rv;
-
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
-	*l = rv.l;
-	*h = rv.h;
-
-	return err ? err : rv.err;
-}
-
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	int err;
-	struct msr_info rv;
-
-	rv.msr_no = msr_no;
-	rv.l = l;
-	rv.h = h;
-	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
-
-	return err ? err : rv.err;
-}
-
-EXPORT_SYMBOL(rdmsr_on_cpu);
-EXPORT_SYMBOL(wrmsr_on_cpu);
-EXPORT_SYMBOL(rdmsr_safe_on_cpu);
-EXPORT_SYMBOL(wrmsr_safe_on_cpu);
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
new file mode 100644
index 0000000..cade714
--- /dev/null
+++ b/arch/x86/lib/msr.c
@@ -0,0 +1,97 @@
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <asm/msr.h>
+
+struct msr_info {
+	u32 msr_no;
+	struct msr reg;
+	int err;
+};
+
+static void __rdmsr_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	rdmsr(rv->msr_no, rv->reg.l, rv->reg.h);
+}
+
+static void __wrmsr_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	wrmsr(rv->msr_no, rv->reg.l, rv->reg.h);
+}
+
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
+	*l = rv.reg.l;
+	*h = rv.reg.h;
+
+	return err;
+}
+
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	rv.reg.l = l;
+	rv.reg.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+	return err;
+}
+
+/* These "safe" variants are slower and should be used when the target MSR
+   may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+}
+
+static void __wrmsr_safe_on_cpu(void *info)
+{
+	struct msr_info *rv = info;
+
+	rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
+}
+
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+	*l = rv.reg.l;
+	*h = rv.reg.h;
+
+	return err ? err : rv.err;
+}
+
+int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	rv.reg.l = l;
+	rv.reg.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+
+	return err ? err : rv.err;
+}
+
+EXPORT_SYMBOL(rdmsr_on_cpu);
+EXPORT_SYMBOL(wrmsr_on_cpu);
+EXPORT_SYMBOL(rdmsr_safe_on_cpu);
+EXPORT_SYMBOL(wrmsr_safe_on_cpu);
-- 
cgit v0.10.2


From b034c19f9f61c8b6f2435aa2e77f52348ebde767 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 22 May 2009 13:52:19 +0200
Subject: x86: MSR: add methods for writing of an MSR on several CPUs

Provide for concurrent MSR writes on all the CPUs in the cpumask. Also,
add a temporary workaround for smp_call_function_many which skips the
CPU we're executing on.

Bart: zero out rv struct which is allocated on stack.

CC: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 5e12132..2260376 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -227,6 +227,8 @@ do {                                                            \
 #ifdef CONFIG_SMP
 int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
 int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs);
+void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs);
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
 int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
 #else  /*  CONFIG_SMP  */
@@ -240,6 +242,16 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 	wrmsr(msr_no, l, h);
 	return 0;
 }
+static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no,
+				struct msr *msrs)
+{
+       rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+}
+static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no,
+				struct msr *msrs)
+{
+       wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+}
 static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
 				    u32 *l, u32 *h)
 {
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index cade714..1440b9c 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -6,21 +6,37 @@
 struct msr_info {
 	u32 msr_no;
 	struct msr reg;
+	struct msr *msrs;
+	int off;
 	int err;
 };
 
 static void __rdmsr_on_cpu(void *info)
 {
 	struct msr_info *rv = info;
+	struct msr *reg;
+	int this_cpu = raw_smp_processor_id();
 
-	rdmsr(rv->msr_no, rv->reg.l, rv->reg.h);
+	if (rv->msrs)
+		reg = &rv->msrs[this_cpu - rv->off];
+	else
+		reg = &rv->reg;
+
+	rdmsr(rv->msr_no, reg->l, reg->h);
 }
 
 static void __wrmsr_on_cpu(void *info)
 {
 	struct msr_info *rv = info;
+	struct msr *reg;
+	int this_cpu = raw_smp_processor_id();
+
+	if (rv->msrs)
+		reg = &rv->msrs[this_cpu - rv->off];
+	else
+		reg = &rv->reg;
 
-	wrmsr(rv->msr_no, rv->reg.l, rv->reg.h);
+	wrmsr(rv->msr_no, reg->l, reg->h);
 }
 
 int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
@@ -28,6 +44,8 @@ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 	int err;
 	struct msr_info rv;
 
+	memset(&rv, 0, sizeof(rv));
+
 	rv.msr_no = msr_no;
 	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
 	*l = rv.reg.l;
@@ -35,12 +53,15 @@ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 
 	return err;
 }
+EXPORT_SYMBOL(rdmsr_on_cpu);
 
 int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 {
 	int err;
 	struct msr_info rv;
 
+	memset(&rv, 0, sizeof(rv));
+
 	rv.msr_no = msr_no;
 	rv.reg.l = l;
 	rv.reg.h = h;
@@ -48,6 +69,70 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 
 	return err;
 }
+EXPORT_SYMBOL(wrmsr_on_cpu);
+
+/* rdmsr on a bunch of CPUs
+ *
+ * @mask:       which CPUs
+ * @msr_no:     which MSR
+ * @msrs:       array of MSR values
+ *
+ */
+void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs)
+{
+	struct msr_info rv;
+	int this_cpu;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.off    = cpumask_first(mask);
+	rv.msrs	  = msrs;
+	rv.msr_no = msr_no;
+
+	preempt_disable();
+	/*
+	 * FIXME: handle the CPU we're executing on separately for now until
+	 * smp_call_function_many has been fixed to not skip it.
+	 */
+	this_cpu = raw_smp_processor_id();
+	smp_call_function_single(this_cpu, __rdmsr_on_cpu, &rv, 1);
+
+	smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1);
+	preempt_enable();
+}
+EXPORT_SYMBOL(rdmsr_on_cpus);
+
+/*
+ * wrmsr on a bunch of CPUs
+ *
+ * @mask:       which CPUs
+ * @msr_no:     which MSR
+ * @msrs:       array of MSR values
+ *
+ */
+void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs)
+{
+	struct msr_info rv;
+	int this_cpu;
+
+	memset(&rv, 0, sizeof(rv));
+
+	rv.off    = cpumask_first(mask);
+	rv.msrs   = msrs;
+	rv.msr_no = msr_no;
+
+	preempt_disable();
+	/*
+	 * FIXME: handle the CPU we're executing on separately for now until
+	 * smp_call_function_many has been fixed to not skip it.
+	 */
+	this_cpu = raw_smp_processor_id();
+	smp_call_function_single(this_cpu, __wrmsr_on_cpu, &rv, 1);
+
+	smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1);
+	preempt_enable();
+}
+EXPORT_SYMBOL(wrmsr_on_cpus);
 
 /* These "safe" variants are slower and should be used when the target MSR
    may not actually exist. */
@@ -70,6 +155,8 @@ int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 	int err;
 	struct msr_info rv;
 
+	memset(&rv, 0, sizeof(rv));
+
 	rv.msr_no = msr_no;
 	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
 	*l = rv.reg.l;
@@ -77,12 +164,15 @@ int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 
 	return err ? err : rv.err;
 }
+EXPORT_SYMBOL(rdmsr_safe_on_cpu);
 
 int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 {
 	int err;
 	struct msr_info rv;
 
+	memset(&rv, 0, sizeof(rv));
+
 	rv.msr_no = msr_no;
 	rv.reg.l = l;
 	rv.reg.h = h;
@@ -90,8 +180,4 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 
 	return err ? err : rv.err;
 }
-
-EXPORT_SYMBOL(rdmsr_on_cpu);
-EXPORT_SYMBOL(wrmsr_on_cpu);
-EXPORT_SYMBOL(rdmsr_safe_on_cpu);
 EXPORT_SYMBOL(wrmsr_safe_on_cpu);
-- 
cgit v0.10.2


From d357cbb445208ea0c33b268e08a65e53fdbb5e86 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 14 May 2009 17:49:28 +0200
Subject: edac: fold __func__ into edac_debug_printk

This shortens debugfX() calls a bit.

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
CC: Doug Thompson <norsk5@yahoo.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 6ad95c8..48d3b14 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -76,10 +76,11 @@
 extern int edac_debug_level;
 
 #ifndef CONFIG_EDAC_DEBUG_VERBOSE
-#define edac_debug_printk(level, fmt, arg...)                            \
-	do {                                                             \
-		if (level <= edac_debug_level)                           \
-			edac_printk(KERN_DEBUG, EDAC_DEBUG, fmt, ##arg); \
+#define edac_debug_printk(level, fmt, arg...)                           \
+	do {                                                            \
+		if (level <= edac_debug_level)                          \
+			edac_printk(KERN_DEBUG, EDAC_DEBUG,		\
+				    "%s: " fmt, __func__, ##arg);	\
 	} while (0)
 #else  /* CONFIG_EDAC_DEBUG_VERBOSE */
 #define edac_debug_printk(level, fmt, arg...)                            \
-- 
cgit v0.10.2


From cfe40fdb4a46a68e45fa9a5ecbe588e94b89b4f3 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 4 May 2009 19:25:34 +0200
Subject: amd64_edac: add driver header

Borislav:
- remove register bit descriptions (complete text in BKDG)
- cleanup and remove excessive/superfluous comments

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
new file mode 100644
index 0000000..6f5d5d6
--- /dev/null
+++ b/drivers/edac/amd64_edac.h
@@ -0,0 +1,628 @@
+/*
+ * AMD64 class Memory Controller kernel module
+ *
+ * Copyright (c) 2009 SoftwareBitMaker.
+ * Copyright (c) 2009 Advanced Micro Devices, Inc.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ *	Originally Written by Thayne Harbaugh
+ *
+ *      Changes by Douglas "norsk" Thompson  <dougthompson@xmission.com>:
+ *		- K8 CPU Revision D and greater support
+ *
+ *      Changes by Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>:
+ *		- Module largely rewritten, with new (and hopefully correct)
+ *		code for dealing with node and chip select interleaving,
+ *		various code cleanup, and bug fixes
+ *		- Added support for memory hoisting using DRAM hole address
+ *		register
+ *
+ *	Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>:
+ *		-K8 Rev (1207) revision support added, required Revision
+ *		specific mini-driver code to support Rev F as well as
+ *		prior revisions
+ *
+ *	Changes by Douglas "norsk" Thompson <dougthompson@xmission.com>:
+ *		-Family 10h revision support added. New PCI Device IDs,
+ *		indicating new changes. Actual registers modified
+ *		were slight, less than the Rev E to Rev F transition
+ *		but changing the PCI Device ID was the proper thing to
+ *		do, as it provides for almost automactic family
+ *		detection. The mods to Rev F required more family
+ *		information detection.
+ *
+ *	Changes/Fixes by Borislav Petkov <borislav.petkov@amd.com>:
+ *		- misc fixes and code cleanups
+ *
+ * This module is based on the following documents
+ * (available from http://www.amd.com/):
+ *
+ *	Title:	BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
+ *		Opteron Processors
+ *	AMD publication #: 26094
+ *`	Revision: 3.26
+ *
+ *	Title:	BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh
+ *		Processors
+ *	AMD publication #: 32559
+ *	Revision: 3.00
+ *	Issue Date: May 2006
+ *
+ *	Title:	BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h
+ *		Processors
+ *	AMD publication #: 31116
+ *	Revision: 3.00
+ *	Issue Date: September 07, 2007
+ *
+ * Sections in the first 2 documents are no longer in sync with each other.
+ * The Family 10h BKDG was totally re-written from scratch with a new
+ * presentation model.
+ * Therefore, comments that refer to a Document section might be off.
+ */
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/mmzone.h>
+#include <linux/edac.h>
+#include "edac_core.h"
+
+#define amd64_printk(level, fmt, arg...) \
+	edac_printk(level, "amd64", fmt, ##arg)
+
+#define amd64_mc_printk(mci, level, fmt, arg...) \
+	edac_mc_chipset_printk(mci, level, "amd64", fmt, ##arg)
+
+/*
+ * Throughout the comments in this code, the following terms are used:
+ *
+ *	SysAddr, DramAddr, and InputAddr
+ *
+ *  These terms come directly from the amd64 documentation
+ * (AMD publication #26094).  They are defined as follows:
+ *
+ *     SysAddr:
+ *         This is a physical address generated by a CPU core or a device
+ *         doing DMA.  If generated by a CPU core, a SysAddr is the result of
+ *         a virtual to physical address translation by the CPU core's address
+ *         translation mechanism (MMU).
+ *
+ *     DramAddr:
+ *         A DramAddr is derived from a SysAddr by subtracting an offset that
+ *         depends on which node the SysAddr maps to and whether the SysAddr
+ *         is within a range affected by memory hoisting.  The DRAM Base
+ *         (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers
+ *         determine which node a SysAddr maps to.
+ *
+ *         If the DRAM Hole Address Register (DHAR) is enabled and the SysAddr
+ *         is within the range of addresses specified by this register, then
+ *         a value x from the DHAR is subtracted from the SysAddr to produce a
+ *         DramAddr.  Here, x represents the base address for the node that
+ *         the SysAddr maps to plus an offset due to memory hoisting.  See
+ *         section 3.4.8 and the comments in amd64_get_dram_hole_info() and
+ *         sys_addr_to_dram_addr() below for more information.
+ *
+ *         If the SysAddr is not affected by the DHAR then a value y is
+ *         subtracted from the SysAddr to produce a DramAddr.  Here, y is the
+ *         base address for the node that the SysAddr maps to.  See section
+ *         3.4.4 and the comments in sys_addr_to_dram_addr() below for more
+ *         information.
+ *
+ *     InputAddr:
+ *         A DramAddr is translated to an InputAddr before being passed to the
+ *         memory controller for the node that the DramAddr is associated
+ *         with.  The memory controller then maps the InputAddr to a csrow.
+ *         If node interleaving is not in use, then the InputAddr has the same
+ *         value as the DramAddr.  Otherwise, the InputAddr is produced by
+ *         discarding the bits used for node interleaving from the DramAddr.
+ *         See section 3.4.4 for more information.
+ *
+ *         The memory controller for a given node uses its DRAM CS Base and
+ *         DRAM CS Mask registers to map an InputAddr to a csrow.  See
+ *         sections 3.5.4 and 3.5.5 for more information.
+ */
+
+#define EDAC_AMD64_VERSION		" Ver: 3.2.0 " __DATE__
+#define EDAC_MOD_STR			"amd64_edac"
+
+/* Extended Model from CPUID, for CPU Revision numbers */
+#define OPTERON_CPU_LE_REV_C		0
+#define OPTERON_CPU_REV_D		1
+#define OPTERON_CPU_REV_E		2
+
+/* NPT processors have the following Extended Models */
+#define OPTERON_CPU_REV_F		4
+#define OPTERON_CPU_REV_FA		5
+
+/* Hardware limit on ChipSelect rows per MC and processors per system */
+#define CHIPSELECT_COUNT		8
+#define DRAM_REG_COUNT			8
+
+
+/*
+ * PCI-defined configuration space registers
+ */
+
+
+/*
+ * Function 1 - Address Map
+ */
+#define K8_DRAM_BASE_LOW		0x40
+#define K8_DRAM_LIMIT_LOW		0x44
+#define K8_DHAR				0xf0
+
+#define DHAR_VALID			BIT(0)
+#define F10_DRAM_MEM_HOIST_VALID	BIT(1)
+
+#define DHAR_BASE_MASK			0xff000000
+#define dhar_base(dhar)			(dhar & DHAR_BASE_MASK)
+
+#define K8_DHAR_OFFSET_MASK		0x0000ff00
+#define k8_dhar_offset(dhar)		((dhar & K8_DHAR_OFFSET_MASK) << 16)
+
+#define F10_DHAR_OFFSET_MASK		0x0000ff80
+					/* NOTE: Extra mask bit vs K8 */
+#define f10_dhar_offset(dhar)		((dhar & F10_DHAR_OFFSET_MASK) << 16)
+
+
+/* F10 High BASE/LIMIT registers */
+#define F10_DRAM_BASE_HIGH		0x140
+#define F10_DRAM_LIMIT_HIGH		0x144
+
+
+/*
+ * Function 2 - DRAM controller
+ */
+#define K8_DCSB0			0x40
+#define F10_DCSB1			0x140
+
+#define K8_DCSB_CS_ENABLE		BIT(0)
+#define K8_DCSB_NPT_SPARE		BIT(1)
+#define K8_DCSB_NPT_TESTFAIL		BIT(2)
+
+/*
+ * REV E: select [31:21] and [15:9] from DCSB and the shift amount to form
+ * the address
+ */
+#define REV_E_DCSB_BASE_BITS		(0xFFE0FE00ULL)
+#define REV_E_DCS_SHIFT			4
+#define REV_E_DCSM_COUNT		8
+
+#define REV_F_F1Xh_DCSB_BASE_BITS	(0x1FF83FE0ULL)
+#define REV_F_F1Xh_DCS_SHIFT		8
+
+/*
+ * REV F and later: selects [28:19] and [13:5] from DCSB and the shift amount
+ * to form the address
+ */
+#define REV_F_DCSB_BASE_BITS		(0x1FF83FE0ULL)
+#define REV_F_DCS_SHIFT			8
+#define REV_F_DCSM_COUNT		4
+#define F10_DCSM_COUNT			4
+#define F11_DCSM_COUNT			2
+
+/* DRAM CS Mask Registers */
+#define K8_DCSM0			0x60
+#define F10_DCSM1			0x160
+
+/* REV E: select [29:21] and [15:9] from DCSM */
+#define REV_E_DCSM_MASK_BITS		0x3FE0FE00
+
+/* unused bits [24:20] and [12:0] */
+#define REV_E_DCS_NOTUSED_BITS		0x01F01FFF
+
+/* REV F and later: select [28:19] and [13:5] from DCSM */
+#define REV_F_F1Xh_DCSM_MASK_BITS	0x1FF83FE0
+
+/* unused bits [26:22] and [12:0] */
+#define REV_F_F1Xh_DCS_NOTUSED_BITS	0x07C01FFF
+
+#define DBAM0				0x80
+#define DBAM1				0x180
+
+/* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
+#define DBAM_DIMM(i, reg)		((((reg) >> (4*i))) & 0xF)
+
+#define DBAM_MAX_VALUE			11
+
+
+#define F10_DCLR_0			0x90
+#define F10_DCLR_1			0x190
+#define REVE_WIDTH_128			BIT(16)
+#define F10_WIDTH_128			BIT(11)
+
+
+#define F10_DCHR_0			0x94
+#define F10_DCHR_1			0x194
+
+#define F10_DCHR_FOUR_RANK_DIMM		BIT(18)
+#define F10_DCHR_Ddr3Mode		BIT(8)
+#define F10_DCHR_MblMode		BIT(6)
+
+
+#define F10_DCTL_SEL_LOW		0x110
+
+#define dct_sel_baseaddr(pvt)    \
+	((pvt->dram_ctl_select_low) & 0xFFFFF800)
+
+#define dct_sel_interleave_addr(pvt)    \
+	(((pvt->dram_ctl_select_low) >> 6) & 0x3)
+
+enum {
+	F10_DCTL_SEL_LOW_DctSelHiRngEn	= BIT(0),
+	F10_DCTL_SEL_LOW_DctSelIntLvEn	= BIT(2),
+	F10_DCTL_SEL_LOW_DctGangEn	= BIT(4),
+	F10_DCTL_SEL_LOW_DctDatIntLv	= BIT(5),
+	F10_DCTL_SEL_LOW_DramEnable	= BIT(8),
+	F10_DCTL_SEL_LOW_MemCleared	= BIT(10),
+};
+
+#define    dct_high_range_enabled(pvt)    \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelHiRngEn)
+
+#define dct_interleave_enabled(pvt)	   \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelIntLvEn)
+
+#define dct_ganging_enabled(pvt)        \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctGangEn)
+
+#define dct_data_intlv_enabled(pvt)    \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctDatIntLv)
+
+#define dct_dram_enabled(pvt)    \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DramEnable)
+
+#define dct_memory_cleared(pvt)    \
+	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_MemCleared)
+
+
+#define F10_DCTL_SEL_HIGH		0x114
+
+
+/*
+ * Function 3 - Misc Control
+ */
+#define K8_NBCTL			0x40
+
+/* Correctable ECC error reporting enable */
+#define K8_NBCTL_CECCEn			BIT(0)
+
+/* UnCorrectable ECC error reporting enable */
+#define K8_NBCTL_UECCEn			BIT(1)
+
+#define K8_NBCFG			0x44
+#define K8_NBCFG_CHIPKILL		BIT(23)
+#define K8_NBCFG_ECC_ENABLE		BIT(22)
+
+#define K8_NBSL				0x48
+
+
+#define EXTRACT_HIGH_SYNDROME(x)	(((x) >> 24) & 0xff)
+#define EXTRACT_EXT_ERROR_CODE(x)	(((x) >> 16) & 0x1f)
+
+/* Family F10h: Normalized Extended Error Codes */
+#define F10_NBSL_EXT_ERR_RES		0x0
+#define F10_NBSL_EXT_ERR_CRC		0x1
+#define F10_NBSL_EXT_ERR_SYNC		0x2
+#define F10_NBSL_EXT_ERR_MST		0x3
+#define F10_NBSL_EXT_ERR_TGT		0x4
+#define F10_NBSL_EXT_ERR_GART		0x5
+#define F10_NBSL_EXT_ERR_RMW		0x6
+#define F10_NBSL_EXT_ERR_WDT		0x7
+#define F10_NBSL_EXT_ERR_ECC		0x8
+#define F10_NBSL_EXT_ERR_DEV		0x9
+#define F10_NBSL_EXT_ERR_LINK_DATA	0xA
+
+/* Next two are overloaded values */
+#define F10_NBSL_EXT_ERR_LINK_PROTO	0xB
+#define F10_NBSL_EXT_ERR_L3_PROTO	0xB
+
+#define F10_NBSL_EXT_ERR_NB_ARRAY	0xC
+#define F10_NBSL_EXT_ERR_DRAM_PARITY	0xD
+#define F10_NBSL_EXT_ERR_LINK_RETRY	0xE
+
+/* Next two are overloaded values */
+#define F10_NBSL_EXT_ERR_GART_WALK	0xF
+#define F10_NBSL_EXT_ERR_DEV_WALK	0xF
+
+/* 0x10 to 0x1B: Reserved */
+#define F10_NBSL_EXT_ERR_L3_DATA	0x1C
+#define F10_NBSL_EXT_ERR_L3_TAG		0x1D
+#define F10_NBSL_EXT_ERR_L3_LRU		0x1E
+
+/* K8: Normalized Extended Error Codes */
+#define K8_NBSL_EXT_ERR_ECC		0x0
+#define K8_NBSL_EXT_ERR_CRC		0x1
+#define K8_NBSL_EXT_ERR_SYNC		0x2
+#define K8_NBSL_EXT_ERR_MST		0x3
+#define K8_NBSL_EXT_ERR_TGT		0x4
+#define K8_NBSL_EXT_ERR_GART		0x5
+#define K8_NBSL_EXT_ERR_RMW		0x6
+#define K8_NBSL_EXT_ERR_WDT		0x7
+#define K8_NBSL_EXT_ERR_CHIPKILL_ECC	0x8
+#define K8_NBSL_EXT_ERR_DRAM_PARITY	0xD
+
+#define EXTRACT_ERROR_CODE(x)		((x) & 0xffff)
+#define	TEST_TLB_ERROR(x)		(((x) & 0xFFF0) == 0x0010)
+#define	TEST_MEM_ERROR(x)		(((x) & 0xFF00) == 0x0100)
+#define	TEST_BUS_ERROR(x)		(((x) & 0xF800) == 0x0800)
+#define	EXTRACT_TT_CODE(x)		(((x) >> 2) & 0x3)
+#define	EXTRACT_II_CODE(x)		(((x) >> 2) & 0x3)
+#define	EXTRACT_LL_CODE(x)		(((x) >> 0) & 0x3)
+#define	EXTRACT_RRRR_CODE(x)		(((x) >> 4) & 0xf)
+#define	EXTRACT_TO_CODE(x)		(((x) >> 8) & 0x1)
+#define	EXTRACT_PP_CODE(x)		(((x) >> 9) & 0x3)
+
+/*
+ * The following are for BUS type errors AFTER values have been normalized by
+ * shifting right
+ */
+#define K8_NBSL_PP_SRC			0x0
+#define K8_NBSL_PP_RES			0x1
+#define K8_NBSL_PP_OBS			0x2
+#define K8_NBSL_PP_GENERIC		0x3
+
+
+#define K8_NBSH				0x4C
+
+#define K8_NBSH_VALID_BIT		BIT(31)
+#define K8_NBSH_OVERFLOW		BIT(30)
+#define K8_NBSH_UNCORRECTED_ERR		BIT(29)
+#define K8_NBSH_ERR_ENABLE		BIT(28)
+#define K8_NBSH_MISC_ERR_VALID		BIT(27)
+#define K8_NBSH_VALID_ERROR_ADDR	BIT(26)
+#define K8_NBSH_PCC			BIT(25)
+#define K8_NBSH_CECC			BIT(14)
+#define K8_NBSH_UECC			BIT(13)
+#define K8_NBSH_ERR_SCRUBER		BIT(8)
+#define K8_NBSH_CORE3			BIT(3)
+#define K8_NBSH_CORE2			BIT(2)
+#define K8_NBSH_CORE1			BIT(1)
+#define K8_NBSH_CORE0			BIT(0)
+
+#define EXTRACT_LDT_LINK(x)		(((x) >> 4) & 0x7)
+#define EXTRACT_ERR_CPU_MAP(x)		((x) & 0xF)
+#define EXTRACT_LOW_SYNDROME(x)		(((x) >> 15) & 0xff)
+
+
+#define K8_NBEAL			0x50
+#define K8_NBEAH			0x54
+#define K8_SCRCTRL			0x58
+
+#define F10_NB_CFG_LOW			0x88
+#define	F10_NB_CFG_LOW_ENABLE_EXT_CFG	BIT(14)
+
+#define F10_NB_CFG_HIGH			0x8C
+
+#define F10_ONLINE_SPARE		0xB0
+#define F10_ONLINE_SPARE_SWAPDONE0(x)	((x) & BIT(1))
+#define F10_ONLINE_SPARE_SWAPDONE1(x)	((x) & BIT(3))
+#define F10_ONLINE_SPARE_BADDRAM_CS0(x) (((x) >> 4) & 0x00000007)
+#define F10_ONLINE_SPARE_BADDRAM_CS1(x) (((x) >> 8) & 0x00000007)
+
+#define F10_NB_ARRAY_ADDR		0xB8
+
+#define F10_NB_ARRAY_DRAM_ECC		0x80000000
+
+/* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline  */
+#define SET_NB_ARRAY_ADDRESS(section)	(((section) & 0x3) << 1)
+
+#define F10_NB_ARRAY_DATA		0xBC
+
+#define SET_NB_DRAM_INJECTION_WRITE(word, bits)  \
+					(BIT(((word) & 0xF) + 20) | \
+					BIT(17) |  \
+					((bits) & 0xF))
+
+#define SET_NB_DRAM_INJECTION_READ(word, bits)  \
+					(BIT(((word) & 0xF) + 20) | \
+					BIT(16) |  \
+					((bits) & 0xF))
+
+#define K8_NBCAP			0xE8
+#define K8_NBCAP_CORES			(BIT(12)|BIT(13))
+#define K8_NBCAP_CHIPKILL		BIT(4)
+#define K8_NBCAP_SECDED			BIT(3)
+#define K8_NBCAP_8_NODE			BIT(2)
+#define K8_NBCAP_DUAL_NODE		BIT(1)
+#define K8_NBCAP_DCT_DUAL		BIT(0)
+
+/*
+ * MSR Regs
+ */
+#define K8_MSR_MCGCTL			0x017b
+#define K8_MSR_MCGCTL_NBE		BIT(4)
+
+#define K8_MSR_MC4CTL			0x0410
+#define K8_MSR_MC4STAT			0x0411
+#define K8_MSR_MC4ADDR			0x0412
+
+/* AMD sets the first MC device at device ID 0x18. */
+static inline int get_mc_node_id_from_pdev(struct pci_dev *pdev)
+{
+	return PCI_SLOT(pdev->devfn) - 0x18;
+}
+
+enum amd64_chipset_families {
+	K8_CPUS = 0,
+	F10_CPUS,
+	F11_CPUS,
+};
+
+/*
+ * Structure to hold:
+ *
+ * 1) dynamically read status and error address HW registers
+ * 2) sysfs entered values
+ * 3) MCE values
+ *
+ * Depends on entry into the modules
+ */
+struct amd64_error_info_regs {
+	u32 nbcfg;
+	u32 nbsh;
+	u32 nbsl;
+	u32 nbeah;
+	u32 nbeal;
+};
+
+/* Error injection control structure */
+struct error_injection {
+	u32	section;
+	u32	word;
+	u32	bit_map;
+};
+
+struct amd64_pvt {
+	/* pci_device handles which we utilize */
+	struct pci_dev *addr_f1_ctl;
+	struct pci_dev *dram_f2_ctl;
+	struct pci_dev *misc_f3_ctl;
+
+	int mc_node_id;		/* MC index of this MC node */
+	int ext_model;		/* extended model value of this node */
+
+	struct low_ops *ops;	/* pointer to per PCI Device ID func table */
+
+	int channel_count;
+
+	/* Raw registers */
+	u32 dclr0;		/* DRAM Configuration Low DCT0 reg */
+	u32 dclr1;		/* DRAM Configuration Low DCT1 reg */
+	u32 dchr0;		/* DRAM Configuration High DCT0 reg */
+	u32 dchr1;		/* DRAM Configuration High DCT1 reg */
+	u32 nbcap;		/* North Bridge Capabilities */
+	u32 nbcfg;		/* F10 North Bridge Configuration */
+	u32 ext_nbcfg;		/* Extended F10 North Bridge Configuration */
+	u32 dhar;		/* DRAM Hoist reg */
+	u32 dbam0;		/* DRAM Base Address Mapping reg for DCT0 */
+	u32 dbam1;		/* DRAM Base Address Mapping reg for DCT1 */
+
+	/* DRAM CS Base Address Registers F2x[1,0][5C:40] */
+	u32 dcsb0[CHIPSELECT_COUNT];
+	u32 dcsb1[CHIPSELECT_COUNT];
+
+	/* DRAM CS Mask Registers F2x[1,0][6C:60] */
+	u32 dcsm0[CHIPSELECT_COUNT];
+	u32 dcsm1[CHIPSELECT_COUNT];
+
+	/*
+	 * Decoded parts of DRAM BASE and LIMIT Registers
+	 * F1x[78,70,68,60,58,50,48,40]
+	 */
+	u64 dram_base[DRAM_REG_COUNT];
+	u64 dram_limit[DRAM_REG_COUNT];
+	u8  dram_IntlvSel[DRAM_REG_COUNT];
+	u8  dram_IntlvEn[DRAM_REG_COUNT];
+	u8  dram_DstNode[DRAM_REG_COUNT];
+	u8  dram_rw_en[DRAM_REG_COUNT];
+
+	/*
+	 * The following fields are set at (load) run time, after CPU revision
+	 * has been determined, since the dct_base and dct_mask registers vary
+	 * based on revision
+	 */
+	u32 dcsb_base;		/* DCSB base bits */
+	u32 dcsm_mask;		/* DCSM mask bits */
+	u32 num_dcsm;		/* Number of DCSM registers */
+	u32 dcs_mask_notused;	/* DCSM notused mask bits */
+	u32 dcs_shift;		/* DCSB and DCSM shift value */
+
+	u64 top_mem;		/* top of memory below 4GB */
+	u64 top_mem2;		/* top of memory above 4GB */
+
+	u32 dram_ctl_select_low;	/* DRAM Controller Select Low Reg */
+	u32 dram_ctl_select_high;	/* DRAM Controller Select High Reg */
+	u32 online_spare;               /* On-Line spare Reg */
+
+	/* temp storage for when input is received from sysfs */
+	struct amd64_error_info_regs ctl_error_info;
+
+	/* place to store error injection parameters prior to issue */
+	struct error_injection injection;
+
+	/* Save old hw registers' values before we modified them */
+	u32 nbctl_mcgctl_saved;		/* When true, following 2 are valid */
+	u32 old_nbctl;
+	u32 *old_mcgctl;		/* per core on this node */
+
+	/* MC Type Index value: socket F vs Family 10h */
+	u32 mc_type_index;
+
+	/* misc settings */
+	struct flags {
+		unsigned long cf8_extcfg:1;
+	} flags;
+};
+
+struct scrubrate {
+       u32 scrubval;           /* bit pattern for scrub rate */
+       u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
+};
+
+extern struct scrubrate scrubrates[23];
+extern u32 revf_quad_ddr2_shift[16];
+extern const char *tt_msgs[4];
+extern const char *ll_msgs[4];
+extern const char *rrrr_msgs[16];
+extern const char *to_msgs[2];
+extern const char *pp_msgs[4];
+extern const char *ii_msgs[4];
+extern const char *ext_msgs[32];
+extern const char *htlink_msgs[8];
+
+/*
+ * Each of the PCI Device IDs types have their own set of hardware accessor
+ * functions and per device encoding/decoding logic.
+ */
+struct low_ops {
+	int (*probe_valid_hardware)(struct amd64_pvt *pvt);
+	int (*early_channel_count)(struct amd64_pvt *pvt);
+
+	u64 (*get_error_address)(struct mem_ctl_info *mci,
+			struct amd64_error_info_regs *info);
+	void (*read_dram_base_limit)(struct amd64_pvt *pvt, int dram);
+	void (*read_dram_ctl_register)(struct amd64_pvt *pvt);
+	void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci,
+					struct amd64_error_info_regs *info,
+					u64 SystemAddr);
+	int (*dbam_map_to_pages)(struct amd64_pvt *pvt, int dram_map);
+};
+
+struct amd64_family_type {
+	const char *ctl_name;
+	u16 addr_f1_ctl;
+	u16 misc_f3_ctl;
+	struct low_ops ops;
+};
+
+static struct amd64_family_type amd64_family_types[];
+
+static inline const char *get_amd_family_name(int index)
+{
+	return amd64_family_types[index].ctl_name;
+}
+
+static inline struct low_ops *family_ops(int index)
+{
+	return &amd64_family_types[index].ops;
+}
+
+/*
+ * For future CPU versions, verify the following as new 'slow' rates appear and
+ * modify the necessary skip values for the supported CPU.
+ */
+#define K8_MIN_SCRUB_RATE_BITS	0x0
+#define F10_MIN_SCRUB_RATE_BITS	0x5
+#define F11_MIN_SCRUB_RATE_BITS	0x6
+
+int amd64_process_error_info(struct mem_ctl_info *mci,
+			     struct amd64_error_info_regs *info,
+			     int handle_errors);
+int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
+			     u64 *hole_offset, u64 *hole_size);
-- 
cgit v0.10.2


From fd3d6780f7358225a6ea1ffb1bb8fd8b2d6df446 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 19:50:23 +0200
Subject: amd64_edac: add debugging/testing code

This is for dumping different registers and testing the address mapping
logic using the ECC syndromes.

Borislav:

- split sysfs attrs per file
- use more conform names for the sysfs attrs
- fix function return value patterns
- cleanup/fix comments
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
new file mode 100644
index 0000000..0a41b24
--- /dev/null
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -0,0 +1,255 @@
+#include "amd64_edac.h"
+
+/*
+ * accept a hex value and store it into the virtual error register file, field:
+ * nbeal and nbeah. Assume virtual error values have already been set for: NBSL,
+ * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and
+ * CHANNEL
+ */
+static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
+				size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long long value;
+	int ret = 0;
+
+	ret = strict_strtoull(data, 16, &value);
+	if (ret != -EINVAL) {
+		debugf0("received NBEA= 0x%llx\n", value);
+
+		/* place the value into the virtual error packet */
+		pvt->ctl_error_info.nbeal = (u32) value;
+		value >>= 32;
+		pvt->ctl_error_info.nbeah = (u32) value;
+
+		/* Process the Mapping request */
+		/* TODO: Add race prevention */
+		amd64_process_error_info(mci, &pvt->ctl_error_info, 1);
+
+		return count;
+	}
+	return ret;
+}
+
+/* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */
+static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u64 value;
+
+	value = pvt->ctl_error_info.nbeah;
+	value <<= 32;
+	value |= pvt->ctl_error_info.nbeal;
+
+	return sprintf(data, "%llx\n", value);
+}
+
+/* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */
+static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data,
+				size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 16, &value);
+	if (ret != -EINVAL) {
+		debugf0("received NBSL= 0x%lx\n", value);
+
+		pvt->ctl_error_info.nbsl = (u32) value;
+
+		return count;
+	}
+	return ret;
+}
+
+/* display back what the last NBSL value written */
+static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 value;
+
+	value = pvt->ctl_error_info.nbsl;
+
+	return sprintf(data, "%x\n", value);
+}
+
+/* store the NBSH (MCA NB Status High) value user desires */
+static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data,
+				size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 16, &value);
+	if (ret != -EINVAL) {
+		debugf0("received NBSH= 0x%lx\n", value);
+
+		pvt->ctl_error_info.nbsh = (u32) value;
+
+		return count;
+	}
+	return ret;
+}
+
+/* display back what the last NBSH value written */
+static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 value;
+
+	value = pvt->ctl_error_info.nbsh;
+
+	return sprintf(data, "%x\n", value);
+}
+
+/* accept and store the NBCFG (MCA NB Configuration) value user desires */
+static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci,
+					const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 16, &value);
+	if (ret != -EINVAL) {
+		debugf0("received NBCFG= 0x%lx\n", value);
+
+		pvt->ctl_error_info.nbcfg = (u32) value;
+
+		return count;
+	}
+	return ret;
+}
+
+/* various show routines for the controls of a MCI */
+static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg);
+}
+
+
+static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%x\n", pvt->dhar);
+}
+
+
+static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%x\n", pvt->dbam0);
+}
+
+
+static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%llx\n", pvt->top_mem);
+}
+
+
+static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return sprintf(data, "%llx\n", pvt->top_mem2);
+}
+
+static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
+{
+	u64 hole_base = 0;
+	u64 hole_offset = 0;
+	u64 hole_size = 0;
+
+	amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size);
+
+	return sprintf(data, "%llx %llx %llx\n", hole_base, hole_offset,
+						 hole_size);
+}
+
+/*
+ * update NUM_DBG_ATTRS in case you add new members
+ */
+struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
+
+	{
+		.attr = {
+			.name = "nbea_ctl",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = amd64_nbea_show,
+		.store = amd64_nbea_store,
+	},
+	{
+		.attr = {
+			.name = "nbsl_ctl",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = amd64_nbsl_show,
+		.store = amd64_nbsl_store,
+	},
+	{
+		.attr = {
+			.name = "nbsh_ctl",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = amd64_nbsh_show,
+		.store = amd64_nbsh_store,
+	},
+	{
+		.attr = {
+			.name = "nbcfg_ctl",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = amd64_nbcfg_show,
+		.store = amd64_nbcfg_store,
+	},
+	{
+		.attr = {
+			.name = "dhar",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_dhar_show,
+		.store = NULL,
+	},
+	{
+		.attr = {
+			.name = "dbam",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_dbam_show,
+		.store = NULL,
+	},
+	{
+		.attr = {
+			.name = "topmem",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_topmem_show,
+		.store = NULL,
+	},
+	{
+		.attr = {
+			.name = "topmem2",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_topmem2_show,
+		.store = NULL,
+	},
+	{
+		.attr = {
+			.name = "dram_hole",
+			.mode = (S_IRUGO)
+		},
+		.show = amd64_hole_show,
+		.store = NULL,
+	},
+};
-- 
cgit v0.10.2


From eb919690be994386eac326f8c53c4540602de563 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Tue, 5 May 2009 20:07:11 +0200
Subject: amd64_edac: add DRAM error injection logic using sysfs

Borislav:
- rename sysfs attrs to more conform names
- cleanup/fix comments according to BKDG text
- fix function return value patterns
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
new file mode 100644
index 0000000..d3675b7
--- /dev/null
+++ b/drivers/edac/amd64_edac_inj.c
@@ -0,0 +1,185 @@
+#include "amd64_edac.h"
+
+/*
+ * store error injection section value which refers to one of 4 16-byte sections
+ * within a 64-byte cacheline
+ *
+ * range: 0..3
+ */
+static ssize_t amd64_inject_section_store(struct mem_ctl_info *mci,
+					  const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 10, &value);
+	if (ret != -EINVAL) {
+		pvt->injection.section = (u32) value;
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * store error injection word value which refers to one of 9 16-bit word of the
+ * 16-byte (128-bit + ECC bits) section
+ *
+ * range: 0..8
+ */
+static ssize_t amd64_inject_word_store(struct mem_ctl_info *mci,
+					const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 10, &value);
+	if (ret != -EINVAL) {
+
+		value = (value <= 8) ? value : 0;
+		pvt->injection.word = (u32) value;
+
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * store 16 bit error injection vector which enables injecting errors to the
+ * corresponding bit within the error injection word above. When used during a
+ * DRAM ECC read, it holds the contents of the of the DRAM ECC bits.
+ */
+static ssize_t amd64_inject_ecc_vector_store(struct mem_ctl_info *mci,
+					     const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 16, &value);
+	if (ret != -EINVAL) {
+
+		pvt->injection.bit_map = (u32) value & 0xFFFF;
+
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * Do a DRAM ECC read. Assemble staged values in the pvt area, format into
+ * fields needed by the injection registers and read the NB Array Data Port.
+ */
+static ssize_t amd64_inject_read_store(struct mem_ctl_info *mci,
+					const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	u32 section, word_bits;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 10, &value);
+	if (ret != -EINVAL) {
+
+		/* Form value to choose 16-byte section of cacheline */
+		section = F10_NB_ARRAY_DRAM_ECC |
+				SET_NB_ARRAY_ADDRESS(pvt->injection.section);
+		pci_write_config_dword(pvt->misc_f3_ctl,
+					F10_NB_ARRAY_ADDR, section);
+
+		word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word,
+						pvt->injection.bit_map);
+
+		/* Issue 'word' and 'bit' along with the READ request */
+		pci_write_config_dword(pvt->misc_f3_ctl,
+					F10_NB_ARRAY_DATA, word_bits);
+
+		debugf0("section=0x%x word_bits=0x%x\n", section, word_bits);
+
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * Do a DRAM ECC write. Assemble staged values in the pvt area and format into
+ * fields needed by the injection registers.
+ */
+static ssize_t amd64_inject_write_store(struct mem_ctl_info *mci,
+					const char *data, size_t count)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	unsigned long value;
+	u32 section, word_bits;
+	int ret = 0;
+
+	ret = strict_strtoul(data, 10, &value);
+	if (ret != -EINVAL) {
+
+		/* Form value to choose 16-byte section of cacheline */
+		section = F10_NB_ARRAY_DRAM_ECC |
+				SET_NB_ARRAY_ADDRESS(pvt->injection.section);
+		pci_write_config_dword(pvt->misc_f3_ctl,
+					F10_NB_ARRAY_ADDR, section);
+
+		word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word,
+						pvt->injection.bit_map);
+
+		/* Issue 'word' and 'bit' along with the READ request */
+		pci_write_config_dword(pvt->misc_f3_ctl,
+					F10_NB_ARRAY_DATA, word_bits);
+
+		debugf0("section=0x%x word_bits=0x%x\n", section, word_bits);
+
+		return count;
+	}
+	return ret;
+}
+
+/*
+ * update NUM_INJ_ATTRS in case you add new members
+ */
+struct mcidev_sysfs_attribute amd64_inj_attrs[] = {
+
+	{
+		.attr = {
+			.name = "inject_section",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_section_store,
+	},
+	{
+		.attr = {
+			.name = "inject_word",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_word_store,
+	},
+	{
+		.attr = {
+			.name = "inject_ecc_vector",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_ecc_vector_store,
+	},
+	{
+		.attr = {
+			.name = "inject_write",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_write_store,
+	},
+	{
+		.attr = {
+			.name = "inject_read",
+			.mode = (S_IRUGO | S_IWUSR)
+		},
+		.show = NULL,
+		.store = amd64_inject_read_store,
+	},
+};
-- 
cgit v0.10.2


From b52401cecedf669211f0b7a5095abcb6d3fc82c2 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Wed, 6 May 2009 17:57:20 +0200
Subject: amd64_edac: add MCA error types

Borislav:
- cleanup comments

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac_err_types.c b/drivers/edac/amd64_edac_err_types.c
new file mode 100644
index 0000000..f212ff1
--- /dev/null
+++ b/drivers/edac/amd64_edac_err_types.c
@@ -0,0 +1,161 @@
+#include "amd64_edac.h"
+
+/*
+ * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
+ * for DDR2 DRAM mapping.
+ */
+u32 revf_quad_ddr2_shift[] = {
+	0,	/* 0000b NULL DIMM (128mb) */
+	28,	/* 0001b 256mb */
+	29,	/* 0010b 512mb */
+	29,	/* 0011b 512mb */
+	29,	/* 0100b 512mb */
+	30,	/* 0101b 1gb */
+	30,	/* 0110b 1gb */
+	31,	/* 0111b 2gb */
+	31,	/* 1000b 2gb */
+	32,	/* 1001b 4gb */
+	32,	/* 1010b 4gb */
+	33,	/* 1011b 8gb */
+	0,	/* 1100b future */
+	0,	/* 1101b future */
+	0,	/* 1110b future */
+	0	/* 1111b future */
+};
+
+/*
+ * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
+ * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
+ * or higher value'.
+ *
+ *FIXME: Produce a better mapping/linearisation.
+ */
+
+struct scrubrate scrubrates[] = {
+	{ 0x01, 1600000000UL},
+	{ 0x02, 800000000UL},
+	{ 0x03, 400000000UL},
+	{ 0x04, 200000000UL},
+	{ 0x05, 100000000UL},
+	{ 0x06, 50000000UL},
+	{ 0x07, 25000000UL},
+	{ 0x08, 12284069UL},
+	{ 0x09, 6274509UL},
+	{ 0x0A, 3121951UL},
+	{ 0x0B, 1560975UL},
+	{ 0x0C, 781440UL},
+	{ 0x0D, 390720UL},
+	{ 0x0E, 195300UL},
+	{ 0x0F, 97650UL},
+	{ 0x10, 48854UL},
+	{ 0x11, 24427UL},
+	{ 0x12, 12213UL},
+	{ 0x13, 6101UL},
+	{ 0x14, 3051UL},
+	{ 0x15, 1523UL},
+	{ 0x16, 761UL},
+	{ 0x00, 0UL},        /* scrubbing off */
+};
+
+/*
+ * string representation for the different MCA reported error types, see F3x48
+ * or MSR0000_0411.
+ */
+const char *tt_msgs[] = {        /* transaction type */
+	"instruction",
+	"data",
+	"generic",
+	"reserved"
+};
+
+const char *ll_msgs[] = {	/* cache level */
+	"L0",
+	"L1",
+	"L2",
+	"L3/generic"
+};
+
+const char *rrrr_msgs[] = {
+	"generic",
+	"generic read",
+	"generic write",
+	"data read",
+	"data write",
+	"inst fetch",
+	"prefetch",
+	"evict",
+	"snoop",
+	"reserved RRRR= 9",
+	"reserved RRRR= 10",
+	"reserved RRRR= 11",
+	"reserved RRRR= 12",
+	"reserved RRRR= 13",
+	"reserved RRRR= 14",
+	"reserved RRRR= 15"
+};
+
+const char *pp_msgs[] = {	/* participating processor */
+	"local node originated (SRC)",
+	"local node responded to request (RES)",
+	"local node observed as 3rd party (OBS)",
+	"generic"
+};
+
+const char *to_msgs[] = {
+	"no timeout",
+	"timed out"
+};
+
+const char *ii_msgs[] = {	/* memory or i/o */
+	"mem access",
+	"reserved",
+	"i/o access",
+	"generic"
+};
+
+/* Map the 5 bits of Extended Error code to the string table. */
+const char *ext_msgs[] = {	/* extended error */
+	"K8 ECC error/F10 reserved",	/* 0_0000b */
+	"CRC error",			/* 0_0001b */
+	"sync error",			/* 0_0010b */
+	"mst abort",			/* 0_0011b */
+	"tgt abort",			/* 0_0100b */
+	"GART error",			/* 0_0101b */
+	"RMW error",			/* 0_0110b */
+	"Wdog timer error",		/* 0_0111b */
+	"F10-ECC/K8-Chipkill error",	/* 0_1000b */
+	"DEV Error",			/* 0_1001b */
+	"Link Data error",		/* 0_1010b */
+	"Link or L3 Protocol error",	/* 0_1011b */
+	"NB Array error",		/* 0_1100b */
+	"DRAM Parity error",		/* 0_1101b */
+	"Link Retry/GART Table Walk/DEV Table Walk error", /* 0_1110b */
+	"Res 0x0ff error",		/* 0_1111b */
+	"Res 0x100 error",		/* 1_0000b */
+	"Res 0x101 error",		/* 1_0001b */
+	"Res 0x102 error",		/* 1_0010b */
+	"Res 0x103 error",		/* 1_0011b */
+	"Res 0x104 error",		/* 1_0100b */
+	"Res 0x105 error",		/* 1_0101b */
+	"Res 0x106 error",		/* 1_0110b */
+	"Res 0x107 error",		/* 1_0111b */
+	"Res 0x108 error",		/* 1_1000b */
+	"Res 0x109 error",		/* 1_1001b */
+	"Res 0x10A error",		/* 1_1010b */
+	"Res 0x10B error",		/* 1_1011b */
+	"L3 Cache Data error",		/* 1_1100b */
+	"L3 CacheTag error",		/* 1_1101b */
+	"L3 Cache LRU error",		/* 1_1110b */
+	"Res 0x1FF error"		/* 1_1111b */
+};
+
+const char *htlink_msgs[] = {
+	"none",
+	"1",
+	"2",
+	"1 2",
+	"3",
+	"1 3",
+	"2 3",
+	"1 2 3"
+};
-- 
cgit v0.10.2


From 2bc65418724a0ddbb20c9eeb9de8285af7748c16 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 4 May 2009 20:11:14 +0200
Subject: amd64_edac: add memory scrubber interface

Borislav:
- fix/cleanup comments
- fix function return value patterns
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
new file mode 100644
index 0000000..6876d43
--- /dev/null
+++ b/drivers/edac/amd64_edac.c
@@ -0,0 +1,130 @@
+#include "amd64_edac.h"
+
+static struct edac_pci_ctl_info *amd64_ctl_pci;
+
+static int report_gart_errors;
+module_param(report_gart_errors, int, 0644);
+
+/*
+ * Set by command line parameter. If BIOS has enabled the ECC, this override is
+ * cleared to prevent re-enabling the hardware by this driver.
+ */
+static int ecc_enable_override;
+module_param(ecc_enable_override, int, 0644);
+
+/* Lookup table for all possible MC control instances */
+struct amd64_pvt;
+static struct mem_ctl_info *mci_lookup[MAX_NUMNODES];
+static struct amd64_pvt *pvt_lookup[MAX_NUMNODES];
+
+/*
+ * Memory scrubber control interface. For K8, memory scrubbing is handled by
+ * hardware and can involve L2 cache, dcache as well as the main memory. With
+ * F10, this is extended to L3 cache scrubbing on CPU models sporting that
+ * functionality.
+ *
+ * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
+ * (dram) over to cache lines. This is nasty, so we will use bandwidth in
+ * bytes/sec for the setting.
+ *
+ * Currently, we only do dram scrubbing. If the scrubbing is done in software on
+ * other archs, we might not have access to the caches directly.
+ */
+
+/*
+ * scan the scrub rate mapping table for a close or matching bandwidth value to
+ * issue. If requested is too big, then use last maximum value found.
+ */
+static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
+				       u32 min_scrubrate)
+{
+	u32 scrubval;
+	int i;
+
+	/*
+	 * map the configured rate (new_bw) to a value specific to the AMD64
+	 * memory controller and apply to register. Search for the first
+	 * bandwidth entry that is greater or equal than the setting requested
+	 * and program that. If at last entry, turn off DRAM scrubbing.
+	 */
+	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
+		/*
+		 * skip scrub rates which aren't recommended
+		 * (see F10 BKDG, F3x58)
+		 */
+		if (scrubrates[i].scrubval < min_scrubrate)
+			continue;
+
+		if (scrubrates[i].bandwidth <= new_bw)
+			break;
+
+		/*
+		 * if no suitable bandwidth found, turn off DRAM scrubbing
+		 * entirely by falling back to the last element in the
+		 * scrubrates array.
+		 */
+	}
+
+	scrubval = scrubrates[i].scrubval;
+	if (scrubval)
+		edac_printk(KERN_DEBUG, EDAC_MC,
+			    "Setting scrub rate bandwidth: %u\n",
+			    scrubrates[i].bandwidth);
+	else
+		edac_printk(KERN_DEBUG, EDAC_MC, "Turning scrubbing off.\n");
+
+	pci_write_bits32(ctl, K8_SCRCTRL, scrubval, 0x001F);
+
+	return 0;
+}
+
+static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 min_scrubrate = 0x0;
+
+	switch (boot_cpu_data.x86) {
+	case 0xf:
+		min_scrubrate = K8_MIN_SCRUB_RATE_BITS;
+		break;
+	case 0x10:
+		min_scrubrate = F10_MIN_SCRUB_RATE_BITS;
+		break;
+	case 0x11:
+		min_scrubrate = F11_MIN_SCRUB_RATE_BITS;
+		break;
+
+	default:
+		amd64_printk(KERN_ERR, "Unsupported family!\n");
+		break;
+	}
+	return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, *bandwidth,
+			min_scrubrate);
+}
+
+static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 scrubval = 0;
+	int status = -1, i, ret = 0;
+
+	ret = pci_read_config_dword(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval);
+	if (ret)
+		debugf0("Reading K8_SCRCTRL failed\n");
+
+	scrubval = scrubval & 0x001F;
+
+	edac_printk(KERN_DEBUG, EDAC_MC,
+		    "pci-read, sdram scrub control value: %d \n", scrubval);
+
+	for (i = 0; ARRAY_SIZE(scrubrates); i++) {
+		if (scrubrates[i].scrubval == scrubval) {
+			*bw = scrubrates[i].bandwidth;
+			status = 0;
+			break;
+		}
+	}
+
+	return status;
+}
+
-- 
cgit v0.10.2


From 6775763a2377e1ea865299b6daadc875d622de3f Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 15:53:22 +0200
Subject: amd64_edac: add sys addr to memory controller mapping helpers

Borislav:

- cleanup comments
- cleanup debug calls
- simplify find_mc_by_sys_addr's exit path

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6876d43..a774f34 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -128,3 +128,144 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
 	return status;
 }
 
+/* Map from a CSROW entry to the mask entry that operates on it */
+static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow)
+{
+	return csrow >> (pvt->num_dcsm >> 3);
+}
+
+/* return the 'base' address the i'th CS entry of the 'dct' DRAM controller */
+static u32 amd64_get_dct_base(struct amd64_pvt *pvt, int dct, int csrow)
+{
+	if (dct == 0)
+		return pvt->dcsb0[csrow];
+	else
+		return pvt->dcsb1[csrow];
+}
+
+/*
+ * Return the 'mask' address the i'th CS entry. This function is needed because
+ * there number of DCSM registers on Rev E and prior vs Rev F and later is
+ * different.
+ */
+static u32 amd64_get_dct_mask(struct amd64_pvt *pvt, int dct, int csrow)
+{
+	if (dct == 0)
+		return pvt->dcsm0[amd64_map_to_dcs_mask(pvt, csrow)];
+	else
+		return pvt->dcsm1[amd64_map_to_dcs_mask(pvt, csrow)];
+}
+
+
+/*
+ * In *base and *limit, pass back the full 40-bit base and limit physical
+ * addresses for the node given by node_id.  This information is obtained from
+ * DRAM Base (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers. The
+ * base and limit addresses are of type SysAddr, as defined at the start of
+ * section 3.4.4 (p. 70).  They are the lowest and highest physical addresses
+ * in the address range they represent.
+ */
+static void amd64_get_base_and_limit(struct amd64_pvt *pvt, int node_id,
+			       u64 *base, u64 *limit)
+{
+	*base = pvt->dram_base[node_id];
+	*limit = pvt->dram_limit[node_id];
+}
+
+/*
+ * Return 1 if the SysAddr given by sys_addr matches the base/limit associated
+ * with node_id
+ */
+static int amd64_base_limit_match(struct amd64_pvt *pvt,
+					u64 sys_addr, int node_id)
+{
+	u64 base, limit, addr;
+
+	amd64_get_base_and_limit(pvt, node_id, &base, &limit);
+
+	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
+	 * all ones if the most significant implemented address bit is 1.
+	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
+	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
+	 * Application Programming.
+	 */
+	addr = sys_addr & 0x000000ffffffffffull;
+
+	return (addr >= base) && (addr <= limit);
+}
+
+/*
+ * Attempt to map a SysAddr to a node. On success, return a pointer to the
+ * mem_ctl_info structure for the node that the SysAddr maps to.
+ *
+ * On failure, return NULL.
+ */
+static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
+						u64 sys_addr)
+{
+	struct amd64_pvt *pvt;
+	int node_id;
+	u32 intlv_en, bits;
+
+	/*
+	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
+	 * 3.4.4.2) registers to map the SysAddr to a node ID.
+	 */
+	pvt = mci->pvt_info;
+
+	/*
+	 * The value of this field should be the same for all DRAM Base
+	 * registers.  Therefore we arbitrarily choose to read it from the
+	 * register for node 0.
+	 */
+	intlv_en = pvt->dram_IntlvEn[0];
+
+	if (intlv_en == 0) {
+		for (node_id = 0; ; ) {
+			if (amd64_base_limit_match(pvt, sys_addr, node_id))
+				break;
+
+			if (++node_id >= DRAM_REG_COUNT)
+				goto err_no_match;
+		}
+		goto found;
+	}
+
+	if (unlikely((intlv_en != (0x01 << 8)) &&
+		     (intlv_en != (0x03 << 8)) &&
+		     (intlv_en != (0x07 << 8)))) {
+		amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from "
+			     "IntlvEn field of DRAM Base Register for node 0: "
+			     "This probably indicates a BIOS bug.\n", intlv_en);
+		return NULL;
+	}
+
+	bits = (((u32) sys_addr) >> 12) & intlv_en;
+
+	for (node_id = 0; ; ) {
+		if ((pvt->dram_limit[node_id] & intlv_en) == bits)
+			break;	/* intlv_sel field matches */
+
+		if (++node_id >= DRAM_REG_COUNT)
+			goto err_no_match;
+	}
+
+	/* sanity test for sys_addr */
+	if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
+		amd64_printk(KERN_WARNING,
+			  "%s(): sys_addr 0x%lx falls outside base/limit "
+			  "address range for node %d with node interleaving "
+			  "enabled.\n", __func__, (unsigned long)sys_addr,
+			  node_id);
+		return NULL;
+	}
+
+found:
+	return edac_mc_find(node_id);
+
+err_no_match:
+	debugf2("sys_addr 0x%lx doesn't match any node\n",
+		(unsigned long)sys_addr);
+
+	return NULL;
+}
-- 
cgit v0.10.2


From e2ce7255e84db656853e91536e6023f92ff89f97 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 15:57:12 +0200
Subject: amd64_edac: add functionality to compute the DRAM hole

Borislav:

- cleanup/fix comments, add BKDG refs
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index a774f34..4716fb5 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -269,3 +269,169 @@ err_no_match:
 
 	return NULL;
 }
+
+/*
+ * Extract the DRAM CS base address from selected csrow register.
+ */
+static u64 base_from_dct_base(struct amd64_pvt *pvt, int csrow)
+{
+	return ((u64) (amd64_get_dct_base(pvt, 0, csrow) & pvt->dcsb_base)) <<
+				pvt->dcs_shift;
+}
+
+/*
+ * Extract the mask from the dcsb0[csrow] entry in a CPU revision-specific way.
+ */
+static u64 mask_from_dct_mask(struct amd64_pvt *pvt, int csrow)
+{
+	u64 dcsm_bits, other_bits;
+	u64 mask;
+
+	/* Extract bits from DRAM CS Mask. */
+	dcsm_bits = amd64_get_dct_mask(pvt, 0, csrow) & pvt->dcsm_mask;
+
+	other_bits = pvt->dcsm_mask;
+	other_bits = ~(other_bits << pvt->dcs_shift);
+
+	/*
+	 * The extracted bits from DCSM belong in the spaces represented by
+	 * the cleared bits in other_bits.
+	 */
+	mask = (dcsm_bits << pvt->dcs_shift) | other_bits;
+
+	return mask;
+}
+
+/*
+ * @input_addr is an InputAddr associated with the node given by mci. Return the
+ * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
+ */
+static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
+{
+	struct amd64_pvt *pvt;
+	int csrow;
+	u64 base, mask;
+
+	pvt = mci->pvt_info;
+
+	/*
+	 * Here we use the DRAM CS Base and DRAM CS Mask registers. For each CS
+	 * base/mask register pair, test the condition shown near the start of
+	 * section 3.5.4 (p. 84, BKDG #26094, K8, revA-E).
+	 */
+	for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) {
+
+		/* This DRAM chip select is disabled on this node */
+		if ((pvt->dcsb0[csrow] & K8_DCSB_CS_ENABLE) == 0)
+			continue;
+
+		base = base_from_dct_base(pvt, csrow);
+		mask = ~mask_from_dct_mask(pvt, csrow);
+
+		if ((input_addr & mask) == (base & mask)) {
+			debugf2("InputAddr 0x%lx matches csrow %d (node %d)\n",
+				(unsigned long)input_addr, csrow,
+				pvt->mc_node_id);
+
+			return csrow;
+		}
+	}
+
+	debugf2("no matching csrow for InputAddr 0x%lx (MC node %d)\n",
+		(unsigned long)input_addr, pvt->mc_node_id);
+
+	return -1;
+}
+
+/*
+ * Return the base value defined by the DRAM Base register for the node
+ * represented by mci.  This function returns the full 40-bit value despite the
+ * fact that the register only stores bits 39-24 of the value. See section
+ * 3.4.4.1 (BKDG #26094, K8, revA-E)
+ */
+static inline u64 get_dram_base(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return pvt->dram_base[pvt->mc_node_id];
+}
+
+/*
+ * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
+ * for the node represented by mci. Info is passed back in *hole_base,
+ * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
+ * info is invalid. Info may be invalid for either of the following reasons:
+ *
+ * - The revision of the node is not E or greater.  In this case, the DRAM Hole
+ *   Address Register does not exist.
+ *
+ * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
+ *   indicating that its contents are not valid.
+ *
+ * The values passed back in *hole_base, *hole_offset, and *hole_size are
+ * complete 32-bit values despite the fact that the bitfields in the DHAR
+ * only represent bits 31-24 of the base and offset values.
+ */
+int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
+			     u64 *hole_offset, u64 *hole_size)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u64 base;
+
+	/* only revE and later have the DRAM Hole Address Register */
+	if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_E) {
+		debugf1("  revision %d for node %d does not support DHAR\n",
+			pvt->ext_model, pvt->mc_node_id);
+		return 1;
+	}
+
+	/* only valid for Fam10h */
+	if (boot_cpu_data.x86 == 0x10 &&
+	    (pvt->dhar & F10_DRAM_MEM_HOIST_VALID) == 0) {
+		debugf1("  Dram Memory Hoisting is DISABLED on this system\n");
+		return 1;
+	}
+
+	if ((pvt->dhar & DHAR_VALID) == 0) {
+		debugf1("  Dram Memory Hoisting is DISABLED on this node %d\n",
+			pvt->mc_node_id);
+		return 1;
+	}
+
+	/* This node has Memory Hoisting */
+
+	/* +------------------+--------------------+--------------------+-----
+	 * | memory           | DRAM hole          | relocated          |
+	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
+	 * |                  |                    | DRAM hole          |
+	 * |                  |                    | [0x100000000,      |
+	 * |                  |                    |  (0x100000000+     |
+	 * |                  |                    |   (0xffffffff-x))] |
+	 * +------------------+--------------------+--------------------+-----
+	 *
+	 * Above is a diagram of physical memory showing the DRAM hole and the
+	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
+	 * starts at address x (the base address) and extends through address
+	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
+	 * addresses in the hole so that they start at 0x100000000.
+	 */
+
+	base = dhar_base(pvt->dhar);
+
+	*hole_base = base;
+	*hole_size = (0x1ull << 32) - base;
+
+	if (boot_cpu_data.x86 > 0xf)
+		*hole_offset = f10_dhar_offset(pvt->dhar);
+	else
+		*hole_offset = k8_dhar_offset(pvt->dhar);
+
+	debugf1("  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
+		pvt->mc_node_id, (unsigned long)*hole_base,
+		(unsigned long)*hole_offset, (unsigned long)*hole_size);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
+
+
-- 
cgit v0.10.2


From 93c2df58b5b1a434cca8f60067e0e12d1942b7f1 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 4 May 2009 20:46:50 +0200
Subject: amd64_edac: add DRAM address type conversion facilities

Borislav:

- cleanup/fix comments, add BKDG refs
- fix function return value patterns
- cleanup dbg calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 4716fb5..28f85c9 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -434,4 +434,298 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
 }
 EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
 
+/*
+ * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
+ * assumed that sys_addr maps to the node given by mci.
+ *
+ * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
+ * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
+ * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
+ * then it is also involved in translating a SysAddr to a DramAddr. Sections
+ * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
+ * These parts of the documentation are unclear. I interpret them as follows:
+ *
+ * When node n receives a SysAddr, it processes the SysAddr as follows:
+ *
+ * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
+ *    Limit registers for node n. If the SysAddr is not within the range
+ *    specified by the base and limit values, then node n ignores the Sysaddr
+ *    (since it does not map to node n). Otherwise continue to step 2 below.
+ *
+ * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
+ *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
+ *    the range of relocated addresses (starting at 0x100000000) from the DRAM
+ *    hole. If not, skip to step 3 below. Else get the value of the
+ *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
+ *    offset defined by this value from the SysAddr.
+ *
+ * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
+ *    Base register for node n. To obtain the DramAddr, subtract the base
+ *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
+ */
+static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
+{
+	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
+	int ret = 0;
+
+	dram_base = get_dram_base(mci);
+
+	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
+				      &hole_size);
+	if (!ret) {
+		if ((sys_addr >= (1ull << 32)) &&
+		    (sys_addr < ((1ull << 32) + hole_size))) {
+			/* use DHAR to translate SysAddr to DramAddr */
+			dram_addr = sys_addr - hole_offset;
+
+			debugf2("using DHAR to translate SysAddr 0x%lx to "
+				"DramAddr 0x%lx\n",
+				(unsigned long)sys_addr,
+				(unsigned long)dram_addr);
+
+			return dram_addr;
+		}
+	}
+
+	/*
+	 * Translate the SysAddr to a DramAddr as shown near the start of
+	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
+	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
+	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
+	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
+	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
+	 * Programmer's Manual Volume 1 Application Programming.
+	 */
+	dram_addr = (sys_addr & 0xffffffffffull) - dram_base;
+
+	debugf2("using DRAM Base register to translate SysAddr 0x%lx to "
+		"DramAddr 0x%lx\n", (unsigned long)sys_addr,
+		(unsigned long)dram_addr);
+	return dram_addr;
+}
+
+/*
+ * @intlv_en is the value of the IntlvEn field from a DRAM Base register
+ * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
+ * for node interleaving.
+ */
+static int num_node_interleave_bits(unsigned intlv_en)
+{
+	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
+	int n;
+
+	BUG_ON(intlv_en > 7);
+	n = intlv_shift_table[intlv_en];
+	return n;
+}
+
+/* Translate the DramAddr given by @dram_addr to an InputAddr. */
+static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
+{
+	struct amd64_pvt *pvt;
+	int intlv_shift;
+	u64 input_addr;
+
+	pvt = mci->pvt_info;
+
+	/*
+	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
+	 * concerning translating a DramAddr to an InputAddr.
+	 */
+	intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
+	input_addr = ((dram_addr >> intlv_shift) & 0xffffff000ull) +
+	    (dram_addr & 0xfff);
+
+	debugf2("  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
+		intlv_shift, (unsigned long)dram_addr,
+		(unsigned long)input_addr);
+
+	return input_addr;
+}
+
+/*
+ * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
+ * assumed that @sys_addr maps to the node given by mci.
+ */
+static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
+{
+	u64 input_addr;
+
+	input_addr =
+	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
+
+	debugf2("SysAdddr 0x%lx translates to InputAddr 0x%lx\n",
+		(unsigned long)sys_addr, (unsigned long)input_addr);
+
+	return input_addr;
+}
+
+
+/*
+ * @input_addr is an InputAddr associated with the node represented by mci.
+ * Translate @input_addr to a DramAddr and return the result.
+ */
+static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
+{
+	struct amd64_pvt *pvt;
+	int node_id, intlv_shift;
+	u64 bits, dram_addr;
+	u32 intlv_sel;
+
+	/*
+	 * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
+	 * shows how to translate a DramAddr to an InputAddr. Here we reverse
+	 * this procedure. When translating from a DramAddr to an InputAddr, the
+	 * bits used for node interleaving are discarded.  Here we recover these
+	 * bits from the IntlvSel field of the DRAM Limit register (section
+	 * 3.4.4.2) for the node that input_addr is associated with.
+	 */
+	pvt = mci->pvt_info;
+	node_id = pvt->mc_node_id;
+	BUG_ON((node_id < 0) || (node_id > 7));
+
+	intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
+
+	if (intlv_shift == 0) {
+		debugf1("    InputAddr 0x%lx translates to DramAddr of "
+			"same value\n",	(unsigned long)input_addr);
+
+		return input_addr;
+	}
+
+	bits = ((input_addr & 0xffffff000ull) << intlv_shift) +
+	    (input_addr & 0xfff);
+
+	intlv_sel = pvt->dram_IntlvSel[node_id] & ((1 << intlv_shift) - 1);
+	dram_addr = bits + (intlv_sel << 12);
+
+	debugf1("InputAddr 0x%lx translates to DramAddr 0x%lx "
+		"(%d node interleave bits)\n", (unsigned long)input_addr,
+		(unsigned long)dram_addr, intlv_shift);
+
+	return dram_addr;
+}
+
+/*
+ * @dram_addr is a DramAddr that maps to the node represented by mci. Convert
+ * @dram_addr to a SysAddr.
+ */
+static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u64 hole_base, hole_offset, hole_size, base, limit, sys_addr;
+	int ret = 0;
+
+	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
+				      &hole_size);
+	if (!ret) {
+		if ((dram_addr >= hole_base) &&
+		    (dram_addr < (hole_base + hole_size))) {
+			sys_addr = dram_addr + hole_offset;
+
+			debugf1("using DHAR to translate DramAddr 0x%lx to "
+				"SysAddr 0x%lx\n", (unsigned long)dram_addr,
+				(unsigned long)sys_addr);
+
+			return sys_addr;
+		}
+	}
+
+	amd64_get_base_and_limit(pvt, pvt->mc_node_id, &base, &limit);
+	sys_addr = dram_addr + base;
+
+	/*
+	 * The sys_addr we have computed up to this point is a 40-bit value
+	 * because the k8 deals with 40-bit values.  However, the value we are
+	 * supposed to return is a full 64-bit physical address.  The AMD
+	 * x86-64 architecture specifies that the most significant implemented
+	 * address bit through bit 63 of a physical address must be either all
+	 * 0s or all 1s.  Therefore we sign-extend the 40-bit sys_addr to a
+	 * 64-bit value below.  See section 3.4.2 of AMD publication 24592:
+	 * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
+	 * Programming.
+	 */
+	sys_addr |= ~((sys_addr & (1ull << 39)) - 1);
+
+	debugf1("    Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n",
+		pvt->mc_node_id, (unsigned long)dram_addr,
+		(unsigned long)sys_addr);
+
+	return sys_addr;
+}
+
+/*
+ * @input_addr is an InputAddr associated with the node given by mci. Translate
+ * @input_addr to a SysAddr.
+ */
+static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
+					 u64 input_addr)
+{
+	return dram_addr_to_sys_addr(mci,
+				     input_addr_to_dram_addr(mci, input_addr));
+}
+
+/*
+ * Find the minimum and maximum InputAddr values that map to the given @csrow.
+ * Pass back these values in *input_addr_min and *input_addr_max.
+ */
+static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
+			      u64 *input_addr_min, u64 *input_addr_max)
+{
+	struct amd64_pvt *pvt;
+	u64 base, mask;
+
+	pvt = mci->pvt_info;
+	BUG_ON((csrow < 0) || (csrow >= CHIPSELECT_COUNT));
+
+	base = base_from_dct_base(pvt, csrow);
+	mask = mask_from_dct_mask(pvt, csrow);
+
+	*input_addr_min = base & ~mask;
+	*input_addr_max = base | mask | pvt->dcs_mask_notused;
+}
+
+/*
+ * Extract error address from MCA NB Address Low (section 3.6.4.5) and MCA NB
+ * Address High (section 3.6.4.6) register values and return the result. Address
+ * is located in the info structure (nbeah and nbeal), the encoding is device
+ * specific.
+ */
+static u64 extract_error_address(struct mem_ctl_info *mci,
+				 struct amd64_error_info_regs *info)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	return pvt->ops->get_error_address(mci, info);
+}
+
+
+/* Map the Error address to a PAGE and PAGE OFFSET. */
+static inline void error_address_to_page_and_offset(u64 error_address,
+						    u32 *page, u32 *offset)
+{
+	*page = (u32) (error_address >> PAGE_SHIFT);
+	*offset = ((u32) error_address) & ~PAGE_MASK;
+}
+
+/*
+ * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
+ * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
+ * of a node that detected an ECC memory error.  mci represents the node that
+ * the error address maps to (possibly different from the node that detected
+ * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
+ * error.
+ */
+static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
+{
+	int csrow;
+
+	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
+
+	if (csrow == -1)
+		amd64_mc_printk(mci, KERN_ERR,
+			     "Failed to translate InputAddr to csrow for "
+			     "address 0x%lx\n", (unsigned long)sys_addr);
+	return csrow;
+}
 
-- 
cgit v0.10.2


From 2da11654ea4d32c5700693675dfbd55c70619519 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 16:09:09 +0200
Subject: amd64_edac: add helper to dump relevant registers

Borislav:

- cleanup/fix comments
- fix function return value patterns
- cleanup dbg calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 28f85c9..3d6d000 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -729,3 +729,145 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
 	return csrow;
 }
 
+static int get_channel_from_ecc_syndrome(unsigned short syndrome);
+
+static void amd64_cpu_display_info(struct amd64_pvt *pvt)
+{
+	if (boot_cpu_data.x86 == 0x11)
+		edac_printk(KERN_DEBUG, EDAC_MC, "F11h CPU detected\n");
+	else if (boot_cpu_data.x86 == 0x10)
+		edac_printk(KERN_DEBUG, EDAC_MC, "F10h CPU detected\n");
+	else if (boot_cpu_data.x86 == 0xf)
+		edac_printk(KERN_DEBUG, EDAC_MC, "%s detected\n",
+			(pvt->ext_model >= OPTERON_CPU_REV_F) ?
+			"Rev F or later" : "Rev E or earlier");
+	else
+		/* we'll hardly ever ever get here */
+		edac_printk(KERN_ERR, EDAC_MC, "Unknown cpu!\n");
+}
+
+/*
+ * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
+ * are ECC capable.
+ */
+static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt)
+{
+	int bit;
+	enum dev_type edac_cap = EDAC_NONE;
+
+	bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= OPTERON_CPU_REV_F)
+		? 19
+		: 17;
+
+	if (pvt->dclr0 >> BIT(bit))
+		edac_cap = EDAC_FLAG_SECDED;
+
+	return edac_cap;
+}
+
+
+static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt,
+					 int ganged);
+
+/* Display and decode various NB registers for debug purposes. */
+static void amd64_dump_misc_regs(struct amd64_pvt *pvt)
+{
+	int ganged;
+
+	debugf1("  nbcap:0x%8.08x DctDualCap=%s DualNode=%s 8-Node=%s\n",
+		pvt->nbcap,
+		(pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "True" : "False",
+		(pvt->nbcap & K8_NBCAP_DUAL_NODE) ? "True" : "False",
+		(pvt->nbcap & K8_NBCAP_8_NODE) ? "True" : "False");
+	debugf1("    ECC Capable=%s   ChipKill Capable=%s\n",
+		(pvt->nbcap & K8_NBCAP_SECDED) ? "True" : "False",
+		(pvt->nbcap & K8_NBCAP_CHIPKILL) ? "True" : "False");
+	debugf1("  DramCfg0-low=0x%08x DIMM-ECC=%s Parity=%s Width=%s\n",
+		pvt->dclr0,
+		(pvt->dclr0 & BIT(19)) ?  "Enabled" : "Disabled",
+		(pvt->dclr0 & BIT(8)) ?  "Enabled" : "Disabled",
+		(pvt->dclr0 & BIT(11)) ?  "128b" : "64b");
+	debugf1("    DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s  DIMM Type=%s\n",
+		(pvt->dclr0 & BIT(12)) ?  "Y" : "N",
+		(pvt->dclr0 & BIT(13)) ?  "Y" : "N",
+		(pvt->dclr0 & BIT(14)) ?  "Y" : "N",
+		(pvt->dclr0 & BIT(15)) ?  "Y" : "N",
+		(pvt->dclr0 & BIT(16)) ?  "UN-Buffered" : "Buffered");
+
+
+	debugf1("  online-spare: 0x%8.08x\n", pvt->online_spare);
+
+	if (boot_cpu_data.x86 == 0xf) {
+		debugf1("  dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n",
+			pvt->dhar, dhar_base(pvt->dhar),
+			k8_dhar_offset(pvt->dhar));
+		debugf1("      DramHoleValid=%s\n",
+			(pvt->dhar & DHAR_VALID) ?  "True" : "False");
+
+		debugf1("  dbam-dkt: 0x%8.08x\n", pvt->dbam0);
+
+		/* everything below this point is Fam10h and above */
+		return;
+
+	} else {
+		debugf1("  dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n",
+			pvt->dhar, dhar_base(pvt->dhar),
+			f10_dhar_offset(pvt->dhar));
+		debugf1("    DramMemHoistValid=%s DramHoleValid=%s\n",
+			(pvt->dhar & F10_DRAM_MEM_HOIST_VALID) ?
+			"True" : "False",
+			(pvt->dhar & DHAR_VALID) ?
+			"True" : "False");
+	}
+
+	/* Only if NOT ganged does dcl1 have valid info */
+	if (!dct_ganging_enabled(pvt)) {
+		debugf1("  DramCfg1-low=0x%08x DIMM-ECC=%s Parity=%s "
+			"Width=%s\n", pvt->dclr1,
+			(pvt->dclr1 & BIT(19)) ?  "Enabled" : "Disabled",
+			(pvt->dclr1 & BIT(8)) ?  "Enabled" : "Disabled",
+			(pvt->dclr1 & BIT(11)) ?  "128b" : "64b");
+		debugf1("    DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s  "
+			"DIMM Type=%s\n",
+			(pvt->dclr1 & BIT(12)) ?  "Y" : "N",
+			(pvt->dclr1 & BIT(13)) ?  "Y" : "N",
+			(pvt->dclr1 & BIT(14)) ?  "Y" : "N",
+			(pvt->dclr1 & BIT(15)) ?  "Y" : "N",
+			(pvt->dclr1 & BIT(16)) ?  "UN-Buffered" : "Buffered");
+	}
+
+	/*
+	 * Determine if ganged and then dump memory sizes for first controller,
+	 * and if NOT ganged dump info for 2nd controller.
+	 */
+	ganged = dct_ganging_enabled(pvt);
+
+	f10_debug_display_dimm_sizes(0, pvt, ganged);
+
+	if (!ganged)
+		f10_debug_display_dimm_sizes(1, pvt, ganged);
+}
+
+/* Read in both of DBAM registers */
+static void amd64_read_dbam_reg(struct amd64_pvt *pvt)
+{
+	int err = 0;
+	unsigned int reg;
+
+	reg = DBAM0;
+	err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam0);
+	if (err)
+		goto err_reg;
+
+	if (boot_cpu_data.x86 >= 0x10) {
+		reg = DBAM1;
+		err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam1);
+
+		if (err)
+			goto err_reg;
+	}
+
+err_reg:
+	debugf0("Error reading F2x%03x.\n", reg);
+}
+
-- 
cgit v0.10.2


From 94be4bff21990674ac418c970c708a1a01cf709f Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 16:12:00 +0200
Subject: amd64_edac: assign DRAM chip select base and mask in a
 family-specific way

Borislav:

- cleanup/fix comments
- fix function return value patterns
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 3d6d000..5ec44a4 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -871,3 +871,145 @@ err_reg:
 	debugf0("Error reading F2x%03x.\n", reg);
 }
 
+/*
+ * NOTE: CPU Revision Dependent code: Rev E and Rev F
+ *
+ * Set the DCSB and DCSM mask values depending on the CPU revision value. Also
+ * set the shift factor for the DCSB and DCSM values.
+ *
+ * ->dcs_mask_notused, RevE:
+ *
+ * To find the max InputAddr for the csrow, start with the base address and set
+ * all bits that are "don't care" bits in the test at the start of section
+ * 3.5.4 (p. 84).
+ *
+ * The "don't care" bits are all set bits in the mask and all bits in the gaps
+ * between bit ranges [35:25] and [19:13]. The value REV_E_DCS_NOTUSED_BITS
+ * represents bits [24:20] and [12:0], which are all bits in the above-mentioned
+ * gaps.
+ *
+ * ->dcs_mask_notused, RevF and later:
+ *
+ * To find the max InputAddr for the csrow, start with the base address and set
+ * all bits that are "don't care" bits in the test at the start of NPT section
+ * 4.5.4 (p. 87).
+ *
+ * The "don't care" bits are all set bits in the mask and all bits in the gaps
+ * between bit ranges [36:27] and [21:13].
+ *
+ * The value REV_F_F1Xh_DCS_NOTUSED_BITS represents bits [26:22] and [12:0],
+ * which are all bits in the above-mentioned gaps.
+ */
+static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt)
+{
+	if (pvt->ext_model >= OPTERON_CPU_REV_F) {
+		pvt->dcsb_base		= REV_F_F1Xh_DCSB_BASE_BITS;
+		pvt->dcsm_mask		= REV_F_F1Xh_DCSM_MASK_BITS;
+		pvt->dcs_mask_notused	= REV_F_F1Xh_DCS_NOTUSED_BITS;
+		pvt->dcs_shift		= REV_F_F1Xh_DCS_SHIFT;
+
+		switch (boot_cpu_data.x86) {
+		case 0xf:
+			pvt->num_dcsm = REV_F_DCSM_COUNT;
+			break;
+
+		case 0x10:
+			pvt->num_dcsm = F10_DCSM_COUNT;
+			break;
+
+		case 0x11:
+			pvt->num_dcsm = F11_DCSM_COUNT;
+			break;
+
+		default:
+			amd64_printk(KERN_ERR, "Unsupported family!\n");
+			break;
+		}
+	} else {
+		pvt->dcsb_base		= REV_E_DCSB_BASE_BITS;
+		pvt->dcsm_mask		= REV_E_DCSM_MASK_BITS;
+		pvt->dcs_mask_notused	= REV_E_DCS_NOTUSED_BITS;
+		pvt->dcs_shift		= REV_E_DCS_SHIFT;
+		pvt->num_dcsm		= REV_E_DCSM_COUNT;
+	}
+}
+
+/*
+ * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask hw registers
+ */
+static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
+{
+	int cs, reg, err = 0;
+
+	amd64_set_dct_base_and_mask(pvt);
+
+	for (cs = 0; cs < CHIPSELECT_COUNT; cs++) {
+		reg = K8_DCSB0 + (cs * 4);
+		err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
+						&pvt->dcsb0[cs]);
+		if (unlikely(err))
+			debugf0("Reading K8_DCSB0[%d] failed\n", cs);
+		else
+			debugf0("  DCSB0[%d]=0x%08x reg: F2x%x\n",
+				cs, pvt->dcsb0[cs], reg);
+
+		/* If DCT are NOT ganged, then read in DCT1's base */
+		if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
+			reg = F10_DCSB1 + (cs * 4);
+			err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
+							&pvt->dcsb1[cs]);
+			if (unlikely(err))
+				debugf0("Reading F10_DCSB1[%d] failed\n", cs);
+			else
+				debugf0("  DCSB1[%d]=0x%08x reg: F2x%x\n",
+					cs, pvt->dcsb1[cs], reg);
+		} else {
+			pvt->dcsb1[cs] = 0;
+		}
+	}
+
+	for (cs = 0; cs < pvt->num_dcsm; cs++) {
+		reg = K8_DCSB0 + (cs * 4);
+		err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
+					&pvt->dcsm0[cs]);
+		if (unlikely(err))
+			debugf0("Reading K8_DCSM0 failed\n");
+		else
+			debugf0("    DCSM0[%d]=0x%08x reg: F2x%x\n",
+				cs, pvt->dcsm0[cs], reg);
+
+		/* If DCT are NOT ganged, then read in DCT1's mask */
+		if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
+			reg = F10_DCSM1 + (cs * 4);
+			err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
+					&pvt->dcsm1[cs]);
+			if (unlikely(err))
+				debugf0("Reading F10_DCSM1[%d] failed\n", cs);
+			else
+				debugf0("    DCSM1[%d]=0x%08x reg: F2x%x\n",
+					cs, pvt->dcsm1[cs], reg);
+		} else
+			pvt->dcsm1[cs] = 0;
+	}
+}
+
+static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt)
+{
+	enum mem_type type;
+
+	if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= OPTERON_CPU_REV_F) {
+		/* Rev F and later */
+		type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
+	} else {
+		/* Rev E and earlier */
+		type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
+	}
+
+	debugf1("  Memory type is: %s\n",
+		(type == MEM_DDR2) ? "MEM_DDR2" :
+		(type == MEM_RDDR2) ? "MEM_RDDR2" :
+		(type == MEM_DDR) ? "MEM_DDR" : "MEM_RDDR");
+
+	return type;
+}
+
-- 
cgit v0.10.2


From ddff876d2022c5e06509f6535bc4fd61c4d6ffd6 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 16:14:52 +0200
Subject: amd64_edac: add k8-specific methods

Borislav:

- fix/cleanup/move comments
- fix function return value patterns
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5ec44a4..24c031f 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1013,3 +1013,176 @@ static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt)
 	return type;
 }
 
+/*
+ * Read the DRAM Configuration Low register. It differs between CG, D & E revs
+ * and the later RevF memory controllers (DDR vs DDR2)
+ *
+ * Return:
+ *      number of memory channels in operation
+ * Pass back:
+ *      contents of the DCL0_LOW register
+ */
+static int k8_early_channel_count(struct amd64_pvt *pvt)
+{
+	int flag, err = 0;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
+	if (err)
+		return err;
+
+	if ((boot_cpu_data.x86_model >> 4) >= OPTERON_CPU_REV_F) {
+		/* RevF (NPT) and later */
+		flag = pvt->dclr0 & F10_WIDTH_128;
+	} else {
+		/* RevE and earlier */
+		flag = pvt->dclr0 & REVE_WIDTH_128;
+	}
+
+	/* not used */
+	pvt->dclr1 = 0;
+
+	return (flag) ? 2 : 1;
+}
+
+/* extract the ERROR ADDRESS for the K8 CPUs */
+static u64 k8_get_error_address(struct mem_ctl_info *mci,
+				struct amd64_error_info_regs *info)
+{
+	return (((u64) (info->nbeah & 0xff)) << 32) +
+			(info->nbeal & ~0x03);
+}
+
+/*
+ * Read the Base and Limit registers for K8 based Memory controllers; extract
+ * fields from the 'raw' reg into separate data fields
+ *
+ * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN
+ */
+static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
+{
+	u32 low;
+	u32 off = dram << 3;	/* 8 bytes between DRAM entries */
+	int err;
+
+	err = pci_read_config_dword(pvt->addr_f1_ctl,
+				    K8_DRAM_BASE_LOW + off, &low);
+	if (err)
+		debugf0("Reading K8_DRAM_BASE_LOW failed\n");
+
+	/* Extract parts into separate data entries */
+	pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8;
+	pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7;
+	pvt->dram_rw_en[dram] = (low & 0x3);
+
+	err = pci_read_config_dword(pvt->addr_f1_ctl,
+				    K8_DRAM_LIMIT_LOW + off, &low);
+	if (err)
+		debugf0("Reading K8_DRAM_LIMIT_LOW failed\n");
+
+	/*
+	 * Extract parts into separate data entries. Limit is the HIGHEST memory
+	 * location of the region, so lower 24 bits need to be all ones
+	 */
+	pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 8) | 0x00FFFFFF;
+	pvt->dram_IntlvSel[dram] = (low >> 8) & 0x7;
+	pvt->dram_DstNode[dram] = (low & 0x7);
+}
+
+static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
+					struct amd64_error_info_regs *info,
+					u64 SystemAddress)
+{
+	struct mem_ctl_info *src_mci;
+	unsigned short syndrome;
+	int channel, csrow;
+	u32 page, offset;
+
+	/* Extract the syndrome parts and form a 16-bit syndrome */
+	syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
+	syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
+
+	/* CHIPKILL enabled */
+	if (info->nbcfg & K8_NBCFG_CHIPKILL) {
+		channel = get_channel_from_ecc_syndrome(syndrome);
+		if (channel < 0) {
+			/*
+			 * Syndrome didn't map, so we don't know which of the
+			 * 2 DIMMs is in error. So we need to ID 'both' of them
+			 * as suspect.
+			 */
+			amd64_mc_printk(mci, KERN_WARNING,
+				       "unknown syndrome 0x%x - possible error "
+				       "reporting race\n", syndrome);
+			edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+			return;
+		}
+	} else {
+		/*
+		 * non-chipkill ecc mode
+		 *
+		 * The k8 documentation is unclear about how to determine the
+		 * channel number when using non-chipkill memory.  This method
+		 * was obtained from email communication with someone at AMD.
+		 * (Wish the email was placed in this comment - norsk)
+		 */
+		channel = ((SystemAddress & BIT(3)) != 0);
+	}
+
+	/*
+	 * Find out which node the error address belongs to. This may be
+	 * different from the node that detected the error.
+	 */
+	src_mci = find_mc_by_sys_addr(mci, SystemAddress);
+	if (src_mci) {
+		amd64_mc_printk(mci, KERN_ERR,
+			     "failed to map error address 0x%lx to a node\n",
+			     (unsigned long)SystemAddress);
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+		return;
+	}
+
+	/* Now map the SystemAddress to a CSROW */
+	csrow = sys_addr_to_csrow(src_mci, SystemAddress);
+	if (csrow < 0) {
+		edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
+	} else {
+		error_address_to_page_and_offset(SystemAddress, &page, &offset);
+
+		edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
+				  channel, EDAC_MOD_STR);
+	}
+}
+
+/*
+ * determrine the number of PAGES in for this DIMM's size based on its DRAM
+ * Address Mapping.
+ *
+ * First step is to calc the number of bits to shift a value of 1 left to
+ * indicate show many pages. Start with the DBAM value as the starting bits,
+ * then proceed to adjust those shift bits, based on CPU rev and the table.
+ * See BKDG on the DBAM
+ */
+static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
+{
+	int nr_pages;
+
+	if (pvt->ext_model >= OPTERON_CPU_REV_F) {
+		nr_pages = 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT);
+	} else {
+		/*
+		 * RevE and less section; this line is tricky. It collapses the
+		 * table used by RevD and later to one that matches revisions CG
+		 * and earlier.
+		 */
+		dram_map -= (pvt->ext_model >= OPTERON_CPU_REV_D) ?
+				(dram_map > 8 ? 4 : (dram_map > 5 ?
+				3 : (dram_map > 2 ? 1 : 0))) : 0;
+
+		/* 25 shift is 32MiB minimum DIMM size in RevE and prior */
+		nr_pages = 1 << (dram_map + 25 - PAGE_SHIFT);
+	}
+
+	return nr_pages;
+}
+
+
-- 
cgit v0.10.2


From 1afd3c98b5e8df68e1840b56c0ced15f314ce30d Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 16:16:50 +0200
Subject: amd64_edac: add F10h-and-later methods-p1

Borislav:

Fail f10_early_channel_count() if error encountered while reading a NB
register since those cached register contents are accessed afterwards.

- fix/cleanup comments
- fix function return value patterns
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 24c031f..b4ef2c7 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1185,4 +1185,185 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
 	return nr_pages;
 }
 
+/*
+ * Get the number of DCT channels in use.
+ *
+ * Return:
+ *	number of Memory Channels in operation
+ * Pass back:
+ *	contents of the DCL0_LOW register
+ */
+static int f10_early_channel_count(struct amd64_pvt *pvt)
+{
+	int err = 0, channels = 0;
+	u32 dbam;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1);
+	if (err)
+		goto err_reg;
+
+	/* If we are in 128 bit mode, then we are using 2 channels */
+	if (pvt->dclr0 & F10_WIDTH_128) {
+		debugf0("Data WIDTH is 128 bits - 2 channels\n");
+		channels = 2;
+		return channels;
+	}
+
+	/*
+	 * Need to check if in UN-ganged mode: In such, there are 2 channels,
+	 * but they are NOT in 128 bit mode and thus the above 'dcl0' status bit
+	 * will be OFF.
+	 *
+	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
+	 * their CSEnable bit on. If so, then SINGLE DIMM case.
+	 */
+	debugf0("Data WIDTH is NOT 128 bits - need more decoding\n");
 
+	/*
+	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
+	 * is more than just one DIMM present in unganged mode. Need to check
+	 * both controllers since DIMMs can be placed in either one.
+	 */
+	channels = 0;
+	err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM0, &dbam);
+	if (err)
+		goto err_reg;
+
+	if (DBAM_DIMM(0, dbam) > 0)
+		channels++;
+	if (DBAM_DIMM(1, dbam) > 0)
+		channels++;
+	if (DBAM_DIMM(2, dbam) > 0)
+		channels++;
+	if (DBAM_DIMM(3, dbam) > 0)
+		channels++;
+
+	/* If more than 2 DIMMs are present, then we have 2 channels */
+	if (channels > 2)
+		channels = 2;
+	else if (channels == 0) {
+		/* No DIMMs on DCT0, so look at DCT1 */
+		err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM1, &dbam);
+		if (err)
+			goto err_reg;
+
+		if (DBAM_DIMM(0, dbam) > 0)
+			channels++;
+		if (DBAM_DIMM(1, dbam) > 0)
+			channels++;
+		if (DBAM_DIMM(2, dbam) > 0)
+			channels++;
+		if (DBAM_DIMM(3, dbam) > 0)
+			channels++;
+
+		if (channels > 2)
+			channels = 2;
+	}
+
+	/* If we found ALL 0 values, then assume just ONE DIMM-ONE Channel */
+	if (channels == 0)
+		channels = 1;
+
+	debugf0("DIMM count= %d\n", channels);
+
+	return channels;
+
+err_reg:
+	return -1;
+
+}
+
+static int f10_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
+{
+	return 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT);
+}
+
+/* Enable extended configuration access via 0xCF8 feature */
+static void amd64_setup(struct amd64_pvt *pvt)
+{
+	u32 reg;
+
+	pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
+
+	pvt->flags.cf8_extcfg = !!(reg & F10_NB_CFG_LOW_ENABLE_EXT_CFG);
+	reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
+	pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
+}
+
+/* Restore the extended configuration access via 0xCF8 feature */
+static void amd64_teardown(struct amd64_pvt *pvt)
+{
+	u32 reg;
+
+	pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
+
+	reg &= ~F10_NB_CFG_LOW_ENABLE_EXT_CFG;
+	if (pvt->flags.cf8_extcfg)
+		reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
+	pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
+}
+
+static u64 f10_get_error_address(struct mem_ctl_info *mci,
+			struct amd64_error_info_regs *info)
+{
+	return (((u64) (info->nbeah & 0xffff)) << 32) +
+			(info->nbeal & ~0x01);
+}
+
+/*
+ * Read the Base and Limit registers for F10 based Memory controllers. Extract
+ * fields from the 'raw' reg into separate data fields.
+ *
+ * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN.
+ */
+static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
+{
+	u32 high_offset, low_offset, high_base, low_base, high_limit, low_limit;
+
+	low_offset = K8_DRAM_BASE_LOW + (dram << 3);
+	high_offset = F10_DRAM_BASE_HIGH + (dram << 3);
+
+	/* read the 'raw' DRAM BASE Address register */
+	pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_base);
+
+	/* Read from the ECS data register */
+	pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_base);
+
+	/* Extract parts into separate data entries */
+	pvt->dram_rw_en[dram] = (low_base & 0x3);
+
+	if (pvt->dram_rw_en[dram] == 0)
+		return;
+
+	pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7;
+
+	pvt->dram_base[dram] = (((((u64) high_base & 0x000000FF) << 32) |
+				((u64) low_base & 0xFFFF0000))) << 8;
+
+	low_offset = K8_DRAM_LIMIT_LOW + (dram << 3);
+	high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
+
+	/* read the 'raw' LIMIT registers */
+	pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_limit);
+
+	/* Read from the ECS data register for the HIGH portion */
+	pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_limit);
+
+	debugf0("  HW Regs: BASE=0x%08x-%08x      LIMIT=  0x%08x-%08x\n",
+		high_base, low_base, high_limit, low_limit);
+
+	pvt->dram_DstNode[dram] = (low_limit & 0x7);
+	pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7;
+
+	/*
+	 * Extract address values and form a LIMIT address. Limit is the HIGHEST
+	 * memory location of the region, so low 24 bits need to be all ones.
+	 */
+	low_limit |= 0x0000FFFF;
+	pvt->dram_limit[dram] =
+		((((u64) high_limit << 32) + (u64) low_limit) << 8) | (0xFF);
+}
-- 
cgit v0.10.2


From 6163b5d4fb45d20e3eb92d627943f26572726889 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 16:20:17 +0200
Subject: amd64_edac: add F10h-and-later methods-p2

Borislav:

- fix a wrong negation in f10_determine_base_addr_offset()
- fix a wrong mask in f10_determine_base_addr_offset() which should
select DctSelBaseAddr[31:11] and not [31:16] as it was before
- remove StinkyIdentifiers, trivially simplify code.
- fix/cleanup comments
- fix function return value patterns

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index b4ef2c7..744a49a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1367,3 +1367,199 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
 	pvt->dram_limit[dram] =
 		((((u64) high_limit << 32) + (u64) low_limit) << 8) | (0xFF);
 }
+
+static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
+{
+	int err = 0;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW,
+				    &pvt->dram_ctl_select_low);
+	if (err) {
+		debugf0("Reading F10_DCTL_SEL_LOW failed\n");
+	} else {
+		debugf0("DRAM_DCTL_SEL_LOW=0x%x  DctSelBaseAddr=0x%x\n",
+			pvt->dram_ctl_select_low, dct_sel_baseaddr(pvt));
+
+		debugf0("  DRAM DCTs are=%s DRAM Is=%s DRAM-Ctl-"
+				"sel-hi-range=%s\n",
+			(dct_ganging_enabled(pvt) ? "GANGED" : "NOT GANGED"),
+			(dct_dram_enabled(pvt) ? "Enabled"   : "Disabled"),
+			(dct_high_range_enabled(pvt) ? "Enabled" : "Disabled"));
+
+		debugf0("  DctDatIntLv=%s MemCleared=%s DctSelIntLvAddr=0x%x\n",
+			(dct_data_intlv_enabled(pvt) ? "Enabled" : "Disabled"),
+			(dct_memory_cleared(pvt) ? "True " : "False "),
+			dct_sel_interleave_addr(pvt));
+	}
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH,
+				    &pvt->dram_ctl_select_high);
+	if (err)
+		debugf0("Reading F10_DCTL_SEL_HIGH failed\n");
+}
+
+static u32 f10_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
+				int hi_range_sel, u32 intlv_en)
+{
+	u32 cs, temp, dct_sel_high = (pvt->dram_ctl_select_low >> 1) & 1;
+
+	if (dct_ganging_enabled(pvt))
+		cs = 0;
+	else if (hi_range_sel)
+		cs = dct_sel_high;
+	else if (dct_interleave_enabled(pvt)) {
+		if (dct_sel_interleave_addr(pvt) == 0)
+			cs = sys_addr >> 6 & 1;
+		else if ((dct_sel_interleave_addr(pvt) >> 1) & 1) {
+			temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
+
+			if (dct_sel_interleave_addr(pvt) & 1)
+				cs = (sys_addr >> 9 & 1) ^ temp;
+			else
+				cs = (sys_addr >> 6 & 1) ^ temp;
+		} else if (intlv_en & 4)
+			cs = sys_addr >> 15 & 1;
+		else if (intlv_en & 2)
+			cs = sys_addr >> 14 & 1;
+		else if (intlv_en & 1)
+			cs = sys_addr >> 13 & 1;
+		else
+			cs = sys_addr >> 12 & 1;
+	} else if (dct_high_range_enabled(pvt) && !dct_ganging_enabled(pvt))
+		cs = ~dct_sel_high & 1;
+	else
+		cs = 0;
+
+	return cs;
+}
+
+static inline u32 f10_map_intlv_en_to_shift(u32 intlv_en)
+{
+	if (intlv_en == 1)
+		return 1;
+	else if (intlv_en == 3)
+		return 2;
+	else if (intlv_en == 7)
+		return 3;
+
+	return 0;
+}
+
+static inline u64 f10_determine_base_addr_offset(u64 sys_addr, int hi_range_sel,
+						 u32 dct_sel_base_addr,
+						 u64 dct_sel_base_off,
+						 u32 hole_en, u32 hole_off,
+						 u64 dram_base)
+{
+	u64 chan_off;
+
+	if (hi_range_sel) {
+		if (!(dct_sel_base_addr & 0xFFFFF800) &&
+		   (hole_en & 1) && (sys_addr >= 0x100000000ULL))
+			chan_off = hole_off << 16;
+		else
+			chan_off = dct_sel_base_off;
+	} else {
+		if ((hole_en & 1) && (sys_addr >= 0x100000000ULL))
+			chan_off = hole_off << 16;
+		else
+			chan_off = dram_base & 0xFFFFF8000000ULL;
+	}
+
+	return (sys_addr & 0x0000FFFFFFFFFFC0ULL) -
+			(chan_off & 0x0000FFFFFF800000ULL);
+}
+
+/* Hack for the time being - Can we get this from BIOS?? */
+#define	CH0SPARE_RANK	0
+#define	CH1SPARE_RANK	1
+
+/*
+ * checks if the csrow passed in is marked as SPARED, if so returns the new
+ * spare row
+ */
+static inline int f10_process_possible_spare(int csrow,
+				u32 cs, struct amd64_pvt *pvt)
+{
+	u32 swap_done;
+	u32 bad_dram_cs;
+
+	/* Depending on channel, isolate respective SPARING info */
+	if (cs) {
+		swap_done = F10_ONLINE_SPARE_SWAPDONE1(pvt->online_spare);
+		bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS1(pvt->online_spare);
+		if (swap_done && (csrow == bad_dram_cs))
+			csrow = CH1SPARE_RANK;
+	} else {
+		swap_done = F10_ONLINE_SPARE_SWAPDONE0(pvt->online_spare);
+		bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS0(pvt->online_spare);
+		if (swap_done && (csrow == bad_dram_cs))
+			csrow = CH0SPARE_RANK;
+	}
+	return csrow;
+}
+
+/*
+ * Iterate over the DRAM DCT "base" and "mask" registers looking for a
+ * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
+ *
+ * Return:
+ *	-EINVAL:  NOT FOUND
+ *	0..csrow = Chip-Select Row
+ */
+static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
+{
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
+	u32 cs_base, cs_mask;
+	int cs_found = -EINVAL;
+	int csrow;
+
+	mci = mci_lookup[nid];
+	if (!mci)
+		return cs_found;
+
+	pvt = mci->pvt_info;
+
+	debugf1("InputAddr=0x%x  channelselect=%d\n", in_addr, cs);
+
+	for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) {
+
+		cs_base = amd64_get_dct_base(pvt, cs, csrow);
+		if (!(cs_base & K8_DCSB_CS_ENABLE))
+			continue;
+
+		/*
+		 * We have an ENABLED CSROW, Isolate just the MASK bits of the
+		 * target: [28:19] and [13:5], which map to [36:27] and [21:13]
+		 * of the actual address.
+		 */
+		cs_base &= REV_F_F1Xh_DCSB_BASE_BITS;
+
+		/*
+		 * Get the DCT Mask, and ENABLE the reserved bits: [18:16] and
+		 * [4:0] to become ON. Then mask off bits [28:0] ([36:8])
+		 */
+		cs_mask = amd64_get_dct_mask(pvt, cs, csrow);
+
+		debugf1("    CSROW=%d CSBase=0x%x RAW CSMask=0x%x\n",
+				csrow, cs_base, cs_mask);
+
+		cs_mask = (cs_mask | 0x0007C01F) & 0x1FFFFFFF;
+
+		debugf1("              Final CSMask=0x%x\n", cs_mask);
+		debugf1("    (InputAddr & ~CSMask)=0x%x "
+				"(CSBase & ~CSMask)=0x%x\n",
+				(in_addr & ~cs_mask), (cs_base & ~cs_mask));
+
+		if ((in_addr & ~cs_mask) == (cs_base & ~cs_mask)) {
+			cs_found = f10_process_possible_spare(csrow, cs, pvt);
+
+			debugf1(" MATCH csrow=%d\n", cs_found);
+			break;
+		}
+	}
+	return cs_found;
+}
+
+
-- 
cgit v0.10.2


From f71d0a05001afd10e2be491ca002c55c7df42ed8 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 16:22:43 +0200
Subject: amd64_edac: add F10h-and-later methods-p3

Borislav:

- compute dct_sel_base_off in f10_match_to_this_node() correctly since
it cannot be assumed that the Reserved bits are zero and they have to be
masked out instead.

- cleanup, remove StinkyIdentifiers, simplify logic
- fix function return value patterns
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 744a49a..c2e2c3c 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1398,6 +1398,10 @@ static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
 		debugf0("Reading F10_DCTL_SEL_HIGH failed\n");
 }
 
+/*
+ * determine channel based on the interleaving mode: F10h BKDG, 2.8.9 Memory
+ * Interleaving Modes.
+ */
 static u32 f10_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
 				int hi_range_sel, u32 intlv_en)
 {
@@ -1408,6 +1412,9 @@ static u32 f10_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
 	else if (hi_range_sel)
 		cs = dct_sel_high;
 	else if (dct_interleave_enabled(pvt)) {
+		/*
+		 * see F2x110[DctSelIntLvAddr] - channel interleave mode
+		 */
 		if (dct_sel_interleave_addr(pvt) == 0)
 			cs = sys_addr >> 6 & 1;
 		else if ((dct_sel_interleave_addr(pvt) >> 1) & 1) {
@@ -1445,22 +1452,23 @@ static inline u32 f10_map_intlv_en_to_shift(u32 intlv_en)
 	return 0;
 }
 
-static inline u64 f10_determine_base_addr_offset(u64 sys_addr, int hi_range_sel,
+/* See F10h BKDG, 2.8.10.2 DctSelBaseOffset Programming */
+static inline u64 f10_get_base_addr_offset(u64 sys_addr, int hi_range_sel,
 						 u32 dct_sel_base_addr,
 						 u64 dct_sel_base_off,
-						 u32 hole_en, u32 hole_off,
+						 u32 hole_valid, u32 hole_off,
 						 u64 dram_base)
 {
 	u64 chan_off;
 
 	if (hi_range_sel) {
 		if (!(dct_sel_base_addr & 0xFFFFF800) &&
-		   (hole_en & 1) && (sys_addr >= 0x100000000ULL))
+		   hole_valid && (sys_addr >= 0x100000000ULL))
 			chan_off = hole_off << 16;
 		else
 			chan_off = dct_sel_base_off;
 	} else {
-		if ((hole_en & 1) && (sys_addr >= 0x100000000ULL))
+		if (hole_valid && (sys_addr >= 0x100000000ULL))
 			chan_off = hole_off << 16;
 		else
 			chan_off = dram_base & 0xFFFFF8000000ULL;
@@ -1562,4 +1570,257 @@ static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
 	return cs_found;
 }
 
+/* For a given @dram_range, check if @sys_addr falls within it. */
+static int f10_match_to_this_node(struct amd64_pvt *pvt, int dram_range,
+				  u64 sys_addr, int *nid, int *chan_sel)
+{
+	int node_id, cs_found = -EINVAL, high_range = 0;
+	u32 intlv_en, intlv_sel, intlv_shift, hole_off;
+	u32 hole_valid, tmp, dct_sel_base, channel;
+	u64 dram_base, chan_addr, dct_sel_base_off;
+
+	dram_base = pvt->dram_base[dram_range];
+	intlv_en = pvt->dram_IntlvEn[dram_range];
+
+	node_id = pvt->dram_DstNode[dram_range];
+	intlv_sel = pvt->dram_IntlvSel[dram_range];
+
+	debugf1("(dram=%d) Base=0x%llx SystemAddr= 0x%llx Limit=0x%llx\n",
+		dram_range, dram_base, sys_addr, pvt->dram_limit[dram_range]);
+
+	/*
+	 * This assumes that one node's DHAR is the same as all the other
+	 * nodes' DHAR.
+	 */
+	hole_off = (pvt->dhar & 0x0000FF80);
+	hole_valid = (pvt->dhar & 0x1);
+	dct_sel_base_off = (pvt->dram_ctl_select_high & 0xFFFFFC00) << 16;
+
+	debugf1("   HoleOffset=0x%x  HoleValid=0x%x IntlvSel=0x%x\n",
+			hole_off, hole_valid, intlv_sel);
+
+	if (intlv_en ||
+	    (intlv_sel != ((sys_addr >> 12) & intlv_en)))
+		return -EINVAL;
+
+	dct_sel_base = dct_sel_baseaddr(pvt);
+
+	/*
+	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
+	 * select between DCT0 and DCT1.
+	 */
+	if (dct_high_range_enabled(pvt) &&
+	   !dct_ganging_enabled(pvt) &&
+	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
+		high_range = 1;
+
+	channel = f10_determine_channel(pvt, sys_addr, high_range, intlv_en);
+
+	chan_addr = f10_get_base_addr_offset(sys_addr, high_range, dct_sel_base,
+					     dct_sel_base_off, hole_valid,
+					     hole_off, dram_base);
+
+	intlv_shift = f10_map_intlv_en_to_shift(intlv_en);
+
+	/* remove Node ID (in case of memory interleaving) */
+	tmp = chan_addr & 0xFC0;
+
+	chan_addr = ((chan_addr >> intlv_shift) & 0xFFFFFFFFF000ULL) | tmp;
+
+	/* remove channel interleave and hash */
+	if (dct_interleave_enabled(pvt) &&
+	   !dct_high_range_enabled(pvt) &&
+	   !dct_ganging_enabled(pvt)) {
+		if (dct_sel_interleave_addr(pvt) != 1)
+			chan_addr = (chan_addr >> 1) & 0xFFFFFFFFFFFFFFC0ULL;
+		else {
+			tmp = chan_addr & 0xFC0;
+			chan_addr = ((chan_addr & 0xFFFFFFFFFFFFC000ULL) >> 1)
+					| tmp;
+		}
+	}
+
+	debugf1("   (ChannelAddrLong=0x%llx) >> 8 becomes InputAddr=0x%x\n",
+		chan_addr, (u32)(chan_addr >> 8));
+
+	cs_found = f10_lookup_addr_in_dct(chan_addr >> 8, node_id, channel);
+
+	if (cs_found >= 0) {
+		*nid = node_id;
+		*chan_sel = channel;
+	}
+	return cs_found;
+}
+
+static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
+				       int *node, int *chan_sel)
+{
+	int dram_range, cs_found = -EINVAL;
+	u64 dram_base, dram_limit;
+
+	for (dram_range = 0; dram_range < DRAM_REG_COUNT; dram_range++) {
+
+		if (!pvt->dram_rw_en[dram_range])
+			continue;
+
+		dram_base = pvt->dram_base[dram_range];
+		dram_limit = pvt->dram_limit[dram_range];
+
+		if ((dram_base <= sys_addr) && (sys_addr <= dram_limit)) {
+
+			cs_found = f10_match_to_this_node(pvt, dram_range,
+							  sys_addr, node,
+							  chan_sel);
+			if (cs_found >= 0)
+				break;
+		}
+	}
+	return cs_found;
+}
+
+/*
+ * This the F10h reference code from AMD to map a @sys_addr to NodeID,
+ * CSROW, Channel.
+ *
+ * The @sys_addr is usually an error address received from the hardware.
+ */
+static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
+				     struct amd64_error_info_regs *info,
+				     u64 sys_addr)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u32 page, offset;
+	unsigned short syndrome;
+	int nid, csrow, chan = 0;
+
+	csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
+
+	if (csrow >= 0) {
+		error_address_to_page_and_offset(sys_addr, &page, &offset);
+
+		syndrome = EXTRACT_HIGH_SYNDROME(info->nbsl) << 8;
+		syndrome |= EXTRACT_LOW_SYNDROME(info->nbsh);
+
+		/*
+		 * Is CHIPKILL on? If so, then we can attempt to use the
+		 * syndrome to isolate which channel the error was on.
+		 */
+		if (pvt->nbcfg & K8_NBCFG_CHIPKILL)
+			chan = get_channel_from_ecc_syndrome(syndrome);
+
+		if (chan >= 0) {
+			edac_mc_handle_ce(mci, page, offset, syndrome,
+					csrow, chan, EDAC_MOD_STR);
+		} else {
+			/*
+			 * Channel unknown, report all channels on this
+			 * CSROW as failed.
+			 */
+			for (chan = 0; chan < mci->csrows[csrow].nr_channels;
+								chan++) {
+					edac_mc_handle_ce(mci, page, offset,
+							syndrome,
+							csrow, chan,
+							EDAC_MOD_STR);
+			}
+		}
+
+	} else {
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+	}
+}
+
+/*
+ * Input (@index) is the DBAM DIMM value (1 of 4) used as an index into a shift
+ * table (revf_quad_ddr2_shift) which starts at 128MB DIMM size. Index of 0
+ * indicates an empty DIMM slot, as reported by Hardware on empty slots.
+ *
+ * Normalize to 128MB by subracting 27 bit shift.
+ */
+static int map_dbam_to_csrow_size(int index)
+{
+	int mega_bytes = 0;
+
+	if (index > 0 && index <= DBAM_MAX_VALUE)
+		mega_bytes = ((128 << (revf_quad_ddr2_shift[index]-27)));
+
+	return mega_bytes;
+}
+
+/*
+ * debug routine to display the memory sizes of a DIMM (ganged or not) and it
+ * CSROWs as well
+ */
+static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt,
+					 int ganged)
+{
+	int dimm, size0, size1;
+	u32 dbam;
+	u32 *dcsb;
+
+	debugf1("  dbam%d: 0x%8.08x  CSROW is %s\n", ctrl,
+			ctrl ? pvt->dbam1 : pvt->dbam0,
+			ganged ? "GANGED - dbam1 not used" : "NON-GANGED");
+
+	dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
+	dcsb = ctrl ? pvt->dcsb1 : pvt->dcsb0;
+
+	/* Dump memory sizes for DIMM and its CSROWs */
+	for (dimm = 0; dimm < 4; dimm++) {
+
+		size0 = 0;
+		if (dcsb[dimm*2] & K8_DCSB_CS_ENABLE)
+			size0 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam));
+
+		size1 = 0;
+		if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE)
+			size1 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam));
+
+		debugf1("     CTRL-%d DIMM-%d=%5dMB   CSROW-%d=%5dMB "
+				"CSROW-%d=%5dMB\n",
+				ctrl,
+				dimm,
+				size0 + size1,
+				dimm * 2,
+				size0,
+				dimm * 2 + 1,
+				size1);
+	}
+}
+
+/*
+ * Very early hardware probe on pci_probe thread to determine if this module
+ * supports the hardware.
+ *
+ * Return:
+ *      0 for OK
+ *      1 for error
+ */
+static int f10_probe_valid_hardware(struct amd64_pvt *pvt)
+{
+	int ret = 0;
+
+	/*
+	 * If we are on a DDR3 machine, we don't know yet if
+	 * we support that properly at this time
+	 */
+	if ((pvt->dchr0 & F10_DCHR_Ddr3Mode) ||
+	    (pvt->dchr1 & F10_DCHR_Ddr3Mode)) {
+
+		amd64_printk(KERN_WARNING,
+			"%s() This machine is running with DDR3 memory. "
+			"This is not currently supported. "
+			"DCHR0=0x%x DCHR1=0x%x\n",
+			__func__, pvt->dchr0, pvt->dchr1);
+
+		amd64_printk(KERN_WARNING,
+			"   Contact '%s' module MAINTAINER to help add"
+			" support.\n",
+			EDAC_MOD_STR);
+
+		ret = 1;
+
+	}
+	return ret;
+}
 
-- 
cgit v0.10.2


From 4d37607adbff69596a3170cf84badaf26efc59ac Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 16:25:05 +0200
Subject: amd64_edac: add per-family descriptors

Borislav:

- fix comments
- fix function return value patterns

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index c2e2c3c..e5f4a92 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1824,3 +1824,75 @@ static int f10_probe_valid_hardware(struct amd64_pvt *pvt)
 	return ret;
 }
 
+/*
+ * There currently are 3 types type of MC devices for AMD Athlon/Opterons
+ * (as per PCI DEVICE_IDs):
+ *
+ * Family K8: That is the Athlon64 and Opteron CPUs. They all have the same PCI
+ * DEVICE ID, even though there is differences between the different Revisions
+ * (CG,D,E,F).
+ *
+ * Family F10h and F11h.
+ *
+ */
+static struct amd64_family_type amd64_family_types[] = {
+	[K8_CPUS] = {
+		.ctl_name = "RevF",
+		.addr_f1_ctl = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
+		.misc_f3_ctl = PCI_DEVICE_ID_AMD_K8_NB_MISC,
+		.ops = {
+			.early_channel_count = k8_early_channel_count,
+			.get_error_address = k8_get_error_address,
+			.read_dram_base_limit = k8_read_dram_base_limit,
+			.map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
+			.dbam_map_to_pages = k8_dbam_map_to_pages,
+		}
+	},
+	[F10_CPUS] = {
+		.ctl_name = "Family 10h",
+		.addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP,
+		.misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC,
+		.ops = {
+			.probe_valid_hardware = f10_probe_valid_hardware,
+			.early_channel_count = f10_early_channel_count,
+			.get_error_address = f10_get_error_address,
+			.read_dram_base_limit = f10_read_dram_base_limit,
+			.read_dram_ctl_register = f10_read_dram_ctl_register,
+			.map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
+			.dbam_map_to_pages = f10_dbam_map_to_pages,
+		}
+	},
+	[F11_CPUS] = {
+		.ctl_name = "Family 11h",
+		.addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP,
+		.misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC,
+		.ops = {
+			.probe_valid_hardware = f10_probe_valid_hardware,
+			.early_channel_count = f10_early_channel_count,
+			.get_error_address = f10_get_error_address,
+			.read_dram_base_limit = f10_read_dram_base_limit,
+			.read_dram_ctl_register = f10_read_dram_ctl_register,
+			.map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
+			.dbam_map_to_pages = f10_dbam_map_to_pages,
+		}
+	},
+};
+
+static struct pci_dev *pci_get_related_function(unsigned int vendor,
+						unsigned int device,
+						struct pci_dev *related)
+{
+	struct pci_dev *dev = NULL;
+
+	dev = pci_get_device(vendor, device, dev);
+	while (dev) {
+		if ((dev->bus->number == related->bus->number) &&
+		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
+			break;
+		dev = pci_get_device(vendor, device, dev);
+	}
+
+	return dev;
+}
+
+
-- 
cgit v0.10.2


From b1289d6f9d23abab396077abb65d5a23a775cdb0 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 16:37:05 +0200
Subject: amd64_edac: add ECC chipkill syndrome mapping table

Borislav:

- fix comments
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e5f4a92..feb4986 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1895,4 +1895,139 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor,
 	return dev;
 }
 
+/*
+ * syndrome mapping table for ECC ChipKill devices
+ *
+ * The comment in each row is the token (nibble) number that is in error.
+ * The least significant nibble of the syndrome is the mask for the bits
+ * that are in error (need to be toggled) for the particular nibble.
+ *
+ * Each row contains 16 entries.
+ * The first entry (0th) is the channel number for that row of syndromes.
+ * The remaining 15 entries are the syndromes for the respective Error
+ * bit mask index.
+ *
+ * 1st index entry is 0x0001 mask, indicating that the rightmost bit is the
+ * bit in error.
+ * The 2nd index entry is 0x0010 that the second bit is damaged.
+ * The 3rd index entry is 0x0011 indicating that the rightmost 2 bits
+ * are damaged.
+ * Thus so on until index 15, 0x1111, whose entry has the syndrome
+ * indicating that all 4 bits are damaged.
+ *
+ * A search is performed on this table looking for a given syndrome.
+ *
+ * See the AMD documentation for ECC syndromes. This ECC table is valid
+ * across all the versions of the AMD64 processors.
+ *
+ * A fast lookup is to use the LAST four bits of the 16-bit syndrome as a
+ * COLUMN index, then search all ROWS of that column, looking for a match
+ * with the input syndrome. The ROW value will be the token number.
+ *
+ * The 0'th entry on that row, can be returned as the CHANNEL (0 or 1) of this
+ * error.
+ */
+#define NUMBER_ECC_ROWS  36
+static const unsigned short ecc_chipkill_syndromes[NUMBER_ECC_ROWS][16] = {
+	/* Channel 0 syndromes */
+	{/*0*/  0, 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57,
+	   0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df },
+	{/*1*/  0, 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7,
+	   0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f },
+	{/*2*/  0, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+	   0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f },
+	{/*3*/  0, 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057,
+	   0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df },
+	{/*4*/  0, 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097,
+	   0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f },
+	{/*5*/  0, 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857,
+	   0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf },
+	{/*6*/  0, 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467,
+	   0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f },
+	{/*7*/  0, 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27,
+	   0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff },
+	{/*8*/  0, 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177,
+	   0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f },
+	{/*9*/  0, 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07,
+	   0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f },
+	{/*a*/  0, 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07,
+	   0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f },
+	{/*b*/  0, 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7,
+	   0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f },
+	{/*c*/  0, 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87,
+	   0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x542c, 0x85fd, 0x164e, 0xf79f },
+	{/*d*/  0, 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067,
+	   0x20e8, 0x40b9, 0x904a, 0x601b, 0x307c, 0x502d, 0x80de, 0xe08f },
+	{/*e*/  0, 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77,
+	   0x7b58, 0xdf99, 0x831a, 0x27db, 0x9dac, 0x396d, 0x65ee, 0xc12f },
+	{/*f*/  0, 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77,
+	   0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x956d, 0xa6ee, 0xb72f },
+
+	/* Channel 1 syndromes */
+	{/*10*/ 1, 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187,
+	   0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f },
+	{/*11*/ 1, 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627,
+	   0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff },
+	{/*12*/ 1, 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97,
+	   0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f },
+	{/*13*/ 1, 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0xce97,
+	   0x1aa8, 0xc7e9, 0x7c2a, 0xa1fb, 0x2ffc, 0xf2bd, 0x497e, 0x943f },
+	{/*14*/ 1, 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987,
+	   0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f },
+	{/*15*/ 1, 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677,
+	   0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f },
+	{/*16*/ 1, 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387,
+	   0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f },
+	{/*17*/ 1, 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17,
+	   0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf },
+	{/*18*/ 1, 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7,
+	   0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f },
+	{/*19*/ 1, 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397,
+	   0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f },
+	{/*1a*/ 1, 0x9891, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617,
+	   0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf },
+	{/*1b*/ 1, 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3f55, 0xb9c6, 0x7527,
+	   0x56d8, 0x9a39, 0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff },
+	{/*1c*/ 1, 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7,
+	   0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef },
+	{/*1d*/ 1, 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7,
+	   0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def },
+	{/*1e*/ 1, 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67,
+	   0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f },
+	{/*1f*/ 1, 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377,
+	   0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f },
+
+	/* ECC bits are also in the set of tokens and they too can go bad
+	 * first 2 cover channel 0, while the second 2 cover channel 1
+	 */
+	{/*20*/ 0, 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807,
+	   0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f },
+	{/*21*/ 0, 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07,
+	   0xac08, 0xed09, 0x2e0a, 0x6f0b, 0x640c, 0xb50d, 0x760e, 0x370f },
+	{/*22*/ 1, 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97,
+	   0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f },
+	{/*23*/ 1, 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757,
+	   0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df }
+};
+
+/*
+ * Given the syndrome argument, scan each of the channel tables for a syndrome
+ * match. Depending on which table it is found, return the channel number.
+ */
+static int get_channel_from_ecc_syndrome(unsigned short syndrome)
+{
+	int row;
+	int column;
 
+	/* Determine column to scan */
+	column = syndrome & 0xF;
+
+	/* Scan all rows, looking for syndrome, or end of table */
+	for (row = 0; row < NUMBER_ECC_ROWS; row++) {
+		if (ecc_chipkill_syndromes[row][column] == syndrome)
+			return ecc_chipkill_syndromes[row][0];
+	}
+
+	debugf0("syndrome(%x) not found\n", syndrome);
+	return -1;
+}
-- 
cgit v0.10.2


From d27bf6fa369ca0272df10558d2f290d6fc72e675 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Wed, 6 May 2009 17:55:27 +0200
Subject: amd64_edac: add error decoding logic

Borislav:

- fold amd64_error_info_valid() into its only user
- fix/cleanup comments
- fix function return value patterns
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index feb4986..09991c8 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2031,3 +2031,428 @@ static int get_channel_from_ecc_syndrome(unsigned short syndrome)
 	debugf0("syndrome(%x) not found\n", syndrome);
 	return -1;
 }
+
+/*
+ * Check for valid error in the NB Status High register. If so, proceed to read
+ * NB Status Low, NB Address Low and NB Address High registers and store data
+ * into error structure.
+ *
+ * Returns:
+ *	- 1: if hardware regs contains valid error info
+ *	- 0: if no valid error is indicated
+ */
+static int amd64_get_error_info_regs(struct mem_ctl_info *mci,
+				     struct amd64_error_info_regs *regs)
+{
+	struct amd64_pvt *pvt;
+	struct pci_dev *misc_f3_ctl;
+	int err = 0;
+
+	pvt = mci->pvt_info;
+	misc_f3_ctl = pvt->misc_f3_ctl;
+
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBSH, &regs->nbsh);
+	if (err)
+		goto err_reg;
+
+	if (!(regs->nbsh & K8_NBSH_VALID_BIT))
+		return 0;
+
+	/* valid error, read remaining error information registers */
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBSL, &regs->nbsl);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBEAL, &regs->nbeal);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBEAH, &regs->nbeah);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(misc_f3_ctl, K8_NBCFG, &regs->nbcfg);
+	if (err)
+		goto err_reg;
+
+	return 1;
+
+err_reg:
+	debugf0("Reading error info register failed\n");
+	return 0;
+}
+
+/*
+ * This function is called to retrieve the error data from hardware and store it
+ * in the info structure.
+ *
+ * Returns:
+ *	- 1: if a valid error is found
+ *	- 0: if no error is found
+ */
+static int amd64_get_error_info(struct mem_ctl_info *mci,
+				struct amd64_error_info_regs *info)
+{
+	struct amd64_pvt *pvt;
+	struct amd64_error_info_regs regs;
+
+	pvt = mci->pvt_info;
+
+	if (!amd64_get_error_info_regs(mci, info))
+		return 0;
+
+	/*
+	 * Here's the problem with the K8's EDAC reporting: There are four
+	 * registers which report pieces of error information. They are shared
+	 * between CEs and UEs. Furthermore, contrary to what is stated in the
+	 * BKDG, the overflow bit is never used! Every error always updates the
+	 * reporting registers.
+	 *
+	 * Can you see the race condition? All four error reporting registers
+	 * must be read before a new error updates them! There is no way to read
+	 * all four registers atomically. The best than can be done is to detect
+	 * that a race has occured and then report the error without any kind of
+	 * precision.
+	 *
+	 * What is still positive is that errors are still reported and thus
+	 * problems can still be detected - just not localized because the
+	 * syndrome and address are spread out across registers.
+	 *
+	 * Grrrrr!!!!!  Here's hoping that AMD fixes this in some future K8 rev.
+	 * UEs and CEs should have separate register sets with proper overflow
+	 * bits that are used! At very least the problem can be fixed by
+	 * honoring the ErrValid bit in 'nbsh' and not updating registers - just
+	 * set the overflow bit - unless the current error is CE and the new
+	 * error is UE which would be the only situation for overwriting the
+	 * current values.
+	 */
+
+	regs = *info;
+
+	/* Use info from the second read - most current */
+	if (unlikely(!amd64_get_error_info_regs(mci, info)))
+		return 0;
+
+	/* clear the error bits in hardware */
+	pci_write_bits32(pvt->misc_f3_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT);
+
+	/* Check for the possible race condition */
+	if ((regs.nbsh != info->nbsh) ||
+	     (regs.nbsl != info->nbsl) ||
+	     (regs.nbeah != info->nbeah) ||
+	     (regs.nbeal != info->nbeal)) {
+		amd64_mc_printk(mci, KERN_WARNING,
+				"hardware STATUS read access race condition "
+				"detected!\n");
+		return 0;
+	}
+	return 1;
+}
+
+static inline void amd64_decode_gart_tlb_error(struct mem_ctl_info *mci,
+					 struct amd64_error_info_regs *info)
+{
+	u32 err_code;
+	u32 ec_tt;		/* error code transaction type (2b) */
+	u32 ec_ll;		/* error code cache level (2b) */
+
+	err_code = EXTRACT_ERROR_CODE(info->nbsl);
+	ec_ll = EXTRACT_LL_CODE(err_code);
+	ec_tt = EXTRACT_TT_CODE(err_code);
+
+	amd64_mc_printk(mci, KERN_ERR,
+		     "GART TLB event: transaction type(%s), "
+		     "cache level(%s)\n", tt_msgs[ec_tt], ll_msgs[ec_ll]);
+}
+
+static inline void amd64_decode_mem_cache_error(struct mem_ctl_info *mci,
+				      struct amd64_error_info_regs *info)
+{
+	u32 err_code;
+	u32 ec_rrrr;		/* error code memory transaction (4b) */
+	u32 ec_tt;		/* error code transaction type (2b) */
+	u32 ec_ll;		/* error code cache level (2b) */
+
+	err_code = EXTRACT_ERROR_CODE(info->nbsl);
+	ec_ll = EXTRACT_LL_CODE(err_code);
+	ec_tt = EXTRACT_TT_CODE(err_code);
+	ec_rrrr = EXTRACT_RRRR_CODE(err_code);
+
+	amd64_mc_printk(mci, KERN_ERR,
+		     "cache hierarchy error: memory transaction type(%s), "
+		     "transaction type(%s), cache level(%s)\n",
+		     rrrr_msgs[ec_rrrr], tt_msgs[ec_tt], ll_msgs[ec_ll]);
+}
+
+
+/*
+ * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
+ * ADDRESS and process.
+ */
+static void amd64_handle_ce(struct mem_ctl_info *mci,
+			    struct amd64_error_info_regs *info)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	u64 SystemAddress;
+
+	/* Ensure that the Error Address is VALID */
+	if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
+		amd64_mc_printk(mci, KERN_ERR,
+			"HW has no ERROR_ADDRESS available\n");
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+		return;
+	}
+
+	SystemAddress = extract_error_address(mci, info);
+
+	amd64_mc_printk(mci, KERN_ERR,
+		"CE ERROR_ADDRESS= 0x%llx\n", SystemAddress);
+
+	pvt->ops->map_sysaddr_to_csrow(mci, info, SystemAddress);
+}
+
+/* Handle any Un-correctable Errors (UEs) */
+static void amd64_handle_ue(struct mem_ctl_info *mci,
+			    struct amd64_error_info_regs *info)
+{
+	int csrow;
+	u64 SystemAddress;
+	u32 page, offset;
+	struct mem_ctl_info *log_mci, *src_mci = NULL;
+
+	log_mci = mci;
+
+	if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
+		amd64_mc_printk(mci, KERN_CRIT,
+			"HW has no ERROR_ADDRESS available\n");
+		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+		return;
+	}
+
+	SystemAddress = extract_error_address(mci, info);
+
+	/*
+	 * Find out which node the error address belongs to. This may be
+	 * different from the node that detected the error.
+	 */
+	src_mci = find_mc_by_sys_addr(mci, SystemAddress);
+	if (!src_mci) {
+		amd64_mc_printk(mci, KERN_CRIT,
+			"ERROR ADDRESS (0x%lx) value NOT mapped to a MC\n",
+			(unsigned long)SystemAddress);
+		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+		return;
+	}
+
+	log_mci = src_mci;
+
+	csrow = sys_addr_to_csrow(log_mci, SystemAddress);
+	if (csrow < 0) {
+		amd64_mc_printk(mci, KERN_CRIT,
+			"ERROR_ADDRESS (0x%lx) value NOT mapped to 'csrow'\n",
+			(unsigned long)SystemAddress);
+		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
+	} else {
+		error_address_to_page_and_offset(SystemAddress, &page, &offset);
+		edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
+	}
+}
+
+static void amd64_decode_bus_error(struct mem_ctl_info *mci,
+				   struct amd64_error_info_regs *info)
+{
+	u32 err_code, ext_ec;
+	u32 ec_pp;		/* error code participating processor (2p) */
+	u32 ec_to;		/* error code timed out (1b) */
+	u32 ec_rrrr;		/* error code memory transaction (4b) */
+	u32 ec_ii;		/* error code memory or I/O (2b) */
+	u32 ec_ll;		/* error code cache level (2b) */
+
+	ext_ec = EXTRACT_EXT_ERROR_CODE(info->nbsl);
+	err_code = EXTRACT_ERROR_CODE(info->nbsl);
+
+	ec_ll = EXTRACT_LL_CODE(err_code);
+	ec_ii = EXTRACT_II_CODE(err_code);
+	ec_rrrr = EXTRACT_RRRR_CODE(err_code);
+	ec_to = EXTRACT_TO_CODE(err_code);
+	ec_pp = EXTRACT_PP_CODE(err_code);
+
+	amd64_mc_printk(mci, KERN_ERR,
+		"BUS ERROR:\n"
+		"  time-out(%s) mem or i/o(%s)\n"
+		"  participating processor(%s)\n"
+		"  memory transaction type(%s)\n"
+		"  cache level(%s) Error Found by: %s\n",
+		to_msgs[ec_to],
+		ii_msgs[ec_ii],
+		pp_msgs[ec_pp],
+		rrrr_msgs[ec_rrrr],
+		ll_msgs[ec_ll],
+		(info->nbsh & K8_NBSH_ERR_SCRUBER) ?
+			"Scrubber" : "Normal Operation");
+
+	/* If this was an 'observed' error, early out */
+	if (ec_pp == K8_NBSL_PP_OBS)
+		return;		/* We aren't the node involved */
+
+	/* Parse out the extended error code for ECC events */
+	switch (ext_ec) {
+	/* F10 changed to one Extended ECC error code */
+	case F10_NBSL_EXT_ERR_RES:		/* Reserved field */
+	case F10_NBSL_EXT_ERR_ECC:		/* F10 ECC ext err code */
+		break;
+
+	default:
+		amd64_mc_printk(mci, KERN_ERR, "NOT ECC: no special error "
+					       "handling for this error\n");
+		return;
+	}
+
+	if (info->nbsh & K8_NBSH_CECC)
+		amd64_handle_ce(mci, info);
+	else if (info->nbsh & K8_NBSH_UECC)
+		amd64_handle_ue(mci, info);
+
+	/*
+	 * If main error is CE then overflow must be CE.  If main error is UE
+	 * then overflow is unknown.  We'll call the overflow a CE - if
+	 * panic_on_ue is set then we're already panic'ed and won't arrive
+	 * here. Else, then apparently someone doesn't think that UE's are
+	 * catastrophic.
+	 */
+	if (info->nbsh & K8_NBSH_OVERFLOW)
+		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR
+					  "Error Overflow set");
+}
+
+int amd64_process_error_info(struct mem_ctl_info *mci,
+			     struct amd64_error_info_regs *info,
+			     int handle_errors)
+{
+	struct amd64_pvt *pvt;
+	struct amd64_error_info_regs *regs;
+	u32 err_code, ext_ec;
+	int gart_tlb_error = 0;
+
+	pvt = mci->pvt_info;
+
+	/* If caller doesn't want us to process the error, return */
+	if (!handle_errors)
+		return 1;
+
+	regs = info;
+
+	debugf1("NorthBridge ERROR: mci(0x%p)\n", mci);
+	debugf1("  MC node(%d) Error-Address(0x%.8x-%.8x)\n",
+		pvt->mc_node_id, regs->nbeah, regs->nbeal);
+	debugf1("  nbsh(0x%.8x) nbsl(0x%.8x)\n",
+		regs->nbsh, regs->nbsl);
+	debugf1("  Valid Error=%s Overflow=%s\n",
+		(regs->nbsh & K8_NBSH_VALID_BIT) ? "True" : "False",
+		(regs->nbsh & K8_NBSH_OVERFLOW) ? "True" : "False");
+	debugf1("  Err Uncorrected=%s MCA Error Reporting=%s\n",
+		(regs->nbsh & K8_NBSH_UNCORRECTED_ERR) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_ERR_ENABLE) ?
+			"True" : "False");
+	debugf1("  MiscErr Valid=%s ErrAddr Valid=%s PCC=%s\n",
+		(regs->nbsh & K8_NBSH_MISC_ERR_VALID) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_VALID_ERROR_ADDR) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_PCC) ?
+			"True" : "False");
+	debugf1("  CECC=%s UECC=%s Found by Scruber=%s\n",
+		(regs->nbsh & K8_NBSH_CECC) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_UECC) ?
+			"True" : "False",
+		(regs->nbsh & K8_NBSH_ERR_SCRUBER) ?
+			"True" : "False");
+	debugf1("  CORE0=%s CORE1=%s CORE2=%s CORE3=%s\n",
+		(regs->nbsh & K8_NBSH_CORE0) ? "True" : "False",
+		(regs->nbsh & K8_NBSH_CORE1) ? "True" : "False",
+		(regs->nbsh & K8_NBSH_CORE2) ? "True" : "False",
+		(regs->nbsh & K8_NBSH_CORE3) ? "True" : "False");
+
+
+	err_code = EXTRACT_ERROR_CODE(regs->nbsl);
+
+	/* Determine which error type:
+	 *	1) GART errors - non-fatal, developmental events
+	 *	2) MEMORY errors
+	 *	3) BUS errors
+	 *	4) Unknown error
+	 */
+	if (TEST_TLB_ERROR(err_code)) {
+		/*
+		 * GART errors are intended to help graphics driver developers
+		 * to detect bad GART PTEs. It is recommended by AMD to disable
+		 * GART table walk error reporting by default[1] (currently
+		 * being disabled in mce_cpu_quirks()) and according to the
+		 * comment in mce_cpu_quirks(), such GART errors can be
+		 * incorrectly triggered. We may see these errors anyway and
+		 * unless requested by the user, they won't be reported.
+		 *
+		 * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
+		 *     AMD NPT family 0Fh processors
+		 */
+		if (report_gart_errors == 0)
+			return 1;
+
+		/*
+		 * Only if GART error reporting is requested should we generate
+		 * any logs.
+		 */
+		gart_tlb_error = 1;
+
+		debugf1("GART TLB error\n");
+		amd64_decode_gart_tlb_error(mci, info);
+	} else if (TEST_MEM_ERROR(err_code)) {
+		debugf1("Memory/Cache error\n");
+		amd64_decode_mem_cache_error(mci, info);
+	} else if (TEST_BUS_ERROR(err_code)) {
+		debugf1("Bus (Link/DRAM) error\n");
+		amd64_decode_bus_error(mci, info);
+	} else {
+		/* shouldn't reach here! */
+		amd64_mc_printk(mci, KERN_WARNING,
+			     "%s(): unknown MCE error 0x%x\n", __func__,
+			     err_code);
+	}
+
+	ext_ec = EXTRACT_EXT_ERROR_CODE(regs->nbsl);
+	amd64_mc_printk(mci, KERN_ERR,
+		"ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]);
+
+	if (((ext_ec >= F10_NBSL_EXT_ERR_CRC &&
+			ext_ec <= F10_NBSL_EXT_ERR_TGT) ||
+			(ext_ec == F10_NBSL_EXT_ERR_RMW)) &&
+			EXTRACT_LDT_LINK(info->nbsh)) {
+
+		amd64_mc_printk(mci, KERN_ERR,
+			"Error on hypertransport link: %s\n",
+			htlink_msgs[
+			EXTRACT_LDT_LINK(info->nbsh)]);
+	}
+
+	/*
+	 * Check the UE bit of the NB status high register, if set generate some
+	 * logs. If NOT a GART error, then process the event as a NO-INFO event.
+	 * If it was a GART error, skip that process.
+	 */
+	if (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) {
+		amd64_mc_printk(mci, KERN_CRIT, "uncorrected error\n");
+		if (!gart_tlb_error)
+			edac_mc_handle_ue_no_info(mci, "UE bit is set\n");
+	}
+
+	if (regs->nbsh & K8_NBSH_PCC)
+		amd64_mc_printk(mci, KERN_CRIT,
+			"PCC (processor context corrupt) set\n");
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(amd64_process_error_info);
+
+
-- 
cgit v0.10.2


From 0ec449ee95b20245fef4aa9fa2486456f1540514 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 19:41:25 +0200
Subject: amd64_edac: add EDAC core-related initializers

Borislav:

- add a amd64_free_mc_sibling_devices() helper instead of opencoding the
  release-path.
- fix/cleanup comments
- fix function return value patterns
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 09991c8..5a6e714 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2455,4 +2455,319 @@ int amd64_process_error_info(struct mem_ctl_info *mci,
 }
 EXPORT_SYMBOL_GPL(amd64_process_error_info);
 
+/*
+ * The main polling 'check' function, called FROM the edac core to perform the
+ * error checking and if an error is encountered, error processing.
+ */
+static void amd64_check(struct mem_ctl_info *mci)
+{
+	struct amd64_error_info_regs info;
+
+	if (amd64_get_error_info(mci, &info))
+		amd64_process_error_info(mci, &info, 1);
+}
+
+/*
+ * Input:
+ *	1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer
+ *	2) AMD Family index value
+ *
+ * Ouput:
+ *	Upon return of 0, the following filled in:
+ *
+ *		struct pvt->addr_f1_ctl
+ *		struct pvt->misc_f3_ctl
+ *
+ *	Filled in with related device funcitions of 'dram_f2_ctl'
+ *	These devices are "reserved" via the pci_get_device()
+ *
+ *	Upon return of 1 (error status):
+ *
+ *		Nothing reserved
+ */
+static int amd64_reserve_mc_sibling_devices(struct amd64_pvt *pvt, int mc_idx)
+{
+	const struct amd64_family_type *amd64_dev = &amd64_family_types[mc_idx];
+
+	/* Reserve the ADDRESS MAP Device */
+	pvt->addr_f1_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
+						    amd64_dev->addr_f1_ctl,
+						    pvt->dram_f2_ctl);
+
+	if (!pvt->addr_f1_ctl) {
+		amd64_printk(KERN_ERR, "error address map device not found: "
+			     "vendor %x device 0x%x (broken BIOS?)\n",
+			     PCI_VENDOR_ID_AMD, amd64_dev->addr_f1_ctl);
+		return 1;
+	}
+
+	/* Reserve the MISC Device */
+	pvt->misc_f3_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
+						    amd64_dev->misc_f3_ctl,
+						    pvt->dram_f2_ctl);
+
+	if (!pvt->misc_f3_ctl) {
+		pci_dev_put(pvt->addr_f1_ctl);
+		pvt->addr_f1_ctl = NULL;
+
+		amd64_printk(KERN_ERR, "error miscellaneous device not found: "
+			     "vendor %x device 0x%x (broken BIOS?)\n",
+			     PCI_VENDOR_ID_AMD, amd64_dev->misc_f3_ctl);
+		return 1;
+	}
+
+	debugf1("    Addr Map device PCI Bus ID:\t%s\n",
+		pci_name(pvt->addr_f1_ctl));
+	debugf1("    DRAM MEM-CTL PCI Bus ID:\t%s\n",
+		pci_name(pvt->dram_f2_ctl));
+	debugf1("    Misc device PCI Bus ID:\t%s\n",
+		pci_name(pvt->misc_f3_ctl));
+
+	return 0;
+}
+
+static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt)
+{
+	pci_dev_put(pvt->addr_f1_ctl);
+	pci_dev_put(pvt->misc_f3_ctl);
+}
+
+/*
+ * Retrieve the hardware registers of the memory controller (this includes the
+ * 'Address Map' and 'Misc' device regs)
+ */
+static void amd64_read_mc_registers(struct amd64_pvt *pvt)
+{
+	u64 msr_val;
+	int dram, err = 0;
+
+	/*
+	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
+	 * those are Read-As-Zero
+	 */
+	rdmsrl(MSR_K8_TOP_MEM1, msr_val);
+	pvt->top_mem = msr_val >> 23;
+	debugf0("  TOP_MEM=0x%08llx\n", pvt->top_mem);
+
+	/* check first whether TOP_MEM2 is enabled */
+	rdmsrl(MSR_K8_SYSCFG, msr_val);
+	if (msr_val & (1U << 21)) {
+		rdmsrl(MSR_K8_TOP_MEM2, msr_val);
+		pvt->top_mem2 = msr_val >> 23;
+		debugf0("  TOP_MEM2=0x%08llx\n", pvt->top_mem2);
+	} else
+		debugf0("  TOP_MEM2 disabled.\n");
+
+	amd64_cpu_display_info(pvt);
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap);
+	if (err)
+		goto err_reg;
+
+	if (pvt->ops->read_dram_ctl_register)
+		pvt->ops->read_dram_ctl_register(pvt);
+
+	for (dram = 0; dram < DRAM_REG_COUNT; dram++) {
+		/*
+		 * Call CPU specific READ function to get the DRAM Base and
+		 * Limit values from the DCT.
+		 */
+		pvt->ops->read_dram_base_limit(pvt, dram);
+
+		/*
+		 * Only print out debug info on rows with both R and W Enabled.
+		 * Normal processing, compiler should optimize this whole 'if'
+		 * debug output block away.
+		 */
+		if (pvt->dram_rw_en[dram] != 0) {
+			debugf1("  DRAM_BASE[%d]: 0x%8.08x-%8.08x "
+				"DRAM_LIMIT:  0x%8.08x-%8.08x\n",
+				dram,
+				(u32)(pvt->dram_base[dram] >> 32),
+				(u32)(pvt->dram_base[dram] & 0xFFFFFFFF),
+				(u32)(pvt->dram_limit[dram] >> 32),
+				(u32)(pvt->dram_limit[dram] & 0xFFFFFFFF));
+			debugf1("        IntlvEn=%s %s %s "
+				"IntlvSel=%d DstNode=%d\n",
+				pvt->dram_IntlvEn[dram] ?
+					"Enabled" : "Disabled",
+				(pvt->dram_rw_en[dram] & 0x2) ? "W" : "!W",
+				(pvt->dram_rw_en[dram] & 0x1) ? "R" : "!R",
+				pvt->dram_IntlvSel[dram],
+				pvt->dram_DstNode[dram]);
+		}
+	}
+
+	amd64_read_dct_base_mask(pvt);
+
+	err = pci_read_config_dword(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar);
+	if (err)
+		goto err_reg;
+
+	amd64_read_dbam_reg(pvt);
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl,
+				F10_ONLINE_SPARE, &pvt->online_spare);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
+	if (err)
+		goto err_reg;
+
+	err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0);
+	if (err)
+		goto err_reg;
+
+	if (!dct_ganging_enabled(pvt)) {
+		err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1,
+						&pvt->dclr1);
+		if (err)
+			goto err_reg;
+
+		err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_1,
+						&pvt->dchr1);
+		if (err)
+			goto err_reg;
+	}
+
+	amd64_dump_misc_regs(pvt);
+
+err_reg:
+	debugf0("Reading an MC register failed\n");
+
+}
+
+/*
+ * NOTE: CPU Revision Dependent code
+ *
+ * Input:
+ *	@csrow_nr ChipSelect Row Number (0..CHIPSELECT_COUNT-1)
+ *	k8 private pointer to -->
+ *			DRAM Bank Address mapping register
+ *			node_id
+ *			DCL register where dual_channel_active is
+ *
+ * The DBAM register consists of 4 sets of 4 bits each definitions:
+ *
+ * Bits:	CSROWs
+ * 0-3		CSROWs 0 and 1
+ * 4-7		CSROWs 2 and 3
+ * 8-11		CSROWs 4 and 5
+ * 12-15	CSROWs 6 and 7
+ *
+ * Values range from: 0 to 15
+ * The meaning of the values depends on CPU revision and dual-channel state,
+ * see relevant BKDG more info.
+ *
+ * The memory controller provides for total of only 8 CSROWs in its current
+ * architecture. Each "pair" of CSROWs normally represents just one DIMM in
+ * single channel or two (2) DIMMs in dual channel mode.
+ *
+ * The following code logic collapses the various tables for CSROW based on CPU
+ * revision.
+ *
+ * Returns:
+ *	The number of PAGE_SIZE pages on the specified CSROW number it
+ *	encompasses
+ *
+ */
+static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
+{
+	u32 dram_map, nr_pages;
+
+	/*
+	 * The math on this doesn't look right on the surface because x/2*4 can
+	 * be simplified to x*2 but this expression makes use of the fact that
+	 * it is integral math where 1/2=0. This intermediate value becomes the
+	 * number of bits to shift the DBAM register to extract the proper CSROW
+	 * field.
+	 */
+	dram_map = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF;
+
+	nr_pages = pvt->ops->dbam_map_to_pages(pvt, dram_map);
+
+	/*
+	 * If dual channel then double the memory size of single channel.
+	 * Channel count is 1 or 2
+	 */
+	nr_pages <<= (pvt->channel_count - 1);
+
+	debugf0("  (csrow=%d) DBAM map index= %d\n", csrow_nr, dram_map);
+	debugf0("    nr_pages= %u  channel-count = %d\n",
+		nr_pages, pvt->channel_count);
+
+	return nr_pages;
+}
+
+/*
+ * Initialize the array of csrow attribute instances, based on the values
+ * from pci config hardware registers.
+ */
+static int amd64_init_csrows(struct mem_ctl_info *mci)
+{
+	struct csrow_info *csrow;
+	struct amd64_pvt *pvt;
+	u64 input_addr_min, input_addr_max, sys_addr;
+	int i, err = 0, empty = 1;
+
+	pvt = mci->pvt_info;
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg);
+	if (err)
+		debugf0("Reading K8_NBCFG failed\n");
+
+	debugf0("NBCFG= 0x%x  CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg,
+		(pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+		(pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
+		);
+
+	for (i = 0; i < CHIPSELECT_COUNT; i++) {
+		csrow = &mci->csrows[i];
+
+		if ((pvt->dcsb0[i] & K8_DCSB_CS_ENABLE) == 0) {
+			debugf1("----CSROW %d EMPTY for node %d\n", i,
+				pvt->mc_node_id);
+			continue;
+		}
+
+		debugf1("----CSROW %d VALID for MC node %d\n",
+			i, pvt->mc_node_id);
+
+		empty = 0;
+		csrow->nr_pages = amd64_csrow_nr_pages(i, pvt);
+		find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
+		sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
+		csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
+		sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
+		csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
+		csrow->page_mask = ~mask_from_dct_mask(pvt, i);
+		/* 8 bytes of resolution */
+
+		csrow->mtype = amd64_determine_memory_type(pvt);
+
+		debugf1("  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
+		debugf1("    input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
+			(unsigned long)input_addr_min,
+			(unsigned long)input_addr_max);
+		debugf1("    sys_addr: 0x%lx  page_mask: 0x%lx\n",
+			(unsigned long)sys_addr, csrow->page_mask);
+		debugf1("    nr_pages: %u  first_page: 0x%lx "
+			"last_page: 0x%lx\n",
+			(unsigned)csrow->nr_pages,
+			csrow->first_page, csrow->last_page);
+
+		/*
+		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
+		 */
+		if (pvt->nbcfg & K8_NBCFG_ECC_ENABLE)
+			csrow->edac_mode =
+			    (pvt->nbcfg & K8_NBCFG_CHIPKILL) ?
+			    EDAC_S4ECD4ED : EDAC_SECDED;
+		else
+			csrow->edac_mode = EDAC_NONE;
+	}
+
+	return empty;
+}
 
-- 
cgit v0.10.2


From f9431992b6227069bc54800d55531c6f78d276a7 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 19:46:08 +0200
Subject: amd64_edac: add ECC reporting initializers

Borislav:
- convert to the new {rd|wr}msr_on_cpus interfaces.
- convert pvt->old_mcgctl to a bitmask thus saving some bytes
- fix/cleanup comments
- fix function return value patterns
- add a proper bugfix found by Doug to amd64_check_ecc_enabled where we
  missed checking for the ECC enabled bit in NB CFG.
- cleanup debug calls

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5a6e714..3b6c421 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2771,3 +2771,210 @@ static int amd64_init_csrows(struct mem_ctl_info *mci)
 	return empty;
 }
 
+/*
+ * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we"
+ * enable it.
+ */
+static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+	int cpu, idx = 0, err = 0;
+	struct msr msrs[cpumask_weight(cpumask)];
+	u32 value;
+	u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+	if (!ecc_enable_override)
+		return;
+
+	memset(msrs, 0, sizeof(msrs));
+
+	amd64_printk(KERN_WARNING,
+		"'ecc_enable_override' parameter is active, "
+		"Enabling AMD ECC hardware now: CAUTION\n");
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+	if (err)
+		debugf0("Reading K8_NBCTL failed\n");
+
+	/* turn on UECCn and CECCEn bits */
+	pvt->old_nbctl = value & mask;
+	pvt->nbctl_mcgctl_saved = 1;
+
+	value |= mask;
+	pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+	rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+	for_each_cpu(cpu, cpumask) {
+		if (msrs[idx].l & K8_MSR_MCGCTL_NBE)
+			set_bit(idx, &pvt->old_mcgctl);
+
+		msrs[idx].l |= K8_MSR_MCGCTL_NBE;
+		idx++;
+	}
+	wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+	if (err)
+		debugf0("Reading K8_NBCFG failed\n");
+
+	debugf0("NBCFG(1)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
+		(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+		(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+
+	if (!(value & K8_NBCFG_ECC_ENABLE)) {
+		amd64_printk(KERN_WARNING,
+			"This node reports that DRAM ECC is "
+			"currently Disabled; ENABLING now\n");
+
+		/* Attempt to turn on DRAM ECC Enable */
+		value |= K8_NBCFG_ECC_ENABLE;
+		pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+
+		err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+		if (err)
+			debugf0("Reading K8_NBCFG failed\n");
+
+		if (!(value & K8_NBCFG_ECC_ENABLE)) {
+			amd64_printk(KERN_WARNING,
+				"Hardware rejects Enabling DRAM ECC checking\n"
+				"Check memory DIMM configuration\n");
+		} else {
+			amd64_printk(KERN_DEBUG,
+				"Hardware accepted DRAM ECC Enable\n");
+		}
+	}
+	debugf0("NBCFG(2)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n", value,
+		(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+		(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+
+	pvt->ctl_error_info.nbcfg = value;
+}
+
+static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
+{
+	const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+	int cpu, idx = 0, err = 0;
+	struct msr msrs[cpumask_weight(cpumask)];
+	u32 value;
+	u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+	if (!pvt->nbctl_mcgctl_saved)
+		return;
+
+	memset(msrs, 0, sizeof(msrs));
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+	if (err)
+		debugf0("Reading K8_NBCTL failed\n");
+	value &= ~mask;
+	value |= pvt->old_nbctl;
+
+	/* restore the NB Enable MCGCTL bit */
+	pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+	rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+
+	for_each_cpu(cpu, cpumask) {
+		msrs[idx].l &= ~K8_MSR_MCGCTL_NBE;
+		msrs[idx].l |=
+			test_bit(idx, &pvt->old_mcgctl) << K8_MSR_MCGCTL_NBE;
+		idx++;
+	}
+
+	wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
+}
+
+static void check_mcg_ctl(void *ret)
+{
+	u64 msr_val = 0;
+	u8 nbe;
+
+	rdmsrl(MSR_IA32_MCG_CTL, msr_val);
+	nbe = msr_val & K8_MSR_MCGCTL_NBE;
+
+	debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
+		raw_smp_processor_id(), msr_val,
+		(nbe ? "enabled" : "disabled"));
+
+	if (!nbe)
+		*(int *)ret = 0;
+}
+
+/* check MCG_CTL on all the cpus on this node */
+static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
+{
+	int ret = 1;
+	preempt_disable();
+	smp_call_function_many(mask, check_mcg_ctl, &ret, 1);
+	preempt_enable();
+
+	return ret;
+}
+
+/*
+ * EDAC requires that the BIOS have ECC enabled before taking over the
+ * processing of ECC errors. This is because the BIOS can properly initialize
+ * the memory system completely. A command line option allows to force-enable
+ * hardware ECC later in amd64_enable_ecc_error_reporting().
+ */
+static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
+{
+	u32 value;
+	int err = 0, ret = 0;
+	u8 ecc_enabled = 0;
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+	if (err)
+		debugf0("Reading K8_NBCTL failed\n");
+
+	ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
+
+	ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id));
+
+	debugf0("K8_NBCFG=0x%x,  DRAM ECC is %s\n", value,
+			(value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled"));
+
+	if (!ecc_enabled || !ret) {
+		if (!ecc_enabled) {
+			amd64_printk(KERN_WARNING, "This node reports that "
+						   "Memory ECC is currently "
+						   "disabled.\n");
+
+			amd64_printk(KERN_WARNING, "bit 0x%lx in register "
+				"F3x%x of the MISC_CONTROL device (%s) "
+				"should be enabled\n", K8_NBCFG_ECC_ENABLE,
+				K8_NBCFG, pci_name(pvt->misc_f3_ctl));
+		}
+		if (!ret) {
+			amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x "
+					"of node %d should be enabled\n",
+					K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL,
+					pvt->mc_node_id);
+		}
+		if (!ecc_enable_override) {
+			amd64_printk(KERN_WARNING, "WARNING: ECC is NOT "
+				"currently enabled by the BIOS. Module "
+				"will NOT be loaded.\n"
+				"    Either Enable ECC in the BIOS, "
+				"or use the 'ecc_enable_override' "
+				"parameter.\n"
+				"    Might be a BIOS bug, if BIOS says "
+				"ECC is enabled\n"
+				"    Use of the override can cause "
+				"unknown side effects.\n");
+			ret = -ENODEV;
+		}
+	} else {
+		amd64_printk(KERN_INFO,
+			"ECC is enabled by BIOS, Proceeding "
+			"with EDAC module initialization\n");
+
+		/* CLEAR the override, since BIOS controlled it */
+		ecc_enable_override = 0;
+	}
+
+	return ret;
+}
+
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 6f5d5d6..e7aa760 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -70,6 +70,7 @@
 #include <linux/slab.h>
 #include <linux/mmzone.h>
 #include <linux/edac.h>
+#include <asm/msr.h>
 #include "edac_core.h"
 
 #define amd64_printk(level, fmt, arg...) \
@@ -549,7 +550,7 @@ struct amd64_pvt {
 	/* Save old hw registers' values before we modified them */
 	u32 nbctl_mcgctl_saved;		/* When true, following 2 are valid */
 	u32 old_nbctl;
-	u32 *old_mcgctl;		/* per core on this node */
+	unsigned long old_mcgctl;	/* per core on this node */
 
 	/* MC Type Index value: socket F vs Family 10h */
 	u32 mc_type_index;
-- 
cgit v0.10.2


From 7d6034d3213e2dd1c0f8678e11064007413011c4 Mon Sep 17 00:00:00 2001
From: Doug Thompson <dougthompson@xmission.com>
Date: Mon, 27 Apr 2009 20:01:01 +0200
Subject: amd64_edac: add module registration routines

Also, link into Kbuild by adding Kconfig and Makefile entries.

Borislav:
- Kconfig/Makefile splitting
- use zero-sized arrays for the sysfs attrs if not enabled
- rename sysfs attrs to more conform values
- shorten CONFIG_ names
- make multiple structure members assignment vertically aligned
- fix/cleanup comments
- fix function return value patterns
- fix err labels
- fix a memleak bug caught by Ingo
- remove the NUMA dependency and use num_k8_northbrides for initializing
  a driver instance per NB.
- do not copy the pvt contents into the mci struct in
  amd64_init_2nd_stage() and save it in the mci->pvt_info void ptr
  instead.
- cleanup debug calls
- simplify amd64_setup_pci_device()

Reviewed-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 956982f..3dc650a 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -58,6 +58,32 @@ config EDAC_MM_EDAC
 	  occurred so that a particular failing memory module can be
 	  replaced.  If unsure, select 'Y'.
 
+config EDAC_AMD64
+	tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
+	depends on EDAC_MM_EDAC && X86 && PCI
+	default m
+	help
+	Support for error detection and correction on the AMD 64
+	Families of Memory Controllers (K8, F10h and F11h)
+
+config EDAC_AMD64_ERROR_INJECTION
+	bool "Sysfs Error Injection facilities"
+	depends on EDAC_AMD64
+	help
+	  Recent Opterons (Family 10h and later) provide for Memory Error
+	  Injection into the ECC detection circuits. The amd64_edac module
+	  allows the operator/user to inject Uncorrectable and Correctable
+	  errors into DRAM.
+
+	  When enabled, in each of the respective memory controller directories
+	  (/sys/devices/system/edac/mc/mcX), there are 3 input files:
+
+	  - inject_section (0..3, 16-byte section of 64-byte cacheline),
+	  - inject_word (0..8, 16-bit word of 16-byte section),
+	  - inject_ecc_vector (hex ecc vector: select bits of inject word)
+
+	  In addition, there are two control files, inject_read and inject_write,
+	  which trigger the DRAM ECC Read and Write respectively.
 
 config EDAC_AMD76X
 	tristate "AMD 76x (760, 762, 768)"
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 5907681..633dc56 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -30,6 +30,13 @@ obj-$(CONFIG_EDAC_I3000)		+= i3000_edac.o
 obj-$(CONFIG_EDAC_X38)			+= x38_edac.o
 obj-$(CONFIG_EDAC_I82860)		+= i82860_edac.o
 obj-$(CONFIG_EDAC_R82600)		+= r82600_edac.o
+
+amd64_edac_mod-y :=  amd64_edac_err_types.o amd64_edac.o
+amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
+amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
+
+obj-$(CONFIG_EDAC_AMD64)		+= amd64_edac_mod.o
+
 obj-$(CONFIG_EDAC_PASEMI)		+= pasemi_edac.o
 obj-$(CONFIG_EDAC_MPC85XX)		+= mpc85xx_edac.o
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 3b6c421..c36bf40 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,4 +1,5 @@
 #include "amd64_edac.h"
+#include <asm/k8.h>
 
 static struct edac_pci_ctl_info *amd64_ctl_pci;
 
@@ -2978,3 +2979,376 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
 	return ret;
 }
 
+struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
+					  ARRAY_SIZE(amd64_inj_attrs) +
+					  1];
+
+struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
+
+static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
+{
+	unsigned int i = 0, j = 0;
+
+	for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
+		sysfs_attrs[i] = amd64_dbg_attrs[i];
+
+	for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
+		sysfs_attrs[i] = amd64_inj_attrs[j];
+
+	sysfs_attrs[i] = terminator;
+
+	mci->mc_driver_sysfs_attributes = sysfs_attrs;
+}
+
+static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+
+	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
+	mci->edac_ctl_cap	= EDAC_FLAG_NONE;
+	mci->edac_cap		= EDAC_FLAG_NONE;
+
+	if (pvt->nbcap & K8_NBCAP_SECDED)
+		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+
+	if (pvt->nbcap & K8_NBCAP_CHIPKILL)
+		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
+
+	mci->edac_cap		= amd64_determine_edac_cap(pvt);
+	mci->mod_name		= EDAC_MOD_STR;
+	mci->mod_ver		= EDAC_AMD64_VERSION;
+	mci->ctl_name		= get_amd_family_name(pvt->mc_type_index);
+	mci->dev_name		= pci_name(pvt->dram_f2_ctl);
+	mci->ctl_page_to_phys	= NULL;
+
+	/* IMPORTANT: Set the polling 'check' function in this module */
+	mci->edac_check		= amd64_check;
+
+	/* memory scrubber interface */
+	mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
+	mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
+}
+
+/*
+ * Init stuff for this DRAM Controller device.
+ *
+ * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
+ * Space feature MUST be enabled on ALL Processors prior to actually reading
+ * from the ECS registers. Since the loading of the module can occur on any
+ * 'core', and cores don't 'see' all the other processors ECS data when the
+ * others are NOT enabled. Our solution is to first enable ECS access in this
+ * routine on all processors, gather some data in a amd64_pvt structure and
+ * later come back in a finish-setup function to perform that final
+ * initialization. See also amd64_init_2nd_stage() for that.
+ */
+static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
+				    int mc_type_index)
+{
+	struct amd64_pvt *pvt = NULL;
+	int err = 0, ret;
+
+	ret = -ENOMEM;
+	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+	if (!pvt)
+		goto err_exit;
+
+	pvt->mc_node_id = get_mc_node_id_from_pdev(dram_f2_ctl);
+
+	pvt->dram_f2_ctl	= dram_f2_ctl;
+	pvt->ext_model		= boot_cpu_data.x86_model >> 4;
+	pvt->mc_type_index	= mc_type_index;
+	pvt->ops		= family_ops(mc_type_index);
+	pvt->old_mcgctl		= 0;
+
+	/*
+	 * We have the dram_f2_ctl device as an argument, now go reserve its
+	 * sibling devices from the PCI system.
+	 */
+	ret = -ENODEV;
+	err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index);
+	if (err)
+		goto err_free;
+
+	ret = -EINVAL;
+	err = amd64_check_ecc_enabled(pvt);
+	if (err)
+		goto err_put;
+
+	/*
+	 * Key operation here: setup of HW prior to performing ops on it. Some
+	 * setup is required to access ECS data. After this is performed, the
+	 * 'teardown' function must be called upon error and normal exit paths.
+	 */
+	if (boot_cpu_data.x86 >= 0x10)
+		amd64_setup(pvt);
+
+	/*
+	 * Save the pointer to the private data for use in 2nd initialization
+	 * stage
+	 */
+	pvt_lookup[pvt->mc_node_id] = pvt;
+
+	return 0;
+
+err_put:
+	amd64_free_mc_sibling_devices(pvt);
+
+err_free:
+	kfree(pvt);
+
+err_exit:
+	return ret;
+}
+
+/*
+ * This is the finishing stage of the init code. Needs to be performed after all
+ * MCs' hardware have been prepped for accessing extended config space.
+ */
+static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
+{
+	int node_id = pvt->mc_node_id;
+	struct mem_ctl_info *mci;
+	int ret, err = 0;
+
+	amd64_read_mc_registers(pvt);
+
+	ret = -ENODEV;
+	if (pvt->ops->probe_valid_hardware) {
+		err = pvt->ops->probe_valid_hardware(pvt);
+		if (err)
+			goto err_exit;
+	}
+
+	/*
+	 * We need to determine how many memory channels there are. Then use
+	 * that information for calculating the size of the dynamic instance
+	 * tables in the 'mci' structure
+	 */
+	pvt->channel_count = pvt->ops->early_channel_count(pvt);
+	if (pvt->channel_count < 0)
+		goto err_exit;
+
+	ret = -ENOMEM;
+	mci = edac_mc_alloc(0, CHIPSELECT_COUNT, pvt->channel_count, node_id);
+	if (!mci)
+		goto err_exit;
+
+	mci->pvt_info = pvt;
+
+	mci->dev = &pvt->dram_f2_ctl->dev;
+	amd64_setup_mci_misc_attributes(mci);
+
+	if (amd64_init_csrows(mci))
+		mci->edac_cap = EDAC_FLAG_NONE;
+
+	amd64_enable_ecc_error_reporting(mci);
+	amd64_set_mc_sysfs_attributes(mci);
+
+	ret = -ENODEV;
+	if (edac_mc_add_mc(mci)) {
+		debugf1("failed edac_mc_add_mc()\n");
+		goto err_add_mc;
+	}
+
+	mci_lookup[node_id] = mci;
+	pvt_lookup[node_id] = NULL;
+	return 0;
+
+err_add_mc:
+	edac_mc_free(mci);
+
+err_exit:
+	debugf0("failure to init 2nd stage: ret=%d\n", ret);
+
+	amd64_restore_ecc_error_reporting(pvt);
+
+	if (boot_cpu_data.x86 > 0xf)
+		amd64_teardown(pvt);
+
+	amd64_free_mc_sibling_devices(pvt);
+
+	kfree(pvt_lookup[pvt->mc_node_id]);
+	pvt_lookup[node_id] = NULL;
+
+	return ret;
+}
+
+
+static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
+				 const struct pci_device_id *mc_type)
+{
+	int ret = 0;
+
+	debugf0("(MC node=%d,mc_type='%s')\n",
+		get_mc_node_id_from_pdev(pdev),
+		get_amd_family_name(mc_type->driver_data));
+
+	ret = pci_enable_device(pdev);
+	if (ret < 0)
+		ret = -EIO;
+	else
+		ret = amd64_probe_one_instance(pdev, mc_type->driver_data);
+
+	if (ret < 0)
+		debugf0("ret=%d\n", ret);
+
+	return ret;
+}
+
+static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
+
+	/* Remove from EDAC CORE tracking list */
+	mci = edac_mc_del_mc(&pdev->dev);
+	if (!mci)
+		return;
+
+	pvt = mci->pvt_info;
+
+	amd64_restore_ecc_error_reporting(pvt);
+
+	if (boot_cpu_data.x86 > 0xf)
+		amd64_teardown(pvt);
+
+	amd64_free_mc_sibling_devices(pvt);
+
+	kfree(pvt);
+	mci->pvt_info = NULL;
+
+	mci_lookup[pvt->mc_node_id] = NULL;
+
+	/* Free the EDAC CORE resources */
+	edac_mc_free(mci);
+}
+
+/*
+ * This table is part of the interface for loading drivers for PCI devices. The
+ * PCI core identifies what devices are on a system during boot, and then
+ * inquiry this table to see if this driver is for a given device found.
+ */
+static const struct pci_device_id amd64_pci_table[] __devinitdata = {
+	{
+		.vendor		= PCI_VENDOR_ID_AMD,
+		.device		= PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= K8_CPUS
+	},
+	{
+		.vendor		= PCI_VENDOR_ID_AMD,
+		.device		= PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= F10_CPUS
+	},
+	{
+		.vendor		= PCI_VENDOR_ID_AMD,
+		.device		= PCI_DEVICE_ID_AMD_11H_NB_DRAM,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.class		= 0,
+		.class_mask	= 0,
+		.driver_data	= F11_CPUS
+	},
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, amd64_pci_table);
+
+static struct pci_driver amd64_pci_driver = {
+	.name		= EDAC_MOD_STR,
+	.probe		= amd64_init_one_instance,
+	.remove		= __devexit_p(amd64_remove_one_instance),
+	.id_table	= amd64_pci_table,
+};
+
+static void amd64_setup_pci_device(void)
+{
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
+
+	if (amd64_ctl_pci)
+		return;
+
+	mci = mci_lookup[0];
+	if (mci) {
+
+		pvt = mci->pvt_info;
+		amd64_ctl_pci =
+			edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
+						    EDAC_MOD_STR);
+
+		if (!amd64_ctl_pci) {
+			pr_warning("%s(): Unable to create PCI control\n",
+				   __func__);
+
+			pr_warning("%s(): PCI error report via EDAC not set\n",
+				   __func__);
+			}
+	}
+}
+
+static int __init amd64_edac_init(void)
+{
+	int nb, err = -ENODEV;
+
+	edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
+
+	opstate_init();
+
+	if (cache_k8_northbridges() < 0)
+		goto err_exit;
+
+	err = pci_register_driver(&amd64_pci_driver);
+	if (err)
+		return err;
+
+	/*
+	 * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
+	 * amd64_pvt structs. These will be used in the 2nd stage init function
+	 * to finish initialization of the MC instances.
+	 */
+	for (nb = 0; nb < num_k8_northbridges; nb++) {
+		if (!pvt_lookup[nb])
+			continue;
+
+		err = amd64_init_2nd_stage(pvt_lookup[nb]);
+		if (err)
+			goto err_exit;
+	}
+
+	amd64_setup_pci_device();
+
+	return 0;
+
+err_exit:
+	debugf0("'finish_setup' stage failed\n");
+	pci_unregister_driver(&amd64_pci_driver);
+
+	return err;
+}
+
+static void __exit amd64_edac_exit(void)
+{
+	if (amd64_ctl_pci)
+		edac_pci_release_generic_ctl(amd64_ctl_pci);
+
+	pci_unregister_driver(&amd64_pci_driver);
+}
+
+module_init(amd64_edac_init);
+module_exit(amd64_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
+		"Dave Peterson, Thayne Harbaugh");
+MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
+		EDAC_AMD64_VERSION);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index e7aa760..a159957 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -577,6 +577,21 @@ extern const char *ii_msgs[4];
 extern const char *ext_msgs[32];
 extern const char *htlink_msgs[8];
 
+#ifdef CONFIG_EDAC_DEBUG
+#define NUM_DBG_ATTRS 9
+#else
+#define NUM_DBG_ATTRS 0
+#endif
+
+#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
+#define NUM_INJ_ATTRS 5
+#else
+#define NUM_INJ_ATTRS 0
+#endif
+
+extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS],
+				     amd64_inj_attrs[NUM_INJ_ATTRS];
+
 /*
  * Each of the PCI Device IDs types have their own set of hardware accessor
  * functions and per device encoding/decoding logic.
-- 
cgit v0.10.2


From 3d373290450b10933dad7c387c42179f54417009 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 20 May 2009 20:18:46 +0200
Subject: amd64_edac: do not enable module by default

While at it, fix a link failure when !K8_NB.

Acked-by: Doug Thompson <dougthompson@xmission.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Tested-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 3dc650a..eff363d 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -60,11 +60,10 @@ config EDAC_MM_EDAC
 
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
-	depends on EDAC_MM_EDAC && X86 && PCI
-	default m
+	depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI
 	help
-	Support for error detection and correction on the AMD 64
-	Families of Memory Controllers (K8, F10h and F11h)
+	  Support for error detection and correction on the AMD 64
+	  Families of Memory Controllers (K8, F10h and F11h)
 
 config EDAC_AMD64_ERROR_INJECTION
 	bool "Sysfs Error Injection facilities"
-- 
cgit v0.10.2


From 9456ffffcf3c2f8fafeb9dee45f1502209602253 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 20 May 2009 20:26:53 +0200
Subject: EDAC: do not enable modules by default

Prevent EDAC compilation units from being built by default and let the
user explicitly select the needed modules.

Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Tested-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index eff363d..ab4f359 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -49,7 +49,6 @@ config EDAC_DEBUG_VERBOSE
 
 config EDAC_MM_EDAC
 	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
-	default y
 	help
 	  Some systems are able to detect and correct errors in main
 	  memory.  EDAC can report statistics on memory error
-- 
cgit v0.10.2


From c476c23b45a41eb4e3ea63af786cc4d74762fe11 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 20 May 2009 20:31:58 +0200
Subject: amd64_edac: add MAINTAINERS entry

Acked-by: Doug Thompson <dougthompson@xmission.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index cf4abdd..6362ee8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1978,6 +1978,16 @@ F:	Documentation/edac.txt
 F:	drivers/edac/edac_*
 F:	include/linux/edac.h
 
+EDAC-AMD64
+P:	Doug Thompson
+M:	dougthompson@xmission.com
+P:	Borislav Petkov
+M:	borislav.petkov@amd.com
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
+W:	bluesmoke.sourceforge.net
+S:	Supported
+F:	drivers/edac/amd64_edac*
+
 EDAC-E752X
 P:	Mark Gross
 M:	mark.gross@intel.com
-- 
cgit v0.10.2


From dc81081b2d9a6a9d64dad1bef1e5fc9fb660e53e Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Wed, 10 Jun 2009 17:06:12 +0800
Subject: perf_counter/x86: Fix the model number of Intel Core2 processors

Fix the model number of Intel Core2 processors according to the
documentation: Intel Processor Identification with the CPUID
Instruction: http://www.intel.com/support/processors/sb/cs-009861.htm

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Also-Reported-by: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090610090612.GA26580@ywang-moblin2.bj.intel.com>
[ Added two more model numbers suggested by Arnd Bergmann ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 40978aa..49f2585 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1407,7 +1407,10 @@ static int intel_pmu_init(void)
 	 * Install the hw-cache-events table:
 	 */
 	switch (boot_cpu_data.x86_model) {
-	case 17:
+	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 29: /* six-core 45 nm xeon "Dunnington" */
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-- 
cgit v0.10.2


From 4da646b7b52552f3b43eae27ffa5aa2c200f6db6 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Wed, 8 Apr 2009 02:00:13 -0400
Subject: [libata] ahci: use less error-prone array initializers

Also, remove unneeded prototype.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 6b91c26..cd832cb 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -313,7 +313,6 @@ static void ahci_error_handler(struct ata_port *ap);
 static void ahci_post_internal_cmd(struct ata_queued_cmd *qc);
 static int ahci_port_resume(struct ata_port *ap);
 static void ahci_dev_config(struct ata_device *dev);
-static unsigned int ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl);
 static void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag,
 			       u32 opts);
 #ifdef CONFIG_PM
@@ -404,14 +403,14 @@ static struct ata_port_operations ahci_sb600_ops = {
 #define AHCI_HFLAGS(flags)	.private_data	= (void *)(flags)
 
 static const struct ata_port_info ahci_port_info[] = {
-	/* board_ahci */
+	[board_ahci] =
 	{
 		.flags		= AHCI_FLAG_COMMON,
 		.pio_mask	= ATA_PIO4,
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
-	/* board_ahci_vt8251 */
+	[board_ahci_vt8251] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_PMP),
 		.flags		= AHCI_FLAG_COMMON,
@@ -419,7 +418,7 @@ static const struct ata_port_info ahci_port_info[] = {
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_vt8251_ops,
 	},
-	/* board_ahci_ign_iferr */
+	[board_ahci_ign_iferr] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_IGN_IRQ_IF_ERR),
 		.flags		= AHCI_FLAG_COMMON,
@@ -427,7 +426,7 @@ static const struct ata_port_info ahci_port_info[] = {
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
-	/* board_ahci_sb600 */
+	[board_ahci_sb600] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_IGN_SERR_INTERNAL |
 				 AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
@@ -437,7 +436,7 @@ static const struct ata_port_info ahci_port_info[] = {
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_sb600_ops,
 	},
-	/* board_ahci_mv */
+	[board_ahci_mv] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI |
 				 AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP),
@@ -447,7 +446,7 @@ static const struct ata_port_info ahci_port_info[] = {
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
-	/* board_ahci_sb700, for SB700 and SB800 */
+	[board_ahci_sb700] =	/* for SB700 and SB800 */
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_IGN_SERR_INTERNAL),
 		.flags		= AHCI_FLAG_COMMON,
@@ -455,7 +454,7 @@ static const struct ata_port_info ahci_port_info[] = {
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_sb600_ops,
 	},
-	/* board_ahci_mcp65 */
+	[board_ahci_mcp65] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_YES_NCQ),
 		.flags		= AHCI_FLAG_COMMON,
@@ -463,7 +462,7 @@ static const struct ata_port_info ahci_port_info[] = {
 		.udma_mask	= ATA_UDMA6,
 		.port_ops	= &ahci_ops,
 	},
-	/* board_ahci_nopmp */
+	[board_ahci_nopmp] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_NO_PMP),
 		.flags		= AHCI_FLAG_COMMON,
-- 
cgit v0.10.2


From 2102d7497393e982bf38ffe8f5fd3d487104880d Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Sun, 15 Feb 2009 23:30:38 +0400
Subject: libata-sff: avoid byte swapping in ata_sff_data_xfer()

Handling of the trailing byte in ata_sff_data_xfer() is suboptimal bacause:

- it always initializes the padding buffer to 0 which is not really needed in
  both the read and write cases;

- it has to use memcpy() to transfer a single byte from/to the padding buffer;

- it uses io{read|write}16() accessors which swap bytes on the big endian CPUs
  and so have to additionally convert the data from/to the little endian format
  instead of using io{read|write}16_rep() accessors which are not supposed to
  change the byte ordering.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index bb18415..bbbb1fa 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -727,17 +727,23 @@ unsigned int ata_sff_data_xfer(struct ata_device *dev, unsigned char *buf,
 	else
 		iowrite16_rep(data_addr, buf, words);
 
-	/* Transfer trailing 1 byte, if any. */
+	/* Transfer trailing byte, if any. */
 	if (unlikely(buflen & 0x01)) {
-		__le16 align_buf[1] = { 0 };
-		unsigned char *trailing_buf = buf + buflen - 1;
+		unsigned char pad[2];
 
+		/* Point buf to the tail of buffer */
+		buf += buflen - 1;
+
+		/*
+		 * Use io*16_rep() accessors here as well to avoid pointlessly
+		 * swapping bytes to and fro on the big endian machines...
+		 */
 		if (rw == READ) {
-			align_buf[0] = cpu_to_le16(ioread16(data_addr));
-			memcpy(trailing_buf, align_buf, 1);
+			ioread16_rep(data_addr, pad, 1);
+			*buf = pad[0];
 		} else {
-			memcpy(align_buf, trailing_buf, 1);
-			iowrite16(le16_to_cpu(align_buf[0]), data_addr);
+			pad[0] = *buf;
+			iowrite16_rep(data_addr, pad, 1);
 		}
 		words++;
 	}
-- 
cgit v0.10.2


From f35b5e7c066a41f60683d2689e52a1336479913b Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Wed, 15 Apr 2009 00:00:54 +0400
Subject: sata_sx4: speed up ECC initialization

ECC initialization takes too long. It writes zeroes by portions
of 4 byte, it takes more than 6 minutes on my machine to initialize
512Mb ECC DIMM module. Change portion to 128Kb - it significantly
reduces initialization time.

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index eb05a3c..bbcf970 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -193,6 +193,7 @@ enum {
 					  PDC_TIMER_MASK_INT,
 };
 
+#define ECC_ERASE_BUF_SZ (128 * 1024)
 
 struct pdc_port_priv {
 	u8			dimm_buf[(ATA_PRD_SZ * ATA_MAX_PRD) + 512];
@@ -1280,7 +1281,6 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
 {
 	int speed, size, length;
 	u32 addr, spd0, pci_status;
-	u32 tmp = 0;
 	u32 time_period = 0;
 	u32 tcount = 0;
 	u32 ticks = 0;
@@ -1395,14 +1395,17 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
 	pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
 			  PDC_DIMM_SPD_TYPE, &spd0);
 	if (spd0 == 0x02) {
+		void *buf;
 		VPRINTK("Start ECC initialization\n");
 		addr = 0;
 		length = size * 1024 * 1024;
+		buf = kzalloc(ECC_ERASE_BUF_SZ, GFP_KERNEL);
 		while (addr < length) {
-			pdc20621_put_to_dimm(host, (void *) &tmp, addr,
-					     sizeof(u32));
-			addr += sizeof(u32);
+			pdc20621_put_to_dimm(host, buf, addr,
+					     ECC_ERASE_BUF_SZ);
+			addr += ECC_ERASE_BUF_SZ;
 		}
+		kfree(buf);
 		VPRINTK("Finish ECC initialization\n");
 	}
 	return 0;
-- 
cgit v0.10.2


From 31f80112cc7e7ea4c220d6f62b0a7052754befb3 Mon Sep 17 00:00:00 2001
From: Robert Hancock <hancockrwd@gmail.com>
Date: Mon, 13 Apr 2009 22:57:28 -0600
Subject: sata_sil: enable 32-bit PIO

32-bit PIO seems to work fine on sata_sil hardware (tested on SiI3114) and is
listed as OK in the Silicon Image datasheets. Enable it.

Signed-off-by: Robert Hancock <hancockrwd@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index e67ce8e..030ec07 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -183,7 +183,7 @@ static struct scsi_host_template sil_sht = {
 };
 
 static struct ata_port_operations sil_ops = {
-	.inherits		= &ata_bmdma_port_ops,
+	.inherits		= &ata_bmdma32_port_ops,
 	.dev_config		= sil_dev_config,
 	.set_mode		= sil_set_mode,
 	.bmdma_setup            = sil_bmdma_setup,
-- 
cgit v0.10.2


From 437681800bdaa9feb58cf943dfbbd239c21d3705 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 20 May 2009 09:44:39 +0200
Subject: [libata] get rid of ATA_MAX_QUEUE loop in ata_qc_complete_multiple()
 v2

We very rarely (if ever) complete more than one command in the
sactive mask at the time, even for extremely high IO rates. So
looping over the entire range of possible tags is pointless,
instead use __ffs() to just find the completed tags directly.

Updated to clear the tag from the done_mask instead of shifting
done_mask down as suggested by From: Tejun Heo <htejun@gmail.com>
Verified with a user space tester to produce the same results.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c924230..ca4d208 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5031,7 +5031,6 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
 {
 	int nr_done = 0;
 	u32 done_mask;
-	int i;
 
 	done_mask = ap->qc_active ^ qc_active;
 
@@ -5041,16 +5040,16 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
 		return -EINVAL;
 	}
 
-	for (i = 0; i < ATA_MAX_QUEUE; i++) {
+	while (done_mask) {
 		struct ata_queued_cmd *qc;
+		unsigned int tag = __ffs(done_mask);
 
-		if (!(done_mask & (1 << i)))
-			continue;
-
-		if ((qc = ata_qc_from_tag(ap, i))) {
+		qc = ata_qc_from_tag(ap, tag);
+		if (qc) {
 			ata_qc_complete(qc);
 			nr_done++;
 		}
+		done_mask &= ~(1 << tag);
 	}
 
 	return nr_done;
-- 
cgit v0.10.2


From d50ce07d6fd8a422757a231f9d7293cbddeaec31 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 12 May 2009 10:57:41 +0900
Subject: ahci: misc cleanups for EM stuff

Make the following EM related cleanups.

* Use msleep(1) instead of udelay(100) and reduce retry count to 5.

* s/MAX_SLOTS/EM_MAX_SLOTS/, s/MAX_RETRY/EM_MAX_RETRY/

* Make EM constants enums as suggested by Jeff.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Milburn <dmilburn@redhat.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index cd832cb..2141a31 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -77,8 +77,6 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf,
 			      size_t size);
 static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
 					ssize_t size);
-#define MAX_SLOTS 8
-#define MAX_RETRY 15
 
 enum {
 	AHCI_PCI_BAR		= 5,
@@ -231,6 +229,10 @@ enum {
 
 	ICH_MAP				= 0x90, /* ICH MAP register */
 
+	/* em constants */
+	EM_MAX_SLOTS			= 8,
+	EM_MAX_RETRY			= 5,
+
 	/* em_ctl bits */
 	EM_CTL_RST			= (1 << 9), /* Reset */
 	EM_CTL_TM			= (1 << 8), /* Transmit Message */
@@ -282,8 +284,8 @@ struct ahci_port_priv {
 	unsigned int		ncq_saw_dmas:1;
 	unsigned int		ncq_saw_sdb:1;
 	u32 			intr_mask;	/* interrupts to enable */
-	struct ahci_em_priv	em_priv[MAX_SLOTS];/* enclosure management info
-					 	 * per PM slot */
+	/* enclosure management info per PM slot */
+	struct ahci_em_priv	em_priv[EM_MAX_SLOTS];
 };
 
 static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
@@ -1140,12 +1142,12 @@ static void ahci_start_port(struct ata_port *ap)
 			emp = &pp->em_priv[link->pmp];
 
 			/* EM Transmit bit maybe busy during init */
-			for (i = 0; i < MAX_RETRY; i++) {
+			for (i = 0; i < EM_MAX_RETRY; i++) {
 				rc = ahci_transmit_led_message(ap,
 							       emp->led_state,
 							       4);
 				if (rc == -EBUSY)
-					udelay(100);
+					msleep(1);
 				else
 					break;
 			}
@@ -1339,7 +1341,7 @@ static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
 
 	/* get the slot number from the message */
 	pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8;
-	if (pmp < MAX_SLOTS)
+	if (pmp < EM_MAX_SLOTS)
 		emp = &pp->em_priv[pmp];
 	else
 		return -EINVAL;
@@ -1407,7 +1409,7 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf,
 
 	/* get the slot number from the message */
 	pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8;
-	if (pmp < MAX_SLOTS)
+	if (pmp < EM_MAX_SLOTS)
 		emp = &pp->em_priv[pmp];
 	else
 		return -EINVAL;
-- 
cgit v0.10.2


From 347979a034539ab20f3bc0c30ac0ccd3c4fd4c2e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Wed, 6 May 2009 17:10:08 +0100
Subject: ata_piix: Turn on hotplugging support for older chips

We can't do this for the later ones as they have all sorts of magic boot
time stuff that needs reviewing and the like. However we can do it for the
older ones and it turns out we need to as some IBM docking stations have a
second PIIX series device in them and without this change you can't use it
very well

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 1aeb708..716e369 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -1509,8 +1509,8 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
 		dev_printk(KERN_DEBUG, &pdev->dev,
 			   "version " DRV_VERSION "\n");
 
-	/* no hotplugging support (FIXME) */
-	if (!in_module_init)
+	/* no hotplugging support for later devices (FIXME) */
+	if (!in_module_init && ent->driver_data >= ich5_sata)
 		return -ENODEV;
 
 	if (piix_broken_system_poweroff(pdev)) {
-- 
cgit v0.10.2


From 7654db1a9256d746ae4d229ba675f616a5d5e1a1 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Wed, 6 May 2009 17:10:17 +0100
Subject: ata_piix: Remove stale comment

Combined mode pci quirk hacks went away - so the table to keep in sync
no longer exists.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 716e369..6e165bf 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -223,10 +223,8 @@ static const struct pci_device_id piix_pci_tbl[] = {
 	/* ICH8 Mobile PATA Controller */
 	{ 0x8086, 0x2850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 },
 
-	/* NOTE: The following PCI ids must be kept in sync with the
-	 * list in drivers/pci/quirks.c.
-	 */
-
+	/* SATA ports */
+	
 	/* 82801EB (ICH5) */
 	{ 0x8086, 0x24d1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
 	/* 82801EB (ICH5) */
-- 
cgit v0.10.2


From ac04527f7947020c5890090b2ac87af4e98d977e Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 8 Jun 2009 15:52:39 +0300
Subject: KVM: Disable large pages on misaligned memory slots

If a slots guest physical address and host virtual address unequal (mod
large page size), then we would erronously try to back guest large pages
with host large pages.  Detect this misalignment and diable large page
support for the trouble slot.

Cc: stable@kernel.org
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5fed9bf..5f865ed 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1086,7 +1086,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 	gfn_t base_gfn;
-	unsigned long npages;
+	unsigned long npages, ugfn;
 	int largepages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
@@ -1177,6 +1177,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			new.lpage_info[0].write_count = 1;
 		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
 			new.lpage_info[largepages-1].write_count = 1;
+		ugfn = new.userspace_addr >> PAGE_SHIFT;
+		/*
+		 * If the gfn and userspace address are not aligned wrt each
+		 * other, disable large page support for this slot
+		 */
+		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
+			for (i = 0; i < largepages; ++i)
+				new.lpage_info[i].write_count = 1;
 	}
 
 	/* Allocate page dirty bitmap if needed */
-- 
cgit v0.10.2


From 09f8ca74ae6c2d78b2c7f6c0751ed0cbe815a3d9 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 8 Jun 2009 15:55:21 +0300
Subject: KVM: Prevent overflow in largepages calculation

If userspace specifies a memory slot that is larger than 8 petabytes, it
could overflow the largepages variable.

Cc: stable@kernel.org
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5f865ed..e211945 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1087,8 +1087,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	int r;
 	gfn_t base_gfn;
 	unsigned long npages, ugfn;
-	int largepages;
-	unsigned long i;
+	unsigned long largepages, i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
 
-- 
cgit v0.10.2


From 5f4417a156a6e44359effa9492de3ed5638a9b13 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Wed, 10 Jun 2009 14:37:20 +0200
Subject: ide: fix PowerMac bootup oops

PowerMac bootup with CONFIG_IDE=y oopses in ide_pio_cycle_time():
because "ide: try to use PIO Mode 0 during probe if possible" causes
pmac_ide_set_pio_mode() to be called before drive->id has been set.

Bart points out other places which now need drive->id set earlier,
so follow his advice to allocate it in ide_port_alloc_devices()
(using kzalloc_node, without error message, as when allocating drive)
and memset it for reuse in ide_port_init_devices_data().

Fixed in passing: ide_host_alloc() was missing ide_port_free_devices()
from an error path.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Joao Ramos <joao.ramos@inov.pt>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f9c2fb7..f371b0d 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -465,23 +465,8 @@ static u8 probe_for_drive(ide_drive_t *drive)
 	int rc;
 	u8 cmd;
 
-	/*
-	 *	In order to keep things simple we have an id
-	 *	block for all drives at all times. If the device
-	 *	is pre ATA or refuses ATA/ATAPI identify we
-	 *	will add faked data to this.
-	 *
-	 *	Also note that 0 everywhere means "can't do X"
-	 */
- 
 	drive->dev_flags &= ~IDE_DFLAG_ID_READ;
 
-	drive->id = kzalloc(SECTOR_SIZE, GFP_KERNEL);
-	if (drive->id == NULL) {
-		printk(KERN_ERR "ide: out of memory for id data.\n");
-		return 0;
-	}
-
 	m = (char *)&drive->id[ATA_ID_PROD];
 	strcpy(m, "UNKNOWN");
 
@@ -497,7 +482,7 @@ static u8 probe_for_drive(ide_drive_t *drive)
 		}
 
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-			goto out_free;
+			return 0;
 
 		/* identification failed? */
 		if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
@@ -521,7 +506,7 @@ static u8 probe_for_drive(ide_drive_t *drive)
 	}
 
 	if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-		goto out_free;
+		return 0;
 
 	/* The drive wasn't being helpful. Add generic info only */
 	if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
@@ -535,9 +520,6 @@ static u8 probe_for_drive(ide_drive_t *drive)
 	}
 
 	return 1;
-out_free:
-	kfree(drive->id);
-	return 0;
 }
 
 static void hwif_release_dev(struct device *dev)
@@ -825,8 +807,6 @@ static int ide_port_setup_devices(ide_hwif_t *hwif)
 		if (ide_init_queue(drive)) {
 			printk(KERN_ERR "ide: failed to init %s\n",
 					drive->name);
-			kfree(drive->id);
-			drive->id = NULL;
 			drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 			continue;
 		}
@@ -955,9 +935,6 @@ static void drive_release_dev (struct device *dev)
 	blk_cleanup_queue(drive->queue);
 	drive->queue = NULL;
 
-	kfree(drive->id);
-	drive->id = NULL;
-
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 
 	complete(&drive->gendev_rel_comp);
@@ -1140,8 +1117,11 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
 
 	ide_port_for_each_dev(i, drive, hwif) {
 		u8 j = (hwif->index * MAX_DRIVES) + i;
+		u16 *saved_id = drive->id;
 
 		memset(drive, 0, sizeof(*drive));
+		memset(saved_id, 0, SECTOR_SIZE);
+		drive->id = saved_id;
 
 		drive->media			= ide_disk;
 		drive->select			= (i << 4) | ATA_DEVICE_OBS;
@@ -1248,8 +1228,10 @@ static void ide_port_free_devices(ide_hwif_t *hwif)
 	ide_drive_t *drive;
 	int i;
 
-	ide_port_for_each_dev(i, drive, hwif)
+	ide_port_for_each_dev(i, drive, hwif) {
+		kfree(drive->id);
 		kfree(drive);
+	}
 }
 
 static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
@@ -1263,6 +1245,18 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
 		if (drive == NULL)
 			goto out_nomem;
 
+		/*
+		 * In order to keep things simple we have an id
+		 * block for all drives at all times. If the device
+		 * is pre ATA or refuses ATA/ATAPI identify we
+		 * will add faked data to this.
+		 *
+		 * Also note that 0 everywhere means "can't do X"
+		 */
+		drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node);
+		if (drive->id == NULL)
+			goto out_nomem;
+
 		hwif->devices[i] = drive;
 	}
 	return 0;
@@ -1304,6 +1298,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d,
 		if (idx < 0) {
 			printk(KERN_ERR "%s: no free slot for interface\n",
 					d ? d->name : "ide");
+			ide_port_free_devices(hwif);
 			kfree(hwif);
 			continue;
 		}
-- 
cgit v0.10.2


From 5df3bc2d35bd5cd08053f71679b27577b42676d6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Wed, 10 Jun 2009 14:37:21 +0200
Subject: ide: unexport ide_find_dma_mode()

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index d9123ec..0bbf71f 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -347,7 +347,6 @@ u8 ide_find_dma_mode(ide_drive_t *drive, u8 req_mode)
 
 	return mode;
 }
-EXPORT_SYMBOL_GPL(ide_find_dma_mode);
 
 static int ide_tune_dma(ide_drive_t *drive)
 {
-- 
cgit v0.10.2


From ad7c52d0988a8965989dc06d630c52a5bde849d5 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Wed, 10 Jun 2009 14:37:21 +0200
Subject: ide: re-implement ide_pci_init_one() on top of ide_pci_init_two()

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>

diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 5314edf..ab3db61 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -1,7 +1,7 @@
 /*
  *  Copyright (C) 1998-2000  Andre Hedrick <andre@linux-ide.org>
  *  Copyright (C) 1995-1998  Mark Lord
- *  Copyright (C)      2007  Bartlomiej Zolnierkiewicz
+ *  Copyright (C) 2007-2009  Bartlomiej Zolnierkiewicz
  *
  *  May be copied or modified under the terms of the GNU General Public License
  */
@@ -534,61 +534,15 @@ out:
 	return ret;
 }
 
-int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
-		     void *priv)
-{
-	struct ide_host *host;
-	struct ide_hw hw[2], *hws[] = { NULL, NULL };
-	int ret;
-
-	ret = ide_setup_pci_controller(dev, d, 1);
-	if (ret < 0)
-		goto out;
-
-	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
-
-	host = ide_host_alloc(d, hws, 2);
-	if (host == NULL) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	host->dev[0] = &dev->dev;
-
-	host->host_priv = priv;
-
-	host->irq_flags = IRQF_SHARED;
-
-	pci_set_drvdata(dev, host);
-
-	ret = do_ide_setup_pci_device(dev, d, 1);
-	if (ret < 0)
-		goto out;
-
-	/* fixup IRQ */
-	if (ide_pci_is_in_compatibility_mode(dev)) {
-		hw[0].irq = pci_get_legacy_ide_irq(dev, 0);
-		hw[1].irq = pci_get_legacy_ide_irq(dev, 1);
-	} else
-		hw[1].irq = hw[0].irq = ret;
-
-	ret = ide_host_register(host, d, hws);
-	if (ret)
-		ide_host_free(host);
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(ide_pci_init_one);
-
 int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 		     const struct ide_port_info *d, void *priv)
 {
 	struct pci_dev *pdev[] = { dev1, dev2 };
 	struct ide_host *host;
-	int ret, i;
+	int ret, i, n_ports = dev2 ? 4 : 2;
 	struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < n_ports / 2; i++) {
 		ret = ide_setup_pci_controller(pdev[i], d, !i);
 		if (ret < 0)
 			goto out;
@@ -596,23 +550,24 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 		ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]);
 	}
 
-	host = ide_host_alloc(d, hws, 4);
+	host = ide_host_alloc(d, hws, n_ports);
 	if (host == NULL) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	host->dev[0] = &dev1->dev;
-	host->dev[1] = &dev2->dev;
+	if (dev2)
+		host->dev[1] = &dev2->dev;
 
 	host->host_priv = priv;
-
 	host->irq_flags = IRQF_SHARED;
 
 	pci_set_drvdata(pdev[0], host);
-	pci_set_drvdata(pdev[1], host);
+	if (dev2)
+		pci_set_drvdata(pdev[1], host);
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < n_ports / 2; i++) {
 		ret = do_ide_setup_pci_device(pdev[i], d, !i);
 
 		/*
@@ -638,6 +593,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(ide_pci_init_two);
 
+int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
+		     void *priv)
+{
+	return ide_pci_init_two(dev, NULL, d, priv);
+}
+EXPORT_SYMBOL_GPL(ide_pci_init_one);
+
 void ide_pci_remove(struct pci_dev *dev)
 {
 	struct ide_host *host = pci_get_drvdata(dev);
-- 
cgit v0.10.2


From 236e6723bedb483b2ebf73ada01e2c853c5cac01 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 10 Jun 2009 13:55:34 +0100
Subject: ASoC: Fix lm4857 control

Commit 4eaa9819dc08d7bfd1065ce530e31b18a119dcaf changed semantics of
private_value member of kcontrol. This resulted in inability to control
amplifier and subsequently in very low output volume.

Tested-by: Johannes Schauer <josch@pyneo.org>
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>

diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c
index 289fadf..906709e 100644
--- a/sound/soc/s3c24xx/neo1973_wm8753.c
+++ b/sound/soc/s3c24xx/neo1973_wm8753.c
@@ -345,9 +345,11 @@ static void lm4857_write_regs(void)
 static int lm4857_get_reg(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	int reg = kcontrol->private_value & 0xFF;
-	int shift = (kcontrol->private_value >> 8) & 0x0F;
-	int mask = (kcontrol->private_value >> 16) & 0xFF;
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	int reg = mc->reg;
+	int shift = mc->shift;
+	int mask = mc->max;
 
 	pr_debug("Entered %s\n", __func__);
 
@@ -358,9 +360,11 @@ static int lm4857_get_reg(struct snd_kcontrol *kcontrol,
 static int lm4857_set_reg(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	int reg = kcontrol->private_value & 0xFF;
-	int shift = (kcontrol->private_value >> 8) & 0x0F;
-	int mask = (kcontrol->private_value >> 16) & 0xFF;
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	int reg = mc->reg;
+	int shift = mc->shift;
+	int mask = mc->max;
 
 	if (((lm4857_regs[reg] >> shift) & mask) ==
 		ucontrol->value.integer.value[0])
-- 
cgit v0.10.2


From feb6118d4e76bc5685909426c2dcc90fdb7ba05b Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:00:39 +0300
Subject: [SCSI] libosd: OSD2r05: Additional command enums

Add all constant definitions of new OSD commands added in
revision 4 & 5. Mainly for creating snapshots and clones.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
index 62b2ab8..2cc8e8b 100644
--- a/include/scsi/osd_protocol.h
+++ b/include/scsi/osd_protocol.h
@@ -303,7 +303,15 @@ enum osd_service_actions {
 	OSD_ACT_V2(REMOVE_MEMBER_OBJECTS,	0x21)
 	OSD_ACT_V2(GET_MEMBER_ATTRIBUTES,	0x22)
 	OSD_ACT_V2(SET_MEMBER_ATTRIBUTES,	0x23)
+
+	OSD_ACT_V2(CREATE_CLONE,		0x28)
+	OSD_ACT_V2(CREATE_SNAPSHOT,		0x29)
+	OSD_ACT_V2(DETACH_CLONE,		0x2A)
+	OSD_ACT_V2(REFRESH_SNAPSHOT_CLONE,	0x2B)
+	OSD_ACT_V2(RESTORE_PARTITION_FROM_SNAPSHOT, 0x2C)
+
 	OSD_ACT_V2(READ_MAP,			0x31)
+	OSD_ACT_V2(READ_MAPS_COMPARE,		0x32)
 
 	OSD_ACT_V1_V2(PERFORM_SCSI_COMMAND,	0x8F7E, 0x8F7C)
 	OSD_ACT_V1_V2(SCSI_TASK_MANAGEMENT,	0x8F7F, 0x8F7D)
-- 
cgit v0.10.2


From 29191b92030bc97b05b539ce5ae0543c24a6d7ca Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:01:03 +0300
Subject: [SCSI] libosd: OSD2r05: Attribute definitions

Some New revision 5 Attribute definitions.
Some missing definitions of Attributes and pages

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/include/scsi/osd_attributes.h b/include/scsi/osd_attributes.h
index f888a6f..56e920a 100644
--- a/include/scsi/osd_attributes.h
+++ b/include/scsi/osd_attributes.h
@@ -29,6 +29,7 @@ enum {
 	OSD_APAGE_PARTITION_INFORMATION = OSD_APAGE_PARTITION_FIRST + 1,
 	OSD_APAGE_PARTITION_QUOTAS	= OSD_APAGE_PARTITION_FIRST + 2,
 	OSD_APAGE_PARTITION_TIMESTAMP	= OSD_APAGE_PARTITION_FIRST + 3,
+	OSD_APAGE_PARTITION_ATTR_ACCESS = OSD_APAGE_PARTITION_FIRST + 4,
 	OSD_APAGE_PARTITION_SECURITY	= OSD_APAGE_PARTITION_FIRST + 5,
 	OSD_APAGE_PARTITION_LAST	= 0x5FFFFFFF,
 
@@ -51,7 +52,9 @@ enum {
 	OSD_APAGE_RESERVED_TYPE_LAST	= 0xEFFFFFFF,
 
 	OSD_APAGE_COMMON_FIRST		= 0xF0000000,
-	OSD_APAGE_COMMON_LAST		= 0xFFFFFFFE,
+	OSD_APAGE_COMMON_LAST		= 0xFFFFFFFD,
+
+	OSD_APAGE_CURRENT_COMMAND	= 0xFFFFFFFE,
 
 	OSD_APAGE_REQUEST_ALL		= 0xFFFFFFFF,
 };
@@ -106,10 +109,30 @@ enum {
 	OSD_ATTR_RI_PRODUCT_REVISION_LEVEL   = 0x7,   /* 4        */
 	OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER    = 0x8,   /* variable */
 	OSD_ATTR_RI_OSD_NAME                 = 0x9,   /* variable */
+	OSD_ATTR_RI_MAX_CDB_CONTINUATION_LEN = 0xA,   /* 4        */
 	OSD_ATTR_RI_TOTAL_CAPACITY           = 0x80,  /* 8        */
 	OSD_ATTR_RI_USED_CAPACITY            = 0x81,  /* 8        */
 	OSD_ATTR_RI_NUMBER_OF_PARTITIONS     = 0xC0,  /* 8        */
 	OSD_ATTR_RI_CLOCK                    = 0x100, /* 6        */
+	OARI_DEFAULT_ISOLATION_METHOD        = 0X110, /* 1        */
+	OARI_SUPPORTED_ISOLATION_METHODS     = 0X111, /* 32       */
+
+	OARI_DATA_ATOMICITY_GUARANTEE                   = 0X120,   /* 8       */
+	OARI_DATA_ATOMICITY_ALIGNMENT                   = 0X121,   /* 8       */
+	OARI_ATTRIBUTES_ATOMICITY_GUARANTEE             = 0X122,   /* 8       */
+	OARI_DATA_ATTRIBUTES_ATOMICITY_MULTIPLIER       = 0X123,   /* 1       */
+
+	OARI_MAXIMUM_SNAPSHOTS_COUNT                    = 0X1C1,    /* 0 or 4 */
+	OARI_MAXIMUM_CLONES_COUNT                       = 0X1C2,    /* 0 or 4 */
+	OARI_MAXIMUM_BRANCH_DEPTH                       = 0X1CC,    /* 0 or 4 */
+	OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_FIRST  = 0X200,    /* 0 or 4 */
+	OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_LAST   = 0X2ff,    /* 0 or 4 */
+	OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_FIRST = 0X300,    /* 0 or 4 */
+	OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_LAST  = 0X30F,    /* 0 or 4 */
+	OARI_SUPPORT_FOR_DUPLICATED_OBJECT_FREEZING     = 0X310,    /* 0 or 4 */
+	OARI_SUPPORT_FOR_SNAPSHOT_REFRESHING            = 0X311,    /* 0 or 1 */
+	OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_FIRST = 0X7000001,/* 0 or 4 */
+	OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_LAST  = 0X700FFFF,/* 0 or 4 */
 };
 /* Root_Information_attributes_page does not have a get_page structure */
 
@@ -120,7 +143,15 @@ enum {
 	OSD_ATTR_PI_PARTITION_ID            = 0x1,     /* 8        */
 	OSD_ATTR_PI_USERNAME                = 0x9,     /* variable */
 	OSD_ATTR_PI_USED_CAPACITY           = 0x81,    /* 8        */
+	OSD_ATTR_PI_USED_CAPACITY_INCREMENT = 0x84,    /* 0 or 8   */
 	OSD_ATTR_PI_NUMBER_OF_OBJECTS       = 0xC1,    /* 8        */
+
+	OSD_ATTR_PI_ACTUAL_DATA_SPACE                      = 0xD1, /* 0 or 8 */
+	OSD_ATTR_PI_RESERVED_DATA_SPACE                    = 0xD2, /* 0 or 8 */
+	OSD_ATTR_PI_DEFAULT_SNAPSHOT_DUPLICATION_METHOD    = 0x200,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_CLONE_DUPLICATION_METHOD       = 0x201,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_SP_TIME_OF_DUPLICATION         = 0x300,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_CLONE_TIME_OF_DUPLICATION      = 0x301,/* 0 or 4 */
 };
 /* Partition Information attributes page does not have a get_page structure */
 
@@ -131,6 +162,7 @@ enum {
 	OSD_ATTR_CI_PARTITION_ID           = 0x1,       /* 8        */
 	OSD_ATTR_CI_COLLECTION_OBJECT_ID   = 0x2,       /* 8        */
 	OSD_ATTR_CI_USERNAME               = 0x9,       /* variable */
+	OSD_ATTR_CI_COLLECTION_TYPE        = 0xA,       /* 1        */
 	OSD_ATTR_CI_USED_CAPACITY          = 0x81,      /* 8        */
 };
 /* Collection Information attributes page does not have a get_page structure */
@@ -144,6 +176,8 @@ enum {
 	OSD_ATTR_OI_USERNAME             = 0x9,       /* variable */
 	OSD_ATTR_OI_USED_CAPACITY        = 0x81,      /* 8        */
 	OSD_ATTR_OI_LOGICAL_LENGTH       = 0x82,      /* 8        */
+	SD_ATTR_OI_ACTUAL_DATA_SPACE     = 0XD1,      /* 0 OR 8   */
+	SD_ATTR_OI_RESERVED_DATA_SPACE   = 0XD2,      /* 0 OR 8   */
 };
 /* Object Information attributes page does not have a get_page structure */
 
@@ -248,7 +282,18 @@ struct object_timestamps_attributes_page {
 	struct osd_timestamp data_modified_time;
 }  __packed;
 
-/* 7.1.2.19 Collections attributes page */
+/* OSD2r05: 7.1.3.19 Attributes Access attributes page
+ * (OSD_APAGE_PARTITION_ATTR_ACCESS)
+ *
+ * each attribute is of the form below. Total array length is deduced
+ * from the attribute's length
+ * (See allowed_attributes_access of the struct osd_cap_object_descriptor)
+ */
+struct attributes_access_attr {
+	struct osd_attributes_list_attrid attr_list[0];
+} __packed;
+
+/* OSD2r05: 7.1.2.21 Collections attributes page */
 /* TBD */
 
 /* 7.1.2.20 Root Policy/Security attributes page (OSD_APAGE_ROOT_SECURITY) */
@@ -324,4 +369,29 @@ struct object_security_attributes_page {
 	__be32 policy_access_tag;
 }  __packed;
 
+/* OSD2r05: 7.1.3.31 Current Command attributes page
+ * (OSD_APAGE_CURRENT_COMMAND)
+ */
+enum {
+	OSD_ATTR_CC_RESPONSE_INTEGRITY_CHECK_VALUE     = 0x1, /* 32  */
+	OSD_ATTR_CC_OBJECT_TYPE                        = 0x2, /* 1   */
+	OSD_ATTR_CC_PARTITION_ID                       = 0x3, /* 8   */
+	OSD_ATTR_CC_OBJECT_ID                          = 0x4, /* 8   */
+	OSD_ATTR_CC_STARTING_BYTE_ADDRESS_OF_APPEND    = 0x5, /* 8   */
+	OSD_ATTR_CC_CHANGE_IN_USED_CAPACITY            = 0x6, /* 8   */
+};
+
+/*TBD: osdv1_current_command_attributes_page */
+
+struct osdv2_current_command_attributes_page {
+	struct osd_attr_page_header hdr;  /* id=0xFFFFFFFE, size=0x44 */
+	u8 response_integrity_check_value[OSD_CRYPTO_KEYID_SIZE];
+	u8 object_type;
+	u8 reserved[3];
+	__be64 partition_id;
+	__be64 object_id;
+	__be64 starting_byte_address_of_append;
+	__be64 change_in_used_capacity;
+};
+
 #endif /*ndef __OSD_ATTRIBUTES_H__*/
-- 
cgit v0.10.2


From de6b20385b1c14f97ccdf7da173b4c9a7405083b Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:01:26 +0300
Subject: [SCSI] libosd: Better printout of OSD target system information

Shorten out the Attributes names.
Align all results on column 24.
Print system ID in a new line.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 1ce6b24..15f0bbc 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -118,39 +118,39 @@ static int _osd_print_system_info(struct osd_dev *od, void *caps)
 		_osd_ver_desc(or));
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_VENDOR_IDENTIFICATION [%s]\n",
+	OSD_INFO("VENDOR_IDENTIFICATION  [%s]\n",
 		(char *)pFirst);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_PRODUCT_IDENTIFICATION [%s]\n",
+	OSD_INFO("PRODUCT_IDENTIFICATION [%s]\n",
 		(char *)pFirst);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_PRODUCT_MODEL [%s]\n",
+	OSD_INFO("PRODUCT_MODEL          [%s]\n",
 		(char *)pFirst);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_PRODUCT_REVISION_LEVEL [%u]\n",
+	OSD_INFO("PRODUCT_REVISION_LEVEL [%u]\n",
 		pFirst ? get_unaligned_be32(pFirst) : ~0U);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER [%s]\n",
+	OSD_INFO("PRODUCT_SERIAL_NUMBER  [%s]\n",
 		(char *)pFirst);
 
 	pFirst = get_attrs[a].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_OSD_NAME [%s]\n", (char *)pFirst);
+	OSD_INFO("OSD_NAME               [%s]\n", (char *)pFirst);
 	a++;
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_TOTAL_CAPACITY [0x%llx]\n",
+	OSD_INFO("TOTAL_CAPACITY         [0x%llx]\n",
 		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_USED_CAPACITY [0x%llx]\n",
+	OSD_INFO("USED_CAPACITY          [0x%llx]\n",
 		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
 
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_NUMBER_OF_PARTITIONS [%llu]\n",
+	OSD_INFO("NUMBER_OF_PARTITIONS   [%llu]\n",
 		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
 
 	if (a >= nelem)
@@ -158,7 +158,7 @@ static int _osd_print_system_info(struct osd_dev *od, void *caps)
 
 	/* FIXME: Where are the time utilities */
 	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("OSD_ATTR_RI_CLOCK [0x%02x%02x%02x%02x%02x%02x]\n",
+	OSD_INFO("CLOCK                  [0x%02x%02x%02x%02x%02x%02x]\n",
 		((char *)pFirst)[0], ((char *)pFirst)[1],
 		((char *)pFirst)[2], ((char *)pFirst)[3],
 		((char *)pFirst)[4], ((char *)pFirst)[5]);
@@ -169,7 +169,8 @@ static int _osd_print_system_info(struct osd_dev *od, void *caps)
 
 		hex_dump_to_buffer(get_attrs[a].val_ptr, len, 32, 1,
 				   sid_dump, sizeof(sid_dump), true);
-		OSD_INFO("OSD_ATTR_RI_OSD_SYSTEM_ID(%d) [%s]\n", len, sid_dump);
+		OSD_INFO("OSD_SYSTEM_ID(%d)\n"
+			 "        [%s]\n", len, sid_dump);
 		a++;
 	}
 out:
-- 
cgit v0.10.2


From 0e35afbc8b054e04a35faa796c72abb3b82bd33b Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:02:22 +0300
Subject: [SCSI] libosd: osd_req_{read,write}_kern new API

By popular demand, define usefull wrappers for osd_req_read/write
that recieve kernel pointers. All users had their own.

Also remove these from exofs

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 15f0bbc..c98153b 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -789,6 +789,20 @@ void osd_req_write(struct osd_request *or,
 }
 EXPORT_SYMBOL(osd_req_write);
 
+int osd_req_write_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
+{
+	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	osd_req_write(or, obj, bio, offset);
+	return 0;
+}
+EXPORT_SYMBOL(osd_req_write_kern);
+
 /*TODO: void osd_req_append(struct osd_request *,
 	const struct osd_obj_id *, struct bio *data_out); */
 /*TODO: void osd_req_create_write(struct osd_request *,
@@ -824,6 +838,20 @@ void osd_req_read(struct osd_request *or,
 }
 EXPORT_SYMBOL(osd_req_read);
 
+int osd_req_read_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
+{
+	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	osd_req_read(or, obj, bio, offset);
+	return 0;
+}
+EXPORT_SYMBOL(osd_req_read_kern);
+
 void osd_req_get_attributes(struct osd_request *or,
 	const struct osd_obj_id *obj)
 {
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index b1512c4..24667ee 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or,
 
 int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
 
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
 #endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index b249ae9..48cc4d1 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
 
 	return -EIO;
 }
-
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (!bio)
-		return -ENOMEM;
-
-	osd_req_read(or, obj, bio, offset);
-	return 0;
-}
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (!bio)
-		return -ENOMEM;
-
-	osd_req_write(or, obj, bio, offset);
-	return 0;
-}
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index b24d961..6132790 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -364,6 +364,8 @@ void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *);
 
 void osd_req_write(struct osd_request *or,
 	const struct osd_obj_id *, struct bio *data_out, u64 offset);
+int osd_req_write_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 void osd_req_append(struct osd_request *or,
 	const struct osd_obj_id *, struct bio *data_out);/* NI */
 void osd_req_create_write(struct osd_request *or,
@@ -379,6 +381,8 @@ void osd_req_flush_object(struct osd_request *or,
 
 void osd_req_read(struct osd_request *or,
 	const struct osd_obj_id *, struct bio *data_in, u64 offset);
+int osd_req_read_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 
 /*
  * Root/Partition/Collection/Object Attributes commands
-- 
cgit v0.10.2


From 546881aea9787ed5c626ac99ab80158ea9ae0515 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:04:00 +0300
Subject: [SCSI] libosd: Let _osd_req_finalize_data_integrity receive number of
 out_bytes

_osd_req_finalize_data_integrity was trying to deduce the number of
out_bytes from passed osd_request->out.bio. This is wrong when
the bio is chained. The caller of _osd_req_finalize_data_integrity
has more ready available information and should just pass it.

Also in the light of future support for CDB-continuation segment this is
a better solution.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index c98153b..ba2ebae 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1262,7 +1262,7 @@ static inline void osd_sec_parms_set_in_offset(bool is_v1,
 }
 
 static int _osd_req_finalize_data_integrity(struct osd_request *or,
-	bool has_in, bool has_out, const u8 *cap_key)
+	bool has_in, bool has_out, u64 out_data_bytes, const u8 *cap_key)
 {
 	struct osd_security_parameters *sec_parms = _osd_req_sec_params(or);
 	int ret;
@@ -1277,8 +1277,7 @@ static int _osd_req_finalize_data_integrity(struct osd_request *or,
 		};
 		unsigned pad;
 
-		or->out_data_integ.data_bytes = cpu_to_be64(
-			or->out.bio ? or->out.bio->bi_size : 0);
+		or->out_data_integ.data_bytes = cpu_to_be64(out_data_bytes);
 		or->out_data_integ.set_attributes_bytes = cpu_to_be64(
 			or->set_attr.total_bytes);
 		or->out_data_integ.get_attributes_bytes = cpu_to_be64(
@@ -1370,6 +1369,7 @@ int osd_finalize_request(struct osd_request *or,
 {
 	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
 	bool has_in, has_out;
+	u64 out_data_bytes = or->out.total_bytes;
 	int ret;
 
 	if (options & OSD_REQ_FUA)
@@ -1439,7 +1439,8 @@ int osd_finalize_request(struct osd_request *or,
 		}
 	}
 
-	ret = _osd_req_finalize_data_integrity(or, has_in, has_out, cap_key);
+	ret = _osd_req_finalize_data_integrity(or, has_in, has_out,
+					       out_data_bytes, cap_key);
 	if (ret)
 		return ret;
 
-- 
cgit v0.10.2


From 62f469b596dd0aadf046a69027087c18db43734e Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:04:26 +0300
Subject: [SCSI] libosd: osd_req_{read,write} takes a length parameter

For supporting of chained-bios we can not inspect the first
bio only, as before. Caller shall pass the total length of the
request, ie. sum_bytes(bio-chain).

Also since the bio might be a chain we don't set it's direction
on behalf of it's callers. The bio direction should be properly
set prior to this call. So fix a couple of write users that now
need to set the bio direction properly

[In this patch I change both library code and user sites at
 exofs, to make it easy on integration. It should be submitted
 via James's scsi-misc tree.]

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Jeff Garzik <jeff@garzik.org>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index ba2ebae..3f5ec57 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -779,13 +779,14 @@ EXPORT_SYMBOL(osd_req_remove_object);
 */
 
 void osd_req_write(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio, u64 offset)
+	const struct osd_obj_id *obj, u64 offset,
+	struct bio *bio, u64 len)
 {
-	_osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, bio->bi_size);
+	_osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, len);
 	WARN_ON(or->out.bio || or->out.total_bytes);
-	bio->bi_rw |= (1 << BIO_RW);
+	WARN_ON(0 ==  bio_rw_flagged(bio, BIO_RW));
 	or->out.bio = bio;
-	or->out.total_bytes = bio->bi_size;
+	or->out.total_bytes = len;
 }
 EXPORT_SYMBOL(osd_req_write);
 
@@ -798,7 +799,8 @@ int osd_req_write_kern(struct osd_request *or,
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
-	osd_req_write(or, obj, bio, offset);
+	bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+	osd_req_write(or, obj, offset, bio, len);
 	return 0;
 }
 EXPORT_SYMBOL(osd_req_write_kern);
@@ -828,13 +830,14 @@ void osd_req_flush_object(struct osd_request *or,
 EXPORT_SYMBOL(osd_req_flush_object);
 
 void osd_req_read(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio, u64 offset)
+	const struct osd_obj_id *obj, u64 offset,
+	struct bio *bio, u64 len)
 {
-	_osd_req_encode_common(or, OSD_ACT_READ, obj, offset, bio->bi_size);
+	_osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len);
 	WARN_ON(or->in.bio || or->in.total_bytes);
-	bio->bi_rw &= ~(1 << BIO_RW);
+	WARN_ON(1 == bio_rw_flagged(bio, BIO_RW));
 	or->in.bio = bio;
-	or->in.total_bytes = bio->bi_size;
+	or->in.total_bytes = len;
 }
 EXPORT_SYMBOL(osd_req_read);
 
@@ -847,7 +850,7 @@ int osd_req_read_kern(struct osd_request *or,
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
-	osd_req_read(or, obj, bio, offset);
+	osd_req_read(or, obj, offset, bio, len);
 	return 0;
 }
 EXPORT_SYMBOL(osd_req_read_kern);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ba8d9fa..f79e8e5 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -266,7 +266,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 		goto err;
 	}
 
-	osd_req_read(or, &obj, pcol->bio, i_start);
+	osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
 
 	if (is_sync) {
 		exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
@@ -522,7 +522,8 @@ static int write_exec(struct page_collect *pcol)
 
 	*pcol_copy = *pcol;
 
-	osd_req_write(or, &obj, pcol_copy->bio, i_start);
+	pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+	osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
 	ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
 	if (unlikely(ret)) {
 		EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index 6132790..8c1e3b8 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -363,7 +363,7 @@ void osd_req_create_object(struct osd_request *or, struct osd_obj_id *);
 void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *);
 
 void osd_req_write(struct osd_request *or,
-	const struct osd_obj_id *, struct bio *data_out, u64 offset);
+	const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len);
 int osd_req_write_kern(struct osd_request *or,
 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 void osd_req_append(struct osd_request *or,
@@ -380,7 +380,7 @@ void osd_req_flush_object(struct osd_request *or,
 	/*V2*/ u64 offset, /*V2*/ u64 len);
 
 void osd_req_read(struct osd_request *or,
-	const struct osd_obj_id *, struct bio *data_in, u64 offset);
+	const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len);
 int osd_req_read_kern(struct osd_request *or,
 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 
-- 
cgit v0.10.2


From fc2fac5b5f11e2bee3bf37215c8746236f5ea188 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:04:43 +0300
Subject: [SCSI] libosd: Define an osd_dev wrapper to retrieve the
 request_queue

libosd users that need to work with bios, must sometime use
the request_queue associated with the osd_dev. Make a wrapper for
that, and convert all in-tree users.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 3f5ec57..3959797 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -670,7 +670,7 @@ static int _osd_req_list_objects(struct osd_request *or,
 	__be16 action, const struct osd_obj_id *obj, osd_id initial_id,
 	struct osd_obj_id_list *list, unsigned nelem)
 {
-	struct request_queue *q = or->osd_dev->scsi_device->request_queue;
+	struct request_queue *q = osd_request_queue(or->osd_dev);
 	u64 len = nelem * sizeof(osd_id) + sizeof(*list);
 	struct bio *bio;
 
@@ -793,7 +793,7 @@ EXPORT_SYMBOL(osd_req_write);
 int osd_req_write_kern(struct osd_request *or,
 	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
 {
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct request_queue *req_q = osd_request_queue(or->osd_dev);
 	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
 
 	if (IS_ERR(bio))
@@ -844,7 +844,7 @@ EXPORT_SYMBOL(osd_req_read);
 int osd_req_read_kern(struct osd_request *or,
 	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
 {
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct request_queue *req_q = osd_request_queue(or->osd_dev);
 	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
 
 	if (IS_ERR(bio))
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index f79e8e5..77d0a29 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
 		struct inode *inode)
 {
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
-	struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue;
 
 	pcol->sbi = sbi;
-	pcol->req_q = req_q;
+	pcol->req_q = osd_request_queue(sbi->s_dev);
 	pcol->inode = inode;
 	pcol->expected_pages = expected_pages;
 
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index 8c1e3b8..b44dc53 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -18,6 +18,7 @@
 #include "osd_types.h"
 
 #include <linux/blkdev.h>
+#include <scsi/scsi_device.h>
 
 /* Note: "NI" in comments below means "Not Implemented yet" */
 
@@ -69,6 +70,10 @@ void osd_dev_fini(struct osd_dev *od);
 
 /* some hi level device operations */
 int osd_auto_detect_ver(struct osd_dev *od, void *caps);    /* GFP_KERNEL */
+static inline struct request_queue *osd_request_queue(struct osd_dev *od)
+{
+	return od->scsi_device->request_queue;
+}
 
 /* we might want to use function vector in the future */
 static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v)
-- 
cgit v0.10.2


From 021e2230d6c04d80289fceb2d21c9ce93a615b32 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:05:05 +0300
Subject: [SCSI] osduld: use filp_open() when looking up an osd-device

This patch was inspired by Al Viro, for simplifying and fixing the
retrieval of osd-devices by in-kernel users, eg: file systems.
In-Kernel users, now, go through the same path user-mode does by
opening a file on the osd char-device and though holding a reference
to both the device and the Module.

A file pointer was added to the osd_dev structure which is now
allocated for each user. The internal osd_dev is no longer exposed
outside of the uld. I wanted to do that for a long time so each
libosd user can have his own defaults on the device.

The API is left the same, so user code need not change.

It is no longer needed to open/close a file handle on the osd
char-device from user-mode, before mounting an exofs on it.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c
index 22b59e1..0bdef33 100644
--- a/drivers/scsi/osd/osd_uld.c
+++ b/drivers/scsi/osd/osd_uld.c
@@ -49,6 +49,7 @@
 #include <linux/device.h>
 #include <linux/idr.h>
 #include <linux/major.h>
+#include <linux/file.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_driver.h>
@@ -175,10 +176,9 @@ static const struct file_operations osd_fops = {
 
 struct osd_dev *osduld_path_lookup(const char *name)
 {
-	struct path path;
-	struct inode *inode;
-	struct cdev *cdev;
-	struct osd_uld_device *uninitialized_var(oud);
+	struct osd_uld_device *oud;
+	struct osd_dev *od;
+	struct file *file;
 	int error;
 
 	if (!name || !*name) {
@@ -186,52 +186,46 @@ struct osd_dev *osduld_path_lookup(const char *name)
 		return ERR_PTR(-EINVAL);
 	}
 
-	error = kern_path(name, LOOKUP_FOLLOW, &path);
-	if (error) {
-		OSD_ERR("path_lookup of %s failed=>%d\n", name, error);
-		return ERR_PTR(error);
-	}
+	od = kzalloc(sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return ERR_PTR(-ENOMEM);
 
-	inode = path.dentry->d_inode;
-	error = -EINVAL; /* Not the right device e.g osd_uld_device */
-	if (!S_ISCHR(inode->i_mode)) {
-		OSD_DEBUG("!S_ISCHR()\n");
-		goto out;
+	file = filp_open(name, O_RDWR, 0);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto free_od;
 	}
 
-	cdev = inode->i_cdev;
-	if (!cdev) {
-		OSD_ERR("Before mounting an OSD Based filesystem\n");
-		OSD_ERR("  user-mode must open+close the %s device\n", name);
-		OSD_ERR("  Example: bash: echo < %s\n", name);
-		goto out;
+	if (file->f_op != &osd_fops){
+		error = -EINVAL;
+		goto close_file;
 	}
 
-	/* The Magic wand. Is it our char-dev */
-	/* TODO: Support sg devices */
-	if (cdev->owner != THIS_MODULE) {
-		OSD_ERR("Error mounting %s - is not an OSD device\n", name);
-		goto out;
-	}
+	oud = file->private_data;
 
-	oud = container_of(cdev, struct osd_uld_device, cdev);
+	*od = oud->od;
+	od->file = file;
 
-	__uld_get(oud);
-	error = 0;
+	return od;
 
-out:
-	path_put(&path);
-	return error ? ERR_PTR(error) : &oud->od;
+close_file:
+	fput(file);
+free_od:
+	kfree(od);
+	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(osduld_path_lookup);
 
 void osduld_put_device(struct osd_dev *od)
 {
-	if (od) {
-		struct osd_uld_device *oud = container_of(od,
-						struct osd_uld_device, od);
 
-		__uld_put(oud);
+	if (od && !IS_ERR(od)) {
+		struct osd_uld_device *oud = od->file->private_data;
+
+		BUG_ON(od->scsi_device != oud->od.scsi_device);
+
+		fput(od->file);
+		kfree(od);
 	}
 }
 EXPORT_SYMBOL(osduld_put_device);
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index b44dc53..02bd9f7 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -48,6 +48,7 @@ enum osd_std_version {
  */
 struct osd_dev {
 	struct scsi_device *scsi_device;
+	struct file *file;
 	unsigned def_timeout;
 
 #ifdef OSD_VER1_SUPPORT
-- 
cgit v0.10.2


From 03306793e686fd895ab8fa095bb9ec33658ea53a Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:05:43 +0300
Subject: [SCSI] libosd: Use REQ_QUIET requests.

libosd has it's own sense decoding and printout. Don't
let scsi_lib duplicate that printout. (Which is done wrong
in regard to osd commands)

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 3959797..71341ad 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1340,6 +1340,8 @@ static int _init_blk_request(struct osd_request *or,
 
 	or->request = req;
 	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_QUIET;
+
 	req->timeout = or->timeout;
 	req->retries = or->retries;
 	req->sense = or->sense;
-- 
cgit v0.10.2


From 3860c97bd60a4525bb62eb90e3e7d2f02662ac59 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:05:58 +0300
Subject: [SCSI] osd: Remove out-of-tree left overs

* Delete Makefile. It is only used for out-of-tree compilation
  and was never needed. It slipped in by mistake.
* Remove from Kbuild all the out of tree stuff as promised.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/scsi/osd/Kbuild b/drivers/scsi/osd/Kbuild
index 0e207aa..5fd73d77 100644
--- a/drivers/scsi/osd/Kbuild
+++ b/drivers/scsi/osd/Kbuild
@@ -11,31 +11,6 @@
 # it under the terms of the GNU General Public License version 2
 #
 
-ifneq ($(OSD_INC),)
-# we are built out-of-tree Kconfigure everything as on
-
-CONFIG_SCSI_OSD_INITIATOR=m
-ccflags-y += -DCONFIG_SCSI_OSD_INITIATOR -DCONFIG_SCSI_OSD_INITIATOR_MODULE
-
-CONFIG_SCSI_OSD_ULD=m
-ccflags-y += -DCONFIG_SCSI_OSD_ULD -DCONFIG_SCSI_OSD_ULD_MODULE
-
-# CONFIG_SCSI_OSD_DPRINT_SENSE =
-#	0 - no print of errors
-#	1 - print errors
-#	2 - errors + warrnings
-ccflags-y += -DCONFIG_SCSI_OSD_DPRINT_SENSE=1
-
-# Uncomment to turn debug on
-# ccflags-y += -DCONFIG_SCSI_OSD_DEBUG
-
-# if we are built out-of-tree and the hosting kernel has OSD headers
-# then "ccflags-y +=" will not pick the out-off-tree headers. Only by doing
-# this it will work. This might break in future kernels
-LINUXINCLUDE := -I$(OSD_INC) $(LINUXINCLUDE)
-
-endif
-
 # libosd.ko - osd-initiator library
 libosd-y := osd_initiator.o
 obj-$(CONFIG_SCSI_OSD_INITIATOR) += libosd.o
diff --git a/drivers/scsi/osd/Makefile b/drivers/scsi/osd/Makefile
deleted file mode 100755
index d905344..0000000
--- a/drivers/scsi/osd/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# Makefile for the OSD modules (out of tree)
-#
-# Copyright (C) 2008 Panasas Inc.  All rights reserved.
-#
-# Authors:
-#   Boaz Harrosh <bharrosh@panasas.com>
-#   Benny Halevy <bhalevy@panasas.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2
-#
-# This Makefile is used to call the kernel Makefile in case of an out-of-tree
-# build.
-# $KSRC should point to a Kernel source tree otherwise host's default is
-# used. (eg. /lib/modules/`uname -r`/build)
-
-# include path for out-of-tree Headers
-OSD_INC ?= `pwd`/../../../include
-
-# allow users to override these
-# e.g. to compile for a kernel that you aren't currently running
-KSRC ?= /lib/modules/$(shell uname -r)/build
-KBUILD_OUTPUT ?=
-ARCH ?=
-V ?= 0
-
-# this is the basic Kbuild out-of-tree invocation, with the M= option
-KBUILD_BASE = +$(MAKE) -C $(KSRC) M=`pwd` KBUILD_OUTPUT=$(KBUILD_OUTPUT) ARCH=$(ARCH) V=$(V)
-
-all: libosd
-
-libosd: ;
-	$(KBUILD_BASE) OSD_INC=$(OSD_INC) modules
-
-clean:
-	$(KBUILD_BASE) clean
-- 
cgit v0.10.2


From 112ac808eb8a953dd356bbbc8322fdd6861e2c75 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Wed, 10 Jun 2009 16:39:01 +0200
Subject: ALSA: sound/ps3: Fix checkpatch issues

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index f361c26..d660b0f 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -165,8 +165,7 @@ static const struct snd_pcm_hardware snd_ps3_pcm_hw = {
 	.fifo_size = PS3_AUDIO_FIFO_SIZE
 };
 
-static struct snd_pcm_ops snd_ps3_pcm_spdif_ops =
-{
+static struct snd_pcm_ops snd_ps3_pcm_spdif_ops = {
 	.open = snd_ps3_pcm_open,
 	.close = snd_ps3_pcm_close,
 	.prepare = snd_ps3_pcm_prepare,
@@ -183,7 +182,7 @@ static int snd_ps3_verify_dma_stop(struct snd_ps3_card_info *card,
 	int dma_ch, done, retries, stop_forced = 0;
 	uint32_t status;
 
-	for (dma_ch = 0; dma_ch < 8; dma_ch ++) {
+	for (dma_ch = 0; dma_ch < 8; dma_ch++) {
 		retries = count;
 		do {
 			status = read_reg(PS3_AUDIO_KICK(dma_ch)) &
@@ -259,9 +258,7 @@ static void snd_ps3_kick_dma(struct snd_ps3_card_info *card)
 /*
  * convert virtual addr to ioif bus addr.
  */
-static dma_addr_t v_to_bus(struct snd_ps3_card_info *card,
-			   void * paddr,
-			   int ch)
+static dma_addr_t v_to_bus(struct snd_ps3_card_info *card, void *paddr, int ch)
 {
 	return card->dma_start_bus_addr[ch] +
 		(paddr - card->dma_start_vaddr[ch]);
@@ -321,7 +318,7 @@ static int snd_ps3_program_dma(struct snd_ps3_card_info *card,
 	spin_lock_irqsave(&card->dma_lock, irqsave);
 	for (ch = 0; ch < 2; ch++) {
 		start_vaddr = card->dma_next_transfer_vaddr[0];
-		for (stage = 0; stage < fill_stages; stage ++) {
+		for (stage = 0; stage < fill_stages; stage++) {
 			dma_ch = stage * 2 + ch;
 			if (silent)
 				dma_addr = card->null_buffer_start_dma_addr;
@@ -619,7 +616,7 @@ static int snd_ps3_change_avsetting(struct snd_ps3_card_info *card)
 			  PS3_AUDIO_AO_3WMCTRL_ASOEN(2) |
 			  PS3_AUDIO_AO_3WMCTRL_ASOEN(3)),
 			0);
-	wmb(); 	/* ensure the hardware sees the change */
+	wmb();	/* ensure the hardware sees the change */
 	/* wait for actually stopped */
 	retries = 1000;
 	while ((read_reg(PS3_AUDIO_AO_3WMCTRL) &
@@ -798,20 +795,20 @@ static struct snd_kcontrol_new spdif_ctls[] = {
 	{
 		.access = SNDRV_CTL_ELEM_ACCESS_READ,
 		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
-		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK),
+		.name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, CON_MASK),
 		.info = snd_ps3_spdif_mask_info,
 		.get = snd_ps3_spdif_cmask_get,
 	},
 	{
 		.access = SNDRV_CTL_ELEM_ACCESS_READ,
 		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
-		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK),
+		.name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, PRO_MASK),
 		.info = snd_ps3_spdif_mask_info,
 		.get = snd_ps3_spdif_pmask_get,
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_PCM,
-		.name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT),
+		.name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, DEFAULT),
 		.info = snd_ps3_spdif_mask_info,
 		.get = snd_ps3_spdif_default_get,
 		.put = snd_ps3_spdif_default_put,
@@ -1020,11 +1017,12 @@ static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 	 * its size should be lager than PS3_AUDIO_FIFO_STAGE_SIZE * 2
 	 * PAGE_SIZE is enogh
 	 */
-	if (!(the_card.null_buffer_start_vaddr =
-	      dma_alloc_coherent(&the_card.ps3_dev->core,
-				 PAGE_SIZE,
-				 &the_card.null_buffer_start_dma_addr,
-				 GFP_KERNEL))) {
+	the_card.null_buffer_start_vaddr =
+		dma_alloc_coherent(&the_card.ps3_dev->core,
+				   PAGE_SIZE,
+				   &the_card.null_buffer_start_dma_addr,
+				   GFP_KERNEL);
+	if (!the_card.null_buffer_start_vaddr) {
 		pr_info("%s: nullbuffer alloc failed\n", __func__);
 		goto clean_preallocate;
 	}
@@ -1148,7 +1146,7 @@ static irqreturn_t snd_ps3_interrupt(int irq, void *dev_id)
 				SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL :
 				SND_PS3_DMA_FILLTYPE_SILENT_RUNNING);
 			snd_ps3_kick_dma(card);
-			card->silent --;
+			card->silent--;
 		} else {
 			snd_ps3_program_dma(card,
 				(underflow_occured) ?
-- 
cgit v0.10.2


From cb6492e4a4e68281358510f0ffe2b0c4972ec166 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Wed, 10 Jun 2009 16:39:02 +0200
Subject: ALSA: sound/ps3: Restructure driver source

Sort includes, and reorder code so we can kill the forward declarations

No functional changes

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index d660b0f..cd9109a 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -18,81 +18,31 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
 #include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/io.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+
+#include <sound/asound.h>
+#include <sound/control.h>
 #include <sound/core.h>
 #include <sound/initval.h>
-#include <sound/pcm.h>
-#include <sound/asound.h>
 #include <sound/memalloc.h>
+#include <sound/pcm.h>
 #include <sound/pcm_params.h>
-#include <sound/control.h>
-#include <linux/dmapool.h>
-#include <linux/dma-mapping.h>
-#include <asm/firmware.h>
+
 #include <asm/dma.h>
+#include <asm/firmware.h>
 #include <asm/lv1call.h>
 #include <asm/ps3.h>
 #include <asm/ps3av.h>
 
-#include "snd_ps3_reg.h"
 #include "snd_ps3.h"
-
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("PS3 sound driver");
-MODULE_AUTHOR("Sony Computer Entertainment Inc.");
-
-/* module  entries */
-static int __init snd_ps3_init(void);
-static void __exit snd_ps3_exit(void);
-
-/* ALSA snd driver ops */
-static int snd_ps3_pcm_open(struct snd_pcm_substream *substream);
-static int snd_ps3_pcm_close(struct snd_pcm_substream *substream);
-static int snd_ps3_pcm_prepare(struct snd_pcm_substream *substream);
-static int snd_ps3_pcm_trigger(struct snd_pcm_substream *substream,
-				 int cmd);
-static snd_pcm_uframes_t snd_ps3_pcm_pointer(struct snd_pcm_substream
-					     *substream);
-static int snd_ps3_pcm_hw_params(struct snd_pcm_substream *substream,
-				 struct snd_pcm_hw_params *hw_params);
-static int snd_ps3_pcm_hw_free(struct snd_pcm_substream *substream);
-
-
-/* ps3_system_bus_driver entries */
-static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev);
-static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev);
-
-/* address setup */
-static int snd_ps3_map_mmio(void);
-static void snd_ps3_unmap_mmio(void);
-static int snd_ps3_allocate_irq(void);
-static void snd_ps3_free_irq(void);
-static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start);
-
-/* interrupt handler */
-static irqreturn_t snd_ps3_interrupt(int irq, void *dev_id);
-
-
-/* set sampling rate/format */
-static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream);
-/* take effect parameter change */
-static int snd_ps3_change_avsetting(struct snd_ps3_card_info *card);
-/* initialize avsetting and take it effect */
-static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card);
-/* setup dma */
-static int snd_ps3_program_dma(struct snd_ps3_card_info *card,
-			       enum snd_ps3_dma_filltype filltype);
-static void snd_ps3_wait_for_dma_stop(struct snd_ps3_card_info *card);
-
-static dma_addr_t v_to_bus(struct snd_ps3_card_info *, void  *vaddr, int ch);
+#include "snd_ps3_reg.h"
 
 
-module_init(snd_ps3_init);
-module_exit(snd_ps3_exit);
-
 /*
  * global
  */
@@ -165,17 +115,6 @@ static const struct snd_pcm_hardware snd_ps3_pcm_hw = {
 	.fifo_size = PS3_AUDIO_FIFO_SIZE
 };
 
-static struct snd_pcm_ops snd_ps3_pcm_spdif_ops = {
-	.open = snd_ps3_pcm_open,
-	.close = snd_ps3_pcm_close,
-	.prepare = snd_ps3_pcm_prepare,
-	.ioctl = snd_pcm_lib_ioctl,
-	.trigger = snd_ps3_pcm_trigger,
-	.pointer = snd_ps3_pcm_pointer,
-	.hw_params = snd_ps3_pcm_hw_params,
-	.hw_free = snd_ps3_pcm_hw_free
-};
-
 static int snd_ps3_verify_dma_stop(struct snd_ps3_card_info *card,
 				   int count, int force_stop)
 {
@@ -369,6 +308,71 @@ static int snd_ps3_program_dma(struct snd_ps3_card_info *card,
 }
 
 /*
+ * Interrupt handler
+ */
+static irqreturn_t snd_ps3_interrupt(int irq, void *dev_id)
+{
+
+	uint32_t port_intr;
+	int underflow_occured = 0;
+	struct snd_ps3_card_info *card = dev_id;
+
+	if (!card->running) {
+		update_reg(PS3_AUDIO_AX_IS, 0);
+		update_reg(PS3_AUDIO_INTR_0, 0);
+		return IRQ_HANDLED;
+	}
+
+	port_intr = read_reg(PS3_AUDIO_AX_IS);
+	/*
+	 *serial buffer empty detected (every 4 times),
+	 *program next dma and kick it
+	 */
+	if (port_intr & PS3_AUDIO_AX_IE_ASOBEIE(0)) {
+		write_reg(PS3_AUDIO_AX_IS, PS3_AUDIO_AX_IE_ASOBEIE(0));
+		if (port_intr & PS3_AUDIO_AX_IE_ASOBUIE(0)) {
+			write_reg(PS3_AUDIO_AX_IS, port_intr);
+			underflow_occured = 1;
+		}
+		if (card->silent) {
+			/* we are still in silent time */
+			snd_ps3_program_dma(card,
+				(underflow_occured) ?
+				SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL :
+				SND_PS3_DMA_FILLTYPE_SILENT_RUNNING);
+			snd_ps3_kick_dma(card);
+			card->silent--;
+		} else {
+			snd_ps3_program_dma(card,
+				(underflow_occured) ?
+				SND_PS3_DMA_FILLTYPE_FIRSTFILL :
+				SND_PS3_DMA_FILLTYPE_RUNNING);
+			snd_ps3_kick_dma(card);
+			snd_pcm_period_elapsed(card->substream);
+		}
+	} else if (port_intr & PS3_AUDIO_AX_IE_ASOBUIE(0)) {
+		write_reg(PS3_AUDIO_AX_IS, PS3_AUDIO_AX_IE_ASOBUIE(0));
+		/*
+		 * serial out underflow, but buffer empty not detected.
+		 * in this case, fill fifo with 0 to recover.  After
+		 * filling dummy data, serial automatically start to
+		 * consume them and then will generate normal buffer
+		 * empty interrupts.
+		 * If both buffer underflow and buffer empty are occured,
+		 * it is better to do nomal data transfer than empty one
+		 */
+		snd_ps3_program_dma(card,
+				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
+		snd_ps3_kick_dma(card);
+		snd_ps3_program_dma(card,
+				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
+		snd_ps3_kick_dma(card);
+	}
+	/* clear interrupt cause */
+	return IRQ_HANDLED;
+};
+
+/*
  * audio mute on/off
  * mute_on : 0 output enabled
  *           1 mute
@@ -379,6 +383,142 @@ static int snd_ps3_mute(int mute_on)
 }
 
 /*
+ * av setting
+ * NOTE: calling this function may generate audio interrupt.
+ */
+static int snd_ps3_change_avsetting(struct snd_ps3_card_info *card)
+{
+	int ret, retries, i;
+	pr_debug("%s: start\n", __func__);
+
+	ret = ps3av_set_audio_mode(card->avs.avs_audio_ch,
+				  card->avs.avs_audio_rate,
+				  card->avs.avs_audio_width,
+				  card->avs.avs_audio_format,
+				  card->avs.avs_audio_source);
+	/*
+	 * Reset the following unwanted settings:
+	 */
+
+	/* disable all 3wire buffers */
+	update_mask_reg(PS3_AUDIO_AO_3WMCTRL,
+			~(PS3_AUDIO_AO_3WMCTRL_ASOEN(0) |
+			  PS3_AUDIO_AO_3WMCTRL_ASOEN(1) |
+			  PS3_AUDIO_AO_3WMCTRL_ASOEN(2) |
+			  PS3_AUDIO_AO_3WMCTRL_ASOEN(3)),
+			0);
+	wmb();	/* ensure the hardware sees the change */
+	/* wait for actually stopped */
+	retries = 1000;
+	while ((read_reg(PS3_AUDIO_AO_3WMCTRL) &
+		(PS3_AUDIO_AO_3WMCTRL_ASORUN(0) |
+		 PS3_AUDIO_AO_3WMCTRL_ASORUN(1) |
+		 PS3_AUDIO_AO_3WMCTRL_ASORUN(2) |
+		 PS3_AUDIO_AO_3WMCTRL_ASORUN(3))) &&
+	       --retries) {
+		udelay(1);
+	}
+
+	/* reset buffer pointer */
+	for (i = 0; i < 4; i++) {
+		update_reg(PS3_AUDIO_AO_3WCTRL(i),
+			   PS3_AUDIO_AO_3WCTRL_ASOBRST_RESET);
+		udelay(10);
+	}
+	wmb(); /* ensure the hardware actually start resetting */
+
+	/* enable 3wire#0 buffer */
+	update_reg(PS3_AUDIO_AO_3WMCTRL, PS3_AUDIO_AO_3WMCTRL_ASOEN(0));
+
+
+	/* In 24bit mode,ALSA inserts a zero byte at first byte of per sample */
+	update_mask_reg(PS3_AUDIO_AO_3WCTRL(0),
+			~PS3_AUDIO_AO_3WCTRL_ASODF,
+			PS3_AUDIO_AO_3WCTRL_ASODF_LSB);
+	update_mask_reg(PS3_AUDIO_AO_SPDCTRL(0),
+			~PS3_AUDIO_AO_SPDCTRL_SPODF,
+			PS3_AUDIO_AO_SPDCTRL_SPODF_LSB);
+	/* ensure all the setting above is written back to register */
+	wmb();
+	/* avsetting driver altered AX_IE, caller must reset it if you want */
+	pr_debug("%s: end\n", __func__);
+	return ret;
+}
+
+/*
+ *  set sampling rate according to the substream
+ */
+static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
+{
+	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
+	struct snd_ps3_avsetting_info avs;
+	int ret;
+
+	avs = card->avs;
+
+	pr_debug("%s: called freq=%d width=%d\n", __func__,
+		 substream->runtime->rate,
+		 snd_pcm_format_width(substream->runtime->format));
+
+	pr_debug("%s: before freq=%d width=%d\n", __func__,
+		 card->avs.avs_audio_rate, card->avs.avs_audio_width);
+
+	/* sample rate */
+	switch (substream->runtime->rate) {
+	case 44100:
+		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_44K;
+		break;
+	case 48000:
+		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_48K;
+		break;
+	case 88200:
+		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_88K;
+		break;
+	case 96000:
+		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_96K;
+		break;
+	default:
+		pr_info("%s: invalid rate %d\n", __func__,
+			substream->runtime->rate);
+		return 1;
+	}
+
+	/* width */
+	switch (snd_pcm_format_width(substream->runtime->format)) {
+	case 16:
+		avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
+		break;
+	case 24:
+		avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_24;
+		break;
+	default:
+		pr_info("%s: invalid width %d\n", __func__,
+			snd_pcm_format_width(substream->runtime->format));
+		return 1;
+	}
+
+	memcpy(avs.avs_cs_info, ps3av_mode_cs_info, 8);
+
+	if (memcmp(&card->avs, &avs, sizeof(avs))) {
+		pr_debug("%s: after freq=%d width=%d\n", __func__,
+			 card->avs.avs_audio_rate, card->avs.avs_audio_width);
+
+		card->avs = avs;
+		snd_ps3_change_avsetting(card);
+		ret = 0;
+	} else
+		ret = 1;
+
+	/* check CS non-audio bit and mute accordingly */
+	if (avs.avs_cs_info[0] & 0x02)
+		ps3av_audio_mute_analog(1); /* mute if non-audio */
+	else
+		ps3av_audio_mute_analog(0);
+
+	return ret;
+}
+
+/*
  * PCM operators
  */
 static int snd_ps3_pcm_open(struct snd_pcm_substream *substream)
@@ -403,6 +543,13 @@ static int snd_ps3_pcm_open(struct snd_pcm_substream *substream)
 	return 0;
 };
 
+static int snd_ps3_pcm_close(struct snd_pcm_substream *substream)
+{
+	/* mute on */
+	snd_ps3_mute(1);
+	return 0;
+};
+
 static int snd_ps3_pcm_hw_params(struct snd_pcm_substream *substream,
 				 struct snd_pcm_hw_params *hw_params)
 {
@@ -414,6 +561,13 @@ static int snd_ps3_pcm_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 };
 
+static int snd_ps3_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	int ret;
+	ret = snd_pcm_lib_free_pages(substream);
+	return ret;
+};
+
 static int snd_ps3_delay_to_bytes(struct snd_pcm_substream *substream,
 				  unsigned int delay_ms)
 {
@@ -467,287 +621,91 @@ static int snd_ps3_pcm_prepare(struct snd_pcm_substream *substream)
 			runtime->dma_area;
 		card->dma_start_bus_addr[SND_PS3_CH_L] = runtime->dma_addr;
 
-		card->dma_last_transfer_vaddr[SND_PS3_CH_R] =
-			card->dma_next_transfer_vaddr[SND_PS3_CH_R] =
-			card->dma_start_vaddr[SND_PS3_CH_R] =
-			runtime->dma_area + (runtime->dma_bytes / 2);
-		card->dma_start_bus_addr[SND_PS3_CH_R] =
-			runtime->dma_addr + (runtime->dma_bytes / 2);
-
-		pr_debug("%s: vaddr=%p bus=%#llx\n", __func__,
-			 card->dma_start_vaddr[SND_PS3_CH_L],
-			 card->dma_start_bus_addr[SND_PS3_CH_L]);
-
-	}
-	spin_unlock_irqrestore(&card->dma_lock, irqsave);
-
-	/* ensure the hardware sees the change */
-	mb();
-
-	return 0;
-};
-
-static int snd_ps3_pcm_trigger(struct snd_pcm_substream *substream,
-			       int cmd)
-{
-	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
-	int ret = 0;
-
-	switch (cmd) {
-	case SNDRV_PCM_TRIGGER_START:
-		/* clear outstanding interrupts  */
-		update_reg(PS3_AUDIO_AX_IS, 0);
-
-		spin_lock(&card->dma_lock);
-		{
-			card->running = 1;
-		}
-		spin_unlock(&card->dma_lock);
-
-		snd_ps3_program_dma(card,
-				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
-		snd_ps3_kick_dma(card);
-		while (read_reg(PS3_AUDIO_KICK(7)) &
-		       PS3_AUDIO_KICK_STATUS_MASK) {
-			udelay(1);
-		}
-		snd_ps3_program_dma(card, SND_PS3_DMA_FILLTYPE_SILENT_RUNNING);
-		snd_ps3_kick_dma(card);
-		break;
-
-	case SNDRV_PCM_TRIGGER_STOP:
-		spin_lock(&card->dma_lock);
-		{
-			card->running = 0;
-		}
-		spin_unlock(&card->dma_lock);
-		snd_ps3_wait_for_dma_stop(card);
-		break;
-	default:
-		break;
-
-	}
-
-	return ret;
-};
-
-/*
- * report current pointer
- */
-static snd_pcm_uframes_t snd_ps3_pcm_pointer(
-	struct snd_pcm_substream *substream)
-{
-	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
-	size_t bytes;
-	snd_pcm_uframes_t ret;
-
-	spin_lock(&card->dma_lock);
-	{
-		bytes = (size_t)(card->dma_last_transfer_vaddr[SND_PS3_CH_L] -
-				 card->dma_start_vaddr[SND_PS3_CH_L]);
-	}
-	spin_unlock(&card->dma_lock);
-
-	ret = bytes_to_frames(substream->runtime, bytes * 2);
-
-	return ret;
-};
-
-static int snd_ps3_pcm_hw_free(struct snd_pcm_substream *substream)
-{
-	int ret;
-	ret = snd_pcm_lib_free_pages(substream);
-	return ret;
-};
-
-static int snd_ps3_pcm_close(struct snd_pcm_substream *substream)
-{
-	/* mute on */
-	snd_ps3_mute(1);
-	return 0;
-};
-
-static void snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
-{
-	/*
-	 * avsetting driver seems to never change the followings
-	 * so, init them here once
-	 */
-
-	/* no dma interrupt needed */
-	write_reg(PS3_AUDIO_INTR_EN_0, 0);
-
-	/* use every 4 buffer empty interrupt */
-	update_mask_reg(PS3_AUDIO_AX_IC,
-			PS3_AUDIO_AX_IC_AASOIMD_MASK,
-			PS3_AUDIO_AX_IC_AASOIMD_EVERY4);
-
-	/* enable 3wire clocks */
-	update_mask_reg(PS3_AUDIO_AO_3WMCTRL,
-			~(PS3_AUDIO_AO_3WMCTRL_ASOBCLKD_DISABLED |
-			  PS3_AUDIO_AO_3WMCTRL_ASOLRCKD_DISABLED),
-			0);
-	update_reg(PS3_AUDIO_AO_3WMCTRL,
-		   PS3_AUDIO_AO_3WMCTRL_ASOPLRCK_DEFAULT);
-}
-
-/*
- * av setting
- * NOTE: calling this function may generate audio interrupt.
- */
-static int snd_ps3_change_avsetting(struct snd_ps3_card_info *card)
-{
-	int ret, retries, i;
-	pr_debug("%s: start\n", __func__);
-
-	ret = ps3av_set_audio_mode(card->avs.avs_audio_ch,
-				  card->avs.avs_audio_rate,
-				  card->avs.avs_audio_width,
-				  card->avs.avs_audio_format,
-				  card->avs.avs_audio_source);
-	/*
-	 * Reset the following unwanted settings:
-	 */
-
-	/* disable all 3wire buffers */
-	update_mask_reg(PS3_AUDIO_AO_3WMCTRL,
-			~(PS3_AUDIO_AO_3WMCTRL_ASOEN(0) |
-			  PS3_AUDIO_AO_3WMCTRL_ASOEN(1) |
-			  PS3_AUDIO_AO_3WMCTRL_ASOEN(2) |
-			  PS3_AUDIO_AO_3WMCTRL_ASOEN(3)),
-			0);
-	wmb();	/* ensure the hardware sees the change */
-	/* wait for actually stopped */
-	retries = 1000;
-	while ((read_reg(PS3_AUDIO_AO_3WMCTRL) &
-		(PS3_AUDIO_AO_3WMCTRL_ASORUN(0) |
-		 PS3_AUDIO_AO_3WMCTRL_ASORUN(1) |
-		 PS3_AUDIO_AO_3WMCTRL_ASORUN(2) |
-		 PS3_AUDIO_AO_3WMCTRL_ASORUN(3))) &&
-	       --retries) {
-		udelay(1);
-	}
-
-	/* reset buffer pointer */
-	for (i = 0; i < 4; i++) {
-		update_reg(PS3_AUDIO_AO_3WCTRL(i),
-			   PS3_AUDIO_AO_3WCTRL_ASOBRST_RESET);
-		udelay(10);
-	}
-	wmb(); /* ensure the hardware actually start resetting */
-
-	/* enable 3wire#0 buffer */
-	update_reg(PS3_AUDIO_AO_3WMCTRL, PS3_AUDIO_AO_3WMCTRL_ASOEN(0));
-
-
-	/* In 24bit mode,ALSA inserts a zero byte at first byte of per sample */
-	update_mask_reg(PS3_AUDIO_AO_3WCTRL(0),
-			~PS3_AUDIO_AO_3WCTRL_ASODF,
-			PS3_AUDIO_AO_3WCTRL_ASODF_LSB);
-	update_mask_reg(PS3_AUDIO_AO_SPDCTRL(0),
-			~PS3_AUDIO_AO_SPDCTRL_SPODF,
-			PS3_AUDIO_AO_SPDCTRL_SPODF_LSB);
-	/* ensure all the setting above is written back to register */
-	wmb();
-	/* avsetting driver altered AX_IE, caller must reset it if you want */
-	pr_debug("%s: end\n", __func__);
-	return ret;
-}
-
-static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
-{
-	int ret;
-	pr_debug("%s: start\n", __func__);
-	card->avs.avs_audio_ch = PS3AV_CMD_AUDIO_NUM_OF_CH_2;
-	card->avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_48K;
-	card->avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
-	card->avs.avs_audio_format = PS3AV_CMD_AUDIO_FORMAT_PCM;
-	card->avs.avs_audio_source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
-	memcpy(card->avs.avs_cs_info, ps3av_mode_cs_info, 8);
-
-	ret = snd_ps3_change_avsetting(card);
+		card->dma_last_transfer_vaddr[SND_PS3_CH_R] =
+			card->dma_next_transfer_vaddr[SND_PS3_CH_R] =
+			card->dma_start_vaddr[SND_PS3_CH_R] =
+			runtime->dma_area + (runtime->dma_bytes / 2);
+		card->dma_start_bus_addr[SND_PS3_CH_R] =
+			runtime->dma_addr + (runtime->dma_bytes / 2);
 
-	snd_ps3_audio_fixup(card);
+		pr_debug("%s: vaddr=%p bus=%#llx\n", __func__,
+			 card->dma_start_vaddr[SND_PS3_CH_L],
+			 card->dma_start_bus_addr[SND_PS3_CH_L]);
 
-	/* to start to generate SPDIF signal, fill data */
-	snd_ps3_program_dma(card, SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
-	snd_ps3_kick_dma(card);
-	pr_debug("%s: end\n", __func__);
-	return ret;
-}
+	}
+	spin_unlock_irqrestore(&card->dma_lock, irqsave);
 
-/*
- *  set sampling rate according to the substream
- */
-static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
+	/* ensure the hardware sees the change */
+	mb();
+
+	return 0;
+};
+
+static int snd_ps3_pcm_trigger(struct snd_pcm_substream *substream,
+			       int cmd)
 {
 	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
-	struct snd_ps3_avsetting_info avs;
-	int ret;
-
-	avs = card->avs;
+	int ret = 0;
 
-	pr_debug("%s: called freq=%d width=%d\n", __func__,
-		 substream->runtime->rate,
-		 snd_pcm_format_width(substream->runtime->format));
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		/* clear outstanding interrupts  */
+		update_reg(PS3_AUDIO_AX_IS, 0);
 
-	pr_debug("%s: before freq=%d width=%d\n", __func__,
-		 card->avs.avs_audio_rate, card->avs.avs_audio_width);
+		spin_lock(&card->dma_lock);
+		{
+			card->running = 1;
+		}
+		spin_unlock(&card->dma_lock);
 
-	/* sample rate */
-	switch (substream->runtime->rate) {
-	case 44100:
-		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_44K;
-		break;
-	case 48000:
-		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_48K;
-		break;
-	case 88200:
-		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_88K;
-		break;
-	case 96000:
-		avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_96K;
+		snd_ps3_program_dma(card,
+				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
+		snd_ps3_kick_dma(card);
+		while (read_reg(PS3_AUDIO_KICK(7)) &
+		       PS3_AUDIO_KICK_STATUS_MASK) {
+			udelay(1);
+		}
+		snd_ps3_program_dma(card, SND_PS3_DMA_FILLTYPE_SILENT_RUNNING);
+		snd_ps3_kick_dma(card);
 		break;
-	default:
-		pr_info("%s: invalid rate %d\n", __func__,
-			substream->runtime->rate);
-		return 1;
-	}
 
-	/* width */
-	switch (snd_pcm_format_width(substream->runtime->format)) {
-	case 16:
-		avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
-		break;
-	case 24:
-		avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_24;
+	case SNDRV_PCM_TRIGGER_STOP:
+		spin_lock(&card->dma_lock);
+		{
+			card->running = 0;
+		}
+		spin_unlock(&card->dma_lock);
+		snd_ps3_wait_for_dma_stop(card);
 		break;
 	default:
-		pr_info("%s: invalid width %d\n", __func__,
-			snd_pcm_format_width(substream->runtime->format));
-		return 1;
+		break;
+
 	}
 
-	memcpy(avs.avs_cs_info, ps3av_mode_cs_info, 8);
+	return ret;
+};
 
-	if (memcmp(&card->avs, &avs, sizeof(avs))) {
-		pr_debug("%s: after freq=%d width=%d\n", __func__,
-			 card->avs.avs_audio_rate, card->avs.avs_audio_width);
+/*
+ * report current pointer
+ */
+static snd_pcm_uframes_t snd_ps3_pcm_pointer(
+	struct snd_pcm_substream *substream)
+{
+	struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
+	size_t bytes;
+	snd_pcm_uframes_t ret;
 
-		card->avs = avs;
-		snd_ps3_change_avsetting(card);
-		ret = 0;
-	} else
-		ret = 1;
+	spin_lock(&card->dma_lock);
+	{
+		bytes = (size_t)(card->dma_last_transfer_vaddr[SND_PS3_CH_L] -
+				 card->dma_start_vaddr[SND_PS3_CH_L]);
+	}
+	spin_unlock(&card->dma_lock);
 
-	/* check CS non-audio bit and mute accordingly */
-	if (avs.avs_cs_info[0] & 0x02)
-		ps3av_audio_mute_analog(1); /* mute if non-audio */
-	else
-		ps3av_audio_mute_analog(0);
+	ret = bytes_to_frames(substream->runtime, bytes * 2);
 
 	return ret;
-}
+};
 
 /*
  * SPDIF status bits controls
@@ -815,6 +773,17 @@ static struct snd_kcontrol_new spdif_ctls[] = {
 	},
 };
 
+static struct snd_pcm_ops snd_ps3_pcm_spdif_ops = {
+	.open = snd_ps3_pcm_open,
+	.close = snd_ps3_pcm_close,
+	.ioctl = snd_pcm_lib_ioctl,
+	.hw_params = snd_ps3_pcm_hw_params,
+	.hw_free = snd_ps3_pcm_hw_free,
+	.prepare = snd_ps3_pcm_prepare,
+	.trigger = snd_ps3_pcm_trigger,
+	.pointer = snd_ps3_pcm_pointer,
+};
+
 
 static int snd_ps3_map_mmio(void)
 {
@@ -912,6 +881,52 @@ static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 			ret);
 }
 
+static void snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
+{
+	/*
+	 * avsetting driver seems to never change the followings
+	 * so, init them here once
+	 */
+
+	/* no dma interrupt needed */
+	write_reg(PS3_AUDIO_INTR_EN_0, 0);
+
+	/* use every 4 buffer empty interrupt */
+	update_mask_reg(PS3_AUDIO_AX_IC,
+			PS3_AUDIO_AX_IC_AASOIMD_MASK,
+			PS3_AUDIO_AX_IC_AASOIMD_EVERY4);
+
+	/* enable 3wire clocks */
+	update_mask_reg(PS3_AUDIO_AO_3WMCTRL,
+			~(PS3_AUDIO_AO_3WMCTRL_ASOBCLKD_DISABLED |
+			  PS3_AUDIO_AO_3WMCTRL_ASOLRCKD_DISABLED),
+			0);
+	update_reg(PS3_AUDIO_AO_3WMCTRL,
+		   PS3_AUDIO_AO_3WMCTRL_ASOPLRCK_DEFAULT);
+}
+
+static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
+{
+	int ret;
+	pr_debug("%s: start\n", __func__);
+	card->avs.avs_audio_ch = PS3AV_CMD_AUDIO_NUM_OF_CH_2;
+	card->avs.avs_audio_rate = PS3AV_CMD_AUDIO_FS_48K;
+	card->avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
+	card->avs.avs_audio_format = PS3AV_CMD_AUDIO_FORMAT_PCM;
+	card->avs.avs_audio_source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
+	memcpy(card->avs.avs_cs_info, ps3av_mode_cs_info, 8);
+
+	ret = snd_ps3_change_avsetting(card);
+
+	snd_ps3_audio_fixup(card);
+
+	/* to start to generate SPDIF signal, fill data */
+	snd_ps3_program_dma(card, SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
+	snd_ps3_kick_dma(card);
+	pr_debug("%s: end\n", __func__);
+	return ret;
+}
+
 static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 {
 	int i, ret;
@@ -1113,71 +1128,6 @@ static struct ps3_system_bus_driver snd_ps3_bus_driver_info = {
 
 
 /*
- * Interrupt handler
- */
-static irqreturn_t snd_ps3_interrupt(int irq, void *dev_id)
-{
-
-	uint32_t port_intr;
-	int underflow_occured = 0;
-	struct snd_ps3_card_info *card = dev_id;
-
-	if (!card->running) {
-		update_reg(PS3_AUDIO_AX_IS, 0);
-		update_reg(PS3_AUDIO_INTR_0, 0);
-		return IRQ_HANDLED;
-	}
-
-	port_intr = read_reg(PS3_AUDIO_AX_IS);
-	/*
-	 *serial buffer empty detected (every 4 times),
-	 *program next dma and kick it
-	 */
-	if (port_intr & PS3_AUDIO_AX_IE_ASOBEIE(0)) {
-		write_reg(PS3_AUDIO_AX_IS, PS3_AUDIO_AX_IE_ASOBEIE(0));
-		if (port_intr & PS3_AUDIO_AX_IE_ASOBUIE(0)) {
-			write_reg(PS3_AUDIO_AX_IS, port_intr);
-			underflow_occured = 1;
-		}
-		if (card->silent) {
-			/* we are still in silent time */
-			snd_ps3_program_dma(card,
-				(underflow_occured) ?
-				SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL :
-				SND_PS3_DMA_FILLTYPE_SILENT_RUNNING);
-			snd_ps3_kick_dma(card);
-			card->silent--;
-		} else {
-			snd_ps3_program_dma(card,
-				(underflow_occured) ?
-				SND_PS3_DMA_FILLTYPE_FIRSTFILL :
-				SND_PS3_DMA_FILLTYPE_RUNNING);
-			snd_ps3_kick_dma(card);
-			snd_pcm_period_elapsed(card->substream);
-		}
-	} else if (port_intr & PS3_AUDIO_AX_IE_ASOBUIE(0)) {
-		write_reg(PS3_AUDIO_AX_IS, PS3_AUDIO_AX_IE_ASOBUIE(0));
-		/*
-		 * serial out underflow, but buffer empty not detected.
-		 * in this case, fill fifo with 0 to recover.  After
-		 * filling dummy data, serial automatically start to
-		 * consume them and then will generate normal buffer
-		 * empty interrupts.
-		 * If both buffer underflow and buffer empty are occured,
-		 * it is better to do nomal data transfer than empty one
-		 */
-		snd_ps3_program_dma(card,
-				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
-		snd_ps3_kick_dma(card);
-		snd_ps3_program_dma(card,
-				    SND_PS3_DMA_FILLTYPE_SILENT_FIRSTFILL);
-		snd_ps3_kick_dma(card);
-	}
-	/* clear interrupt cause */
-	return IRQ_HANDLED;
-};
-
-/*
  * module/subsystem initialize/terminate
  */
 static int __init snd_ps3_init(void)
@@ -1195,10 +1145,15 @@ static int __init snd_ps3_init(void)
 
 	return ret;
 }
+module_init(snd_ps3_init);
 
 static void __exit snd_ps3_exit(void)
 {
 	ps3_system_bus_driver_unregister(&snd_ps3_bus_driver_info);
 }
+module_exit(snd_ps3_exit);
 
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("PS3 sound driver");
+MODULE_AUTHOR("Sony Computer Entertainment Inc.");
 MODULE_ALIAS(PS3_MODULE_ALIAS_SOUND);
-- 
cgit v0.10.2


From 2233123f2776f29d3ac31df1fd1dc40c44d337a5 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Wed, 10 Jun 2009 16:39:03 +0200
Subject: ALSA: sound/ps3: Correct existing and add missing annotations

probe functions should be __devinit

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index cd9109a..53c81a5 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -785,7 +785,7 @@ static struct snd_pcm_ops snd_ps3_pcm_spdif_ops = {
 };
 
 
-static int snd_ps3_map_mmio(void)
+static int __devinit snd_ps3_map_mmio(void)
 {
 	the_card.mapped_mmio_vaddr =
 		ioremap(the_card.ps3_dev->m_region->bus_addr,
@@ -807,7 +807,7 @@ static void snd_ps3_unmap_mmio(void)
 	the_card.mapped_mmio_vaddr = NULL;
 }
 
-static int snd_ps3_allocate_irq(void)
+static int __devinit snd_ps3_allocate_irq(void)
 {
 	int ret;
 	u64 lpar_addr, lpar_size;
@@ -865,7 +865,7 @@ static void snd_ps3_free_irq(void)
 	ps3_irq_plug_destroy(the_card.irq_no);
 }
 
-static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
+static void __devinit snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 {
 	uint64_t val;
 	int ret;
@@ -881,7 +881,7 @@ static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 			ret);
 }
 
-static void snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
+static void __devinit snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
 {
 	/*
 	 * avsetting driver seems to never change the followings
@@ -905,7 +905,7 @@ static void snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
 		   PS3_AUDIO_AO_3WMCTRL_ASOPLRCK_DEFAULT);
 }
 
-static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
+static int __devinit snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
 {
 	int ret;
 	pr_debug("%s: start\n", __func__);
@@ -927,7 +927,7 @@ static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
 	return ret;
 }
 
-static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
+static int __devinit snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 {
 	int i, ret;
 	u64 lpar_addr, lpar_size;
-- 
cgit v0.10.2


From bd2b5b12849a3446abad0b25e920f86f5480b309 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 13:40:57 +0200
Subject: perf_counter: More aggressive frequency adjustment

Also employ the overflow handler to adjust the frequency, this results
in a stable frequency in about 40~50 samples, instead of that many ticks.

This also means we can start sampling at a sample period of 1 without
running head-first into the throttle.

It relies on sched_clock() to accurately measure the time difference
between the overflow NMIs.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 49f2585..240ca56 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -696,10 +696,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (!attr->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
-	if (!hwc->sample_period)
+	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
+		atomic64_set(&hwc->period_left, hwc->sample_period);
+	}
 
-	atomic64_set(&hwc->period_left, hwc->sample_period);
 	counter->destroy = hw_perf_counter_destroy;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 3586df8..282d8cc 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -371,6 +371,7 @@ struct hw_perf_counter {
 
 	u64				freq_count;
 	u64				freq_interrupts;
+	u64				freq_stamp;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5eacaaf..51c571e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1184,13 +1184,33 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 static void perf_log_throttle(struct perf_counter *counter, int enable);
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
-static void perf_adjust_freq(struct perf_counter_context *ctx)
+static void perf_adjust_period(struct perf_counter *counter, u64 events)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 period, sample_period;
+	s64 delta;
+
+	events *= hwc->sample_period;
+	period = div64_u64(events, counter->attr.sample_freq);
+
+	delta = (s64)(period - hwc->sample_period);
+	delta = (delta + 7) / 8; /* low pass filter */
+
+	sample_period = hwc->sample_period + delta;
+
+	if (!sample_period)
+		sample_period = 1;
+
+	perf_log_period(counter, sample_period);
+
+	hwc->sample_period = sample_period;
+}
+
+static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
-	u64 interrupts, sample_period;
-	u64 events, period, freq;
-	s64 delta;
+	u64 interrupts, freq;
 
 	spin_lock(&ctx->lock);
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
@@ -1202,6 +1222,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		interrupts = hwc->interrupts;
 		hwc->interrupts = 0;
 
+		/*
+		 * unthrottle counters on the tick
+		 */
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(counter, 1);
 			counter->pmu->unthrottle(counter);
@@ -1211,6 +1234,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (!counter->attr.freq || !counter->attr.sample_freq)
 			continue;
 
+		/*
+		 * if the specified freq < HZ then we need to skip ticks
+		 */
 		if (counter->attr.sample_freq < HZ) {
 			freq = counter->attr.sample_freq;
 
@@ -1226,20 +1252,20 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		} else
 			freq = HZ;
 
-		events = freq * interrupts * hwc->sample_period;
-		period = div64_u64(events, counter->attr.sample_freq);
-
-		delta = (s64)(1 + period - hwc->sample_period);
-		delta >>= 1;
-
-		sample_period = hwc->sample_period + delta;
-
-		if (!sample_period)
-			sample_period = 1;
+		perf_adjust_period(counter, freq * interrupts);
 
-		perf_log_period(counter, sample_period);
-
-		hwc->sample_period = sample_period;
+		/*
+		 * In order to avoid being stalled by an (accidental) huge
+		 * sample period, force reset the sample period if we didn't
+		 * get any events in this freq period.
+		 */
+		if (!interrupts) {
+			perf_disable();
+			counter->pmu->disable(counter);
+			atomic_set(&hwc->period_left, 0);
+			counter->pmu->enable(counter);
+			perf_enable();
+		}
 	}
 	spin_unlock(&ctx->lock);
 }
@@ -1279,9 +1305,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = curr->perf_counter_ctxp;
 
-	perf_adjust_freq(&cpuctx->ctx);
+	perf_ctx_adjust_freq(&cpuctx->ctx);
 	if (ctx)
-		perf_adjust_freq(ctx);
+		perf_ctx_adjust_freq(ctx);
 
 	perf_counter_cpu_sched_out(cpuctx);
 	if (ctx)
@@ -1647,10 +1673,10 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 
 		counter->attr.sample_freq = value;
 	} else {
+		perf_log_period(counter, value);
+
 		counter->attr.sample_period = value;
 		counter->hw.sample_period = value;
-
-		perf_log_period(counter, value);
 	}
 unlock:
 	spin_unlock_irq(&ctx->lock);
@@ -2853,35 +2879,41 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
  * event flow.
  */
 
+struct freq_event {
+	struct perf_event_header	header;
+	u64				time;
+	u64				id;
+	u64				period;
+};
+
 static void perf_log_period(struct perf_counter *counter, u64 period)
 {
 	struct perf_output_handle handle;
+	struct freq_event event;
 	int ret;
 
-	struct {
-		struct perf_event_header	header;
-		u64				time;
-		u64				id;
-		u64				period;
-	} freq_event = {
+	if (counter->hw.sample_period == period)
+		return;
+
+	if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
+		return;
+
+	event = (struct freq_event) {
 		.header = {
 			.type = PERF_EVENT_PERIOD,
 			.misc = 0,
-			.size = sizeof(freq_event),
+			.size = sizeof(event),
 		},
 		.time = sched_clock(),
 		.id = counter->id,
 		.period = period,
 	};
 
-	if (counter->hw.sample_period == period)
-		return;
-
-	ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
+	ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
 	if (ret)
 		return;
 
-	perf_output_put(&handle, freq_event);
+	perf_output_put(&handle, event);
 	perf_output_end(&handle);
 }
 
@@ -2923,15 +2955,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 {
 	int events = atomic_read(&counter->event_limit);
 	int throttle = counter->pmu->unthrottle != NULL;
+	struct hw_perf_counter *hwc = &counter->hw;
 	int ret = 0;
 
 	if (!throttle) {
-		counter->hw.interrupts++;
+		hwc->interrupts++;
 	} else {
-		if (counter->hw.interrupts != MAX_INTERRUPTS) {
-			counter->hw.interrupts++;
-			if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
-				counter->hw.interrupts = MAX_INTERRUPTS;
+		if (hwc->interrupts != MAX_INTERRUPTS) {
+			hwc->interrupts++;
+			if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) {
+				hwc->interrupts = MAX_INTERRUPTS;
 				perf_log_throttle(counter, 0);
 				ret = 1;
 			}
@@ -2945,6 +2978,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 		}
 	}
 
+	if (counter->attr.freq) {
+		u64 now = sched_clock();
+		s64 delta = now - hwc->freq_stamp;
+
+		hwc->freq_stamp = now;
+
+		if (delta > 0 && delta < TICK_NSEC)
+			perf_adjust_period(counter, NSEC_PER_SEC / (int)delta);
+	}
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -3379,7 +3422,6 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
-	counter->hw.sample_period = counter->attr.sample_period;
 
 	return &perf_ops_generic;
 }
@@ -3483,10 +3525,11 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	pmu = NULL;
 
 	hwc = &counter->hw;
+	hwc->sample_period = attr->sample_period;
 	if (attr->freq && attr->sample_freq)
-		hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq);
-	else
-		hwc->sample_period = attr->sample_period;
+		hwc->sample_period = 1;
+
+	atomic64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
 	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
@@ -3687,6 +3730,9 @@ inherit_counter(struct perf_counter *parent_counter,
 	else
 		child_counter->state = PERF_COUNTER_STATE_OFF;
 
+	if (parent_counter->attr.freq)
+		child_counter->hw.sample_period = parent_counter->hw.sample_period;
+
 	/*
 	 * Link it up in the child's context:
 	 */
-- 
cgit v0.10.2


From 4502d77c1d8f15f20c04b92cb96c12d4e465de29 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 15:03:06 +0200
Subject: perf_counter tools: Small frequency related fixes

Create the counter in a disabled state and only enable it after we
mmap() the buffer, this allows us to see the first few samples (and
observe the frequency ramp).

Furthermore, print the period in the verbose report.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index deaee42..a5698ad 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -347,6 +347,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	attr->mmap		= track;
 	attr->comm		= track;
 	attr->inherit		= (cpu < 0) && inherit;
+	attr->disabled		= 1;
 
 	track = 0; /* only the first counter needs these */
 
@@ -402,6 +403,8 @@ try_again:
 		error("failed to mmap with %d (%s)\n", errno, strerror(errno));
 		exit(-1);
 	}
+
+	ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
 }
 
 static void open_counters(int cpu, pid_t pid)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0b18cb9..9a0e31e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -47,6 +47,7 @@ struct ip_event {
 	struct perf_event_header header;
 	__u64 ip;
 	__u32 pid, tid;
+	__u64 period;
 };
 
 struct mmap_event {
@@ -943,12 +944,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 	uint64_t ip = event->ip.ip;
 	struct map *map = NULL;
 
-	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
+	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->header.misc,
 		event->ip.pid,
-		(void *)(long)ip);
+		(void *)(long)ip,
+		(long long)event->ip.period);
 
 	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-- 
cgit v0.10.2


From f7b7c26e01e51fe46097e11f179dc71ce7950084 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 15:55:59 +0200
Subject: perf_counter tools: Propagate signals properly

Currently report and stat catch SIGINT (and others) without altering
their exit state. This means that things like:

   while :; do perf stat ./foo ; done

Loops become hard-to-interrupt, because bash never sees perf terminate
due to interruption. Fix this.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a5698ad..c10553c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -169,10 +169,21 @@ static void mmap_read(struct mmap_data *md)
 }
 
 static volatile int done = 0;
+static volatile int signr = -1;
 
 static void sig_handler(int sig)
 {
 	done = 1;
+	signr = sig;
+}
+
+static void sig_atexit(void)
+{
+	if (signr == -1)
+		return;
+
+	signal(signr, SIG_DFL);
+	kill(getpid(), signr);
 }
 
 static void pid_synthesize_comm_event(pid_t pid, int full)
@@ -459,6 +470,7 @@ static int __cmd_record(int argc, const char **argv)
 	} else for (i = 0; i < nr_cpus; i++)
 		open_counters(i, target_pid);
 
+	atexit(sig_atexit);
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8085509..6404906 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -296,8 +296,20 @@ static int do_perf_stat(int argc, const char **argv)
 	return 0;
 }
 
+static volatile int signr = -1;
+
 static void skip_signal(int signo)
 {
+	signr = signo;
+}
+
+static void sig_atexit(void)
+{
+	if (signr == -1)
+		return;
+
+	signal(signr, SIG_DFL);
+	kill(getpid(), signr);
 }
 
 static const char * const stat_usage[] = {
@@ -345,6 +357,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
 	 * What we want is for Ctrl-C to work in the exec()-ed
 	 * task, but being ignored by perf stat itself:
 	 */
+	atexit(sig_atexit);
 	signal(SIGINT,  skip_signal);
 	signal(SIGALRM, skip_signal);
 	signal(SIGABRT, skip_signal);
-- 
cgit v0.10.2


From 58a09b38cfcd700b796ea07ae3d2e0efbb28b561 Mon Sep 17 00:00:00 2001
From: Shane Huang <shane.huang@amd.com>
Date: Wed, 27 May 2009 15:04:43 +0800
Subject: [libata] ahci: Restore SB600 SATA controller 64 bit DMA

Community reported one SB600 SATA issue(BZ #9412), which led to 64 bit
DMA disablement for all SB600 revisions by driver maintainers with
commits c7a42156d99bcea7f8173ba7a6034bbaa2ecb77c and
4cde32fc4b32e96a99063af3183acdfd54c563f0.

But the root cause is ASUS M2A-VM system BIOS bug in old revisions
like 0901, while forcing into 32bit DMA happens to work as workaround.
Now it's time to withdraw 4cde32fc4b32e96a99063af3183acdfd54c563f0
so as to restore the SB600 SATA 64bit DMA capability.
This patch is also adding the workaround for M2A-VM old BIOS revisions,
but users are suggested to upgrade their system BIOS to the latest one
if they meet this issue.

Signed-off-by: Shane Huang <shane.huang@amd.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 2141a31..15a2303 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -431,8 +431,7 @@ static const struct ata_port_info ahci_port_info[] = {
 	[board_ahci_sb600] =
 	{
 		AHCI_HFLAGS	(AHCI_HFLAG_IGN_SERR_INTERNAL |
-				 AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
-				 AHCI_HFLAG_SECT255),
+				 AHCI_HFLAG_NO_MSI | AHCI_HFLAG_SECT255),
 		.flags		= AHCI_FLAG_COMMON,
 		.pio_mask	= ATA_PIO4,
 		.udma_mask	= ATA_UDMA6,
@@ -2585,6 +2584,51 @@ static void ahci_p5wdh_workaround(struct ata_host *host)
 	}
 }
 
+/*
+ * SB600 ahci controller on ASUS M2A-VM can't do 64bit DMA with older
+ * BIOS.  The oldest version known to be broken is 0901 and working is
+ * 1501 which was released on 2007-10-26.  Force 32bit DMA on anything
+ * older than 1501.  Please read bko#9412 for more info.
+ */
+static bool ahci_asus_m2a_vm_32bit_only(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		{
+			.ident = "ASUS M2A-VM",
+			.matches = {
+				DMI_MATCH(DMI_BOARD_VENDOR,
+					  "ASUSTeK Computer INC."),
+				DMI_MATCH(DMI_BOARD_NAME, "M2A-VM"),
+			},
+		},
+		{ }
+	};
+	const char *cutoff_mmdd = "10/26";
+	const char *date;
+	int year;
+
+	if (pdev->bus->number != 0 || pdev->devfn != PCI_DEVFN(0x12, 0) ||
+	    !dmi_check_system(sysids))
+		return false;
+
+	/*
+	 * Argh.... both version and date are free form strings.
+	 * Let's hope they're using the same date format across
+	 * different versions.
+	 */
+	date = dmi_get_system_info(DMI_BIOS_DATE);
+	year = dmi_get_year(DMI_BIOS_DATE);
+	if (date && strlen(date) >= 10 && date[2] == '/' && date[5] == '/' &&
+	    (year > 2007 ||
+	     (year == 2007 && strncmp(date, cutoff_mmdd, 5) >= 0)))
+		return false;
+
+	dev_printk(KERN_WARNING, &pdev->dev, "ASUS M2A-VM: BIOS too old, "
+		   "forcing 32bit DMA, update BIOS\n");
+
+	return true;
+}
+
 static bool ahci_broken_system_poweroff(struct pci_dev *pdev)
 {
 	static const struct dmi_system_id broken_systems[] = {
@@ -2745,6 +2789,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (board_id == board_ahci_sb700 && pdev->revision >= 0x40)
 		hpriv->flags &= ~AHCI_HFLAG_IGN_SERR_INTERNAL;
 
+	/* apply ASUS M2A_VM quirk */
+	if (ahci_asus_m2a_vm_32bit_only(pdev))
+		hpriv->flags |= AHCI_HFLAG_32BIT_ONLY;
+
 	if (!(hpriv->flags & AHCI_HFLAG_NO_MSI))
 		pci_enable_msi(pdev);
 
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 5f1b540..24c84ae 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -596,6 +596,7 @@ int dmi_get_year(int field)
 
 	return year;
 }
+EXPORT_SYMBOL(dmi_get_year);
 
 /**
  *	dmi_walk - Walk the DMI table and get called back for every record
-- 
cgit v0.10.2


From 7f4774b38ee6270bbc6c3015cb3fa6c415ffb340 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 10 Jun 2009 16:29:07 +0900
Subject: sata_nv: use hardreset only for post-boot probing

When I thought it was finally defeated, it came back with vengeance.
The failure cases are ever more convoluted.  Now there is a single
combination which fails boot probing - MCP5x + Intel SSD and there are
two hotplug failure reports on different flavors where softreset fails
to bring up the device.

Through the many bug reports after the switch to hardreset, the
following patterns emerged.

- Softreset during boot always works.

- Hardreset during boot sometimes fails to bring up the link on
  certain comibnations and device signature acquisition is unreliable.

- Hardreset is often necessary after hotplug.

It looks like the old behavior of preferring softreset was somehow
pretty close to the working reset protocol although it could have lost
a device during phy error handling by issuing hardreset.

This patch implements nv_hardreset() which kicks in only for post-boot
(!LOADING) device probing resets.  This should be able to work around
all known problem cases.  This isn't perfect but given the various
hardreset quirks on these controllers, I think this is as good as it
can get.

Tested on mcp5x (swncq), nf3 and ck804 for all both boot, warm and
hot probing cases.

Kudos to all the bug reporters and their painful hours with these damn
controllers.  ;-)

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Robert Hancock <hancockr@shaw.ca>
Reported-by: David Lang <david@lang.hm>
Reported-by: Samo Vodopivec <lament.email.si@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 6cda12b..b2d11f3 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -305,8 +305,8 @@ static irqreturn_t nv_ck804_interrupt(int irq, void *dev_instance);
 static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
 static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 
-static int nv_noclassify_hardreset(struct ata_link *link, unsigned int *class,
-				   unsigned long deadline);
+static int nv_hardreset(struct ata_link *link, unsigned int *class,
+			unsigned long deadline);
 static void nv_nf2_freeze(struct ata_port *ap);
 static void nv_nf2_thaw(struct ata_port *ap);
 static void nv_ck804_freeze(struct ata_port *ap);
@@ -406,49 +406,82 @@ static struct scsi_host_template nv_swncq_sht = {
 	.slave_configure	= nv_swncq_slave_config,
 };
 
-static struct ata_port_operations nv_common_ops = {
+/*
+ * NV SATA controllers have various different problems with hardreset
+ * protocol depending on the specific controller and device.
+ *
+ * GENERIC:
+ *
+ *  bko11195 reports that link doesn't come online after hardreset on
+ *  generic nv's and there have been several other similar reports on
+ *  linux-ide.
+ *
+ *  bko12351#c23 reports that warmplug on MCP61 doesn't work with
+ *  softreset.
+ *
+ * NF2/3:
+ *
+ *  bko3352 reports nf2/3 controllers can't determine device signature
+ *  reliably after hardreset.  The following thread reports detection
+ *  failure on cold boot with the standard debouncing timing.
+ *
+ *  http://thread.gmane.org/gmane.linux.ide/34098
+ *
+ *  bko12176 reports that hardreset fails to bring up the link during
+ *  boot on nf2.
+ *
+ * CK804:
+ *
+ *  For initial probing after boot and hot plugging, hardreset mostly
+ *  works fine on CK804 but curiously, reprobing on the initial port
+ *  by rescanning or rmmod/insmod fails to acquire the initial D2H Reg
+ *  FIS in somewhat undeterministic way.
+ *
+ * SWNCQ:
+ *
+ *  bko12351 reports that when SWNCQ is enabled, for hotplug to work,
+ *  hardreset should be used and hardreset can't report proper
+ *  signature, which suggests that mcp5x is closer to nf2 as long as
+ *  reset quirkiness is concerned.
+ *
+ *  bko12703 reports that boot probing fails for intel SSD with
+ *  hardreset.  Link fails to come online.  Softreset works fine.
+ *
+ * The failures are varied but the following patterns seem true for
+ * all flavors.
+ *
+ * - Softreset during boot always works.
+ *
+ * - Hardreset during boot sometimes fails to bring up the link on
+ *   certain comibnations and device signature acquisition is
+ *   unreliable.
+ *
+ * - Hardreset is often necessary after hotplug.
+ *
+ * So, preferring softreset for boot probing and error handling (as
+ * hardreset might bring down the link) but using hardreset for
+ * post-boot probing should work around the above issues in most
+ * cases.  Define nv_hardreset() which only kicks in for post-boot
+ * probing and use it for all variants.
+ */
+static struct ata_port_operations nv_generic_ops = {
 	.inherits		= &ata_bmdma_port_ops,
 	.lost_interrupt		= ATA_OP_NULL,
 	.scr_read		= nv_scr_read,
 	.scr_write		= nv_scr_write,
+	.hardreset		= nv_hardreset,
 };
 
-/* OSDL bz11195 reports that link doesn't come online after hardreset
- * on generic nv's and there have been several other similar reports
- * on linux-ide.  Disable hardreset for generic nv's.
- */
-static struct ata_port_operations nv_generic_ops = {
-	.inherits		= &nv_common_ops,
-	.hardreset		= ATA_OP_NULL,
-};
-
-/* nf2 is ripe with hardreset related problems.
- *
- * kernel bz#3352 reports nf2/3 controllers can't determine device
- * signature reliably.  The following thread reports detection failure
- * on cold boot with the standard debouncing timing.
- *
- * http://thread.gmane.org/gmane.linux.ide/34098
- *
- * And bz#12176 reports that hardreset simply doesn't work on nf2.
- * Give up on it and just don't do hardreset.
- */
 static struct ata_port_operations nv_nf2_ops = {
 	.inherits		= &nv_generic_ops,
 	.freeze			= nv_nf2_freeze,
 	.thaw			= nv_nf2_thaw,
 };
 
-/* For initial probing after boot and hot plugging, hardreset mostly
- * works fine on CK804 but curiously, reprobing on the initial port by
- * rescanning or rmmod/insmod fails to acquire the initial D2H Reg FIS
- * in somewhat undeterministic way.  Use noclassify hardreset.
- */
 static struct ata_port_operations nv_ck804_ops = {
-	.inherits		= &nv_common_ops,
+	.inherits		= &nv_generic_ops,
 	.freeze			= nv_ck804_freeze,
 	.thaw			= nv_ck804_thaw,
-	.hardreset		= nv_noclassify_hardreset,
 	.host_stop		= nv_ck804_host_stop,
 };
 
@@ -476,19 +509,8 @@ static struct ata_port_operations nv_adma_ops = {
 	.host_stop		= nv_adma_host_stop,
 };
 
-/* Kernel bz#12351 reports that when SWNCQ is enabled, for hotplug to
- * work, hardreset should be used and hardreset can't report proper
- * signature, which suggests that mcp5x is closer to nf2 as long as
- * reset quirkiness is concerned.  Define separate ops for mcp5x with
- * nv_noclassify_hardreset().
- */
-static struct ata_port_operations nv_mcp5x_ops = {
-	.inherits		= &nv_common_ops,
-	.hardreset		= nv_noclassify_hardreset,
-};
-
 static struct ata_port_operations nv_swncq_ops = {
-	.inherits		= &nv_mcp5x_ops,
+	.inherits		= &nv_generic_ops,
 
 	.qc_defer		= ata_std_qc_defer,
 	.qc_prep		= nv_swncq_qc_prep,
@@ -557,7 +579,7 @@ static const struct ata_port_info nv_port_info[] = {
 		.pio_mask	= NV_PIO_MASK,
 		.mwdma_mask	= NV_MWDMA_MASK,
 		.udma_mask	= NV_UDMA_MASK,
-		.port_ops	= &nv_mcp5x_ops,
+		.port_ops	= &nv_generic_ops,
 		.private_data	= NV_PI_PRIV(nv_generic_interrupt, &nv_sht),
 	},
 	/* SWNCQ */
@@ -1559,15 +1581,24 @@ static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 	return 0;
 }
 
-static int nv_noclassify_hardreset(struct ata_link *link, unsigned int *class,
-				   unsigned long deadline)
+static int nv_hardreset(struct ata_link *link, unsigned int *class,
+			unsigned long deadline)
 {
-	bool online;
-	int rc;
+	struct ata_eh_context *ehc = &link->eh_context;
 
-	rc = sata_link_hardreset(link, sata_deb_timing_hotplug, deadline,
-				 &online, NULL);
-	return online ? -EAGAIN : rc;
+	/* Do hardreset iff it's post-boot probing, please read the
+	 * comment above port ops for details.
+	 */
+	if (!(link->ap->pflags & ATA_PFLAG_LOADING) &&
+	    !ata_dev_enabled(link->device))
+		sata_link_hardreset(link, sata_deb_timing_hotplug, deadline,
+				    NULL, NULL);
+	else if (!(ehc->i.flags & ATA_EHI_QUIET))
+		ata_link_printk(link, KERN_INFO,
+				"nv: skipping hardreset on occupied port\n");
+
+	/* device signature acquisition is unreliable */
+	return -EAGAIN;
 }
 
 static void nv_nf2_freeze(struct ata_port *ap)
-- 
cgit v0.10.2


From 517d3cc15b36392e518abab6bacbb72089658313 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Wed, 13 May 2009 15:02:42 +0100
Subject: [libata] ata_piix: Enable parallel scan

This patch turns on parallel scanning for the ata_piix driver.
This driver is used on most netbooks (no AHCI for cheap storage it seems).
The scan is the dominating time factor in the kernel boot for these
devices; with this flag it gets cut in half for the device I used
for testing (eeepc).
Alan took a look at the driver source and concluded that it ought to be safe
to do for this driver.  Alan has also checked with the hardware team.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 6e165bf..d0a14cf 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -1589,6 +1589,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
 		host->ports[1]->mwdma_mask = 0;
 		host->ports[1]->udma_mask = 0;
 	}
+	host->flags |= ATA_HOST_PARALLEL_SCAN;
 
 	pci_set_master(pdev);
 	return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht);
-- 
cgit v0.10.2


From ef14f0c1578dce4b688726eb2603e50b62d6665a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 Jun 2009 17:07:47 +0200
Subject: xfs: use generic Posix ACL code

This patch rips out the XFS ACL handling code and uses the generic
fs/posix_acl.c code instead.  The ondisk format is of course left
unchanged.

This also introduces the same ACL caching all other Linux filesystems do
by adding pointers to the acl and default acl in struct xfs_inode.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 29228f5..480f281 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -39,6 +39,7 @@ config XFS_QUOTA
 config XFS_POSIX_ACL
 	bool "XFS POSIX ACL support"
 	depends on XFS_FS
+	select FS_POSIX_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
 	  groups beyond the owner/group/world scheme.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 222c59e..7a59dae 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS)		+= quota/xfs_qm_stats.o
 endif
 
 xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
+xfs-$(CONFIG_XFS_POSIX_ACL)	+= $(XFS_LINUX)/xfs_acl.o
 xfs-$(CONFIG_PROC_FS)		+= $(XFS_LINUX)/xfs_stats.o
 xfs-$(CONFIG_SYSCTL)		+= $(XFS_LINUX)/xfs_sysctl.o
 xfs-$(CONFIG_COMPAT)		+= $(XFS_LINUX)/xfs_ioctl32.o
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
new file mode 100644
index 0000000..1e9d124
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+#define XFS_ACL_NOT_CACHED	((void *)-1)
+
+/*
+ * Locking scheme:
+ *  - all ACL updates are protected by inode->i_mutex, which is taken before
+ *    calling into this file.
+ *  - access and updates to the ip->i_acl and ip->i_default_acl pointers are
+ *    protected by inode->i_lock.
+ */
+
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+	struct posix_acl_entry *acl_e;
+	struct posix_acl *acl;
+	struct xfs_acl_entry *ace;
+	int count, i;
+
+	count = be32_to_cpu(aclp->acl_cnt);
+
+	acl = posix_acl_alloc(count, GFP_KERNEL);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		acl_e = &acl->a_entries[i];
+		ace = &aclp->acl_entry[i];
+
+		/*
+		 * The tag is 32 bits on disk and 16 bits in core.
+		 *
+		 * Because every access to it goes through the core
+		 * format first this is not a problem.
+		 */
+		acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+		acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+		switch (acl_e->e_tag) {
+		case ACL_USER:
+		case ACL_GROUP:
+			acl_e->e_id = be32_to_cpu(ace->ae_id);
+			break;
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			acl_e->e_id = ACL_UNDEFINED_ID;
+			break;
+		default:
+			goto fail;
+		}
+	}
+	return acl;
+
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+	const struct posix_acl_entry *acl_e;
+	struct xfs_acl_entry *ace;
+	int i;
+
+	aclp->acl_cnt = cpu_to_be32(acl->a_count);
+	for (i = 0; i < acl->a_count; i++) {
+		ace = &aclp->acl_entry[i];
+		acl_e = &acl->a_entries[i];
+
+		ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+		ace->ae_id = cpu_to_be32(acl_e->e_id);
+		ace->ae_perm = cpu_to_be16(acl_e->e_perm);
+	}
+}
+
+/*
+ * Update the cached ACL pointer in the inode.
+ *
+ * Because we don't hold any locks while reading/writing the attribute
+ * from/to disk another thread could have raced and updated the cached
+ * ACL value before us. In that case we release the previous cached value
+ * and update it with our new value.
+ */
+STATIC void
+xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
+		struct posix_acl *acl)
+{
+	spin_lock(&inode->i_lock);
+	if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
+		posix_acl_release(*p_acl);
+	*p_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl *acl = NULL, **p_acl;
+	struct xfs_acl *xfs_acl;
+	int len = sizeof(struct xfs_acl);
+	char *ea_name;
+	int error;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		ea_name = SGI_ACL_FILE;
+		p_acl = &ip->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		ea_name = SGI_ACL_DEFAULT;
+		p_acl = &ip->i_default_acl;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock(&inode->i_lock);
+	if (*p_acl != XFS_ACL_NOT_CACHED)
+		acl = posix_acl_dup(*p_acl);
+	spin_unlock(&inode->i_lock);
+
+	/*
+	 * If we have a cached ACLs value just return it, not need to
+	 * go out to the disk.
+	 */
+	if (acl)
+		return acl;
+
+	xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+	if (!xfs_acl)
+		return ERR_PTR(-ENOMEM);
+
+	error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+	if (error) {
+		/*
+		 * If the attribute doesn't exist make sure we have a negative
+		 * cache entry, for any other error assume it is transient and
+		 * leave the cache entry as XFS_ACL_NOT_CACHED.
+		 */
+		if (error == -ENOATTR) {
+			acl = NULL;
+			goto out_update_cache;
+		}
+		goto out;
+	}
+
+	acl = xfs_acl_from_disk(xfs_acl);
+	if (IS_ERR(acl))
+		goto out;
+
+ out_update_cache:
+	xfs_update_cached_acl(inode, p_acl, acl);
+ out:
+	kfree(xfs_acl);
+	return acl;
+}
+
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl **p_acl;
+	char *ea_name;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		ea_name = SGI_ACL_FILE;
+		p_acl = &ip->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		ea_name = SGI_ACL_DEFAULT;
+		p_acl = &ip->i_default_acl;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		struct xfs_acl *xfs_acl;
+		int len;
+
+		xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+		if (!xfs_acl)
+			return -ENOMEM;
+
+		xfs_acl_to_disk(xfs_acl, acl);
+		len = sizeof(struct xfs_acl) -
+			(sizeof(struct xfs_acl_entry) *
+			 (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+		error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+				len, ATTR_ROOT);
+
+		kfree(xfs_acl);
+	} else {
+		/*
+		 * A NULL ACL argument means we want to remove the ACL.
+		 */
+		error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+		/*
+		 * If the attribute didn't exist to start with that's fine.
+		 */
+		if (error == -ENOATTR)
+			error = 0;
+	}
+
+	if (!error)
+		xfs_update_cached_acl(inode, p_acl, acl);
+	return error;
+}
+
+int
+xfs_check_acl(struct inode *inode, int mask)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl *acl;
+	int error = -EAGAIN;
+
+	xfs_itrace_entry(ip);
+
+	/*
+	 * If there is no attribute fork no ACL exists on this inode and
+	 * we can skip the whole exercise.
+	 */
+	if (!XFS_IFORK_Q(ip))
+		return -EAGAIN;
+
+	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+	}
+
+	return error;
+}
+
+static int
+xfs_set_mode(struct inode *inode, mode_t mode)
+{
+	int error = 0;
+
+	if (mode != inode->i_mode) {
+		struct iattr iattr;
+
+		iattr.ia_valid = ATTR_MODE;
+		iattr.ia_mode = mode;
+
+		error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+	}
+
+	return error;
+}
+
+static int
+xfs_acl_exists(struct inode *inode, char *name)
+{
+	int len = sizeof(struct xfs_acl);
+
+	return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+			    ATTR_ROOT|ATTR_KERNOVAL) == 0);
+}
+
+int
+posix_acl_access_exists(struct inode *inode)
+{
+	return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+int
+posix_acl_default_exists(struct inode *inode)
+{
+	if (!S_ISDIR(inode->i_mode))
+		return 0;
+	return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
+}
+
+/*
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
+{
+	struct posix_acl *clone;
+	mode_t mode;
+	int error = 0, inherit = 0;
+
+	if (S_ISDIR(inode->i_mode)) {
+		error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl);
+		if (error)
+			return error;
+	}
+
+	clone = posix_acl_clone(default_acl, GFP_KERNEL);
+	if (!clone)
+		return -ENOMEM;
+
+	mode = inode->i_mode;
+	error = posix_acl_create_masq(clone, &mode);
+	if (error < 0)
+		goto out_release_clone;
+
+	/*
+	 * If posix_acl_create_masq returns a positive value we need to
+	 * inherit a permission that can't be represented using the Unix
+	 * mode bits and we actually need to set an ACL.
+	 */
+	if (error > 0)
+		inherit = 1;
+
+	error = xfs_set_mode(inode, mode);
+	if (error)
+		goto out_release_clone;
+
+	if (inherit)
+		error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+
+ out_release_clone:
+	posix_acl_release(clone);
+	return error;
+}
+
+int
+xfs_acl_chmod(struct inode *inode)
+{
+	struct posix_acl *acl, *clone;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+
+	clone = posix_acl_clone(acl, GFP_KERNEL);
+	posix_acl_release(acl);
+	if (!clone)
+		return -ENOMEM;
+
+	error = posix_acl_chmod_masq(clone, inode->i_mode);
+	if (!error)
+		error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+
+	posix_acl_release(clone);
+	return error;
+}
+
+void
+xfs_inode_init_acls(struct xfs_inode *ip)
+{
+	/*
+	 * No need for locking, inode is not live yet.
+	 */
+	ip->i_acl = XFS_ACL_NOT_CACHED;
+	ip->i_default_acl = XFS_ACL_NOT_CACHED;
+}
+
+void
+xfs_inode_clear_acls(struct xfs_inode *ip)
+{
+	/*
+	 * No need for locking here, the inode is not live anymore
+	 * and just about to be freed.
+	 */
+	if (ip->i_acl != XFS_ACL_NOT_CACHED)
+		posix_acl_release(ip->i_acl);
+	if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
+		posix_acl_release(ip->i_default_acl);
+}
+
+
+/*
+ * System xattr handlers.
+ *
+ * Currently Posix ACLs are the only system namespace extended attribute
+ * handlers supported by XFS, so we just implement the handlers here.
+ * If we ever support other system extended attributes this will need
+ * some refactoring.
+ */
+
+static int
+xfs_decode_acl(const char *name)
+{
+	if (strcmp(name, "posix_acl_access") == 0)
+		return ACL_TYPE_ACCESS;
+	else if (strcmp(name, "posix_acl_default") == 0)
+		return ACL_TYPE_DEFAULT;
+	return -EINVAL;
+}
+
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+		void *value, size_t size)
+{
+	struct posix_acl *acl;
+	int type, error;
+
+	type = xfs_decode_acl(name);
+	if (type < 0)
+		return type;
+
+	acl = xfs_get_acl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+
+	error = posix_acl_to_xattr(acl, value, size);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+		const void *value, size_t size, int flags)
+{
+	struct posix_acl *acl = NULL;
+	int error = 0, type;
+
+	type = xfs_decode_acl(name);
+	if (type < 0)
+		return type;
+	if (flags & XATTR_CREATE)
+		return -EINVAL;
+	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+		return value ? -EACCES : 0;
+	if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (!value)
+		goto set_acl;
+
+	acl = posix_acl_from_xattr(value, size);
+	if (!acl) {
+		/*
+		 * acl_set_file(3) may request that we set default ACLs with
+		 * zero length -- defend (gracefully) against that here.
+		 */
+		goto out;
+	}
+	if (IS_ERR(acl)) {
+		error = PTR_ERR(acl);
+		goto out;
+	}
+
+	error = posix_acl_valid(acl);
+	if (error)
+		goto out_release;
+
+	error = -EINVAL;
+	if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+		goto out_release;
+
+	if (type == ACL_TYPE_ACCESS) {
+		mode_t mode = inode->i_mode;
+		error = posix_acl_equiv_mode(acl, &mode);
+
+		if (error <= 0) {
+			posix_acl_release(acl);
+			acl = NULL;
+
+			if (error < 0)
+				return error;
+		}
+
+		error = xfs_set_mode(inode, mode);
+		if (error)
+			goto out_release;
+	}
+
+ set_acl:
+	error = xfs_set_acl(inode, type, acl);
+ out_release:
+	posix_acl_release(acl);
+ out:
+	return error;
+}
+
+struct xattr_handler xfs_xattr_system_handler = {
+	.prefix	= XATTR_SYSTEM_PREFIX,
+	.get	= xfs_xattr_system_get,
+	.set	= xfs_xattr_system_set,
+};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index c7d684f..182d4be 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -41,7 +41,6 @@
 #include "xfs_itable.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_bmap.h"
 #include "xfs_buf_item.h"
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 631d013..84e391a 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_acl.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
@@ -51,6 +52,7 @@
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/namei.h>
+#include <linux/posix_acl.h>
 #include <linux/security.h>
 #include <linux/falloc.h>
 #include <linux/fiemap.h>
@@ -202,9 +204,8 @@ xfs_vn_mknod(
 {
 	struct inode	*inode;
 	struct xfs_inode *ip = NULL;
-	xfs_acl_t	*default_acl = NULL;
+	struct posix_acl *default_acl = NULL;
 	struct xfs_name	name;
-	int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
 	int		error;
 
 	/*
@@ -219,18 +220,14 @@ xfs_vn_mknod(
 		rdev = 0;
 	}
 
-	if (test_default_acl && test_default_acl(dir)) {
-		if (!_ACL_ALLOC(default_acl)) {
-			return -ENOMEM;
-		}
-		if (!_ACL_GET_DEFAULT(dir, default_acl)) {
-			_ACL_FREE(default_acl);
-			default_acl = NULL;
-		}
-	}
+	if (IS_POSIXACL(dir)) {
+		default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+		if (IS_ERR(default_acl))
+			return -PTR_ERR(default_acl);
 
-	if (IS_POSIXACL(dir) && !default_acl)
-		mode &= ~current->fs->umask;
+		if (!default_acl)
+			mode &= ~current->fs->umask;
+	}
 
 	xfs_dentry_to_name(&name, dentry);
 	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
@@ -244,10 +241,10 @@ xfs_vn_mknod(
 		goto out_cleanup_inode;
 
 	if (default_acl) {
-		error = _ACL_INHERIT(inode, mode, default_acl);
+		error = -xfs_inherit_acl(inode, default_acl);
 		if (unlikely(error))
 			goto out_cleanup_inode;
-		_ACL_FREE(default_acl);
+		posix_acl_release(default_acl);
 	}
 
 
@@ -257,8 +254,7 @@ xfs_vn_mknod(
  out_cleanup_inode:
 	xfs_cleanup_inode(dir, inode, dentry);
  out_free_acl:
-	if (default_acl)
-		_ACL_FREE(default_acl);
+	posix_acl_release(default_acl);
 	return -error;
 }
 
@@ -488,26 +484,6 @@ xfs_vn_put_link(
 		kfree(s);
 }
 
-#ifdef CONFIG_XFS_POSIX_ACL
-STATIC int
-xfs_check_acl(
-	struct inode		*inode,
-	int			mask)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	int			error;
-
-	xfs_itrace_entry(ip);
-
-	if (XFS_IFORK_Q(ip)) {
-		error = xfs_acl_iaccess(ip, mask, NULL);
-		if (error != -1)
-			return -error;
-	}
-
-	return -EAGAIN;
-}
-
 STATIC int
 xfs_vn_permission(
 	struct inode		*inode,
@@ -515,9 +491,6 @@ xfs_vn_permission(
 {
 	return generic_permission(inode, mask, xfs_check_acl);
 }
-#else
-#define xfs_vn_permission NULL
-#endif
 
 STATIC int
 xfs_vn_getattr(
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 9142192..7078974 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_inode_item.h"
 #include "xfs_buf_item.h"
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index d4e7ef8..36fb8a6 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -43,7 +43,6 @@
 #include "xfs_itable.h"
 #include "xfs_fsops.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
@@ -1735,18 +1734,8 @@ xfs_init_zones(void)
 	if (!xfs_ili_zone)
 		goto out_destroy_inode_zone;
 
-#ifdef CONFIG_XFS_POSIX_ACL
-	xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
-	if (!xfs_acl_zone)
-		goto out_destroy_ili_zone;
-#endif
-
 	return 0;
 
-#ifdef CONFIG_XFS_POSIX_ACL
- out_destroy_ili_zone:
-#endif
-	kmem_zone_destroy(xfs_ili_zone);
  out_destroy_inode_zone:
 	kmem_zone_destroy(xfs_inode_zone);
  out_destroy_efi_zone:
@@ -1780,9 +1769,6 @@ xfs_init_zones(void)
 STATIC void
 xfs_destroy_zones(void)
 {
-#ifdef CONFIG_XFS_POSIX_ACL
-	kmem_zone_destroy(xfs_acl_zone);
-#endif
 	kmem_zone_destroy(xfs_ili_zone);
 	kmem_zone_destroy(xfs_inode_zone);
 	kmem_zone_destroy(xfs_efi_zone);
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 964621f..497c7fb 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -29,67 +29,6 @@
 #include <linux/xattr.h>
 
 
-/*
- * ACL handling.  Should eventually be moved into xfs_acl.c
- */
-
-static int
-xfs_decode_acl(const char *name)
-{
-	if (strcmp(name, "posix_acl_access") == 0)
-		return _ACL_TYPE_ACCESS;
-	else if (strcmp(name, "posix_acl_default") == 0)
-		return _ACL_TYPE_DEFAULT;
-	return -EINVAL;
-}
-
-/*
- * Get system extended attributes which at the moment only
- * includes Posix ACLs.
- */
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
-		void *buffer, size_t size)
-{
-	int acl;
-
-	acl = xfs_decode_acl(name);
-	if (acl < 0)
-		return acl;
-
-	return xfs_acl_vget(inode, buffer, size, acl);
-}
-
-static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	int acl;
-
-	acl = xfs_decode_acl(name);
-	if (acl < 0)
-		return acl;
-	if (flags & XATTR_CREATE)
-		return -EINVAL;
-
-	if (!value)
-		return xfs_acl_vremove(inode, acl);
-
-	return xfs_acl_vset(inode, (void *)value, size, acl);
-}
-
-static struct xattr_handler xfs_xattr_system_handler = {
-	.prefix	= XATTR_SYSTEM_PREFIX,
-	.get	= xfs_xattr_system_get,
-	.set	= xfs_xattr_system_set,
-};
-
-
-/*
- * Real xattr handling.  The only difference between the namespaces is
- * a flag passed to the low-level attr code.
- */
-
 static int
 __xfs_xattr_get(struct inode *inode, const char *name,
 		void *value, size_t size, int xflags)
@@ -199,7 +138,9 @@ struct xattr_handler *xfs_xattr_handlers[] = {
 	&xfs_xattr_user_handler,
 	&xfs_xattr_trusted_handler,
 	&xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
 	&xfs_xattr_system_handler,
+#endif
 	NULL
 };
 
@@ -310,7 +251,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
 	/*
 	 * Then add the two synthetic ACL attributes.
 	 */
-	if (xfs_acl_vhasacl_access(inode)) {
+	if (posix_acl_access_exists(inode)) {
 		error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
 				strlen(POSIX_ACL_XATTR_ACCESS) + 1,
 				data, size, &context.count);
@@ -318,7 +259,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
 			return error;
 	}
 
-	if (xfs_acl_vhasacl_default(inode)) {
+	if (posix_acl_default_exists(inode)) {
 		error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
 				strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
 				data, size, &context.count);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 4d6d051..2f3f2229 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 1728f6a..d0d4a9a 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 22b7c8d..45b1bfe 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 56a5965..a534663 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -42,7 +42,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 709f5f5..21b08c0 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -42,7 +42,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 7126f85..4e4276b 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -45,7 +45,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index eafa7ab..97ac964 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -42,7 +42,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
deleted file mode 100644
index a8cdd73..0000000
--- a/fs/xfs/xfs_acl.c
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/capability.h>
-#include <linux/posix_acl_xattr.h>
-
-STATIC int	xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
-STATIC void     xfs_acl_filter_mode(mode_t, xfs_acl_t *);
-STATIC void	xfs_acl_get_endian(xfs_acl_t *);
-STATIC int	xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
-STATIC int	xfs_acl_invalid(xfs_acl_t *);
-STATIC void	xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void	xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
-STATIC void	xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
-STATIC int	xfs_acl_allow_set(struct inode *, int);
-
-kmem_zone_t *xfs_acl_zone;
-
-
-/*
- * Test for existence of access ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_access(
-	struct inode	*vp)
-{
-	int		error;
-
-	xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error);
-	return (error == 0);
-}
-
-/*
- * Test for existence of default ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_default(
-	struct inode	*vp)
-{
-	int		error;
-
-	if (!S_ISDIR(vp->i_mode))
-		return 0;
-	xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
-	return (error == 0);
-}
-
-/*
- * Convert from extended attribute representation to in-memory for XFS.
- */
-STATIC int
-posix_acl_xattr_to_xfs(
-	posix_acl_xattr_header	*src,
-	size_t			size,
-	xfs_acl_t		*dest)
-{
-	posix_acl_xattr_entry	*src_entry;
-	xfs_acl_entry_t		*dest_entry;
-	int			n;
-
-	if (!src || !dest)
-		return EINVAL;
-
-	if (size < sizeof(posix_acl_xattr_header))
-		return EINVAL;
-
-	if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
-		return EOPNOTSUPP;
-
-	memset(dest, 0, sizeof(xfs_acl_t));
-	dest->acl_cnt = posix_acl_xattr_count(size);
-	if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES)
-		return EINVAL;
-
-	/*
-	 * acl_set_file(3) may request that we set default ACLs with
-	 * zero length -- defend (gracefully) against that here.
-	 */
-	if (!dest->acl_cnt)
-		return 0;
-
-	src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src));
-	dest_entry = &dest->acl_entry[0];
-
-	for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) {
-		dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm);
-		if (_ACL_PERM_INVALID(dest_entry->ae_perm))
-			return EINVAL;
-		dest_entry->ae_tag  = le16_to_cpu(src_entry->e_tag);
-		switch(dest_entry->ae_tag) {
-		case ACL_USER:
-		case ACL_GROUP:
-			dest_entry->ae_id = le32_to_cpu(src_entry->e_id);
-			break;
-		case ACL_USER_OBJ:
-		case ACL_GROUP_OBJ:
-		case ACL_MASK:
-		case ACL_OTHER:
-			dest_entry->ae_id = ACL_UNDEFINED_ID;
-			break;
-		default:
-			return EINVAL;
-		}
-	}
-	if (xfs_acl_invalid(dest))
-		return EINVAL;
-
-	return 0;
-}
-
-/*
- * Comparison function called from xfs_sort().
- * Primary key is ae_tag, secondary key is ae_id.
- */
-STATIC int
-xfs_acl_entry_compare(
-	const void	*va,
-	const void	*vb)
-{
-	xfs_acl_entry_t	*a = (xfs_acl_entry_t *)va,
-			*b = (xfs_acl_entry_t *)vb;
-
-	if (a->ae_tag == b->ae_tag)
-		return (a->ae_id - b->ae_id);
-	return (a->ae_tag - b->ae_tag);
-}
-
-/*
- * Convert from in-memory XFS to extended attribute representation.
- */
-STATIC int
-posix_acl_xfs_to_xattr(
-	xfs_acl_t		*src,
-	posix_acl_xattr_header	*dest,
-	size_t			size)
-{
-	int			n;
-	size_t			new_size = posix_acl_xattr_size(src->acl_cnt);
-	posix_acl_xattr_entry	*dest_entry;
-	xfs_acl_entry_t		*src_entry;
-
-	if (size < new_size)
-		return -ERANGE;
-
-	/* Need to sort src XFS ACL by <ae_tag,ae_id> */
-	xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
-		 xfs_acl_entry_compare);
-
-	dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
-	dest_entry = &dest->a_entries[0];
-	src_entry = &src->acl_entry[0];
-	for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) {
-		dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm);
-		if (_ACL_PERM_INVALID(src_entry->ae_perm))
-			return -EINVAL;
-		dest_entry->e_tag  = cpu_to_le16(src_entry->ae_tag);
-		switch (src_entry->ae_tag) {
-		case ACL_USER:
-		case ACL_GROUP:
-			dest_entry->e_id = cpu_to_le32(src_entry->ae_id);
-				break;
-		case ACL_USER_OBJ:
-		case ACL_GROUP_OBJ:
-		case ACL_MASK:
-		case ACL_OTHER:
-			dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-	return new_size;
-}
-
-int
-xfs_acl_vget(
-	struct inode	*vp,
-	void		*acl,
-	size_t		size,
-	int		kind)
-{
-	int			error;
-	xfs_acl_t		*xfs_acl = NULL;
-	posix_acl_xattr_header	*ext_acl = acl;
-	int			flags = 0;
-
-	if(size) {
-		if (!(_ACL_ALLOC(xfs_acl))) {
-			error = ENOMEM;
-			goto out;
-		}
-		memset(xfs_acl, 0, sizeof(xfs_acl_t));
-	} else
-		flags = ATTR_KERNOVAL;
-
-	xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error);
-	if (error)
-		goto out;
-
-	if (!size) {
-		error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES);
-	} else {
-		if (xfs_acl_invalid(xfs_acl)) {
-			error = EINVAL;
-			goto out;
-		}
-		if (kind == _ACL_TYPE_ACCESS)
-			xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
-		error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
-	}
-out:
-	if(xfs_acl)
-		_ACL_FREE(xfs_acl);
-	return -error;
-}
-
-int
-xfs_acl_vremove(
-	struct inode	*vp,
-	int		kind)
-{
-	int		error;
-
-	error = xfs_acl_allow_set(vp, kind);
-	if (!error) {
-		error = xfs_attr_remove(XFS_I(vp),
-						kind == _ACL_TYPE_DEFAULT?
-						SGI_ACL_DEFAULT: SGI_ACL_FILE,
-						ATTR_ROOT);
-		if (error == ENOATTR)
-			error = 0;	/* 'scool */
-	}
-	return -error;
-}
-
-int
-xfs_acl_vset(
-	struct inode		*vp,
-	void			*acl,
-	size_t			size,
-	int			kind)
-{
-	posix_acl_xattr_header	*ext_acl = acl;
-	xfs_acl_t		*xfs_acl;
-	int			error;
-	int			basicperms = 0; /* more than std unix perms? */
-
-	if (!acl)
-		return -EINVAL;
-
-	if (!(_ACL_ALLOC(xfs_acl)))
-		return -ENOMEM;
-
-	error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl);
-	if (error) {
-		_ACL_FREE(xfs_acl);
-		return -error;
-	}
-	if (!xfs_acl->acl_cnt) {
-		_ACL_FREE(xfs_acl);
-		return 0;
-	}
-
-	error = xfs_acl_allow_set(vp, kind);
-
-	/* Incoming ACL exists, set file mode based on its value */
-	if (!error && kind == _ACL_TYPE_ACCESS)
-		error = xfs_acl_setmode(vp, xfs_acl, &basicperms);
-
-	if (error)
-		goto out;
-
-	/*
-	 * If we have more than std unix permissions, set up the actual attr.
-	 * Otherwise, delete any existing attr.  This prevents us from
-	 * having actual attrs for permissions that can be stored in the
-	 * standard permission bits.
-	 */
-	if (!basicperms) {
-		xfs_acl_set_attr(vp, xfs_acl, kind, &error);
-	} else {
-		error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
-	}
-
-out:
-	_ACL_FREE(xfs_acl);
-	return -error;
-}
-
-int
-xfs_acl_iaccess(
-	xfs_inode_t	*ip,
-	mode_t		mode,
-	cred_t		*cr)
-{
-	xfs_acl_t	*acl;
-	int		rval;
-	struct xfs_name	acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
-
-	if (!(_ACL_ALLOC(acl)))
-		return -1;
-
-	/* If the file has no ACL return -1. */
-	rval = sizeof(xfs_acl_t);
-	if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
-		_ACL_FREE(acl);
-		return -1;
-	}
-	xfs_acl_get_endian(acl);
-
-	/* If the file has an empty ACL return -1. */
-	if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) {
-		_ACL_FREE(acl);
-		return -1;
-	}
-
-	/* Synchronize ACL with mode bits */
-	xfs_acl_sync_mode(ip->i_d.di_mode, acl);
-
-	rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr);
-	_ACL_FREE(acl);
-	return rval;
-}
-
-STATIC int
-xfs_acl_allow_set(
-	struct inode	*vp,
-	int		kind)
-{
-	if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
-		return EPERM;
-	if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
-		return ENOTDIR;
-	if (vp->i_sb->s_flags & MS_RDONLY)
-		return EROFS;
-	if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
-		return EPERM;
-	return 0;
-}
-
-/*
- * Note: cr is only used here for the capability check if the ACL test fails.
- *       It is not used to find out the credentials uid or groups etc, as was
- *       done in IRIX. It is assumed that the uid and groups for the current
- *       thread are taken from "current" instead of the cr parameter.
- */
-STATIC int
-xfs_acl_access(
-	uid_t		fuid,
-	gid_t		fgid,
-	xfs_acl_t	*fap,
-	mode_t		md,
-	cred_t		*cr)
-{
-	xfs_acl_entry_t	matched;
-	int		i, allows;
-	int		maskallows = -1;	/* true, but not 1, either */
-	int		seen_userobj = 0;
-
-	matched.ae_tag = 0;	/* Invalid type */
-	matched.ae_perm = 0;
-
-	for (i = 0; i < fap->acl_cnt; i++) {
-		/*
-		 * Break out if we've got a user_obj entry or
-		 * a user entry and the mask (and have processed USER_OBJ)
-		 */
-		if (matched.ae_tag == ACL_USER_OBJ)
-			break;
-		if (matched.ae_tag == ACL_USER) {
-			if (maskallows != -1 && seen_userobj)
-				break;
-			if (fap->acl_entry[i].ae_tag != ACL_MASK &&
-			    fap->acl_entry[i].ae_tag != ACL_USER_OBJ)
-				continue;
-		}
-		/* True if this entry allows the requested access */
-		allows = ((fap->acl_entry[i].ae_perm & md) == md);
-
-		switch (fap->acl_entry[i].ae_tag) {
-		case ACL_USER_OBJ:
-			seen_userobj = 1;
-			if (fuid != current_fsuid())
-				continue;
-			matched.ae_tag = ACL_USER_OBJ;
-			matched.ae_perm = allows;
-			break;
-		case ACL_USER:
-			if (fap->acl_entry[i].ae_id != current_fsuid())
-				continue;
-			matched.ae_tag = ACL_USER;
-			matched.ae_perm = allows;
-			break;
-		case ACL_GROUP_OBJ:
-			if ((matched.ae_tag == ACL_GROUP_OBJ ||
-			    matched.ae_tag == ACL_GROUP) && !allows)
-				continue;
-			if (!in_group_p(fgid))
-				continue;
-			matched.ae_tag = ACL_GROUP_OBJ;
-			matched.ae_perm = allows;
-			break;
-		case ACL_GROUP:
-			if ((matched.ae_tag == ACL_GROUP_OBJ ||
-			    matched.ae_tag == ACL_GROUP) && !allows)
-				continue;
-			if (!in_group_p(fap->acl_entry[i].ae_id))
-				continue;
-			matched.ae_tag = ACL_GROUP;
-			matched.ae_perm = allows;
-			break;
-		case ACL_MASK:
-			maskallows = allows;
-			break;
-		case ACL_OTHER:
-			if (matched.ae_tag != 0)
-				continue;
-			matched.ae_tag = ACL_OTHER;
-			matched.ae_perm = allows;
-			break;
-		}
-	}
-	/*
-	 * First possibility is that no matched entry allows access.
-	 * The capability to override DAC may exist, so check for it.
-	 */
-	switch (matched.ae_tag) {
-	case ACL_OTHER:
-	case ACL_USER_OBJ:
-		if (matched.ae_perm)
-			return 0;
-		break;
-	case ACL_USER:
-	case ACL_GROUP_OBJ:
-	case ACL_GROUP:
-		if (maskallows && matched.ae_perm)
-			return 0;
-		break;
-	case 0:
-		break;
-	}
-
-	/* EACCES tells generic_permission to check for capability overrides */
-	return EACCES;
-}
-
-/*
- * ACL validity checker.
- *   This acl validation routine checks each ACL entry read in makes sense.
- */
-STATIC int
-xfs_acl_invalid(
-	xfs_acl_t	*aclp)
-{
-	xfs_acl_entry_t	*entry, *e;
-	int		user = 0, group = 0, other = 0, mask = 0;
-	int		mask_required = 0;
-	int		i, j;
-
-	if (!aclp)
-		goto acl_invalid;
-
-	if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES)
-		goto acl_invalid;
-
-	for (i = 0; i < aclp->acl_cnt; i++) {
-		entry = &aclp->acl_entry[i];
-		switch (entry->ae_tag) {
-		case ACL_USER_OBJ:
-			if (user++)
-				goto acl_invalid;
-			break;
-		case ACL_GROUP_OBJ:
-			if (group++)
-				goto acl_invalid;
-			break;
-		case ACL_OTHER:
-			if (other++)
-				goto acl_invalid;
-			break;
-		case ACL_USER:
-		case ACL_GROUP:
-			for (j = i + 1; j < aclp->acl_cnt; j++) {
-				e = &aclp->acl_entry[j];
-				if (e->ae_id == entry->ae_id &&
-				    e->ae_tag == entry->ae_tag)
-					goto acl_invalid;
-			}
-			mask_required++;
-			break;
-		case ACL_MASK:
-			if (mask++)
-				goto acl_invalid;
-			break;
-		default:
-			goto acl_invalid;
-		}
-	}
-	if (!user || !group || !other || (mask_required && !mask))
-		goto acl_invalid;
-	else
-		return 0;
-acl_invalid:
-	return EINVAL;
-}
-
-/*
- * Do ACL endian conversion.
- */
-STATIC void
-xfs_acl_get_endian(
-	xfs_acl_t	*aclp)
-{
-	xfs_acl_entry_t	*ace, *end;
-
-	INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
-	end = &aclp->acl_entry[0]+aclp->acl_cnt;
-	for (ace = &aclp->acl_entry[0]; ace < end; ace++) {
-		INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag);
-		INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id);
-		INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm);
-	}
-}
-
-/*
- * Get the ACL from the EA and do endian conversion.
- */
-STATIC void
-xfs_acl_get_attr(
-	struct inode	*vp,
-	xfs_acl_t	*aclp,
-	int		kind,
-	int		flags,
-	int		*error)
-{
-	int		len = sizeof(xfs_acl_t);
-
-	ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
-	flags |= ATTR_ROOT;
-	*error = xfs_attr_get(XFS_I(vp),
-					kind == _ACL_TYPE_ACCESS ?
-					SGI_ACL_FILE : SGI_ACL_DEFAULT,
-					(char *)aclp, &len, flags);
-	if (*error || (flags & ATTR_KERNOVAL))
-		return;
-	xfs_acl_get_endian(aclp);
-}
-
-/*
- * Set the EA with the ACL and do endian conversion.
- */
-STATIC void
-xfs_acl_set_attr(
-	struct inode	*vp,
-	xfs_acl_t	*aclp,
-	int		kind,
-	int		*error)
-{
-	xfs_acl_entry_t	*ace, *newace, *end;
-	xfs_acl_t	*newacl;
-	int		len;
-
-	if (!(_ACL_ALLOC(newacl))) {
-		*error = ENOMEM;
-		return;
-	}
-
-	len = sizeof(xfs_acl_t) -
-	      (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt));
-	end = &aclp->acl_entry[0]+aclp->acl_cnt;
-	for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0];
-	     ace < end;
-	     ace++, newace++) {
-		INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag);
-		INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id);
-		INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
-	}
-	INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
-	*error = xfs_attr_set(XFS_I(vp),
-				kind == _ACL_TYPE_ACCESS ?
-				SGI_ACL_FILE: SGI_ACL_DEFAULT,
-				(char *)newacl, len, ATTR_ROOT);
-	_ACL_FREE(newacl);
-}
-
-int
-xfs_acl_vtoacl(
-	struct inode	*vp,
-	xfs_acl_t	*access_acl,
-	xfs_acl_t	*default_acl)
-{
-	int		error = 0;
-
-	if (access_acl) {
-		/*
-		 * Get the Access ACL and the mode.  If either cannot
-		 * be obtained for some reason, invalidate the access ACL.
-		 */
-		xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
-		if (error)
-			access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
-		else /* We have a good ACL and the file mode, synchronize. */
-			xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
-	}
-
-	if (default_acl) {
-		xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error);
-		if (error)
-			default_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
-	}
-	return error;
-}
-
-/*
- * This function retrieves the parent directory's acl, processes it
- * and lets the child inherit the acl(s) that it should.
- */
-int
-xfs_acl_inherit(
-	struct inode	*vp,
-	mode_t		mode,
-	xfs_acl_t	*pdaclp)
-{
-	xfs_acl_t	*cacl;
-	int		error = 0;
-	int		basicperms = 0;
-
-	/*
-	 * If the parent does not have a default ACL, or it's an
-	 * invalid ACL, we're done.
-	 */
-	if (!vp)
-		return 0;
-	if (!pdaclp || xfs_acl_invalid(pdaclp))
-		return 0;
-
-	/*
-	 * Copy the default ACL of the containing directory to
-	 * the access ACL of the new file and use the mode that
-	 * was passed in to set up the correct initial values for
-	 * the u::,g::[m::], and o:: entries.  This is what makes
-	 * umask() "work" with ACL's.
-	 */
-
-	if (!(_ACL_ALLOC(cacl)))
-		return ENOMEM;
-
-	memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
-	xfs_acl_filter_mode(mode, cacl);
-	error = xfs_acl_setmode(vp, cacl, &basicperms);
-	if (error)
-		goto out_error;
-
-	/*
-	 * Set the Default and Access ACL on the file.  The mode is already
-	 * set on the file, so we don't need to worry about that.
-	 *
-	 * If the new file is a directory, its default ACL is a copy of
-	 * the containing directory's default ACL.
-	 */
-	if (S_ISDIR(vp->i_mode))
-		xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
-	if (!error && !basicperms)
-		xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
-out_error:
-	_ACL_FREE(cacl);
-	return error;
-}
-
-/*
- * Set up the correct mode on the file based on the supplied ACL.  This
- * makes sure that the mode on the file reflects the state of the
- * u::,g::[m::], and o:: entries in the ACL.  Since the mode is where
- * the ACL is going to get the permissions for these entries, we must
- * synchronize the mode whenever we set the ACL on a file.
- */
-STATIC int
-xfs_acl_setmode(
-	struct inode	*vp,
-	xfs_acl_t	*acl,
-	int		*basicperms)
-{
-	struct iattr	iattr;
-	xfs_acl_entry_t	*ap;
-	xfs_acl_entry_t	*gap = NULL;
-	int		i, nomask = 1;
-
-	*basicperms = 1;
-
-	if (acl->acl_cnt == XFS_ACL_NOT_PRESENT)
-		return 0;
-
-	/*
-	 * Copy the u::, g::, o::, and m:: bits from the ACL into the
-	 * mode.  The m:: bits take precedence over the g:: bits.
-	 */
-	iattr.ia_valid = ATTR_MODE;
-	iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
-	iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
-	ap = acl->acl_entry;
-	for (i = 0; i < acl->acl_cnt; ++i) {
-		switch (ap->ae_tag) {
-		case ACL_USER_OBJ:
-			iattr.ia_mode |= ap->ae_perm << 6;
-			break;
-		case ACL_GROUP_OBJ:
-			gap = ap;
-			break;
-		case ACL_MASK:	/* more than just standard modes */
-			nomask = 0;
-			iattr.ia_mode |= ap->ae_perm << 3;
-			*basicperms = 0;
-			break;
-		case ACL_OTHER:
-			iattr.ia_mode |= ap->ae_perm;
-			break;
-		default:	/* more than just standard modes */
-			*basicperms = 0;
-			break;
-		}
-		ap++;
-	}
-
-	/* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
-	if (gap && nomask)
-		iattr.ia_mode |= gap->ae_perm << 3;
-
-	return xfs_setattr(XFS_I(vp), &iattr, 0);
-}
-
-/*
- * The permissions for the special ACL entries (u::, g::[m::], o::) are
- * actually stored in the file mode (if there is both a group and a mask,
- * the group is stored in the ACL entry and the mask is stored on the file).
- * This allows the mode to remain automatically in sync with the ACL without
- * the need for a call-back to the ACL system at every point where the mode
- * could change.  This function takes the permissions from the specified mode
- * and places it in the supplied ACL.
- *
- * This implementation draws its validity from the fact that, when the ACL
- * was assigned, the mode was copied from the ACL.
- * If the mode did not change, therefore, the mode remains exactly what was
- * taken from the special ACL entries at assignment.
- * If a subsequent chmod() was done, the POSIX spec says that the change in
- * mode must cause an update to the ACL seen at user level and used for
- * access checks.  Before and after a mode change, therefore, the file mode
- * most accurately reflects what the special ACL entries should permit/deny.
- *
- * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly,
- *         the existing mode bits will override whatever is in the
- *         ACL. Similarly, if there is a pre-existing ACL that was
- *         never in sync with its mode (owing to a bug in 6.5 and
- *         before), it will now magically (or mystically) be
- *         synchronized.  This could cause slight astonishment, but
- *         it is better than inconsistent permissions.
- *
- * The supplied ACL is a template that may contain any combination
- * of special entries.  These are treated as place holders when we fill
- * out the ACL.  This routine does not add or remove special entries, it
- * simply unites each special entry with its associated set of permissions.
- */
-STATIC void
-xfs_acl_sync_mode(
-	mode_t		mode,
-	xfs_acl_t	*acl)
-{
-	int		i, nomask = 1;
-	xfs_acl_entry_t	*ap;
-	xfs_acl_entry_t	*gap = NULL;
-
-	/*
-	 * Set ACL entries. POSIX1003.1eD16 requires that the MASK
-	 * be set instead of the GROUP entry, if there is a MASK.
-	 */
-	for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
-		switch (ap->ae_tag) {
-		case ACL_USER_OBJ:
-			ap->ae_perm = (mode >> 6) & 0x7;
-			break;
-		case ACL_GROUP_OBJ:
-			gap = ap;
-			break;
-		case ACL_MASK:
-			nomask = 0;
-			ap->ae_perm = (mode >> 3) & 0x7;
-			break;
-		case ACL_OTHER:
-			ap->ae_perm = mode & 0x7;
-			break;
-		default:
-			break;
-		}
-	}
-	/* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
-	if (gap && nomask)
-		gap->ae_perm = (mode >> 3) & 0x7;
-}
-
-/*
- * When inheriting an Access ACL from a directory Default ACL,
- * the ACL bits are set to the intersection of the ACL default
- * permission bits and the file permission bits in mode. If there
- * are no permission bits on the file then we must not give them
- * the ACL. This is what what makes umask() work with ACLs.
- */
-STATIC void
-xfs_acl_filter_mode(
-	mode_t		mode,
-	xfs_acl_t	*acl)
-{
-	int		i, nomask = 1;
-	xfs_acl_entry_t	*ap;
-	xfs_acl_entry_t	*gap = NULL;
-
-	/*
-	 * Set ACL entries. POSIX1003.1eD16 requires that the MASK
-	 * be merged with GROUP entry, if there is a MASK.
-	 */
-	for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
-		switch (ap->ae_tag) {
-		case ACL_USER_OBJ:
-			ap->ae_perm &= (mode >> 6) & 0x7;
-			break;
-		case ACL_GROUP_OBJ:
-			gap = ap;
-			break;
-		case ACL_MASK:
-			nomask = 0;
-			ap->ae_perm &= (mode >> 3) & 0x7;
-			break;
-		case ACL_OTHER:
-			ap->ae_perm &= mode & 0x7;
-			break;
-		default:
-			break;
-		}
-	}
-	/* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
-	if (gap && nomask)
-		gap->ae_perm &= (mode >> 3) & 0x7;
-}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 642f1db..63dc1f2 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -18,81 +18,48 @@
 #ifndef __XFS_ACL_H__
 #define __XFS_ACL_H__
 
-/*
- * Access Control Lists
- */
-typedef __uint16_t	xfs_acl_perm_t;
-typedef __int32_t	xfs_acl_tag_t;
-typedef __int32_t	xfs_acl_id_t;
+struct inode;
+struct posix_acl;
+struct xfs_inode;
 
 #define XFS_ACL_MAX_ENTRIES 25
 #define XFS_ACL_NOT_PRESENT (-1)
 
-typedef struct xfs_acl_entry {
-	xfs_acl_tag_t	ae_tag;
-	xfs_acl_id_t	ae_id;
-	xfs_acl_perm_t	ae_perm;
-} xfs_acl_entry_t;
-
-typedef struct xfs_acl {
-	__int32_t	acl_cnt;
-	xfs_acl_entry_t	acl_entry[XFS_ACL_MAX_ENTRIES];
-} xfs_acl_t;
+/* On-disk XFS access control list structure */
+struct xfs_acl {
+	__be32		acl_cnt;
+	struct xfs_acl_entry {
+		__be32	ae_tag;
+		__be32	ae_id;
+		__be16	ae_perm;
+	} acl_entry[XFS_ACL_MAX_ENTRIES];
+};
 
 /* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE	"SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT	"SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE		"SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT		"SGI_ACL_DEFAULT"
 #define SGI_ACL_FILE_SIZE	(sizeof(SGI_ACL_FILE)-1)
 #define SGI_ACL_DEFAULT_SIZE	(sizeof(SGI_ACL_DEFAULT)-1)
 
-#define _ACL_TYPE_ACCESS	1
-#define _ACL_TYPE_DEFAULT	2
-
 #ifdef CONFIG_XFS_POSIX_ACL
+extern int xfs_check_acl(struct inode *inode, int mask);
+extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
+extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
+extern int xfs_acl_chmod(struct inode *inode);
+extern void xfs_inode_init_acls(struct xfs_inode *ip);
+extern void xfs_inode_clear_acls(struct xfs_inode *ip);
+extern int posix_acl_access_exists(struct inode *inode);
+extern int posix_acl_default_exists(struct inode *inode);
 
-struct vattr;
-struct xfs_inode;
-
-extern struct kmem_zone *xfs_acl_zone;
-#define xfs_acl_zone_init(zone, name)	\
-		(zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
-#define xfs_acl_zone_destroy(zone)	kmem_zone_destroy(zone)
-
-extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
-extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(struct inode *);
-extern int xfs_acl_vhasacl_default(struct inode *);
-extern int xfs_acl_vset(struct inode *, void *, size_t, int);
-extern int xfs_acl_vget(struct inode *, void *, size_t, int);
-extern int xfs_acl_vremove(struct inode *, int);
-
-#define _ACL_PERM_INVALID(perm)	((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
-
-#define _ACL_INHERIT(c,m,d)	(xfs_acl_inherit(c,m,d))
-#define _ACL_GET_ACCESS(pv,pa)	(xfs_acl_vtoacl(pv,pa,NULL) == 0)
-#define _ACL_GET_DEFAULT(pv,pd)	(xfs_acl_vtoacl(pv,NULL,pd) == 0)
-#define _ACL_ACCESS_EXISTS	xfs_acl_vhasacl_access
-#define _ACL_DEFAULT_EXISTS	xfs_acl_vhasacl_default
-
-#define _ACL_ALLOC(a)		((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
-#define _ACL_FREE(a)		((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0)
-
+extern struct xattr_handler xfs_xattr_system_handler;
 #else
-#define xfs_acl_zone_init(zone,name)
-#define xfs_acl_zone_destroy(zone)
-#define xfs_acl_vset(v,p,sz,t)	(-EOPNOTSUPP)
-#define xfs_acl_vget(v,p,sz,t)	(-EOPNOTSUPP)
-#define xfs_acl_vremove(v,t)	(-EOPNOTSUPP)
-#define xfs_acl_vhasacl_access(v)	(0)
-#define xfs_acl_vhasacl_default(v)	(0)
-#define _ACL_ALLOC(a)		(1)	/* successfully allocate nothing */
-#define _ACL_FREE(a)		((void)0)
-#define _ACL_INHERIT(c,m,d)	(0)
-#define _ACL_GET_ACCESS(pv,pa)	(0)
-#define _ACL_GET_DEFAULT(pv,pd)	(0)
-#define _ACL_ACCESS_EXISTS	(NULL)
-#define _ACL_DEFAULT_EXISTS	(NULL)
-#endif
-
+# define xfs_check_acl					NULL
+# define xfs_get_acl(inode, type)			NULL
+# define xfs_inherit_acl(inode, default_acl)		0
+# define xfs_acl_chmod(inode)				0
+# define xfs_inode_init_acls(ip)
+# define xfs_inode_clear_acls(ip)
+# define posix_acl_access_exists(inode)			0
+# define posix_acl_default_exists(inode)		0
+#endif /* CONFIG_XFS_POSIX_ACL */
 #endif	/* __XFS_ACL_H__ */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 53d5e70..0902249 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -73,28 +73,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
 
 #endif	/* __KERNEL__ */
 
-/* do we need conversion? */
-#define ARCH_NOCONVERT 1
-#ifdef XFS_NATIVE_HOST
-# define ARCH_CONVERT	ARCH_NOCONVERT
-#else
-# define ARCH_CONVERT	0
-#endif
-
-/* generic swapping macros */
-
-#ifndef HAVE_SWABMACROS
-#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var))))
-#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var))))
-#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var))))
-#endif
-
-#define INT_SWAP(type, var) \
-    ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \
-    ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \
-    ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \
-    (var))))
-
 /*
  * get and set integers from potentially unaligned locations
  */
@@ -107,16 +85,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
 	((__u8*)(pointer))[1] = (((value)     ) & 0xff); \
     }
 
-/* does not return a value */
-#define INT_SET(reference,arch,valueref) \
-    (__builtin_constant_p(valueref) ? \
-	(void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \
-	(void)( \
-	    ((reference) = (valueref)), \
-	    ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \
-	) \
-    )
-
 /*
  * In directories inode numbers are stored as unaligned arrays of unsigned
  * 8bit integers on disk.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index cd1008b..db15feb 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -45,7 +45,6 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
-#include "xfs_acl.h"
 #include "xfs_rw.h"
 #include "xfs_vnodeops.h"
 
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 73e1c0d..76c540f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -18,6 +18,7 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
+#include "xfs_acl.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
@@ -82,6 +83,7 @@ xfs_inode_alloc(
 	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
 	ip->i_size = 0;
 	ip->i_new_size = 0;
+	xfs_inode_init_acls(ip);
 
 	/*
 	 * Initialize inode's trace buffers.
@@ -558,6 +560,7 @@ xfs_ireclaim(
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
+	xfs_inode_clear_acls(ip);
 	kmem_zone_free(xfs_inode_zone, ip);
 }
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 123b20c..1f22d65 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,7 +49,6 @@
 #include "xfs_utils.h"
 #include "xfs_dir2_trace.h"
 #include "xfs_quota.h"
-#include "xfs_acl.h"
 #include "xfs_filestream.h"
 #include "xfs_vnodeops.h"
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f879c1b..7701670 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,6 +18,7 @@
 #ifndef	__XFS_INODE_H__
 #define	__XFS_INODE_H__
 
+struct posix_acl;
 struct xfs_dinode;
 struct xfs_inode;
 
@@ -272,6 +273,11 @@ typedef struct xfs_inode {
 	/* VFS inode */
 	struct inode		i_vnode;	/* embedded VFS inode */
 
+#ifdef CONFIG_XFS_POSIX_ACL
+	struct posix_acl	*i_acl;
+	struct posix_acl	*i_default_acl;
+#endif
+
 	/* Trace buffers per inode. */
 #ifdef XFS_INODE_TRACE
 	struct ktrace		*i_trace;	/* general inode trace */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index feb30a9..67ae5555 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 36f3a21..fea6861 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -41,7 +41,6 @@
 #include "xfs_ialloc.h"
 #include "xfs_attr.h"
 #include "xfs_bmap.h"
-#include "xfs_acl.h"
 #include "xfs_error.h"
 #include "xfs_buf_item.h"
 #include "xfs_rw.h"
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b56321b..c4eca5e 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -42,6 +42,7 @@
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
+#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_rw.h"
 #include "xfs_error.h"
@@ -467,8 +468,20 @@ xfs_setattr(
 	xfs_qm_dqrele(udqp);
 	xfs_qm_dqrele(gdqp);
 
-	if (code) {
+	if (code)
 		return code;
+
+	/*
+	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+	 * 	     update.  We could avoid this with linked transactions
+	 * 	     and passing down the transaction pointer all the way
+	 *	     to attr_set.  No previous user of the generic
+	 * 	     Posix ACL code seems to care about this issue either.
+	 */
+	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+		code = -xfs_acl_chmod(inode);
+		if (code)
+			return XFS_ERROR(code);
 	}
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 04373c6..a9e102d 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
 #define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
 #define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
+#define XFS_ATTR_NOACL		0x08	/* Don't call xfs_acl_chmod */
 
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_fsync(struct xfs_inode *ip);
-- 
cgit v0.10.2


From 5c939df56c3ea018b58e5aa76181284c2053d699 Mon Sep 17 00:00:00 2001
From: Yan Zheng <zheng.yan@oracle.com>
Date: Wed, 27 May 2009 09:16:03 -0400
Subject: btrfs: Fix set/clear_extent_bit for 'end == (u64)-1'

There are some 'start = state->end + 1;' like code in set_extent_bit
and clear_extent_bit. They overflow when end == (u64)-1.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe9eb99..6826018 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -476,6 +476,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
 	struct rb_node *node;
+	u64 last_end;
 	int err;
 	int set = 0;
 
@@ -498,6 +499,7 @@ again:
 	if (state->start > end)
 		goto out;
 	WARN_ON(state->end < start);
+	last_end = state->end;
 
 	/*
 	 *     | ---- desired range ---- |
@@ -524,9 +526,11 @@ again:
 		if (err)
 			goto out;
 		if (state->end <= end) {
-			start = state->end + 1;
 			set |= clear_state_bit(tree, state, bits,
 					wake, delete);
+			if (last_end == (u64)-1)
+				goto out;
+			start = last_end + 1;
 		} else {
 			start = state->start;
 		}
@@ -552,8 +556,10 @@ again:
 		goto out;
 	}
 
-	start = state->end + 1;
 	set |= clear_state_bit(tree, state, bits, wake, delete);
+	if (last_end == (u64)-1)
+		goto out;
+	start = last_end + 1;
 	goto search_again;
 
 out:
@@ -707,8 +713,10 @@ again:
 			goto out;
 		}
 		set_state_bits(tree, state, bits);
-		start = state->end + 1;
 		merge_state(tree, state);
+		if (last_end == (u64)-1)
+			goto out;
+		start = last_end + 1;
 		goto search_again;
 	}
 
@@ -742,8 +750,10 @@ again:
 			goto out;
 		if (state->end <= end) {
 			set_state_bits(tree, state, bits);
-			start = state->end + 1;
 			merge_state(tree, state);
+			if (last_end == (u64)-1)
+				goto out;
+			start = last_end + 1;
 		} else {
 			start = state->start;
 		}
-- 
cgit v0.10.2


From 5d4f98a28c7d334091c1b7744f48a1acdd2a4ae0 Mon Sep 17 00:00:00 2001
From: Yan Zheng <zheng.yan@oracle.com>
Date: Wed, 10 Jun 2009 10:45:14 -0400
Subject: Btrfs: Mixed back reference  (FORWARD ROLLING FORMAT CHANGE)

This commit introduces a new kind of back reference for btrfs metadata.
Once a filesystem has been mounted with this commit, IT WILL NO LONGER
BE MOUNTABLE BY OLDER KERNELS.

When a tree block in subvolume tree is cow'd, the reference counts of all
extents it points to are increased by one.  At transaction commit time,
the old root of the subvolume is recorded in a "dead root" data structure,
and the btree it points to is later walked, dropping reference counts
and freeing any blocks where the reference count goes to 0.

The increments done during cow and decrements done after commit cancel out,
and the walk is a very expensive way to go about freeing the blocks that
are no longer referenced by the new btree root.  This commit reduces the
transaction overhead by avoiding the need for dead root records.

When a non-shared tree block is cow'd, we free the old block at once, and the
new block inherits old block's references. When a tree block with reference
count > 1 is cow'd, we increase the reference counts of all extents
the new block points to by one, and decrease the old block's reference count by
one.

This dead tree avoidance code removes the need to modify the reference
counts of lower level extents when a non-shared tree block is cow'd.
But we still need to update back ref for all pointers in the block.
This is because the location of the block is recorded in the back ref
item.

We can solve this by introducing a new type of back ref. The new
back ref provides information about pointer's key, level and in which
tree the pointer lives. This information allow us to find the pointer
by searching the tree. The shortcoming of the new back ref is that it
only works for pointers in tree blocks referenced by their owner trees.

This is mostly a problem for snapshots, where resolving one of these
fuzzy back references would be O(number_of_snapshots) and quite slow.
The solution used here is to use the fuzzy back references in the common
case where a given tree block is only referenced by one root,
and use the full back references when multiple roots have a reference
on a given block.

This commit adds per subvolume red-black tree to keep trace of cached
inodes. The red-black tree helps the balancing code to find cached
inodes whose inode numbers within a given range.

This commit improves the balancing code by introducing several data
structures to keep the state of balancing. The most important one
is the back ref cache. It caches how the upper level tree blocks are
referenced. This greatly reduce the overhead of checking back ref.

The improved balancing code scales significantly better with a large
number of snapshots.

This is a very large commit and was written in a number of
pieces.  But, they depend heavily on the disk format change and were
squashed together to make sure git bisect didn't end up in a
bad state wrt space balancing or the format change.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9421284..a35eb36 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
-	   compression.o delayed-ref.o
+	   export.o tree-log.o acl.o free-space-cache.o zlib.o \
+	   compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b30986f..ecf5f7d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -72,6 +72,9 @@ struct btrfs_inode {
 	 */
 	struct list_head ordered_operations;
 
+	/* node for the red-black tree that links inodes in subvolume root */
+	struct rb_node rb_node;
+
 	/* the space_info for where this inode's data allocations are done */
 	struct btrfs_space_info *space_info;
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index fedf8b9..2b96027 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 	u32 nritems;
 	int ret = 0;
 	int level;
-	struct btrfs_root *new_root;
-
-	new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
-	if (!new_root)
-		return -ENOMEM;
-
-	memcpy(new_root, root, sizeof(*new_root));
-	new_root->root_key.objectid = new_root_objectid;
+	struct btrfs_disk_key disk_key;
 
 	WARN_ON(root->ref_cows && trans->transid !=
 		root->fs_info->running_transaction->transid);
@@ -212,28 +205,37 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 
 	level = btrfs_header_level(buf);
 	nritems = btrfs_header_nritems(buf);
+	if (level == 0)
+		btrfs_item_key(buf, &disk_key, 0);
+	else
+		btrfs_node_key(buf, &disk_key, 0);
 
-	cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
-				     new_root_objectid, trans->transid,
-				     level, buf->start, 0);
-	if (IS_ERR(cow)) {
-		kfree(new_root);
+	cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
+				     new_root_objectid, &disk_key, level,
+				     buf->start, 0);
+	if (IS_ERR(cow))
 		return PTR_ERR(cow);
-	}
 
 	copy_extent_buffer(cow, buf, 0, 0, cow->len);
 	btrfs_set_header_bytenr(cow, cow->start);
 	btrfs_set_header_generation(cow, trans->transid);
-	btrfs_set_header_owner(cow, new_root_objectid);
-	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+				     BTRFS_HEADER_FLAG_RELOC);
+	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+	else
+		btrfs_set_header_owner(cow, new_root_objectid);
 
 	write_extent_buffer(cow, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(cow),
 			    BTRFS_FSID_SIZE);
 
 	WARN_ON(btrfs_header_generation(buf) > trans->transid);
-	ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
-	kfree(new_root);
+	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+		ret = btrfs_inc_ref(trans, root, cow, 1);
+	else
+		ret = btrfs_inc_ref(trans, root, cow, 0);
 
 	if (ret)
 		return ret;
@@ -244,6 +246,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 }
 
 /*
+ * check if the tree block can be shared by multiple trees
+ */
+int btrfs_block_can_be_shared(struct btrfs_root *root,
+			      struct extent_buffer *buf)
+{
+	/*
+	 * Tree blocks not in refernece counted trees and tree roots
+	 * are never shared. If a block was allocated after the last
+	 * snapshot and the block was not allocated by tree relocation,
+	 * we know the block is not shared.
+	 */
+	if (root->ref_cows &&
+	    buf != root->node && buf != root->commit_root &&
+	    (btrfs_header_generation(buf) <=
+	     btrfs_root_last_snapshot(&root->root_item) ||
+	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+		return 1;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (root->ref_cows &&
+	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+		return 1;
+#endif
+	return 0;
+}
+
+static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *root,
+				       struct extent_buffer *buf,
+				       struct extent_buffer *cow)
+{
+	u64 refs;
+	u64 owner;
+	u64 flags;
+	u64 new_flags = 0;
+	int ret;
+
+	/*
+	 * Backrefs update rules:
+	 *
+	 * Always use full backrefs for extent pointers in tree block
+	 * allocated by tree relocation.
+	 *
+	 * If a shared tree block is no longer referenced by its owner
+	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
+	 * use full backrefs for extent pointers in tree block.
+	 *
+	 * If a tree block is been relocating
+	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
+	 * use full backrefs for extent pointers in tree block.
+	 * The reason for this is some operations (such as drop tree)
+	 * are only allowed for blocks use full backrefs.
+	 */
+
+	if (btrfs_block_can_be_shared(root, buf)) {
+		ret = btrfs_lookup_extent_info(trans, root, buf->start,
+					       buf->len, &refs, &flags);
+		BUG_ON(ret);
+		BUG_ON(refs == 0);
+	} else {
+		refs = 1;
+		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+		else
+			flags = 0;
+	}
+
+	owner = btrfs_header_owner(buf);
+	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
+	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+
+	if (refs > 1) {
+		if ((owner == root->root_key.objectid ||
+		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
+		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
+			ret = btrfs_inc_ref(trans, root, buf, 1);
+			BUG_ON(ret);
+
+			if (root->root_key.objectid ==
+			    BTRFS_TREE_RELOC_OBJECTID) {
+				ret = btrfs_dec_ref(trans, root, buf, 0);
+				BUG_ON(ret);
+				ret = btrfs_inc_ref(trans, root, cow, 1);
+				BUG_ON(ret);
+			}
+			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+		} else {
+
+			if (root->root_key.objectid ==
+			    BTRFS_TREE_RELOC_OBJECTID)
+				ret = btrfs_inc_ref(trans, root, cow, 1);
+			else
+				ret = btrfs_inc_ref(trans, root, cow, 0);
+			BUG_ON(ret);
+		}
+		if (new_flags != 0) {
+			ret = btrfs_set_disk_extent_flags(trans, root,
+							  buf->start,
+							  buf->len,
+							  new_flags, 0);
+			BUG_ON(ret);
+		}
+	} else {
+		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+			if (root->root_key.objectid ==
+			    BTRFS_TREE_RELOC_OBJECTID)
+				ret = btrfs_inc_ref(trans, root, cow, 1);
+			else
+				ret = btrfs_inc_ref(trans, root, cow, 0);
+			BUG_ON(ret);
+			ret = btrfs_dec_ref(trans, root, buf, 1);
+			BUG_ON(ret);
+		}
+		clean_tree_block(trans, root, buf);
+	}
+	return 0;
+}
+
+/*
  * does the dirty work in cow of a single block.  The parent block (if
  * supplied) is updated to point to the new cow copy.  The new buffer is marked
  * dirty and returned locked.  If you modify the block it needs to be marked
@@ -262,34 +383,39 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			     struct extent_buffer **cow_ret,
 			     u64 search_start, u64 empty_size)
 {
-	u64 parent_start;
+	struct btrfs_disk_key disk_key;
 	struct extent_buffer *cow;
-	u32 nritems;
-	int ret = 0;
 	int level;
 	int unlock_orig = 0;
+	u64 parent_start;
 
 	if (*cow_ret == buf)
 		unlock_orig = 1;
 
 	btrfs_assert_tree_locked(buf);
 
-	if (parent)
-		parent_start = parent->start;
-	else
-		parent_start = 0;
-
 	WARN_ON(root->ref_cows && trans->transid !=
 		root->fs_info->running_transaction->transid);
 	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
 	level = btrfs_header_level(buf);
-	nritems = btrfs_header_nritems(buf);
 
-	cow = btrfs_alloc_free_block(trans, root, buf->len,
-				     parent_start, root->root_key.objectid,
-				     trans->transid, level,
-				     search_start, empty_size);
+	if (level == 0)
+		btrfs_item_key(buf, &disk_key, 0);
+	else
+		btrfs_node_key(buf, &disk_key, 0);
+
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		if (parent)
+			parent_start = parent->start;
+		else
+			parent_start = 0;
+	} else
+		parent_start = 0;
+
+	cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
+				     root->root_key.objectid, &disk_key,
+				     level, search_start, empty_size);
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);
 
@@ -298,83 +424,53 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	copy_extent_buffer(cow, buf, 0, 0, cow->len);
 	btrfs_set_header_bytenr(cow, cow->start);
 	btrfs_set_header_generation(cow, trans->transid);
-	btrfs_set_header_owner(cow, root->root_key.objectid);
-	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+				     BTRFS_HEADER_FLAG_RELOC);
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+	else
+		btrfs_set_header_owner(cow, root->root_key.objectid);
 
 	write_extent_buffer(cow, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(cow),
 			    BTRFS_FSID_SIZE);
 
-	WARN_ON(btrfs_header_generation(buf) > trans->transid);
-	if (btrfs_header_generation(buf) != trans->transid) {
-		u32 nr_extents;
-		ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
-		if (ret)
-			return ret;
-
-		ret = btrfs_cache_ref(trans, root, buf, nr_extents);
-		WARN_ON(ret);
-	} else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
-		/*
-		 * There are only two places that can drop reference to
-		 * tree blocks owned by living reloc trees, one is here,
-		 * the other place is btrfs_drop_subtree. In both places,
-		 * we check reference count while tree block is locked.
-		 * Furthermore, if reference count is one, it won't get
-		 * increased by someone else.
-		 */
-		u32 refs;
-		ret = btrfs_lookup_extent_ref(trans, root, buf->start,
-					      buf->len, &refs);
-		BUG_ON(ret);
-		if (refs == 1) {
-			ret = btrfs_update_ref(trans, root, buf, cow,
-					       0, nritems);
-			clean_tree_block(trans, root, buf);
-		} else {
-			ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
-		}
-		BUG_ON(ret);
-	} else {
-		ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
-		if (ret)
-			return ret;
-		clean_tree_block(trans, root, buf);
-	}
-
-	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
-		ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
-		WARN_ON(ret);
-	}
+	update_ref_for_cow(trans, root, buf, cow);
 
 	if (buf == root->node) {
 		WARN_ON(parent && parent != buf);
+		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+			parent_start = buf->start;
+		else
+			parent_start = 0;
 
 		spin_lock(&root->node_lock);
 		root->node = cow;
 		extent_buffer_get(cow);
 		spin_unlock(&root->node_lock);
 
-		if (buf != root->commit_root) {
-			btrfs_free_extent(trans, root, buf->start,
-					  buf->len, buf->start,
-					  root->root_key.objectid,
-					  btrfs_header_generation(buf),
-					  level, 1);
-		}
+		btrfs_free_extent(trans, root, buf->start, buf->len,
+				  parent_start, root->root_key.objectid,
+				  level, 0);
 		free_extent_buffer(buf);
 		add_root_to_dirty_list(root);
 	} else {
+		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+			parent_start = parent->start;
+		else
+			parent_start = 0;
+
+		WARN_ON(trans->transid != btrfs_header_generation(parent));
 		btrfs_set_node_blockptr(parent, parent_slot,
 					cow->start);
-		WARN_ON(trans->transid == 0);
 		btrfs_set_node_ptr_generation(parent, parent_slot,
 					      trans->transid);
 		btrfs_mark_buffer_dirty(parent);
-		WARN_ON(btrfs_header_generation(parent) != trans->transid);
 		btrfs_free_extent(trans, root, buf->start, buf->len,
-				  parent_start, btrfs_header_owner(parent),
-				  btrfs_header_generation(parent), level, 1);
+				  parent_start, root->root_key.objectid,
+				  level, 0);
 	}
 	if (unlock_orig)
 		btrfs_tree_unlock(buf);
@@ -384,6 +480,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+static inline int should_cow_block(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct extent_buffer *buf)
+{
+	if (btrfs_header_generation(buf) == trans->transid &&
+	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
+	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
+	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+		return 0;
+	return 1;
+}
+
 /*
  * cows a single block, see __btrfs_cow_block for the real work.
  * This version of it has extra checks so that a block isn't cow'd more than
@@ -411,9 +519,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		WARN_ON(1);
 	}
 
-	if (btrfs_header_generation(buf) == trans->transid &&
-	    btrfs_header_owner(buf) == root->root_key.objectid &&
-	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+	if (!should_cow_block(trans, root, buf)) {
 		*cow_ret = buf;
 		return 0;
 	}
@@ -469,7 +575,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 /*
  * same as comp_keys only with two btrfs_key's
  */
-static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
 {
 	if (k1->objectid > k2->objectid)
 		return 1;
@@ -845,6 +951,12 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 	return -1;
 }
 
+int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+		     int level, int *slot)
+{
+	return bin_search(eb, key, level, slot);
+}
+
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  * NULL is returned on error.
@@ -921,13 +1033,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		root->node = child;
 		spin_unlock(&root->node_lock);
 
-		ret = btrfs_update_extent_ref(trans, root, child->start,
-					      child->len,
-					      mid->start, child->start,
-					      root->root_key.objectid,
-					      trans->transid, level - 1);
-		BUG_ON(ret);
-
 		add_root_to_dirty_list(root);
 		btrfs_tree_unlock(child);
 
@@ -938,9 +1043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		/* once for the path */
 		free_extent_buffer(mid);
 		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
-					mid->start, root->root_key.objectid,
-					btrfs_header_generation(mid),
-					level, 1);
+					0, root->root_key.objectid, level, 1);
 		/* once for the root ptr */
 		free_extent_buffer(mid);
 		return ret;
@@ -998,7 +1101,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 			ret = wret;
 		if (btrfs_header_nritems(right) == 0) {
 			u64 bytenr = right->start;
-			u64 generation = btrfs_header_generation(parent);
 			u32 blocksize = right->len;
 
 			clean_tree_block(trans, root, right);
@@ -1010,9 +1112,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 			if (wret)
 				ret = wret;
 			wret = btrfs_free_extent(trans, root, bytenr,
-						 blocksize, parent->start,
-						 btrfs_header_owner(parent),
-						 generation, level, 1);
+						 blocksize, 0,
+						 root->root_key.objectid,
+						 level, 0);
 			if (wret)
 				ret = wret;
 		} else {
@@ -1047,7 +1149,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	}
 	if (btrfs_header_nritems(mid) == 0) {
 		/* we've managed to empty the middle node, drop it */
-		u64 root_gen = btrfs_header_generation(parent);
 		u64 bytenr = mid->start;
 		u32 blocksize = mid->len;
 
@@ -1059,9 +1160,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		if (wret)
 			ret = wret;
 		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
-					 parent->start,
-					 btrfs_header_owner(parent),
-					 root_gen, level, 1);
+					 0, root->root_key.objectid,
+					 level, 0);
 		if (wret)
 			ret = wret;
 	} else {
@@ -1437,7 +1537,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 {
 	int i;
 
-	if (path->keep_locks || path->lowest_level)
+	if (path->keep_locks)
 		return;
 
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1614,10 +1714,17 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 		lowest_unlock = 2;
 
 again:
-	if (p->skip_locking)
-		b = btrfs_root_node(root);
-	else
-		b = btrfs_lock_root_node(root);
+	if (p->search_commit_root) {
+		b = root->commit_root;
+		extent_buffer_get(b);
+		if (!p->skip_locking)
+			btrfs_tree_lock(b);
+	} else {
+		if (p->skip_locking)
+			b = btrfs_root_node(root);
+		else
+			b = btrfs_lock_root_node(root);
+	}
 
 	while (b) {
 		level = btrfs_header_level(b);
@@ -1638,11 +1745,9 @@ again:
 			 * then we don't want to set the path blocking,
 			 * so we test it here
 			 */
-			if (btrfs_header_generation(b) == trans->transid &&
-			    btrfs_header_owner(b) == root->root_key.objectid &&
-			    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
+			if (!should_cow_block(trans, root, b))
 				goto cow_done;
-			}
+
 			btrfs_set_path_blocking(p);
 
 			wret = btrfs_cow_block(trans, root, b,
@@ -1764,138 +1869,6 @@ done:
 	return ret;
 }
 
-int btrfs_merge_path(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root,
-		     struct btrfs_key *node_keys,
-		     u64 *nodes, int lowest_level)
-{
-	struct extent_buffer *eb;
-	struct extent_buffer *parent;
-	struct btrfs_key key;
-	u64 bytenr;
-	u64 generation;
-	u32 blocksize;
-	int level;
-	int slot;
-	int key_match;
-	int ret;
-
-	eb = btrfs_lock_root_node(root);
-	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
-	BUG_ON(ret);
-
-	btrfs_set_lock_blocking(eb);
-
-	parent = eb;
-	while (1) {
-		level = btrfs_header_level(parent);
-		if (level == 0 || level <= lowest_level)
-			break;
-
-		ret = bin_search(parent, &node_keys[lowest_level], level,
-				 &slot);
-		if (ret && slot > 0)
-			slot--;
-
-		bytenr = btrfs_node_blockptr(parent, slot);
-		if (nodes[level - 1] == bytenr)
-			break;
-
-		blocksize = btrfs_level_size(root, level - 1);
-		generation = btrfs_node_ptr_generation(parent, slot);
-		btrfs_node_key_to_cpu(eb, &key, slot);
-		key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
-
-		if (generation == trans->transid) {
-			eb = read_tree_block(root, bytenr, blocksize,
-					     generation);
-			btrfs_tree_lock(eb);
-			btrfs_set_lock_blocking(eb);
-		}
-
-		/*
-		 * if node keys match and node pointer hasn't been modified
-		 * in the running transaction, we can merge the path. for
-		 * blocks owened by reloc trees, the node pointer check is
-		 * skipped, this is because these blocks are fully controlled
-		 * by the space balance code, no one else can modify them.
-		 */
-		if (!nodes[level - 1] || !key_match ||
-		    (generation == trans->transid &&
-		     btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
-			if (level == 1 || level == lowest_level + 1) {
-				if (generation == trans->transid) {
-					btrfs_tree_unlock(eb);
-					free_extent_buffer(eb);
-				}
-				break;
-			}
-
-			if (generation != trans->transid) {
-				eb = read_tree_block(root, bytenr, blocksize,
-						generation);
-				btrfs_tree_lock(eb);
-				btrfs_set_lock_blocking(eb);
-			}
-
-			ret = btrfs_cow_block(trans, root, eb, parent, slot,
-					      &eb);
-			BUG_ON(ret);
-
-			if (root->root_key.objectid ==
-			    BTRFS_TREE_RELOC_OBJECTID) {
-				if (!nodes[level - 1]) {
-					nodes[level - 1] = eb->start;
-					memcpy(&node_keys[level - 1], &key,
-					       sizeof(node_keys[0]));
-				} else {
-					WARN_ON(1);
-				}
-			}
-
-			btrfs_tree_unlock(parent);
-			free_extent_buffer(parent);
-			parent = eb;
-			continue;
-		}
-
-		btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
-		btrfs_set_node_ptr_generation(parent, slot, trans->transid);
-		btrfs_mark_buffer_dirty(parent);
-
-		ret = btrfs_inc_extent_ref(trans, root,
-					nodes[level - 1],
-					blocksize, parent->start,
-					btrfs_header_owner(parent),
-					btrfs_header_generation(parent),
-					level - 1);
-		BUG_ON(ret);
-
-		/*
-		 * If the block was created in the running transaction,
-		 * it's possible this is the last reference to it, so we
-		 * should drop the subtree.
-		 */
-		if (generation == trans->transid) {
-			ret = btrfs_drop_subtree(trans, root, eb, parent);
-			BUG_ON(ret);
-			btrfs_tree_unlock(eb);
-			free_extent_buffer(eb);
-		} else {
-			ret = btrfs_free_extent(trans, root, bytenr,
-					blocksize, parent->start,
-					btrfs_header_owner(parent),
-					btrfs_header_generation(parent),
-					level - 1, 1);
-			BUG_ON(ret);
-		}
-		break;
-	}
-	btrfs_tree_unlock(parent);
-	free_extent_buffer(parent);
-	return 0;
-}
-
 /*
  * adjust the pointers going up the tree, starting at level
  * making sure the right key of each node is points to 'key'.
@@ -2021,9 +1994,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(src);
 	btrfs_mark_buffer_dirty(dst);
 
-	ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
-	BUG_ON(ret);
-
 	return ret;
 }
 
@@ -2083,9 +2053,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(src);
 	btrfs_mark_buffer_dirty(dst);
 
-	ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
-	BUG_ON(ret);
-
 	return ret;
 }
 
@@ -2105,7 +2072,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	struct extent_buffer *c;
 	struct extent_buffer *old;
 	struct btrfs_disk_key lower_key;
-	int ret;
 
 	BUG_ON(path->nodes[level]);
 	BUG_ON(path->nodes[level-1] != root->node);
@@ -2117,16 +2083,17 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 		btrfs_node_key(lower, &lower_key, 0);
 
 	c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
-				   root->root_key.objectid, trans->transid,
+				   root->root_key.objectid, &lower_key,
 				   level, root->node->start, 0);
 	if (IS_ERR(c))
 		return PTR_ERR(c);
 
-	memset_extent_buffer(c, 0, 0, root->nodesize);
+	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_nritems(c, 1);
 	btrfs_set_header_level(c, level);
 	btrfs_set_header_bytenr(c, c->start);
 	btrfs_set_header_generation(c, trans->transid);
+	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(c, root->root_key.objectid);
 
 	write_extent_buffer(c, root->fs_info->fsid,
@@ -2151,12 +2118,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	root->node = c;
 	spin_unlock(&root->node_lock);
 
-	ret = btrfs_update_extent_ref(trans, root, lower->start,
-				      lower->len, lower->start, c->start,
-				      root->root_key.objectid,
-				      trans->transid, level - 1);
-	BUG_ON(ret);
-
 	/* the super has an extra ref to root->node */
 	free_extent_buffer(old);
 
@@ -2244,20 +2205,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	}
 
 	c_nritems = btrfs_header_nritems(c);
+	mid = (c_nritems + 1) / 2;
+	btrfs_node_key(c, &disk_key, mid);
 
-	split = btrfs_alloc_free_block(trans, root, root->nodesize,
-					path->nodes[level + 1]->start,
+	split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
 					root->root_key.objectid,
-					trans->transid, level, c->start, 0);
+					&disk_key, level, c->start, 0);
 	if (IS_ERR(split))
 		return PTR_ERR(split);
 
-	btrfs_set_header_flags(split, btrfs_header_flags(c));
+	memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_level(split, btrfs_header_level(c));
 	btrfs_set_header_bytenr(split, split->start);
 	btrfs_set_header_generation(split, trans->transid);
+	btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(split, root->root_key.objectid);
-	btrfs_set_header_flags(split, 0);
 	write_extent_buffer(split, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(split),
 			    BTRFS_FSID_SIZE);
@@ -2265,7 +2227,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
 			    BTRFS_UUID_SIZE);
 
-	mid = (c_nritems + 1) / 2;
 
 	copy_extent_buffer(split, c,
 			   btrfs_node_key_ptr_offset(0),
@@ -2278,16 +2239,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(c);
 	btrfs_mark_buffer_dirty(split);
 
-	btrfs_node_key(split, &disk_key, 0);
 	wret = insert_ptr(trans, root, path, &disk_key, split->start,
 			  path->slots[level + 1] + 1,
 			  level + 1);
 	if (wret)
 		ret = wret;
 
-	ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
-	BUG_ON(ret);
-
 	if (path->slots[level] >= mid) {
 		path->slots[level] -= mid;
 		btrfs_tree_unlock(c);
@@ -2360,7 +2317,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	u32 right_nritems;
 	u32 data_end;
 	u32 this_item_size;
-	int ret;
 
 	if (empty)
 		nr = 0;
@@ -2473,9 +2429,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 		btrfs_mark_buffer_dirty(left);
 	btrfs_mark_buffer_dirty(right);
 
-	ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
-	BUG_ON(ret);
-
 	btrfs_item_key(right, &disk_key, 0);
 	btrfs_set_node_key(upper, &disk_key, slot + 1);
 	btrfs_mark_buffer_dirty(upper);
@@ -2720,10 +2673,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	if (right_nritems)
 		btrfs_mark_buffer_dirty(right);
 
-	ret = btrfs_update_ref(trans, root, right, left,
-			       old_left_nritems, push_items);
-	BUG_ON(ret);
-
 	btrfs_item_key(right, &disk_key, 0);
 	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
 	if (wret)
@@ -2880,9 +2829,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(l);
 	BUG_ON(path->slots[0] != slot);
 
-	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
-	BUG_ON(ret);
-
 	if (mid <= slot) {
 		btrfs_tree_unlock(path->nodes[0]);
 		free_extent_buffer(path->nodes[0]);
@@ -2911,6 +2857,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 			       struct btrfs_path *path, int data_size,
 			       int extend)
 {
+	struct btrfs_disk_key disk_key;
 	struct extent_buffer *l;
 	u32 nritems;
 	int mid;
@@ -2918,7 +2865,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	struct extent_buffer *right;
 	int ret = 0;
 	int wret;
-	int double_split;
+	int split;
 	int num_doubles = 0;
 
 	/* first try to make some room by pushing left and right */
@@ -2945,16 +2892,53 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 			return ret;
 	}
 again:
-	double_split = 0;
+	split = 1;
 	l = path->nodes[0];
 	slot = path->slots[0];
 	nritems = btrfs_header_nritems(l);
 	mid = (nritems + 1) / 2;
 
-	right = btrfs_alloc_free_block(trans, root, root->leafsize,
-					path->nodes[1]->start,
+	if (mid <= slot) {
+		if (nritems == 1 ||
+		    leaf_space_used(l, mid, nritems - mid) + data_size >
+			BTRFS_LEAF_DATA_SIZE(root)) {
+			if (slot >= nritems) {
+				split = 0;
+			} else {
+				mid = slot;
+				if (mid != nritems &&
+				    leaf_space_used(l, mid, nritems - mid) +
+				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+					split = 2;
+				}
+			}
+		}
+	} else {
+		if (leaf_space_used(l, 0, mid) + data_size >
+			BTRFS_LEAF_DATA_SIZE(root)) {
+			if (!extend && data_size && slot == 0) {
+				split = 0;
+			} else if ((extend || !data_size) && slot == 0) {
+				mid = 1;
+			} else {
+				mid = slot;
+				if (mid != nritems &&
+				    leaf_space_used(l, mid, nritems - mid) +
+				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+					split = 2 ;
+				}
+			}
+		}
+	}
+
+	if (split == 0)
+		btrfs_cpu_key_to_disk(&disk_key, ins_key);
+	else
+		btrfs_item_key(l, &disk_key, mid);
+
+	right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
 					root->root_key.objectid,
-					trans->transid, 0, l->start, 0);
+					&disk_key, 0, l->start, 0);
 	if (IS_ERR(right)) {
 		BUG_ON(1);
 		return PTR_ERR(right);
@@ -2963,6 +2947,7 @@ again:
 	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_bytenr(right, right->start);
 	btrfs_set_header_generation(right, trans->transid);
+	btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(right, root->root_key.objectid);
 	btrfs_set_header_level(right, 0);
 	write_extent_buffer(right, root->fs_info->fsid,
@@ -2973,79 +2958,47 @@ again:
 			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
 			    BTRFS_UUID_SIZE);
 
-	if (mid <= slot) {
-		if (nritems == 1 ||
-		    leaf_space_used(l, mid, nritems - mid) + data_size >
-			BTRFS_LEAF_DATA_SIZE(root)) {
-			if (slot >= nritems) {
-				struct btrfs_disk_key disk_key;
-
-				btrfs_cpu_key_to_disk(&disk_key, ins_key);
-				btrfs_set_header_nritems(right, 0);
-				wret = insert_ptr(trans, root, path,
-						  &disk_key, right->start,
-						  path->slots[1] + 1, 1);
-				if (wret)
-					ret = wret;
+	if (split == 0) {
+		if (mid <= slot) {
+			btrfs_set_header_nritems(right, 0);
+			wret = insert_ptr(trans, root, path,
+					  &disk_key, right->start,
+					  path->slots[1] + 1, 1);
+			if (wret)
+				ret = wret;
 
-				btrfs_tree_unlock(path->nodes[0]);
-				free_extent_buffer(path->nodes[0]);
-				path->nodes[0] = right;
-				path->slots[0] = 0;
-				path->slots[1] += 1;
-				btrfs_mark_buffer_dirty(right);
-				return ret;
-			}
-			mid = slot;
-			if (mid != nritems &&
-			    leaf_space_used(l, mid, nritems - mid) +
-			    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
-				double_split = 1;
-			}
-		}
-	} else {
-		if (leaf_space_used(l, 0, mid) + data_size >
-			BTRFS_LEAF_DATA_SIZE(root)) {
-			if (!extend && data_size && slot == 0) {
-				struct btrfs_disk_key disk_key;
-
-				btrfs_cpu_key_to_disk(&disk_key, ins_key);
-				btrfs_set_header_nritems(right, 0);
-				wret = insert_ptr(trans, root, path,
-						  &disk_key,
-						  right->start,
-						  path->slots[1], 1);
+			btrfs_tree_unlock(path->nodes[0]);
+			free_extent_buffer(path->nodes[0]);
+			path->nodes[0] = right;
+			path->slots[0] = 0;
+			path->slots[1] += 1;
+		} else {
+			btrfs_set_header_nritems(right, 0);
+			wret = insert_ptr(trans, root, path,
+					  &disk_key,
+					  right->start,
+					  path->slots[1], 1);
+			if (wret)
+				ret = wret;
+			btrfs_tree_unlock(path->nodes[0]);
+			free_extent_buffer(path->nodes[0]);
+			path->nodes[0] = right;
+			path->slots[0] = 0;
+			if (path->slots[1] == 0) {
+				wret = fixup_low_keys(trans, root,
+						path, &disk_key, 1);
 				if (wret)
 					ret = wret;
-				btrfs_tree_unlock(path->nodes[0]);
-				free_extent_buffer(path->nodes[0]);
-				path->nodes[0] = right;
-				path->slots[0] = 0;
-				if (path->slots[1] == 0) {
-					wret = fixup_low_keys(trans, root,
-						      path, &disk_key, 1);
-					if (wret)
-						ret = wret;
-				}
-				btrfs_mark_buffer_dirty(right);
-				return ret;
-			} else if ((extend || !data_size) && slot == 0) {
-				mid = 1;
-			} else {
-				mid = slot;
-				if (mid != nritems &&
-				    leaf_space_used(l, mid, nritems - mid) +
-				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
-					double_split = 1;
-				}
 			}
 		}
+		btrfs_mark_buffer_dirty(right);
+		return ret;
 	}
 
 	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
 	BUG_ON(ret);
 
-	if (double_split) {
+	if (split == 2) {
 		BUG_ON(num_doubles != 0);
 		num_doubles++;
 		goto again;
@@ -3447,7 +3400,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 		/* figure out how many keys we can insert in here */
 		total_data = data_size[0];
 		for (i = 1; i < nr; i++) {
-			if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
+			if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
 				break;
 			total_data += data_size[i];
 		}
@@ -3745,9 +3698,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
 /*
  * a helper function to delete the leaf pointed to by path->slots[1] and
- * path->nodes[1].  bytenr is the node block pointer, but since the callers
- * already know it, it is faster to have them pass it down than to
- * read it out of the node again.
+ * path->nodes[1].
  *
  * This deletes the pointer in path->nodes[1] and frees the leaf
  * block extent.  zero is returned if it all worked out, < 0 otherwise.
@@ -3755,15 +3706,14 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  * The path must have already been setup for deleting the leaf, including
  * all the proper balancing.  path->nodes[1] must be locked.
  */
-noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path, u64 bytenr)
+static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct extent_buffer *leaf)
 {
 	int ret;
-	u64 root_gen = btrfs_header_generation(path->nodes[1]);
-	u64 parent_start = path->nodes[1]->start;
-	u64 parent_owner = btrfs_header_owner(path->nodes[1]);
 
+	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
 	ret = del_ptr(trans, root, path, 1, path->slots[1]);
 	if (ret)
 		return ret;
@@ -3774,10 +3724,8 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
 	 */
 	btrfs_unlock_up_safe(path, 0);
 
-	ret = btrfs_free_extent(trans, root, bytenr,
-				btrfs_level_size(root, 0),
-				parent_start, parent_owner,
-				root_gen, 0, 1);
+	ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
+				0, root->root_key.objectid, 0, 0);
 	return ret;
 }
 /*
@@ -3845,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		if (leaf == root->node) {
 			btrfs_set_header_level(leaf, 0);
 		} else {
-			ret = btrfs_del_leaf(trans, root, path, leaf->start);
+			ret = btrfs_del_leaf(trans, root, path, leaf);
 			BUG_ON(ret);
 		}
 	} else {
@@ -3884,8 +3832,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
 			if (btrfs_header_nritems(leaf) == 0) {
 				path->slots[1] = slot;
-				ret = btrfs_del_leaf(trans, root, path,
-						     leaf->start);
+				ret = btrfs_del_leaf(trans, root, path, leaf);
 				BUG_ON(ret);
 				free_extent_buffer(leaf);
 			} else {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4414a5d..ce3ab4e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,8 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAX_LEVEL 8
 
+#define BTRFS_COMPAT_EXTENT_TREE_V0
+
 /*
  * files bigger than this get some pre-flushing when they are added
  * to the ordered operations list.  That way we limit the total
@@ -267,7 +269,18 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 }
 
 #define BTRFS_FSID_SIZE 16
-#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0)
+#define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
+#define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)
+#define BTRFS_SUPER_FLAG_SEEDING	(1ULL << 32)
+#define BTRFS_SUPER_FLAG_METADUMP	(1ULL << 33)
+
+#define BTRFS_BACKREF_REV_MAX		256
+#define BTRFS_BACKREF_REV_SHIFT		56
+#define BTRFS_BACKREF_REV_MASK		(((u64)BTRFS_BACKREF_REV_MAX - 1) << \
+					 BTRFS_BACKREF_REV_SHIFT)
+
+#define BTRFS_OLD_BACKREF_REV		0
+#define BTRFS_MIXED_BACKREF_REV		1
 
 /*
  * every tree block (leaf or node) starts with this header.
@@ -296,7 +309,6 @@ struct btrfs_header {
 					sizeof(struct btrfs_item) - \
 					sizeof(struct btrfs_file_extent_item))
 
-#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
 
 /*
  * this is a very generous portion of the super block, giving us
@@ -355,9 +367,12 @@ struct btrfs_super_block {
  * Compat flags that we support.  If any incompat flags are set other than the
  * ones specified below then we will fail to mount
  */
-#define BTRFS_FEATURE_COMPAT_SUPP	0x0
-#define BTRFS_FEATURE_COMPAT_RO_SUPP	0x0
-#define BTRFS_FEATURE_INCOMPAT_SUPP	0x0
+#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
+
+#define BTRFS_FEATURE_COMPAT_SUPP		0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
+#define BTRFS_FEATURE_INCOMPAT_SUPP		\
+	BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF
 
 /*
  * A leaf is full of items. offset and size tell us where to find
@@ -421,23 +436,65 @@ struct btrfs_path {
 	unsigned int keep_locks:1;
 	unsigned int skip_locking:1;
 	unsigned int leave_spinning:1;
+	unsigned int search_commit_root:1;
 };
 
 /*
  * items in the extent btree are used to record the objectid of the
  * owner of the block and the number of references
  */
+
 struct btrfs_extent_item {
+	__le64 refs;
+	__le64 generation;
+	__le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_item_v0 {
 	__le32 refs;
 } __attribute__ ((__packed__));
 
-struct btrfs_extent_ref {
+#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
+					sizeof(struct btrfs_item))
+
+#define BTRFS_EXTENT_FLAG_DATA		(1ULL << 0)
+#define BTRFS_EXTENT_FLAG_TREE_BLOCK	(1ULL << 1)
+
+/* following flags only apply to tree blocks */
+
+/* use full backrefs for extent pointers in the block */
+#define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)
+
+struct btrfs_tree_block_info {
+	struct btrfs_disk_key key;
+	u8 level;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_data_ref {
+	__le64 root;
+	__le64 objectid;
+	__le64 offset;
+	__le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_shared_data_ref {
+	__le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_inline_ref {
+	u8 type;
+	u64 offset;
+} __attribute__ ((__packed__));
+
+/* old style backrefs item */
+struct btrfs_extent_ref_v0 {
 	__le64 root;
 	__le64 generation;
 	__le64 objectid;
-	__le32 num_refs;
+	__le32 count;
 } __attribute__ ((__packed__));
 
+
 /* dev extents record free space on individual devices.  The owner
  * field points back to the chunk allocation mapping tree that allocated
  * the extent.  The chunk tree uuid field is a way to double check the owner
@@ -695,12 +752,7 @@ struct btrfs_block_group_cache {
 	struct list_head cluster_list;
 };
 
-struct btrfs_leaf_ref_tree {
-	struct rb_root root;
-	struct list_head list;
-	spinlock_t lock;
-};
-
+struct reloc_control;
 struct btrfs_device;
 struct btrfs_fs_devices;
 struct btrfs_fs_info {
@@ -831,18 +883,11 @@ struct btrfs_fs_info {
 	struct task_struct *cleaner_kthread;
 	int thread_pool_size;
 
-	/* tree relocation relocated fields */
-	struct list_head dead_reloc_roots;
-	struct btrfs_leaf_ref_tree reloc_ref_tree;
-	struct btrfs_leaf_ref_tree shared_ref_tree;
-
 	struct kobject super_kobj;
 	struct completion kobj_unregister;
 	int do_barriers;
 	int closing;
 	int log_root_recovering;
-	atomic_t throttles;
-	atomic_t throttle_gen;
 
 	u64 total_pinned;
 
@@ -861,6 +906,8 @@ struct btrfs_fs_info {
 	 */
 	struct list_head space_info;
 
+	struct reloc_control *reloc_ctl;
+
 	spinlock_t delalloc_lock;
 	spinlock_t new_trans_lock;
 	u64 delalloc_bytes;
@@ -891,7 +938,6 @@ struct btrfs_fs_info {
  * in ram representation of the tree.  extent_root is used for all allocations
  * and for the extent tree extent_root root.
  */
-struct btrfs_dirty_root;
 struct btrfs_root {
 	struct extent_buffer *node;
 
@@ -899,9 +945,6 @@ struct btrfs_root {
 	spinlock_t node_lock;
 
 	struct extent_buffer *commit_root;
-	struct btrfs_leaf_ref_tree *ref_tree;
-	struct btrfs_leaf_ref_tree ref_tree_struct;
-	struct btrfs_dirty_root *dirty_root;
 	struct btrfs_root *log_root;
 	struct btrfs_root *reloc_root;
 
@@ -952,10 +995,15 @@ struct btrfs_root {
 	/* the dirty list is only used by non-reference counted roots */
 	struct list_head dirty_list;
 
+	struct list_head root_list;
+
 	spinlock_t list_lock;
-	struct list_head dead_list;
 	struct list_head orphan_list;
 
+	spinlock_t inode_lock;
+	/* red-black tree that keeps track of in-memory inodes */
+	struct rb_root inode_tree;
+
 	/*
 	 * right now this just gets used so that a root has its own devid
 	 * for stat.  It may be used for more later
@@ -1017,7 +1065,16 @@ struct btrfs_root {
  * are used, and how many references there are to each block
  */
 #define BTRFS_EXTENT_ITEM_KEY	168
-#define BTRFS_EXTENT_REF_KEY	180
+
+#define BTRFS_TREE_BLOCK_REF_KEY	176
+
+#define BTRFS_EXTENT_DATA_REF_KEY	178
+
+#define BTRFS_EXTENT_REF_V0_KEY		180
+
+#define BTRFS_SHARED_BLOCK_REF_KEY	182
+
+#define BTRFS_SHARED_DATA_REF_KEY	184
 
 /*
  * block groups give us hints into the extent allocation trees.  Which
@@ -1317,24 +1374,67 @@ static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
 	return (u8 *)((unsigned long)dev + ptr);
 }
 
-/* struct btrfs_extent_ref */
-BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
-BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
-BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64);
-BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
 
-BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref,
-			 generation, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref,
-			 objectid, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref,
-			 num_refs, 32);
+BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
+
+
+BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
+
+static inline void btrfs_tree_block_key(struct extent_buffer *eb,
+					struct btrfs_tree_block_info *item,
+					struct btrfs_disk_key *key)
+{
+	read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
+					    struct btrfs_tree_block_info *item,
+					    struct btrfs_disk_key *key)
+{
+	write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
 
-/* struct btrfs_extent_item */
-BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item,
-			 refs, 32);
+BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
+		   root, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
+		   objectid, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
+		   offset, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
+		   count, 32);
+
+BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
+		   count, 32);
+
+BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
+		   type, 8);
+BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
+		   offset, 64);
+
+static inline u32 btrfs_extent_inline_ref_size(int type)
+{
+	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    type == BTRFS_SHARED_BLOCK_REF_KEY)
+		return sizeof(struct btrfs_extent_inline_ref);
+	if (type == BTRFS_SHARED_DATA_REF_KEY)
+		return sizeof(struct btrfs_shared_data_ref) +
+		       sizeof(struct btrfs_extent_inline_ref);
+	if (type == BTRFS_EXTENT_DATA_REF_KEY)
+		return sizeof(struct btrfs_extent_data_ref) +
+		       offsetof(struct btrfs_extent_inline_ref, offset);
+	BUG();
+	return 0;
+}
+
+BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
 
 /* struct btrfs_node */
 BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
@@ -1558,6 +1658,21 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
 	return (flags & flag) == flag;
 }
 
+static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+{
+	u64 flags = btrfs_header_flags(eb);
+	return flags >> BTRFS_BACKREF_REV_SHIFT;
+}
+
+static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
+						int rev)
+{
+	u64 flags = btrfs_header_flags(eb);
+	flags &= ~BTRFS_BACKREF_REV_MASK;
+	flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT;
+	btrfs_set_header_flags(eb, flags);
+}
+
 static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
 {
 	unsigned long ptr = offsetof(struct btrfs_header, fsid);
@@ -1790,39 +1905,32 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, u64 objectid, u64 bytenr);
+			  struct btrfs_root *root,
+			  u64 objectid, u64 offset, u64 bytenr);
 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
 						 u64 bytenr);
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 u64 btrfs_find_block_group(struct btrfs_root *root,
 			   u64 search_start, u64 search_hint, int owner);
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
-					     struct btrfs_root *root,
-					     u32 blocksize, u64 parent,
-					     u64 root_objectid,
-					     u64 ref_generation,
-					     int level,
-					     u64 hint,
-					     u64 empty_size);
+					struct btrfs_root *root, u32 blocksize,
+					u64 parent, u64 root_objectid,
+					struct btrfs_disk_key *key, int level,
+					u64 hint, u64 empty_size);
 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 					    struct btrfs_root *root,
 					    u64 bytenr, u32 blocksize,
 					    int level);
-int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root,
-		       u64 num_bytes, u64 parent, u64 min_bytes,
-		       u64 root_objectid, u64 ref_generation,
-		       u64 owner, u64 empty_size, u64 hint_byte,
-		       u64 search_end, struct btrfs_key *ins, u64 data);
-int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, u64 parent,
-				u64 root_objectid, u64 ref_generation,
-				u64 owner, struct btrfs_key *ins);
-int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, u64 parent,
-				u64 root_objectid, u64 ref_generation,
-				u64 owner, struct btrfs_key *ins);
+int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 root_objectid, u64 owner,
+				     u64 offset, struct btrfs_key *ins);
+int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   u64 root_objectid, u64 owner, u64 offset,
+				   struct btrfs_key *ins);
 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root,
 				  u64 num_bytes, u64 min_alloc_size,
@@ -1830,18 +1938,18 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 				  u64 search_end, struct btrfs_key *ins,
 				  u64 data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *orig_buf, struct extent_buffer *buf,
-		  u32 *nr_extents);
-int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		    struct extent_buffer *buf, u32 nr_extents);
-int btrfs_update_ref(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root, struct extent_buffer *orig_buf,
-		     struct extent_buffer *buf, int start_slot, int nr);
+		  struct extent_buffer *buf, int full_backref);
+int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *buf, int full_backref);
+int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 flags,
+				int is_data);
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent,
-		      u64 root_objectid, u64 ref_generation,
-		      u64 owner_objectid, int pin);
+		      u64 root_objectid, u64 owner, u64 offset);
+
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
@@ -1849,13 +1957,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 ref_generation,
-			 u64 owner_objectid);
-int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr, u64 num_bytes,
-			    u64 orig_parent, u64 parent,
-			    u64 root_objectid, u64 ref_generation,
-			    u64 owner_objectid);
+			 u64 root_objectid, u64 owner, u64 offset);
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				    struct btrfs_root *root);
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
@@ -1867,16 +1970,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 			   u64 size);
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 group_start);
-int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
-int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root);
-int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
-int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root,
-			       struct extent_buffer *buf, u64 orig_start);
-int btrfs_add_dead_reloc_root(struct btrfs_root *root);
-int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
+int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
+				struct btrfs_block_group_cache *group);
+
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
@@ -1891,13 +1987,12 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 			      u64 bytes);
 /* ctree.c */
+int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+		     int level, int *slot);
+int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
 int btrfs_previous_item(struct btrfs_root *root,
 			struct btrfs_path *path, u64 min_objectid,
 			int type);
-int btrfs_merge_path(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root,
-		     struct btrfs_key *node_keys,
-		     u64 *nodes, int lowest_level);
 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, struct btrfs_path *path,
 			    struct btrfs_key *new_key);
@@ -1918,6 +2013,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      struct extent_buffer *buf,
 		      struct extent_buffer **cow_ret, u64 new_root_objectid);
+int btrfs_block_can_be_shared(struct btrfs_root *root,
+			      struct extent_buffer *buf);
 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_path *path, u32 data_size);
 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
@@ -1944,9 +2041,6 @@ void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int slot, int nr);
-int btrfs_del_leaf(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path, u64 bytenr);
 static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path)
@@ -2005,8 +2099,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
 			 btrfs_root_item *item, struct btrfs_key *key);
 int btrfs_search_root(struct btrfs_root *root, u64 search_start,
 		      u64 *found_objectid);
-int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
-			  struct btrfs_root *latest_root);
+int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
+int btrfs_set_root_node(struct btrfs_root_item *item,
+			struct extent_buffer *node);
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
@@ -2139,7 +2234,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
-void btrfs_read_locked_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, int wait);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2147,12 +2241,8 @@ void btrfs_destroy_inode(struct inode *inode);
 int btrfs_init_cachep(void);
 void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
-struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
-			    struct btrfs_root *root, int wait);
-struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
-				struct btrfs_root *root);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
-			 struct btrfs_root *root, int *is_new);
+			 struct btrfs_root *root);
 int btrfs_commit_write(struct file *file, struct page *page,
 		       unsigned from, unsigned to);
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2209,4 +2299,12 @@ int btrfs_check_acl(struct inode *inode, int mask);
 int btrfs_init_acl(struct inode *inode, struct inode *dir);
 int btrfs_acl_chmod(struct inode *inode);
 
+/* relocation.c */
+int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
+int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root);
+int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root);
+int btrfs_recover_relocation(struct btrfs_root *root);
+int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
 #endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index d6c01c0..84e6781 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -29,27 +29,87 @@
  * add extents in the middle of btrfs_search_slot, and it allows
  * us to buffer up frequently modified backrefs in an rb tree instead
  * of hammering updates on the extent allocation tree.
- *
- * Right now this code is only used for reference counted trees, but
- * the long term goal is to get rid of the similar code for delayed
- * extent tree modifications.
  */
 
 /*
- * entries in the rb tree are ordered by the byte number of the extent
- * and by the byte number of the parent block.
+ * compare two delayed tree backrefs with same bytenr and type
+ */
+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
+			  struct btrfs_delayed_tree_ref *ref1)
+{
+	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
+		if (ref1->root < ref2->root)
+			return -1;
+		if (ref1->root > ref2->root)
+			return 1;
+	} else {
+		if (ref1->parent < ref2->parent)
+			return -1;
+		if (ref1->parent > ref2->parent)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * compare two delayed data backrefs with same bytenr and type
  */
-static int comp_entry(struct btrfs_delayed_ref_node *ref,
-		      u64 bytenr, u64 parent)
+static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
+			  struct btrfs_delayed_data_ref *ref1)
 {
-	if (bytenr < ref->bytenr)
+	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
+		if (ref1->root < ref2->root)
+			return -1;
+		if (ref1->root > ref2->root)
+			return 1;
+		if (ref1->objectid < ref2->objectid)
+			return -1;
+		if (ref1->objectid > ref2->objectid)
+			return 1;
+		if (ref1->offset < ref2->offset)
+			return -1;
+		if (ref1->offset > ref2->offset)
+			return 1;
+	} else {
+		if (ref1->parent < ref2->parent)
+			return -1;
+		if (ref1->parent > ref2->parent)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * entries in the rb tree are ordered by the byte number of the extent,
+ * type of the delayed backrefs and content of delayed backrefs.
+ */
+static int comp_entry(struct btrfs_delayed_ref_node *ref2,
+		      struct btrfs_delayed_ref_node *ref1)
+{
+	if (ref1->bytenr < ref2->bytenr)
 		return -1;
-	if (bytenr > ref->bytenr)
+	if (ref1->bytenr > ref2->bytenr)
 		return 1;
-	if (parent < ref->parent)
+	if (ref1->is_head && ref2->is_head)
+		return 0;
+	if (ref2->is_head)
 		return -1;
-	if (parent > ref->parent)
+	if (ref1->is_head)
 		return 1;
+	if (ref1->type < ref2->type)
+		return -1;
+	if (ref1->type > ref2->type)
+		return 1;
+	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
+		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
+				      btrfs_delayed_node_to_tree_ref(ref1));
+	} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
+		   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
+		return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
+				      btrfs_delayed_node_to_data_ref(ref1));
+	}
+	BUG();
 	return 0;
 }
 
@@ -59,20 +119,21 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref,
  * inserted.
  */
 static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
-						  u64 bytenr, u64 parent,
 						  struct rb_node *node)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent_node = NULL;
 	struct btrfs_delayed_ref_node *entry;
+	struct btrfs_delayed_ref_node *ins;
 	int cmp;
 
+	ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
 	while (*p) {
 		parent_node = *p;
 		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
 				 rb_node);
 
-		cmp = comp_entry(entry, bytenr, parent);
+		cmp = comp_entry(entry, ins);
 		if (cmp < 0)
 			p = &(*p)->rb_left;
 		else if (cmp > 0)
@@ -81,18 +142,17 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
 			return entry;
 	}
 
-	entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
 	rb_link_node(node, parent_node, p);
 	rb_insert_color(node, root);
 	return NULL;
 }
 
 /*
- * find an entry based on (bytenr,parent).  This returns the delayed
- * ref if it was able to find one, or NULL if nothing was in that spot
+ * find an head entry based on bytenr. This returns the delayed ref
+ * head if it was able to find one, or NULL if nothing was in that spot
  */
-static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
-				  u64 bytenr, u64 parent,
+static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
+				  u64 bytenr,
 				  struct btrfs_delayed_ref_node **last)
 {
 	struct rb_node *n = root->rb_node;
@@ -105,7 +165,15 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
 		if (last)
 			*last = entry;
 
-		cmp = comp_entry(entry, bytenr, parent);
+		if (bytenr < entry->bytenr)
+			cmp = -1;
+		else if (bytenr > entry->bytenr)
+			cmp = 1;
+		else if (!btrfs_delayed_ref_is_head(entry))
+			cmp = 1;
+		else
+			cmp = 0;
+
 		if (cmp < 0)
 			n = n->rb_left;
 		else if (cmp > 0)
@@ -154,7 +222,7 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
 		node = rb_first(&delayed_refs->root);
 	} else {
 		ref = NULL;
-		tree_search(&delayed_refs->root, start, (u64)-1, &ref);
+		find_ref_head(&delayed_refs->root, start, &ref);
 		if (ref) {
 			struct btrfs_delayed_ref_node *tmp;
 
@@ -234,7 +302,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref) {
 		prev_node = rb_prev(&ref->rb_node);
 		if (!prev_node)
@@ -250,25 +318,28 @@ out:
 }
 
 /*
- * helper function to lookup reference count
+ * helper function to lookup reference count and flags of extent.
  *
  * the head node for delayed ref is used to store the sum of all the
- * reference count modifications queued up in the rbtree.  This way you
- * can check to see what the reference count would be if all of the
- * delayed refs are processed.
+ * reference count modifications queued up in the rbtree. the head
+ * node may also store the extent flags to set. This way you can check
+ * to see what the reference count and extent flags would be if all of
+ * the delayed refs are not processed.
  */
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u32 *refs)
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 bytenr,
+			     u64 num_bytes, u64 *refs, u64 *flags)
 {
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *head;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_path *path;
-	struct extent_buffer *leaf;
 	struct btrfs_extent_item *ei;
+	struct extent_buffer *leaf;
 	struct btrfs_key key;
-	u32 num_refs;
+	u32 item_size;
+	u64 num_refs;
+	u64 extent_flags;
 	int ret;
 
 	path = btrfs_alloc_path();
@@ -287,37 +358,60 @@ again:
 
 	if (ret == 0) {
 		leaf = path->nodes[0];
-		ei = btrfs_item_ptr(leaf, path->slots[0],
-				    struct btrfs_extent_item);
-		num_refs = btrfs_extent_refs(leaf, ei);
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+		if (item_size >= sizeof(*ei)) {
+			ei = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_extent_item);
+			num_refs = btrfs_extent_refs(leaf, ei);
+			extent_flags = btrfs_extent_flags(leaf, ei);
+		} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			struct btrfs_extent_item_v0 *ei0;
+			BUG_ON(item_size != sizeof(*ei0));
+			ei0 = btrfs_item_ptr(leaf, path->slots[0],
+					     struct btrfs_extent_item_v0);
+			num_refs = btrfs_extent_refs_v0(leaf, ei0);
+			/* FIXME: this isn't correct for data */
+			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+#else
+			BUG();
+#endif
+		}
+		BUG_ON(num_refs == 0);
 	} else {
 		num_refs = 0;
+		extent_flags = 0;
 		ret = 0;
 	}
 
 	spin_lock(&delayed_refs->lock);
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref) {
 		head = btrfs_delayed_node_to_head(ref);
-		if (mutex_trylock(&head->mutex)) {
-			num_refs += ref->ref_mod;
-			mutex_unlock(&head->mutex);
-			*refs = num_refs;
-			goto out;
-		}
+		if (!mutex_trylock(&head->mutex)) {
+			atomic_inc(&ref->refs);
+			spin_unlock(&delayed_refs->lock);
 
-		atomic_inc(&ref->refs);
-		spin_unlock(&delayed_refs->lock);
+			btrfs_release_path(root->fs_info->extent_root, path);
 
-		btrfs_release_path(root->fs_info->extent_root, path);
+			mutex_lock(&head->mutex);
+			mutex_unlock(&head->mutex);
+			btrfs_put_delayed_ref(ref);
+			goto again;
+		}
+		if (head->extent_op && head->extent_op->update_flags)
+			extent_flags |= head->extent_op->flags_to_set;
+		else
+			BUG_ON(num_refs == 0);
 
-		mutex_lock(&head->mutex);
+		num_refs += ref->ref_mod;
 		mutex_unlock(&head->mutex);
-		btrfs_put_delayed_ref(ref);
-		goto again;
-	} else {
-		*refs = num_refs;
 	}
+	WARN_ON(num_refs == 0);
+	if (refs)
+		*refs = num_refs;
+	if (flags)
+		*flags = extent_flags;
 out:
 	spin_unlock(&delayed_refs->lock);
 	btrfs_free_path(path);
@@ -338,16 +432,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
 		    struct btrfs_delayed_ref_node *existing,
 		    struct btrfs_delayed_ref_node *update)
 {
-	struct btrfs_delayed_ref *existing_ref;
-	struct btrfs_delayed_ref *ref;
-
-	existing_ref = btrfs_delayed_node_to_ref(existing);
-	ref = btrfs_delayed_node_to_ref(update);
-
-	if (ref->pin)
-		existing_ref->pin = 1;
-
-	if (ref->action != existing_ref->action) {
+	if (update->action != existing->action) {
 		/*
 		 * this is effectively undoing either an add or a
 		 * drop.  We decrement the ref_mod, and if it goes
@@ -363,20 +448,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
 			delayed_refs->num_entries--;
 			if (trans->delayed_ref_updates)
 				trans->delayed_ref_updates--;
+		} else {
+			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
+				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
 		}
 	} else {
-		if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
-			/* if we're adding refs, make sure all the
-			 * details match up.  The extent could
-			 * have been totally freed and reallocated
-			 * by a different owner before the delayed
-			 * ref entries were removed.
-			 */
-			existing_ref->owner_objectid = ref->owner_objectid;
-			existing_ref->generation = ref->generation;
-			existing_ref->root = ref->root;
-			existing->num_bytes = update->num_bytes;
-		}
+		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
+			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
 		/*
 		 * the action on the existing ref matches
 		 * the action on the ref we're trying to add.
@@ -401,6 +479,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
 
 	existing_ref = btrfs_delayed_node_to_head(existing);
 	ref = btrfs_delayed_node_to_head(update);
+	BUG_ON(existing_ref->is_data != ref->is_data);
 
 	if (ref->must_insert_reserved) {
 		/* if the extent was freed and then
@@ -420,6 +499,24 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
 
 	}
 
+	if (ref->extent_op) {
+		if (!existing_ref->extent_op) {
+			existing_ref->extent_op = ref->extent_op;
+		} else {
+			if (ref->extent_op->update_key) {
+				memcpy(&existing_ref->extent_op->key,
+				       &ref->extent_op->key,
+				       sizeof(ref->extent_op->key));
+				existing_ref->extent_op->update_key = 1;
+			}
+			if (ref->extent_op->update_flags) {
+				existing_ref->extent_op->flags_to_set |=
+					ref->extent_op->flags_to_set;
+				existing_ref->extent_op->update_flags = 1;
+			}
+			kfree(ref->extent_op);
+		}
+	}
 	/*
 	 * update the reference mod on the head to reflect this new operation
 	 */
@@ -427,19 +524,16 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
 }
 
 /*
- * helper function to actually insert a delayed ref into the rbtree.
+ * helper function to actually insert a head node into the rbtree.
  * this does all the dirty work in terms of maintaining the correct
- * overall modification count in the head node and properly dealing
- * with updating existing nodes as new modifications are queued.
+ * overall modification count.
  */
-static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
-			  struct btrfs_delayed_ref_node *ref,
-			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid, int action,
-			  int pin)
+static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
+					struct btrfs_delayed_ref_node *ref,
+					u64 bytenr, u64 num_bytes,
+					int action, int is_data)
 {
 	struct btrfs_delayed_ref_node *existing;
-	struct btrfs_delayed_ref *full_ref;
 	struct btrfs_delayed_ref_head *head_ref = NULL;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int count_mod = 1;
@@ -449,12 +543,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 	 * the head node stores the sum of all the mods, so dropping a ref
 	 * should drop the sum in the head node by one.
 	 */
-	if (parent == (u64)-1) {
-		if (action == BTRFS_DROP_DELAYED_REF)
-			count_mod = -1;
-		else if (action == BTRFS_UPDATE_DELAYED_HEAD)
-			count_mod = 0;
-	}
+	if (action == BTRFS_UPDATE_DELAYED_HEAD)
+		count_mod = 0;
+	else if (action == BTRFS_DROP_DELAYED_REF)
+		count_mod = -1;
 
 	/*
 	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
@@ -467,57 +559,148 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 	 * Once we record must_insert_reserved, switch the action to
 	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
 	 */
-	if (action == BTRFS_ADD_DELAYED_EXTENT) {
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
 		must_insert_reserved = 1;
-		action = BTRFS_ADD_DELAYED_REF;
-	} else {
+	else
 		must_insert_reserved = 0;
-	}
-
 
 	delayed_refs = &trans->transaction->delayed_refs;
 
 	/* first set the basic ref node struct up */
 	atomic_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
-	ref->parent = parent;
+	ref->num_bytes = num_bytes;
 	ref->ref_mod = count_mod;
+	ref->type  = 0;
+	ref->action  = 0;
+	ref->is_head = 1;
 	ref->in_tree = 1;
+
+	head_ref = btrfs_delayed_node_to_head(ref);
+	head_ref->must_insert_reserved = must_insert_reserved;
+	head_ref->is_data = is_data;
+
+	INIT_LIST_HEAD(&head_ref->cluster);
+	mutex_init(&head_ref->mutex);
+
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
+
+	if (existing) {
+		update_existing_head_ref(existing, ref);
+		/*
+		 * we've updated the existing ref, free the newly
+		 * allocated ref
+		 */
+		kfree(ref);
+	} else {
+		delayed_refs->num_heads++;
+		delayed_refs->num_heads_ready++;
+		delayed_refs->num_entries++;
+		trans->delayed_ref_updates++;
+	}
+	return 0;
+}
+
+/*
+ * helper to insert a delayed tree ref into the rbtree.
+ */
+static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+					 struct btrfs_delayed_ref_node *ref,
+					 u64 bytenr, u64 num_bytes, u64 parent,
+					 u64 ref_root, int level, int action)
+{
+	struct btrfs_delayed_ref_node *existing;
+	struct btrfs_delayed_tree_ref *full_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
+		action = BTRFS_ADD_DELAYED_REF;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+
+	/* first set the basic ref node struct up */
+	atomic_set(&ref->refs, 1);
+	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
+	ref->ref_mod = 1;
+	ref->action = action;
+	ref->is_head = 0;
+	ref->in_tree = 1;
 
-	if (btrfs_delayed_ref_is_head(ref)) {
-		head_ref = btrfs_delayed_node_to_head(ref);
-		head_ref->must_insert_reserved = must_insert_reserved;
-		INIT_LIST_HEAD(&head_ref->cluster);
-		mutex_init(&head_ref->mutex);
+	full_ref = btrfs_delayed_node_to_tree_ref(ref);
+	if (parent) {
+		full_ref->parent = parent;
+		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
 	} else {
-		full_ref = btrfs_delayed_node_to_ref(ref);
 		full_ref->root = ref_root;
-		full_ref->generation = ref_generation;
-		full_ref->owner_objectid = owner_objectid;
-		full_ref->pin = pin;
-		full_ref->action = action;
+		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
 	}
+	full_ref->level = level;
 
-	existing = tree_insert(&delayed_refs->root, bytenr,
-			       parent, &ref->rb_node);
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
 	if (existing) {
-		if (btrfs_delayed_ref_is_head(ref))
-			update_existing_head_ref(existing, ref);
-		else
-			update_existing_ref(trans, delayed_refs, existing, ref);
+		update_existing_ref(trans, delayed_refs, existing, ref);
+		/*
+		 * we've updated the existing ref, free the newly
+		 * allocated ref
+		 */
+		kfree(ref);
+	} else {
+		delayed_refs->num_entries++;
+		trans->delayed_ref_updates++;
+	}
+	return 0;
+}
+
+/*
+ * helper to insert a delayed data ref into the rbtree.
+ */
+static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
+					 struct btrfs_delayed_ref_node *ref,
+					 u64 bytenr, u64 num_bytes, u64 parent,
+					 u64 ref_root, u64 owner, u64 offset,
+					 int action)
+{
+	struct btrfs_delayed_ref_node *existing;
+	struct btrfs_delayed_data_ref *full_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
+		action = BTRFS_ADD_DELAYED_REF;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+
+	/* first set the basic ref node struct up */
+	atomic_set(&ref->refs, 1);
+	ref->bytenr = bytenr;
+	ref->num_bytes = num_bytes;
+	ref->ref_mod = 1;
+	ref->action = action;
+	ref->is_head = 0;
+	ref->in_tree = 1;
+
+	full_ref = btrfs_delayed_node_to_data_ref(ref);
+	if (parent) {
+		full_ref->parent = parent;
+		ref->type = BTRFS_SHARED_DATA_REF_KEY;
+	} else {
+		full_ref->root = ref_root;
+		ref->type = BTRFS_EXTENT_DATA_REF_KEY;
+	}
+	full_ref->objectid = owner;
+	full_ref->offset = offset;
 
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
+
+	if (existing) {
+		update_existing_ref(trans, delayed_refs, existing, ref);
 		/*
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
 		kfree(ref);
 	} else {
-		if (btrfs_delayed_ref_is_head(ref)) {
-			delayed_refs->num_heads++;
-			delayed_refs->num_heads_ready++;
-		}
 		delayed_refs->num_entries++;
 		trans->delayed_ref_updates++;
 	}
@@ -525,37 +708,78 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 }
 
 /*
- * add a delayed ref to the tree.  This does all of the accounting required
+ * add a delayed tree ref.  This does all of the accounting required
  * to make sure the delayed ref is eventually processed before this
  * transaction commits.
  */
-int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
-			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid, int action,
-			  int pin)
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes, u64 parent,
+			       u64 ref_root,  int level, int action,
+			       struct btrfs_delayed_extent_op *extent_op)
 {
-	struct btrfs_delayed_ref *ref;
+	struct btrfs_delayed_tree_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int ret;
 
+	BUG_ON(extent_op && extent_op->is_data);
 	ref = kmalloc(sizeof(*ref), GFP_NOFS);
 	if (!ref)
 		return -ENOMEM;
 
+	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	if (!head_ref) {
+		kfree(ref);
+		return -ENOMEM;
+	}
+
+	head_ref->extent_op = extent_op;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+
 	/*
-	 * the parent = 0 case comes from cases where we don't actually
-	 * know the parent yet.  It will get updated later via a add/drop
-	 * pair.
+	 * insert both the head node and the new ref without dropping
+	 * the spin lock
 	 */
-	if (parent == 0)
-		parent = bytenr;
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
+				   action, 0);
+	BUG_ON(ret);
+
+	ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
+				   parent, ref_root, level, action);
+	BUG_ON(ret);
+	spin_unlock(&delayed_refs->lock);
+	return 0;
+}
+
+/*
+ * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
+ */
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes,
+			       u64 parent, u64 ref_root,
+			       u64 owner, u64 offset, int action,
+			       struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_delayed_data_ref *ref;
+	struct btrfs_delayed_ref_head *head_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	int ret;
+
+	BUG_ON(extent_op && !extent_op->is_data);
+	ref = kmalloc(sizeof(*ref), GFP_NOFS);
+	if (!ref)
+		return -ENOMEM;
 
 	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
 	if (!head_ref) {
 		kfree(ref);
 		return -ENOMEM;
 	}
+
+	head_ref->extent_op = extent_op;
+
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
@@ -563,14 +787,39 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 	 * insert both the head node and the new ref without dropping
 	 * the spin lock
 	 */
-	ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
-				      (u64)-1, 0, 0, 0, action, pin);
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
+				   action, 1);
 	BUG_ON(ret);
 
-	ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
-				      parent, ref_root, ref_generation,
-				      owner_objectid, action, pin);
+	ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
+				   parent, ref_root, owner, offset, action);
+	BUG_ON(ret);
+	spin_unlock(&delayed_refs->lock);
+	return 0;
+}
+
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+				u64 bytenr, u64 num_bytes,
+				struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_delayed_ref_head *head_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	int ret;
+
+	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	if (!head_ref)
+		return -ENOMEM;
+
+	head_ref->extent_op = extent_op;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
+				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+				   extent_op->is_data);
 	BUG_ON(ret);
+
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
@@ -587,7 +836,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
 	struct btrfs_delayed_ref_root *delayed_refs;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref)
 		return btrfs_delayed_node_to_head(ref);
 	return NULL;
@@ -603,6 +852,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
  *
  * It is the same as doing a ref add and delete in two separate calls.
  */
+#if 0
 int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
 			  u64 bytenr, u64 num_bytes, u64 orig_parent,
 			  u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -666,3 +916,4 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
+#endif
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 3bec2ff..f6fc67d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -30,9 +30,6 @@ struct btrfs_delayed_ref_node {
 	/* the starting bytenr of the extent */
 	u64 bytenr;
 
-	/* the parent our backref will point to */
-	u64 parent;
-
 	/* the size of the extent */
 	u64 num_bytes;
 
@@ -50,10 +47,21 @@ struct btrfs_delayed_ref_node {
 	 */
 	int ref_mod;
 
+	unsigned int action:8;
+	unsigned int type:8;
 	/* is this node still in the rbtree? */
+	unsigned int is_head:1;
 	unsigned int in_tree:1;
 };
 
+struct btrfs_delayed_extent_op {
+	struct btrfs_disk_key key;
+	u64 flags_to_set;
+	unsigned int update_key:1;
+	unsigned int update_flags:1;
+	unsigned int is_data:1;
+};
+
 /*
  * the head refs are used to hold a lock on a given extent, which allows us
  * to make sure that only one process is running the delayed refs
@@ -71,6 +79,7 @@ struct btrfs_delayed_ref_head {
 
 	struct list_head cluster;
 
+	struct btrfs_delayed_extent_op *extent_op;
 	/*
 	 * when a new extent is allocated, it is just reserved in memory
 	 * The actual extent isn't inserted into the extent allocation tree
@@ -84,27 +93,26 @@ struct btrfs_delayed_ref_head {
 	 * the free has happened.
 	 */
 	unsigned int must_insert_reserved:1;
+	unsigned int is_data:1;
 };
 
-struct btrfs_delayed_ref {
+struct btrfs_delayed_tree_ref {
 	struct btrfs_delayed_ref_node node;
+	union {
+		u64 root;
+		u64 parent;
+	};
+	int level;
+};
 
-	/* the root objectid our ref will point to */
-	u64 root;
-
-	/* the generation for the backref */
-	u64 generation;
-
-	/* owner_objectid of the backref  */
-	u64 owner_objectid;
-
-	/* operation done by this entry in the rbtree */
-	u8 action;
-
-	/* if pin == 1, when the extent is freed it will be pinned until
-	 * transaction commit
-	 */
-	unsigned int pin:1;
+struct btrfs_delayed_data_ref {
+	struct btrfs_delayed_ref_node node;
+	union {
+		u64 root;
+		u64 parent;
+	};
+	u64 objectid;
+	u64 offset;
 };
 
 struct btrfs_delayed_ref_root {
@@ -143,17 +151,25 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
 	}
 }
 
-int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
-			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid, int action,
-			  int pin);
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes, u64 parent,
+			       u64 ref_root, int level, int action,
+			       struct btrfs_delayed_extent_op *extent_op);
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes,
+			       u64 parent, u64 ref_root,
+			       u64 owner, u64 offset, int action,
+			       struct btrfs_delayed_extent_op *extent_op);
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+				u64 bytenr, u64 num_bytes,
+				struct btrfs_delayed_extent_op *extent_op);
 
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
 int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u32 *refs);
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 bytenr,
+			     u64 num_bytes, u64 *refs, u64 *flags);
 int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
 			  u64 bytenr, u64 num_bytes, u64 orig_parent,
 			  u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -169,18 +185,24 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
  */
 static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
 {
-	return node->parent == (u64)-1;
+	return node->is_head;
 }
 
 /*
  * helper functions to cast a node into its container
  */
-static inline struct btrfs_delayed_ref *
-btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node)
+static inline struct btrfs_delayed_tree_ref *
+btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
 {
 	WARN_ON(btrfs_delayed_ref_is_head(node));
-	return container_of(node, struct btrfs_delayed_ref, node);
+	return container_of(node, struct btrfs_delayed_tree_ref, node);
+}
 
+static inline struct btrfs_delayed_data_ref *
+btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
+{
+	WARN_ON(btrfs_delayed_ref_is_head(node));
+	return container_of(node, struct btrfs_delayed_data_ref, node);
 }
 
 static inline struct btrfs_delayed_ref_head *
@@ -188,6 +210,5 @@ btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
 {
 	WARN_ON(!btrfs_delayed_ref_is_head(node));
 	return container_of(node, struct btrfs_delayed_ref_head, node);
-
 }
 #endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4b0ea0b..7f5c6e3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -36,7 +36,6 @@
 #include "print-tree.h"
 #include "async-thread.h"
 #include "locking.h"
-#include "ref-cache.h"
 #include "tree-log.h"
 #include "free-space-cache.h"
 
@@ -884,7 +883,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 {
 	root->node = NULL;
 	root->commit_root = NULL;
-	root->ref_tree = NULL;
 	root->sectorsize = sectorsize;
 	root->nodesize = nodesize;
 	root->leafsize = leafsize;
@@ -899,12 +897,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	root->last_inode_alloc = 0;
 	root->name = NULL;
 	root->in_sysfs = 0;
+	root->inode_tree.rb_node = NULL;
 
 	INIT_LIST_HEAD(&root->dirty_list);
 	INIT_LIST_HEAD(&root->orphan_list);
-	INIT_LIST_HEAD(&root->dead_list);
+	INIT_LIST_HEAD(&root->root_list);
 	spin_lock_init(&root->node_lock);
 	spin_lock_init(&root->list_lock);
+	spin_lock_init(&root->inode_lock);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->log_mutex);
 	init_waitqueue_head(&root->log_writer_wait);
@@ -918,9 +918,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	extent_io_tree_init(&root->dirty_log_pages,
 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 
-	btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
-	root->ref_tree = &root->ref_tree_struct;
-
 	memset(&root->root_key, 0, sizeof(root->root_key));
 	memset(&root->root_item, 0, sizeof(root->root_item));
 	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -959,6 +956,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
+	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node);
 	return 0;
 }
@@ -1025,20 +1023,19 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
 	 */
 	root->ref_cows = 0;
 
-	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
-				      0, BTRFS_TREE_LOG_OBJECTID,
-				      trans->transid, 0, 0, 0);
+	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
+				      BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
 	if (IS_ERR(leaf)) {
 		kfree(root);
 		return ERR_CAST(leaf);
 	}
 
+	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
+	btrfs_set_header_bytenr(leaf, leaf->start);
+	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
+	btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
 	root->node = leaf;
-	btrfs_set_header_nritems(root->node, 0);
-	btrfs_set_header_level(root->node, 0);
-	btrfs_set_header_bytenr(root->node, root->node->start);
-	btrfs_set_header_generation(root->node, trans->transid);
-	btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
 
 	write_extent_buffer(root->node, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(root->node),
@@ -1081,8 +1078,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
 	inode_item->nbytes = cpu_to_le64(root->leafsize);
 	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
 
-	btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
-	btrfs_set_root_generation(&log_root->root_item, trans->transid);
+	btrfs_set_root_node(&log_root->root_item, log_root->node);
 
 	WARN_ON(root->log_root);
 	root->log_root = log_root;
@@ -1144,6 +1140,7 @@ out:
 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
+	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node);
 insert:
 	if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -1210,7 +1207,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
 	}
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_find_dead_roots(fs_info->tree_root,
-					    root->root_key.objectid, root);
+					    root->root_key.objectid);
 		BUG_ON(ret);
 		btrfs_orphan_cleanup(root);
 	}
@@ -1569,8 +1566,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	atomic_set(&fs_info->async_delalloc_pages, 0);
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
-	atomic_set(&fs_info->throttles, 0);
-	atomic_set(&fs_info->throttle_gen, 0);
 	fs_info->sb = sb;
 	fs_info->max_extent = (u64)-1;
 	fs_info->max_inline = 8192 * 1024;
@@ -1598,6 +1593,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
 	fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
 
+	RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
 	extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
 			     fs_info->btree_inode->i_mapping,
 			     GFP_NOFS);
@@ -1613,10 +1609,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 	fs_info->do_barriers = 1;
 
-	INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
-	btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
-	btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
-
 	BTRFS_I(fs_info->btree_inode)->root = tree_root;
 	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
 	       sizeof(struct btrfs_key));
@@ -1674,6 +1666,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 		goto fail_iput;
 	}
 
+	features = btrfs_super_incompat_flags(disk_super);
+	if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
+		features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+		btrfs_set_super_incompat_flags(disk_super, features);
+	}
+
 	features = btrfs_super_compat_ro_flags(disk_super) &
 		~BTRFS_FEATURE_COMPAT_RO_SUPP;
 	if (!(sb->s_flags & MS_RDONLY) && features) {
@@ -1771,7 +1769,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (ret) {
 		printk(KERN_WARNING "btrfs: failed to read the system "
 		       "array on %s\n", sb->s_id);
-		goto fail_sys_array;
+		goto fail_sb_buffer;
 	}
 
 	blocksize = btrfs_level_size(tree_root,
@@ -1785,6 +1783,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					   btrfs_super_chunk_root(disk_super),
 					   blocksize, generation);
 	BUG_ON(!chunk_root->node);
+	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
+	chunk_root->commit_root = btrfs_root_node(chunk_root);
 
 	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
 	   (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
@@ -1810,7 +1810,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					  blocksize, generation);
 	if (!tree_root->node)
 		goto fail_chunk_root;
-
+	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
+	tree_root->commit_root = btrfs_root_node(tree_root);
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
@@ -1820,14 +1821,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_DEV_TREE_OBJECTID, dev_root);
-	dev_root->track_dirty = 1;
 	if (ret)
 		goto fail_extent_root;
+	dev_root->track_dirty = 1;
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_CSUM_TREE_OBJECTID, csum_root);
 	if (ret)
-		goto fail_extent_root;
+		goto fail_dev_root;
 
 	csum_root->track_dirty = 1;
 
@@ -1881,7 +1882,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	}
 
 	if (!(sb->s_flags & MS_RDONLY)) {
-		ret = btrfs_cleanup_reloc_trees(tree_root);
+		ret = btrfs_recover_relocation(tree_root);
 		BUG_ON(ret);
 	}
 
@@ -1908,14 +1909,19 @@ fail_cleaner:
 
 fail_csum_root:
 	free_extent_buffer(csum_root->node);
+	free_extent_buffer(csum_root->commit_root);
+fail_dev_root:
+	free_extent_buffer(dev_root->node);
+	free_extent_buffer(dev_root->commit_root);
 fail_extent_root:
 	free_extent_buffer(extent_root->node);
+	free_extent_buffer(extent_root->commit_root);
 fail_tree_root:
 	free_extent_buffer(tree_root->node);
+	free_extent_buffer(tree_root->commit_root);
 fail_chunk_root:
 	free_extent_buffer(chunk_root->node);
-fail_sys_array:
-	free_extent_buffer(dev_root->node);
+	free_extent_buffer(chunk_root->commit_root);
 fail_sb_buffer:
 	btrfs_stop_workers(&fs_info->fixup_workers);
 	btrfs_stop_workers(&fs_info->delalloc_workers);
@@ -2173,6 +2179,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
 
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
 {
+	WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
 	radix_tree_delete(&fs_info->fs_roots_radix,
 			  (unsigned long)root->root_key.objectid);
 	if (root->anon_super.s_dev) {
@@ -2219,10 +2226,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
 					     ARRAY_SIZE(gang));
 		if (!ret)
 			break;
+
+		root_objectid = gang[ret - 1]->root_key.objectid + 1;
 		for (i = 0; i < ret; i++) {
 			root_objectid = gang[i]->root_key.objectid;
 			ret = btrfs_find_dead_roots(fs_info->tree_root,
-						    root_objectid, gang[i]);
+						    root_objectid);
 			BUG_ON(ret);
 			btrfs_orphan_cleanup(gang[i]);
 		}
@@ -2278,20 +2287,16 @@ int close_ctree(struct btrfs_root *root)
 		       (unsigned long long)fs_info->total_ref_cache_size);
 	}
 
-	if (fs_info->extent_root->node)
-		free_extent_buffer(fs_info->extent_root->node);
-
-	if (fs_info->tree_root->node)
-		free_extent_buffer(fs_info->tree_root->node);
-
-	if (root->fs_info->chunk_root->node)
-		free_extent_buffer(root->fs_info->chunk_root->node);
-
-	if (root->fs_info->dev_root->node)
-		free_extent_buffer(root->fs_info->dev_root->node);
-
-	if (root->fs_info->csum_root->node)
-		free_extent_buffer(root->fs_info->csum_root->node);
+	free_extent_buffer(fs_info->extent_root->node);
+	free_extent_buffer(fs_info->extent_root->commit_root);
+	free_extent_buffer(fs_info->tree_root->node);
+	free_extent_buffer(fs_info->tree_root->commit_root);
+	free_extent_buffer(root->fs_info->chunk_root->node);
+	free_extent_buffer(root->fs_info->chunk_root->commit_root);
+	free_extent_buffer(root->fs_info->dev_root->node);
+	free_extent_buffer(root->fs_info->dev_root->commit_root);
+	free_extent_buffer(root->fs_info->csum_root->node);
+	free_extent_buffer(root->fs_info->csum_root->commit_root);
 
 	btrfs_free_block_groups(root->fs_info);
 
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 85315d2..9596b40 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -78,7 +78,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
 	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 	key.offset = 0;
 
-	inode = btrfs_iget(sb, &key, root, NULL);
+	inode = btrfs_iget(sb, &key, root);
 	if (IS_ERR(inode))
 		return (void *)inode;
 
@@ -192,7 +192,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 	key.offset = 0;
 
-	return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
+	return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
 }
 
 const struct export_operations btrfs_export_ops = {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 35af933..a42419c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -30,43 +30,33 @@
 #include "transaction.h"
 #include "volumes.h"
 #include "locking.h"
-#include "ref-cache.h"
 #include "free-space-cache.h"
 
-#define PENDING_EXTENT_INSERT 0
-#define PENDING_EXTENT_DELETE 1
-#define PENDING_BACKREF_UPDATE 2
-
-struct pending_extent_op {
-	int type;
-	u64 bytenr;
-	u64 num_bytes;
-	u64 parent;
-	u64 orig_parent;
-	u64 generation;
-	u64 orig_generation;
-	int level;
-	struct list_head list;
-	int del;
-};
-
-static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
-					 struct btrfs_root *root, u64 parent,
-					 u64 root_objectid, u64 ref_generation,
-					 u64 owner, struct btrfs_key *ins,
-					 int ref_mod);
 static int update_reserved_extents(struct btrfs_root *root,
 				   u64 bytenr, u64 num, int reserve);
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc,
 			      int mark_free);
-static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
-					struct btrfs_root *root,
-					u64 bytenr, u64 num_bytes, u64 parent,
-					u64 root_objectid, u64 ref_generation,
-					u64 owner_objectid, int pin,
-					int ref_to_drop);
+static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 parent,
+				u64 root_objectid, u64 owner_objectid,
+				u64 owner_offset, int refs_to_drop,
+				struct btrfs_delayed_extent_op *extra_op);
+static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
+				    struct extent_buffer *leaf,
+				    struct btrfs_extent_item *ei);
+static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      u64 parent, u64 root_objectid,
+				      u64 flags, u64 owner, u64 offset,
+				      struct btrfs_key *ins, int ref_mod);
+static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 parent, u64 root_objectid,
+				     u64 flags, struct btrfs_disk_key *key,
+				     int level, struct btrfs_key *ins);
 
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -453,348 +443,1078 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
  *    maintenance.  This is actually the same as #2, but with a slightly
  *    different use case.
  *
+ * There are two kinds of back refs. The implicit back refs is optimized
+ * for pointers in non-shared tree blocks. For a given pointer in a block,
+ * back refs of this kind provide information about the block's owner tree
+ * and the pointer's key. These information allow us to find the block by
+ * b-tree searching. The full back refs is for pointers in tree blocks not
+ * referenced by their owner trees. The location of tree block is recorded
+ * in the back refs. Actually the full back refs is generic, and can be
+ * used in all cases the implicit back refs is used. The major shortcoming
+ * of the full back refs is its overhead. Every time a tree block gets
+ * COWed, we have to update back refs entry for all pointers in it.
+ *
+ * For a newly allocated tree block, we use implicit back refs for
+ * pointers in it. This means most tree related operations only involve
+ * implicit back refs. For a tree block created in old transaction, the
+ * only way to drop a reference to it is COW it. So we can detect the
+ * event that tree block loses its owner tree's reference and do the
+ * back refs conversion.
+ *
+ * When a tree block is COW'd through a tree, there are four cases:
+ *
+ * The reference count of the block is one and the tree is the block's
+ * owner tree. Nothing to do in this case.
+ *
+ * The reference count of the block is one and the tree is not the
+ * block's owner tree. In this case, full back refs is used for pointers
+ * in the block. Remove these full back refs, add implicit back refs for
+ * every pointers in the new block.
+ *
+ * The reference count of the block is greater than one and the tree is
+ * the block's owner tree. In this case, implicit back refs is used for
+ * pointers in the block. Add full back refs for every pointers in the
+ * block, increase lower level extents' reference counts. The original
+ * implicit back refs are entailed to the new block.
+ *
+ * The reference count of the block is greater than one and the tree is
+ * not the block's owner tree. Add implicit back refs for every pointer in
+ * the new block, increase lower level extents' reference count.
+ *
+ * Back Reference Key composing:
+ *
+ * The key objectid corresponds to the first byte in the extent,
+ * The key type is used to differentiate between types of back refs.
+ * There are different meanings of the key offset for different types
+ * of back refs.
+ *
  * File extents can be referenced by:
  *
  * - multiple snapshots, subvolumes, or different generations in one subvol
  * - different files inside a single subvolume
  * - different offsets inside a file (bookend extents in file.c)
  *
- * The extent ref structure has fields for:
+ * The extent ref structure for the implicit back refs has fields for:
  *
  * - Objectid of the subvolume root
- * - Generation number of the tree holding the reference
  * - objectid of the file holding the reference
- * - number of references holding by parent node (alway 1 for tree blocks)
- *
- * Btree leaf may hold multiple references to a file extent. In most cases,
- * these references are from same file and the corresponding offsets inside
- * the file are close together.
- *
- * When a file extent is allocated the fields are filled in:
- *     (root_key.objectid, trans->transid, inode objectid, 1)
+ * - original offset in the file
+ * - how many bookend extents
  *
- * When a leaf is cow'd new references are added for every file extent found
- * in the leaf.  It looks similar to the create case, but trans->transid will
- * be different when the block is cow'd.
+ * The key offset for the implicit back refs is hash of the first
+ * three fields.
  *
- *     (root_key.objectid, trans->transid, inode objectid,
- *      number of references in the leaf)
+ * The extent ref structure for the full back refs has field for:
  *
- * When a file extent is removed either during snapshot deletion or
- * file truncation, we find the corresponding back reference and check
- * the following fields:
+ * - number of pointers in the tree leaf
  *
- *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
- *      inode objectid)
+ * The key offset for the implicit back refs is the first byte of
+ * the tree leaf
  *
- * Btree extents can be referenced by:
- *
- * - Different subvolumes
- * - Different generations of the same subvolume
- *
- * When a tree block is created, back references are inserted:
+ * When a file extent is allocated, The implicit back refs is used.
+ * the fields are filled in:
  *
- * (root->root_key.objectid, trans->transid, level, 1)
+ *     (root_key.objectid, inode objectid, offset in file, 1)
  *
- * When a tree block is cow'd, new back references are added for all the
- * blocks it points to. If the tree block isn't in reference counted root,
- * the old back references are removed. These new back references are of
- * the form (trans->transid will have increased since creation):
+ * When a file extent is removed file truncation, we find the
+ * corresponding implicit back refs and check the following fields:
  *
- * (root->root_key.objectid, trans->transid, level, 1)
+ *     (btrfs_header_owner(leaf), inode objectid, offset in file)
  *
- * When a backref is in deleting, the following fields are checked:
+ * Btree extents can be referenced by:
  *
- * if backref was for a tree root:
- *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
- * else
- *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
+ * - Different subvolumes
  *
- * Back Reference Key composing:
+ * Both the implicit back refs and the full back refs for tree blocks
+ * only consist of key. The key offset for the implicit back refs is
+ * objectid of block's owner tree. The key offset for the full back refs
+ * is the first byte of parent block.
  *
- * The key objectid corresponds to the first byte in the extent, the key
- * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
- * byte of parent extent. If a extent is tree root, the key offset is set
- * to the key objectid.
+ * When implicit back refs is used, information about the lowest key and
+ * level of the tree block are required. These information are stored in
+ * tree block info structure.
  */
 
-static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root,
-					  struct btrfs_path *path,
-					  u64 bytenr, u64 parent,
-					  u64 ref_root, u64 ref_generation,
-					  u64 owner_objectid, int del)
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_path *path,
+				  u64 owner, u32 extra_size)
 {
+	struct btrfs_extent_item *item;
+	struct btrfs_extent_item_v0 *ei0;
+	struct btrfs_extent_ref_v0 *ref0;
+	struct btrfs_tree_block_info *bi;
+	struct extent_buffer *leaf;
 	struct btrfs_key key;
-	struct btrfs_extent_ref *ref;
+	struct btrfs_key found_key;
+	u32 new_size = sizeof(*item);
+	u64 refs;
+	int ret;
+
+	leaf = path->nodes[0];
+	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
+
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	ei0 = btrfs_item_ptr(leaf, path->slots[0],
+			     struct btrfs_extent_item_v0);
+	refs = btrfs_extent_refs_v0(leaf, ei0);
+
+	if (owner == (u64)-1) {
+		while (1) {
+			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+				ret = btrfs_next_leaf(root, path);
+				if (ret < 0)
+					return ret;
+				BUG_ON(ret > 0);
+				leaf = path->nodes[0];
+			}
+			btrfs_item_key_to_cpu(leaf, &found_key,
+					      path->slots[0]);
+			BUG_ON(key.objectid != found_key.objectid);
+			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
+				path->slots[0]++;
+				continue;
+			}
+			ref0 = btrfs_item_ptr(leaf, path->slots[0],
+					      struct btrfs_extent_ref_v0);
+			owner = btrfs_ref_objectid_v0(leaf, ref0);
+			break;
+		}
+	}
+	btrfs_release_path(root, path);
+
+	if (owner < BTRFS_FIRST_FREE_OBJECTID)
+		new_size += sizeof(*bi);
+
+	new_size -= sizeof(*ei0);
+	ret = btrfs_search_slot(trans, root, &key, path,
+				new_size + extra_size, 1);
+	if (ret < 0)
+		return ret;
+	BUG_ON(ret);
+
+	ret = btrfs_extend_item(trans, root, path, new_size);
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	btrfs_set_extent_refs(leaf, item, refs);
+	/* FIXME: get real generation */
+	btrfs_set_extent_generation(leaf, item, 0);
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		btrfs_set_extent_flags(leaf, item,
+				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
+				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
+		bi = (struct btrfs_tree_block_info *)(item + 1);
+		/* FIXME: get first key of the block */
+		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
+		btrfs_set_tree_block_level(leaf, bi, (int)owner);
+	} else {
+		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+#endif
+
+static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
+{
+	u32 high_crc = ~(u32)0;
+	u32 low_crc = ~(u32)0;
+	__le64 lenum;
+
+	lenum = cpu_to_le64(root_objectid);
+	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
+	lenum = cpu_to_le64(owner);
+	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	lenum = cpu_to_le64(offset);
+	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+
+	return ((u64)high_crc << 31) ^ (u64)low_crc;
+}
+
+static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
+				     struct btrfs_extent_data_ref *ref)
+{
+	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
+				    btrfs_extent_data_ref_objectid(leaf, ref),
+				    btrfs_extent_data_ref_offset(leaf, ref));
+}
+
+static int match_extent_data_ref(struct extent_buffer *leaf,
+				 struct btrfs_extent_data_ref *ref,
+				 u64 root_objectid, u64 owner, u64 offset)
+{
+	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
+	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
+	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
+		return 0;
+	return 1;
+}
+
+static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct btrfs_path *path,
+					   u64 bytenr, u64 parent,
+					   u64 root_objectid,
+					   u64 owner, u64 offset)
+{
+	struct btrfs_key key;
+	struct btrfs_extent_data_ref *ref;
 	struct extent_buffer *leaf;
-	u64 ref_objectid;
+	u32 nritems;
 	int ret;
+	int recow;
+	int err = -ENOENT;
 
 	key.objectid = bytenr;
-	key.type = BTRFS_EXTENT_REF_KEY;
-	key.offset = parent;
+	if (parent) {
+		key.type = BTRFS_SHARED_DATA_REF_KEY;
+		key.offset = parent;
+	} else {
+		key.type = BTRFS_EXTENT_DATA_REF_KEY;
+		key.offset = hash_extent_data_ref(root_objectid,
+						  owner, offset);
+	}
+again:
+	recow = 0;
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0) {
+		err = ret;
+		goto fail;
+	}
 
-	ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
-	if (ret < 0)
-		goto out;
-	if (ret > 0) {
-		ret = -ENOENT;
-		goto out;
+	if (parent) {
+		if (!ret)
+			return 0;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		key.type = BTRFS_EXTENT_REF_V0_KEY;
+		btrfs_release_path(root, path);
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret < 0) {
+			err = ret;
+			goto fail;
+		}
+		if (!ret)
+			return 0;
+#endif
+		goto fail;
 	}
 
 	leaf = path->nodes[0];
-	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
-	ref_objectid = btrfs_ref_objectid(leaf, ref);
-	if (btrfs_ref_root(leaf, ref) != ref_root ||
-	    btrfs_ref_generation(leaf, ref) != ref_generation ||
-	    (ref_objectid != owner_objectid &&
-	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
-		ret = -EIO;
-		WARN_ON(1);
-		goto out;
+	nritems = btrfs_header_nritems(leaf);
+	while (1) {
+		if (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				err = ret;
+			if (ret)
+				goto fail;
+
+			leaf = path->nodes[0];
+			nritems = btrfs_header_nritems(leaf);
+			recow = 1;
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid != bytenr ||
+		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
+			goto fail;
+
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_data_ref);
+
+		if (match_extent_data_ref(leaf, ref, root_objectid,
+					  owner, offset)) {
+			if (recow) {
+				btrfs_release_path(root, path);
+				goto again;
+			}
+			err = 0;
+			break;
+		}
+		path->slots[0]++;
 	}
-	ret = 0;
-out:
-	return ret;
+fail:
+	return err;
 }
 
-static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root,
-					  struct btrfs_path *path,
-					  u64 bytenr, u64 parent,
-					  u64 ref_root, u64 ref_generation,
-					  u64 owner_objectid,
-					  int refs_to_add)
+static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct btrfs_path *path,
+					   u64 bytenr, u64 parent,
+					   u64 root_objectid, u64 owner,
+					   u64 offset, int refs_to_add)
 {
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
-	struct btrfs_extent_ref *ref;
+	u32 size;
 	u32 num_refs;
 	int ret;
 
 	key.objectid = bytenr;
-	key.type = BTRFS_EXTENT_REF_KEY;
-	key.offset = parent;
+	if (parent) {
+		key.type = BTRFS_SHARED_DATA_REF_KEY;
+		key.offset = parent;
+		size = sizeof(struct btrfs_shared_data_ref);
+	} else {
+		key.type = BTRFS_EXTENT_DATA_REF_KEY;
+		key.offset = hash_extent_data_ref(root_objectid,
+						  owner, offset);
+		size = sizeof(struct btrfs_extent_data_ref);
+	}
 
-	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
-	if (ret == 0) {
-		leaf = path->nodes[0];
-		ref = btrfs_item_ptr(leaf, path->slots[0],
-				     struct btrfs_extent_ref);
-		btrfs_set_ref_root(leaf, ref, ref_root);
-		btrfs_set_ref_generation(leaf, ref, ref_generation);
-		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
-		btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
-	} else if (ret == -EEXIST) {
-		u64 existing_owner;
-
-		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
-		leaf = path->nodes[0];
+	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
+	if (ret && ret != -EEXIST)
+		goto fail;
+
+	leaf = path->nodes[0];
+	if (parent) {
+		struct btrfs_shared_data_ref *ref;
 		ref = btrfs_item_ptr(leaf, path->slots[0],
-				     struct btrfs_extent_ref);
-		if (btrfs_ref_root(leaf, ref) != ref_root ||
-		    btrfs_ref_generation(leaf, ref) != ref_generation) {
-			ret = -EIO;
-			WARN_ON(1);
-			goto out;
+				     struct btrfs_shared_data_ref);
+		if (ret == 0) {
+			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
+		} else {
+			num_refs = btrfs_shared_data_ref_count(leaf, ref);
+			num_refs += refs_to_add;
+			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
 		}
+	} else {
+		struct btrfs_extent_data_ref *ref;
+		while (ret == -EEXIST) {
+			ref = btrfs_item_ptr(leaf, path->slots[0],
+					     struct btrfs_extent_data_ref);
+			if (match_extent_data_ref(leaf, ref, root_objectid,
+						  owner, offset))
+				break;
+			btrfs_release_path(root, path);
+			key.offset++;
+			ret = btrfs_insert_empty_item(trans, root, path, &key,
+						      size);
+			if (ret && ret != -EEXIST)
+				goto fail;
 
-		num_refs = btrfs_ref_num_refs(leaf, ref);
-		BUG_ON(num_refs == 0);
-		btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);
-
-		existing_owner = btrfs_ref_objectid(leaf, ref);
-		if (existing_owner != owner_objectid &&
-		    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
-			btrfs_set_ref_objectid(leaf, ref,
-					BTRFS_MULTIPLE_OBJECTIDS);
+			leaf = path->nodes[0];
+		}
+		ref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_data_ref);
+		if (ret == 0) {
+			btrfs_set_extent_data_ref_root(leaf, ref,
+						       root_objectid);
+			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
+			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
+			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
+		} else {
+			num_refs = btrfs_extent_data_ref_count(leaf, ref);
+			num_refs += refs_to_add;
+			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
 		}
-		ret = 0;
-	} else {
-		goto out;
 	}
-	btrfs_unlock_up_safe(path, 1);
-	btrfs_mark_buffer_dirty(path->nodes[0]);
-out:
+	btrfs_mark_buffer_dirty(leaf);
+	ret = 0;
+fail:
 	btrfs_release_path(root, path);
 	return ret;
 }
 
-static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root,
-					  struct btrfs_path *path,
-					  int refs_to_drop)
+static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
+					   struct btrfs_root *root,
+					   struct btrfs_path *path,
+					   int refs_to_drop)
 {
+	struct btrfs_key key;
+	struct btrfs_extent_data_ref *ref1 = NULL;
+	struct btrfs_shared_data_ref *ref2 = NULL;
 	struct extent_buffer *leaf;
-	struct btrfs_extent_ref *ref;
-	u32 num_refs;
+	u32 num_refs = 0;
 	int ret = 0;
 
 	leaf = path->nodes[0];
-	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
-	num_refs = btrfs_ref_num_refs(leaf, ref);
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+		ref1 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_data_ref);
+		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
+	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+		ref2 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_shared_data_ref);
+		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+		struct btrfs_extent_ref_v0 *ref0;
+		ref0 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_ref_v0);
+		num_refs = btrfs_ref_count_v0(leaf, ref0);
+#endif
+	} else {
+		BUG();
+	}
+
 	BUG_ON(num_refs < refs_to_drop);
 	num_refs -= refs_to_drop;
+
 	if (num_refs == 0) {
 		ret = btrfs_del_item(trans, root, path);
 	} else {
-		btrfs_set_ref_num_refs(leaf, ref, num_refs);
+		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
+			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
+		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
+			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		else {
+			struct btrfs_extent_ref_v0 *ref0;
+			ref0 = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_extent_ref_v0);
+			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
+		}
+#endif
 		btrfs_mark_buffer_dirty(leaf);
 	}
-	btrfs_release_path(root, path);
 	return ret;
 }
 
-#ifdef BIO_RW_DISCARD
-static void btrfs_issue_discard(struct block_device *bdev,
-				u64 start, u64 len)
+static noinline u32 extent_data_ref_count(struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  struct btrfs_extent_inline_ref *iref)
 {
-	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
-}
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_data_ref *ref1;
+	struct btrfs_shared_data_ref *ref2;
+	u32 num_refs = 0;
+
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	if (iref) {
+		if (btrfs_extent_inline_ref_type(leaf, iref) ==
+		    BTRFS_EXTENT_DATA_REF_KEY) {
+			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
+			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
+		} else {
+			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
+			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
+		}
+	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+		ref1 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_data_ref);
+		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
+	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+		ref2 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_shared_data_ref);
+		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+		struct btrfs_extent_ref_v0 *ref0;
+		ref0 = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_extent_ref_v0);
+		num_refs = btrfs_ref_count_v0(leaf, ref0);
 #endif
+	} else {
+		WARN_ON(1);
+	}
+	return num_refs;
+}
 
-static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
-				u64 num_bytes)
+static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 bytenr, u64 parent,
+					  u64 root_objectid)
 {
-#ifdef BIO_RW_DISCARD
+	struct btrfs_key key;
 	int ret;
-	u64 map_length = num_bytes;
-	struct btrfs_multi_bio *multi = NULL;
-
-	/* Tell the block device(s) that the sectors can be discarded */
-	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
-			      bytenr, &map_length, &multi, 0);
-	if (!ret) {
-		struct btrfs_bio_stripe *stripe = multi->stripes;
-		int i;
-
-		if (map_length > num_bytes)
-			map_length = num_bytes;
 
-		for (i = 0; i < multi->num_stripes; i++, stripe++) {
-			btrfs_issue_discard(stripe->dev->bdev,
-					    stripe->physical,
-					    map_length);
-		}
-		kfree(multi);
+	key.objectid = bytenr;
+	if (parent) {
+		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
+		key.offset = parent;
+	} else {
+		key.type = BTRFS_TREE_BLOCK_REF_KEY;
+		key.offset = root_objectid;
 	}
 
-	return ret;
-#else
-	return 0;
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (ret == -ENOENT && parent) {
+		btrfs_release_path(root, path);
+		key.type = BTRFS_EXTENT_REF_V0_KEY;
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret > 0)
+			ret = -ENOENT;
+	}
 #endif
+	return ret;
 }
 
-static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-				     struct btrfs_root *root, u64 bytenr,
-				     u64 num_bytes,
-				     u64 orig_parent, u64 parent,
-				     u64 orig_root, u64 ref_root,
-				     u64 orig_generation, u64 ref_generation,
-				     u64 owner_objectid)
+static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  u64 bytenr, u64 parent,
+					  u64 root_objectid)
 {
+	struct btrfs_key key;
 	int ret;
-	int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;
 
-	ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
-				       orig_parent, parent, orig_root,
-				       ref_root, orig_generation,
-				       ref_generation, owner_objectid, pin);
-	BUG_ON(ret);
+	key.objectid = bytenr;
+	if (parent) {
+		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
+		key.offset = parent;
+	} else {
+		key.type = BTRFS_TREE_BLOCK_REF_KEY;
+		key.offset = root_objectid;
+	}
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+	btrfs_release_path(root, path);
 	return ret;
 }
 
-int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u64 orig_parent, u64 parent,
-			    u64 ref_root, u64 ref_generation,
-			    u64 owner_objectid)
+static inline int extent_ref_type(u64 parent, u64 owner)
 {
-	int ret;
-	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
-	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
-		return 0;
-
-	ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
-					orig_parent, parent, ref_root,
-					ref_root, ref_generation,
-					ref_generation, owner_objectid);
-	return ret;
+	int type;
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		if (parent > 0)
+			type = BTRFS_SHARED_BLOCK_REF_KEY;
+		else
+			type = BTRFS_TREE_BLOCK_REF_KEY;
+	} else {
+		if (parent > 0)
+			type = BTRFS_SHARED_DATA_REF_KEY;
+		else
+			type = BTRFS_EXTENT_DATA_REF_KEY;
+	}
+	return type;
 }
-static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-				  struct btrfs_root *root, u64 bytenr,
-				  u64 num_bytes,
-				  u64 orig_parent, u64 parent,
-				  u64 orig_root, u64 ref_root,
-				  u64 orig_generation, u64 ref_generation,
-				  u64 owner_objectid)
-{
-	int ret;
 
-	ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
-				    ref_generation, owner_objectid,
-				    BTRFS_ADD_DELAYED_REF, 0);
-	BUG_ON(ret);
-	return ret;
-}
+static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
 
-static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, u64 bytenr,
-			  u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid,
-			  int refs_to_add)
 {
-	struct btrfs_path *path;
-	int ret;
-	struct btrfs_key key;
-	struct extent_buffer *l;
-	struct btrfs_extent_item *item;
-	u32 refs;
+	int level;
+	BUG_ON(!path->keep_locks);
+	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+		if (!path->nodes[level])
+			break;
+		btrfs_assert_tree_locked(path->nodes[level]);
+		if (path->slots[level] + 1 >=
+		    btrfs_header_nritems(path->nodes[level]))
+			continue;
+		if (level == 0)
+			btrfs_item_key_to_cpu(path->nodes[level], key,
+					      path->slots[level] + 1);
+		else
+			btrfs_node_key_to_cpu(path->nodes[level], key,
+					      path->slots[level] + 1);
+		return 0;
+	}
+	return 1;
+}
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
+/*
+ * look for inline back ref. if back ref is found, *ref_ret is set
+ * to the address of inline back ref, and 0 is returned.
+ *
+ * if back ref isn't found, *ref_ret is set to the address where it
+ * should be inserted, and -ENOENT is returned.
+ *
+ * if insert is true and there are too many inline back refs, the path
+ * points to the extent item, and -EAGAIN is returned.
+ *
+ * NOTE: inline back refs are ordered in the same way that back ref
+ *	 items in the tree are ordered.
+ */
+static noinline_for_stack
+int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref **ref_ret,
+				 u64 bytenr, u64 num_bytes,
+				 u64 parent, u64 root_objectid,
+				 u64 owner, u64 offset, int insert)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
+	u64 flags;
+	u64 item_size;
+	unsigned long ptr;
+	unsigned long end;
+	int extra_size;
+	int type;
+	int want;
+	int ret;
+	int err = 0;
 
-	path->reada = 1;
-	path->leave_spinning = 1;
 	key.objectid = bytenr;
 	key.type = BTRFS_EXTENT_ITEM_KEY;
 	key.offset = num_bytes;
 
-	/* first find the extent item and update its reference count */
-	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
-				path, 0, 1);
+	want = extent_ref_type(parent, owner);
+	if (insert) {
+		extra_size = btrfs_extent_inline_ref_size(want);
+		if (owner >= BTRFS_FIRST_FREE_OBJECTID)
+			path->keep_locks = 1;
+	} else
+		extra_size = -1;
+	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
 	if (ret < 0) {
-		btrfs_set_path_blocking(path);
+		err = ret;
+		goto out;
+	}
+	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		if (!insert) {
+			err = -ENOENT;
+			goto out;
+		}
+		ret = convert_extent_item_v0(trans, root, path, owner,
+					     extra_size);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+	}
+#endif
+	BUG_ON(item_size < sizeof(*ei));
+
+	if (owner < BTRFS_FIRST_FREE_OBJECTID && insert &&
+	    item_size + extra_size >= BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
+		err = -EAGAIN;
+		goto out;
+	}
+
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	flags = btrfs_extent_flags(leaf, ei);
+
+	ptr = (unsigned long)(ei + 1);
+	end = (unsigned long)ei + item_size;
+
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+		ptr += sizeof(struct btrfs_tree_block_info);
+		BUG_ON(ptr > end);
+	} else {
+		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
+	}
+
+	err = -ENOENT;
+	while (1) {
+		if (ptr >= end) {
+			WARN_ON(ptr > end);
+			break;
+		}
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		type = btrfs_extent_inline_ref_type(leaf, iref);
+		if (want < type)
+			break;
+		if (want > type) {
+			ptr += btrfs_extent_inline_ref_size(type);
+			continue;
+		}
+
+		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+			struct btrfs_extent_data_ref *dref;
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			if (match_extent_data_ref(leaf, dref, root_objectid,
+						  owner, offset)) {
+				err = 0;
+				break;
+			}
+			if (hash_extent_data_ref_item(leaf, dref) <
+			    hash_extent_data_ref(root_objectid, owner, offset))
+				break;
+		} else {
+			u64 ref_offset;
+			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
+			if (parent > 0) {
+				if (parent == ref_offset) {
+					err = 0;
+					break;
+				}
+				if (ref_offset < parent)
+					break;
+			} else {
+				if (root_objectid == ref_offset) {
+					err = 0;
+					break;
+				}
+				if (ref_offset < root_objectid)
+					break;
+			}
+		}
+		ptr += btrfs_extent_inline_ref_size(type);
+	}
+	if (err == -ENOENT && insert) {
+		if (item_size + extra_size >=
+		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
+			err = -EAGAIN;
+			goto out;
+		}
+		/*
+		 * To add new inline back ref, we have to make sure
+		 * there is no corresponding back ref item.
+		 * For simplicity, we just do not add new inline back
+		 * ref if there is any kind of item for this block
+		 */
+		if (owner >= BTRFS_FIRST_FREE_OBJECTID &&
+		    find_next_key(path, &key) == 0 && key.objectid == bytenr) {
+			err = -EAGAIN;
+			goto out;
+		}
+	}
+	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
+out:
+	if (insert && owner >= BTRFS_FIRST_FREE_OBJECTID) {
+		path->keep_locks = 0;
+		btrfs_unlock_up_safe(path, 1);
+	}
+	return err;
+}
+
+/*
+ * helper to add new inline back ref
+ */
+static noinline_for_stack
+int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_path *path,
+				struct btrfs_extent_inline_ref *iref,
+				u64 parent, u64 root_objectid,
+				u64 owner, u64 offset, int refs_to_add,
+				struct btrfs_delayed_extent_op *extent_op)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	unsigned long ptr;
+	unsigned long end;
+	unsigned long item_offset;
+	u64 refs;
+	int size;
+	int type;
+	int ret;
+
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	item_offset = (unsigned long)iref - (unsigned long)ei;
+
+	type = extent_ref_type(parent, owner);
+	size = btrfs_extent_inline_ref_size(type);
+
+	ret = btrfs_extend_item(trans, root, path, size);
+	BUG_ON(ret);
+
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, ei);
+	refs += refs_to_add;
+	btrfs_set_extent_refs(leaf, ei, refs);
+	if (extent_op)
+		__run_delayed_extent_op(extent_op, leaf, ei);
+
+	ptr = (unsigned long)ei + item_offset;
+	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
+	if (ptr < end - size)
+		memmove_extent_buffer(leaf, ptr + size, ptr,
+				      end - size - ptr);
+
+	iref = (struct btrfs_extent_inline_ref *)ptr;
+	btrfs_set_extent_inline_ref_type(leaf, iref, type);
+	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+		struct btrfs_extent_data_ref *dref;
+		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
+		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
+		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
+		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
+	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
+		struct btrfs_shared_data_ref *sref;
+		sref = (struct btrfs_shared_data_ref *)(iref + 1);
+		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+	} else {
+		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+static int lookup_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref **ref_ret,
+				 u64 bytenr, u64 num_bytes, u64 parent,
+				 u64 root_objectid, u64 owner, u64 offset)
+{
+	int ret;
+
+	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
+					   bytenr, num_bytes, parent,
+					   root_objectid, owner, offset, 0);
+	if (ret != -ENOENT)
 		return ret;
+
+	btrfs_release_path(root, path);
+	*ref_ret = NULL;
+
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
+					    root_objectid);
+	} else {
+		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
+					     root_objectid, owner, offset);
 	}
+	return ret;
+}
 
-	if (ret > 0) {
-		WARN_ON(1);
-		btrfs_free_path(path);
-		return -EIO;
+/*
+ * helper to update/remove inline back ref
+ */
+static noinline_for_stack
+int update_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref *iref,
+				 int refs_to_mod,
+				 struct btrfs_delayed_extent_op *extent_op)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_data_ref *dref = NULL;
+	struct btrfs_shared_data_ref *sref = NULL;
+	unsigned long ptr;
+	unsigned long end;
+	u32 item_size;
+	int size;
+	int type;
+	int ret;
+	u64 refs;
+
+	leaf = path->nodes[0];
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, ei);
+	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
+	refs += refs_to_mod;
+	btrfs_set_extent_refs(leaf, ei, refs);
+	if (extent_op)
+		__run_delayed_extent_op(extent_op, leaf, ei);
+
+	type = btrfs_extent_inline_ref_type(leaf, iref);
+
+	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		refs = btrfs_extent_data_ref_count(leaf, dref);
+	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
+		sref = (struct btrfs_shared_data_ref *)(iref + 1);
+		refs = btrfs_shared_data_ref_count(leaf, sref);
+	} else {
+		refs = 1;
+		BUG_ON(refs_to_mod != -1);
 	}
-	l = path->nodes[0];
 
-	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
-	if (key.objectid != bytenr) {
-		btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
-		printk(KERN_ERR "btrfs wanted %llu found %llu\n",
-		       (unsigned long long)bytenr,
-		       (unsigned long long)key.objectid);
-		BUG();
+	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
+	refs += refs_to_mod;
+
+	if (refs > 0) {
+		if (type == BTRFS_EXTENT_DATA_REF_KEY)
+			btrfs_set_extent_data_ref_count(leaf, dref, refs);
+		else
+			btrfs_set_shared_data_ref_count(leaf, sref, refs);
+	} else {
+		size =  btrfs_extent_inline_ref_size(type);
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+		ptr = (unsigned long)iref;
+		end = (unsigned long)ei + item_size;
+		if (ptr + size < end)
+			memmove_extent_buffer(leaf, ptr, ptr + size,
+					      end - ptr - size);
+		item_size -= size;
+		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
+		BUG_ON(ret);
+	}
+	btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+static noinline_for_stack
+int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 bytenr, u64 num_bytes, u64 parent,
+				 u64 root_objectid, u64 owner,
+				 u64 offset, int refs_to_add,
+				 struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_extent_inline_ref *iref;
+	int ret;
+
+	ret = lookup_inline_extent_backref(trans, root, path, &iref,
+					   bytenr, num_bytes, parent,
+					   root_objectid, owner, offset, 1);
+	if (ret == 0) {
+		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
+		ret = update_inline_extent_backref(trans, root, path, iref,
+						   refs_to_add, extent_op);
+	} else if (ret == -ENOENT) {
+		ret = setup_inline_extent_backref(trans, root, path, iref,
+						  parent, root_objectid,
+						  owner, offset, refs_to_add,
+						  extent_op);
 	}
-	BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
+	return ret;
+}
 
-	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
+static int insert_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 bytenr, u64 parent, u64 root_objectid,
+				 u64 owner, u64 offset, int refs_to_add)
+{
+	int ret;
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		BUG_ON(refs_to_add != 1);
+		ret = insert_tree_block_ref(trans, root, path, bytenr,
+					    parent, root_objectid);
+	} else {
+		ret = insert_extent_data_ref(trans, root, path, bytenr,
+					     parent, root_objectid,
+					     owner, offset, refs_to_add);
+	}
+	return ret;
+}
 
-	refs = btrfs_extent_refs(l, item);
-	btrfs_set_extent_refs(l, item, refs + refs_to_add);
-	btrfs_unlock_up_safe(path, 1);
+static int remove_extent_backref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct btrfs_extent_inline_ref *iref,
+				 int refs_to_drop, int is_data)
+{
+	int ret;
 
-	btrfs_mark_buffer_dirty(path->nodes[0]);
+	BUG_ON(!is_data && refs_to_drop != 1);
+	if (iref) {
+		ret = update_inline_extent_backref(trans, root, path, iref,
+						   -refs_to_drop, NULL);
+	} else if (is_data) {
+		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+	} else {
+		ret = btrfs_del_item(trans, root, path);
+	}
+	return ret;
+}
+
+#ifdef BIO_RW_DISCARD
+static void btrfs_issue_discard(struct block_device *bdev,
+				u64 start, u64 len)
+{
+	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+}
+#endif
+
+static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+				u64 num_bytes)
+{
+#ifdef BIO_RW_DISCARD
+	int ret;
+	u64 map_length = num_bytes;
+	struct btrfs_multi_bio *multi = NULL;
+
+	/* Tell the block device(s) that the sectors can be discarded */
+	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+			      bytenr, &map_length, &multi, 0);
+	if (!ret) {
+		struct btrfs_bio_stripe *stripe = multi->stripes;
+		int i;
+
+		if (map_length > num_bytes)
+			map_length = num_bytes;
+
+		for (i = 0; i < multi->num_stripes; i++, stripe++) {
+			btrfs_issue_discard(stripe->dev->bdev,
+					    stripe->physical,
+					    map_length);
+		}
+		kfree(multi);
+	}
+
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 bytenr, u64 num_bytes, u64 parent,
+			 u64 root_objectid, u64 owner, u64 offset)
+{
+	int ret;
+	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
+	       root_objectid == BTRFS_TREE_LOG_OBJECTID);
+
+	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+					parent, root_objectid, (int)owner,
+					BTRFS_ADD_DELAYED_REF, NULL);
+	} else {
+		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
+					parent, root_objectid, owner, offset,
+					BTRFS_ADD_DELAYED_REF, NULL);
+	}
+	return ret;
+}
+
+static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  u64 bytenr, u64 num_bytes,
+				  u64 parent, u64 root_objectid,
+				  u64 owner, u64 offset, int refs_to_add,
+				  struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *item;
+	u64 refs;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	path->reada = 1;
+	path->leave_spinning = 1;
+	/* this will setup the path even if it fails to insert the back ref */
+	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
+					   path, bytenr, num_bytes, parent,
+					   root_objectid, owner, offset,
+					   refs_to_add, extent_op);
+	if (ret == 0)
+		goto out;
+
+	if (ret != -EAGAIN) {
+		err = ret;
+		goto out;
+	}
 
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	refs = btrfs_extent_refs(leaf, item);
+	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
+	if (extent_op)
+		__run_delayed_extent_op(extent_op, leaf, item);
+
+	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(root->fs_info->extent_root, path);
 
 	path->reada = 1;
@@ -802,56 +1522,197 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
 
 	/* now insert the actual backref */
 	ret = insert_extent_backref(trans, root->fs_info->extent_root,
-				    path, bytenr, parent,
-				    ref_root, ref_generation,
-				    owner_objectid, refs_to_add);
+				    path, bytenr, parent, root_objectid,
+				    owner, offset, refs_to_add);
 	BUG_ON(ret);
+out:
 	btrfs_free_path(path);
-	return 0;
+	return err;
 }
 
-int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
-			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 ref_root, u64 ref_generation,
-			 u64 owner_objectid)
+static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_delayed_ref_node *node,
+				struct btrfs_delayed_extent_op *extent_op,
+				int insert_reserved)
+{
+	int ret = 0;
+	struct btrfs_delayed_data_ref *ref;
+	struct btrfs_key ins;
+	u64 parent = 0;
+	u64 ref_root = 0;
+	u64 flags = 0;
+
+	ins.objectid = node->bytenr;
+	ins.offset = node->num_bytes;
+	ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+	ref = btrfs_delayed_node_to_data_ref(node);
+	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
+		parent = ref->parent;
+	else
+		ref_root = ref->root;
+
+	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
+		if (extent_op) {
+			BUG_ON(extent_op->update_key);
+			flags |= extent_op->flags_to_set;
+		}
+		ret = alloc_reserved_file_extent(trans, root,
+						 parent, ref_root, flags,
+						 ref->objectid, ref->offset,
+						 &ins, node->ref_mod);
+		update_reserved_extents(root, ins.objectid, ins.offset, 0);
+	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
+		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
+					     node->num_bytes, parent,
+					     ref_root, ref->objectid,
+					     ref->offset, node->ref_mod,
+					     extent_op);
+	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
+		ret = __btrfs_free_extent(trans, root, node->bytenr,
+					  node->num_bytes, parent,
+					  ref_root, ref->objectid,
+					  ref->offset, node->ref_mod,
+					  extent_op);
+	} else {
+		BUG();
+	}
+	return ret;
+}
+
+static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
+				    struct extent_buffer *leaf,
+				    struct btrfs_extent_item *ei)
+{
+	u64 flags = btrfs_extent_flags(leaf, ei);
+	if (extent_op->update_flags) {
+		flags |= extent_op->flags_to_set;
+		btrfs_set_extent_flags(leaf, ei, flags);
+	}
+
+	if (extent_op->update_key) {
+		struct btrfs_tree_block_info *bi;
+		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
+		bi = (struct btrfs_tree_block_info *)(ei + 1);
+		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
+	}
+}
+
+static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_delayed_ref_node *node,
+				 struct btrfs_delayed_extent_op *extent_op)
 {
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct btrfs_extent_item *ei;
+	struct extent_buffer *leaf;
+	u32 item_size;
 	int ret;
-	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
-	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
-		return 0;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = node->bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = node->num_bytes;
+
+	path->reada = 1;
+	path->leave_spinning = 1;
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
+				path, 0, 1);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	if (ret > 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
+					     path, (u64)-1, 0);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+	}
+#endif
+	BUG_ON(item_size < sizeof(*ei));
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	__run_delayed_extent_op(extent_op, leaf, ei);
 
-	ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
-				     0, ref_root, 0, ref_generation,
-				     owner_objectid);
-	return ret;
+	btrfs_mark_buffer_dirty(leaf);
+out:
+	btrfs_free_path(path);
+	return err;
 }
 
-static int drop_delayed_ref(struct btrfs_trans_handle *trans,
-					struct btrfs_root *root,
-					struct btrfs_delayed_ref_node *node)
+static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_delayed_ref_node *node,
+				struct btrfs_delayed_extent_op *extent_op,
+				int insert_reserved)
 {
 	int ret = 0;
-	struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);
+	struct btrfs_delayed_tree_ref *ref;
+	struct btrfs_key ins;
+	u64 parent = 0;
+	u64 ref_root = 0;
 
-	BUG_ON(node->ref_mod == 0);
-	ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
-				  node->parent, ref->root, ref->generation,
-				  ref->owner_objectid, ref->pin, node->ref_mod);
+	ins.objectid = node->bytenr;
+	ins.offset = node->num_bytes;
+	ins.type = BTRFS_EXTENT_ITEM_KEY;
 
+	ref = btrfs_delayed_node_to_tree_ref(node);
+	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
+		parent = ref->parent;
+	else
+		ref_root = ref->root;
+
+	BUG_ON(node->ref_mod != 1);
+	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
+		BUG_ON(!extent_op || !extent_op->update_flags ||
+		       !extent_op->update_key);
+		ret = alloc_reserved_tree_block(trans, root,
+						parent, ref_root,
+						extent_op->flags_to_set,
+						&extent_op->key,
+						ref->level, &ins);
+		update_reserved_extents(root, ins.objectid, ins.offset, 0);
+	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
+		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
+					     node->num_bytes, parent, ref_root,
+					     ref->level, 0, 1, extent_op);
+	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
+		ret = __btrfs_free_extent(trans, root, node->bytenr,
+					  node->num_bytes, parent, ref_root,
+					  ref->level, 0, 1, extent_op);
+	} else {
+		BUG();
+	}
 	return ret;
 }
 
+
 /* helper function to actually process a single delayed ref entry */
-static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
-					struct btrfs_root *root,
-					struct btrfs_delayed_ref_node *node,
-					int insert_reserved)
+static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_delayed_ref_node *node,
+			       struct btrfs_delayed_extent_op *extent_op,
+			       int insert_reserved)
 {
 	int ret;
-	struct btrfs_delayed_ref *ref;
-
-	if (node->parent == (u64)-1) {
+	if (btrfs_delayed_ref_is_head(node)) {
 		struct btrfs_delayed_ref_head *head;
 		/*
 		 * we've hit the end of the chain and we were supposed
@@ -859,44 +1720,35 @@ static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 		 * deleted before we ever needed to insert it, so all
 		 * we have to do is clean up the accounting
 		 */
+		BUG_ON(extent_op);
+		head = btrfs_delayed_node_to_head(node);
 		if (insert_reserved) {
+			if (head->is_data) {
+				ret = btrfs_del_csums(trans, root,
+						      node->bytenr,
+						      node->num_bytes);
+				BUG_ON(ret);
+			}
+			btrfs_update_pinned_extents(root, node->bytenr,
+						    node->num_bytes, 1);
 			update_reserved_extents(root, node->bytenr,
 						node->num_bytes, 0);
 		}
-		head = btrfs_delayed_node_to_head(node);
 		mutex_unlock(&head->mutex);
 		return 0;
 	}
 
-	ref = btrfs_delayed_node_to_ref(node);
-	if (ref->action == BTRFS_ADD_DELAYED_REF) {
-		if (insert_reserved) {
-			struct btrfs_key ins;
-
-			ins.objectid = node->bytenr;
-			ins.offset = node->num_bytes;
-			ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-			/* record the full extent allocation */
-			ret = __btrfs_alloc_reserved_extent(trans, root,
-					node->parent, ref->root,
-					ref->generation, ref->owner_objectid,
-					&ins, node->ref_mod);
-			update_reserved_extents(root, node->bytenr,
-						node->num_bytes, 0);
-		} else {
-			/* just add one backref */
-			ret = add_extent_ref(trans, root, node->bytenr,
-				     node->num_bytes,
-				     node->parent, ref->root, ref->generation,
-				     ref->owner_objectid, node->ref_mod);
-		}
-		BUG_ON(ret);
-	} else if (ref->action == BTRFS_DROP_DELAYED_REF) {
-		WARN_ON(insert_reserved);
-		ret = drop_delayed_ref(trans, root, node);
-	}
-	return 0;
+	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
+		ret = run_delayed_tree_ref(trans, root, node, extent_op,
+					   insert_reserved);
+	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
+		 node->type == BTRFS_SHARED_DATA_REF_KEY)
+		ret = run_delayed_data_ref(trans, root, node, extent_op,
+					   insert_reserved);
+	else
+		BUG();
+	return ret;
 }
 
 static noinline struct btrfs_delayed_ref_node *
@@ -919,7 +1771,7 @@ again:
 				rb_node);
 		if (ref->bytenr != head->node.bytenr)
 			break;
-		if (btrfs_delayed_node_to_ref(ref)->action == action)
+		if (ref->action == action)
 			return ref;
 		node = rb_prev(node);
 	}
@@ -937,6 +1789,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
+	struct btrfs_delayed_extent_op *extent_op;
 	int ret;
 	int count = 0;
 	int must_insert_reserved = 0;
@@ -975,6 +1828,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		must_insert_reserved = locked_ref->must_insert_reserved;
 		locked_ref->must_insert_reserved = 0;
 
+		extent_op = locked_ref->extent_op;
+		locked_ref->extent_op = NULL;
+
 		/*
 		 * locked_ref is the head node, so we have to go one
 		 * node back for any delayed ref updates
@@ -986,6 +1842,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 			 * so that any accounting fixes can happen
 			 */
 			ref = &locked_ref->node;
+
+			if (extent_op && must_insert_reserved) {
+				kfree(extent_op);
+				extent_op = NULL;
+			}
+
+			if (extent_op) {
+				spin_unlock(&delayed_refs->lock);
+
+				ret = run_delayed_extent_op(trans, root,
+							    ref, extent_op);
+				BUG_ON(ret);
+				kfree(extent_op);
+
+				cond_resched();
+				spin_lock(&delayed_refs->lock);
+				continue;
+			}
+
 			list_del_init(&locked_ref->cluster);
 			locked_ref = NULL;
 		}
@@ -993,14 +1868,17 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
+
 		spin_unlock(&delayed_refs->lock);
 
-		ret = run_one_delayed_ref(trans, root, ref,
+		ret = run_one_delayed_ref(trans, root, ref, extent_op,
 					  must_insert_reserved);
 		BUG_ON(ret);
-		btrfs_put_delayed_ref(ref);
 
+		btrfs_put_delayed_ref(ref);
+		kfree(extent_op);
 		count++;
+
 		cond_resched();
 		spin_lock(&delayed_refs->lock);
 	}
@@ -1095,25 +1973,112 @@ out:
 	return 0;
 }
 
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, u64 objectid, u64 bytenr)
+int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 flags,
+				int is_data)
+{
+	struct btrfs_delayed_extent_op *extent_op;
+	int ret;
+
+	extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+	if (!extent_op)
+		return -ENOMEM;
+
+	extent_op->flags_to_set = flags;
+	extent_op->update_flags = 1;
+	extent_op->update_key = 0;
+	extent_op->is_data = is_data ? 1 : 0;
+
+	ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
+	if (ret)
+		kfree(extent_op);
+	return ret;
+}
+
+static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      u64 objectid, u64 offset, u64 bytenr)
+{
+	struct btrfs_delayed_ref_head *head;
+	struct btrfs_delayed_ref_node *ref;
+	struct btrfs_delayed_data_ref *data_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct rb_node *node;
+	int ret = 0;
+
+	ret = -ENOENT;
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+	head = btrfs_find_delayed_ref_head(trans, bytenr);
+	if (!head)
+		goto out;
+
+	if (!mutex_trylock(&head->mutex)) {
+		atomic_inc(&head->node.refs);
+		spin_unlock(&delayed_refs->lock);
+
+		btrfs_release_path(root->fs_info->extent_root, path);
+
+		mutex_lock(&head->mutex);
+		mutex_unlock(&head->mutex);
+		btrfs_put_delayed_ref(&head->node);
+		return -EAGAIN;
+	}
+
+	node = rb_prev(&head->node.rb_node);
+	if (!node)
+		goto out_unlock;
+
+	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+
+	if (ref->bytenr != bytenr)
+		goto out_unlock;
+
+	ret = 1;
+	if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
+		goto out_unlock;
+
+	data_ref = btrfs_delayed_node_to_data_ref(ref);
+
+	node = rb_prev(node);
+	if (node) {
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+		if (ref->bytenr == bytenr)
+			goto out_unlock;
+	}
+
+	if (data_ref->root != root->root_key.objectid ||
+	    data_ref->objectid != objectid || data_ref->offset != offset)
+		goto out_unlock;
+
+	ret = 0;
+out_unlock:
+	mutex_unlock(&head->mutex);
+out:
+	spin_unlock(&delayed_refs->lock);
+	return ret;
+}
+
+static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_path *path,
+					u64 objectid, u64 offset, u64 bytenr)
 {
 	struct btrfs_root *extent_root = root->fs_info->extent_root;
-	struct btrfs_path *path;
 	struct extent_buffer *leaf;
-	struct btrfs_extent_ref *ref_item;
+	struct btrfs_extent_data_ref *ref;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_extent_item *ei;
 	struct btrfs_key key;
-	struct btrfs_key found_key;
-	u64 ref_root;
-	u64 last_snapshot;
-	u32 nritems;
+	u32 item_size;
 	int ret;
 
 	key.objectid = bytenr;
 	key.offset = (u64)-1;
 	key.type = BTRFS_EXTENT_ITEM_KEY;
 
-	path = btrfs_alloc_path();
 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
@@ -1125,55 +2090,83 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 
 	path->slots[0]--;
 	leaf = path->nodes[0];
-	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 
-	if (found_key.objectid != bytenr ||
-	    found_key.type != BTRFS_EXTENT_ITEM_KEY)
+	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
 		goto out;
 
-	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
-	while (1) {
-		leaf = path->nodes[0];
-		nritems = btrfs_header_nritems(leaf);
-		if (path->slots[0] >= nritems) {
-			ret = btrfs_next_leaf(extent_root, path);
-			if (ret < 0)
-				goto out;
-			if (ret == 0)
-				continue;
-			break;
-		}
-		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		if (found_key.objectid != bytenr)
-			break;
+	ret = 1;
+	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
+		goto out;
+	}
+#endif
+	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 
-		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
-			path->slots[0]++;
-			continue;
-		}
+	if (item_size != sizeof(*ei) +
+	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
+		goto out;
 
-		ref_item = btrfs_item_ptr(leaf, path->slots[0],
-					  struct btrfs_extent_ref);
-		ref_root = btrfs_ref_root(leaf, ref_item);
-		if ((ref_root != root->root_key.objectid &&
-		     ref_root != BTRFS_TREE_LOG_OBJECTID) ||
-		     objectid != btrfs_ref_objectid(leaf, ref_item)) {
-			ret = 1;
-			goto out;
-		}
-		if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
-			ret = 1;
+	if (btrfs_extent_generation(leaf, ei) <=
+	    btrfs_root_last_snapshot(&root->root_item))
+		goto out;
+
+	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+	if (btrfs_extent_inline_ref_type(leaf, iref) !=
+	    BTRFS_EXTENT_DATA_REF_KEY)
+		goto out;
+
+	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
+	if (btrfs_extent_refs(leaf, ei) !=
+	    btrfs_extent_data_ref_count(leaf, ref) ||
+	    btrfs_extent_data_ref_root(leaf, ref) !=
+	    root->root_key.objectid ||
+	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
+	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
+		goto out;
+
+	ret = 0;
+out:
+	return ret;
+}
+
+int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  u64 objectid, u64 offset, u64 bytenr)
+{
+	struct btrfs_path *path;
+	int ret;
+	int ret2;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOENT;
+
+	do {
+		ret = check_committed_ref(trans, root, path, objectid,
+					  offset, bytenr);
+		if (ret && ret != -ENOENT)
 			goto out;
-		}
 
-		path->slots[0]++;
+		ret2 = check_delayed_ref(trans, root, path, objectid,
+					 offset, bytenr);
+	} while (ret2 == -EAGAIN);
+
+	if (ret2 && ret2 != -ENOENT) {
+		ret = ret2;
+		goto out;
 	}
-	ret = 0;
+
+	if (ret != -ENOENT || ret2 != -ENOENT)
+		ret = 0;
 out:
 	btrfs_free_path(path);
 	return ret;
 }
 
+#if 0
 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		    struct extent_buffer *buf, u32 nr_extents)
 {
@@ -1291,191 +2284,49 @@ static int refsort_cmp(const void *a_void, const void *b_void)
 		return 1;
 	return 0;
 }
+#endif
 
-
-noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
+static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
-			   struct extent_buffer *orig_buf,
-			   struct extent_buffer *buf, u32 *nr_extents)
+			   struct extent_buffer *buf,
+			   int full_backref, int inc)
 {
 	u64 bytenr;
+	u64 num_bytes;
+	u64 parent;
 	u64 ref_root;
-	u64 orig_root;
-	u64 ref_generation;
-	u64 orig_generation;
-	struct refsort *sorted;
 	u32 nritems;
-	u32 nr_file_extents = 0;
 	struct btrfs_key key;
 	struct btrfs_file_extent_item *fi;
 	int i;
 	int level;
 	int ret = 0;
-	int faili = 0;
-	int refi = 0;
-	int slot;
 	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
-			    u64, u64, u64, u64, u64, u64, u64, u64, u64);
+			    u64, u64, u64, u64, u64, u64);
 
 	ref_root = btrfs_header_owner(buf);
-	ref_generation = btrfs_header_generation(buf);
-	orig_root = btrfs_header_owner(orig_buf);
-	orig_generation = btrfs_header_generation(orig_buf);
-
 	nritems = btrfs_header_nritems(buf);
 	level = btrfs_header_level(buf);
 
-	sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS);
-	BUG_ON(!sorted);
-
-	if (root->ref_cows) {
-		process_func = __btrfs_inc_extent_ref;
-	} else {
-		if (level == 0 &&
-		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
-			goto out;
-		if (level != 0 &&
-		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
-			goto out;
-		process_func = __btrfs_update_extent_ref;
-	}
-
-	/*
-	 * we make two passes through the items.  In the first pass we
-	 * only record the byte number and slot.  Then we sort based on
-	 * byte number and do the actual work based on the sorted results
-	 */
-	for (i = 0; i < nritems; i++) {
-		cond_resched();
-		if (level == 0) {
-			btrfs_item_key_to_cpu(buf, &key, i);
-			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-				continue;
-			fi = btrfs_item_ptr(buf, i,
-					    struct btrfs_file_extent_item);
-			if (btrfs_file_extent_type(buf, fi) ==
-			    BTRFS_FILE_EXTENT_INLINE)
-				continue;
-			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
-			if (bytenr == 0)
-				continue;
-
-			nr_file_extents++;
-			sorted[refi].bytenr = bytenr;
-			sorted[refi].slot = i;
-			refi++;
-		} else {
-			bytenr = btrfs_node_blockptr(buf, i);
-			sorted[refi].bytenr = bytenr;
-			sorted[refi].slot = i;
-			refi++;
-		}
-	}
-	/*
-	 * if refi == 0, we didn't actually put anything into the sorted
-	 * array and we're done
-	 */
-	if (refi == 0)
-		goto out;
-
-	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
-
-	for (i = 0; i < refi; i++) {
-		cond_resched();
-		slot = sorted[i].slot;
-		bytenr = sorted[i].bytenr;
-
-		if (level == 0) {
-			btrfs_item_key_to_cpu(buf, &key, slot);
-			fi = btrfs_item_ptr(buf, slot,
-					    struct btrfs_file_extent_item);
-
-			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
-			if (bytenr == 0)
-				continue;
-
-			ret = process_func(trans, root, bytenr,
-				   btrfs_file_extent_disk_num_bytes(buf, fi),
-				   orig_buf->start, buf->start,
-				   orig_root, ref_root,
-				   orig_generation, ref_generation,
-				   key.objectid);
-
-			if (ret) {
-				faili = slot;
-				WARN_ON(1);
-				goto fail;
-			}
-		} else {
-			ret = process_func(trans, root, bytenr, buf->len,
-					   orig_buf->start, buf->start,
-					   orig_root, ref_root,
-					   orig_generation, ref_generation,
-					   level - 1);
-			if (ret) {
-				faili = slot;
-				WARN_ON(1);
-				goto fail;
-			}
-		}
-	}
-out:
-	kfree(sorted);
-	if (nr_extents) {
-		if (level == 0)
-			*nr_extents = nr_file_extents;
-		else
-			*nr_extents = nritems;
-	}
-	return 0;
-fail:
-	kfree(sorted);
-	WARN_ON(1);
-	return ret;
-}
-
-int btrfs_update_ref(struct btrfs_trans_handle *trans,
-		     struct btrfs_root *root, struct extent_buffer *orig_buf,
-		     struct extent_buffer *buf, int start_slot, int nr)
-
-{
-	u64 bytenr;
-	u64 ref_root;
-	u64 orig_root;
-	u64 ref_generation;
-	u64 orig_generation;
-	struct btrfs_key key;
-	struct btrfs_file_extent_item *fi;
-	int i;
-	int ret;
-	int slot;
-	int level;
-
-	BUG_ON(start_slot < 0);
-	BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
-
-	ref_root = btrfs_header_owner(buf);
-	ref_generation = btrfs_header_generation(buf);
-	orig_root = btrfs_header_owner(orig_buf);
-	orig_generation = btrfs_header_generation(orig_buf);
-	level = btrfs_header_level(buf);
+	if (!root->ref_cows && level == 0)
+		return 0;
 
-	if (!root->ref_cows) {
-		if (level == 0 &&
-		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
-			return 0;
-		if (level != 0 &&
-		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
-			return 0;
-	}
+	if (inc)
+		process_func = btrfs_inc_extent_ref;
+	else
+		process_func = btrfs_free_extent;
 
-	for (i = 0, slot = start_slot; i < nr; i++, slot++) {
-		cond_resched();
+	if (full_backref)
+		parent = buf->start;
+	else
+		parent = 0;
+
+	for (i = 0; i < nritems; i++) {
 		if (level == 0) {
-			btrfs_item_key_to_cpu(buf, &key, slot);
+			btrfs_item_key_to_cpu(buf, &key, i);
 			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
 				continue;
-			fi = btrfs_item_ptr(buf, slot,
+			fi = btrfs_item_ptr(buf, i,
 					    struct btrfs_file_extent_item);
 			if (btrfs_file_extent_type(buf, fi) ==
 			    BTRFS_FILE_EXTENT_INLINE)
@@ -1483,28 +2334,39 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans,
 			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
 			if (bytenr == 0)
 				continue;
-			ret = __btrfs_update_extent_ref(trans, root, bytenr,
-				    btrfs_file_extent_disk_num_bytes(buf, fi),
-				    orig_buf->start, buf->start,
-				    orig_root, ref_root, orig_generation,
-				    ref_generation, key.objectid);
+
+			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
+			key.offset -= btrfs_file_extent_offset(buf, fi);
+			ret = process_func(trans, root, bytenr, num_bytes,
+					   parent, ref_root, key.objectid,
+					   key.offset);
 			if (ret)
 				goto fail;
 		} else {
-			bytenr = btrfs_node_blockptr(buf, slot);
-			ret = __btrfs_update_extent_ref(trans, root, bytenr,
-					    buf->len, orig_buf->start,
-					    buf->start, orig_root, ref_root,
-					    orig_generation, ref_generation,
-					    level - 1);
+			bytenr = btrfs_node_blockptr(buf, i);
+			num_bytes = btrfs_level_size(root, level - 1);
+			ret = process_func(trans, root, bytenr, num_bytes,
+					   parent, ref_root, level - 1, 0);
 			if (ret)
 				goto fail;
 		}
 	}
 	return 0;
 fail:
-	WARN_ON(1);
-	return -1;
+	BUG();
+	return ret;
+}
+
+int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *buf, int full_backref)
+{
+	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
+}
+
+int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		  struct extent_buffer *buf, int full_backref)
+{
+	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
 }
 
 static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -2007,6 +2869,24 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 	u64 old_val;
 	u64 byte_in_group;
 
+	/* block accounting for super block */
+	spin_lock(&info->delalloc_lock);
+	old_val = btrfs_super_bytes_used(&info->super_copy);
+	if (alloc)
+		old_val += num_bytes;
+	else
+		old_val -= num_bytes;
+	btrfs_set_super_bytes_used(&info->super_copy, old_val);
+
+	/* block accounting for root item */
+	old_val = btrfs_root_used(&root->root_item);
+	if (alloc)
+		old_val += num_bytes;
+	else
+		old_val -= num_bytes;
+	btrfs_set_root_used(&root->root_item, old_val);
+	spin_unlock(&info->delalloc_lock);
+
 	while (total) {
 		cache = btrfs_lookup_block_group(info, bytenr);
 		if (!cache)
@@ -2216,8 +3096,6 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 		u64 header_owner = btrfs_header_owner(buf);
 		u64 header_transid = btrfs_header_generation(buf);
 		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
-		    header_owner != BTRFS_TREE_RELOC_OBJECTID &&
-		    header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
 		    header_transid == trans->transid &&
 		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 			*must_clean = buf;
@@ -2235,63 +3113,77 @@ pinit:
 	return 0;
 }
 
-/*
- * remove an extent from the root, returns 0 on success
- */
-static int __free_extent(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root,
-			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 ref_generation,
-			 u64 owner_objectid, int pin, int mark_free,
-			 int refs_to_drop)
+
+static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 bytenr, u64 num_bytes, u64 parent,
+				u64 root_objectid, u64 owner_objectid,
+				u64 owner_offset, int refs_to_drop,
+				struct btrfs_delayed_extent_op *extent_op)
 {
-	struct btrfs_path *path;
 	struct btrfs_key key;
+	struct btrfs_path *path;
 	struct btrfs_fs_info *info = root->fs_info;
 	struct btrfs_root *extent_root = info->extent_root;
 	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
 	int ret;
+	int is_data;
 	int extent_slot = 0;
 	int found_extent = 0;
 	int num_to_del = 1;
-	struct btrfs_extent_item *ei;
-	u32 refs;
+	u32 item_size;
+	u64 refs;
 
-	key.objectid = bytenr;
-	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
-	key.offset = num_bytes;
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
 	path->reada = 1;
 	path->leave_spinning = 1;
-	ret = lookup_extent_backref(trans, extent_root, path,
-				    bytenr, parent, root_objectid,
-				    ref_generation, owner_objectid, 1);
+
+	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
+	BUG_ON(!is_data && refs_to_drop != 1);
+
+	ret = lookup_extent_backref(trans, extent_root, path, &iref,
+				    bytenr, num_bytes, parent,
+				    root_objectid, owner_objectid,
+				    owner_offset);
 	if (ret == 0) {
-		struct btrfs_key found_key;
 		extent_slot = path->slots[0];
-		while (extent_slot > 0) {
-			extent_slot--;
-			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+		while (extent_slot >= 0) {
+			btrfs_item_key_to_cpu(path->nodes[0], &key,
 					      extent_slot);
-			if (found_key.objectid != bytenr)
+			if (key.objectid != bytenr)
 				break;
-			if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
-			    found_key.offset == num_bytes) {
+			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
+			    key.offset == num_bytes) {
 				found_extent = 1;
 				break;
 			}
 			if (path->slots[0] - extent_slot > 5)
 				break;
+			extent_slot--;
 		}
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
+		if (found_extent && item_size < sizeof(*ei))
+			found_extent = 0;
+#endif
 		if (!found_extent) {
+			BUG_ON(iref);
 			ret = remove_extent_backref(trans, extent_root, path,
-						    refs_to_drop);
+						    NULL, refs_to_drop,
+						    is_data);
 			BUG_ON(ret);
 			btrfs_release_path(extent_root, path);
 			path->leave_spinning = 1;
+
+			key.objectid = bytenr;
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			key.offset = num_bytes;
+
 			ret = btrfs_search_slot(trans, extent_root,
 						&key, path, -1, 1);
 			if (ret) {
@@ -2307,82 +3199,98 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 		btrfs_print_leaf(extent_root, path->nodes[0]);
 		WARN_ON(1);
 		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
-		       "parent %llu root %llu gen %llu owner %llu\n",
+		       "parent %llu root %llu  owner %llu offset %llu\n",
 		       (unsigned long long)bytenr,
 		       (unsigned long long)parent,
 		       (unsigned long long)root_objectid,
-		       (unsigned long long)ref_generation,
-		       (unsigned long long)owner_objectid);
+		       (unsigned long long)owner_objectid,
+		       (unsigned long long)owner_offset);
 	}
 
 	leaf = path->nodes[0];
+	item_size = btrfs_item_size_nr(leaf, extent_slot);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		BUG_ON(found_extent || extent_slot != path->slots[0]);
+		ret = convert_extent_item_v0(trans, extent_root, path,
+					     owner_objectid, 0);
+		BUG_ON(ret < 0);
+
+		btrfs_release_path(extent_root, path);
+		path->leave_spinning = 1;
+
+		key.objectid = bytenr;
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = num_bytes;
+
+		ret = btrfs_search_slot(trans, extent_root, &key, path,
+					-1, 1);
+		if (ret) {
+			printk(KERN_ERR "umm, got %d back from search"
+			       ", was looking for %llu\n", ret,
+			       (unsigned long long)bytenr);
+			btrfs_print_leaf(extent_root, path->nodes[0]);
+		}
+		BUG_ON(ret);
+		extent_slot = path->slots[0];
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, extent_slot);
+	}
+#endif
+	BUG_ON(item_size < sizeof(*ei));
 	ei = btrfs_item_ptr(leaf, extent_slot,
 			    struct btrfs_extent_item);
-	refs = btrfs_extent_refs(leaf, ei);
-
-	/*
-	 * we're not allowed to delete the extent item if there
-	 * are other delayed ref updates pending
-	 */
+	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+		struct btrfs_tree_block_info *bi;
+		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
+		bi = (struct btrfs_tree_block_info *)(ei + 1);
+		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
+	}
 
+	refs = btrfs_extent_refs(leaf, ei);
 	BUG_ON(refs < refs_to_drop);
 	refs -= refs_to_drop;
-	btrfs_set_extent_refs(leaf, ei, refs);
-	btrfs_mark_buffer_dirty(leaf);
 
-	if (refs == 0 && found_extent &&
-	    path->slots[0] == extent_slot + 1) {
-		struct btrfs_extent_ref *ref;
-		ref = btrfs_item_ptr(leaf, path->slots[0],
-				     struct btrfs_extent_ref);
-		BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);
-		/* if the back ref and the extent are next to each other
-		 * they get deleted below in one shot
+	if (refs > 0) {
+		if (extent_op)
+			__run_delayed_extent_op(extent_op, leaf, ei);
+		/*
+		 * In the case of inline back ref, reference count will
+		 * be updated by remove_extent_backref
 		 */
-		path->slots[0] = extent_slot;
-		num_to_del = 2;
-	} else if (found_extent) {
-		/* otherwise delete the extent back ref */
-		ret = remove_extent_backref(trans, extent_root, path,
-					    refs_to_drop);
-		BUG_ON(ret);
-		/* if refs are 0, we need to setup the path for deletion */
-		if (refs == 0) {
-			btrfs_release_path(extent_root, path);
-			path->leave_spinning = 1;
-			ret = btrfs_search_slot(trans, extent_root, &key, path,
-						-1, 1);
+		if (iref) {
+			BUG_ON(!found_extent);
+		} else {
+			btrfs_set_extent_refs(leaf, ei, refs);
+			btrfs_mark_buffer_dirty(leaf);
+		}
+		if (found_extent) {
+			ret = remove_extent_backref(trans, extent_root, path,
+						    iref, refs_to_drop,
+						    is_data);
 			BUG_ON(ret);
 		}
-	}
-
-	if (refs == 0) {
-		u64 super_used;
-		u64 root_used;
+	} else {
+		int mark_free = 0;
 		struct extent_buffer *must_clean = NULL;
 
-		if (pin) {
-			ret = pin_down_bytes(trans, root, path,
-				bytenr, num_bytes,
-				owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
-				&must_clean);
-			if (ret > 0)
-				mark_free = 1;
-			BUG_ON(ret < 0);
+		if (found_extent) {
+			BUG_ON(is_data && refs_to_drop !=
+			       extent_data_ref_count(root, path, iref));
+			if (iref) {
+				BUG_ON(path->slots[0] != extent_slot);
+			} else {
+				BUG_ON(path->slots[0] != extent_slot + 1);
+				path->slots[0] = extent_slot;
+				num_to_del = 2;
+			}
 		}
 
-		/* block accounting for super block */
-		spin_lock(&info->delalloc_lock);
-		super_used = btrfs_super_bytes_used(&info->super_copy);
-		btrfs_set_super_bytes_used(&info->super_copy,
-					   super_used - num_bytes);
-
-		/* block accounting for root item */
-		root_used = btrfs_root_used(&root->root_item);
-		btrfs_set_root_used(&root->root_item,
-					   root_used - num_bytes);
-		spin_unlock(&info->delalloc_lock);
-
+		ret = pin_down_bytes(trans, root, path, bytenr,
+				     num_bytes, is_data, &must_clean);
+		if (ret > 0)
+			mark_free = 1;
+		BUG_ON(ret < 0);
 		/*
 		 * it is going to be very rare for someone to be waiting
 		 * on the block we're freeing.  del_items might need to
@@ -2403,7 +3311,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 			free_extent_buffer(must_clean);
 		}
 
-		if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+		if (is_data) {
 			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
 			BUG_ON(ret);
 		} else {
@@ -2421,34 +3329,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
 }
 
 /*
- * remove an extent from the root, returns 0 on success
- */
-static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
-					struct btrfs_root *root,
-					u64 bytenr, u64 num_bytes, u64 parent,
-					u64 root_objectid, u64 ref_generation,
-					u64 owner_objectid, int pin,
-					int refs_to_drop)
-{
-	WARN_ON(num_bytes < root->sectorsize);
-
-	/*
-	 * if metadata always pin
-	 * if data pin when any transaction has committed this
-	 */
-	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
-	    ref_generation != trans->transid)
-		pin = 1;
-
-	if (ref_generation != trans->transid)
-		pin = 1;
-
-	return __free_extent(trans, root, bytenr, num_bytes, parent,
-			    root_objectid, ref_generation,
-			    owner_objectid, pin, pin == 0, refs_to_drop);
-}
-
-/*
  * when we free an extent, it is possible (and likely) that we free the last
  * delayed ref for that extent as well.  This searches the delayed ref tree for
  * a given extent, and if there are no other delayed refs to be processed, it
@@ -2479,6 +3359,13 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	if (ref->bytenr == bytenr)
 		goto out;
 
+	if (head->extent_op) {
+		if (!head->must_insert_reserved)
+			goto out;
+		kfree(head->extent_op);
+		head->extent_op = NULL;
+	}
+
 	/*
 	 * waiting for the lock here would deadlock.  If someone else has it
 	 * locked they are already in the process of dropping it anyway
@@ -2507,7 +3394,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	spin_unlock(&delayed_refs->lock);
 
 	ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
-				  &head->node, head->must_insert_reserved);
+				  &head->node, head->extent_op,
+				  head->must_insert_reserved);
 	BUG_ON(ret);
 	btrfs_put_delayed_ref(&head->node);
 	return 0;
@@ -2519,32 +3407,32 @@ out:
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent,
-		      u64 root_objectid, u64 ref_generation,
-		      u64 owner_objectid, int pin)
+		      u64 root_objectid, u64 owner, u64 offset)
 {
 	int ret;
 
 	/*
 	 * tree log blocks never actually go into the extent allocation
 	 * tree, just update pinning info and exit early.
-	 *
-	 * data extents referenced by the tree log do need to have
-	 * their reference counts bumped.
 	 */
-	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
-	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
+		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
 		/* unlocks the pinned mutex */
 		btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
 		update_reserved_extents(root, bytenr, num_bytes, 0);
 		ret = 0;
-	} else {
-		ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
-				       root_objectid, ref_generation,
-				       owner_objectid,
-				       BTRFS_DROP_DELAYED_REF, 1);
+	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+					parent, root_objectid, (int)owner,
+					BTRFS_DROP_DELAYED_REF, NULL);
 		BUG_ON(ret);
 		ret = check_ref_cleanup(trans, root, bytenr);
 		BUG_ON(ret);
+	} else {
+		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
+					parent, root_objectid, owner,
+					offset, BTRFS_DROP_DELAYED_REF, NULL);
+		BUG_ON(ret);
 	}
 	return ret;
 }
@@ -2969,99 +3857,147 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
-					 struct btrfs_root *root, u64 parent,
-					 u64 root_objectid, u64 ref_generation,
-					 u64 owner, struct btrfs_key *ins,
-					 int ref_mod)
+static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      u64 parent, u64 root_objectid,
+				      u64 flags, u64 owner, u64 offset,
+				      struct btrfs_key *ins, int ref_mod)
 {
 	int ret;
-	u64 super_used;
-	u64 root_used;
-	u64 num_bytes = ins->offset;
-	u32 sizes[2];
-	struct btrfs_fs_info *info = root->fs_info;
-	struct btrfs_root *extent_root = info->extent_root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_extent_item *extent_item;
-	struct btrfs_extent_ref *ref;
+	struct btrfs_extent_inline_ref *iref;
 	struct btrfs_path *path;
-	struct btrfs_key keys[2];
-
-	if (parent == 0)
-		parent = ins->objectid;
-
-	/* block accounting for super block */
-	spin_lock(&info->delalloc_lock);
-	super_used = btrfs_super_bytes_used(&info->super_copy);
-	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
+	struct extent_buffer *leaf;
+	int type;
+	u32 size;
 
-	/* block accounting for root item */
-	root_used = btrfs_root_used(&root->root_item);
-	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
-	spin_unlock(&info->delalloc_lock);
+	if (parent > 0)
+		type = BTRFS_SHARED_DATA_REF_KEY;
+	else
+		type = BTRFS_EXTENT_DATA_REF_KEY;
 
-	memcpy(&keys[0], ins, sizeof(*ins));
-	keys[1].objectid = ins->objectid;
-	keys[1].type = BTRFS_EXTENT_REF_KEY;
-	keys[1].offset = parent;
-	sizes[0] = sizeof(*extent_item);
-	sizes[1] = sizeof(*ref);
+	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
 	path->leave_spinning = 1;
-	ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
-				       sizes, 2);
+	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
+				      ins, size);
 	BUG_ON(ret);
 
-	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+	leaf = path->nodes[0];
+	extent_item = btrfs_item_ptr(leaf, path->slots[0],
 				     struct btrfs_extent_item);
-	btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);
-	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
-			     struct btrfs_extent_ref);
-
-	btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
-	btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
-	btrfs_set_ref_objectid(path->nodes[0], ref, owner);
-	btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);
+	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
+	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
+	btrfs_set_extent_flags(leaf, extent_item,
+			       flags | BTRFS_EXTENT_FLAG_DATA);
+
+	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+	btrfs_set_extent_inline_ref_type(leaf, iref, type);
+	if (parent > 0) {
+		struct btrfs_shared_data_ref *ref;
+		ref = (struct btrfs_shared_data_ref *)(iref + 1);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
+	} else {
+		struct btrfs_extent_data_ref *ref;
+		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
+		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
+		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
+		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
+	}
 
 	btrfs_mark_buffer_dirty(path->nodes[0]);
-
-	trans->alloc_exclude_start = 0;
-	trans->alloc_exclude_nr = 0;
 	btrfs_free_path(path);
 
-	if (ret)
-		goto out;
-
-	ret = update_block_group(trans, root, ins->objectid,
-				 ins->offset, 1, 0);
+	ret = update_block_group(trans, root, ins->objectid, ins->offset,
+				 1, 0);
 	if (ret) {
 		printk(KERN_ERR "btrfs update block group failed for %llu "
 		       "%llu\n", (unsigned long long)ins->objectid,
 		       (unsigned long long)ins->offset);
 		BUG();
 	}
-out:
 	return ret;
 }
 
-int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, u64 parent,
-				u64 root_objectid, u64 ref_generation,
-				u64 owner, struct btrfs_key *ins)
+static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 parent, u64 root_objectid,
+				     u64 flags, struct btrfs_disk_key *key,
+				     int level, struct btrfs_key *ins)
 {
 	int ret;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_extent_item *extent_item;
+	struct btrfs_tree_block_info *block_info;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
 
-	if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
-		return 0;
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
 
-	ret = btrfs_add_delayed_ref(trans, ins->objectid,
-				    ins->offset, parent, root_objectid,
-				    ref_generation, owner,
-				    BTRFS_ADD_DELAYED_EXTENT, 0);
+	path->leave_spinning = 1;
+	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
+				      ins, size);
 	BUG_ON(ret);
+
+	leaf = path->nodes[0];
+	extent_item = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_extent_item);
+	btrfs_set_extent_refs(leaf, extent_item, 1);
+	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
+	btrfs_set_extent_flags(leaf, extent_item,
+			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
+	block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
+
+	btrfs_set_tree_block_key(leaf, block_info, key);
+	btrfs_set_tree_block_level(leaf, block_info, level);
+
+	iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
+	if (parent > 0) {
+		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+		btrfs_set_extent_inline_ref_type(leaf, iref,
+						 BTRFS_SHARED_BLOCK_REF_KEY);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+	} else {
+		btrfs_set_extent_inline_ref_type(leaf, iref,
+						 BTRFS_TREE_BLOCK_REF_KEY);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+	}
+
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_free_path(path);
+
+	ret = update_block_group(trans, root, ins->objectid, ins->offset,
+				 1, 0);
+	if (ret) {
+		printk(KERN_ERR "btrfs update block group failed for %llu "
+		       "%llu\n", (unsigned long long)ins->objectid,
+		       (unsigned long long)ins->offset);
+		BUG();
+	}
+	return ret;
+}
+
+int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     u64 root_objectid, u64 owner,
+				     u64 offset, struct btrfs_key *ins)
+{
+	int ret;
+
+	BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
+
+	ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
+					 0, root_objectid, owner, offset,
+					 BTRFS_ADD_DELAYED_EXTENT, NULL);
 	return ret;
 }
 
@@ -3070,10 +4006,10 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
  * an extent has been allocated and makes sure to clear the free
  * space cache bits as well
  */
-int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, u64 parent,
-				u64 root_objectid, u64 ref_generation,
-				u64 owner, struct btrfs_key *ins)
+int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   u64 root_objectid, u64 owner, u64 offset,
+				   struct btrfs_key *ins)
 {
 	int ret;
 	struct btrfs_block_group_cache *block_group;
@@ -3087,8 +4023,8 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
 				      ins->offset);
 	BUG_ON(ret);
 	btrfs_put_block_group(block_group);
-	ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
-					    ref_generation, owner, ins, 1);
+	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
+					 0, owner, offset, ins, 1);
 	return ret;
 }
 
@@ -3099,26 +4035,48 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
  *
  * returns 0 if everything worked, non-zero otherwise.
  */
-int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root,
-		       u64 num_bytes, u64 parent, u64 min_alloc_size,
-		       u64 root_objectid, u64 ref_generation,
-		       u64 owner_objectid, u64 empty_size, u64 hint_byte,
-		       u64 search_end, struct btrfs_key *ins, u64 data)
+static int alloc_tree_block(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    u64 num_bytes, u64 parent, u64 root_objectid,
+			    struct btrfs_disk_key *key, int level,
+			    u64 empty_size, u64 hint_byte, u64 search_end,
+			    struct btrfs_key *ins)
 {
 	int ret;
-	ret = __btrfs_reserve_extent(trans, root, num_bytes,
-				     min_alloc_size, empty_size, hint_byte,
-				     search_end, ins, data);
+	u64 flags = 0;
+
+	ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
+				     empty_size, hint_byte, search_end,
+				     ins, 0);
 	BUG_ON(ret);
+
+	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		if (parent == 0)
+			parent = ins->objectid;
+		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+	} else
+		BUG_ON(parent > 0);
+
+	update_reserved_extents(root, ins->objectid, ins->offset, 1);
 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
-		ret = btrfs_add_delayed_ref(trans, ins->objectid,
-					    ins->offset, parent, root_objectid,
-					    ref_generation, owner_objectid,
-					    BTRFS_ADD_DELAYED_EXTENT, 0);
+		struct btrfs_delayed_extent_op *extent_op;
+		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+		BUG_ON(!extent_op);
+		if (key)
+			memcpy(&extent_op->key, key, sizeof(extent_op->key));
+		else
+			memset(&extent_op->key, 0, sizeof(extent_op->key));
+		extent_op->flags_to_set = flags;
+		extent_op->update_key = 1;
+		extent_op->update_flags = 1;
+		extent_op->is_data = 0;
+
+		ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
+					ins->offset, parent, root_objectid,
+					level, BTRFS_ADD_DELAYED_EXTENT,
+					extent_op);
 		BUG_ON(ret);
 	}
-	update_reserved_extents(root, ins->objectid, ins->offset, 1);
 	return ret;
 }
 
@@ -3157,21 +4115,17 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
  * returns the tree buffer or NULL.
  */
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
-					     struct btrfs_root *root,
-					     u32 blocksize, u64 parent,
-					     u64 root_objectid,
-					     u64 ref_generation,
-					     int level,
-					     u64 hint,
-					     u64 empty_size)
+					struct btrfs_root *root, u32 blocksize,
+					u64 parent, u64 root_objectid,
+					struct btrfs_disk_key *key, int level,
+					u64 hint, u64 empty_size)
 {
 	struct btrfs_key ins;
 	int ret;
 	struct extent_buffer *buf;
 
-	ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
-				 root_objectid, ref_generation, level,
-				 empty_size, hint, (u64)-1, &ins, 0);
+	ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
+			       key, level, empty_size, hint, (u64)-1, &ins);
 	if (ret) {
 		BUG_ON(ret > 0);
 		return ERR_PTR(ret);
@@ -3185,32 +4139,19 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf)
 {
-	u64 leaf_owner;
-	u64 leaf_generation;
-	struct refsort *sorted;
+	u64 disk_bytenr;
+	u64 num_bytes;
 	struct btrfs_key key;
 	struct btrfs_file_extent_item *fi;
+	u32 nritems;
 	int i;
-	int nritems;
 	int ret;
-	int refi = 0;
-	int slot;
 
 	BUG_ON(!btrfs_is_leaf(leaf));
 	nritems = btrfs_header_nritems(leaf);
-	leaf_owner = btrfs_header_owner(leaf);
-	leaf_generation = btrfs_header_generation(leaf);
 
-	sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
-	/* we do this loop twice.  The first time we build a list
-	 * of the extents we have a reference on, then we sort the list
-	 * by bytenr.  The second time around we actually do the
-	 * extent freeing.
-	 */
 	for (i = 0; i < nritems; i++) {
-		u64 disk_bytenr;
 		cond_resched();
-
 		btrfs_item_key_to_cpu(leaf, &key, i);
 
 		/* only extents have references, skip everything else */
@@ -3230,45 +4171,16 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 		if (disk_bytenr == 0)
 			continue;
 
-		sorted[refi].bytenr = disk_bytenr;
-		sorted[refi].slot = i;
-		refi++;
-	}
-
-	if (refi == 0)
-		goto out;
-
-	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
-
-	for (i = 0; i < refi; i++) {
-		u64 disk_bytenr;
-
-		disk_bytenr = sorted[i].bytenr;
-		slot = sorted[i].slot;
-
-		cond_resched();
-
-		btrfs_item_key_to_cpu(leaf, &key, slot);
-		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-			continue;
-
-		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
-
-		ret = btrfs_free_extent(trans, root, disk_bytenr,
-				btrfs_file_extent_disk_num_bytes(leaf, fi),
-				leaf->start, leaf_owner, leaf_generation,
-				key.objectid, 0);
+		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
+					leaf->start, 0, key.objectid, 0);
 		BUG_ON(ret);
-
-		atomic_inc(&root->fs_info->throttle_gen);
-		wake_up(&root->fs_info->transaction_throttle);
-		cond_resched();
 	}
-out:
-	kfree(sorted);
 	return 0;
 }
 
+#if 0
+
 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
 					struct btrfs_leaf_ref *ref)
@@ -3311,13 +4223,14 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+
 static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root, u64 start,
 				     u64 len, u32 *refs)
 {
 	int ret;
 
-	ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);
+	ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
 	BUG_ON(ret);
 
 #if 0 /* some debugging code in case we see problems here */
@@ -3352,6 +4265,7 @@ static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+
 /*
  * this is used while deleting old snapshots, and it drops the refs
  * on a whole subtree starting from a level 1 node.
@@ -3645,32 +4559,36 @@ out:
 	cond_resched();
 	return 0;
 }
+#endif
 
 /*
  * helper function for drop_subtree, this function is similar to
  * walk_down_tree. The main difference is that it checks reference
  * counts while tree blocks are locked.
  */
-static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
-				      struct btrfs_root *root,
-				      struct btrfs_path *path, int *level)
+static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path, int *level)
 {
 	struct extent_buffer *next;
 	struct extent_buffer *cur;
 	struct extent_buffer *parent;
 	u64 bytenr;
 	u64 ptr_gen;
+	u64 refs;
+	u64 flags;
 	u32 blocksize;
-	u32 refs;
 	int ret;
 
 	cur = path->nodes[*level];
-	ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
-				      &refs);
+	ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
+				       &refs, &flags);
 	BUG_ON(ret);
 	if (refs > 1)
 		goto out;
 
+	BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+
 	while (*level >= 0) {
 		cur = path->nodes[*level];
 		if (*level == 0) {
@@ -3692,16 +4610,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(next);
 		btrfs_set_lock_blocking(next);
 
-		ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
-					      &refs);
+		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+					       &refs, &flags);
 		BUG_ON(ret);
 		if (refs > 1) {
 			parent = path->nodes[*level];
 			ret = btrfs_free_extent(trans, root, bytenr,
-					blocksize, parent->start,
-					btrfs_header_owner(parent),
-					btrfs_header_generation(parent),
-					*level - 1, 1);
+						blocksize, parent->start,
+						btrfs_header_owner(parent),
+						*level - 1, 0);
 			BUG_ON(ret);
 			path->slots[*level]++;
 			btrfs_tree_unlock(next);
@@ -3709,6 +4626,8 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
 			continue;
 		}
 
+		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+
 		*level = btrfs_header_level(next);
 		path->nodes[*level] = next;
 		path->slots[*level] = 0;
@@ -3716,13 +4635,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
 		cond_resched();
 	}
 out:
-	parent = path->nodes[*level + 1];
+	if (path->nodes[*level] == root->node)
+		parent = path->nodes[*level];
+	else
+		parent = path->nodes[*level + 1];
 	bytenr = path->nodes[*level]->start;
 	blocksize = path->nodes[*level]->len;
 
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize,
-			parent->start, btrfs_header_owner(parent),
-			btrfs_header_generation(parent), *level, 1);
+	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
+				btrfs_header_owner(parent), *level, 0);
 	BUG_ON(ret);
 
 	if (path->locks[*level]) {
@@ -3746,8 +4667,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 				 struct btrfs_path *path,
 				 int *level, int max_level)
 {
-	u64 root_owner;
-	u64 root_gen;
 	struct btrfs_root_item *root_item = &root->root_item;
 	int i;
 	int slot;
@@ -3755,24 +4674,22 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 
 	for (i = *level; i < max_level && path->nodes[i]; i++) {
 		slot = path->slots[i];
-		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
-			struct extent_buffer *node;
-			struct btrfs_disk_key disk_key;
-
+		if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
 			/*
 			 * there is more work to do in this level.
 			 * Update the drop_progress marker to reflect
 			 * the work we've done so far, and then bump
 			 * the slot number
 			 */
-			node = path->nodes[i];
 			path->slots[i]++;
-			*level = i;
 			WARN_ON(*level == 0);
-			btrfs_node_key(node, &disk_key, path->slots[i]);
-			memcpy(&root_item->drop_progress,
-			       &disk_key, sizeof(disk_key));
-			root_item->drop_level = i;
+			if (max_level == BTRFS_MAX_LEVEL) {
+				btrfs_node_key(path->nodes[i],
+					       &root_item->drop_progress,
+					       path->slots[i]);
+				root_item->drop_level = i;
+			}
+			*level = i;
 			return 0;
 		} else {
 			struct extent_buffer *parent;
@@ -3786,22 +4703,20 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 			else
 				parent = path->nodes[*level + 1];
 
-			root_owner = btrfs_header_owner(parent);
-			root_gen = btrfs_header_generation(parent);
-
-			clean_tree_block(trans, root, path->nodes[*level]);
+			clean_tree_block(trans, root, path->nodes[i]);
 			ret = btrfs_free_extent(trans, root,
-						path->nodes[*level]->start,
-						path->nodes[*level]->len,
-						parent->start, root_owner,
-						root_gen, *level, 1);
+						path->nodes[i]->start,
+						path->nodes[i]->len,
+						parent->start,
+						btrfs_header_owner(parent),
+						*level, 0);
 			BUG_ON(ret);
 			if (path->locks[*level]) {
-				btrfs_tree_unlock(path->nodes[*level]);
-				path->locks[*level] = 0;
+				btrfs_tree_unlock(path->nodes[i]);
+				path->locks[i] = 0;
 			}
-			free_extent_buffer(path->nodes[*level]);
-			path->nodes[*level] = NULL;
+			free_extent_buffer(path->nodes[i]);
+			path->nodes[i] = NULL;
 			*level = i + 1;
 		}
 	}
@@ -3820,21 +4735,18 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 	int wret;
 	int level;
 	struct btrfs_path *path;
-	int i;
-	int orig_level;
 	int update_count;
 	struct btrfs_root_item *root_item = &root->root_item;
 
-	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
 	level = btrfs_header_level(root->node);
-	orig_level = level;
 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
-		path->nodes[level] = root->node;
-		extent_buffer_get(root->node);
+		path->nodes[level] = btrfs_lock_root_node(root);
+		btrfs_set_lock_blocking(path->nodes[level]);
 		path->slots[level] = 0;
+		path->locks[level] = 1;
 	} else {
 		struct btrfs_key key;
 		struct btrfs_disk_key found_key;
@@ -3856,12 +4768,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 		 * unlock our path, this is safe because only this
 		 * function is allowed to delete this snapshot
 		 */
-		for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-			if (path->nodes[i] && path->locks[i]) {
-				path->locks[i] = 0;
-				btrfs_tree_unlock(path->nodes[i]);
-			}
-		}
+		btrfs_unlock_up_safe(path, 0);
 	}
 	while (1) {
 		unsigned long update;
@@ -3882,8 +4789,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 			ret = -EAGAIN;
 			break;
 		}
-		atomic_inc(&root->fs_info->throttle_gen);
-		wake_up(&root->fs_info->transaction_throttle);
 		for (update_count = 0; update_count < 16; update_count++) {
 			update = trans->delayed_ref_updates;
 			trans->delayed_ref_updates = 0;
@@ -3893,12 +4798,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 				break;
 		}
 	}
-	for (i = 0; i <= orig_level; i++) {
-		if (path->nodes[i]) {
-			free_extent_buffer(path->nodes[i]);
-			path->nodes[i] = NULL;
-		}
-	}
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -3931,7 +4830,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	path->slots[level] = 0;
 
 	while (1) {
-		wret = walk_down_subtree(trans, root, path, &level);
+		wret = walk_down_tree(trans, root, path, &level);
 		if (wret < 0)
 			ret = wret;
 		if (wret != 0)
@@ -3948,6 +4847,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+#if 0
 static unsigned long calc_ra(unsigned long start, unsigned long last,
 			     unsigned long nr)
 {
@@ -5429,6 +6329,7 @@ out:
 	kfree(ref_path);
 	return ret;
 }
+#endif
 
 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 {
@@ -5477,7 +6378,8 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root,
 	u64 calc;
 
 	spin_lock(&shrink_block_group->lock);
-	if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
+	if (btrfs_block_group_used(&shrink_block_group->item) +
+	    shrink_block_group->reserved > 0) {
 		spin_unlock(&shrink_block_group->lock);
 
 		trans = btrfs_start_transaction(root, 1);
@@ -5502,6 +6404,17 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root,
 	return 0;
 }
 
+
+int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
+					 struct btrfs_block_group_cache *group)
+
+{
+	__alloc_chunk_for_shrink(root, group, 1);
+	set_block_group_readonly(group);
+	return 0;
+}
+
+#if 0
 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 u64 objectid, u64 size)
@@ -5781,6 +6694,7 @@ out:
 	btrfs_free_path(path);
 	return ret;
 }
+#endif
 
 static int find_first_block_group(struct btrfs_root *root,
 		struct btrfs_path *path, struct btrfs_key *key)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1d51dc3..0726a73 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -291,16 +291,12 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 {
 	u64 extent_end = 0;
 	u64 search_start = start;
-	u64 leaf_start;
 	u64 ram_bytes = 0;
-	u64 orig_parent = 0;
 	u64 disk_bytenr = 0;
 	u64 orig_locked_end = locked_end;
 	u8 compression;
 	u8 encryption;
 	u16 other_encoding = 0;
-	u64 root_gen;
-	u64 root_owner;
 	struct extent_buffer *leaf;
 	struct btrfs_file_extent_item *extent;
 	struct btrfs_path *path;
@@ -340,9 +336,6 @@ next_slot:
 		bookend = 0;
 		found_extent = 0;
 		found_inline = 0;
-		leaf_start = 0;
-		root_gen = 0;
-		root_owner = 0;
 		compression = 0;
 		encryption = 0;
 		extent = NULL;
@@ -417,9 +410,6 @@ next_slot:
 		if (found_extent) {
 			read_extent_buffer(leaf, &old, (unsigned long)extent,
 					   sizeof(old));
-			root_gen = btrfs_header_generation(leaf);
-			root_owner = btrfs_header_owner(leaf);
-			leaf_start = leaf->start;
 		}
 
 		if (end < extent_end && end >= key.offset) {
@@ -443,14 +433,14 @@ next_slot:
 				}
 				locked_end = extent_end;
 			}
-			orig_parent = path->nodes[0]->start;
 			disk_bytenr = le64_to_cpu(old.disk_bytenr);
 			if (disk_bytenr != 0) {
 				ret = btrfs_inc_extent_ref(trans, root,
 					   disk_bytenr,
-					   le64_to_cpu(old.disk_num_bytes),
-					   orig_parent, root->root_key.objectid,
-					   trans->transid, inode->i_ino);
+					   le64_to_cpu(old.disk_num_bytes), 0,
+					   root->root_key.objectid,
+					   key.objectid, key.offset -
+					   le64_to_cpu(old.offset));
 				BUG_ON(ret);
 			}
 		}
@@ -568,17 +558,6 @@ next_slot:
 			btrfs_mark_buffer_dirty(path->nodes[0]);
 			btrfs_set_lock_blocking(path->nodes[0]);
 
-			if (disk_bytenr != 0) {
-				ret = btrfs_update_extent_ref(trans, root,
-						disk_bytenr,
-						le64_to_cpu(old.disk_num_bytes),
-						orig_parent,
-						leaf->start,
-						root->root_key.objectid,
-						trans->transid, ins.objectid);
-
-				BUG_ON(ret);
-			}
 			path->leave_spinning = 0;
 			btrfs_release_path(root, path);
 			if (disk_bytenr != 0)
@@ -594,8 +573,9 @@ next_slot:
 				ret = btrfs_free_extent(trans, root,
 						old_disk_bytenr,
 						le64_to_cpu(old.disk_num_bytes),
-						leaf_start, root_owner,
-						root_gen, key.objectid, 0);
+						0, root->root_key.objectid,
+						key.objectid, key.offset -
+						le64_to_cpu(old.offset));
 				BUG_ON(ret);
 				*hint_byte = old_disk_bytenr;
 			}
@@ -664,12 +644,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 	u64 bytenr;
 	u64 num_bytes;
 	u64 extent_end;
-	u64 extent_offset;
+	u64 orig_offset;
 	u64 other_start;
 	u64 other_end;
 	u64 split = start;
 	u64 locked_end = end;
-	u64 orig_parent;
 	int extent_type;
 	int split_end = 1;
 	int ret;
@@ -703,7 +682,7 @@ again:
 
 	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
-	extent_offset = btrfs_file_extent_offset(leaf, fi);
+	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
 
 	if (key.offset == start)
 		split = end;
@@ -711,8 +690,6 @@ again:
 	if (key.offset == start && extent_end == end) {
 		int del_nr = 0;
 		int del_slot = 0;
-		u64 leaf_owner = btrfs_header_owner(leaf);
-		u64 leaf_gen = btrfs_header_generation(leaf);
 		other_start = end;
 		other_end = 0;
 		if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
@@ -721,8 +698,8 @@ again:
 			del_slot = path->slots[0] + 1;
 			del_nr++;
 			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
-						leaf->start, leaf_owner,
-						leaf_gen, inode->i_ino, 0);
+						0, root->root_key.objectid,
+						inode->i_ino, orig_offset);
 			BUG_ON(ret);
 		}
 		other_start = 0;
@@ -733,8 +710,8 @@ again:
 			del_slot = path->slots[0];
 			del_nr++;
 			ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
-						leaf->start, leaf_owner,
-						leaf_gen, inode->i_ino, 0);
+						0, root->root_key.objectid,
+						inode->i_ino, orig_offset);
 			BUG_ON(ret);
 		}
 		split_end = 0;
@@ -768,13 +745,12 @@ again:
 			locked_end = extent_end;
 		}
 		btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
-		extent_offset += split - key.offset;
 	} else  {
 		BUG_ON(key.offset != start);
-		btrfs_set_file_extent_offset(leaf, fi, extent_offset +
-					     split - key.offset);
-		btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
 		key.offset = split;
+		btrfs_set_file_extent_offset(leaf, fi, key.offset -
+					     orig_offset);
+		btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
 		btrfs_set_item_key_safe(trans, root, path, &key);
 		extent_end = split;
 	}
@@ -793,7 +769,8 @@ again:
 					    struct btrfs_file_extent_item);
 			key.offset = split;
 			btrfs_set_item_key_safe(trans, root, path, &key);
-			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+			btrfs_set_file_extent_offset(leaf, fi, key.offset -
+						     orig_offset);
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 							other_end - split);
 			goto done;
@@ -815,10 +792,9 @@ again:
 
 	btrfs_mark_buffer_dirty(leaf);
 
-	orig_parent = leaf->start;
-	ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
-				   orig_parent, root->root_key.objectid,
-				   trans->transid, inode->i_ino);
+	ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
+				   root->root_key.objectid,
+				   inode->i_ino, orig_offset);
 	BUG_ON(ret);
 	btrfs_release_path(root, path);
 
@@ -833,20 +809,12 @@ again:
 	btrfs_set_file_extent_type(leaf, fi, extent_type);
 	btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
 	btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
-	btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+	btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
 	btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
 	btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
 	btrfs_set_file_extent_compression(leaf, fi, 0);
 	btrfs_set_file_extent_encryption(leaf, fi, 0);
 	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
-
-	if (orig_parent != leaf->start) {
-		ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
-					      orig_parent, leaf->start,
-					      root->root_key.objectid,
-					      trans->transid, inode->i_ino);
-		BUG_ON(ret);
-	}
 done:
 	btrfs_mark_buffer_dirty(leaf);
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1c8b019..917bf10 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -48,7 +48,6 @@
 #include "ordered-data.h"
 #include "xattr.h"
 #include "tree-log.h"
-#include "ref-cache.h"
 #include "compression.h"
 #include "locking.h"
 
@@ -944,6 +943,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 	u64 cow_start;
 	u64 cur_offset;
 	u64 extent_end;
+	u64 extent_offset;
 	u64 disk_bytenr;
 	u64 num_bytes;
 	int extent_type;
@@ -1005,6 +1005,7 @@ next_slot:
 		if (extent_type == BTRFS_FILE_EXTENT_REG ||
 		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
 			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+			extent_offset = btrfs_file_extent_offset(leaf, fi);
 			extent_end = found_key.offset +
 				btrfs_file_extent_num_bytes(leaf, fi);
 			if (extent_end <= start) {
@@ -1022,9 +1023,10 @@ next_slot:
 			if (btrfs_extent_readonly(root, disk_bytenr))
 				goto out_check;
 			if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
-						  disk_bytenr))
+						  found_key.offset -
+						  extent_offset, disk_bytenr))
 				goto out_check;
-			disk_bytenr += btrfs_file_extent_offset(leaf, fi);
+			disk_bytenr += extent_offset;
 			disk_bytenr += cur_offset - found_key.offset;
 			num_bytes = min(end + 1, extent_end) - cur_offset;
 			/*
@@ -1489,9 +1491,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	ins.objectid = disk_bytenr;
 	ins.offset = disk_num_bytes;
 	ins.type = BTRFS_EXTENT_ITEM_KEY;
-	ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
-					  root->root_key.objectid,
-					  trans->transid, inode->i_ino, &ins);
+	ret = btrfs_alloc_reserved_file_extent(trans, root,
+					root->root_key.objectid,
+					inode->i_ino, file_pos, &ins);
 	BUG_ON(ret);
 	btrfs_free_path(path);
 
@@ -1956,23 +1958,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 		 * crossing root thing.  we store the inode number in the
 		 * offset of the orphan item.
 		 */
-		inode = btrfs_iget_locked(root->fs_info->sb,
-					  found_key.offset, root);
-		if (!inode)
+		found_key.objectid = found_key.offset;
+		found_key.type = BTRFS_INODE_ITEM_KEY;
+		found_key.offset = 0;
+		inode = btrfs_iget(root->fs_info->sb, &found_key, root);
+		if (IS_ERR(inode))
 			break;
 
-		if (inode->i_state & I_NEW) {
-			BTRFS_I(inode)->root = root;
-
-			/* have to set the location manually */
-			BTRFS_I(inode)->location.objectid = inode->i_ino;
-			BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-			BTRFS_I(inode)->location.offset = 0;
-
-			btrfs_read_locked_inode(inode);
-			unlock_new_inode(inode);
-		}
-
 		/*
 		 * add this inode to the orphan list so btrfs_orphan_del does
 		 * the proper thing when we hit it
@@ -2069,7 +2061,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 /*
  * read an inode from the btree into the in-memory inode
  */
-void btrfs_read_locked_inode(struct inode *inode)
+static void btrfs_read_locked_inode(struct inode *inode)
 {
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
@@ -2599,9 +2591,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 	struct btrfs_file_extent_item *fi;
 	u64 extent_start = 0;
 	u64 extent_num_bytes = 0;
+	u64 extent_offset = 0;
 	u64 item_end = 0;
-	u64 root_gen = 0;
-	u64 root_owner = 0;
 	int found_extent;
 	int del_item;
 	int pending_del_nr = 0;
@@ -2716,6 +2707,9 @@ search_again:
 				extent_num_bytes =
 					btrfs_file_extent_disk_num_bytes(leaf,
 									 fi);
+				extent_offset = found_key.offset -
+					btrfs_file_extent_offset(leaf, fi);
+
 				/* FIXME blocksize != 4096 */
 				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
 				if (extent_start != 0) {
@@ -2723,8 +2717,6 @@ search_again:
 					if (root->ref_cows)
 						inode_sub_bytes(inode, num_dec);
 				}
-				root_gen = btrfs_header_generation(leaf);
-				root_owner = btrfs_header_owner(leaf);
 			}
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 			/*
@@ -2768,12 +2760,12 @@ delete:
 		} else {
 			break;
 		}
-		if (found_extent) {
+		if (found_extent && root->ref_cows) {
 			btrfs_set_path_blocking(path);
 			ret = btrfs_free_extent(trans, root, extent_start,
-						extent_num_bytes,
-						leaf->start, root_owner,
-						root_gen, inode->i_ino, 0);
+						extent_num_bytes, 0,
+						btrfs_header_owner(leaf),
+						inode->i_ino, extent_offset);
 			BUG_ON(ret);
 		}
 next:
@@ -3105,6 +3097,45 @@ static int fixup_tree_root_location(struct btrfs_root *root,
 	return 0;
 }
 
+static void inode_tree_add(struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_inode *entry;
+	struct rb_node **p = &root->inode_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	spin_lock(&root->inode_lock);
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct btrfs_inode, rb_node);
+
+		if (inode->i_ino < entry->vfs_inode.i_ino)
+			p = &(*p)->rb_left;
+		else if (inode->i_ino > entry->vfs_inode.i_ino)
+			p = &(*p)->rb_right;
+		else {
+			WARN_ON(!(entry->vfs_inode.i_state &
+				  (I_WILL_FREE | I_FREEING | I_CLEAR)));
+			break;
+		}
+	}
+	rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
+	rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
+	spin_unlock(&root->inode_lock);
+}
+
+static void inode_tree_del(struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
+		spin_lock(&root->inode_lock);
+		rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
+		spin_unlock(&root->inode_lock);
+		RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
+	}
+}
+
 static noinline void init_btrfs_i(struct inode *inode)
 {
 	struct btrfs_inode *bi = BTRFS_I(inode);
@@ -3130,6 +3161,7 @@ static noinline void init_btrfs_i(struct inode *inode)
 			     inode->i_mapping, GFP_NOFS);
 	INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
 	INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
+	RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
 	btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
 	mutex_init(&BTRFS_I(inode)->extent_mutex);
 	mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -3152,26 +3184,9 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
 		args->root == BTRFS_I(inode)->root;
 }
 
-struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
-			    struct btrfs_root *root, int wait)
-{
-	struct inode *inode;
-	struct btrfs_iget_args args;
-	args.ino = objectid;
-	args.root = root;
-
-	if (wait) {
-		inode = ilookup5(s, objectid, btrfs_find_actor,
-				 (void *)&args);
-	} else {
-		inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
-					(void *)&args);
-	}
-	return inode;
-}
-
-struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
-				struct btrfs_root *root)
+static struct inode *btrfs_iget_locked(struct super_block *s,
+				       u64 objectid,
+				       struct btrfs_root *root)
 {
 	struct inode *inode;
 	struct btrfs_iget_args args;
@@ -3188,24 +3203,21 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
  * Returns in *is_new if the inode was read from disk
  */
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
-			 struct btrfs_root *root, int *is_new)
+			 struct btrfs_root *root)
 {
 	struct inode *inode;
 
 	inode = btrfs_iget_locked(s, location->objectid, root);
 	if (!inode)
-		return ERR_PTR(-EACCES);
+		return ERR_PTR(-ENOMEM);
 
 	if (inode->i_state & I_NEW) {
 		BTRFS_I(inode)->root = root;
 		memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
 		btrfs_read_locked_inode(inode);
+
+		inode_tree_add(inode);
 		unlock_new_inode(inode);
-		if (is_new)
-			*is_new = 1;
-	} else {
-		if (is_new)
-			*is_new = 0;
 	}
 
 	return inode;
@@ -3218,7 +3230,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	struct btrfs_root *root = bi->root;
 	struct btrfs_root *sub_root = root;
 	struct btrfs_key location;
-	int ret, new;
+	int ret;
 
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -3236,7 +3248,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 			return ERR_PTR(ret);
 		if (ret > 0)
 			return ERR_PTR(-ENOENT);
-		inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
+		inode = btrfs_iget(dir->i_sb, &location, sub_root);
 		if (IS_ERR(inode))
 			return ERR_CAST(inode);
 	}
@@ -3631,6 +3643,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
 
 	insert_inode_hash(inode);
+	inode_tree_add(inode);
 	return inode;
 fail:
 	if (dir)
@@ -4683,6 +4696,7 @@ void btrfs_destroy_inode(struct inode *inode)
 			btrfs_put_ordered_extent(ordered);
 		}
 	}
+	inode_tree_del(inode);
 	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
 	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2624b53..54dfd45 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -82,22 +82,25 @@ static noinline int create_subvol(struct btrfs_root *root,
 	if (ret)
 		goto fail;
 
-	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
-				      objectid, trans->transid, 0, 0, 0);
+	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
+				      0, objectid, NULL, 0, 0, 0);
 	if (IS_ERR(leaf)) {
 		ret = PTR_ERR(leaf);
 		goto fail;
 	}
 
-	btrfs_set_header_nritems(leaf, 0);
-	btrfs_set_header_level(leaf, 0);
+	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_bytenr(leaf, leaf->start);
 	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
 	btrfs_set_header_owner(leaf, objectid);
 
 	write_extent_buffer(leaf, root->fs_info->fsid,
 			    (unsigned long)btrfs_header_fsid(leaf),
 			    BTRFS_FSID_SIZE);
+	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+			    (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+			    BTRFS_UUID_SIZE);
 	btrfs_mark_buffer_dirty(leaf);
 
 	inode_item = &root_item.inode;
@@ -125,7 +128,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 	btrfs_set_root_dirid(&root_item, new_dirid);
 
 	key.objectid = objectid;
-	key.offset = 1;
+	key.offset = 0;
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
 				&root_item);
@@ -911,10 +914,10 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 				if (disko) {
 					inode_add_bytes(inode, datal);
 					ret = btrfs_inc_extent_ref(trans, root,
-						   disko, diskl, leaf->start,
-						   root->root_key.objectid,
-						   trans->transid,
-						   inode->i_ino);
+							disko, diskl, 0,
+							root->root_key.objectid,
+							inode->i_ino,
+							new_key.offset - datao);
 					BUG_ON(ret);
 				}
 			} else if (type == BTRFS_FILE_EXTENT_INLINE) {
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 5f8f218..6d6523d 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -45,22 +45,132 @@ static void print_dev_item(struct extent_buffer *eb,
 	       (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
 	       (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
 }
+static void print_extent_data_ref(struct extent_buffer *eb,
+				  struct btrfs_extent_data_ref *ref)
+{
+	printk(KERN_INFO "\t\textent data backref root %llu "
+	       "objectid %llu offset %llu count %u\n",
+	       (unsigned long long)btrfs_extent_data_ref_root(eb, ref),
+	       (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref),
+	       (unsigned long long)btrfs_extent_data_ref_offset(eb, ref),
+	       btrfs_extent_data_ref_count(eb, ref));
+}
+
+static void print_extent_item(struct extent_buffer *eb, int slot)
+{
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_extent_data_ref *dref;
+	struct btrfs_shared_data_ref *sref;
+	struct btrfs_disk_key key;
+	unsigned long end;
+	unsigned long ptr;
+	int type;
+	u32 item_size = btrfs_item_size_nr(eb, slot);
+	u64 flags;
+	u64 offset;
+
+	if (item_size < sizeof(*ei)) {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		struct btrfs_extent_item_v0 *ei0;
+		BUG_ON(item_size != sizeof(*ei0));
+		ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
+		printk(KERN_INFO "\t\textent refs %u\n",
+		       btrfs_extent_refs_v0(eb, ei0));
+		return;
+#else
+		BUG();
+#endif
+	}
+
+	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
+	flags = btrfs_extent_flags(eb, ei);
+
+	printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
+	       (unsigned long long)btrfs_extent_refs(eb, ei),
+	       (unsigned long long)btrfs_extent_generation(eb, ei),
+	       (unsigned long long)flags);
+
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+		struct btrfs_tree_block_info *info;
+		info = (struct btrfs_tree_block_info *)(ei + 1);
+		btrfs_tree_block_key(eb, info, &key);
+		printk(KERN_INFO "\t\ttree block key (%llu %x %llu) "
+		       "level %d\n",
+		       (unsigned long long)btrfs_disk_key_objectid(&key),
+		       key.type,
+		       (unsigned long long)btrfs_disk_key_offset(&key),
+		       btrfs_tree_block_level(eb, info));
+		iref = (struct btrfs_extent_inline_ref *)(info + 1);
+	} else {
+		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+	}
+
+	ptr = (unsigned long)iref;
+	end = (unsigned long)ei + item_size;
+	while (ptr < end) {
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		type = btrfs_extent_inline_ref_type(eb, iref);
+		offset = btrfs_extent_inline_ref_offset(eb, iref);
+		switch (type) {
+		case BTRFS_TREE_BLOCK_REF_KEY:
+			printk(KERN_INFO "\t\ttree block backref "
+				"root %llu\n", (unsigned long long)offset);
+			break;
+		case BTRFS_SHARED_BLOCK_REF_KEY:
+			printk(KERN_INFO "\t\tshared block backref "
+				"parent %llu\n", (unsigned long long)offset);
+			break;
+		case BTRFS_EXTENT_DATA_REF_KEY:
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			print_extent_data_ref(eb, dref);
+			break;
+		case BTRFS_SHARED_DATA_REF_KEY:
+			sref = (struct btrfs_shared_data_ref *)(iref + 1);
+			printk(KERN_INFO "\t\tshared data backref "
+			       "parent %llu count %u\n",
+			       (unsigned long long)offset,
+			       btrfs_shared_data_ref_count(eb, sref));
+			break;
+		default:
+			BUG();
+		}
+		ptr += btrfs_extent_inline_ref_size(type);
+	}
+	WARN_ON(ptr > end);
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
+{
+	struct btrfs_extent_ref_v0 *ref0;
+
+	ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
+	printk("\t\textent back ref root %llu gen %llu "
+		"owner %llu num_refs %lu\n",
+		(unsigned long long)btrfs_ref_root_v0(eb, ref0),
+		(unsigned long long)btrfs_ref_generation_v0(eb, ref0),
+		(unsigned long long)btrfs_ref_objectid_v0(eb, ref0),
+		(unsigned long)btrfs_ref_count_v0(eb, ref0));
+}
+#endif
+
 void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 {
 	int i;
+	u32 type;
 	u32 nr = btrfs_header_nritems(l);
 	struct btrfs_item *item;
-	struct btrfs_extent_item *ei;
 	struct btrfs_root_item *ri;
 	struct btrfs_dir_item *di;
 	struct btrfs_inode_item *ii;
 	struct btrfs_block_group_item *bi;
 	struct btrfs_file_extent_item *fi;
+	struct btrfs_extent_data_ref *dref;
+	struct btrfs_shared_data_ref *sref;
+	struct btrfs_dev_extent *dev_extent;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
-	struct btrfs_extent_ref *ref;
-	struct btrfs_dev_extent *dev_extent;
-	u32 type;
 
 	printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
 		(unsigned long long)btrfs_header_bytenr(l), nr,
@@ -100,20 +210,25 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 				btrfs_disk_root_refs(l, ri));
 			break;
 		case BTRFS_EXTENT_ITEM_KEY:
-			ei = btrfs_item_ptr(l, i, struct btrfs_extent_item);
-			printk(KERN_INFO "\t\textent data refs %u\n",
-				btrfs_extent_refs(l, ei));
-			break;
-		case BTRFS_EXTENT_REF_KEY:
-			ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref);
-			printk(KERN_INFO "\t\textent back ref root %llu "
-			       "gen %llu owner %llu num_refs %lu\n",
-			       (unsigned long long)btrfs_ref_root(l, ref),
-			       (unsigned long long)btrfs_ref_generation(l, ref),
-			       (unsigned long long)btrfs_ref_objectid(l, ref),
-			       (unsigned long)btrfs_ref_num_refs(l, ref));
+			print_extent_item(l, i);
+			break;
+		case BTRFS_TREE_BLOCK_REF_KEY:
+			printk(KERN_INFO "\t\ttree block backref\n");
+			break;
+		case BTRFS_SHARED_BLOCK_REF_KEY:
+			printk(KERN_INFO "\t\tshared block backref\n");
+			break;
+		case BTRFS_EXTENT_DATA_REF_KEY:
+			dref = btrfs_item_ptr(l, i,
+					      struct btrfs_extent_data_ref);
+			print_extent_data_ref(l, dref);
+			break;
+		case BTRFS_SHARED_DATA_REF_KEY:
+			sref = btrfs_item_ptr(l, i,
+					      struct btrfs_shared_data_ref);
+			printk(KERN_INFO "\t\tshared data backref count %u\n",
+			       btrfs_shared_data_ref_count(l, sref));
 			break;
-
 		case BTRFS_EXTENT_DATA_KEY:
 			fi = btrfs_item_ptr(l, i,
 					    struct btrfs_file_extent_item);
@@ -139,6 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 			       (unsigned long long)
 			       btrfs_file_extent_ram_bytes(l, fi));
 			break;
+		case BTRFS_EXTENT_REF_V0_KEY:
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			print_extent_ref_v0(l, i);
+#else
+			BUG();
+#endif
 		case BTRFS_BLOCK_GROUP_ITEM_KEY:
 			bi = btrfs_item_ptr(l, i,
 					    struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
new file mode 100644
index 0000000..b23dc20
--- /dev/null
+++ b/fs/btrfs/relocation.c
@@ -0,0 +1,3711 @@
+/*
+ * Copyright (C) 2009 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/rbtree.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "volumes.h"
+#include "locking.h"
+#include "btrfs_inode.h"
+#include "async-thread.h"
+
+/*
+ * backref_node, mapping_node and tree_block start with this
+ */
+struct tree_entry {
+	struct rb_node rb_node;
+	u64 bytenr;
+};
+
+/*
+ * present a tree block in the backref cache
+ */
+struct backref_node {
+	struct rb_node rb_node;
+	u64 bytenr;
+	/* objectid tree block owner */
+	u64 owner;
+	/* list of upper level blocks reference this block */
+	struct list_head upper;
+	/* list of child blocks in the cache */
+	struct list_head lower;
+	/* NULL if this node is not tree root */
+	struct btrfs_root *root;
+	/* extent buffer got by COW the block */
+	struct extent_buffer *eb;
+	/* level of tree block */
+	unsigned int level:8;
+	/* 1 if the block is root of old snapshot */
+	unsigned int old_root:1;
+	/* 1 if no child blocks in the cache */
+	unsigned int lowest:1;
+	/* is the extent buffer locked */
+	unsigned int locked:1;
+	/* has the block been processed */
+	unsigned int processed:1;
+	/* have backrefs of this block been checked */
+	unsigned int checked:1;
+};
+
+/*
+ * present a block pointer in the backref cache
+ */
+struct backref_edge {
+	struct list_head list[2];
+	struct backref_node *node[2];
+	u64 blockptr;
+};
+
+#define LOWER	0
+#define UPPER	1
+
+struct backref_cache {
+	/* red black tree of all backref nodes in the cache */
+	struct rb_root rb_root;
+	/* list of backref nodes with no child block in the cache */
+	struct list_head pending[BTRFS_MAX_LEVEL];
+	spinlock_t lock;
+};
+
+/*
+ * map address of tree root to tree
+ */
+struct mapping_node {
+	struct rb_node rb_node;
+	u64 bytenr;
+	void *data;
+};
+
+struct mapping_tree {
+	struct rb_root rb_root;
+	spinlock_t lock;
+};
+
+/*
+ * present a tree block to process
+ */
+struct tree_block {
+	struct rb_node rb_node;
+	u64 bytenr;
+	struct btrfs_key key;
+	unsigned int level:8;
+	unsigned int key_ready:1;
+};
+
+/* inode vector */
+#define INODEVEC_SIZE 16
+
+struct inodevec {
+	struct list_head list;
+	struct inode *inode[INODEVEC_SIZE];
+	int nr;
+};
+
+struct reloc_control {
+	/* block group to relocate */
+	struct btrfs_block_group_cache *block_group;
+	/* extent tree */
+	struct btrfs_root *extent_root;
+	/* inode for moving data */
+	struct inode *data_inode;
+	struct btrfs_workers workers;
+	/* tree blocks have been processed */
+	struct extent_io_tree processed_blocks;
+	/* map start of tree root to corresponding reloc tree */
+	struct mapping_tree reloc_root_tree;
+	/* list of reloc trees */
+	struct list_head reloc_roots;
+	u64 search_start;
+	u64 extents_found;
+	u64 extents_skipped;
+	int stage;
+	int create_reloc_root;
+	unsigned int found_file_extent:1;
+	unsigned int found_old_snapshot:1;
+};
+
+/* stages of data relocation */
+#define MOVE_DATA_EXTENTS	0
+#define UPDATE_DATA_PTRS	1
+
+/*
+ * merge reloc tree to corresponding fs tree in worker threads
+ */
+struct async_merge {
+	struct btrfs_work work;
+	struct reloc_control *rc;
+	struct btrfs_root *root;
+	struct completion *done;
+	atomic_t *num_pending;
+};
+
+static void mapping_tree_init(struct mapping_tree *tree)
+{
+	tree->rb_root.rb_node = NULL;
+	spin_lock_init(&tree->lock);
+}
+
+static void backref_cache_init(struct backref_cache *cache)
+{
+	int i;
+	cache->rb_root.rb_node = NULL;
+	for (i = 0; i < BTRFS_MAX_LEVEL; i++)
+		INIT_LIST_HEAD(&cache->pending[i]);
+	spin_lock_init(&cache->lock);
+}
+
+static void backref_node_init(struct backref_node *node)
+{
+	memset(node, 0, sizeof(*node));
+	INIT_LIST_HEAD(&node->upper);
+	INIT_LIST_HEAD(&node->lower);
+	RB_CLEAR_NODE(&node->rb_node);
+}
+
+static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
+				   struct rb_node *node)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct tree_entry *entry;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct tree_entry, rb_node);
+
+		if (bytenr < entry->bytenr)
+			p = &(*p)->rb_left;
+		else if (bytenr > entry->bytenr)
+			p = &(*p)->rb_right;
+		else
+			return parent;
+	}
+
+	rb_link_node(node, parent, p);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
+{
+	struct rb_node *n = root->rb_node;
+	struct tree_entry *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct tree_entry, rb_node);
+
+		if (bytenr < entry->bytenr)
+			n = n->rb_left;
+		else if (bytenr > entry->bytenr)
+			n = n->rb_right;
+		else
+			return n;
+	}
+	return NULL;
+}
+
+/*
+ * walk up backref nodes until reach node presents tree root
+ */
+static struct backref_node *walk_up_backref(struct backref_node *node,
+					    struct backref_edge *edges[],
+					    int *index)
+{
+	struct backref_edge *edge;
+	int idx = *index;
+
+	while (!list_empty(&node->upper)) {
+		edge = list_entry(node->upper.next,
+				  struct backref_edge, list[LOWER]);
+		edges[idx++] = edge;
+		node = edge->node[UPPER];
+	}
+	*index = idx;
+	return node;
+}
+
+/*
+ * walk down backref nodes to find start of next reference path
+ */
+static struct backref_node *walk_down_backref(struct backref_edge *edges[],
+					      int *index)
+{
+	struct backref_edge *edge;
+	struct backref_node *lower;
+	int idx = *index;
+
+	while (idx > 0) {
+		edge = edges[idx - 1];
+		lower = edge->node[LOWER];
+		if (list_is_last(&edge->list[LOWER], &lower->upper)) {
+			idx--;
+			continue;
+		}
+		edge = list_entry(edge->list[LOWER].next,
+				  struct backref_edge, list[LOWER]);
+		edges[idx - 1] = edge;
+		*index = idx;
+		return edge->node[UPPER];
+	}
+	*index = 0;
+	return NULL;
+}
+
+static void drop_node_buffer(struct backref_node *node)
+{
+	if (node->eb) {
+		if (node->locked) {
+			btrfs_tree_unlock(node->eb);
+			node->locked = 0;
+		}
+		free_extent_buffer(node->eb);
+		node->eb = NULL;
+	}
+}
+
+static void drop_backref_node(struct backref_cache *tree,
+			      struct backref_node *node)
+{
+	BUG_ON(!node->lowest);
+	BUG_ON(!list_empty(&node->upper));
+
+	drop_node_buffer(node);
+	list_del(&node->lower);
+
+	rb_erase(&node->rb_node, &tree->rb_root);
+	kfree(node);
+}
+
+/*
+ * remove a backref node from the backref cache
+ */
+static void remove_backref_node(struct backref_cache *cache,
+				struct backref_node *node)
+{
+	struct backref_node *upper;
+	struct backref_edge *edge;
+
+	if (!node)
+		return;
+
+	BUG_ON(!node->lowest);
+	while (!list_empty(&node->upper)) {
+		edge = list_entry(node->upper.next, struct backref_edge,
+				  list[LOWER]);
+		upper = edge->node[UPPER];
+		list_del(&edge->list[LOWER]);
+		list_del(&edge->list[UPPER]);
+		kfree(edge);
+		/*
+		 * add the node to pending list if no other
+		 * child block cached.
+		 */
+		if (list_empty(&upper->lower)) {
+			list_add_tail(&upper->lower,
+				      &cache->pending[upper->level]);
+			upper->lowest = 1;
+		}
+	}
+	drop_backref_node(cache, node);
+}
+
+/*
+ * find reloc tree by address of tree root
+ */
+static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
+					  u64 bytenr)
+{
+	struct rb_node *rb_node;
+	struct mapping_node *node;
+	struct btrfs_root *root = NULL;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
+	if (rb_node) {
+		node = rb_entry(rb_node, struct mapping_node, rb_node);
+		root = (struct btrfs_root *)node->data;
+	}
+	spin_unlock(&rc->reloc_root_tree.lock);
+	return root;
+}
+
+static int is_cowonly_root(u64 root_objectid)
+{
+	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
+	    root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
+	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
+	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
+	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
+	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
+		return 1;
+	return 0;
+}
+
+static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
+					u64 root_objectid)
+{
+	struct btrfs_key key;
+
+	key.objectid = root_objectid;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	if (is_cowonly_root(root_objectid))
+		key.offset = 0;
+	else
+		key.offset = (u64)-1;
+
+	return btrfs_read_fs_root_no_name(fs_info, &key);
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+static noinline_for_stack
+struct btrfs_root *find_tree_root(struct reloc_control *rc,
+				  struct extent_buffer *leaf,
+				  struct btrfs_extent_ref_v0 *ref0)
+{
+	struct btrfs_root *root;
+	u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
+	u64 generation = btrfs_ref_generation_v0(leaf, ref0);
+
+	BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
+
+	root = read_fs_root(rc->extent_root->fs_info, root_objectid);
+	BUG_ON(IS_ERR(root));
+
+	if (root->ref_cows &&
+	    generation != btrfs_root_generation(&root->root_item))
+		return NULL;
+
+	return root;
+}
+#endif
+
+static noinline_for_stack
+int find_inline_backref(struct extent_buffer *leaf, int slot,
+			unsigned long *ptr, unsigned long *end)
+{
+	struct btrfs_extent_item *ei;
+	struct btrfs_tree_block_info *bi;
+	u32 item_size;
+
+	item_size = btrfs_item_size_nr(leaf, slot);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (item_size < sizeof(*ei)) {
+		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
+		return 1;
+	}
+#endif
+	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+	WARN_ON(!(btrfs_extent_flags(leaf, ei) &
+		  BTRFS_EXTENT_FLAG_TREE_BLOCK));
+
+	if (item_size <= sizeof(*ei) + sizeof(*bi)) {
+		WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
+		return 1;
+	}
+
+	bi = (struct btrfs_tree_block_info *)(ei + 1);
+	*ptr = (unsigned long)(bi + 1);
+	*end = (unsigned long)ei + item_size;
+	return 0;
+}
+
+/*
+ * build backref tree for a given tree block. root of the backref tree
+ * corresponds the tree block, leaves of the backref tree correspond
+ * roots of b-trees that reference the tree block.
+ *
+ * the basic idea of this function is check backrefs of a given block
+ * to find upper level blocks that refernece the block, and then check
+ * bakcrefs of these upper level blocks recursively. the recursion stop
+ * when tree root is reached or backrefs for the block is cached.
+ *
+ * NOTE: if we find backrefs for a block are cached, we know backrefs
+ * for all upper level blocks that directly/indirectly reference the
+ * block are also cached.
+ */
+static struct backref_node *build_backref_tree(struct reloc_control *rc,
+					       struct backref_cache *cache,
+					       struct btrfs_key *node_key,
+					       int level, u64 bytenr)
+{
+	struct btrfs_path *path1;
+	struct btrfs_path *path2;
+	struct extent_buffer *eb;
+	struct btrfs_root *root;
+	struct backref_node *cur;
+	struct backref_node *upper;
+	struct backref_node *lower;
+	struct backref_node *node = NULL;
+	struct backref_node *exist = NULL;
+	struct backref_edge *edge;
+	struct rb_node *rb_node;
+	struct btrfs_key key;
+	unsigned long end;
+	unsigned long ptr;
+	LIST_HEAD(list);
+	int ret;
+	int err = 0;
+
+	path1 = btrfs_alloc_path();
+	path2 = btrfs_alloc_path();
+	if (!path1 || !path2) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	node = kmalloc(sizeof(*node), GFP_NOFS);
+	if (!node) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	backref_node_init(node);
+	node->bytenr = bytenr;
+	node->owner = 0;
+	node->level = level;
+	node->lowest = 1;
+	cur = node;
+again:
+	end = 0;
+	ptr = 0;
+	key.objectid = cur->bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	path1->search_commit_root = 1;
+	path1->skip_locking = 1;
+	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
+				0, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	BUG_ON(!ret || !path1->slots[0]);
+
+	path1->slots[0]--;
+
+	WARN_ON(cur->checked);
+	if (!list_empty(&cur->upper)) {
+		/*
+		 * the backref was added previously when processsing
+		 * backref of type BTRFS_TREE_BLOCK_REF_KEY
+		 */
+		BUG_ON(!list_is_singular(&cur->upper));
+		edge = list_entry(cur->upper.next, struct backref_edge,
+				  list[LOWER]);
+		BUG_ON(!list_empty(&edge->list[UPPER]));
+		exist = edge->node[UPPER];
+		/*
+		 * add the upper level block to pending list if we need
+		 * check its backrefs
+		 */
+		if (!exist->checked)
+			list_add_tail(&edge->list[UPPER], &list);
+	} else {
+		exist = NULL;
+	}
+
+	while (1) {
+		cond_resched();
+		eb = path1->nodes[0];
+
+		if (ptr >= end) {
+			if (path1->slots[0] >= btrfs_header_nritems(eb)) {
+				ret = btrfs_next_leaf(rc->extent_root, path1);
+				if (ret < 0) {
+					err = ret;
+					goto out;
+				}
+				if (ret > 0)
+					break;
+				eb = path1->nodes[0];
+			}
+
+			btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
+			if (key.objectid != cur->bytenr) {
+				WARN_ON(exist);
+				break;
+			}
+
+			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+				ret = find_inline_backref(eb, path1->slots[0],
+							  &ptr, &end);
+				if (ret)
+					goto next;
+			}
+		}
+
+		if (ptr < end) {
+			/* update key for inline back ref */
+			struct btrfs_extent_inline_ref *iref;
+			iref = (struct btrfs_extent_inline_ref *)ptr;
+			key.type = btrfs_extent_inline_ref_type(eb, iref);
+			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
+			WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
+				key.type != BTRFS_SHARED_BLOCK_REF_KEY);
+		}
+
+		if (exist &&
+		    ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
+		      exist->owner == key.offset) ||
+		     (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
+		      exist->bytenr == key.offset))) {
+			exist = NULL;
+			goto next;
+		}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
+		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
+			if (key.objectid == key.offset &&
+			    key.type == BTRFS_EXTENT_REF_V0_KEY) {
+				struct btrfs_extent_ref_v0 *ref0;
+				ref0 = btrfs_item_ptr(eb, path1->slots[0],
+						struct btrfs_extent_ref_v0);
+				root = find_tree_root(rc, eb, ref0);
+				if (root)
+					cur->root = root;
+				else
+					cur->old_root = 1;
+				break;
+			}
+#else
+		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
+		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
+#endif
+			if (key.objectid == key.offset) {
+				/*
+				 * only root blocks of reloc trees use
+				 * backref of this type.
+				 */
+				root = find_reloc_root(rc, cur->bytenr);
+				BUG_ON(!root);
+				cur->root = root;
+				break;
+			}
+
+			edge = kzalloc(sizeof(*edge), GFP_NOFS);
+			if (!edge) {
+				err = -ENOMEM;
+				goto out;
+			}
+			rb_node = tree_search(&cache->rb_root, key.offset);
+			if (!rb_node) {
+				upper = kmalloc(sizeof(*upper), GFP_NOFS);
+				if (!upper) {
+					kfree(edge);
+					err = -ENOMEM;
+					goto out;
+				}
+				backref_node_init(upper);
+				upper->bytenr = key.offset;
+				upper->owner = 0;
+				upper->level = cur->level + 1;
+				/*
+				 *  backrefs for the upper level block isn't
+				 *  cached, add the block to pending list
+				 */
+				list_add_tail(&edge->list[UPPER], &list);
+			} else {
+				upper = rb_entry(rb_node, struct backref_node,
+						 rb_node);
+				INIT_LIST_HEAD(&edge->list[UPPER]);
+			}
+			list_add(&edge->list[LOWER], &cur->upper);
+			edge->node[UPPER] = upper;
+			edge->node[LOWER] = cur;
+
+			goto next;
+		} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
+			goto next;
+		}
+
+		/* key.type == BTRFS_TREE_BLOCK_REF_KEY */
+		root = read_fs_root(rc->extent_root->fs_info, key.offset);
+		if (IS_ERR(root)) {
+			err = PTR_ERR(root);
+			goto out;
+		}
+
+		if (btrfs_root_level(&root->root_item) == cur->level) {
+			/* tree root */
+			BUG_ON(btrfs_root_bytenr(&root->root_item) !=
+			       cur->bytenr);
+			cur->root = root;
+			break;
+		}
+
+		level = cur->level + 1;
+
+		/*
+		 * searching the tree to find upper level blocks
+		 * reference the block.
+		 */
+		path2->search_commit_root = 1;
+		path2->skip_locking = 1;
+		path2->lowest_level = level;
+		ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
+		path2->lowest_level = 0;
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+
+		eb = path2->nodes[level];
+		WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
+			cur->bytenr);
+
+		lower = cur;
+		for (; level < BTRFS_MAX_LEVEL; level++) {
+			if (!path2->nodes[level]) {
+				BUG_ON(btrfs_root_bytenr(&root->root_item) !=
+				       lower->bytenr);
+				lower->root = root;
+				break;
+			}
+
+			edge = kzalloc(sizeof(*edge), GFP_NOFS);
+			if (!edge) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			eb = path2->nodes[level];
+			rb_node = tree_search(&cache->rb_root, eb->start);
+			if (!rb_node) {
+				upper = kmalloc(sizeof(*upper), GFP_NOFS);
+				if (!upper) {
+					kfree(edge);
+					err = -ENOMEM;
+					goto out;
+				}
+				backref_node_init(upper);
+				upper->bytenr = eb->start;
+				upper->owner = btrfs_header_owner(eb);
+				upper->level = lower->level + 1;
+
+				/*
+				 * if we know the block isn't shared
+				 * we can void checking its backrefs.
+				 */
+				if (btrfs_block_can_be_shared(root, eb))
+					upper->checked = 0;
+				else
+					upper->checked = 1;
+
+				/*
+				 * add the block to pending list if we
+				 * need check its backrefs. only block
+				 * at 'cur->level + 1' is added to the
+				 * tail of pending list. this guarantees
+				 * we check backrefs from lower level
+				 * blocks to upper level blocks.
+				 */
+				if (!upper->checked &&
+				    level == cur->level + 1) {
+					list_add_tail(&edge->list[UPPER],
+						      &list);
+				} else
+					INIT_LIST_HEAD(&edge->list[UPPER]);
+			} else {
+				upper = rb_entry(rb_node, struct backref_node,
+						 rb_node);
+				BUG_ON(!upper->checked);
+				INIT_LIST_HEAD(&edge->list[UPPER]);
+			}
+			list_add_tail(&edge->list[LOWER], &lower->upper);
+			edge->node[UPPER] = upper;
+			edge->node[LOWER] = lower;
+
+			if (rb_node)
+				break;
+			lower = upper;
+			upper = NULL;
+		}
+		btrfs_release_path(root, path2);
+next:
+		if (ptr < end) {
+			ptr += btrfs_extent_inline_ref_size(key.type);
+			if (ptr >= end) {
+				WARN_ON(ptr > end);
+				ptr = 0;
+				end = 0;
+			}
+		}
+		if (ptr >= end)
+			path1->slots[0]++;
+	}
+	btrfs_release_path(rc->extent_root, path1);
+
+	cur->checked = 1;
+	WARN_ON(exist);
+
+	/* the pending list isn't empty, take the first block to process */
+	if (!list_empty(&list)) {
+		edge = list_entry(list.next, struct backref_edge, list[UPPER]);
+		list_del_init(&edge->list[UPPER]);
+		cur = edge->node[UPPER];
+		goto again;
+	}
+
+	/*
+	 * everything goes well, connect backref nodes and insert backref nodes
+	 * into the cache.
+	 */
+	BUG_ON(!node->checked);
+	rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
+	BUG_ON(rb_node);
+
+	list_for_each_entry(edge, &node->upper, list[LOWER])
+		list_add_tail(&edge->list[UPPER], &list);
+
+	while (!list_empty(&list)) {
+		edge = list_entry(list.next, struct backref_edge, list[UPPER]);
+		list_del_init(&edge->list[UPPER]);
+		upper = edge->node[UPPER];
+
+		if (!RB_EMPTY_NODE(&upper->rb_node)) {
+			if (upper->lowest) {
+				list_del_init(&upper->lower);
+				upper->lowest = 0;
+			}
+
+			list_add_tail(&edge->list[UPPER], &upper->lower);
+			continue;
+		}
+
+		BUG_ON(!upper->checked);
+		rb_node = tree_insert(&cache->rb_root, upper->bytenr,
+				      &upper->rb_node);
+		BUG_ON(rb_node);
+
+		list_add_tail(&edge->list[UPPER], &upper->lower);
+
+		list_for_each_entry(edge, &upper->upper, list[LOWER])
+			list_add_tail(&edge->list[UPPER], &list);
+	}
+out:
+	btrfs_free_path(path1);
+	btrfs_free_path(path2);
+	if (err) {
+		INIT_LIST_HEAD(&list);
+		upper = node;
+		while (upper) {
+			if (RB_EMPTY_NODE(&upper->rb_node)) {
+				list_splice_tail(&upper->upper, &list);
+				kfree(upper);
+			}
+
+			if (list_empty(&list))
+				break;
+
+			edge = list_entry(list.next, struct backref_edge,
+					  list[LOWER]);
+			upper = edge->node[UPPER];
+			kfree(edge);
+		}
+		return ERR_PTR(err);
+	}
+	return node;
+}
+
+/*
+ * helper to add 'address of tree root -> reloc tree' mapping
+ */
+static int __add_reloc_root(struct btrfs_root *root)
+{
+	struct rb_node *rb_node;
+	struct mapping_node *node;
+	struct reloc_control *rc = root->fs_info->reloc_ctl;
+
+	node = kmalloc(sizeof(*node), GFP_NOFS);
+	BUG_ON(!node);
+
+	node->bytenr = root->node->start;
+	node->data = root;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
+			      node->bytenr, &node->rb_node);
+	spin_unlock(&rc->reloc_root_tree.lock);
+	BUG_ON(rb_node);
+
+	list_add_tail(&root->root_list, &rc->reloc_roots);
+	return 0;
+}
+
+/*
+ * helper to update/delete the 'address of tree root -> reloc tree'
+ * mapping
+ */
+static int __update_reloc_root(struct btrfs_root *root, int del)
+{
+	struct rb_node *rb_node;
+	struct mapping_node *node = NULL;
+	struct reloc_control *rc = root->fs_info->reloc_ctl;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+			      root->commit_root->start);
+	if (rb_node) {
+		node = rb_entry(rb_node, struct mapping_node, rb_node);
+		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+	}
+	spin_unlock(&rc->reloc_root_tree.lock);
+
+	BUG_ON((struct btrfs_root *)node->data != root);
+
+	if (!del) {
+		spin_lock(&rc->reloc_root_tree.lock);
+		node->bytenr = root->node->start;
+		rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
+				      node->bytenr, &node->rb_node);
+		spin_unlock(&rc->reloc_root_tree.lock);
+		BUG_ON(rb_node);
+	} else {
+		list_del_init(&root->root_list);
+		kfree(node);
+	}
+	return 0;
+}
+
+/*
+ * create reloc tree for a given fs tree. reloc tree is just a
+ * snapshot of the fs tree with special root objectid.
+ */
+int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root)
+{
+	struct btrfs_root *reloc_root;
+	struct extent_buffer *eb;
+	struct btrfs_root_item *root_item;
+	struct btrfs_key root_key;
+	int ret;
+
+	if (root->reloc_root) {
+		reloc_root = root->reloc_root;
+		reloc_root->last_trans = trans->transid;
+		return 0;
+	}
+
+	if (!root->fs_info->reloc_ctl ||
+	    !root->fs_info->reloc_ctl->create_reloc_root ||
+	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+		return 0;
+
+	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
+	BUG_ON(!root_item);
+
+	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
+	root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root_key.offset = root->root_key.objectid;
+
+	ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
+			      BTRFS_TREE_RELOC_OBJECTID);
+	BUG_ON(ret);
+
+	btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
+	memcpy(root_item, &root->root_item, sizeof(*root_item));
+	btrfs_set_root_refs(root_item, 1);
+	btrfs_set_root_bytenr(root_item, eb->start);
+	btrfs_set_root_level(root_item, btrfs_header_level(eb));
+	btrfs_set_root_generation(root_item, trans->transid);
+	memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
+	root_item->drop_level = 0;
+
+	btrfs_tree_unlock(eb);
+	free_extent_buffer(eb);
+
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
+				&root_key, root_item);
+	BUG_ON(ret);
+	kfree(root_item);
+
+	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
+						 &root_key);
+	BUG_ON(IS_ERR(reloc_root));
+	reloc_root->last_trans = trans->transid;
+
+	__add_reloc_root(reloc_root);
+	root->reloc_root = reloc_root;
+	return 0;
+}
+
+/*
+ * update root item of reloc tree
+ */
+int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root)
+{
+	struct btrfs_root *reloc_root;
+	struct btrfs_root_item *root_item;
+	int del = 0;
+	int ret;
+
+	if (!root->reloc_root)
+		return 0;
+
+	reloc_root = root->reloc_root;
+	root_item = &reloc_root->root_item;
+
+	if (btrfs_root_refs(root_item) == 0) {
+		root->reloc_root = NULL;
+		del = 1;
+	}
+
+	__update_reloc_root(reloc_root, del);
+
+	if (reloc_root->commit_root != reloc_root->node) {
+		btrfs_set_root_node(root_item, reloc_root->node);
+		free_extent_buffer(reloc_root->commit_root);
+		reloc_root->commit_root = btrfs_root_node(reloc_root);
+	}
+
+	ret = btrfs_update_root(trans, root->fs_info->tree_root,
+				&reloc_root->root_key, root_item);
+	BUG_ON(ret);
+	return 0;
+}
+
+/*
+ * helper to find first cached inode with inode number >= objectid
+ * in a subvolume
+ */
+static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
+{
+	struct rb_node *node;
+	struct rb_node *prev;
+	struct btrfs_inode *entry;
+	struct inode *inode;
+
+	spin_lock(&root->inode_lock);
+again:
+	node = root->inode_tree.rb_node;
+	prev = NULL;
+	while (node) {
+		prev = node;
+		entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+		if (objectid < entry->vfs_inode.i_ino)
+			node = node->rb_left;
+		else if (objectid > entry->vfs_inode.i_ino)
+			node = node->rb_right;
+		else
+			break;
+	}
+	if (!node) {
+		while (prev) {
+			entry = rb_entry(prev, struct btrfs_inode, rb_node);
+			if (objectid <= entry->vfs_inode.i_ino) {
+				node = prev;
+				break;
+			}
+			prev = rb_next(prev);
+		}
+	}
+	while (node) {
+		entry = rb_entry(node, struct btrfs_inode, rb_node);
+		inode = igrab(&entry->vfs_inode);
+		if (inode) {
+			spin_unlock(&root->inode_lock);
+			return inode;
+		}
+
+		objectid = entry->vfs_inode.i_ino + 1;
+		if (cond_resched_lock(&root->inode_lock))
+			goto again;
+
+		node = rb_next(node);
+	}
+	spin_unlock(&root->inode_lock);
+	return NULL;
+}
+
+static int in_block_group(u64 bytenr,
+			  struct btrfs_block_group_cache *block_group)
+{
+	if (bytenr >= block_group->key.objectid &&
+	    bytenr < block_group->key.objectid + block_group->key.offset)
+		return 1;
+	return 0;
+}
+
+/*
+ * get new location of data
+ */
+static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
+			    u64 bytenr, u64 num_bytes)
+{
+	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *leaf;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	bytenr -= BTRFS_I(reloc_inode)->index_cnt;
+	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
+				       bytenr, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	fi = btrfs_item_ptr(leaf, path->slots[0],
+			    struct btrfs_file_extent_item);
+
+	BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
+	       btrfs_file_extent_compression(leaf, fi) ||
+	       btrfs_file_extent_encryption(leaf, fi) ||
+	       btrfs_file_extent_other_encoding(leaf, fi));
+
+	if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
+		ret = 1;
+		goto out;
+	}
+
+	if (new_bytenr)
+		*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * update file extent items in the tree leaf to point to
+ * the new locations.
+ */
+static int replace_file_extents(struct btrfs_trans_handle *trans,
+				struct reloc_control *rc,
+				struct btrfs_root *root,
+				struct extent_buffer *leaf,
+				struct list_head *inode_list)
+{
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	struct inode *inode = NULL;
+	struct inodevec *ivec = NULL;
+	u64 parent;
+	u64 bytenr;
+	u64 new_bytenr;
+	u64 num_bytes;
+	u64 end;
+	u32 nritems;
+	u32 i;
+	int ret;
+	int first = 1;
+	int dirty = 0;
+
+	if (rc->stage != UPDATE_DATA_PTRS)
+		return 0;
+
+	/* reloc trees always use full backref */
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+		parent = leaf->start;
+	else
+		parent = 0;
+
+	nritems = btrfs_header_nritems(leaf);
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		btrfs_item_key_to_cpu(leaf, &key, i);
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		if (bytenr == 0)
+			continue;
+		if (!in_block_group(bytenr, rc->block_group))
+			continue;
+
+		/*
+		 * if we are modifying block in fs tree, wait for readpage
+		 * to complete and drop the extent cache
+		 */
+		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+			if (!ivec || ivec->nr == INODEVEC_SIZE) {
+				ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
+				BUG_ON(!ivec);
+				ivec->nr = 0;
+				list_add_tail(&ivec->list, inode_list);
+			}
+			if (first) {
+				inode = find_next_inode(root, key.objectid);
+				if (inode)
+					ivec->inode[ivec->nr++] = inode;
+				first = 0;
+			} else if (inode && inode->i_ino < key.objectid) {
+				inode = find_next_inode(root, key.objectid);
+				if (inode)
+					ivec->inode[ivec->nr++] = inode;
+			}
+			if (inode && inode->i_ino == key.objectid) {
+				end = key.offset +
+				      btrfs_file_extent_num_bytes(leaf, fi);
+				WARN_ON(!IS_ALIGNED(key.offset,
+						    root->sectorsize));
+				WARN_ON(!IS_ALIGNED(end, root->sectorsize));
+				end--;
+				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
+						      key.offset, end,
+						      GFP_NOFS);
+				if (!ret)
+					continue;
+
+				btrfs_drop_extent_cache(inode, key.offset, end,
+							1);
+				unlock_extent(&BTRFS_I(inode)->io_tree,
+					      key.offset, end, GFP_NOFS);
+			}
+		}
+
+		ret = get_new_location(rc->data_inode, &new_bytenr,
+				       bytenr, num_bytes);
+		if (ret > 0)
+			continue;
+		BUG_ON(ret < 0);
+
+		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
+		dirty = 1;
+
+		key.offset -= btrfs_file_extent_offset(leaf, fi);
+		ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
+					   num_bytes, parent,
+					   btrfs_header_owner(leaf),
+					   key.objectid, key.offset);
+		BUG_ON(ret);
+
+		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+					parent, btrfs_header_owner(leaf),
+					key.objectid, key.offset);
+		BUG_ON(ret);
+	}
+	if (dirty)
+		btrfs_mark_buffer_dirty(leaf);
+	return 0;
+}
+
+static noinline_for_stack
+int memcmp_node_keys(struct extent_buffer *eb, int slot,
+		     struct btrfs_path *path, int level)
+{
+	struct btrfs_disk_key key1;
+	struct btrfs_disk_key key2;
+	btrfs_node_key(eb, &key1, slot);
+	btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
+	return memcmp(&key1, &key2, sizeof(key1));
+}
+
+/*
+ * try to replace tree blocks in fs tree with the new blocks
+ * in reloc tree. tree blocks haven't been modified since the
+ * reloc tree was create can be replaced.
+ *
+ * if a block was replaced, level of the block + 1 is returned.
+ * if no block got replaced, 0 is returned. if there are other
+ * errors, a negative error number is returned.
+ */
+static int replace_path(struct btrfs_trans_handle *trans,
+			struct btrfs_root *dest, struct btrfs_root *src,
+			struct btrfs_path *path, struct btrfs_key *next_key,
+			struct extent_buffer **leaf,
+			int lowest_level, int max_level)
+{
+	struct extent_buffer *eb;
+	struct extent_buffer *parent;
+	struct btrfs_key key;
+	u64 old_bytenr;
+	u64 new_bytenr;
+	u64 old_ptr_gen;
+	u64 new_ptr_gen;
+	u64 last_snapshot;
+	u32 blocksize;
+	int level;
+	int ret;
+	int slot;
+
+	BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
+	BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+	BUG_ON(lowest_level > 1 && leaf);
+
+	last_snapshot = btrfs_root_last_snapshot(&src->root_item);
+
+	slot = path->slots[lowest_level];
+	btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
+
+	eb = btrfs_lock_root_node(dest);
+	btrfs_set_lock_blocking(eb);
+	level = btrfs_header_level(eb);
+
+	if (level < lowest_level) {
+		btrfs_tree_unlock(eb);
+		free_extent_buffer(eb);
+		return 0;
+	}
+
+	ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
+	BUG_ON(ret);
+	btrfs_set_lock_blocking(eb);
+
+	if (next_key) {
+		next_key->objectid = (u64)-1;
+		next_key->type = (u8)-1;
+		next_key->offset = (u64)-1;
+	}
+
+	parent = eb;
+	while (1) {
+		level = btrfs_header_level(parent);
+		BUG_ON(level < lowest_level);
+
+		ret = btrfs_bin_search(parent, &key, level, &slot);
+		if (ret && slot > 0)
+			slot--;
+
+		if (next_key && slot + 1 < btrfs_header_nritems(parent))
+			btrfs_node_key_to_cpu(parent, next_key, slot + 1);
+
+		old_bytenr = btrfs_node_blockptr(parent, slot);
+		blocksize = btrfs_level_size(dest, level - 1);
+		old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
+
+		if (level <= max_level) {
+			eb = path->nodes[level];
+			new_bytenr = btrfs_node_blockptr(eb,
+							path->slots[level]);
+			new_ptr_gen = btrfs_node_ptr_generation(eb,
+							path->slots[level]);
+		} else {
+			new_bytenr = 0;
+			new_ptr_gen = 0;
+		}
+
+		if (new_bytenr > 0 && new_bytenr == old_bytenr) {
+			WARN_ON(1);
+			ret = level;
+			break;
+		}
+
+		if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
+		    memcmp_node_keys(parent, slot, path, level)) {
+			if (level <= lowest_level && !leaf) {
+				ret = 0;
+				break;
+			}
+
+			eb = read_tree_block(dest, old_bytenr, blocksize,
+					     old_ptr_gen);
+			btrfs_tree_lock(eb);
+			ret = btrfs_cow_block(trans, dest, eb, parent,
+					      slot, &eb);
+			BUG_ON(ret);
+			btrfs_set_lock_blocking(eb);
+
+			if (level <= lowest_level) {
+				*leaf = eb;
+				ret = 0;
+				break;
+			}
+
+			btrfs_tree_unlock(parent);
+			free_extent_buffer(parent);
+
+			parent = eb;
+			continue;
+		}
+
+		btrfs_node_key_to_cpu(path->nodes[level], &key,
+				      path->slots[level]);
+		btrfs_release_path(src, path);
+
+		path->lowest_level = level;
+		ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
+		path->lowest_level = 0;
+		BUG_ON(ret);
+
+		/*
+		 * swap blocks in fs tree and reloc tree.
+		 */
+		btrfs_set_node_blockptr(parent, slot, new_bytenr);
+		btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
+		btrfs_mark_buffer_dirty(parent);
+
+		btrfs_set_node_blockptr(path->nodes[level],
+					path->slots[level], old_bytenr);
+		btrfs_set_node_ptr_generation(path->nodes[level],
+					      path->slots[level], old_ptr_gen);
+		btrfs_mark_buffer_dirty(path->nodes[level]);
+
+		ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
+					path->nodes[level]->start,
+					src->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
+		ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
+					0, dest->root_key.objectid, level - 1,
+					0);
+		BUG_ON(ret);
+
+		ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
+					path->nodes[level]->start,
+					src->root_key.objectid, level - 1, 0);
+		BUG_ON(ret);
+
+		ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
+					0, dest->root_key.objectid, level - 1,
+					0);
+		BUG_ON(ret);
+
+		btrfs_unlock_up_safe(path, 0);
+
+		ret = level;
+		break;
+	}
+	btrfs_tree_unlock(parent);
+	free_extent_buffer(parent);
+	return ret;
+}
+
+/*
+ * helper to find next relocated block in reloc tree
+ */
+static noinline_for_stack
+int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
+		       int *level)
+{
+	struct extent_buffer *eb;
+	int i;
+	u64 last_snapshot;
+	u32 nritems;
+
+	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
+
+	for (i = 0; i < *level; i++) {
+		free_extent_buffer(path->nodes[i]);
+		path->nodes[i] = NULL;
+	}
+
+	for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
+		eb = path->nodes[i];
+		nritems = btrfs_header_nritems(eb);
+		while (path->slots[i] + 1 < nritems) {
+			path->slots[i]++;
+			if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
+			    last_snapshot)
+				continue;
+
+			*level = i;
+			return 0;
+		}
+		free_extent_buffer(path->nodes[i]);
+		path->nodes[i] = NULL;
+	}
+	return 1;
+}
+
+/*
+ * walk down reloc tree to find relocated block of lowest level
+ */
+static noinline_for_stack
+int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
+			 int *level)
+{
+	struct extent_buffer *eb = NULL;
+	int i;
+	u64 bytenr;
+	u64 ptr_gen = 0;
+	u64 last_snapshot;
+	u32 blocksize;
+	u32 nritems;
+
+	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
+
+	for (i = *level; i > 0; i--) {
+		eb = path->nodes[i];
+		nritems = btrfs_header_nritems(eb);
+		while (path->slots[i] < nritems) {
+			ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
+			if (ptr_gen > last_snapshot)
+				break;
+			path->slots[i]++;
+		}
+		if (path->slots[i] >= nritems) {
+			if (i == *level)
+				break;
+			*level = i + 1;
+			return 0;
+		}
+		if (i == 1) {
+			*level = i;
+			return 0;
+		}
+
+		bytenr = btrfs_node_blockptr(eb, path->slots[i]);
+		blocksize = btrfs_level_size(root, i - 1);
+		eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
+		BUG_ON(btrfs_header_level(eb) != i - 1);
+		path->nodes[i - 1] = eb;
+		path->slots[i - 1] = 0;
+	}
+	return 1;
+}
+
+/*
+ * invalidate extent cache for file extents whose key in range of
+ * [min_key, max_key)
+ */
+static int invalidate_extent_cache(struct btrfs_root *root,
+				   struct btrfs_key *min_key,
+				   struct btrfs_key *max_key)
+{
+	struct inode *inode = NULL;
+	u64 objectid;
+	u64 start, end;
+
+	objectid = min_key->objectid;
+	while (1) {
+		cond_resched();
+		iput(inode);
+
+		if (objectid > max_key->objectid)
+			break;
+
+		inode = find_next_inode(root, objectid);
+		if (!inode)
+			break;
+
+		if (inode->i_ino > max_key->objectid) {
+			iput(inode);
+			break;
+		}
+
+		objectid = inode->i_ino + 1;
+		if (!S_ISREG(inode->i_mode))
+			continue;
+
+		if (unlikely(min_key->objectid == inode->i_ino)) {
+			if (min_key->type > BTRFS_EXTENT_DATA_KEY)
+				continue;
+			if (min_key->type < BTRFS_EXTENT_DATA_KEY)
+				start = 0;
+			else {
+				start = min_key->offset;
+				WARN_ON(!IS_ALIGNED(start, root->sectorsize));
+			}
+		} else {
+			start = 0;
+		}
+
+		if (unlikely(max_key->objectid == inode->i_ino)) {
+			if (max_key->type < BTRFS_EXTENT_DATA_KEY)
+				continue;
+			if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
+				end = (u64)-1;
+			} else {
+				if (max_key->offset == 0)
+					continue;
+				end = max_key->offset;
+				WARN_ON(!IS_ALIGNED(end, root->sectorsize));
+				end--;
+			}
+		} else {
+			end = (u64)-1;
+		}
+
+		/* the lock_extent waits for readpage to complete */
+		lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+		btrfs_drop_extent_cache(inode, start, end, 1);
+		unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+	}
+	return 0;
+}
+
+static int find_next_key(struct btrfs_path *path, int level,
+			 struct btrfs_key *key)
+
+{
+	while (level < BTRFS_MAX_LEVEL) {
+		if (!path->nodes[level])
+			break;
+		if (path->slots[level] + 1 <
+		    btrfs_header_nritems(path->nodes[level])) {
+			btrfs_node_key_to_cpu(path->nodes[level], key,
+					      path->slots[level] + 1);
+			return 0;
+		}
+		level++;
+	}
+	return 1;
+}
+
+/*
+ * merge the relocated tree blocks in reloc tree with corresponding
+ * fs tree.
+ */
+static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
+					       struct btrfs_root *root)
+{
+	LIST_HEAD(inode_list);
+	struct btrfs_key key;
+	struct btrfs_key next_key;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *reloc_root;
+	struct btrfs_root_item *root_item;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf = NULL;
+	unsigned long nr;
+	int level;
+	int max_level;
+	int replaced = 0;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	reloc_root = root->reloc_root;
+	root_item = &reloc_root->root_item;
+
+	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+		level = btrfs_root_level(root_item);
+		extent_buffer_get(reloc_root->node);
+		path->nodes[level] = reloc_root->node;
+		path->slots[level] = 0;
+	} else {
+		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+
+		level = root_item->drop_level;
+		BUG_ON(level == 0);
+		path->lowest_level = level;
+		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
+		if (ret < 0) {
+			btrfs_free_path(path);
+			return ret;
+		}
+
+		btrfs_node_key_to_cpu(path->nodes[level], &next_key,
+				      path->slots[level]);
+		WARN_ON(memcmp(&key, &next_key, sizeof(key)));
+
+		btrfs_unlock_up_safe(path, 0);
+	}
+
+	if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
+		trans = btrfs_start_transaction(root, 1);
+
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, 0);
+		btrfs_release_path(reloc_root, path);
+
+		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+
+		leaf = path->nodes[0];
+		btrfs_unlock_up_safe(path, 1);
+		ret = replace_file_extents(trans, rc, root, leaf,
+					   &inode_list);
+		if (ret < 0)
+			err = ret;
+		goto out;
+	}
+
+	memset(&next_key, 0, sizeof(next_key));
+
+	while (1) {
+		leaf = NULL;
+		replaced = 0;
+		trans = btrfs_start_transaction(root, 1);
+		max_level = level;
+
+		ret = walk_down_reloc_tree(reloc_root, path, &level);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		if (ret > 0)
+			break;
+
+		if (!find_next_key(path, level, &key) &&
+		    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
+			ret = 0;
+		} else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
+			ret = replace_path(trans, root, reloc_root,
+					   path, &next_key, &leaf,
+					   level, max_level);
+		} else {
+			ret = replace_path(trans, root, reloc_root,
+					   path, &next_key, NULL,
+					   level, max_level);
+		}
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+
+		if (ret > 0) {
+			level = ret;
+			btrfs_node_key_to_cpu(path->nodes[level], &key,
+					      path->slots[level]);
+			replaced = 1;
+		} else if (leaf) {
+			/*
+			 * no block got replaced, try replacing file extents
+			 */
+			btrfs_item_key_to_cpu(leaf, &key, 0);
+			ret = replace_file_extents(trans, rc, root, leaf,
+						   &inode_list);
+			btrfs_tree_unlock(leaf);
+			free_extent_buffer(leaf);
+			BUG_ON(ret < 0);
+		}
+
+		ret = walk_up_reloc_tree(reloc_root, path, &level);
+		if (ret > 0)
+			break;
+
+		BUG_ON(level == 0);
+		/*
+		 * save the merging progress in the drop_progress.
+		 * this is OK since root refs == 1 in this case.
+		 */
+		btrfs_node_key(path->nodes[level], &root_item->drop_progress,
+			       path->slots[level]);
+		root_item->drop_level = level;
+
+		nr = trans->blocks_used;
+		btrfs_end_transaction(trans, root);
+
+		btrfs_btree_balance_dirty(root, nr);
+
+		if (replaced && rc->stage == UPDATE_DATA_PTRS)
+			invalidate_extent_cache(root, &key, &next_key);
+	}
+
+	/*
+	 * handle the case only one block in the fs tree need to be
+	 * relocated and the block is tree root.
+	 */
+	leaf = btrfs_lock_root_node(root);
+	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
+	btrfs_tree_unlock(leaf);
+	free_extent_buffer(leaf);
+	if (ret < 0)
+		err = ret;
+out:
+	btrfs_free_path(path);
+
+	if (err == 0) {
+		memset(&root_item->drop_progress, 0,
+		       sizeof(root_item->drop_progress));
+		root_item->drop_level = 0;
+		btrfs_set_root_refs(root_item, 0);
+	}
+
+	nr = trans->blocks_used;
+	btrfs_end_transaction(trans, root);
+
+	btrfs_btree_balance_dirty(root, nr);
+
+	/*
+	 * put inodes while we aren't holding the tree locks
+	 */
+	while (!list_empty(&inode_list)) {
+		struct inodevec *ivec;
+		ivec = list_entry(inode_list.next, struct inodevec, list);
+		list_del(&ivec->list);
+		while (ivec->nr > 0) {
+			ivec->nr--;
+			iput(ivec->inode[ivec->nr]);
+		}
+		kfree(ivec);
+	}
+
+	if (replaced && rc->stage == UPDATE_DATA_PTRS)
+		invalidate_extent_cache(root, &key, &next_key);
+
+	return err;
+}
+
+/*
+ * callback for the work threads.
+ * this function merges reloc tree with corresponding fs tree,
+ * and then drops the reloc tree.
+ */
+static void merge_func(struct btrfs_work *work)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
+	struct btrfs_root *reloc_root;
+	struct async_merge *async;
+
+	async = container_of(work, struct async_merge, work);
+	reloc_root = async->root;
+
+	if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+		root = read_fs_root(reloc_root->fs_info,
+				    reloc_root->root_key.offset);
+		BUG_ON(IS_ERR(root));
+		BUG_ON(root->reloc_root != reloc_root);
+
+		merge_reloc_root(async->rc, root);
+
+		trans = btrfs_start_transaction(root, 1);
+		btrfs_update_reloc_root(trans, root);
+		btrfs_end_transaction(trans, root);
+	}
+
+	btrfs_drop_dead_root(reloc_root);
+
+	if (atomic_dec_and_test(async->num_pending))
+		complete(async->done);
+
+	kfree(async);
+}
+
+static int merge_reloc_roots(struct reloc_control *rc)
+{
+	struct async_merge *async;
+	struct btrfs_root *root;
+	struct completion done;
+	atomic_t num_pending;
+
+	init_completion(&done);
+	atomic_set(&num_pending, 1);
+
+	while (!list_empty(&rc->reloc_roots)) {
+		root = list_entry(rc->reloc_roots.next,
+				  struct btrfs_root, root_list);
+		list_del_init(&root->root_list);
+
+		async = kmalloc(sizeof(*async), GFP_NOFS);
+		BUG_ON(!async);
+		async->work.func = merge_func;
+		async->work.flags = 0;
+		async->rc = rc;
+		async->root = root;
+		async->done = &done;
+		async->num_pending = &num_pending;
+		atomic_inc(&num_pending);
+		btrfs_queue_worker(&rc->workers, &async->work);
+	}
+
+	if (!atomic_dec_and_test(&num_pending))
+		wait_for_completion(&done);
+
+	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+	return 0;
+}
+
+static void free_block_list(struct rb_root *blocks)
+{
+	struct tree_block *block;
+	struct rb_node *rb_node;
+	while ((rb_node = rb_first(blocks))) {
+		block = rb_entry(rb_node, struct tree_block, rb_node);
+		rb_erase(rb_node, blocks);
+		kfree(block);
+	}
+}
+
+static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *reloc_root)
+{
+	struct btrfs_root *root;
+
+	if (reloc_root->last_trans == trans->transid)
+		return 0;
+
+	root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
+	BUG_ON(IS_ERR(root));
+	BUG_ON(root->reloc_root != reloc_root);
+
+	return btrfs_record_root_in_trans(trans, root);
+}
+
+/*
+ * select one tree from trees that references the block.
+ * for blocks in refernce counted trees, we preper reloc tree.
+ * if no reloc tree found and reloc_only is true, NULL is returned.
+ */
+static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
+					    struct backref_node *node,
+					    struct backref_edge *edges[],
+					    int *nr, int reloc_only)
+{
+	struct backref_node *next;
+	struct btrfs_root *root;
+	int index;
+	int loop = 0;
+again:
+	index = 0;
+	next = node;
+	while (1) {
+		cond_resched();
+		next = walk_up_backref(next, edges, &index);
+		root = next->root;
+		if (!root) {
+			BUG_ON(!node->old_root);
+			goto skip;
+		}
+
+		/* no other choice for non-refernce counted tree */
+		if (!root->ref_cows) {
+			BUG_ON(reloc_only);
+			break;
+		}
+
+		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+			record_reloc_root_in_trans(trans, root);
+			break;
+		}
+
+		if (loop) {
+			btrfs_record_root_in_trans(trans, root);
+			break;
+		}
+
+		if (reloc_only || next != node) {
+			if (!root->reloc_root)
+				btrfs_record_root_in_trans(trans, root);
+			root = root->reloc_root;
+			/*
+			 * if the reloc tree was created in current
+			 * transation, there is no node in backref tree
+			 * corresponds to the root of the reloc tree.
+			 */
+			if (btrfs_root_last_snapshot(&root->root_item) ==
+			    trans->transid - 1)
+				break;
+		}
+skip:
+		root = NULL;
+		next = walk_down_backref(edges, &index);
+		if (!next || next->level <= node->level)
+			break;
+	}
+
+	if (!root && !loop && !reloc_only) {
+		loop = 1;
+		goto again;
+	}
+
+	if (root)
+		*nr = index;
+	else
+		*nr = 0;
+
+	return root;
+}
+
+static noinline_for_stack
+struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
+				   struct backref_node *node)
+{
+	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+	int nr;
+	return __select_one_root(trans, node, edges, &nr, 0);
+}
+
+static noinline_for_stack
+struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
+				     struct backref_node *node,
+				     struct backref_edge *edges[], int *nr)
+{
+	return __select_one_root(trans, node, edges, nr, 1);
+}
+
+static void grab_path_buffers(struct btrfs_path *path,
+			      struct backref_node *node,
+			      struct backref_edge *edges[], int nr)
+{
+	int i = 0;
+	while (1) {
+		drop_node_buffer(node);
+		node->eb = path->nodes[node->level];
+		BUG_ON(!node->eb);
+		if (path->locks[node->level])
+			node->locked = 1;
+		path->nodes[node->level] = NULL;
+		path->locks[node->level] = 0;
+
+		if (i >= nr)
+			break;
+
+		edges[i]->blockptr = node->eb->start;
+		node = edges[i]->node[UPPER];
+		i++;
+	}
+}
+
+/*
+ * relocate a block tree, and then update pointers in upper level
+ * blocks that reference the block to point to the new location.
+ *
+ * if called by link_to_upper, the block has already been relocated.
+ * in that case this function just updates pointers.
+ */
+static int do_relocation(struct btrfs_trans_handle *trans,
+			 struct backref_node *node,
+			 struct btrfs_key *key,
+			 struct btrfs_path *path, int lowest)
+{
+	struct backref_node *upper;
+	struct backref_edge *edge;
+	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+	struct btrfs_root *root;
+	struct extent_buffer *eb;
+	u32 blocksize;
+	u64 bytenr;
+	u64 generation;
+	int nr;
+	int slot;
+	int ret;
+	int err = 0;
+
+	BUG_ON(lowest && node->eb);
+
+	path->lowest_level = node->level + 1;
+	list_for_each_entry(edge, &node->upper, list[LOWER]) {
+		cond_resched();
+		if (node->eb && node->eb->start == edge->blockptr)
+			continue;
+
+		upper = edge->node[UPPER];
+		root = select_reloc_root(trans, upper, edges, &nr);
+		if (!root)
+			continue;
+
+		if (upper->eb && !upper->locked)
+			drop_node_buffer(upper);
+
+		if (!upper->eb) {
+			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+			BUG_ON(ret > 0);
+
+			slot = path->slots[upper->level];
+
+			btrfs_unlock_up_safe(path, upper->level + 1);
+			grab_path_buffers(path, upper, edges, nr);
+
+			btrfs_release_path(NULL, path);
+		} else {
+			ret = btrfs_bin_search(upper->eb, key, upper->level,
+					       &slot);
+			BUG_ON(ret);
+		}
+
+		bytenr = btrfs_node_blockptr(upper->eb, slot);
+		if (!lowest) {
+			if (node->eb->start == bytenr) {
+				btrfs_tree_unlock(upper->eb);
+				upper->locked = 0;
+				continue;
+			}
+		} else {
+			BUG_ON(node->bytenr != bytenr);
+		}
+
+		blocksize = btrfs_level_size(root, node->level);
+		generation = btrfs_node_ptr_generation(upper->eb, slot);
+		eb = read_tree_block(root, bytenr, blocksize, generation);
+		btrfs_tree_lock(eb);
+		btrfs_set_lock_blocking(eb);
+
+		if (!node->eb) {
+			ret = btrfs_cow_block(trans, root, eb, upper->eb,
+					      slot, &eb);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+			btrfs_set_lock_blocking(eb);
+			node->eb = eb;
+			node->locked = 1;
+		} else {
+			btrfs_set_node_blockptr(upper->eb, slot,
+						node->eb->start);
+			btrfs_set_node_ptr_generation(upper->eb, slot,
+						      trans->transid);
+			btrfs_mark_buffer_dirty(upper->eb);
+
+			ret = btrfs_inc_extent_ref(trans, root,
+						node->eb->start, blocksize,
+						upper->eb->start,
+						btrfs_header_owner(upper->eb),
+						node->level, 0);
+			BUG_ON(ret);
+
+			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
+			BUG_ON(ret);
+
+			btrfs_tree_unlock(eb);
+			free_extent_buffer(eb);
+		}
+		if (!lowest) {
+			btrfs_tree_unlock(upper->eb);
+			upper->locked = 0;
+		}
+	}
+	path->lowest_level = 0;
+	return err;
+}
+
+static int link_to_upper(struct btrfs_trans_handle *trans,
+			 struct backref_node *node,
+			 struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	if (!node->eb || list_empty(&node->upper))
+		return 0;
+
+	btrfs_node_key_to_cpu(node->eb, &key, 0);
+	return do_relocation(trans, node, &key, path, 0);
+}
+
+static int finish_pending_nodes(struct btrfs_trans_handle *trans,
+				struct backref_cache *cache,
+				struct btrfs_path *path)
+{
+	struct backref_node *node;
+	int level;
+	int ret;
+	int err = 0;
+
+	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+		while (!list_empty(&cache->pending[level])) {
+			node = list_entry(cache->pending[level].next,
+					  struct backref_node, lower);
+			BUG_ON(node->level != level);
+
+			ret = link_to_upper(trans, node, path);
+			if (ret < 0)
+				err = ret;
+			/*
+			 * this remove the node from the pending list and
+			 * may add some other nodes to the level + 1
+			 * pending list
+			 */
+			remove_backref_node(cache, node);
+		}
+	}
+	BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
+	return err;
+}
+
+static void mark_block_processed(struct reloc_control *rc,
+				 struct backref_node *node)
+{
+	u32 blocksize;
+	if (node->level == 0 ||
+	    in_block_group(node->bytenr, rc->block_group)) {
+		blocksize = btrfs_level_size(rc->extent_root, node->level);
+		set_extent_bits(&rc->processed_blocks, node->bytenr,
+				node->bytenr + blocksize - 1, EXTENT_DIRTY,
+				GFP_NOFS);
+	}
+	node->processed = 1;
+}
+
+/*
+ * mark a block and all blocks directly/indirectly reference the block
+ * as processed.
+ */
+static void update_processed_blocks(struct reloc_control *rc,
+				    struct backref_node *node)
+{
+	struct backref_node *next = node;
+	struct backref_edge *edge;
+	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+	int index = 0;
+
+	while (next) {
+		cond_resched();
+		while (1) {
+			if (next->processed)
+				break;
+
+			mark_block_processed(rc, next);
+
+			if (list_empty(&next->upper))
+				break;
+
+			edge = list_entry(next->upper.next,
+					  struct backref_edge, list[LOWER]);
+			edges[index++] = edge;
+			next = edge->node[UPPER];
+		}
+		next = walk_down_backref(edges, &index);
+	}
+}
+
+static int tree_block_processed(u64 bytenr, u32 blocksize,
+				struct reloc_control *rc)
+{
+	if (test_range_bit(&rc->processed_blocks, bytenr,
+			   bytenr + blocksize - 1, EXTENT_DIRTY, 1))
+		return 1;
+	return 0;
+}
+
+/*
+ * check if there are any file extent pointers in the leaf point to
+ * data require processing
+ */
+static int check_file_extents(struct reloc_control *rc,
+			      u64 bytenr, u32 blocksize, u64 ptr_gen)
+{
+	struct btrfs_key found_key;
+	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *leaf;
+	u32 nritems;
+	int i;
+	int ret = 0;
+
+	leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);
+
+	nritems = btrfs_header_nritems(leaf);
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		btrfs_item_key_to_cpu(leaf, &found_key, i);
+		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			continue;
+		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		if (bytenr == 0)
+			continue;
+		if (in_block_group(bytenr, rc->block_group)) {
+			ret = 1;
+			break;
+		}
+	}
+	free_extent_buffer(leaf);
+	return ret;
+}
+
+/*
+ * scan child blocks of a given block to find blocks require processing
+ */
+static int add_child_blocks(struct btrfs_trans_handle *trans,
+			    struct reloc_control *rc,
+			    struct backref_node *node,
+			    struct rb_root *blocks)
+{
+	struct tree_block *block;
+	struct rb_node *rb_node;
+	u64 bytenr;
+	u64 ptr_gen;
+	u32 blocksize;
+	u32 nritems;
+	int i;
+	int err = 0;
+
+	nritems = btrfs_header_nritems(node->eb);
+	blocksize = btrfs_level_size(rc->extent_root, node->level - 1);
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		bytenr = btrfs_node_blockptr(node->eb, i);
+		ptr_gen = btrfs_node_ptr_generation(node->eb, i);
+		if (ptr_gen == trans->transid)
+			continue;
+		if (!in_block_group(bytenr, rc->block_group) &&
+		    (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
+			continue;
+		if (tree_block_processed(bytenr, blocksize, rc))
+			continue;
+
+		readahead_tree_block(rc->extent_root,
+				     bytenr, blocksize, ptr_gen);
+	}
+
+	for (i = 0; i < nritems; i++) {
+		cond_resched();
+		bytenr = btrfs_node_blockptr(node->eb, i);
+		ptr_gen = btrfs_node_ptr_generation(node->eb, i);
+		if (ptr_gen == trans->transid)
+			continue;
+		if (!in_block_group(bytenr, rc->block_group) &&
+		    (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
+			continue;
+		if (tree_block_processed(bytenr, blocksize, rc))
+			continue;
+		if (!in_block_group(bytenr, rc->block_group) &&
+		    !check_file_extents(rc, bytenr, blocksize, ptr_gen))
+			continue;
+
+		block = kmalloc(sizeof(*block), GFP_NOFS);
+		if (!block) {
+			err = -ENOMEM;
+			break;
+		}
+		block->bytenr = bytenr;
+		btrfs_node_key_to_cpu(node->eb, &block->key, i);
+		block->level = node->level - 1;
+		block->key_ready = 1;
+		rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
+		BUG_ON(rb_node);
+	}
+	if (err)
+		free_block_list(blocks);
+	return err;
+}
+
+/*
+ * find adjacent blocks require processing
+ */
+static noinline_for_stack
+int add_adjacent_blocks(struct btrfs_trans_handle *trans,
+			struct reloc_control *rc,
+			struct backref_cache *cache,
+			struct rb_root *blocks, int level,
+			struct backref_node **upper)
+{
+	struct backref_node *node;
+	int ret = 0;
+
+	WARN_ON(!list_empty(&cache->pending[level]));
+
+	if (list_empty(&cache->pending[level + 1]))
+		return 1;
+
+	node = list_entry(cache->pending[level + 1].next,
+			  struct backref_node, lower);
+	if (node->eb)
+		ret = add_child_blocks(trans, rc, node, blocks);
+
+	*upper = node;
+	return ret;
+}
+
+static int get_tree_block_key(struct reloc_control *rc,
+			      struct tree_block *block)
+{
+	struct extent_buffer *eb;
+
+	BUG_ON(block->key_ready);
+	eb = read_tree_block(rc->extent_root, block->bytenr,
+			     block->key.objectid, block->key.offset);
+	WARN_ON(btrfs_header_level(eb) != block->level);
+	if (block->level == 0)
+		btrfs_item_key_to_cpu(eb, &block->key, 0);
+	else
+		btrfs_node_key_to_cpu(eb, &block->key, 0);
+	free_extent_buffer(eb);
+	block->key_ready = 1;
+	return 0;
+}
+
+static int reada_tree_block(struct reloc_control *rc,
+			    struct tree_block *block)
+{
+	BUG_ON(block->key_ready);
+	readahead_tree_block(rc->extent_root, block->bytenr,
+			     block->key.objectid, block->key.offset);
+	return 0;
+}
+
+/*
+ * helper function to relocate a tree block
+ */
+static int relocate_tree_block(struct btrfs_trans_handle *trans,
+				struct reloc_control *rc,
+				struct backref_node *node,
+				struct btrfs_key *key,
+				struct btrfs_path *path)
+{
+	struct btrfs_root *root;
+	int ret;
+
+	root = select_one_root(trans, node);
+	if (unlikely(!root)) {
+		rc->found_old_snapshot = 1;
+		update_processed_blocks(rc, node);
+		return 0;
+	}
+
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		ret = do_relocation(trans, node, key, path, 1);
+		if (ret < 0)
+			goto out;
+		if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
+			ret = replace_file_extents(trans, rc, root,
+						   node->eb, NULL);
+			if (ret < 0)
+				goto out;
+		}
+		drop_node_buffer(node);
+	} else if (!root->ref_cows) {
+		path->lowest_level = node->level;
+		ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+		btrfs_release_path(root, path);
+		if (ret < 0)
+			goto out;
+	} else if (root != node->root) {
+		WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
+	}
+
+	update_processed_blocks(rc, node);
+	ret = 0;
+out:
+	drop_node_buffer(node);
+	return ret;
+}
+
+/*
+ * relocate a list of blocks
+ */
+static noinline_for_stack
+int relocate_tree_blocks(struct btrfs_trans_handle *trans,
+			 struct reloc_control *rc, struct rb_root *blocks)
+{
+	struct backref_cache *cache;
+	struct backref_node *node;
+	struct btrfs_path *path;
+	struct tree_block *block;
+	struct rb_node *rb_node;
+	int level = -1;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	cache = kmalloc(sizeof(*cache), GFP_NOFS);
+	if (!cache) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+
+	backref_cache_init(cache);
+
+	rb_node = rb_first(blocks);
+	while (rb_node) {
+		block = rb_entry(rb_node, struct tree_block, rb_node);
+		if (level == -1)
+			level = block->level;
+		else
+			BUG_ON(level != block->level);
+		if (!block->key_ready)
+			reada_tree_block(rc, block);
+		rb_node = rb_next(rb_node);
+	}
+
+	rb_node = rb_first(blocks);
+	while (rb_node) {
+		block = rb_entry(rb_node, struct tree_block, rb_node);
+		if (!block->key_ready)
+			get_tree_block_key(rc, block);
+		rb_node = rb_next(rb_node);
+	}
+
+	rb_node = rb_first(blocks);
+	while (rb_node) {
+		block = rb_entry(rb_node, struct tree_block, rb_node);
+
+		node = build_backref_tree(rc, cache, &block->key,
+					  block->level, block->bytenr);
+		if (IS_ERR(node)) {
+			err = PTR_ERR(node);
+			goto out;
+		}
+
+		ret = relocate_tree_block(trans, rc, node, &block->key,
+					  path);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		remove_backref_node(cache, node);
+		rb_node = rb_next(rb_node);
+	}
+
+	if (level > 0)
+		goto out;
+
+	free_block_list(blocks);
+
+	/*
+	 * now backrefs of some upper level tree blocks have been cached,
+	 * try relocating blocks referenced by these upper level blocks.
+	 */
+	while (1) {
+		struct backref_node *upper = NULL;
+		if (trans->transaction->in_commit ||
+		    trans->transaction->delayed_refs.flushing)
+			break;
+
+		ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
+					  &upper);
+		if (ret < 0)
+			err = ret;
+		if (ret != 0)
+			break;
+
+		rb_node = rb_first(blocks);
+		while (rb_node) {
+			block = rb_entry(rb_node, struct tree_block, rb_node);
+			if (trans->transaction->in_commit ||
+			    trans->transaction->delayed_refs.flushing)
+				goto out;
+			BUG_ON(!block->key_ready);
+			node = build_backref_tree(rc, cache, &block->key,
+						  level, block->bytenr);
+			if (IS_ERR(node)) {
+				err = PTR_ERR(node);
+				goto out;
+			}
+
+			ret = relocate_tree_block(trans, rc, node,
+						  &block->key, path);
+			if (ret < 0) {
+				err = ret;
+				goto out;
+			}
+			remove_backref_node(cache, node);
+			rb_node = rb_next(rb_node);
+		}
+		free_block_list(blocks);
+
+		if (upper) {
+			ret = link_to_upper(trans, upper, path);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+			remove_backref_node(cache, upper);
+		}
+	}
+out:
+	free_block_list(blocks);
+
+	ret = finish_pending_nodes(trans, cache, path);
+	if (ret < 0)
+		err = ret;
+
+	kfree(cache);
+	btrfs_free_path(path);
+	return err;
+}
+
+static noinline_for_stack
+int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
+{
+	u64 page_start;
+	u64 page_end;
+	unsigned long i;
+	unsigned long first_index;
+	unsigned long last_index;
+	unsigned int total_read = 0;
+	unsigned int total_dirty = 0;
+	struct page *page;
+	struct file_ra_state *ra;
+	struct btrfs_ordered_extent *ordered;
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	int ret = 0;
+
+	ra = kzalloc(sizeof(*ra), GFP_NOFS);
+	if (!ra)
+		return -ENOMEM;
+
+	mutex_lock(&inode->i_mutex);
+	first_index = start >> PAGE_CACHE_SHIFT;
+	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
+
+	/* make sure the dirty trick played by the caller work */
+	ret = invalidate_inode_pages2_range(inode->i_mapping,
+					    first_index, last_index);
+	if (ret)
+		goto out_unlock;
+
+	file_ra_state_init(ra, inode->i_mapping);
+
+	for (i = first_index ; i <= last_index; i++) {
+		if (total_read % ra->ra_pages == 0) {
+			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
+				min(last_index, ra->ra_pages + i - 1));
+		}
+		total_read++;
+again:
+		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
+			BUG_ON(1);
+		page = grab_cache_page(inode->i_mapping, i);
+		if (!page) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		if (!PageUptodate(page)) {
+			btrfs_readpage(NULL, page);
+			lock_page(page);
+			if (!PageUptodate(page)) {
+				unlock_page(page);
+				page_cache_release(page);
+				ret = -EIO;
+				goto out_unlock;
+			}
+		}
+		wait_on_page_writeback(page);
+
+		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+		page_end = page_start + PAGE_CACHE_SIZE - 1;
+		lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+
+		ordered = btrfs_lookup_ordered_extent(inode, page_start);
+		if (ordered) {
+			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+			unlock_page(page);
+			page_cache_release(page);
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+			goto again;
+		}
+		set_page_extent_mapped(page);
+
+		if (i == first_index)
+			set_extent_bits(io_tree, page_start, page_end,
+					EXTENT_BOUNDARY, GFP_NOFS);
+		btrfs_set_extent_delalloc(inode, page_start, page_end);
+
+		set_page_dirty(page);
+		total_dirty++;
+
+		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+out_unlock:
+	mutex_unlock(&inode->i_mutex);
+	kfree(ra);
+	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
+	return ret;
+}
+
+static noinline_for_stack
+int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
+	u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
+	u64 end = start + extent_key->offset - 1;
+
+	em = alloc_extent_map(GFP_NOFS);
+	em->start = start;
+	em->len = extent_key->offset;
+	em->block_len = extent_key->offset;
+	em->block_start = extent_key->objectid;
+	em->bdev = root->fs_info->fs_devices->latest_bdev;
+	set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+	/* setup extent map to cheat btrfs_readpage */
+	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+	while (1) {
+		int ret;
+		spin_lock(&em_tree->lock);
+		ret = add_extent_mapping(em_tree, em);
+		spin_unlock(&em_tree->lock);
+		if (ret != -EEXIST) {
+			free_extent_map(em);
+			break;
+		}
+		btrfs_drop_extent_cache(inode, start, end, 0);
+	}
+	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+
+	return relocate_inode_pages(inode, start, extent_key->offset);
+}
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+static int get_ref_objectid_v0(struct reloc_control *rc,
+			       struct btrfs_path *path,
+			       struct btrfs_key *extent_key,
+			       u64 *ref_objectid, int *path_change)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_ref_v0 *ref0;
+	int ret;
+	int slot;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	while (1) {
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(rc->extent_root, path);
+			if (ret < 0)
+				return ret;
+			BUG_ON(ret > 0);
+			leaf = path->nodes[0];
+			slot = path->slots[0];
+			if (path_change)
+				*path_change = 1;
+		}
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != extent_key->objectid)
+			return -ENOENT;
+
+		if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
+			slot++;
+			continue;
+		}
+		ref0 = btrfs_item_ptr(leaf, slot,
+				struct btrfs_extent_ref_v0);
+		*ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
+		break;
+	}
+	return 0;
+}
+#endif
+
+/*
+ * helper to add a tree block to the list.
+ * the major work is getting the generation and level of the block
+ */
+static int add_tree_block(struct reloc_control *rc,
+			  struct btrfs_key *extent_key,
+			  struct btrfs_path *path,
+			  struct rb_root *blocks)
+{
+	struct extent_buffer *eb;
+	struct btrfs_extent_item *ei;
+	struct btrfs_tree_block_info *bi;
+	struct tree_block *block;
+	struct rb_node *rb_node;
+	u32 item_size;
+	int level = -1;
+	int generation;
+
+	eb =  path->nodes[0];
+	item_size = btrfs_item_size_nr(eb, path->slots[0]);
+
+	if (item_size >= sizeof(*ei) + sizeof(*bi)) {
+		ei = btrfs_item_ptr(eb, path->slots[0],
+				struct btrfs_extent_item);
+		bi = (struct btrfs_tree_block_info *)(ei + 1);
+		generation = btrfs_extent_generation(eb, ei);
+		level = btrfs_tree_block_level(eb, bi);
+	} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		u64 ref_owner;
+		int ret;
+
+		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
+		ret = get_ref_objectid_v0(rc, path, extent_key,
+					  &ref_owner, NULL);
+		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
+		level = (int)ref_owner;
+		/* FIXME: get real generation */
+		generation = 0;
+#else
+		BUG();
+#endif
+	}
+
+	btrfs_release_path(rc->extent_root, path);
+
+	BUG_ON(level == -1);
+
+	block = kmalloc(sizeof(*block), GFP_NOFS);
+	if (!block)
+		return -ENOMEM;
+
+	block->bytenr = extent_key->objectid;
+	block->key.objectid = extent_key->offset;
+	block->key.offset = generation;
+	block->level = level;
+	block->key_ready = 0;
+
+	rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
+	BUG_ON(rb_node);
+
+	return 0;
+}
+
+/*
+ * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
+ */
+static int __add_tree_block(struct reloc_control *rc,
+			    u64 bytenr, u32 blocksize,
+			    struct rb_root *blocks)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret;
+
+	if (tree_block_processed(bytenr, blocksize, rc))
+		return 0;
+
+	if (tree_search(blocks, bytenr))
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = blocksize;
+
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
+	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret);
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	ret = add_tree_block(rc, &key, path, blocks);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * helper to check if the block use full backrefs for pointers in it
+ */
+static int block_use_full_backref(struct reloc_control *rc,
+				  struct extent_buffer *eb)
+{
+	struct btrfs_path *path;
+	struct btrfs_extent_item *ei;
+	struct btrfs_key key;
+	u64 flags;
+	int ret;
+
+	if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
+	    btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
+		return 1;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	key.objectid = eb->start;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = eb->len;
+
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
+	ret = btrfs_search_slot(NULL, rc->extent_root,
+				&key, path, 0, 0);
+	BUG_ON(ret);
+
+	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_extent_item);
+	flags = btrfs_extent_flags(path->nodes[0], ei);
+	BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
+	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+		ret = 1;
+	else
+		ret = 0;
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
+ * this function scans fs tree to find blocks reference the data extent
+ */
+static int find_data_references(struct reloc_control *rc,
+				struct btrfs_key *extent_key,
+				struct extent_buffer *leaf,
+				struct btrfs_extent_data_ref *ref,
+				struct rb_root *blocks)
+{
+	struct btrfs_path *path;
+	struct tree_block *block;
+	struct btrfs_root *root;
+	struct btrfs_file_extent_item *fi;
+	struct rb_node *rb_node;
+	struct btrfs_key key;
+	u64 ref_root;
+	u64 ref_objectid;
+	u64 ref_offset;
+	u32 ref_count;
+	u32 nritems;
+	int err = 0;
+	int added = 0;
+	int counted;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ref_root = btrfs_extent_data_ref_root(leaf, ref);
+	ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
+	ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
+	ref_count = btrfs_extent_data_ref_count(leaf, ref);
+
+	root = read_fs_root(rc->extent_root->fs_info, ref_root);
+	if (IS_ERR(root)) {
+		err = PTR_ERR(root);
+		goto out;
+	}
+
+	key.objectid = ref_objectid;
+	key.offset = ref_offset;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	nritems = btrfs_header_nritems(leaf);
+	/*
+	 * the references in tree blocks that use full backrefs
+	 * are not counted in
+	 */
+	if (block_use_full_backref(rc, leaf))
+		counted = 0;
+	else
+		counted = 1;
+	rb_node = tree_search(blocks, leaf->start);
+	if (rb_node) {
+		if (counted)
+			added = 1;
+		else
+			path->slots[0] = nritems;
+	}
+
+	while (ref_count > 0) {
+		while (path->slots[0] >= nritems) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0) {
+				err = ret;
+				goto out;
+			}
+			if (ret > 0) {
+				WARN_ON(1);
+				goto out;
+			}
+
+			leaf = path->nodes[0];
+			nritems = btrfs_header_nritems(leaf);
+			added = 0;
+
+			if (block_use_full_backref(rc, leaf))
+				counted = 0;
+			else
+				counted = 1;
+			rb_node = tree_search(blocks, leaf->start);
+			if (rb_node) {
+				if (counted)
+					added = 1;
+				else
+					path->slots[0] = nritems;
+			}
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid != ref_objectid ||
+		    key.type != BTRFS_EXTENT_DATA_KEY) {
+			WARN_ON(1);
+			break;
+		}
+
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, fi) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			goto next;
+
+		if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
+		    extent_key->objectid)
+			goto next;
+
+		key.offset -= btrfs_file_extent_offset(leaf, fi);
+		if (key.offset != ref_offset)
+			goto next;
+
+		if (counted)
+			ref_count--;
+		if (added)
+			goto next;
+
+		if (!tree_block_processed(leaf->start, leaf->len, rc)) {
+			block = kmalloc(sizeof(*block), GFP_NOFS);
+			if (!block) {
+				err = -ENOMEM;
+				break;
+			}
+			block->bytenr = leaf->start;
+			btrfs_item_key_to_cpu(leaf, &block->key, 0);
+			block->level = 0;
+			block->key_ready = 1;
+			rb_node = tree_insert(blocks, block->bytenr,
+					      &block->rb_node);
+			BUG_ON(rb_node);
+		}
+		if (counted)
+			added = 1;
+		else
+			path->slots[0] = nritems;
+next:
+		path->slots[0]++;
+
+	}
+out:
+	btrfs_free_path(path);
+	return err;
+}
+
+/*
+ * hepler to find all tree blocks that reference a given data extent
+ */
+static noinline_for_stack
+int add_data_references(struct reloc_control *rc,
+			struct btrfs_key *extent_key,
+			struct btrfs_path *path,
+			struct rb_root *blocks)
+{
+	struct btrfs_key key;
+	struct extent_buffer *eb;
+	struct btrfs_extent_data_ref *dref;
+	struct btrfs_extent_inline_ref *iref;
+	unsigned long ptr;
+	unsigned long end;
+	u32 blocksize;
+	int ret;
+	int err = 0;
+
+	ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
+			       extent_key->offset);
+	BUG_ON(ret < 0);
+	if (ret > 0) {
+		/* the relocated data is fragmented */
+		rc->extents_skipped++;
+		btrfs_release_path(rc->extent_root, path);
+		return 0;
+	}
+
+	blocksize = btrfs_level_size(rc->extent_root, 0);
+
+	eb = path->nodes[0];
+	ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
+	end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+	if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
+		ptr = end;
+	else
+#endif
+		ptr += sizeof(struct btrfs_extent_item);
+
+	while (ptr < end) {
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		key.type = btrfs_extent_inline_ref_type(eb, iref);
+		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
+			ret = __add_tree_block(rc, key.offset, blocksize,
+					       blocks);
+		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+			ret = find_data_references(rc, extent_key,
+						   eb, dref, blocks);
+		} else {
+			BUG();
+		}
+		ptr += btrfs_extent_inline_ref_size(key.type);
+	}
+	WARN_ON(ptr > end);
+
+	while (1) {
+		cond_resched();
+		eb = path->nodes[0];
+		if (path->slots[0] >= btrfs_header_nritems(eb)) {
+			ret = btrfs_next_leaf(rc->extent_root, path);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+			if (ret > 0)
+				break;
+			eb = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
+		if (key.objectid != extent_key->objectid)
+			break;
+
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+		if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
+		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
+#else
+		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
+		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
+#endif
+			ret = __add_tree_block(rc, key.offset, blocksize,
+					       blocks);
+		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
+			dref = btrfs_item_ptr(eb, path->slots[0],
+					      struct btrfs_extent_data_ref);
+			ret = find_data_references(rc, extent_key,
+						   eb, dref, blocks);
+		} else {
+			ret = 0;
+		}
+		if (ret) {
+			err = ret;
+			break;
+		}
+		path->slots[0]++;
+	}
+	btrfs_release_path(rc->extent_root, path);
+	if (err)
+		free_block_list(blocks);
+	return err;
+}
+
+/*
+ * hepler to find next unprocessed extent
+ */
+static noinline_for_stack
+int find_next_extent(struct btrfs_trans_handle *trans,
+		     struct reloc_control *rc, struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	u64 start, end, last;
+	int ret;
+
+	last = rc->block_group->key.objectid + rc->block_group->key.offset;
+	while (1) {
+		cond_resched();
+		if (rc->search_start >= last) {
+			ret = 1;
+			break;
+		}
+
+		key.objectid = rc->search_start;
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = 0;
+
+		path->search_commit_root = 1;
+		path->skip_locking = 1;
+		ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
+					0, 0);
+		if (ret < 0)
+			break;
+next:
+		leaf = path->nodes[0];
+		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(rc->extent_root, path);
+			if (ret != 0)
+				break;
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid >= last) {
+			ret = 1;
+			break;
+		}
+
+		if (key.type != BTRFS_EXTENT_ITEM_KEY ||
+		    key.objectid + key.offset <= rc->search_start) {
+			path->slots[0]++;
+			goto next;
+		}
+
+		ret = find_first_extent_bit(&rc->processed_blocks,
+					    key.objectid, &start, &end,
+					    EXTENT_DIRTY);
+
+		if (ret == 0 && start <= key.objectid) {
+			btrfs_release_path(rc->extent_root, path);
+			rc->search_start = end + 1;
+		} else {
+			rc->search_start = key.objectid + key.offset;
+			return 0;
+		}
+	}
+	btrfs_release_path(rc->extent_root, path);
+	return ret;
+}
+
+static void set_reloc_control(struct reloc_control *rc)
+{
+	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+	mutex_lock(&fs_info->trans_mutex);
+	fs_info->reloc_ctl = rc;
+	mutex_unlock(&fs_info->trans_mutex);
+}
+
+static void unset_reloc_control(struct reloc_control *rc)
+{
+	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+	mutex_lock(&fs_info->trans_mutex);
+	fs_info->reloc_ctl = NULL;
+	mutex_unlock(&fs_info->trans_mutex);
+}
+
+static int check_extent_flags(u64 flags)
+{
+	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
+	    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
+		return 1;
+	if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
+	    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
+		return 1;
+	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
+	    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+		return 1;
+	return 0;
+}
+
+static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
+{
+	struct rb_root blocks = RB_ROOT;
+	struct btrfs_key key;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_path *path;
+	struct btrfs_extent_item *ei;
+	unsigned long nr;
+	u64 flags;
+	u32 item_size;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	rc->search_start = rc->block_group->key.objectid;
+	clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
+			  GFP_NOFS);
+
+	rc->create_reloc_root = 1;
+	set_reloc_control(rc);
+
+	trans = btrfs_start_transaction(rc->extent_root, 1);
+	btrfs_commit_transaction(trans, rc->extent_root);
+
+	while (1) {
+		trans = btrfs_start_transaction(rc->extent_root, 1);
+
+		ret = find_next_extent(trans, rc, path);
+		if (ret < 0)
+			err = ret;
+		if (ret != 0)
+			break;
+
+		rc->extents_found++;
+
+		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_extent_item);
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		item_size = btrfs_item_size_nr(path->nodes[0],
+					       path->slots[0]);
+		if (item_size >= sizeof(*ei)) {
+			flags = btrfs_extent_flags(path->nodes[0], ei);
+			ret = check_extent_flags(flags);
+			BUG_ON(ret);
+
+		} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			u64 ref_owner;
+			int path_change = 0;
+
+			BUG_ON(item_size !=
+			       sizeof(struct btrfs_extent_item_v0));
+			ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
+						  &path_change);
+			if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
+				flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
+			else
+				flags = BTRFS_EXTENT_FLAG_DATA;
+
+			if (path_change) {
+				btrfs_release_path(rc->extent_root, path);
+
+				path->search_commit_root = 1;
+				path->skip_locking = 1;
+				ret = btrfs_search_slot(NULL, rc->extent_root,
+							&key, path, 0, 0);
+				if (ret < 0) {
+					err = ret;
+					break;
+				}
+				BUG_ON(ret > 0);
+			}
+#else
+			BUG();
+#endif
+		}
+
+		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+			ret = add_tree_block(rc, &key, path, &blocks);
+		} else if (rc->stage == UPDATE_DATA_PTRS &&
+			 (flags & BTRFS_EXTENT_FLAG_DATA)) {
+			ret = add_data_references(rc, &key, path, &blocks);
+		} else {
+			btrfs_release_path(rc->extent_root, path);
+			ret = 0;
+		}
+		if (ret < 0) {
+			err = 0;
+			break;
+		}
+
+		if (!RB_EMPTY_ROOT(&blocks)) {
+			ret = relocate_tree_blocks(trans, rc, &blocks);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+		}
+
+		nr = trans->blocks_used;
+		btrfs_end_transaction_throttle(trans, rc->extent_root);
+		trans = NULL;
+		btrfs_btree_balance_dirty(rc->extent_root, nr);
+
+		if (rc->stage == MOVE_DATA_EXTENTS &&
+		    (flags & BTRFS_EXTENT_FLAG_DATA)) {
+			rc->found_file_extent = 1;
+			ret = relocate_data_extent(rc->data_inode, &key);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+		}
+	}
+	btrfs_free_path(path);
+
+	if (trans) {
+		nr = trans->blocks_used;
+		btrfs_end_transaction(trans, rc->extent_root);
+		btrfs_btree_balance_dirty(rc->extent_root, nr);
+	}
+
+	rc->create_reloc_root = 0;
+	smp_mb();
+
+	if (rc->extents_found > 0) {
+		trans = btrfs_start_transaction(rc->extent_root, 1);
+		btrfs_commit_transaction(trans, rc->extent_root);
+	}
+
+	merge_reloc_roots(rc);
+
+	unset_reloc_control(rc);
+
+	/* get rid of pinned extents */
+	trans = btrfs_start_transaction(rc->extent_root, 1);
+	btrfs_commit_transaction(trans, rc->extent_root);
+
+	return err;
+}
+
+static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 u64 objectid, u64 size)
+{
+	struct btrfs_path *path;
+	struct btrfs_inode_item *item;
+	struct extent_buffer *leaf;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
+	if (ret)
+		goto out;
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
+	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
+	btrfs_set_inode_generation(leaf, item, 1);
+	btrfs_set_inode_size(leaf, item, size);
+	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
+	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, path);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * helper to create inode for data relocation.
+ * the inode is in data relocation tree and its link count is 0
+ */
+static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
+					struct btrfs_block_group_cache *group)
+{
+	struct inode *inode = NULL;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	unsigned long nr;
+	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
+	int err = 0;
+
+	root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
+	if (IS_ERR(root))
+		return ERR_CAST(root);
+
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
+	if (err)
+		goto out;
+
+	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
+	BUG_ON(err);
+
+	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
+				       group->key.offset, 0, group->key.offset,
+				       0, 0, 0);
+	BUG_ON(err);
+
+	key.objectid = objectid;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	inode = btrfs_iget(root->fs_info->sb, &key, root);
+	BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
+	BTRFS_I(inode)->index_cnt = group->key.objectid;
+
+	err = btrfs_orphan_add(trans, inode);
+out:
+	nr = trans->blocks_used;
+	btrfs_end_transaction(trans, root);
+
+	btrfs_btree_balance_dirty(root, nr);
+	if (err) {
+		if (inode)
+			iput(inode);
+		inode = ERR_PTR(err);
+	}
+	return inode;
+}
+
+/*
+ * function to relocate all extents in a block group.
+ */
+int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
+{
+	struct btrfs_fs_info *fs_info = extent_root->fs_info;
+	struct reloc_control *rc;
+	int ret;
+	int err = 0;
+
+	rc = kzalloc(sizeof(*rc), GFP_NOFS);
+	if (!rc)
+		return -ENOMEM;
+
+	mapping_tree_init(&rc->reloc_root_tree);
+	extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
+	INIT_LIST_HEAD(&rc->reloc_roots);
+
+	rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
+	BUG_ON(!rc->block_group);
+
+	btrfs_init_workers(&rc->workers, "relocate",
+			   fs_info->thread_pool_size);
+
+	rc->extent_root = extent_root;
+	btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
+
+	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
+	if (IS_ERR(rc->data_inode)) {
+		err = PTR_ERR(rc->data_inode);
+		rc->data_inode = NULL;
+		goto out;
+	}
+
+	printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
+	       (unsigned long long)rc->block_group->key.objectid,
+	       (unsigned long long)rc->block_group->flags);
+
+	btrfs_start_delalloc_inodes(fs_info->tree_root);
+	btrfs_wait_ordered_extents(fs_info->tree_root, 0);
+
+	while (1) {
+		mutex_lock(&fs_info->cleaner_mutex);
+		btrfs_clean_old_snapshots(fs_info->tree_root);
+		mutex_unlock(&fs_info->cleaner_mutex);
+
+		rc->extents_found = 0;
+		rc->extents_skipped = 0;
+
+		ret = relocate_block_group(rc);
+		if (ret < 0) {
+			err = ret;
+			break;
+		}
+
+		if (rc->extents_found == 0)
+			break;
+
+		printk(KERN_INFO "btrfs: found %llu extents\n",
+			(unsigned long long)rc->extents_found);
+
+		if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
+			btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
+			invalidate_mapping_pages(rc->data_inode->i_mapping,
+						 0, -1);
+			rc->stage = UPDATE_DATA_PTRS;
+		} else if (rc->stage == UPDATE_DATA_PTRS &&
+			   rc->extents_skipped >= rc->extents_found) {
+			iput(rc->data_inode);
+			rc->data_inode = create_reloc_inode(fs_info,
+							    rc->block_group);
+			if (IS_ERR(rc->data_inode)) {
+				err = PTR_ERR(rc->data_inode);
+				rc->data_inode = NULL;
+				break;
+			}
+			rc->stage = MOVE_DATA_EXTENTS;
+			rc->found_file_extent = 0;
+		}
+	}
+
+	filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
+				 rc->block_group->key.objectid,
+				 rc->block_group->key.objectid +
+				 rc->block_group->key.offset - 1);
+
+	WARN_ON(rc->block_group->pinned > 0);
+	WARN_ON(rc->block_group->reserved > 0);
+	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
+out:
+	iput(rc->data_inode);
+	btrfs_stop_workers(&rc->workers);
+	btrfs_put_block_group(rc->block_group);
+	kfree(rc);
+	return err;
+}
+
+/*
+ * recover relocation interrupted by system crash.
+ *
+ * this function resumes merging reloc trees with corresponding fs trees.
+ * this is important for keeping the sharing of tree blocks
+ */
+int btrfs_recover_relocation(struct btrfs_root *root)
+{
+	LIST_HEAD(reloc_roots);
+	struct btrfs_key key;
+	struct btrfs_root *fs_root;
+	struct btrfs_root *reloc_root;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct reloc_control *rc = NULL;
+	struct btrfs_trans_handle *trans;
+	int ret;
+	int err = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	while (1) {
+		ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
+					path, 0, 0);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		if (ret > 0) {
+			if (path->slots[0] == 0)
+				break;
+			path->slots[0]--;
+		}
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		btrfs_release_path(root->fs_info->tree_root, path);
+
+		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
+		    key.type != BTRFS_ROOT_ITEM_KEY)
+			break;
+
+		reloc_root = btrfs_read_fs_root_no_radix(root, &key);
+		if (IS_ERR(reloc_root)) {
+			err = PTR_ERR(reloc_root);
+			goto out;
+		}
+
+		list_add(&reloc_root->root_list, &reloc_roots);
+
+		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+			fs_root = read_fs_root(root->fs_info,
+					       reloc_root->root_key.offset);
+			if (IS_ERR(fs_root)) {
+				err = PTR_ERR(fs_root);
+				goto out;
+			}
+		}
+
+		if (key.offset == 0)
+			break;
+
+		key.offset--;
+	}
+	btrfs_release_path(root->fs_info->tree_root, path);
+
+	if (list_empty(&reloc_roots))
+		goto out;
+
+	rc = kzalloc(sizeof(*rc), GFP_NOFS);
+	if (!rc) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	mapping_tree_init(&rc->reloc_root_tree);
+	INIT_LIST_HEAD(&rc->reloc_roots);
+	btrfs_init_workers(&rc->workers, "relocate",
+			   root->fs_info->thread_pool_size);
+	rc->extent_root = root->fs_info->extent_root;
+
+	set_reloc_control(rc);
+
+	while (!list_empty(&reloc_roots)) {
+		reloc_root = list_entry(reloc_roots.next,
+					struct btrfs_root, root_list);
+		list_del(&reloc_root->root_list);
+
+		if (btrfs_root_refs(&reloc_root->root_item) == 0) {
+			list_add_tail(&reloc_root->root_list,
+				      &rc->reloc_roots);
+			continue;
+		}
+
+		fs_root = read_fs_root(root->fs_info,
+				       reloc_root->root_key.offset);
+		BUG_ON(IS_ERR(fs_root));
+
+		__add_reloc_root(reloc_root);
+		fs_root->reloc_root = reloc_root;
+	}
+
+	trans = btrfs_start_transaction(rc->extent_root, 1);
+	btrfs_commit_transaction(trans, rc->extent_root);
+
+	merge_reloc_roots(rc);
+
+	unset_reloc_control(rc);
+
+	trans = btrfs_start_transaction(rc->extent_root, 1);
+	btrfs_commit_transaction(trans, rc->extent_root);
+out:
+	if (rc) {
+		btrfs_stop_workers(&rc->workers);
+		kfree(rc);
+	}
+	while (!list_empty(&reloc_roots)) {
+		reloc_root = list_entry(reloc_roots.next,
+					struct btrfs_root, root_list);
+		list_del(&reloc_root->root_list);
+		free_extent_buffer(reloc_root->node);
+		free_extent_buffer(reloc_root->commit_root);
+		kfree(reloc_root);
+	}
+	btrfs_free_path(path);
+
+	if (err == 0) {
+		/* cleanup orphan inode in data relocation tree */
+		fs_root = read_fs_root(root->fs_info,
+				       BTRFS_DATA_RELOC_TREE_OBJECTID);
+		if (IS_ERR(fs_root))
+			err = PTR_ERR(fs_root);
+	}
+	return err;
+}
+
+/*
+ * helper to add ordered checksum for data relocation.
+ *
+ * cloning checksum properly handles the nodatasum extents.
+ * it also saves CPU time to re-calculate the checksum.
+ */
+int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
+{
+	struct btrfs_ordered_sum *sums;
+	struct btrfs_sector_sum *sector_sum;
+	struct btrfs_ordered_extent *ordered;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	size_t offset;
+	int ret;
+	u64 disk_bytenr;
+	LIST_HEAD(list);
+
+	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
+	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
+
+	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
+	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
+				       disk_bytenr + len - 1, &list);
+
+	while (!list_empty(&list)) {
+		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
+		list_del_init(&sums->list);
+
+		sector_sum = sums->sums;
+		sums->bytenr = ordered->start;
+
+		offset = 0;
+		while (offset < sums->len) {
+			sector_sum->bytenr += ordered->start - disk_bytenr;
+			sector_sum++;
+			offset += root->sectorsize;
+		}
+
+		btrfs_add_ordered_sum(inode, ordered, sums);
+	}
+	btrfs_put_ordered_extent(ordered);
+	return 0;
+}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b48650d..0ddc6d6 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -111,6 +111,15 @@ out:
 	return ret;
 }
 
+int btrfs_set_root_node(struct btrfs_root_item *item,
+			struct extent_buffer *node)
+{
+	btrfs_set_root_bytenr(item, node->start);
+	btrfs_set_root_level(item, btrfs_header_level(node));
+	btrfs_set_root_generation(item, btrfs_header_generation(node));
+	return 0;
+}
+
 /*
  * copy the data in 'item' into the btree
  */
@@ -164,8 +173,7 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
  * offset lower than the latest root.  They need to be queued for deletion to
  * finish what was happening when we crashed.
  */
-int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid,
-			  struct btrfs_root *latest)
+int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
 {
 	struct btrfs_root *dead_root;
 	struct btrfs_item *item;
@@ -227,10 +235,7 @@ again:
 			goto err;
 		}
 
-		if (objectid == BTRFS_TREE_RELOC_OBJECTID)
-			ret = btrfs_add_dead_reloc_root(dead_root);
-		else
-			ret = btrfs_add_dead_root(dead_root, latest);
+		ret = btrfs_add_dead_root(dead_root);
 		if (ret)
 			goto err;
 		goto again;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2ff7cd2..e9ef8c3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -52,7 +52,6 @@
 #include "export.h"
 #include "compression.h"
 
-
 static struct super_operations btrfs_super_ops;
 
 static void btrfs_put_super(struct super_block *sb)
@@ -322,7 +321,7 @@ static int btrfs_fill_super(struct super_block *sb,
 	struct dentry *root_dentry;
 	struct btrfs_super_block *disk_super;
 	struct btrfs_root *tree_root;
-	struct btrfs_inode *bi;
+	struct btrfs_key key;
 	int err;
 
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -341,23 +340,15 @@ static int btrfs_fill_super(struct super_block *sb,
 	}
 	sb->s_fs_info = tree_root;
 	disk_super = &tree_root->fs_info->super_copy;
-	inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID,
-				  tree_root->fs_info->fs_root);
-	bi = BTRFS_I(inode);
-	bi->location.objectid = inode->i_ino;
-	bi->location.offset = 0;
-	bi->root = tree_root->fs_info->fs_root;
-
-	btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
 
-	if (!inode) {
-		err = -ENOMEM;
+	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto fail_close;
 	}
-	if (inode->i_state & I_NEW) {
-		btrfs_read_locked_inode(inode);
-		unlock_new_inode(inode);
-	}
 
 	root_dentry = d_alloc_root(inode);
 	if (!root_dentry) {
@@ -584,7 +575,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
 			return -EINVAL;
 
-		ret = btrfs_cleanup_reloc_trees(root);
+		/* recover relocation */
+		ret = btrfs_recover_relocation(root);
 		WARN_ON(ret);
 
 		ret = btrfs_cleanup_fs_roots(root->fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 01b1436..2e177d7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -25,7 +25,6 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "locking.h"
-#include "ref-cache.h"
 #include "tree-log.h"
 
 #define BTRFS_ROOT_TRANS_TAG 0
@@ -94,45 +93,37 @@ static noinline int join_transaction(struct btrfs_root *root)
  * to make sure the old root from before we joined the transaction is deleted
  * when the transaction commits
  */
-noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
+static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
+					 struct btrfs_root *root)
 {
-	struct btrfs_dirty_root *dirty;
-	u64 running_trans_id = root->fs_info->running_transaction->transid;
-	if (root->ref_cows && root->last_trans < running_trans_id) {
+	if (root->ref_cows && root->last_trans < trans->transid) {
 		WARN_ON(root == root->fs_info->extent_root);
-		if (root->root_item.refs != 0) {
-			radix_tree_tag_set(&root->fs_info->fs_roots_radix,
-				   (unsigned long)root->root_key.objectid,
-				   BTRFS_ROOT_TRANS_TAG);
-
-			dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
-			BUG_ON(!dirty);
-			dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
-			BUG_ON(!dirty->root);
-			dirty->latest_root = root;
-			INIT_LIST_HEAD(&dirty->list);
-
-			root->commit_root = btrfs_root_node(root);
-
-			memcpy(dirty->root, root, sizeof(*root));
-			spin_lock_init(&dirty->root->node_lock);
-			spin_lock_init(&dirty->root->list_lock);
-			mutex_init(&dirty->root->objectid_mutex);
-			mutex_init(&dirty->root->log_mutex);
-			INIT_LIST_HEAD(&dirty->root->dead_list);
-			dirty->root->node = root->commit_root;
-			dirty->root->commit_root = NULL;
+		WARN_ON(root->root_item.refs == 0);
+		WARN_ON(root->commit_root != root->node);
+
+		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
+			   (unsigned long)root->root_key.objectid,
+			   BTRFS_ROOT_TRANS_TAG);
+		root->last_trans = trans->transid;
+		btrfs_init_reloc_root(trans, root);
+	}
+	return 0;
+}
 
-			spin_lock(&root->list_lock);
-			list_add(&dirty->root->dead_list, &root->dead_list);
-			spin_unlock(&root->list_lock);
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	if (!root->ref_cows)
+		return 0;
 
-			root->dirty_root = dirty;
-		} else {
-			WARN_ON(1);
-		}
-		root->last_trans = running_trans_id;
+	mutex_lock(&root->fs_info->trans_mutex);
+	if (root->last_trans == trans->transid) {
+		mutex_unlock(&root->fs_info->trans_mutex);
+		return 0;
 	}
+
+	record_root_in_trans(trans, root);
+	mutex_unlock(&root->fs_info->trans_mutex);
 	return 0;
 }
 
@@ -181,7 +172,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 	ret = join_transaction(root);
 	BUG_ON(ret);
 
-	btrfs_record_root_in_trans(root);
 	h->transid = root->fs_info->running_transaction->transid;
 	h->transaction = root->fs_info->running_transaction;
 	h->blocks_reserved = num_blocks;
@@ -192,6 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 	h->delayed_ref_updates = 0;
 
 	root->fs_info->running_transaction->use_count++;
+	record_root_in_trans(h, root);
 	mutex_unlock(&root->fs_info->trans_mutex);
 	return h;
 }
@@ -233,6 +224,7 @@ static noinline int wait_for_commit(struct btrfs_root *root,
 	return 0;
 }
 
+#if 0
 /*
  * rate limit against the drop_snapshot code.  This helps to slow down new
  * operations if the drop_snapshot code isn't able to keep up.
@@ -273,6 +265,7 @@ harder:
 			goto harder;
 	}
 }
+#endif
 
 void btrfs_throttle(struct btrfs_root *root)
 {
@@ -280,7 +273,6 @@ void btrfs_throttle(struct btrfs_root *root)
 	if (!root->fs_info->open_ioctl_trans)
 		wait_current_trans(root);
 	mutex_unlock(&root->fs_info->trans_mutex);
-	throttle_on_drops(root);
 }
 
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
@@ -323,9 +315,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	memset(trans, 0, sizeof(*trans));
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 
-	if (throttle)
-		throttle_on_drops(root);
-
 	return 0;
 }
 
@@ -462,12 +451,8 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
 		if (old_root_bytenr == root->node->start)
 			break;
-		btrfs_set_root_bytenr(&root->root_item,
-				       root->node->start);
-		btrfs_set_root_level(&root->root_item,
-				     btrfs_header_level(root->node));
-		btrfs_set_root_generation(&root->root_item, trans->transid);
 
+		btrfs_set_root_node(&root->root_item, root->node);
 		ret = btrfs_update_root(trans, tree_root,
 					&root->root_key,
 					&root->root_item);
@@ -477,14 +462,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 		BUG_ON(ret);
 	}
+	free_extent_buffer(root->commit_root);
+	root->commit_root = btrfs_root_node(root);
 	return 0;
 }
 
 /*
  * update all the cowonly tree roots on disk
  */
-int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root)
+static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
+					 struct btrfs_root *root)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct list_head *next;
@@ -520,118 +507,54 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
  * a dirty root struct and adds it into the list of dead roots that need to
  * be deleted
  */
-int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
+int btrfs_add_dead_root(struct btrfs_root *root)
 {
-	struct btrfs_dirty_root *dirty;
-
-	dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
-	if (!dirty)
-		return -ENOMEM;
-	dirty->root = root;
-	dirty->latest_root = latest;
-
 	mutex_lock(&root->fs_info->trans_mutex);
-	list_add(&dirty->list, &latest->fs_info->dead_roots);
+	list_add(&root->root_list, &root->fs_info->dead_roots);
 	mutex_unlock(&root->fs_info->trans_mutex);
 	return 0;
 }
 
 /*
- * at transaction commit time we need to schedule the old roots for
- * deletion via btrfs_drop_snapshot.  This runs through all the
- * reference counted roots that were modified in the current
- * transaction and puts them into the drop list
+ * update all the cowonly tree roots on disk
  */
-static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
-				    struct radix_tree_root *radix,
-				    struct list_head *list)
+static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root)
 {
-	struct btrfs_dirty_root *dirty;
 	struct btrfs_root *gang[8];
-	struct btrfs_root *root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	int i;
 	int ret;
 	int err = 0;
-	u32 refs;
 
 	while (1) {
-		ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
+		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
+						 (void **)gang, 0,
 						 ARRAY_SIZE(gang),
 						 BTRFS_ROOT_TRANS_TAG);
 		if (ret == 0)
 			break;
 		for (i = 0; i < ret; i++) {
 			root = gang[i];
-			radix_tree_tag_clear(radix,
-				     (unsigned long)root->root_key.objectid,
-				     BTRFS_ROOT_TRANS_TAG);
-
-			BUG_ON(!root->ref_tree);
-			dirty = root->dirty_root;
+			radix_tree_tag_clear(&fs_info->fs_roots_radix,
+					(unsigned long)root->root_key.objectid,
+					BTRFS_ROOT_TRANS_TAG);
 
 			btrfs_free_log(trans, root);
-			btrfs_free_reloc_root(trans, root);
-
-			if (root->commit_root == root->node) {
-				WARN_ON(root->node->start !=
-					btrfs_root_bytenr(&root->root_item));
-
-				free_extent_buffer(root->commit_root);
-				root->commit_root = NULL;
-				root->dirty_root = NULL;
-
-				spin_lock(&root->list_lock);
-				list_del_init(&dirty->root->dead_list);
-				spin_unlock(&root->list_lock);
+			btrfs_update_reloc_root(trans, root);
 
-				kfree(dirty->root);
-				kfree(dirty);
-
-				/* make sure to update the root on disk
-				 * so we get any updates to the block used
-				 * counts
-				 */
-				err = btrfs_update_root(trans,
-						root->fs_info->tree_root,
-						&root->root_key,
-						&root->root_item);
+			if (root->commit_root == root->node)
 				continue;
-			}
 
-			memset(&root->root_item.drop_progress, 0,
-			       sizeof(struct btrfs_disk_key));
-			root->root_item.drop_level = 0;
-			root->commit_root = NULL;
-			root->dirty_root = NULL;
-			root->root_key.offset = root->fs_info->generation;
-			btrfs_set_root_bytenr(&root->root_item,
-					      root->node->start);
-			btrfs_set_root_level(&root->root_item,
-					     btrfs_header_level(root->node));
-			btrfs_set_root_generation(&root->root_item,
-						  root->root_key.offset);
-
-			err = btrfs_insert_root(trans, root->fs_info->tree_root,
+			free_extent_buffer(root->commit_root);
+			root->commit_root = btrfs_root_node(root);
+
+			btrfs_set_root_node(&root->root_item, root->node);
+			err = btrfs_update_root(trans, fs_info->tree_root,
 						&root->root_key,
 						&root->root_item);
 			if (err)
 				break;
-
-			refs = btrfs_root_refs(&dirty->root->root_item);
-			btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
-			err = btrfs_update_root(trans, root->fs_info->tree_root,
-						&dirty->root->root_key,
-						&dirty->root->root_item);
-
-			BUG_ON(err);
-			if (refs == 1) {
-				list_add(&dirty->list, list);
-			} else {
-				WARN_ON(1);
-				free_extent_buffer(dirty->root->node);
-				kfree(dirty->root);
-				kfree(dirty);
-			}
 		}
 	}
 	return err;
@@ -688,12 +611,8 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
 				TASK_UNINTERRUPTIBLE);
 		mutex_unlock(&info->trans_mutex);
 
-		atomic_dec(&info->throttles);
-		wake_up(&info->transaction_throttle);
-
 		schedule();
 
-		atomic_inc(&info->throttles);
 		mutex_lock(&info->trans_mutex);
 		finish_wait(&info->transaction_wait, &wait);
 	}
@@ -705,111 +624,61 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
  * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
  * all of them
  */
-static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
-				     struct list_head *list)
+int btrfs_drop_dead_root(struct btrfs_root *root)
 {
-	struct btrfs_dirty_root *dirty;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	unsigned long nr;
-	u64 num_bytes;
-	u64 bytes_used;
-	u64 max_useless;
-	int ret = 0;
-	int err;
-
-	while (!list_empty(list)) {
-		struct btrfs_root *root;
-
-		dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
-		list_del_init(&dirty->list);
-
-		num_bytes = btrfs_root_used(&dirty->root->root_item);
-		root = dirty->latest_root;
-		atomic_inc(&root->fs_info->throttles);
-
-		while (1) {
-			/*
-			 * we don't want to jump in and create a bunch of
-			 * delayed refs if the transaction is starting to close
-			 */
-			wait_transaction_pre_flush(tree_root->fs_info);
-			trans = btrfs_start_transaction(tree_root, 1);
-
-			/*
-			 * we've joined a transaction, make sure it isn't
-			 * closing right now
-			 */
-			if (trans->transaction->delayed_refs.flushing) {
-				btrfs_end_transaction(trans, tree_root);
-				continue;
-			}
-
-			mutex_lock(&root->fs_info->drop_mutex);
-			ret = btrfs_drop_snapshot(trans, dirty->root);
-			if (ret != -EAGAIN)
-				break;
-			mutex_unlock(&root->fs_info->drop_mutex);
+	int ret;
 
-			err = btrfs_update_root(trans,
-					tree_root,
-					&dirty->root->root_key,
-					&dirty->root->root_item);
-			if (err)
-				ret = err;
-			nr = trans->blocks_used;
-			ret = btrfs_end_transaction(trans, tree_root);
-			BUG_ON(ret);
+	while (1) {
+		/*
+		 * we don't want to jump in and create a bunch of
+		 * delayed refs if the transaction is starting to close
+		 */
+		wait_transaction_pre_flush(tree_root->fs_info);
+		trans = btrfs_start_transaction(tree_root, 1);
 
-			btrfs_btree_balance_dirty(tree_root, nr);
-			cond_resched();
+		/*
+		 * we've joined a transaction, make sure it isn't
+		 * closing right now
+		 */
+		if (trans->transaction->delayed_refs.flushing) {
+			btrfs_end_transaction(trans, tree_root);
+			continue;
 		}
-		BUG_ON(ret);
-		atomic_dec(&root->fs_info->throttles);
-		wake_up(&root->fs_info->transaction_throttle);
 
-		num_bytes -= btrfs_root_used(&dirty->root->root_item);
-		bytes_used = btrfs_root_used(&root->root_item);
-		if (num_bytes) {
-			mutex_lock(&root->fs_info->trans_mutex);
-			btrfs_record_root_in_trans(root);
-			mutex_unlock(&root->fs_info->trans_mutex);
-			btrfs_set_root_used(&root->root_item,
-					    bytes_used - num_bytes);
-		}
+		ret = btrfs_drop_snapshot(trans, root);
+		if (ret != -EAGAIN)
+			break;
 
-		ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
-		if (ret) {
-			BUG();
+		ret = btrfs_update_root(trans, tree_root,
+					&root->root_key,
+					&root->root_item);
+		if (ret)
 			break;
-		}
-		mutex_unlock(&root->fs_info->drop_mutex);
-
-		spin_lock(&root->list_lock);
-		list_del_init(&dirty->root->dead_list);
-		if (!list_empty(&root->dead_list)) {
-			struct btrfs_root *oldest;
-			oldest = list_entry(root->dead_list.prev,
-					    struct btrfs_root, dead_list);
-			max_useless = oldest->root_key.offset - 1;
-		} else {
-			max_useless = root->root_key.offset - 1;
-		}
-		spin_unlock(&root->list_lock);
 
 		nr = trans->blocks_used;
 		ret = btrfs_end_transaction(trans, tree_root);
 		BUG_ON(ret);
 
-		ret = btrfs_remove_leaf_refs(root, max_useless, 0);
-		BUG_ON(ret);
-
-		free_extent_buffer(dirty->root->node);
-		kfree(dirty->root);
-		kfree(dirty);
-
 		btrfs_btree_balance_dirty(tree_root, nr);
 		cond_resched();
 	}
+	BUG_ON(ret);
+
+	ret = btrfs_del_root(trans, tree_root, &root->root_key);
+	BUG_ON(ret);
+
+	nr = trans->blocks_used;
+	ret = btrfs_end_transaction(trans, tree_root);
+	BUG_ON(ret);
+
+	free_extent_buffer(root->node);
+	free_extent_buffer(root->commit_root);
+	kfree(root);
+
+	btrfs_btree_balance_dirty(tree_root, nr);
 	return ret;
 }
 
@@ -839,24 +708,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto fail;
 
-	btrfs_record_root_in_trans(root);
+	record_root_in_trans(trans, root);
 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
 
 	key.objectid = objectid;
-	key.offset = trans->transid;
+	key.offset = 0;
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 
 	old = btrfs_lock_root_node(root);
 	btrfs_cow_block(trans, root, old, NULL, 0, &old);
+	btrfs_set_lock_blocking(old);
 
 	btrfs_copy_root(trans, root, old, &tmp, objectid);
 	btrfs_tree_unlock(old);
 	free_extent_buffer(old);
 
-	btrfs_set_root_bytenr(new_root_item, tmp->start);
-	btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
-	btrfs_set_root_generation(new_root_item, trans->transid);
+	btrfs_set_root_node(new_root_item, tmp);
 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
 				new_root_item);
 	btrfs_tree_unlock(tmp);
@@ -964,6 +832,24 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+static void update_super_roots(struct btrfs_root *root)
+{
+	struct btrfs_root_item *root_item;
+	struct btrfs_super_block *super;
+
+	super = &root->fs_info->super_copy;
+
+	root_item = &root->fs_info->chunk_root->root_item;
+	super->chunk_root = root_item->bytenr;
+	super->chunk_root_generation = root_item->generation;
+	super->chunk_root_level = root_item->level;
+
+	root_item = &root->fs_info->tree_root->root_item;
+	super->root = root_item->bytenr;
+	super->generation = root_item->generation;
+	super->root_level = root_item->level;
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
@@ -971,8 +857,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	unsigned long timeout = 1;
 	struct btrfs_transaction *cur_trans;
 	struct btrfs_transaction *prev_trans = NULL;
-	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
-	struct list_head dirty_fs_roots;
 	struct extent_io_tree *pinned_copy;
 	DEFINE_WAIT(wait);
 	int ret;
@@ -999,7 +883,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	BUG_ON(ret);
 
 	mutex_lock(&root->fs_info->trans_mutex);
-	INIT_LIST_HEAD(&dirty_fs_roots);
 	if (cur_trans->in_commit) {
 		cur_trans->use_count++;
 		mutex_unlock(&root->fs_info->trans_mutex);
@@ -1105,41 +988,36 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	 * with the tree-log code.
 	 */
 	mutex_lock(&root->fs_info->tree_log_mutex);
-	/*
-	 * keep tree reloc code from adding new reloc trees
-	 */
-	mutex_lock(&root->fs_info->tree_reloc_mutex);
-
 
-	ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
-			      &dirty_fs_roots);
+	ret = commit_fs_roots(trans, root);
 	BUG_ON(ret);
 
-	/* add_dirty_roots gets rid of all the tree log roots, it is now
+	/* commit_fs_roots gets rid of all the tree log roots, it is now
 	 * safe to free the root of tree log roots
 	 */
 	btrfs_free_log_root_tree(trans, root->fs_info);
 
-	ret = btrfs_commit_tree_roots(trans, root);
+	ret = commit_cowonly_roots(trans, root);
 	BUG_ON(ret);
 
 	cur_trans = root->fs_info->running_transaction;
 	spin_lock(&root->fs_info->new_trans_lock);
 	root->fs_info->running_transaction = NULL;
 	spin_unlock(&root->fs_info->new_trans_lock);
-	btrfs_set_super_generation(&root->fs_info->super_copy,
-				   cur_trans->transid);
-	btrfs_set_super_root(&root->fs_info->super_copy,
-			     root->fs_info->tree_root->node->start);
-	btrfs_set_super_root_level(&root->fs_info->super_copy,
-			   btrfs_header_level(root->fs_info->tree_root->node));
-
-	btrfs_set_super_chunk_root(&root->fs_info->super_copy,
-				   chunk_root->node->start);
-	btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
-					 btrfs_header_level(chunk_root->node));
-	btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy,
-				btrfs_header_generation(chunk_root->node));
+
+	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
+			    root->fs_info->tree_root->node);
+	free_extent_buffer(root->fs_info->tree_root->commit_root);
+	root->fs_info->tree_root->commit_root =
+				btrfs_root_node(root->fs_info->tree_root);
+
+	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
+			    root->fs_info->chunk_root->node);
+	free_extent_buffer(root->fs_info->chunk_root->commit_root);
+	root->fs_info->chunk_root->commit_root =
+				btrfs_root_node(root->fs_info->chunk_root);
+
+	update_super_roots(root);
 
 	if (!root->fs_info->log_root_recovering) {
 		btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
@@ -1153,7 +1031,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	trans->transaction->blocked = 0;
 
-	wake_up(&root->fs_info->transaction_throttle);
 	wake_up(&root->fs_info->transaction_wait);
 
 	mutex_unlock(&root->fs_info->trans_mutex);
@@ -1170,9 +1047,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	btrfs_finish_extent_commit(trans, root, pinned_copy);
 	kfree(pinned_copy);
 
-	btrfs_drop_dead_reloc_roots(root);
-	mutex_unlock(&root->fs_info->tree_reloc_mutex);
-
 	/* do the directory inserts of any pending snapshot creations */
 	finish_pending_snapshots(trans, root->fs_info);
 
@@ -1186,16 +1060,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	put_transaction(cur_trans);
 	put_transaction(cur_trans);
 
-	list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
-	if (root->fs_info->closing)
-		list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
-
 	mutex_unlock(&root->fs_info->trans_mutex);
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
-
-	if (root->fs_info->closing)
-		drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
 	return ret;
 }
 
@@ -1204,16 +1071,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
  */
 int btrfs_clean_old_snapshots(struct btrfs_root *root)
 {
-	struct list_head dirty_roots;
-	INIT_LIST_HEAD(&dirty_roots);
-again:
-	mutex_lock(&root->fs_info->trans_mutex);
-	list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
-	mutex_unlock(&root->fs_info->trans_mutex);
+	LIST_HEAD(list);
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	mutex_lock(&fs_info->trans_mutex);
+	list_splice_init(&fs_info->dead_roots, &list);
+	mutex_unlock(&fs_info->trans_mutex);
 
-	if (!list_empty(&dirty_roots)) {
-		drop_dirty_roots(root, &dirty_roots);
-		goto again;
+	while (!list_empty(&list)) {
+		root = list_entry(list.next, struct btrfs_root, root_list);
+		list_del_init(&root->root_list);
+		btrfs_drop_dead_root(root);
 	}
 	return 0;
 }
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94f5bde..961c3ee 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -62,12 +62,6 @@ struct btrfs_pending_snapshot {
 	struct list_head list;
 };
 
-struct btrfs_dirty_root {
-	struct list_head list;
-	struct btrfs_root *root;
-	struct btrfs_root *latest_root;
-};
-
 static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
 					       struct inode *inode)
 {
@@ -100,7 +94,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root);
 
-int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest);
+int btrfs_add_dead_root(struct btrfs_root *root);
+int btrfs_drop_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 int btrfs_clean_old_snapshots(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -108,7 +103,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root);
 void btrfs_throttle(struct btrfs_root *root);
-int btrfs_record_root_in_trans(struct btrfs_root *root);
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root);
 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 					struct extent_io_tree *dirty_pages);
 #endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index db5e212..2b41fc0 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -430,18 +430,16 @@ no_copy:
 static noinline struct inode *read_one_inode(struct btrfs_root *root,
 					     u64 objectid)
 {
+	struct btrfs_key key;
 	struct inode *inode;
-	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
-	if (inode->i_state & I_NEW) {
-		BTRFS_I(inode)->root = root;
-		BTRFS_I(inode)->location.objectid = objectid;
-		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-		BTRFS_I(inode)->location.offset = 0;
-		btrfs_read_locked_inode(inode);
-		unlock_new_inode(inode);
 
-	}
-	if (is_bad_inode(inode)) {
+	key.objectid = objectid;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	inode = btrfs_iget(root->fs_info->sb, &key, root);
+	if (IS_ERR(inode)) {
+		inode = NULL;
+	} else if (is_bad_inode(inode)) {
 		iput(inode);
 		inode = NULL;
 	}
@@ -541,6 +539,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 
 	if (found_type == BTRFS_FILE_EXTENT_REG ||
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+		u64 offset;
 		unsigned long dest_offset;
 		struct btrfs_key ins;
 
@@ -555,6 +554,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 		ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
 		ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
 		ins.type = BTRFS_EXTENT_ITEM_KEY;
+		offset = key->offset - btrfs_file_extent_offset(eb, item);
 
 		if (ins.objectid > 0) {
 			u64 csum_start;
@@ -569,19 +569,16 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 			if (ret == 0) {
 				ret = btrfs_inc_extent_ref(trans, root,
 						ins.objectid, ins.offset,
-						path->nodes[0]->start,
-						root->root_key.objectid,
-						trans->transid, key->objectid);
+						0, root->root_key.objectid,
+						key->objectid, offset);
 			} else {
 				/*
 				 * insert the extent pointer in the extent
 				 * allocation tree
 				 */
-				ret = btrfs_alloc_logged_extent(trans, root,
-						path->nodes[0]->start,
-						root->root_key.objectid,
-						trans->transid, key->objectid,
-						&ins);
+				ret = btrfs_alloc_logged_file_extent(trans,
+						root, root->root_key.objectid,
+						key->objectid, offset, &ins);
 				BUG_ON(ret);
 			}
 			btrfs_release_path(root, path);
@@ -1706,9 +1703,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
 
-				ret = btrfs_drop_leaf_ref(trans, root, next);
-				BUG_ON(ret);
-
 				WARN_ON(root_owner !=
 					BTRFS_TREE_LOG_OBJECTID);
 				ret = btrfs_free_reserved_extent(root,
@@ -1753,10 +1747,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 		btrfs_wait_tree_block_writeback(next);
 		btrfs_tree_unlock(next);
 
-		if (*level == 0) {
-			ret = btrfs_drop_leaf_ref(trans, root, next);
-			BUG_ON(ret);
-		}
 		WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
 		ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
 		BUG_ON(ret);
@@ -1811,12 +1801,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
 
-				if (*level == 0) {
-					ret = btrfs_drop_leaf_ref(trans, root,
-								  next);
-					BUG_ON(ret);
-				}
-
 				WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
 				ret = btrfs_free_reserved_extent(root,
 						path->nodes[*level]->start,
@@ -1884,11 +1868,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
 			btrfs_wait_tree_block_writeback(next);
 			btrfs_tree_unlock(next);
 
-			if (orig_level == 0) {
-				ret = btrfs_drop_leaf_ref(trans, log,
-							  next);
-				BUG_ON(ret);
-			}
 			WARN_ON(log->root_key.objectid !=
 				BTRFS_TREE_LOG_OBJECTID);
 			ret = btrfs_free_reserved_extent(log, next->start,
@@ -2027,9 +2006,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
 	BUG_ON(ret);
 
-	btrfs_set_root_bytenr(&log->root_item, log->node->start);
-	btrfs_set_root_generation(&log->root_item, trans->transid);
-	btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
+	btrfs_set_root_node(&log->root_item, log->node);
 
 	root->log_batch = 0;
 	root->log_transid++;
@@ -2581,7 +2558,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 				       ins_keys, ins_sizes, nr);
 	BUG_ON(ret);
 
-	for (i = 0; i < nr; i++) {
+	for (i = 0; i < nr; i++, dst_path->slots[0]++) {
 		dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
 						   dst_path->slots[0]);
 
@@ -2617,36 +2594,31 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 			found_type = btrfs_file_extent_type(src, extent);
 			if (found_type == BTRFS_FILE_EXTENT_REG ||
 			    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
-				u64 ds = btrfs_file_extent_disk_bytenr(src,
-								   extent);
-				u64 dl = btrfs_file_extent_disk_num_bytes(src,
-								      extent);
-				u64 cs = btrfs_file_extent_offset(src, extent);
-				u64 cl = btrfs_file_extent_num_bytes(src,
-								     extent);;
+				u64 ds, dl, cs, cl;
+				ds = btrfs_file_extent_disk_bytenr(src,
+								extent);
+				/* ds == 0 is a hole */
+				if (ds == 0)
+					continue;
+
+				dl = btrfs_file_extent_disk_num_bytes(src,
+								extent);
+				cs = btrfs_file_extent_offset(src, extent);
+				cl = btrfs_file_extent_num_bytes(src,
+								extent);;
 				if (btrfs_file_extent_compression(src,
 								  extent)) {
 					cs = 0;
 					cl = dl;
 				}
-				/* ds == 0 is a hole */
-				if (ds != 0) {
-					ret = btrfs_inc_extent_ref(trans, log,
-						   ds, dl,
-						   dst_path->nodes[0]->start,
-						   BTRFS_TREE_LOG_OBJECTID,
-						   trans->transid,
-						   ins_keys[i].objectid);
-					BUG_ON(ret);
-					ret = btrfs_lookup_csums_range(
-						   log->fs_info->csum_root,
-						   ds + cs, ds + cs + cl - 1,
-						   &ordered_sums);
-					BUG_ON(ret);
-				}
+
+				ret = btrfs_lookup_csums_range(
+						log->fs_info->csum_root,
+						ds + cs, ds + cs + cl - 1,
+						&ordered_sums);
+				BUG_ON(ret);
 			}
 		}
-		dst_path->slots[0]++;
 	}
 
 	btrfs_mark_buffer_dirty(dst_path->nodes[0]);
@@ -3029,9 +3001,7 @@ again:
 		BUG_ON(!wc.replay_dest);
 
 		wc.replay_dest->log_root = log;
-		mutex_lock(&fs_info->trans_mutex);
-		btrfs_record_root_in_trans(wc.replay_dest);
-		mutex_unlock(&fs_info->trans_mutex);
+		btrfs_record_root_in_trans(trans, wc.replay_dest);
 		ret = walk_log_tree(trans, log, &wc);
 		BUG_ON(ret);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6d35b0..8bc6a88 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1671,8 +1671,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
 	int ret;
 	int i;
 
-	printk(KERN_INFO "btrfs relocating chunk %llu\n",
-	       (unsigned long long)chunk_offset);
 	root = root->fs_info->chunk_root;
 	extent_root = root->fs_info->extent_root;
 	em_tree = &root->fs_info->mapping_tree.map_tree;
-- 
cgit v0.10.2


From b36124210248706186a02093427bdff4b3f548e8 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Wed, 13 May 2009 19:12:15 -0400
Subject: Btrfs: stop avoiding balancing at the end of the transaction.

When the delayed reference code was added, some checks were added
to avoid extra balancing while the delayed references were being flushed.
This made for less efficient btrees, but it reduced the chances of
loops where no forward progress was made because the balances made
more delayed ref updates.

With the new dead root removal code and the mixed back references,
the extent allocation tree is no longer using precise back refs, and
the delayed reference updates don't carry the risk of looping forever
anymore.  So, the balance avoidance is no longer required.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2b96027..2f633e7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1052,8 +1052,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
-	if (trans->transaction->delayed_refs.flushing &&
-	    btrfs_header_nritems(mid) > 2)
+	if (btrfs_header_nritems(mid) > 2)
 		return 0;
 
 	if (btrfs_header_nritems(mid) < 2)
@@ -2194,7 +2193,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 		ret = insert_new_root(trans, root, path, level + 1);
 		if (ret)
 			return ret;
-	} else if (!trans->transaction->delayed_refs.flushing) {
+	} else {
 		ret = push_nodes_for_insert(trans, root, path, level);
 		c = path->nodes[level];
 		if (!ret && btrfs_header_nritems(c) <
@@ -2869,8 +2868,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	int num_doubles = 0;
 
 	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY &&
-	    !trans->transaction->delayed_refs.flushing) {
+	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
 		wret = push_leaf_right(trans, root, path, data_size, 0);
 		if (wret < 0)
 			return wret;
@@ -3809,8 +3807,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		}
 
 		/* delete the leaf if it is mostly empty */
-		if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 &&
-		    !trans->transaction->delayed_refs.flushing) {
+		if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
 			/* push_leaf_left fixes the path.
 			 * make sure the path still points to our leaf
 			 * for possible call to del_ptr below
-- 
cgit v0.10.2


From cfbb9308463f6dad1334884db046ccf0f1a77918 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Mon, 18 May 2009 10:41:58 -0400
Subject: Btrfs: balance btree more often

With the new back reference code, the cost of a balance has gone down
in terms of the number of back reference updates done.  This commit
makes us more aggressively balance leaves and nodes as they become
less full.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2f633e7..60a45f3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1651,7 +1651,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		}
 		b = p->nodes[level];
 	} else if (ins_len < 0 && btrfs_header_nritems(b) <
-		   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
+		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
 		int sret;
 
 		sret = reada_for_balance(root, p, level);
@@ -3807,7 +3807,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		}
 
 		/* delete the leaf if it is mostly empty */
-		if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
+		if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) {
 			/* push_leaf_left fixes the path.
 			 * make sure the path still points to our leaf
 			 * for possible call to del_ptr below
-- 
cgit v0.10.2


From 2c943de6ad795a174dcc424c293bb77f15ae3b8c Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Mon, 18 May 2009 10:41:58 -0400
Subject: Btrfs: reduce mount -o ssd CPU usage

The block allocator in SSD mode will try to find groups of free blocks
that are close together.  This commit makes it loop less on a given
group size before bumping it.

The end result is that we are less likely to fill small holes in the
available free space, but we don't waste as much CPU building the
large cluster used by ssd mode.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0bc9365..2801655 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -652,7 +652,7 @@ again:
 			last = entry;
 			max_extent = 0;
 			total_retries++;
-			if (total_retries % 256 == 0) {
+			if (total_retries % 64 == 0) {
 				if (min_bytes >= (bytes + empty_size)) {
 					ret = -ENOSPC;
 					goto out;
-- 
cgit v0.10.2


From 585ad2c3797dcaa643aeba75b9f072778adf3490 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Mon, 18 May 2009 10:41:58 -0400
Subject: Btrfs: fix metadata dirty throttling limits

Once a metadata block has been written, it must be recowed, so the
btrfs dirty balancing call has a check to make sure a fair amount of metadata
was actually dirty before it started writing it back to disk.

A previous commit had changed the dirty tracking for metadata without
updating the btrfs dirty balancing checks.  This commit switches it
to use the correct counter.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7f5c6e3..e572cf4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2378,17 +2378,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 	 * looks as though older kernels can get into trouble with
 	 * this code, they end up stuck in balance_dirty_pages forever
 	 */
-	struct extent_io_tree *tree;
 	u64 num_dirty;
-	u64 start = 0;
 	unsigned long thresh = 32 * 1024 * 1024;
-	tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
 
 	if (current->flags & PF_MEMALLOC)
 		return;
 
-	num_dirty = count_range_bits(tree, &start, (u64)-1,
-				     thresh, EXTENT_DIRTY);
+	num_dirty = root->fs_info->dirty_metadata_bytes;
+
 	if (num_dirty > thresh) {
 		balance_dirty_pages_ratelimited_nr(
 				   root->fs_info->btree_inode->i_mapping, 1);
-- 
cgit v0.10.2


From d84275c938e1a5e2dc5b89eb9b878e0ddb2c55e0 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 9 Jun 2009 15:39:08 -0400
Subject: Btrfs: don't allow WRITE_SYNC bios to starve out regular writes

Btrfs uses dedicated threads to submit bios when checksumming is on,
which allows us to make sure the threads dedicated to checksumming don't get
stuck waiting for requests.  For each btrfs device, there are
two lists of bios.  One list is for WRITE_SYNC bios and the other
is for regular priority bios.

The IO submission threads used to process all of the WRITE_SYNC bios first and
then switch to the regular bios.  This commit makes sure we don't completely
starve the regular bios by rotating between the two lists.

WRITE_SYNC bios are still favored 2:1 over the regular bios, and this tries
to run in batches to avoid seeking.  Benchmarking shows this eliminates
stalls during streaming buffered writes on both multi-device and
single device filesystems.

If the regular bios starve, the system can end up with a large amount of ram
pinned down in writeback pages.  If we are a little more fair between the two
classes, we're able to keep throughput up and make progress on the bulk of
our dirty ram.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bc6a88..9d36181 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -163,6 +163,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	unsigned long num_sync_run;
 	unsigned long limit;
 	unsigned long last_waited = 0;
+	int force_reg = 0;
 
 	bdi = blk_get_backing_dev_info(device->bdev);
 	fs_info = device->dev_root->fs_info;
@@ -176,19 +177,22 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 
 loop:
 	spin_lock(&device->io_lock);
-	num_run = 0;
 
 loop_lock:
+	num_run = 0;
 
 	/* take all the bios off the list at once and process them
 	 * later on (without the lock held).  But, remember the
 	 * tail and other pointers so the bios can be properly reinserted
 	 * into the list if we hit congestion
 	 */
-	if (device->pending_sync_bios.head)
+	if (!force_reg && device->pending_sync_bios.head) {
 		pending_bios = &device->pending_sync_bios;
-	else
+		force_reg = 1;
+	} else {
 		pending_bios = &device->pending_bios;
+		force_reg = 0;
+	}
 
 	pending = pending_bios->head;
 	tail = pending_bios->tail;
@@ -228,10 +232,14 @@ loop_lock:
 	while (pending) {
 
 		rmb();
-		if (pending_bios != &device->pending_sync_bios &&
-		    device->pending_sync_bios.head &&
-		    num_run > 16) {
-			cond_resched();
+		/* we want to work on both lists, but do more bios on the
+		 * sync list than the regular list
+		 */
+		if ((num_run > 32 &&
+		    pending_bios != &device->pending_sync_bios &&
+		    device->pending_sync_bios.head) ||
+		   (num_run > 64 && pending_bios == &device->pending_sync_bios &&
+		    device->pending_bios.head)) {
 			spin_lock(&device->io_lock);
 			requeue_list(pending_bios, pending, tail);
 			goto loop_lock;
-- 
cgit v0.10.2


From d644d8a1e30b88a93bcfb63cada2ae628462ddba Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 9 Jun 2009 15:59:22 -0400
Subject: Btrfs: avoid IO stalls behind congested devices in a multi-device FS

The btrfs IO submission threads try to service a bunch of devices with a small
number of threads.  They do a congestion check to try and avoid waiting
on requests for a busy device.

The checks make sure we've sent a few requests down to a given device just so
that we aren't bouncing between busy devices without actually sending down
any IO.  The counter used to decide if we can switch to the next device
is somewhat overloaded.  It is also being used to decide if we've done
a good batch of requests between the WRITE_SYNC or regular priority lists.
It may get reset to zero often, leaving us hammering on a busy device
instead of moving on to another disk.

This commit adds a new counter for the number of bios sent while
servicing a device.  It doesn't get reset or fiddled with.  On
multi-device filesystems, this fixes IO stalls in streaming
write workloads.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9d36181..27d5f37 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -161,6 +161,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	int again = 0;
 	unsigned long num_run;
 	unsigned long num_sync_run;
+	unsigned long batch_run = 0;
 	unsigned long limit;
 	unsigned long last_waited = 0;
 	int force_reg = 0;
@@ -257,6 +258,8 @@ loop_lock:
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 		submit_bio(cur->bi_rw, cur);
 		num_run++;
+		batch_run++;
+
 		if (bio_sync(cur))
 			num_sync_run++;
 
@@ -273,7 +276,7 @@ loop_lock:
 		 * is now congested.  Back off and let other work structs
 		 * run instead
 		 */
-		if (pending && bdi_write_congested(bdi) && num_run > 16 &&
+		if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
 		    fs_info->fs_devices->open_devices > 1) {
 			struct io_context *ioc;
 
-- 
cgit v0.10.2


From 3b30c22f64a6bb297719c60e494af1d26563f584 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 9 Jun 2009 16:42:22 -0400
Subject: Btrfs: Add mount -o nossd

This allows you to turn off the ssd mode via remount.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e9ef8c3..22855a1 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,8 +66,8 @@ static void btrfs_put_super(struct super_block *sb)
 enum {
 	Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
 	Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
-	Opt_ssd, Opt_thread_pool, Opt_noacl,  Opt_compress, Opt_notreelog,
-	Opt_ratio, Opt_flushoncommit, Opt_err,
+	Opt_ssd, Opt_nossd, Opt_thread_pool, Opt_noacl,  Opt_compress,
+	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -83,6 +83,7 @@ static match_table_t tokens = {
 	{Opt_thread_pool, "thread_pool=%d"},
 	{Opt_compress, "compress"},
 	{Opt_ssd, "ssd"},
+	{Opt_nossd, "nossd"},
 	{Opt_noacl, "noacl"},
 	{Opt_notreelog, "notreelog"},
 	{Opt_flushoncommit, "flushoncommit"},
@@ -173,6 +174,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
 			btrfs_set_opt(info->mount_opt, SSD);
 			break;
+		case Opt_nossd:
+			printk(KERN_INFO "btrfs: not using ssd allocation scheme\n");
+			btrfs_clear_opt(info->mount_opt, SSD);
+			break;
 		case Opt_nobarrier:
 			printk(KERN_INFO "btrfs: turning off barriers\n");
 			btrfs_set_opt(info->mount_opt, NOBARRIER);
-- 
cgit v0.10.2


From c604480171c510c1beeb81b82418e5bc4de8f1ae Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 9 Jun 2009 18:35:15 -0400
Subject: Btrfs: avoid allocation clusters that are too spread out

In SSD mode for data, and all the time for metadata the allocator
will try to find a cluster of nearby blocks for allocations.  This
commit adds extra checks to make sure that each free block in the
cluster is close to the last one.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 2801655..ac23476 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -645,7 +645,8 @@ again:
 		 * we haven't filled the empty size and the window is
 		 * very large.  reset and try again
 		 */
-		if (next->offset - window_start > (bytes + empty_size) * 2) {
+		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
+		    next->offset - window_start > (bytes + empty_size) * 2) {
 			entry = next;
 			window_start = entry->offset;
 			window_free = entry->bytes;
-- 
cgit v0.10.2


From 451d7585a8bb1b9bec0d676ce3dece1923164e55 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 9 Jun 2009 20:28:34 -0400
Subject: Btrfs: add mount -o ssd_spread to spread allocations out

Some SSDs perform best when reusing block numbers often, while
others perform much better when clustering strictly allocates
big chunks of unused space.

The default mount -o ssd will find rough groupings of blocks
where there are a bunch of free blocks that might have some
allocated blocks mixed in.

mount -o ssd_spread will make sure there are no allocated blocks
mixed in.  It should perform better on lower end SSDs.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ce3ab4e..b9d8788 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1100,6 +1100,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_COMPRESS		(1 << 5)
 #define BTRFS_MOUNT_NOTREELOG           (1 << 6)
 #define BTRFS_MOUNT_FLUSHONCOMMIT       (1 << 7)
+#define BTRFS_MOUNT_SSD_SPREAD		(1 << 8)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a42419c..3355d7e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3607,7 +3607,7 @@ refill_cluster:
 			last_ptr_loop = 0;
 
 			/* allocate a cluster in this block group */
-			ret = btrfs_find_space_cluster(trans,
+			ret = btrfs_find_space_cluster(trans, root,
 					       block_group, last_ptr,
 					       offset, num_bytes,
 					       empty_cluster + empty_size);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ac23476..4538e48 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -579,6 +579,7 @@ out:
  * it returns -enospc
  */
 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
 			     struct btrfs_block_group_cache *block_group,
 			     struct btrfs_free_cluster *cluster,
 			     u64 offset, u64 bytes, u64 empty_size)
@@ -595,7 +596,9 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
 	int ret;
 
 	/* for metadata, allow allocates with more holes */
-	if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
+	if (btrfs_test_opt(root, SSD_SPREAD)) {
+		min_bytes = bytes + empty_size;
+	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
 		/*
 		 * we want to do larger allocations when we are
 		 * flushing out the delayed refs, it helps prevent
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index ab0bdc0..266fb87 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -31,6 +31,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
 			   u64 bytes);
 u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
 			     struct btrfs_block_group_cache *block_group,
 			     struct btrfs_free_cluster *cluster,
 			     u64 offset, u64 bytes, u64 empty_size);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 22855a1..7f5b288 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,8 +66,8 @@ static void btrfs_put_super(struct super_block *sb)
 enum {
 	Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
 	Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
-	Opt_ssd, Opt_nossd, Opt_thread_pool, Opt_noacl,  Opt_compress,
-	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
+	Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
+	Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -83,6 +83,7 @@ static match_table_t tokens = {
 	{Opt_thread_pool, "thread_pool=%d"},
 	{Opt_compress, "compress"},
 	{Opt_ssd, "ssd"},
+	{Opt_ssd_spread, "ssd_spread"},
 	{Opt_nossd, "nossd"},
 	{Opt_noacl, "noacl"},
 	{Opt_notreelog, "notreelog"},
@@ -174,9 +175,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
 			btrfs_set_opt(info->mount_opt, SSD);
 			break;
+		case Opt_ssd_spread:
+			printk(KERN_INFO "btrfs: use spread ssd "
+			       "allocation scheme\n");
+			btrfs_set_opt(info->mount_opt, SSD);
+			btrfs_set_opt(info->mount_opt, SSD_SPREAD);
+			break;
 		case Opt_nossd:
-			printk(KERN_INFO "btrfs: not using ssd allocation scheme\n");
+			printk(KERN_INFO "btrfs: not using ssd allocation "
+			       "scheme\n");
 			btrfs_clear_opt(info->mount_opt, SSD);
+			btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
 			break;
 		case Opt_nobarrier:
 			printk(KERN_INFO "btrfs: turning off barriers\n");
@@ -429,7 +438,9 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
 	if (btrfs_test_opt(root, COMPRESS))
 		seq_puts(seq, ",compress");
-	if (btrfs_test_opt(root, SSD))
+	if (btrfs_test_opt(root, SSD_SPREAD))
+		seq_puts(seq, ",ssd_spread");
+	else if (btrfs_test_opt(root, SSD))
 		seq_puts(seq, ",ssd");
 	if (btrfs_test_opt(root, NOTREELOG))
 		seq_puts(seq, ",notreelog");
-- 
cgit v0.10.2


From c289811cc096c57ff35550ee8132793a4f9b5b59 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Wed, 10 Jun 2009 09:51:32 -0400
Subject: Btrfs: autodetect SSD devices

During mount, btrfs will check the queue nonrot flag
for all the devices found in the FS.  If they are all
non-rotating, SSD mode is enabled by default.

If the FS was mounted with -o nossd, the non-rotating
flag is ignored.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9d8788..5fa7d7d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1101,6 +1101,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_NOTREELOG           (1 << 6)
 #define BTRFS_MOUNT_FLUSHONCOMMIT       (1 << 7)
 #define BTRFS_MOUNT_SSD_SPREAD		(1 << 8)
+#define BTRFS_MOUNT_NOSSD		(1 << 9)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e572cf4..f4dfbb7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1850,6 +1850,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (IS_ERR(fs_info->transaction_kthread))
 		goto fail_cleaner;
 
+	if (!btrfs_test_opt(tree_root, SSD) &&
+	    !btrfs_test_opt(tree_root, NOSSD) &&
+	    !fs_info->fs_devices->rotating) {
+		printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
+		       "mode\n");
+		btrfs_set_opt(fs_info->mount_opt, SSD);
+	}
+
 	if (btrfs_super_log_root(disk_super) != 0) {
 		u64 bytenr = btrfs_super_log_root(disk_super);
 
@@ -1893,6 +1901,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
 	if (!fs_info->fs_root)
 		goto fail_trans_kthread;
+
 	return tree_root;
 
 fail_trans_kthread:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 7f5b288..3427db2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -184,6 +184,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 		case Opt_nossd:
 			printk(KERN_INFO "btrfs: not using ssd allocation "
 			       "scheme\n");
+			btrfs_set_opt(info->mount_opt, NOSSD);
 			btrfs_clear_opt(info->mount_opt, SSD);
 			btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
 			break;
@@ -438,6 +439,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
 	if (btrfs_test_opt(root, COMPRESS))
 		seq_puts(seq, ",compress");
+	if (btrfs_test_opt(root, NOSSD))
+		seq_puts(seq, ",nossd");
 	if (btrfs_test_opt(root, SSD_SPREAD))
 		seq_puts(seq, ",ssd_spread");
 	else if (btrfs_test_opt(root, SSD))
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 27d5f37..3f4a593 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -605,6 +605,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		device->in_fs_metadata = 0;
 		device->mode = flags;
 
+		if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+			fs_devices->rotating = 1;
+
 		fs_devices->open_devices++;
 		if (device->writeable) {
 			fs_devices->rw_devices++;
@@ -1473,6 +1476,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	root->fs_info->fs_devices->rw_devices++;
 	root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
 
+	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+		root->fs_info->fs_devices->rotating = 1;
+
 	total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 	btrfs_set_super_total_bytes(&root->fs_info->super_copy,
 				    total_bytes + device->total_bytes);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5c3ff6d..3c1f731 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -107,6 +107,11 @@ struct btrfs_fs_devices {
 	int seeding;
 
 	int opened;
+
+	/* set when we find or add a device that doesn't have the
+	 * nonrot flag set
+	 */
+	int rotating;
 };
 
 struct btrfs_bio_stripe {
-- 
cgit v0.10.2


From 6cbff00f4632c8060b06bfc9585805217f11e12e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Apr 2009 10:37:41 +0200
Subject: Btrfs: implement FS_IOC_GETFLAGS/SETFLAGS/GETVERSION

Add support for the standard attributes set via chattr and read via
lsattr.  Currently we store the attributes in the flags value in
the btrfs inode, but I wonder whether we should split it into two so
that we don't have to keep converting between the two formats.

Remove the btrfs_clear_flag/btrfs_set_flag/btrfs_test_flag macros
as they were confusing the existing code and got in the way of the
new additions.

Also add the FS_IOC_GETVERSION ioctl for getting i_generation as it's
trivial.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ecf5f7d..acb4f35 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -157,5 +157,4 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 	BTRFS_I(inode)->disk_i_size = size;
 }
 
-
 #endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ab07627..de1e2fd 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -123,7 +123,7 @@ static int check_compressed_csum(struct inode *inode,
 	u32 csum;
 	u32 *cb_sum = &cb->sums;
 
-	if (btrfs_test_flag(inode, NODATASUM))
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		return 0;
 
 	for (i = 0; i < cb->nr_pages; i++) {
@@ -670,7 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			 */
 			atomic_inc(&cb->pending_bios);
 
-			if (!btrfs_test_flag(inode, NODATASUM)) {
+			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 				btrfs_lookup_bio_sums(root, inode, comp_bio,
 						      sums);
 			}
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
 	BUG_ON(ret);
 
-	if (!btrfs_test_flag(inode, NODATASUM))
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
 		btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5fa7d7d..4d6e0b6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1115,12 +1115,14 @@ struct btrfs_root {
 #define BTRFS_INODE_READONLY		(1 << 2)
 #define BTRFS_INODE_NOCOMPRESS		(1 << 3)
 #define BTRFS_INODE_PREALLOC		(1 << 4)
-#define btrfs_clear_flag(inode, flag)	(BTRFS_I(inode)->flags &= \
-					 ~BTRFS_INODE_##flag)
-#define btrfs_set_flag(inode, flag)	(BTRFS_I(inode)->flags |= \
-					 BTRFS_INODE_##flag)
-#define btrfs_test_flag(inode, flag)	(BTRFS_I(inode)->flags & \
-					 BTRFS_INODE_##flag)
+#define BTRFS_INODE_SYNC		(1 << 5)
+#define BTRFS_INODE_IMMUTABLE		(1 << 6)
+#define BTRFS_INODE_APPEND		(1 << 7)
+#define BTRFS_INODE_NODUMP		(1 << 8)
+#define BTRFS_INODE_NOATIME		(1 << 9)
+#define BTRFS_INODE_DIRSYNC		(1 << 10)
+
+
 /* some macros to generate set/get funcs for the struct fields.  This
  * assumes there is a lefoo_to_cpu for every type, so lets make a simple
  * one for u8:
@@ -2260,6 +2262,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size);
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+void btrfs_update_iflags(struct inode *inode);
+void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
 
 /* file.c */
 int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 917bf10..5b68330 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -368,7 +368,7 @@ again:
 	 * inode has not been flagged as nocompress.  This flag can
 	 * change at any time if we discover bad compression ratios.
 	 */
-	if (!btrfs_test_flag(inode, NOCOMPRESS) &&
+	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
 	    btrfs_test_opt(root, COMPRESS)) {
 		WARN_ON(pages);
 		pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
@@ -469,7 +469,7 @@ again:
 		nr_pages_ret = 0;
 
 		/* flag the file so we don't compress in the future */
-		btrfs_set_flag(inode, NOCOMPRESS);
+		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
 	}
 	if (will_compress) {
 		*num_added += 1;
@@ -862,7 +862,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 		async_cow->locked_page = locked_page;
 		async_cow->start = start;
 
-		if (btrfs_test_flag(inode, NOCOMPRESS))
+		if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
 			cur_end = end;
 		else
 			cur_end = min(end, start + 512 * 1024 - 1);
@@ -1133,10 +1133,10 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 	int ret;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 
-	if (btrfs_test_flag(inode, NODATACOW))
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 1, nr_written);
-	else if (btrfs_test_flag(inode, PREALLOC))
+	else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 0, nr_written);
 	else if (!btrfs_test_opt(root, COMPRESS))
@@ -1290,7 +1290,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 	int ret = 0;
 	int skip_sum;
 
-	skip_sum = btrfs_test_flag(inode, NODATASUM);
+	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
@@ -1790,7 +1790,8 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 		ClearPageChecked(page);
 		goto good;
 	}
-	if (btrfs_test_flag(inode, NODATASUM))
+
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		return 0;
 
 	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
@@ -2156,6 +2157,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
 		init_special_inode(inode, inode->i_mode, rdev);
 		break;
 	}
+
+	btrfs_update_iflags(inode);
 	return;
 
 make_bad:
@@ -3586,9 +3589,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 			btrfs_find_block_group(root, 0, alloc_hint, owner);
 	if ((mode & S_IFREG)) {
 		if (btrfs_test_opt(root, NODATASUM))
-			btrfs_set_flag(inode, NODATASUM);
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
 		if (btrfs_test_opt(root, NODATACOW))
-			btrfs_set_flag(inode, NODATACOW);
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
 	}
 
 	key[0].objectid = objectid;
@@ -3642,6 +3645,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	location->offset = 0;
 	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
 
+	btrfs_inherit_iflags(inode, dir);
+
 	insert_inode_hash(inode);
 	inode_tree_add(inode);
 	return inode;
@@ -5075,7 +5080,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
 out:
 	if (cur_offset > start) {
 		inode->i_ctime = CURRENT_TIME;
-		btrfs_set_flag(inode, PREALLOC);
+		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
 		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
 		    cur_offset > i_size_read(inode))
 			btrfs_i_size_write(inode, cur_offset);
@@ -5196,7 +5201,7 @@ static int btrfs_set_page_dirty(struct page *page)
 
 static int btrfs_permission(struct inode *inode, int mask)
 {
-	if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
+	if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
 		return -EACCES;
 	return generic_permission(inode, mask, btrfs_check_acl);
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 54dfd45..926332a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -50,7 +50,172 @@
 #include "volumes.h"
 #include "locking.h"
 
+/* Mask out flags that are inappropriate for the given type of inode. */
+static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
+{
+	if (S_ISDIR(mode))
+		return flags;
+	else if (S_ISREG(mode))
+		return flags & ~FS_DIRSYNC_FL;
+	else
+		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
+}
+
+/*
+ * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
+ */
+static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
+{
+	unsigned int iflags = 0;
+
+	if (flags & BTRFS_INODE_SYNC)
+		iflags |= FS_SYNC_FL;
+	if (flags & BTRFS_INODE_IMMUTABLE)
+		iflags |= FS_IMMUTABLE_FL;
+	if (flags & BTRFS_INODE_APPEND)
+		iflags |= FS_APPEND_FL;
+	if (flags & BTRFS_INODE_NODUMP)
+		iflags |= FS_NODUMP_FL;
+	if (flags & BTRFS_INODE_NOATIME)
+		iflags |= FS_NOATIME_FL;
+	if (flags & BTRFS_INODE_DIRSYNC)
+		iflags |= FS_DIRSYNC_FL;
+
+	return iflags;
+}
+
+/*
+ * Update inode->i_flags based on the btrfs internal flags.
+ */
+void btrfs_update_iflags(struct inode *inode)
+{
+	struct btrfs_inode *ip = BTRFS_I(inode);
+
+	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+
+	if (ip->flags & BTRFS_INODE_SYNC)
+		inode->i_flags |= S_SYNC;
+	if (ip->flags & BTRFS_INODE_IMMUTABLE)
+		inode->i_flags |= S_IMMUTABLE;
+	if (ip->flags & BTRFS_INODE_APPEND)
+		inode->i_flags |= S_APPEND;
+	if (ip->flags & BTRFS_INODE_NOATIME)
+		inode->i_flags |= S_NOATIME;
+	if (ip->flags & BTRFS_INODE_DIRSYNC)
+		inode->i_flags |= S_DIRSYNC;
+}
+
+/*
+ * Inherit flags from the parent inode.
+ *
+ * Unlike extN we don't have any flags we don't want to inherit currently.
+ */
+void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
+{
+	unsigned int flags = BTRFS_I(dir)->flags;
+
+	if (S_ISREG(inode->i_mode))
+		flags &= ~BTRFS_INODE_DIRSYNC;
+	else if (!S_ISDIR(inode->i_mode))
+		flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
+
+	BTRFS_I(inode)->flags = flags;
+	btrfs_update_iflags(inode);
+}
+
+static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
+{
+	struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
+	unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
+
+	if (copy_to_user(arg, &flags, sizeof(flags)))
+		return -EFAULT;
+	return 0;
+}
+
+static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct btrfs_inode *ip = BTRFS_I(inode);
+	struct btrfs_root *root = ip->root;
+	struct btrfs_trans_handle *trans;
+	unsigned int flags, oldflags;
+	int ret;
+
+	if (copy_from_user(&flags, arg, sizeof(flags)))
+		return -EFAULT;
+
+	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+		      FS_NOATIME_FL | FS_NODUMP_FL | \
+		      FS_SYNC_FL | FS_DIRSYNC_FL))
+		return -EOPNOTSUPP;
 
+	if (!is_owner_or_cap(inode))
+		return -EACCES;
+
+	mutex_lock(&inode->i_mutex);
+
+	flags = btrfs_mask_flags(inode->i_mode, flags);
+	oldflags = btrfs_flags_to_ioctl(ip->flags);
+	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+		if (!capable(CAP_LINUX_IMMUTABLE)) {
+			ret = -EPERM;
+			goto out_unlock;
+		}
+	}
+
+	ret = mnt_want_write(file->f_path.mnt);
+	if (ret)
+		goto out_unlock;
+
+	if (flags & FS_SYNC_FL)
+		ip->flags |= BTRFS_INODE_SYNC;
+	else
+		ip->flags &= ~BTRFS_INODE_SYNC;
+	if (flags & FS_IMMUTABLE_FL)
+		ip->flags |= BTRFS_INODE_IMMUTABLE;
+	else
+		ip->flags &= ~BTRFS_INODE_IMMUTABLE;
+	if (flags & FS_APPEND_FL)
+		ip->flags |= BTRFS_INODE_APPEND;
+	else
+		ip->flags &= ~BTRFS_INODE_APPEND;
+	if (flags & FS_NODUMP_FL)
+		ip->flags |= BTRFS_INODE_NODUMP;
+	else
+		ip->flags &= ~BTRFS_INODE_NODUMP;
+	if (flags & FS_NOATIME_FL)
+		ip->flags |= BTRFS_INODE_NOATIME;
+	else
+		ip->flags &= ~BTRFS_INODE_NOATIME;
+	if (flags & FS_DIRSYNC_FL)
+		ip->flags |= BTRFS_INODE_DIRSYNC;
+	else
+		ip->flags &= ~BTRFS_INODE_DIRSYNC;
+
+
+	trans = btrfs_join_transaction(root, 1);
+	BUG_ON(!trans);
+
+	ret = btrfs_update_inode(trans, root, inode);
+	BUG_ON(ret);
+
+	btrfs_update_iflags(inode);
+	inode->i_ctime = CURRENT_TIME;
+	btrfs_end_transaction(trans, root);
+
+	mnt_drop_write(file->f_path.mnt);
+ out_unlock:
+	mutex_unlock(&inode->i_mutex);
+	return 0;
+}
+
+static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+
+	return put_user(inode->i_generation, arg);
+}
 
 static noinline int create_subvol(struct btrfs_root *root,
 				  struct dentry *dentry,
@@ -1077,6 +1242,12 @@ long btrfs_ioctl(struct file *file, unsigned int
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+		return btrfs_ioctl_getflags(file, argp);
+	case FS_IOC_SETFLAGS:
+		return btrfs_ioctl_setflags(file, argp);
+	case FS_IOC_GETVERSION:
+		return btrfs_ioctl_getversion(file, argp);
 	case BTRFS_IOC_SNAP_CREATE:
 		return btrfs_ioctl_snap_create(file, argp, 0);
 	case BTRFS_IOC_SUBVOL_CREATE:
-- 
cgit v0.10.2


From 163e783e6a8b1e8bcb4c9084d438091386b589df Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 19 Apr 2009 13:02:41 +0100
Subject: Btrfs: remove crc32c.h and use libcrc32c directly.

There's no need to preserve this abstraction; it used to let us use
hardware crc32c support directly, but libcrc32c is already doing that for us
through the crypto API -- so we're already using the Intel crc32c
acceleration where appropriate.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h
deleted file mode 100644
index 6e1b3de..0000000
--- a/fs/btrfs/crc32c.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_CRC32C__
-#define __BTRFS_CRC32C__
-#include <linux/crc32c.h>
-
-/*
- * this file used to do more for selecting the HW version of crc32c,
- * perhaps it will one day again soon.
- */
-#define btrfs_crc32c(seed, data, length) crc32c(seed, data, length)
-#endif
-
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f4dfbb7..6c54c21 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -26,8 +26,8 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/crc32c.h>
 #include "compat.h"
-#include "crc32c.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -171,7 +171,7 @@ out:
 
 u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
 {
-	return btrfs_crc32c(seed, data, len);
+	return crc32c(seed, data, len);
 }
 
 void btrfs_csum_final(u32 crc, char *result)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3355d7e..33a65f2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -23,7 +23,6 @@
 #include <linux/rcupdate.h>
 #include "compat.h"
 #include "hash.h"
-#include "crc32c.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "print-tree.h"
@@ -625,11 +624,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
 	__le64 lenum;
 
 	lenum = cpu_to_le64(root_objectid);
-	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
+	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
 	lenum = cpu_to_le64(owner);
-	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
 	lenum = cpu_to_le64(offset);
-	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
 
 	return ((u64)high_crc << 31) ^ (u64)low_crc;
 }
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 2a020b2..db2ff97 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -19,9 +19,9 @@
 #ifndef __HASH__
 #define __HASH__
 
-#include "crc32c.h"
+#include <linux/crc32c.h>
 static inline u64 btrfs_name_hash(const char *name, int len)
 {
-	return btrfs_crc32c((u32)~1, name, len);
+	return crc32c((u32)~1, name, len);
 }
 #endif
-- 
cgit v0.10.2


From 524724ed1f224875a117be593540591ed050c73d Mon Sep 17 00:00:00 2001
From: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Date: Wed, 10 Jun 2009 11:13:17 -0400
Subject: Btrfs: fdatasync should skip metadata writeout

In btrfs, fdatasync and fsync are identical, but
fdatasync should skip committing transaction when
inode->i_state is set just I_DIRTY_SYNC and this indicates
only atime or/and mtime updates.
Following patch improves fdatasync throughput.

--file-block-size=4K --file-total-size=16G --file-test-mode=rndwr
--file-fsync-mode=fdatasync run

Results:
-2.6.30-rc8
Test execution summary:
    total time:                          1980.6540s
    total number of events:              10001
    total time taken by event execution: 1192.9804
    per-request statistics:
         min:                            0.0000s
         avg:                            0.1193s
         max:                            15.3720s
         approx.  95 percentile:         0.7257s

Threads fairness:
    events (avg/stddev):           625.0625/151.32
    execution time (avg/stddev):   74.5613/9.46

-2.6.30-rc8-patched
Test execution summary:
    total time:                          1695.9118s
    total number of events:              10000
    total time taken by event execution: 871.3214
    per-request statistics:
         min:                            0.0000s
         avg:                            0.0871s
         max:                            10.4644s
         approx.  95 percentile:         0.4787s

Threads fairness:
    events (avg/stddev):           625.0000/131.86
    execution time (avg/stddev):   54.4576/8.98

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0726a73..126477e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1157,6 +1157,8 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 	root->log_batch++;
 
+	if (datasync && !(inode->i_state & I_DIRTY_PAGES))
+		goto out;
 	/*
 	 * ok we haven't committed the transaction yet, lets do a commit
 	 */
-- 
cgit v0.10.2


From 7df336ec1266dccbb253bac52c529d3dcc7c22d0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Wed, 10 Jun 2009 11:36:43 -0400
Subject: Fix btrfs when ACLs are configured out

... otherwise generic_permission() will allow *anything* for all
files you don't own and that have some group permissions.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index cbba000..6039725 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -351,9 +351,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
 	return 0;
 }
 
-int btrfs_check_acl(struct inode *inode, int mask)
-{
-	return 0;
-}
-
 #endif /* CONFIG_FS_POSIX_ACL */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4d6e0b6..03441a9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2301,7 +2301,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options);
 int btrfs_sync_fs(struct super_block *sb, int wait);
 
 /* acl.c */
+#ifdef CONFIG_FS_POSIX_ACL
 int btrfs_check_acl(struct inode *inode, int mask);
+#else
+#define btrfs_check_acl NULL
+#endif
 int btrfs_init_acl(struct inode *inode, struct inode *dir);
 int btrfs_acl_chmod(struct inode *inode);
 
-- 
cgit v0.10.2


From 58f7f68f228c3aba2ba4468d92e2cec35724ba2e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 10 Jun 2009 09:57:55 -0400
Subject: cifs: add addr= mount option alias for ip=

When you look in /proc/mounts, the address of the server gets displayed
as "addr=". That's really a better option to use anyway since it's more
generic. What if we eventually want to support non-IP transports? It
also makes CIFS option consistent with the NFS option of the same name.

Begin the migration to that option name by adding an alias for ip=
called addr=.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 10151f8..6298dc3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -958,7 +958,8 @@ cifs_parse_mount_options(char *options, const char *devname,
 				}
 				strcpy(vol->password, value);
 			}
-		} else if (strnicmp(data, "ip", 2) == 0) {
+		} else if (!strnicmp(data, "ip", 2) ||
+			   !strnicmp(data, "addr", 4)) {
 			if (!value || !*value) {
 				vol->UNCip = NULL;
 			} else if (strnlen(value, INET6_ADDRSTRLEN) <
-- 
cgit v0.10.2


From bc5c6c043d8381676339fb3da59cc4cc5921d368 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 10 Jun 2009 04:48:41 -0400
Subject: ftrace/documentation: fix typo in function grapher name

The function graph tracer is called just "function_graph" (no trailing
"_tracer" needed).

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
LKML-Reference: <1244623722-6325-1-git-send-email-vapier@gentoo.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index fd9a3e6..5ad2ded 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -179,7 +179,7 @@ Here is the list of current tracers that may be configured.
 
 	Function call tracer to trace all kernel functions.
 
-  "function_graph_tracer"
+  "function_graph"
 
 	Similar to the function tracer except that the
 	function tracer probes the functions on their entry
-- 
cgit v0.10.2


From 1f3fff7bda95b75a6be5a02c2a6902573d8c18e6 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 10 Jun 2009 19:50:33 +0200
Subject: ALSA: use card device as parent for jack input-devices

This moves the jack devices from the PCI device into the ALSA card device, which
makes it easier for userspace to find all devices belonging to a specific card
while granting access to logged-in users.

Jack input devices from sound cards can now simply be matched with udev by doing:
  SUBSYSTEM="input", SUBSYSTEMS="sound", ...

 ls -l /sys/devices/pci0000:00/0000:00:1b.0/sound/card0
 controlC0
 device -> ../../../0000:00:1b.0
 id
 input10
 input11
 input8
 input9
 number
 pcmC0D0c
 pcmC0D0p
 pcmC0D1p
 power
 subsystem -> ../../../../../class/sound
 uevent

Cc: Lennart Poettering <lennart@0pointer.de>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/core/jack.c b/sound/core/jack.c
index d54d1a0..f705eec 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -63,7 +63,7 @@ static int snd_jack_dev_register(struct snd_device *device)
 
 	/* Default to the sound card device. */
 	if (!jack->input_dev->dev.parent)
-		jack->input_dev->dev.parent = card->dev;
+		jack->input_dev->dev.parent = snd_card_get_device_link(card);
 
 	err = input_register_device(jack->input_dev);
 	if (err == 0)
-- 
cgit v0.10.2


From a5c0f88678cd2fb1f649f7d366d756f2b2f97f0c Mon Sep 17 00:00:00 2001
From: Simos Xenitellis <simos.lists@googlemail.com>
Date: Wed, 10 Jun 2009 16:33:31 +0100
Subject: ALSA: hda - add quirk for STAC92xx (SigmaTel STAC9205)

A quirk is required for 8086:284b (rev 03) [Subsystem: 161f:2073].
The following has been tested with Alsa 1.0.20 (git master).

Background details can be found at
https://bugtrack.alsa-project.org/alsa-bug/view.php?id=4561
http://forum.ubuntu-gr.org/viewtopic.php?f=38&t=5290

Tested-by: Theodora Iliopoulou <th30dr@gmail.com>
Signed-off-by: Simos Xenitellis <simos@gnome.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 42f944b..93e47c9 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1590,6 +1590,8 @@ static struct snd_pci_quirk stac9200_cfg_tbl[] = {
 	/* SigmaTel reference board */
 	SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x2668,
 		      "DFI LanParty", STAC_REF),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0xfb30,
+		      "SigmaTel",STAC_9205_REF),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DFI, 0x3101,
 		      "DFI LanParty", STAC_REF),
 	/* Dell laptops have BIOS problem */
@@ -5938,6 +5940,7 @@ static struct hda_codec_preset snd_hda_preset_sigmatel[] = {
  	{ .id = 0x83847661, .name = "CXD9872RD/K", .patch = patch_stac9872 },
  	{ .id = 0x83847662, .name = "STAC9872AK", .patch = patch_stac9872 },
  	{ .id = 0x83847664, .name = "CXD9872AKD", .patch = patch_stac9872 },
+	{ .id = 0x83847698, .name = "STAC9205", .patch = patch_stac9205 },
  	{ .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 },
  	{ .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 },
  	{ .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 },
-- 
cgit v0.10.2


From a41f20716975910d9beb90b7efc61107901492b8 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 10 Jun 2009 14:22:55 -0400
Subject: ext4: Avoid corrupting the uninitialized bit in the extent during
 truncate

The unitialized bit was not properly getting preserved in in an extent
which is partially truncated because the it was geting set to the
value of the first extent to be removed or truncated as part of the
truncate operation, and if there are multiple extents are getting
removed or modified as part of the truncate operation, it is only the
last extent which will might be partially truncated, and its
uninitalized bit is not necessarily the same as the first extent to be
truncated.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9c35a7b..2593f74 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2083,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	ex = EXT_LAST_EXTENT(eh);
 
 	ex_ee_block = le32_to_cpu(ex->ee_block);
-	if (ext4_ext_is_uninitialized(ex))
-		uninitialized = 1;
 	ex_ee_len = ext4_ext_get_actual_len(ex);
 
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
 			ex_ee_block + ex_ee_len > start) {
+
+		if (ext4_ext_is_uninitialized(ex))
+			uninitialized = 1;
+		else
+			uninitialized = 0;
+
 		ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
 		path[depth].p_ext = ex;
 
-- 
cgit v0.10.2


From 6ff9a64d2aaa6eae396adc95e9c91c0cbfa6dbe4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 10 Jun 2009 14:28:34 -0400
Subject: tracing: do not translate event helper macros in print format

By moving the macro that creates the print format code above the
defining of the event macro helpers (__get_str, __print_symbolic,
and __get_dynamic_array), we get a little cleaner print format.

Instead of:

  (char *)((void *)REC + REC->__data_loc_name)

we get:

   __get_str(name)

Instead of:

   ({ static const struct trace_print_flags symbols[] = { { HI_SOFTIRQ, "HI" }, {

we get:

   __print_symbolic(REC->vec, { HI_SOFTIRQ, "HI" }, {

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 40ede4d..1867553 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -81,6 +81,87 @@
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
+ * Setup the showing format of trace point.
+ *
+ * int
+ * ftrace_format_##call(struct trace_seq *s)
+ * {
+ *	struct ftrace_raw_##call field;
+ *	int ret;
+ *
+ *	ret = trace_seq_printf(s, #type " " #item ";"
+ *			       " offset:%u; size:%u;\n",
+ *			       offsetof(struct ftrace_raw_##call, item),
+ *			       sizeof(field.type));
+ *
+ * }
+ */
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef __field
+#define __field(type, item)					\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __array
+#define __array(type, item, len)						\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"	       \
+			       "offset:%u;\tsize:%u;\n",		       \
+			       (unsigned int)offsetof(typeof(field),	       \
+					__data_loc_##item),		       \
+			       (unsigned int)sizeof(field.__data_loc_##item)); \
+	if (!ret)							       \
+		return 0;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef __entry
+#define __entry REC
+
+#undef __print_symbolic
+#undef __get_dynamic_array
+#undef __get_str
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
+
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct ftrace_raw_##call field __attribute__((unused));		\
+	int ret = 0;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: " print);			\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
  * Stage 3 of the trace events.
  *
  * Override the macros in <trace/trace_events.h> to include the following:
@@ -179,83 +260,6 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- *	struct ftrace_raw_##call field;
- *	int ret;
- *
- *	ret = trace_seq_printf(s, #type " " #item ";"
- *			       " offset:%u; size:%u;\n",
- *			       offsetof(struct ftrace_raw_##call, item),
- *			       sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __array
-#define __array(type, item, len)						\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __dynamic_array
-#define __dynamic_array(type, item, len)				       \
-	ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"	       \
-			       "offset:%u;\tsize:%u;\n",		       \
-			       (unsigned int)offsetof(typeof(field),	       \
-					__data_loc_##item),		       \
-			       (unsigned int)sizeof(field.__data_loc_##item)); \
-	if (!ret)							       \
-		return 0;
-
-#undef __string
-#define __string(item, src) __dynamic_array(char, item, -1)
-
-#undef __entry
-#define __entry REC
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-static int								\
-ftrace_format_##call(struct trace_seq *s)				\
-{									\
-	struct ftrace_raw_##call field __attribute__((unused));		\
-	int ret = 0;							\
-									\
-	tstruct;							\
-									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
-									\
-	return ret;							\
-}
-
-#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-
 #undef __field
 #define __field(type, item)						\
 	ret = trace_define_field(event_call, #type, #item,		\
-- 
cgit v0.10.2


From 61b6bc525a34931bb73e4c95bfe009cd9572a288 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 10 Jun 2009 10:04:58 -0400
Subject: cifs: remove never-used in6_addr option

This option was never used to my knowledge. Remove it before someone
does...

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6298dc3..97f4311 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -62,7 +62,6 @@ struct smb_vol {
 	char *domainname;
 	char *UNC;
 	char *UNCip;
-	char *in6_addr;   /* ipv6 address as human readable form of in6_addr */
 	char *iocharset;  /* local code page for mapping to and from Unicode */
 	char source_rfc1001_name[16]; /* netbios name of client */
 	char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
@@ -1320,16 +1319,6 @@ cifs_parse_mount_options(char *options, const char *devname,
 			vol->direct_io = 1;
 		} else if (strnicmp(data, "forcedirectio", 13) == 0) {
 			vol->direct_io = 1;
-		} else if (strnicmp(data, "in6_addr", 8) == 0) {
-			if (!value || !*value) {
-				vol->in6_addr = NULL;
-			} else if (strnlen(value, 49) == 48) {
-				vol->in6_addr = value;
-			} else {
-				printk(KERN_WARNING "CIFS: ip v6 address not "
-						    "48 characters long\n");
-				return 1;
-			}
 		} else if (strnicmp(data, "noac", 4) == 0) {
 			printk(KERN_WARNING "CIFS: Mount option noac not "
 				"supported. Instead set "
-- 
cgit v0.10.2


From e5e9a5206a171b2c467e494aebcdcf70c47289bc Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Wed, 10 Jun 2009 15:17:02 -0400
Subject: Btrfs: avoid races between super writeout and device list updates

On multi-device filesystems, btrfs writes supers to all of the devices
before considering a sync complete.  There wasn't any additional
locking between super writeout and the device list management code
because device management was done inside a transaction and
super writeout only happened  with no transation writers running.

With the btrfs fsync log and other async transaction updates, this
has been racey for some time.  This adds a mutex to protect
the device list.  The existing volume mutex could not be reused due to
transaction lock ordering requirements.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6c54c21..b7ddc77 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2111,7 +2111,7 @@ static int write_dev_supers(struct btrfs_device *device,
 
 int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
-	struct list_head *head = &root->fs_info->fs_devices->devices;
+	struct list_head *head;
 	struct btrfs_device *dev;
 	struct btrfs_super_block *sb;
 	struct btrfs_dev_item *dev_item;
@@ -2126,6 +2126,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
 
 	sb = &root->fs_info->super_for_commit;
 	dev_item = &sb->dev_item;
+
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+	head = &root->fs_info->fs_devices->devices;
 	list_for_each_entry(dev, head, dev_list) {
 		if (!dev->bdev) {
 			total_errors++;
@@ -2169,6 +2172,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
 		if (ret)
 			total_errors++;
 	}
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 	if (total_errors > max_errors) {
 		printk(KERN_ERR "btrfs: %d errors while writing supers\n",
 		       total_errors);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3f4a593..3ab80e9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -377,6 +377,7 @@ static noinline int device_list_add(const char *path,
 		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
 		fs_devices->latest_devid = devid;
 		fs_devices->latest_trans = found_transid;
+		mutex_init(&fs_devices->device_list_mutex);
 		device = NULL;
 	} else {
 		device = __find_device(&fs_devices->devices, devid,
@@ -403,7 +404,11 @@ static noinline int device_list_add(const char *path,
 			return -ENOMEM;
 		}
 		INIT_LIST_HEAD(&device->dev_alloc_list);
+
+		mutex_lock(&fs_devices->device_list_mutex);
 		list_add(&device->dev_list, &fs_devices->devices);
+		mutex_unlock(&fs_devices->device_list_mutex);
+
 		device->fs_devices = fs_devices;
 		fs_devices->num_devices++;
 	}
@@ -429,10 +434,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
 	INIT_LIST_HEAD(&fs_devices->devices);
 	INIT_LIST_HEAD(&fs_devices->alloc_list);
 	INIT_LIST_HEAD(&fs_devices->list);
+	mutex_init(&fs_devices->device_list_mutex);
 	fs_devices->latest_devid = orig->latest_devid;
 	fs_devices->latest_trans = orig->latest_trans;
 	memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
 
+	mutex_lock(&orig->device_list_mutex);
 	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
 		device = kzalloc(sizeof(*device), GFP_NOFS);
 		if (!device)
@@ -454,8 +461,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
 		device->fs_devices = fs_devices;
 		fs_devices->num_devices++;
 	}
+	mutex_unlock(&orig->device_list_mutex);
 	return fs_devices;
 error:
+	mutex_unlock(&orig->device_list_mutex);
 	free_fs_devices(fs_devices);
 	return ERR_PTR(-ENOMEM);
 }
@@ -466,6 +475,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
 
 	mutex_lock(&uuid_mutex);
 again:
+	mutex_lock(&fs_devices->device_list_mutex);
 	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
 		if (device->in_fs_metadata)
 			continue;
@@ -485,6 +495,7 @@ again:
 		kfree(device->name);
 		kfree(device);
 	}
+	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (fs_devices->seed) {
 		fs_devices = fs_devices->seed;
@@ -1135,12 +1146,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
 		device = NULL;
 		devices = &root->fs_info->fs_devices->devices;
+		mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 		list_for_each_entry(tmp, devices, dev_list) {
 			if (tmp->in_fs_metadata && !tmp->bdev) {
 				device = tmp;
 				break;
 			}
 		}
+		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 		bdev = NULL;
 		bh = NULL;
 		disk_super = NULL;
@@ -1195,7 +1208,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 		goto error_brelse;
 
 	device->in_fs_metadata = 0;
+
+	/*
+	 * the device list mutex makes sure that we don't change
+	 * the device list while someone else is writing out all
+	 * the device supers.
+	 */
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	list_del_init(&device->dev_list);
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
 	device->fs_devices->num_devices--;
 
 	next_device = list_entry(root->fs_info->fs_devices->devices.next,
@@ -1289,6 +1311,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
 	seed_devices->opened = 1;
 	INIT_LIST_HEAD(&seed_devices->devices);
 	INIT_LIST_HEAD(&seed_devices->alloc_list);
+	mutex_init(&seed_devices->device_list_mutex);
 	list_splice_init(&fs_devices->devices, &seed_devices->devices);
 	list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
 	list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1414,6 +1437,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	mutex_lock(&root->fs_info->volume_mutex);
 
 	devices = &root->fs_info->fs_devices->devices;
+	/*
+	 * we have the volume lock, so we don't need the extra
+	 * device list mutex while reading the list here.
+	 */
 	list_for_each_entry(device, devices, dev_list) {
 		if (device->bdev == bdev) {
 			ret = -EEXIST;
@@ -1468,6 +1495,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	}
 
 	device->fs_devices = root->fs_info->fs_devices;
+
+	/*
+	 * we don't want write_supers to jump in here with our device
+	 * half setup
+	 */
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
 	list_add(&device->dev_alloc_list,
 		 &root->fs_info->fs_devices->alloc_list);
@@ -1486,6 +1519,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
 	btrfs_set_super_num_devices(&root->fs_info->super_copy,
 				    total_bytes + 1);
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
 	if (seeding_dev) {
 		ret = init_first_rw_device(trans, root, device);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3c1f731..5139a83 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -96,7 +96,12 @@ struct btrfs_fs_devices {
 	u64 rw_devices;
 	u64 total_rw_bytes;
 	struct block_device *latest_bdev;
-	/* all of the devices in the FS */
+
+	/* all of the devices in the FS, protected by a mutex
+	 * so we can safely walk it to write out the supers without
+	 * worrying about add/remove by the multi-device code
+	 */
+	struct mutex device_list_mutex;
 	struct list_head devices;
 
 	/* devices not currently being allocated */
-- 
cgit v0.10.2


From 4eedeb75e7f15ffdb12d1ad559b565e7505bdbaf Mon Sep 17 00:00:00 2001
From: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Date: Wed, 10 Jun 2009 15:28:55 -0400
Subject: Btrfs: pin buffers during write_dev_supers

write_dev_supers is called in sequence.  First is it called with wait == 0,
which starts IO on all of the super blocks for a given device.  Then it is
called with wait == 1 to make sure they all reach the disk.

It doesn't currently pin the buffers between the two calls, and it also
assumes the buffers won't go away between the two calls, leading to
an oops if the VM manages to free the buffers in the middle of the sync.

This fixes that assumption and updates the code to return an error if things
are not up to date when the wait == 1 run is done.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b7ddc77..0d50d49 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2020,6 +2020,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
 	return latest;
 }
 
+/*
+ * this should be called twice, once with wait == 0 and
+ * once with wait == 1.  When wait == 0 is done, all the buffer heads
+ * we write are pinned.
+ *
+ * They are released when wait == 1 is done.
+ * max_mirrors must be the same for both runs, and it indicates how
+ * many supers on this one device should be written.
+ *
+ * max_mirrors == 0 means to write them all.
+ */
 static int write_dev_supers(struct btrfs_device *device,
 			    struct btrfs_super_block *sb,
 			    int do_barriers, int wait, int max_mirrors)
@@ -2055,12 +2066,16 @@ static int write_dev_supers(struct btrfs_device *device,
 			bh = __find_get_block(device->bdev, bytenr / 4096,
 					      BTRFS_SUPER_INFO_SIZE);
 			BUG_ON(!bh);
-			brelse(bh);
 			wait_on_buffer(bh);
-			if (buffer_uptodate(bh)) {
-				brelse(bh);
-				continue;
-			}
+			if (!buffer_uptodate(bh))
+				errors++;
+
+			/* drop our reference */
+			brelse(bh);
+
+			/* drop the reference from the wait == 0 run */
+			brelse(bh);
+			continue;
 		} else {
 			btrfs_set_super_bytenr(sb, bytenr);
 
@@ -2071,12 +2086,18 @@ static int write_dev_supers(struct btrfs_device *device,
 					      BTRFS_CSUM_SIZE);
 			btrfs_csum_final(crc, sb->csum);
 
+			/*
+			 * one reference for us, and we leave it for the
+			 * caller
+			 */
 			bh = __getblk(device->bdev, bytenr / 4096,
 				      BTRFS_SUPER_INFO_SIZE);
 			memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
 
-			set_buffer_uptodate(bh);
+			/* one reference for submit_bh */
 			get_bh(bh);
+
+			set_buffer_uptodate(bh);
 			lock_buffer(bh);
 			bh->b_end_io = btrfs_end_buffer_write_sync;
 		}
@@ -2088,6 +2109,7 @@ static int write_dev_supers(struct btrfs_device *device,
 				       device->name);
 				set_buffer_uptodate(bh);
 				device->barriers = 0;
+				/* one reference for submit_bh */
 				get_bh(bh);
 				lock_buffer(bh);
 				ret = submit_bh(WRITE_SYNC, bh);
@@ -2096,15 +2118,8 @@ static int write_dev_supers(struct btrfs_device *device,
 			ret = submit_bh(WRITE_SYNC, bh);
 		}
 
-		if (!ret && wait) {
-			wait_on_buffer(bh);
-			if (!buffer_uptodate(bh))
-				errors++;
-		} else if (ret) {
+		if (ret)
 			errors++;
-		}
-		if (wait)
-			brelse(bh);
 	}
 	return errors < i ? 0 : -1;
 }
-- 
cgit v0.10.2


From d9c7d394a8ebacb60097b192939ae9f15235225e Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@novell.com>
Date: Wed, 10 Jun 2009 12:57:06 -0700
Subject: block: prevent possible io_context->refcount overflow

Currently io_context has an atomic_t(32-bit) as refcount.  In the case of
cfq, for each device against whcih a task does I/O, a reference to the
io_context would be taken.  And when there are multiple process sharing
io_contexts(CLONE_IO) would also have a reference to the same io_context.

Theoretically the possible maximum number of processes sharing the same
io_context + the number of disks/cfq_data referring to the same io_context
can overflow the 32-bit counter on a very high-end machine.

Even though it is an improbable case, let us make it atomic_long_t.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 012f065..d4ed600 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -35,9 +35,9 @@ int put_io_context(struct io_context *ioc)
 	if (ioc == NULL)
 		return 1;
 
-	BUG_ON(atomic_read(&ioc->refcount) == 0);
+	BUG_ON(atomic_long_read(&ioc->refcount) == 0);
 
-	if (atomic_dec_and_test(&ioc->refcount)) {
+	if (atomic_long_dec_and_test(&ioc->refcount)) {
 		rcu_read_lock();
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
@@ -90,7 +90,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 
 	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
 	if (ret) {
-		atomic_set(&ret->refcount, 1);
+		atomic_long_set(&ret->refcount, 1);
 		atomic_set(&ret->nr_tasks, 1);
 		spin_lock_init(&ret->lock);
 		ret->ioprio_changed = 0;
@@ -151,7 +151,7 @@ struct io_context *get_io_context(gfp_t gfp_flags, int node)
 		ret = current_io_context(gfp_flags, node);
 		if (unlikely(!ret))
 			break;
-	} while (!atomic_inc_not_zero(&ret->refcount));
+	} while (!atomic_long_inc_not_zero(&ret->refcount));
 
 	return ret;
 }
@@ -163,8 +163,8 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc)
 	struct io_context *dst = *pdst;
 
 	if (src) {
-		BUG_ON(atomic_read(&src->refcount) == 0);
-		atomic_inc(&src->refcount);
+		BUG_ON(atomic_long_read(&src->refcount) == 0);
+		atomic_long_inc(&src->refcount);
 		put_io_context(dst);
 		*pdst = src;
 	}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 99ac430..ef2f72d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1282,7 +1282,7 @@ static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	if (!cfqd->active_cic) {
 		struct cfq_io_context *cic = RQ_CIC(rq);
 
-		atomic_inc(&cic->ioc->refcount);
+		atomic_long_inc(&cic->ioc->refcount);
 		cfqd->active_cic = cic;
 	}
 }
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 08b987b..dd05434 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -64,7 +64,7 @@ struct cfq_io_context {
  * and kmalloc'ed. These could be shared between processes.
  */
 struct io_context {
-	atomic_t refcount;
+	atomic_long_t refcount;
 	atomic_t nr_tasks;
 
 	/* all the fields below are protected by this lock */
@@ -91,8 +91,8 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 	 * if ref count is zero, don't allow sharing (ioc is going away, it's
 	 * a race).
 	 */
-	if (ioc && atomic_inc_not_zero(&ioc->refcount)) {
-		atomic_inc(&ioc->nr_tasks);
+	if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) {
+		atomic_long_inc(&ioc->refcount);
 		return ioc;
 	}
 
-- 
cgit v0.10.2


From 0e0c62123b517d2b3c26922342c0cc5bb63a93f8 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Wed, 10 Jun 2009 12:57:07 -0700
Subject: fs/bio.c: add missing __user annotation

As reported by sparse:

fs/bio.c:720:13: warning: incorrect type in assignment (different address spaces)
fs/bio.c:720:13:    expected char *iov_addr
fs/bio.c:720:13:    got void [noderef] <asn:1>*
fs/bio.c:724:36: warning: incorrect type in argument 2 (different address spaces)
fs/bio.c:724:36:    expected void const [noderef] <asn:1>*from
fs/bio.c:724:36:    got char *iov_addr

Signed-off-by: Michal Simek <monstr@monstr.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/fs/bio.c b/fs/bio.c
index ab423a1..533266a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -722,7 +722,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
 
 		while (bv_len && iov_idx < iov_count) {
 			unsigned int bytes;
-			char *iov_addr;
+			char __user *iov_addr;
 
 			bytes = min_t(unsigned int,
 				      iov[iov_idx].iov_len - iov_off, bv_len);
-- 
cgit v0.10.2


From 1adbee50fd6fce5af4feb34d2db93cfe4d2066a4 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 10 Jun 2009 12:57:08 -0700
Subject: ramdisk: remove long-deprecated "ramdisk=" boot-time parameter

The "ramdisk" parameter was removed from the defunct rd.c file quite some
time ago, in favour of the more specific "ramdisk_size" parameter so, for
consistency, the same should be done here.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 5f7e64b..4bf8705 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -407,12 +407,7 @@ static int __init ramdisk_size(char *str)
 	rd_size = simple_strtol(str, NULL, 0);
 	return 1;
 }
-static int __init ramdisk_size2(char *str)
-{
-	return ramdisk_size(str);
-}
-__setup("ramdisk=", ramdisk_size);
-__setup("ramdisk_size=", ramdisk_size2);
+__setup("ramdisk_size=", ramdisk_size);
 #endif
 
 /*
-- 
cgit v0.10.2


From 5e50b9ef975219304cc91d601530994861585bfe Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Wed, 10 Jun 2009 12:57:09 -0700
Subject: floppy: fix hibernation

Based on Ingo Molnar's patch from 2006, this makes the floppy work after
resume from hibernation, at least on my machine.

This fix resets the floppy controller on resume.  It was experimentally
determined to bring the controller back to life - we don't really know why
it works.

floppy_init() does the same thing at boot/modprobe time.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 90877fe..862b40c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4148,6 +4148,24 @@ static void floppy_device_release(struct device *dev)
 {
 }
 
+static int floppy_resume(struct platform_device *dev)
+{
+	int fdc;
+
+	for (fdc = 0; fdc < N_FDC; fdc++)
+		if (FDCS->address != -1)
+			user_reset_fdc(-1, FD_RESET_ALWAYS, 0);
+
+	return 0;
+}
+
+static struct platform_driver floppy_driver = {
+	.resume = floppy_resume,
+	.driver = {
+		.name = "floppy",
+	},
+};
+
 static struct platform_device floppy_device[N_DRIVE];
 
 static struct kobject *floppy_find(dev_t dev, int *part, void *data)
@@ -4196,10 +4214,14 @@ static int __init floppy_init(void)
 	if (err)
 		goto out_put_disk;
 
+	err = platform_driver_register(&floppy_driver);
+	if (err)
+		goto out_unreg_blkdev;
+
 	floppy_queue = blk_init_queue(do_fd_request, &floppy_lock);
 	if (!floppy_queue) {
 		err = -ENOMEM;
-		goto out_unreg_blkdev;
+		goto out_unreg_driver;
 	}
 	blk_queue_max_sectors(floppy_queue, 64);
 
@@ -4346,6 +4368,8 @@ out_flush_work:
 out_unreg_region:
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
 	blk_cleanup_queue(floppy_queue);
+out_unreg_driver:
+	platform_driver_unregister(&floppy_driver);
 out_unreg_blkdev:
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 out_put_disk:
@@ -4566,6 +4590,7 @@ static void __exit floppy_module_exit(void)
 
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
+	platform_driver_unregister(&floppy_driver);
 
 	for (drive = 0; drive < N_DRIVE; drive++) {
 		del_timer_sync(&motor_off_timer[drive]);
-- 
cgit v0.10.2


From b43d65f7e818485664037a46367cfb15af05bd8c Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Tue, 9 Jun 2009 08:11:42 +0100
Subject: [ARM] 5546/1: ARM PL022 SSP/SPI driver v3

This adds a driver for the ARM PL022 PrimeCell SSP/SPI
driver found in the U300 platforms as well as in some
ARM reference hardware, and in a modified version on the
Nomadik board.

Reviewed-by: Alessandro Rubini <rubini-list@gnudd.com>
Reviewed-by: Russell King <linux@arm.linux.org.uk>
Reviewed-by: Baruch Siach <baruch@tkos.co.il>

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 7c61251..8e7c17e 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -171,6 +171,15 @@ config SPI_ORION
 	help
 	  This enables using the SPI master controller on the Orion chips.
 
+config SPI_PL022
+	tristate "ARM AMBA PL022 SSP controller (EXPERIMENTAL)"
+	depends on ARM_AMBA && EXPERIMENTAL
+	default y if MACH_U300
+	help
+	  This selects the ARM(R) AMBA(R) PrimeCell PL022 SSP
+	  controller. If you have an embedded system with an AMBA(R)
+	  bus and a PL022 controller, say Y or M here.
+
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
 	depends on ARCH_PXA && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 5d04519..ecfadb1 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_SPI_PXA2XX)		+= pxa2xx_spi.o
 obj-$(CONFIG_SPI_OMAP_UWIRE)		+= omap_uwire.o
 obj-$(CONFIG_SPI_OMAP24XX)		+= omap2_mcspi.o
 obj-$(CONFIG_SPI_ORION)			+= orion_spi.o
+obj-$(CONFIG_SPI_PL022)			+= amba-pl022.o
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= mpc52xx_psc_spi.o
 obj-$(CONFIG_SPI_MPC83xx)		+= spi_mpc83xx.o
 obj-$(CONFIG_SPI_S3C24XX_GPIO)		+= spi_s3c24xx_gpio.o
diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c
new file mode 100644
index 0000000..da76797
--- /dev/null
+++ b/drivers/spi/amba-pl022.c
@@ -0,0 +1,1866 @@
+/*
+ * drivers/spi/amba-pl022.c
+ *
+ * A driver for the ARM PL022 PrimeCell SSP/SPI bus master.
+ *
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * Copyright (C) 2006 STMicroelectronics Pvt. Ltd.
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * Initial version inspired by:
+ *	linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c
+ * Initial adoption to PL022 by:
+ *      Sachin Verma <sachin.verma@st.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * TODO:
+ * - add timeout on polled transfers
+ * - add generic DMA framework support
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/ioport.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/amba/bus.h>
+#include <linux/amba/pl022.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+
+/*
+ * This macro is used to define some register default values.
+ * reg is masked with mask, the OR:ed with an (again masked)
+ * val shifted sb steps to the left.
+ */
+#define SSP_WRITE_BITS(reg, val, mask, sb) \
+ ((reg) = (((reg) & ~(mask)) | (((val)<<(sb)) & (mask))))
+
+/*
+ * This macro is also used to define some default values.
+ * It will just shift val by sb steps to the left and mask
+ * the result with mask.
+ */
+#define GEN_MASK_BITS(val, mask, sb) \
+ (((val)<<(sb)) & (mask))
+
+#define DRIVE_TX		0
+#define DO_NOT_DRIVE_TX		1
+
+#define DO_NOT_QUEUE_DMA	0
+#define QUEUE_DMA		1
+
+#define RX_TRANSFER		1
+#define TX_TRANSFER		2
+
+/*
+ * Macros to access SSP Registers with their offsets
+ */
+#define SSP_CR0(r)	(r + 0x000)
+#define SSP_CR1(r)	(r + 0x004)
+#define SSP_DR(r)	(r + 0x008)
+#define SSP_SR(r)	(r + 0x00C)
+#define SSP_CPSR(r)	(r + 0x010)
+#define SSP_IMSC(r)	(r + 0x014)
+#define SSP_RIS(r)	(r + 0x018)
+#define SSP_MIS(r)	(r + 0x01C)
+#define SSP_ICR(r)	(r + 0x020)
+#define SSP_DMACR(r)	(r + 0x024)
+#define SSP_ITCR(r)	(r + 0x080)
+#define SSP_ITIP(r)	(r + 0x084)
+#define SSP_ITOP(r)	(r + 0x088)
+#define SSP_TDR(r)	(r + 0x08C)
+
+#define SSP_PID0(r)	(r + 0xFE0)
+#define SSP_PID1(r)	(r + 0xFE4)
+#define SSP_PID2(r)	(r + 0xFE8)
+#define SSP_PID3(r)	(r + 0xFEC)
+
+#define SSP_CID0(r)	(r + 0xFF0)
+#define SSP_CID1(r)	(r + 0xFF4)
+#define SSP_CID2(r)	(r + 0xFF8)
+#define SSP_CID3(r)	(r + 0xFFC)
+
+/*
+ * SSP Control Register 0  - SSP_CR0
+ */
+#define SSP_CR0_MASK_DSS	(0x1FUL << 0)
+#define SSP_CR0_MASK_HALFDUP	(0x1UL << 5)
+#define SSP_CR0_MASK_SPO	(0x1UL << 6)
+#define SSP_CR0_MASK_SPH	(0x1UL << 7)
+#define SSP_CR0_MASK_SCR	(0xFFUL << 8)
+#define SSP_CR0_MASK_CSS	(0x1FUL << 16)
+#define SSP_CR0_MASK_FRF	(0x3UL << 21)
+
+/*
+ * SSP Control Register 0  - SSP_CR1
+ */
+#define SSP_CR1_MASK_LBM	(0x1UL << 0)
+#define SSP_CR1_MASK_SSE	(0x1UL << 1)
+#define SSP_CR1_MASK_MS		(0x1UL << 2)
+#define SSP_CR1_MASK_SOD	(0x1UL << 3)
+#define SSP_CR1_MASK_RENDN	(0x1UL << 4)
+#define SSP_CR1_MASK_TENDN	(0x1UL << 5)
+#define SSP_CR1_MASK_MWAIT	(0x1UL << 6)
+#define SSP_CR1_MASK_RXIFLSEL	(0x7UL << 7)
+#define SSP_CR1_MASK_TXIFLSEL	(0x7UL << 10)
+
+/*
+ * SSP Data Register - SSP_DR
+ */
+#define SSP_DR_MASK_DATA	0xFFFFFFFF
+
+/*
+ * SSP Status Register - SSP_SR
+ */
+#define SSP_SR_MASK_TFE		(0x1UL << 0) /* Transmit FIFO empty */
+#define SSP_SR_MASK_TNF		(0x1UL << 1) /* Transmit FIFO not full */
+#define SSP_SR_MASK_RNE		(0x1UL << 2) /* Receive FIFO not empty */
+#define SSP_SR_MASK_RFF 	(0x1UL << 3) /* Receive FIFO full */
+#define SSP_SR_MASK_BSY		(0x1UL << 4) /* Busy Flag */
+
+/*
+ * SSP Clock Prescale Register  - SSP_CPSR
+ */
+#define SSP_CPSR_MASK_CPSDVSR	(0xFFUL << 0)
+
+/*
+ * SSP Interrupt Mask Set/Clear Register - SSP_IMSC
+ */
+#define SSP_IMSC_MASK_RORIM (0x1UL << 0) /* Receive Overrun Interrupt mask */
+#define SSP_IMSC_MASK_RTIM  (0x1UL << 1) /* Receive timeout Interrupt mask */
+#define SSP_IMSC_MASK_RXIM  (0x1UL << 2) /* Receive FIFO Interrupt mask */
+#define SSP_IMSC_MASK_TXIM  (0x1UL << 3) /* Transmit FIFO Interrupt mask */
+
+/*
+ * SSP Raw Interrupt Status Register - SSP_RIS
+ */
+/* Receive Overrun Raw Interrupt status */
+#define SSP_RIS_MASK_RORRIS		(0x1UL << 0)
+/* Receive Timeout Raw Interrupt status */
+#define SSP_RIS_MASK_RTRIS		(0x1UL << 1)
+/* Receive FIFO Raw Interrupt status */
+#define SSP_RIS_MASK_RXRIS		(0x1UL << 2)
+/* Transmit FIFO Raw Interrupt status */
+#define SSP_RIS_MASK_TXRIS		(0x1UL << 3)
+
+/*
+ * SSP Masked Interrupt Status Register - SSP_MIS
+ */
+/* Receive Overrun Masked Interrupt status */
+#define SSP_MIS_MASK_RORMIS		(0x1UL << 0)
+/* Receive Timeout Masked Interrupt status */
+#define SSP_MIS_MASK_RTMIS		(0x1UL << 1)
+/* Receive FIFO Masked Interrupt status */
+#define SSP_MIS_MASK_RXMIS		(0x1UL << 2)
+/* Transmit FIFO Masked Interrupt status */
+#define SSP_MIS_MASK_TXMIS		(0x1UL << 3)
+
+/*
+ * SSP Interrupt Clear Register - SSP_ICR
+ */
+/* Receive Overrun Raw Clear Interrupt bit */
+#define SSP_ICR_MASK_RORIC		(0x1UL << 0)
+/* Receive Timeout Clear Interrupt bit */
+#define SSP_ICR_MASK_RTIC		(0x1UL << 1)
+
+/*
+ * SSP DMA Control Register - SSP_DMACR
+ */
+/* Receive DMA Enable bit */
+#define SSP_DMACR_MASK_RXDMAE		(0x1UL << 0)
+/* Transmit DMA Enable bit */
+#define SSP_DMACR_MASK_TXDMAE		(0x1UL << 1)
+
+/*
+ * SSP Integration Test control Register - SSP_ITCR
+ */
+#define SSP_ITCR_MASK_ITEN		(0x1UL << 0)
+#define SSP_ITCR_MASK_TESTFIFO		(0x1UL << 1)
+
+/*
+ * SSP Integration Test Input Register - SSP_ITIP
+ */
+#define ITIP_MASK_SSPRXD		 (0x1UL << 0)
+#define ITIP_MASK_SSPFSSIN		 (0x1UL << 1)
+#define ITIP_MASK_SSPCLKIN		 (0x1UL << 2)
+#define ITIP_MASK_RXDMAC		 (0x1UL << 3)
+#define ITIP_MASK_TXDMAC		 (0x1UL << 4)
+#define ITIP_MASK_SSPTXDIN		 (0x1UL << 5)
+
+/*
+ * SSP Integration Test output Register - SSP_ITOP
+ */
+#define ITOP_MASK_SSPTXD		 (0x1UL << 0)
+#define ITOP_MASK_SSPFSSOUT		 (0x1UL << 1)
+#define ITOP_MASK_SSPCLKOUT		 (0x1UL << 2)
+#define ITOP_MASK_SSPOEn		 (0x1UL << 3)
+#define ITOP_MASK_SSPCTLOEn		 (0x1UL << 4)
+#define ITOP_MASK_RORINTR		 (0x1UL << 5)
+#define ITOP_MASK_RTINTR		 (0x1UL << 6)
+#define ITOP_MASK_RXINTR		 (0x1UL << 7)
+#define ITOP_MASK_TXINTR		 (0x1UL << 8)
+#define ITOP_MASK_INTR			 (0x1UL << 9)
+#define ITOP_MASK_RXDMABREQ		 (0x1UL << 10)
+#define ITOP_MASK_RXDMASREQ		 (0x1UL << 11)
+#define ITOP_MASK_TXDMABREQ		 (0x1UL << 12)
+#define ITOP_MASK_TXDMASREQ		 (0x1UL << 13)
+
+/*
+ * SSP Test Data Register - SSP_TDR
+ */
+#define TDR_MASK_TESTDATA 		(0xFFFFFFFF)
+
+/*
+ * Message State
+ * we use the spi_message.state (void *) pointer to
+ * hold a single state value, that's why all this
+ * (void *) casting is done here.
+ */
+#define STATE_START                     ((void *) 0)
+#define STATE_RUNNING                   ((void *) 1)
+#define STATE_DONE                      ((void *) 2)
+#define STATE_ERROR                     ((void *) -1)
+
+/*
+ * Queue State
+ */
+#define QUEUE_RUNNING                   (0)
+#define QUEUE_STOPPED                   (1)
+/*
+ * SSP State - Whether Enabled or Disabled
+ */
+#define SSP_DISABLED 			(0)
+#define SSP_ENABLED 			(1)
+
+/*
+ * SSP DMA State - Whether DMA Enabled or Disabled
+ */
+#define SSP_DMA_DISABLED 		(0)
+#define SSP_DMA_ENABLED 		(1)
+
+/*
+ * SSP Clock Defaults
+ */
+#define NMDK_SSP_DEFAULT_CLKRATE 0x2
+#define NMDK_SSP_DEFAULT_PRESCALE 0x40
+
+/*
+ * SSP Clock Parameter ranges
+ */
+#define CPSDVR_MIN 0x02
+#define CPSDVR_MAX 0xFE
+#define SCR_MIN 0x00
+#define SCR_MAX 0xFF
+
+/*
+ * SSP Interrupt related Macros
+ */
+#define DEFAULT_SSP_REG_IMSC  0x0UL
+#define DISABLE_ALL_INTERRUPTS DEFAULT_SSP_REG_IMSC
+#define ENABLE_ALL_INTERRUPTS (~DEFAULT_SSP_REG_IMSC)
+
+#define CLEAR_ALL_INTERRUPTS  0x3
+
+
+/*
+ * The type of reading going on on this chip
+ */
+enum ssp_reading {
+	READING_NULL,
+	READING_U8,
+	READING_U16,
+	READING_U32
+};
+
+/**
+ * The type of writing going on on this chip
+ */
+enum ssp_writing {
+	WRITING_NULL,
+	WRITING_U8,
+	WRITING_U16,
+	WRITING_U32
+};
+
+/**
+ * struct vendor_data - vendor-specific config parameters
+ * for PL022 derivates
+ * @fifodepth: depth of FIFOs (both)
+ * @max_bpw: maximum number of bits per word
+ * @unidir: supports unidirection transfers
+ */
+struct vendor_data {
+	int fifodepth;
+	int max_bpw;
+	bool unidir;
+};
+
+/**
+ * struct pl022 - This is the private SSP driver data structure
+ * @adev: AMBA device model hookup
+ * @phybase: The physical memory where the SSP device resides
+ * @virtbase: The virtual memory where the SSP is mapped
+ * @master: SPI framework hookup
+ * @master_info: controller-specific data from machine setup
+ * @regs: SSP controller register's virtual address
+ * @pump_messages: Work struct for scheduling work to the workqueue
+ * @lock: spinlock to syncronise access to driver data
+ * @workqueue: a workqueue on which any spi_message request is queued
+ * @busy: workqueue is busy
+ * @run: workqueue is running
+ * @pump_transfers: Tasklet used in Interrupt Transfer mode
+ * @cur_msg: Pointer to current spi_message being processed
+ * @cur_transfer: Pointer to current spi_transfer
+ * @cur_chip: pointer to current clients chip(assigned from controller_state)
+ * @tx: current position in TX buffer to be read
+ * @tx_end: end position in TX buffer to be read
+ * @rx: current position in RX buffer to be written
+ * @rx_end: end position in RX buffer to be written
+ * @readingtype: the type of read currently going on
+ * @writingtype: the type or write currently going on
+ */
+struct pl022 {
+	struct amba_device		*adev;
+	struct vendor_data		*vendor;
+	resource_size_t			phybase;
+	void __iomem			*virtbase;
+	struct clk			*clk;
+	struct spi_master		*master;
+	struct pl022_ssp_controller	*master_info;
+	/* Driver message queue */
+	struct workqueue_struct		*workqueue;
+	struct work_struct		pump_messages;
+	spinlock_t			queue_lock;
+	struct list_head		queue;
+	int				busy;
+	int				run;
+	/* Message transfer pump */
+	struct tasklet_struct		pump_transfers;
+	struct spi_message		*cur_msg;
+	struct spi_transfer		*cur_transfer;
+	struct chip_data		*cur_chip;
+	void				*tx;
+	void				*tx_end;
+	void				*rx;
+	void				*rx_end;
+	enum ssp_reading		read;
+	enum ssp_writing		write;
+};
+
+/**
+ * struct chip_data - To maintain runtime state of SSP for each client chip
+ * @cr0: Value of control register CR0 of SSP
+ * @cr1: Value of control register CR1 of SSP
+ * @dmacr: Value of DMA control Register of SSP
+ * @cpsr: Value of Clock prescale register
+ * @n_bytes: how many bytes(power of 2) reqd for a given data width of client
+ * @enable_dma: Whether to enable DMA or not
+ * @write: function ptr to be used to write when doing xfer for this chip
+ * @read: function ptr to be used to read when doing xfer for this chip
+ * @cs_control: chip select callback provided by chip
+ * @xfer_type: polling/interrupt/DMA
+ *
+ * Runtime state of the SSP controller, maintained per chip,
+ * This would be set according to the current message that would be served
+ */
+struct chip_data {
+	u16 cr0;
+	u16 cr1;
+	u16 dmacr;
+	u16 cpsr;
+	u8 n_bytes;
+	u8 enable_dma:1;
+	enum ssp_reading read;
+	enum ssp_writing write;
+	void (*cs_control) (u32 command);
+	int xfer_type;
+};
+
+/**
+ * null_cs_control - Dummy chip select function
+ * @command: select/delect the chip
+ *
+ * If no chip select function is provided by client this is used as dummy
+ * chip select
+ */
+static void null_cs_control(u32 command)
+{
+	pr_debug("pl022: dummy chip select control, CS=0x%x\n", command);
+}
+
+/**
+ * giveback - current spi_message is over, schedule next message and call
+ * callback of this message. Assumes that caller already
+ * set message->status; dma and pio irqs are blocked
+ * @pl022: SSP driver private data structure
+ */
+static void giveback(struct pl022 *pl022)
+{
+	struct spi_transfer *last_transfer;
+	unsigned long flags;
+	struct spi_message *msg;
+	void (*curr_cs_control) (u32 command);
+
+	/*
+	 * This local reference to the chip select function
+	 * is needed because we set curr_chip to NULL
+	 * as a step toward termininating the message.
+	 */
+	curr_cs_control = pl022->cur_chip->cs_control;
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+	msg = pl022->cur_msg;
+	pl022->cur_msg = NULL;
+	pl022->cur_transfer = NULL;
+	pl022->cur_chip = NULL;
+	queue_work(pl022->workqueue, &pl022->pump_messages);
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+	last_transfer = list_entry(msg->transfers.prev,
+					struct spi_transfer,
+					transfer_list);
+
+	/* Delay if requested before any change in chip select */
+	if (last_transfer->delay_usecs)
+		/*
+		 * FIXME: This runs in interrupt context.
+		 * Is this really smart?
+		 */
+		udelay(last_transfer->delay_usecs);
+
+	/*
+	 * Drop chip select UNLESS cs_change is true or we are returning
+	 * a message with an error, or next message is for another chip
+	 */
+	if (!last_transfer->cs_change)
+		curr_cs_control(SSP_CHIP_DESELECT);
+	else {
+		struct spi_message *next_msg;
+
+		/* Holding of cs was hinted, but we need to make sure
+		 * the next message is for the same chip.  Don't waste
+		 * time with the following tests unless this was hinted.
+		 *
+		 * We cannot postpone this until pump_messages, because
+		 * after calling msg->complete (below) the driver that
+		 * sent the current message could be unloaded, which
+		 * could invalidate the cs_control() callback...
+		 */
+
+		/* get a pointer to the next message, if any */
+		spin_lock_irqsave(&pl022->queue_lock, flags);
+		if (list_empty(&pl022->queue))
+			next_msg = NULL;
+		else
+			next_msg = list_entry(pl022->queue.next,
+					struct spi_message, queue);
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+		/* see if the next and current messages point
+		 * to the same chip
+		 */
+		if (next_msg && next_msg->spi != msg->spi)
+			next_msg = NULL;
+		if (!next_msg || msg->state == STATE_ERROR)
+			curr_cs_control(SSP_CHIP_DESELECT);
+	}
+	msg->state = NULL;
+	if (msg->complete)
+		msg->complete(msg->context);
+	/* This message is completed, so let's turn off the clock! */
+	clk_disable(pl022->clk);
+}
+
+/**
+ * flush - flush the FIFO to reach a clean state
+ * @pl022: SSP driver private data structure
+ */
+static int flush(struct pl022 *pl022)
+{
+	unsigned long limit = loops_per_jiffy << 1;
+
+	dev_dbg(&pl022->adev->dev, "flush\n");
+	do {
+		while (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE)
+			readw(SSP_DR(pl022->virtbase));
+	} while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_BSY) && limit--);
+	return limit;
+}
+
+/**
+ * restore_state - Load configuration of current chip
+ * @pl022: SSP driver private data structure
+ */
+static void restore_state(struct pl022 *pl022)
+{
+	struct chip_data *chip = pl022->cur_chip;
+
+	writew(chip->cr0, SSP_CR0(pl022->virtbase));
+	writew(chip->cr1, SSP_CR1(pl022->virtbase));
+	writew(chip->dmacr, SSP_DMACR(pl022->virtbase));
+	writew(chip->cpsr, SSP_CPSR(pl022->virtbase));
+	writew(DISABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
+	writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
+}
+
+/**
+ * load_ssp_default_config - Load default configuration for SSP
+ * @pl022: SSP driver private data structure
+ */
+
+/*
+ * Default SSP Register Values
+ */
+#define DEFAULT_SSP_REG_CR0 ( \
+	GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS, 0)	| \
+	GEN_MASK_BITS(SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, SSP_CR0_MASK_HALFDUP, 5) | \
+	GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \
+	GEN_MASK_BITS(SSP_CLK_FALLING_EDGE, SSP_CR0_MASK_SPH, 7) | \
+	GEN_MASK_BITS(NMDK_SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) | \
+	GEN_MASK_BITS(SSP_BITS_8, SSP_CR0_MASK_CSS, 16)	| \
+	GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF, 21) \
+)
+
+#define DEFAULT_SSP_REG_CR1 ( \
+	GEN_MASK_BITS(LOOPBACK_DISABLED, SSP_CR1_MASK_LBM, 0) | \
+	GEN_MASK_BITS(SSP_DISABLED, SSP_CR1_MASK_SSE, 1) | \
+	GEN_MASK_BITS(SSP_MASTER, SSP_CR1_MASK_MS, 2) | \
+	GEN_MASK_BITS(DO_NOT_DRIVE_TX, SSP_CR1_MASK_SOD, 3) | \
+	GEN_MASK_BITS(SSP_RX_MSB, SSP_CR1_MASK_RENDN, 4) | \
+	GEN_MASK_BITS(SSP_TX_MSB, SSP_CR1_MASK_TENDN, 5) | \
+	GEN_MASK_BITS(SSP_MWIRE_WAIT_ZERO, SSP_CR1_MASK_MWAIT, 6) |\
+	GEN_MASK_BITS(SSP_RX_1_OR_MORE_ELEM, SSP_CR1_MASK_RXIFLSEL, 7) | \
+	GEN_MASK_BITS(SSP_TX_1_OR_MORE_EMPTY_LOC, SSP_CR1_MASK_TXIFLSEL, 10) \
+)
+
+#define DEFAULT_SSP_REG_CPSR ( \
+	GEN_MASK_BITS(NMDK_SSP_DEFAULT_PRESCALE, SSP_CPSR_MASK_CPSDVSR, 0) \
+)
+
+#define DEFAULT_SSP_REG_DMACR (\
+	GEN_MASK_BITS(SSP_DMA_DISABLED, SSP_DMACR_MASK_RXDMAE, 0) | \
+	GEN_MASK_BITS(SSP_DMA_DISABLED, SSP_DMACR_MASK_TXDMAE, 1) \
+)
+
+
+static void load_ssp_default_config(struct pl022 *pl022)
+{
+	writew(DEFAULT_SSP_REG_CR0, SSP_CR0(pl022->virtbase));
+	writew(DEFAULT_SSP_REG_CR1, SSP_CR1(pl022->virtbase));
+	writew(DEFAULT_SSP_REG_DMACR, SSP_DMACR(pl022->virtbase));
+	writew(DEFAULT_SSP_REG_CPSR, SSP_CPSR(pl022->virtbase));
+	writew(DISABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
+	writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
+}
+
+/**
+ * This will write to TX and read from RX according to the parameters
+ * set in pl022.
+ */
+static void readwriter(struct pl022 *pl022)
+{
+
+	/*
+	 * The FIFO depth is different inbetween primecell variants.
+	 * I believe filling in too much in the FIFO might cause
+	 * errons in 8bit wide transfers on ARM variants (just 8 words
+	 * FIFO, means only 8x8 = 64 bits in FIFO) at least.
+	 *
+	 * FIXME: currently we have no logic to account for this.
+	 * perhaps there is even something broken in HW regarding
+	 * 8bit transfers (it doesn't fail on 16bit) so this needs
+	 * more investigation...
+	 */
+	dev_dbg(&pl022->adev->dev,
+		"%s, rx: %p, rxend: %p, tx: %p, txend: %p\n",
+		__func__, pl022->rx, pl022->rx_end, pl022->tx, pl022->tx_end);
+
+	/* Read as much as you can */
+	while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE)
+	       && (pl022->rx < pl022->rx_end)) {
+		switch (pl022->read) {
+		case READING_NULL:
+			readw(SSP_DR(pl022->virtbase));
+			break;
+		case READING_U8:
+			*(u8 *) (pl022->rx) =
+				readw(SSP_DR(pl022->virtbase)) & 0xFFU;
+			break;
+		case READING_U16:
+			*(u16 *) (pl022->rx) =
+				(u16) readw(SSP_DR(pl022->virtbase));
+			break;
+		case READING_U32:
+			*(u32 *) (pl022->rx) =
+				readl(SSP_DR(pl022->virtbase));
+			break;
+		}
+		pl022->rx += (pl022->cur_chip->n_bytes);
+	}
+	/*
+	 * Write as much as you can, while keeping an eye on the RX FIFO!
+	 */
+	while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF)
+	       && (pl022->tx < pl022->tx_end)) {
+		switch (pl022->write) {
+		case WRITING_NULL:
+			writew(0x0, SSP_DR(pl022->virtbase));
+			break;
+		case WRITING_U8:
+			writew(*(u8 *) (pl022->tx), SSP_DR(pl022->virtbase));
+			break;
+		case WRITING_U16:
+			writew((*(u16 *) (pl022->tx)), SSP_DR(pl022->virtbase));
+			break;
+		case WRITING_U32:
+			writel(*(u32 *) (pl022->tx), SSP_DR(pl022->virtbase));
+			break;
+		}
+		pl022->tx += (pl022->cur_chip->n_bytes);
+		/*
+		 * This inner reader takes care of things appearing in the RX
+		 * FIFO as we're transmitting. This will happen a lot since the
+		 * clock starts running when you put things into the TX FIFO,
+		 * and then things are continously clocked into the RX FIFO.
+		 */
+		while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE)
+		       && (pl022->rx < pl022->rx_end)) {
+			switch (pl022->read) {
+			case READING_NULL:
+				readw(SSP_DR(pl022->virtbase));
+				break;
+			case READING_U8:
+				*(u8 *) (pl022->rx) =
+					readw(SSP_DR(pl022->virtbase)) & 0xFFU;
+				break;
+			case READING_U16:
+				*(u16 *) (pl022->rx) =
+					(u16) readw(SSP_DR(pl022->virtbase));
+				break;
+			case READING_U32:
+				*(u32 *) (pl022->rx) =
+					readl(SSP_DR(pl022->virtbase));
+				break;
+			}
+			pl022->rx += (pl022->cur_chip->n_bytes);
+		}
+	}
+	/*
+	 * When we exit here the TX FIFO should be full and the RX FIFO
+	 * should be empty
+	 */
+}
+
+
+/**
+ * next_transfer - Move to the Next transfer in the current spi message
+ * @pl022: SSP driver private data structure
+ *
+ * This function moves though the linked list of spi transfers in the
+ * current spi message and returns with the state of current spi
+ * message i.e whether its last transfer is done(STATE_DONE) or
+ * Next transfer is ready(STATE_RUNNING)
+ */
+static void *next_transfer(struct pl022 *pl022)
+{
+	struct spi_message *msg = pl022->cur_msg;
+	struct spi_transfer *trans = pl022->cur_transfer;
+
+	/* Move to next transfer */
+	if (trans->transfer_list.next != &msg->transfers) {
+		pl022->cur_transfer =
+		    list_entry(trans->transfer_list.next,
+			       struct spi_transfer, transfer_list);
+		return STATE_RUNNING;
+	}
+	return STATE_DONE;
+}
+/**
+ * pl022_interrupt_handler - Interrupt handler for SSP controller
+ *
+ * This function handles interrupts generated for an interrupt based transfer.
+ * If a receive overrun (ROR) interrupt is there then we disable SSP, flag the
+ * current message's state as STATE_ERROR and schedule the tasklet
+ * pump_transfers which will do the postprocessing of the current message by
+ * calling giveback(). Otherwise it reads data from RX FIFO till there is no
+ * more data, and writes data in TX FIFO till it is not full. If we complete
+ * the transfer we move to the next transfer and schedule the tasklet.
+ */
+static irqreturn_t pl022_interrupt_handler(int irq, void *dev_id)
+{
+	struct pl022 *pl022 = dev_id;
+	struct spi_message *msg = pl022->cur_msg;
+	u16 irq_status = 0;
+	u16 flag = 0;
+
+	if (unlikely(!msg)) {
+		dev_err(&pl022->adev->dev,
+			"bad message state in interrupt handler");
+		/* Never fail */
+		return IRQ_HANDLED;
+	}
+
+	/* Read the Interrupt Status Register */
+	irq_status = readw(SSP_MIS(pl022->virtbase));
+
+	if (unlikely(!irq_status))
+		return IRQ_NONE;
+
+	/* This handles the error code interrupts */
+	if (unlikely(irq_status & SSP_MIS_MASK_RORMIS)) {
+		/*
+		 * Overrun interrupt - bail out since our Data has been
+		 * corrupted
+		 */
+		dev_err(&pl022->adev->dev,
+			"FIFO overrun\n");
+		if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RFF)
+			dev_err(&pl022->adev->dev,
+				"RXFIFO is full\n");
+		if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF)
+			dev_err(&pl022->adev->dev,
+				"TXFIFO is full\n");
+
+		/*
+		 * Disable and clear interrupts, disable SSP,
+		 * mark message with bad status so it can be
+		 * retried.
+		 */
+		writew(DISABLE_ALL_INTERRUPTS,
+		       SSP_IMSC(pl022->virtbase));
+		writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
+		writew((readw(SSP_CR1(pl022->virtbase)) &
+			(~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase));
+		msg->state = STATE_ERROR;
+
+		/* Schedule message queue handler */
+		tasklet_schedule(&pl022->pump_transfers);
+		return IRQ_HANDLED;
+	}
+
+	readwriter(pl022);
+
+	if ((pl022->tx == pl022->tx_end) && (flag == 0)) {
+		flag = 1;
+		/* Disable Transmit interrupt */
+		writew(readw(SSP_IMSC(pl022->virtbase)) &
+		       (~SSP_IMSC_MASK_TXIM),
+		       SSP_IMSC(pl022->virtbase));
+	}
+
+	/*
+	 * Since all transactions must write as much as shall be read,
+	 * we can conclude the entire transaction once RX is complete.
+	 * At this point, all TX will always be finished.
+	 */
+	if (pl022->rx >= pl022->rx_end) {
+		writew(DISABLE_ALL_INTERRUPTS,
+		       SSP_IMSC(pl022->virtbase));
+		writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
+		if (unlikely(pl022->rx > pl022->rx_end)) {
+			dev_warn(&pl022->adev->dev, "read %u surplus "
+				 "bytes (did you request an odd "
+				 "number of bytes on a 16bit bus?)\n",
+				 (u32) (pl022->rx - pl022->rx_end));
+		}
+		/* Update total bytes transfered */
+		msg->actual_length += pl022->cur_transfer->len;
+		if (pl022->cur_transfer->cs_change)
+			pl022->cur_chip->
+				cs_control(SSP_CHIP_DESELECT);
+		/* Move to next transfer */
+		msg->state = next_transfer(pl022);
+		tasklet_schedule(&pl022->pump_transfers);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * This sets up the pointers to memory for the next message to
+ * send out on the SPI bus.
+ */
+static int set_up_next_transfer(struct pl022 *pl022,
+				struct spi_transfer *transfer)
+{
+	int residue;
+
+	/* Sanity check the message for this bus width */
+	residue = pl022->cur_transfer->len % pl022->cur_chip->n_bytes;
+	if (unlikely(residue != 0)) {
+		dev_err(&pl022->adev->dev,
+			"message of %u bytes to transmit but the current "
+			"chip bus has a data width of %u bytes!\n",
+			pl022->cur_transfer->len,
+			pl022->cur_chip->n_bytes);
+		dev_err(&pl022->adev->dev, "skipping this message\n");
+		return -EIO;
+	}
+	pl022->tx = (void *)transfer->tx_buf;
+	pl022->tx_end = pl022->tx + pl022->cur_transfer->len;
+	pl022->rx = (void *)transfer->rx_buf;
+	pl022->rx_end = pl022->rx + pl022->cur_transfer->len;
+	pl022->write =
+	    pl022->tx ? pl022->cur_chip->write : WRITING_NULL;
+	pl022->read = pl022->rx ? pl022->cur_chip->read : READING_NULL;
+	return 0;
+}
+
+/**
+ * pump_transfers - Tasklet function which schedules next interrupt transfer
+ * when running in interrupt transfer mode.
+ * @data: SSP driver private data structure
+ *
+ */
+static void pump_transfers(unsigned long data)
+{
+	struct pl022 *pl022 = (struct pl022 *) data;
+	struct spi_message *message = NULL;
+	struct spi_transfer *transfer = NULL;
+	struct spi_transfer *previous = NULL;
+
+	/* Get current state information */
+	message = pl022->cur_msg;
+	transfer = pl022->cur_transfer;
+
+	/* Handle for abort */
+	if (message->state == STATE_ERROR) {
+		message->status = -EIO;
+		giveback(pl022);
+		return;
+	}
+
+	/* Handle end of message */
+	if (message->state == STATE_DONE) {
+		message->status = 0;
+		giveback(pl022);
+		return;
+	}
+
+	/* Delay if requested at end of transfer before CS change */
+	if (message->state == STATE_RUNNING) {
+		previous = list_entry(transfer->transfer_list.prev,
+					struct spi_transfer,
+					transfer_list);
+		if (previous->delay_usecs)
+			/*
+			 * FIXME: This runs in interrupt context.
+			 * Is this really smart?
+			 */
+			udelay(previous->delay_usecs);
+
+		/* Drop chip select only if cs_change is requested */
+		if (previous->cs_change)
+			pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+	} else {
+		/* STATE_START */
+		message->state = STATE_RUNNING;
+	}
+
+	if (set_up_next_transfer(pl022, transfer)) {
+		message->state = STATE_ERROR;
+		message->status = -EIO;
+		giveback(pl022);
+		return;
+	}
+	/* Flush the FIFOs and let's go! */
+	flush(pl022);
+	writew(ENABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
+}
+
+/**
+ * NOT IMPLEMENTED
+ * configure_dma - It configures the DMA pipes for DMA transfers
+ * @data: SSP driver's private data structure
+ *
+ */
+static int configure_dma(void *data)
+{
+	struct pl022 *pl022 = data;
+	dev_dbg(&pl022->adev->dev, "configure DMA\n");
+	return -ENOTSUPP;
+}
+
+/**
+ * do_dma_transfer - It handles transfers of the current message
+ * if it is DMA xfer.
+ * NOT FULLY IMPLEMENTED
+ * @data: SSP driver's private data structure
+ */
+static void do_dma_transfer(void *data)
+{
+	struct pl022 *pl022 = data;
+
+	if (configure_dma(data)) {
+		dev_dbg(&pl022->adev->dev, "configuration of DMA Failed!\n");
+		goto err_config_dma;
+	}
+
+	/* TODO: Implememt DMA setup of pipes here */
+
+	/* Enable target chip, set up transfer */
+	pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+	if (set_up_next_transfer(pl022, pl022->cur_transfer)) {
+		/* Error path */
+		pl022->cur_msg->state = STATE_ERROR;
+		pl022->cur_msg->status = -EIO;
+		giveback(pl022);
+		return;
+	}
+	/* Enable SSP */
+	writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE),
+	       SSP_CR1(pl022->virtbase));
+
+	/* TODO: Enable the DMA transfer here */
+	return;
+
+ err_config_dma:
+	pl022->cur_msg->state = STATE_ERROR;
+	pl022->cur_msg->status = -EIO;
+	giveback(pl022);
+	return;
+}
+
+static void do_interrupt_transfer(void *data)
+{
+	struct pl022 *pl022 = data;
+
+	/* Enable target chip */
+	pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+	if (set_up_next_transfer(pl022, pl022->cur_transfer)) {
+		/* Error path */
+		pl022->cur_msg->state = STATE_ERROR;
+		pl022->cur_msg->status = -EIO;
+		giveback(pl022);
+		return;
+	}
+	/* Enable SSP, turn on interrupts */
+	writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE),
+	       SSP_CR1(pl022->virtbase));
+	writew(ENABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
+}
+
+static void do_polling_transfer(void *data)
+{
+	struct pl022 *pl022 = data;
+	struct spi_message *message = NULL;
+	struct spi_transfer *transfer = NULL;
+	struct spi_transfer *previous = NULL;
+	struct chip_data *chip;
+
+	chip = pl022->cur_chip;
+	message = pl022->cur_msg;
+
+	while (message->state != STATE_DONE) {
+		/* Handle for abort */
+		if (message->state == STATE_ERROR)
+			break;
+		transfer = pl022->cur_transfer;
+
+		/* Delay if requested at end of transfer */
+		if (message->state == STATE_RUNNING) {
+			previous =
+			    list_entry(transfer->transfer_list.prev,
+				       struct spi_transfer, transfer_list);
+			if (previous->delay_usecs)
+				udelay(previous->delay_usecs);
+			if (previous->cs_change)
+				pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+		} else {
+			/* STATE_START */
+			message->state = STATE_RUNNING;
+			pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+		}
+
+		/* Configuration Changing Per Transfer */
+		if (set_up_next_transfer(pl022, transfer)) {
+			/* Error path */
+			message->state = STATE_ERROR;
+			break;
+		}
+		/* Flush FIFOs and enable SSP */
+		flush(pl022);
+		writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE),
+		       SSP_CR1(pl022->virtbase));
+
+		dev_dbg(&pl022->adev->dev, "POLLING TRANSFER ONGOING ... \n");
+		/* FIXME: insert a timeout so we don't hang here indefinately */
+		while (pl022->tx < pl022->tx_end || pl022->rx < pl022->rx_end)
+			readwriter(pl022);
+
+		/* Update total byte transfered */
+		message->actual_length += pl022->cur_transfer->len;
+		if (pl022->cur_transfer->cs_change)
+			pl022->cur_chip->cs_control(SSP_CHIP_DESELECT);
+		/* Move to next transfer */
+		message->state = next_transfer(pl022);
+	}
+
+	/* Handle end of message */
+	if (message->state == STATE_DONE)
+		message->status = 0;
+	else
+		message->status = -EIO;
+
+	giveback(pl022);
+	return;
+}
+
+/**
+ * pump_messages - Workqueue function which processes spi message queue
+ * @data: pointer to private data of SSP driver
+ *
+ * This function checks if there is any spi message in the queue that
+ * needs processing and delegate control to appropriate function
+ * do_polling_transfer()/do_interrupt_transfer()/do_dma_transfer()
+ * based on the kind of the transfer
+ *
+ */
+static void pump_messages(struct work_struct *work)
+{
+	struct pl022 *pl022 =
+		container_of(work, struct pl022, pump_messages);
+	unsigned long flags;
+
+	/* Lock queue and check for queue work */
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+	if (list_empty(&pl022->queue) || pl022->run == QUEUE_STOPPED) {
+		pl022->busy = 0;
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		return;
+	}
+	/* Make sure we are not already running a message */
+	if (pl022->cur_msg) {
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		return;
+	}
+	/* Extract head of queue */
+	pl022->cur_msg =
+	    list_entry(pl022->queue.next, struct spi_message, queue);
+
+	list_del_init(&pl022->cur_msg->queue);
+	pl022->busy = 1;
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+	/* Initial message state */
+	pl022->cur_msg->state = STATE_START;
+	pl022->cur_transfer = list_entry(pl022->cur_msg->transfers.next,
+					    struct spi_transfer,
+					    transfer_list);
+
+	/* Setup the SPI using the per chip configuration */
+	pl022->cur_chip = spi_get_ctldata(pl022->cur_msg->spi);
+	/*
+	 * We enable the clock here, then the clock will be disabled when
+	 * giveback() is called in each method (poll/interrupt/DMA)
+	 */
+	clk_enable(pl022->clk);
+	restore_state(pl022);
+	flush(pl022);
+
+	if (pl022->cur_chip->xfer_type == POLLING_TRANSFER)
+		do_polling_transfer(pl022);
+	else if (pl022->cur_chip->xfer_type == INTERRUPT_TRANSFER)
+		do_interrupt_transfer(pl022);
+	else
+		do_dma_transfer(pl022);
+}
+
+
+static int __init init_queue(struct pl022 *pl022)
+{
+	INIT_LIST_HEAD(&pl022->queue);
+	spin_lock_init(&pl022->queue_lock);
+
+	pl022->run = QUEUE_STOPPED;
+	pl022->busy = 0;
+
+	tasklet_init(&pl022->pump_transfers,
+			pump_transfers,	(unsigned long)pl022);
+
+	INIT_WORK(&pl022->pump_messages, pump_messages);
+	pl022->workqueue = create_singlethread_workqueue(
+					dev_name(pl022->master->dev.parent));
+	if (pl022->workqueue == NULL)
+		return -EBUSY;
+
+	return 0;
+}
+
+
+static int start_queue(struct pl022 *pl022)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+
+	if (pl022->run == QUEUE_RUNNING || pl022->busy) {
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		return -EBUSY;
+	}
+
+	pl022->run = QUEUE_RUNNING;
+	pl022->cur_msg = NULL;
+	pl022->cur_transfer = NULL;
+	pl022->cur_chip = NULL;
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+	queue_work(pl022->workqueue, &pl022->pump_messages);
+
+	return 0;
+}
+
+
+static int stop_queue(struct pl022 *pl022)
+{
+	unsigned long flags;
+	unsigned limit = 500;
+	int status = 0;
+
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+
+	/* This is a bit lame, but is optimized for the common execution path.
+	 * A wait_queue on the pl022->busy could be used, but then the common
+	 * execution path (pump_messages) would be required to call wake_up or
+	 * friends on every SPI message. Do this instead */
+	pl022->run = QUEUE_STOPPED;
+	while (!list_empty(&pl022->queue) && pl022->busy && limit--) {
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		msleep(10);
+		spin_lock_irqsave(&pl022->queue_lock, flags);
+	}
+
+	if (!list_empty(&pl022->queue) || pl022->busy)
+		status = -EBUSY;
+
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+	return status;
+}
+
+static int destroy_queue(struct pl022 *pl022)
+{
+	int status;
+
+	status = stop_queue(pl022);
+	/* we are unloading the module or failing to load (only two calls
+	 * to this routine), and neither call can handle a return value.
+	 * However, destroy_workqueue calls flush_workqueue, and that will
+	 * block until all work is done.  If the reason that stop_queue
+	 * timed out is that the work will never finish, then it does no
+	 * good to call destroy_workqueue, so return anyway. */
+	if (status != 0)
+		return status;
+
+	destroy_workqueue(pl022->workqueue);
+
+	return 0;
+}
+
+static int verify_controller_parameters(struct pl022 *pl022,
+					struct pl022_config_chip *chip_info)
+{
+	if ((chip_info->lbm != LOOPBACK_ENABLED)
+	    && (chip_info->lbm != LOOPBACK_DISABLED)) {
+		dev_err(chip_info->dev,
+			"loopback Mode is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->iface < SSP_INTERFACE_MOTOROLA_SPI)
+	    || (chip_info->iface > SSP_INTERFACE_UNIDIRECTIONAL)) {
+		dev_err(chip_info->dev,
+			"interface is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->iface == SSP_INTERFACE_UNIDIRECTIONAL) &&
+	    (!pl022->vendor->unidir)) {
+		dev_err(chip_info->dev,
+			"unidirectional mode not supported in this "
+			"hardware version\n");
+		return -EINVAL;
+	}
+	if ((chip_info->hierarchy != SSP_MASTER)
+	    && (chip_info->hierarchy != SSP_SLAVE)) {
+		dev_err(chip_info->dev,
+			"hierarchy is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if (((chip_info->clk_freq).cpsdvsr < CPSDVR_MIN)
+	    || ((chip_info->clk_freq).cpsdvsr > CPSDVR_MAX)) {
+		dev_err(chip_info->dev,
+			"cpsdvsr is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->endian_rx != SSP_RX_MSB)
+	    && (chip_info->endian_rx != SSP_RX_LSB)) {
+		dev_err(chip_info->dev,
+			"RX FIFO endianess is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->endian_tx != SSP_TX_MSB)
+	    && (chip_info->endian_tx != SSP_TX_LSB)) {
+		dev_err(chip_info->dev,
+			"TX FIFO endianess is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->data_size < SSP_DATA_BITS_4)
+	    || (chip_info->data_size > SSP_DATA_BITS_32)) {
+		dev_err(chip_info->dev,
+			"DATA Size is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->com_mode != INTERRUPT_TRANSFER)
+	    && (chip_info->com_mode != DMA_TRANSFER)
+	    && (chip_info->com_mode != POLLING_TRANSFER)) {
+		dev_err(chip_info->dev,
+			"Communication mode is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->rx_lev_trig < SSP_RX_1_OR_MORE_ELEM)
+	    || (chip_info->rx_lev_trig > SSP_RX_32_OR_MORE_ELEM)) {
+		dev_err(chip_info->dev,
+			"RX FIFO Trigger Level is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->tx_lev_trig < SSP_TX_1_OR_MORE_EMPTY_LOC)
+	    || (chip_info->tx_lev_trig > SSP_TX_32_OR_MORE_EMPTY_LOC)) {
+		dev_err(chip_info->dev,
+			"TX FIFO Trigger Level is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if (chip_info->iface == SSP_INTERFACE_MOTOROLA_SPI) {
+		if ((chip_info->clk_phase != SSP_CLK_RISING_EDGE)
+		    && (chip_info->clk_phase != SSP_CLK_FALLING_EDGE)) {
+			dev_err(chip_info->dev,
+				"Clock Phase is configured incorrectly\n");
+			return -EINVAL;
+		}
+		if ((chip_info->clk_pol != SSP_CLK_POL_IDLE_LOW)
+		    && (chip_info->clk_pol != SSP_CLK_POL_IDLE_HIGH)) {
+			dev_err(chip_info->dev,
+				"Clock Polarity is configured incorrectly\n");
+			return -EINVAL;
+		}
+	}
+	if (chip_info->iface == SSP_INTERFACE_NATIONAL_MICROWIRE) {
+		if ((chip_info->ctrl_len < SSP_BITS_4)
+		    || (chip_info->ctrl_len > SSP_BITS_32)) {
+			dev_err(chip_info->dev,
+				"CTRL LEN is configured incorrectly\n");
+			return -EINVAL;
+		}
+		if ((chip_info->wait_state != SSP_MWIRE_WAIT_ZERO)
+		    && (chip_info->wait_state != SSP_MWIRE_WAIT_ONE)) {
+			dev_err(chip_info->dev,
+				"Wait State is configured incorrectly\n");
+			return -EINVAL;
+		}
+		if ((chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX)
+		    && (chip_info->duplex !=
+			SSP_MICROWIRE_CHANNEL_HALF_DUPLEX)) {
+			dev_err(chip_info->dev,
+				"DUPLEX is configured incorrectly\n");
+			return -EINVAL;
+		}
+	}
+	if (chip_info->cs_control == NULL) {
+		dev_warn(chip_info->dev,
+			"Chip Select Function is NULL for this chip\n");
+		chip_info->cs_control = null_cs_control;
+	}
+	return 0;
+}
+
+/**
+ * pl022_transfer - transfer function registered to SPI master framework
+ * @spi: spi device which is requesting transfer
+ * @msg: spi message which is to handled is queued to driver queue
+ *
+ * This function is registered to the SPI framework for this SPI master
+ * controller. It will queue the spi_message in the queue of driver if
+ * the queue is not stopped and return.
+ */
+static int pl022_transfer(struct spi_device *spi, struct spi_message *msg)
+{
+	struct pl022 *pl022 = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+
+	if (pl022->run == QUEUE_STOPPED) {
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		return -ESHUTDOWN;
+	}
+	msg->actual_length = 0;
+	msg->status = -EINPROGRESS;
+	msg->state = STATE_START;
+
+	list_add_tail(&msg->queue, &pl022->queue);
+	if (pl022->run == QUEUE_RUNNING && !pl022->busy)
+		queue_work(pl022->workqueue, &pl022->pump_messages);
+
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+	return 0;
+}
+
+static int calculate_effective_freq(struct pl022 *pl022,
+				    int freq,
+				    struct ssp_clock_params *clk_freq)
+{
+	/* Lets calculate the frequency parameters */
+	u16 cpsdvsr = 2;
+	u16 scr = 0;
+	bool freq_found = false;
+	u32 rate;
+	u32 max_tclk;
+	u32 min_tclk;
+
+	rate = clk_get_rate(pl022->clk);
+	/* cpsdvscr = 2 & scr 0 */
+	max_tclk = (rate / (CPSDVR_MIN * (1 + SCR_MIN)));
+	/* cpsdvsr = 254 & scr = 255 */
+	min_tclk = (rate / (CPSDVR_MAX * (1 + SCR_MAX)));
+
+	if ((freq <= max_tclk) && (freq >= min_tclk)) {
+		while (cpsdvsr <= CPSDVR_MAX && !freq_found) {
+			while (scr <= SCR_MAX && !freq_found) {
+				if ((rate /
+				     (cpsdvsr * (1 + scr))) > freq)
+					scr += 1;
+				else {
+					/*
+					 * This bool is made true when
+					 * effective frequency >=
+					 * target frequency is found
+					 */
+					freq_found = true;
+					if ((rate /
+					     (cpsdvsr * (1 + scr))) != freq) {
+						if (scr == SCR_MIN) {
+							cpsdvsr -= 2;
+							scr = SCR_MAX;
+						} else
+							scr -= 1;
+					}
+				}
+			}
+			if (!freq_found) {
+				cpsdvsr += 2;
+				scr = SCR_MIN;
+			}
+		}
+		if (cpsdvsr != 0) {
+			dev_dbg(&pl022->adev->dev,
+				"SSP Effective Frequency is %u\n",
+				(rate / (cpsdvsr * (1 + scr))));
+			clk_freq->cpsdvsr = (u8) (cpsdvsr & 0xFF);
+			clk_freq->scr = (u8) (scr & 0xFF);
+			dev_dbg(&pl022->adev->dev,
+				"SSP cpsdvsr = %d, scr = %d\n",
+				clk_freq->cpsdvsr, clk_freq->scr);
+		}
+	} else {
+		dev_err(&pl022->adev->dev,
+			"controller data is incorrect: out of range frequency");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * NOT IMPLEMENTED
+ * process_dma_info - Processes the DMA info provided by client drivers
+ * @chip_info: chip info provided by client device
+ * @chip: Runtime state maintained by the SSP controller for each spi device
+ *
+ * This function processes and stores DMA config provided by client driver
+ * into the runtime state maintained by the SSP controller driver
+ */
+static int process_dma_info(struct pl022_config_chip *chip_info,
+			    struct chip_data *chip)
+{
+	dev_err(chip_info->dev,
+		"cannot process DMA info, DMA not implemented!\n");
+	return -ENOTSUPP;
+}
+
+/**
+ * pl022_setup - setup function registered to SPI master framework
+ * @spi: spi device which is requesting setup
+ *
+ * This function is registered to the SPI framework for this SPI master
+ * controller. If it is the first time when setup is called by this device,
+ * this function will initialize the runtime state for this chip and save
+ * the same in the device structure. Else it will update the runtime info
+ * with the updated chip info. Nothing is really being written to the
+ * controller hardware here, that is not done until the actual transfer
+ * commence.
+ */
+
+/* FIXME: JUST GUESSING the spi->mode bits understood by this driver */
+#define MODEBITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
+			| SPI_LSB_FIRST | SPI_LOOP)
+
+static int pl022_setup(struct spi_device *spi)
+{
+	struct pl022_config_chip *chip_info;
+	struct chip_data *chip;
+	int status = 0;
+	struct pl022 *pl022 = spi_master_get_devdata(spi->master);
+
+	if (spi->mode & ~MODEBITS) {
+		dev_dbg(&spi->dev, "unsupported mode bits %x\n",
+			spi->mode & ~MODEBITS);
+		return -EINVAL;
+	}
+
+	if (!spi->max_speed_hz)
+		return -EINVAL;
+
+	/* Get controller_state if one is supplied */
+	chip = spi_get_ctldata(spi);
+
+	if (chip == NULL) {
+		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
+		if (!chip) {
+			dev_err(&spi->dev,
+				"cannot allocate controller state\n");
+			return -ENOMEM;
+		}
+		dev_dbg(&spi->dev,
+			"allocated memory for controller's runtime state\n");
+	}
+
+	/* Get controller data if one is supplied */
+	chip_info = spi->controller_data;
+
+	if (chip_info == NULL) {
+		/* spi_board_info.controller_data not is supplied */
+		dev_dbg(&spi->dev,
+			"using default controller_data settings\n");
+
+		chip_info =
+			kzalloc(sizeof(struct pl022_config_chip), GFP_KERNEL);
+
+		if (!chip_info) {
+			dev_err(&spi->dev,
+				"cannot allocate controller data\n");
+			status = -ENOMEM;
+			goto err_first_setup;
+		}
+
+		dev_dbg(&spi->dev, "allocated memory for controller data\n");
+
+		/* Pointer back to the SPI device */
+		chip_info->dev = &spi->dev;
+		/*
+		 * Set controller data default values:
+		 * Polling is supported by default
+		 */
+		chip_info->lbm = LOOPBACK_DISABLED;
+		chip_info->com_mode = POLLING_TRANSFER;
+		chip_info->iface = SSP_INTERFACE_MOTOROLA_SPI;
+		chip_info->hierarchy = SSP_SLAVE;
+		chip_info->slave_tx_disable = DO_NOT_DRIVE_TX;
+		chip_info->endian_tx = SSP_TX_LSB;
+		chip_info->endian_rx = SSP_RX_LSB;
+		chip_info->data_size = SSP_DATA_BITS_12;
+		chip_info->rx_lev_trig = SSP_RX_1_OR_MORE_ELEM;
+		chip_info->tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC;
+		chip_info->clk_phase = SSP_CLK_FALLING_EDGE;
+		chip_info->clk_pol = SSP_CLK_POL_IDLE_LOW;
+		chip_info->ctrl_len = SSP_BITS_8;
+		chip_info->wait_state = SSP_MWIRE_WAIT_ZERO;
+		chip_info->duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX;
+		chip_info->cs_control = null_cs_control;
+	} else {
+		dev_dbg(&spi->dev,
+			"using user supplied controller_data settings\n");
+	}
+
+	/*
+	 * We can override with custom divisors, else we use the board
+	 * frequency setting
+	 */
+	if ((0 == chip_info->clk_freq.cpsdvsr)
+	    && (0 == chip_info->clk_freq.scr)) {
+		status = calculate_effective_freq(pl022,
+						  spi->max_speed_hz,
+						  &chip_info->clk_freq);
+		if (status < 0)
+			goto err_config_params;
+	} else {
+		if ((chip_info->clk_freq.cpsdvsr % 2) != 0)
+			chip_info->clk_freq.cpsdvsr =
+				chip_info->clk_freq.cpsdvsr - 1;
+	}
+	status = verify_controller_parameters(pl022, chip_info);
+	if (status) {
+		dev_err(&spi->dev, "controller data is incorrect");
+		goto err_config_params;
+	}
+	/* Now set controller state based on controller data */
+	chip->xfer_type = chip_info->com_mode;
+	chip->cs_control = chip_info->cs_control;
+
+	if (chip_info->data_size <= 8) {
+		dev_dbg(&spi->dev, "1 <= n <=8 bits per word\n");
+		chip->n_bytes = 1;
+		chip->read = READING_U8;
+		chip->write = WRITING_U8;
+	} else if (chip_info->data_size <= 16) {
+		dev_dbg(&spi->dev, "9 <= n <= 16 bits per word\n");
+		chip->n_bytes = 2;
+		chip->read = READING_U16;
+		chip->write = WRITING_U16;
+	} else {
+		if (pl022->vendor->max_bpw >= 32) {
+			dev_dbg(&spi->dev, "17 <= n <= 32 bits per word\n");
+			chip->n_bytes = 4;
+			chip->read = READING_U32;
+			chip->write = WRITING_U32;
+		} else {
+			dev_err(&spi->dev,
+				"illegal data size for this controller!\n");
+			dev_err(&spi->dev,
+				"a standard pl022 can only handle "
+				"1 <= n <= 16 bit words\n");
+			goto err_config_params;
+		}
+	}
+
+	/* Now Initialize all register settings required for this chip */
+	chip->cr0 = 0;
+	chip->cr1 = 0;
+	chip->dmacr = 0;
+	chip->cpsr = 0;
+	if ((chip_info->com_mode == DMA_TRANSFER)
+	    && ((pl022->master_info)->enable_dma)) {
+		chip->enable_dma = 1;
+		dev_dbg(&spi->dev, "DMA mode set in controller state\n");
+		status = process_dma_info(chip_info, chip);
+		if (status < 0)
+			goto err_config_params;
+		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_ENABLED,
+			       SSP_DMACR_MASK_RXDMAE, 0);
+		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_ENABLED,
+			       SSP_DMACR_MASK_TXDMAE, 1);
+	} else {
+		chip->enable_dma = 0;
+		dev_dbg(&spi->dev, "DMA mode NOT set in controller state\n");
+		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_DISABLED,
+			       SSP_DMACR_MASK_RXDMAE, 0);
+		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_DISABLED,
+			       SSP_DMACR_MASK_TXDMAE, 1);
+	}
+
+	chip->cpsr = chip_info->clk_freq.cpsdvsr;
+
+	SSP_WRITE_BITS(chip->cr0, chip_info->data_size, SSP_CR0_MASK_DSS, 0);
+	SSP_WRITE_BITS(chip->cr0, chip_info->duplex, SSP_CR0_MASK_HALFDUP, 5);
+	SSP_WRITE_BITS(chip->cr0, chip_info->clk_pol, SSP_CR0_MASK_SPO, 6);
+	SSP_WRITE_BITS(chip->cr0, chip_info->clk_phase, SSP_CR0_MASK_SPH, 7);
+	SSP_WRITE_BITS(chip->cr0, chip_info->clk_freq.scr, SSP_CR0_MASK_SCR, 8);
+	SSP_WRITE_BITS(chip->cr0, chip_info->ctrl_len, SSP_CR0_MASK_CSS, 16);
+	SSP_WRITE_BITS(chip->cr0, chip_info->iface, SSP_CR0_MASK_FRF, 21);
+	SSP_WRITE_BITS(chip->cr1, chip_info->lbm, SSP_CR1_MASK_LBM, 0);
+	SSP_WRITE_BITS(chip->cr1, SSP_DISABLED, SSP_CR1_MASK_SSE, 1);
+	SSP_WRITE_BITS(chip->cr1, chip_info->hierarchy, SSP_CR1_MASK_MS, 2);
+	SSP_WRITE_BITS(chip->cr1, chip_info->slave_tx_disable, SSP_CR1_MASK_SOD, 3);
+	SSP_WRITE_BITS(chip->cr1, chip_info->endian_rx, SSP_CR1_MASK_RENDN, 4);
+	SSP_WRITE_BITS(chip->cr1, chip_info->endian_tx, SSP_CR1_MASK_TENDN, 5);
+	SSP_WRITE_BITS(chip->cr1, chip_info->wait_state, SSP_CR1_MASK_MWAIT, 6);
+	SSP_WRITE_BITS(chip->cr1, chip_info->rx_lev_trig, SSP_CR1_MASK_RXIFLSEL, 7);
+	SSP_WRITE_BITS(chip->cr1, chip_info->tx_lev_trig, SSP_CR1_MASK_TXIFLSEL, 10);
+
+	/* Save controller_state */
+	spi_set_ctldata(spi, chip);
+	return status;
+ err_config_params:
+ err_first_setup:
+	kfree(chip);
+	return status;
+}
+
+/**
+ * pl022_cleanup - cleanup function registered to SPI master framework
+ * @spi: spi device which is requesting cleanup
+ *
+ * This function is registered to the SPI framework for this SPI master
+ * controller. It will free the runtime state of chip.
+ */
+static void pl022_cleanup(struct spi_device *spi)
+{
+	struct chip_data *chip = spi_get_ctldata(spi);
+
+	spi_set_ctldata(spi, NULL);
+	kfree(chip);
+}
+
+
+static int __init
+pl022_probe(struct amba_device *adev, struct amba_id *id)
+{
+	struct device *dev = &adev->dev;
+	struct pl022_ssp_controller *platform_info = adev->dev.platform_data;
+	struct spi_master *master;
+	struct pl022 *pl022 = NULL;	/*Data for this driver */
+	int status = 0;
+
+	dev_info(&adev->dev,
+		 "ARM PL022 driver, device ID: 0x%08x\n", adev->periphid);
+	if (platform_info == NULL) {
+		dev_err(&adev->dev, "probe - no platform data supplied\n");
+		status = -ENODEV;
+		goto err_no_pdata;
+	}
+
+	/* Allocate master with space for data */
+	master = spi_alloc_master(dev, sizeof(struct pl022));
+	if (master == NULL) {
+		dev_err(&adev->dev, "probe - cannot alloc SPI master\n");
+		status = -ENOMEM;
+		goto err_no_master;
+	}
+
+	pl022 = spi_master_get_devdata(master);
+	pl022->master = master;
+	pl022->master_info = platform_info;
+	pl022->adev = adev;
+	pl022->vendor = id->data;
+
+	/*
+	 * Bus Number Which has been Assigned to this SSP controller
+	 * on this board
+	 */
+	master->bus_num = platform_info->bus_id;
+	master->num_chipselect = platform_info->num_chipselect;
+	master->cleanup = pl022_cleanup;
+	master->setup = pl022_setup;
+	master->transfer = pl022_transfer;
+
+	dev_dbg(&adev->dev, "BUSNO: %d\n", master->bus_num);
+
+	status = amba_request_regions(adev, NULL);
+	if (status)
+		goto err_no_ioregion;
+
+	pl022->virtbase = ioremap(adev->res.start, resource_size(&adev->res));
+	if (pl022->virtbase == NULL) {
+		status = -ENOMEM;
+		goto err_no_ioremap;
+	}
+	printk(KERN_INFO "pl022: mapped registers from 0x%08x to %p\n",
+	       adev->res.start, pl022->virtbase);
+
+	pl022->clk = clk_get(&adev->dev, NULL);
+	if (IS_ERR(pl022->clk)) {
+		status = PTR_ERR(pl022->clk);
+		dev_err(&adev->dev, "could not retrieve SSP/SPI bus clock\n");
+		goto err_no_clk;
+	}
+
+	/* Disable SSP */
+	clk_enable(pl022->clk);
+	writew((readw(SSP_CR1(pl022->virtbase)) & (~SSP_CR1_MASK_SSE)),
+	       SSP_CR1(pl022->virtbase));
+	load_ssp_default_config(pl022);
+	clk_disable(pl022->clk);
+
+	status = request_irq(adev->irq[0], pl022_interrupt_handler, 0, "pl022",
+			     pl022);
+	if (status < 0) {
+		dev_err(&adev->dev, "probe - cannot get IRQ (%d)\n", status);
+		goto err_no_irq;
+	}
+	/* Initialize and start queue */
+	status = init_queue(pl022);
+	if (status != 0) {
+		dev_err(&adev->dev, "probe - problem initializing queue\n");
+		goto err_init_queue;
+	}
+	status = start_queue(pl022);
+	if (status != 0) {
+		dev_err(&adev->dev, "probe - problem starting queue\n");
+		goto err_start_queue;
+	}
+	/* Register with the SPI framework */
+	amba_set_drvdata(adev, pl022);
+	status = spi_register_master(master);
+	if (status != 0) {
+		dev_err(&adev->dev,
+			"probe - problem registering spi master\n");
+		goto err_spi_register;
+	}
+	dev_dbg(dev, "probe succeded\n");
+	return 0;
+
+ err_spi_register:
+ err_start_queue:
+ err_init_queue:
+	destroy_queue(pl022);
+	free_irq(adev->irq[0], pl022);
+ err_no_irq:
+	clk_put(pl022->clk);
+ err_no_clk:
+	iounmap(pl022->virtbase);
+ err_no_ioremap:
+	amba_release_regions(adev);
+ err_no_ioregion:
+	spi_master_put(master);
+ err_no_master:
+ err_no_pdata:
+	return status;
+}
+
+static int __exit
+pl022_remove(struct amba_device *adev)
+{
+	struct pl022 *pl022 = amba_get_drvdata(adev);
+	int status = 0;
+	if (!pl022)
+		return 0;
+
+	/* Remove the queue */
+	status = destroy_queue(pl022);
+	if (status != 0) {
+		dev_err(&adev->dev,
+			"queue remove failed (%d)\n", status);
+		return status;
+	}
+	load_ssp_default_config(pl022);
+	free_irq(adev->irq[0], pl022);
+	clk_disable(pl022->clk);
+	clk_put(pl022->clk);
+	iounmap(pl022->virtbase);
+	amba_release_regions(adev);
+	tasklet_disable(&pl022->pump_transfers);
+	spi_unregister_master(pl022->master);
+	spi_master_put(pl022->master);
+	amba_set_drvdata(adev, NULL);
+	dev_dbg(&adev->dev, "remove succeded\n");
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int pl022_suspend(struct amba_device *adev, pm_message_t state)
+{
+	struct pl022 *pl022 = amba_get_drvdata(adev);
+	int status = 0;
+
+	status = stop_queue(pl022);
+	if (status) {
+		dev_warn(&adev->dev, "suspend cannot stop queue\n");
+		return status;
+	}
+
+	clk_enable(pl022->clk);
+	load_ssp_default_config(pl022);
+	clk_disable(pl022->clk);
+	dev_dbg(&adev->dev, "suspended\n");
+	return 0;
+}
+
+static int pl022_resume(struct amba_device *adev)
+{
+	struct pl022 *pl022 = amba_get_drvdata(adev);
+	int status = 0;
+
+	/* Start the queue running */
+	status = start_queue(pl022);
+	if (status)
+		dev_err(&adev->dev, "problem starting queue (%d)\n", status);
+	else
+		dev_dbg(&adev->dev, "resumed\n");
+
+	return status;
+}
+#else
+#define pl022_suspend NULL
+#define pl022_resume NULL
+#endif	/* CONFIG_PM */
+
+static struct vendor_data vendor_arm = {
+	.fifodepth = 8,
+	.max_bpw = 16,
+	.unidir = false,
+};
+
+
+static struct vendor_data vendor_st = {
+	.fifodepth = 32,
+	.max_bpw = 32,
+	.unidir = false,
+};
+
+static struct amba_id pl022_ids[] = {
+	{
+		/*
+		 * ARM PL022 variant, this has a 16bit wide
+		 * and 8 locations deep TX/RX FIFO
+		 */
+		.id	= 0x00041022,
+		.mask	= 0x000fffff,
+		.data	= &vendor_arm,
+	},
+	{
+		/*
+		 * ST Micro derivative, this has 32bit wide
+		 * and 32 locations deep TX/RX FIFO
+		 */
+		.id	= 0x00108022,
+		.mask	= 0xffffffff,
+		.data	= &vendor_st,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver pl022_driver = {
+	.drv = {
+		.name	= "ssp-pl022",
+	},
+	.id_table	= pl022_ids,
+	.probe		= pl022_probe,
+	.remove		= __exit_p(pl022_remove),
+	.suspend        = pl022_suspend,
+	.resume         = pl022_resume,
+};
+
+
+static int __init pl022_init(void)
+{
+	return amba_driver_register(&pl022_driver);
+}
+
+module_init(pl022_init);
+
+static void __exit pl022_exit(void)
+{
+	amba_driver_unregister(&pl022_driver);
+}
+
+module_exit(pl022_exit);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
+MODULE_DESCRIPTION("PL022 SSP Controller Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h
new file mode 100644
index 0000000..dcad0ff
--- /dev/null
+++ b/include/linux/amba/pl022.h
@@ -0,0 +1,264 @@
+/*
+ * include/linux/amba/pl022.h
+ *
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * Copyright (C) 2006 STMicroelectronics Pvt. Ltd.
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * Initial version inspired by:
+ *	linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c
+ * Initial adoption to PL022 by:
+ *      Sachin Verma <sachin.verma@st.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _SSP_PL022_H
+#define _SSP_PL022_H
+
+#include <linux/device.h>
+
+/**
+ * whether SSP is in loopback mode or not
+ */
+enum ssp_loopback {
+	LOOPBACK_DISABLED,
+	LOOPBACK_ENABLED
+};
+
+/**
+ * enum ssp_interface - interfaces allowed for this SSP Controller
+ * @SSP_INTERFACE_MOTOROLA_SPI: Motorola Interface
+ * @SSP_INTERFACE_TI_SYNC_SERIAL: Texas Instrument Synchronous Serial
+ * interface
+ * @SSP_INTERFACE_NATIONAL_MICROWIRE: National Semiconductor Microwire
+ * interface
+ * @SSP_INTERFACE_UNIDIRECTIONAL: Unidirectional interface (STn8810
+ * &STn8815 only)
+ */
+enum ssp_interface {
+	SSP_INTERFACE_MOTOROLA_SPI,
+	SSP_INTERFACE_TI_SYNC_SERIAL,
+	SSP_INTERFACE_NATIONAL_MICROWIRE,
+	SSP_INTERFACE_UNIDIRECTIONAL
+};
+
+/**
+ * enum ssp_hierarchy - whether SSP is configured as Master or Slave
+ */
+enum ssp_hierarchy {
+	SSP_MASTER,
+	SSP_SLAVE
+};
+
+/**
+ * enum ssp_clock_params - clock parameters, to set SSP clock at a
+ * desired freq
+ */
+struct ssp_clock_params {
+	u8 cpsdvsr; /* value from 2 to 254 (even only!) */
+	u8 scr;	    /* value from 0 to 255 */
+};
+
+/**
+ * enum ssp_rx_endian - endianess of Rx FIFO Data
+ */
+enum ssp_rx_endian {
+	SSP_RX_MSB,
+	SSP_RX_LSB
+};
+
+/**
+ * enum ssp_tx_endian - endianess of Tx FIFO Data
+ */
+enum ssp_tx_endian {
+	SSP_TX_MSB,
+	SSP_TX_LSB
+};
+
+/**
+ * enum ssp_data_size - number of bits in one data element
+ */
+enum ssp_data_size {
+	SSP_DATA_BITS_4 = 0x03, SSP_DATA_BITS_5, SSP_DATA_BITS_6,
+	SSP_DATA_BITS_7, SSP_DATA_BITS_8, SSP_DATA_BITS_9,
+	SSP_DATA_BITS_10, SSP_DATA_BITS_11, SSP_DATA_BITS_12,
+	SSP_DATA_BITS_13, SSP_DATA_BITS_14, SSP_DATA_BITS_15,
+	SSP_DATA_BITS_16, SSP_DATA_BITS_17, SSP_DATA_BITS_18,
+	SSP_DATA_BITS_19, SSP_DATA_BITS_20, SSP_DATA_BITS_21,
+	SSP_DATA_BITS_22, SSP_DATA_BITS_23, SSP_DATA_BITS_24,
+	SSP_DATA_BITS_25, SSP_DATA_BITS_26, SSP_DATA_BITS_27,
+	SSP_DATA_BITS_28, SSP_DATA_BITS_29, SSP_DATA_BITS_30,
+	SSP_DATA_BITS_31, SSP_DATA_BITS_32
+};
+
+/**
+ * enum ssp_mode - SSP mode of operation (Communication modes)
+ */
+enum ssp_mode {
+	INTERRUPT_TRANSFER,
+	POLLING_TRANSFER,
+	DMA_TRANSFER
+};
+
+/**
+ * enum ssp_rx_level_trig - receive FIFO watermark level which triggers
+ * IT: Interrupt fires when _N_ or more elements in RX FIFO.
+ */
+enum ssp_rx_level_trig {
+	SSP_RX_1_OR_MORE_ELEM,
+	SSP_RX_4_OR_MORE_ELEM,
+	SSP_RX_8_OR_MORE_ELEM,
+	SSP_RX_16_OR_MORE_ELEM,
+	SSP_RX_32_OR_MORE_ELEM
+};
+
+/**
+ * Transmit FIFO watermark level which triggers (IT Interrupt fires
+ * when _N_ or more empty locations in TX FIFO)
+ */
+enum ssp_tx_level_trig {
+	SSP_TX_1_OR_MORE_EMPTY_LOC,
+	SSP_TX_4_OR_MORE_EMPTY_LOC,
+	SSP_TX_8_OR_MORE_EMPTY_LOC,
+	SSP_TX_16_OR_MORE_EMPTY_LOC,
+	SSP_TX_32_OR_MORE_EMPTY_LOC
+};
+
+/**
+ * enum SPI Clock Phase - clock phase (Motorola SPI interface only)
+ * @SSP_CLK_RISING_EDGE: Receive data on rising edge
+ * @SSP_CLK_FALLING_EDGE: Receive data on falling edge
+ */
+enum ssp_spi_clk_phase {
+	SSP_CLK_RISING_EDGE,
+	SSP_CLK_FALLING_EDGE
+};
+
+/**
+ * enum SPI Clock Polarity - clock polarity (Motorola SPI interface only)
+ * @SSP_CLK_POL_IDLE_LOW: Low inactive level
+ * @SSP_CLK_POL_IDLE_HIGH: High inactive level
+ */
+enum ssp_spi_clk_pol {
+	SSP_CLK_POL_IDLE_LOW,
+	SSP_CLK_POL_IDLE_HIGH
+};
+
+/**
+ * Microwire Conrol Lengths Command size in microwire format
+ */
+enum ssp_microwire_ctrl_len {
+	SSP_BITS_4 = 0x03, SSP_BITS_5, SSP_BITS_6,
+	SSP_BITS_7, SSP_BITS_8, SSP_BITS_9,
+	SSP_BITS_10, SSP_BITS_11, SSP_BITS_12,
+	SSP_BITS_13, SSP_BITS_14, SSP_BITS_15,
+	SSP_BITS_16, SSP_BITS_17, SSP_BITS_18,
+	SSP_BITS_19, SSP_BITS_20, SSP_BITS_21,
+	SSP_BITS_22, SSP_BITS_23, SSP_BITS_24,
+	SSP_BITS_25, SSP_BITS_26, SSP_BITS_27,
+	SSP_BITS_28, SSP_BITS_29, SSP_BITS_30,
+	SSP_BITS_31, SSP_BITS_32
+};
+
+/**
+ * enum Microwire Wait State
+ * @SSP_MWIRE_WAIT_ZERO: No wait state inserted after last command bit
+ * @SSP_MWIRE_WAIT_ONE: One wait state inserted after last command bit
+ */
+enum ssp_microwire_wait_state {
+	SSP_MWIRE_WAIT_ZERO,
+	SSP_MWIRE_WAIT_ONE
+};
+
+/**
+ * enum Microwire - whether Full/Half Duplex
+ * @SSP_MICROWIRE_CHANNEL_FULL_DUPLEX: SSPTXD becomes bi-directional,
+ *     SSPRXD not used
+ * @SSP_MICROWIRE_CHANNEL_HALF_DUPLEX: SSPTXD is an output, SSPRXD is
+ *     an input.
+ */
+enum ssp_duplex {
+	SSP_MICROWIRE_CHANNEL_FULL_DUPLEX,
+	SSP_MICROWIRE_CHANNEL_HALF_DUPLEX
+};
+
+/**
+ * CHIP select/deselect commands
+ */
+enum ssp_chip_select {
+	SSP_CHIP_SELECT,
+	SSP_CHIP_DESELECT
+};
+
+
+/**
+ * struct pl022_ssp_master - device.platform_data for SPI controller devices.
+ * @num_chipselect: chipselects are used to distinguish individual
+ *     SPI slaves, and are numbered from zero to num_chipselects - 1.
+ *     each slave has a chipselect signal, but it's common that not
+ *     every chipselect is connected to a slave.
+ * @enable_dma: if true enables DMA driven transfers.
+ */
+struct pl022_ssp_controller {
+	u16 bus_id;
+	u8 num_chipselect;
+	u8 enable_dma:1;
+};
+
+/**
+ * struct ssp_config_chip - spi_board_info.controller_data for SPI
+ * slave devices, copied to spi_device.controller_data.
+ *
+ * @lbm: used for test purpose to internally connect RX and TX
+ * @iface: Interface type(Motorola, TI, Microwire, Universal)
+ * @hierarchy: sets whether interface is master or slave
+ * @slave_tx_disable: SSPTXD is disconnected (in slave mode only)
+ * @clk_freq: Tune freq parameters of SSP(when in master mode)
+ * @endian_rx: Endianess of Data in Rx FIFO
+ * @endian_tx: Endianess of Data in Tx FIFO
+ * @data_size: Width of data element(4 to 32 bits)
+ * @com_mode: communication mode: polling, Interrupt or DMA
+ * @rx_lev_trig: Rx FIFO watermark level (for IT & DMA mode)
+ * @tx_lev_trig: Tx FIFO watermark level (for IT & DMA mode)
+ * @clk_phase: Motorola SPI interface Clock phase
+ * @clk_pol: Motorola SPI interface Clock polarity
+ * @ctrl_len: Microwire interface: Control length
+ * @wait_state: Microwire interface: Wait state
+ * @duplex: Microwire interface: Full/Half duplex
+ * @cs_control: function pointer to board-specific function to
+ * assert/deassert I/O port to control HW generation of devices chip-select.
+ * @dma_xfer_type: Type of DMA xfer (Mem-to-periph or Periph-to-Periph)
+ * @dma_config: DMA configuration for SSP controller and peripheral
+ */
+struct pl022_config_chip {
+	struct device *dev;
+	enum ssp_loopback lbm;
+	enum ssp_interface iface;
+	enum ssp_hierarchy hierarchy;
+	bool slave_tx_disable;
+	struct ssp_clock_params clk_freq;
+	enum ssp_rx_endian endian_rx;
+	enum ssp_tx_endian endian_tx;
+	enum ssp_data_size data_size;
+	enum ssp_mode com_mode;
+	enum ssp_rx_level_trig rx_lev_trig;
+	enum ssp_tx_level_trig tx_lev_trig;
+	enum ssp_spi_clk_phase clk_phase;
+	enum ssp_spi_clk_pol clk_pol;
+	enum ssp_microwire_ctrl_len ctrl_len;
+	enum ssp_microwire_wait_state wait_state;
+	enum ssp_duplex duplex;
+	void (*cs_control) (u32 control);
+};
+
+#endif /* _SSP_PL022_H */
-- 
cgit v0.10.2


From 0aacfe1d239952a87c17e38e12df94c4ea979839 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 1 Jun 2009 18:17:53 +0100
Subject: [ARM] Make ARM_VIC_NR depend on ARM_VIC

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 38518d4..08f2786 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -7,6 +7,7 @@ config ARM_VIC
 config ARM_VIC_NR
 	int
 	default 2
+	depends on ARM_VIC
 	help
 	  The maximum number of VICs available in the system, for
 	  power management.
-- 
cgit v0.10.2


From 0de51088e6a82bc8413d3ca9e28bbca2788b5b53 Mon Sep 17 00:00:00 2001
From: Harald Welte <HaraldWelte@viatech.com>
Date: Mon, 8 Jun 2009 18:27:54 +0800
Subject: CPUFREQ: Enable acpi-cpufreq driver for VIA/Centaur CPUs

The VIA/Centaur C7, C7-M and Nano CPU's all support ACPI based cpu p-states
using a MSR interface.  The Linux driver just never made use of it, since in
addition to the check for the EST flag it also checked if the vendor is Intel.

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
[ Removed the vendor checks entirely  - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 752e8c6..ae9b5032 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -90,11 +90,7 @@ static int check_est_cpu(unsigned int cpuid)
 {
 	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
 
-	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
-	    !cpu_has(cpu, X86_FEATURE_EST))
-		return 0;
-
-	return 1;
+	return cpu_has(cpu, X86_FEATURE_EST);
 }
 
 static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
-- 
cgit v0.10.2


From 0fea615e526b4b7eff0363ee02d5753e5f924089 Mon Sep 17 00:00:00 2001
From: Harald Welte <HaraldWelte@viatech.com>
Date: Mon, 8 Jun 2009 18:29:36 +0800
Subject: CPUFREQ: Mark e_powersaver driver as EXPERIMENTAL and DANGEROUS

The e_powersaver driver for VIA's C7 CPU's needs to be marked as
DANGEROUS as it configures the CPU to power states that are out
of specification.

According to Centaur, all systems with C7 and Nano CPU's support
the ACPI p-state method.  Thus, the acpi-cpufreq driver should
be used instead.

Signed-off-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 52c8398..f138c6c 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -220,11 +220,14 @@ config X86_LONGHAUL
 	  If in doubt, say N.
 
 config X86_E_POWERSAVER
-	tristate "VIA C7 Enhanced PowerSaver"
+	tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
 	select CPU_FREQ_TABLE
-	depends on X86_32
+	depends on X86_32 && EXPERIMENTAL
 	help
-	  This adds the CPUFreq driver for VIA C7 processors.
+	  This adds the CPUFreq driver for VIA C7 processors.  However, this driver
+	  does not have any safeguards to prevent operating the CPU out of spec
+	  and is thus considered dangerous.  Please use the regular ACPI cpufreq
+	  driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
 
 	  If in doubt, say N.
 
-- 
cgit v0.10.2


From 511b01bdf64ad8a38414096eab283c7784aebfc4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Jun 2009 00:32:00 +0200
Subject: Revert "x86, bts: reenable ptrace branch trace support"

This reverts commit 7e0bfad24d85de7cf2202a7b0ce51de11a077b21.

A late objection to the ABI has arrived:

   http://lkml.org/lkml/2009/6/10/253

Keep the ABI disabled out of caution, to not create premature
user-space expectations.

While the hw-branch-tracing variant uses and tests the BTS code.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Markus Metzger <markus.t.metzger@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 924e156..8130334 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -506,6 +506,7 @@ config X86_PTRACE_BTS
 	bool "Branch Trace Store"
 	default y
 	depends on X86_DEBUGCTLMSR
+	depends on BROKEN
 	---help---
 	  This adds a ptrace interface to the hardware's branch trace store.
 
-- 
cgit v0.10.2


From fd0fb038d5a308c7faddd1701be5e70aaffec98b Mon Sep 17 00:00:00 2001
From: Shin Hong <hongshin@gmail.com>
Date: Wed, 10 Jun 2009 20:11:29 -0400
Subject: Btrfs: init worker struct fields before kthread-run

This patch fixes a bug which may result race condition
between btrfs_start_workers() and worker_loop().

btrfs_start_workers() executed in a parent thread writes
on workers->worker and worker_loop() in a child thread
reads workers->worker. However, there is no synchronization
enforcing the order of two operations.

This patch makes btrfs_start_workers() fill workers->worker
before it starts a child thread with worker_loop()

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 502c3d6..7f88628 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -294,10 +294,10 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
 		INIT_LIST_HEAD(&worker->worker_list);
 		spin_lock_init(&worker->lock);
 		atomic_set(&worker->num_pending, 0);
+		worker->workers = workers;
 		worker->task = kthread_run(worker_loop, worker,
 					   "btrfs-%s-%d", workers->name,
 					   workers->num_workers + i);
-		worker->workers = workers;
 		if (IS_ERR(worker->task)) {
 			kfree(worker);
 			ret = PTR_ERR(worker->task);
-- 
cgit v0.10.2


From 66fff22483d8542dfb4d61a28d21277bbde321e8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 22:53:37 +0200
Subject: perf_counter: Annotate exit ctx recursion

Ever since Paul fixed it to unclone the context before taking the
ctx->lock this became a false positive, annotate it away.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 51c571e..ae591a1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3879,7 +3879,18 @@ void perf_counter_exit_task(struct task_struct *child)
 	spin_unlock(&child_ctx->lock);
 	local_irq_restore(flags);
 
-	mutex_lock(&child_ctx->mutex);
+	/*
+	 * We can recurse on the same lock type through:
+	 *
+	 *   __perf_counter_exit_task()
+	 *     sync_child_counter()
+	 *       fput(parent_counter->filp)
+	 *         perf_release()
+	 *           mutex_lock(&ctx->mutex)
+	 *
+	 * But since its the parent context it won't be the same instance.
+	 */
+	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
-- 
cgit v0.10.2


From ea1900e571d40a3ce60c835c2f21e1fd8c5cb663 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 21:45:22 +0200
Subject: perf_counter tools: Normalize data using per sample period data

When we use variable period sampling, add the period to the sample
data and use that to normalize the samples.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c10553c..919f23c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -350,8 +350,9 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	struct perf_counter_attr *attr = attrs + counter;
 	int track = 1;
 
-	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
+	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 	if (freq) {
+		attr->sample_type	|= PERF_SAMPLE_PERIOD;
 		attr->freq		= 1;
 		attr->sample_freq	= freq;
 	}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 9a0e31e..f57fd5c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -456,7 +456,7 @@ struct hist_entry {
 	uint64_t	 ip;
 	char		 level;
 
-	uint32_t	 count;
+	uint64_t	 count;
 };
 
 /*
@@ -726,7 +726,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 
 static int
 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-		struct symbol *sym, uint64_t ip, char level)
+		struct symbol *sym, uint64_t ip, char level, uint64_t count)
 {
 	struct rb_node **p = &hist.rb_node;
 	struct rb_node *parent = NULL;
@@ -738,7 +738,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 		.sym	= sym,
 		.ip	= ip,
 		.level	= level,
-		.count	= 1,
+		.count	= count,
 	};
 	int cmp;
 
@@ -749,7 +749,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 		cmp = hist_entry__cmp(&entry, he);
 
 		if (!cmp) {
-			he->count++;
+			he->count += count;
 			return 0;
 		}
 
@@ -942,15 +942,19 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 	struct dso *dso = NULL;
 	struct thread *thread = threads__findnew(event->ip.pid);
 	uint64_t ip = event->ip.ip;
+	uint64_t period = 1;
 	struct map *map = NULL;
 
+	if (event->header.type & PERF_SAMPLE_PERIOD)
+		period = event->ip.period;
+
 	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->header.misc,
 		event->ip.pid,
 		(void *)(long)ip,
-		(long long)event->ip.period);
+		(long long)period);
 
 	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
@@ -1001,13 +1005,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 		if (dso)
 			sym = dso->find_symbol(dso, ip);
 
-		if (hist_entry__add(thread, map, dso, sym, ip, level)) {
+		if (hist_entry__add(thread, map, dso, sym, ip, level, period)) {
 			fprintf(stderr,
 		"problem incrementing symbol count, skipping event\n");
 			return -1;
 		}
 	}
-	total++;
+	total += period;
 
 	return 0;
 }
-- 
cgit v0.10.2


From df1a132bf3d3508f863336c80a27806a2ac947e0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 21:02:22 +0200
Subject: perf_counter: Introduce struct for sample data

For easy extension of the sample data, put it in a structure.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 4786ad9..5e0bf399 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -1001,7 +1001,11 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 * Finally record data if requested.
 	 */
 	if (record) {
-		addr = 0;
+		struct perf_sample_data data = {
+			.regs = regs,
+			.addr = 0,
+		};
+
 		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
 			/*
 			 * The user wants a data address recorded.
@@ -1016,9 +1020,9 @@ static void record_and_restart(struct perf_counter *counter, long val,
 			sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
 				POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
 			if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
-				addr = mfspr(SPRN_SDAR);
+				data.addr = mfspr(SPRN_SDAR);
 		}
-		if (perf_counter_overflow(counter, nmi, regs, addr)) {
+		if (perf_counter_overflow(counter, nmi, &data)) {
 			/*
 			 * Interrupts are coming too fast - throttle them
 			 * by setting the counter to 0, so it will be
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 240ca56..82a23d4 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1173,11 +1173,14 @@ static void intel_pmu_reset(void)
  */
 static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
+	struct perf_sample_data data;
 	struct cpu_hw_counters *cpuc;
-	struct cpu_hw_counters;
 	int bit, cpu, loops;
 	u64 ack, status;
 
+	data.regs = regs;
+	data.addr = 0;
+
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
@@ -1210,7 +1213,7 @@ again:
 		if (!intel_pmu_save_and_restart(counter))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, regs, 0))
+		if (perf_counter_overflow(counter, 1, &data))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
 
@@ -1230,12 +1233,16 @@ again:
 
 static int amd_pmu_handle_irq(struct pt_regs *regs)
 {
-	int cpu, idx, handled = 0;
+	struct perf_sample_data data;
 	struct cpu_hw_counters *cpuc;
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
+	int cpu, idx, handled = 0;
 	u64 val;
 
+	data.regs = regs;
+	data.addr = 0;
+
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
@@ -1256,7 +1263,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, regs, 0))
+		if (perf_counter_overflow(counter, 1, &data))
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 282d8cc..d8c0eb4 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -605,8 +605,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_counter_context *ctx, int cpu);
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
-extern int perf_counter_overflow(struct perf_counter *counter,
-				 int nmi, struct pt_regs *regs, u64 addr);
+struct perf_sample_data {
+	struct pt_regs	*regs;
+	u64		addr;
+};
+
+extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
+				 struct perf_sample_data *data);
+
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ae591a1..4fe85e8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2378,8 +2378,8 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
 	return task_pid_nr_ns(p, counter->ns);
 }
 
-static void perf_counter_output(struct perf_counter *counter,
-				int nmi, struct pt_regs *regs, u64 addr)
+static void perf_counter_output(struct perf_counter *counter, int nmi,
+				struct perf_sample_data *data)
 {
 	int ret;
 	u64 sample_type = counter->attr.sample_type;
@@ -2404,10 +2404,10 @@ static void perf_counter_output(struct perf_counter *counter,
 	header.size = sizeof(header);
 
 	header.misc = PERF_EVENT_MISC_OVERFLOW;
-	header.misc |= perf_misc_flags(regs);
+	header.misc |= perf_misc_flags(data->regs);
 
 	if (sample_type & PERF_SAMPLE_IP) {
-		ip = perf_instruction_pointer(regs);
+		ip = perf_instruction_pointer(data->regs);
 		header.type |= PERF_SAMPLE_IP;
 		header.size += sizeof(ip);
 	}
@@ -2460,7 +2460,7 @@ static void perf_counter_output(struct perf_counter *counter,
 	}
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		callchain = perf_callchain(regs);
+		callchain = perf_callchain(data->regs);
 
 		if (callchain) {
 			callchain_size = (1 + callchain->nr) * sizeof(u64);
@@ -2486,7 +2486,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		perf_output_put(&handle, time);
 
 	if (sample_type & PERF_SAMPLE_ADDR)
-		perf_output_put(&handle, addr);
+		perf_output_put(&handle, data->addr);
 
 	if (sample_type & PERF_SAMPLE_ID)
 		perf_output_put(&handle, counter->id);
@@ -2950,8 +2950,8 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
  * Generic counter overflow handling.
  */
 
-int perf_counter_overflow(struct perf_counter *counter,
-			  int nmi, struct pt_regs *regs, u64 addr)
+int perf_counter_overflow(struct perf_counter *counter, int nmi,
+			  struct perf_sample_data *data)
 {
 	int events = atomic_read(&counter->event_limit);
 	int throttle = counter->pmu->unthrottle != NULL;
@@ -3005,7 +3005,7 @@ int perf_counter_overflow(struct perf_counter *counter,
 			perf_counter_disable(counter);
 	}
 
-	perf_counter_output(counter, nmi, regs, addr);
+	perf_counter_output(counter, nmi, data);
 	return ret;
 }
 
@@ -3054,24 +3054,25 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 {
 	enum hrtimer_restart ret = HRTIMER_RESTART;
+	struct perf_sample_data data;
 	struct perf_counter *counter;
-	struct pt_regs *regs;
 	u64 period;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
 	counter->pmu->read(counter);
 
-	regs = get_irq_regs();
+	data.addr = 0;
+	data.regs = get_irq_regs();
 	/*
 	 * In case we exclude kernel IPs or are somehow not in interrupt
 	 * context, provide the next best thing, the user IP.
 	 */
-	if ((counter->attr.exclude_kernel || !regs) &&
+	if ((counter->attr.exclude_kernel || !data.regs) &&
 			!counter->attr.exclude_user)
-		regs = task_pt_regs(current);
+		data.regs = task_pt_regs(current);
 
-	if (regs) {
-		if (perf_counter_overflow(counter, 0, regs, 0))
+	if (data.regs) {
+		if (perf_counter_overflow(counter, 0, &data))
 			ret = HRTIMER_NORESTART;
 	}
 
@@ -3084,9 +3085,14 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 static void perf_swcounter_overflow(struct perf_counter *counter,
 				    int nmi, struct pt_regs *regs, u64 addr)
 {
+	struct perf_sample_data data = {
+		.regs = regs,
+		.addr = addr,
+	};
+
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	if (perf_counter_overflow(counter, nmi, regs, addr))
+	if (perf_counter_overflow(counter, nmi, &data))
 		/* soft-disable the counter */
 		;
 
-- 
cgit v0.10.2


From 9e350de37ac9607012fcf9c5314a28fbddf8f43c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 21:34:59 +0200
Subject: perf_counter: Accurate period data

We currently log hw.sample_period for PERF_SAMPLE_PERIOD, however this is
incorrect. When we adjust the period, it will only take effect the next
cycle but report it for the current cycle. So when we adjust the period
for every cycle, we're always wrong.

Solve this by keeping track of the last_period.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5e0bf399..4990ce2 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -767,6 +767,7 @@ static void power_pmu_unthrottle(struct perf_counter *counter)
 	perf_disable();
 	power_pmu_read(counter);
 	left = counter->hw.sample_period;
+	counter->hw.last_period = left;
 	val = 0;
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
@@ -937,7 +938,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw.sample_period);
+	counter->hw.last_period = counter->hw.sample_period;
+	atomic64_set(&counter->hw.period_left, counter->hw.last_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -1002,8 +1004,9 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	if (record) {
 		struct perf_sample_data data = {
-			.regs = regs,
-			.addr = 0,
+			.regs	= regs,
+			.addr	= 0,
+			.period	= counter->hw.last_period,
 		};
 
 		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 82a23d4..57ae1be 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -698,6 +698,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
+		hwc->last_period = hwc->sample_period;
 		atomic64_set(&hwc->period_left, hwc->sample_period);
 	}
 
@@ -880,12 +881,14 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 	if (unlikely(left <= -period)) {
 		left = period;
 		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
 		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
 		ret = 1;
 	}
 	/*
@@ -1257,9 +1260,12 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
 			continue;
 
-		/* counter overflow */
-		handled = 1;
-		inc_irq_stat(apic_perf_irqs);
+		/*
+		 * counter overflow
+		 */
+		handled		= 1;
+		data.period	= counter->hw.last_period;
+
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
@@ -1267,6 +1273,9 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
 	return handled;
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d8c0eb4..5b96647 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -366,6 +366,7 @@ struct hw_perf_counter {
 	};
 	atomic64_t			prev_count;
 	u64				sample_period;
+	u64				last_period;
 	atomic64_t			period_left;
 	u64				interrupts;
 
@@ -606,8 +607,9 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 struct perf_sample_data {
-	struct pt_regs	*regs;
-	u64		addr;
+	struct pt_regs		*regs;
+	u64			addr;
+	u64			period;
 };
 
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4fe85e8..8b89b40 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2495,7 +2495,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		perf_output_put(&handle, cpu_entry);
 
 	if (sample_type & PERF_SAMPLE_PERIOD)
-		perf_output_put(&handle, counter->hw.sample_period);
+		perf_output_put(&handle, data->period);
 
 	/*
 	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
@@ -3040,11 +3040,13 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 	if (unlikely(left <= -period)) {
 		left = period;
 		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
 		atomic64_add(period, &hwc->period_left);
+		hwc->last_period = period;
 	}
 
 	atomic64_set(&hwc->prev_count, -left);
@@ -3086,8 +3088,9 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 				    int nmi, struct pt_regs *regs, u64 addr)
 {
 	struct perf_sample_data data = {
-		.regs = regs,
-		.addr = addr,
+		.regs	= regs,
+		.addr	= addr,
+		.period	= counter->hw.last_period,
 	};
 
 	perf_swcounter_update(counter);
-- 
cgit v0.10.2


From 955f2d966534803ec32411086a1698170f17f962 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 10 Jun 2009 21:32:01 +0200
Subject: ALSA: snd_usb_caiaq: set mixername

alsamixer and friends want the mixername to be set. Even though the
driver does not exports a real mixer device, export the name doesn't
harm.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c
index b9a2b31..2240624 100644
--- a/sound/usb/caiaq/device.c
+++ b/sound/usb/caiaq/device.c
@@ -35,7 +35,7 @@
 #include "input.h"
 
 MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
-MODULE_DESCRIPTION("caiaq USB audio, version 1.3.15");
+MODULE_DESCRIPTION("caiaq USB audio, version 1.3.16");
 MODULE_LICENSE("GPL");
 MODULE_SUPPORTED_DEVICE("{{Native Instruments, RigKontrol2},"
 			 "{Native Instruments, RigKontrol3},"
@@ -426,6 +426,7 @@ static int __devinit init_card(struct snd_usb_caiaqdev *dev)
 
 	strlcpy(card->driver, MODNAME, sizeof(card->driver));
 	strlcpy(card->shortname, dev->product_name, sizeof(card->shortname));
+	strlcpy(card->mixername, dev->product_name, sizeof(card->mixername));
 
 	/* if the id was not passed as module option, fill it with a shortened
 	 * version of the product string which does not contain any
-- 
cgit v0.10.2


From 7f72b47cedd1efc301576ff1050ec0a26c57eb48 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Tue, 28 Apr 2009 14:13:21 +1000
Subject: m68knommu: fix system reset for ColdFire 527x family

The sofwtare reset control for the 527x ColdFire family is based on
the same Reset Control Unit as the 528x ColdFire family. So use the
same reset code for both.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/system_no.h b/arch/m68k/include/asm/system_no.h
index 4496c0a..5fbc96d 100644
--- a/arch/m68k/include/asm/system_no.h
+++ b/arch/m68k/include/asm/system_no.h
@@ -264,18 +264,18 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 	: /* No output */		\
 	: "o" (*(char *)MCF_MBAR) );	\
 })
-#elif defined(CONFIG_M528x)
+#elif defined(CONFIG_M528x) || defined(CONFIG_M527x)
 /*
- * The MCF528x has a bit (SOFTRST) in memory (Reset Control Register RCR),
- * that when set, resets the MCF528x.
+ * Most of the newer ColdFire family members have a proper RESET unit.
+ * Use the software reset control bit in the Reset Control Register (RCR).
  */
 #define HARD_RESET_NOW() \
-({						\
-	unsigned char volatile *reset;		\
-	asm("move.w	#0x2700, %sr");		\
+({									\
+	unsigned char volatile *reset;					\
+	asm("move.w #0x2700, %sr");					\
 	reset = ((volatile unsigned char *)(MCF_IPSBAR + 0x110000));	\
-	while(1)				\
-	*reset |= (0x01 << 7);\
+	while (1)							\
+		*reset |= (0x01 << 7);					\
 })
 #elif defined(CONFIG_M523x)
 #define HARD_RESET_NOW() ({		\
-- 
cgit v0.10.2


From 293ca0f75415dd8373c716bf9755569daca7ba5d Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Tue, 28 Apr 2009 16:56:03 +1000
Subject: m68knommu: merge system reset for code ColdFire 523x family

The sofwtare reset control code for the 523x ColdFire family uses the
same Reset unit hardware as the 527x and 528x ColdFire parts. So they
should all use the same code. Merge them.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/system_no.h b/arch/m68k/include/asm/system_no.h
index 5fbc96d..a0a1ae8 100644
--- a/arch/m68k/include/asm/system_no.h
+++ b/arch/m68k/include/asm/system_no.h
@@ -264,7 +264,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 	: /* No output */		\
 	: "o" (*(char *)MCF_MBAR) );	\
 })
-#elif defined(CONFIG_M528x) || defined(CONFIG_M527x)
+#elif defined(CONFIG_M523x) || defined(CONFIG_M528x) || defined(CONFIG_M527x)
 /*
  * Most of the newer ColdFire family members have a proper RESET unit.
  * Use the software reset control bit in the Reset Control Register (RCR).
@@ -275,16 +275,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 	asm("move.w #0x2700, %sr");					\
 	reset = ((volatile unsigned char *)(MCF_IPSBAR + 0x110000));	\
 	while (1)							\
-		*reset |= (0x01 << 7);					\
-})
-#elif defined(CONFIG_M523x)
-#define HARD_RESET_NOW() ({		\
-	asm("				\
-	movew #0x2700, %sr;		\
-	movel #0x01000000, %sp;		\
-	moveal #0x40110000, %a0;	\
-	moveb #0x80, (%a0);		\
-	");				\
+		*reset |= 0x80;						\
 })
 #elif defined(CONFIG_M520x)
 	/*
-- 
cgit v0.10.2


From c18e52c7696e39c7cb23cd083bbda5652c338b79 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 17:03:09 +1000
Subject: m68knommu: add CPU reset code for the 5307 ColdFire

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c
index 44803bf..39da9e9 100644
--- a/arch/m68knommu/platform/5307/config.c
+++ b/arch/m68knommu/platform/5307/config.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -22,8 +21,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
 extern unsigned int mcf_timervector;
 extern unsigned int mcf_profilevector;
 extern unsigned int mcf_timerlevel;
@@ -119,6 +116,17 @@ void mcf_settimericr(unsigned int timer, unsigned int level)
 
 /***************************************************************************/
 
+void m5307_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
@@ -134,7 +142,7 @@ void __init config_BSP(char *commandp, int size)
 	mcf_timerlevel = 6;
 #endif
 
-	mach_reset = coldfire_reset;
+	mach_reset = m5307_cpu_reset;
 
 #ifdef CONFIG_BDM_DISABLE
 	/*
-- 
cgit v0.10.2


From eb7c874d5c81dae6d7b80857e9f0dab1a5d05ebd Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 17:15:18 +1000
Subject: m68knommu: add CPU reset code for the 5407 ColdFire

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c
index 0ee8c1a..b41d942 100644
--- a/arch/m68knommu/platform/5407/config.c
+++ b/arch/m68knommu/platform/5407/config.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -21,8 +20,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
 extern unsigned int mcf_timervector;
 extern unsigned int mcf_profilevector;
 extern unsigned int mcf_timerlevel;
@@ -110,6 +107,17 @@ void mcf_settimericr(unsigned int timer, unsigned int level)
 
 /***************************************************************************/
 
+void m5407_cpu_reset(void)
+{
+	local_irq_disable();
+	/* set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
@@ -121,7 +129,7 @@ void __init config_BSP(char *commandp, int size)
 	mcf_timerlevel = 6;
 #endif
 
-	mach_reset = coldfire_reset;
+	mach_reset = m5407_cpu_reset;
 }
 
 /***************************************************************************/
-- 
cgit v0.10.2


From 851377bca613175473b6dafd6c7d1bfbcf24efe2 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 17:22:05 +1000
Subject: m68knommu: add CPU reset code for the 5206 ColdFire

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68knommu/platform/5206/config.c b/arch/m68knommu/platform/5206/config.c
index 53a5920..f6f7987 100644
--- a/arch/m68knommu/platform/5206/config.c
+++ b/arch/m68knommu/platform/5206/config.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -21,10 +20,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m5206_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -109,10 +104,21 @@ void mcf_settimericr(unsigned int timer, unsigned int level)
 
 /***************************************************************************/
 
+void m5206_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
-	mach_reset = coldfire_reset;
+	mach_reset = m5206_cpu_reset;
 }
 
 /***************************************************************************/
-- 
cgit v0.10.2


From b61a7260ff70d10ca27edcc74535af0d46a277fa Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 17:22:56 +1000
Subject: m68knommu: add CPU reset code for the 5206e ColdFire

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c
index db90254..65887799 100644
--- a/arch/m68knommu/platform/5206e/config.c
+++ b/arch/m68knommu/platform/5206e/config.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -21,10 +20,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m5206e_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -109,6 +104,17 @@ void mcf_settimericr(unsigned int timer, unsigned int level)
 
 /***************************************************************************/
 
+void m5206e_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
@@ -119,7 +125,7 @@ void __init config_BSP(char *commandp, int size)
 	commandp[size-1] = 0;
 #endif /* CONFIG_NETtel */
 
-	mach_reset = coldfire_reset;
+	mach_reset = m5206e_cpu_reset;
 }
 
 /***************************************************************************/
-- 
cgit v0.10.2


From 6f5aa7ce38dfb9c04eecac1222fc4b2521f02b83 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 17:31:19 +1000
Subject: m68knommu: add CPU reset code for the 5249 ColdFire

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c
index 9eab19d..93d9988 100644
--- a/arch/m68knommu/platform/5249/config.c
+++ b/arch/m68knommu/platform/5249/config.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -20,10 +19,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m5249_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -106,10 +101,21 @@ void mcf_settimericr(unsigned int timer, unsigned int level)
 
 /***************************************************************************/
 
+void m5249_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to soft reset, and enabled */
+	__raw_writeb(0xc0, MCF_MBAR + MCFSIM_SYPCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
-	mach_reset = coldfire_reset;
+	mach_reset = m5249_cpu_reset;
 }
 
 /***************************************************************************/
-- 
cgit v0.10.2


From 384feb91319766298c6358f23f54873d44c9d8cb Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 22:03:43 +1000
Subject: m68knommu: add CPU reset code for the 532x ColdFire

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/m532xsim.h b/arch/m68k/include/asm/m532xsim.h
index ce60345..eb7fd44 100644
--- a/arch/m68k/include/asm/m532xsim.h
+++ b/arch/m68k/include/asm/m532xsim.h
@@ -127,6 +127,18 @@
 
 /*********************************************************************
  *
+ * Reset Controller Module
+ *
+ *********************************************************************/
+
+#define	MCF_RCR			0xFC0A0000
+#define	MCF_RSR			0xFC0A0001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
+
+/*********************************************************************
+ *
  * Inter-IC (I2C) Module
  *
  *********************************************************************/
diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c
index 591f2f8..cdb7619 100644
--- a/arch/m68knommu/platform/532x/config.c
+++ b/arch/m68knommu/platform/532x/config.c
@@ -31,8 +31,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
 extern unsigned int mcf_timervector;
 extern unsigned int mcf_profilevector;
 extern unsigned int mcf_timerlevel;
@@ -164,6 +162,14 @@ void mcf_settimericr(unsigned int timer, unsigned int level)
 
 /***************************************************************************/
 
+static void m532x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_RCR);
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_setimr(MCFSIM_IMR_MASKALL);
@@ -181,7 +187,7 @@ void __init config_BSP(char *commandp, int size)
 
 	mcf_timervector = 64+32;
 	mcf_profilevector = 64+33;
-	mach_reset = coldfire_reset;
+	mach_reset = m532x_cpu_reset;
 
 #ifdef CONFIG_BDM_DISABLE
 	/*
-- 
cgit v0.10.2


From 25ce4a908abebf8c7d2e89908d36050e1de9cbec Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 22:39:50 +1000
Subject: m68knommu: move CPU reset code for the 520x ColdFire into its
 platform code

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/m520xsim.h b/arch/m68k/include/asm/m520xsim.h
index 49d016e..83bbcfd 100644
--- a/arch/m68k/include/asm/m520xsim.h
+++ b/arch/m68k/include/asm/m520xsim.h
@@ -59,5 +59,14 @@
 #define MCFPIT_IMR		MCFINTC_IMRL
 #define MCFPIT_IMR_IBIT		(1 << MCFINT_PIT1)
 
+/*
+ *  Reset Controll Unit.
+ */
+#define	MCF_RCR			0xFC0A0000
+#define	MCF_RSR			0xFC0A0001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
+
 /****************************************************************************/
 #endif  /* m520xsim_h */
diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68knommu/platform/520x/config.c
index 855fc6a..1c43a8a 100644
--- a/arch/m68knommu/platform/520x/config.c
+++ b/arch/m68knommu/platform/520x/config.c
@@ -14,7 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -23,10 +22,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m520x_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -169,9 +164,17 @@ void mcf_autovector(unsigned int vec)
 
 /***************************************************************************/
 
+static void m520x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_RCR);
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
-	mach_reset = coldfire_reset;
+	mach_reset = m520x_cpu_reset;
 	m520x_uarts_init();
 	m520x_fec_init();
 }
-- 
cgit v0.10.2


From 55b33f316d25c1ffb13a65de7f846b950b5ef5a6 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 22:58:35 +1000
Subject: m68knommu: move CPU reset code for the 523x ColdFire into its
 platform code

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/m523xsim.h b/arch/m68k/include/asm/m523xsim.h
index bf39731..55183b5 100644
--- a/arch/m68k/include/asm/m523xsim.h
+++ b/arch/m68k/include/asm/m523xsim.h
@@ -41,5 +41,14 @@
 #define	MCFSIM_DACR1		0x50		/* SDRAM base address 1 */
 #define	MCFSIM_DMR1		0x54		/* SDRAM address mask 1 */
 
+/*
+ *  Reset Controll Unit (relative to IPSBAR).
+ */
+#define	MCF_RCR			0x110000
+#define	MCF_RSR			0x110001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
+
 /****************************************************************************/
 #endif	/* m523xsim_h */
diff --git a/arch/m68knommu/platform/523x/config.c b/arch/m68knommu/platform/523x/config.c
index 74133f2..961fefe 100644
--- a/arch/m68knommu/platform/523x/config.c
+++ b/arch/m68knommu/platform/523x/config.c
@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -24,10 +23,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m523x_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -145,13 +140,20 @@ void mcf_autovector(unsigned int vec)
 {
 	/* Everything is auto-vectored on the 523x */
 }
+/***************************************************************************/
+
+static void m523x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_IPSBAR + MCF_RCR);
+}
 
 /***************************************************************************/
 
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_disableall();
-	mach_reset = coldfire_reset;
+	mach_reset = m523x_cpu_reset;
 	m523x_uarts_init();
 	m523x_fec_init();
 }
-- 
cgit v0.10.2


From 4c0b008d49e728735f54419e60446480017bfe1b Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 23:06:45 +1000
Subject: m68knommu: move CPU reset code for the 527x ColdFire into its
 platform code

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/m527xsim.h b/arch/m68k/include/asm/m527xsim.h
index 1f63ab3..95f4f8e 100644
--- a/arch/m68k/include/asm/m527xsim.h
+++ b/arch/m68k/include/asm/m527xsim.h
@@ -70,5 +70,14 @@
 #define UART2_ENABLE_MASK	0x3f00 
 #endif
 
+/*
+ *  Reset Controll Unit (relative to IPSBAR).
+ */
+#define	MCF_RCR			0x110000
+#define	MCF_RSR			0x110001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
+
 /****************************************************************************/
 #endif	/* m527xsim_h */
diff --git a/arch/m68knommu/platform/527x/config.c b/arch/m68knommu/platform/527x/config.c
index 428b159..f746439 100644
--- a/arch/m68knommu/platform/527x/config.c
+++ b/arch/m68knommu/platform/527x/config.c
@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -24,10 +23,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m527x_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -227,10 +222,18 @@ void mcf_autovector(unsigned int vec)
 
 /***************************************************************************/
 
+static void m527x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_IPSBAR + MCF_RCR);
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 	mcf_disableall();
-	mach_reset = coldfire_reset;
+	mach_reset = m527x_cpu_reset;
 	m527x_uarts_init();
 	m527x_fec_init();
 }
-- 
cgit v0.10.2


From dd65b1de553ddfd85e8fea9d3aa9db7479ccf2aa Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 23:15:56 +1000
Subject: m68knommu: move CPU reset code for the 528x ColdFire into its
 platform code

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/m528xsim.h b/arch/m68k/include/asm/m528xsim.h
index 28bf783..d79c49f 100644
--- a/arch/m68k/include/asm/m528xsim.h
+++ b/arch/m68k/include/asm/m528xsim.h
@@ -56,6 +56,14 @@
 #define MCF5282_INTC0_ICR17     (volatile u8 *) (MCF_IPSBAR + 0x0C51)
 
 
+/*
+ *  Reset Control Unit (relative to IPSBAR).
+ */
+#define	MCF_RCR			0x110000
+#define	MCF_RSR			0x110001
+
+#define	MCF_RCR_SWRESET		0x80		/* Software reset bit */
+#define	MCF_RCR_FRCSTOUT	0x40		/* Force external reset */
 
 /*********************************************************************
 *
diff --git a/arch/m68knommu/platform/528x/config.c b/arch/m68knommu/platform/528x/config.c
index bee526f..a1d1a61 100644
--- a/arch/m68knommu/platform/528x/config.c
+++ b/arch/m68knommu/platform/528x/config.c
@@ -31,10 +31,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
-/***************************************************************************/
-
 static struct mcf_platform_uart m528x_uart_platform[] = {
 	{
 		.mapbase	= MCF_MBAR + MCFUART_BASE1,
@@ -171,6 +167,14 @@ void mcf_autovector(unsigned int vec)
 
 /***************************************************************************/
 
+static void m528x_cpu_reset(void)
+{
+	local_irq_disable();
+	__raw_writeb(MCF_RCR_SWRESET, MCF_IPSBAR + MCF_RCR);
+}
+
+/***************************************************************************/
+
 #ifdef CONFIG_WILDFIRE
 void wildfire_halt(void)
 {
@@ -214,6 +218,7 @@ void __init config_BSP(char *commandp, int size)
 
 static int __init init_BSP(void)
 {
+	mach_reset = m528x_cpu_reset;
 	m528x_uarts_init();
 	m528x_fec_init();
 	platform_add_devices(m528x_devices, ARRAY_SIZE(m528x_devices));
-- 
cgit v0.10.2


From 05728aec8b05ec3fa859add8b22ba46432611e9e Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 30 Apr 2009 23:32:52 +1000
Subject: m68knommu: move CPU reset code for the 5272 ColdFire into its
 platform code

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68knommu/platform/5272/config.c
index e049245..5f95fcd 100644
--- a/arch/m68knommu/platform/5272/config.c
+++ b/arch/m68knommu/platform/5272/config.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -21,8 +20,6 @@
 
 /***************************************************************************/
 
-void coldfire_reset(void);
-
 extern unsigned int mcf_timervector;
 extern unsigned int mcf_profilevector;
 extern unsigned int mcf_timerlevel;
@@ -170,6 +167,19 @@ void mcf_settimericr(int timer, int level)
 
 /***************************************************************************/
 
+static void m5272_cpu_reset(void)
+{
+	local_irq_disable();
+	/* Set watchdog to reset, and enabled */
+	__raw_writew(0, MCF_MBAR + MCFSIM_WIRR);
+	__raw_writew(1, MCF_MBAR + MCFSIM_WRRR);
+	__raw_writew(0, MCF_MBAR + MCFSIM_WCR);
+	for (;;)
+		/* wait for watchdog to timeout */;
+}
+
+/***************************************************************************/
+
 void __init config_BSP(char *commandp, int size)
 {
 #if defined (CONFIG_MOD5272)
@@ -194,7 +204,7 @@ void __init config_BSP(char *commandp, int size)
 
 	mcf_timervector = 69;
 	mcf_profilevector = 70;
-	mach_reset = coldfire_reset;
+	mach_reset = m5272_cpu_reset;
 }
 
 /***************************************************************************/
-- 
cgit v0.10.2


From fb29ad7949aeed3d464ba8d2c9772835f456d4c4 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Fri, 1 May 2009 16:09:17 +1000
Subject: m68knommu: remove obsolete reset code

All ColdFire and non-MMU 68k code has custom reset routines.
Remove the obsolete and now un-used reset macros.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/system_no.h b/arch/m68k/include/asm/system_no.h
index a0a1ae8..3c0718d 100644
--- a/arch/m68k/include/asm/system_no.h
+++ b/arch/m68k/include/asm/system_no.h
@@ -203,104 +203,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 #include <asm-generic/cmpxchg.h>
 #endif
 
-#if defined( CONFIG_M68328 ) || defined( CONFIG_M68EZ328 ) || \
-	defined (CONFIG_M68360) || defined( CONFIG_M68VZ328 )
-#define HARD_RESET_NOW() ({		\
-        local_irq_disable();		\
-        asm("				\
-        moveal #0x10c00000, %a0;	\
-        moveb #0, 0xFFFFF300;		\
-        moveal 0(%a0), %sp;		\
-        moveal 4(%a0), %a0;		\
-        jmp (%a0);			\
-        ");				\
-})
-#endif
-
-#ifdef CONFIG_COLDFIRE
-#if defined(CONFIG_M5272) && defined(CONFIG_NETtel)
-/*
- * Need to account for broken early mask of 5272 silicon. So don't
- * jump through the original start address. Jump strait into the
- * known start of the FLASH code.
- */
-#define HARD_RESET_NOW() ({		\
-        asm("				\
-	movew #0x2700, %sr;		\
-        jmp 0xf0000400;			\
-        ");				\
-})
-#elif defined(CONFIG_NETtel) || \
-      defined(CONFIG_SECUREEDGEMP3) || defined(CONFIG_CLEOPATRA)
-#define HARD_RESET_NOW() ({		\
-        asm("				\
-	movew #0x2700, %sr;		\
-	moveal #0x10000044, %a0;	\
-	movel #0xffffffff, (%a0);	\
-	moveal #0x10000001, %a0;	\
-	moveb #0x00, (%a0);		\
-        moveal #0xf0000004, %a0;	\
-        moveal (%a0), %a0;		\
-        jmp (%a0);			\
-        ");				\
-})
-#elif defined(CONFIG_M5272)
-/*
- * Retrieve the boot address in flash using CSBR0 and CSOR0
- * find the reset vector at flash_address + 4 (e.g. 0x400)
- * remap it in the flash's current location (e.g. 0xf0000400)
- * and jump there.
- */ 
-#define HARD_RESET_NOW() ({		\
-	asm("				\
-	movew #0x2700, %%sr;		\
-	move.l	%0+0x40,%%d0;		\
-	and.l	%0+0x44,%%d0;		\
-	andi.l	#0xfffff000,%%d0;	\
-	mov.l	%%d0,%%a0;		\
-	or.l	4(%%a0),%%d0;		\
-	mov.l	%%d0,%%a0;		\
-	jmp (%%a0);"			\
-	: /* No output */		\
-	: "o" (*(char *)MCF_MBAR) );	\
-})
-#elif defined(CONFIG_M523x) || defined(CONFIG_M528x) || defined(CONFIG_M527x)
-/*
- * Most of the newer ColdFire family members have a proper RESET unit.
- * Use the software reset control bit in the Reset Control Register (RCR).
- */
-#define HARD_RESET_NOW() \
-({									\
-	unsigned char volatile *reset;					\
-	asm("move.w #0x2700, %sr");					\
-	reset = ((volatile unsigned char *)(MCF_IPSBAR + 0x110000));	\
-	while (1)							\
-		*reset |= 0x80;						\
-})
-#elif defined(CONFIG_M520x)
-	/*
-	 * The MCF5208 has a bit (SOFTRST) in memory (Reset Control Register 
-	 * RCR), that when set, resets the MCF5208.
-	 */
-#define HARD_RESET_NOW() 		\
-({					\
-	unsigned char volatile *reset;	\
-	asm("move.w     #0x2700, %sr");	\
-	reset = ((volatile unsigned char *)(MCF_IPSBAR + 0xA0000));	\
-	while(1)			\
-		*reset |= 0x80;		\
-})
-#else
-#define HARD_RESET_NOW() ({		\
-        asm("				\
-	movew #0x2700, %sr;		\
-        moveal #0x4, %a0;		\
-        moveal (%a0), %a0;		\
-        jmp (%a0);			\
-        ");				\
-})
-#endif
-#endif
 #define arch_align_stack(x) (x)
 
 
diff --git a/arch/m68knommu/platform/coldfire/vectors.c b/arch/m68knommu/platform/coldfire/vectors.c
index 6cf8946..bdca029 100644
--- a/arch/m68knommu/platform/coldfire/vectors.c
+++ b/arch/m68knommu/platform/coldfire/vectors.c
@@ -96,10 +96,3 @@ void ack_vector(unsigned int irq)
 }
 
 /***************************************************************************/
-
-void coldfire_reset(void)
-{
-	HARD_RESET_NOW();
-}
-
-/***************************************************************************/
-- 
cgit v0.10.2


From 308e610e8c435d7875842b427dbc99de43a4aec9 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 15 May 2009 10:59:37 -0700
Subject: arch/m68knommu: Convert #ifdef DEBUG printk(KERN_DEBUG to pr_debug(

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index 5985f19..5c2bb3e 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -166,15 +166,13 @@ void __init setup_arch(char **cmdline_p)
 	printk(KERN_INFO "Motorola M5235EVB support (C)2005 Syn-tech Systems, Inc. (Jate Sujjavanich)\n");
 #endif
 
-#ifdef DEBUG
-	printk(KERN_DEBUG "KERNEL -> TEXT=0x%06x-0x%06x DATA=0x%06x-0x%06x "
-		"BSS=0x%06x-0x%06x\n", (int) &_stext, (int) &_etext,
-		(int) &_sdata, (int) &_edata,
-		(int) &_sbss, (int) &_ebss);
-	printk(KERN_DEBUG "MEMORY -> ROMFS=0x%06x-0x%06x MEM=0x%06x-0x%06x\n ",
-		(int) &_ebss, (int) memory_start,
-		(int) memory_start, (int) memory_end);
-#endif
+	pr_debug("KERNEL -> TEXT=0x%06x-0x%06x DATA=0x%06x-0x%06x "
+		 "BSS=0x%06x-0x%06x\n", (int) &_stext, (int) &_etext,
+		 (int) &_sdata, (int) &_edata,
+		 (int) &_sbss, (int) &_ebss);
+	pr_debug("MEMORY -> ROMFS=0x%06x-0x%06x MEM=0x%06x-0x%06x\n ",
+		 (int) &_ebss, (int) memory_start,
+		 (int) memory_start, (int) memory_end);
 
 	/* Keep a copy of command line */
 	*cmdline_p = &command_line[0];
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c
index 7befc0c..b1703c6 100644
--- a/arch/m68knommu/mm/init.c
+++ b/arch/m68knommu/mm/init.c
@@ -126,9 +126,7 @@ void __init mem_init(void)
 	unsigned long start_mem = memory_start; /* DAVIDM - these must start at end of kernel */
 	unsigned long end_mem   = memory_end; /* DAVIDM - this must not include kernel stack at top */
 
-#ifdef DEBUG
-	printk(KERN_DEBUG "Mem_init: start=%lx, end=%lx\n", start_mem, end_mem);
-#endif
+	pr_debug("Mem_init: start=%lx, end=%lx\n", start_mem, end_mem);
 
 	end_mem &= PAGE_MASK;
 	high_memory = (void *) end_mem;
-- 
cgit v0.10.2


From 4f308e35a9bde9d6b671e8409157edb9065f117c Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Wed, 10 Jun 2009 18:47:47 +0530
Subject: headers_check fix: m68k, swab.h

fix the following 'make headers_check' warnings:

  usr/include/asm-m68k/swab.h:4: include of <linux/types.h> is preferred over <asm/types.h>
  usr/include/asm-m68k/swab.h:10: found __[us]{8,16,32,64} type without #include <linux/types.h>

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/swab.h b/arch/m68k/include/asm/swab.h
index 9e3054e..5b754aa 100644
--- a/arch/m68k/include/asm/swab.h
+++ b/arch/m68k/include/asm/swab.h
@@ -1,7 +1,7 @@
 #ifndef _M68K_SWAB_H
 #define _M68K_SWAB_H
 
-#include <asm/types.h>
+#include <linux/types.h>
 #include <linux/compiler.h>
 
 #define __SWAB_64_THRU_32__
-- 
cgit v0.10.2


From 9d4f94135385b565e2ce4a37f71b53d0fd3664e8 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 11 Jun 2009 13:05:00 +1000
Subject: m68knommu: enumerate INIT_THREAD fields properly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use proper field value setting init INIT_THREAD macro.
Fixes this:

arch/m68knommu/kernel/init_task.c:27: warning: excess elements in array initializer
arch/m68knommu/kernel/init_task.c:27: warning: (near initialization for ‘init_task.thread.fpstate’)

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68k/include/asm/processor_no.h b/arch/m68k/include/asm/processor_no.h
index 91cba18..7a1e0ba 100644
--- a/arch/m68k/include/asm/processor_no.h
+++ b/arch/m68k/include/asm/processor_no.h
@@ -72,10 +72,10 @@ struct thread_struct {
 	unsigned char  fpstate[FPSTATESIZE];  /* floating point state */
 };
 
-#define INIT_THREAD  { \
-	sizeof(init_stack) + (unsigned long) init_stack, 0, \
-	PS_S, __KERNEL_DS, \
-	{0, 0}, 0, {0,}, {0, 0, 0}, {0,}, \
+#define INIT_THREAD  {							\
+	.ksp	= sizeof(init_stack) + (unsigned long) init_stack,	\
+	.sr	= PS_S,							\
+	.fs	= __KERNEL_DS,						\
 }
 
 /*
-- 
cgit v0.10.2


From 193e98401a89707ea374e9c32c1b9a5a9dd87a48 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Thu, 23 Apr 2009 11:24:03 +1000
Subject: m68knommu: remove unecessary include of thread_info.h in entry.S

Signed-off-by: Greg Ungerer <gerg@uclinux.org>

diff --git a/arch/m68knommu/kernel/entry.S b/arch/m68knommu/kernel/entry.S
index f4782d2..f56faa5 100644
--- a/arch/m68knommu/kernel/entry.S
+++ b/arch/m68knommu/kernel/entry.S
@@ -26,7 +26,6 @@
 
 #include <linux/sys.h>
 #include <linux/linkage.h>
-#include <asm/thread_info.h>
 #include <asm/errno.h>
 #include <asm/setup.h>
 #include <asm/segment.h>
-- 
cgit v0.10.2


From a50de78dc6d21ee074e9561c800d194bec12128b Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 2 Jun 2009 08:43:59 +0000
Subject: sh: clock div4 frequency table offset fix

This patch fixes the per clock offset calculation in
sh_clk_div4_register(). Without this patch the offset
to the frequency table for each clock is incorrect.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
index 88fc30d..e604a6f 100644
--- a/arch/sh/kernel/cpu/clock-cpg.c
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -71,8 +71,9 @@ int __init sh_clk_div4_register(struct clk *clks, int nr,
 	int ret = 0;
 	int k;
 
-	k = nr_divs + 1;
-	freq_table = alloc_bootmem(freq_table_size * nr * (nr_divs + 1));
+	freq_table_size *= (nr_divs + 1);
+
+	freq_table = alloc_bootmem(freq_table_size * nr);
 	if (!freq_table)
 		return -ENOMEM;
 
-- 
cgit v0.10.2


From 2693e2740ddae364a80e6083043ba760b6366b69 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 2 Jun 2009 08:53:54 +0000
Subject: sh: clock div6 helper code

This patch adds div6 clock helper code. The div6 clocks
are simply 6-bit divide-by-n modules where n is 1 to 64.

Needed for vclk on sh7722, sh7723, sh7343 and sh7366.
sh7724 needs this even more for vclk, fclka, fclkb,
irdaclk and spuclk.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 7435e40..026b9da 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -145,4 +145,14 @@ int sh_clk_mstp32_register(struct clk *clks, int nr);
 int sh_clk_div4_register(struct clk *clks, int nr,
 			 struct clk_div_mult_table *table);
 
+#define SH_CLK_DIV6(_name, _parent, _reg, _flags)	\
+{							\
+	.name = _name,					\
+	.parent = _parent,				\
+	.enable_reg = (void __iomem *)_reg,		\
+	.flags = _flags,				\
+}
+
+int sh_clk_div6_register(struct clk *clks, int nr);
+
 #endif /* __ASM_SH_CLOCK_H */
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
index e604a6f..fedc8b8 100644
--- a/arch/sh/kernel/cpu/clock-cpg.c
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -38,6 +38,70 @@ int __init sh_clk_mstp32_register(struct clk *clks, int nr)
 	return ret;
 }
 
+static long sh_clk_div_round_rate(struct clk *clk, unsigned long rate)
+{
+	return clk_rate_table_round(clk, clk->freq_table, rate);
+}
+
+static int sh_clk_div6_divisors[64] = {
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+	17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+	33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+	49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
+};
+
+static struct clk_div_mult_table sh_clk_div6_table = {
+	.divisors = sh_clk_div6_divisors,
+	.nr_divisors = ARRAY_SIZE(sh_clk_div6_divisors),
+};
+
+static unsigned long sh_clk_div6_recalc(struct clk *clk)
+{
+	struct clk_div_mult_table *table = &sh_clk_div6_table;
+	unsigned int idx;
+
+	clk_rate_table_build(clk, clk->freq_table, table->nr_divisors,
+			     table, NULL);
+
+	idx = __raw_readl(clk->enable_reg) & 0x003f;
+
+	return clk->freq_table[idx].frequency;
+}
+
+static struct clk_ops sh_clk_div6_clk_ops = {
+	.recalc		= sh_clk_div6_recalc,
+	.round_rate	= sh_clk_div_round_rate,
+};
+
+int __init sh_clk_div6_register(struct clk *clks, int nr)
+{
+	struct clk *clkp;
+	void *freq_table;
+	int nr_divs = sh_clk_div6_table.nr_divisors;
+	int freq_table_size = sizeof(struct cpufreq_frequency_table);
+	int ret = 0;
+	int k;
+
+	freq_table_size *= (nr_divs + 1);
+
+	freq_table = alloc_bootmem(freq_table_size * nr);
+	if (!freq_table)
+		return -ENOMEM;
+
+	for (k = 0; !ret && (k < nr); k++) {
+		clkp = clks + k;
+
+		clkp->ops = &sh_clk_div6_clk_ops;
+		clkp->id = -1;
+		clkp->freq_table = freq_table + (k * freq_table_size);
+		clkp->freq_table[nr_divs].frequency = CPUFREQ_TABLE_END;
+
+		ret = clk_register(clkp);
+	}
+
+	return ret;
+}
+
 static unsigned long sh_clk_div4_recalc(struct clk *clk)
 {
 	struct clk_div_mult_table *table = clk->priv;
@@ -51,14 +115,9 @@ static unsigned long sh_clk_div4_recalc(struct clk *clk)
 	return clk->freq_table[idx].frequency;
 }
 
-static long sh_clk_div4_round_rate(struct clk *clk, unsigned long rate)
-{
-	return clk_rate_table_round(clk, clk->freq_table, rate);
-}
-
 static struct clk_ops sh_clk_div4_clk_ops = {
 	.recalc		= sh_clk_div4_recalc,
-	.round_rate	= sh_clk_div4_round_rate,
+	.round_rate	= sh_clk_div_round_rate,
 };
 
 int __init sh_clk_div4_register(struct clk *clks, int nr,
-- 
cgit v0.10.2


From 0d4fdbb64f472ef31195714993f1263f77cf85ca Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 2 Jun 2009 09:22:02 +0000
Subject: sh: rework mode pin code

This patch reworks the mode pin code to keep the pin
definitions in one place. The mode pins values are now
the value of the bit instead of bit number.

With this patch in place the sh7785 header file contains
mode pin comments. The sh7785 clock code and the sh7785lcr
board code are updated to reflect the new shared mode pins.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index c2894c5..7be56fb 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -330,13 +330,13 @@ static int sh7785lcr_mode_pins(void)
 	 * If you change these dip switches then you will need to
 	 * adjust the values below as well.
 	 */
-	value |= 1 << MODE_PIN_MODE4; /* Clock Mode 16 */
-	value |= 1 << MODE_PIN_MODE5; /* 32-bit Area0 bus width */
-	value |= 1 << MODE_PIN_MODE6; /* 32-bit Area0 bus width */
-	value |= 1 << MODE_PIN_MODE7; /* Area 0 SRAM interface [fixed] */
-	value |= 1 << MODE_PIN_MODE8; /* Little Endian */
-	value |= 1 << MODE_PIN_MODE9; /* Master Mode */
-	value |= 1 << MODE_PIN_MODE14; /* No PLL step-up */
+	value |= MODE_PIN4; /* Clock Mode 16 */
+	value |= MODE_PIN5; /* 32-bit Area0 bus width */
+	value |= MODE_PIN6; /* 32-bit Area0 bus width */
+	value |= MODE_PIN7; /* Area 0 SRAM interface [fixed] */
+	value |= MODE_PIN8; /* Little Endian */
+	value |= MODE_PIN9; /* Master Mode */
+	value |= MODE_PIN14; /* No PLL step-up */
 
 	return value;
 }
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index fb67482..ff7daaf 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -95,6 +95,23 @@ const char *get_cpu_subtype(struct sh_cpuinfo *c);
 extern const struct seq_operations cpuinfo_op;
 
 /* processor boot mode configuration */
+#define MODE_PIN0 (1 << 0)
+#define MODE_PIN1 (1 << 1)
+#define MODE_PIN2 (1 << 2)
+#define MODE_PIN3 (1 << 3)
+#define MODE_PIN4 (1 << 4)
+#define MODE_PIN5 (1 << 5)
+#define MODE_PIN6 (1 << 6)
+#define MODE_PIN7 (1 << 7)
+#define MODE_PIN8 (1 << 8)
+#define MODE_PIN9 (1 << 9)
+#define MODE_PIN10 (1 << 10)
+#define MODE_PIN11 (1 << 11)
+#define MODE_PIN12 (1 << 12)
+#define MODE_PIN13 (1 << 13)
+#define MODE_PIN14 (1 << 14)
+#define MODE_PIN15 (1 << 15)
+
 int generic_mode_pins(void);
 int test_mode_pin(int pin);
 
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7785.h b/arch/sh/include/cpu-sh4/cpu/sh7785.h
index 89afaa6..9dc9d91 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7785.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7785.h
@@ -1,25 +1,26 @@
 #ifndef __ASM_SH7785_H__
 #define __ASM_SH7785_H__
 
-/* Boot Mode Pins, more information in sh7785 manual Rev.1.00, page 1628 */
-enum {
-	MODE_PIN_MODE0,  /* CPG - Initial Pck/Bck Frequency [FRQMR1] */
-	MODE_PIN_MODE1,  /* CPG - Initial Uck/SHck/DDRck Frequency [FRQMR1] */
-	MODE_PIN_MODE2,  /* CPG - Reserved (L: Normal operation) */
-	MODE_PIN_MODE3,  /* CPG - Reserved (L: Normal operation) */
-	MODE_PIN_MODE4,  /* CPG - Initial PLL setting (72x/36x) */
-	MODE_PIN_MODE5,  /* LBSC - Area0 Memory Type / Bus Width [CS0BCR.8] */
-	MODE_PIN_MODE6,  /* LBSC - Area0 Memory Type / Bus Width [CS0BCR.9] */
-	MODE_PIN_MODE7,  /* LBSC - Area0 Memory Type / Bus Width [CS0BCR.3] */
-	MODE_PIN_MODE8,  /* LBSC - Endian Mode (L: Big, H: Little) [BCR.31] */
-	MODE_PIN_MODE9,  /* LBSC - Master/Slave Mode (L: Slave) [BCR.30] */
-	MODE_PIN_MODE10, /* CPG - Clock Input (L: Ext Clk, H: Crystal) */
-	MODE_PIN_MODE11, /* PCI - Pin Mode (LL: PCI host, LH: PCI slave) */
-	MODE_PIN_MODE12, /* PCI - Pin Mode (HL: Local bus, HH: DU) */
-	MODE_PIN_MODE13, /* Boot Address Mode (L: 29-bit, H: 32-bit) */
-	MODE_PIN_MODE14, /* Reserved (H: Normal operation) */
-	MODE_PIN_MPMD,   /* Emulation Mode (L: Emulation mode, H: LSI mode) */
-};
+/* Boot Mode Pins:
+ *
+ * MODE0: CPG - Initial Pck/Bck Frequency [FRQMR1]
+ * MODE1: CPG - Initial Uck/SHck/DDRck Frequency [FRQMR1]
+ * MODE2: CPG - Reserved (L: Normal operation)
+ * MODE3: CPG - Reserved (L: Normal operation)
+ * MODE4: CPG - Initial PLL setting (72x/36x)
+ * MODE5: LBSC - Area0 Memory Type / Bus Width [CS0BCR.8]
+ * MODE6: LBSC - Area0 Memory Type / Bus Width [CS0BCR.9]
+ * MODE7: LBSC - Area0 Memory Type / Bus Width [CS0BCR.3]
+ * MODE8: LBSC - Endian Mode (L: Big, H: Little) [BCR.31]
+ * MODE9: LBSC - Master/Slave Mode (L: Slave) [BCR.30]
+ * MODE10: CPG - Clock Input (L: Ext Clk, H: Crystal)
+ * MODE11: PCI - Pin Mode (LL: PCI host, LH: PCI slave)
+ * MODE12: PCI - Pin Mode (HL: Local bus, HH: DU)
+ * MODE13: Boot Address Mode (L: 29-bit, H: 32-bit)
+ * MODE14: Reserved (H: Normal operation)
+ *
+ * More information in sh7785 manual Rev.1.00, page 1628.
+ */
 
 /* Pin Function Controller:
  * GPIO_FN_xx - GPIO used to select pin function
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
index dae20ac..73abfbf 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c
@@ -32,7 +32,7 @@ static unsigned long pll_recalc(struct clk *clk)
 {
 	int multiplier;
 
-	multiplier = test_mode_pin(MODE_PIN_MODE4) ? 36 : 72;
+	multiplier = test_mode_pin(MODE_PIN4) ? 36 : 72;
 
 	return clk->parent->rate * multiplier;
 }
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 050131e..dd38338 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -429,7 +429,7 @@ int generic_mode_pins(void)
 
 int test_mode_pin(int pin)
 {
-	return sh_mv.mv_mode_pins() & (1 << pin);
+	return sh_mv.mv_mode_pins() & pin;
 }
 
 static const char *cpu_name[] = {
-- 
cgit v0.10.2


From e4218ef506a2f8b0b5bf1a4b6effcc2f8983f86b Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 2 Jun 2009 09:45:16 +0000
Subject: sh: sh7723 mode pin V2

This patch is sh7723 mode pin V2. Mode pins and
pin function controller comments are added.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/cpu-sh4/cpu/sh7723.h b/arch/sh/include/cpu-sh4/cpu/sh7723.h
index 9d2f6d7..14c8ca9 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7723.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7723.h
@@ -1,6 +1,20 @@
 #ifndef __ASM_SH7723_H__
 #define __ASM_SH7723_H__
 
+/* Boot Mode Pins:
+ *
+ * MD0: CPG - Clock Mode 0->3
+ * MD1: CPG - Clock Mode 0->3
+ * MD2: CPG - Reserved (L: Normal operation)
+ * MD3: BSC - Area0 Bus Width (16/32-bit) [CS0BCR.9,10]
+ * MD5: BSC - Endian Mode (L: Big, H: Little) [CMNCR.3]
+ * MD8: Test Mode
+ */
+
+/* Pin Function Controller:
+ * GPIO_FN_xx - GPIO used to select pin function
+ * GPIO_Pxx - GPIO mapped to real I/O pin on CPU
+ */
 enum {
 	/* PTA */
 	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
-- 
cgit v0.10.2


From 36e5b26bdac0ee3791071d240334853ddd7b9790 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 2 Jun 2009 10:47:32 +0000
Subject: sh: sh7724 mode pin comments

This patch adds comments for the sh7724 mode pins
and pin function controller.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/cpu-sh4/cpu/sh7724.h b/arch/sh/include/cpu-sh4/cpu/sh7724.h
index 34605c9..66fd118 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7724.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7724.h
@@ -1,6 +1,20 @@
 #ifndef __ASM_SH7724_H__
 #define __ASM_SH7724_H__
 
+/* Boot Mode Pins:
+ *
+ * MD0: CPG - Clock Mode 0->7
+ * MD1: CPG - Clock Mode 0->7
+ * MD2: CPG - Clock Mode 0->7
+ * MD3: BSC - Area0 Bus Width (16/32-bit) [CS0BCR.9,10]
+ * MD5: BSC - Endian Mode (L: Big, H: Little) [CMNCR.3]
+ * MD8: Test Mode
+ */
+
+/* Pin Function Controller:
+ * GPIO_FN_xx - GPIO used to select pin function
+ * GPIO_Pxx - GPIO mapped to real I/O pin on CPU
+ */
 enum {
 	/* PTA */
 	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
-- 
cgit v0.10.2


From ed740cb9b7f6eaee7bb45a07ce53ff9e73a92798 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 3 Jun 2009 08:18:56 +0000
Subject: sh: sh7722 mode pin definitions

This patch adds sh7722 mode pin and pin function
controller comments.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h
index 4b3096f..738ea43 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7722.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h
@@ -1,6 +1,20 @@
 #ifndef __ASM_SH7722_H__
 #define __ASM_SH7722_H__
 
+/* Boot Mode Pins:
+ *
+ * MD0: CPG - Clock Mode 0->3
+ * MD1: CPG - Clock Mode 0->3
+ * MD2: CPG - Reserved (L: Normal operation)
+ * MD3: BSC - Area0 Bus Width (16/32-bit) [CS0BCR.9,10]
+ * MD5: BSC - Endian Mode (L: Big, H: Little) [CMNCR.3]
+ * MD8: Test Mode
+ */
+
+/* Pin Function Controller:
+ * GPIO_FN_xx - GPIO used to select pin function
+ * GPIO_Pxx - GPIO mapped to real I/O pin on CPU
+ */
 enum {
 	/* PTA */
 	GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4,
-- 
cgit v0.10.2


From 0ec80fddf1674579d52b5ff94774ba73107cad31 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 3 Jun 2009 08:22:50 +0000
Subject: sh: add Migo-R mode pin configuration

This patch adds mode pin configuration and
a machvec structure to Migo-R.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 1ee1de0..6ed401c 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -584,3 +584,22 @@ static int __init migor_devices_setup(void)
 	return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
 }
 __initcall(migor_devices_setup);
+
+/* Return the board specific boot mode pin configuration */
+static int migor_mode_pins(void)
+{
+	/* MD0=1, MD1=1, MD2=0: Clock Mode 3
+	 * MD3=0: 16-bit Area0 Bus Width
+	 * MD5=1: Little Endian
+	 * TSTMD=1, MD8=0: Test Mode Disabled
+	 */
+	return MODE_PIN0 | MODE_PIN1 | MODE_PIN5;
+}
+
+/*
+ * The Machine Vector
+ */
+static struct sh_machine_vector mv_migor __initmv = {
+	.mv_name		= "Migo-R",
+	.mv_mode_pins		= migor_mode_pins,
+};
-- 
cgit v0.10.2


From c01641b42a88c475fd4b72cff2b10e42262a80fe Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 3 Jun 2009 08:26:40 +0000
Subject: sh: add AP325RXA mode pin configuration

This patch adds mode pin configuration to ap325rxa.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index f2a2964..1c4d83e 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -535,6 +535,18 @@ static int __init ap325rxa_devices_setup(void)
 }
 device_initcall(ap325rxa_devices_setup);
 
+/* Return the board specific boot mode pin configuration */
+static int ap325rxa_mode_pins(void)
+{
+	/* MD0=0, MD1=0, MD2=0: Clock Mode 0
+	 * MD3=0: 16-bit Area0 Bus Width
+	 * MD5=1: Little Endian
+	 * TSTMD=1, MD8=1: Test Mode Disabled
+	 */
+	return MODE_PIN5 | MODE_PIN8;
+}
+
 static struct sh_machine_vector mv_ap325rxa __initmv = {
 	.mv_name = "AP-325RXA",
+	.mv_mode_pins = ap325rxa_mode_pins,
 };
-- 
cgit v0.10.2


From dcae3626d031fe6296b1e96a16f986193a41f840 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Wed, 10 Jun 2009 12:43:48 -0700
Subject: drm: fix LOCK_TEST_WITH_RETURN macro

When this macro isn't called with 'file_priv' this will result in a build
failure.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index b84d8ae..efa5f79 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -237,15 +237,15 @@ struct drm_device;
  * \param dev DRM device.
  * \param filp file pointer of the caller.
  */
-#define LOCK_TEST_WITH_RETURN( dev, file_priv )				\
-do {									\
-	if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock) ||		\
-	    file_priv->master->lock.file_priv != file_priv)	{			\
+#define LOCK_TEST_WITH_RETURN( dev, _file_priv )				\
+do {										\
+	if (!_DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock) ||	\
+	    _file_priv->master->lock.file_priv != _file_priv)	{		\
 		DRM_ERROR( "%s called without lock held, held  %d owner %p %p\n",\
-			   __func__, _DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock),\
-			   file_priv->master->lock.file_priv, file_priv);		\
-		return -EINVAL;						\
-	}								\
+			   __func__, _DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock),\
+			   _file_priv->master->lock.file_priv, _file_priv);	\
+		return -EINVAL;							\
+	}									\
 } while (0)
 
 /**
-- 
cgit v0.10.2


From 1bcbf3948876e31a8ece28597dec447611ad9c8b Mon Sep 17 00:00:00 2001
From: Pavel Roskin <proski@gnu.org>
Date: Wed, 10 Jun 2009 12:43:48 -0700
Subject: intelfb: fix spelling of "CLOCK"

Signed-off-by: Pavel Roskin <proski@gnu.org>
Cc: Eric Anholt <eric@anholt.net>
Cc: Dave Airlie <airlied@linux.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 7a66b91..cbd2ba8 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -207,7 +207,7 @@ static int intelfb_set_par(struct fb_info *info)
 
 	if (var->pixclock != -1) {
 
-		DRM_ERROR("PIXEL CLCOK SET\n");
+		DRM_ERROR("PIXEL CLOCK SET\n");
 		return -EINVAL;
 	} else {
 		struct drm_crtc *crtc;
-- 
cgit v0.10.2


From b798b1fe3b6436275ad1b517a6823d55e3b75c22 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 10 Jun 2009 12:43:49 -0700
Subject: drm: simplify kcalloc() call to kzalloc().

Calls to kcalloc() for a single element can be simplified to calls to
kzalloc().

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Eric Anholt <eric@anholt.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 4984aa8..ec43005 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -133,7 +133,7 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size)
 
 	BUG_ON((size & (PAGE_SIZE - 1)) != 0);
 
-	obj = kcalloc(1, sizeof(*obj), GFP_KERNEL);
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 
 	obj->dev = dev;
 	obj->filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
-- 
cgit v0.10.2


From 098dee99d14e8324d3793df442d6078d0c134140 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 4 Jun 2009 05:31:41 +0000
Subject: sh: add enable()/disable()/set_rate() to div6 code

This patch updates the div6 clock helper code to add support
for enable(), disable() and set_rate() callbacks.

Needed by the camera clock enabling board code on Migo-R.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 026b9da..9fe7d7f 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -119,6 +119,10 @@ long clk_rate_table_round(struct clk *clk,
 			  struct cpufreq_frequency_table *freq_table,
 			  unsigned long rate);
 
+int clk_rate_table_find(struct clk *clk,
+			struct cpufreq_frequency_table *freq_table,
+			unsigned long rate);
+
 #define SH_CLK_MSTP32(_name, _id, _parent, _enable_reg,	\
 	    _enable_bit, _flags)			\
 {							\
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
index fedc8b8..275942e 100644
--- a/arch/sh/kernel/cpu/clock-cpg.c
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -68,9 +68,53 @@ static unsigned long sh_clk_div6_recalc(struct clk *clk)
 	return clk->freq_table[idx].frequency;
 }
 
+static int sh_clk_div6_set_rate(struct clk *clk,
+				unsigned long rate, int algo_id)
+{
+	unsigned long value;
+	int idx;
+
+	idx = clk_rate_table_find(clk, clk->freq_table, rate);
+	if (idx < 0)
+		return idx;
+
+	value = __raw_readl(clk->enable_reg);
+	value &= ~0x3f;
+	value |= idx;
+	__raw_writel(value, clk->enable_reg);
+	return 0;
+}
+
+static int sh_clk_div6_enable(struct clk *clk)
+{
+	unsigned long value;
+	int ret;
+
+	ret = sh_clk_div6_set_rate(clk, clk->rate, 0);
+	if (ret == 0) {
+		value = __raw_readl(clk->enable_reg);
+		value &= ~0x100; /* clear stop bit to enable clock */
+		__raw_writel(value, clk->enable_reg);
+	}
+	return ret;
+}
+
+static void sh_clk_div6_disable(struct clk *clk)
+{
+	unsigned long value;
+
+	value = __raw_readl(clk->enable_reg);
+	value |= 0x100; /* stop clock */
+	value |= 0x3f; /* VDIV bits must be non-zero, overwrite divider */
+	__raw_writel(value, clk->enable_reg);
+}
+
 static struct clk_ops sh_clk_div6_clk_ops = {
 	.recalc		= sh_clk_div6_recalc,
 	.round_rate	= sh_clk_div_round_rate,
+	.set_rate	= sh_clk_div6_set_rate,
+	.enable		= sh_clk_div6_enable,
+	.disable	= sh_clk_div6_disable,
 };
 
 int __init sh_clk_div6_register(struct clk *clks, int nr)
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index aa0fd08..f3a46be 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -111,6 +111,25 @@ long clk_rate_table_round(struct clk *clk,
 	return rate_best_fit;
 }
 
+int clk_rate_table_find(struct clk *clk,
+			struct cpufreq_frequency_table *freq_table,
+			unsigned long rate)
+{
+	int i;
+
+	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		unsigned long freq = freq_table[i].frequency;
+
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (freq == rate)
+			return i;
+	}
+
+	return -ENOENT;
+}
+
 /* Used for clocks that always have same value as the parent clock */
 unsigned long followparent_recalc(struct clk *clk)
 {
-- 
cgit v0.10.2


From c521dc02034df3681394a30b428bf081cfa22253 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 3 Jun 2009 08:39:59 +0000
Subject: sh: sh7723 clock framework rewrite V2

This patch contains V2 of the sh7723 clock framework
rewrite. The new code makes use of the recently merged
div4, div6 and mstp32 helper code. Both extal and dll
are supported as input clocks to the pll.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index dec8b7b..d13390b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -516,7 +516,7 @@ config SH_CLK_CPG
 
 config SH_CLK_CPG_LEGACY
 	depends on SH_CLK_CPG
-	def_bool y if !CPU_SUBTYPE_SH7785
+	def_bool y if !CPU_SUBTYPE_SH7785 && !CPU_SUBTYPE_SH7723
 
 config SH_CLK_MD
 	int "CPU Mode Pin Setting"
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index afd6fba4..25b2833 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -26,7 +26,7 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7785)	:= clock-sh7785.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7786)	:= clock-sh7786.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7722)	:= clock-sh7722.o
-clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7722.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7723.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7724)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SHX3)	:= clock-shx3.o
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index c090c9a..ccefd7d 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -1,7 +1,7 @@
 /*
  * arch/sh/kernel/cpu/sh4a/clock-sh7722.c
  *
- * SH7343, SH7722, SH7723 & SH7366 support for the clock framework
+ * SH7343, SH7722 & SH7366 support for the clock framework
  *
  * Copyright (c) 2006-2007 Nomad Global Solutions Inc
  * Based on code for sh7343 by Paul Mundt
@@ -176,11 +176,6 @@ static unsigned long module_clk_recalc(struct clk *clk)
 #define STCMASK		0x3f
 #define DIVCALC(div)	(div/2-1)
 #define FRQCRKICK	0x80000000
-#elif defined(CONFIG_CPU_SUBTYPE_SH7723)
-#define MASTERDIVS	{ 6, 8, 12, 16 }
-#define STCMASK		0x1f
-#define DIVCALC(div)	(div-1)
-#define FRQCRKICK	0x00000000
 #else
 #define MASTERDIVS	{ 2, 3, 4, 6, 8, 16 }
 #define STCMASK		0x1f
@@ -681,56 +676,6 @@ static struct clk sh7722_mstpcr_clocks[] = {
 	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
 	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
-#if defined(CONFIG_CPU_SUBTYPE_SH7723)
-	/* See page 60 of Datasheet V1.0: Overview -> Block Diagram */
-	MSTPCR("tlb0", "cpu_clk", 0, 31, 0),
-	MSTPCR("ic0", "cpu_clk", 0, 30, 0),
-	MSTPCR("oc0", "cpu_clk", 0, 29, 0),
-	MSTPCR("l2c0", "sh_clk", 0, 28, 0),
-	MSTPCR("ilmem0", "cpu_clk", 0, 27, 0),
-	MSTPCR("fpu0", "cpu_clk", 0, 24, 0),
-	MSTPCR("intc0", "cpu_clk", 0, 22, 0),
-	MSTPCR("dmac0", "bus_clk", 0, 21, 0),
-	MSTPCR("sh0", "sh_clk", 0, 20, 0),
-	MSTPCR("hudi0", "peripheral_clk", 0, 19, 0),
-	MSTPCR("ubc0", "cpu_clk", 0, 17, 0),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
-	MSTPCR("cmt0", "r_clk", 0, 14, 0),
-	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
-	MSTPCR("dmac1", "bus_clk", 0, 12, 0),
-	MSTPCR("tmu1", "peripheral_clk", 0, 11, 0),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10, 0),
-	MSTPCR("scif0", "peripheral_clk", 0, 9, 0),
-	MSTPCR("scif1", "peripheral_clk", 0, 8, 0),
-	MSTPCR("scif2", "peripheral_clk", 0, 7, 0),
-	MSTPCR("scif3", "bus_clk", 0, 6, 0),
-	MSTPCR("scif4", "bus_clk", 0, 5, 0),
-	MSTPCR("scif5", "bus_clk", 0, 4, 0),
-	MSTPCR("msiof0", "bus_clk", 0, 2, 0),
-	MSTPCR("msiof1", "bus_clk", 0, 1, 0),
-	MSTPCR("meram0", "sh_clk", 0, 0, CLK_ENABLE_ON_INIT),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
-	MSTPCR("rtc0", "r_clk", 1, 8, 0),
-	MSTPCR("atapi0", "sh_clk", 2, 28, 0),
-	MSTPCR("adc0", "peripheral_clk", 2, 28, 0),
-	MSTPCR("tpu0", "bus_clk", 2, 25, 0),
-	MSTPCR("irda0", "peripheral_clk", 2, 24, 0),
-	MSTPCR("tsif0", "bus_clk", 2, 22, 0),
-	MSTPCR("icb0", "bus_clk", 2, 21, 0),
-	MSTPCR("sdhi0", "bus_clk", 2, 18, 0),
-	MSTPCR("sdhi1", "bus_clk", 2, 17, 0),
-	MSTPCR("keysc0", "r_clk", 2, 14, 0),
-	MSTPCR("usb0", "bus_clk", 2, 11, 0),
-	MSTPCR("2dg0", "bus_clk", 2, 10, 0),
-	MSTPCR("siu0", "bus_clk", 2, 8, 0),
-	MSTPCR("veu1", "bus_clk", 2, 6, CLK_ENABLE_ON_INIT),
-	MSTPCR("vou0", "bus_clk", 2, 5, 0),
-	MSTPCR("beu0", "bus_clk", 2, 4, 0),
-	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
-	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
-	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
-	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
-#endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7724)
 	/* See Datasheet : Overview -> Block Diagram */
 	MSTPCR("tlb0", "cpu_clk", 0, 31, 0),
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c
new file mode 100644
index 0000000..e67c267
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c
@@ -0,0 +1,222 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/clock-sh7723.c
+ *
+ * SH7723 clock framework support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+/* SH7723 registers */
+#define FRQCR		0xa4150000
+#define VCLKCR		0xa4150004
+#define SCLKACR		0xa4150008
+#define SCLKBCR		0xa415000c
+#define IRDACLKCR	0xa4150018
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define DLLFRQ		0xa4150050
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
+};
+
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
+
+/* The dll multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long dll_recalc(struct clk *clk)
+{
+	unsigned long mult;
+
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(DLLFRQ);
+	else
+		mult = 0;
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops dll_clk_ops = {
+	.recalc		= dll_recalc,
+};
+
+static struct clk dll_clk = {
+	.name           = "dll_clk",
+	.id             = -1,
+	.ops		= &dll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
+{
+	unsigned long mult = 1;
+	unsigned long div = 1;
+
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCR) >> 24) & 0x1f) + 1);
+	else
+		div = 2;
+
+	return (clk->parent->rate * mult) / div;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&dll_clk,
+	&pll_clk,
+};
+
+static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+	.multipliers = multipliers,
+	.nr_multipliers = ARRAY_SIZE(multipliers),
+};
+
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
+       DIV4_SIUA, DIV4_SIUB, DIV4_IRDA, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x0dbf, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x0dbf, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCR, 12, 0x0dbf, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x0dbf, CLK_ENABLE_ON_INIT),
+	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x0db4, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x0dbf, 0),
+	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x0dbf, 0),
+	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x0dbf, 0),
+	[DIV4_IRDA] = DIV4("irda_clk", IRDACLKCR, 0, 0x0dbf, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _force_on, _need_cpg, _need_ram) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _force_on * CLK_ENABLE_ON_INIT)
+
+static struct clk mstp_clks[] = {
+	/* See page 60 of Datasheet V1.0: Overview -> Block Diagram */
+	MSTP("tlb0", &div4_clks[DIV4_I], MSTPCR0, 31, 1, 1, 0),
+	MSTP("ic0", &div4_clks[DIV4_I], MSTPCR0, 30, 1, 1, 0),
+	MSTP("oc0", &div4_clks[DIV4_I], MSTPCR0, 29, 1, 1, 0),
+	MSTP("l2c0", &div4_clks[DIV4_SH], MSTPCR0, 28, 1, 1, 0),
+	MSTP("ilmem0", &div4_clks[DIV4_I], MSTPCR0, 27, 1, 1, 0),
+	MSTP("fpu0", &div4_clks[DIV4_I], MSTPCR0, 24, 1, 1, 0),
+	MSTP("intc0", &div4_clks[DIV4_I], MSTPCR0, 22, 1, 1, 0),
+	MSTP("dmac0", &div4_clks[DIV4_B], MSTPCR0, 21, 0, 1, 1),
+	MSTP("sh0", &div4_clks[DIV4_SH], MSTPCR0, 20, 0, 1, 0),
+	MSTP("hudi0", &div4_clks[DIV4_P], MSTPCR0, 19, 0, 1, 0),
+	MSTP("ubc0", &div4_clks[DIV4_I], MSTPCR0, 17, 0, 1, 0),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0, 1, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0, 0, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0, 0, 0),
+	MSTP("dmac1", &div4_clks[DIV4_B], MSTPCR0, 12, 0, 1, 1),
+	MSTP("tmu1", &div4_clks[DIV4_P], MSTPCR0, 11, 0, 1, 0),
+	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0, 1, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 9, 0, 1, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 8, 0, 1, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 7, 0, 1, 0),
+	MSTP("scif3", &div4_clks[DIV4_B], MSTPCR0, 6, 0, 1, 0),
+	MSTP("scif4", &div4_clks[DIV4_B], MSTPCR0, 5, 0, 1, 0),
+	MSTP("scif5", &div4_clks[DIV4_B], MSTPCR0, 4, 0, 1, 0),
+	MSTP("msiof0", &div4_clks[DIV4_B], MSTPCR0, 2, 0, 1, 0),
+	MSTP("msiof1", &div4_clks[DIV4_B], MSTPCR0, 1, 0, 1, 0),
+	MSTP("meram0", &div4_clks[DIV4_SH], MSTPCR0, 0, 1, 1, 0),
+
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0, 1, 0),
+	MSTP("rtc0", &r_clk, MSTPCR1, 8, 0, 0, 0),
+
+	MSTP("atapi0", &div4_clks[DIV4_SH], MSTPCR2, 28, 0, 1, 0),
+	MSTP("adc0", &div4_clks[DIV4_P], MSTPCR2, 27, 0, 1, 0),
+	MSTP("tpu0", &div4_clks[DIV4_B], MSTPCR2, 25, 0, 1, 0),
+	MSTP("irda0", &div4_clks[DIV4_P], MSTPCR2, 24, 0, 1, 0),
+	MSTP("tsif0", &div4_clks[DIV4_B], MSTPCR2, 22, 0, 1, 0),
+	MSTP("icb0", &div4_clks[DIV4_B], MSTPCR2, 21, 0, 1, 1),
+	MSTP("sdhi0", &div4_clks[DIV4_B], MSTPCR2, 18, 0, 1, 0),
+	MSTP("sdhi1", &div4_clks[DIV4_B], MSTPCR2, 17, 0, 1, 0),
+	MSTP("keysc0", &r_clk, MSTPCR2, 14, 0, 0, 0),
+	MSTP("usb0", &div4_clks[DIV4_B], MSTPCR2, 11, 0, 1, 0),
+	MSTP("2dg0", &div4_clks[DIV4_B], MSTPCR2, 10, 0, 1, 1),
+	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 8, 0, 1, 0),
+	MSTP("veu1", &div4_clks[DIV4_B], MSTPCR2, 6, 1, 1, 1),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0, 1, 1),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0, 1, 1),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0, 1, 1),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, 1, 1, 1),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, 1, 1, 1),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0, 1, 1),
+};
+
+int __init arch_clk_init(void)
+{
+	int k, ret = 0;
+
+	/* autodetect extal or dll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &dll_clk;
+	else
+		pll_clk.parent = &extal_clk;
+
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
-- 
cgit v0.10.2


From b621370a3505f8bd42acc41736cae47d5ce8bd06 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 10 Jun 2009 11:31:16 +0000
Subject: sh: sh7724 clock framework rewrite V3

This patch contains V3 of the sh7724 clock framework
rewrite. The new code makes use of the recently merged
div4, div6 and mstp32 helper code. Both extal and fll are
supported as input clocks to the pll. The div6 clocks are
fed through a divide-by-3 block.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index d13390b..7b564bd 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -516,7 +516,7 @@ config SH_CLK_CPG
 
 config SH_CLK_CPG_LEGACY
 	depends on SH_CLK_CPG
-	def_bool y if !CPU_SUBTYPE_SH7785 && !CPU_SUBTYPE_SH7723
+	def_bool y if !CPU_SUBTYPE_SH7785 && !CPU_SUBTYPE_SH7723 && !CPU_SUBTYPE_SH7724
 
 config SH_CLK_MD
 	int "CPU Mode Pin Setting"
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 25b2833..02a0b17 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -27,7 +27,7 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7786)	:= clock-sh7786.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7722)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7723.o
-clock-$(CONFIG_CPU_SUBTYPE_SH7724)	:= clock-sh7722.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7724)	:= clock-sh7724.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SHX3)	:= clock-shx3.o
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index ccefd7d..5e08504 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -130,11 +130,7 @@ static void adjust_clocks(int originate, int *l, unsigned long v[],
  * is quite simple..
  */
 
-#if defined(CONFIG_CPU_SUBTYPE_SH7724)
-#define STCPLL(frqcr) ((((frqcr >> 24) & 0x3f) + 1) * 2)
-#else
 #define STCPLL(frqcr) (((frqcr >> 24) & 0x1f) + 1)
-#endif
 
 /*
  * Instead of having two separate multipliers/divisors set, like this:
@@ -145,11 +141,7 @@ static void adjust_clocks(int originate, int *l, unsigned long v[],
  * I created the divisors2 array, which is used to calculate rate like
  *   rate = parent * 2 / divisors2[ divisor ];
 */
-#if defined(CONFIG_CPU_SUBTYPE_SH7724)
-static int divisors2[] = { 4, 1, 8, 12, 16, 24, 32, 1, 48, 64, 72, 96, 1, 144 };
-#else
 static int divisors2[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32, 40 };
-#endif
 
 static unsigned long master_clk_recalc(struct clk *clk)
 {
@@ -171,17 +163,10 @@ static unsigned long module_clk_recalc(struct clk *clk)
 	return clk->parent->rate / STCPLL(frqcr);
 }
 
-#if defined(CONFIG_CPU_SUBTYPE_SH7724)
-#define MASTERDIVS	{ 12, 16, 24, 30, 32, 36, 48 }
-#define STCMASK		0x3f
-#define DIVCALC(div)	(div/2-1)
-#define FRQCRKICK	0x80000000
-#else
 #define MASTERDIVS	{ 2, 3, 4, 6, 8, 16 }
 #define STCMASK		0x1f
 #define DIVCALC(div)	(div-1)
 #define FRQCRKICK	0x00000000
-#endif
 
 static int master_clk_setrate(struct clk *clk, unsigned long rate, int id)
 {
@@ -557,8 +542,7 @@ static struct clk sh7722_r_clock = {
 	.rate = 32768,
 };
 
-#if !defined(CONFIG_CPU_SUBTYPE_SH7343) &&\
-    !defined(CONFIG_CPU_SUBTYPE_SH7724)
+#if !defined(CONFIG_CPU_SUBTYPE_SH7343)
 /*
  * these three clocks - SIU A, SIU B, IrDA - share the same clk_ops
  * methods of clk_ops determine which register they should access by
@@ -575,10 +559,9 @@ static struct clk sh7722_siu_b_clock = {
 	.arch_flags = SCLKBCR,
 	.ops = &sh7722_siu_clk_ops,
 };
-#endif /* CONFIG_CPU_SUBTYPE_SH7343, SH7724 */
+#endif /* CONFIG_CPU_SUBTYPE_SH7343 */
 
-#if defined(CONFIG_CPU_SUBTYPE_SH7722) ||\
-    defined(CONFIG_CPU_SUBTYPE_SH7724)
+#if defined(CONFIG_CPU_SUBTYPE_SH7722)
 static struct clk sh7722_irda_clock = {
 	.name = "irda_clk",
 	.arch_flags = IrDACLKCR,
@@ -676,61 +659,6 @@ static struct clk sh7722_mstpcr_clocks[] = {
 	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
 	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
-#if defined(CONFIG_CPU_SUBTYPE_SH7724)
-	/* See Datasheet : Overview -> Block Diagram */
-	MSTPCR("tlb0", "cpu_clk", 0, 31, 0),
-	MSTPCR("ic0", "cpu_clk", 0, 30, 0),
-	MSTPCR("oc0", "cpu_clk", 0, 29, 0),
-	MSTPCR("rs0", "bus_clk", 0, 28, 0),
-	MSTPCR("ilmem0", "cpu_clk", 0, 27, 0),
-	MSTPCR("l2c0", "sh_clk", 0, 26, 0),
-	MSTPCR("fpu0", "cpu_clk", 0, 24, 0),
-	MSTPCR("intc0", "peripheral_clk", 0, 22, 0),
-	MSTPCR("dmac0", "bus_clk", 0, 21, 0),
-	MSTPCR("sh0", "sh_clk", 0, 20, 0),
-	MSTPCR("hudi0", "peripheral_clk", 0, 19, 0),
-	MSTPCR("ubc0", "cpu_clk", 0, 17, 0),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
-	MSTPCR("cmt0", "r_clk", 0, 14, 0),
-	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
-	MSTPCR("dmac1", "bus_clk", 0, 12, 0),
-	MSTPCR("tmu1", "peripheral_clk", 0, 10, 0),
-	MSTPCR("scif0", "peripheral_clk", 0, 9, 0),
-	MSTPCR("scif1", "peripheral_clk", 0, 8, 0),
-	MSTPCR("scif2", "peripheral_clk", 0, 7, 0),
-	MSTPCR("scif3", "bus_clk", 0, 6, 0),
-	MSTPCR("scif4", "bus_clk", 0, 5, 0),
-	MSTPCR("scif5", "bus_clk", 0, 4, 0),
-	MSTPCR("msiof0", "bus_clk", 0, 2, 0),
-	MSTPCR("msiof1", "bus_clk", 0, 1, 0),
-	MSTPCR("keysc0", "r_clk", 1, 12, 0),
-	MSTPCR("rtc0", "r_clk", 1, 11, 0),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
-	MSTPCR("i2c1", "peripheral_clk", 1, 8, 0),
-	MSTPCR("mmc0", "bus_clk", 2, 29, 0),
-	MSTPCR("eth0", "bus_clk", 2, 28, 0),
-	MSTPCR("atapi0", "bus_clk", 2, 26, 0),
-	MSTPCR("tpu0", "bus_clk", 2, 25, 0),
-	MSTPCR("irda0", "peripheral_clk", 2, 24, 0),
-	MSTPCR("tsif0", "bus_clk", 2, 22, 0),
-	MSTPCR("usb1", "bus_clk", 2, 21, 0),
-	MSTPCR("usb0", "bus_clk", 2, 20, 0),
-	MSTPCR("2dg0", "bus_clk", 2, 19, 0),
-	MSTPCR("sdhi0", "bus_clk", 2, 18, 0),
-	MSTPCR("sdhi1", "bus_clk", 2, 17, 0),
-	MSTPCR("veu1", "bus_clk", 2, 15, CLK_ENABLE_ON_INIT),
-	MSTPCR("ceu1", "bus_clk", 2, 13, 0),
-	MSTPCR("beu1", "bus_clk", 2, 12, 0),
-	MSTPCR("2ddmac0", "sh_clk", 2, 10, 0),
-	MSTPCR("spu0", "bus_clk", 2, 9, 0),
-	MSTPCR("jpu0", "bus_clk", 2, 6, 0),
-	MSTPCR("vou0", "bus_clk", 2, 5, 0),
-	MSTPCR("beu0", "bus_clk", 2, 4, 0),
-	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
-	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
-	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
-	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
-#endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7343)
 	MSTPCR("uram0", "umem_clk", 0, 28, CLK_ENABLE_ON_INIT),
 	MSTPCR("xymem0", "bus_clk", 0, 26, CLK_ENABLE_ON_INIT),
@@ -802,14 +730,11 @@ static struct clk *sh7722_clocks[] = {
 	&sh7722_sh_clock,
 	&sh7722_peripheral_clock,
 	&sh7722_sdram_clock,
-#if !defined(CONFIG_CPU_SUBTYPE_SH7343) &&\
-    !defined(CONFIG_CPU_SUBTYPE_SH7724)
+#if !defined(CONFIG_CPU_SUBTYPE_SH7343)
 	&sh7722_siu_a_clock,
 	&sh7722_siu_b_clock,
 #endif
-/* 7724 should support FSI clock */
-#if defined(CONFIG_CPU_SUBTYPE_SH7722) || \
-    defined(CONFIG_CPU_SUBTYPE_SH7724)
+#if defined(CONFIG_CPU_SUBTYPE_SH7722)
 	&sh7722_irda_clock,
 #endif
 	&sh7722_video_clock,
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
new file mode 100644
index 0000000..5d5c9b9
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -0,0 +1,242 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+ *
+ * SH7724 clock framework support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+/* SH7724 registers */
+#define FRQCRA		0xa4150000
+#define FRQCRB		0xa4150004
+#define VCLKCR		0xa4150048
+#define FCLKACR		0xa4150008
+#define FCLKBCR		0xa415000c
+#define IRDACLKCR	0xa4150018
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define SPUCLKCR	0xa415003c
+#define FLLFRQ		0xa4150050
+#define LSTATS		0xa4150060
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
+};
+
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
+
+/* The fll multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long fll_recalc(struct clk *clk)
+{
+	unsigned long mult = 0;
+	unsigned long div = 1;
+
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(FLLFRQ) & 0x3ff;
+
+	if (__raw_readl(FLLFRQ) & 0x4000)
+		div = 2;
+
+	return (clk->parent->rate * mult) / div;
+}
+
+static struct clk_ops fll_clk_ops = {
+	.recalc		= fll_recalc,
+};
+
+static struct clk fll_clk = {
+	.name           = "fll_clk",
+	.id             = -1,
+	.ops		= &fll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
+{
+	unsigned long mult = 1;
+
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCRA) >> 24) & 0x3f) + 1) * 2;
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+/* A fixed divide-by-3 block use by the div6 clocks */
+static unsigned long div3_recalc(struct clk *clk)
+{
+	return clk->parent->rate / 3;
+}
+
+static struct clk_ops div3_clk_ops = {
+	.recalc		= div3_recalc,
+};
+
+static struct clk div3_clk = {
+	.name		= "div3_clk",
+	.id		= -1,
+	.ops		= &div3_clk_ops,
+	.parent		= &pll_clk,
+};
+
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&fll_clk,
+	&pll_clk,
+	&div3_clk,
+};
+
+static int divisors[] = { 2, 0, 4, 6, 8, 12, 16, 0, 24, 32, 36, 48, 0, 72 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+};
+
+enum { DIV4_I, DIV4_SH, DIV4_B, DIV4_P, DIV4_M1, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCRA, 20, 0x2f7d, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCRA, 12, 0x2f7c, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCRA, 8, 0x2f7c, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCRA, 0, 0x2f7c, 0),
+	[DIV4_M1] = DIV4("vpu_clk", FRQCRB, 4, 0x2f7c, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &div3_clk, VCLKCR, 0),
+	SH_CLK_DIV6("fsia_clk", &div3_clk, FCLKACR, 0),
+	SH_CLK_DIV6("fsib_clk", &div3_clk, FCLKBCR, 0),
+	SH_CLK_DIV6("irda_clk", &div3_clk, IRDACLKCR, 0),
+	SH_CLK_DIV6("spu_clk", &div3_clk, SPUCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _force_on, _need_cpg, _need_ram) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _force_on * CLK_ENABLE_ON_INIT)
+
+static struct clk mstp_clks[] = {
+	MSTP("tlb0", &div4_clks[DIV4_I], MSTPCR0, 31, 1, 1, 0),
+	MSTP("ic0", &div4_clks[DIV4_I], MSTPCR0, 30, 1, 1, 0),
+	MSTP("oc0", &div4_clks[DIV4_I], MSTPCR0, 29, 1, 1, 0),
+	MSTP("rs0", &div4_clks[DIV4_B], MSTPCR0, 28, 1, 1, 0),
+	MSTP("ilmem0", &div4_clks[DIV4_I], MSTPCR0, 27, 1, 1, 0),
+	MSTP("l2c0", &div4_clks[DIV4_SH], MSTPCR0, 26, 1, 1, 0),
+	MSTP("fpu0", &div4_clks[DIV4_I], MSTPCR0, 24, 1, 1, 0),
+	MSTP("intc0", &div4_clks[DIV4_P], MSTPCR0, 22, 1, 1, 0),
+	MSTP("dmac0", &div4_clks[DIV4_B], MSTPCR0, 21, 0, 1, 1),
+	MSTP("sh0", &div4_clks[DIV4_SH], MSTPCR0, 20, 0, 1, 0),
+	MSTP("hudi0", &div4_clks[DIV4_P], MSTPCR0, 19, 0, 1, 0),
+	MSTP("ubc0", &div4_clks[DIV4_I], MSTPCR0, 17, 0, 1, 0),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0, 1, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0, 0, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0, 0, 0),
+	MSTP("dmac1", &div4_clks[DIV4_B], MSTPCR0, 12, 0, 1, 1),
+	MSTP("tmu1", &div4_clks[DIV4_P], MSTPCR0, 10, 0, 1, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 9, 0, 1, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 8, 0, 1, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 7, 0, 1, 0),
+	MSTP("scif3", &div4_clks[DIV4_B], MSTPCR0, 6, 0, 1, 0),
+	MSTP("scif4", &div4_clks[DIV4_B], MSTPCR0, 5, 0, 1, 0),
+	MSTP("scif5", &div4_clks[DIV4_B], MSTPCR0, 4, 0, 1, 0),
+	MSTP("msiof0", &div4_clks[DIV4_B], MSTPCR0, 2, 0, 1, 0),
+	MSTP("msiof1", &div4_clks[DIV4_B], MSTPCR0, 1, 0, 1, 0),
+
+	MSTP("keysc0", &r_clk, MSTPCR1, 12, 0, 0, 0),
+	MSTP("rtc0", &r_clk, MSTPCR1, 11, 0, 0, 0),
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0, 1, 0),
+	MSTP("i2c1", &div4_clks[DIV4_P], MSTPCR1, 8, 0, 1, 0),
+
+	MSTP("mmc0", &div4_clks[DIV4_B], MSTPCR2, 29, 0, 1, 0),
+	MSTP("eth0", &div4_clks[DIV4_B], MSTPCR2, 28, 0, 1, 0),
+	MSTP("atapi0", &div4_clks[DIV4_B], MSTPCR2, 26, 0, 1, 0),
+	MSTP("tpu0", &div4_clks[DIV4_B], MSTPCR2, 25, 0, 1, 0),
+	MSTP("irda0", &div4_clks[DIV4_P], MSTPCR2, 24, 0, 1, 0),
+	MSTP("tsif0", &div4_clks[DIV4_B], MSTPCR2, 22, 0, 1, 0),
+	MSTP("usb1", &div4_clks[DIV4_B], MSTPCR2, 21, 0, 1, 1),
+	MSTP("usb0", &div4_clks[DIV4_B], MSTPCR2, 20, 0, 1, 1),
+	MSTP("2dg0", &div4_clks[DIV4_B], MSTPCR2, 19, 0, 1, 1),
+	MSTP("sdhi0", &div4_clks[DIV4_B], MSTPCR2, 18, 0, 1, 0),
+	MSTP("sdhi1", &div4_clks[DIV4_B], MSTPCR2, 17, 0, 1, 0),
+	MSTP("veu1", &div4_clks[DIV4_B], MSTPCR2, 15, 1, 1, 1),
+	MSTP("ceu1", &div4_clks[DIV4_B], MSTPCR2, 13, 0, 1, 1),
+	MSTP("beu1", &div4_clks[DIV4_B], MSTPCR2, 12, 0, 1, 1),
+	MSTP("2ddmac0", &div4_clks[DIV4_SH], MSTPCR2, 10, 0, 1, 1),
+	MSTP("spu0", &div4_clks[DIV4_B], MSTPCR2, 9, 0, 1, 0),
+	MSTP("jpu0", &div4_clks[DIV4_B], MSTPCR2, 6, 1, 1, 1),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0, 1, 1),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0, 1, 1),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0, 1, 1),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, 1, 1, 1),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, 1, 1, 1),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0, 1, 1),
+};
+
+int __init arch_clk_init(void)
+{
+	int k, ret = 0;
+
+	/* autodetect extal or fll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &fll_clk;
+	else
+		pll_clk.parent = &extal_clk;
+
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
-- 
cgit v0.10.2


From bc49b6eaac6eff86f902a36d846c310e1e0beedf Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 4 Jun 2009 07:32:11 +0000
Subject: sh: sh7343 clock framework rewrite

This patch rewrites the sh7343 clock framework code.
The new code makes use of the recently merged div4,
div6 and mstp32 helper code. Both extal and dll are
supported as input clocks to the pll.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 7b564bd..3980714 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -516,7 +516,7 @@ config SH_CLK_CPG
 
 config SH_CLK_CPG_LEGACY
 	depends on SH_CLK_CPG
-	def_bool y if !CPU_SUBTYPE_SH7785 && !CPU_SUBTYPE_SH7723 && !CPU_SUBTYPE_SH7724
+	def_bool y if !CPU_SUBTYPE_SH7785 && !CPU_SUBTYPE_SH7723 && !CPU_SUBTYPE_SH7724 && !CPU_SUBTYPE_SH7343
 
 config SH_CLK_MD
 	int "CPU Mode Pin Setting"
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 02a0b17..f4415f9 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -24,7 +24,7 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7770)	:= clock-sh7770.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7780)	:= clock-sh7780.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7785)	:= clock-sh7785.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7786)	:= clock-sh7786.o
-clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7722.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7343.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7722)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7723.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7724)	:= clock-sh7724.o
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c
new file mode 100644
index 0000000..0ee3ee8
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c
@@ -0,0 +1,211 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/clock-sh7343.c
+ *
+ * SH7343 clock framework support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+/* SH7343 registers */
+#define FRQCR		0xa4150000
+#define VCLKCR		0xa4150004
+#define SCLKACR		0xa4150008
+#define SCLKBCR		0xa415000c
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define DLLFRQ		0xa4150050
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
+};
+
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
+
+/* The dll block multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long dll_recalc(struct clk *clk)
+{
+	unsigned long mult;
+
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(DLLFRQ);
+	else
+		mult = 0;
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops dll_clk_ops = {
+	.recalc		= dll_recalc,
+};
+
+static struct clk dll_clk = {
+	.name           = "dll_clk",
+	.id             = -1,
+	.ops		= &dll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
+{
+	unsigned long mult = 1;
+
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCR) >> 24) & 0x1f) + 1);
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&dll_clk,
+	&pll_clk,
+};
+
+static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+	.multipliers = multipliers,
+	.nr_multipliers = ARRAY_SIZE(multipliers),
+};
+
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
+       DIV4_SIUA, DIV4_SIUB, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCR, 12, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x1fff, 0),
+	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x1fff, 0),
+	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x1fff, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _flags) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _flags)
+
+static struct clk mstp_clks[] = {
+	MSTP("tlb0", &div4_clks[DIV4_I], MSTPCR0, 31, CLK_ENABLE_ON_INIT),
+	MSTP("ic0", &div4_clks[DIV4_I], MSTPCR0, 30, CLK_ENABLE_ON_INIT),
+	MSTP("oc0", &div4_clks[DIV4_I], MSTPCR0, 29, CLK_ENABLE_ON_INIT),
+	MSTP("uram0", &div4_clks[DIV4_U], MSTPCR0, 28, CLK_ENABLE_ON_INIT),
+	MSTP("xymem0", &div4_clks[DIV4_B], MSTPCR0, 26, CLK_ENABLE_ON_INIT),
+	MSTP("intc3", &div4_clks[DIV4_P], MSTPCR0, 23, 0),
+	MSTP("intc0", &div4_clks[DIV4_P], MSTPCR0, 22, 0),
+	MSTP("dmac0", &div4_clks[DIV4_P], MSTPCR0, 21, 0),
+	MSTP("sh0", &div4_clks[DIV4_P], MSTPCR0, 20, 0),
+	MSTP("hudi0", &div4_clks[DIV4_P], MSTPCR0, 19, 0),
+	MSTP("ubc0", &div4_clks[DIV4_P], MSTPCR0, 17, 0),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0),
+	MSTP("mfi0", &div4_clks[DIV4_P], MSTPCR0, 11, 0),
+	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 7, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 6, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 5, 0),
+	MSTP("scif3", &div4_clks[DIV4_P], MSTPCR0, 4, 0),
+	MSTP("sio0", &div4_clks[DIV4_P], MSTPCR0, 3, 0),
+	MSTP("siof0", &div4_clks[DIV4_P], MSTPCR0, 2, 0),
+	MSTP("siof1", &div4_clks[DIV4_P], MSTPCR0, 1, 0),
+
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0),
+	MSTP("i2c1", &div4_clks[DIV4_P], MSTPCR1, 8, 0),
+
+	MSTP("tpu0", &div4_clks[DIV4_P], MSTPCR2, 25, 0),
+	MSTP("irda0", &div4_clks[DIV4_P], MSTPCR2, 24, 0),
+	MSTP("sdhi0", &div4_clks[DIV4_P], MSTPCR2, 18, 0),
+	MSTP("mmcif0", &div4_clks[DIV4_P], MSTPCR2, 17, 0),
+	MSTP("sim0", &div4_clks[DIV4_P], MSTPCR2, 16, 0),
+	MSTP("keysc0", &r_clk, MSTPCR2, 14, 0),
+	MSTP("tsif0", &div4_clks[DIV4_P], MSTPCR2, 13, 0),
+	MSTP("s3d40", &div4_clks[DIV4_P], MSTPCR2, 12, 0),
+	MSTP("usbf0", &div4_clks[DIV4_P], MSTPCR2, 11, 0),
+	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 8, 0),
+	MSTP("jpu0", &div4_clks[DIV4_B], MSTPCR2, 6, CLK_ENABLE_ON_INIT),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, CLK_ENABLE_ON_INIT),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, CLK_ENABLE_ON_INIT),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0),
+};
+
+int __init arch_clk_init(void)
+{
+	int k, ret = 0;
+
+	/* autodetect extal or dll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &dll_clk;
+	else
+		pll_clk.parent = &extal_clk;
+
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 5e08504..8aaaac2 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -1,7 +1,7 @@
 /*
  * arch/sh/kernel/cpu/sh4a/clock-sh7722.c
  *
- * SH7343, SH7722 & SH7366 support for the clock framework
+ * SH7722 & SH7366 support for the clock framework
  *
  * Copyright (c) 2006-2007 Nomad Global Solutions Inc
  * Based on code for sh7343 by Paul Mundt
@@ -417,7 +417,6 @@ static struct clk_ops sh7722_frqcr_clk_ops = {
 /*
  * clock ops methods for SIU A/B and IrDA clock
  */
-#ifndef CONFIG_CPU_SUBTYPE_SH7343
 static int sh7722_siu_set_rate(struct clk *clk, unsigned long rate, int algo_id)
 {
 	unsigned long r;
@@ -469,8 +468,6 @@ static struct clk_ops sh7722_siu_clk_ops = {
 	.disable = sh7722_siu_disable,
 };
 
-#endif /* CONFIG_CPU_SUBTYPE_SH7343 */
-
 static int sh7722_video_enable(struct clk *clk)
 {
 	unsigned long r;
@@ -542,7 +539,6 @@ static struct clk sh7722_r_clock = {
 	.rate = 32768,
 };
 
-#if !defined(CONFIG_CPU_SUBTYPE_SH7343)
 /*
  * these three clocks - SIU A, SIU B, IrDA - share the same clk_ops
  * methods of clk_ops determine which register they should access by
@@ -559,7 +555,6 @@ static struct clk sh7722_siu_b_clock = {
 	.arch_flags = SCLKBCR,
 	.ops = &sh7722_siu_clk_ops,
 };
-#endif /* CONFIG_CPU_SUBTYPE_SH7343 */
 
 #if defined(CONFIG_CPU_SUBTYPE_SH7722)
 static struct clk sh7722_irda_clock = {
@@ -659,30 +654,6 @@ static struct clk sh7722_mstpcr_clocks[] = {
 	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
 	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
-#if defined(CONFIG_CPU_SUBTYPE_SH7343)
-	MSTPCR("uram0", "umem_clk", 0, 28, CLK_ENABLE_ON_INIT),
-	MSTPCR("xymem0", "bus_clk", 0, 26, CLK_ENABLE_ON_INIT),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
-	MSTPCR("cmt0", "r_clk", 0, 14, 0),
-	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
-	MSTPCR("scif0", "peripheral_clk", 0, 7, 0),
-	MSTPCR("scif1", "peripheral_clk", 0, 6, 0),
-	MSTPCR("scif2", "peripheral_clk", 0, 5, 0),
-	MSTPCR("scif3", "peripheral_clk", 0, 4, 0),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
-	MSTPCR("i2c1", "peripheral_clk", 1, 8, 0),
-	MSTPCR("sdhi0", "peripheral_clk", 2, 18, 0),
-	MSTPCR("keysc0", "r_clk", 2, 14, 0),
-	MSTPCR("usbf0", "peripheral_clk", 2, 11, 0),
-	MSTPCR("siu0", "bus_clk", 2, 8, 0),
-	MSTPCR("jpu0", "bus_clk", 2, 6, CLK_ENABLE_ON_INIT),
-	MSTPCR("vou0", "bus_clk", 2, 5, 0),
-	MSTPCR("beu0", "bus_clk", 2, 4, 0),
-	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
-	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
-	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
-	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
-#endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7366)
 	/* See page 52 of Datasheet V0.40: Overview -> Block Diagram */
 	MSTPCR("tlb0", "cpu_clk", 0, 31, 0),
@@ -730,10 +701,8 @@ static struct clk *sh7722_clocks[] = {
 	&sh7722_sh_clock,
 	&sh7722_peripheral_clock,
 	&sh7722_sdram_clock,
-#if !defined(CONFIG_CPU_SUBTYPE_SH7343)
 	&sh7722_siu_a_clock,
 	&sh7722_siu_b_clock,
-#endif
 #if defined(CONFIG_CPU_SUBTYPE_SH7722)
 	&sh7722_irda_clock,
 #endif
-- 
cgit v0.10.2


From 4ed37394842e5f9c0c4ea087e8716d4458d03bfb Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 4 Jun 2009 07:35:25 +0000
Subject: sh: sh7366 clock framework rewrite

This patch rewrites the sh7366 clock framework code.
The new code makes use of the recently merged div4,
div6 and mstp32 helper code. Both extal and dll are
supported as input clocks to the pll.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 3980714..dbc10ff 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -516,7 +516,7 @@ config SH_CLK_CPG
 
 config SH_CLK_CPG_LEGACY
 	depends on SH_CLK_CPG
-	def_bool y if !CPU_SUBTYPE_SH7785 && !CPU_SUBTYPE_SH7723 && !CPU_SUBTYPE_SH7724 && !CPU_SUBTYPE_SH7343
+	def_bool y if !CPU_SUBTYPE_SH7785 && !CPU_SUBTYPE_SH7723 && !CPU_SUBTYPE_SH7724 && !CPU_SUBTYPE_SH7343 && !CPU_SUBTYPE_SH7366
 
 config SH_CLK_MD
 	int "CPU Mode Pin Setting"
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index f4415f9..96ea09c 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -28,7 +28,7 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7343.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7722)	:= clock-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7723.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7724)	:= clock-sh7724.o
-clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7722.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7366.o
 clock-$(CONFIG_CPU_SUBTYPE_SHX3)	:= clock-shx3.o
 
 # Pinmux setup
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c
new file mode 100644
index 0000000..a95ebab
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c
@@ -0,0 +1,211 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/clock-sh7366.c
+ *
+ * SH7366 clock framework support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/clock.h>
+
+/* SH7366 registers */
+#define FRQCR		0xa4150000
+#define VCLKCR		0xa4150004
+#define SCLKACR		0xa4150008
+#define SCLKBCR		0xa415000c
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define DLLFRQ		0xa4150050
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
+};
+
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
+};
+
+/* The dll block multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long dll_recalc(struct clk *clk)
+{
+	unsigned long mult;
+
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(DLLFRQ);
+	else
+		mult = 0;
+
+	return clk->parent->rate * mult;
+}
+
+static struct clk_ops dll_clk_ops = {
+	.recalc		= dll_recalc,
+};
+
+static struct clk dll_clk = {
+	.name           = "dll_clk",
+	.id             = -1,
+	.ops		= &dll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+static unsigned long pll_recalc(struct clk *clk)
+{
+	unsigned long mult = 1;
+	unsigned long div = 1;
+
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCR) >> 24) & 0x1f) + 1);
+	else
+		div = 2;
+
+	return (clk->parent->rate * mult) / div;
+}
+
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
+};
+
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
+
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&dll_clk,
+	&pll_clk,
+};
+
+static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
+
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+	.multipliers = multipliers,
+	.nr_multipliers = ARRAY_SIZE(multipliers),
+};
+
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
+       DIV4_SIUA, DIV4_SIUB, DIV4_NR };
+
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
+
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x1fef, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCR, 12, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x1fff, 0),
+	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x1fff, 0),
+	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x1fff, 0),
+};
+
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
+};
+
+#define MSTP(_str, _parent, _reg, _bit, _flags) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _flags)
+
+static struct clk mstp_clks[] = {
+	/* See page 52 of Datasheet V0.40: Overview -> Block Diagram */
+	MSTP("tlb0", &div4_clks[DIV4_I], MSTPCR0, 31, CLK_ENABLE_ON_INIT),
+	MSTP("ic0", &div4_clks[DIV4_I], MSTPCR0, 30, CLK_ENABLE_ON_INIT),
+	MSTP("oc0", &div4_clks[DIV4_I], MSTPCR0, 29, CLK_ENABLE_ON_INIT),
+	MSTP("rsmem0", &div4_clks[DIV4_SH], MSTPCR0, 28, CLK_ENABLE_ON_INIT),
+	MSTP("xymem0", &div4_clks[DIV4_B], MSTPCR0, 26, CLK_ENABLE_ON_INIT),
+	MSTP("intc3", &div4_clks[DIV4_P], MSTPCR0, 23, 0),
+	MSTP("intc0", &div4_clks[DIV4_P], MSTPCR0, 22, 0),
+	MSTP("dmac0", &div4_clks[DIV4_P], MSTPCR0, 21, 0),
+	MSTP("sh0", &div4_clks[DIV4_P], MSTPCR0, 20, 0),
+	MSTP("hudi0", &div4_clks[DIV4_P], MSTPCR0, 19, 0),
+	MSTP("ubc0", &div4_clks[DIV4_P], MSTPCR0, 17, 0),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0),
+	MSTP("mfi0", &div4_clks[DIV4_P], MSTPCR0, 11, 0),
+	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 7, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 6, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 5, 0),
+	MSTP("msiof0", &div4_clks[DIV4_P], MSTPCR0, 2, 0),
+	MSTP("sbr0", &div4_clks[DIV4_P], MSTPCR0, 1, 0),
+
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0),
+
+	MSTP("icb0", &div4_clks[DIV4_P], MSTPCR2, 27, 0),
+	MSTP("meram0", &div4_clks[DIV4_P], MSTPCR2, 26, 0),
+	MSTP("dacy1", &div4_clks[DIV4_P], MSTPCR2, 24, 0),
+	MSTP("dacy0", &div4_clks[DIV4_P], MSTPCR2, 23, 0),
+	MSTP("tsif0", &div4_clks[DIV4_P], MSTPCR2, 22, 0),
+	MSTP("sdhi0", &div4_clks[DIV4_P], MSTPCR2, 18, 0),
+	MSTP("mmcif0", &div4_clks[DIV4_P], MSTPCR2, 17, 0),
+	MSTP("usbf0", &div4_clks[DIV4_P], MSTPCR2, 11, 0),
+	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 9, 0),
+	MSTP("veu1", &div4_clks[DIV4_B], MSTPCR2, 7, CLK_ENABLE_ON_INIT),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, CLK_ENABLE_ON_INIT),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, CLK_ENABLE_ON_INIT),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0),
+};
+
+int __init arch_clk_init(void)
+{
+	int k, ret = 0;
+
+	/* autodetect extal or dll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &dll_clk;
+	else
+		pll_clk.parent = &extal_clk;
+
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
+
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
+
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+
+	return ret;
+}
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 8aaaac2..6ca7c74 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -1,7 +1,7 @@
 /*
  * arch/sh/kernel/cpu/sh4a/clock-sh7722.c
  *
- * SH7722 & SH7366 support for the clock framework
+ * SH7722 support for the clock framework
  *
  * Copyright (c) 2006-2007 Nomad Global Solutions Inc
  * Based on code for sh7343 by Paul Mundt
@@ -654,46 +654,6 @@ static struct clk sh7722_mstpcr_clocks[] = {
 	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
 	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
 #endif
-#if defined(CONFIG_CPU_SUBTYPE_SH7366)
-	/* See page 52 of Datasheet V0.40: Overview -> Block Diagram */
-	MSTPCR("tlb0", "cpu_clk", 0, 31, 0),
-	MSTPCR("ic0", "cpu_clk", 0, 30, 0),
-	MSTPCR("oc0", "cpu_clk", 0, 29, 0),
-	MSTPCR("rsmem0", "sh_clk", 0, 28, CLK_ENABLE_ON_INIT),
-	MSTPCR("xymem0", "cpu_clk", 0, 26, CLK_ENABLE_ON_INIT),
-	MSTPCR("intc30", "peripheral_clk", 0, 23, 0),
-	MSTPCR("intc0", "peripheral_clk", 0, 22, 0),
-	MSTPCR("dmac0", "bus_clk", 0, 21, 0),
-	MSTPCR("sh0", "sh_clk", 0, 20, 0),
-	MSTPCR("hudi0", "peripheral_clk", 0, 19, 0),
-	MSTPCR("ubc0", "cpu_clk", 0, 17, 0),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
-	MSTPCR("cmt0", "r_clk", 0, 14, 0),
-	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10, 0),
-	MSTPCR("scif0", "peripheral_clk", 0, 7, 0),
-	MSTPCR("scif1", "bus_clk", 0, 6, 0),
-	MSTPCR("scif2", "bus_clk", 0, 5, 0),
-	MSTPCR("msiof0", "peripheral_clk", 0, 2, 0),
-	MSTPCR("sbr0", "peripheral_clk", 0, 1, 0),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
-	MSTPCR("icb0", "bus_clk", 2, 27, 0),
-	MSTPCR("meram0", "sh_clk", 2, 26, 0),
-	MSTPCR("dacc0", "peripheral_clk", 2, 24, 0),
-	MSTPCR("dacy0", "peripheral_clk", 2, 23, 0),
-	MSTPCR("tsif0", "bus_clk", 2, 22, 0),
-	MSTPCR("sdhi0", "bus_clk", 2, 18, 0),
-	MSTPCR("mmcif0", "bus_clk", 2, 17, 0),
-	MSTPCR("usb0", "bus_clk", 2, 11, 0),
-	MSTPCR("siu0", "bus_clk", 2, 8, 0),
-	MSTPCR("veu1", "bus_clk", 2, 7, CLK_ENABLE_ON_INIT),
-	MSTPCR("vou0", "bus_clk", 2, 5, 0),
-	MSTPCR("beu0", "bus_clk", 2, 4, 0),
-	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
-	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
-	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
-	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
-#endif
 };
 
 static struct clk *sh7722_clocks[] = {
-- 
cgit v0.10.2


From 46e9371c0eaade8391b3969431a72272b9007158 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 4 Jun 2009 07:37:59 +0000
Subject: sh: sh7722 clock framework rewrite

This patch rewrites the sh7722 clock framework code.
The new code makes use of the recently merged div4,
div6 and mstp32 helper code. Both extal and dll are
supported as input clocks to the pll.

While at it, now when all SuperH Mobile processors
are converted, fix CONFIG_SH_CLK_CPG_LEGACY to depend
on CONFIG_ARCH_SHMOBILE.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index dbc10ff..b5629cd 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -516,7 +516,7 @@ config SH_CLK_CPG
 
 config SH_CLK_CPG_LEGACY
 	depends on SH_CLK_CPG
-	def_bool y if !CPU_SUBTYPE_SH7785 && !CPU_SUBTYPE_SH7723 && !CPU_SUBTYPE_SH7724 && !CPU_SUBTYPE_SH7343 && !CPU_SUBTYPE_SH7366
+	def_bool y if !CPU_SUBTYPE_SH7785 && !ARCH_SHMOBILE
 
 config SH_CLK_MD
 	int "CPU Mode Pin Setting"
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index 6ca7c74..40f8593 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -1,719 +1,197 @@
 /*
  * arch/sh/kernel/cpu/sh4a/clock-sh7722.c
  *
- * SH7722 support for the clock framework
+ * SH7722 clock framework support
  *
- * Copyright (c) 2006-2007 Nomad Global Solutions Inc
- * Based on code for sh7343 by Paul Mundt
+ * Copyright (C) 2009 Magnus Damm
  *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
-#include <linux/errno.h>
-#include <linux/stringify.h>
 #include <asm/clock.h>
-#include <asm/freq.h>
-
-#define N  (-1)
-#define NM (-2)
-#define ROUND_NEAREST 0
-#define ROUND_DOWN -1
-#define ROUND_UP   +1
-
-static int adjust_algos[][3] = {
-	{},	/* NO_CHANGE */
-	{ NM, N, 1 },   /* N:1, N:1 */
-	{ 3, 2, 2 },	/* 3:2:2 */
-	{ 5, 2, 2 },    /* 5:2:2 */
-	{ N, 1, 1 },	/* N:1:1 */
-
-	{ N, 1 },	/* N:1 */
 
-	{ N, 1 },	/* N:1 */
-	{ 3, 2 },
-	{ 4, 3 },
-	{ 5, 4 },
-
-	{ N, 1 }
+/* SH7722 registers */
+#define FRQCR		0xa4150000
+#define VCLKCR		0xa4150004
+#define SCLKACR		0xa4150008
+#define SCLKBCR		0xa415000c
+#define IRDACLKCR	0xa4150018
+#define PLLCR		0xa4150024
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+#define DLLFRQ		0xa4150050
+
+/* Fixed 32 KHz root clock for RTC and Power Management purposes */
+static struct clk r_clk = {
+	.name           = "rclk",
+	.id             = -1,
+	.rate           = 32768,
 };
 
-static unsigned long adjust_pair_of_clocks(unsigned long r1, unsigned long r2,
-			int m1, int m2, int round_flag)
-{
-	unsigned long rem, div;
-	int the_one = 0;
-
-	pr_debug( "Actual values: r1 = %ld\n", r1);
-	pr_debug( "...............r2 = %ld\n", r2);
-
-	if (m1 == m2) {
-		r2 = r1;
-		pr_debug( "setting equal rates: r2 now %ld\n", r2);
-	} else if ((m2 == N  && m1 == 1) ||
-		   (m2 == NM && m1 == N)) { /* N:1 or NM:N */
-		pr_debug( "Setting rates as 1:N (N:N*M)\n");
-		rem = r2 % r1;
-		pr_debug( "...remainder = %ld\n", rem);
-		if (rem) {
-			div = r2 / r1;
-			pr_debug( "...div = %ld\n", div);
-			switch (round_flag) {
-			case ROUND_NEAREST:
-				the_one = rem >= r1/2 ? 1 : 0; break;
-			case ROUND_UP:
-				the_one = 1; break;
-			case ROUND_DOWN:
-				the_one = 0; break;
-			}
-
-			r2 = r1 * (div + the_one);
-			pr_debug( "...setting r2 to %ld\n", r2);
-		}
-	} else if ((m2 == 1  && m1 == N) ||
-		   (m2 == N && m1 == NM)) { /* 1:N or N:NM */
-		pr_debug( "Setting rates as N:1 (N*M:N)\n");
-		rem = r1 % r2;
-		pr_debug( "...remainder = %ld\n", rem);
-		if (rem) {
-			div = r1 / r2;
-			pr_debug( "...div = %ld\n", div);
-			switch (round_flag) {
-			case ROUND_NEAREST:
-				the_one = rem > r2/2 ? 1 : 0; break;
-			case ROUND_UP:
-				the_one = 0; break;
-			case ROUND_DOWN:
-				the_one = 1; break;
-			}
-
-			r2 = r1 / (div + the_one);
-			pr_debug( "...setting r2 to %ld\n", r2);
-		}
-	} else { /* value:value */
-		pr_debug( "Setting rates as %d:%d\n", m1, m2);
-		div = r1 / m1;
-		r2 = div * m2;
-		pr_debug( "...div = %ld\n", div);
-		pr_debug( "...setting r2 to %ld\n", r2);
-	}
-
-	return r2;
-}
-
-static void adjust_clocks(int originate, int *l, unsigned long v[],
-			  int n_in_line)
-{
-	int x;
-
-	pr_debug( "Go down from %d...\n", originate);
-	/* go up recalculation clocks */
-	for (x = originate; x>0; x -- )
-		v[x-1] = adjust_pair_of_clocks(v[x], v[x-1],
-					l[x], l[x-1],
-					ROUND_UP);
-
-	pr_debug( "Go up from %d...\n", originate);
-	/* go down recalculation clocks */
-	for (x = originate; x<n_in_line - 1; x ++ )
-		v[x+1] = adjust_pair_of_clocks(v[x], v[x+1],
-					l[x], l[x+1],
-					ROUND_UP);
-}
-
-
 /*
- * SH7722 uses a common set of multipliers and divisors, so this
- * is quite simple..
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
  */
-
-#define STCPLL(frqcr) (((frqcr >> 24) & 0x1f) + 1)
-
-/*
- * Instead of having two separate multipliers/divisors set, like this:
- *
- * static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
- * static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
- *
- * I created the divisors2 array, which is used to calculate rate like
- *   rate = parent * 2 / divisors2[ divisor ];
-*/
-static int divisors2[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32, 40 };
-
-static unsigned long master_clk_recalc(struct clk *clk)
-{
-	unsigned frqcr = ctrl_inl(FRQCR);
-
-	return CONFIG_SH_PCLK_FREQ * STCPLL(frqcr);
-}
-
-static void master_clk_init(struct clk *clk)
-{
-	clk->parent = NULL;
-	clk->rate = master_clk_recalc(clk);
-}
-
-static unsigned long module_clk_recalc(struct clk *clk)
-{
-	unsigned long frqcr = ctrl_inl(FRQCR);
-
-	return clk->parent->rate / STCPLL(frqcr);
-}
-
-#define MASTERDIVS	{ 2, 3, 4, 6, 8, 16 }
-#define STCMASK		0x1f
-#define DIVCALC(div)	(div-1)
-#define FRQCRKICK	0x00000000
-
-static int master_clk_setrate(struct clk *clk, unsigned long rate, int id)
-{
-	int div = rate / clk->rate;
-	int master_divs[] = MASTERDIVS;
-	int index;
-	unsigned long frqcr;
-
-	for (index = 1; index < ARRAY_SIZE(master_divs); index++)
-		if (div >= master_divs[index - 1] && div < master_divs[index])
-			break;
-
-	if (index >= ARRAY_SIZE(master_divs))
-		index = ARRAY_SIZE(master_divs);
-	div = master_divs[index - 1];
-
-	frqcr = ctrl_inl(FRQCR);
-	frqcr &= ~(STCMASK << 24);
-	frqcr |= (DIVCALC(div) << 24);
-	frqcr |= FRQCRKICK;
-	ctrl_outl(frqcr, FRQCR);
-
-	return 0;
-}
-
-static struct clk_ops sh7722_master_clk_ops = {
-	.init = master_clk_init,
-	.recalc = master_clk_recalc,
-	.set_rate = master_clk_setrate,
-};
-
-static struct clk_ops sh7722_module_clk_ops = {
-       .recalc = module_clk_recalc,
-};
-
-struct frqcr_context {
-	unsigned mask;
-	unsigned shift;
-};
-
-struct frqcr_context sh7722_get_clk_context(const char *name)
-{
-	struct frqcr_context ctx = { 0, };
-
-	if (!strcmp(name, "peripheral_clk")) {
-		ctx.shift = 0;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "sdram_clk")) {
-		ctx.shift = 4;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "bus_clk")) {
-		ctx.shift = 8;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "sh_clk")) {
-		ctx.shift = 12;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "umem_clk")) {
-		ctx.shift = 16;
-		ctx.mask = 0xF;
-	} else if (!strcmp(name, "cpu_clk")) {
-		ctx.shift = 20;
-		ctx.mask = 7;
-	}
-	return ctx;
-}
-
-/**
- * sh7722_find_div_index - find divisor for setting rate
- *
- * All sh7722 clocks use the same set of multipliers/divisors. This function
- * chooses correct divisor to set the rate of clock with parent clock that
- * generates frequency of 'parent_rate'
- *
- * @parent_rate: rate of parent clock
- * @rate: requested rate to be set
- */
-static int sh7722_find_div_index(unsigned long parent_rate, unsigned rate)
-{
-	unsigned div2 = parent_rate * 2 / rate;
-	int index;
-
-	if (rate > parent_rate)
-		return -EINVAL;
-
-	for (index = 1; index < ARRAY_SIZE(divisors2); index++) {
-		if (div2 > divisors2[index - 1] && div2 <= divisors2[index])
-			break;
-	}
-	if (index >= ARRAY_SIZE(divisors2))
-		index = ARRAY_SIZE(divisors2) - 1;
-	return index;
-}
-
-static unsigned long sh7722_frqcr_recalc(struct clk *clk)
-{
-	struct frqcr_context ctx = sh7722_get_clk_context(clk->name);
-	unsigned long frqcr = ctrl_inl(FRQCR);
-	int index;
-
-	index = (frqcr >> ctx.shift) & ctx.mask;
-	return clk->parent->rate * 2 / divisors2[index];
-}
-
-static int sh7722_frqcr_set_rate(struct clk *clk, unsigned long rate,
-				 int algo_id)
-{
-	struct frqcr_context ctx = sh7722_get_clk_context(clk->name);
-	unsigned long parent_rate = clk->parent->rate;
-	int div;
-	unsigned long frqcr;
-	int err = 0;
-
-	/* pretty invalid */
-	if (parent_rate < rate)
-		return -EINVAL;
-
-	/* look for multiplier/divisor pair */
-	div = sh7722_find_div_index(parent_rate, rate);
-	if (div<0)
-		return div;
-
-	/* calculate new value of clock rate */
-	clk->rate = parent_rate * 2 / divisors2[div];
-	frqcr = ctrl_inl(FRQCR);
-
-	/* FIXME: adjust as algo_id specifies */
-	if (algo_id != NO_CHANGE) {
-		int originator;
-		char *algo_group_1[] = { "cpu_clk", "umem_clk", "sh_clk" };
-		char *algo_group_2[] = { "sh_clk", "bus_clk" };
-		char *algo_group_3[] = { "sh_clk", "sdram_clk" };
-		char *algo_group_4[] = { "bus_clk", "peripheral_clk" };
-		char *algo_group_5[] = { "cpu_clk", "peripheral_clk" };
-		char **algo_current = NULL;
-		/* 3 is the maximum number of clocks in relation */
-		struct clk *ck[3];
-		unsigned long values[3]; /* the same comment as above */
-		int part_length = -1;
-		int i;
-
-		/*
-		 * all the steps below only required if adjustion was
-		 * requested
-		 */
-		if (algo_id == IUS_N1_N1 ||
-		    algo_id == IUS_322 ||
-		    algo_id == IUS_522 ||
-		    algo_id == IUS_N11) {
-			algo_current = algo_group_1;
-			part_length = 3;
-		}
-		if (algo_id == SB_N1) {
-			algo_current = algo_group_2;
-			part_length = 2;
-		}
-		if (algo_id == SB3_N1 ||
-		    algo_id == SB3_32 ||
-		    algo_id == SB3_43 ||
-		    algo_id == SB3_54) {
-			algo_current = algo_group_3;
-			part_length = 2;
-		}
-		if (algo_id == BP_N1) {
-			algo_current = algo_group_4;
-			part_length = 2;
-		}
-		if (algo_id == IP_N1) {
-			algo_current = algo_group_5;
-			part_length = 2;
-		}
-		if (!algo_current)
-			goto incorrect_algo_id;
-
-		originator = -1;
-		for (i = 0; i < part_length; i ++ ) {
-			if (originator >= 0 && !strcmp(clk->name,
-						       algo_current[i]))
-				originator = i;
-			ck[i] = clk_get(NULL, algo_current[i]);
-			values[i] = clk_get_rate(ck[i]);
-		}
-
-		if (originator >= 0)
-			adjust_clocks(originator, adjust_algos[algo_id],
-				      values, part_length);
-
-		for (i = 0; i < part_length; i ++ ) {
-			struct frqcr_context part_ctx;
-			int part_div;
-
-			if (likely(!err)) {
-				part_div = sh7722_find_div_index(parent_rate,
-								rate);
-				if (part_div > 0) {
-					part_ctx = sh7722_get_clk_context(
-								ck[i]->name);
-					frqcr &= ~(part_ctx.mask <<
-						   part_ctx.shift);
-					frqcr |= part_div << part_ctx.shift;
-				} else
-					err = part_div;
-			}
-
-			ck[i]->ops->recalc(ck[i]);
-			clk_put(ck[i]);
-		}
-	}
-
-	/* was there any error during recalculation ? If so, bail out.. */
-	if (unlikely(err!=0))
-		goto out_err;
-
-	/* clear FRQCR bits */
-	frqcr &= ~(ctx.mask << ctx.shift);
-	frqcr |= div << ctx.shift;
-	frqcr |= FRQCRKICK;
-
-	/* ...and perform actual change */
-	ctrl_outl(frqcr, FRQCR);
-	return 0;
-
-incorrect_algo_id:
-	return -EINVAL;
-out_err:
-	return err;
-}
-
-static long sh7722_frqcr_round_rate(struct clk *clk, unsigned long rate)
-{
-	unsigned long parent_rate = clk->parent->rate;
-	int div;
-
-	/* look for multiplier/divisor pair */
-	div = sh7722_find_div_index(parent_rate, rate);
-	if (div < 0)
-		return clk->rate;
-
-	/* calculate new value of clock rate */
-	return parent_rate * 2 / divisors2[div];
-}
-
-static struct clk_ops sh7722_frqcr_clk_ops = {
-	.recalc = sh7722_frqcr_recalc,
-	.set_rate = sh7722_frqcr_set_rate,
-	.round_rate = sh7722_frqcr_round_rate,
+struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
 };
 
-/*
- * clock ops methods for SIU A/B and IrDA clock
- */
-static int sh7722_siu_set_rate(struct clk *clk, unsigned long rate, int algo_id)
-{
-	unsigned long r;
-	int div;
-
-	r = ctrl_inl(clk->arch_flags);
-	div = sh7722_find_div_index(clk->parent->rate, rate);
-	if (div < 0)
-		return div;
-	r = (r & ~0xF) | div;
-	ctrl_outl(r, clk->arch_flags);
-	return 0;
-}
-
-static unsigned long sh7722_siu_recalc(struct clk *clk)
-{
-	unsigned long r;
-
-	r = ctrl_inl(clk->arch_flags);
-	return clk->parent->rate * 2 / divisors2[r & 0xF];
-}
-
-static int sh7722_siu_start_stop(struct clk *clk, int enable)
+/* The dll block multiplies the 32khz r_clk, may be used instead of extal */
+static unsigned long dll_recalc(struct clk *clk)
 {
-	unsigned long r;
+	unsigned long mult;
 
-	r = ctrl_inl(clk->arch_flags);
-	if (enable)
-		ctrl_outl(r & ~(1 << 8), clk->arch_flags);
+	if (__raw_readl(PLLCR) & 0x1000)
+		mult = __raw_readl(DLLFRQ);
 	else
-		ctrl_outl(r | (1 << 8), clk->arch_flags);
-	return 0;
-}
+		mult = 0;
 
-static int sh7722_siu_enable(struct clk *clk)
-{
-	return sh7722_siu_start_stop(clk, 1);
-}
-
-static void sh7722_siu_disable(struct clk *clk)
-{
-	sh7722_siu_start_stop(clk, 0);
+	return clk->parent->rate * mult;
 }
 
-static struct clk_ops sh7722_siu_clk_ops = {
-	.recalc = sh7722_siu_recalc,
-	.set_rate = sh7722_siu_set_rate,
-	.enable = sh7722_siu_enable,
-	.disable = sh7722_siu_disable,
+static struct clk_ops dll_clk_ops = {
+	.recalc		= dll_recalc,
 };
 
-static int sh7722_video_enable(struct clk *clk)
-{
-	unsigned long r;
-
-	r = ctrl_inl(VCLKCR);
-	ctrl_outl( r & ~(1<<8), VCLKCR);
-	return 0;
-}
-
-static void sh7722_video_disable(struct clk *clk)
-{
-	unsigned long r;
-
-	r = ctrl_inl(VCLKCR);
-	ctrl_outl( r | (1<<8), VCLKCR);
-}
+static struct clk dll_clk = {
+	.name           = "dll_clk",
+	.id             = -1,
+	.ops		= &dll_clk_ops,
+	.parent		= &r_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
+};
 
-static int sh7722_video_set_rate(struct clk *clk, unsigned long rate,
-				 int algo_id)
+static unsigned long pll_recalc(struct clk *clk)
 {
-	unsigned long r;
+	unsigned long mult = 1;
+	unsigned long div = 1;
 
-	r = ctrl_inl(VCLKCR);
-	r &= ~0x3F;
-	r |= ((clk->parent->rate / rate - 1) & 0x3F);
-	ctrl_outl(r, VCLKCR);
-	return 0;
-}
-
-static unsigned long sh7722_video_recalc(struct clk *clk)
-{
-	unsigned long r;
+	if (__raw_readl(PLLCR) & 0x4000)
+		mult = (((__raw_readl(FRQCR) >> 24) & 0x1f) + 1);
+	else
+		div = 2;
 
-	r = ctrl_inl(VCLKCR);
-	return clk->parent->rate / ((r & 0x3F) + 1);
+	return (clk->parent->rate * mult) / div;
 }
 
-static struct clk_ops sh7722_video_clk_ops = {
-	.recalc = sh7722_video_recalc,
-	.set_rate = sh7722_video_set_rate,
-	.enable = sh7722_video_enable,
-	.disable = sh7722_video_disable,
-};
-/*
- * and at last, clock definitions themselves
- */
-static struct clk sh7722_umem_clock = {
-	.name = "umem_clk",
-	.ops = &sh7722_frqcr_clk_ops,
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
 };
 
-static struct clk sh7722_sh_clock = {
-	.name = "sh_clk",
-	.ops = &sh7722_frqcr_clk_ops,
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.flags		= CLK_ENABLE_ON_INIT,
 };
 
-static struct clk sh7722_peripheral_clock = {
-	.name = "peripheral_clk",
-	.ops = &sh7722_frqcr_clk_ops,
+struct clk *main_clks[] = {
+	&r_clk,
+	&extal_clk,
+	&dll_clk,
+	&pll_clk,
 };
 
-static struct clk sh7722_sdram_clock = {
-	.name = "sdram_clk",
-	.ops = &sh7722_frqcr_clk_ops,
-};
+static int multipliers[] = { 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+static int divisors[] = { 1, 3, 2, 5, 3, 4, 5, 6, 8, 10, 12, 16, 20 };
 
-static struct clk sh7722_r_clock = {
-	.name = "r_clk",
-	.rate = 32768,
+static struct clk_div_mult_table div4_table = {
+	.divisors = divisors,
+	.nr_divisors = ARRAY_SIZE(divisors),
+	.multipliers = multipliers,
+	.nr_multipliers = ARRAY_SIZE(multipliers),
 };
 
-/*
- * these three clocks - SIU A, SIU B, IrDA - share the same clk_ops
- * methods of clk_ops determine which register they should access by
- * examining clk->name field
- */
-static struct clk sh7722_siu_a_clock = {
-	.name = "siu_a_clk",
-	.arch_flags = SCLKACR,
-	.ops = &sh7722_siu_clk_ops,
-};
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
+       DIV4_SIUA, DIV4_SIUB, DIV4_IRDA, DIV4_NR };
 
-static struct clk sh7722_siu_b_clock = {
-	.name = "siu_b_clk",
-	.arch_flags = SCLKBCR,
-	.ops = &sh7722_siu_clk_ops,
-};
+#define DIV4(_str, _reg, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
 
-#if defined(CONFIG_CPU_SUBTYPE_SH7722)
-static struct clk sh7722_irda_clock = {
-	.name = "irda_clk",
-	.arch_flags = IrDACLKCR,
-	.ops = &sh7722_siu_clk_ops,
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x1fef, CLK_ENABLE_ON_INIT),
+	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", FRQCR, 12, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x1fff, CLK_ENABLE_ON_INIT),
+	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x1fff, 0),
+	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x1fff, 0),
+	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x1fff, 0),
+	[DIV4_IRDA] = DIV4("irda_clk", IRDACLKCR, 0, 0x1fff, 0),
 };
-#endif
 
-static struct clk sh7722_video_clock = {
-	.name = "video_clk",
-	.ops = &sh7722_video_clk_ops,
+struct clk div6_clks[] = {
+	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
 };
 
-#define MSTPCR_ARCH_FLAGS(reg, bit) (((reg) << 8) | (bit))
-#define MSTPCR_ARCH_FLAGS_REG(value) ((value) >> 8)
-#define MSTPCR_ARCH_FLAGS_BIT(value) ((value) & 0xff)
-
-static int sh7722_mstpcr_start_stop(struct clk *clk, int enable)
-{
-	unsigned long bit = MSTPCR_ARCH_FLAGS_BIT(clk->arch_flags);
-	unsigned long reg;
-	unsigned long r;
+#define MSTP(_str, _parent, _reg, _bit, _flags) \
+  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _flags)
 
-	switch(MSTPCR_ARCH_FLAGS_REG(clk->arch_flags)) {
-	case 0:
-		reg = MSTPCR0;
-		break;
-	case 1:
-		reg = MSTPCR1;
-		break;
-	case 2:
-		reg = MSTPCR2;
-		break;
-	default:
-		return -EINVAL;
-	}
+static struct clk mstp_clks[] = {
+	MSTP("uram0", &div4_clks[DIV4_U], MSTPCR0, 28, CLK_ENABLE_ON_INIT),
+	MSTP("xymem0", &div4_clks[DIV4_B], MSTPCR0, 26, CLK_ENABLE_ON_INIT),
+	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0),
+	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0),
+	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0),
+	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0),
+	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 7, 0),
+	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 6, 0),
+	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 5, 0),
 
-	r = ctrl_inl(reg);
-
-	if (enable)
-		r &= ~(1 << bit);
-	else
-		r |= (1 << bit);
+	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0),
+	MSTP("rtc0", &r_clk, MSTPCR1, 8, 0),
 
-	ctrl_outl(r, reg);
-	return 0;
-}
-
-static int sh7722_mstpcr_enable(struct clk *clk)
-{
-	return sh7722_mstpcr_start_stop(clk, 1);
-}
-
-static void sh7722_mstpcr_disable(struct clk *clk)
-{
-	sh7722_mstpcr_start_stop(clk, 0);
-}
-
-static struct clk_ops sh7722_mstpcr_clk_ops = {
-	.enable = sh7722_mstpcr_enable,
-	.disable = sh7722_mstpcr_disable,
-	.recalc = followparent_recalc,
-};
-
-#define MSTPCR(_name, _parent, regnr, bitnr, _flags) \
-{						\
-	.name = _name,				\
-	.flags = _flags,			\
-	.arch_flags = MSTPCR_ARCH_FLAGS(regnr, bitnr),	\
-	.ops = (void *)_parent,		\
-}
-
-static struct clk sh7722_mstpcr_clocks[] = {
-#if defined(CONFIG_CPU_SUBTYPE_SH7722)
-	MSTPCR("uram0", "umem_clk", 0, 28, CLK_ENABLE_ON_INIT),
-	MSTPCR("xymem0", "bus_clk", 0, 26, CLK_ENABLE_ON_INIT),
-	MSTPCR("tmu0", "peripheral_clk", 0, 15, 0),
-	MSTPCR("cmt0", "r_clk", 0, 14, 0),
-	MSTPCR("rwdt0", "r_clk", 0, 13, 0),
-	MSTPCR("flctl0", "peripheral_clk", 0, 10, 0),
-	MSTPCR("scif0", "peripheral_clk", 0, 7, 0),
-	MSTPCR("scif1", "peripheral_clk", 0, 6, 0),
-	MSTPCR("scif2", "peripheral_clk", 0, 5, 0),
-	MSTPCR("i2c0", "peripheral_clk", 1, 9, 0),
-	MSTPCR("rtc0", "r_clk", 1, 8, 0),
-	MSTPCR("sdhi0", "peripheral_clk", 2, 18, 0),
-	MSTPCR("keysc0", "r_clk", 2, 14, 0),
-	MSTPCR("usbf0", "peripheral_clk", 2, 11, 0),
-	MSTPCR("2dg0", "bus_clk", 2, 9, 0),
-	MSTPCR("siu0", "bus_clk", 2, 8, 0),
-	MSTPCR("vou0", "bus_clk", 2, 5, 0),
-	MSTPCR("jpu0", "bus_clk", 2, 6, CLK_ENABLE_ON_INIT),
-	MSTPCR("beu0", "bus_clk", 2, 4, 0),
-	MSTPCR("ceu0", "bus_clk", 2, 3, 0),
-	MSTPCR("veu0", "bus_clk", 2, 2, CLK_ENABLE_ON_INIT),
-	MSTPCR("vpu0", "bus_clk", 2, 1, CLK_ENABLE_ON_INIT),
-	MSTPCR("lcdc0", "bus_clk", 2, 0, 0),
-#endif
-};
-
-static struct clk *sh7722_clocks[] = {
-	&sh7722_umem_clock,
-	&sh7722_sh_clock,
-	&sh7722_peripheral_clock,
-	&sh7722_sdram_clock,
-	&sh7722_siu_a_clock,
-	&sh7722_siu_b_clock,
-#if defined(CONFIG_CPU_SUBTYPE_SH7722)
-	&sh7722_irda_clock,
-#endif
-	&sh7722_video_clock,
-};
-
-/*
- * init in order: master, module, bus, cpu
- */
-struct clk_ops *onchip_ops[] = {
-	&sh7722_master_clk_ops,
-	&sh7722_module_clk_ops,
-	&sh7722_frqcr_clk_ops,
-	&sh7722_frqcr_clk_ops,
+	MSTP("sdhi0", &div4_clks[DIV4_P], MSTPCR2, 18, 0),
+	MSTP("keysc0", &r_clk, MSTPCR2, 14, 0),
+	MSTP("usbf0", &div4_clks[DIV4_P], MSTPCR2, 11, 0),
+	MSTP("2dg0", &div4_clks[DIV4_B], MSTPCR2, 9, 0),
+	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 8, 0),
+	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0),
+	MSTP("jpu0", &div4_clks[DIV4_B], MSTPCR2, 6, CLK_ENABLE_ON_INIT),
+	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0),
+	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0),
+	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, CLK_ENABLE_ON_INIT),
+	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, CLK_ENABLE_ON_INIT),
+	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0),
 };
 
-void __init
-arch_init_clk_ops(struct clk_ops **ops, int type)
-{
-	BUG_ON(type < 0 || type >= ARRAY_SIZE(onchip_ops));
-	*ops = onchip_ops[type];
-}
-
 int __init arch_clk_init(void)
 {
-	struct clk *clk;
-	int i;
+	int k, ret = 0;
 
-	cpg_clk_init();
+	/* autodetect extal or dll configuration */
+	if (__raw_readl(PLLCR) & 0x1000)
+		pll_clk.parent = &dll_clk;
+	else
+		pll_clk.parent = &extal_clk;
 
-	clk = clk_get(NULL, "master_clk");
-	for (i = 0; i < ARRAY_SIZE(sh7722_clocks); i++) {
-		pr_debug( "Registering clock '%s'\n", sh7722_clocks[i]->name);
-		sh7722_clocks[i]->parent = clk;
-		clk_register(sh7722_clocks[i]);
-	}
-	clk_put(clk);
+	for (k = 0; !ret && (k < ARRAY_SIZE(main_clks)); k++)
+		ret = clk_register(main_clks[k]);
 
-	clk_register(&sh7722_r_clock);
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
 
-	for (i = 0; i < ARRAY_SIZE(sh7722_mstpcr_clocks); i++) {
-		pr_debug( "Registering mstpcr clock '%s'\n",
-			  sh7722_mstpcr_clocks[i].name);
-		clk = clk_get(NULL, (void *) sh7722_mstpcr_clocks[i].ops);
-		sh7722_mstpcr_clocks[i].parent = clk;
-		sh7722_mstpcr_clocks[i].ops = &sh7722_mstpcr_clk_ops;
-		clk_register(&sh7722_mstpcr_clocks[i]);
-		clk_put(clk);
-	}
+	if (!ret)
+		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
 
-	propagate_rate(&sh7722_r_clock); /* make sure rate gets propagated */
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
 
-	return 0;
+	return ret;
 }
-- 
cgit v0.10.2


From 2ff2e8a3e1898e692b604424c384f134009dea80 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 26 May 2009 10:35:52 +1000
Subject: drm: don't associate _DRM_DRIVER maps with a master

A driver will use the _DRM_DRIVER map flag to indicate that it wants
to be responsible for removing the map itself, bypassing the DRM's
automagic cleanup code.

Since the multi-master changes this has been broken, resulting in some
drivers having their registers unmapped before it's finished with them.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 0411d91..80a2575 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -371,7 +371,8 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	list->user_token = list->hash.key << PAGE_SHIFT;
 	mutex_unlock(&dev->struct_mutex);
 
-	list->master = dev->primary->master;
+	if (!(map->flags & _DRM_DRIVER))
+		list->master = dev->primary->master;
 	*maplist = list;
 	return 0;
 	}
-- 
cgit v0.10.2


From df4f7fe7bd516b3833e25c692c3970e22038a6ca Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 11 Jun 2009 16:16:10 +1000
Subject: radeon: remove _DRM_DRIVER from the preadded sarea map

This shouldn't be there and is what broke r600 late in the 2.6.30
release cycle with Ben's patch.

Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 77a7a4d..521ef6a 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -2109,7 +2109,7 @@ int radeon_master_create(struct drm_device *dev, struct drm_master *master)
 
 	/* prebuild the SAREA */
 	sareapage = max_t(unsigned long, SAREA_MAX, PAGE_SIZE);
-	ret = drm_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK|_DRM_DRIVER,
+	ret = drm_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK,
 			 &master_priv->sarea);
 	if (ret) {
 		DRM_ERROR("SAREA setup failed\n");
-- 
cgit v0.10.2


From c5eeff1f8ecbc4bc7c1dd8e97a8610bc4dd3def8 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Mon, 8 Jun 2009 10:05:12 +0000
Subject: sh: sh7724: INTC setting update

This patch follows Rev 0.50 manual

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 000f3b8..585dd85 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -511,46 +511,46 @@ enum {
 	IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7,
 	HUDI,
 	DMAC1A_DEI0, DMAC1A_DEI1, DMAC1A_DEI2, DMAC1A_DEI3,
-	_2DG_TRI, _2DG_INI, _2DG_CEI, _2DG_BRK,
+	_2DG_TRI, _2DG_INI, _2DG_CEI,
 	DMAC0A_DEI0, DMAC0A_DEI1, DMAC0A_DEI2, DMAC0A_DEI3,
-	VIO_CEU20I, VIO_BEU20I, VIO_VEU3F1, VIO_VOUI,
-	SCIFA_SCIFA0,
-	VPU_VPUI,
-	TPU_TPUI,
-	CEU21I,
-	BEU21I,
-	USB_USI0,
+	VIO_CEU0, VIO_BEU0, VIO_VEU1, VIO_VOU,
+	SCIFA3,
+	VPU,
+	TPU,
+	CEU1,
+	BEU1,
+	USB0, USB1,
 	ATAPI,
 	RTC_ATI, RTC_PRI, RTC_CUI,
 	DMAC1B_DEI4, DMAC1B_DEI5, DMAC1B_DADERR,
 	DMAC0B_DEI4, DMAC0B_DEI5, DMAC0B_DADERR,
-	KEYSC_KEYI,
+	KEYSC,
 	SCIF_SCIF0, SCIF_SCIF1, SCIF_SCIF2,
-	VEU3F0I,
+	VEU0,
 	MSIOF_MSIOFI0, MSIOF_MSIOFI1,
 	SPU_SPUI0, SPU_SPUI1,
-	SCIFA_SCIFA1,
-/*	ICB_ICBI, */
+	SCIFA4,
+	ICB,
 	ETHI,
 	I2C1_ALI, I2C1_TACKI, I2C1_WAITI, I2C1_DTEI,
 	I2C0_ALI, I2C0_TACKI, I2C0_WAITI, I2C0_DTEI,
-	SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2,
-	CMT_CMTI,
-	TSIF_TSIFI,
-/*	ICB_LMBI, */
-	FSI_FSI,
-	SCIFA_SCIFA2,
+	SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2, SDHI0_SDHII3,
+	CMT,
+	TSIF,
+	FSI,
+	SCIFA5,
 	TMU0_TUNI0, TMU0_TUNI1, TMU0_TUNI2,
-	IRDA_IRDAI,
+	IRDA,
 	SDHI1_SDHII0, SDHI1_SDHII1, SDHI1_SDHII2,
-	JPU_JPUI,
-	MMC_MMCI0, MMC_MMCI1, MMC_MMCI2,
-	LCDC_LCDCI,
+	JPU,
+	_2DDMAC,
+	MMC_MMC2I, MMC_MMC3I,
+	LCDC,
 	TMU1_TUNI0, TMU1_TUNI1, TMU1_TUNI2,
 
 	/* interrupt groups */
-	DMAC1A, _2DG, DMAC0A, VIO, RTC,
-	DMAC1B, DMAC0B, I2C0, I2C1, SDHI0, SDHI1, SPU, MMC,
+	DMAC1A, _2DG, DMAC0A, VIO, USB, RTC,
+	DMAC1B, DMAC0B, I2C0, I2C1, SDHI0, SDHI1, SPU, MMCIF,
 };
 
 static struct intc_vect vectors[] __initdata = {
@@ -567,25 +567,25 @@ static struct intc_vect vectors[] __initdata = {
 	INTC_VECT(_2DG_TRI, 0x780),
 	INTC_VECT(_2DG_INI, 0x7A0),
 	INTC_VECT(_2DG_CEI, 0x7C0),
-	INTC_VECT(_2DG_BRK, 0x7E0),
 
 	INTC_VECT(DMAC0A_DEI0, 0x800),
 	INTC_VECT(DMAC0A_DEI1, 0x820),
 	INTC_VECT(DMAC0A_DEI2, 0x840),
 	INTC_VECT(DMAC0A_DEI3, 0x860),
 
-	INTC_VECT(VIO_CEU20I, 0x880),
-	INTC_VECT(VIO_BEU20I, 0x8A0),
-	INTC_VECT(VIO_VEU3F1, 0x8C0),
-	INTC_VECT(VIO_VOUI, 0x8E0),
+	INTC_VECT(VIO_CEU0, 0x880),
+	INTC_VECT(VIO_BEU0, 0x8A0),
+	INTC_VECT(VIO_VEU1, 0x8C0),
+	INTC_VECT(VIO_VOU,  0x8E0),
 
-	INTC_VECT(SCIFA_SCIFA0, 0x900),
-	INTC_VECT(VPU_VPUI, 0x980),
-	INTC_VECT(TPU_TPUI, 0x9A0),
-	INTC_VECT(CEU21I, 0x9E0),
-	INTC_VECT(BEU21I, 0xA00),
-	INTC_VECT(USB_USI0, 0xA20),
-	INTC_VECT(ATAPI, 0xA60),
+	INTC_VECT(SCIFA3, 0x900),
+	INTC_VECT(VPU,    0x980),
+	INTC_VECT(TPU,    0x9A0),
+	INTC_VECT(CEU1,   0x9E0),
+	INTC_VECT(BEU1,   0xA00),
+	INTC_VECT(USB0,   0xA20),
+	INTC_VECT(USB1,   0xA40),
+	INTC_VECT(ATAPI,  0xA60),
 
 	INTC_VECT(RTC_ATI, 0xA80),
 	INTC_VECT(RTC_PRI, 0xAA0),
@@ -599,18 +599,18 @@ static struct intc_vect vectors[] __initdata = {
 	INTC_VECT(DMAC0B_DEI5, 0xBA0),
 	INTC_VECT(DMAC0B_DADERR, 0xBC0),
 
-	INTC_VECT(KEYSC_KEYI, 0xBE0),
+	INTC_VECT(KEYSC,      0xBE0),
 	INTC_VECT(SCIF_SCIF0, 0xC00),
 	INTC_VECT(SCIF_SCIF1, 0xC20),
 	INTC_VECT(SCIF_SCIF2, 0xC40),
-	INTC_VECT(VEU3F0I, 0xC60),
+	INTC_VECT(VEU0,       0xC60),
 	INTC_VECT(MSIOF_MSIOFI0, 0xC80),
 	INTC_VECT(MSIOF_MSIOFI1, 0xCA0),
 	INTC_VECT(SPU_SPUI0, 0xCC0),
 	INTC_VECT(SPU_SPUI1, 0xCE0),
-	INTC_VECT(SCIFA_SCIFA1, 0xD00),
+	INTC_VECT(SCIFA4,    0xD00),
 
-/*	INTC_VECT(ICB_ICBI, 0xD20), */
+	INTC_VECT(ICB,  0xD20),
 	INTC_VECT(ETHI, 0xD60),
 
 	INTC_VECT(I2C1_ALI, 0xD80),
@@ -626,30 +626,30 @@ static struct intc_vect vectors[] __initdata = {
 	INTC_VECT(SDHI0_SDHII0, 0xE80),
 	INTC_VECT(SDHI0_SDHII1, 0xEA0),
 	INTC_VECT(SDHI0_SDHII2, 0xEC0),
+	INTC_VECT(SDHI0_SDHII3, 0xEE0),
 
-	INTC_VECT(CMT_CMTI, 0xF00),
-	INTC_VECT(TSIF_TSIFI, 0xF20),
-/*	INTC_VECT(ICB_LMBI, 0xF60), */
-	INTC_VECT(FSI_FSI, 0xF80),
-	INTC_VECT(SCIFA_SCIFA2, 0xFA0),
+	INTC_VECT(CMT,    0xF00),
+	INTC_VECT(TSIF,   0xF20),
+	INTC_VECT(FSI,    0xF80),
+	INTC_VECT(SCIFA5, 0xFA0),
 
 	INTC_VECT(TMU0_TUNI0, 0x400),
 	INTC_VECT(TMU0_TUNI1, 0x420),
 	INTC_VECT(TMU0_TUNI2, 0x440),
 
-	INTC_VECT(IRDA_IRDAI, 0x480),
+	INTC_VECT(IRDA,    0x480),
 
 	INTC_VECT(SDHI1_SDHII0, 0x4E0),
 	INTC_VECT(SDHI1_SDHII1, 0x500),
 	INTC_VECT(SDHI1_SDHII2, 0x520),
 
-	INTC_VECT(JPU_JPUI, 0x560),
+	INTC_VECT(JPU, 0x560),
+	INTC_VECT(_2DDMAC, 0x4A0),
 
-	INTC_VECT(MMC_MMCI0, 0x580),
-	INTC_VECT(MMC_MMCI1, 0x5A0),
-	INTC_VECT(MMC_MMCI2, 0x5C0),
+	INTC_VECT(MMC_MMC2I, 0x5A0),
+	INTC_VECT(MMC_MMC3I, 0x5C0),
 
-	INTC_VECT(LCDC_LCDCI, 0xF40),
+	INTC_VECT(LCDC, 0xF40),
 
 	INTC_VECT(TMU1_TUNI0, 0x920),
 	INTC_VECT(TMU1_TUNI1, 0x940),
@@ -658,86 +658,79 @@ static struct intc_vect vectors[] __initdata = {
 
 static struct intc_group groups[] __initdata = {
 	INTC_GROUP(DMAC1A, DMAC1A_DEI0, DMAC1A_DEI1, DMAC1A_DEI2, DMAC1A_DEI3),
-	INTC_GROUP(_2DG, _2DG_TRI, _2DG_INI, _2DG_CEI, _2DG_BRK),
+	INTC_GROUP(_2DG, _2DG_TRI, _2DG_INI, _2DG_CEI),
 	INTC_GROUP(DMAC0A, DMAC0A_DEI0, DMAC0A_DEI1, DMAC0A_DEI2, DMAC0A_DEI3),
-	INTC_GROUP(VIO, VIO_CEU20I, VIO_BEU20I, VIO_VEU3F1, VIO_VOUI),
+	INTC_GROUP(VIO, VIO_CEU0, VIO_BEU0, VIO_VEU1, VIO_VOU),
+	INTC_GROUP(USB, USB0, USB1),
 	INTC_GROUP(RTC, RTC_ATI, RTC_PRI, RTC_CUI),
 	INTC_GROUP(DMAC1B, DMAC1B_DEI4, DMAC1B_DEI5, DMAC1B_DADERR),
 	INTC_GROUP(DMAC0B, DMAC0B_DEI4, DMAC0B_DEI5, DMAC0B_DADERR),
 	INTC_GROUP(I2C0, I2C0_ALI, I2C0_TACKI, I2C0_WAITI, I2C0_DTEI),
 	INTC_GROUP(I2C1, I2C1_ALI, I2C1_TACKI, I2C1_WAITI, I2C1_DTEI),
-	INTC_GROUP(SDHI0, SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2),
+	INTC_GROUP(SDHI0, SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2, SDHI0_SDHII3),
 	INTC_GROUP(SDHI1, SDHI1_SDHII0, SDHI1_SDHII1, SDHI1_SDHII2),
 	INTC_GROUP(SPU, SPU_SPUI0, SPU_SPUI1),
-	INTC_GROUP(MMC, MMC_MMCI0, MMC_MMCI1, MMC_MMCI2),
+	INTC_GROUP(MMCIF, MMC_MMC2I, MMC_MMC3I),
 };
 
-/* FIXMEEEEEEEEEEEEEEEEEEE !!!!! */
-/* very bad manual !! */
 static struct intc_mask_reg mask_registers[] __initdata = {
 	{ 0xa4080080, 0xa40800c0, 8, /* IMR0 / IMCR0 */
 	  { 0, TMU1_TUNI2, TMU1_TUNI1, TMU1_TUNI0,
-	    /*SDHII3?*/0, SDHI1_SDHII2, SDHI1_SDHII1, SDHI1_SDHII0 } },
+	    0, SDHI1_SDHII2, SDHI1_SDHII1, SDHI1_SDHII0 } },
 	{ 0xa4080084, 0xa40800c4, 8, /* IMR1 / IMCR1 */
-	  { VIO_VOUI, VIO_VEU3F1, VIO_BEU20I, VIO_CEU20I,
+	  { VIO_VOU, VIO_VEU1, VIO_BEU0, VIO_CEU0,
 	    DMAC0A_DEI3, DMAC0A_DEI2, DMAC0A_DEI1, DMAC0A_DEI0 } },
 	{ 0xa4080088, 0xa40800c8, 8, /* IMR2 / IMCR2 */
-	  { 0, 0, 0, VPU_VPUI, ATAPI, ETHI, 0, /*SCIFA3*/SCIFA_SCIFA0 } },
+	  { 0, 0, 0, VPU, ATAPI, ETHI, 0, SCIFA3 } },
 	{ 0xa408008c, 0xa40800cc, 8, /* IMR3 / IMCR3 */
 	  { DMAC1A_DEI3, DMAC1A_DEI2, DMAC1A_DEI1, DMAC1A_DEI0,
-	    SPU_SPUI1, SPU_SPUI0, BEU21I, IRDA_IRDAI } },
+	    SPU_SPUI1, SPU_SPUI0, BEU1, IRDA } },
 	{ 0xa4080090, 0xa40800d0, 8, /* IMR4 / IMCR4 */
 	  { 0, TMU0_TUNI2, TMU0_TUNI1, TMU0_TUNI0,
-	    JPU_JPUI, 0, 0, LCDC_LCDCI } },
+	    JPU, 0, 0, LCDC } },
 	{ 0xa4080094, 0xa40800d4, 8, /* IMR5 / IMCR5 */
-	  { KEYSC_KEYI, DMAC0B_DADERR, DMAC0B_DEI5, DMAC0B_DEI4,
-	    VEU3F0I, SCIF_SCIF2, SCIF_SCIF1, SCIF_SCIF0 } },
+	  { KEYSC, DMAC0B_DADERR, DMAC0B_DEI5, DMAC0B_DEI4,
+	    VEU0, SCIF_SCIF2, SCIF_SCIF1, SCIF_SCIF0 } },
 	{ 0xa4080098, 0xa40800d8, 8, /* IMR6 / IMCR6 */
-	  { 0, 0, /*ICB_ICBI*/0, /*SCIFA4*/SCIFA_SCIFA1,
-	    CEU21I, 0, MSIOF_MSIOFI1, MSIOF_MSIOFI0 } },
+	  { 0, 0, ICB, SCIFA4,
+	    CEU1, 0, MSIOF_MSIOFI1, MSIOF_MSIOFI0 } },
 	{ 0xa408009c, 0xa40800dc, 8, /* IMR7 / IMCR7 */
 	  { I2C0_DTEI, I2C0_WAITI, I2C0_TACKI, I2C0_ALI,
 	    I2C1_DTEI, I2C1_WAITI, I2C1_TACKI, I2C1_ALI } },
 	{ 0xa40800a0, 0xa40800e0, 8, /* IMR8 / IMCR8 */
-	  { /*SDHII3*/0, SDHI0_SDHII2, SDHI0_SDHII1, SDHI0_SDHII0,
-	    0, 0, /*SCIFA5*/SCIFA_SCIFA2, FSI_FSI } },
+	  { SDHI0_SDHII3, SDHI0_SDHII2, SDHI0_SDHII1, SDHI0_SDHII0,
+	    0, 0, SCIFA5, FSI } },
 	{ 0xa40800a4, 0xa40800e4, 8, /* IMR9 / IMCR9 */
-	  { 0, 0, 0, CMT_CMTI, 0, /*USB1*/0, USB_USI0, 0 } },
+	  { 0, 0, 0, CMT, 0, USB1, USB0, 0 } },
 	{ 0xa40800a8, 0xa40800e8, 8, /* IMR10 / IMCR10 */
 	  { 0, DMAC1B_DADERR, DMAC1B_DEI5, DMAC1B_DEI4,
-	    0, RTC_ATI, RTC_PRI, RTC_CUI } },
+	    0, RTC_CUI, RTC_PRI, RTC_ATI } },
 	{ 0xa40800ac, 0xa40800ec, 8, /* IMR11 / IMCR11 */
-	  { _2DG_BRK, _2DG_CEI, _2DG_INI, _2DG_TRI,
-	    0, TPU_TPUI, /*ICB_LMBI*/0, TSIF_TSIFI } },
+	  { 0, _2DG_CEI, _2DG_INI, _2DG_TRI,
+	    0, TPU, 0, TSIF } },
 	{ 0xa40800b0, 0xa40800f0, 8, /* IMR12 / IMCR12 */
-	  { 0, 0, 0, 0, 0, 0, 0, 0/*2DDMAC*/ } },
+	  { 0, 0, MMC_MMC3I, MMC_MMC2I, 0, 0, 0, _2DDMAC } },
 	{ 0xa4140044, 0xa4140064, 8, /* INTMSK00 / INTMSKCLR00 */
 	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
 };
 
 static struct intc_prio_reg prio_registers[] __initdata = {
 	{ 0xa4080000, 0, 16, 4, /* IPRA */ { TMU0_TUNI0, TMU0_TUNI1,
-					     TMU0_TUNI2, IRDA_IRDAI } },
-	{ 0xa4080004, 0, 16, 4, /* IPRB */ { JPU_JPUI, LCDC_LCDCI,
-					     DMAC1A, BEU21I } },
+					     TMU0_TUNI2, IRDA } },
+	{ 0xa4080004, 0, 16, 4, /* IPRB */ { JPU, LCDC, DMAC1A, BEU1 } },
 	{ 0xa4080008, 0, 16, 4, /* IPRC */ { TMU1_TUNI0, TMU1_TUNI1,
 					     TMU1_TUNI2, SPU } },
-	{ 0xa408000c, 0, 16, 4, /* IPRD */ { 0, MMC, 0, ATAPI } },
-	{ 0xa4080010, 0, 16, 4, /* IPRE */
-	  { DMAC0A, /*BEU?VEU?*/VIO, /*SCIFA3*/SCIFA_SCIFA0, /*VPU5F*/
-	    VPU_VPUI } },
-	{ 0xa4080014, 0, 16, 4, /* IPRF */ { KEYSC_KEYI, DMAC0B,
-					     USB_USI0, CMT_CMTI } },
+	{ 0xa408000c, 0, 16, 4, /* IPRD */ { 0, MMCIF, 0, ATAPI } },
+	{ 0xa4080010, 0, 16, 4, /* IPRE */ { DMAC0A, VIO, SCIFA3, VPU } },
+	{ 0xa4080014, 0, 16, 4, /* IPRF */ { KEYSC, DMAC0B, USB, CMT } },
 	{ 0xa4080018, 0, 16, 4, /* IPRG */ { SCIF_SCIF0, SCIF_SCIF1,
-					     SCIF_SCIF2, VEU3F0I } },
+					     SCIF_SCIF2, VEU0 } },
 	{ 0xa408001c, 0, 16, 4, /* IPRH */ { MSIOF_MSIOFI0, MSIOF_MSIOFI1,
 					     I2C1, I2C0 } },
-	{ 0xa4080020, 0, 16, 4, /* IPRI */ { /*SCIFA4*/SCIFA_SCIFA1, /*ICB*/0,
-					     TSIF_TSIFI, _2DG/*ICB?*/ } },
-	{ 0xa4080024, 0, 16, 4, /* IPRJ */ { CEU21I, ETHI, FSI_FSI, SDHI1 } },
-	{ 0xa4080028, 0, 16, 4, /* IPRK */ { RTC, DMAC1B, /*ICB?*/0, SDHI0 } },
-	{ 0xa408002c, 0, 16, 4, /* IPRL */ { /*SCIFA5*/SCIFA_SCIFA2, 0,
-					     TPU_TPUI, /*2DDMAC*/0 } },
+	{ 0xa4080020, 0, 16, 4, /* IPRI */ { SCIFA4, ICB, TSIF, _2DG } },
+	{ 0xa4080024, 0, 16, 4, /* IPRJ */ { CEU1, ETHI, FSI, SDHI1 } },
+	{ 0xa4080028, 0, 16, 4, /* IPRK */ { RTC, DMAC1B, 0, SDHI0 } },
+	{ 0xa408002c, 0, 16, 4, /* IPRL */ { SCIFA5, 0, TPU, _2DDMAC } },
 	{ 0xa4140010, 0, 32, 4, /* INTPRI00 */
 	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
 };
-- 
cgit v0.10.2


From f168dd00a9440a6f644db73bda47718fd12008e4 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Wed, 10 Jun 2009 05:04:32 +0000
Subject: sh: sh7724: Add JPU support

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Acked-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 585dd85..e5ac9eb 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -451,6 +451,35 @@ static struct platform_device tmu5_device = {
 	.num_resources	= ARRAY_SIZE(tmu5_resources),
 };
 
+/* JPU */
+static struct uio_info jpu_platform_data = {
+	.name = "JPU",
+	.version = "0",
+	.irq = 27,
+};
+
+static struct resource jpu_resources[] = {
+	[0] = {
+		.name	= "JPU",
+		.start	= 0xfe980000,
+		.end	= 0xfe9902d3,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device jpu_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 3,
+	.dev = {
+		.platform_data	= &jpu_platform_data,
+	},
+	.resource	= jpu_resources,
+	.num_resources	= ARRAY_SIZE(jpu_resources),
+};
+
 static struct platform_device *sh7724_devices[] __initdata = {
 	&cmt_device,
 	&tmu0_device,
@@ -466,6 +495,7 @@ static struct platform_device *sh7724_devices[] __initdata = {
 	&vpu_device,
 	&veu0_device,
 	&veu1_device,
+	&jpu_device,
 };
 
 static int __init sh7724_devices_setup(void)
@@ -473,6 +503,7 @@ static int __init sh7724_devices_setup(void)
 	platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20);
 	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
 	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
+	platform_resource_setup_memory(&jpu_device,  "jpu",  2 << 20);
 
 	return platform_add_devices(sh7724_devices,
 				    ARRAY_SIZE(sh7724_devices));
-- 
cgit v0.10.2


From 4c7c99788631bab177bd51e15e893be4689bb085 Mon Sep 17 00:00:00 2001
From: Aoi Shinkai <shinkoi2005@gmail.com>
Date: Wed, 10 Jun 2009 16:15:42 +0000
Subject: sh: Fix sh4a llsc-based cmpxchg()

This fixes up a typo in the ll/sc based cmpxchg code which apparently
wasn't getting a lot of testing due to the swapped old/new pair. With
that fixed up, the ll/sc code also starts using it and provides its own
atomic_add_unless().

Signed-off-by: Aoi Shinkai <shinkoi2005@gmail.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index 4b00b78..b040e1e 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -104,4 +104,31 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 	: "t");
 }
 
+#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+	c = atomic_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+
+	return c != (u);
+}
+
 #endif /* __ASM_SH_ATOMIC_LLSC_H */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 6327ffb..978b58e 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -45,7 +45,7 @@
 #define atomic_inc(v) atomic_add(1,(v))
 #define atomic_dec(v) atomic_sub(1,(v))
 
-#ifndef CONFIG_GUSA_RB
+#if !defined(CONFIG_GUSA_RB) && !defined(CONFIG_CPU_SH4A)
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
@@ -73,7 +73,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 
 	return ret != u;
 }
-#endif
+#endif /* !CONFIG_GUSA_RB && !CONFIG_CPU_SH4A */
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
diff --git a/arch/sh/include/asm/cmpxchg-llsc.h b/arch/sh/include/asm/cmpxchg-llsc.h
index 0fac3da..4713666 100644
--- a/arch/sh/include/asm/cmpxchg-llsc.h
+++ b/arch/sh/include/asm/cmpxchg-llsc.h
@@ -55,7 +55,7 @@ __cmpxchg_u32(volatile int *m, unsigned long old, unsigned long new)
 		"mov		%0, %1				\n\t"
 		"cmp/eq		%1, %3				\n\t"
 		"bf		2f				\n\t"
-		"mov		%3, %0				\n\t"
+		"mov		%4, %0				\n\t"
 		"2:						\n\t"
 		"movco.l	%0, @%2				\n\t"
 		"bf		1b				\n\t"
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index 6028356..a28c9f0 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -26,7 +26,7 @@
 #define __raw_spin_is_locked(x)		((x)->lock <= 0)
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 #define __raw_spin_unlock_wait(x) \
-	do { cpu_relax(); } while ((x)->lock)
+	do { while (__raw_spin_is_locked(x)) cpu_relax(); } while (0)
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
-- 
cgit v0.10.2


From 75c936aec050113c12f74581f51779ffc5502a51 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 11 Jun 2009 09:33:53 +0300
Subject: sh: Fix sys_pwritev() syscall table entry for sh32.

There was a typo here that had this as sys_writev() instead of
sys_pwritev(), fix this up. sh64 got this right, as did the preadv()
case.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 05202ed..6118f35 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -350,4 +350,4 @@ ENTRY(sys_call_table)
 	.long sys_pipe2
 	.long sys_inotify_init1
 	.long sys_preadv
-	.long sys_writev
+	.long sys_pwritev
-- 
cgit v0.10.2


From 6a1555fdde407dad23b8a119cf5feeb7c6466de9 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 11 Jun 2009 09:38:05 +0300
Subject: sh: Wire up sys_rt_tgsigqueueinfo.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index 2efb819..6519708 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h
@@ -343,8 +343,9 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_rt_tgsigqueueinfo	335
 
-#define NR_syscalls 335
+#define NR_syscalls 336
 
 #ifdef __KERNEL__
 
diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index 6eb9d29..8014aea 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h
@@ -383,10 +383,11 @@
 #define __NR_inotify_init1	360
 #define __NR_preadv		361
 #define __NR_pwritev		362
+#define __NR_rt_tgsigqueueinfo	363
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 363
+#define NR_syscalls 364
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 6118f35..a9fff9f 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -351,3 +351,4 @@ ENTRY(sys_call_table)
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_rt_tgsigqueueinfo	/* 335 */
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index a083609..75c1889 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -389,3 +389,4 @@ sys_call_table:
 	.long sys_inotify_init1		/* 360 */
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_rt_tgsigqueueinfo
-- 
cgit v0.10.2


From 54ff328b46e58568c4b3350c2fa3223ef862e5a4 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 11 Jun 2009 10:33:09 +0300
Subject: sh: Tie sparseirq in to Kconfig.

Now that the dependent patches are merged, we are ready to enable
sparseirq support. This simply adds the Kconfig option, and then converts
from the _cpu to the _node allocation routines to follow the upstream
sparseirq API changes.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b5629cd..586cd04 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -678,6 +678,18 @@ config GUSA_RB
 	  LLSC, this should be more efficient than the other alternative of
 	  disabling interrupts around the atomic sequence.
 
+config SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	depends on EXPERIMENTAL
+	help
+	  This enables support for sparse irqs. This is useful in general
+	  as most CPUs have a fairly sparse array of IRQ vectors, which
+	  the irq_desc then maps directly on to. Systems with a high
+	  number of off-chip IRQs will want to treat this as
+	  experimental until they have been independently verified.
+
+	  If you don't know what to do here, say N.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/sh/kernel/cpu/irq/ipr.c b/arch/sh/kernel/cpu/irq/ipr.c
index 6ad40db..808d99a 100644
--- a/arch/sh/kernel/cpu/irq/ipr.c
+++ b/arch/sh/kernel/cpu/irq/ipr.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
+#include <linux/topology.h>
 
 static inline struct ipr_desc *get_ipr_desc(unsigned int irq)
 {
@@ -59,21 +60,17 @@ void register_ipr_controller(struct ipr_desc *desc)
 
 	for (i = 0; i < desc->nr_irqs; i++) {
 		struct ipr_data *p = desc->ipr_data + i;
-#ifdef CONFIG_SPARSE_IRQ
 		struct irq_desc *irq_desc;
-#endif
 
 		BUG_ON(p->ipr_idx >= desc->nr_offsets);
 		BUG_ON(!desc->ipr_offsets[p->ipr_idx]);
 
-#ifdef CONFIG_SPARSE_IRQ
-		irq_desc = irq_to_desc_alloc_cpu(p->irq, smp_processor_id());
+		irq_desc = irq_to_desc_alloc_node(p->irq, numa_node_id());
 		if (unlikely(!irq_desc)) {
 			printk(KERN_INFO "can not get irq_desc for %d\n",
 			       p->irq);
 			continue;
 		}
-#endif
 
 		disable_irq_nosync(p->irq);
 		set_irq_chip_and_handler_name(p->irq, &desc->chip,
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index caf0656..d687a9b 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -24,6 +24,7 @@
 #include <linux/sh_intc.h>
 #include <linux/sysdev.h>
 #include <linux/list.h>
+#include <linux/topology.h>
 
 #define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \
 	((shift) | ((width) << 5) | ((fn) << 9) | ((mode) << 13) | \
@@ -671,7 +672,7 @@ unsigned int intc_evt2irq(unsigned int vector)
 
 void __init register_intc_controller(struct intc_desc *desc)
 {
-	unsigned int i, k, smp, cpu = smp_processor_id();
+	unsigned int i, k, smp;
 	struct intc_desc_int *d;
 
 	d = alloc_bootmem(sizeof(*d));
@@ -771,19 +772,16 @@ void __init register_intc_controller(struct intc_desc *desc)
 	for (i = 0; i < desc->nr_vectors; i++) {
 		struct intc_vect *vect = desc->vectors + i;
 		unsigned int irq = evt2irq(vect->vect);
-#ifdef CONFIG_SPARSE_IRQ
 		struct irq_desc *irq_desc;
-#endif
+
 		if (!vect->enum_id)
 			continue;
 
-#ifdef CONFIG_SPARSE_IRQ
-		irq_desc = irq_to_desc_alloc_cpu(irq, cpu);
+		irq_desc = irq_to_desc_alloc_node(irq, numa_node_id());
 		if (unlikely(!irq_desc)) {
 			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 			continue;
 		}
-#endif
 
 		intc_register_irq(desc, d, vect->enum_id, irq);
 	}
-- 
cgit v0.10.2


From 4fefcb27050b98c97b1c32bc710fc2f874449dee Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Tue, 2 Jun 2009 14:09:47 +0800
Subject: drm: add separate drm debugging levels

Now all the DRM debug info will be reported if the boot option of
"drm.debug=1" is added. Sometimes it is inconvenient to get the debug
info in KMS mode. We will get too much unrelated info.

This will separate several DRM debug levels and the debug level can be used
to print the different debug info. And the debug level is controlled by the
module parameter of drm.debug

In this patch it is divided into four debug levels;
       	drm_core, drm_driver, drm_kms, drm_mode.

At the same time we can get the different debug info by changing the debug
level. This can be done by adding the module parameter. Of course it can
be changed through the /sys/module/drm/parameters/debug after the system is
booted.

Four debug macro definitions are provided.
	DRM_DEBUG(fmt, args...)
	DRM_DEBUG_DRIVER(prefix, fmt, args...)
	DRM_DEBUG_KMS(prefix, fmt, args...)
	DRM_DEBUG_MODE(prefix, fmt, args...)

When the boot option of "drm.debug=4" is added, it will print the debug info
using DRM_DEBUG_KMS macro definition.
When the boot option of "drm.debug=6" is added, it will print the debug info
using DRM_DEBUG_KMS/DRM_DEBUG_DRIVER.

Sometimes we expect to print the value of an array.
For example: SDVO command,
In such case the following four DRM debug macro definitions are added:
	DRM_LOG(fmt, args...)
	DRM_LOG_DRIVER(fmt, args...)
	DRM_LOG_KMS(fmt, args...)
	DRM_LOG_MODE(fmt, args...)

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index b9631e3..8905068 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -51,7 +51,22 @@ struct idr drm_minors_idr;
 struct class *drm_class;
 struct proc_dir_entry *drm_proc_root;
 struct dentry *drm_debugfs_root;
-
+void drm_ut_debug_printk(unsigned int request_level,
+			 const char *prefix,
+			 const char *function_name,
+			 const char *format, ...)
+{
+	va_list args;
+
+	if (drm_debug & request_level) {
+		if (function_name)
+			printk(KERN_DEBUG "[%s:%s], ", prefix, function_name);
+		va_start(args, format);
+		vprintk(format, args);
+		va_end(args);
+	}
+}
+EXPORT_SYMBOL(drm_ut_debug_printk);
 static int drm_minor_get_id(struct drm_device *dev, int type)
 {
 	int new_id;
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index efa5f79..afc2168 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -87,6 +87,15 @@ struct drm_device;
 #include "drm_os_linux.h"
 #include "drm_hashtab.h"
 
+#define DRM_UT_CORE 		0x01
+#define DRM_UT_DRIVER		0x02
+#define DRM_UT_KMS		0x04
+#define DRM_UT_MODE		0x08
+
+extern void drm_ut_debug_printk(unsigned int request_level,
+				const char *prefix,
+				const char *function_name,
+				const char *format, ...);
 /***********************************************************************/
 /** \name DRM template customization defaults */
 /*@{*/
@@ -186,15 +195,57 @@ struct drm_device;
  * \param arg arguments
  */
 #if DRM_DEBUG_CODE
-#define DRM_DEBUG(fmt, arg...)						\
+#define DRM_DEBUG(fmt, args...)						\
 	do {								\
-		if ( drm_debug )			\
-			printk(KERN_DEBUG				\
-			       "[" DRM_NAME ":%s] " fmt ,	\
-			       __func__ , ##arg);			\
+		drm_ut_debug_printk(DRM_UT_CORE, DRM_NAME, 		\
+					__func__, fmt, ##args);		\
+	} while (0)
+
+#define DRM_DEBUG_DRIVER(prefix, fmt, args...)				\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_DRIVER, prefix,		\
+					__func__, fmt, ##args);		\
+	} while (0)
+#define DRM_DEBUG_KMS(prefix, fmt, args...)				\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_KMS, prefix, 		\
+					 __func__, fmt, ##args);	\
+	} while (0)
+#define DRM_DEBUG_MODE(prefix, fmt, args...)				\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_MODE, prefix, 		\
+					 __func__, fmt, ##args);	\
+	} while (0)
+#define DRM_LOG(fmt, args...)						\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_CORE, NULL,			\
+					NULL, fmt, ##args);		\
+	} while (0)
+#define DRM_LOG_KMS(fmt, args...)					\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_KMS, NULL,			\
+					NULL, fmt, ##args);		\
+	} while (0)
+#define DRM_LOG_MODE(fmt, args...)					\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_MODE, NULL,			\
+					NULL, fmt, ##args);		\
+	} while (0)
+#define DRM_LOG_DRIVER(fmt, args...)					\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_DRIVER, NULL,		\
+					NULL, fmt, ##args);		\
 	} while (0)
 #else
+#define DRM_DEBUG_DRIVER(prefix, fmt, args...) do { } while (0)
+#define DRM_DEBUG_KMS(prefix, fmt, args...)	do { } while (0)
+#define DRM_DEBUG_MODE(prefix, fmt, args...)	do { } while (0)
 #define DRM_DEBUG(fmt, arg...)		 do { } while (0)
+#define DRM_LOG(fmt, arg...)		do { } while (0)
+#define DRM_LOG_KMS(fmt, args...) do { } while (0)
+#define DRM_LOG_MODE(fmt, arg...) do { } while (0)
+#define DRM_LOG_DRIVER(fmt, arg...) do { } while (0)
+
 #endif
 
 #define DRM_PROC_LIMIT (PAGE_SIZE-80)
-- 
cgit v0.10.2


From 7fb85bfb54a64e9dd82ee4a79022c38ab58f21a0 Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Tue, 2 Jun 2009 14:10:49 +0800
Subject: drm/i915: replace DRM_DEBUG with DRM_DEBUG_KMS in intel_lvds

Use the DRM_DEBUG_KMS macro definition to print the debug info for
the LVDS.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index a7ae9f4..f22e6ef 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -37,6 +37,8 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 
+#define I915_LVDS "i915_lvds"
+
 /**
  * Sets the backlight level.
  *
@@ -447,7 +449,8 @@ static const struct drm_encoder_funcs intel_lvds_enc_funcs = {
 
 static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id)
 {
-	DRM_DEBUG("Skipping LVDS initialization for %s\n", id->ident);
+	DRM_DEBUG_KMS(I915_LVDS,
+		      "Skipping LVDS initialization for %s\n", id->ident);
 	return 1;
 }
 
@@ -646,7 +649,7 @@ out:
 	return;
 
 failed:
-	DRM_DEBUG("No LVDS modes found, disabling.\n");
+	DRM_DEBUG_KMS(I915_LVDS, "No LVDS modes found, disabling.\n");
 	if (intel_output->ddc_bus)
 		intel_i2c_destroy(intel_output->ddc_bus);
 	drm_connector_cleanup(connector);
-- 
cgit v0.10.2


From 342dc382c451f75ea202a65e6f529bdff6d184cd Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Tue, 2 Jun 2009 14:12:00 +0800
Subject: drm/i915: Replace DRM_DEBUG with DRM_DEBUG_KMS in intel_sdvo

Use the DRM_DEBUG_KMS/DEBUG_LOG_KMS to print the debug info for
SDVO device.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index d8fb88d..7cb9ddf 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -36,7 +36,7 @@
 #include "intel_sdvo_regs.h"
 
 #undef SDVO_DEBUG
-
+#define I915_SDVO	"i915_sdvo"
 struct intel_sdvo_priv {
 	struct intel_i2c_chan *i2c_bus;
 	int slaveaddr;
@@ -277,20 +277,21 @@ static void intel_sdvo_debug_write(struct intel_output *intel_output, u8 cmd,
 	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
 	int i;
 
-	printk(KERN_DEBUG "%s: W: %02X ", SDVO_NAME(sdvo_priv), cmd);
+	DRM_DEBUG_KMS(I915_SDVO, "%s: W: %02X ",
+				SDVO_NAME(sdvo_priv), cmd);
 	for (i = 0; i < args_len; i++)
-		printk(KERN_DEBUG "%02X ", ((u8 *)args)[i]);
+		DRM_LOG_KMS("%02X ", ((u8 *)args)[i]);
 	for (; i < 8; i++)
-		printk(KERN_DEBUG "   ");
+		DRM_LOG_KMS("   ");
 	for (i = 0; i < sizeof(sdvo_cmd_names) / sizeof(sdvo_cmd_names[0]); i++) {
 		if (cmd == sdvo_cmd_names[i].cmd) {
-			printk(KERN_DEBUG "(%s)", sdvo_cmd_names[i].name);
+			DRM_LOG_KMS("(%s)", sdvo_cmd_names[i].name);
 			break;
 		}
 	}
 	if (i == sizeof(sdvo_cmd_names)/ sizeof(sdvo_cmd_names[0]))
-		printk(KERN_DEBUG "(%02X)", cmd);
-	printk(KERN_DEBUG "\n");
+		DRM_LOG_KMS("(%02X)", cmd);
+	DRM_LOG_KMS("\n");
 }
 #else
 #define intel_sdvo_debug_write(o, c, a, l)
@@ -329,16 +330,16 @@ static void intel_sdvo_debug_response(struct intel_output *intel_output,
 	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
 	int i;
 
-	printk(KERN_DEBUG "%s: R: ", SDVO_NAME(sdvo_priv));
+	DRM_DEBUG_KMS(I915_SDVO, "%s: R: ", SDVO_NAME(sdvo_priv));
 	for (i = 0; i < response_len; i++)
-		printk(KERN_DEBUG "%02X ", ((u8 *)response)[i]);
+		DRM_LOG_KMS("%02X ", ((u8 *)response)[i]);
 	for (; i < 8; i++)
-		printk(KERN_DEBUG "   ");
+		DRM_LOG_KMS("   ");
 	if (status <= SDVO_CMD_STATUS_SCALING_NOT_SUPP)
-		printk(KERN_DEBUG "(%s)", cmd_status_names[status]);
+		DRM_LOG_KMS("(%s)", cmd_status_names[status]);
 	else
-		printk(KERN_DEBUG "(??? %d)", status);
-	printk(KERN_DEBUG "\n");
+		DRM_LOG_KMS("(??? %d)", status);
+	DRM_LOG_KMS("\n");
 }
 #else
 #define intel_sdvo_debug_response(o, r, l, s)
@@ -1824,8 +1825,9 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 	/* Read the regs to test if we can talk to the device */
 	for (i = 0; i < 0x40; i++) {
 		if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) {
-			DRM_DEBUG("No SDVO device found on SDVO%c\n",
-				  output_device == SDVOB ? 'B' : 'C');
+			DRM_DEBUG_KMS(I915_SDVO,
+					"No SDVO device found on SDVO%c\n",
+					output_device == SDVOB ? 'B' : 'C');
 			goto err_i2c;
 		}
 	}
@@ -1909,9 +1911,10 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 
 		sdvo_priv->controlled_output = 0;
 		memcpy (bytes, &sdvo_priv->caps.output_flags, 2);
-		DRM_DEBUG("%s: Unknown SDVO output type (0x%02x%02x)\n",
-			  SDVO_NAME(sdvo_priv),
-			  bytes[0], bytes[1]);
+		DRM_DEBUG_KMS(I915_SDVO,
+				"%s: Unknown SDVO output type (0x%02x%02x)\n",
+				  SDVO_NAME(sdvo_priv),
+				  bytes[0], bytes[1]);
 		encoder_type = DRM_MODE_ENCODER_NONE;
 		connector_type = DRM_MODE_CONNECTOR_Unknown;
 		goto err_i2c;
@@ -1941,21 +1944,21 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
 					       &sdvo_priv->pixel_clock_max);
 
 
-	DRM_DEBUG("%s device VID/DID: %02X:%02X.%02X, "
-		  "clock range %dMHz - %dMHz, "
-		  "input 1: %c, input 2: %c, "
-		  "output 1: %c, output 2: %c\n",
-		  SDVO_NAME(sdvo_priv),
-		  sdvo_priv->caps.vendor_id, sdvo_priv->caps.device_id,
-		  sdvo_priv->caps.device_rev_id,
-		  sdvo_priv->pixel_clock_min / 1000,
-		  sdvo_priv->pixel_clock_max / 1000,
-		  (sdvo_priv->caps.sdvo_inputs_mask & 0x1) ? 'Y' : 'N',
-		  (sdvo_priv->caps.sdvo_inputs_mask & 0x2) ? 'Y' : 'N',
-		  /* check currently supported outputs */
-		  sdvo_priv->caps.output_flags &
+	DRM_DEBUG_KMS(I915_SDVO, "%s device VID/DID: %02X:%02X.%02X, "
+			"clock range %dMHz - %dMHz, "
+			"input 1: %c, input 2: %c, "
+			"output 1: %c, output 2: %c\n",
+			SDVO_NAME(sdvo_priv),
+			sdvo_priv->caps.vendor_id, sdvo_priv->caps.device_id,
+			sdvo_priv->caps.device_rev_id,
+			sdvo_priv->pixel_clock_min / 1000,
+			sdvo_priv->pixel_clock_max / 1000,
+			(sdvo_priv->caps.sdvo_inputs_mask & 0x1) ? 'Y' : 'N',
+			(sdvo_priv->caps.sdvo_inputs_mask & 0x2) ? 'Y' : 'N',
+			/* check currently supported outputs */
+			sdvo_priv->caps.output_flags &
 			(SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_RGB0) ? 'Y' : 'N',
-		  sdvo_priv->caps.output_flags &
+			sdvo_priv->caps.output_flags &
 			(SDVO_OUTPUT_TMDS1 | SDVO_OUTPUT_RGB1) ? 'Y' : 'N');
 
 	return true;
-- 
cgit v0.10.2


From 77e26cca20013e9352a8df86a54640543304a23a Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 11 Jun 2009 16:04:35 +0900
Subject: x86, mce: Fix mce printing

This patch:

 - Adds print_mce_head() instead of first flag
 - Makes the header to be printed always
 - Stops double printing of corrected errors

[ This portion originates from Huang Ying's patch ]

Originally-From: Huang Ying <ying.huang@intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
LKML-Reference: <4A30AC83.5010708@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d4e7b59..6a3127e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -180,12 +180,8 @@ void mce_log(struct mce *mce)
 	set_bit(0, &notify_user);
 }
 
-static void print_mce(struct mce *m, int *first)
+static void print_mce(struct mce *m)
 {
-	if (*first) {
-		printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n");
-		*first = 0;
-	}
 	printk(KERN_EMERG
 	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
@@ -208,6 +204,11 @@ static void print_mce(struct mce *m, int *first)
 			m->apicid);
 }
 
+static void print_mce_head(void)
+{
+	printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n");
+}
+
 static void print_mce_tail(void)
 {
 	printk(KERN_EMERG "This is not a software problem!\n"
@@ -234,7 +235,6 @@ static void wait_for_panic(void)
 static void mce_panic(char *msg, struct mce *final, char *exp)
 {
 	int i;
-	int first = 1;
 
 	/*
 	 * Make sure only one CPU runs in machine check panic
@@ -245,23 +245,27 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 
 	bust_spinlocks(1);
 	console_verbose();
+	print_mce_head();
 	/* First print corrected ones that are still unlogged */
 	for (i = 0; i < MCE_LOG_LEN; i++) {
 		struct mce *m = &mcelog.entry[i];
-		if ((m->status & MCI_STATUS_VAL) &&
-			!(m->status & MCI_STATUS_UC))
-			print_mce(m, &first);
+		if (!(m->status & MCI_STATUS_VAL))
+			continue;
+		if (!(m->status & MCI_STATUS_UC))
+			print_mce(m);
 	}
 	/* Now print uncorrected but with the final one last */
 	for (i = 0; i < MCE_LOG_LEN; i++) {
 		struct mce *m = &mcelog.entry[i];
 		if (!(m->status & MCI_STATUS_VAL))
 			continue;
+		if (!(m->status & MCI_STATUS_UC))
+			continue;
 		if (!final || memcmp(m, final, sizeof(struct mce)))
-			print_mce(m, &first);
+			print_mce(m);
 	}
 	if (final)
-		print_mce(final, &first);
+		print_mce(final);
 	if (cpu_missing)
 		printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
 	print_mce_tail();
-- 
cgit v0.10.2


From 62fdac5913f71f8f200bd2c9bd59a02e9a1498e9 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Thu, 11 Jun 2009 16:06:07 +0900
Subject: x86, mce: Add boot options for corrected errors

This patch introduces three boot options (no_cmci, dont_log_ce
and ignore_ce) to control handling for corrected errors.

The "mce=no_cmci" boot option disables the CMCI feature.

Since CMCI is a new feature so having boot controls to disable
it will be a help if the hardware is misbehaving.

The "mce=dont_log_ce" boot option disables logging for corrected
errors. All reported corrected errors will be cleared silently.
This option will be useful if you never care about corrected
errors.

The "mce=ignore_ce" boot option disables features for corrected
errors, i.e. polling timer and cmci.  All corrected events are
not cleared and kept in bank MSRs.

Usually this disablement is not recommended, however it will be
a help if there are some conflict with the BIOS or hardware
monitoring applications etc., that clears corrected events in
banks instead of OS.

[ And trivial cleanup (space -> tab) for doc is included. ]

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <4A30ACDF.5030408@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 0ee5e3b..fa2bed0 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -7,12 +7,36 @@ Machine check
 
    Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
 
-   mce=off disable machine check
-   mce=bootlog Enable logging of machine checks left over from booting.
-               Disabled by default on AMD because some BIOS leave bogus ones.
-               If your BIOS doesn't do that it's a good idea to enable though
-               to make sure you log even machine check events that result
-               in a reboot. On Intel systems it is enabled by default.
+   mce=off
+		Disable machine check
+   mce=no_cmci
+		Disable CMCI(Corrected Machine Check Interrupt) that
+		Intel processor supports.  Usually this disablement is
+		not recommended, but it might be handy if your hardware
+		is misbehaving.
+		Note that you'll get more problems without CMCI than with
+		due to the shared banks, i.e. you might get duplicated
+		error logs.
+   mce=dont_log_ce
+		Don't make logs for corrected errors.  All events reported
+		as corrected are silently cleared by OS.
+		This option will be useful if you have no interest in any
+		of corrected errors.
+   mce=ignore_ce
+		Disable features for corrected errors, e.g. polling timer
+		and CMCI.  All events reported as corrected are not cleared
+		by OS and remained in its error banks.
+		Usually this disablement is not recommended, however if
+		there is an agent checking/clearing corrected errors
+		(e.g. BIOS or hardware monitoring applications), conflicting
+		with OS's error handling, and you cannot deactivate the agent,
+		then this option will be a help.
+   mce=bootlog
+		Enable logging of machine checks left over from booting.
+		Disabled by default on AMD because some BIOS leave bogus ones.
+		If your BIOS doesn't do that it's a good idea to enable though
+		to make sure you log even machine check events that result
+		in a reboot. On Intel systems it is enabled by default.
    mce=nobootlog
 		Disable boot machine check logging.
    mce=tolerancelevel[,monarchtimeout] (number,number)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 82978ad..540a466 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -119,6 +119,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
 
 #ifdef CONFIG_X86_MCE_INTEL
+extern int mce_cmci_disabled;
+extern int mce_ignore_ce;
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6a3127e..fabba15 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -84,6 +84,9 @@ static int			rip_msr;
 static int			mce_bootlog = -1;
 static int			monarch_timeout = -1;
 static int			mce_panic_timeout;
+static int			mce_dont_log_ce;
+int				mce_cmci_disabled;
+int				mce_ignore_ce;
 int				mce_ser;
 
 static char			trigger[128];
@@ -526,7 +529,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG)) {
+		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
 			mce_log(&m);
 			add_taint(TAINT_MACHINE_CHECK);
 		}
@@ -1307,6 +1310,9 @@ static void mce_init_timer(void)
 	struct timer_list *t = &__get_cpu_var(mce_timer);
 	int *n = &__get_cpu_var(next_interval);
 
+	if (mce_ignore_ce)
+		return;
+
 	*n = check_interval * HZ;
 	if (!*n)
 		return;
@@ -1517,7 +1523,10 @@ static struct miscdevice mce_log_device = {
 };
 
 /*
- * mce=off disables machine check
+ * mce=off Disables machine check
+ * mce=no_cmci Disables CMCI
+ * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
+ * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
  * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
  *	monarchtimeout is how long to wait for other CPUs on machine
  *	check, or 0 to not wait
@@ -1532,6 +1541,12 @@ static int __init mcheck_enable(char *str)
 		str++;
 	if (!strcmp(str, "off"))
 		mce_disabled = 1;
+	else if (!strcmp(str, "no_cmci"))
+		mce_cmci_disabled = 1;
+	else if (!strcmp(str, "dont_log_ce"))
+		mce_dont_log_ce = 1;
+	else if (!strcmp(str, "ignore_ce"))
+		mce_ignore_ce = 1;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		mce_bootlog = (str[0] == 'b');
 	else if (isdigit(str[0])) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index b7c5a24..046087e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -57,6 +57,9 @@ static int cmci_supported(int *banks)
 {
 	u64 cap;
 
+	if (mce_cmci_disabled || mce_ignore_ce)
+		return 0;
+
 	/*
 	 * Vendor check is not strictly needed, but the initial
 	 * initialization is vendor keyed and this
-- 
cgit v0.10.2


From b0fd271d5fba0b2d00888363f3869e3f9b26caa9 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Thu, 11 Jun 2009 13:10:16 +0200
Subject: block: add request clone interface (v2)

This patch adds the following 2 interfaces for request-stacking drivers:

  - blk_rq_prep_clone(struct request *clone, struct request *orig,
		      struct bio_set *bs, gfp_t gfp_mask,
		      int (*bio_ctr)(struct bio *, struct bio*, void *),
		      void *data)
      * Clones bios in the original request to the clone request
        (bio_ctr is called for each cloned bios.)
      * Copies attributes of the original request to the clone request.
        The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not
        copied.

  - blk_rq_unprep_clone(struct request *clone)
      * Frees cloned bios from the clone request.

Request stacking drivers (e.g. request-based dm) need to make a clone
request for a submitted request and dispatch it to other devices.

To allocate request for the clone, request stacking drivers may not
be able to use blk_get_request() because the allocation may be done
in an irq-disabled context.
So blk_rq_prep_clone() takes a request allocated by the caller
as an argument.

For each clone bio in the clone request, request stacking drivers
should be able to set up their own completion handler.
So blk_rq_prep_clone() takes a callback function which is called
for each clone bio, and a pointer for private data which is passed
to the callback.

NOTE:
blk_rq_prep_clone() doesn't copy any actual data of the original
request.  Pages are shared between original bios and cloned bios.
So caller must not complete the original request before the clone
request.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index 03c5a64..02a9252 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2295,6 +2295,106 @@ int blk_lld_busy(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_lld_busy);
 
+/**
+ * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
+ * @rq: the clone request to be cleaned up
+ *
+ * Description:
+ *     Free all bios in @rq for a cloned request.
+ */
+void blk_rq_unprep_clone(struct request *rq)
+{
+	struct bio *bio;
+
+	while ((bio = rq->bio) != NULL) {
+		rq->bio = bio->bi_next;
+
+		bio_put(bio);
+	}
+}
+EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
+
+/*
+ * Copy attributes of the original request to the clone request.
+ * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
+ */
+static void __blk_rq_prep_clone(struct request *dst, struct request *src)
+{
+	dst->cpu = src->cpu;
+	dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
+	dst->cmd_type = src->cmd_type;
+	dst->__sector = blk_rq_pos(src);
+	dst->__data_len = blk_rq_bytes(src);
+	dst->nr_phys_segments = src->nr_phys_segments;
+	dst->ioprio = src->ioprio;
+	dst->extra_len = src->extra_len;
+}
+
+/**
+ * blk_rq_prep_clone - Helper function to setup clone request
+ * @rq: the request to be setup
+ * @rq_src: original request to be cloned
+ * @bs: bio_set that bios for clone are allocated from
+ * @gfp_mask: memory allocation mask for bio
+ * @bio_ctr: setup function to be called for each clone bio.
+ *           Returns %0 for success, non %0 for failure.
+ * @data: private data to be passed to @bio_ctr
+ *
+ * Description:
+ *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
+ *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
+ *     are not copied, and copying such parts is the caller's responsibility.
+ *     Also, pages which the original bios are pointing to are not copied
+ *     and the cloned bios just point same pages.
+ *     So cloned bios must be completed before original bios, which means
+ *     the caller must complete @rq before @rq_src.
+ */
+int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+		      struct bio_set *bs, gfp_t gfp_mask,
+		      int (*bio_ctr)(struct bio *, struct bio *, void *),
+		      void *data)
+{
+	struct bio *bio, *bio_src;
+
+	if (!bs)
+		bs = fs_bio_set;
+
+	blk_rq_init(NULL, rq);
+
+	__rq_for_each_bio(bio_src, rq_src) {
+		bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
+		if (!bio)
+			goto free_and_out;
+
+		__bio_clone(bio, bio_src);
+
+		if (bio_integrity(bio_src) &&
+		    bio_integrity_clone(bio, bio_src, gfp_mask))
+			goto free_and_out;
+
+		if (bio_ctr && bio_ctr(bio, bio_src, data))
+			goto free_and_out;
+
+		if (rq->bio) {
+			rq->biotail->bi_next = bio;
+			rq->biotail = bio;
+		} else
+			rq->bio = rq->biotail = bio;
+	}
+
+	__blk_rq_prep_clone(rq, rq_src);
+
+	return 0;
+
+free_and_out:
+	if (bio)
+		bio_free(bio, bs);
+	blk_rq_unprep_clone(rq);
+
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
+
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5e740a1..ebdfde8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -765,6 +765,11 @@ extern void blk_insert_request(struct request_queue *, struct request *, int, vo
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
+extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+			     struct bio_set *bs, gfp_t gfp_mask,
+			     int (*bio_ctr)(struct bio *, struct bio *, void *),
+			     void *data);
+extern void blk_rq_unprep_clone(struct request *rq);
 extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_plug_device(struct request_queue *);
-- 
cgit v0.10.2


From 85d4198e40c289dd623cecd16601fa613559bed7 Mon Sep 17 00:00:00 2001
From: Yan Zheng <zheng.yan@oracle.com>
Date: Thu, 11 Jun 2009 08:51:10 -0400
Subject: Btrfs: check duplicate backrefs for both data and metadata

lookup_inline_extent_backref only checks for duplicate backref for data
extents. It assumes backrefs for tree block never conflict.

This patch makes lookup_inline_extent_backref check for duplicate backrefs
for both data and tree block, so that we can detect potential bug earlier.
This is a safety check, strictly speaking it is not required.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 33a65f2..edc7d20 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1056,8 +1056,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 	want = extent_ref_type(parent, owner);
 	if (insert) {
 		extra_size = btrfs_extent_inline_ref_size(want);
-		if (owner >= BTRFS_FIRST_FREE_OBJECTID)
-			path->keep_locks = 1;
+		path->keep_locks = 1;
 	} else
 		extra_size = -1;
 	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
@@ -1087,12 +1086,6 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 #endif
 	BUG_ON(item_size < sizeof(*ei));
 
-	if (owner < BTRFS_FIRST_FREE_OBJECTID && insert &&
-	    item_size + extra_size >= BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
-		err = -EAGAIN;
-		goto out;
-	}
-
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	flags = btrfs_extent_flags(leaf, ei);
 
@@ -1165,15 +1158,15 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		 * For simplicity, we just do not add new inline back
 		 * ref if there is any kind of item for this block
 		 */
-		if (owner >= BTRFS_FIRST_FREE_OBJECTID &&
-		    find_next_key(path, &key) == 0 && key.objectid == bytenr) {
+		if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
 			err = -EAGAIN;
 			goto out;
 		}
 	}
 	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
 out:
-	if (insert && owner >= BTRFS_FIRST_FREE_OBJECTID) {
+	if (insert) {
 		path->keep_locks = 0;
 		btrfs_unlock_up_safe(path, 1);
 	}
-- 
cgit v0.10.2


From 067c28adc53807514ac0c6ebb6af3243cbd071fa Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Thu, 11 Jun 2009 09:30:13 -0400
Subject: Btrfs: fix -o nodatasum printk spelling

It was printing nodatacsum, which was not the correct option name.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3427db2..708ac06 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -159,7 +159,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			 */
 			break;
 		case Opt_nodatasum:
-			printk(KERN_INFO "btrfs: setting nodatacsum\n");
+			printk(KERN_INFO "btrfs: setting nodatasum\n");
 			btrfs_set_opt(info->mount_opt, NODATASUM);
 			break;
 		case Opt_nodatacow:
-- 
cgit v0.10.2


From 27eb97582db53b4b9461373aedf238ca06d85f15 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Thu, 11 Jun 2009 14:30:07 +0100
Subject: [ARM] 5551/1: Add multi-function pin api for w90p910 platform.

Add multi-function pin api for w90p910 platform.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-w90x900/Makefile b/arch/arm/mach-w90x900/Makefile
index 0c0c1d6..947316a 100644
--- a/arch/arm/mach-w90x900/Makefile
+++ b/arch/arm/mach-w90x900/Makefile
@@ -4,7 +4,7 @@
 
 # Object file lists.
 
-obj-y				:= irq.o time.o
+obj-y				:= irq.o time.o mfp-w90p910.o
 
 # W90X900 CPU support files
 
diff --git a/arch/arm/mach-w90x900/mfp-w90p910.c b/arch/arm/mach-w90x900/mfp-w90p910.c
new file mode 100644
index 0000000..a3520fe
--- /dev/null
+++ b/arch/arm/mach-w90x900/mfp-w90p910.c
@@ -0,0 +1,116 @@
+/*
+ * linux/arch/arm/mach-w90x900/mfp-w90p910.c
+ *
+ * Copyright (c) 2008 Nuvoton technology corporation
+ *
+ * Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation;version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/clk.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+
+#include <mach/hardware.h>
+
+#define REG_MFSEL	(W90X900_VA_GCR + 0xC)
+
+#define GPSELF		(0x01 << 1)
+
+#define GPSELC		(0x03 << 2)
+#define ENKPI		(0x02 << 2)
+#define ENNAND		(0x01 << 2)
+
+#define GPSELEI0	(0x01 << 26)
+#define GPSELEI1	(0x01 << 27)
+
+static DECLARE_MUTEX(mfp_sem);
+
+void mfp_set_groupf(struct device *dev)
+{
+	unsigned long mfpen;
+	const char *dev_id;
+
+	BUG_ON(!dev);
+
+	down(&mfp_sem);
+
+	dev_id = dev_name(dev);
+
+	mfpen = __raw_readl(REG_MFSEL);
+
+	if (strcmp(dev_id, "w90p910-emc") == 0)
+		mfpen |= GPSELF;/*enable mac*/
+	else
+		mfpen &= ~GPSELF;/*GPIOF[9:0]*/
+
+	__raw_writel(mfpen, REG_MFSEL);
+
+	up(&mfp_sem);
+}
+EXPORT_SYMBOL(mfp_set_groupf);
+
+void mfp_set_groupc(struct device *dev)
+{
+	unsigned long mfpen;
+	const char *dev_id;
+
+	BUG_ON(!dev);
+
+	down(&mfp_sem);
+
+	dev_id = dev_name(dev);
+
+	mfpen = __raw_readl(REG_MFSEL);
+
+	if (strcmp(dev_id, "w90p910-lcd") == 0)
+		mfpen |= GPSELC;/*enable lcd*/
+	else if (strcmp(dev_id, "w90p910-kpi") == 0) {
+			mfpen &= (~GPSELC);/*enable kpi*/
+			mfpen |= ENKPI;
+		} else if (strcmp(dev_id, "w90p910-nand") == 0) {
+				mfpen &= (~GPSELC);/*enable nand*/
+				mfpen |= ENNAND;
+			} else
+				mfpen &= (~GPSELC);/*GPIOC[14:0]*/
+
+	__raw_writel(mfpen, REG_MFSEL);
+
+	up(&mfp_sem);
+}
+EXPORT_SYMBOL(mfp_set_groupc);
+
+void mfp_set_groupi(struct device *dev, int gpio)
+{
+	unsigned long mfpen;
+	const char *dev_id;
+
+	BUG_ON(!dev);
+
+	down(&mfp_sem);
+
+	dev_id = dev_name(dev);
+
+	mfpen = __raw_readl(REG_MFSEL);
+
+	if (strcmp(dev_id, "w90p910-wdog") == 0)
+		mfpen |= GPSELEI1;/*enable wdog*/
+		else if (strcmp(dev_id, "w90p910-atapi") == 0)
+			mfpen |= GPSELEI0;/*enable atapi*/
+
+	__raw_writel(mfpen, REG_MFSEL);
+
+	up(&mfp_sem);
+}
+EXPORT_SYMBOL(mfp_set_groupi);
+
-- 
cgit v0.10.2


From c52d3d688b09c50de12cb2fbdfebb18b3b664b00 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Wed, 10 Jun 2009 15:49:32 +0100
Subject: [ARM] 5548/1: Add gpio api for w90p910 platform

Add gpio api for w90p910 platform.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a930e5c..013993c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -488,6 +488,8 @@ config ARCH_NS9XXX
 config ARCH_W90X900
 	bool "Nuvoton W90X900 CPU"
 	select CPU_ARM926T
+	select ARCH_REQUIRE_GPIOLIB
+	select GENERIC_GPIO
 	help
 		Support for Nuvoton (Winbond logic dept.) ARM9 processor,You
 		can login www.mcuos.com or www.nuvoton.com to know more.
diff --git a/arch/arm/mach-w90x900/Makefile b/arch/arm/mach-w90x900/Makefile
index 947316a..6287268 100644
--- a/arch/arm/mach-w90x900/Makefile
+++ b/arch/arm/mach-w90x900/Makefile
@@ -4,7 +4,7 @@
 
 # Object file lists.
 
-obj-y				:= irq.o time.o mfp-w90p910.o
+obj-y				:= irq.o time.o mfp-w90p910.o gpio.o
 
 # W90X900 CPU support files
 
diff --git a/arch/arm/mach-w90x900/gpio.c b/arch/arm/mach-w90x900/gpio.c
new file mode 100644
index 0000000..c72e0df
--- /dev/null
+++ b/arch/arm/mach-w90x900/gpio.c
@@ -0,0 +1,154 @@
+/*
+ * linux/arch/arm/mach-w90p910/gpio.c
+ *
+ * Generic w90p910 GPIO handling
+ *
+ *  Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+
+#include <mach/hardware.h>
+
+#define GPIO_BASE 		(W90X900_VA_GPIO)
+#define GPIO_DIR		(0x04)
+#define GPIO_OUT		(0x08)
+#define GPIO_IN			(0x0C)
+#define GROUPINERV		(0x10)
+#define GPIO_GPIO(Nb)		(0x00000001 << (Nb))
+#define to_w90p910_gpio_chip(c) container_of(c, struct w90p910_gpio_chip, chip)
+
+#define W90P910_GPIO_CHIP(name, base_gpio, nr_gpio)			\
+	{								\
+		.chip = {						\
+			.label		  = name,			\
+			.direction_input  = w90p910_dir_input,		\
+			.direction_output = w90p910_dir_output,		\
+			.get		  = w90p910_gpio_get,		\
+			.set		  = w90p910_gpio_set,		\
+			.base		  = base_gpio,			\
+			.ngpio		  = nr_gpio,			\
+		}							\
+	}
+
+struct w90p910_gpio_chip {
+	struct gpio_chip	chip;
+	void __iomem		*regbase;	/* Base of group register*/
+	spinlock_t 		gpio_lock;
+};
+
+static int w90p910_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct w90p910_gpio_chip *w90p910_gpio = to_w90p910_gpio_chip(chip);
+	void __iomem *pio = w90p910_gpio->regbase + GPIO_IN;
+	unsigned int regval;
+
+	regval = __raw_readl(pio);
+	regval &= GPIO_GPIO(offset);
+
+	return (regval != 0);
+}
+
+static void w90p910_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
+{
+	struct w90p910_gpio_chip *w90p910_gpio = to_w90p910_gpio_chip(chip);
+	void __iomem *pio = w90p910_gpio->regbase + GPIO_OUT;
+	unsigned int regval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&w90p910_gpio->gpio_lock, flags);
+
+	regval = __raw_readl(pio);
+
+	if (val)
+		regval |= GPIO_GPIO(offset);
+	else
+		regval &= ~GPIO_GPIO(offset);
+
+	__raw_writel(regval, pio);
+
+	spin_unlock_irqrestore(&w90p910_gpio->gpio_lock, flags);
+}
+
+static int w90p910_dir_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct w90p910_gpio_chip *w90p910_gpio = to_w90p910_gpio_chip(chip);
+	void __iomem *pio = w90p910_gpio->regbase + GPIO_DIR;
+	unsigned int regval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&w90p910_gpio->gpio_lock, flags);
+
+	regval = __raw_readl(pio);
+	regval &= ~GPIO_GPIO(offset);
+	__raw_writel(regval, pio);
+
+	spin_unlock_irqrestore(&w90p910_gpio->gpio_lock, flags);
+
+	return 0;
+}
+
+static int w90p910_dir_output(struct gpio_chip *chip, unsigned offset, int val)
+{
+	struct w90p910_gpio_chip *w90p910_gpio = to_w90p910_gpio_chip(chip);
+	void __iomem *outreg = w90p910_gpio->regbase + GPIO_OUT;
+	void __iomem *pio = w90p910_gpio->regbase + GPIO_DIR;
+	unsigned int regval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&w90p910_gpio->gpio_lock, flags);
+
+	regval = __raw_readl(pio);
+	regval |= GPIO_GPIO(offset);
+	__raw_writel(regval, pio);
+
+	regval = __raw_readl(outreg);
+
+	if (val)
+		regval |= GPIO_GPIO(offset);
+	else
+		regval &= ~GPIO_GPIO(offset);
+
+	__raw_writel(regval, outreg);
+
+	spin_unlock_irqrestore(&w90p910_gpio->gpio_lock, flags);
+
+	return 0;
+}
+
+static struct w90p910_gpio_chip w90p910_gpio[] = {
+	W90P910_GPIO_CHIP("GROUPC", 0, 16),
+	W90P910_GPIO_CHIP("GROUPD", 16, 10),
+	W90P910_GPIO_CHIP("GROUPE", 26, 14),
+	W90P910_GPIO_CHIP("GROUPF", 40, 10),
+	W90P910_GPIO_CHIP("GROUPG", 50, 17),
+	W90P910_GPIO_CHIP("GROUPH", 67, 8),
+	W90P910_GPIO_CHIP("GROUPI", 75, 17),
+};
+
+void __init w90p910_init_gpio(int nr_group)
+{
+	unsigned	i;
+	struct w90p910_gpio_chip *gpio_chip;
+
+	for (i = 0; i < nr_group; i++) {
+		gpio_chip = &w90p910_gpio[i];
+		spin_lock_init(&gpio_chip->gpio_lock);
+		gpio_chip->regbase = GPIO_BASE + i * GROUPINERV;
+		gpiochip_add(&gpio_chip->chip);
+	}
+}
diff --git a/arch/arm/mach-w90x900/include/mach/gpio.h b/arch/arm/mach-w90x900/include/mach/gpio.h
new file mode 100644
index 0000000..034da3e
--- /dev/null
+++ b/arch/arm/mach-w90x900/include/mach/gpio.h
@@ -0,0 +1,34 @@
+/*
+ * linux/arch/arm/mach-w90p910/include/mach/gpio.h
+ *
+ * Generic w90p910 GPIO handling
+ *
+ *  Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARCH_W90P910_GPIO_H
+#define __ASM_ARCH_W90P910_GPIO_H
+
+#include <mach/hardware.h>
+#include <asm/irq.h>
+#include <asm-generic/gpio.h>
+
+#define gpio_get_value	__gpio_get_value
+#define gpio_set_value	__gpio_set_value
+#define gpio_cansleep	__gpio_cansleep
+
+static inline int gpio_to_irq(unsigned gpio)
+{
+	return gpio;
+}
+
+static inline int irq_to_gpio(unsigned irq)
+{
+	return irq;
+}
+
+#endif
-- 
cgit v0.10.2


From 0e4a34bb6565346a8d7cc56cab412a1f98d5b1cd Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Wed, 10 Jun 2009 15:50:44 +0100
Subject: [ARM] 5549/1: Add clock api for w90p910 platform.

Add clock api for w90p910 platform.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 013993c..7d54494 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -490,6 +490,7 @@ config ARCH_W90X900
 	select CPU_ARM926T
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_GPIO
+	select COMMON_CLKDEV
 	help
 		Support for Nuvoton (Winbond logic dept.) ARM9 processor,You
 		can login www.mcuos.com or www.nuvoton.com to know more.
diff --git a/arch/arm/mach-w90x900/Makefile b/arch/arm/mach-w90x900/Makefile
index 6287268..d50c94f 100644
--- a/arch/arm/mach-w90x900/Makefile
+++ b/arch/arm/mach-w90x900/Makefile
@@ -4,7 +4,7 @@
 
 # Object file lists.
 
-obj-y				:= irq.o time.o mfp-w90p910.o gpio.o
+obj-y				:= irq.o time.o mfp-w90p910.o gpio.o clock.o
 
 # W90X900 CPU support files
 
diff --git a/arch/arm/mach-w90x900/clock.c b/arch/arm/mach-w90x900/clock.c
new file mode 100644
index 0000000..f420613
--- /dev/null
+++ b/arch/arm/mach-w90x900/clock.c
@@ -0,0 +1,77 @@
+/*
+ * linux/arch/arm/mach-w90x900/clock.c
+ *
+ * Copyright (c) 2008 Nuvoton technology corporation
+ *
+ * Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/clk.h>
+#include <linux/spinlock.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+
+#include <mach/hardware.h>
+
+#include "clock.h"
+
+static DEFINE_SPINLOCK(clocks_lock);
+
+int clk_enable(struct clk *clk)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clocks_lock, flags);
+	if (clk->enabled++ == 0)
+		(clk->enable)(clk, 1);
+	spin_unlock_irqrestore(&clocks_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_enable);
+
+void clk_disable(struct clk *clk)
+{
+	unsigned long flags;
+
+	WARN_ON(clk->enabled == 0);
+
+	spin_lock_irqsave(&clocks_lock, flags);
+	if (--clk->enabled == 0)
+		(clk->enable)(clk, 0);
+	spin_unlock_irqrestore(&clocks_lock, flags);
+}
+EXPORT_SYMBOL(clk_disable);
+
+void w90x900_clk_enable(struct clk *clk, int enable)
+{
+	unsigned int clocks = clk->cken;
+	unsigned long clken;
+
+	clken = __raw_readl(W90X900_VA_CLKPWR);
+
+	if (enable)
+		clken |= clocks;
+	else
+		clken &= ~clocks;
+
+	__raw_writel(clken, W90X900_VA_CLKPWR);
+}
+
+void clks_register(struct clk_lookup *clks, size_t num)
+{
+	int i;
+
+	for (i = 0; i < num; i++)
+		clkdev_add(&clks[i]);
+}
diff --git a/arch/arm/mach-w90x900/clock.h b/arch/arm/mach-w90x900/clock.h
new file mode 100644
index 0000000..4f27bda
--- /dev/null
+++ b/arch/arm/mach-w90x900/clock.h
@@ -0,0 +1,36 @@
+/*
+ * linux/arch/arm/mach-w90x900/clock.h
+ *
+ * Copyright (c) 2008 Nuvoton technology corporation
+ *
+ * Wan ZongShun <mcuos.com@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+
+#include <asm/clkdev.h>
+
+void w90x900_clk_enable(struct clk *clk, int enable);
+void clks_register(struct clk_lookup *clks, size_t num);
+
+struct clk {
+	unsigned long		cken;
+	unsigned int		enabled;
+	void			(*enable)(struct clk *, int enable);
+};
+
+#define DEFINE_CLK(_name, _ctrlbit)			\
+struct clk clk_##_name = {				\
+		.enable	= w90x900_clk_enable,		\
+		.cken	= (1 << _ctrlbit),		\
+	}
+
+#define DEF_CLKLOOK(_clk, _devname, _conname)		\
+	{						\
+		.clk		= _clk,			\
+		.dev_id		= _devname,		\
+		.con_id		= _conname,		\
+	}
+
diff --git a/arch/arm/mach-w90x900/cpu.h b/arch/arm/mach-w90x900/cpu.h
index de29ddc..57b5dba 100644
--- a/arch/arm/mach-w90x900/cpu.h
+++ b/arch/arm/mach-w90x900/cpu.h
@@ -41,7 +41,7 @@ struct sys_timer;
 extern void w90x900_init_irq(void);
 extern void w90p910_init_io(struct map_desc *mach_desc, int size);
 extern void w90p910_init_uarts(struct w90x900_uartcfg *cfg, int no);
-extern void w90p910_init_clocks(int xtal);
+extern void w90p910_init_clocks(void);
 extern void w90p910_map_io(struct map_desc *mach_desc, int size);
 extern struct platform_device w90p910_serial_device;
 extern struct sys_timer w90x900_timer;
diff --git a/arch/arm/mach-w90x900/include/mach/clkdev.h b/arch/arm/mach-w90x900/include/mach/clkdev.h
new file mode 100644
index 0000000..04b37a8
--- /dev/null
+++ b/arch/arm/mach-w90x900/include/mach/clkdev.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_MACH_CLKDEV_H
+#define __ASM_MACH_CLKDEV_H
+
+#define __clk_get(clk) ({ 1; })
+#define __clk_put(clk) do { } while (0)
+
+#endif
diff --git a/arch/arm/mach-w90x900/mach-w90p910evb.c b/arch/arm/mach-w90x900/mach-w90p910evb.c
index 1968ed4..7a62bd3 100644
--- a/arch/arm/mach-w90x900/mach-w90p910evb.c
+++ b/arch/arm/mach-w90x900/mach-w90p910evb.c
@@ -247,7 +247,7 @@ static struct platform_device *w90p910evb_dev[] __initdata = {
 static void __init w90p910evb_map_io(void)
 {
 	w90p910_map_io(w90p910_iodesc, ARRAY_SIZE(w90p910_iodesc));
-	w90p910_init_clocks(0);
+	w90p910_init_clocks();
 }
 
 static void __init w90p910evb_init(void)
diff --git a/arch/arm/mach-w90x900/w90p910.c b/arch/arm/mach-w90x900/w90p910.c
index 87214b8..1c97e49 100644
--- a/arch/arm/mach-w90x900/w90p910.c
+++ b/arch/arm/mach-w90x900/w90p910.c
@@ -34,6 +34,7 @@
 #include <mach/regs-serial.h>
 
 #include "cpu.h"
+#include "clock.h"
 
 /* Initial IO mappings */
 
@@ -52,6 +53,43 @@ static struct map_desc w90p910_iodesc[] __initdata = {
 	/*IODESC_ENT(LCD),*/
 };
 
+/* Initial clock declarations. */
+static DEFINE_CLK(lcd, 0);
+static DEFINE_CLK(audio, 1);
+static DEFINE_CLK(fmi, 4);
+static DEFINE_CLK(dmac, 5);
+static DEFINE_CLK(atapi, 6);
+static DEFINE_CLK(emc, 7);
+static DEFINE_CLK(usbd, 8);
+static DEFINE_CLK(usbh, 9);
+static DEFINE_CLK(g2d, 10);;
+static DEFINE_CLK(pwm, 18);
+static DEFINE_CLK(ps2, 24);
+static DEFINE_CLK(kpi, 25);
+static DEFINE_CLK(wdt, 26);
+static DEFINE_CLK(gdma, 27);
+static DEFINE_CLK(adc, 28);
+static DEFINE_CLK(usi, 29);
+
+static struct clk_lookup w90p910_clkregs[] = {
+	DEF_CLKLOOK(&clk_lcd, "w90p910-lcd", NULL),
+	DEF_CLKLOOK(&clk_audio, "w90p910-audio", NULL),
+	DEF_CLKLOOK(&clk_fmi, "w90p910-fmi", NULL),
+	DEF_CLKLOOK(&clk_dmac, "w90p910-dmac", NULL),
+	DEF_CLKLOOK(&clk_atapi, "w90p910-atapi", NULL),
+	DEF_CLKLOOK(&clk_emc, "w90p910-emc", NULL),
+	DEF_CLKLOOK(&clk_usbd, "w90p910-usbd", NULL),
+	DEF_CLKLOOK(&clk_usbh, "w90p910-usbh", NULL),
+	DEF_CLKLOOK(&clk_g2d, "w90p910-g2d", NULL),
+	DEF_CLKLOOK(&clk_pwm, "w90p910-pwm", NULL),
+	DEF_CLKLOOK(&clk_ps2, "w90p910-ps2", NULL),
+	DEF_CLKLOOK(&clk_kpi, "w90p910-kpi", NULL),
+	DEF_CLKLOOK(&clk_wdt, "w90p910-wdt", NULL),
+	DEF_CLKLOOK(&clk_gdma, "w90p910-gdma", NULL),
+	DEF_CLKLOOK(&clk_adc, "w90p910-adc", NULL),
+	DEF_CLKLOOK(&clk_usi, "w90p910-usi", NULL),
+};
+
 /* Initial serial platform data */
 
 struct plat_serial8250_port w90p910_uart_data[] = {
@@ -81,8 +119,9 @@ void __init w90p910_map_io(struct map_desc *mach_desc, int mach_size)
 
 /*Init W90P910 clock*/
 
-void __init w90p910_init_clocks(int xtal)
+void __init w90p910_init_clocks(void)
 {
+	clks_register(w90p910_clkregs, ARRAY_SIZE(w90p910_clkregs));
 }
 
 static int __init w90p910_init_cpu(void)
-- 
cgit v0.10.2


From cc63262f27582369737edcb4e810265db2a98ab4 Mon Sep 17 00:00:00 2001
From: wanzongshun <mcuos.com@gmail.com>
Date: Wed, 10 Jun 2009 15:51:32 +0100
Subject: [ARM] 5550/1: Add default configure file for w90p910 platform

Add default configure file for w90p910 platform.

Signed-off-by: Wan ZongShun <mcuos.com@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/configs/w90p910_defconfig b/arch/arm/configs/w90p910_defconfig
index 56bda7c..5245655 100644
--- a/arch/arm/configs/w90p910_defconfig
+++ b/arch/arm/configs/w90p910_defconfig
@@ -1,11 +1,11 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc8-git8
-# Sat Nov 15 10:05:00 2008
+# Linux kernel version: 2.6.30
+# Wed Jun 10 22:09:25 2009
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
-# CONFIG_GENERIC_GPIO is not set
+CONFIG_GENERIC_GPIO=y
 # CONFIG_GENERIC_TIME is not set
 # CONFIG_GENERIC_CLOCKEVENTS is not set
 CONFIG_MMU=y
@@ -22,8 +22,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_ARCH_SUPPORTS_AOUT=y
-CONFIG_ZONE_DMA=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_VECTORS_BASE=0xffff0000
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
@@ -42,10 +40,19 @@ CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=17
-# CONFIG_CGROUPS is not set
 # CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_RELAY=y
@@ -56,52 +63,53 @@ CONFIG_USER_NS=y
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+# CONFIG_INITRAMFS_COMPRESSION_NONE is not set
+# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set
+# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set
+# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
+CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
+CONFIG_TRACEPOINTS=y
 # CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
-# CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is not set
-# CONFIG_HAVE_IOREMAP_PROT is not set
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
-# CONFIG_HAVE_ARCH_TRACEHOOK is not set
-# CONFIG_HAVE_DMA_ATTRS is not set
-# CONFIG_USE_GENERIC_SMP_HELPERS is not set
-# CONFIG_HAVE_CLK is not set
-CONFIG_PROC_PAGE_MONITOR=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 # CONFIG_MODULES is not set
 CONFIG_BLOCK=y
 CONFIG_LBD=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_LSF=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -117,7 +125,7 @@ CONFIG_IOSCHED_CFQ=y
 CONFIG_DEFAULT_CFQ=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="cfq"
-CONFIG_CLASSIC_RCU=y
+# CONFIG_FREEZER is not set
 
 #
 # System Type
@@ -127,10 +135,10 @@ CONFIG_CLASSIC_RCU=y
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
 # CONFIG_ARCH_AT91 is not set
-# CONFIG_ARCH_CLPS7500 is not set
 # CONFIG_ARCH_CLPS711X is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_FOOTBRIDGE is not set
 # CONFIG_ARCH_NETX is not set
 # CONFIG_ARCH_H720X is not set
@@ -151,23 +159,17 @@ CONFIG_CLASSIC_RCU=y
 # CONFIG_ARCH_ORION5X is not set
 # CONFIG_ARCH_PNX4008 is not set
 # CONFIG_ARCH_PXA is not set
+# CONFIG_ARCH_MMP is not set
 # CONFIG_ARCH_RPC is not set
 # CONFIG_ARCH_SA1100 is not set
 # CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
-# CONFIG_ARCH_MSM7X00A is not set
+# CONFIG_ARCH_MSM is not set
 CONFIG_ARCH_W90X900=y
-
-#
-# Boot options
-#
-
-#
-# Power management
-#
 CONFIG_CPU_W90P910=y
 
 #
@@ -198,6 +200,7 @@ CONFIG_ARM_THUMB=y
 # CONFIG_CPU_DCACHE_WRITETHROUGH is not set
 # CONFIG_CPU_CACHE_ROUND_ROBIN is not set
 # CONFIG_OUTER_CACHE is not set
+CONFIG_COMMON_CLKDEV=y
 
 #
 # Bus support
@@ -209,27 +212,32 @@ CONFIG_ARM_THUMB=y
 #
 # Kernel Features
 #
-# CONFIG_TICK_ONESHOT is not set
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
 CONFIG_PREEMPT=y
 CONFIG_HZ=100
 CONFIG_AEABI=y
 CONFIG_OABI_COMPAT=y
-CONFIG_ARCH_FLATMEM_HAS_HOLES=y
-# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
+# CONFIG_ARCH_HAS_HOLES_MEMORYMODEL is not set
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+# CONFIG_HIGHMEM is not set
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
 # CONFIG_DISCONTIGMEM_MANUAL is not set
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
 CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4096
-# CONFIG_RESOURCES_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=1
-CONFIG_BOUNCE=y
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
 CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_ALIGNMENT_TRAP=y
 
 #
@@ -237,12 +245,17 @@ CONFIG_ALIGNMENT_TRAP=y
 #
 CONFIG_ZBOOT_ROM_TEXT=0
 CONFIG_ZBOOT_ROM_BSS=0
-CONFIG_CMDLINE="root=/dev/ram0 console=ttyS0,115200n8 initrd=0xa00000,4000000 mem=64M"
+CONFIG_CMDLINE="root=/dev/ram0 console=ttyS0,115200n8 rdinit=/sbin/init mem=64M"
 # CONFIG_XIP_KERNEL is not set
 CONFIG_KEXEC=y
 CONFIG_ATAGS_PROC=y
 
 #
+# CPU Power Management
+#
+# CONFIG_CPU_IDLE is not set
+
+#
 # Floating point emulation
 #
 
@@ -258,6 +271,8 @@ CONFIG_FPE_NWFPE=y
 # Userspace binary formats
 #
 CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
 # CONFIG_BINFMT_AOUT is not set
 # CONFIG_BINFMT_MISC is not set
 
@@ -282,11 +297,93 @@ CONFIG_FW_LOADER=y
 CONFIG_FIRMWARE_IN_KERNEL=y
 CONFIG_EXTRA_FIRMWARE=""
 # CONFIG_SYS_HYPERVISOR is not set
-# CONFIG_MTD is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AFS_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
+# CONFIG_MTD_ARM_INTEGRATOR is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
 # CONFIG_PARPORT is not set
 CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 # CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_UB is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=16384
@@ -300,9 +397,41 @@ CONFIG_HAVE_IDE=y
 # SCSI device support
 #
 # CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
 # CONFIG_SCSI_NETLINK is not set
+# CONFIG_SCSI_PROC_FS is not set
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_SAS_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+# CONFIG_SCSI_LOWLEVEL is not set
+# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 # CONFIG_ATA is not set
 # CONFIG_MD is not set
 
@@ -354,38 +483,57 @@ CONFIG_HW_CONSOLE=y
 #
 # Serial drivers
 #
-# CONFIG_SERIAL_8250 is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+# CONFIG_SERIAL_8250_EXTENDED is not set
 
 #
 # Non-8250 serial port support
 #
-CONFIG_SERIAL_W90X900=y
-# CONFIG_SERIAL_W90X900_PORT1 is not set
-# CONFIG_SERIAL_W90X900_PORT2 is not set
-# CONFIG_SERIAL_W90X900_PORT3 is not set
-# CONFIG_SERIAL_W90X900_PORT4 is not set
-CONFIG_SERIAL_W90X900_CONSOLE=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 # CONFIG_HW_RANDOM is not set
-# CONFIG_NVRAM is not set
 # CONFIG_R3964 is not set
 # CONFIG_RAW_DRIVER is not set
 # CONFIG_TCG_TPM is not set
 # CONFIG_I2C is not set
 # CONFIG_SPI is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
 
 #
 # Sonics Silicon Backplane
 #
-CONFIG_SSB_POSSIBLE=y
 # CONFIG_SSB is not set
 
 #
@@ -393,10 +541,11 @@ CONFIG_SSB_POSSIBLE=y
 #
 # CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
-# CONFIG_MFD_T7L66XB is not set
-# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
 
 #
 # Multimedia devices
@@ -433,33 +582,131 @@ CONFIG_SSB_POSSIBLE=y
 CONFIG_DUMMY_CONSOLE=y
 # CONFIG_SOUND is not set
 # CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
+CONFIG_USB_SUPPORT=y
+CONFIG_USB_ARCH_HAS_HCD=y
+# CONFIG_USB_ARCH_HAS_OHCI is not set
+# CONFIG_USB_ARCH_HAS_EHCI is not set
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set
+
+#
+# Miscellaneous USB options
+#
+# CONFIG_USB_DEVICEFS is not set
+CONFIG_USB_DEVICE_CLASS=y
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+CONFIG_USB_MON=y
+# CONFIG_USB_WUSB is not set
+# CONFIG_USB_WUSB_CBAF is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_C67X00_HCD is not set
+# CONFIG_USB_OXU210HP_HCD is not set
+# CONFIG_USB_ISP116X_HCD is not set
+# CONFIG_USB_ISP1760_HCD is not set
+# CONFIG_USB_SL811_HCD is not set
+# CONFIG_USB_R8A66597_HCD is not set
+# CONFIG_USB_HWA_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+# CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
+
+#
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
+#
+
+#
+# also be needed; see USB_STORAGE Help for more info
+#
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_STORAGE_ONETOUCH is not set
+# CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_SEVSEG is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_BERRY_CHARGE is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CYPRESS_CY7C63 is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_FTDI_ELAN is not set
+# CONFIG_USB_APPLEDISPLAY is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TRANCEVIBRATOR is not set
+# CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_VST is not set
+# CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_USB_GPIO_VBUS is not set
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
 # CONFIG_NEW_LEDS is not set
 CONFIG_RTC_LIB=y
 # CONFIG_RTC_CLASS is not set
 # CONFIG_DMADEVICES is not set
-
-#
-# Voltage and Current regulators
-#
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_REGULATOR is not set
-# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
-# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
-# CONFIG_REGULATOR_BQ24022 is not set
 # CONFIG_UIO is not set
+# CONFIG_STAGING is not set
 
 #
 # File systems
 #
 # CONFIG_EXT2_FS is not set
 # CONFIG_EXT3_FS is not set
-# CONFIG_EXT4DEV_FS is not set
+# CONFIG_EXT4_FS is not set
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
 CONFIG_FS_POSIX_ACL=y
+CONFIG_FILE_LOCKING=y
 # CONFIG_XFS_FS is not set
 # CONFIG_GFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
 # CONFIG_DNOTIFY is not set
 # CONFIG_INOTIFY is not set
 # CONFIG_QUOTA is not set
@@ -469,6 +716,11 @@ CONFIG_FS_POSIX_ACL=y
 CONFIG_GENERIC_ACL=y
 
 #
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
 # CD-ROM/DVD Filesystems
 #
 # CONFIG_ISO9660_FS is not set
@@ -486,15 +738,13 @@ CONFIG_GENERIC_ACL=y
 #
 CONFIG_PROC_FS=y
 CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 # CONFIG_HUGETLB_PAGE is not set
 # CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
+CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ADFS_FS is not set
 # CONFIG_AFFS_FS is not set
 # CONFIG_HFS_FS is not set
@@ -502,15 +752,22 @@ CONFIG_TMPFS_POSIX_ACL=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
 # CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_OMFS_FS is not set
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
 CONFIG_ROMFS_FS=y
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+# CONFIG_ROMFS_BACKED_BY_MTD is not set
+# CONFIG_ROMFS_BACKED_BY_BOTH is not set
+CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -586,18 +843,36 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 # CONFIG_DEBUG_KERNEL is not set
+CONFIG_STACKTRACE=y
 CONFIG_DEBUG_BUGVERBOSE=y
 CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_LATENCYTOP is not set
 # CONFIG_SYSCTL_SYSCALL_CHECK is not set
-CONFIG_HAVE_FTRACE=y
-CONFIG_HAVE_DYNAMIC_FTRACE=y
-# CONFIG_FTRACE is not set
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
+CONFIG_ARM_UNWIND=y
 # CONFIG_DEBUG_USER is not set
 
 #
@@ -605,14 +880,15 @@ CONFIG_HAVE_ARCH_KGDB=y
 #
 # CONFIG_KEYS is not set
 # CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_FILE_CAPABILITIES is not set
 # CONFIG_CRYPTO is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
 #
-# CONFIG_GENERIC_FIND_FIRST_BIT is not set
-# CONFIG_GENERIC_FIND_NEXT_BIT is not set
+CONFIG_GENERIC_FIND_LAST_BIT=y
 # CONFIG_CRC_CCITT is not set
 # CONFIG_CRC16 is not set
 # CONFIG_CRC_T10DIF is not set
@@ -620,7 +896,10 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_CRC32 is not set
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
-CONFIG_PLIST=y
+CONFIG_ZLIB_INFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
-- 
cgit v0.10.2


From de0525ca34aa7a65faf6902e3e00956ab80d2ede Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Thu, 11 Jun 2009 16:03:34 +0200
Subject: ALSA: lx6464es - support standard alsa module parameters

trivial patch to support the alsa module parameters `index', `id'
and `enable'

Signed-off-by: Tim Blechmann <tim@klingt.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c
index 870bfc5..ccf1b38 100644
--- a/sound/pci/lx6464es/lx6464es.c
+++ b/sound/pci/lx6464es/lx6464es.c
@@ -43,6 +43,13 @@ static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
 static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 
+module_param_array(index, int, NULL, 0444);
+MODULE_PARM_DESC(index, "Index value for Digigram LX6464ES interface.");
+module_param_array(id, charp, NULL, 0444);
+MODULE_PARM_DESC(id, "ID string for  Digigram LX6464ES interface.");
+module_param_array(enable, bool, NULL, 0444);
+MODULE_PARM_DESC(enable, "Enable/disable specific Digigram LX6464ES soundcards.");
+
 static const char card_name[] = "LX6464ES";
 
 
-- 
cgit v0.10.2


From 18473473880622b671a73dfdcaf03e9b526e0fb2 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.y.miao@gmail.com>
Date: Sun, 7 Jun 2009 23:05:45 +0800
Subject: [ARM] pxa: fix the conflicting non-static declarations of
 global_gpios[]

Signed-off-by: Eric Miao <eric.y.miao@gmail.com>
Cc: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>

diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 0f65680..7fff467 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -802,7 +802,7 @@ static struct platform_device *devices[] __initdata = {
 	&pcmcia,
 };
 
-struct gpio_ress global_gpios[] = {
+static struct gpio_ress global_gpios[] = {
 	HX4700_GPIO_IN(GPIO12_HX4700_ASIC3_IRQ, "ASIC3_IRQ"),
 	HX4700_GPIO_IN(GPIO13_HX4700_W3220_IRQ, "W3220_IRQ"),
 	HX4700_GPIO_IN(GPIO14_HX4700_nWLAN_IRQ, "WLAN_IRQ"),
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index 804c25c..4dc8c2e 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -798,7 +798,7 @@ static void mioa701_restart(char c, const char *cmd)
 	arm_machine_restart('s', cmd);
 }
 
-struct gpio_ress global_gpios[] = {
+static struct gpio_ress global_gpios[] = {
 	MIO_GPIO_OUT(GPIO9_CHARGE_EN, 1, "Charger enable"),
 	MIO_GPIO_OUT(GPIO18_POWEROFF, 0, "Power Off"),
 	MIO_GPIO_OUT(GPIO87_LCD_POWER, 0, "LCD Power")
-- 
cgit v0.10.2


From 4f64e150191bfddc7f5c0768f325f747dbca1913 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 11 Jun 2009 16:14:11 +0200
Subject: ALSA: pcm - Update document about xrun_debug proc file

Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt
index cfac20c..381908d 100644
--- a/Documentation/sound/alsa/Procfile.txt
+++ b/Documentation/sound/alsa/Procfile.txt
@@ -88,26 +88,34 @@ card*/pcm*/info
 	substreams, etc.
 
 card*/pcm*/xrun_debug
-	This file appears when CONFIG_SND_DEBUG=y.
-	This shows the status of xrun (= buffer overrun/xrun) debug of
-	ALSA PCM middle layer, as an integer from 0 to 2.  The value
-	can be changed by writing to this file, such as
-
-		 # cat 2 > /proc/asound/card0/pcm0p/xrun_debug
-
-	When this value is greater than 0, the driver will show the
-	messages to kernel log when an xrun is detected.  The debug
-	message is shown also when the invalid H/W pointer is detected
-	at the update of periods (usually called from the interrupt
+	This file appears when CONFIG_SND_DEBUG=y and
+	CONFIG_PCM_XRUN_DEBUG=y.
+	This shows the status of xrun (= buffer overrun/xrun) and
+	invalid PCM position debug/check of ALSA PCM middle layer.
+	It takes an integer value, can be changed by writing to this
+	file, such as
+
+		 # cat 5 > /proc/asound/card0/pcm0p/xrun_debug
+
+	The value consists of the following bit flags:
+	  bit 0 = Enable XRUN/jiffies debug messages
+	  bit 1 = Show stack trace at XRUN / jiffies check
+	  bit 2 = Enable additional jiffies check
+
+	When the bit 0 is set, the driver will show the messages to
+	kernel log when an xrun is detected.  The debug message is
+	shown also when the invalid H/W pointer is detected at the
+	update of periods (usually called from the interrupt
 	handler).
 
-	When this value is greater than 1, the driver will show the
-	stack trace additionally.  This may help the debugging.
+	When the bit 1 is set, the driver will show the stack trace
+	additionally.  This may help the debugging.
 
-	Since 2.6.30, this option also enables the hwptr check using
+	Since 2.6.30, this option can enable the hwptr check using
 	jiffies.  This detects spontaneous invalid pointer callback
 	values, but can be lead to too much corrections for a (mostly
 	buggy) hardware that doesn't give smooth pointer updates.
+	This feature is enabled via the bit 2.
 
 card*/pcm*/sub*/info
 	The general information of this PCM sub-stream.
-- 
cgit v0.10.2


From 391c5111c9ce346edc3c3d8f907eb178a6decd16 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Sat, 28 Mar 2009 18:18:50 +0300
Subject: [ARM] sa1100: remove unused collie_pm.c

This file was never enabled in the mainline kernel, new ongoing work on
battery driver by Thomas Kunze makes it completely obsolete, so remove
it now.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-sa1100/collie_pm.c b/arch/arm/mach-sa1100/collie_pm.c
deleted file mode 100644
index 444f266..0000000
--- a/arch/arm/mach-sa1100/collie_pm.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Based on spitz_pm.c and sharp code.
- *
- * Copyright (C) 2001  SHARP
- * Copyright 2005 Pavel Machek <pavel@suse.cz>
- *
- * Distribute under GPLv2.
- *
- * Li-ion batteries are angry beasts, and they like to explode. This driver is not finished,
- * and sometimes charges them when it should not. If it makes angry lithium to come your way...
- * ...well, you have been warned.
- *
- * Actually, this should be quite safe, it seems sharp leaves charger enabled by default,
- * and my collie did not explode (yet).
- */
-
-#include <linux/module.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <asm/hardware/scoop.h>
-#include <mach/dma.h>
-#include <mach/collie.h>
-#include <asm/mach/sharpsl_param.h>
-#include <asm/hardware/sharpsl_pm.h>
-
-#include "../drivers/mfd/ucb1x00.h"
-
-static struct ucb1x00 *ucb;
-static int ad_revise;
-
-#define ADCtoPower(x)	       ((330 * x * 2) / 1024)
-
-static void collie_charger_init(void)
-{
-	int err;
-
-	if (sharpsl_param.adadj != -1)
-		ad_revise = sharpsl_param.adadj;
-
-	/* Register interrupt handler. */
-	if ((err = request_irq(COLLIE_IRQ_GPIO_AC_IN, sharpsl_ac_isr, IRQF_DISABLED,
-			       "ACIN", sharpsl_ac_isr))) {
-		printk("Could not get irq %d.\n", COLLIE_IRQ_GPIO_AC_IN);
-		return;
-	}
-	if ((err = request_irq(COLLIE_IRQ_GPIO_CO, sharpsl_chrg_full_isr, IRQF_DISABLED,
-			       "CO", sharpsl_chrg_full_isr))) {
-		free_irq(COLLIE_IRQ_GPIO_AC_IN, sharpsl_ac_isr);
-		printk("Could not get irq %d.\n", COLLIE_IRQ_GPIO_CO);
-		return;
-	}
-
-	gpio_request(COLLIE_GPIO_CHARGE_ON, "charge on");
-	gpio_direction_output(COLLIE_GPIO_CHARGE_ON, 1);
-
-	ucb1x00_io_set_dir(ucb, 0, COLLIE_TC35143_GPIO_MBAT_ON | COLLIE_TC35143_GPIO_TMP_ON |
-			           COLLIE_TC35143_GPIO_BBAT_ON);
-	return;
-}
-
-static void collie_measure_temp(int on)
-{
-	if (on)
-		ucb1x00_io_write(ucb, COLLIE_TC35143_GPIO_TMP_ON, 0);
-	else
-		ucb1x00_io_write(ucb, 0, COLLIE_TC35143_GPIO_TMP_ON);
-}
-
-static void collie_charge(int on)
-{
-	/* Zaurus seems to contain LTC1731; it should know when to
-	 * stop charging itself, so setting charge on should be
-	 * relatively harmless (as long as it is not done too often).
-	 */
-	gpio_set_value(COLLIE_GPIO_CHARGE_ON, on);
-}
-
-static void collie_discharge(int on)
-{
-}
-
-static void collie_discharge1(int on)
-{
-}
-
-static void collie_presuspend(void)
-{
-}
-
-static void collie_postsuspend(void)
-{
-}
-
-static int collie_should_wakeup(unsigned int resume_on_alarm)
-{
-	return 0;
-}
-
-static unsigned long collie_charger_wakeup(void)
-{
-	return 0;
-}
-
-int collie_read_backup_battery(void)
-{
-	int voltage;
-
-	ucb1x00_adc_enable(ucb);
-
-	ucb1x00_io_write(ucb, COLLIE_TC35143_GPIO_BBAT_ON, 0);
-	voltage = ucb1x00_adc_read(ucb, UCB_ADC_INP_AD1, UCB_SYNC);
-
-	ucb1x00_io_write(ucb, 0, COLLIE_TC35143_GPIO_BBAT_ON);
-	ucb1x00_adc_disable(ucb);
-
-	printk("Backup battery = %d(%d)\n", ADCtoPower(voltage), voltage);
-
-	return ADCtoPower(voltage);
-}
-
-int collie_read_main_battery(void)
-{
-	int voltage, voltage_rev, voltage_volts;
-
-	ucb1x00_adc_enable(ucb);
-	ucb1x00_io_write(ucb, 0, COLLIE_TC35143_GPIO_BBAT_ON);
-	ucb1x00_io_write(ucb, COLLIE_TC35143_GPIO_MBAT_ON, 0);
-
-	mdelay(1);
-	voltage = ucb1x00_adc_read(ucb, UCB_ADC_INP_AD1, UCB_SYNC);
-
-	ucb1x00_io_write(ucb, 0, COLLIE_TC35143_GPIO_MBAT_ON);
-	ucb1x00_adc_disable(ucb);
-
-	voltage_rev = voltage + ((ad_revise * voltage) / 652);
-	voltage_volts = ADCtoPower(voltage_rev);
-
-	printk("Main battery = %d(%d)\n", voltage_volts, voltage);
-
-	if (voltage != -1)
-		return voltage_volts;
-	else
-		return voltage;
-}
-
-int collie_read_temp(void)
-{
-	int voltage;
-
-	/* According to Sharp, temp must be > 973, main battery must be < 465,
-	   FIXME: sharpsl_pm.c has both conditions negated? FIXME: values
-	   are way out of range? */
-
-	ucb1x00_adc_enable(ucb);
-	ucb1x00_io_write(ucb, COLLIE_TC35143_GPIO_TMP_ON, 0);
-	/* >1010 = battery removed, 460 = 22C ?, higher = lower temp ? */
-	voltage = ucb1x00_adc_read(ucb, UCB_ADC_INP_AD0, UCB_SYNC);
-	ucb1x00_io_write(ucb, 0, COLLIE_TC35143_GPIO_TMP_ON);
-	ucb1x00_adc_disable(ucb);
-
-	printk("Battery temp = %d\n", voltage);
-	return voltage;
-}
-
-static unsigned long read_devdata(int which)
-{
-	switch (which) {
-	case SHARPSL_BATT_VOLT:
-		return collie_read_main_battery();
-	case SHARPSL_BATT_TEMP:
-		return collie_read_temp();
-	case SHARPSL_ACIN_VOLT:
-		return 500;
-	case SHARPSL_STATUS_ACIN: {
-		int ret = GPLR & COLLIE_GPIO_AC_IN;
-		printk("AC status = %d\n", ret);
-		return ret;
-	}
-	case SHARPSL_STATUS_FATAL: {
-		int ret = GPLR & COLLIE_GPIO_MAIN_BAT_LOW;
-		printk("Fatal bat = %d\n", ret);
-		return ret;
-	}
-	default:
-		return ~0;
-	}
-}
-
-struct battery_thresh collie_battery_levels_acin[] = {
-	{ 420, 100},
-	{ 417,  95},
-	{ 415,  90},
-	{ 413,  80},
-	{ 411,  75},
-	{ 408,  70},
-	{ 406,  60},
-	{ 403,  50},
-	{ 398,  40},
-	{ 391,  25},
-	{  10,   5},
-	{   0,   0},
-};
-
-struct battery_thresh collie_battery_levels[] = {
-	{ 394, 100},
-	{ 390,  95},
-	{ 380,  90},
-	{ 370,  80},
-	{ 368,  75},	/* From sharp code: battery high with frontlight */
-	{ 366,  70},	/* 60..90 -- fake values invented by me for testing */
-	{ 364,  60},
-	{ 362,  50},
-	{ 360,  40},
-	{ 358,  25},	/* From sharp code: battery low with frontlight */
-	{ 356,   5},	/* From sharp code: battery verylow with frontlight */
-	{   0,   0},
-};
-
-struct sharpsl_charger_machinfo collie_pm_machinfo = {
-	.init             = collie_charger_init,
-	.read_devdata	  = read_devdata,
-	.discharge        = collie_discharge,
-	.discharge1       = collie_discharge1,
-	.charge           = collie_charge,
-	.measure_temp     = collie_measure_temp,
-	.presuspend       = collie_presuspend,
-	.postsuspend      = collie_postsuspend,
-	.charger_wakeup   = collie_charger_wakeup,
-	.should_wakeup    = collie_should_wakeup,
-	.bat_levels       = 12,
-	.bat_levels_noac  = collie_battery_levels,
-	.bat_levels_acin  = collie_battery_levels_acin,
-	.status_high_acin = 368,
-	.status_low_acin  = 358,
-	.status_high_noac = 368,
-	.status_low_noac  = 358,
-	.charge_on_volt	  = 350,	/* spitz uses 2.90V, but lets play it safe. */
-	.charge_on_temp   = 550,
-	.charge_acin_high = 550,	/* collie does not seem to have sensor for this, anyway */
-	.charge_acin_low  = 450,	/* ignored, too */
-	.fatal_acin_volt  = 356,
-	.fatal_noacin_volt = 356,
-
-	.batfull_irq = 1,		/* We do not want periodical charge restarts */
-};
-
-static int __init collie_pm_ucb_add(struct ucb1x00_dev *pdev)
-{
-	sharpsl_pm.machinfo = &collie_pm_machinfo;
-	ucb = pdev->ucb;
-	return 0;
-}
-
-static struct ucb1x00_driver collie_pm_ucb_driver = {
-	.add	= collie_pm_ucb_add,
-};
-
-static struct platform_device *collie_pm_device;
-
-static int __init collie_pm_init(void)
-{
-	int ret;
-
-	collie_pm_device = platform_device_alloc("sharpsl-pm", -1);
-	if (!collie_pm_device)
-		return -ENOMEM;
-
-	collie_pm_device->dev.platform_data = &collie_pm_machinfo;
-	ret = platform_device_add(collie_pm_device);
-
-	if (ret)
-		platform_device_put(collie_pm_device);
-
-	if (!ret)
-		ret = ucb1x00_register_driver(&collie_pm_ucb_driver);
-
-	return ret;
-}
-
-static void __exit collie_pm_exit(void)
-{
-	ucb1x00_unregister_driver(&collie_pm_ucb_driver);
-	platform_device_unregister(collie_pm_device);
-}
-
-module_init(collie_pm_init);
-module_exit(collie_pm_exit);
-- 
cgit v0.10.2


From 78731d33c1868f4ba43bafcca8dcaf938872c1f2 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Sat, 28 Mar 2009 18:18:51 +0300
Subject: [ARM] pxa/sharpsl_pm: merge the two sharpsl_pm.c since it's now pxa
 specific

collie_pm was the only non-PXA user of sharpsl_pm. Now as it's gone we
can merge code into one single file to allow further cleanup.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index a2cd9be..dea7ce3 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -27,10 +27,6 @@ config SHARP_LOCOMO
 config SHARP_PARAM
 	bool
 
-config SHARPSL_PM
-	bool
-	select APM_EMULATION
-
 config SHARP_SCOOP
 	bool
 
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 7cb7961..76be7ff 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -12,7 +12,6 @@ obj-$(CONFIG_DMABOUNCE)		+= dmabounce.o
 obj-$(CONFIG_TIMER_ACORN)	+= time-acorn.o
 obj-$(CONFIG_SHARP_LOCOMO)	+= locomo.o
 obj-$(CONFIG_SHARP_PARAM)	+= sharpsl_param.o
-obj-$(CONFIG_SHARPSL_PM)	+= sharpsl_pm.o
 obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
 obj-$(CONFIG_ARCH_IXP2000)	+= uengine.o
 obj-$(CONFIG_ARCH_IXP23XX)	+= uengine.o
diff --git a/arch/arm/common/sharpsl_pm.c b/arch/arm/common/sharpsl_pm.c
deleted file mode 100644
index 140f1d7..0000000
--- a/arch/arm/common/sharpsl_pm.c
+++ /dev/null
@@ -1,859 +0,0 @@
-/*
- * Battery and Power Management code for the Sharp SL-C7xx and SL-Cxx00
- * series of PDAs
- *
- * Copyright (c) 2004-2005 Richard Purdie
- *
- * Based on code written by Sharp for 2.4 kernels
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#undef DEBUG
-
-#include <linux/module.h>
-#include <linux/timer.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/apm_bios.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/leds.h>
-#include <linux/apm-emulation.h>
-#include <linux/suspend.h>
-
-#include <mach/hardware.h>
-#include <asm/irq.h>
-#include <mach/pm.h>
-#include <mach/pxa2xx-regs.h>
-#include <mach/regs-rtc.h>
-#include <mach/sharpsl.h>
-#include <asm/hardware/sharpsl_pm.h>
-
-/*
- * Constants
- */
-#define SHARPSL_CHARGE_ON_TIME_INTERVAL        (msecs_to_jiffies(1*60*1000))  /* 1 min */
-#define SHARPSL_CHARGE_FINISH_TIME             (msecs_to_jiffies(10*60*1000)) /* 10 min */
-#define SHARPSL_BATCHK_TIME                    (msecs_to_jiffies(15*1000))    /* 15 sec */
-#define SHARPSL_BATCHK_TIME_SUSPEND            (60*10)                        /* 10 min */
-
-#define SHARPSL_WAIT_CO_TIME                   15  /* 15 sec */
-#define SHARPSL_WAIT_DISCHARGE_ON              100 /* 100 msec */
-#define SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP   10  /* 10 msec */
-#define SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT   10  /* 10 msec */
-#define SHARPSL_CHECK_BATTERY_WAIT_TIME_ACIN   10  /* 10 msec */
-#define SHARPSL_CHARGE_WAIT_TIME               15  /* 15 msec */
-#define SHARPSL_CHARGE_CO_CHECK_TIME           5   /* 5 msec */
-#define SHARPSL_CHARGE_RETRY_CNT               1   /* eqv. 10 min */
-
-/*
- * Prototypes
- */
-#ifdef CONFIG_PM
-static int sharpsl_off_charge_battery(void);
-static int sharpsl_check_battery_voltage(void);
-static int sharpsl_fatal_check(void);
-#endif
-static int sharpsl_check_battery_temp(void);
-static int sharpsl_ac_check(void);
-static int sharpsl_average_value(int ad);
-static void sharpsl_average_clear(void);
-static void sharpsl_charge_toggle(struct work_struct *private_);
-static void sharpsl_battery_thread(struct work_struct *private_);
-
-
-/*
- * Variables
- */
-struct sharpsl_pm_status sharpsl_pm;
-DECLARE_DELAYED_WORK(toggle_charger, sharpsl_charge_toggle);
-DECLARE_DELAYED_WORK(sharpsl_bat, sharpsl_battery_thread);
-DEFINE_LED_TRIGGER(sharpsl_charge_led_trigger);
-
-
-static int get_percentage(int voltage)
-{
-	int i = sharpsl_pm.machinfo->bat_levels - 1;
-	int bl_status = sharpsl_pm.machinfo->backlight_get_status ? sharpsl_pm.machinfo->backlight_get_status() : 0;
-	struct battery_thresh *thresh;
-
-	if (sharpsl_pm.charge_mode == CHRG_ON)
-		thresh = bl_status ? sharpsl_pm.machinfo->bat_levels_acin_bl : sharpsl_pm.machinfo->bat_levels_acin;
-	else
-		thresh = bl_status ? sharpsl_pm.machinfo->bat_levels_noac_bl : sharpsl_pm.machinfo->bat_levels_noac;
-
-	while (i > 0 && (voltage > thresh[i].voltage))
-		i--;
-
-	return thresh[i].percentage;
-}
-
-static int get_apm_status(int voltage)
-{
-	int low_thresh, high_thresh;
-
-	if (sharpsl_pm.charge_mode == CHRG_ON) {
-		high_thresh = sharpsl_pm.machinfo->status_high_acin;
-		low_thresh = sharpsl_pm.machinfo->status_low_acin;
-	} else {
-		high_thresh = sharpsl_pm.machinfo->status_high_noac;
-		low_thresh = sharpsl_pm.machinfo->status_low_noac;
-	}
-
-	if (voltage >= high_thresh)
-		return APM_BATTERY_STATUS_HIGH;
-	if (voltage >= low_thresh)
-		return APM_BATTERY_STATUS_LOW;
-	return APM_BATTERY_STATUS_CRITICAL;
-}
-
-void sharpsl_battery_kick(void)
-{
-	schedule_delayed_work(&sharpsl_bat, msecs_to_jiffies(125));
-}
-EXPORT_SYMBOL(sharpsl_battery_kick);
-
-
-static void sharpsl_battery_thread(struct work_struct *private_)
-{
-	int voltage, percent, apm_status, i = 0;
-
-	if (!sharpsl_pm.machinfo)
-		return;
-
-	sharpsl_pm.battstat.ac_status = (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN) ? APM_AC_ONLINE : APM_AC_OFFLINE);
-
-	/* Corgi cannot confirm when battery fully charged so periodically kick! */
-	if (!sharpsl_pm.machinfo->batfull_irq && (sharpsl_pm.charge_mode == CHRG_ON)
-			&& time_after(jiffies, sharpsl_pm.charge_start_time +  SHARPSL_CHARGE_ON_TIME_INTERVAL))
-		schedule_delayed_work(&toggle_charger, 0);
-
-	while(1) {
-		voltage = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
-
-		if (voltage > 0) break;
-		if (i++ > 5) {
-			voltage = sharpsl_pm.machinfo->bat_levels_noac[0].voltage;
-			dev_warn(sharpsl_pm.dev, "Warning: Cannot read main battery!\n");
-			break;
-		}
-	}
-
-	voltage = sharpsl_average_value(voltage);
-	apm_status = get_apm_status(voltage);
-	percent = get_percentage(voltage);
-
-	/* At low battery voltages, the voltage has a tendency to start
-           creeping back up so we try to avoid this here */
-	if ((sharpsl_pm.battstat.ac_status == APM_AC_ONLINE) || (apm_status == APM_BATTERY_STATUS_HIGH) ||  percent <= sharpsl_pm.battstat.mainbat_percent) {
-		sharpsl_pm.battstat.mainbat_voltage = voltage;
-		sharpsl_pm.battstat.mainbat_status = apm_status;
-		sharpsl_pm.battstat.mainbat_percent = percent;
-	}
-
-	dev_dbg(sharpsl_pm.dev, "Battery: voltage: %d, status: %d, percentage: %d, time: %ld\n", voltage,
-			sharpsl_pm.battstat.mainbat_status, sharpsl_pm.battstat.mainbat_percent, jiffies);
-
-#ifdef CONFIG_BACKLIGHT_CORGI
-	/* If battery is low. limit backlight intensity to save power. */
-	if ((sharpsl_pm.battstat.ac_status != APM_AC_ONLINE)
-			&& ((sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_LOW) ||
-			(sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_CRITICAL))) {
-		if (!(sharpsl_pm.flags & SHARPSL_BL_LIMIT)) {
-			sharpsl_pm.machinfo->backlight_limit(1);
-			sharpsl_pm.flags |= SHARPSL_BL_LIMIT;
-		}
-	} else if (sharpsl_pm.flags & SHARPSL_BL_LIMIT) {
-		sharpsl_pm.machinfo->backlight_limit(0);
-		sharpsl_pm.flags &= ~SHARPSL_BL_LIMIT;
-	}
-#endif
-
-	/* Suspend if critical battery level */
-	if ((sharpsl_pm.battstat.ac_status != APM_AC_ONLINE)
-			&& (sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_CRITICAL)
-			&& !(sharpsl_pm.flags & SHARPSL_APM_QUEUED)) {
-		sharpsl_pm.flags |= SHARPSL_APM_QUEUED;
-		dev_err(sharpsl_pm.dev, "Fatal Off\n");
-		apm_queue_event(APM_CRITICAL_SUSPEND);
-	}
-
-	schedule_delayed_work(&sharpsl_bat, SHARPSL_BATCHK_TIME);
-}
-
-void sharpsl_pm_led(int val)
-{
-	if (val == SHARPSL_LED_ERROR) {
-		dev_err(sharpsl_pm.dev, "Charging Error!\n");
-	} else if (val == SHARPSL_LED_ON) {
-		dev_dbg(sharpsl_pm.dev, "Charge LED On\n");
-		led_trigger_event(sharpsl_charge_led_trigger, LED_FULL);
-	} else {
-		dev_dbg(sharpsl_pm.dev, "Charge LED Off\n");
-		led_trigger_event(sharpsl_charge_led_trigger, LED_OFF);
-	}
-}
-
-static void sharpsl_charge_on(void)
-{
-	dev_dbg(sharpsl_pm.dev, "Turning Charger On\n");
-
-	sharpsl_pm.full_count = 0;
-	sharpsl_pm.charge_mode = CHRG_ON;
-	schedule_delayed_work(&toggle_charger, msecs_to_jiffies(250));
-	schedule_delayed_work(&sharpsl_bat, msecs_to_jiffies(500));
-}
-
-static void sharpsl_charge_off(void)
-{
-	dev_dbg(sharpsl_pm.dev, "Turning Charger Off\n");
-
-	sharpsl_pm.machinfo->charge(0);
-	sharpsl_pm_led(SHARPSL_LED_OFF);
-	sharpsl_pm.charge_mode = CHRG_OFF;
-
-	schedule_delayed_work(&sharpsl_bat, 0);
-}
-
-static void sharpsl_charge_error(void)
-{
-	sharpsl_pm_led(SHARPSL_LED_ERROR);
-	sharpsl_pm.machinfo->charge(0);
-	sharpsl_pm.charge_mode = CHRG_ERROR;
-}
-
-static void sharpsl_charge_toggle(struct work_struct *private_)
-{
-	dev_dbg(sharpsl_pm.dev, "Toogling Charger at time: %lx\n", jiffies);
-
-	if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN)) {
-		sharpsl_charge_off();
-		return;
-	} else if ((sharpsl_check_battery_temp() < 0) || (sharpsl_ac_check() < 0)) {
-		sharpsl_charge_error();
-		return;
-	}
-
-	sharpsl_pm_led(SHARPSL_LED_ON);
-	sharpsl_pm.machinfo->charge(0);
-	mdelay(SHARPSL_CHARGE_WAIT_TIME);
-	sharpsl_pm.machinfo->charge(1);
-
-	sharpsl_pm.charge_start_time = jiffies;
-}
-
-static void sharpsl_ac_timer(unsigned long data)
-{
-	int acin = sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN);
-
-	dev_dbg(sharpsl_pm.dev, "AC Status: %d\n",acin);
-
-	sharpsl_average_clear();
-	if (acin && (sharpsl_pm.charge_mode != CHRG_ON))
-		sharpsl_charge_on();
-	else if (sharpsl_pm.charge_mode == CHRG_ON)
-		sharpsl_charge_off();
-
-	schedule_delayed_work(&sharpsl_bat, 0);
-}
-
-
-irqreturn_t sharpsl_ac_isr(int irq, void *dev_id)
-{
-	/* Delay the event slightly to debounce */
-	/* Must be a smaller delay than the chrg_full_isr below */
-	mod_timer(&sharpsl_pm.ac_timer, jiffies + msecs_to_jiffies(250));
-
-	return IRQ_HANDLED;
-}
-
-static void sharpsl_chrg_full_timer(unsigned long data)
-{
-	dev_dbg(sharpsl_pm.dev, "Charge Full at time: %lx\n", jiffies);
-
-	sharpsl_pm.full_count++;
-
-	if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN)) {
-		dev_dbg(sharpsl_pm.dev, "Charge Full: AC removed - stop charging!\n");
-		if (sharpsl_pm.charge_mode == CHRG_ON)
-			sharpsl_charge_off();
-	} else if (sharpsl_pm.full_count < 2) {
-		dev_dbg(sharpsl_pm.dev, "Charge Full: Count too low\n");
-		schedule_delayed_work(&toggle_charger, 0);
-	} else if (time_after(jiffies, sharpsl_pm.charge_start_time + SHARPSL_CHARGE_FINISH_TIME)) {
-		dev_dbg(sharpsl_pm.dev, "Charge Full: Interrupt generated too slowly - retry.\n");
-		schedule_delayed_work(&toggle_charger, 0);
-	} else {
-		sharpsl_charge_off();
-		sharpsl_pm.charge_mode = CHRG_DONE;
-		dev_dbg(sharpsl_pm.dev, "Charge Full: Charging Finished\n");
-	}
-}
-
-/* Charging Finished Interrupt (Not present on Corgi) */
-/* Can trigger at the same time as an AC status change so
-   delay until after that has been processed */
-irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id)
-{
-	if (sharpsl_pm.flags & SHARPSL_SUSPENDED)
-		return IRQ_HANDLED;
-
-	/* delay until after any ac interrupt */
-	mod_timer(&sharpsl_pm.chrg_full_timer, jiffies + msecs_to_jiffies(500));
-
-	return IRQ_HANDLED;
-}
-
-irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id)
-{
-	int is_fatal = 0;
-
-	if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_LOCK)) {
-		dev_err(sharpsl_pm.dev, "Battery now Unlocked! Suspending.\n");
-		is_fatal = 1;
-	}
-
-	if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_FATAL)) {
-		dev_err(sharpsl_pm.dev, "Fatal Batt Error! Suspending.\n");
-		is_fatal = 1;
-	}
-
-	if (!(sharpsl_pm.flags & SHARPSL_APM_QUEUED) && is_fatal) {
-		sharpsl_pm.flags |= SHARPSL_APM_QUEUED;
-		apm_queue_event(APM_CRITICAL_SUSPEND);
-	}
-
-	return IRQ_HANDLED;
-}
-
-/*
- * Maintain an average of the last 10 readings
- */
-#define SHARPSL_CNV_VALUE_NUM    10
-static int sharpsl_ad_index;
-
-static void sharpsl_average_clear(void)
-{
-	sharpsl_ad_index = 0;
-}
-
-static int sharpsl_average_value(int ad)
-{
-	int i, ad_val = 0;
-	static int sharpsl_ad[SHARPSL_CNV_VALUE_NUM+1];
-
-	if (sharpsl_pm.battstat.mainbat_status != APM_BATTERY_STATUS_HIGH) {
-		sharpsl_ad_index = 0;
-		return ad;
-	}
-
-	sharpsl_ad[sharpsl_ad_index] = ad;
-	sharpsl_ad_index++;
-	if (sharpsl_ad_index >= SHARPSL_CNV_VALUE_NUM) {
-		for (i=0; i < (SHARPSL_CNV_VALUE_NUM-1); i++)
-			sharpsl_ad[i] = sharpsl_ad[i+1];
-		sharpsl_ad_index = SHARPSL_CNV_VALUE_NUM - 1;
-	}
-	for (i=0; i < sharpsl_ad_index; i++)
-		ad_val += sharpsl_ad[i];
-
-	return (ad_val / sharpsl_ad_index);
-}
-
-/*
- * Take an array of 5 integers, remove the maximum and minimum values
- * and return the average.
- */
-static int get_select_val(int *val)
-{
-	int i, j, k, temp, sum = 0;
-
-	/* Find MAX val */
-	temp = val[0];
-	j=0;
-	for (i=1; i<5; i++) {
-		if (temp < val[i]) {
-			temp = val[i];
-			j = i;
-		}
-	}
-
-	/* Find MIN val */
-	temp = val[4];
-	k=4;
-	for (i=3; i>=0; i--) {
-		if (temp > val[i]) {
-			temp = val[i];
-			k = i;
-		}
-	}
-
-	for (i=0; i<5; i++)
-		if (i != j && i != k )
-			sum += val[i];
-
-	dev_dbg(sharpsl_pm.dev, "Average: %d from values: %d, %d, %d, %d, %d\n", sum/3, val[0], val[1], val[2], val[3], val[4]);
-
-	return (sum/3);
-}
-
-static int sharpsl_check_battery_temp(void)
-{
-	int val, i, buff[5];
-
-	/* Check battery temperature */
-	for (i=0; i<5; i++) {
-		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP);
-		sharpsl_pm.machinfo->measure_temp(1);
-		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP);
-		buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_TEMP);
-		sharpsl_pm.machinfo->measure_temp(0);
-	}
-
-	val = get_select_val(buff);
-
-	dev_dbg(sharpsl_pm.dev, "Temperature: %d\n", val);
-	if (val > sharpsl_pm.machinfo->charge_on_temp) {
-		printk(KERN_WARNING "Not charging: temperature out of limits.\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int sharpsl_check_battery_voltage(void)
-{
-	int val, i, buff[5];
-
-	/* disable charge, enable discharge */
-	sharpsl_pm.machinfo->charge(0);
-	sharpsl_pm.machinfo->discharge(1);
-	mdelay(SHARPSL_WAIT_DISCHARGE_ON);
-
-	if (sharpsl_pm.machinfo->discharge1)
-		sharpsl_pm.machinfo->discharge1(1);
-
-	/* Check battery voltage */
-	for (i=0; i<5; i++) {
-		buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
-		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT);
-	}
-
-	if (sharpsl_pm.machinfo->discharge1)
-		sharpsl_pm.machinfo->discharge1(0);
-
-	sharpsl_pm.machinfo->discharge(0);
-
-	val = get_select_val(buff);
-	dev_dbg(sharpsl_pm.dev, "Battery Voltage: %d\n", val);
-
-	if (val < sharpsl_pm.machinfo->charge_on_volt)
-		return -1;
-
-	return 0;
-}
-#endif
-
-static int sharpsl_ac_check(void)
-{
-	int temp, i, buff[5];
-
-	for (i=0; i<5; i++) {
-		buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_ACIN_VOLT);
-		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_ACIN);
-	}
-
-	temp = get_select_val(buff);
-	dev_dbg(sharpsl_pm.dev, "AC Voltage: %d\n",temp);
-
-	if ((temp > sharpsl_pm.machinfo->charge_acin_high) || (temp < sharpsl_pm.machinfo->charge_acin_low)) {
-		dev_err(sharpsl_pm.dev, "Error: AC check failed.\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int sharpsl_pm_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	sharpsl_pm.flags |= SHARPSL_SUSPENDED;
-	flush_scheduled_work();
-
-	if (sharpsl_pm.charge_mode == CHRG_ON)
-		sharpsl_pm.flags |= SHARPSL_DO_OFFLINE_CHRG;
-	else
-		sharpsl_pm.flags &= ~SHARPSL_DO_OFFLINE_CHRG;
-
-	return 0;
-}
-
-static int sharpsl_pm_resume(struct platform_device *pdev)
-{
-	/* Clear the reset source indicators as they break the bootloader upon reboot */
-	RCSR = 0x0f;
-	sharpsl_average_clear();
-	sharpsl_pm.flags &= ~SHARPSL_APM_QUEUED;
-	sharpsl_pm.flags &= ~SHARPSL_SUSPENDED;
-
-	return 0;
-}
-
-static void corgi_goto_sleep(unsigned long alarm_time, unsigned int alarm_enable, suspend_state_t state)
-{
-	dev_dbg(sharpsl_pm.dev, "Time is: %08x\n",RCNR);
-
-	dev_dbg(sharpsl_pm.dev, "Offline Charge Activate = %d\n",sharpsl_pm.flags & SHARPSL_DO_OFFLINE_CHRG);
-	/* not charging and AC-IN! */
-
-	if ((sharpsl_pm.flags & SHARPSL_DO_OFFLINE_CHRG) && (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN))) {
-		dev_dbg(sharpsl_pm.dev, "Activating Offline Charger...\n");
-		sharpsl_pm.charge_mode = CHRG_OFF;
-		sharpsl_pm.flags &= ~SHARPSL_DO_OFFLINE_CHRG;
-		sharpsl_off_charge_battery();
-	}
-
-	sharpsl_pm.machinfo->presuspend();
-
-	PEDR = 0xffffffff; /* clear it */
-
-	sharpsl_pm.flags &= ~SHARPSL_ALARM_ACTIVE;
-	if ((sharpsl_pm.charge_mode == CHRG_ON) && ((alarm_enable && ((alarm_time - RCNR) > (SHARPSL_BATCHK_TIME_SUSPEND + 30))) || !alarm_enable)) {
-		RTSR &= RTSR_ALE;
-		RTAR = RCNR + SHARPSL_BATCHK_TIME_SUSPEND;
-		dev_dbg(sharpsl_pm.dev, "Charging alarm at: %08x\n",RTAR);
-		sharpsl_pm.flags |= SHARPSL_ALARM_ACTIVE;
-	} else if (alarm_enable) {
-		RTSR &= RTSR_ALE;
-		RTAR = alarm_time;
-		dev_dbg(sharpsl_pm.dev, "User alarm at: %08x\n",RTAR);
-	} else {
-		dev_dbg(sharpsl_pm.dev, "No alarms set.\n");
-	}
-
-	pxa_pm_enter(state);
-
-	sharpsl_pm.machinfo->postsuspend();
-
-	dev_dbg(sharpsl_pm.dev, "Corgi woken up from suspend: %08x\n",PEDR);
-}
-
-static int corgi_enter_suspend(unsigned long alarm_time, unsigned int alarm_enable, suspend_state_t state)
-{
-	if (!sharpsl_pm.machinfo->should_wakeup(!(sharpsl_pm.flags & SHARPSL_ALARM_ACTIVE) && alarm_enable) )
-	{
-		if (!(sharpsl_pm.flags & SHARPSL_ALARM_ACTIVE)) {
-			dev_dbg(sharpsl_pm.dev, "No user triggered wakeup events and not charging. Strange. Suspend.\n");
-			corgi_goto_sleep(alarm_time, alarm_enable, state);
-			return 1;
-		}
-		if(sharpsl_off_charge_battery()) {
-			dev_dbg(sharpsl_pm.dev, "Charging. Suspend...\n");
-			corgi_goto_sleep(alarm_time, alarm_enable, state);
-			return 1;
-		}
-		dev_dbg(sharpsl_pm.dev, "User triggered wakeup in offline charger.\n");
-	}
-
-	if ((!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_LOCK)) || (sharpsl_fatal_check() < 0) )
-	{
-		dev_err(sharpsl_pm.dev, "Fatal condition. Suspend.\n");
-		corgi_goto_sleep(alarm_time, alarm_enable, state);
-		return 1;
-	}
-
-	return 0;
-}
-
-static int corgi_pxa_pm_enter(suspend_state_t state)
-{
-	unsigned long alarm_time = RTAR;
-	unsigned int alarm_status = ((RTSR & RTSR_ALE) != 0);
-
-	dev_dbg(sharpsl_pm.dev, "SharpSL suspending for first time.\n");
-
-	corgi_goto_sleep(alarm_time, alarm_status, state);
-
-	while (corgi_enter_suspend(alarm_time,alarm_status,state))
-		{}
-
-	if (sharpsl_pm.machinfo->earlyresume)
-		sharpsl_pm.machinfo->earlyresume();
-
-	dev_dbg(sharpsl_pm.dev, "SharpSL resuming...\n");
-
-	return 0;
-}
-
-/*
- * Check for fatal battery errors
- * Fatal returns -1
- */
-static int sharpsl_fatal_check(void)
-{
-	int buff[5], temp, i, acin;
-
-	dev_dbg(sharpsl_pm.dev, "sharpsl_fatal_check entered\n");
-
-	/* Check AC-Adapter */
-	acin = sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN);
-
-	if (acin && (sharpsl_pm.charge_mode == CHRG_ON)) {
-		sharpsl_pm.machinfo->charge(0);
-		udelay(100);
-		sharpsl_pm.machinfo->discharge(1);	/* enable discharge */
-		mdelay(SHARPSL_WAIT_DISCHARGE_ON);
-	}
-
-	if (sharpsl_pm.machinfo->discharge1)
-		sharpsl_pm.machinfo->discharge1(1);
-
-	/* Check battery : check inserting battery ? */
-	for (i=0; i<5; i++) {
-		buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
-		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT);
-	}
-
-	if (sharpsl_pm.machinfo->discharge1)
-		sharpsl_pm.machinfo->discharge1(0);
-
-	if (acin && (sharpsl_pm.charge_mode == CHRG_ON)) {
-		udelay(100);
-		sharpsl_pm.machinfo->charge(1);
-		sharpsl_pm.machinfo->discharge(0);
-	}
-
-	temp = get_select_val(buff);
-	dev_dbg(sharpsl_pm.dev, "sharpsl_fatal_check: acin: %d, discharge voltage: %d, no discharge: %ld\n", acin, temp, sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT));
-
-	if ((acin && (temp < sharpsl_pm.machinfo->fatal_acin_volt)) ||
-			(!acin && (temp < sharpsl_pm.machinfo->fatal_noacin_volt)))
-		return -1;
-	return 0;
-}
-
-static int sharpsl_off_charge_error(void)
-{
-	dev_err(sharpsl_pm.dev, "Offline Charger: Error occurred.\n");
-	sharpsl_pm.machinfo->charge(0);
-	sharpsl_pm_led(SHARPSL_LED_ERROR);
-	sharpsl_pm.charge_mode = CHRG_ERROR;
-	return 1;
-}
-
-/*
- * Charging Control while suspended
- * Return 1 - go straight to sleep
- * Return 0 - sleep or wakeup depending on other factors
- */
-static int sharpsl_off_charge_battery(void)
-{
-	int time;
-
-	dev_dbg(sharpsl_pm.dev, "Charge Mode: %d\n", sharpsl_pm.charge_mode);
-
-	if (sharpsl_pm.charge_mode == CHRG_OFF) {
-		dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 1\n");
-
-		/* AC Check */
-		if ((sharpsl_ac_check() < 0) || (sharpsl_check_battery_temp() < 0))
-			return sharpsl_off_charge_error();
-
-		/* Start Charging */
-		sharpsl_pm_led(SHARPSL_LED_ON);
-		sharpsl_pm.machinfo->charge(0);
-		mdelay(SHARPSL_CHARGE_WAIT_TIME);
-		sharpsl_pm.machinfo->charge(1);
-
-		sharpsl_pm.charge_mode = CHRG_ON;
-		sharpsl_pm.full_count = 0;
-
-		return 1;
-	} else if (sharpsl_pm.charge_mode != CHRG_ON) {
-		return 1;
-	}
-
-	if (sharpsl_pm.full_count == 0) {
-		int time;
-
-		dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 2\n");
-
-		if ((sharpsl_check_battery_temp() < 0) || (sharpsl_check_battery_voltage() < 0))
-			return sharpsl_off_charge_error();
-
-		sharpsl_pm.machinfo->charge(0);
-		mdelay(SHARPSL_CHARGE_WAIT_TIME);
-		sharpsl_pm.machinfo->charge(1);
-		sharpsl_pm.charge_mode = CHRG_ON;
-
-		mdelay(SHARPSL_CHARGE_CO_CHECK_TIME);
-
-		time = RCNR;
-		while(1) {
-			/* Check if any wakeup event had occurred */
-			if (sharpsl_pm.machinfo->charger_wakeup() != 0)
-				return 0;
-			/* Check for timeout */
-			if ((RCNR - time) > SHARPSL_WAIT_CO_TIME)
-				return 1;
-			if (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_CHRGFULL)) {
-				dev_dbg(sharpsl_pm.dev, "Offline Charger: Charge full occurred. Retrying to check\n");
-	   			sharpsl_pm.full_count++;
-				sharpsl_pm.machinfo->charge(0);
-				mdelay(SHARPSL_CHARGE_WAIT_TIME);
-				sharpsl_pm.machinfo->charge(1);
-				return 1;
-			}
-		}
-	}
-
-	dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 3\n");
-
-	mdelay(SHARPSL_CHARGE_CO_CHECK_TIME);
-
-	time = RCNR;
-	while(1) {
-		/* Check if any wakeup event had occurred */
-		if (sharpsl_pm.machinfo->charger_wakeup() != 0)
-			return 0;
-		/* Check for timeout */
-		if ((RCNR-time) > SHARPSL_WAIT_CO_TIME) {
-			if (sharpsl_pm.full_count > SHARPSL_CHARGE_RETRY_CNT) {
-				dev_dbg(sharpsl_pm.dev, "Offline Charger: Not charged sufficiently. Retrying.\n");
-				sharpsl_pm.full_count = 0;
-			}
-			sharpsl_pm.full_count++;
-			return 1;
-		}
-		if (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_CHRGFULL)) {
-			dev_dbg(sharpsl_pm.dev, "Offline Charger: Charging complete.\n");
-			sharpsl_pm_led(SHARPSL_LED_OFF);
-			sharpsl_pm.machinfo->charge(0);
-			sharpsl_pm.charge_mode = CHRG_DONE;
-			return 1;
-		}
-	}
-}
-#else
-#define sharpsl_pm_suspend	NULL
-#define sharpsl_pm_resume	NULL
-#endif
-
-static ssize_t battery_percentage_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n",sharpsl_pm.battstat.mainbat_percent);
-}
-
-static ssize_t battery_voltage_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%d\n",sharpsl_pm.battstat.mainbat_voltage);
-}
-
-static DEVICE_ATTR(battery_percentage, 0444, battery_percentage_show, NULL);
-static DEVICE_ATTR(battery_voltage, 0444, battery_voltage_show, NULL);
-
-extern void (*apm_get_power_status)(struct apm_power_info *);
-
-static void sharpsl_apm_get_power_status(struct apm_power_info *info)
-{
-	info->ac_line_status = sharpsl_pm.battstat.ac_status;
-
-	if (sharpsl_pm.charge_mode == CHRG_ON)
-		info->battery_status = APM_BATTERY_STATUS_CHARGING;
-	else
-		info->battery_status = sharpsl_pm.battstat.mainbat_status;
-
-	info->battery_flag = (1 << info->battery_status);
-	info->battery_life = sharpsl_pm.battstat.mainbat_percent;
-}
-
-#ifdef CONFIG_PM
-static struct platform_suspend_ops sharpsl_pm_ops = {
-	.enter		= corgi_pxa_pm_enter,
-	.valid		= suspend_valid_only_mem,
-};
-#endif
-
-static int __init sharpsl_pm_probe(struct platform_device *pdev)
-{
-	int ret;
-
-	if (!pdev->dev.platform_data)
-		return -EINVAL;
-
-	sharpsl_pm.dev = &pdev->dev;
-	sharpsl_pm.machinfo = pdev->dev.platform_data;
-	sharpsl_pm.charge_mode = CHRG_OFF;
-	sharpsl_pm.flags = 0;
-
-	init_timer(&sharpsl_pm.ac_timer);
-	sharpsl_pm.ac_timer.function = sharpsl_ac_timer;
-
-	init_timer(&sharpsl_pm.chrg_full_timer);
-	sharpsl_pm.chrg_full_timer.function = sharpsl_chrg_full_timer;
-
-	led_trigger_register_simple("sharpsl-charge", &sharpsl_charge_led_trigger);
-
-	sharpsl_pm.machinfo->init();
-
-	ret = device_create_file(&pdev->dev, &dev_attr_battery_percentage);
-	ret |= device_create_file(&pdev->dev, &dev_attr_battery_voltage);
-	if (ret != 0)
-		dev_warn(&pdev->dev, "Failed to register attributes (%d)\n", ret);
-
-	apm_get_power_status = sharpsl_apm_get_power_status;
-
-#ifdef CONFIG_PM
-	suspend_set_ops(&sharpsl_pm_ops);
-#endif
-
-	mod_timer(&sharpsl_pm.ac_timer, jiffies + msecs_to_jiffies(250));
-
-	return 0;
-}
-
-static int sharpsl_pm_remove(struct platform_device *pdev)
-{
-	suspend_set_ops(NULL);
-
-	device_remove_file(&pdev->dev, &dev_attr_battery_percentage);
-	device_remove_file(&pdev->dev, &dev_attr_battery_voltage);
-
-	led_trigger_unregister_simple(sharpsl_charge_led_trigger);
-
-	sharpsl_pm.machinfo->exit();
-
-	del_timer_sync(&sharpsl_pm.chrg_full_timer);
-	del_timer_sync(&sharpsl_pm.ac_timer);
-
-	return 0;
-}
-
-static struct platform_driver sharpsl_pm_driver = {
-	.probe		= sharpsl_pm_probe,
-	.remove		= sharpsl_pm_remove,
-	.suspend	= sharpsl_pm_suspend,
-	.resume		= sharpsl_pm_resume,
-	.driver		= {
-		.name		= "sharpsl-pm",
-	},
-};
-
-static int __devinit sharpsl_pm_init(void)
-{
-	return platform_driver_register(&sharpsl_pm_driver);
-}
-
-static void sharpsl_pm_exit(void)
-{
- 	platform_driver_unregister(&sharpsl_pm_driver);
-}
-
-late_initcall(sharpsl_pm_init);
-module_exit(sharpsl_pm_exit);
diff --git a/arch/arm/include/asm/hardware/sharpsl_pm.h b/arch/arm/include/asm/hardware/sharpsl_pm.h
deleted file mode 100644
index 2d00db2..0000000
--- a/arch/arm/include/asm/hardware/sharpsl_pm.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * SharpSL Battery/PM Driver
- *
- * Copyright (c) 2004-2005 Richard Purdie
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/interrupt.h>
-
-struct sharpsl_charger_machinfo {
-	void (*init)(void);
-	void (*exit)(void);
-	int gpio_acin;
-	int gpio_batfull;
-	int batfull_irq;
-	int gpio_batlock;
-	int gpio_fatal;
-	void (*discharge)(int);
-	void (*discharge1)(int);
-	void (*charge)(int);
-	void (*measure_temp)(int);
-	void (*presuspend)(void);
-	void (*postsuspend)(void);
-	void (*earlyresume)(void);
-	unsigned long (*read_devdata)(int);
-#define SHARPSL_BATT_VOLT       1
-#define SHARPSL_BATT_TEMP       2
-#define SHARPSL_ACIN_VOLT       3
-#define SHARPSL_STATUS_ACIN     4
-#define SHARPSL_STATUS_LOCK     5
-#define SHARPSL_STATUS_CHRGFULL 6
-#define SHARPSL_STATUS_FATAL    7
-	unsigned long (*charger_wakeup)(void);
-	int (*should_wakeup)(unsigned int resume_on_alarm);
-	void (*backlight_limit)(int);
-	int (*backlight_get_status) (void);
-	int charge_on_volt;
-	int charge_on_temp;
-	int charge_acin_high;
-	int charge_acin_low;
-	int fatal_acin_volt;
-	int fatal_noacin_volt;
-	int bat_levels;
-	struct battery_thresh *bat_levels_noac;
-	struct battery_thresh *bat_levels_acin;
-	struct battery_thresh *bat_levels_noac_bl;
-	struct battery_thresh *bat_levels_acin_bl;
-	int status_high_acin;
-	int status_low_acin;
-	int status_high_noac;
-	int status_low_noac;
-};
-
-struct battery_thresh {
-	int voltage;
-	int percentage;
-};
-
-struct battery_stat {
-	int ac_status;         /* APM AC Present/Not Present */
-	int mainbat_status;    /* APM Main Battery Status */
-	int mainbat_percent;   /* Main Battery Percentage Charge */
-	int mainbat_voltage;   /* Main Battery Voltage */
-};
-
-struct sharpsl_pm_status {
-	struct device *dev;
-	struct timer_list ac_timer;
-	struct timer_list chrg_full_timer;
-
-	int charge_mode;
-#define CHRG_ERROR    (-1)
-#define CHRG_OFF      (0)
-#define CHRG_ON       (1)
-#define CHRG_DONE     (2)
-
-	unsigned int flags;
-#define SHARPSL_SUSPENDED       (1 << 0)  /* Device is Suspended */
-#define SHARPSL_ALARM_ACTIVE    (1 << 1)  /* Alarm is for charging event (not user) */
-#define SHARPSL_BL_LIMIT        (1 << 2)  /* Backlight Intensity Limited */
-#define SHARPSL_APM_QUEUED      (1 << 3)  /* APM Event Queued */
-#define SHARPSL_DO_OFFLINE_CHRG (1 << 4)  /* Trigger the offline charger */
-
-	int full_count;
-	unsigned long charge_start_time;
-	struct sharpsl_charger_machinfo *machinfo;
-	struct battery_stat battstat;
-};
-
-extern struct sharpsl_pm_status sharpsl_pm;
-
-
-#define SHARPSL_LED_ERROR  2
-#define SHARPSL_LED_ON     1
-#define SHARPSL_LED_OFF    0
-
-void sharpsl_battery_kick(void);
-void sharpsl_pm_led(int val);
-irqreturn_t sharpsl_ac_isr(int irq, void *dev_id);
-irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id);
-irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id);
-
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 2e45630..f4533f8 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -94,6 +94,10 @@ config PXA_SHARPSL
 	  SL-C3000 (Spitz), SL-C3100 (Borzoi) or SL-C6000x (Tosa)
 	  handheld computer.
 
+config SHARPSL_PM
+	bool
+	select APM_EMULATION
+
 config CORGI_SSP_DEPRECATED
 	bool
 	select PXA_SSP
diff --git a/arch/arm/mach-pxa/include/mach/sharpsl_pm.h b/arch/arm/mach-pxa/include/mach/sharpsl_pm.h
new file mode 100644
index 0000000..2d00db2
--- /dev/null
+++ b/arch/arm/mach-pxa/include/mach/sharpsl_pm.h
@@ -0,0 +1,106 @@
+/*
+ * SharpSL Battery/PM Driver
+ *
+ * Copyright (c) 2004-2005 Richard Purdie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/interrupt.h>
+
+struct sharpsl_charger_machinfo {
+	void (*init)(void);
+	void (*exit)(void);
+	int gpio_acin;
+	int gpio_batfull;
+	int batfull_irq;
+	int gpio_batlock;
+	int gpio_fatal;
+	void (*discharge)(int);
+	void (*discharge1)(int);
+	void (*charge)(int);
+	void (*measure_temp)(int);
+	void (*presuspend)(void);
+	void (*postsuspend)(void);
+	void (*earlyresume)(void);
+	unsigned long (*read_devdata)(int);
+#define SHARPSL_BATT_VOLT       1
+#define SHARPSL_BATT_TEMP       2
+#define SHARPSL_ACIN_VOLT       3
+#define SHARPSL_STATUS_ACIN     4
+#define SHARPSL_STATUS_LOCK     5
+#define SHARPSL_STATUS_CHRGFULL 6
+#define SHARPSL_STATUS_FATAL    7
+	unsigned long (*charger_wakeup)(void);
+	int (*should_wakeup)(unsigned int resume_on_alarm);
+	void (*backlight_limit)(int);
+	int (*backlight_get_status) (void);
+	int charge_on_volt;
+	int charge_on_temp;
+	int charge_acin_high;
+	int charge_acin_low;
+	int fatal_acin_volt;
+	int fatal_noacin_volt;
+	int bat_levels;
+	struct battery_thresh *bat_levels_noac;
+	struct battery_thresh *bat_levels_acin;
+	struct battery_thresh *bat_levels_noac_bl;
+	struct battery_thresh *bat_levels_acin_bl;
+	int status_high_acin;
+	int status_low_acin;
+	int status_high_noac;
+	int status_low_noac;
+};
+
+struct battery_thresh {
+	int voltage;
+	int percentage;
+};
+
+struct battery_stat {
+	int ac_status;         /* APM AC Present/Not Present */
+	int mainbat_status;    /* APM Main Battery Status */
+	int mainbat_percent;   /* Main Battery Percentage Charge */
+	int mainbat_voltage;   /* Main Battery Voltage */
+};
+
+struct sharpsl_pm_status {
+	struct device *dev;
+	struct timer_list ac_timer;
+	struct timer_list chrg_full_timer;
+
+	int charge_mode;
+#define CHRG_ERROR    (-1)
+#define CHRG_OFF      (0)
+#define CHRG_ON       (1)
+#define CHRG_DONE     (2)
+
+	unsigned int flags;
+#define SHARPSL_SUSPENDED       (1 << 0)  /* Device is Suspended */
+#define SHARPSL_ALARM_ACTIVE    (1 << 1)  /* Alarm is for charging event (not user) */
+#define SHARPSL_BL_LIMIT        (1 << 2)  /* Backlight Intensity Limited */
+#define SHARPSL_APM_QUEUED      (1 << 3)  /* APM Event Queued */
+#define SHARPSL_DO_OFFLINE_CHRG (1 << 4)  /* Trigger the offline charger */
+
+	int full_count;
+	unsigned long charge_start_time;
+	struct sharpsl_charger_machinfo *machinfo;
+	struct battery_stat battstat;
+};
+
+extern struct sharpsl_pm_status sharpsl_pm;
+
+
+#define SHARPSL_LED_ERROR  2
+#define SHARPSL_LED_ON     1
+#define SHARPSL_LED_OFF    0
+
+void sharpsl_battery_kick(void);
+void sharpsl_pm_led(int val);
+irqreturn_t sharpsl_ac_isr(int irq, void *dev_id);
+irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id);
+irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id);
+
diff --git a/arch/arm/mach-pxa/sharpsl.h b/arch/arm/mach-pxa/sharpsl.h
index 047909a..f1cd00e 100644
--- a/arch/arm/mach-pxa/sharpsl.h
+++ b/arch/arm/mach-pxa/sharpsl.h
@@ -7,7 +7,7 @@
  *
  */
 
-#include <asm/hardware/sharpsl_pm.h>
+#include <mach/sharpsl_pm.h>
 
 /*
  * SharpSL SSP Driver
diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
index 16b4ec6..9d61c49 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.c
+++ b/arch/arm/mach-pxa/sharpsl_pm.c
@@ -21,13 +21,63 @@
 #include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/apm-emulation.h>
+#include <linux/timer.h>
+#include <linux/apm_bios.h>
+#include <linux/delay.h>
+#include <linux/leds.h>
+#include <linux/suspend.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
 #include <mach/pm.h>
+#include <mach/pxa2xx-regs.h>
 #include <mach/pxa2xx-gpio.h>
+#include <mach/regs-rtc.h>
 #include <mach/sharpsl.h>
-#include "sharpsl.h"
+#include <mach/sharpsl_pm.h>
+
+/*
+ * Constants
+ */
+#define SHARPSL_CHARGE_ON_TIME_INTERVAL        (msecs_to_jiffies(1*60*1000))  /* 1 min */
+#define SHARPSL_CHARGE_FINISH_TIME             (msecs_to_jiffies(10*60*1000)) /* 10 min */
+#define SHARPSL_BATCHK_TIME                    (msecs_to_jiffies(15*1000))    /* 15 sec */
+#define SHARPSL_BATCHK_TIME_SUSPEND            (60*10)                        /* 10 min */
+
+#define SHARPSL_WAIT_CO_TIME                   15  /* 15 sec */
+#define SHARPSL_WAIT_DISCHARGE_ON              100 /* 100 msec */
+#define SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP   10  /* 10 msec */
+#define SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT   10  /* 10 msec */
+#define SHARPSL_CHECK_BATTERY_WAIT_TIME_ACIN   10  /* 10 msec */
+#define SHARPSL_CHARGE_WAIT_TIME               15  /* 15 msec */
+#define SHARPSL_CHARGE_CO_CHECK_TIME           5   /* 5 msec */
+#define SHARPSL_CHARGE_RETRY_CNT               1   /* eqv. 10 min */
+
+/*
+ * Prototypes
+ */
+#ifdef CONFIG_PM
+static int sharpsl_off_charge_battery(void);
+static int sharpsl_check_battery_voltage(void);
+static int sharpsl_fatal_check(void);
+#endif
+static int sharpsl_check_battery_temp(void);
+static int sharpsl_ac_check(void);
+static int sharpsl_average_value(int ad);
+static void sharpsl_average_clear(void);
+static void sharpsl_charge_toggle(struct work_struct *private_);
+static void sharpsl_battery_thread(struct work_struct *private_);
+
+
+/*
+ * Variables
+ */
+struct sharpsl_pm_status sharpsl_pm;
+DECLARE_DELAYED_WORK(toggle_charger, sharpsl_charge_toggle);
+DECLARE_DELAYED_WORK(sharpsl_bat, sharpsl_battery_thread);
+DEFINE_LED_TRIGGER(sharpsl_charge_led_trigger);
+
+
 
 struct battery_thresh spitz_battery_levels_acin[] = {
 	{ 213, 100},
@@ -189,3 +239,785 @@ void sharpsl_pm_pxa_remove(void)
 	if (sharpsl_pm.machinfo->batfull_irq)
 		free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr);
 }
+
+static int get_percentage(int voltage)
+{
+	int i = sharpsl_pm.machinfo->bat_levels - 1;
+	int bl_status = sharpsl_pm.machinfo->backlight_get_status ? sharpsl_pm.machinfo->backlight_get_status() : 0;
+	struct battery_thresh *thresh;
+
+	if (sharpsl_pm.charge_mode == CHRG_ON)
+		thresh = bl_status ? sharpsl_pm.machinfo->bat_levels_acin_bl : sharpsl_pm.machinfo->bat_levels_acin;
+	else
+		thresh = bl_status ? sharpsl_pm.machinfo->bat_levels_noac_bl : sharpsl_pm.machinfo->bat_levels_noac;
+
+	while (i > 0 && (voltage > thresh[i].voltage))
+		i--;
+
+	return thresh[i].percentage;
+}
+
+static int get_apm_status(int voltage)
+{
+	int low_thresh, high_thresh;
+
+	if (sharpsl_pm.charge_mode == CHRG_ON) {
+		high_thresh = sharpsl_pm.machinfo->status_high_acin;
+		low_thresh = sharpsl_pm.machinfo->status_low_acin;
+	} else {
+		high_thresh = sharpsl_pm.machinfo->status_high_noac;
+		low_thresh = sharpsl_pm.machinfo->status_low_noac;
+	}
+
+	if (voltage >= high_thresh)
+		return APM_BATTERY_STATUS_HIGH;
+	if (voltage >= low_thresh)
+		return APM_BATTERY_STATUS_LOW;
+	return APM_BATTERY_STATUS_CRITICAL;
+}
+
+void sharpsl_battery_kick(void)
+{
+	schedule_delayed_work(&sharpsl_bat, msecs_to_jiffies(125));
+}
+EXPORT_SYMBOL(sharpsl_battery_kick);
+
+
+static void sharpsl_battery_thread(struct work_struct *private_)
+{
+	int voltage, percent, apm_status, i = 0;
+
+	if (!sharpsl_pm.machinfo)
+		return;
+
+	sharpsl_pm.battstat.ac_status = (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN) ? APM_AC_ONLINE : APM_AC_OFFLINE);
+
+	/* Corgi cannot confirm when battery fully charged so periodically kick! */
+	if (!sharpsl_pm.machinfo->batfull_irq && (sharpsl_pm.charge_mode == CHRG_ON)
+			&& time_after(jiffies, sharpsl_pm.charge_start_time +  SHARPSL_CHARGE_ON_TIME_INTERVAL))
+		schedule_delayed_work(&toggle_charger, 0);
+
+	while(1) {
+		voltage = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
+
+		if (voltage > 0) break;
+		if (i++ > 5) {
+			voltage = sharpsl_pm.machinfo->bat_levels_noac[0].voltage;
+			dev_warn(sharpsl_pm.dev, "Warning: Cannot read main battery!\n");
+			break;
+		}
+	}
+
+	voltage = sharpsl_average_value(voltage);
+	apm_status = get_apm_status(voltage);
+	percent = get_percentage(voltage);
+
+	/* At low battery voltages, the voltage has a tendency to start
+           creeping back up so we try to avoid this here */
+	if ((sharpsl_pm.battstat.ac_status == APM_AC_ONLINE) || (apm_status == APM_BATTERY_STATUS_HIGH) ||  percent <= sharpsl_pm.battstat.mainbat_percent) {
+		sharpsl_pm.battstat.mainbat_voltage = voltage;
+		sharpsl_pm.battstat.mainbat_status = apm_status;
+		sharpsl_pm.battstat.mainbat_percent = percent;
+	}
+
+	dev_dbg(sharpsl_pm.dev, "Battery: voltage: %d, status: %d, percentage: %d, time: %ld\n", voltage,
+			sharpsl_pm.battstat.mainbat_status, sharpsl_pm.battstat.mainbat_percent, jiffies);
+
+#ifdef CONFIG_BACKLIGHT_CORGI
+	/* If battery is low. limit backlight intensity to save power. */
+	if ((sharpsl_pm.battstat.ac_status != APM_AC_ONLINE)
+			&& ((sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_LOW) ||
+			(sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_CRITICAL))) {
+		if (!(sharpsl_pm.flags & SHARPSL_BL_LIMIT)) {
+			sharpsl_pm.machinfo->backlight_limit(1);
+			sharpsl_pm.flags |= SHARPSL_BL_LIMIT;
+		}
+	} else if (sharpsl_pm.flags & SHARPSL_BL_LIMIT) {
+		sharpsl_pm.machinfo->backlight_limit(0);
+		sharpsl_pm.flags &= ~SHARPSL_BL_LIMIT;
+	}
+#endif
+
+	/* Suspend if critical battery level */
+	if ((sharpsl_pm.battstat.ac_status != APM_AC_ONLINE)
+			&& (sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_CRITICAL)
+			&& !(sharpsl_pm.flags & SHARPSL_APM_QUEUED)) {
+		sharpsl_pm.flags |= SHARPSL_APM_QUEUED;
+		dev_err(sharpsl_pm.dev, "Fatal Off\n");
+		apm_queue_event(APM_CRITICAL_SUSPEND);
+	}
+
+	schedule_delayed_work(&sharpsl_bat, SHARPSL_BATCHK_TIME);
+}
+
+void sharpsl_pm_led(int val)
+{
+	if (val == SHARPSL_LED_ERROR) {
+		dev_err(sharpsl_pm.dev, "Charging Error!\n");
+	} else if (val == SHARPSL_LED_ON) {
+		dev_dbg(sharpsl_pm.dev, "Charge LED On\n");
+		led_trigger_event(sharpsl_charge_led_trigger, LED_FULL);
+	} else {
+		dev_dbg(sharpsl_pm.dev, "Charge LED Off\n");
+		led_trigger_event(sharpsl_charge_led_trigger, LED_OFF);
+	}
+}
+
+static void sharpsl_charge_on(void)
+{
+	dev_dbg(sharpsl_pm.dev, "Turning Charger On\n");
+
+	sharpsl_pm.full_count = 0;
+	sharpsl_pm.charge_mode = CHRG_ON;
+	schedule_delayed_work(&toggle_charger, msecs_to_jiffies(250));
+	schedule_delayed_work(&sharpsl_bat, msecs_to_jiffies(500));
+}
+
+static void sharpsl_charge_off(void)
+{
+	dev_dbg(sharpsl_pm.dev, "Turning Charger Off\n");
+
+	sharpsl_pm.machinfo->charge(0);
+	sharpsl_pm_led(SHARPSL_LED_OFF);
+	sharpsl_pm.charge_mode = CHRG_OFF;
+
+	schedule_delayed_work(&sharpsl_bat, 0);
+}
+
+static void sharpsl_charge_error(void)
+{
+	sharpsl_pm_led(SHARPSL_LED_ERROR);
+	sharpsl_pm.machinfo->charge(0);
+	sharpsl_pm.charge_mode = CHRG_ERROR;
+}
+
+static void sharpsl_charge_toggle(struct work_struct *private_)
+{
+	dev_dbg(sharpsl_pm.dev, "Toogling Charger at time: %lx\n", jiffies);
+
+	if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN)) {
+		sharpsl_charge_off();
+		return;
+	} else if ((sharpsl_check_battery_temp() < 0) || (sharpsl_ac_check() < 0)) {
+		sharpsl_charge_error();
+		return;
+	}
+
+	sharpsl_pm_led(SHARPSL_LED_ON);
+	sharpsl_pm.machinfo->charge(0);
+	mdelay(SHARPSL_CHARGE_WAIT_TIME);
+	sharpsl_pm.machinfo->charge(1);
+
+	sharpsl_pm.charge_start_time = jiffies;
+}
+
+static void sharpsl_ac_timer(unsigned long data)
+{
+	int acin = sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN);
+
+	dev_dbg(sharpsl_pm.dev, "AC Status: %d\n",acin);
+
+	sharpsl_average_clear();
+	if (acin && (sharpsl_pm.charge_mode != CHRG_ON))
+		sharpsl_charge_on();
+	else if (sharpsl_pm.charge_mode == CHRG_ON)
+		sharpsl_charge_off();
+
+	schedule_delayed_work(&sharpsl_bat, 0);
+}
+
+
+irqreturn_t sharpsl_ac_isr(int irq, void *dev_id)
+{
+	/* Delay the event slightly to debounce */
+	/* Must be a smaller delay than the chrg_full_isr below */
+	mod_timer(&sharpsl_pm.ac_timer, jiffies + msecs_to_jiffies(250));
+
+	return IRQ_HANDLED;
+}
+
+static void sharpsl_chrg_full_timer(unsigned long data)
+{
+	dev_dbg(sharpsl_pm.dev, "Charge Full at time: %lx\n", jiffies);
+
+	sharpsl_pm.full_count++;
+
+	if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN)) {
+		dev_dbg(sharpsl_pm.dev, "Charge Full: AC removed - stop charging!\n");
+		if (sharpsl_pm.charge_mode == CHRG_ON)
+			sharpsl_charge_off();
+	} else if (sharpsl_pm.full_count < 2) {
+		dev_dbg(sharpsl_pm.dev, "Charge Full: Count too low\n");
+		schedule_delayed_work(&toggle_charger, 0);
+	} else if (time_after(jiffies, sharpsl_pm.charge_start_time + SHARPSL_CHARGE_FINISH_TIME)) {
+		dev_dbg(sharpsl_pm.dev, "Charge Full: Interrupt generated too slowly - retry.\n");
+		schedule_delayed_work(&toggle_charger, 0);
+	} else {
+		sharpsl_charge_off();
+		sharpsl_pm.charge_mode = CHRG_DONE;
+		dev_dbg(sharpsl_pm.dev, "Charge Full: Charging Finished\n");
+	}
+}
+
+/* Charging Finished Interrupt (Not present on Corgi) */
+/* Can trigger at the same time as an AC status change so
+   delay until after that has been processed */
+irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id)
+{
+	if (sharpsl_pm.flags & SHARPSL_SUSPENDED)
+		return IRQ_HANDLED;
+
+	/* delay until after any ac interrupt */
+	mod_timer(&sharpsl_pm.chrg_full_timer, jiffies + msecs_to_jiffies(500));
+
+	return IRQ_HANDLED;
+}
+
+irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id)
+{
+	int is_fatal = 0;
+
+	if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_LOCK)) {
+		dev_err(sharpsl_pm.dev, "Battery now Unlocked! Suspending.\n");
+		is_fatal = 1;
+	}
+
+	if (!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_FATAL)) {
+		dev_err(sharpsl_pm.dev, "Fatal Batt Error! Suspending.\n");
+		is_fatal = 1;
+	}
+
+	if (!(sharpsl_pm.flags & SHARPSL_APM_QUEUED) && is_fatal) {
+		sharpsl_pm.flags |= SHARPSL_APM_QUEUED;
+		apm_queue_event(APM_CRITICAL_SUSPEND);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Maintain an average of the last 10 readings
+ */
+#define SHARPSL_CNV_VALUE_NUM    10
+static int sharpsl_ad_index;
+
+static void sharpsl_average_clear(void)
+{
+	sharpsl_ad_index = 0;
+}
+
+static int sharpsl_average_value(int ad)
+{
+	int i, ad_val = 0;
+	static int sharpsl_ad[SHARPSL_CNV_VALUE_NUM+1];
+
+	if (sharpsl_pm.battstat.mainbat_status != APM_BATTERY_STATUS_HIGH) {
+		sharpsl_ad_index = 0;
+		return ad;
+	}
+
+	sharpsl_ad[sharpsl_ad_index] = ad;
+	sharpsl_ad_index++;
+	if (sharpsl_ad_index >= SHARPSL_CNV_VALUE_NUM) {
+		for (i=0; i < (SHARPSL_CNV_VALUE_NUM-1); i++)
+			sharpsl_ad[i] = sharpsl_ad[i+1];
+		sharpsl_ad_index = SHARPSL_CNV_VALUE_NUM - 1;
+	}
+	for (i=0; i < sharpsl_ad_index; i++)
+		ad_val += sharpsl_ad[i];
+
+	return (ad_val / sharpsl_ad_index);
+}
+
+/*
+ * Take an array of 5 integers, remove the maximum and minimum values
+ * and return the average.
+ */
+static int get_select_val(int *val)
+{
+	int i, j, k, temp, sum = 0;
+
+	/* Find MAX val */
+	temp = val[0];
+	j=0;
+	for (i=1; i<5; i++) {
+		if (temp < val[i]) {
+			temp = val[i];
+			j = i;
+		}
+	}
+
+	/* Find MIN val */
+	temp = val[4];
+	k=4;
+	for (i=3; i>=0; i--) {
+		if (temp > val[i]) {
+			temp = val[i];
+			k = i;
+		}
+	}
+
+	for (i=0; i<5; i++)
+		if (i != j && i != k )
+			sum += val[i];
+
+	dev_dbg(sharpsl_pm.dev, "Average: %d from values: %d, %d, %d, %d, %d\n", sum/3, val[0], val[1], val[2], val[3], val[4]);
+
+	return (sum/3);
+}
+
+static int sharpsl_check_battery_temp(void)
+{
+	int val, i, buff[5];
+
+	/* Check battery temperature */
+	for (i=0; i<5; i++) {
+		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP);
+		sharpsl_pm.machinfo->measure_temp(1);
+		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_TEMP);
+		buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_TEMP);
+		sharpsl_pm.machinfo->measure_temp(0);
+	}
+
+	val = get_select_val(buff);
+
+	dev_dbg(sharpsl_pm.dev, "Temperature: %d\n", val);
+	if (val > sharpsl_pm.machinfo->charge_on_temp) {
+		printk(KERN_WARNING "Not charging: temperature out of limits.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int sharpsl_check_battery_voltage(void)
+{
+	int val, i, buff[5];
+
+	/* disable charge, enable discharge */
+	sharpsl_pm.machinfo->charge(0);
+	sharpsl_pm.machinfo->discharge(1);
+	mdelay(SHARPSL_WAIT_DISCHARGE_ON);
+
+	if (sharpsl_pm.machinfo->discharge1)
+		sharpsl_pm.machinfo->discharge1(1);
+
+	/* Check battery voltage */
+	for (i=0; i<5; i++) {
+		buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
+		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT);
+	}
+
+	if (sharpsl_pm.machinfo->discharge1)
+		sharpsl_pm.machinfo->discharge1(0);
+
+	sharpsl_pm.machinfo->discharge(0);
+
+	val = get_select_val(buff);
+	dev_dbg(sharpsl_pm.dev, "Battery Voltage: %d\n", val);
+
+	if (val < sharpsl_pm.machinfo->charge_on_volt)
+		return -1;
+
+	return 0;
+}
+#endif
+
+static int sharpsl_ac_check(void)
+{
+	int temp, i, buff[5];
+
+	for (i=0; i<5; i++) {
+		buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_ACIN_VOLT);
+		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_ACIN);
+	}
+
+	temp = get_select_val(buff);
+	dev_dbg(sharpsl_pm.dev, "AC Voltage: %d\n",temp);
+
+	if ((temp > sharpsl_pm.machinfo->charge_acin_high) || (temp < sharpsl_pm.machinfo->charge_acin_low)) {
+		dev_err(sharpsl_pm.dev, "Error: AC check failed.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int sharpsl_pm_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	sharpsl_pm.flags |= SHARPSL_SUSPENDED;
+	flush_scheduled_work();
+
+	if (sharpsl_pm.charge_mode == CHRG_ON)
+		sharpsl_pm.flags |= SHARPSL_DO_OFFLINE_CHRG;
+	else
+		sharpsl_pm.flags &= ~SHARPSL_DO_OFFLINE_CHRG;
+
+	return 0;
+}
+
+static int sharpsl_pm_resume(struct platform_device *pdev)
+{
+	/* Clear the reset source indicators as they break the bootloader upon reboot */
+	RCSR = 0x0f;
+	sharpsl_average_clear();
+	sharpsl_pm.flags &= ~SHARPSL_APM_QUEUED;
+	sharpsl_pm.flags &= ~SHARPSL_SUSPENDED;
+
+	return 0;
+}
+
+static void corgi_goto_sleep(unsigned long alarm_time, unsigned int alarm_enable, suspend_state_t state)
+{
+	dev_dbg(sharpsl_pm.dev, "Time is: %08x\n",RCNR);
+
+	dev_dbg(sharpsl_pm.dev, "Offline Charge Activate = %d\n",sharpsl_pm.flags & SHARPSL_DO_OFFLINE_CHRG);
+	/* not charging and AC-IN! */
+
+	if ((sharpsl_pm.flags & SHARPSL_DO_OFFLINE_CHRG) && (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN))) {
+		dev_dbg(sharpsl_pm.dev, "Activating Offline Charger...\n");
+		sharpsl_pm.charge_mode = CHRG_OFF;
+		sharpsl_pm.flags &= ~SHARPSL_DO_OFFLINE_CHRG;
+		sharpsl_off_charge_battery();
+	}
+
+	sharpsl_pm.machinfo->presuspend();
+
+	PEDR = 0xffffffff; /* clear it */
+
+	sharpsl_pm.flags &= ~SHARPSL_ALARM_ACTIVE;
+	if ((sharpsl_pm.charge_mode == CHRG_ON) && ((alarm_enable && ((alarm_time - RCNR) > (SHARPSL_BATCHK_TIME_SUSPEND + 30))) || !alarm_enable)) {
+		RTSR &= RTSR_ALE;
+		RTAR = RCNR + SHARPSL_BATCHK_TIME_SUSPEND;
+		dev_dbg(sharpsl_pm.dev, "Charging alarm at: %08x\n",RTAR);
+		sharpsl_pm.flags |= SHARPSL_ALARM_ACTIVE;
+	} else if (alarm_enable) {
+		RTSR &= RTSR_ALE;
+		RTAR = alarm_time;
+		dev_dbg(sharpsl_pm.dev, "User alarm at: %08x\n",RTAR);
+	} else {
+		dev_dbg(sharpsl_pm.dev, "No alarms set.\n");
+	}
+
+	pxa_pm_enter(state);
+
+	sharpsl_pm.machinfo->postsuspend();
+
+	dev_dbg(sharpsl_pm.dev, "Corgi woken up from suspend: %08x\n",PEDR);
+}
+
+static int corgi_enter_suspend(unsigned long alarm_time, unsigned int alarm_enable, suspend_state_t state)
+{
+	if (!sharpsl_pm.machinfo->should_wakeup(!(sharpsl_pm.flags & SHARPSL_ALARM_ACTIVE) && alarm_enable) )
+	{
+		if (!(sharpsl_pm.flags & SHARPSL_ALARM_ACTIVE)) {
+			dev_dbg(sharpsl_pm.dev, "No user triggered wakeup events and not charging. Strange. Suspend.\n");
+			corgi_goto_sleep(alarm_time, alarm_enable, state);
+			return 1;
+		}
+		if(sharpsl_off_charge_battery()) {
+			dev_dbg(sharpsl_pm.dev, "Charging. Suspend...\n");
+			corgi_goto_sleep(alarm_time, alarm_enable, state);
+			return 1;
+		}
+		dev_dbg(sharpsl_pm.dev, "User triggered wakeup in offline charger.\n");
+	}
+
+	if ((!sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_LOCK)) || (sharpsl_fatal_check() < 0) )
+	{
+		dev_err(sharpsl_pm.dev, "Fatal condition. Suspend.\n");
+		corgi_goto_sleep(alarm_time, alarm_enable, state);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int corgi_pxa_pm_enter(suspend_state_t state)
+{
+	unsigned long alarm_time = RTAR;
+	unsigned int alarm_status = ((RTSR & RTSR_ALE) != 0);
+
+	dev_dbg(sharpsl_pm.dev, "SharpSL suspending for first time.\n");
+
+	corgi_goto_sleep(alarm_time, alarm_status, state);
+
+	while (corgi_enter_suspend(alarm_time,alarm_status,state))
+		{}
+
+	if (sharpsl_pm.machinfo->earlyresume)
+		sharpsl_pm.machinfo->earlyresume();
+
+	dev_dbg(sharpsl_pm.dev, "SharpSL resuming...\n");
+
+	return 0;
+}
+
+/*
+ * Check for fatal battery errors
+ * Fatal returns -1
+ */
+static int sharpsl_fatal_check(void)
+{
+	int buff[5], temp, i, acin;
+
+	dev_dbg(sharpsl_pm.dev, "sharpsl_fatal_check entered\n");
+
+	/* Check AC-Adapter */
+	acin = sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_ACIN);
+
+	if (acin && (sharpsl_pm.charge_mode == CHRG_ON)) {
+		sharpsl_pm.machinfo->charge(0);
+		udelay(100);
+		sharpsl_pm.machinfo->discharge(1);	/* enable discharge */
+		mdelay(SHARPSL_WAIT_DISCHARGE_ON);
+	}
+
+	if (sharpsl_pm.machinfo->discharge1)
+		sharpsl_pm.machinfo->discharge1(1);
+
+	/* Check battery : check inserting battery ? */
+	for (i=0; i<5; i++) {
+		buff[i] = sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT);
+		mdelay(SHARPSL_CHECK_BATTERY_WAIT_TIME_VOLT);
+	}
+
+	if (sharpsl_pm.machinfo->discharge1)
+		sharpsl_pm.machinfo->discharge1(0);
+
+	if (acin && (sharpsl_pm.charge_mode == CHRG_ON)) {
+		udelay(100);
+		sharpsl_pm.machinfo->charge(1);
+		sharpsl_pm.machinfo->discharge(0);
+	}
+
+	temp = get_select_val(buff);
+	dev_dbg(sharpsl_pm.dev, "sharpsl_fatal_check: acin: %d, discharge voltage: %d, no discharge: %ld\n", acin, temp, sharpsl_pm.machinfo->read_devdata(SHARPSL_BATT_VOLT));
+
+	if ((acin && (temp < sharpsl_pm.machinfo->fatal_acin_volt)) ||
+			(!acin && (temp < sharpsl_pm.machinfo->fatal_noacin_volt)))
+		return -1;
+	return 0;
+}
+
+static int sharpsl_off_charge_error(void)
+{
+	dev_err(sharpsl_pm.dev, "Offline Charger: Error occurred.\n");
+	sharpsl_pm.machinfo->charge(0);
+	sharpsl_pm_led(SHARPSL_LED_ERROR);
+	sharpsl_pm.charge_mode = CHRG_ERROR;
+	return 1;
+}
+
+/*
+ * Charging Control while suspended
+ * Return 1 - go straight to sleep
+ * Return 0 - sleep or wakeup depending on other factors
+ */
+static int sharpsl_off_charge_battery(void)
+{
+	int time;
+
+	dev_dbg(sharpsl_pm.dev, "Charge Mode: %d\n", sharpsl_pm.charge_mode);
+
+	if (sharpsl_pm.charge_mode == CHRG_OFF) {
+		dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 1\n");
+
+		/* AC Check */
+		if ((sharpsl_ac_check() < 0) || (sharpsl_check_battery_temp() < 0))
+			return sharpsl_off_charge_error();
+
+		/* Start Charging */
+		sharpsl_pm_led(SHARPSL_LED_ON);
+		sharpsl_pm.machinfo->charge(0);
+		mdelay(SHARPSL_CHARGE_WAIT_TIME);
+		sharpsl_pm.machinfo->charge(1);
+
+		sharpsl_pm.charge_mode = CHRG_ON;
+		sharpsl_pm.full_count = 0;
+
+		return 1;
+	} else if (sharpsl_pm.charge_mode != CHRG_ON) {
+		return 1;
+	}
+
+	if (sharpsl_pm.full_count == 0) {
+		int time;
+
+		dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 2\n");
+
+		if ((sharpsl_check_battery_temp() < 0) || (sharpsl_check_battery_voltage() < 0))
+			return sharpsl_off_charge_error();
+
+		sharpsl_pm.machinfo->charge(0);
+		mdelay(SHARPSL_CHARGE_WAIT_TIME);
+		sharpsl_pm.machinfo->charge(1);
+		sharpsl_pm.charge_mode = CHRG_ON;
+
+		mdelay(SHARPSL_CHARGE_CO_CHECK_TIME);
+
+		time = RCNR;
+		while(1) {
+			/* Check if any wakeup event had occurred */
+			if (sharpsl_pm.machinfo->charger_wakeup() != 0)
+				return 0;
+			/* Check for timeout */
+			if ((RCNR - time) > SHARPSL_WAIT_CO_TIME)
+				return 1;
+			if (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_CHRGFULL)) {
+				dev_dbg(sharpsl_pm.dev, "Offline Charger: Charge full occurred. Retrying to check\n");
+				sharpsl_pm.full_count++;
+				sharpsl_pm.machinfo->charge(0);
+				mdelay(SHARPSL_CHARGE_WAIT_TIME);
+				sharpsl_pm.machinfo->charge(1);
+				return 1;
+			}
+		}
+	}
+
+	dev_dbg(sharpsl_pm.dev, "Offline Charger: Step 3\n");
+
+	mdelay(SHARPSL_CHARGE_CO_CHECK_TIME);
+
+	time = RCNR;
+	while(1) {
+		/* Check if any wakeup event had occurred */
+		if (sharpsl_pm.machinfo->charger_wakeup() != 0)
+			return 0;
+		/* Check for timeout */
+		if ((RCNR-time) > SHARPSL_WAIT_CO_TIME) {
+			if (sharpsl_pm.full_count > SHARPSL_CHARGE_RETRY_CNT) {
+				dev_dbg(sharpsl_pm.dev, "Offline Charger: Not charged sufficiently. Retrying.\n");
+				sharpsl_pm.full_count = 0;
+			}
+			sharpsl_pm.full_count++;
+			return 1;
+		}
+		if (sharpsl_pm.machinfo->read_devdata(SHARPSL_STATUS_CHRGFULL)) {
+			dev_dbg(sharpsl_pm.dev, "Offline Charger: Charging complete.\n");
+			sharpsl_pm_led(SHARPSL_LED_OFF);
+			sharpsl_pm.machinfo->charge(0);
+			sharpsl_pm.charge_mode = CHRG_DONE;
+			return 1;
+		}
+	}
+}
+#else
+#define sharpsl_pm_suspend	NULL
+#define sharpsl_pm_resume	NULL
+#endif
+
+static ssize_t battery_percentage_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n",sharpsl_pm.battstat.mainbat_percent);
+}
+
+static ssize_t battery_voltage_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n",sharpsl_pm.battstat.mainbat_voltage);
+}
+
+static DEVICE_ATTR(battery_percentage, 0444, battery_percentage_show, NULL);
+static DEVICE_ATTR(battery_voltage, 0444, battery_voltage_show, NULL);
+
+extern void (*apm_get_power_status)(struct apm_power_info *);
+
+static void sharpsl_apm_get_power_status(struct apm_power_info *info)
+{
+	info->ac_line_status = sharpsl_pm.battstat.ac_status;
+
+	if (sharpsl_pm.charge_mode == CHRG_ON)
+		info->battery_status = APM_BATTERY_STATUS_CHARGING;
+	else
+		info->battery_status = sharpsl_pm.battstat.mainbat_status;
+
+	info->battery_flag = (1 << info->battery_status);
+	info->battery_life = sharpsl_pm.battstat.mainbat_percent;
+}
+
+#ifdef CONFIG_PM
+static struct platform_suspend_ops sharpsl_pm_ops = {
+	.enter		= corgi_pxa_pm_enter,
+	.valid		= suspend_valid_only_mem,
+};
+#endif
+
+static int __init sharpsl_pm_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	if (!pdev->dev.platform_data)
+		return -EINVAL;
+
+	sharpsl_pm.dev = &pdev->dev;
+	sharpsl_pm.machinfo = pdev->dev.platform_data;
+	sharpsl_pm.charge_mode = CHRG_OFF;
+	sharpsl_pm.flags = 0;
+
+	init_timer(&sharpsl_pm.ac_timer);
+	sharpsl_pm.ac_timer.function = sharpsl_ac_timer;
+
+	init_timer(&sharpsl_pm.chrg_full_timer);
+	sharpsl_pm.chrg_full_timer.function = sharpsl_chrg_full_timer;
+
+	led_trigger_register_simple("sharpsl-charge", &sharpsl_charge_led_trigger);
+
+	sharpsl_pm.machinfo->init();
+
+	ret = device_create_file(&pdev->dev, &dev_attr_battery_percentage);
+	ret |= device_create_file(&pdev->dev, &dev_attr_battery_voltage);
+	if (ret != 0)
+		dev_warn(&pdev->dev, "Failed to register attributes (%d)\n", ret);
+
+	apm_get_power_status = sharpsl_apm_get_power_status;
+
+#ifdef CONFIG_PM
+	suspend_set_ops(&sharpsl_pm_ops);
+#endif
+
+	mod_timer(&sharpsl_pm.ac_timer, jiffies + msecs_to_jiffies(250));
+
+	return 0;
+}
+
+static int sharpsl_pm_remove(struct platform_device *pdev)
+{
+	suspend_set_ops(NULL);
+
+	device_remove_file(&pdev->dev, &dev_attr_battery_percentage);
+	device_remove_file(&pdev->dev, &dev_attr_battery_voltage);
+
+	led_trigger_unregister_simple(sharpsl_charge_led_trigger);
+
+	sharpsl_pm.machinfo->exit();
+
+	del_timer_sync(&sharpsl_pm.chrg_full_timer);
+	del_timer_sync(&sharpsl_pm.ac_timer);
+
+	return 0;
+}
+
+static struct platform_driver sharpsl_pm_driver = {
+	.probe		= sharpsl_pm_probe,
+	.remove		= sharpsl_pm_remove,
+	.suspend	= sharpsl_pm_suspend,
+	.resume		= sharpsl_pm_resume,
+	.driver		= {
+		.name		= "sharpsl-pm",
+	},
+};
+
+static int __devinit sharpsl_pm_init(void)
+{
+	return platform_driver_register(&sharpsl_pm_driver);
+}
+
+static void sharpsl_pm_exit(void)
+{
+	platform_driver_unregister(&sharpsl_pm_driver);
+}
+
+late_initcall(sharpsl_pm_init);
+module_exit(sharpsl_pm_exit);
-- 
cgit v0.10.2


From d48898a3c88c1f36a66a8d4e3e45843c3171c548 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Sat, 28 Mar 2009 18:18:52 +0300
Subject: [ARM] pxa/sharpsl_pm: merge pxa-specific code into generic one

As pxa now is the only user of sharpsl_pm we can drop several startup
functions into generic code thus dropping several global functions.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/corgi_pm.c b/arch/arm/mach-pxa/corgi_pm.c
index 7f04b3a..a093282 100644
--- a/arch/arm/mach-pxa/corgi_pm.c
+++ b/arch/arm/mach-pxa/corgi_pm.c
@@ -41,7 +41,6 @@ static void corgi_charger_init(void)
 	pxa_gpio_mode(CORGI_GPIO_CHRG_ON | GPIO_OUT);
 	pxa_gpio_mode(CORGI_GPIO_CHRG_UKN | GPIO_OUT);
 	pxa_gpio_mode(CORGI_GPIO_KEY_INT | GPIO_IN);
-	sharpsl_pm_pxa_init();
 }
 
 static void corgi_measure_temp(int on)
@@ -191,7 +190,7 @@ unsigned long corgipm_read_devdata(int type)
 
 static struct sharpsl_charger_machinfo corgi_pm_machinfo = {
 	.init            = corgi_charger_init,
-	.exit            = sharpsl_pm_pxa_remove,
+	.exit            = NULL,
 	.gpio_batlock    = CORGI_GPIO_BAT_COVER,
 	.gpio_acin       = CORGI_GPIO_AC_IN,
 	.gpio_batfull    = CORGI_GPIO_CHRG_FULL,
diff --git a/arch/arm/mach-pxa/include/mach/sharpsl_pm.h b/arch/arm/mach-pxa/include/mach/sharpsl_pm.h
index 2d00db2..1920dc6 100644
--- a/arch/arm/mach-pxa/include/mach/sharpsl_pm.h
+++ b/arch/arm/mach-pxa/include/mach/sharpsl_pm.h
@@ -8,8 +8,8 @@
  * published by the Free Software Foundation.
  *
  */
-
-#include <linux/interrupt.h>
+#ifndef _MACH_SHARPSL_PM
+#define _MACH_SHARPSL_PM
 
 struct sharpsl_charger_machinfo {
 	void (*init)(void);
@@ -100,7 +100,5 @@ extern struct sharpsl_pm_status sharpsl_pm;
 
 void sharpsl_battery_kick(void);
 void sharpsl_pm_led(int val);
-irqreturn_t sharpsl_ac_isr(int irq, void *dev_id);
-irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id);
-irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id);
 
+#endif
diff --git a/arch/arm/mach-pxa/sharpsl.h b/arch/arm/mach-pxa/sharpsl.h
index f1cd00e..55259f4 100644
--- a/arch/arm/mach-pxa/sharpsl.h
+++ b/arch/arm/mach-pxa/sharpsl.h
@@ -44,8 +44,6 @@ void corgi_lcdtg_hw_init(int mode);
 
 extern struct battery_thresh spitz_battery_levels_acin[];
 extern struct battery_thresh spitz_battery_levels_noac[];
-void sharpsl_pm_pxa_init(void);
-void sharpsl_pm_pxa_remove(void);
 int sharpsl_pm_pxa_read_max1111(int channel);
 
 
diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
index 9d61c49..540b567 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.c
+++ b/arch/arm/mach-pxa/sharpsl_pm.c
@@ -15,19 +15,16 @@
 #undef DEBUG
 
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/apm-emulation.h>
 #include <linux/timer.h>
-#include <linux/apm_bios.h>
 #include <linux/delay.h>
 #include <linux/leds.h>
 #include <linux/suspend.h>
 
-#include <mach/hardware.h>
 #include <asm/mach-types.h>
 #include <mach/pm.h>
 #include <mach/pxa2xx-regs.h>
@@ -36,6 +33,8 @@
 #include <mach/sharpsl.h>
 #include <mach/sharpsl_pm.h>
 
+#include "sharpsl.h"
+
 /*
  * Constants
  */
@@ -73,8 +72,8 @@ static void sharpsl_battery_thread(struct work_struct *private_);
  * Variables
  */
 struct sharpsl_pm_status sharpsl_pm;
-DECLARE_DELAYED_WORK(toggle_charger, sharpsl_charge_toggle);
-DECLARE_DELAYED_WORK(sharpsl_bat, sharpsl_battery_thread);
+static DECLARE_DELAYED_WORK(toggle_charger, sharpsl_charge_toggle);
+static DECLARE_DELAYED_WORK(sharpsl_bat, sharpsl_battery_thread);
 DEFINE_LED_TRIGGER(sharpsl_charge_led_trigger);
 
 
@@ -194,52 +193,6 @@ int sharpsl_pm_pxa_read_max1111(int channel)
 #endif
 }
 
-void sharpsl_pm_pxa_init(void)
-{
-	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_acin | GPIO_IN);
-	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batfull | GPIO_IN);
-	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batlock | GPIO_IN);
-
-	/* Register interrupt handlers */
-	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr, IRQF_DISABLED, "AC Input Detect", sharpsl_ac_isr)) {
-		dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin));
-	}
-	else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin),IRQ_TYPE_EDGE_BOTH);
-
-	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock), sharpsl_fatal_isr, IRQF_DISABLED, "Battery Cover", sharpsl_fatal_isr)) {
-		dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock));
-	}
-	else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock),IRQ_TYPE_EDGE_FALLING);
-
-	if (sharpsl_pm.machinfo->gpio_fatal) {
-		if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal), sharpsl_fatal_isr, IRQF_DISABLED, "Fatal Battery", sharpsl_fatal_isr)) {
-			dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal));
-		}
-		else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal),IRQ_TYPE_EDGE_FALLING);
-	}
-
-	if (sharpsl_pm.machinfo->batfull_irq)
-	{
-		/* Register interrupt handler. */
-		if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr, IRQF_DISABLED, "CO", sharpsl_chrg_full_isr)) {
-			dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull));
-		}
-		else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull),IRQ_TYPE_EDGE_RISING);
-	}
-}
-
-void sharpsl_pm_pxa_remove(void)
-{
-	free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr);
-	free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock), sharpsl_fatal_isr);
-
-	if (sharpsl_pm.machinfo->gpio_fatal)
-		free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal), sharpsl_fatal_isr);
-
-	if (sharpsl_pm.machinfo->batfull_irq)
-		free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr);
-}
-
 static int get_percentage(int voltage)
 {
 	int i = sharpsl_pm.machinfo->bat_levels - 1;
@@ -427,7 +380,7 @@ static void sharpsl_ac_timer(unsigned long data)
 }
 
 
-irqreturn_t sharpsl_ac_isr(int irq, void *dev_id)
+static irqreturn_t sharpsl_ac_isr(int irq, void *dev_id)
 {
 	/* Delay the event slightly to debounce */
 	/* Must be a smaller delay than the chrg_full_isr below */
@@ -462,7 +415,7 @@ static void sharpsl_chrg_full_timer(unsigned long data)
 /* Charging Finished Interrupt (Not present on Corgi) */
 /* Can trigger at the same time as an AC status change so
    delay until after that has been processed */
-irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id)
+static irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id)
 {
 	if (sharpsl_pm.flags & SHARPSL_SUSPENDED)
 		return IRQ_HANDLED;
@@ -473,7 +426,7 @@ irqreturn_t sharpsl_chrg_full_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id)
+static irqreturn_t sharpsl_fatal_isr(int irq, void *dev_id)
 {
 	int is_fatal = 0;
 
@@ -966,6 +919,37 @@ static int __init sharpsl_pm_probe(struct platform_device *pdev)
 
 	sharpsl_pm.machinfo->init();
 
+	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_acin | GPIO_IN);
+	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batfull | GPIO_IN);
+	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batlock | GPIO_IN);
+
+	/* Register interrupt handlers */
+	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr, IRQF_DISABLED, "AC Input Detect", sharpsl_ac_isr)) {
+		dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin));
+	}
+	else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin),IRQ_TYPE_EDGE_BOTH);
+
+	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock), sharpsl_fatal_isr, IRQF_DISABLED, "Battery Cover", sharpsl_fatal_isr)) {
+		dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock));
+	}
+	else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock),IRQ_TYPE_EDGE_FALLING);
+
+	if (sharpsl_pm.machinfo->gpio_fatal) {
+		if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal), sharpsl_fatal_isr, IRQF_DISABLED, "Fatal Battery", sharpsl_fatal_isr)) {
+			dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal));
+		}
+		else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal),IRQ_TYPE_EDGE_FALLING);
+	}
+
+	if (sharpsl_pm.machinfo->batfull_irq)
+	{
+		/* Register interrupt handler. */
+		if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr, IRQF_DISABLED, "CO", sharpsl_chrg_full_isr)) {
+			dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull));
+		}
+		else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull),IRQ_TYPE_EDGE_RISING);
+	}
+
 	ret = device_create_file(&pdev->dev, &dev_attr_battery_percentage);
 	ret |= device_create_file(&pdev->dev, &dev_attr_battery_voltage);
 	if (ret != 0)
@@ -991,7 +975,17 @@ static int sharpsl_pm_remove(struct platform_device *pdev)
 
 	led_trigger_unregister_simple(sharpsl_charge_led_trigger);
 
-	sharpsl_pm.machinfo->exit();
+	free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr);
+	free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock), sharpsl_fatal_isr);
+
+	if (sharpsl_pm.machinfo->gpio_fatal)
+		free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal), sharpsl_fatal_isr);
+
+	if (sharpsl_pm.machinfo->batfull_irq)
+		free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr);
+
+	if (sharpsl_pm.machinfo->exit)
+		sharpsl_pm.machinfo->exit();
 
 	del_timer_sync(&sharpsl_pm.chrg_full_timer);
 	del_timer_sync(&sharpsl_pm.ac_timer);
diff --git a/arch/arm/mach-pxa/spitz_pm.c b/arch/arm/mach-pxa/spitz_pm.c
index 2e44905..724ffb0 100644
--- a/arch/arm/mach-pxa/spitz_pm.c
+++ b/arch/arm/mach-pxa/spitz_pm.c
@@ -41,7 +41,6 @@ static void spitz_charger_init(void)
 {
 	pxa_gpio_mode(SPITZ_GPIO_KEY_INT | GPIO_IN);
 	pxa_gpio_mode(SPITZ_GPIO_SYNC | GPIO_IN);
-	sharpsl_pm_pxa_init();
 }
 
 static void spitz_measure_temp(int on)
@@ -182,7 +181,7 @@ unsigned long spitzpm_read_devdata(int type)
 
 struct sharpsl_charger_machinfo spitz_pm_machinfo = {
 	.init             = spitz_charger_init,
-	.exit             = sharpsl_pm_pxa_remove,
+	.exit             = NULL,
 	.gpio_batlock     = SPITZ_GPIO_BAT_COVER,
 	.gpio_acin        = SPITZ_GPIO_AC_IN,
 	.gpio_batfull     = SPITZ_GPIO_CHRG_FULL,
-- 
cgit v0.10.2


From 5452537210d59268cbc283526dc050f0822385f1 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Sat, 28 Mar 2009 18:18:53 +0300
Subject: [ARM] pxa/sharpsl_pm: drop set_irq_type calls

Merge set_irq_type() into respective request_irq() calls.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
index 540b567..a8facf4 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.c
+++ b/arch/arm/mach-pxa/sharpsl_pm.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/apm-emulation.h>
 #include <linux/timer.h>
@@ -924,30 +923,26 @@ static int __init sharpsl_pm_probe(struct platform_device *pdev)
 	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batlock | GPIO_IN);
 
 	/* Register interrupt handlers */
-	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr, IRQF_DISABLED, "AC Input Detect", sharpsl_ac_isr)) {
+	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr, IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, "AC Input Detect", sharpsl_ac_isr)) {
 		dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin));
 	}
-	else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin),IRQ_TYPE_EDGE_BOTH);
 
-	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock), sharpsl_fatal_isr, IRQF_DISABLED, "Battery Cover", sharpsl_fatal_isr)) {
+	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock), sharpsl_fatal_isr, IRQF_DISABLED | IRQF_TRIGGER_FALLING, "Battery Cover", sharpsl_fatal_isr)) {
 		dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock));
 	}
-	else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock),IRQ_TYPE_EDGE_FALLING);
 
 	if (sharpsl_pm.machinfo->gpio_fatal) {
-		if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal), sharpsl_fatal_isr, IRQF_DISABLED, "Fatal Battery", sharpsl_fatal_isr)) {
+		if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal), sharpsl_fatal_isr, IRQF_DISABLED | IRQF_TRIGGER_FALLING, "Fatal Battery", sharpsl_fatal_isr)) {
 			dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal));
 		}
-		else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal),IRQ_TYPE_EDGE_FALLING);
 	}
 
 	if (sharpsl_pm.machinfo->batfull_irq)
 	{
 		/* Register interrupt handler. */
-		if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr, IRQF_DISABLED, "CO", sharpsl_chrg_full_isr)) {
+		if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr, IRQF_DISABLED | IRQF_TRIGGER_RISING, "CO", sharpsl_chrg_full_isr)) {
 			dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull));
 		}
-		else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull),IRQ_TYPE_EDGE_RISING);
 	}
 
 	ret = device_create_file(&pdev->dev, &dev_attr_battery_percentage);
-- 
cgit v0.10.2


From d5af27783f8c898b64f7079908ad3e27632a71ea Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Sat, 28 Mar 2009 18:18:54 +0300
Subject: [ARM] pxa/sharpsl_pm: cleanup of gpio-related code.

Replace calls to pxa_gpio_mode with respective gpio_request() /
gpio_direction_input(). In principle these calls can be dropped as
the only use of those GPIO are IRQs and IRQ code does setup GPIO
correctly.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Eric Miao <eric.miao@marvell.com>

diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
index a8facf4..dfac7b9 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.c
+++ b/arch/arm/mach-pxa/sharpsl_pm.c
@@ -23,6 +23,7 @@
 #include <linux/delay.h>
 #include <linux/leds.h>
 #include <linux/suspend.h>
+#include <linux/gpio.h>
 
 #include <asm/mach-types.h>
 #include <mach/pm.h>
@@ -918,9 +919,12 @@ static int __init sharpsl_pm_probe(struct platform_device *pdev)
 
 	sharpsl_pm.machinfo->init();
 
-	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_acin | GPIO_IN);
-	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batfull | GPIO_IN);
-	pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batlock | GPIO_IN);
+	gpio_request(sharpsl_pm.machinfo->gpio_acin, "AC IN");
+	gpio_direction_input(sharpsl_pm.machinfo->gpio_acin);
+	gpio_request(sharpsl_pm.machinfo->gpio_batfull, "Battery Full");
+	gpio_direction_input(sharpsl_pm.machinfo->gpio_batfull);
+	gpio_request(sharpsl_pm.machinfo->gpio_batlock, "Battery Lock");
+	gpio_direction_input(sharpsl_pm.machinfo->gpio_batlock);
 
 	/* Register interrupt handlers */
 	if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr, IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, "AC Input Detect", sharpsl_ac_isr)) {
@@ -979,6 +983,10 @@ static int sharpsl_pm_remove(struct platform_device *pdev)
 	if (sharpsl_pm.machinfo->batfull_irq)
 		free_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr);
 
+	gpio_free(sharpsl_pm.machinfo->gpio_batlock);
+	gpio_free(sharpsl_pm.machinfo->gpio_batfull);
+	gpio_free(sharpsl_pm.machinfo->gpio_acin);
+
 	if (sharpsl_pm.machinfo->exit)
 		sharpsl_pm.machinfo->exit();
 
-- 
cgit v0.10.2


From 9866b7e86a2ce4daa677be750e3ccbfc65d187f5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 11 Jun 2009 16:25:01 +0200
Subject: x86: memtest: use pointers of equal type for comparison

Commit c9690998ef48ffefeccb91c70a7739eebdea57f9 (x86: memtest: remove
64-bit division) introduced following compile warning:

arch/x86/mm/memtest.c: In function 'memtest':
arch/x86/mm/memtest.c:56: warning: comparison of distinct pointer types lacks a cast
arch/x86/mm/memtest.c:58: warning: comparison of distinct pointer types lacks a cast

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index c0bedcd..18d244f 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -40,21 +40,20 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
 
 static void __init memtest(u64 pattern, u64 start_phys, u64 size)
 {
-	u64 *p;
-	void *start, *end;
+	u64 *p, *start, *end;
 	u64 start_bad, last_bad;
 	u64 start_phys_aligned;
-	size_t incr;
+	const size_t incr = sizeof(pattern);
 
-	incr = sizeof(pattern);
 	start_phys_aligned = ALIGN(start_phys, incr);
 	start = __va(start_phys_aligned);
-	end = start + size - (start_phys_aligned - start_phys);
+	end = start + (size - (start_phys_aligned - start_phys)) / incr;
 	start_bad = 0;
 	last_bad = 0;
 
 	for (p = start; p < end; p++)
 		*p = pattern;
+
 	for (p = start; p < end; p++, start_phys_aligned += incr) {
 		if (*p == pattern)
 			continue;
-- 
cgit v0.10.2


From dc890c2dcd63a90de68ee5f0253eefbb89d725f0 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Sun, 7 Jun 2009 23:27:31 +0100
Subject: [ARM] 5544/1: Trust PrimeCell resource sizes

I found the PrimeCell/AMBA Bus drivers distrusting the resource
passed in as part of the struct amba_device abstraction. This
patch removes all hard coded resource sizes found in the PrimeCell
drivers and move the responsibility of this definition back to
the platform/board device definition, which already exist and
appear to be correct for all in-tree users of these drivers.
We do this using the resource_size() inline function which was
also replicated in the only driver using the resource size, so
that has been changed too. The KMI_SIZE was left in kmi.h in case
someone likes it. Test-compiled against Versatile and Integrator
defconfigs, seems to work but I don't posess these boards and
cannot test them.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c
index a28c06d..89b3941 100644
--- a/drivers/input/serio/ambakmi.c
+++ b/drivers/input/serio/ambakmi.c
@@ -135,7 +135,7 @@ static int amba_kmi_probe(struct amba_device *dev, struct amba_id *id)
 	io->dev.parent	= &dev->dev;
 
 	kmi->io 	= io;
-	kmi->base	= ioremap(dev->res.start, KMI_SIZE);
+	kmi->base	= ioremap(dev->res.start, resource_size(&dev->res));
 	if (!kmi->base) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 7d4febd..e1aa847 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -546,7 +546,7 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 		host->mclk = clk_get_rate(host->clk);
 		DBG(host, "eventual mclk rate: %u Hz\n", host->mclk);
 	}
-	host->base = ioremap(dev->res.start, SZ_4K);
+	host->base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!host->base) {
 		ret = -ENOMEM;
 		goto clk_disable;
diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c
index aaf1f75..457231b 100644
--- a/drivers/rtc/rtc-pl030.c
+++ b/drivers/rtc/rtc-pl030.c
@@ -117,7 +117,7 @@ static int pl030_probe(struct amba_device *dev, struct amba_id *id)
 		goto err_rtc;
 	}
 
-	rtc->base = ioremap(dev->res.start, SZ_4K);
+	rtc->base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!rtc->base) {
 		ret = -ENOMEM;
 		goto err_map;
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index 451fc13..f41873f 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -142,8 +142,7 @@ static int pl031_probe(struct amba_device *adev, struct amba_id *id)
 		goto out;
 	}
 
-	ldata->base = ioremap(adev->res.start,
-			      adev->res.end - adev->res.start + 1);
+	ldata->base = ioremap(adev->res.start, resource_size(&adev->res));
 	if (!ldata->base) {
 		ret = -ENOMEM;
 		goto out_no_remap;
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index cdc049d..58a4879 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -686,7 +686,7 @@ static int pl010_probe(struct amba_device *dev, struct amba_id *id)
 		goto out;
 	}
 
-	base = ioremap(dev->res.start, PAGE_SIZE);
+	base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!base) {
 		ret = -ENOMEM;
 		goto free;
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 8c5bda2..bf82e28 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -767,7 +767,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 		goto out;
 	}
 
-	base = ioremap(dev->res.start, PAGE_SIZE);
+	base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!base) {
 		ret = -ENOMEM;
 		goto free;
diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index d1f80ba..fb8163d 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -351,7 +351,7 @@ static int clcdfb_register(struct clcd_fb *fb)
 	}
 
 	fb->fb.fix.mmio_start	= fb->dev->res.start;
-	fb->fb.fix.mmio_len	= 4096;
+	fb->fb.fix.mmio_len	= resource_size(&fb->dev->res);
 
 	fb->regs = ioremap(fb->fb.fix.mmio_start, fb->fb.fix.mmio_len);
 	if (!fb->regs) {
diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c
index 5c48e36..dc78272 100644
--- a/sound/arm/aaci.c
+++ b/sound/arm/aaci.c
@@ -1089,7 +1089,7 @@ static int __devinit aaci_probe(struct amba_device *dev, struct amba_id *id)
 		goto out;
 	}
 
-	aaci->base = ioremap(dev->res.start, SZ_4K);
+	aaci->base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!aaci->base) {
 		ret = -ENOMEM;
 		goto out;
-- 
cgit v0.10.2


From 4da52960fd1ae3ddd14901bc88b608cbeaa4b9a6 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 11 Jun 2009 14:54:01 +1000
Subject: perf_counters: powerpc: Add support for POWER7 processors

This adds the back-end for the PMU on POWER7 processors.  POWER7
has 4 fully-programmable counters and two fixed-function counters
(which do respect the freeze conditions, can generate interrupts,
and are writable, unlike PMC5/6 on POWER5+/6).

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18992.36329.189378.17992@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9ba1bb7..a2c6834 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -95,7 +95,8 @@ obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o power4-pmu.o ppc970-pmu.o \
-				   power5-pmu.o power5+-pmu.o power6-pmu.o
+				   power5-pmu.o power5+-pmu.o power6-pmu.o \
+				   power7-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 4990ce2..5d12e68 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -1181,6 +1181,7 @@ extern struct power_pmu ppc970_pmu;
 extern struct power_pmu power5_pmu;
 extern struct power_pmu power5p_pmu;
 extern struct power_pmu power6_pmu;
+extern struct power_pmu power7_pmu;
 
 static int init_perf_counters(void)
 {
@@ -1207,6 +1208,9 @@ static int init_perf_counters(void)
 	case 0x3e:
 		ppmu = &power6_pmu;
 		break;
+	case 0x3f:
+		ppmu = &power7_pmu;
+		break;
 	}
 
 	/*
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
new file mode 100644
index 0000000..dfac48d
--- /dev/null
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -0,0 +1,316 @@
+/*
+ * Performance counter support for POWER7 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_counter.h>
+#include <asm/reg.h>
+
+/*
+ * Bits in event code for POWER7
+ */
+#define PM_PMC_SH	16	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	12	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_COMBINE_SH	11	/* Combined event bit */
+#define PM_COMBINE_MSK	1
+#define PM_COMBINE_MSKS	0x800
+#define PM_L2SEL_SH	8	/* L2 event select */
+#define PM_L2SEL_MSK	7
+#define PM_PMCSEL_MSK	0xff
+
+/*
+ * Bits in MMCR1 for POWER7
+ */
+#define MMCR1_TTM0SEL_SH	60
+#define MMCR1_TTM1SEL_SH	56
+#define MMCR1_TTM2SEL_SH	52
+#define MMCR1_TTM3SEL_SH	48
+#define MMCR1_TTMSEL_MSK	0xf
+#define MMCR1_L2SEL_SH		45
+#define MMCR1_L2SEL_MSK		7
+#define MMCR1_PMC1_COMBINE_SH	35
+#define MMCR1_PMC2_COMBINE_SH	34
+#define MMCR1_PMC3_COMBINE_SH	33
+#define MMCR1_PMC4_COMBINE_SH	32
+#define MMCR1_PMC1SEL_SH	24
+#define MMCR1_PMC2SEL_SH	16
+#define MMCR1_PMC3SEL_SH	8
+#define MMCR1_PMC4SEL_SH	0
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0xff
+
+/*
+ * Bits in MMCRA
+ */
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *                                                 [  ><><><><><><>
+ *                                                  NC P6P5P4P3P2P1
+ *
+ * NC - number of counters
+ *     15: NC error 0x8000
+ *     12-14: number of events needing PMC1-4 0x7000
+ *
+ * P6
+ *     11: P6 error 0x800
+ *     10-11: Count of events needing PMC6
+ *
+ * P1..P5
+ *     0-9: Count of events needing PMC1..PMC5
+ */
+
+static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp)
+{
+	int pmc, sh;
+	u64 mask = 0, value = 0;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 6)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4))
+			return -1;
+	}
+	if (pmc < 5) {
+		/* need a counter from PMC1-4 set */
+		mask  |= 0x8000;
+		value |= 0x1000;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+#define MAX_ALT	2	/* at most 2 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x200f2, 0x300f2 },		/* PM_INST_DISP */
+	{ 0x200f4, 0x600f4 },		/* PM_RUN_CYC */
+	{ 0x400fa, 0x500fa },		/* PM_RUN_INST_CMPL */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(u64 event)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			break;
+		for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+			if (event == event_alternatives[i][j])
+				return i;
+	}
+	return -1;
+}
+
+static s64 find_alternative_decode(u64 event)
+{
+	int pmc, psel;
+
+	/* this only handles the 4x decode events */
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40)
+		return event - (1 << PM_PMC_SH) + 8;
+	if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48)
+		return event + (1 << PM_PMC_SH) - 8;
+	return -1;
+}
+
+static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, nalt = 1;
+	s64 ae;
+
+	alt[0] = event;
+	nalt = 1;
+	i = find_alternative(event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (ae && ae != event)
+				alt[nalt++] = ae;
+		}
+	} else {
+		ae = find_alternative_decode(event);
+		if (ae > 0)
+			alt[nalt++] = ae;
+	}
+
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state,
+		 * so PM_CYC is equivalent to PM_RUN_CYC
+		 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
+		 * This doesn't include alternatives that don't provide
+		 * any extra flexibility in assigning PMCs.
+		 */
+		j = nalt;
+		for (i = 0; i < nalt; ++i) {
+			switch (alt[i]) {
+			case 0x1e:	/* PM_CYC */
+				alt[j++] = 0x600f4;	/* PM_RUN_CYC */
+				break;
+			case 0x600f4:	/* PM_RUN_CYC */
+				alt[j++] = 0x1e;
+				break;
+			case 0x2:	/* PM_PPC_CMPL */
+				alt[j++] = 0x500fa;	/* PM_RUN_INST_CMPL */
+				break;
+			case 0x500fa:	/* PM_RUN_INST_CMPL */
+				alt[j++] = 0x2;	/* PM_PPC_CMPL */
+				break;
+			}
+		}
+		nalt = j;
+	}
+
+	return nalt;
+}
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power7_marked_instr_event(u64 event)
+{
+	int pmc, psel;
+	int unit;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	psel = event & PM_PMCSEL_MSK & ~1;	/* trim off edge/level bit */
+	if (pmc >= 5)
+		return 0;
+
+	switch (psel >> 4) {
+	case 2:
+		return pmc == 2 || pmc == 4;
+	case 3:
+		if (psel == 0x3c)
+			return pmc == 1;
+		if (psel == 0x3e)
+			return pmc != 2;
+		return 1;
+	case 4:
+	case 5:
+		return unit == 0xd;
+	case 6:
+		if (psel == 0x64)
+			return pmc >= 3;
+	case 8:
+		return unit == 0xd;
+	}
+	return 0;
+}
+
+static int power7_compute_mmcr(u64 event[], int n_ev,
+			       unsigned int hwc[], u64 mmcr[])
+{
+	u64 mmcr1 = 0;
+	u64 mmcra = 0;
+	unsigned int pmc, unit, combine, l2sel, psel;
+	unsigned int pmc_inuse = 0;
+	int i;
+
+	/* First pass to count resource use */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc > 6)
+				return -1;
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+		}
+	}
+
+	/* Second pass: assign PMCs, set all MMCR1 fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
+		l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			for (pmc = 0; pmc < 4; ++pmc) {
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			}
+			if (pmc >= 4)
+				return -1;
+			pmc_inuse |= 1 << pmc;
+		} else {
+			/* Direct or decoded event */
+			--pmc;
+		}
+		if (pmc <= 3) {
+			mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc);
+			mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc);
+			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+			if (unit == 6)	/* L2 events */
+				mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH;
+		}
+		if (power7_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+		hwc[i] = pmc;
+	}
+
+	/* Return MMCRx values */
+	mmcr[0] = 0;
+	if (pmc_inuse & 1)
+		mmcr[0] = MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr[0] |= MMCR0_PMCjCE;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcra;
+	return 0;
+}
+
+static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
+{
+	if (pmc <= 3)
+		mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power7_generic_events[] = {
+	[PERF_COUNT_CPU_CYCLES] = 0x1e,
+	[PERF_COUNT_INSTRUCTIONS] = 2,
+	[PERF_COUNT_CACHE_REFERENCES] = 0xc880,		/* LD_REF_L1_LSU */
+	[PERF_COUNT_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1 */
+	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN */
+	[PERF_COUNT_BRANCH_MISSES] = 0x400f6,		/* BR_MPRED */
+};
+
+struct power_pmu power7_pmu = {
+	.n_counter = 6,
+	.max_alternatives = MAX_ALT + 1,
+	.add_fields = 0x1555ull,
+	.test_adder = 0x3000ull,
+	.compute_mmcr = power7_compute_mmcr,
+	.get_constraint = power7_get_constraint,
+	.get_alternatives = power7_get_alternatives,
+	.disable_pmc = power7_disable_pmc,
+	.n_generic = ARRAY_SIZE(power7_generic_events),
+	.generic_events = power7_generic_events,
+};
-- 
cgit v0.10.2


From 106b506c3a8b74daa5751e83ed3e46438fcf9a52 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 11 Jun 2009 14:55:42 +1000
Subject: perf_counter: powerpc: Implement generalized cache events for POWER
 processors

This adds tables of event codes for the generalized cache events for
all the currently supported powerpc processors: POWER{4,5,5+,6,7} and
PPC970*, plus powerpc-specific code to use these tables when a
generalized cache event is requested.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <18992.36430.933526.742969@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index 1c60f0c..cc7c887 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -33,6 +33,9 @@ struct power_pmu {
 	u32	flags;
 	int	n_generic;
 	int	*generic_events;
+	int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+			       [PERF_COUNT_HW_CACHE_OP_MAX]
+			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
 };
 
 extern struct power_pmu *ppmu;
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5d12e68..bb20238 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -856,6 +856,36 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
 	}
 }
 
+/*
+ * Translate a generic cache event config to a raw event code.
+ */
+static int hw_perf_cache_event(u64 config, u64 *eventp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!ppmu->cache_events)
+		return -EINVAL;
+
+	/* unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*ppmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*eventp = ev;
+	return 0;
+}
+
 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
 	u64 ev;
@@ -868,13 +898,21 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if (counter->attr.type != PERF_TYPE_RAW) {
+	switch (counter->attr.type) {
+	case PERF_TYPE_HARDWARE:
 		ev = counter->attr.config;
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return ERR_PTR(-EOPNOTSUPP);
 		ev = ppmu->generic_events[ev];
-	} else {
+		break;
+	case PERF_TYPE_HW_CACHE:
+		err = hw_perf_cache_event(counter->attr.config, &ev);
+		if (err)
+			return ERR_PTR(err);
+		break;
+	case PERF_TYPE_RAW:
 		ev = counter->attr.config;
+		break;
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 836fa11..0e94b68 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -543,6 +543,46 @@ static int p4_generic_events[] = {
 	[PERF_COUNT_BRANCH_MISSES] = 0x331,		/* PM_BR_MPRED_CR */
 };
 
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x8c10,		0x3c10	},
+		[C(OP_WRITE)] = {	0x7c10,		0xc13	},
+		[C(OP_PREFETCH)] = {	0xc35,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0xc34,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x904	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x900	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x330,		0x331	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
 struct power_pmu power4_pmu = {
 	.n_counter = 8,
 	.max_alternatives = 5,
@@ -554,4 +594,5 @@ struct power_pmu power4_pmu = {
 	.disable_pmc = p4_disable_pmc,
 	.n_generic = ARRAY_SIZE(p4_generic_events),
 	.generic_events = p4_generic_events,
+	.cache_events = &power4_cache_events,
 };
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 8471e3c..bbf2cbb 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -614,6 +614,46 @@ static int power5p_generic_events[] = {
 	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,		/* BR_MPRED_CR */
 };
 
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x1c10a8,	0x3c1088	},
+		[C(OP_WRITE)] = {	0x2c10a8,	0xc10c3		},
+		[C(OP_PREFETCH)] = {	0xc70e7,	-1		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0,		0		},
+	},
+	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	0,		0		},
+		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0xc20e4,	0x800c4		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x800c0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x230e4,	0x230e5		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
 struct power_pmu power5p_pmu = {
 	.n_counter = 6,
 	.max_alternatives = MAX_ALT,
@@ -623,8 +663,9 @@ struct power_pmu power5p_pmu = {
 	.get_constraint = power5p_get_constraint,
 	.get_alternatives = power5p_get_alternatives,
 	.disable_pmc = power5p_disable_pmc,
+	.limited_pmc_event = power5p_limited_pmc_event,
+	.flags = PPMU_LIMITED_PMC5_6,
 	.n_generic = ARRAY_SIZE(power5p_generic_events),
 	.generic_events = power5p_generic_events,
-	.flags = PPMU_LIMITED_PMC5_6,
-	.limited_pmc_event = power5p_limited_pmc_event,
+	.cache_events = &power5p_cache_events,
 };
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 1b44c5f..670cf10 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -556,6 +556,46 @@ static int power5_generic_events[] = {
 	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,		/* BR_MPRED_CR */
 };
 
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x4c1090,	0x3c1088	},
+		[C(OP_WRITE)] = {	0x3c1090,	0xc10c3		},
+		[C(OP_PREFETCH)] = {	0xc70e7,	0		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0,		0		},
+	},
+	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x3c309b	},
+		[C(OP_WRITE)] = {	0,		0		},
+		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x2c4090,	0x800c4		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x800c0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x230e4,	0x230e5		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
 struct power_pmu power5_pmu = {
 	.n_counter = 6,
 	.max_alternatives = MAX_ALT,
@@ -567,4 +607,5 @@ struct power_pmu power5_pmu = {
 	.disable_pmc = power5_disable_pmc,
 	.n_generic = ARRAY_SIZE(power5_generic_events),
 	.generic_events = power5_generic_events,
+	.cache_events = &power5_cache_events,
 };
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index cd4fbe0..4da7078 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -474,6 +474,47 @@ static int power6_generic_events[] = {
 	[PERF_COUNT_BRANCH_MISSES] = 0x400052,		/* BR_MPRED */
 };
 
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
+ */
+static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x80082,	0x80080		},
+		[C(OP_WRITE)] = {	0x80086,	0x80088		},
+		[C(OP_PREFETCH)] = {	0x810a4,	0		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x100056 	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0x4008c,	0		},
+	},
+	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x150730,	0x250532	},
+		[C(OP_WRITE)] = {	0x250432,	0x150432	},
+		[C(OP_PREFETCH)] = {	0x810a6,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x20000e	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x420ce		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x430e6,	0x400052	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
 struct power_pmu power6_pmu = {
 	.n_counter = 6,
 	.max_alternatives = MAX_ALT,
@@ -483,8 +524,9 @@ struct power_pmu power6_pmu = {
 	.get_constraint = p6_get_constraint,
 	.get_alternatives = p6_get_alternatives,
 	.disable_pmc = p6_disable_pmc,
+	.limited_pmc_event = p6_limited_pmc_event,
+	.flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
 	.n_generic = ARRAY_SIZE(power6_generic_events),
 	.generic_events = power6_generic_events,
-	.flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
-	.limited_pmc_event = p6_limited_pmc_event,
+	.cache_events = &power6_cache_events,
 };
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index dfac48d..060e0de 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -302,6 +302,46 @@ static int power7_generic_events[] = {
 	[PERF_COUNT_BRANCH_MISSES] = 0x400f6,		/* BR_MPRED */
 };
 
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x400f0,	0xc880	},
+		[C(OP_WRITE)] = {	0,		0x300f0	},
+		[C(OP_PREFETCH)] = {	0xd8b8,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x200fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0x408a,		0	},
+	},
+	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x6080,		0x6084	},
+		[C(OP_WRITE)] = {	0x6082,		0x6086	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x300fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x400fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x10068,	0x400f6	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
 struct power_pmu power7_pmu = {
 	.n_counter = 6,
 	.max_alternatives = MAX_ALT + 1,
@@ -313,4 +353,5 @@ struct power_pmu power7_pmu = {
 	.disable_pmc = power7_disable_pmc,
 	.n_generic = ARRAY_SIZE(power7_generic_events),
 	.generic_events = power7_generic_events,
+	.cache_events = &power7_cache_events,
 };
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index eed47c4..336adf1 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -427,6 +427,46 @@ static int ppc970_generic_events[] = {
 	[PERF_COUNT_BRANCH_MISSES] = 0x327,		/* PM_GRP_BR_MPRED */
 };
 
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x8810,		0x3810	},
+		[C(OP_WRITE)] = {	0x7810,		0x813	},
+		[C(OP_PREFETCH)] = {	0x731,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0x733,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x704	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x700	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x431,		0x327	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
 struct power_pmu ppc970_pmu = {
 	.n_counter = 8,
 	.max_alternatives = 2,
@@ -438,4 +478,5 @@ struct power_pmu ppc970_pmu = {
 	.disable_pmc = p970_disable_pmc,
 	.n_generic = ARRAY_SIZE(ppc970_generic_events),
 	.generic_events = ppc970_generic_events,
+	.cache_events = &ppc970_cache_events,
 };
-- 
cgit v0.10.2


From 0764771dab80d7b84b9a271bee7f1b21a04a3f0c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 11:18:36 +0200
Subject: perf_counter: More paranoia settings

Rename the perf_counter_priv knob to perf_counter_paranoia (because
priv can be read as private, as opposed to privileged) and provide
one more level:

 0 - permissive
 1 - restrict cpu counters to privilidged contexts
 2 - restrict kernel-mode code counting and profiling

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5b96647..386be91 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -648,7 +648,7 @@ struct perf_callchain_entry {
 
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
-extern int sysctl_perf_counter_priv;
+extern int sysctl_perf_counter_paranoid;
 extern int sysctl_perf_counter_mlock;
 extern int sysctl_perf_counter_limit;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8b89b40..63f1987 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -43,7 +43,23 @@ static atomic_t nr_counters __read_mostly;
 static atomic_t nr_mmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
 
-int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
+/*
+ * 0 - not paranoid
+ * 1 - disallow cpu counters to unpriv
+ * 2 - disallow kernel profiling to unpriv
+ */
+int sysctl_perf_counter_paranoid __read_mostly; /* do we need to be privileged */
+
+static inline bool perf_paranoid_cpu(void)
+{
+	return sysctl_perf_counter_paranoid > 0;
+}
+
+static inline bool perf_paranoid_kernel(void)
+{
+	return sysctl_perf_counter_paranoid > 1;
+}
+
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
 
@@ -1385,7 +1401,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	 */
 	if (cpu != -1) {
 		/* Must be root to operate on a CPU counter: */
-		if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
+		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 			return ERR_PTR(-EACCES);
 
 		if (cpu < 0 || cpu > num_possible_cpus())
@@ -3618,6 +3634,11 @@ SYSCALL_DEFINE5(perf_counter_open,
 	if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
 		return -EFAULT;
 
+	if (!attr.exclude_kernel) {
+		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
 	/*
 	 * Get the target context (task or percpu):
 	 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0c4bf86..344a659 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -916,9 +916,9 @@ static struct ctl_table kern_table[] = {
 #ifdef CONFIG_PERF_COUNTERS
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_privileged",
-		.data		= &sysctl_perf_counter_priv,
-		.maxlen		= sizeof(sysctl_perf_counter_priv),
+		.procname	= "perf_counter_paranoid",
+		.data		= &sysctl_perf_counter_paranoid,
+		.maxlen		= sizeof(sysctl_perf_counter_paranoid),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-- 
cgit v0.10.2


From df58ab24bf26b166874bfb18b3b5a2e0a8e63179 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 11:25:05 +0200
Subject: perf_counter: Rename perf_counter_limit sysctl

Rename perf_counter_limit to perf_counter_max_sample_rate and
prohibit creation of counters with a known higher sample
frequency.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 386be91..95c797c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -650,7 +650,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_paranoid;
 extern int sysctl_perf_counter_mlock;
-extern int sysctl_perf_counter_limit;
+extern int sysctl_perf_counter_sample_rate;
 
 extern void perf_counter_init(void);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 63f1987..3b2829d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -44,11 +44,12 @@ static atomic_t nr_mmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
 
 /*
- * 0 - not paranoid
- * 1 - disallow cpu counters to unpriv
- * 2 - disallow kernel profiling to unpriv
+ * perf counter paranoia level:
+ *  0 - not paranoid
+ *  1 - disallow cpu counters to unpriv
+ *  2 - disallow kernel profiling to unpriv
  */
-int sysctl_perf_counter_paranoid __read_mostly; /* do we need to be privileged */
+int sysctl_perf_counter_paranoid __read_mostly;
 
 static inline bool perf_paranoid_cpu(void)
 {
@@ -61,7 +62,11 @@ static inline bool perf_paranoid_kernel(void)
 }
 
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
-int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
+
+/*
+ * max perf counter sample rate
+ */
+int sysctl_perf_counter_sample_rate __read_mostly = 100000;
 
 static atomic64_t perf_counter_id;
 
@@ -1244,7 +1249,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(counter, 1);
 			counter->pmu->unthrottle(counter);
-			interrupts = 2*sysctl_perf_counter_limit/HZ;
+			interrupts = 2*sysctl_perf_counter_sample_rate/HZ;
 		}
 
 		if (!counter->attr.freq || !counter->attr.sample_freq)
@@ -1682,7 +1687,7 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 
 	spin_lock_irq(&ctx->lock);
 	if (counter->attr.freq) {
-		if (value > sysctl_perf_counter_limit) {
+		if (value > sysctl_perf_counter_sample_rate) {
 			ret = -EINVAL;
 			goto unlock;
 		}
@@ -2979,7 +2984,8 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
 	} else {
 		if (hwc->interrupts != MAX_INTERRUPTS) {
 			hwc->interrupts++;
-			if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) {
+			if (HZ * hwc->interrupts >
+					(u64)sysctl_perf_counter_sample_rate) {
 				hwc->interrupts = MAX_INTERRUPTS;
 				perf_log_throttle(counter, 0);
 				ret = 1;
@@ -3639,6 +3645,11 @@ SYSCALL_DEFINE5(perf_counter_open,
 			return -EACCES;
 	}
 
+	if (attr.freq) {
+		if (attr.sample_freq > sysctl_perf_counter_sample_rate)
+			return -EINVAL;
+	}
+
 	/*
 	 * Get the target context (task or percpu):
 	 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 344a659..9fd4e43 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -932,9 +932,9 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_int_limit",
-		.data		= &sysctl_perf_counter_limit,
-		.maxlen		= sizeof(sysctl_perf_counter_limit),
+		.procname	= "perf_counter_max_sample_rate",
+		.data		= &sysctl_perf_counter_sample_rate,
+		.maxlen		= sizeof(sysctl_perf_counter_sample_rate),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-- 
cgit v0.10.2


From 729ff5e2aaf181f5d3ab849337fce406cd19b1d9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Jun 2009 14:16:15 +0200
Subject: perf_counter tools: Clean up u64 usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A build error slipped in:

 builtin-report.c: In function ‘hist_entry__fprintf’:
 builtin-report.c:711: error: format ‘%12d’ expects type ‘int’, but argument 3 has type ‘uint64_t’

Because we got a bit sloppy with those types. uint64_t really sucks,
because there's no printf format for it. So standardize on __u64
instead - for all types that go to or come from the ABI (which is __u64),
or for values that need to be large enough even on 32-bit.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 3334a8b..b1ed5f7 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -145,7 +145,7 @@ static void dsos__fprintf(FILE *fp)
 		dso__fprintf(pos, fp);
 }
 
-static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip)
+static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip)
 {
 	return dso__find_symbol(kernel_dso, ip);
 }
@@ -178,19 +178,19 @@ static int load_kernel(void)
 
 struct map {
 	struct list_head node;
-	uint64_t	 start;
-	uint64_t	 end;
-	uint64_t	 pgoff;
-	uint64_t	 (*map_ip)(struct map *, uint64_t);
+	__u64	 start;
+	__u64	 end;
+	__u64	 pgoff;
+	__u64	 (*map_ip)(struct map *, __u64);
 	struct dso	 *dso;
 };
 
-static uint64_t map__map_ip(struct map *map, uint64_t ip)
+static __u64 map__map_ip(struct map *map, __u64 ip)
 {
 	return ip - map->start + map->pgoff;
 }
 
-static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
+static __u64 vdso__map_ip(struct map *map, __u64 ip)
 {
 	return ip;
 }
@@ -249,7 +249,7 @@ static int map__overlap(struct map *l, struct map *r)
 
 static size_t map__fprintf(struct map *self, FILE *fp)
 {
-	return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n",
+	return fprintf(fp, " %Lx-%Lx %Lx %s\n",
 		       self->start, self->end, self->pgoff, self->dso->name);
 }
 
@@ -373,7 +373,7 @@ static int thread__fork(struct thread *self, struct thread *parent)
 	return 0;
 }
 
-static struct map *thread__find_map(struct thread *self, uint64_t ip)
+static struct map *thread__find_map(struct thread *self, __u64 ip)
 {
 	struct map *pos;
 
@@ -414,7 +414,7 @@ struct hist_entry {
 	struct map	 *map;
 	struct dso	 *dso;
 	struct symbol	 *sym;
-	uint64_t	 ip;
+	__u64	 ip;
 	char		 level;
 
 	uint32_t	 count;
@@ -533,7 +533,7 @@ static struct sort_entry sort_dso = {
 static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	uint64_t ip_l, ip_r;
+	__u64 ip_l, ip_r;
 
 	if (left->sym == right->sym)
 		return 0;
@@ -647,7 +647,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 /*
  * collect histogram counts
  */
-static void hist_hit(struct hist_entry *he, uint64_t ip)
+static void hist_hit(struct hist_entry *he, __u64 ip)
 {
 	unsigned int sym_size, offset;
 	struct symbol *sym = he->sym;
@@ -676,7 +676,7 @@ static void hist_hit(struct hist_entry *he, uint64_t ip)
 
 static int
 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-		struct symbol *sym, uint64_t ip, char level)
+		struct symbol *sym, __u64 ip, char level)
 {
 	struct rb_node **p = &hist.rb_node;
 	struct rb_node *parent = NULL;
@@ -848,7 +848,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 	int show = 0;
 	struct dso *dso = NULL;
 	struct thread *thread = threads__findnew(event->ip.pid);
-	uint64_t ip = event->ip.ip;
+	__u64 ip = event->ip.ip;
 	struct map *map = NULL;
 
 	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
@@ -1031,7 +1031,7 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 }
 
 static int
-parse_line(FILE *file, struct symbol *sym, uint64_t start, uint64_t len)
+parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len)
 {
 	char *line = NULL, *tmp, *tmp2;
 	unsigned int offset;
@@ -1112,7 +1112,7 @@ parse_line(FILE *file, struct symbol *sym, uint64_t start, uint64_t len)
 static void annotate_sym(struct dso *dso, struct symbol *sym)
 {
 	char *filename = dso->name;
-	uint64_t start, end, len;
+	__u64 start, end, len;
 	char command[PATH_MAX*2];
 	FILE *file;
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 919f23c..84cd336 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -223,7 +223,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
 
 	comm_ev.pid = pid;
 	comm_ev.header.type = PERF_EVENT_COMM;
-	size = ALIGN(size, sizeof(uint64_t));
+	size = ALIGN(size, sizeof(__u64));
 	comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
 
 	if (!full) {
@@ -304,7 +304,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
 			size = strlen(execname);
 			execname[size - 1] = '\0'; /* Remove \n */
 			memcpy(mmap_ev.filename, execname, size);
-			size = ALIGN(size, sizeof(uint64_t));
+			size = ALIGN(size, sizeof(__u64));
 			mmap_ev.len -= mmap_ev.start;
 			mmap_ev.header.size = (sizeof(mmap_ev) -
 					       (sizeof(mmap_ev.filename) - size));
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f57fd5c..82fa93b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -146,7 +146,7 @@ static void dsos__fprintf(FILE *fp)
 		dso__fprintf(pos, fp);
 }
 
-static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip)
+static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip)
 {
 	return dso__find_symbol(kernel_dso, ip);
 }
@@ -193,19 +193,19 @@ static int strcommon(const char *pathname)
 
 struct map {
 	struct list_head node;
-	uint64_t	 start;
-	uint64_t	 end;
-	uint64_t	 pgoff;
-	uint64_t	 (*map_ip)(struct map *, uint64_t);
+	__u64	 start;
+	__u64	 end;
+	__u64	 pgoff;
+	__u64	 (*map_ip)(struct map *, __u64);
 	struct dso	 *dso;
 };
 
-static uint64_t map__map_ip(struct map *map, uint64_t ip)
+static __u64 map__map_ip(struct map *map, __u64 ip)
 {
 	return ip - map->start + map->pgoff;
 }
 
-static uint64_t vdso__map_ip(struct map *map, uint64_t ip)
+static __u64 vdso__map_ip(struct map *map, __u64 ip)
 {
 	return ip;
 }
@@ -288,7 +288,7 @@ static int map__overlap(struct map *l, struct map *r)
 
 static size_t map__fprintf(struct map *self, FILE *fp)
 {
-	return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n",
+	return fprintf(fp, " %Lx-%Lx %Lx %s\n",
 		       self->start, self->end, self->pgoff, self->dso->name);
 }
 
@@ -412,7 +412,7 @@ static int thread__fork(struct thread *self, struct thread *parent)
 	return 0;
 }
 
-static struct map *thread__find_map(struct thread *self, uint64_t ip)
+static struct map *thread__find_map(struct thread *self, __u64 ip)
 {
 	struct map *pos;
 
@@ -453,10 +453,10 @@ struct hist_entry {
 	struct map	 *map;
 	struct dso	 *dso;
 	struct symbol	 *sym;
-	uint64_t	 ip;
+	__u64		 ip;
 	char		 level;
 
-	uint64_t	 count;
+	__u64		 count;
 };
 
 /*
@@ -572,7 +572,7 @@ static struct sort_entry sort_dso = {
 static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	uint64_t ip_l, ip_r;
+	__u64 ip_l, ip_r;
 
 	if (left->sym == right->sym)
 		return 0;
@@ -684,7 +684,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 }
 
 static size_t
-hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
+hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples)
 {
 	struct sort_entry *se;
 	size_t ret;
@@ -708,7 +708,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 		ret = color_fprintf(fp, color, "   %6.2f%%",
 				(self->count * 100.0) / total_samples);
 	} else
-		ret = fprintf(fp, "%12d ", self->count);
+		ret = fprintf(fp, "%12Ld ", self->count);
 
 	list_for_each_entry(se, &hist_entry__sort_list, list) {
 		fprintf(fp, "  ");
@@ -726,7 +726,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples)
 
 static int
 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-		struct symbol *sym, uint64_t ip, char level, uint64_t count)
+		struct symbol *sym, __u64 ip, char level, __u64 count)
 {
 	struct rb_node **p = &hist.rb_node;
 	struct rb_node *parent = NULL;
@@ -873,7 +873,7 @@ static void output__resort(void)
 	}
 }
 
-static size_t output__fprintf(FILE *fp, uint64_t total_samples)
+static size_t output__fprintf(FILE *fp, __u64 total_samples)
 {
 	struct hist_entry *pos;
 	struct sort_entry *se;
@@ -941,8 +941,8 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 	int show = 0;
 	struct dso *dso = NULL;
 	struct thread *thread = threads__findnew(event->ip.pid);
-	uint64_t ip = event->ip.ip;
-	uint64_t period = 1;
+	__u64 ip = event->ip.ip;
+	__u64 period = 1;
 	struct map *map = NULL;
 
 	if (event->header.type & PERF_SAMPLE_PERIOD)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8ba2480..309dbc7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -79,8 +79,8 @@ static int			dump_symtab;
  * Symbols
  */
 
-static uint64_t			min_ip;
-static uint64_t			max_ip = -1ll;
+static __u64			min_ip;
+static __u64			max_ip = -1ll;
 
 struct sym_entry {
 	struct rb_node		rb_node;
@@ -372,7 +372,7 @@ out_delete_dso:
 /*
  * Binary search in the histogram table and record the hit:
  */
-static void record_ip(uint64_t ip, int counter)
+static void record_ip(__u64 ip, int counter)
 {
 	struct symbol *sym = dso__find_symbol(kernel_dso, ip);
 
@@ -392,7 +392,7 @@ static void record_ip(uint64_t ip, int counter)
 	samples--;
 }
 
-static void process_event(uint64_t ip, int counter)
+static void process_event(__u64 ip, int counter)
 {
 	samples++;
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 32dd47d..49a55f8 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -9,9 +9,9 @@
 
 const char *sym_hist_filter;
 
-static struct symbol *symbol__new(uint64_t start, uint64_t len,
+static struct symbol *symbol__new(__u64 start, __u64 len,
 				  const char *name, unsigned int priv_size,
-				  uint64_t obj_start, int verbose)
+				  __u64 obj_start, int verbose)
 {
 	size_t namelen = strlen(name) + 1;
 	struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen);
@@ -89,7 +89,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym)
 {
 	struct rb_node **p = &self->syms.rb_node;
 	struct rb_node *parent = NULL;
-	const uint64_t ip = sym->start;
+	const __u64 ip = sym->start;
 	struct symbol *s;
 
 	while (*p != NULL) {
@@ -104,7 +104,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym)
 	rb_insert_color(&sym->rb_node, &self->syms);
 }
 
-struct symbol *dso__find_symbol(struct dso *self, uint64_t ip)
+struct symbol *dso__find_symbol(struct dso *self, __u64 ip)
 {
 	struct rb_node *n;
 
@@ -523,7 +523,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 
 	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
 		struct symbol *f;
-		uint64_t obj_start;
+		__u64 obj_start;
 
 		if (!elf_sym__is_function(&sym))
 			continue;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 4839d68..0d1292b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -19,7 +19,7 @@ struct dso {
 	struct list_head node;
 	struct rb_root	 syms;
 	unsigned int	 sym_priv_size;
-	struct symbol    *(*find_symbol)(struct dso *, uint64_t ip);
+	struct symbol    *(*find_symbol)(struct dso *, __u64 ip);
 	char		 name[0];
 };
 
@@ -35,7 +35,7 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym)
 	return ((void *)sym) - self->sym_priv_size;
 }
 
-struct symbol *dso__find_symbol(struct dso *self, uint64_t ip);
+struct symbol *dso__find_symbol(struct dso *self, __u64 ip);
 
 int dso__load_kernel(struct dso *self, const char *vmlinux,
 		     symbol_filter_t filter, int verbose);
-- 
cgit v0.10.2


From 0b4dcea579a1b6f4d249d61f5bc8adeaa7c895d8 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Thu, 11 Jun 2009 11:13:35 -0400
Subject: Btrfs: fix oops when btrfs_inherit_iflags called with a NULL dir

This happens during subvol creation.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 926332a..eff18f5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -112,7 +112,12 @@ void btrfs_update_iflags(struct inode *inode)
  */
 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
 {
-	unsigned int flags = BTRFS_I(dir)->flags;
+	unsigned int flags;
+
+	if (!dir)
+		return;
+
+	flags = BTRFS_I(dir)->flags;
 
 	if (S_ISREG(inode->i_mode))
 		flags &= ~BTRFS_INODE_DIRSYNC;
-- 
cgit v0.10.2


From b263c2c8bf13c273485bd99dbbeba79c844409dd Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Thu, 11 Jun 2009 11:24:47 -0400
Subject: Btrfs: fix extent_buffer leak during tree log replay

During tree log replay, we read in the tree log roots,
process them and then free them.  A recent change
takes an extra reference on the root node of the tree
when the root is read in, and stores that reference
in root->commit_root.

This reference was not being freed, leaving us with
one buffer pinned in ram for each subvol with
a tree log root after a crash.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2b41fc0..c139222 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3019,6 +3019,7 @@ again:
 		key.offset = found_key.offset - 1;
 		wc.replay_dest->log_root = NULL;
 		free_extent_buffer(log->node);
+		free_extent_buffer(log->commit_root);
 		kfree(log);
 
 		if (found_key.offset == 0)
-- 
cgit v0.10.2


From 51cdd9289d2e0d83eb32ed6d7a42596b02bf924e Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 11 Jun 2009 23:25:09 +0800
Subject: [ARM] pxa/sharpsl_pm: zaurus needs generic pxa suspend/resume
 routines

For suspend/resume to work, spitz needs pxa_pm_suspend/resume to be
called. Otherwise PSPR is not set properly, and system will die during
resume.

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>

diff --git a/arch/arm/mach-pxa/include/mach/pm.h b/arch/arm/mach-pxa/include/mach/pm.h
index a6eeef8..fd8360c 100644
--- a/arch/arm/mach-pxa/include/mach/pm.h
+++ b/arch/arm/mach-pxa/include/mach/pm.h
@@ -27,6 +27,8 @@ extern void pxa27x_cpu_suspend(unsigned int);
 extern void pxa_cpu_resume(void);
 
 extern int pxa_pm_enter(suspend_state_t state);
+extern int pxa_pm_prepare(void);
+extern void pxa_pm_finish(void);
 
 /* NOTE: this is for PM debugging on Lubbock,  it's really a big
  * ugly, but let's keep the crap minimum here, instead of direct
diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c
index 884b174..7693355 100644
--- a/arch/arm/mach-pxa/pm.c
+++ b/arch/arm/mach-pxa/pm.c
@@ -79,7 +79,7 @@ static int pxa_pm_valid(suspend_state_t state)
 	return -EINVAL;
 }
 
-static int pxa_pm_prepare(void)
+int pxa_pm_prepare(void)
 {
 	int ret = 0;
 
@@ -89,7 +89,7 @@ static int pxa_pm_prepare(void)
 	return ret;
 }
 
-static void pxa_pm_finish(void)
+void pxa_pm_finish(void)
 {
 	if (pxa_cpu_pm_fns && pxa_cpu_pm_fns->finish)
 		pxa_cpu_pm_fns->finish();
diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
index dfac7b9..2546c06 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.c
+++ b/arch/arm/mach-pxa/sharpsl_pm.c
@@ -892,6 +892,8 @@ static void sharpsl_apm_get_power_status(struct apm_power_info *info)
 
 #ifdef CONFIG_PM
 static struct platform_suspend_ops sharpsl_pm_ops = {
+	.prepare	= pxa_pm_prepare,
+	.finish		= pxa_pm_finish,
 	.enter		= corgi_pxa_pm_enter,
 	.valid		= suspend_valid_only_mem,
 };
-- 
cgit v0.10.2


From ace08c3c4403140e5ce82116c8f2acb38f58f61d Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:20:38 +0100
Subject: tty:cyclades, load firmware even on Ze

Ze needs firmware to be loaded as well as Zo. Move cyz_load_fw one
level upper to achieve that.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 1fdb9f6..bde2458 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -5043,6 +5043,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 			nchan = ZE_V1_NPORTS;
 		} else {
 			card_name = "Cyclades-8Zo";
+			nchan = 8;
 
 #ifdef CY_PCI_DEBUG
 			if (mailbox == ZO_V1) {
@@ -5065,15 +5066,11 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 			 */
 			if ((mailbox == ZO_V1) || (mailbox == ZO_V2))
 				cy_writel(addr2 + ID_ADDRESS, 0L);
-
-			retval = cyz_load_fw(pdev, addr2, addr0, irq);
-			if (retval)
-				goto err_unmap;
-			/* This must be a Cyclades-8Zo/PCI.  The extendable
-			   version will have a different device_id and will
-			   be allocated its maximum number of ports. */
-			nchan = 8;
 		}
+
+		retval = cyz_load_fw(pdev, addr2, addr0, irq);
+		if (retval)
+			goto err_unmap;
 	}
 
 	if ((cy_next_channel + nchan) > NR_PORTS) {
-- 
cgit v0.10.2


From eca1b592d600bb2b7d24b66e4106de1e751ae510 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:21:30 +0100
Subject: tty: cyclades, don't kill FW

Don't reset the PLX chip after FW load, which effectively kills
the FW, so that user had to boot manually.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index bde2458..6d52439 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -4932,8 +4932,6 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 	cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) |
 			0x00030800UL);
 
-	plx_init(pdev, irq, ctl_addr);
-
 	return 0;
 err_rel:
 	release_firmware(fw);
-- 
cgit v0.10.2


From 65a29f60e121ae5116ac1736b50a237bf2db3225 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:22:27 +0100
Subject: tty: cyclades, remove spurious check in ISR

No need to check if dev_id is NULL, it never is.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 6d52439..ccf68a9 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -1737,14 +1737,6 @@ static irqreturn_t cyz_interrupt(int irq, void *dev_id)
 {
 	struct cyclades_card *cinfo = dev_id;
 
-	if (unlikely(cinfo == NULL)) {
-#ifdef CY_DEBUG_INTERRUPTS
-		printk(KERN_DEBUG "cyz_interrupt: spurious interrupt %d\n",
-									irq);
-#endif
-		return IRQ_NONE;	/* spurious interrupt */
-	}
-
 	if (unlikely(!ISZLOADED(*cinfo))) {
 #ifdef CY_DEBUG_INTERRUPTS
 		printk(KERN_DEBUG "cyz_interrupt: board not yet loaded "
-- 
cgit v0.10.2


From fcc8ac1825d3d0fb81f73bc1a80ebc863168bb56 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 11 Jun 2009 12:24:17 +0100
Subject: tty: Add carrier processing on close to the tty_port core

Some drivers implement this internally, others miss it out. Push the
behaviour into the core code as that way everyone will do it consistently.

Update the dtr rts method to raise or lower depending upon flags. Having a
single method in this style fits most of the implementations more cleanly than
two funtions.

We need this in place before we tackle the USB side

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index af7c13c..8797b77 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -745,7 +745,7 @@ static int epca_carrier_raised(struct tty_port *port)
 	return 0;
 }
 
-static void epca_raise_dtr_rts(struct tty_port *port)
+static void epca_dtr_rts(struct tty_port *port, int onoff)
 {
 }
 
@@ -925,7 +925,7 @@ static const struct tty_operations pc_ops = {
 
 static const struct tty_port_operations epca_port_ops = {
 	.carrier_raised = epca_carrier_raised,
-	.raise_dtr_rts = epca_raise_dtr_rts,
+	.dtr_rts = epca_dtr_rts,
 };
 
 static int info_open(struct tty_struct *tty, struct file *filp)
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index a59eac5..4d745a8 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -329,7 +329,7 @@ static inline void drop_rts(struct isi_port *port)
 
 /* card->lock MUST NOT be held */
 
-static void isicom_raise_dtr_rts(struct tty_port *port)
+static void isicom_dtr_rts(struct tty_port *port, int on)
 {
 	struct isi_port *ip = container_of(port, struct isi_port, port);
 	struct isi_board *card = ip->card;
@@ -339,10 +339,17 @@ static void isicom_raise_dtr_rts(struct tty_port *port)
 	if (!lock_card(card))
 		return;
 
-	outw(0x8000 | (channel << card->shift_count) | 0x02, base);
-	outw(0x0f04, base);
-	InterruptTheCard(base);
-	ip->status |= (ISI_DTR | ISI_RTS);
+	if (on) {
+		outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+		outw(0x0f04, base);
+		InterruptTheCard(base);
+		ip->status |= (ISI_DTR | ISI_RTS);
+	} else {
+		outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+		outw(0x0C04, base);
+		InterruptTheCard(base);
+		ip->status &= ~(ISI_DTR | ISI_RTS);
+	}
 	unlock_card(card);
 }
 
@@ -1339,7 +1346,7 @@ static const struct tty_operations isicom_ops = {
 
 static const struct tty_port_operations isicom_port_ops = {
 	.carrier_raised		= isicom_carrier_raised,
-	.raise_dtr_rts		= isicom_raise_dtr_rts,
+	.dtr_rts		= isicom_dtr_rts,
 };
 
 static int __devinit reset_card(struct pci_dev *pdev,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index fff19f7..e18800c 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -1140,14 +1140,14 @@ static int stli_carrier_raised(struct tty_port *port)
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stli_raise_dtr_rts(struct tty_port *port)
+static void stli_dtr_rts(struct tty_port *port, int on)
 {
 	struct stliport *portp = container_of(port, struct stliport, port);
 	struct stlibrd *brdp = stli_brds[portp->brdnr];
-	stli_mkasysigs(&portp->asig, 1, 1);
+	stli_mkasysigs(&portp->asig, on, on);
 	if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
 		sizeof(asysigs_t), 0) < 0)
-			printk(KERN_WARNING "istallion: dtr raise failed.\n");
+			printk(KERN_WARNING "istallion: dtr set failed.\n");
 }
 
 
@@ -4417,7 +4417,7 @@ static const struct tty_operations stli_ops = {
 
 static const struct tty_port_operations stli_port_ops = {
 	.carrier_raised = stli_carrier_raised,
-	.raise_dtr_rts = stli_raise_dtr_rts,
+	.dtr_rts = stli_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 13f8871..9533f43 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -547,14 +547,18 @@ static int mxser_carrier_raised(struct tty_port *port)
 	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
 }
 
-static void mxser_raise_dtr_rts(struct tty_port *port)
+static void mxser_dtr_rts(struct tty_port *port, int on)
 {
 	struct mxser_port *mp = container_of(port, struct mxser_port, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&mp->slock, flags);
-	outb(inb(mp->ioaddr + UART_MCR) |
-		UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	if (on)
+		outb(inb(mp->ioaddr + UART_MCR) |
+			UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	else
+		outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS),
+			mp->ioaddr + UART_MCR);
 	spin_unlock_irqrestore(&mp->slock, flags);
 }
 
@@ -2356,7 +2360,7 @@ static const struct tty_operations mxser_ops = {
 
 struct tty_port_operations mxser_port_ops = {
 	.carrier_raised = mxser_carrier_raised,
-	.raise_dtr_rts = mxser_raise_dtr_rts,
+	.dtr_rts = mxser_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 19d79fc..77b3648 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -383,7 +383,7 @@ static void async_mode(MGSLPC_INFO *info);
 static void tx_timeout(unsigned long context);
 
 static int carrier_raised(struct tty_port *port);
-static void raise_dtr_rts(struct tty_port *port);
+static void dtr_rts(struct tty_port *port, int onoff);
 
 #if SYNCLINK_GENERIC_HDLC
 #define dev_to_port(D) (dev_to_hdlc(D)->priv)
@@ -513,7 +513,7 @@ static void ldisc_receive_buf(struct tty_struct *tty,
 
 static const struct tty_port_operations mgslpc_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts
+	.dtr_rts = dtr_rts
 };
 
 static int mgslpc_probe(struct pcmcia_device *link)
@@ -2528,13 +2528,16 @@ static int carrier_raised(struct tty_port *port)
 	return 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int onoff)
 {
 	MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (onoff)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~SerialSignal_RTS + SerialSignal_DTR;
 	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index f59fc5c..7399188 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -872,11 +872,16 @@ static int carrier_raised(struct tty_port *port)
 	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	struct r_port *info = container_of(port, struct r_port, port);
-	sSetDTR(&info->channel);
-	sSetRTS(&info->channel);
+	if (on) {
+		sSetDTR(&info->channel);
+		sSetRTS(&info->channel);
+	} else {
+		sClrDTR(&info->channel);
+		sClrRTS(&info->channel);
+	}
 }
 
 /*
@@ -2250,7 +2255,7 @@ static const struct tty_operations rocket_ops = {
 
 static const struct tty_port_operations rocket_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /*
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 2ad813a..53e504f 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -772,11 +772,11 @@ static int stl_carrier_raised(struct tty_port *port)
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stl_raise_dtr_rts(struct tty_port *port)
+static void stl_dtr_rts(struct tty_port *port, int on)
 {
 	struct stlport *portp = container_of(port, struct stlport, port);
 	/* Takes brd_lock internally */
-	stl_setsignals(portp, 1, 1);
+	stl_setsignals(portp, on, on);
 }
 
 /*****************************************************************************/
@@ -2547,7 +2547,7 @@ static const struct tty_operations stl_ops = {
 
 static const struct tty_port_operations stl_port_ops = {
 	.carrier_raised = stl_carrier_raised,
-	.raise_dtr_rts = stl_raise_dtr_rts,
+	.dtr_rts = stl_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index afd0b26..afded3a 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3247,13 +3247,16 @@ static int carrier_raised(struct tty_port *port)
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->irq_spinlock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	usc_set_serial_signals(info);
 	spin_unlock_irqrestore(&info->irq_spinlock,flags);
 }
@@ -4258,7 +4261,7 @@ static void mgsl_add_device( struct mgsl_struct *info )
 
 static const struct tty_port_operations mgsl_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 5e25649..67986ea 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -3099,13 +3099,16 @@ static int carrier_raised(struct tty_port *port)
 	return (info->signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	unsigned long flags;
 	struct slgt_info *info = container_of(port, struct slgt_info, port);
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3419,7 +3422,7 @@ static void add_device(struct slgt_info *info)
 
 static const struct tty_port_operations slgt_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /*
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 26de60e..6f727e3 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -3277,13 +3277,16 @@ static int carrier_raised(struct tty_port *port)
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3746,7 +3749,7 @@ static void add_device(SLMP_INFO *info)
 
 static const struct tty_port_operations port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /* Allocate and initialize a device instance structure
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 9b8004c..926d4a5 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -137,7 +137,7 @@ int tty_port_carrier_raised(struct tty_port *port)
 EXPORT_SYMBOL(tty_port_carrier_raised);
 
 /**
- *	tty_port_raise_dtr_rts	-	Riase DTR/RTS
+ *	tty_port_raise_dtr_rts	-	Raise DTR/RTS
  *	@port: tty port
  *
  *	Wrapper for the DTR/RTS raise logic. For the moment this is used
@@ -147,12 +147,28 @@ EXPORT_SYMBOL(tty_port_carrier_raised);
 
 void tty_port_raise_dtr_rts(struct tty_port *port)
 {
-	if (port->ops->raise_dtr_rts)
-		port->ops->raise_dtr_rts(port);
+	if (port->ops->dtr_rts)
+		port->ops->dtr_rts(port, 1);
 }
 EXPORT_SYMBOL(tty_port_raise_dtr_rts);
 
 /**
+ *	tty_port_lower_dtr_rts	-	Lower DTR/RTS
+ *	@port: tty port
+ *
+ *	Wrapper for the DTR/RTS raise logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+void tty_port_lower_dtr_rts(struct tty_port *port)
+{
+	if (port->ops->dtr_rts)
+		port->ops->dtr_rts(port, 0);
+}
+EXPORT_SYMBOL(tty_port_lower_dtr_rts);
+
+/**
  *	tty_port_block_til_ready	-	Waiting logic for tty open
  *	@port: the tty port being opened
  *	@tty: the tty device being bound
@@ -167,7 +183,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts);
  *		- port flags and counts
  *
  *	The passed tty_port must implement the carrier_raised method if it can
- *	do carrier detect and the raise_dtr_rts method if it supports software
+ *	do carrier detect and the dtr_rts method if it supports software
  *	management of these lines. Note that the dtr/rts raise is done each
  *	iteration as a hangup may have previously dropped them while we wait.
  */
@@ -302,6 +318,9 @@ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty)
 
 	tty_ldisc_flush(tty);
 
+	if (tty->termios->c_cflag & HUPCL)
+		tty_port_lower_dtr_rts(port);
+
 	spin_lock_irqsave(&port->lock, flags);
 	tty->closing = 0;
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index fc39db9..9869436 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -185,7 +185,7 @@ struct tty_port;
 struct tty_port_operations {
 	/* Return 1 if the carrier is raised */
 	int (*carrier_raised)(struct tty_port *port);
-	void (*raise_dtr_rts)(struct tty_port *port);
+	void (*dtr_rts)(struct tty_port *port, int raise);
 };
 	
 struct tty_port {
@@ -438,6 +438,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
+extern void tty_port_lower_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
 extern int tty_port_block_til_ready(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
-- 
cgit v0.10.2


From 1ec739be75a6cb961a46ba0b1982d0edb7f27558 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 11 Jun 2009 12:25:25 +0100
Subject: tty: Implement a drain delay in the tty port

We need this for devices that cannot flush and wait, but which do not order
data and modem events. Without it we will hang up before all the data
clears the hardware. Needed for the USB changes.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 926d4a5..4d08b6d 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -308,6 +308,17 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f
 	if (port->flags & ASYNC_INITIALIZED &&
 			port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->closing_wait);
+	if (port->drain_delay) {
+		unsigned int bps = tty_get_baud_rate(tty);
+		long timeout;
+
+		if (bps > 1200)
+			timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps,
+								HZ / 10);
+		else
+			timeout = 2 * HZ;
+		schedule_timeout_interruptible(timeout);
+	}
 	return 1;
 }
 EXPORT_SYMBOL(tty_port_close_start);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 9869436..bed5a3d 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -201,6 +201,9 @@ struct tty_port {
 	unsigned char		*xmit_buf;	/* Optional buffer */
 	int			close_delay;	/* Close port delay */
 	int			closing_wait;	/* Delay for output */
+	int			drain_delay;	/* Set to zero if no pure time
+						   based drain is needed else
+						   set to size of fifo */
 };
 
 /*
-- 
cgit v0.10.2


From 335f8514f200e63d689113d29cb7253a5c282967 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 11 Jun 2009 12:26:29 +0100
Subject: tty: Bring the usb tty port structure into more use

This allows us to clean stuff up, but is probably also going to cause
some app breakage with buggy apps as we now implement proper POSIX behaviour
for USB ports matching all the other ports. This does also mean other apps
that break on USB will now work properly.

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index b7eacad..2bfd6dd 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -93,8 +93,7 @@ static int  belkin_sa_startup(struct usb_serial *serial);
 static void belkin_sa_shutdown(struct usb_serial *serial);
 static int  belkin_sa_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void belkin_sa_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void belkin_sa_close(struct usb_serial_port *port);
 static void belkin_sa_read_int_callback(struct urb *urb);
 static void belkin_sa_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios * old);
@@ -244,8 +243,7 @@ exit:
 } /* belkin_sa_open */
 
 
-static void belkin_sa_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void belkin_sa_close(struct usb_serial_port *port)
 {
 	dbg("%s port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index ab4cc27..2830766 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -262,32 +262,40 @@ error:	kfree(priv);
 	return r;
 }
 
-static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static int ch341_carrier_raised(struct usb_serial_port *port)
+{
+	struct ch341_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & CH341_BIT_DCD)
+		return 1;
+	return 0;
+}
+
+static void ch341_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct ch341_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
 
 	dbg("%s - port %d", __func__, port->number);
+	/* drop DTR and RTS */
+	spin_lock_irqsave(&priv->lock, flags);
+	if (on)
+		priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR;
+	else
+		priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	ch341_set_handshake(port->serial->dev, priv->line_control);
+	wake_up_interruptible(&priv->delta_msr_wait);
+}
+
+static void ch341_close(struct usb_serial_port *port)
+{
+	dbg("%s - port %d", __func__, port->number);
 
 	/* shutdown our urbs */
 	dbg("%s - shutting down urbs", __func__);
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
-
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop DTR and RTS */
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			ch341_set_handshake(port->serial->dev, 0);
-		}
-	}
-	wake_up_interruptible(&priv->delta_msr_wait);
 }
 
 
@@ -302,7 +310,6 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port,
 	dbg("ch341_open()");
 
 	priv->baud_rate = DEFAULT_BAUD_RATE;
-	priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR;
 
 	r = ch341_configure(serial->dev, priv);
 	if (r)
@@ -322,7 +329,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port,
 	if (r) {
 		dev_err(&port->dev, "%s - failed submitting interrupt urb,"
 			" error %d\n", __func__, r);
-		ch341_close(tty, port, NULL);
+		ch341_close(port);
 		return -EPROTO;
 	}
 
@@ -343,9 +350,6 @@ static void ch341_set_termios(struct tty_struct *tty,
 
 	dbg("ch341_set_termios()");
 
-	if (!tty || !tty->termios)
-		return;
-
 	baud_rate = tty_get_baud_rate(tty);
 
 	priv->baud_rate = baud_rate;
@@ -568,6 +572,8 @@ static struct usb_serial_driver ch341_device = {
 	.usb_driver        = &ch341_driver,
 	.num_ports         = 1,
 	.open              = ch341_open,
+	.dtr_rts	   = ch341_dtr_rts,
+	.carrier_raised	   = ch341_carrier_raised,
 	.close             = ch341_close,
 	.ioctl             = ch341_ioctl,
 	.set_termios       = ch341_set_termios,
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 19e2404..247b61b 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -169,7 +169,9 @@ static int usb_console_setup(struct console *co, char *options)
 			kfree(tty);
 		}
 	}
-
+	/* So we know not to kill the hardware on a hangup on this
+	   port. We have also bumped the use count by one so it won't go
+	   idle */
 	port->console = 1;
 	retval = 0;
 
@@ -182,7 +184,7 @@ free_tty:
 	kfree(tty);
 reset_open_count:
 	port->port.count = 0;
-goto out;
+	goto out;
 }
 
 static void usb_console_write(struct console *co,
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index e8d5133..d9f586d 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -36,8 +36,7 @@
 static int cp2101_open(struct tty_struct *, struct usb_serial_port *,
 							struct file *);
 static void cp2101_cleanup(struct usb_serial_port *);
-static void cp2101_close(struct tty_struct *, struct usb_serial_port *,
-							struct file*);
+static void cp2101_close(struct usb_serial_port *);
 static void cp2101_get_termios(struct tty_struct *,
 	struct usb_serial_port *port);
 static void cp2101_get_termios_port(struct usb_serial_port *port,
@@ -398,8 +397,7 @@ static void cp2101_cleanup(struct usb_serial_port *port)
 	}
 }
 
-static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp)
+static void cp2101_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index dd501bb..933ba91 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -61,8 +61,7 @@ static int cyberjack_startup(struct usb_serial *serial);
 static void cyberjack_shutdown(struct usb_serial *serial);
 static int  cyberjack_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void cyberjack_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void cyberjack_close(struct usb_serial_port *port);
 static int cyberjack_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 static int cyberjack_write_room(struct tty_struct *tty);
@@ -185,8 +184,7 @@ static int  cyberjack_open(struct tty_struct *tty,
 	return result;
 }
 
-static void cyberjack_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void cyberjack_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index e568710..669f938 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -174,8 +174,8 @@ static int  cypress_ca42v2_startup(struct usb_serial *serial);
 static void cypress_shutdown(struct usb_serial *serial);
 static int  cypress_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void cypress_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void cypress_close(struct usb_serial_port *port);
+static void cypress_dtr_rts(struct usb_serial_port *port, int on);
 static int  cypress_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static void cypress_send(struct usb_serial_port *port);
@@ -218,6 +218,7 @@ static struct usb_serial_driver cypress_earthmate_device = {
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -244,6 +245,7 @@ static struct usb_serial_driver cypress_hidcom_device = {
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -270,6 +272,7 @@ static struct usb_serial_driver cypress_ca42v2_device = {
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -656,11 +659,7 @@ static int cypress_open(struct tty_struct *tty,
 	priv->rx_flags = 0;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* raise both lines and set termios */
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->line_control = CONTROL_DTR | CONTROL_RTS;
-	priv->cmd_ctrl = 1;
-	spin_unlock_irqrestore(&priv->lock, flags);
+	/* Set termios */
 	result = cypress_write(tty, port, NULL, 0);
 
 	if (result) {
@@ -694,76 +693,42 @@ static int cypress_open(struct tty_struct *tty,
 							__func__, result);
 		cypress_set_dead(port);
 	}
-
+	port->port.drain_delay = 256;
 	return result;
 } /* cypress_open */
 
+static void cypress_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct cypress_private *priv = usb_get_serial_port_data(port);
+	/* drop dtr and rts */
+	priv = usb_get_serial_port_data(port);
+	spin_lock_irq(&priv->lock);
+	if (on == 0)
+		priv->line_control = 0;
+	else 
+		priv->line_control = CONTROL_DTR | CONTROL_RTS;
+	priv->cmd_ctrl = 1;
+	spin_unlock_irq(&priv->lock);
+	cypress_write(NULL, port, NULL, 0);
+}
 
-static void cypress_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void cypress_close(struct usb_serial_port *port)
 {
 	struct cypress_private *priv = usb_get_serial_port_data(port);
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from buffer */
-	spin_lock_irq(&priv->lock);
-	timeout = CYPRESS_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (cypress_buf_data_avail(priv->buf) == 0
-		|| timeout == 0 || signal_pending(current)
-		/* without mutex, allowed due to harmless failure mode */
-		|| port->serial->disconnected)
-			break;
-		spin_unlock_irq(&priv->lock);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irq(&priv->lock);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-	/* clear out any remaining data in the buffer */
-	cypress_buf_clear(priv->buf);
-	spin_unlock_irq(&priv->lock);
-
 	/* writing is potentially harmful, lock must be taken */
 	mutex_lock(&port->serial->disc_mutex);
 	if (port->serial->disconnected) {
 		mutex_unlock(&port->serial->disc_mutex);
 		return;
 	}
-	/* wait for characters to drain from device */
-	if (tty) {
-		bps = tty_get_baud_rate(tty);
-		if (bps > 1200)
-			timeout = max((HZ * 2560) / bps, HZ / 10);
-		else
-			timeout = 2 * HZ;
-		schedule_timeout_interruptible(timeout);
-	}
-
+	cypress_buf_clear(priv->buf);
 	dbg("%s - stopping urbs", __func__);
 	usb_kill_urb(port->interrupt_in_urb);
 	usb_kill_urb(port->interrupt_out_urb);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop dtr and rts */
-			priv = usb_get_serial_port_data(port);
-			spin_lock_irq(&priv->lock);
-			priv->line_control = 0;
-			priv->cmd_ctrl = 1;
-			spin_unlock_irq(&priv->lock);
-			cypress_write(tty, port, NULL, 0);
-		}
-	}
 
 	if (stats)
 		dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n",
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 38ba4ea..30f5140 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -422,7 +422,6 @@ struct digi_port {
 	int dp_throttled;
 	int dp_throttle_restart;
 	wait_queue_head_t dp_flush_wait;
-	int dp_in_close;			/* close in progress */
 	wait_queue_head_t dp_close_wait;	/* wait queue for close */
 	struct work_struct dp_wakeup_work;
 	struct usb_serial_port *dp_port;
@@ -456,8 +455,9 @@ static int digi_write_room(struct tty_struct *tty);
 static int digi_chars_in_buffer(struct tty_struct *tty);
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 	struct file *filp);
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-	struct file *filp);
+static void digi_close(struct usb_serial_port *port);
+static int digi_carrier_raised(struct usb_serial_port *port);
+static void digi_dtr_rts(struct usb_serial_port *port, int on);
 static int digi_startup_device(struct usb_serial *serial);
 static int digi_startup(struct usb_serial *serial);
 static void digi_shutdown(struct usb_serial *serial);
@@ -510,6 +510,8 @@ static struct usb_serial_driver digi_acceleport_2_device = {
 	.num_ports =			3,
 	.open =				digi_open,
 	.close =			digi_close,
+	.dtr_rts =			digi_dtr_rts,
+	.carrier_raised =		digi_carrier_raised,
 	.write =			digi_write,
 	.write_room =			digi_write_room,
 	.write_bulk_callback = 		digi_write_bulk_callback,
@@ -1328,6 +1330,19 @@ static int digi_chars_in_buffer(struct tty_struct *tty)
 
 }
 
+static void digi_dtr_rts(struct usb_serial_port *port, int on)
+{
+	/* Adjust DTR and RTS */
+	digi_set_modem_signals(port, on * (TIOCM_DTR|TIOCM_RTS), 1);
+}
+
+static int digi_carrier_raised(struct usb_serial_port *port)
+{
+	struct digi_port *priv = usb_get_serial_port_data(port);
+	if (priv->dp_modem_signals & TIOCM_CD)
+		return 1;
+	return 0;
+}
 
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 				struct file *filp)
@@ -1336,7 +1351,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 	unsigned char buf[32];
 	struct digi_port *priv = usb_get_serial_port_data(port);
 	struct ktermios not_termios;
-	unsigned long flags = 0;
 
 	dbg("digi_open: TOP: port=%d, open_count=%d",
 		priv->dp_port_num, port->port.count);
@@ -1345,26 +1359,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 	if (digi_startup_device(port->serial) != 0)
 		return -ENXIO;
 
-	spin_lock_irqsave(&priv->dp_port_lock, flags);
-
-	/* don't wait on a close in progress for non-blocking opens */
-	if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) {
-		spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-		return -EAGAIN;
-	}
-
-	/* wait for a close in progress to finish */
-	while (priv->dp_in_close) {
-		cond_wait_interruptible_timeout_irqrestore(
-			&priv->dp_close_wait, DIGI_RETRY_TIMEOUT,
-			&priv->dp_port_lock, flags);
-		if (signal_pending(current))
-			return -EINTR;
-		spin_lock_irqsave(&priv->dp_port_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-
 	/* read modem signals automatically whenever they change */
 	buf[0] = DIGI_CMD_READ_INPUT_SIGNALS;
 	buf[1] = priv->dp_port_num;
@@ -1387,16 +1381,11 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 		not_termios.c_iflag = ~tty->termios->c_iflag;
 		digi_set_termios(tty, port, &not_termios);
 	}
-
-	/* set DTR and RTS */
-	digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1);
-
 	return 0;
 }
 
 
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static void digi_close(struct usb_serial_port *port)
 {
 	DEFINE_WAIT(wait);
 	int ret;
@@ -1411,28 +1400,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
 	if (port->serial->disconnected)
 		goto exit;
 
-	/* do cleanup only after final close on this port */
-	spin_lock_irq(&priv->dp_port_lock);
-	priv->dp_in_close = 1;
-	spin_unlock_irq(&priv->dp_port_lock);
-
-	/* tell line discipline to process only XON/XOFF */
-	tty->closing = 1;
-
-	/* wait for output to drain */
-	if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-		tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT);
-
-	/* flush driver and line discipline buffers */
-	tty_driver_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
 	if (port->serial->dev) {
-		/* wait for transmit idle */
-		if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-			digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
-		/* drop DTR and RTS */
-		digi_set_modem_signals(port, 0, 0);
+		/* FIXME: Transmit idle belongs in the wait_unti_sent path */
+		digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
 
 		/* disable input flow control */
 		buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL;
@@ -1477,11 +1447,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
 		/* shutdown any outstanding bulk writes */
 		usb_kill_urb(port->write_urb);
 	}
-	tty->closing = 0;
 exit:
 	spin_lock_irq(&priv->dp_port_lock);
 	priv->dp_write_urb_in_use = 0;
-	priv->dp_in_close = 0;
 	wake_up_interruptible(&priv->dp_close_wait);
 	spin_unlock_irq(&priv->dp_port_lock);
 	mutex_unlock(&port->serial->disc_mutex);
@@ -1560,7 +1528,6 @@ static int digi_startup(struct usb_serial *serial)
 		priv->dp_throttled = 0;
 		priv->dp_throttle_restart = 0;
 		init_waitqueue_head(&priv->dp_flush_wait);
-		priv->dp_in_close = 0;
 		init_waitqueue_head(&priv->dp_close_wait);
 		INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock);
 		priv->dp_port = serial->port[i];
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index c709ec4..2b141cc 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -81,8 +81,7 @@ static int debug;
 /* function prototypes for an empeg-car player */
 static int  empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
 						struct file *filp);
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-						struct file *filp);
+static void empeg_close(struct usb_serial_port *port);
 static int  empeg_write(struct tty_struct *tty, struct usb_serial_port *port,
 						const unsigned char *buf,
 						int count);
@@ -181,8 +180,7 @@ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
 }
 
 
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static void empeg_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index d9fcdae..d9d8711 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -719,8 +719,8 @@ static int  ftdi_sio_port_probe(struct usb_serial_port *port);
 static int  ftdi_sio_port_remove(struct usb_serial_port *port);
 static int  ftdi_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void ftdi_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void ftdi_close(struct usb_serial_port *port);
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on);
 static int  ftdi_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static int  ftdi_write_room(struct tty_struct *tty);
@@ -758,6 +758,7 @@ static struct usb_serial_driver ftdi_sio_device = {
 	.port_remove =		ftdi_sio_port_remove,
 	.open =			ftdi_open,
 	.close =		ftdi_close,
+	.dtr_rts =		ftdi_dtr_rts,
 	.throttle =		ftdi_throttle,
 	.unthrottle =		ftdi_unthrottle,
 	.write =		ftdi_write,
@@ -1558,6 +1559,30 @@ static int ftdi_open(struct tty_struct *tty,
 } /* ftdi_open */
 
 
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	char buf[1];
+
+	mutex_lock(&port->serial->disc_mutex);
+	if (!port->serial->disconnected) {
+		/* Disable flow control */
+		if (!on && usb_control_msg(port->serial->dev,
+			    usb_sndctrlpipe(port->serial->dev, 0),
+			    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
+			    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
+			    0, priv->interface, buf, 0,
+			    WDR_TIMEOUT) < 0) {
+			    dev_err(&port->dev, "error from flowcontrol urb\n");
+		}
+		/* drop RTS and DTR */
+		if (on)
+			set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+		else
+			clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+	}
+	mutex_unlock(&port->serial->disc_mutex);
+}
 
 /*
  * usbserial:__serial_close  only calls ftdi_close if the point is open
@@ -1567,31 +1592,12 @@ static int ftdi_open(struct tty_struct *tty,
  *
  */
 
-static void ftdi_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ftdi_close(struct usb_serial_port *port)
 { /* ftdi_close */
-	unsigned int c_cflag = tty->termios->c_cflag;
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	char buf[1];
 
 	dbg("%s", __func__);
 
-	mutex_lock(&port->serial->disc_mutex);
-	if (c_cflag & HUPCL && !port->serial->disconnected) {
-		/* Disable flow control */
-		if (usb_control_msg(port->serial->dev,
-				    usb_sndctrlpipe(port->serial->dev, 0),
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
-				    0, priv->interface, buf, 0,
-				    WDR_TIMEOUT) < 0) {
-			dev_err(&port->dev, "error from flowcontrol urb\n");
-		}
-
-		/* drop RTS and DTR */
-		clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-	} /* Note change no line if hupcl is off */
-	mutex_unlock(&port->serial->disc_mutex);
 
 	/* cancel any scheduled reading */
 	cancel_delayed_work_sync(&priv->rx_work);
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 586d30f..ee25a3f 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -993,8 +993,7 @@ static int garmin_open(struct tty_struct *tty,
 }
 
 
-static void garmin_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void garmin_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct garmin_data *garmin_data_p = usb_get_serial_port_data(port);
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 4cec990..be82ea9 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -184,8 +184,7 @@ int usb_serial_generic_resume(struct usb_serial *serial)
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_resume);
 
-void usb_serial_generic_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+void usb_serial_generic_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 	generic_cleanup(port);
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index fb4a73d..53ef599 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -207,8 +207,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb);
 /* function prototypes for the usbserial callbacks */
 static int edge_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filp);
-static void edge_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp);
+static void edge_close(struct usb_serial_port *port);
 static int edge_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static int edge_write_room(struct tty_struct *tty);
@@ -965,7 +964,7 @@ static int edge_open(struct tty_struct *tty,
 
 	if (!edge_port->txfifo.fifo) {
 		dbg("%s - no memory", __func__);
-		edge_close(tty, port, filp);
+		edge_close(port);
 		return -ENOMEM;
 	}
 
@@ -975,7 +974,7 @@ static int edge_open(struct tty_struct *tty,
 
 	if (!edge_port->write_urb) {
 		dbg("%s - no memory", __func__);
-		edge_close(tty, port, filp);
+		edge_close(port);
 		return -ENOMEM;
 	}
 
@@ -1099,8 +1098,7 @@ static void block_until_tx_empty(struct edgeport_port *edge_port)
  * edge_close
  *	this function is called by the tty driver when a port is closed
  *****************************************************************************/
-static void edge_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
 	struct edgeport_serial *edge_serial;
 	struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 513b25e..eabf20e 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2009,8 +2009,7 @@ release_es_lock:
 	return status;
 }
 
-static void edge_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
 	struct edgeport_serial *edge_serial;
 	struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index cd62825..c610a99 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -76,8 +76,7 @@ static int initial_wait;
 /* Function prototypes for an ipaq */
 static int  ipaq_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void ipaq_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void ipaq_close(struct usb_serial_port *port);
 static int  ipaq_calc_num_ports(struct usb_serial *serial);
 static int  ipaq_startup(struct usb_serial *serial);
 static void ipaq_shutdown(struct usb_serial *serial);
@@ -714,8 +713,7 @@ error:
 }
 
 
-static void ipaq_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ipaq_close(struct usb_serial_port *port)
 {
 	struct ipaq_private	*priv = usb_get_serial_port_data(port);
 
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index da2a2b4..29ad038 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -302,23 +302,17 @@ static int ipw_open(struct tty_struct *tty,
 	return 0;
 }
 
-static void ipw_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ipw_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct usb_device *dev = port->serial->dev;
 	int result;
 
-	if (tty_hung_up_p(filp)) {
-		dbg("%s: tty_hung_up_p ...", __func__);
-		return;
-	}
-
 	/*--1: drop the dtr */
 	dbg("%s:dropping dtr", __func__);
 	result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			 IPW_SIO_SET_PIN,
 			 USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT,
-			 IPW_PIN_CLRDTR,
+			 on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR,
 			 0,
 			 NULL,
 			 0,
@@ -332,7 +326,7 @@ static void ipw_close(struct tty_struct *tty,
 	result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			 IPW_SIO_SET_PIN, USB_TYPE_VENDOR |
 			 		USB_RECIP_INTERFACE | USB_DIR_OUT,
-			 IPW_PIN_CLRRTS,
+			 on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS,
 			 0,
 			 NULL,
 			 0,
@@ -340,7 +334,12 @@ static void ipw_close(struct tty_struct *tty,
 	if (result < 0)
 		dev_err(&port->dev,
 				"dropping rts failed (error = %d)\n", result);
+}
 
+static void ipw_close(struct usb_serial_port *port)
+{
+	struct usb_device *dev = port->serial->dev;
+	int result;
 
 	/*--3: purge */
 	dbg("%s:sending purge", __func__);
@@ -461,6 +460,7 @@ static struct usb_serial_driver ipw_device = {
 	.num_ports =		1,
 	.open =			ipw_open,
 	.close =		ipw_close,
+	.dtr_rts =		ipw_dtr_rts,
 	.port_probe = 		ipw_probe,
 	.port_remove =		ipw_disconnect,
 	.write =		ipw_write,
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 4e2cda9..66009b6 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -88,8 +88,7 @@ static int xbof = -1;
 static int  ir_startup (struct usb_serial *serial);
 static int  ir_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filep);
-static void ir_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filep);
+static void ir_close(struct usb_serial_port *port);
 static int  ir_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static void ir_write_bulk_callback (struct urb *urb);
@@ -346,8 +345,7 @@ static int ir_open(struct tty_struct *tty,
 	return result;
 }
 
-static void ir_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file * filp)
+static void ir_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 4473d44..bb572ce 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -70,7 +70,6 @@ static void read_rxcmd_callback(struct urb *urb);
 struct iuu_private {
 	spinlock_t lock;	/* store irq state */
 	wait_queue_head_t delta_msr_wait;
-	u8 line_control;
 	u8 line_status;
 	u8 termios_initialized;
 	int tiostatus;		/* store IUART SIGNAL for tiocmget call */
@@ -946,19 +945,10 @@ static int iuu_uart_baud(struct usb_serial_port *port, u32 baud,
 	return status;
 }
 
-static int set_control_lines(struct usb_device *dev, u8 value)
-{
-	return 0;
-}
-
-static void iuu_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void iuu_close(struct usb_serial_port *port)
 {
 	/* iuu_led (port,255,0,0,0); */
 	struct usb_serial *serial;
-	struct iuu_private *priv = usb_get_serial_port_data(port);
-	unsigned long flags;
-	unsigned int c_cflag;
 
 	serial = port->serial;
 	if (!serial)
@@ -968,17 +958,6 @@ static void iuu_close(struct tty_struct *tty,
 
 	iuu_uart_off(port);
 	if (serial->dev) {
-		if (tty) {
-			c_cflag = tty->termios->c_cflag;
-			if (c_cflag & HUPCL) {
-				/* drop DTR and RTS */
-				priv = usb_get_serial_port_data(port);
-				spin_lock_irqsave(&priv->lock, flags);
-				priv->line_control = 0;
-				spin_unlock_irqrestore(&priv->lock, flags);
-				set_control_lines(port->serial->dev, 0);
-			}
-		}
 		/* free writebuf */
 		/* shutdown our urbs */
 		dbg("%s - shutting down urbs", __func__);
@@ -1154,7 +1133,7 @@ static int iuu_open(struct tty_struct *tty,
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting read urb,"
 			" error %d\n", __func__, result);
-		iuu_close(tty, port, NULL);
+		iuu_close(port);
 		return -EPROTO;
 	} else {
 		dbg("%s - rxcmd OK", __func__);
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index 00daa8f..f1195a9 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -1298,8 +1298,16 @@ static inline void stop_urb(struct urb *urb)
 		usb_kill_urb(urb);
 }
 
-static void keyspan_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void keyspan_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct keyspan_port_private *p_priv = usb_get_serial_port_data(port);
+
+	p_priv->rts_state = on;
+	p_priv->dtr_state = on;
+	keyspan_send_setup(port, 0);
+}
+
+static void keyspan_close(struct usb_serial_port *port)
 {
 	int			i;
 	struct usb_serial	*serial = port->serial;
@@ -1336,7 +1344,6 @@ static void keyspan_close(struct tty_struct *tty,
 			stop_urb(p_priv->out_urbs[i]);
 		}
 	}
-	tty_port_tty_set(&port->port, NULL);
 }
 
 /* download the firmware to a pre-renumeration device */
diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h
index 38b4582..0d4569b 100644
--- a/drivers/usb/serial/keyspan.h
+++ b/drivers/usb/serial/keyspan.h
@@ -38,9 +38,8 @@
 static int  keyspan_open		(struct tty_struct *tty,
 					 struct usb_serial_port *port,
 					 struct file *filp);
-static void keyspan_close		(struct tty_struct *tty,
-					 struct usb_serial_port *port,
-					 struct file *filp);
+static void keyspan_close		(struct usb_serial_port *port);
+static void keyspan_dtr_rts		(struct usb_serial_port *port, int on);
 static int  keyspan_startup		(struct usb_serial *serial);
 static void keyspan_shutdown		(struct usb_serial *serial);
 static int  keyspan_write_room		(struct tty_struct *tty);
@@ -562,6 +561,7 @@ static struct usb_serial_driver keyspan_1port_device = {
 	.num_ports		= 1,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
@@ -582,6 +582,7 @@ static struct usb_serial_driver keyspan_2port_device = {
 	.num_ports		= 2,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
@@ -602,6 +603,7 @@ static struct usb_serial_driver keyspan_4port_device = {
 	.num_ports		= 4,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index bf1ae24..ab769db 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -651,6 +651,35 @@ static int keyspan_pda_chars_in_buffer(struct tty_struct *tty)
 }
 
 
+static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct usb_serial *serial = port->serial;
+
+	if (serial->dev) {
+		if (on)
+			keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2));
+		else
+			keyspan_pda_set_modem_info(serial, 0);
+	}
+}
+
+static int keyspan_pda_carrier_raised(struct usb_serial_port *port)
+{
+	struct usb_serial *serial = port->serial;
+	unsigned char modembits;
+
+	/* If we can read the modem status and the DCD is low then
+	   carrier is not raised yet */
+	if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) {
+		if (!(modembits & (1>>6)))
+			return 0;
+	}
+	/* Carrier raised, or we failed (eg disconnected) so
+	   progress accordingly */
+	return 1;
+}
+
+
 static int keyspan_pda_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp)
 {
@@ -682,13 +711,6 @@ static int keyspan_pda_open(struct tty_struct *tty,
 	priv->tx_room = room;
 	priv->tx_throttled = room ? 0 : 1;
 
-	/* the normal serial device seems to always turn on DTR and RTS here,
-	   so do the same */
-	if (tty && (tty->termios->c_cflag & CBAUD))
-		keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2));
-	else
-		keyspan_pda_set_modem_info(serial, 0);
-
 	/*Start reading from the device*/
 	port->interrupt_in_urb->dev = serial->dev;
 	rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
@@ -700,19 +722,11 @@ static int keyspan_pda_open(struct tty_struct *tty,
 error:
 	return rc;
 }
-
-
-static void keyspan_pda_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void keyspan_pda_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 
 	if (serial->dev) {
-		/* the normal serial device seems to always shut
-		   off DTR and RTS now */
-		if (tty->termios->c_cflag & HUPCL)
-			keyspan_pda_set_modem_info(serial, 0);
-
 		/* shutdown our bulk reads and writes */
 		usb_kill_urb(port->write_urb);
 		usb_kill_urb(port->interrupt_in_urb);
@@ -839,6 +853,8 @@ static struct usb_serial_driver keyspan_pda_device = {
 	.usb_driver = 		&keyspan_pda_driver,
 	.id_table =		id_table_std,
 	.num_ports =		1,
+	.dtr_rts =		keyspan_pda_dtr_rts,
+	.carrier_raised	=	keyspan_pda_carrier_raised,
 	.open =			keyspan_pda_open,
 	.close =		keyspan_pda_close,
 	.write =		keyspan_pda_write,
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index fcd9082..fa817c6 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -76,8 +76,7 @@ static int  klsi_105_startup(struct usb_serial *serial);
 static void klsi_105_shutdown(struct usb_serial *serial);
 static int  klsi_105_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void klsi_105_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void klsi_105_close(struct usb_serial_port *port);
 static int  klsi_105_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 static void klsi_105_write_bulk_callback(struct urb *urb);
@@ -447,8 +446,7 @@ exit:
 } /* klsi_105_open */
 
 
-static void klsi_105_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void klsi_105_close(struct usb_serial_port *port)
 {
 	struct klsi_105_private *priv = usb_get_serial_port_data(port);
 	int rc;
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index c148544..6b57049 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -72,8 +72,7 @@ static int  kobil_startup(struct usb_serial *serial);
 static void kobil_shutdown(struct usb_serial *serial);
 static int  kobil_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port,
-			struct file *filp);
+static void kobil_close(struct usb_serial_port *port);
 static int  kobil_write(struct tty_struct *tty, struct usb_serial_port *port,
 			 const unsigned char *buf, int count);
 static int  kobil_write_room(struct tty_struct *tty);
@@ -209,7 +208,7 @@ static void kobil_shutdown(struct usb_serial *serial)
 
 	for (i = 0; i < serial->num_ports; ++i) {
 		while (serial->port[i]->port.count > 0)
-			kobil_close(NULL, serial->port[i], NULL);
+			kobil_close(serial->port[i]);
 		kfree(usb_get_serial_port_data(serial->port[i]));
 		usb_set_serial_port_data(serial->port[i], NULL);
 	}
@@ -346,11 +345,11 @@ static int kobil_open(struct tty_struct *tty,
 }
 
 
-static void kobil_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void kobil_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
+	/* FIXME: Add rts/dtr methods */
 	if (port->write_urb) {
 		usb_kill_urb(port->write_urb);
 		usb_free_urb(port->write_urb);
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index 82930a7..8737955 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -95,8 +95,8 @@ static int  mct_u232_startup(struct usb_serial *serial);
 static void mct_u232_shutdown(struct usb_serial *serial);
 static int  mct_u232_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void mct_u232_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void mct_u232_close(struct usb_serial_port *port);
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on);
 static void mct_u232_read_int_callback(struct urb *urb);
 static void mct_u232_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
@@ -140,6 +140,7 @@ static struct usb_serial_driver mct_u232_device = {
 	.num_ports =	     1,
 	.open =		     mct_u232_open,
 	.close =	     mct_u232_close,
+	.dtr_rts =	     mct_u232_dtr_rts,
 	.throttle =	     mct_u232_throttle,
 	.unthrottle =	     mct_u232_unthrottle,
 	.read_int_callback = mct_u232_read_int_callback,
@@ -496,29 +497,29 @@ error:
 	return retval;
 } /* mct_u232_open */
 
-
-static void mct_u232_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on)
 {
-	unsigned int c_cflag;
 	unsigned int control_state;
 	struct mct_u232_private *priv = usb_get_serial_port_data(port);
-	dbg("%s port %d", __func__, port->number);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		mutex_lock(&port->serial->disc_mutex);
-		if (c_cflag & HUPCL && !port->serial->disconnected) {
-			/* drop DTR and RTS */
-			spin_lock_irq(&priv->lock);
+	mutex_lock(&port->serial->disc_mutex);
+	if (!port->serial->disconnected) {
+		/* drop DTR and RTS */
+		spin_lock_irq(&priv->lock);
+		if (on)
+			priv->control_state |= TIOCM_DTR | TIOCM_RTS;
+		else
 			priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS);
-			control_state = priv->control_state;
-			spin_unlock_irq(&priv->lock);
-			mct_u232_set_modem_ctrl(port->serial, control_state);
-		}
-		mutex_unlock(&port->serial->disc_mutex);
+		control_state = priv->control_state;
+		spin_unlock_irq(&priv->lock);
+		mct_u232_set_modem_ctrl(port->serial, control_state);
 	}
+	mutex_unlock(&port->serial->disc_mutex);
+}
 
+static void mct_u232_close(struct usb_serial_port *port)
+{
+	dbg("%s port %d", __func__, port->number);
 
 	if (port->serial->dev) {
 		/* shutdown our urbs */
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 24e3b5d..9e1a013 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -533,8 +533,7 @@ static int mos7720_chars_in_buffer(struct tty_struct *tty)
 	return chars;
 }
 
-static void mos7720_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mos7720_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial;
 	struct moschip_port *mos7720_port;
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 84fb1dc..10b78a3 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1135,54 +1135,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty)
 
 }
 
-/************************************************************************
- *
- * mos7840_block_until_tx_empty
- *
- *	This function will block the close until one of the following:
- *		1. TX count are 0
- *		2. The mos7840 has stopped
- *		3. A timeout of 3 seconds without activity has expired
- *
- ************************************************************************/
-static void mos7840_block_until_tx_empty(struct tty_struct *tty,
-				struct moschip_port *mos7840_port)
-{
-	int timeout = HZ / 10;
-	int wait = 30;
-	int count;
-
-	while (1) {
-
-		count = mos7840_chars_in_buffer(tty);
-
-		/* Check for Buffer status */
-		if (count <= 0)
-			return;
-
-		/* Block the thread for a while */
-		interruptible_sleep_on_timeout(&mos7840_port->wait_chase,
-					       timeout);
-
-		/* No activity.. count down section */
-		wait--;
-		if (wait == 0) {
-			dbg("%s - TIMEOUT", __func__);
-			return;
-		} else {
-			/* Reset timeout value back to seconds */
-			wait = 30;
-		}
-	}
-}
-
 /*****************************************************************************
  * mos7840_close
  *	this function is called by the tty driver when a port is closed
  *****************************************************************************/
 
-static void mos7840_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mos7840_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial;
 	struct moschip_port *mos7840_port;
@@ -1223,10 +1181,6 @@ static void mos7840_close(struct tty_struct *tty,
 		}
 	}
 
-	if (serial->dev)
-		/* flush and block until tx is empty */
-		mos7840_block_until_tx_empty(tty, mos7840_port);
-
 	/* While closing port, shutdown all bulk read, write  *
 	 * and interrupt read if they exists                  */
 	if (serial->dev) {
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index bcdcbb8..f5f3751a8 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -98,8 +98,7 @@ static int navman_open(struct tty_struct *tty,
 	return result;
 }
 
-static void navman_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void navman_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index df65397..1104617 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -66,8 +66,7 @@ static int debug;
 /* function prototypes */
 static int  omninet_open(struct tty_struct *tty, struct usb_serial_port *port,
 							struct file *filp);
-static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *filp);
+static void omninet_close(struct usb_serial_port *port);
 static void omninet_read_bulk_callback(struct urb *urb);
 static void omninet_write_bulk_callback(struct urb *urb);
 static int  omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
@@ -189,8 +188,7 @@ static int omninet_open(struct tty_struct *tty,
 	return result;
 }
 
-static void omninet_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void omninet_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 	usb_kill_urb(port->read_urb);
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index b500ad1..c20480a 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -173,8 +173,7 @@ static int opticon_open(struct tty_struct *tty, struct usb_serial_port *port,
 	return result;
 }
 
-static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port,
-			  struct file *filp)
+static void opticon_close(struct usb_serial_port *port)
 {
 	struct opticon_private *priv = usb_get_serial_data(port->serial);
 
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 7817b82..a16d69f 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -45,8 +45,9 @@
 /* Function prototypes */
 static int  option_open(struct tty_struct *tty, struct usb_serial_port *port,
 							struct file *filp);
-static void option_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *filp);
+static void option_close(struct usb_serial_port *port);
+static void option_dtr_rts(struct usb_serial_port *port, int on);
+
 static int  option_startup(struct usb_serial *serial);
 static void option_shutdown(struct usb_serial *serial);
 static int  option_write_room(struct tty_struct *tty);
@@ -61,7 +62,7 @@ static void option_set_termios(struct tty_struct *tty,
 static int  option_tiocmget(struct tty_struct *tty, struct file *file);
 static int  option_tiocmset(struct tty_struct *tty, struct file *file,
 				unsigned int set, unsigned int clear);
-static int  option_send_setup(struct tty_struct *tty, struct usb_serial_port *port);
+static int  option_send_setup(struct usb_serial_port *port);
 static int  option_suspend(struct usb_serial *serial, pm_message_t message);
 static int  option_resume(struct usb_serial *serial);
 
@@ -551,6 +552,7 @@ static struct usb_serial_driver option_1port_device = {
 	.num_ports         = 1,
 	.open              = option_open,
 	.close             = option_close,
+	.dtr_rts	   = option_dtr_rts,
 	.write             = option_write,
 	.write_room        = option_write_room,
 	.chars_in_buffer   = option_chars_in_buffer,
@@ -630,7 +632,7 @@ static void option_set_termios(struct tty_struct *tty,
 	dbg("%s", __func__);
 	/* Doesn't support option setting */
 	tty_termios_copy_hw(tty->termios, old_termios);
-	option_send_setup(tty, port);
+	option_send_setup(port);
 }
 
 static int option_tiocmget(struct tty_struct *tty, struct file *file)
@@ -669,7 +671,7 @@ static int option_tiocmset(struct tty_struct *tty, struct file *file,
 		portdata->rts_state = 0;
 	if (clear & TIOCM_DTR)
 		portdata->dtr_state = 0;
-	return option_send_setup(tty, port);
+	return option_send_setup(port);
 }
 
 /* Write */
@@ -897,10 +899,6 @@ static int option_open(struct tty_struct *tty,
 
 	dbg("%s", __func__);
 
-	/* Set some sane defaults */
-	portdata->rts_state = 1;
-	portdata->dtr_state = 1;
-
 	/* Reset low level data toggle and start reading from endpoints */
 	for (i = 0; i < N_IN_URB; i++) {
 		urb = portdata->in_urbs[i];
@@ -936,37 +934,43 @@ static int option_open(struct tty_struct *tty,
 				usb_pipeout(urb->pipe), 0); */
 	}
 
-	option_send_setup(tty, port);
+	option_send_setup(port);
 
 	return 0;
 }
 
-static void option_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void option_dtr_rts(struct usb_serial_port *port, int on)
 {
-	int i;
 	struct usb_serial *serial = port->serial;
 	struct option_port_private *portdata;
 
 	dbg("%s", __func__);
 	portdata = usb_get_serial_port_data(port);
+	mutex_lock(&serial->disc_mutex);
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
+	if (serial->dev)
+		option_send_setup(port);
+	mutex_unlock(&serial->disc_mutex);
+}
 
-	portdata->rts_state = 0;
-	portdata->dtr_state = 0;
 
-	if (serial->dev) {
-		mutex_lock(&serial->disc_mutex);
-		if (!serial->disconnected)
-			option_send_setup(tty, port);
-		mutex_unlock(&serial->disc_mutex);
+static void option_close(struct usb_serial_port *port)
+{
+	int i;
+	struct usb_serial *serial = port->serial;
+	struct option_port_private *portdata;
+
+	dbg("%s", __func__);
+	portdata = usb_get_serial_port_data(port);
 
+	if (serial->dev) {
 		/* Stop reading/writing urbs */
 		for (i = 0; i < N_IN_URB; i++)
 			usb_kill_urb(portdata->in_urbs[i]);
 		for (i = 0; i < N_OUT_URB; i++)
 			usb_kill_urb(portdata->out_urbs[i]);
 	}
-	tty_port_tty_set(&port->port, NULL);
 }
 
 /* Helper functions used by option_setup_urbs */
@@ -1032,28 +1036,24 @@ static void option_setup_urbs(struct usb_serial *serial)
  * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN
  * CDC.
 */
-static int option_send_setup(struct tty_struct *tty,
-						struct usb_serial_port *port)
+static int option_send_setup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct option_port_private *portdata;
 	int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber;
+	int val = 0;
 	dbg("%s", __func__);
 
 	portdata = usb_get_serial_port_data(port);
 
-	if (tty) {
-		int val = 0;
-		if (portdata->dtr_state)
-			val |= 0x01;
-		if (portdata->rts_state)
-			val |= 0x02;
+	if (portdata->dtr_state)
+		val |= 0x01;
+	if (portdata->rts_state)
+		val |= 0x02;
 
-		return usb_control_msg(serial->dev,
-			usb_rcvctrlpipe(serial->dev, 0),
-			0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
-	}
-	return 0;
+	return usb_control_msg(serial->dev,
+		usb_rcvctrlpipe(serial->dev, 0),
+		0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
 }
 
 static int option_startup(struct usb_serial *serial)
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index ba551f0..7de5478 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -143,8 +143,7 @@ struct oti6858_control_pkt {
 /* function prototypes */
 static int oti6858_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void oti6858_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void oti6858_close(struct usb_serial_port *port);
 static void oti6858_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
 static int oti6858_ioctl(struct tty_struct *tty, struct file *file,
@@ -622,67 +621,30 @@ static int oti6858_open(struct tty_struct *tty,
 	if (result != 0) {
 		dev_err(&port->dev, "%s(): usb_submit_urb() failed"
 			       " with error %d\n", __func__, result);
-		oti6858_close(tty, port, NULL);
+		oti6858_close(port);
 		return -EPROTO;
 	}
 
 	/* setup termios */
 	if (tty)
 		oti6858_set_termios(tty, port, &tmp_termios);
-
+	port->port.drain_delay = 256;	/* FIXME: check the FIFO length */
 	return 0;
 }
 
-static void oti6858_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void oti6858_close(struct usb_serial_port *port)
 {
 	struct oti6858_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	long timeout;
-	wait_queue_t wait;
 
 	dbg("%s(port = %d)", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = 30 * HZ;	/* PL2303_CLOSING_WAIT */
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	dbg("%s(): entering wait loop", __func__);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (oti6858_buf_data_avail(priv->buf) == 0
-		|| timeout == 0 || signal_pending(current)
-		|| port->serial->disconnected)
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-	dbg("%s(): after wait loop", __func__);
-
 	/* clear out any remaining data in the buffer */
 	oti6858_buf_clear(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device */
-	/* (this is long enough for the entire 256 byte */
-	/* pl2303 hardware buffer to drain with no flow */
-	/* control for data rates of 1200 bps or more, */
-	/* for lower rates we should really know how much */
-	/* data is in the buffer to compute a delay */
-	/* that is not unnecessarily long) */
-	/* FIXME
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560)/bps,HZ/10);
-	else
-	*/
-		timeout = 2*HZ;
-	schedule_timeout_interruptible(timeout);
-	dbg("%s(): after schedule_timeout_interruptible()", __func__);
+	dbg("%s(): after buf_clear()", __func__);
 
 	/* cancel scheduled setup */
 	cancel_delayed_work(&priv->delayed_setup_work);
@@ -694,15 +656,6 @@ static void oti6858_close(struct tty_struct *tty,
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
-
-	/*
-	if (tty && (tty->termios->c_cflag) & HUPCL) {
-		// drop DTR and RTS
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->pending_setup.control &= ~CONTROL_MASK;
-		spin_unlock_irqrestore(&priv->lock, flags);
-	}
-	*/
 }
 
 static int oti6858_tiocmset(struct tty_struct *tty, struct file *file,
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 751a533..e02dc3d 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -652,69 +652,41 @@ static void pl2303_set_termios(struct tty_struct *tty,
 	kfree(buf);
 }
 
-static void pl2303_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void pl2303_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct pl2303_private *priv = usb_get_serial_port_data(port);
+	unsigned long flags;
+	u8 control;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	/* Change DTR and RTS */
+	if (on)
+		priv->line_control |= (CONTROL_DTR | CONTROL_RTS);
+	else
+		priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS);
+	control = priv->line_control;
+	spin_unlock_irqrestore(&priv->lock, flags);
+	set_control_lines(port->serial->dev, control);
+}
+
+static void pl2303_close(struct usb_serial_port *port)
 {
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = PL2303_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (pl2303_buf_data_avail(priv->buf) == 0 ||
-		    timeout == 0 || signal_pending(current) ||
-		    port->serial->disconnected)
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
 	/* clear out any remaining data in the buffer */
 	pl2303_buf_clear(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device */
-	/* (this is long enough for the entire 256 byte */
-	/* pl2303 hardware buffer to drain with no flow */
-	/* control for data rates of 1200 bps or more, */
-	/* for lower rates we should really know how much */
-	/* data is in the buffer to compute a delay */
-	/* that is not unnecessarily long) */
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560)/bps, HZ/10);
-	else
-		timeout = 2*HZ;
-	schedule_timeout_interruptible(timeout);
-
 	/* shutdown our urbs */
 	dbg("%s - shutting down urbs", __func__);
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop DTR and RTS */
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			set_control_lines(port->serial->dev, 0);
-		}
-	}
 }
 
 static int pl2303_open(struct tty_struct *tty,
@@ -748,7 +720,7 @@ static int pl2303_open(struct tty_struct *tty,
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting read urb,"
 			" error %d\n", __func__, result);
-		pl2303_close(tty, port, NULL);
+		pl2303_close(port);
 		return -EPROTO;
 	}
 
@@ -758,9 +730,10 @@ static int pl2303_open(struct tty_struct *tty,
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting interrupt urb,"
 			" error %d\n", __func__, result);
-		pl2303_close(tty, port, NULL);
+		pl2303_close(port);
 		return -EPROTO;
 	}
+	port->port.drain_delay = 256;
 	return 0;
 }
 
@@ -821,6 +794,14 @@ static int pl2303_tiocmget(struct tty_struct *tty, struct file *file)
 	return result;
 }
 
+static int pl2303_carrier_raised(struct usb_serial_port *port)
+{
+	struct pl2303_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & UART_DCD)
+		return 1;
+	return 0;
+}
+
 static int wait_modem_info(struct usb_serial_port *port, unsigned int arg)
 {
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
@@ -1125,6 +1106,8 @@ static struct usb_serial_driver pl2303_device = {
 	.num_ports =		1,
 	.open =			pl2303_open,
 	.close =		pl2303_close,
+	.dtr_rts = 		pl2303_dtr_rts,
+	.carrier_raised =	pl2303_carrier_raised,
 	.write =		pl2303_write,
 	.ioctl =		pl2303_ioctl,
 	.break_ctl =		pl2303_break_ctl,
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 913225c..1319b89 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -240,57 +240,39 @@ struct sierra_port_private {
 	int ri_state;
 };
 
-static int sierra_send_setup(struct tty_struct *tty,
-						struct usb_serial_port *port)
+static int sierra_send_setup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 	__u16 interface = 0;
+	int val = 0;
 
 	dev_dbg(&port->dev, "%s", __func__);
 
 	portdata = usb_get_serial_port_data(port);
 
-	if (tty) {
-		int val = 0;
-		if (portdata->dtr_state)
-			val |= 0x01;
-		if (portdata->rts_state)
-			val |= 0x02;
-
-		/* If composite device then properly report interface */
-		if (serial->num_ports == 1) {
-			interface = sierra_calc_interface(serial);
-
-			/* Control message is sent only to interfaces with
-			 * interrupt_in endpoints
-			 */
-			if (port->interrupt_in_urb) {
-				/* send control message */
-				return usb_control_msg(serial->dev,
-					usb_rcvctrlpipe(serial->dev, 0),
-					0x22, 0x21, val, interface,
-					NULL, 0, USB_CTRL_SET_TIMEOUT);
-			}
-		}
-
-		/* Otherwise the need to do non-composite mapping */
-		else {
-			if (port->bulk_out_endpointAddress == 2)
-				interface = 0;
-			else if (port->bulk_out_endpointAddress == 4)
-				interface = 1;
-			else if (port->bulk_out_endpointAddress == 5)
-				interface = 2;
-
-			return usb_control_msg(serial->dev,
-				usb_rcvctrlpipe(serial->dev, 0),
-				0x22, 0x21, val, interface,
-				NULL, 0, USB_CTRL_SET_TIMEOUT);
-
-		}
+	if (portdata->dtr_state)
+		val |= 0x01;
+	if (portdata->rts_state)
+		val |= 0x02;
+
+	/* If composite device then properly report interface */
+	if (serial->num_ports == 1)
+		interface = sierra_calc_interface(serial);
+
+	/* Otherwise the need to do non-composite mapping */
+	else {
+		if (port->bulk_out_endpointAddress == 2)
+			interface = 0;
+		else if (port->bulk_out_endpointAddress == 4)
+			interface = 1;
+		else if (port->bulk_out_endpointAddress == 5)
+			interface = 2;
 	}
-
+	return usb_control_msg(serial->dev,
+			usb_rcvctrlpipe(serial->dev, 0),
+			0x22, 0x21, val, interface,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
 	return 0;
 }
 
@@ -299,7 +281,7 @@ static void sierra_set_termios(struct tty_struct *tty,
 {
 	dev_dbg(&port->dev, "%s", __func__);
 	tty_termios_copy_hw(tty->termios, old_termios);
-	sierra_send_setup(tty, port);
+	sierra_send_setup(port);
 }
 
 static int sierra_tiocmget(struct tty_struct *tty, struct file *file)
@@ -338,7 +320,7 @@ static int sierra_tiocmset(struct tty_struct *tty, struct file *file,
 		portdata->rts_state = 0;
 	if (clear & TIOCM_DTR)
 		portdata->dtr_state = 0;
-	return sierra_send_setup(tty, port);
+	return sierra_send_setup(port);
 }
 
 static void sierra_outdat_callback(struct urb *urb)
@@ -598,7 +580,7 @@ static int sierra_open(struct tty_struct *tty,
 		}
 	}
 
-	sierra_send_setup(tty, port);
+	sierra_send_setup(port);
 
 	/* start up the interrupt endpoint if we have one */
 	if (port->interrupt_in_urb) {
@@ -610,32 +592,38 @@ static int sierra_open(struct tty_struct *tty,
 	return 0;
 }
 
-static void sierra_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void sierra_dtr_rts(struct usb_serial_port *port, int on)
 {
-	int i;
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 
-	dev_dbg(&port->dev, "%s", __func__);
 	portdata = usb_get_serial_port_data(port);
-
-	portdata->rts_state = 0;
-	portdata->dtr_state = 0;
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
 
 	if (serial->dev) {
 		mutex_lock(&serial->disc_mutex);
 		if (!serial->disconnected)
-			sierra_send_setup(tty, port);
+			sierra_send_setup(port);
 		mutex_unlock(&serial->disc_mutex);
+	}
+}
+
+static void sierra_close(struct usb_serial_port *port)
+{
+	int i;
+	struct usb_serial *serial = port->serial;
+	struct sierra_port_private *portdata;
 
+	dev_dbg(&port->dev, "%s", __func__);
+	portdata = usb_get_serial_port_data(port);
+
+	if (serial->dev) {
 		/* Stop reading/writing urbs */
 		for (i = 0; i < N_IN_URB; i++)
 			usb_kill_urb(portdata->in_urbs[i]);
 	}
-
 	usb_kill_urb(port->interrupt_in_urb);
-	tty_port_tty_set(&port->port, NULL);
 }
 
 static int sierra_startup(struct usb_serial *serial)
@@ -737,6 +725,7 @@ static struct usb_serial_driver sierra_device = {
 	.probe		   = sierra_probe,
 	.open              = sierra_open,
 	.close             = sierra_close,
+	.dtr_rts	   = sierra_dtr_rts,
 	.write             = sierra_write,
 	.write_room        = sierra_write_room,
 	.set_termios       = sierra_set_termios,
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 5e7528c..8f7ed8f 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -446,66 +446,47 @@ static void spcp8x5_set_workMode(struct usb_device *dev, u16 value,
 			"RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret);
 }
 
+static int spcp8x5_carrier_raised(struct usb_serial_port *port)
+{
+	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & MSR_STATUS_LINE_DCD)
+		return 1;
+	return 0;
+}
+
+static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+	unsigned long flags;
+	u8 control;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (on)
+		priv->line_control = MCR_CONTROL_LINE_DTR
+						| MCR_CONTROL_LINE_RTS;
+	else
+		priv->line_control &= ~ (MCR_CONTROL_LINE_DTR
+						| MCR_CONTROL_LINE_RTS);
+	control = priv->line_control;
+	spin_unlock_irqrestore(&priv->lock, flags);
+	spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type);
+}
+
 /* close the serial port. We should wait for data sending to device 1st and
  * then kill all urb. */
-static void spcp8x5_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void spcp8x5_close(struct usb_serial_port *port)
 {
 	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
 	int result;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = SPCP8x5_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (ringbuf_avail_data(priv->buf) == 0 ||
-		    timeout == 0 || signal_pending(current))
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-
 	/* clear out any remaining data in the buffer */
 	clear_ringbuf(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device (this is long enough
-	 * for the entire all byte spcp8x5 hardware buffer to drain with no
-	 * flow control for data rates of 1200 bps or more, for lower rates we
-	 * should really know how much data is in the buffer to compute a delay
-	 * that is not unnecessarily long) */
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560) / bps, HZ/10);
-	else
-		timeout = 2*HZ;
-	set_current_state(TASK_INTERRUPTIBLE);
-	schedule_timeout(timeout);
-
-	/* clear control lines */
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type);
-		}
-	}
-
 	/* kill urb */
 	if (port->write_urb != NULL) {
 		result = usb_unlink_urb(port->write_urb);
@@ -665,13 +646,6 @@ static int spcp8x5_open(struct tty_struct *tty,
 	if (ret)
 		return ret;
 
-	spin_lock_irqsave(&priv->lock, flags);
-	if (tty && (tty->termios->c_cflag & CBAUD))
-		priv->line_control = MCR_DTR | MCR_RTS;
-	else
-		priv->line_control = 0;
-	spin_unlock_irqrestore(&priv->lock, flags);
-
 	spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type);
 
 	/* Setup termios */
@@ -691,9 +665,10 @@ static int spcp8x5_open(struct tty_struct *tty,
 	port->read_urb->dev = serial->dev;
 	ret = usb_submit_urb(port->read_urb, GFP_KERNEL);
 	if (ret) {
-		spcp8x5_close(tty, port, NULL);
+		spcp8x5_close(port);
 		return -EPROTO;
 	}
+	port->port.drain_delay = 256;
 	return 0;
 }
 
@@ -1033,6 +1008,8 @@ static struct usb_serial_driver spcp8x5_device = {
 	.num_ports		= 1,
 	.open 			= spcp8x5_open,
 	.close 			= spcp8x5_close,
+	.dtr_rts		= spcp8x5_dtr_rts,
+	.carrier_raised		= spcp8x5_carrier_raised,
 	.write 			= spcp8x5_write,
 	.set_termios 		= spcp8x5_set_termios,
 	.ioctl 			= spcp8x5_ioctl,
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index 69879e4..8b07ebc 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -152,8 +152,7 @@ static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port,
 	return result;
 }
 
-static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port,
-			  struct file *filp)
+static void symbol_close(struct usb_serial_port *port)
 {
 	struct symbol_private *priv = usb_get_serial_data(port->serial);
 
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 0a64bac..42cb04c 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -100,8 +100,7 @@ static int ti_startup(struct usb_serial *serial);
 static void ti_shutdown(struct usb_serial *serial);
 static int ti_open(struct tty_struct *tty, struct usb_serial_port *port,
 		struct file *file);
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-		struct file *file);
+static void ti_close(struct usb_serial_port *port);
 static int ti_write(struct tty_struct *tty, struct usb_serial_port *port,
 		const unsigned char *data, int count);
 static int ti_write_room(struct tty_struct *tty);
@@ -647,8 +646,7 @@ release_lock:
 }
 
 
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *file)
+static void ti_close(struct usb_serial_port *port)
 {
 	struct ti_device *tdev;
 	struct ti_port *tport;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index f331e2b..1967a7e 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -238,9 +238,11 @@ static int serial_open (struct tty_struct *tty, struct file *filp)
 			goto bailout_interface_put;
 		mutex_unlock(&serial->disc_mutex);
 	}
-
 	mutex_unlock(&port->mutex);
-	return 0;
+	/* Now do the correct tty layer semantics */
+	retval = tty_port_block_til_ready(&port->port, tty, filp);
+	if (retval == 0)
+		return 0;
 
 bailout_interface_put:
 	usb_autopm_put_interface(serial->interface);
@@ -259,64 +261,89 @@ bailout_serial_put:
 	return retval;
 }
 
-static void serial_close(struct tty_struct *tty, struct file *filp)
+/**
+ *	serial_do_down		-	shut down hardware
+ *	@port: port to shut down
+ *
+ *	Shut down a USB port unless it is the console. We never shut down the
+ *	console hardware as it will always be in use.
+ *
+ *	Don't free any resources at this point
+ */
+static void serial_do_down(struct usb_serial_port *port)
 {
-	struct usb_serial_port *port = tty->driver_data;
+	struct usb_serial_driver *drv = port->serial->type;
 	struct usb_serial *serial;
 	struct module *owner;
-	int count;
 
-	if (!port)
+	/* The console is magical, do not hang up the console hardware
+	   or there will be tears */
+	if (port->console)
 		return;
 
-	dbg("%s - port %d", __func__, port->number);
-
 	mutex_lock(&port->mutex);
 	serial = port->serial;
 	owner = serial->type->driver.owner;
 
-	if (port->port.count == 0) {
-		mutex_unlock(&port->mutex);
-		return;
-	}
-
-	if (port->port.count == 1)
-		/* only call the device specific close if this
-		 * port is being closed by the last owner. Ensure we do
-		 * this before we drop the port count. The call is protected
-		 * by the port mutex
-		 */
-		serial->type->close(tty, port, filp);
-
-	if (port->port.count == (port->console ? 2 : 1)) {
-		struct tty_struct *tty = tty_port_tty_get(&port->port);
-		if (tty) {
-			/* We must do this before we drop the port count to
-			   zero. */
-			if (tty->driver_data)
-				tty->driver_data = NULL;
-			tty_port_tty_set(&port->port, NULL);
-			tty_kref_put(tty);
-		}
-	}
+	if (drv->close)
+		drv->close(port);
 
-	--port->port.count;
-	count = port->port.count;
 	mutex_unlock(&port->mutex);
-	put_device(&port->dev);
+}
+
+/**
+ *	serial_do_free		-	free resources post close/hangup
+ *	@port: port to free up
+ *
+ *	Do the resource freeing and refcount dropping for the port. We must
+ *	be careful about ordering and we must avoid freeing up the console.
+ */
 
+static void serial_do_free(struct usb_serial_port *port)
+{
+	struct usb_serial *serial;
+	struct module *owner;
+
+	/* The console is magical, do not hang up the console hardware
+	   or there will be tears */
+	if (port->console)
+		return;
+
+	serial = port->serial;
+	owner = serial->type->driver.owner;
+	put_device(&port->dev);
 	/* Mustn't dereference port any more */
-	if (count == 0) {
-		mutex_lock(&serial->disc_mutex);
-		if (!serial->disconnected)
-			usb_autopm_put_interface(serial->interface);
-		mutex_unlock(&serial->disc_mutex);
-	}
+	mutex_lock(&serial->disc_mutex);
+	if (!serial->disconnected)
+		usb_autopm_put_interface(serial->interface);
+	mutex_unlock(&serial->disc_mutex);
 	usb_serial_put(serial);
-
 	/* Mustn't dereference serial any more */
-	if (count == 0)
-		module_put(owner);
+	module_put(owner);
+}
+
+static void serial_close(struct tty_struct *tty, struct file *filp)
+{
+	struct usb_serial_port *port = tty->driver_data;
+
+	dbg("%s - port %d", __func__, port->number);
+
+
+	if (tty_port_close_start(&port->port, tty, filp) == 0)
+		return;
+
+	serial_do_down(port);		
+	tty_port_close_end(&port->port, tty);
+	tty_port_tty_set(&port->port, NULL);
+	serial_do_free(port);
+}
+
+static void serial_hangup(struct tty_struct *tty)
+{
+	struct usb_serial_port *port = tty->driver_data;
+	serial_do_down(port);
+	tty_port_hangup(&port->port);
+	serial_do_free(port);
 }
 
 static int serial_write(struct tty_struct *tty, const unsigned char *buf,
@@ -648,6 +675,29 @@ static struct usb_serial_driver *search_serial_device(
 	return NULL;
 }
 
+static int serial_carrier_raised(struct tty_port *port)
+{
+	struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
+	struct usb_serial_driver *drv = p->serial->type;
+	if (drv->carrier_raised)
+		return drv->carrier_raised(p);
+	/* No carrier control - don't block */
+	return 1;	
+}
+
+static void serial_dtr_rts(struct tty_port *port, int on)
+{
+	struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
+	struct usb_serial_driver *drv = p->serial->type;
+	if (drv->dtr_rts)
+		drv->dtr_rts(p, on);
+}
+
+static const struct tty_port_operations serial_port_ops = {
+	.carrier_raised = serial_carrier_raised,
+	.dtr_rts = serial_dtr_rts,
+};
+
 int usb_serial_probe(struct usb_interface *interface,
 			       const struct usb_device_id *id)
 {
@@ -841,6 +891,7 @@ int usb_serial_probe(struct usb_interface *interface,
 		if (!port)
 			goto probe_error;
 		tty_port_init(&port->port);
+		port->port.ops = &serial_port_ops;
 		port->serial = serial;
 		spin_lock_init(&port->lock);
 		mutex_init(&port->mutex);
@@ -1071,6 +1122,9 @@ void usb_serial_disconnect(struct usb_interface *interface)
 		if (port) {
 			struct tty_struct *tty = tty_port_tty_get(&port->port);
 			if (tty) {
+				/* The hangup will occur asynchronously but
+				   the object refcounts will sort out all the
+				   cleanup */
 				tty_hangup(tty);
 				tty_kref_put(tty);
 			}
@@ -1135,6 +1189,7 @@ static const struct tty_operations serial_ops = {
 	.open =			serial_open,
 	.close =		serial_close,
 	.write =		serial_write,
+	.hangup = 		serial_hangup,
 	.write_room =		serial_write_room,
 	.ioctl =		serial_ioctl,
 	.set_termios =		serial_set_termios,
@@ -1147,6 +1202,7 @@ static const struct tty_operations serial_ops = {
 	.proc_fops =		&serial_proc_fops,
 };
 
+
 struct tty_driver *usb_serial_tty_driver;
 
 static int __init usb_serial_init(void)
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 5ac414b..b15f1c0 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -38,8 +38,7 @@
 /* function prototypes for a handspring visor */
 static int  visor_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filp);
-static void visor_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp);
+static void visor_close(struct usb_serial_port *port);
 static int  visor_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static int  visor_write_room(struct tty_struct *tty);
@@ -324,8 +323,7 @@ exit:
 }
 
 
-static void visor_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void visor_close(struct usb_serial_port *port)
 {
 	struct visor_private *priv = usb_get_serial_port_data(port);
 	unsigned char *transfer_buffer;
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 5335d32..7c7295d 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -147,8 +147,7 @@ static int  whiteheat_attach(struct usb_serial *serial);
 static void whiteheat_shutdown(struct usb_serial *serial);
 static int  whiteheat_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void whiteheat_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void whiteheat_close(struct usb_serial_port *port);
 static int  whiteheat_write(struct tty_struct *tty,
 			struct usb_serial_port *port,
 			const unsigned char *buf, int count);
@@ -712,8 +711,7 @@ exit:
 }
 
 
-static void whiteheat_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void whiteheat_close(struct usb_serial_port *port)
 {
 	struct whiteheat_private *info = usb_get_serial_port_data(port);
 	struct whiteheat_urb_wrap *wrap;
@@ -723,31 +721,7 @@ static void whiteheat_close(struct tty_struct *tty,
 
 	dbg("%s - port %d", __func__, port->number);
 
-	mutex_lock(&port->serial->disc_mutex);
-	/* filp is NULL when called from usb_serial_disconnect */
-	if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) {
-		mutex_unlock(&port->serial->disc_mutex);
-		return;
-	}
-	mutex_unlock(&port->serial->disc_mutex);
-
-	tty->closing = 1;
-
-/*
- * Not currently in use; tty_wait_until_sent() calls
- * serial_chars_in_buffer() which deadlocks on the second semaphore
- * acquisition. This should be fixed at some point. Greg's been
- * notified.
-	if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) {
-		tty_wait_until_sent(tty, CLOSING_DELAY);
-	}
-*/
-
-	tty_driver_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
 	firm_report_tx_done(port);
-
 	firm_close(port);
 
 	/* shutdown our bulk reads and writes */
@@ -775,10 +749,7 @@ static void whiteheat_close(struct tty_struct *tty,
 	}
 	spin_unlock_irq(&info->lock);
 	mutex_unlock(&info->deathwarrant);
-
 	stop_command_port(port->serial);
-
-	tty->closing = 0;
 }
 
 
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 625e9e4..8cdfed7 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -224,8 +224,7 @@ struct usb_serial_driver {
 	/* Called by console with tty = NULL and by tty */
 	int  (*open)(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-	void (*close)(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+	void (*close)(struct usb_serial_port *port);
 	int  (*write)(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 	/* Called only by the tty layer */
@@ -241,6 +240,10 @@ struct usb_serial_driver {
 	int  (*tiocmget)(struct tty_struct *tty, struct file *file);
 	int  (*tiocmset)(struct tty_struct *tty, struct file *file,
 			 unsigned int set, unsigned int clear);
+	/* Called by the tty layer for port level work. There may or may not
+	   be an attached tty at this point */
+	void (*dtr_rts)(struct usb_serial_port *port, int on);
+	int  (*carrier_raised)(struct usb_serial_port *port);
 	/* USB events */
 	void (*read_int_callback)(struct urb *urb);
 	void (*write_int_callback)(struct urb *urb);
@@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty,
 		struct usb_serial_port *port, struct file *filp);
 extern int usb_serial_generic_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
-extern void usb_serial_generic_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+extern void usb_serial_generic_close(struct usb_serial_port *port);
 extern int usb_serial_generic_resume(struct usb_serial *serial);
 extern int usb_serial_generic_write_room(struct tty_struct *tty);
 extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty);
-- 
cgit v0.10.2


From 739e0285cbb162c8ddd0061fda581ee54a34c19a Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:27:50 +0100
Subject: tty: Update cdc_acm

The CDC ACM driver uses the tty layer correctly so needs conversion. Start by
adding and initializing the port structures.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7a1164d..41d4ca5 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -89,6 +89,9 @@ static DEFINE_MUTEX(open_mutex);
 
 #define ACM_READY(acm)	(acm && acm->dev && acm->used)
 
+static const struct tty_port_operations acm_port_ops = {
+};
+
 #ifdef VERBOSE_DEBUG
 #define verbose	1
 #else
@@ -1082,6 +1085,8 @@ skip_normal_probe:
 	spin_lock_init(&acm->read_lock);
 	mutex_init(&acm->mutex);
 	acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
+	tty_port_init(&acm->port);
+	acm->port.ops = &acm_port_ops;
 
 	buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma);
 	if (!buf) {
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index 1f95e7a..19967cd 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -90,7 +90,8 @@ struct acm {
 	struct usb_interface *control;			/* control interface */
 	struct usb_interface *data;			/* data interface */
 	struct tty_struct *tty;				/* the corresponding tty */
-	struct urb *ctrlurb;			/* urbs */
+	struct tty_port port;			 	/* our tty port data */
+	struct urb *ctrlurb;				/* urbs */
 	u8 *ctrl_buffer;				/* buffers of urbs */
 	dma_addr_t ctrl_dma;				/* dma handles of buffers */
 	u8 *country_codes;				/* country codes from device */
-- 
cgit v0.10.2


From 5ba5a5d21204f61ff655b322a90ed91c75194e4b Mon Sep 17 00:00:00 2001
From: Paul Fulghum <paulkf@microgate.com>
Date: Thu, 11 Jun 2009 12:28:37 +0100
Subject: tty: synclink_gt add receive pio mode

Add receive programmed IO mode to reduce receive latency
when using low data rates. The receive FIFO trigger
level of 128 bytes used in DMA mode creates excessive latency
when operating at low data rates. PIO mode is selected when user
application requests data in blocks of less than 128 bytes.

Signed-off-by: Paul Fulghum <paulkf@microgate.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 67986ea..1386625 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -214,6 +214,7 @@ struct slgt_desc
 #define set_desc_next(a,b) (a).next   = cpu_to_le32((unsigned int)(b))
 #define set_desc_count(a,b)(a).count  = cpu_to_le16((unsigned short)(b))
 #define set_desc_eof(a,b)  (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0))
+#define set_desc_status(a, b) (a).status = cpu_to_le16((unsigned short)(b))
 #define desc_count(a)      (le16_to_cpu((a).count))
 #define desc_status(a)     (le16_to_cpu((a).status))
 #define desc_complete(a)   (le16_to_cpu((a).status) & BIT15)
@@ -297,6 +298,7 @@ struct slgt_info {
 	u32 max_frame_size;       /* as set by device config */
 
 	unsigned int rbuf_fill_level;
+	unsigned int rx_pio;
 	unsigned int if_mode;
 	unsigned int base_clock;
 
@@ -331,6 +333,8 @@ struct slgt_info {
 	struct slgt_desc *rbufs;
 	unsigned int rbuf_current;
 	unsigned int rbuf_index;
+	unsigned int rbuf_fill_index;
+	unsigned short rbuf_fill_count;
 
 	unsigned int tbuf_count;
 	struct slgt_desc *tbufs;
@@ -2110,6 +2114,40 @@ static void ri_change(struct slgt_info *info, unsigned short status)
 	info->pending_bh |= BH_STATUS;
 }
 
+static void isr_rxdata(struct slgt_info *info)
+{
+	unsigned int count = info->rbuf_fill_count;
+	unsigned int i = info->rbuf_fill_index;
+	unsigned short reg;
+
+	while (rd_reg16(info, SSR) & IRQ_RXDATA) {
+		reg = rd_reg16(info, RDR);
+		DBGISR(("isr_rxdata %s RDR=%04X\n", info->device_name, reg));
+		if (desc_complete(info->rbufs[i])) {
+			/* all buffers full */
+			rx_stop(info);
+			info->rx_restart = 1;
+			continue;
+		}
+		info->rbufs[i].buf[count++] = (unsigned char)reg;
+		/* async mode saves status byte to buffer for each data byte */
+		if (info->params.mode == MGSL_MODE_ASYNC)
+			info->rbufs[i].buf[count++] = (unsigned char)(reg >> 8);
+		if (count == info->rbuf_fill_level || (reg & BIT10)) {
+			/* buffer full or end of frame */
+			set_desc_count(info->rbufs[i], count);
+			set_desc_status(info->rbufs[i], BIT15 | (reg >> 8));
+			info->rbuf_fill_count = count = 0;
+			if (++i == info->rbuf_count)
+				i = 0;
+			info->pending_bh |= BH_RECEIVE;
+		}
+	}
+
+	info->rbuf_fill_index = i;
+	info->rbuf_fill_count = count;
+}
+
 static void isr_serial(struct slgt_info *info)
 {
 	unsigned short status = rd_reg16(info, SSR);
@@ -2125,6 +2163,8 @@ static void isr_serial(struct slgt_info *info)
 			if (info->tx_count)
 				isr_txeom(info, status);
 		}
+		if (info->rx_pio && (status & IRQ_RXDATA))
+			isr_rxdata(info);
 		if ((status & IRQ_RXBREAK) && (status & RXBREAK)) {
 			info->icount.brk++;
 			/* process break detection if tty control allows */
@@ -2141,7 +2181,8 @@ static void isr_serial(struct slgt_info *info)
 	} else {
 		if (status & (IRQ_TXIDLE + IRQ_TXUNDER))
 			isr_txeom(info, status);
-
+		if (info->rx_pio && (status & IRQ_RXDATA))
+			isr_rxdata(info);
 		if (status & IRQ_RXIDLE) {
 			if (status & RXIDLE)
 				info->icount.rxidle++;
@@ -2642,6 +2683,10 @@ static int rx_enable(struct slgt_info *info, int enable)
 			return -EINVAL;
 		}
 		info->rbuf_fill_level = rbuf_fill_level;
+		if (rbuf_fill_level < 128)
+			info->rx_pio = 1; /* PIO mode */
+		else
+			info->rx_pio = 0; /* DMA mode */
 		rx_stop(info); /* restart receiver to use new fill level */
 	}
 
@@ -3844,15 +3889,27 @@ static void rx_start(struct slgt_info *info)
 	rdma_reset(info);
 	reset_rbufs(info);
 
-	/* set 1st descriptor address */
-	wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
-
-	if (info->params.mode != MGSL_MODE_ASYNC) {
-		/* enable rx DMA and DMA interrupt */
-		wr_reg32(info, RDCSR, (BIT2 + BIT0));
+	if (info->rx_pio) {
+		/* rx request when rx FIFO not empty */
+		wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) & ~BIT14));
+		slgt_irq_on(info, IRQ_RXDATA);
+		if (info->params.mode == MGSL_MODE_ASYNC) {
+			/* enable saving of rx status */
+			wr_reg32(info, RDCSR, BIT6);
+		}
 	} else {
-		/* enable saving of rx status, rx DMA and DMA interrupt */
-		wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
+		/* rx request when rx FIFO half full */
+		wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT14));
+		/* set 1st descriptor address */
+		wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
+
+		if (info->params.mode != MGSL_MODE_ASYNC) {
+			/* enable rx DMA and DMA interrupt */
+			wr_reg32(info, RDCSR, (BIT2 + BIT0));
+		} else {
+			/* enable saving of rx status, rx DMA and DMA interrupt */
+			wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
+		}
 	}
 
 	slgt_irq_on(info, IRQ_RXOVER);
@@ -4470,6 +4527,8 @@ static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last
 static void reset_rbufs(struct slgt_info *info)
 {
 	free_rbufs(info, 0, info->rbuf_count - 1);
+	info->rbuf_fill_index = 0;
+	info->rbuf_fill_count = 0;
 }
 
 /*
-- 
cgit v0.10.2


From 97e87f8ebe978e881c7325ba490574bd5500b133 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:29:27 +0100
Subject: tty: cyclades, plx9060 casts cleanup

Remove ugly all-over-the-code casts of ctl_addr to 9060 space.
Add an union to the cyclades_card structure, which contains
a pointer to both 9050 and 9060 spaces.

The 9050 space layout is unknown, so let it still as a void
__iomem pointer.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index ccf68a9..2cbf741 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -666,12 +666,10 @@ static void cy_send_xchar(struct tty_struct *tty, char ch);
 #define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1)
 
 #define Z_FPGA_CHECK(card) \
-	((readl(&((struct RUNTIME_9060 __iomem *) \
-		((card).ctl_addr))->init_ctrl) & (1<<17)) != 0)
+	((readl(&(card).ctl_addr.p9060->init_ctrl) & (1<<17)) != 0)
 
-#define ISZLOADED(card)	(((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \
-			((card).ctl_addr))->mail_box_0)) || \
-			Z_FPGA_CHECK(card)) && \
+#define ISZLOADED(card)	(((ZO_V1 == readl(&(card).ctl_addr.p9060->mail_box_0)) \
+			|| Z_FPGA_CHECK(card)) && \
 			(ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
 			((card).base_addr+ID_ADDRESS))->signature)))
 
@@ -1400,14 +1398,12 @@ cyz_fetch_msg(struct cyclades_card *cinfo,
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
-	loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *)
-				  (cinfo->ctl_addr))->loc_doorbell);
+	loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell);
 	if (loc_doorbell) {
 		*cmd = (char)(0xff & loc_doorbell);
 		*channel = readl(&board_ctrl->fwcmd_channel);
 		*param = (__u32) readl(&board_ctrl->fwcmd_param);
-		cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-			  loc_doorbell, 0xffffffff);
+		cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff);
 		return 1;
 	}
 	return 0;
@@ -1431,8 +1427,7 @@ cyz_issue_cmd(struct cyclades_card *cinfo,
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
 	index = 0;
-	pci_doorbell =
-	    &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell;
+	pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell;
 	while ((readl(pci_doorbell) & 0xff) != 0) {
 		if (index++ == 1000)
 			return (int)(readl(pci_doorbell) & 0xff);
@@ -1635,8 +1630,7 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 	fw_ver = readl(&board_ctrl->fw_version);
-	hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-			mail_box_0);
+	hw_ver = readl(&cinfo->ctl_addr.p9060->mail_box_0);
 
 	while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) {
 		special_count = 0;
@@ -2394,8 +2388,8 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 		struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
 
 		if (!ISZLOADED(*cinfo)) {
-			if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *)
-					 (cinfo->ctl_addr))->mail_box_0)) &&
+			if (((ZE_V1 == readl(&cinfo->ctl_addr.p9060->
+							mail_box_0)) &&
 					Z_FPGA_CHECK(*cinfo)) &&
 					(ZFIRM_HLT == readl(
 						&firm_id->signature))) {
@@ -2417,6 +2411,7 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 			if (!cinfo->intr_enabled) {
 				struct ZFW_CTRL __iomem *zfw_ctrl;
 				struct BOARD_CTRL __iomem *board_ctrl;
+				u16 intr;
 
 				zfw_ctrl = cinfo->base_addr +
 					(readl(&firm_id->zfwctrl_addr) &
@@ -2425,8 +2420,10 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 				board_ctrl = &zfw_ctrl->board_ctrl;
 
 				/* Enable interrupts on the PLX chip */
-				cy_writew(cinfo->ctl_addr + 0x68,
-					readw(cinfo->ctl_addr + 0x68) | 0x0900);
+				intr = readw(&cinfo->ctl_addr.p9060->
+						intr_ctrl_stat) | 0x0900;
+				cy_writew(&cinfo->ctl_addr.p9060->
+						intr_ctrl_stat, intr);
 				/* Enable interrupts on the FW */
 				retval = cyz_issue_cmd(cinfo, 0,
 						C_CM_IRQ_ENBL, 0L);
@@ -4347,8 +4344,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 	spin_lock_init(&cinfo->card_lock);
 
 	if (IS_CYC_Z(*cinfo)) {	/* Cyclades-Z */
-		mailbox = readl(&((struct RUNTIME_9060 __iomem *)
-				     cinfo->ctl_addr)->mail_box_0);
+		mailbox = readl(&cinfo->ctl_addr.p9060->mail_box_0);
 		nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8;
 		cinfo->intr_enabled = 0;
 		cinfo->nports = 0;	/* Will be correctly set later, after
@@ -4613,7 +4609,7 @@ static int __init cy_detect_isa(void)
 
 		/* set cy_card */
 		cy_card[j].base_addr = cy_isa_address;
-		cy_card[j].ctl_addr = NULL;
+		cy_card[j].ctl_addr.p9050 = NULL;
 		cy_card[j].irq = (int)cy_isa_irq;
 		cy_card[j].bus_index = 0;
 		cy_card[j].first_line = cy_next_channel;
@@ -5013,7 +5009,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		}
 
 		/* Disable interrupts on the PLX before resetting it */
-		cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900);
+		cy_writew(&ctl_addr->intr_ctrl_stat,
+				readw(&ctl_addr->intr_ctrl_stat) & ~0x0900);
 
 		plx_init(pdev, irq, addr0);
 
@@ -5109,7 +5106,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 
 	/* set cy_card */
 	cy_card[card_no].base_addr = addr2;
-	cy_card[card_no].ctl_addr = addr0;
+	cy_card[card_no].ctl_addr.p9050 = addr0;
 	cy_card[card_no].irq = irq;
 	cy_card[card_no].bus_index = 1;
 	cy_card[card_no].first_line = cy_next_channel;
@@ -5125,17 +5122,20 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		plx_ver = readb(addr2 + CyPLX_VER) & 0x0f;
 		switch (plx_ver) {
 		case PLX_9050:
-
 			cy_writeb(addr0 + 0x4c, 0x43);
 			break;
 
 		case PLX_9060:
 		case PLX_9080:
 		default:	/* Old boards, use PLX_9060 */
-			plx_init(pdev, irq, addr0);
-			cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900);
+		{
+			struct RUNTIME_9060 __iomem *ctl_addr = addr0;
+			plx_init(pdev, irq, ctl_addr);
+			cy_writew(&ctl_addr->intr_ctrl_stat,
+				readw(&ctl_addr->intr_ctrl_stat) | 0x0900);
 			break;
 		}
+		}
 	}
 
 	dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from "
@@ -5168,17 +5168,18 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev)
 	/* non-Z with old PLX */
 	if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
 			PLX_9050)
-		cy_writeb(cinfo->ctl_addr + 0x4c, 0);
+		cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0);
 	else
 #ifndef CONFIG_CYZ_INTR
 		if (!IS_CYC_Z(*cinfo))
 #endif
-		cy_writew(cinfo->ctl_addr + 0x68,
-				readw(cinfo->ctl_addr + 0x68) & ~0x0900);
+		cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat,
+			readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) &
+			~0x0900);
 
 	iounmap(cinfo->base_addr);
-	if (cinfo->ctl_addr)
-		iounmap(cinfo->ctl_addr);
+	if (cinfo->ctl_addr.p9050)
+		iounmap(cinfo->ctl_addr.p9050);
 	if (cinfo->irq
 #ifndef CONFIG_CYZ_INTR
 		&& !IS_CYC_Z(*cinfo)
@@ -5373,8 +5374,8 @@ static void __exit cy_cleanup_module(void)
 			/* clear interrupt */
 			cy_writeb(card->base_addr + Cy_ClrIntr, 0);
 			iounmap(card->base_addr);
-			if (card->ctl_addr)
-				iounmap(card->ctl_addr);
+			if (card->ctl_addr.p9050)
+				iounmap(card->ctl_addr.p9050);
 			if (card->irq
 #ifndef CONFIG_CYZ_INTR
 				&& !IS_CYC_Z(*card)
diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index 788850b..9ae03d5 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -507,16 +507,19 @@ struct ZFW_CTRL {
 
 /* Per card data structure */
 struct cyclades_card {
-    void __iomem *base_addr;
-    void __iomem *ctl_addr;
-    int irq;
-    unsigned int num_chips;	/* 0 if card absent, -1 if Z/PCI, else Y */
-    unsigned int first_line;	/* minor number of first channel on card */
-    unsigned int nports;	/* Number of ports in the card */
-    int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
-    int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
-    spinlock_t card_lock;
-    struct cyclades_port *ports;
+	void __iomem *base_addr;
+	union {
+		void __iomem *p9050;
+		struct RUNTIME_9060 __iomem *p9060;
+	} ctl_addr;
+	int irq;
+	unsigned int num_chips;	/* 0 if card absent, -1 if Z/PCI, else Y */
+	unsigned int first_line;	/* minor number of first channel on card */
+	unsigned int nports;	/* Number of ports in the card */
+	int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
+	int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
+	spinlock_t card_lock;
+	struct cyclades_port *ports;
 };
 
 /***************************************
-- 
cgit v0.10.2


From 101b81590d8df0a74c33cf739886247c0a13f4af Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:30:10 +0100
Subject: tty: cyclades, cache HW version

Store HW version locally to not read it all the time in interrupts
and alike.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 2cbf741..cf191cc 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -668,8 +668,7 @@ static void cy_send_xchar(struct tty_struct *tty, char ch);
 #define Z_FPGA_CHECK(card) \
 	((readl(&(card).ctl_addr.p9060->init_ctrl) & (1<<17)) != 0)
 
-#define ISZLOADED(card)	(((ZO_V1 == readl(&(card).ctl_addr.p9060->mail_box_0)) \
-			|| Z_FPGA_CHECK(card)) && \
+#define ISZLOADED(card)	((ZO_V1 == (card).hw_ver || Z_FPGA_CHECK(card)) && \
 			(ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
 			((card).base_addr+ID_ADDRESS))->signature)))
 
@@ -1393,8 +1392,6 @@ cyz_fetch_msg(struct cyclades_card *cinfo,
 	unsigned long loc_doorbell;
 
 	firm_id = cinfo->base_addr + ID_ADDRESS;
-	if (!ISZLOADED(*cinfo))
-		return -1;
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
@@ -1619,10 +1616,8 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
 	static struct BOARD_CTRL __iomem *board_ctrl;
 	static struct CH_CTRL __iomem *ch_ctrl;
 	static struct BUF_CTRL __iomem *buf_ctrl;
-	__u32 channel;
+	__u32 channel, param, fw_ver;
 	__u8 cmd;
-	__u32 param;
-	__u32 hw_ver, fw_ver;
 	int special_count;
 	int delta_count;
 
@@ -1630,7 +1625,6 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 	fw_ver = readl(&board_ctrl->fw_version);
-	hw_ver = readl(&cinfo->ctl_addr.p9060->mail_box_0);
 
 	while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) {
 		special_count = 0;
@@ -2388,11 +2382,9 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 		struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
 
 		if (!ISZLOADED(*cinfo)) {
-			if (((ZE_V1 == readl(&cinfo->ctl_addr.p9060->
-							mail_box_0)) &&
-					Z_FPGA_CHECK(*cinfo)) &&
-					(ZFIRM_HLT == readl(
-						&firm_id->signature))) {
+			if (cinfo->hw_ver == ZE_V1 && Z_FPGA_CHECK(*cinfo) &&
+					readl(&firm_id->signature) ==
+					ZFIRM_HLT) {
 				printk(KERN_ERR "cyc:Cyclades-Z Error: you "
 					"need an external power supply for "
 					"this number of ports.\nFirmware "
@@ -4336,7 +4328,6 @@ static void cy_hangup(struct tty_struct *tty)
 static int __devinit cy_init_card(struct cyclades_card *cinfo)
 {
 	struct cyclades_port *info;
-	u32 uninitialized_var(mailbox);
 	unsigned int nports, port;
 	unsigned short chip_number;
 	int uninitialized_var(index);
@@ -4344,8 +4335,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 	spin_lock_init(&cinfo->card_lock);
 
 	if (IS_CYC_Z(*cinfo)) {	/* Cyclades-Z */
-		mailbox = readl(&cinfo->ctl_addr.p9060->mail_box_0);
-		nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8;
+		nports = (cinfo->hw_ver == ZE_V1) ? ZE_V1_NPORTS : 8;
 		cinfo->intr_enabled = 0;
 		cinfo->nports = 0;	/* Will be correctly set later, after
 					   Z FW is loaded */
@@ -4377,7 +4367,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 
 		if (IS_CYC_Z(*cinfo)) {
 			info->type = PORT_STARTECH;
-			if (mailbox == ZO_V1)
+			if (cinfo->hw_ver == ZO_V1)
 				info->xmit_fifo_size = CYZ_FIFO_SIZE;
 			else
 				info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE;
@@ -4932,7 +4922,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 {
 	void __iomem *addr0 = NULL, *addr2 = NULL;
 	char *card_name = NULL;
-	u32 mailbox;
+	u32 uninitialized_var(mailbox);
 	unsigned int device_id, nchan = 0, card_no, i;
 	unsigned char plx_ver;
 	int retval, irq;
@@ -5014,7 +5004,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 
 		plx_init(pdev, irq, addr0);
 
-		mailbox = (u32)readl(&ctl_addr->mail_box_0);
+		mailbox = readl(&ctl_addr->mail_box_0);
 
 		addr2 = ioremap_nocache(pci_resource_start(pdev, 2),
 				mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin);
@@ -5026,7 +5016,6 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		if (mailbox == ZE_V1) {
 			card_name = "Cyclades-Ze";
 
-			readl(&ctl_addr->mail_box_0);
 			nchan = ZE_V1_NPORTS;
 		} else {
 			card_name = "Cyclades-8Zo";
@@ -5089,6 +5078,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		}
 		cy_card[card_no].num_chips = nchan / 4;
 	} else {
+		cy_card[card_no].hw_ver = mailbox;
+		cy_card[card_no].num_chips = (unsigned int)-1;
 #ifdef CONFIG_CYZ_INTR
 		/* allocate IRQ only if board has an IRQ */
 		if (irq != 0 && irq != 255) {
@@ -5101,7 +5092,6 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 			}
 		}
 #endif				/* CONFIG_CYZ_INTR */
-		cy_card[card_no].num_chips = (unsigned int)-1;
 	}
 
 	/* set cy_card */
diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index 9ae03d5..a14fe30 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -518,6 +518,7 @@ struct cyclades_card {
 	unsigned int nports;	/* Number of ports in the card */
 	int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
 	int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
+	u32 hw_ver;
 	spinlock_t card_lock;
 	struct cyclades_port *ports;
 };
-- 
cgit v0.10.2


From 2693f485c22d18474c077f12fd0f797ee8679b33 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:31:06 +0100
Subject: tty: cyclades, convert macros to inlines

Remove ugly macros and add inlines instead of them. This improves
readability and type checking a much.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index cf191cc..c9a503f 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -663,15 +663,6 @@
 static void cy_throttle(struct tty_struct *tty);
 static void cy_send_xchar(struct tty_struct *tty, char ch);
 
-#define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1)
-
-#define Z_FPGA_CHECK(card) \
-	((readl(&(card).ctl_addr.p9060->init_ctrl) & (1<<17)) != 0)
-
-#define ISZLOADED(card)	((ZO_V1 == (card).hw_ver || Z_FPGA_CHECK(card)) && \
-			(ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
-			((card).base_addr+ID_ADDRESS))->signature)))
-
 #ifndef SERIAL_XMIT_SIZE
 #define	SERIAL_XMIT_SIZE	(min(PAGE_SIZE, 4096))
 #endif
@@ -684,8 +675,6 @@ static void cy_send_xchar(struct tty_struct *tty, char ch);
 #define DRIVER_VERSION	0x02010203
 #define RAM_SIZE 0x80000
 
-#define Z_FPGA_LOADED(X)	((readl(&(X)->init_ctrl) & (1<<17)) != 0)
-
 enum zblock_type {
 	ZBLOCK_PRG = 0,
 	ZBLOCK_FPGA = 1
@@ -880,6 +869,29 @@ static void cyz_rx_restart(unsigned long);
 static struct timer_list cyz_rx_full_timer[NR_PORTS];
 #endif				/* CONFIG_CYZ_INTR */
 
+static inline bool cy_is_Z(struct cyclades_card *card)
+{
+	return card->num_chips == (unsigned int)-1;
+}
+
+static inline bool __cyz_fpga_loaded(struct RUNTIME_9060 __iomem *ctl_addr)
+{
+	return readl(&ctl_addr->init_ctrl) & (1 << 17);
+}
+
+static inline bool cyz_fpga_loaded(struct cyclades_card *card)
+{
+	return __cyz_fpga_loaded(card->ctl_addr.p9060);
+}
+
+static inline bool cyz_is_loaded(struct cyclades_card *card)
+{
+	struct FIRM_ID __iomem *fw_id = card->base_addr + ID_ADDRESS;
+
+	return (card->hw_ver == ZO_V1 || cyz_fpga_loaded(card)) &&
+			readl(&fw_id->signature) == ZFIRM_ID;
+}
+
 static inline int serial_paranoia_check(struct cyclades_port *info,
 		char *name, const char *routine)
 {
@@ -1417,7 +1429,7 @@ cyz_issue_cmd(struct cyclades_card *cinfo,
 	unsigned int index;
 
 	firm_id = cinfo->base_addr + ID_ADDRESS;
-	if (!ISZLOADED(*cinfo))
+	if (!cyz_is_loaded(cinfo))
 		return -1;
 
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
@@ -1725,7 +1737,7 @@ static irqreturn_t cyz_interrupt(int irq, void *dev_id)
 {
 	struct cyclades_card *cinfo = dev_id;
 
-	if (unlikely(!ISZLOADED(*cinfo))) {
+	if (unlikely(!cyz_is_loaded(cinfo))) {
 #ifdef CY_DEBUG_INTERRUPTS
 		printk(KERN_DEBUG "cyz_interrupt: board not yet loaded "
 				"(IRQ%d).\n", irq);
@@ -1773,9 +1785,9 @@ static void cyz_poll(unsigned long arg)
 	for (card = 0; card < NR_CARDS; card++) {
 		cinfo = &cy_card[card];
 
-		if (!IS_CYC_Z(*cinfo))
+		if (!cy_is_Z(cinfo))
 			continue;
-		if (!ISZLOADED(*cinfo))
+		if (!cyz_is_loaded(cinfo))
 			continue;
 
 		firm_id = cinfo->base_addr + ID_ADDRESS;
@@ -1854,7 +1866,7 @@ static int startup(struct cyclades_port *info)
 
 	set_line_char(info);
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -1911,7 +1923,7 @@ static int startup(struct cyclades_port *info)
 		base_addr = card->base_addr;
 
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return -ENODEV;
 
 		zfw_ctrl = card->base_addr +
@@ -2006,7 +2018,7 @@ static void start_xmit(struct cyclades_port *info)
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2050,7 +2062,7 @@ static void shutdown(struct cyclades_port *info)
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2106,7 +2118,7 @@ static void shutdown(struct cyclades_port *info)
 #endif
 
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return;
 
 		zfw_ctrl = card->base_addr +
@@ -2213,7 +2225,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp,
 #endif
 	info->port.blocked_open++;
 
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = cinfo->bus_index;
@@ -2276,7 +2288,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp,
 
 		base_addr = cinfo->base_addr;
 		firm_id = base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*cinfo)) {
+		if (!cyz_is_loaded(cinfo)) {
 			__set_current_state(TASK_RUNNING);
 			remove_wait_queue(&info->port.open_wait, &wait);
 			return -EINVAL;
@@ -2377,12 +2389,12 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 	   treat it as absent from the system.  This
 	   will make the user pay attention.
 	 */
-	if (IS_CYC_Z(*info->card)) {
+	if (cy_is_Z(info->card)) {
 		struct cyclades_card *cinfo = info->card;
 		struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
 
-		if (!ISZLOADED(*cinfo)) {
-			if (cinfo->hw_ver == ZE_V1 && Z_FPGA_CHECK(*cinfo) &&
+		if (!cyz_is_loaded(cinfo)) {
+			if (cinfo->hw_ver == ZE_V1 && cyz_fpga_loaded(cinfo) &&
 					readl(&firm_id->signature) ==
 					ZFIRM_HLT) {
 				printk(KERN_ERR "cyc:Cyclades-Z Error: you "
@@ -2537,7 +2549,7 @@ static void cy_wait_until_sent(struct tty_struct *tty, int timeout)
 #endif
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -2582,7 +2594,7 @@ static void cy_flush_buffer(struct tty_struct *tty)
 	info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
-	if (IS_CYC_Z(*card)) {	/* If it is a Z card, flush the on-board
+	if (cy_is_Z(card)) {	/* If it is a Z card, flush the on-board
 					   buffers as well */
 		spin_lock_irqsave(&card->card_lock, flags);
 		retval = cyz_issue_cmd(card, channel, C_CM_FLUSH_TX, 0L);
@@ -2663,7 +2675,7 @@ static void cy_close(struct tty_struct *tty, struct file *filp)
 
 	spin_lock_irqsave(&card->card_lock, flags);
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		int channel = info->line - card->first_line;
 		int index = card->bus_index;
 		void __iomem *base_addr = card->base_addr +
@@ -2883,7 +2895,7 @@ static int cy_chars_in_buffer(struct tty_struct *tty)
 	channel = (info->line) - (card->first_line);
 
 #ifdef Z_EXT_CHARS_IN_BUFFER
-	if (!IS_CYC_Z(cy_card[card])) {
+	if (!cy_is_Z(card)) {
 #endif				/* Z_EXT_CHARS_IN_BUFFER */
 #ifdef CY_DEBUG_IO
 		printk(KERN_DEBUG "cyc:cy_chars_in_buffer ttyC%d %d\n",
@@ -2996,7 +3008,7 @@ static void set_line_char(struct cyclades_port *info)
 	channel = info->line - card->first_line;
 	chip_number = channel / 4;
 
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 
 		index = card->bus_index;
 
@@ -3221,7 +3233,7 @@ static void set_line_char(struct cyclades_port *info)
 		int retval;
 
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (!ISZLOADED(*card))
+		if (!cyz_is_loaded(card))
 			return;
 
 		zfw_ctrl = card->base_addr +
@@ -3438,7 +3450,7 @@ static int get_lsr_info(struct cyclades_port *info, unsigned int __user *value)
 
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3478,7 +3490,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file)
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3504,7 +3516,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file)
 	} else {
 		base_addr = card->base_addr;
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (ISZLOADED(*card)) {
+		if (cyz_is_loaded(card)) {
 			zfw_ctrl = card->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
 			board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3547,7 +3559,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file,
 
 	card = info->card;
 	channel = (info->line) - (card->first_line);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3622,7 +3634,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file,
 		base_addr = card->base_addr;
 
 		firm_id = card->base_addr + ID_ADDRESS;
-		if (ISZLOADED(*card)) {
+		if (cyz_is_loaded(card)) {
 			zfw_ctrl = card->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
 			board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3694,7 +3706,7 @@ static int cy_break(struct tty_struct *tty, int break_state)
 	card = info->card;
 
 	spin_lock_irqsave(&card->card_lock, flags);
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		/* Let the transmit ISR take care of this (since it
 		   requires stuffing characters into the output stream).
 		 */
@@ -3763,7 +3775,7 @@ static int set_threshold(struct cyclades_port *info, unsigned long value)
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3791,7 +3803,7 @@ static int get_threshold(struct cyclades_port *info,
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3825,7 +3837,7 @@ static int set_timeout(struct cyclades_port *info, unsigned long value)
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -3848,7 +3860,7 @@ static int get_timeout(struct cyclades_port *info,
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	if (!IS_CYC_Z(*card)) {
+	if (!cy_is_Z(card)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		index = card->bus_index;
@@ -4102,7 +4114,7 @@ static void cy_send_xchar(struct tty_struct *tty, char ch)
 	card = info->card;
 	channel = info->line - card->first_line;
 
-	if (IS_CYC_Z(*card)) {
+	if (cy_is_Z(card)) {
 		if (ch == STOP_CHAR(tty))
 			cyz_issue_cmd(card, channel, C_CM_SENDXOFF, 0L);
 		else if (ch == START_CHAR(tty))
@@ -4135,7 +4147,7 @@ static void cy_throttle(struct tty_struct *tty)
 	card = info->card;
 
 	if (I_IXOFF(tty)) {
-		if (!IS_CYC_Z(*card))
+		if (!cy_is_Z(card))
 			cy_send_xchar(tty, STOP_CHAR(tty));
 		else
 			info->throttle = 1;
@@ -4143,7 +4155,7 @@ static void cy_throttle(struct tty_struct *tty)
 
 	if (tty->termios->c_cflag & CRTSCTS) {
 		channel = info->line - card->first_line;
-		if (!IS_CYC_Z(*card)) {
+		if (!cy_is_Z(card)) {
 			chip = channel >> 2;
 			channel &= 0x03;
 			index = card->bus_index;
@@ -4200,7 +4212,7 @@ static void cy_unthrottle(struct tty_struct *tty)
 	if (tty->termios->c_cflag & CRTSCTS) {
 		card = info->card;
 		channel = info->line - card->first_line;
-		if (!IS_CYC_Z(*card)) {
+		if (!cy_is_Z(card)) {
 			chip = channel >> 2;
 			channel &= 0x03;
 			index = card->bus_index;
@@ -4244,7 +4256,7 @@ static void cy_stop(struct tty_struct *tty)
 
 	cinfo = info->card;
 	channel = info->line - cinfo->first_line;
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		index = cinfo->bus_index;
 		chip = channel >> 2;
 		channel &= 0x03;
@@ -4277,7 +4289,7 @@ static void cy_start(struct tty_struct *tty)
 	cinfo = info->card;
 	channel = info->line - cinfo->first_line;
 	index = cinfo->bus_index;
-	if (!IS_CYC_Z(*cinfo)) {
+	if (!cy_is_Z(cinfo)) {
 		chip = channel >> 2;
 		channel &= 0x03;
 		base_addr = cinfo->base_addr + (cy_chip_offset[chip] << index);
@@ -4334,7 +4346,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 
 	spin_lock_init(&cinfo->card_lock);
 
-	if (IS_CYC_Z(*cinfo)) {	/* Cyclades-Z */
+	if (cy_is_Z(cinfo)) {	/* Cyclades-Z */
 		nports = (cinfo->hw_ver == ZE_V1) ? ZE_V1_NPORTS : 8;
 		cinfo->intr_enabled = 0;
 		cinfo->nports = 0;	/* Will be correctly set later, after
@@ -4365,7 +4377,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 		init_completion(&info->shutdown_wait);
 		init_waitqueue_head(&info->delta_msr_wait);
 
-		if (IS_CYC_Z(*cinfo)) {
+		if (cy_is_Z(cinfo)) {
 			info->type = PORT_STARTECH;
 			if (cinfo->hw_ver == ZO_V1)
 				info->xmit_fifo_size = CYZ_FIFO_SIZE;
@@ -4408,7 +4420,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 	}
 
 #ifndef CONFIG_CYZ_INTR
-	if (IS_CYC_Z(*cinfo) && !timer_pending(&cyz_timerlist)) {
+	if (cy_is_Z(cinfo) && !timer_pending(&cyz_timerlist)) {
 		mod_timer(&cyz_timerlist, jiffies + 1);
 #ifdef CY_PCI_DEBUG
 		printk(KERN_DEBUG "Cyclades-Z polling initialized\n");
@@ -4771,7 +4783,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 
 	/* Check whether the firmware is already loaded and running. If
 	   positive, skip this board */
-	if (Z_FPGA_LOADED(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
+	if (__cyz_fpga_loaded(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
 		u32 cntval = readl(base_addr + 0x190);
 
 		udelay(100);
@@ -4790,7 +4802,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 
 	mailbox = readl(&ctl_addr->mail_box_0);
 
-	if (mailbox == 0 || Z_FPGA_LOADED(ctl_addr)) {
+	if (mailbox == 0 || __cyz_fpga_loaded(ctl_addr)) {
 		/* stops CPU and set window to beginning of RAM */
 		cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
 		cy_writel(&cust->cpu_stop, 0);
@@ -4806,7 +4818,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 				base_addr);
 		if (retval)
 			goto err_rel;
-		if (!Z_FPGA_LOADED(ctl_addr)) {
+		if (!__cyz_fpga_loaded(ctl_addr)) {
 			dev_err(&pdev->dev, "fw upload successful, but fw is "
 					"not loaded\n");
 			goto err_rel;
@@ -4865,7 +4877,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 				"system before loading the new FW to the "
 				"Cyclades-Z.\n");
 
-			if (Z_FPGA_LOADED(ctl_addr))
+			if (__cyz_fpga_loaded(ctl_addr))
 				plx_init(pdev, irq, ctl_addr);
 
 			retval = -EIO;
@@ -4889,7 +4901,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 			"check the connection between the Z host card and the "
 			"serial expanders.\n");
 
-		if (Z_FPGA_LOADED(ctl_addr))
+		if (__cyz_fpga_loaded(ctl_addr))
 			plx_init(pdev, irq, ctl_addr);
 
 		dev_info(&pdev->dev, "Null number of ports detected. Board "
@@ -5156,12 +5168,12 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev)
 	unsigned int i;
 
 	/* non-Z with old PLX */
-	if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
+	if (!cy_is_Z(cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
 			PLX_9050)
 		cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0);
 	else
 #ifndef CONFIG_CYZ_INTR
-		if (!IS_CYC_Z(*cinfo))
+		if (!cy_is_Z(cinfo))
 #endif
 		cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat,
 			readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) &
@@ -5172,7 +5184,7 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev)
 		iounmap(cinfo->ctl_addr.p9050);
 	if (cinfo->irq
 #ifndef CONFIG_CYZ_INTR
-		&& !IS_CYC_Z(*cinfo)
+		&& !cy_is_Z(cinfo)
 #endif /* CONFIG_CYZ_INTR */
 		)
 		free_irq(cinfo->irq, cinfo);
@@ -5368,7 +5380,7 @@ static void __exit cy_cleanup_module(void)
 				iounmap(card->ctl_addr.p9050);
 			if (card->irq
 #ifndef CONFIG_CYZ_INTR
-				&& !IS_CYC_Z(*card)
+				&& !cy_is_Z(card)
 #endif /* CONFIG_CYZ_INTR */
 				)
 				free_irq(card->irq, card);
-- 
cgit v0.10.2


From 08a951e1693e324bd035b194002a82b9057fd9c5 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:33:40 +0100
Subject: tty: cyclades, remove typedefs

They are unused anyway.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index a14fe30..1fbdea4 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL {
 
 
 #ifndef DP_WINDOW_SIZE
-/* #include "cyclomz.h" */
-/****************** ****************** *******************/
-/*
- *	The data types defined below are used in all ZFIRM interface
- *	data structures. They accomodate differences between HW
- *	architectures and compilers.
- */
-
-typedef __u64  ucdouble;		/* 64 bits, unsigned */
-typedef __u32  uclong;			/* 32 bits, unsigned */
-typedef __u16  ucshort;		/* 16 bits, unsigned */
-typedef __u8   ucchar;			/* 8 bits, unsigned */
-
 /*
  *	Memory Window Sizes
  */
-- 
cgit v0.10.2


From 963118eef9e6706e2b5356309fb0cdd9c9eba81d Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:34:27 +0100
Subject: tty: cyclades, fix nports handling

Set up ports right after FW load so that we won't allocate maximal
(64) ports when we use few.

Also remove reading of nports in irq context, since we know it from
initialisation now.

This also fixes a tty ports unregistration on some fail paths and for
Ze which registered 64 and unregistered real port count.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index c9a503f..6adbc97 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -604,7 +604,6 @@
 
 #define NR_PORTS	256
 
-#define ZE_V1_NPORTS	64
 #define ZO_V1	0
 #define ZO_V2	1
 #define ZE_V1	2
@@ -1777,7 +1776,6 @@ static void cyz_poll(unsigned long arg)
 	struct tty_struct *tty;
 	struct FIRM_ID __iomem *firm_id;
 	struct ZFW_CTRL __iomem *zfw_ctrl;
-	struct BOARD_CTRL __iomem *board_ctrl;
 	struct BUF_CTRL __iomem *buf_ctrl;
 	unsigned long expires = jiffies + HZ;
 	unsigned int port, card;
@@ -1793,11 +1791,9 @@ static void cyz_poll(unsigned long arg)
 		firm_id = cinfo->base_addr + ID_ADDRESS;
 		zfw_ctrl = cinfo->base_addr +
 				(readl(&firm_id->zfwctrl_addr) & 0xfffff);
-		board_ctrl = &(zfw_ctrl->board_ctrl);
 
 	/* Skip first polling cycle to avoid racing conditions with the FW */
 		if (!cinfo->intr_enabled) {
-			cinfo->nports = (int)readl(&board_ctrl->n_channel);
 			cinfo->intr_enabled = 1;
 			continue;
 		}
@@ -2413,16 +2409,8 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 		   interrupts should be enabled as soon as the first open
 		   happens to one of its ports. */
 			if (!cinfo->intr_enabled) {
-				struct ZFW_CTRL __iomem *zfw_ctrl;
-				struct BOARD_CTRL __iomem *board_ctrl;
 				u16 intr;
 
-				zfw_ctrl = cinfo->base_addr +
-					(readl(&firm_id->zfwctrl_addr) &
-					 0xfffff);
-
-				board_ctrl = &zfw_ctrl->board_ctrl;
-
 				/* Enable interrupts on the PLX chip */
 				intr = readw(&cinfo->ctl_addr.p9060->
 						intr_ctrl_stat) | 0x0900;
@@ -2435,8 +2423,6 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 					printk(KERN_ERR "cyc:IRQ enable retval "
 						"was %x\n", retval);
 				}
-				cinfo->nports =
-					(int)readl(&board_ctrl->n_channel);
 				cinfo->intr_enabled = 1;
 			}
 		}
@@ -4340,30 +4326,20 @@ static void cy_hangup(struct tty_struct *tty)
 static int __devinit cy_init_card(struct cyclades_card *cinfo)
 {
 	struct cyclades_port *info;
-	unsigned int nports, port;
+	unsigned int port;
 	unsigned short chip_number;
-	int uninitialized_var(index);
 
 	spin_lock_init(&cinfo->card_lock);
+	cinfo->intr_enabled = 0;
 
-	if (cy_is_Z(cinfo)) {	/* Cyclades-Z */
-		nports = (cinfo->hw_ver == ZE_V1) ? ZE_V1_NPORTS : 8;
-		cinfo->intr_enabled = 0;
-		cinfo->nports = 0;	/* Will be correctly set later, after
-					   Z FW is loaded */
-	} else {
-		index = cinfo->bus_index;
-		nports = cinfo->nports = CyPORTS_PER_CHIP * cinfo->num_chips;
-	}
-
-	cinfo->ports = kzalloc(sizeof(*cinfo->ports) * nports, GFP_KERNEL);
+	cinfo->ports = kcalloc(cinfo->nports, sizeof(*cinfo->ports),
+			GFP_KERNEL);
 	if (cinfo->ports == NULL) {
 		printk(KERN_ERR "Cyclades: cannot allocate ports\n");
-		cinfo->nports = 0;
 		return -ENOMEM;
 	}
 
-	for (port = cinfo->first_line; port < cinfo->first_line + nports;
+	for (port = cinfo->first_line; port < cinfo->first_line + cinfo->nports;
 			port++) {
 		info = &cinfo->ports[port - cinfo->first_line];
 		tty_port_init(&info->port);
@@ -4388,6 +4364,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 				cyz_rx_restart, (unsigned long)info);
 #endif
 		} else {
+			int index = cinfo->bus_index;
 			info->type = PORT_CIRRUS;
 			info->xmit_fifo_size = CyMAX_CHAR_FIFO;
 			info->cor1 = CyPARITY_NONE | Cy_1_STOP | Cy_8_BITS;
@@ -4615,7 +4592,8 @@ static int __init cy_detect_isa(void)
 		cy_card[j].irq = (int)cy_isa_irq;
 		cy_card[j].bus_index = 0;
 		cy_card[j].first_line = cy_next_channel;
-		cy_card[j].num_chips = cy_isa_nchan / 4;
+		cy_card[j].num_chips = cy_isa_nchan / CyPORTS_PER_CHIP;
+		cy_card[j].nports = cy_isa_nchan;
 		if (cy_init_card(&cy_card[j])) {
 			cy_card[j].base_addr = NULL;
 			free_irq(cy_isa_irq, &cy_card[j]);
@@ -4771,7 +4749,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 	struct CUSTOM_REG __iomem *cust = base_addr;
 	struct ZFW_CTRL __iomem *pt_zfwctrl;
 	void __iomem *tmp;
-	u32 mailbox, status;
+	u32 mailbox, status, nchan;
 	unsigned int i;
 	int retval;
 
@@ -4892,11 +4870,11 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 			base_addr + ID_ADDRESS, readl(&fid->zfwctrl_addr),
 			base_addr + readl(&fid->zfwctrl_addr));
 
+	nchan = readl(&pt_zfwctrl->board_ctrl.n_channel);
 	dev_info(&pdev->dev, "Cyclades-Z FW loaded: version = %x, ports = %u\n",
-		readl(&pt_zfwctrl->board_ctrl.fw_version),
-		readl(&pt_zfwctrl->board_ctrl.n_channel));
+		readl(&pt_zfwctrl->board_ctrl.fw_version), nchan);
 
-	if (readl(&pt_zfwctrl->board_ctrl.n_channel) == 0) {
+	if (nchan == 0) {
 		dev_warn(&pdev->dev, "no Cyclades-Z ports were found. Please "
 			"check the connection between the Z host card and the "
 			"serial expanders.\n");
@@ -4922,7 +4900,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 	cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) |
 			0x00030800UL);
 
-	return 0;
+	return nchan;
 err_rel:
 	release_firmware(fw);
 err:
@@ -5027,12 +5005,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 
 		if (mailbox == ZE_V1) {
 			card_name = "Cyclades-Ze";
-
-			nchan = ZE_V1_NPORTS;
 		} else {
 			card_name = "Cyclades-8Zo";
-			nchan = 8;
-
 #ifdef CY_PCI_DEBUG
 			if (mailbox == ZO_V1) {
 				cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
@@ -5057,8 +5031,9 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		}
 
 		retval = cyz_load_fw(pdev, addr2, addr0, irq);
-		if (retval)
+		if (retval <= 0)
 			goto err_unmap;
+		nchan = retval;
 	}
 
 	if ((cy_next_channel + nchan) > NR_PORTS) {
@@ -5088,7 +5063,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 			dev_err(&pdev->dev, "could not allocate IRQ\n");
 			goto err_unmap;
 		}
-		cy_card[card_no].num_chips = nchan / 4;
+		cy_card[card_no].num_chips = nchan / CyPORTS_PER_CHIP;
 	} else {
 		cy_card[card_no].hw_ver = mailbox;
 		cy_card[card_no].num_chips = (unsigned int)-1;
@@ -5112,6 +5087,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 	cy_card[card_no].irq = irq;
 	cy_card[card_no].bus_index = 1;
 	cy_card[card_no].first_line = cy_next_channel;
+	cy_card[card_no].nports = nchan;
 	retval = cy_init_card(&cy_card[card_no]);
 	if (retval)
 		goto err_null;
-- 
cgit v0.10.2


From b39933fbd304021580800796683b8ddaa3dd0a6a Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:35:21 +0100
Subject: tty: cyclades, remove unused variables

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 6adbc97..4560190 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -2963,7 +2963,6 @@ static void set_line_char(struct cyclades_port *info)
 	void __iomem *base_addr;
 	int chip, channel, index;
 	unsigned cflag, iflag;
-	unsigned short chip_number;
 	int baud, baud_rate = 0;
 	int i;
 
@@ -2992,7 +2991,6 @@ static void set_line_char(struct cyclades_port *info)
 
 	card = info->card;
 	channel = info->line - card->first_line;
-	chip_number = channel / 4;
 
 	if (!cy_is_Z(card)) {
 
@@ -3212,9 +3210,7 @@ static void set_line_char(struct cyclades_port *info)
 	} else {
 		struct FIRM_ID __iomem *firm_id;
 		struct ZFW_CTRL __iomem *zfw_ctrl;
-		struct BOARD_CTRL __iomem *board_ctrl;
 		struct CH_CTRL __iomem *ch_ctrl;
-		struct BUF_CTRL __iomem *buf_ctrl;
 		__u32 sw_flow;
 		int retval;
 
@@ -3224,9 +3220,7 @@ static void set_line_char(struct cyclades_port *info)
 
 		zfw_ctrl = card->base_addr +
 			(readl(&firm_id->zfwctrl_addr) & 0xfffff);
-		board_ctrl = &zfw_ctrl->board_ctrl;
 		ch_ctrl = &(zfw_ctrl->ch_ctrl[channel]);
-		buf_ctrl = &zfw_ctrl->buf_ctrl[channel];
 
 		/* baud rate */
 		baud = tty_get_baud_rate(info->port.tty);
-- 
cgit v0.10.2


From 10077d4a6674f535abdbe25cdecb1202af7948f1 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:36:09 +0100
Subject: tty: cdc_acm add krefs

Now we have a port structure begin using the fields and kref counts

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 41d4ca5..b4e73aa 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -62,7 +62,7 @@
 #include <linux/tty_flip.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/usb.h>
 #include <linux/usb/cdc.h>
 #include <asm/byteorder.h>
@@ -87,7 +87,7 @@ static struct acm *acm_table[ACM_TTY_MINORS];
 
 static DEFINE_MUTEX(open_mutex);
 
-#define ACM_READY(acm)	(acm && acm->dev && acm->used)
+#define ACM_READY(acm)	(acm && acm->dev && acm->port.count)
 
 static const struct tty_port_operations acm_port_ops = {
 };
@@ -265,6 +265,7 @@ static void acm_ctrl_irq(struct urb *urb)
 {
 	struct acm *acm = urb->context;
 	struct usb_cdc_notification *dr = urb->transfer_buffer;
+	struct tty_struct *tty;
 	unsigned char *data;
 	int newctrl;
 	int retval;
@@ -297,12 +298,16 @@ static void acm_ctrl_irq(struct urb *urb)
 			break;
 
 		case USB_CDC_NOTIFY_SERIAL_STATE:
-
+			tty = tty_port_tty_get(&acm->port);
 			newctrl = get_unaligned_le16(data);
 
-			if (acm->tty && !acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
-				dbg("calling hangup");
-				tty_hangup(acm->tty);
+			if (tty) {
+				if (!acm->clocal &&
+				    (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
+					dbg("calling hangup");
+					tty_hangup(tty);
+				}
+				tty_kref_put(tty);
 			}
 
 			acm->ctrlin = newctrl;
@@ -374,15 +379,14 @@ static void acm_rx_tasklet(unsigned long _acm)
 {
 	struct acm *acm = (void *)_acm;
 	struct acm_rb *buf;
-	struct tty_struct *tty = acm->tty;
+	struct tty_struct *tty;
 	struct acm_ru *rcv;
 	unsigned long flags;
 	unsigned char throttled;
 
 	dbg("Entering acm_rx_tasklet");
 
-	if (!ACM_READY(acm))
-	{
+	if (!ACM_READY(acm)) {
 		dbg("acm_rx_tasklet: ACM not ready");
 		return;
 	}
@@ -390,12 +394,13 @@ static void acm_rx_tasklet(unsigned long _acm)
 	spin_lock_irqsave(&acm->throttle_lock, flags);
 	throttled = acm->throttle;
 	spin_unlock_irqrestore(&acm->throttle_lock, flags);
-	if (throttled)
-	{
+	if (throttled) {
 		dbg("acm_rx_tasklet: throttled");
 		return;
 	}
 
+	tty = tty_port_tty_get(&acm->port);
+
 next_buffer:
 	spin_lock_irqsave(&acm->read_lock, flags);
 	if (list_empty(&acm->filled_read_bufs)) {
@@ -409,20 +414,22 @@ next_buffer:
 
 	dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d", buf, buf->size);
 
-	tty_buffer_request_room(tty, buf->size);
-	spin_lock_irqsave(&acm->throttle_lock, flags);
-	throttled = acm->throttle;
-	spin_unlock_irqrestore(&acm->throttle_lock, flags);
-	if (!throttled)
-		tty_insert_flip_string(tty, buf->base, buf->size);
-	tty_flip_buffer_push(tty);
-
-	if (throttled) {
-		dbg("Throttling noticed");
-		spin_lock_irqsave(&acm->read_lock, flags);
-		list_add(&buf->list, &acm->filled_read_bufs);
-		spin_unlock_irqrestore(&acm->read_lock, flags);
-		return;
+	if (tty) {
+		spin_lock_irqsave(&acm->throttle_lock, flags);
+		throttled = acm->throttle;
+		spin_unlock_irqrestore(&acm->throttle_lock, flags);
+		if (!throttled) {
+			tty_buffer_request_room(tty, buf->size);
+			tty_insert_flip_string(tty, buf->base, buf->size);
+			tty_flip_buffer_push(tty);
+		} else {
+			tty_kref_put(tty);
+			dbg("Throttling noticed");
+			spin_lock_irqsave(&acm->read_lock, flags);
+			list_add(&buf->list, &acm->filled_read_bufs);
+			spin_unlock_irqrestore(&acm->read_lock, flags);
+			return;
+		}
 	}
 
 	spin_lock_irqsave(&acm->read_lock, flags);
@@ -431,6 +438,8 @@ next_buffer:
 	goto next_buffer;
 
 urbs:
+	tty_kref_put(tty);
+
 	while (!list_empty(&acm->spare_read_bufs)) {
 		spin_lock_irqsave(&acm->read_lock, flags);
 		if (list_empty(&acm->spare_read_urbs)) {
@@ -502,11 +511,14 @@ static void acm_write_bulk(struct urb *urb)
 static void acm_softint(struct work_struct *work)
 {
 	struct acm *acm = container_of(work, struct acm, work);
+	struct tty_struct *tty;
 
 	dev_vdbg(&acm->data->dev, "tx work\n");
 	if (!ACM_READY(acm))
 		return;
-	tty_wakeup(acm->tty);
+	tty = tty_port_tty_get(&acm->port);
+	tty_wakeup(tty);
+	tty_kref_put(tty);
 }
 
 static void acm_waker(struct work_struct *waker)
@@ -546,8 +558,9 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
 		rv = 0;
 
 	set_bit(TTY_NO_WRITE_SPLIT, &tty->flags);
+
 	tty->driver_data = acm;
-	acm->tty = tty;
+	tty_port_tty_set(&acm->port, tty);
 
 	if (usb_autopm_get_interface(acm->control) < 0)
 		goto early_bail;
@@ -555,11 +568,10 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
 		acm->control->needs_remote_wakeup = 1;
 
 	mutex_lock(&acm->mutex);
-	if (acm->used++) {
+	if (acm->port.count++) {
 		usb_autopm_put_interface(acm->control);
 		goto done;
-        }
-
+	}
 
 	acm->ctrlurb->dev = acm->dev;
 	if (usb_submit_urb(acm->ctrlurb, GFP_KERNEL)) {
@@ -570,6 +582,7 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
 	if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS) &&
 	    (acm->ctrl_caps & USB_CDC_CAP_LINE))
 		goto full_bailout;
+
 	usb_autopm_put_interface(acm->control);
 
 	INIT_LIST_HEAD(&acm->spare_read_urbs);
@@ -585,7 +598,7 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
 	acm->throttle = 0;
 
 	tasklet_schedule(&acm->urb_task);
-
+	rv = tty_port_block_til_ready(&acm->port, tty, filp);
 done:
 	mutex_unlock(&acm->mutex);
 err_out:
@@ -596,16 +609,17 @@ full_bailout:
 	usb_kill_urb(acm->ctrlurb);
 bail_out:
 	usb_autopm_put_interface(acm->control);
-	acm->used--;
+	acm->port.count--;
 	mutex_unlock(&acm->mutex);
 early_bail:
 	mutex_unlock(&open_mutex);
+	tty_port_tty_set(&acm->port, NULL);
 	return -EIO;
 }
 
 static void acm_tty_unregister(struct acm *acm)
 {
-	int i,nr;
+	int i, nr;
 
 	nr = acm->rx_buflimit;
 	tty_unregister_device(acm_tty_driver, acm->minor);
@@ -622,37 +636,51 @@ static void acm_tty_unregister(struct acm *acm)
 
 static int acm_tty_chars_in_buffer(struct tty_struct *tty);
 
+static void acm_port_down(struct acm *acm, int drain)
+{
+	int i, nr = acm->rx_buflimit;
+	mutex_lock(&open_mutex);
+	if (acm->dev) {
+		usb_autopm_get_interface(acm->control);
+		acm_set_control(acm, acm->ctrlout = 0);
+		/* try letting the last writes drain naturally */
+		if (drain) {
+			wait_event_interruptible_timeout(acm->drain_wait,
+				(ACM_NW == acm_wb_is_avail(acm)) || !acm->dev,
+					ACM_CLOSE_TIMEOUT * HZ);
+		}
+		usb_kill_urb(acm->ctrlurb);
+		for (i = 0; i < ACM_NW; i++)
+			usb_kill_urb(acm->wb[i].urb);
+		for (i = 0; i < nr; i++)
+			usb_kill_urb(acm->ru[i].urb);
+		acm->control->needs_remote_wakeup = 0;
+		usb_autopm_put_interface(acm->control);
+	}
+	mutex_unlock(&open_mutex);
+}
+
+static void acm_tty_hangup(struct tty_struct *tty)
+{
+	struct acm *acm = tty->driver_data;
+	tty_port_hangup(&acm->port);
+	acm_port_down(acm, 0);
+}
+
 static void acm_tty_close(struct tty_struct *tty, struct file *filp)
 {
 	struct acm *acm = tty->driver_data;
-	int i,nr;
 
-	if (!acm || !acm->used)
+	/* Perform the closing process and see if we need to do the hardware
+	   shutdown */
+	if (tty_port_close_start(&acm->port, tty, filp) == 0)
 		return;
-
-	nr = acm->rx_buflimit;
+	acm_port_down(acm, 0);
+	tty_port_close_end(&acm->port, tty);
 	mutex_lock(&open_mutex);
-	if (!--acm->used) {
-		if (acm->dev) {
-			usb_autopm_get_interface(acm->control);
-			acm_set_control(acm, acm->ctrlout = 0);
-
-			/* try letting the last writes drain naturally */
-			wait_event_interruptible_timeout(acm->drain_wait,
-					(ACM_NW == acm_wb_is_avail(acm))
-						|| !acm->dev,
-					ACM_CLOSE_TIMEOUT * HZ);
-
-			usb_kill_urb(acm->ctrlurb);
-			for (i = 0; i < ACM_NW; i++)
-				usb_kill_urb(acm->wb[i].urb);
-			for (i = 0; i < nr; i++)
-				usb_kill_urb(acm->ru[i].urb);
-			acm->control->needs_remote_wakeup = 0;
-			usb_autopm_put_interface(acm->control);
-		} else
-			acm_tty_unregister(acm);
-	}
+	tty_port_tty_set(&acm->port, NULL);
+	if (!acm->dev)
+		acm_tty_unregister(acm);
 	mutex_unlock(&open_mutex);
 }
 
@@ -885,8 +913,8 @@ static int acm_write_buffers_alloc(struct acm *acm)
 	return 0;
 }
 
-static int acm_probe (struct usb_interface *intf,
-		      const struct usb_device_id *id)
+static int acm_probe(struct usb_interface *intf,
+		     const struct usb_device_id *id)
 {
 	struct usb_cdc_union_desc *union_header = NULL;
 	struct usb_cdc_country_functional_desc *cfd = NULL;
@@ -1232,6 +1260,7 @@ static void acm_disconnect(struct usb_interface *intf)
 {
 	struct acm *acm = usb_get_intfdata(intf);
 	struct usb_device *usb_dev = interface_to_usbdev(intf);
+	struct tty_struct *tty;
 
 	/* sibling interface is already cleaning up */
 	if (!acm)
@@ -1258,16 +1287,18 @@ static void acm_disconnect(struct usb_interface *intf)
 	usb_driver_release_interface(&acm_driver, intf == acm->control ?
 					acm->data : acm->control);
 
-	if (!acm->used) {
+	if (acm->port.count == 0) {
 		acm_tty_unregister(acm);
 		mutex_unlock(&open_mutex);
 		return;
 	}
 
 	mutex_unlock(&open_mutex);
-
-	if (acm->tty)
-		tty_hangup(acm->tty);
+	tty = tty_port_tty_get(&acm->port);
+	if (tty) {
+		tty_hangup(tty);
+		tty_kref_put(tty);
+	}
 }
 
 #ifdef CONFIG_PM
@@ -1302,7 +1333,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
 	*/
 	mutex_lock(&acm->mutex);
 
-	if (acm->used)
+	if (acm->port.count)
 		stop_data_traffic(acm);
 
 	mutex_unlock(&acm->mutex);
@@ -1324,7 +1355,7 @@ static int acm_resume(struct usb_interface *intf)
 		return 0;
 
 	mutex_lock(&acm->mutex);
-	if (acm->used) {
+	if (acm->port.count) {
 		rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO);
 		if (rv < 0)
 			goto err_out;
@@ -1434,6 +1465,7 @@ static struct usb_driver acm_driver = {
 static const struct tty_operations acm_ops = {
 	.open =			acm_tty_open,
 	.close =		acm_tty_close,
+	.hangup =		acm_tty_hangup,
 	.write =		acm_tty_write,
 	.write_room =		acm_tty_write_room,
 	.ioctl =		acm_tty_ioctl,
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index 19967cd..4c38564 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -89,7 +89,6 @@ struct acm {
 	struct usb_device *dev;				/* the corresponding usb device */
 	struct usb_interface *control;			/* control interface */
 	struct usb_interface *data;			/* data interface */
-	struct tty_struct *tty;				/* the corresponding tty */
 	struct tty_port port;			 	/* our tty port data */
 	struct urb *ctrlurb;				/* urbs */
 	u8 *ctrl_buffer;				/* buffers of urbs */
@@ -121,7 +120,6 @@ struct acm {
 	unsigned int ctrlout;				/* output control lines (DTR, RTS) */
 	unsigned int writesize;				/* max packet size for the output bulk endpoint */
 	unsigned int readsize,ctrlsize;			/* buffer sizes for freeing */
-	unsigned int used;				/* someone has this acm's device open */
 	unsigned int minor;				/* acm minor number */
 	unsigned char throttle;				/* throttled by tty layer */
 	unsigned char clocal;				/* termios CLOCAL */
-- 
cgit v0.10.2


From 6e47e069eb4dffa88ad91ddfc3fd85f32c35654b Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:37:06 +0100
Subject: tty: Clean up the ACM driver to CodingStyle

Or at least most of it. There are further clean ups possible and there are
are also thing checkpatch moans about that would be silly to "fix".

Also note some FIXME points found as the cleanup was done.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index b4e73aa..ddeb691 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -16,7 +16,8 @@
  *	v0.9  - thorough cleaning, URBification, almost a rewrite
  *	v0.10 - some more cleanups
  *	v0.11 - fixed flow control, read error doesn't stop reads
- *	v0.12 - added TIOCM ioctls, added break handling, made struct acm kmalloced
+ *	v0.12 - added TIOCM ioctls, added break handling, made struct acm
+ *		kmalloced
  *	v0.13 - added termios, added hangup
  *	v0.14 - sized down struct acm
  *	v0.15 - fixed flow control again - characters could be lost
@@ -102,13 +103,15 @@ static const struct tty_port_operations acm_port_ops = {
  * Functions for ACM control messages.
  */
 
-static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len)
+static int acm_ctrl_msg(struct acm *acm, int request, int value,
+							void *buf, int len)
 {
 	int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0),
 		request, USB_RT_ACM, value,
 		acm->control->altsetting[0].desc.bInterfaceNumber,
 		buf, len, 5000);
-	dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", request, value, len, retval);
+	dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d",
+						request, value, len, retval);
 	return retval < 0 ? retval : 0;
 }
 
@@ -153,9 +156,8 @@ static int acm_wb_is_avail(struct acm *acm)
 
 	n = ACM_NW;
 	spin_lock_irqsave(&acm->write_lock, flags);
-	for (i = 0; i < ACM_NW; i++) {
+	for (i = 0; i < ACM_NW; i++)
 		n -= acm->wb[i].use;
-	}
 	spin_unlock_irqrestore(&acm->write_lock, flags);
 	return n;
 }
@@ -186,7 +188,8 @@ static int acm_start_wb(struct acm *acm, struct acm_wb *wb)
 	wb->urb->transfer_buffer_length = wb->len;
 	wb->urb->dev = acm->dev;
 
-	if ((rc = usb_submit_urb(wb->urb, GFP_ATOMIC)) < 0) {
+	rc = usb_submit_urb(wb->urb, GFP_ATOMIC);
+	if (rc < 0) {
 		dbg("usb_submit_urb(write bulk) failed: %d", rc);
 		acm_write_done(acm, wb);
 	}
@@ -291,44 +294,45 @@ static void acm_ctrl_irq(struct urb *urb)
 
 	data = (unsigned char *)(dr + 1);
 	switch (dr->bNotificationType) {
+	case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+		dbg("%s network", dr->wValue ?
+					"connected to" : "disconnected from");
+		break;
 
-		case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+	case USB_CDC_NOTIFY_SERIAL_STATE:
+		tty = tty_port_tty_get(&acm->port);
+		newctrl = get_unaligned_le16(data);
 
-			dbg("%s network", dr->wValue ? "connected to" : "disconnected from");
-			break;
-
-		case USB_CDC_NOTIFY_SERIAL_STATE:
-			tty = tty_port_tty_get(&acm->port);
-			newctrl = get_unaligned_le16(data);
-
-			if (tty) {
-				if (!acm->clocal &&
-				    (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
-					dbg("calling hangup");
-					tty_hangup(tty);
-				}
-				tty_kref_put(tty);
+		if (tty) {
+			if (!acm->clocal &&
+				(acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
+				dbg("calling hangup");
+				tty_hangup(tty);
 			}
+			tty_kref_put(tty);
+		}
 
-			acm->ctrlin = newctrl;
-
-			dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
-				acm->ctrlin & ACM_CTRL_DCD ? '+' : '-',	acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_BRK ? '+' : '-',	acm->ctrlin & ACM_CTRL_RI  ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',	acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
-				acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
+		acm->ctrlin = newctrl;
 
+		dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
+			acm->ctrlin & ACM_CTRL_DCD ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_BRK ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_RI  ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
+			acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
 			break;
 
-		default:
-			dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
-				dr->bNotificationType, dr->wIndex,
-				dr->wLength, data[0], data[1]);
-			break;
+	default:
+		dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
+			dr->bNotificationType, dr->wIndex,
+			dr->wLength, data[0], data[1]);
+		break;
 	}
 exit:
 	usb_mark_last_busy(acm->dev);
-	retval = usb_submit_urb (urb, GFP_ATOMIC);
+	retval = usb_submit_urb(urb, GFP_ATOMIC);
 	if (retval)
 		dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with "
 			"result %d", __func__, retval);
@@ -466,10 +470,11 @@ urbs:
 		rcv->urb->transfer_dma = buf->dma;
 		rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 
-		/* This shouldn't kill the driver as unsuccessful URBs are returned to the
-		   free-urbs-pool and resubmited ASAP */
+		/* This shouldn't kill the driver as unsuccessful URBs are
+		   returned to the free-urbs-pool and resubmited ASAP */
 		spin_lock_irqsave(&acm->read_lock, flags);
-		if (acm->susp_count || usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
+		if (acm->susp_count ||
+				usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
 			list_add(&buf->list, &acm->spare_read_bufs);
 			list_add(&rcv->list, &acm->spare_read_urbs);
 			acm->processing = 0;
@@ -588,12 +593,11 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
 	INIT_LIST_HEAD(&acm->spare_read_urbs);
 	INIT_LIST_HEAD(&acm->spare_read_bufs);
 	INIT_LIST_HEAD(&acm->filled_read_bufs);
-	for (i = 0; i < acm->rx_buflimit; i++) {
+
+	for (i = 0; i < acm->rx_buflimit; i++)
 		list_add(&(acm->ru[i].list), &acm->spare_read_urbs);
-	}
-	for (i = 0; i < acm->rx_buflimit; i++) {
+	for (i = 0; i < acm->rx_buflimit; i++)
 		list_add(&(acm->rb[i].list), &acm->spare_read_bufs);
-	}
 
 	acm->throttle = 0;
 
@@ -684,7 +688,8 @@ static void acm_tty_close(struct tty_struct *tty, struct file *filp)
 	mutex_unlock(&open_mutex);
 }
 
-static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+static int acm_tty_write(struct tty_struct *tty,
+					const unsigned char *buf, int count)
 {
 	struct acm *acm = tty->driver_data;
 	int stat;
@@ -700,7 +705,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c
 		return 0;
 
 	spin_lock_irqsave(&acm->write_lock, flags);
-	if ((wbn = acm_wb_alloc(acm)) < 0) {
+	wbn = acm_wb_alloc(acm);
+	if (wbn < 0) {
 		spin_unlock_irqrestore(&acm->write_lock, flags);
 		return 0;
 	}
@@ -712,7 +718,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c
 	wb->len = count;
 	spin_unlock_irqrestore(&acm->write_lock, flags);
 
-	if ((stat = acm_write_start(acm, wbn)) < 0)
+	stat = acm_write_start(acm, wbn);
+	if (stat < 0)
 		return stat;
 	return count;
 }
@@ -798,8 +805,10 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file,
 		return -EINVAL;
 
 	newctrl = acm->ctrlout;
-	set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
-	clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
+	set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
+					(set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
+	clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
+					(clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
 
 	newctrl = (newctrl & ~clear) | set;
 
@@ -808,7 +817,8 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file,
 	return acm_set_control(acm, acm->ctrlout = newctrl);
 }
 
-static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
+static int acm_tty_ioctl(struct tty_struct *tty, struct file *file,
+					unsigned int cmd, unsigned long arg)
 {
 	struct acm *acm = tty->driver_data;
 
@@ -830,7 +840,8 @@ static const __u8 acm_tty_size[] = {
 	5, 6, 7, 8
 };
 
-static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios_old)
+static void acm_tty_set_termios(struct tty_struct *tty,
+						struct ktermios *termios_old)
 {
 	struct acm *acm = tty->driver_data;
 	struct ktermios *termios = tty->termios;
@@ -840,19 +851,23 @@ static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios
 	if (!ACM_READY(acm))
 		return;
 
+	/* FIXME: Needs to support the tty_baud interface */
+	/* FIXME: Broken on sparc */
 	newline.dwDTERate = cpu_to_le32p(acm_tty_speed +
 		(termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0));
 	newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0;
 	newline.bParityType = termios->c_cflag & PARENB ?
-		(termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0;
+				(termios->c_cflag & PARODD ? 1 : 2) +
+				(termios->c_cflag & CMSPAR ? 2 : 0) : 0;
 	newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4];
-
+	/* FIXME: Needs to clear unsupported bits in the termios */
 	acm->clocal = ((termios->c_cflag & CLOCAL) != 0);
 
 	if (!newline.dwDTERate) {
 		newline.dwDTERate = acm->line.dwDTERate;
 		newctrl &= ~ACM_CTRL_DTR;
-	} else  newctrl |=  ACM_CTRL_DTR;
+	} else
+		newctrl |=  ACM_CTRL_DTR;
 
 	if (newctrl != acm->ctrlout)
 		acm_set_control(acm, acm->ctrlout = newctrl);
@@ -877,9 +892,8 @@ static void acm_write_buffers_free(struct acm *acm)
 	struct acm_wb *wb;
 	struct usb_device *usb_dev = interface_to_usbdev(acm->control);
 
-	for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) {
+	for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++)
 		usb_buffer_free(usb_dev, acm->writesize, wb->buf, wb->dmah);
-	}
 }
 
 static void acm_read_buffers_free(struct acm *acm)
@@ -888,7 +902,8 @@ static void acm_read_buffers_free(struct acm *acm)
 	int i, n = acm->rx_buflimit;
 
 	for (i = 0; i < n; i++)
-		usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma);
+		usb_buffer_free(usb_dev, acm->readsize,
+					acm->rb[i].base, acm->rb[i].dma);
 }
 
 /* Little helper: write buffers allocate */
@@ -928,7 +943,7 @@ static int acm_probe(struct usb_interface *intf,
 	struct usb_device *usb_dev = interface_to_usbdev(intf);
 	struct acm *acm;
 	int minor;
-	int ctrlsize,readsize;
+	int ctrlsize, readsize;
 	u8 *buf;
 	u8 ac_management_function = 0;
 	u8 call_management_function = 0;
@@ -948,7 +963,7 @@ static int acm_probe(struct usb_interface *intf,
 		control_interface = usb_ifnum_to_if(usb_dev, 0);
 		goto skip_normal_probe;
 	}
-	
+
 	/* normal probing*/
 	if (!buffer) {
 		dev_err(&intf->dev, "Weird descriptor references\n");
@@ -956,8 +971,10 @@ static int acm_probe(struct usb_interface *intf,
 	}
 
 	if (!buflen) {
-		if (intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) {
-			dev_dbg(&intf->dev,"Seeking extra descriptors on endpoint\n");
+		if (intf->cur_altsetting->endpoint->extralen &&
+				intf->cur_altsetting->endpoint->extra) {
+			dev_dbg(&intf->dev,
+				"Seeking extra descriptors on endpoint\n");
 			buflen = intf->cur_altsetting->endpoint->extralen;
 			buffer = intf->cur_altsetting->endpoint->extra;
 		} else {
@@ -968,47 +985,43 @@ static int acm_probe(struct usb_interface *intf,
 	}
 
 	while (buflen > 0) {
-		if (buffer [1] != USB_DT_CS_INTERFACE) {
+		if (buffer[1] != USB_DT_CS_INTERFACE) {
 			dev_err(&intf->dev, "skipping garbage\n");
 			goto next_desc;
 		}
 
-		switch (buffer [2]) {
-			case USB_CDC_UNION_TYPE: /* we've found it */
-				if (union_header) {
-					dev_err(&intf->dev, "More than one "
-						"union descriptor, "
-						"skipping ...\n");
-					goto next_desc;
-				}
-				union_header = (struct usb_cdc_union_desc *)
-							buffer;
-				break;
-			case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
-				cfd = (struct usb_cdc_country_functional_desc *)buffer;
-				break;
-			case USB_CDC_HEADER_TYPE: /* maybe check version */ 
-				break; /* for now we ignore it */ 
-			case USB_CDC_ACM_TYPE:
-				ac_management_function = buffer[3];
-				break;
-			case USB_CDC_CALL_MANAGEMENT_TYPE:
-				call_management_function = buffer[3];
-				call_interface_num = buffer[4];
-				if ((call_management_function & 3) != 3)
-					dev_err(&intf->dev, "This device "
-						"cannot do calls on its own. "
-						"It is no modem.\n");
-				break;
-			default:
-				/* there are LOTS more CDC descriptors that
-				 * could legitimately be found here.
-				 */
-				dev_dbg(&intf->dev, "Ignoring descriptor: "
-						"type %02x, length %d\n",
-						buffer[2], buffer[0]);
-				break;
+		switch (buffer[2]) {
+		case USB_CDC_UNION_TYPE: /* we've found it */
+			if (union_header) {
+				dev_err(&intf->dev, "More than one "
+					"union descriptor, skipping ...\n");
+				goto next_desc;
 			}
+			union_header = (struct usb_cdc_union_desc *)buffer;
+			break;
+		case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
+			cfd = (struct usb_cdc_country_functional_desc *)buffer;
+			break;
+		case USB_CDC_HEADER_TYPE: /* maybe check version */
+			break; /* for now we ignore it */
+		case USB_CDC_ACM_TYPE:
+			ac_management_function = buffer[3];
+			break;
+		case USB_CDC_CALL_MANAGEMENT_TYPE:
+			call_management_function = buffer[3];
+			call_interface_num = buffer[4];
+			if ((call_management_function & 3) != 3)
+				dev_err(&intf->dev, "This device cannot do calls on its own. It is not a modem.\n");
+			break;
+		default:
+			/* there are LOTS more CDC descriptors that
+			 * could legitimately be found here.
+			 */
+			dev_dbg(&intf->dev, "Ignoring descriptor: "
+					"type %02x, length %d\n",
+					buffer[2], buffer[0]);
+			break;
+		}
 next_desc:
 		buflen -= buffer[0];
 		buffer += buffer[0];
@@ -1016,33 +1029,36 @@ next_desc:
 
 	if (!union_header) {
 		if (call_interface_num > 0) {
-			dev_dbg(&intf->dev,"No union descriptor, using call management descriptor\n");
+			dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n");
 			data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num));
 			control_interface = intf;
 		} else {
-			dev_dbg(&intf->dev,"No union descriptor, giving up\n");
+			dev_dbg(&intf->dev,
+					"No union descriptor, giving up\n");
 			return -ENODEV;
 		}
 	} else {
 		control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0);
 		data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0));
 		if (!control_interface || !data_interface) {
-			dev_dbg(&intf->dev,"no interfaces\n");
+			dev_dbg(&intf->dev, "no interfaces\n");
 			return -ENODEV;
 		}
 	}
-	
+
 	if (data_interface_num != call_interface_num)
-		dev_dbg(&intf->dev,"Separate call control interface. That is not fully supported.\n");
+		dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n");
 
 skip_normal_probe:
 
 	/*workaround for switched interfaces */
-	if (data_interface->cur_altsetting->desc.bInterfaceClass != CDC_DATA_INTERFACE_TYPE) {
-		if (control_interface->cur_altsetting->desc.bInterfaceClass == CDC_DATA_INTERFACE_TYPE) {
+	if (data_interface->cur_altsetting->desc.bInterfaceClass
+						!= CDC_DATA_INTERFACE_TYPE) {
+		if (control_interface->cur_altsetting->desc.bInterfaceClass
+						== CDC_DATA_INTERFACE_TYPE) {
 			struct usb_interface *t;
-			dev_dbg(&intf->dev,"Your device has switched interfaces.\n");
-
+			dev_dbg(&intf->dev,
+				"Your device has switched interfaces.\n");
 			t = control_interface;
 			control_interface = data_interface;
 			data_interface = t;
@@ -1054,9 +1070,9 @@ skip_normal_probe:
 	/* Accept probe requests only for the control interface */
 	if (intf != control_interface)
 		return -ENODEV;
-	
+
 	if (usb_interface_claimed(data_interface)) { /* valid in this context */
-		dev_dbg(&intf->dev,"The data interface isn't available\n");
+		dev_dbg(&intf->dev, "The data interface isn't available\n");
 		return -EBUSY;
 	}
 
@@ -1073,8 +1089,8 @@ skip_normal_probe:
 	if (!usb_endpoint_dir_in(epread)) {
 		/* descriptors are swapped */
 		struct usb_endpoint_descriptor *t;
-		dev_dbg(&intf->dev,"The data interface has switched endpoints\n");
-		
+		dev_dbg(&intf->dev,
+			"The data interface has switched endpoints\n");
 		t = epread;
 		epread = epwrite;
 		epwrite = t;
@@ -1087,13 +1103,15 @@ skip_normal_probe:
 		return -ENODEV;
 	}
 
-	if (!(acm = kzalloc(sizeof(struct acm), GFP_KERNEL))) {
+	acm = kzalloc(sizeof(struct acm), GFP_KERNEL);
+	if (acm == NULL) {
 		dev_dbg(&intf->dev, "out of memory (acm kzalloc)\n");
 		goto alloc_fail;
 	}
 
 	ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize);
-	readsize = le16_to_cpu(epread->wMaxPacketSize)* ( quirks == SINGLE_RX_URB ? 1 : 2);
+	readsize = le16_to_cpu(epread->wMaxPacketSize) *
+				(quirks == SINGLE_RX_URB ? 1 : 2);
 	acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize) * 20;
 	acm->control = control_interface;
 	acm->data = data_interface;
@@ -1136,8 +1154,10 @@ skip_normal_probe:
 	for (i = 0; i < num_rx_buf; i++) {
 		struct acm_ru *rcv = &(acm->ru[i]);
 
-		if (!(rcv->urb = usb_alloc_urb(0, GFP_KERNEL))) {
-			dev_dbg(&intf->dev, "out of memory (read urbs usb_alloc_urb)\n");
+		rcv->urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (rcv->urb == NULL) {
+			dev_dbg(&intf->dev,
+				"out of memory (read urbs usb_alloc_urb)\n");
 			goto alloc_fail7;
 		}
 
@@ -1150,26 +1170,29 @@ skip_normal_probe:
 		rb->base = usb_buffer_alloc(acm->dev, readsize,
 				GFP_KERNEL, &rb->dma);
 		if (!rb->base) {
-			dev_dbg(&intf->dev, "out of memory (read bufs usb_buffer_alloc)\n");
+			dev_dbg(&intf->dev,
+				"out of memory (read bufs usb_buffer_alloc)\n");
 			goto alloc_fail7;
 		}
 	}
-	for(i = 0; i < ACM_NW; i++)
-	{
+	for (i = 0; i < ACM_NW; i++) {
 		struct acm_wb *snd = &(acm->wb[i]);
 
-		if (!(snd->urb = usb_alloc_urb(0, GFP_KERNEL))) {
-			dev_dbg(&intf->dev, "out of memory (write urbs usb_alloc_urb)");
+		snd->urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (snd->urb == NULL) {
+			dev_dbg(&intf->dev,
+				"out of memory (write urbs usb_alloc_urb)");
 			goto alloc_fail7;
 		}
 
-		usb_fill_bulk_urb(snd->urb, usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
-				NULL, acm->writesize, acm_write_bulk, snd);
+		usb_fill_bulk_urb(snd->urb, usb_dev,
+			usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
+			NULL, acm->writesize, acm_write_bulk, snd);
 		snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 		snd->instance = acm;
 	}
 
-	usb_set_intfdata (intf, acm);
+	usb_set_intfdata(intf, acm);
 
 	i = device_create_file(&intf->dev, &dev_attr_bmCapabilities);
 	if (i < 0)
@@ -1180,7 +1203,8 @@ skip_normal_probe:
 		if (!acm->country_codes)
 			goto skip_countries;
 		acm->country_code_size = cfd->bLength - 4;
-		memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4);
+		memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0,
+							cfd->bLength - 4);
 		acm->country_rel_date = cfd->iCountryCodeRelDate;
 
 		i = device_create_file(&intf->dev, &dev_attr_wCountryCodes);
@@ -1189,7 +1213,8 @@ skip_normal_probe:
 			goto skip_countries;
 		}
 
-		i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate);
+		i = device_create_file(&intf->dev,
+						&dev_attr_iCountryCodeRelDate);
 		if (i < 0) {
 			kfree(acm->country_codes);
 			goto skip_countries;
@@ -1197,8 +1222,10 @@ skip_normal_probe:
 	}
 
 skip_countries:
-	usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
-			 acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, epctrl->bInterval);
+	usb_fill_int_urb(acm->ctrlurb, usb_dev,
+			usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
+			acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm,
+			epctrl->bInterval);
 	acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 	acm->ctrlurb->transfer_dma = acm->ctrl_dma;
 
@@ -1245,7 +1272,7 @@ static void stop_data_traffic(struct acm *acm)
 	tasklet_disable(&acm->urb_task);
 
 	usb_kill_urb(acm->ctrlurb);
-	for(i = 0; i < ACM_NW; i++)
+	for (i = 0; i < ACM_NW; i++)
 		usb_kill_urb(acm->wb[i].urb);
 	for (i = 0; i < acm->rx_buflimit; i++)
 		usb_kill_urb(acm->ru[i].urb);
@@ -1267,7 +1294,7 @@ static void acm_disconnect(struct usb_interface *intf)
 		return;
 
 	mutex_lock(&open_mutex);
-	if (acm->country_codes){
+	if (acm->country_codes) {
 		device_remove_file(&acm->control->dev,
 				&dev_attr_wCountryCodes);
 		device_remove_file(&acm->control->dev,
@@ -1281,7 +1308,8 @@ static void acm_disconnect(struct usb_interface *intf)
 	stop_data_traffic(acm);
 
 	acm_write_buffers_free(acm);
-	usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma);
+	usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer,
+								acm->ctrl_dma);
 	acm_read_buffers_free(acm);
 
 	usb_driver_release_interface(&acm_driver, intf == acm->control ?
@@ -1434,7 +1462,7 @@ static struct usb_device_id acm_ids[] = {
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_ACM_PROTO_AT_GSM) },
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
-		USB_CDC_ACM_PROTO_AT_3G	) },
+		USB_CDC_ACM_PROTO_AT_3G) },
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_ACM_PROTO_AT_CDMA) },
 
@@ -1442,7 +1470,7 @@ static struct usb_device_id acm_ids[] = {
 	{ }
 };
 
-MODULE_DEVICE_TABLE (usb, acm_ids);
+MODULE_DEVICE_TABLE(usb, acm_ids);
 
 static struct usb_driver acm_driver = {
 	.name =		"cdc_acm",
@@ -1497,7 +1525,8 @@ static int __init acm_init(void)
 	acm_tty_driver->subtype = SERIAL_TYPE_NORMAL,
 	acm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
 	acm_tty_driver->init_termios = tty_std_termios;
-	acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD |
+								HUPCL | CLOCAL;
 	tty_set_operations(acm_tty_driver, &acm_ops);
 
 	retval = tty_register_driver(acm_tty_driver);
@@ -1529,7 +1558,7 @@ static void __exit acm_exit(void)
 module_init(acm_init);
 module_exit(acm_exit);
 
-MODULE_AUTHOR( DRIVER_AUTHOR );
-MODULE_DESCRIPTION( DRIVER_DESC );
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR);
-- 
cgit v0.10.2


From 70beaed22cbe12979e55d99b370e147e2e168562 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:39:12 +0100
Subject: serial: refactor ASYNC_ flags

Define ASYNCB_* flags which are bit numbers of the ASYNC_* flags.
This is useful for {test,set,clear}_bit.

Also convert each ASYNC_% to be (1 << ASYNCB_%) and define masks
with the macros, not constants.

Tested with:
#include "PATH_TO_KERNEL/include/linux/serial.h"
static struct {
        unsigned int new, old;
} as[] = {
        { ASYNC_HUP_NOTIFY, 0x0001 },
        { ASYNC_FOURPORT, 0x0002 },
...
	{ ASYNC_BOOT_ONLYMCA, 0x00400000 },
        { ASYNC_INTERNAL_FLAGS, 0xFFC00000 }
};
...
        for (a = 0; a < ARRAY_SIZE(as); a++)
                if (as[a].old != as[a].new)
                        printf("%.8x != %.8x\n", as[a].old, as[a].new);

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/serial.h b/include/linux/serial.h
index 9136cc5..e5bb75a 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -96,54 +96,76 @@ struct serial_uart_config {
 
 /*
  * Definitions for async_struct (and serial_struct) flags field
+ *
+ * Define ASYNCB_* for convenient use with {test,set,clear}_bit.
  */
-#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes 
-				   on the callout port */
-#define ASYNC_FOURPORT  0x0002	/* Set OU1, OUT2 per AST Fourport settings */
-#define ASYNC_SAK	0x0004	/* Secure Attention Key (Orange book) */
-#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */
-
-#define ASYNC_SPD_MASK	0x1030
-#define ASYNC_SPD_HI	0x0010	/* Use 56000 instead of 38400 bps */
-
-#define ASYNC_SPD_VHI	0x0020  /* Use 115200 instead of 38400 bps */
-#define ASYNC_SPD_CUST	0x0030  /* Use user-specified divisor */
-
-#define ASYNC_SKIP_TEST	0x0040 /* Skip UART test during autoconfiguration */
-#define ASYNC_AUTO_IRQ  0x0080 /* Do automatic IRQ during autoconfiguration */
-#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */
-#define ASYNC_PGRP_LOCKOUT    0x0200 /* Lock out cua opens based on pgrp */
-#define ASYNC_CALLOUT_NOHUP   0x0400 /* Don't do hangups for cua device */
-
-#define ASYNC_HARDPPS_CD	0x0800	/* Call hardpps when CD goes high  */
-
-#define ASYNC_SPD_SHI	0x1000	/* Use 230400 instead of 38400 bps */
-#define ASYNC_SPD_WARP	0x1010	/* Use 460800 instead of 38400 bps */
-
-#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */
-
-#define ASYNC_BUGGY_UART  0x4000 /* This is a buggy UART, skip some safety
-				  * checks.  Note: can be dangerous! */
-
-#define ASYNC_AUTOPROBE	 0x8000 /* Port was autoprobed by PCI or PNP code */
-
-#define ASYNC_FLAGS	0x7FFF	/* Possible legal async flags */
-#define ASYNC_USR_MASK	0x3430	/* Legal flags that non-privileged
-				 * users can set or reset */
-
-/* Internal flags used only by kernel/chr_drv/serial.c */
-#define ASYNC_INITIALIZED	0x80000000 /* Serial port was initialized */
-#define ASYNC_NORMAL_ACTIVE	0x20000000 /* Normal device is active */
-#define ASYNC_BOOT_AUTOCONF	0x10000000 /* Autoconfigure port on bootup */
-#define ASYNC_CLOSING		0x08000000 /* Serial port is closing */
-#define ASYNC_CTS_FLOW		0x04000000 /* Do CTS flow control */
-#define ASYNC_CHECK_CD		0x02000000 /* i.e., CLOCAL */
-#define ASYNC_SHARE_IRQ		0x01000000 /* for multifunction cards
-					     --- no longer used */
-#define ASYNC_CONS_FLOW		0x00800000 /* flow control for console  */
-
-#define ASYNC_BOOT_ONLYMCA	0x00400000 /* Probe only if MCA bus */
-#define ASYNC_INTERNAL_FLAGS	0xFFC00000 /* Internal flags */
+#define ASYNCB_HUP_NOTIFY	 0 /* Notify getty on hangups and closes
+				    * on the callout port */
+#define ASYNCB_FOURPORT		 1 /* Set OU1, OUT2 per AST Fourport settings */
+#define ASYNCB_SAK		 2 /* Secure Attention Key (Orange book) */
+#define ASYNCB_SPLIT_TERMIOS	 3 /* Separate termios for dialin/callout */
+#define ASYNCB_SPD_HI		 4 /* Use 56000 instead of 38400 bps */
+#define ASYNCB_SPD_VHI		 5 /* Use 115200 instead of 38400 bps */
+#define ASYNCB_SKIP_TEST	 6 /* Skip UART test during autoconfiguration */
+#define ASYNCB_AUTO_IRQ		 7 /* Do automatic IRQ during
+				    * autoconfiguration */
+#define ASYNCB_SESSION_LOCKOUT	 8 /* Lock out cua opens based on session */
+#define ASYNCB_PGRP_LOCKOUT	 9 /* Lock out cua opens based on pgrp */
+#define ASYNCB_CALLOUT_NOHUP	10 /* Don't do hangups for cua device */
+#define ASYNCB_HARDPPS_CD	11 /* Call hardpps when CD goes high  */
+#define ASYNCB_SPD_SHI		12 /* Use 230400 instead of 38400 bps */
+#define ASYNCB_LOW_LATENCY	13 /* Request low latency behaviour */
+#define ASYNCB_BUGGY_UART	14 /* This is a buggy UART, skip some safety
+				    * checks.  Note: can be dangerous! */
+#define ASYNCB_AUTOPROBE	15 /* Port was autoprobed by PCI or PNP code */
+#define ASYNCB_LAST_USER	15
+
+/* Internal flags used only by kernel */
+#define ASYNCB_INITIALIZED	31 /* Serial port was initialized */
+#define ASYNCB_NORMAL_ACTIVE	29 /* Normal device is active */
+#define ASYNCB_BOOT_AUTOCONF	28 /* Autoconfigure port on bootup */
+#define ASYNCB_CLOSING		27 /* Serial port is closing */
+#define ASYNCB_CTS_FLOW		26 /* Do CTS flow control */
+#define ASYNCB_CHECK_CD		25 /* i.e., CLOCAL */
+#define ASYNCB_SHARE_IRQ	24 /* for multifunction cards, no longer used */
+#define ASYNCB_CONS_FLOW	23 /* flow control for console  */
+#define ASYNCB_BOOT_ONLYMCA	22 /* Probe only if MCA bus */
+#define ASYNCB_FIRST_KERNEL	22
+
+#define ASYNC_HUP_NOTIFY	(1U << ASYNCB_HUP_NOTIFY)
+#define ASYNC_FOURPORT		(1U << ASYNCB_FOURPORT)
+#define ASYNC_SAK		(1U << ASYNCB_SAK)
+#define ASYNC_SPLIT_TERMIOS	(1U << ASYNCB_SPLIT_TERMIOS)
+#define ASYNC_SPD_HI		(1U << ASYNCB_SPD_HI)
+#define ASYNC_SPD_VHI		(1U << ASYNCB_SPD_VHI)
+#define ASYNC_SKIP_TEST		(1U << ASYNCB_SKIP_TEST)
+#define ASYNC_AUTO_IRQ		(1U << ASYNCB_AUTO_IRQ)
+#define ASYNC_SESSION_LOCKOUT	(1U << ASYNCB_SESSION_LOCKOUT)
+#define ASYNC_PGRP_LOCKOUT	(1U << ASYNCB_PGRP_LOCKOUT)
+#define ASYNC_CALLOUT_NOHUP	(1U << ASYNCB_CALLOUT_NOHUP)
+#define ASYNC_HARDPPS_CD	(1U << ASYNCB_HARDPPS_CD)
+#define ASYNC_SPD_SHI		(1U << ASYNCB_SPD_SHI)
+#define ASYNC_LOW_LATENCY	(1U << ASYNCB_LOW_LATENCY)
+#define ASYNC_BUGGY_UART	(1U << ASYNCB_BUGGY_UART)
+#define ASYNC_AUTOPROBE		(1U << ASYNCB_AUTOPROBE)
+
+#define ASYNC_FLAGS		((1U << ASYNCB_LAST_USER) - 1)
+#define ASYNC_USR_MASK		(ASYNC_SPD_HI|ASYNC_SPD_VHI| \
+		ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY)
+#define ASYNC_SPD_CUST		(ASYNC_SPD_HI|ASYNC_SPD_VHI)
+#define ASYNC_SPD_WARP		(ASYNC_SPD_HI|ASYNC_SPD_SHI)
+#define ASYNC_SPD_MASK		(ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI)
+
+#define ASYNC_INITIALIZED	(1U << ASYNCB_INITIALIZED)
+#define ASYNC_NORMAL_ACTIVE	(1U << ASYNCB_NORMAL_ACTIVE)
+#define ASYNC_BOOT_AUTOCONF	(1U << ASYNCB_BOOT_AUTOCONF)
+#define ASYNC_CLOSING		(1U << ASYNCB_CLOSING)
+#define ASYNC_CTS_FLOW		(1U << ASYNCB_CTS_FLOW)
+#define ASYNC_CHECK_CD		(1U << ASYNCB_CHECK_CD)
+#define ASYNC_SHARE_IRQ		(1U << ASYNCB_SHARE_IRQ)
+#define ASYNC_CONS_FLOW		(1U << ASYNCB_CONS_FLOW)
+#define ASYNC_BOOT_ONLYMCA	(1U << ASYNCB_BOOT_ONLYMCA)
+#define ASYNC_INTERNAL_FLAGS	(~((1U << ASYNCB_FIRST_KERNEL) - 1))
 
 /*
  * Multiport serial configuration structure --- external structure
-- 
cgit v0.10.2


From a391ad0f09014856bbc4eeea309593eba977b3b0 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:40:17 +0100
Subject: rocket: fix test_bit parameters

Switch from ASYNC_* to ASYNCB_*, because {test,set}_bit expect
bit number, not mask.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 7399188..63d5b62 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -939,7 +939,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Info->count is now 1; so it's safe to sleep now.
 	 */
-	if (!test_bit(ASYNC_INITIALIZED, &port->flags)) {
+	if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) {
 		cp = &info->channel;
 		sSetRxTrigger(cp, TRIG_1);
 		if (sGetChanStatus(cp) & CD_ACT)
@@ -963,7 +963,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
 		sEnRxFIFO(cp);
 		sEnTransmit(cp);
 
-		set_bit(ASYNC_INITIALIZED, &info->port.flags);
+		set_bit(ASYNCB_INITIALIZED, &info->port.flags);
 
 		/*
 		 * Set up the tty->alt_speed kludge
@@ -1646,7 +1646,7 @@ static int rp_write(struct tty_struct *tty,
 	/*  Write remaining data into the port's xmit_buf */
 	while (1) {
 		/* Hung up ? */
-		if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags))
+		if (!test_bit(ASYNCB_NORMAL_ACTIVE, &info->port.flags))
 			goto end;
 		c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1);
 		c = min(c, XMIT_BUF_SIZE - info->xmit_head);
-- 
cgit v0.10.2


From c3301a5c04800bcf8afc8a815bf9e570a4e25a08 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:41:05 +0100
Subject: epca: fix test_bit parameters

Switch from ASYNC_* to ASYNCB_*, because test_bit expects
bit number, not mask.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 8797b77..710ee93 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -1518,7 +1518,7 @@ static void doevent(int crd)
 		if (event & MODEMCHG_IND) {
 			/* A modem signal change has been indicated */
 			ch->imodem = mstat;
-			if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) {
+			if (test_bit(ASYNCB_CHECK_CD, &ch->port.flags)) {
 				/* We are now receiving dcd */
 				if (mstat & ch->dcd)
 					wake_up_interruptible(&ch->port.open_wait);
@@ -1765,9 +1765,9 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch)
 		 * that the driver will wait on carrier detect.
 		 */
 		if (ts->c_cflag & CLOCAL)
-			clear_bit(ASYNC_CHECK_CD, &ch->port.flags);
+			clear_bit(ASYNCB_CHECK_CD, &ch->port.flags);
 		else
-			set_bit(ASYNC_CHECK_CD, &ch->port.flags);
+			set_bit(ASYNCB_CHECK_CD, &ch->port.flags);
 		mval = ch->m_dtr | ch->m_rts;
 	} /* End CBAUD not detected */
 	iflag = termios2digi_i(ch, ts->c_iflag);
@@ -2244,7 +2244,8 @@ static void do_softint(struct work_struct *work)
 			if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
 				tty_hangup(tty);
 				wake_up_interruptible(&ch->port.open_wait);
-				clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags);
+				clear_bit(ASYNCB_NORMAL_ACTIVE,
+						&ch->port.flags);
 			}
 		}
 		tty_kref_put(tty);
-- 
cgit v0.10.2


From 70fd8fdecc4430ffcede7704dd812d4054d1faf9 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@amd.com>
Date: Thu, 11 Jun 2009 12:41:57 +0100
Subject: 8250_pci: add the OXCB950 chip to the 8250 PCI driver.

This adds support for the following serial controller chip:
Oxford Semiconductor OXCB950 for PCI Cardbus interface
http://www.transdimension.com/products/serial/OXCB950.html

on this card:
ExSys EX-1370 1 port high-speed serial card for ExpressCard/34 slot

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 938bc1b..e371a9c 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2776,6 +2776,9 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_2_1130000 },
+	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950,
+		PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0,
+		pbn_b0_1_921600 },
 	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_4_115200 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f71812..d7d1c41 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1996,10 +1996,12 @@
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U	0xC118
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU	0xC11C
 #define PCI_DEVICE_ID_OXSEMI_16PCI954	0x9501
+#define PCI_DEVICE_ID_OXSEMI_C950	0x950B
 #define PCI_DEVICE_ID_OXSEMI_16PCI95N	0x9511
 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP	0x9513
 #define PCI_DEVICE_ID_OXSEMI_16PCI952	0x9521
 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP	0x9523
+#define PCI_SUBDEVICE_ID_OXSEMI_C950	0x0001
 
 #define PCI_VENDOR_ID_CHELSIO		0x1425
 
-- 
cgit v0.10.2


From 5b0ed5263cb089500052f8c1ab6e0706bebf0d83 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 Jun 2009 12:42:49 +0100
Subject: x86: fix ktermios-termio conversion

The legacy TCSETA{,W,F} ioctls failed to set the termio->c_line field
on x86. This adds a missing get_user.

The same ioctls also fail to report faulting user pointers, which
we keep ignoring.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h
index f729563..c4ee805 100644
--- a/arch/x86/include/asm/termios.h
+++ b/arch/x86/include/asm/termios.h
@@ -67,6 +67,7 @@ static inline int user_termio_to_kernel_termios(struct ktermios *termios,
 	SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);
 	SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);
 	SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);
+	get_user(termios->c_line, &termio->c_line);
 	return copy_from_user(termios->c_cc, termio->c_cc, NCC);
 }
 
-- 
cgit v0.10.2


From 38db89799bdf11625a831c5af33938dcb11908b6 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:44:17 +0100
Subject: tty: throttling race fix

The tty throttling code can race due to the lock drops. It takes very high
loads but this has been observed and verified by Rob Duncan.

The basic problem is that on an SMP box we can go

	CPU #1				CPU #2
	need to throttle ?
	suppose we should		buffer space cleared
					are we throttled
					yes ? - unthrottle
	call throttle method

This changeet take the termios lock to protect against this. The termios
lock isn't the initial obvious candidate but many implementations of throttle
methods already need to poke around their own termios structures (and nobody
really locks them against a racing change of flow control).

This does mean that anyone who is setting tty->low_latency = 1 and then
calling tty_flip_buffer_push from their unthrottle method is going to end up
collapsing in a pile of locks. However we've removed all the known bogus
users of low_latency = 1 and such use isn't safe anyway for other reasons so
catching it would be an improvement.

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index 6f4c7d0..2401dbc 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -97,14 +97,19 @@ EXPORT_SYMBOL(tty_driver_flush_buffer);
  *	@tty: terminal
  *
  *	Indicate that a tty should stop transmitting data down the stack.
+ *	Takes the termios mutex to protect against parallel throttle/unthrottle
+ *	and also to ensure the driver can consistently reference its own
+ *	termios data at this point when implementing software flow control.
  */
 
 void tty_throttle(struct tty_struct *tty)
 {
+	mutex_lock(&tty->termios_mutex);
 	/* check TTY_THROTTLED first so it indicates our state */
 	if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) &&
 	    tty->ops->throttle)
 		tty->ops->throttle(tty);
+	mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_throttle);
 
@@ -113,13 +118,21 @@ EXPORT_SYMBOL(tty_throttle);
  *	@tty: terminal
  *
  *	Indicate that a tty may continue transmitting data down the stack.
+ *	Takes the termios mutex to protect against parallel throttle/unthrottle
+ *	and also to ensure the driver can consistently reference its own
+ *	termios data at this point when implementing software flow control.
+ *
+ *	Drivers should however remember that the stack can issue a throttle,
+ *	then change flow control method, then unthrottle.
  */
 
 void tty_unthrottle(struct tty_struct *tty)
 {
+	mutex_lock(&tty->termios_mutex);
 	if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) &&
 	    tty->ops->unthrottle)
 		tty->ops->unthrottle(tty);
+	mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_unthrottle);
 
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index bcba84e..3566129 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -127,7 +127,8 @@
  * 	the line discipline are close to full, and it should somehow
  * 	signal that no more characters should be sent to the tty.
  *
- *	Optional: Always invoke via tty_throttle();
+ *	Optional: Always invoke via tty_throttle(), called under the
+ *	termios lock.
  * 
  * void (*unthrottle)(struct tty_struct * tty);
  *
@@ -135,7 +136,8 @@
  * 	that characters can now be sent to the tty without fear of
  * 	overrunning the input buffers of the line disciplines.
  * 
- *	Optional: Always invoke via tty_unthrottle();
+ *	Optional: Always invoke via tty_unthrottle(), called under the
+ *	termios lock.
  *
  * void (*stop)(struct tty_struct *tty);
  *
-- 
cgit v0.10.2


From 4b6ae89ef40d0d5d4c85098c1d0a012333a68729 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:45:08 +0100
Subject: serial: update maintainers file

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index cf4abdd..1979167 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -159,7 +159,8 @@ F:	drivers/net/r8169.c
 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
 L:	linux-serial@vger.kernel.org
 W:	http://serial.sourceforge.net
-S:	Orphan
+M:	alan@lxorguk.ukuu.org.uk
+S:	Odd Fixes
 F:	drivers/serial/8250*
 F:	include/linux/serial_8250.h
 
-- 
cgit v0.10.2


From 620df3c0a5b70656c4de6049825de214f108218e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:45:53 +0100
Subject: pty: Fix a comment

We fixed the globals, so now fix the comment

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 31038a0..da2cb8c 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -30,7 +30,6 @@
 
 #include <asm/system.h>
 
-/* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
 static struct tty_driver *ptm_driver;
 static struct tty_driver *pts_driver;
-- 
cgit v0.10.2


From 5f0878acba7db24323f5ba4055ec9a96895bb150 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:46:41 +0100
Subject: tty: Fix oops when scanning the polling list for kgdb

Costantino Leandro found a bug in tty_find_polling_driver and provided a
patch that fixed the crash but not the underlying bug. This fixes the
underlying bug where the list walk corrupts the values it is using on a
match but then reuses them if the open fails.

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 66b99a2..6c81739 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -295,7 +295,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line)
 	struct tty_driver *p, *res = NULL;
 	int tty_line = 0;
 	int len;
-	char *str;
+	char *str, *stp;
 
 	for (str = name; *str; str++)
 		if ((*str >= '0' && *str <= '9') || *str == ',')
@@ -311,13 +311,14 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line)
 	list_for_each_entry(p, &tty_drivers, tty_drivers) {
 		if (strncmp(name, p->name, len) != 0)
 			continue;
-		if (*str == ',')
-			str++;
-		if (*str == '\0')
-			str = NULL;
+		stp = str;
+		if (*stp == ',')
+			stp++;
+		if (*stp == '\0')
+			stp = NULL;
 
 		if (tty_line >= 0 && tty_line <= p->num && p->ops &&
-		    p->ops->poll_init && !p->ops->poll_init(p, tty_line, str)) {
+		    p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) {
 			res = tty_driver_kref_get(p);
 			*line = tty_line;
 			break;
-- 
cgit v0.10.2


From e8b70e7d3e86319a8b2aaabde3866833d92cd80f Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:48:02 +0100
Subject: tty: Extract various bits of ldisc code

Before trying to tackle the ldisc bugs the code needs to be a good deal
more readable, so do the simple extractions of routines first.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 6c81739..be49d07 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2481,6 +2481,24 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int
 	return tty->ops->tiocmset(tty, file, set, clear);
 }
 
+struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
+{
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver->subtype == PTY_TYPE_MASTER)
+		tty = tty->link;
+	return tty;
+}
+EXPORT_SYMBOL(tty_pair_get_tty);
+
+struct tty_struct *tty_pair_get_pty(struct tty_struct *tty)
+{
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver->subtype == PTY_TYPE_MASTER)
+	    return tty;
+	return tty->link;
+}
+EXPORT_SYMBOL(tty_pair_get_pty);
+
 /*
  * Split this up, as gcc can choke on it otherwise..
  */
@@ -2496,11 +2514,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
 		return -EINVAL;
 
-	real_tty = tty;
-	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-	    tty->driver->subtype == PTY_TYPE_MASTER)
-		real_tty = tty->link;
-
+	real_tty = tty_pair_get_tty(tty);
 
 	/*
 	 * Factor out some common prep work
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index f78f5b0..e3c6416 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -444,6 +444,50 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 }
 
 /**
+ *	tty_ldisc_halt		-	shutdown the line discipline
+ *	@tty: tty device
+ *
+ *	Shut down the line discipline and work queue for this tty device.
+ *	The TTY_LDISC flag being cleared ensures no further references can
+ *	be obtained while the delayed work queue halt ensures that no more
+ *	data is fed to the ldisc.
+ *
+ *	In order to wait for any existing references to complete see 
+ *	tty_ldisc_wait_idle.
+ */
+
+static void tty_ldisc_halt(struct tty_struct *tty)
+{
+	clear_bit(TTY_LDISC, &tty->flags);
+	cancel_delayed_work(&tty->buf.work);
+	/*
+	 * Wait for ->hangup_work and ->buf.work handlers to terminate
+	 */
+	flush_scheduled_work();
+}
+
+/**
+ *	tty_ldisc_wait_idle	-	wait for the ldisc to become idle
+ *	@tty: tty to wait for
+ *
+ *	Wait for the line discipline to become idle. The discipline must
+ *	have been halted for this to guarantee it remains idle.
+ *
+ */
+
+static void tty_ldisc_wait_idle(struct tty_struct *tty)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	while (tty->ldisc.refcount) {
+		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
+		spin_lock_irqsave(&tty_ldisc_lock, flags);
+	}
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+}
+
+/**
  *	tty_set_ldisc		-	set line discipline
  *	@tty: the terminal to set
  *	@ldisc: the line discipline
@@ -636,6 +680,21 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 	return 0;
 }
 
+static void tty_ldisc_reinit(struct tty_struct *tty)
+{
+	struct tty_ldisc ld;
+
+	if (tty->ldisc.ops->close)
+		(tty->ldisc.ops->close)(tty);
+	tty_ldisc_put(tty->ldisc.ops);
+	/*
+	 *	Switch the line discipline back
+	 */
+	WARN_ON(tty_ldisc_get(N_TTY, &ld));
+	tty_ldisc_assign(tty, &ld);
+	tty_set_termios_ldisc(tty, N_TTY);
+}
+
 /**
  *	tty_ldisc_release		-	release line discipline
  *	@tty: tty being shut down
@@ -647,58 +706,34 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 
 void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-	unsigned long flags;
-	struct tty_ldisc ld;
+
 	/*
 	 * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
 	 * kill any delayed work. As this is the final close it does not
 	 * race with the set_ldisc code path.
 	 */
-	clear_bit(TTY_LDISC, &tty->flags);
-	cancel_delayed_work(&tty->buf.work);
 
-	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 */
-
-	flush_scheduled_work();
+	tty_ldisc_halt(tty);
 
 	/*
 	 * Wait for any short term users (we know they are just driver
 	 * side waiters as the file is closing so user count on the file
 	 * side is zero.
 	 */
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	while (tty->ldisc.refcount) {
-		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
-		spin_lock_irqsave(&tty_ldisc_lock, flags);
-	}
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+	tty_ldisc_wait_idle(tty);
+
 	/*
 	 * Shutdown the current line discipline, and reset it to N_TTY.
 	 *
 	 * FIXME: this MUST get fixed for the new reflocking
 	 */
-	if (tty->ldisc.ops->close)
-		(tty->ldisc.ops->close)(tty);
-	tty_ldisc_put(tty->ldisc.ops);
 
-	/*
-	 *	Switch the line discipline back
-	 */
-	WARN_ON(tty_ldisc_get(N_TTY, &ld));
-	tty_ldisc_assign(tty, &ld);
-	tty_set_termios_ldisc(tty, N_TTY);
+	tty_ldisc_reinit(tty);
 	if (o_tty) {
 		/* FIXME: could o_tty be in setldisc here ? */
 		clear_bit(TTY_LDISC, &o_tty->flags);
-		if (o_tty->ldisc.ops->close)
-			(o_tty->ldisc.ops->close)(o_tty);
-		tty_ldisc_put(o_tty->ldisc.ops);
-		WARN_ON(tty_ldisc_get(N_TTY, &ld));
-		tty_ldisc_assign(o_tty, &ld);
-		tty_set_termios_ldisc(o_tty, N_TTY);
+		tty_ldisc_reinit(o_tty);
 	}
 }
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index bed5a3d..f9c13c8 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -428,6 +428,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
 extern void tty_release_dev(struct file *filp);
 extern int tty_init_termios(struct tty_struct *tty);
 
+extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
+extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
+
 extern struct mutex tty_mutex;
 
 extern void tty_write_unlock(struct tty_struct *tty);
-- 
cgit v0.10.2


From c65c9bc3efa5589f691276bb9db689119a711222 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:50:12 +0100
Subject: tty: rewrite the ldisc locking

There are several pretty much unfixable races in the old ldisc code, especially
with respect to pty behaviour and also to hangup. It's easier to rewrite the
code than simply try and patch it up.

This patch
- splits the ldisc from the tty (so we will be able to refcount it more cleanly
  later)
- introduces a mutex lock for ldisc changing on an active device
- fixes the complete mess that hangup caused
- implements hopefully correct setldisc/close/hangup locking

There are still some problems around pty pairs that have always been there but
at least it is now possible to understand the code and fix further problems.

This fixes the following known bugs
- hang up can leak ldisc references
- hang up may not call open/close on ldisc in a matched way
- pty/tty pairs can deadlock during an ldisc change
- reading the ldisc proc files can cause every ldisc to be loaded

and probably a few other of the mysterious ldisc race reports.

I'm sure it also adds the odd new one.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index af761dc..6880151 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -277,8 +277,8 @@ static int hci_uart_tty_open(struct tty_struct *tty)
 	/* FIXME: why is this needed. Note don't use ldisc_ref here as the
 	   open path is before the ldisc is referencable */
 
-	if (tty->ldisc.ops->flush_buffer)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 	tty_driver_flush_buffer(tty);
 
 	return 0;
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 4560190..f3366d3 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -5200,7 +5200,7 @@ static int cyclades_proc_show(struct seq_file *m, void *v)
 					(cur_jifs - info->idle_stats.recv_idle)/
 					HZ, info->idle_stats.overruns,
 					/* FIXME: double check locking */
-					(long)info->port.tty->ldisc.ops->num);
+					(long)info->port.tty->ldisc->ops->num);
 			else
 				seq_printf(m, "%3d %8lu %10lu %8lu "
 					"%10lu %8lu %9lu %6ld\n",
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 710ee93..abef1f7 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -2114,8 +2114,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file,
 			tty_wait_until_sent(tty, 0);
 		} else {
 			/* ldisc lock already held in ioctl */
-			if (tty->ldisc.ops->flush_buffer)
-				tty->ldisc.ops->flush_buffer(tty);
+			if (tty->ldisc->ops->flush_buffer)
+				tty->ldisc->ops->flush_buffer(tty);
 		}
 		unlock_kernel();
 		/* Fall Thru */
diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c
index 0061e18..0d10b89 100644
--- a/drivers/char/ip2/i2lib.c
+++ b/drivers/char/ip2/i2lib.c
@@ -868,11 +868,11 @@ i2Input(i2ChanStrPtr pCh)
 		amountToMove = count;
 	}
 	// Move the first block
-	pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+	pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
 		 &(pCh->Ibuf[stripIndex]), NULL, amountToMove );
 	// If we needed to wrap, do the second data move
 	if (count > amountToMove) {
-		pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+		pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
 		 pCh->Ibuf, NULL, count - amountToMove );
 	}
 	// Bump and wrap the stripIndex all at once by the amount of data read. This
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index afd9247..517271c 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -1315,8 +1315,8 @@ static inline void  isig(int sig, struct tty_struct *tty, int flush)
 	if (tty->pgrp)
 		kill_pgrp(tty->pgrp, sig, 1);
 	if (flush || !L_NOFLSH(tty)) {
-		if ( tty->ldisc.ops->flush_buffer )  
-			tty->ldisc.ops->flush_buffer(tty);
+		if ( tty->ldisc->ops->flush_buffer )  
+			tty->ldisc->ops->flush_buffer(tty);
 		i2InputFlush( tty->driver_data );
 	}
 }
diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c
index bacb3e2..461ece5 100644
--- a/drivers/char/n_hdlc.c
+++ b/drivers/char/n_hdlc.c
@@ -342,8 +342,8 @@ static int n_hdlc_tty_open (struct tty_struct *tty)
 #endif
 	
 	/* Flush any pending characters in the driver and discipline. */
-	if (tty->ldisc.ops->flush_buffer)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 
 	tty_driver_flush_buffer(tty);
 		
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index da2cb8c..5acd29e 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -110,7 +110,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf,
 	c = to->receive_room;
 	if (c > count)
 		c = count;
-	to->ldisc.ops->receive_buf(to, buf, NULL, c);
+	to->ldisc->ops->receive_buf(to, buf, NULL, c);
 
 	return c;
 }
@@ -148,11 +148,11 @@ static int pty_chars_in_buffer(struct tty_struct *tty)
 	int count;
 
 	/* We should get the line discipline lock for "tty->link" */
-	if (!to || !to->ldisc.ops->chars_in_buffer)
+	if (!to || !to->ldisc->ops->chars_in_buffer)
 		return 0;
 
 	/* The ldisc must report 0 if no characters available to be read */
-	count = to->ldisc.ops->chars_in_buffer(to);
+	count = to->ldisc->ops->chars_in_buffer(to);
 
 	if (tty->driver->subtype == PTY_TYPE_SLAVE)
 		return count;
@@ -186,8 +186,8 @@ static void pty_flush_buffer(struct tty_struct *tty)
 	if (!to)
 		return;
 
-	if (to->ldisc.ops->flush_buffer)
-		to->ldisc.ops->flush_buffer(to);
+	if (to->ldisc->ops->flush_buffer)
+		to->ldisc->ops->flush_buffer(to);
 
 	if (to->packet) {
 		spin_lock_irqsave(&tty->ctrl_lock, flags);
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index cb8ca56..f97b9e8 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -327,7 +327,7 @@ int paste_selection(struct tty_struct *tty)
 		}
 		count = sel_buffer_lth - pasted;
 		count = min(count, tty->receive_room);
-		tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted,
+		tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted,
 								NULL, count);
 		pasted += count;
 	}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index be49d07..2f44b0b 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -492,22 +492,6 @@ void tty_ldisc_flush(struct tty_struct *tty)
 EXPORT_SYMBOL_GPL(tty_ldisc_flush);
 
 /**
- *	tty_reset_termios	-	reset terminal state
- *	@tty: tty to reset
- *
- *	Restore a terminal to the driver default state
- */
-
-static void tty_reset_termios(struct tty_struct *tty)
-{
-	mutex_lock(&tty->termios_mutex);
-	*tty->termios = tty->driver->init_termios;
-	tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
-	tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
-	mutex_unlock(&tty->termios_mutex);
-}
-
-/**
  *	do_tty_hangup		-	actual handler for hangup events
  *	@work: tty device
  *
@@ -536,7 +520,6 @@ static void do_tty_hangup(struct work_struct *work)
 	struct file *cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
-	struct tty_ldisc *ld;
 	int    closecount = 0, n;
 	unsigned long flags;
 	int refs = 0;
@@ -567,40 +550,8 @@ static void do_tty_hangup(struct work_struct *work)
 		filp->f_op = &hung_up_tty_fops;
 	}
 	file_list_unlock();
-	/*
-	 * FIXME! What are the locking issues here? This may me overdoing
-	 * things... This question is especially important now that we've
-	 * removed the irqlock.
-	 */
-	ld = tty_ldisc_ref(tty);
-	if (ld != NULL) {
-		/* We may have no line discipline at this point */
-		if (ld->ops->flush_buffer)
-			ld->ops->flush_buffer(tty);
-		tty_driver_flush_buffer(tty);
-		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
-		    ld->ops->write_wakeup)
-			ld->ops->write_wakeup(tty);
-		if (ld->ops->hangup)
-			ld->ops->hangup(tty);
-	}
-	/*
-	 * FIXME: Once we trust the LDISC code better we can wait here for
-	 * ldisc completion and fix the driver call race
-	 */
-	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
-	wake_up_interruptible_poll(&tty->read_wait, POLLIN);
-	/*
-	 * Shutdown the current line discipline, and reset it to
-	 * N_TTY.
-	 */
-	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
-		tty_reset_termios(tty);
-	/* Defer ldisc switch */
-	/* tty_deferred_ldisc_switch(N_TTY);
 
-	  This should get done automatically when the port closes and
-	  tty_release is called */
+	tty_ldisc_hangup(tty);
 
 	read_lock(&tasklist_lock);
 	if (tty->session) {
@@ -629,12 +580,15 @@ static void do_tty_hangup(struct work_struct *work)
 	read_unlock(&tasklist_lock);
 
 	spin_lock_irqsave(&tty->ctrl_lock, flags);
-	tty->flags = 0;
+	clear_bit(TTY_THROTTLED, &tty->flags);
+	clear_bit(TTY_PUSH, &tty->flags);
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 	put_pid(tty->session);
 	put_pid(tty->pgrp);
 	tty->session = NULL;
 	tty->pgrp = NULL;
 	tty->ctrl_status = 0;
+	set_bit(TTY_HUPPED, &tty->flags);
 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
 	/* Account for the p->signal references we killed */
@@ -660,10 +614,7 @@ static void do_tty_hangup(struct work_struct *work)
 	 * can't yet guarantee all that.
 	 */
 	set_bit(TTY_HUPPED, &tty->flags);
-	if (ld) {
-		tty_ldisc_enable(tty);
-		tty_ldisc_deref(ld);
-	}
+	tty_ldisc_enable(tty);
 	unlock_kernel();
 	if (f)
 		fput(f);
@@ -2570,7 +2521,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case TIOCGSID:
 		return tiocgsid(tty, real_tty, p);
 	case TIOCGETD:
-		return put_user(tty->ldisc.ops->num, (int __user *)p);
+		return put_user(tty->ldisc->ops->num, (int __user *)p);
 	case TIOCSETD:
 		return tiocsetd(tty, p);
 	/*
@@ -2785,6 +2736,7 @@ void initialize_tty_struct(struct tty_struct *tty,
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
 	mutex_init(&tty->termios_mutex);
+	mutex_init(&tty->ldisc_mutex);
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index e3c6416..a58a19a 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -115,19 +115,22 @@ EXPORT_SYMBOL(tty_unregister_ldisc);
 /**
  *	tty_ldisc_try_get	-	try and reference an ldisc
  *	@disc: ldisc number
- *	@ld: tty ldisc structure to complete
  *
  *	Attempt to open and lock a line discipline into place. Return
- *	the line discipline refcounted and assigned in ld. On an error
- *	report the error code back
+ *	the line discipline refcounted or an error.
  */
 
-static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_try_get(int disc)
 {
 	unsigned long flags;
+	struct tty_ldisc *ld;
 	struct tty_ldisc_ops *ldops;
 	int err = -EINVAL;
 	
+	ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL);
+	if (ld == NULL)
+		return ERR_PTR(-ENOMEM);
+
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
 	ld->ops = NULL;
 	ldops = tty_ldiscs[disc];
@@ -140,17 +143,19 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
 			/* lock it */
 			ldops->refcount++;
 			ld->ops = ldops;
+			ld->refcount = 0;
 			err = 0;
 		}
 	}
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	return err;
+	if (err)
+		return ERR_PTR(err);
+	return ld;
 }
 
 /**
  *	tty_ldisc_get		-	take a reference to an ldisc
  *	@disc: ldisc number
- *	@ld: tty line discipline structure to use
  *
  *	Takes a reference to a line discipline. Deals with refcounts and
  *	module locking counts. Returns NULL if the discipline is not available.
@@ -161,44 +166,46 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-static int tty_ldisc_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_get(int disc)
 {
-	int err;
+	struct tty_ldisc *ld;
 
 	if (disc < N_TTY || disc >= NR_LDISCS)
-		return -EINVAL;
-	err = tty_ldisc_try_get(disc, ld);
-	if (err < 0) {
+		return ERR_PTR(-EINVAL);
+	ld = tty_ldisc_try_get(disc);
+	if (IS_ERR(ld)) {
 		request_module("tty-ldisc-%d", disc);
-		err = tty_ldisc_try_get(disc, ld);
+		ld = tty_ldisc_try_get(disc);
 	}
-	return err;
+	return ld;
 }
 
 /**
  *	tty_ldisc_put		-	drop ldisc reference
- *	@disc: ldisc number
+ *	@ld: ldisc
  *
  *	Drop a reference to a line discipline. Manage refcounts and
- *	module usage counts
+ *	module usage counts. Free the ldisc once the recount hits zero.
  *
  *	Locking:
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-static void tty_ldisc_put(struct tty_ldisc_ops *ld)
+static void tty_ldisc_put(struct tty_ldisc *ld)
 {
 	unsigned long flags;
-	int disc = ld->num;
+	int disc = ld->ops->num;
+	struct tty_ldisc_ops *ldo;
 
 	BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = tty_ldiscs[disc];
-	BUG_ON(ld->refcount == 0);
-	ld->refcount--;
-	module_put(ld->owner);
+	ldo = tty_ldiscs[disc];
+	BUG_ON(ldo->refcount == 0);
+	ldo->refcount--;
+	module_put(ldo->owner);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	kfree(ld);
 }
 
 static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
@@ -219,12 +226,13 @@ static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
 static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
 {
 	int i = *(loff_t *)v;
-	struct tty_ldisc ld;
+	struct tty_ldisc *ld;
 	
-	if (tty_ldisc_get(i, &ld) < 0)
+	ld = tty_ldisc_try_get(i);
+	if (IS_ERR(ld))
 		return 0;
-	seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i);
-	tty_ldisc_put(ld.ops);
+	seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i);
+	tty_ldisc_put(ld);
 	return 0;
 }
 
@@ -263,8 +271,7 @@ const struct file_operations tty_ldiscs_proc_fops = {
 
 static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
 {
-	ld->refcount = 0;
-	tty->ldisc = *ld;
+	tty->ldisc = ld;
 }
 
 /**
@@ -286,7 +293,7 @@ static int tty_ldisc_try(struct tty_struct *tty)
 	int ret = 0;
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = &tty->ldisc;
+	ld = tty->ldisc;
 	if (test_bit(TTY_LDISC, &tty->flags)) {
 		ld->refcount++;
 		ret = 1;
@@ -315,8 +322,8 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
 {
 	/* wait_event is a macro */
 	wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
-	WARN_ON(tty->ldisc.refcount == 0);
-	return &tty->ldisc;
+	WARN_ON(tty->ldisc->refcount == 0);
+	return tty->ldisc;
 }
 
 EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
@@ -335,7 +342,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
 struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
 {
 	if (tty_ldisc_try(tty))
-		return &tty->ldisc;
+		return tty->ldisc;
 	return NULL;
 }
 
@@ -407,6 +414,39 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
 	mutex_unlock(&tty->termios_mutex);
 }
 
+/**
+ *	tty_ldisc_open		-	open a line discipline
+ *	@tty: tty we are opening the ldisc on
+ *	@ld: discipline to open
+ *
+ *	A helper opening method. Also a convenient debugging and check
+ *	point.
+ */
+
+static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags));
+	if (ld->ops->open)
+		return ld->ops->open(tty);
+	return 0;
+}
+
+/**
+ *	tty_ldisc_close		-	close a line discipline
+ *	@tty: tty we are opening the ldisc on
+ *	@ld: discipline to close
+ *
+ *	A helper close method. Also a convenient debugging and check
+ *	point.
+ */
+
+static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
+	clear_bit(TTY_LDISC_OPEN, &tty->flags);
+	if (ld->ops->close)
+		ld->ops->close(tty);
+}
 
 /**
  *	tty_ldisc_restore	-	helper for tty ldisc change
@@ -420,31 +460,32 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
 static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 {
 	char buf[64];
-	struct tty_ldisc new_ldisc;
+	struct tty_ldisc *new_ldisc;
+	int r;
 
 	/* There is an outstanding reference here so this is safe */
-	tty_ldisc_get(old->ops->num, old);
+	old = tty_ldisc_get(old->ops->num);
+	WARN_ON(IS_ERR(old));
 	tty_ldisc_assign(tty, old);
 	tty_set_termios_ldisc(tty, old->ops->num);
-	if (old->ops->open && (old->ops->open(tty) < 0)) {
-		tty_ldisc_put(old->ops);
+	if (tty_ldisc_open(tty, old) < 0) {
+		tty_ldisc_put(old);
 		/* This driver is always present */
-		if (tty_ldisc_get(N_TTY, &new_ldisc) < 0)
+		new_ldisc =tty_ldisc_get(N_TTY);
+		if (IS_ERR(new_ldisc))
 			panic("n_tty: get");
-		tty_ldisc_assign(tty, &new_ldisc);
+		tty_ldisc_assign(tty, new_ldisc);
 		tty_set_termios_ldisc(tty, N_TTY);
-		if (new_ldisc.ops->open) {
-			int r = new_ldisc.ops->open(tty);
-				if (r < 0)
-				panic("Couldn't open N_TTY ldisc for "
-				      "%s --- error %d.",
-				      tty_name(tty, buf), r);
-		}
+		r = tty_ldisc_open(tty, new_ldisc);
+		if (r < 0)
+			panic("Couldn't open N_TTY ldisc for "
+			      "%s --- error %d.",
+			      tty_name(tty, buf), r);
 	}
 }
 
 /**
- *	tty_ldisc_halt		-	shutdown the line discipline
+ *	tty_ldisc_halt		-	shut down the line discipline
  *	@tty: tty device
  *
  *	Shut down the line discipline and work queue for this tty device.
@@ -456,14 +497,10 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
  *	tty_ldisc_wait_idle.
  */
 
-static void tty_ldisc_halt(struct tty_struct *tty)
+static int tty_ldisc_halt(struct tty_struct *tty)
 {
 	clear_bit(TTY_LDISC, &tty->flags);
-	cancel_delayed_work(&tty->buf.work);
-	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 */
-	flush_scheduled_work();
+	return cancel_delayed_work(&tty->buf.work);
 }
 
 /**
@@ -473,18 +510,22 @@ static void tty_ldisc_halt(struct tty_struct *tty)
  *	Wait for the line discipline to become idle. The discipline must
  *	have been halted for this to guarantee it remains idle.
  *
+ *	tty_ldisc_lock protects the ref counts currently.
  */
 
-static void tty_ldisc_wait_idle(struct tty_struct *tty)
+static int tty_ldisc_wait_idle(struct tty_struct *tty)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	while (tty->ldisc.refcount) {
+	while (tty->ldisc->refcount) {
 		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
+		if (wait_event_timeout(tty_ldisc_wait,
+				tty->ldisc->refcount == 0, 5 * HZ) == 0)
+			return -EBUSY;
 		spin_lock_irqsave(&tty_ldisc_lock, flags);
 	}
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	return 0;
 }
 
 /**
@@ -493,39 +534,64 @@ static void tty_ldisc_wait_idle(struct tty_struct *tty)
  *	@ldisc: the line discipline
  *
  *	Set the discipline of a tty line. Must be called from a process
- *	context.
+ *	context. The ldisc change logic has to protect itself against any
+ *	overlapping ldisc change (including on the other end of pty pairs),
+ *	the close of one side of a tty/pty pair, and eventually hangup.
  *
- *	Locking: takes tty_ldisc_lock.
- *		 called functions take termios_mutex
+ *	Locking: takes tty_ldisc_lock, termios_mutex
  */
 
 int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 {
 	int retval;
-	struct tty_ldisc o_ldisc, new_ldisc;
-	int work;
-	unsigned long flags;
+	struct tty_ldisc *o_ldisc, *new_ldisc;
+	int work, o_work = 0;
 	struct tty_struct *o_tty;
 
-restart:
-	/* This is a bit ugly for now but means we can break the 'ldisc
-	   is part of the tty struct' assumption later */
-	retval = tty_ldisc_get(ldisc, &new_ldisc);
-	if (retval)
-		return retval;
+	new_ldisc = tty_ldisc_get(ldisc);
+	if (IS_ERR(new_ldisc))
+		return PTR_ERR(new_ldisc);
 
 	/*
-	 *	Problem: What do we do if this blocks ?
+	 *	We need to look at the tty locking here for pty/tty pairs
+	 *	when both sides try to change in parallel.
 	 */
 
-	tty_wait_until_sent(tty, 0);
+	o_tty = tty->link;	/* o_tty is the pty side or NULL */
+
 
-	if (tty->ldisc.ops->num == ldisc) {
-		tty_ldisc_put(new_ldisc.ops);
+	/*
+	 *	Check the no-op case
+	 */
+
+	if (tty->ldisc->ops->num == ldisc) {
+		tty_ldisc_put(new_ldisc);
 		return 0;
 	}
 
 	/*
+	 *	Problem: What do we do if this blocks ?
+	 *	We could deadlock here
+	 */
+
+	tty_wait_until_sent(tty, 0);
+
+	mutex_lock(&tty->ldisc_mutex);
+
+	/*
+	 *	We could be midstream of another ldisc change which has
+	 *	dropped the lock during processing. If so we need to wait.
+	 */
+
+	while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
+		mutex_unlock(&tty->ldisc_mutex);
+		wait_event(tty_ldisc_wait,
+			test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
+		mutex_lock(&tty->ldisc_mutex);
+	}
+	set_bit(TTY_LDISC_CHANGING, &tty->flags);
+		
+	/*
 	 *	No more input please, we are switching. The new ldisc
 	 *	will update this value in the ldisc open function
 	 */
@@ -533,8 +599,6 @@ restart:
 	tty->receive_room = 0;
 
 	o_ldisc = tty->ldisc;
-	o_tty = tty->link;
-
 	/*
 	 *	Make sure we don't change while someone holds a
 	 *	reference to the line discipline. The TTY_LDISC bit
@@ -545,108 +609,181 @@ restart:
 	 *	with a userspace app continually trying to use the tty in
 	 *	parallel to the change and re-referencing the tty.
 	 */
-	clear_bit(TTY_LDISC, &tty->flags);
-	if (o_tty)
-		clear_bit(TTY_LDISC, &o_tty->flags);
 
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
-		if (tty->ldisc.refcount) {
-			/* Free the new ldisc we grabbed. Must drop the lock
-			   first. */
-			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(o_ldisc.ops);
-			/*
-			 * There are several reasons we may be busy, including
-			 * random momentary I/O traffic. We must therefore
-			 * retry. We could distinguish between blocking ops
-			 * and retries if we made tty_ldisc_wait() smarter.
-			 * That is up for discussion.
-			 */
-			if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0)
-				return -ERESTARTSYS;
-			goto restart;
-		}
-		if (o_tty && o_tty->ldisc.refcount) {
-			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(o_tty->ldisc.ops);
-			if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0)
-				return -ERESTARTSYS;
-			goto restart;
-		}
-	}
-	/*
-	 *	If the TTY_LDISC bit is set, then we are racing against
-	 *	another ldisc change
-	 */
-	if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
-		struct tty_ldisc *ld;
-		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		tty_ldisc_put(new_ldisc.ops);
-		ld = tty_ldisc_ref_wait(tty);
-		tty_ldisc_deref(ld);
-		goto restart;
-	}
-	/*
-	 *	This flag is used to avoid two parallel ldisc changes. Once
-	 *	open and close are fine grained locked this may work better
-	 *	as a mutex shared with the open/close/hup paths
-	 */
-	set_bit(TTY_LDISC_CHANGING, &tty->flags);
+	work = tty_ldisc_halt(tty);
 	if (o_tty)
-		set_bit(TTY_LDISC_CHANGING, &o_tty->flags);
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	
-	/*
-	 *	From this point on we know nobody has an ldisc
-	 *	usage reference, nor can they obtain one until
-	 *	we say so later on.
-	 */
+		o_work = tty_ldisc_halt(o_tty);
 
-	work = cancel_delayed_work(&tty->buf.work);
 	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 * MUST NOT hold locks here.
+	 * Wait for ->hangup_work and ->buf.work handlers to terminate.
+	 * We must drop the mutex here in case a hangup is also in process.
 	 */
+
+	mutex_unlock(&tty->ldisc_mutex);
+
 	flush_scheduled_work();
+
+	/* Let any existing reference holders finish */
+	retval = tty_ldisc_wait_idle(tty);
+	if (retval < 0) {
+		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+		tty_ldisc_put(new_ldisc);
+		return retval;
+	}
+
+	mutex_lock(&tty->ldisc_mutex);
+	if (test_bit(TTY_HUPPED, &tty->flags)) {
+		/* We were raced by the hangup method. It will have stomped
+		   the ldisc data and closed the ldisc down */
+		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+		mutex_unlock(&tty->ldisc_mutex);
+		tty_ldisc_put(new_ldisc);
+		return -EIO;
+	}
+
 	/* Shutdown the current discipline. */
-	if (o_ldisc.ops->close)
-		(o_ldisc.ops->close)(tty);
+	tty_ldisc_close(tty, o_ldisc);
 
 	/* Now set up the new line discipline. */
-	tty_ldisc_assign(tty, &new_ldisc);
+	tty_ldisc_assign(tty, new_ldisc);
 	tty_set_termios_ldisc(tty, ldisc);
-	if (new_ldisc.ops->open)
-		retval = (new_ldisc.ops->open)(tty);
+
+	retval = tty_ldisc_open(tty, new_ldisc);
 	if (retval < 0) {
-		tty_ldisc_put(new_ldisc.ops);
-		tty_ldisc_restore(tty, &o_ldisc);
+		/* Back to the old one or N_TTY if we can't */
+		tty_ldisc_put(new_ldisc);
+		tty_ldisc_restore(tty, o_ldisc);
 	}
+
 	/* At this point we hold a reference to the new ldisc and a
 	   a reference to the old ldisc. If we ended up flipping back
 	   to the existing ldisc we have two references to it */
 
-	if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc)
+	if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc)
 		tty->ops->set_ldisc(tty);
 
-	tty_ldisc_put(o_ldisc.ops);
+	tty_ldisc_put(o_ldisc);
 
 	/*
-	 *	Allow ldisc referencing to occur as soon as the driver
-	 *	ldisc callback completes.
+	 *	Allow ldisc referencing to occur again
 	 */
 
 	tty_ldisc_enable(tty);
 	if (o_tty)
 		tty_ldisc_enable(o_tty);
 
-	/* Restart it in case no characters kick it off. Safe if
+	/* Restart the work queue in case no characters kick it off. Safe if
 	   already running */
 	if (work)
 		schedule_delayed_work(&tty->buf.work, 1);
+	if (o_work)
+		schedule_delayed_work(&o_tty->buf.work, 1);
+	mutex_unlock(&tty->ldisc_mutex);
 	return retval;
 }
 
+/**
+ *	tty_reset_termios	-	reset terminal state
+ *	@tty: tty to reset
+ *
+ *	Restore a terminal to the driver default state.
+ */
+
+static void tty_reset_termios(struct tty_struct *tty)
+{
+	mutex_lock(&tty->termios_mutex);
+	*tty->termios = tty->driver->init_termios;
+	tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
+	tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
+	mutex_unlock(&tty->termios_mutex);
+}
+
+
+/**
+ *	tty_ldisc_reinit	-	reinitialise the tty ldisc
+ *	@tty: tty to reinit
+ *
+ *	Switch the tty back to N_TTY line discipline and leave the
+ *	ldisc state closed
+ */
+
+static void tty_ldisc_reinit(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	tty_ldisc_close(tty, tty->ldisc);
+	tty_ldisc_put(tty->ldisc);
+	tty->ldisc = NULL;
+	/*
+	 *	Switch the line discipline back
+	 */
+	ld = tty_ldisc_get(N_TTY);
+	BUG_ON(IS_ERR(ld));
+	tty_ldisc_assign(tty, ld);
+	tty_set_termios_ldisc(tty, N_TTY);
+}
+
+/**
+ *	tty_ldisc_hangup		-	hangup ldisc reset
+ *	@tty: tty being hung up
+ *
+ *	Some tty devices reset their termios when they receive a hangup
+ *	event. In that situation we must also switch back to N_TTY properly
+ *	before we reset the termios data.
+ *
+ *	Locking: We can take the ldisc mutex as the rest of the code is
+ *	careful to allow for this.
+ *
+ *	In the pty pair case this occurs in the close() path of the
+ *	tty itself so we must be careful about locking rules.
+ */
+
+void tty_ldisc_hangup(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	/*
+	 * FIXME! What are the locking issues here? This may me overdoing
+	 * things... This question is especially important now that we've
+	 * removed the irqlock.
+	 */
+	ld = tty_ldisc_ref(tty);
+	if (ld != NULL) {
+		/* We may have no line discipline at this point */
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
+		tty_driver_flush_buffer(tty);
+		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+		    ld->ops->write_wakeup)
+			ld->ops->write_wakeup(tty);
+		if (ld->ops->hangup)
+			ld->ops->hangup(tty);
+		tty_ldisc_deref(ld);
+	}
+	/*
+	 * FIXME: Once we trust the LDISC code better we can wait here for
+	 * ldisc completion and fix the driver call race
+	 */
+	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+	wake_up_interruptible_poll(&tty->read_wait, POLLIN);
+	/*
+	 * Shutdown the current line discipline, and reset it to
+	 * N_TTY.
+	 */
+	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) {
+		/* Avoid racing set_ldisc */
+		mutex_lock(&tty->ldisc_mutex);
+		/* Switch back to N_TTY */
+		tty_ldisc_reinit(tty);
+		/* At this point we have a closed ldisc and we want to
+		   reopen it. We could defer this to the next open but
+		   it means auditing a lot of other paths so this is a FIXME */
+		WARN_ON(tty_ldisc_open(tty, tty->ldisc));
+		tty_ldisc_enable(tty);
+		mutex_unlock(&tty->ldisc_mutex);
+		tty_reset_termios(tty);
+	}
+}
 
 /**
  *	tty_ldisc_setup			-	open line discipline
@@ -654,24 +791,23 @@ restart:
  *	@o_tty: pair tty for pty/tty pairs
  *
  *	Called during the initial open of a tty/pty pair in order to set up the
- *	line discplines and bind them to the tty.
+ *	line disciplines and bind them to the tty. This has no locking issues
+ *	as the device isn't yet active.
  */
 
 int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-	struct tty_ldisc *ld = &tty->ldisc;
+	struct tty_ldisc *ld = tty->ldisc;
 	int retval;
 
-	if (ld->ops->open) {
-		retval = (ld->ops->open)(tty);
-		if (retval)
-			return retval;
-	}
-	if (o_tty && o_tty->ldisc.ops->open) {
-		retval = (o_tty->ldisc.ops->open)(o_tty);
+	retval = tty_ldisc_open(tty, ld);
+	if (retval)
+		return retval;
+
+	if (o_tty) {
+		retval = tty_ldisc_open(o_tty, o_tty->ldisc);
 		if (retval) {
-			if (ld->ops->close)
-				(ld->ops->close)(tty);
+			tty_ldisc_close(tty, ld);
 			return retval;
 		}
 		tty_ldisc_enable(o_tty);
@@ -679,34 +815,18 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 	tty_ldisc_enable(tty);
 	return 0;
 }
-
-static void tty_ldisc_reinit(struct tty_struct *tty)
-{
-	struct tty_ldisc ld;
-
-	if (tty->ldisc.ops->close)
-		(tty->ldisc.ops->close)(tty);
-	tty_ldisc_put(tty->ldisc.ops);
-	/*
-	 *	Switch the line discipline back
-	 */
-	WARN_ON(tty_ldisc_get(N_TTY, &ld));
-	tty_ldisc_assign(tty, &ld);
-	tty_set_termios_ldisc(tty, N_TTY);
-}
-
 /**
  *	tty_ldisc_release		-	release line discipline
  *	@tty: tty being shut down
  *	@o_tty: pair tty for pty/tty pairs
  *
  *	Called during the final close of a tty/pty pair in order to shut down the
- *	line discpline layer.
+ *	line discpline layer. On exit the ldisc assigned is N_TTY and the
+ *	ldisc has not been opened.
  */
 
 void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-
 	/*
 	 * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
 	 * kill any delayed work. As this is the final close it does not
@@ -714,6 +834,7 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 	 */
 
 	tty_ldisc_halt(tty);
+	flush_scheduled_work();
 
 	/*
 	 * Wait for any short term users (we know they are just driver
@@ -730,11 +851,9 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 	 */
 
 	tty_ldisc_reinit(tty);
-	if (o_tty) {
-		/* FIXME: could o_tty be in setldisc here ? */
-		clear_bit(TTY_LDISC, &o_tty->flags);
-		tty_ldisc_reinit(o_tty);
-	}
+	/* This will need doing differently if we need to lock */
+	if (o_tty)
+		tty_ldisc_release(o_tty, NULL);
 }
 
 /**
@@ -747,10 +866,10 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 
 void tty_ldisc_init(struct tty_struct *tty)
 {
-	struct tty_ldisc ld;
-	if (tty_ldisc_get(N_TTY, &ld) < 0)
+	struct tty_ldisc *ld = tty_ldisc_get(N_TTY);
+	if (IS_ERR(ld))
 		panic("n_tty: init_tty");
-	tty_ldisc_assign(tty, &ld);
+	tty_ldisc_assign(tty, ld);
 }
 
 void tty_ldisc_begin(void)
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f9c13c8..1488d8c 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -226,8 +226,11 @@ struct tty_struct {
 	struct tty_driver *driver;
 	const struct tty_operations *ops;
 	int index;
-	/* The ldisc objects are protected by tty_ldisc_lock at the moment */
-	struct tty_ldisc ldisc;
+
+	/* Protects ldisc changes: Lock tty not pty */
+	struct mutex ldisc_mutex;
+	struct tty_ldisc *ldisc;
+
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
 	/* Termios values are protected by the termios mutex */
@@ -314,6 +317,7 @@ struct tty_struct {
 #define TTY_CLOSING 		7	/* ->close() in progress */
 #define TTY_LDISC 		9	/* Line discipline attached */
 #define TTY_LDISC_CHANGING 	10	/* Line discipline changing */
+#define TTY_LDISC_OPEN	 	11	/* Line discipline is open */
 #define TTY_HW_COOK_OUT 	14	/* Hardware can do output cooking */
 #define TTY_HW_COOK_IN 		15	/* Hardware can do input cooking */
 #define TTY_PTY_LOCK 		16	/* pty private */
@@ -406,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b);
 extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *);
 extern void tty_ldisc_deref(struct tty_ldisc *);
 extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
+extern void tty_ldisc_hangup(struct tty_struct *tty);
 extern const struct file_operations tty_ldiscs_proc_fops;
 
 extern void tty_wakeup(struct tty_struct *tty);
-- 
cgit v0.10.2


From f2c4c65c8350d885d74dcdea7061dcecc1223c36 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:50:58 +0100
Subject: tty: Move ldisc_flush

We have a tty_ldisc file now so put tty_ldisc_flush in the right place

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 2f44b0b..939e198 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -471,27 +471,6 @@ void tty_wakeup(struct tty_struct *tty)
 EXPORT_SYMBOL_GPL(tty_wakeup);
 
 /**
- *	tty_ldisc_flush	-	flush line discipline queue
- *	@tty: tty
- *
- *	Flush the line discipline queue (if any) for this tty. If there
- *	is no line discipline active this is a no-op.
- */
-
-void tty_ldisc_flush(struct tty_struct *tty)
-{
-	struct tty_ldisc *ld = tty_ldisc_ref(tty);
-	if (ld) {
-		if (ld->ops->flush_buffer)
-			ld->ops->flush_buffer(tty);
-		tty_ldisc_deref(ld);
-	}
-	tty_buffer_flush(tty);
-}
-
-EXPORT_SYMBOL_GPL(tty_ldisc_flush);
-
-/**
  *	do_tty_hangup		-	actual handler for hangup events
  *	@work: tty device
  *
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index a58a19a..01c70c0 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -396,6 +396,27 @@ void tty_ldisc_enable(struct tty_struct *tty)
 }
 
 /**
+ *	tty_ldisc_flush	-	flush line discipline queue
+ *	@tty: tty
+ *
+ *	Flush the line discipline queue (if any) for this tty. If there
+ *	is no line discipline active this is a no-op.
+ */
+
+void tty_ldisc_flush(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld = tty_ldisc_ref(tty);
+	if (ld) {
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
+		tty_ldisc_deref(ld);
+	}
+	tty_buffer_flush(tty);
+}
+
+EXPORT_SYMBOL_GPL(tty_ldisc_flush);
+
+/**
  *	tty_set_termios_ldisc		-	set ldisc field
  *	@tty: tty structure
  *	@num: line discipline number
-- 
cgit v0.10.2


From 852e99d22f2231d232c45216b027565e3bae7add Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:51:41 +0100
Subject: tty: bring ldisc into CodingStyle

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index 01c70c0..39c8f86 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -126,7 +126,7 @@ static struct tty_ldisc *tty_ldisc_try_get(int disc)
 	struct tty_ldisc *ld;
 	struct tty_ldisc_ops *ldops;
 	int err = -EINVAL;
-	
+
 	ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL);
 	if (ld == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -208,12 +208,12 @@ static void tty_ldisc_put(struct tty_ldisc *ld)
 	kfree(ld);
 }
 
-static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
+static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
 {
 	return (*pos < NR_LDISCS) ? pos : NULL;
 }
 
-static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
+static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	(*pos)++;
 	return (*pos < NR_LDISCS) ? pos : NULL;
@@ -227,7 +227,7 @@ static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
 {
 	int i = *(loff_t *)v;
 	struct tty_ldisc *ld;
-	
+
 	ld = tty_ldisc_try_get(i);
 	if (IS_ERR(ld))
 		return 0;
@@ -325,7 +325,6 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
 	WARN_ON(tty->ldisc->refcount == 0);
 	return tty->ldisc;
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
 
 /**
@@ -345,7 +344,6 @@ struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
 		return tty->ldisc;
 	return NULL;
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_ref);
 
 /**
@@ -373,7 +371,6 @@ void tty_ldisc_deref(struct tty_ldisc *ld)
 		wake_up(&tty_ldisc_wait);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_deref);
 
 /**
@@ -413,7 +410,6 @@ void tty_ldisc_flush(struct tty_struct *tty)
 	}
 	tty_buffer_flush(tty);
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_flush);
 
 /**
@@ -492,7 +488,7 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 	if (tty_ldisc_open(tty, old) < 0) {
 		tty_ldisc_put(old);
 		/* This driver is always present */
-		new_ldisc =tty_ldisc_get(N_TTY);
+		new_ldisc = tty_ldisc_get(N_TTY);
 		if (IS_ERR(new_ldisc))
 			panic("n_tty: get");
 		tty_ldisc_assign(tty, new_ldisc);
@@ -514,7 +510,7 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
  *	be obtained while the delayed work queue halt ensures that no more
  *	data is fed to the ldisc.
  *
- *	In order to wait for any existing references to complete see 
+ *	In order to wait for any existing references to complete see
  *	tty_ldisc_wait_idle.
  */
 
@@ -611,7 +607,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 		mutex_lock(&tty->ldisc_mutex);
 	}
 	set_bit(TTY_LDISC_CHANGING, &tty->flags);
-		
+
 	/*
 	 *	No more input please, we are switching. The new ldisc
 	 *	will update this value in the ldisc open function
@@ -841,8 +837,8 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
  *	@tty: tty being shut down
  *	@o_tty: pair tty for pty/tty pairs
  *
- *	Called during the final close of a tty/pty pair in order to shut down the
- *	line discpline layer. On exit the ldisc assigned is N_TTY and the
+ *	Called during the final close of a tty/pty pair in order to shut down
+ *	the line discpline layer. On exit the ldisc assigned is N_TTY and the
  *	ldisc has not been opened.
  */
 
-- 
cgit v0.10.2


From 5fcf62b0f1f24ee25931636216f28bc87448a60f Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Thu, 11 Jun 2009 12:52:26 +0100
Subject: tty: iuu_phoenix: fix locking.

Bring in the relevant bits of the 0.9 vendor driver.

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index bb572ce..637d0dd 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -650,32 +650,33 @@ static int iuu_bulk_write(struct usb_serial_port *port)
 	unsigned long flags;
 	int result;
 	int i;
+	int buf_len;
 	char *buf_ptr = port->write_urb->transfer_buffer;
 	dbg("%s - enter", __func__);
 
+	spin_lock_irqsave(&priv->lock, flags);
 	*buf_ptr++ = IUU_UART_ESC;
 	*buf_ptr++ = IUU_UART_TX;
 	*buf_ptr++ = priv->writelen;
 
-	memcpy(buf_ptr, priv->writebuf,
-	       priv->writelen);
+	memcpy(buf_ptr, priv->writebuf, priv->writelen);
+	buf_len = priv->writelen;
+	priv->writelen = 0;
+	spin_unlock_irqrestore(&priv->lock, flags);
 	if (debug == 1) {
-		for (i = 0; i < priv->writelen; i++)
+		for (i = 0; i < buf_len; i++)
 			sprintf(priv->dbgbuf + i*2 ,
 				"%02X", priv->writebuf[i]);
-		priv->dbgbuf[priv->writelen+i*2] = 0;
+		priv->dbgbuf[buf_len+i*2] = 0;
 		dbg("%s - writing %i chars : %s", __func__,
-		    priv->writelen, priv->dbgbuf);
+		    buf_len, priv->dbgbuf);
 	}
 	usb_fill_bulk_urb(port->write_urb, port->serial->dev,
 			  usb_sndbulkpipe(port->serial->dev,
 					  port->bulk_out_endpointAddress),
-			  port->write_urb->transfer_buffer, priv->writelen + 3,
+			  port->write_urb->transfer_buffer, buf_len + 3,
 			  iuu_rxcmd, port);
 	result = usb_submit_urb(port->write_urb, GFP_ATOMIC);
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->writelen = 0;
-	spin_unlock_irqrestore(&priv->lock, flags);
 	usb_serial_port_softint(port);
 	return result;
 }
@@ -769,14 +770,10 @@ static int iuu_uart_write(struct tty_struct *tty, struct usb_serial_port *port,
 		return -ENOMEM;
 
 	spin_lock_irqsave(&priv->lock, flags);
-	if (priv->writelen > 0) {
-		/* buffer already filled but not commited */
-		spin_unlock_irqrestore(&priv->lock, flags);
-		return 0;
-	}
+
 	/* fill the buffer */
-	memcpy(priv->writebuf, buf, count);
-	priv->writelen = count;
+	memcpy(priv->writebuf + priv->writelen, buf, count);
+	priv->writelen += count;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return count;
-- 
cgit v0.10.2


From cc3447d179d8a5e16807e52b77d7f4c095ffedb7 Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Thu, 11 Jun 2009 12:53:24 +0100
Subject: tty: iuu_phoenix: Fix stopbit when uart goes on.

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 637d0dd..ef6d12f 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -815,7 +815,7 @@ static int iuu_uart_on(struct usb_serial_port *port)
 	buf[0] = IUU_UART_ENABLE;
 	buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF);
 	buf[2] = (u8) (0x00FF & IUU_BAUD_9600);
-	buf[3] = (u8) (0x0F0 & IUU_TWO_STOP_BITS) | (0x07 & IUU_PARITY_EVEN);
+	buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN);
 
 	status = bulk_immediate(port, buf, 4);
 	if (status != IUU_OPERATION_OK) {
-- 
cgit v0.10.2


From 96dab77ebf3868cc8723ac95e048e8a9c1dccf22 Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Thu, 11 Jun 2009 12:54:20 +0100
Subject: tty: iuu_phoenix: set termios.

set_termios can now be used for setting the parity and the stopbits. This is
needed to use with cards which use a different parity then the parity used at
start (even).

If the iuu_uart_baud function return an error, we will return the old_termios
instead of the new one.

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>

This was then revamped to use the various helpers, not copy non-hardware
bits any to add mark/space parity and csize reporting

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index ef6d12f..fa86f6e 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -942,6 +942,55 @@ static int iuu_uart_baud(struct usb_serial_port *port, u32 baud,
 	return status;
 }
 
+static void iuu_set_termios(struct tty_struct *tty,
+		struct usb_serial_port *port, struct ktermios *old_termios)
+{
+	const u32 supported_mask = CMSPAR|PARENB|PARODD;
+
+	unsigned int cflag = tty->termios->c_cflag;
+	int status;
+	u32 actual;
+	u32 parity;
+	int csize = CS7;
+	int baud = 9600;	/* Fixed for the moment */
+	u32 newval = cflag & supported_mask;
+
+	/* compute the parity parameter */
+	parity = 0;
+	if (cflag & CMSPAR) {	/* Using mark space */
+		if (cflag & PARODD)
+			parity |= IUU_PARITY_SPACE;
+		else
+			parity |= IUU_PARITY_MARK;
+	} else if (!(cflag & PARENB)) {
+		parity |= IUU_PARITY_NONE;
+		csize = CS8;
+	} else if (cflag & PARODD)
+		parity |= IUU_PARITY_ODD;
+	else
+		parity |= IUU_PARITY_EVEN;
+
+	parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT);
+
+	/* set it */
+	status = iuu_uart_baud(port,
+			(clockmode == 2) ? 16457 : 9600 * boost / 100,
+			&actual, parity);
+
+	/* set the termios value to the real one, so the user now what has
+	 * changed. We support few fields so its easies to copy the old hw
+	 * settings back over and then adjust them
+	 */
+ 	if (old_termios)
+ 		tty_termios_copy_hw(tty->termios, old_termios);
+	if (status != 0)	/* Set failed - return old bits */
+		return;
+	/* Re-encode speed, parity and csize */
+	tty_encode_baud_rate(tty, baud, baud);
+	tty->termios->c_cflag &= ~(supported_mask|CSIZE);
+	tty->termios->c_cflag |= newval | csize;
+}
+
 static void iuu_close(struct usb_serial_port *port)
 {
 	/* iuu_led (port,255,0,0,0); */
@@ -1151,6 +1200,7 @@ static struct usb_serial_driver iuu_device = {
 	.read_bulk_callback = iuu_uart_read_callback,
 	.tiocmget = iuu_tiocmget,
 	.tiocmset = iuu_tiocmset,
+	.set_termios = iuu_set_termios,
 	.attach = iuu_startup,
 	.shutdown = iuu_shutdown,
 };
-- 
cgit v0.10.2


From 9bb41699ad5c74519dc054bfe469a8074799c863 Mon Sep 17 00:00:00 2001
From: Olivier Bornet <Olivier.Bornet@puck.ch>
Date: Thu, 11 Jun 2009 12:55:01 +0100
Subject: tty: iuu_phoenix: update version number.

Signed-off-by: Olivier Bornet <Olivier.Bornet@puck.ch>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index fa86f6e..76a3cc3 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -40,7 +40,7 @@ static int debug;
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.5"
+#define DRIVER_VERSION "v0.10"
 #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver"
 
 static struct usb_device_id id_table[] = {
-- 
cgit v0.10.2


From 13858d36903990441a89eb342247b71436808eeb Mon Sep 17 00:00:00 2001
From: "Alexander Y. Fomichev" <git.user@gmail.com>
Date: Thu, 11 Jun 2009 12:56:00 +0100
Subject: jsm: correctly support multiple 4/8-port boards

If there are more then one 4/8-port board jsm_uart_port_init
allocate a line numbers of the second and further boards
from range of previous one.

This patch fixed the problem.

Signed-off-by: Alexander Y. Fomichev <git.user@gmail.com>

[printks fixed to add jsm: ]

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/jsm/jsm.h b/drivers/serial/jsm/jsm.h
index c0a3e27..4e5f3bd 100644
--- a/drivers/serial/jsm/jsm.h
+++ b/drivers/serial/jsm/jsm.h
@@ -61,6 +61,7 @@ enum {
 	if ((DBG_##nlevel & jsm_debug))			\
 	dev_printk(KERN_##klevel, pdev->dev, fmt, ## args)
 
+#define	MAXLINES	256
 #define MAXPORTS	8
 #define MAX_STOPS_SENT	5
 
diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c
index 31496dc0..107ce2e 100644
--- a/drivers/serial/jsm/jsm_tty.c
+++ b/drivers/serial/jsm/jsm_tty.c
@@ -33,6 +33,8 @@
 
 #include "jsm.h"
 
+static DECLARE_BITMAP(linemap, MAXLINES);
+
 static void jsm_carrier(struct jsm_channel *ch);
 
 static inline int jsm_get_mstat(struct jsm_channel *ch)
@@ -433,6 +435,7 @@ int __devinit jsm_tty_init(struct jsm_board *brd)
 int __devinit jsm_uart_port_init(struct jsm_board *brd)
 {
 	int i;
+	unsigned int line;
 	struct jsm_channel *ch;
 
 	if (!brd)
@@ -459,9 +462,15 @@ int __devinit jsm_uart_port_init(struct jsm_board *brd)
 		brd->channels[i]->uart_port.membase = brd->re_map_membase;
 		brd->channels[i]->uart_port.fifosize = 16;
 		brd->channels[i]->uart_port.ops = &jsm_ops;
-		brd->channels[i]->uart_port.line = brd->channels[i]->ch_portnum + brd->boardnum * 2;
+		line = find_first_zero_bit(linemap, MAXLINES);
+		if (line >= MAXLINES) {
+			printk(KERN_INFO "jsm: linemap is full, added device failed\n");
+			continue;
+		} else
+			set_bit((int)line, linemap);
+		brd->channels[i]->uart_port.line = line;
 		if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port))
-			printk(KERN_INFO "Added device failed\n");
+			printk(KERN_INFO "jsm: add device failed\n");
 		else
 			printk(KERN_INFO "Added device \n");
 	}
@@ -494,6 +503,7 @@ int jsm_remove_uart_port(struct jsm_board *brd)
 
 		ch = brd->channels[i];
 
+		clear_bit((int)(ch->uart_port.line), linemap);
 		uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port);
 	}
 
-- 
cgit v0.10.2


From aba6593bf77371e71331ba76dacc98b47760cba3 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@linux.vnet.ibm.com>
Date: Thu, 11 Jun 2009 13:02:59 +0100
Subject: icom: fixing a if clause spaghetti

adapter->version can only be ADAPTER_V2 or ADAPTER_V1. So,
that OR operand in the "if" clause is non-sense and can be removed.

Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index a461b3b..fd5fd23 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -408,7 +408,7 @@ static void load_code(struct icom_port *icom_port)
 	release_firmware(fw);
 
 	/* Set Hardware level */
-	if ((icom_port->adapter->version | ADAPTER_V2) == ADAPTER_V2)
+	if (icom_port->adapter->version == ADAPTER_V2)
 		writeb(V2_HARDWARE, &(icom_port->dram->misc_flags));
 
 	/* Start the processor in Adapter */
@@ -861,7 +861,7 @@ static irqreturn_t icom_interrupt(int irq, void *dev_id)
 	/* find icom_port for this interrupt */
 	icom_adapter = (struct icom_adapter *) dev_id;
 
-	if ((icom_adapter->version | ADAPTER_V2) == ADAPTER_V2) {
+	if (icom_adapter->version == ADAPTER_V2) {
 		int_reg = icom_adapter->base_addr + 0x8024;
 
 		adapter_interrupts = readl(int_reg);
-- 
cgit v0.10.2


From c481c707fe4b07783d9a2499a9bbbb94497e9b18 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 13:04:27 +0100
Subject: tty: remove buffer special casing

Long long ago a 4K kmalloc allocated two pages so the tty layer used the
page allocator, except on some machines where the page size was huge. This was
removed from the core tty layer with the tty buffer re-implementation but not
from tty_audit or the n_tty ldisc.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index f6f0e4e..b4b12b4 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -76,19 +76,12 @@
 static inline unsigned char *alloc_buf(void)
 {
 	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
-
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		return kmalloc(N_TTY_BUF_SIZE, prio);
-	else
-		return (unsigned char *)__get_free_page(prio);
+	return kmalloc(N_TTY_BUF_SIZE, prio);
 }
 
 static inline void free_buf(unsigned char *buf)
 {
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		kfree(buf);
-	else
-		free_page((unsigned long) buf);
+	kfree(buf);
 }
 
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c
index 55ba6f1..ac16fbe 100644
--- a/drivers/char/tty_audit.c
+++ b/drivers/char/tty_audit.c
@@ -29,10 +29,7 @@ static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor,
 	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
 	if (!buf)
 		goto err;
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
-	else
-		buf->data = (unsigned char *)__get_free_page(GFP_KERNEL);
+	buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
 	if (!buf->data)
 		goto err_buf;
 	atomic_set(&buf->count, 1);
@@ -52,10 +49,7 @@ err:
 static void tty_audit_buf_free(struct tty_audit_buf *buf)
 {
 	WARN_ON(buf->valid != 0);
-	if (PAGE_SIZE != N_TTY_BUF_SIZE)
-		kfree(buf->data);
-	else
-		free_page((unsigned long)buf->data);
+	kfree(buf->data);
 	kfree(buf);
 }
 
-- 
cgit v0.10.2


From 0b4068a1287b02018d1b3159e7be6f27f3e3e68c Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 13:05:49 +0100
Subject: tty: simplify buffer allocator cleanups

Having cleaned up the allocators we might as well remove the inline helpers
for some of it

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index b4b12b4..94a5d50 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -73,17 +73,6 @@
 #define ECHO_OP_SET_CANON_COL 0x81
 #define ECHO_OP_ERASE_TAB 0x82
 
-static inline unsigned char *alloc_buf(void)
-{
-	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
-	return kmalloc(N_TTY_BUF_SIZE, prio);
-}
-
-static inline void free_buf(unsigned char *buf)
-{
-	kfree(buf);
-}
-
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
 			       unsigned char __user *ptr)
 {
@@ -1551,11 +1540,11 @@ static void n_tty_close(struct tty_struct *tty)
 {
 	n_tty_flush_buffer(tty);
 	if (tty->read_buf) {
-		free_buf(tty->read_buf);
+		kfree(tty->read_buf);
 		tty->read_buf = NULL;
 	}
 	if (tty->echo_buf) {
-		free_buf(tty->echo_buf);
+		kfree(tty->echo_buf);
 		tty->echo_buf = NULL;
 	}
 }
@@ -1577,17 +1566,16 @@ static int n_tty_open(struct tty_struct *tty)
 
 	/* These are ugly. Currently a malloc failure here can panic */
 	if (!tty->read_buf) {
-		tty->read_buf = alloc_buf();
+		tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
 		if (!tty->read_buf)
 			return -ENOMEM;
 	}
 	if (!tty->echo_buf) {
-		tty->echo_buf = alloc_buf();
+		tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+
 		if (!tty->echo_buf)
 			return -ENOMEM;
 	}
-	memset(tty->read_buf, 0, N_TTY_BUF_SIZE);
-	memset(tty->echo_buf, 0, N_TTY_BUF_SIZE);
 	reset_buffer_flags(tty);
 	tty->column = 0;
 	n_tty_set_termios(tty, NULL);
-- 
cgit v0.10.2


From 73e0d48b8c28fb39a0bb6713c875e9919a9af546 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Thu, 11 Jun 2009 13:06:31 +0100
Subject: parport_pc: Fix subscription bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes array subscription bugs in the parport_pc driver.

drivers/parport/parport_pc.c: In function ‘parport_irq_probe’:
drivers/parport/parport_pc.c:1589: warning: array subscript is above array bounds
drivers/parport/parport_pc.c: In function ‘parport_pc_probe_port’:
drivers/parport/parport_pc.c:1579: warning: array subscript is above array bounds

The patch also fixes a few other array bugs, which the compiler was
unable to find. Coding style violations are also fixed.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 4e63cc9..24984c4 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -1246,17 +1246,17 @@ static void __devinit show_parconfig_smsc37c669(int io, int key)
 		       (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], 
 		       (cr4 & 0x40) ? "1.7" : "1.9");
 	}
-		
+
 	/* Heuristics !  BIOS setup for this mainboard device limits
 	   the choices to standard settings, i.e. io-address and IRQ
 	   are related, however DMA can be 1 or 3, assume DMA_A=DMA1,
 	   DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */
-	if(cr23*4 >=0x100) { /* if active */
-		while((superios[i].io!= 0) && (i<NR_SUPERIOS))
+	if (cr23 * 4 >= 0x100) { /* if active */
+		while ((i < NR_SUPERIOS) && (superios[i].io != 0))
 			i++;
-		if(i==NR_SUPERIOS)
+		if (i == NR_SUPERIOS) {
 			printk(KERN_INFO "Super-IO: too many chips!\n");
-		else {
+		} else {
 			int d;
 			switch (cr23*4) {
 				case 0x3bc:
@@ -1332,12 +1332,12 @@ static void __devinit show_parconfig_winbond(int io, int key)
 		printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", modes[crf0 & 0x07]);
 	}
 
-	if(cr30 & 0x01) { /* the settings can be interrogated later ... */
-		while((superios[i].io!= 0) && (i<NR_SUPERIOS))
+	if (cr30 & 0x01) { /* the settings can be interrogated later ... */
+		while ((i < NR_SUPERIOS) && (superios[i].io != 0))
 			i++;
-		if(i==NR_SUPERIOS) 
+		if (i == NR_SUPERIOS) {
 			printk(KERN_INFO "Super-IO: too many chips!\n");
-		else {
+		} else {
 			superios[i].io = (cr60<<8)|cr61;
 			superios[i].irq = cr70&0x0f;
 			superios[i].dma = (((cr74 & 0x07) > 3) ?
@@ -1575,24 +1575,26 @@ static void __devinit detect_and_report_it87(void)
 
 static int get_superio_dma (struct parport *p)
 {
-	int i=0;
-	while( (superios[i].io != p->base) && (i<NR_SUPERIOS))
+	int i = 0;
+
+	while ((i < NR_SUPERIOS) && (superios[i].io != p->base))
 		i++;
-	if (i!=NR_SUPERIOS)
+	if (i != NR_SUPERIOS)
 		return superios[i].dma;
 	return PARPORT_DMA_NONE;
 }
 
 static int get_superio_irq (struct parport *p)
 {
-	int i=0;
-        while( (superios[i].io != p->base) && (i<NR_SUPERIOS))
+	int i = 0;
+
+        while ((i < NR_SUPERIOS) && (superios[i].io != p->base))
                 i++;
-        if (i!=NR_SUPERIOS)
+        if (i != NR_SUPERIOS)
                 return superios[i].irq;
         return PARPORT_IRQ_NONE;
 }
-	
+
 
 /* --- Mode detection ------------------------------------- */
 
-- 
cgit v0.10.2


From 3aeda9bc95d064308a70664e6f4a158c96cc1918 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 13:07:29 +0100
Subject: parport_pc: Coding style

Michael's patch fixed some of the coding style so the style is now
inconsistent. Sort the rest out

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 24984c4..edf83e9 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -1,5 +1,5 @@
 /* Low-level parallel-port routines for 8255-based PC-style hardware.
- * 
+ *
  * Authors: Phil Blundell <philb@gnu.org>
  *          Tim Waugh <tim@cyberelk.demon.co.uk>
  *	    Jose Renau <renau@acm.org>
@@ -11,7 +11,7 @@
  * Cleaned up include files - Russell King <linux@arm.uk.linux.org>
  * DMA support - Bert De Jonghe <bert@sophis.be>
  * Many ECP bugs fixed.  Fred Barnes & Jamie Lokier, 1999
- * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. 
+ * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G.
  * Various hacks, Fred Barnes, 04/2001
  * Updated probing logic - Adam Belay <ambx1@neo.rr.com>
  */
@@ -56,10 +56,10 @@
 #include <linux/pnp.h>
 #include <linux/platform_device.h>
 #include <linux/sysctl.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
 
-#include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
 
 #include <linux/parport.h>
 #include <linux/parport_pc.h>
@@ -82,7 +82,7 @@
 #define ECR_TST 06
 #define ECR_CNF 07
 #define ECR_MODE_MASK 0xe0
-#define ECR_WRITE(p,v) frob_econtrol((p),0xff,(v))
+#define ECR_WRITE(p, v) frob_econtrol((p), 0xff, (v))
 
 #undef DEBUG
 
@@ -109,27 +109,27 @@ static int pci_registered_parport;
 static int pnp_registered_parport;
 
 /* frob_control, but for ECR */
-static void frob_econtrol (struct parport *pb, unsigned char m,
+static void frob_econtrol(struct parport *pb, unsigned char m,
 			   unsigned char v)
 {
 	unsigned char ectr = 0;
 
 	if (m != 0xff)
-		ectr = inb (ECONTROL (pb));
+		ectr = inb(ECONTROL(pb));
 
-	DPRINTK (KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
+	DPRINTK(KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
 		m, v, ectr, (ectr & ~m) ^ v);
 
-	outb ((ectr & ~m) ^ v, ECONTROL (pb));
+	outb((ectr & ~m) ^ v, ECONTROL(pb));
 }
 
-static __inline__ void frob_set_mode (struct parport *p, int mode)
+static inline void frob_set_mode(struct parport *p, int mode)
 {
-	frob_econtrol (p, ECR_MODE_MASK, mode << 5);
+	frob_econtrol(p, ECR_MODE_MASK, mode << 5);
 }
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-/* Safely change the mode bits in the ECR 
+/* Safely change the mode bits in the ECR
    Returns:
 	    0    : Success
 	   -EBUSY: Could not drain FIFO in some finite amount of time,
@@ -141,17 +141,18 @@ static int change_mode(struct parport *p, int m)
 	unsigned char oecr;
 	int mode;
 
-	DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n",m);
+	DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n", m);
 
 	if (!priv->ecr) {
-		printk (KERN_DEBUG "change_mode: but there's no ECR!\n");
+		printk(KERN_DEBUG "change_mode: but there's no ECR!\n");
 		return 0;
 	}
 
 	/* Bits <7:5> contain the mode. */
-	oecr = inb (ECONTROL (p));
+	oecr = inb(ECONTROL(p));
 	mode = (oecr >> 5) & 0x7;
-	if (mode == m) return 0;
+	if (mode == m)
+		return 0;
 
 	if (mode >= 2 && !(priv->ctr & 0x20)) {
 		/* This mode resets the FIFO, so we may
@@ -163,19 +164,21 @@ static int change_mode(struct parport *p, int m)
 		case ECR_ECP: /* ECP Parallel Port mode */
 			/* Busy wait for 200us */
 			for (counter = 0; counter < 40; counter++) {
-				if (inb (ECONTROL (p)) & 0x01)
+				if (inb(ECONTROL(p)) & 0x01)
 					break;
-				if (signal_pending (current)) break;
-				udelay (5);
+				if (signal_pending(current))
+					break;
+				udelay(5);
 			}
 
 			/* Poll slowly. */
-			while (!(inb (ECONTROL (p)) & 0x01)) {
-				if (time_after_eq (jiffies, expire))
+			while (!(inb(ECONTROL(p)) & 0x01)) {
+				if (time_after_eq(jiffies, expire))
 					/* The FIFO is stuck. */
 					return -EBUSY;
-				schedule_timeout_interruptible(msecs_to_jiffies(10));
-				if (signal_pending (current))
+				schedule_timeout_interruptible(
+							msecs_to_jiffies(10));
+				if (signal_pending(current))
 					break;
 			}
 		}
@@ -185,20 +188,20 @@ static int change_mode(struct parport *p, int m)
 		/* We have to go through mode 001 */
 		oecr &= ~(7 << 5);
 		oecr |= ECR_PS2 << 5;
-		ECR_WRITE (p, oecr);
+		ECR_WRITE(p, oecr);
 	}
 
 	/* Set the mode. */
 	oecr &= ~(7 << 5);
 	oecr |= m << 5;
-	ECR_WRITE (p, oecr);
+	ECR_WRITE(p, oecr);
 	return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
 /* Find FIFO lossage; FIFO is reset */
 #if 0
-static int get_fifo_residue (struct parport *p)
+static int get_fifo_residue(struct parport *p)
 {
 	int residue;
 	int cnfga;
@@ -206,26 +209,26 @@ static int get_fifo_residue (struct parport *p)
 
 	/* Adjust for the contents of the FIFO. */
 	for (residue = priv->fifo_depth; ; residue--) {
-		if (inb (ECONTROL (p)) & 0x2)
+		if (inb(ECONTROL(p)) & 0x2)
 				/* Full up. */
 			break;
 
-		outb (0, FIFO (p));
+		outb(0, FIFO(p));
 	}
 
-	printk (KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
+	printk(KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
 		residue);
 
 	/* Reset the FIFO. */
-	frob_set_mode (p, ECR_PS2);
+	frob_set_mode(p, ECR_PS2);
 
 	/* Now change to config mode and clean up. FIXME */
-	frob_set_mode (p, ECR_CNF);
-	cnfga = inb (CONFIGA (p));
-	printk (KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
+	frob_set_mode(p, ECR_CNF);
+	cnfga = inb(CONFIGA(p));
+	printk(KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
 
 	if (!(cnfga & (1<<2))) {
-		printk (KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
+		printk(KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
 		residue++;
 	}
 
@@ -233,9 +236,11 @@ static int get_fifo_residue (struct parport *p)
 	 * PWord != 1 byte. */
 
 	/* Back to PS2 mode. */
-	frob_set_mode (p, ECR_PS2);
+	frob_set_mode(p, ECR_PS2);
 
-	DPRINTK (KERN_DEBUG "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", inb (ECONTROL (p)));
+	DPRINTK(KERN_DEBUG
+	     "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n",
+							inb(ECONTROL(p)));
 	return residue;
 }
 #endif  /*  0 */
@@ -257,8 +262,8 @@ static int clear_epp_timeout(struct parport *pb)
 	/* To clear timeout some chips require double read */
 	parport_pc_read_status(pb);
 	r = parport_pc_read_status(pb);
-	outb (r | 0x01, STATUS (pb)); /* Some reset by writing 1 */
-	outb (r & 0xfe, STATUS (pb)); /* Others by writing 0 */
+	outb(r | 0x01, STATUS(pb)); /* Some reset by writing 1 */
+	outb(r & 0xfe, STATUS(pb)); /* Others by writing 0 */
 	r = parport_pc_read_status(pb);
 
 	return !(r & 0x01);
@@ -272,7 +277,8 @@ static int clear_epp_timeout(struct parport *pb)
  * of these are in parport_pc.h.
  */
 
-static void parport_pc_init_state(struct pardevice *dev, struct parport_state *s)
+static void parport_pc_init_state(struct pardevice *dev,
+						struct parport_state *s)
 {
 	s->u.pc.ctr = 0xc;
 	if (dev->irq_func &&
@@ -289,22 +295,23 @@ static void parport_pc_save_state(struct parport *p, struct parport_state *s)
 	const struct parport_pc_private *priv = p->physport->private_data;
 	s->u.pc.ctr = priv->ctr;
 	if (priv->ecr)
-		s->u.pc.ecr = inb (ECONTROL (p));
+		s->u.pc.ecr = inb(ECONTROL(p));
 }
 
-static void parport_pc_restore_state(struct parport *p, struct parport_state *s)
+static void parport_pc_restore_state(struct parport *p,
+						struct parport_state *s)
 {
 	struct parport_pc_private *priv = p->physport->private_data;
 	register unsigned char c = s->u.pc.ctr & priv->ctr_writable;
-	outb (c, CONTROL (p));
+	outb(c, CONTROL(p));
 	priv->ctr = c;
 	if (priv->ecr)
-		ECR_WRITE (p, s->u.pc.ecr);
+		ECR_WRITE(p, s->u.pc.ecr);
 }
 
 #ifdef CONFIG_PARPORT_1284
-static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
-					size_t length, int flags)
+static size_t parport_pc_epp_read_data(struct parport *port, void *buf,
+				       size_t length, int flags)
 {
 	size_t got = 0;
 
@@ -316,54 +323,52 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
 		 *  nFault is 0 if there is at least 1 byte in the Warp's FIFO
 		 *  pError is 1 if there are 16 bytes in the Warp's FIFO
 		 */
-		status = inb (STATUS (port));
+		status = inb(STATUS(port));
 
-		while (!(status & 0x08) && (got < length)) {
-			if ((left >= 16) && (status & 0x20) && !(status & 0x08)) {
+		while (!(status & 0x08) && got < length) {
+			if (left >= 16 && (status & 0x20) && !(status & 0x08)) {
 				/* can grab 16 bytes from warp fifo */
-				if (!((long)buf & 0x03)) {
-					insl (EPPDATA (port), buf, 4);
-				} else {
-					insb (EPPDATA (port), buf, 16);
-				}
+				if (!((long)buf & 0x03))
+					insl(EPPDATA(port), buf, 4);
+				else
+					insb(EPPDATA(port), buf, 16);
 				buf += 16;
 				got += 16;
 				left -= 16;
 			} else {
 				/* grab single byte from the warp fifo */
-				*((char *)buf) = inb (EPPDATA (port));
+				*((char *)buf) = inb(EPPDATA(port));
 				buf++;
 				got++;
 				left--;
 			}
-			status = inb (STATUS (port));
+			status = inb(STATUS(port));
 			if (status & 0x01) {
 				/* EPP timeout should never occur... */
-				printk (KERN_DEBUG "%s: EPP timeout occurred while talking to "
-					"w91284pic (should not have done)\n", port->name);
-				clear_epp_timeout (port);
+				printk(KERN_DEBUG
+"%s: EPP timeout occurred while talking to w91284pic (should not have done)\n", port->name);
+				clear_epp_timeout(port);
 			}
 		}
 		return got;
 	}
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		if (!(((long)buf | length) & 0x03)) {
-			insl (EPPDATA (port), buf, (length >> 2));
-		} else {
-			insb (EPPDATA (port), buf, length);
-		}
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (!(((long)buf | length) & 0x03))
+			insl(EPPDATA(port), buf, (length >> 2));
+		else
+			insb(EPPDATA(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; got < length; got++) {
-		*((char*)buf) = inb (EPPDATA(port));
+		*((char *)buf) = inb(EPPDATA(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
+		if (inb(STATUS(port)) & 0x01) {
 			/* EPP timeout */
-			clear_epp_timeout (port);
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -371,28 +376,27 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
 	return got;
 }
 
-static size_t parport_pc_epp_write_data (struct parport *port, const void *buf,
-					 size_t length, int flags)
+static size_t parport_pc_epp_write_data(struct parport *port, const void *buf,
+					size_t length, int flags)
 {
 	size_t written = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		if (!(((long)buf | length) & 0x03)) {
-			outsl (EPPDATA (port), buf, (length >> 2));
-		} else {
-			outsb (EPPDATA (port), buf, length);
-		}
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (!(((long)buf | length) & 0x03))
+			outsl(EPPDATA(port), buf, (length >> 2));
+		else
+			outsb(EPPDATA(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; written < length; written++) {
-		outb (*((char*)buf), EPPDATA(port));
+		outb(*((char *)buf), EPPDATA(port));
 		buf++;
-		if (inb (STATUS(port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -400,24 +404,24 @@ static size_t parport_pc_epp_write_data (struct parport *port, const void *buf,
 	return written;
 }
 
-static size_t parport_pc_epp_read_addr (struct parport *port, void *buf,
+static size_t parport_pc_epp_read_addr(struct parport *port, void *buf,
 					size_t length, int flags)
 {
 	size_t got = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		insb (EPPADDR (port), buf, length);
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		insb(EPPADDR(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; got < length; got++) {
-		*((char*)buf) = inb (EPPADDR (port));
+		*((char *)buf) = inb(EPPADDR(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -425,25 +429,25 @@ static size_t parport_pc_epp_read_addr (struct parport *port, void *buf,
 	return got;
 }
 
-static size_t parport_pc_epp_write_addr (struct parport *port,
+static size_t parport_pc_epp_write_addr(struct parport *port,
 					 const void *buf, size_t length,
 					 int flags)
 {
 	size_t written = 0;
 
 	if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-		outsb (EPPADDR (port), buf, length);
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		outsb(EPPADDR(port), buf, length);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			return -EIO;
 		}
 		return length;
 	}
 	for (; written < length; written++) {
-		outb (*((char*)buf), EPPADDR (port));
+		outb(*((char *)buf), EPPADDR(port));
 		buf++;
-		if (inb (STATUS (port)) & 0x01) {
-			clear_epp_timeout (port);
+		if (inb(STATUS(port)) & 0x01) {
+			clear_epp_timeout(port);
 			break;
 		}
 	}
@@ -451,74 +455,74 @@ static size_t parport_pc_epp_write_addr (struct parport *port,
 	return written;
 }
 
-static size_t parport_pc_ecpepp_read_data (struct parport *port, void *buf,
-					   size_t length, int flags)
+static size_t parport_pc_ecpepp_read_data(struct parport *port, void *buf,
+					  size_t length, int flags)
 {
 	size_t got;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_data_reverse (port);
-	parport_pc_write_control (port, 0x4);
-	got = parport_pc_epp_read_data (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_data_reverse(port);
+	parport_pc_write_control(port, 0x4);
+	got = parport_pc_epp_read_data(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return got;
 }
 
-static size_t parport_pc_ecpepp_write_data (struct parport *port,
-					    const void *buf, size_t length,
-					    int flags)
+static size_t parport_pc_ecpepp_write_data(struct parport *port,
+					   const void *buf, size_t length,
+					   int flags)
 {
 	size_t written;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_write_control (port, 0x4);
-	parport_pc_data_forward (port);
-	written = parport_pc_epp_write_data (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_write_control(port, 0x4);
+	parport_pc_data_forward(port);
+	written = parport_pc_epp_write_data(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return written;
 }
 
-static size_t parport_pc_ecpepp_read_addr (struct parport *port, void *buf,
-					   size_t length, int flags)
+static size_t parport_pc_ecpepp_read_addr(struct parport *port, void *buf,
+					  size_t length, int flags)
 {
 	size_t got;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_data_reverse (port);
-	parport_pc_write_control (port, 0x4);
-	got = parport_pc_epp_read_addr (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_data_reverse(port);
+	parport_pc_write_control(port, 0x4);
+	got = parport_pc_epp_read_addr(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return got;
 }
 
-static size_t parport_pc_ecpepp_write_addr (struct parport *port,
+static size_t parport_pc_ecpepp_write_addr(struct parport *port,
 					    const void *buf, size_t length,
 					    int flags)
 {
 	size_t written;
 
-	frob_set_mode (port, ECR_EPP);
-	parport_pc_write_control (port, 0x4);
-	parport_pc_data_forward (port);
-	written = parport_pc_epp_write_addr (port, buf, length, flags);
-	frob_set_mode (port, ECR_PS2);
+	frob_set_mode(port, ECR_EPP);
+	parport_pc_write_control(port, 0x4);
+	parport_pc_data_forward(port);
+	written = parport_pc_epp_write_addr(port, buf, length, flags);
+	frob_set_mode(port, ECR_PS2);
 
 	return written;
 }
 #endif /* IEEE 1284 support */
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-static size_t parport_pc_fifo_write_block_pio (struct parport *port,
+static size_t parport_pc_fifo_write_block_pio(struct parport *port,
 					       const void *buf, size_t length)
 {
 	int ret = 0;
 	const unsigned char *bufp = buf;
 	size_t left = length;
 	unsigned long expire = jiffies + port->physport->cad->timeout;
-	const int fifo = FIFO (port);
+	const int fifo = FIFO(port);
 	int poll_for = 8; /* 80 usecs */
 	const struct parport_pc_private *priv = port->physport->private_data;
 	const int fifo_depth = priv->fifo_depth;
@@ -526,25 +530,25 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
 	port = port->physport;
 
 	/* We don't want to be interrupted every character. */
-	parport_pc_disable_irq (port);
+	parport_pc_disable_irq(port);
 	/* set nErrIntrEn and serviceIntr */
-	frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2));
+	frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
 
 	/* Forward mode. */
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
 
 	while (left) {
 		unsigned char byte;
-		unsigned char ecrval = inb (ECONTROL (port));
+		unsigned char ecrval = inb(ECONTROL(port));
 		int i = 0;
 
-		if (need_resched() && time_before (jiffies, expire))
+		if (need_resched() && time_before(jiffies, expire))
 			/* Can't yield the port. */
-			schedule ();
+			schedule();
 
 		/* Anyone else waiting for the port? */
 		if (port->waithead) {
-			printk (KERN_DEBUG "Somebody wants the port\n");
+			printk(KERN_DEBUG "Somebody wants the port\n");
 			break;
 		}
 
@@ -552,21 +556,22 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
 			/* FIFO is full. Wait for interrupt. */
 
 			/* Clear serviceIntr */
-			ECR_WRITE (port, ecrval & ~(1<<2));
-		false_alarm:
-			ret = parport_wait_event (port, HZ);
-			if (ret < 0) break;
+			ECR_WRITE(port, ecrval & ~(1<<2));
+false_alarm:
+			ret = parport_wait_event(port, HZ);
+			if (ret < 0)
+				break;
 			ret = 0;
-			if (!time_before (jiffies, expire)) {
+			if (!time_before(jiffies, expire)) {
 				/* Timed out. */
-				printk (KERN_DEBUG "FIFO write timed out\n");
+				printk(KERN_DEBUG "FIFO write timed out\n");
 				break;
 			}
-			ecrval = inb (ECONTROL (port));
+			ecrval = inb(ECONTROL(port));
 			if (!(ecrval & (1<<2))) {
 				if (need_resched() &&
-				    time_before (jiffies, expire))
-					schedule ();
+				    time_before(jiffies, expire))
+					schedule();
 
 				goto false_alarm;
 			}
@@ -577,38 +582,38 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
 		/* Can't fail now. */
 		expire = jiffies + port->cad->timeout;
 
-	poll:
-		if (signal_pending (current))
+poll:
+		if (signal_pending(current))
 			break;
 
 		if (ecrval & 0x01) {
 			/* FIFO is empty. Blast it full. */
 			const int n = left < fifo_depth ? left : fifo_depth;
-			outsb (fifo, bufp, n);
+			outsb(fifo, bufp, n);
 			bufp += n;
 			left -= n;
 
 			/* Adjust the poll time. */
-			if (i < (poll_for - 2)) poll_for--;
+			if (i < (poll_for - 2))
+				poll_for--;
 			continue;
 		} else if (i++ < poll_for) {
-			udelay (10);
-			ecrval = inb (ECONTROL (port));
+			udelay(10);
+			ecrval = inb(ECONTROL(port));
 			goto poll;
 		}
 
-		/* Half-full (call me an optimist) */
+		/* Half-full(call me an optimist) */
 		byte = *bufp++;
-		outb (byte, fifo);
+		outb(byte, fifo);
 		left--;
-        }
-
-dump_parport_state ("leave fifo_write_block_pio", port);
+	}
+	dump_parport_state("leave fifo_write_block_pio", port);
 	return length - left;
 }
 
 #ifdef HAS_DMA
-static size_t parport_pc_fifo_write_block_dma (struct parport *port,
+static size_t parport_pc_fifo_write_block_dma(struct parport *port,
 					       const void *buf, size_t length)
 {
 	int ret = 0;
@@ -621,7 +626,7 @@ static size_t parport_pc_fifo_write_block_dma (struct parport *port,
 	unsigned long start = (unsigned long) buf;
 	unsigned long end = (unsigned long) buf + length - 1;
 
-dump_parport_state ("enter fifo_write_block_dma", port);
+dump_parport_state("enter fifo_write_block_dma", port);
 	if (end < MAX_DMA_ADDRESS) {
 		/* If it would cross a 64k boundary, cap it at the end. */
 		if ((start ^ end) & ~0xffffUL)
@@ -629,8 +634,9 @@ dump_parport_state ("enter fifo_write_block_dma", port);
 
 		dma_addr = dma_handle = dma_map_single(dev, (void *)buf, length,
 						       DMA_TO_DEVICE);
-        } else {
-		/* above 16 MB we use a bounce buffer as ISA-DMA is not possible */
+	} else {
+		/* above 16 MB we use a bounce buffer as ISA-DMA
+		   is not possible */
 		maxlen   = PAGE_SIZE;          /* sizeof(priv->dma_buf) */
 		dma_addr = priv->dma_handle;
 		dma_handle = 0;
@@ -639,12 +645,12 @@ dump_parport_state ("enter fifo_write_block_dma", port);
 	port = port->physport;
 
 	/* We don't want to be interrupted every character. */
-	parport_pc_disable_irq (port);
+	parport_pc_disable_irq(port);
 	/* set nErrIntrEn and serviceIntr */
-	frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2));
+	frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
 
 	/* Forward mode. */
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
 
 	while (left) {
 		unsigned long expire = jiffies + port->physport->cad->timeout;
@@ -665,10 +671,10 @@ dump_parport_state ("enter fifo_write_block_dma", port);
 		set_dma_count(port->dma, count);
 
 		/* Set DMA mode */
-		frob_econtrol (port, 1<<3, 1<<3);
+		frob_econtrol(port, 1<<3, 1<<3);
 
 		/* Clear serviceIntr */
-		frob_econtrol (port, 1<<2, 0);
+		frob_econtrol(port, 1<<2, 0);
 
 		enable_dma(port->dma);
 		release_dma_lock(dmaflag);
@@ -676,20 +682,22 @@ dump_parport_state ("enter fifo_write_block_dma", port);
 		/* assume DMA will be successful */
 		left -= count;
 		buf  += count;
-		if (dma_handle) dma_addr += count;
+		if (dma_handle)
+			dma_addr += count;
 
 		/* Wait for interrupt. */
-	false_alarm:
-		ret = parport_wait_event (port, HZ);
-		if (ret < 0) break;
+false_alarm:
+		ret = parport_wait_event(port, HZ);
+		if (ret < 0)
+			break;
 		ret = 0;
-		if (!time_before (jiffies, expire)) {
+		if (!time_before(jiffies, expire)) {
 			/* Timed out. */
-			printk (KERN_DEBUG "DMA write timed out\n");
+			printk(KERN_DEBUG "DMA write timed out\n");
 			break;
 		}
 		/* Is serviceIntr set? */
-		if (!(inb (ECONTROL (port)) & (1<<2))) {
+		if (!(inb(ECONTROL(port)) & (1<<2))) {
 			cond_resched();
 
 			goto false_alarm;
@@ -705,14 +713,15 @@ dump_parport_state ("enter fifo_write_block_dma", port);
 
 		/* Anyone else waiting for the port? */
 		if (port->waithead) {
-			printk (KERN_DEBUG "Somebody wants the port\n");
+			printk(KERN_DEBUG "Somebody wants the port\n");
 			break;
 		}
 
 		/* update for possible DMA residue ! */
 		buf  -= count;
 		left += count;
-		if (dma_handle) dma_addr -= count;
+		if (dma_handle)
+			dma_addr -= count;
 	}
 
 	/* Maybe got here through break, so adjust for DMA residue! */
@@ -723,12 +732,12 @@ dump_parport_state ("enter fifo_write_block_dma", port);
 	release_dma_lock(dmaflag);
 
 	/* Turn off DMA mode */
-	frob_econtrol (port, 1<<3, 0);
+	frob_econtrol(port, 1<<3, 0);
 
 	if (dma_handle)
 		dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE);
 
-dump_parport_state ("leave fifo_write_block_dma", port);
+dump_parport_state("leave fifo_write_block_dma", port);
 	return length - left;
 }
 #endif
@@ -738,13 +747,13 @@ static inline size_t parport_pc_fifo_write_block(struct parport *port,
 {
 #ifdef HAS_DMA
 	if (port->dma != PARPORT_DMA_NONE)
-		return parport_pc_fifo_write_block_dma (port, buf, length);
+		return parport_pc_fifo_write_block_dma(port, buf, length);
 #endif
-	return parport_pc_fifo_write_block_pio (port, buf, length);
+	return parport_pc_fifo_write_block_pio(port, buf, length);
 }
 
 /* Parallel Port FIFO mode (ECP chipsets) */
-static size_t parport_pc_compat_write_block_pio (struct parport *port,
+static size_t parport_pc_compat_write_block_pio(struct parport *port,
 						 const void *buf, size_t length,
 						 int flags)
 {
@@ -756,14 +765,16 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_write_compat (port, buf,
+		return parport_ieee1284_write_compat(port, buf,
 						      length, flags);
 
 	/* Set up parallel port FIFO mode.*/
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
-	parport_pc_frob_control (port, PARPORT_CONTROL_STROBE, 0);
-	r = change_mode (port, ECR_PPF); /* Parallel port FIFO */
-	if (r)  printk (KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", port->name);
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
+	parport_pc_frob_control(port, PARPORT_CONTROL_STROBE, 0);
+	r = change_mode(port, ECR_PPF); /* Parallel port FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n",
+								port->name);
 
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
 
@@ -775,40 +786,39 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
 	 * the FIFO is empty, so allow 4 seconds for each position
 	 * in the fifo.
 	 */
-        expire = jiffies + (priv->fifo_depth * HZ * 4);
+	expire = jiffies + (priv->fifo_depth * HZ * 4);
 	do {
 		/* Wait for the FIFO to empty */
-		r = change_mode (port, ECR_PS2);
-		if (r != -EBUSY) {
+		r = change_mode(port, ECR_PS2);
+		if (r != -EBUSY)
 			break;
-		}
-	} while (time_before (jiffies, expire));
+	} while (time_before(jiffies, expire));
 	if (r == -EBUSY) {
 
-		printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name);
+		printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
 
 		/* Prevent further data transfer. */
-		frob_set_mode (port, ECR_TST);
+		frob_set_mode(port, ECR_TST);
 
 		/* Adjust for the contents of the FIFO. */
 		for (written -= priv->fifo_depth; ; written++) {
-			if (inb (ECONTROL (port)) & 0x2) {
+			if (inb(ECONTROL(port)) & 0x2) {
 				/* Full up. */
 				break;
 			}
-			outb (0, FIFO (port));
+			outb(0, FIFO(port));
 		}
 
 		/* Reset the FIFO and return to PS2 mode. */
-		frob_set_mode (port, ECR_PS2);
+		frob_set_mode(port, ECR_PS2);
 	}
 
-	r = parport_wait_peripheral (port,
+	r = parport_wait_peripheral(port,
 				     PARPORT_STATUS_BUSY,
 				     PARPORT_STATUS_BUSY);
 	if (r)
-		printk (KERN_DEBUG
-			"%s: BUSY timeout (%d) in compat_write_block_pio\n", 
+		printk(KERN_DEBUG
+			"%s: BUSY timeout (%d) in compat_write_block_pio\n",
 			port->name, r);
 
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
@@ -818,7 +828,7 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
 
 /* ECP */
 #ifdef CONFIG_PARPORT_1284
-static size_t parport_pc_ecp_write_block_pio (struct parport *port,
+static size_t parport_pc_ecp_write_block_pio(struct parport *port,
 					      const void *buf, size_t length,
 					      int flags)
 {
@@ -830,36 +840,38 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_ecp_write_data (port, buf,
+		return parport_ieee1284_ecp_write_data(port, buf,
 							length, flags);
 
 	/* Switch to forward mode if necessary. */
 	if (port->physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) {
 		/* Event 47: Set nInit high. */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT
 				      | PARPORT_CONTROL_AUTOFD,
 				      PARPORT_CONTROL_INIT
 				      | PARPORT_CONTROL_AUTOFD);
 
 		/* Event 49: PError goes high. */
-		r = parport_wait_peripheral (port,
+		r = parport_wait_peripheral(port,
 					     PARPORT_STATUS_PAPEROUT,
 					     PARPORT_STATUS_PAPEROUT);
 		if (r) {
-			printk (KERN_DEBUG "%s: PError timeout (%d) "
+			printk(KERN_DEBUG "%s: PError timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 		}
 	}
 
 	/* Set up ECP parallel port mode.*/
-	parport_pc_data_forward (port); /* Must be in PS2 mode */
-	parport_pc_frob_control (port,
+	parport_pc_data_forward(port); /* Must be in PS2 mode */
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_STROBE |
 				 PARPORT_CONTROL_AUTOFD,
 				 0);
-	r = change_mode (port, ECR_ECP); /* ECP FIFO */
-	if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name);
+	r = change_mode(port, ECR_ECP); /* ECP FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
+								port->name);
 	port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
 
 	/* Write the data to the FIFO. */
@@ -873,55 +885,54 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
 	expire = jiffies + (priv->fifo_depth * (HZ * 4));
 	do {
 		/* Wait for the FIFO to empty */
-		r = change_mode (port, ECR_PS2);
-		if (r != -EBUSY) {
+		r = change_mode(port, ECR_PS2);
+		if (r != -EBUSY)
 			break;
-		}
-	} while (time_before (jiffies, expire));
+	} while (time_before(jiffies, expire));
 	if (r == -EBUSY) {
 
-		printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name);
+		printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
 
 		/* Prevent further data transfer. */
-		frob_set_mode (port, ECR_TST);
+		frob_set_mode(port, ECR_TST);
 
 		/* Adjust for the contents of the FIFO. */
 		for (written -= priv->fifo_depth; ; written++) {
-			if (inb (ECONTROL (port)) & 0x2) {
+			if (inb(ECONTROL(port)) & 0x2) {
 				/* Full up. */
 				break;
 			}
-			outb (0, FIFO (port));
+			outb(0, FIFO(port));
 		}
 
 		/* Reset the FIFO and return to PS2 mode. */
-		frob_set_mode (port, ECR_PS2);
+		frob_set_mode(port, ECR_PS2);
 
 		/* Host transfer recovery. */
-		parport_pc_data_reverse (port); /* Must be in PS2 mode */
-		udelay (5);
-		parport_frob_control (port, PARPORT_CONTROL_INIT, 0);
-		r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0);
+		parport_pc_data_reverse(port); /* Must be in PS2 mode */
+		udelay(5);
+		parport_frob_control(port, PARPORT_CONTROL_INIT, 0);
+		r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
 		if (r)
-			printk (KERN_DEBUG "%s: PE,1 timeout (%d) "
+			printk(KERN_DEBUG "%s: PE,1 timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT,
 				      PARPORT_CONTROL_INIT);
-		r = parport_wait_peripheral (port,
+		r = parport_wait_peripheral(port,
 					     PARPORT_STATUS_PAPEROUT,
 					     PARPORT_STATUS_PAPEROUT);
-                if (r)
-                        printk (KERN_DEBUG "%s: PE,2 timeout (%d) "
+		if (r)
+			printk(KERN_DEBUG "%s: PE,2 timeout (%d) "
 				"in ecp_write_block_pio\n", port->name, r);
 	}
 
-	r = parport_wait_peripheral (port,
-				     PARPORT_STATUS_BUSY, 
+	r = parport_wait_peripheral(port,
+				     PARPORT_STATUS_BUSY,
 				     PARPORT_STATUS_BUSY);
-	if(r)
-		printk (KERN_DEBUG
+	if (r)
+		printk(KERN_DEBUG
 			"%s: BUSY timeout (%d) in ecp_write_block_pio\n",
 			port->name, r);
 
@@ -931,7 +942,7 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
 }
 
 #if 0
-static size_t parport_pc_ecp_read_block_pio (struct parport *port,
+static size_t parport_pc_ecp_read_block_pio(struct parport *port,
 					     void *buf, size_t length,
 					     int flags)
 {
@@ -944,13 +955,13 @@ static size_t parport_pc_ecp_read_block_pio (struct parport *port,
 	char *bufp = buf;
 
 	port = port->physport;
-DPRINTK (KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
-dump_parport_state ("enter fcn", port);
+DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
+dump_parport_state("enter fcn", port);
 
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
 	if (port->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-		return parport_ieee1284_ecp_read_data (port, buf,
+		return parport_ieee1284_ecp_read_data(port, buf,
 						       length, flags);
 
 	if (port->ieee1284.mode == IEEE1284_MODE_ECPRLE) {
@@ -966,173 +977,177 @@ dump_parport_state ("enter fcn", port);
 	 * go through software emulation.  Otherwise we may have to throw
 	 * away data. */
 	if (length < fifofull)
-		return parport_ieee1284_ecp_read_data (port, buf,
+		return parport_ieee1284_ecp_read_data(port, buf,
 						       length, flags);
 
 	if (port->ieee1284.phase != IEEE1284_PH_REV_IDLE) {
 		/* change to reverse-idle phase (must be in forward-idle) */
 
 		/* Event 38: Set nAutoFd low (also make sure nStrobe is high) */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_AUTOFD
 				      | PARPORT_CONTROL_STROBE,
 				      PARPORT_CONTROL_AUTOFD);
-		parport_pc_data_reverse (port); /* Must be in PS2 mode */
-		udelay (5);
+		parport_pc_data_reverse(port); /* Must be in PS2 mode */
+		udelay(5);
 		/* Event 39: Set nInit low to initiate bus reversal */
-		parport_frob_control (port,
+		parport_frob_control(port,
 				      PARPORT_CONTROL_INIT,
 				      0);
 		/* Event 40: Wait for  nAckReverse (PError) to go low */
-		r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0);
-                if (r) {
-                        printk (KERN_DEBUG "%s: PE timeout Event 40 (%d) "
+		r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
+		if (r) {
+			printk(KERN_DEBUG "%s: PE timeout Event 40 (%d) "
 				"in ecp_read_block_pio\n", port->name, r);
 			return 0;
 		}
 	}
 
 	/* Set up ECP FIFO mode.*/
-/*	parport_pc_frob_control (port,
+/*	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_STROBE |
 				 PARPORT_CONTROL_AUTOFD,
 				 PARPORT_CONTROL_AUTOFD); */
-	r = change_mode (port, ECR_ECP); /* ECP FIFO */
-	if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name);
+	r = change_mode(port, ECR_ECP); /* ECP FIFO */
+	if (r)
+		printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
+								port->name);
 
 	port->ieee1284.phase = IEEE1284_PH_REV_DATA;
 
 	/* the first byte must be collected manually */
-dump_parport_state ("pre 43", port);
+	dump_parport_state("pre 43", port);
 	/* Event 43: Wait for nAck to go low */
-	r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, 0);
+	r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, 0);
 	if (r) {
 		/* timed out while reading -- no data */
-		printk (KERN_DEBUG "PIO read timed out (initial byte)\n");
+		printk(KERN_DEBUG "PIO read timed out (initial byte)\n");
 		goto out_no_data;
 	}
 	/* read byte */
-	*bufp++ = inb (DATA (port));
+	*bufp++ = inb(DATA(port));
 	left--;
-dump_parport_state ("43-44", port);
+	dump_parport_state("43-44", port);
 	/* Event 44: nAutoFd (HostAck) goes high to acknowledge */
-	parport_pc_frob_control (port,
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_AUTOFD,
 				 0);
-dump_parport_state ("pre 45", port);
+	dump_parport_state("pre 45", port);
 	/* Event 45: Wait for nAck to go high */
-/*	r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, PARPORT_STATUS_ACK); */
-dump_parport_state ("post 45", port);
-r = 0;
+	/* r = parport_wait_peripheral(port, PARPORT_STATUS_ACK,
+						PARPORT_STATUS_ACK); */
+	dump_parport_state("post 45", port);
+	r = 0;
 	if (r) {
 		/* timed out while waiting for peripheral to respond to ack */
-		printk (KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
+		printk(KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
 
 		/* keep hold of the byte we've got already */
 		goto out_no_data;
 	}
 	/* Event 46: nAutoFd (HostAck) goes low to accept more data */
-	parport_pc_frob_control (port,
+	parport_pc_frob_control(port,
 				 PARPORT_CONTROL_AUTOFD,
 				 PARPORT_CONTROL_AUTOFD);
 
 
-dump_parport_state ("rev idle", port);
+	dump_parport_state("rev idle", port);
 	/* Do the transfer. */
 	while (left > fifofull) {
 		int ret;
 		unsigned long expire = jiffies + port->cad->timeout;
-		unsigned char ecrval = inb (ECONTROL (port));
+		unsigned char ecrval = inb(ECONTROL(port));
 
-		if (need_resched() && time_before (jiffies, expire))
+		if (need_resched() && time_before(jiffies, expire))
 			/* Can't yield the port. */
-			schedule ();
+			schedule();
 
 		/* At this point, the FIFO may already be full. In
-                 * that case ECP is already holding back the
-                 * peripheral (assuming proper design) with a delayed
-                 * handshake.  Work fast to avoid a peripheral
-                 * timeout.  */
+		 * that case ECP is already holding back the
+		 * peripheral (assuming proper design) with a delayed
+		 * handshake.  Work fast to avoid a peripheral
+		 * timeout.  */
 
 		if (ecrval & 0x01) {
 			/* FIFO is empty. Wait for interrupt. */
-dump_parport_state ("FIFO empty", port);
+			dump_parport_state("FIFO empty", port);
 
 			/* Anyone else waiting for the port? */
 			if (port->waithead) {
-				printk (KERN_DEBUG "Somebody wants the port\n");
+				printk(KERN_DEBUG "Somebody wants the port\n");
 				break;
 			}
 
 			/* Clear serviceIntr */
-			ECR_WRITE (port, ecrval & ~(1<<2));
-		false_alarm:
-dump_parport_state ("waiting", port);
-			ret = parport_wait_event (port, HZ);
-DPRINTK (KERN_DEBUG "parport_wait_event returned %d\n", ret);
+			ECR_WRITE(port, ecrval & ~(1<<2));
+false_alarm:
+			dump_parport_state("waiting", port);
+			ret = parport_wait_event(port, HZ);
+			DPRINTK(KERN_DEBUG "parport_wait_event returned %d\n",
+									ret);
 			if (ret < 0)
 				break;
 			ret = 0;
-			if (!time_before (jiffies, expire)) {
+			if (!time_before(jiffies, expire)) {
 				/* Timed out. */
-dump_parport_state ("timeout", port);
-				printk (KERN_DEBUG "PIO read timed out\n");
+				dump_parport_state("timeout", port);
+				printk(KERN_DEBUG "PIO read timed out\n");
 				break;
 			}
-			ecrval = inb (ECONTROL (port));
+			ecrval = inb(ECONTROL(port));
 			if (!(ecrval & (1<<2))) {
 				if (need_resched() &&
-				    time_before (jiffies, expire)) {
-					schedule ();
+				    time_before(jiffies, expire)) {
+					schedule();
 				}
 				goto false_alarm;
 			}
 
 			/* Depending on how the FIFO threshold was
-                         * set, how long interrupt service took, and
-                         * how fast the peripheral is, we might be
-                         * lucky and have a just filled FIFO. */
+			 * set, how long interrupt service took, and
+			 * how fast the peripheral is, we might be
+			 * lucky and have a just filled FIFO. */
 			continue;
 		}
 
 		if (ecrval & 0x02) {
 			/* FIFO is full. */
-dump_parport_state ("FIFO full", port);
-			insb (fifo, bufp, fifo_depth);
+dump_parport_state("FIFO full", port);
+			insb(fifo, bufp, fifo_depth);
 			bufp += fifo_depth;
 			left -= fifo_depth;
 			continue;
 		}
 
-DPRINTK (KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n");
+DPRINTK(KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n");
 
 		/* FIFO not filled.  We will cycle this loop for a while
-                 * and either the peripheral will fill it faster,
-                 * tripping a fast empty with insb, or we empty it. */
-		*bufp++ = inb (fifo);
+		 * and either the peripheral will fill it faster,
+		 * tripping a fast empty with insb, or we empty it. */
+		*bufp++ = inb(fifo);
 		left--;
 	}
 
 	/* scoop up anything left in the FIFO */
-	while (left && !(inb (ECONTROL (port) & 0x01))) {
-		*bufp++ = inb (fifo);
+	while (left && !(inb(ECONTROL(port) & 0x01))) {
+		*bufp++ = inb(fifo);
 		left--;
 	}
 
 	port->ieee1284.phase = IEEE1284_PH_REV_IDLE;
-dump_parport_state ("rev idle2", port);
+dump_parport_state("rev idle2", port);
 
 out_no_data:
 
 	/* Go to forward idle mode to shut the peripheral up (event 47). */
-	parport_frob_control (port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
+	parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
 
 	/* event 49: PError goes high */
-	r = parport_wait_peripheral (port,
+	r = parport_wait_peripheral(port,
 				     PARPORT_STATUS_PAPEROUT,
 				     PARPORT_STATUS_PAPEROUT);
 	if (r) {
-		printk (KERN_DEBUG
+		printk(KERN_DEBUG
 			"%s: PE timeout FWDIDLE (%d) in ecp_read_block_pio\n",
 			port->name, r);
 	}
@@ -1141,14 +1156,14 @@ out_no_data:
 
 	/* Finish up. */
 	{
-		int lost = get_fifo_residue (port);
+		int lost = get_fifo_residue(port);
 		if (lost)
 			/* Shouldn't happen with compliant peripherals. */
-			printk (KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
+			printk(KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
 				port->name, lost);
 	}
 
-dump_parport_state ("fwd idle", port);
+dump_parport_state("fwd idle", port);
 	return length - left;
 }
 #endif  /*  0  */
@@ -1164,8 +1179,7 @@ dump_parport_state ("fwd idle", port);
 
 /* GCC is not inlining extern inline function later overwriten to non-inline,
    so we use outlined_ variants here.  */
-static const struct parport_operations parport_pc_ops =
-{
+static const struct parport_operations parport_pc_ops = {
 	.write_data	= parport_pc_write_data,
 	.read_data	= parport_pc_read_data,
 
@@ -1205,46 +1219,52 @@ static const struct parport_operations parport_pc_ops =
 /* Super-IO chipset detection, Winbond, SMSC */
 static void __devinit show_parconfig_smsc37c669(int io, int key)
 {
-	int cr1,cr4,cra,cr23,cr26,cr27,i=0;
-	static const char *const modes[]={
+	int cr1, cr4, cra, cr23, cr26, cr27, i = 0;
+	static const char *const modes[] = {
 		"SPP and Bidirectional (PS/2)",
 		"EPP and SPP",
 		"ECP",
 		"ECP and EPP" };
 
-	outb(key,io);
-	outb(key,io);
-	outb(1,io);
-	cr1=inb(io+1);
-	outb(4,io);
-	cr4=inb(io+1);
-	outb(0x0a,io);
-	cra=inb(io+1);
-	outb(0x23,io);
-	cr23=inb(io+1);
-	outb(0x26,io);
-	cr26=inb(io+1);
-	outb(0x27,io);
-	cr27=inb(io+1);
-	outb(0xaa,io);
+	outb(key, io);
+	outb(key, io);
+	outb(1, io);
+	cr1 = inb(io + 1);
+	outb(4, io);
+	cr4 = inb(io + 1);
+	outb(0x0a, io);
+	cra = inb(io + 1);
+	outb(0x23, io);
+	cr23 = inb(io + 1);
+	outb(0x26, io);
+	cr26 = inb(io + 1);
+	outb(0x27, io);
+	cr27 = inb(io + 1);
+	outb(0xaa, io);
 
 	if (verbose_probing) {
-		printk (KERN_INFO "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
+		printk(KERN_INFO
+			"SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
 			"A=0x%2x, 23=0x%02x, 26=0x%02x, 27=0x%02x\n",
-			cr1,cr4,cra,cr23,cr26,cr27);
-		
+			cr1, cr4, cra, cr23, cr26, cr27);
+
 		/* The documentation calls DMA and IRQ-Lines by letters, so
 		   the board maker can/will wire them
 		   appropriately/randomly...  G=reserved H=IDE-irq, */
-		printk (KERN_INFO "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, "
-			"fifo threshold=%d\n", cr23*4,
-			(cr27 &0x0f) ? 'A'-1+(cr27 &0x0f): '-',
-			(cr26 &0x0f) ? 'A'-1+(cr26 &0x0f): '-', cra & 0x0f);
+		printk(KERN_INFO
+	"SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, fifo threshold=%d\n",
+				cr23 * 4,
+				(cr27 & 0x0f) ? 'A' - 1 + (cr27 & 0x0f) : '-',
+				(cr26 & 0x0f) ? 'A' - 1 + (cr26 & 0x0f) : '-',
+				cra & 0x0f);
 		printk(KERN_INFO "SMSC LPT Config: enabled=%s power=%s\n",
-		       (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no");
-		printk(KERN_INFO "SMSC LPT Config: Port mode=%s, EPP version =%s\n",
-		       (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], 
-		       (cr4 & 0x40) ? "1.7" : "1.9");
+		       (cr23 * 4 >= 0x100) ? "yes" : "no",
+		       (cr1 & 4) ? "yes" : "no");
+		printk(KERN_INFO
+			"SMSC LPT Config: Port mode=%s, EPP version =%s\n",
+				(cr1 & 0x08) ? "Standard mode only (SPP)"
+					      : modes[cr4 & 0x03],
+				(cr4 & 0x40) ? "1.7" : "1.9");
 	}
 
 	/* Heuristics !  BIOS setup for this mainboard device limits
@@ -1258,32 +1278,32 @@ static void __devinit show_parconfig_smsc37c669(int io, int key)
 			printk(KERN_INFO "Super-IO: too many chips!\n");
 		} else {
 			int d;
-			switch (cr23*4) {
-				case 0x3bc:
-					superios[i].io = 0x3bc;
-					superios[i].irq = 7;
-					break;
-				case 0x378:
-					superios[i].io = 0x378;
-					superios[i].irq = 7;
-					break;
-				case 0x278:
-					superios[i].io = 0x278;
-					superios[i].irq = 5;
+			switch (cr23 * 4) {
+			case 0x3bc:
+				superios[i].io = 0x3bc;
+				superios[i].irq = 7;
+				break;
+			case 0x378:
+				superios[i].io = 0x378;
+				superios[i].irq = 7;
+				break;
+			case 0x278:
+				superios[i].io = 0x278;
+				superios[i].irq = 5;
 			}
-			d=(cr26 &0x0f);
-			if((d==1) || (d==3)) 
-				superios[i].dma= d;
+			d = (cr26 & 0x0f);
+			if (d == 1 || d == 3)
+				superios[i].dma = d;
 			else
-				superios[i].dma= PARPORT_DMA_NONE;
+				superios[i].dma = PARPORT_DMA_NONE;
 		}
- 	}
+	}
 }
 
 
 static void __devinit show_parconfig_winbond(int io, int key)
 {
-	int cr30,cr60,cr61,cr70,cr74,crf0,i=0;
+	int cr30, cr60, cr61, cr70, cr74, crf0, i = 0;
 	static const char *const modes[] = {
 		"Standard (SPP) and Bidirectional(PS/2)", /* 0 */
 		"EPP-1.9 and SPP",
@@ -1296,40 +1316,43 @@ static void __devinit show_parconfig_winbond(int io, int key)
 	static char *const irqtypes[] = {
 		"pulsed low, high-Z",
 		"follows nACK" };
-		
+
 	/* The registers are called compatible-PnP because the
-           register layout is modelled after ISA-PnP, the access
-           method is just another ... */
-	outb(key,io);
-	outb(key,io);
-	outb(0x07,io);   /* Register 7: Select Logical Device */
-	outb(0x01,io+1); /* LD1 is Parallel Port */
-	outb(0x30,io);
-	cr30=inb(io+1);
-	outb(0x60,io);
-	cr60=inb(io+1);
-	outb(0x61,io);
-	cr61=inb(io+1);
-	outb(0x70,io);
-	cr70=inb(io+1);
-	outb(0x74,io);
-	cr74=inb(io+1);
-	outb(0xf0,io);
-	crf0=inb(io+1);
-	outb(0xaa,io);
+	   register layout is modelled after ISA-PnP, the access
+	   method is just another ... */
+	outb(key, io);
+	outb(key, io);
+	outb(0x07, io);   /* Register 7: Select Logical Device */
+	outb(0x01, io + 1); /* LD1 is Parallel Port */
+	outb(0x30, io);
+	cr30 = inb(io + 1);
+	outb(0x60, io);
+	cr60 = inb(io + 1);
+	outb(0x61, io);
+	cr61 = inb(io + 1);
+	outb(0x70, io);
+	cr70 = inb(io + 1);
+	outb(0x74, io);
+	cr74 = inb(io + 1);
+	outb(0xf0, io);
+	crf0 = inb(io + 1);
+	outb(0xaa, io);
 
 	if (verbose_probing) {
-		printk(KERN_INFO "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x "
-		       "70=%02x 74=%02x, f0=%02x\n", cr30,cr60,cr61,cr70,cr74,crf0);
-		printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", 
-		       (cr30 & 0x01) ? "yes":"no", cr60,cr61,cr70&0x0f );
+		printk(KERN_INFO
+    "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x 70=%02x 74=%02x, f0=%02x\n",
+					cr30, cr60, cr61, cr70, cr74, crf0);
+		printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ",
+		       (cr30 & 0x01) ? "yes" : "no", cr60, cr61, cr70 & 0x0f);
 		if ((cr74 & 0x07) > 3)
 			printk("dma=none\n");
 		else
-			printk("dma=%d\n",cr74 & 0x07);
-		printk(KERN_INFO "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
-		       irqtypes[crf0>>7], (crf0>>3)&0x0f);
-		printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", modes[crf0 & 0x07]);
+			printk("dma=%d\n", cr74 & 0x07);
+		printk(KERN_INFO
+		    "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
+					irqtypes[crf0>>7], (crf0>>3)&0x0f);
+		printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n",
+					modes[crf0 & 0x07]);
 	}
 
 	if (cr30 & 0x01) { /* the settings can be interrogated later ... */
@@ -1346,60 +1369,82 @@ static void __devinit show_parconfig_winbond(int io, int key)
 	}
 }
 
-static void __devinit decode_winbond(int efer, int key, int devid, int devrev, int oldid)
+static void __devinit decode_winbond(int efer, int key, int devid,
+							int devrev, int oldid)
 {
 	const char *type = "unknown";
-	int id,progif=2;
+	int id, progif = 2;
 
 	if (devid == devrev)
 		/* simple heuristics, we happened to read some
-                   non-winbond register */
+		   non-winbond register */
 		return;
 
-	id=(devid<<8) | devrev;
+	id = (devid << 8) | devrev;
 
 	/* Values are from public data sheets pdf files, I can just
-           confirm 83977TF is correct :-) */
-	if      (id == 0x9771) type="83977F/AF";
-	else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x";
-	else if (id == 0x9774) type="83977ATF";
-	else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x";
-	else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x";
-	else if ((id & ~0x0f) == 0x5210) type="83627";
-	else if ((id & ~0x0f) == 0x6010) type="83697HF";
-	else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;}
-	else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;}
-	else progif=0;
+	   confirm 83977TF is correct :-) */
+	if (id == 0x9771)
+		type = "83977F/AF";
+	else if (id == 0x9773)
+		type = "83977TF / SMSC 97w33x/97w34x";
+	else if (id == 0x9774)
+		type = "83977ATF";
+	else if ((id & ~0x0f) == 0x5270)
+		type = "83977CTF / SMSC 97w36x";
+	else if ((id & ~0x0f) == 0x52f0)
+		type = "83977EF / SMSC 97w35x";
+	else if ((id & ~0x0f) == 0x5210)
+		type = "83627";
+	else if ((id & ~0x0f) == 0x6010)
+		type = "83697HF";
+	else if ((oldid & 0x0f) == 0x0a) {
+		type = "83877F";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0b) {
+		type = "83877AF";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0c) {
+		type = "83877TF";
+		progif = 1;
+	} else if ((oldid & 0x0f) == 0x0d) {
+		type = "83877ATF";
+		progif = 1;
+	} else
+		progif = 0;
 
 	if (verbose_probing)
 		printk(KERN_INFO "Winbond chip at EFER=0x%x key=0x%02x "
-		       "devid=%02x devrev=%02x oldid=%02x type=%s\n", 
+		       "devid=%02x devrev=%02x oldid=%02x type=%s\n",
 		       efer, key, devid, devrev, oldid, type);
 
 	if (progif == 2)
-		show_parconfig_winbond(efer,key);
+		show_parconfig_winbond(efer, key);
 }
 
 static void __devinit decode_smsc(int efer, int key, int devid, int devrev)
 {
-        const char *type = "unknown";
+	const char *type = "unknown";
 	void (*func)(int io, int key);
-        int id;
+	int id;
 
-        if (devid == devrev)
+	if (devid == devrev)
 		/* simple heuristics, we happened to read some
-                   non-smsc register */
+		   non-smsc register */
 		return;
 
-	func=NULL;
-        id=(devid<<8) | devrev;
+	func = NULL;
+	id = (devid << 8) | devrev;
 
-	if	(id==0x0302) {type="37c669"; func=show_parconfig_smsc37c669;}
-	else if	(id==0x6582) type="37c665IR";
-	else if	(devid==0x65) type="37c665GT";
-	else if	(devid==0x66) type="37c666GT";
+	if (id == 0x0302) {
+		type = "37c669";
+		func = show_parconfig_smsc37c669;
+	} else if (id == 0x6582)
+		type = "37c665IR";
+	else if	(devid == 0x65)
+		type = "37c665GT";
+	else if	(devid == 0x66)
+		type = "37c666GT";
 
 	if (verbose_probing)
 		printk(KERN_INFO "SMSC chip at EFER=0x%x "
@@ -1407,138 +1452,138 @@ static void __devinit decode_smsc(int efer, int key, int devid, int devrev)
 		       efer, key, devid, devrev, type);
 
 	if (func)
-		func(efer,key);
+		func(efer, key);
 }
 
 
 static void __devinit winbond_check(int io, int key)
 {
-	int devid,devrev,oldid,x_devid,x_devrev,x_oldid;
+	int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without key */
-	outb(0x20,io);
-	x_devid=inb(io+1);
-	outb(0x21,io);
-	x_devrev=inb(io+1);
-	outb(0x09,io);
-	x_oldid=inb(io+1);
-
-	outb(key,io);
-	outb(key,io);     /* Write Magic Sequence to EFER, extended
-                             funtion enable register */
-	outb(0x20,io);    /* Write EFIR, extended function index register */
-	devid=inb(io+1);  /* Read EFDR, extended function data register */
-	outb(0x21,io);
-	devrev=inb(io+1);
-	outb(0x09,io);
-	oldid=inb(io+1);
-	outb(0xaa,io);    /* Magic Seal */
+	outb(0x20, io);
+	x_devid = inb(io + 1);
+	outb(0x21, io);
+	x_devrev = inb(io + 1);
+	outb(0x09, io);
+	x_oldid = inb(io + 1);
+
+	outb(key, io);
+	outb(key, io);     /* Write Magic Sequence to EFER, extended
+			      funtion enable register */
+	outb(0x20, io);    /* Write EFIR, extended function index register */
+	devid = inb(io + 1);  /* Read EFDR, extended function data register */
+	outb(0x21, io);
+	devrev = inb(io + 1);
+	outb(0x09, io);
+	oldid = inb(io + 1);
+	outb(0xaa, io);    /* Magic Seal */
 
 	if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
 		goto out; /* protection against false positives */
 
-	decode_winbond(io,key,devid,devrev,oldid);
+	decode_winbond(io, key, devid, devrev, oldid);
 out:
 	release_region(io, 3);
 }
 
-static void __devinit winbond_check2(int io,int key)
+static void __devinit winbond_check2(int io, int key)
 {
-        int devid,devrev,oldid,x_devid,x_devrev,x_oldid;
+	int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without the key */
-	outb(0x20,io+2);
-	x_devid=inb(io+2);
-	outb(0x21,io+1);
-	x_devrev=inb(io+2);
-	outb(0x09,io+1);
-	x_oldid=inb(io+2);
-
-        outb(key,io);     /* Write Magic Byte to EFER, extended
-                             funtion enable register */
-        outb(0x20,io+2);  /* Write EFIR, extended function index register */
-        devid=inb(io+2);  /* Read EFDR, extended function data register */
-        outb(0x21,io+1);
-        devrev=inb(io+2);
-        outb(0x09,io+1);
-        oldid=inb(io+2);
-        outb(0xaa,io);    /* Magic Seal */
-
-	if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
+	outb(0x20, io + 2);
+	x_devid = inb(io + 2);
+	outb(0x21, io + 1);
+	x_devrev = inb(io + 2);
+	outb(0x09, io + 1);
+	x_oldid = inb(io + 2);
+
+	outb(key, io);     /* Write Magic Byte to EFER, extended
+			      funtion enable register */
+	outb(0x20, io + 2);  /* Write EFIR, extended function index register */
+	devid = inb(io + 2);  /* Read EFDR, extended function data register */
+	outb(0x21, io + 1);
+	devrev = inb(io + 2);
+	outb(0x09, io + 1);
+	oldid = inb(io + 2);
+	outb(0xaa, io);    /* Magic Seal */
+
+	if (x_devid == devid && x_devrev == devrev && x_oldid == oldid)
 		goto out; /* protection against false positives */
 
-	decode_winbond(io,key,devid,devrev,oldid);
+	decode_winbond(io, key, devid, devrev, oldid);
 out:
 	release_region(io, 3);
 }
 
 static void __devinit smsc_check(int io, int key)
 {
-        int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev;
+	int id, rev, oldid, oldrev, x_id, x_rev, x_oldid, x_oldrev;
 
 	if (!request_region(io, 3, __func__))
 		return;
 
 	/* First probe without the key */
-	outb(0x0d,io);
-	x_oldid=inb(io+1);
-	outb(0x0e,io);
-	x_oldrev=inb(io+1);
-	outb(0x20,io);
-	x_id=inb(io+1);
-	outb(0x21,io);
-	x_rev=inb(io+1);
-
-        outb(key,io);
-        outb(key,io);     /* Write Magic Sequence to EFER, extended
-                             funtion enable register */
-        outb(0x0d,io);    /* Write EFIR, extended function index register */
-        oldid=inb(io+1);  /* Read EFDR, extended function data register */
-        outb(0x0e,io);
-        oldrev=inb(io+1);
-	outb(0x20,io);
-	id=inb(io+1);
-	outb(0x21,io);
-	rev=inb(io+1);
-        outb(0xaa,io);    /* Magic Seal */
-
-	if ((x_id == id) && (x_oldrev == oldrev) &&
-	    (x_oldid == oldid) && (x_rev == rev))
+	outb(0x0d, io);
+	x_oldid = inb(io + 1);
+	outb(0x0e, io);
+	x_oldrev = inb(io + 1);
+	outb(0x20, io);
+	x_id = inb(io + 1);
+	outb(0x21, io);
+	x_rev = inb(io + 1);
+
+	outb(key, io);
+	outb(key, io);     /* Write Magic Sequence to EFER, extended
+			      funtion enable register */
+	outb(0x0d, io);    /* Write EFIR, extended function index register */
+	oldid = inb(io + 1);  /* Read EFDR, extended function data register */
+	outb(0x0e, io);
+	oldrev = inb(io + 1);
+	outb(0x20, io);
+	id = inb(io + 1);
+	outb(0x21, io);
+	rev = inb(io + 1);
+	outb(0xaa, io);    /* Magic Seal */
+
+	if (x_id == id && x_oldrev == oldrev &&
+	    x_oldid == oldid && x_rev == rev)
 		goto out; /* protection against false positives */
 
-        decode_smsc(io,key,oldid,oldrev);
+	decode_smsc(io, key, oldid, oldrev);
 out:
 	release_region(io, 3);
 }
 
 
-static void __devinit detect_and_report_winbond (void)
-{ 
+static void __devinit detect_and_report_winbond(void)
+{
 	if (verbose_probing)
 		printk(KERN_DEBUG "Winbond Super-IO detection, now testing ports 3F0,370,250,4E,2E ...\n");
-	winbond_check(0x3f0,0x87);
-	winbond_check(0x370,0x87);
-	winbond_check(0x2e ,0x87);
-	winbond_check(0x4e ,0x87);
-	winbond_check(0x3f0,0x86);
-	winbond_check2(0x250,0x88); 
-	winbond_check2(0x250,0x89);
+	winbond_check(0x3f0, 0x87);
+	winbond_check(0x370, 0x87);
+	winbond_check(0x2e , 0x87);
+	winbond_check(0x4e , 0x87);
+	winbond_check(0x3f0, 0x86);
+	winbond_check2(0x250, 0x88);
+	winbond_check2(0x250, 0x89);
 }
 
-static void __devinit detect_and_report_smsc (void)
+static void __devinit detect_and_report_smsc(void)
 {
 	if (verbose_probing)
 		printk(KERN_DEBUG "SMSC Super-IO detection, now testing Ports 2F0, 370 ...\n");
-	smsc_check(0x3f0,0x55);
-	smsc_check(0x370,0x55);
-	smsc_check(0x3f0,0x44);
-	smsc_check(0x370,0x44);
+	smsc_check(0x3f0, 0x55);
+	smsc_check(0x370, 0x55);
+	smsc_check(0x3f0, 0x44);
+	smsc_check(0x370, 0x44);
 }
 
 static void __devinit detect_and_report_it87(void)
@@ -1573,7 +1618,7 @@ static void __devinit detect_and_report_it87(void)
 }
 #endif /* CONFIG_PARPORT_PC_SUPERIO */
 
-static int get_superio_dma (struct parport *p)
+static int get_superio_dma(struct parport *p)
 {
 	int i = 0;
 
@@ -1584,15 +1629,15 @@ static int get_superio_dma (struct parport *p)
 	return PARPORT_DMA_NONE;
 }
 
-static int get_superio_irq (struct parport *p)
+static int get_superio_irq(struct parport *p)
 {
 	int i = 0;
 
-        while ((i < NR_SUPERIOS) && (superios[i].io != p->base))
-                i++;
-        if (i != NR_SUPERIOS)
-                return superios[i].irq;
-        return PARPORT_IRQ_NONE;
+	while ((i < NR_SUPERIOS) && (superios[i].io != p->base))
+		i++;
+	if (i != NR_SUPERIOS)
+		return superios[i].irq;
+	return PARPORT_IRQ_NONE;
 }
 
 
@@ -1600,9 +1645,9 @@ static int get_superio_irq (struct parport *p)
 
 /*
  * Checks for port existence, all ports support SPP MODE
- * Returns: 
+ * Returns:
  *         0           :  No parallel port at this address
- *  PARPORT_MODE_PCSPP :  SPP port detected 
+ *  PARPORT_MODE_PCSPP :  SPP port detected
  *                        (if the user specified an ioport himself,
  *                         this shall always be the case!)
  *
@@ -1612,7 +1657,7 @@ static int parport_SPP_supported(struct parport *pb)
 	unsigned char r, w;
 
 	/*
-	 * first clear an eventually pending EPP timeout 
+	 * first clear an eventually pending EPP timeout
 	 * I (sailer@ife.ee.ethz.ch) have an SMSC chipset
 	 * that does not even respond to SPP cycles if an EPP
 	 * timeout is pending
@@ -1621,19 +1666,19 @@ static int parport_SPP_supported(struct parport *pb)
 
 	/* Do a simple read-write test to make sure the port exists. */
 	w = 0xc;
-	outb (w, CONTROL (pb));
+	outb(w, CONTROL(pb));
 
 	/* Is there a control register that we can read from?  Some
 	 * ports don't allow reads, so read_control just returns a
 	 * software copy. Some ports _do_ allow reads, so bypass the
 	 * software copy here.  In addition, some bits aren't
 	 * writable. */
-	r = inb (CONTROL (pb));
+	r = inb(CONTROL(pb));
 	if ((r & 0xf) == w) {
 		w = 0xe;
-		outb (w, CONTROL (pb));
-		r = inb (CONTROL (pb));
-		outb (0xc, CONTROL (pb));
+		outb(w, CONTROL(pb));
+		r = inb(CONTROL(pb));
+		outb(0xc, CONTROL(pb));
 		if ((r & 0xf) == w)
 			return PARPORT_MODE_PCSPP;
 	}
@@ -1641,18 +1686,18 @@ static int parport_SPP_supported(struct parport *pb)
 	if (user_specified)
 		/* That didn't work, but the user thinks there's a
 		 * port here. */
-		printk (KERN_INFO "parport 0x%lx (WARNING): CTR: "
+		printk(KERN_INFO "parport 0x%lx (WARNING): CTR: "
 			"wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
 
 	/* Try the data register.  The data lines aren't tri-stated at
 	 * this stage, so we expect back what we wrote. */
 	w = 0xaa;
-	parport_pc_write_data (pb, w);
-	r = parport_pc_read_data (pb);
+	parport_pc_write_data(pb, w);
+	r = parport_pc_read_data(pb);
 	if (r == w) {
 		w = 0x55;
-		parport_pc_write_data (pb, w);
-		r = parport_pc_read_data (pb);
+		parport_pc_write_data(pb, w);
+		r = parport_pc_read_data(pb);
 		if (r == w)
 			return PARPORT_MODE_PCSPP;
 	}
@@ -1660,9 +1705,9 @@ static int parport_SPP_supported(struct parport *pb)
 	if (user_specified) {
 		/* Didn't work, but the user is convinced this is the
 		 * place. */
-		printk (KERN_INFO "parport 0x%lx (WARNING): DATA: "
+		printk(KERN_INFO "parport 0x%lx (WARNING): DATA: "
 			"wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
-		printk (KERN_INFO "parport 0x%lx: You gave this address, "
+		printk(KERN_INFO "parport 0x%lx: You gave this address, "
 			"but there is probably no parallel port there!\n",
 			pb->base);
 	}
@@ -1693,33 +1738,33 @@ static int parport_ECR_present(struct parport *pb)
 	struct parport_pc_private *priv = pb->private_data;
 	unsigned char r = 0xc;
 
-	outb (r, CONTROL (pb));
-	if ((inb (ECONTROL (pb)) & 0x3) == (r & 0x3)) {
-		outb (r ^ 0x2, CONTROL (pb)); /* Toggle bit 1 */
+	outb(r, CONTROL(pb));
+	if ((inb(ECONTROL(pb)) & 0x3) == (r & 0x3)) {
+		outb(r ^ 0x2, CONTROL(pb)); /* Toggle bit 1 */
 
-		r = inb (CONTROL (pb));
-		if ((inb (ECONTROL (pb)) & 0x2) == (r & 0x2))
+		r = inb(CONTROL(pb));
+		if ((inb(ECONTROL(pb)) & 0x2) == (r & 0x2))
 			goto no_reg; /* Sure that no ECR register exists */
 	}
-	
-	if ((inb (ECONTROL (pb)) & 0x3 ) != 0x1)
+
+	if ((inb(ECONTROL(pb)) & 0x3) != 0x1)
 		goto no_reg;
 
-	ECR_WRITE (pb, 0x34);
-	if (inb (ECONTROL (pb)) != 0x35)
+	ECR_WRITE(pb, 0x34);
+	if (inb(ECONTROL(pb)) != 0x35)
 		goto no_reg;
 
 	priv->ecr = 1;
-	outb (0xc, CONTROL (pb));
-	
+	outb(0xc, CONTROL(pb));
+
 	/* Go to mode 000 */
-	frob_set_mode (pb, ECR_SPP);
+	frob_set_mode(pb, ECR_SPP);
 
 	return 1;
 
  no_reg:
-	outb (0xc, CONTROL (pb));
-	return 0; 
+	outb(0xc, CONTROL(pb));
+	return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
@@ -1729,7 +1774,7 @@ static int parport_ECR_present(struct parport *pb)
  * allows us to read data from the data lines.  In theory we would get back
  * 0xff but any peripheral attached to the port may drag some or all of the
  * lines down to zero.  So if we get back anything that isn't the contents
- * of the data register we deem PS/2 support to be present. 
+ * of the data register we deem PS/2 support to be present.
  *
  * Some SPP ports have "half PS/2" ability - you can't turn off the line
  * drivers, but an external peripheral with sufficiently beefy drivers of
@@ -1737,26 +1782,28 @@ static int parport_ECR_present(struct parport *pb)
  * where they can then be read back as normal.  Ports with this property
  * and the right type of device attached are likely to fail the SPP test,
  * (as they will appear to have stuck bits) and so the fact that they might
- * be misdetected here is rather academic. 
+ * be misdetected here is rather academic.
  */
 
 static int parport_PS2_supported(struct parport *pb)
 {
 	int ok = 0;
-  
+
 	clear_epp_timeout(pb);
 
 	/* try to tri-state the buffer */
-	parport_pc_data_reverse (pb);
-	
+	parport_pc_data_reverse(pb);
+
 	parport_pc_write_data(pb, 0x55);
-	if (parport_pc_read_data(pb) != 0x55) ok++;
+	if (parport_pc_read_data(pb) != 0x55)
+		ok++;
 
 	parport_pc_write_data(pb, 0xaa);
-	if (parport_pc_read_data(pb) != 0xaa) ok++;
+	if (parport_pc_read_data(pb) != 0xaa)
+		ok++;
 
 	/* cancel input mode */
-	parport_pc_data_forward (pb);
+	parport_pc_data_forward(pb);
 
 	if (ok) {
 		pb->modes |= PARPORT_MODE_TRISTATE;
@@ -1775,68 +1822,68 @@ static int parport_ECP_supported(struct parport *pb)
 	int config, configb;
 	int pword;
 	struct parport_pc_private *priv = pb->private_data;
-	/* Translate ECP intrLine to ISA irq value */	
-	static const int intrline[]= { 0, 7, 9, 10, 11, 14, 15, 5 }; 
+	/* Translate ECP intrLine to ISA irq value */
+	static const int intrline[] = { 0, 7, 9, 10, 11, 14, 15, 5 };
 
 	/* If there is no ECR, we have no hope of supporting ECP. */
 	if (!priv->ecr)
 		return 0;
 
 	/* Find out FIFO depth */
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, ECR_TST << 5); /* TEST FIFO */
-	for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02); i++)
-		outb (0xaa, FIFO (pb));
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, ECR_TST << 5); /* TEST FIFO */
+	for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02); i++)
+		outb(0xaa, FIFO(pb));
 
 	/*
 	 * Using LGS chipset it uses ECR register, but
 	 * it doesn't support ECP or FIFO MODE
 	 */
 	if (i == 1024) {
-		ECR_WRITE (pb, ECR_SPP << 5);
+		ECR_WRITE(pb, ECR_SPP << 5);
 		return 0;
 	}
 
 	priv->fifo_depth = i;
 	if (verbose_probing)
-		printk (KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
+		printk(KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
 
 	/* Find out writeIntrThreshold */
-	frob_econtrol (pb, 1<<2, 1<<2);
-	frob_econtrol (pb, 1<<2, 0);
+	frob_econtrol(pb, 1<<2, 1<<2);
+	frob_econtrol(pb, 1<<2, 0);
 	for (i = 1; i <= priv->fifo_depth; i++) {
-		inb (FIFO (pb));
-		udelay (50);
-		if (inb (ECONTROL (pb)) & (1<<2))
+		inb(FIFO(pb));
+		udelay(50);
+		if (inb(ECONTROL(pb)) & (1<<2))
 			break;
 	}
 
 	if (i <= priv->fifo_depth) {
 		if (verbose_probing)
-			printk (KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
+			printk(KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
 				pb->base, i);
 	} else
 		/* Number of bytes we know we can write if we get an
-                   interrupt. */
+		   interrupt. */
 		i = 0;
 
 	priv->writeIntrThreshold = i;
 
 	/* Find out readIntrThreshold */
-	frob_set_mode (pb, ECR_PS2); /* Reset FIFO and enable PS2 */
-	parport_pc_data_reverse (pb); /* Must be in PS2 mode */
-	frob_set_mode (pb, ECR_TST); /* Test FIFO */
-	frob_econtrol (pb, 1<<2, 1<<2);
-	frob_econtrol (pb, 1<<2, 0);
+	frob_set_mode(pb, ECR_PS2); /* Reset FIFO and enable PS2 */
+	parport_pc_data_reverse(pb); /* Must be in PS2 mode */
+	frob_set_mode(pb, ECR_TST); /* Test FIFO */
+	frob_econtrol(pb, 1<<2, 1<<2);
+	frob_econtrol(pb, 1<<2, 0);
 	for (i = 1; i <= priv->fifo_depth; i++) {
-		outb (0xaa, FIFO (pb));
-		if (inb (ECONTROL (pb)) & (1<<2))
+		outb(0xaa, FIFO(pb));
+		if (inb(ECONTROL(pb)) & (1<<2))
 			break;
 	}
 
 	if (i <= priv->fifo_depth) {
 		if (verbose_probing)
-			printk (KERN_INFO "0x%lx: readIntrThreshold is %d\n",
+			printk(KERN_INFO "0x%lx: readIntrThreshold is %d\n",
 				pb->base, i);
 	} else
 		/* Number of bytes we can read if we get an interrupt. */
@@ -1844,23 +1891,23 @@ static int parport_ECP_supported(struct parport *pb)
 
 	priv->readIntrThreshold = i;
 
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, 0xf4); /* Configuration mode */
-	config = inb (CONFIGA (pb));
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, 0xf4); /* Configuration mode */
+	config = inb(CONFIGA(pb));
 	pword = (config >> 4) & 0x7;
 	switch (pword) {
 	case 0:
 		pword = 2;
-		printk (KERN_WARNING "0x%lx: Unsupported pword size!\n",
+		printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
 			pb->base);
 		break;
 	case 2:
 		pword = 4;
-		printk (KERN_WARNING "0x%lx: Unsupported pword size!\n",
+		printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
 			pb->base);
 		break;
 	default:
-		printk (KERN_WARNING "0x%lx: Unknown implementation ID\n",
+		printk(KERN_WARNING "0x%lx: Unknown implementation ID\n",
 			pb->base);
 		/* Assume 1 */
 	case 1:
@@ -1869,28 +1916,29 @@ static int parport_ECP_supported(struct parport *pb)
 	priv->pword = pword;
 
 	if (verbose_probing) {
-		printk (KERN_DEBUG "0x%lx: PWord is %d bits\n", pb->base, 8 * pword);
-		
-		printk (KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
+		printk(KERN_DEBUG "0x%lx: PWord is %d bits\n",
+			pb->base, 8 * pword);
+
+		printk(KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
 			config & 0x80 ? "Level" : "Pulses");
 
-		configb = inb (CONFIGB (pb));
-		printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
+		configb = inb(CONFIGB(pb));
+		printk(KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
 			pb->base, config, configb);
-		printk (KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
-		if ((configb >>3) & 0x07)
-			printk("%d",intrline[(configb >>3) & 0x07]);
+		printk(KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
+		if ((configb >> 3) & 0x07)
+			printk("%d", intrline[(configb >> 3) & 0x07]);
 		else
 			printk("<none or set by other means>");
-		printk (" dma=");
-		if( (configb & 0x03 ) == 0x00)
+		printk(" dma=");
+		if ((configb & 0x03) == 0x00)
 			printk("<none or set by other means>\n");
 		else
-			printk("%d\n",configb & 0x07);
+			printk("%d\n", configb & 0x07);
 	}
 
 	/* Go back to mode 000 */
-	frob_set_mode (pb, ECR_SPP);
+	frob_set_mode(pb, ECR_SPP);
 
 	return 1;
 }
@@ -1905,10 +1953,10 @@ static int parport_ECPPS2_supported(struct parport *pb)
 	if (!priv->ecr)
 		return 0;
 
-	oecr = inb (ECONTROL (pb));
-	ECR_WRITE (pb, ECR_PS2 << 5);
+	oecr = inb(ECONTROL(pb));
+	ECR_WRITE(pb, ECR_PS2 << 5);
 	result = parport_PS2_supported(pb);
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 	return result;
 }
 
@@ -1932,16 +1980,15 @@ static int parport_EPP_supported(struct parport *pb)
 	 */
 
 	/* If EPP timeout bit clear then EPP available */
-	if (!clear_epp_timeout(pb)) {
+	if (!clear_epp_timeout(pb))
 		return 0;  /* No way to clear timeout */
-	}
 
 	/* Check for Intel bug. */
 	if (priv->ecr) {
 		unsigned char i;
 		for (i = 0x00; i < 0x80; i += 0x20) {
-			ECR_WRITE (pb, i);
-			if (clear_epp_timeout (pb)) {
+			ECR_WRITE(pb, i);
+			if (clear_epp_timeout(pb)) {
 				/* Phony EPP in ECP. */
 				return 0;
 			}
@@ -1965,17 +2012,16 @@ static int parport_ECPEPP_supported(struct parport *pb)
 	int result;
 	unsigned char oecr;
 
-	if (!priv->ecr) {
+	if (!priv->ecr)
 		return 0;
-	}
 
-	oecr = inb (ECONTROL (pb));
+	oecr = inb(ECONTROL(pb));
 	/* Search for SMC style EPP+ECP mode */
-	ECR_WRITE (pb, 0x80);
-	outb (0x04, CONTROL (pb));
+	ECR_WRITE(pb, 0x80);
+	outb(0x04, CONTROL(pb));
 	result = parport_EPP_supported(pb);
 
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 
 	if (result) {
 		/* Set up access functions to use ECP+EPP hardware. */
@@ -1993,11 +2039,25 @@ static int parport_ECPEPP_supported(struct parport *pb)
 /* Don't bother probing for modes we know we won't use. */
 static int __devinit parport_PS2_supported(struct parport *pb) { return 0; }
 #ifdef CONFIG_PARPORT_PC_FIFO
-static int parport_ECP_supported(struct parport *pb) { return 0; }
+static int parport_ECP_supported(struct parport *pb)
+{
+	return 0;
+}
 #endif
-static int __devinit parport_EPP_supported(struct parport *pb) { return 0; }
-static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;}
-static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;}
+static int __devinit parport_EPP_supported(struct parport *pb)
+{
+	return 0;
+}
+
+static int __devinit parport_ECPEPP_supported(struct parport *pb)
+{
+	return 0;
+}
+
+static int __devinit parport_ECPPS2_supported(struct parport *pb)
+{
+	return 0;
+}
 
 #endif /* No IEEE 1284 support */
 
@@ -2007,17 +2067,17 @@ static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;}
 static int programmable_irq_support(struct parport *pb)
 {
 	int irq, intrLine;
-	unsigned char oecr = inb (ECONTROL (pb));
+	unsigned char oecr = inb(ECONTROL(pb));
 	static const int lookup[8] = {
 		PARPORT_IRQ_NONE, 7, 9, 10, 11, 14, 15, 5
 	};
 
-	ECR_WRITE (pb, ECR_CNF << 5); /* Configuration MODE */
+	ECR_WRITE(pb, ECR_CNF << 5); /* Configuration MODE */
 
-	intrLine = (inb (CONFIGB (pb)) >> 3) & 0x07;
+	intrLine = (inb(CONFIGB(pb)) >> 3) & 0x07;
 	irq = lookup[intrLine];
 
-	ECR_WRITE (pb, oecr);
+	ECR_WRITE(pb, oecr);
 	return irq;
 }
 
@@ -2027,17 +2087,17 @@ static int irq_probe_ECP(struct parport *pb)
 	unsigned long irqs;
 
 	irqs = probe_irq_on();
-		
-	ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-	ECR_WRITE (pb, (ECR_TST << 5) | 0x04);
-	ECR_WRITE (pb, ECR_TST << 5);
+
+	ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+	ECR_WRITE(pb, (ECR_TST << 5) | 0x04);
+	ECR_WRITE(pb, ECR_TST << 5);
 
 	/* If Full FIFO sure that writeIntrThreshold is generated */
-	for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02) ; i++) 
-		outb (0xaa, FIFO (pb));
-		
+	for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02) ; i++)
+		outb(0xaa, FIFO(pb));
+
 	pb->irq = probe_irq_off(irqs);
-	ECR_WRITE (pb, ECR_SPP << 5);
+	ECR_WRITE(pb, ECR_SPP << 5);
 
 	if (pb->irq <= 0)
 		pb->irq = PARPORT_IRQ_NONE;
@@ -2047,7 +2107,7 @@ static int irq_probe_ECP(struct parport *pb)
 
 /*
  * This detection seems that only works in National Semiconductors
- * This doesn't work in SMC, LGS, and Winbond 
+ * This doesn't work in SMC, LGS, and Winbond
  */
 static int irq_probe_EPP(struct parport *pb)
 {
@@ -2058,16 +2118,16 @@ static int irq_probe_EPP(struct parport *pb)
 	unsigned char oecr;
 
 	if (pb->modes & PARPORT_MODE_PCECR)
-		oecr = inb (ECONTROL (pb));
+		oecr = inb(ECONTROL(pb));
 
 	irqs = probe_irq_on();
 
 	if (pb->modes & PARPORT_MODE_PCECR)
-		frob_econtrol (pb, 0x10, 0x10);
-	
+		frob_econtrol(pb, 0x10, 0x10);
+
 	clear_epp_timeout(pb);
-	parport_pc_frob_control (pb, 0x20, 0x20);
-	parport_pc_frob_control (pb, 0x10, 0x10);
+	parport_pc_frob_control(pb, 0x20, 0x20);
+	parport_pc_frob_control(pb, 0x10, 0x10);
 	clear_epp_timeout(pb);
 
 	/* Device isn't expecting an EPP read
@@ -2076,9 +2136,9 @@ static int irq_probe_EPP(struct parport *pb)
 	parport_pc_read_epp(pb);
 	udelay(20);
 
-	pb->irq = probe_irq_off (irqs);
+	pb->irq = probe_irq_off(irqs);
 	if (pb->modes & PARPORT_MODE_PCECR)
-		ECR_WRITE (pb, oecr);
+		ECR_WRITE(pb, oecr);
 	parport_pc_write_control(pb, 0xc);
 
 	if (pb->irq <= 0)
@@ -2135,28 +2195,28 @@ static int parport_irq_probe(struct parport *pb)
 /* --- DMA detection -------------------------------------- */
 
 /* Only if chipset conforms to ECP ISA Interface Standard */
-static int programmable_dma_support (struct parport *p)
+static int programmable_dma_support(struct parport *p)
 {
-	unsigned char oecr = inb (ECONTROL (p));
+	unsigned char oecr = inb(ECONTROL(p));
 	int dma;
 
-	frob_set_mode (p, ECR_CNF);
-	
-	dma = inb (CONFIGB(p)) & 0x07;
+	frob_set_mode(p, ECR_CNF);
+
+	dma = inb(CONFIGB(p)) & 0x07;
 	/* 000: Indicates jumpered 8-bit DMA if read-only.
 	   100: Indicates jumpered 16-bit DMA if read-only. */
 	if ((dma & 0x03) == 0)
 		dma = PARPORT_DMA_NONE;
 
-	ECR_WRITE (p, oecr);
+	ECR_WRITE(p, oecr);
 	return dma;
 }
 
-static int parport_dma_probe (struct parport *p)
+static int parport_dma_probe(struct parport *p)
 {
 	const struct parport_pc_private *priv = p->private_data;
-	if (priv->ecr)
-		p->dma = programmable_dma_support(p); /* ask ECP chipset first */
+	if (priv->ecr)		/* ask ECP chipset first */
+		p->dma = programmable_dma_support(p);
 	if (p->dma == PARPORT_DMA_NONE) {
 		/* ask known Super-IO chips proper, although these
 		   claim ECP compatible, some don't report their DMA
@@ -2214,7 +2274,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 	if (!base_res)
 		goto out4;
 
-	memcpy(ops, &parport_pc_ops, sizeof (struct parport_operations));
+	memcpy(ops, &parport_pc_ops, sizeof(struct parport_operations));
 	priv->ctr = 0xc;
 	priv->ctr_writable = ~0x10;
 	priv->ecr = 0;
@@ -2241,7 +2301,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 			if (!parport_EPP_supported(p))
 				parport_ECPEPP_supported(p);
 	}
-	if (!parport_SPP_supported (p))
+	if (!parport_SPP_supported(p))
 		/* No port. */
 		goto out5;
 	if (priv->ecr)
@@ -2249,7 +2309,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 	else
 		parport_PS2_supported(p);
 
-	p->size = (p->modes & PARPORT_MODE_EPP)?8:3;
+	p->size = (p->modes & PARPORT_MODE_EPP) ? 8 : 3;
 
 	printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base);
 	if (p->base_hi && priv->ecr)
@@ -2273,7 +2333,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 		}
 	}
 	if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq
-                                           is mandatory (see above) */
+					   is mandatory (see above) */
 		p->dma = PARPORT_DMA_NONE;
 
 #ifdef CONFIG_PARPORT_PC_FIFO
@@ -2290,16 +2350,23 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 		if (p->dma != PARPORT_DMA_NONE) {
 			printk(", dma %d", p->dma);
 			p->modes |= PARPORT_MODE_DMA;
-		}
-		else printk(", using FIFO");
-	}
-	else
+		} else
+			printk(", using FIFO");
+	} else
 		/* We can't use the DMA channel after all. */
 		p->dma = PARPORT_DMA_NONE;
 #endif /* Allowed to use FIFO/DMA */
 
 	printk(" [");
-#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}}
+
+#define printmode(x) \
+	{\
+		if (p->modes & PARPORT_MODE_##x) {\
+			printk("%s%s", f ? "," : "", #x);\
+			f++;\
+		} \
+	}
+
 	{
 		int f = 0;
 		printmode(PCSPP);
@@ -2311,10 +2378,10 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 	}
 #undef printmode
 #ifndef CONFIG_PARPORT_1284
-	printk ("(,...)");
+	printk("(,...)");
 #endif /* CONFIG_PARPORT_1284 */
 	printk("]\n");
-	if (probedirq != PARPORT_IRQ_NONE) 
+	if (probedirq != PARPORT_IRQ_NONE)
 		printk(KERN_INFO "%s: irq %d detected\n", p->name, probedirq);
 
 	/* If No ECP release the ports grabbed above. */
@@ -2330,7 +2397,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 	if (p->irq != PARPORT_IRQ_NONE) {
 		if (request_irq(p->irq, parport_irq_handler,
 				 irqflags, p->name, p)) {
-			printk (KERN_WARNING "%s: irq %d in use, "
+			printk(KERN_WARNING "%s: irq %d in use, "
 				"resorting to polled operation\n",
 				p->name, p->irq);
 			p->irq = PARPORT_IRQ_NONE;
@@ -2340,8 +2407,8 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 #ifdef CONFIG_PARPORT_PC_FIFO
 #ifdef HAS_DMA
 		if (p->dma != PARPORT_DMA_NONE) {
-			if (request_dma (p->dma, p->name)) {
-				printk (KERN_WARNING "%s: dma %d in use, "
+			if (request_dma(p->dma, p->name)) {
+				printk(KERN_WARNING "%s: dma %d in use, "
 					"resorting to PIO operation\n",
 					p->name, p->dma);
 				p->dma = PARPORT_DMA_NONE;
@@ -2351,8 +2418,8 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 						       PAGE_SIZE,
 						       &priv->dma_handle,
 						       GFP_KERNEL);
-				if (! priv->dma_buf) {
-					printk (KERN_WARNING "%s: "
+				if (!priv->dma_buf) {
+					printk(KERN_WARNING "%s: "
 						"cannot get buffer for DMA, "
 						"resorting to PIO operation\n",
 						p->name);
@@ -2371,10 +2438,10 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 		 * Put the ECP detected port in PS2 mode.
 		 * Do this also for ports that have ECR but don't do ECP.
 		 */
-		ECR_WRITE (p, 0x34);
+		ECR_WRITE(p, 0x34);
 
 	parport_pc_write_data(p, 0);
-	parport_pc_data_forward (p);
+	parport_pc_data_forward(p);
 
 	/* Now that we've told the sharing engine about the port, and
 	   found out its characteristics, let the high-level drivers
@@ -2382,7 +2449,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 	spin_lock(&ports_lock);
 	list_add(&priv->list, &ports_list);
 	spin_unlock(&ports_lock);
-	parport_announce_port (p);
+	parport_announce_port(p);
 
 	return p;
 
@@ -2395,18 +2462,17 @@ out5:
 out4:
 	parport_put_port(p);
 out3:
-	kfree (priv);
+	kfree(priv);
 out2:
-	kfree (ops);
+	kfree(ops);
 out1:
 	if (pdev)
 		platform_device_unregister(pdev);
 	return NULL;
 }
+EXPORT_SYMBOL(parport_pc_probe_port);
 
-EXPORT_SYMBOL (parport_pc_probe_port);
-
-void parport_pc_unregister_port (struct parport *p)
+void parport_pc_unregister_port(struct parport *p)
 {
 	struct parport_pc_private *priv = p->private_data;
 	struct parport_operations *ops = p->ops;
@@ -2432,17 +2498,16 @@ void parport_pc_unregister_port (struct parport *p)
 				    priv->dma_buf,
 				    priv->dma_handle);
 #endif
-	kfree (p->private_data);
+	kfree(p->private_data);
 	parport_put_port(p);
-	kfree (ops); /* hope no-one cached it */
+	kfree(ops); /* hope no-one cached it */
 }
-
-EXPORT_SYMBOL (parport_pc_unregister_port);
+EXPORT_SYMBOL(parport_pc_unregister_port);
 
 #ifdef CONFIG_PCI
 
 /* ITE support maintained by Rich Liu <richliu@poorman.org> */
-static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
+static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
 					 int autodma,
 					 const struct parport_pc_via_data *via)
 {
@@ -2454,73 +2519,74 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
 	int irq;
 	int i;
 
-	DPRINTK (KERN_DEBUG "sio_ite_8872_probe()\n");
-	
-	// make sure which one chip
-	for(i = 0; i < 5; i++) {
+	DPRINTK(KERN_DEBUG "sio_ite_8872_probe()\n");
+
+	/* make sure which one chip */
+	for (i = 0; i < 5; i++) {
 		base_res = request_region(inta_addr[i], 32, "it887x");
 		if (base_res) {
 			int test;
-			pci_write_config_dword (pdev, 0x60,
+			pci_write_config_dword(pdev, 0x60,
 						0xe5000000 | inta_addr[i]);
-			pci_write_config_dword (pdev, 0x78,
+			pci_write_config_dword(pdev, 0x78,
 						0x00000000 | inta_addr[i]);
-			test = inb (inta_addr[i]);
-			if (test != 0xff) break;
+			test = inb(inta_addr[i]);
+			if (test != 0xff)
+				break;
 			release_region(inta_addr[i], 0x8);
 		}
 	}
-	if(i >= 5) {
-		printk (KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
+	if (i >= 5) {
+		printk(KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
 		return 0;
 	}
 
-	type = inb (inta_addr[i] + 0x18);
+	type = inb(inta_addr[i] + 0x18);
 	type &= 0x0f;
 
 	switch (type) {
 	case 0x2:
-		printk (KERN_INFO "parport_pc: ITE8871 found (1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8871 found (1P)\n");
 		ite8872set = 0x64200000;
 		break;
 	case 0xa:
-		printk (KERN_INFO "parport_pc: ITE8875 found (1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8875 found (1P)\n");
 		ite8872set = 0x64200000;
 		break;
 	case 0xe:
-		printk (KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
+		printk(KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
 		ite8872set = 0x64e00000;
 		break;
 	case 0x6:
-		printk (KERN_INFO "parport_pc: ITE8873 found (1S)\n");
+		printk(KERN_INFO "parport_pc: ITE8873 found (1S)\n");
 		return 0;
 	case 0x8:
-		DPRINTK (KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
+		DPRINTK(KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
 		return 0;
 	default:
-		printk (KERN_INFO "parport_pc: unknown ITE887x\n");
-		printk (KERN_INFO "parport_pc: please mail 'lspci -nvv' "
+		printk(KERN_INFO "parport_pc: unknown ITE887x\n");
+		printk(KERN_INFO "parport_pc: please mail 'lspci -nvv' "
 			"output to Rich.Liu@ite.com.tw\n");
 		return 0;
 	}
 
-	pci_read_config_byte (pdev, 0x3c, &ite8872_irq);
-	pci_read_config_dword (pdev, 0x1c, &ite8872_lpt);
+	pci_read_config_byte(pdev, 0x3c, &ite8872_irq);
+	pci_read_config_dword(pdev, 0x1c, &ite8872_lpt);
 	ite8872_lpt &= 0x0000ff00;
-	pci_read_config_dword (pdev, 0x20, &ite8872_lpthi);
+	pci_read_config_dword(pdev, 0x20, &ite8872_lpthi);
 	ite8872_lpthi &= 0x0000ff00;
-	pci_write_config_dword (pdev, 0x6c, 0xe3000000 | ite8872_lpt);
-	pci_write_config_dword (pdev, 0x70, 0xe3000000 | ite8872_lpthi);
-	pci_write_config_dword (pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
-	// SET SPP&EPP , Parallel Port NO DMA , Enable All Function
-	// SET Parallel IRQ
-	pci_write_config_dword (pdev, 0x9c,
+	pci_write_config_dword(pdev, 0x6c, 0xe3000000 | ite8872_lpt);
+	pci_write_config_dword(pdev, 0x70, 0xe3000000 | ite8872_lpthi);
+	pci_write_config_dword(pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
+	/* SET SPP&EPP , Parallel Port NO DMA , Enable All Function */
+	/* SET Parallel IRQ */
+	pci_write_config_dword(pdev, 0x9c,
 				ite8872set | (ite8872_irq * 0x11111));
 
-	DPRINTK (KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
-	DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
+	DPRINTK(KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
+	DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
 		 ite8872_lpt);
-	DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
+	DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
 		 ite8872_lpthi);
 
 	/* Let the user (or defaults) steer us away from interrupts */
@@ -2532,14 +2598,14 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
 	 * Release the resource so that parport_pc_probe_port can get it.
 	 */
 	release_resource(base_res);
-	if (parport_pc_probe_port (ite8872_lpt, ite8872_lpthi,
+	if (parport_pc_probe_port(ite8872_lpt, ite8872_lpthi,
 				   irq, PARPORT_DMA_NONE, &pdev->dev, 0)) {
-		printk (KERN_INFO
+		printk(KERN_INFO
 			"parport_pc: ITE 8872 parallel port: io=0x%X",
-			ite8872_lpt);
+								ite8872_lpt);
 		if (irq != PARPORT_IRQ_NONE)
-			printk (", irq=%d", irq);
-		printk ("\n");
+			printk(", irq=%d", irq);
+		printk("\n");
 		return 1;
 	}
 
@@ -2548,7 +2614,7 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
 
 /* VIA 8231 support by Pavel Fedin <sonic_amiga@rambler.ru>
    based on VIA 686a support code by Jeff Garzik <jgarzik@pobox.com> */
-static int __devinitdata parport_init_mode = 0;
+static int __devinitdata parport_init_mode;
 
 /* Data for two known VIA chips */
 static struct parport_pc_via_data via_686a_data __devinitdata = {
@@ -2570,7 +2636,7 @@ static struct parport_pc_via_data via_8231_data __devinitdata = {
 	0xF6
 };
 
-static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
+static int __devinit sio_via_probe(struct pci_dev *pdev, int autoirq,
 				    int autodma,
 				    const struct parport_pc_via_data *via)
 {
@@ -2582,38 +2648,38 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
 
 	printk(KERN_DEBUG "parport_pc: VIA 686A/8231 detected\n");
 
-	switch(parport_init_mode)
-	{
+	switch (parport_init_mode) {
 	case 1:
-	    printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_SPP;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_SPP;
+		break;
 	case 2:
-	    printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_SPP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_SPP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		break;
 	case 3:
-	    printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_EPP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    have_epp = 1;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_EPP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		have_epp = 1;
+		break;
 	case 4:
-	    printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_ECP;
-	    ppcontrol = VIA_PARPORT_BIDIR;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_ECP;
+		ppcontrol = VIA_PARPORT_BIDIR;
+		break;
 	case 5:
-	    printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
-	    siofunc = VIA_FUNCTION_PARPORT_ECP;
-	    ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
-	    have_epp = 1;
-	    break;
-	 default:
-	    printk(KERN_DEBUG "parport_pc: probing current configuration\n");
-	    siofunc = VIA_FUNCTION_PROBE;
-	    break;
+		printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
+		siofunc = VIA_FUNCTION_PARPORT_ECP;
+		ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
+		have_epp = 1;
+		break;
+	default:
+		printk(KERN_DEBUG
+			"parport_pc: probing current configuration\n");
+		siofunc = VIA_FUNCTION_PROBE;
+		break;
 	}
 	/*
 	 * unlock super i/o configuration
@@ -2624,38 +2690,36 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
 
 	/* Bits 1-0: Parallel Port Mode / Enable */
 	outb(via->viacfg_function, VIA_CONFIG_INDEX);
-	tmp = inb (VIA_CONFIG_DATA);
+	tmp = inb(VIA_CONFIG_DATA);
 	/* Bit 5: EPP+ECP enable; bit 7: PS/2 bidirectional port enable */
 	outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
-	tmp2 = inb (VIA_CONFIG_DATA);
-	if (siofunc == VIA_FUNCTION_PROBE)
-	{
-	    siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
-	    ppcontrol = tmp2;
+	tmp2 = inb(VIA_CONFIG_DATA);
+	if (siofunc == VIA_FUNCTION_PROBE) {
+		siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
+		ppcontrol = tmp2;
+	} else {
+		tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
+		tmp |= siofunc;
+		outb(via->viacfg_function, VIA_CONFIG_INDEX);
+		outb(tmp, VIA_CONFIG_DATA);
+		tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
+		tmp2 |= ppcontrol;
+		outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
+		outb(tmp2, VIA_CONFIG_DATA);
 	}
-	else
-	{
-	    tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
-	    tmp |= siofunc;
-	    outb(via->viacfg_function, VIA_CONFIG_INDEX);
-	    outb(tmp, VIA_CONFIG_DATA);
-	    tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
-	    tmp2 |= ppcontrol;
-	    outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
-	    outb(tmp2, VIA_CONFIG_DATA);
-	}
-	
+
 	/* Parallel Port I/O Base Address, bits 9-2 */
 	outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
 	port1 = inb(VIA_CONFIG_DATA) << 2;
-	
-	printk (KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",port1);
-	if ((port1 == 0x3BC) && have_epp)
-	{
-	    outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
-	    outb((0x378 >> 2), VIA_CONFIG_DATA);
-	    printk(KERN_DEBUG "parport_pc: Parallel port base changed to 0x378\n");
-	    port1 = 0x378;
+
+	printk(KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",
+									port1);
+	if (port1 == 0x3BC && have_epp) {
+		outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
+		outb((0x378 >> 2), VIA_CONFIG_DATA);
+		printk(KERN_DEBUG
+			"parport_pc: Parallel port base changed to 0x378\n");
+		port1 = 0x378;
 	}
 
 	/*
@@ -2669,36 +2733,39 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
 		printk(KERN_INFO "parport_pc: VIA parallel port disabled in BIOS\n");
 		return 0;
 	}
-	
+
 	/* Bits 7-4: PnP Routing for Parallel Port IRQ */
 	pci_read_config_byte(pdev, via->via_pci_parport_irq_reg, &tmp);
 	irq = ((tmp & VIA_IRQCONTROL_PARALLEL) >> 4);
 
-	if (siofunc == VIA_FUNCTION_PARPORT_ECP)
-	{
-	    /* Bits 3-2: PnP Routing for Parallel Port DMA */
-	    pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
-	    dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
-	}
-	else
-	    /* if ECP not enabled, DMA is not enabled, assumed bogus 'dma' value */
-	    dma = PARPORT_DMA_NONE;
+	if (siofunc == VIA_FUNCTION_PARPORT_ECP) {
+		/* Bits 3-2: PnP Routing for Parallel Port DMA */
+		pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
+		dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
+	} else
+		/* if ECP not enabled, DMA is not enabled, assumed
+		   bogus 'dma' value */
+		dma = PARPORT_DMA_NONE;
 
 	/* Let the user (or defaults) steer us away from interrupts and DMA */
 	if (autoirq == PARPORT_IRQ_NONE) {
-	    irq = PARPORT_IRQ_NONE;
-	    dma = PARPORT_DMA_NONE;
+		irq = PARPORT_IRQ_NONE;
+		dma = PARPORT_DMA_NONE;
 	}
 	if (autodma == PARPORT_DMA_NONE)
-	    dma = PARPORT_DMA_NONE;
+		dma = PARPORT_DMA_NONE;
 
 	switch (port1) {
-	case 0x3bc: port2 = 0x7bc; break;
-	case 0x378: port2 = 0x778; break;
-	case 0x278: port2 = 0x678; break;
+	case 0x3bc:
+		port2 = 0x7bc; break;
+	case 0x378:
+		port2 = 0x778; break;
+	case 0x278:
+		port2 = 0x678; break;
 	default:
-		printk(KERN_INFO "parport_pc: Weird VIA parport base 0x%X, ignoring\n",
-			port1);
+		printk(KERN_INFO
+			"parport_pc: Weird VIA parport base 0x%X, ignoring\n",
+									port1);
 		return 0;
 	}
 
@@ -2716,17 +2783,17 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
 	}
 
 	/* finally, do the probe with values obtained */
-	if (parport_pc_probe_port (port1, port2, irq, dma, &pdev->dev, 0)) {
-		printk (KERN_INFO
+	if (parport_pc_probe_port(port1, port2, irq, dma, &pdev->dev, 0)) {
+		printk(KERN_INFO
 			"parport_pc: VIA parallel port: io=0x%X", port1);
 		if (irq != PARPORT_IRQ_NONE)
-			printk (", irq=%d", irq);
+			printk(", irq=%d", irq);
 		if (dma != PARPORT_DMA_NONE)
-			printk (", dma=%d", dma);
-		printk ("\n");
+			printk(", dma=%d", dma);
+		printk("\n");
 		return 1;
 	}
-	
+
 	printk(KERN_WARNING "parport_pc: Strange, can't probe VIA parallel port: io=0x%X, irq=%d, dma=%d\n",
 		port1, irq, dma);
 	return 0;
@@ -2734,8 +2801,8 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
 
 
 enum parport_pc_sio_types {
-	sio_via_686a = 0,	/* Via VT82C686A motherboard Super I/O */
-	sio_via_8231,		/* Via VT8231 south bridge integrated Super IO */
+	sio_via_686a = 0,   /* Via VT82C686A motherboard Super I/O */
+	sio_via_8231,	    /* Via VT8231 south bridge integrated Super IO */
 	sio_ite_8872,
 	last_sio
 };
@@ -2806,15 +2873,15 @@ enum parport_pc_pci_cards {
 };
 
 
-/* each element directly indexed from enum list, above 
+/* each element directly indexed from enum list, above
  * (but offset by last_sio) */
 static struct parport_pc_pci {
 	int numports;
 	struct { /* BAR (base address registers) numbers in the config
-                    space header */
+		    space header */
 		int lo;
-		int hi; /* -1 if not there, >6 for offset-method (max
-                           BAR is 6) */
+		int hi;
+		/* -1 if not there, >6 for offset-method (max BAR is 6) */
 	} addr[4];
 
 	/* If set, this is called immediately after pci_enable_device.
@@ -2859,7 +2926,7 @@ static struct parport_pc_pci {
 	/* timedia_4018  */             { 2, { { 0, 1 }, { 2, 3 }, } },
 	/* timedia_9018a */             { 2, { { 0, 1 }, { 2, 3 }, } },
 					/* SYBA uses fixed offsets in
-                                           a 1K io window */
+					   a 1K io window */
 	/* syba_2p_epp AP138B */	{ 2, { { 0, 0x078 }, { 0, 0x178 }, } },
 	/* syba_1p_ecp W83787 */	{ 1, { { 0, 0x078 }, } },
 	/* titan_010l */		{ 1, { { 3, -1 }, } },
@@ -2875,11 +2942,14 @@ static struct parport_pc_pci {
 	/* oxsemi_pcie_pport */		{ 1, { { 0, 1 }, } },
 	/* aks_0100 */                  { 1, { { 0, -1 }, } },
 	/* mobility_pp */		{ 1, { { 0, 1 }, } },
-	/* netmos_9705 */               { 1, { { 0, -1 }, } }, /* untested */
-        /* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
-        /* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
-	/* netmos_9805 */               { 1, { { 0, -1 }, } }, /* untested */
-	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */
+
+	/* The netmos entries below are untested */
+	/* netmos_9705 */               { 1, { { 0, -1 }, } },
+	/* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} },
+	/* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} },
+	/* netmos_9805 */               { 1, { { 0, -1 }, } },
+	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } },
+
 	/* quatech_sppxp100 */		{ 1, { { 0, 1 }, } },
 };
 
@@ -2908,7 +2978,7 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
 	{ PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_BOCA_IOPPAR,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, boca_ioppar },
 	{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
-	  PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0,0, plx_9050 },
+	  PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0, 0, plx_9050 },
 	/* PCI_VENDOR_ID_TIMEDIA/SUNIX has many differing cards ...*/
 	{ 0x1409, 0x7168, 0x1409, 0x4078, 0, 0, timedia_4078a },
 	{ 0x1409, 0x7168, 0x1409, 0x4079, 0, 0, timedia_4079h },
@@ -2942,7 +3012,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
 	{ 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 },
 	{ 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 },
 	/* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/
-	{ 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */
+	/* AFAVLAB_TK9902 */
+	{ 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p},
 	{ 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p},
 	{ PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 },
@@ -2985,14 +3056,14 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 },
 	{ 0, } /* terminate list */
 };
-MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl);
+MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl);
 
 struct pci_parport_data {
 	int num;
 	struct parport *ports[2];
 };
 
-static int parport_pc_pci_probe (struct pci_dev *dev,
+static int parport_pc_pci_probe(struct pci_dev *dev,
 					   const struct pci_device_id *id)
 {
 	int err, count, n, i = id->driver_data;
@@ -3005,7 +3076,8 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
 	/* This is a PCI card */
 	i -= last_sio;
 	count = 0;
-	if ((err = pci_enable_device (dev)) != 0)
+	err = pci_enable_device(dev);
+	if (err)
 		return err;
 
 	data = kmalloc(sizeof(struct pci_parport_data), GFP_KERNEL);
@@ -3013,7 +3085,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
 		return -ENOMEM;
 
 	if (cards[i].preinit_hook &&
-	    cards[i].preinit_hook (dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
+	    cards[i].preinit_hook(dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
 		kfree(data);
 		return -ENODEV;
 	}
@@ -3023,25 +3095,25 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
 		int hi = cards[i].addr[n].hi;
 		int irq;
 		unsigned long io_lo, io_hi;
-		io_lo = pci_resource_start (dev, lo);
+		io_lo = pci_resource_start(dev, lo);
 		io_hi = 0;
 		if ((hi >= 0) && (hi <= 6))
-			io_hi = pci_resource_start (dev, hi);
+			io_hi = pci_resource_start(dev, hi);
 		else if (hi > 6)
 			io_lo += hi; /* Reinterpret the meaning of
-                                        "hi" as an offset (see SYBA
-                                        def.) */
+					"hi" as an offset (see SYBA
+					def.) */
 		/* TODO: test if sharing interrupts works */
 		irq = dev->irq;
 		if (irq == IRQ_NONE) {
-			printk (KERN_DEBUG
+			printk(KERN_DEBUG
 	"PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n",
 				parport_pc_pci_tbl[i + last_sio].vendor,
 				parport_pc_pci_tbl[i + last_sio].device,
 				io_lo, io_hi);
 			irq = PARPORT_IRQ_NONE;
 		} else {
-			printk (KERN_DEBUG
+			printk(KERN_DEBUG
 	"PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n",
 				parport_pc_pci_tbl[i + last_sio].vendor,
 				parport_pc_pci_tbl[i + last_sio].device,
@@ -3058,7 +3130,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
 	data->num = count;
 
 	if (cards[i].postinit_hook)
-		cards[i].postinit_hook (dev, count == 0);
+		cards[i].postinit_hook(dev, count == 0);
 
 	if (count) {
 		pci_set_drvdata(dev, data);
@@ -3092,7 +3164,7 @@ static struct pci_driver parport_pc_pci_driver = {
 	.remove		= __devexit_p(parport_pc_pci_remove),
 };
 
-static int __init parport_pc_init_superio (int autoirq, int autodma)
+static int __init parport_pc_init_superio(int autoirq, int autodma)
 {
 	const struct pci_device_id *id;
 	struct pci_dev *pdev = NULL;
@@ -3103,8 +3175,9 @@ static int __init parport_pc_init_superio (int autoirq, int autodma)
 		if (id == NULL || id->driver_data >= last_sio)
 			continue;
 
-		if (parport_pc_superio_info[id->driver_data].probe
-			(pdev, autoirq, autodma,parport_pc_superio_info[id->driver_data].via)) {
+		if (parport_pc_superio_info[id->driver_data].probe(
+			pdev, autoirq, autodma,
+			parport_pc_superio_info[id->driver_data].via)) {
 			ret++;
 		}
 	}
@@ -3113,7 +3186,10 @@ static int __init parport_pc_init_superio (int autoirq, int autodma)
 }
 #else
 static struct pci_driver parport_pc_pci_driver;
-static int __init parport_pc_init_superio(int autoirq, int autodma) {return 0;}
+static int __init parport_pc_init_superio(int autoirq, int autodma)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI */
 
 #ifdef CONFIG_PNP
@@ -3126,44 +3202,45 @@ static const struct pnp_device_id parport_pc_pnp_tbl[] = {
 	{ }
 };
 
-MODULE_DEVICE_TABLE(pnp,parport_pc_pnp_tbl);
+MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl);
 
-static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id)
+static int parport_pc_pnp_probe(struct pnp_dev *dev,
+						const struct pnp_device_id *id)
 {
 	struct parport *pdata;
 	unsigned long io_lo, io_hi;
 	int dma, irq;
 
-	if (pnp_port_valid(dev,0) &&
-		!(pnp_port_flags(dev,0) & IORESOURCE_DISABLED)) {
-		io_lo = pnp_port_start(dev,0);
+	if (pnp_port_valid(dev, 0) &&
+		!(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		io_lo = pnp_port_start(dev, 0);
 	} else
 		return -EINVAL;
 
-	if (pnp_port_valid(dev,1) &&
-		!(pnp_port_flags(dev,1) & IORESOURCE_DISABLED)) {
-		io_hi = pnp_port_start(dev,1);
+	if (pnp_port_valid(dev, 1) &&
+		!(pnp_port_flags(dev, 1) & IORESOURCE_DISABLED)) {
+		io_hi = pnp_port_start(dev, 1);
 	} else
 		io_hi = 0;
 
-	if (pnp_irq_valid(dev,0) &&
-		!(pnp_irq_flags(dev,0) & IORESOURCE_DISABLED)) {
-		irq = pnp_irq(dev,0);
+	if (pnp_irq_valid(dev, 0) &&
+		!(pnp_irq_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		irq = pnp_irq(dev, 0);
 	} else
 		irq = PARPORT_IRQ_NONE;
 
-	if (pnp_dma_valid(dev,0) &&
-		!(pnp_dma_flags(dev,0) & IORESOURCE_DISABLED)) {
-		dma = pnp_dma(dev,0);
+	if (pnp_dma_valid(dev, 0) &&
+		!(pnp_dma_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		dma = pnp_dma(dev, 0);
 	} else
 		dma = PARPORT_DMA_NONE;
 
 	dev_info(&dev->dev, "reported by %s\n", dev->protocol->name);
-	if (!(pdata = parport_pc_probe_port(io_lo, io_hi,
-					irq, dma, &dev->dev, 0)))
+	pdata = parport_pc_probe_port(io_lo, io_hi, irq, dma, &dev->dev, 0);
+	if (pdata == NULL)
 		return -ENODEV;
 
-	pnp_set_drvdata(dev,pdata);
+	pnp_set_drvdata(dev, pdata);
 	return 0;
 }
 
@@ -3205,7 +3282,7 @@ static struct platform_driver parport_pc_platform_driver = {
 
 /* This is called by parport_pc_find_nonpci_ports (in asm/parport.h) */
 static int __devinit __attribute__((unused))
-parport_pc_find_isa_ports (int autoirq, int autodma)
+parport_pc_find_isa_ports(int autoirq, int autodma)
 {
 	int count = 0;
 
@@ -3229,7 +3306,7 @@ parport_pc_find_isa_ports (int autoirq, int autodma)
  * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY
  * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO
  */
-static void __init parport_pc_find_ports (int autoirq, int autodma)
+static void __init parport_pc_find_ports(int autoirq, int autodma)
 {
 	int count = 0, err;
 
@@ -3263,11 +3340,18 @@ static void __init parport_pc_find_ports (int autoirq, int autodma)
  *	syntax and keep in mind that code below is a cleaned up version.
  */
 
-static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { [0 ... PARPORT_PC_MAX_PORTS] = 0 };
-static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] =
-	{ [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO };
-static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE };
-static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY };
+static int __initdata io[PARPORT_PC_MAX_PORTS+1] = {
+	[0 ... PARPORT_PC_MAX_PORTS] = 0
+};
+static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = {
+	[0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO
+};
+static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = {
+	[0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE
+};
+static int __initdata irqval[PARPORT_PC_MAX_PORTS] = {
+	[0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY
+};
 
 static int __init parport_parse_param(const char *s, int *val,
 				int automatic, int none, int nofifo)
@@ -3308,18 +3392,19 @@ static int __init parport_parse_dma(const char *dmastr, int *val)
 #ifdef CONFIG_PCI
 static int __init parport_init_mode_setup(char *str)
 {
-	printk(KERN_DEBUG "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
-
-	if (!strcmp (str, "spp"))
-		parport_init_mode=1;
-	if (!strcmp (str, "ps2"))
-		parport_init_mode=2;
-	if (!strcmp (str, "epp"))
-		parport_init_mode=3;
-	if (!strcmp (str, "ecp"))
-		parport_init_mode=4;
-	if (!strcmp (str, "ecpepp"))
-		parport_init_mode=5;
+	printk(KERN_DEBUG
+	     "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
+
+	if (!strcmp(str, "spp"))
+		parport_init_mode = 1;
+	if (!strcmp(str, "ps2"))
+		parport_init_mode = 2;
+	if (!strcmp(str, "epp"))
+		parport_init_mode = 3;
+	if (!strcmp(str, "ecp"))
+		parport_init_mode = 4;
+	if (!strcmp(str, "ecpepp"))
+		parport_init_mode = 5;
 	return 1;
 }
 #endif
@@ -3343,7 +3428,8 @@ module_param(verbose_probing, int, 0644);
 #endif
 #ifdef CONFIG_PCI
 static char *init_mode;
-MODULE_PARM_DESC(init_mode, "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
+MODULE_PARM_DESC(init_mode,
+	"Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
 module_param(init_mode, charp, 0);
 #endif
 
@@ -3374,7 +3460,7 @@ static int __init parse_parport_params(void)
 				irqval[0] = val;
 				break;
 			default:
-				printk (KERN_WARNING
+				printk(KERN_WARNING
 					"parport_pc: irq specified "
 					"without base address.  Use 'io=' "
 					"to specify one\n");
@@ -3387,7 +3473,7 @@ static int __init parse_parport_params(void)
 				dmaval[0] = val;
 				break;
 			default:
-				printk (KERN_WARNING
+				printk(KERN_WARNING
 					"parport_pc: dma specified "
 					"without base address.  Use 'io=' "
 					"to specify one\n");
@@ -3398,7 +3484,7 @@ static int __init parse_parport_params(void)
 
 #else
 
-static int parport_setup_ptr __initdata = 0;
+static int parport_setup_ptr __initdata;
 
 /*
  * Acceptable parameters:
@@ -3409,7 +3495,7 @@ static int parport_setup_ptr __initdata = 0;
  *
  * IRQ/DMA may be numeric or 'auto' or 'none'
  */
-static int __init parport_setup (char *str)
+static int __init parport_setup(char *str)
 {
 	char *endptr;
 	char *sep;
@@ -3421,15 +3507,15 @@ static int __init parport_setup (char *str)
 		return 1;
 	}
 
-	if (!strncmp (str, "auto", 4)) {
+	if (!strncmp(str, "auto", 4)) {
 		irqval[0] = PARPORT_IRQ_AUTO;
 		dmaval[0] = PARPORT_DMA_AUTO;
 		return 1;
 	}
 
-	val = simple_strtoul (str, &endptr, 0);
+	val = simple_strtoul(str, &endptr, 0);
 	if (endptr == str) {
-		printk (KERN_WARNING "parport=%s not understood\n", str);
+		printk(KERN_WARNING "parport=%s not understood\n", str);
 		return 1;
 	}
 
@@ -3463,7 +3549,7 @@ static int __init parse_parport_params(void)
 	return io[0] == PARPORT_DISABLE;
 }
 
-__setup ("parport=", parport_setup);
+__setup("parport=", parport_setup);
 
 /*
  * Acceptable parameters:
@@ -3471,7 +3557,7 @@ __setup ("parport=", parport_setup);
  * parport_init_mode=[spp|ps2|epp|ecp|ecpepp]
  */
 #ifdef CONFIG_PCI
-__setup("parport_init_mode=",parport_init_mode_setup);
+__setup("parport_init_mode=", parport_init_mode_setup);
 #endif
 #endif
 
@@ -3495,13 +3581,13 @@ static int __init parport_pc_init(void)
 		for (i = 0; i < PARPORT_PC_MAX_PORTS; i++) {
 			if (!io[i])
 				break;
-			if ((io_hi[i]) == PARPORT_IOHI_AUTO)
-			       io_hi[i] = 0x400 + io[i];
+			if (io_hi[i] == PARPORT_IOHI_AUTO)
+				io_hi[i] = 0x400 + io[i];
 			parport_pc_probe_port(io[i], io_hi[i],
-					  irqval[i], dmaval[i], NULL, 0);
+					irqval[i], dmaval[i], NULL, 0);
 		}
 	} else
-		parport_pc_find_ports (irqval[0], dmaval[0]);
+		parport_pc_find_ports(irqval[0], dmaval[0]);
 
 	return 0;
 }
@@ -3509,9 +3595,9 @@ static int __init parport_pc_init(void)
 static void __exit parport_pc_exit(void)
 {
 	if (pci_registered_parport)
-		pci_unregister_driver (&parport_pc_pci_driver);
+		pci_unregister_driver(&parport_pc_pci_driver);
 	if (pnp_registered_parport)
-		pnp_unregister_driver (&parport_pc_pnp_driver);
+		pnp_unregister_driver(&parport_pc_pnp_driver);
 	platform_driver_unregister(&parport_pc_platform_driver);
 
 	while (!list_empty(&ports_list)) {
-- 
cgit v0.10.2


From 181bf1e815a2ad2a9a8b30ef6e92583d1530b255 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 13:08:10 +0100
Subject: parport_pc: clean up the modified while loops using for

And tidy up a few bits coding style detectors missed

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index edf83e9..151bf5b 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -626,7 +626,7 @@ static size_t parport_pc_fifo_write_block_dma(struct parport *port,
 	unsigned long start = (unsigned long) buf;
 	unsigned long end = (unsigned long) buf + length - 1;
 
-dump_parport_state("enter fifo_write_block_dma", port);
+	dump_parport_state("enter fifo_write_block_dma", port);
 	if (end < MAX_DMA_ADDRESS) {
 		/* If it would cross a 64k boundary, cap it at the end. */
 		if ((start ^ end) & ~0xffffUL)
@@ -737,7 +737,7 @@ false_alarm:
 	if (dma_handle)
 		dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE);
 
-dump_parport_state("leave fifo_write_block_dma", port);
+	dump_parport_state("leave fifo_write_block_dma", port);
 	return length - left;
 }
 #endif
@@ -955,8 +955,8 @@ static size_t parport_pc_ecp_read_block_pio(struct parport *port,
 	char *bufp = buf;
 
 	port = port->physport;
-DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
-dump_parport_state("enter fcn", port);
+	DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
+	dump_parport_state("enter fcn", port);
 
 	/* Special case: a timeout of zero means we cannot call schedule().
 	 * Also if O_NONBLOCK is set then use the default implementation. */
@@ -1112,14 +1112,15 @@ false_alarm:
 
 		if (ecrval & 0x02) {
 			/* FIFO is full. */
-dump_parport_state("FIFO full", port);
+			dump_parport_state("FIFO full", port);
 			insb(fifo, bufp, fifo_depth);
 			bufp += fifo_depth;
 			left -= fifo_depth;
 			continue;
 		}
 
-DPRINTK(KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n");
+		DPRINTK(KERN_DEBUG
+		  "*** ecp_read_block_pio: reading one byte from the FIFO\n");
 
 		/* FIFO not filled.  We will cycle this loop for a while
 		 * and either the peripheral will fill it faster,
@@ -1135,7 +1136,7 @@ DPRINTK(KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n");
 	}
 
 	port->ieee1284.phase = IEEE1284_PH_REV_IDLE;
-dump_parport_state("rev idle2", port);
+	dump_parport_state("rev idle2", port);
 
 out_no_data:
 
@@ -1163,7 +1164,7 @@ out_no_data:
 				port->name, lost);
 	}
 
-dump_parport_state("fwd idle", port);
+	dump_parport_state("fwd idle", port);
 	return length - left;
 }
 #endif  /*  0  */
@@ -1216,10 +1217,23 @@ static const struct parport_operations parport_pc_ops = {
 };
 
 #ifdef CONFIG_PARPORT_PC_SUPERIO
+
+static struct superio_struct *find_free_superio(void)
+{
+	int i;
+	for (i = 0; i < NR_SUPERIOS; i++)
+		if (superios[i].io == 0)
+			return &superios[i];
+	return NULL;
+}
+
+
 /* Super-IO chipset detection, Winbond, SMSC */
 static void __devinit show_parconfig_smsc37c669(int io, int key)
 {
-	int cr1, cr4, cra, cr23, cr26, cr27, i = 0;
+	int cr1, cr4, cra, cr23, cr26, cr27;
+	struct superio_struct *s;
+
 	static const char *const modes[] = {
 		"SPP and Bidirectional (PS/2)",
 		"EPP and SPP",
@@ -1272,30 +1286,29 @@ static void __devinit show_parconfig_smsc37c669(int io, int key)
 	   are related, however DMA can be 1 or 3, assume DMA_A=DMA1,
 	   DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */
 	if (cr23 * 4 >= 0x100) { /* if active */
-		while ((i < NR_SUPERIOS) && (superios[i].io != 0))
-			i++;
-		if (i == NR_SUPERIOS) {
+		s = find_free_superio();
+		if (s == NULL)
 			printk(KERN_INFO "Super-IO: too many chips!\n");
-		} else {
+		else {
 			int d;
 			switch (cr23 * 4) {
 			case 0x3bc:
-				superios[i].io = 0x3bc;
-				superios[i].irq = 7;
+				s->io = 0x3bc;
+				s->irq = 7;
 				break;
 			case 0x378:
-				superios[i].io = 0x378;
-				superios[i].irq = 7;
+				s->io = 0x378;
+				s->irq = 7;
 				break;
 			case 0x278:
-				superios[i].io = 0x278;
-				superios[i].irq = 5;
+				s->io = 0x278;
+				s->irq = 5;
 			}
 			d = (cr26 & 0x0f);
 			if (d == 1 || d == 3)
-				superios[i].dma = d;
+				s->dma = d;
 			else
-				superios[i].dma = PARPORT_DMA_NONE;
+				s->dma = PARPORT_DMA_NONE;
 		}
 	}
 }
@@ -1303,7 +1316,8 @@ static void __devinit show_parconfig_smsc37c669(int io, int key)
 
 static void __devinit show_parconfig_winbond(int io, int key)
 {
-	int cr30, cr60, cr61, cr70, cr74, crf0, i = 0;
+	int cr30, cr60, cr61, cr70, cr74, crf0;
+	struct superio_struct *s;
 	static const char *const modes[] = {
 		"Standard (SPP) and Bidirectional(PS/2)", /* 0 */
 		"EPP-1.9 and SPP",
@@ -1356,14 +1370,13 @@ static void __devinit show_parconfig_winbond(int io, int key)
 	}
 
 	if (cr30 & 0x01) { /* the settings can be interrogated later ... */
-		while ((i < NR_SUPERIOS) && (superios[i].io != 0))
-			i++;
-		if (i == NR_SUPERIOS) {
+		s = find_free_superio();
+		if (s == NULL)
 			printk(KERN_INFO "Super-IO: too many chips!\n");
-		} else {
-			superios[i].io = (cr60<<8)|cr61;
-			superios[i].irq = cr70&0x0f;
-			superios[i].dma = (((cr74 & 0x07) > 3) ?
+		else {
+			s->io = (cr60 << 8) | cr61;
+			s->irq = cr70 & 0x0f;
+			s->dma = (((cr74 & 0x07) > 3) ?
 					   PARPORT_DMA_NONE : (cr74 & 0x07));
 		}
 	}
@@ -1618,25 +1631,28 @@ static void __devinit detect_and_report_it87(void)
 }
 #endif /* CONFIG_PARPORT_PC_SUPERIO */
 
-static int get_superio_dma(struct parport *p)
+static struct superio_struct *find_superio(struct parport *p)
 {
-	int i = 0;
+	int i;
+	for (i = 0; i < NR_SUPERIOS; i++)
+		if (superios[i].io != p->base)
+			return &superios[i];
+	return NULL;
+}
 
-	while ((i < NR_SUPERIOS) && (superios[i].io != p->base))
-		i++;
-	if (i != NR_SUPERIOS)
-		return superios[i].dma;
+static int get_superio_dma(struct parport *p)
+{
+	struct superio_struct *s = find_superio(p);
+	if (s)
+		return s->dma;
 	return PARPORT_DMA_NONE;
 }
 
 static int get_superio_irq(struct parport *p)
 {
-	int i = 0;
-
-	while ((i < NR_SUPERIOS) && (superios[i].io != p->base))
-		i++;
-	if (i != NR_SUPERIOS)
-		return superios[i].irq;
+	struct superio_struct *s = find_superio(p);
+	if (s)
+		return s->irq;
 	return PARPORT_IRQ_NONE;
 }
 
-- 
cgit v0.10.2


From 257a6e8cc7f9274f0af090494a3f1ee06548b5bd Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@linux.vnet.ibm.com>
Date: Thu, 11 Jun 2009 13:20:09 +0100
Subject: icom: fix compile errors when defining ICOM_TRACE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As it is, defining ICOM_TRACE produces some compile errors, as
"parameter name omitted" and "redefinition of ‘trace’"

This patch removes the wrong trace definition.

Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index fd5fd23..9f2891c 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -137,7 +137,12 @@ static LIST_HEAD(icom_adapter_head);
 static spinlock_t icom_lock;
 
 #ifdef ICOM_TRACE
-static inline void trace(struct icom_port *, char *, unsigned long) {};
+static inline void trace(struct icom_port *icom_port, char *trace_pt,
+			unsigned long trace_data)
+{
+	dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
+	icom_port->port, trace_pt, trace_data);
+}
 #else
 static inline void trace(struct icom_port *icom_port, char *trace_pt, unsigned long trace_data) {};
 #endif
@@ -1647,15 +1652,6 @@ static void __exit icom_exit(void)
 module_init(icom_init);
 module_exit(icom_exit);
 
-#ifdef ICOM_TRACE
-static inline void trace(struct icom_port *icom_port, char *trace_pt,
-		  unsigned long trace_data)
-{
-	dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
-		 icom_port->port, trace_pt, trace_data);
-}
-#endif
-
 MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>");
 MODULE_DESCRIPTION("IBM iSeries Serial IOA driver");
 MODULE_SUPPORTED_DEVICE
-- 
cgit v0.10.2


From 08e0992f60ad44025a8a8b8a821838ca4a562686 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Thu, 11 Jun 2009 13:21:24 +0100
Subject: serial: add support for the TI AR7 internal UART

This patch adds support for the TI AR7 internal UART.

Signed-off-by: Florian Fainelli <florian@openwrt.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index a0127e9..fb867a9 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -287,6 +287,13 @@ static const struct serial8250_config uart_config[] = {
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO,
 	},
+	[PORT_AR7] = {
+		.name		= "AR7",
+		.fifo_size	= 16,
+		.tx_loadsz	= 16,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
 };
 
 #if defined (CONFIG_SERIAL_8250_AU1X00)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 57a97e5..48766ea 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -41,7 +41,8 @@
 #define PORT_XSCALE	15
 #define PORT_RM9000	16	/* PMC-Sierra RM9xxx internal UART */
 #define PORT_OCTEON	17	/* Cavium OCTEON internal UART */
-#define PORT_MAX_8250	17	/* max port ID */
+#define PORT_AR7	18	/* Texas Instruments AR7 internal UART */
+#define PORT_MAX_8250	18	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
-- 
cgit v0.10.2


From b5c6794fe4256fd63664aa185c468647c28bfd4a Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 11 Jun 2009 13:23:02 +0100
Subject: Blackfin SPORT UART: fix typo in sport_set_termios prototype

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c
index 529c0ff..fd5cb95 100644
--- a/drivers/serial/bfin_sport_uart.c
+++ b/drivers/serial/bfin_sport_uart.c
@@ -419,7 +419,7 @@ static void sport_shutdown(struct uart_port *port)
 }
 
 static void sport_set_termios(struct uart_port *port,
-		struct termios *termios, struct termios *old)
+		struct ktermios *termios, struct ktermios *old)
 {
 	pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag);
 	uart_update_timeout(port, CS8 ,port->uartclk);
-- 
cgit v0.10.2


From a19e8b205915b2925aca75b2d2bf0c3104c8be14 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Thu, 11 Jun 2009 13:23:42 +0100
Subject: Blackfin SPORT UART: fix data misses while using transmit frame sync

SPORT transmit frame sync (TFS) isn't used as an electrical signal during
normal SPORT UART emulation.  However, it is useful in EIA RS-485
emulation as RS-485 Transceiver Driver Enable DE strobe.

This patch configures:
TFS to be active high in order to drive an DE strobe of
an eventually connected RS-485 Transceiver.

Late frame sync mode (LATFS) gating the entire TX shift cycle.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c
index fd5cb95..6687ccd 100644
--- a/drivers/serial/bfin_sport_uart.c
+++ b/drivers/serial/bfin_sport_uart.c
@@ -149,7 +149,7 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate)
 	int tclkdiv, tfsdiv, rclkdiv;
 
 	/* Set TCR1 and TCR2 */
-	SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK));
+	SPORT_PUT_TCR1(up, (LATFS | ITFS | TFSR | TLSBIT | ITCLK));
 	SPORT_PUT_TCR2(up, 10);
 	pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
 
-- 
cgit v0.10.2


From 4328e3e5ef1ae3427a4f6863aa65916a68ec2dd9 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 11 Jun 2009 13:37:11 +0100
Subject: Blackfin SPORT UART: rewrite inline assembly

Hopefuly the new version is easier to read, but in the process it declares
proper clobber lists and better constraints so that GCC can do a better
job at allocating free registers.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c
index 6687ccd..34b4ae0 100644
--- a/drivers/serial/bfin_sport_uart.c
+++ b/drivers/serial/bfin_sport_uart.c
@@ -101,15 +101,16 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
 {
 	pr_debug("%s value:%x\n", __func__, value);
 	/* Place a Start and Stop bit */
-	__asm__ volatile (
-		"R2 = b#01111111100;\n\t"
-		"R3 = b#10000000001;\n\t"
-		"%0 <<= 2;\n\t"
-		"%0 = %0 & R2;\n\t"
-		"%0 = %0 | R3;\n\t"
-		:"=r"(value)
-		:"0"(value)
-		:"R2", "R3");
+	__asm__ __volatile__ (
+		"R2 = b#01111111100;"
+		"R3 = b#10000000001;"
+		"%0 <<= 2;"
+		"%0 = %0 & R2;"
+		"%0 = %0 | R3;"
+		: "=d"(value)
+		: "d"(value)
+		: "ASTAT", "R2", "R3"
+	);
 	pr_debug("%s value:%x\n", __func__, value);
 
 	SPORT_PUT_TX(up, value);
@@ -118,27 +119,30 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
 static inline unsigned int rx_one_byte(struct sport_uart_port *up)
 {
 	unsigned int value, extract;
+	u32 tmp_mask1, tmp_mask2, tmp_shift, tmp;
 
 	value = SPORT_GET_RX32(up);
 	pr_debug("%s value:%x\n", __func__, value);
 
 	/* Extract 8 bits data */
-	__asm__ volatile (
-		"R5 = 0;\n\t"
-		"P0 = 8;\n\t"
-		"R1 = 0x1801(Z);\n\t"
-		"R3 = 0x0300(Z);\n\t"
-		"R4 = 0;\n\t"
-		"LSETUP(loop_s, loop_e) LC0 = P0;\nloop_s:\t"
-		"R2 = extract(%1, R1.L)(Z);\n\t"
-		"R2 <<= R4;\n\t"
-		"R5 = R5 | R2;\n\t"
-		"R1 = R1 - R3;\nloop_e:\t"
-		"R4 += 1;\n\t"
-		"%0 = R5;\n\t"
-		:"=r"(extract)
-		:"r"(value)
-		:"P0", "R1", "R2","R3","R4", "R5");
+	__asm__ __volatile__ (
+		"%[extr] = 0;"
+		"%[mask1] = 0x1801(Z);"
+		"%[mask2] = 0x0300(Z);"
+		"%[shift] = 0;"
+		"LSETUP(.Lloop_s, .Lloop_e) LC0 = %[lc];"
+		".Lloop_s:"
+		"%[tmp] = extract(%[val], %[mask1].L)(Z);"
+		"%[tmp] <<= %[shift];"
+		"%[extr] = %[extr] | %[tmp];"
+		"%[mask1] = %[mask1] - %[mask2];"
+		".Lloop_e:"
+		"%[shift] += 1;"
+		: [val]"=d"(value), [extr]"=d"(extract), [shift]"=d"(tmp_shift), [tmp]"=d"(tmp),
+		  [mask1]"=d"(tmp_mask1), [mask2]"=d"(tmp_mask2)
+		: "d"(value), [lc]"a"(8)
+		: "ASTAT", "LB0", "LC0", "LT0"
+	);
 
 	pr_debug("	extract:%x\n", extract);
 	return extract;
-- 
cgit v0.10.2


From 8516c568f25666a012ec4c859e640a76fc9b6ec0 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 11 Jun 2009 13:38:16 +0100
Subject: Blackfin Serial Driver: fix error while transferring large files

Ignore receiving data if new position is in the same line of current
buffer tail and is small.  This should decrease overruns.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index d86123e..65a4c07 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -477,6 +477,15 @@ void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 
 	spin_lock_irqsave(&uart->port.lock, flags);
 
+	/* 2D DMA RX buffer ring is used. Because curr_y_count and
+	 * curr_x_count can't be read as an atomic operation,
+	 * curr_y_count should be read before curr_x_count. When
+	 * curr_x_count is read, curr_y_count may already indicate
+	 * next buffer line. But, the position calculated here is
+	 * still indicate the old line. The wrong position data may
+	 * be smaller than current buffer tail, which cause garbages
+	 * are received if it is not prohibit.
+	 */
 	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
 	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
 	uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
@@ -487,7 +496,11 @@ void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 		x_pos = 0;
 
 	pos = uart->rx_dma_nrows * DMA_RX_XCOUNT + x_pos;
-	if (pos != uart->rx_dma_buf.tail) {
+	/* Ignore receiving data if new position is in the same line of
+	 * current buffer tail and small.
+	 */
+	if (pos > uart->rx_dma_buf.tail ||
+		uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
 		uart->rx_dma_buf.head = pos;
 		bfin_serial_dma_rx_chars(uart);
 		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
@@ -532,11 +545,25 @@ static irqreturn_t bfin_serial_dma_rx_int(int irq, void *dev_id)
 {
 	struct bfin_serial_port *uart = dev_id;
 	unsigned short irqstat;
+	int pos;
 
 	spin_lock(&uart->port.lock);
 	irqstat = get_dma_curr_irqstat(uart->rx_dma_channel);
 	clear_dma_irqstat(uart->rx_dma_channel);
-	bfin_serial_dma_rx_chars(uart);
+
+	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
+	uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
+	if (uart->rx_dma_nrows == DMA_RX_YCOUNT)
+		uart->rx_dma_nrows = 0;
+
+	pos = uart->rx_dma_nrows * DMA_RX_XCOUNT;
+	if (pos > uart->rx_dma_buf.tail ||
+		uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
+		uart->rx_dma_buf.head = pos;
+		bfin_serial_dma_rx_chars(uart);
+		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
+	}
+
 	spin_unlock(&uart->port.lock);
 
 	return IRQ_HANDLED;
-- 
cgit v0.10.2


From 7de7c55bf54dede2bd2262349fc7b558bcc8e413 Mon Sep 17 00:00:00 2001
From: Robin Getz <rgetz@blackfin.uclinux.org>
Date: Thu, 11 Jun 2009 13:38:57 +0100
Subject: Blackfin Serial Driver: fix baudrate for early_printk

Since we already setup the early console UART in
arch/blackfin/kernel/early_printk.c, and common functions which are
enabled from the .setup will override the proper settings later, don't
fill in these structures.  Otherwise we get mangled baudrate settings when
using early_printk.

Signed-off-by: Robin Getz <rgetz@blackfin.uclinux.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 65a4c07..dfae22d 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -1272,12 +1272,17 @@ static __init void early_serial_write(struct console *con, const char *s,
 	}
 }
 
+/*
+ * This should have a .setup or .early_setup in it, but then things get called
+ * without the command line options, and the baud rate gets messed up - so
+ * don't let the common infrastructure play with things. (see calls to setup
+ * & earlysetup in ./kernel/printk.c:register_console()
+ */
 static struct __initdata console bfin_early_serial_console = {
 	.name = "early_BFuart",
 	.write = early_serial_write,
 	.device = uart_console_device,
 	.flags = CON_PRINTBUFFER,
-	.setup = bfin_serial_console_setup,
 	.index = -1,
 	.data  = &bfin_serial_reg,
 };
-- 
cgit v0.10.2


From f9d36da9cdc2504cd9bb6034cfaba0673ce2d6df Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Thu, 11 Jun 2009 13:42:17 +0100
Subject: Blackfin Serial Driver: fix missing new lines when under load

Add a SSYNC() into bfin_serial_dma_tx_chars() to ensure DMA registers are
written with new data otherwise we might miss a byte or two when the
system is under load.  PIO mode is OK though.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index dfae22d..676efda 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -415,6 +415,7 @@ static void bfin_serial_dma_tx_chars(struct bfin_serial_port *uart)
 	set_dma_start_addr(uart->tx_dma_channel, (unsigned long)(xmit->buf+xmit->tail));
 	set_dma_x_count(uart->tx_dma_channel, uart->tx_count);
 	set_dma_x_modify(uart->tx_dma_channel, 1);
+	SSYNC();
 	enable_dma(uart->tx_dma_channel);
 
 	UART_SET_IER(uart, ETBEI);
-- 
cgit v0.10.2


From 35ff69357949cfff5c3d8e3038b77146872e3bd3 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 11 Jun 2009 13:42:57 +0100
Subject: Blackfin Serial Driver: handle irregular DMA register status in auto
 start mode

This bug is caused by irregular behavior of DMA register CURR_X_COUNT
and CURR_Y_COUNT when an auto restart uart rx DMA run to last byte in
DMA buffer, trigger the interrupt and stay at this possiton. The status
of current x and y is 0:7 instead of 512:8 or 0:8. The driver doesn't
take care of this case when calculating the position.

URL: http://blackfin.uclinux.org/gf/tracker/5063
Reported-by: Tomasz Motylewski <t.motylewski@bfad.de>

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 676efda..854e96d 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -490,7 +490,7 @@ void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
 	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
 	uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
-	if (uart->rx_dma_nrows == DMA_RX_YCOUNT)
+	if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
 		uart->rx_dma_nrows = 0;
 	x_pos = DMA_RX_XCOUNT - x_pos;
 	if (x_pos == DMA_RX_XCOUNT)
@@ -546,15 +546,16 @@ static irqreturn_t bfin_serial_dma_rx_int(int irq, void *dev_id)
 {
 	struct bfin_serial_port *uart = dev_id;
 	unsigned short irqstat;
-	int pos;
+	int x_pos, pos;
 
 	spin_lock(&uart->port.lock);
 	irqstat = get_dma_curr_irqstat(uart->rx_dma_channel);
 	clear_dma_irqstat(uart->rx_dma_channel);
 
 	uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
+	x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
 	uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
-	if (uart->rx_dma_nrows == DMA_RX_YCOUNT)
+	if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
 		uart->rx_dma_nrows = 0;
 
 	pos = uart->rx_dma_nrows * DMA_RX_XCOUNT;
-- 
cgit v0.10.2


From 0efa4f2c944fabffc81918cc86d4d17dba39a021 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 11 Jun 2009 13:45:07 +0100
Subject: Blackfin Serial Driver: annotate anomalies 05000215 and 05000099

Add some comments for how these anomalies are addressed:

05000215 - UART TX Interrupt Masked Erroneously
We always clear ETBEI within last UART TX interrupt to end
a string.  It is always set when starting a new tx transfer.

05000099 - UART Line Status Register (UART_LSR) Bits Are Not Updated at
the Same Time
This anomaly affects driver only in POLL code where multi bits of
UART_LSR are checked. It doesn't affect current bfin_5xx.c driver.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 854e96d..ab583ef 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -330,6 +330,11 @@ static void bfin_serial_tx_chars(struct bfin_serial_port *uart)
 		/* Clear TFI bit */
 		UART_PUT_LSR(uart, TFI);
 #endif
+		/* Anomaly notes:
+		 *  05000215 -	we always clear ETBEI within last UART TX
+		 *		interrupt to end a string. It is always set
+		 *		when start a new tx.
+		 */
 		UART_CLEAR_IER(uart, ETBEI);
 		return;
 	}
@@ -528,6 +533,11 @@ static irqreturn_t bfin_serial_dma_tx_int(int irq, void *dev_id)
 	if (!(get_dma_curr_irqstat(uart->tx_dma_channel)&DMA_RUN)) {
 		disable_dma(uart->tx_dma_channel);
 		clear_dma_irqstat(uart->tx_dma_channel);
+		/* Anomaly notes:
+		 *  05000215 -	we always clear ETBEI within last UART TX
+		 *		interrupt to end a string. It is always set
+		 *		when start a new tx.
+		 */
 		UART_CLEAR_IER(uart, ETBEI);
 		xmit->tail = (xmit->tail + uart->tx_count) & (UART_XMIT_SIZE - 1);
 		uart->port.icount.tx += uart->tx_count;
@@ -969,6 +979,10 @@ static void bfin_serial_reset_irda(struct uart_port *port)
 }
 
 #ifdef CONFIG_CONSOLE_POLL
+/* Anomaly notes:
+ *  05000099 -  Because we only use THRE in poll_put and DR in poll_get,
+ *		losing other bits of UART_LSR is not a problem here.
+ */
 static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr)
 {
 	struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
-- 
cgit v0.10.2


From 84507794a9d5d4decd6bc9c111480076dba0d301 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 11 Jun 2009 13:50:20 +0100
Subject: Blackfin Serial Driver: handle anomaly 05000231

05000231 - UART STB Bit Incorrectly Affects Receiver Setting
For processors affected by this, we cannot safely allow CSTOPB to be set
as the UART will then be unable to properly clock in bytes.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index ab583ef..64603f5 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -828,8 +828,16 @@ bfin_serial_set_termios(struct uart_port *port, struct ktermios *termios,
 			__func__);
 	}
 
-	if (termios->c_cflag & CSTOPB)
-		lcr |= STB;
+	/* Anomaly notes:
+	 *  05000231 -  STOP bit is always set to 1 whatever the user is set.
+	 */
+	if (termios->c_cflag & CSTOPB) {
+		if (ANOMALY_05000231)
+			printk(KERN_WARNING "STOP bits other than 1 is not "
+				"supported in case of anomaly 05000231.\n");
+		else
+			lcr |= STB;
+	}
 	if (termios->c_cflag & PARENB)
 		lcr |= PEN;
 	if (!(termios->c_cflag & PARODD))
-- 
cgit v0.10.2


From 2860b7911137eabb01c159abefb506e538ff3cb7 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 11 Jun 2009 13:51:33 +0100
Subject: Blackfin Serial Driver: disable dma rx interrupt only rather than all
 irqs

The UART RX handling code isn't exactly speeding, so don't go disabling
all interrupts when processing the buffer.  Just disable the relevant DMA
interrupt.  This greatly improves latency of the system when utilizing the
UART.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index 64603f5..e2f6b1b 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -479,9 +479,9 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart)
 void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 {
 	int x_pos, pos;
-	unsigned long flags;
 
-	spin_lock_irqsave(&uart->port.lock, flags);
+	dma_disable_irq(uart->rx_dma_channel);
+	spin_lock_bh(&uart->port.lock);
 
 	/* 2D DMA RX buffer ring is used. Because curr_y_count and
 	 * curr_x_count can't be read as an atomic operation,
@@ -512,7 +512,8 @@ void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 		uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
 	}
 
-	spin_unlock_irqrestore(&uart->port.lock, flags);
+	spin_unlock_bh(&uart->port.lock);
+	dma_enable_irq(uart->rx_dma_channel);
 
 	mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES);
 }
-- 
cgit v0.10.2


From b7c7cbc898e8a97829f33ad3bcd1b5e91690d8f4 Mon Sep 17 00:00:00 2001
From: Craig Shelley <craig@microtron.org.uk>
Date: Thu, 11 Jun 2009 13:52:31 +0100
Subject: USB: CP210X Add device IDs

Signed-off-by: Craig Shelley <craig@microtron.org.uk>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index d9f586d..ac4944a 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -57,13 +57,16 @@ static int debug;
 static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */
 	{ USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
+	{ USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */
 	{ USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
+	{ USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
 	{ USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */
 	{ USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */
 	{ USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */
 	{ USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */
 	{ USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */
 	{ USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */
+	{ USB_DEVICE(0x10C4, 0x0F91) }, /* Vstabi */
 	{ USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */
 	{ USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */
 	{ USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */
@@ -84,10 +87,12 @@ static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
 	{ USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
 	{ USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
+	{ USB_DEVICE(0x10C4, 0x81F2) }, /* C1007 HF band RFID controller */
 	{ USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */
 	{ USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */
 	{ USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demostration module */
 	{ USB_DEVICE(0x10c4, 0x8293) }, /* Telegesys ETRX2USB */
+	{ USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
 	{ USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
 	{ USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */
 	{ USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
@@ -98,7 +103,9 @@ static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */
 	{ USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */
 	{ USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */
+	{ USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */
 	{ USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */
+	{ USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
 	{ USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */
 	{ USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
-- 
cgit v0.10.2


From 93ef1f1fbce37f14666e4856ff933d4a1b735d02 Mon Sep 17 00:00:00 2001
From: Craig Shelley <craig@microtron.org.uk>
Date: Thu, 11 Jun 2009 13:53:30 +0100
Subject: USB: CP210X Use official request code definitions

The CP210X driver was developed without official device specifications.
This has lead to an incorrect assumption that all GET request codes are
equal to the corresponding SET request code +1.
This patch removes this incorrect assumption, and uses request code
definitions based on the updated GPL driver from SiLabs.
This modification is needed before extended functionality such as GPIO
on CP2103 can be supported.

Signed-off-by: Craig Shelley <craig@microtron.org.uk>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index ac4944a..b50f27f 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -27,7 +27,7 @@
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.08"
+#define DRIVER_VERSION "v0.09"
 #define DRIVER_DESC "Silicon Labs CP2101/CP2102 RS232 serial adaptor driver"
 
 /*
@@ -144,23 +144,40 @@ static struct usb_serial_driver cp2101_device = {
 #define REQTYPE_HOST_TO_DEVICE	0x41
 #define REQTYPE_DEVICE_TO_HOST	0xc1
 
-/* Config SET requests. To GET, add 1 to the request number */
-#define CP2101_UART 		0x00	/* Enable / Disable */
-#define CP2101_BAUDRATE		0x01	/* (BAUD_RATE_GEN_FREQ / baudrate) */
-#define CP2101_BITS		0x03	/* 0x(0)(databits)(parity)(stopbits) */
-#define CP2101_BREAK		0x05	/* On / Off */
-#define CP2101_CONTROL		0x07	/* Flow control line states */
-#define CP2101_MODEMCTL		0x13	/* Modem controls */
-#define CP2101_CONFIG_6		0x19	/* 6 bytes of config data ??? */
-
-/* CP2101_UART */
+/* Config request codes */
+#define CP210X_IFC_ENABLE	0x00
+#define CP210X_SET_BAUDDIV	0x01
+#define CP210X_GET_BAUDDIV	0x02
+#define CP210X_SET_LINE_CTL	0x03
+#define CP210X_GET_LINE_CTL	0x04
+#define CP210X_SET_BREAK	0x05
+#define CP210X_IMM_CHAR		0x06
+#define CP210X_SET_MHS		0x07
+#define CP210X_GET_MDMSTS	0x08
+#define CP210X_SET_XON		0x09
+#define CP210X_SET_XOFF		0x0A
+#define CP210X_SET_EVENTMASK	0x0B
+#define CP210X_GET_EVENTMASK	0x0C
+#define CP210X_SET_CHAR		0x0D
+#define CP210X_GET_CHARS	0x0E
+#define CP210X_GET_PROPS	0x0F
+#define CP210X_GET_COMM_STATUS	0x10
+#define CP210X_RESET		0x11
+#define CP210X_PURGE		0x12
+#define CP210X_SET_FLOW		0x13
+#define CP210X_GET_FLOW		0x14
+#define CP210X_EMBED_EVENTS	0x15
+#define CP210X_GET_EVENTSTATE	0x16
+#define CP210X_SET_CHARS	0x19
+
+/* CP210X_IFC_ENABLE */
 #define UART_ENABLE		0x0001
 #define UART_DISABLE		0x0000
 
-/* CP2101_BAUDRATE */
+/* CP210X_(SET|GET)_BAUDDIV */
 #define BAUD_RATE_GEN_FREQ	0x384000
 
-/* CP2101_BITS */
+/* CP210X_(SET|GET)_LINE_CTL */
 #define BITS_DATA_MASK		0X0f00
 #define BITS_DATA_5		0X0500
 #define BITS_DATA_6		0X0600
@@ -180,11 +197,11 @@ static struct usb_serial_driver cp2101_device = {
 #define BITS_STOP_1_5		0x0001
 #define BITS_STOP_2		0x0002
 
-/* CP2101_BREAK */
+/* CP210X_SET_BREAK */
 #define BREAK_ON		0x0000
 #define BREAK_OFF		0x0001
 
-/* CP2101_CONTROL */
+/* CP210X_(SET_MHS|GET_MDMSTS) */
 #define CONTROL_DTR		0x0001
 #define CONTROL_RTS		0x0002
 #define CONTROL_CTS		0x0010
@@ -217,9 +234,6 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request,
 		return -ENOMEM;
 	}
 
-	/* For get requests, the request number must be incremented */
-	request++;
-
 	/* Issue the request, attempting to read 'size' bytes */
 	result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
 				request, REQTYPE_DEVICE_TO_HOST, 0x0000,
@@ -357,7 +371,7 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
 
 	dbg("%s - port %d", __func__, port->number);
 
-	if (cp2101_set_config_single(port, CP2101_UART, UART_ENABLE)) {
+	if (cp2101_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) {
 		dev_err(&port->dev, "%s - Unable to enable UART\n",
 				__func__);
 		return -EPROTO;
@@ -415,7 +429,7 @@ static void cp2101_close(struct usb_serial_port *port)
 
 	mutex_lock(&port->serial->disc_mutex);
 	if (!port->serial->disconnected)
-		cp2101_set_config_single(port, CP2101_UART, UART_DISABLE);
+		cp2101_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE);
 	mutex_unlock(&port->serial->disc_mutex);
 }
 
@@ -456,7 +470,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 
 	dbg("%s - port %d", __func__, port->number);
 
-	cp2101_get_config(port, CP2101_BAUDRATE, &baud, 2);
+	cp2101_get_config(port, CP210X_GET_BAUDDIV, &baud, 2);
 	/* Convert to baudrate */
 	if (baud)
 		baud = cp2101_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
@@ -466,7 +480,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 
 	cflag = *cflagp;
 
-	cp2101_get_config(port, CP2101_BITS, &bits, 2);
+	cp2101_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 	cflag &= ~CSIZE;
 	switch (bits & BITS_DATA_MASK) {
 	case BITS_DATA_5:
@@ -491,14 +505,14 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 		cflag |= CS8;
 		bits &= ~BITS_DATA_MASK;
 		bits |= BITS_DATA_8;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	default:
 		dbg("%s - Unknown number of data bits, using 8", __func__);
 		cflag |= CS8;
 		bits &= ~BITS_DATA_MASK;
 		bits |= BITS_DATA_8;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
@@ -521,20 +535,20 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 				__func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	case BITS_PARITY_SPACE:
 		dbg("%s - parity = SPACE (not supported, disabling parity)",
 				__func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	default:
 		dbg("%s - Unknown parity mode, disabling parity", __func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
@@ -547,7 +561,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 		dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)",
 								__func__);
 		bits &= ~BITS_STOP_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	case BITS_STOP_2:
 		dbg("%s - stop bits = 2", __func__);
@@ -557,11 +571,11 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 		dbg("%s - Unknown number of stop bits, using 1 stop bit",
 								__func__);
 		bits &= ~BITS_STOP_MASK;
-		cp2101_set_config(port, CP2101_BITS, &bits, 2);
+		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
-	cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+	cp2101_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
 	if (modem_ctl[0] & 0x0008) {
 		dbg("%s - flow control = CRTSCTS", __func__);
 		cflag |= CRTSCTS;
@@ -594,7 +608,7 @@ static void cp2101_set_termios(struct tty_struct *tty,
 	if (baud != tty_termios_baud_rate(old_termios) && baud != 0) {
 		dbg("%s - Setting baud rate to %d baud", __func__,
 				baud);
-		if (cp2101_set_config_single(port, CP2101_BAUDRATE,
+		if (cp2101_set_config_single(port, CP210X_SET_BAUDDIV,
 					((BAUD_RATE_GEN_FREQ + baud/2) / baud))) {
 			dbg("Baud rate requested not supported by device\n");
 			baud = tty_termios_baud_rate(old_termios);
@@ -605,7 +619,7 @@ static void cp2101_set_termios(struct tty_struct *tty,
 
 	/* If the number of data bits is to be updated */
 	if ((cflag & CSIZE) != (old_cflag & CSIZE)) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp2101_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_DATA_MASK;
 		switch (cflag & CSIZE) {
 		case CS5:
@@ -635,13 +649,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
 				bits |= BITS_DATA_8;
 				break;
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Number of data bits requested "
 					"not supported by device\n");
 	}
 
 	if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp2101_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_PARITY_MASK;
 		if (cflag & PARENB) {
 			if (cflag & PARODD) {
@@ -652,13 +666,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
 				dbg("%s - parity = EVEN", __func__);
 			}
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Parity mode not supported "
 					"by device\n");
 	}
 
 	if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) {
-		cp2101_get_config(port, CP2101_BITS, &bits, 2);
+		cp2101_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_STOP_MASK;
 		if (cflag & CSTOPB) {
 			bits |= BITS_STOP_2;
@@ -667,13 +681,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
 			bits |= BITS_STOP_1;
 			dbg("%s - stop bits = 1", __func__);
 		}
-		if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+		if (cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Number of stop bits requested "
 					"not supported by device\n");
 	}
 
 	if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) {
-		cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+		cp2101_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
 		dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
 				__func__, modem_ctl[0], modem_ctl[1],
 				modem_ctl[2], modem_ctl[3]);
@@ -693,7 +707,7 @@ static void cp2101_set_termios(struct tty_struct *tty,
 		dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
 				__func__, modem_ctl[0], modem_ctl[1],
 				modem_ctl[2], modem_ctl[3]);
-		cp2101_set_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+		cp2101_set_config(port, CP210X_SET_FLOW, modem_ctl, 16);
 	}
 
 }
@@ -731,7 +745,7 @@ static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file,
 
 	dbg("%s - control = 0x%.4x", __func__, control);
 
-	return cp2101_set_config(port, CP2101_CONTROL, &control, 2);
+	return cp2101_set_config(port, CP210X_SET_MHS, &control, 2);
 }
 
 static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
@@ -742,7 +756,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
 
 	dbg("%s - port %d", __func__, port->number);
 
-	cp2101_get_config(port, CP2101_CONTROL, &control, 1);
+	cp2101_get_config(port, CP210X_GET_MDMSTS, &control, 1);
 
 	result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0)
 		|((control & CONTROL_RTS) ? TIOCM_RTS : 0)
@@ -768,7 +782,7 @@ static void cp2101_break_ctl (struct tty_struct *tty, int break_state)
 		state = BREAK_ON;
 	dbg("%s - turning break %s", __func__,
 			state == BREAK_OFF ? "off" : "on");
-	cp2101_set_config(port, CP2101_BREAK, &state, 2);
+	cp2101_set_config(port, CP210X_SET_BREAK, &state, 2);
 }
 
 static int cp2101_startup(struct usb_serial *serial)
-- 
cgit v0.10.2


From 4cc27bd6d7d6750dba33b4ccb4585c00b8fca7d2 Mon Sep 17 00:00:00 2001
From: Craig Shelley <craig@microtron.org.uk>
Date: Thu, 11 Jun 2009 13:54:40 +0100
Subject: USB: CP210X Replace CP2101 with CP210x

This patch replaces the string "CP2101" with "CP210x" within cp210x.c
This is to reduce confusion about the fact that the driver is actually
compatible with CP2101, CP2102 and CP2103 devices.

Signed-off-by: Craig Shelley <craig@microtron.org.uk>

(Fixed some collisions merging)

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index b50f27f..16a154d 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -1,5 +1,5 @@
 /*
- * Silicon Laboratories CP2101/CP2102 USB to RS232 serial adaptor driver
+ * Silicon Laboratories CP210x USB to RS232 serial adaptor driver
  *
  * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk)
  *
@@ -28,29 +28,29 @@
  * Version Information
  */
 #define DRIVER_VERSION "v0.09"
-#define DRIVER_DESC "Silicon Labs CP2101/CP2102 RS232 serial adaptor driver"
+#define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver"
 
 /*
  * Function Prototypes
  */
-static int cp2101_open(struct tty_struct *, struct usb_serial_port *,
+static int cp210x_open(struct tty_struct *, struct usb_serial_port *,
 							struct file *);
-static void cp2101_cleanup(struct usb_serial_port *);
-static void cp2101_close(struct usb_serial_port *);
-static void cp2101_get_termios(struct tty_struct *,
+static void cp210x_cleanup(struct usb_serial_port *);
+static void cp210x_close(struct usb_serial_port *);
+static void cp210x_get_termios(struct tty_struct *,
 	struct usb_serial_port *port);
-static void cp2101_get_termios_port(struct usb_serial_port *port,
+static void cp210x_get_termios_port(struct usb_serial_port *port,
 	unsigned int *cflagp, unsigned int *baudp);
-static void cp2101_set_termios(struct tty_struct *, struct usb_serial_port *,
+static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *,
 							struct ktermios*);
-static int cp2101_tiocmget(struct tty_struct *, struct file *);
-static int cp2101_tiocmset(struct tty_struct *, struct file *,
+static int cp210x_tiocmget(struct tty_struct *, struct file *);
+static int cp210x_tiocmset(struct tty_struct *, struct file *,
 		unsigned int, unsigned int);
-static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *,
+static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *,
 		unsigned int, unsigned int);
-static void cp2101_break_ctl(struct tty_struct *, int);
-static int cp2101_startup(struct usb_serial *);
-static void cp2101_shutdown(struct usb_serial *);
+static void cp210x_break_ctl(struct tty_struct *, int);
+static int cp210x_startup(struct usb_serial *);
+static void cp210x_shutdown(struct usb_serial *);
 
 static int debug;
 
@@ -114,30 +114,30 @@ static struct usb_device_id id_table [] = {
 
 MODULE_DEVICE_TABLE(usb, id_table);
 
-static struct usb_driver cp2101_driver = {
-	.name		= "cp2101",
+static struct usb_driver cp210x_driver = {
+	.name		= "cp210x",
 	.probe		= usb_serial_probe,
 	.disconnect	= usb_serial_disconnect,
 	.id_table	= id_table,
 	.no_dynamic_id	= 	1,
 };
 
-static struct usb_serial_driver cp2101_device = {
+static struct usb_serial_driver cp210x_device = {
 	.driver = {
 		.owner =	THIS_MODULE,
-		.name = 	"cp2101",
+		.name = 	"cp210x",
 	},
-	.usb_driver		= &cp2101_driver,
+	.usb_driver		= &cp210x_driver,
 	.id_table		= id_table,
 	.num_ports		= 1,
-	.open			= cp2101_open,
-	.close			= cp2101_close,
-	.break_ctl		= cp2101_break_ctl,
-	.set_termios		= cp2101_set_termios,
-	.tiocmget 		= cp2101_tiocmget,
-	.tiocmset		= cp2101_tiocmset,
-	.attach			= cp2101_startup,
-	.shutdown		= cp2101_shutdown,
+	.open			= cp210x_open,
+	.close			= cp210x_close,
+	.break_ctl		= cp210x_break_ctl,
+	.set_termios		= cp210x_set_termios,
+	.tiocmget 		= cp210x_tiocmget,
+	.tiocmset		= cp210x_tiocmset,
+	.attach			= cp210x_startup,
+	.shutdown		= cp210x_shutdown,
 };
 
 /* Config request types */
@@ -212,13 +212,13 @@ static struct usb_serial_driver cp2101_device = {
 #define CONTROL_WRITE_RTS	0x0200
 
 /*
- * cp2101_get_config
- * Reads from the CP2101 configuration registers
+ * cp210x_get_config
+ * Reads from the CP210x configuration registers
  * 'size' is specified in bytes.
  * 'data' is a pointer to a pre-allocated array of integers large
  * enough to hold 'size' bytes (with 4 bytes to each integer)
  */
-static int cp2101_get_config(struct usb_serial_port *port, u8 request,
+static int cp210x_get_config(struct usb_serial_port *port, u8 request,
 		unsigned int *data, int size)
 {
 	struct usb_serial *serial = port->serial;
@@ -256,12 +256,12 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request,
 }
 
 /*
- * cp2101_set_config
- * Writes to the CP2101 configuration registers
+ * cp210x_set_config
+ * Writes to the CP210x configuration registers
  * Values less than 16 bits wide are sent directly
  * 'size' is specified in bytes.
  */
-static int cp2101_set_config(struct usb_serial_port *port, u8 request,
+static int cp210x_set_config(struct usb_serial_port *port, u8 request,
 		unsigned int *data, int size)
 {
 	struct usb_serial *serial = port->serial;
@@ -312,21 +312,21 @@ static int cp2101_set_config(struct usb_serial_port *port, u8 request,
 }
 
 /*
- * cp2101_set_config_single
- * Convenience function for calling cp2101_set_config on single data values
+ * cp210x_set_config_single
+ * Convenience function for calling cp210x_set_config on single data values
  * without requiring an integer pointer
  */
-static inline int cp2101_set_config_single(struct usb_serial_port *port,
+static inline int cp210x_set_config_single(struct usb_serial_port *port,
 		u8 request, unsigned int data)
 {
-	return cp2101_set_config(port, request, &data, 2);
+	return cp210x_set_config(port, request, &data, 2);
 }
 
 /*
- * cp2101_quantise_baudrate
+ * cp210x_quantise_baudrate
  * Quantises the baud rate as per AN205 Table 1
  */
-static unsigned int cp2101_quantise_baudrate(unsigned int baud) {
+static unsigned int cp210x_quantise_baudrate(unsigned int baud) {
 	if      (baud <= 56)       baud = 0;
 	else if (baud <= 300)      baud = 300;
 	else if (baud <= 600)      baud = 600;
@@ -363,7 +363,7 @@ static unsigned int cp2101_quantise_baudrate(unsigned int baud) {
 	return baud;
 }
 
-static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
+static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port,
 				struct file *filp)
 {
 	struct usb_serial *serial = port->serial;
@@ -371,7 +371,7 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
 
 	dbg("%s - port %d", __func__, port->number);
 
-	if (cp2101_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) {
+	if (cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) {
 		dev_err(&port->dev, "%s - Unable to enable UART\n",
 				__func__);
 		return -EPROTO;
@@ -393,17 +393,17 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
 	}
 
 	/* Configure the termios structure */
-	cp2101_get_termios(tty, port);
+	cp210x_get_termios(tty, port);
 
 	/* Set the DTR and RTS pins low */
-	cp2101_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
+	cp210x_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
 			: port,
 		NULL, TIOCM_DTR | TIOCM_RTS, 0);
 
 	return 0;
 }
 
-static void cp2101_cleanup(struct usb_serial_port *port)
+static void cp210x_cleanup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 
@@ -418,7 +418,7 @@ static void cp2101_cleanup(struct usb_serial_port *port)
 	}
 }
 
-static void cp2101_close(struct usb_serial_port *port)
+static void cp210x_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
@@ -429,23 +429,23 @@ static void cp2101_close(struct usb_serial_port *port)
 
 	mutex_lock(&port->serial->disc_mutex);
 	if (!port->serial->disconnected)
-		cp2101_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE);
+		cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE);
 	mutex_unlock(&port->serial->disc_mutex);
 }
 
 /*
- * cp2101_get_termios
+ * cp210x_get_termios
  * Reads the baud rate, data bits, parity, stop bits and flow control mode
  * from the device, corrects any unsupported values, and configures the
  * termios structure to reflect the state of the device
  */
-static void cp2101_get_termios(struct tty_struct *tty,
+static void cp210x_get_termios(struct tty_struct *tty,
 	struct usb_serial_port *port)
 {
 	unsigned int baud;
 
 	if (tty) {
-		cp2101_get_termios_port(tty->driver_data,
+		cp210x_get_termios_port(tty->driver_data,
 			&tty->termios->c_cflag, &baud);
 		tty_encode_baud_rate(tty, baud, baud);
 	}
@@ -453,15 +453,15 @@ static void cp2101_get_termios(struct tty_struct *tty,
 	else {
 		unsigned int cflag;
 		cflag = 0;
-		cp2101_get_termios_port(port, &cflag, &baud);
+		cp210x_get_termios_port(port, &cflag, &baud);
 	}
 }
 
 /*
- * cp2101_get_termios_port
- * This is the heart of cp2101_get_termios which always uses a &usb_serial_port.
+ * cp210x_get_termios_port
+ * This is the heart of cp210x_get_termios which always uses a &usb_serial_port.
  */
-static void cp2101_get_termios_port(struct usb_serial_port *port,
+static void cp210x_get_termios_port(struct usb_serial_port *port,
 	unsigned int *cflagp, unsigned int *baudp)
 {
 	unsigned int cflag, modem_ctl[4];
@@ -470,17 +470,17 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 
 	dbg("%s - port %d", __func__, port->number);
 
-	cp2101_get_config(port, CP210X_GET_BAUDDIV, &baud, 2);
+	cp210x_get_config(port, CP210X_GET_BAUDDIV, &baud, 2);
 	/* Convert to baudrate */
 	if (baud)
-		baud = cp2101_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
+		baud = cp210x_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
 
 	dbg("%s - baud rate = %d", __func__, baud);
 	*baudp = baud;
 
 	cflag = *cflagp;
 
-	cp2101_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
+	cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 	cflag &= ~CSIZE;
 	switch (bits & BITS_DATA_MASK) {
 	case BITS_DATA_5:
@@ -505,14 +505,14 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 		cflag |= CS8;
 		bits &= ~BITS_DATA_MASK;
 		bits |= BITS_DATA_8;
-		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	default:
 		dbg("%s - Unknown number of data bits, using 8", __func__);
 		cflag |= CS8;
 		bits &= ~BITS_DATA_MASK;
 		bits |= BITS_DATA_8;
-		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
@@ -535,20 +535,20 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 				__func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	case BITS_PARITY_SPACE:
 		dbg("%s - parity = SPACE (not supported, disabling parity)",
 				__func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	default:
 		dbg("%s - Unknown parity mode, disabling parity", __func__);
 		cflag &= ~PARENB;
 		bits &= ~BITS_PARITY_MASK;
-		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
@@ -561,7 +561,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 		dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)",
 								__func__);
 		bits &= ~BITS_STOP_MASK;
-		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	case BITS_STOP_2:
 		dbg("%s - stop bits = 2", __func__);
@@ -571,11 +571,11 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 		dbg("%s - Unknown number of stop bits, using 1 stop bit",
 								__func__);
 		bits &= ~BITS_STOP_MASK;
-		cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
+		cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
 		break;
 	}
 
-	cp2101_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
+	cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
 	if (modem_ctl[0] & 0x0008) {
 		dbg("%s - flow control = CRTSCTS", __func__);
 		cflag |= CRTSCTS;
@@ -587,7 +587,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 	*cflagp = cflag;
 }
 
-static void cp2101_set_termios(struct tty_struct *tty,
+static void cp210x_set_termios(struct tty_struct *tty,
 		struct usb_serial_port *port, struct ktermios *old_termios)
 {
 	unsigned int cflag, old_cflag;
@@ -602,13 +602,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
 	tty->termios->c_cflag &= ~CMSPAR;
 	cflag = tty->termios->c_cflag;
 	old_cflag = old_termios->c_cflag;
-	baud = cp2101_quantise_baudrate(tty_get_baud_rate(tty));
+	baud = cp210x_quantise_baudrate(tty_get_baud_rate(tty));
 
 	/* If the baud rate is to be updated*/
 	if (baud != tty_termios_baud_rate(old_termios) && baud != 0) {
 		dbg("%s - Setting baud rate to %d baud", __func__,
 				baud);
-		if (cp2101_set_config_single(port, CP210X_SET_BAUDDIV,
+		if (cp210x_set_config_single(port, CP210X_SET_BAUDDIV,
 					((BAUD_RATE_GEN_FREQ + baud/2) / baud))) {
 			dbg("Baud rate requested not supported by device\n");
 			baud = tty_termios_baud_rate(old_termios);
@@ -619,7 +619,7 @@ static void cp2101_set_termios(struct tty_struct *tty,
 
 	/* If the number of data bits is to be updated */
 	if ((cflag & CSIZE) != (old_cflag & CSIZE)) {
-		cp2101_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_DATA_MASK;
 		switch (cflag & CSIZE) {
 		case CS5:
@@ -643,19 +643,19 @@ static void cp2101_set_termios(struct tty_struct *tty,
 			dbg("%s - data bits = 9", __func__);
 			break;*/
 		default:
-			dbg("cp2101 driver does not "
+			dbg("cp210x driver does not "
 					"support the number of bits requested,"
 					" using 8 bit mode\n");
 				bits |= BITS_DATA_8;
 				break;
 		}
-		if (cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Number of data bits requested "
 					"not supported by device\n");
 	}
 
 	if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) {
-		cp2101_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_PARITY_MASK;
 		if (cflag & PARENB) {
 			if (cflag & PARODD) {
@@ -666,13 +666,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
 				dbg("%s - parity = EVEN", __func__);
 			}
 		}
-		if (cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Parity mode not supported "
 					"by device\n");
 	}
 
 	if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) {
-		cp2101_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
+		cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
 		bits &= ~BITS_STOP_MASK;
 		if (cflag & CSTOPB) {
 			bits |= BITS_STOP_2;
@@ -681,13 +681,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
 			bits |= BITS_STOP_1;
 			dbg("%s - stop bits = 1", __func__);
 		}
-		if (cp2101_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
+		if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
 			dbg("Number of stop bits requested "
 					"not supported by device\n");
 	}
 
 	if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) {
-		cp2101_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
+		cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
 		dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
 				__func__, modem_ctl[0], modem_ctl[1],
 				modem_ctl[2], modem_ctl[3]);
@@ -707,19 +707,19 @@ static void cp2101_set_termios(struct tty_struct *tty,
 		dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
 				__func__, modem_ctl[0], modem_ctl[1],
 				modem_ctl[2], modem_ctl[3]);
-		cp2101_set_config(port, CP210X_SET_FLOW, modem_ctl, 16);
+		cp210x_set_config(port, CP210X_SET_FLOW, modem_ctl, 16);
 	}
 
 }
 
-static int cp2101_tiocmset (struct tty_struct *tty, struct file *file,
+static int cp210x_tiocmset (struct tty_struct *tty, struct file *file,
 		unsigned int set, unsigned int clear)
 {
 	struct usb_serial_port *port = tty->driver_data;
-	return cp2101_tiocmset_port(port, file, set, clear);
+	return cp210x_tiocmset_port(port, file, set, clear);
 }
 
-static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file,
+static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *file,
 		unsigned int set, unsigned int clear)
 {
 	unsigned int control = 0;
@@ -745,10 +745,10 @@ static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file,
 
 	dbg("%s - control = 0x%.4x", __func__, control);
 
-	return cp2101_set_config(port, CP210X_SET_MHS, &control, 2);
+	return cp210x_set_config(port, CP210X_SET_MHS, &control, 2);
 }
 
-static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
+static int cp210x_tiocmget (struct tty_struct *tty, struct file *file)
 {
 	struct usb_serial_port *port = tty->driver_data;
 	unsigned int control;
@@ -756,7 +756,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
 
 	dbg("%s - port %d", __func__, port->number);
 
-	cp2101_get_config(port, CP210X_GET_MDMSTS, &control, 1);
+	cp210x_get_config(port, CP210X_GET_MDMSTS, &control, 1);
 
 	result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0)
 		|((control & CONTROL_RTS) ? TIOCM_RTS : 0)
@@ -770,7 +770,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
 	return result;
 }
 
-static void cp2101_break_ctl (struct tty_struct *tty, int break_state)
+static void cp210x_break_ctl (struct tty_struct *tty, int break_state)
 {
 	struct usb_serial_port *port = tty->driver_data;
 	unsigned int state;
@@ -782,17 +782,17 @@ static void cp2101_break_ctl (struct tty_struct *tty, int break_state)
 		state = BREAK_ON;
 	dbg("%s - turning break %s", __func__,
 			state == BREAK_OFF ? "off" : "on");
-	cp2101_set_config(port, CP210X_SET_BREAK, &state, 2);
+	cp210x_set_config(port, CP210X_SET_BREAK, &state, 2);
 }
 
-static int cp2101_startup(struct usb_serial *serial)
+static int cp210x_startup(struct usb_serial *serial)
 {
-	/* CP2101 buffers behave strangely unless device is reset */
+	/* cp210x buffers behave strangely unless device is reset */
 	usb_reset_device(serial->dev);
 	return 0;
 }
 
-static void cp2101_shutdown(struct usb_serial *serial)
+static void cp210x_shutdown(struct usb_serial *serial)
 {
 	int i;
 
@@ -800,21 +800,21 @@ static void cp2101_shutdown(struct usb_serial *serial)
 
 	/* Stop reads and writes on all ports */
 	for (i = 0; i < serial->num_ports; ++i)
-		cp2101_cleanup(serial->port[i]);
+		cp210x_cleanup(serial->port[i]);
 }
 
-static int __init cp2101_init(void)
+static int __init cp210x_init(void)
 {
 	int retval;
 
-	retval = usb_serial_register(&cp2101_device);
+	retval = usb_serial_register(&cp210x_device);
 	if (retval)
 		return retval; /* Failed to register */
 
-	retval = usb_register(&cp2101_driver);
+	retval = usb_register(&cp210x_driver);
 	if (retval) {
 		/* Failed to register */
-		usb_serial_deregister(&cp2101_device);
+		usb_serial_deregister(&cp210x_device);
 		return retval;
 	}
 
@@ -824,14 +824,14 @@ static int __init cp2101_init(void)
 	return 0;
 }
 
-static void __exit cp2101_exit(void)
+static void __exit cp210x_exit(void)
 {
-	usb_deregister(&cp2101_driver);
-	usb_serial_deregister(&cp2101_device);
+	usb_deregister(&cp210x_driver);
+	usb_serial_deregister(&cp210x_device);
 }
 
-module_init(cp2101_init);
-module_exit(cp2101_exit);
+module_init(cp210x_init);
+module_exit(cp210x_exit);
 
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_VERSION(DRIVER_VERSION);
-- 
cgit v0.10.2


From 557aaa7ffab639d0190b935a041b16ae44606342 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 13:55:34 +0100
Subject: ft232: support the ASYNC_LOW_LATENCY flag

This allows users to use the standard setserial command with this FT232
feature as well as obscure chip specific interfaces we have now. We keep
track of and respect the sysfs value for non-low-latency cases. In theory we
could do smart stuff with VTIME and the like but this seems of questionable
worth.

Closes-bug: http://bugzilla.kernel.org/show_bug.cgi?id=9120
Signed-off-by: Alan Cox <alan@linux.intel.com)
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index d9d8711..21c053c 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -89,6 +89,7 @@ struct ftdi_private {
 	int force_rtscts;	/* if non-zero, force RTS-CTS to always
 				   be enabled */
 
+	unsigned int latency;		/* latency setting in use */
 	spinlock_t tx_lock;	/* spinlock for transmit state */
 	unsigned long tx_bytes;
 	unsigned long tx_outstanding_bytes;
@@ -1038,7 +1039,54 @@ static int change_speed(struct tty_struct *tty, struct usb_serial_port *port)
 	return rv;
 }
 
+static int write_latency_timer(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_device *udev = port->serial->dev;
+	char buf[1];
+	int rv = 0;
+	int l = priv->latency;
+
+	if (priv->flags & ASYNC_LOW_LATENCY)
+		l = 1;
+
+	dbg("%s: setting latency timer = %i", __func__, l);
+
+	rv = usb_control_msg(udev,
+			     usb_sndctrlpipe(udev, 0),
+			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
+			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
+			     l, priv->interface,
+			     buf, 0, WDR_TIMEOUT);
 
+	if (rv < 0)
+		dev_err(&port->dev, "Unable to write latency timer: %i\n", rv);
+	return rv;
+}
+
+static int read_latency_timer(struct usb_serial_port *port)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	struct usb_device *udev = port->serial->dev;
+	unsigned short latency = 0;
+	int rv = 0;
+
+
+	dbg("%s", __func__);
+
+	rv = usb_control_msg(udev,
+			     usb_rcvctrlpipe(udev, 0),
+			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
+			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
+			     0, priv->interface,
+			     (char *) &latency, 1, WDR_TIMEOUT);
+
+	if (rv < 0) {
+		dev_err(&port->dev, "Unable to read latency timer: %i\n", rv);
+		return -EIO;
+	}
+	return latency;
+}
 
 static int get_serial_info(struct usb_serial_port *port,
 				struct serial_struct __user *retinfo)
@@ -1098,6 +1146,7 @@ static int set_serial_info(struct tty_struct *tty,
 	priv->custom_divisor = new_serial.custom_divisor;
 
 	tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	write_latency_timer(port);
 
 check_and_exit:
 	if ((old_priv.flags & ASYNC_SPD_MASK) !=
@@ -1193,27 +1242,13 @@ static ssize_t show_latency_timer(struct device *dev,
 {
 	struct usb_serial_port *port = to_usb_serial_port(dev);
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	struct usb_device *udev = port->serial->dev;
-	unsigned short latency = 0;
-	int rv = 0;
-
-
-	dbg("%s", __func__);
-
-	rv = usb_control_msg(udev,
-			     usb_rcvctrlpipe(udev, 0),
-			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
-			     FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
-			     0, priv->interface,
-			     (char *) &latency, 1, WDR_TIMEOUT);
-
-	if (rv < 0) {
-		dev_err(dev, "Unable to read latency timer: %i\n", rv);
-		return -EIO;
-	}
-	return sprintf(buf, "%i\n", latency);
+	if (priv->flags & ASYNC_LOW_LATENCY)
+		return sprintf(buf, "1\n");
+	else
+		return sprintf(buf, "%i\n", priv->latency);
 }
 
+
 /* Write a new value of the latency timer, in units of milliseconds. */
 static ssize_t store_latency_timer(struct device *dev,
 			struct device_attribute *attr, const char *valbuf,
@@ -1221,25 +1256,13 @@ static ssize_t store_latency_timer(struct device *dev,
 {
 	struct usb_serial_port *port = to_usb_serial_port(dev);
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	struct usb_device *udev = port->serial->dev;
-	char buf[1];
 	int v = simple_strtoul(valbuf, NULL, 10);
 	int rv = 0;
 
-	dbg("%s: setting latency timer = %i", __func__, v);
-
-	rv = usb_control_msg(udev,
-			     usb_sndctrlpipe(udev, 0),
-			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
-			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
-			     v, priv->interface,
-			     buf, 0, WDR_TIMEOUT);
-
-	if (rv < 0) {
-		dev_err(dev, "Unable to write latency timer: %i\n", rv);
+	priv->latency = v;
+	rv = write_latency_timer(port);
+	if (rv < 0)
 		return -EIO;
-	}
-
 	return count;
 }
 
@@ -1393,6 +1416,7 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port)
 	usb_set_serial_port_data(port, priv);
 
 	ftdi_determine_type(port);
+	read_latency_timer(port);
 	create_sysfs_attrs(port);
 	return 0;
 }
@@ -1515,6 +1539,8 @@ static int ftdi_open(struct tty_struct *tty,
 	if (tty)
 		tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
+	write_latency_timer(port);
+
 	/* No error checking for this (will get errors later anyway) */
 	/* See ftdi_sio.h for description of what is reset */
 	usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
-- 
cgit v0.10.2


From 7f8d09eae26a8108406583192996561665b36371 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 13:56:16 +0100
Subject: tty: fix bluetooth scribbling on low latency flags

Bluetooth shouldn't be doing this as most drivers don't support the flag,
furthermore it shouldn't be needed with newer buffering. This becomes rather
more visible as the locking fixes make the abuse of low_latency visible as
spew on the users console/dmesg.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 6880151..4895f0e 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -463,7 +463,6 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file * file,
 				clear_bit(HCI_UART_PROTO_SET, &hu->flags);
 				return err;
 			}
-			tty->low_latency = 1;
 		} else
 			return -EBUSY;
 		break;
-- 
cgit v0.10.2


From 7e9cd3a617414cfe74342659ceeb4e92975c1efa Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 13:57:01 +0100
Subject: ftdi_sio: don't override modem bits

The new open/close logic handles DTR and friends, so don't do it in our own
open routine as well.

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 21c053c..683304d 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1556,11 +1556,6 @@ static int ftdi_open(struct tty_struct *tty,
 	if (tty)
 		ftdi_set_termios(tty, port, tty->termios);
 
-	/* FIXME: Flow control might be enabled, so it should be checked -
-	   we have no control of defaults! */
-	/* Turn on RTS and DTR since we are not flow controlling by default */
-	set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-
 	/* Not throttled */
 	spin_lock_irqsave(&priv->rx_lock, flags);
 	priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED);
-- 
cgit v0.10.2


From 0b91421857414f525690ee452c0b0acb6ab1dba3 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 11 Jun 2009 14:01:45 +0100
Subject: tty: bfin_jtag_comm: emulate a TTY over the Blackfin EMUDAT/JTAG
 interface

The Blackfin JTAG interface has a 4 byte generic data field (EMUDAT).  With
a little creative thinking, we can turn this into a TTY device.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 735bbe2..02ecfd5 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -97,6 +97,19 @@ config DEVKMEM
 	  kind of kernel debugging operations.
 	  When in doubt, say "N".
 
+config BFIN_JTAG_COMM
+	tristate "Blackfin JTAG Communication"
+	depends on BLACKFIN
+	help
+	  Add support for emulating a TTY device over the Blackfin JTAG.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called bfin_jtag_comm.
+
+config BFIN_JTAG_COMM_CONSOLE
+	bool "Console on Blackfin JTAG"
+	depends on BFIN_JTAG_COMM=y
+
 config SERIAL_NONSTANDARD
 	bool "Non-standard serial port support"
 	depends on HAS_IOMEM
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 9caf5b5..189efcf 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_LEGACY_PTYS)	+= pty.o
 obj-$(CONFIG_UNIX98_PTYS)	+= pty.o
 obj-y				+= misc.o
 obj-$(CONFIG_VT)		+= vt_ioctl.o vc_screen.o selection.o keyboard.o
+obj-$(CONFIG_BFIN_JTAG_COMM)	+= bfin_jtag_comm.o
 obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
 obj-$(CONFIG_HW_CONSOLE)	+= vt.o defkeymap.o
 obj-$(CONFIG_AUDIT)		+= tty_audit.o
diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c
new file mode 100644
index 0000000..44c113d
--- /dev/null
+++ b/drivers/char/bfin_jtag_comm.c
@@ -0,0 +1,365 @@
+/*
+ * TTY over Blackfin JTAG Communication
+ *
+ * Copyright 2008-2009 Analog Devices Inc.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <asm/atomic.h>
+
+/* See the Debug/Emulation chapter in the HRM */
+#define EMUDOF   0x00000001	/* EMUDAT_OUT full & valid */
+#define EMUDIF   0x00000002	/* EMUDAT_IN full & valid */
+#define EMUDOOVF 0x00000004	/* EMUDAT_OUT overflow */
+#define EMUDIOVF 0x00000008	/* EMUDAT_IN overflow */
+
+#define DRV_NAME "bfin-jtag-comm"
+#define DEV_NAME "ttyBFJC"
+
+#define pr_init(fmt, args...) ({ static const __initdata char __fmt[] = fmt; printk(__fmt, ## args); })
+#define debug(fmt, args...) pr_debug(DRV_NAME ": " fmt, ## args)
+
+static inline uint32_t bfin_write_emudat(uint32_t emudat)
+{
+	__asm__ __volatile__("emudat = %0;" : : "d"(emudat));
+	return emudat;
+}
+
+static inline uint32_t bfin_read_emudat(void)
+{
+	uint32_t emudat;
+	__asm__ __volatile__("%0 = emudat;" : "=d"(emudat));
+	return emudat;
+}
+
+static inline uint32_t bfin_write_emudat_chars(char a, char b, char c, char d)
+{
+	return bfin_write_emudat((a << 0) | (b << 8) | (c << 16) | (d << 24));
+}
+
+#define CIRC_SIZE 2048	/* see comment in tty_io.c:do_tty_write() */
+#define CIRC_MASK (CIRC_SIZE - 1)
+#define circ_empty(circ)     ((circ)->head == (circ)->tail)
+#define circ_free(circ)      CIRC_SPACE((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_cnt(circ)       CIRC_CNT((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_byte(circ, idx) ((circ)->buf[(idx) & CIRC_MASK])
+
+static struct tty_driver *bfin_jc_driver;
+static struct task_struct *bfin_jc_kthread;
+static struct tty_struct * volatile bfin_jc_tty;
+static unsigned long bfin_jc_count;
+static DEFINE_MUTEX(bfin_jc_tty_mutex);
+static volatile struct circ_buf bfin_jc_write_buf;
+
+static int
+bfin_jc_emudat_manager(void *arg)
+{
+	uint32_t inbound_len = 0, outbound_len = 0;
+
+	while (!kthread_should_stop()) {
+		/* no one left to give data to, so sleep */
+		if (bfin_jc_tty == NULL && circ_empty(&bfin_jc_write_buf)) {
+			debug("waiting for readers\n");
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule();
+			__set_current_state(TASK_RUNNING);
+		}
+
+		/* no data available, so just chill */
+		if (!(bfin_read_DBGSTAT() & EMUDIF) && circ_empty(&bfin_jc_write_buf)) {
+			debug("waiting for data (in_len = %i) (circ: %i %i)\n",
+				inbound_len, bfin_jc_write_buf.tail, bfin_jc_write_buf.head);
+			if (inbound_len)
+				schedule();
+			else
+				schedule_timeout_interruptible(HZ);
+			continue;
+		}
+
+		/* if incoming data is ready, eat it */
+		if (bfin_read_DBGSTAT() & EMUDIF) {
+			struct tty_struct *tty;
+			mutex_lock(&bfin_jc_tty_mutex);
+			tty = (struct tty_struct *)bfin_jc_tty;
+			if (tty != NULL) {
+				uint32_t emudat = bfin_read_emudat();
+				if (inbound_len == 0) {
+					debug("incoming length: 0x%08x\n", emudat);
+					inbound_len = emudat;
+				} else {
+					size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
+					debug("  incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
+					inbound_len -= num_chars;
+					tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars);
+					tty_flip_buffer_push(tty);
+				}
+			}
+			mutex_unlock(&bfin_jc_tty_mutex);
+		}
+
+		/* if outgoing data is ready, post it */
+		if (!(bfin_read_DBGSTAT() & EMUDOF) && !circ_empty(&bfin_jc_write_buf)) {
+			if (outbound_len == 0) {
+				outbound_len = circ_cnt(&bfin_jc_write_buf);
+				bfin_write_emudat(outbound_len);
+				debug("outgoing length: 0x%08x\n", outbound_len);
+			} else {
+				struct tty_struct *tty;
+				int tail = bfin_jc_write_buf.tail;
+				size_t ate = (4 <= outbound_len ? 4 : outbound_len);
+				uint32_t emudat =
+				bfin_write_emudat_chars(
+					circ_byte(&bfin_jc_write_buf, tail + 0),
+					circ_byte(&bfin_jc_write_buf, tail + 1),
+					circ_byte(&bfin_jc_write_buf, tail + 2),
+					circ_byte(&bfin_jc_write_buf, tail + 3)
+				);
+				bfin_jc_write_buf.tail += ate;
+				outbound_len -= ate;
+				mutex_lock(&bfin_jc_tty_mutex);
+				tty = (struct tty_struct *)bfin_jc_tty;
+				if (tty)
+					tty_wakeup(tty);
+				mutex_unlock(&bfin_jc_tty_mutex);
+				debug("  outgoing data: 0x%08x (pushing %zu)\n", emudat, ate);
+			}
+		}
+	}
+
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+static int
+bfin_jc_open(struct tty_struct *tty, struct file *filp)
+{
+	mutex_lock(&bfin_jc_tty_mutex);
+	debug("open %lu\n", bfin_jc_count);
+	++bfin_jc_count;
+	bfin_jc_tty = tty;
+	wake_up_process(bfin_jc_kthread);
+	mutex_unlock(&bfin_jc_tty_mutex);
+	return 0;
+}
+
+static void
+bfin_jc_close(struct tty_struct *tty, struct file *filp)
+{
+	mutex_lock(&bfin_jc_tty_mutex);
+	debug("close %lu\n", bfin_jc_count);
+	if (--bfin_jc_count == 0)
+		bfin_jc_tty = NULL;
+	wake_up_process(bfin_jc_kthread);
+	mutex_unlock(&bfin_jc_tty_mutex);
+}
+
+/* XXX: we dont handle the put_char() case where we must handle count = 1 */
+static int
+bfin_jc_circ_write(const unsigned char *buf, int count)
+{
+	int i;
+	count = min(count, circ_free(&bfin_jc_write_buf));
+	debug("going to write chunk of %i bytes\n", count);
+	for (i = 0; i < count; ++i)
+		circ_byte(&bfin_jc_write_buf, bfin_jc_write_buf.head + i) = buf[i];
+	bfin_jc_write_buf.head += i;
+	return i;
+}
+
+#ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE
+# define acquire_console_sem()
+# define release_console_sem()
+#endif
+static int
+bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+	int i;
+	acquire_console_sem();
+	i = bfin_jc_circ_write(buf, count);
+	release_console_sem();
+	wake_up_process(bfin_jc_kthread);
+	return i;
+}
+
+static void
+bfin_jc_flush_chars(struct tty_struct *tty)
+{
+	wake_up_process(bfin_jc_kthread);
+}
+
+static int
+bfin_jc_write_room(struct tty_struct *tty)
+{
+	return circ_free(&bfin_jc_write_buf);
+}
+
+static int
+bfin_jc_chars_in_buffer(struct tty_struct *tty)
+{
+	return circ_cnt(&bfin_jc_write_buf);
+}
+
+static void
+bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+	unsigned long expire = jiffies + timeout;
+	while (!circ_empty(&bfin_jc_write_buf)) {
+		if (signal_pending(current))
+			break;
+		if (time_after(jiffies, expire))
+			break;
+	}
+}
+
+static struct tty_operations bfin_jc_ops = {
+	.open            = bfin_jc_open,
+	.close           = bfin_jc_close,
+	.write           = bfin_jc_write,
+	/*.put_char        = bfin_jc_put_char,*/
+	.flush_chars     = bfin_jc_flush_chars,
+	.write_room      = bfin_jc_write_room,
+	.chars_in_buffer = bfin_jc_chars_in_buffer,
+	.wait_until_sent = bfin_jc_wait_until_sent,
+};
+
+static int __init bfin_jc_init(void)
+{
+	int ret;
+
+	bfin_jc_kthread = kthread_create(bfin_jc_emudat_manager, NULL, DRV_NAME);
+	if (IS_ERR(bfin_jc_kthread))
+		return PTR_ERR(bfin_jc_kthread);
+
+	ret = -ENOMEM;
+
+	bfin_jc_write_buf.head = bfin_jc_write_buf.tail = 0;
+	bfin_jc_write_buf.buf = kmalloc(CIRC_SIZE, GFP_KERNEL);
+	if (!bfin_jc_write_buf.buf)
+		goto err;
+
+	bfin_jc_driver = alloc_tty_driver(1);
+	if (!bfin_jc_driver)
+		goto err;
+
+	bfin_jc_driver->owner        = THIS_MODULE;
+	bfin_jc_driver->driver_name  = DRV_NAME;
+	bfin_jc_driver->name         = DEV_NAME;
+	bfin_jc_driver->type         = TTY_DRIVER_TYPE_SERIAL;
+	bfin_jc_driver->subtype      = SERIAL_TYPE_NORMAL;
+	bfin_jc_driver->init_termios = tty_std_termios;
+	tty_set_operations(bfin_jc_driver, &bfin_jc_ops);
+
+	ret = tty_register_driver(bfin_jc_driver);
+	if (ret)
+		goto err;
+
+	pr_init(KERN_INFO DRV_NAME ": initialized\n");
+
+	return 0;
+
+ err:
+	put_tty_driver(bfin_jc_driver);
+	kfree(bfin_jc_write_buf.buf);
+	kthread_stop(bfin_jc_kthread);
+	return ret;
+}
+module_init(bfin_jc_init);
+
+static void __exit bfin_jc_exit(void)
+{
+	kthread_stop(bfin_jc_kthread);
+	kfree(bfin_jc_write_buf.buf);
+	tty_unregister_driver(bfin_jc_driver);
+	put_tty_driver(bfin_jc_driver);
+}
+module_exit(bfin_jc_exit);
+
+#if defined(CONFIG_BFIN_JTAG_COMM_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
+static void
+bfin_jc_straight_buffer_write(const char *buf, unsigned count)
+{
+	unsigned ate = 0;
+	while (bfin_read_DBGSTAT() & EMUDOF)
+		continue;
+	bfin_write_emudat(count);
+	while (ate < count) {
+		while (bfin_read_DBGSTAT() & EMUDOF)
+			continue;
+		bfin_write_emudat_chars(buf[ate], buf[ate+1], buf[ate+2], buf[ate+3]);
+		ate += 4;
+	}
+}
+#endif
+
+#ifdef CONFIG_BFIN_JTAG_COMM_CONSOLE
+static void
+bfin_jc_console_write(struct console *co, const char *buf, unsigned count)
+{
+	if (bfin_jc_kthread == NULL)
+		bfin_jc_straight_buffer_write(buf, count);
+	else
+		bfin_jc_circ_write(buf, count);
+}
+
+static struct tty_driver *
+bfin_jc_console_device(struct console *co, int *index)
+{
+	*index = co->index;
+	return bfin_jc_driver;
+}
+
+static struct console bfin_jc_console = {
+	.name    = DEV_NAME,
+	.write   = bfin_jc_console_write,
+	.device  = bfin_jc_console_device,
+	.flags   = CON_ANYTIME | CON_PRINTBUFFER,
+	.index   = -1,
+};
+
+static int __init bfin_jc_console_init(void)
+{
+	register_console(&bfin_jc_console);
+	return 0;
+}
+console_initcall(bfin_jc_console_init);
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
+static void __init
+bfin_jc_early_write(struct console *co, const char *buf, unsigned int count)
+{
+	bfin_jc_straight_buffer_write(buf, count);
+}
+
+static struct __initdata console bfin_jc_early_console = {
+	.name   = "early_BFJC",
+	.write   = bfin_jc_early_write,
+	.flags   = CON_ANYTIME | CON_PRINTBUFFER,
+	.index   = -1,
+};
+
+struct console * __init
+bfin_jc_early_init(unsigned int port, unsigned int cflag)
+{
+	return &bfin_jc_early_console;
+}
+#endif
+
+MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
+MODULE_DESCRIPTION("TTY over Blackfin JTAG Communication");
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 26a2e20f4a966bebc312836aeb4423fbfd4e33e2 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 14:03:13 +0100
Subject: tty: Untangle termios and mm mutex dependencies

Although this doesn't cause any problems it could potentially do so for
future mmap using devices. No real work is needed to sort it out so untangle
it before it causes problems

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index 2401dbc..8116bb1c 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -626,9 +626,25 @@ static int set_termios(struct tty_struct *tty, void __user *arg, int opt)
 	return 0;
 }
 
+static void copy_termios(struct tty_struct *tty, struct ktermios *kterm)
+{
+	mutex_lock(&tty->termios_mutex);
+	memcpy(kterm, tty->termios, sizeof(struct ktermios));
+	mutex_unlock(&tty->termios_mutex);
+}
+
+static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm)
+{
+	mutex_lock(&tty->termios_mutex);
+	memcpy(kterm, tty->termios_locked, sizeof(struct ktermios));
+	mutex_unlock(&tty->termios_mutex);
+}
+
 static int get_termio(struct tty_struct *tty, struct termio __user *termio)
 {
-	if (kernel_termios_to_user_termio(termio, tty->termios))
+	struct ktermios kterm;
+	copy_termios(tty, &kterm);
+	if (kernel_termios_to_user_termio(termio, &kterm))
 		return -EFAULT;
 	return 0;
 }
@@ -930,6 +946,8 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
 	struct tty_struct *real_tty;
 	void __user *p = (void __user *)arg;
 	int ret = 0;
+	struct ktermios kterm;
+	struct termiox ktermx;
 
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
@@ -965,23 +983,20 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
 		return set_termios(real_tty, p, TERMIOS_OLD);
 #ifndef TCGETS2
 	case TCGETS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 #else
 	case TCGETS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TCGETS2:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios2 __user *)arg, real_tty->termios))
+		copy_termios(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TCSETSF2:
 		return set_termios(real_tty, p,  TERMIOS_FLUSH | TERMIOS_WAIT);
@@ -1000,34 +1015,36 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
 		return set_termios(real_tty, p, TERMIOS_TERMIO);
 #ifndef TCGETS2
 	case TIOCGLCKTRMIOS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked))
+		copy_termios_locked(real_tty, &kterm);
+		if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSLCKTRMIOS:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		mutex_lock(&real_tty->termios_mutex);
-		if (user_termios_to_kernel_termios(real_tty->termios_locked,
+		copy_termios_locked(real_tty, &kterm);
+		if (user_termios_to_kernel_termios(&kterm,
 					       (struct termios __user *) arg))
-			ret = -EFAULT;
+			return -EFAULT;
+		mutex_lock(&real_tty->termios_mutex);
+		memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
 		mutex_unlock(&real_tty->termios_mutex);
-		return ret;
+		return 0;
 #else
 	case TIOCGLCKTRMIOS:
-		mutex_lock(&real_tty->termios_mutex);
-		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked))
+		copy_termios_locked(real_tty, &kterm);
+		if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
 			ret = -EFAULT;
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSLCKTRMIOS:
 		if (!capable(CAP_SYS_ADMIN))
-			ret = -EPERM;
-		mutex_lock(&real_tty->termios_mutex);
-		if (user_termios_to_kernel_termios_1(real_tty->termios_locked,
+			return -EPERM;
+		copy_termios_locked(real_tty, &kterm);
+		if (user_termios_to_kernel_termios_1(&kterm,
 					       (struct termios __user *) arg))
-			ret = -EFAULT;
+			return -EFAULT;
+		mutex_lock(&real_tty->termios_mutex);
+		memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
 		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 #endif
@@ -1036,9 +1053,10 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
 		if (real_tty->termiox == NULL)
 			return -EINVAL;
 		mutex_lock(&real_tty->termios_mutex);
-		if (copy_to_user(p, real_tty->termiox, sizeof(struct termiox)))
-			ret = -EFAULT;
+		memcpy(&ktermx, real_tty->termiox, sizeof(struct termiox));
 		mutex_unlock(&real_tty->termios_mutex);
+		if (copy_to_user(p, &ktermx, sizeof(struct termiox)))
+			ret = -EFAULT;
 		return ret;
 	case TCSETX:
 		return set_termiox(real_tty, p, 0);
@@ -1048,10 +1066,9 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
 		return set_termiox(real_tty, p, TERMIOS_FLUSH);
 #endif		
 	case TIOCGSOFTCAR:
-		mutex_lock(&real_tty->termios_mutex);
-		ret = put_user(C_CLOCAL(real_tty) ? 1 : 0,
+		copy_termios(real_tty, &kterm);
+		ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0,
 						(int __user *)arg);
-		mutex_unlock(&real_tty->termios_mutex);
 		return ret;
 	case TIOCSSOFTCAR:
 		if (get_user(arg, (unsigned int __user *) arg))
-- 
cgit v0.10.2


From 93d5581e20600593ec3236921b6620225fb76034 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 14:03:55 +0100
Subject: devpts: unregister the file system on error

Closes-bug: http://bugzilla.kernel.org/show_bug.cgi?id=13429

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c68edb9..9b1d285 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -557,8 +557,10 @@ static int __init init_devpts_fs(void)
 	int err = register_filesystem(&devpts_fs_type);
 	if (!err) {
 		devpts_mnt = kern_mount(&devpts_fs_type);
-		if (IS_ERR(devpts_mnt))
+		if (IS_ERR(devpts_mnt)) {
 			err = PTR_ERR(devpts_mnt);
+			unregister_filesystem(&devpts_fs_type);
+		}
 	}
 	return err;
 }
-- 
cgit v0.10.2


From e960bf73ddd73714bcfbadb1717e53ecda9924cb Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 14:04:57 +0100
Subject: tty: Add URL for ttydev queue

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 1979167..84285b5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -71,7 +71,7 @@ P: Person
 M: Mail patches to
 L: Mailing list that is relevant to this area
 W: Web-page with status/info
-T: SCM tree type and location.  Type is one of: git, hg, quilt.
+T: SCM tree type and location.  Type is one of: git, hg, quilt, stgit.
 S: Status, one of the following:
 
 	Supported:	Someone is actually paid to look after this.
@@ -5630,6 +5630,7 @@ P:	Alan Cox
 M:	alan@lxorguk.ukuu.org.uk
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
+T:	stgit http://zeniv.linux.org.uk/~alan/ttydev/
 
 TULIP NETWORK DRIVERS
 P:	Grant Grundler
-- 
cgit v0.10.2


From 34aec591847c696339189b070cce2a11f901cfea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20R=C3=B6jfors?=
 <richard.rojfors.ext@mocean-labs.com>
Date: Thu, 11 Jun 2009 14:05:39 +0100
Subject: serial: Added Timberdale UART driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Driver for the UART found in the Timberdale FPGA

Signed-off-by: Richard Röjfors <richard.rojfors.ext@mocean-labs.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 343e3a3..3391ef0 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1433,4 +1433,11 @@ config SPORT_BAUD_RATE
 	default 19200 if (SERIAL_SPORT_BAUD_RATE_19200)
 	default 9600 if (SERIAL_SPORT_BAUD_RATE_9600)
 
+config SERIAL_TIMBERDALE
+	tristate "Support for timberdale UART"
+	depends on MFD_TIMBERDALE
+	select SERIAL_CORE
+	---help---
+	Add support for UART controller on timberdale.
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index d438eb2..45a8658 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -77,3 +77,4 @@ obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o
 obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
 obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
+obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c
new file mode 100644
index 0000000..30ba3c4
--- /dev/null
+++ b/drivers/serial/timbuart.c
@@ -0,0 +1,520 @@
+/*
+ * timbuart.c timberdale FPGA UART driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/serial_core.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+
+#include "timbuart.h"
+
+struct timbuart_port {
+	struct uart_port	port;
+	struct tasklet_struct	tasklet;
+	int			usedma;
+	u8			last_ier;
+	struct platform_device  *dev;
+};
+
+static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800,
+	921600, 1843200, 3250000};
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier);
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid);
+
+static void timbuart_stop_rx(struct uart_port *port)
+{
+	/* spin lock held by upper layer, disable all RX interrupts */
+	u8 ier = ioread8(port->membase + TIMBUART_IER) & ~RXFLAGS;
+	iowrite8(ier, port->membase + TIMBUART_IER);
+}
+
+static void timbuart_stop_tx(struct uart_port *port)
+{
+	/* spinlock held by upper layer, disable TX interrupt */
+	u8 ier = ioread8(port->membase + TIMBUART_IER) & ~TXBAE;
+	iowrite8(ier, port->membase + TIMBUART_IER);
+}
+
+static void timbuart_start_tx(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+
+	/* do not transfer anything here -> fire off the tasklet */
+	tasklet_schedule(&uart->tasklet);
+}
+
+static void timbuart_flush_buffer(struct uart_port *port)
+{
+	u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | TIMBUART_CTRL_FLSHTX;
+
+	iowrite8(ctl, port->membase + TIMBUART_CTRL);
+	iowrite8(TXBF, port->membase + TIMBUART_ISR);
+}
+
+static void timbuart_rx_chars(struct uart_port *port)
+{
+	struct tty_struct *tty = port->info->port.tty;
+
+	while (ioread8(port->membase + TIMBUART_ISR) & RXDP) {
+		u8 ch = ioread8(port->membase + TIMBUART_RXFIFO);
+		port->icount.rx++;
+		tty_insert_flip_char(tty, ch, TTY_NORMAL);
+	}
+
+	spin_unlock(&port->lock);
+	tty_flip_buffer_push(port->info->port.tty);
+	spin_lock(&port->lock);
+
+	dev_dbg(port->dev, "%s - total read %d bytes\n",
+		__func__, port->icount.rx);
+}
+
+static void timbuart_tx_chars(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->info->xmit;
+
+	while (!(ioread8(port->membase + TIMBUART_ISR) & TXBF) &&
+		!uart_circ_empty(xmit)) {
+		iowrite8(xmit->buf[xmit->tail],
+			port->membase + TIMBUART_TXFIFO);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+	}
+
+	dev_dbg(port->dev,
+		"%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n",
+		 __func__,
+		port->icount.tx,
+		ioread8(port->membase + TIMBUART_CTRL),
+		port->mctrl & TIOCM_RTS,
+		ioread8(port->membase + TIMBUART_BAUDRATE));
+}
+
+static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+	struct circ_buf *xmit = &port->info->xmit;
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port))
+		return;
+
+	if (port->x_char)
+		return;
+
+	if (isr & TXFLAGS) {
+		timbuart_tx_chars(port);
+		/* clear all TX interrupts */
+		iowrite8(TXFLAGS, port->membase + TIMBUART_ISR);
+
+		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+			uart_write_wakeup(port);
+	} else
+		/* Re-enable any tx interrupt */
+		*ier |= uart->last_ier & TXFLAGS;
+
+	/* enable interrupts if there are chars in the transmit buffer,
+	 * Or if we delivered some bytes and want the almost empty interrupt
+	 * we wake up the upper layer later when we got the interrupt
+	 * to give it some time to go out...
+	 */
+	if (!uart_circ_empty(xmit))
+		*ier |= TXBAE;
+
+	dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+	if (isr & RXFLAGS) {
+		/* Some RX status is set */
+		if (isr & RXBF) {
+			u8 ctl = ioread8(port->membase + TIMBUART_CTRL) |
+				TIMBUART_CTRL_FLSHRX;
+			iowrite8(ctl, port->membase + TIMBUART_CTRL);
+			port->icount.overrun++;
+		} else if (isr & (RXDP))
+			timbuart_rx_chars(port);
+
+		/* ack all RX interrupts */
+		iowrite8(RXFLAGS, port->membase + TIMBUART_ISR);
+	}
+
+	/* always have the RX interrupts enabled */
+	*ier |= RXBAF | RXBF | RXTT;
+
+	dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+void timbuart_tasklet(unsigned long arg)
+{
+	struct timbuart_port *uart = (struct timbuart_port *)arg;
+	u8 isr, ier = 0;
+
+	spin_lock(&uart->port.lock);
+
+	isr = ioread8(uart->port.membase + TIMBUART_ISR);
+	dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr);
+
+	if (!uart->usedma)
+		timbuart_handle_tx_port(&uart->port, isr, &ier);
+
+	timbuart_mctrl_check(&uart->port, isr, &ier);
+
+	if (!uart->usedma)
+		timbuart_handle_rx_port(&uart->port, isr, &ier);
+
+	iowrite8(ier, uart->port.membase + TIMBUART_IER);
+
+	spin_unlock(&uart->port.lock);
+	dev_dbg(uart->port.dev, "%s leaving\n", __func__);
+}
+
+static unsigned int timbuart_tx_empty(struct uart_port *port)
+{
+	u8 isr = ioread8(port->membase + TIMBUART_ISR);
+
+	return (isr & TXBAE) ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int timbuart_get_mctrl(struct uart_port *port)
+{
+	u8 cts = ioread8(port->membase + TIMBUART_CTRL);
+	dev_dbg(port->dev, "%s - cts %x\n", __func__, cts);
+
+	if (cts & TIMBUART_CTRL_CTS)
+		return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR;
+	else
+		return TIOCM_DSR | TIOCM_CAR;
+}
+
+static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	dev_dbg(port->dev, "%s - %x\n", __func__, mctrl);
+
+	if (mctrl & TIOCM_RTS)
+		iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+	else
+		iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+}
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier)
+{
+	unsigned int cts;
+
+	if (isr & CTS_DELTA) {
+		/* ack */
+		iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR);
+		cts = timbuart_get_mctrl(port);
+		uart_handle_cts_change(port, cts & TIOCM_CTS);
+		wake_up_interruptible(&port->info->delta_msr_wait);
+	}
+
+	*ier |= CTS_DELTA;
+}
+
+static void timbuart_enable_ms(struct uart_port *port)
+{
+	/* N/A */
+}
+
+static void timbuart_break_ctl(struct uart_port *port, int ctl)
+{
+	/* N/A */
+}
+
+static int timbuart_startup(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+
+	dev_dbg(port->dev, "%s\n", __func__);
+
+	iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL);
+	iowrite8(0xff, port->membase + TIMBUART_ISR);
+	/* Enable all but TX interrupts */
+	iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA,
+		port->membase + TIMBUART_IER);
+
+	return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED,
+		"timb-uart", uart);
+}
+
+static void timbuart_shutdown(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+	dev_dbg(port->dev, "%s\n", __func__);
+	free_irq(port->irq, uart);
+	iowrite8(0, port->membase + TIMBUART_IER);
+}
+
+static int get_bindex(int baud)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(baudrates); i++)
+		if (baud == baudrates[i])
+			return i;
+
+	return -1;
+}
+
+static void timbuart_set_termios(struct uart_port *port,
+	struct ktermios *termios,
+	struct ktermios *old)
+{
+	unsigned int baud;
+	short bindex;
+	unsigned long flags;
+
+	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
+	bindex = get_bindex(baud);
+	dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex);
+
+	if (bindex < 0) {
+		printk(KERN_ALERT "timbuart: Unsupported baud rate\n");
+	} else {
+		spin_lock_irqsave(&port->lock, flags);
+		iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE);
+		uart_update_timeout(port, termios->c_cflag, baud);
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+}
+
+static const char *timbuart_type(struct uart_port *port)
+{
+	return port->type == PORT_UNKNOWN ? "timbuart" : NULL;
+}
+
+/* We do not request/release mappings of the registers here,
+ * currently it's done in the proble function.
+ */
+static void timbuart_release_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	int size =
+		resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+	if (port->flags & UPF_IOREMAP) {
+		iounmap(port->membase);
+		port->membase = NULL;
+	}
+
+	release_mem_region(port->mapbase, size);
+}
+
+static int timbuart_request_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	int size =
+		resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+	if (!request_mem_region(port->mapbase, size, "timb-uart"))
+		return -EBUSY;
+
+	if (port->flags & UPF_IOREMAP) {
+		port->membase = ioremap(port->mapbase, size);
+		if (port->membase == NULL) {
+			release_mem_region(port->mapbase, size);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid)
+{
+	struct timbuart_port *uart = (struct timbuart_port *)devid;
+
+	if (ioread8(uart->port.membase + TIMBUART_IPR)) {
+		uart->last_ier = ioread8(uart->port.membase + TIMBUART_IER);
+
+		/* disable interrupts, the tasklet enables them again */
+		iowrite8(0, uart->port.membase + TIMBUART_IER);
+
+		/* fire off bottom half */
+		tasklet_schedule(&uart->tasklet);
+
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+/*
+ * Configure/autoconfigure the port.
+ */
+static void timbuart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE) {
+		port->type = PORT_TIMBUART;
+		timbuart_request_port(port);
+	}
+}
+
+static int timbuart_verify_port(struct uart_port *port,
+	struct serial_struct *ser)
+{
+	/* we don't want the core code to modify any port params */
+	return -EINVAL;
+}
+
+static struct uart_ops timbuart_ops = {
+	.tx_empty = timbuart_tx_empty,
+	.set_mctrl = timbuart_set_mctrl,
+	.get_mctrl = timbuart_get_mctrl,
+	.stop_tx = timbuart_stop_tx,
+	.start_tx = timbuart_start_tx,
+	.flush_buffer = timbuart_flush_buffer,
+	.stop_rx = timbuart_stop_rx,
+	.enable_ms = timbuart_enable_ms,
+	.break_ctl = timbuart_break_ctl,
+	.startup = timbuart_startup,
+	.shutdown = timbuart_shutdown,
+	.set_termios = timbuart_set_termios,
+	.type = timbuart_type,
+	.release_port = timbuart_release_port,
+	.request_port = timbuart_request_port,
+	.config_port = timbuart_config_port,
+	.verify_port = timbuart_verify_port
+};
+
+static struct uart_driver timbuart_driver = {
+	.owner = THIS_MODULE,
+	.driver_name = "timberdale_uart",
+	.dev_name = "ttyTU",
+	.major = TIMBUART_MAJOR,
+	.minor = TIMBUART_MINOR,
+	.nr = 1
+};
+
+static int timbuart_probe(struct platform_device *dev)
+{
+	int err;
+	struct timbuart_port *uart;
+	struct resource *iomem;
+
+	dev_dbg(&dev->dev, "%s\n", __func__);
+
+	uart = kzalloc(sizeof(*uart), GFP_KERNEL);
+	if (!uart) {
+		err = -EINVAL;
+		goto err_mem;
+	}
+
+	uart->usedma = 0;
+
+	uart->port.uartclk = 3250000 * 16;
+	uart->port.fifosize  = TIMBUART_FIFO_SIZE;
+	uart->port.regshift  = 2;
+	uart->port.iotype  = UPIO_MEM;
+	uart->port.ops = &timbuart_ops;
+	uart->port.irq = 0;
+	uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP;
+	uart->port.line  = 0;
+	uart->port.dev	= &dev->dev;
+
+	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!iomem) {
+		err = -ENOMEM;
+		goto err_register;
+	}
+	uart->port.mapbase = iomem->start;
+	uart->port.membase = NULL;
+
+	uart->port.irq = platform_get_irq(dev, 0);
+	if (uart->port.irq < 0) {
+		err = -EINVAL;
+		goto err_register;
+	}
+
+	tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart);
+
+	err = uart_register_driver(&timbuart_driver);
+	if (err)
+		goto err_register;
+
+	err = uart_add_one_port(&timbuart_driver, &uart->port);
+	if (err)
+		goto err_add_port;
+
+	platform_set_drvdata(dev, uart);
+
+	return 0;
+
+err_add_port:
+	uart_unregister_driver(&timbuart_driver);
+err_register:
+	kfree(uart);
+err_mem:
+	printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n",
+		err);
+
+	return err;
+}
+
+static int timbuart_remove(struct platform_device *dev)
+{
+	struct timbuart_port *uart = platform_get_drvdata(dev);
+
+	tasklet_kill(&uart->tasklet);
+	uart_remove_one_port(&timbuart_driver, &uart->port);
+	uart_unregister_driver(&timbuart_driver);
+	kfree(uart);
+
+	return 0;
+}
+
+static struct platform_driver timbuart_platform_driver = {
+	.driver = {
+		.name	= "timb-uart",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= timbuart_probe,
+	.remove		= timbuart_remove,
+};
+
+/*--------------------------------------------------------------------------*/
+
+static int __init timbuart_init(void)
+{
+	return platform_driver_register(&timbuart_platform_driver);
+}
+
+static void __exit timbuart_exit(void)
+{
+	platform_driver_unregister(&timbuart_platform_driver);
+}
+
+module_init(timbuart_init);
+module_exit(timbuart_exit);
+
+MODULE_DESCRIPTION("Timberdale UART driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:timb-uart");
+
diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h
new file mode 100644
index 0000000..7e56676
--- /dev/null
+++ b/drivers/serial/timbuart.h
@@ -0,0 +1,58 @@
+/*
+ * timbuart.c timberdale FPGA GPIO driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#ifndef _TIMBUART_H
+#define _TIMBUART_H
+
+#define TIMBUART_FIFO_SIZE	2048
+
+#define TIMBUART_RXFIFO		0x08
+#define TIMBUART_TXFIFO		0x0c
+#define TIMBUART_IER		0x10
+#define TIMBUART_IPR		0x14
+#define TIMBUART_ISR		0x18
+#define TIMBUART_CTRL		0x1c
+#define TIMBUART_BAUDRATE	0x20
+
+#define TIMBUART_CTRL_RTS	0x01
+#define TIMBUART_CTRL_CTS	0x02
+#define TIMBUART_CTRL_FLSHTX	0x40
+#define TIMBUART_CTRL_FLSHRX	0x80
+
+#define TXBF		0x01
+#define TXBAE		0x02
+#define CTS_DELTA	0x04
+#define RXDP		0x08
+#define RXBAF		0x10
+#define RXBF		0x20
+#define RXTT		0x40
+#define RXBNAE		0x80
+#define TXBE		0x100
+
+#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE)
+#define TXFLAGS (TXBF | TXBAE)
+
+#define TIMBUART_MAJOR 204
+#define TIMBUART_MINOR 192
+
+#endif /* _TIMBUART_H */
+
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 48766ea..6fd80c4 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -168,6 +168,9 @@
 /* MAX3100 */
 #define PORT_MAX3100    86
 
+/* Timberdale UART */
+#define PORT_TIMBUART	87
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v0.10.2


From 7d55deaf50182c47c1e805dc8cc85f2769f0673e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 14:27:13 +0100
Subject: timbuart: Fix the termios logic

The driver only handles speeds but it fails to return the current values
for the hardware features it does not support.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c
index 30ba3c4..ac9e5d5 100644
--- a/drivers/serial/timbuart.c
+++ b/drivers/serial/timbuart.c
@@ -278,7 +278,7 @@ static int get_bindex(int baud)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(baudrates); i++)
-		if (baud == baudrates[i])
+		if (baud <= baudrates[i])
 			return i;
 
 	return -1;
@@ -296,14 +296,20 @@ static void timbuart_set_termios(struct uart_port *port,
 	bindex = get_bindex(baud);
 	dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex);
 
-	if (bindex < 0) {
-		printk(KERN_ALERT "timbuart: Unsupported baud rate\n");
-	} else {
-		spin_lock_irqsave(&port->lock, flags);
-		iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE);
-		uart_update_timeout(port, termios->c_cflag, baud);
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
+	if (bindex < 0)
+		bindex = 0;
+	baud = baudrates[bindex];
+
+	/* The serial layer calls into this once with old = NULL when setting
+	   up initially */
+	if (old)
+		tty_termios_copy_hw(termios, old);
+	tty_termios_encode_baud_rate(termios, baud, baud);
+
+	spin_lock_irqsave(&port->lock, flags);
+	iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE);
+	uart_update_timeout(port, termios->c_cflag, baud);
+	spin_unlock_irqrestore(&port->lock, flags);
 }
 
 static const char *timbuart_type(struct uart_port *port)
-- 
cgit v0.10.2


From 00b040deca907a113f5bef67a6cc7a4f65a5ace9 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 14:29:29 +0100
Subject: tty: resolve some sierra breakage

The various merges into the sierra driver inadvertently undid
commit 212b8f0c3f5a2280bfa1d6ab13a6fe98552becaa by Elina Pasheva
<epasheva@sierrawireless.com>. Put it back so the OBEX port works again.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 1319b89..222a623 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -257,8 +257,19 @@ static int sierra_send_setup(struct usb_serial_port *port)
 		val |= 0x02;
 
 	/* If composite device then properly report interface */
-	if (serial->num_ports == 1)
+	if (serial->num_ports == 1) {
 		interface = sierra_calc_interface(serial);
+		/* Control message is sent only to interfaces with
+		 * interrupt_in endpoints
+		 */
+		if (port->interrupt_in_urb) {
+			/* send control message */
+			return usb_control_msg(serial->dev,
+				usb_rcvctrlpipe(serial->dev, 0),
+				0x22, 0x21, val, interface,
+				NULL, 0, USB_CTRL_SET_TIMEOUT);
+		}
+	}
 
 	/* Otherwise the need to do non-composite mapping */
 	else {
@@ -268,11 +279,11 @@ static int sierra_send_setup(struct usb_serial_port *port)
 			interface = 1;
 		else if (port->bulk_out_endpointAddress == 5)
 			interface = 2;
-	}
-	return usb_control_msg(serial->dev,
+		return usb_control_msg(serial->dev,
 			usb_rcvctrlpipe(serial->dev, 0),
 			0x22, 0x21, val, interface,
 			NULL, 0, USB_CTRL_SET_TIMEOUT);
+	}
 	return 0;
 }
 
-- 
cgit v0.10.2


From b9a44bc19f48fd82b8f411500a9bb0ea4153d23c Mon Sep 17 00:00:00 2001
From: Elina Pasheva <epasheva@sierrawireless.com>
Date: Thu, 11 Jun 2009 14:30:21 +0100
Subject: sierra: driver urb handling improvements

[Folded from eight patches into one as the original set according to the
 author "All of the patches need to be applied to obtain a working product"
 so keeping them split seems unhelpful

 Merge fixes done versus other conflicting changes and moved the
 spin_lock_init from open to setup time -- Alan]

Summary of the changes and code re-organization in this patch:

- The memory for urbs is allocated and urbs are submitted only for the active
  interfaces (instead of pre-allocating these for all interfaces). This will
  save memory especially in the case of using composite devices.
- The code has been re-organized and functionality has been extracted from
  sierra_startup(), sierra_shutdown(), sierra_open(), sierra_close() and added
  in helper functions sierra_release_urb(), sierra_stop_rx_urbs(),
  sierra_submit_rx_urbs() and sierra_setup_urb()

- Added function sierra_release_urb() to free an urb and its transfer
buffer.
- Removed unecessary include file reference and comment.
- Added function sierra_stop_rx_urbs() that takes care of the release of
receive and interrupt urbs. This function is to be called by sierra_close()
whenever an interface is de-activated.
- Added new function sierra_submit_rx_urbs() that handles the submission of
receive urbs and interrupt urbs (if any) during the interface activation.
This function is to be called by sierra_open(). Added a second parameter to
pass the memory allocation (as suggested by Oliver Neukum) so that this
function can be used in post_reset() and resume().
- Added new function sierra_setup_urb() that contains the functionality to
allocate an urb, fill bulk urb using the supplied memory allocation flag
and release urb upon error. Added parameter so that the caller pass the
memory allocation flag for flexibility.
- Moved sierra_close() before sierra_open() to resolve dependencies
introduced by the code reorganization.
- Modified sierra_close() to call sierra_stop_rx_urbs() and
sierra_release_urb() functions added in previous patch.
- Modified sierra_open() to call sierra_setup_urb() and sierra_submit_rx_urbs()
functions; note urbs are allocated and submitted for each activated interface.
- Modified sierra_startup() so that allocation of urbs happens whenever an
interface is activated (urb allocation is moved to sierra_open()).
- Modified sierra_shutdown() so that urbs are freed whenever an interface is
de-activated (urb freeing moved to sierra_close() as shown in previous patch
from the series)
- Removed unecessary data structure from sierra_port_private_data
- Suppress an entry in logs by not re-submitting an urb when usb_submit_urb()
returns -EPERM, as this shows that usb_kill_urb() is running (as suggested by
Oliver Neukum)

Signed-off-by: Elina Pasheva <epasheva@sierrawireless.com>
Signed-off-by: Alan Cox <alan.cox@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 222a623..88ec7bf 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -26,12 +26,10 @@
 #include <linux/module.h>
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
-#include <linux/usb/ch9.h>
 
 #define SWIMS_USB_REQUEST_SetPower	0x00
 #define SWIMS_USB_REQUEST_SetNmea	0x07
 
-/* per port private data */
 #define N_IN_URB	4
 #define N_OUT_URB	4
 #define IN_BUFLEN	4096
@@ -229,7 +227,6 @@ struct sierra_port_private {
 
 	/* Input endpoints and buffers for this port */
 	struct urb *in_urbs[N_IN_URB];
-	char *in_buffer[N_IN_URB];
 
 	/* Settings for the port */
 	int rts_state;	/* Handshaking pins (outputs) */
@@ -334,6 +331,17 @@ static int sierra_tiocmset(struct tty_struct *tty, struct file *file,
 	return sierra_send_setup(port);
 }
 
+static void sierra_release_urb(struct urb *urb)
+{
+	struct usb_serial_port *port;
+	if (urb) {
+		port =  urb->context;
+		dev_dbg(&port->dev, "%s: %p\n", __func__, urb);
+		kfree(urb->transfer_buffer);
+		usb_free_urb(urb);
+	}
+}
+
 static void sierra_outdat_callback(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
@@ -458,7 +466,7 @@ static void sierra_indat_callback(struct urb *urb)
 				" received", __func__);
 
 		/* Resubmit urb so we continue receiving */
-		if (port->port.count && status != -ESHUTDOWN) {
+		if (port->port.count && status != -ESHUTDOWN && status != -EPERM) {
 			err = usb_submit_urb(urb, GFP_ATOMIC);
 			if (err)
 				dev_err(&port->dev, "resubmit read urb failed."
@@ -550,100 +558,184 @@ static int sierra_write_room(struct tty_struct *tty)
 	return 2048;
 }
 
-static int sierra_open(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void sierra_stop_rx_urbs(struct usb_serial_port *port)
 {
-	struct sierra_port_private *portdata;
-	struct usb_serial *serial = port->serial;
 	int i;
-	struct urb *urb;
-	int result;
+	struct sierra_port_private *portdata = usb_get_serial_port_data(port);
 
-	portdata = usb_get_serial_port_data(port);
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++)
+		usb_kill_urb(portdata->in_urbs[i]);
 
-	dev_dbg(&port->dev, "%s", __func__);
+	usb_kill_urb(port->interrupt_in_urb);
+}
 
-	/* Set some sane defaults */
-	portdata->rts_state = 1;
-	portdata->dtr_state = 1;
+static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags)
+{
+	int ok_cnt;
+	int err = -EINVAL;
+	int i;
+	struct urb *urb;
+	struct sierra_port_private *portdata = usb_get_serial_port_data(port);
 
-	/* Reset low level data toggle and start reading from endpoints */
-	for (i = 0; i < N_IN_URB; i++) {
+	ok_cnt = 0;
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
 		urb = portdata->in_urbs[i];
 		if (!urb)
 			continue;
-		if (urb->dev != serial->dev) {
-			dev_dbg(&port->dev, "%s: dev %p != %p",
-				 __func__, urb->dev, serial->dev);
-			continue;
+		err = usb_submit_urb(urb, mem_flags);
+		if (err) {
+			dev_err(&port->dev, "%s: submit urb failed: %d\n",
+				__func__, err);
+		} else {
+			ok_cnt++;
 		}
+	}
 
-		/*
-		 * make sure endpoint data toggle is synchronized with the
-		 * device
-		 */
-		usb_clear_halt(urb->dev, urb->pipe);
-
-		result = usb_submit_urb(urb, GFP_KERNEL);
-		if (result) {
-			dev_err(&port->dev, "submit urb %d failed (%d) %d\n",
-				i, result, urb->transfer_buffer_length);
+	if (ok_cnt && port->interrupt_in_urb) {
+		err = usb_submit_urb(port->interrupt_in_urb, mem_flags);
+		if (err) {
+			dev_err(&port->dev, "%s: submit intr urb failed: %d\n",
+				__func__, err);
 		}
 	}
 
-	sierra_send_setup(port);
+	if (ok_cnt > 0) /* at least one rx urb submitted */
+		return 0;
+	else
+		return err;
+}
+
+static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint,
+					int dir, void *ctx, int len,
+					gfp_t mem_flags,
+					usb_complete_t callback)
+{
+	struct urb	*urb;
+	u8		*buf;
+
+	if (endpoint == -1)
+		return NULL;
 
-	/* start up the interrupt endpoint if we have one */
-	if (port->interrupt_in_urb) {
-		result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
-		if (result)
-			dev_err(&port->dev, "submit irq_in urb failed %d\n",
-				result);
+	urb = usb_alloc_urb(0, mem_flags);
+	if (urb == NULL) {
+		dev_dbg(&serial->dev->dev, "%s: alloc for endpoint %d failed\n",
+			__func__, endpoint);
+		return NULL;
 	}
-	return 0;
+
+	buf = kmalloc(len, mem_flags);
+	if (buf) {
+		/* Fill URB using supplied data */
+		usb_fill_bulk_urb(urb, serial->dev,
+			usb_sndbulkpipe(serial->dev, endpoint) | dir,
+			buf, len, callback, ctx);
+
+		/* debug */
+		dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__,
+				dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
+	} else {
+		dev_dbg(&serial->dev->dev, "%s %c u:%p d:%p\n", __func__,
+				dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
+
+		sierra_release_urb(urb);
+		urb = NULL;
+	}
+
+	return urb;
 }
 
-static void sierra_dtr_rts(struct usb_serial_port *port, int on)
+static void sierra_close(struct usb_serial_port *port)
 {
+	int i;
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 
+	dev_dbg(&port->dev, "%s\n", __func__);
 	portdata = usb_get_serial_port_data(port);
-	portdata->rts_state = on;
-	portdata->dtr_state = on;
+
+	portdata->rts_state = 0;
+	portdata->dtr_state = 0;
 
 	if (serial->dev) {
 		mutex_lock(&serial->disc_mutex);
 		if (!serial->disconnected)
 			sierra_send_setup(port);
 		mutex_unlock(&serial->disc_mutex);
+
+		/* Stop reading urbs */
+		sierra_stop_rx_urbs(port);
+		/* .. and release them */
+		for (i = 0; i < N_IN_URB; i++) {
+			sierra_release_urb(portdata->in_urbs[i]);
+			portdata->in_urbs[i] = NULL;
+		}
 	}
 }
 
-static void sierra_close(struct usb_serial_port *port)
+static int sierra_open(struct tty_struct *tty,
+			struct usb_serial_port *port, struct file *filp)
 {
+	struct sierra_port_private *portdata;
+	struct usb_serial *serial = port->serial;
 	int i;
+	int err;
+	int endpoint;
+	struct urb *urb;
+
+	portdata = usb_get_serial_port_data(port);
+
+	dev_dbg(&port->dev, "%s", __func__);
+
+	/* Set some sane defaults */
+	portdata->rts_state = 1;
+	portdata->dtr_state = 1;
+
+
+	endpoint = port->bulk_in_endpointAddress;
+	for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
+		urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port,
+					IN_BUFLEN, GFP_KERNEL,
+					sierra_indat_callback);
+		portdata->in_urbs[i] = urb;
+	}
+	/* clear halt condition */
+	usb_clear_halt(serial->dev,
+			usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN);
+
+	err = sierra_submit_rx_urbs(port, GFP_KERNEL);
+	if (err) {
+		/* get rid of everything as in close */
+		sierra_close(port);
+		return err;
+	}
+	sierra_send_setup(port);
+
+	return 0;
+}
+
+
+static void sierra_dtr_rts(struct usb_serial_port *port, int on)
+{
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 
-	dev_dbg(&port->dev, "%s", __func__);
 	portdata = usb_get_serial_port_data(port);
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
 
 	if (serial->dev) {
-		/* Stop reading/writing urbs */
-		for (i = 0; i < N_IN_URB; i++)
-			usb_kill_urb(portdata->in_urbs[i]);
+		mutex_lock(&serial->disc_mutex);
+		if (!serial->disconnected)
+			sierra_send_setup(port);
+		mutex_unlock(&serial->disc_mutex);
 	}
-	usb_kill_urb(port->interrupt_in_urb);
 }
 
 static int sierra_startup(struct usb_serial *serial)
 {
 	struct usb_serial_port *port;
 	struct sierra_port_private *portdata;
-	struct urb *urb;
 	int i;
-	int j;
 
 	dev_dbg(&serial->dev->dev, "%s", __func__);
 
@@ -665,34 +757,8 @@ static int sierra_startup(struct usb_serial *serial)
 			return -ENOMEM;
 		}
 		spin_lock_init(&portdata->lock);
-		for (j = 0; j < N_IN_URB; j++) {
-			portdata->in_buffer[j] = kmalloc(IN_BUFLEN, GFP_KERNEL);
-			if (!portdata->in_buffer[j]) {
-				for (--j; j >= 0; j--)
-					kfree(portdata->in_buffer[j]);
-				kfree(portdata);
-				return -ENOMEM;
-			}
-		}
-
+		/* Set the port private data pointer */
 		usb_set_serial_port_data(port, portdata);
-
-		/* initialize the in urbs */
-		for (j = 0; j < N_IN_URB; ++j) {
-			urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (urb == NULL) {
-				dev_dbg(&port->dev, "%s: alloc for in "
-					"port failed.", __func__);
-				continue;
-			}
-			/* Fill URB using supplied data. */
-			usb_fill_bulk_urb(urb, serial->dev,
-					  usb_rcvbulkpipe(serial->dev,
-						port->bulk_in_endpointAddress),
-					  portdata->in_buffer[j], IN_BUFLEN,
-					  sierra_indat_callback, port);
-			portdata->in_urbs[j] = urb;
-		}
 	}
 
 	return 0;
@@ -700,7 +766,7 @@ static int sierra_startup(struct usb_serial *serial)
 
 static void sierra_shutdown(struct usb_serial *serial)
 {
-	int i, j;
+	int i;
 	struct usb_serial_port *port;
 	struct sierra_port_private *portdata;
 
@@ -713,12 +779,6 @@ static void sierra_shutdown(struct usb_serial *serial)
 		portdata = usb_get_serial_port_data(port);
 		if (!portdata)
 			continue;
-
-		for (j = 0; j < N_IN_URB; j++) {
-			usb_kill_urb(portdata->in_urbs[j]);
-			usb_free_urb(portdata->in_urbs[j]);
-			kfree(portdata->in_buffer[j]);
-		}
 		kfree(portdata);
 		usb_set_serial_port_data(port, NULL);
 	}
-- 
cgit v0.10.2


From 4db2299da213d1ba8cf7f4c0a197ae7ba49db5cb Mon Sep 17 00:00:00 2001
From: Elina Pasheva <epasheva@sierrawireless.com>
Date: Thu, 11 Jun 2009 14:32:01 +0100
Subject: sierra: driver interface blacklisting

Interface blacklisting is necessary for non-serial interfaces that are handled
by a different driver. The interface blacklisting is implemented in sierra
driver per device. Each device in need of a blacklist has a static information
array kept in the driver. This array contains the interface numbers that are
blacklisted. The pointer for each blacklist array and the length
of that blacklist are 'bundled' in data structure sierra_iface_info. A pointer
to this information is set in id_table when the device is added to the id_table.

The following is summary of changes we have made to sierra.c driver in
this patch dealing with interface blacklisting support:
- Added data structure sierra_iface_info and function is_blacklisted()
to support blacklisting
- Modified sierra_probe() to handle blacklisted interfaces accordingly
- Improved comments in id_table
- Added new device in id_table with blacklist interface support

Signed-off-by: Elina Pasheva <epasheva@sierrawireless.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 88ec7bf..17ac34f 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -37,6 +37,12 @@
 static int debug;
 static int nmea;
 
+/* Used in interface blacklisting */
+struct sierra_iface_info {
+	const u32 infolen;	/* number of interface numbers on blacklist */
+	const u8  *ifaceinfo;	/* pointer to the array holding the numbers */
+};
+
 static int sierra_set_power_state(struct usb_device *udev, __u16 swiState)
 {
 	int result;
@@ -83,6 +89,23 @@ static int sierra_calc_num_ports(struct usb_serial *serial)
 	return result;
 }
 
+static int is_blacklisted(const u8 ifnum,
+				const struct sierra_iface_info *blacklist)
+{
+	const u8  *info;
+	int i;
+
+	if (blacklist) {
+		info = blacklist->ifaceinfo;
+
+		for (i = 0; i < blacklist->infolen; i++) {
+			if (info[i] == ifnum)
+				return 1;
+		}
+	}
+	return 0;
+}
+
 static int sierra_calc_interface(struct usb_serial *serial)
 {
 	int interface;
@@ -151,9 +174,25 @@ static int sierra_probe(struct usb_serial *serial,
 	 */
 	usb_set_serial_data(serial, (void *)num_ports);
 
+	/* ifnum could have changed - by calling usb_set_interface */
+	ifnum = sierra_calc_interface(serial);
+
+	if (is_blacklisted(ifnum,
+				(struct sierra_iface_info *)id->driver_info)) {
+		dev_dbg(&serial->dev->dev,
+			"Ignoring blacklisted interface #%d\n", ifnum);
+		return -ENODEV;
+	}
+
 	return result;
 }
 
+static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11 };
+static const struct sierra_iface_info direct_ip_interface_blacklist = {
+	.infolen = ARRAY_SIZE(direct_ip_non_serial_ifaces),
+	.ifaceinfo = direct_ip_non_serial_ifaces,
+};
+
 static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x1199, 0x0017) },	/* Sierra Wireless EM5625 */
 	{ USB_DEVICE(0x1199, 0x0018) },	/* Sierra Wireless MC5720 */
@@ -186,9 +225,11 @@ static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x1199, 0x6833) },	/* Sierra Wireless MC8781 */
 	{ USB_DEVICE(0x1199, 0x683A) },	/* Sierra Wireless MC8785 */
 	{ USB_DEVICE(0x1199, 0x683B) },	/* Sierra Wireless MC8785 Composite */
-	{ USB_DEVICE(0x1199, 0x683C) },	/* Sierra Wireless MC8790 */
-	{ USB_DEVICE(0x1199, 0x683D) },	/* Sierra Wireless MC8790 */
-	{ USB_DEVICE(0x1199, 0x683E) },	/* Sierra Wireless MC8790 */
+	/* Sierra Wireless MC8790, MC8791, MC8792 Composite */
+	{ USB_DEVICE(0x1199, 0x683C) },
+	{ USB_DEVICE(0x1199, 0x683D) },	/* Sierra Wireless MC8791 Composite */
+	/* Sierra Wireless MC8790, MC8791, MC8792 */
+	{ USB_DEVICE(0x1199, 0x683E) },
 	{ USB_DEVICE(0x1199, 0x6850) },	/* Sierra Wireless AirCard 880 */
 	{ USB_DEVICE(0x1199, 0x6851) },	/* Sierra Wireless AirCard 881 */
 	{ USB_DEVICE(0x1199, 0x6852) },	/* Sierra Wireless AirCard 880 E */
@@ -209,6 +250,10 @@ static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */
 	{ USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */
 
+	{ USB_DEVICE(0x1199, 0x68A3), 	/* Sierra Wireless Direct IP modems */
+	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
+	},
+
 	{ }
 };
 MODULE_DEVICE_TABLE(usb, id_table);
-- 
cgit v0.10.2


From 5fc5b42a3bb564f0b6e03f0f1b522ed9100250ad Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 14:32:42 +0100
Subject: tty: remove sleep_on

Use wait_event instead of sleep_on in tty_block_til_ready.

Wait for ASYNC_CLOSING flag being 0.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 4d08b6d..931af10 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -198,7 +198,8 @@ int tty_port_block_til_ready(struct tty_port *port,
 
 	/* block if port is in the process of being closed */
 	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->close_wait);
+		wait_event_interruptible(port->close_wait,
+				!(port->flags & ASYNC_CLOSING));
 		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
-- 
cgit v0.10.2


From 3e3b5c087799e536871c8261b05bc28e4783c8da Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 14:33:37 +0100
Subject: tty: use prepare/finish_wait

Use prepare_to_wait and finish_wait instead of add_wait_queue and
remove_wait_queue.

This avoids us setting a task state.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 931af10..62dadfc 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -222,7 +222,6 @@ int tty_port_block_til_ready(struct tty_port *port,
 	   before the next open may complete */
 
 	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
 
 	/* The port lock protects the port counts */
 	spin_lock_irqsave(&port->lock, flags);
@@ -236,7 +235,7 @@ int tty_port_block_til_ready(struct tty_port *port,
 		if (tty->termios->c_cflag & CBAUD)
 			tty_port_raise_dtr_rts(port);
 
-		set_current_state(TASK_INTERRUPTIBLE);
+		prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE);
 		/* Check for a hangup or uninitialised port. Return accordingly */
 		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
 			if (port->flags & ASYNC_HUP_NOTIFY)
@@ -257,8 +256,7 @@ int tty_port_block_til_ready(struct tty_port *port,
 		}
 		schedule();
 	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
+	finish_wait(&port->open_wait, &wait);
 
 	/* Update counts. A parallel hangup will have set count to zero and
 	   we must not mess that up further */
-- 
cgit v0.10.2


From d3810cd4d7064b109574374f73c41b11b481dbf2 Mon Sep 17 00:00:00 2001
From: Oskar Schirmer <os@emlix.com>
Date: Thu, 11 Jun 2009 14:35:01 +0100
Subject: imx: serial: fix whitespaces (no changes in functionality)

Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 5f0be40..76c8fa1 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -302,8 +302,7 @@ static inline void imx_transmit_buffer(struct imx_port *sport)
 		/* send xmit->buf[xmit->tail]
 		 * out the port here */
 		writel(xmit->buf[xmit->tail], sport->port.membase + URTX0);
-		xmit->tail = (xmit->tail + 1) &
-		         (UART_XMIT_SIZE - 1);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
 		sport->port.icount.tx++;
 		if (uart_circ_empty(xmit))
 			break;
@@ -395,8 +394,7 @@ static irqreturn_t imx_rxint(int irq, void *dev_id)
 				continue;
 		}
 
-		if (uart_handle_sysrq_char
-		            (&sport->port, (unsigned char)rx))
+		if (uart_handle_sysrq_char(&sport->port, (unsigned char)rx))
 			continue;
 
 		if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR) ) {
@@ -471,26 +469,26 @@ static unsigned int imx_tx_empty(struct uart_port *port)
  */
 static unsigned int imx_get_mctrl(struct uart_port *port)
 {
-        struct imx_port *sport = (struct imx_port *)port;
-        unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
+	struct imx_port *sport = (struct imx_port *)port;
+	unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
 
-        if (readl(sport->port.membase + USR1) & USR1_RTSS)
-                tmp |= TIOCM_CTS;
+	if (readl(sport->port.membase + USR1) & USR1_RTSS)
+		tmp |= TIOCM_CTS;
 
-        if (readl(sport->port.membase + UCR2) & UCR2_CTS)
-                tmp |= TIOCM_RTS;
+	if (readl(sport->port.membase + UCR2) & UCR2_CTS)
+		tmp |= TIOCM_RTS;
 
-        return tmp;
+	return tmp;
 }
 
 static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
-        struct imx_port *sport = (struct imx_port *)port;
+	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
 	temp = readl(sport->port.membase + UCR2) & ~UCR2_CTS;
 
-        if (mctrl & TIOCM_RTS)
+	if (mctrl & TIOCM_RTS)
 		temp |= UCR2_CTS;
 
 	writel(temp, sport->port.membase + UCR2);
@@ -1072,22 +1070,22 @@ static struct uart_driver imx_reg = {
 
 static int serial_imx_suspend(struct platform_device *dev, pm_message_t state)
 {
-        struct imx_port *sport = platform_get_drvdata(dev);
+	struct imx_port *sport = platform_get_drvdata(dev);
 
-        if (sport)
-                uart_suspend_port(&imx_reg, &sport->port);
+	if (sport)
+		uart_suspend_port(&imx_reg, &sport->port);
 
-        return 0;
+	return 0;
 }
 
 static int serial_imx_resume(struct platform_device *dev)
 {
-        struct imx_port *sport = platform_get_drvdata(dev);
+	struct imx_port *sport = platform_get_drvdata(dev);
 
-        if (sport)
-                uart_resume_port(&imx_reg, &sport->port);
+	if (sport)
+		uart_resume_port(&imx_reg, &sport->port);
 
-        return 0;
+	return 0;
 }
 
 static int serial_imx_probe(struct platform_device *pdev)
@@ -1143,7 +1141,7 @@ static int serial_imx_probe(struct platform_device *pdev)
 	imx_ports[pdev->id] = sport;
 
 	pdata = pdev->dev.platform_data;
-	if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
+	if (pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
 		sport->have_rtscts = 1;
 
 	if (pdata->init) {
@@ -1193,13 +1191,13 @@ static int serial_imx_remove(struct platform_device *pdev)
 }
 
 static struct platform_driver serial_imx_driver = {
-        .probe          = serial_imx_probe,
-        .remove         = serial_imx_remove,
+	.probe		= serial_imx_probe,
+	.remove		= serial_imx_remove,
 
 	.suspend	= serial_imx_suspend,
 	.resume		= serial_imx_resume,
 	.driver		= {
-	        .name	= "imx-uart",
+		.name	= "imx-uart",
 		.owner	= THIS_MODULE,
 	},
 };
-- 
cgit v0.10.2


From 26bbb3ff1ff6163d6a233055766e26af8054a212 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com>
Date: Thu, 11 Jun 2009 14:36:29 +0100
Subject: imx: serial: fix one bit field type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"have_rtscts" is assigned 1, while it is declared
int:1, two's complement, which can hold 0 and -1
only. The code works, as the upper bits are cut
off, and tests are done against 0 only.

Nonetheless, correctly declaring the bit field
as unsigned int:1 renders the code more robust.

Signed-off-by: Daniel Glöckner <dg@emlix.com>
Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 76c8fa1..6b8f12f4 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -211,7 +211,7 @@ struct imx_port {
 	struct timer_list	timer;
 	unsigned int		old_status;
 	int			txirq,rxirq,rtsirq;
-	int			have_rtscts:1;
+	unsigned int		have_rtscts:1;
 	struct clk		*clk;
 };
 
-- 
cgit v0.10.2


From 977757311e50dc5d832c9fef34e7555411f7ccd8 Mon Sep 17 00:00:00 2001
From: Fabian Godehardt <fg@emlix.com>
Date: Thu, 11 Jun 2009 14:37:19 +0100
Subject: imx: serial: notify higher layers in case xmit IRQ was not called

upper layers, namely line discipline, need to be notified
when transmission of more data is possible. For spurious
cases, where IRQ handling does not supply notification
for sure, it is given additionally here, when data has just
been transmitted and space in the buffer will most probably
be available.

Signed-off-by: Fabian Godehardt <fg@emlix.com>
Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 6b8f12f4..49f2e12 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -308,6 +308,9 @@ static inline void imx_transmit_buffer(struct imx_port *sport)
 			break;
 	}
 
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&sport->port);
+
 	if (uart_circ_empty(xmit))
 		imx_stop_tx(&sport->port);
 }
-- 
cgit v0.10.2


From 2e1463922a35584c863f71d4021e1e71f76eaed0 Mon Sep 17 00:00:00 2001
From: Fabian Godehardt <fg@emlix.com>
Date: Thu, 11 Jun 2009 14:38:38 +0100
Subject: imx: serial: be sure to stop xmit upon shutdown

needed to avoid continued transmission by hardware
while software already shuts down, which might
cause dangling characters to show up in hardware
queues when restarting the device.

Signed-off-by: Fabian Godehardt <fg@emlix.com>
Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 49f2e12..e6c2ba2 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -634,6 +634,10 @@ static void imx_shutdown(struct uart_port *port)
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	temp = readl(sport->port.membase + UCR2);
+	temp &= ~(UCR2_TXEN);
+	writel(temp, sport->port.membase + UCR2);
+
 	/*
 	 * Stop our timer.
 	 */
-- 
cgit v0.10.2


From 9f322ad064f9210e7d472dfe77e702274d5c9dba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com>
Date: Thu, 11 Jun 2009 14:39:21 +0100
Subject: imx: serial: handle initialisation failure correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

correctly de-initialise device when setting up failed,
call to pdata->exit() was missing.

Signed-off-by: Daniel Glöckner <dg@emlix.com>
Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index e6c2ba2..cbd4f32 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -1157,10 +1157,15 @@ static int serial_imx_probe(struct platform_device *pdev)
 			goto clkput;
 	}
 
-	uart_add_one_port(&imx_reg, &sport->port);
+	ret = uart_add_one_port(&imx_reg, &sport->port);
+	if (ret)
+		goto deinit;
 	platform_set_drvdata(pdev, &sport->port);
 
 	return 0;
+deinit:
+	if (pdata->exit)
+		pdata->exit(pdev);
 clkput:
 	clk_put(sport->clk);
 	clk_disable(sport->clk);
-- 
cgit v0.10.2


From 8759ef32d992fc6c0bcbe40fca7aa302190918a5 Mon Sep 17 00:00:00 2001
From: Oskar Schirmer <os@emlix.com>
Date: Thu, 11 Jun 2009 14:51:15 +0100
Subject: lib: isolate rational fractions helper function

Provide a helper function to determine optimum numerator
denominator value pairs taking into account restricted
register size. Useful especially with PLL and other clock
configurations.

Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/rational.h b/include/linux/rational.h
new file mode 100644
index 0000000..4f532fc
--- /dev/null
+++ b/include/linux/rational.h
@@ -0,0 +1,19 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers,
+ * e.g. when calculating optimum numerator/denominator pairs for
+ * pll configuration taking into account restricted register size
+ */
+
+#ifndef _LINUX_RATIONAL_H
+#define _LINUX_RATIONAL_H
+
+void rational_best_approximation(
+	unsigned long given_numerator, unsigned long given_denominator,
+	unsigned long max_numerator, unsigned long max_denominator,
+	unsigned long *best_numerator, unsigned long *best_denominator);
+
+#endif /* _LINUX_RATIONAL_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 8ade0a7..9960be0 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -10,6 +10,9 @@ menu "Library routines"
 config BITREVERSE
 	tristate
 
+config RATIONAL
+	boolean
+
 config GENERIC_FIND_FIRST_BIT
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e4..1f6edef 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -50,6 +50,7 @@ ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
 endif
 
 obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_RATIONAL)	+= rational.o
 obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
 obj-$(CONFIG_CRC16)	+= crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
diff --git a/lib/rational.c b/lib/rational.c
new file mode 100644
index 0000000..b3c099b
--- /dev/null
+++ b/lib/rational.c
@@ -0,0 +1,62 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers
+ */
+
+#include <linux/rational.h>
+
+/*
+ * calculate best rational approximation for a given fraction
+ * taking into account restricted register size, e.g. to find
+ * appropriate values for a pll with 5 bit denominator and
+ * 8 bit numerator register fields, trying to set up with a
+ * frequency ratio of 3.1415, one would say:
+ *
+ * rational_best_approximation(31415, 10000,
+ *		(1 << 8) - 1, (1 << 5) - 1, &n, &d);
+ *
+ * you may look at given_numerator as a fixed point number,
+ * with the fractional part size described in given_denominator.
+ *
+ * for theoretical background, see:
+ * http://en.wikipedia.org/wiki/Continued_fraction
+ */
+
+void rational_best_approximation(
+	unsigned long given_numerator, unsigned long given_denominator,
+	unsigned long max_numerator, unsigned long max_denominator,
+	unsigned long *best_numerator, unsigned long *best_denominator)
+{
+	unsigned long n, d, n0, d0, n1, d1;
+	n = given_numerator;
+	d = given_denominator;
+	n0 = d1 = 0;
+	n1 = d0 = 1;
+	for (;;) {
+		unsigned long t, a;
+		if ((n1 > max_numerator) || (d1 > max_denominator)) {
+			n1 = n0;
+			d1 = d0;
+			break;
+		}
+		if (d == 0)
+			break;
+		t = d;
+		a = n / d;
+		d = n % d;
+		n = t;
+		t = n0 + a * n1;
+		n0 = n1;
+		n1 = t;
+		t = d0 + a * d1;
+		d0 = d1;
+		d1 = t;
+	}
+	*best_numerator = n1;
+	*best_denominator = d1;
+}
+
+EXPORT_SYMBOL(rational_best_approximation);
-- 
cgit v0.10.2


From 534fca068ec8063ec8b67626b3eb34ba6ec86967 Mon Sep 17 00:00:00 2001
From: Oskar Schirmer <os@emlix.com>
Date: Thu, 11 Jun 2009 14:52:23 +0100
Subject: imx: serial: use rational library function

for calculation of numerator and denominator
used in baud rate setting, use generic library function
for optimum settings.

Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 3391ef0..641e800 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -833,6 +833,7 @@ config SERIAL_IMX
 	bool "IMX serial port support"
 	depends on ARM && (ARCH_IMX || ARCH_MXC)
 	select SERIAL_CORE
+	select RATIONAL
 	help
 	  If you have a machine based on a Motorola IMX CPU you
 	  can enable its onboard serial port by enabling this option.
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index cbd4f32..0de81f7 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -41,6 +41,7 @@
 #include <linux/serial_core.h>
 #include <linux/serial.h>
 #include <linux/clk.h>
+#include <linux/rational.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -670,7 +671,8 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
 	unsigned long flags;
 	unsigned int ucr2, old_ucr1, old_txrxen, baud, quot;
 	unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
-	unsigned int div, num, denom, ufcr;
+	unsigned int div, ufcr;
+	unsigned long num, denom;
 
 	/*
 	 * If we don't support modem control lines, don't allow
@@ -772,32 +774,20 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
 	if (!div)
 		div = 1;
 
-	num = baud;
-	denom = port->uartclk / div / 16;
+	rational_best_approximation(16 * div * baud, sport->port.uartclk,
+		1 << 16, 1 << 16, &num, &denom);
 
-	/* shift num and denom right until they fit into 16 bits */
-	while (num > 0x10000 || denom > 0x10000) {
-		num >>= 1;
-		denom >>= 1;
-	}
-	if (num > 0)
-		num -= 1;
-	if (denom > 0)
-		denom -= 1;
-
-	writel(num, sport->port.membase + UBIR);
-	writel(denom, sport->port.membase + UBMR);
-
-	if (div == 7)
-		div = 6; /* 6 in RFDIV means divide by 7 */
-	else
-		div = 6 - div;
+	num -= 1;
+	denom -= 1;
 
 	ufcr = readl(sport->port.membase + UFCR);
 	ufcr = (ufcr & (~UFCR_RFDIV)) |
 	    (div << 7);
 	writel(ufcr, sport->port.membase + UFCR);
 
+	writel(num, sport->port.membase + UBIR);
+	writel(denom, sport->port.membase + UBMR);
+
 #ifdef ONEMS
 	writel(sport->port.uartclk / div / 1000, sport->port.membase + ONEMS);
 #endif
-- 
cgit v0.10.2


From b6e4913834cd032082e5c76dfade61050212dc98 Mon Sep 17 00:00:00 2001
From: Fabian Godehardt <fg@emlix.com>
Date: Thu, 11 Jun 2009 14:53:18 +0100
Subject: imx: serial: add IrDA support to serial driver

Using the iMX serial driver with an IrDA device
needs extra peripheral settings and specific
timing depending on the transmitter circuitry used.

Signed-off-by: Fabian Godehardt <fg@emlix.com>
Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm/plat-mxc/include/mach/imx-uart.h b/arch/arm/plat-mxc/include/mach/imx-uart.h
index 599217b..f9bd17d 100644
--- a/arch/arm/plat-mxc/include/mach/imx-uart.h
+++ b/arch/arm/plat-mxc/include/mach/imx-uart.h
@@ -20,11 +20,16 @@
 #define ASMARM_ARCH_UART_H
 
 #define IMXUART_HAVE_RTSCTS (1<<0)
+#define IMXUART_IRDA        (1<<1)
 
 struct imxuart_platform_data {
 	int (*init)(struct platform_device *pdev);
 	int (*exit)(struct platform_device *pdev);
 	unsigned int flags;
+	void (*irda_enable)(int enable);
+	unsigned int irda_inv_rx:1;
+	unsigned int irda_inv_tx:1;
+	unsigned short transceiver_delay;
 };
 
 #endif
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 0de81f7..8c79e8c 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -8,6 +8,9 @@
  *  Author: Sascha Hauer <sascha@saschahauer.de>
  *  Copyright (C) 2004 Pengutronix
  *
+ *  Copyright (C) 2009 emlix GmbH
+ *  Author: Fabian Godehardt (added IrDA support for iMX)
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -41,6 +44,7 @@
 #include <linux/serial_core.h>
 #include <linux/serial.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/rational.h>
 
 #include <asm/io.h>
@@ -149,6 +153,7 @@
 #define  UCR4_DREN  	 (1<<0)  /* Recv data ready interrupt enable */
 #define  UFCR_RXTL_SHF   0       /* Receiver trigger level shift */
 #define  UFCR_RFDIV      (7<<7)  /* Reference freq divider mask */
+#define  UFCR_RFDIV_REG(x)	(((x) < 7 ? 6 - (x) : 6) << 7)
 #define  UFCR_TXTL_SHF   10      /* Transmitter trigger level shift */
 #define  USR1_PARITYERR  (1<<15) /* Parity error interrupt flag */
 #define  USR1_RTSS  	 (1<<14) /* RTS pin status */
@@ -213,9 +218,19 @@ struct imx_port {
 	unsigned int		old_status;
 	int			txirq,rxirq,rtsirq;
 	unsigned int		have_rtscts:1;
+	unsigned int		use_irda:1;
+	unsigned int		irda_inv_rx:1;
+	unsigned int		irda_inv_tx:1;
+	unsigned short		trcv_delay; /* transceiver delay */
 	struct clk		*clk;
 };
 
+#ifdef CONFIG_IRDA
+#define USE_IRDA(sport)	((sport)->use_irda)
+#else
+#define USE_IRDA(sport)	(0)
+#endif
+
 /*
  * Handle any change of modem status signal since we were last called.
  */
@@ -269,6 +284,48 @@ static void imx_stop_tx(struct uart_port *port)
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	if (USE_IRDA(sport)) {
+		/* half duplex - wait for end of transmission */
+		int n = 256;
+		while ((--n > 0) &&
+		      !(readl(sport->port.membase + USR2) & USR2_TXDC)) {
+			udelay(5);
+			barrier();
+		}
+		/*
+		 * irda transceiver - wait a bit more to avoid
+		 * cutoff, hardware dependent
+		 */
+		udelay(sport->trcv_delay);
+
+		/*
+		 * half duplex - reactivate receive mode,
+		 * flush receive pipe echo crap
+		 */
+		if (readl(sport->port.membase + USR2) & USR2_TXDC) {
+			temp = readl(sport->port.membase + UCR1);
+			temp &= ~(UCR1_TXMPTYEN | UCR1_TRDYEN);
+			writel(temp, sport->port.membase + UCR1);
+
+			temp = readl(sport->port.membase + UCR4);
+			temp &= ~(UCR4_TCEN);
+			writel(temp, sport->port.membase + UCR4);
+
+			while (readl(sport->port.membase + URXD0) &
+			       URXD_CHARRDY)
+				barrier();
+
+			temp = readl(sport->port.membase + UCR1);
+			temp |= UCR1_RRDYEN;
+			writel(temp, sport->port.membase + UCR1);
+
+			temp = readl(sport->port.membase + UCR4);
+			temp |= UCR4_DREN;
+			writel(temp, sport->port.membase + UCR4);
+		}
+		return;
+	}
+
 	temp = readl(sport->port.membase + UCR1);
 	writel(temp & ~UCR1_TXMPTYEN, sport->port.membase + UCR1);
 }
@@ -324,9 +381,30 @@ static void imx_start_tx(struct uart_port *port)
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long temp;
 
+	if (USE_IRDA(sport)) {
+		/* half duplex in IrDA mode; have to disable receive mode */
+		temp = readl(sport->port.membase + UCR4);
+		temp &= ~(UCR4_DREN);
+		writel(temp, sport->port.membase + UCR4);
+
+		temp = readl(sport->port.membase + UCR1);
+		temp &= ~(UCR1_RRDYEN);
+		writel(temp, sport->port.membase + UCR1);
+	}
+
 	temp = readl(sport->port.membase + UCR1);
 	writel(temp | UCR1_TXMPTYEN, sport->port.membase + UCR1);
 
+	if (USE_IRDA(sport)) {
+		temp = readl(sport->port.membase + UCR1);
+		temp |= UCR1_TRDYEN;
+		writel(temp, sport->port.membase + UCR1);
+
+		temp = readl(sport->port.membase + UCR4);
+		temp |= UCR4_TCEN;
+		writel(temp, sport->port.membase + UCR4);
+	}
+
 	if (readl(sport->port.membase + UTS) & UTS_TXEMPTY)
 		imx_transmit_buffer(sport);
 }
@@ -536,12 +614,7 @@ static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode)
 	if(!ufcr_rfdiv)
 		ufcr_rfdiv = 1;
 
-	if(ufcr_rfdiv >= 7)
-		ufcr_rfdiv = 6;
-	else
-		ufcr_rfdiv = 6 - ufcr_rfdiv;
-
-	val |= UFCR_RFDIV & (ufcr_rfdiv << 7);
+	val |= UFCR_RFDIV_REG(ufcr_rfdiv);
 
 	writel(val, sport->port.membase + UFCR);
 
@@ -560,8 +633,24 @@ static int imx_startup(struct uart_port *port)
 	 * requesting IRQs
 	 */
 	temp = readl(sport->port.membase + UCR4);
+
+	if (USE_IRDA(sport))
+		temp |= UCR4_IRSC;
+
 	writel(temp & ~UCR4_DREN, sport->port.membase + UCR4);
 
+	if (USE_IRDA(sport)) {
+		/* reset fifo's and state machines */
+		int i = 100;
+		temp = readl(sport->port.membase + UCR2);
+		temp &= ~UCR2_SRST;
+		writel(temp, sport->port.membase + UCR2);
+		while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) &&
+		    (--i > 0)) {
+			udelay(1);
+		}
+	}
+
 	/*
 	 * Allocate the IRQ(s) i.MX1 has three interrupts whereas later
 	 * chips only have one interrupt.
@@ -577,12 +666,16 @@ static int imx_startup(struct uart_port *port)
 		if (retval)
 			goto error_out2;
 
-		retval = request_irq(sport->rtsirq, imx_rtsint,
-			     (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
-			       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-				DRIVER_NAME, sport);
-		if (retval)
-			goto error_out3;
+		/* do not use RTS IRQ on IrDA */
+		if (!USE_IRDA(sport)) {
+			retval = request_irq(sport->rtsirq, imx_rtsint,
+				     (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
+				       IRQF_TRIGGER_FALLING |
+				       IRQF_TRIGGER_RISING,
+					DRIVER_NAME, sport);
+			if (retval)
+				goto error_out3;
+		}
 	} else {
 		retval = request_irq(sport->port.irq, imx_int, 0,
 				DRIVER_NAME, sport);
@@ -599,18 +692,49 @@ static int imx_startup(struct uart_port *port)
 
 	temp = readl(sport->port.membase + UCR1);
 	temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN;
+
+	if (USE_IRDA(sport)) {
+		temp |= UCR1_IREN;
+		temp &= ~(UCR1_RTSDEN);
+	}
+
 	writel(temp, sport->port.membase + UCR1);
 
 	temp = readl(sport->port.membase + UCR2);
 	temp |= (UCR2_RXEN | UCR2_TXEN);
 	writel(temp, sport->port.membase + UCR2);
 
+	if (USE_IRDA(sport)) {
+		/* clear RX-FIFO */
+		int i = 64;
+		while ((--i > 0) &&
+			(readl(sport->port.membase + URXD0) & URXD_CHARRDY)) {
+			barrier();
+		}
+	}
+
 #if defined CONFIG_ARCH_MX2 || defined CONFIG_ARCH_MX3
 	temp = readl(sport->port.membase + UCR3);
 	temp |= UCR3_RXDMUXSEL;
 	writel(temp, sport->port.membase + UCR3);
 #endif
 
+	if (USE_IRDA(sport)) {
+		temp = readl(sport->port.membase + UCR4);
+		if (sport->irda_inv_rx)
+			temp |= UCR4_INVR;
+		else
+			temp &= ~(UCR4_INVR);
+		writel(temp | UCR4_DREN, sport->port.membase + UCR4);
+
+		temp = readl(sport->port.membase + UCR3);
+		if (sport->irda_inv_tx)
+			temp |= UCR3_INVT;
+		else
+			temp &= ~(UCR3_INVT);
+		writel(temp, sport->port.membase + UCR3);
+	}
+
 	/*
 	 * Enable modem status interrupts
 	 */
@@ -618,6 +742,16 @@ static int imx_startup(struct uart_port *port)
 	imx_enable_ms(&sport->port);
 	spin_unlock_irqrestore(&sport->port.lock,flags);
 
+	if (USE_IRDA(sport)) {
+		struct imxuart_platform_data *pdata;
+		pdata = sport->port.dev->platform_data;
+		sport->irda_inv_rx = pdata->irda_inv_rx;
+		sport->irda_inv_tx = pdata->irda_inv_tx;
+		sport->trcv_delay = pdata->transceiver_delay;
+		if (pdata->irda_enable)
+			pdata->irda_enable(1);
+	}
+
 	return 0;
 
 error_out3:
@@ -639,6 +773,13 @@ static void imx_shutdown(struct uart_port *port)
 	temp &= ~(UCR2_TXEN);
 	writel(temp, sport->port.membase + UCR2);
 
+	if (USE_IRDA(sport)) {
+		struct imxuart_platform_data *pdata;
+		pdata = sport->port.dev->platform_data;
+		if (pdata->irda_enable)
+			pdata->irda_enable(0);
+	}
+
 	/*
 	 * Stop our timer.
 	 */
@@ -648,7 +789,8 @@ static void imx_shutdown(struct uart_port *port)
 	 * Free the interrupts
 	 */
 	if (sport->txirq > 0) {
-		free_irq(sport->rtsirq, sport);
+		if (!USE_IRDA(sport))
+			free_irq(sport->rtsirq, sport);
 		free_irq(sport->txirq, sport);
 		free_irq(sport->rxirq, sport);
 	} else
@@ -660,6 +802,9 @@ static void imx_shutdown(struct uart_port *port)
 
 	temp = readl(sport->port.membase + UCR1);
 	temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN);
+	if (USE_IRDA(sport))
+		temp &= ~(UCR1_IREN);
+
 	writel(temp, sport->port.membase + UCR1);
 }
 
@@ -768,11 +913,19 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
 			sport->port.membase + UCR2);
 	old_txrxen &= (UCR2_TXEN | UCR2_RXEN);
 
-	div = sport->port.uartclk / (baud * 16);
-	if (div > 7)
-		div = 7;
-	if (!div)
+	if (USE_IRDA(sport)) {
+		/*
+		 * use maximum available submodule frequency to
+		 * avoid missing short pulses due to low sampling rate
+		 */
 		div = 1;
+	} else {
+		div = sport->port.uartclk / (baud * 16);
+		if (div > 7)
+			div = 7;
+		if (!div)
+			div = 1;
+	}
 
 	rational_best_approximation(16 * div * baud, sport->port.uartclk,
 		1 << 16, 1 << 16, &num, &denom);
@@ -781,8 +934,7 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
 	denom -= 1;
 
 	ufcr = readl(sport->port.membase + UFCR);
-	ufcr = (ufcr & (~UFCR_RFDIV)) |
-	    (div << 7);
+	ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div);
 	writel(ufcr, sport->port.membase + UFCR);
 
 	writel(num, sport->port.membase + UBIR);
@@ -1141,6 +1293,11 @@ static int serial_imx_probe(struct platform_device *pdev)
 	if (pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
 		sport->have_rtscts = 1;
 
+#ifdef CONFIG_IRDA
+	if (pdata && (pdata->flags & IMXUART_IRDA))
+		sport->use_irda = 1;
+#endif
+
 	if (pdata->init) {
 		ret = pdata->init(pdev);
 		if (ret)
-- 
cgit v0.10.2


From d7f8d437bda0ec409e26cffb846bc28a40603ee3 Mon Sep 17 00:00:00 2001
From: Oskar Schirmer <os@emlix.com>
Date: Thu, 11 Jun 2009 14:55:22 +0100
Subject: imx: serial: use tty_encode_baud_rate to set true rate

real baud rate may be different from the one requested.
for upper layers, set the nearest value to the real rate
in favour of the rate previously requested.

Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 8c79e8c..7b5d1de 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -818,6 +818,7 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
 	unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
 	unsigned int div, ufcr;
 	unsigned long num, denom;
+	uint64_t tdiv64;
 
 	/*
 	 * If we don't support modem control lines, don't allow
@@ -930,6 +931,12 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
 	rational_best_approximation(16 * div * baud, sport->port.uartclk,
 		1 << 16, 1 << 16, &num, &denom);
 
+	tdiv64 = sport->port.uartclk;
+	tdiv64 *= num;
+	do_div(tdiv64, denom * 16 * div);
+	tty_encode_baud_rate(sport->port.info->port.tty,
+		(speed_t)tdiv64, (speed_t)tdiv64);
+
 	num -= 1;
 	denom -= 1;
 
-- 
cgit v0.10.2


From f0e8527726b9e56649b9eafde3bc0fbc4dd2dd47 Mon Sep 17 00:00:00 2001
From: Dirk Eibach <eibach@gdsys.de>
Date: Thu, 11 Jun 2009 14:56:44 +0100
Subject: moxa: prevent opening unavailable ports

In moxa.c there are 32 minor numbers reserved for each device. The
number of ports actually available per device is stored in
moxa_board_conf->numPorts. This number is not considered in moxa_open().
Opening a port that is not available results in a kernel oops.
This patch adds a test to moxa_open() that prevents opening unavailable
ports.

Signed-off-by: Dirk Eibach <eibach@gdsys.de>
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 4a4cab7..65b6ff2 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -1184,6 +1184,11 @@ static int moxa_open(struct tty_struct *tty, struct file *filp)
 		return -ENODEV;
 	}
 
+	if (port % MAX_PORTS_PER_BOARD >= brd->numPorts) {
+		mutex_unlock(&moxa_openlock);
+		return -ENODEV;
+	}
+
 	ch = &brd->ports[port % MAX_PORTS_PER_BOARD];
 	ch->port.count++;
 	tty->driver_data = ch;
-- 
cgit v0.10.2


From 1c432d899d32d36371ee4ee310fa3609cf0e5742 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 13:19:29 +0200
Subject: perf_counter: Rename enums

Rename the perf enums to be in the 'perf_' namespace and strictly
enumerate the ABI bits.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 95c797c..d5911b0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -24,24 +24,21 @@
 /*
  * attr.type
  */
-enum perf_event_types {
+enum perf_type_id {
 	PERF_TYPE_HARDWARE		= 0,
 	PERF_TYPE_SOFTWARE		= 1,
 	PERF_TYPE_TRACEPOINT		= 2,
 	PERF_TYPE_HW_CACHE		= 3,
+	PERF_TYPE_RAW			= 4,
 
-	/*
-	 * available TYPE space, raw is the max value.
-	 */
-
-	PERF_TYPE_RAW			= 128,
+	PERF_TYPE_MAX,			/* non ABI */
 };
 
 /*
  * Generalized performance counter event types, used by the attr.event_id
  * parameter of the sys_perf_counter_open() syscall:
  */
-enum attr_ids {
+enum perf_hw_id {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
@@ -53,7 +50,7 @@ enum attr_ids {
 	PERF_COUNT_BRANCH_MISSES	= 5,
 	PERF_COUNT_BUS_CYCLES		= 6,
 
-	PERF_HW_EVENTS_MAX		= 7,
+	PERF_HW_EVENTS_MAX,		/* non ABI */
 };
 
 /*
@@ -63,30 +60,30 @@ enum attr_ids {
  *       { read, write, prefetch } x
  *       { accesses, misses }
  */
-enum hw_cache_id {
-	PERF_COUNT_HW_CACHE_L1D,
-	PERF_COUNT_HW_CACHE_L1I,
-	PERF_COUNT_HW_CACHE_L2,
-	PERF_COUNT_HW_CACHE_DTLB,
-	PERF_COUNT_HW_CACHE_ITLB,
-	PERF_COUNT_HW_CACHE_BPU,
-
-	PERF_COUNT_HW_CACHE_MAX,
+enum perf_hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D		= 0,
+	PERF_COUNT_HW_CACHE_L1I		= 1,
+	PERF_COUNT_HW_CACHE_L2		= 2,
+	PERF_COUNT_HW_CACHE_DTLB	= 3,
+	PERF_COUNT_HW_CACHE_ITLB	= 4,
+	PERF_COUNT_HW_CACHE_BPU		= 5,
+
+	PERF_COUNT_HW_CACHE_MAX,	/* non ABI */
 };
 
-enum hw_cache_op_id {
-	PERF_COUNT_HW_CACHE_OP_READ,
-	PERF_COUNT_HW_CACHE_OP_WRITE,
-	PERF_COUNT_HW_CACHE_OP_PREFETCH,
+enum perf_hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ	= 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE	= 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH	= 2,
 
-	PERF_COUNT_HW_CACHE_OP_MAX,
+	PERF_COUNT_HW_CACHE_OP_MAX,	/* non ABI */
 };
 
-enum hw_cache_op_result_id {
-	PERF_COUNT_HW_CACHE_RESULT_ACCESS,
-	PERF_COUNT_HW_CACHE_RESULT_MISS,
+enum perf_hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
+	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
 
-	PERF_COUNT_HW_CACHE_RESULT_MAX,
+	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non ABI */
 };
 
 /*
@@ -95,7 +92,7 @@ enum hw_cache_op_result_id {
  * physical and sw events of the kernel (and allow the profiling of them as
  * well):
  */
-enum sw_event_ids {
+enum perf_sw_ids {
 	PERF_COUNT_CPU_CLOCK		= 0,
 	PERF_COUNT_TASK_CLOCK		= 1,
 	PERF_COUNT_PAGE_FAULTS		= 2,
@@ -104,7 +101,7 @@ enum sw_event_ids {
 	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
 	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
 
-	PERF_SW_EVENTS_MAX		= 7,
+	PERF_SW_EVENTS_MAX,		/* non ABI */
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3b2829d..c02535b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3162,7 +3162,7 @@ static int perf_swcounter_is_counting(struct perf_counter *counter)
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
-				enum perf_event_types type,
+				enum perf_type_id type,
 				u32 event, struct pt_regs *regs)
 {
 	if (!perf_swcounter_is_counting(counter))
@@ -3194,7 +3194,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 }
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
-				     enum perf_event_types type, u32 event,
+				     enum perf_type_id type, u32 event,
 				     u64 nr, int nmi, struct pt_regs *regs,
 				     u64 addr)
 {
@@ -3225,7 +3225,7 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
 	return &cpuctx->recursion[0];
 }
 
-static void __perf_swcounter_event(enum perf_event_types type, u32 event,
+static void __perf_swcounter_event(enum perf_type_id type, u32 event,
 				   u64 nr, int nmi, struct pt_regs *regs,
 				   u64 addr)
 {
-- 
cgit v0.10.2


From f4dbfa8f3131a84257223393905f7efad0ca5996 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 14:06:28 +0200
Subject: perf_counter: Standardize event names

Pure renames only, to PERF_COUNT_HW_* and PERF_COUNT_SW_*.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 0e94b68..73956f0 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -535,12 +535,12 @@ static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int p4_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 7,
-	[PERF_COUNT_INSTRUCTIONS] = 0x1001,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x8c10,		/* PM_LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x3c10,		/* PM_LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330,	/* PM_BR_ISSUED */
-	[PERF_COUNT_BRANCH_MISSES] = 0x331,		/* PM_BR_MPRED_CR */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x1001,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8c10, /* PM_LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c10, /* PM_LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x330,  /* PM_BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x331,  /* PM_BR_MPRED_CR */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index bbf2cbb..5f8b774 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -606,12 +606,12 @@ static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int power5p_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 0xf,
-	[PERF_COUNT_INSTRUCTIONS] = 0x100009,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8,	/* LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x3c1088,		/* LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4,	/* BR_ISSUED */ 
-	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,		/* BR_MPRED_CR */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0xf,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x100009,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x1c10a8, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c1088, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x230e4,  /* BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x230e5,  /* BR_MPRED_CR */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 670cf10..d54723a 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -548,12 +548,12 @@ static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int power5_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 0xf,
-	[PERF_COUNT_INSTRUCTIONS] = 0x100009,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x4c1090,	/* LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x3c1088,		/* LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4,	/* BR_ISSUED */ 
-	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,		/* BR_MPRED_CR */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0xf,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x100009,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4c1090, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c1088, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x230e4,  /* BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x230e5,  /* BR_MPRED_CR */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 4da7078..0cd406ee 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -466,12 +466,12 @@ static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int power6_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 0x1e,
-	[PERF_COUNT_INSTRUCTIONS] = 2,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x280030,	/* LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x30000c,		/* LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0,	/* BR_PRED */ 
-	[PERF_COUNT_BRANCH_MISSES] = 0x400052,		/* BR_MPRED */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0x1e,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 2,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x280030, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x30000c, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x410a0,  /* BR_PRED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x400052, /* BR_MPRED */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 336adf1..46a2064 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -419,12 +419,12 @@ static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int ppc970_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 7,
-	[PERF_COUNT_INSTRUCTIONS] = 1,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x8810,		/* PM_LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x3810,		/* PM_LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431,	/* PM_BR_ISSUED */
-	[PERF_COUNT_BRANCH_MISSES] = 0x327,		/* PM_GRP_BR_MPRED */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 1,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8810, /* PM_LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3810, /* PM_LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x431,  /* PM_BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES] 		= 0x327,  /* PM_GRP_BR_MPRED */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ac0e112..5beffc8 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -312,7 +312,7 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
@@ -323,7 +323,7 @@ good_area:
 #endif
 	} else {
 		current->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 	up_read(&mm->mmap_sem);
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 57ae1be..572fb43 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -69,13 +69,13 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
  */
 static const u64 intel_perfmon_event_map[] =
 {
-  [PERF_COUNT_CPU_CYCLES]		= 0x003c,
-  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
-  [PERF_COUNT_CACHE_MISSES]		= 0x412e,
-  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_BUS_CYCLES]		= 0x013c,
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
 static u64 intel_pmu_event_map(int event)
@@ -485,12 +485,12 @@ static const u64 amd_0f_hw_cache_event_ids
  */
 static const u64 amd_perfmon_event_map[] =
 {
-  [PERF_COUNT_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_CACHE_REFERENCES]		= 0x0080,
-  [PERF_COUNT_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
 };
 
 static u64 amd_pmu_event_map(int event)
@@ -970,11 +970,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
 		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
 		return X86_PMC_IDX_FIXED_BUS_CYCLES;
 
 	return -1;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6f9df2b..5c6d816 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1142,11 +1142,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d5911b0..887df88 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -42,15 +42,15 @@ enum perf_hw_id {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CPU_CYCLES		= 0,
-	PERF_COUNT_INSTRUCTIONS		= 1,
-	PERF_COUNT_CACHE_REFERENCES	= 2,
-	PERF_COUNT_CACHE_MISSES		= 3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_BRANCH_MISSES	= 5,
-	PERF_COUNT_BUS_CYCLES		= 6,
-
-	PERF_HW_EVENTS_MAX,		/* non ABI */
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES		= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
+
+	PERF_COUNT_HW_MAX,		/* non ABI */
 };
 
 /*
@@ -93,15 +93,15 @@ enum perf_hw_cache_op_result_id {
  * well):
  */
 enum perf_sw_ids {
-	PERF_COUNT_CPU_CLOCK		= 0,
-	PERF_COUNT_TASK_CLOCK		= 1,
-	PERF_COUNT_PAGE_FAULTS		= 2,
-	PERF_COUNT_CONTEXT_SWITCHES	= 3,
-	PERF_COUNT_CPU_MIGRATIONS	= 4,
-	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
-	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
-
-	PERF_SW_EVENTS_MAX,		/* non ABI */
+	PERF_COUNT_SW_CPU_CLOCK		= 0,
+	PERF_COUNT_SW_TASK_CLOCK	= 1,
+	PERF_COUNT_SW_PAGE_FAULTS	= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
+
+	PERF_COUNT_SW_MAX,		/* non ABI */
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c02535b..8859b97 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1024,7 +1024,7 @@ void perf_counter_task_sched_out(struct task_struct *task,
 	int do_switch = 1;
 
 	regs = task_pt_regs(task);
-	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
+	perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
 
 	if (likely(!ctx || !cpuctx->task_ctx))
 		return;
@@ -3411,13 +3411,13 @@ void perf_counter_task_migration(struct task_struct *task, int cpu)
 	struct perf_counter_context *ctx;
 
 	perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE,
-				 PERF_COUNT_CPU_MIGRATIONS,
+				 PERF_COUNT_SW_CPU_MIGRATIONS,
 				 1, 1, NULL, 0);
 
 	ctx = perf_pin_task_context(task);
 	if (ctx) {
 		perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE,
-					 PERF_COUNT_CPU_MIGRATIONS,
+					 PERF_COUNT_SW_CPU_MIGRATIONS,
 					 1, 1, NULL, 0);
 		perf_unpin_context(ctx);
 	}
@@ -3475,11 +3475,11 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	 * events.
 	 */
 	switch (counter->attr.config) {
-	case PERF_COUNT_CPU_CLOCK:
+	case PERF_COUNT_SW_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
 		break;
-	case PERF_COUNT_TASK_CLOCK:
+	case PERF_COUNT_SW_TASK_CLOCK:
 		/*
 		 * If the user instantiates this as a per-cpu counter,
 		 * use the cpu_clock counter instead.
@@ -3490,11 +3490,11 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 			pmu = &perf_ops_cpu_clock;
 
 		break;
-	case PERF_COUNT_PAGE_FAULTS:
-	case PERF_COUNT_PAGE_FAULTS_MIN:
-	case PERF_COUNT_PAGE_FAULTS_MAJ:
-	case PERF_COUNT_CONTEXT_SWITCHES:
-	case PERF_COUNT_CPU_MIGRATIONS:
+	case PERF_COUNT_SW_PAGE_FAULTS:
+	case PERF_COUNT_SW_PAGE_FAULTS_MIN:
+	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
+	case PERF_COUNT_SW_CONTEXT_SWITCHES:
+	case PERF_COUNT_SW_CPU_MIGRATIONS:
 		pmu = &perf_ops_generic;
 		break;
 	}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 84cd336..29259e7 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -378,12 +378,12 @@ try_again:
 		 * is always available even if no PMU support:
 		 */
 		if (attr->type == PERF_TYPE_HARDWARE
-			&& attr->config == PERF_COUNT_CPU_CYCLES) {
+			&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
 
 			if (verbose)
 				warning(" ... trying to fall back to cpu-clock-ticks\n");
 			attr->type = PERF_TYPE_SOFTWARE;
-			attr->config = PERF_COUNT_CPU_CLOCK;
+			attr->config = PERF_COUNT_SW_CPU_CLOCK;
 			goto try_again;
 		}
 		printf("\n");
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6404906..c43e4a9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,15 +46,16 @@
 
 static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
 
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK		},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS	},
-
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES		},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS	},
+
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES	},
+
 };
 
 static int			system_wide			=  0;
@@ -120,10 +121,10 @@ static inline int nsec_counter(int counter)
 	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
 		return 0;
 
-	if (attrs[counter].config == PERF_COUNT_CPU_CLOCK)
+	if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK)
 		return 1;
 
-	if (attrs[counter].config == PERF_COUNT_TASK_CLOCK)
+	if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
 		return 1;
 
 	return 0;
@@ -176,10 +177,10 @@ static void read_counter(int counter)
 	 * Save the full runtime - to allow normalization during printout:
 	 */
 	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_TASK_CLOCK)
+		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
 		runtime_nsecs = count[0];
 	if (attrs[counter].type == PERF_TYPE_HARDWARE &&
-		attrs[counter].config == PERF_COUNT_CPU_CYCLES)
+		attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
 		runtime_cycles = count[0];
 }
 
@@ -206,7 +207,7 @@ static void print_counter(int counter)
 		fprintf(stderr, " %14.6f  %-20s",
 			msecs, event_name(counter));
 		if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-			attrs[counter].config == PERF_COUNT_TASK_CLOCK) {
+			attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
 
 			if (walltime_nsecs)
 				fprintf(stderr, " # %11.3f CPU utilization factor",
@@ -220,7 +221,7 @@ static void print_counter(int counter)
 				(double)count[0]/runtime_nsecs*1000.0);
 		if (runtime_cycles &&
 			attrs[counter].type == PERF_TYPE_HARDWARE &&
-				attrs[counter].config == PERF_COUNT_INSTRUCTIONS) {
+				attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
 
 			fprintf(stderr, " # %1.3f per cycle",
 				(double)count[0] / (double)runtime_cycles);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 309dbc7..fe338d3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -562,13 +562,13 @@ try_again:
 		 * is always available even if no PMU support:
 		 */
 		if (attr->type == PERF_TYPE_HARDWARE
-			&& attr->config == PERF_COUNT_CPU_CYCLES) {
+			&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
 
 			if (verbose)
 				warning(" ... trying to fall back to cpu-clock-ticks\n");
 
 			attr->type = PERF_TYPE_SOFTWARE;
-			attr->config = PERF_COUNT_CPU_CLOCK;
+			attr->config = PERF_COUNT_SW_CPU_CLOCK;
 			goto try_again;
 		}
 		printf("\n");
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index d325076..860e116 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -99,13 +99,13 @@ enum hw_event_ids {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CPU_CYCLES		= 0,
-	PERF_COUNT_INSTRUCTIONS		= 1,
-	PERF_COUNT_CACHE_REFERENCES	= 2,
-	PERF_COUNT_CACHE_MISSES		= 3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_BRANCH_MISSES	= 5,
-	PERF_COUNT_BUS_CYCLES		= 6,
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES	= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES	= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
 };
 
 These are standardized types of events that work relatively uniformly
@@ -130,13 +130,13 @@ software events, selected by 'event_id':
  * well):
  */
 enum sw_event_ids {
-	PERF_COUNT_CPU_CLOCK		= 0,
-	PERF_COUNT_TASK_CLOCK		= 1,
-	PERF_COUNT_PAGE_FAULTS		= 2,
-	PERF_COUNT_CONTEXT_SWITCHES	= 3,
-	PERF_COUNT_CPU_MIGRATIONS	= 4,
-	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
-	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
+	PERF_COUNT_SW_CPU_CLOCK		= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
 };
 
 Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f18a9a0..9d5f1ca 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -22,26 +22,26 @@ struct event_symbol {
 #define CR(x, y) .type = PERF_TYPE_##x, .config = y
 
 static struct event_symbol event_symbols[] = {
-  { C(HARDWARE, CPU_CYCLES),		"cpu-cycles",		},
-  { C(HARDWARE, CPU_CYCLES),		"cycles",		},
-  { C(HARDWARE, INSTRUCTIONS),		"instructions",		},
-  { C(HARDWARE, CACHE_REFERENCES),	"cache-references",	},
-  { C(HARDWARE, CACHE_MISSES),		"cache-misses",		},
-  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branches",		},
-  { C(HARDWARE, BRANCH_MISSES),		"branch-misses",	},
-  { C(HARDWARE, BUS_CYCLES),		"bus-cycles",		},
-
-  { C(SOFTWARE, CPU_CLOCK),		"cpu-clock",		},
-  { C(SOFTWARE, TASK_CLOCK),		"task-clock",		},
-  { C(SOFTWARE, PAGE_FAULTS),		"page-faults",		},
-  { C(SOFTWARE, PAGE_FAULTS),		"faults",		},
-  { C(SOFTWARE, PAGE_FAULTS_MIN),	"minor-faults",		},
-  { C(SOFTWARE, PAGE_FAULTS_MAJ),	"major-faults",		},
-  { C(SOFTWARE, CONTEXT_SWITCHES),	"context-switches",	},
-  { C(SOFTWARE, CONTEXT_SWITCHES),	"cs",			},
-  { C(SOFTWARE, CPU_MIGRATIONS),	"cpu-migrations",	},
-  { C(SOFTWARE, CPU_MIGRATIONS),	"migrations",		},
+  { C(HARDWARE, HW_CPU_CYCLES),		"cpu-cycles",		},
+  { C(HARDWARE, HW_CPU_CYCLES),		"cycles",		},
+  { C(HARDWARE, HW_INSTRUCTIONS),	"instructions",		},
+  { C(HARDWARE, HW_CACHE_REFERENCES),	"cache-references",	},
+  { C(HARDWARE, HW_CACHE_MISSES),	"cache-misses",		},
+  { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions",	},
+  { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches",		},
+  { C(HARDWARE, HW_BRANCH_MISSES),	"branch-misses",	},
+  { C(HARDWARE, HW_BUS_CYCLES),		"bus-cycles",		},
+
+  { C(SOFTWARE, SW_CPU_CLOCK),		"cpu-clock",		},
+  { C(SOFTWARE, SW_TASK_CLOCK),		"task-clock",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS),	"page-faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS),	"faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS_MIN),	"minor-faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS_MAJ),	"major-faults",		},
+  { C(SOFTWARE, SW_CONTEXT_SWITCHES),	"context-switches",	},
+  { C(SOFTWARE, SW_CONTEXT_SWITCHES),	"cs",			},
+  { C(SOFTWARE, SW_CPU_MIGRATIONS),	"cpu-migrations",	},
+  { C(SOFTWARE, SW_CPU_MIGRATIONS),	"migrations",		},
 };
 
 #define __PERF_COUNTER_FIELD(config, name) \
@@ -107,7 +107,7 @@ char *event_name(int counter)
 
 	switch (type) {
 	case PERF_TYPE_HARDWARE:
-		if (config < PERF_HW_EVENTS_MAX)
+		if (config < PERF_COUNT_HW_MAX)
 			return hw_event_names[config];
 		return "unknown-hardware";
 
@@ -136,7 +136,7 @@ char *event_name(int counter)
 	}
 
 	case PERF_TYPE_SOFTWARE:
-		if (config < PERF_SW_EVENTS_MAX)
+		if (config < PERF_COUNT_SW_MAX)
 			return sw_event_names[config];
 		return "unknown-software";
 
-- 
cgit v0.10.2


From 8be6e8f3c3a13900169f1141870562d0c723b010 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 14:19:11 +0200
Subject: perf_counter: Rename L2 to LL cache

The top (fastest) and last level (biggest) caches are the most
interesting ones, performance wise.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
[ Fixed the Nehalem LL table to LLC Reference/Miss events ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 73956f0..07bd308 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -561,7 +561,7 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1	},
 		[C(OP_PREFETCH)] = {	0,		0	},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0	},
 		[C(OP_WRITE)] = {	0,		0	},
 		[C(OP_PREFETCH)] = {	0xc34,		0	},
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 5f8b774..41e5d2d 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -632,7 +632,7 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1		},
 		[C(OP_PREFETCH)] = {	0,		0		},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0		},
 		[C(OP_WRITE)] = {	0,		0		},
 		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index d54723a..05600b6 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -574,7 +574,7 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1		},
 		[C(OP_PREFETCH)] = {	0,		0		},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0x3c309b	},
 		[C(OP_WRITE)] = {	0,		0		},
 		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 0cd406ee..46f74be 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -493,7 +493,7 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1		},
 		[C(OP_PREFETCH)] = {	0x4008c,	0		},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0x150730,	0x250532	},
 		[C(OP_WRITE)] = {	0x250432,	0x150432	},
 		[C(OP_PREFETCH)] = {	0x810a6,	0		},
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 060e0de..b3f7d12 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -320,7 +320,7 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1	},
 		[C(OP_PREFETCH)] = {	0x408a,		0	},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0x6080,		0x6084	},
 		[C(OP_WRITE)] = {	0x6082,		0x6086	},
 		[C(OP_PREFETCH)] = {	0,		0	},
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 46a2064..ba0a357 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -445,7 +445,7 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1	},
 		[C(OP_PREFETCH)] = {	0,		0	},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0	},
 		[C(OP_WRITE)] = {	0,		0	},
 		[C(OP_PREFETCH)] = {	0x733,		0	},
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 572fb43..895c82e 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -131,7 +131,7 @@ static const u64 nehalem_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0x0,
 	},
  },
- [ C(L2  ) ] = {
+ [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
 		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
@@ -141,8 +141,8 @@ static const u64 nehalem_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0xc024, /* L2_RQSTS.PREFETCHES          */
-		[ C(RESULT_MISS)   ] = 0x8024, /* L2_RQSTS.PREFETCH_MISS       */
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
 	},
  },
  [ C(DTLB) ] = {
@@ -222,7 +222,7 @@ static const u64 core2_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0,
 	},
  },
- [ C(L2  ) ] = {
+ [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
 		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
@@ -313,7 +313,7 @@ static const u64 atom_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0,
 	},
  },
- [ C(L2  ) ] = {
+ [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
 		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
@@ -422,7 +422,7 @@ static const u64 amd_0f_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0,
 	},
  },
- [ C(L2  ) ] = {
+ [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
 		[ C(RESULT_MISS)   ] = 0,
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 887df88..20cf5af 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -56,14 +56,14 @@ enum perf_hw_id {
 /*
  * Generalized hardware cache counters:
  *
- *       { L1-D, L1-I, L2, LLC, ITLB, DTLB, BPU } x
+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
  *       { read, write, prefetch } x
  *       { accesses, misses }
  */
 enum perf_hw_cache_id {
 	PERF_COUNT_HW_CACHE_L1D		= 0,
 	PERF_COUNT_HW_CACHE_L1I		= 1,
-	PERF_COUNT_HW_CACHE_L2		= 2,
+	PERF_COUNT_HW_CACHE_LL		= 2,
 	PERF_COUNT_HW_CACHE_DTLB	= 3,
 	PERF_COUNT_HW_CACHE_ITLB	= 4,
 	PERF_COUNT_HW_CACHE_BPU		= 5,
-- 
cgit v0.10.2


From a308444ceb576d3089f9ca0dfd097eba6f1e623f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Jun 2009 14:44:26 +0200
Subject: perf_counter: Better align code

Whitespace and comment bits. Also update copyrights.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 20cf5af..1fa1a26 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -1,12 +1,13 @@
 /*
  *  Performance counters:
  *
- *   Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de>
- *   Copyright(C) 2008, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
  *
  *  Data type definitions, declarations, prototypes.
  *
- *  Started by: Thomas Gleixner and Ingo Molnar
+ *    Started by: Thomas Gleixner and Ingo Molnar
  *
  *  For licencing details see kernel-base/COPYING
  */
@@ -25,18 +26,19 @@
  * attr.type
  */
 enum perf_type_id {
-	PERF_TYPE_HARDWARE		= 0,
-	PERF_TYPE_SOFTWARE		= 1,
-	PERF_TYPE_TRACEPOINT		= 2,
-	PERF_TYPE_HW_CACHE		= 3,
-	PERF_TYPE_RAW			= 4,
+	PERF_TYPE_HARDWARE			= 0,
+	PERF_TYPE_SOFTWARE			= 1,
+	PERF_TYPE_TRACEPOINT			= 2,
+	PERF_TYPE_HW_CACHE			= 3,
+	PERF_TYPE_RAW				= 4,
 
-	PERF_TYPE_MAX,			/* non ABI */
+	PERF_TYPE_MAX,				/* non-ABI */
 };
 
 /*
- * Generalized performance counter event types, used by the attr.event_id
- * parameter of the sys_perf_counter_open() syscall:
+ * Generalized performance counter event types, used by the
+ * attr.event_id parameter of the sys_perf_counter_open()
+ * syscall:
  */
 enum perf_hw_id {
 	/*
@@ -50,7 +52,7 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
 
-	PERF_COUNT_HW_MAX,		/* non ABI */
+	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
 
 /*
@@ -61,29 +63,29 @@ enum perf_hw_id {
  *       { accesses, misses }
  */
 enum perf_hw_cache_id {
-	PERF_COUNT_HW_CACHE_L1D		= 0,
-	PERF_COUNT_HW_CACHE_L1I		= 1,
-	PERF_COUNT_HW_CACHE_LL		= 2,
-	PERF_COUNT_HW_CACHE_DTLB	= 3,
-	PERF_COUNT_HW_CACHE_ITLB	= 4,
-	PERF_COUNT_HW_CACHE_BPU		= 5,
-
-	PERF_COUNT_HW_CACHE_MAX,	/* non ABI */
+	PERF_COUNT_HW_CACHE_L1D			= 0,
+	PERF_COUNT_HW_CACHE_L1I			= 1,
+	PERF_COUNT_HW_CACHE_LL			= 2,
+	PERF_COUNT_HW_CACHE_DTLB		= 3,
+	PERF_COUNT_HW_CACHE_ITLB		= 4,
+	PERF_COUNT_HW_CACHE_BPU			= 5,
+
+	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
 };
 
 enum perf_hw_cache_op_id {
-	PERF_COUNT_HW_CACHE_OP_READ	= 0,
-	PERF_COUNT_HW_CACHE_OP_WRITE	= 1,
-	PERF_COUNT_HW_CACHE_OP_PREFETCH	= 2,
+	PERF_COUNT_HW_CACHE_OP_READ		= 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
 
-	PERF_COUNT_HW_CACHE_OP_MAX,	/* non ABI */
+	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
 };
 
 enum perf_hw_cache_op_result_id {
 	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
 	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
 
-	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non ABI */
+	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
 };
 
 /*
@@ -93,15 +95,15 @@ enum perf_hw_cache_op_result_id {
  * well):
  */
 enum perf_sw_ids {
-	PERF_COUNT_SW_CPU_CLOCK		= 0,
-	PERF_COUNT_SW_TASK_CLOCK	= 1,
-	PERF_COUNT_SW_PAGE_FAULTS	= 2,
-	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
-	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
-	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
-	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
-
-	PERF_COUNT_SW_MAX,		/* non ABI */
+	PERF_COUNT_SW_CPU_CLOCK			= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+
+	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
 
 /*
@@ -109,15 +111,15 @@ enum perf_sw_ids {
  * in the overflow packets.
  */
 enum perf_counter_sample_format {
-	PERF_SAMPLE_IP			= 1U << 0,
-	PERF_SAMPLE_TID			= 1U << 1,
-	PERF_SAMPLE_TIME		= 1U << 2,
-	PERF_SAMPLE_ADDR		= 1U << 3,
-	PERF_SAMPLE_GROUP		= 1U << 4,
-	PERF_SAMPLE_CALLCHAIN		= 1U << 5,
-	PERF_SAMPLE_ID			= 1U << 6,
-	PERF_SAMPLE_CPU			= 1U << 7,
-	PERF_SAMPLE_PERIOD		= 1U << 8,
+	PERF_SAMPLE_IP				= 1U << 0,
+	PERF_SAMPLE_TID				= 1U << 1,
+	PERF_SAMPLE_TIME			= 1U << 2,
+	PERF_SAMPLE_ADDR			= 1U << 3,
+	PERF_SAMPLE_GROUP			= 1U << 4,
+	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
+	PERF_SAMPLE_ID				= 1U << 6,
+	PERF_SAMPLE_CPU				= 1U << 7,
+	PERF_SAMPLE_PERIOD			= 1U << 8,
 };
 
 /*
@@ -126,9 +128,9 @@ enum perf_counter_sample_format {
  * in increasing order of bit value, after the counter value.
  */
 enum perf_counter_read_format {
-	PERF_FORMAT_TOTAL_TIME_ENABLED	=  1U << 0,
-	PERF_FORMAT_TOTAL_TIME_RUNNING	=  1U << 1,
-	PERF_FORMAT_ID			=  1U << 2,
+	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
+	PERF_FORMAT_ID				= 1U << 2,
 };
 
 /*
@@ -229,12 +231,12 @@ struct perf_counter_mmap_page {
 	__u64   data_head;		/* head in the data section */
 };
 
-#define PERF_EVENT_MISC_CPUMODE_MASK	(3 << 0)
-#define PERF_EVENT_MISC_CPUMODE_UNKNOWN	(0 << 0)
-#define PERF_EVENT_MISC_KERNEL		(1 << 0)
-#define PERF_EVENT_MISC_USER		(2 << 0)
-#define PERF_EVENT_MISC_HYPERVISOR	(3 << 0)
-#define PERF_EVENT_MISC_OVERFLOW	(1 << 2)
+#define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
+#define PERF_EVENT_MISC_CPUMODE_UNKNOWN		(0 << 0)
+#define PERF_EVENT_MISC_KERNEL			(1 << 0)
+#define PERF_EVENT_MISC_USER			(2 << 0)
+#define PERF_EVENT_MISC_HYPERVISOR		(3 << 0)
+#define PERF_EVENT_MISC_OVERFLOW		(1 << 2)
 
 struct perf_event_header {
 	__u32	type;
@@ -351,14 +353,14 @@ struct hw_perf_counter {
 #ifdef CONFIG_PERF_COUNTERS
 	union {
 		struct { /* hardware */
-			u64				config;
-			unsigned long			config_base;
-			unsigned long			counter_base;
-			int				idx;
+			u64		config;
+			unsigned long	config_base;
+			unsigned long	counter_base;
+			int		idx;
 		};
 		union { /* software */
-			atomic64_t			count;
-			struct hrtimer			hrtimer;
+			atomic64_t	count;
+			struct hrtimer	hrtimer;
 		};
 	};
 	atomic64_t			prev_count;
@@ -523,37 +525,37 @@ struct perf_counter_context {
 	 * Protect the states of the counters in the list,
 	 * nr_active, and the list:
 	 */
-	spinlock_t		lock;
+	spinlock_t			lock;
 	/*
 	 * Protect the list of counters.  Locking either mutex or lock
 	 * is sufficient to ensure the list doesn't change; to change
 	 * the list you need to lock both the mutex and the spinlock.
 	 */
-	struct mutex		mutex;
+	struct mutex			mutex;
 
-	struct list_head	counter_list;
-	struct list_head	event_list;
-	int			nr_counters;
-	int			nr_active;
-	int			is_active;
-	atomic_t		refcount;
-	struct task_struct	*task;
+	struct list_head		counter_list;
+	struct list_head		event_list;
+	int				nr_counters;
+	int				nr_active;
+	int				is_active;
+	atomic_t			refcount;
+	struct task_struct		*task;
 
 	/*
 	 * Context clock, runs when context enabled.
 	 */
-	u64			time;
-	u64			timestamp;
+	u64				time;
+	u64				timestamp;
 
 	/*
 	 * These fields let us detect when two contexts have both
 	 * been cloned (inherited) from a common ancestor.
 	 */
-	struct perf_counter_context *parent_ctx;
-	u64			parent_gen;
-	u64			generation;
-	int			pin_count;
-	struct rcu_head		rcu_head;
+	struct perf_counter_context	*parent_ctx;
+	u64				parent_gen;
+	u64				generation;
+	int				pin_count;
+	struct rcu_head			rcu_head;
 };
 
 /**
@@ -604,9 +606,9 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 struct perf_sample_data {
-	struct pt_regs		*regs;
-	u64			addr;
-	u64			period;
+	struct pt_regs			*regs;
+	u64				addr;
+	u64				period;
 };
 
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
@@ -636,11 +638,14 @@ extern void perf_counter_fork(struct task_struct *tsk);
 
 extern void perf_counter_task_migration(struct task_struct *task, int cpu);
 
-#define MAX_STACK_DEPTH		255
+#define MAX_STACK_DEPTH			255
 
 struct perf_callchain_entry {
-	u16	nr, hv, kernel, user;
-	u64	ip[MAX_STACK_DEPTH];
+	u16				nr;
+	u16				hv;
+	u16				kernel;
+	u16				user;
+	u64				ip[MAX_STACK_DEPTH];
 };
 
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
-- 
cgit v0.10.2


From cca3f454a85ff42d426401bce7ac804541b2bd03 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 14:57:55 +0200
Subject: perf_counter: Add counter->id to the throttle event

So as to be able to distuinguish between multiple counters.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1fa1a26..6e13395 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -286,6 +286,7 @@ enum perf_event_type {
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u64				time;
+	 *	u64				id;
 	 * };
 	 */
 	PERF_EVENT_THROTTLE		= 5,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8859b97..ef5d8a5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2950,13 +2950,15 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
 	struct {
 		struct perf_event_header	header;
 		u64				time;
+		u64				id;
 	} throttle_event = {
 		.header = {
 			.type = PERF_EVENT_THROTTLE + 1,
 			.misc = 0,
 			.size = sizeof(throttle_event),
 		},
-		.time = sched_clock(),
+		.time	= sched_clock(),
+		.id	= counter->id,
 	};
 
 	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
-- 
cgit v0.10.2


From 8dc8e5e8bc0ce00b0f656bf972f67cd8a72759e5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Jun 2009 16:13:24 +0200
Subject: perf_counter: Turn off by default

Perfcounters were enabled by default to help testing - but now that we
are submitting it upstream, make it default-disabled.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/init/Kconfig b/init/Kconfig
index 8158f1f..aef16f9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -941,7 +941,6 @@ menu "Performance Counters"
 config PERF_COUNTERS
 	bool "Kernel Performance Counters"
 	depends on HAVE_PERF_COUNTERS
-	default y
 	select ANON_INODES
 	help
 	  Enable kernel support for performance counter hardware.
-- 
cgit v0.10.2


From d2f11bf7fc630e27dfcede367399dddf40521878 Mon Sep 17 00:00:00 2001
From: Justin Chen <jchen@hpdst41.cup.hp.com>
Date: Thu, 11 Jun 2009 13:04:59 +0100
Subject: FRV: bitops: Change the bitmap index from int to unsigned long

Change the index to unsigned long in all bitops for [frv]

Signed-off-by: Justin Chen <justin.chen@hp.com>
Reviewed-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 287f6f6..50ae91b 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -112,7 +112,7 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
 #define atomic_clear_mask(mask, v)	atomic_test_and_ANDNOT_mask((mask), (v))
 #define atomic_set_mask(mask, v)	atomic_test_and_OR_mask((mask), (v))
 
-static inline int test_and_clear_bit(int nr, volatile void *addr)
+static inline int test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -120,7 +120,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
 	return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_set_bit(int nr, volatile void *addr)
+static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -128,7 +128,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
 	return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_change_bit(int nr, volatile void *addr)
+static inline int test_and_change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *ptr = addr;
 	unsigned long mask = 1UL << (nr & 31);
@@ -136,22 +136,22 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
 	return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline void clear_bit(int nr, volatile void *addr)
+static inline void clear_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_clear_bit(nr, addr);
 }
 
-static inline void set_bit(int nr, volatile void *addr)
+static inline void set_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_set_bit(nr, addr);
 }
 
-static inline void change_bit(int nr, volatile void * addr)
+static inline void change_bit(unsigned long nr, volatile void *addr)
 {
 	test_and_change_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile void * addr)
+static inline void __clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -161,7 +161,7 @@ static inline void __clear_bit(int nr, volatile void * addr)
 	*a &= ~mask;
 }
 
-static inline void __set_bit(int nr, volatile void * addr)
+static inline void __set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -171,7 +171,7 @@ static inline void __set_bit(int nr, volatile void * addr)
 	*a |= mask;
 }
 
-static inline void __change_bit(int nr, volatile void *addr)
+static inline void __change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask;
@@ -181,7 +181,7 @@ static inline void __change_bit(int nr, volatile void *addr)
 	*a ^= mask;
 }
 
-static inline int __test_and_clear_bit(int nr, volatile void * addr)
+static inline int __test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -193,7 +193,7 @@ static inline int __test_and_clear_bit(int nr, volatile void * addr)
 	return retval;
 }
 
-static inline int __test_and_set_bit(int nr, volatile void * addr)
+static inline int __test_and_set_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -205,7 +205,7 @@ static inline int __test_and_set_bit(int nr, volatile void * addr)
 	return retval;
 }
 
-static inline int __test_and_change_bit(int nr, volatile void * addr)
+static inline int __test_and_change_bit(unsigned long nr, volatile void *addr)
 {
 	volatile unsigned long *a = addr;
 	int mask, retval;
@@ -220,12 +220,13 @@ static inline int __test_and_change_bit(int nr, volatile void * addr)
 /*
  * This routine doesn't need to be atomic.
  */
-static inline int __constant_test_bit(int nr, const volatile void * addr)
+static inline int
+__constant_test_bit(unsigned long nr, const volatile void *addr)
 {
 	return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
 }
 
-static inline int __test_bit(int nr, const volatile void * addr)
+static inline int __test_bit(unsigned long nr, const volatile void *addr)
 {
 	int 	* a = (int *) addr;
 	int	mask;
-- 
cgit v0.10.2


From db5c444eeb781788a0db36a2682b2417cf71f764 Mon Sep 17 00:00:00 2001
From: Stoyan Gaydarov <stoyboyker@gmail.com>
Date: Thu, 11 Jun 2009 13:05:04 +0100
Subject: FRV: BUG to BUG_ON changes

Change some BUG()'s to BUG_ON()'s.

Signed-off-by: Stoyan Gaydarov <stoyboyker@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index 585d9b4..cc685e6 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -87,8 +87,7 @@ static inline void pci_dma_sync_single(struct pci_dev *hwdev,
 				       dma_addr_t dma_handle,
 				       size_t size, int direction)
 {
-	if (direction == PCI_DMA_NONE)
-                BUG();
+	BUG_ON(direction == PCI_DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle),
 			    (unsigned long)bus_to_virt(dma_handle) + size);
@@ -105,9 +104,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
 				   int nelems, int direction)
 {
 	int i;
-
-	if (direction == PCI_DMA_NONE)
-                BUG();
+	BUG_ON(direction == PCI_DMA_NONE);
 
 	for (i = 0; i < nelems; i++)
 		frv_cache_wback_inv(sg_dma_address(&sg[i]),
diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c
index 9fb771a..374f88d 100644
--- a/arch/frv/kernel/uaccess.c
+++ b/arch/frv/kernel/uaccess.c
@@ -23,8 +23,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
 	char *p, ch;
 	long err = -EFAULT;
 
-	if (count < 0)
-		BUG();
+	BUG_ON(count < 0);
 
 	p = dst;
 
@@ -76,8 +75,7 @@ long strnlen_user(const char __user *src, long count)
 	long err = 0;
 	char ch;
 
-	if (count < 0)
-		BUG();
+	BUG_ON(count < 0);
 
 #ifndef CONFIG_MMU
 	if ((unsigned long) src < memory_start)
diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c
index 52ff9ae..4e1ba0b 100644
--- a/arch/frv/mb93090-mb00/pci-dma-nommu.c
+++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c
@@ -116,8 +116,7 @@ EXPORT_SYMBOL(dma_free_coherent);
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 			  enum dma_data_direction direction)
 {
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -151,8 +150,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		frv_cache_wback_inv(sg_dma_address(&sg[i]),
 				    sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]));
 
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	return nents;
 }
diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c
index 3ddedeb..45954f0 100644
--- a/arch/frv/mb93090-mb00/pci-dma.c
+++ b/arch/frv/mb93090-mb00/pci-dma.c
@@ -48,8 +48,7 @@ EXPORT_SYMBOL(dma_free_coherent);
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 			  enum dma_data_direction direction)
 {
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -81,8 +80,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 	void *vaddr;
 	int i;
 
-	if (direction == DMA_NONE)
-                BUG();
+	BUG_ON(direction == DMA_NONE);
 
 	dampr2 = __get_DAMPR(2);
 
-- 
cgit v0.10.2


From aa1913c0214a53568731617c0afbbfa3f59513fb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Jun 2009 13:05:09 +0100
Subject: FRV: Remove in-kernel strace code

Remove in-kernel strace code from the FRV arch as it's not really needed any
more.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 5e7d401..6b15e5d 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -306,387 +306,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	return ret;
 }
 
-int __nongprelbss kstrace;
-
-static const struct {
-	const char	*name;
-	unsigned	argmask;
-} __syscall_name_table[NR_syscalls] = {
-	[0]	= { "restart_syscall"			},
-	[1]	= { "exit",		0x000001	},
-	[2]	= { "fork",		0xffffff	},
-	[3]	= { "read",		0x000141	},
-	[4]	= { "write",		0x000141	},
-	[5]	= { "open",		0x000235	},
-	[6]	= { "close",		0x000001	},
-	[7]	= { "waitpid",		0x000141	},
-	[8]	= { "creat",		0x000025	},
-	[9]	= { "link",		0x000055	},
-	[10]	= { "unlink",		0x000005	},
-	[11]	= { "execve",		0x000445	},
-	[12]	= { "chdir",		0x000005	},
-	[13]	= { "time",		0x000004	},
-	[14]	= { "mknod",		0x000325	},
-	[15]	= { "chmod",		0x000025	},
-	[16]	= { "lchown",		0x000025	},
-	[17]	= { "break" },
-	[18]	= { "oldstat",		0x000045	},
-	[19]	= { "lseek",		0x000131	},
-	[20]	= { "getpid",		0xffffff	},
-	[21]	= { "mount",		0x043555	},
-	[22]	= { "umount",		0x000005	},
-	[23]	= { "setuid",		0x000001	},
-	[24]	= { "getuid",		0xffffff	},
-	[25]	= { "stime",		0x000004	},
-	[26]	= { "ptrace",		0x004413	},
-	[27]	= { "alarm",		0x000001	},
-	[28]	= { "oldfstat",		0x000041	},
-	[29]	= { "pause",		0xffffff	},
-	[30]	= { "utime",		0x000045	},
-	[31]	= { "stty" },
-	[32]	= { "gtty" },
-	[33]	= { "access",		0x000025	},
-	[34]	= { "nice",		0x000001	},
-	[35]	= { "ftime" },
-	[36]	= { "sync",		0xffffff	},
-	[37]	= { "kill",		0x000011	},
-	[38]	= { "rename",		0x000055	},
-	[39]	= { "mkdir",		0x000025	},
-	[40]	= { "rmdir",		0x000005	},
-	[41]	= { "dup",		0x000001	},
-	[42]	= { "pipe",		0x000004	},
-	[43]	= { "times",		0x000004	},
-	[44]	= { "prof" },
-	[45]	= { "brk",		0x000004	},
-	[46]	= { "setgid",		0x000001	},
-	[47]	= { "getgid",		0xffffff	},
-	[48]	= { "signal",		0x000041	},
-	[49]	= { "geteuid",		0xffffff	},
-	[50]	= { "getegid",		0xffffff	},
-	[51]	= { "acct",		0x000005	},
-	[52]	= { "umount2",		0x000035	},
-	[53]	= { "lock" },
-	[54]	= { "ioctl",		0x000331	},
-	[55]	= { "fcntl",		0x000331	},
-	[56]	= { "mpx" },
-	[57]	= { "setpgid",		0x000011	},
-	[58]	= { "ulimit" },
-	[60]	= { "umask",		0x000002	},
-	[61]	= { "chroot",		0x000005	},
-	[62]	= { "ustat",		0x000043	},
-	[63]	= { "dup2",		0x000011	},
-	[64]	= { "getppid",		0xffffff	},
-	[65]	= { "getpgrp",		0xffffff	},
-	[66]	= { "setsid",		0xffffff	},
-	[67]	= { "sigaction" },
-	[68]	= { "sgetmask" },
-	[69]	= { "ssetmask" },
-	[70]	= { "setreuid" },
-	[71]	= { "setregid" },
-	[72]	= { "sigsuspend" },
-	[73]	= { "sigpending" },
-	[74]	= { "sethostname" },
-	[75]	= { "setrlimit" },
-	[76]	= { "getrlimit" },
-	[77]	= { "getrusage" },
-	[78]	= { "gettimeofday" },
-	[79]	= { "settimeofday" },
-	[80]	= { "getgroups" },
-	[81]	= { "setgroups" },
-	[82]	= { "select" },
-	[83]	= { "symlink" },
-	[84]	= { "oldlstat" },
-	[85]	= { "readlink" },
-	[86]	= { "uselib" },
-	[87]	= { "swapon" },
-	[88]	= { "reboot" },
-	[89]	= { "readdir" },
-	[91]	= { "munmap",		0x000034	},
-	[92]	= { "truncate" },
-	[93]	= { "ftruncate" },
-	[94]	= { "fchmod" },
-	[95]	= { "fchown" },
-	[96]	= { "getpriority" },
-	[97]	= { "setpriority" },
-	[99]	= { "statfs" },
-	[100]	= { "fstatfs" },
-	[102]	= { "socketcall" },
-	[103]	= { "syslog" },
-	[104]	= { "setitimer" },
-	[105]	= { "getitimer" },
-	[106]	= { "stat" },
-	[107]	= { "lstat" },
-	[108]	= { "fstat" },
-	[111]	= { "vhangup" },
-	[114]	= { "wait4" },
-	[115]	= { "swapoff" },
-	[116]	= { "sysinfo" },
-	[117]	= { "ipc" },
-	[118]	= { "fsync" },
-	[119]	= { "sigreturn" },
-	[120]	= { "clone" },
-	[121]	= { "setdomainname" },
-	[122]	= { "uname" },
-	[123]	= { "modify_ldt" },
-	[123]	= { "cacheflush" },
-	[124]	= { "adjtimex" },
-	[125]	= { "mprotect" },
-	[126]	= { "sigprocmask" },
-	[127]	= { "create_module" },
-	[128]	= { "init_module" },
-	[129]	= { "delete_module" },
-	[130]	= { "get_kernel_syms" },
-	[131]	= { "quotactl" },
-	[132]	= { "getpgid" },
-	[133]	= { "fchdir" },
-	[134]	= { "bdflush" },
-	[135]	= { "sysfs" },
-	[136]	= { "personality" },
-	[137]	= { "afs_syscall" },
-	[138]	= { "setfsuid" },
-	[139]	= { "setfsgid" },
-	[140]	= { "_llseek",			0x014331	},
-	[141]	= { "getdents" },
-	[142]	= { "_newselect",		0x000141	},
-	[143]	= { "flock" },
-	[144]	= { "msync" },
-	[145]	= { "readv" },
-	[146]	= { "writev" },
-	[147]	= { "getsid",			0x000001	},
-	[148]	= { "fdatasync",		0x000001	},
-	[149]	= { "_sysctl",			0x000004	},
-	[150]	= { "mlock" },
-	[151]	= { "munlock" },
-	[152]	= { "mlockall" },
-	[153]	= { "munlockall" },
-	[154]	= { "sched_setparam" },
-	[155]	= { "sched_getparam" },
-	[156]	= { "sched_setscheduler" },
-	[157]	= { "sched_getscheduler" },
-	[158]	= { "sched_yield" },
-	[159]	= { "sched_get_priority_max" },
-	[160]	= { "sched_get_priority_min" },
-	[161]	= { "sched_rr_get_interval" },
-	[162]	= { "nanosleep",		0x000044	},
-	[163]	= { "mremap" },
-	[164]	= { "setresuid" },
-	[165]	= { "getresuid" },
-	[166]	= { "vm86" },
-	[167]	= { "query_module" },
-	[168]	= { "poll" },
-	[169]	= { "nfsservctl" },
-	[170]	= { "setresgid" },
-	[171]	= { "getresgid" },
-	[172]	= { "prctl",			0x333331	},
-	[173]	= { "rt_sigreturn",		0xffffff	},
-	[174]	= { "rt_sigaction",		0x001441	},
-	[175]	= { "rt_sigprocmask",		0x001441	},
-	[176]	= { "rt_sigpending",		0x000014	},
-	[177]	= { "rt_sigtimedwait",		0x001444	},
-	[178]	= { "rt_sigqueueinfo",		0x000411	},
-	[179]	= { "rt_sigsuspend",		0x000014	},
-	[180]	= { "pread",			0x003341	},
-	[181]	= { "pwrite",			0x003341	},
-	[182]	= { "chown",			0x000115	},
-	[183]	= { "getcwd" },
-	[184]	= { "capget" },
-	[185]	= { "capset" },
-	[186]	= { "sigaltstack" },
-	[187]	= { "sendfile" },
-	[188]	= { "getpmsg" },
-	[189]	= { "putpmsg" },
-	[190]	= { "vfork",			0xffffff	},
-	[191]	= { "ugetrlimit" },
-	[192]	= { "mmap2",			0x313314	},
-	[193]	= { "truncate64" },
-	[194]	= { "ftruncate64" },
-	[195]	= { "stat64",			0x000045	},
-	[196]	= { "lstat64",			0x000045	},
-	[197]	= { "fstat64",			0x000041	},
-	[198]	= { "lchown32" },
-	[199]	= { "getuid32",			0xffffff	},
-	[200]	= { "getgid32",			0xffffff	},
-	[201]	= { "geteuid32",		0xffffff	},
-	[202]	= { "getegid32",		0xffffff	},
-	[203]	= { "setreuid32" },
-	[204]	= { "setregid32" },
-	[205]	= { "getgroups32" },
-	[206]	= { "setgroups32" },
-	[207]	= { "fchown32" },
-	[208]	= { "setresuid32" },
-	[209]	= { "getresuid32" },
-	[210]	= { "setresgid32" },
-	[211]	= { "getresgid32" },
-	[212]	= { "chown32" },
-	[213]	= { "setuid32" },
-	[214]	= { "setgid32" },
-	[215]	= { "setfsuid32" },
-	[216]	= { "setfsgid32" },
-	[217]	= { "pivot_root" },
-	[218]	= { "mincore" },
-	[219]	= { "madvise" },
-	[220]	= { "getdents64" },
-	[221]	= { "fcntl64" },
-	[223]	= { "security" },
-	[224]	= { "gettid" },
-	[225]	= { "readahead" },
-	[226]	= { "setxattr" },
-	[227]	= { "lsetxattr" },
-	[228]	= { "fsetxattr" },
-	[229]	= { "getxattr" },
-	[230]	= { "lgetxattr" },
-	[231]	= { "fgetxattr" },
-	[232]	= { "listxattr" },
-	[233]	= { "llistxattr" },
-	[234]	= { "flistxattr" },
-	[235]	= { "removexattr" },
-	[236]	= { "lremovexattr" },
-	[237]	= { "fremovexattr" },
-	[238]	= { "tkill" },
-	[239]	= { "sendfile64" },
-	[240]	= { "futex" },
-	[241]	= { "sched_setaffinity" },
-	[242]	= { "sched_getaffinity" },
-	[243]	= { "set_thread_area" },
-	[244]	= { "get_thread_area" },
-	[245]	= { "io_setup" },
-	[246]	= { "io_destroy" },
-	[247]	= { "io_getevents" },
-	[248]	= { "io_submit" },
-	[249]	= { "io_cancel" },
-	[250]	= { "fadvise64" },
-	[252]	= { "exit_group",		0x000001	},
-	[253]	= { "lookup_dcookie" },
-	[254]	= { "epoll_create" },
-	[255]	= { "epoll_ctl" },
-	[256]	= { "epoll_wait" },
-	[257]	= { "remap_file_pages" },
-	[258]	= { "set_tid_address" },
-	[259]	= { "timer_create" },
-	[260]	= { "timer_settime" },
-	[261]	= { "timer_gettime" },
-	[262]	= { "timer_getoverrun" },
-	[263]	= { "timer_delete" },
-	[264]	= { "clock_settime" },
-	[265]	= { "clock_gettime" },
-	[266]	= { "clock_getres" },
-	[267]	= { "clock_nanosleep" },
-	[268]	= { "statfs64" },
-	[269]	= { "fstatfs64" },
-	[270]	= { "tgkill" },
-	[271]	= { "utimes" },
-	[272]	= { "fadvise64_64" },
-	[273]	= { "vserver" },
-	[274]	= { "mbind" },
-	[275]	= { "get_mempolicy" },
-	[276]	= { "set_mempolicy" },
-	[277]	= { "mq_open" },
-	[278]	= { "mq_unlink" },
-	[279]	= { "mq_timedsend" },
-	[280]	= { "mq_timedreceive" },
-	[281]	= { "mq_notify" },
-	[282]	= { "mq_getsetattr" },
-	[283]	= { "sys_kexec_load" },
-};
-
 asmlinkage void do_syscall_trace(int leaving)
 {
-#if 0
-	unsigned long *argp;
-	const char *name;
-	unsigned argmask;
-	char buffer[16];
-
-	if (!kstrace)
-		return;
-
-	if (!current->mm)
-		return;
-
-	if (__frame->gr7 == __NR_close)
-		return;
-
-#if 0
-	if (__frame->gr7 != __NR_mmap2 &&
-	    __frame->gr7 != __NR_vfork &&
-	    __frame->gr7 != __NR_execve &&
-	    __frame->gr7 != __NR_exit)
-		return;
-#endif
-
-	argmask = 0;
-	name = NULL;
-	if (__frame->gr7 < NR_syscalls) {
-		name = __syscall_name_table[__frame->gr7].name;
-		argmask = __syscall_name_table[__frame->gr7].argmask;
-	}
-	if (!name) {
-		sprintf(buffer, "sys_%lx", __frame->gr7);
-		name = buffer;
-	}
-
-	if (!leaving) {
-		if (!argmask) {
-			printk(KERN_CRIT "[%d] %s(%lx,%lx,%lx,%lx,%lx,%lx)\n",
-			       current->pid,
-			       name,
-			       __frame->gr8,
-			       __frame->gr9,
-			       __frame->gr10,
-			       __frame->gr11,
-			       __frame->gr12,
-			       __frame->gr13);
-		}
-		else if (argmask == 0xffffff) {
-			printk(KERN_CRIT "[%d] %s()\n",
-			       current->pid,
-			       name);
-		}
-		else {
-			printk(KERN_CRIT "[%d] %s(",
-			       current->pid,
-			       name);
-
-			argp = &__frame->gr8;
-
-			do {
-				switch (argmask & 0xf) {
-				case 1:
-					printk("%ld", (long) *argp);
-					break;
-				case 2:
-					printk("%lo", *argp);
-					break;
-				case 3:
-					printk("%lx", *argp);
-					break;
-				case 4:
-					printk("%p", (void *) *argp);
-					break;
-				case 5:
-					printk("\"%s\"", (char *) *argp);
-					break;
-				}
-
-				argp++;
-				argmask >>= 4;
-				if (argmask)
-					printk(",");
-
-			} while (argmask);
-
-			printk(")\n");
-		}
-	}
-	else {
-		if ((int)__frame->gr8 > -4096 && (int)__frame->gr8 < 4096)
-			printk(KERN_CRIT "[%d] %s() = %ld\n", current->pid, name, __frame->gr8);
-		else
-			printk(KERN_CRIT "[%d] %s() = %lx\n", current->pid, name, __frame->gr8);
-	}
-	return;
-#endif
-
 	if (!test_thread_flag(TIF_SYSCALL_TRACE))
 		return;
 
-- 
cgit v0.10.2


From b7bab880c795ec620041ef0295cbbbc5a726f414 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Jun 2009 13:05:14 +0100
Subject: FRV: Implement TIF_NOTIFY_RESUME

Implement the TIF_NOTIFY_RESUME thread flag, making it call do_notify_resume()
which then clears it.  This will be made use of later by tracehooks in the
new-style ptrace implementation

Also discard TIF_IRET as that's not used by FRV.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index bb53ab7..e8a5ed7 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -109,20 +109,20 @@ register struct thread_info *__current_thread_info asm("gr15");
  * - other flags in MSW
  */
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_SIGPENDING		1	/* signal pending */
-#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_SINGLESTEP		3	/* restore singlestep on return to user mode */
-#define TIF_IRET		4	/* return with iret */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17	/* OOM killer killed process */
 #define TIF_FREEZE		18	/* freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 3bdb368..7ae290a 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -564,4 +564,9 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags)
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal();
 
+	/* deal with notification on about to resume userspace execution */
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+	}
+
 } /* end do_notify_resume() */
-- 
cgit v0.10.2


From 24ceb7e8a6c5dd6e32ac3d43a2424542c97989f5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Jun 2009 13:05:19 +0100
Subject: FRV: Don't turn on TIF_SYSCALL_TRACE unconditionally in syscall
 prologue

Don't turn on TIF_SYSCALL_TRACE unconditionally in syscall prologue in FRV's
entry.S.  This was originally for debugging stuff and should have been removed
a long time ago.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 1da523b..268dfbd 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -886,7 +886,6 @@ system_call:
 	bnc		icc0,#0,__syscall_badsys
 
 	ldi		@(gr15,#TI_FLAGS),gr4
-	ori		gr4,#_TIF_SYSCALL_TRACE,gr4
 	andicc		gr4,#_TIF_SYSCALL_TRACE,gr0,icc0
 	bne		icc0,#0,__syscall_trace_entry
 
-- 
cgit v0.10.2


From 4a3b98932270f5d69f2c081924e356325ed704d9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Jun 2009 13:05:24 +0100
Subject: FRV: Implement new-style ptrace

Implement the new-style ptrace for FRV, including adding appropriate
tracehooks.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 9d1552a..8a5bd7a 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -6,6 +6,7 @@ config FRV
 	bool
 	default y
 	select HAVE_IDE
+	select HAVE_ARCH_TRACEHOOK
 
 config ZONE_DMA
 	bool
diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h
index 7279ec07d..7bbf6e4 100644
--- a/arch/frv/include/asm/elf.h
+++ b/arch/frv/include/asm/elf.h
@@ -116,6 +116,7 @@ do {											\
 } while(0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_FDPIC_CORE_EFLAGS	EF_FRV_FDPIC
 #define ELF_EXEC_PAGESIZE	16384
 
diff --git a/arch/frv/include/asm/ptrace.h b/arch/frv/include/asm/ptrace.h
index cf69340..a54b535 100644
--- a/arch/frv/include/asm/ptrace.h
+++ b/arch/frv/include/asm/ptrace.h
@@ -65,6 +65,8 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+struct task_struct;
+
 /*
  * we dedicate GR28 to keeping a pointer to the current exception frame
  * - gr28 is destroyed on entry to the kernel from userspace
@@ -73,11 +75,18 @@ register struct pt_regs *__frame asm("gr28");
 
 #define user_mode(regs)			(!((regs)->psr & PSR_S))
 #define instruction_pointer(regs)	((regs)->pc)
+#define user_stack_pointer(regs)	((regs)->sp)
 
 extern unsigned long user_stack(const struct pt_regs *);
 extern void show_regs(struct pt_regs *);
 #define profile_pc(regs) ((regs)->pc)
-#endif
+
+#define task_pt_regs(task) ((task)->thread.frame0)
+
+#define arch_has_single_step()	(1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
 
 #endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/frv/include/asm/syscall.h b/arch/frv/include/asm/syscall.h
new file mode 100644
index 0000000..70689eb
--- /dev/null
+++ b/arch/frv/include/asm/syscall.h
@@ -0,0 +1,123 @@
+/* syscall parameter access functions
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H
+
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+/*
+ * Get the system call number or -1
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	return regs->syscallno;
+}
+
+/*
+ * Restore the clobbered GR8 register
+ * (1st syscall arg was overwritten with syscall return or error)
+ */
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->gr8 = regs->orig_gr8;
+}
+
+/*
+ * See if the syscall return value is an error, returning it if it is and 0 if
+ * not
+ */
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	return IS_ERR_VALUE(regs->gr8) ? regs->gr8 : 0;
+}
+
+/*
+ * Get the syscall return value
+ */
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->gr8;
+}
+
+/*
+ * Set the syscall return value
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error)
+		regs->gr8 = -error;
+	else
+		regs->gr8 = val;
+}
+
+/*
+ * Retrieve the system call arguments
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	/*
+	 * Do this simply for now. If we need to start supporting
+	 * fetching arguments from arbitrary indices, this will need some
+	 * extra logic. Presently there are no in-tree users that depend
+	 * on this behaviour.
+	 */
+	BUG_ON(i);
+
+	/* Argument pattern is: GR8, GR9, GR10, GR11, GR12, GR13 */
+	switch (n) {
+	case 6: args[5] = regs->gr13;
+	case 5: args[4] = regs->gr12;
+	case 4: args[3] = regs->gr11;
+	case 3: args[2] = regs->gr10;
+	case 2: args[1] = regs->gr9;
+	case 1:	args[0] = regs->gr8;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Alter the system call arguments
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	/* Same note as above applies */
+	BUG_ON(i);
+
+	switch (n) {
+	case 6: regs->gr13 = args[5];
+	case 5: regs->gr12 = args[4];
+	case 4: regs->gr11 = args[3];
+	case 3: regs->gr10 = args[2];
+	case 2: regs->gr9  = args[1];
+	case 1: regs->gr8  = args[0];
+		break;
+	default:
+		BUG();
+	}
+}
+
+#endif /* _ASM_SYSCALL_H */
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 268dfbd..356e0e3 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1149,11 +1149,10 @@ __entry_work_notifysig:
 	# perform syscall entry tracing
 __syscall_trace_entry:
 	LEDS		0x6320
-	setlos.p	#0,gr8
-	call		do_syscall_trace
+	call		syscall_trace_entry
 
-	ldi		@(gr28,#REG_SYSCALLNO),gr7
-	lddi		@(gr28,#REG_GR(8)) ,gr8
+	lddi.p		@(gr28,#REG_GR(8)) ,gr8
+	ori		gr8,#0,gr7		; syscall_trace_entry() returned new syscallno
 	lddi		@(gr28,#REG_GR(10)),gr10
 	lddi.p		@(gr28,#REG_GR(12)),gr12
 
@@ -1168,11 +1167,10 @@ __syscall_exit_work:
 	beq		icc0,#1,__entry_work_pending
 
 	movsg		psr,gr23
-	andi		gr23,#~PSR_PIL,gr23	; could let do_syscall_trace() call schedule()
+	andi		gr23,#~PSR_PIL,gr23	; could let syscall_trace_exit() call schedule()
 	movgs		gr23,psr
 
-	setlos.p	#1,gr8
-	call		do_syscall_trace
+	call		syscall_trace_exit
 	bra		__entry_resume_userspace
 
 __syscall_badsys:
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 6b15e5d..60eeed3 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -19,6 +19,9 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -33,6 +36,169 @@
  */
 
 /*
+ * retrieve the contents of FRV userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct user_int_regs *iregs = &target->thread.user->i;
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  iregs, 0, sizeof(*iregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*iregs), -1);
+}
+
+/*
+ * update the contents of the FRV userspace general registers
+ */
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct user_int_regs *iregs = &target->thread.user->i;
+	unsigned int offs_gr0, offs_gr1;
+	int ret;
+
+	/* not allowed to set PSR or __status */
+	if (pos < offsetof(struct user_int_regs, psr) + sizeof(long) &&
+	    pos + count > offsetof(struct user_int_regs, psr))
+		return -EIO;
+
+	if (pos < offsetof(struct user_int_regs, __status) + sizeof(long) &&
+	    pos + count > offsetof(struct user_int_regs, __status))
+		return -EIO;
+
+	/* set the control regs */
+	offs_gr0 = offsetof(struct user_int_regs, gr[0]);
+	offs_gr1 = offsetof(struct user_int_regs, gr[1]);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 iregs, 0, offs_gr0);
+	if (ret < 0)
+		return ret;
+
+	/* skip GR0/TBR */
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					offs_gr0, offs_gr1);
+	if (ret < 0)
+		return ret;
+
+	/* set the general regs */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &iregs->gr[1], offs_gr1, sizeof(*iregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					sizeof(*iregs), -1);
+}
+
+/*
+ * retrieve the contents of FRV userspace FP/Media registers
+ */
+static int fpmregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	const struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
+}
+
+/*
+ * update the contents of the FRV userspace FP/Media registers
+ */
+static int fpmregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
+}
+
+/*
+ * determine if the FP/Media registers have actually been used
+ */
+static int fpmregs_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	return tsk_used_math(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the FRV under Linux
+ */
+enum frv_regset {
+	REGSET_GENERAL,
+	REGSET_FPMEDIA,
+};
+
+static const struct user_regset frv_regsets[] = {
+	/*
+	 * General register format is:
+	 *	PSR, ISR, CCR, CCCR, LR, LCR, PC, (STATUS), SYSCALLNO, ORIG_G8
+	 *	GNER0-1, IACC0, TBR, GR1-63
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= genregs_get,
+		.set		= genregs_set,
+	},
+	/*
+	 * FPU/Media register format is:
+	 *	FR0-63, FNER0-1, MSR0-1, ACC0-7, ACCG0-8, FSR
+	 */
+	[REGSET_FPMEDIA] = {
+		.core_note_type	= NT_PRFPREG,
+		.n		= sizeof(struct user_fpmedia_regs) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= fpmregs_get,
+		.set		= fpmregs_set,
+		.active		= fpmregs_active,
+	},
+};
+
+static const struct user_regset_view user_frv_native_view = {
+	.name		= "frv",
+	.e_machine	= EM_FRV,
+	.regsets	= frv_regsets,
+	.n		= ARRAY_SIZE(frv_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_frv_native_view;
+}
+
+/*
  * Get contents of register REGNO in task TASK.
  */
 static inline long get_reg(struct task_struct *task, int regno)
@@ -69,40 +235,23 @@ static inline int put_reg(struct task_struct *task, int regno,
 }
 
 /*
- * check that an address falls within the bounds of the target process's memory
- * mappings
- */
-static inline int is_user_addr_valid(struct task_struct *child,
-				     unsigned long start, unsigned long len)
-{
-#ifdef CONFIG_MMU
-	if (start >= PAGE_OFFSET || len > PAGE_OFFSET - start)
-		return -EIO;
-	return 0;
-#else
-	struct vm_area_struct *vma;
-
-	vma = find_vma(child->mm, start);
-	if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
-		return 0;
-
-	return -EIO;
-#endif
-}
-
-/*
  * Called by kernel/ptrace.c when detaching..
  *
  * Control h/w single stepping
  */
-void ptrace_disable(struct task_struct *child)
+void user_enable_single_step(struct task_struct *child)
+{
+	child->thread.frame0->__status |= REG__STATUS_STEP;
+}
+
+void user_disable_single_step(struct task_struct *child)
 {
 	child->thread.frame0->__status &= ~REG__STATUS_STEP;
 }
 
-void ptrace_enable(struct task_struct *child)
+void ptrace_disable(struct task_struct *child)
 {
-	child->thread.frame0->__status |= REG__STATUS_STEP;
+	user_disable_single_step(child);
 }
 
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -111,15 +260,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	int ret;
 
 	switch (request) {
-		/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */
-	case PTRACE_PEEKDATA:
-		ret = -EIO;
-		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-			break;
-		ret = generic_ptrace_peekdata(child, addr, data);
-		break;
-
 		/* read the word at location addr in the USER area. */
 	case PTRACE_PEEKUSR: {
 		tmp = 0;
@@ -163,15 +303,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 	}
 
-		/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = -EIO;
-		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-			break;
-		ret = generic_ptrace_pokedata(child, addr, data);
-		break;
-
 	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
 		ret = -EIO;
 		if ((addr & 3) || addr < 0)
@@ -179,7 +310,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
 		ret = 0;
 		switch (addr >> 2) {
-		case 0 ... PT__END-1:
+		case 0 ... PT__END - 1:
 			ret = put_reg(child, addr >> 2, data);
 			break;
 
@@ -189,95 +320,29 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		}
 		break;
 
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT: /* restart after signal. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
-		ptrace_disable(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
-
-		/* make the child exit.  Best I can do is send it a sigkill.
-		 * perhaps it should be put in the status that it wants to
-		 * exit.
-		 */
-	case PTRACE_KILL:
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		ptrace_disable(child);
-		wake_up_process(child);
-		break;
-
-	case PTRACE_SINGLESTEP:  /* set the trap flag. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			break;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		ptrace_enable(child);
-		child->exit_code = data;
-		wake_up_process(child);
-		ret = 0;
-		break;
-
-	case PTRACE_DETACH:	/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
-	case PTRACE_GETREGS: { /* Get all integer regs from the child. */
-		int i;
-		for (i = 0; i < PT__GPEND; i++) {
-			tmp = get_reg(child, i);
-			if (put_user(tmp, (unsigned long *) data)) {
-				ret = -EFAULT;
-				break;
-			}
-			data += sizeof(long);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_SETREGS: { /* Set all integer regs in the child. */
-		int i;
-		for (i = 0; i < PT__GPEND; i++) {
-			if (get_user(tmp, (unsigned long *) data)) {
-				ret = -EFAULT;
-				break;
-			}
-			put_reg(child, i, tmp);
-			data += sizeof(long);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */
-		ret = 0;
-		if (copy_to_user((void *) data,
-				 &child->thread.user->f,
-				 sizeof(child->thread.user->f)))
-			ret = -EFAULT;
-		break;
-	}
-
-	case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */
-		ret = 0;
-		if (copy_from_user(&child->thread.user->f,
-				   (void *) data,
-				   sizeof(child->thread.user->f)))
-			ret = -EFAULT;
-		break;
-	}
+	case PTRACE_GETREGS:	/* Get all integer regs from the child. */
+		return copy_regset_to_user(child, &user_frv_native_view,
+					   REGSET_GENERAL,
+					   0, sizeof(child->thread.user->i),
+					   (void __user *)data);
+
+	case PTRACE_SETREGS:	/* Set all integer regs in the child. */
+		return copy_regset_from_user(child, &user_frv_native_view,
+					     REGSET_GENERAL,
+					     0, sizeof(child->thread.user->i),
+					     (const void __user *)data);
+
+	case PTRACE_GETFPREGS:	/* Get the child FP/Media state. */
+		return copy_regset_to_user(child, &user_frv_native_view,
+					   REGSET_FPMEDIA,
+					   0, sizeof(child->thread.user->f),
+					   (void __user *)data);
+
+	case PTRACE_SETFPREGS:	/* Set the child FP/Media state. */
+		return copy_regset_from_user(child, &user_frv_native_view,
+					     REGSET_FPMEDIA,
+					     0, sizeof(child->thread.user->f),
+					     (const void __user *)data);
 
 	case PTRACE_GETFDPIC:
 		tmp = 0;
@@ -300,35 +365,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 
 	default:
-		ret = -EIO;
+		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
 	return ret;
 }
 
-asmlinkage void do_syscall_trace(int leaving)
+/*
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
+ */
+asmlinkage unsigned long syscall_trace_entry(void)
 {
-	if (!test_thread_flag(TIF_SYSCALL_TRACE))
-		return;
-
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-
-	/* we need to indicate entry or exit to strace */
-	if (leaving)
-		__frame->__status |= REG__STATUS_SYSC_EXIT;
-	else
-		__frame->__status |= REG__STATUS_SYSC_ENTRY;
+	__frame->__status |= REG__STATUS_SYSC_ENTRY;
+	if (tracehook_report_syscall_entry(__frame)) {
+		/* tracing decided this syscall should not happen, so
+		 * We'll return a bogus call number to get an ENOSYS
+		 * error, but leave the original number in
+		 * __frame->syscallno
+		 */
+		return ULONG_MAX;
+	}
 
-	ptrace_notify(SIGTRAP);
+	return __frame->syscallno;
+}
 
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(void)
+{
+	__frame->__status |= REG__STATUS_SYSC_EXIT;
+	tracehook_report_syscall_exit(__frame, 0);
 }
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 7ae290a..4a7a62c 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -21,6 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/personality.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -516,6 +517,9 @@ static void do_signal(void)
 			 * clear the TIF_RESTORE_SIGMASK flag */
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
 				clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+			tracehook_signal_handler(signr, &info, &ka, __frame,
+						 test_thread_flag(TIF_SINGLESTEP));
 		}
 
 		return;
@@ -567,6 +571,7 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags)
 	/* deal with notification on about to resume userspace execution */
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(__frame);
 	}
 
 } /* end do_notify_resume() */
-- 
cgit v0.10.2


From fd4f683d045e053abb093f80d81afce30ceadad2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Jun 2009 13:08:32 +0100
Subject: MN10300: Don't set the dirty bit in the DTLB entries in the TLB-miss
 handler

Remove the special handling for the Data TLB entry dirty bit in the TLB-miss
handler.  As the code stands, all that it does is to cause us to take a second
data address exception to set the dirty bit.  Instead, we can just let
pte_mkdirty() set the bit.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/mn10300/mm/tlb-mn10300.S b/arch/mn10300/mm/tlb-mn10300.S
index 7892080..7095147 100644
--- a/arch/mn10300/mm/tlb-mn10300.S
+++ b/arch/mn10300/mm/tlb-mn10300.S
@@ -165,24 +165,6 @@ ENTRY(itlb_aerror)
 ENTRY(dtlb_aerror)
 	and	~EPSW_NMID,epsw
 	add	-4,sp
-	mov	d1,(sp)
-
-	movhu	(MMUFCR_DFC),d1			# is it the initial valid write
-						# to this page?
-	and	MMUFCR_xFC_INITWR,d1
- 	beq	dtlb_pagefault			# jump if not
-
-	mov	(DPTEL),d1			# set the dirty bit
-						# (don't replace with BSET!)
-	or	_PAGE_DIRTY,d1
-	mov	d1,(DPTEL)
-	mov	(sp),d1
-	add	4,sp
- 	rti
-
-	ALIGN
-dtlb_pagefault:
-	mov	(sp),d1
 	SAVE_ALL
 	add	-4,sp				# need to pass three params
 
-- 
cgit v0.10.2


From 5d289964e1f1e8a2ec4289274bf15bce6a4f8ab8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Jun 2009 13:08:37 +0100
Subject: MN10300: Add utrace/tracehooks support

Add utrace/tracehooks support to MN10300.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 3559267..89faaca 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration"
 config MN10300
 	def_bool y
 	select HAVE_OPROFILE
+	select HAVE_ARCH_TRACEHOOK
 
 config AM33
 	def_bool y
diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h
index bf09f8b..4910546 100644
--- a/arch/mn10300/include/asm/elf.h
+++ b/arch/mn10300/include/asm/elf.h
@@ -34,7 +34,7 @@
  */
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
+#define ELF_NGREG ((sizeof(struct pt_regs) / sizeof(elf_greg_t)) - 1)
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 #define ELF_NFPREG 32
@@ -76,6 +76,7 @@ do {									\
 } while (0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
 /*
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 7323927..f7d4b0d 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -143,13 +143,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 unsigned long get_wchan(struct task_struct *p);
 
-#define task_pt_regs(task)						\
-({									\
-       struct pt_regs *__regs__;					\
-       __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \
-       __regs__ - 1;							\
-})
-
+#define task_pt_regs(task) ((task)->thread.uregs)
 #define KSTK_EIP(task) (task_pt_regs(task)->pc)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
diff --git a/arch/mn10300/include/asm/ptrace.h b/arch/mn10300/include/asm/ptrace.h
index 7b06cc6..921942e 100644
--- a/arch/mn10300/include/asm/ptrace.h
+++ b/arch/mn10300/include/asm/ptrace.h
@@ -91,9 +91,17 @@ extern struct pt_regs *__frame; /* current frame pointer */
 #if defined(__KERNEL__)
 
 #if !defined(__ASSEMBLY__)
+struct task_struct;
+
 #define user_mode(regs)			(((regs)->epsw & EPSW_nSL) == EPSW_nSL)
 #define instruction_pointer(regs)	((regs)->pc)
+#define user_stack_pointer(regs)	((regs)->sp)
 extern void show_regs(struct pt_regs *);
+
+#define arch_has_single_step()	(1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
 #endif  /*  !__ASSEMBLY  */
 
 #define profile_pc(regs) ((regs)->pc)
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index 3dc3e46..7408a27 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -76,7 +76,7 @@ ENTRY(system_call)
 	cmp	nr_syscalls,d0
 	bcc	syscall_badsys
 	btst	_TIF_SYSCALL_TRACE,(TI_flags,a2)
-	bne	syscall_trace_entry
+	bne	syscall_entry_trace
 syscall_call:
 	add	d0,d0,a1
 	add	a1,a1
@@ -104,11 +104,10 @@ restore_all:
 syscall_exit_work:
 	btst	_TIF_SYSCALL_TRACE,d2
 	beq	work_pending
-	__sti				# could let do_syscall_trace() call
+	__sti				# could let syscall_trace_exit() call
 					# schedule() instead
 	mov	fp,d0
-	mov	1,d1
-	call	do_syscall_trace[],0	# do_syscall_trace(regs,entryexit)
+	call	syscall_trace_exit[],0	# do_syscall_trace(regs)
 	jmp	resume_userspace
 
 	ALIGN
@@ -138,13 +137,11 @@ work_notifysig:
 	jmp	resume_userspace
 
 	# perform syscall entry tracing
-syscall_trace_entry:
+syscall_entry_trace:
 	mov	-ENOSYS,d0
 	mov	d0,(REG_D0,fp)
 	mov	fp,d0
-	clr	d1
-	call	do_syscall_trace[],0
-	mov	(REG_ORIG_D0,fp),d0
+	call	syscall_trace_entry[],0	# returns the syscall number to actually use
 	mov	(REG_D1,fp),d1
 	cmp	nr_syscalls,d0
 	bcs	syscall_call
diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c
index d6d6cdc..e143339 100644
--- a/arch/mn10300/kernel/ptrace.c
+++ b/arch/mn10300/kernel/ptrace.c
@@ -17,6 +17,9 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -64,12 +67,6 @@ static inline int get_stack_long(struct task_struct *task, int offset)
 		((unsigned long) task->thread.uregs + offset);
 }
 
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
 static inline
 int put_stack_long(struct task_struct *task, int offset, unsigned long data)
 {
@@ -80,94 +77,233 @@ int put_stack_long(struct task_struct *task, int offset, unsigned long data)
 	return 0;
 }
 
-static inline unsigned long get_fpregs(struct fpu_state_struct *buf,
-				       struct task_struct *tsk)
+/*
+ * retrieve the contents of MN10300 userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
 {
-	return __copy_to_user(buf, &tsk->thread.fpu_state,
-			      sizeof(struct fpu_state_struct));
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret;
+
+	/* we need to skip regs->next */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs, 0, PT_ORIG_D0 * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+				  NR_PTREGS * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					NR_PTREGS * sizeof(long), -1);
 }
 
-static inline unsigned long set_fpregs(struct task_struct *tsk,
-				       struct fpu_state_struct *buf)
+/*
+ * update the contents of the MN10300 userspace general registers
+ */
+static int genregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
 {
-	return __copy_from_user(&tsk->thread.fpu_state, buf,
-				sizeof(struct fpu_state_struct));
+	struct pt_regs *regs = task_pt_regs(target);
+	unsigned long tmp;
+	int ret;
+
+	/* we need to skip regs->next */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs, 0, PT_ORIG_D0 * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+				 PT_EPSW * sizeof(long));
+	if (ret < 0)
+		return ret;
+
+	/* we need to mask off changes to EPSW */
+	tmp = regs->epsw;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &tmp, PT_EPSW * sizeof(long),
+				 PT_PC * sizeof(long));
+	tmp &= EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | EPSW_FLAG_Z;
+	tmp |= regs->epsw & ~(EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N |
+			      EPSW_FLAG_Z);
+	regs->epsw = tmp;
+
+	if (ret < 0)
+		return ret;
+
+	/* and finally load the PC */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->pc, PT_PC * sizeof(long),
+				 NR_PTREGS * sizeof(long));
+
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					 NR_PTREGS * sizeof(long), -1);
 }
 
-static inline void fpsave_init(struct task_struct *task)
+/*
+ * retrieve the contents of MN10300 userspace FPU registers
+ */
+static int fpuregs_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
 {
-	memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct));
+	const struct fpu_state_struct *fpregs = &target->thread.fpu_state;
+	int ret;
+
+	unlazy_fpu(target);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  fpregs, 0, sizeof(*fpregs));
+	if (ret < 0)
+		return ret;
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					sizeof(*fpregs), -1);
 }
 
 /*
- * make sure the single step bit is not set
+ * update the contents of the MN10300 userspace FPU registers
  */
-void ptrace_disable(struct task_struct *child)
+static int fpuregs_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	struct fpu_state_struct fpu_state = target->thread.fpu_state;
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &fpu_state, 0, sizeof(fpu_state));
+	if (ret < 0)
+		return ret;
+
+	fpu_kill_state(target);
+	target->thread.fpu_state = fpu_state;
+	set_using_fpu(target);
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					 sizeof(fpu_state), -1);
+}
+
+/*
+ * determine if the FPU registers have actually been used
+ */
+static int fpuregs_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	return is_using_fpu(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the MN10300 under Linux
+ */
+enum mn10300_regset {
+	REGSET_GENERAL,
+	REGSET_FPU,
+};
+
+static const struct user_regset mn10300_regsets[] = {
+	/*
+	 * General register format is:
+	 *	A3, A2, D3, D2, MCVF, MCRL, MCRH, MDRQ
+	 *	E1, E0, E7...E2, SP, LAR, LIR, MDR
+	 *	A1, A0, D1, D0, ORIG_D0, EPSW, PC
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= genregs_get,
+		.set		= genregs_set,
+	},
+	/*
+	 * FPU register format is:
+	 *	FS0-31, FPCR
+	 */
+	[REGSET_FPU] = {
+		.core_note_type	= NT_PRFPREG,
+		.n		= sizeof(struct fpu_state_struct) / sizeof(long),
+		.size		= sizeof(long),
+		.align		= sizeof(long),
+		.get		= fpuregs_get,
+		.set		= fpuregs_set,
+		.active		= fpuregs_active,
+	},
+};
+
+static const struct user_regset_view user_mn10300_native_view = {
+	.name		= "mn10300",
+	.e_machine	= EM_MN10300,
+	.regsets	= mn10300_regsets,
+	.n		= ARRAY_SIZE(mn10300_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_mn10300_native_view;
+}
+
+/*
+ * set the single-step bit
+ */
+void user_enable_single_step(struct task_struct *child)
 {
 #ifndef CONFIG_MN10300_USING_JTAG
 	struct user *dummy = NULL;
 	long tmp;
 
 	tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
-	tmp &= ~EPSW_T;
+	tmp |= EPSW_T;
 	put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
 #endif
 }
 
 /*
- * set the single step bit
+ * make sure the single-step bit is not set
  */
-void ptrace_enable(struct task_struct *child)
+void user_disable_single_step(struct task_struct *child)
 {
 #ifndef CONFIG_MN10300_USING_JTAG
 	struct user *dummy = NULL;
 	long tmp;
 
 	tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
-	tmp |= EPSW_T;
+	tmp &= ~EPSW_T;
 	put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
 #endif
 }
 
+void ptrace_disable(struct task_struct *child)
+{
+	user_disable_single_step(child);
+}
+
 /*
  * handle the arch-specific side of process tracing
  */
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct fpu_state_struct fpu_state;
-	int i, ret;
+	unsigned long tmp;
+	int ret;
 
 	switch (request) {
-	/* read the word at location addr. */
-	case PTRACE_PEEKTEXT: {
-		unsigned long tmp;
-		int copied;
-
-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-		ret = -EIO;
-		if (copied != sizeof(tmp))
-			break;
-		ret = put_user(tmp, (unsigned long *) data);
-		break;
-	}
-
-	/* read the word at location addr. */
-	case PTRACE_PEEKDATA: {
-		unsigned long tmp;
-		int copied;
-
-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-		ret = -EIO;
-		if (copied != sizeof(tmp))
-			break;
-		ret = put_user(tmp, (unsigned long *) data);
-		break;
-	}
-
 	/* read the word at location addr in the USER area. */
-	case PTRACE_PEEKUSR: {
-		unsigned long tmp;
-
+	case PTRACE_PEEKUSR:
 		ret = -EIO;
 		if ((addr & 3) || addr < 0 ||
 		    addr > sizeof(struct user) - 3)
@@ -179,17 +315,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 					     ptrace_regid_to_frame[addr]);
 		ret = put_user(tmp, (unsigned long *) data);
 		break;
-	}
-
-	/* write the word at location addr. */
-	case PTRACE_POKETEXT:
-	case PTRACE_POKEDATA:
-		if (access_process_vm(child, addr, &data, sizeof(data), 1) ==
-		    sizeof(data))
-			ret = 0;
-		else
-			ret = -EIO;
-		break;
 
 		/* write the word at location addr in the USER area */
 	case PTRACE_POKEUSR:
@@ -204,132 +329,32 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 					     data);
 		break;
 
-		/* continue and stop at next (return from) syscall */
-	case PTRACE_SYSCALL:
-		/* restart after signal. */
-	case PTRACE_CONT:
-		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
-			break;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
-		ptrace_disable(child);
-		wake_up_process(child);
-		ret = 0;
-		break;
-
-		/*
-		 * make the child exit
-		 * - the best I can do is send it a sigkill
-		 * - perhaps it should be put in the status that it wants to
-		 *   exit
-		 */
-	case PTRACE_KILL:
-		ret = 0;
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			break;
-		child->exit_code = SIGKILL;
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		ptrace_disable(child);
-		wake_up_process(child);
-		break;
-
-	case PTRACE_SINGLESTEP: /* set the trap flag. */
-#ifndef CONFIG_MN10300_USING_JTAG
-		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
-			break;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		ptrace_enable(child);
-		child->exit_code = data;
-		wake_up_process(child);
-		ret = 0;
-#else
-		ret = -EINVAL;
-#endif
-		break;
-
-	case PTRACE_DETACH:	/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
-		/* Get all gp regs from the child. */
-	case PTRACE_GETREGS: {
-		unsigned long tmp;
-
-		if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) {
-			ret = -EIO;
-			break;
-		}
-
-		for (i = 0; i < NR_PTREGS << 2; i += 4) {
-			tmp = get_stack_long(child, ptrace_regid_to_frame[i]);
-			__put_user(tmp, (unsigned long *) data);
-			data += sizeof(tmp);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-		unsigned long tmp;
-
-		if (!access_ok(VERIFY_READ, (unsigned long *)data,
-			       sizeof(struct pt_regs))) {
-			ret = -EIO;
-			break;
-		}
-
-		for (i = 0; i < NR_PTREGS << 2; i += 4) {
-			__get_user(tmp, (unsigned long *) data);
-			put_stack_long(child, ptrace_regid_to_frame[i], tmp);
-			data += sizeof(tmp);
-		}
-		ret = 0;
-		break;
-	}
-
-	case PTRACE_GETFPREGS: { /* Get the child FPU state. */
-		if (is_using_fpu(child)) {
-			unlazy_fpu(child);
-			fpu_state = child->thread.fpu_state;
-		} else {
-			memset(&fpu_state, 0, sizeof(fpu_state));
-		}
-
-		ret = -EIO;
-		if (copy_to_user((void *) data, &fpu_state,
-				 sizeof(fpu_state)) == 0)
-			ret = 0;
-		break;
-	}
-
-	case PTRACE_SETFPREGS: { /* Set the child FPU state. */
-		ret = -EFAULT;
-		if (copy_from_user(&fpu_state, (const void *) data,
-				   sizeof(fpu_state)) == 0) {
-			fpu_kill_state(child);
-			child->thread.fpu_state = fpu_state;
-			set_using_fpu(child);
-			ret = 0;
-		}
-		break;
-	}
-
-	case PTRACE_SETOPTIONS: {
-		if (data & PTRACE_O_TRACESYSGOOD)
-			child->ptrace |= PT_TRACESYSGOOD;
-		else
-			child->ptrace &= ~PT_TRACESYSGOOD;
-		ret = 0;
-		break;
-	}
+	case PTRACE_GETREGS:	/* Get all integer regs from the child. */
+		return copy_regset_to_user(child, &user_mn10300_native_view,
+					   REGSET_GENERAL,
+					   0, NR_PTREGS * sizeof(long),
+					   (void __user *)data);
+
+	case PTRACE_SETREGS:	/* Set all integer regs in the child. */
+		return copy_regset_from_user(child, &user_mn10300_native_view,
+					     REGSET_GENERAL,
+					     0, NR_PTREGS * sizeof(long),
+					     (const void __user *)data);
+
+	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
+		return copy_regset_to_user(child, &user_mn10300_native_view,
+					   REGSET_FPU,
+					   0, sizeof(struct fpu_state_struct),
+					   (void __user *)data);
+
+	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
+		return copy_regset_from_user(child, &user_mn10300_native_view,
+					     REGSET_FPU,
+					     0, sizeof(struct fpu_state_struct),
+					     (const void __user *)data);
 
 	default:
-		ret = -EIO;
+		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
 
@@ -337,43 +362,26 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 }
 
 /*
- * notification of system call entry/exit
- * - triggered by current->work.syscall_trace
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
  */
-asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs)
 {
-#if 0
-	/* just in case... */
-	printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n",
-	       current->pid,
-	       regs->orig_d0,
-	       regs->a0,
-	       regs->d1,
-	       regs->a3,
-	       regs->a2,
-	       regs->d0);
-	return;
-#endif
-
-	if (!test_thread_flag(TIF_SYSCALL_TRACE) &&
-	    !test_thread_flag(TIF_SINGLESTEP))
-		return;
-	if (!(current->ptrace & PT_PTRACED))
-		return;
+	if (tracehook_report_syscall_entry(regs))
+		/* tracing decided this syscall should not happen, so
+		 * We'll return a bogus call number to get an ENOSYS
+		 * error, but leave the original number in
+		 * regs->orig_d0
+		 */
+		return ULONG_MAX;
 
-	/* the 0x80 provides a way for the tracing parent to distinguish
-	   between a syscall stop and SIGTRAP delivery */
-	ptrace_notify(SIGTRAP |
-		      ((current->ptrace & PT_TRACESYSGOOD) &&
-		       !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0));
+	return regs->orig_d0;
+}
 
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+	tracehook_report_syscall_exit(regs, 0);
 }
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 841ca99..9f7572a 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -23,6 +23,7 @@
 #include <linux/tty.h>
 #include <linux/personality.h>
 #include <linux/suspend.h>
+#include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
@@ -511,6 +512,9 @@ static void do_signal(struct pt_regs *regs)
 			 * clear the TIF_RESTORE_SIGMASK flag */
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
 				clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+			tracehook_signal_handler(signr, &info, &ka, regs,
+						 test_thread_flag(TIF_SINGLESTEP));
 		}
 
 		return;
@@ -561,4 +565,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
 	/* deal with pending signal delivery */
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(__frame);
+	}
 }
-- 
cgit v0.10.2


From 3c7b4e6b8be4c16f1e6e5c558e33b7ff0db2dfaf Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:22:39 +0100
Subject: kmemleak: Add the base support

This patch adds the base support for the kernel memory leak
detector. It traces the memory allocation/freeing in a way similar to
the Boehm's conservative garbage collector, the difference being that
the unreferenced objects are not freed but only shown in
/sys/kernel/debug/kmemleak. Enabling this feature introduces an
overhead to memory allocations.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
new file mode 100644
index 0000000..7796aed
--- /dev/null
+++ b/include/linux/kmemleak.h
@@ -0,0 +1,96 @@
+/*
+ * include/linux/kmemleak.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __KMEMLEAK_H
+#define __KMEMLEAK_H
+
+#ifdef CONFIG_DEBUG_KMEMLEAK
+
+extern void kmemleak_init(void);
+extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+			   gfp_t gfp);
+extern void kmemleak_free(const void *ptr);
+extern void kmemleak_padding(const void *ptr, unsigned long offset,
+			     size_t size);
+extern void kmemleak_not_leak(const void *ptr);
+extern void kmemleak_ignore(const void *ptr);
+extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
+			       size_t length, gfp_t gfp);
+extern void kmemleak_no_scan(const void *ptr);
+
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+					    int min_count, unsigned long flags,
+					    gfp_t gfp)
+{
+	if (!(flags & SLAB_NOLEAKTRACE))
+		kmemleak_alloc(ptr, size, min_count, gfp);
+}
+
+static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
+{
+	if (!(flags & SLAB_NOLEAKTRACE))
+		kmemleak_free(ptr);
+}
+
+static inline void kmemleak_erase(void **ptr)
+{
+	*ptr = NULL;
+}
+
+#else
+
+static inline void kmemleak_init(void)
+{
+}
+static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+				  gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+					    int min_count, unsigned long flags,
+					    gfp_t gfp)
+{
+}
+static inline void kmemleak_free(const void *ptr)
+{
+}
+static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
+{
+}
+static inline void kmemleak_not_leak(const void *ptr)
+{
+}
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
+static inline void kmemleak_scan_area(const void *ptr, unsigned long offset,
+				      size_t length, gfp_t gfp)
+{
+}
+static inline void kmemleak_erase(void **ptr)
+{
+}
+static inline void kmemleak_no_scan(const void *ptr)
+{
+}
+
+#endif	/* CONFIG_DEBUG_KMEMLEAK */
+
+#endif	/* __KMEMLEAK_H */
diff --git a/init/main.c b/init/main.c
index bb7dc57..9d759d6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -56,6 +56,7 @@
 #include <linux/debug_locks.h>
 #include <linux/debugobjects.h>
 #include <linux/lockdep.h>
+#include <linux/kmemleak.h>
 #include <linux/pid_namespace.h>
 #include <linux/device.h>
 #include <linux/kthread.h>
@@ -603,6 +604,7 @@ asmlinkage void __init start_kernel(void)
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
+	prio_tree_init();
 	pidhash_init();
 	init_timers();
 	hrtimers_init();
@@ -654,6 +656,7 @@ asmlinkage void __init start_kernel(void)
 	cpu_hotplug_init();
 	kmem_cache_init();
 	kmemtrace_init();
+	kmemleak_init();
 	debug_objects_mem_init();
 	idr_init_cache();
 	setup_per_cpu_pageset();
@@ -663,7 +666,6 @@ asmlinkage void __init start_kernel(void)
 	calibrate_delay();
 	pidmap_init();
 	pgtable_cache_init();
-	prio_tree_init();
 	anon_vma_init();
 #ifdef CONFIG_X86
 	if (efi_enabled)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
new file mode 100644
index 0000000..58ec86c
--- /dev/null
+++ b/mm/kmemleak.c
@@ -0,0 +1,1498 @@
+/*
+ * mm/kmemleak.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * For more information on the algorithm and kmemleak usage, please see
+ * Documentation/kmemleak.txt.
+ *
+ * Notes on locking
+ * ----------------
+ *
+ * The following locks and mutexes are used by kmemleak:
+ *
+ * - kmemleak_lock (rwlock): protects the object_list modifications and
+ *   accesses to the object_tree_root. The object_list is the main list
+ *   holding the metadata (struct kmemleak_object) for the allocated memory
+ *   blocks. The object_tree_root is a priority search tree used to look-up
+ *   metadata based on a pointer to the corresponding memory block.  The
+ *   kmemleak_object structures are added to the object_list and
+ *   object_tree_root in the create_object() function called from the
+ *   kmemleak_alloc() callback and removed in delete_object() called from the
+ *   kmemleak_free() callback
+ * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to
+ *   the metadata (e.g. count) are protected by this lock. Note that some
+ *   members of this structure may be protected by other means (atomic or
+ *   kmemleak_lock). This lock is also held when scanning the corresponding
+ *   memory block to avoid the kernel freeing it via the kmemleak_free()
+ *   callback. This is less heavyweight than holding a global lock like
+ *   kmemleak_lock during scanning
+ * - scan_mutex (mutex): ensures that only one thread may scan the memory for
+ *   unreferenced objects at a time. The gray_list contains the objects which
+ *   are already referenced or marked as false positives and need to be
+ *   scanned. This list is only modified during a scanning episode when the
+ *   scan_mutex is held. At the end of a scan, the gray_list is always empty.
+ *   Note that the kmemleak_object.use_count is incremented when an object is
+ *   added to the gray_list and therefore cannot be freed
+ * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs
+ *   file together with modifications to the memory scanning parameters
+ *   including the scan_thread pointer
+ *
+ * The kmemleak_object structures have a use_count incremented or decremented
+ * using the get_object()/put_object() functions. When the use_count becomes
+ * 0, this count can no longer be incremented and put_object() schedules the
+ * kmemleak_object freeing via an RCU callback. All calls to the get_object()
+ * function must be protected by rcu_read_lock() to avoid accessing a freed
+ * structure.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/prio_tree.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/stacktrace.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mmzone.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <linux/string.h>
+#include <linux/nodemask.h>
+#include <linux/mm.h>
+
+#include <asm/sections.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+
+#include <linux/kmemleak.h>
+
+/*
+ * Kmemleak configuration and common defines.
+ */
+#define MAX_TRACE		16	/* stack trace length */
+#define REPORTS_NR		50	/* maximum number of reported leaks */
+#define MSECS_MIN_AGE		5000	/* minimum object age for reporting */
+#define MSECS_SCAN_YIELD	10	/* CPU yielding period */
+#define SECS_FIRST_SCAN		60	/* delay before the first scan */
+#define SECS_SCAN_WAIT		600	/* subsequent auto scanning delay */
+
+#define BYTES_PER_POINTER	sizeof(void *)
+
+/* scanning area inside a memory block */
+struct kmemleak_scan_area {
+	struct hlist_node node;
+	unsigned long offset;
+	size_t length;
+};
+
+/*
+ * Structure holding the metadata for each allocated memory block.
+ * Modifications to such objects should be made while holding the
+ * object->lock. Insertions or deletions from object_list, gray_list or
+ * tree_node are already protected by the corresponding locks or mutex (see
+ * the notes on locking above). These objects are reference-counted
+ * (use_count) and freed using the RCU mechanism.
+ */
+struct kmemleak_object {
+	spinlock_t lock;
+	unsigned long flags;		/* object status flags */
+	struct list_head object_list;
+	struct list_head gray_list;
+	struct prio_tree_node tree_node;
+	struct rcu_head rcu;		/* object_list lockless traversal */
+	/* object usage count; object freed when use_count == 0 */
+	atomic_t use_count;
+	unsigned long pointer;
+	size_t size;
+	/* minimum number of a pointers found before it is considered leak */
+	int min_count;
+	/* the total number of pointers found pointing to this object */
+	int count;
+	/* memory ranges to be scanned inside an object (empty for all) */
+	struct hlist_head area_list;
+	unsigned long trace[MAX_TRACE];
+	unsigned int trace_len;
+	unsigned long jiffies;		/* creation timestamp */
+	pid_t pid;			/* pid of the current task */
+	char comm[TASK_COMM_LEN];	/* executable name */
+};
+
+/* flag representing the memory block allocation status */
+#define OBJECT_ALLOCATED	(1 << 0)
+/* flag set after the first reporting of an unreference object */
+#define OBJECT_REPORTED		(1 << 1)
+/* flag set to not scan the object */
+#define OBJECT_NO_SCAN		(1 << 2)
+
+/* the list of all allocated objects */
+static LIST_HEAD(object_list);
+/* the list of gray-colored objects (see color_gray comment below) */
+static LIST_HEAD(gray_list);
+/* prio search tree for object boundaries */
+static struct prio_tree_root object_tree_root;
+/* rw_lock protecting the access to object_list and prio_tree_root */
+static DEFINE_RWLOCK(kmemleak_lock);
+
+/* allocation caches for kmemleak internal data */
+static struct kmem_cache *object_cache;
+static struct kmem_cache *scan_area_cache;
+
+/* set if tracing memory operations is enabled */
+static atomic_t kmemleak_enabled = ATOMIC_INIT(0);
+/* set in the late_initcall if there were no errors */
+static atomic_t kmemleak_initialized = ATOMIC_INIT(0);
+/* enables or disables early logging of the memory operations */
+static atomic_t kmemleak_early_log = ATOMIC_INIT(1);
+/* set if a fata kmemleak error has occurred */
+static atomic_t kmemleak_error = ATOMIC_INIT(0);
+
+/* minimum and maximum address that may be valid pointers */
+static unsigned long min_addr = ULONG_MAX;
+static unsigned long max_addr;
+
+/* used for yielding the CPU to other tasks during scanning */
+static unsigned long next_scan_yield;
+static struct task_struct *scan_thread;
+static unsigned long jiffies_scan_yield;
+static unsigned long jiffies_min_age;
+/* delay between automatic memory scannings */
+static signed long jiffies_scan_wait;
+/* enables or disables the task stacks scanning */
+static int kmemleak_stack_scan;
+/* mutex protecting the memory scanning */
+static DEFINE_MUTEX(scan_mutex);
+/* mutex protecting the access to the /sys/kernel/debug/kmemleak file */
+static DEFINE_MUTEX(kmemleak_mutex);
+
+/* number of leaks reported (for limitation purposes) */
+static int reported_leaks;
+
+/*
+ * Early object allocation/freeing logging. Kkmemleak is initialized after the
+ * kernel allocator. However, both the kernel allocator and kmemleak may
+ * allocate memory blocks which need to be tracked. Kkmemleak defines an
+ * arbitrary buffer to hold the allocation/freeing information before it is
+ * fully initialized.
+ */
+
+/* kmemleak operation type for early logging */
+enum {
+	KMEMLEAK_ALLOC,
+	KMEMLEAK_FREE,
+	KMEMLEAK_NOT_LEAK,
+	KMEMLEAK_IGNORE,
+	KMEMLEAK_SCAN_AREA,
+	KMEMLEAK_NO_SCAN
+};
+
+/*
+ * Structure holding the information passed to kmemleak callbacks during the
+ * early logging.
+ */
+struct early_log {
+	int op_type;			/* kmemleak operation type */
+	const void *ptr;		/* allocated/freed memory block */
+	size_t size;			/* memory block size */
+	int min_count;			/* minimum reference count */
+	unsigned long offset;		/* scan area offset */
+	size_t length;			/* scan area length */
+};
+
+/* early logging buffer and current position */
+static struct early_log early_log[200];
+static int crt_early_log;
+
+static void kmemleak_disable(void);
+
+/*
+ * Print a warning and dump the stack trace.
+ */
+#define kmemleak_warn(x...)	do {	\
+	pr_warning(x);			\
+	dump_stack();			\
+} while (0)
+
+/*
+ * Macro invoked when a serious kmemleak condition occured and cannot be
+ * recovered from. Kkmemleak will be disabled and further allocation/freeing
+ * tracing no longer available.
+ */
+#define kmemleak_panic(x...)	do {	\
+	kmemleak_warn(x);		\
+	kmemleak_disable();		\
+} while (0)
+
+/*
+ * Object colors, encoded with count and min_count:
+ * - white - orphan object, not enough references to it (count < min_count)
+ * - gray  - not orphan, not marked as false positive (min_count == 0) or
+ *		sufficient references to it (count >= min_count)
+ * - black - ignore, it doesn't contain references (e.g. text section)
+ *		(min_count == -1). No function defined for this color.
+ * Newly created objects don't have any color assigned (object->count == -1)
+ * before the next memory scan when they become white.
+ */
+static int color_white(const struct kmemleak_object *object)
+{
+	return object->count != -1 && object->count < object->min_count;
+}
+
+static int color_gray(const struct kmemleak_object *object)
+{
+	return object->min_count != -1 && object->count >= object->min_count;
+}
+
+/*
+ * Objects are considered referenced if their color is gray and they have not
+ * been deleted.
+ */
+static int referenced_object(struct kmemleak_object *object)
+{
+	return (object->flags & OBJECT_ALLOCATED) && color_gray(object);
+}
+
+/*
+ * Objects are considered unreferenced only if their color is white, they have
+ * not be deleted and have a minimum age to avoid false positives caused by
+ * pointers temporarily stored in CPU registers.
+ */
+static int unreferenced_object(struct kmemleak_object *object)
+{
+	return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
+		time_is_before_eq_jiffies(object->jiffies + jiffies_min_age);
+}
+
+/*
+ * Printing of the (un)referenced objects information, either to the seq file
+ * or to the kernel log. The print_referenced/print_unreferenced functions
+ * must be called with the object->lock held.
+ */
+#define print_helper(seq, x...)	do {	\
+	struct seq_file *s = (seq);	\
+	if (s)				\
+		seq_printf(s, x);	\
+	else				\
+		pr_info(x);		\
+} while (0)
+
+static void print_referenced(struct kmemleak_object *object)
+{
+	pr_info("kmemleak: referenced object 0x%08lx (size %zu)\n",
+		object->pointer, object->size);
+}
+
+static void print_unreferenced(struct seq_file *seq,
+			       struct kmemleak_object *object)
+{
+	int i;
+
+	print_helper(seq, "kmemleak: unreferenced object 0x%08lx (size %zu):\n",
+		     object->pointer, object->size);
+	print_helper(seq, "  comm \"%s\", pid %d, jiffies %lu\n",
+		     object->comm, object->pid, object->jiffies);
+	print_helper(seq, "  backtrace:\n");
+
+	for (i = 0; i < object->trace_len; i++) {
+		void *ptr = (void *)object->trace[i];
+		print_helper(seq, "    [<%p>] %pS\n", ptr, ptr);
+	}
+}
+
+/*
+ * Print the kmemleak_object information. This function is used mainly for
+ * debugging special cases when kmemleak operations. It must be called with
+ * the object->lock held.
+ */
+static void dump_object_info(struct kmemleak_object *object)
+{
+	struct stack_trace trace;
+
+	trace.nr_entries = object->trace_len;
+	trace.entries = object->trace;
+
+	pr_notice("kmemleak: Object 0x%08lx (size %zu):\n",
+		  object->tree_node.start, object->size);
+	pr_notice("  comm \"%s\", pid %d, jiffies %lu\n",
+		  object->comm, object->pid, object->jiffies);
+	pr_notice("  min_count = %d\n", object->min_count);
+	pr_notice("  count = %d\n", object->count);
+	pr_notice("  backtrace:\n");
+	print_stack_trace(&trace, 4);
+}
+
+/*
+ * Look-up a memory block metadata (kmemleak_object) in the priority search
+ * tree based on a pointer value. If alias is 0, only values pointing to the
+ * beginning of the memory block are allowed. The kmemleak_lock must be held
+ * when calling this function.
+ */
+static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
+{
+	struct prio_tree_node *node;
+	struct prio_tree_iter iter;
+	struct kmemleak_object *object;
+
+	prio_tree_iter_init(&iter, &object_tree_root, ptr, ptr);
+	node = prio_tree_next(&iter);
+	if (node) {
+		object = prio_tree_entry(node, struct kmemleak_object,
+					 tree_node);
+		if (!alias && object->pointer != ptr) {
+			kmemleak_warn("kmemleak: Found object by alias");
+			object = NULL;
+		}
+	} else
+		object = NULL;
+
+	return object;
+}
+
+/*
+ * Increment the object use_count. Return 1 if successful or 0 otherwise. Note
+ * that once an object's use_count reached 0, the RCU freeing was already
+ * registered and the object should no longer be used. This function must be
+ * called under the protection of rcu_read_lock().
+ */
+static int get_object(struct kmemleak_object *object)
+{
+	return atomic_inc_not_zero(&object->use_count);
+}
+
+/*
+ * RCU callback to free a kmemleak_object.
+ */
+static void free_object_rcu(struct rcu_head *rcu)
+{
+	struct hlist_node *elem, *tmp;
+	struct kmemleak_scan_area *area;
+	struct kmemleak_object *object =
+		container_of(rcu, struct kmemleak_object, rcu);
+
+	/*
+	 * Once use_count is 0 (guaranteed by put_object), there is no other
+	 * code accessing this object, hence no need for locking.
+	 */
+	hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) {
+		hlist_del(elem);
+		kmem_cache_free(scan_area_cache, area);
+	}
+	kmem_cache_free(object_cache, object);
+}
+
+/*
+ * Decrement the object use_count. Once the count is 0, free the object using
+ * an RCU callback. Since put_object() may be called via the kmemleak_free() ->
+ * delete_object() path, the delayed RCU freeing ensures that there is no
+ * recursive call to the kernel allocator. Lock-less RCU object_list traversal
+ * is also possible.
+ */
+static void put_object(struct kmemleak_object *object)
+{
+	if (!atomic_dec_and_test(&object->use_count))
+		return;
+
+	/* should only get here after delete_object was called */
+	WARN_ON(object->flags & OBJECT_ALLOCATED);
+
+	call_rcu(&object->rcu, free_object_rcu);
+}
+
+/*
+ * Look up an object in the prio search tree and increase its use_count.
+ */
+static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
+{
+	unsigned long flags;
+	struct kmemleak_object *object = NULL;
+
+	rcu_read_lock();
+	read_lock_irqsave(&kmemleak_lock, flags);
+	if (ptr >= min_addr && ptr < max_addr)
+		object = lookup_object(ptr, alias);
+	read_unlock_irqrestore(&kmemleak_lock, flags);
+
+	/* check whether the object is still available */
+	if (object && !get_object(object))
+		object = NULL;
+	rcu_read_unlock();
+
+	return object;
+}
+
+/*
+ * Create the metadata (struct kmemleak_object) corresponding to an allocated
+ * memory block and add it to the object_list and object_tree_root.
+ */
+static void create_object(unsigned long ptr, size_t size, int min_count,
+			  gfp_t gfp)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+	struct prio_tree_node *node;
+	struct stack_trace trace;
+
+	object = kmem_cache_alloc(object_cache, gfp & ~GFP_SLAB_BUG_MASK);
+	if (!object) {
+		kmemleak_panic("kmemleak: Cannot allocate a kmemleak_object "
+			       "structure\n");
+		return;
+	}
+
+	INIT_LIST_HEAD(&object->object_list);
+	INIT_LIST_HEAD(&object->gray_list);
+	INIT_HLIST_HEAD(&object->area_list);
+	spin_lock_init(&object->lock);
+	atomic_set(&object->use_count, 1);
+	object->flags = OBJECT_ALLOCATED;
+	object->pointer = ptr;
+	object->size = size;
+	object->min_count = min_count;
+	object->count = -1;			/* no color initially */
+	object->jiffies = jiffies;
+
+	/* task information */
+	if (in_irq()) {
+		object->pid = 0;
+		strncpy(object->comm, "hardirq", sizeof(object->comm));
+	} else if (in_softirq()) {
+		object->pid = 0;
+		strncpy(object->comm, "softirq", sizeof(object->comm));
+	} else {
+		object->pid = current->pid;
+		/*
+		 * There is a small chance of a race with set_task_comm(),
+		 * however using get_task_comm() here may cause locking
+		 * dependency issues with current->alloc_lock. In the worst
+		 * case, the command line is not correct.
+		 */
+		strncpy(object->comm, current->comm, sizeof(object->comm));
+	}
+
+	/* kernel backtrace */
+	trace.max_entries = MAX_TRACE;
+	trace.nr_entries = 0;
+	trace.entries = object->trace;
+	trace.skip = 1;
+	save_stack_trace(&trace);
+	object->trace_len = trace.nr_entries;
+
+	INIT_PRIO_TREE_NODE(&object->tree_node);
+	object->tree_node.start = ptr;
+	object->tree_node.last = ptr + size - 1;
+
+	write_lock_irqsave(&kmemleak_lock, flags);
+	min_addr = min(min_addr, ptr);
+	max_addr = max(max_addr, ptr + size);
+	node = prio_tree_insert(&object_tree_root, &object->tree_node);
+	/*
+	 * The code calling the kernel does not yet have the pointer to the
+	 * memory block to be able to free it.  However, we still hold the
+	 * kmemleak_lock here in case parts of the kernel started freeing
+	 * random memory blocks.
+	 */
+	if (node != &object->tree_node) {
+		unsigned long flags;
+
+		kmemleak_panic("kmemleak: Cannot insert 0x%lx into the object "
+			       "search tree (already existing)\n", ptr);
+		object = lookup_object(ptr, 1);
+		spin_lock_irqsave(&object->lock, flags);
+		dump_object_info(object);
+		spin_unlock_irqrestore(&object->lock, flags);
+
+		goto out;
+	}
+	list_add_tail_rcu(&object->object_list, &object_list);
+out:
+	write_unlock_irqrestore(&kmemleak_lock, flags);
+}
+
+/*
+ * Remove the metadata (struct kmemleak_object) for a memory block from the
+ * object_list and object_tree_root and decrement its use_count.
+ */
+static void delete_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	write_lock_irqsave(&kmemleak_lock, flags);
+	object = lookup_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Freeing unknown object at 0x%08lx\n",
+			      ptr);
+		write_unlock_irqrestore(&kmemleak_lock, flags);
+		return;
+	}
+	prio_tree_remove(&object_tree_root, &object->tree_node);
+	list_del_rcu(&object->object_list);
+	write_unlock_irqrestore(&kmemleak_lock, flags);
+
+	WARN_ON(!(object->flags & OBJECT_ALLOCATED));
+	WARN_ON(atomic_read(&object->use_count) < 1);
+
+	/*
+	 * Locking here also ensures that the corresponding memory block
+	 * cannot be freed when it is being scanned.
+	 */
+	spin_lock_irqsave(&object->lock, flags);
+	if (object->flags & OBJECT_REPORTED)
+		print_referenced(object);
+	object->flags &= ~OBJECT_ALLOCATED;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Make a object permanently as gray-colored so that it can no longer be
+ * reported as a leak. This is used in general to mark a false positive.
+ */
+static void make_gray_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Graying unknown object at 0x%08lx\n",
+			      ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->min_count = 0;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Mark the object as black-colored so that it is ignored from scans and
+ * reporting.
+ */
+static void make_black_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Blacking unknown object at 0x%08lx\n",
+			      ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->min_count = -1;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Add a scanning area to the object. If at least one such area is added,
+ * kmemleak will only scan these ranges rather than the whole memory block.
+ */
+static void add_scan_area(unsigned long ptr, unsigned long offset,
+			  size_t length, gfp_t gfp)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+	struct kmemleak_scan_area *area;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Adding scan area to unknown "
+			      "object at 0x%08lx\n", ptr);
+		return;
+	}
+
+	area = kmem_cache_alloc(scan_area_cache, gfp & ~GFP_SLAB_BUG_MASK);
+	if (!area) {
+		kmemleak_warn("kmemleak: Cannot allocate a scan area\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	if (offset + length > object->size) {
+		kmemleak_warn("kmemleak: Scan area larger than object "
+			      "0x%08lx\n", ptr);
+		dump_object_info(object);
+		kmem_cache_free(scan_area_cache, area);
+		goto out_unlock;
+	}
+
+	INIT_HLIST_NODE(&area->node);
+	area->offset = offset;
+	area->length = length;
+
+	hlist_add_head(&area->node, &object->area_list);
+out_unlock:
+	spin_unlock_irqrestore(&object->lock, flags);
+out:
+	put_object(object);
+}
+
+/*
+ * Set the OBJECT_NO_SCAN flag for the object corresponding to the give
+ * pointer. Such object will not be scanned by kmemleak but references to it
+ * are searched.
+ */
+static void object_no_scan(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Not scanning unknown object at "
+			      "0x%08lx\n", ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->flags |= OBJECT_NO_SCAN;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Log an early kmemleak_* call to the early_log buffer. These calls will be
+ * processed later once kmemleak is fully initialized.
+ */
+static void log_early(int op_type, const void *ptr, size_t size,
+		      int min_count, unsigned long offset, size_t length)
+{
+	unsigned long flags;
+	struct early_log *log;
+
+	if (crt_early_log >= ARRAY_SIZE(early_log)) {
+		kmemleak_panic("kmemleak: Early log buffer exceeded\n");
+		return;
+	}
+
+	/*
+	 * There is no need for locking since the kernel is still in UP mode
+	 * at this stage. Disabling the IRQs is enough.
+	 */
+	local_irq_save(flags);
+	log = &early_log[crt_early_log];
+	log->op_type = op_type;
+	log->ptr = ptr;
+	log->size = size;
+	log->min_count = min_count;
+	log->offset = offset;
+	log->length = length;
+	crt_early_log++;
+	local_irq_restore(flags);
+}
+
+/*
+ * Memory allocation function callback. This function is called from the
+ * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc,
+ * vmalloc etc.).
+ */
+void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp)
+{
+	pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		create_object((unsigned long)ptr, size, min_count, gfp);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_alloc);
+
+/*
+ * Memory freeing function callback. This function is called from the kernel
+ * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.).
+ */
+void kmemleak_free(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		delete_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free);
+
+/*
+ * Mark an already allocated memory block as a false positive. This will cause
+ * the block to no longer be reported as leak and always be scanned.
+ */
+void kmemleak_not_leak(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		make_gray_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_not_leak);
+
+/*
+ * Ignore a memory block. This is usually done when it is known that the
+ * corresponding block is not a leak and does not contain any references to
+ * other allocated memory blocks.
+ */
+void kmemleak_ignore(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		make_black_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_ignore);
+
+/*
+ * Limit the range to be scanned in an allocated memory block.
+ */
+void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length,
+			gfp_t gfp)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		add_scan_area((unsigned long)ptr, offset, length, gfp);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length);
+}
+EXPORT_SYMBOL(kmemleak_scan_area);
+
+/*
+ * Inform kmemleak not to scan the given memory block.
+ */
+void kmemleak_no_scan(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		object_no_scan((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_no_scan);
+
+/*
+ * Yield the CPU so that other tasks get a chance to run.  The yielding is
+ * rate-limited to avoid excessive number of calls to the schedule() function
+ * during memory scanning.
+ */
+static void scan_yield(void)
+{
+	might_sleep();
+
+	if (time_is_before_eq_jiffies(next_scan_yield)) {
+		schedule();
+		next_scan_yield = jiffies + jiffies_scan_yield;
+	}
+}
+
+/*
+ * Memory scanning is a long process and it needs to be interruptable. This
+ * function checks whether such interrupt condition occured.
+ */
+static int scan_should_stop(void)
+{
+	if (!atomic_read(&kmemleak_enabled))
+		return 1;
+
+	/*
+	 * This function may be called from either process or kthread context,
+	 * hence the need to check for both stop conditions.
+	 */
+	if (current->mm)
+		return signal_pending(current);
+	else
+		return kthread_should_stop();
+
+	return 0;
+}
+
+/*
+ * Scan a memory block (exclusive range) for valid pointers and add those
+ * found to the gray list.
+ */
+static void scan_block(void *_start, void *_end,
+		       struct kmemleak_object *scanned)
+{
+	unsigned long *ptr;
+	unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
+	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
+
+	for (ptr = start; ptr < end; ptr++) {
+		unsigned long flags;
+		unsigned long pointer = *ptr;
+		struct kmemleak_object *object;
+
+		if (scan_should_stop())
+			break;
+
+		/*
+		 * When scanning a memory block with a corresponding
+		 * kmemleak_object, the CPU yielding is handled in the calling
+		 * code since it holds the object->lock to avoid the block
+		 * freeing.
+		 */
+		if (!scanned)
+			scan_yield();
+
+		object = find_and_get_object(pointer, 1);
+		if (!object)
+			continue;
+		if (object == scanned) {
+			/* self referenced, ignore */
+			put_object(object);
+			continue;
+		}
+
+		/*
+		 * Avoid the lockdep recursive warning on object->lock being
+		 * previously acquired in scan_object(). These locks are
+		 * enclosed by scan_mutex.
+		 */
+		spin_lock_irqsave_nested(&object->lock, flags,
+					 SINGLE_DEPTH_NESTING);
+		if (!color_white(object)) {
+			/* non-orphan, ignored or new */
+			spin_unlock_irqrestore(&object->lock, flags);
+			put_object(object);
+			continue;
+		}
+
+		/*
+		 * Increase the object's reference count (number of pointers
+		 * to the memory block). If this count reaches the required
+		 * minimum, the object's color will become gray and it will be
+		 * added to the gray_list.
+		 */
+		object->count++;
+		if (color_gray(object))
+			list_add_tail(&object->gray_list, &gray_list);
+		else
+			put_object(object);
+		spin_unlock_irqrestore(&object->lock, flags);
+	}
+}
+
+/*
+ * Scan a memory block corresponding to a kmemleak_object. A condition is
+ * that object->use_count >= 1.
+ */
+static void scan_object(struct kmemleak_object *object)
+{
+	struct kmemleak_scan_area *area;
+	struct hlist_node *elem;
+	unsigned long flags;
+
+	/*
+	 * Once the object->lock is aquired, the corresponding memory block
+	 * cannot be freed (the same lock is aquired in delete_object).
+	 */
+	spin_lock_irqsave(&object->lock, flags);
+	if (object->flags & OBJECT_NO_SCAN)
+		goto out;
+	if (!(object->flags & OBJECT_ALLOCATED))
+		/* already freed object */
+		goto out;
+	if (hlist_empty(&object->area_list))
+		scan_block((void *)object->pointer,
+			   (void *)(object->pointer + object->size), object);
+	else
+		hlist_for_each_entry(area, elem, &object->area_list, node)
+			scan_block((void *)(object->pointer + area->offset),
+				   (void *)(object->pointer + area->offset
+					    + area->length), object);
+out:
+	spin_unlock_irqrestore(&object->lock, flags);
+}
+
+/*
+ * Scan data sections and all the referenced memory blocks allocated via the
+ * kernel's standard allocators. This function must be called with the
+ * scan_mutex held.
+ */
+static void kmemleak_scan(void)
+{
+	unsigned long flags;
+	struct kmemleak_object *object, *tmp;
+	struct task_struct *task;
+	int i;
+
+	/* prepare the kmemleak_object's */
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list) {
+		spin_lock_irqsave(&object->lock, flags);
+#ifdef DEBUG
+		/*
+		 * With a few exceptions there should be a maximum of
+		 * 1 reference to any object at this point.
+		 */
+		if (atomic_read(&object->use_count) > 1) {
+			pr_debug("kmemleak: object->use_count = %d\n",
+				 atomic_read(&object->use_count));
+			dump_object_info(object);
+		}
+#endif
+		/* reset the reference count (whiten the object) */
+		object->count = 0;
+		if (color_gray(object) && get_object(object))
+			list_add_tail(&object->gray_list, &gray_list);
+
+		spin_unlock_irqrestore(&object->lock, flags);
+	}
+	rcu_read_unlock();
+
+	/* data/bss scanning */
+	scan_block(_sdata, _edata, NULL);
+	scan_block(__bss_start, __bss_stop, NULL);
+
+#ifdef CONFIG_SMP
+	/* per-cpu sections scanning */
+	for_each_possible_cpu(i)
+		scan_block(__per_cpu_start + per_cpu_offset(i),
+			   __per_cpu_end + per_cpu_offset(i), NULL);
+#endif
+
+	/*
+	 * Struct page scanning for each node. The code below is not yet safe
+	 * with MEMORY_HOTPLUG.
+	 */
+	for_each_online_node(i) {
+		pg_data_t *pgdat = NODE_DATA(i);
+		unsigned long start_pfn = pgdat->node_start_pfn;
+		unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
+		unsigned long pfn;
+
+		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+			struct page *page;
+
+			if (!pfn_valid(pfn))
+				continue;
+			page = pfn_to_page(pfn);
+			/* only scan if page is in use */
+			if (page_count(page) == 0)
+				continue;
+			scan_block(page, page + 1, NULL);
+		}
+	}
+
+	/*
+	 * Scanning the task stacks may introduce false negatives and it is
+	 * not enabled by default.
+	 */
+	if (kmemleak_stack_scan) {
+		read_lock(&tasklist_lock);
+		for_each_process(task)
+			scan_block(task_stack_page(task),
+				   task_stack_page(task) + THREAD_SIZE, NULL);
+		read_unlock(&tasklist_lock);
+	}
+
+	/*
+	 * Scan the objects already referenced from the sections scanned
+	 * above. More objects will be referenced and, if there are no memory
+	 * leaks, all the objects will be scanned. The list traversal is safe
+	 * for both tail additions and removals from inside the loop. The
+	 * kmemleak objects cannot be freed from outside the loop because their
+	 * use_count was increased.
+	 */
+	object = list_entry(gray_list.next, typeof(*object), gray_list);
+	while (&object->gray_list != &gray_list) {
+		scan_yield();
+
+		/* may add new objects to the list */
+		if (!scan_should_stop())
+			scan_object(object);
+
+		tmp = list_entry(object->gray_list.next, typeof(*object),
+				 gray_list);
+
+		/* remove the object from the list and release it */
+		list_del(&object->gray_list);
+		put_object(object);
+
+		object = tmp;
+	}
+	WARN_ON(!list_empty(&gray_list));
+}
+
+/*
+ * Thread function performing automatic memory scanning. Unreferenced objects
+ * at the end of a memory scan are reported but only the first time.
+ */
+static int kmemleak_scan_thread(void *arg)
+{
+	static int first_run = 1;
+
+	pr_info("kmemleak: Automatic memory scanning thread started\n");
+
+	/*
+	 * Wait before the first scan to allow the system to fully initialize.
+	 */
+	if (first_run) {
+		first_run = 0;
+		ssleep(SECS_FIRST_SCAN);
+	}
+
+	while (!kthread_should_stop()) {
+		struct kmemleak_object *object;
+		signed long timeout = jiffies_scan_wait;
+
+		mutex_lock(&scan_mutex);
+
+		kmemleak_scan();
+		reported_leaks = 0;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(object, &object_list, object_list) {
+			unsigned long flags;
+
+			if (reported_leaks >= REPORTS_NR)
+				break;
+			spin_lock_irqsave(&object->lock, flags);
+			if (!(object->flags & OBJECT_REPORTED) &&
+			    unreferenced_object(object)) {
+				print_unreferenced(NULL, object);
+				object->flags |= OBJECT_REPORTED;
+				reported_leaks++;
+			} else if ((object->flags & OBJECT_REPORTED) &&
+				   referenced_object(object)) {
+				print_referenced(object);
+				object->flags &= ~OBJECT_REPORTED;
+			}
+			spin_unlock_irqrestore(&object->lock, flags);
+		}
+		rcu_read_unlock();
+
+		mutex_unlock(&scan_mutex);
+		/* wait before the next scan */
+		while (timeout && !kthread_should_stop())
+			timeout = schedule_timeout_interruptible(timeout);
+	}
+
+	pr_info("kmemleak: Automatic memory scanning thread ended\n");
+
+	return 0;
+}
+
+/*
+ * Start the automatic memory scanning thread. This function must be called
+ * with the kmemleak_mutex held.
+ */
+void start_scan_thread(void)
+{
+	if (scan_thread)
+		return;
+	scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak");
+	if (IS_ERR(scan_thread)) {
+		pr_warning("kmemleak: Failed to create the scan thread\n");
+		scan_thread = NULL;
+	}
+}
+
+/*
+ * Stop the automatic memory scanning thread. This function must be called
+ * with the kmemleak_mutex held.
+ */
+void stop_scan_thread(void)
+{
+	if (scan_thread) {
+		kthread_stop(scan_thread);
+		scan_thread = NULL;
+	}
+}
+
+/*
+ * Iterate over the object_list and return the first valid object at or after
+ * the required position with its use_count incremented. The function triggers
+ * a memory scanning when the pos argument points to the first position.
+ */
+static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct kmemleak_object *object;
+	loff_t n = *pos;
+
+	if (!n) {
+		kmemleak_scan();
+		reported_leaks = 0;
+	}
+	if (reported_leaks >= REPORTS_NR)
+		return NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list) {
+		if (n-- > 0)
+			continue;
+		if (get_object(object))
+			goto out;
+	}
+	object = NULL;
+out:
+	rcu_read_unlock();
+	return object;
+}
+
+/*
+ * Return the next object in the object_list. The function decrements the
+ * use_count of the previous object and increases that of the next one.
+ */
+static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct kmemleak_object *prev_obj = v;
+	struct kmemleak_object *next_obj = NULL;
+	struct list_head *n = &prev_obj->object_list;
+
+	++(*pos);
+	if (reported_leaks >= REPORTS_NR)
+		goto out;
+
+	rcu_read_lock();
+	list_for_each_continue_rcu(n, &object_list) {
+		next_obj = list_entry(n, struct kmemleak_object, object_list);
+		if (get_object(next_obj))
+			break;
+	}
+	rcu_read_unlock();
+out:
+	put_object(prev_obj);
+	return next_obj;
+}
+
+/*
+ * Decrement the use_count of the last object required, if any.
+ */
+static void kmemleak_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v)
+		put_object(v);
+}
+
+/*
+ * Print the information for an unreferenced object to the seq file.
+ */
+static int kmemleak_seq_show(struct seq_file *seq, void *v)
+{
+	struct kmemleak_object *object = v;
+	unsigned long flags;
+
+	spin_lock_irqsave(&object->lock, flags);
+	if (!unreferenced_object(object))
+		goto out;
+	print_unreferenced(seq, object);
+	reported_leaks++;
+out:
+	spin_unlock_irqrestore(&object->lock, flags);
+	return 0;
+}
+
+static const struct seq_operations kmemleak_seq_ops = {
+	.start = kmemleak_seq_start,
+	.next  = kmemleak_seq_next,
+	.stop  = kmemleak_seq_stop,
+	.show  = kmemleak_seq_show,
+};
+
+static int kmemleak_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if (!atomic_read(&kmemleak_enabled))
+		return -EBUSY;
+
+	ret = mutex_lock_interruptible(&kmemleak_mutex);
+	if (ret < 0)
+		goto out;
+	if (file->f_mode & FMODE_READ) {
+		ret = mutex_lock_interruptible(&scan_mutex);
+		if (ret < 0)
+			goto kmemleak_unlock;
+		ret = seq_open(file, &kmemleak_seq_ops);
+		if (ret < 0)
+			goto scan_unlock;
+	}
+	return ret;
+
+scan_unlock:
+	mutex_unlock(&scan_mutex);
+kmemleak_unlock:
+	mutex_unlock(&kmemleak_mutex);
+out:
+	return ret;
+}
+
+static int kmemleak_release(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		seq_release(inode, file);
+		mutex_unlock(&scan_mutex);
+	}
+	mutex_unlock(&kmemleak_mutex);
+
+	return ret;
+}
+
+/*
+ * File write operation to configure kmemleak at run-time. The following
+ * commands can be written to the /sys/kernel/debug/kmemleak file:
+ *   off	- disable kmemleak (irreversible)
+ *   stack=on	- enable the task stacks scanning
+ *   stack=off	- disable the tasks stacks scanning
+ *   scan=on	- start the automatic memory scanning thread
+ *   scan=off	- stop the automatic memory scanning thread
+ *   scan=...	- set the automatic memory scanning period in seconds (0 to
+ *		  disable it)
+ */
+static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
+			      size_t size, loff_t *ppos)
+{
+	char buf[64];
+	int buf_size;
+
+	if (!atomic_read(&kmemleak_enabled))
+		return -EBUSY;
+
+	buf_size = min(size, (sizeof(buf) - 1));
+	if (strncpy_from_user(buf, user_buf, buf_size) < 0)
+		return -EFAULT;
+	buf[buf_size] = 0;
+
+	if (strncmp(buf, "off", 3) == 0)
+		kmemleak_disable();
+	else if (strncmp(buf, "stack=on", 8) == 0)
+		kmemleak_stack_scan = 1;
+	else if (strncmp(buf, "stack=off", 9) == 0)
+		kmemleak_stack_scan = 0;
+	else if (strncmp(buf, "scan=on", 7) == 0)
+		start_scan_thread();
+	else if (strncmp(buf, "scan=off", 8) == 0)
+		stop_scan_thread();
+	else if (strncmp(buf, "scan=", 5) == 0) {
+		unsigned long secs;
+		int err;
+
+		err = strict_strtoul(buf + 5, 0, &secs);
+		if (err < 0)
+			return err;
+		stop_scan_thread();
+		if (secs) {
+			jiffies_scan_wait = msecs_to_jiffies(secs * 1000);
+			start_scan_thread();
+		}
+	} else
+		return -EINVAL;
+
+	/* ignore the rest of the buffer, only one command at a time */
+	*ppos += size;
+	return size;
+}
+
+static const struct file_operations kmemleak_fops = {
+	.owner		= THIS_MODULE,
+	.open		= kmemleak_open,
+	.read		= seq_read,
+	.write		= kmemleak_write,
+	.llseek		= seq_lseek,
+	.release	= kmemleak_release,
+};
+
+/*
+ * Perform the freeing of the kmemleak internal objects after waiting for any
+ * current memory scan to complete.
+ */
+static int kmemleak_cleanup_thread(void *arg)
+{
+	struct kmemleak_object *object;
+
+	mutex_lock(&kmemleak_mutex);
+	stop_scan_thread();
+	mutex_unlock(&kmemleak_mutex);
+
+	mutex_lock(&scan_mutex);
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list)
+		delete_object(object->pointer);
+	rcu_read_unlock();
+	mutex_unlock(&scan_mutex);
+
+	return 0;
+}
+
+/*
+ * Start the clean-up thread.
+ */
+static void kmemleak_cleanup(void)
+{
+	struct task_struct *cleanup_thread;
+
+	cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
+				     "kmemleak-clean");
+	if (IS_ERR(cleanup_thread))
+		pr_warning("kmemleak: Failed to create the clean-up thread\n");
+}
+
+/*
+ * Disable kmemleak. No memory allocation/freeing will be traced once this
+ * function is called. Disabling kmemleak is an irreversible operation.
+ */
+static void kmemleak_disable(void)
+{
+	/* atomically check whether it was already invoked */
+	if (atomic_cmpxchg(&kmemleak_error, 0, 1))
+		return;
+
+	/* stop any memory operation tracing */
+	atomic_set(&kmemleak_early_log, 0);
+	atomic_set(&kmemleak_enabled, 0);
+
+	/* check whether it is too early for a kernel thread */
+	if (atomic_read(&kmemleak_initialized))
+		kmemleak_cleanup();
+
+	pr_info("Kernel memory leak detector disabled\n");
+}
+
+/*
+ * Allow boot-time kmemleak disabling (enabled by default).
+ */
+static int kmemleak_boot_config(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (strcmp(str, "off") == 0)
+		kmemleak_disable();
+	else if (strcmp(str, "on") != 0)
+		return -EINVAL;
+	return 0;
+}
+early_param("kmemleak", kmemleak_boot_config);
+
+/*
+ * Kkmemleak initialization.
+ */
+void __init kmemleak_init(void)
+{
+	int i;
+	unsigned long flags;
+
+	jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD);
+	jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
+	jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
+
+	object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
+	scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
+	INIT_PRIO_TREE_ROOT(&object_tree_root);
+
+	/* the kernel is still in UP mode, so disabling the IRQs is enough */
+	local_irq_save(flags);
+	if (!atomic_read(&kmemleak_error)) {
+		atomic_set(&kmemleak_enabled, 1);
+		atomic_set(&kmemleak_early_log, 0);
+	}
+	local_irq_restore(flags);
+
+	/*
+	 * This is the point where tracking allocations is safe. Automatic
+	 * scanning is started during the late initcall. Add the early logged
+	 * callbacks to the kmemleak infrastructure.
+	 */
+	for (i = 0; i < crt_early_log; i++) {
+		struct early_log *log = &early_log[i];
+
+		switch (log->op_type) {
+		case KMEMLEAK_ALLOC:
+			kmemleak_alloc(log->ptr, log->size, log->min_count,
+				       GFP_KERNEL);
+			break;
+		case KMEMLEAK_FREE:
+			kmemleak_free(log->ptr);
+			break;
+		case KMEMLEAK_NOT_LEAK:
+			kmemleak_not_leak(log->ptr);
+			break;
+		case KMEMLEAK_IGNORE:
+			kmemleak_ignore(log->ptr);
+			break;
+		case KMEMLEAK_SCAN_AREA:
+			kmemleak_scan_area(log->ptr, log->offset, log->length,
+					   GFP_KERNEL);
+			break;
+		case KMEMLEAK_NO_SCAN:
+			kmemleak_no_scan(log->ptr);
+			break;
+		default:
+			WARN_ON(1);
+		}
+	}
+}
+
+/*
+ * Late initialization function.
+ */
+static int __init kmemleak_late_init(void)
+{
+	struct dentry *dentry;
+
+	atomic_set(&kmemleak_initialized, 1);
+
+	if (atomic_read(&kmemleak_error)) {
+		/*
+		 * Some error occured and kmemleak was disabled. There is a
+		 * small chance that kmemleak_disable() was called immediately
+		 * after setting kmemleak_initialized and we may end up with
+		 * two clean-up threads but serialized by scan_mutex.
+		 */
+		kmemleak_cleanup();
+		return -ENOMEM;
+	}
+
+	dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL,
+				     &kmemleak_fops);
+	if (!dentry)
+		pr_warning("kmemleak: Failed to create the debugfs kmemleak "
+			   "file\n");
+	mutex_lock(&kmemleak_mutex);
+	start_scan_thread();
+	mutex_unlock(&kmemleak_mutex);
+
+	pr_info("Kernel memory leak detector initialized\n");
+
+	return 0;
+}
+late_initcall(kmemleak_late_init);
-- 
cgit v0.10.2


From 04f70336c80c43a15e617b36c2043dfa0ad6ed0f Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:22:39 +0100
Subject: kmemleak: Add documentation on the memory leak detector

This patch adds the Documentation/kmemleak.txt file with some
information about how kmemleak works.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4a3c220..04a44cc 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1077,6 +1077,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			Configure the RouterBoard 532 series on-chip
 			Ethernet adapter MAC address.
 
+	kmemleak=	[KNL] Boot-time kmemleak enable/disable
+			Valid arguments: on, off
+			Default: on
+
 	kstack=N	[X86] Print N words from the kernel stack
 			in oops dumps.
 
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
new file mode 100644
index 0000000..0112da3
--- /dev/null
+++ b/Documentation/kmemleak.txt
@@ -0,0 +1,142 @@
+Kernel Memory Leak Detector
+===========================
+
+Introduction
+------------
+
+Kmemleak provides a way of detecting possible kernel memory leaks in a
+way similar to a tracing garbage collector
+(http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
+with the difference that the orphan objects are not freed but only
+reported via /sys/kernel/debug/kmemleak. A similar method is used by the
+Valgrind tool (memcheck --leak-check) to detect the memory leaks in
+user-space applications.
+
+Usage
+-----
+
+CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
+thread scans the memory every 10 minutes (by default) and prints any new
+unreferenced objects found. To trigger an intermediate scan and display
+all the possible memory leaks:
+
+  # mount -t debugfs nodev /sys/kernel/debug/
+  # cat /sys/kernel/debug/kmemleak
+
+Note that the orphan objects are listed in the order they were allocated
+and one object at the beginning of the list may cause other subsequent
+objects to be reported as orphan.
+
+Memory scanning parameters can be modified at run-time by writing to the
+/sys/kernel/debug/kmemleak file. The following parameters are supported:
+
+  off		- disable kmemleak (irreversible)
+  stack=on	- enable the task stacks scanning
+  stack=off	- disable the tasks stacks scanning
+  scan=on	- start the automatic memory scanning thread
+  scan=off	- stop the automatic memory scanning thread
+  scan=<secs>	- set the automatic memory scanning period in seconds (0
+		  to disable it)
+
+Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
+the kernel command line.
+
+Basic Algorithm
+---------------
+
+The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and
+friends are traced and the pointers, together with additional
+information like size and stack trace, are stored in a prio search tree.
+The corresponding freeing function calls are tracked and the pointers
+removed from the kmemleak data structures.
+
+An allocated block of memory is considered orphan if no pointer to its
+start address or to any location inside the block can be found by
+scanning the memory (including saved registers). This means that there
+might be no way for the kernel to pass the address of the allocated
+block to a freeing function and therefore the block is considered a
+memory leak.
+
+The scanning algorithm steps:
+
+  1. mark all objects as white (remaining white objects will later be
+     considered orphan)
+  2. scan the memory starting with the data section and stacks, checking
+     the values against the addresses stored in the prio search tree. If
+     a pointer to a white object is found, the object is added to the
+     gray list
+  3. scan the gray objects for matching addresses (some white objects
+     can become gray and added at the end of the gray list) until the
+     gray set is finished
+  4. the remaining white objects are considered orphan and reported via
+     /sys/kernel/debug/kmemleak
+
+Some allocated memory blocks have pointers stored in the kernel's
+internal data structures and they cannot be detected as orphans. To
+avoid this, kmemleak can also store the number of values pointing to an
+address inside the block address range that need to be found so that the
+block is not considered a leak. One example is __vmalloc().
+
+Kmemleak API
+------------
+
+See the include/linux/kmemleak.h header for the functions prototype.
+
+kmemleak_init		 - initialize kmemleak
+kmemleak_alloc		 - notify of a memory block allocation
+kmemleak_free		 - notify of a memory block freeing
+kmemleak_not_leak	 - mark an object as not a leak
+kmemleak_ignore		 - do not scan or report an object as leak
+kmemleak_scan_area	 - add scan areas inside a memory block
+kmemleak_no_scan	 - do not scan a memory block
+kmemleak_erase		 - erase an old value in a pointer variable
+kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness
+kmemleak_free_recursive	 - as kmemleak_free but checks the recursiveness
+
+Dealing with false positives/negatives
+--------------------------------------
+
+The false negatives are real memory leaks (orphan objects) but not
+reported by kmemleak because values found during the memory scanning
+point to such objects. To reduce the number of false negatives, kmemleak
+provides the kmemleak_ignore, kmemleak_scan_area, kmemleak_no_scan and
+kmemleak_erase functions (see above). The task stacks also increase the
+amount of false negatives and their scanning is not enabled by default.
+
+The false positives are objects wrongly reported as being memory leaks
+(orphan). For objects known not to be leaks, kmemleak provides the
+kmemleak_not_leak function. The kmemleak_ignore could also be used if
+the memory block is known not to contain other pointers and it will no
+longer be scanned.
+
+Some of the reported leaks are only transient, especially on SMP
+systems, because of pointers temporarily stored in CPU registers or
+stacks. Kmemleak defines MSECS_MIN_AGE (defaulting to 1000) representing
+the minimum age of an object to be reported as a memory leak.
+
+Limitations and Drawbacks
+-------------------------
+
+The main drawback is the reduced performance of memory allocation and
+freeing. To avoid other penalties, the memory scanning is only performed
+when the /sys/kernel/debug/kmemleak file is read. Anyway, this tool is
+intended for debugging purposes where the performance might not be the
+most important requirement.
+
+To keep the algorithm simple, kmemleak scans for values pointing to any
+address inside a block's address range. This may lead to an increased
+number of false negatives. However, it is likely that a real memory leak
+will eventually become visible.
+
+Another source of false negatives is the data stored in non-pointer
+values. In a future version, kmemleak could only scan the pointer
+members in the allocated structures. This feature would solve many of
+the false negative cases described above.
+
+The tool can report false positives. These are cases where an allocated
+block doesn't need to be freed (some cases in the init_call functions),
+the pointer is calculated by other methods than the usual container_of
+macro or the pointer is stored in a location not scanned by kmemleak.
+
+Page allocations and ioremap are not tracked. Only the ARM and x86
+architectures are currently supported.
-- 
cgit v0.10.2


From d5cff635290aec9ad7e6ee546aa4fae895361cbb Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:22:40 +0100
Subject: kmemleak: Add the slab memory allocation/freeing hooks

This patch adds the callbacks to kmemleak_(alloc|free) functions from
the slab allocator. The patch also adds the SLAB_NOLEAKTRACE flag to
avoid recursive calls to kmemleak when it allocates its own data
structures.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 24c5602..4880306 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -62,6 +62,8 @@
 # define SLAB_DEBUG_OBJECTS	0x00000000UL
 #endif
 
+#define SLAB_NOLEAKTRACE	0x00800000UL	/* Avoid kmemleak tracing */
+
 /* The following flags affect the page allocator grouping pages by mobility */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
 #define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
diff --git a/mm/slab.c b/mm/slab.c
index f85831d..859067f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -107,6 +107,7 @@
 #include	<linux/string.h>
 #include	<linux/uaccess.h>
 #include	<linux/nodemask.h>
+#include	<linux/kmemleak.h>
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
 #include	<linux/fault-inject.h>
@@ -178,13 +179,13 @@
 			 SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS)
+			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
 			 SLAB_CACHE_DMA | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS)
+			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE)
 #endif
 
 /*
@@ -964,6 +965,14 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	struct array_cache *nc = NULL;
 
 	nc = kmalloc_node(memsize, GFP_KERNEL, node);
+	/*
+	 * The array_cache structures contain pointers to free object.
+	 * However, when such objects are allocated or transfered to another
+	 * cache the pointers are not cleared and they could be counted as
+	 * valid references during a kmemleak scan. Therefore, kmemleak must
+	 * not scan such objects.
+	 */
+	kmemleak_no_scan(nc);
 	if (nc) {
 		nc->avail = 0;
 		nc->limit = entries;
@@ -2621,6 +2630,14 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 		/* Slab management obj is off-slab. */
 		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
 					      local_flags, nodeid);
+		/*
+		 * If the first object in the slab is leaked (it's allocated
+		 * but no one has a reference to it), we want to make sure
+		 * kmemleak does not treat the ->s_mem pointer as a reference
+		 * to the object. Otherwise we will not report the leak.
+		 */
+		kmemleak_scan_area(slabp, offsetof(struct slab, list),
+				   sizeof(struct list_head), local_flags);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -3141,6 +3158,12 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
 	}
+	/*
+	 * To avoid a false negative, if an object that is in one of the
+	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
+	 * treat the array pointers as a reference to the object.
+	 */
+	kmemleak_erase(&ac->entry[ac->avail]);
 	return objp;
 }
 
@@ -3360,6 +3383,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
   out:
 	local_irq_restore(save_flags);
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+	kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
+				 flags);
 
 	if (unlikely((flags & __GFP_ZERO) && ptr))
 		memset(ptr, 0, obj_size(cachep));
@@ -3415,6 +3440,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	objp = __do_cache_alloc(cachep, flags);
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+	kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
+				 flags);
 	prefetchw(objp);
 
 	if (unlikely((flags & __GFP_ZERO) && objp))
@@ -3530,6 +3557,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 	struct array_cache *ac = cpu_cache_get(cachep);
 
 	check_irq_off();
+	kmemleak_free_recursive(objp, cachep->flags);
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
 	/*
-- 
cgit v0.10.2


From 4374e616d28e65265a5b433ceece275449f3d2e3 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:23:17 +0100
Subject: kmemleak: Add the slob memory allocation/freeing hooks

This patch adds the callbacks to kmemleak_(alloc|free) functions from the
slob allocator.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/slob.c b/mm/slob.c
index 9b1737b..12f2614 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -67,6 +67,7 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 #include <linux/kmemtrace.h>
+#include <linux/kmemleak.h>
 #include <asm/atomic.h>
 
 /*
@@ -509,6 +510,7 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
 				   size, PAGE_SIZE << order, gfp, node);
 	}
 
+	kmemleak_alloc(ret, size, 1, gfp);
 	return ret;
 }
 EXPORT_SYMBOL(__kmalloc_node);
@@ -521,6 +523,7 @@ void kfree(const void *block)
 
 	if (unlikely(ZERO_OR_NULL_PTR(block)))
 		return;
+	kmemleak_free(block);
 
 	sp = slob_page(block);
 	if (is_slob_page(sp)) {
@@ -584,12 +587,14 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 	} else if (flags & SLAB_PANIC)
 		panic("Cannot create slab cache %s\n", name);
 
+	kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL);
 	return c;
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
 void kmem_cache_destroy(struct kmem_cache *c)
 {
+	kmemleak_free(c);
 	slob_free(c, sizeof(struct kmem_cache));
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
@@ -613,6 +618,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 	if (c->ctor)
 		c->ctor(b);
 
+	kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
 	return b;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
@@ -635,6 +641,7 @@ static void kmem_rcu_free(struct rcu_head *head)
 
 void kmem_cache_free(struct kmem_cache *c, void *b)
 {
+	kmemleak_free_recursive(b, c->flags);
 	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
 		struct slob_rcu *slob_rcu;
 		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
-- 
cgit v0.10.2


From 06f22f13f3cc2eff00db09f053218e5d4b757bc8 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:23:18 +0100
Subject: kmemleak: Add the slub memory allocation/freeing hooks

This patch adds the callbacks to kmemleak_(alloc|free) functions from the
slub allocator.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/slub.c b/mm/slub.c
index 5e805a6..6674a79 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -20,6 +20,7 @@
 #include <linux/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/kmemleak.h>
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
 #include <linux/debugobjects.h>
@@ -143,7 +144,7 @@
  * Set of flags that will prevent slab merging
  */
 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU)
+		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)
 
 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
 		SLAB_CACHE_DMA)
@@ -1617,6 +1618,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, objsize);
 
+	kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
 	return object;
 }
 
@@ -1746,6 +1748,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
 
+	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 	debug_check_no_locks_freed(object, c->objsize);
-- 
cgit v0.10.2


From 89219d37a2377c44fde7bff0bf0623453c05329a Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:23:19 +0100
Subject: kmemleak: Add the vmalloc memory allocation/freeing hooks

This patch adds the callbacks to kmemleak_(alloc|free) functions from
vmalloc/vfree.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 083716e..b7db935 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -25,6 +25,7 @@
 #include <linux/rcupdate.h>
 #include <linux/bootmem.h>
 #include <linux/pfn.h>
+#include <linux/kmemleak.h>
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
@@ -1327,6 +1328,9 @@ static void __vunmap(const void *addr, int deallocate_pages)
 void vfree(const void *addr)
 {
 	BUG_ON(in_interrupt());
+
+	kmemleak_free(addr);
+
 	__vunmap(addr, 1);
 }
 EXPORT_SYMBOL(vfree);
@@ -1439,8 +1443,17 @@ fail:
 
 void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
 {
-	return __vmalloc_area_node(area, gfp_mask, prot, -1,
-					__builtin_return_address(0));
+	void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1,
+					 __builtin_return_address(0));
+
+	/*
+	 * A ref_count = 3 is needed because the vm_struct and vmap_area
+	 * structures allocated in the __get_vm_area_node() function contain
+	 * references to the virtual address of the vmalloc'ed block.
+	 */
+	kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask);
+
+	return addr;
 }
 
 /**
@@ -1459,6 +1472,8 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 						int node, void *caller)
 {
 	struct vm_struct *area;
+	void *addr;
+	unsigned long real_size = size;
 
 	size = PAGE_ALIGN(size);
 	if (!size || (size >> PAGE_SHIFT) > num_physpages)
@@ -1470,7 +1485,16 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 	if (!area)
 		return NULL;
 
-	return __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+
+	/*
+	 * A ref_count = 3 is needed because the vm_struct and vmap_area
+	 * structures allocated in the __get_vm_area_node() function contain
+	 * references to the virtual address of the vmalloc'ed block.
+	 */
+	kmemleak_alloc(addr, real_size, 3, gfp_mask);
+
+	return addr;
 }
 
 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
-- 
cgit v0.10.2


From dbb1f81ca67a56c6cfce4c94d07c76378fd4af9e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:23:19 +0100
Subject: kmemleak: Add kmemleak_alloc callback from alloc_large_system_hash

The alloc_large_system_hash function is called from various places in
the kernel and it contains pointers to other allocated structures. It
therefore needs to be traced by kmemleak.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 474c7e9..17d5f53 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,6 +46,7 @@
 #include <linux/page-isolation.h>
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
+#include <linux/kmemleak.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -4546,6 +4547,16 @@ void *__init alloc_large_system_hash(const char *tablename,
 	if (_hash_mask)
 		*_hash_mask = (1 << log2qty) - 1;
 
+	/*
+	 * If hashdist is set, the table allocation is done with __vmalloc()
+	 * which invokes the kmemleak_alloc() callback. This function may also
+	 * be called before the slab and kmemleak are initialised when
+	 * kmemleak simply buffers the request to be executed later
+	 * (GFP_ATOMIC flag ignored in this case).
+	 */
+	if (!hashdist)
+		kmemleak_alloc(table, size, 1, GFP_ATOMIC);
+
 	return table;
 }
 
-- 
cgit v0.10.2


From 4f2294b6dc88d99295230d97fef2c9863cec44c3 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:23:20 +0100
Subject: kmemleak: Add modules support

This patch handles the kmemleak operations needed for modules loading so
that memory allocations from inside a module are properly tracked.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/kernel/module.c b/kernel/module.c
index 2383e60..5cd55ab 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -53,6 +53,7 @@
 #include <linux/ftrace.h>
 #include <linux/async.h>
 #include <linux/percpu.h>
+#include <linux/kmemleak.h>
 
 #if 0
 #define DEBUGP printk
@@ -430,6 +431,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align,
 	unsigned long extra;
 	unsigned int i;
 	void *ptr;
+	int cpu;
 
 	if (align > PAGE_SIZE) {
 		printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
@@ -459,6 +461,11 @@ static void *percpu_modalloc(unsigned long size, unsigned long align,
 			if (!split_block(i, size))
 				return NULL;
 
+		/* add the per-cpu scanning areas */
+		for_each_possible_cpu(cpu)
+			kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
+				       GFP_KERNEL);
+
 		/* Mark allocated */
 		pcpu_size[i] = -pcpu_size[i];
 		return ptr;
@@ -473,6 +480,7 @@ static void percpu_modfree(void *freeme)
 {
 	unsigned int i;
 	void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
+	int cpu;
 
 	/* First entry is core kernel percpu data. */
 	for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -484,6 +492,10 @@ static void percpu_modfree(void *freeme)
 	BUG();
 
  free:
+	/* remove the per-cpu scanning areas */
+	for_each_possible_cpu(cpu)
+		kmemleak_free(freeme + per_cpu_offset(cpu));
+
 	/* Merge with previous? */
 	if (pcpu_size[i-1] >= 0) {
 		pcpu_size[i-1] += pcpu_size[i];
@@ -1876,6 +1888,36 @@ static void *module_alloc_update_bounds(unsigned long size)
 	return ret;
 }
 
+#ifdef CONFIG_DEBUG_KMEMLEAK
+static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
+				 Elf_Shdr *sechdrs, char *secstrings)
+{
+	unsigned int i;
+
+	/* only scan the sections containing data */
+	kmemleak_scan_area(mod->module_core, (unsigned long)mod -
+			   (unsigned long)mod->module_core,
+			   sizeof(struct module), GFP_KERNEL);
+
+	for (i = 1; i < hdr->e_shnum; i++) {
+		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+			continue;
+		if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0
+		    && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
+			continue;
+
+		kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr -
+				   (unsigned long)mod->module_core,
+				   sechdrs[i].sh_size, GFP_KERNEL);
+	}
+}
+#else
+static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
+					Elf_Shdr *sechdrs, char *secstrings)
+{
+}
+#endif
+
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
 static noinline struct module *load_module(void __user *umod,
@@ -2046,6 +2088,12 @@ static noinline struct module *load_module(void __user *umod,
 
 	/* Do the allocs. */
 	ptr = module_alloc_update_bounds(mod->core_size);
+	/*
+	 * The pointer to this block is stored in the module structure
+	 * which is inside the block. Just mark it as not being a
+	 * leak.
+	 */
+	kmemleak_not_leak(ptr);
 	if (!ptr) {
 		err = -ENOMEM;
 		goto free_percpu;
@@ -2054,6 +2102,13 @@ static noinline struct module *load_module(void __user *umod,
 	mod->module_core = ptr;
 
 	ptr = module_alloc_update_bounds(mod->init_size);
+	/*
+	 * The pointer to this block is stored in the module structure
+	 * which is inside the block. This block doesn't need to be
+	 * scanned as it contains data and code that will be freed
+	 * after the module is initialized.
+	 */
+	kmemleak_ignore(ptr);
 	if (!ptr && mod->init_size) {
 		err = -ENOMEM;
 		goto free_core;
@@ -2084,6 +2139,7 @@ static noinline struct module *load_module(void __user *umod,
 	}
 	/* Module has been moved. */
 	mod = (void *)sechdrs[modindex].sh_addr;
+	kmemleak_load_module(mod, hdr, sechdrs, secstrings);
 
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
 	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
-- 
cgit v0.10.2


From 2e1483c995bbd0fa6cbd055ad76088a520799ba4 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:24:13 +0100
Subject: kmemleak: Remove some of the kmemleak false positives

There are allocations for which the main pointer cannot be found but
they are not memory leaks. This patch fixes some of them. For more
information on false positives, see Documentation/kmemleak.txt.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 08151d4..961c1a7 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -104,6 +104,7 @@
 #include <linux/io.h>
 #include <asm/system.h>
 #include <linux/uaccess.h>
+#include <linux/kmemleak.h>
 
 #define MAX_NR_CON_DRIVER 16
 
@@ -2880,6 +2881,12 @@ static int __init con_init(void)
 	 */
 	for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
 		vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data));
+		/*
+		 * Kmemleak does not track the memory allocated via
+		 * alloc_bootmem() but this block contains pointers to
+		 * other blocks allocated via kmalloc.
+		 */
+		kmemleak_alloc(vc, sizeof(struct vc_data), 1, GFP_ATOMIC);
 		INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
 		visual_init(vc, currcons, 1);
 		vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f45dbc1..d250f80 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -25,6 +25,7 @@
 #include <linux/uio.h>
 #include <linux/namei.h>
 #include <linux/log2.h>
+#include <linux/kmemleak.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -492,6 +493,11 @@ void __init bdev_cache_init(void)
 	bd_mnt = kern_mount(&bd_type);
 	if (IS_ERR(bd_mnt))
 		panic("Cannot create bdev pseudo-fs");
+	/*
+	 * This vfsmount structure is only used to obtain the
+	 * blockdev_superblock, so tell kmemleak not to report it.
+	 */
+	kmemleak_not_leak(bd_mnt);
 	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
 }
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 1581ff2..26fd9d1 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -86,7 +86,12 @@ struct percpu_data {
 	void *ptrs[1];
 };
 
+/* pointer disguising messes up the kmemleak objects tracking */
+#ifndef CONFIG_DEBUG_KMEMLEAK
 #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
+#else
+#define __percpu_disguise(pdata) (struct percpu_data *)(pdata)
+#endif
 
 #define per_cpu_ptr(ptr, cpu)						\
 ({									\
-- 
cgit v0.10.2


From 3bba00d7bdd57cb7aa739b751fa0a1fbbb04dc18 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:24:13 +0100
Subject: kmemleak: Enable the building of the memory leak detector

This patch adds the Kconfig.debug and Makefile entries needed for
building kmemleak into the kernel.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6cdcf38..5cc3506 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -336,6 +336,28 @@ config SLUB_STATS
 	  out which slabs are relevant to a particular load.
 	  Try running: slabinfo -DA
 
+config DEBUG_KMEMLEAK
+	bool "Kernel memory leak detector"
+	depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \
+		!MEMORY_HOTPLUG
+	select DEBUG_SLAB if SLAB
+	select SLUB_DEBUG if SLUB
+	select DEBUG_FS if SYSFS
+	select STACKTRACE if STACKTRACE_SUPPORT
+	select KALLSYMS
+	help
+	  Say Y here if you want to enable the memory leak
+	  detector. The memory allocation/freeing is traced in a way
+	  similar to the Boehm's conservative garbage collector, the
+	  difference being that the orphan objects are not freed but
+	  only shown in /sys/kernel/debug/kmemleak. Enabling this
+	  feature will introduce an overhead to memory
+	  allocations. See Documentation/kmemleak.txt for more
+	  details.
+
+	  In order to access the kmemleak file, debugfs needs to be
+	  mounted (usually at /sys/kernel/debug).
+
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
 	depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64)
diff --git a/mm/Makefile b/mm/Makefile
index ec73c68..cb84a02 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -38,3 +38,4 @@ obj-$(CONFIG_SMP) += allocpercpu.o
 endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
-- 
cgit v0.10.2


From 0822ee4ac1ae6af5a953f97f75553738834b10b9 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:24:14 +0100
Subject: kmemleak: Simple testing module for kmemleak

This patch adds a loadable module that deliberately leaks memory. It
is used for testing various memory leaking scenarios.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5cc3506..116a350 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -358,6 +358,16 @@ config DEBUG_KMEMLEAK
 	  In order to access the kmemleak file, debugfs needs to be
 	  mounted (usually at /sys/kernel/debug).
 
+config DEBUG_KMEMLEAK_TEST
+	tristate "Simple test for the kernel memory leak detector"
+	depends on DEBUG_KMEMLEAK
+	help
+	  Say Y or M here to build a test for the kernel memory leak
+	  detector. This option enables a module that explicitly leaks
+	  memory.
+
+	  If unsure, say N.
+
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
 	depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64)
diff --git a/mm/Makefile b/mm/Makefile
index cb84a02..e89acb0 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -39,3 +39,4 @@ endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
+obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
diff --git a/mm/kmemleak-test.c b/mm/kmemleak-test.c
new file mode 100644
index 0000000..d5292fc
--- /dev/null
+++ b/mm/kmemleak-test.c
@@ -0,0 +1,111 @@
+/*
+ * mm/kmemleak-test.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/fdtable.h>
+
+#include <linux/kmemleak.h>
+
+struct test_node {
+	long header[25];
+	struct list_head list;
+	long footer[25];
+};
+
+static LIST_HEAD(test_list);
+static DEFINE_PER_CPU(void *, test_pointer);
+
+/*
+ * Some very simple testing. This function needs to be extended for
+ * proper testing.
+ */
+static int __init kmemleak_test_init(void)
+{
+	struct test_node *elem;
+	int i;
+
+	printk(KERN_INFO "Kmemleak testing\n");
+
+	/* make some orphan objects */
+	pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+	pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
+#ifndef CONFIG_MODULES
+	pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n",
+		kmem_cache_alloc(files_cachep, GFP_KERNEL));
+	pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n",
+		kmem_cache_alloc(files_cachep, GFP_KERNEL));
+#endif
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+	pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64));
+
+	/*
+	 * Add elements to a list. They should only appear as orphan
+	 * after the module is removed.
+	 */
+	for (i = 0; i < 10; i++) {
+		elem = kmalloc(sizeof(*elem), GFP_KERNEL);
+		pr_info("kmemleak: kmalloc(sizeof(*elem)) = %p\n", elem);
+		if (!elem)
+			return -ENOMEM;
+		memset(elem, 0, sizeof(*elem));
+		INIT_LIST_HEAD(&elem->list);
+
+		list_add_tail(&elem->list, &test_list);
+	}
+
+	for_each_possible_cpu(i) {
+		per_cpu(test_pointer, i) = kmalloc(129, GFP_KERNEL);
+		pr_info("kmemleak: kmalloc(129) = %p\n",
+			per_cpu(test_pointer, i));
+	}
+
+	return 0;
+}
+module_init(kmemleak_test_init);
+
+static void __exit kmemleak_test_exit(void)
+{
+	struct test_node *elem, *tmp;
+
+	/*
+	 * Remove the list elements without actually freeing the
+	 * memory.
+	 */
+	list_for_each_entry_safe(elem, tmp, &test_list, list)
+		list_del(&elem->list);
+}
+module_exit(kmemleak_test_exit);
+
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 3aa27bbe7a6536d1ec859d3a97caf3319b5081b7 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:24:14 +0100
Subject: kmemleak: Add the corresponding MAINTAINERS entry

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index cf4abdd..e5af306 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3359,6 +3359,12 @@ F:	Documentation/trace/kmemtrace.txt
 F:	include/trace/kmemtrace.h
 F:	kernel/trace/kmemtrace.c
 
+KMEMLEAK
+P:	Catalin Marinas
+M:	catalin.marinas@arm.com
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+
 KPROBES
 P:	Ananth N Mavinakayanahalli
 M:	ananth@in.ibm.com
-- 
cgit v0.10.2


From 441c7e0a2ed38827b48b907bd1fa29faba2017a3 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 10 Jun 2009 20:05:53 +0300
Subject: bootmem: use slab if bootmem is no longer available

As a preparation for initializing the slab allocator early, make sure the
bootmem allocator does not crash and burn if someone calls it after slab is up;
otherwise we'd need a flag day for switching to early slab.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/bootmem.c b/mm/bootmem.c
index daf9271..457269c 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -532,6 +532,9 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
 					unsigned long size, unsigned long align,
 					unsigned long goal, unsigned long limit)
 {
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc(size, GFP_NOWAIT);
+
 #ifdef CONFIG_HAVE_ARCH_BOOTMEM
 	bootmem_data_t *p_bdata;
 
-- 
cgit v0.10.2


From c91c4773b334d4d3a6d44626dc2a558ad97b86f3 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 11 Jun 2009 08:10:28 +0300
Subject: bootmem: fix slab fallback on numa

If the user requested bootmem allocation on a specific node, we should use
kzalloc_node() for the fallback allocation.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 457269c..282df0a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -665,6 +665,9 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
 	return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
 }
 
@@ -696,6 +699,9 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
 {
 	void *ptr;
 
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
 	ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
 	if (ptr)
 		return ptr;
@@ -748,6 +754,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
 	return ___alloc_bootmem_node(pgdat->bdata, size, align,
 				goal, ARCH_LOW_ADDRESS_LIMIT);
 }
-- 
cgit v0.10.2


From 83b519e8b9572c319c8e0c615ee5dd7272856090 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 10 Jun 2009 19:40:04 +0300
Subject: slab: setup allocators earlier in the boot sequence

This patch makes kmalloc() available earlier in the boot sequence so we can get
rid of some bootmem allocations. The bulk of the changes are due to
kmem_cache_init() being called with interrupts disabled which requires some
changes to allocator boostrap code.

Note: 32-bit x86 does WP protect test in mem_init() so we must setup traps
before we call mem_init() during boot as reported by Ingo Molnar:

  We have a hard crash in the WP-protect code:

  [    0.000000] Checking if this processor honours the WP bit even in supervisor mode...BUG: Int 14: CR2 ffcff000
  [    0.000000]      EDI 00000188  ESI 00000ac7  EBP c17eaf9c  ESP c17eaf8c
  [    0.000000]      EBX 000014e0  EDX 0000000e  ECX 01856067  EAX 00000001
  [    0.000000]      err 00000003  EIP c10135b1   CS 00000060  flg 00010002
  [    0.000000] Stack: c17eafa8 c17fd410 c16747bc c17eafc4 c17fd7e5 000011fd f8616000 c18237cc
  [    0.000000]        00099800 c17bb000 c17eafec c17f1668 000001c5 c17f1322 c166e039 c1822bf0
  [    0.000000]        c166e033 c153a014 c18237cc 00020800 c17eaff8 c17f106a 00020800 01ba5003
  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.30-tip-02161-g7a74539-dirty #52203
  [    0.000000] Call Trace:
  [    0.000000]  [<c15357c2>] ? printk+0x14/0x16
  [    0.000000]  [<c10135b1>] ? do_test_wp_bit+0x19/0x23
  [    0.000000]  [<c17fd410>] ? test_wp_bit+0x26/0x64
  [    0.000000]  [<c17fd7e5>] ? mem_init+0x1ba/0x1d8
  [    0.000000]  [<c17f1668>] ? start_kernel+0x164/0x2f7
  [    0.000000]  [<c17f1322>] ? unknown_bootoption+0x0/0x19c
  [    0.000000]  [<c17f106a>] ? __init_begin+0x6a/0x6f

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/init/main.c b/init/main.c
index bb7dc57..0ab82a4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -574,6 +574,28 @@ asmlinkage void __init start_kernel(void)
 	setup_nr_cpu_ids();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
 
+	build_all_zonelists();
+	page_alloc_init();
+
+	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
+	parse_early_param();
+	parse_args("Booting kernel", static_command_line, __start___param,
+		   __stop___param - __start___param,
+		   &unknown_bootoption);
+	/*
+	 * These use large bootmem allocations and must precede
+	 * kmem_cache_init()
+	 */
+	pidhash_init();
+	vmalloc_init();
+	vfs_caches_init_early();
+	sort_main_extable();
+	trap_init();
+	/*
+	 * Set up kernel memory allocators
+	 */
+	mem_init();
+	kmem_cache_init();
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
@@ -585,25 +607,15 @@ asmlinkage void __init start_kernel(void)
 	 * fragile until we cpu_idle() for the first time.
 	 */
 	preempt_disable();
-	build_all_zonelists();
-	page_alloc_init();
-	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
-	parse_early_param();
-	parse_args("Booting kernel", static_command_line, __start___param,
-		   __stop___param - __start___param,
-		   &unknown_bootoption);
 	if (!irqs_disabled()) {
 		printk(KERN_WARNING "start_kernel(): bug: interrupts were "
 				"enabled *very* early, fixing it\n");
 		local_irq_disable();
 	}
-	sort_main_extable();
-	trap_init();
 	rcu_init();
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
-	pidhash_init();
 	init_timers();
 	hrtimers_init();
 	softirq_init();
@@ -645,14 +657,10 @@ asmlinkage void __init start_kernel(void)
 		initrd_start = 0;
 	}
 #endif
-	vmalloc_init();
-	vfs_caches_init_early();
 	cpuset_init_early();
 	page_cgroup_init();
-	mem_init();
 	enable_debug_pagealloc();
 	cpu_hotplug_init();
-	kmem_cache_init();
 	kmemtrace_init();
 	debug_objects_mem_init();
 	idr_init_cache();
diff --git a/mm/slab.c b/mm/slab.c
index f85831d..2bd611f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -315,7 +315,7 @@ static int drain_freelist(struct kmem_cache *cache,
 			struct kmem_list3 *l3, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
 			int node);
-static int enable_cpucache(struct kmem_cache *cachep);
+static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
 /*
@@ -958,12 +958,12 @@ static void __cpuinit start_cpu_timer(int cpu)
 }
 
 static struct array_cache *alloc_arraycache(int node, int entries,
-					    int batchcount)
+					    int batchcount, gfp_t gfp)
 {
 	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
 	struct array_cache *nc = NULL;
 
-	nc = kmalloc_node(memsize, GFP_KERNEL, node);
+	nc = kmalloc_node(memsize, gfp, node);
 	if (nc) {
 		nc->avail = 0;
 		nc->limit = entries;
@@ -1003,7 +1003,7 @@ static int transfer_objects(struct array_cache *to,
 #define drain_alien_cache(cachep, alien) do { } while (0)
 #define reap_alien(cachep, l3) do { } while (0)
 
-static inline struct array_cache **alloc_alien_cache(int node, int limit)
+static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
 	return (struct array_cache **)BAD_ALIEN_MAGIC;
 }
@@ -1034,7 +1034,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
-static struct array_cache **alloc_alien_cache(int node, int limit)
+static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
 	struct array_cache **ac_ptr;
 	int memsize = sizeof(void *) * nr_node_ids;
@@ -1042,14 +1042,14 @@ static struct array_cache **alloc_alien_cache(int node, int limit)
 
 	if (limit > 1)
 		limit = 12;
-	ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
+	ac_ptr = kmalloc_node(memsize, gfp, node);
 	if (ac_ptr) {
 		for_each_node(i) {
 			if (i == node || !node_online(i)) {
 				ac_ptr[i] = NULL;
 				continue;
 			}
-			ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
+			ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
 			if (!ac_ptr[i]) {
 				for (i--; i >= 0; i--)
 					kfree(ac_ptr[i]);
@@ -1282,20 +1282,20 @@ static int __cpuinit cpuup_prepare(long cpu)
 		struct array_cache **alien = NULL;
 
 		nc = alloc_arraycache(node, cachep->limit,
-					cachep->batchcount);
+					cachep->batchcount, GFP_KERNEL);
 		if (!nc)
 			goto bad;
 		if (cachep->shared) {
 			shared = alloc_arraycache(node,
 				cachep->shared * cachep->batchcount,
-				0xbaadf00d);
+				0xbaadf00d, GFP_KERNEL);
 			if (!shared) {
 				kfree(nc);
 				goto bad;
 			}
 		}
 		if (use_alien_caches) {
-			alien = alloc_alien_cache(node, cachep->limit);
+			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
 			if (!alien) {
 				kfree(shared);
 				kfree(nc);
@@ -1399,10 +1399,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 {
 	struct kmem_list3 *ptr;
 
-	ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
+	ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
 	BUG_ON(!ptr);
 
-	local_irq_disable();
 	memcpy(ptr, list, sizeof(struct kmem_list3));
 	/*
 	 * Do not assume that spinlocks can be initialized via memcpy:
@@ -1411,7 +1410,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 
 	MAKE_ALL_LISTS(cachep, ptr, nodeid);
 	cachep->nodelists[nodeid] = ptr;
-	local_irq_enable();
 }
 
 /*
@@ -1575,9 +1573,8 @@ void __init kmem_cache_init(void)
 	{
 		struct array_cache *ptr;
 
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-		local_irq_disable();
 		BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
 		memcpy(ptr, cpu_cache_get(&cache_cache),
 		       sizeof(struct arraycache_init));
@@ -1587,11 +1584,9 @@ void __init kmem_cache_init(void)
 		spin_lock_init(&ptr->lock);
 
 		cache_cache.array[smp_processor_id()] = ptr;
-		local_irq_enable();
 
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-		local_irq_disable();
 		BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
 		       != &initarray_generic.cache);
 		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
@@ -1603,7 +1598,6 @@ void __init kmem_cache_init(void)
 
 		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
 		    ptr;
-		local_irq_enable();
 	}
 	/* 5) Replace the bootstrap kmem_list3's */
 	{
@@ -1627,7 +1621,7 @@ void __init kmem_cache_init(void)
 		struct kmem_cache *cachep;
 		mutex_lock(&cache_chain_mutex);
 		list_for_each_entry(cachep, &cache_chain, next)
-			if (enable_cpucache(cachep))
+			if (enable_cpucache(cachep, GFP_NOWAIT))
 				BUG();
 		mutex_unlock(&cache_chain_mutex);
 	}
@@ -2064,10 +2058,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
-static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
+static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	if (g_cpucache_up == FULL)
-		return enable_cpucache(cachep);
+		return enable_cpucache(cachep, gfp);
 
 	if (g_cpucache_up == NONE) {
 		/*
@@ -2089,7 +2083,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
 			g_cpucache_up = PARTIAL_AC;
 	} else {
 		cachep->array[smp_processor_id()] =
-			kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+			kmalloc(sizeof(struct arraycache_init), gfp);
 
 		if (g_cpucache_up == PARTIAL_AC) {
 			set_up_list3s(cachep, SIZE_L3);
@@ -2153,6 +2147,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 {
 	size_t left_over, slab_size, ralign;
 	struct kmem_cache *cachep = NULL, *pc;
+	gfp_t gfp;
 
 	/*
 	 * Sanity checks... these are all serious usage bugs.
@@ -2168,8 +2163,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 * We use cache_chain_mutex to ensure a consistent view of
 	 * cpu_online_mask as well.  Please see cpuup_callback
 	 */
-	get_online_cpus();
-	mutex_lock(&cache_chain_mutex);
+	if (slab_is_available()) {
+		get_online_cpus();
+		mutex_lock(&cache_chain_mutex);
+	}
 
 	list_for_each_entry(pc, &cache_chain, next) {
 		char tmp;
@@ -2278,8 +2275,13 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 */
 	align = ralign;
 
+	if (slab_is_available())
+		gfp = GFP_KERNEL;
+	else
+		gfp = GFP_NOWAIT;
+
 	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+	cachep = kmem_cache_zalloc(&cache_cache, gfp);
 	if (!cachep)
 		goto oops;
 
@@ -2382,7 +2384,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	cachep->ctor = ctor;
 	cachep->name = name;
 
-	if (setup_cpu_cache(cachep)) {
+	if (setup_cpu_cache(cachep, gfp)) {
 		__kmem_cache_destroy(cachep);
 		cachep = NULL;
 		goto oops;
@@ -2394,8 +2396,10 @@ oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
-	mutex_unlock(&cache_chain_mutex);
-	put_online_cpus();
+	if (slab_is_available()) {
+		mutex_unlock(&cache_chain_mutex);
+		put_online_cpus();
+	}
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -3802,7 +3806,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name);
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
-static int alloc_kmemlist(struct kmem_cache *cachep)
+static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int node;
 	struct kmem_list3 *l3;
@@ -3812,7 +3816,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 	for_each_online_node(node) {
 
                 if (use_alien_caches) {
-                        new_alien = alloc_alien_cache(node, cachep->limit);
+                        new_alien = alloc_alien_cache(node, cachep->limit, gfp);
                         if (!new_alien)
                                 goto fail;
                 }
@@ -3821,7 +3825,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 		if (cachep->shared) {
 			new_shared = alloc_arraycache(node,
 				cachep->shared*cachep->batchcount,
-					0xbaadf00d);
+					0xbaadf00d, gfp);
 			if (!new_shared) {
 				free_alien_cache(new_alien);
 				goto fail;
@@ -3850,7 +3854,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 			free_alien_cache(new_alien);
 			continue;
 		}
-		l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
+		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
 		if (!l3) {
 			free_alien_cache(new_alien);
 			kfree(new_shared);
@@ -3906,18 +3910,18 @@ static void do_ccupdate_local(void *info)
 
 /* Always called with the cache_chain_mutex held */
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
-				int batchcount, int shared)
+				int batchcount, int shared, gfp_t gfp)
 {
 	struct ccupdate_struct *new;
 	int i;
 
-	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	new = kzalloc(sizeof(*new), gfp);
 	if (!new)
 		return -ENOMEM;
 
 	for_each_online_cpu(i) {
 		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
-						batchcount);
+						batchcount, gfp);
 		if (!new->new[i]) {
 			for (i--; i >= 0; i--)
 				kfree(new->new[i]);
@@ -3944,11 +3948,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 		kfree(ccold);
 	}
 	kfree(new);
-	return alloc_kmemlist(cachep);
+	return alloc_kmemlist(cachep, gfp);
 }
 
 /* Called with cache_chain_mutex held always */
-static int enable_cpucache(struct kmem_cache *cachep)
+static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int err;
 	int limit, shared;
@@ -3994,7 +3998,7 @@ static int enable_cpucache(struct kmem_cache *cachep)
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
+	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
@@ -4300,7 +4304,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
 				res = 0;
 			} else {
 				res = do_tune_cpucache(cachep, limit,
-						       batchcount, shared);
+						       batchcount, shared,
+						       GFP_KERNEL);
 			}
 			break;
 		}
diff --git a/mm/slub.c b/mm/slub.c
index 5e805a6..c1815a6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2557,13 +2557,16 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
 	if (gfp_flags & SLUB_DMA)
 		flags = SLAB_CACHE_DMA;
 
-	down_write(&slub_lock);
+	/*
+	 * This function is called with IRQs disabled during early-boot on
+	 * single CPU so there's no need to take slub_lock here.
+	 */
 	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
 								flags, NULL))
 		goto panic;
 
 	list_add(&s->list, &slab_caches);
-	up_write(&slub_lock);
+
 	if (sysfs_slab_add(s))
 		goto panic;
 	return s;
@@ -3021,7 +3024,7 @@ void __init kmem_cache_init(void)
 	 * kmem_cache_open for slab_state == DOWN.
 	 */
 	create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
-		sizeof(struct kmem_cache_node), GFP_KERNEL);
+		sizeof(struct kmem_cache_node), GFP_NOWAIT);
 	kmalloc_caches[0].refcount = -1;
 	caches++;
 
@@ -3034,16 +3037,16 @@ void __init kmem_cache_init(void)
 	/* Caches that are not of the two-to-the-power-of size */
 	if (KMALLOC_MIN_SIZE <= 64) {
 		create_kmalloc_cache(&kmalloc_caches[1],
-				"kmalloc-96", 96, GFP_KERNEL);
+				"kmalloc-96", 96, GFP_NOWAIT);
 		caches++;
 		create_kmalloc_cache(&kmalloc_caches[2],
-				"kmalloc-192", 192, GFP_KERNEL);
+				"kmalloc-192", 192, GFP_NOWAIT);
 		caches++;
 	}
 
 	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
-			"kmalloc", 1 << i, GFP_KERNEL);
+			"kmalloc", 1 << i, GFP_NOWAIT);
 		caches++;
 	}
 
@@ -3080,7 +3083,7 @@ void __init kmem_cache_init(void)
 	/* Provide the correct kmalloc names now that the caches are up */
 	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
-			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
+			kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
-- 
cgit v0.10.2


From 43ebdac42f16037263b52a5aeedcd1bfa4a9bb29 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Mon, 25 May 2009 15:01:35 +0300
Subject: vmalloc: use kzalloc() instead of alloc_bootmem()

We can call vmalloc_init() after kmem_cache_init() and use kzalloc() instead of
the bootmem allocator when initializing vmalloc data structures.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/init/main.c b/init/main.c
index 0ab82a4..6d38f96 100644
--- a/init/main.c
+++ b/init/main.c
@@ -587,7 +587,6 @@ asmlinkage void __init start_kernel(void)
 	 * kmem_cache_init()
 	 */
 	pidhash_init();
-	vmalloc_init();
 	vfs_caches_init_early();
 	sort_main_extable();
 	trap_init();
@@ -596,6 +595,7 @@ asmlinkage void __init start_kernel(void)
 	 */
 	mem_init();
 	kmem_cache_init();
+	vmalloc_init();
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 083716e..3235138 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -23,7 +23,6 @@
 #include <linux/rbtree.h>
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
-#include <linux/bootmem.h>
 #include <linux/pfn.h>
 
 #include <asm/atomic.h>
@@ -1032,7 +1031,7 @@ void __init vmalloc_init(void)
 
 	/* Import existing vmlist entries. */
 	for (tmp = vmlist; tmp; tmp = tmp->next) {
-		va = alloc_bootmem(sizeof(struct vmap_area));
+		va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
 		va->flags = tmp->flags | VM_VM_AREA;
 		va->va_start = (unsigned long)tmp->addr;
 		va->va_end = va->va_start + tmp->size;
-- 
cgit v0.10.2


From 444f478f65c7ca4606f9965b31feed13fe2bc9fa Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 11 Jun 2009 18:29:06 +0300
Subject: init: introduce mm_init()

As suggested by Christoph Lameter, introduce mm_init() now that we initialize
all the kernel memory allocations together.

Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/init/main.c b/init/main.c
index 6d38f96..7917695 100644
--- a/init/main.c
+++ b/init/main.c
@@ -533,6 +533,16 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
+/*
+ * Set up kernel memory allocators
+ */
+static void __init mm_init(void)
+{
+	mem_init();
+	kmem_cache_init();
+	vmalloc_init();
+}
+
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
@@ -590,12 +600,7 @@ asmlinkage void __init start_kernel(void)
 	vfs_caches_init_early();
 	sort_main_extable();
 	trap_init();
-	/*
-	 * Set up kernel memory allocators
-	 */
-	mem_init();
-	kmem_cache_init();
-	vmalloc_init();
+	mm_init();
 	/*
 	 * Set up the scheduler prior starting any interrupts (such as the
 	 * timer interrupt). Full topology setup happens at smp_init()
-- 
cgit v0.10.2


From 36b7b6d465489c4754c4fd66fcec6086eba87896 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 10 Jun 2009 23:42:36 +0300
Subject: sched: use kzalloc() instead of the bootmem allocator

Now that kmem_cache_init() happens before sched_init(), we should use kzalloc()
and not the bootmem allocator.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/kernel/sched.c b/kernel/sched.c
index 14c447a..a9ff953 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -68,7 +68,6 @@
 #include <linux/pagemap.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
-#include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
@@ -7782,21 +7781,18 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 
 static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
 {
+	gfp_t gfp = GFP_KERNEL;
+
 	memset(rd, 0, sizeof(*rd));
 
-	if (bootmem) {
-		alloc_bootmem_cpumask_var(&def_root_domain.span);
-		alloc_bootmem_cpumask_var(&def_root_domain.online);
-		alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
-		cpupri_init(&rd->cpupri, true);
-		return 0;
-	}
+	if (bootmem)
+		gfp = GFP_NOWAIT;
 
-	if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->span, gfp))
 		goto out;
-	if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->online, gfp))
 		goto free_span;
-	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->rto_mask, gfp))
 		goto free_online;
 
 	if (cpupri_init(&rd->cpupri, false) != 0)
@@ -9123,7 +9119,7 @@ void __init sched_init(void)
 	 * we use alloc_bootmem().
 	 */
 	if (alloc_size) {
-		ptr = (unsigned long)alloc_bootmem(alloc_size);
+		ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		init_task_group.se = (struct sched_entity **)ptr;
-- 
cgit v0.10.2


From a5f4f52e82114e85aa1a066bd1a450acc19a464d Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 10 Jun 2009 23:53:37 +0300
Subject: vt: use kzalloc() instead of the bootmem allocator

Now that kmem_cache_init() happens before console_init(), we should use
kzalloc() and not the bootmem allocator.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 08151d4..c796a86 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -95,7 +95,6 @@
 #include <linux/timer.h>
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
-#include <linux/bootmem.h>
 #include <linux/pm.h>
 #include <linux/font.h>
 #include <linux/bitops.h>
@@ -2875,14 +2874,11 @@ static int __init con_init(void)
 		mod_timer(&console_timer, jiffies + blankinterval);
 	}
 
-	/*
-	 * kmalloc is not running yet - we use the bootmem allocator.
-	 */
 	for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
-		vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data));
+		vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT);
 		INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
 		visual_init(vc, currcons, 1);
-		vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size);
+		vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT);
 		vc->vc_kmalloced = 0;
 		vc_init(vc, vc->vc_rows, vc->vc_cols,
 			currcons || !vc->vc_sw->con_save_screen);
-- 
cgit v0.10.2


From 38c7fed2f5ffee17e1fa3e0f78b0e1bf43d52d13 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 25 May 2009 15:10:58 +0300
Subject: x86: remove some alloc_bootmem_cpumask_var calling

Now that we set up the slab allocator earlier, we can get rid of some
alloc_bootmem_cpumask_var() calls in boot code.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1946fac..139201a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -185,8 +185,8 @@ int __init arch_early_irq_init(void)
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
-		alloc_bootmem_cpumask_var(&cfg[i].domain);
-		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		alloc_cpumask_var(&cfg[i].domain, GFP_NOWAIT);
+		alloc_cpumask_var(&cfg[i].old_domain, GFP_NOWAIT);
 		if (i < NR_IRQS_LEGACY)
 			cpumask_setall(cfg[i].domain);
 	}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index eedbb8e..1e50c34 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -430,23 +430,19 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
  * Returns true if successful (or not required).
  */
 static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
-								bool boot)
+							bool boot)
 {
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	if (boot) {
-		alloc_bootmem_cpumask_var(&desc->affinity);
+	gfp_t gfp = GFP_ATOMIC;
 
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-		alloc_bootmem_cpumask_var(&desc->pending_mask);
-#endif
-		return true;
-	}
+	if (boot)
+		gfp = GFP_NOWAIT;
 
-	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	if (!alloc_cpumask_var_node(&desc->affinity, gfp, node))
 		return false;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
+	if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
 		free_cpumask_var(desc->affinity);
 		return false;
 	}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 026facc..d5a7e17 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1857,7 +1857,7 @@ struct cgroup_subsys cpuset_subsys = {
 
 int __init cpuset_init_early(void)
 {
-	alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
+	alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT);
 
 	top_cpuset.mems_generation = cpuset_mems_generation++;
 	return 0;
diff --git a/kernel/profile.c b/kernel/profile.c
index 7724e04..28cf26a 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -111,12 +111,6 @@ int __ref profile_init(void)
 	/* only text is profiled */
 	prof_len = (_etext - _stext) >> prof_shift;
 	buffer_bytes = prof_len*sizeof(atomic_t);
-	if (!slab_is_available()) {
-		prof_buffer = alloc_bootmem(buffer_bytes);
-		alloc_bootmem_cpumask_var(&prof_cpu_mask);
-		cpumask_copy(prof_cpu_mask, cpu_possible_mask);
-		return 0;
-	}
 
 	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
 		return -ENOMEM;
diff --git a/lib/cpumask.c b/lib/cpumask.c
index eb23aaa..7bb4142 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -92,15 +92,8 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
  */
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 {
-	if (likely(slab_is_available()))
-		*mask = kmalloc_node(cpumask_size(), flags, node);
-	else {
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-		printk(KERN_ERR
-			"=> alloc_cpumask_var: kmalloc not available!\n");
-#endif
-		*mask = NULL;
-	}
+	*mask = kmalloc_node(cpumask_size(), flags, node);
+
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 	if (!*mask) {
 		printk(KERN_ERR "=> alloc_cpumask_var: failed!\n");
-- 
cgit v0.10.2


From dad213aeb59718623fc59defeff95fe8c3feb8a0 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 28 May 2009 18:14:40 -0700
Subject: irq/cpumask: make memoryless node zero happy

Don't hardcode to node zero for early boot IRQ setup memory allocations.

[ penberg@cs.helsinki.fi: minor cleanups ]
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 139201a..94605e7 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -177,16 +177,18 @@ int __init arch_early_irq_init(void)
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
 	int count;
+	int node;
 	int i;
 
 	cfg = irq_cfgx;
 	count = ARRAY_SIZE(irq_cfgx);
+	node= cpu_to_node(boot_cpu_id);
 
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
-		alloc_cpumask_var(&cfg[i].domain, GFP_NOWAIT);
-		alloc_cpumask_var(&cfg[i].old_domain, GFP_NOWAIT);
+		alloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+		alloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
 		if (i < NR_IRQS_LEGACY)
 			cpumask_setall(cfg[i].domain);
 	}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a600184..e161999 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -150,6 +150,7 @@ int __init early_irq_init(void)
 {
 	struct irq_desc *desc;
 	int legacy_count;
+	int node;
 	int i;
 
 	init_irq_default_affinity();
@@ -160,20 +161,20 @@ int __init early_irq_init(void)
 
 	desc = irq_desc_legacy;
 	legacy_count = ARRAY_SIZE(irq_desc_legacy);
+ 	node = first_online_node;
 
 	/* allocate irq_desc_ptrs array based on nr_irqs */
 	irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
 
 	/* allocate based on nr_cpu_ids */
-	/* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */
-	kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids *
-					  sizeof(int));
+	kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids *
+					  sizeof(int), GFP_NOWAIT, node);
 
 	for (i = 0; i < legacy_count; i++) {
 		desc[i].irq = i;
 		desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
-		alloc_desc_masks(&desc[i], 0, true);
+		alloc_desc_masks(&desc[i], node, true);
 		init_desc_masks(&desc[i]);
 		irq_desc_ptrs[i] = desc + i;
 	}
-- 
cgit v0.10.2


From 959982fee4e635c61780e989c3e34267143fcc02 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 28 May 2009 18:15:16 -0700
Subject: memcg: don't use bootmem allocator in setup code

The bootmem allocator is no longer available for page_cgroup_init() because we
set up the kernel slab allocator much earlier now.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 791905c..3dd4a90 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -47,6 +47,8 @@ static int __init alloc_node_page_cgroup(int nid)
 	struct page_cgroup *base, *pc;
 	unsigned long table_size;
 	unsigned long start_pfn, nr_pages, index;
+	struct page *page;
+	unsigned int order;
 
 	start_pfn = NODE_DATA(nid)->node_start_pfn;
 	nr_pages = NODE_DATA(nid)->node_spanned_pages;
@@ -55,11 +57,13 @@ static int __init alloc_node_page_cgroup(int nid)
 		return 0;
 
 	table_size = sizeof(struct page_cgroup) * nr_pages;
-
-	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
-			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
-	if (!base)
+	order = get_order(table_size);
+	page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order);
+	if (!page)
+		page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order);
+	if (!page)
 		return -ENOMEM;
+	base = page_address(page);
 	for (index = 0; index < nr_pages; index++) {
 		pc = base + index;
 		__init_page_cgroup(pc, start_pfn + index);
-- 
cgit v0.10.2


From 4bdddf8ff9bbb8aa7b4d7847586202bd25842c90 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 11 Jun 2009 08:35:27 +0300
Subject: sched: use alloc_cpumask_var() instead of alloc_bootmem_cpumask_var()

Slab is initialized when sched_init() runs now so lets use alloc_cpumask_var().

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/kernel/sched.c b/kernel/sched.c
index a9ff953..12cc09c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9310,13 +9310,13 @@ void __init sched_init(void)
 	current->sched_class = &fair_sched_class;
 
 	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
-	alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+	alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
-	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
-	alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask);
+	alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
 #endif
-	alloc_bootmem_cpumask_var(&cpu_isolated_map);
+	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
 	scheduler_running = 1;
-- 
cgit v0.10.2


From 0fb530291621c8b8a1b16abeeab05d9262489f71 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 11 Jun 2009 08:41:22 +0300
Subject: sched: use slab in cpupri_init()

Lets not use the bootmem allocator in cpupri_init() as slab is already up when
it is run.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/kernel/sched.c b/kernel/sched.c
index 12cc09c..dcf2dc2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7795,7 +7795,7 @@ static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
 	if (!alloc_cpumask_var(&rd->rto_mask, gfp))
 		goto free_online;
 
-	if (cpupri_init(&rd->cpupri, false) != 0)
+	if (cpupri_init(&rd->cpupri, bootmem) != 0)
 		goto free_rto_mask;
 	return 0;
 
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 344712a..7deffc9 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -154,8 +154,12 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
  */
 int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
 {
+	gfp_t gfp = GFP_KERNEL;
 	int i;
 
+	if (bootmem)
+		gfp = GFP_NOWAIT;
+
 	memset(cp, 0, sizeof(*cp));
 
 	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
@@ -163,9 +167,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
 
 		spin_lock_init(&vec->lock);
 		vec->count = 0;
-		if (bootmem)
-			alloc_bootmem_cpumask_var(&vec->mask);
-		else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
+		if (!zalloc_cpumask_var(&vec->mask, gfp))
 			goto cleanup;
 	}
 
-- 
cgit v0.10.2


From 22fb4e71e646695c7e0f379ada66b372c2d1aa1a Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 11 Jun 2009 14:46:49 +0300
Subject: irq: use kcalloc() instead of the bootmem allocator

Fixes the following problem:

[    0.000000] Experimental hierarchical RCU init done.
[    0.000000] NR_IRQS:4352 nr_irqs:256
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at mm/bootmem.c:537 alloc_arch_preferred_bootmem+0x40/0x7e()
[    0.000000] Hardware name: To Be Filled By O.E.M.
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.30-tip-02161-g7a74539-dirty #59709
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff823f8c8e>] ? alloc_arch_preferred_bootmem+0x40/0x7e
[    0.000000]  [<ffffffff81067168>] warn_slowpath_common+0x88/0xcb
[    0.000000]  [<ffffffff810671d2>] warn_slowpath_null+0x27/0x3d
[    0.000000]  [<ffffffff823f8c8e>] alloc_arch_preferred_bootmem+0x40/0x7e
[    0.000000]  [<ffffffff823f9307>] ___alloc_bootmem_nopanic+0x4e/0xec
[    0.000000]  [<ffffffff823f93c5>] ___alloc_bootmem+0x20/0x61
[    0.000000]  [<ffffffff823f962e>] __alloc_bootmem+0x1e/0x34
[    0.000000]  [<ffffffff823f757c>] early_irq_init+0x6d/0x118
[    0.000000]  [<ffffffff823e0140>] ? early_idt_handler+0x0/0x71
[    0.000000]  [<ffffffff823e0cf7>] start_kernel+0x192/0x394
[    0.000000]  [<ffffffff823e0140>] ? early_idt_handler+0x0/0x71
[    0.000000]  [<ffffffff823e02ad>] x86_64_start_reservations+0xb4/0xcf
[    0.000000]  [<ffffffff823e0000>] ? __init_begin+0x0/0x140
[    0.000000]  [<ffffffff823e0420>] x86_64_start_kernel+0x158/0x17b
[    0.000000] ---[ end trace a7919e7f17c0a725 ]---
[    0.000000] Fast TSC calibration using PIT
[    0.000000] Detected 2002.510 MHz processor.
[    0.004000] Console: colour VGA+ 80x25

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e161999..1045785 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -164,7 +164,7 @@ int __init early_irq_init(void)
  	node = first_online_node;
 
 	/* allocate irq_desc_ptrs array based on nr_irqs */
-	irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
+	irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT);
 
 	/* allocate based on nr_cpu_ids */
 	kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids *
-- 
cgit v0.10.2


From b8ec757390282e21d349bf6b602a8cb182da0429 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 11 Jun 2009 19:25:37 +0300
Subject: vgacon: use slab allocator instead of the bootmem allocator

Slab is initialized before the console subsystem so use the slab allocator in
vgacon_scrollback_startup().

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 38e86b8..59d7d5e 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -180,7 +180,7 @@ static inline void vga_set_mem_top(struct vc_data *c)
 }
 
 #ifdef CONFIG_VGACON_SOFT_SCROLLBACK
-#include <linux/bootmem.h>
+#include <linux/slab.h>
 /* software scrollback */
 static void *vgacon_scrollback;
 static int vgacon_scrollback_tail;
@@ -210,8 +210,7 @@ static void vgacon_scrollback_init(int pitch)
  */
 static void __init_refok vgacon_scrollback_startup(void)
 {
-	vgacon_scrollback = alloc_bootmem(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE
-					  * 1024);
+	vgacon_scrollback = kcalloc(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE, 1024, GFP_NOWAIT);
 	vgacon_scrollback_init(vga_video_num_columns * 2);
 }
 
-- 
cgit v0.10.2


From b415c49a864dab8ee90713833d642dd461eccae9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 11 Jun 2009 13:12:55 +0100
Subject: slow_work_thread() should do the exclusive wait

slow_work_thread() sleeps on slow_work_thread_wq without WQ_FLAG_EXCLUSIVE,
this means that slow_work_enqueue()->__wake_up(nr_exclusive => 1) wakes up all
kslowd threads.  This is not what we want, so we change slow_work_thread() to
use prepare_to_wait_exclusive() instead.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index b28d1913..521ed20 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -372,8 +372,8 @@ static int slow_work_thread(void *_data)
 		vsmax *= atomic_read(&slow_work_thread_count);
 		vsmax /= 100;
 
-		prepare_to_wait(&slow_work_thread_wq, &wait,
-				TASK_INTERRUPTIBLE);
+		prepare_to_wait_exclusive(&slow_work_thread_wq, &wait,
+					  TASK_INTERRUPTIBLE);
 		if (!freezing(current) &&
 		    !slow_work_threads_should_exit &&
 		    !slow_work_available(vsmax) &&
-- 
cgit v0.10.2


From 90586523eb4b349806887c62ee70685a49415124 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:20 -0400
Subject: fsnotify: unified filesystem notification backend

fsnotify is a backend for filesystem notification.  fsnotify does
not provide any userspace interface but does provide the basis
needed for other notification schemes such as dnotify.  fsnotify
can be extended to be the backend for inotify or the upcoming
fanotify.  fsnotify provides a mechanism for "groups" to register for
some set of filesystem events and to then deliver those events to
those groups for processing.

fsnotify has a number of benefits, the first being actually shrinking the size
of an inode.  Before fsnotify to support both dnotify and inotify an inode had

        unsigned long           i_dnotify_mask; /* Directory notify events */
        struct dnotify_struct   *i_dnotify; /* for directory notifications */
        struct list_head        inotify_watches; /* watches on this inode */
        struct mutex            inotify_mutex;  /* protects the watches list

But with fsnotify this same functionallity (and more) is done with just

        __u32                   i_fsnotify_mask; /* all events for this inode */
        struct hlist_head       i_fsnotify_mark_entries; /* marks on this inode */

That's right, inotify, dnotify, and fanotify all in 64 bits.  We used that
much space just in inotify_watches alone, before this patch set.

fsnotify object lifetime and locking is MUCH better than what we have today.
inotify locking is incredibly complex.  See 8f7b0ba1c8539 as an example of
what's been busted since inception.  inotify needs to know internal semantics
of superblock destruction and unmounting to function.  The inode pinning and
vfs contortions are horrible.

no fsnotify implementers do allocation under locks.  This means things like
f04b30de3 which (due to an overabundance of caution) changes GFP_KERNEL to
GFP_NOFS can be reverted.  There are no longer any allocation rules when using
or implementing your own fsnotify listener.

fsnotify paves the way for fanotify.  In brief fanotify is a notification
mechanism that delivers the lisener both an 'event' and an open file descriptor
to the object in question.  This means that fanotify is pathname agnostic.
Some on lkml may not care for the original companies or users that pushed for
TALPA, but fanotify was designed with flexibility and input for other users in
mind.  The readahead group expressed interest in fanotify as it could be used
to profile disk access on boot without breaking the audit system.  The desktop
search groups have also expressed interest in fanotify as it solves a number
of the race conditions and problems present with managing inotify when more
than a limited number of specific files are of interest.  fanotify can provide
for a userspace access control system which makes it a clean interface for AV
vendors to hook without trying to do binary patching on the syscall table,
LSM, and everywhere else they do their things today.  With this patch series
fanotify can be implemented in less than 1200 lines of easy to review code.
Almost all of which is the socket based user interface.

This patch series builds fsnotify to the point that it can implement
dnotify and inotify_user.  Patches exist and will be sent soon after
acceptance to finish the in kernel inotify conversion (audit) and implement
fanotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 50914d7..31dac7e 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -1,2 +1,15 @@
+config FSNOTIFY
+	bool "Filesystem notification backend"
+	default y
+	---help---
+	   fsnotify is a backend for filesystem notification.  fsnotify does
+	   not provide any userspace interface but does provide the basis
+	   needed for other notification schemes such as dnotify, inotify,
+	   and fanotify.
+
+	   Say Y here to enable fsnotify suport.
+
+	   If unsure, say Y.
+
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 5a95b60..db5467b 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,2 +1,4 @@
+obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o
+
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
new file mode 100644
index 0000000..56bee0f
--- /dev/null
+++ b/fs/notify/fsnotify.c
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/srcu.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+/*
+ * This is the main call to fsnotify.  The VFS calls into hook specific functions
+ * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
+ * out to all of the registered fsnotify_group.  Those groups can then use the
+ * notification event in whatever means they feel necessary.
+ */
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_event *event = NULL;
+	int idx;
+
+	if (list_empty(&fsnotify_groups))
+		return;
+
+	if (!(mask & fsnotify_mask))
+		return;
+
+	/*
+	 * SRCU!!  the groups list is very very much read only and the path is
+	 * very hot.  The VAST majority of events are not going to need to do
+	 * anything other than walk the list so it's crazy to pre-allocate.
+	 */
+	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
+		if (mask & group->mask) {
+			if (!event) {
+				event = fsnotify_create_event(to_tell, mask, data, data_is);
+				/* shit, we OOM'd and now we can't tell, maybe
+				 * someday someone else will want to do something
+				 * here */
+				if (!event)
+					break;
+			}
+			group->ops->handle_event(group, event);
+		}
+	}
+	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	/*
+	 * fsnotify_create_event() took a reference so the event can't be cleaned
+	 * up while we are still trying to add it to lists, drop that one.
+	 */
+	if (event)
+		fsnotify_put_event(event);
+}
+EXPORT_SYMBOL_GPL(fsnotify);
+
+static __init int fsnotify_init(void)
+{
+	return init_srcu_struct(&fsnotify_grp_srcu);
+}
+subsys_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
new file mode 100644
index 0000000..c6a8bd4
--- /dev/null
+++ b/fs/notify/fsnotify.h
@@ -0,0 +1,15 @@
+#ifndef __FS_NOTIFY_FSNOTIFY_H_
+#define __FS_NOTIFY_FSNOTIFY_H_
+
+#include <linux/list.h>
+#include <linux/fsnotify.h>
+#include <linux/srcu.h>
+#include <linux/types.h>
+
+/* protects reads of fsnotify_groups */
+extern struct srcu_struct fsnotify_grp_srcu;
+/* all groups which receive fsnotify events */
+extern struct list_head fsnotify_groups;
+/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
+extern __u32 fsnotify_mask;
+#endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
new file mode 100644
index 0000000..c681295
--- /dev/null
+++ b/fs/notify/group.c
@@ -0,0 +1,198 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/srcu.h>
+#include <linux/rculist.h>
+#include <linux/wait.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+#include <asm/atomic.h>
+
+/* protects writes to fsnotify_groups and fsnotify_mask */
+static DEFINE_MUTEX(fsnotify_grp_mutex);
+/* protects reads while running the fsnotify_groups list */
+struct srcu_struct fsnotify_grp_srcu;
+/* all groups registered to receive filesystem notifications */
+LIST_HEAD(fsnotify_groups);
+/* bitwise OR of all events (FS_*) interesting to some group on this system */
+__u32 fsnotify_mask;
+
+/*
+ * When a new group registers or changes it's set of interesting events
+ * this function updates the fsnotify_mask to contain all interesting events
+ */
+void fsnotify_recalc_global_mask(void)
+{
+	struct fsnotify_group *group;
+	__u32 mask = 0;
+	int idx;
+
+	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
+		mask |= group->mask;
+	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	fsnotify_mask = mask;
+}
+
+/*
+ * Take a reference to a group so things found under the fsnotify_grp_mutex
+ * can't get freed under us
+ */
+static void fsnotify_get_group(struct fsnotify_group *group)
+{
+	atomic_inc(&group->refcnt);
+}
+
+/*
+ * Final freeing of a group
+ */
+static void fsnotify_destroy_group(struct fsnotify_group *group)
+{
+	if (group->ops->free_group_priv)
+		group->ops->free_group_priv(group);
+
+	kfree(group);
+}
+
+/*
+ * Remove this group from the global list of groups that will get events
+ * this can be done even if there are still references and things still using
+ * this group.  This just stops the group from getting new events.
+ */
+static void __fsnotify_evict_group(struct fsnotify_group *group)
+{
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	if (group->on_group_list)
+		list_del_rcu(&group->group_list);
+	group->on_group_list = 0;
+}
+
+/*
+ * Called when a group is no longer interested in getting events.  This can be
+ * used if a group is misbehaving or if for some reason a group should no longer
+ * get any filesystem events.
+ */
+void fsnotify_evict_group(struct fsnotify_group *group)
+{
+	mutex_lock(&fsnotify_grp_mutex);
+	__fsnotify_evict_group(group);
+	mutex_unlock(&fsnotify_grp_mutex);
+}
+
+/*
+ * Drop a reference to a group.  Free it if it's through.
+ */
+void fsnotify_put_group(struct fsnotify_group *group)
+{
+	if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
+		return;
+
+	/*
+	 * OK, now we know that there's no other users *and* we hold mutex,
+	 * so no new references will appear
+	 */
+	__fsnotify_evict_group(group);
+
+	/*
+	 * now it's off the list, so the only thing we might care about is
+	 * srcu access....
+	 */
+	mutex_unlock(&fsnotify_grp_mutex);
+	synchronize_srcu(&fsnotify_grp_srcu);
+
+	/* and now it is really dead. _Nothing_ could be seeing it */
+	fsnotify_recalc_global_mask();
+	fsnotify_destroy_group(group);
+}
+
+/*
+ * Simply run the fsnotify_groups list and find a group which matches
+ * the given parameters.  If a group is found we take a reference to that
+ * group.
+ */
+static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
+						  const struct fsnotify_ops *ops)
+{
+	struct fsnotify_group *group_iter;
+	struct fsnotify_group *group = NULL;
+
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
+		if (group_iter->group_num == group_num) {
+			if ((group_iter->mask == mask) &&
+			    (group_iter->ops == ops)) {
+				fsnotify_get_group(group_iter);
+				group = group_iter;
+			} else
+				group = ERR_PTR(-EEXIST);
+		}
+	}
+	return group;
+}
+
+/*
+ * Either finds an existing group which matches the group_num, mask, and ops or
+ * creates a new group and adds it to the global group list.  In either case we
+ * take a reference for the group returned.
+ */
+struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
+					     const struct fsnotify_ops *ops)
+{
+	struct fsnotify_group *group, *tgroup;
+
+	/* very low use, simpler locking if we just always alloc */
+	group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+
+	atomic_set(&group->refcnt, 1);
+
+	group->on_group_list = 0;
+	group->group_num = group_num;
+	group->mask = mask;
+
+	group->ops = ops;
+
+	mutex_lock(&fsnotify_grp_mutex);
+	tgroup = fsnotify_find_group(group_num, mask, ops);
+	if (tgroup) {
+		/* group already exists */
+		mutex_unlock(&fsnotify_grp_mutex);
+		/* destroy the new one we made */
+		fsnotify_put_group(group);
+		return tgroup;
+	}
+
+	/* group not found, add a new one */
+	list_add_rcu(&group->group_list, &fsnotify_groups);
+	group->on_group_list = 1;
+
+	mutex_unlock(&fsnotify_grp_mutex);
+
+	if (mask)
+		fsnotify_recalc_global_mask();
+
+	return group;
+}
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 220c13f..40b1cf9 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -32,6 +32,7 @@
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify_backend.h>
 
 static atomic_t inotify_cookie;
 
@@ -905,6 +906,25 @@ EXPORT_SYMBOL_GPL(inotify_rm_watch);
  */
 static int __init inotify_setup(void)
 {
+	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
+	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
+	BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
+	BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+	BUILD_BUG_ON(IN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
+	BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
+	BUILD_BUG_ON(IN_CREATE != FS_CREATE);
+	BUILD_BUG_ON(IN_DELETE != FS_DELETE);
+	BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
+	BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
+	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
+
+	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
+	BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
+	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
+
 	atomic_set(&inotify_cookie, 0);
 
 	return 0;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
new file mode 100644
index 0000000..b8e9a87
--- /dev/null
+++ b/fs/notify/notification.c
@@ -0,0 +1,121 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/namei.h>
+#include <linux/path.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+static struct kmem_cache *fsnotify_event_cachep;
+
+void fsnotify_get_event(struct fsnotify_event *event)
+{
+	atomic_inc(&event->refcnt);
+}
+
+void fsnotify_put_event(struct fsnotify_event *event)
+{
+	if (!event)
+		return;
+
+	if (atomic_dec_and_test(&event->refcnt)) {
+		if (event->data_type == FSNOTIFY_EVENT_PATH)
+			path_put(&event->path);
+
+		kmem_cache_free(fsnotify_event_cachep, event);
+	}
+}
+
+/*
+ * Allocate a new event which will be sent to each group's handle_event function
+ * if the group was interested in this particular event.
+ */
+struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
+					     void *data, int data_type)
+{
+	struct fsnotify_event *event;
+
+	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
+	if (!event)
+		return NULL;
+
+	atomic_set(&event->refcnt, 1);
+
+	spin_lock_init(&event->lock);
+
+	event->path.dentry = NULL;
+	event->path.mnt = NULL;
+	event->inode = NULL;
+
+	event->to_tell = to_tell;
+
+	switch (data_type) {
+	case FSNOTIFY_EVENT_FILE: {
+		struct file *file = data;
+		struct path *path = &file->f_path;
+		event->path.dentry = path->dentry;
+		event->path.mnt = path->mnt;
+		path_get(&event->path);
+		event->data_type = FSNOTIFY_EVENT_PATH;
+		break;
+	}
+	case FSNOTIFY_EVENT_PATH: {
+		struct path *path = data;
+		event->path.dentry = path->dentry;
+		event->path.mnt = path->mnt;
+		path_get(&event->path);
+		event->data_type = FSNOTIFY_EVENT_PATH;
+		break;
+	}
+	case FSNOTIFY_EVENT_INODE:
+		event->inode = data;
+		event->data_type = FSNOTIFY_EVENT_INODE;
+		break;
+	case FSNOTIFY_EVENT_NONE:
+		event->inode = NULL;
+		event->path.dentry = NULL;
+		event->path.mnt = NULL;
+		break;
+	default:
+		BUG();
+	}
+
+	event->mask = mask;
+
+	return event;
+}
+
+__init int fsnotify_notification_init(void)
+{
+	fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
+
+	return 0;
+}
+subsys_initcall(fsnotify_notification_init);
+
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 00fbd5b..6c9ebef 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -13,6 +13,7 @@
 
 #include <linux/dnotify.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify_backend.h>
 #include <linux/audit.h>
 
 /*
@@ -35,6 +36,16 @@ static inline void fsnotify_d_move(struct dentry *entry)
 }
 
 /*
+ * fsnotify_link_count - inode's link count changed
+ */
+static inline void fsnotify_link_count(struct inode *inode)
+{
+	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
+
+	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE);
+}
+
+/*
  * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
  */
 static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
@@ -43,28 +54,47 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 {
 	struct inode *source = moved->d_inode;
 	u32 cookie = inotify_get_cookie();
+	__u32 old_dir_mask = 0;
+	__u32 new_dir_mask = 0;
 
-	if (old_dir == new_dir)
+	if (old_dir == new_dir) {
 		inode_dir_notify(old_dir, DN_RENAME);
-	else {
+		old_dir_mask = FS_DN_RENAME;
+	} else {
 		inode_dir_notify(old_dir, DN_DELETE);
+		old_dir_mask = FS_DELETE;
 		inode_dir_notify(new_dir, DN_CREATE);
+		new_dir_mask = FS_CREATE;
 	}
 
-	if (isdir)
+	if (isdir) {
 		isdir = IN_ISDIR;
+		old_dir_mask |= FS_IN_ISDIR;
+		new_dir_mask |= FS_IN_ISDIR;
+	}
+
+	old_dir_mask |= FS_MOVED_FROM;
+	new_dir_mask |= FS_MOVED_TO;
+
 	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name,
 				  source);
 	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name,
 				  source);
 
+	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE);
+	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE);
+
 	if (target) {
 		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
 		inotify_inode_is_dead(target);
+
+		/* this is really a link_count change not a removal */
+		fsnotify_link_count(target);
 	}
 
 	if (source) {
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
+		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE);
 	}
 	audit_inode_child(new_name, moved, new_dir);
 }
@@ -74,10 +104,12 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
  */
 static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
 {
+	__u32 mask = FS_DELETE;
+
 	if (isdir)
-		isdir = IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 	dnotify_parent(dentry, DN_DELETE);
-	inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name);
+	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 }
 
 /*
@@ -87,14 +119,8 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 {
 	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
 	inotify_inode_is_dead(inode);
-}
 
-/*
- * fsnotify_link_count - inode's link count changed
- */
-static inline void fsnotify_link_count(struct inode *inode)
-{
-	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
+	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -106,6 +132,8 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
+
+	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -120,6 +148,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
 				  inode);
 	fsnotify_link_count(inode);
 	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
+
+	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -127,10 +157,14 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
  */
 static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 {
+	__u32 mask = (FS_CREATE | FS_IN_ISDIR);
+	struct inode *d_inode = dentry->d_inode;
+
 	inode_dir_notify(inode, DN_CREATE);
-	inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, 
-				  dentry->d_name.name, dentry->d_inode);
+	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
+
+	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -139,14 +173,16 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 static inline void fsnotify_access(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_ACCESS;
+	__u32 mask = FS_ACCESS;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	dnotify_parent(dentry, DN_ACCESS);
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -155,14 +191,16 @@ static inline void fsnotify_access(struct dentry *dentry)
 static inline void fsnotify_modify(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_MODIFY;
+	__u32 mask = FS_MODIFY;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	dnotify_parent(dentry, DN_MODIFY);
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -171,13 +209,15 @@ static inline void fsnotify_modify(struct dentry *dentry)
 static inline void fsnotify_open(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_OPEN;
+	__u32 mask = FS_OPEN;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -189,13 +229,15 @@ static inline void fsnotify_close(struct file *file)
 	struct inode *inode = dentry->d_inode;
 	const char *name = dentry->d_name.name;
 	fmode_t mode = file->f_mode;
-	u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE;
+	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE);
 }
 
 /*
@@ -204,13 +246,15 @@ static inline void fsnotify_close(struct file *file)
 static inline void fsnotify_xattr(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_ATTRIB;
+	__u32 mask = FS_ATTRIB;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -221,34 +265,34 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 {
 	struct inode *inode = dentry->d_inode;
 	int dn_mask = 0;
-	u32 in_mask = 0;
+	__u32 in_mask = 0;
 
 	if (ia_valid & ATTR_UID) {
-		in_mask |= IN_ATTRIB;
+		in_mask |= FS_ATTRIB;
 		dn_mask |= DN_ATTRIB;
 	}
 	if (ia_valid & ATTR_GID) {
-		in_mask |= IN_ATTRIB;
+		in_mask |= FS_ATTRIB;
 		dn_mask |= DN_ATTRIB;
 	}
 	if (ia_valid & ATTR_SIZE) {
-		in_mask |= IN_MODIFY;
+		in_mask |= FS_MODIFY;
 		dn_mask |= DN_MODIFY;
 	}
 	/* both times implies a utime(s) call */
 	if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME))
 	{
-		in_mask |= IN_ATTRIB;
+		in_mask |= FS_ATTRIB;
 		dn_mask |= DN_ATTRIB;
 	} else if (ia_valid & ATTR_ATIME) {
-		in_mask |= IN_ACCESS;
+		in_mask |= FS_ACCESS;
 		dn_mask |= DN_ACCESS;
 	} else if (ia_valid & ATTR_MTIME) {
-		in_mask |= IN_MODIFY;
+		in_mask |= FS_MODIFY;
 		dn_mask |= DN_MODIFY;
 	}
 	if (ia_valid & ATTR_MODE) {
-		in_mask |= IN_ATTRIB;
+		in_mask |= FS_ATTRIB;
 		dn_mask |= DN_ATTRIB;
 	}
 
@@ -256,14 +300,15 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		dnotify_parent(dentry, dn_mask);
 	if (in_mask) {
 		if (S_ISDIR(inode->i_mode))
-			in_mask |= IN_ISDIR;
+			in_mask |= FS_IN_ISDIR;
 		inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL);
 		inotify_dentry_parent_queue_event(dentry, in_mask, 0,
 						  dentry->d_name.name);
+		fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE);
 	}
 }
 
-#ifdef CONFIG_INOTIFY	/* inotify helpers */
+#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY)	/* notify helpers */
 
 /*
  * fsnotify_oldname_init - save off the old filename before we change it
@@ -281,7 +326,7 @@ static inline void fsnotify_oldname_free(const char *old_name)
 	kfree(old_name);
 }
 
-#else	/* CONFIG_INOTIFY */
+#else	/* CONFIG_INOTIFY || CONFIG_FSNOTIFY */
 
 static inline const char *fsnotify_oldname_init(const char *name)
 {
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
new file mode 100644
index 0000000..1a55718
--- /dev/null
+++ b/include/linux/fsnotify_backend.h
@@ -0,0 +1,177 @@
+/*
+ * Filesystem access notification for Linux
+ *
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ */
+
+#ifndef __LINUX_FSNOTIFY_BACKEND_H
+#define __LINUX_FSNOTIFY_BACKEND_H
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h> /* struct inode */
+#include <linux/list.h>
+#include <linux/path.h> /* struct path */
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/atomic.h>
+
+/*
+ * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
+ * convert between them.  dnotify only needs conversion at watch creation
+ * so no perf loss there.  fanotify isn't defined yet, so it can use the
+ * wholes if it needs more events.
+ */
+#define FS_ACCESS		0x00000001	/* File was accessed */
+#define FS_MODIFY		0x00000002	/* File was modified */
+#define FS_ATTRIB		0x00000004	/* Metadata changed */
+#define FS_CLOSE_WRITE		0x00000008	/* Writtable file was closed */
+#define FS_CLOSE_NOWRITE	0x00000010	/* Unwrittable file closed */
+#define FS_OPEN			0x00000020	/* File was opened */
+#define FS_MOVED_FROM		0x00000040	/* File was moved from X */
+#define FS_MOVED_TO		0x00000080	/* File was moved to Y */
+#define FS_CREATE		0x00000100	/* Subfile was created */
+#define FS_DELETE		0x00000200	/* Subfile was deleted */
+#define FS_DELETE_SELF		0x00000400	/* Self was deleted */
+#define FS_MOVE_SELF		0x00000800	/* Self was moved */
+
+#define FS_UNMOUNT		0x00002000	/* inode on umount fs */
+#define FS_Q_OVERFLOW		0x00004000	/* Event queued overflowed */
+#define FS_IN_IGNORED		0x00008000	/* last inotify event here */
+
+#define FS_IN_ISDIR		0x40000000	/* event occurred against dir */
+#define FS_IN_ONESHOT		0x80000000	/* only send event once */
+
+#define FS_DN_RENAME		0x10000000	/* file renamed */
+#define FS_DN_MULTISHOT		0x20000000	/* dnotify multishot */
+
+struct fsnotify_group;
+struct fsnotify_event;
+
+/*
+ * Each group much define these ops.  The fsnotify infrastructure will call
+ * these operations for each relevant group.
+ *
+ * handle_event - main call for a group to handle an fs event
+ * free_group_priv - called when a group refcnt hits 0 to clean up the private union
+ */
+struct fsnotify_ops {
+	int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
+	void (*free_group_priv)(struct fsnotify_group *group);
+};
+
+/*
+ * A group is a "thing" that wants to receive notification about filesystem
+ * events.  The mask holds the subset of event types this group cares about.
+ * refcnt on a group is up to the implementor and at any moment if it goes 0
+ * everything will be cleaned up.
+ */
+struct fsnotify_group {
+	/*
+	 * global list of all groups receiving events from fsnotify.
+	 * anchored by fsnotify_groups and protected by either fsnotify_grp_mutex
+	 * or fsnotify_grp_srcu depending on write vs read.
+	 */
+	struct list_head group_list;
+
+	/*
+	 * Defines all of the event types in which this group is interested.
+	 * This mask is a bitwise OR of the FS_* events from above.  Each time
+	 * this mask changes for a group (if it changes) the correct functions
+	 * must be called to update the global structures which indicate global
+	 * interest in event types.
+	 */
+	__u32 mask;
+
+	/*
+	 * How the refcnt is used is up to each group.  When the refcnt hits 0
+	 * fsnotify will clean up all of the resources associated with this group.
+	 * As an example, the dnotify group will always have a refcnt=1 and that
+	 * will never change.  Inotify, on the other hand, has a group per
+	 * inotify_init() and the refcnt will hit 0 only when that fd has been
+	 * closed.
+	 */
+	atomic_t refcnt;		/* things with interest in this group */
+	unsigned int group_num;		/* simply prevents accidental group collision */
+
+	const struct fsnotify_ops *ops;	/* how this group handles things */
+
+	/* prevents double list_del of group_list.  protected by global fsnotify_gr_mutex */
+	bool on_group_list;
+
+	/* groups can define private fields here or use the void *private */
+	union {
+		void *private;
+	};
+};
+
+/*
+ * all of the information about the original object we want to now send to
+ * a group.  If you want to carry more info from the accessing task to the
+ * listener this structure is where you need to be adding fields.
+ */
+struct fsnotify_event {
+	spinlock_t lock;	/* protection for the associated event_holder and private_list */
+	/* to_tell may ONLY be dereferenced during handle_event(). */
+	struct inode *to_tell;	/* either the inode the event happened to or its parent */
+	/*
+	 * depending on the event type we should have either a path or inode
+	 * We hold a reference on path, but NOT on inode.  Since we have the ref on
+	 * the path, it may be dereferenced at any point during this object's
+	 * lifetime.  That reference is dropped when this object's refcnt hits
+	 * 0.  If this event contains an inode instead of a path, the inode may
+	 * ONLY be used during handle_event().
+	 */
+	union {
+		struct path path;
+		struct inode *inode;
+	};
+/* when calling fsnotify tell it if the data is a path or inode */
+#define FSNOTIFY_EVENT_NONE	0
+#define FSNOTIFY_EVENT_PATH	1
+#define FSNOTIFY_EVENT_INODE	2
+#define FSNOTIFY_EVENT_FILE	3
+	int data_type;		/* which of the above union we have */
+	atomic_t refcnt;	/* how many groups still are using/need to send this event */
+	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
+};
+
+#ifdef CONFIG_FSNOTIFY
+
+/* called from the vfs helpers */
+
+/* main fsnotify call to send events */
+extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is);
+
+
+/* called from fsnotify listeners, such as fanotify or dnotify */
+
+/* must call when a group changes its ->mask */
+extern void fsnotify_recalc_global_mask(void);
+/* get a reference to an existing or create a new group */
+extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num,
+						    __u32 mask,
+						    const struct fsnotify_ops *ops);
+/* drop reference on a group from fsnotify_obtain_group */
+extern void fsnotify_put_group(struct fsnotify_group *group);
+
+/* take a reference to an event */
+extern void fsnotify_get_event(struct fsnotify_event *event);
+extern void fsnotify_put_event(struct fsnotify_event *event);
+/* find private data previously attached to an event */
+extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group,
+									struct fsnotify_event *event);
+
+/* put here because inotify does some weird stuff when destroying watches */
+extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
+						    void *data, int data_is);
+#else
+
+static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
+{}
+#endif	/* CONFIG_FSNOTIFY */
+
+#endif	/* __KERNEL __ */
+
+#endif	/* __LINUX_FSNOTIFY_BACKEND_H */
-- 
cgit v0.10.2


From 3be25f49b9d6a97eae9bcb96d3292072b7658bd8 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:26 -0400
Subject: fsnotify: add marks to inodes so groups can interpret how to handle
 those inodes

This patch creates a way for fsnotify groups to attach marks to inodes.
These marks have little meaning to the generic fsnotify infrastructure
and thus their meaning should be interpreted by the group that attached
them to the inode's list.

dnotify and inotify  will make use of these markings to indicate which
inodes are of interest to their respective groups.  But this implementation
has the useful property that in the future other listeners could actually
use the marks for the exact opposite reason, aka to indicate which inodes
it had NO interest in.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/inode.c b/fs/inode.c
index bca0c61..54c63ce 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
 
@@ -189,6 +190,10 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
 
+#ifdef CONFIG_FSNOTIFY
+	inode->i_fsnotify_mask = 0;
+#endif
+
 	return inode;
 
 out_free_security:
@@ -221,6 +226,7 @@ void destroy_inode(struct inode *inode)
 	BUG_ON(inode_has_buffers(inode));
 	ima_inode_free(inode);
 	security_inode_free(inode);
+	fsnotify_inode_delete(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
@@ -252,6 +258,9 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->inotify_watches);
 	mutex_init(&inode->inotify_mutex);
 #endif
+#ifdef CONFIG_FSNOTIFY
+	INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
+#endif
 }
 EXPORT_SYMBOL(inode_init_once);
 
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index db5467b..0922cc8 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o
+obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 56bee0f..d565462 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -26,6 +26,15 @@
 #include "fsnotify.h"
 
 /*
+ * Clear all of the marks on an inode when it is being evicted from core
+ */
+void __fsnotify_inode_delete(struct inode *inode)
+{
+	fsnotify_clear_marks_by_inode(inode);
+}
+EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
+
+/*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
  * out to all of the registered fsnotify_group.  Those groups can then use the
@@ -43,6 +52,8 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 	if (!(mask & fsnotify_mask))
 		return;
 
+	if (!(mask & to_tell->i_fsnotify_mask))
+		return;
 	/*
 	 * SRCU!!  the groups list is very very much read only and the path is
 	 * very hot.  The VAST majority of events are not going to need to do
@@ -51,6 +62,8 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
 	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
 		if (mask & group->mask) {
+			if (!group->ops->should_send_event(group, to_tell, mask))
+				continue;
 			if (!event) {
 				event = fsnotify_create_event(to_tell, mask, data, data_is);
 				/* shit, we OOM'd and now we can't tell, maybe
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index c6a8bd4..8ebcbe8 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,4 +12,9 @@ extern struct srcu_struct fsnotify_grp_srcu;
 extern struct list_head fsnotify_groups;
 /* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
 extern __u32 fsnotify_mask;
+
+/* final kfree of a group */
+extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
+/* run the list of all marks associated with inode and flag them to be freed */
+extern void fsnotify_clear_marks_by_inode(struct inode *inode);
 #endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index c681295..a29d2fa 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -55,6 +55,29 @@ void fsnotify_recalc_global_mask(void)
 }
 
 /*
+ * Update the group->mask by running all of the marks associated with this
+ * group and finding the bitwise | of all of the mark->mask.  If we change
+ * the group->mask we need to update the global mask of events interesting
+ * to the system.
+ */
+void fsnotify_recalc_group_mask(struct fsnotify_group *group)
+{
+	__u32 mask = 0;
+	__u32 old_mask = group->mask;
+	struct fsnotify_mark_entry *entry;
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry(entry, &group->mark_entries, g_list)
+		mask |= entry->mask;
+	spin_unlock(&group->mark_lock);
+
+	group->mask = mask;
+
+	if (old_mask != mask)
+		fsnotify_recalc_global_mask();
+}
+
+/*
  * Take a reference to a group so things found under the fsnotify_grp_mutex
  * can't get freed under us
  */
@@ -66,7 +89,7 @@ static void fsnotify_get_group(struct fsnotify_group *group)
 /*
  * Final freeing of a group
  */
-static void fsnotify_destroy_group(struct fsnotify_group *group)
+void fsnotify_final_destroy_group(struct fsnotify_group *group)
 {
 	if (group->ops->free_group_priv)
 		group->ops->free_group_priv(group);
@@ -75,6 +98,24 @@ static void fsnotify_destroy_group(struct fsnotify_group *group)
 }
 
 /*
+ * Trying to get rid of a group.  We need to first get rid of any outstanding
+ * allocations and then free the group.  Remember that fsnotify_clear_marks_by_group
+ * could miss marks that are being freed by inode and those marks could still
+ * hold a reference to this group (via group->num_marks)  If we get into that
+ * situtation, the fsnotify_final_destroy_group will get called when that final
+ * mark is freed.
+ */
+static void fsnotify_destroy_group(struct fsnotify_group *group)
+{
+	/* clear all inode mark entries for this group */
+	fsnotify_clear_marks_by_group(group);
+
+	/* past the point of no return, matches the initial value of 1 */
+	if (atomic_dec_and_test(&group->num_marks))
+		fsnotify_final_destroy_group(group);
+}
+
+/*
  * Remove this group from the global list of groups that will get events
  * this can be done even if there are still references and things still using
  * this group.  This just stops the group from getting new events.
@@ -173,6 +214,10 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	group->group_num = group_num;
 	group->mask = mask;
 
+	spin_lock_init(&group->mark_lock);
+	atomic_set(&group->num_marks, 0);
+	INIT_LIST_HEAD(&group->mark_entries);
+
 	group->ops = ops;
 
 	mutex_lock(&fsnotify_grp_mutex);
@@ -188,6 +233,8 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	/* group not found, add a new one */
 	list_add_rcu(&group->group_list, &fsnotify_groups);
 	group->on_group_list = 1;
+	/* being on the fsnotify_groups list holds one num_marks */
+	atomic_inc(&group->num_marks);
 
 	mutex_unlock(&fsnotify_grp_mutex);
 
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
new file mode 100644
index 0000000..cdc1541
--- /dev/null
+++ b/fs/notify/inode_mark.c
@@ -0,0 +1,329 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * fsnotify inode mark locking/lifetime/and refcnting
+ *
+ * REFCNT:
+ * The mark->refcnt tells how many "things" in the kernel currently are
+ * referencing this object.  The object typically will live inside the kernel
+ * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
+ * which can find this object holding the appropriete locks, can take a reference
+ * and the object itself is guarenteed to survive until the reference is dropped.
+ *
+ * LOCKING:
+ * There are 3 spinlocks involved with fsnotify inode marks and they MUST
+ * be taken in order as follows:
+ *
+ * entry->lock
+ * group->mark_lock
+ * inode->i_lock
+ *
+ * entry->lock protects 2 things, entry->group and entry->inode.  You must hold
+ * that lock to dereference either of these things (they could be NULL even with
+ * the lock)
+ *
+ * group->mark_lock protects the mark_entries list anchored inside a given group
+ * and each entry is hooked via the g_list.  It also sorta protects the
+ * free_g_list, which when used is anchored by a private list on the stack of the
+ * task which held the group->mark_lock.
+ *
+ * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
+ * given inode and each entry is hooked via the i_list. (and sorta the
+ * free_i_list)
+ *
+ *
+ * LIFETIME:
+ * Inode marks survive between when they are added to an inode and when their
+ * refcnt==0.
+ *
+ * The inode mark can be cleared for a number of different reasons including:
+ * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
+ * - The inode is being evicted from cache. (fsnotify_inode_delete)
+ * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
+ * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark_by_entry)
+ * - The fsnotify_group associated with the mark is going away and all such marks
+ *   need to be cleaned up. (fsnotify_clear_marks_by_group)
+ *
+ * Worst case we are given an inode and need to clean up all the marks on that
+ * inode.  We take i_lock and walk the i_fsnotify_mark_entries safely.  For each
+ * mark on the list we take a reference (so the mark can't disappear under us).
+ * We remove that mark form the inode's list of marks and we add this mark to a
+ * private list anchored on the stack using i_free_list;  At this point we no
+ * longer fear anything finding the mark using the inode's list of marks.
+ *
+ * We can safely and locklessly run the private list on the stack of everything
+ * we just unattached from the original inode.  For each mark on the private list
+ * we grab the mark-> and can thus dereference mark->group and mark->inode.  If
+ * we see the group and inode are not NULL we take those locks.  Now holding all
+ * 3 locks we can completely remove the mark from other tasks finding it in the
+ * future.  Remember, 10 things might already be referencing this mark, but they
+ * better be holding a ref.  We drop our reference we took before we unhooked it
+ * from the inode.  When the ref hits 0 we can free the mark.
+ *
+ * Very similarly for freeing by group, except we use free_g_list.
+ *
+ * This has the very interesting property of being able to run concurrently with
+ * any (or all) other directions.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
+{
+	atomic_inc(&entry->refcnt);
+}
+
+void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
+{
+	if (atomic_dec_and_test(&entry->refcnt))
+		entry->free_mark(entry);
+}
+
+/*
+ * Recalculate the mask of events relevant to a given inode locked.
+ */
+static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry;
+	struct hlist_node *pos;
+	__u32 new_mask = 0;
+
+	assert_spin_locked(&inode->i_lock);
+
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list)
+		new_mask |= entry->mask;
+	inode->i_fsnotify_mask = new_mask;
+}
+
+/*
+ * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types
+ * any notifier is interested in hearing for this inode.
+ */
+void fsnotify_recalc_inode_mask(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	fsnotify_recalc_inode_mask_locked(inode);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Any time a mark is getting freed we end up here.
+ * The caller had better be holding a reference to this mark so we don't actually
+ * do the final put under the entry->lock
+ */
+void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
+{
+	struct fsnotify_group *group;
+	struct inode *inode;
+
+	spin_lock(&entry->lock);
+
+	group = entry->group;
+	inode = entry->inode;
+
+	BUG_ON(group && !inode);
+	BUG_ON(!group && inode);
+
+	/* if !group something else already marked this to die */
+	if (!group) {
+		spin_unlock(&entry->lock);
+		return;
+	}
+
+	/* 1 from caller and 1 for being on i_list/g_list */
+	BUG_ON(atomic_read(&entry->refcnt) < 2);
+
+	spin_lock(&group->mark_lock);
+	spin_lock(&inode->i_lock);
+
+	hlist_del_init(&entry->i_list);
+	entry->inode = NULL;
+
+	list_del_init(&entry->g_list);
+	entry->group = NULL;
+
+	fsnotify_put_mark(entry); /* for i_list and g_list */
+
+	/*
+	 * this mark is now off the inode->i_fsnotify_mark_entries list and we
+	 * hold the inode->i_lock, so this is the perfect time to update the
+	 * inode->i_fsnotify_mask
+	 */
+	fsnotify_recalc_inode_mask_locked(inode);
+
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&entry->lock);
+
+	/*
+	 * Some groups like to know that marks are being freed.  This is a
+	 * callback to the group function to let it know that this entry
+	 * is being freed.
+	 */
+	group->ops->freeing_mark(entry, group);
+
+	/*
+	 * it's possible that this group tried to destroy itself, but this
+	 * this mark was simultaneously being freed by inode.  If that's the
+	 * case, we finish freeing the group here.
+	 */
+	if (unlikely(atomic_dec_and_test(&group->num_marks)))
+		fsnotify_final_destroy_group(group);
+}
+
+/*
+ * Given a group, destroy all of the marks associated with that group.
+ */
+void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+{
+	struct fsnotify_mark_entry *lentry, *entry;
+	LIST_HEAD(free_list);
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
+		list_add(&entry->free_g_list, &free_list);
+		list_del_init(&entry->g_list);
+		fsnotify_get_mark(entry);
+	}
+	spin_unlock(&group->mark_lock);
+
+	list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+}
+
+/*
+ * Given an inode, destroy all of the marks associated with that inode.
+ */
+void fsnotify_clear_marks_by_inode(struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry, *lentry;
+	struct hlist_node *pos, *n;
+	LIST_HEAD(free_list);
+
+	spin_lock(&inode->i_lock);
+	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) {
+		list_add(&entry->free_i_list, &free_list);
+		hlist_del_init(&entry->i_list);
+		fsnotify_get_mark(entry);
+	}
+	spin_unlock(&inode->i_lock);
+
+	list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+}
+
+/*
+ * given a group and inode, find the mark associated with that combination.
+ * if found take a reference to that mark and return it, else return NULL
+ */
+struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group,
+						     struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry;
+	struct hlist_node *pos;
+
+	assert_spin_locked(&inode->i_lock);
+
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) {
+		if (entry->group == group) {
+			fsnotify_get_mark(entry);
+			return entry;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Nothing fancy, just initialize lists and locks and counters.
+ */
+void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
+			void (*free_mark)(struct fsnotify_mark_entry *entry))
+
+{
+	spin_lock_init(&entry->lock);
+	atomic_set(&entry->refcnt, 1);
+	INIT_HLIST_NODE(&entry->i_list);
+	entry->group = NULL;
+	entry->mask = 0;
+	entry->inode = NULL;
+	entry->free_mark = free_mark;
+}
+
+/*
+ * Attach an initialized mark entry to a given group and inode.
+ * These marks may be used for the fsnotify backend to determine which
+ * event types should be delivered to which group and for which inodes.
+ */
+int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
+		      struct fsnotify_group *group, struct inode *inode)
+{
+	struct fsnotify_mark_entry *lentry;
+	int ret = 0;
+
+	/*
+	 * LOCKING ORDER!!!!
+	 * entry->lock
+	 * group->mark_lock
+	 * inode->i_lock
+	 */
+	spin_lock(&entry->lock);
+	spin_lock(&group->mark_lock);
+	spin_lock(&inode->i_lock);
+
+	entry->group = group;
+	entry->inode = inode;
+
+	lentry = fsnotify_find_mark_entry(group, inode);
+	if (!lentry) {
+		hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
+		list_add(&entry->g_list, &group->mark_entries);
+
+		fsnotify_get_mark(entry); /* for i_list and g_list */
+
+		atomic_inc(&group->num_marks);
+
+		fsnotify_recalc_inode_mask_locked(inode);
+	}
+
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&entry->lock);
+
+	if (lentry) {
+		ret = -EEXIST;
+		fsnotify_put_mark(lentry);
+	}
+
+	return ret;
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 83d6b43..275b086 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -755,6 +755,11 @@ struct inode {
 
 	__u32			i_generation;
 
+#ifdef CONFIG_FSNOTIFY
+	__u32			i_fsnotify_mask; /* all events this inode cares about */
+	struct hlist_head	i_fsnotify_mark_entries; /* fsnotify mark entries */
+#endif
+
 #ifdef CONFIG_DNOTIFY
 	unsigned long		i_dnotify_mask; /* Directory notify events */
 	struct dnotify_struct	*i_dnotify; /* for directory notifications */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 6c9ebef..3856eb6 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -100,6 +100,14 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 }
 
 /*
+ * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed
+ */
+static inline void fsnotify_inode_delete(struct inode *inode)
+{
+	__fsnotify_inode_delete(inode);
+}
+
+/*
  * fsnotify_nameremove - a filename was removed from a directory
  */
 static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
@@ -121,6 +129,7 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 	inotify_inode_is_dead(inode);
 
 	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE);
+	__fsnotify_inode_delete(inode);
 }
 
 /*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 1a55718..cad5c4d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -48,17 +48,25 @@
 
 struct fsnotify_group;
 struct fsnotify_event;
+struct fsnotify_mark_entry;
 
 /*
  * Each group much define these ops.  The fsnotify infrastructure will call
  * these operations for each relevant group.
  *
+ * should_send_event - given a group, inode, and mask this function determines
+ *		if the group is interested in this event.
  * handle_event - main call for a group to handle an fs event
  * free_group_priv - called when a group refcnt hits 0 to clean up the private union
+ * freeing-mark - this means that a mark has been flagged to die when everything
+ *		finishes using it.  The function is supplied with what must be a
+ *		valid group and inode to use to clean up.
  */
 struct fsnotify_ops {
+	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u32 mask);
 	int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
 	void (*free_group_priv)(struct fsnotify_group *group);
+	void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
 };
 
 /*
@@ -97,7 +105,14 @@ struct fsnotify_group {
 
 	const struct fsnotify_ops *ops;	/* how this group handles things */
 
-	/* prevents double list_del of group_list.  protected by global fsnotify_gr_mutex */
+	/* stores all fastapth entries assoc with this group so they can be cleaned on unregister */
+	spinlock_t mark_lock;		/* protect mark_entries list */
+	atomic_t num_marks;		/* 1 for each mark entry and 1 for not being
+					 * past the point of no return when freeing
+					 * a group */
+	struct list_head mark_entries;	/* all inode mark entries for this group */
+
+	/* prevents double list_del of group_list.  protected by global fsnotify_grp_mutex */
 	bool on_group_list;
 
 	/* groups can define private fields here or use the void *private */
@@ -137,12 +152,38 @@ struct fsnotify_event {
 	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
 };
 
+/*
+ * a mark is simply an entry attached to an in core inode which allows an
+ * fsnotify listener to indicate they are either no longer interested in events
+ * of a type matching mask or only interested in those events.
+ *
+ * these are flushed when an inode is evicted from core and may be flushed
+ * when the inode is modified (as seen by fsnotify_access).  Some fsnotify users
+ * (such as dnotify) will flush these when the open fd is closed and not at
+ * inode eviction or modification.
+ */
+struct fsnotify_mark_entry {
+	__u32 mask;			/* mask this mark entry is for */
+	/* we hold ref for each i_list and g_list.  also one ref for each 'thing'
+	 * in kernel that found and may be using this mark. */
+	atomic_t refcnt;		/* active things looking at this mark */
+	struct inode *inode;		/* inode this entry is associated with */
+	struct fsnotify_group *group;	/* group this mark entry is for */
+	struct hlist_node i_list;	/* list of mark_entries by inode->i_fsnotify_mark_entries */
+	struct list_head g_list;	/* list of mark_entries by group->i_fsnotify_mark_entries */
+	spinlock_t lock;		/* protect group, inode, and killme */
+	struct list_head free_i_list;	/* tmp list used when freeing this mark */
+	struct list_head free_g_list;	/* tmp list used when freeing this mark */
+	void (*free_mark)(struct fsnotify_mark_entry *entry); /* called on final put+free */
+};
+
 #ifdef CONFIG_FSNOTIFY
 
 /* called from the vfs helpers */
 
 /* main fsnotify call to send events */
 extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is);
+extern void __fsnotify_inode_delete(struct inode *inode);
 
 
 /* called from fsnotify listeners, such as fanotify or dnotify */
@@ -153,6 +194,8 @@ extern void fsnotify_recalc_global_mask(void);
 extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num,
 						    __u32 mask,
 						    const struct fsnotify_ops *ops);
+/* run all marks associated with this group and update group->mask */
+extern void fsnotify_recalc_group_mask(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_obtain_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
 
@@ -163,6 +206,22 @@ extern void fsnotify_put_event(struct fsnotify_event *event);
 extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group,
 									struct fsnotify_event *event);
 
+/* functions used to manipulate the marks attached to inodes */
+
+/* run all marks associated with an inode and update inode->i_fsnotify_mask */
+extern void fsnotify_recalc_inode_mask(struct inode *inode);
+extern void fsnotify_init_mark(struct fsnotify_mark_entry *entry, void (*free_mark)(struct fsnotify_mark_entry *entry));
+/* find (and take a reference) to a mark associated with group and inode */
+extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode);
+/* attach the mark to both the group and the inode */
+extern int fsnotify_add_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group, struct inode *inode);
+/* given a mark, flag it to be freed when all references are dropped */
+extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
+/* run all the marks in a group, and flag them to be freed */
+extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
+extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
+extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
+
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 						    void *data, int data_is);
@@ -170,6 +229,10 @@ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32
 
 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 {}
+
+static inline void __fsnotify_inode_delete(struct inode *inode)
+{}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
cgit v0.10.2


From c28f7e56e9d95fb531dc3be8df2e7f52bee76d21 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:29 -0400
Subject: fsnotify: parent event notification

inotify and dnotify both use a similar parent notification mechanism.  We
add a generic parent notification mechanism to fsnotify for both of these
to use.  This new machanism also adds the dentry flag optimization which
exists for inotify to dnotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index d565462..7fc7600 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -35,6 +35,97 @@ void __fsnotify_inode_delete(struct inode *inode)
 EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
 
 /*
+ * Given an inode, first check if we care what happens to our children.  Inotify
+ * and dnotify both tell their parents about events.  If we care about any event
+ * on a child we run all of our children and set a dentry flag saying that the
+ * parent cares.  Thus when an event happens on a child it can quickly tell if
+ * if there is a need to find a parent and send the event to the parent.
+ */
+void __fsnotify_update_child_dentry_flags(struct inode *inode)
+{
+	struct dentry *alias;
+	int watched;
+
+	if (!S_ISDIR(inode->i_mode))
+		return;
+
+	/* determine if the children should tell inode about their events */
+	watched = fsnotify_inode_watches_children(inode);
+
+	spin_lock(&dcache_lock);
+	/* run all of the dentries associated with this inode.  Since this is a
+	 * directory, there damn well better only be one item on this list */
+	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		struct dentry *child;
+
+		/* run all of the children of the original inode and fix their
+		 * d_flags to indicate parental interest (their parent is the
+		 * original inode) */
+		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+			if (!child->d_inode)
+				continue;
+
+			spin_lock(&child->d_lock);
+			if (watched)
+				child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
+			else
+				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+			spin_unlock(&child->d_lock);
+		}
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/* Notify this dentry's parent about a child's events. */
+void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+{
+	struct dentry *parent;
+	struct inode *p_inode;
+	bool send = false;
+	bool should_update_children = false;
+
+	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
+		return;
+
+	spin_lock(&dentry->d_lock);
+	parent = dentry->d_parent;
+	p_inode = parent->d_inode;
+
+	if (fsnotify_inode_watches_children(p_inode)) {
+		if (p_inode->i_fsnotify_mask & mask) {
+			dget(parent);
+			send = true;
+		}
+	} else {
+		/*
+		 * The parent doesn't care about events on it's children but
+		 * at least one child thought it did.  We need to run all the
+		 * children and update their d_flags to let them know p_inode
+		 * doesn't care about them any more.
+		 */
+		dget(parent);
+		should_update_children = true;
+	}
+
+	spin_unlock(&dentry->d_lock);
+
+	if (send) {
+		/* we are notifying a parent so come up with the new mask which
+		 * specifies these are events which came from a child. */
+		mask |= FS_EVENT_ON_CHILD;
+
+		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE);
+		dput(parent);
+	}
+
+	if (unlikely(should_update_children)) {
+		__fsnotify_update_child_dentry_flags(p_inode);
+		dput(parent);
+	}
+}
+EXPORT_SYMBOL_GPL(__fsnotify_parent);
+
+/*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
  * out to all of the registered fsnotify_group.  Those groups can then use the
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 8ebcbe8..83b8ec0 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -17,4 +17,9 @@ extern __u32 fsnotify_mask;
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
 /* run the list of all marks associated with inode and flag them to be freed */
 extern void fsnotify_clear_marks_by_inode(struct inode *inode);
+/*
+ * update the dentry->d_flags of all of inode's children to indicate if inode cares
+ * about events that happen to its children.
+ */
+extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
 #endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index cdc1541..a395348 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -131,6 +131,8 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	fsnotify_recalc_inode_mask_locked(inode);
 	spin_unlock(&inode->i_lock);
+
+	__fsnotify_update_child_dentry_flags(inode);
 }
 
 /*
@@ -190,6 +192,19 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
 	group->ops->freeing_mark(entry, group);
 
 	/*
+	 * __fsnotify_update_child_dentry_flags(inode);
+	 *
+	 * I really want to call that, but we can't, we have no idea if the inode
+	 * still exists the second we drop the entry->lock.
+	 *
+	 * The next time an event arrive to this inode from one of it's children
+	 * __fsnotify_parent will see that the inode doesn't care about it's
+	 * children and will update all of these flags then.  So really this
+	 * is just a lazy update (and could be a perf win...)
+	 */
+
+
+	/*
 	 * it's possible that this group tried to destroy itself, but this
 	 * this mark was simultaneously being freed by inode.  If that's the
 	 * case, we finish freeing the group here.
@@ -323,6 +338,8 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 	if (lentry) {
 		ret = -EEXIST;
 		fsnotify_put_mark(lentry);
+	} else {
+		__fsnotify_update_child_dentry_flags(inode);
 	}
 
 	return ret;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 1515636..9797800 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -180,10 +180,12 @@ d_iput:		no		no		no       yes
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
 
-#define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
+#define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched by inotify */
 
 #define DCACHE_COOKIE		0x0040	/* For use by dcookie subsystem */
 
+#define DCACHE_FSNOTIFY_PARENT_WATCHED	0x0080 /* Parent inode is watched by some fsnotify listener */
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 3856eb6..6a662ed 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -23,15 +23,31 @@
 static inline void fsnotify_d_instantiate(struct dentry *entry,
 						struct inode *inode)
 {
+	__fsnotify_d_instantiate(entry, inode);
+
 	inotify_d_instantiate(entry, inode);
 }
 
+/* Notify this dentry's parent about a child's events. */
+static inline void fsnotify_parent(struct dentry *dentry, __u32 mask)
+{
+	__fsnotify_parent(dentry, mask);
+
+	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
+}
+
 /*
  * fsnotify_d_move - entry has been moved
  * Called with dcache_lock and entry->d_lock held.
  */
 static inline void fsnotify_d_move(struct dentry *entry)
 {
+	/*
+	 * On move we need to update entry->d_flags to indicate if the new parent
+	 * cares about events from this entry.
+	 */
+	__fsnotify_update_dcache_flags(entry);
+
 	inotify_d_move(entry);
 }
 
@@ -117,7 +133,8 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
 	if (isdir)
 		mask |= FS_IN_ISDIR;
 	dnotify_parent(dentry, DN_DELETE);
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
+
+	fsnotify_parent(dentry, mask);
 }
 
 /*
@@ -188,9 +205,9 @@ static inline void fsnotify_access(struct dentry *dentry)
 		mask |= FS_IN_ISDIR;
 
 	dnotify_parent(dentry, DN_ACCESS);
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
@@ -206,9 +223,9 @@ static inline void fsnotify_modify(struct dentry *dentry)
 		mask |= FS_IN_ISDIR;
 
 	dnotify_parent(dentry, DN_MODIFY);
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
@@ -223,9 +240,9 @@ static inline void fsnotify_open(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
@@ -236,16 +253,15 @@ static inline void fsnotify_close(struct file *file)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	const char *name = dentry->d_name.name;
 	fmode_t mode = file->f_mode;
 	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE);
 }
 
@@ -260,9 +276,9 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
@@ -311,8 +327,8 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		if (S_ISDIR(inode->i_mode))
 			in_mask |= FS_IN_ISDIR;
 		inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL);
-		inotify_dentry_parent_queue_event(dentry, in_mask, 0,
-						  dentry->d_name.name);
+
+		fsnotify_parent(dentry, in_mask);
 		fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE);
 	}
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index cad5c4d..13d2dd5 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -46,6 +46,17 @@
 #define FS_DN_RENAME		0x10000000	/* file renamed */
 #define FS_DN_MULTISHOT		0x20000000	/* dnotify multishot */
 
+/* This inode cares about things that happen to its children.  Always set for
+ * dnotify and inotify. */
+#define FS_EVENT_ON_CHILD	0x08000000
+
+/* This is a list of all events that may get sent to a parernt based on fs event
+ * happening to inodes inside that directory */
+#define FS_EVENTS_POSS_ON_CHILD   (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\
+				   FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\
+				   FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
+				   FS_DELETE)
+
 struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark_entry;
@@ -183,8 +194,52 @@ struct fsnotify_mark_entry {
 
 /* main fsnotify call to send events */
 extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is);
+extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
 
+static inline int fsnotify_inode_watches_children(struct inode *inode)
+{
+	/* FS_EVENT_ON_CHILD is set if the inode may care */
+	if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD))
+		return 0;
+	/* this inode might care about child events, does it care about the
+	 * specific set of events that can happen on a child? */
+	return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD;
+}
+
+/*
+ * Update the dentry with a flag indicating the interest of its parent to receive
+ * filesystem events when those events happens to this dentry->d_inode.
+ */
+static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	assert_spin_locked(&dcache_lock);
+	assert_spin_locked(&dentry->d_lock);
+
+	parent = dentry->d_parent;
+	if (fsnotify_inode_watches_children(parent->d_inode))
+		dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
+	else
+		dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+}
+
+/*
+ * fsnotify_d_instantiate - instantiate a dentry for inode
+ * Called with dcache_lock held.
+ */
+static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
+{
+	if (!inode)
+		return;
+
+	assert_spin_locked(&dcache_lock);
+
+	spin_lock(&dentry->d_lock);
+	__fsnotify_update_dcache_flags(dentry);
+	spin_unlock(&dentry->d_lock);
+}
 
 /* called from fsnotify listeners, such as fanotify or dnotify */
 
@@ -230,9 +285,18 @@ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32
 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 {}
 
+static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+{}
+
 static inline void __fsnotify_inode_delete(struct inode *inode)
 {}
 
+static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
+{}
+
+static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
+{}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
cgit v0.10.2


From 3c5119c05d624f95f4967d16b38c9624b816bdb9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:33 -0400
Subject: dnotify: reimplement dnotify using fsnotify

Reimplement dnotify using fsnotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index ccdb575..96e0c8c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1802,10 +1802,10 @@ F:	drivers/char/epca*
 F:	drivers/char/digi*
 
 DIRECTORY NOTIFICATION (DNOTIFY)
-P:	Stephen Rothwell
-M:	sfr@canb.auug.org.au
+P:	Eric Paris
+M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org
-S:	Supported
+S:	Maintained
 F:	Documentation/filesystems/dnotify.txt
 F:	fs/notify/dnotify/
 F:	include/linux/dnotify.h
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
index 26adf5d..904ff8d 100644
--- a/fs/notify/dnotify/Kconfig
+++ b/fs/notify/dnotify/Kconfig
@@ -1,5 +1,6 @@
 config DNOTIFY
 	bool "Dnotify support"
+	depends on FSNOTIFY
 	default y
 	help
 	  Dnotify is a directory-based per-fd file change notification system
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index b0aa2cd..d9d80f5 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 2000,2001,2002 Stephen Rothwell
  *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * dnotify was largly rewritten to use the new fsnotify infrastructure
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2, or (at your option) any
@@ -21,24 +24,178 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/fdtable.h>
+#include <linux/fsnotify_backend.h>
 
 int dir_notify_enable __read_mostly = 1;
 
-static struct kmem_cache *dn_cache __read_mostly;
+static struct kmem_cache *dnotify_struct_cache __read_mostly;
+static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
+static struct fsnotify_group *dnotify_group __read_mostly;
+static DEFINE_MUTEX(dnotify_mark_mutex);
+
+/*
+ * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
+ * is being watched by dnotify.  If multiple userspace applications are watching
+ * the same directory with dnotify their information is chained in dn
+ */
+struct dnotify_mark_entry {
+	struct fsnotify_mark_entry fsn_entry;
+	struct dnotify_struct *dn;
+};
 
-static void redo_inode_mask(struct inode *inode)
+/*
+ * When a process starts or stops watching an inode the set of events which
+ * dnotify cares about for that inode may change.  This function runs the
+ * list of everything receiving dnotify events about this directory and calculates
+ * the set of all those events.  After it updates what dnotify is interested in
+ * it calls the fsnotify function so it can update the set of all events relevant
+ * to this inode.
+ */
+static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
 {
-	unsigned long new_mask;
+	__u32 new_mask, old_mask;
 	struct dnotify_struct *dn;
+	struct dnotify_mark_entry *dnentry  = container_of(entry,
+							   struct dnotify_mark_entry,
+							   fsn_entry);
+
+	assert_spin_locked(&entry->lock);
 
+	old_mask = entry->mask;
 	new_mask = 0;
-	for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
-		new_mask |= dn->dn_mask & ~DN_MULTISHOT;
-	inode->i_dnotify_mask = new_mask;
+	for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
+		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
+	entry->mask = new_mask;
+
+	if (old_mask == new_mask)
+		return;
+
+	if (entry->inode)
+		fsnotify_recalc_inode_mask(entry->inode);
 }
 
+/*
+ * Mains fsnotify call where events are delivered to dnotify.
+ * Find the dnotify mark on the relevant inode, run the list of dnotify structs
+ * on that mark and determine which of them has expressed interest in receiving
+ * events of this type.  When found send the correct process and signal and
+ * destroy the dnotify struct if it was not registered to receive multiple
+ * events.
+ */
+static int dnotify_handle_event(struct fsnotify_group *group,
+				struct fsnotify_event *event)
+{
+	struct fsnotify_mark_entry *entry = NULL;
+	struct dnotify_mark_entry *dnentry;
+	struct inode *to_tell;
+	struct dnotify_struct *dn;
+	struct dnotify_struct **prev;
+	struct fown_struct *fown;
+
+	to_tell = event->to_tell;
+
+	spin_lock(&to_tell->i_lock);
+	entry = fsnotify_find_mark_entry(group, to_tell);
+	spin_unlock(&to_tell->i_lock);
+
+	/* unlikely since we alreay passed dnotify_should_send_event() */
+	if (unlikely(!entry))
+		return 0;
+	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+
+	spin_lock(&entry->lock);
+	prev = &dnentry->dn;
+	while ((dn = *prev) != NULL) {
+		if ((dn->dn_mask & event->mask) == 0) {
+			prev = &dn->dn_next;
+			continue;
+		}
+		fown = &dn->dn_filp->f_owner;
+		send_sigio(fown, dn->dn_fd, POLL_MSG);
+		if (dn->dn_mask & FS_DN_MULTISHOT)
+			prev = &dn->dn_next;
+		else {
+			*prev = dn->dn_next;
+			kmem_cache_free(dnotify_struct_cache, dn);
+			dnotify_recalc_inode_mask(entry);
+		}
+	}
+
+	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry);
+
+	return 0;
+}
+
+/*
+ * Given an inode and mask determine if dnotify would be interested in sending
+ * userspace notification for that pair.
+ */
+static bool dnotify_should_send_event(struct fsnotify_group *group,
+				      struct inode *inode, __u32 mask)
+{
+	struct fsnotify_mark_entry *entry;
+	bool send;
+
+	/* !dir_notify_enable should never get here, don't waste time checking
+	if (!dir_notify_enable)
+		return 0; */
+
+	/* not a dir, dnotify doesn't care */
+	if (!S_ISDIR(inode->i_mode))
+		return false;
+
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+
+	/* no mark means no dnotify watch */
+	if (!entry)
+		return false;
+
+	spin_lock(&entry->lock);
+	send = (mask & entry->mask) ? true : false;
+	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
+
+	return send;
+}
+
+static void dnotify_freeing_mark(struct fsnotify_mark_entry *entry,
+				 struct fsnotify_group *group)
+{
+	/* dnotify doesn't care than an inode is on the way out */
+}
+
+static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
+{
+	struct dnotify_mark_entry *dnentry = container_of(entry,
+							  struct dnotify_mark_entry,
+							  fsn_entry);
+
+	BUG_ON(dnentry->dn);
+
+	kmem_cache_free(dnotify_mark_entry_cache, dnentry);
+}
+
+static struct fsnotify_ops dnotify_fsnotify_ops = {
+	.handle_event = dnotify_handle_event,
+	.should_send_event = dnotify_should_send_event,
+	.free_group_priv = NULL,
+	.freeing_mark = dnotify_freeing_mark,
+};
+
+/*
+ * Called every time a file is closed.  Looks first for a dnotify mark on the
+ * inode.  If one is found run all of the ->dn entries attached to that
+ * mark for one relevant to this process closing the file and remove that
+ * dnotify_struct.  If that was the last dnotify_struct also remove the
+ * fsnotify_mark_entry.
+ */
 void dnotify_flush(struct file *filp, fl_owner_t id)
 {
+	struct fsnotify_mark_entry *entry;
+	struct dnotify_mark_entry *dnentry;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
 	struct inode *inode;
@@ -46,145 +203,243 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	inode = filp->f_path.dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return;
+
 	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
+	entry = fsnotify_find_mark_entry(dnotify_group, inode);
+	spin_unlock(&inode->i_lock);
+	if (!entry)
+		return;
+	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+
+	mutex_lock(&dnotify_mark_mutex);
+
+	spin_lock(&entry->lock);
+	prev = &dnentry->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
 			*prev = dn->dn_next;
-			redo_inode_mask(inode);
-			kmem_cache_free(dn_cache, dn);
+			kmem_cache_free(dnotify_struct_cache, dn);
+			dnotify_recalc_inode_mask(entry);
 			break;
 		}
 		prev = &dn->dn_next;
 	}
-	spin_unlock(&inode->i_lock);
+
+	spin_unlock(&entry->lock);
+
+	/* nothing else could have found us thanks to the dnotify_mark_mutex */
+	if (dnentry->dn == NULL)
+		fsnotify_destroy_mark_by_entry(entry);
+
+	fsnotify_recalc_group_mask(dnotify_group);
+
+	mutex_unlock(&dnotify_mark_mutex);
+
+	fsnotify_put_mark(entry);
+}
+
+/* this conversion is done only at watch creation */
+static __u32 convert_arg(unsigned long arg)
+{
+	__u32 new_mask = FS_EVENT_ON_CHILD;
+
+	if (arg & DN_MULTISHOT)
+		new_mask |= FS_DN_MULTISHOT;
+	if (arg & DN_DELETE)
+		new_mask |= (FS_DELETE | FS_MOVED_FROM);
+	if (arg & DN_MODIFY)
+		new_mask |= FS_MODIFY;
+	if (arg & DN_ACCESS)
+		new_mask |= FS_ACCESS;
+	if (arg & DN_ATTRIB)
+		new_mask |= FS_ATTRIB;
+	if (arg & DN_RENAME)
+		new_mask |= FS_DN_RENAME;
+	if (arg & DN_CREATE)
+		new_mask |= (FS_CREATE | FS_MOVED_TO);
+
+	return new_mask;
 }
 
+/*
+ * If multiple processes watch the same inode with dnotify there is only one
+ * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
+ * onto that mark.  This function either attaches the new dnotify_struct onto
+ * that list, or it |= the mask onto an existing dnofiy_struct.
+ */
+static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
+		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
+{
+	struct dnotify_struct *odn;
+
+	odn = dnentry->dn;
+	while (odn != NULL) {
+		/* adding more events to existing dnofiy_struct? */
+		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
+			odn->dn_fd = fd;
+			odn->dn_mask |= mask;
+			return -EEXIST;
+		}
+		odn = odn->dn_next;
+	}
+
+	dn->dn_mask = mask;
+	dn->dn_fd = fd;
+	dn->dn_filp = filp;
+	dn->dn_owner = id;
+	dn->dn_next = dnentry->dn;
+	dnentry->dn = dn;
+
+	return 0;
+}
+
+/*
+ * When a process calls fcntl to attach a dnotify watch to a directory it ends
+ * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
+ * attached to the fsnotify_mark.
+ */
 int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 {
+	struct dnotify_mark_entry *new_dnentry, *dnentry;
+	struct fsnotify_mark_entry *new_entry, *entry;
 	struct dnotify_struct *dn;
-	struct dnotify_struct *odn;
-	struct dnotify_struct **prev;
 	struct inode *inode;
 	fl_owner_t id = current->files;
 	struct file *f;
-	int error = 0;
+	int destroy = 0, error = 0;
+	__u32 mask;
+
+	/* we use these to tell if we need to kfree */
+	new_entry = NULL;
+	dn = NULL;
 
+	if (!dir_notify_enable) {
+		error = -EINVAL;
+		goto out_err;
+	}
+
+	/* a 0 mask means we are explicitly removing the watch */
 	if ((arg & ~DN_MULTISHOT) == 0) {
 		dnotify_flush(filp, id);
-		return 0;
+		error = 0;
+		goto out_err;
 	}
-	if (!dir_notify_enable)
-		return -EINVAL;
+
+	/* dnotify only works on directories */
 	inode = filp->f_path.dentry->d_inode;
-	if (!S_ISDIR(inode->i_mode))
-		return -ENOTDIR;
-	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
-	if (dn == NULL)
-		return -ENOMEM;
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((odn = *prev) != NULL) {
-		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
-			odn->dn_fd = fd;
-			odn->dn_mask |= arg;
-			inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-			goto out_free;
-		}
-		prev = &odn->dn_next;
+	if (!S_ISDIR(inode->i_mode)) {
+		error = -ENOTDIR;
+		goto out_err;
 	}
 
-	rcu_read_lock();
-	f = fcheck(fd);
-	rcu_read_unlock();
-	/* we'd lost the race with close(), sod off silently */
-	/* note that inode->i_lock prevents reordering problems
-	 * between accesses to descriptor table and ->i_dnotify */
-	if (f != filp)
-		goto out_free;
+	/* expect most fcntl to add new rather than augment old */
+	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
+	if (!dn) {
+		error = -ENOMEM;
+		goto out_err;
+	}
 
-	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-	if (error)
-		goto out_free;
+	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
+	new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
+	if (!new_dnentry) {
+		error = -ENOMEM;
+		goto out_err;
+	}
 
-	dn->dn_mask = arg;
-	dn->dn_fd = fd;
-	dn->dn_filp = filp;
-	dn->dn_owner = id;
-	inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-	dn->dn_next = inode->i_dnotify;
-	inode->i_dnotify = dn;
-	spin_unlock(&inode->i_lock);
-	return 0;
+	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
+	mask = convert_arg(arg);
 
-out_free:
-	spin_unlock(&inode->i_lock);
-	kmem_cache_free(dn_cache, dn);
-	return error;
-}
+	/* set up the new_entry and new_dnentry */
+	new_entry = &new_dnentry->fsn_entry;
+	fsnotify_init_mark(new_entry, dnotify_free_mark);
+	new_entry->mask = mask;
+	new_dnentry->dn = NULL;
 
-void __inode_dir_notify(struct inode *inode, unsigned long event)
-{
-	struct dnotify_struct *	dn;
-	struct dnotify_struct **prev;
-	struct fown_struct *	fown;
-	int			changed = 0;
+	/* this is needed to prevent the fcntl/close race described below */
+	mutex_lock(&dnotify_mark_mutex);
 
+	/* add the new_entry or find an old one. */
 	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((dn = *prev) != NULL) {
-		if ((dn->dn_mask & event) == 0) {
-			prev = &dn->dn_next;
-			continue;
-		}
-		fown = &dn->dn_filp->f_owner;
-		send_sigio(fown, dn->dn_fd, POLL_MSG);
-		if (dn->dn_mask & DN_MULTISHOT)
-			prev = &dn->dn_next;
-		else {
-			*prev = dn->dn_next;
-			changed = 1;
-			kmem_cache_free(dn_cache, dn);
-		}
-	}
-	if (changed)
-		redo_inode_mask(inode);
+	entry = fsnotify_find_mark_entry(dnotify_group, inode);
 	spin_unlock(&inode->i_lock);
-}
-
-EXPORT_SYMBOL(__inode_dir_notify);
+	if (entry) {
+		dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+		spin_lock(&entry->lock);
+	} else {
+		fsnotify_add_mark(new_entry, dnotify_group, inode);
+		spin_lock(&new_entry->lock);
+		entry = new_entry;
+		dnentry = new_dnentry;
+		/* we used new_entry, so don't free it */
+		new_entry = NULL;
+	}
 
-/*
- * This is hopelessly wrong, but unfixable without API changes.  At
- * least it doesn't oops the kernel...
- *
- * To safely access ->d_parent we need to keep d_move away from it.  Use the
- * dentry's d_lock for this.
- */
-void dnotify_parent(struct dentry *dentry, unsigned long event)
-{
-	struct dentry *parent;
+	rcu_read_lock();
+	f = fcheck(fd);
+	rcu_read_unlock();
 
-	if (!dir_notify_enable)
-		return;
+	/* if (f != filp) means that we lost a race and another task/thread
+	 * actually closed the fd we are still playing with before we grabbed
+	 * the dnotify_mark_mutex and entry->lock.  Since closing the fd is the
+	 * only time we clean up the mark entries we need to get our mark off
+	 * the list. */
+	if (f != filp) {
+		/* if we added ourselves, shoot ourselves, it's possible that
+		 * the flush actually did shoot this entry.  That's fine too
+		 * since multiple calls to destroy_mark is perfectly safe, if
+		 * we found a dnentry already attached to the inode, just sod
+		 * off silently as the flush at close time dealt with it.
+		 */
+		if (dnentry == new_dnentry)
+			destroy = 1;
+		goto out;
+	}
 
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	if (parent->d_inode->i_dnotify_mask & event) {
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		__inode_dir_notify(parent->d_inode, event);
-		dput(parent);
-	} else {
-		spin_unlock(&dentry->d_lock);
+	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+	if (error) {
+		/* if we added, we must shoot */
+		if (dnentry == new_dnentry)
+			destroy = 1;
+		goto out;
 	}
+
+	error = attach_dn(dn, dnentry, id, fd, filp, mask);
+	/* !error means that we attached the dn to the dnentry, so don't free it */
+	if (!error)
+		dn = NULL;
+	/* -EEXIST means that we didn't add this new dn and used an old one.
+	 * that isn't an error (and the unused dn should be freed) */
+	else if (error == -EEXIST)
+		error = 0;
+
+	dnotify_recalc_inode_mask(entry);
+out:
+	spin_unlock(&entry->lock);
+
+	if (destroy)
+		fsnotify_destroy_mark_by_entry(entry);
+
+	fsnotify_recalc_group_mask(dnotify_group);
+
+	mutex_unlock(&dnotify_mark_mutex);
+	fsnotify_put_mark(entry);
+out_err:
+	if (new_entry)
+		fsnotify_put_mark(new_entry);
+	if (dn)
+		kmem_cache_free(dnotify_struct_cache, dn);
+	return error;
 }
-EXPORT_SYMBOL_GPL(dnotify_parent);
 
 static int __init dnotify_init(void)
 {
-	dn_cache = kmem_cache_create("dnotify_cache",
-		sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
+	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
+	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
+
+	dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
+					      0, &dnotify_fsnotify_ops);
+	if (IS_ERR(dnotify_group))
+		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
 }
 
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index 102a902..ecc0628 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -10,7 +10,7 @@
 
 struct dnotify_struct {
 	struct dnotify_struct *	dn_next;
-	unsigned long		dn_mask;
+	__u32			dn_mask;
 	int			dn_fd;
 	struct file *		dn_filp;
 	fl_owner_t		dn_owner;
@@ -21,23 +21,18 @@ struct dnotify_struct {
 
 #ifdef CONFIG_DNOTIFY
 
-extern void __inode_dir_notify(struct inode *, unsigned long);
+#define DNOTIFY_ALL_EVENTS (FS_DELETE | FS_DELETE_CHILD |\
+			    FS_MODIFY | FS_MODIFY_CHILD |\
+			    FS_ACCESS | FS_ACCESS_CHILD |\
+			    FS_ATTRIB | FS_ATTRIB_CHILD |\
+			    FS_CREATE | FS_DN_RENAME |\
+			    FS_MOVED_FROM | FS_MOVED_TO)
+
 extern void dnotify_flush(struct file *, fl_owner_t);
 extern int fcntl_dirnotify(int, struct file *, unsigned long);
-extern void dnotify_parent(struct dentry *, unsigned long);
-
-static inline void inode_dir_notify(struct inode *inode, unsigned long event)
-{
-	if (inode->i_dnotify_mask & (event))
-		__inode_dir_notify(inode, event);
-}
 
 #else
 
-static inline void __inode_dir_notify(struct inode *inode, unsigned long event)
-{
-}
-
 static inline void dnotify_flush(struct file *filp, fl_owner_t id)
 {
 }
@@ -47,14 +42,6 @@ static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	return -EINVAL;
 }
 
-static inline void dnotify_parent(struct dentry *dentry, unsigned long event)
-{
-}
-
-static inline void inode_dir_notify(struct inode *inode, unsigned long event)
-{
-}
-
 #endif /* CONFIG_DNOTIFY */
 
 #endif /* __KERNEL __ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 275b086..323b5ce 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -760,11 +760,6 @@ struct inode {
 	struct hlist_head	i_fsnotify_mark_entries; /* fsnotify mark entries */
 #endif
 
-#ifdef CONFIG_DNOTIFY
-	unsigned long		i_dnotify_mask; /* Directory notify events */
-	struct dnotify_struct	*i_dnotify; /* for directory notifications */
-#endif
-
 #ifdef CONFIG_INOTIFY
 	struct list_head	inotify_watches; /* watches on this inode */
 	struct mutex		inotify_mutex;	/* protects the watches list */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 6a662ed..db12d9d 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -74,13 +74,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	__u32 new_dir_mask = 0;
 
 	if (old_dir == new_dir) {
-		inode_dir_notify(old_dir, DN_RENAME);
 		old_dir_mask = FS_DN_RENAME;
-	} else {
-		inode_dir_notify(old_dir, DN_DELETE);
-		old_dir_mask = FS_DELETE;
-		inode_dir_notify(new_dir, DN_CREATE);
-		new_dir_mask = FS_CREATE;
 	}
 
 	if (isdir) {
@@ -132,7 +126,6 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
 
 	if (isdir)
 		mask |= FS_IN_ISDIR;
-	dnotify_parent(dentry, DN_DELETE);
 
 	fsnotify_parent(dentry, mask);
 }
@@ -154,7 +147,6 @@ static inline void fsnotify_inoderemove(struct inode *inode)
  */
 static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 {
-	inode_dir_notify(inode, DN_CREATE);
 	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
@@ -169,7 +161,6 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
  */
 static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry)
 {
-	inode_dir_notify(dir, DN_CREATE);
 	inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name,
 				  inode);
 	fsnotify_link_count(inode);
@@ -186,7 +177,6 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 	__u32 mask = (FS_CREATE | FS_IN_ISDIR);
 	struct inode *d_inode = dentry->d_inode;
 
-	inode_dir_notify(inode, DN_CREATE);
 	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
@@ -204,7 +194,6 @@ static inline void fsnotify_access(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	dnotify_parent(dentry, DN_ACCESS);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
@@ -222,7 +211,6 @@ static inline void fsnotify_modify(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	dnotify_parent(dentry, DN_MODIFY);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
@@ -289,47 +277,33 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 {
 	struct inode *inode = dentry->d_inode;
-	int dn_mask = 0;
-	__u32 in_mask = 0;
+	__u32 mask = 0;
+
+	if (ia_valid & ATTR_UID)
+		mask |= FS_ATTRIB;
+	if (ia_valid & ATTR_GID)
+		mask |= FS_ATTRIB;
+	if (ia_valid & ATTR_SIZE)
+		mask |= FS_MODIFY;
 
-	if (ia_valid & ATTR_UID) {
-		in_mask |= FS_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
-	if (ia_valid & ATTR_GID) {
-		in_mask |= FS_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
-	if (ia_valid & ATTR_SIZE) {
-		in_mask |= FS_MODIFY;
-		dn_mask |= DN_MODIFY;
-	}
 	/* both times implies a utime(s) call */
 	if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME))
-	{
-		in_mask |= FS_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	} else if (ia_valid & ATTR_ATIME) {
-		in_mask |= FS_ACCESS;
-		dn_mask |= DN_ACCESS;
-	} else if (ia_valid & ATTR_MTIME) {
-		in_mask |= FS_MODIFY;
-		dn_mask |= DN_MODIFY;
-	}
-	if (ia_valid & ATTR_MODE) {
-		in_mask |= FS_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
+		mask |= FS_ATTRIB;
+	else if (ia_valid & ATTR_ATIME)
+		mask |= FS_ACCESS;
+	else if (ia_valid & ATTR_MTIME)
+		mask |= FS_MODIFY;
+
+	if (ia_valid & ATTR_MODE)
+		mask |= FS_ATTRIB;
 
-	if (dn_mask)
-		dnotify_parent(dentry, dn_mask);
-	if (in_mask) {
+	if (mask) {
 		if (S_ISDIR(inode->i_mode))
-			in_mask |= FS_IN_ISDIR;
-		inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL);
+			mask |= FS_IN_ISDIR;
+		inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
-		fsnotify_parent(dentry, in_mask);
-		fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE);
+		fsnotify_parent(dentry, mask);
+		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 13d2dd5..9ea800e 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -57,6 +57,9 @@
 				   FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
 				   FS_DELETE)
 
+/* listeners that hard code group numbers near the top */
+#define DNOTIFY_GROUP_NUM	UINT_MAX
+
 struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark_entry;
-- 
cgit v0.10.2


From a2d8bc6cb4a3024661baf877242f123787d0c054 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:37 -0400
Subject: fsnotify: generic notification queue and waitq

inotify needs to do asyc notification in which event information is stored
on a queue until the listener is ready to receive it.  This patch
implements a generic notification queue for inotify (and later fanotify) to
store events to be sent at a later time.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 83b8ec0..4dc2408 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -13,8 +13,12 @@ extern struct list_head fsnotify_groups;
 /* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
 extern __u32 fsnotify_mask;
 
+/* destroy all events sitting in this groups notification queue */
+extern void fsnotify_flush_notify(struct fsnotify_group *group);
+
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
+
 /* run the list of all marks associated with inode and flag them to be freed */
 extern void fsnotify_clear_marks_by_inode(struct inode *inode);
 /*
@@ -22,4 +26,9 @@ extern void fsnotify_clear_marks_by_inode(struct inode *inode);
  * about events that happen to its children.
  */
 extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
+
+/* allocate and destroy and event holder to attach events to notification/access queues */
+extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
+extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
+
 #endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index a29d2fa..0e16771 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -91,6 +91,9 @@ static void fsnotify_get_group(struct fsnotify_group *group)
  */
 void fsnotify_final_destroy_group(struct fsnotify_group *group)
 {
+	/* clear the notification queue of all events */
+	fsnotify_flush_notify(group);
+
 	if (group->ops->free_group_priv)
 		group->ops->free_group_priv(group);
 
@@ -214,6 +217,12 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	group->group_num = group_num;
 	group->mask = mask;
 
+	mutex_init(&group->notification_mutex);
+	INIT_LIST_HEAD(&group->notification_list);
+	init_waitqueue_head(&group->notification_waitq);
+	group->q_len = 0;
+	group->max_events = UINT_MAX;
+
 	spin_lock_init(&group->mark_lock);
 	atomic_set(&group->num_marks, 0);
 	INIT_LIST_HEAD(&group->mark_entries);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index b8e9a87..dddecc7 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -16,6 +16,21 @@
  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+/*
+ * Basic idea behind the notification queue: An fsnotify group (like inotify)
+ * sends the userspace notification about events asyncronously some time after
+ * the event happened.  When inotify gets an event it will need to add that
+ * event to the group notify queue.  Since a single event might need to be on
+ * multiple group's notification queues we can't add the event directly to each
+ * queue and instead add a small "event_holder" to each queue.  This event_holder
+ * has a pointer back to the original event.  Since the majority of events are
+ * going to end up on one, and only one, notification queue we embed one
+ * event_holder into each event.  This means we have a single allocation instead
+ * of always needing two.  If the embedded event_holder is already in use by
+ * another group a new event_holder (from fsnotify_event_holder_cachep) will be
+ * allocated and used.
+ */
+
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -33,6 +48,21 @@
 #include "fsnotify.h"
 
 static struct kmem_cache *fsnotify_event_cachep;
+static struct kmem_cache *fsnotify_event_holder_cachep;
+/*
+ * This is a magic event we send when the q is too full.  Since it doesn't
+ * hold real event information we just keep one system wide and use it any time
+ * it is needed.  It's refcnt is set 1 at kernel init time and will never
+ * get set to 0 so it will never get 'freed'
+ */
+static struct fsnotify_event q_overflow_event;
+
+/* return true if the notify queue is empty, false otherwise */
+bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
+{
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+	return list_empty(&group->notification_list) ? true : false;
+}
 
 void fsnotify_get_event(struct fsnotify_event *event)
 {
@@ -52,19 +82,176 @@ void fsnotify_put_event(struct fsnotify_event *event)
 	}
 }
 
+struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
+{
+	return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
+}
+
+void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
+{
+	kmem_cache_free(fsnotify_event_holder_cachep, holder);
+}
+
+/*
+ * check if 2 events contain the same information.
+ */
+static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
+{
+	if ((old->mask == new->mask) &&
+	    (old->to_tell == new->to_tell) &&
+	    (old->data_type == new->data_type)) {
+		switch (old->data_type) {
+		case (FSNOTIFY_EVENT_INODE):
+			if (old->inode == new->inode)
+				return true;
+			break;
+		case (FSNOTIFY_EVENT_PATH):
+			if ((old->path.mnt == new->path.mnt) &&
+			    (old->path.dentry == new->path.dentry))
+				return true;
+		case (FSNOTIFY_EVENT_NONE):
+			return true;
+		};
+	}
+	return false;
+}
+
 /*
- * Allocate a new event which will be sent to each group's handle_event function
- * if the group was interested in this particular event.
+ * Add an event to the group notification queue.  The group can later pull this
+ * event off the queue to deal with.  If the event is successfully added to the
+ * group's notification queue, a reference is taken on event.
  */
-struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-					     void *data, int data_type)
+int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	struct fsnotify_event_holder *holder = NULL;
+	struct list_head *list = &group->notification_list;
+	struct fsnotify_event_holder *last_holder;
+	struct fsnotify_event *last_event;
+
+	/*
+	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
+	 * Check if we expect to be able to use that holder.  If not alloc a new
+	 * holder.
+	 * For the overflow event it's possible that something will use the in
+	 * event holder before we get the lock so we may need to jump back and
+	 * alloc a new holder, this can't happen for most events...
+	 */
+	if (!list_empty(&event->holder.event_list)) {
+alloc_holder:
+		holder = fsnotify_alloc_event_holder();
+		if (!holder)
+			return -ENOMEM;
+	}
+
+	mutex_lock(&group->notification_mutex);
+
+	if (group->q_len >= group->max_events)
+		event = &q_overflow_event;
+
+	spin_lock(&event->lock);
+
+	if (list_empty(&event->holder.event_list)) {
+		if (unlikely(holder))
+			fsnotify_destroy_event_holder(holder);
+		holder = &event->holder;
+	} else if (unlikely(!holder)) {
+		/* between the time we checked above and got the lock the in
+		 * event holder was used, go back and get a new one */
+		spin_unlock(&event->lock);
+		mutex_unlock(&group->notification_mutex);
+		goto alloc_holder;
+	}
+
+	if (!list_empty(list)) {
+		last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
+		last_event = last_holder->event;
+		if (event_compare(last_event, event)) {
+			spin_unlock(&event->lock);
+			mutex_unlock(&group->notification_mutex);
+			if (holder != &event->holder)
+				fsnotify_destroy_event_holder(holder);
+			return 0;
+		}
+	}
+
+	group->q_len++;
+	holder->event = event;
+
+	fsnotify_get_event(event);
+	list_add_tail(&holder->event_list, list);
+	spin_unlock(&event->lock);
+	mutex_unlock(&group->notification_mutex);
+
+	wake_up(&group->notification_waitq);
+	return 0;
+}
+
+/*
+ * Remove and return the first event from the notification list.  There is a
+ * reference held on this event since it was on the list.  It is the responsibility
+ * of the caller to drop this reference.
+ */
+struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
 {
 	struct fsnotify_event *event;
+	struct fsnotify_event_holder *holder;
 
-	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
-	if (!event)
-		return NULL;
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
+	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
+
+	event = holder->event;
+
+	spin_lock(&event->lock);
+	holder->event = NULL;
+	list_del_init(&holder->event_list);
+	spin_unlock(&event->lock);
+
+	/* event == holder means we are referenced through the in event holder */
+	if (holder != &event->holder)
+		fsnotify_destroy_event_holder(holder);
+
+	group->q_len--;
+
+	return event;
+}
+
+/*
+ * This will not remove the event, that must be done with fsnotify_remove_notify_event()
+ */
+struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
+{
+	struct fsnotify_event *event;
+	struct fsnotify_event_holder *holder;
+
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+
+	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
+	event = holder->event;
+
+	return event;
+}
+
+/*
+ * Called when a group is being torn down to clean up any outstanding
+ * event notifications.
+ */
+void fsnotify_flush_notify(struct fsnotify_group *group)
+{
+	struct fsnotify_event *event;
+
+	mutex_lock(&group->notification_mutex);
+	while (!fsnotify_notify_queue_is_empty(group)) {
+		event = fsnotify_remove_notify_event(group);
+		fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
+	}
+	mutex_unlock(&group->notification_mutex);
+}
+
+static void initialize_event(struct fsnotify_event *event)
+{
+	event->holder.event = NULL;
+	INIT_LIST_HEAD(&event->holder.event_list);
 	atomic_set(&event->refcnt, 1);
 
 	spin_lock_init(&event->lock);
@@ -72,7 +259,32 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 	event->path.dentry = NULL;
 	event->path.mnt = NULL;
 	event->inode = NULL;
+	event->data_type = FSNOTIFY_EVENT_NONE;
 
+	event->to_tell = NULL;
+}
+
+/*
+ * fsnotify_create_event - Allocate a new event which will be sent to each
+ * group's handle_event function if the group was interested in this
+ * particular event.
+ *
+ * @to_tell the inode which is supposed to receive the event (sometimes a
+ *	parent of the inode to which the event happened.
+ * @mask what actually happened.
+ * @data pointer to the object which was actually affected
+ * @data_type flag indication if the data is a file, path, inode, nothing...
+ */
+struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
+					     void *data, int data_type)
+{
+	struct fsnotify_event *event;
+
+	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
+	if (!event)
+		return NULL;
+
+	initialize_event(event);
 	event->to_tell = to_tell;
 
 	switch (data_type) {
@@ -114,6 +326,10 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 __init int fsnotify_notification_init(void)
 {
 	fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
+	fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
+
+	initialize_event(&q_overflow_event);
+	q_overflow_event.mask = FS_Q_OVERFLOW;
 
 	return 0;
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 9ea800e..15f8f82 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -119,6 +119,13 @@ struct fsnotify_group {
 
 	const struct fsnotify_ops *ops;	/* how this group handles things */
 
+	/* needed to send notification to userspace */
+	struct mutex notification_mutex;	/* protect the notification_list */
+	struct list_head notification_list;	/* list of event_holder this group needs to send to userspace */
+	wait_queue_head_t notification_waitq;	/* read() on the notification file blocks on this waitq */
+	unsigned int q_len;			/* events on the queue */
+	unsigned int max_events;		/* maximum events allowed on the list */
+
 	/* stores all fastapth entries assoc with this group so they can be cleaned on unregister */
 	spinlock_t mark_lock;		/* protect mark_entries list */
 	atomic_t num_marks;		/* 1 for each mark entry and 1 for not being
@@ -136,11 +143,32 @@ struct fsnotify_group {
 };
 
 /*
+ * A single event can be queued in multiple group->notification_lists.
+ *
+ * each group->notification_list will point to an event_holder which in turns points
+ * to the actual event that needs to be sent to userspace.
+ *
+ * Seemed cheaper to create a refcnt'd event and a small holder for every group
+ * than create a different event for every group
+ *
+ */
+struct fsnotify_event_holder {
+	struct fsnotify_event *event;
+	struct list_head event_list;
+};
+
+/*
  * all of the information about the original object we want to now send to
  * a group.  If you want to carry more info from the accessing task to the
  * listener this structure is where you need to be adding fields.
  */
 struct fsnotify_event {
+	/*
+	 * If we create an event we are also likely going to need a holder
+	 * to link to a group.  So embed one holder in the event.  Means only
+	 * one allocation for the common case where we only have one group
+	 */
+	struct fsnotify_event_holder holder;
 	spinlock_t lock;	/* protection for the associated event_holder and private_list */
 	/* to_tell may ONLY be dereferenced during handle_event(). */
 	struct inode *to_tell;	/* either the inode the event happened to or its parent */
@@ -264,6 +292,15 @@ extern void fsnotify_put_event(struct fsnotify_event *event);
 extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group,
 									struct fsnotify_event *event);
 
+/* attach the event to the group notification queue */
+extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event);
+/* true if the group notification queue is empty */
+extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
+/* return, but do not dequeue the first event on the notification queue */
+extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
+/* reutnr AND dequeue the first event on the notification queue */
+extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
+
 /* functions used to manipulate the marks attached to inodes */
 
 /* run all marks associated with an inode and update inode->i_fsnotify_mask */
-- 
cgit v0.10.2


From 62ffe5dfba056f7ba81d710fee9f28c58a42fdd6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:43 -0400
Subject: fsnotify: include pathnames with entries when possible

When inotify wants to send events to a directory about a child it includes
the name of the original file.  This patch collects that filename and makes
it available for notification.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 7fc7600..675129f 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -114,7 +114,8 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
-		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE);
+		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+			 dentry->d_name.name);
 		dput(parent);
 	}
 
@@ -131,7 +132,7 @@ EXPORT_SYMBOL_GPL(__fsnotify_parent);
  * out to all of the registered fsnotify_group.  Those groups can then use the
  * notification event in whatever means they feel necessary.
  */
-void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name)
 {
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
@@ -156,7 +157,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 			if (!group->ops->should_send_event(group, to_tell, mask))
 				continue;
 			if (!event) {
-				event = fsnotify_create_event(to_tell, mask, data, data_is);
+				event = fsnotify_create_event(to_tell, mask, data, data_is, file_name);
 				/* shit, we OOM'd and now we can't tell, maybe
 				 * someday someone else will want to do something
 				 * here */
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index dddecc7..c69b18b 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -78,6 +78,7 @@ void fsnotify_put_event(struct fsnotify_event *event)
 		if (event->data_type == FSNOTIFY_EVENT_PATH)
 			path_put(&event->path);
 
+		kfree(event->file_name);
 		kmem_cache_free(fsnotify_event_cachep, event);
 	}
 }
@@ -262,6 +263,9 @@ static void initialize_event(struct fsnotify_event *event)
 	event->data_type = FSNOTIFY_EVENT_NONE;
 
 	event->to_tell = NULL;
+
+	event->file_name = NULL;
+	event->name_len = 0;
 }
 
 /*
@@ -274,9 +278,10 @@ static void initialize_event(struct fsnotify_event *event)
  * @mask what actually happened.
  * @data pointer to the object which was actually affected
  * @data_type flag indication if the data is a file, path, inode, nothing...
+ * @name the filename, if available
  */
 struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-					     void *data, int data_type)
+					     void *data, int data_type, const char *name)
 {
 	struct fsnotify_event *event;
 
@@ -285,6 +290,15 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 		return NULL;
 
 	initialize_event(event);
+
+	if (name) {
+		event->file_name = kstrdup(name, GFP_KERNEL);
+		if (!event->file_name) {
+			kmem_cache_free(fsnotify_event_cachep, event);
+			return NULL;
+		}
+		event->name_len = strlen(event->file_name);
+	}
 	event->to_tell = to_tell;
 
 	switch (data_type) {
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index db12d9d..180740e 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -58,7 +58,7 @@ static inline void fsnotify_link_count(struct inode *inode)
 {
 	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
 
-	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -91,8 +91,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name,
 				  source);
 
-	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE);
-	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE);
+	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name);
+	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name);
 
 	if (target) {
 		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
@@ -104,7 +104,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 
 	if (source) {
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
-		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE);
+		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL);
 	}
 	audit_inode_child(new_name, moved, new_dir);
 }
@@ -138,7 +138,7 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
 	inotify_inode_is_dead(inode);
 
-	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL);
 	__fsnotify_inode_delete(inode);
 }
 
@@ -151,7 +151,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
-	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name);
 }
 
 /*
@@ -166,7 +166,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
 	fsnotify_link_count(inode);
 	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
 
-	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name);
 }
 
 /*
@@ -180,7 +180,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
-	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name);
 }
 
 /*
@@ -197,7 +197,7 @@ static inline void fsnotify_access(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -214,7 +214,7 @@ static inline void fsnotify_modify(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -231,7 +231,7 @@ static inline void fsnotify_open(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -250,7 +250,7 @@ static inline void fsnotify_close(struct file *file)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE);
+	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL);
 }
 
 /*
@@ -267,7 +267,7 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -303,7 +303,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 		fsnotify_parent(dentry, mask);
-		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 15f8f82..52692f4 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -192,6 +192,9 @@ struct fsnotify_event {
 	int data_type;		/* which of the above union we have */
 	atomic_t refcnt;	/* how many groups still are using/need to send this event */
 	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
+
+	char *file_name;
+	size_t name_len;
 };
 
 /*
@@ -224,7 +227,7 @@ struct fsnotify_mark_entry {
 /* called from the vfs helpers */
 
 /* main fsnotify call to send events */
-extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is);
+extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name);
 extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
 
@@ -319,10 +322,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-						    void *data, int data_is);
+						    void *data, int data_is, const char *name);
+
 #else
 
-static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
+static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+			    const char *name);
 {}
 
 static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
-- 
cgit v0.10.2


From 47882c6f51e8ef41fbbe2bbb746a1ea3228dd7ca Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:47 -0400
Subject: fsnotify: add correlations between events

As part of the standard inotify events it includes a correlation cookie
between two dentry move operations.  This patch includes the same behaviour
in fsnotify events.  It is needed so that inotify userspace can be
implemented on top of fsnotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 675129f..f11d75f 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -115,7 +115,7 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
 		mask |= FS_EVENT_ON_CHILD;
 
 		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-			 dentry->d_name.name);
+			 dentry->d_name.name, 0);
 		dput(parent);
 	}
 
@@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(__fsnotify_parent);
  * out to all of the registered fsnotify_group.  Those groups can then use the
  * notification event in whatever means they feel necessary.
  */
-void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name)
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
 {
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
@@ -157,7 +157,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 			if (!group->ops->should_send_event(group, to_tell, mask))
 				continue;
 			if (!event) {
-				event = fsnotify_create_event(to_tell, mask, data, data_is, file_name);
+				event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie);
 				/* shit, we OOM'd and now we can't tell, maybe
 				 * someday someone else will want to do something
 				 * here */
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index c69b18b..346f6e5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/mutex.h>
 #include <linux/namei.h>
@@ -56,6 +57,17 @@ static struct kmem_cache *fsnotify_event_holder_cachep;
  * get set to 0 so it will never get 'freed'
  */
 static struct fsnotify_event q_overflow_event;
+static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
+
+/**
+ * fsnotify_get_cookie - return a unique cookie for use in synchronizing events.
+ * Called from fsnotify_move, which is inlined into filesystem modules.
+ */
+u32 fsnotify_get_cookie(void)
+{
+	return atomic_inc_return(&fsnotify_sync_cookie);
+}
+EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
 
 /* return true if the notify queue is empty, false otherwise */
 bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
@@ -266,6 +278,8 @@ static void initialize_event(struct fsnotify_event *event)
 
 	event->file_name = NULL;
 	event->name_len = 0;
+
+	event->sync_cookie = 0;
 }
 
 /*
@@ -280,8 +294,8 @@ static void initialize_event(struct fsnotify_event *event)
  * @data_type flag indication if the data is a file, path, inode, nothing...
  * @name the filename, if available
  */
-struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-					     void *data, int data_type, const char *name)
+struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
+					     int data_type, const char *name, u32 cookie)
 {
 	struct fsnotify_event *event;
 
@@ -299,6 +313,8 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 		}
 		event->name_len = strlen(event->file_name);
 	}
+
+	event->sync_cookie = cookie;
 	event->to_tell = to_tell;
 
 	switch (data_type) {
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 180740e..c25b39d 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -58,7 +58,7 @@ static inline void fsnotify_link_count(struct inode *inode)
 {
 	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
 
-	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -69,7 +69,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 				 int isdir, struct inode *target, struct dentry *moved)
 {
 	struct inode *source = moved->d_inode;
-	u32 cookie = inotify_get_cookie();
+	u32 in_cookie = inotify_get_cookie();
+	u32 fs_cookie = fsnotify_get_cookie();
 	__u32 old_dir_mask = 0;
 	__u32 new_dir_mask = 0;
 
@@ -86,13 +87,13 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	old_dir_mask |= FS_MOVED_FROM;
 	new_dir_mask |= FS_MOVED_TO;
 
-	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name,
+	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name,
 				  source);
-	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name,
+	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name,
 				  source);
 
-	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name);
-	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name);
+	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie);
+	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie);
 
 	if (target) {
 		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
@@ -104,7 +105,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 
 	if (source) {
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
-		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL);
+		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	}
 	audit_inode_child(new_name, moved, new_dir);
 }
@@ -138,7 +139,7 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
 	inotify_inode_is_dead(inode);
 
-	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	__fsnotify_inode_delete(inode);
 }
 
@@ -151,7 +152,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
-	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name);
+	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
 
 /*
@@ -166,7 +167,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
 	fsnotify_link_count(inode);
 	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
 
-	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name);
+	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0);
 }
 
 /*
@@ -180,7 +181,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
-	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name);
+	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
 
 /*
@@ -197,7 +198,7 @@ static inline void fsnotify_access(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -214,7 +215,7 @@ static inline void fsnotify_modify(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -231,7 +232,7 @@ static inline void fsnotify_open(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -250,7 +251,7 @@ static inline void fsnotify_close(struct file *file)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL);
+	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
 }
 
 /*
@@ -267,7 +268,7 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -303,7 +304,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 		fsnotify_parent(dentry, mask);
-		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 52692f4..b78b557 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -193,6 +193,7 @@ struct fsnotify_event {
 	atomic_t refcnt;	/* how many groups still are using/need to send this event */
 	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
 
+	u32 sync_cookie;	/* used to corrolate events, namely inotify mv events */
 	char *file_name;
 	size_t name_len;
 };
@@ -227,9 +228,11 @@ struct fsnotify_mark_entry {
 /* called from the vfs helpers */
 
 /* main fsnotify call to send events */
-extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name);
+extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+		     const char *name, u32 cookie);
 extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
+extern u32 fsnotify_get_cookie(void);
 
 static inline int fsnotify_inode_watches_children(struct inode *inode)
 {
@@ -322,12 +325,13 @@ extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-						    void *data, int data_is, const char *name);
+						    void *data, int data_is, const char *name,
+						    u32 cookie);
 
 #else
 
 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
-			    const char *name);
+			    const char *name, u32 cookie)
 {}
 
 static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
@@ -342,6 +346,11 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
 static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
 {}
 
+static inline u32 fsnotify_get_cookie(void)
+{
+	return 0;
+}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
cgit v0.10.2


From e4aff117368cfdd3567ee41844d216d079b55173 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:50 -0400
Subject: fsnotify: allow groups to add private data to events

inotify needs per group information attached to events.  This patch allows
groups to attach private information and implements a callback so that
information can be freed when an event is being destroyed.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index d9d80f5..12f9e6b 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -183,6 +183,7 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
 	.should_send_event = dnotify_should_send_event,
 	.free_group_priv = NULL,
 	.freeing_mark = dnotify_freeing_mark,
+	.free_event_priv = NULL,
 };
 
 /*
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 346f6e5..959b73e 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -90,6 +90,8 @@ void fsnotify_put_event(struct fsnotify_event *event)
 		if (event->data_type == FSNOTIFY_EVENT_PATH)
 			path_put(&event->path);
 
+		BUG_ON(!list_empty(&event->private_data_list));
+
 		kfree(event->file_name);
 		kmem_cache_free(fsnotify_event_cachep, event);
 	}
@@ -106,7 +108,29 @@ void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
 }
 
 /*
- * check if 2 events contain the same information.
+ * Find the private data that the group previously attached to this event when
+ * the group added the event to the notification queue (fsnotify_add_notify_event)
+ */
+struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	struct fsnotify_event_private_data *lpriv;
+	struct fsnotify_event_private_data *priv = NULL;
+
+	assert_spin_locked(&event->lock);
+
+	list_for_each_entry(lpriv, &event->private_data_list, event_list) {
+		if (lpriv->group == group) {
+			priv = lpriv;
+			list_del(&priv->event_list);
+			break;
+		}
+	}
+	return priv;
+}
+
+/*
+ * Check if 2 events contain the same information.  We do not compare private data
+ * but at this moment that isn't a problem for any know fsnotify listeners.
  */
 static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
 {
@@ -134,13 +158,17 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
  * event off the queue to deal with.  If the event is successfully added to the
  * group's notification queue, a reference is taken on event.
  */
-int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event)
+int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
+			      struct fsnotify_event_private_data *priv)
 {
 	struct fsnotify_event_holder *holder = NULL;
 	struct list_head *list = &group->notification_list;
 	struct fsnotify_event_holder *last_holder;
 	struct fsnotify_event *last_event;
 
+	/* easy to tell if priv was attached to the event */
+	INIT_LIST_HEAD(&priv->event_list);
+
 	/*
 	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
 	 * Check if we expect to be able to use that holder.  If not alloc a new
@@ -158,8 +186,11 @@ alloc_holder:
 
 	mutex_lock(&group->notification_mutex);
 
-	if (group->q_len >= group->max_events)
+	if (group->q_len >= group->max_events) {
 		event = &q_overflow_event;
+		/* sorry, no private data on the overflow event */
+		priv = NULL;
+	}
 
 	spin_lock(&event->lock);
 
@@ -183,7 +214,7 @@ alloc_holder:
 			mutex_unlock(&group->notification_mutex);
 			if (holder != &event->holder)
 				fsnotify_destroy_event_holder(holder);
-			return 0;
+			return -EEXIST;
 		}
 	}
 
@@ -192,6 +223,8 @@ alloc_holder:
 
 	fsnotify_get_event(event);
 	list_add_tail(&holder->event_list, list);
+	if (priv)
+		list_add_tail(&priv->event_list, &event->private_data_list);
 	spin_unlock(&event->lock);
 	mutex_unlock(&group->notification_mutex);
 
@@ -252,10 +285,19 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
 void fsnotify_flush_notify(struct fsnotify_group *group)
 {
 	struct fsnotify_event *event;
+	struct fsnotify_event_private_data *priv;
 
 	mutex_lock(&group->notification_mutex);
 	while (!fsnotify_notify_queue_is_empty(group)) {
 		event = fsnotify_remove_notify_event(group);
+		/* if they don't implement free_event_priv they better not have attached any */
+		if (group->ops->free_event_priv) {
+			spin_lock(&event->lock);
+			priv = fsnotify_remove_priv_from_event(group, event);
+			spin_unlock(&event->lock);
+			if (priv)
+				group->ops->free_event_priv(priv);
+		}
 		fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
 	}
 	mutex_unlock(&group->notification_mutex);
@@ -274,6 +316,8 @@ static void initialize_event(struct fsnotify_event *event)
 	event->inode = NULL;
 	event->data_type = FSNOTIFY_EVENT_NONE;
 
+	INIT_LIST_HEAD(&event->private_data_list);
+
 	event->to_tell = NULL;
 
 	event->file_name = NULL;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b78b557..efdf9e4 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -63,6 +63,7 @@
 struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark_entry;
+struct fsnotify_event_private_data;
 
 /*
  * Each group much define these ops.  The fsnotify infrastructure will call
@@ -81,6 +82,7 @@ struct fsnotify_ops {
 	int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
 	void (*free_group_priv)(struct fsnotify_group *group);
 	void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
+	void (*free_event_priv)(struct fsnotify_event_private_data *priv);
 };
 
 /*
@@ -158,6 +160,15 @@ struct fsnotify_event_holder {
 };
 
 /*
+ * Inotify needs to tack data onto an event.  This struct lets us later find the
+ * correct private data of the correct group.
+ */
+struct fsnotify_event_private_data {
+	struct fsnotify_group *group;
+	struct list_head event_list;
+};
+
+/*
  * all of the information about the original object we want to now send to
  * a group.  If you want to carry more info from the accessing task to the
  * listener this structure is where you need to be adding fields.
@@ -196,6 +207,8 @@ struct fsnotify_event {
 	u32 sync_cookie;	/* used to corrolate events, namely inotify mv events */
 	char *file_name;
 	size_t name_len;
+
+	struct list_head private_data_list;	/* groups can store private data here */
 };
 
 /*
@@ -294,17 +307,18 @@ extern void fsnotify_put_group(struct fsnotify_group *group);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
-/* find private data previously attached to an event */
-extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group,
-									struct fsnotify_event *event);
+/* find private data previously attached to an event and unlink it */
+extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
+									   struct fsnotify_event *event);
 
 /* attach the event to the group notification queue */
-extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event);
+extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
+				     struct fsnotify_event_private_data *priv);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
 extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
-/* reutnr AND dequeue the first event on the notification queue */
+/* return AND dequeue the first event on the notification queue */
 extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
 
 /* functions used to manipulate the marks attached to inodes */
-- 
cgit v0.10.2


From 1ef5f13c6c8acd3fd10db9f1743f3b4cf30a4abb Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:54 -0400
Subject: fsnotify: fsnotify marks on inodes pin them in core

This patch pins any inodes with an fsnotify mark in core.  The idea is that
as soon as the mark is removed from the inode->fsnotify_mark_entries list
the inode will be iput.  In reality is doesn't quite work exactly this way.
The igrab will happen when the mark is added to an inode, but the iput will
happen when the inode pointer is NULL'd inside the mark.

It's possible that 2 racing things will try to remove the mark from
different directions.  One may try to remove the mark because of an
explicit request and one might try to remove it because the inode was
deleted.  It's possible that the removal because of inode deletion will
remove the mark from the inode's list, but the removal by explicit request
will actually set entry->inode == NULL; and call the iput.  This is safe.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index a395348..282150f 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -204,6 +204,8 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
 	 */
 
 
+	iput(inode);
+
 	/*
 	 * it's possible that this group tried to destroy itself, but this
 	 * this mark was simultaneously being freed by inode.  If that's the
@@ -306,6 +308,10 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 	struct fsnotify_mark_entry *lentry;
 	int ret = 0;
 
+	inode = igrab(inode);
+	if (unlikely(!inode))
+		return -EINVAL;
+
 	/*
 	 * LOCKING ORDER!!!!
 	 * entry->lock
@@ -337,6 +343,7 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 
 	if (lentry) {
 		ret = -EEXIST;
+		iput(inode);
 		fsnotify_put_mark(lentry);
 	} else {
 		__fsnotify_update_child_dentry_flags(inode);
-- 
cgit v0.10.2


From 164bc6195139047faaf5ada1278332e99494803b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:58 -0400
Subject: fsnotify: handle filesystem unmounts with fsnotify marks

When an fs is unmounted with an fsnotify mark entry attached to one of its
inodes we need to destroy that mark entry and we also (like inotify) send
an unmount event.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/fs/inode.c b/fs/inode.c
index 54c63ce..ca33701 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -407,6 +407,7 @@ int invalidate_inodes(struct super_block *sb)
 	mutex_lock(&iprune_mutex);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
+	fsnotify_unmount_inodes(&sb->s_inodes);
 	busy = invalidate_list(&sb->s_inodes, &throw_away);
 	spin_unlock(&inode_lock);
 
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 282150f..0a499d2 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -89,6 +89,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>
 
@@ -351,3 +352,74 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 
 	return ret;
 }
+
+/**
+ * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
+ * @list: list of inodes being unmounted (sb->s_inodes)
+ *
+ * Called with inode_lock held, protecting the unmounting super block's list
+ * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
+ * We temporarily drop inode_lock, however, and CAN block.
+ */
+void fsnotify_unmount_inodes(struct list_head *list)
+{
+	struct inode *inode, *next_i, *need_iput = NULL;
+
+	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+		struct inode *need_iput_tmp;
+
+		/*
+		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+		 * I_WILL_FREE, or I_NEW which is fine because by that point
+		 * the inode cannot have any associated watches.
+		 */
+		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+			continue;
+
+		/*
+		 * If i_count is zero, the inode cannot have any watches and
+		 * doing an __iget/iput with MS_ACTIVE clear would actually
+		 * evict all inodes with zero i_count from icache which is
+		 * unnecessarily violent and may in fact be illegal to do.
+		 */
+		if (!atomic_read(&inode->i_count))
+			continue;
+
+		need_iput_tmp = need_iput;
+		need_iput = NULL;
+
+		/* In case fsnotify_inode_delete() drops a reference. */
+		if (inode != need_iput_tmp)
+			__iget(inode);
+		else
+			need_iput_tmp = NULL;
+
+		/* In case the dropping of a reference would nuke next_i. */
+		if ((&next_i->i_sb_list != list) &&
+		    atomic_read(&next_i->i_count) &&
+		    !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+			__iget(next_i);
+			need_iput = next_i;
+		}
+
+		/*
+		 * We can safely drop inode_lock here because we hold
+		 * references on both inode and next_i.  Also no new inodes
+		 * will be added since the umount has begun.  Finally,
+		 * iprune_mutex keeps shrink_icache_memory() away.
+		 */
+		spin_unlock(&inode_lock);
+
+		if (need_iput_tmp)
+			iput(need_iput_tmp);
+
+		/* for each watch, send FS_UNMOUNT and then remove it */
+		fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+
+		fsnotify_inode_delete(inode);
+
+		iput(inode);
+
+		spin_lock(&inode_lock);
+	}
+}
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index efdf9e4..d2c0ee3 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -336,6 +336,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
 extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
 extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
 extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
+extern void fsnotify_unmount_inodes(struct list_head *list);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
@@ -365,6 +366,9 @@ static inline u32 fsnotify_get_cookie(void)
 	return 0;
 }
 
+static inline void fsnotify_unmount_inodes(struct list_head *list)
+{}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
cgit v0.10.2


From 63c882a05416e18de6fb59f7dd6da48f3bbe8273 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:02:01 -0400
Subject: inotify: reimplement inotify using fsnotify

Reimplement inotify_user using fsnotify.  This should be feature for feature
exactly the same as the original inotify_user.  This does not make any changes
to the in kernel inotify feature used by audit.  Those patches (and the eventual
removal of in kernel inotify) will come after the new inotify_user proves to be
working correctly.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index 96e0c8c..e697b67 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2858,6 +2858,8 @@ P:	John McCutchan
 M:	john@johnmccutchan.com
 P:	Robert Love
 M:	rlove@rlove.org
+P:	Eric Paris
+M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	Documentation/filesystems/inotify.txt
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 4467928..5356884 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,26 +1,30 @@
 config INOTIFY
 	bool "Inotify file change notification support"
-	default y
+	default n
 	---help---
-	  Say Y here to enable inotify support.  Inotify is a file change
-	  notification system and a replacement for dnotify.  Inotify fixes
-	  numerous shortcomings in dnotify and introduces several new features
-	  including multiple file events, one-shot support, and unmount
-	  notification.
+	  Say Y here to enable legacy in kernel inotify support.  Inotify is a
+	  file change notification system.  It is a replacement for dnotify.
+	  This option only provides the legacy inotify in kernel API.  There
+	  are no in tree kernel users of this interface since it is deprecated.
+	  You only need this if you are loading an out of tree kernel module
+	  that uses inotify.
 
 	  For more information, see <file:Documentation/filesystems/inotify.txt>
 
-	  If unsure, say Y.
+	  If unsure, say N.
 
 config INOTIFY_USER
 	bool "Inotify support for userspace"
-	depends on INOTIFY
+	depends on FSNOTIFY
 	default y
 	---help---
 	  Say Y here to enable inotify support for userspace, including the
 	  associated system calls.  Inotify allows monitoring of both files and
 	  directories via a single open fd.  Events are read from the file
 	  descriptor, which is also select()- and poll()-able.
+	  Inotify fixes numerous shortcomings in dnotify and introduces several
+	  new features including multiple file events, one-shot support, and
+	  unmount notification.
 
 	  For more information, see <file:Documentation/filesystems/inotify.txt>
 
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index e290f3b..9438281 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_INOTIFY)		+= inotify.o
-obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
+obj-$(CONFIG_INOTIFY_USER)	+= inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
new file mode 100644
index 0000000..ea2605a
--- /dev/null
+++ b/fs/notify/inotify/inotify.h
@@ -0,0 +1,21 @@
+#include <linux/fsnotify_backend.h>
+#include <linux/inotify.h>
+#include <linux/slab.h> /* struct kmem_cache */
+
+extern struct kmem_cache *event_priv_cachep;
+
+struct inotify_event_private_data {
+	struct fsnotify_event_private_data fsnotify_event_priv_data;
+	int wd;
+};
+
+struct inotify_inode_mark_entry {
+	/* fsnotify_mark_entry MUST be the first thing */
+	struct fsnotify_mark_entry fsn_entry;
+	int wd;
+};
+
+extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
+extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
+
+extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
new file mode 100644
index 0000000..160da54
--- /dev/null
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -0,0 +1,137 @@
+/*
+ * fs/inotify_user.c - inotify support for userspace
+ *
+ * Authors:
+ *	John McCutchan	<ttb@tentacle.dhs.org>
+ *	Robert Love	<rml@novell.com>
+ *
+ * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * inotify was largely rewriten to make use of the fsnotify infrastructure
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/fs.h> /* struct inode */
+#include <linux/fsnotify_backend.h>
+#include <linux/inotify.h>
+#include <linux/path.h> /* struct path */
+#include <linux/slab.h> /* kmem_* */
+#include <linux/types.h>
+
+#include "inotify.h"
+
+static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	struct fsnotify_mark_entry *entry;
+	struct inotify_inode_mark_entry *ientry;
+	struct inode *to_tell;
+	struct inotify_event_private_data *event_priv;
+	struct fsnotify_event_private_data *fsn_event_priv;
+	int wd, ret;
+
+	to_tell = event->to_tell;
+
+	spin_lock(&to_tell->i_lock);
+	entry = fsnotify_find_mark_entry(group, to_tell);
+	spin_unlock(&to_tell->i_lock);
+	/* race with watch removal?  We already passes should_send */
+	if (unlikely(!entry))
+		return 0;
+	ientry = container_of(entry, struct inotify_inode_mark_entry,
+			      fsn_entry);
+	wd = ientry->wd;
+
+	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
+	if (unlikely(!event_priv))
+		return -ENOMEM;
+
+	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
+
+	fsn_event_priv->group = group;
+	event_priv->wd = wd;
+
+	ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
+	/* EEXIST is not an error */
+	if (ret == -EEXIST)
+		ret = 0;
+
+	/* did event_priv get attached? */
+	if (list_empty(&fsn_event_priv->event_list))
+		inotify_free_event_priv(fsn_event_priv);
+
+	/*
+	 * If we hold the entry until after the event is on the queue
+	 * IN_IGNORED won't be able to pass this event in the queue
+	 */
+	fsnotify_put_mark(entry);
+
+	return ret;
+}
+
+static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
+{
+	inotify_destroy_mark_entry(entry, group);
+}
+
+static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
+{
+	struct fsnotify_mark_entry *entry;
+	bool send;
+
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+	if (!entry)
+		return false;
+
+	send = (entry->mask & mask);
+
+	/* find took a reference */
+	fsnotify_put_mark(entry);
+
+	return send;
+}
+
+static int idr_callback(int id, void *p, void *data)
+{
+	BUG();
+	return 0;
+}
+
+static void inotify_free_group_priv(struct fsnotify_group *group)
+{
+	/* ideally the idr is empty and we won't hit the BUG in teh callback */
+	idr_for_each(&group->inotify_data.idr, idr_callback, NULL);
+	idr_remove_all(&group->inotify_data.idr);
+	idr_destroy(&group->inotify_data.idr);
+}
+
+void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
+{
+	struct inotify_event_private_data *event_priv;
+
+
+	event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
+				  fsnotify_event_priv_data);
+
+	kmem_cache_free(event_priv_cachep, event_priv);
+}
+
+const struct fsnotify_ops inotify_fsnotify_ops = {
+	.handle_event = inotify_handle_event,
+	.should_send_event = inotify_should_send_event,
+	.free_group_priv = inotify_free_group_priv,
+	.free_event_priv = inotify_free_event_priv,
+	.freeing_mark = inotify_freeing_mark,
+};
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1634319..982a412 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -8,6 +8,9 @@
  * Copyright (C) 2005 John McCutchan
  * Copyright 2006 Hewlett-Packard Development Company, L.P.
  *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * inotify was largely rewriten to make use of the fsnotify infrastructure
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2, or (at your option) any
@@ -19,94 +22,48 @@
  * General Public License for more details.
  */
 
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
 #include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/list.h>
+#include <linux/fs.h> /* struct inode */
+#include <linux/fsnotify_backend.h>
+#include <linux/idr.h>
+#include <linux/init.h> /* module_init */
 #include <linux/inotify.h>
+#include <linux/kernel.h> /* roundup() */
+#include <linux/magic.h> /* superblock magic number */
+#include <linux/mount.h> /* mntget */
+#include <linux/namei.h> /* LOOKUP_FOLLOW */
+#include <linux/path.h> /* struct path */
+#include <linux/sched.h> /* struct user */
+#include <linux/slab.h> /* struct kmem_cache */
 #include <linux/syscalls.h>
-#include <linux/magic.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/poll.h>
+#include <linux/wait.h>
 
-#include <asm/ioctls.h>
+#include "inotify.h"
 
-static struct kmem_cache *watch_cachep __read_mostly;
-static struct kmem_cache *event_cachep __read_mostly;
+#include <asm/ioctls.h>
 
 static struct vfsmount *inotify_mnt __read_mostly;
 
+/* this just sits here and wastes global memory.  used to just pad userspace messages with zeros */
+static struct inotify_event nul_inotify_event;
+
 /* these are configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_user_instances __read_mostly;
-static int inotify_max_user_watches __read_mostly;
 static int inotify_max_queued_events __read_mostly;
+int inotify_max_user_watches __read_mostly;
 
-/*
- * Lock ordering:
- *
- * inotify_dev->up_mutex (ensures we don't re-add the same watch)
- * 	inode->inotify_mutex (protects inode's watch list)
- * 		inotify_handle->mutex (protects inotify_handle's watch list)
- * 			inotify_dev->ev_mutex (protects device's event queue)
- */
+static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
+struct kmem_cache *event_priv_cachep __read_mostly;
+static struct fsnotify_event *inotify_ignored_event;
 
 /*
- * Lifetimes of the main data structures:
- *
- * inotify_device: Lifetime is managed by reference count, from
- * sys_inotify_init() until release.  Additional references can bump the count
- * via get_inotify_dev() and drop the count via put_inotify_dev().
- *
- * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
- * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
- * first event, or to inotify_destroy().
+ * When inotify registers a new group it increments this and uses that
+ * value as an offset to set the fsnotify group "name" and priority.
  */
-
-/*
- * struct inotify_device - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_device {
-	wait_queue_head_t 	wq;		/* wait queue for i/o */
-	struct mutex		ev_mutex;	/* protects event queue */
-	struct mutex		up_mutex;	/* synchronizes watch updates */
-	struct list_head 	events;		/* list of queued events */
-	struct user_struct	*user;		/* user who opened this dev */
-	struct inotify_handle	*ih;		/* inotify handle */
-	struct fasync_struct    *fa;            /* async notification */
-	atomic_t		count;		/* reference count */
-	unsigned int		queue_size;	/* size of the queue (bytes) */
-	unsigned int		event_count;	/* number of pending events */
-	unsigned int		max_events;	/* maximum number of events */
-};
-
-/*
- * struct inotify_kernel_event - An inotify event, originating from a watch and
- * queued for user-space.  A list of these is attached to each instance of the
- * device.  In read(), this list is walked and all events that can fit in the
- * buffer are returned.
- *
- * Protected by dev->ev_mutex of the device in which we are queued.
- */
-struct inotify_kernel_event {
-	struct inotify_event	event;	/* the user-space event */
-	struct list_head        list;	/* entry in inotify_device's list */
-	char			*name;	/* filename, if any */
-};
-
-/*
- * struct inotify_user_watch - our version of an inotify_watch, we add
- * a reference to the associated inotify_device.
- */
-struct inotify_user_watch {
-	struct inotify_device	*dev;	/* associated device */
-	struct inotify_watch	wdata;	/* inotify watch data */
-};
+static atomic_t inotify_grp_num;
 
 #ifdef CONFIG_SYSCTL
 
@@ -149,280 +106,36 @@ ctl_table inotify_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
-static inline void get_inotify_dev(struct inotify_device *dev)
-{
-	atomic_inc(&dev->count);
-}
-
-static inline void put_inotify_dev(struct inotify_device *dev)
-{
-	if (atomic_dec_and_test(&dev->count)) {
-		atomic_dec(&dev->user->inotify_devs);
-		free_uid(dev->user);
-		kfree(dev);
-	}
-}
-
-/*
- * free_inotify_user_watch - cleans up the watch and its references
- */
-static void free_inotify_user_watch(struct inotify_watch *w)
-{
-	struct inotify_user_watch *watch;
-	struct inotify_device *dev;
-
-	watch = container_of(w, struct inotify_user_watch, wdata);
-	dev = watch->dev;
-
-	atomic_dec(&dev->user->inotify_watches);
-	put_inotify_dev(dev);
-	kmem_cache_free(watch_cachep, watch);
-}
-
-/*
- * kernel_event - create a new kernel event with the given parameters
- *
- * This function can sleep.
- */
-static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
-						  const char *name)
-{
-	struct inotify_kernel_event *kevent;
-
-	kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
-	if (unlikely(!kevent))
-		return NULL;
-
-	/* we hand this out to user-space, so zero it just in case */
-	memset(&kevent->event, 0, sizeof(struct inotify_event));
-
-	kevent->event.wd = wd;
-	kevent->event.mask = mask;
-	kevent->event.cookie = cookie;
-
-	INIT_LIST_HEAD(&kevent->list);
-
-	if (name) {
-		size_t len, rem, event_size = sizeof(struct inotify_event);
-
-		/*
-		 * We need to pad the filename so as to properly align an
-		 * array of inotify_event structures.  Because the structure is
-		 * small and the common case is a small filename, we just round
-		 * up to the next multiple of the structure's sizeof.  This is
-		 * simple and safe for all architectures.
-		 */
-		len = strlen(name) + 1;
-		rem = event_size - len;
-		if (len > event_size) {
-			rem = event_size - (len % event_size);
-			if (len % event_size == 0)
-				rem = 0;
-		}
-
-		kevent->name = kmalloc(len + rem, GFP_NOFS);
-		if (unlikely(!kevent->name)) {
-			kmem_cache_free(event_cachep, kevent);
-			return NULL;
-		}
-		memcpy(kevent->name, name, len);
-		if (rem)
-			memset(kevent->name + len, 0, rem);
-		kevent->event.len = len + rem;
-	} else {
-		kevent->event.len = 0;
-		kevent->name = NULL;
-	}
-
-	return kevent;
-}
-
-/*
- * inotify_dev_get_event - return the next event in the given dev's queue
- *
- * Caller must hold dev->ev_mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_event(struct inotify_device *dev)
-{
-	return list_entry(dev->events.next, struct inotify_kernel_event, list);
-}
-
-/*
- * inotify_dev_get_last_event - return the last event in the given dev's queue
- *
- * Caller must hold dev->ev_mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_last_event(struct inotify_device *dev)
+static inline __u32 inotify_arg_to_mask(u32 arg)
 {
-	if (list_empty(&dev->events))
-		return NULL;
-	return list_entry(dev->events.prev, struct inotify_kernel_event, list);
-}
+	__u32 mask;
 
-/*
- * inotify_dev_queue_event - event handler registered with core inotify, adds
- * a new event to the given device
- *
- * Can sleep (calls kernel_event()).
- */
-static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
-				    u32 cookie, const char *name,
-				    struct inode *ignored)
-{
-	struct inotify_user_watch *watch;
-	struct inotify_device *dev;
-	struct inotify_kernel_event *kevent, *last;
+	/* everything should accept their own ignored and cares about children */
+	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
 
-	watch = container_of(w, struct inotify_user_watch, wdata);
-	dev = watch->dev;
+	/* mask off the flags used to open the fd */
+	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
 
-	mutex_lock(&dev->ev_mutex);
-
-	/* we can safely put the watch as we don't reference it while
-	 * generating the event
-	 */
-	if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
-		put_inotify_watch(w); /* final put */
-
-	/* coalescing: drop this event if it is a dupe of the previous */
-	last = inotify_dev_get_last_event(dev);
-	if (last && last->event.mask == mask && last->event.wd == wd &&
-			last->event.cookie == cookie) {
-		const char *lastname = last->name;
-
-		if (!name && !lastname)
-			goto out;
-		if (name && lastname && !strcmp(lastname, name))
-			goto out;
-	}
-
-	/* the queue overflowed and we already sent the Q_OVERFLOW event */
-	if (unlikely(dev->event_count > dev->max_events))
-		goto out;
-
-	/* if the queue overflows, we need to notify user space */
-	if (unlikely(dev->event_count == dev->max_events))
-		kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
-	else
-		kevent = kernel_event(wd, mask, cookie, name);
-
-	if (unlikely(!kevent))
-		goto out;
-
-	/* queue the event and wake up anyone waiting */
-	dev->event_count++;
-	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
-	list_add_tail(&kevent->list, &dev->events);
-	wake_up_interruptible(&dev->wq);
-	kill_fasync(&dev->fa, SIGIO, POLL_IN);
-
-out:
-	mutex_unlock(&dev->ev_mutex);
-}
-
-/*
- * remove_kevent - cleans up the given kevent
- *
- * Caller must hold dev->ev_mutex.
- */
-static void remove_kevent(struct inotify_device *dev,
-			  struct inotify_kernel_event *kevent)
-{
-	list_del(&kevent->list);
-
-	dev->event_count--;
-	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
-}
-
-/*
- * free_kevent - frees the given kevent.
- */
-static void free_kevent(struct inotify_kernel_event *kevent)
-{
-	kfree(kevent->name);
-	kmem_cache_free(event_cachep, kevent);
-}
-
-/*
- * inotify_dev_event_dequeue - destroy an event on the given device
- *
- * Caller must hold dev->ev_mutex.
- */
-static void inotify_dev_event_dequeue(struct inotify_device *dev)
-{
-	if (!list_empty(&dev->events)) {
-		struct inotify_kernel_event *kevent;
-		kevent = inotify_dev_get_event(dev);
-		remove_kevent(dev, kevent);
-		free_kevent(kevent);
-	}
-}
-
-/*
- * find_inode - resolve a user-given path to a specific inode
- */
-static int find_inode(const char __user *dirname, struct path *path,
-		      unsigned flags)
-{
-	int error;
-
-	error = user_path_at(AT_FDCWD, dirname, flags, path);
-	if (error)
-		return error;
-	/* you can only watch an inode if you have read permissions on it */
-	error = inode_permission(path->dentry->d_inode, MAY_READ);
-	if (error)
-		path_put(path);
-	return error;
+	return mask;
 }
 
-/*
- * create_watch - creates a watch on the given device.
- *
- * Callers must hold dev->up_mutex.
- */
-static int create_watch(struct inotify_device *dev, struct inode *inode,
-			u32 mask)
+static inline u32 inotify_mask_to_arg(__u32 mask)
 {
-	struct inotify_user_watch *watch;
-	int ret;
-
-	if (atomic_read(&dev->user->inotify_watches) >=
-			inotify_max_user_watches)
-		return -ENOSPC;
-
-	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
-	if (unlikely(!watch))
-		return -ENOMEM;
-
-	/* save a reference to device and bump the count to make it official */
-	get_inotify_dev(dev);
-	watch->dev = dev;
-
-	atomic_inc(&dev->user->inotify_watches);
-
-	inotify_init_watch(&watch->wdata);
-	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
-	if (ret < 0)
-		free_inotify_user_watch(&watch->wdata);
-
-	return ret;
+	return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
+		       IN_Q_OVERFLOW);
 }
 
-/* Device Interface */
-
+/* intofiy userspace file descriptor functions */
 static unsigned int inotify_poll(struct file *file, poll_table *wait)
 {
-	struct inotify_device *dev = file->private_data;
+	struct fsnotify_group *group = file->private_data;
 	int ret = 0;
 
-	poll_wait(file, &dev->wq, wait);
-	mutex_lock(&dev->ev_mutex);
-	if (!list_empty(&dev->events))
+	poll_wait(file, &group->notification_waitq, wait);
+	mutex_lock(&group->notification_mutex);
+	if (!fsnotify_notify_queue_is_empty(group))
 		ret = POLLIN | POLLRDNORM;
-	mutex_unlock(&dev->ev_mutex);
+	mutex_unlock(&group->notification_mutex);
 
 	return ret;
 }
@@ -432,26 +145,29 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
  * enough to fit in "count". Return an error pointer if
  * not large enough.
  *
- * Called with the device ev_mutex held.
+ * Called with the group->notification_mutex held.
  */
-static struct inotify_kernel_event *get_one_event(struct inotify_device *dev,
-						  size_t count)
+static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
+					    size_t count)
 {
 	size_t event_size = sizeof(struct inotify_event);
-	struct inotify_kernel_event *kevent;
+	struct fsnotify_event *event;
 
-	if (list_empty(&dev->events))
+	if (fsnotify_notify_queue_is_empty(group))
 		return NULL;
 
-	kevent = inotify_dev_get_event(dev);
-	if (kevent->name)
-		event_size += kevent->event.len;
+	event = fsnotify_peek_notify_event(group);
+
+	event_size += roundup(event->name_len, event_size);
 
 	if (event_size > count)
 		return ERR_PTR(-EINVAL);
 
-	remove_kevent(dev, kevent);
-	return kevent;
+	/* held the notification_mutex the whole time, so this is the
+	 * same event we peeked above */
+	fsnotify_remove_notify_event(group);
+
+	return event;
 }
 
 /*
@@ -460,51 +176,90 @@ static struct inotify_kernel_event *get_one_event(struct inotify_device *dev,
  * We already checked that the event size is smaller than the
  * buffer we had in "get_one_event()" above.
  */
-static ssize_t copy_event_to_user(struct inotify_kernel_event *kevent,
+static ssize_t copy_event_to_user(struct fsnotify_group *group,
+				  struct fsnotify_event *event,
 				  char __user *buf)
 {
+	struct inotify_event inotify_event;
+	struct fsnotify_event_private_data *fsn_priv;
+	struct inotify_event_private_data *priv;
 	size_t event_size = sizeof(struct inotify_event);
+	size_t name_len;
+
+	/* we get the inotify watch descriptor from the event private data */
+	spin_lock(&event->lock);
+	fsn_priv = fsnotify_remove_priv_from_event(group, event);
+	spin_unlock(&event->lock);
+
+	if (!fsn_priv)
+		inotify_event.wd = -1;
+	else {
+		priv = container_of(fsn_priv, struct inotify_event_private_data,
+				    fsnotify_event_priv_data);
+		inotify_event.wd = priv->wd;
+		inotify_free_event_priv(fsn_priv);
+	}
+
+	/* round up event->name_len so it is a multiple of event_size */
+	name_len = roundup(event->name_len, event_size);
+	inotify_event.len = name_len;
+
+	inotify_event.mask = inotify_mask_to_arg(event->mask);
+	inotify_event.cookie = event->sync_cookie;
 
-	if (copy_to_user(buf, &kevent->event, event_size))
+	/* send the main event */
+	if (copy_to_user(buf, &inotify_event, event_size))
 		return -EFAULT;
 
-	if (kevent->name) {
-		buf += event_size;
+	buf += event_size;
 
-		if (copy_to_user(buf, kevent->name, kevent->event.len))
+	/*
+	 * fsnotify only stores the pathname, so here we have to send the pathname
+	 * and then pad that pathname out to a multiple of sizeof(inotify_event)
+	 * with zeros.  I get my zeros from the nul_inotify_event.
+	 */
+	if (name_len) {
+		unsigned int len_to_zero = name_len - event->name_len;
+		/* copy the path name */
+		if (copy_to_user(buf, event->file_name, event->name_len))
 			return -EFAULT;
+		buf += event->name_len;
 
-		event_size += kevent->event.len;
+		/* fill userspace with 0's from nul_inotify_event */
+		if (copy_to_user(buf, &nul_inotify_event, len_to_zero))
+			return -EFAULT;
+		buf += len_to_zero;
+		event_size += name_len;
 	}
+
 	return event_size;
 }
 
 static ssize_t inotify_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *pos)
 {
-	struct inotify_device *dev;
+	struct fsnotify_group *group;
+	struct fsnotify_event *kevent;
 	char __user *start;
 	int ret;
 	DEFINE_WAIT(wait);
 
 	start = buf;
-	dev = file->private_data;
+	group = file->private_data;
 
 	while (1) {
-		struct inotify_kernel_event *kevent;
+		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
 
-		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
-
-		mutex_lock(&dev->ev_mutex);
-		kevent = get_one_event(dev, count);
-		mutex_unlock(&dev->ev_mutex);
+		mutex_lock(&group->notification_mutex);
+		kevent = get_one_event(group, count);
+		mutex_unlock(&group->notification_mutex);
 
 		if (kevent) {
 			ret = PTR_ERR(kevent);
 			if (IS_ERR(kevent))
 				break;
-			ret = copy_event_to_user(kevent, buf);
-			free_kevent(kevent);
+			ret = copy_event_to_user(group, kevent, buf);
+			fsnotify_put_event(kevent);
 			if (ret < 0)
 				break;
 			buf += ret;
@@ -525,7 +280,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 		schedule();
 	}
 
-	finish_wait(&dev->wq, &wait);
+	finish_wait(&group->notification_waitq, &wait);
 	if (start != buf && ret != -EFAULT)
 		ret = buf - start;
 	return ret;
@@ -533,25 +288,19 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 
 static int inotify_fasync(int fd, struct file *file, int on)
 {
-	struct inotify_device *dev = file->private_data;
+	struct fsnotify_group *group = file->private_data;
 
-	return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
+	return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
 }
 
 static int inotify_release(struct inode *ignored, struct file *file)
 {
-	struct inotify_device *dev = file->private_data;
-
-	inotify_destroy(dev->ih);
+	struct fsnotify_group *group = file->private_data;
 
-	/* destroy all of the events on this device */
-	mutex_lock(&dev->ev_mutex);
-	while (!list_empty(&dev->events))
-		inotify_dev_event_dequeue(dev);
-	mutex_unlock(&dev->ev_mutex);
+	fsnotify_clear_marks_by_group(group);
 
-	/* free this device: the put matching the get in inotify_init() */
-	put_inotify_dev(dev);
+	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
+	fsnotify_put_group(group);
 
 	return 0;
 }
@@ -559,16 +308,27 @@ static int inotify_release(struct inode *ignored, struct file *file)
 static long inotify_ioctl(struct file *file, unsigned int cmd,
 			  unsigned long arg)
 {
-	struct inotify_device *dev;
+	struct fsnotify_group *group;
+	struct fsnotify_event_holder *holder;
+	struct fsnotify_event *event;
 	void __user *p;
 	int ret = -ENOTTY;
+	size_t send_len = 0;
 
-	dev = file->private_data;
+	group = file->private_data;
 	p = (void __user *) arg;
 
 	switch (cmd) {
 	case FIONREAD:
-		ret = put_user(dev->queue_size, (int __user *) p);
+		mutex_lock(&group->notification_mutex);
+		list_for_each_entry(holder, &group->notification_list, event_list) {
+			event = holder->event;
+			send_len += sizeof(struct inotify_event);
+			send_len += roundup(event->name_len,
+					     sizeof(struct inotify_event));
+		}
+		mutex_unlock(&group->notification_mutex);
+		ret = put_user(send_len, (int __user *) p);
 		break;
 	}
 
@@ -576,23 +336,233 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 }
 
 static const struct file_operations inotify_fops = {
-	.poll           = inotify_poll,
-	.read           = inotify_read,
-	.fasync         = inotify_fasync,
-	.release        = inotify_release,
-	.unlocked_ioctl = inotify_ioctl,
+	.poll		= inotify_poll,
+	.read		= inotify_read,
+	.fasync		= inotify_fasync,
+	.release	= inotify_release,
+	.unlocked_ioctl	= inotify_ioctl,
 	.compat_ioctl	= inotify_ioctl,
 };
 
-static const struct inotify_operations inotify_user_ops = {
-	.handle_event	= inotify_dev_queue_event,
-	.destroy_watch	= free_inotify_user_watch,
-};
 
+/*
+ * find_inode - resolve a user-given path to a specific inode
+ */
+static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
+{
+	int error;
+
+	error = user_path_at(AT_FDCWD, dirname, flags, path);
+	if (error)
+		return error;
+	/* you can only watch an inode if you have read permissions on it */
+	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (error)
+		path_put(path);
+	return error;
+}
+
+/*
+ * When, for whatever reason, inotify is done with a mark (or what used to be a
+ * watch) we need to remove that watch from the idr and we need to send IN_IGNORED
+ * for the given wd.
+ *
+ * There is a bit of recursion here.  The loop looks like:
+ * 	inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry ->
+ *	inotify_freeing_mark -> inotify_destory_mark_entry -> restart
+ * But the loop is broken in 2 places.  fsnotify_destroy_mark_by_entry sets
+ * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup)
+ * test below will not call back to fsnotify again.  But even if that test wasn't
+ * there this would still be safe since fsnotify_destroy_mark_by_entry() is
+ * safe from recursion.
+ */
+void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
+{
+	struct inotify_inode_mark_entry *ientry;
+	struct inotify_event_private_data *event_priv;
+	struct fsnotify_event_private_data *fsn_event_priv;
+	struct fsnotify_group *egroup;
+	struct idr *idr;
+
+	spin_lock(&entry->lock);
+	egroup = entry->group;
+
+	/* if egroup we aren't really done and something might still send events
+	 * for this inode, on the callback we'll send the IN_IGNORED */
+	if (egroup) {
+		spin_unlock(&entry->lock);
+		fsnotify_destroy_mark_by_entry(entry);
+		return;
+	}
+	spin_unlock(&entry->lock);
+
+	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+
+	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
+	if (unlikely(!event_priv))
+		goto skip_send_ignore;
+
+	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
+
+	fsn_event_priv->group = group;
+	event_priv->wd = ientry->wd;
+
+	fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv);
+
+	/* did the private data get added? */
+	if (list_empty(&fsn_event_priv->event_list))
+		inotify_free_event_priv(fsn_event_priv);
+
+skip_send_ignore:
+
+	/* remove this entry from the idr */
+	spin_lock(&group->inotify_data.idr_lock);
+	idr = &group->inotify_data.idr;
+	idr_remove(idr, ientry->wd);
+	spin_unlock(&group->inotify_data.idr_lock);
+
+	/* removed from idr, drop that reference */
+	fsnotify_put_mark(entry);
+}
+
+/* ding dong the mark is dead */
+static void inotify_free_mark(struct fsnotify_mark_entry *entry)
+{
+	struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
+
+	kmem_cache_free(inotify_inode_mark_cachep, ientry);
+}
+
+static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+{
+	struct fsnotify_mark_entry *entry = NULL;
+	struct inotify_inode_mark_entry *ientry;
+	int ret = 0;
+	int add = (arg & IN_MASK_ADD);
+	__u32 mask;
+	__u32 old_mask, new_mask;
+
+	/* don't allow invalid bits: we don't want flags set */
+	mask = inotify_arg_to_mask(arg);
+	if (unlikely(!mask))
+		return -EINVAL;
+
+	ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
+	if (unlikely(!ientry))
+		return -ENOMEM;
+	/* we set the mask at the end after attaching it */
+	fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark);
+	ientry->wd = 0;
+
+find_entry:
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+	if (entry) {
+		kmem_cache_free(inotify_inode_mark_cachep, ientry);
+		ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	} else {
+		if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) {
+			ret = -ENOSPC;
+			goto out_err;
+		}
+
+		ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode);
+		if (ret == -EEXIST)
+			goto find_entry;
+		else if (ret)
+			goto out_err;
+
+		entry = &ientry->fsn_entry;
+retry:
+		ret = -ENOMEM;
+		if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
+			goto out_err;
+
+		spin_lock(&group->inotify_data.idr_lock);
+		/* if entry is added to the idr we keep the reference obtained
+		 * through fsnotify_mark_add.  remember to drop this reference
+		 * when entry is removed from idr */
+		ret = idr_get_new_above(&group->inotify_data.idr, entry,
+					++group->inotify_data.last_wd,
+					&ientry->wd);
+		spin_unlock(&group->inotify_data.idr_lock);
+		if (ret) {
+			if (ret == -EAGAIN)
+				goto retry;
+			goto out_err;
+		}
+		atomic_inc(&group->inotify_data.user->inotify_watches);
+	}
+
+	spin_lock(&entry->lock);
+
+	old_mask = entry->mask;
+	if (add) {
+		entry->mask |= mask;
+		new_mask = entry->mask;
+	} else {
+		entry->mask = mask;
+		new_mask = entry->mask;
+	}
+
+	spin_unlock(&entry->lock);
+
+	if (old_mask != new_mask) {
+		/* more bits in old than in new? */
+		int dropped = (old_mask & ~new_mask);
+		/* more bits in this entry than the inode's mask? */
+		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
+		/* more bits in this entry than the group? */
+		int do_group = (new_mask & ~group->mask);
+
+		/* update the inode with this new entry */
+		if (dropped || do_inode)
+			fsnotify_recalc_inode_mask(inode);
+
+		/* update the group mask with the new mask */
+		if (dropped || do_group)
+			fsnotify_recalc_group_mask(group);
+	}
+
+	return ientry->wd;
+
+out_err:
+	/* see this isn't supposed to happen, just kill the watch */
+	if (entry) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+	return ret;
+}
+
+static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
+{
+	struct fsnotify_group *group;
+	unsigned int grp_num;
+
+	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
+	grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
+	group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
+	if (IS_ERR(group))
+		return group;
+
+	group->max_events = max_events;
+
+	spin_lock_init(&group->inotify_data.idr_lock);
+	idr_init(&group->inotify_data.idr);
+	group->inotify_data.last_wd = 0;
+	group->inotify_data.user = user;
+	group->inotify_data.fa = NULL;
+
+	return group;
+}
+
+
+/* inotify syscalls */
 SYSCALL_DEFINE1(inotify_init1, int, flags)
 {
-	struct inotify_device *dev;
-	struct inotify_handle *ih;
+	struct fsnotify_group *group;
 	struct user_struct *user;
 	struct file *filp;
 	int fd, ret;
@@ -621,45 +591,27 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 		goto out_free_uid;
 	}
 
-	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
-	if (unlikely(!dev)) {
-		ret = -ENOMEM;
+	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
+	group = inotify_new_group(user, inotify_max_queued_events);
+	if (IS_ERR(group)) {
+		ret = PTR_ERR(group);
 		goto out_free_uid;
 	}
 
-	ih = inotify_init(&inotify_user_ops);
-	if (IS_ERR(ih)) {
-		ret = PTR_ERR(ih);
-		goto out_free_dev;
-	}
-	dev->ih = ih;
-	dev->fa = NULL;
-
 	filp->f_op = &inotify_fops;
 	filp->f_path.mnt = mntget(inotify_mnt);
 	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
 	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
 	filp->f_mode = FMODE_READ;
 	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-	filp->private_data = dev;
-
-	INIT_LIST_HEAD(&dev->events);
-	init_waitqueue_head(&dev->wq);
-	mutex_init(&dev->ev_mutex);
-	mutex_init(&dev->up_mutex);
-	dev->event_count = 0;
-	dev->queue_size = 0;
-	dev->max_events = inotify_max_queued_events;
-	dev->user = user;
-	atomic_set(&dev->count, 0);
-
-	get_inotify_dev(dev);
+	filp->private_data = group;
+
 	atomic_inc(&user->inotify_devs);
+
 	fd_install(fd, filp);
 
 	return fd;
-out_free_dev:
-	kfree(dev);
+
 out_free_uid:
 	free_uid(user);
 	put_filp(filp);
@@ -676,8 +628,8 @@ SYSCALL_DEFINE0(inotify_init)
 SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 		u32, mask)
 {
+	struct fsnotify_group *group;
 	struct inode *inode;
-	struct inotify_device *dev;
 	struct path path;
 	struct file *filp;
 	int ret, fput_needed;
@@ -698,20 +650,20 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	if (mask & IN_ONLYDIR)
 		flags |= LOOKUP_DIRECTORY;
 
-	ret = find_inode(pathname, &path, flags);
-	if (unlikely(ret))
+	ret = inotify_find_inode(pathname, &path, flags);
+	if (ret)
 		goto fput_and_out;
 
-	/* inode held in place by reference to path; dev by fget on fd */
+	/* inode held in place by reference to path; group by fget on fd */
 	inode = path.dentry->d_inode;
-	dev = filp->private_data;
+	group = filp->private_data;
 
-	mutex_lock(&dev->up_mutex);
-	ret = inotify_find_update_watch(dev->ih, inode, mask);
-	if (ret == -ENOENT)
-		ret = create_watch(dev, inode, mask);
-	mutex_unlock(&dev->up_mutex);
+	/* create/update an inode mark */
+	ret = inotify_update_watch(group, inode, mask);
+	if (unlikely(ret))
+		goto path_put_and_out;
 
+path_put_and_out:
 	path_put(&path);
 fput_and_out:
 	fput_light(filp, fput_needed);
@@ -720,9 +672,10 @@ fput_and_out:
 
 SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
+	struct fsnotify_group *group;
+	struct fsnotify_mark_entry *entry;
 	struct file *filp;
-	struct inotify_device *dev;
-	int ret, fput_needed;
+	int ret = 0, fput_needed;
 
 	filp = fget_light(fd, &fput_needed);
 	if (unlikely(!filp))
@@ -734,10 +687,20 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 		goto out;
 	}
 
-	dev = filp->private_data;
+	group = filp->private_data;
 
-	/* we free our watch data when we get IN_IGNORED */
-	ret = inotify_rm_wd(dev->ih, wd);
+	spin_lock(&group->inotify_data.idr_lock);
+	entry = idr_find(&group->inotify_data.idr, wd);
+	if (unlikely(!entry)) {
+		spin_unlock(&group->inotify_data.idr_lock);
+		ret = -EINVAL;
+		goto out;
+	}
+	fsnotify_get_mark(entry);
+	spin_unlock(&group->inotify_data.idr_lock);
+
+	inotify_destroy_mark_entry(entry, group);
+	fsnotify_put_mark(entry);
 
 out:
 	fput_light(filp, fput_needed);
@@ -753,9 +716,9 @@ inotify_get_sb(struct file_system_type *fs_type, int flags,
 }
 
 static struct file_system_type inotify_fs_type = {
-    .name           = "inotifyfs",
-    .get_sb         = inotify_get_sb,
-    .kill_sb        = kill_anon_super,
+    .name	= "inotifyfs",
+    .get_sb	= inotify_get_sb,
+    .kill_sb	= kill_anon_super,
 };
 
 /*
@@ -775,18 +738,16 @@ static int __init inotify_user_setup(void)
 	if (IS_ERR(inotify_mnt))
 		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
 
+	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
+	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
+	inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
+	if (!inotify_ignored_event)
+		panic("unable to allocate the inotify ignored event\n");
+
 	inotify_max_queued_events = 16384;
 	inotify_max_user_instances = 128;
 	inotify_max_user_watches = 8192;
 
-	watch_cachep = kmem_cache_create("inotify_watch_cache",
-					 sizeof(struct inotify_user_watch),
-					 0, SLAB_PANIC, NULL);
-	event_cachep = kmem_cache_create("inotify_event_cache",
-					 sizeof(struct inotify_kernel_event),
-					 0, SLAB_PANIC, NULL);
-
 	return 0;
 }
-
 module_init(inotify_user_setup);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d2c0ee3..44848aa 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -9,6 +9,7 @@
 
 #ifdef __KERNEL__
 
+#include <linux/idr.h> /* inotify uses this */
 #include <linux/fs.h> /* struct inode */
 #include <linux/list.h>
 #include <linux/path.h> /* struct path */
@@ -59,6 +60,7 @@
 
 /* listeners that hard code group numbers near the top */
 #define DNOTIFY_GROUP_NUM	UINT_MAX
+#define INOTIFY_GROUP_NUM	(DNOTIFY_GROUP_NUM-1)
 
 struct fsnotify_group;
 struct fsnotify_event;
@@ -141,6 +143,15 @@ struct fsnotify_group {
 	/* groups can define private fields here or use the void *private */
 	union {
 		void *private;
+#ifdef CONFIG_INOTIFY_USER
+		struct inotify_group_private_data {
+			spinlock_t	idr_lock;
+			struct idr      idr;
+			u32             last_wd;
+			struct fasync_struct    *fa;    /* async notification */
+			struct user_struct      *user;
+		} inotify_data;
+#endif
 	};
 };
 
diff --git a/init/Kconfig b/init/Kconfig
index d4e9671..5de1c17c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -302,7 +302,8 @@ config AUDITSYSCALL
 
 config AUDIT_TREE
 	def_bool y
-	depends on AUDITSYSCALL && INOTIFY
+	depends on AUDITSYSCALL
+	select INOTIFY
 
 menu "RCU Subsystem"
 
-- 
cgit v0.10.2


From ff52cc2158b32b3b979ca7802b1fd7c70f36e13c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 11 Jun 2009 11:09:47 -0400
Subject: fsnotify: move events should indicate the event was on a child

fsnotify tells its listeners explicitly when an event happened on the given
inode verses on the child of the given inode.  (see __fsnotify_parent)
However, the semantics of fsnotify_move() are such that we deliver events
directly to the two parent directories in question (old_dir and new_dir)
directly without using the __fsnotify_parent() call.  fsnotify should be
adding FS_EVENT_ON_CHILD for the notifications to these parents.

Signed-off-by: Eric Paris <eparis@redhat.com>

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index c25b39d..936f9aa 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -71,12 +71,11 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	struct inode *source = moved->d_inode;
 	u32 in_cookie = inotify_get_cookie();
 	u32 fs_cookie = fsnotify_get_cookie();
-	__u32 old_dir_mask = 0;
-	__u32 new_dir_mask = 0;
+	__u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM);
+	__u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO);
 
-	if (old_dir == new_dir) {
-		old_dir_mask = FS_DN_RENAME;
-	}
+	if (old_dir == new_dir)
+		old_dir_mask |= FS_DN_RENAME;
 
 	if (isdir) {
 		isdir = IN_ISDIR;
@@ -84,9 +83,6 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 		new_dir_mask |= FS_IN_ISDIR;
 	}
 
-	old_dir_mask |= FS_MOVED_FROM;
-	new_dir_mask |= FS_MOVED_TO;
-
 	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name,
 				  source);
 	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name,
-- 
cgit v0.10.2


From 5ac697b793a3c45005c568df692518da6e690390 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 11 Jun 2009 11:09:47 -0400
Subject: dnotify: do not use ?true:false when assigning to a bool

dnotify_should send event assigned a bool using ?true:false when computing
a bit operation.  This is poitless and the bool type does this for us.

Signed-off-by: Eric Paris <eparis@redhat.com>

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 12f9e6b..5134e89 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -154,7 +154,7 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 		return false;
 
 	spin_lock(&entry->lock);
-	send = (mask & entry->mask) ? true : false;
+	send = (mask & entry->mask);
 	spin_unlock(&entry->lock);
 	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
 
-- 
cgit v0.10.2


From ce61856bd2aadb064f595e5c0444376a2b117c41 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 11 Jun 2009 11:09:47 -0400
Subject: dnotify: do not bother to lock entry->lock when reading mask

entry->lock is needed to make sure entry->mask does not change while
manipulating it.  In dnotify_should_send_event() we don't care if we get an
old or a new mask value out of this entry so there is no point it taking
the lock.

Signed-off-by: Eric Paris <eparis@redhat.com>

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 5134e89..ec459b6 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -153,9 +153,8 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 	if (!entry)
 		return false;
 
-	spin_lock(&entry->lock);
 	send = (mask & entry->mask);
-	spin_unlock(&entry->lock);
+
 	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
 
 	return send;
-- 
cgit v0.10.2


From e42e27736de80045f925564ea27a1d32957219e7 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 11 Jun 2009 11:09:47 -0400
Subject: inotify/dnotify: should_send_event shouldn't match on
 FS_EVENT_ON_CHILD

inotify and dnotify will both indicate that they want any event which came
from a child inode.  The fix is to mask off FS_EVENT_ON_CHILD when deciding
if inotify or dnotify is interested in a given event.

Signed-off-by: Eric Paris <eparis@redhat.com>

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index ec459b6..98a7516 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -153,6 +153,7 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 	if (!entry)
 		return false;
 
+	mask = (mask & ~FS_EVENT_ON_CHILD);
 	send = (mask & entry->mask);
 
 	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index f11d75f..ec2f7bd 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -137,14 +137,16 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
 	int idx;
+	/* global tests shouldn't care about events on child only the specific event */
+	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
 	if (list_empty(&fsnotify_groups))
 		return;
 
-	if (!(mask & fsnotify_mask))
+	if (!(test_mask & fsnotify_mask))
 		return;
 
-	if (!(mask & to_tell->i_fsnotify_mask))
+	if (!(test_mask & to_tell->i_fsnotify_mask))
 		return;
 	/*
 	 * SRCU!!  the groups list is very very much read only and the path is
@@ -153,7 +155,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	 */
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
 	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
-		if (mask & group->mask) {
+		if (test_mask & group->mask) {
 			if (!group->ops->should_send_event(group, to_tell, mask))
 				continue;
 			if (!event) {
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 160da54..7ef75b8 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -95,6 +95,7 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 	if (!entry)
 		return false;
 
+	mask = (mask & ~FS_EVENT_ON_CHILD);
 	send = (entry->mask & mask);
 
 	/* find took a reference */
-- 
cgit v0.10.2


From a092ee20fd33d2df0990dcbf2235afc181612818 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 11 Jun 2009 11:09:48 -0400
Subject: fsnotify: allow groups to set freeing_mark to null

Most fsnotify listeners (all but inotify) do not care about marks being
freed.  Allow groups to set freeing_mark to null and do not call any
function if it is set that way.

Signed-off-by: Eric Paris <eparis@redhat.com>

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 98a7516..828a889 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -161,12 +161,6 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 	return send;
 }
 
-static void dnotify_freeing_mark(struct fsnotify_mark_entry *entry,
-				 struct fsnotify_group *group)
-{
-	/* dnotify doesn't care than an inode is on the way out */
-}
-
 static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
 {
 	struct dnotify_mark_entry *dnentry = container_of(entry,
@@ -182,7 +176,7 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
 	.handle_event = dnotify_handle_event,
 	.should_send_event = dnotify_should_send_event,
 	.free_group_priv = NULL,
-	.freeing_mark = dnotify_freeing_mark,
+	.freeing_mark = NULL,
 	.free_event_priv = NULL,
 };
 
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 0a499d2..c8a07c6 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -190,7 +190,8 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
 	 * callback to the group function to let it know that this entry
 	 * is being freed.
 	 */
-	group->ops->freeing_mark(entry, group);
+	if (group->ops->freeing_mark)
+		group->ops->freeing_mark(entry, group);
 
 	/*
 	 * __fsnotify_update_child_dentry_flags(inode);
-- 
cgit v0.10.2


From 63b852a6b67d0820d388b0ecd0da83ccb4048b8d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:24 +0000
Subject: asm-generic: rename termios.h, signal.h and mman.h

The existing asm-generic versions are incomplete and included
by some architectures. New architectures should be able
to use a generic version, so rename the existing files and
change all users, which lets us add the new files.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h
index 13c2305..a938830 100644
--- a/arch/alpha/include/asm/signal.h
+++ b/arch/alpha/include/asm/signal.h
@@ -111,7 +111,7 @@ typedef unsigned long sigset_t;
 #define SIG_UNBLOCK        2	/* for unblocking signals */
 #define SIG_SETMASK        3	/* for setting the signal mask */
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct osf_sigaction {
diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h
index 54570d2..fc26976 100644
--- a/arch/arm/include/asm/mman.h
+++ b/arch/arm/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __ARM_MMAN_H__
 #define __ARM_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index d0fb487..43ba0fb 100644
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -111,7 +111,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/avr32/include/asm/mman.h b/arch/avr32/include/asm/mman.h
index 648f91e..9a92b15 100644
--- a/arch/avr32/include/asm/mman.h
+++ b/arch/avr32/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_AVR32_MMAN_H__
 #define __ASM_AVR32_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/avr32/include/asm/signal.h b/arch/avr32/include/asm/signal.h
index caffefe..8790dfc 100644
--- a/arch/avr32/include/asm/signal.h
+++ b/arch/avr32/include/asm/signal.h
@@ -112,7 +112,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/avr32/include/asm/termios.h b/arch/avr32/include/asm/termios.h
index 0152aba..dd7e9da 100644
--- a/arch/avr32/include/asm/termios.h
+++ b/arch/avr32/include/asm/termios.h
@@ -55,7 +55,7 @@ struct termio {
 */
 #define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
 
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 
 #endif	/* __KERNEL__ */
 
diff --git a/arch/blackfin/include/asm/signal.h b/arch/blackfin/include/asm/signal.h
index 87951d2..2eea907 100644
--- a/arch/blackfin/include/asm/signal.h
+++ b/arch/blackfin/include/asm/signal.h
@@ -104,7 +104,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/asm/mman.h
index 1c35e1b..b7f0afb 100644
--- a/arch/cris/include/asm/mman.h
+++ b/arch/cris/include/asm/mman.h
@@ -3,7 +3,7 @@
 
 /* verbatim copy of asm-i386/ version */
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h
index 349ae68..ea6af9a 100644
--- a/arch/cris/include/asm/signal.h
+++ b/arch/cris/include/asm/signal.h
@@ -106,7 +106,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/frv/include/asm/mman.h b/arch/frv/include/asm/mman.h
index b4371e9..58c1d11 100644
--- a/arch/frv/include/asm/mman.h
+++ b/arch/frv/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_MMAN_H__
 #define __ASM_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/frv/include/asm/termios.h b/arch/frv/include/asm/termios.h
index a62fb58..b4868aa 100644
--- a/arch/frv/include/asm/termios.h
+++ b/arch/frv/include/asm/termios.h
@@ -52,7 +52,7 @@ struct termio {
 /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
 
 #ifdef __KERNEL__
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 #endif
 
 #endif /* _ASM_TERMIOS_H */
diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/asm/mman.h
index b9f104f..cf35f0a 100644
--- a/arch/h8300/include/asm/mman.h
+++ b/arch/h8300/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __H8300_MMAN_H__
 #define __H8300_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h
index 7bc1504..fd8b66e 100644
--- a/arch/h8300/include/asm/signal.h
+++ b/arch/h8300/include/asm/signal.h
@@ -105,7 +105,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
index c73b878..48cf8b9 100644
--- a/arch/ia64/include/asm/mman.h
+++ b/arch/ia64/include/asm/mman.h
@@ -8,7 +8,7 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
  */
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x00100		/* stack-like segment */
 #define MAP_GROWSUP	0x00200		/* register stack-like segment */
diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h
index 4f5ca56..b166248 100644
--- a/arch/ia64/include/asm/signal.h
+++ b/arch/ia64/include/asm/signal.h
@@ -114,7 +114,7 @@
 
 #endif /* __KERNEL__ */
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 # ifndef __ASSEMBLY__
 
diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/asm/mman.h
index 516a897..04a5f40 100644
--- a/arch/m32r/include/asm/mman.h
+++ b/arch/m32r/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __M32R_MMAN_H__
 #define __M32R_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h
index 1a60706..9c1acb2 100644
--- a/arch/m32r/include/asm/signal.h
+++ b/arch/m32r/include/asm/signal.h
@@ -107,7 +107,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/m68k/include/asm/mman.h b/arch/m68k/include/asm/mman.h
index 1626d37..9f5c4c4 100644
--- a/arch/m68k/include/asm/mman.h
+++ b/arch/m68k/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __M68K_MMAN_H__
 #define __M68K_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 08788fd..5bc09c7 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -103,7 +103,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/microblaze/include/asm/signal.h b/arch/microblaze/include/asm/signal.h
index 9676fad..46bc226 100644
--- a/arch/microblaze/include/asm/signal.h
+++ b/arch/microblaze/include/asm/signal.h
@@ -90,7 +90,7 @@
 
 # ifndef __ASSEMBLY__
 # include <linux/types.h>
-# include <asm-generic/signal.h>
+# include <asm-generic/signal-defs.h>
 
 /* Avoid too many header ordering problems. */
 struct siginfo;
diff --git a/arch/microblaze/include/asm/termios.h b/arch/microblaze/include/asm/termios.h
index 102d772..47a46d1 100644
--- a/arch/microblaze/include/asm/termios.h
+++ b/arch/microblaze/include/asm/termios.h
@@ -81,7 +81,7 @@ struct termio {
 
 #ifdef __KERNEL__
 
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 
 #endif	/* __KERNEL__ */
 
diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h
index bee5153..c783f36 100644
--- a/arch/mips/include/asm/signal.h
+++ b/arch/mips/include/asm/signal.h
@@ -109,7 +109,7 @@ typedef unsigned long old_sigset_t;		/* at least 32 bits */
 #define SIG_UNBLOCK	2	/* for unblocking signals */
 #define SIG_SETMASK	3	/* for setting the signal mask */
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 struct sigaction {
 	unsigned int	sa_flags;
diff --git a/arch/mn10300/include/asm/mman.h b/arch/mn10300/include/asm/mman.h
index b7986b6..d04fac1 100644
--- a/arch/mn10300/include/asm/mman.h
+++ b/arch/mn10300/include/asm/mman.h
@@ -12,7 +12,7 @@
 #ifndef _ASM_MMAN_H
 #define _ASM_MMAN_H
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h
index e98817c..7e891fc 100644
--- a/arch/mn10300/include/asm/signal.h
+++ b/arch/mn10300/include/asm/signal.h
@@ -115,7 +115,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index e7b99ba..7b1c498 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_POWERPC_MMAN_H
 #define _ASM_POWERPC_MMAN_H
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 /*
  * This program is free software; you can redistribute it and/or
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index 69f709d..3eb13be 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -94,7 +94,7 @@ typedef struct {
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 struct old_sigaction {
 	__sighandler_t sa_handler;
diff --git a/arch/powerpc/include/asm/termios.h b/arch/powerpc/include/asm/termios.h
index 2c14fea..a24f487 100644
--- a/arch/powerpc/include/asm/termios.h
+++ b/arch/powerpc/include/asm/termios.h
@@ -78,7 +78,7 @@ struct termio {
 
 #ifdef __KERNEL__
 
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 
 #endif	/* __KERNEL__ */
 
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index da01432..f63fe7b 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -9,7 +9,7 @@
 #ifndef __S390_MMAN_H__
 #define __S390_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
index f6cfddb..cdf5cb2 100644
--- a/arch/s390/include/asm/signal.h
+++ b/arch/s390/include/asm/signal.h
@@ -115,7 +115,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ     2048
 #define SIGSTKSZ        8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
index 67f6627..bc3a35c 100644
--- a/arch/s390/include/asm/termios.h
+++ b/arch/s390/include/asm/termios.h
@@ -60,7 +60,7 @@ struct termio {
 #define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
 #define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
 
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 
 #endif	/* __KERNEL__ */
 
diff --git a/arch/sh/include/asm/mman.h b/arch/sh/include/asm/mman.h
index 156eb02..7d8b72c 100644
--- a/arch/sh/include/asm/mman.h
+++ b/arch/sh/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_SH_MMAN_H
 #define __ASM_SH_MMAN_H
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/sh/include/asm/signal.h b/arch/sh/include/asm/signal.h
index 5c5c1e8..9cc5f01 100644
--- a/arch/sh/include/asm/signal.h
+++ b/arch/sh/include/asm/signal.h
@@ -106,7 +106,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h
index fdfbbf0..988192e 100644
--- a/arch/sparc/include/asm/mman.h
+++ b/arch/sparc/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __SPARC_MMAN_H__
 #define __SPARC_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 /* SunOS'ified... */
 
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index cba4520..e49b828 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -176,7 +176,7 @@ struct sigstack {
 #define SA_STATIC_ALLOC         0x8000
 #endif
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 struct __new_sigaction {
 	__sighandler_t		sa_handler;
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h
index 90bc410..751af25 100644
--- a/arch/x86/include/asm/mman.h
+++ b/arch/x86/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_MMAN_H
 #define _ASM_X86_MMAN_H
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
 
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 7761a5d..598457c 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -117,7 +117,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifndef __ASSEMBLY__
 
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 4c9932a..460b08d 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -2,9 +2,9 @@ header-y += errno-base.h
 header-y += errno.h
 header-y += fcntl.h
 header-y += ioctl.h
-header-y += mman.h
+header-y += mman-common.h
 header-y += poll.h
-header-y += signal.h
+header-y += signal-defs.h
 header-y += statfs.h
 
 unifdef-y += int-l64.h
diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
new file mode 100644
index 0000000..3b69ad3
--- /dev/null
+++ b/include/asm-generic/mman-common.h
@@ -0,0 +1,41 @@
+#ifndef __ASM_GENERIC_MMAN_COMMON_H
+#define __ASM_GENERIC_MMAN_COMMON_H
+
+/*
+ Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
+ Based on: asm-xxx/mman.h
+*/
+
+#define PROT_READ	0x1		/* page can be read */
+#define PROT_WRITE	0x2		/* page can be written */
+#define PROT_EXEC	0x4		/* page can be executed */
+#define PROT_SEM	0x8		/* page may be used for atomic ops */
+#define PROT_NONE	0x0		/* page can not be accessed */
+#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED	0x01		/* Share changes */
+#define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_TYPE	0x0f		/* Mask for type of mapping */
+#define MAP_FIXED	0x10		/* Interpret addr exactly */
+#define MAP_ANONYMOUS	0x20		/* don't use a file */
+
+#define MS_ASYNC	1		/* sync memory asynchronously */
+#define MS_INVALIDATE	2		/* invalidate the caches */
+#define MS_SYNC		4		/* synchronous memory sync */
+
+#define MADV_NORMAL	0		/* no further special treatment */
+#define MADV_RANDOM	1		/* expect random page references */
+#define MADV_SEQUENTIAL	2		/* expect sequential page references */
+#define MADV_WILLNEED	3		/* will need these pages */
+#define MADV_DONTNEED	4		/* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
+
+/* compatibility flags */
+#define MAP_FILE	0
+
+#endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
deleted file mode 100644
index 5e3dde2..0000000
--- a/include/asm-generic/mman.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_GENERIC_MMAN_H
-#define _ASM_GENERIC_MMAN_H
-
-/*
- Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
- Based on: asm-xxx/mman.h
-*/
-
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
-
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
-#define MADV_NORMAL	0		/* no further special treatment */
-#define MADV_RANDOM	1		/* expect random page references */
-#define MADV_SEQUENTIAL	2		/* expect sequential page references */
-#define MADV_WILLNEED	3		/* will need these pages */
-#define MADV_DONTNEED	4		/* don't need these pages */
-
-/* common parameters: try to keep these consistent across architectures */
-#define MADV_REMOVE	9		/* remove these pages & resources */
-#define MADV_DONTFORK	10		/* don't inherit across fork */
-#define MADV_DOFORK	11		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_FILE	0
-
-#endif
diff --git a/include/asm-generic/signal-defs.h b/include/asm-generic/signal-defs.h
new file mode 100644
index 0000000..00f95df
--- /dev/null
+++ b/include/asm-generic/signal-defs.h
@@ -0,0 +1,28 @@
+#ifndef __ASM_GENERIC_SIGNAL_DEFS_H
+#define __ASM_GENERIC_SIGNAL_DEFS_H
+
+#include <linux/compiler.h>
+
+#ifndef SIG_BLOCK
+#define SIG_BLOCK          0	/* for blocking signals */
+#endif
+#ifndef SIG_UNBLOCK
+#define SIG_UNBLOCK        1	/* for unblocking signals */
+#endif
+#ifndef SIG_SETMASK
+#define SIG_SETMASK        2	/* for setting the signal mask */
+#endif
+
+#ifndef __ASSEMBLY__
+typedef void __signalfn_t(int);
+typedef __signalfn_t __user *__sighandler_t;
+
+typedef void __restorefn_t(void);
+typedef __restorefn_t __user *__sigrestore_t;
+
+#define SIG_DFL	((__force __sighandler_t)0)	/* default signal handling */
+#define SIG_IGN	((__force __sighandler_t)1)	/* ignore signal */
+#define SIG_ERR	((__force __sighandler_t)-1)	/* error return from signal */
+#endif
+
+#endif /* __ASM_GENERIC_SIGNAL_DEFS_H */
diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h
deleted file mode 100644
index dae1d87..0000000
--- a/include/asm-generic/signal.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef __ASM_GENERIC_SIGNAL_H
-#define __ASM_GENERIC_SIGNAL_H
-
-#include <linux/compiler.h>
-
-#ifndef SIG_BLOCK
-#define SIG_BLOCK          0	/* for blocking signals */
-#endif
-#ifndef SIG_UNBLOCK
-#define SIG_UNBLOCK        1	/* for unblocking signals */
-#endif
-#ifndef SIG_SETMASK
-#define SIG_SETMASK        2	/* for setting the signal mask */
-#endif
-
-#ifndef __ASSEMBLY__
-typedef void __signalfn_t(int);
-typedef __signalfn_t __user *__sighandler_t;
-
-typedef void __restorefn_t(void);
-typedef __restorefn_t __user *__sigrestore_t;
-
-#define SIG_DFL	((__force __sighandler_t)0)	/* default signal handling */
-#define SIG_IGN	((__force __sighandler_t)1)	/* ignore signal */
-#define SIG_ERR	((__force __sighandler_t)-1)	/* error return from signal */
-#endif
-
-#endif /* __ASM_GENERIC_SIGNAL_H */
diff --git a/include/asm-generic/termios-base.h b/include/asm-generic/termios-base.h
new file mode 100644
index 0000000..0a769fe
--- /dev/null
+++ b/include/asm-generic/termios-base.h
@@ -0,0 +1,77 @@
+/* termios.h: generic termios/termio user copying/translation
+ */
+
+#ifndef _ASM_GENERIC_TERMIOS_BASE_H
+#define _ASM_GENERIC_TERMIOS_BASE_H
+
+#include <asm/uaccess.h>
+
+#ifndef __ARCH_TERMIO_GETPUT
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+static inline int user_termio_to_kernel_termios(struct ktermios *termios,
+						struct termio __user *termio)
+{
+	unsigned short tmp;
+
+	if (get_user(tmp, &termio->c_iflag) < 0)
+		goto fault;
+	termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp;
+
+	if (get_user(tmp, &termio->c_oflag) < 0)
+		goto fault;
+	termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp;
+
+	if (get_user(tmp, &termio->c_cflag) < 0)
+		goto fault;
+	termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp;
+
+	if (get_user(tmp, &termio->c_lflag) < 0)
+		goto fault;
+	termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp;
+
+	if (get_user(termios->c_line, &termio->c_line) < 0)
+		goto fault;
+
+	if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0)
+		goto fault;
+
+	return 0;
+
+ fault:
+	return -EFAULT;
+}
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+static inline int kernel_termios_to_user_termio(struct termio __user *termio,
+						struct ktermios *termios)
+{
+	if (put_user(termios->c_iflag, &termio->c_iflag) < 0 ||
+	    put_user(termios->c_oflag, &termio->c_oflag) < 0 ||
+	    put_user(termios->c_cflag, &termio->c_cflag) < 0 ||
+	    put_user(termios->c_lflag, &termio->c_lflag) < 0 ||
+	    put_user(termios->c_line,  &termio->c_line) < 0 ||
+	    copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+#ifndef user_termios_to_kernel_termios
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios))
+#endif
+
+#ifndef kernel_termios_to_user_termios
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios))
+#endif
+
+#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
+#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
+
+#endif	/* __ARCH_TERMIO_GETPUT */
+
+#endif /* _ASM_GENERIC_TERMIOS_BASE_H */
diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h
deleted file mode 100644
index 7d39ecc..0000000
--- a/include/asm-generic/termios.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* termios.h: generic termios/termio user copying/translation
- */
-
-#ifndef _ASM_GENERIC_TERMIOS_H
-#define _ASM_GENERIC_TERMIOS_H
-
-#include <asm/uaccess.h>
-
-#ifndef __ARCH_TERMIO_GETPUT
-
-/*
- * Translate a "termio" structure into a "termios". Ugh.
- */
-static inline int user_termio_to_kernel_termios(struct ktermios *termios,
-						struct termio __user *termio)
-{
-	unsigned short tmp;
-
-	if (get_user(tmp, &termio->c_iflag) < 0)
-		goto fault;
-	termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp;
-
-	if (get_user(tmp, &termio->c_oflag) < 0)
-		goto fault;
-	termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp;
-
-	if (get_user(tmp, &termio->c_cflag) < 0)
-		goto fault;
-	termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp;
-
-	if (get_user(tmp, &termio->c_lflag) < 0)
-		goto fault;
-	termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp;
-
-	if (get_user(termios->c_line, &termio->c_line) < 0)
-		goto fault;
-
-	if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0)
-		goto fault;
-
-	return 0;
-
- fault:
-	return -EFAULT;
-}
-
-/*
- * Translate a "termios" structure into a "termio". Ugh.
- */
-static inline int kernel_termios_to_user_termio(struct termio __user *termio,
-						struct ktermios *termios)
-{
-	if (put_user(termios->c_iflag, &termio->c_iflag) < 0 ||
-	    put_user(termios->c_oflag, &termio->c_oflag) < 0 ||
-	    put_user(termios->c_cflag, &termio->c_cflag) < 0 ||
-	    put_user(termios->c_lflag, &termio->c_lflag) < 0 ||
-	    put_user(termios->c_line,  &termio->c_line) < 0 ||
-	    copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0)
-		return -EFAULT;
-
-	return 0;
-}
-
-#ifndef user_termios_to_kernel_termios
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios))
-#endif
-
-#ifndef kernel_termios_to_user_termios
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios))
-#endif
-
-#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
-#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
-
-#endif	/* __ARCH_TERMIO_GETPUT */
-
-#endif /* _ASM_GENERIC_TERMIOS_H */
-- 
cgit v0.10.2


From c31ae4bb4a9fa4606a74c0a4fb61b74f804e861e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:25 +0000
Subject: asm-generic: introduce asm/bitsperlong.h

This provides a reliable way for asm-generic/types.h and other
files to find out if it is running on a 32 or 64 bit platform.

We cannot use CONFIG_64BIT for this in headers that are included
from user space because CONFIG symbols are not available there.
We also cannot do it inside of asm/types.h because some headers
need the word size but cannot include types.h.

The solution is to introduce a new header <asm/bitsperlong.h>
that defines both __BITS_PER_LONG for user space and
BITS_PER_LONG for usage in the kernel. The asm-generic
version falls back to 32 bit unless the architecture overrides
it, which I did for all 64 bit platforms.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/arch/alpha/include/asm/bitsperlong.h b/arch/alpha/include/asm/bitsperlong.h
new file mode 100644
index 0000000..ad57f78
--- /dev/null
+++ b/arch/alpha/include/asm/bitsperlong.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_ALPHA_BITSPERLONG_H
+#define __ASM_ALPHA_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_ALPHA_BITSPERLONG_H */
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index f072f34..bd621ec 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -25,9 +25,6 @@ typedef unsigned int umode_t;
  * These aren't exported outside the kernel to avoid name space clashes
  */
 #ifdef __KERNEL__
-
-#define BITS_PER_LONG 64
-
 #ifndef __ASSEMBLY__
 
 typedef u64 dma_addr_t;
diff --git a/arch/arm/include/asm/bitsperlong.h b/arch/arm/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/arm/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/avr32/include/asm/bitsperlong.h b/arch/avr32/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/avr32/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/blackfin/include/asm/bitsperlong.h b/arch/blackfin/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/blackfin/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/cris/include/asm/bitsperlong.h b/arch/cris/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/cris/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/frv/include/asm/bitsperlong.h b/arch/frv/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/frv/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/h8300/include/asm/bitsperlong.h b/arch/h8300/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/h8300/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/ia64/include/asm/bitsperlong.h b/arch/ia64/include/asm/bitsperlong.h
new file mode 100644
index 0000000..ec4db3c
--- /dev/null
+++ b/arch/ia64/include/asm/bitsperlong.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_IA64_BITSPERLONG_H
+#define __ASM_IA64_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_IA64_BITSPERLONG_H */
diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h
index e36b371..fbf1ed3 100644
--- a/arch/ia64/include/asm/types.h
+++ b/arch/ia64/include/asm/types.h
@@ -19,10 +19,6 @@
 # define __IA64_UL(x)		(x)
 # define __IA64_UL_CONST(x)	x
 
-# ifdef __KERNEL__
-#  define BITS_PER_LONG 64
-# endif
-
 #else
 # define __IA64_UL(x)		((unsigned long)(x))
 # define __IA64_UL_CONST(x)	x##UL
@@ -34,10 +30,7 @@ typedef unsigned int umode_t;
  */
 # ifdef __KERNEL__
 
-#define BITS_PER_LONG 64
-
 /* DMA addresses are 64-bits wide, in general.  */
-
 typedef u64 dma_addr_t;
 
 # endif /* __KERNEL__ */
diff --git a/arch/m32r/include/asm/bitsperlong.h b/arch/m32r/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/m32r/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/m68k/include/asm/bitsperlong.h b/arch/m68k/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/m68k/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/microblaze/include/asm/bitsperlong.h b/arch/microblaze/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/microblaze/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/mips/include/asm/bitsperlong.h b/arch/mips/include/asm/bitsperlong.h
new file mode 100644
index 0000000..3e4c10a
--- /dev/null
+++ b/arch/mips/include/asm/bitsperlong.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_MIPS_BITSPERLONG_H
+#define __ASM_MIPS_BITSPERLONG_H
+
+#define __BITS_PER_LONG _MIPS_SZLONG
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_MIPS_BITSPERLONG_H */
diff --git a/arch/mips/include/asm/types.h b/arch/mips/include/asm/types.h
index 7956e69..544a285 100644
--- a/arch/mips/include/asm/types.h
+++ b/arch/mips/include/asm/types.h
@@ -31,9 +31,6 @@ typedef unsigned short umode_t;
  * These aren't exported outside the kernel to avoid name space clashes
  */
 #ifdef __KERNEL__
-
-#define BITS_PER_LONG _MIPS_SZLONG
-
 #ifndef __ASSEMBLY__
 
 #if (defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) \
diff --git a/arch/mn10300/include/asm/bitsperlong.h b/arch/mn10300/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/mn10300/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/parisc/include/asm/bitsperlong.h b/arch/parisc/include/asm/bitsperlong.h
new file mode 100644
index 0000000..75196b4
--- /dev/null
+++ b/arch/parisc/include/asm/bitsperlong.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_PARISC_BITSPERLONG_H
+#define __ASM_PARISC_BITSPERLONG_H
+
+/*
+ * using CONFIG_* outside of __KERNEL__ is wrong,
+ * __LP64__ was also removed from headers, so what
+ * is the right approach on parisc?
+ *	-arnd
+ */
+#if (defined(__KERNEL__) && defined(CONFIG_64BIT)) || defined (__LP64__)
+#define __BITS_PER_LONG 64
+#define SHIFT_PER_LONG 6
+#else
+#define __BITS_PER_LONG 32
+#define SHIFT_PER_LONG 5
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_PARISC_BITSPERLONG_H */
diff --git a/arch/parisc/include/asm/types.h b/arch/parisc/include/asm/types.h
index 7f5a39b..20135cc 100644
--- a/arch/parisc/include/asm/types.h
+++ b/arch/parisc/include/asm/types.h
@@ -14,14 +14,6 @@ typedef unsigned short umode_t;
  */
 #ifdef __KERNEL__
 
-#ifdef CONFIG_64BIT
-#define BITS_PER_LONG 64
-#define SHIFT_PER_LONG 6
-#else
-#define BITS_PER_LONG 32
-#define SHIFT_PER_LONG 5
-#endif
-
 #ifndef __ASSEMBLY__
 
 /* Dma addresses are 32-bits wide.  */
diff --git a/arch/powerpc/include/asm/bitsperlong.h b/arch/powerpc/include/asm/bitsperlong.h
new file mode 100644
index 0000000..5f16590
--- /dev/null
+++ b/arch/powerpc/include/asm/bitsperlong.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_POWERPC_BITSPERLONG_H
+#define __ASM_POWERPC_BITSPERLONG_H
+
+#if defined(__powerpc64__)
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_POWERPC_BITSPERLONG_H */
diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h
index 7ce27a5..a5aea0c 100644
--- a/arch/powerpc/include/asm/types.h
+++ b/arch/powerpc/include/asm/types.h
@@ -40,15 +40,6 @@ typedef struct {
 #endif /* __ASSEMBLY__ */
 
 #ifdef __KERNEL__
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
-#ifdef __powerpc64__
-#define BITS_PER_LONG 64
-#else
-#define BITS_PER_LONG 32
-#endif
-
 #ifndef __ASSEMBLY__
 
 typedef __vector128 vector128;
diff --git a/arch/s390/include/asm/bitsperlong.h b/arch/s390/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6b235ae
--- /dev/null
+++ b/arch/s390/include/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_S390_BITSPERLONG_H
+#define __ASM_S390_BITSPERLONG_H
+
+#ifndef __s390x__
+#define __BITS_PER_LONG 32
+#else
+#define __BITS_PER_LONG 64
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_S390_BITSPERLONG_H */
+
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
index 3dc3fc2..04d6b95 100644
--- a/arch/s390/include/asm/types.h
+++ b/arch/s390/include/asm/types.h
@@ -28,12 +28,6 @@ typedef __signed__ long saddr_t;
  */
 #ifdef __KERNEL__
 
-#ifndef __s390x__
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-
 #ifndef __ASSEMBLY__
 
 typedef u64 dma64_addr_t;
diff --git a/arch/sh/include/asm/bitsperlong.h b/arch/sh/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/sh/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/sparc/include/asm/bitsperlong.h b/arch/sparc/include/asm/bitsperlong.h
new file mode 100644
index 0000000..40dcaa3
--- /dev/null
+++ b/arch/sparc/include/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_ALPHA_BITSPERLONG_H
+#define __ASM_ALPHA_BITSPERLONG_H
+
+#if defined(__sparc__) && defined(__arch64__)
+#define __BITS_PER_LONG 64
+#else
+#define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_ALPHA_BITSPERLONG_H */
+
diff --git a/arch/sparc/include/asm/types.h b/arch/sparc/include/asm/types.h
index 22371188..de671d7 100644
--- a/arch/sparc/include/asm/types.h
+++ b/arch/sparc/include/asm/types.h
@@ -21,8 +21,6 @@ typedef unsigned short umode_t;
 
 #ifdef __KERNEL__
 
-#define BITS_PER_LONG 64
-
 #ifndef __ASSEMBLY__
 
 /* Dma addresses come in generic and 64-bit flavours.  */
@@ -46,8 +44,6 @@ typedef unsigned short umode_t;
 
 #ifdef __KERNEL__
 
-#define BITS_PER_LONG 32
-
 #ifndef __ASSEMBLY__
 
 typedef u32 dma_addr_t;
diff --git a/arch/x86/include/asm/bitsperlong.h b/arch/x86/include/asm/bitsperlong.h
new file mode 100644
index 0000000..b0ae1c4
--- /dev/null
+++ b/arch/x86/include/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_X86_BITSPERLONG_H
+#define __ASM_X86_BITSPERLONG_H
+
+#ifdef __x86_64__
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_X86_BITSPERLONG_H */
+
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h
index e6f7363..09b9774 100644
--- a/arch/x86/include/asm/types.h
+++ b/arch/x86/include/asm/types.h
@@ -14,12 +14,6 @@ typedef unsigned short umode_t;
  */
 #ifdef __KERNEL__
 
-#ifdef CONFIG_X86_32
-# define BITS_PER_LONG 32
-#else
-# define BITS_PER_LONG 64
-#endif
-
 #ifndef __ASSEMBLY__
 
 typedef u64 dma64_addr_t;
diff --git a/arch/xtensa/include/asm/bitsperlong.h b/arch/xtensa/include/asm/bitsperlong.h
new file mode 100644
index 0000000..6dc0bb0
--- /dev/null
+++ b/arch/xtensa/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 460b08d..cbb4378 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -1,3 +1,4 @@
+header-y += bitsperlong.h
 header-y += errno-base.h
 header-y += errno.h
 header-y += fcntl.h
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index 70d1855..290910e 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -9,6 +9,7 @@ unifdef-y += a.out.h
 endif
 unifdef-y += auxvec.h
 unifdef-y += byteorder.h
+unifdef-y += bitsperlong.h
 unifdef-y += errno.h
 unifdef-y += fcntl.h
 unifdef-y += ioctl.h
diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h
new file mode 100644
index 0000000..4ae54e0
--- /dev/null
+++ b/include/asm-generic/bitsperlong.h
@@ -0,0 +1,32 @@
+#ifndef __ASM_GENERIC_BITS_PER_LONG
+#define __ASM_GENERIC_BITS_PER_LONG
+
+/*
+ * There seems to be no way of detecting this automatically from user
+ * space, so 64 bit architectures should override this in their
+ * bitsperlong.h. In particular, an architecture that supports
+ * both 32 and 64 bit user space must not rely on CONFIG_64BIT
+ * to decide it, but rather check a compiler provided macro.
+ */
+#ifndef __BITS_PER_LONG
+#define __BITS_PER_LONG 32
+#endif
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_64BIT
+#define BITS_PER_LONG 64
+#else
+#define BITS_PER_LONG 32
+#endif /* CONFIG_64BIT */
+
+/*
+ * FIXME: The check currently breaks x86-64 build, so it's
+ * temporarily disabled. Please fix x86-64 and reenable
+ */
+#if 0 && BITS_PER_LONG != __BITS_PER_LONG
+#error Inconsistent word size. Check asm/bitsperlong.h
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_GENERIC_BITS_PER_LONG */
diff --git a/include/asm-generic/int-l64.h b/include/asm-generic/int-l64.h
index 2af9b75..1ca3efc 100644
--- a/include/asm-generic/int-l64.h
+++ b/include/asm-generic/int-l64.h
@@ -8,6 +8,8 @@
 #ifndef _ASM_GENERIC_INT_L64_H
 #define _ASM_GENERIC_INT_L64_H
 
+#include <asm/bitsperlong.h>
+
 #ifndef __ASSEMBLY__
 /*
  * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h
index f9bc9ac..f394147 100644
--- a/include/asm-generic/int-ll64.h
+++ b/include/asm-generic/int-ll64.h
@@ -8,6 +8,8 @@
 #ifndef _ASM_GENERIC_INT_LL64_H
 #define _ASM_GENERIC_INT_LL64_H
 
+#include <asm/bitsperlong.h>
+
 #ifndef __ASSEMBLY__
 /*
  * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
-- 
cgit v0.10.2


From 2864d32be31a20a4617e37a857dd9915a57e2efb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:26 +0000
Subject: asm-generic: add generic sysv ipc headers

The ipc64 data structures were originally meant to
be architecture specific so that each architecture
could add their own optimizations for padding.

In the end, most of them just copied the x86 version,
and most got that wrong. UClibc expects the x86 anyway,
so we might just declare that the default and get
rid of the extra copies.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index cbb4378..fd7a9c4 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -3,8 +3,13 @@ header-y += errno-base.h
 header-y += errno.h
 header-y += fcntl.h
 header-y += ioctl.h
+header-y += ipcbuf.h
 header-y += mman-common.h
+header-y += msgbuf.h
 header-y += poll.h
+header-y += sembuf.h
+header-y += shmbuf.h
+header-y += shmparam.h
 header-y += signal-defs.h
 header-y += statfs.h
 
diff --git a/include/asm-generic/ipcbuf.h b/include/asm-generic/ipcbuf.h
new file mode 100644
index 0000000..76982b2
--- /dev/null
+++ b/include/asm-generic/ipcbuf.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_GENERIC_IPCBUF_H
+#define __ASM_GENERIC_IPCBUF_H
+
+/*
+ * The generic ipc64_perm structure:
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * ipc64_perm was originally meant to be architecture specific, but
+ * everyone just ended up making identical copies without specific
+ * optimizations, so we may just as well all use the same one.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t on architectures that only had 16 bit
+ * - 32-bit seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm {
+	__kernel_key_t		key;
+	__kernel_uid32_t	uid;
+	__kernel_gid32_t	gid;
+	__kernel_uid32_t	cuid;
+	__kernel_gid32_t	cgid;
+	__kernel_mode_t		mode;
+				/* pad if mode_t is u16: */
+	unsigned char		__pad1[4 - sizeof(__kernel_mode_t)];
+	unsigned short		seq;
+	unsigned short		__pad2;
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+#endif /* __ASM_GENERIC_IPCBUF_H */
diff --git a/include/asm-generic/msgbuf.h b/include/asm-generic/msgbuf.h
new file mode 100644
index 0000000..aec850d
--- /dev/null
+++ b/include/asm-generic/msgbuf.h
@@ -0,0 +1,47 @@
+#ifndef __ASM_GENERIC_MSGBUF_H
+#define __ASM_GENERIC_MSGBUF_H
+
+#include <asm/bitsperlong.h>
+/*
+ * generic msqid64_ds structure.
+ *
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * msqid64_ds was originally meant to be architecture specific, but
+ * everyone just ended up making identical copies without specific
+ * optimizations, so we may just as well all use the same one.
+ *
+ * 64 bit architectures typically define a 64 bit __kernel_time_t,
+ * so they do not need the first three padding words.
+ * On big-endian systems, the padding is in the wrong place.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	__kernel_time_t msg_stime;	/* last msgsnd time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused1;
+#endif
+	__kernel_time_t msg_rtime;	/* last msgrcv time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused2;
+#endif
+	__kernel_time_t msg_ctime;	/* last change time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused3;
+#endif
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;
+	unsigned long  __unused5;
+};
+
+#endif /* __ASM_GENERIC_MSGBUF_H */
diff --git a/include/asm-generic/sembuf.h b/include/asm-generic/sembuf.h
new file mode 100644
index 0000000..4cb2c13
--- /dev/null
+++ b/include/asm-generic/sembuf.h
@@ -0,0 +1,38 @@
+#ifndef __ASM_GENERIC_SEMBUF_H
+#define __ASM_GENERIC_SEMBUF_H
+
+#include <asm/bitsperlong.h>
+
+/*
+ * The semid64_ds structure for x86 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * semid64_ds was originally meant to be architecture specific, but
+ * everyone just ended up making identical copies without specific
+ * optimizations, so we may just as well all use the same one.
+ *
+ * 64 bit architectures typically define a 64 bit __kernel_time_t,
+ * so they do not need the first two padding words.
+ * On big-endian systems, the padding is in the wrong place.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+struct semid64_ds {
+	struct ipc64_perm sem_perm;	/* permissions .. see ipc.h */
+	__kernel_time_t	sem_otime;	/* last semop time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused1;
+#endif
+	__kernel_time_t	sem_ctime;	/* last change time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused2;
+#endif
+	unsigned long	sem_nsems;	/* no. of semaphores in array */
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* __ASM_GENERIC_SEMBUF_H */
diff --git a/include/asm-generic/shmbuf.h b/include/asm-generic/shmbuf.h
new file mode 100644
index 0000000..5768fa6
--- /dev/null
+++ b/include/asm-generic/shmbuf.h
@@ -0,0 +1,59 @@
+#ifndef __ASM_GENERIC_SHMBUF_H
+#define __ASM_GENERIC_SHMBUF_H
+
+#include <asm/bitsperlong.h>
+
+/*
+ * The shmid64_ds structure for x86 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * shmid64_ds was originally meant to be architecture specific, but
+ * everyone just ended up making identical copies without specific
+ * optimizations, so we may just as well all use the same one.
+ *
+ * 64 bit architectures typically define a 64 bit __kernel_time_t,
+ * so they do not need the first two padding words.
+ * On big-endian systems, the padding is in the wrong place.
+ *
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_time_t		shm_atime;	/* last attach time */
+#if __BITS_PER_LONG != 64
+	unsigned long		__unused1;
+#endif
+	__kernel_time_t		shm_dtime;	/* last detach time */
+#if __BITS_PER_LONG != 64
+	unsigned long		__unused2;
+#endif
+	__kernel_time_t		shm_ctime;	/* last change time */
+#if __BITS_PER_LONG != 64
+	unsigned long		__unused3;
+#endif
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused4;
+	unsigned long		__unused5;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* __ASM_GENERIC_SHMBUF_H */
diff --git a/include/asm-generic/shmparam.h b/include/asm-generic/shmparam.h
new file mode 100644
index 0000000..51a3852
--- /dev/null
+++ b/include/asm-generic/shmparam.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_GENERIC_SHMPARAM_H
+#define __ASM_GENERIC_SHMPARAM_H
+
+#define SHMLBA PAGE_SIZE	 /* attach addr a multiple of this */
+
+#endif /* _ASM_GENERIC_SHMPARAM_H */
-- 
cgit v0.10.2


From 6103ec56c65c33774c7c38652c8204120c3c7519 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:27 +0000
Subject: asm-generic: add generic ABI headers

These header files are typically copied from an existing architecture
into any new one, slightly modified and then remain untouched until
the end of time in the name of ABI stability.

To make it easier for future architectures, provide a sane generic
version here. In cases where multiple architectures already use
identical code, I used the most common version. In cases like
stat.h that are more or less broken everywhere, I provide a
version that is meant to be ideal for new architectures.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index fd7a9c4..11a78b8 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -1,17 +1,32 @@
+header-y += auxvec.h
 header-y += bitsperlong.h
 header-y += errno-base.h
 header-y += errno.h
 header-y += fcntl.h
 header-y += ioctl.h
+header-y += ioctls.h
 header-y += ipcbuf.h
 header-y += mman-common.h
+header-y += mman.h
 header-y += msgbuf.h
+header-y += param.h
 header-y += poll.h
+header-y += posix_types.h
 header-y += sembuf.h
+header-y += setup.h
 header-y += shmbuf.h
 header-y += shmparam.h
 header-y += signal-defs.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
 header-y += statfs.h
+header-y += swab.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += ucontext.h
 
 unifdef-y += int-l64.h
 unifdef-y += int-ll64.h
diff --git a/include/asm-generic/auxvec.h b/include/asm-generic/auxvec.h
new file mode 100644
index 0000000..b99573b
--- /dev/null
+++ b/include/asm-generic/auxvec.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_GENERIC_AUXVEC_H
+#define __ASM_GENERIC_AUXVEC_H
+/*
+ * Not all architectures need their own auxvec.h, the most
+ * common definitions are already in linux/auxvec.h.
+ */
+
+#endif /* __ASM_GENERIC_AUXVEC_H */
diff --git a/include/asm-generic/ioctls.h b/include/asm-generic/ioctls.h
new file mode 100644
index 0000000..a799e20
--- /dev/null
+++ b/include/asm-generic/ioctls.h
@@ -0,0 +1,110 @@
+#ifndef __ASM_GENERIC_IOCTLS_H
+#define __ASM_GENERIC_IOCTLS_H
+
+#include <linux/ioctl.h>
+
+/*
+ * These are the most common definitions for tty ioctl numbers.
+ * Most of them do not use the recommended _IOC(), but there is
+ * probably some source code out there hardcoding the number,
+ * so we might as well use them for all new platforms.
+ *
+ * The architectures that use different values here typically
+ * try to be compatible with some Unix variants for the same
+ * architecture.
+ */
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS		0x5401
+#define TCSETS		0x5402
+#define TCSETSW		0x5403
+#define TCSETSF		0x5404
+#define TCGETA		0x5405
+#define TCSETA		0x5406
+#define TCSETAW		0x5407
+#define TCSETAF		0x5408
+#define TCSBRK		0x5409
+#define TCXONC		0x540A
+#define TCFLSH		0x540B
+#define TIOCEXCL	0x540C
+#define TIOCNXCL	0x540D
+#define TIOCSCTTY	0x540E
+#define TIOCGPGRP	0x540F
+#define TIOCSPGRP	0x5410
+#define TIOCOUTQ	0x5411
+#define TIOCSTI		0x5412
+#define TIOCGWINSZ	0x5413
+#define TIOCSWINSZ	0x5414
+#define TIOCMGET	0x5415
+#define TIOCMBIS	0x5416
+#define TIOCMBIC	0x5417
+#define TIOCMSET	0x5418
+#define TIOCGSOFTCAR	0x5419
+#define TIOCSSOFTCAR	0x541A
+#define FIONREAD	0x541B
+#define TIOCINQ		FIONREAD
+#define TIOCLINUX	0x541C
+#define TIOCCONS	0x541D
+#define TIOCGSERIAL	0x541E
+#define TIOCSSERIAL	0x541F
+#define TIOCPKT		0x5420
+#define FIONBIO		0x5421
+#define TIOCNOTTY	0x5422
+#define TIOCSETD	0x5423
+#define TIOCGETD	0x5424
+#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
+#define TIOCSBRK	0x5427  /* BSD compatibility */
+#define TIOCCBRK	0x5428  /* BSD compatibility */
+#define TIOCGSID	0x5429  /* Return the session ID of FD */
+#define TCGETS2		_IOR('T', 0x2A, struct termios2)
+#define TCSETS2		_IOW('T', 0x2B, struct termios2)
+#define TCSETSW2	_IOW('T', 0x2C, struct termios2)
+#define TCSETSF2	_IOW('T', 0x2D, struct termios2)
+#define TIOCGRS485	0x542E
+#define TIOCSRS485	0x542F
+#define TIOCGPTN	_IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK	_IOW('T', 0x31, int)  /* Lock/unlock Pty */
+#define TCGETX		0x5432 /* SYS5 TCGETX compatibility */
+#define TCSETX		0x5433
+#define TCSETXF		0x5434
+#define TCSETXW		0x5435
+
+#define FIONCLEX	0x5450
+#define FIOCLEX		0x5451
+#define FIOASYNC	0x5452
+#define TIOCSERCONFIG	0x5453
+#define TIOCSERGWILD	0x5454
+#define TIOCSERSWILD	0x5455
+#define TIOCGLCKTRMIOS	0x5456
+#define TIOCSLCKTRMIOS	0x5457
+#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
+#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
+
+/*
+ * some architectures define FIOQSIZE as 0x545E, which is used for
+ * TIOCGHAYESESP on others
+ */
+#ifndef FIOQSIZE
+# define TIOCGHAYESESP	0x545E  /* Get Hayes ESP configuration */
+# define TIOCSHAYESESP	0x545F  /* Set Hayes ESP configuration */
+# define FIOQSIZE	0x5460
+#endif
+
+/* Used for packet mode */
+#define TIOCPKT_DATA		 0
+#define TIOCPKT_FLUSHREAD	 1
+#define TIOCPKT_FLUSHWRITE	 2
+#define TIOCPKT_STOP		 4
+#define TIOCPKT_START		 8
+#define TIOCPKT_NOSTOP		16
+#define TIOCPKT_DOSTOP		32
+
+#define TIOCSER_TEMT	0x01	/* Transmitter physically empty */
+
+#endif /* __ASM_GENERIC_IOCTLS_H */
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
new file mode 100644
index 0000000..7cab4de
--- /dev/null
+++ b/include/asm-generic/mman.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_GENERIC_MMAN_H
+#define __ASM_GENERIC_MMAN_H
+
+#include <asm-generic/mman-common.h>
+
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_LOCKED	0x2000		/* pages are locked */
+#define MAP_NORESERVE	0x4000		/* don't check for reservations */
+#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+
+#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/include/asm-generic/param.h b/include/asm-generic/param.h
new file mode 100644
index 0000000..cdf8251
--- /dev/null
+++ b/include/asm-generic/param.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_GENERIC_PARAM_H
+#define __ASM_GENERIC_PARAM_H
+
+#ifdef __KERNEL__
+# define HZ		CONFIG_HZ	/* Internal kernel timer frequency */
+# define USER_HZ	100		/* some user interfaces are */
+# define CLOCKS_PER_SEC	(USER_HZ)       /* in "ticks" like times() */
+#endif
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#ifndef EXEC_PAGESIZE
+#define EXEC_PAGESIZE	4096
+#endif
+
+#ifndef NOGROUP
+#define NOGROUP		(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64	/* max length of hostname */
+
+#endif /* __ASM_GENERIC_PARAM_H */
diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h
new file mode 100644
index 0000000..3dab008
--- /dev/null
+++ b/include/asm-generic/posix_types.h
@@ -0,0 +1,165 @@
+#ifndef __ASM_GENERIC_POSIX_TYPES_H
+#define __ASM_GENERIC_POSIX_TYPES_H
+
+#include <asm/bitsperlong.h>
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.
+ *
+ * First the types that are often defined in different ways across
+ * architectures, so that you can override them.
+ */
+
+#ifndef __kernel_ino_t
+typedef unsigned long	__kernel_ino_t;
+#endif
+
+#ifndef __kernel_mode_t
+typedef unsigned int	__kernel_mode_t;
+#endif
+
+#ifndef __kernel_nlink_t
+typedef unsigned long	__kernel_nlink_t;
+#endif
+
+#ifndef __kernel_pid_t
+typedef int		__kernel_pid_t;
+#endif
+
+#ifndef __kernel_ipc_pid_t
+typedef int		__kernel_ipc_pid_t;
+#endif
+
+#ifndef __kernel_uid_t
+typedef unsigned int	__kernel_uid_t;
+typedef unsigned int	__kernel_gid_t;
+#endif
+
+#ifndef __kernel_suseconds_t
+typedef long		__kernel_suseconds_t;
+#endif
+
+#ifndef __kernel_daddr_t
+typedef int		__kernel_daddr_t;
+#endif
+
+#ifndef __kernel_uid32_t
+typedef __kernel_uid_t	__kernel_uid32_t;
+typedef __kernel_gid_t	__kernel_gid32_t;
+#endif
+
+#ifndef __kernel_old_uid_t
+typedef __kernel_uid_t	__kernel_old_uid_t;
+typedef __kernel_gid_t	__kernel_old_gid_t;
+#endif
+
+#ifndef __kernel_old_dev_t
+typedef unsigned int	__kernel_old_dev_t;
+#endif
+
+/*
+ * Most 32 bit architectures use "unsigned int" size_t,
+ * and all 64 bit architectures use "unsigned long" size_t.
+ */
+#ifndef __kernel_size_t
+#if __BITS_PER_LONG != 64
+typedef unsigned int	__kernel_size_t;
+typedef int		__kernel_ssize_t;
+typedef int		__kernel_ptrdiff_t;
+#else
+typedef unsigned long	__kernel_size_t;
+typedef long		__kernel_ssize_t;
+typedef long		__kernel_ptrdiff_t;
+#endif
+#endif
+
+/*
+ * anything below here should be completely generic
+ */
+typedef long		__kernel_off_t;
+typedef long long	__kernel_loff_t;
+typedef long		__kernel_time_t;
+typedef long		__kernel_clock_t;
+typedef int		__kernel_timer_t;
+typedef int		__kernel_clockid_t;
+typedef char *		__kernel_caddr_t;
+typedef unsigned short	__kernel_uid16_t;
+typedef unsigned short	__kernel_gid16_t;
+
+typedef struct {
+	int	val[2];
+} __kernel_fsid_t;
+
+#ifdef __KERNEL__
+
+#undef __FD_SET
+static inline void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	__fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
+}
+
+#undef __FD_CLR
+static inline void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	__fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
+}
+
+#undef __FD_ISSET
+static inline int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef __FD_ZERO
+static inline void __FD_ZERO(__kernel_fd_set *__p)
+{
+	unsigned long *__tmp = __p->fds_bits;
+	int __i;
+
+	if (__builtin_constant_p(__FDSET_LONGS)) {
+		switch (__FDSET_LONGS) {
+		case 16:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			__tmp[ 4] = 0; __tmp[ 5] = 0;
+			__tmp[ 6] = 0; __tmp[ 7] = 0;
+			__tmp[ 8] = 0; __tmp[ 9] = 0;
+			__tmp[10] = 0; __tmp[11] = 0;
+			__tmp[12] = 0; __tmp[13] = 0;
+			__tmp[14] = 0; __tmp[15] = 0;
+			return;
+
+		case 8:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			__tmp[ 4] = 0; __tmp[ 5] = 0;
+			__tmp[ 6] = 0; __tmp[ 7] = 0;
+			return;
+
+		case 4:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			return;
+		}
+	}
+	__i = __FDSET_LONGS;
+	while (__i) {
+		__i--;
+		*__tmp = 0;
+		__tmp++;
+	}
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_GENERIC_POSIX_TYPES_H */
diff --git a/include/asm-generic/setup.h b/include/asm-generic/setup.h
new file mode 100644
index 0000000..6fc26a5
--- /dev/null
+++ b/include/asm-generic/setup.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_GENERIC_SETUP_H
+#define __ASM_GENERIC_SETUP_H
+
+#define COMMAND_LINE_SIZE	512
+
+#endif	/* __ASM_GENERIC_SETUP_H */
diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h
new file mode 100644
index 0000000..555c0ae
--- /dev/null
+++ b/include/asm-generic/signal.h
@@ -0,0 +1,131 @@
+#ifndef __ASM_GENERIC_SIGNAL_H
+#define __ASM_GENERIC_SIGNAL_H
+
+#include <linux/types.h>
+
+#define _NSIG		64
+#define _NSIG_BPW	__BITS_PER_LONG
+#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#ifndef SIGRTMAX
+#define SIGRTMAX	_NSIG
+#endif
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP	0x00000001
+#define SA_NOCLDWAIT	0x00000002
+#define SA_SIGINFO	0x00000004
+#define SA_ONSTACK	0x08000000
+#define SA_RESTART	0x10000000
+#define SA_NODEFER	0x40000000
+#define SA_RESETHAND	0x80000000
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+/*
+ * New architectures should not define the obsolete
+ *	SA_RESTORER	0x04000000
+ */
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK	1
+#define SS_DISABLE	2
+
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+
+#ifndef __ASSEMBLY__
+typedef struct {
+	unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+/* not actually used, but required for linux/syscalls.h */
+typedef unsigned long old_sigset_t;
+
+#include <asm-generic/signal-defs.h>
+
+struct sigaction {
+	__sighandler_t sa_handler;
+	unsigned long sa_flags;
+#ifdef SA_RESTORER
+	__sigrestore_t sa_restorer;
+#endif
+	sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+struct k_sigaction {
+	struct sigaction sa;
+};
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+
+#include <asm/sigcontext.h>
+#undef __HAVE_ARCH_SIG_BITOPS
+
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#endif /* __KERNEL__ */
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_GENERIC_SIGNAL_H */
diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
new file mode 100644
index 0000000..5d79e40
--- /dev/null
+++ b/include/asm-generic/socket.h
@@ -0,0 +1,63 @@
+#ifndef __ASM_GENERIC_SOCKET_H
+#define __ASM_GENERIC_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32
+#define SO_RCVBUFFORCE	33
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+/* To add :#define SO_REUSEPORT 15 */
+
+#ifndef SO_PASSCRED /* powerpc only differs in these */
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO	20
+#define SO_SNDTIMEO	21
+#endif
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE	25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER	26
+#define SO_DETACH_FILTER	27
+
+#define SO_PEERNAME		28
+#define SO_TIMESTAMP		29
+#define SCM_TIMESTAMP		SO_TIMESTAMP
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC		31
+#define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
+
+#define SO_MARK			36
+
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
+#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/asm-generic/sockios.h b/include/asm-generic/sockios.h
new file mode 100644
index 0000000..9a61a36
--- /dev/null
+++ b/include/asm-generic/sockios.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_GENERIC_SOCKIOS_H
+#define __ASM_GENERIC_SOCKIOS_H
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+#endif /* __ASM_GENERIC_SOCKIOS_H */
diff --git a/include/asm-generic/stat.h b/include/asm-generic/stat.h
new file mode 100644
index 0000000..47e6417
--- /dev/null
+++ b/include/asm-generic/stat.h
@@ -0,0 +1,72 @@
+#ifndef __ASM_GENERIC_STAT_H
+#define __ASM_GENERIC_STAT_H
+
+/*
+ * Everybody gets this wrong and has to stick with it for all
+ * eternity. Hopefully, this version gets used by new architectures
+ * so they don't fall into the same traps.
+ *
+ * stat64 is copied from powerpc64, with explicit padding added.
+ * stat is the same structure layout on 64-bit, without the 'long long'
+ * types.
+ *
+ * By convention, 64 bit architectures use the stat interface, while
+ * 32 bit architectures use the stat64 interface. Note that we don't
+ * provide an __old_kernel_stat here, which new architecture should
+ * not have to start with.
+ */
+
+#include <asm/bitsperlong.h>
+
+#define STAT_HAVE_NSEC 1
+
+struct stat {
+	unsigned long	st_dev;		/* Device.  */
+	unsigned long	st_ino;		/* File serial number.  */
+	unsigned int	st_mode;	/* File mode.  */
+	unsigned int	st_nlink;	/* Link count.  */
+	unsigned int	st_uid;		/* User ID of the file's owner.  */
+	unsigned int	st_gid;		/* Group ID of the file's group. */
+	unsigned long	st_rdev;	/* Device number, if device.  */
+	unsigned long	__pad1;
+	long		st_size;	/* Size of file, in bytes.  */
+	int		st_blksize;	/* Optimal block size for I/O.  */
+	int		__pad2;
+	long		st_blocks;	/* Number 512-byte blocks allocated. */
+	int		st_atime;	/* Time of last access.  */
+	unsigned int	st_atime_nsec;
+	int		st_mtime;	/* Time of last modification.  */
+	unsigned int	st_mtime_nsec;
+	int		st_ctime;	/* Time of last status change.  */
+	unsigned int	st_ctime_nsec;
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+
+#if __BITS_PER_LONG != 64
+/* This matches struct stat64 in glibc2.1. Only used for 32 bit. */
+struct stat64 {
+	unsigned long long st_dev;	/* Device.  */
+	unsigned long long st_ino;	/* File serial number.  */
+	unsigned int	st_mode;	/* File mode.  */
+	unsigned int	st_nlink;	/* Link count.  */
+	unsigned int	st_uid;		/* User ID of the file's owner.  */
+	unsigned int	st_gid;		/* Group ID of the file's group. */
+	unsigned long long st_rdev;	/* Device number, if device.  */
+	unsigned long long __pad1;
+	long long	st_size;	/* Size of file, in bytes.  */
+	int		st_blksize;	/* Optimal block size for I/O.  */
+	int		__pad2;
+	long long	st_blocks;	/* Number 512-byte blocks allocated. */
+	int		st_atime;	/* Time of last access.  */
+	unsigned int	st_atime_nsec;
+	int		st_mtime;	/* Time of last modification.  */
+	unsigned int	st_mtime_nsec;
+	int		st_ctime;	/* Time of last status change.  */
+	unsigned int	st_ctime_nsec;
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+#endif
+
+#endif /* __ASM_GENERIC_STAT_H */
diff --git a/include/asm-generic/swab.h b/include/asm-generic/swab.h
new file mode 100644
index 0000000..a8e9029
--- /dev/null
+++ b/include/asm-generic/swab.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_GENERIC_SWAB_H
+#define _ASM_GENERIC_SWAB_H
+
+#include <asm/bitsperlong.h>
+
+/*
+ * 32 bit architectures typically (but not always) want to
+ * set __SWAB_64_THRU_32__. In user space, this is only
+ * valid if the compiler supports 64 bit data types.
+ */
+
+#if __BITS_PER_LONG == 32
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+#define __SWAB_64_THRU_32__
+#endif
+#endif
+
+#endif /* _ASM_GENERIC_SWAB_H */
diff --git a/include/asm-generic/termbits.h b/include/asm-generic/termbits.h
new file mode 100644
index 0000000..1c9773d
--- /dev/null
+++ b/include/asm-generic/termbits.h
@@ -0,0 +1,198 @@
+#ifndef __ASM_GENERIC_TERMBITS_H
+#define __ASM_GENERIC_TERMBITS_H
+
+#include <linux/posix_types.h>
+
+typedef unsigned char	cc_t;
+typedef unsigned int	speed_t;
+typedef unsigned int	tcflag_t;
+
+#define NCCS 19
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+};
+
+struct termios2 {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+struct ktermios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+/* c_iflag bits */
+#define IGNBRK	0000001
+#define BRKINT	0000002
+#define IGNPAR	0000004
+#define PARMRK	0000010
+#define INPCK	0000020
+#define ISTRIP	0000040
+#define INLCR	0000100
+#define IGNCR	0000200
+#define ICRNL	0000400
+#define IUCLC	0001000
+#define IXON	0002000
+#define IXANY	0004000
+#define IXOFF	0010000
+#define IMAXBEL	0020000
+#define IUTF8	0040000
+
+/* c_oflag bits */
+#define OPOST	0000001
+#define OLCUC	0000002
+#define ONLCR	0000004
+#define OCRNL	0000010
+#define ONOCR	0000020
+#define ONLRET	0000040
+#define OFILL	0000100
+#define OFDEL	0000200
+#define NLDLY	0000400
+#define   NL0	0000000
+#define   NL1	0000400
+#define CRDLY	0003000
+#define   CR0	0000000
+#define   CR1	0001000
+#define   CR2	0002000
+#define   CR3	0003000
+#define TABDLY	0014000
+#define   TAB0	0000000
+#define   TAB1	0004000
+#define   TAB2	0010000
+#define   TAB3	0014000
+#define   XTABS	0014000
+#define BSDLY	0020000
+#define   BS0	0000000
+#define   BS1	0020000
+#define VTDLY	0040000
+#define   VT0	0000000
+#define   VT1	0040000
+#define FFDLY	0100000
+#define   FF0	0000000
+#define   FF1	0100000
+
+/* c_cflag bit meaning */
+#define CBAUD	0010017
+#define  B0	0000000		/* hang up */
+#define  B50	0000001
+#define  B75	0000002
+#define  B110	0000003
+#define  B134	0000004
+#define  B150	0000005
+#define  B200	0000006
+#define  B300	0000007
+#define  B600	0000010
+#define  B1200	0000011
+#define  B1800	0000012
+#define  B2400	0000013
+#define  B4800	0000014
+#define  B9600	0000015
+#define  B19200	0000016
+#define  B38400	0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE	0000060
+#define   CS5	0000000
+#define   CS6	0000020
+#define   CS7	0000040
+#define   CS8	0000060
+#define CSTOPB	0000100
+#define CREAD	0000200
+#define PARENB	0000400
+#define PARODD	0001000
+#define HUPCL	0002000
+#define CLOCAL	0004000
+#define CBAUDEX 0010000
+#define    BOTHER 0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD	  002003600000	/* input baud rate */
+#define CMSPAR	  010000000000	/* mark or space (stick) parity */
+#define CRTSCTS	  020000000000	/* flow control */
+
+#define IBSHIFT	  16		/* Shift from CBAUD to CIBAUD */
+
+/* c_lflag bits */
+#define ISIG	0000001
+#define ICANON	0000002
+#define XCASE	0000004
+#define ECHO	0000010
+#define ECHOE	0000020
+#define ECHOK	0000040
+#define ECHONL	0000100
+#define NOFLSH	0000200
+#define TOSTOP	0000400
+#define ECHOCTL	0001000
+#define ECHOPRT	0002000
+#define ECHOKE	0004000
+#define FLUSHO	0010000
+#define PENDIN	0040000
+#define IEXTEN	0100000
+
+/* tcflow() and TCXONC use these */
+#define	TCOOFF		0
+#define	TCOON		1
+#define	TCIOFF		2
+#define	TCION		3
+
+/* tcflush() and TCFLSH use these */
+#define	TCIFLUSH	0
+#define	TCOFLUSH	1
+#define	TCIOFLUSH	2
+
+/* tcsetattr uses these */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif /* __ASM_GENERIC_TERMBITS_H */
diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h
new file mode 100644
index 0000000..d0922ad
--- /dev/null
+++ b/include/asm-generic/termios.h
@@ -0,0 +1,154 @@
+#ifndef _ASM_GENERIC_TERMIOS_H
+#define _ASM_GENERIC_TERMIOS_H
+/*
+ * Most architectures have straight copies of the x86 code, with
+ * varying levels of bug fixes on top. Usually it's a good idea
+ * to use this generic version instead, but be careful to avoid
+ * ABI changes.
+ * New architectures should not provide their own version.
+ */
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+#ifdef __KERNEL__
+
+#include <asm/uaccess.h>
+
+/*	intr=^C		quit=^\		erase=del	kill=^U
+	eof=^D		vtime=\0	vmin=\1		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		eol=\0
+	reprint=^R	discard=^U	werase=^W	lnext=^V
+	eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+static inline int user_termio_to_kernel_termios(struct ktermios *termios,
+						const struct termio __user *termio)
+{
+	unsigned short tmp;
+
+	if (get_user(tmp, &termio->c_iflag) < 0)
+		goto fault;
+	termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp;
+
+	if (get_user(tmp, &termio->c_oflag) < 0)
+		goto fault;
+	termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp;
+
+	if (get_user(tmp, &termio->c_cflag) < 0)
+		goto fault;
+	termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp;
+
+	if (get_user(tmp, &termio->c_lflag) < 0)
+		goto fault;
+	termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp;
+
+	if (get_user(termios->c_line, &termio->c_line) < 0)
+		goto fault;
+
+	if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0)
+		goto fault;
+
+	return 0;
+
+ fault:
+	return -EFAULT;
+}
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+static inline int kernel_termios_to_user_termio(struct termio __user *termio,
+						struct ktermios *termios)
+{
+	if (put_user(termios->c_iflag, &termio->c_iflag) < 0 ||
+	    put_user(termios->c_oflag, &termio->c_oflag) < 0 ||
+	    put_user(termios->c_cflag, &termio->c_cflag) < 0 ||
+	    put_user(termios->c_lflag, &termio->c_lflag) < 0 ||
+	    put_user(termios->c_line,  &termio->c_line) < 0 ||
+	    copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+#ifdef TCGETS2
+static inline int user_termios_to_kernel_termios(struct ktermios *k,
+						 struct termios2 __user *u)
+{
+	return copy_from_user(k, u, sizeof(struct termios2));
+}
+
+static inline int kernel_termios_to_user_termios(struct termios2 __user *u,
+						 struct ktermios *k)
+{
+	return copy_to_user(u, k, sizeof(struct termios2));
+}
+
+static inline int user_termios_to_kernel_termios_1(struct ktermios *k,
+						   struct termios __user *u)
+{
+	return copy_from_user(k, u, sizeof(struct termios));
+}
+
+static inline int kernel_termios_to_user_termios_1(struct termios __user *u,
+						   struct ktermios *k)
+{
+	return copy_to_user(u, k, sizeof(struct termios));
+}
+#else /* TCGETS2 */
+static inline int user_termios_to_kernel_termios(struct ktermios *k,
+						 struct termios __user *u)
+{
+	return copy_from_user(k, u, sizeof(struct termios));
+}
+
+static inline int kernel_termios_to_user_termios(struct termios __user *u,
+						 struct ktermios *k)
+{
+	return copy_to_user(u, k, sizeof(struct termios));
+}
+#endif /* TCGETS2 */
+
+#endif	/* __KERNEL__ */
+
+#endif /* _ASM_GENERIC_TERMIOS_H */
diff --git a/include/asm-generic/types.h b/include/asm-generic/types.h
new file mode 100644
index 0000000..fba7d33
--- /dev/null
+++ b/include/asm-generic/types.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_GENERIC_TYPES_H
+#define _ASM_GENERIC_TYPES_H
+/*
+ * int-ll64 is used practically everywhere now,
+ * so use it as a reasonable default.
+ */
+#include <asm-generic/int-ll64.h>
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned short umode_t;
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+/*
+ * DMA addresses may be very different from physical addresses
+ * and pointers. i386 and powerpc may have 64 bit DMA on 32 bit
+ * systems, while sparc64 uses 32 bit DMA addresses for 64 bit
+ * physical addresses.
+ * This default defines dma_addr_t to have the same size as
+ * phys_addr_t, which is the most common way.
+ * Do not define the dma64_addr_t type, which never really
+ * worked.
+ */
+#ifndef dma_addr_t
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif /* CONFIG_PHYS_ADDR_T_64BIT */
+#endif /* dma_addr_t */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_GENERIC_TYPES_H */
diff --git a/include/asm-generic/ucontext.h b/include/asm-generic/ucontext.h
new file mode 100644
index 0000000..ad77343
--- /dev/null
+++ b/include/asm-generic/ucontext.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_GENERIC_UCONTEXT_H
+#define __ASM_GENERIC_UCONTEXT_H
+
+struct ucontext {
+	unsigned long	  uc_flags;
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	struct sigcontext uc_mcontext;
+	sigset_t	  uc_sigmask;	/* mask last for extensibility */
+};
+
+#endif /* __ASM_GENERIC_UCONTEXT_H */
-- 
cgit v0.10.2


From e64a1617eca39d62b248a11699de9c1195369661 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:28 +0000
Subject: asm-generic: add a generic unistd.h

A new architecture should only define a minimal set of system
calls while still providing the full functionality. This version
of unistd.h has gone through intensive review to make sure that
by default it only enables syscalls that do not already have
a more featureful replacement.

It is modeled after the x86-64 version of unistd.h, which unifies
the syscall number definition and the actual system call table
in a single file, in order to keep them synchronized much more
easily.

This first version still keeps legacy system call definitions
around, guarded by various #ifdefs, and with numbers larger
than 1024. The idea behind this is to make it easier for
new architectures to transition from a full list to the reduced
set. In particular, the new microblaze architecture that should
migrate to using the generic ABI headers can at least use an
existing uClibc source tree without major rewrites during the
conversion.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 11a78b8..eb62334 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -27,6 +27,7 @@ header-y += termbits.h
 header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
+header-y += unistd.h
 
 unifdef-y += int-l64.h
 unifdef-y += int-ll64.h
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
new file mode 100644
index 0000000..5b34b62
--- /dev/null
+++ b/include/asm-generic/unistd.h
@@ -0,0 +1,854 @@
+#if !defined(_ASM_GENERIC_UNISTD_H) || defined(__SYSCALL)
+#define _ASM_GENERIC_UNISTD_H
+
+#include <asm/bitsperlong.h>
+
+/*
+ * This file contains the system call numbers, based on the
+ * layout of the x86-64 architecture, which embeds the
+ * pointer to the syscall in the table.
+ *
+ * As a basic principle, no duplication of functionality
+ * should be added, e.g. we don't use lseek when llseek
+ * is present. New architectures should use this file
+ * and implement the less feature-full calls in user space.
+ */
+
+#ifndef __SYSCALL
+#define __SYSCALL(x, y)
+#endif
+
+#if __BITS_PER_LONG == 32
+#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _32)
+#else
+#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64)
+#endif
+
+#define __NR_io_setup 0
+__SYSCALL(__NR_io_setup, sys_io_setup)
+#define __NR_io_destroy 1
+__SYSCALL(__NR_io_destroy, sys_io_destroy)
+#define __NR_io_submit 2
+__SYSCALL(__NR_io_submit, sys_io_submit)
+#define __NR_io_cancel 3
+__SYSCALL(__NR_io_cancel, sys_io_cancel)
+#define __NR_io_getevents 4
+__SYSCALL(__NR_io_getevents, sys_io_getevents)
+
+/* fs/xattr.c */
+#define __NR_setxattr 5
+__SYSCALL(__NR_setxattr, sys_setxattr)
+#define __NR_lsetxattr 6
+__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
+#define __NR_fsetxattr 7
+__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
+#define __NR_getxattr 8
+__SYSCALL(__NR_getxattr, sys_getxattr)
+#define __NR_lgetxattr 9
+__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
+#define __NR_fgetxattr 10
+__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
+#define __NR_listxattr 11
+__SYSCALL(__NR_listxattr, sys_listxattr)
+#define __NR_llistxattr 12
+__SYSCALL(__NR_llistxattr, sys_llistxattr)
+#define __NR_flistxattr 13
+__SYSCALL(__NR_flistxattr, sys_flistxattr)
+#define __NR_removexattr 14
+__SYSCALL(__NR_removexattr, sys_removexattr)
+#define __NR_lremovexattr 15
+__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
+#define __NR_fremovexattr 16
+__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
+
+/* fs/dcache.c */
+#define __NR_getcwd 17
+__SYSCALL(__NR_getcwd, sys_getcwd)
+
+/* fs/cookies.c */
+#define __NR_lookup_dcookie 18
+__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie)
+
+/* fs/eventfd.c */
+#define __NR_eventfd2 19
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
+
+/* fs/eventpoll.c */
+#define __NR_epoll_create1 20
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
+#define __NR_epoll_ctl 21
+__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
+#define __NR_epoll_pwait 22
+__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait)
+
+/* fs/fcntl.c */
+#define __NR_dup 23
+__SYSCALL(__NR_dup, sys_dup)
+#define __NR_dup3 24
+__SYSCALL(__NR_dup3, sys_dup3)
+#define __NR3264_fcntl 25
+__SC_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl)
+
+/* fs/inotify_user.c */
+#define __NR_inotify_init1 26
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_inotify_add_watch 27
+__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
+#define __NR_inotify_rm_watch 28
+__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+
+/* fs/ioctl.c */
+#define __NR_ioctl 29
+__SYSCALL(__NR_ioctl, sys_ioctl)
+
+/* fs/ioprio.c */
+#define __NR_ioprio_set 30
+__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
+#define __NR_ioprio_get 31
+__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
+
+/* fs/locks.c */
+#define __NR_flock 32
+__SYSCALL(__NR_flock, sys_flock)
+
+/* fs/namei.c */
+#define __NR_mknodat 33
+__SYSCALL(__NR_mknodat, sys_mknodat)
+#define __NR_mkdirat 34
+__SYSCALL(__NR_mkdirat, sys_mkdirat)
+#define __NR_unlinkat 35
+__SYSCALL(__NR_unlinkat, sys_unlinkat)
+#define __NR_symlinkat 36
+__SYSCALL(__NR_symlinkat, sys_symlinkat)
+#define __NR_linkat 37
+__SYSCALL(__NR_linkat, sys_linkat)
+#define __NR_renameat 38
+__SYSCALL(__NR_renameat, sys_renameat)
+
+/* fs/namespace.c */
+#define __NR_umount2 39
+__SYSCALL(__NR_umount2, sys_umount)
+#define __NR_mount 40
+__SYSCALL(__NR_mount, sys_mount)
+#define __NR_pivot_root 41
+__SYSCALL(__NR_pivot_root, sys_pivot_root)
+
+/* fs/nfsctl.c */
+#define __NR_nfsservctl 42
+__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+
+/* fs/open.c */
+#define __NR3264_statfs 43
+__SC_3264(__NR3264_statfs, sys_statfs64, sys_statfs)
+#define __NR3264_fstatfs 44
+__SC_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs)
+#define __NR3264_truncate 45
+__SC_3264(__NR3264_truncate, sys_truncate64, sys_truncate)
+#define __NR3264_ftruncate 46
+__SC_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate)
+
+#define __NR_fallocate 47
+__SYSCALL(__NR_fallocate, sys_fallocate)
+#define __NR_faccessat 48
+__SYSCALL(__NR_faccessat, sys_faccessat)
+#define __NR_chdir 49
+__SYSCALL(__NR_chdir, sys_chdir)
+#define __NR_fchdir 50
+__SYSCALL(__NR_fchdir, sys_fchdir)
+#define __NR_chroot 51
+__SYSCALL(__NR_chroot, sys_chroot)
+#define __NR_fchmod 52
+__SYSCALL(__NR_fchmod, sys_fchmod)
+#define __NR_fchmodat 53
+__SYSCALL(__NR_fchmodat, sys_fchmodat)
+#define __NR_fchownat 54
+__SYSCALL(__NR_fchownat, sys_fchownat)
+#define __NR_fchown 55
+__SYSCALL(__NR_fchown, sys_fchown)
+#define __NR_openat 56
+__SYSCALL(__NR_openat, sys_openat)
+#define __NR_close 57
+__SYSCALL(__NR_close, sys_close)
+#define __NR_vhangup 58
+__SYSCALL(__NR_vhangup, sys_vhangup)
+
+/* fs/pipe.c */
+#define __NR_pipe2 59
+__SYSCALL(__NR_pipe2, sys_pipe2)
+
+/* fs/quota.c */
+#define __NR_quotactl 60
+__SYSCALL(__NR_quotactl, sys_quotactl)
+
+/* fs/readdir.c */
+#define __NR_getdents64 61
+__SYSCALL(__NR_getdents64, sys_getdents64)
+
+/* fs/read_write.c */
+#define __NR3264_lseek 62
+__SC_3264(__NR3264_lseek, sys_llseek, sys_lseek)
+#define __NR_read 63
+__SYSCALL(__NR_read, sys_read)
+#define __NR_write 64
+__SYSCALL(__NR_write, sys_write)
+#define __NR_readv 65
+__SYSCALL(__NR_readv, sys_readv)
+#define __NR_writev 66
+__SYSCALL(__NR_writev, sys_writev)
+#define __NR_pread64 67
+__SYSCALL(__NR_pread64, sys_pread64)
+#define __NR_pwrite64 68
+__SYSCALL(__NR_pwrite64, sys_pwrite64)
+#define __NR_preadv 69
+__SYSCALL(__NR_preadv, sys_preadv)
+#define __NR_pwritev 70
+__SYSCALL(__NR_pwritev, sys_pwritev)
+
+/* fs/sendfile.c */
+#define __NR3264_sendfile 71
+__SC_3264(__NR3264_sendfile, sys_sendfile64, sys_sendfile)
+
+/* fs/select.c */
+#define __NR_pselect6 72
+__SYSCALL(__NR_pselect6, sys_pselect6)
+#define __NR_ppoll 73
+__SYSCALL(__NR_ppoll, sys_ppoll)
+
+/* fs/signalfd.c */
+#define __NR_signalfd4 74
+__SYSCALL(__NR_signalfd4, sys_signalfd4)
+
+/* fs/splice.c */
+#define __NR_vmsplice 75
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
+#define __NR_splice 76
+__SYSCALL(__NR_splice, sys_splice)
+#define __NR_tee 77
+__SYSCALL(__NR_tee, sys_tee)
+
+/* fs/stat.c */
+#define __NR_readlinkat 78
+__SYSCALL(__NR_readlinkat, sys_readlinkat)
+#define __NR3264_fstatat 79
+__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
+#define __NR3264_fstat 80
+__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+
+/* fs/sync.c */
+#define __NR_sync 81
+__SYSCALL(__NR_sync, sys_sync)
+#define __NR_fsync 82
+__SYSCALL(__NR_fsync, sys_fsync)
+#define __NR_fdatasync 83
+__SYSCALL(__NR_fdatasync, sys_fdatasync)
+#define __NR_sync_file_range 84
+__SYSCALL(__NR_sync_file_range, sys_sync_file_range) /* .long sys_sync_file_range2, */
+
+/* fs/timerfd.c */
+#define __NR_timerfd_create 85
+__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+#define __NR_timerfd_settime 86
+__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
+#define __NR_timerfd_gettime 87
+__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
+
+/* fs/utimes.c */
+#define __NR_utimensat 88
+__SYSCALL(__NR_utimensat, sys_utimensat)
+
+/* kernel/acct.c */
+#define __NR_acct 89
+__SYSCALL(__NR_acct, sys_acct)
+
+/* kernel/capability.c */
+#define __NR_capget 90
+__SYSCALL(__NR_capget, sys_capget)
+#define __NR_capset 91
+__SYSCALL(__NR_capset, sys_capset)
+
+/* kernel/exec_domain.c */
+#define __NR_personality 92
+__SYSCALL(__NR_personality, sys_personality)
+
+/* kernel/exit.c */
+#define __NR_exit 93
+__SYSCALL(__NR_exit, sys_exit)
+#define __NR_exit_group 94
+__SYSCALL(__NR_exit_group, sys_exit_group)
+#define __NR_waitid 95
+__SYSCALL(__NR_waitid, sys_waitid)
+
+/* kernel/fork.c */
+#define __NR_set_tid_address 96
+__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
+#define __NR_unshare 97
+__SYSCALL(__NR_unshare, sys_unshare)
+
+/* kernel/futex.c */
+#define __NR_futex 98
+__SYSCALL(__NR_futex, sys_futex)
+#define __NR_set_robust_list 99
+__SYSCALL(__NR_set_robust_list, sys_set_robust_list)
+#define __NR_get_robust_list 100
+__SYSCALL(__NR_get_robust_list, sys_get_robust_list)
+
+/* kernel/hrtimer.c */
+#define __NR_nanosleep 101
+__SYSCALL(__NR_nanosleep, sys_nanosleep)
+
+/* kernel/itimer.c */
+#define __NR_getitimer 102
+__SYSCALL(__NR_getitimer, sys_getitimer)
+#define __NR_setitimer 103
+__SYSCALL(__NR_setitimer, sys_setitimer)
+
+/* kernel/kexec.c */
+#define __NR_kexec_load 104
+__SYSCALL(__NR_kexec_load, sys_kexec_load)
+
+/* kernel/module.c */
+#define __NR_init_module 105
+__SYSCALL(__NR_init_module, sys_init_module)
+#define __NR_delete_module 106
+__SYSCALL(__NR_delete_module, sys_delete_module)
+
+/* kernel/posix-timers.c */
+#define __NR_timer_create 107
+__SYSCALL(__NR_timer_create, sys_timer_create)
+#define __NR_timer_gettime 108
+__SYSCALL(__NR_timer_gettime, sys_timer_gettime)
+#define __NR_timer_getoverrun 109
+__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+#define __NR_timer_settime 110
+__SYSCALL(__NR_timer_settime, sys_timer_settime)
+#define __NR_timer_delete 111
+__SYSCALL(__NR_timer_delete, sys_timer_delete)
+#define __NR_clock_settime 112
+__SYSCALL(__NR_clock_settime, sys_clock_settime)
+#define __NR_clock_gettime 113
+__SYSCALL(__NR_clock_gettime, sys_clock_gettime)
+#define __NR_clock_getres 114
+__SYSCALL(__NR_clock_getres, sys_clock_getres)
+#define __NR_clock_nanosleep 115
+__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep)
+
+/* kernel/printk.c */
+#define __NR_syslog 116
+__SYSCALL(__NR_syslog, sys_syslog)
+
+/* kernel/ptrace.c */
+#define __NR_ptrace 117
+__SYSCALL(__NR_ptrace, sys_ptrace)
+
+/* kernel/sched.c */
+#define __NR_sched_setparam 118
+__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
+#define __NR_sched_setscheduler 119
+__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
+#define __NR_sched_getscheduler 120
+__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
+#define __NR_sched_getparam 121
+__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
+#define __NR_sched_setaffinity 122
+__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity)
+#define __NR_sched_getaffinity 123
+__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity)
+#define __NR_sched_yield 124
+__SYSCALL(__NR_sched_yield, sys_sched_yield)
+#define __NR_sched_get_priority_max 125
+__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
+#define __NR_sched_get_priority_min 126
+__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#define __NR_sched_rr_get_interval 127
+__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval)
+
+/* kernel/signal.c */
+#define __NR_restart_syscall 128
+__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
+#define __NR_kill 129
+__SYSCALL(__NR_kill, sys_kill)
+#define __NR_tkill 130
+__SYSCALL(__NR_tkill, sys_tkill)
+#define __NR_tgkill 131
+__SYSCALL(__NR_tgkill, sys_tgkill)
+#define __NR_sigaltstack 132
+__SYSCALL(__NR_sigaltstack, sys_sigaltstack)
+#define __NR_rt_sigsuspend 133
+__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
+#define __NR_rt_sigaction 134
+__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) /* __ARCH_WANT_SYS_RT_SIGACTION */
+#define __NR_rt_sigprocmask 135
+__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask)
+#define __NR_rt_sigpending 136
+__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending)
+#define __NR_rt_sigtimedwait 137
+__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait)
+#define __NR_rt_sigqueueinfo 138
+__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo)
+#define __NR_rt_sigreturn 139
+__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) /* sys_rt_sigreturn_wrapper, */
+
+/* kernel/sys.c */
+#define __NR_setpriority 140
+__SYSCALL(__NR_setpriority, sys_setpriority)
+#define __NR_getpriority 141
+__SYSCALL(__NR_getpriority, sys_getpriority)
+#define __NR_reboot 142
+__SYSCALL(__NR_reboot, sys_reboot)
+#define __NR_setregid 143
+__SYSCALL(__NR_setregid, sys_setregid)
+#define __NR_setgid 144
+__SYSCALL(__NR_setgid, sys_setgid)
+#define __NR_setreuid 145
+__SYSCALL(__NR_setreuid, sys_setreuid)
+#define __NR_setuid 146
+__SYSCALL(__NR_setuid, sys_setuid)
+#define __NR_setresuid 147
+__SYSCALL(__NR_setresuid, sys_setresuid)
+#define __NR_getresuid 148
+__SYSCALL(__NR_getresuid, sys_getresuid)
+#define __NR_setresgid 149
+__SYSCALL(__NR_setresgid, sys_setresgid)
+#define __NR_getresgid 150
+__SYSCALL(__NR_getresgid, sys_getresgid)
+#define __NR_setfsuid 151
+__SYSCALL(__NR_setfsuid, sys_setfsuid)
+#define __NR_setfsgid 152
+__SYSCALL(__NR_setfsgid, sys_setfsgid)
+#define __NR_times 153
+__SYSCALL(__NR_times, sys_times)
+#define __NR_setpgid 154
+__SYSCALL(__NR_setpgid, sys_setpgid)
+#define __NR_getpgid 155
+__SYSCALL(__NR_getpgid, sys_getpgid)
+#define __NR_getsid 156
+__SYSCALL(__NR_getsid, sys_getsid)
+#define __NR_setsid 157
+__SYSCALL(__NR_setsid, sys_setsid)
+#define __NR_getgroups 158
+__SYSCALL(__NR_getgroups, sys_getgroups)
+#define __NR_setgroups 159
+__SYSCALL(__NR_setgroups, sys_setgroups)
+#define __NR_uname 160
+__SYSCALL(__NR_uname, sys_newuname)
+#define __NR_sethostname 161
+__SYSCALL(__NR_sethostname, sys_sethostname)
+#define __NR_setdomainname 162
+__SYSCALL(__NR_setdomainname, sys_setdomainname)
+#define __NR_getrlimit 163
+__SYSCALL(__NR_getrlimit, sys_getrlimit)
+#define __NR_setrlimit 164
+__SYSCALL(__NR_setrlimit, sys_setrlimit)
+#define __NR_getrusage 165
+__SYSCALL(__NR_getrusage, sys_getrusage)
+#define __NR_umask 166
+__SYSCALL(__NR_umask, sys_umask)
+#define __NR_prctl 167
+__SYSCALL(__NR_prctl, sys_prctl)
+#define __NR_getcpu 168
+__SYSCALL(__NR_getcpu, sys_getcpu)
+
+/* kernel/time.c */
+#define __NR_gettimeofday 169
+__SYSCALL(__NR_gettimeofday, sys_gettimeofday)
+#define __NR_settimeofday 170
+__SYSCALL(__NR_settimeofday, sys_settimeofday)
+#define __NR_adjtimex 171
+__SYSCALL(__NR_adjtimex, sys_adjtimex)
+
+/* kernel/timer.c */
+#define __NR_getpid 172
+__SYSCALL(__NR_getpid, sys_getpid)
+#define __NR_getppid 173
+__SYSCALL(__NR_getppid, sys_getppid)
+#define __NR_getuid 174
+__SYSCALL(__NR_getuid, sys_getuid)
+#define __NR_geteuid 175
+__SYSCALL(__NR_geteuid, sys_geteuid)
+#define __NR_getgid 176
+__SYSCALL(__NR_getgid, sys_getgid)
+#define __NR_getegid 177
+__SYSCALL(__NR_getegid, sys_getegid)
+#define __NR_gettid 178
+__SYSCALL(__NR_gettid, sys_gettid)
+#define __NR_sysinfo 179
+__SYSCALL(__NR_sysinfo, sys_sysinfo)
+
+/* ipc/mqueue.c */
+#define __NR_mq_open 180
+__SYSCALL(__NR_mq_open, sys_mq_open)
+#define __NR_mq_unlink 181
+__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+#define __NR_mq_timedsend 182
+__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend)
+#define __NR_mq_timedreceive 183
+__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive)
+#define __NR_mq_notify 184
+__SYSCALL(__NR_mq_notify, sys_mq_notify)
+#define __NR_mq_getsetattr 185
+__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
+
+/* ipc/msg.c */
+#define __NR_msgget 186
+__SYSCALL(__NR_msgget, sys_msgget)
+#define __NR_msgctl 187
+__SYSCALL(__NR_msgctl, sys_msgctl)
+#define __NR_msgrcv 188
+__SYSCALL(__NR_msgrcv, sys_msgrcv)
+#define __NR_msgsnd 189
+__SYSCALL(__NR_msgsnd, sys_msgsnd)
+
+/* ipc/sem.c */
+#define __NR_semget 190
+__SYSCALL(__NR_semget, sys_semget)
+#define __NR_semctl 191
+__SYSCALL(__NR_semctl, sys_semctl)
+#define __NR_semtimedop 192
+__SYSCALL(__NR_semtimedop, sys_semtimedop)
+#define __NR_semop 193
+__SYSCALL(__NR_semop, sys_semop)
+
+/* ipc/shm.c */
+#define __NR_shmget 194
+__SYSCALL(__NR_shmget, sys_shmget)
+#define __NR_shmctl 195
+__SYSCALL(__NR_shmctl, sys_shmctl)
+#define __NR_shmat 196
+__SYSCALL(__NR_shmat, sys_shmat)
+#define __NR_shmdt 197
+__SYSCALL(__NR_shmdt, sys_shmdt)
+
+/* net/socket.c */
+#define __NR_socket 198
+__SYSCALL(__NR_socket, sys_socket)
+#define __NR_socketpair 199
+__SYSCALL(__NR_socketpair, sys_socketpair)
+#define __NR_bind 200
+__SYSCALL(__NR_bind, sys_bind)
+#define __NR_listen 201
+__SYSCALL(__NR_listen, sys_listen)
+#define __NR_accept 202
+__SYSCALL(__NR_accept, sys_accept)
+#define __NR_connect 203
+__SYSCALL(__NR_connect, sys_connect)
+#define __NR_getsockname 204
+__SYSCALL(__NR_getsockname, sys_getsockname)
+#define __NR_getpeername 205
+__SYSCALL(__NR_getpeername, sys_getpeername)
+#define __NR_sendto 206
+__SYSCALL(__NR_sendto, sys_sendto)
+#define __NR_recvfrom 207
+__SYSCALL(__NR_recvfrom, sys_recvfrom)
+#define __NR_setsockopt 208
+__SYSCALL(__NR_setsockopt, sys_setsockopt)
+#define __NR_getsockopt 209
+__SYSCALL(__NR_getsockopt, sys_getsockopt)
+#define __NR_shutdown 210
+__SYSCALL(__NR_shutdown, sys_shutdown)
+#define __NR_sendmsg 211
+__SYSCALL(__NR_sendmsg, sys_sendmsg)
+#define __NR_recvmsg 212
+__SYSCALL(__NR_recvmsg, sys_recvmsg)
+
+/* mm/filemap.c */
+#define __NR_readahead 213
+__SYSCALL(__NR_readahead, sys_readahead)
+
+/* mm/nommu.c, also with MMU */
+#define __NR_brk 214
+__SYSCALL(__NR_brk, sys_brk)
+#define __NR_munmap 215
+__SYSCALL(__NR_munmap, sys_munmap)
+#define __NR_mremap 216
+__SYSCALL(__NR_mremap, sys_mremap)
+
+/* security/keys/keyctl.c */
+#define __NR_add_key 217
+__SYSCALL(__NR_add_key, sys_add_key)
+#define __NR_request_key 218
+__SYSCALL(__NR_request_key, sys_request_key)
+#define __NR_keyctl 219
+__SYSCALL(__NR_keyctl, sys_keyctl)
+
+/* arch/example/kernel/sys_example.c */
+#define __NR_clone 220
+__SYSCALL(__NR_clone, sys_clone)	/* .long sys_clone_wrapper */
+#define __NR_execve 221
+__SYSCALL(__NR_execve, sys_execve)	/* .long sys_execve_wrapper */
+
+#define __NR3264_mmap 222
+__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
+/* mm/fadvise.c */
+#define __NR3264_fadvise64 223
+__SC_3264(__NR3264_fadvise64, sys_fadvise64_64, sys_fadvise64)
+
+/* mm/, CONFIG_MMU only */
+#ifndef __ARCH_NOMMU
+#define __NR_swapon 224
+__SYSCALL(__NR_swapon, sys_swapon)
+#define __NR_swapoff 225
+__SYSCALL(__NR_swapoff, sys_swapoff)
+#define __NR_mprotect 226
+__SYSCALL(__NR_mprotect, sys_mprotect)
+#define __NR_msync 227
+__SYSCALL(__NR_msync, sys_msync)
+#define __NR_mlock 228
+__SYSCALL(__NR_mlock, sys_mlock)
+#define __NR_munlock 229
+__SYSCALL(__NR_munlock, sys_munlock)
+#define __NR_mlockall 230
+__SYSCALL(__NR_mlockall, sys_mlockall)
+#define __NR_munlockall 231
+__SYSCALL(__NR_munlockall, sys_munlockall)
+#define __NR_mincore 232
+__SYSCALL(__NR_mincore, sys_mincore)
+#define __NR_madvise 233
+__SYSCALL(__NR_madvise, sys_madvise)
+#define __NR_remap_file_pages 234
+__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
+#define __NR_mbind 235
+__SYSCALL(__NR_mbind, sys_mbind)
+#define __NR_get_mempolicy 236
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
+#define __NR_set_mempolicy 237
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
+#define __NR_migrate_pages 238
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
+#define __NR_move_pages 239
+__SYSCALL(__NR_move_pages, sys_move_pages)
+#endif
+
+#undef __NR_syscalls
+#define __NR_syscalls 240
+
+/*
+ * All syscalls below here should go away really,
+ * these are provided for both review and as a porting
+ * help for the C library version.
+*
+ * Last chance: are any of these important enought to
+ * enable by default?
+ */
+#ifdef __ARCH_WANT_SYSCALL_NO_AT
+#define __NR_open 1024
+__SYSCALL(__NR_open, sys_open)
+#define __NR_link 1025
+__SYSCALL(__NR_link, sys_link)
+#define __NR_unlink 1026
+__SYSCALL(__NR_unlink, sys_unlink)
+#define __NR_mknod 1027
+__SYSCALL(__NR_mknod, sys_mknod)
+#define __NR_chmod 1028
+__SYSCALL(__NR_chmod, sys_chmod)
+#define __NR_chown 1029
+__SYSCALL(__NR_chown, sys_chown)
+#define __NR_mkdir 1030
+__SYSCALL(__NR_mkdir, sys_mkdir)
+#define __NR_rmdir 1031
+__SYSCALL(__NR_rmdir, sys_rmdir)
+#define __NR_lchown 1032
+__SYSCALL(__NR_lchown, sys_lchown)
+#define __NR_access 1033
+__SYSCALL(__NR_access, sys_access)
+#define __NR_rename 1034
+__SYSCALL(__NR_rename, sys_rename)
+#define __NR_readlink 1035
+__SYSCALL(__NR_readlink, sys_readlink)
+#define __NR_symlink 1036
+__SYSCALL(__NR_symlink, sys_symlink)
+#define __NR_utimes 1037
+__SYSCALL(__NR_utimes, sys_utimes)
+#define __NR3264_stat 1038
+__SC_3264(__NR3264_stat, sys_stat64, sys_newstat)
+#define __NR3264_lstat 1039
+__SC_3264(__NR3264_lstat, sys_lstat64, sys_newlstat)
+
+#undef __NR_syscalls
+#define __NR_syscalls (__NR3264_lstat+1)
+#endif /* __ARCH_WANT_SYSCALL_NO_AT */
+
+#ifdef __ARCH_WANT_SYSCALL_NO_FLAGS
+#define __NR_pipe 1040
+__SYSCALL(__NR_pipe, sys_pipe)
+#define __NR_dup2 1041
+__SYSCALL(__NR_dup2, sys_dup2)
+#define __NR_epoll_create 1042
+__SYSCALL(__NR_epoll_create, sys_epoll_create)
+#define __NR_inotify_init 1043
+__SYSCALL(__NR_inotify_init, sys_inotify_init)
+#define __NR_eventfd 1044
+__SYSCALL(__NR_eventfd, sys_eventfd)
+#define __NR_signalfd 1045
+__SYSCALL(__NR_signalfd, sys_signalfd)
+
+#undef __NR_syscalls
+#define __NR_syscalls (__NR_signalfd+1)
+#endif /* __ARCH_WANT_SYSCALL_NO_FLAGS */
+
+#if __BITS_PER_LONG == 32 && defined(__ARCH_WANT_SYSCALL_OFF_T)
+#define __NR_sendfile 1046
+__SYSCALL(__NR_sendfile, sys_sendfile)
+#define __NR_ftruncate 1047
+__SYSCALL(__NR_ftruncate, sys_ftruncate)
+#define __NR_truncate 1048
+__SYSCALL(__NR_truncate, sys_truncate)
+#define __NR_stat 1049
+__SYSCALL(__NR_stat, sys_newstat)
+#define __NR_lstat 1050
+__SYSCALL(__NR_lstat, sys_newlstat)
+#define __NR_fstat 1051
+__SYSCALL(__NR_fstat, sys_newfstat)
+#define __NR_fcntl 1052
+__SYSCALL(__NR_fcntl, sys_fcntl)
+#define __NR_fadvise64 1053
+#define __ARCH_WANT_SYS_FADVISE64
+__SYSCALL(__NR_fadvise64, sys_fadvise64)
+#define __NR_newfstatat 1054
+#define __ARCH_WANT_SYS_NEWFSTATAT
+__SYSCALL(__NR_newfstatat, sys_newfstatat)
+#define __NR_fstatfs 1055
+__SYSCALL(__NR_fstatfs, sys_fstatfs)
+#define __NR_statfs 1056
+__SYSCALL(__NR_statfs, sys_statfs)
+#define __NR_lseek 1057
+__SYSCALL(__NR_lseek, sys_lseek)
+#define __NR_mmap 1058
+__SYSCALL(__NR_mmap, sys_mmap)
+
+#undef __NR_syscalls
+#define __NR_syscalls (__NR_mmap+1)
+#endif /* 32 bit off_t syscalls */
+
+#ifdef __ARCH_WANT_SYSCALL_DEPRECATED
+#define __NR_alarm 1059
+#define __ARCH_WANT_SYS_ALARM
+__SYSCALL(__NR_alarm, sys_alarm)
+#define __NR_getpgrp 1060
+#define __ARCH_WANT_SYS_GETPGRP
+__SYSCALL(__NR_getpgrp, sys_getpgrp)
+#define __NR_pause 1061
+#define __ARCH_WANT_SYS_PAUSE
+__SYSCALL(__NR_pause, sys_pause)
+#define __NR_time 1062
+#define __ARCH_WANT_SYS_TIME
+__SYSCALL(__NR_time, sys_time)
+#define __NR_utime 1063
+#define __ARCH_WANT_SYS_UTIME
+__SYSCALL(__NR_utime, sys_utime)
+
+#define __NR_creat 1064
+__SYSCALL(__NR_creat, sys_creat)
+#define __NR_getdents 1065
+#define __ARCH_WANT_SYS_GETDENTS
+__SYSCALL(__NR_getdents, sys_getdents)
+#define __NR_futimesat 1066
+__SYSCALL(__NR_futimesat, sys_futimesat)
+#define __NR_select 1067
+#define __ARCH_WANT_SYS_SELECT
+__SYSCALL(__NR_select, sys_select)
+#define __NR_poll 1068
+__SYSCALL(__NR_poll, sys_poll)
+#define __NR_epoll_wait 1069
+__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
+#define __NR_ustat 1070
+__SYSCALL(__NR_ustat, sys_ustat)
+#define __NR_vfork 1071
+__SYSCALL(__NR_vfork, sys_vfork)
+#define __NR_wait4 1072
+__SYSCALL(__NR_wait4, sys_wait4)
+#define __NR_recv 1073
+__SYSCALL(__NR_recv, sys_recv)
+#define __NR_send 1074
+__SYSCALL(__NR_send, sys_send)
+#define __NR_bdflush 1075
+__SYSCALL(__NR_bdflush, sys_bdflush)
+#define __NR_umount 1076
+__SYSCALL(__NR_umount, sys_oldumount)
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __NR_uselib 1077
+__SYSCALL(__NR_uselib, sys_uselib)
+#define __NR__sysctl 1078
+__SYSCALL(__NR__sysctl, sys_sysctl)
+
+#define __NR_fork 1079
+#ifdef CONFIG_MMU
+__SYSCALL(__NR_fork, sys_fork)
+#else
+__SYSCALL(__NR_fork, sys_ni_syscall)
+#endif /* CONFIG_MMU */
+
+#undef __NR_syscalls
+#define __NR_syscalls (__NR_fork+1)
+
+#endif /* __ARCH_WANT_SYSCALL_DEPRECATED */
+
+/*
+ * 32 bit systems traditionally used different
+ * syscalls for off_t and loff_t arguments, while
+ * 64 bit systems only need the off_t version.
+ * For new 32 bit platforms, there is no need to
+ * implement the old 32 bit off_t syscalls, so
+ * they take different names.
+ * Here we map the numbers so that both versions
+ * use the same syscall table layout.
+ */
+#if __BITS_PER_LONG == 64
+#define __NR_fcntl __NR3264_fcntl
+#define __NR_statfs __NR3264_statfs
+#define __NR_fstatfs __NR3264_fstatfs
+#define __NR_truncate __NR3264_truncate
+#define __NR_ftruncate __NR3264_truncate
+#define __NR_lseek __NR3264_lseek
+#define __NR_sendfile __NR3264_sendfile
+#define __NR_newfstatat __NR3264_fstatat
+#define __NR_fstat __NR3264_fstat
+#define __NR_mmap __NR3264_mmap
+#define __NR_fadvise64 __NR3264_fadvise64
+#ifdef __NR3264_stat
+#define __NR_stat __NR3264_stat
+#define __NR_lstat __NR3264_lstat
+#endif
+#else
+#define __NR_fcntl64 __NR3264_fcntl
+#define __NR_statfs64 __NR3264_statfs
+#define __NR_fstatfs64 __NR3264_fstatfs
+#define __NR_truncate64 __NR3264_truncate
+#define __NR_ftruncate64 __NR3264_truncate
+#define __NR_llseek __NR3264_lseek
+#define __NR_sendfile64 __NR3264_sendfile
+#define __NR_fstatat64 __NR3264_fstatat
+#define __NR_fstat64 __NR3264_fstat
+#define __NR_mmap2 __NR3264_mmap
+#define __NR_fadvise64_64 __NR3264_fadvise64
+#ifdef __NR3264_stat
+#define __NR_stat64 __NR3264_stat
+#define __NR_lstat64 __NR3264_lstat
+#endif
+#endif
+
+#ifdef __KERNEL__
+
+/*
+ * These are required system calls, we should
+ * invert the logic eventually and let them
+ * be selected by default.
+ */
+#if __BITS_PER_LONG == 32
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SYS_LLSEEK
+#endif
+#define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
+
+/*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+#ifndef cond_syscall
+#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_GENERIC_UNISTD_H */
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index 60d00d1..66ad375 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -14,6 +14,57 @@ cat << EOF
 #include <asm/types.h>
 #include <asm/unistd.h>
 
+/* *at */
+#define __IGNORE_open		/* openat */
+#define __IGNORE_link		/* linkat */
+#define __IGNORE_unlink		/* unlinkat */
+#define __IGNORE_mknod		/* mknodat */
+#define __IGNORE_chmod		/* fchmodat */
+#define __IGNORE_chown		/* fchownat */
+#define __IGNORE_mkdir		/* mkdirat */
+#define __IGNORE_rmdir		/* unlinkat */
+#define __IGNORE_lchown		/* fchownat */
+#define __IGNORE_access		/* faccessat */
+#define __IGNORE_rename		/* renameat */
+#define __IGNORE_readlink	/* readlinkat */
+#define __IGNORE_symlink	/* symlinkat */
+#define __IGNORE_utimes		/* futimesat */
+#if BITS_PER_LONG == 64
+#define __IGNORE_stat		/* fstatat */
+#define __IGNORE_lstat		/* fstatat */
+#else
+#define __IGNORE_stat64		/* fstatat64 */
+#define __IGNORE_lstat64	/* fstatat64 */
+#endif
+
+/* CLOEXEC flag */
+#define __IGNORE_pipe		/* pipe2 */
+#define __IGNORE_dup2		/* dup3 */
+#define __IGNORE_epoll_create	/* epoll_create1 */
+#define __IGNORE_inotify_init	/* inotify_init1 */
+#define __IGNORE_eventfd	/* eventfd2 */
+#define __IGNORE_signalfd	/* signalfd4 */
+
+/* MMU */
+#ifndef CONFIG_MMU
+#define __IGNORE_madvise
+#define __IGNORE_mbind
+#define __IGNORE_mincore
+#define __IGNORE_mlock
+#define __IGNORE_mlockall
+#define __IGNORE_munlock
+#define __IGNORE_munlockall
+#define __IGNORE_mprotect
+#define __IGNORE_msync
+#define __IGNORE_migrate_pages
+#define __IGNORE_move_pages
+#define __IGNORE_remap_file_pages
+#define __IGNORE_get_mempolicy
+#define __IGNORE_set_mempolicy
+#define __IGNORE_swapoff
+#define __IGNORE_swapon
+#endif
+
 /* System calls for 32-bit kernels only */
 #if BITS_PER_LONG == 64
 #define __IGNORE_sendfile64
@@ -27,6 +78,22 @@ cat << EOF
 #define __IGNORE_fstatat64
 #define __IGNORE_fstatfs64
 #define __IGNORE_statfs64
+#define __IGNORE_llseek
+#define __IGNORE_mmap2
+#else
+#define __IGNORE_sendfile
+#define __IGNORE_ftruncate
+#define __IGNORE_truncate
+#define __IGNORE_stat
+#define __IGNORE_lstat
+#define __IGNORE_fstat
+#define __IGNORE_fcntl
+#define __IGNORE_fadvise64
+#define __IGNORE_newfstatat
+#define __IGNORE_fstatfs
+#define __IGNORE_statfs
+#define __IGNORE_lseek
+#define __IGNORE_mmap
 #endif
 
 /* i386-specific or historical system calls */
@@ -44,7 +111,6 @@ cat << EOF
 #define __IGNORE_idle
 #define __IGNORE_modify_ldt
 #define __IGNORE_ugetrlimit
-#define __IGNORE_mmap2
 #define __IGNORE_vm86
 #define __IGNORE_vm86old
 #define __IGNORE_set_thread_area
@@ -55,7 +121,6 @@ cat << EOF
 #define __IGNORE_oldlstat
 #define __IGNORE_oldolduname
 #define __IGNORE_olduname
-#define __IGNORE_umount2
 #define __IGNORE_umount
 #define __IGNORE_waitpid
 #define __IGNORE_stime
@@ -75,9 +140,12 @@ cat << EOF
 #define __IGNORE__llseek
 #define __IGNORE__newselect
 #define __IGNORE_create_module
-#define __IGNORE_delete_module
 #define __IGNORE_query_module
 #define __IGNORE_get_kernel_syms
+#define __IGNORE_sysfs
+#define __IGNORE_uselib
+#define __IGNORE__sysctl
+
 /* ... including the "new" 32-bit uid syscalls */
 #define __IGNORE_lchown32
 #define __IGNORE_getuid32
@@ -99,6 +167,24 @@ cat << EOF
 #define __IGNORE_setfsuid32
 #define __IGNORE_setfsgid32
 
+/* these can be expressed using other calls */
+#define __IGNORE_alarm		/* setitimer */
+#define __IGNORE_creat		/* open */
+#define __IGNORE_fork		/* clone */
+#define __IGNORE_futimesat	/* utimensat */
+#define __IGNORE_getpgrp	/* getpgid */
+#define __IGNORE_getdents	/* getdents64 */
+#define __IGNORE_pause		/* sigsuspend */
+#define __IGNORE_poll		/* ppoll */
+#define __IGNORE_select		/* pselect6 */
+#define __IGNORE_epoll_wait	/* epoll_pwait */
+#define __IGNORE_time		/* gettimeofday */
+#define __IGNORE_uname		/* newuname */
+#define __IGNORE_ustat		/* statfs */
+#define __IGNORE_utime		/* utimes */
+#define __IGNORE_vfork		/* clone */
+#define __IGNORE_wait4		/* waitid */
+
 /* sync_file_range had a stupid ABI. Allow sync_file_range2 instead */
 #ifdef __NR_sync_file_range2
 #define __IGNORE_sync_file_range
-- 
cgit v0.10.2


From 72099ed2719fc5829bd79c6ca9d1783ed026eb37 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:29 +0000
Subject: asm-generic: rename atomic.h to atomic-long.h

The existing asm-generic/atomic.h only defines the
atomic_long type. This renames it to atomic-long.h
so we have a place to add a truly generic atomic.h
that can be used on all non-SMP systems.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 62b3635..610dff4 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -256,5 +256,5 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ALPHA_ATOMIC_H */
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 16b52f3..9e07fe5 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -249,6 +249,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif
 #endif
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 3188151..b131c27 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -196,6 +196,6 @@ static inline int atomic_sub_if_positive(int i, atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /*  __ASM_AVR32_ATOMIC_H */
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 94b2a9b..7bbf44e 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -208,6 +208,6 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 #define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0)
 #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif				/* __ARCH_BLACKFIN_ATOMIC __ */
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h
index 5718dd8..a6aca81 100644
--- a/arch/cris/include/asm/atomic.h
+++ b/arch/cris/include/asm/atomic.h
@@ -158,5 +158,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()    barrier()
 #define smp_mb__after_atomic_inc()     barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 296c35c..0409d98 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -194,5 +194,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 833186c..33c8c0f 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -141,5 +141,5 @@ static __inline__ void atomic_set_mask(unsigned long mask, unsigned long *v)
 #define smp_mb__before_atomic_inc()    barrier()
 #define smp_mb__after_atomic_inc() barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index d37292b..88405cb 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -216,5 +216,5 @@ atomic64_add_negative (__s64 i, atomic64_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ASM_IA64_ATOMIC_H */
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index 2eed30f..63f0cf0 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -314,5 +314,5 @@ static __inline__ void atomic_set_mask(unsigned long  mask, atomic_t *addr)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif	/* _ASM_M32R_ATOMIC_H */
diff --git a/arch/m68k/include/asm/atomic_mm.h b/arch/m68k/include/asm/atomic_mm.h
index eb0ab9d..88b7af2 100644
--- a/arch/m68k/include/asm/atomic_mm.h
+++ b/arch/m68k/include/asm/atomic_mm.h
@@ -192,5 +192,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __ARCH_M68K_ATOMIC __ */
diff --git a/arch/m68k/include/asm/atomic_no.h b/arch/m68k/include/asm/atomic_no.h
index 6bb6748..5674cb9 100644
--- a/arch/m68k/include/asm/atomic_no.h
+++ b/arch/m68k/include/asm/atomic_no.h
@@ -151,5 +151,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __ARCH_M68KNOMMU_ATOMIC __ */
diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
index a448d94..0de612a 100644
--- a/arch/microblaze/include/asm/atomic.h
+++ b/arch/microblaze/include/asm/atomic.h
@@ -118,6 +118,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /* _ASM_MICROBLAZE_ATOMIC_H */
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 1b332e1..eb7f01c 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -793,6 +793,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 #define smp_mb__before_atomic_inc()	smp_llsc_mb()
 #define smp_mb__after_atomic_inc()	smp_llsc_mb()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index bc06482..5bf5be9 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -151,7 +151,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index ada3e53..7eeaff9 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -338,6 +338,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #endif /* CONFIG_64BIT */
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /* _ASM_PARISC_ATOMIC_H_ */
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index b401950..b7d2d07 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -472,6 +472,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #endif /* __powerpc64__ */
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_ATOMIC_H_ */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index de432f2..fca9dff 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -275,6 +275,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v,
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __KERNEL__ */
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 6327ffb..a5647d0 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -84,5 +84,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index bb91b12..f0d343c 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -161,5 +161,5 @@ static inline int __atomic24_sub(int i, atomic24_t *v)
 
 #endif /* !(__KERNEL__) */
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index a0a7064..f2e4800 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -114,5 +114,5 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 85b46fb..c83d314 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -247,5 +247,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
index 8c21731..0d63602 100644
--- a/arch/x86/include/asm/atomic_64.h
+++ b/arch/x86/include/asm/atomic_64.h
@@ -455,5 +455,5 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ASM_X86_ATOMIC_64_H */
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 67ad67b..22d6dde 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -292,7 +292,7 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __KERNEL__ */
 
 #endif /* _XTENSA_ATOMIC_H */
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
new file mode 100644
index 0000000..76e27d6
--- /dev/null
+++ b/include/asm-generic/atomic-long.h
@@ -0,0 +1,258 @@
+#ifndef _ASM_GENERIC_ATOMIC_LONG_H
+#define _ASM_GENERIC_ATOMIC_LONG_H
+/*
+ * Copyright (C) 2005 Silicon Graphics, Inc.
+ *	Christoph Lameter
+ *
+ * Allows to provide arch independent atomic definitions without the need to
+ * edit all arch specific atomic.h files.
+ */
+
+#include <asm/types.h>
+
+/*
+ * Suppport for atomic_long_t
+ *
+ * Casts for parameters are avoided for existing atomic functions in order to
+ * avoid issues with cast-as-lval under gcc 4.x and other limitations that the
+ * macros of a platform may have.
+ */
+
+#if BITS_PER_LONG == 64
+
+typedef atomic64_t atomic_long_t;
+
+#define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
+
+static inline long atomic_long_read(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_read(v);
+}
+
+static inline void atomic_long_set(atomic_long_t *l, long i)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_set(v, i);
+}
+
+static inline void atomic_long_inc(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_inc(v);
+}
+
+static inline void atomic_long_dec(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_dec(v);
+}
+
+static inline void atomic_long_add(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_add(i, v);
+}
+
+static inline void atomic_long_sub(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_sub(i, v);
+}
+
+static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_sub_and_test(i, v);
+}
+
+static inline int atomic_long_dec_and_test(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_dec_and_test(v);
+}
+
+static inline int atomic_long_inc_and_test(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_inc_and_test(v);
+}
+
+static inline int atomic_long_add_negative(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_add_negative(i, v);
+}
+
+static inline long atomic_long_add_return(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_add_return(i, v);
+}
+
+static inline long atomic_long_sub_return(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_sub_return(i, v);
+}
+
+static inline long atomic_long_inc_return(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_inc_return(v);
+}
+
+static inline long atomic_long_dec_return(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_dec_return(v);
+}
+
+static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_add_unless(v, a, u);
+}
+
+#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l))
+
+#define atomic_long_cmpxchg(l, old, new) \
+	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
+#define atomic_long_xchg(v, new) \
+	(atomic64_xchg((atomic64_t *)(l), (new)))
+
+#else  /*  BITS_PER_LONG == 64  */
+
+typedef atomic_t atomic_long_t;
+
+#define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
+static inline long atomic_long_read(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_read(v);
+}
+
+static inline void atomic_long_set(atomic_long_t *l, long i)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_set(v, i);
+}
+
+static inline void atomic_long_inc(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_inc(v);
+}
+
+static inline void atomic_long_dec(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_dec(v);
+}
+
+static inline void atomic_long_add(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_add(i, v);
+}
+
+static inline void atomic_long_sub(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_sub(i, v);
+}
+
+static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_sub_and_test(i, v);
+}
+
+static inline int atomic_long_dec_and_test(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_dec_and_test(v);
+}
+
+static inline int atomic_long_inc_and_test(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_inc_and_test(v);
+}
+
+static inline int atomic_long_add_negative(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_add_negative(i, v);
+}
+
+static inline long atomic_long_add_return(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_add_return(i, v);
+}
+
+static inline long atomic_long_sub_return(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_sub_return(i, v);
+}
+
+static inline long atomic_long_inc_return(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_inc_return(v);
+}
+
+static inline long atomic_long_dec_return(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_dec_return(v);
+}
+
+static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_add_unless(v, a, u);
+}
+
+#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l))
+
+#define atomic_long_cmpxchg(l, old, new) \
+	(atomic_cmpxchg((atomic_t *)(l), (old), (new)))
+#define atomic_long_xchg(v, new) \
+	(atomic_xchg((atomic_t *)(v), (new)))
+
+#endif  /*  BITS_PER_LONG == 64  */
+
+#endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
deleted file mode 100644
index 3673a13..0000000
--- a/include/asm-generic/atomic.h
+++ /dev/null
@@ -1,258 +0,0 @@
-#ifndef _ASM_GENERIC_ATOMIC_H
-#define _ASM_GENERIC_ATOMIC_H
-/*
- * Copyright (C) 2005 Silicon Graphics, Inc.
- *	Christoph Lameter
- *
- * Allows to provide arch independent atomic definitions without the need to
- * edit all arch specific atomic.h files.
- */
-
-#include <asm/types.h>
-
-/*
- * Suppport for atomic_long_t
- *
- * Casts for parameters are avoided for existing atomic functions in order to
- * avoid issues with cast-as-lval under gcc 4.x and other limitations that the
- * macros of a platform may have.
- */
-
-#if BITS_PER_LONG == 64
-
-typedef atomic64_t atomic_long_t;
-
-#define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
-
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_read(v);
-}
-
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_set(v, i);
-}
-
-static inline void atomic_long_inc(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_inc(v);
-}
-
-static inline void atomic_long_dec(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_dec(v);
-}
-
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_add(i, v);
-}
-
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_sub(i, v);
-}
-
-static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_sub_and_test(i, v);
-}
-
-static inline int atomic_long_dec_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_dec_and_test(v);
-}
-
-static inline int atomic_long_inc_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_inc_and_test(v);
-}
-
-static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_add_negative(i, v);
-}
-
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_return(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_sub_return(i, v);
-}
-
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_inc_return(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_dec_return(v);
-}
-
-static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_unless(v, a, u);
-}
-
-#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l))
-
-#define atomic_long_cmpxchg(l, old, new) \
-	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(atomic64_xchg((atomic64_t *)(l), (new)))
-
-#else  /*  BITS_PER_LONG == 64  */
-
-typedef atomic_t atomic_long_t;
-
-#define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_read(v);
-}
-
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_set(v, i);
-}
-
-static inline void atomic_long_inc(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_inc(v);
-}
-
-static inline void atomic_long_dec(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_dec(v);
-}
-
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_add(i, v);
-}
-
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_sub(i, v);
-}
-
-static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return atomic_sub_and_test(i, v);
-}
-
-static inline int atomic_long_dec_and_test(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return atomic_dec_and_test(v);
-}
-
-static inline int atomic_long_inc_and_test(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return atomic_inc_and_test(v);
-}
-
-static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return atomic_add_negative(i, v);
-}
-
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_add_return(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_sub_return(i, v);
-}
-
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_inc_return(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_dec_return(v);
-}
-
-static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_add_unless(v, a, u);
-}
-
-#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l))
-
-#define atomic_long_cmpxchg(l, old, new) \
-	(atomic_cmpxchg((atomic_t *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(atomic_xchg((atomic_t *)(v), (new)))
-
-#endif  /*  BITS_PER_LONG == 64  */
-
-#endif  /*  _ASM_GENERIC_ATOMIC_H  */
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index 4657f3e..c894646 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -2,6 +2,7 @@
 #define _ASM_GENERIC_BITOPS_ATOMIC_H_
 
 #include <asm/types.h>
+#include <asm/system.h>
 
 #ifdef CONFIG_SMP
 #include <asm/spinlock.h>
-- 
cgit v0.10.2


From 5b17e1cd8928ae65932758ce6478ac6d3e9a86b2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:30 +0000
Subject: asm-generic: rename page.h and uaccess.h

The current asm-generic/page.h only contains the get_order
function, and asm-generic/uaccess.h only implements
unaligned accesses. This renames the file to getorder.h
and uaccess-unaligned.h to make room for new page.h
and uaccess.h file that will be usable by all simple
(e.g. nommu) architectures.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index 0995f9d1..07af062 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -93,6 +93,6 @@ typedef struct page *pgtable_t;
 					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ALPHA_PAGE_H */
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 7b52277..be962c1 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -202,6 +202,6 @@ typedef struct page *pgtable_t;
 	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
 	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif
diff --git a/arch/blackfin/include/asm/page.h b/arch/blackfin/include/asm/page.h
index 344f6a8..3ea2016 100644
--- a/arch/blackfin/include/asm/page.h
+++ b/arch/blackfin/include/asm/page.h
@@ -81,7 +81,7 @@ extern unsigned long memory_end;
 #define	virt_addr_valid(kaddr)	(((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
 				((void *)(kaddr) < (void *)memory_end))
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif				/* __ASSEMBLY__ */
 
diff --git a/arch/cris/include/asm/page.h b/arch/cris/include/asm/page.h
index f3fdbd0..be45ee3 100644
--- a/arch/cris/include/asm/page.h
+++ b/arch/cris/include/asm/page.h
@@ -68,7 +68,7 @@ typedef struct page *pgtable_t;
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _CRIS_PAGE_H */
 
diff --git a/arch/frv/include/asm/page.h b/arch/frv/include/asm/page.h
index bd9c220..25c6a50 100644
--- a/arch/frv/include/asm/page.h
+++ b/arch/frv/include/asm/page.h
@@ -73,6 +73,6 @@ extern unsigned long max_pfn;
 #endif /* __ASSEMBLY__ */
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_PAGE_H */
diff --git a/arch/h8300/include/asm/page.h b/arch/h8300/include/asm/page.h
index 0b6acf0..837381a 100644
--- a/arch/h8300/include/asm/page.h
+++ b/arch/h8300/include/asm/page.h
@@ -73,6 +73,6 @@ extern unsigned long memory_end;
 #endif /* __ASSEMBLY__ */
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _H8300_PAGE_H */
diff --git a/arch/m32r/include/asm/page.h b/arch/m32r/include/asm/page.h
index c933308..11777f7 100644
--- a/arch/m32r/include/asm/page.h
+++ b/arch/m32r/include/asm/page.h
@@ -82,6 +82,6 @@ typedef struct page *pgtable_t;
 #define devmem_is_allowed(x) 1
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_M32R_PAGE_H */
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index a34b8ba..d009f3e 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -223,6 +223,6 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _M68K_PAGE_H */
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index 3a1ede4..9aa3f90 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -72,6 +72,6 @@ extern unsigned long memory_end;
 
 #endif /* __ASSEMBLY__ */
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _M68KNOMMU_PAGE_H */
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 7238dcf..962c210 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -135,6 +135,6 @@ extern unsigned int memory_size;
 #endif /* __KERNEL__ */
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_MICROBLAZE_PAGE_H */
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 9f946e4..72c80d2 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -189,6 +189,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define CAC_ADDR(addr)		((addr) - UNCAC_BASE + PAGE_OFFSET)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_PAGE_H */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 7bc5125..a84cc1f 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -159,6 +159,6 @@ extern int npmem_ranges;
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _PARISC_PAGE_H */
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index cd4c0b2..7cf799d 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -7,7 +7,7 @@
 #include <asm/page.h>
 #include <asm/system.h>
 #include <asm/cache.h>
-#include <asm-generic/uaccess.h>
+#include <asm-generic/uaccess-unaligned.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index a0e3f6e..bd0849d 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -41,7 +41,7 @@ extern void clear_pages(void *page, int order);
 static inline void clear_page(void *page) { clear_pages(page, 0); }
 extern void copy_page(void *to, void *from);
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #define PGD_T_LOG2	(__builtin_ffs(sizeof(pgd_t)) - 1)
 #define PTE_T_LOG2	(__builtin_ffs(sizeof(pte_t)) - 1)
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 043bfdf..5817a3b 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -180,6 +180,6 @@ do {						\
 	(test_thread_flag(TIF_32BIT) ? \
 	 VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_POWERPC_PAGE_64_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 32e8f6a..3e3594d 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -150,7 +150,7 @@ void arch_alloc_page(struct page *page, int order);
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #define __HAVE_ARCH_GATE_AREA 1
 
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 9c6d21e..49592c7 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -163,7 +163,7 @@ typedef struct page *pgtable_t;
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 /* vDSO support */
 #ifdef CONFIG_VSYSCALL
diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h
index d1806ed..f72080b 100644
--- a/arch/sparc/include/asm/page_32.h
+++ b/arch/sparc/include/asm/page_32.h
@@ -152,6 +152,6 @@ extern unsigned long pfn_base;
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _SPARC_PAGE_H */
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 4274ed1..f0d09b4 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -132,6 +132,6 @@ typedef struct page *pgtable_t;
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _SPARC64_PAGE_H */
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index c64e767..a38c032 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -12,7 +12,7 @@
 #include <asm/asi.h>
 #include <asm/system.h>
 #include <asm/spitfire.h>
-#include <asm-generic/uaccess.h>
+#include <asm-generic/uaccess-unaligned.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 55f28a0..4cc9b6c 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -116,7 +116,7 @@ extern unsigned long uml_physmem;
 #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* __UM_PAGE_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 89ed9d7..625c3f0 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -56,7 +56,7 @@ extern bool __virt_addr_valid(unsigned long kaddr);
 #endif	/* __ASSEMBLY__ */
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #define __HAVE_ARCH_GATE_AREA 1
 
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 17e0c53..161bb89 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -129,7 +129,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
 
 #else
 
-# include <asm-generic/page.h>
+# include <asm-generic/getorder.h>
 
 #endif
 
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
new file mode 100644
index 0000000..67e7245
--- /dev/null
+++ b/include/asm-generic/getorder.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_GENERIC_GETORDER_H
+#define __ASM_GENERIC_GETORDER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+
+/* Pure 2^n version of get_order */
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	int order;
+
+	size = (size - 1) >> (PAGE_SHIFT - 1);
+	order = -1;
+	do {
+		size >>= 1;
+		order++;
+	} while (size);
+	return order;
+}
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* __ASM_GENERIC_GETORDER_H */
diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h
deleted file mode 100644
index 14db733..0000000
--- a/include/asm-generic/page.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _ASM_GENERIC_PAGE_H
-#define _ASM_GENERIC_PAGE_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/compiler.h>
-
-/* Pure 2^n version of get_order */
-static __inline__ __attribute_const__ int get_order(unsigned long size)
-{
-	int order;
-
-	size = (size - 1) >> (PAGE_SHIFT - 1);
-	order = -1;
-	do {
-		size >>= 1;
-		order++;
-	} while (size);
-	return order;
-}
-
-#endif	/* __ASSEMBLY__ */
-
-#endif	/* _ASM_GENERIC_PAGE_H */
diff --git a/include/asm-generic/uaccess-unaligned.h b/include/asm-generic/uaccess-unaligned.h
new file mode 100644
index 0000000..67deb89
--- /dev/null
+++ b/include/asm-generic/uaccess-unaligned.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_GENERIC_UACCESS_UNALIGNED_H
+#define __ASM_GENERIC_UACCESS_UNALIGNED_H
+
+/*
+ * This macro should be used instead of __get_user() when accessing
+ * values at locations that are not known to be aligned.
+ */
+#define __get_user_unaligned(x, ptr)					\
+({									\
+	__typeof__ (*(ptr)) __x;					\
+	__copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0;	\
+	(x) = __x;							\
+})
+
+
+/*
+ * This macro should be used instead of __put_user() when accessing
+ * values at locations that are not known to be aligned.
+ */
+#define __put_user_unaligned(x, ptr)					\
+({									\
+	__typeof__ (*(ptr)) __x = (x);					\
+	__copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0;	\
+})
+
+#endif /* __ASM_GENERIC_UACCESS_UNALIGNED_H */
diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
deleted file mode 100644
index 549cb3a..0000000
--- a/include/asm-generic/uaccess.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef _ASM_GENERIC_UACCESS_H_
-#define _ASM_GENERIC_UACCESS_H_
-
-/*
- * This macro should be used instead of __get_user() when accessing
- * values at locations that are not known to be aligned.
- */
-#define __get_user_unaligned(x, ptr)					\
-({									\
-	__typeof__ (*(ptr)) __x;					\
-	__copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0;	\
-	(x) = __x;							\
-})
-
-
-/*
- * This macro should be used instead of __put_user() when accessing
- * values at locations that are not known to be aligned.
- */
-#define __put_user_unaligned(x, ptr)					\
-({									\
-	__typeof__ (*(ptr)) __x = (x);					\
-	__copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0;	\
-})
-
-#endif /* _ASM_GENERIC_UACCESS_H */
-- 
cgit v0.10.2


From 3aef392822e1a42e80077a332ea2efdfd8a4248a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 17 May 2009 21:00:05 +0000
Subject: asm-generic: make get_rtc_time overridable

Evidently, set_rtc_time is supposed to be overridable
by architectures that define their own version, but
unfortunately, get_rtc_ss would in that case still
use the generic version.

This makes get_rtc_ss call the real set_rtc_time
to let architectures define their own version.
The change should fix the "Extended RTC operation"
on Alpha, which uses the incorrect get_rtc_ss
call. It also allows PowerPC to use the asm-generic/rtc.h
file in the future.

Cc: Richard Henderson <rth@twiddle.net>
Cc: linux-alpha@vger.kernel.org
Cc: Tom Rini <trini@mvista.com>
Cc: rtc-linux@googlegroups.com
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Paul Gortmaker <p_gortmaker@yahoo.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h
index 763e3b0..fa86f24 100644
--- a/include/asm-generic/rtc.h
+++ b/include/asm-generic/rtc.h
@@ -202,7 +202,7 @@ static inline unsigned int get_rtc_ss(void)
 {
 	struct rtc_time h;
 
-	__get_rtc_time(&h);
+	get_rtc_time(&h);
 	return h.tm_sec;
 }
 
-- 
cgit v0.10.2


From d7c4f1b78afeedfc22b1756fcdc1acbe84284d74 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:31 +0000
Subject: asm-generic: make pci.h usable directly

Some generic code is using the horribly misnamed PCI_DMA_BUS_IS_PHYS
from asm/pci.h. This makes sure that an architecture without PCI
support does not have to define this itself but can rely on the
asm-generic version.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index 585d9b4..3ce227b 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -10,8 +10,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef ASM_PCI_H
-#define	ASM_PCI_H
+#ifndef _ASM_FRV_PCI_H
+#define _ASM_FRV_PCI_H
 
 #include <linux/mm.h>
 #include <asm/scatterlist.h>
@@ -43,12 +43,6 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
 /* Return the index of the PCI controller for device PDEV. */
 #define pci_controller_num(PDEV)	(0)
 
-/* The PCI address space does equal the physical memory
- * address space.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
 /* pci_unmap_{page,single} is a nop so... */
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
 #define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
@@ -114,5 +108,4 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
 				    sg_dma_address(&sg[i])+sg_dma_len(&sg[i]));
 }
 
-
-#endif
+#endif /* _ASM_FRV_PCI_H */
diff --git a/arch/m32r/include/asm/pci.h b/arch/m32r/include/asm/pci.h
index fe785d1..07d3834 100644
--- a/arch/m32r/include/asm/pci.h
+++ b/arch/m32r/include/asm/pci.h
@@ -3,6 +3,4 @@
 
 #include <asm-generic/pci.h>
 
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
 #endif /* _ASM_M32R_PCI_H */
diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h
index c36a77d..515c6e2 100644
--- a/include/asm-generic/pci.h
+++ b/include/asm-generic/pci.h
@@ -52,4 +52,12 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 }
 #endif /* HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ */
 
+/*
+ * By default, assume that no iommu is in use and that the PCI
+ * space is mapped to address physical 0.
+ */
+#ifndef PCI_DMA_BUS_IS_PHYS
+#define PCI_DMA_BUS_IS_PHYS	(1)
 #endif
+
+#endif /* _ASM_GENERIC_PCI_H */
-- 
cgit v0.10.2


From 9858c60cc2d33b18367b2bc6947e3ea23db26ccb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:32 +0000
Subject: asm-generic: make bitops.h usable

bitops.h apparently suffered from some level of bitrot, it
was missing the smp_mb__{before,after}_clear_bit functions,
and included other headers in an invalid order.

This changes the file so that new architectures can use
it out of the box.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index c9f369c..a54f442 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -1,19 +1,29 @@
-#ifndef _ASM_GENERIC_BITOPS_H_
-#define _ASM_GENERIC_BITOPS_H_
+#ifndef __ASM_GENERIC_BITOPS_H
+#define __ASM_GENERIC_BITOPS_H
 
 /*
  * For the benefit of those who are trying to port Linux to another
  * architecture, here are some C-language equivalents.  You should
  * recode these in the native assembly language, if at all possible.
- * 
+ *
  * C language equivalents written by Theodore Ts'o, 9/26/92
  */
 
-#include <asm-generic/bitops/atomic.h>
-#include <asm-generic/bitops/non-atomic.h>
+#include <linux/irqflags.h>
+#include <linux/compiler.h>
+
+/*
+ * clear_bit may not imply a memory barrier
+ */
+#ifndef smp_mb__before_clear_bit
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	smp_mb()
+#endif
+
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/find.h>
 
@@ -26,8 +36,10 @@
 #include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/lock.h>
 
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
 #include <asm-generic/bitops/ext2-non-atomic.h>
 #include <asm-generic/bitops/ext2-atomic.h>
 #include <asm-generic/bitops/minix.h>
 
-#endif /* _ASM_GENERIC_BITOPS_H */
+#endif /* __ASM_GENERIC_BITOPS_H */
-- 
cgit v0.10.2


From aafe4dbed0bf6cbdb2e9f03e1d42f8a540d8541d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:33 +0000
Subject: asm-generic: add generic versions of common headers

These are all kernel internal interfaces that get copied
around a lot. In most cases, architectures can provide
their own optimized versions, but these generic versions
can work as well.

I have tried to use the most common contents of each
header to allow existing architectures to migrate easily.

Thanks to Remis for suggesting a number of cleanups.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/bugs.h b/include/asm-generic/bugs.h
new file mode 100644
index 0000000..6c4f62e
--- /dev/null
+++ b/include/asm-generic/bugs.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_GENERIC_BUGS_H
+#define __ASM_GENERIC_BUGS_H
+/*
+ * This file is included by 'init/main.c' to check for
+ * architecture-dependent bugs.
+ */
+
+static inline void check_bugs(void) { }
+
+#endif	/* __ASM_GENERIC_BUGS_H */
diff --git a/include/asm-generic/current.h b/include/asm-generic/current.h
new file mode 100644
index 0000000..5e86f6a
--- /dev/null
+++ b/include/asm-generic/current.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_CURRENT_H
+#define __ASM_GENERIC_CURRENT_H
+
+#include <linux/thread_info.h>
+
+#define get_current() (current_thread_info()->task)
+#define current get_current()
+
+#endif /* __ASM_GENERIC_CURRENT_H */
diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h
new file mode 100644
index 0000000..4586fec
--- /dev/null
+++ b/include/asm-generic/delay.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_DELAY_H
+#define __ASM_GENERIC_DELAY_H
+
+extern void __udelay(unsigned long usecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) __udelay(n)
+
+#endif /* __ASM_GENERIC_DELAY_H */
diff --git a/include/asm-generic/fb.h b/include/asm-generic/fb.h
new file mode 100644
index 0000000..fe8ca7f
--- /dev/null
+++ b/include/asm-generic/fb.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_GENERIC_FB_H_
+#define __ASM_GENERIC_FB_H_
+#include <linux/fb.h>
+
+#define fb_pgprotect(...) do {} while (0)
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* __ASM_GENERIC_FB_H_ */
diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h
new file mode 100644
index 0000000..3d5d2c9
--- /dev/null
+++ b/include/asm-generic/hardirq.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_GENERIC_HARDIRQ_H
+#define __ASM_GENERIC_HARDIRQ_H
+
+#include <linux/cache.h>
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+	unsigned long __softirq_pending;
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+#ifndef HARDIRQ_BITS
+#define HARDIRQ_BITS	8
+#endif
+
+/*
+ * The hardirq mask has to be large enough to have
+ * space for potentially all IRQ sources in the system
+ * nesting on a single CPU:
+ */
+#if (1 << HARDIRQ_BITS) < NR_IRQS
+# error HARDIRQ_BITS is too low!
+#endif
+
+#ifndef ack_bad_irq
+static inline void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#endif /* __ASM_GENERIC_HARDIRQ_H */
diff --git a/include/asm-generic/irq.h b/include/asm-generic/irq.h
new file mode 100644
index 0000000..b90ec0b
--- /dev/null
+++ b/include/asm-generic/irq.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_GENERIC_IRQ_H
+#define __ASM_GENERIC_IRQ_H
+
+/*
+ * NR_IRQS is the upper bound of how many interrupts can be handled
+ * in the platform. It is used to size the static irq_map array,
+ * so don't make it too big.
+ */
+#ifndef NR_IRQS
+#define NR_IRQS 64
+#endif
+
+static inline int irq_canonicalize(int irq)
+{
+	return irq;
+}
+
+#endif /* __ASM_GENERIC_IRQ_H */
diff --git a/include/asm-generic/irqflags.h b/include/asm-generic/irqflags.h
new file mode 100644
index 0000000..9aebf61
--- /dev/null
+++ b/include/asm-generic/irqflags.h
@@ -0,0 +1,72 @@
+#ifndef __ASM_GENERIC_IRQFLAGS_H
+#define __ASM_GENERIC_IRQFLAGS_H
+
+/*
+ * All architectures should implement at least the first two functions,
+ * usually inline assembly will be the best way.
+ */
+#ifndef RAW_IRQ_DISABLED
+#define RAW_IRQ_DISABLED 0
+#define RAW_IRQ_ENABLED 1
+#endif
+
+/* read interrupt enabled status */
+#ifndef __raw_local_save_flags
+unsigned long __raw_local_save_flags(void);
+#endif
+
+/* set interrupt enabled status */
+#ifndef raw_local_irq_restore
+void raw_local_irq_restore(unsigned long flags);
+#endif
+
+/* get status and disable interrupts */
+#ifndef __raw_local_irq_save
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long flags;
+	flags = __raw_local_save_flags();
+	raw_local_irq_restore(RAW_IRQ_DISABLED);
+	return flags;
+}
+#endif
+
+/* test flags */
+#ifndef raw_irqs_disabled_flags
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return flags == RAW_IRQ_DISABLED;
+}
+#endif
+
+/* unconditionally enable interrupts */
+#ifndef raw_local_irq_enable
+static inline void raw_local_irq_enable(void)
+{
+	raw_local_irq_restore(RAW_IRQ_ENABLED);
+}
+#endif
+
+/* unconditionally disable interrupts */
+#ifndef raw_local_irq_disable
+static inline void raw_local_irq_disable(void)
+{
+	raw_local_irq_restore(RAW_IRQ_DISABLED);
+}
+#endif
+
+/* test hardware interrupt enable bit */
+#ifndef raw_irqs_disabled
+static inline int raw_irqs_disabled(void)
+{
+	return raw_irqs_disabled_flags(__raw_local_save_flags());
+}
+#endif
+
+#define raw_local_save_flags(flags) \
+	do { (flags) = __raw_local_save_flags(); } while (0)
+
+#define raw_local_irq_save(flags) \
+	do { (flags) = __raw_local_irq_save(); } while (0)
+
+#endif /* __ASM_GENERIC_IRQFLAGS_H */
diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h
new file mode 100644
index 0000000..58c3305
--- /dev/null
+++ b/include/asm-generic/kmap_types.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_GENERIC_KMAP_TYPES_H
+#define _ASM_GENERIC_KMAP_TYPES_H
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
+
+enum km_type {
+D(0)	KM_BOUNCE_READ,
+D(1)	KM_SKB_SUNRPC_DATA,
+D(2)	KM_SKB_DATA_SOFTIRQ,
+D(3)	KM_USER0,
+D(4)	KM_USER1,
+D(5)	KM_BIO_SRC_IRQ,
+D(6)	KM_BIO_DST_IRQ,
+D(7)	KM_PTE0,
+D(8)	KM_PTE1,
+D(9)	KM_IRQ0,
+D(10)	KM_IRQ1,
+D(11)	KM_SOFTIRQ0,
+D(12)	KM_SOFTIRQ1,
+D(13)	KM_SYNC_ICACHE,
+D(14)	KM_SYNC_DCACHE,
+D(15)	KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */
+D(16)	KM_TYPE_NR
+};
+
+#undef D
+
+#endif
diff --git a/include/asm-generic/linkage.h b/include/asm-generic/linkage.h
new file mode 100644
index 0000000..fef7a01
--- /dev/null
+++ b/include/asm-generic/linkage.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_GENERIC_LINKAGE_H
+#define __ASM_GENERIC_LINKAGE_H
+/*
+ * linux/linkage.h provides reasonable defaults.
+ * an architecture can override them by providing its own version.
+ */
+
+#endif /* __ASM_GENERIC_LINKAGE_H */
diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h
new file mode 100644
index 0000000..ed5b44d
--- /dev/null
+++ b/include/asm-generic/module.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_GENERIC_MODULE_H
+#define __ASM_GENERIC_MODULE_H
+
+/*
+ * Many architectures just need a simple module
+ * loader without arch specific data.
+ */
+struct mod_arch_specific
+{
+};
+
+#ifdef CONFIG_64BIT
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Ehdr Elf64_Ehdr
+#else
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+#endif
+
+#endif /* __ASM_GENERIC_MODULE_H */
diff --git a/include/asm-generic/mutex.h b/include/asm-generic/mutex.h
new file mode 100644
index 0000000..fe91ab5
--- /dev/null
+++ b/include/asm-generic/mutex.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_MUTEX_H
+#define __ASM_GENERIC_MUTEX_H
+/*
+ * Pull in the generic implementation for the mutex fastpath,
+ * which is a reasonable default on many architectures.
+ */
+
+#include <asm-generic/mutex-dec.h>
+#endif /* __ASM_GENERIC_MUTEX_H */
diff --git a/include/asm-generic/scatterlist.h b/include/asm-generic/scatterlist.h
new file mode 100644
index 0000000..8b94544
--- /dev/null
+++ b/include/asm-generic/scatterlist.h
@@ -0,0 +1,43 @@
+#ifndef __ASM_GENERIC_SCATTERLIST_H
+#define __ASM_GENERIC_SCATTERLIST_H
+
+#include <linux/types.h>
+
+struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+	unsigned long	sg_magic;
+#endif
+	unsigned long	page_link;
+	unsigned int	offset;
+	unsigned int	length;
+	dma_addr_t	dma_address;
+	unsigned int	dma_length;
+};
+
+/*
+ * These macros should be used after a dma_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns, or alternatively stop on the first sg_dma_len(sg) which
+ * is 0.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+#ifndef sg_dma_len
+/*
+ * Normally, you have an iommu on 64 bit machines, but not on 32 bit
+ * machines. Architectures that are differnt should override this.
+ */
+#if __BITS_PER_LONG == 64
+#define sg_dma_len(sg)		((sg)->dma_length)
+#else
+#define sg_dma_len(sg)		((sg)->length)
+#endif /* 64 bit */
+#endif /* sg_dma_len */
+
+#ifndef ISA_DMA_THRESHOLD
+#define ISA_DMA_THRESHOLD	(~0UL)
+#endif
+
+#define ARCH_HAS_SG_CHAIN
+
+#endif /* __ASM_GENERIC_SCATTERLIST_H */
diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
new file mode 100644
index 0000000..1547a03
--- /dev/null
+++ b/include/asm-generic/spinlock.h
@@ -0,0 +1,11 @@
+#ifndef __ASM_GENERIC_SPINLOCK_H
+#define __ASM_GENERIC_SPINLOCK_H
+/*
+ * You need to implement asm/spinlock.h for SMP support. The generic
+ * version does not handle SMP.
+ */
+#ifdef CONFIG_SMP
+#error need an architecture specific asm/spinlock.h
+#endif
+
+#endif /* __ASM_GENERIC_SPINLOCK_H */
diff --git a/include/asm-generic/string.h b/include/asm-generic/string.h
new file mode 100644
index 0000000..de5e020
--- /dev/null
+++ b/include/asm-generic/string.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_GENERIC_STRING_H
+#define __ASM_GENERIC_STRING_H
+/*
+ * The kernel provides all required functions in lib/string.c
+ *
+ * Architectures probably want to provide at least their own optimized
+ * memcpy and memset functions though.
+ */
+
+#endif /* __ASM_GENERIC_STRING_H */
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
new file mode 100644
index 0000000..df84e3b
--- /dev/null
+++ b/include/asm-generic/syscalls.h
@@ -0,0 +1,60 @@
+#ifndef __ASM_GENERIC_SYSCALLS_H
+#define __ASM_GENERIC_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+
+/*
+ * Calling conventions for these system calls can differ, so
+ * it's possible to override them.
+ */
+#ifndef sys_clone
+asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
+			void __user *parent_tid, void __user *child_tid,
+			struct pt_regs *regs);
+#endif
+
+#ifndef sys_fork
+asmlinkage long sys_fork(struct pt_regs *regs);
+#endif
+
+#ifndef sys_vfork
+asmlinkage long sys_vfork(struct pt_regs *regs);
+#endif
+
+#ifndef sys_execve
+asmlinkage long sys_execve(char __user *filename, char __user * __user *argv,
+			char __user * __user *envp, struct pt_regs *regs);
+#endif
+
+#ifndef sys_mmap2
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			unsigned long prot, unsigned long flags,
+			unsigned long fd, unsigned long pgoff);
+#endif
+
+#ifndef sys_mmap
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+			unsigned long prot, unsigned long flags,
+			unsigned long fd, off_t pgoff);
+#endif
+
+#ifndef sys_sigaltstack
+asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
+			struct pt_regs *);
+#endif
+
+#ifndef sys_rt_sigreturn
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs);
+#endif
+
+#ifndef sys_rt_sigsuspend
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
+#endif
+
+#ifndef sys_rt_sigaction
+asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act,
+			 struct sigaction __user *oact, size_t sigsetsize);
+#endif
+
+#endif /* __ASM_GENERIC_SYSCALLS_H */
diff --git a/include/asm-generic/system.h b/include/asm-generic/system.h
new file mode 100644
index 0000000..efa403b
--- /dev/null
+++ b/include/asm-generic/system.h
@@ -0,0 +1,161 @@
+/* Generic system definitions, based on MN10300 definitions.
+ *
+ * It should be possible to use these on really simple architectures,
+ * but it serves more as a starting point for new ports.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __ASM_GENERIC_SYSTEM_H
+#define __ASM_GENERIC_SYSTEM_H
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/irqflags.h>
+
+#include <asm/cmpxchg-local.h>
+
+struct task_struct;
+
+/* context switching is now performed out-of-line in switch_to.S */
+extern struct task_struct *__switch_to(struct task_struct *,
+		struct task_struct *);
+#define switch_to(prev, next, last)					\
+	do {								\
+		((last) = __switch_to((prev), (next)));			\
+	} while (0)
+
+#define arch_align_stack(x) (x)
+
+#define nop() asm volatile ("nop")
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * This implementation only contains a compiler barrier.
+ */
+
+#define mb()	asm volatile ("": : :"memory")
+#define rmb()	mb()
+#define wmb()	asm volatile ("": : :"memory")
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#define smp_rmb()	rmb()
+#define smp_wmb()	wmb()
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#endif
+
+#define set_mb(var, value)  do { var = value;  mb(); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
+#define read_barrier_depends()		do {} while (0)
+#define smp_read_barrier_depends()	do {} while (0)
+
+/*
+ * we make sure local_irq_enable() doesn't cause priority inversion
+ */
+#ifndef __ASSEMBLY__
+
+/* This function doesn't exist, so you'll get a linker error
+ *    if something tries to do an invalid xchg().  */
+extern void __xchg_called_with_bad_pointer(void);
+
+static inline
+unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
+{
+	unsigned long ret, flags;
+
+	switch (size) {
+	case 1:
+#ifdef __xchg_u8
+		return __xchg_u8(x, ptr);
+#else
+		local_irq_save(flags);
+		ret = *(volatile u8 *)ptr;
+		*(volatile u8 *)ptr = x;
+		local_irq_restore(flags);
+		return ret;
+#endif /* __xchg_u8 */
+
+	case 2:
+#ifdef __xchg_u16
+		return __xchg_u16(x, ptr);
+#else
+		local_irq_save(flags);
+		ret = *(volatile u16 *)ptr;
+		*(volatile u16 *)ptr = x;
+		local_irq_restore(flags);
+		return ret;
+#endif /* __xchg_u16 */
+
+	case 4:
+#ifdef __xchg_u32
+		return __xchg_u32(x, ptr);
+#else
+		local_irq_save(flags);
+		ret = *(volatile u32 *)ptr;
+		*(volatile u32 *)ptr = x;
+		local_irq_restore(flags);
+		return ret;
+#endif /* __xchg_u32 */
+
+#ifdef CONFIG_64BIT
+	case 8:
+#ifdef __xchg_u64
+		return __xchg_u64(x, ptr);
+#else
+		local_irq_save(flags);
+		ret = *(volatile u64 *)ptr;
+		*(volatile u64 *)ptr = x;
+		local_irq_restore(flags);
+		return ret;
+#endif /* __xchg_u64 */
+#endif /* CONFIG_64BIT */
+
+	default:
+		__xchg_called_with_bad_pointer();
+		return x;
+	}
+}
+
+#define xchg(ptr, x) \
+	((__typeof__(*(ptr))) __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
+
+static inline unsigned long __cmpxchg(volatile unsigned long *m,
+				      unsigned long old, unsigned long new)
+{
+	unsigned long retval;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	retval = *m;
+	if (retval == old)
+		*m = new;
+	local_irq_restore(flags);
+	return retval;
+}
+
+#define cmpxchg(ptr, o, n)					\
+	((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr), \
+					(unsigned long)(o),	\
+					(unsigned long)(n)))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_GENERIC_SYSTEM_H */
diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h
new file mode 100644
index 0000000..03cf593
--- /dev/null
+++ b/include/asm-generic/unaligned.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_GENERIC_UNALIGNED_H
+#define __ASM_GENERIC_UNALIGNED_H
+
+/*
+ * This is the most generic implementation of unaligned accesses
+ * and should work almost anywhere.
+ *
+ * If an architecture can handle unaligned accesses in hardware,
+ * it may want to use the linux/unaligned/access_ok.h implementation
+ * instead.
+ */
+#include <asm/byteorder.h>
+
+#if defined(__LITTLE_ENDIAN)
+# include <linux/unaligned/le_struct.h>
+# include <linux/unaligned/be_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned	__get_unaligned_le
+# define put_unaligned	__put_unaligned_le
+#elif defined(__BIG_ENDIAN)
+# include <linux/unaligned/be_struct.h>
+# include <linux/unaligned/le_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned	__get_unaligned_be
+# define put_unaligned	__put_unaligned_be
+#else
+# error need to define endianess
+#endif
+
+#endif /* __ASM_GENERIC_UNALIGNED_H */
diff --git a/include/asm-generic/user.h b/include/asm-generic/user.h
new file mode 100644
index 0000000..8b9c3c9
--- /dev/null
+++ b/include/asm-generic/user.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_GENERIC_USER_H
+#define __ASM_GENERIC_USER_H
+/*
+ * This file may define a 'struct user' structure. However, it it only
+ * used for a.out file, which are not supported on new architectures.
+ */
+
+#endif	/* __ASM_GENERIC_USER_H */
-- 
cgit v0.10.2


From ae49e807951d5e26767bc55d1dc29671c596c450 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:34 +0000
Subject: asm-generic: add legacy I/O header files

The dma.h, hw_irq.h, serial.h and timex.h files originally
described PC-style i8237, i8259A, i8250, i8253 and i8255 chips
as well as the VGA style text mode graphics.

Modern architectures live happily without these specific
interfaces, but a few definitions from these headers keep
getting used in common code.

The new generic headers are what most architectures use
anyway nowadays, just implementing the minimal definitions.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/dma.h b/include/asm-generic/dma.h
new file mode 100644
index 0000000..9dfc3a7f
--- /dev/null
+++ b/include/asm-generic/dma.h
@@ -0,0 +1,15 @@
+#ifndef __ASM_GENERIC_DMA_H
+#define __ASM_GENERIC_DMA_H
+/*
+ * This file traditionally describes the i8237 PC style DMA controller.
+ * Most architectures don't have these any more and can get the minimal
+ * implementation from kernel/dma.c by not defining MAX_DMA_CHANNELS.
+ *
+ * Some code relies on seeing MAX_DMA_ADDRESS though.
+ */
+#define MAX_DMA_ADDRESS PAGE_OFFSET
+
+extern int request_dma(unsigned int dmanr, const char *device_id);
+extern void free_dma(unsigned int dmanr);
+
+#endif /* __ASM_GENERIC_DMA_H */
diff --git a/include/asm-generic/hw_irq.h b/include/asm-generic/hw_irq.h
new file mode 100644
index 0000000..89036d7
--- /dev/null
+++ b/include/asm-generic/hw_irq.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_HW_IRQ_H
+#define __ASM_GENERIC_HW_IRQ_H
+/*
+ * hw_irq.h has internal declarations for the low-level interrupt
+ * controller, like the original i8259A.
+ * In general, this is not needed for new architectures.
+ */
+
+#endif /* __ASM_GENERIC_HW_IRQ_H */
diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h
new file mode 100644
index 0000000..40528cb
--- /dev/null
+++ b/include/asm-generic/parport.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_GENERIC_PARPORT_H
+#define __ASM_GENERIC_PARPORT_H
+
+/*
+ * An ISA bus may have i8255 parallel ports at well-known
+ * locations in the I/O space, which are scanned by
+ * parport_pc_find_isa_ports.
+ *
+ * Without ISA support, the driver will only attach
+ * to devices on the PCI bus.
+ */
+
+static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
+static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
+{
+#ifdef CONFIG_ISA
+	return parport_pc_find_isa_ports(autoirq, autodma);
+#else
+	return 0;
+#endif
+}
+
+#endif /* __ASM_GENERIC_PARPORT_H */
diff --git a/include/asm-generic/serial.h b/include/asm-generic/serial.h
new file mode 100644
index 0000000..5e29109
--- /dev/null
+++ b/include/asm-generic/serial.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_GENERIC_SERIAL_H
+#define __ASM_GENERIC_SERIAL_H
+
+/*
+ * This should not be an architecture specific #define, oh well.
+ *
+ * Traditionally, it just describes i8250 and related serial ports
+ * that have this clock rate.
+ */
+
+#define BASE_BAUD (1843200 / 16)
+
+#endif /* __ASM_GENERIC_SERIAL_H */
diff --git a/include/asm-generic/timex.h b/include/asm-generic/timex.h
new file mode 100644
index 0000000..b2243cb
--- /dev/null
+++ b/include/asm-generic/timex.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_GENERIC_TIMEX_H
+#define __ASM_GENERIC_TIMEX_H
+
+/*
+ * If you have a cycle counter, return the value here.
+ */
+typedef unsigned long cycles_t;
+#ifndef get_cycles
+static inline cycles_t get_cycles(void)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Architectures are encouraged to implement read_current_timer
+ * and define this in order to avoid the expensive delay loop
+ * calibration during boot.
+ */
+#undef ARCH_HAS_READ_CURRENT_TIMER
+
+#endif /* __ASM_GENERIC_TIMEX_H */
diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h
new file mode 100644
index 0000000..36c8ff5
--- /dev/null
+++ b/include/asm-generic/vga.h
@@ -0,0 +1,24 @@
+/*
+ *	Access to VGA videoram
+ *
+ *	(c) 1998 Martin Mares <mj@ucw.cz>
+ */
+#ifndef __ASM_GENERIC_VGA_H
+#define __ASM_GENERIC_VGA_H
+
+/*
+ *	On most architectures that support VGA, we can just
+ *	recalculate addresses and then access the videoram
+ *	directly without any black magic.
+ *
+ *	Everyone else needs to ioremap the address and use
+ *	proper I/O accesses.
+ */
+#ifndef VGA_MAP_MEM
+#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x)
+#endif
+
+#define vga_readb(x) (*(x))
+#define vga_writeb(x, y) (*(y) = (x))
+
+#endif /* _ASM_GENERIC_VGA_H */
-- 
cgit v0.10.2


From 3f7e212df82ca0459d44c91d9e019efd1b5f936c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:35 +0000
Subject: asm-generic: add generic atomic.h and io.h

atomic.h and io.h are based on the mn10300 architecture,
which is already pretty generic and can be used by
other architectures that do not have hardware support
for atomic operations or out-of-order I/O access.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
new file mode 100644
index 0000000..c99c64d
--- /dev/null
+++ b/include/asm-generic/atomic.h
@@ -0,0 +1,165 @@
+/*
+ * Generic C implementation of atomic counter operations
+ * Originally implemented for MN10300.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __ASM_GENERIC_ATOMIC_H
+#define __ASM_GENERIC_ATOMIC_H
+
+#ifdef CONFIG_SMP
+#error not SMP safe
+#endif
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+#ifdef __KERNEL__
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_read(v)	((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_set(v, i) (((v)->counter) = (i))
+
+#include <asm/system.h>
+
+/**
+ * atomic_add_return - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns the result
+ * Note that the guaranteed useful range of an atomic_t is only 24 bits.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	unsigned long flags;
+	int temp;
+
+	local_irq_save(flags);
+	temp = v->counter;
+	temp += i;
+	v->counter = temp;
+	local_irq_restore(flags);
+
+	return temp;
+}
+
+/**
+ * atomic_sub_return - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns the result
+ * Note that the guaranteed useful range of an atomic_t is only 24 bits.
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	unsigned long flags;
+	int temp;
+
+	local_irq_save(flags);
+	temp = v->counter;
+	temp -= i;
+	v->counter = temp;
+	local_irq_restore(flags);
+
+	return temp;
+}
+
+static inline int atomic_add_negative(int i, atomic_t *v)
+{
+	return atomic_add_return(i, v) < 0;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	atomic_add_return(i, v);
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	atomic_sub_return(i, v);
+}
+
+static inline void atomic_inc(atomic_t *v)
+{
+	atomic_add_return(1, v);
+}
+
+static inline void atomic_dec(atomic_t *v)
+{
+	atomic_sub_return(1, v);
+}
+
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+
+#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
+#define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
+
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
+{
+	unsigned long flags;
+
+	mask = ~mask;
+	local_irq_save(flags);
+	*addr &= mask;
+	local_irq_restore(flags);
+}
+
+#define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
+#define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
+
+#define cmpxchg_local(ptr, o, n)				  	       \
+	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
+			(unsigned long)(n), sizeof(*(ptr))))
+
+#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+
+/* Assume that atomic operations are already serializing */
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+#include <asm-generic/atomic-long.h>
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
new file mode 100644
index 0000000..bcee636
--- /dev/null
+++ b/include/asm-generic/io.h
@@ -0,0 +1,300 @@
+/* Generic I/O port emulation, based on MN10300 code
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __ASM_GENERIC_IO_H
+#define __ASM_GENERIC_IO_H
+
+#include <asm/page.h> /* I/O is all done through memory accesses */
+#include <asm/cacheflush.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_GENERIC_IOMAP
+#include <asm-generic/iomap.h>
+#endif
+
+#define mmiowb() do {} while (0)
+
+/*****************************************************************************/
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the simple architectures, we just read/write the
+ * memory location directly.
+ */
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+	return *(const volatile u8 __force *) addr;
+}
+
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+	return *(const volatile u16 __force *) addr;
+}
+
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+	return *(const volatile u32 __force *) addr;
+}
+
+#define readb __raw_readb
+#define readw(addr) __le16_to_cpu(__raw_readw(addr))
+#define readl(addr) __le32_to_cpu(__raw_readl(addr))
+
+static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
+{
+	*(volatile u8 __force *) addr = b;
+}
+
+static inline void __raw_writew(u16 b, volatile void __iomem *addr)
+{
+	*(volatile u16 __force *) addr = b;
+}
+
+static inline void __raw_writel(u32 b, volatile void __iomem *addr)
+{
+	*(volatile u32 __force *) addr = b;
+}
+
+#define writeb __raw_writeb
+#define writew(b,addr) __raw_writew(__cpu_to_le16(b),addr)
+#define writel(b,addr) __raw_writel(__cpu_to_le32(b),addr)
+
+#ifdef CONFIG_64BIT
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+	return *(const volatile u64 __force *) addr;
+}
+#define readq(addr) __le64_to_cpu(__raw_readq(addr))
+
+static inline void __raw_writeq(u64 b, volatile void __iomem *addr)
+{
+	*(volatile u64 __force *) addr = b;
+}
+#define writeq(b,addr) __raw_writeq(__cpu_to_le64(b),addr)
+#endif
+
+/*****************************************************************************/
+/*
+ * traditional input/output functions
+ */
+
+static inline u8 inb(unsigned long addr)
+{
+	return readb((volatile void __iomem *) addr);
+}
+
+static inline u16 inw(unsigned long addr)
+{
+	return readw((volatile void __iomem *) addr);
+}
+
+static inline u32 inl(unsigned long addr)
+{
+	return readl((volatile void __iomem *) addr);
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+	writeb(b, (volatile void __iomem *) addr);
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+	writew(b, (volatile void __iomem *) addr);
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+	writel(b, (volatile void __iomem *) addr);
+}
+
+#define inb_p(addr)	inb(addr)
+#define inw_p(addr)	inw(addr)
+#define inl_p(addr)	inl(addr)
+#define outb_p(x, addr)	outb((x), (addr))
+#define outw_p(x, addr)	outw((x), (addr))
+#define outl_p(x, addr)	outl((x), (addr))
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u8 *buf = buffer;
+		do {
+			u8 x = inb(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u16 *buf = buffer;
+		do {
+			u16 x = inw(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u32 *buf = buffer;
+		do {
+			u32 x = inl(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+	if (count) {
+		const u8 *buf = buffer;
+		do {
+			outb(*buf++, addr);
+		} while (--count);
+	}
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+	if (count) {
+		const u16 *buf = buffer;
+		do {
+			outw(*buf++, addr);
+		} while (--count);
+	}
+}
+
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+	if (count) {
+		const u32 *buf = buffer;
+		do {
+			outl(*buf++, addr);
+		} while (--count);
+	}
+}
+
+#ifndef CONFIG_GENERIC_IOMAP
+#define ioread8(addr)		readb(addr)
+#define ioread16(addr)		readw(addr)
+#define ioread32(addr)		readl(addr)
+
+#define iowrite8(v, addr)	writeb((v), (addr))
+#define iowrite16(v, addr)	writew((v), (addr))
+#define iowrite32(v, addr)	writel((v), (addr))
+
+#define ioread8_rep(p, dst, count) \
+	insb((unsigned long) (p), (dst), (count))
+#define ioread16_rep(p, dst, count) \
+	insw((unsigned long) (p), (dst), (count))
+#define ioread32_rep(p, dst, count) \
+	insl((unsigned long) (p), (dst), (count))
+
+#define iowrite8_rep(p, src, count) \
+	outsb((unsigned long) (p), (src), (count))
+#define iowrite16_rep(p, src, count) \
+	outsw((unsigned long) (p), (src), (count))
+#define iowrite32_rep(p, src, count) \
+	outsl((unsigned long) (p), (src), (count))
+#endif /* CONFIG_GENERIC_IOMAP */
+
+
+#define IO_SPACE_LIMIT 0xffffffff
+
+#ifdef __KERNEL__
+
+#include <linux/vmalloc.h>
+#define __io_virt(x) ((void __force *) (x))
+
+#ifndef CONFIG_GENERIC_IOMAP
+/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
+struct pci_dev;
+extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
+{
+}
+#endif /* CONFIG_GENERIC_IOMAP */
+
+/*
+ * Change virtual addresses to physical addresses and vv.
+ * These are pretty trivial
+ */
+static inline unsigned long virt_to_phys(volatile void *address)
+{
+	return __pa((unsigned long)address);
+}
+
+static inline void *phys_to_virt(unsigned long address)
+{
+	return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size)
+{
+	return (void __iomem*) (unsigned long)offset;
+}
+
+#define __ioremap(offset, size, flags)	ioremap(offset, size)
+
+#ifndef ioremap_nocache
+#define ioremap_nocache ioremap
+#endif
+
+#ifndef ioremap_wc
+#define ioremap_wc ioremap_nocache
+#endif
+
+static inline void iounmap(void *addr)
+{
+}
+
+#ifndef CONFIG_GENERIC_IOMAP
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return (void __iomem *) port;
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+#else /* CONFIG_GENERIC_IOMAP */
+extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
+extern void ioport_unmap(void __iomem *p);
+#endif /* CONFIG_GENERIC_IOMAP */
+
+#define xlate_dev_kmem_ptr(p)	p
+#define xlate_dev_mem_ptr(p)	((void *) (p))
+
+#ifndef virt_to_bus
+static inline unsigned long virt_to_bus(volatile void *address)
+{
+	return ((unsigned long) address);
+}
+
+static inline void *bus_to_virt(unsigned long address)
+{
+	return (void *) address;
+}
+#endif
+
+#define memset_io(a, b, c)	memset(__io_virt(a), (b), (c))
+#define memcpy_fromio(a, b, c)	memcpy((a), __io_virt(b), (c))
+#define memcpy_toio(a, b, c)	memcpy(__io_virt(a), (b), (c))
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_GENERIC_IO_H */
-- 
cgit v0.10.2


From 5c01b46bb6bb8f2662573c05c87b5d68fa25af89 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:36 +0000
Subject: asm-generic: add generic NOMMU versions of some headers

Memory management in generic is highly architecture specific,
but on NOMMU architectures, it is mostly trivial, so just
add a default implementation in asm-generic that applies
to all NOMMU architectures.

The two files cache.h and cacheflush.h can possibly also
be used by architectures that have an MMU but never require
flushing the cache or have cache lines larger than 32 bytes.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/cache.h b/include/asm-generic/cache.h
new file mode 100644
index 0000000..1bfcfe5
--- /dev/null
+++ b/include/asm-generic/cache.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_GENERIC_CACHE_H
+#define __ASM_GENERIC_CACHE_H
+/*
+ * 32 bytes appears to be the most common cache line size,
+ * so make that the default here. Architectures with larger
+ * cache lines need to provide their own cache.h.
+ */
+
+#define L1_CACHE_SHIFT		5
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+#endif /* __ASM_GENERIC_CACHE_H */
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
new file mode 100644
index 0000000..ba4ec39
--- /dev/null
+++ b/include/asm-generic/cacheflush.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_CACHEFLUSH_H
+#define __ASM_CACHEFLUSH_H
+
+/* Keep includes the same across arches.  */
+#include <linux/mm.h>
+
+/*
+ * The cache doesn't need to be flushed when TLB entries change when
+ * the cache is mapped to physical memory, not virtual memory
+ */
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_icache_range(start, end)		do { } while (0)
+#define flush_icache_page(vma,pg)		do { } while (0)
+#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
+
+#endif /* __ASM_CACHEFLUSH_H */
diff --git a/include/asm-generic/mmu.h b/include/asm-generic/mmu.h
new file mode 100644
index 0000000..4f4aa56
--- /dev/null
+++ b/include/asm-generic/mmu.h
@@ -0,0 +1,15 @@
+#ifndef __ASM_GENERIC_MMU_H
+#define __ASM_GENERIC_MMU_H
+
+/*
+ * This is the mmu.h header for nommu implementations.
+ * Architectures with an MMU need something more complex.
+ */
+#ifndef __ASSEMBLY__
+typedef struct {
+	struct vm_list_struct	*vmlist;
+	unsigned long		end_brk;
+} mm_context_t;
+#endif
+
+#endif /* __ASM_GENERIC_MMU_H */
diff --git a/include/asm-generic/mmu_context.h b/include/asm-generic/mmu_context.h
new file mode 100644
index 0000000..a7eec91
--- /dev/null
+++ b/include/asm-generic/mmu_context.h
@@ -0,0 +1,45 @@
+#ifndef __ASM_GENERIC_MMU_CONTEXT_H
+#define __ASM_GENERIC_MMU_CONTEXT_H
+
+/*
+ * Generic hooks for NOMMU architectures, which do not need to do
+ * anything special here.
+ */
+
+#include <asm-generic/mm_hooks.h>
+
+struct task_struct;
+struct mm_struct;
+
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+			struct task_struct *tsk)
+{
+}
+
+static inline int init_new_context(struct task_struct *tsk,
+			struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+}
+
+static inline void deactivate_mm(struct task_struct *task,
+			struct mm_struct *mm)
+{
+}
+
+static inline void switch_mm(struct mm_struct *prev,
+			struct mm_struct *next,
+			struct task_struct *tsk)
+{
+}
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+			       struct mm_struct *next_mm)
+{
+}
+
+#endif /* __ASM_GENERIC_MMU_CONTEXT_H */
diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h
new file mode 100644
index 0000000..75fec18
--- /dev/null
+++ b/include/asm-generic/page.h
@@ -0,0 +1,99 @@
+#ifndef __ASM_GENERIC_PAGE_H
+#define __ASM_GENERIC_PAGE_H
+/*
+ * Generic page.h implementation, for NOMMU architectures.
+ * This provides the dummy definitions for the memory management.
+ */
+
+#ifdef CONFIG_MMU
+#error need to prove a real asm/page.h
+#endif
+
+
+/* PAGE_SHIFT determines the page size */
+
+#define PAGE_SHIFT	12
+#ifdef __ASSEMBLY__
+#define PAGE_SIZE	(1 << PAGE_SHIFT)
+#else
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#endif
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+#include <asm/setup.h>
+
+#ifndef __ASSEMBLY__
+
+#define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
+#define free_user_page(page, addr)	free_page(addr)
+
+#define clear_page(page)	memset((page), 0, PAGE_SIZE)
+#define copy_page(to,from)	memcpy((to), (from), PAGE_SIZE)
+
+#define clear_user_page(page, vaddr, pg)	clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct {
+	unsigned long pte;
+} pte_t;
+typedef struct {
+	unsigned long pmd[16];
+} pmd_t;
+typedef struct {
+	unsigned long pgd;
+} pgd_t;
+typedef struct {
+	unsigned long pgprot;
+} pgprot_t;
+typedef struct page *pgtable_t;
+
+#define pte_val(x)	((x).pte)
+#define pmd_val(x)	((&x)->pmd[0])
+#define pgd_val(x)	((x).pgd)
+#define pgprot_val(x)	((x).pgprot)
+
+#define __pte(x)	((pte_t) { (x) } )
+#define __pmd(x)	((pmd_t) { (x) } )
+#define __pgd(x)	((pgd_t) { (x) } )
+#define __pgprot(x)	((pgprot_t) { (x) } )
+
+extern unsigned long memory_start;
+extern unsigned long memory_end;
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_KERNEL_RAM_BASE_ADDRESS
+#define PAGE_OFFSET		(CONFIG_KERNEL_RAM_BASE_ADDRESS)
+#else
+#define PAGE_OFFSET		(0)
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
+#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET)
+
+#define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#define virt_to_page(addr)	(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
+#define page_to_virt(page)	((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
+
+#ifndef page_to_phys
+#define page_to_phys(page)      ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+#endif
+
+#define pfn_valid(pfn)		((pfn) < max_mapnr)
+
+#define	virt_addr_valid(kaddr)	(((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
+				((void *)(kaddr) < (void *)memory_end))
+
+#endif /* __ASSEMBLY__ */
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* __ASM_GENERIC_PAGE_H */
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
new file mode 100644
index 0000000..9e429d0
--- /dev/null
+++ b/include/asm-generic/pgalloc.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_GENERIC_PGALLOC_H
+#define __ASM_GENERIC_PGALLOC_H
+/*
+ * an empty file is enough for a nommu architecture
+ */
+#ifdef CONFIG_MMU
+#error need to implement an architecture specific asm/pgalloc.h
+#endif
+
+#define check_pgt_cache()          do { } while (0)
+
+#endif /* __ASM_GENERIC_PGALLOC_H */
diff --git a/include/asm-generic/segment.h b/include/asm-generic/segment.h
new file mode 100644
index 0000000..5580eac
--- /dev/null
+++ b/include/asm-generic/segment.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_SEGMENT_H
+#define __ASM_GENERIC_SEGMENT_H
+/*
+ * Only here because we have some old header files that expect it...
+ *
+ * New architectures probably don't want to have their own version.
+ */
+
+#endif /* __ASM_GENERIC_SEGMENT_H */
diff --git a/include/asm-generic/tlbflush.h b/include/asm-generic/tlbflush.h
new file mode 100644
index 0000000..c7af037
--- /dev/null
+++ b/include/asm-generic/tlbflush.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_GENERIC_TLBFLUSH_H
+#define __ASM_GENERIC_TLBFLUSH_H
+/*
+ * This is a dummy tlbflush implementation that can be used on all
+ * nommu architectures.
+ * If you have an MMU, you need to write your own functions.
+ */
+#ifdef CONFIG_MMU
+#error need to implement an architecture specific asm/tlbflush.h
+#endif
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	BUG();
+}
+
+
+#endif /* __ASM_GENERIC_TLBFLUSH_H */
-- 
cgit v0.10.2


From eed417ddd52146f446595b5a7d8f21b1814b95b7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:37 +0000
Subject: asm-generic: add a generic uaccess.h

Based on discussions with Michal Simek and code
from m68knommu and h8300, this version of uaccess.h
should be usable by most architectures, by overriding
some parts of it.

Simple NOMMU architectures can use it out of
the box, but a minimal __access_ok() should be
added there as well.

Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
new file mode 100644
index 0000000..6d8cab2
--- /dev/null
+++ b/include/asm-generic/uaccess.h
@@ -0,0 +1,325 @@
+#ifndef __ASM_GENERIC_UACCESS_H
+#define __ASM_GENERIC_UACCESS_H
+
+/*
+ * User space memory access functions, these should work
+ * on a ny machine that has kernel and user data in the same
+ * address space, e.g. all NOMMU machines.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+
+#include <asm/segment.h>
+
+#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
+
+#ifndef KERNEL_DS
+#define KERNEL_DS	MAKE_MM_SEG(~0UL)
+#endif
+
+#ifndef USER_DS
+#define USER_DS		MAKE_MM_SEG(TASK_SIZE - 1)
+#endif
+
+#ifndef get_fs
+#define get_ds()	(KERNEL_DS)
+#define get_fs()	(current_thread_info()->addr_limit)
+
+static inline void set_fs(mm_segment_t fs)
+{
+	current_thread_info()->addr_limit = fs;
+}
+#endif
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+#define access_ok(type, addr, size) __access_ok((unsigned long)(addr),(size))
+
+/*
+ * The architecture should really override this if possible, at least
+ * doing a check on the get_fs()
+ */
+#ifndef __access_ok
+static inline int __access_ok(unsigned long addr, unsigned long size)
+{
+	return 1;
+}
+#endif
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+	unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise.  */
+extern unsigned long search_exception_table(unsigned long);
+
+/*
+ * architectures with an MMU should override these two
+ */
+#ifndef __copy_from_user
+static inline __must_check long __copy_from_user(void *to,
+		const void __user * from, unsigned long n)
+{
+	if (__builtin_constant_p(n)) {
+		switch(n) {
+		case 1:
+			*(u8 *)to = *(u8 __force *)from;
+			return 0;
+		case 2:
+			*(u16 *)to = *(u16 __force *)from;
+			return 0;
+		case 4:
+			*(u32 *)to = *(u32 __force *)from;
+			return 0;
+#ifdef CONFIG_64BIT
+		case 8:
+			*(u64 *)to = *(u64 __force *)from;
+			return 0;
+#endif
+		default:
+			break;
+		}
+	}
+
+	memcpy(to, (const void __force *)from, n);
+	return 0;
+}
+#endif
+
+#ifndef __copy_to_user
+static inline __must_check long __copy_to_user(void __user *to,
+		const void *from, unsigned long n)
+{
+	if (__builtin_constant_p(n)) {
+		switch(n) {
+		case 1:
+			*(u8 __force *)to = *(u8 *)from;
+			return 0;
+		case 2:
+			*(u16 __force *)to = *(u16 *)from;
+			return 0;
+		case 4:
+			*(u32 __force *)to = *(u32 *)from;
+			return 0;
+#ifdef CONFIG_64BIT
+		case 8:
+			*(u64 __force *)to = *(u64 *)from;
+			return 0;
+#endif
+		default:
+			break;
+		}
+	}
+
+	memcpy((void __force *)to, from, n);
+	return 0;
+}
+#endif
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ * This version just falls back to copy_{from,to}_user, which should
+ * provide a fast-path for small values.
+ */
+#define __put_user(x, ptr) \
+({								\
+	__typeof__(*(ptr)) __x = (x);				\
+	int __pu_err = -EFAULT;					\
+        __chk_user_ptr(ptr);                                    \
+	switch (sizeof (*(ptr))) {				\
+	case 1:							\
+	case 2:							\
+	case 4:							\
+	case 8:							\
+		__pu_err = __put_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		break;						\
+	default:						\
+		__put_user_bad();				\
+		break;						\
+	 }							\
+	__pu_err;						\
+})
+
+#define put_user(x, ptr)					\
+({								\
+	might_sleep();						\
+	__access_ok(ptr, sizeof (*ptr)) ?			\
+		__put_user(x, ptr) :				\
+		-EFAULT;					\
+})
+
+static inline int __put_user_fn(size_t size, void __user *ptr, void *x)
+{
+	size = __copy_to_user(ptr, x, size);
+	return size ? -EFAULT : size;
+}
+
+extern int __put_user_bad(void) __attribute__((noreturn));
+
+#define __get_user(x, ptr)					\
+({								\
+	int __gu_err = -EFAULT;					\
+	__chk_user_ptr(ptr);					\
+	switch (sizeof(*(ptr))) {				\
+	case 1: {						\
+		unsigned char __x;				\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 2: {						\
+		unsigned short __x;				\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 4: {						\
+		unsigned int __x;				\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 8: {						\
+		unsigned long long __x;				\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	default:						\
+		__get_user_bad();				\
+		break;						\
+	}							\
+	__gu_err;						\
+})
+
+#define get_user(x, ptr)					\
+({								\
+	might_sleep();						\
+	__access_ok(ptr, sizeof (*ptr)) ?			\
+		__get_user(x, ptr) :				\
+		-EFAULT;					\
+})
+
+static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
+{
+	size = __copy_from_user(x, ptr, size);
+	return size ? -EFAULT : size;
+}
+
+extern int __get_user_bad(void) __attribute__((noreturn));
+
+#ifndef __copy_from_user_inatomic
+#define __copy_from_user_inatomic __copy_from_user
+#endif
+
+#ifndef __copy_to_user_inatomic
+#define __copy_to_user_inatomic __copy_to_user
+#endif
+
+static inline long copy_from_user(void *to,
+		const void __user * from, unsigned long n)
+{
+	might_sleep();
+	if (__access_ok(from, n))
+		return __copy_from_user(to, from, n);
+	else
+		return n;
+}
+
+static inline long copy_to_user(void __user *to,
+		const void *from, unsigned long n)
+{
+	might_sleep();
+	if (__access_ok(to, n))
+		return __copy_to_user(to, from, n);
+	else
+		return n;
+}
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+#ifndef __strncpy_from_user
+static inline long
+__strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	char *tmp;
+	strncpy(dst, (const char __force *)src, count);
+	for (tmp = dst; *tmp && count > 0; tmp++, count--)
+		;
+	return (tmp - dst);
+}
+#endif
+
+static inline long
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	if (!__access_ok(src, 1))
+		return -EFAULT;
+	return __strncpy_from_user(dst, src, count);
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+#ifndef strnlen_user
+static inline long strnlen_user(const char __user *src, long n)
+{
+	return strlen((void * __force)src) + 1;
+}
+#endif
+
+static inline long strlen_user(const char __user *src)
+{
+	return strnlen_user(src, 32767);
+}
+
+/*
+ * Zero Userspace
+ */
+#ifndef __clear_user
+static inline __must_check unsigned long
+__clear_user(void __user *to, unsigned long n)
+{
+	memset((void __force *)to, 0, n);
+	return 0;
+}
+#endif
+
+static inline __must_check unsigned long
+clear_user(void __user *to, unsigned long n)
+{
+	might_sleep();
+	if (!__access_ok(to, n))
+		return n;
+
+	return __clear_user(to, n);
+}
+
+#endif /* __ASM_GENERIC_UACCESS_H */
-- 
cgit v0.10.2


From 26a28fa4fea5b8c65713aa50c124f76a88c7924d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:38 +0000
Subject: add generic lib/checksum.c

Add a generic (unoptimized) implementation of checksum.c in pure C
for use by all architectures that cannot be bother with implementing
their own version.

Based on microblaze code by Michal Simek <monstr@monstr.eu>

Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h
new file mode 100644
index 0000000..4647c76
--- /dev/null
+++ b/include/asm-generic/checksum.h
@@ -0,0 +1,79 @@
+#ifndef __ASM_GENERIC_CHECKSUM_H
+#define __ASM_GENERIC_CHECKSUM_H
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum);
+
+/*
+ * the same as csum_partial_copy, but copies from user space.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+					int len, __wsum sum, int *csum_err);
+
+#define csum_partial_copy_nocheck(src, dst, len, sum)	\
+	csum_partial_copy((src), (dst), (len), (sum))
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * Fold a partial checksum
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	u32 sum = (__force u32)csum;
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__force __sum16)~sum;
+}
+
+#ifndef csum_tcpudp_nofold
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+extern __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+		unsigned short proto, __wsum sum);
+#endif
+
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
+		  unsigned short proto, __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#endif /* __ASM_GENERIC_CHECKSUM_H */
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e4..b6869e1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -92,6 +92,8 @@ obj-$(CONFIG_NLATTR) += nlattr.o
 
 obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
 
+obj-$(CONFIG_GENERIC_CSUM) += checksum.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/checksum.c b/lib/checksum.c
new file mode 100644
index 0000000..12e5a1c
--- /dev/null
+++ b/lib/checksum.c
@@ -0,0 +1,193 @@
+/*
+ *
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IP/TCP/UDP checksumming routines
+ *
+ * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Tom May, <ftom@netcom.com>
+ *		Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
+ *		Lots of code moved from tcp.c and ip.c; see those files
+ *		for more names.
+ *
+ * 03/02/96	Jes Sorensen, Andreas Schwab, Roman Hodek:
+ *		Fixed some nasty bugs, causing some horrible crashes.
+ *		A: At some points, the sum (%0) was used as
+ *		length-counter instead of the length counter
+ *		(%1). Thanks to Roman Hodek for pointing this out.
+ *		B: GCC seems to mess up if one uses too many
+ *		data-registers to hold input values and one tries to
+ *		specify d0 and d1 as scratch registers. Letting gcc
+ *		choose these registers itself solves the problem.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
+ kills, so most of the assembly has to go. */
+
+#include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
+
+static inline unsigned short from32to16(unsigned long x)
+{
+	/* add up 16-bit and 16-bit for 16+c bit */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+	int odd, count;
+	unsigned long result = 0;
+
+	if (len <= 0)
+		goto out;
+	odd = 1 & (unsigned long) buff;
+	if (odd) {
+		result = *buff;
+		len--;
+		buff++;
+	}
+	count = len >> 1;		/* nr of 16-bit words.. */
+	if (count) {
+		if (2 & (unsigned long) buff) {
+			result += *(unsigned short *) buff;
+			count--;
+			len -= 2;
+			buff += 2;
+		}
+		count >>= 1;		/* nr of 32-bit words.. */
+		if (count) {
+			unsigned long carry = 0;
+			do {
+				unsigned long w = *(unsigned long *) buff;
+				count--;
+				buff += 4;
+				result += carry;
+				result += w;
+				carry = (w > result);
+			} while (count);
+			result += carry;
+			result = (result & 0xffff) + (result >> 16);
+		}
+		if (len & 2) {
+			result += *(unsigned short *) buff;
+			buff += 2;
+		}
+	}
+	if (len & 1)
+		result += (*buff << 8);
+	result = from32to16(result);
+	if (odd)
+		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+	return result;
+}
+
+/*
+ *	This is a version of ip_compute_csum() optimized for IP headers,
+ *	which always checksum on 4 octet boundaries.
+ */
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	return (__force __sum16)~do_csum(iph, ihl*4);
+}
+EXPORT_SYMBOL(ip_fast_csum);
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void *buff, int len, __wsum wsum)
+{
+	unsigned int sum = (__force unsigned int)wsum;
+	unsigned int result = do_csum(buff, len);
+
+	/* add in old sum, and carry.. */
+	result += sum;
+	if (sum > result)
+		result += 1;
+	return (__force __wsum)result;
+}
+EXPORT_SYMBOL(csum_partial);
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+__sum16 ip_compute_csum(const void *buff, int len)
+{
+	return (__force __sum16)~do_csum(buff, len);
+}
+EXPORT_SYMBOL(ip_compute_csum);
+
+/*
+ * copy from fs while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+						__wsum sum, int *csum_err)
+{
+	int missing;
+
+	missing = __copy_from_user(dst, src, len);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*csum_err = -EFAULT;
+	} else
+		*csum_err = 0;
+
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+/*
+ * copy from ds while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
+{
+	memcpy(dst, src, len);
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy);
+
+#ifndef csum_tcpudp_nofold
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+			unsigned short len,
+			unsigned short proto,
+			__wsum sum)
+{
+	unsigned long long s = (__force u32)sum;
+
+	s += (__force u32)saddr;
+	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
+	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
+	s += (s >> 32);
+	return (__force __wsum)s;
+}
+EXPORT_SYMBOL(csum_tcpudp_nofold);
+#endif
-- 
cgit v0.10.2


From ca8cbc8391cbd4d6e4304fc6b62682ed93d2b165 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias@kaehlcke.net>
Date: Thu, 11 Jun 2009 19:57:34 +0100
Subject: [ARM] 5552/1: ep93xx get_uart_rate(): use EP93XX_SYSCON_PWRCNT and
 EP93XX_SYSCON_PWRCN

ep93xx: get_uart_rate() uses the constants EP93XX_SYSCON_CLOCK_CONTROL
and EP93XX_SYSCON_CLOCK_UARTBAUD, which no longer exist. Use
EP93XX_SYSCON_PWRCNT and EP93XX_SYSCON_PWRCNT_UARTBAUD instead

Signed-off-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c
index 755e981..6c4c163 100644
--- a/arch/arm/mach-ep93xx/clock.c
+++ b/arch/arm/mach-ep93xx/clock.c
@@ -186,8 +186,8 @@ static unsigned long get_uart_rate(struct clk *clk)
 {
 	u32 value;
 
-	value = __raw_readl(EP93XX_SYSCON_CLOCK_CONTROL);
-	if (value & EP93XX_SYSCON_CLOCK_UARTBAUD)
+	value = __raw_readl(EP93XX_SYSCON_PWRCNT);
+	if (value & EP93XX_SYSCON_PWRCNT_UARTBAUD)
 		return EP93XX_EXT_CLK_RATE;
 	else
 		return EP93XX_EXT_CLK_RATE / 2;
-- 
cgit v0.10.2


From 8c5dd8f43367f4f266dd616f11658005bc2d20ef Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 4 Jun 2009 19:14:22 -0700
Subject: x86: handle initrd that extends into unusable memory

On a system where system memory (according e820) is not covered by
mtrr, mtrr_trim_memory converts a portion of memory to reserved, but
bootloader has already put the initrd in that range.

Thus, we need to have 64bit to use relocate_initrd too.

[ Impact: fix using initrd when mtrr_trim_memory happen ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: stable@kernel.org

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d1c636b..be5ae80 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -301,15 +301,13 @@ static void __init reserve_brk(void)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
-#ifdef CONFIG_X86_32
-
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 static void __init relocate_initrd(void)
 {
 
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 	u64 ramdisk_here;
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
@@ -365,14 +363,13 @@ static void __init relocate_initrd(void)
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
 }
-#endif
 
 static void __init reserve_initrd(void)
 {
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -402,14 +399,8 @@ static void __init reserve_initrd(void)
 		return;
 	}
 
-#ifdef CONFIG_X86_32
 	relocate_initrd();
-#else
-	printk(KERN_ERR "initrd extends beyond end of memory "
-	       "(0x%08llx > 0x%08llx)\ndisabling initrd\n",
-	       ramdisk_end, end_of_lowmem);
-	initrd_start = 0;
-#endif
+
 	free_early(ramdisk_image, ramdisk_end);
 }
 #else
-- 
cgit v0.10.2


From 73422811d290c628b4ddbf6830e5cd6fa42e84f1 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Sun, 10 May 2009 16:05:39 -0400
Subject: reiserfs: allow exposing privroot w/ xattrs enabled

This patch adds an -oexpose_privroot option to allow access to the privroot.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 45ee3d3..6d2668f 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -44,13 +44,11 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
 static inline bool is_privroot_deh(struct dentry *dir,
 				   struct reiserfs_de_head *deh)
 {
-	int ret = 0;
-#ifdef CONFIG_REISERFS_FS_XATTR
 	struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
-	ret = (dir == dir->d_parent && privroot->d_inode &&
-	       deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
-#endif
-	return ret;
+	if (reiserfs_expose_privroot(dir->d_sb))
+		return 0;
+	return (dir == dir->d_parent && privroot->d_inode &&
+	        deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
 }
 
 int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3567fb9..9dbdcfb 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -898,6 +898,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		{"conv",.setmask = 1 << REISERFS_CONVERT},
 		{"attrs",.setmask = 1 << REISERFS_ATTRS},
 		{"noattrs",.clrmask = 1 << REISERFS_ATTRS},
+		{"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT},
 #ifdef CONFIG_REISERFS_FS_XATTR
 		{"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER},
 		{"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER},
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8e7deb0..f3d47d8 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -981,7 +981,8 @@ int reiserfs_lookup_privroot(struct super_block *s)
 				strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
 		REISERFS_SB(s)->priv_root = dentry;
-		s->s_root->d_op = &xattr_lookup_poison_ops;
+		if (!reiserfs_expose_privroot(s))
+			s->s_root->d_op = &xattr_lookup_poison_ops;
 		if (dentry->d_inode)
 			dentry->d_inode->i_flags |= S_PRIVATE;
 	} else
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 6473650..dab68bb 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -453,6 +453,7 @@ enum reiserfs_mount_options {
 	REISERFS_ATTRS,
 	REISERFS_XATTRS_USER,
 	REISERFS_POSIXACL,
+	REISERFS_EXPOSE_PRIVROOT,
 	REISERFS_BARRIER_NONE,
 	REISERFS_BARRIER_FLUSH,
 
@@ -490,6 +491,7 @@ enum reiserfs_mount_options {
 #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
 #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
 #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
+#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
 #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
 #define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
 #define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))
-- 
cgit v0.10.2


From 4e44b6852e03c915618ca6776b6697b436246b00 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 7 Apr 2009 11:08:56 -0400
Subject: Get rid of path_lookup in autofs4

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 84168c0..f71dac9 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -192,77 +192,42 @@ static int autofs_dev_ioctl_protosubver(struct file *fp,
 	return 0;
 }
 
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested device number (aka. new_encode_dev(sb->s_dev).
- */
-static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno)
+static int find_autofs_mount(const char *pathname,
+			     struct path *res,
+			     int test(struct path *path, void *data),
+			     void *data)
 {
-	struct dentry *dentry;
-	struct inode *inode;
-	struct super_block *sb;
-	dev_t s_dev;
-	unsigned int err;
-
+	struct path path;
+	int err = kern_path(pathname, 0, &path);
+	if (err)
+		return err;
 	err = -ENOENT;
-
-	/* Lookup the dentry name at the base of our mount point */
-	dentry = d_lookup(nd->path.dentry, &nd->last);
-	if (!dentry)
-		goto out;
-
-	dput(nd->path.dentry);
-	nd->path.dentry = dentry;
-
-	/* And follow the mount stack looking for our autofs mount */
-	while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
-		inode = nd->path.dentry->d_inode;
-		if (!inode)
-			break;
-
-		sb = inode->i_sb;
-		s_dev = new_encode_dev(sb->s_dev);
-		if (devno == s_dev) {
-			if (sb->s_magic == AUTOFS_SUPER_MAGIC) {
+	while (path.dentry == path.mnt->mnt_root) {
+		if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) {
+			if (test(&path, data)) {
+				path_get(&path);
+				if (!err) /* already found some */
+					path_put(res);
+				*res = path;
 				err = 0;
-				break;
 			}
 		}
+		if (!follow_up(&path.mnt, &path.dentry))
+			break;
 	}
-out:
+	path_put(&path);
 	return err;
 }
 
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested mount type (ie. indirect, direct or offset).
- */
-static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type)
+static int test_by_dev(struct path *path, void *p)
 {
-	struct dentry *dentry;
-	struct autofs_info *ino;
-	unsigned int err;
-
-	err = -ENOENT;
-
-	/* Lookup the dentry name at the base of our mount point */
-	dentry = d_lookup(nd->path.dentry, &nd->last);
-	if (!dentry)
-		goto out;
-
-	dput(nd->path.dentry);
-	nd->path.dentry = dentry;
+	return path->mnt->mnt_sb->s_dev == *(dev_t *)p;
+}
 
-	/* And follow the mount stack looking for our autofs mount */
-	while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
-		ino = autofs4_dentry_ino(nd->path.dentry);
-		if (ino && ino->sbi->type & type) {
-			err = 0;
-			break;
-		}
-	}
-out:
-	return err;
+static int test_by_type(struct path *path, void *p)
+{
+	struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
+	return ino && ino->sbi->type & *(unsigned *)p;
 }
 
 static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
@@ -283,31 +248,25 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
  * Open a file descriptor on the autofs mount point corresponding
  * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
  */
-static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
+static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
 {
-	struct file *filp;
-	struct nameidata nd;
 	int err, fd;
 
 	fd = get_unused_fd();
 	if (likely(fd >= 0)) {
-		/* Get nameidata of the parent directory */
-		err = path_lookup(path, LOOKUP_PARENT, &nd);
+		struct file *filp;
+		struct path path;
+
+		err = find_autofs_mount(name, &path, test_by_dev, &devid);
 		if (err)
 			goto out;
 
 		/*
-		 * Search down, within the parent, looking for an
-		 * autofs super block that has the device number
+		 * Find autofs super block that has the device number
 		 * corresponding to the autofs fs we want to open.
 		 */
-		err = autofs_dev_ioctl_find_super(&nd, devid);
-		if (err) {
-			path_put(&nd.path);
-			goto out;
-		}
 
-		filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+		filp = dentry_open(path.dentry, path.mnt, O_RDONLY,
 				   current_cred());
 		if (IS_ERR(filp)) {
 			err = PTR_ERR(filp);
@@ -340,7 +299,7 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
 	param->ioctlfd = -1;
 
 	path = param->path;
-	devid = param->openmount.devid;
+	devid = new_decode_dev(param->openmount.devid);
 
 	err = 0;
 	fd = autofs_dev_ioctl_open_mountpoint(path, devid);
@@ -475,8 +434,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
 				      struct autofs_dev_ioctl *param)
 {
 	struct autofs_info *ino;
-	struct nameidata nd;
-	const char *path;
+	struct path path;
 	dev_t devid;
 	int err = -ENOENT;
 
@@ -485,32 +443,24 @@ static int autofs_dev_ioctl_requester(struct file *fp,
 		goto out;
 	}
 
-	path = param->path;
-	devid = new_encode_dev(sbi->sb->s_dev);
+	devid = sbi->sb->s_dev;
 
 	param->requester.uid = param->requester.gid = -1;
 
-	/* Get nameidata of the parent directory */
-	err = path_lookup(path, LOOKUP_PARENT, &nd);
+	err = find_autofs_mount(param->path, &path, test_by_dev, &devid);
 	if (err)
 		goto out;
 
-	err = autofs_dev_ioctl_find_super(&nd, devid);
-	if (err)
-		goto out_release;
-
-	ino = autofs4_dentry_ino(nd.path.dentry);
+	ino = autofs4_dentry_ino(path.dentry);
 	if (ino) {
 		err = 0;
-		autofs4_expire_wait(nd.path.dentry);
+		autofs4_expire_wait(path.dentry);
 		spin_lock(&sbi->fs_lock);
 		param->requester.uid = ino->uid;
 		param->requester.gid = ino->gid;
 		spin_unlock(&sbi->fs_lock);
 	}
-
-out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return err;
 }
@@ -569,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 					 struct autofs_sb_info *sbi,
 					 struct autofs_dev_ioctl *param)
 {
-	struct nameidata nd;
-	const char *path;
+	struct path path;
+	const char *name;
 	unsigned int type;
 	unsigned int devid, magic;
 	int err = -ENOENT;
@@ -580,71 +530,46 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 		goto out;
 	}
 
-	path = param->path;
+	name = param->path;
 	type = param->ismountpoint.in.type;
 
 	param->ismountpoint.out.devid = devid = 0;
 	param->ismountpoint.out.magic = magic = 0;
 
 	if (!fp || param->ioctlfd == -1) {
-		if (autofs_type_any(type)) {
-			struct super_block *sb;
-
-			err = path_lookup(path, LOOKUP_FOLLOW, &nd);
-			if (err)
-				goto out;
-
-			sb = nd.path.dentry->d_sb;
-			devid = new_encode_dev(sb->s_dev);
-		} else {
-			struct autofs_info *ino;
-
-			err = path_lookup(path, LOOKUP_PARENT, &nd);
-			if (err)
-				goto out;
-
-			err = autofs_dev_ioctl_find_sbi_type(&nd, type);
-			if (err)
-				goto out_release;
-
-			ino = autofs4_dentry_ino(nd.path.dentry);
-			devid = autofs4_get_dev(ino->sbi);
-		}
-
+		if (autofs_type_any(type))
+			err = kern_path(name, LOOKUP_FOLLOW, &path);
+		else
+			err = find_autofs_mount(name, &path, test_by_type, &type);
+		if (err)
+			goto out;
+		devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
 		err = 0;
-		if (nd.path.dentry->d_inode &&
-		    nd.path.mnt->mnt_root == nd.path.dentry) {
+		if (path.dentry->d_inode &&
+		    path.mnt->mnt_root == path.dentry) {
 			err = 1;
-			magic = nd.path.dentry->d_inode->i_sb->s_magic;
+			magic = path.dentry->d_inode->i_sb->s_magic;
 		}
 	} else {
-		dev_t dev = autofs4_get_dev(sbi);
+		dev_t dev = sbi->sb->s_dev;
 
-		err = path_lookup(path, LOOKUP_PARENT, &nd);
+		err = find_autofs_mount(name, &path, test_by_dev, &dev);
 		if (err)
 			goto out;
 
-		err = autofs_dev_ioctl_find_super(&nd, dev);
-		if (err)
-			goto out_release;
-
-		devid = dev;
+		devid = new_encode_dev(dev);
 
-		err = have_submounts(nd.path.dentry);
+		err = have_submounts(path.dentry);
 
-		if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
-			if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
-				struct inode *inode = nd.path.dentry->d_inode;
-				magic = inode->i_sb->s_magic;
-			}
+		if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) {
+			if (follow_down(&path.mnt, &path.dentry))
+				magic = path.mnt->mnt_sb->s_magic;
 		}
 	}
 
 	param->ismountpoint.out.devid = devid;
 	param->ismountpoint.out.magic = magic;
-
-out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return err;
 }
-- 
cgit v0.10.2


From 9b4a9b14a793bc69b505ed916051f6f32db13bb8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 7 Apr 2009 11:44:16 -0400
Subject: Preparations to caching root in path_walk()

Split do_path_lookup(), opencode the call from do_filp_open()
do_filp_open() is the only caller of do_path_lookup() that
cares about root afterwards (it keeps resolving symlinks on
O_CREAT path after it'd done LOOKUP_PARENT walk).  So when
we start caching fs->root in path_walk(), it'll need a different
treatment.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index c82805d..895733e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1017,9 +1017,7 @@ static int path_walk(const char *name, struct nameidata *nd)
 	return link_path_walk(name, nd);
 }
 
-/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int do_path_lookup(int dfd, const char *name,
-				unsigned int flags, struct nameidata *nd)
+static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
 {
 	int retval = 0;
 	int fput_needed;
@@ -1063,17 +1061,25 @@ static int do_path_lookup(int dfd, const char *name,
 
 		fput_light(file, fput_needed);
 	}
+	return 0;
 
-	retval = path_walk(name, nd);
+fput_fail:
+	fput_light(file, fput_needed);
+out_fail:
+	return retval;
+}
+
+/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+static int do_path_lookup(int dfd, const char *name,
+				unsigned int flags, struct nameidata *nd)
+{
+	int retval = path_init(dfd, name, flags, nd);
+	if (!retval)
+		retval = path_walk(name, nd);
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
-out_fail:
 	return retval;
-
-fput_fail:
-	fput_light(file, fput_needed);
-	goto out_fail;
 }
 
 int path_lookup(const char *name, unsigned int flags,
@@ -1676,9 +1682,14 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	/*
 	 * Create - we need to know the parent.
 	 */
-	error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
+	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
 	if (error)
 		return ERR_PTR(error);
+	error = path_walk(pathname, &nd);
+	if (error)
+		return ERR_PTR(error);
+	if (unlikely(!audit_dummy_context()))
+		audit_inode(pathname, nd.path.dentry);
 
 	/*
 	 * We have the parent and last component. First of all, check
-- 
cgit v0.10.2


From 2a737871108de9ba8930f7650d549f1383767f8b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 7 Apr 2009 11:49:53 -0400
Subject: Cache root in nameidata

New field: nd->root.  When pathname resolution wants to know the root,
check if nd->root.mnt is non-NULL; use nd->root if it is, otherwise
copy current->fs->root there.  After path_walk() is finished, we check
if we'd got a cached value in nd->root and drop it.  Before calling
path_walk() we should either set nd->root.mnt to NULL *or* copy (and
pin down) some path to nd->root.  In the latter case we won't be
looking at current->fs->root at all.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index 895733e..88baaf2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -552,6 +552,17 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd
 	return result;
 }
 
+static __always_inline void set_root(struct nameidata *nd)
+{
+	if (!nd->root.mnt) {
+		struct fs_struct *fs = current->fs;
+		read_lock(&fs->lock);
+		nd->root = fs->root;
+		path_get(&nd->root);
+		read_unlock(&fs->lock);
+	}
+}
+
 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
 {
 	int res = 0;
@@ -560,14 +571,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
 		goto fail;
 
 	if (*link == '/') {
-		struct fs_struct *fs = current->fs;
-
+		set_root(nd);
 		path_put(&nd->path);
-
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
+		nd->path = nd->root;
+		path_get(&nd->root);
 	}
 
 	res = link_path_walk(link, nd);
@@ -741,19 +748,16 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 
 static __always_inline void follow_dotdot(struct nameidata *nd)
 {
-	struct fs_struct *fs = current->fs;
+	set_root(nd);
 
 	while(1) {
 		struct vfsmount *parent;
 		struct dentry *old = nd->path.dentry;
 
-                read_lock(&fs->lock);
-		if (nd->path.dentry == fs->root.dentry &&
-		    nd->path.mnt == fs->root.mnt) {
-                        read_unlock(&fs->lock);
+		if (nd->path.dentry == nd->root.dentry &&
+		    nd->path.mnt == nd->root.mnt) {
 			break;
 		}
-                read_unlock(&fs->lock);
 		spin_lock(&dcache_lock);
 		if (nd->path.dentry != nd->path.mnt->mnt_root) {
 			nd->path.dentry = dget(nd->path.dentry->d_parent);
@@ -1022,18 +1026,18 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
 	int retval = 0;
 	int fput_needed;
 	struct file *file;
-	struct fs_struct *fs = current->fs;
 
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags;
 	nd->depth = 0;
+	nd->root.mnt = NULL;
 
 	if (*name=='/') {
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
+		set_root(nd);
+		nd->path = nd->root;
+		path_get(&nd->root);
 	} else if (dfd == AT_FDCWD) {
+		struct fs_struct *fs = current->fs;
 		read_lock(&fs->lock);
 		nd->path = fs->pwd;
 		path_get(&fs->pwd);
@@ -1079,6 +1083,10 @@ static int do_path_lookup(int dfd, const char *name,
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
+	if (nd->root.mnt) {
+		path_put(&nd->root);
+		nd->root.mnt = NULL;
+	}
 	return retval;
 }
 
@@ -1115,6 +1123,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 	nd->last_type = LAST_ROOT;
 	nd->flags = flags;
 	nd->depth = 0;
+	nd->root.mnt = NULL;
 
 	nd->path.dentry = dentry;
 	nd->path.mnt = mnt;
@@ -1125,8 +1134,12 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
 
-	return retval;
+	if (nd->root.mnt) {
+		path_put(&nd->root);
+		nd->root.mnt = NULL;
+	}
 
+	return retval;
 }
 
 /**
@@ -1817,6 +1830,8 @@ exit:
 	if (!IS_ERR(nd.intent.open.file))
 		release_open_intent(&nd);
 exit_parent:
+	if (nd.root.mnt)
+		path_put(&nd.root);
 	path_put(&nd.path);
 	return ERR_PTR(error);
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 518098f..325dd3a 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -18,6 +18,7 @@ enum { MAX_NESTED_LINKS = 8 };
 struct nameidata {
 	struct path	path;
 	struct qstr	last;
+	struct path	root;
 	unsigned int	flags;
 	int		last_type;
 	unsigned	depth;
-- 
cgit v0.10.2


From 5b857119538daac7118c1364d7ff3613f12b84d3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 7 Apr 2009 11:53:49 -0400
Subject: Make vfs_path_lookup() use starting point as root

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index 88baaf2..4379ef9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1123,21 +1123,20 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 	nd->last_type = LAST_ROOT;
 	nd->flags = flags;
 	nd->depth = 0;
-	nd->root.mnt = NULL;
 
 	nd->path.dentry = dentry;
 	nd->path.mnt = mnt;
 	path_get(&nd->path);
+	nd->root = nd->path;
+	path_get(&nd->root);
 
 	retval = path_walk(name, nd);
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
 
-	if (nd->root.mnt) {
-		path_put(&nd->root);
-		nd->root.mnt = NULL;
-	}
+	path_put(&nd->root);
+	nd->root.mnt = NULL;
 
 	return retval;
 }
-- 
cgit v0.10.2


From dd5cae6e9772ecc62fd374f7a8ec10eb51c96c4d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 7 Apr 2009 12:21:18 -0400
Subject: Don't bother with check_mnt() in do_add_mount() on shrinkable ones

These guys are what we add as submounts; checks for "is that attached in
our namespace" are simply irrelevant for those and counterproductive for
use of private vfsmount trees a-la what NFS folks want.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namespace.c b/fs/namespace.c
index 134d494..88a904d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1698,7 +1698,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 	       follow_down(&path->mnt, &path->dentry))
 		;
 	err = -EINVAL;
-	if (!check_mnt(path->mnt))
+	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
 		goto unlock;
 
 	/* Refuse the same filesystem on the same mount point */
-- 
cgit v0.10.2


From 55430e2ecee574e729c12d4063b3ecabfa98fa82 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 02:04:46 -0400
Subject: nfsd struct path use: exp_get_by_name()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5839b22..3f6d51b 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -847,9 +847,8 @@ exp_get_fsid_key(svc_client *clp, int fsid)
 	return exp_find_key(clp, FSID_NUM, fsidv, NULL);
 }
 
-static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
-				   struct dentry *dentry,
-				   struct cache_req *reqp)
+static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
+				     struct cache_req *reqp)
 {
 	struct svc_export *exp, key;
 	int err;
@@ -858,8 +857,7 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
 		return ERR_PTR(-ENOENT);
 
 	key.ex_client = clp;
-	key.ex_path.mnt = mnt;
-	key.ex_path.dentry = dentry;
+	key.ex_path = *path;
 
 	exp = svc_export_lookup(&key);
 	if (exp == NULL)
@@ -877,20 +875,19 @@ static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
 				     struct dentry *dentry,
 				     struct cache_req *reqp)
 {
+	struct path path = {.mnt = mnt, .dentry = dentry};
 	svc_export *exp;
 
-	dget(dentry);
-	exp = exp_get_by_name(clp, mnt, dentry, reqp);
-
-	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
-		struct dentry *parent;
+	dget(path.dentry);
+	exp = exp_get_by_name(clp, &path, reqp);
 
-		parent = dget_parent(dentry);
-		dput(dentry);
-		dentry = parent;
-		exp = exp_get_by_name(clp, mnt, dentry, reqp);
+	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path.dentry)) {
+		struct dentry *parent = dget_parent(path.dentry);
+		dput(path.dentry);
+		path.dentry = parent;
+		exp = exp_get_by_name(clp, &path, reqp);
 	}
-	dput(dentry);
+	dput(path.dentry);
 	return exp;
 }
 
@@ -1018,7 +1015,7 @@ exp_export(struct nfsctl_export *nxp)
 		goto out_put_clp;
 	err = -EINVAL;
 
-	exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL);
+	exp = exp_get_by_name(clp, &path, NULL);
 
 	memset(&new, 0, sizeof(new));
 
@@ -1135,7 +1132,7 @@ exp_unexport(struct nfsctl_export *nxp)
 		goto out_domain;
 
 	err = -EINVAL;
-	exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL);
+	exp = exp_get_by_name(dom, &path, NULL);
 	path_put(&path);
 	if (IS_ERR(exp))
 		goto out_domain;
@@ -1207,7 +1204,7 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type,
 	if (IS_ERR(ek))
 		return ERR_CAST(ek);
 
-	exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp);
+	exp = exp_get_by_name(clp, &ek->ek_path, reqp);
 	cache_put(&ek->h, &svc_expkey_cache);
 
 	if (IS_ERR(exp))
@@ -1251,12 +1248,13 @@ rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
 		struct dentry *dentry)
 {
 	struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
+	struct path path = {.mnt = mnt, .dentry = dentry};
 
 	if (rqstp->rq_client == NULL)
 		goto gss;
 
 	/* First try the auth_unix client: */
-	exp = exp_get_by_name(rqstp->rq_client, mnt, dentry,
+	exp = exp_get_by_name(rqstp->rq_client, &path,
 						&rqstp->rq_chandle);
 	if (PTR_ERR(exp) == -ENOENT)
 		goto gss;
@@ -1269,7 +1267,7 @@ gss:
 	/* Otherwise, try falling back on gss client */
 	if (rqstp->rq_gssclient == NULL)
 		return exp;
-	gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry,
+	gssexp = exp_get_by_name(rqstp->rq_gssclient, &path,
 						&rqstp->rq_chandle);
 	if (PTR_ERR(gssexp) == -ENOENT)
 		return exp;
-- 
cgit v0.10.2


From 5bf3bd2b5cb68ba43c91f5bd0ac043543fba2558 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 02:14:32 -0400
Subject: switch exp_parent() to struct path

... and lose the always-NULL last argument (non-NULL case had been
split off a while ago).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 3f6d51b..5149dab 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -871,23 +871,19 @@ static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
 /*
  * Find the export entry for a given dentry.
  */
-static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
-				     struct dentry *dentry,
-				     struct cache_req *reqp)
+static struct svc_export *exp_parent(svc_client *clp, struct path *path)
 {
-	struct path path = {.mnt = mnt, .dentry = dentry};
-	svc_export *exp;
-
-	dget(path.dentry);
-	exp = exp_get_by_name(clp, &path, reqp);
-
-	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path.dentry)) {
-		struct dentry *parent = dget_parent(path.dentry);
-		dput(path.dentry);
-		path.dentry = parent;
-		exp = exp_get_by_name(clp, &path, reqp);
+	struct dentry *saved = dget(path->dentry);
+	svc_export *exp = exp_get_by_name(clp, path, NULL);
+
+	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+		struct dentry *parent = dget_parent(path->dentry);
+		dput(path->dentry);
+		path->dentry = parent;
+		exp = exp_get_by_name(clp, path, NULL);
 	}
-	dput(path.dentry);
+	dput(path->dentry);
+	path->dentry = saved;
 	return exp;
 }
 
@@ -1174,7 +1170,7 @@ exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize)
 	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
 		 name, path.dentry, clp->name,
 		 inode->i_sb->s_id, inode->i_ino);
-	exp = exp_parent(clp, path.mnt, path.dentry, NULL);
+	exp = exp_parent(clp, &path);
 	if (IS_ERR(exp)) {
 		err = PTR_ERR(exp);
 		goto out;
-- 
cgit v0.10.2


From 91c9fa8f75877c0c1e455c23e8f8206c91c8f77f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 02:42:05 -0400
Subject: switch rqst_exp_get_by_name()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5149dab..84f5e5c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1240,18 +1240,15 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
  * use exp_get_by_name() or exp_find().
  */
 struct svc_export *
-rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
-		struct dentry *dentry)
+rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
 {
 	struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
-	struct path path = {.mnt = mnt, .dentry = dentry};
 
 	if (rqstp->rq_client == NULL)
 		goto gss;
 
 	/* First try the auth_unix client: */
-	exp = exp_get_by_name(rqstp->rq_client, &path,
-						&rqstp->rq_chandle);
+	exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle);
 	if (PTR_ERR(exp) == -ENOENT)
 		goto gss;
 	if (IS_ERR(exp))
@@ -1263,8 +1260,7 @@ gss:
 	/* Otherwise, try falling back on gss client */
 	if (rqstp->rq_gssclient == NULL)
 		return exp;
-	gssexp = exp_get_by_name(rqstp->rq_gssclient, &path,
-						&rqstp->rq_chandle);
+	gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle);
 	if (PTR_ERR(gssexp) == -ENOENT)
 		return exp;
 	if (!IS_ERR(exp))
@@ -1307,9 +1303,10 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
 		struct dentry *dentry)
 {
 	struct svc_export *exp;
+	struct path path = {.mnt = mnt, .dentry = dentry};
 
 	dget(dentry);
-	exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
+	exp = rqst_exp_get_by_name(rqstp, &path);
 
 	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
 		struct dentry *parent;
@@ -1317,7 +1314,7 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
 		parent = dget_parent(dentry);
 		dput(dentry);
 		dentry = parent;
-		exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
+		exp = rqst_exp_get_by_name(rqstp, &path);
 	}
 	dput(dentry);
 	return exp;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bd584bc..d84c4ea 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -101,36 +101,36 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 {
 	struct svc_export *exp = *expp, *exp2 = NULL;
 	struct dentry *dentry = *dpp;
-	struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-	struct dentry *mounts = dget(dentry);
+	struct path path = {.mnt = mntget(exp->ex_path.mnt),
+			    .dentry = dget(dentry)};
 	int err = 0;
 
-	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+	while (follow_down(&path.mnt, &path.dentry) &&
+	       d_mountpoint(path.dentry))
+		;
 
-	exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+	exp2 = rqst_exp_get_by_name(rqstp, &path);
 	if (IS_ERR(exp2)) {
 		if (PTR_ERR(exp2) != -ENOENT)
 			err = PTR_ERR(exp2);
-		dput(mounts);
-		mntput(mnt);
+		path_put(&path);
 		goto out;
 	}
 	if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
 		/* successfully crossed mount point */
 		/*
-		 * This is subtle: dentry is *not* under mnt at this point.
-		 * The only reason we are safe is that original mnt is pinned
-		 * down by exp, so we should dput before putting exp.
+		 * This is subtle: path.dentry is *not* on path.mnt
+		 * at this point.  The only reason we are safe is that
+		 * original mnt is pinned down by exp, so we should
+		 * put path *before* putting exp
 		 */
-		dput(dentry);
-		*dpp = mounts;
-		exp_put(exp);
+		*dpp = path.dentry;
+		path.dentry = dentry;
 		*expp = exp2;
-	} else {
-		exp_put(exp2);
-		dput(mounts);
+		exp2 = exp;
 	}
-	mntput(mnt);
+	path_put(&path);
+	exp_put(exp2);
 out:
 	return err;
 }
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index bcd0201..98f6fd5 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -125,8 +125,7 @@ void			nfsd_export_flush(void);
 void			exp_readlock(void);
 void			exp_readunlock(void);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
-					     struct vfsmount *,
-					     struct dentry *);
+					     struct path *);
 struct svc_export *	rqst_exp_parent(struct svc_rqst *,
 					struct vfsmount *mnt,
 					struct dentry *dentry);
-- 
cgit v0.10.2


From e64c390ca0b60fd2119331ef1fa888d7ea27e424 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 03:00:46 -0400
Subject: switch rqst_exp_parent()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 84f5e5c..8b1f8ef 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1299,24 +1299,19 @@ gss:
 }
 
 struct svc_export *
-rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
-		struct dentry *dentry)
+rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
 {
-	struct svc_export *exp;
-	struct path path = {.mnt = mnt, .dentry = dentry};
-
-	dget(dentry);
-	exp = rqst_exp_get_by_name(rqstp, &path);
-
-	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
-		struct dentry *parent;
+	struct dentry *saved = dget(path->dentry);
+	struct svc_export *exp = rqst_exp_get_by_name(rqstp, path);
 
-		parent = dget_parent(dentry);
-		dput(dentry);
-		dentry = parent;
-		exp = rqst_exp_get_by_name(rqstp, &path);
+	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+		struct dentry *parent = dget_parent(path->dentry);
+		dput(path->dentry);
+		path->dentry = parent;
+		exp = rqst_exp_get_by_name(rqstp, path);
 	}
-	dput(dentry);
+	dput(path->dentry);
+	path->dentry = saved;
 	return exp;
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d84c4ea..9f1ea31 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -169,28 +169,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			/* checking mountpoint crossing is very different when stepping up */
 			struct svc_export *exp2 = NULL;
 			struct dentry *dp;
-			struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-			dentry = dget(dparent);
-			while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+			struct path path = {.mnt = mntget(exp->ex_path.mnt),
+					    .dentry = dget(dparent)};
+
+			while (path.dentry == path.mnt->mnt_root &&
+			       follow_up(&path.mnt, &path.dentry))
 				;
-			dp = dget_parent(dentry);
-			dput(dentry);
-			dentry = dp;
+			dp = dget_parent(path.dentry);
+			dput(path.dentry);
+			path.dentry = dp;
 
-			exp2 = rqst_exp_parent(rqstp, mnt, dentry);
+			exp2 = rqst_exp_parent(rqstp, &path);
 			if (PTR_ERR(exp2) == -ENOENT) {
-				dput(dentry);
 				dentry = dget(dparent);
 			} else if (IS_ERR(exp2)) {
 				host_err = PTR_ERR(exp2);
-				dput(dentry);
-				mntput(mnt);
+				path_put(&path);
 				goto out_nfserr;
 			} else {
+				dentry = dget(path.dentry);
 				exp_put(exp);
 				exp = exp2;
 			}
-			mntput(mnt);
+			path_put(&path);
 		}
 	} else {
 		fh_lock(fhp);
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 98f6fd5..a6d9ef2 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -127,8 +127,7 @@ void			exp_readunlock(void);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
 					     struct path *);
 struct svc_export *	rqst_exp_parent(struct svc_rqst *,
-					struct vfsmount *mnt,
-					struct dentry *dentry);
+					struct path *);
 int			exp_rootfh(struct auth_domain *, 
 					char *path, struct knfsd_fh *, int maxsize);
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
-- 
cgit v0.10.2


From bab77ebf51e3902f608ecf08c9d34a0a52ac35a9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 03:26:48 -0400
Subject: switch follow_up() to struct path

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index f71dac9..6704075 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -212,7 +212,7 @@ static int find_autofs_mount(const char *pathname,
 				err = 0;
 			}
 		}
-		if (!follow_up(&path.mnt, &path.dentry))
+		if (!follow_up(&path))
 			break;
 	}
 	path_put(&path);
diff --git a/fs/namei.c b/fs/namei.c
index 4379ef9..8c1f48a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -675,23 +675,23 @@ loop:
 	return err;
 }
 
-int follow_up(struct vfsmount **mnt, struct dentry **dentry)
+int follow_up(struct path *path)
 {
 	struct vfsmount *parent;
 	struct dentry *mountpoint;
 	spin_lock(&vfsmount_lock);
-	parent=(*mnt)->mnt_parent;
-	if (parent == *mnt) {
+	parent = path->mnt->mnt_parent;
+	if (parent == path->mnt) {
 		spin_unlock(&vfsmount_lock);
 		return 0;
 	}
 	mntget(parent);
-	mountpoint=dget((*mnt)->mnt_mountpoint);
+	mountpoint = dget(path->mnt->mnt_mountpoint);
 	spin_unlock(&vfsmount_lock);
-	dput(*dentry);
-	*dentry = mountpoint;
-	mntput(*mnt);
-	*mnt = parent;
+	dput(path->dentry);
+	path->dentry = mountpoint;
+	mntput(path->mnt);
+	path->mnt = parent;
 	return 1;
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9f1ea31..7b2b3f7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -173,7 +173,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
 					    .dentry = dget(dparent)};
 
 			while (path.dentry == path.mnt->mnt_root &&
-			       follow_up(&path.mnt, &path.dentry))
+			       follow_up(&path))
 				;
 			dp = dget_parent(path.dentry);
 			dput(path.dentry);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 325dd3a..9cd5a717b 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -79,7 +79,7 @@ extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
 
 extern int follow_down(struct vfsmount **, struct dentry **);
-extern int follow_up(struct vfsmount **, struct dentry **);
+extern int follow_up(struct path *);
 
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
-- 
cgit v0.10.2


From 589ff870ed60a9ebdd5ec99ec3f5afe1282fe151 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 03:28:19 -0400
Subject: Switch collect_mounts() to struct path

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namespace.c b/fs/namespace.c
index 88a904d..c859622 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1253,11 +1253,11 @@ Enomem:
 	return NULL;
 }
 
-struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *collect_mounts(struct path *path)
 {
 	struct vfsmount *tree;
 	down_write(&namespace_sem);
-	tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
+	tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
 	up_write(&namespace_sem);
 	return tree;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 323b5ce..03fb210 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1800,7 +1800,7 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
-extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *);
+extern struct vfsmount *collect_mounts(struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
 
 extern int vfs_statfs(struct dentry *, struct kstatfs *);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 6e73517..1f6396d7 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -568,7 +568,7 @@ void audit_trim_trees(void)
 		if (err)
 			goto skip_it;
 
-		root_mnt = collect_mounts(path.mnt, path.dentry);
+		root_mnt = collect_mounts(&path);
 		path_put(&path);
 		if (!root_mnt)
 			goto skip_it;
@@ -660,7 +660,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	err = kern_path(tree->pathname, 0, &path);
 	if (err)
 		goto Err;
-	mnt = collect_mounts(path.mnt, path.dentry);
+	mnt = collect_mounts(&path);
 	path_put(&path);
 	if (!mnt) {
 		err = -ENOMEM;
@@ -720,7 +720,7 @@ int audit_tag_tree(char *old, char *new)
 	err = kern_path(new, 0, &path);
 	if (err)
 		return err;
-	tagged = collect_mounts(path.mnt, path.dentry);
+	tagged = collect_mounts(&path);
 	path_put(&path);
 	if (!tagged)
 		return -ENOMEM;
-- 
cgit v0.10.2


From 9393bd07cf218ca51d0e627653f906a9d76a9131 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 13:58:15 -0400
Subject: switch follow_down()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2b9e2d0..c52be53 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -244,7 +244,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
 		while (d_mountpoint(nd->path.dentry) &&
-		       follow_down(&nd->path.mnt, &nd->path.dentry))
+		       follow_down(&nd->path))
 			;
 		err = 0;
 	default:
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 4eb4d8d..2316e94 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -85,13 +85,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
 		}
 		path.mnt = mnt;
 		path_get(&path);
-		if (!follow_down(&path.mnt, &path.dentry)) {
+		if (!follow_down(&path)) {
 			path_put(&path);
 			DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
 			continue;
 		}
-		while (d_mountpoint(path.dentry) &&
-		       follow_down(&path.mnt, &path.dentry))
+		while (d_mountpoint(path.dentry) && follow_down(&path));
 			;
 		umount_ok = may_umount(path.mnt);
 		path_put(&path);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b7ff33c..8f7cdde 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -223,12 +223,12 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 
-static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static inline int autofs4_follow_mount(struct path *path)
 {
 	int res = 0;
 
-	while (d_mountpoint(*dentry)) {
-		int followed = follow_down(mnt, dentry);
+	while (d_mountpoint(path->dentry)) {
+		int followed = follow_down(path);
 		if (!followed)
 			break;
 		res = 1;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 6704075..f3da2eb 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -562,7 +562,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 		err = have_submounts(path.dentry);
 
 		if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) {
-			if (follow_down(&path.mnt, &path.dentry))
+			if (follow_down(&path))
 				magic = path.mnt->mnt_sb->s_magic;
 		}
 	}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3077d8f..aa39ae8 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -48,19 +48,19 @@ static inline int autofs4_can_expire(struct dentry *dentry,
 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct dentry *top = dentry;
+	struct path path = {.mnt = mnt, .dentry = dentry};
 	int status = 1;
 
 	DPRINTK("dentry %p %.*s",
 		dentry, (int)dentry->d_name.len, dentry->d_name.name);
 
-	mntget(mnt);
-	dget(dentry);
+	path_get(&path);
 
-	if (!follow_down(&mnt, &dentry))
+	if (!follow_down(&path))
 		goto done;
 
-	if (is_autofs4_dentry(dentry)) {
-		struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	if (is_autofs4_dentry(path.dentry)) {
+		struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb);
 
 		/* This is an autofs submount, we can't expire it */
 		if (autofs_type_indirect(sbi->type))
@@ -70,7 +70,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 		 * Otherwise it's an offset mount and we need to check
 		 * if we can umount its mount, if there is one.
 		 */
-		if (!d_mountpoint(dentry)) {
+		if (!d_mountpoint(path.dentry)) {
 			status = 0;
 			goto done;
 		}
@@ -86,8 +86,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	status = 0;
 done:
 	DPRINTK("returning = %d", status);
-	dput(dentry);
-	mntput(mnt);
+	path_put(&path);
 	return status;
 }
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e383bf0..b96a3c5 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -181,7 +181,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		nd->flags);
 	/*
 	 * For an expire of a covered direct or offset mount we need
-	 * to beeak out of follow_down() at the autofs mount trigger
+	 * to break out of follow_down() at the autofs mount trigger
 	 * (d_mounted--), so we can see the expiring flag, and manage
 	 * the blocking and following here until the expire is completed.
 	 */
@@ -190,7 +190,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		if (ino->flags & AUTOFS_INF_EXPIRING) {
 			spin_unlock(&sbi->fs_lock);
 			/* Follow down to our covering mount. */
-			if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+			if (!follow_down(&nd->path))
 				goto done;
 			goto follow;
 		}
@@ -230,8 +230,7 @@ follow:
 	 * to follow it.
 	 */
 	if (d_mountpoint(dentry)) {
-		if (!autofs4_follow_mount(&nd->path.mnt,
-					  &nd->path.dentry)) {
+		if (!autofs4_follow_mount(&nd->path)) {
 			status = -ENOENT;
 			goto out_error;
 		}
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 83d6275..3bb11be 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -275,7 +275,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
 		while (d_mountpoint(nd->path.dentry) &&
-		       follow_down(&nd->path.mnt, &nd->path.dentry))
+		       follow_down(&nd->path))
 			;
 		err = 0;
 	default:
diff --git a/fs/namei.c b/fs/namei.c
index 8c1f48a..4d49a3e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -731,16 +731,16 @@ static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
 /* no need for dcache_lock, as serialization is taken care in
  * namespace.c
  */
-int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+int follow_down(struct path *path)
 {
 	struct vfsmount *mounted;
 
-	mounted = lookup_mnt(*mnt, *dentry);
+	mounted = lookup_mnt(path->mnt, path->dentry);
 	if (mounted) {
-		dput(*dentry);
-		mntput(*mnt);
-		*mnt = mounted;
-		*dentry = dget(mounted->mnt_root);
+		dput(path->dentry);
+		mntput(path->mnt);
+		path->mnt = mounted;
+		path->dentry = dget(mounted->mnt_root);
 		return 1;
 	}
 	return 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index c859622..ba5237b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1601,7 +1601,7 @@ static int do_move_mount(struct path *path, char *old_name)
 
 	down_write(&namespace_sem);
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
 	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
@@ -1695,7 +1695,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
 	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 64a288e..f01caec 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -154,7 +154,7 @@ out_err:
 	goto out;
 out_follow:
 	while (d_mountpoint(nd->path.dentry) &&
-	       follow_down(&nd->path.mnt, &nd->path.dentry))
+	       follow_down(&nd->path))
 		;
 	err = 0;
 	goto out;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7b2b3f7..99f8357 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -105,8 +105,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 			    .dentry = dget(dentry)};
 	int err = 0;
 
-	while (follow_down(&path.mnt, &path.dentry) &&
-	       d_mountpoint(path.dentry))
+	while (d_mountpoint(path.dentry) && follow_down(&path))
 		;
 
 	exp2 = rqst_exp_get_by_name(rqstp, &path);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 9cd5a717b..d870ae2 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -78,7 +78,7 @@ extern void release_open_intent(struct nameidata *);
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
 
-extern int follow_down(struct vfsmount **, struct dentry **);
+extern int follow_down(struct path *);
 extern int follow_up(struct path *);
 
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
-- 
cgit v0.10.2


From 79ed0226198c628133530b179a90dbf42b1c2eba Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 13:59:41 -0400
Subject: switch follow_mount()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index 4d49a3e..c006bc6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -715,16 +715,16 @@ static int __follow_mount(struct path *path)
 	return res;
 }
 
-static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static void follow_mount(struct path *path)
 {
-	while (d_mountpoint(*dentry)) {
-		struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+	while (d_mountpoint(path->dentry)) {
+		struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
 		if (!mounted)
 			break;
-		dput(*dentry);
-		mntput(*mnt);
-		*mnt = mounted;
-		*dentry = dget(mounted->mnt_root);
+		dput(path->dentry);
+		mntput(path->mnt);
+		path->mnt = mounted;
+		path->dentry = dget(mounted->mnt_root);
 	}
 }
 
@@ -779,7 +779,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
 		mntput(nd->path.mnt);
 		nd->path.mnt = parent;
 	}
-	follow_mount(&nd->path.mnt, &nd->path.dentry);
+	follow_mount(&nd->path);
 }
 
 /*
-- 
cgit v0.10.2


From 1c755af4df75996b0dd4b7e6cacaf9d57a6ef2ef Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 14:06:57 -0400
Subject: switch lookup_mnt()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namei.c b/fs/namei.c
index c006bc6..527119a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -702,7 +702,7 @@ static int __follow_mount(struct path *path)
 {
 	int res = 0;
 	while (d_mountpoint(path->dentry)) {
-		struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
+		struct vfsmount *mounted = lookup_mnt(path);
 		if (!mounted)
 			break;
 		dput(path->dentry);
@@ -718,7 +718,7 @@ static int __follow_mount(struct path *path)
 static void follow_mount(struct path *path)
 {
 	while (d_mountpoint(path->dentry)) {
-		struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
+		struct vfsmount *mounted = lookup_mnt(path);
 		if (!mounted)
 			break;
 		dput(path->dentry);
@@ -735,7 +735,7 @@ int follow_down(struct path *path)
 {
 	struct vfsmount *mounted;
 
-	mounted = lookup_mnt(path->mnt, path->dentry);
+	mounted = lookup_mnt(path);
 	if (mounted) {
 		dput(path->dentry);
 		mntput(path->mnt);
diff --git a/fs/namespace.c b/fs/namespace.c
index ba5237b..b94ad3d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -442,11 +442,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
  * lookup_mnt increments the ref count before returning
  * the vfsmount struct.
  */
-struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct vfsmount *child_mnt;
 	spin_lock(&vfsmount_lock);
-	if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
+	if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
 		mntget(child_mnt);
 	spin_unlock(&vfsmount_lock);
 	return child_mnt;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9797800..72ce2ae 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -370,7 +370,7 @@ static inline int d_mountpoint(struct dentry *dentry)
 	return dentry->d_mounted;
 }
 
-extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *);
+extern struct vfsmount *lookup_mnt(struct path *);
 extern struct dentry *lookup_create(struct nameidata *nd, int is_dir);
 
 extern int sysctl_vfs_cache_pressure;
-- 
cgit v0.10.2


From 3174c21b74b56c6a53fddd41a30fd6f757a32bd0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 7 Apr 2009 13:19:18 -0400
Subject: Move junk from proc_fs.h to fs/proc/internal.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f6db961..753ca37 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -92,3 +92,28 @@ struct pde_opener {
 	struct list_head lh;
 };
 void pde_users_dec(struct proc_dir_entry *pde);
+
+extern spinlock_t proc_subdir_lock;
+
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
+unsigned long task_vsize(struct mm_struct *);
+int task_statm(struct mm_struct *, int *, int *, int *, int *);
+void task_mem(struct seq_file *, struct mm_struct *);
+
+struct proc_dir_entry *de_get(struct proc_dir_entry *de);
+void de_put(struct proc_dir_entry *de);
+
+extern struct vfsmount *proc_mnt;
+int proc_fill_super(struct super_block *);
+struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
+
+/*
+ * These are generic /proc routines that use the internal
+ * "struct proc_dir_entry" tree to traverse the filesystem.
+ *
+ * The /proc root directory has extended versions to take care
+ * of the /proc/<pid> subdirectories.
+ */
+int proc_readdir(struct file *, void *, filldir_t);
+struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index de2bba5..fc6c302 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <asm/prom.h>
 #include <asm/uaccess.h>
+#include "internal.h"
 
 #ifndef HAVE_ARCH_DEVTREE_FIXUPS
 static inline void set_node_proc_entry(struct device_node *np,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index fbfa3d4..e6e77d3 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -93,20 +93,9 @@ struct vmcore {
 
 #ifdef CONFIG_PROC_FS
 
-extern spinlock_t proc_subdir_lock;
-
 extern void proc_root_init(void);
 
 void proc_flush_task(struct task_struct *task);
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
-int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
-unsigned long task_vsize(struct mm_struct *);
-int task_statm(struct mm_struct *, int *, int *, int *, int *);
-void task_mem(struct seq_file *, struct mm_struct *);
-void clear_refs_smap(struct mm_struct *mm);
-
-struct proc_dir_entry *de_get(struct proc_dir_entry *de);
-void de_put(struct proc_dir_entry *de);
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
@@ -116,20 +105,7 @@ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
 				void *data);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
 
-extern struct vfsmount *proc_mnt;
 struct pid_namespace;
-extern int proc_fill_super(struct super_block *);
-extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
-
-/*
- * These are generic /proc routines that use the internal
- * "struct proc_dir_entry" tree to traverse the filesystem.
- *
- * The /proc root directory has extended versions to take care
- * of the /proc/<pid> subdirectories.
- */
-extern int proc_readdir(struct file *, void *, filldir_t);
-extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
 
 extern int pid_ns_prepare_proc(struct pid_namespace *ns);
 extern void pid_ns_release_proc(struct pid_namespace *ns);
-- 
cgit v0.10.2


From d3ef3d7351ccfbef3e5d926efc5ee332136f40d4 Mon Sep 17 00:00:00 2001
From: "npiggin@suse.de" <npiggin@suse.de>
Date: Sun, 26 Apr 2009 20:25:54 +1000
Subject: fs: mnt_want_write speedup

This patch speeds up lmbench lat_mmap test by about 8%. lat_mmap is set up
basically to mmap a 64MB file on tmpfs, fault in its pages, then unmap it.
A microbenchmark yes, but it exercises some important paths in the mm.

Before:
 avg = 501.9
 std = 14.7773

After:
 avg = 462.286
 std = 5.46106

(50 runs of each, stddev gives a reasonable confidence, but there is quite
a bit of variation there still)

It does this by removing the complex per-cpu locking and counter-cache and
replaces it with a percpu counter in struct vfsmount. This makes the code
much simpler, and avoids spinlocks (although the msync is still pretty
costly, unfortunately). It results in about 900 bytes smaller code too. It
does increase the size of a vfsmount, however.

It should also give a speedup on large systems if CPUs are frequently operating
on different mounts (because the existing scheme has to operate on an atomic in
the struct vfsmount when switching between mounts). But I'm most interested in
the single threaded path performance for the moment.

[AV: minor cleanup]

Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namespace.c b/fs/namespace.c
index b94ad3d..22ae06a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_share);
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
-		atomic_set(&mnt->__mnt_writers, 0);
+#ifdef CONFIG_SMP
+		mnt->mnt_writers = alloc_percpu(int);
+		if (!mnt->mnt_writers)
+			goto out_free_devname;
+#else
+		mnt->mnt_writers = 0;
+#endif
 	}
 	return mnt;
 
+#ifdef CONFIG_SMP
+out_free_devname:
+	kfree(mnt->mnt_devname);
+#endif
 out_free_id:
 	mnt_free_id(mnt);
 out_free_cache:
@@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 
-struct mnt_writer {
-	/*
-	 * If holding multiple instances of this lock, they
-	 * must be ordered by cpu number.
-	 */
-	spinlock_t lock;
-	struct lock_class_key lock_class; /* compiles out with !lockdep */
-	unsigned long count;
-	struct vfsmount *mnt;
-} ____cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
+static inline void inc_mnt_writers(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+#else
+	mnt->mnt_writers++;
+#endif
+}
 
-static int __init init_mnt_writers(void)
+static inline void dec_mnt_writers(struct vfsmount *mnt)
 {
-	int cpu;
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
-		spin_lock_init(&writer->lock);
-		lockdep_set_class(&writer->lock, &writer->lock_class);
-		writer->count = 0;
-	}
-	return 0;
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+#else
+	mnt->mnt_writers--;
+#endif
 }
-fs_initcall(init_mnt_writers);
 
-static void unlock_mnt_writers(void)
+static unsigned int count_mnt_writers(struct vfsmount *mnt)
 {
+#ifdef CONFIG_SMP
+	unsigned int count = 0;
 	int cpu;
-	struct mnt_writer *cpu_writer;
 
 	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_unlock(&cpu_writer->lock);
+		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
 	}
-}
 
-static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
-{
-	if (!cpu_writer->mnt)
-		return;
-	/*
-	 * This is in case anyone ever leaves an invalid,
-	 * old ->mnt and a count of 0.
-	 */
-	if (!cpu_writer->count)
-		return;
-	atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
-	cpu_writer->count = 0;
-}
- /*
- * must hold cpu_writer->lock
- */
-static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
-					  struct vfsmount *mnt)
-{
-	if (cpu_writer->mnt == mnt)
-		return;
-	__clear_mnt_count(cpu_writer);
-	cpu_writer->mnt = mnt;
+	return count;
+#else
+	return mnt->mnt_writers;
+#endif
 }
 
 /*
@@ -253,75 +236,34 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
 int mnt_want_write(struct vfsmount *mnt)
 {
 	int ret = 0;
-	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	/*
+	 * The store to inc_mnt_writers must be visible before we pass
+	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
+	 * incremented count after it has set MNT_WRITE_HOLD.
+	 */
+	smp_mb();
+	while (mnt->mnt_flags & MNT_WRITE_HOLD)
+		cpu_relax();
+	/*
+	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+	 * be set to match its requirements. So we must not load that until
+	 * MNT_WRITE_HOLD is cleared.
+	 */
+	smp_rmb();
 	if (__mnt_is_readonly(mnt)) {
+		dec_mnt_writers(mnt);
 		ret = -EROFS;
 		goto out;
 	}
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	cpu_writer->count++;
 out:
-	spin_unlock(&cpu_writer->lock);
-	put_cpu_var(mnt_writers);
+	preempt_enable();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
-static void lock_mnt_writers(void)
-{
-	int cpu;
-	struct mnt_writer *cpu_writer;
-
-	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		__clear_mnt_count(cpu_writer);
-		cpu_writer->mnt = NULL;
-	}
-}
-
-/*
- * These per-cpu write counts are not guaranteed to have
- * matched increments and decrements on any given cpu.
- * A file open()ed for write on one cpu and close()d on
- * another cpu will imbalance this count.  Make sure it
- * does not get too far out of whack.
- */
-static void handle_write_count_underflow(struct vfsmount *mnt)
-{
-	if (atomic_read(&mnt->__mnt_writers) >=
-	    MNT_WRITER_UNDERFLOW_LIMIT)
-		return;
-	/*
-	 * It isn't necessary to hold all of the locks
-	 * at the same time, but doing it this way makes
-	 * us share a lot more code.
-	 */
-	lock_mnt_writers();
-	/*
-	 * vfsmount_lock is for mnt_flags.
-	 */
-	spin_lock(&vfsmount_lock);
-	/*
-	 * If coalescing the per-cpu writer counts did not
-	 * get us back to a positive writer count, we have
-	 * a bug.
-	 */
-	if ((atomic_read(&mnt->__mnt_writers) < 0) &&
-	    !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
-		WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
-				"count: %d\n",
-			mnt, atomic_read(&mnt->__mnt_writers));
-		/* use the flag to keep the dmesg spam down */
-		mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
-	}
-	spin_unlock(&vfsmount_lock);
-	unlock_mnt_writers();
-}
-
 /**
  * mnt_drop_write - give up write access to a mount
  * @mnt: the mount on which to give up write access
@@ -332,37 +274,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
  */
 void mnt_drop_write(struct vfsmount *mnt)
 {
-	int must_check_underflow = 0;
-	struct mnt_writer *cpu_writer;
-
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
-
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	if (cpu_writer->count > 0) {
-		cpu_writer->count--;
-	} else {
-		must_check_underflow = 1;
-		atomic_dec(&mnt->__mnt_writers);
-	}
-
-	spin_unlock(&cpu_writer->lock);
-	/*
-	 * Logically, we could call this each time,
-	 * but the __mnt_writers cacheline tends to
-	 * be cold, and makes this expensive.
-	 */
-	if (must_check_underflow)
-		handle_write_count_underflow(mnt);
-	/*
-	 * This could be done right after the spinlock
-	 * is taken because the spinlock keeps us on
-	 * the cpu, and disables preemption.  However,
-	 * putting it here bounds the amount that
-	 * __mnt_writers can underflow.  Without it,
-	 * we could theoretically wrap __mnt_writers.
-	 */
-	put_cpu_var(mnt_writers);
+	preempt_disable();
+	dec_mnt_writers(mnt);
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
@@ -370,24 +284,41 @@ static int mnt_make_readonly(struct vfsmount *mnt)
 {
 	int ret = 0;
 
-	lock_mnt_writers();
+	spin_lock(&vfsmount_lock);
+	mnt->mnt_flags |= MNT_WRITE_HOLD;
 	/*
-	 * With all the locks held, this value is stable
+	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+	 * should be visible before we do.
 	 */
-	if (atomic_read(&mnt->__mnt_writers) > 0) {
-		ret = -EBUSY;
-		goto out;
-	}
+	smp_mb();
+
 	/*
-	 * nobody can do a successful mnt_want_write() with all
-	 * of the counts in MNT_DENIED_WRITE and the locks held.
+	 * With writers on hold, if this value is zero, then there are
+	 * definitely no active writers (although held writers may subsequently
+	 * increment the count, they'll have to wait, and decrement it after
+	 * seeing MNT_READONLY).
+	 *
+	 * It is OK to have counter incremented on one CPU and decremented on
+	 * another: the sum will add up correctly. The danger would be when we
+	 * sum up each counter, if we read a counter before it is incremented,
+	 * but then read another CPU's count which it has been subsequently
+	 * decremented from -- we would see more decrements than we should.
+	 * MNT_WRITE_HOLD protects against this scenario, because
+	 * mnt_want_write first increments count, then smp_mb, then spins on
+	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+	 * we're counting up here.
 	 */
-	spin_lock(&vfsmount_lock);
-	if (!ret)
+	if (count_mnt_writers(mnt) > 0)
+		ret = -EBUSY;
+	else
 		mnt->mnt_flags |= MNT_READONLY;
+	/*
+	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+	 * that become unheld will see MNT_READONLY.
+	 */
+	smp_wmb();
+	mnt->mnt_flags &= ~MNT_WRITE_HOLD;
 	spin_unlock(&vfsmount_lock);
-out:
-	unlock_mnt_writers();
 	return ret;
 }
 
@@ -410,6 +341,9 @@ void free_vfsmnt(struct vfsmount *mnt)
 {
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_writers);
+#endif
 	kmem_cache_free(mnt_cache, mnt);
 }
 
@@ -604,38 +538,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 
 static inline void __mntput(struct vfsmount *mnt)
 {
-	int cpu;
 	struct super_block *sb = mnt->mnt_sb;
 	/*
-	 * We don't have to hold all of the locks at the
-	 * same time here because we know that we're the
-	 * last reference to mnt and that no new writers
-	 * can come in.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		if (cpu_writer->mnt != mnt) {
-			spin_unlock(&cpu_writer->lock);
-			continue;
-		}
-		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
-		cpu_writer->count = 0;
-		/*
-		 * Might as well do this so that no one
-		 * ever sees the pointer and expects
-		 * it to be valid.
-		 */
-		cpu_writer->mnt = NULL;
-		spin_unlock(&cpu_writer->lock);
-	}
-	/*
 	 * This probably indicates that somebody messed
 	 * up a mnt_want/drop_write() pair.  If this
 	 * happens, the filesystem was probably unable
 	 * to make r/w->r/o transitions.
 	 */
-	WARN_ON(atomic_read(&mnt->__mnt_writers));
+	/*
+	 * atomic_dec_and_lock() used to deal with ->mnt_count decrements
+	 * provides barriers, so count_mnt_writers() below is safe.  AV
+	 */
+	WARN_ON(count_mnt_writers(mnt));
 	dput(mnt->mnt_root);
 	free_vfsmnt(mnt);
 	deactivate_super(sb);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 51f55f9..ac49c1f 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -30,7 +30,7 @@ struct mnt_namespace;
 #define MNT_STRICTATIME 0x80
 
 #define MNT_SHRINKABLE	0x100
-#define MNT_IMBALANCED_WRITE_COUNT	0x200 /* just for debugging */
+#define MNT_WRITE_HOLD	0x200
 
 #define MNT_SHARED	0x1000	/* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE	0x2000	/* if the vfsmount is a unbindable mount */
@@ -65,13 +65,22 @@ struct vfsmount {
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
 	int mnt_ghosts;
-	/*
-	 * This value is not stable unless all of the mnt_writers[] spinlocks
-	 * are held, and all mnt_writer[]s on this mount have 0 as their ->count
-	 */
-	atomic_t __mnt_writers;
+#ifdef CONFIG_SMP
+	int *mnt_writers;
+#else
+	int mnt_writers;
+#endif
 };
 
+static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	return mnt->mnt_writers;
+#else
+	return &mnt->mnt_writers;
+#endif
+}
+
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
 {
 	if (mnt)
-- 
cgit v0.10.2


From 96029c4e09ccbd73a6d0ed2b29e80bf2586ad7ef Mon Sep 17 00:00:00 2001
From: "npiggin@suse.de" <npiggin@suse.de>
Date: Sun, 26 Apr 2009 20:25:55 +1000
Subject: fs: introduce mnt_clone_write

This patch speeds up lmbench lat_mmap test by about another 2% after the
first patch.

Before:
 avg = 462.286
 std = 5.46106

After:
 avg = 453.12
 std = 9.58257

(50 runs of each, stddev gives a reasonable confidence)

It does this by introducing mnt_clone_write, which avoids some heavyweight
operations of mnt_want_write if called on a vfsmount which we know already
has a write count; and mnt_want_write_file, which can call mnt_clone_write
if the file is open for write.

After these two patches, mnt_want_write and mnt_drop_write go from 7% on
the profile down to 1.3% (including mnt_clone_write).

[AV: mnt_want_write_file() should take file alone and derive mnt from it;
not only all callers have that form, but that's the only mnt about which
we know that it's already held for write if file is opened for write]

Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/file_table.c b/fs/file_table.c
index 54018fe..3d66dbc 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
 	 */
 	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
 		file_take_write(file);
-		error = mnt_want_write(mnt);
+		error = mnt_clone_write(mnt);
 		WARN_ON(error);
 	}
 	return error;
diff --git a/fs/inode.c b/fs/inode.c
index ca33701..a88baeb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1422,7 +1422,7 @@ void file_update_time(struct file *file)
 	if (IS_NOCMTIME(inode))
 		return;
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		return;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 22ae06a..120b8a6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -265,6 +265,46 @@ out:
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
 /**
+ * mnt_clone_write - get write access to a mount
+ * @mnt: the mount on which to take a write
+ *
+ * This is effectively like mnt_want_write, except
+ * it must only be used to take an extra write reference
+ * on a mountpoint that we already know has a write reference
+ * on it. This allows some optimisation.
+ *
+ * After finished, mnt_drop_write must be called as usual to
+ * drop the reference.
+ */
+int mnt_clone_write(struct vfsmount *mnt)
+{
+	/* superblock may be r/o */
+	if (__mnt_is_readonly(mnt))
+		return -EROFS;
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	preempt_enable();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mnt_clone_write);
+
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
+ */
+int mnt_want_write_file(struct file *file)
+{
+	if (!(file->f_mode & FMODE_WRITE))
+		return mnt_want_write(file->f_path.mnt);
+	else
+		return mnt_clone_write(file->f_path.mnt);
+}
+EXPORT_SYMBOL_GPL(mnt_want_write_file);
+
+/**
  * mnt_drop_write - give up write access to a mount
  * @mnt: the mount on which to give up write access
  *
diff --git a/fs/open.c b/fs/open.c
index bdfbf03..7200e23 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 
 	audit_inode(NULL, dentry);
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		goto out_putf;
 	mutex_lock(&inode->i_mutex);
@@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 	if (!file)
 		goto out;
 
-	error = mnt_want_write(file->f_path.mnt);
+	error = mnt_want_write_file(file);
 	if (error)
 		goto out_fput;
 	dentry = file->f_path.dentry;
diff --git a/fs/xattr.c b/fs/xattr.c
index d51b8f9..1c3d0af 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
 		return error;
 	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write(f->f_path.mnt);
+	error = mnt_want_write_file(f);
 	if (!error) {
 		error = setxattr(dentry, name, value, size, flags);
 		mnt_drop_write(f->f_path.mnt);
@@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
 		return error;
 	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write(f->f_path.mnt);
+	error = mnt_want_write_file(f);
 	if (!error) {
 		error = removexattr(dentry, name);
 		mnt_drop_write(f->f_path.mnt);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index ac49c1f..5d52753 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -88,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
 	return mnt;
 }
 
+struct file; /* forward dec */
+
 extern int mnt_want_write(struct vfsmount *mnt);
+extern int mnt_want_write_file(struct file *file);
+extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mntput_no_expire(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
-- 
cgit v0.10.2


From 864d7c4c068f23642efe91b33be3a84afe5f71e0 Mon Sep 17 00:00:00 2001
From: "npiggin@suse.de" <npiggin@suse.de>
Date: Sun, 26 Apr 2009 20:25:56 +1000
Subject: fs: move mark_files_ro into file_table.c

This function walks the s_files lock, and operates primarily on the
files in a superblock, so it better belongs here (eg. see also
fs_may_remount_ro).

[AV: ... and it shouldn't be static after that move]

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/file_table.c b/fs/file_table.c
index 3d66dbc..334ce39 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -399,6 +399,44 @@ too_bad:
 	return 0;
 }
 
+/**
+ *	mark_files_ro - mark all files read-only
+ *	@sb: superblock in question
+ *
+ *	All files are marked read-only.  We don't care about pending
+ *	delete files so this should be used in 'force' mode only.
+ */
+void mark_files_ro(struct super_block *sb)
+{
+	struct file *f;
+
+retry:
+	file_list_lock();
+	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+		struct vfsmount *mnt;
+		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+		       continue;
+		if (!file_count(f))
+			continue;
+		if (!(f->f_mode & FMODE_WRITE))
+			continue;
+		f->f_mode &= ~FMODE_WRITE;
+		if (file_check_writeable(f) != 0)
+			continue;
+		file_release_write(f);
+		mnt = mntget(f->f_path.mnt);
+		file_list_unlock();
+		/*
+		 * This can sleep, so we can't hold
+		 * the file_list_lock() spinlock.
+		 */
+		mnt_drop_write(mnt);
+		mntput(mnt);
+		goto retry;
+	}
+	file_list_unlock();
+}
+
 void __init files_init(unsigned long mempages)
 { 
 	int n; 
diff --git a/fs/internal.h b/fs/internal.h
index b4dac4f..6d4ef20 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -66,3 +66,8 @@ extern void __init mnt_init(void);
  * fs_struct.c
  */
 extern void chroot_fs_refs(struct path *, struct path *);
+
+/*
+ * file_table.c
+ */
+extern void mark_files_ro(struct super_block *);
diff --git a/fs/super.c b/fs/super.c
index 1943fdf..c170551 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -616,45 +616,6 @@ out:
 }
 
 /**
- *	mark_files_ro - mark all files read-only
- *	@sb: superblock in question
- *
- *	All files are marked read-only.  We don't care about pending
- *	delete files so this should be used in 'force' mode only.
- */
-
-static void mark_files_ro(struct super_block *sb)
-{
-	struct file *f;
-
-retry:
-	file_list_lock();
-	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
-		struct vfsmount *mnt;
-		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
-		       continue;
-		if (!file_count(f))
-			continue;
-		if (!(f->f_mode & FMODE_WRITE))
-			continue;
-		f->f_mode &= ~FMODE_WRITE;
-		if (file_check_writeable(f) != 0)
-			continue;
-		file_release_write(f);
-		mnt = mntget(f->f_path.mnt);
-		file_list_unlock();
-		/*
-		 * This can sleep, so we can't hold
-		 * the file_list_lock() spinlock.
-		 */
-		mnt_drop_write(mnt);
-		mntput(mnt);
-		goto retry;
-	}
-	file_list_unlock();
-}
-
-/**
  *	do_remount_sb - asks filesystem to change mount options.
  *	@sb:	superblock in question
  *	@flags:	numeric part of options
-- 
cgit v0.10.2


From 876a9f76abbcb775f8d21cbc99fa161f9e5937f1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Apr 2009 18:05:55 +0200
Subject: remove s_async_list

Remove the unused s_async_list in the superblock, a leftover of the
broken async inode deletion code that leaked into mainline.  Having this
in the middle of the sync/unmount path is not helpful for the following
cleanups.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/super.c b/fs/super.c
index c170551..3d9f117 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,7 +38,6 @@
 #include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/file.h>
-#include <linux/async.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -72,7 +71,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		INIT_LIST_HEAD(&s->s_dentry_lru);
-		INIT_LIST_HEAD(&s->s_async_list);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
 		lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -342,11 +340,6 @@ void generic_shutdown_super(struct super_block *sb)
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 
-		/*
-		 * wait for asynchronous fs operations to finish before going further
-		 */
-		async_synchronize_full_domain(&sb->s_async_list);
-
 		/* bad name - it should be evict_inodes() */
 		invalidate_inodes(sb);
 		lock_kernel();
@@ -517,7 +510,6 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		async_synchronize_full_domain(&sb->s_async_list);
 		if (sb->s_root && (wait || sb->s_dirt))
 			sb->s_op->sync_fs(sb, wait);
 		up_read(&sb->s_umount);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 03fb210..36bcff7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1372,11 +1372,6 @@ struct super_block {
 	 * generic_show_options()
 	 */
 	char *s_options;
-
-	/*
-	 * storage for asynchronous operations
-	 */
-	struct list_head s_async_list;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
-- 
cgit v0.10.2


From 5a3e5cb8e08bd876e2542c1451c9a93dab1b0e39 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:48 +0200
Subject: vfs: Fix sys_sync() and fsync_super() reliability (version 4)

So far, do_sync() called:
  sync_inodes(0);
  sync_supers();
  sync_filesystems(0);
  sync_filesystems(1);
  sync_inodes(1);

This ordering makes it kind of hard for filesystems as sync_inodes(0) need not
submit all the IO (for example it skips inodes with I_SYNC set) so e.g. forcing
transaction to disk in ->sync_fs() is not really enough. Therefore sys_sync has
not been completely reliable on some filesystems (ext3, ext4, reiserfs, ocfs2
and others are hit by this) when racing e.g. with background writeback. A
similar problem hits also other filesystems (e.g. ext2) because of
write_supers() being called before the sync_inodes(1).

Change the ordering of calls in do_sync() - this requires a new function
sync_blockdevs() to preserve the property that block devices are always synced
after write_super() / sync_fs() call.

The same issue is fixed in __fsync_super() function used on umount /
remount read-only.

[AV: build fixes]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/internal.h b/fs/internal.h
index 6d4ef20..343a537 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -71,3 +71,12 @@ extern void chroot_fs_refs(struct path *, struct path *);
  * file_table.c
  */
 extern void mark_files_ro(struct super_block *);
+
+/*
+ * super.c
+ */
+#ifdef CONFIG_BLOCK
+extern void sync_blockdevs(void);
+#else
+static inline void sync_blockdevs(void) { }
+#endif
diff --git a/fs/super.c b/fs/super.c
index 3d9f117..18d159d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -293,6 +293,7 @@ void __fsync_super(struct super_block *sb)
 {
 	sync_inodes_sb(sb, 0);
 	vfs_dq_sync(sb);
+	sync_inodes_sb(sb, 1);
 	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
 		sb->s_op->write_super(sb);
@@ -300,7 +301,6 @@ void __fsync_super(struct super_block *sb)
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, 1);
 	sync_blockdev(sb->s_bdev);
-	sync_inodes_sb(sb, 1);
 }
 
 /*
@@ -522,6 +522,33 @@ restart:
 	mutex_unlock(&mutex);
 }
 
+#ifdef CONFIG_BLOCK
+/*
+ *  Sync all block devices underlying some superblock
+ */
+void sync_blockdevs(void)
+{
+	struct super_block *sb;
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (!sb->s_bdev)
+			continue;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root)
+			sync_blockdev(sb->s_bdev);
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+}
+#endif
+
 /**
  *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
diff --git a/fs/sync.c b/fs/sync.c
index 7abc65f..631fd5a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -13,6 +13,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include "internal.h"
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
@@ -26,10 +27,11 @@ static void do_sync(unsigned long wait)
 	wakeup_pdflush(0);
 	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
 	vfs_dq_sync(NULL);
+	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
 	sync_supers();		/* Write the superblocks */
 	sync_filesystems(0);	/* Start syncing the filesystems */
 	sync_filesystems(wait);	/* Waitingly sync the filesystems */
-	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
+	sync_blockdevs();
 	if (!wait)
 		printk("Emergency Sync complete\n");
 	if (unlikely(laptop_mode))
-- 
cgit v0.10.2


From bfe881255c74800147523b59c85328a1a826ba21 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:49 +0200
Subject: vfs: Call ->sync_fs() even if s_dirt is 0 (version 4)

sync_filesystems() has a condition that if wait == 0 and s_dirt == 0, then
->sync_fs() isn't called. This does not really make much sence since s_dirt is
generally used by a filesystem to mean that ->write_super() needs to be called.
But ->sync_fs() does different things. I even suspect that some filesystems
(btrfs?) sets s_dirt just to fool this logic.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/super.c b/fs/super.c
index 18d159d..fae91ba 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -510,7 +510,7 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		if (sb->s_root && (wait || sb->s_dirt))
+		if (sb->s_root)
 			sb->s_op->sync_fs(sb, wait);
 		up_read(&sb->s_umount);
 		/* restart only when sb is no longer on the list */
-- 
cgit v0.10.2


From 429479f031322a0cc5c921ffb2321a51718dc875 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:50 +0200
Subject: vfs: Make __fsync_super() a static function (version 4)

__fsync_super() does the same thing as fsync_super(). So change the only
caller to use fsync_super() and make __fsync_super() static. This removes
unnecessarily duplicated call to sync_blockdev() and prepares ground
for the changes to __fsync_super() in the following patches.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 931f6b8..fe47f72 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -241,7 +241,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		__fsync_super(sb);
+		fsync_super(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
diff --git a/fs/super.c b/fs/super.c
index fae91ba..8dbe1ea 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL(unlock_super);
  * device.  Takes the superblock lock.  Requires a second blkdev
  * flush by the caller to complete the operation.
  */
-void __fsync_super(struct super_block *sb)
+static int __fsync_super(struct super_block *sb)
 {
 	sync_inodes_sb(sb, 0);
 	vfs_dq_sync(sb);
@@ -300,7 +300,7 @@ void __fsync_super(struct super_block *sb)
 	unlock_super(sb);
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, 1);
-	sync_blockdev(sb->s_bdev);
+	return sync_blockdev(sb->s_bdev);
 }
 
 /*
@@ -310,8 +310,7 @@ void __fsync_super(struct super_block *sb)
  */
 int fsync_super(struct super_block *sb)
 {
-	__fsync_super(sb);
-	return sync_blockdev(sb->s_bdev);
+	return __fsync_super(sb);
 }
 EXPORT_SYMBOL_GPL(fsync_super);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 36bcff7..41a9907 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2078,7 +2078,6 @@ extern int filemap_fdatawrite_range(struct address_space *mapping,
 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
 extern void sync_filesystems(int wait);
-extern void __fsync_super(struct super_block *sb);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
 extern int do_remount_sb(struct super_block *sb, int flags,
-- 
cgit v0.10.2


From 5cee5815d1564bbbd505fea86f4550f1efdb5cd0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:51 +0200
Subject: vfs: Make sys_sync() use fsync_super() (version 4)

It is unnecessarily fragile to have two places (fsync_super() and do_sync())
doing data integrity sync of the filesystem. Alter __fsync_super() to
accommodate needs of both callers and use it. So after this patch
__fsync_super() is the only place where we gather all the calls needed to
properly send all data on a filesystem to disk.

Nice bonus is that we get a complete livelock avoidance and write_supers()
is now only used for periodic writeback of superblocks.

sync_blockdevs() introduced a couple of patches ago is gone now.

[build fixes folded]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index fe47f72..4b6a3b9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 				iov, offset, nr_segs, blkdev_get_blocks, NULL);
 }
 
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	if (!bdev)
+		return 0;
+	if (!wait)
+		return filemap_flush(bdev->bd_inode->i_mapping);
+	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
 /*
  * Write out and wait upon all the dirty data associated with a block
  * device via its mapping.  Does not take the superblock lock.
  */
 int sync_blockdev(struct block_device *bdev)
 {
-	int ret = 0;
-
-	if (bdev)
-		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-	return ret;
+	return __sync_blockdev(bdev, 1);
 }
 EXPORT_SYMBOL(sync_blockdev);
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 91013ff..e0fb2e7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -679,55 +679,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
 }
 
 /**
- * sync_inodes - writes all inodes to disk
- * @wait: wait for completion
- *
- * sync_inodes() goes through each super block's dirty inode list, writes the
- * inodes out, waits on the writeout and puts the inodes back on the normal
- * list.
- *
- * This is for sys_sync().  fsync_dev() uses the same algorithm.  The subtle
- * part of the sync functions is that the blockdev "superblock" is processed
- * last.  This is because the write_inode() function of a typical fs will
- * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
- * What we want to do is to perform all that dirtying first, and then write
- * back all those inode blocks via the blockdev mapping in one sweep.  So the
- * additional (somewhat redundant) sync_blockdev() calls here are to make
- * sure that really happens.  Because if we call sync_inodes_sb(wait=1) with
- * outstanding dirty inodes, the writeback goes block-at-a-time within the
- * filesystem's write_inode().  This is extremely slow.
- */
-static void __sync_inodes(int wait)
-{
-	struct super_block *sb;
-
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root) {
-			sync_inodes_sb(sb, wait);
-			sync_blockdev(sb->s_bdev);
-		}
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-}
-
-void sync_inodes(int wait)
-{
-	__sync_inodes(0);
-
-	if (wait)
-		__sync_inodes(1);
-}
-
-/**
  * write_inode_now	-	write an inode to disk
  * @inode: inode to write to disk
  * @sync: whether the write should be synchronous or not
diff --git a/fs/internal.h b/fs/internal.h
index 343a537..dbec3cc 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
 	return sb == blockdev_superblock;
 }
 
+extern int __sync_blockdev(struct block_device *bdev, int wait);
+
 #else
 static inline void bdev_cache_init(void)
 {
@@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
 {
 	return 0;
 }
+
+static inline int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -71,12 +78,3 @@ extern void chroot_fs_refs(struct path *, struct path *);
  * file_table.c
  */
 extern void mark_files_ro(struct super_block *);
-
-/*
- * super.c
- */
-#ifdef CONFIG_BLOCK
-extern void sync_blockdevs(void);
-#else
-static inline void sync_blockdevs(void) { }
-#endif
diff --git a/fs/super.c b/fs/super.c
index 8dbe1ea..c8ce5ed 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -284,23 +284,23 @@ EXPORT_SYMBOL(lock_super);
 EXPORT_SYMBOL(unlock_super);
 
 /*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.  Requires a second blkdev
- * flush by the caller to complete the operation.
+ * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
+ * just dirties buffers with inodes so we have to submit IO for these buffers
+ * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
+ * case write_inode() functions do sync_dirty_buffer() and thus effectively
+ * write one block at a time.
  */
-static int __fsync_super(struct super_block *sb)
+static int __fsync_super(struct super_block *sb, int wait)
 {
-	sync_inodes_sb(sb, 0);
 	vfs_dq_sync(sb);
-	sync_inodes_sb(sb, 1);
+	sync_inodes_sb(sb, wait);
 	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
 		sb->s_op->write_super(sb);
 	unlock_super(sb);
 	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, 1);
-	return sync_blockdev(sb->s_bdev);
+		sb->s_op->sync_fs(sb, wait);
+	return __sync_blockdev(sb->s_bdev, wait);
 }
 
 /*
@@ -310,7 +310,12 @@ static int __fsync_super(struct super_block *sb)
  */
 int fsync_super(struct super_block *sb)
 {
-	return __fsync_super(sb);
+	int ret;
+
+	ret = __fsync_super(sb, 0);
+	if (ret < 0)
+		return ret;
+	return __fsync_super(sb, 1);
 }
 EXPORT_SYMBOL_GPL(fsync_super);
 
@@ -469,20 +474,18 @@ restart:
 }
 
 /*
- * Call the ->sync_fs super_op against all filesystems which are r/w and
- * which implement it.
+ * Sync all the data for all the filesystems (called by sys_sync() and
+ * emergency sync)
  *
  * This operation is careful to avoid the livelock which could easily happen
- * if two or more filesystems are being continuously dirtied.  s_need_sync_fs
+ * if two or more filesystems are being continuously dirtied.  s_need_sync
  * is used only here.  We set it against all filesystems and then clear it as
  * we sync them.  So redirtied filesystems are skipped.
  *
  * But if process A is currently running sync_filesystems and then process B
- * calls sync_filesystems as well, process B will set all the s_need_sync_fs
+ * calls sync_filesystems as well, process B will set all the s_need_sync
  * flags again, which will cause process A to resync everything.  Fix that with
  * a local mutex.
- *
- * (Fabian) Avoid sync_fs with clean fs & wait mode 0
  */
 void sync_filesystems(int wait)
 {
@@ -492,25 +495,23 @@ void sync_filesystems(int wait)
 	mutex_lock(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_op->sync_fs)
-			continue;
 		if (sb->s_flags & MS_RDONLY)
 			continue;
-		sb->s_need_sync_fs = 1;
+		sb->s_need_sync = 1;
 	}
 
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_need_sync_fs)
+		if (!sb->s_need_sync)
 			continue;
-		sb->s_need_sync_fs = 0;
+		sb->s_need_sync = 0;
 		if (sb->s_flags & MS_RDONLY)
 			continue;	/* hm.  Was remounted r/o meanwhile */
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
 		if (sb->s_root)
-			sb->s_op->sync_fs(sb, wait);
+			__fsync_super(sb, wait);
 		up_read(&sb->s_umount);
 		/* restart only when sb is no longer on the list */
 		spin_lock(&sb_lock);
@@ -521,33 +522,6 @@ restart:
 	mutex_unlock(&mutex);
 }
 
-#ifdef CONFIG_BLOCK
-/*
- *  Sync all block devices underlying some superblock
- */
-void sync_blockdevs(void)
-{
-	struct super_block *sb;
-
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_bdev)
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root)
-			sync_blockdev(sb->s_bdev);
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-}
-#endif
-
 /**
  *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
diff --git a/fs/sync.c b/fs/sync.c
index 631fd5a..be0798c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -18,35 +18,24 @@
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
-/*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
- */
-static void do_sync(unsigned long wait)
+SYSCALL_DEFINE0(sync)
 {
-	wakeup_pdflush(0);
-	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
-	vfs_dq_sync(NULL);
-	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
-	sync_supers();		/* Write the superblocks */
-	sync_filesystems(0);	/* Start syncing the filesystems */
-	sync_filesystems(wait);	/* Waitingly sync the filesystems */
-	sync_blockdevs();
-	if (!wait)
-		printk("Emergency Sync complete\n");
+	sync_filesystems(0);
+	sync_filesystems(1);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
-}
-
-SYSCALL_DEFINE0(sync)
-{
-	do_sync(1);
 	return 0;
 }
 
 static void do_sync_work(struct work_struct *work)
 {
-	do_sync(0);
+	/*
+	 * Sync twice to reduce the possibility we skipped some inodes / pages
+	 * because they were temporarily locked
+	 */
+	sync_filesystems(0);
+	sync_filesystems(0);
+	printk("Emergency Sync complete\n");
 	kfree(work);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 41a9907..f00df65 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1321,7 +1321,7 @@ struct super_block {
 	struct rw_semaphore	s_umount;
 	struct mutex		s_lock;
 	int			s_count;
-	int			s_need_sync_fs;
+	int			s_need_sync;
 	atomic_t		s_active;
 #ifdef CONFIG_SECURITY
 	void                    *s_security;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9344547..3224820 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -79,7 +79,6 @@ struct writeback_control {
 void writeback_inodes(struct writeback_control *wbc);
 int inode_wait(void *);
 void sync_inodes_sb(struct super_block *, int wait);
-void sync_inodes(int wait);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
-- 
cgit v0.10.2


From c15c54f5f056ee4819da9fde59a5f2cd45445f23 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:52 +0200
Subject: vfs: Move syncing code from super.c to sync.c (version 4)

Move sync_filesystems(), __fsync_super(), fsync_super() from
super.c to sync.c where it fits better.

[build fixes folded]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/super.c b/fs/super.c
index c8ce5ed..f822c74 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -283,42 +283,6 @@ void unlock_super(struct super_block * sb)
 EXPORT_SYMBOL(lock_super);
 EXPORT_SYMBOL(unlock_super);
 
-/*
- * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
- * just dirties buffers with inodes so we have to submit IO for these buffers
- * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
- * case write_inode() functions do sync_dirty_buffer() and thus effectively
- * write one block at a time.
- */
-static int __fsync_super(struct super_block *sb, int wait)
-{
-	vfs_dq_sync(sb);
-	sync_inodes_sb(sb, wait);
-	lock_super(sb);
-	if (sb->s_dirt && sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-	unlock_super(sb);
-	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, wait);
-	return __sync_blockdev(sb->s_bdev, wait);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
-	int ret;
-
-	ret = __fsync_super(sb, 0);
-	if (ret < 0)
-		return ret;
-	return __fsync_super(sb, 1);
-}
-EXPORT_SYMBOL_GPL(fsync_super);
-
 /**
  *	generic_shutdown_super	-	common helper for ->kill_sb()
  *	@sb: superblock to kill
@@ -473,55 +437,6 @@ restart:
 	spin_unlock(&sb_lock);
 }
 
-/*
- * Sync all the data for all the filesystems (called by sys_sync() and
- * emergency sync)
- *
- * This operation is careful to avoid the livelock which could easily happen
- * if two or more filesystems are being continuously dirtied.  s_need_sync
- * is used only here.  We set it against all filesystems and then clear it as
- * we sync them.  So redirtied filesystems are skipped.
- *
- * But if process A is currently running sync_filesystems and then process B
- * calls sync_filesystems as well, process B will set all the s_need_sync
- * flags again, which will cause process A to resync everything.  Fix that with
- * a local mutex.
- */
-void sync_filesystems(int wait)
-{
-	struct super_block *sb;
-	static DEFINE_MUTEX(mutex);
-
-	mutex_lock(&mutex);		/* Could be down_interruptible */
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (sb->s_flags & MS_RDONLY)
-			continue;
-		sb->s_need_sync = 1;
-	}
-
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_need_sync)
-			continue;
-		sb->s_need_sync = 0;
-		if (sb->s_flags & MS_RDONLY)
-			continue;	/* hm.  Was remounted r/o meanwhile */
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root)
-			__fsync_super(sb, wait);
-		up_read(&sb->s_umount);
-		/* restart only when sb is no longer on the list */
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-	mutex_unlock(&mutex);
-}
-
 /**
  *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
diff --git a/fs/sync.c b/fs/sync.c
index be0798c..d5fa7b7 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -18,6 +18,91 @@
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
+/*
+ * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
+ * just dirties buffers with inodes so we have to submit IO for these buffers
+ * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
+ * case write_inode() functions do sync_dirty_buffer() and thus effectively
+ * write one block at a time.
+ */
+static int __fsync_super(struct super_block *sb, int wait)
+{
+	vfs_dq_sync(sb);
+	sync_inodes_sb(sb, wait);
+	lock_super(sb);
+	if (sb->s_dirt && sb->s_op->write_super)
+		sb->s_op->write_super(sb);
+	unlock_super(sb);
+	if (sb->s_op->sync_fs)
+		sb->s_op->sync_fs(sb, wait);
+	return __sync_blockdev(sb->s_bdev, wait);
+}
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
+{
+	int ret;
+
+	ret = __fsync_super(sb, 0);
+	if (ret < 0)
+		return ret;
+	return __fsync_super(sb, 1);
+}
+EXPORT_SYMBOL_GPL(fsync_super);
+
+/*
+ * Sync all the data for all the filesystems (called by sys_sync() and
+ * emergency sync)
+ *
+ * This operation is careful to avoid the livelock which could easily happen
+ * if two or more filesystems are being continuously dirtied.  s_need_sync
+ * is used only here.  We set it against all filesystems and then clear it as
+ * we sync them.  So redirtied filesystems are skipped.
+ *
+ * But if process A is currently running sync_filesystems and then process B
+ * calls sync_filesystems as well, process B will set all the s_need_sync
+ * flags again, which will cause process A to resync everything.  Fix that with
+ * a local mutex.
+ */
+static void sync_filesystems(int wait)
+{
+	struct super_block *sb;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);		/* Could be down_interruptible */
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_flags & MS_RDONLY)
+			continue;
+		sb->s_need_sync = 1;
+	}
+
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (!sb->s_need_sync)
+			continue;
+		sb->s_need_sync = 0;
+		if (sb->s_flags & MS_RDONLY)
+			continue;	/* hm.  Was remounted r/o meanwhile */
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root)
+			__fsync_super(sb, wait);
+		up_read(&sb->s_umount);
+		/* restart only when sb is no longer on the list */
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+	mutex_unlock(&mutex);
+}
+
 SYSCALL_DEFINE0(sync)
 {
 	sync_filesystems(0);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f00df65..d3f7159 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1942,7 +1942,6 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
-extern int fsync_super(struct super_block *);
 extern int fsync_no_super(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
@@ -1959,6 +1958,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	return 0;
 }
 #endif
+extern int fsync_super(struct super_block *);
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
 extern const struct file_operations bad_sock_fops;
@@ -2077,7 +2077,6 @@ extern int filemap_fdatawrite_range(struct address_space *mapping,
 
 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
-extern void sync_filesystems(int wait);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
 extern int do_remount_sb(struct super_block *sb, int flags,
-- 
cgit v0.10.2


From 60b0680fa236ac4e17ce31a50048c9d75f9ec831 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:53 +0200
Subject: vfs: Rename fsync_super() to sync_filesystem() (version 4)

Rename the function so that it better describe what it really does. Also
remove the unnecessary include of buffer_head.h.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4b6a3b9..3a6d4fb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -204,7 +204,7 @@ int fsync_bdev(struct block_device *bdev)
 {
 	struct super_block *sb = get_super(bdev);
 	if (sb) {
-		int res = fsync_super(sb);
+		int res = sync_filesystem(sb);
 		drop_super(sb);
 		return res;
 	}
@@ -246,7 +246,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		fsync_super(sb);
+		sync_filesystem(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 1e96234..dafd484 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -354,7 +354,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache)
 	/* make sure all pages pinned by operations on behalf of the netfs are
 	 * written to disc */
 	cachefiles_begin_secure(cache, &saved_cred);
-	ret = fsync_super(cache->mnt->mnt_sb);
+	ret = sync_filesystem(cache->mnt->mnt_sb);
 	cachefiles_end_secure(cache, saved_cred);
 
 	if (ret == -EIO)
diff --git a/fs/super.c b/fs/super.c
index f822c74..721236e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -28,7 +28,6 @@
 #include <linux/blkdev.h>
 #include <linux/quotaops.h>
 #include <linux/namei.h>
-#include <linux/buffer_head.h>		/* for fsync_super() */
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -304,7 +303,7 @@ void generic_shutdown_super(struct super_block *sb)
 
 	if (sb->s_root) {
 		shrink_dcache_for_umount(sb);
-		fsync_super(sb);
+		sync_filesystem(sb);
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 
@@ -543,7 +542,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
 	shrink_dcache_sb(sb);
-	fsync_super(sb);
+	sync_filesystem(sb);
 
 	/* If we are remounting RDONLY and current sb is read/write,
 	   make sure there are no rw files opened */
diff --git a/fs/sync.c b/fs/sync.c
index d5fa7b7..8aa870a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -25,7 +25,7 @@
  * case write_inode() functions do sync_dirty_buffer() and thus effectively
  * write one block at a time.
  */
-static int __fsync_super(struct super_block *sb, int wait)
+static int __sync_filesystem(struct super_block *sb, int wait)
 {
 	vfs_dq_sync(sb);
 	sync_inodes_sb(sb, wait);
@@ -43,16 +43,16 @@ static int __fsync_super(struct super_block *sb, int wait)
  * superblock.  Filesystem data as well as the underlying block
  * device.  Takes the superblock lock.
  */
-int fsync_super(struct super_block *sb)
+int sync_filesystem(struct super_block *sb)
 {
 	int ret;
 
-	ret = __fsync_super(sb, 0);
+	ret = __sync_filesystem(sb, 0);
 	if (ret < 0)
 		return ret;
-	return __fsync_super(sb, 1);
+	return __sync_filesystem(sb, 1);
 }
-EXPORT_SYMBOL_GPL(fsync_super);
+EXPORT_SYMBOL_GPL(sync_filesystem);
 
 /*
  * Sync all the data for all the filesystems (called by sys_sync() and
@@ -92,7 +92,7 @@ restart:
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
 		if (sb->s_root)
-			__fsync_super(sb, wait);
+			__sync_filesystem(sb, wait);
 		up_read(&sb->s_umount);
 		/* restart only when sb is no longer on the list */
 		spin_lock(&sb_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d3f7159..fb1822b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1958,7 +1958,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	return 0;
 }
 #endif
-extern int fsync_super(struct super_block *);
+extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
 extern const struct file_operations bad_sock_fops;
-- 
cgit v0.10.2


From 850b201b087f5525a0a7278551c2bcd0423c3b26 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 27 Apr 2009 16:43:54 +0200
Subject: quota: cleanup dquota sync functions (version 4)

Currently the VFS calls vfs_dq_sync to sync out disk quotas for a given
superblock.  This is a small wrapper around sync_dquots which for the
case of a non-NULL superblock is a small wrapper around quota_sync_sb.

Just make quota_sync_sb global (rename it to sync_quota_sb) and call it
directly.  Also call it directly for those cases in quota.c that have a
superblock and leave sync_dquots purely an iterator over sync_quota_sb and
remove it's superblock argument.

To make this nicer move the check for the lack of a quota_sync method
from the callers into sync_quota_sb.

[folded build fix from Alexander Beregalov <a.beregalov@gmail.com>]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b7f5a46..95c5b42 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -159,10 +159,14 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd,
 	return error;
 }
 
-static void quota_sync_sb(struct super_block *sb, int type)
+#ifdef CONFIG_QUOTA
+void sync_quota_sb(struct super_block *sb, int type)
 {
 	int cnt;
 
+	if (!sb->s_qcop->quota_sync)
+		return;
+
 	sb->s_qcop->quota_sync(sb, type);
 
 	if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
@@ -191,17 +195,13 @@ static void quota_sync_sb(struct super_block *sb, int type)
 	}
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 }
+#endif
 
-void sync_dquots(struct super_block *sb, int type)
+static void sync_dquots(int type)
 {
+	struct super_block *sb;
 	int cnt;
 
-	if (sb) {
-		if (sb->s_qcop->quota_sync)
-			quota_sync_sb(sb, type);
-		return;
-	}
-
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
@@ -222,8 +222,8 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		if (sb->s_root && sb->s_qcop->quota_sync)
-			quota_sync_sb(sb, type);
+		if (sb->s_root)
+			sync_quota_sb(sb, type);
 		up_read(&sb->s_umount);
 		spin_lock(&sb_lock);
 		if (__put_super_and_need_restart(sb))
@@ -301,7 +301,10 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 			return sb->s_qcop->set_dqblk(sb, type, id, &idq);
 		}
 		case Q_SYNC:
-			sync_dquots(sb, type);
+			if (sb)
+				sync_quota_sb(sb, type);
+			else
+				sync_dquots(type);
 			return 0;
 
 		case Q_XQUOTAON:
diff --git a/fs/sync.c b/fs/sync.c
index 8aa870a..d90ab77 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,7 +27,7 @@
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	vfs_dq_sync(sb);
+	sync_quota_sb(sb, -1);
 	sync_inodes_sb(sb, wait);
 	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 36353d9..047310f 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -20,7 +20,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
 /*
  * declaration of quota_function calls in kernel.
  */
-void sync_dquots(struct super_block *sb, int type);
+void sync_quota_sb(struct super_block *sb, int type);
 
 int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
@@ -253,12 +253,7 @@ static inline void vfs_dq_free_inode(struct inode *inode)
 		inode->i_sb->dq_op->free_inode(inode, 1);
 }
 
-/* The following two functions cannot be called inside a transaction */
-static inline void vfs_dq_sync(struct super_block *sb)
-{
-	sync_dquots(sb, -1);
-}
-
+/* Cannot be called inside a transaction */
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	int ret = -ENOSYS;
@@ -334,7 +329,7 @@ static inline void vfs_dq_free_inode(struct inode *inode)
 {
 }
 
-static inline void vfs_dq_sync(struct super_block *sb)
+static inline void sync_quota_sb(struct super_block *sb, int type)
 {
 }
 
-- 
cgit v0.10.2


From c3f8a40c1cd5591b882497d1d00d43d0e5bb4698 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:55 +0200
Subject: quota: Introduce writeout_quota_sb() (version 4)

Introduce this function which just writes all the quota structures but
avoids all the syncing and cache pruning work to expose quota structures
to userspace. Use this function from __sync_filesystem when wait == 0.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/sync.c b/fs/sync.c
index d90ab77..4487b55 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,7 +27,11 @@
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	sync_quota_sb(sb, -1);
+	/* Avoid doing twice syncing and cache pruning for quota sync */
+	if (!wait)
+		writeout_quota_sb(sb, -1);
+	else
+		sync_quota_sb(sb, -1);
 	sync_inodes_sb(sb, wait);
 	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 047310f..7bc4575 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -21,6 +21,11 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
  * declaration of quota_function calls in kernel.
  */
 void sync_quota_sb(struct super_block *sb, int type);
+static inline void writeout_quota_sb(struct super_block *sb, int type)
+{
+	if (sb->s_qcop->quota_sync)
+		sb->s_qcop->quota_sync(sb, type);
+}
 
 int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
@@ -333,6 +338,10 @@ static inline void sync_quota_sb(struct super_block *sb, int type)
 {
 }
 
+static inline void writeout_quota_sb(struct super_block *sb, int type)
+{
+}
+
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	return 0;
-- 
cgit v0.10.2


From 59d697b70285c348c01cfc2695c3469ba71d7539 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 27 Apr 2009 09:46:41 -0400
Subject: btrfs: remove ->write_super and stop maintaining ->s_dirt

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b68330..8612b3a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2322,7 +2322,6 @@ err:
 	btrfs_update_inode(trans, root, dir);
 	btrfs_drop_nlink(inode);
 	ret = btrfs_update_inode(trans, root, inode);
-	dir->i_sb->s_dirt = 1;
 out:
 	return ret;
 }
@@ -2806,7 +2805,6 @@ error:
 				      pending_del_nr);
 	}
 	btrfs_free_path(path);
-	inode->i_sb->s_dirt = 1;
 	return ret;
 }
 
@@ -3768,7 +3766,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 		init_special_inode(inode, inode->i_mode, rdev);
 		btrfs_update_inode(trans, root, inode);
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
@@ -3833,7 +3830,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 		inode->i_op = &btrfs_file_inode_operations;
 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
@@ -3880,7 +3876,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	if (err)
 		drop_inode = 1;
 
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, dir);
 	err = btrfs_update_inode(trans, root, inode);
 
@@ -3962,7 +3957,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	d_instantiate(dentry, inode);
 	drop_on_err = 0;
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 
@@ -4991,7 +4985,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 		inode->i_op = &btrfs_file_inode_operations;
 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 	if (drop_inode)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 708ac06..52d8452 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -397,7 +397,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 	if (sb->s_flags & MS_RDONLY)
 		return 0;
 
-	sb->s_dirt = 0;
 	if (!wait) {
 		filemap_flush(root->fs_info->btree_inode->i_mapping);
 		return 0;
@@ -408,7 +407,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 
 	trans = btrfs_start_transaction(root, 1);
 	ret = btrfs_commit_transaction(trans, root);
-	sb->s_dirt = 0;
 	return ret;
 }
 
@@ -454,11 +452,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	return 0;
 }
 
-static void btrfs_write_super(struct super_block *sb)
-{
-	sb->s_dirt = 0;
-}
-
 static int btrfs_test_super(struct super_block *s, void *data)
 {
 	struct btrfs_fs_devices *test_fs_devices = data;
@@ -689,7 +682,6 @@ static int btrfs_unfreeze(struct super_block *sb)
 static struct super_operations btrfs_super_ops = {
 	.delete_inode	= btrfs_delete_inode,
 	.put_super	= btrfs_put_super,
-	.write_super	= btrfs_write_super,
 	.sync_fs	= btrfs_sync_fs,
 	.show_options	= btrfs_show_options,
 	.write_inode	= btrfs_write_inode,
-- 
cgit v0.10.2


From ca41f7b918294c2a17780e057568413dcbfc6d49 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 27 Apr 2009 09:46:42 -0400
Subject: ext3: remove ->write_super and stop maintaining ->s_dirt

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 225202d..27967f9 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -649,7 +649,7 @@ do_more:
 		count = overflow;
 		goto do_more;
 	}
-	sb->s_dirt = 1;
+
 error_return:
 	brelse(bitmap_bh);
 	ext3_std_error(sb, err);
@@ -1708,7 +1708,6 @@ allocated:
 	if (!fatal)
 		fatal = err;
 
-	sb->s_dirt = 1;
 	if (fatal)
 		goto out;
 
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index dd13d60..b399912 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -181,7 +181,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
 	if (!fatal)
 		fatal = err;
-	sb->s_dirt = 1;
+
 error_return:
 	brelse(bitmap_bh);
 	ext3_std_error(sb, fatal);
@@ -537,7 +537,6 @@ got:
 	percpu_counter_dec(&sbi->s_freeinodes_counter);
 	if (S_ISDIR(mode))
 		percpu_counter_inc(&sbi->s_dirs_counter);
-	sb->s_dirt = 1;
 
 	inode->i_uid = current_fsuid();
 	if (test_opt (sb, GRPID))
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index fcfa243..b0248c6 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2960,7 +2960,6 @@ static int ext3_do_update_inode(handle_t *handle,
 				ext3_update_dynamic_rev(sb);
 				EXT3_SET_RO_COMPAT_FEATURE(sb,
 					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
-				sb->s_dirt = 1;
 				handle->h_sync = 1;
 				err = ext3_journal_dirty_metadata(handle,
 						EXT3_SB(sb)->s_sbh);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 78fdf38..8a0b263 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -934,7 +934,6 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 			   EXT3_INODES_PER_GROUP(sb));
 
 	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
-	sb->s_dirt = 1;
 
 exit_journal:
 	unlock_super(sb);
@@ -1066,7 +1065,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
 	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	sb->s_dirt = 1;
 	unlock_super(sb);
 	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
 		   o_blocks_count + add);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3c70d52..1efd958 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -67,7 +67,6 @@ static const char *ext3_decode_error(struct super_block * sb, int errno,
 static int ext3_remount (struct super_block * sb, int * flags, char * data);
 static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
 static int ext3_unfreeze(struct super_block *sb);
-static void ext3_write_super (struct super_block * sb);
 static int ext3_freeze(struct super_block *sb);
 
 /*
@@ -761,7 +760,6 @@ static const struct super_operations ext3_sops = {
 	.dirty_inode	= ext3_dirty_inode,
 	.delete_inode	= ext3_delete_inode,
 	.put_super	= ext3_put_super,
-	.write_super	= ext3_write_super,
 	.sync_fs	= ext3_sync_fs,
 	.freeze_fs	= ext3_freeze,
 	.unfreeze_fs	= ext3_unfreeze,
@@ -1785,7 +1783,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 #else
 		es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
 #endif
-		sb->s_dirt = 1;
 	}
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
@@ -2265,7 +2262,6 @@ static int ext3_load_journal(struct super_block *sb,
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
-		sb->s_dirt = 1;
 
 		/* Make sure we flush the recovery flag to disk. */
 		ext3_commit_super(sb, es, 1);
@@ -2308,7 +2304,6 @@ static int ext3_create_journal(struct super_block * sb,
 	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
 
 	es->s_journal_inum = cpu_to_le32(journal_inum);
-	sb->s_dirt = 1;
 
 	/* Make sure we flush the recovery flag to disk. */
 	ext3_commit_super(sb, es, 1);
@@ -2354,7 +2349,6 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
 	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-		sb->s_dirt = 0;
 		ext3_commit_super(sb, es, 1);
 	}
 	unlock_super(sb);
@@ -2413,29 +2407,14 @@ int ext3_force_commit(struct super_block *sb)
 		return 0;
 
 	journal = EXT3_SB(sb)->s_journal;
-	sb->s_dirt = 0;
 	ret = ext3_journal_force_commit(journal);
 	return ret;
 }
 
-/*
- * Ext3 always journals updates to the superblock itself, so we don't
- * have to propagate any other updates to the superblock on disk at this
- * point.  (We can probably nuke this function altogether, and remove
- * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...)
- */
-static void ext3_write_super (struct super_block * sb)
-{
-	if (mutex_trylock(&sb->s_lock) != 0)
-		BUG();
-	sb->s_dirt = 0;
-}
-
 static int ext3_sync_fs(struct super_block *sb, int wait)
 {
 	tid_t target;
 
-	sb->s_dirt = 0;
 	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
 		if (wait)
 			log_wait_commit(EXT3_SB(sb)->s_journal, target);
@@ -2451,7 +2430,6 @@ static int ext3_freeze(struct super_block *sb)
 {
 	int error = 0;
 	journal_t *journal;
-	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		journal = EXT3_SB(sb)->s_journal;
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 83b7be8..545e37c 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -463,7 +463,6 @@ static void ext3_xattr_update_super_block(handle_t *handle,
 
 	if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
 		EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR);
-		sb->s_dirt = 1;
 		ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 	}
 }
-- 
cgit v0.10.2


From b7d245de25d1f0bb75a0d04194b647762b30d3db Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 27 Apr 2009 09:46:43 -0400
Subject: gfs2: remove ->write_super and stop maintaining ->s_dirt

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index aa62cf5..f2e449c 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -764,7 +764,6 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	}
 	gfs2_log_unlock(sdp);
 
-	sdp->sd_vfs->s_dirt = 0;
 	up_write(&sdp->sd_log_flush_lock);
 
 	kfree(ai);
@@ -823,7 +822,6 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	log_refund(sdp, tr);
 	buf_lo_incore_commit(sdp, tr);
 
-	sdp->sd_vfs->s_dirt = 1;
 	up_read(&sdp->sd_log_flush_lock);
 
 	gfs2_log_lock(sdp);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 40bcc37..0a68013 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -788,17 +788,6 @@ restart:
 }
 
 /**
- * gfs2_write_super
- * @sb: the superblock
- *
- */
-
-static void gfs2_write_super(struct super_block *sb)
-{
-	sb->s_dirt = 0;
-}
-
-/**
  * gfs2_sync_fs - sync the filesystem
  * @sb: the superblock
  *
@@ -807,7 +796,6 @@ static void gfs2_write_super(struct super_block *sb)
 
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
-	sb->s_dirt = 0;
 	if (wait && sb->s_fs_info)
 		gfs2_log_flush(sb->s_fs_info, NULL);
 	return 0;
@@ -1324,7 +1312,6 @@ const struct super_operations gfs2_super_ops = {
 	.write_inode		= gfs2_write_inode,
 	.delete_inode		= gfs2_delete_inode,
 	.put_super		= gfs2_put_super,
-	.write_super		= gfs2_write_super,
 	.sync_fs		= gfs2_sync_fs,
 	.freeze_fs 		= gfs2_freeze,
 	.unfreeze_fs		= gfs2_unfreeze,
-- 
cgit v0.10.2


From 94cb993f2ee99f3a9318e7b4dbb383497c4bedea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 27 Apr 2009 09:46:44 -0400
Subject: ocfs2: remove ->write_super and stop maintaining ->s_dirt

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 5c6163f..3eb076c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -126,7 +126,6 @@ static int ocfs2_get_sector(struct super_block *sb,
 			    struct buffer_head **bh,
 			    int block,
 			    int sect_size);
-static void ocfs2_write_super(struct super_block *sb);
 static struct inode *ocfs2_alloc_inode(struct super_block *sb);
 static void ocfs2_destroy_inode(struct inode *inode);
 static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
@@ -141,7 +140,6 @@ static const struct super_operations ocfs2_sops = {
 	.clear_inode	= ocfs2_clear_inode,
 	.delete_inode	= ocfs2_delete_inode,
 	.sync_fs	= ocfs2_sync_fs,
-	.write_super	= ocfs2_write_super,
 	.put_super	= ocfs2_put_super,
 	.remount_fs	= ocfs2_remount,
 	.show_options   = ocfs2_show_options,
@@ -365,24 +363,12 @@ static struct file_operations ocfs2_osb_debug_fops = {
 	.llseek =	generic_file_llseek,
 };
 
-/*
- * write_super and sync_fs ripped right out of ext3.
- */
-static void ocfs2_write_super(struct super_block *sb)
-{
-	if (mutex_trylock(&sb->s_lock) != 0)
-		BUG();
-	sb->s_dirt = 0;
-}
-
 static int ocfs2_sync_fs(struct super_block *sb, int wait)
 {
 	int status;
 	tid_t target;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
-	sb->s_dirt = 0;
-
 	if (ocfs2_is_hard_readonly(osb))
 		return -EROFS;
 
-- 
cgit v0.10.2


From 517bfae28353e996160518add4d00033d3886e61 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 27 Apr 2009 09:46:45 -0400
Subject: qnx4: remove ->write_super

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fe1f0f3..95c12fc 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -70,14 +70,6 @@ static void qnx4_delete_inode(struct inode *inode)
 	unlock_kernel();
 }
 
-static void qnx4_write_super(struct super_block *sb)
-{
-	lock_kernel();
-	QNX4DEBUG(("qnx4: write_super\n"));
-	sb->s_dirt = 0;
-	unlock_kernel();
-}
-
 static int qnx4_write_inode(struct inode *inode, int unused)
 {
 	struct qnx4_inode_entry *raw_inode;
@@ -138,7 +130,6 @@ static const struct super_operations qnx4_sops =
 #ifdef CONFIG_QNX4FS_RW
 	.write_inode	= qnx4_write_inode,
 	.delete_inode	= qnx4_delete_inode,
-	.write_super	= qnx4_write_super,
 #endif
 };
 
-- 
cgit v0.10.2


From 8c85e125124a473d6f3e9bb187b0b84207f81d91 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Apr 2009 18:00:26 +0200
Subject: remove ->write_super call in generic_shutdown_super

We just did a full fs writeout using sync_filesystem before, and if
that's not enough for the filesystem it can perform it's own writeout
in ->put_super, which many filesystems already do.

Move a call to foofs_write_super into every foofs_put_super for now to
guarantee identical behaviour until it's cleaned up by the individual
filesystem maintainers.

Exceptions:

 - affs already has identical copy & pasted code at the beginning of
   affs_put_super so no need to do it twice.
 - xfs does the right thing without it and I have changes pending for
   the xfs tree touching this are so I don't really need conflicts
   here..

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index cc4062d..4cf3d26 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,6 +30,7 @@ MODULE_LICENSE("GPL");
 #define dprintf(x...)
 #endif
 
+static void bfs_write_super(struct super_block *s);
 void dump_imap(const char *prefix, struct super_block *s);
 
 struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -216,6 +217,9 @@ static void bfs_put_super(struct super_block *s)
 	if (!info)
 		return;
 
+	if (s->s_dirt)
+		bfs_write_super(s);
+
 	brelse(info->si_sbh);
 	mutex_destroy(&info->bfs_lock);
 	kfree(info->si_imap);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 9f1985e..3cdb761 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -258,6 +258,9 @@ static void exofs_put_super(struct super_block *sb)
 	int num_pend;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
 
+	if (sb->s_dirt)
+		exofs_write_super(sb);
+
 	/* make sure there are no pending commands */
 	for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
 	     num_pend = atomic_read(&sbi->s_curr_pending)) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index e3c748f..932a2bc 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -114,6 +114,9 @@ static void ext2_put_super (struct super_block * sb)
 	int i;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 
+	if (sb->s_dirt)
+		ext2_write_super(sb);
+
 	ext2_xattr_put_super(sb);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		struct ext2_super_block *es = sbi->s_es;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f016707..c7b8f8d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -576,6 +576,9 @@ static void ext4_put_super(struct super_block *sb)
 	struct ext4_super_block *es = sbi->s_es;
 	int i, err;
 
+	if (sb->s_dirt)
+		ext4_write_super(sb);
+
 	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 296785a..4978621 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -451,6 +451,9 @@ static void fat_put_super(struct super_block *sb)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
+	if (sb->s_dirt)
+		fat_write_super(sb);
+
 	if (sbi->nls_disk) {
 		unload_nls(sbi->nls_disk);
 		sbi->nls_disk = NULL;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index a36bb74..e071e6d 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -65,6 +65,8 @@ static void hfs_write_super(struct super_block *sb)
  */
 static void hfs_put_super(struct super_block *sb)
 {
+	if (sb->s_dirt)
+		hfs_write_super(sb);
 	hfs_mdb_close(sb);
 	/* release the MDB's resources */
 	hfs_mdb_put(sb);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index f2a6402..40bdab7 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -199,6 +199,8 @@ static void hfsplus_put_super(struct super_block *sb)
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
 	if (!sb->s_fs_info)
 		return;
+	if (sb->s_dirt)
+		hfsplus_write_super(sb);
 	if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
 		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 4c4e18c..5059e96 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -174,6 +174,9 @@ static void jffs2_put_super (struct super_block *sb)
 
 	D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n"));
 
+	if (sb->s_dirt)
+		jffs2_write_super(sb);
+
 	mutex_lock(&c->alloc_sem);
 	jffs2_flush_wbuf_pad(c);
 	mutex_unlock(&c->alloc_sem);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6989b03..7901d8c 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -65,6 +65,7 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
 		   "(NILFS)");
 MODULE_LICENSE("GPL");
 
+static void nilfs_write_super(struct super_block *sb);
 static int nilfs_remount(struct super_block *sb, int *flags, char *data);
 static int test_exclusive_mount(struct file_system_type *fs_type,
 				struct block_device *bdev, int flags);
@@ -315,6 +316,9 @@ static void nilfs_put_super(struct super_block *sb)
 	struct nilfs_sb_info *sbi = NILFS_SB(sb);
 	struct the_nilfs *nilfs = sbi->s_nilfs;
 
+	if (sb->s_dirt)
+		nilfs_write_super(sb);
+
 	nilfs_detach_segment_constructor(sbi);
 
 	if (!(sb->s_flags & MS_RDONLY)) {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9dbdcfb..1b52daa 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -468,6 +468,9 @@ static void reiserfs_put_super(struct super_block *s)
 	struct reiserfs_transaction_handle th;
 	th.t_trans_id = 0;
 
+	if (s->s_dirt)
+		reiserfs_write_super(s);
+
 	/* change file system state to current state if it was mounted with read-write permissions */
 	if (!(s->s_flags & MS_RDONLY)) {
 		if (!journal_begin(&th, s, 10)) {
diff --git a/fs/super.c b/fs/super.c
index 721236e..d9a29d5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -311,8 +311,6 @@ void generic_shutdown_super(struct super_block *sb)
 		invalidate_inodes(sb);
 		lock_kernel();
 
-		if (sop->write_super && sb->s_dirt)
-			sop->write_super(sb);
 		if (sop->put_super)
 			sop->put_super(sb);
 
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index da20b48..cd80316 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -72,6 +72,9 @@ static void sysv_put_super(struct super_block *sb)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 
+	if (sb->s_dirt)
+		sysv_write_super(sb);
+
 	if (!(sb->s_flags & MS_RDONLY)) {
 		/* XXX ext2 also updates the state here */
 		mark_buffer_dirty(sbi->s_bh1);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6035929..74afb9f 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1152,6 +1152,9 @@ static void ufs_put_super(struct super_block *sb)
 		
 	UFSD("ENTER\n");
 
+	if (sb->s_dirt)
+		ufs_write_super(sb);
+
 	if (!(sb->s_flags & MS_RDONLY))
 		ufs_put_super_internal(sb);
 	
-- 
cgit v0.10.2


From f3da392e9ff14b9f388e74319e6d195848991c07 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 4 May 2009 03:32:03 +0400
Subject: dcache: extrace and use d_unlinked()

d_unlinked() will be used in middle-term to ban checkpointing when opened
but unlinked file is detected, and in long term, to detect such situation
and special case on it.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 75659a6..9e5cd3c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root,
 
 	spin_lock(&vfsmount_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+	if (d_unlinked(dentry) &&
 		(prepend(&end, &buflen, " (deleted)", 10) != 0))
 			goto Elong;
 
@@ -2035,7 +2035,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 
 	spin_lock(&dcache_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+	if (d_unlinked(dentry) &&
 		(prepend(&end, &buflen, "//deleted", 9) != 0))
 			goto Elong;
 	if (buflen < 1)
@@ -2097,9 +2097,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 	read_unlock(&current->fs->lock);
 
 	error = -ENOENT;
-	/* Has the current directory has been unlinked? */
 	spin_lock(&dcache_lock);
-	if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) {
+	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
 		struct path tmp = root;
 		char * cwd;
diff --git a/fs/namespace.c b/fs/namespace.c
index 120b8a6..7e537f0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1384,7 +1384,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 		goto out_unlock;
 
 	err = -ENOENT;
-	if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
+	if (!d_unlinked(path->dentry))
 		err = attach_recursive_mnt(mnt, path, NULL);
 out_unlock:
 	mutex_unlock(&path->dentry->d_inode->i_mutex);
@@ -1566,7 +1566,7 @@ static int do_move_mount(struct path *path, char *old_name)
 	if (IS_DEADDIR(path->dentry->d_inode))
 		goto out1;
 
-	if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
+	if (d_unlinked(path->dentry))
 		goto out1;
 
 	err = -EINVAL;
@@ -2129,9 +2129,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	error = -ENOENT;
 	if (IS_DEADDIR(new.dentry->d_inode))
 		goto out2;
-	if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
+	if (d_unlinked(new.dentry))
 		goto out2;
-	if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
+	if (d_unlinked(old.dentry))
 		goto out2;
 	error = -EBUSY;
 	if (new.mnt == root.mnt ||
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 72ce2ae..30b93b2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -353,6 +353,11 @@ static inline int d_unhashed(struct dentry *dentry)
 	return (dentry->d_flags & DCACHE_UNHASHED);
 }
 
+static inline int d_unlinked(struct dentry *dentry)
+{
+	return d_unhashed(dentry) && !IS_ROOT(dentry);
+}
+
 static inline struct dentry *dget_parent(struct dentry *dentry)
 {
 	struct dentry *ret;
-- 
cgit v0.10.2


From e5004753388dcf5e1b8a52ac0ab807d232340fbb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 5 May 2009 16:08:56 +0200
Subject: cleanup sync_supers

Merge the write_super helper into sync_super and move the check for
->write_super earlier so that we can avoid grabbing a reference to
a superblock that doesn't have it.

While we're at it also add a little comment documenting sync_supers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/super.c b/fs/super.c
index d9a29d5..cb19fff 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -399,16 +399,14 @@ void drop_super(struct super_block *sb)
 
 EXPORT_SYMBOL(drop_super);
 
-static inline void write_super(struct super_block *sb)
-{
-	lock_super(sb);
-	if (sb->s_root && sb->s_dirt)
-		if (sb->s_op->write_super)
-			sb->s_op->write_super(sb);
-	unlock_super(sb);
-}
-
-/*
+/**
+ * sync_supers - helper for periodic superblock writeback
+ *
+ * Call the write_super method if present on all dirty superblocks in
+ * the system.  This is for the periodic writeback used by most older
+ * filesystems.  For data integrity superblock writeback use
+ * sync_filesystems() instead.
+ *
  * Note: check the dirty flag before waiting, so we don't
  * hold up the sync while mounting a device. (The newly
  * mounted device won't need syncing.)
@@ -420,12 +418,17 @@ void sync_supers(void)
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (sb->s_dirt) {
+		if (sb->s_op->write_super && sb->s_dirt) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
+
 			down_read(&sb->s_umount);
-			write_super(sb);
+			lock_super(sb);
+			if (sb->s_root && sb->s_dirt)
+				sb->s_op->write_super(sb);
+			unlock_super(sb);
 			up_read(&sb->s_umount);
+
 			spin_lock(&sb_lock);
 			if (__put_super_and_need_restart(sb))
 				goto restart;
-- 
cgit v0.10.2


From 5af7926ff33b68b3ba46531471c6e0564b285efc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 5 May 2009 15:41:25 +0200
Subject: enforce ->sync_fs is only called for rw superblock

Make sure a superblock really is writeable by checking MS_RDONLY
under s_umount.  sync_filesystems needed some re-arragement for
that, but all but one sync_filesystem caller had the correct locking
already so that we could add that check there.  cachefiles grew
s_umount locking.

I've also added a WARN_ON to sync_filesystem to assert this for
future callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 52d8452..9f179d4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -394,9 +394,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 	struct btrfs_root *root = btrfs_sb(sb);
 	int ret;
 
-	if (sb->s_flags & MS_RDONLY)
-		return 0;
-
 	if (!wait) {
 		filemap_flush(root->fs_info->btree_inode->i_mapping);
 		return 0;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index dafd484..431accd 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -354,7 +354,9 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache)
 	/* make sure all pages pinned by operations on behalf of the netfs are
 	 * written to disc */
 	cachefiles_begin_secure(cache, &saved_cred);
+	down_read(&cache->mnt->mnt_sb->s_umount);
 	ret = sync_filesystem(cache->mnt->mnt_sb);
+	up_read(&cache->mnt->mnt_sb->s_umount);
 	cachefiles_end_secure(cache, saved_cred);
 
 	if (ret == -EIO)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1b52daa..3da0401 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -64,18 +64,15 @@ static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
 
 static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
-	if (!(s->s_flags & MS_RDONLY)) {
-		struct reiserfs_transaction_handle th;
-		reiserfs_write_lock(s);
-		if (!journal_begin(&th, s, 1))
-			if (!journal_end_sync(&th, s, 1))
-				reiserfs_flush_old_commits(s);
-		s->s_dirt = 0;	/* Even if it's not true.
-				 * We'll loop forever in sync_supers otherwise */
-		reiserfs_write_unlock(s);
-	} else {
-		s->s_dirt = 0;
-	}
+	struct reiserfs_transaction_handle th;
+
+	reiserfs_write_lock(s);
+	if (!journal_begin(&th, s, 1))
+		if (!journal_end_sync(&th, s, 1))
+			reiserfs_flush_old_commits(s);
+	s->s_dirt = 0;	/* Even if it's not true.
+			 * We'll loop forever in sync_supers otherwise */
+	reiserfs_write_unlock(s);
 	return 0;
 }
 
diff --git a/fs/sync.c b/fs/sync.c
index 4487b55..89c37f7 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -51,6 +51,18 @@ int sync_filesystem(struct super_block *sb)
 {
 	int ret;
 
+	/*
+	 * We need to be protected against the filesystem going from
+	 * r/o to r/w or vice versa.
+	 */
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+	/*
+	 * No point in syncing out anything if the filesystem is read-only.
+	 */
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
+
 	ret = __sync_filesystem(sb, 0);
 	if (ret < 0)
 		return ret;
@@ -79,25 +91,22 @@ static void sync_filesystems(int wait)
 
 	mutex_lock(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (sb->s_flags & MS_RDONLY)
-			continue;
+	list_for_each_entry(sb, &super_blocks, s_list)
 		sb->s_need_sync = 1;
-	}
 
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_need_sync)
 			continue;
 		sb->s_need_sync = 0;
-		if (sb->s_flags & MS_RDONLY)
-			continue;	/* hm.  Was remounted r/o meanwhile */
 		sb->s_count++;
 		spin_unlock(&sb_lock);
+
 		down_read(&sb->s_umount);
-		if (sb->s_root)
+		if (!(sb->s_flags & MS_RDONLY) && sb->s_root)
 			__sync_filesystem(sb, wait);
 		up_read(&sb->s_umount);
+
 		/* restart only when sb is no longer on the list */
 		spin_lock(&sb_lock);
 		if (__put_super_and_need_restart(sb))
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index e9f7a75..84f3c7f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -447,9 +447,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 	if (!wait)
 		return 0;
 
-	if (sb->s_flags & MS_RDONLY)
-		return 0;
-
 	/*
 	 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
 	 * pages, so synchronize them first, then commit the journal. Strictly
-- 
cgit v0.10.2


From 443b94baaa16771e98b29ca7c24f1e305738ffca Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 5 May 2009 23:48:50 -0400
Subject: Make sure that all callers of remount hold s_umount exclusive

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/super.c b/fs/super.c
index cb19fff..49f670c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -579,7 +579,7 @@ static void do_emergency_remount(struct work_struct *work)
 	list_for_each_entry(sb, &super_blocks, s_list) {
 		sb->s_count++;
 		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
+		down_write(&sb->s_umount);
 		if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) {
 			/*
 			 * ->remount_fs needs lock_kernel().
@@ -590,7 +590,8 @@ static void do_emergency_remount(struct work_struct *work)
 			do_remount_sb(sb, MS_RDONLY, NULL, 1);
 			unlock_kernel();
 		}
-		drop_super(sb);
+		up_write(&sb->s_umount);
+		put_super(sb);
 		spin_lock(&sb_lock);
 	}
 	spin_unlock(&sb_lock);
-- 
cgit v0.10.2


From 62c6943b4b1e818aea60c11c5a68a50785b83119 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 May 2009 03:12:29 -0400
Subject: Trim a bit of crap from fs.h

do_remount_sb() is fs/internal.h fodder, fsync_no_super() is long gone.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/internal.h b/fs/internal.h
index dbec3cc..d55ef56 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -78,3 +78,8 @@ extern void chroot_fs_refs(struct path *, struct path *);
  * file_table.c
  */
 extern void mark_files_ro(struct super_block *);
+
+/*
+ * super.c
+ */
+extern int do_remount_sb(struct super_block *, int, void *, int);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fb1822b..e7833ef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1942,7 +1942,6 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
-extern int fsync_no_super(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
@@ -2079,8 +2078,6 @@ extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
-extern int do_remount_sb(struct super_block *sb, int flags,
-			 void *data, int force);
 #ifdef CONFIG_BLOCK
 extern sector_t bmap(struct inode *, sector_t);
 #endif
-- 
cgit v0.10.2


From a9e220f8322e2b0e0b8903fe00265461cffad3f0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 5 May 2009 22:10:44 -0400
Subject: No need to do lock_super() for exclusion in generic_shutdown_super()

We can't run into contention on it.  All other callers of lock_super()
either hold s_umount (and we have it exclusive) or hold an active
reference to superblock in question, which prevents the call of
generic_shutdown_super() while the reference is held.  So we can
replace lock_super(s) with get_fs_excl() in generic_shutdown_super()
(and corresponding change for unlock_super(), of course).

Since ext4 expects s_lock held for its put_super, take lock_super()
into it.  The rest of filesystems do not care at all.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c7b8f8d..0d3034c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -576,6 +576,7 @@ static void ext4_put_super(struct super_block *sb)
 	struct ext4_super_block *es = sbi->s_es;
 	int i, err;
 
+	lock_super(sb);
 	if (sb->s_dirt)
 		ext4_write_super(sb);
 
@@ -645,7 +646,6 @@ static void ext4_put_super(struct super_block *sb)
 	unlock_super(sb);
 	kobject_put(&sbi->s_kobj);
 	wait_for_completion(&sbi->s_kobj_unregister);
-	lock_super(sb);
 	lock_kernel();
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
diff --git a/fs/super.c b/fs/super.c
index 49f670c..54fd331 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -304,7 +304,7 @@ void generic_shutdown_super(struct super_block *sb)
 	if (sb->s_root) {
 		shrink_dcache_for_umount(sb);
 		sync_filesystem(sb);
-		lock_super(sb);
+		get_fs_excl();
 		sb->s_flags &= ~MS_ACTIVE;
 
 		/* bad name - it should be evict_inodes() */
@@ -322,7 +322,7 @@ void generic_shutdown_super(struct super_block *sb)
 		}
 
 		unlock_kernel();
-		unlock_super(sb);
+		put_fs_excl();
 	}
 	spin_lock(&sb_lock);
 	/* should be initialized for __put_super_and_need_restart() */
-- 
cgit v0.10.2


From 6cfd0148425e528b859b26e436b01f23f6926224 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 5 May 2009 15:40:36 +0200
Subject: push BKL down into ->put_super

Move BKL into ->put_super from the only caller.  A couple of
filesystems had trivial enough ->put_super (only kfree and NULLing of
s_fs_info + stuff in there) to not get any locking: coda, cramfs, efs,
hugetlbfs, omfs, qnx4, shmem, all others got the full treatment.  Most
of them probably don't need it, but I'd rather sort that out individually.
Preferably after all the other BKL pushdowns in that area.

[AV: original used to move lock_super() down as well; these changes are
removed since we don't do lock_super() at all in generic_shutdown_super()
now]
[AV: fuse, btrfs and xfs are known to need no damn BKL, exempt]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index dd9becc..0ec5aaf 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -132,11 +132,15 @@ static void adfs_put_super(struct super_block *sb)
 	int i;
 	struct adfs_sb_info *asb = ADFS_SB(sb);
 
+	lock_kernel();
+
 	for (i = 0; i < asb->s_map_size; i++)
 		brelse(asb->s_map[i].dm_bh);
 	kfree(asb->s_map);
 	kfree(asb);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 63f5183..d738646 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -29,6 +29,8 @@ affs_put_super(struct super_block *sb)
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 	pr_debug("AFFS: put_super()\n");
 
+	lock_kernel();
+
 	if (!(sb->s_flags & MS_RDONLY)) {
 		AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1);
 		secs_to_datestamp(get_seconds(),
@@ -42,7 +44,8 @@ affs_put_super(struct super_block *sb)
 	affs_brelse(sbi->s_root_bh);
 	kfree(sbi);
 	sb->s_fs_info = NULL;
-	return;
+
+	unlock_kernel();
 }
 
 static void
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 76828e5..ad0514d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -440,8 +440,12 @@ static void afs_put_super(struct super_block *sb)
 
 	_enter("");
 
+	lock_kernel();
+
 	afs_put_volume(as->volume);
 
+	unlock_kernel();
+
 	_leave("");
 }
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 76afd0d..9367b62 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -737,6 +737,8 @@ parse_options(char *options, befs_mount_options * opts)
 static void
 befs_put_super(struct super_block *sb)
 {
+	lock_kernel();
+
 	kfree(BEFS_SB(sb)->mount_opts.iocharset);
 	BEFS_SB(sb)->mount_opts.iocharset = NULL;
 
@@ -747,7 +749,8 @@ befs_put_super(struct super_block *sb)
 
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
-	return;
+
+	unlock_kernel();
 }
 
 /* Allocate private field of the superblock, fill it.
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 4cf3d26..3a9a136 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -217,6 +217,8 @@ static void bfs_put_super(struct super_block *s)
 	if (!info)
 		return;
 
+	lock_kernel();
+
 	if (s->s_dirt)
 		bfs_write_super(s);
 
@@ -225,6 +227,8 @@ static void bfs_put_super(struct super_block *s)
 	kfree(info->si_imap);
 	kfree(info);
 	s->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0a10a59..0d92114 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -204,6 +204,9 @@ cifs_put_super(struct super_block *sb)
 		cFYI(1, ("Empty cifs superblock info passed to unmount"));
 		return;
 	}
+
+	lock_kernel();
+
 	rc = cifs_umount(sb, cifs_sb);
 	if (rc)
 		cERROR(1, ("cifs_umount failed with return code %d", rc));
@@ -216,7 +219,8 @@ cifs_put_super(struct super_block *sb)
 
 	unload_nls(cifs_sb->local_nls);
 	kfree(cifs_sb);
-	return;
+
+	unlock_kernel();
 }
 
 static int
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index fa4c7e7..12d6496 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/key.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include "ecryptfs_kernel.h"
@@ -120,9 +121,13 @@ static void ecryptfs_put_super(struct super_block *sb)
 {
 	struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
 
+	lock_kernel();
+
 	ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
 	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
 	ecryptfs_set_superblock_private(sb, NULL);
+
+	unlock_kernel();
 }
 
 /**
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 3cdb761..cd1f8b1 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -258,6 +258,8 @@ static void exofs_put_super(struct super_block *sb)
 	int num_pend;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
 
+	lock_kernel();
+
 	if (sb->s_dirt)
 		exofs_write_super(sb);
 
@@ -274,6 +276,8 @@ static void exofs_put_super(struct super_block *sb)
 	osduld_put_device(sbi->s_dev);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 /*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 932a2bc..a44963d 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -114,6 +114,8 @@ static void ext2_put_super (struct super_block * sb)
 	int i;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 
+	lock_kernel();
+
 	if (sb->s_dirt)
 		ext2_write_super(sb);
 
@@ -138,7 +140,7 @@ static void ext2_put_super (struct super_block * sb)
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 
-	return;
+	unlock_kernel();
 }
 
 static struct kmem_cache * ext2_inode_cachep;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 1efd958..546b8d7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -398,6 +398,8 @@ static void ext3_put_super (struct super_block * sb)
 	struct ext3_super_block *es = sbi->s_es;
 	int i, err;
 
+	lock_kernel();
+
 	ext3_xattr_put_super(sb);
 	err = journal_destroy(sbi->s_journal);
 	sbi->s_journal = NULL;
@@ -446,7 +448,8 @@ static void ext3_put_super (struct super_block * sb)
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-	return;
+
+	unlock_kernel();
 }
 
 static struct kmem_cache *ext3_inode_cachep;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0d3034c..1d4180b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -577,6 +577,7 @@ static void ext4_put_super(struct super_block *sb)
 	int i, err;
 
 	lock_super(sb);
+	lock_kernel();
 	if (sb->s_dirt)
 		ext4_write_super(sb);
 
@@ -646,7 +647,6 @@ static void ext4_put_super(struct super_block *sb)
 	unlock_super(sb);
 	kobject_put(&sbi->s_kobj);
 	wait_for_completion(&sbi->s_kobj_unregister);
-	lock_kernel();
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 4978621..2b88c93 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -451,6 +451,8 @@ static void fat_put_super(struct super_block *sb)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
+	lock_kernel();
+
 	if (sb->s_dirt)
 		fat_write_super(sb);
 
@@ -470,6 +472,8 @@ static void fat_put_super(struct super_block *sb)
 
 	sb->s_fs_info = NULL;
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 static struct kmem_cache *fat_inode_cachep;
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 1dacda8..cdbd165 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -80,12 +80,16 @@ vxfs_put_super(struct super_block *sbp)
 {
 	struct vxfs_sb_info	*infp = VXFS_SBI(sbp);
 
+	lock_kernel();
+
 	vxfs_put_fake_inode(infp->vsi_fship);
 	vxfs_put_fake_inode(infp->vsi_ilist);
 	vxfs_put_fake_inode(infp->vsi_stilist);
 
 	brelse(infp->vsi_bp);
 	kfree(infp);
+
+	unlock_kernel();
 }
 
 /**
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0a68013..c8930b3 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -719,6 +719,8 @@ static void gfs2_put_super(struct super_block *sb)
 	int error;
 	struct gfs2_jdesc *jd;
 
+	lock_kernel();
+
 	/*  Unfreeze the filesystem, if we need to  */
 
 	mutex_lock(&sdp->sd_freeze_lock);
@@ -785,6 +787,8 @@ restart:
 
 	/*  At this point, we're through participating in the lockspace  */
 	gfs2_sys_fs_del(sdp);
+
+	unlock_kernel();
 }
 
 /**
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index e071e6d..9f5eaa0 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -65,11 +65,15 @@ static void hfs_write_super(struct super_block *sb)
  */
 static void hfs_put_super(struct super_block *sb)
 {
+	lock_kernel();
+
 	if (sb->s_dirt)
 		hfs_write_super(sb);
 	hfs_mdb_close(sb);
 	/* release the MDB's resources */
 	hfs_mdb_put(sb);
+
+	unlock_kernel();
 }
 
 /*
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 40bdab7..9b292dc 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -199,6 +199,9 @@ static void hfsplus_put_super(struct super_block *sb)
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
 	if (!sb->s_fs_info)
 		return;
+
+	lock_kernel();
+
 	if (sb->s_dirt)
 		hfsplus_write_super(sb);
 	if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
@@ -220,6 +223,8 @@ static void hfsplus_put_super(struct super_block *sb)
 		unload_nls(HFSPLUS_SB(sb).nls);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index fc77965..437a32e 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -99,11 +99,16 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2,
 static void hpfs_put_super(struct super_block *s)
 {
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
+
+	lock_kernel();
+
 	kfree(sbi->sb_cp_table);
 	kfree(sbi->sb_bmp_dir);
 	unmark_dirty(s);
 	s->s_fs_info = NULL;
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index b4cbe96..068b34b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -42,11 +42,16 @@ static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qst
 static void isofs_put_super(struct super_block *sb)
 {
 	struct isofs_sb_info *sbi = ISOFS_SB(sb);
+
 #ifdef CONFIG_JOLIET
+	lock_kernel();
+
 	if (sbi->s_nls_iocharset) {
 		unload_nls(sbi->s_nls_iocharset);
 		sbi->s_nls_iocharset = NULL;
 	}
+
+	unlock_kernel();
 #endif
 
 	kfree(sbi);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 5059e96..37b1212 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -174,6 +174,8 @@ static void jffs2_put_super (struct super_block *sb)
 
 	D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n"));
 
+	lock_kernel();
+
 	if (sb->s_dirt)
 		jffs2_write_super(sb);
 
@@ -195,6 +197,8 @@ static void jffs2_put_super (struct super_block *sb)
 	if (c->mtd->sync)
 		c->mtd->sync(c->mtd);
 
+	unlock_kernel();
+
 	D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
 }
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index d9b0e92..3eb13ad 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -183,6 +183,9 @@ static void jfs_put_super(struct super_block *sb)
 	int rc;
 
 	jfs_info("In jfs_put_super");
+
+	lock_kernel();
+
 	rc = jfs_umount(sb);
 	if (rc)
 		jfs_err("jfs_umount failed with return code %d", rc);
@@ -195,6 +198,8 @@ static void jfs_put_super(struct super_block *sb)
 	sbi->direct_inode = NULL;
 
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 enum {
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index daad3c2..7eb5397 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -35,6 +35,8 @@ static void minix_put_super(struct super_block *sb)
 	int i;
 	struct minix_sb_info *sbi = minix_sb(sb);
 
+	lock_kernel();
+
 	if (!(sb->s_flags & MS_RDONLY)) {
 		if (sbi->s_version != MINIX_V3)	 /* s_state is now out from V3 sb */
 			sbi->s_ms->s_state = sbi->s_mount_state;
@@ -49,7 +51,7 @@ static void minix_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 
-	return;
+	unlock_kernel();
 }
 
 static struct kmem_cache * minix_inode_cachep;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index d642f0e..b99ce20 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -736,6 +736,8 @@ static void ncp_put_super(struct super_block *sb)
 {
 	struct ncp_server *server = NCP_SBP(sb);
 
+	lock_kernel();
+
 	ncp_lock_server(server);
 	ncp_disconnect(server);
 	ncp_unlock_server(server);
@@ -769,6 +771,8 @@ static void ncp_put_super(struct super_block *sb)
 	vfree(server->packet);
 	sb->s_fs_info = NULL;
 	kfree(server);
+
+	unlock_kernel();
 }
 
 static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 7901d8c..7262e84 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -316,6 +316,8 @@ static void nilfs_put_super(struct super_block *sb)
 	struct nilfs_sb_info *sbi = NILFS_SB(sb);
 	struct the_nilfs *nilfs = sbi->s_nilfs;
 
+	lock_kernel();
+
 	if (sb->s_dirt)
 		nilfs_write_super(sb);
 
@@ -333,6 +335,8 @@ static void nilfs_put_super(struct super_block *sb)
 	sbi->s_super = NULL;
 	sb->s_fs_info = NULL;
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 /**
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 6aa7c47..a9ec4e1 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2246,6 +2246,9 @@ static void ntfs_put_super(struct super_block *sb)
 	ntfs_volume *vol = NTFS_SB(sb);
 
 	ntfs_debug("Entering.");
+
+	lock_kernel();
+
 #ifdef NTFS_RW
 	/*
 	 * Commit all inodes while they are still open in case some of them
@@ -2444,7 +2447,8 @@ static void ntfs_put_super(struct super_block *sb)
 	}
 	sb->s_fs_info = NULL;
 	kfree(vol);
-	return;
+
+	unlock_kernel();
 }
 
 /**
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 3eb076c..0273759 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1536,9 +1536,13 @@ static void ocfs2_put_super(struct super_block *sb)
 {
 	mlog_entry("(0x%p)\n", sb);
 
+	lock_kernel();
+
 	ocfs2_sync_blockdev(sb);
 	ocfs2_dismount_volume(sb, 0);
 
+	unlock_kernel();
+
 	mlog_exit_void();
 }
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3da0401..90dcb7b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -465,6 +465,8 @@ static void reiserfs_put_super(struct super_block *s)
 	struct reiserfs_transaction_handle th;
 	th.t_trans_id = 0;
 
+	lock_kernel();
+
 	if (s->s_dirt)
 		reiserfs_write_super(s);
 
@@ -500,7 +502,7 @@ static void reiserfs_put_super(struct super_block *s)
 	kfree(s->s_fs_info);
 	s->s_fs_info = NULL;
 
-	return;
+	unlock_kernel();
 }
 
 static struct kmem_cache *reiserfs_inode_cachep;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index fc27fbf..1402d2d 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -474,6 +474,8 @@ smb_put_super(struct super_block *sb)
 {
 	struct smb_sb_info *server = SMB_SB(sb);
 
+	lock_kernel();
+
 	smb_lock_server(server);
 	server->state = CONN_INVALID;
 	smbiod_unregister_server(server);
@@ -489,6 +491,8 @@ smb_put_super(struct super_block *sb)
 	smb_unlock_server(server);
 	put_pid(server->conn_pid);
 	kfree(server);
+
+	unlock_kernel();
 }
 
 static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 0adc624..3b52770 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -338,6 +338,8 @@ static int squashfs_remount(struct super_block *sb, int *flags, char *data)
 
 static void squashfs_put_super(struct super_block *sb)
 {
+	lock_kernel();
+
 	if (sb->s_fs_info) {
 		struct squashfs_sb_info *sbi = sb->s_fs_info;
 		squashfs_cache_delete(sbi->block_cache);
@@ -350,6 +352,8 @@ static void squashfs_put_super(struct super_block *sb)
 		kfree(sb->s_fs_info);
 		sb->s_fs_info = NULL;
 	}
+
+	unlock_kernel();
 }
 
 
diff --git a/fs/super.c b/fs/super.c
index 54fd331..bdd7158 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -309,7 +309,6 @@ void generic_shutdown_super(struct super_block *sb)
 
 		/* bad name - it should be evict_inodes() */
 		invalidate_inodes(sb);
-		lock_kernel();
 
 		if (sop->put_super)
 			sop->put_super(sb);
@@ -320,8 +319,6 @@ void generic_shutdown_super(struct super_block *sb)
 			   "Self-destruct in 5 seconds.  Have a nice day...\n",
 			   sb->s_id);
 		}
-
-		unlock_kernel();
 		put_fs_excl();
 	}
 	spin_lock(&sb_lock);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index cd80316..a818986 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -72,6 +72,8 @@ static void sysv_put_super(struct super_block *sb)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 
+	lock_kernel();
+
 	if (sb->s_dirt)
 		sysv_write_super(sb);
 
@@ -87,6 +89,8 @@ static void sysv_put_super(struct super_block *sb)
 		brelse(sbi->s_bh2);
 
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 84f3c7f..522c3fd 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1684,6 +1684,9 @@ static void ubifs_put_super(struct super_block *sb)
 
 	ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
 		  c->vi.vol_id);
+
+	lock_kernel();
+
 	/*
 	 * The following asserts are only valid if there has not been a failure
 	 * of the media. For example, there will be dirty inodes if we failed
@@ -1750,6 +1753,8 @@ static void ubifs_put_super(struct super_block *sb)
 	ubi_close_volume(c->ubi);
 	mutex_unlock(&c->umount_mutex);
 	kfree(c);
+
+	unlock_kernel();
 }
 
 static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0ba4410..04802cc 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -2062,6 +2062,9 @@ static void udf_put_super(struct super_block *sb)
 	struct udf_sb_info *sbi;
 
 	sbi = UDF_SB(sb);
+
+	lock_kernel();
+
 	if (sbi->s_vat_inode)
 		iput(sbi->s_vat_inode);
 	if (sbi->s_partitions)
@@ -2077,6 +2080,8 @@ static void udf_put_super(struct super_block *sb)
 	kfree(sbi->s_partmaps);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int udf_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 74afb9f..2b4d2b6 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -594,6 +594,9 @@ static void ufs_put_super_internal(struct super_block *sb)
 
 	
 	UFSD("ENTER\n");
+
+	lock_kernel();
+
 	ufs_put_cstotal(sb);
 	size = uspi->s_cssize;
 	blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -621,6 +624,9 @@ static void ufs_put_super_internal(struct super_block *sb)
 		brelse (sbi->s_ucg[i]);
 	kfree (sbi->s_ucg);
 	kfree (base);
+
+	unlock_kernel();
+
 	UFSD("EXIT\n");
 }
 
-- 
cgit v0.10.2


From bbd6851a3213a525128473e978b692ab6ac11aba Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 6 May 2009 10:43:07 -0400
Subject: Push lock_super() into the ->remount_fs() of filesystems that care
 about it

Note that since we can't run into contention between remount_fs and write_super
(due to exclusion on s_umount), we have to care only about filesystems that
touch lock_super() on their own.  Out of those ext3, ext4, hpfs, sysv and ufs
do need it; fat doesn't since its ->remount_fs() only accesses assign-once
data (basically, it's "we have no atime on directories and only have atime on
files for vfat; force nodiratime and possibly noatime into *flags").

[folded a build fix from hch]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 546b8d7..e213a26 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2491,6 +2491,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 #endif
 
 	/* Store the original options */
+	lock_super(sb);
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
 	old_opts.s_resuid = sbi->s_resuid;
@@ -2598,6 +2599,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
 			kfree(old_opts.s_qf_names[i]);
 #endif
+	unlock_super(sb);
 	return 0;
 restore_opts:
 	sb->s_flags = old_sb_flags;
@@ -2614,6 +2616,7 @@ restore_opts:
 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
 	}
 #endif
+	unlock_super(sb);
 	return err;
 }
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1d4180b..a9c6834 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3421,6 +3421,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #endif
 
 	/* Store the original options */
+	lock_super(sb);
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
 	old_opts.s_resuid = sbi->s_resuid;
@@ -3554,6 +3555,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
 			kfree(old_opts.s_qf_names[i]);
 #endif
+	unlock_super(sb);
 	return 0;
 
 restore_opts:
@@ -3573,6 +3575,7 @@ restore_opts:
 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
 	}
 #endif
+	unlock_super(sb);
 	return err;
 }
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 437a32e..f68193c 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -398,6 +398,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 	
 	*flags |= MS_NOATIME;
 	
+	lock_super(s);
 	uid = sbi->sb_uid; gid = sbi->sb_gid;
 	umask = 0777 & ~sbi->sb_mode;
 	lowercase = sbi->sb_lowercase; conv = sbi->sb_conv;
@@ -430,9 +431,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 
 	replace_mount_options(s, new_opts);
 
+	unlock_super(s);
 	return 0;
 
 out_err:
+	unlock_super(s);
 	kfree(new_opts);
 	return -EINVAL;
 }
diff --git a/fs/super.c b/fs/super.c
index bdd7158..2a49fed 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -556,9 +556,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
 
 	if (sb->s_op->remount_fs) {
-		lock_super(sb);
 		retval = sb->s_op->remount_fs(sb, &flags, data);
-		unlock_super(sb);
 		if (retval)
 			return retval;
 	}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index a818986..e0a39f1 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -61,10 +61,12 @@ clean:
 static int sysv_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
+	lock_super(sb);
 	if (sbi->s_forced_ro)
 		*flags |= MS_RDONLY;
 	if (!(*flags & MS_RDONLY))
 		sb->s_dirt = 1;
+	unlock_super(sb);
 	return 0;
 }
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2b4d2b6..a5ecabf 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1181,6 +1181,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	unsigned new_mount_opt, ufstype;
 	unsigned flags;
 	
+	lock_super(sb);
 	uspi = UFS_SB(sb)->s_uspi;
 	flags = UFS_SB(sb)->s_flags;
 	usb1 = ubh_get_usb_first(uspi);
@@ -1193,17 +1194,21 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	ufstype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE;
 	new_mount_opt = 0;
 	ufs_set_opt (new_mount_opt, ONERROR_LOCK);
-	if (!ufs_parse_options (data, &new_mount_opt))
+	if (!ufs_parse_options (data, &new_mount_opt)) {
+		unlock_super(sb);
 		return -EINVAL;
+	}
 	if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
 		new_mount_opt |= ufstype;
 	} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
 		printk("ufstype can't be changed during remount\n");
+		unlock_super(sb);
 		return -EINVAL;
 	}
 
 	if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
 		UFS_SB(sb)->s_mount_opt = new_mount_opt;
+		unlock_super(sb);
 		return 0;
 	}
 	
@@ -1228,6 +1233,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 #ifndef CONFIG_UFS_FS_WRITE
 		printk("ufs was compiled with read-only support, "
 		"can't be mounted as read-write\n");
+		unlock_super(sb);
 		return -EINVAL;
 #else
 		if (ufstype != UFS_MOUNT_UFSTYPE_SUN && 
@@ -1236,16 +1242,19 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 		    ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
 		    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
 			printk("this ufstype is read-only supported\n");
+			unlock_super(sb);
 			return -EINVAL;
 		}
 		if (!ufs_read_cylinder_structures(sb)) {
 			printk("failed during remounting\n");
+			unlock_super(sb);
 			return -EPERM;
 		}
 		sb->s_flags &= ~MS_RDONLY;
 #endif
 	}
 	UFS_SB(sb)->s_mount_opt = new_mount_opt;
+	unlock_super(sb);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 6fac98dd218653c6aff8a0f56305c424930cea2a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 May 2009 13:31:17 -0400
Subject: Push BKL into do_mount()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 42ee059..9a3334a 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -371,8 +371,6 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
 	int retval = -EINVAL;
 	char *name;
 
-	lock_kernel();
-
 	name = getname(path);
 	retval = PTR_ERR(name);
 	if (IS_ERR(name))
@@ -392,7 +390,6 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
 	}
 	putname(name);
  out:
-	unlock_kernel();
 	return retval;
 }
 
diff --git a/fs/compat.c b/fs/compat.c
index bb2a9b2..6aefb77 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -812,10 +812,8 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
 		}
 	}
 
-	lock_kernel();
 	retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
 			flags, (void*)data_page);
-	unlock_kernel();
 
  out4:
 	free_page(data_page);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7e537f0..4740f7b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1921,6 +1921,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 	if (retval)
 		goto dput_out;
 
+	lock_kernel();
 	if (flags & MS_REMOUNT)
 		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
@@ -1933,6 +1934,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 	else
 		retval = do_new_mount(&path, type_page, flags, mnt_flags,
 				      dev_name, data_page);
+	unlock_kernel();
 dput_out:
 	path_put(&path);
 	return retval;
@@ -2046,10 +2048,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	if (retval < 0)
 		goto out3;
 
-	lock_kernel();
 	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
 			  flags, (void *)data_page);
-	unlock_kernel();
 	free_page(data_page);
 
 out3:
-- 
cgit v0.10.2


From 7f78d4cd4c5d01864943c22b79df1b6bde923129 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 May 2009 13:34:06 -0400
Subject: Push BKL down beyond VFS-only parts of do_mount()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namespace.c b/fs/namespace.c
index 4740f7b..b94325f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1515,8 +1515,11 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	down_write(&sb->s_umount);
 	if (flags & MS_BIND)
 		err = change_mount_flags(path->mnt, flags);
-	else
+	else {
+		lock_kernel();
 		err = do_remount_sb(sb, flags, data, 0);
+		unlock_kernel();
+	}
 	if (!err)
 		path->mnt->mnt_flags = mnt_flags;
 	up_write(&sb->s_umount);
@@ -1630,7 +1633,9 @@ static int do_new_mount(struct path *path, char *type, int flags,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	lock_kernel();
 	mnt = do_kern_mount(type, flags, name, data);
+	unlock_kernel();
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
@@ -1921,7 +1926,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 	if (retval)
 		goto dput_out;
 
-	lock_kernel();
 	if (flags & MS_REMOUNT)
 		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
 				    data_page);
@@ -1934,7 +1938,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 	else
 		retval = do_new_mount(&path, type_page, flags, mnt_flags,
 				      dev_name, data_page);
-	unlock_kernel();
 dput_out:
 	path_put(&path);
 	return retval;
-- 
cgit v0.10.2


From 4aa98cf768b6f2ea4b204620d949a665959214f6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 May 2009 13:36:58 -0400
Subject: Push BKL down into do_remount_sb()

[folded fix from Jiri Slaby]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namespace.c b/fs/namespace.c
index b94325f..2dd333b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1060,11 +1060,8 @@ static int do_umount(struct vfsmount *mnt, int flags)
 		 * we just try to remount it readonly.
 		 */
 		down_write(&sb->s_umount);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			lock_kernel();
+		if (!(sb->s_flags & MS_RDONLY))
 			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
-			unlock_kernel();
-		}
 		up_write(&sb->s_umount);
 		return retval;
 	}
@@ -1515,11 +1512,8 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	down_write(&sb->s_umount);
 	if (flags & MS_BIND)
 		err = change_mount_flags(path->mnt, flags);
-	else {
-		lock_kernel();
+	else
 		err = do_remount_sb(sb, flags, data, 0);
-		unlock_kernel();
-	}
 	if (!err)
 		path->mnt->mnt_flags = mnt_flags;
 	up_write(&sb->s_umount);
diff --git a/fs/super.c b/fs/super.c
index 2a49fed..a64f362 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -542,25 +542,33 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	shrink_dcache_sb(sb);
 	sync_filesystem(sb);
 
+	lock_kernel();
 	/* If we are remounting RDONLY and current sb is read/write,
 	   make sure there are no rw files opened */
 	if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
 		if (force)
 			mark_files_ro(sb);
-		else if (!fs_may_remount_ro(sb))
+		else if (!fs_may_remount_ro(sb)) {
+			unlock_kernel();
 			return -EBUSY;
+		}
 		retval = vfs_dq_off(sb, 1);
-		if (retval < 0 && retval != -ENOSYS)
+		if (retval < 0 && retval != -ENOSYS) {
+			unlock_kernel();
 			return -EBUSY;
+		}
 	}
 	remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
 
 	if (sb->s_op->remount_fs) {
 		retval = sb->s_op->remount_fs(sb, &flags, data);
-		if (retval)
+		if (retval) {
+			unlock_kernel();
 			return retval;
+		}
 	}
 	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
+	unlock_kernel();
 	if (remount_rw)
 		vfs_dq_quota_on_remount(sb);
 	return 0;
@@ -581,9 +589,7 @@ static void do_emergency_remount(struct work_struct *work)
 			 *
 			 * What lock protects sb->s_flags??
 			 */
-			lock_kernel();
 			do_remount_sb(sb, MS_RDONLY, NULL, 1);
-			unlock_kernel();
 		}
 		up_write(&sb->s_umount);
 		put_super(sb);
-- 
cgit v0.10.2


From 01ba687577647beef6c5f2ea59bfb56fac9fcde2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 11 May 2009 23:34:27 +0200
Subject: jffs2: move jffs2_write_super to super.c

jffs2_write_super is only called from super.c and doesn't use any
functionality from fs.c.  So move it over to super.c and make it
static there.

[should go in through the vfs tree as it is a requirement for the
 next patch]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 249305d..237b27a 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -402,21 +402,6 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
-void jffs2_write_super (struct super_block *sb)
-{
-	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
-	sb->s_dirt = 0;
-
-	if (sb->s_flags & MS_RDONLY)
-		return;
-
-	D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
-	jffs2_garbage_collect_trigger(c);
-	jffs2_erase_pending_blocks(c, 0);
-	jffs2_flush_wbuf_gc(c, 0);
-}
-
-
 /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
    fill in the raw_inode while you're at it. */
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri)
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 5e194a5..2228380 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -181,7 +181,6 @@ void jffs2_dirty_inode(struct inode *inode);
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
 			       struct jffs2_raw_inode *ri);
 int jffs2_statfs (struct dentry *, struct kstatfs *);
-void jffs2_write_super (struct super_block *);
 int jffs2_remount_fs (struct super_block *, int *, char *);
 int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
 void jffs2_gc_release_inode(struct jffs2_sb_info *c,
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 37b1212..a80a50e 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -53,6 +53,20 @@ static void jffs2_i_init_once(void *foo)
 	inode_init_once(&f->vfs_inode);
 }
 
+static void jffs2_write_super(struct super_block *sb)
+{
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+	sb->s_dirt = 0;
+
+	if (sb->s_flags & MS_RDONLY)
+		return;
+
+	D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
+	jffs2_garbage_collect_trigger(c);
+	jffs2_erase_pending_blocks(c, 0);
+	jffs2_flush_wbuf_gc(c, 0);
+}
+
 static int jffs2_sync_fs(struct super_block *sb, int wait)
 {
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
-- 
cgit v0.10.2


From ebc1ac164560a241d9bf1b7519062910c3f90a01 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 11 May 2009 23:35:03 +0200
Subject: ->write_super lock_super pushdown

Push down lock_super into ->write_super instances and remove it from the
caller.

Following filesystem don't need ->s_lock in ->write_super and are skipped:

 * bfs, nilfs2 - no other uses of s_lock and have internal locks in
	->write_super
 * ext2 - uses BKL in ext2_write_super and has internal calls without s_lock
 * reiserfs - no other uses of s_lock as has reiserfs_write_lock (BKL) in
 	->write_super
 * xfs - no other uses of s_lock and uses internal lock (buffer lock on
	superblock buffer) to serialize ->write_super.  Also xfs_fs_write_super
	is superflous and will go away in the next merge window

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/affs/super.c b/fs/affs/super.c
index d738646..280d361 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -54,6 +54,7 @@ affs_write_super(struct super_block *sb)
 	int clean = 2;
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 
+	lock_super(sb);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		//	if (sbi->s_bitmap[i].bm_bh) {
 		//		if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
@@ -66,6 +67,7 @@ affs_write_super(struct super_block *sb)
 		sb->s_dirt = !clean;	/* redo until bitmap synced */
 	} else
 		sb->s_dirt = 0;
+	unlock_super(sb);
 
 	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
 }
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index cd1f8b1..49e16af 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -214,6 +214,7 @@ static void exofs_write_super(struct super_block *sb)
 		return;
 	}
 
+	lock_super(sb);
 	lock_kernel();
 	sbi = sb->s_fs_info;
 	fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -246,6 +247,7 @@ out:
 	if (or)
 		osd_end_request(or);
 	unlock_kernel();
+	unlock_super(sb);
 	kfree(fscb);
 }
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a9c6834..c17200a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -579,7 +579,7 @@ static void ext4_put_super(struct super_block *sb)
 	lock_super(sb);
 	lock_kernel();
 	if (sb->s_dirt)
-		ext4_write_super(sb);
+		ext4_commit_super(sb, 1);
 
 	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
@@ -3336,7 +3336,9 @@ int ext4_force_commit(struct super_block *sb)
 
 static void ext4_write_super(struct super_block *sb)
 {
+	lock_super(sb);
 	ext4_commit_super(sb, 1);
+	unlock_super(sb);
 }
 
 static int ext4_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 2b88c93..2292cbf 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -441,10 +441,12 @@ static void fat_clear_inode(struct inode *inode)
 
 static void fat_write_super(struct super_block *sb)
 {
+	lock_super(sb);
 	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY))
 		fat_clusters_flush(sb);
+	unlock_super(sb);
 }
 
 static void fat_put_super(struct super_block *sb)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 9f5eaa0..3aac417 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -49,11 +49,13 @@ MODULE_LICENSE("GPL");
  */
 static void hfs_write_super(struct super_block *sb)
 {
+	lock_super(sb);
 	sb->s_dirt = 0;
-	if (sb->s_flags & MS_RDONLY)
-		return;
+
 	/* sync everything to the buffers */
-	hfs_mdb_commit(sb);
+	if (!(sb->s_flags & MS_RDONLY))
+		hfs_mdb_commit(sb);
+	unlock_super(sb);
 }
 
 /*
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9b292dc..1aab8aa 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -157,10 +157,12 @@ static void hfsplus_write_super(struct super_block *sb)
 	struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
 	dprint(DBG_SUPER, "hfsplus_write_super\n");
+
+	lock_super(sb);
 	sb->s_dirt = 0;
 	if (sb->s_flags & MS_RDONLY)
 		/* warn? */
-		return;
+		goto out;
 
 	vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks);
 	vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc);
@@ -192,6 +194,8 @@ static void hfsplus_write_super(struct super_block *sb)
 		}
 		HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP;
 	}
+ out:
+	unlock_super(sb);
 }
 
 static void hfsplus_put_super(struct super_block *sb)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index a80a50e..f7bfd3a 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -56,15 +56,18 @@ static void jffs2_i_init_once(void *foo)
 static void jffs2_write_super(struct super_block *sb)
 {
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+
+	lock_super(sb);
 	sb->s_dirt = 0;
 
-	if (sb->s_flags & MS_RDONLY)
-		return;
+	if (!(sb->s_flags & MS_RDONLY)) {
+		D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
+		jffs2_garbage_collect_trigger(c);
+		jffs2_erase_pending_blocks(c, 0);
+		jffs2_flush_wbuf_gc(c, 0);
+	}
 
-	D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
-	jffs2_garbage_collect_trigger(c);
-	jffs2_erase_pending_blocks(c, 0);
-	jffs2_flush_wbuf_gc(c, 0);
+	unlock_super(sb);
 }
 
 static int jffs2_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/super.c b/fs/super.c
index a64f362..1905f4a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -420,10 +420,8 @@ restart:
 			spin_unlock(&sb_lock);
 
 			down_read(&sb->s_umount);
-			lock_super(sb);
 			if (sb->s_root && sb->s_dirt)
 				sb->s_op->write_super(sb);
-			unlock_super(sb);
 			up_read(&sb->s_umount);
 
 			spin_lock(&sb_lock);
diff --git a/fs/sync.c b/fs/sync.c
index 89c37f7..e9d56f6 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -33,10 +33,8 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	else
 		sync_quota_sb(sb, -1);
 	sync_inodes_sb(sb, wait);
-	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
 		sb->s_op->write_super(sb);
-	unlock_super(sb);
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
 	return __sync_blockdev(sb->s_bdev, wait);
@@ -164,10 +162,8 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 
 	/* sync the superblock to buffers */
 	sb = inode->i_sb;
-	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
 		sb->s_op->write_super(sb);
-	unlock_super(sb);
 
 	/* .. finally sync the buffers to disk */
 	err = sync_blockdev(sb->s_bdev);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index e0a39f1..a3f45fc 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -37,6 +37,7 @@ static void sysv_write_super(struct super_block *sb)
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 	unsigned long time = get_seconds(), old_time;
 
+	lock_super(sb);
 	lock_kernel();
 	if (sb->s_flags & MS_RDONLY)
 		goto clean;
@@ -56,6 +57,7 @@ static void sysv_write_super(struct super_block *sb)
 clean:
 	sb->s_dirt = 0;
 	unlock_kernel();
+	unlock_super(sb);
 }
 
 static int sysv_remount(struct super_block *sb, int *flags, char *data)
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index a5ecabf..c97210e 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1131,6 +1131,7 @@ static void ufs_write_super(struct super_block *sb)
 	struct ufs_super_block_third * usb3;
 	unsigned flags;
 
+	lock_super(sb);
 	lock_kernel();
 	UFSD("ENTER\n");
 	flags = UFS_SB(sb)->s_flags;
@@ -1150,6 +1151,7 @@ static void ufs_write_super(struct super_block *sb)
 	sb->s_dirt = 0;
 	UFSD("EXIT\n");
 	unlock_kernel();
+	unlock_super(sb);
 }
 
 static void ufs_put_super(struct super_block *sb)
-- 
cgit v0.10.2


From 9fd5746fd3d7838bf6ff991d50f1257057d1156f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 21 May 2009 16:01:00 -0400
Subject: fs: Remove i_cindex from struct inode

The only user of the i_cindex element in the inode structure is used
is by the firewire drivers.  As part of an attempt to slim down the
inode structure to save memory --- since a typical Linux system will
have hundreds of thousands if not millions of inodes cached, a
reduction in the size inode has high leverage.

The firewire driver does not need i_cindex in any fast path, so it's
simple enough to calculate when it is needed, instead of wasting space
in the inode structure.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: krh@redhat.com
Cc: stefanr@s5r6.in-berlin.de
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c
index 823a629..2cd00b5 100644
--- a/drivers/ieee1394/dv1394.c
+++ b/drivers/ieee1394/dv1394.c
@@ -1789,12 +1789,13 @@ static int dv1394_open(struct inode *inode, struct file *file)
 	} else {
 		/* look up the card by ID */
 		unsigned long flags;
+		int idx = ieee1394_file_to_instance(file);
 
 		spin_lock_irqsave(&dv1394_cards_lock, flags);
 		if (!list_empty(&dv1394_cards)) {
 			struct video_card *p;
 			list_for_each_entry(p, &dv1394_cards, list) {
-				if ((p->id) == ieee1394_file_to_instance(file)) {
+				if ((p->id) == idx) {
 					video = p;
 					break;
 				}
@@ -1803,7 +1804,7 @@ static int dv1394_open(struct inode *inode, struct file *file)
 		spin_unlock_irqrestore(&dv1394_cards_lock, flags);
 
 		if (!video) {
-			debug_printk("dv1394: OHCI card %d not found", ieee1394_file_to_instance(file));
+			debug_printk("dv1394: OHCI card %d not found", idx);
 			return -ENODEV;
 		}
 
diff --git a/drivers/ieee1394/ieee1394_core.h b/drivers/ieee1394/ieee1394_core.h
index 21d50f7..28b9f58 100644
--- a/drivers/ieee1394/ieee1394_core.h
+++ b/drivers/ieee1394/ieee1394_core.h
@@ -5,6 +5,7 @@
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/types.h>
+#include <linux/cdev.h>
 #include <asm/atomic.h>
 
 #include "hosts.h"
@@ -155,7 +156,10 @@ void hpsb_packet_received(struct hpsb_host *host, quadlet_t *data, size_t size,
  */
 static inline unsigned char ieee1394_file_to_instance(struct file *file)
 {
-	return file->f_path.dentry->d_inode->i_cindex;
+	int idx = cdev_index(file->f_path.dentry->d_inode);
+	if (idx < 0)
+		idx = 0;
+	return idx;
 }
 
 extern int hpsb_disable_irm;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 38f7122..b7c9d51 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -375,7 +375,6 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 		p = inode->i_cdev;
 		if (!p) {
 			inode->i_cdev = p = new;
-			inode->i_cindex = idx;
 			list_add(&inode->i_devices, &p->list);
 			new = NULL;
 		} else if (!cdev_get(p))
@@ -405,6 +404,18 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 	return ret;
 }
 
+int cdev_index(struct inode *inode)
+{
+	int idx;
+	struct kobject *kobj;
+
+	kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
+	if (!kobj)
+		return -1;
+	kobject_put(kobj);
+	return idx;
+}
+
 void cd_forget(struct inode *inode)
 {
 	spin_lock(&cdev_lock);
@@ -557,6 +568,7 @@ EXPORT_SYMBOL(cdev_init);
 EXPORT_SYMBOL(cdev_alloc);
 EXPORT_SYMBOL(cdev_del);
 EXPORT_SYMBOL(cdev_add);
+EXPORT_SYMBOL(cdev_index);
 EXPORT_SYMBOL(register_chrdev);
 EXPORT_SYMBOL(unregister_chrdev);
 EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index fb45919..f389e31 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -28,6 +28,8 @@ int cdev_add(struct cdev *, dev_t, unsigned);
 
 void cdev_del(struct cdev *);
 
+int cdev_index(struct inode *inode);
+
 void cd_forget(struct inode *);
 
 extern struct backing_dev_info directly_mappable_cdev_bdi;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e7833ef..bcd6370 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -751,7 +751,6 @@ struct inode {
 		struct block_device	*i_bdev;
 		struct cdev		*i_cdev;
 	};
-	int			i_cindex;
 
 	__u32			i_generation;
 
-- 
cgit v0.10.2


From 28ad0c118b0ed98b042d362acfe0017591921138 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 21 May 2009 16:01:02 -0400
Subject: fs: Rearrange inode structure elements to avoid waste due to padding

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bcd6370..d883aa1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -729,8 +729,8 @@ struct inode {
 	struct timespec		i_atime;
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
-	unsigned int		i_blkbits;
 	blkcnt_t		i_blocks;
+	unsigned int		i_blkbits;
 	unsigned short          i_bytes;
 	umode_t			i_mode;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
-- 
cgit v0.10.2


From 13205fb9260c2377438599ef0773c6a3eaeb0b07 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 25 May 2009 09:30:45 +0200
Subject: ntfs: remove old debug check for dirty data in ntfs_put_super()

This should not trigger anymore, so kill it.

Acked-by: Anton Altaparmakov <aia21@cam.ac.uk>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index a9ec4e1..7a7b0d3 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2376,39 +2376,12 @@ static void ntfs_put_super(struct super_block *sb)
 		vol->mftmirr_ino = NULL;
 	}
 	/*
-	 * If any dirty inodes are left, throw away all mft data page cache
-	 * pages to allow a clean umount.  This should never happen any more
-	 * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
-	 * the underlying mft records are written out and cleaned.  If it does,
-	 * happen anyway, we want to know...
+	 * We should have no dirty inodes left, due to
+	 * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
+	 * the underlying mft records are written out and cleaned.
 	 */
 	ntfs_commit_inode(vol->mft_ino);
 	write_inode_now(vol->mft_ino, 1);
-	if (sb_has_dirty_inodes(sb)) {
-		const char *s1, *s2;
-
-		mutex_lock(&vol->mft_ino->i_mutex);
-		truncate_inode_pages(vol->mft_ino->i_mapping, 0);
-		mutex_unlock(&vol->mft_ino->i_mutex);
-		write_inode_now(vol->mft_ino, 1);
-		if (sb_has_dirty_inodes(sb)) {
-			static const char *_s1 = "inodes";
-			static const char *_s2 = "";
-			s1 = _s1;
-			s2 = _s2;
-		} else {
-			static const char *_s1 = "mft pages";
-			static const char *_s2 = "They have been thrown "
-					"away.  ";
-			s1 = _s1;
-			s2 = _s2;
-		}
-		ntfs_error(sb, "Dirty %s found at umount time.  %sYou should "
-				"run chkdsk.  Please email "
-				"linux-ntfs-dev@lists.sourceforge.net and say "
-				"that you saw this message.  Thank you.", s1,
-				s2);
-	}
 #endif /* NTFS_RW */
 
 	iput(vol->mft_ino);
-- 
cgit v0.10.2


From f95022161d23ee661a48af8f280472209f513a67 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 5 Jun 2009 12:26:23 +0200
Subject: xfs: remove ->write_super and stop maintaining ->s_dirt

the write_super method is used for

 (1) writing back the superblock periodically from pdflush
 (2) called just before ->sync_fs for data integerity syncs

We don't need (1) because we have our own peridoc writeout through xfssyncd,
and we don't need (2) because xfs_fs_sync_fs performs a proper synchronous
superblock writeout after all other data and metadata has been written out.

Also remove ->s_dirt tracking as it's only used to decide when too call
->write_super.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bb68526..08d6bd9 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1104,15 +1104,6 @@ xfs_fs_put_super(
 	kfree(mp);
 }
 
-STATIC void
-xfs_fs_write_super(
-	struct super_block	*sb)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		xfs_sync_fsdata(XFS_M(sb), 0);
-	sb->s_dirt = 0;
-}
-
 STATIC int
 xfs_fs_sync_super(
 	struct super_block	*sb,
@@ -1137,7 +1128,6 @@ xfs_fs_sync_super(
 		error = xfs_quiesce_data(mp);
 	else
 		error = xfs_sync_fsdata(mp, 0);
-	sb->s_dirt = 0;
 
 	if (unlikely(laptop_mode)) {
 		int	prev_sync_seq = mp->m_sync_seq;
@@ -1443,7 +1433,6 @@ xfs_fs_fill_super(
 
 	XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
 
-	sb->s_dirt = 1;
 	sb->s_magic = XFS_SB_MAGIC;
 	sb->s_blocksize = mp->m_sb.sb_blocksize;
 	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1533,7 +1522,6 @@ static struct super_operations xfs_super_operations = {
 	.write_inode		= xfs_fs_write_inode,
 	.clear_inode		= xfs_fs_clear_inode,
 	.put_super		= xfs_fs_put_super,
-	.write_super		= xfs_fs_write_super,
 	.sync_fs		= xfs_fs_sync_super,
 	.freeze_fs		= xfs_fs_freeze,
 	.statfs			= xfs_fs_statfs,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8570b82..bcc39d3 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -628,8 +628,6 @@ xfs_trans_apply_sb_deltas(
 		xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount),
 				  offsetof(xfs_dsb_t, sb_frextents) +
 				  sizeof(sbp->sb_frextents) - 1);
-
-	tp->t_mountp->m_super->s_dirt = 1;
 }
 
 /*
-- 
cgit v0.10.2


From 8688b8635266cf98f00c6b0350ea2dbe7c42c321 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 26 May 2009 05:45:04 -0400
Subject: linux/magic.h: move cramfs magic out of cramfs_fs.h

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h
index 3be4e5a..6fc2bed 100644
--- a/include/linux/cramfs_fs.h
+++ b/include/linux/cramfs_fs.h
@@ -2,9 +2,8 @@
 #define __CRAMFS_H
 
 #include <linux/types.h>
+#include <linux/magic.h>
 
-#define CRAMFS_MAGIC		0x28cd3d45	/* some random number */
-#define CRAMFS_MAGIC_WEND	0x453dcd28	/* magic number with the wrong endianess */
 #define CRAMFS_SIGNATURE	"Compressed ROMFS"
 
 /*
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 927138c..1923327 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -6,6 +6,8 @@
 #define AFS_SUPER_MAGIC                0x5346414F
 #define AUTOFS_SUPER_MAGIC	0x0187
 #define CODA_SUPER_MAGIC	0x73757245
+#define CRAMFS_MAGIC		0x28cd3d45	/* some random number */
+#define CRAMFS_MAGIC_WEND	0x453dcd28	/* magic number with the wrong endianess */
 #define DEBUGFS_MAGIC          0x64626720
 #define SYSFS_MAGIC		0x62656572
 #define SECURITYFS_MAGIC	0x73636673
-- 
cgit v0.10.2


From 545b9fd3d737afc0bb5203b1e79194a471605acd Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 2 Jun 2009 12:07:47 +0200
Subject: fs: remove incorrect I_NEW warnings

Some filesystems can call in to sync an inode that is still in the
I_NEW state (eg. ext family, when mounted with -osync). This is OK
because the filesystem has sole access to the new inode, so it can
modify i_state without races (because no other thread should be
modifying it, by definition of I_NEW). Ie. a false positive, so
remove the warnings.

The races are described here 7ef0d7377cb287e08f3ae94cebc919448e1f5dff,
which is also where the warnings were introduced.

Reported-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e0fb2e7..efcedb6 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -289,7 +289,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 	int ret;
 
 	BUG_ON(inode->i_state & I_SYNC);
-	WARN_ON(inode->i_state & I_NEW);
 
 	/* Set I_SYNC, reset I_DIRTY */
 	dirty = inode->i_state & I_DIRTY;
@@ -314,7 +313,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 	}
 
 	spin_lock(&inode_lock);
-	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
 		if (!(inode->i_state & I_DIRTY) &&
-- 
cgit v0.10.2


From 4195f73d1329e49727bcceb028e58cb38376c2b0 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Thu, 28 May 2009 09:01:15 +0200
Subject: fs: block_dump missing dentry locking

I think the block_dump output in __mark_inode_dirty is missing dentry locking.
Surely the i_dentry list can change any time, so we may not even *get* a
dentry there. If we do get one by chance, then it would appear to be able to
go away or get renamed at any time...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index efcedb6..40308e9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -64,6 +64,28 @@ static void writeback_release(struct backing_dev_info *bdi)
 	clear_bit(BDI_pdflush, &bdi->state);
 }
 
+static noinline void block_dump___mark_inode_dirty(struct inode *inode)
+{
+	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
+		struct dentry *dentry;
+		const char *name = "?";
+
+		dentry = d_find_alias(inode);
+		if (dentry) {
+			spin_lock(&dentry->d_lock);
+			name = (const char *) dentry->d_name.name;
+		}
+		printk(KERN_DEBUG
+		       "%s(%d): dirtied inode %lu (%s) on %s\n",
+		       current->comm, task_pid_nr(current), inode->i_ino,
+		       name, inode->i_sb->s_id);
+		if (dentry) {
+			spin_unlock(&dentry->d_lock);
+			dput(dentry);
+		}
+	}
+}
+
 /**
  *	__mark_inode_dirty -	internal function
  *	@inode: inode to mark
@@ -114,23 +136,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	if ((inode->i_state & flags) == flags)
 		return;
 
-	if (unlikely(block_dump)) {
-		struct dentry *dentry = NULL;
-		const char *name = "?";
-
-		if (!list_empty(&inode->i_dentry)) {
-			dentry = list_entry(inode->i_dentry.next,
-					    struct dentry, d_alias);
-			if (dentry && dentry->d_name.name)
-				name = (const char *) dentry->d_name.name;
-		}
-
-		if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
-			printk(KERN_DEBUG
-			       "%s(%d): dirtied inode %lu (%s) on %s\n",
-			       current->comm, task_pid_nr(current), inode->i_ino,
-			       name, inode->i_sb->s_id);
-	}
+	if (unlikely(block_dump))
+		block_dump___mark_inode_dirty(inode);
 
 	spin_lock(&inode_lock);
 	if ((inode->i_state & flags) != flags) {
-- 
cgit v0.10.2


From 337eb00a2c3a421999c39c94ce7e33545ee8baa7 Mon Sep 17 00:00:00 2001
From: Alessio Igor Bogani <abogani@texware.it>
Date: Tue, 12 May 2009 15:10:54 +0200
Subject: Push BKL down into ->remount_fs()

[xfs, btrfs, capifs, shmem don't need BKL, exempt]

Signed-off-by: Alessio Igor Bogani <abogani@texware.it>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index dff5760..ffe75e8 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -39,6 +39,7 @@
 #include <linux/parser.h>
 #include <linux/notifier.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <asm/byteorder.h>
 #include "usb.h"
 #include "hcd.h"
@@ -265,9 +266,13 @@ static int remount(struct super_block *sb, int *flags, char *data)
 		return -EINVAL;
 	}
 
+	lock_kernel();
+
 	if (usbfs_mount && usbfs_mount->mnt_sb)
 		update_sb(usbfs_mount->mnt_sb);
 
+	unlock_kernel();
+
 	return 0;
 }
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 280d361..c481493 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -16,6 +16,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 #include "affs.h"
 
 extern struct timezone sys_tz;
@@ -512,6 +513,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 		kfree(new_opts);
 		return -EINVAL;
 	}
+	lock_kernel();
 	replace_mount_options(sb, new_opts);
 
 	sbi->s_flags = mount_flags;
@@ -519,8 +521,10 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	sbi->s_uid   = uid;
 	sbi->s_gid   = gid;
 
-	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		unlock_kernel();
 		return 0;
+	}
 	if (*flags & MS_RDONLY) {
 		sb->s_dirt = 1;
 		while (sb->s_dirt)
@@ -529,6 +533,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	} else
 		res = affs_init_bitmap(sb, flags);
 
+	unlock_kernel();
 	return res;
 }
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index a44963d..f8cbdf5 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1162,6 +1162,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 	unsigned long old_sb_flags;
 	int err;
 
+	lock_kernel();
+
 	/* Store the old options */
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
@@ -1197,12 +1199,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 		sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
 		sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
 	}
-	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		unlock_kernel();
 		return 0;
+	}
 	if (*flags & MS_RDONLY) {
 		if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
-		    !(sbi->s_mount_state & EXT2_VALID_FS))
+		    !(sbi->s_mount_state & EXT2_VALID_FS)) {
+			unlock_kernel();
 			return 0;
+		}
 		/*
 		 * OK, we are remounting a valid rw partition rdonly, so set
 		 * the rdonly flag and then mark the partition as valid again.
@@ -1229,12 +1235,14 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 			sb->s_flags &= ~MS_RDONLY;
 	}
 	ext2_sync_super(sb, es);
+	unlock_kernel();
 	return 0;
 restore_opts:
 	sbi->s_mount_opt = old_opts.s_mount_opt;
 	sbi->s_resuid = old_opts.s_resuid;
 	sbi->s_resgid = old_opts.s_resgid;
 	sb->s_flags = old_sb_flags;
+	unlock_kernel();
 	return err;
 }
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index e213a26..26aa64d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2490,6 +2490,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 	int i;
 #endif
 
+	lock_kernel();
+
 	/* Store the original options */
 	lock_super(sb);
 	old_sb_flags = sb->s_flags;
@@ -2600,6 +2602,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 			kfree(old_opts.s_qf_names[i]);
 #endif
 	unlock_super(sb);
+	unlock_kernel();
 	return 0;
 restore_opts:
 	sb->s_flags = old_sb_flags;
@@ -2617,6 +2620,7 @@ restore_opts:
 	}
 #endif
 	unlock_super(sb);
+	unlock_kernel();
 	return err;
 }
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c17200a..012c425 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3422,6 +3422,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	int i;
 #endif
 
+	lock_kernel();
+
 	/* Store the original options */
 	lock_super(sb);
 	old_sb_flags = sb->s_flags;
@@ -3558,6 +3560,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			kfree(old_opts.s_qf_names[i]);
 #endif
 	unlock_super(sb);
+	unlock_kernel();
 	return 0;
 
 restore_opts:
@@ -3578,6 +3581,7 @@ restore_opts:
 	}
 #endif
 	unlock_super(sb);
+	unlock_kernel();
 	return err;
 }
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f68193c..f2feaa0 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -13,6 +13,7 @@
 #include <linux/statfs.h>
 #include <linux/magic.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 
 /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
 
@@ -398,6 +399,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 	
 	*flags |= MS_NOATIME;
 	
+	lock_kernel();
 	lock_super(s);
 	uid = sbi->sb_uid; gid = sbi->sb_gid;
 	umask = 0777 & ~sbi->sb_mode;
@@ -432,10 +434,12 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 	replace_mount_options(s, new_opts);
 
 	unlock_super(s);
+	unlock_kernel();
 	return 0;
 
 out_err:
 	unlock_super(s);
+	unlock_kernel();
 	kfree(new_opts);
 	return -EINVAL;
 }
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 237b27a..3451a81 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/vfs.h>
 #include <linux/crc32.h>
+#include <linux/smp_lock.h>
 #include "nodelist.h"
 
 static int jffs2_flash_setup(struct jffs2_sb_info *c);
@@ -387,6 +388,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
 	   This also catches the case where it was stopped and this
 	   is just a remount to restart it.
 	   Flush the writebuffer, if neccecary, else we loose it */
+	lock_kernel();
 	if (!(sb->s_flags & MS_RDONLY)) {
 		jffs2_stop_garbage_collect_thread(c);
 		mutex_lock(&c->alloc_sem);
@@ -399,6 +401,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
 
 	*flags |= MS_NOATIME;
 
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 3eb13ad..09b1b6e 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -32,6 +32,7 @@
 #include <linux/crc32.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
@@ -375,19 +376,24 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
 	s64 newLVSize = 0;
 	int rc = 0;
 	int flag = JFS_SBI(sb)->flag;
+	int ret;
 
 	if (!parse_options(data, sb, &newLVSize, &flag)) {
 		return -EINVAL;
 	}
+	lock_kernel();
 	if (newLVSize) {
 		if (sb->s_flags & MS_RDONLY) {
 			printk(KERN_ERR
 		  "JFS: resize requires volume to be mounted read-write\n");
+			unlock_kernel();
 			return -EROFS;
 		}
 		rc = jfs_extendfs(sb, newLVSize, 0);
-		if (rc)
+		if (rc) {
+			unlock_kernel();
 			return rc;
+		}
 	}
 
 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
@@ -398,23 +404,31 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
 		truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
 
 		JFS_SBI(sb)->flag = flag;
-		return jfs_mount_rw(sb, 1);
+		ret = jfs_mount_rw(sb, 1);
+		unlock_kernel();
+		return ret;
 	}
 	if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
 		rc = jfs_umount_rw(sb);
 		JFS_SBI(sb)->flag = flag;
+		unlock_kernel();
 		return rc;
 	}
 	if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
 		if (!(sb->s_flags & MS_RDONLY)) {
 			rc = jfs_umount_rw(sb);
-			if (rc)
+			if (rc) {
+				unlock_kernel();
 				return rc;
+			}
 			JFS_SBI(sb)->flag = flag;
-			return jfs_mount_rw(sb, 1);
+			ret = jfs_mount_rw(sb, 1);
+			unlock_kernel();
+			return ret;
 		}
 	JFS_SBI(sb)->flag = flag;
 
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d2d6778..26127b6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1813,6 +1813,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	if (data == NULL)
 		return -ENOMEM;
 
+	lock_kernel();
 	/* fill out struct with values from existing mount */
 	data->flags = nfss->flags;
 	data->rsize = nfss->rsize;
@@ -1837,6 +1838,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	error = nfs_compare_remount_data(nfss, data);
 out:
 	kfree(data);
+	unlock_kernel();
 	return error;
 }
 
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 7262e84..11151ea 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -906,6 +906,8 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 	struct nilfs_mount_options old_opts;
 	int err;
 
+	lock_kernel();
+
 	old_sb_flags = sb->s_flags;
 	old_opts.mount_opt = sbi->s_mount_opt;
 	old_opts.snapshot_cno = sbi->s_snapshot_cno;
@@ -985,6 +987,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 		up(&sb->s_bdev->bd_mount_sem);
 	}
  out:
+	unlock_kernel();
 	return 0;
 
  rw_remount_failed:
@@ -993,6 +996,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.mount_opt;
 	sbi->s_snapshot_cno = old_opts.snapshot_cno;
+	unlock_kernel();
 	return err;
 }
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 7a7b0d3..abaaa1c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -443,6 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 	ntfs_volume *vol = NTFS_SB(sb);
 
 	ntfs_debug("Entering with remount options string: %s", opt);
+
+	lock_kernel();
 #ifndef NTFS_RW
 	/* For read-only compiled driver, enforce read-only flag. */
 	*flags |= MS_RDONLY;
@@ -466,15 +468,18 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 		if (NVolErrors(vol)) {
 			ntfs_error(sb, "Volume has errors and is read-only%s",
 					es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_IS_DIRTY) {
 			ntfs_error(sb, "Volume is dirty and read-only%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
 			ntfs_error(sb, "Volume has been modified by chkdsk "
 					"and is read-only%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
@@ -482,11 +487,13 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 					"(0x%x) and is read-only%s",
 					(unsigned)le16_to_cpu(vol->vol_flags),
 					es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
 			ntfs_error(sb, "Failed to set dirty bit in volume "
 					"information flags%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 #if 0
@@ -506,18 +513,21 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 			ntfs_error(sb, "Failed to empty journal $LogFile%s",
 					es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (!ntfs_mark_quotas_out_of_date(vol)) {
 			ntfs_error(sb, "Failed to mark quotas out of date%s",
 					es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (!ntfs_stamp_usnjrnl(vol)) {
 			ntfs_error(sb, "Failed to stamp transation log "
 					"($UsnJrnl)%s", es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 	} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
@@ -533,8 +543,11 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 
 	// TODO: Deal with *flags.
 
-	if (!parse_options(vol, opt))
+	if (!parse_options(vol, opt)) {
+		unlock_kernel();
 		return -EINVAL;
+	}
+	unlock_kernel();
 	ntfs_debug("Done.");
 	return 0;
 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0273759..201b40a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -42,6 +42,7 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/quotaops.h>
+#include <linux/smp_lock.h>
 
 #define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
@@ -581,6 +582,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
 	struct mount_options parsed_options;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
+	lock_kernel();
+
 	if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
 		ret = -EINVAL;
 		goto out;
@@ -684,6 +687,7 @@ unlock_osb:
 			ocfs2_set_journal_params(osb);
 	}
 out:
+	unlock_kernel();
 	return ret;
 }
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 90dcb7b..2969773 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,6 +28,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
+#include <linux/smp_lock.h>
 
 struct file_system_type reiserfs_fs_type;
 
@@ -1196,6 +1197,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
 #endif
 
+	lock_kernel();
 	rs = SB_DISK_SUPER_BLOCK(s);
 
 	if (!reiserfs_parse_options
@@ -1318,10 +1320,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 
 out_ok:
 	replace_mount_options(s, new_opts);
+	unlock_kernel();
 	return 0;
 
 out_err:
 	kfree(new_opts);
+	unlock_kernel();
 	return err;
 }
 
diff --git a/fs/super.c b/fs/super.c
index 1905f4a..83b47416 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -540,7 +540,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	shrink_dcache_sb(sb);
 	sync_filesystem(sb);
 
-	lock_kernel();
 	/* If we are remounting RDONLY and current sb is read/write,
 	   make sure there are no rw files opened */
 	if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
@@ -566,7 +565,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 		}
 	}
 	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
-	unlock_kernel();
 	if (remount_rw)
 		vfs_dq_quota_on_remount(sb);
 	return 0;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 522c3fd..3589eab 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -36,6 +36,7 @@
 #include <linux/mount.h>
 #include <linux/math64.h>
 #include <linux/writeback.h>
+#include <linux/smp_lock.h>
 #include "ubifs.h"
 
 /*
@@ -1770,17 +1771,22 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
 		return err;
 	}
 
+	lock_kernel();
 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
 		if (c->ro_media) {
 			ubifs_msg("cannot re-mount due to prior errors");
+			unlock_kernel();
 			return -EROFS;
 		}
 		err = ubifs_remount_rw(c);
-		if (err)
+		if (err) {
+			unlock_kernel();
 			return err;
+		}
 	} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
 		if (c->ro_media) {
 			ubifs_msg("cannot re-mount due to prior errors");
+			unlock_kernel();
 			return -EROFS;
 		}
 		ubifs_remount_ro(c);
@@ -1795,6 +1801,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
 	}
 
 	ubifs_assert(c->lst.taken_empty_lebs > 0);
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 04802cc..6832135 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -568,6 +568,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
 	if (!udf_parse_options(options, &uopt, true))
 		return -EINVAL;
 
+	lock_kernel();
 	sbi->s_flags = uopt.flags;
 	sbi->s_uid   = uopt.uid;
 	sbi->s_gid   = uopt.gid;
@@ -581,13 +582,16 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
 			*flags |= MS_RDONLY;
 	}
 
-	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		unlock_kernel();
 		return 0;
+	}
 	if (*flags & MS_RDONLY)
 		udf_close_lvid(sb);
 	else
 		udf_open_lvid(sb);
 
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index c97210e..6560dda 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -263,6 +263,7 @@ void ufs_panic (struct super_block * sb, const char * function,
 	struct ufs_super_block_first * usb1;
 	va_list args;
 	
+	lock_kernel();
 	uspi = UFS_SB(sb)->s_uspi;
 	usb1 = ubh_get_usb_first(uspi);
 	
@@ -1182,7 +1183,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	struct ufs_super_block_third * usb3;
 	unsigned new_mount_opt, ufstype;
 	unsigned flags;
-	
+
+	lock_kernel();
 	lock_super(sb);
 	uspi = UFS_SB(sb)->s_uspi;
 	flags = UFS_SB(sb)->s_flags;
@@ -1198,6 +1200,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	ufs_set_opt (new_mount_opt, ONERROR_LOCK);
 	if (!ufs_parse_options (data, &new_mount_opt)) {
 		unlock_super(sb);
+		unlock_kernel();
 		return -EINVAL;
 	}
 	if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
@@ -1205,12 +1208,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
 		printk("ufstype can't be changed during remount\n");
 		unlock_super(sb);
+		unlock_kernel();
 		return -EINVAL;
 	}
 
 	if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
 		UFS_SB(sb)->s_mount_opt = new_mount_opt;
 		unlock_super(sb);
+		unlock_kernel();
 		return 0;
 	}
 	
@@ -1236,6 +1241,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 		printk("ufs was compiled with read-only support, "
 		"can't be mounted as read-write\n");
 		unlock_super(sb);
+		unlock_kernel();
 		return -EINVAL;
 #else
 		if (ufstype != UFS_MOUNT_UFSTYPE_SUN && 
@@ -1245,11 +1251,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 		    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
 			printk("this ufstype is read-only supported\n");
 			unlock_super(sb);
+			unlock_kernel();
 			return -EINVAL;
 		}
 		if (!ufs_read_cylinder_structures(sb)) {
 			printk("failed during remounting\n");
 			unlock_super(sb);
+			unlock_kernel();
 			return -EPERM;
 		}
 		sb->s_flags &= ~MS_RDONLY;
@@ -1257,6 +1265,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 	}
 	UFS_SB(sb)->s_mount_opt = new_mount_opt;
 	unlock_super(sb);
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a7267bf..3fb789f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,6 +46,7 @@
 #include <linux/cgroupstats.h>
 #include <linux/hash.h>
 #include <linux/namei.h>
+#include <linux/smp_lock.h>
 
 #include <asm/atomic.h>
 
@@ -900,6 +901,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	struct cgroup *cgrp = &root->top_cgroup;
 	struct cgroup_sb_opts opts;
 
+	lock_kernel();
 	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
 	mutex_lock(&cgroup_mutex);
 
@@ -927,6 +929,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	kfree(opts.release_agent);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+	unlock_kernel();
 	return ret;
 }
 
-- 
cgit v0.10.2


From d5aacad548db1ff547adf35d0a77eb2a8ed4fe14 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 14:56:44 -0400
Subject: New helper - simple_fsync()

writes associated buffers, then does sync_inode() to write
the inode itself (and to make it clean).  Depends on
->write_inode() honouring the second argument.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/libfs.c b/fs/libfs.c
index 80046dd..ddfa899 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -9,6 +9,8 @@
 #include <linux/vfs.h>
 #include <linux/mutex.h>
 #include <linux/exportfs.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
 
 #include <asm/uaccess.h>
 
@@ -807,6 +809,29 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
 }
 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
 
+int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0, /* metadata-only; caller takes care of data */
+	};
+	struct inode *inode = dentry->d_inode;
+	int err;
+	int ret;
+
+	ret = sync_mapping_buffers(inode->i_mapping);
+	if (!(inode->i_state & I_DIRTY))
+		return ret;
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		return ret;
+
+	err = sync_inode(inode, &wbc);
+	if (ret == 0)
+		ret = err;
+	return ret;
+}
+EXPORT_SYMBOL(simple_fsync);
+
 EXPORT_SYMBOL(dcache_dir_close);
 EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d883aa1..ede84fa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2345,6 +2345,8 @@ extern void simple_release_fs(struct vfsmount **mount, int *count);
 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 			loff_t *ppos, const void *from, size_t available);
 
+extern int simple_fsync(struct file *, struct dentry *, int);
+
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
 				struct page *, struct page *);
-- 
cgit v0.10.2


From 79d25767583e4e086f8309bfd1f502660a64fe7f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 09:30:08 -0400
Subject: Sanitize qnx4 fsync handling

* have directory operations use mark_buffer_dirty_inode(),
  so that sync_mapping_buffers() would get those.
* make qnx4_write_inode() honour its last argument.
* get rid of insane copies of very ancient "walk the indirect blocks"
  in qnx4/fsync - they never matched the actual fs layout and, fortunately,
  never'd been called.  Again, all this junk is not needed; ->fsync()
  should just do sync_mapping_buffers + sync_inode (and if we implement
  block allocation for qnx4, we'll need to use mark_buffer_dirty_inode()
  for extent blocks)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile
index 502d7fe..e4d408c 100644
--- a/fs/qnx4/Makefile
+++ b/fs/qnx4/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_QNX4FS_FS) += qnx4.o
 
-qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o
+qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index ea9ffef..ff6c1ba 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -84,7 +84,7 @@ const struct file_operations qnx4_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= qnx4_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 };
 
 const struct inode_operations qnx4_dir_inode_operations =
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 867f42b..e7033ea 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -29,7 +29,7 @@ const struct file_operations qnx4_file_operations =
 #ifdef CONFIG_QNX4FS_RW
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
-	.fsync		= qnx4_sync_file,
+	.fsync		= simple_fsync,
 #endif
 };
 
diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c
deleted file mode 100644
index aa3b195..0000000
--- a/fs/qnx4/fsync.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/* 
- * QNX4 file system, Linux implementation.
- * 
- * Version : 0.1
- * 
- * Using parts of the xiafs filesystem.
- * 
- * History :
- * 
- * 24-03-1998 by Richard Frowijn : first release.
- */
-
-#include <linux/errno.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/smp_lock.h>
-#include <linux/buffer_head.h>
-
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-
-#include <asm/system.h>
-
-/*
- * The functions for qnx4 fs file synchronization.
- */
-
-#ifdef CONFIG_QNX4FS_RW
-
-static int sync_block(struct inode *inode, unsigned short *block, int wait)
-{
-	struct buffer_head *bh;
-	unsigned short tmp;
-
-	if (!*block)
-		return 0;
-	tmp = *block;
-	bh = sb_find_get_block(inode->i_sb, *block);
-	if (!bh)
-		return 0;
-	if (*block != tmp) {
-		brelse(bh);
-		return 1;
-	}
-	if (wait && buffer_req(bh) && !buffer_uptodate(bh)) {
-		brelse(bh);
-		return -1;
-	}
-	if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) {
-		brelse(bh);
-		return 0;
-	}
-	ll_rw_block(WRITE, 1, &bh);
-	atomic_dec(&bh->b_count);
-	return 0;
-}
-
-#ifdef WTF
-static int sync_iblock(struct inode *inode, unsigned short *iblock,
-		       struct buffer_head **bh, int wait)
-{
-	int rc;
-	unsigned short tmp;
-
-	*bh = NULL;
-	tmp = *iblock;
-	if (!tmp)
-		return 0;
-	rc = sync_block(inode, iblock, wait);
-	if (rc)
-		return rc;
-	*bh = sb_bread(inode->i_sb, tmp);
-	if (tmp != *iblock) {
-		brelse(*bh);
-		*bh = NULL;
-		return 1;
-	}
-	if (!*bh)
-		return -1;
-	return 0;
-}
-#endif
-
-static int sync_direct(struct inode *inode, int wait)
-{
-	int i;
-	int rc, err = 0;
-
-	for (i = 0; i < 7; i++) {
-		rc = sync_block(inode,
-				(unsigned short *) qnx4_raw_inode(inode)->di_first_xtnt.xtnt_blk + i, wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	return err;
-}
-
-#ifdef WTF
-static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait)
-{
-	int i;
-	struct buffer_head *ind_bh;
-	int rc, err = 0;
-
-	rc = sync_iblock(inode, iblock, &ind_bh, wait);
-	if (rc || !ind_bh)
-		return rc;
-
-	for (i = 0; i < 512; i++) {
-		rc = sync_block(inode,
-				((unsigned short *) ind_bh->b_data) + i,
-				wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	brelse(ind_bh);
-	return err;
-}
-
-static int sync_dindirect(struct inode *inode, unsigned short *diblock,
-			  int wait)
-{
-	int i;
-	struct buffer_head *dind_bh;
-	int rc, err = 0;
-
-	rc = sync_iblock(inode, diblock, &dind_bh, wait);
-	if (rc || !dind_bh)
-		return rc;
-
-	for (i = 0; i < 512; i++) {
-		rc = sync_indirect(inode,
-				((unsigned short *) dind_bh->b_data) + i,
-				   wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	brelse(dind_bh);
-	return err;
-}
-#endif
-
-int qnx4_sync_file(struct file *file, struct dentry *dentry, int unused)
-{
-        struct inode *inode = dentry->d_inode;
-	int wait, err = 0;
-        
-        (void) file;
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	      S_ISLNK(inode->i_mode)))
-		return -EINVAL;
-
-	lock_kernel();
-	for (wait = 0; wait <= 1; wait++) {
-		err |= sync_direct(inode, wait);
-	}
-	err |= qnx4_sync_inode(inode);
-	unlock_kernel();
-	return (err < 0) ? -EIO : 0;
-}
-
-#endif
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 95c12fc..4071286 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -24,6 +24,7 @@
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/vfs.h>
 #include <asm/uaccess.h>
 
@@ -34,31 +35,6 @@ static const struct super_operations qnx4_sops;
 
 #ifdef CONFIG_QNX4FS_RW
 
-int qnx4_sync_inode(struct inode *inode)
-{
-	int err = 0;
-# if 0
-	struct buffer_head *bh;
-
-   	bh = qnx4_update_inode(inode);
-	if (bh && buffer_dirty(bh))
-	{
-		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh))
-		{
-			printk ("IO error syncing qnx4 inode [%s:%08lx]\n",
-				inode->i_sb->s_id, inode->i_ino);
-			err = -1;
-		}
-	        brelse (bh);
-	} else if (!bh) {
-		err = -1;
-	}
-# endif
-
-	return err;
-}
-
 static void qnx4_delete_inode(struct inode *inode)
 {
 	QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
@@ -70,7 +46,7 @@ static void qnx4_delete_inode(struct inode *inode)
 	unlock_kernel();
 }
 
-static int qnx4_write_inode(struct inode *inode, int unused)
+static int qnx4_write_inode(struct inode *inode, int do_sync)
 {
 	struct qnx4_inode_entry *raw_inode;
 	int block, ino;
@@ -107,6 +83,16 @@ static int qnx4_write_inode(struct inode *inode, int unused)
 	raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
 	raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks);
 	mark_buffer_dirty(bh);
+	if (do_sync) {
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh)) {
+			printk("qnx4: IO error syncing inode [%s:%08x]\n",
+					inode->i_sb->s_id, ino);
+			brelse(bh);
+			unlock_kernel();
+			return -EIO;
+		}
+	}
 	brelse(bh);
 	unlock_kernel();
 	return 0;
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 775eed3..123270c 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -187,7 +187,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry)
 	de->di_status = 0;
 	memset(de->di_fname, 0, sizeof de->di_fname);
 	de->di_mode = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	clear_nlink(inode);
 	mark_inode_dirty(inode);
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
@@ -232,7 +232,7 @@ int qnx4_unlink(struct inode *dir, struct dentry *dentry)
 	de->di_status = 0;
 	memset(de->di_fname, 0, sizeof de->di_fname);
 	de->di_mode = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
 	inode->i_ctime = dir->i_ctime;
diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index 787d19e..acbaec3 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -126,8 +126,6 @@ extern void qnx4_truncate(struct inode *inode);
 extern void qnx4_free_inode(struct inode *inode);
 extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
 extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
-extern int qnx4_sync_file(struct file *file, struct dentry *dentry, int);
-extern int qnx4_sync_inode(struct inode *inode);
 
 static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
 {
-- 
cgit v0.10.2


From 964f5369667b342994fe3f384e9ba41d404ee796 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 09:47:13 -0400
Subject: fs/qnx4: sanitize includes

fs-internal parts of qnx4_fs.h taken to fs/qnx4/qnx4.h, includes adjusted,
qnx4_fs.h doesn't need unifdef anymore.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 8425cf6..e1cd061 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -13,14 +13,9 @@
  * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) .
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/bitops.h>
+#include "qnx4.h"
 
 #if 0
 int qnx4_new_block(struct super_block *sb)
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index ff6c1ba..003c68f 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -11,14 +11,9 @@
  * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support.
  */
 
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
-
+#include "qnx4.h"
 
 static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index e7033ea..09b170a 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -12,8 +12,7 @@
  * 27-06-1998 by Frank Denis : file overwriting.
  */
 
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
+#include "qnx4.h"
 
 /*
  * We have mostly NULL's here: the current defaults are ok for
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 4071286..681df5f 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -13,20 +13,15 @@
  */
 
 #include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/highuid.h>
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
-#include <linux/vfs.h>
-#include <asm/uaccess.h>
+#include <linux/statfs.h>
+#include "qnx4.h"
 
 #define QNX4_VERSION  4
 #define QNX4_BMNAME   ".bitmap"
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 123270c..5972ed2 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -12,16 +12,9 @@
  * 04-07-1998 by Frank Denis : first step for rmdir/unlink.
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/errno.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include "qnx4.h"
 
 
 /*
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
new file mode 100644
index 0000000..9efc089
--- /dev/null
+++ b/fs/qnx4/qnx4.h
@@ -0,0 +1,57 @@
+#include <linux/fs.h>
+#include <linux/qnx4_fs.h>
+
+#define QNX4_DEBUG 0
+
+#if QNX4_DEBUG
+#define QNX4DEBUG(X) printk X
+#else
+#define QNX4DEBUG(X) (void) 0
+#endif
+
+struct qnx4_sb_info {
+	struct buffer_head	*sb_buf;	/* superblock buffer */
+	struct qnx4_super_block	*sb;		/* our superblock */
+	unsigned int		Version;	/* may be useful */
+	struct qnx4_inode_entry	*BitMap;	/* useful */
+};
+
+struct qnx4_inode_info {
+	struct qnx4_inode_entry raw;
+	loff_t mmu_private;
+	struct inode vfs_inode;
+};
+
+extern struct inode *qnx4_iget(struct super_block *, unsigned long);
+extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
+extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
+extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
+
+extern struct buffer_head *qnx4_bread(struct inode *, int, int);
+
+extern const struct inode_operations qnx4_file_inode_operations;
+extern const struct inode_operations qnx4_dir_inode_operations;
+extern const struct file_operations qnx4_file_operations;
+extern const struct file_operations qnx4_dir_operations;
+extern int qnx4_is_free(struct super_block *sb, long block);
+extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
+extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
+extern void qnx4_truncate(struct inode *inode);
+extern void qnx4_free_inode(struct inode *inode);
+extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
+extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
+
+static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static inline struct qnx4_inode_info *qnx4_i(struct inode *inode)
+{
+	return container_of(inode, struct qnx4_inode_info, vfs_inode);
+}
+
+static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode)
+{
+	return &qnx4_i(inode)->raw;
+}
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c
index 6437c1c..d94d9ee 100644
--- a/fs/qnx4/truncate.c
+++ b/fs/qnx4/truncate.c
@@ -10,12 +10,8 @@
  * 30-06-1998 by Frank DENIS : ugly filler.
  */
 
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
 #include <linux/smp_lock.h>
-#include <asm/uaccess.h>
+#include "qnx4.h"
 
 #ifdef CONFIG_QNX4FS_RW
 
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 3f0eaa3..b3afd22 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -135,6 +135,7 @@ header-y += posix_types.h
 header-y += ppdev.h
 header-y += prctl.h
 header-y += qnxtypes.h
+header-y += qnx4_fs.h
 header-y += radeonfb.h
 header-y += raw.h
 header-y += resource.h
@@ -308,7 +309,6 @@ unifdef-y += poll.h
 unifdef-y += ppp_defs.h
 unifdef-y += ppp-comp.h
 unifdef-y += ptrace.h
-unifdef-y += qnx4_fs.h
 unifdef-y += quota.h
 unifdef-y += random.h
 unifdef-y += irqnr.h
diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index acbaec3..8b9aee1 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -85,63 +85,4 @@ struct qnx4_super_block {
 	struct qnx4_inode_entry AltBoot;
 };
 
-#ifdef __KERNEL__
-
-#define QNX4_DEBUG 0
-
-#if QNX4_DEBUG
-#define QNX4DEBUG(X) printk X
-#else
-#define QNX4DEBUG(X) (void) 0
-#endif
-
-struct qnx4_sb_info {
-	struct buffer_head	*sb_buf;	/* superblock buffer */
-	struct qnx4_super_block	*sb;		/* our superblock */
-	unsigned int		Version;	/* may be useful */
-	struct qnx4_inode_entry	*BitMap;	/* useful */
-};
-
-struct qnx4_inode_info {
-	struct qnx4_inode_entry raw;
-	loff_t mmu_private;
-	struct inode vfs_inode;
-};
-
-extern struct inode *qnx4_iget(struct super_block *, unsigned long);
-extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
-extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
-extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
-
-extern struct buffer_head *qnx4_bread(struct inode *, int, int);
-
-extern const struct inode_operations qnx4_file_inode_operations;
-extern const struct inode_operations qnx4_dir_inode_operations;
-extern const struct file_operations qnx4_file_operations;
-extern const struct file_operations qnx4_dir_operations;
-extern int qnx4_is_free(struct super_block *sb, long block);
-extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
-extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
-extern void qnx4_truncate(struct inode *inode);
-extern void qnx4_free_inode(struct inode *inode);
-extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
-extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
-
-static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
-{
-	return sb->s_fs_info;
-}
-
-static inline struct qnx4_inode_info *qnx4_i(struct inode *inode)
-{
-	return container_of(inode, struct qnx4_inode_info, vfs_inode);
-}
-
-static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode)
-{
-	return &qnx4_i(inode)->raw;
-}
-
-#endif				/* __KERNEL__ */
-
 #endif
-- 
cgit v0.10.2


From b522412aeabadbb302fd4338eaabf09d10e2d29c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 13:44:36 -0400
Subject: Sanitize ->fsync() for FAT

* mark directory data blocks as assoc. metadata
* add new inode to deal with FAT, mark FAT blocks as assoc. metadata of that
* now ->fsync() is trivial both for files and directories

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 3a7f603..f350029 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -840,7 +840,7 @@ const struct file_operations fat_dir_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= fat_compat_dir_ioctl,
 #endif
-	.fsync		= file_fsync,
+	.fsync		= fat_file_fsync,
 };
 
 static int fat_get_short_entry(struct inode *dir, loff_t *pos,
@@ -967,7 +967,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
 			de++;
 			nr_slots--;
 		}
-		mark_buffer_dirty(bh);
+		mark_buffer_dirty_inode(bh, dir);
 		if (IS_DIRSYNC(dir))
 			err = sync_dirty_buffer(bh);
 		brelse(bh);
@@ -1001,7 +1001,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
 		de--;
 		nr_slots--;
 	}
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	if (IS_DIRSYNC(dir))
 		err = sync_dirty_buffer(bh);
 	brelse(bh);
@@ -1051,7 +1051,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
 		}
 		memset(bhs[n]->b_data, 0, sb->s_blocksize);
 		set_buffer_uptodate(bhs[n]);
-		mark_buffer_dirty(bhs[n]);
+		mark_buffer_dirty_inode(bhs[n], dir);
 
 		n++;
 		blknr++;
@@ -1131,7 +1131,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
 	de[0].size = de[1].size = 0;
 	memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
 	set_buffer_uptodate(bhs[0]);
-	mark_buffer_dirty(bhs[0]);
+	mark_buffer_dirty_inode(bhs[0], dir);
 
 	err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
 	if (err)
@@ -1193,7 +1193,7 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
 			slots += copy;
 			size -= copy;
 			set_buffer_uptodate(bhs[n]);
-			mark_buffer_dirty(bhs[n]);
+			mark_buffer_dirty_inode(bhs[n], dir);
 			if (!size)
 				break;
 			n++;
@@ -1293,7 +1293,7 @@ found:
 		for (i = 0; i < long_bhs; i++) {
 			int copy = min_t(int, sb->s_blocksize - offset, size);
 			memcpy(bhs[i]->b_data + offset, slots, copy);
-			mark_buffer_dirty(bhs[i]);
+			mark_buffer_dirty_inode(bhs[i], dir);
 			offset = 0;
 			slots += copy;
 			size -= copy;
@@ -1304,7 +1304,7 @@ found:
 			/* Fill the short name slot. */
 			int copy = min_t(int, sb->s_blocksize - offset, size);
 			memcpy(bhs[i]->b_data + offset, slots, copy);
-			mark_buffer_dirty(bhs[i]);
+			mark_buffer_dirty_inode(bhs[i], dir);
 			if (IS_DIRSYNC(dir))
 				err = sync_dirty_buffer(bhs[i]);
 		}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index ea440d6..e4d8852 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -74,6 +74,7 @@ struct msdos_sb_info {
 
 	int fatent_shift;
 	struct fatent_operations *fatent_ops;
+	struct inode *fat_inode;
 
 	spinlock_t inode_hash_lock;
 	struct hlist_head inode_hashtable[FAT_HASH_SIZE];
@@ -251,6 +252,7 @@ struct fat_entry {
 	} u;
 	int nr_bhs;
 	struct buffer_head *bhs[2];
+	struct inode *fat_inode;
 };
 
 static inline void fatent_init(struct fat_entry *fatent)
@@ -259,6 +261,7 @@ static inline void fatent_init(struct fat_entry *fatent)
 	fatent->entry = 0;
 	fatent->u.ent32_p = NULL;
 	fatent->bhs[0] = fatent->bhs[1] = NULL;
+	fatent->fat_inode = NULL;
 }
 
 static inline void fatent_set_entry(struct fat_entry *fatent, int entry)
@@ -275,6 +278,7 @@ static inline void fatent_brelse(struct fat_entry *fatent)
 		brelse(fatent->bhs[i]);
 	fatent->nr_bhs = 0;
 	fatent->bhs[0] = fatent->bhs[1] = NULL;
+	fatent->fat_inode = NULL;
 }
 
 extern void fat_ent_access_init(struct super_block *sb);
@@ -296,6 +300,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
 extern void fat_truncate(struct inode *inode);
 extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		       struct kstat *stat);
+extern int fat_file_fsync(struct file *file, struct dentry *dentry,
+			  int datasync);
 
 /* fat/inode.c */
 extern void fat_attach(struct inode *inode, loff_t i_pos);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index da6eea4..618f530 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -73,6 +73,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
 	struct buffer_head **bhs = fatent->bhs;
 
 	WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
+	fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
+
 	bhs[0] = sb_bread(sb, blocknr);
 	if (!bhs[0])
 		goto err;
@@ -103,6 +105,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
 	struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
 
 	WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
+	fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
 	fatent->bhs[0] = sb_bread(sb, blocknr);
 	if (!fatent->bhs[0]) {
 		printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
@@ -167,9 +170,9 @@ static void fat12_ent_put(struct fat_entry *fatent, int new)
 	}
 	spin_unlock(&fat12_entry_lock);
 
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 	if (fatent->nr_bhs == 2)
-		mark_buffer_dirty(fatent->bhs[1]);
+		mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode);
 }
 
 static void fat16_ent_put(struct fat_entry *fatent, int new)
@@ -178,7 +181,7 @@ static void fat16_ent_put(struct fat_entry *fatent, int new)
 		new = EOF_FAT16;
 
 	*fatent->u.ent16_p = cpu_to_le16(new);
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 }
 
 static void fat32_ent_put(struct fat_entry *fatent, int new)
@@ -189,7 +192,7 @@ static void fat32_ent_put(struct fat_entry *fatent, int new)
 	WARN_ON(new & 0xf0000000);
 	new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff;
 	*fatent->u.ent32_p = cpu_to_le32(new);
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 }
 
 static int fat12_ent_next(struct fat_entry *fatent)
@@ -381,7 +384,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
 			}
 			memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
 			set_buffer_uptodate(c_bh);
-			mark_buffer_dirty(c_bh);
+			mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
 			if (sb->s_flags & MS_SYNCHRONOUS)
 				err = sync_dirty_buffer(c_bh);
 			brelse(c_bh);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 0a7f4a9..e955a56 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -133,6 +133,18 @@ static int fat_file_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+	struct inode *inode = dentry->d_inode;
+	int res, err;
+
+	res = simple_fsync(filp, dentry, datasync);
+	err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
+
+	return res ? res : err;
+}
+
+
 const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -142,7 +154,7 @@ const struct file_operations fat_file_operations = {
 	.mmap		= generic_file_mmap,
 	.release	= fat_file_release,
 	.ioctl		= fat_generic_ioctl,
-	.fsync		= file_fsync,
+	.fsync		= fat_file_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 2292cbf..476f80b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -458,6 +458,8 @@ static void fat_put_super(struct super_block *sb)
 	if (sb->s_dirt)
 		fat_write_super(sb);
 
+	iput(sbi->fat_inode);
+
 	if (sbi->nls_disk) {
 		unload_nls(sbi->nls_disk);
 		sbi->nls_disk = NULL;
@@ -1183,7 +1185,7 @@ static int fat_read_root(struct inode *inode)
 int fat_fill_super(struct super_block *sb, void *data, int silent,
 		   const struct inode_operations *fs_dir_inode_ops, int isvfat)
 {
-	struct inode *root_inode = NULL;
+	struct inode *root_inode = NULL, *fat_inode = NULL;
 	struct buffer_head *bh;
 	struct fat_boot_sector *b;
 	struct msdos_sb_info *sbi;
@@ -1423,6 +1425,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
 	}
 
 	error = -ENOMEM;
+	fat_inode = new_inode(sb);
+	if (!fat_inode)
+		goto out_fail;
+	MSDOS_I(fat_inode)->i_pos = 0;
+	sbi->fat_inode = fat_inode;
 	root_inode = new_inode(sb);
 	if (!root_inode)
 		goto out_fail;
@@ -1448,6 +1455,8 @@ out_invalid:
 		       " on dev %s.\n", sb->s_id);
 
 out_fail:
+	if (fat_inode)
+		iput(fat_inode);
 	if (root_inode)
 		iput(root_inode);
 	if (sbi->nls_io)
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index da3f361..20f5228 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -544,7 +544,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
 		int start = MSDOS_I(new_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		if (IS_DIRSYNC(new_dir)) {
 			err = sync_dirty_buffer(dotdot_bh);
 			if (err)
@@ -586,7 +586,7 @@ error_dotdot:
 		int start = MSDOS_I(old_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		corrupt |= sync_dirty_buffer(dotdot_bh);
 	}
 error_inode:
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a0e00e3..b50ecbe 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -965,7 +965,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
 		int start = MSDOS_I(new_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		if (IS_DIRSYNC(new_dir)) {
 			err = sync_dirty_buffer(dotdot_bh);
 			if (err)
@@ -1009,7 +1009,7 @@ error_dotdot:
 		int start = MSDOS_I(old_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		corrupt |= sync_dirty_buffer(dotdot_bh);
 	}
 error_inode:
-- 
cgit v0.10.2


From e1740a462ecb2eae213be15857b577cc6f6bb8b4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 15:14:02 -0400
Subject: switch ext2 to simple_fsync()

kill ext2_sync_file() (along with ext2/fsync.c), get rid of
ext2_update_inode() - it's an alias of ext2_write_inode().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index e0b2b43..f42af45 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_EXT2_FS) += ext2.o
 
-ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \
+ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
 	  ioctl.o namei.o super.o symlink.o
 
 ext2-$(CONFIG_EXT2_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2999d72..0035004 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -720,5 +720,5 @@ const struct file_operations ext2_dir_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
 #endif
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 };
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 3203042..b2bbf45 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -113,9 +113,6 @@ extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *);
 
-/* fsync.c */
-extern int ext2_sync_file (struct file *, struct dentry *, int);
-
 /* ialloc.c */
 extern struct inode * ext2_new_inode (struct inode *, int);
 extern void ext2_free_inode (struct inode *);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 45ed071..2b9e47d 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,7 +55,7 @@ const struct file_operations ext2_file_operations = {
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 };
@@ -72,7 +72,7 @@ const struct file_operations ext2_xip_file_operations = {
 	.mmap		= xip_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 };
 #endif
 
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
deleted file mode 100644
index fc66c93..0000000
--- a/fs/ext2/fsync.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *  linux/fs/ext2/fsync.c
- *
- *  Copyright (C) 1993  Stephen Tweedie (sct@dcs.ed.ac.uk)
- *  from
- *  Copyright (C) 1992  Remy Card (card@masi.ibp.fr)
- *                      Laboratoire MASI - Institut Blaise Pascal
- *                      Universite Pierre et Marie Curie (Paris VI)
- *  from
- *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
- * 
- *  ext2fs fsync primitive
- *
- *  Big-endian to little-endian byte-swapping/bitmaps by
- *        David S. Miller (davem@caip.rutgers.edu), 1995
- * 
- *  Removed unnecessary code duplication for little endian machines
- *  and excessive __inline__s. 
- *        Andi Kleen, 1997
- *
- * Major simplications and cleanup - we only need to do the metadata, because
- * we can depend on generic_block_fdatasync() to sync the data blocks.
- */
-
-#include "ext2.h"
-#include <linux/buffer_head.h>		/* for sync_mapping_buffers() */
-
-
-/*
- *	File may be NULL when we are called. Perhaps we shouldn't
- *	even pass file to fsync ?
- */
-
-int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-	int ret;
-
-	ret = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return ret;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return ret;
-
-	err = ext2_sync_inode(inode);
-	if (ret == 0)
-		ret = err;
-	return ret;
-}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index acf6788..29ed682 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -41,8 +41,6 @@ MODULE_AUTHOR("Remy Card and others");
 MODULE_DESCRIPTION("Second Extended Filesystem");
 MODULE_LICENSE("GPL");
 
-static int ext2_update_inode(struct inode * inode, int do_sync);
-
 /*
  * Test whether an inode is a fast symlink.
  */
@@ -66,7 +64,7 @@ void ext2_delete_inode (struct inode * inode)
 		goto no_delete;
 	EXT2_I(inode)->i_dtime	= get_seconds();
 	mark_inode_dirty(inode);
-	ext2_update_inode(inode, inode_needs_sync(inode));
+	ext2_write_inode(inode, inode_needs_sync(inode));
 
 	inode->i_size = 0;
 	if (inode->i_blocks)
@@ -1337,7 +1335,7 @@ bad_inode:
 	return ERR_PTR(ret);
 }
 
-static int ext2_update_inode(struct inode * inode, int do_sync)
+int ext2_write_inode(struct inode *inode, int do_sync)
 {
 	struct ext2_inode_info *ei = EXT2_I(inode);
 	struct super_block *sb = inode->i_sb;
@@ -1442,11 +1440,6 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
 	return err;
 }
 
-int ext2_write_inode(struct inode *inode, int wait)
-{
-	return ext2_update_inode(inode, wait);
-}
-
 int ext2_sync_inode(struct inode *inode)
 {
 	struct writeback_control wbc = {
-- 
cgit v0.10.2


From 0d7916d7e985da52cdd2989c900485e17b035972 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 15:21:06 -0400
Subject: switch minix to simple_fsync()

* get minix_write_inode() to honour the second argument
* now we can use simple_fsync() for minixfs

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index d4946c4..e5f2064 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -22,7 +22,7 @@ static int minix_readdir(struct file *, void *, filldir_t);
 const struct file_operations minix_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= minix_readdir,
-	.fsync		= minix_sync_file,
+	.fsync		= simple_fsync,
 };
 
 static inline void dir_put_page(struct page *page)
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 17765f6..3eec3e6 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -6,15 +6,12 @@
  *  minix regular file handling primitives
  */
 
-#include <linux/buffer_head.h>		/* for fsync_inode_buffers() */
 #include "minix.h"
 
 /*
  * We have mostly NULLs here: the current defaults are OK for
  * the minix filesystem.
  */
-int minix_sync_file(struct file *, struct dentry *, int);
-
 const struct file_operations minix_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -22,7 +19,7 @@ const struct file_operations minix_file_operations = {
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.fsync		= minix_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
@@ -30,18 +27,3 @@ const struct inode_operations minix_file_inode_operations = {
 	.truncate	= minix_truncate,
 	.getattr	= minix_getattr,
 };
-
-int minix_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-	
-	err |= minix_sync_inode(inode);
-	return err ? -EIO : 0;
-}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 7eb5397..f91a236 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -556,38 +556,25 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
 	return bh;
 }
 
-static struct buffer_head *minix_update_inode(struct inode *inode)
-{
-	if (INODE_VERSION(inode) == MINIX_V1)
-		return V1_minix_update_inode(inode);
-	else
-		return V2_minix_update_inode(inode);
-}
-
-static int minix_write_inode(struct inode * inode, int wait)
-{
-	brelse(minix_update_inode(inode));
-	return 0;
-}
-
-int minix_sync_inode(struct inode * inode)
+static int minix_write_inode(struct inode *inode, int wait)
 {
 	int err = 0;
 	struct buffer_head *bh;
 
-	bh = minix_update_inode(inode);
-	if (bh && buffer_dirty(bh))
-	{
+	if (INODE_VERSION(inode) == MINIX_V1)
+		bh = V1_minix_update_inode(inode);
+	else
+		bh = V2_minix_update_inode(inode);
+	if (!bh)
+		return -EIO;
+	if (wait && buffer_dirty(bh)) {
 		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh))
-		{
+		if (buffer_req(bh) && !buffer_uptodate(bh)) {
 			printk("IO error syncing minix inode [%s:%08lx]\n",
 				inode->i_sb->s_id, inode->i_ino);
-			err = -1;
+			err = -EIO;
 		}
 	}
-	else if (!bh)
-		err = -1;
 	brelse (bh);
 	return err;
 }
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index e6a0b19..cb7fdd1 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -57,7 +57,6 @@ extern int __minix_write_begin(struct file *file, struct address_space *mapping,
 extern void V1_minix_truncate(struct inode *);
 extern void V2_minix_truncate(struct inode *);
 extern void minix_truncate(struct inode *);
-extern int minix_sync_inode(struct inode *);
 extern void minix_set_inode(struct inode *, dev_t);
 extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int);
 extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int);
@@ -72,7 +71,6 @@ extern int minix_empty_dir(struct inode*);
 extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*);
 extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**);
 extern ino_t minix_inode_by_name(struct dentry*);
-extern int minix_sync_file(struct file *, struct dentry *, int);
 
 extern const struct inode_operations minix_file_inode_operations;
 extern const struct inode_operations minix_dir_inode_operations;
-- 
cgit v0.10.2


From 05459ca81ac3064cb040d983342bc453cccec458 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 15:29:45 -0400
Subject: repair sysv_write_inode(), switch sysv to simple_fsync()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 56f6552..c779807 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -24,7 +24,7 @@ static int sysv_readdir(struct file *, void *, filldir_t);
 const struct file_operations sysv_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= sysv_readdir,
-	.fsync		= sysv_sync_file,
+	.fsync		= simple_fsync,
 };
 
 static inline void dir_put_page(struct page *page)
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 589be21..96340c0 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = {
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.fsync		= sysv_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
@@ -34,18 +34,3 @@ const struct inode_operations sysv_file_inode_operations = {
 	.truncate	= sysv_truncate,
 	.getattr	= sysv_getattr,
 };
-
-int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-	
-	err |= sysv_sync_inode(inode);
-	return err ? -EIO : 0;
-}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index a3f45fc..425c976 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -247,7 +247,7 @@ bad_inode:
 	return ERR_PTR(-EIO);
 }
 
-static struct buffer_head * sysv_update_inode(struct inode * inode)
+int sysv_write_inode(struct inode *inode, int wait)
 {
 	struct super_block * sb = inode->i_sb;
 	struct sysv_sb_info * sbi = SYSV_SB(sb);
@@ -255,19 +255,21 @@ static struct buffer_head * sysv_update_inode(struct inode * inode)
 	struct sysv_inode * raw_inode;
 	struct sysv_inode_info * si;
 	unsigned int ino, block;
+	int err = 0;
 
 	ino = inode->i_ino;
 	if (!ino || ino > sbi->s_ninodes) {
 		printk("Bad inode number on dev %s: %d is out of range\n",
 		       inode->i_sb->s_id, ino);
-		return NULL;
+		return -EIO;
 	}
 	raw_inode = sysv_raw_inode(sb, ino, &bh);
 	if (!raw_inode) {
 		printk("unable to read i-node block\n");
-		return NULL;
+		return -EIO;
 	}
 
+	lock_kernel();
 	raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode);
 	raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid));
 	raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid));
@@ -283,38 +285,23 @@ static struct buffer_head * sysv_update_inode(struct inode * inode)
 	for (block = 0; block < 10+1+1+1; block++)
 		write3byte(sbi, (u8 *)&si->i_data[block],
 			&raw_inode->i_data[3*block]);
+	unlock_kernel();
 	mark_buffer_dirty(bh);
-	return bh;
-}
-
-int sysv_write_inode(struct inode * inode, int wait)
-{
-	struct buffer_head *bh;
-	lock_kernel();
-	bh = sysv_update_inode(inode);
+	if (wait) {
+                sync_dirty_buffer(bh);
+                if (buffer_req(bh) && !buffer_uptodate(bh)) {
+                        printk ("IO error syncing sysv inode [%s:%08x]\n",
+                                sb->s_id, ino);
+                        err = -EIO;
+                }
+        }
 	brelse(bh);
-	unlock_kernel();
 	return 0;
 }
 
-int sysv_sync_inode(struct inode * inode)
+int sysv_sync_inode(struct inode *inode)
 {
-        int err = 0;
-        struct buffer_head *bh;
-
-        bh = sysv_update_inode(inode);
-        if (bh && buffer_dirty(bh)) {
-                sync_dirty_buffer(bh);
-                if (buffer_req(bh) && !buffer_uptodate(bh)) {
-                        printk ("IO error syncing sysv inode [%s:%08lx]\n",
-                                inode->i_sb->s_id, inode->i_ino);
-                        err = -1;
-                }
-        }
-        else if (!bh)
-                err = -1;
-        brelse (bh);
-        return err;
+	return sysv_write_inode(inode, 1);
 }
 
 static void sysv_delete_inode(struct inode *inode)
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 5784a31..53786eb 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -144,7 +144,6 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping,
 extern struct inode *sysv_iget(struct super_block *, unsigned int);
 extern int sysv_write_inode(struct inode *, int);
 extern int sysv_sync_inode(struct inode *);
-extern int sysv_sync_file(struct file *, struct dentry *, int);
 extern void sysv_set_inode(struct inode *, dev_t);
 extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int sysv_init_icache(void);
-- 
cgit v0.10.2


From a932801543fe74050ebee07fde082234c46b624f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 15:35:18 -0400
Subject: switch ufs to simple_fsync()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 6321b79..6f671f1 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -666,6 +666,6 @@ not_empty:
 const struct file_operations ufs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= ufs_readdir,
-	.fsync		= ufs_sync_file,
+	.fsync		= simple_fsync,
 	.llseek		= generic_file_llseek,
 };
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 2bd3a16..73655c6 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,31 +24,10 @@
  */
 
 #include <linux/fs.h>
-#include <linux/buffer_head.h>	/* for sync_mapping_buffers() */
 
 #include "ufs_fs.h"
 #include "ufs.h"
 
-
-int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-	int ret;
-
-	ret = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return ret;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return ret;
-
-	err = ufs_sync_inode(inode);
-	if (ret == 0)
-		ret = err;
-	return ret;
-}
-
-
 /*
  * We have mostly NULL's here: the current defaults are ok for
  * the ufs filesystem.
@@ -62,6 +41,6 @@ const struct file_operations ufs_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.open           = generic_file_open,
-	.fsync		= ufs_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index d0c4acd..644e77e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -99,7 +99,6 @@ extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
 extern const struct inode_operations ufs_file_inode_operations;
 extern const struct file_operations ufs_file_operations;
 extern const struct address_space_operations ufs_aops;
-extern int ufs_sync_file(struct file *, struct dentry *, int);
 
 /* ialloc.c */
 extern void ufs_free_inode (struct inode *inode);
-- 
cgit v0.10.2


From 90de066443a8632bb42fed0a8216313d7da07aba Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 15:40:27 -0400
Subject: switch udf to simple_fsync()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/udf/Makefile b/fs/udf/Makefile
index 0d4503f..eb880f6 100644
--- a/fs/udf/Makefile
+++ b/fs/udf/Makefile
@@ -5,5 +5,5 @@
 obj-$(CONFIG_UDF_FS) += udf.o
 
 udf-objs     := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \
-		partition.o super.o truncate.o symlink.o fsync.o \
+		partition.o super.o truncate.o symlink.o \
 		directory.o misc.o udftime.o unicode.o
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 2efd4d5..61d9a76 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -210,5 +210,5 @@ const struct file_operations udf_dir_operations = {
 	.read			= generic_read_dir,
 	.readdir		= udf_readdir,
 	.ioctl			= udf_ioctl,
-	.fsync			= udf_fsync_file,
+	.fsync			= simple_fsync,
 };
diff --git a/fs/udf/file.c b/fs/udf/file.c
index eb91f3b..7464305 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -209,7 +209,7 @@ const struct file_operations udf_file_operations = {
 	.write			= do_sync_write,
 	.aio_write		= udf_file_aio_write,
 	.release		= udf_release_file,
-	.fsync			= udf_fsync_file,
+	.fsync			= simple_fsync,
 	.splice_read		= generic_file_splice_read,
 	.llseek			= generic_file_llseek,
 };
diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c
deleted file mode 100644
index b2c472b..0000000
--- a/fs/udf/fsync.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * fsync.c
- *
- * PURPOSE
- *  Fsync handling routines for the OSTA-UDF(tm) filesystem.
- *
- * COPYRIGHT
- *  This file is distributed under the terms of the GNU General Public
- *  License (GPL). Copies of the GPL can be obtained from:
- *      ftp://prep.ai.mit.edu/pub/gnu/GPL
- *  Each contributing author retains all rights to their own work.
- *
- *  (C) 1999-2001 Ben Fennema
- *  (C) 1999-2000 Stelias Computing Inc
- *
- * HISTORY
- *
- *  05/22/99 blf  Created.
- */
-
-#include "udfdecl.h"
-
-#include <linux/fs.h>
-
-static int udf_fsync_inode(struct inode *, int);
-
-/*
- *	File may be NULL when we are called. Perhaps we shouldn't
- *	even pass file to fsync ?
- */
-
-int udf_fsync_file(struct file *file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-
-	return udf_fsync_inode(inode, datasync);
-}
-
-static int udf_fsync_inode(struct inode *inode, int datasync)
-{
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-
-	err |= udf_sync_inode(inode);
-
-	return err ? -EIO : 0;
-}
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index cac51b7..8d46f42 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -223,9 +223,6 @@ extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t,
 extern int udf_new_block(struct super_block *, struct inode *, uint16_t,
 			 uint32_t, int *);
 
-/* fsync.c */
-extern int udf_fsync_file(struct file *, struct dentry *, int);
-
 /* directory.c */
 extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *,
 						struct udf_fileident_bh *,
-- 
cgit v0.10.2


From bea6b64c277f0824cdaea6190209b26a164419d6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 15:44:50 -0400
Subject: switch omfs to simple_fsync()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 834b233..d17e774 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -11,21 +11,6 @@
 #include <linux/mpage.h>
 #include "omfs.h"
 
-static int omfs_sync_file(struct file *file, struct dentry *dentry,
-		int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-	err |= omfs_sync_inode(inode);
-	return err ? -EIO : 0;
-}
-
 static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
 {
 	return (sbi->s_sys_blocksize - offset -
@@ -344,7 +329,7 @@ struct file_operations omfs_file_operations = {
 	.aio_read = generic_file_aio_read,
 	.aio_write = generic_file_aio_write,
 	.mmap = generic_file_mmap,
-	.fsync = omfs_sync_file,
+	.fsync = simple_fsync,
 	.splice_read = generic_file_splice_read,
 };
 
-- 
cgit v0.10.2


From ffdc9064f8b4fa9db37a7d5180f41cce2ea2b7ad Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 00:44:42 -0400
Subject: repair adfs ->write_inode(), switch to simple_fsync()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index e0a85db..a6665f3 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -53,6 +53,7 @@ struct adfs_dir_ops {
 	int	(*update)(struct adfs_dir *dir, struct object_info *obj);
 	int	(*create)(struct adfs_dir *dir, struct object_info *obj);
 	int	(*remove)(struct adfs_dir *dir, struct object_info *obj);
+	int	(*sync)(struct adfs_dir *dir);
 	void	(*free)(struct adfs_dir *dir);
 };
 
@@ -90,7 +91,8 @@ extern const struct dentry_operations adfs_dentry_operations;
 extern struct adfs_dir_ops adfs_f_dir_ops;
 extern struct adfs_dir_ops adfs_fplus_dir_ops;
 
-extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
+extern int adfs_dir_update(struct super_block *sb, struct object_info *obj,
+			   int wait);
 
 /* file.c */
 extern const struct inode_operations adfs_file_inode_operations;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index e867ccf..4d40734 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -83,7 +83,7 @@ out:
 }
 
 int
-adfs_dir_update(struct super_block *sb, struct object_info *obj)
+adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait)
 {
 	int ret = -EINVAL;
 #ifdef CONFIG_ADFS_FS_RW
@@ -106,6 +106,12 @@ adfs_dir_update(struct super_block *sb, struct object_info *obj)
 	ret = ops->update(&dir, obj);
 	write_unlock(&adfs_dir_lock);
 
+	if (wait) {
+		int err = ops->sync(&dir);
+		if (!ret)
+			ret = err;
+	}
+
 	ops->free(&dir);
 out:
 #endif
@@ -199,7 +205,7 @@ const struct file_operations adfs_dir_operations = {
 	.read		= generic_read_dir,
 	.llseek		= generic_file_llseek,
 	.readdir	= adfs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 };
 
 static int
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index ea7df21..31df6ad 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -437,6 +437,22 @@ bad_dir:
 #endif
 }
 
+static int
+adfs_f_sync(struct adfs_dir *dir)
+{
+	int err = 0;
+	int i;
+
+	for (i = dir->nr_buffers - 1; i >= 0; i--) {
+		struct buffer_head *bh = dir->bh[i];
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
+
+	return err;
+}
+
 static void
 adfs_f_free(struct adfs_dir *dir)
 {
@@ -456,5 +472,6 @@ struct adfs_dir_ops adfs_f_dir_ops = {
 	.setpos		= adfs_f_setpos,
 	.getnext	= adfs_f_getnext,
 	.update		= adfs_f_update,
+	.sync		= adfs_f_sync,
 	.free		= adfs_f_free
 };
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 1ec644e..139e0f3 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -161,6 +161,22 @@ out:
 	return ret;
 }
 
+static int
+adfs_fplus_sync(struct adfs_dir *dir)
+{
+	int err = 0;
+	int i;
+
+	for (i = dir->nr_buffers - 1; i >= 0; i--) {
+		struct buffer_head *bh = dir->bh[i];
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
+
+	return err;
+}
+
 static void
 adfs_fplus_free(struct adfs_dir *dir)
 {
@@ -175,5 +191,6 @@ struct adfs_dir_ops adfs_fplus_dir_ops = {
 	.read		= adfs_fplus_read,
 	.setpos		= adfs_fplus_setpos,
 	.getnext	= adfs_fplus_getnext,
+	.sync		= adfs_fplus_sync,
 	.free		= adfs_fplus_free
 };
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 36e381c..8224d54 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -30,7 +30,7 @@ const struct file_operations adfs_file_operations = {
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.mmap		= generic_file_mmap,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index e647200..05b3a67 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -376,7 +376,7 @@ out:
  * The adfs-specific inode data has already been updated by
  * adfs_notify_change()
  */
-int adfs_write_inode(struct inode *inode, int unused)
+int adfs_write_inode(struct inode *inode, int wait)
 {
 	struct super_block *sb = inode->i_sb;
 	struct object_info obj;
@@ -391,7 +391,7 @@ int adfs_write_inode(struct inode *inode, int unused)
 	obj.attr	= ADFS_I(inode)->attr;
 	obj.size	= inode->i_size;
 
-	ret = adfs_dir_update(sb, &obj);
+	ret = adfs_dir_update(sb, &obj, wait);
 	unlock_kernel();
 	return ret;
 }
-- 
cgit v0.10.2


From 224c886643e52e6b4c1143489cd0b289b6c03976 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 00:46:40 -0400
Subject: Fix adfs GET_FRAG_ID() on big-endian

Missing conversion to host-endian before doing shifts

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index 92ab4fb..568081b 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -62,7 +62,7 @@ static DEFINE_RWLOCK(adfs_map_lock);
 #define GET_FRAG_ID(_map,_start,_idmask)				\
 	({								\
 		unsigned char *_m = _map + (_start >> 3);		\
-		u32 _frag = get_unaligned((u32 *)_m);			\
+		u32 _frag = get_unaligned_le32(_m);			\
 		_frag >>= (_start & 7);					\
 		_frag & _idmask;					\
 	})
-- 
cgit v0.10.2


From 4427f0c36e22e2cd6696b2fe7643e9756a14b3d3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 01:15:58 -0400
Subject: repair bfs_write_inode(), switch bfs to simple_fsync()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 4dd1b62..54bd07d 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -79,7 +79,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
 const struct file_operations bfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= bfs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 	.llseek		= generic_file_llseek,
 };
 
@@ -205,7 +205,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
 		inode->i_nlink = 1;
 	}
 	de->ino = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
 	inode->i_ctime = dir->i_ctime;
@@ -267,7 +267,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		inode_dec_link_count(new_inode);
 	}
-	mark_buffer_dirty(old_bh);
+	mark_buffer_dirty_inode(old_bh, old_dir);
 	error = 0;
 
 end_rename:
@@ -320,7 +320,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name,
 				for (i = 0; i < BFS_NAMELEN; i++)
 					de->name[i] =
 						(i < namelen) ? name[i] : 0;
-				mark_buffer_dirty(bh);
+				mark_buffer_dirty_inode(bh, dir);
 				brelse(bh);
 				return 0;
 			}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 3a9a136..d1d9d90 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -98,14 +98,15 @@ error:
 	return ERR_PTR(-EIO);
 }
 
-static int bfs_write_inode(struct inode *inode, int unused)
+static int bfs_write_inode(struct inode *inode, int wait)
 {
+	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
 	unsigned int ino = (u16)inode->i_ino;
         unsigned long i_sblock;
 	struct bfs_inode *di;
 	struct buffer_head *bh;
 	int block, off;
-	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
+	int err = 0;
 
         dprintf("ino=%08x\n", ino);
 
@@ -146,9 +147,14 @@ static int bfs_write_inode(struct inode *inode, int unused)
 	di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
 
 	mark_buffer_dirty(bh);
+	if (wait) {
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
 	brelse(bh);
 	mutex_unlock(&info->bfs_lock);
-	return 0;
+	return err;
 }
 
 static void bfs_delete_inode(struct inode *inode)
-- 
cgit v0.10.2


From c475879556a8602bbe2faa9a06f6e5fcc8c05bb2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 01:22:00 -0400
Subject: sanitize ->fsync() for affs

unfortunately, for affs (especially for affs directories) we have
no real way to keep track of metadata ownership.  So we have to
do more or less what file_fsync() does, but we do *not* need to
call write_super() there.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1a2d5e3..e511dc6 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -182,6 +182,7 @@ extern int			 affs_add_entry(struct inode *dir, struct inode *inode, struct dent
 
 void		affs_free_prealloc(struct inode *inode);
 extern void	affs_truncate(struct inode *);
+int		affs_file_fsync(struct file *, struct dentry *, int);
 
 /* dir.c */
 
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 7b36904..8ca8f3a 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -21,7 +21,7 @@ const struct file_operations affs_dir_operations = {
 	.read		= generic_read_dir,
 	.llseek		= generic_file_llseek,
 	.readdir	= affs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= affs_file_fsync,
 };
 
 /*
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 9246cb4..184e55c 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -34,7 +34,7 @@ const struct file_operations affs_file_operations = {
 	.mmap		= generic_file_mmap,
 	.open		= affs_file_open,
 	.release	= affs_file_release,
-	.fsync		= file_fsync,
+	.fsync		= affs_file_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
@@ -915,3 +915,15 @@ affs_truncate(struct inode *inode)
 	}
 	affs_free_prealloc(inode);
 }
+
+int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+	struct inode * inode = dentry->d_inode;
+	int ret, err;
+
+	ret = write_inode_now(inode, 0);
+	err = sync_blockdev(inode->i_sb->s_bdev);
+	if (!ret)
+		ret = err;
+	return ret;
+}
-- 
cgit v0.10.2


From e28964365faf3b9961695eb62b48cbc9f2a2a245 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:03:15 +0200
Subject: affs: add ->sync_fs

Add a ->sync_fs method for data integrity syncs.  Factor out common code
between affs_put_super, affs_write_super and the new affs_sync_fs into
a helper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/affs/super.c b/fs/affs/super.c
index c481493..104fdcb 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -25,6 +25,19 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
+affs_commit_super(struct super_block *sb, int clean)
+{
+	struct affs_sb_info *sbi = AFFS_SB(sb);
+	struct buffer_head *bh = sbi->s_root_bh;
+	struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
+
+	tail->bm_flag = cpu_to_be32(clean);
+	secs_to_datestamp(get_seconds(), &tail->disk_change);
+	affs_fix_checksum(sb, bh);
+	mark_buffer_dirty(bh);
+}
+
+static void
 affs_put_super(struct super_block *sb)
 {
 	struct affs_sb_info *sbi = AFFS_SB(sb);
@@ -32,13 +45,8 @@ affs_put_super(struct super_block *sb)
 
 	lock_kernel();
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1);
-		secs_to_datestamp(get_seconds(),
-				  &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
-		affs_fix_checksum(sb, sbi->s_root_bh);
-		mark_buffer_dirty(sbi->s_root_bh);
-	}
+	if (!(sb->s_flags & MS_RDONLY))
+		affs_commit_super(sb, 1);
 
 	kfree(sbi->s_prefix);
 	affs_free_bitmap(sb);
@@ -53,18 +61,13 @@ static void
 affs_write_super(struct super_block *sb)
 {
 	int clean = 2;
-	struct affs_sb_info *sbi = AFFS_SB(sb);
 
 	lock_super(sb);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		//	if (sbi->s_bitmap[i].bm_bh) {
 		//		if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
 		//			clean = 0;
-		AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(clean);
-		secs_to_datestamp(get_seconds(),
-				  &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
-		affs_fix_checksum(sb, sbi->s_root_bh);
-		mark_buffer_dirty(sbi->s_root_bh);
+		affs_commit_super(sb, clean);
 		sb->s_dirt = !clean;	/* redo until bitmap synced */
 	} else
 		sb->s_dirt = 0;
@@ -73,6 +76,16 @@ affs_write_super(struct super_block *sb)
 	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
 }
 
+static int
+affs_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
+	affs_commit_super(sb, 2);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+	return 0;
+}
+
 static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
@@ -130,6 +143,7 @@ static const struct super_operations affs_sops = {
 	.clear_inode	= affs_clear_inode,
 	.put_super	= affs_put_super,
 	.write_super	= affs_write_super,
+	.sync_fs	= affs_sync_fs,
 	.statfs		= affs_statfs,
 	.remount_fs	= affs_remount,
 	.show_options	= generic_show_options,
-- 
cgit v0.10.2


From 561e47ce7244168788d4ecef9a2271df204b3c89 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:03:38 +0200
Subject: bfs: add ->sync_fs

Add a ->sync_fs method for data integrity syncs, and reimplement
->write_super ontop of it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index d1d9d90..6f60336 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -216,6 +216,26 @@ static void bfs_delete_inode(struct inode *inode)
 	clear_inode(inode);
 }
 
+static int bfs_sync_fs(struct super_block *sb, int wait)
+{
+	struct bfs_sb_info *info = BFS_SB(sb);
+
+	mutex_lock(&info->bfs_lock);
+	mark_buffer_dirty(info->si_sbh);
+	sb->s_dirt = 0;
+	mutex_unlock(&info->bfs_lock);
+
+	return 0;
+}
+
+static void bfs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		bfs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
+}
+
 static void bfs_put_super(struct super_block *s)
 {
 	struct bfs_sb_info *info = BFS_SB(s);
@@ -254,17 +274,6 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static void bfs_write_super(struct super_block *s)
-{
-	struct bfs_sb_info *info = BFS_SB(s);
-
-	mutex_lock(&info->bfs_lock);
-	if (!(s->s_flags & MS_RDONLY))
-		mark_buffer_dirty(info->si_sbh);
-	s->s_dirt = 0;
-	mutex_unlock(&info->bfs_lock);
-}
-
 static struct kmem_cache *bfs_inode_cachep;
 
 static struct inode *bfs_alloc_inode(struct super_block *sb)
@@ -312,6 +321,7 @@ static const struct super_operations bfs_sops = {
 	.delete_inode	= bfs_delete_inode,
 	.put_super	= bfs_put_super,
 	.write_super	= bfs_write_super,
+	.sync_fs	= bfs_sync_fs,
 	.statfs		= bfs_statfs,
 };
 
-- 
cgit v0.10.2


From 80e09fb942d38beb19dcffbeb14d496beeb0a989 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:03:58 +0200
Subject: exofs: add ->sync_fs

Add a ->sync_fs method for data integrity syncs, and reimplement
->write_super ontop of it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 49e16af..8216c5b 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -200,18 +200,18 @@ static const struct export_operations exofs_export_ops;
 /*
  * Write the superblock to the OSD
  */
-static void exofs_write_super(struct super_block *sb)
+static int exofs_sync_fs(struct super_block *sb, int wait)
 {
 	struct exofs_sb_info *sbi;
 	struct exofs_fscb *fscb;
 	struct osd_request *or;
 	struct osd_obj_id obj;
-	int ret;
+	int ret = -ENOMEM;
 
 	fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
 	if (!fscb) {
 		EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
-		return;
+		return -ENOMEM;
 	}
 
 	lock_super(sb);
@@ -249,6 +249,15 @@ out:
 	unlock_kernel();
 	unlock_super(sb);
 	kfree(fscb);
+	return ret;
+}
+
+static void exofs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		exofs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 /*
@@ -493,6 +502,7 @@ static const struct super_operations exofs_sops = {
 	.delete_inode   = exofs_delete_inode,
 	.put_super      = exofs_put_super,
 	.write_super    = exofs_write_super,
+	.sync_fs	= exofs_sync_fs,
 	.statfs         = exofs_statfs,
 };
 
-- 
cgit v0.10.2


From 40f31dd47e7c3d15af1f9845eda0fa0c4c33f32f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:04:17 +0200
Subject: ext2: add ->sync_fs

Add a ->sync_fs method for data integrity syncs, and reimplement
->write_super ontop of it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f8cbdf5..4589996 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -42,6 +42,7 @@ static void ext2_sync_super(struct super_block *sb,
 			    struct ext2_super_block *es);
 static int ext2_remount (struct super_block * sb, int * flags, char * data);
 static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
+static int ext2_sync_fs(struct super_block *sb, int wait);
 
 void ext2_error (struct super_block * sb, const char * function,
 		 const char * fmt, ...)
@@ -309,6 +310,7 @@ static const struct super_operations ext2_sops = {
 	.delete_inode	= ext2_delete_inode,
 	.put_super	= ext2_put_super,
 	.write_super	= ext2_write_super,
+	.sync_fs	= ext2_sync_fs,
 	.statfs		= ext2_statfs,
 	.remount_fs	= ext2_remount,
 	.clear_inode	= ext2_clear_inode,
@@ -1132,25 +1134,36 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
  * set s_state to EXT2_VALID_FS after some corrections.
  */
 
-void ext2_write_super (struct super_block * sb)
+static int ext2_sync_fs(struct super_block *sb, int wait)
 {
-	struct ext2_super_block * es;
+	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
+
 	lock_kernel();
-	if (!(sb->s_flags & MS_RDONLY)) {
-		es = EXT2_SB(sb)->s_es;
-
-		if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
-			ext2_debug ("setting valid to 0\n");
-			es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
-			es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
-			es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
-			es->s_mtime = cpu_to_le32(get_seconds());
-			ext2_sync_super(sb, es);
-		} else
-			ext2_commit_super (sb, es);
+	if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
+		ext2_debug("setting valid to 0\n");
+		es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
+		es->s_free_blocks_count =
+			cpu_to_le32(ext2_count_free_blocks(sb));
+		es->s_free_inodes_count =
+			cpu_to_le32(ext2_count_free_inodes(sb));
+		es->s_mtime = cpu_to_le32(get_seconds());
+		ext2_sync_super(sb, es);
+	} else {
+		ext2_commit_super(sb, es);
 	}
 	sb->s_dirt = 0;
 	unlock_kernel();
+
+	return 0;
+}
+
+
+void ext2_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		ext2_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static int ext2_remount (struct super_block * sb, int * flags, char * data)
-- 
cgit v0.10.2


From f83d6d46e7adf241a064a4a425e5cd8a8fd8925f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:04:35 +0200
Subject: fat: add ->sync_fs

Add a ->sync_fs method for data integrity syncs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 476f80b..51a5ecf 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -449,6 +449,16 @@ static void fat_write_super(struct super_block *sb)
 	unlock_super(sb);
 }
 
+static int fat_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
+	fat_clusters_flush(sb);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+
+	return 0;
+}
+
 static void fat_put_super(struct super_block *sb)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -643,6 +653,7 @@ static const struct super_operations fat_sops = {
 	.delete_inode	= fat_delete_inode,
 	.put_super	= fat_put_super,
 	.write_super	= fat_write_super,
+	.sync_fs	= fat_sync_fs,
 	.statfs		= fat_statfs,
 	.clear_inode	= fat_clear_inode,
 	.remount_fs	= fat_remount,
-- 
cgit v0.10.2


From 58bc5bbb873eb5d86126a3fd3ff02aaa69ec15d0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:04:54 +0200
Subject: hfs: add ->sync_fs

Add a ->sync_fs method for data integrity syncs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 3aac417..6f833dc 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -58,6 +58,16 @@ static void hfs_write_super(struct super_block *sb)
 	unlock_super(sb);
 }
 
+static int hfs_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
+	hfs_mdb_commit(sb);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+
+	return 0;
+}
+
 /*
  * hfs_put_super()
  *
@@ -172,6 +182,7 @@ static const struct super_operations hfs_super_operations = {
 	.clear_inode	= hfs_clear_inode,
 	.put_super	= hfs_put_super,
 	.write_super	= hfs_write_super,
+	.sync_fs	= hfs_sync_fs,
 	.statfs		= hfs_statfs,
 	.remount_fs     = hfs_remount,
 	.show_options	= hfs_show_options,
-- 
cgit v0.10.2


From 7fbc6df0e7a561a313f49faa77829d5de45a97f8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:05:12 +0200
Subject: hfsplus: add ->sync_fs

Add a ->sync_fs method for data integrity syncs, and reimplement
->write_super ontop of it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 1aab8aa..9fc3af0 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -152,7 +152,7 @@ static void hfsplus_clear_inode(struct inode *inode)
 	}
 }
 
-static void hfsplus_write_super(struct super_block *sb)
+static int hfsplus_sync_fs(struct super_block *sb, int wait)
 {
 	struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
@@ -160,9 +160,6 @@ static void hfsplus_write_super(struct super_block *sb)
 
 	lock_super(sb);
 	sb->s_dirt = 0;
-	if (sb->s_flags & MS_RDONLY)
-		/* warn? */
-		goto out;
 
 	vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks);
 	vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc);
@@ -194,8 +191,16 @@ static void hfsplus_write_super(struct super_block *sb)
 		}
 		HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP;
 	}
- out:
 	unlock_super(sb);
+	return 0;
+}
+
+static void hfsplus_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		hfsplus_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static void hfsplus_put_super(struct super_block *sb)
@@ -290,6 +295,7 @@ static const struct super_operations hfsplus_sops = {
 	.clear_inode	= hfsplus_clear_inode,
 	.put_super	= hfsplus_put_super,
 	.write_super	= hfsplus_write_super,
+	.sync_fs	= hfsplus_sync_fs,
 	.statfs		= hfsplus_statfs,
 	.remount_fs	= hfsplus_remount,
 	.show_options	= hfsplus_show_options,
-- 
cgit v0.10.2


From ad43ffdeea0a7bd3e6036c4aeec2e6699aef8ac7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:07:45 +0200
Subject: sysv: add ->sync_fs

Add a ->sync_fs method for data integrity syncs, and reimplement
->write_super ontop of it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 425c976..4799234 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -31,16 +31,13 @@
 #include <asm/byteorder.h>
 #include "sysv.h"
 
-/* This is only called on sync() and umount(), when s_dirt=1. */
-static void sysv_write_super(struct super_block *sb)
+static int sysv_sync_fs(struct super_block *sb, int wait)
 {
 	struct sysv_sb_info *sbi = SYSV_SB(sb);
 	unsigned long time = get_seconds(), old_time;
 
 	lock_super(sb);
 	lock_kernel();
-	if (sb->s_flags & MS_RDONLY)
-		goto clean;
 
 	/*
 	 * If we are going to write out the super block,
@@ -54,10 +51,19 @@ static void sysv_write_super(struct super_block *sb)
 		*sbi->s_sb_time = cpu_to_fs32(sbi, time);
 		mark_buffer_dirty(sbi->s_bh2);
 	}
-clean:
-	sb->s_dirt = 0;
+
 	unlock_kernel();
 	unlock_super(sb);
+
+	return 0;
+}
+
+static void sysv_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		sysv_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static int sysv_remount(struct super_block *sb, int *flags, char *data)
@@ -345,6 +351,7 @@ const struct super_operations sysv_sops = {
 	.delete_inode	= sysv_delete_inode,
 	.put_super	= sysv_put_super,
 	.write_super	= sysv_write_super,
+	.sync_fs	= sysv_sync_fs,
 	.remount_fs	= sysv_remount,
 	.statfs		= sysv_statfs,
 };
-- 
cgit v0.10.2


From 8c8006564a58d0ea912bf7f2d0a758d97e4b464f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:08:05 +0200
Subject: ufs: add ->sync_fs

Add a ->sync_fs method for data integrity syncs, and reimplement
->write_super ontop of it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6560dda..5faed79 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1125,7 +1125,7 @@ failed_nomem:
 	return -ENOMEM;
 }
 
-static void ufs_write_super(struct super_block *sb)
+static int ufs_sync_fs(struct super_block *sb, int wait)
 {
 	struct ufs_sb_private_info * uspi;
 	struct ufs_super_block_first * usb1;
@@ -1134,25 +1134,36 @@ static void ufs_write_super(struct super_block *sb)
 
 	lock_super(sb);
 	lock_kernel();
+
 	UFSD("ENTER\n");
+
 	flags = UFS_SB(sb)->s_flags;
 	uspi = UFS_SB(sb)->s_uspi;
 	usb1 = ubh_get_usb_first(uspi);
 	usb3 = ubh_get_usb_third(uspi);
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		usb1->fs_time = cpu_to_fs32(sb, get_seconds());
-		if ((flags & UFS_ST_MASK) == UFS_ST_SUN 
-		  || (flags & UFS_ST_MASK) == UFS_ST_SUNOS
-		  || (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
-			ufs_set_fs_state(sb, usb1, usb3,
-					UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
-		ufs_put_cstotal(sb);
-	}
+	usb1->fs_time = cpu_to_fs32(sb, get_seconds());
+	if ((flags & UFS_ST_MASK) == UFS_ST_SUN  ||
+	    (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
+	    (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
+		ufs_set_fs_state(sb, usb1, usb3,
+				UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
+	ufs_put_cstotal(sb);
 	sb->s_dirt = 0;
+
 	UFSD("EXIT\n");
 	unlock_kernel();
 	unlock_super(sb);
+
+	return 0;
+}
+
+static void ufs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		ufs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static void ufs_put_super(struct super_block *sb)
@@ -1381,6 +1392,7 @@ static const struct super_operations ufs_super_ops = {
 	.delete_inode	= ufs_delete_inode,
 	.put_super	= ufs_put_super,
 	.write_super	= ufs_write_super,
+	.sync_fs	= ufs_sync_fs,
 	.statfs		= ufs_statfs,
 	.remount_fs	= ufs_remount,
 	.show_options   = ufs_show_options,
-- 
cgit v0.10.2


From d579ed00aa96a7f7486978540a0d7cecaff742ae Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:08:21 +0200
Subject: jffs2: call jffs2_write_super from jffs2_sync_fs

The call to ->write_super from __sync_filesystem will go away, so make
sure jffs2 performs the same actions from inside ->sync_fs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f7bfd3a..07a22ca 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -74,6 +74,8 @@ static int jffs2_sync_fs(struct super_block *sb, int wait)
 {
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
 
+	jffs2_write_super(sb);
+
 	mutex_lock(&c->alloc_sem);
 	jffs2_flush_wbuf_pad(c);
 	mutex_unlock(&c->alloc_sem);
-- 
cgit v0.10.2


From d731e06323cb705003e4172ec209e469be4c18e1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:08:36 +0200
Subject: nilfs2: call nilfs2_write_super from nilfs2_sync_fs

The call to ->write_super from __sync_filesystem will go away, so make
sure nilfs2 performs the same actions from inside ->sync_fs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 11151ea..122dc1e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -391,6 +391,8 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
 {
 	int err = 0;
 
+	nilfs_write_super(sb);
+
 	/* This function is called when super block should be written back */
 	if (wait)
 		err = nilfs_construct_segment(sb);
-- 
cgit v0.10.2


From 0c95ee190e1dea60c55c834d14695341085c9b7b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 10:08:54 +0200
Subject: remove the call to ->write_super in __sync_filesystem

Now that all filesystems provide ->sync_fs methods we can change
__sync_filesystem to only call ->sync_fs.

This gives us a clear separation between periodic writeouts which
are driven by ->write_super and data integrity syncs that go
through ->sync_fs. (modulo file_fsync which is also going away)

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/sync.c b/fs/sync.c
index e9d56f6..dd20002 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -33,8 +33,6 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	else
 		sync_quota_sb(sb, -1);
 	sync_inodes_sb(sb, wait);
-	if (sb->s_dirt && sb->s_op->write_super)
-		sb->s_op->write_super(sb);
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
 	return __sync_blockdev(sb->s_bdev, wait);
-- 
cgit v0.10.2


From 81fc20bd0e75ba6357bce2403767d7c2585d8f28 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 8 Jun 2009 01:39:28 +0900
Subject: nilfs2: remove meaningless EBUSY case from nilfs_get_sb function

The following EBUSY case in nilfs_get_sb() is meaningless.  Indeed,
this error code is never returned to the caller.

    if (!s->s_root) {
          ...
    } else if (!(s->s_flags & MS_RDONLY)) {
        err = -EBUSY;
    }

This simply removes the else case.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 122dc1e..1c505d0 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1186,8 +1186,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 
 		s->s_flags |= MS_ACTIVE;
 		need_to_close = 0;
-	} else if (!(s->s_flags & MS_RDONLY)) {
-		err = -EBUSY;
 	}
 
 	up(&sd.bdev->bd_mount_sem);
-- 
cgit v0.10.2


From 33c8e57c86d1bd1548c12a4f7c4bceb94b862cca Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 8 Jun 2009 01:39:29 +0900
Subject: nilfs2: get rid of sget use for acquiring nilfs object

This will change the way to obtain nilfs object in nilfs_get_sb()
function.

Previously, a preliminary sget() call was performed, and the nilfs
object was acquired from a super block instance found by the sget()
call.

This patch, instead, instroduces a new dedicated function
find_or_create_nilfs(); as the name implies, the function finds an
existent nilfs object from a global list or creates a new one if no
object is found on the device.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1c505d0..3c9833e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1061,13 +1061,6 @@ static int nilfs_set_bdev_super(struct super_block *s, void *data)
 static int nilfs_test_bdev_super(struct super_block *s, void *data)
 {
 	struct nilfs_super_data *sd = data;
-
-	return s->s_bdev == sd->bdev;
-}
-
-static int nilfs_test_bdev_super2(struct super_block *s, void *data)
-{
-	struct nilfs_super_data *sd = data;
 	int ret;
 
 	if (s->s_bdev != sd->bdev)
@@ -1096,8 +1089,8 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	     const char *dev_name, void *data, struct vfsmount *mnt)
 {
 	struct nilfs_super_data sd;
-	struct super_block *s, *s2;
-	struct the_nilfs *nilfs = NULL;
+	struct super_block *s;
+	struct the_nilfs *nilfs;
 	int err, need_to_close = 1;
 
 	sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type);
@@ -1118,11 +1111,12 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 		goto failed;
 	}
 
-	/*
-	 * once the super is inserted into the list by sget, s_umount
-	 * will protect the lockfs code from trying to start a snapshot
-	 * while we are mounting
-	 */
+	nilfs = find_or_create_nilfs(sd.bdev);
+	if (!nilfs) {
+		err = -ENOMEM;
+		goto failed;
+	}
+
 	down(&sd.bdev->bd_mount_sem);
 	if (!sd.cno &&
 	    (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) {
@@ -1131,51 +1125,22 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	}
 
 	/*
-	 * Phase-1: search any existent instance and get the_nilfs
+	 * Search specified snapshot or R/W mode super_block
 	 */
-	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
-	if (IS_ERR(s))
-		goto error_s;
-
-	if (!s->s_root) {
-		err = -ENOMEM;
-		nilfs = alloc_nilfs(sd.bdev);
-		if (!nilfs)
-			goto cancel_new;
-	} else {
-		struct nilfs_sb_info *sbi = NILFS_SB(s);
+	if (!sd.cno)
+		/* trying to get the latest checkpoint.  */
+		sd.cno = nilfs_last_cno(nilfs);
 
-		/*
-		 * s_umount protects super_block from unmount process;
-		 * It covers pointers of nilfs_sb_info and the_nilfs.
-		 */
-		nilfs = sbi->s_nilfs;
-		get_nilfs(nilfs);
-		up_write(&s->s_umount);
-
-		/*
-		 * Phase-2: search specified snapshot or R/W mode super_block
-		 */
-		if (!sd.cno)
-			/* trying to get the latest checkpoint.  */
-			sd.cno = nilfs_last_cno(nilfs);
-
-		s2 = sget(fs_type, nilfs_test_bdev_super2,
-			  nilfs_set_bdev_super, &sd);
-		deactivate_super(s);
-		/*
-		 * Although deactivate_super() invokes close_bdev_exclusive() at
-		 * kill_block_super().  Here, s is an existent mount; we need
-		 * one more close_bdev_exclusive() call.
-		 */
-		s = s2;
-		if (IS_ERR(s))
-			goto error_s;
+	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
+	if (IS_ERR(s)) {
+		err = PTR_ERR(s);
+		goto failed_unlock;
 	}
 
 	if (!s->s_root) {
 		char b[BDEVNAME_SIZE];
 
+		/* New superblock instance created */
 		s->s_flags = flags;
 		strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(sd.bdev));
@@ -1195,15 +1160,9 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	simple_set_mnt(mnt, s);
 	return 0;
 
- error_s:
-	up(&sd.bdev->bd_mount_sem);
-	if (nilfs)
-		put_nilfs(nilfs);
-	close_bdev_exclusive(sd.bdev, flags);
-	return PTR_ERR(s);
-
  failed_unlock:
 	up(&sd.bdev->bd_mount_sem);
+	put_nilfs(nilfs);
  failed:
 	close_bdev_exclusive(sd.bdev, flags);
 
@@ -1212,8 +1171,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
  cancel_new:
 	/* Abandoning the newly allocated superblock */
 	up(&sd.bdev->bd_mount_sem);
-	if (nilfs)
-		put_nilfs(nilfs);
+	put_nilfs(nilfs);
 	up_write(&s->s_umount);
 	deactivate_super(s);
 	/*
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index a91f15b..45dbf6a 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -35,6 +35,10 @@
 #include "seglist.h"
 #include "segbuf.h"
 
+
+static LIST_HEAD(nilfs_objects);
+static DEFINE_SPINLOCK(nilfs_lock);
+
 void nilfs_set_last_segment(struct the_nilfs *nilfs,
 			    sector_t start_blocknr, u64 seq, __u64 cno)
 {
@@ -55,7 +59,7 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs,
  * Return Value: On success, pointer to the_nilfs is returned.
  * On error, NULL is returned.
  */
-struct the_nilfs *alloc_nilfs(struct block_device *bdev)
+static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 {
 	struct the_nilfs *nilfs;
 
@@ -69,6 +73,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 	atomic_set(&nilfs->ns_ndirtyblks, 0);
 	init_rwsem(&nilfs->ns_sem);
 	mutex_init(&nilfs->ns_writer_mutex);
+	INIT_LIST_HEAD(&nilfs->ns_list);
 	INIT_LIST_HEAD(&nilfs->ns_supers);
 	spin_lock_init(&nilfs->ns_last_segment_lock);
 	nilfs->ns_gc_inodes_h = NULL;
@@ -78,6 +83,45 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 }
 
 /**
+ * find_or_create_nilfs - find or create nilfs object
+ * @bdev: block device to which the_nilfs is related
+ *
+ * find_nilfs() looks up an existent nilfs object created on the
+ * device and gets the reference count of the object.  If no nilfs object
+ * is found on the device, a new nilfs object is allocated.
+ *
+ * Return Value: On success, pointer to the nilfs object is returned.
+ * On error, NULL is returned.
+ */
+struct the_nilfs *find_or_create_nilfs(struct block_device *bdev)
+{
+	struct the_nilfs *nilfs, *new = NULL;
+
+ retry:
+	spin_lock(&nilfs_lock);
+	list_for_each_entry(nilfs, &nilfs_objects, ns_list) {
+		if (nilfs->ns_bdev == bdev) {
+			get_nilfs(nilfs);
+			spin_unlock(&nilfs_lock);
+			if (new)
+				put_nilfs(new);
+			return nilfs; /* existing object */
+		}
+	}
+	if (new) {
+		list_add_tail(&new->ns_list, &nilfs_objects);
+		spin_unlock(&nilfs_lock);
+		return new; /* new object */
+	}
+	spin_unlock(&nilfs_lock);
+
+	new = alloc_nilfs(bdev);
+	if (new)
+		goto retry;
+	return NULL; /* insufficient memory */
+}
+
+/**
  * put_nilfs - release a reference to the_nilfs
  * @nilfs: the_nilfs structure to be released
  *
@@ -86,13 +130,20 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
  */
 void put_nilfs(struct the_nilfs *nilfs)
 {
-	if (!atomic_dec_and_test(&nilfs->ns_count))
+	spin_lock(&nilfs_lock);
+	if (!atomic_dec_and_test(&nilfs->ns_count)) {
+		spin_unlock(&nilfs_lock);
 		return;
+	}
+	list_del_init(&nilfs->ns_list);
+	spin_unlock(&nilfs_lock);
+
 	/*
-	 * Increment of ns_count never occur below because the caller
+	 * Increment of ns_count never occurs below because the caller
 	 * of get_nilfs() holds at least one reference to the_nilfs.
 	 * Thus its exclusion control is not required here.
 	 */
+
 	might_sleep();
 	if (nilfs_loaded(nilfs)) {
 		nilfs_mdt_clear(nilfs->ns_sufile);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 30fe587..116caf9 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -43,6 +43,7 @@ enum {
  * struct the_nilfs - struct to supervise multiple nilfs mount points
  * @ns_flags: flags
  * @ns_count: reference count
+ * @ns_list: list head for nilfs_list
  * @ns_bdev: block device
  * @ns_bdi: backing dev info
  * @ns_writer: back pointer to writable nilfs_sb_info
@@ -88,6 +89,7 @@ enum {
 struct the_nilfs {
 	unsigned long		ns_flags;
 	atomic_t		ns_count;
+	struct list_head	ns_list;
 
 	struct block_device    *ns_bdev;
 	struct backing_dev_info *ns_bdi;
@@ -191,7 +193,7 @@ THE_NILFS_FNS(DISCONTINUED, discontinued)
 #define NILFS_ALTSB_FREQ	60  /* spare superblock */
 
 void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
-struct the_nilfs *alloc_nilfs(struct block_device *);
+struct the_nilfs *find_or_create_nilfs(struct block_device *);
 void put_nilfs(struct the_nilfs *);
 int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *);
 int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *);
-- 
cgit v0.10.2


From 3f82ff55168e92859119bf348e9e0bd6714d2fea Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 8 Jun 2009 01:39:30 +0900
Subject: nilfs2: get rid of sget use for checking if current mount is present

This stops using sget() for checking if an r/w-mount or an r/o-mount
exists on the device.  This elimination uses a back pointer to the
current mount added to nilfs object.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 3c9833e..5a8c5e4 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -67,8 +67,6 @@ MODULE_LICENSE("GPL");
 
 static void nilfs_write_super(struct super_block *sb);
 static int nilfs_remount(struct super_block *sb, int *flags, char *data);
-static int test_exclusive_mount(struct file_system_type *fs_type,
-				struct block_device *bdev, int flags);
 
 /**
  * nilfs_error() - report failure condition on a filesystem
@@ -329,6 +327,10 @@ static void nilfs_put_super(struct super_block *sb)
 		nilfs_commit_super(sbi, 1);
 		up_write(&nilfs->ns_sem);
 	}
+	down_write(&nilfs->ns_sem);
+	if (nilfs->ns_current == sbi)
+		nilfs->ns_current = NULL;
+	up_write(&nilfs->ns_sem);
 
 	nilfs_detach_checkpoint(sbi);
 	put_nilfs(sbi->s_nilfs);
@@ -880,6 +882,11 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
 		goto failed_root;
 	}
 
+	down_write(&nilfs->ns_sem);
+	if (!nilfs_test_opt(sbi, SNAPSHOT))
+		nilfs->ns_current = sbi;
+	up_write(&nilfs->ns_sem);
+
 	return 0;
 
  failed_root:
@@ -958,14 +965,16 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 		 * by fsck since we originally mounted the partition.)
 		 */
 		down(&sb->s_bdev->bd_mount_sem);
-		/* Check existing RW-mount */
-		if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) {
+		down_read(&nilfs->ns_sem);
+		if (nilfs->ns_current && nilfs->ns_current != sbi) {
 			printk(KERN_WARNING "NILFS (device %s): couldn't "
-			       "remount because a RW-mount exists.\n",
+			       "remount because an RW-mount exists.\n",
 			       sb->s_id);
+			up_read(&nilfs->ns_sem);
 			err = -EBUSY;
 			goto rw_remount_failed;
 		}
+		up_read(&nilfs->ns_sem);
 		if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
 			printk(KERN_WARNING "NILFS (device %s): couldn't "
 			       "remount because the current RO-mount is not "
@@ -984,6 +993,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 
 		down_write(&nilfs->ns_sem);
 		nilfs_setup_super(sbi);
+		nilfs->ns_current = sbi;
 		up_write(&nilfs->ns_sem);
 
 		up(&sb->s_bdev->bd_mount_sem);
@@ -1118,10 +1128,23 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	}
 
 	down(&sd.bdev->bd_mount_sem);
-	if (!sd.cno &&
-	    (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) {
-		err = (err < 0) ? : -EBUSY;
-		goto failed_unlock;
+
+	if (!sd.cno) {
+		/*
+		 * Check if an exclusive mount exists or not.
+		 * Snapshot mounts coexist with a current mount
+		 * (i.e. rw-mount or ro-mount), whereas rw-mount and
+		 * ro-mount are mutually exclusive.
+		 */
+		down_read(&nilfs->ns_sem);
+		if (nilfs->ns_current &&
+		    ((nilfs->ns_current->s_super->s_flags ^ flags)
+		     & MS_RDONLY)) {
+			up_read(&nilfs->ns_sem);
+			err = -EBUSY;
+			goto failed_unlock;
+		}
+		up_read(&nilfs->ns_sem);
 	}
 
 	/*
@@ -1182,57 +1205,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	return err;
 }
 
-static int nilfs_test_bdev_super3(struct super_block *s, void *data)
-{
-	struct nilfs_super_data *sd = data;
-	int ret;
-
-	if (s->s_bdev != sd->bdev)
-		return 0;
-	if (down_read_trylock(&s->s_umount)) {
-		ret = (s->s_flags & MS_RDONLY) && s->s_root &&
-			nilfs_test_opt(NILFS_SB(s), SNAPSHOT);
-		up_read(&s->s_umount);
-		if (ret)
-			return 0; /* ignore snapshot mounts */
-	}
-	return !((sd->flags ^ s->s_flags) & MS_RDONLY);
-}
-
-static int __false_bdev_super(struct super_block *s, void *data)
-{
-#if 0 /* XXX: workaround for lock debug. This is not good idea */
-	up_write(&s->s_umount);
-#endif
-	return -EFAULT;
-}
-
-/**
- * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not.
- * fs_type: filesystem type
- * bdev: block device
- * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount)
- * res: pointer to an integer to store result
- *
- * This function must be called within a section protected by bd_mount_mutex.
- */
-static int test_exclusive_mount(struct file_system_type *fs_type,
-				struct block_device *bdev, int flags)
-{
-	struct super_block *s;
-	struct nilfs_super_data sd = { .flags = flags, .bdev = bdev };
-
-	s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd);
-	if (IS_ERR(s)) {
-		if (PTR_ERR(s) != -EFAULT)
-			return PTR_ERR(s);
-		return 0;  /* Not found */
-	}
-	up_write(&s->s_umount);
-	deactivate_super(s);
-	return 1;  /* Found */
-}
-
 struct file_system_type nilfs_fs_type = {
 	.owner    = THIS_MODULE,
 	.name     = "nilfs2",
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 116caf9..99f7e29 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -50,6 +50,7 @@ enum {
  * @ns_sem: semaphore for shared states
  * @ns_writer_mutex: mutex protecting ns_writer attach/detach
  * @ns_writer_refcount: number of referrers on ns_writer
+ * @ns_current: back pointer to current mount
  * @ns_sbh: buffer heads of on-disk super blocks
  * @ns_sbp: pointers to super block data
  * @ns_sbwtime: previous write time of super blocks
@@ -98,6 +99,8 @@ struct the_nilfs {
 	struct mutex		ns_writer_mutex;
 	atomic_t		ns_writer_refcount;
 
+	struct nilfs_sb_info   *ns_current;
+
 	/*
 	 * used for
 	 * - loading the latest checkpoint exclusively.
-- 
cgit v0.10.2


From 6dd4740662405a68bb229ac2b9e0aeaaf2188bf2 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 8 Jun 2009 01:39:31 +0900
Subject: nilfs2: simplify remaining sget() use

This simplifies the test function passed on the remaining sget()
callsite in nilfs.

Instead of checking mount type (i.e. ro-mount/rw-mount/snapshot mount)
in the test function passed to sget(), this patch first looks up the
nilfs_sb_info struct which the given mount type matches, and then
acquires the super block instance holding the nilfs_sb_info.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
index adccd4f..0776ccc 100644
--- a/fs/nilfs2/sb.h
+++ b/fs/nilfs2/sb.h
@@ -60,6 +60,7 @@ struct nilfs_sb_info {
 	struct super_block *s_super;	/* reverse pointer to super_block */
 	struct the_nilfs *s_nilfs;
 	struct list_head s_list;	/* list head for nilfs->ns_supers */
+	atomic_t s_count;		/* reference count */
 
 	/* Segment constructor */
 	struct list_head s_dirty_files;	/* dirty files list */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5a8c5e4..1d1b6e1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -336,7 +336,7 @@ static void nilfs_put_super(struct super_block *sb)
 	put_nilfs(sbi->s_nilfs);
 	sbi->s_super = NULL;
 	sb->s_fs_info = NULL;
-	kfree(sbi);
+	nilfs_put_sbinfo(sbi);
 
 	unlock_kernel();
 }
@@ -785,6 +785,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
 	get_nilfs(nilfs);
 	sbi->s_nilfs = nilfs;
 	sbi->s_super = sb;
+	atomic_set(&sbi->s_count, 1);
 
 	err = init_nilfs(nilfs, sbi, (char *)data);
 	if (err)
@@ -902,7 +903,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
  failed_sbi:
 	put_nilfs(nilfs);
 	sb->s_fs_info = NULL;
-	kfree(sbi);
+	nilfs_put_sbinfo(sbi);
 	return err;
 }
 
@@ -1014,6 +1015,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 
 struct nilfs_super_data {
 	struct block_device *bdev;
+	struct nilfs_sb_info *sbi;
 	__u64 cno;
 	int flags;
 };
@@ -1071,27 +1073,8 @@ static int nilfs_set_bdev_super(struct super_block *s, void *data)
 static int nilfs_test_bdev_super(struct super_block *s, void *data)
 {
 	struct nilfs_super_data *sd = data;
-	int ret;
-
-	if (s->s_bdev != sd->bdev)
-		return 0;
-
-	if (!((s->s_flags | sd->flags) & MS_RDONLY))
-		return 1; /* Reuse an old R/W-mode super_block */
-
-	if (s->s_flags & sd->flags & MS_RDONLY) {
-		if (down_read_trylock(&s->s_umount)) {
-			ret = s->s_root &&
-				(sd->cno == NILFS_SB(s)->s_snapshot_cno);
-			up_read(&s->s_umount);
-			/*
-			 * This path is locked with sb_lock by sget().
-			 * So, drop_super() causes deadlock.
-			 */
-			return ret;
-		}
-	}
-	return 0;
+
+	return sd->sbi && s->s_fs_info == (void *)sd->sbi;
 }
 
 static int
@@ -1112,7 +1095,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	 * much more information than normal filesystems to identify mount
 	 * instance.  For snapshot mounts, not only a mount type (ro-mount
 	 * or rw-mount) but also a checkpoint number is required.
-	 * The results are passed in sget() using nilfs_super_data.
 	 */
 	sd.cno = 0;
 	sd.flags = flags;
@@ -1148,13 +1130,23 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	}
 
 	/*
-	 * Search specified snapshot or R/W mode super_block
+	 * Find existing nilfs_sb_info struct
 	 */
+	sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno);
+
 	if (!sd.cno)
 		/* trying to get the latest checkpoint.  */
 		sd.cno = nilfs_last_cno(nilfs);
 
+	/*
+	 * Get super block instance holding the nilfs_sb_info struct.
+	 * A new instance is allocated if no existing mount is present or
+	 * existing instance has been unmounted.
+	 */
 	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
+	if (sd.sbi)
+		nilfs_put_sbinfo(sd.sbi);
+
 	if (IS_ERR(s)) {
 		err = PTR_ERR(s);
 		goto failed_unlock;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 45dbf6a..221953b 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -664,6 +664,56 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs)
 	return ret;
 }
 
+/**
+ * nilfs_find_sbinfo - find existing nilfs_sb_info structure
+ * @nilfs: nilfs object
+ * @rw_mount: mount type (non-zero value for read/write mount)
+ * @cno: checkpoint number (zero for read-only mount)
+ *
+ * nilfs_find_sbinfo() returns the nilfs_sb_info structure which
+ * @rw_mount and @cno (in case of snapshots) matched.  If no instance
+ * was found, NULL is returned.  Although the super block instance can
+ * be unmounted after this function returns, the nilfs_sb_info struct
+ * is kept on memory until nilfs_put_sbinfo() is called.
+ */
+struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs,
+					int rw_mount, __u64 cno)
+{
+	struct nilfs_sb_info *sbi;
+
+	down_read(&nilfs->ns_sem);
+	/*
+	 * The SNAPSHOT flag and sb->s_flags are supposed to be
+	 * protected with nilfs->ns_sem.
+	 */
+	sbi = nilfs->ns_current;
+	if (rw_mount) {
+		if (sbi && !(sbi->s_super->s_flags & MS_RDONLY))
+			goto found; /* read/write mount */
+		else
+			goto out;
+	} else if (cno == 0) {
+		if (sbi && (sbi->s_super->s_flags & MS_RDONLY))
+			goto found; /* read-only mount */
+		else
+			goto out;
+	}
+
+	list_for_each_entry(sbi, &nilfs->ns_supers, s_list) {
+		if (nilfs_test_opt(sbi, SNAPSHOT) &&
+		    sbi->s_snapshot_cno == cno)
+			goto found; /* snapshot mount */
+	}
+ out:
+	up_read(&nilfs->ns_sem);
+	return NULL;
+
+ found:
+	atomic_inc(&sbi->s_count);
+	up_read(&nilfs->ns_sem);
+	return sbi;
+}
+
 int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
 				int snapshot_mount)
 {
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 99f7e29..be4c040 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -201,6 +201,7 @@ void put_nilfs(struct the_nilfs *);
 int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *);
 int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *);
 int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
+struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
 int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int);
 int nilfs_near_disk_full(struct the_nilfs *);
 void nilfs_fall_back_super_block(struct the_nilfs *);
@@ -243,6 +244,12 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
 	mutex_unlock(&nilfs->ns_writer_mutex);
 }
 
+static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
+{
+	if (!atomic_dec_and_test(&sbi->s_count))
+		kfree(sbi);
+}
+
 static inline void
 nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
 			sector_t *seg_start, sector_t *seg_end)
-- 
cgit v0.10.2


From e59399d0102c1813cec48db5cebe1750313f88a0 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 8 Jun 2009 01:39:32 +0900
Subject: nilfs2: correct exclusion control in nilfs_remount function

nilfs_remount() changes mount state of a superblock instance.  Even
though nilfs accesses other superblock instances during mount or
remount, the mount state was not properly protected in
nilfs_remount().

Moreover, nilfs_remount() has a lock order reversal problem;
nilfs_get_sb() holds:

  1. bdev->bd_mount_sem
  2. sb->s_umount  (sget acquires)

and nilfs_remount() holds:

  1. sb->s_umount  (locked by the caller in vfs)
  2. bdev->bd_mount_sem

To avoid these problems, this patch divides a semaphore protecting
super block instances from nilfs->ns_sem, and applies it to the mount
state protection in nilfs_remount().

With this change, bd_mount_sem use is removed from nilfs_remount() and
the lock order reversal will be resolved.  And the new rw-semaphore,
nilfs->ns_super_sem will properly protect the mount state except the
modification from nilfs_error function.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1d1b6e1..f02762f 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -327,10 +327,10 @@ static void nilfs_put_super(struct super_block *sb)
 		nilfs_commit_super(sbi, 1);
 		up_write(&nilfs->ns_sem);
 	}
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	if (nilfs->ns_current == sbi)
 		nilfs->ns_current = NULL;
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 
 	nilfs_detach_checkpoint(sbi);
 	put_nilfs(sbi->s_nilfs);
@@ -408,9 +408,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
 	struct buffer_head *bh_cp;
 	int err;
 
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_add(&sbi->s_list, &nilfs->ns_supers);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 
 	sbi->s_ifile = nilfs_mdt_new(
 		nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
@@ -448,9 +448,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
 	nilfs_mdt_destroy(sbi->s_ifile);
 	sbi->s_ifile = NULL;
 
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_del_init(&sbi->s_list);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 
 	return err;
 }
@@ -462,9 +462,9 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
 	nilfs_mdt_clear(sbi->s_ifile);
 	nilfs_mdt_destroy(sbi->s_ifile);
 	sbi->s_ifile = NULL;
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_del_init(&sbi->s_list);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 }
 
 static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
@@ -883,10 +883,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
 		goto failed_root;
 	}
 
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	if (!nilfs_test_opt(sbi, SNAPSHOT))
 		nilfs->ns_current = sbi;
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 
 	return 0;
 
@@ -918,6 +918,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 
 	lock_kernel();
 
+	down_write(&nilfs->ns_super_sem);
 	old_sb_flags = sb->s_flags;
 	old_opts.mount_opt = sbi->s_mount_opt;
 	old_opts.snapshot_cno = sbi->s_snapshot_cno;
@@ -965,24 +966,20 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 		 * store the current valid flag.  (It may have been changed
 		 * by fsck since we originally mounted the partition.)
 		 */
-		down(&sb->s_bdev->bd_mount_sem);
-		down_read(&nilfs->ns_sem);
 		if (nilfs->ns_current && nilfs->ns_current != sbi) {
 			printk(KERN_WARNING "NILFS (device %s): couldn't "
 			       "remount because an RW-mount exists.\n",
 			       sb->s_id);
-			up_read(&nilfs->ns_sem);
 			err = -EBUSY;
-			goto rw_remount_failed;
+			goto restore_opts;
 		}
-		up_read(&nilfs->ns_sem);
 		if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
 			printk(KERN_WARNING "NILFS (device %s): couldn't "
 			       "remount because the current RO-mount is not "
 			       "the latest one.\n",
 			       sb->s_id);
 			err = -EINVAL;
-			goto rw_remount_failed;
+			goto restore_opts;
 		}
 		sb->s_flags &= ~MS_RDONLY;
 		nilfs_clear_opt(sbi, SNAPSHOT);
@@ -990,25 +987,24 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 
 		err = nilfs_attach_segment_constructor(sbi);
 		if (err)
-			goto rw_remount_failed;
+			goto restore_opts;
 
 		down_write(&nilfs->ns_sem);
 		nilfs_setup_super(sbi);
-		nilfs->ns_current = sbi;
 		up_write(&nilfs->ns_sem);
 
-		up(&sb->s_bdev->bd_mount_sem);
+		nilfs->ns_current = sbi;
 	}
  out:
+	up_write(&nilfs->ns_super_sem);
 	unlock_kernel();
 	return 0;
 
- rw_remount_failed:
-	up(&sb->s_bdev->bd_mount_sem);
  restore_opts:
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.mount_opt;
 	sbi->s_snapshot_cno = old_opts.snapshot_cno;
+	up_write(&nilfs->ns_super_sem);
 	unlock_kernel();
 	return err;
 }
@@ -1118,15 +1114,15 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 		 * (i.e. rw-mount or ro-mount), whereas rw-mount and
 		 * ro-mount are mutually exclusive.
 		 */
-		down_read(&nilfs->ns_sem);
+		down_read(&nilfs->ns_super_sem);
 		if (nilfs->ns_current &&
 		    ((nilfs->ns_current->s_super->s_flags ^ flags)
 		     & MS_RDONLY)) {
-			up_read(&nilfs->ns_sem);
+			up_read(&nilfs->ns_super_sem);
 			err = -EBUSY;
 			goto failed_unlock;
 		}
-		up_read(&nilfs->ns_sem);
+		up_read(&nilfs->ns_super_sem);
 	}
 
 	/*
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 221953b..06e8dfd 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -72,6 +72,7 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 	atomic_set(&nilfs->ns_writer_refcount, -1);
 	atomic_set(&nilfs->ns_ndirtyblks, 0);
 	init_rwsem(&nilfs->ns_sem);
+	init_rwsem(&nilfs->ns_super_sem);
 	mutex_init(&nilfs->ns_writer_mutex);
 	INIT_LIST_HEAD(&nilfs->ns_list);
 	INIT_LIST_HEAD(&nilfs->ns_supers);
@@ -681,10 +682,10 @@ struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs,
 {
 	struct nilfs_sb_info *sbi;
 
-	down_read(&nilfs->ns_sem);
+	down_read(&nilfs->ns_super_sem);
 	/*
 	 * The SNAPSHOT flag and sb->s_flags are supposed to be
-	 * protected with nilfs->ns_sem.
+	 * protected with nilfs->ns_super_sem.
 	 */
 	sbi = nilfs->ns_current;
 	if (rw_mount) {
@@ -705,12 +706,12 @@ struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs,
 			goto found; /* snapshot mount */
 	}
  out:
-	up_read(&nilfs->ns_sem);
+	up_read(&nilfs->ns_super_sem);
 	return NULL;
 
  found:
 	atomic_inc(&sbi->s_count);
-	up_read(&nilfs->ns_sem);
+	up_read(&nilfs->ns_super_sem);
 	return sbi;
 }
 
@@ -720,7 +721,7 @@ int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
 	struct nilfs_sb_info *sbi;
 	int ret = 0;
 
-	down_read(&nilfs->ns_sem);
+	down_read(&nilfs->ns_super_sem);
 	if (cno == 0 || cno > nilfs->ns_cno)
 		goto out_unlock;
 
@@ -737,6 +738,6 @@ int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
 		ret++;
 
  out_unlock:
-	up_read(&nilfs->ns_sem);
+	up_read(&nilfs->ns_super_sem);
 	return ret;
 }
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index be4c040..d0cf4fb 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -48,6 +48,7 @@ enum {
  * @ns_bdi: backing dev info
  * @ns_writer: back pointer to writable nilfs_sb_info
  * @ns_sem: semaphore for shared states
+ * @ns_super_sem: semaphore for global operations across super block instances
  * @ns_writer_mutex: mutex protecting ns_writer attach/detach
  * @ns_writer_refcount: number of referrers on ns_writer
  * @ns_current: back pointer to current mount
@@ -96,10 +97,15 @@ struct the_nilfs {
 	struct backing_dev_info *ns_bdi;
 	struct nilfs_sb_info   *ns_writer;
 	struct rw_semaphore	ns_sem;
+	struct rw_semaphore	ns_super_sem;
 	struct mutex		ns_writer_mutex;
 	atomic_t		ns_writer_refcount;
 
+	/*
+	 * components protected by ns_super_sem
+	 */
 	struct nilfs_sb_info   *ns_current;
+	struct list_head	ns_supers;
 
 	/*
 	 * used for
@@ -113,7 +119,6 @@ struct the_nilfs {
 	time_t			ns_sbwtime[2];
 	unsigned		ns_sbsize;
 	unsigned		ns_mount_state;
-	struct list_head	ns_supers;
 
 	/*
 	 * Following fields are dedicated to a writable FS-instance.
-- 
cgit v0.10.2


From aa7dfb8954ccf49e026ba13d12991a4eb7defb96 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 8 Jun 2009 01:39:33 +0900
Subject: nilfs2: get rid of bd_mount_sem use from nilfs

This will remove every bd_mount_sem use in nilfs.

The intended exclusion control was replaced by the previous patch
("nilfs2: correct exclusion control in nilfs_remount function") for
nilfs_remount(), and this patch will replace remains with a new mutex
that this inserts in nilfs object.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 300f1cd..cadd36b 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -864,11 +864,11 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
 	case NILFS_CHECKPOINT:
 		/*
 		 * Check for protecting existing snapshot mounts:
-		 * bd_mount_sem is used to make this operation atomic and
+		 * ns_mount_mutex is used to make this operation atomic and
 		 * exclusive with a new mount job.  Though it doesn't cover
 		 * umount, it's enough for the purpose.
 		 */
-		down(&nilfs->ns_bdev->bd_mount_sem);
+		mutex_lock(&nilfs->ns_mount_mutex);
 		if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) {
 			/* Current implementation does not have to protect
 			   plain read-only mounts since they are exclusive
@@ -877,7 +877,7 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
 			ret = -EBUSY;
 		} else
 			ret = nilfs_cpfile_clear_snapshot(cpfile, cno);
-		up(&nilfs->ns_bdev->bd_mount_sem);
+		mutex_unlock(&nilfs->ns_mount_mutex);
 		return ret;
 	case NILFS_SNAPSHOT:
 		return nilfs_cpfile_set_snapshot(cpfile, cno);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f02762f..1777a34 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -764,7 +764,7 @@ int nilfs_store_magic_and_option(struct super_block *sb,
  * @silent: silent mode flag
  * @nilfs: the_nilfs struct
  *
- * This function is called exclusively by bd_mount_mutex.
+ * This function is called exclusively by nilfs->ns_mount_mutex.
  * So, the recovery process is protected from other simultaneous mounts.
  */
 static int
@@ -1105,7 +1105,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 		goto failed;
 	}
 
-	down(&sd.bdev->bd_mount_sem);
+	mutex_lock(&nilfs->ns_mount_mutex);
 
 	if (!sd.cno) {
 		/*
@@ -1164,7 +1164,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 		need_to_close = 0;
 	}
 
-	up(&sd.bdev->bd_mount_sem);
+	mutex_unlock(&nilfs->ns_mount_mutex);
 	put_nilfs(nilfs);
 	if (need_to_close)
 		close_bdev_exclusive(sd.bdev, flags);
@@ -1172,7 +1172,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	return 0;
 
  failed_unlock:
-	up(&sd.bdev->bd_mount_sem);
+	mutex_unlock(&nilfs->ns_mount_mutex);
 	put_nilfs(nilfs);
  failed:
 	close_bdev_exclusive(sd.bdev, flags);
@@ -1181,14 +1181,14 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 
  cancel_new:
 	/* Abandoning the newly allocated superblock */
-	up(&sd.bdev->bd_mount_sem);
+	mutex_unlock(&nilfs->ns_mount_mutex);
 	put_nilfs(nilfs);
 	up_write(&s->s_umount);
 	deactivate_super(s);
 	/*
 	 * deactivate_super() invokes close_bdev_exclusive().
 	 * We must finish all post-cleaning before this call;
-	 * put_nilfs() and unlocking bd_mount_sem need the block device.
+	 * put_nilfs() needs the block device.
 	 */
 	return err;
 }
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 06e8dfd..e4e5c78 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -73,6 +73,7 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 	atomic_set(&nilfs->ns_ndirtyblks, 0);
 	init_rwsem(&nilfs->ns_sem);
 	init_rwsem(&nilfs->ns_super_sem);
+	mutex_init(&nilfs->ns_mount_mutex);
 	mutex_init(&nilfs->ns_writer_mutex);
 	INIT_LIST_HEAD(&nilfs->ns_list);
 	INIT_LIST_HEAD(&nilfs->ns_supers);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index d0cf4fb..e8adbff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -49,6 +49,7 @@ enum {
  * @ns_writer: back pointer to writable nilfs_sb_info
  * @ns_sem: semaphore for shared states
  * @ns_super_sem: semaphore for global operations across super block instances
+ * @ns_mount_mutex: mutex protecting mount process of nilfs
  * @ns_writer_mutex: mutex protecting ns_writer attach/detach
  * @ns_writer_refcount: number of referrers on ns_writer
  * @ns_current: back pointer to current mount
@@ -98,6 +99,7 @@ struct the_nilfs {
 	struct nilfs_sb_info   *ns_writer;
 	struct rw_semaphore	ns_sem;
 	struct rw_semaphore	ns_super_sem;
+	struct mutex		ns_mount_mutex;
 	struct mutex		ns_writer_mutex;
 	atomic_t		ns_writer_refcount;
 
-- 
cgit v0.10.2


From e14112d1bd5e193166b54be19119cf6440470560 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 12 Jun 2009 10:14:22 +1000
Subject: perfcounters: remove powerpc definitions of perf_counter_do_pending

Commit 925d519ab82b6dd7aca9420d809ee83819c08db2 ("perf_counter:
unify and fix delayed counter wakeup") added global definitions.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Paul Mackerras <paulus@samba.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 20a44d0..5351237 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -156,8 +156,6 @@ static inline void clear_perf_counter_pending(void)
 		"i" (offsetof(struct paca_struct, perf_counter_pending)));
 }
 
-extern void perf_counter_do_pending(void);
-
 #else
 
 static inline unsigned long test_perf_counter_pending(void)
@@ -167,7 +165,6 @@ static inline unsigned long test_perf_counter_pending(void)
 
 static inline void set_perf_counter_pending(void) {}
 static inline void clear_perf_counter_pending(void) {}
-static inline void perf_counter_do_pending(void) {}
 #endif /* CONFIG_PERF_COUNTERS */
 
 #endif	/* __KERNEL__ */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index feff792..844d3f8 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,6 +53,7 @@
 #include <linux/bootmem.h>
 #include <linux/pci.h>
 #include <linux/debugfs.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-- 
cgit v0.10.2


From 12274e96b42884f043dfaa87eb1e5e10281bfac3 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 11 Jun 2009 15:07:48 -0700
Subject: x86: use zalloc_cpumask_var in arch_early_irq_init

So we make sure MAXSMP gets a cleared cpumask

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 94605e7..ef8d929 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -187,8 +187,8 @@ int __init arch_early_irq_init(void)
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
-		alloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
-		alloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
 		if (i < NR_IRQS_LEGACY)
 			cpumask_setall(cfg[i].domain);
 	}
-- 
cgit v0.10.2


From aee74f3bb3f4fb5dbeae8c1947c6d8ebdc19ee01 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 11 Jun 2009 15:09:00 -0700
Subject: kvm: remove the duplicated cpumask_clear

zalloc_cpumask_var already cleared it.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e211945..7645543 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2604,7 +2604,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 		r = -ENOMEM;
 		goto out_free_0;
 	}
-	cpumask_clear(cpus_hardware_enabled);
 
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
-- 
cgit v0.10.2


From 8ebf975608aaebd7feb33d77f07ba21a6380e086 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 11 Jun 2009 20:00:41 -0700
Subject: block: fix kernel-doc in recent block/ changes

Fix kernel-doc warnings in recently changed block/ source code.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/block/blk-core.c b/block/blk-core.c
index d17d71c..f6452f6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -884,9 +884,10 @@ EXPORT_SYMBOL(blk_get_request);
 
 /**
  * blk_make_request - given a bio, allocate a corresponding struct request.
- *
+ * @q: target request queue
  * @bio:  The bio describing the memory mappings that will be submitted for IO.
  *        It may be a chained-bio properly constructed by block/bio layer.
+ * @gfp_mask: gfp flags to be used for memory allocation
  *
  * blk_make_request is the parallel of generic_make_request for BLOCK_PC
  * type commands. Where the struct request needs to be farther initialized by
@@ -1872,14 +1873,14 @@ EXPORT_SYMBOL(blk_fetch_request);
 
 /**
  * blk_update_request - Special helper function for request stacking drivers
- * @rq:	      the request being processed
+ * @req:      the request being processed
  * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete @rq
+ * @nr_bytes: number of bytes to complete @req
  *
  * Description:
- *     Ends I/O on a number of bytes attached to @rq, but doesn't complete
- *     the request structure even if @rq doesn't have leftover.
- *     If @rq has leftover, sets it up for the next range of segments.
+ *     Ends I/O on a number of bytes attached to @req, but doesn't complete
+ *     the request structure even if @req doesn't have leftover.
+ *     If @req has leftover, sets it up for the next range of segments.
  *
  *     This special helper function is only for request stacking drivers
  *     (e.g. request-based dm) so that they can handle partial completion.
@@ -2145,7 +2146,7 @@ EXPORT_SYMBOL_GPL(blk_end_request);
 /**
  * blk_end_request_all - Helper function for drives to finish the request.
  * @rq: the request to finish
- * @err: %0 for success, < %0 for error
+ * @error: %0 for success, < %0 for error
  *
  * Description:
  *     Completely finish @rq.
@@ -2166,7 +2167,7 @@ EXPORT_SYMBOL_GPL(blk_end_request_all);
 /**
  * blk_end_request_cur - Helper function to finish the current request chunk.
  * @rq: the request to finish the current chunk for
- * @err: %0 for success, < %0 for error
+ * @error: %0 for success, < %0 for error
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.
@@ -2203,7 +2204,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
 /**
  * __blk_end_request_all - Helper function for drives to finish the request.
  * @rq: the request to finish
- * @err: %0 for success, < %0 for error
+ * @error: %0 for success, < %0 for error
  *
  * Description:
  *     Completely finish @rq.  Must be called with queue lock held.
@@ -2224,7 +2225,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request_all);
 /**
  * __blk_end_request_cur - Helper function to finish the current request chunk.
  * @rq: the request to finish the current chunk for
- * @err: %0 for success, < %0 for error
+ * @error: %0 for success, < %0 for error
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.  Must
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1c4df9b..d71cedc 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -343,7 +343,7 @@ EXPORT_SYMBOL(blk_queue_physical_block_size);
 /**
  * blk_queue_alignment_offset - set physical block alignment offset
  * @q:	the request queue for the device
- * @alignment:	alignment offset in bytes
+ * @offset: alignment offset in bytes
  *
  * Description:
  *   Some devices are naturally misaligned to compensate for things like
@@ -362,7 +362,7 @@ EXPORT_SYMBOL(blk_queue_alignment_offset);
 /**
  * blk_queue_io_min - set minimum request size for the queue
  * @q:	the request queue for the device
- * @io_min:  smallest I/O size in bytes
+ * @min:  smallest I/O size in bytes
  *
  * Description:
  *   Some devices have an internal block size bigger than the reported
@@ -385,7 +385,7 @@ EXPORT_SYMBOL(blk_queue_io_min);
 /**
  * blk_queue_io_opt - set optimal request size for the queue
  * @q:	the request queue for the device
- * @io_opt:  optimal request size in bytes
+ * @opt:  optimal request size in bytes
  *
  * Description:
  *   Drivers can call this function to set the preferred I/O request
-- 
cgit v0.10.2


From f053185948a1bd16329433f5371809765086c1ec Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Tue, 2 Jun 2009 14:12:47 +0800
Subject: drm: Replace DRM_DEBUG with DRM_DEBUG_MODE in drm_mode

Replace the DRM_DEBUG with DRM_DEBUG_MODE macro to print the info in drm_mode.

airlied:- fixed up to remove a conflicting #define
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index c9b80fd..54f492a 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -38,6 +38,7 @@
 #include "drm.h"
 #include "drm_crtc.h"
 
+#define DRM_MODESET_DEBUG	"drm_mode"
 /**
  * drm_mode_debug_printmodeline - debug print a mode
  * @dev: DRM device
@@ -50,12 +51,13 @@
  */
 void drm_mode_debug_printmodeline(struct drm_display_mode *mode)
 {
-	DRM_DEBUG("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x\n",
-		  mode->base.id, mode->name, mode->vrefresh, mode->clock,
-		  mode->hdisplay, mode->hsync_start,
-		  mode->hsync_end, mode->htotal,
-		  mode->vdisplay, mode->vsync_start,
-		  mode->vsync_end, mode->vtotal, mode->type, mode->flags);
+	DRM_DEBUG_MODE(DRM_MODESET_DEBUG,
+		"Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x\n",
+		mode->base.id, mode->name, mode->vrefresh, mode->clock,
+		mode->hdisplay, mode->hsync_start,
+		mode->hsync_end, mode->htotal,
+		mode->vdisplay, mode->vsync_start,
+		mode->vsync_end, mode->vtotal, mode->type, mode->flags);
 }
 EXPORT_SYMBOL(drm_mode_debug_printmodeline);
 
@@ -401,7 +403,9 @@ void drm_mode_prune_invalid(struct drm_device *dev,
 			list_del(&mode->head);
 			if (verbose) {
 				drm_mode_debug_printmodeline(mode);
-				DRM_DEBUG("Not using %s mode %d\n", mode->name, mode->status);
+				DRM_DEBUG_MODE(DRM_MODESET_DEBUG,
+					"Not using %s mode %d\n",
+					mode->name, mode->status);
 			}
 			drm_mode_destroy(dev, mode);
 		}
-- 
cgit v0.10.2


From be25ed9c5cc06e1d17aa97e41daf88f0b46143e6 Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Tue, 2 Jun 2009 14:13:55 +0800
Subject: drm: Replace DRM_DEBUG with DRM_DEBUG_DRIVER in i915 driver

Replace the DRM_DEBUG with the DRM_DEBUG_DRIVER to print the debug info
in i915 driver.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 6bc716d..054576d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -33,6 +33,8 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 
+#define I915_DRV	"i915_drv"
+
 /* Really want an OS-independent resettable timer.  Would like to have
  * this loop run for (eg) 3 sec, but have the timer reset every time
  * the head pointer changes, so that EBUSY only happens if the ring
@@ -99,7 +101,7 @@ static int i915_init_phys_hws(struct drm_device *dev)
 	memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
 
 	I915_WRITE(HWS_PGA, dev_priv->dma_status_page);
-	DRM_DEBUG("Enabled hardware status page\n");
+	DRM_DEBUG_DRIVER(I915_DRV, "Enabled hardware status page\n");
 	return 0;
 }
 
@@ -185,7 +187,8 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init)
 		master_priv->sarea_priv = (drm_i915_sarea_t *)
 			((u8 *)master_priv->sarea->handle + init->sarea_priv_offset);
 	} else {
-		DRM_DEBUG("sarea not found assuming DRI2 userspace\n");
+		DRM_DEBUG_DRIVER(I915_DRV,
+				"sarea not found assuming DRI2 userspace\n");
 	}
 
 	if (init->ring_size != 0) {
@@ -235,7 +238,7 @@ static int i915_dma_resume(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
-	DRM_DEBUG("%s\n", __func__);
+	DRM_DEBUG_DRIVER(I915_DRV, "%s\n", __func__);
 
 	if (dev_priv->ring.map.handle == NULL) {
 		DRM_ERROR("can not ioremap virtual address for"
@@ -248,13 +251,14 @@ static int i915_dma_resume(struct drm_device * dev)
 		DRM_ERROR("Can not find hardware status page\n");
 		return -EINVAL;
 	}
-	DRM_DEBUG("hw status page @ %p\n", dev_priv->hw_status_page);
+	DRM_DEBUG_DRIVER(I915_DRV, "hw status page @ %p\n",
+				dev_priv->hw_status_page);
 
 	if (dev_priv->status_gfx_addr != 0)
 		I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
 	else
 		I915_WRITE(HWS_PGA, dev_priv->dma_status_page);
-	DRM_DEBUG("Enabled hardware status page\n");
+	DRM_DEBUG_DRIVER(I915_DRV, "Enabled hardware status page\n");
 
 	return 0;
 }
@@ -548,10 +552,10 @@ static int i915_dispatch_flip(struct drm_device * dev)
 	if (!master_priv->sarea_priv)
 		return -EINVAL;
 
-	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
-		  __func__,
-		  dev_priv->current_page,
-		  master_priv->sarea_priv->pf_current_page);
+	DRM_DEBUG_DRIVER(I915_DRV, "%s: page=%d pfCurrentPage=%d\n",
+			  __func__,
+			 dev_priv->current_page,
+			 master_priv->sarea_priv->pf_current_page);
 
 	i915_kernel_lost_context(dev);
 
@@ -629,8 +633,9 @@ static int i915_batchbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	DRM_DEBUG("i915 batchbuffer, start %x used %d cliprects %d\n",
-		  batch->start, batch->used, batch->num_cliprects);
+	DRM_DEBUG_DRIVER(I915_DRV,
+			"i915 batchbuffer, start %x used %d cliprects %d\n",
+			batch->start, batch->used, batch->num_cliprects);
 
 	RING_LOCK_TEST_WITH_RETURN(dev, file_priv);
 
@@ -678,8 +683,9 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data,
 	void *batch_data;
 	int ret;
 
-	DRM_DEBUG("i915 cmdbuffer, buf %p sz %d cliprects %d\n",
-		  cmdbuf->buf, cmdbuf->sz, cmdbuf->num_cliprects);
+	DRM_DEBUG_DRIVER(I915_DRV,
+			"i915 cmdbuffer, buf %p sz %d cliprects %d\n",
+			cmdbuf->buf, cmdbuf->sz, cmdbuf->num_cliprects);
 
 	RING_LOCK_TEST_WITH_RETURN(dev, file_priv);
 
@@ -734,7 +740,7 @@ static int i915_flip_bufs(struct drm_device *dev, void *data,
 {
 	int ret;
 
-	DRM_DEBUG("%s\n", __func__);
+	DRM_DEBUG_DRIVER(I915_DRV, "%s\n", __func__);
 
 	RING_LOCK_TEST_WITH_RETURN(dev, file_priv);
 
@@ -777,7 +783,8 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = dev_priv->num_fence_regs - dev_priv->fence_reg_start;
 		break;
 	default:
-		DRM_DEBUG("Unknown parameter %d\n", param->param);
+		DRM_DEBUG_DRIVER(I915_DRV, "Unknown parameter %d\n",
+					param->param);
 		return -EINVAL;
 	}
 
@@ -817,7 +824,8 @@ static int i915_setparam(struct drm_device *dev, void *data,
 		dev_priv->fence_reg_start = param->value;
 		break;
 	default:
-		DRM_DEBUG("unknown parameter %d\n", param->param);
+		DRM_DEBUG_DRIVER(I915_DRV, "unknown parameter %d\n",
+					param->param);
 		return -EINVAL;
 	}
 
@@ -865,9 +873,10 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
 
 	memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
 	I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
-	DRM_DEBUG("load hws HWS_PGA with gfx mem 0x%x\n",
-			dev_priv->status_gfx_addr);
-	DRM_DEBUG("load hws at %p\n", dev_priv->hw_status_page);
+	DRM_DEBUG_DRIVER(I915_DRV, "load hws HWS_PGA with gfx mem 0x%x\n",
+				dev_priv->status_gfx_addr);
+	DRM_DEBUG_DRIVER(I915_DRV, "load hws at %p\n",
+				dev_priv->hw_status_page);
 	return 0;
 }
 
@@ -1270,7 +1279,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
 {
 	struct drm_i915_file_private *i915_file_priv;
 
-	DRM_DEBUG("\n");
+	DRM_DEBUG_DRIVER(I915_DRV, "\n");
 	i915_file_priv = (struct drm_i915_file_private *)
 	    drm_alloc(sizeof(*i915_file_priv), DRM_MEM_FILES);
 
-- 
cgit v0.10.2


From c5c07550d41abe86d109430e718f2007113031f8 Mon Sep 17 00:00:00 2001
From: "Figo.zhang" <figo1802@gmail.com>
Date: Sat, 6 Jun 2009 18:26:26 +0800
Subject: drm/via: vfree() no need checking before calling it

vfree() does it's own NULL checking, no need for explicit check before
calling it.

Signed-off-by: Figo.zhang <figo1802@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index 409e00a..3273808 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c
@@ -195,10 +195,8 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
 	default:
 		vsg->state = dr_via_sg_init;
 	}
-	if (vsg->bounce_buffer) {
-		vfree(vsg->bounce_buffer);
-		vsg->bounce_buffer = NULL;
-	}
+	vfree(vsg->bounce_buffer);
+	vsg->bounce_buffer = NULL;
 	vsg->free_on_sequence = 0;
 }
 
-- 
cgit v0.10.2


From 7ff145593d808a371924652c8d6a15fb75ce2250 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@linux.ie>
Date: Wed, 22 Apr 2009 18:52:14 +1000
Subject: drm/i915: duplicate desired mode for use by fbcon.

duplicate the mode into fbcon storage, so when we free modes later
we don't just lose this.

Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b32a51f..028f5b6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2310,6 +2310,8 @@ static void intel_crtc_destroy(struct drm_crtc *crtc)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
+	if (intel_crtc->mode_set.mode)
+		drm_mode_destroy(crtc->dev, intel_crtc->mode_set.mode);
 	drm_crtc_cleanup(crtc);
 	kfree(intel_crtc);
 }
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index cbd2ba8..0ecf6b7 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -674,8 +674,12 @@ static int intelfb_multi_fb_probe_crtc(struct drm_device *dev, struct drm_crtc *
 	par->crtc_ids[0] = crtc->base.id;
 
 	modeset->num_connectors = conn_count;
-	if (modeset->mode != modeset->crtc->desired_mode)
-		modeset->mode = modeset->crtc->desired_mode;
+	if (modeset->crtc->desired_mode) {
+		if (modeset->mode)
+			drm_mode_destroy(dev, modeset->mode);
+		modeset->mode = drm_mode_duplicate(dev,
+						   modeset->crtc->desired_mode);
+	}
 
 	par->crtc_count = 1;
 
@@ -824,8 +828,12 @@ static int intelfb_single_fb_probe(struct drm_device *dev)
 		par->crtc_ids[crtc_count++] = crtc->base.id;
 
 		modeset->num_connectors = conn_count;
-		if (modeset->mode != modeset->crtc->desired_mode)
-			modeset->mode = modeset->crtc->desired_mode;
+		if (modeset->crtc->desired_mode) {
+			if (modeset->mode)
+				drm_mode_destroy(dev, modeset->mode);
+			modeset->mode = drm_mode_duplicate(dev,
+							   modeset->crtc->desired_mode);
+		}
 	}
 	par->crtc_count = crtc_count;
 
-- 
cgit v0.10.2


From 61f11699e7a92d932b31ded3715ad4f70eb26ef2 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Sat, 30 May 2009 20:42:27 -0700
Subject: drm: Eliminate magic I2C frobbing when reading EDID

This code depends on the underlying I2C adapter using the bit-banging algo,
which may not be the case. If specific encoders require this mechanism, they
should build a custom I2C algo that implements this workaround, rather than
having it in the general path.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index ca9c616..c4d9b33 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -584,85 +584,13 @@ int drm_do_probe_ddc_edid(struct i2c_adapter *adapter,
 }
 EXPORT_SYMBOL(drm_do_probe_ddc_edid);
 
-/**
- * Get EDID information.
- *
- * \param adapter : i2c device adaptor.
- * \param buf     : EDID data buffer to be filled
- * \param len     : EDID data buffer length
- * \return 0 on success or -1 on failure.
- *
- * Initialize DDC, then fetch EDID information
- * by calling drm_do_probe_ddc_edid function.
- */
-static int drm_ddc_read(struct i2c_adapter *adapter,
-			unsigned char *buf, int len)
-{
-	struct i2c_algo_bit_data *algo_data = adapter->algo_data;
-	int i, j;
-	int ret = -1;
-
-	algo_data->setscl(algo_data->data, 1);
-
-	for (i = 0; i < 1; i++) {
-		/* For some old monitors we need the
-		 * following process to initialize/stop DDC
-		 */
-		algo_data->setsda(algo_data->data, 1);
-		msleep(13);
-
-		algo_data->setscl(algo_data->data, 1);
-		for (j = 0; j < 5; j++) {
-			msleep(10);
-			if (algo_data->getscl(algo_data->data))
-				break;
-		}
-		if (j == 5)
-			continue;
-
-		algo_data->setsda(algo_data->data, 0);
-		msleep(15);
-		algo_data->setscl(algo_data->data, 0);
-		msleep(15);
-		algo_data->setsda(algo_data->data, 1);
-		msleep(15);
-
-		/* Do the real work */
-		ret = drm_do_probe_ddc_edid(adapter, buf, len);
-		algo_data->setsda(algo_data->data, 0);
-		algo_data->setscl(algo_data->data, 0);
-		msleep(15);
-
-		algo_data->setscl(algo_data->data, 1);
-		for (j = 0; j < 10; j++) {
-			msleep(10);
-			if (algo_data->getscl(algo_data->data))
-				break;
-		}
-
-		algo_data->setsda(algo_data->data, 1);
-		msleep(15);
-		algo_data->setscl(algo_data->data, 0);
-		algo_data->setsda(algo_data->data, 0);
-		if (ret == 0)
-			break;
-	}
-	/* Release the DDC lines when done or the Apple Cinema HD display
-	 * will switch off
-	 */
-	algo_data->setsda(algo_data->data, 1);
-	algo_data->setscl(algo_data->data, 1);
-
-	return ret;
-}
-
 static int drm_ddc_read_edid(struct drm_connector *connector,
 			     struct i2c_adapter *adapter,
 			     char *buf, int len)
 {
 	int ret;
 
-	ret = drm_ddc_read(adapter, buf, len);
+	ret = drm_do_probe_ddc_edid(adapter, buf, len);
 	if (ret != 0) {
 		dev_info(&connector->dev->pdev->dev, "%s: no EDID data\n",
 			 drm_get_connector_name(connector));
-- 
cgit v0.10.2


From fbe0efb869efde8d847ede3a925230ef88910086 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= <krh@redhat.com>
Date: Tue, 9 Jun 2009 01:50:41 +1000
Subject: drm_calloc_large: check right size, check integer overflow, use
 GFP_ZERO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously we would check size instead of size * nmemb, and so would
never hit the vmalloc path.  Also add integer overflow check as in kcalloc,
and allocate GFP_ZERO pages instead of memset()ing them.

Signed-off-by: Kristian Høgsberg <krh@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index afc2168..1cc51a0 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1573,18 +1573,14 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area)
 
 static __inline__ void *drm_calloc_large(size_t nmemb, size_t size)
 {
-	u8 *addr;
-
-	if (size <= PAGE_SIZE)
+	if (size * nmemb <= PAGE_SIZE)
 	    return kcalloc(nmemb, size, GFP_KERNEL);
 
-	addr = vmalloc(nmemb * size);
-	if (!addr)
+	if (size != 0 && nmemb > ULONG_MAX / size)
 		return NULL;
 
-	memset(addr, 0, nmemb * size);
-
-	return addr;
+	return __vmalloc(size * nmemb,
+			 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
 }
 
 static __inline void drm_free_large(void *ptr)
-- 
cgit v0.10.2


From 2a71ebcd85bcc4d6607f577f23a491f796c30e82 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 12 Jun 2009 15:53:10 +1000
Subject: drm/radeon: add rv740 drm support.

This adds drm support for the RV740 family of chips to the r600 support code.

Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index bc9d09d..aa4eee4 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -489,15 +489,16 @@ static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
 			RADEON_WRITE(R600_CP_ME_RAM_DATA, RV770_cp_microcode[i]);
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
 
-	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV730)) {
+	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV730) ||
+		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)) {
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
-		DRM_INFO("Loading RV730 PFP Microcode\n");
+		DRM_INFO("Loading RV730/RV740 PFP Microcode\n");
 		for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV730_pfp_microcode[i]);
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
 
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
-		DRM_INFO("Loading RV730 CP Microcode\n");
+		DRM_INFO("Loading RV730/RV740 CP Microcode\n");
 		for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_ME_RAM_DATA, RV730_cp_microcode[i]);
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
@@ -1324,6 +1325,10 @@ static void r700_gfx_init(struct drm_device *dev,
 		dev_priv->r700_sc_prim_fifo_size = 0xf9;
 		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
 		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
+		if (dev_priv->r600_sx_max_export_pos_size > 16) {
+			dev_priv->r600_sx_max_export_pos_size -= 16;
+			dev_priv->r600_sx_max_export_smx_size += 16;
+		}
 		break;
 	case CHIP_RV710:
 		dev_priv->r600_max_pipes = 2;
@@ -1345,6 +1350,31 @@ static void r700_gfx_init(struct drm_device *dev,
 		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
 		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
 		break;
+	case CHIP_RV740:
+		dev_priv->r600_max_pipes = 4;
+		dev_priv->r600_max_tile_pipes = 4;
+		dev_priv->r600_max_simds = 8;
+		dev_priv->r600_max_backends = 4;
+		dev_priv->r600_max_gprs = 256;
+		dev_priv->r600_max_threads = 248;
+		dev_priv->r600_max_stack_entries = 512;
+		dev_priv->r600_max_hw_contexts = 8;
+		dev_priv->r600_max_gs_threads = 16 * 2;
+		dev_priv->r600_sx_max_export_size = 256;
+		dev_priv->r600_sx_max_export_pos_size = 32;
+		dev_priv->r600_sx_max_export_smx_size = 224;
+		dev_priv->r600_sq_num_cf_insts = 2;
+
+		dev_priv->r700_sx_num_of_sets = 7;
+		dev_priv->r700_sc_prim_fifo_size = 0x100;
+		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
+		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
+
+		if (dev_priv->r600_sx_max_export_pos_size > 16) {
+			dev_priv->r600_sx_max_export_pos_size -= 16;
+			dev_priv->r600_sx_max_export_smx_size += 16;
+		}
+		break;
 	default:
 		break;
 	}
@@ -1493,6 +1523,7 @@ static void r700_gfx_init(struct drm_device *dev,
 		break;
 	case CHIP_RV730:
 	case CHIP_RV710:
+	case CHIP_RV740:
 	default:
 		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
 		break;
@@ -1569,6 +1600,7 @@ static void r700_gfx_init(struct drm_device *dev,
 	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
 	case CHIP_RV770:
 	case CHIP_RV730:
+	case CHIP_RV740:
 		gs_prim_buffer_depth = 384;
 		break;
 	case CHIP_RV710:
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 8071d96..e266e5f 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -146,6 +146,7 @@ enum radeon_family {
 	CHIP_RV770,
 	CHIP_RV730,
 	CHIP_RV710,
+	CHIP_RV740,
 	CHIP_LAST,
 };
 
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 8b4c80c..c7a1a8d 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -254,6 +254,11 @@
 	{0x1002, 0x940A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x940B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x940F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94B3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94B5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9440, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9441, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9442, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
-- 
cgit v0.10.2


From 715cbb05c935e8a4306a730d14a72d5af881523e Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 12 Jun 2009 15:55:44 +1000
Subject: drm/radeon: add support for RV790.

This adds the PCI IDs for the rv790 which are equiv to the rv770.

Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index aa4eee4..146f357 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -478,13 +478,13 @@ static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
 
 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)) {
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
-		DRM_INFO("Loading RV770 PFP Microcode\n");
+		DRM_INFO("Loading RV770/RV790 PFP Microcode\n");
 		for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV770_pfp_microcode[i]);
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
 
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
-		DRM_INFO("Loading RV770 CP Microcode\n");
+		DRM_INFO("Loading RV770/RV790 CP Microcode\n");
 		for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_ME_RAM_DATA, RV770_cp_microcode[i]);
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index c7a1a8d..f8634ab 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -273,6 +273,8 @@
 	{0x1002, 0x9456, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x945A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x945B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x9460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x9462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x946A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x946B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x947A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
-- 
cgit v0.10.2


From 249d6048ca98b5452105b0824abac1275661b8e3 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <glisse@freedesktop.org>
Date: Wed, 8 Apr 2009 17:11:16 +0200
Subject: drm: Split out the mm declarations in a separate header. Add atomic
 operations.

this is a TTM preparation patch, it rearranges the mm and
add operations needed to do mm operations in atomic context.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 367c590..7819fd9 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -42,8 +42,11 @@
  */
 
 #include "drmP.h"
+#include "drm_mm.h"
 #include <linux/slab.h>
 
+#define MM_UNUSED_TARGET 4
+
 unsigned long drm_mm_tail_space(struct drm_mm *mm)
 {
 	struct list_head *tail_node;
@@ -74,16 +77,62 @@ int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size)
 	return 0;
 }
 
+static struct drm_mm_node *drm_mm_kmalloc(struct drm_mm *mm, int atomic)
+{
+	struct drm_mm_node *child;
+
+	if (atomic)
+		child = kmalloc(sizeof(*child), GFP_ATOMIC);
+	else
+		child = kmalloc(sizeof(*child), GFP_KERNEL);
+
+	if (unlikely(child == NULL)) {
+		spin_lock(&mm->unused_lock);
+		if (list_empty(&mm->unused_nodes))
+			child = NULL;
+		else {
+			child =
+			    list_entry(mm->unused_nodes.next,
+				       struct drm_mm_node, fl_entry);
+			list_del(&child->fl_entry);
+			--mm->num_unused;
+		}
+		spin_unlock(&mm->unused_lock);
+	}
+	return child;
+}
+
+int drm_mm_pre_get(struct drm_mm *mm)
+{
+	struct drm_mm_node *node;
+
+	spin_lock(&mm->unused_lock);
+	while (mm->num_unused < MM_UNUSED_TARGET) {
+		spin_unlock(&mm->unused_lock);
+		node = kmalloc(sizeof(*node), GFP_KERNEL);
+		spin_lock(&mm->unused_lock);
+
+		if (unlikely(node == NULL)) {
+			int ret = (mm->num_unused < 2) ? -ENOMEM : 0;
+			spin_unlock(&mm->unused_lock);
+			return ret;
+		}
+		++mm->num_unused;
+		list_add_tail(&node->fl_entry, &mm->unused_nodes);
+	}
+	spin_unlock(&mm->unused_lock);
+	return 0;
+}
+EXPORT_SYMBOL(drm_mm_pre_get);
 
 static int drm_mm_create_tail_node(struct drm_mm *mm,
-			    unsigned long start,
-			    unsigned long size)
+				   unsigned long start,
+				   unsigned long size, int atomic)
 {
 	struct drm_mm_node *child;
 
-	child = (struct drm_mm_node *)
-		drm_alloc(sizeof(*child), DRM_MEM_MM);
-	if (!child)
+	child = drm_mm_kmalloc(mm, atomic);
+	if (unlikely(child == NULL))
 		return -ENOMEM;
 
 	child->free = 1;
@@ -97,8 +146,7 @@ static int drm_mm_create_tail_node(struct drm_mm *mm,
 	return 0;
 }
 
-
-int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size)
+int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size, int atomic)
 {
 	struct list_head *tail_node;
 	struct drm_mm_node *entry;
@@ -106,20 +154,21 @@ int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size)
 	tail_node = mm->ml_entry.prev;
 	entry = list_entry(tail_node, struct drm_mm_node, ml_entry);
 	if (!entry->free) {
-		return drm_mm_create_tail_node(mm, entry->start + entry->size, size);
+		return drm_mm_create_tail_node(mm, entry->start + entry->size,
+					       size, atomic);
 	}
 	entry->size += size;
 	return 0;
 }
 
 static struct drm_mm_node *drm_mm_split_at_start(struct drm_mm_node *parent,
-					    unsigned long size)
+						 unsigned long size,
+						 int atomic)
 {
 	struct drm_mm_node *child;
 
-	child = (struct drm_mm_node *)
-		drm_alloc(sizeof(*child), DRM_MEM_MM);
-	if (!child)
+	child = drm_mm_kmalloc(parent->mm, atomic);
+	if (unlikely(child == NULL))
 		return NULL;
 
 	INIT_LIST_HEAD(&child->fl_entry);
@@ -151,8 +200,9 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,
 		tmp = parent->start % alignment;
 
 	if (tmp) {
-		align_splitoff = drm_mm_split_at_start(parent, alignment - tmp);
-		if (!align_splitoff)
+		align_splitoff =
+		    drm_mm_split_at_start(parent, alignment - tmp, 0);
+		if (unlikely(align_splitoff == NULL))
 			return NULL;
 	}
 
@@ -161,7 +211,7 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,
 		parent->free = 0;
 		return parent;
 	} else {
-		child = drm_mm_split_at_start(parent, size);
+		child = drm_mm_split_at_start(parent, size, 0);
 	}
 
 	if (align_splitoff)
@@ -169,14 +219,49 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,
 
 	return child;
 }
+
 EXPORT_SYMBOL(drm_mm_get_block);
 
+struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent,
+					    unsigned long size,
+					    unsigned alignment)
+{
+
+	struct drm_mm_node *align_splitoff = NULL;
+	struct drm_mm_node *child;
+	unsigned tmp = 0;
+
+	if (alignment)
+		tmp = parent->start % alignment;
+
+	if (tmp) {
+		align_splitoff =
+		    drm_mm_split_at_start(parent, alignment - tmp, 1);
+		if (unlikely(align_splitoff == NULL))
+			return NULL;
+	}
+
+	if (parent->size == size) {
+		list_del_init(&parent->fl_entry);
+		parent->free = 0;
+		return parent;
+	} else {
+		child = drm_mm_split_at_start(parent, size, 1);
+	}
+
+	if (align_splitoff)
+		drm_mm_put_block(align_splitoff);
+
+	return child;
+}
+EXPORT_SYMBOL(drm_mm_get_block_atomic);
+
 /*
  * Put a block. Merge with the previous and / or next block if they are free.
  * Otherwise add to the free stack.
  */
 
-void drm_mm_put_block(struct drm_mm_node * cur)
+void drm_mm_put_block(struct drm_mm_node *cur)
 {
 
 	struct drm_mm *mm = cur->mm;
@@ -188,21 +273,27 @@ void drm_mm_put_block(struct drm_mm_node * cur)
 	int merged = 0;
 
 	if (cur_head->prev != root_head) {
-		prev_node = list_entry(cur_head->prev, struct drm_mm_node, ml_entry);
+		prev_node =
+		    list_entry(cur_head->prev, struct drm_mm_node, ml_entry);
 		if (prev_node->free) {
 			prev_node->size += cur->size;
 			merged = 1;
 		}
 	}
 	if (cur_head->next != root_head) {
-		next_node = list_entry(cur_head->next, struct drm_mm_node, ml_entry);
+		next_node =
+		    list_entry(cur_head->next, struct drm_mm_node, ml_entry);
 		if (next_node->free) {
 			if (merged) {
 				prev_node->size += next_node->size;
 				list_del(&next_node->ml_entry);
 				list_del(&next_node->fl_entry);
-				drm_free(next_node, sizeof(*next_node),
-					 DRM_MEM_MM);
+				if (mm->num_unused < MM_UNUSED_TARGET) {
+					list_add(&next_node->fl_entry,
+						 &mm->unused_nodes);
+					++mm->num_unused;
+				} else
+					kfree(next_node);
 			} else {
 				next_node->size += cur->size;
 				next_node->start = cur->start;
@@ -215,14 +306,19 @@ void drm_mm_put_block(struct drm_mm_node * cur)
 		list_add(&cur->fl_entry, &mm->fl_entry);
 	} else {
 		list_del(&cur->ml_entry);
-		drm_free(cur, sizeof(*cur), DRM_MEM_MM);
+		if (mm->num_unused < MM_UNUSED_TARGET) {
+			list_add(&cur->fl_entry, &mm->unused_nodes);
+			++mm->num_unused;
+		} else
+			kfree(cur);
 	}
 }
+
 EXPORT_SYMBOL(drm_mm_put_block);
 
-struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm,
-				  unsigned long size,
-				  unsigned alignment, int best_match)
+struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
+				       unsigned long size,
+				       unsigned alignment, int best_match)
 {
 	struct list_head *list;
 	const struct list_head *free_stack = &mm->fl_entry;
@@ -247,7 +343,6 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm,
 				wasted += alignment - tmp;
 		}
 
-
 		if (entry->size >= size + wasted) {
 			if (!best_match)
 				return entry;
@@ -260,6 +355,7 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm,
 
 	return best;
 }
+EXPORT_SYMBOL(drm_mm_search_free);
 
 int drm_mm_clean(struct drm_mm * mm)
 {
@@ -267,14 +363,17 @@ int drm_mm_clean(struct drm_mm * mm)
 
 	return (head->next->next == head);
 }
-EXPORT_SYMBOL(drm_mm_search_free);
+EXPORT_SYMBOL(drm_mm_clean);
 
 int drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size)
 {
 	INIT_LIST_HEAD(&mm->ml_entry);
 	INIT_LIST_HEAD(&mm->fl_entry);
+	INIT_LIST_HEAD(&mm->unused_nodes);
+	mm->num_unused = 0;
+	spin_lock_init(&mm->unused_lock);
 
-	return drm_mm_create_tail_node(mm, start, size);
+	return drm_mm_create_tail_node(mm, start, size, 0);
 }
 EXPORT_SYMBOL(drm_mm_init);
 
@@ -282,6 +381,7 @@ void drm_mm_takedown(struct drm_mm * mm)
 {
 	struct list_head *bnode = mm->fl_entry.next;
 	struct drm_mm_node *entry;
+	struct drm_mm_node *next;
 
 	entry = list_entry(bnode, struct drm_mm_node, fl_entry);
 
@@ -293,7 +393,16 @@ void drm_mm_takedown(struct drm_mm * mm)
 
 	list_del(&entry->fl_entry);
 	list_del(&entry->ml_entry);
+	kfree(entry);
+
+	spin_lock(&mm->unused_lock);
+	list_for_each_entry_safe(entry, next, &mm->unused_nodes, fl_entry) {
+		list_del(&entry->fl_entry);
+		kfree(entry);
+		--mm->num_unused;
+	}
+	spin_unlock(&mm->unused_lock);
 
-	drm_free(entry, sizeof(*entry), DRM_MEM_MM);
+	BUG_ON(mm->num_unused != 0);
 }
 EXPORT_SYMBOL(drm_mm_takedown);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 1cc51a0..d4ddc22 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -86,6 +86,7 @@ struct drm_device;
 
 #include "drm_os_linux.h"
 #include "drm_hashtab.h"
+#include "drm_mm.h"
 
 #define DRM_UT_CORE 		0x01
 #define DRM_UT_DRIVER		0x02
@@ -553,26 +554,6 @@ struct drm_sigdata {
 };
 
 
-/*
- * Generic memory manager structs
- */
-
-struct drm_mm_node {
-	struct list_head fl_entry;
-	struct list_head ml_entry;
-	int free;
-	unsigned long start;
-	unsigned long size;
-	struct drm_mm *mm;
-	void *private;
-};
-
-struct drm_mm {
-	struct list_head fl_entry;
-	struct list_head ml_entry;
-};
-
-
 /**
  * Kernel side of a mapping
  */
@@ -1436,22 +1417,6 @@ extern char *drm_get_connector_status_name(enum drm_connector_status status);
 extern int drm_sysfs_connector_add(struct drm_connector *connector);
 extern void drm_sysfs_connector_remove(struct drm_connector *connector);
 
-/*
- * Basic memory manager support (drm_mm.c)
- */
-extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,
-				       unsigned long size,
-				       unsigned alignment);
-extern void drm_mm_put_block(struct drm_mm_node * cur);
-extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm, unsigned long size,
-					 unsigned alignment, int best_match);
-extern int drm_mm_init(struct drm_mm *mm, unsigned long start, unsigned long size);
-extern void drm_mm_takedown(struct drm_mm *mm);
-extern int drm_mm_clean(struct drm_mm *mm);
-extern unsigned long drm_mm_tail_space(struct drm_mm *mm);
-extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size);
-extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size);
-
 /* Graphics Execution Manager library functions (drm_gem.c) */
 int drm_gem_init(struct drm_device *dev);
 void drm_gem_destroy(struct drm_device *dev);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
new file mode 100644
index 0000000..5662f42
--- /dev/null
+++ b/include/drm/drm_mm.h
@@ -0,0 +1,90 @@
+/**************************************************************************
+ *
+ * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX. USA.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ **************************************************************************/
+/*
+ * Authors:
+ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ */
+
+#ifndef _DRM_MM_H_
+#define _DRM_MM_H_
+
+/*
+ * Generic range manager structs
+ */
+#include <linux/list.h>
+
+struct drm_mm_node {
+	struct list_head fl_entry;
+	struct list_head ml_entry;
+	int free;
+	unsigned long start;
+	unsigned long size;
+	struct drm_mm *mm;
+	void *private;
+};
+
+struct drm_mm {
+	struct list_head fl_entry;
+	struct list_head ml_entry;
+	struct list_head unused_nodes;
+	int num_unused;
+	spinlock_t unused_lock;
+};
+
+/*
+ * Basic range manager support (drm_mm.c)
+ */
+
+extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *parent,
+					    unsigned long size,
+					    unsigned alignment);
+extern struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent,
+						   unsigned long size,
+						   unsigned alignment);
+extern void drm_mm_put_block(struct drm_mm_node *cur);
+extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
+					      unsigned long size,
+					      unsigned alignment,
+					      int best_match);
+extern int drm_mm_init(struct drm_mm *mm, unsigned long start,
+		       unsigned long size);
+extern void drm_mm_takedown(struct drm_mm *mm);
+extern int drm_mm_clean(struct drm_mm *mm);
+extern unsigned long drm_mm_tail_space(struct drm_mm *mm);
+extern int drm_mm_remove_space_from_tail(struct drm_mm *mm,
+					 unsigned long size);
+extern int drm_mm_add_space_to_tail(struct drm_mm *mm,
+				    unsigned long size, int atomic);
+extern int drm_mm_pre_get(struct drm_mm *mm);
+
+static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block)
+{
+	return block->mm;
+}
+
+#endif
-- 
cgit v0.10.2


From f2cb5d86e1af175a9b210241800f03a447f92621 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <glisse@freedesktop.org>
Date: Wed, 8 Apr 2009 17:16:24 +0200
Subject: drm: Export hash table functionality.

add exports so TTM module can use these functions.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index af539f7..ac35145 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -62,6 +62,7 @@ int drm_ht_create(struct drm_open_hash *ht, unsigned int order)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_create);
 
 void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key)
 {
@@ -156,6 +157,7 @@ int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *it
 	}
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_just_insert_please);
 
 int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key,
 		     struct drm_hash_item **item)
@@ -169,6 +171,7 @@ int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key,
 	*item = hlist_entry(list, struct drm_hash_item, head);
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_find_item);
 
 int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key)
 {
@@ -202,3 +205,4 @@ void drm_ht_remove(struct drm_open_hash *ht)
 		ht->table = NULL;
 	}
 }
+EXPORT_SYMBOL(drm_ht_remove);
-- 
cgit v0.10.2


From 3c24475c1e4e8d10e50df161d8c4f1d382997a7c Mon Sep 17 00:00:00 2001
From: Jerome Glisse <glisse@freedesktop.org>
Date: Wed, 8 Apr 2009 18:34:28 +0200
Subject: drm: include kernel list header file in hashtab header

Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/include/drm/drm_hashtab.h b/include/drm/drm_hashtab.h
index cd2b189..0af087a 100644
--- a/include/drm/drm_hashtab.h
+++ b/include/drm/drm_hashtab.h
@@ -35,6 +35,8 @@
 #ifndef DRM_HASHTAB_H
 #define DRM_HASHTAB_H
 
+#include <linux/list.h>
+
 #define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member)
 
 struct drm_hash_item {
-- 
cgit v0.10.2


From 1260866a271abc84274345580f3d613d3bceff87 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 11 May 2009 13:22:00 +0100
Subject: x86: Provide _sdata in the vmlinux.lds.S file

_sdata is a common symbol defined by many architectures and made
available to the kernel via asm-generic/sections.h. Kmemleak uses this
symbol when scanning the data sections.

[ Impact: add new global symbol ]

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
LKML-Reference: <20090511122105.26556.96593.stgit@pc1117.cambridge.arm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4c85b2e..367e878 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -108,6 +108,8 @@ SECTIONS
 	/* Data */
 	. = ALIGN(PAGE_SIZE);
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		/* Start of data section */
+		_sdata = .;
 		DATA_DATA
 		CONSTRUCTORS
 
-- 
cgit v0.10.2


From 63997775b795f97ef51f3e56bc3abc9edc04bbb0 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 12 Jun 2009 08:49:20 +0100
Subject: GFS2: Add tracepoints

This patch adds the ability to trace various aspects of the GFS2
filesystem. The trace points are divided into three groups,
glocks, logging and bmap. These points have been chosen because
they allow inspection of the major internal functions of GFS2
and they are also generic enough that they are unlikely to need
any major changes as the filesystem evolves.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index d53a9be..3da2f1f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,3 +1,4 @@
+EXTRA_CFLAGS := -I$(src)
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
 	glops.o inode.o log.o lops.o main.o meta_io.o \
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3297635..6d47379 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -25,6 +25,7 @@
 #include "trans.h"
 #include "dir.h"
 #include "util.h"
+#include "trace_gfs2.h"
 
 /* This doesn't need to be that large as max 64 bit pointers in a 4k
  * block is 512, so __u16 is fine for that. It saves stack space to
@@ -589,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 	clear_buffer_mapped(bh_map);
 	clear_buffer_new(bh_map);
 	clear_buffer_boundary(bh_map);
+	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 	if (gfs2_is_dir(ip)) {
 		bsize = sdp->sd_jbsize;
 		arr = sdp->sd_jheightsize;
@@ -623,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 	ret = 0;
 out:
 	release_metapath(&mp);
+	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
 	bmap_unlock(ip, create);
 	return ret;
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2bf62bc..297421c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -39,6 +39,8 @@
 #include "super.h"
 #include "util.h"
 #include "bmap.h"
+#define CREATE_TRACE_POINTS
+#include "trace_gfs2.h"
 
 struct gfs2_gl_hash_bucket {
         struct hlist_head hb_list;
@@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl)
 
 	if (aspace)
 		gfs2_aspace_put(aspace);
-
+	trace_gfs2_glock_put(gl);
 	sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
 }
 
@@ -317,14 +319,17 @@ restart:
 						return 2;
 					gh->gh_error = ret;
 					list_del_init(&gh->gh_list);
+					trace_gfs2_glock_queue(gh, 0);
 					gfs2_holder_wake(gh);
 					goto restart;
 				}
 				set_bit(HIF_HOLDER, &gh->gh_iflags);
+				trace_gfs2_promote(gh, 1);
 				gfs2_holder_wake(gh);
 				goto restart;
 			}
 			set_bit(HIF_HOLDER, &gh->gh_iflags);
+			trace_gfs2_promote(gh, 0);
 			gfs2_holder_wake(gh);
 			continue;
 		}
@@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret)
 		else
 			continue;
 		list_del_init(&gh->gh_list);
+		trace_gfs2_glock_queue(gh, 0);
 		gfs2_holder_wake(gh);
 	}
 }
@@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
 	int rv;
 
 	spin_lock(&gl->gl_spin);
+	trace_gfs2_glock_state_change(gl, state);
 	state_change(gl, state);
 	gh = find_first_waiter(gl);
 
@@ -851,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
 			gl->gl_demote_state != state) {
 		gl->gl_demote_state = LM_ST_UNLOCKED;
 	}
+	trace_gfs2_demote_rq(gl);
 }
 
 /**
@@ -936,6 +944,7 @@ fail:
 			goto do_cancel;
 		return;
 	}
+	trace_gfs2_glock_queue(gh, 1);
 	list_add_tail(&gh->gh_list, insert_pt);
 do_cancel:
 	gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1032,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
 			fast_path = 1;
 	}
+	trace_gfs2_glock_queue(gh, 0);
 	spin_unlock(&gl->gl_spin);
 	if (likely(fast_path))
 		return;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f2e449c..13c6237 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -28,6 +28,7 @@
 #include "meta_io.h"
 #include "util.h"
 #include "dir.h"
+#include "trace_gfs2.h"
 
 #define PULL 1
 
@@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 		gfs2_log_lock(sdp);
 	}
 	atomic_sub(blks, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, -blks);
 	gfs2_log_unlock(sdp);
 	mutex_unlock(&sdp->sd_log_reserve_mutex);
 
@@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
 
 	gfs2_log_lock(sdp);
 	atomic_add(blks, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, blks);
 	gfs2_assert_withdraw(sdp,
 			     atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);
@@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 
 	gfs2_log_lock(sdp);
 	atomic_add(dist, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, dist);
 	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);
 
@@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 		up_write(&sdp->sd_log_flush_lock);
 		return;
 	}
+	trace_gfs2_log_flush(sdp, 1);
 
 	ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
 	INIT_LIST_HEAD(&ai->ai_ail1_list);
@@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
 		gfs2_log_lock(sdp);
 		atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
+		trace_gfs2_log_blocks(sdp, -1);
 		gfs2_log_unlock(sdp);
 		log_write_header(sdp, 0, PULL);
 	}
@@ -763,7 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 		ai = NULL;
 	}
 	gfs2_log_unlock(sdp);
-
+	trace_gfs2_log_flush(sdp, 0);
 	up_write(&sdp->sd_log_flush_lock);
 
 	kfree(ai);
@@ -787,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
 	unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
 	atomic_add(unused, &sdp->sd_log_blks_free);
+	trace_gfs2_log_blocks(sdp, unused);
 	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
 			     sdp->sd_jdesc->jd_blocks);
 	sdp->sd_log_blks_reserved = reserved;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 00315f5..9969ff0 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -27,6 +27,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "trace_gfs2.h"
 
 /**
  * gfs2_pin - Pin a buffer in memory
@@ -53,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
 	if (bd->bd_ail)
 		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
 	get_bh(bh);
+	trace_gfs2_pin(bd, 1);
 }
 
 /**
@@ -89,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	bd->bd_ail = ai;
 	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
 	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+	trace_gfs2_pin(bd, 0);
 	gfs2_log_unlock(sdp);
 	unlock_buffer(bh);
 }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cc34f27..7bc3c45 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -33,6 +33,7 @@
 #include "log.h"
 #include "quota.h"
 #include "dir.h"
+#include "trace_gfs2.h"
 
 #define DO 0
 #define UNDO 1
@@ -775,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 		/* Map the extents for this journal's blocks */
 		map_journal_extents(sdp);
 	}
+	trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
 
 	if (sdp->sd_lockstruct.ls_first) {
 		unsigned int x;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index de32397..daa4ae3 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -29,6 +29,7 @@
 #include "util.h"
 #include "log.h"
 #include "inode.h"
+#include "trace_gfs2.h"
 
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
@@ -1519,7 +1520,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
 	spin_lock(&sdp->sd_rindex_spin);
 	rgd->rd_free_clone -= *n;
 	spin_unlock(&sdp->sd_rindex_spin);
-
+	trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
 	*bn = block;
 	return 0;
 
@@ -1571,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
 	spin_lock(&sdp->sd_rindex_spin);
 	rgd->rd_free_clone--;
 	spin_unlock(&sdp->sd_rindex_spin);
-
+	trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
 	return block;
 }
 
@@ -1591,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
 	if (!rgd)
 		return;
-
+	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1619,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
 	if (!rgd)
 		return;
-
+	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
 
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1642,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode)
 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
 	if (!rgd)
 		return;
+	trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 	gfs2_trans_add_rg(rgd);
@@ -1673,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
 	gfs2_free_uninit_di(rgd, ip->i_no_addr);
+	trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
new file mode 100644
index 0000000..98d6ef1
--- /dev/null
+++ b/fs/gfs2/trace_gfs2.h
@@ -0,0 +1,407 @@
+#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GFS2_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gfs2
+#define TRACE_INCLUDE_FILE trace_gfs2
+
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/dlmconstants.h>
+#include <linux/gfs2_ondisk.h>
+#include "incore.h"
+#include "glock.h"
+
+#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
+#define glock_trace_name(x) __print_symbolic(x,		\
+			    dlm_state_name(IV),		\
+			    dlm_state_name(NL),		\
+			    dlm_state_name(CR),		\
+			    dlm_state_name(CW),		\
+			    dlm_state_name(PR),		\
+			    dlm_state_name(PW),		\
+			    dlm_state_name(EX))
+
+#define block_state_name(x) __print_symbolic(x,			\
+			    { GFS2_BLKST_FREE, "free" },	\
+			    { GFS2_BLKST_USED, "used" },	\
+			    { GFS2_BLKST_DINODE, "dinode" },	\
+			    { GFS2_BLKST_UNLINKED, "unlinked" })
+
+#define show_glock_flags(flags) __print_flags(flags, "",	\
+	{(1UL << GLF_LOCK),			"l" },		\
+	{(1UL << GLF_DEMOTE),			"D" },		\
+	{(1UL << GLF_PENDING_DEMOTE),		"d" },		\
+	{(1UL << GLF_DEMOTE_IN_PROGRESS),	"p" },		\
+	{(1UL << GLF_DIRTY),			"y" },		\
+	{(1UL << GLF_LFLUSH),			"f" },		\
+	{(1UL << GLF_INVALIDATE_IN_PROGRESS),	"i" },		\
+	{(1UL << GLF_REPLY_PENDING),		"r" },		\
+	{(1UL << GLF_INITIAL),			"I" },		\
+	{(1UL << GLF_FROZEN),			"F" })
+
+#ifndef NUMPTY
+#define NUMPTY
+static inline u8 glock_trace_state(unsigned int state)
+{
+	switch(state) {
+	case LM_ST_SHARED:
+		return DLM_LOCK_PR;
+	case LM_ST_DEFERRED:
+		return DLM_LOCK_CW;
+	case LM_ST_EXCLUSIVE:
+		return DLM_LOCK_EX;
+	}
+	return DLM_LOCK_NL;
+}
+#endif
+
+/* Section 1 - Locking
+ *
+ * Objectives:
+ * Latency: Remote demote request to state change
+ * Latency: Local lock request to state change
+ * Latency: State change to lock grant
+ * Correctness: Ordering of local lock state vs. I/O requests
+ * Correctness: Responses to remote demote requests
+ */
+
+/* General glock state change (DLM lock request completes) */
+TRACE_EVENT(gfs2_glock_state_change,
+
+	TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state),
+
+	TP_ARGS(gl, new_state),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	u8,	new_state		)
+		__field(	u8,	dmt_state		)
+		__field(	u8,	tgt_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->new_state	= glock_trace_state(new_state);
+		__entry->tgt_state	= glock_trace_state(gl->gl_target);
+		__entry->dmt_state	= glock_trace_state(gl->gl_demote_state);
+		__entry->flags		= gl->gl_flags;
+	),
+
+	TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		 (unsigned long long)__entry->glnum,
+		  glock_trace_name(__entry->cur_state),
+		  glock_trace_name(__entry->new_state),
+		  glock_trace_name(__entry->tgt_state),
+		  glock_trace_name(__entry->dmt_state),
+		  show_glock_flags(__entry->flags))
+);
+
+/* State change -> unlocked, glock is being deallocated */
+TRACE_EVENT(gfs2_glock_put,
+
+	TP_PROTO(const struct gfs2_glock *gl),
+
+	TP_ARGS(gl),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->flags		= gl->gl_flags;
+	),
+
+	TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->gltype, (unsigned long long)__entry->glnum,
+                  glock_trace_name(__entry->cur_state),
+		  glock_trace_name(DLM_LOCK_IV),
+		  show_glock_flags(__entry->flags))
+
+);
+
+/* Callback (local or remote) requesting lock demotion */
+TRACE_EVENT(gfs2_demote_rq,
+
+	TP_PROTO(const struct gfs2_glock *gl),
+
+	TP_ARGS(gl),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	u8,	cur_state		)
+		__field(	u8,	dmt_state		)
+		__field(	unsigned long,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= gl->gl_sbd->sd_vfs->s_dev;
+		__entry->gltype		= gl->gl_name.ln_type;
+		__entry->glnum		= gl->gl_name.ln_number;
+		__entry->cur_state	= glock_trace_state(gl->gl_state);
+		__entry->dmt_state	= glock_trace_state(gl->gl_demote_state);
+		__entry->flags		= gl->gl_flags;
+	),
+
+	TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+                  glock_trace_name(__entry->cur_state),
+                  glock_trace_name(__entry->dmt_state),
+		  show_glock_flags(__entry->flags))
+
+);
+
+/* Promotion/grant of a glock */
+TRACE_EVENT(gfs2_promote,
+
+	TP_PROTO(const struct gfs2_holder *gh, int first),
+
+	TP_ARGS(gh, first),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	int,	first			)
+		__field(	u8,	state			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum	= gh->gh_gl->gl_name.ln_number;
+		__entry->gltype	= gh->gh_gl->gl_name.ln_type;
+		__entry->first	= first;
+		__entry->state	= glock_trace_state(gh->gh_state);
+	),
+
+	TP_printk("%u,%u glock %u:%llu promote %s %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  __entry->first ? "first": "other",
+		  glock_trace_name(__entry->state))
+);
+
+/* Queue/dequeue a lock request */
+TRACE_EVENT(gfs2_glock_queue,
+
+	TP_PROTO(const struct gfs2_holder *gh, int queue),
+
+	TP_ARGS(gh, queue),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	glnum			)
+		__field(	u32,	gltype			)
+		__field(	int,	queue			)
+		__field(	u8,	state			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum	= gh->gh_gl->gl_name.ln_number;
+		__entry->gltype	= gh->gh_gl->gl_name.ln_type;
+		__entry->queue	= queue;
+		__entry->state	= glock_trace_state(gh->gh_state);
+	),
+
+	TP_printk("%u,%u glock %u:%llu %squeue %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  __entry->queue ? "" : "de",
+		  glock_trace_name(__entry->state))
+);
+
+/* Section 2 - Log/journal
+ *
+ * Objectives:
+ * Latency: Log flush time
+ * Correctness: pin/unpin vs. disk I/O ordering
+ * Performance: Log usage stats
+ */
+
+/* Pin/unpin a block in the log */
+TRACE_EVENT(gfs2_pin,
+
+	TP_PROTO(const struct gfs2_bufdata *bd, int pin),
+
+	TP_ARGS(bd, pin),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	pin			)
+		__field(	u32,	len			)
+		__field(	sector_t,	block		)
+		__field(	u64,	ino			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bd->bd_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->pin		= pin;
+		__entry->len		= bd->bd_bh->b_size;
+		__entry->block		= bd->bd_bh->b_blocknr;
+		__entry->ino		= bd->bd_gl->gl_name.ln_number;
+	),
+
+	TP_printk("%u,%u log %s %llu/%lu inode %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->pin ? "pin" : "unpin",
+		  (unsigned long long)__entry->block,
+		  (unsigned long)__entry->len,
+		  (unsigned long long)__entry->ino)
+);
+
+/* Flushing the log */
+TRACE_EVENT(gfs2_log_flush,
+
+	TP_PROTO(const struct gfs2_sbd *sdp, int start),
+
+	TP_ARGS(sdp, start),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	start			)
+		__field(	u64,	log_seq			)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = sdp->sd_vfs->s_dev;
+		__entry->start		= start;
+		__entry->log_seq	= sdp->sd_log_sequence;
+	),
+
+	TP_printk("%u,%u log flush %s %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->start ? "start" : "end",
+		  (unsigned long long)__entry->log_seq)
+);
+
+/* Reserving/releasing blocks in the log */
+TRACE_EVENT(gfs2_log_blocks,
+
+	TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
+
+	TP_ARGS(sdp, blocks),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	int,	blocks			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= sdp->sd_vfs->s_dev;
+		__entry->blocks		= blocks;
+	),
+
+	TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
+		  MINOR(__entry->dev), __entry->blocks)
+);
+
+/* Section 3 - bmap
+ *
+ * Objectives:
+ * Latency: Bmap request time
+ * Performance: Block allocator tracing
+ * Correctness: Test of disard generation vs. blocks allocated
+ */
+
+/* Map an extent of blocks, possibly a new allocation */
+TRACE_EVENT(gfs2_bmap,
+
+	TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh,
+		sector_t lblock, int create, int errno),
+
+	TP_ARGS(ip, bh, lblock, create, errno),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	sector_t, lblock		)
+		__field(	sector_t, pblock		)
+		__field(	u64,	inum			)
+		__field(	unsigned long, state		)
+		__field(	u32,	len			)
+		__field(	int,	create			)
+		__field(	int,	errno			)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->lblock		= lblock;
+		__entry->pblock		= buffer_mapped(bh) ?  bh->b_blocknr : 0;
+		__entry->inum		= ip->i_no_addr;
+		__entry->state		= bh->b_state;
+		__entry->len		= bh->b_size;
+		__entry->create		= create;
+		__entry->errno		= errno;
+	),
+
+	TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->lblock,
+		  (unsigned long)__entry->len,
+		  (unsigned long long)__entry->pblock,
+		  __entry->state, __entry->create ? "create " : "nocreate",
+		  __entry->errno)
+);
+
+/* Keep track of blocks as they are allocated/freed */
+TRACE_EVENT(gfs2_block_alloc,
+
+	TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
+		u8 block_state),
+
+	TP_ARGS(ip, block, len, block_state),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	start			)
+		__field(	u64,	inum			)
+		__field(	u32,	len			)
+		__field(	u8,	block_state		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= ip->i_gl->gl_sbd->sd_vfs->s_dev;
+		__entry->start		= block;
+		__entry->inum		= ip->i_no_addr;
+		__entry->len		= len;
+		__entry->block_state	= block_state;
+	),
+
+	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->start,
+		  (unsigned long)__entry->len,
+		  block_state_name(__entry->block_state))
+);
+
+#endif /* _TRACE_GFS2_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
+
-- 
cgit v0.10.2


From ca371c0d7e23d0d0afae65fc83a0e91cf7399573 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 12 Jun 2009 10:33:53 +0300
Subject: memcg: fix page_cgroup fatal error in FLATMEM

Now, SLAB is configured in very early stage and it can be used in
init routine now.

But replacing alloc_bootmem() in FLAT/DISCONTIGMEM's page_cgroup()
initialization breaks the allocation, now.
(Works well in SPARSEMEM case...it supports MEMORY_HOTPLUG and
 size of page_cgroup is in reasonable size (< 1 << MAX_ORDER.)

This patch revive FLATMEM+memory cgroup by using alloc_bootmem.

In future,
We stop to support FLATMEM (if no users) or rewrite codes for flatmem
completely.But this will adds more messy codes and overheads.

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Tested-by: Li Zefan <lizf@cn.fujitsu.com>
Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 7339c7b..13f126c 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -18,7 +18,19 @@ struct page_cgroup {
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
-void __init page_cgroup_init(void);
+
+#ifdef CONFIG_SPARSEMEM
+static inline void __init page_cgroup_init_flatmem(void)
+{
+}
+extern void __init page_cgroup_init(void);
+#else
+void __init page_cgroup_init_flatmem(void);
+static inline void __init page_cgroup_init(void)
+{
+}
+#endif
+
 struct page_cgroup *lookup_page_cgroup(struct page *page);
 
 enum {
@@ -87,6 +99,10 @@ static inline void page_cgroup_init(void)
 {
 }
 
+static inline void __init page_cgroup_init_flatmem(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
diff --git a/init/main.c b/init/main.c
index 5616661..b3e8f14 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,6 +539,11 @@ void __init __weak thread_info_cache_init(void)
  */
 static void __init mm_init(void)
 {
+	/*
+	 * page_cgroup requires countinous pages as memmap
+	 * and it's bigger than MAX_ORDER unless SPARSEMEM.
+	 */
+	page_cgroup_init_flatmem();
 	mem_init();
 	kmem_cache_init();
 	vmalloc_init();
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 3dd4a90..11a8a10 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -47,8 +47,6 @@ static int __init alloc_node_page_cgroup(int nid)
 	struct page_cgroup *base, *pc;
 	unsigned long table_size;
 	unsigned long start_pfn, nr_pages, index;
-	struct page *page;
-	unsigned int order;
 
 	start_pfn = NODE_DATA(nid)->node_start_pfn;
 	nr_pages = NODE_DATA(nid)->node_spanned_pages;
@@ -57,13 +55,11 @@ static int __init alloc_node_page_cgroup(int nid)
 		return 0;
 
 	table_size = sizeof(struct page_cgroup) * nr_pages;
-	order = get_order(table_size);
-	page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order);
-	if (!page)
-		page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order);
-	if (!page)
+
+	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	if (!base)
 		return -ENOMEM;
-	base = page_address(page);
 	for (index = 0; index < nr_pages; index++) {
 		pc = base + index;
 		__init_page_cgroup(pc, start_pfn + index);
@@ -73,7 +69,7 @@ static int __init alloc_node_page_cgroup(int nid)
 	return 0;
 }
 
-void __init page_cgroup_init(void)
+void __init page_cgroup_init_flatmem(void)
 {
 
 	int nid, fail;
@@ -117,16 +113,11 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 	if (!section->page_cgroup) {
 		nid = page_to_nid(pfn_to_page(pfn));
 		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-		if (slab_is_available()) {
-			base = kmalloc_node(table_size,
-					GFP_KERNEL | __GFP_NOWARN, nid);
-			if (!base)
-				base = vmalloc_node(table_size, nid);
-		} else {
-			base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
-				table_size,
-				PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
-		}
+		VM_BUG_ON(!slab_is_available());
+		base = kmalloc_node(table_size,
+				GFP_KERNEL | __GFP_NOWARN, nid);
+		if (!base)
+			base = vmalloc_node(table_size, nid);
 	} else {
 		/*
  		 * We don't have to allocate page_cgroup again, but
-- 
cgit v0.10.2


From 7aa79f948749da7de3de0c427e9c9ee0ff595243 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:20 +0200
Subject: [S390] vdso: kernel parameter syntax

The syntax of the vdso kernel parameter is documented as vdso=[on|off].
The implementation uses vdso=[0|1], an invalid parameter string disables
the vdso support. Fix the mismatch by adding vdso=[on|off] as additional
parameter syntax.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 89b2e7f..7b76ee4 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -53,8 +53,19 @@ unsigned int __read_mostly vdso_enabled = 1;
 
 static int __init vdso_setup(char *s)
 {
-	vdso_enabled = simple_strtoul(s, NULL, 0);
-	return 1;
+	unsigned long val;
+	int rc;
+
+	rc = 0;
+	if (strncmp(s, "on", 3) == 0)
+		vdso_enabled = 1;
+	else if (strncmp(s, "off", 4) == 0)
+		vdso_enabled = 0;
+	else {
+		rc = strict_strtoul(s, 0, &val);
+		vdso_enabled = rc ? 0 : !!val;
+	}
+	return !rc;
 }
 __setup("vdso=", vdso_setup);
 
-- 
cgit v0.10.2


From 76d4e00a05d06c1d1552adea24fcf6182c9d8999 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:21 +0200
Subject: [S390] merge cpu.h into cputime.h

All definition in cpu.h have to do with cputime accounting. Move
them to cputime.h and remove the header file.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
deleted file mode 100644
index d60a2ee..0000000
--- a/arch/s390/include/asm/cpu.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  include/asm-s390/cpu.h
- *
- *    Copyright IBM Corp. 2007
- *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#ifndef _ASM_S390_CPU_H_
-#define _ASM_S390_CPU_H_
-
-#include <linux/types.h>
-#include <linux/percpu.h>
-#include <linux/spinlock.h>
-
-struct s390_idle_data {
-	spinlock_t lock;
-	unsigned long long idle_count;
-	unsigned long long idle_enter;
-	unsigned long long idle_time;
-};
-
-DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
-
-void vtime_start_cpu(void);
-
-static inline void s390_idle_check(void)
-{
-	if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
-		vtime_start_cpu();
-}
-
-#endif /* _ASM_S390_CPU_H_ */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 941384f..ec917d4 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -9,6 +9,9 @@
 #ifndef _S390_CPUTIME_H
 #define _S390_CPUTIME_H
 
+#include <linux/types.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
 #include <asm/div64.h>
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
@@ -174,8 +177,24 @@ cputime64_to_clock_t(cputime64_t cputime)
        return __div(cputime, 4096000000ULL / USER_HZ);
 }
 
+struct s390_idle_data {
+	spinlock_t lock;
+	unsigned long long idle_count;
+	unsigned long long idle_enter;
+	unsigned long long idle_time;
+};
+
+DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
+
+void vtime_start_cpu(void);
 cputime64_t s390_get_idle_time(int cpu);
 
 #define arch_idle_time(cpu) s390_get_idle_time(cpu)
 
+static inline void s390_idle_check(void)
+{
+	if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
+		vtime_start_cpu();
+}
+
 #endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 28cf196..015e27d 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -16,7 +16,7 @@
 #include <asm/lowcore.h>
 #include <asm/smp.h>
 #include <asm/etr.h>
-#include <asm/cpu.h>
+#include <asm/cputime.h>
 #include <asm/nmi.h>
 #include <asm/crw.h>
 
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index a0d2d55..6b0686d 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -13,7 +13,7 @@
 #include <linux/errno.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
-#include <asm/cpu.h>
+#include <asm/cputime.h>
 #include <asm/lowcore.h>
 #include <asm/s390_ext.h>
 #include <asm/irq_regs.h>
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a985a3b..0af302c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -47,7 +47,7 @@
 #include <asm/timer.h>
 #include <asm/lowcore.h>
 #include <asm/sclp.h>
-#include <asm/cpu.h>
+#include <asm/cputime.h>
 #include <asm/vdso.h>
 #include "entry.h"
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c87f59b..c8eb725 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -23,7 +23,7 @@
 #include <asm/s390_ext.h>
 #include <asm/timer.h>
 #include <asm/irq_regs.h>
-#include <asm/cpu.h>
+#include <asm/cputime.h>
 
 static ext_int_info_t ext_int_info_timer;
 
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 2aebb98..9889f18 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -28,7 +28,7 @@
 #include <asm/chpid.h>
 #include <asm/airq.h>
 #include <asm/isc.h>
-#include <asm/cpu.h>
+#include <asm/cputime.h>
 #include <asm/fcx.h>
 #include <asm/nmi.h>
 #include <asm/crw.h>
-- 
cgit v0.10.2


From ce58ae6f7f6bdd48c87472ff830a6d586ff076b2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:22 +0200
Subject: [S390] implement interrupt-enabling rwlocks

arch backend for f5f7eac41db827a47b2163330eecd7bb55ae9f12
"Allow rwlocks to re-enable interrupts".

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index f3861b0..c9af0d19 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -122,8 +122,10 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lp)
 #define __raw_write_can_lock(x) ((x)->lock == 0)
 
 extern void _raw_read_lock_wait(raw_rwlock_t *lp);
+extern void _raw_read_lock_wait_flags(raw_rwlock_t *lp, unsigned long flags);
 extern int _raw_read_trylock_retry(raw_rwlock_t *lp);
 extern void _raw_write_lock_wait(raw_rwlock_t *lp);
+extern void _raw_write_lock_wait_flags(raw_rwlock_t *lp, unsigned long flags);
 extern int _raw_write_trylock_retry(raw_rwlock_t *lp);
 
 static inline void __raw_read_lock(raw_rwlock_t *rw)
@@ -134,6 +136,14 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
 		_raw_read_lock_wait(rw);
 }
 
+static inline void __raw_read_lock_flags(raw_rwlock_t *rw, unsigned long flags)
+{
+	unsigned int old;
+	old = rw->lock & 0x7fffffffU;
+	if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old)
+		_raw_read_lock_wait_flags(rw, flags);
+}
+
 static inline void __raw_read_unlock(raw_rwlock_t *rw)
 {
 	unsigned int old, cmp;
@@ -151,6 +161,12 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
 		_raw_write_lock_wait(rw);
 }
 
+static inline void __raw_write_lock_flags(raw_rwlock_t *rw, unsigned long flags)
+{
+	if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
+		_raw_write_lock_wait_flags(rw, flags);
+}
+
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
 	_raw_compare_and_swap(&rw->lock, 0x80000000, 0);
@@ -172,9 +188,6 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
 	return _raw_write_trylock_retry(rw);
 }
 
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
-
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index e41f400..f7e0d30 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -124,6 +124,27 @@ void _raw_read_lock_wait(raw_rwlock_t *rw)
 }
 EXPORT_SYMBOL(_raw_read_lock_wait);
 
+void _raw_read_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags)
+{
+	unsigned int old;
+	int count = spin_retry;
+
+	local_irq_restore(flags);
+	while (1) {
+		if (count-- <= 0) {
+			_raw_yield();
+			count = spin_retry;
+		}
+		if (!__raw_read_can_lock(rw))
+			continue;
+		old = rw->lock & 0x7fffffffU;
+		local_irq_disable();
+		if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
+			return;
+	}
+}
+EXPORT_SYMBOL(_raw_read_lock_wait_flags);
+
 int _raw_read_trylock_retry(raw_rwlock_t *rw)
 {
 	unsigned int old;
@@ -157,6 +178,25 @@ void _raw_write_lock_wait(raw_rwlock_t *rw)
 }
 EXPORT_SYMBOL(_raw_write_lock_wait);
 
+void _raw_write_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags)
+{
+	int count = spin_retry;
+
+	local_irq_restore(flags);
+	while (1) {
+		if (count-- <= 0) {
+			_raw_yield();
+			count = spin_retry;
+		}
+		if (!__raw_write_can_lock(rw))
+			continue;
+		local_irq_disable();
+		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+			return;
+	}
+}
+EXPORT_SYMBOL(_raw_write_lock_wait_flags);
+
 int _raw_write_trylock_retry(raw_rwlock_t *rw)
 {
 	int count = spin_retry;
-- 
cgit v0.10.2


From 8c4caa4fbfc18aa50d9d682f502303a8e0d72726 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:23 +0200
Subject: [S390] use facility list for cpu type safety check

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 3aeca49..713fe9f 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -30,6 +30,7 @@
 #define __LC_SUBCHANNEL_NR		0x00ba
 #define __LC_IO_INT_PARM		0x00bc
 #define __LC_IO_INT_WORD		0x00c0
+#define __LC_STFL_FAC_LIST		0x00c8
 #define __LC_MCCK_CODE			0x00e8
 
 #define __LC_DUMP_REIPL			0x0e00
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 22596d7..bf8cf1c 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -479,27 +479,46 @@ startup:basr	%r13,0			# get base
 	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
 	mvc	__LC_EXIT_TIMER(8),5f-.LPG0(%r13)
 #ifndef CONFIG_MARCH_G5
-	# check processor version against MARCH_{G5,Z900,Z990,Z9_109,Z10}
-	stidp	__LC_CPUID		# store cpuid
-	lhi	%r0,(3f-2f) / 2
-	la	%r1,2f-.LPG0(%r13)
-0:	clc	__LC_CPUID+4(2),0(%r1)
-	jne	3f
-	lpsw	1f-.LPG0(13)		# machine type not good enough, crash
+	# check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10}
+	xc	__LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST
+	stfl	__LC_STFL_FAC_LIST	# store facility list
+	tm	__LC_STFL_FAC_LIST,0x01	# stfle available ?
+	jz	0f
+	la	%r0,0
+	.insn	s,0xb2b00000,__LC_STFL_FAC_LIST	# store facility list extended
+0:	l	%r0,__LC_STFL_FAC_LIST
+	n	%r0,2f+8-.LPG0(%r13)
+	cl	%r0,2f+8-.LPG0(%r13)
+	jne	1f
+	l	%r0,__LC_STFL_FAC_LIST+4
+	n	%r0,2f+12-.LPG0(%r13)
+	cl	%r0,2f+12-.LPG0(%r13)
+	je	3f
+1:	lpsw	2f-.LPG0(13)		# machine type not good enough, crash
 	.align 16
-1:	.long	0x000a0000,0x00000000
-2:
+2:	.long	0x000a0000,0x8badcccc
+#if defined(CONFIG_64BIT)
+#if defined(CONFIG_MARCH_Z10)
+	.long 0xc100efe3, 0xf0680000
+#elif defined(CONFIG_MARCH_Z9_109)
+	.long 0xc100efc3, 0x00000000
+#elif defined(CONFIG_MARCH_Z990)
+	.long 0xc0002000, 0x00000000
+#elif defined(CONFIG_MARCH_Z900)
+	.long 0xc0000000, 0x00000000
+#endif
+#else
 #if defined(CONFIG_MARCH_Z10)
-	.short 0x9672, 0x2064, 0x2066, 0x2084, 0x2086, 0x2094, 0x2096
+	.long 0x8100c880, 0x00000000
 #elif defined(CONFIG_MARCH_Z9_109)
-	.short 0x9672, 0x2064, 0x2066, 0x2084, 0x2086
+	.long 0x8100c880, 0x00000000
 #elif defined(CONFIG_MARCH_Z990)
-	.short 0x9672, 0x2064, 0x2066
+	.long 0x80002000, 0x00000000
 #elif defined(CONFIG_MARCH_Z900)
-	.short 0x9672
+	.long 0x80000000, 0x00000000
+#endif
 #endif
-3:	la	%r1,2(%r1)
-	brct	%r0,0b
+3:
 #endif
 
 	l	%r13,4f-.LPG0(%r13)
-- 
cgit v0.10.2


From d90cbd469c9c7c1494fc2084af9319e6a557368b Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:24 +0200
Subject: [S390] add mini sclp driver

This adds a mini sclp device driver for very early use. The primary
and probably only use will be to emit a message to the console if the
cpu doesn't provide the minimum required capabilities to run the kernel.
After printing the message a disabled wait will be loaded and the
machine stops operating.
Printing the message is also part of this patch.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 228e310..0657de7 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -22,7 +22,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
 obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o setup.o \
 	    processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
 	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \
-	    vdso.o vtime.o sysinfo.o nmi.o
+	    vdso.o vtime.o sysinfo.o nmi.o sclp.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index bf8cf1c..6d22741 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -1,7 +1,5 @@
 /*
- *  arch/s390/kernel/head.S
- *
- * Copyright (C) IBM Corp. 1999,2006
+ * Copyright IBM Corp. 1999,2009
  *
  *    Author(s): Hartmut Penner <hp@de.ibm.com>
  *		 Martin Schwidefsky <schwidefsky@de.ibm.com>
@@ -494,7 +492,19 @@ startup:basr	%r13,0			# get base
 	n	%r0,2f+12-.LPG0(%r13)
 	cl	%r0,2f+12-.LPG0(%r13)
 	je	3f
-1:	lpsw	2f-.LPG0(13)		# machine type not good enough, crash
+1:	l	%r15,.Lstack-.LPG0(%r13)
+	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
+	ahi	%r15,-96
+	la	%r2,.Lals_string-.LPG0(%r13)
+	l	%r3,.Lsclp_print-.LPG0(%r13)
+	basr	%r14,%r3
+	lpsw	2f-.LPG0(%r13)		# machine type not good enough, crash
+.Lals_string:
+	.asciz	"The Linux kernel requires more recent processor hardware"
+.Lsclp_print:
+	.long	_sclp_print_early
+.Lstack:
+	.long	init_thread_union
 	.align 16
 2:	.long	0x000a0000,0x8badcccc
 #if defined(CONFIG_64BIT)
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
new file mode 100644
index 0000000..20639df
--- /dev/null
+++ b/arch/s390/kernel/sclp.S
@@ -0,0 +1,327 @@
+/*
+ * Mini SCLP driver.
+ *
+ * Copyright IBM Corp. 2004,2009
+ *
+ *   Author(s):	Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
+ *		Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+LC_EXT_NEW_PSW		= 0x58			# addr of ext int handler
+LC_EXT_INT_PARAM	= 0x80			# addr of ext int parameter
+LC_EXT_INT_CODE		= 0x86			# addr of ext int code
+
+#
+# Subroutine which waits synchronously until either an external interruption
+# or a timeout occurs.
+#
+# Parameters:
+#   R2	= 0 for no timeout, non-zero for timeout in (approximated) seconds
+#
+# Returns:
+#   R2	= 0 on interrupt, 2 on timeout
+#   R3	= external interruption parameter if R2=0
+#
+
+.section ".init.text","ax"
+
+_sclp_wait_int:
+	stm	%r6,%r15,24(%r15)		# save registers
+	basr	%r13,0				# get base register
+.LbaseS1:
+	ahi	%r15,-96			# create stack frame
+	la	%r8,LC_EXT_NEW_PSW		# register int handler
+	mvc	.LoldpswS1-.LbaseS1(8,%r13),0(%r8)
+	mvc	0(8,%r8),.LextpswS1-.LbaseS1(%r13)
+	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
+	ltr	%r2,%r2
+	jz	.LsetctS1
+	ahi	%r6,0x0800			# cr mask for clock int (cr0.52)
+	stck	.LtimeS1-.LbaseS1(%r13)		# initiate timeout
+	al	%r2,.LtimeS1-.LbaseS1(%r13)
+	st	%r2,.LtimeS1-.LbaseS1(%r13)
+	sckc	.LtimeS1-.LbaseS1(%r13)
+
+.LsetctS1:
+	stctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# enable required interrupts
+	l	%r0,.LctlS1-.LbaseS1(%r13)
+	lhi	%r1,~(0x200 | 0x800)		# clear old values
+	nr	%r1,%r0
+	or	%r1,%r6				# set new value
+	st	%r1,.LctlS1-.LbaseS1(%r13)
+	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)
+	st	%r0,.LctlS1-.LbaseS1(%r13)
+	lhi	%r2,2				# return code for timeout
+.LloopS1:
+	lpsw	.LwaitpswS1-.LbaseS1(%r13)	# wait until interrupt
+.LwaitS1:
+	lh	%r7,LC_EXT_INT_CODE
+	chi	%r7,0x1004			# timeout?
+	je	.LtimeoutS1
+	chi	%r7,0x2401			# service int?
+	jne	.LloopS1
+	sr	%r2,%r2
+	l	%r3,LC_EXT_INT_PARAM
+.LtimeoutS1:
+	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# restore interrupt setting
+	# restore old handler
+	mvc	0(8,%r8),.LoldpswS1-.LbaseS1(%r13)
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14				# return to caller
+
+	.align	8
+.LoldpswS1:
+	.long	0, 0				# old ext int PSW
+.LextpswS1:
+	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
+.LwaitpswS1:
+	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int
+.LtimeS1:
+	.quad	0				# current time
+.LctlS1:
+	.long	0				# CT0 contents
+
+#
+# Subroutine to synchronously issue a service call.
+#
+# Parameters:
+#   R2	= command word
+#   R3	= sccb address
+#
+# Returns:
+#   R2	= 0 on success, 1 on failure
+#   R3	= sccb response code if R2 = 0
+#
+
+_sclp_servc:
+	stm	%r6,%r15,24(%r15)		# save registers
+	ahi	%r15,-96			# create stack frame
+	lr	%r6,%r2				# save command word
+	lr	%r7,%r3				# save sccb address
+.LretryS2:
+	lhi	%r2,1				# error return code
+	.insn	rre,0xb2200000,%r6,%r7		# servc
+	brc	1,.LendS2			# exit if not operational
+	brc	8,.LnotbusyS2			# go on if not busy
+	sr	%r2,%r2				# wait until no longer busy
+	bras	%r14,_sclp_wait_int
+	j	.LretryS2			# retry
+.LnotbusyS2:
+	sr	%r2,%r2				# wait until result
+	bras	%r14,_sclp_wait_int
+	sr	%r2,%r2
+	lh	%r3,6(%r7)
+.LendS2:
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14
+
+#
+# Subroutine to set up the SCLP interface.
+#
+# Parameters:
+#   R2	= 0 to activate, non-zero to deactivate
+#
+# Returns:
+#   R2	= 0 on success, non-zero on failure
+#
+
+_sclp_setup:
+	stm	%r6,%r15,24(%r15)		# save registers
+	ahi	%r15,-96			# create stack frame
+	basr	%r13,0				# get base register
+.LbaseS3:
+	l	%r6,.LsccbS0-.LbaseS3(%r13)	# prepare init mask sccb
+	mvc	0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
+	ltr	%r2,%r2				# initialization?
+	jz	.LdoinitS3			# go ahead
+	# clear masks
+	xc	.LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
+.LdoinitS3:
+	l	%r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
+	lr	%r3,%r6				# get sccb address
+	bras	%r14,_sclp_servc		# issue service call
+	ltr	%r2,%r2				# servc successful?
+	jnz	.LerrorS3
+	chi	%r3,0x20			# write mask successful?
+	jne	.LerrorS3
+	# check masks
+	la	%r2,.LinitmaskS3-.LinitsccbS3(%r6)
+	l	%r1,0(%r2)			# receive mask ok?
+	n	%r1,12(%r2)
+	cl	%r1,0(%r2)
+	jne	.LerrorS3
+	l	%r1,4(%r2)			# send mask ok?
+	n	%r1,8(%r2)
+	cl	%r1,4(%r2)
+	sr	%r2,%r2
+	je	.LendS3
+.LerrorS3:
+	lhi	%r2,1				# error return code
+.LendS3:
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14
+.LwritemaskS3:
+	.long	0x00780005			# SCLP command for write mask
+.LinitsccbS3:
+	.word	.LinitendS3-.LinitsccbS3
+	.byte	0,0,0,0
+	.word	0
+	.word	0
+	.word	4
+.LinitmaskS3:
+	.long	0x80000000
+	.long	0x40000000
+	.long	0
+	.long	0
+.LinitendS3:
+
+#
+# Subroutine which prints a given text to the SCLP console.
+#
+# Parameters:
+#   R2	= address of nil-terminated ASCII text
+#
+# Returns:
+#   R2	= 0 on success, 1 on failure
+#
+
+_sclp_print:
+	stm	%r6,%r15,24(%r15)		# save registers
+	ahi	%r15,-96			# create stack frame
+	basr	%r13,0				# get base register
+.LbaseS4:
+	l	%r8,.LsccbS0-.LbaseS4(%r13)	# prepare write data sccb
+	mvc	0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
+	la	%r7,.LmtoS4-.LwritesccbS4(%r8)	# current mto addr
+	sr	%r0,%r0
+	l	%r10,.Lascebc-.LbaseS4(%r13)	# address of translation table
+.LinitmtoS4:
+	# initialize mto
+	mvc	0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
+	lhi	%r6,.LmtoendS4-.LmtoS4		# current mto length
+.LloopS4:
+	ic	%r0,0(%r2)			# get character
+	ahi	%r2,1
+	ltr	%r0,%r0				# end of string?
+	jz	.LfinalizemtoS4
+	chi	%r0,0x15			# end of line (NL)?
+	jz	.LfinalizemtoS4
+	stc	%r0,0(%r6,%r7)			# copy to mto
+	la	%r11,0(%r6,%r7)
+	tr	0(1,%r11),0(%r10)		# translate to EBCDIC
+	ahi	%r6,1
+	j	.LloopS4
+.LfinalizemtoS4:
+	sth	%r6,0(%r7)			# update mto length
+	lh	%r9,.LmdbS4-.LwritesccbS4(%r8)	# update mdb length
+	ar	%r9,%r6
+	sth	%r9,.LmdbS4-.LwritesccbS4(%r8)
+	lh	%r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
+	ar	%r9,%r6
+	sth	%r9,.LevbufS4-.LwritesccbS4(%r8)
+	lh	%r9,0(%r8)			# update sccb length
+	ar	%r9,%r6
+	sth	%r9,0(%r8)
+	ar	%r7,%r6				# update current mto adress
+	ltr	%r0,%r0				# more characters?
+	jnz	.LinitmtoS4
+	l	%r2,.LwritedataS4-.LbaseS4(%r13)# write data
+	lr	%r3,%r8
+	bras	%r14,_sclp_servc
+	ltr	%r2,%r2				# servc successful?
+	jnz	.LendS4
+	chi	%r3,0x20			# write data successful?
+	je	.LendS4
+	lhi	%r2,1				# error return code
+.LendS4:
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14
+
+#
+# Function which prints a given text to the SCLP console.
+#
+# Parameters:
+#   R2	= address of nil-terminated ASCII text
+#
+# Returns:
+#   R2	= 0 on success, 1 on failure
+#
+
+	.globl _sclp_print_early
+_sclp_print_early:
+	stm	%r6,%r15,24(%r15)		# save registers
+	ahi	%r15,-96			# create stack frame
+	lr	%r10,%r2			# save string pointer
+	lhi	%r2,0
+	bras	%r14,_sclp_setup		# enable console
+	ltr	%r2,%r2
+	jnz	.LendS5
+	lr	%r2,%r10
+	bras	%r14,_sclp_print		# print string
+	ltr	%r2,%r2
+	jnz	.LendS5
+	lhi	%r2,1
+	bras	%r14,_sclp_setup		# disable console
+.LendS5:
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14
+
+.LwritedataS4:
+	.long	0x00760005			# SCLP command for write data
+.LwritesccbS4:
+	# sccb
+	.word	.LmtoS4-.LwritesccbS4
+	.byte	0
+	.byte	0,0,0
+	.word	0
+
+	# evbuf
+.LevbufS4:
+	.word	.LmtoS4-.LevbufS4
+	.byte	0x02
+	.byte	0
+	.word	0
+
+.LmdbS4:
+	# mdb
+	.word	.LmtoS4-.LmdbS4
+	.word	1
+	.long	0xd4c4c240
+	.long	1
+
+	# go
+.LgoS4:
+	.word	.LmtoS4-.LgoS4
+	.word	1
+	.long	0
+	.byte	0,0,0,0,0,0,0,0
+	.byte	0,0,0
+	.byte	0
+	.byte	0,0,0,0,0,0,0
+	.byte	0
+	.word	0
+	.byte	0,0,0,0,0,0,0,0,0,0
+	.byte	0,0,0,0,0,0,0,0
+	.byte	0,0,0,0,0,0,0,0
+
+.LmtoS4:
+	.word	.LmtoendS4-.LmtoS4
+	.word	4
+	.word	0x1000
+	.byte	0
+	.byte	0,0,0
+.LmtoendS4:
+
+	# Global constants
+.LsccbS0:
+	.long	_sclp_work_area
+.Lascebc:
+	.long	_ascebc
+.previous
+
+.section ".init.data","a"
+	.balign 4096
+_sclp_work_area:
+	.fill	4096
+.previous
-- 
cgit v0.10.2


From 7757591ab4a36314a258e181dbf0994415c288c2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:25 +0200
Subject: [S390] implement is_compat_task

Implement is_compat_task and use it all over the place.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index de065b3..01a0802 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -5,6 +5,7 @@
  */
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <linux/thread_info.h>
 
 #define PSW32_MASK_PER		0x40000000UL
 #define PSW32_MASK_DAT		0x04000000UL
@@ -163,12 +164,28 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
+#ifdef CONFIG_COMPAT
+
+static inline int is_compat_task(void)
+{
+	return test_thread_flag(TIF_31BIT);
+}
+
+#else
+
+static inline int is_compat_task(void)
+{
+	return 0;
+}
+
+#endif
+
 static inline void __user *compat_alloc_user_space(long len)
 {
 	unsigned long stack;
 
 	stack = KSTK_ESP(current);
-	if (test_thread_flag(TIF_31BIT))
+	if (is_compat_task())
 		stack &= 0x7fffffffUL;
 	return (void __user *) (stack - len);
 }
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index a3acd8e..355f7a3 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -32,6 +32,7 @@
 #include <linux/elfcore.h>
 #include <linux/kernel_stat.h>
 #include <linux/syscalls.h>
+#include <asm/compat.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -204,7 +205,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	save_fp_regs(&p->thread.fp_regs);
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
-		if (test_thread_flag(TIF_31BIT)) {
+		if (is_compat_task()) {
 			p->thread.acrs[0] = (unsigned int) regs->gprs[6];
 		} else {
 			p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 75c496f..99eef17 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -36,7 +36,7 @@
 #include <linux/elf.h>
 #include <linux/regset.h>
 #include <linux/tracehook.h>
-
+#include <linux/compat.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -69,7 +69,7 @@ FixPerRegisters(struct task_struct *task)
 	if (per_info->single_step) {
 		per_info->control_regs.bits.starting_addr = 0;
 #ifdef CONFIG_COMPAT
-		if (test_thread_flag(TIF_31BIT))
+		if (is_compat_task())
 			per_info->control_regs.bits.ending_addr = 0x7fffffffUL;
 		else
 #endif
@@ -482,8 +482,7 @@ static int peek_user_compat(struct task_struct *child,
 {
 	__u32 tmp;
 
-	if (!test_thread_flag(TIF_31BIT) ||
-	    (addr & 3) || addr > sizeof(struct user) - 3)
+	if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3)
 		return -EIO;
 
 	tmp = __peek_user_compat(child, addr);
@@ -584,8 +583,7 @@ static int __poke_user_compat(struct task_struct *child,
 static int poke_user_compat(struct task_struct *child,
 			    addr_t addr, addr_t data)
 {
-	if (!test_thread_flag(TIF_31BIT) ||
-	    (addr & 3) || addr > sizeof(struct user32) - 3)
+	if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user32) - 3)
 		return -EIO;
 
 	return __poke_user_compat(child, addr, data);
@@ -660,7 +658,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	}
 
 	if (unlikely(current->audit_context))
-		audit_syscall_entry(test_thread_flag(TIF_31BIT) ?
+		audit_syscall_entry(is_compat_task() ?
 					AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
 				    regs->gprs[2], regs->orig_gpr2,
 				    regs->gprs[3], regs->gprs[4],
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 3cf74c3..062bd64 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -26,6 +26,7 @@
 #include <linux/binfmts.h>
 #include <linux/tracehook.h>
 #include <linux/syscalls.h>
+#include <linux/compat.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/lowcore.h>
@@ -482,7 +483,7 @@ void do_signal(struct pt_regs *regs)
 		/* Whee!  Actually deliver the signal.  */
 		int ret;
 #ifdef CONFIG_COMPAT
-		if (test_thread_flag(TIF_31BIT)) {
+		if (is_compat_task()) {
 			ret = handle_signal32(signr, &ka, &info, oldset, regs);
 	        }
 		else
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 7b76ee4..45e1708 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -22,7 +22,7 @@
 #include <linux/elf.h>
 #include <linux/security.h>
 #include <linux/bootmem.h>
-
+#include <linux/compat.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
 #include <asm/processor.h>
@@ -214,7 +214,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	vdso_pagelist = vdso64_pagelist;
 	vdso_pages = vdso64_pages;
 #ifdef CONFIG_COMPAT
-	if (test_thread_flag(TIF_31BIT)) {
+	if (is_compat_task()) {
 		vdso_pagelist = vdso32_pagelist;
 		vdso_pages = vdso32_pages;
 	}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 833e836..220a152 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -19,6 +19,7 @@
 #include <linux/ptrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/compat.h>
 #include <linux/smp.h>
 #include <linux/kdebug.h>
 #include <linux/smp_lock.h>
@@ -239,7 +240,7 @@ static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
 	up_read(&mm->mmap_sem);
 	clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
 #ifdef CONFIG_COMPAT
-	compat = test_tsk_thread_flag(current, TIF_31BIT);
+	compat = is_compat_task();
 	if (compat && instruction == 0x0a77)
 		sys32_sigreturn();
 	else if (compat && instruction == 0x0aad)
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index e008d23..f4558cc 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -28,6 +28,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <asm/pgalloc.h>
+#include <asm/compat.h>
 
 /*
  * Top of mmap area (just below the process stack).
@@ -55,7 +56,7 @@ static inline int mmap_is_legacy(void)
 	/*
 	 * Force standard allocation for 64 bit programs.
 	 */
-	if (!test_thread_flag(TIF_31BIT))
+	if (!is_compat_task())
 		return 1;
 #endif
 	return sysctl_legacy_va_layout ||
@@ -91,7 +92,7 @@ EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
 
 int s390_mmap_check(unsigned long addr, unsigned long len)
 {
-	if (!test_thread_flag(TIF_31BIT) &&
+	if (!is_compat_task() &&
 	    len >= TASK_SIZE && TASK_SIZE < (1UL << 53))
 		return crst_table_upgrade(current->mm, 1UL << 53);
 	return 0;
@@ -108,8 +109,7 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
 	area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
 	if (!(area & ~PAGE_MASK))
 		return area;
-	if (area == -ENOMEM &&
-	    !test_thread_flag(TIF_31BIT) && TASK_SIZE < (1UL << 53)) {
+	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
 		/* Upgrade the page table to 4 levels and retry. */
 		rc = crst_table_upgrade(mm, 1UL << 53);
 		if (rc)
@@ -131,8 +131,7 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
 	area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
 	if (!(area & ~PAGE_MASK))
 		return area;
-	if (area == -ENOMEM &&
-	    !test_thread_flag(TIF_31BIT) && TASK_SIZE < (1UL << 53)) {
+	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
 		/* Upgrade the page table to 4 levels and retry. */
 		rc = crst_table_upgrade(mm, 1UL << 53);
 		if (rc)
-- 
cgit v0.10.2


From bcf5cef7db869dd3b0ec55ad99641e66b2f5cf02 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:26 +0200
Subject: [S390] secure computing arch backend

Enable secure computing on s390 as well.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2eca5fe..1094787 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -567,6 +567,24 @@ bool "s390 guest support for KVM (EXPERIMENTAL)"
 	  the KVM hypervisor. This will add detection for KVM as well  as a
 	  virtio transport. If KVM is detected, the virtio console will be
 	  the default console.
+
+config SECCOMP
+	bool "Enable seccomp to safely compute untrusted bytecode"
+	depends on PROC_FS
+	default y
+	help
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via /proc/<pid>/seccomp, it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
+	  If unsure, say Y.
+
 endmenu
 
 source "net/Kconfig"
diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
new file mode 100644
index 0000000..781a9cf
--- /dev/null
+++ b/arch/s390/include/asm/seccomp.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_S390_SECCOMP_H
+#define _ASM_S390_SECCOMP_H
+
+#include <linux/unistd.h>
+
+#define __NR_seccomp_read	__NR_read
+#define __NR_seccomp_write	__NR_write
+#define __NR_seccomp_exit	__NR_exit
+#define __NR_seccomp_sigreturn	__NR_sigreturn
+
+#define __NR_seccomp_read_32	__NR_read
+#define __NR_seccomp_write_32	__NR_write
+#define __NR_seccomp_exit_32	__NR_exit
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#endif	/* _ASM_S390_SECCOMP_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 461f2ab..2f86653 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -83,14 +83,15 @@ static inline struct thread_info *current_thread_info(void)
 /*
  * thread information flags bit numbers
  */
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_RESTART_SVC		4	/* restart svc with new svc number */
-#define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
 #define TIF_SINGLE_STEP		6	/* deliver sigtrap on return to user */
 #define TIF_MCCK_PENDING	7	/* machine check handling is pending */
+#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	9	/* syscall auditing active */
+#define TIF_SECCOMP		10	/* secure computing */
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling 
 					   TIF_NEED_RESCHED */
@@ -99,15 +100,16 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTORE_SIGMASK	20	/* restore signal mask in do_signal() */
 #define TIF_FREEZE		21	/* thread is freezing for suspend */
 
-#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_RESTART_SVC	(1<<TIF_RESTART_SVC)
-#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SINGLE_STEP	(1<<TIF_SINGLE_STEP)
 #define _TIF_MCCK_PENDING	(1<<TIF_MCCK_PENDING)
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index f3e2759..db25cdc 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -53,6 +53,7 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
 _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
+_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | _TIF_SECCOMP>>8)
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
@@ -265,7 +266,7 @@ sysc_do_restart:
 	sth	%r7,SP_SVCNR(%r15)
 	sll	%r7,2		  # svc number *4
 	l	%r8,BASED(.Lsysc_table)
-	tm	__TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
+	tm	__TI_flags+2(%r9),_TIF_SYSCALL
 	l	%r8,0(%r7,%r8)	  # get system call addr.
 	bnz	BASED(sysc_tracesys)
 	basr	%r14,%r8	  # call sys_xxxx
@@ -405,7 +406,7 @@ sysc_tracego:
 	basr	%r14,%r8		# call sys_xxx
 	st	%r2,SP_R2(%r15)		# store return value
 sysc_tracenogo:
-	tm	__TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
+	tm	__TI_flags+2(%r9),_TIF_SYSCALL
 	bz	BASED(sysc_return)
 	l	%r1,BASED(.Ltrace_exit)
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 84a1058..3cec9b5 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -56,6 +56,7 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
 _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
+_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | _TIF_SECCOMP>>8)
 
 #define BASED(name) name-system_call(%r13)
 
@@ -260,7 +261,7 @@ sysc_do_restart:
 	larl	%r10,sys_call_table_emu  # use 31 bit emulation system calls
 sysc_noemu:
 #endif
-	tm	__TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
+	tm	__TI_flags+6(%r9),_TIF_SYSCALL
 	lgf	%r8,0(%r7,%r10) # load address of system call routine
 	jnz	sysc_tracesys
 	basr	%r14,%r8	# call sys_xxxx
@@ -391,7 +392,7 @@ sysc_tracego:
 	basr	%r14,%r8		# call sys_xxx
 	stg	%r2,SP_R2(%r15)		# store return value
 sysc_tracenogo:
-	tm	__TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
+	tm	__TI_flags+6(%r9),_TIF_SYSCALL
 	jz	sysc_return
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	larl	%r14,sysc_return	# return point is sysc_return
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 99eef17..b6fc1ae 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -36,7 +36,8 @@
 #include <linux/elf.h>
 #include <linux/regset.h>
 #include <linux/tracehook.h>
-#include <linux/compat.h>
+#include <linux/seccomp.h>
+#include <asm/compat.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -640,6 +641,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 {
 	long ret;
 
+	/* Do the secure computing check first. */
+	secure_computing(regs->gprs[2]);
+
 	/*
 	 * The sysc_tracesys code in entry.S stored the system
 	 * call number to gprs[2].
-- 
cgit v0.10.2


From e45efa99b0b0035a2afc192c242e37eec5477497 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 12 Jun 2009 10:26:27 +0200
Subject: [S390] cio: fix sanity checks in device_ops.

Some sanity checks in device_ops.c test the output of container_of
macros to be !NULL. Test the input parameters instead.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 151754d..bf0a24a 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -114,7 +114,7 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
 	struct subchannel *sch;
 	int ret;
 
-	if (!cdev)
+	if (!cdev || !cdev->dev.parent)
 		return -ENODEV;
 	if (cdev->private->state == DEV_STATE_NOT_OPER)
 		return -ENODEV;
@@ -122,8 +122,6 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
 	    cdev->private->state != DEV_STATE_W4SENSE)
 		return -EINVAL;
 	sch = to_subchannel(cdev->dev.parent);
-	if (!sch)
-		return -ENODEV;
 	ret = cio_clear(sch);
 	if (ret == 0)
 		cdev->private->intparm = intparm;
@@ -161,11 +159,9 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
 	struct subchannel *sch;
 	int ret;
 
-	if (!cdev)
+	if (!cdev || !cdev->dev.parent)
 		return -ENODEV;
 	sch = to_subchannel(cdev->dev.parent);
-	if (!sch)
-		return -ENODEV;
 	if (cdev->private->state == DEV_STATE_NOT_OPER)
 		return -ENODEV;
 	if (cdev->private->state == DEV_STATE_VERIFY ||
@@ -339,7 +335,7 @@ int ccw_device_halt(struct ccw_device *cdev, unsigned long intparm)
 	struct subchannel *sch;
 	int ret;
 
-	if (!cdev)
+	if (!cdev || !cdev->dev.parent)
 		return -ENODEV;
 	if (cdev->private->state == DEV_STATE_NOT_OPER)
 		return -ENODEV;
@@ -347,8 +343,6 @@ int ccw_device_halt(struct ccw_device *cdev, unsigned long intparm)
 	    cdev->private->state != DEV_STATE_W4SENSE)
 		return -EINVAL;
 	sch = to_subchannel(cdev->dev.parent);
-	if (!sch)
-		return -ENODEV;
 	ret = cio_halt(sch);
 	if (ret == 0)
 		cdev->private->intparm = intparm;
@@ -372,11 +366,9 @@ int ccw_device_resume(struct ccw_device *cdev)
 {
 	struct subchannel *sch;
 
-	if (!cdev)
+	if (!cdev || !cdev->dev.parent)
 		return -ENODEV;
 	sch = to_subchannel(cdev->dev.parent);
-	if (!sch)
-		return -ENODEV;
 	if (cdev->private->state == DEV_STATE_NOT_OPER)
 		return -ENODEV;
 	if (cdev->private->state != DEV_STATE_ONLINE ||
@@ -471,11 +463,11 @@ __u8 ccw_device_get_path_mask(struct ccw_device *cdev)
 {
 	struct subchannel *sch;
 
-	sch = to_subchannel(cdev->dev.parent);
-	if (!sch)
+	if (!cdev->dev.parent)
 		return 0;
-	else
-		return sch->lpm;
+
+	sch = to_subchannel(cdev->dev.parent);
+	return sch->lpm;
 }
 
 /*
-- 
cgit v0.10.2


From 4c57542320e73b9ff46b04092273dbcc184a4fb6 Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Fri, 12 Jun 2009 10:26:28 +0200
Subject: [S390] qdio: simplify error handling in irq handler

The check for the device status in qdio_establish_handle_irq()
had dead code. Remove the unused code and simplify the error
handling.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index accd957..ba4facc 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -881,42 +881,26 @@ no_handler:
 	qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED);
 }
 
-static int qdio_establish_check_errors(struct ccw_device *cdev, int cstat,
-				       int dstat)
+static void qdio_establish_handle_irq(struct ccw_device *cdev, int cstat,
+				      int dstat)
 {
 	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
 
-	if (cstat || (dstat & ~(DEV_STAT_CHN_END | DEV_STAT_DEV_END))) {
-		DBF_ERROR("EQ:ck con");
-		goto error;
-	}
+	DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq");
 
-	if (!(dstat & DEV_STAT_DEV_END)) {
-		DBF_ERROR("EQ:no dev");
+	if (cstat)
 		goto error;
-	}
-
-	if (dstat & ~(DEV_STAT_CHN_END | DEV_STAT_DEV_END)) {
-		DBF_ERROR("EQ: bad io");
+	if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END))
 		goto error;
-	}
-	return 0;
+	if (!(dstat & DEV_STAT_DEV_END))
+		goto error;
+	qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED);
+	return;
+
 error:
 	DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no);
 	DBF_ERROR("ds: %2x cs:%2x", dstat, cstat);
-
 	qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
-	return 1;
-}
-
-static void qdio_establish_handle_irq(struct ccw_device *cdev, int cstat,
-				      int dstat)
-{
-	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
-
-	DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq");
-	if (!qdio_establish_check_errors(cdev, cstat, dstat))
-		qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED);
 }
 
 /* qdio interrupt handler */
@@ -946,7 +930,6 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
 		}
 	}
 	qdio_irq_check_sense(irq_ptr, irb);
-
 	cstat = irb->scsw.cmd.cstat;
 	dstat = irb->scsw.cmd.dstat;
 
@@ -954,22 +937,19 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
 	case QDIO_IRQ_STATE_INACTIVE:
 		qdio_establish_handle_irq(cdev, cstat, dstat);
 		break;
-
 	case QDIO_IRQ_STATE_CLEANUP:
 		qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
 		break;
-
 	case QDIO_IRQ_STATE_ESTABLISHED:
 	case QDIO_IRQ_STATE_ACTIVE:
 		if (cstat & SCHN_STAT_PCI) {
 			qdio_int_handler_pci(irq_ptr);
 			return;
 		}
-		if ((cstat & ~SCHN_STAT_PCI) || dstat) {
+		if (cstat || dstat)
 			qdio_handle_activate_check(cdev, intparm, cstat,
 						   dstat);
-			break;
-		}
+		break;
 	default:
 		WARN_ON(1);
 	}
-- 
cgit v0.10.2


From a7c65a559ac371a08e67600ae585052441d71392 Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Fri, 12 Jun 2009 10:26:29 +0200
Subject: [S390] qdio: inline qdio_perf_stat_inc

Move qdio_perf_stat_inc to the header file so it can be inlined.
Remove unused qdio_perf_stat_dec.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/qdio_perf.c b/drivers/s390/cio/qdio_perf.c
index 136d0f0..eff9439 100644
--- a/drivers/s390/cio/qdio_perf.c
+++ b/drivers/s390/cio/qdio_perf.c
@@ -25,18 +25,6 @@ struct qdio_perf_stats perf_stats;
 static struct proc_dir_entry *qdio_perf_pde;
 #endif
 
-inline void qdio_perf_stat_inc(atomic_long_t *count)
-{
-	if (qdio_performance_stats)
-		atomic_long_inc(count);
-}
-
-inline void qdio_perf_stat_dec(atomic_long_t *count)
-{
-	if (qdio_performance_stats)
-		atomic_long_dec(count);
-}
-
 /*
  * procfs functions
  */
diff --git a/drivers/s390/cio/qdio_perf.h b/drivers/s390/cio/qdio_perf.h
index 7821ac4..ff4504c 100644
--- a/drivers/s390/cio/qdio_perf.h
+++ b/drivers/s390/cio/qdio_perf.h
@@ -9,7 +9,6 @@
 #define QDIO_PERF_H
 
 #include <linux/types.h>
-#include <linux/device.h>
 #include <asm/atomic.h>
 
 struct qdio_perf_stats {
@@ -50,10 +49,13 @@ struct qdio_perf_stats {
 extern struct qdio_perf_stats perf_stats;
 extern int qdio_performance_stats;
 
+static inline void qdio_perf_stat_inc(atomic_long_t *count)
+{
+	if (qdio_performance_stats)
+		atomic_long_inc(count);
+}
+
 int qdio_setup_perf_stats(void);
 void qdio_remove_perf_stats(void);
 
-extern void qdio_perf_stat_inc(atomic_long_t *count);
-extern void qdio_perf_stat_dec(atomic_long_t *count);
-
 #endif
-- 
cgit v0.10.2


From fcf7581f7ca82e63e4e137be77c342a4e4ec8401 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:30 +0200
Subject: [S390] 3270: do not register with tty_register_device

The tty3270_notifier that calls tty_register_device / tty_unregister_device
is harmful in two ways:
1) the device node that is create is wrong because the minor numbers for
   3270 tty start with 1 and tty_notifier passes the minor as index.
2) If 1) is corrected you'll get a warning:
     WARNING: at fs/sysfs/dir.c:462 sysfs_add_one+0x4c/0x60()
     sysfs: duplicate filename '227:1' can not be created
   The 227:1 link is already created by raw3270_create_attributes to refer
   to ../../class/tty/tty<devno>. There cannot be two links.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index a7fe630..aa7a114 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -1754,14 +1754,6 @@ static const struct tty_operations tty3270_ops = {
 	.set_termios = tty3270_set_termios
 };
 
-static void tty3270_notifier(int index, int active)
-{
-	if (active)
-		tty_register_device(tty3270_driver, index, NULL);
-	else
-		tty_unregister_device(tty3270_driver, index);
-}
-
 /*
  * 3270 tty registration code called from tty_init().
  * Most kernel services (incl. kmalloc) are available at this poimt.
@@ -1796,12 +1788,6 @@ static int __init tty3270_init(void)
 		return ret;
 	}
 	tty3270_driver = driver;
-	ret = raw3270_register_notifier(tty3270_notifier);
-	if (ret) {
-		put_tty_driver(driver);
-		return ret;
-
-	}
 	return 0;
 }
 
@@ -1810,7 +1796,6 @@ tty3270_exit(void)
 {
 	struct tty_driver *driver;
 
-	raw3270_unregister_notifier(tty3270_notifier);
 	driver = tty3270_driver;
 	tty3270_driver = NULL;
 	tty_unregister_driver(driver);
-- 
cgit v0.10.2


From 205d7ab9c9af6847dda30650a0b8f98555a20654 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:31 +0200
Subject: [S390] 3270: lock dependency fixes

Lockdep found a problem with the lock order of the view lock and the
ccw device lock. raw3270_activate_view/raw3270_deactivate_view first
take the ccw device lock then call the activate/deactivate functions
of the view which take view lock. The update functions of the
con3270/tty3270 view will first take the view lock, then take the
ccw device lock. To fix this the activate/deactivate functions are
changed to avoid taking the view lock by moving the functions calls
that modify the 3270 output buffer to the update function which is
called by a timer.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index d028d2e..ed5396d 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -64,7 +64,7 @@ static struct con3270 *condev;
 #define CON_UPDATE_ERASE	1	/* Use EWRITEA instead of WRITE. */
 #define CON_UPDATE_LIST		2	/* Update lines in tty3270->update. */
 #define CON_UPDATE_STATUS	4	/* Update status line. */
-#define CON_UPDATE_ALL		7
+#define CON_UPDATE_ALL		8	/* Recreate screen. */
 
 static void con3270_update(struct con3270 *);
 
@@ -73,18 +73,10 @@ static void con3270_update(struct con3270 *);
  */
 static void con3270_set_timer(struct con3270 *cp, int expires)
 {
-	if (expires == 0) {
-		if (timer_pending(&cp->timer))
-			del_timer(&cp->timer);
-		return;
-	}
-	if (timer_pending(&cp->timer) &&
-	    mod_timer(&cp->timer, jiffies + expires))
-		return;
-	cp->timer.function = (void (*)(unsigned long)) con3270_update;
-	cp->timer.data = (unsigned long) cp;
-	cp->timer.expires = jiffies + expires;
-	add_timer(&cp->timer);
+	if (expires == 0)
+		del_timer(&cp->timer);
+	else
+		mod_timer(&cp->timer, jiffies + expires);
 }
 
 /*
@@ -225,6 +217,12 @@ con3270_update(struct con3270 *cp)
 
 	spin_lock_irqsave(&cp->view.lock, flags);
 	updated = 0;
+	if (cp->update_flags & CON_UPDATE_ALL) {
+		con3270_rebuild_update(cp);
+		con3270_update_status(cp);
+		cp->update_flags = CON_UPDATE_ERASE | CON_UPDATE_LIST |
+			CON_UPDATE_STATUS;
+	}
 	if (cp->update_flags & CON_UPDATE_ERASE) {
 		/* Use erase write alternate to initialize display. */
 		raw3270_request_set_cmd(wrq, TC_EWRITEA);
@@ -302,7 +300,6 @@ con3270_read_tasklet(struct raw3270_request *rrq)
 		deactivate = 1;
 		break;
 	case 0x6d:	/* clear: start from scratch. */
-		con3270_rebuild_update(cp);
 		cp->update_flags = CON_UPDATE_ALL;
 		con3270_set_timer(cp, 1);
 		break;
@@ -382,30 +379,21 @@ con3270_issue_read(struct con3270 *cp)
 static int
 con3270_activate(struct raw3270_view *view)
 {
-	unsigned long flags;
 	struct con3270 *cp;
 
 	cp = (struct con3270 *) view;
-	spin_lock_irqsave(&cp->view.lock, flags);
-	cp->nr_up = 0;
-	con3270_rebuild_update(cp);
-	con3270_update_status(cp);
 	cp->update_flags = CON_UPDATE_ALL;
 	con3270_set_timer(cp, 1);
-	spin_unlock_irqrestore(&cp->view.lock, flags);
 	return 0;
 }
 
 static void
 con3270_deactivate(struct raw3270_view *view)
 {
-	unsigned long flags;
 	struct con3270 *cp;
 
 	cp = (struct con3270 *) view;
-	spin_lock_irqsave(&cp->view.lock, flags);
 	del_timer(&cp->timer);
-	spin_unlock_irqrestore(&cp->view.lock, flags);
 }
 
 static int
@@ -504,6 +492,7 @@ con3270_write(struct console *co, const char *str, unsigned int count)
 			con3270_cline_end(cp);
 	}
 	/* Setup timer to output current console buffer after 1/10 second */
+	cp->nr_up = 0;
 	if (cp->view.dev && !timer_pending(&cp->timer))
 		con3270_set_timer(cp, HZ/10);
 	spin_unlock_irqrestore(&cp->view.lock,flags);
@@ -624,7 +613,8 @@ con3270_init(void)
 
 	INIT_LIST_HEAD(&condev->lines);
 	INIT_LIST_HEAD(&condev->update);
-	init_timer(&condev->timer);
+	setup_timer(&condev->timer, (void (*)(unsigned long)) con3270_update,
+		    (unsigned long) condev);
 	tasklet_init(&condev->readlet, 
 		     (void (*)(unsigned long)) con3270_read_tasklet,
 		     (unsigned long) condev->read);
diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index aa7a114..3838567 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -112,7 +112,7 @@ struct tty3270 {
 #define TTY_UPDATE_LIST		2	/* Update lines in tty3270->update. */
 #define TTY_UPDATE_INPUT	4	/* Update input line. */
 #define TTY_UPDATE_STATUS	8	/* Update status line. */
-#define TTY_UPDATE_ALL		15
+#define TTY_UPDATE_ALL		16	/* Recreate screen. */
 
 static void tty3270_update(struct tty3270 *);
 
@@ -121,19 +121,10 @@ static void tty3270_update(struct tty3270 *);
  */
 static void tty3270_set_timer(struct tty3270 *tp, int expires)
 {
-	if (expires == 0) {
-		if (timer_pending(&tp->timer) && del_timer(&tp->timer))
-			raw3270_put_view(&tp->view);
-		return;
-	}
-	if (timer_pending(&tp->timer) &&
-	    mod_timer(&tp->timer, jiffies + expires))
-		return;
-	raw3270_get_view(&tp->view);
-	tp->timer.function = (void (*)(unsigned long)) tty3270_update;
-	tp->timer.data = (unsigned long) tp;
-	tp->timer.expires = jiffies + expires;
-	add_timer(&tp->timer);
+	if (expires == 0)
+		del_timer(&tp->timer);
+	else
+		mod_timer(&tp->timer, jiffies + expires);
 }
 
 /*
@@ -337,7 +328,6 @@ tty3270_write_callback(struct raw3270_request *rq, void *data)
 	tp = (struct tty3270 *) rq->view;
 	if (rq->rc != 0) {
 		/* Write wasn't successfull. Refresh all. */
-		tty3270_rebuild_update(tp);
 		tp->update_flags = TTY_UPDATE_ALL;
 		tty3270_set_timer(tp, 1);
 	}
@@ -366,6 +356,12 @@ tty3270_update(struct tty3270 *tp)
 
 	spin_lock(&tp->view.lock);
 	updated = 0;
+	if (tp->update_flags & TTY_UPDATE_ALL) {
+		tty3270_rebuild_update(tp);
+		tty3270_update_status(tp);
+		tp->update_flags = TTY_UPDATE_ERASE | TTY_UPDATE_LIST |
+			TTY_UPDATE_INPUT | TTY_UPDATE_STATUS;
+	}
 	if (tp->update_flags & TTY_UPDATE_ERASE) {
 		/* Use erase write alternate to erase display. */
 		raw3270_request_set_cmd(wrq, TC_EWRITEA);
@@ -425,7 +421,6 @@ tty3270_update(struct tty3270 *tp)
 		xchg(&tp->write, wrq);
 	}
 	spin_unlock(&tp->view.lock);
-	raw3270_put_view(&tp->view);
 }
 
 /*
@@ -570,7 +565,6 @@ tty3270_read_tasklet(struct raw3270_request *rrq)
 		tty3270_set_timer(tp, 1);
 	} else if (tp->input->string[0] == 0x6d) {
 		/* Display has been cleared. Redraw. */
-		tty3270_rebuild_update(tp);
 		tp->update_flags = TTY_UPDATE_ALL;
 		tty3270_set_timer(tp, 1);
 	}
@@ -641,22 +635,20 @@ static int
 tty3270_activate(struct raw3270_view *view)
 {
 	struct tty3270 *tp;
-	unsigned long flags;
 
 	tp = (struct tty3270 *) view;
-	spin_lock_irqsave(&tp->view.lock, flags);
-	tp->nr_up = 0;
-	tty3270_rebuild_update(tp);
-	tty3270_update_status(tp);
 	tp->update_flags = TTY_UPDATE_ALL;
 	tty3270_set_timer(tp, 1);
-	spin_unlock_irqrestore(&tp->view.lock, flags);
 	return 0;
 }
 
 static void
 tty3270_deactivate(struct raw3270_view *view)
 {
+	struct tty3270 *tp;
+
+	tp = (struct tty3270 *) view;
+	del_timer(&tp->timer);
 }
 
 static int
@@ -743,6 +735,7 @@ tty3270_free_view(struct tty3270 *tp)
 {
 	int pages;
 
+	del_timer_sync(&tp->timer);
 	kbd_free(tp->kbd);
 	raw3270_request_free(tp->kreset);
 	raw3270_request_free(tp->read);
@@ -889,7 +882,8 @@ tty3270_open(struct tty_struct *tty, struct file * filp)
 	INIT_LIST_HEAD(&tp->update);
 	INIT_LIST_HEAD(&tp->rcl_lines);
 	tp->rcl_max = 20;
-	init_timer(&tp->timer);
+	setup_timer(&tp->timer, (void (*)(unsigned long)) tty3270_update,
+		    (unsigned long) tp);
 	tasklet_init(&tp->readlet, 
 		     (void (*)(unsigned long)) tty3270_read_tasklet,
 		     (unsigned long) tp->read);
-- 
cgit v0.10.2


From dab4079d5b5ac421208499d5e554a07f9beb16e4 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:32 +0200
Subject: [S390] uaccess: use might_fault() instead of might_sleep()

Adds more checking in case lockdep is turned on.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 0235970..8377e91 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -131,7 +131,7 @@ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
 
 #define put_user(x, ptr)					\
 ({								\
-	might_sleep();						\
+	might_fault();						\
 	__put_user(x, ptr);					\
 })
 
@@ -180,7 +180,7 @@ extern int __put_user_bad(void) __attribute__((noreturn));
 
 #define get_user(x, ptr)					\
 ({								\
-	might_sleep();						\
+	might_fault();						\
 	__get_user(x, ptr);					\
 })
 
@@ -231,7 +231,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (access_ok(VERIFY_WRITE, to, n))
 		n = __copy_to_user(to, from, n);
 	return n;
@@ -282,7 +282,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (access_ok(VERIFY_READ, from, n))
 		n = __copy_from_user(to, from, n);
 	else
@@ -299,7 +299,7 @@ __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 static inline unsigned long __must_check
 copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (__access_ok(from,n) && __access_ok(to,n))
 		n = __copy_in_user(to, from, n);
 	return n;
@@ -312,7 +312,7 @@ static inline long __must_check
 strncpy_from_user(char *dst, const char __user *src, long count)
 {
         long res = -EFAULT;
-        might_sleep();
+	might_fault();
         if (access_ok(VERIFY_READ, src, 1))
 		res = uaccess.strncpy_from_user(count, src, dst);
         return res;
@@ -321,7 +321,7 @@ strncpy_from_user(char *dst, const char __user *src, long count)
 static inline unsigned long
 strnlen_user(const char __user * src, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	return uaccess.strnlen_user(n, src);
 }
 
@@ -354,7 +354,7 @@ __clear_user(void __user *to, unsigned long n)
 static inline unsigned long __must_check
 clear_user(void __user *to, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (access_ok(VERIFY_WRITE, to, n))
 		n = uaccess.clear_user(n, to);
 	return n;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 10bccd1..c18b21d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -512,7 +512,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		BUG();
 	}
 
-	might_sleep();
+	might_fault();
 
 	do {
 		__vcpu_run(vcpu);
-- 
cgit v0.10.2


From 239a64255fae8933d95273b5b92545949ca4e743 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:33 +0200
Subject: [S390] vmalloc: add vmalloc kernel parameter support

With the kernel parameter 'vmalloc=<size>' the size of the vmalloc area
can be specified. This can be used to increase or decrease the size of
the area. Works in the same way as on some other architectures.
This can be useful for features which make excessive use of vmalloc and
wouldn't work otherwise.
The default sizes remain unchanged: 96MB for 31 bit kernels and 1GB for
64 bit kernels.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5caddd4..60a7b1a 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -112,12 +112,15 @@ extern char empty_zero_page[PAGE_SIZE];
  * effect, this also makes sure that 64 bit module code cannot be used
  * as system call address.
  */
+
+extern unsigned long VMALLOC_START;
+
 #ifndef __s390x__
-#define VMALLOC_START	0x78000000UL
+#define VMALLOC_SIZE	(96UL << 20)
 #define VMALLOC_END	0x7e000000UL
 #define VMEM_MAP_END	0x80000000UL
 #else /* __s390x__ */
-#define VMALLOC_START	0x3e000000000UL
+#define VMALLOC_SIZE	(1UL << 30)
 #define VMALLOC_END	0x3e040000000UL
 #define VMEM_MAP_END	0x40000000000UL
 #endif /* __s390x__ */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index be6c1cf..4ca8e82 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,7 +1,5 @@
 /*
- *  arch/s390/mm/pgtable.c
- *
- *    Copyright IBM Corp. 2007
+ *    Copyright IBM Corp. 2007,2009
  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
@@ -53,6 +51,18 @@ void clear_table_pgstes(unsigned long *table)
 
 #endif
 
+unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
+EXPORT_SYMBOL(VMALLOC_START);
+
+static int __init parse_vmalloc(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+	VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
+	return 0;
+}
+early_param("vmalloc", parse_vmalloc);
+
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
 {
 	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
-- 
cgit v0.10.2


From 6b9d8e80bb9edd0c9fe948a6ef105391de56b012 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 12 Jun 2009 10:26:34 +0200
Subject: [S390] qdio: fix access beyond ARRAY_SIZE of irq_ptr->{in,out}put_qs

Do not go beyond ARRAY_SIZE of irq_ptr->{in,out}put_qs

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index ba4facc..d79cf5b 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -1494,7 +1494,7 @@ int do_QDIO(struct ccw_device *cdev, unsigned int callflags,
 
 	if ((bufnr > QDIO_MAX_BUFFERS_PER_Q) ||
 	    (count > QDIO_MAX_BUFFERS_PER_Q) ||
-	    (q_nr > QDIO_MAX_QUEUES_PER_IRQ))
+	    (q_nr >= QDIO_MAX_QUEUES_PER_IRQ))
 		return -EINVAL;
 
 	if (!count)
-- 
cgit v0.10.2


From d0591485e15ccd908f91058f7da134248dcdbbb3 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:35 +0200
Subject: [S390] dcssblk: revert devt conversion

git commit f331c0296f2a9fee0d396a70598b954062603015 changed users of
->first_minor to devt. This broke device handling in dcssblk, so that
no additional devices could be added after the first one.

This patch reverts the devt conversion to the previous ->first_minor
handling.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index a4c7ffc..b21caf1 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -127,7 +127,7 @@ dcssblk_assign_free_minor(struct dcssblk_dev_info *dev_info)
 		found = 0;
 		// test if minor available
 		list_for_each_entry(entry, &dcssblk_devices, lh)
-			if (minor == MINOR(disk_devt(entry->gd)))
+			if (minor == entry->gd->first_minor)
 				found++;
 		if (!found) break; // got unused minor
 	}
@@ -625,7 +625,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	if (rc)
 		goto release_gd;
 	sprintf(dev_info->gd->disk_name, "dcssblk%d",
-		MINOR(disk_devt(dev_info->gd)));
+		dev_info->gd->first_minor);
 	list_add_tail(&dev_info->lh, &dcssblk_devices);
 
 	if (!try_module_get(THIS_MODULE)) {
-- 
cgit v0.10.2


From 45b44d76d373e66d08e0c745dc82ff9123103588 Mon Sep 17 00:00:00 2001
From: Stefan Weinhuber <wein@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:36 +0200
Subject: [S390] dasd: no High Performance FICON in 31-bit mode

The High Performance FICON feature is not supported in 31-bit mode,
no matter what the various flags say. So we need to check for the
CONFIG_64BIT option as well.

Signed-off-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index a41c940..81f8819 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2336,9 +2336,10 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev,
 {
 	int tpm, cmdrtd, cmdwtd;
 	int use_prefix;
-
-	struct dasd_eckd_private *private;
+#if defined(CONFIG_64BIT)
 	int fcx_in_css, fcx_in_gneq, fcx_in_features;
+#endif
+	struct dasd_eckd_private *private;
 	struct dasd_device *basedev;
 	sector_t first_rec, last_rec;
 	sector_t first_trk, last_trk;
@@ -2361,11 +2362,15 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev,
 	last_offs = sector_div(last_trk, blk_per_trk);
 	cdlspecial = (private->uses_cdl && first_rec < 2*blk_per_trk);
 
-	/* is transport mode supported ? */
+	/* is transport mode supported? */
+#if defined(CONFIG_64BIT)
 	fcx_in_css = css_general_characteristics.fcx;
 	fcx_in_gneq = private->gneq->reserved2[7] & 0x04;
 	fcx_in_features = private->features.feature[40] & 0x80;
 	tpm = fcx_in_css && fcx_in_gneq && fcx_in_features;
+#else
+	tpm = 0;
+#endif
 
 	/* is read track data and write track data in command mode supported? */
 	cmdrtd = private->features.feature[9] & 0x20;
-- 
cgit v0.10.2


From 92636b152f3b58e459988934f689619af9e04dbc Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 12 Jun 2009 10:26:37 +0200
Subject: [S390] dasd: check_characteristics cleanup

Fix a broken memset (sizeof pointer vs sizeof the underlying
structure) by cleaning up the involved functions.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 27a1be0..35f43be 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2427,12 +2427,12 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
 
 
 int dasd_generic_read_dev_chars(struct dasd_device *device, char *magic,
-				void **rdc_buffer, int rdc_buffer_size)
+				void *rdc_buffer, int rdc_buffer_size)
 {
 	int ret;
 	struct dasd_ccw_req *cqr;
 
-	cqr = dasd_generic_build_rdc(device, *rdc_buffer, rdc_buffer_size,
+	cqr = dasd_generic_build_rdc(device, rdc_buffer, rdc_buffer_size,
 				     magic);
 	if (IS_ERR(cqr))
 		return PTR_ERR(cqr);
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 81f8819..c4e8181 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1097,20 +1097,20 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
 {
 	struct dasd_eckd_private *private;
 	struct dasd_block *block;
-	void *rdc_data;
 	int is_known, rc;
 
 	private = (struct dasd_eckd_private *) device->private;
-	if (private == NULL) {
-		private = kzalloc(sizeof(struct dasd_eckd_private),
-				  GFP_KERNEL | GFP_DMA);
-		if (private == NULL) {
+	if (!private) {
+		private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
+		if (!private) {
 			dev_warn(&device->cdev->dev,
 				 "Allocating memory for private DASD data "
 				 "failed\n");
 			return -ENOMEM;
 		}
 		device->private = (void *) private;
+	} else {
+		memset(private, 0, sizeof(*private));
 	}
 	/* Invalidate status of initial analysis. */
 	private->init_cqr_status = -1;
@@ -1161,9 +1161,8 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
 		goto out_err3;
 
 	/* Read Device Characteristics */
-	rdc_data = (void *) &(private->rdc_data);
-	memset(rdc_data, 0, sizeof(rdc_data));
-	rc = dasd_generic_read_dev_chars(device, "ECKD", &rdc_data, 64);
+	rc = dasd_generic_read_dev_chars(device, "ECKD", &private->rdc_data,
+					 64);
 	if (rc) {
 		DBF_EVENT(DBF_WARNING,
 			  "Read device characteristics failed, rc=%d for "
@@ -1183,7 +1182,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
 		 private->rdc_data.dev_model,
 		 private->rdc_data.cu_type,
 		 private->rdc_data.cu_model.model,
-		    private->real_cyl,
+		 private->real_cyl,
 		 private->rdc_data.trk_per_cyl,
 		 private->rdc_data.sec_per_trk);
 	return 0;
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 8912358..8c3c8ff 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -122,20 +122,20 @@ dasd_fba_check_characteristics(struct dasd_device *device)
 	struct dasd_block *block;
 	struct dasd_fba_private *private;
 	struct ccw_device *cdev = device->cdev;
-	void *rdc_data;
 	int rc;
 
 	private = (struct dasd_fba_private *) device->private;
-	if (private == NULL) {
-		private = kzalloc(sizeof(struct dasd_fba_private),
-				  GFP_KERNEL | GFP_DMA);
-		if (private == NULL) {
+	if (!private) {
+		private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
+		if (!private) {
 			dev_warn(&device->cdev->dev,
 				 "Allocating memory for private DASD "
 				 "data failed\n");
 			return -ENOMEM;
 		}
 		device->private = (void *) private;
+	} else {
+		memset(private, 0, sizeof(*private));
 	}
 	block = dasd_alloc_block();
 	if (IS_ERR(block)) {
@@ -150,8 +150,8 @@ dasd_fba_check_characteristics(struct dasd_device *device)
 	block->base = device;
 
 	/* Read Device Characteristics */
-	rdc_data = (void *) &(private->rdc_data);
-	rc = dasd_generic_read_dev_chars(device, "FBA ", &rdc_data, 32);
+	rc = dasd_generic_read_dev_chars(device, "FBA ", &private->rdc_data,
+					 32);
 	if (rc) {
 		DBF_EVENT(DBF_WARNING, "Read device characteristics returned "
 			  "error %d for device: %s",
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index c1e487f..3ab69b5 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -578,7 +578,7 @@ int dasd_generic_set_offline (struct ccw_device *cdev);
 int dasd_generic_notify(struct ccw_device *, int);
 void dasd_generic_handle_state_change(struct dasd_device *);
 
-int dasd_generic_read_dev_chars(struct dasd_device *, char *, void **, int);
+int dasd_generic_read_dev_chars(struct dasd_device *, char *, void *, int);
 char *dasd_get_sense(struct irb *);
 
 /* externals in dasd_devmap.c */
-- 
cgit v0.10.2


From 736e6ea0bf97ec79521f88704ce8506e5d60d078 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 12 Jun 2009 10:26:38 +0200
Subject: [S390] dasd: sync after async probe

Some functions called as a late_initcall depend on completely
initialized devices. Since commit
f3445a1a656bc26b07946cc6d20de1ef07c8d116 the dasd driver uses the
new async framework and relies on the fact that synchronization is
done in prepare_namespace which is called after the late_initcalls.

Fix this by calling async_synchronize_full at the end of the related
init functions.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index c4e8181..216c09b 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -3277,8 +3277,14 @@ static struct dasd_discipline dasd_eckd_discipline = {
 static int __init
 dasd_eckd_init(void)
 {
+	int ret;
+
 	ASCEBC(dasd_eckd_discipline.ebcname, 4);
-	return ccw_driver_register(&dasd_eckd_driver);
+	ret = ccw_driver_register(&dasd_eckd_driver);
+	if (!ret)
+		wait_for_device_probe();
+
+	return ret;
 }
 
 static void __exit
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 8c3c8ff..597c6ff 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -604,8 +604,14 @@ static struct dasd_discipline dasd_fba_discipline = {
 static int __init
 dasd_fba_init(void)
 {
+	int ret;
+
 	ASCEBC(dasd_fba_discipline.ebcname, 4);
-	return ccw_driver_register(&dasd_fba_driver);
+	ret = ccw_driver_register(&dasd_fba_driver);
+	if (!ret)
+		wait_for_device_probe();
+
+	return ret;
 }
 
 static void __exit
-- 
cgit v0.10.2


From 6cc7f168954fe8b3d8988a90b2478a9c11c5ebcb Mon Sep 17 00:00:00 2001
From: Stefan Weinhuber <wein@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:39 +0200
Subject: [S390] dasd: forward internal errors to dasd_sleep_on caller

If a DASD requests is started with dasd_sleep_on and fails, then the
calling function may need to know the reason for the failure.
In cases of hardware errors it can inspect the sense data in the irb,
but when the reason is internal (e.g. start_IO failed) then it needs
a meaningfull return code.

Signed-off-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 35f43be..442bb98 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -851,8 +851,10 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
 
 	/* Check the cqr */
 	rc = dasd_check_cqr(cqr);
-	if (rc)
+	if (rc) {
+		cqr->intrc = rc;
 		return rc;
+	}
 	device = (struct dasd_device *) cqr->startdev;
 	if (cqr->retries < 0) {
 		/* internal error 14 - start_IO run out of retries */
@@ -915,6 +917,7 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
 		BUG();
 		break;
 	}
+	cqr->intrc = rc;
 	return rc;
 }
 
@@ -1454,8 +1457,12 @@ int dasd_sleep_on(struct dasd_ccw_req *cqr)
 	dasd_add_request_tail(cqr);
 	wait_event(generic_waitq, _wait_for_wakeup(cqr));
 
-	/* Request status is either done or failed. */
-	rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
+	if (cqr->status == DASD_CQR_DONE)
+		rc = 0;
+	else if (cqr->intrc)
+		rc = cqr->intrc;
+	else
+		rc = -EIO;
 	return rc;
 }
 
@@ -1477,8 +1484,15 @@ int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr)
 		dasd_cancel_req(cqr);
 		/* wait (non-interruptible) for final status */
 		wait_event(generic_waitq, _wait_for_wakeup(cqr));
+		cqr->intrc = rc;
 	}
-	rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
+
+	if (cqr->status == DASD_CQR_DONE)
+		rc = 0;
+	else if (cqr->intrc)
+		rc = cqr->intrc;
+	else
+		rc = -EIO;
 	return rc;
 }
 
@@ -1523,8 +1537,12 @@ int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr)
 
 	wait_event(generic_waitq, _wait_for_wakeup(cqr));
 
-	/* Request status is either done or failed. */
-	rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
+	if (cqr->status == DASD_CQR_DONE)
+		rc = 0;
+	else if (cqr->intrc)
+		rc = cqr->intrc;
+	else
+		rc = -EIO;
 	return rc;
 }
 
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 2efaddf..644086b 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -202,6 +202,7 @@ dasd_start_diag(struct dasd_ccw_req * cqr)
 		rc = -EIO;
 		break;
 	}
+	cqr->intrc = rc;
 	return rc;
 }
 
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 216c09b..cf0cfdb 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -3017,8 +3017,9 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device,
 		      " I/O status report for device %s:\n",
 		      dev_name(&device->cdev->dev));
 	len += sprintf(page + len, KERN_ERR PRINTK_HEADER
-		       " in req: %p CS: 0x%02X DS: 0x%02X\n", req,
-		       scsw_cstat(&irb->scsw), scsw_dstat(&irb->scsw));
+		       " in req: %p CS: 0x%02X DS: 0x%02X CC: 0x%02X RC: %d\n",
+		       req, scsw_cstat(&irb->scsw), scsw_dstat(&irb->scsw),
+		       scsw_cc(&irb->scsw), req->intrc);
 	len += sprintf(page + len, KERN_ERR PRINTK_HEADER
 		       " device %s: Failing CCW: %p\n",
 		       dev_name(&device->cdev->dev),
@@ -3119,9 +3120,10 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
 		      " I/O status report for device %s:\n",
 		      dev_name(&device->cdev->dev));
 	len += sprintf(page + len, KERN_ERR PRINTK_HEADER
-		       " in req: %p CS: 0x%02X DS: 0x%02X "
+		       " in req: %p CS: 0x%02X DS: 0x%02X CC: 0x%02X RC: %d "
 		       "fcxs: 0x%02X schxs: 0x%02X\n", req,
 		       scsw_cstat(&irb->scsw), scsw_dstat(&irb->scsw),
+		       scsw_cc(&irb->scsw), req->intrc,
 		       irb->scsw.tm.fcxs, irb->scsw.tm.schxs);
 	len += sprintf(page + len, KERN_ERR PRINTK_HEADER
 		       " device %s: Failing TCW: %p\n",
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 3ab69b5..f97ceb7 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -173,6 +173,7 @@ struct dasd_ccw_req {
 	void *data;			/* pointer to data area */
 
 	/* these are important for recovering erroneous requests          */
+	int intrc;			/* internal error, e.g. from start_IO */
 	struct irb irb;			/* device status in case of an error */
 	struct dasd_ccw_req *refers;	/* ERP-chain queueing. */
 	void *function; 		/* originating ERP action */
-- 
cgit v0.10.2


From f3d0a0d96a7a2b4e1335437f68b3181ab252050a Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:40 +0200
Subject: [S390] profile_tick called twice

profile_tick is called twice for every clock comparator interrupt.
The generic clock event code does it in tick_sched_timer and the
s390 backend code in clock_comparator_work. That is one too many,
remove the one in the arch backend code.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index ef596d0..ad9a999 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -95,12 +95,6 @@ void tod_to_timeval(__u64 todval, struct timespec *xtime)
 	xtime->tv_nsec = ((todval * 1000) >> 12);
 }
 
-#ifdef CONFIG_PROFILING
-#define s390_do_profile()	profile_tick(CPU_PROFILING)
-#else
-#define s390_do_profile()	do { ; } while(0)
-#endif /* CONFIG_PROFILING */
-
 void clock_comparator_work(void)
 {
 	struct clock_event_device *cd;
@@ -109,7 +103,6 @@ void clock_comparator_work(void)
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	cd = &__get_cpu_var(comparators);
 	cd->event_handler(cd);
-	s390_do_profile();
 }
 
 /*
-- 
cgit v0.10.2


From d93f82b6e0c12a4373f2d04b1f92fcb2d175b62c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:41 +0200
Subject: [S390] maccess: add weak attribute to probe_kernel_write

probe_kernel_write() gets used to write to the kernel address space.
E.g. to patch the kernel (kgdb, ftrace, kprobes...). Some architectures
however enable write protection for the kernel text section, so that
writes to this region would fault.
This patch allows to specify an architecture specific version of
probe_kernel_write() which allows to handle and bypass write protection
of the text segment.
That way it is still possible to catch random writes to kernel text
and explicitly allow writes via this interface.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/mm/maccess.c b/mm/maccess.c
index ac40796..9073695 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -39,7 +39,7 @@ EXPORT_SYMBOL_GPL(probe_kernel_read);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long probe_kernel_write(void *dst, void *src, size_t size)
+long notrace __weak probe_kernel_write(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
-- 
cgit v0.10.2


From 88df125fd6127e409793fd84a574566651450320 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:42 +0200
Subject: [S390] maccess: arch specific probe_kernel_write() implementation

Add an s390 specific probe_kernel_write() function which allows to
write to the kernel text segment even if write protection is enabled.
This is implemented using the lra (load real address) and stura (store
using real address) instructions.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 2a74581..db05661 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux s390-specific parts of the memory manager.
 #
 
-obj-y	 := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
+obj-y	 := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
 obj-$(CONFIG_CMM) += cmm.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_PAGE_STATES) += page-states.o
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
new file mode 100644
index 0000000..8175627
--- /dev/null
+++ b/arch/s390/mm/maccess.c
@@ -0,0 +1,61 @@
+/*
+ * Access kernel memory without faulting -- s390 specific implementation.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ *   Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/system.h>
+
+/*
+ * This function writes to kernel memory bypassing DAT and possible
+ * write protection. It copies one to four bytes from src to dst
+ * using the stura instruction.
+ * Returns the number of bytes copied or -EFAULT.
+ */
+static long probe_kernel_write_odd(void *dst, void *src, size_t size)
+{
+	unsigned long count, aligned;
+	int offset, mask;
+	int rc = -EFAULT;
+
+	aligned = (unsigned long) dst & ~3UL;
+	offset = (unsigned long) dst & 3;
+	count = min_t(unsigned long, 4 - offset, size);
+	mask = (0xf << (4 - count)) & 0xf;
+	mask >>= offset;
+	asm volatile(
+		"	bras	1,0f\n"
+		"	icm	0,0,0(%3)\n"
+		"0:	l	0,0(%1)\n"
+		"	lra	%1,0(%1)\n"
+		"1:	ex	%2,0(1)\n"
+		"2:	stura	0,%1\n"
+		"	la	%0,0\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,3b) EX_TABLE(2b,3b)
+		: "+d" (rc), "+a" (aligned)
+		: "a" (mask), "a" (src) : "cc", "memory", "0", "1");
+	return rc ? rc : count;
+}
+
+long probe_kernel_write(void *dst, void *src, size_t size)
+{
+	long copied = 0;
+
+	while (size) {
+		copied = probe_kernel_write_odd(dst, src, size);
+		if (copied < 0)
+			break;
+		dst += copied;
+		src += copied;
+		size -= copied;
+	}
+	return copied < 0 ? -EFAULT : 0;
+}
-- 
cgit v0.10.2


From a2b53673fae14601bb520acf6a6f154463994565 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:43 +0200
Subject: [S390] kprobes: use probe_kernel_write

Use proble_kernel_write() to patch the kernel.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index a01cf02..9bb2f62 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -25,9 +25,9 @@
 #include <linux/preempt.h>
 #include <linux/stop_machine.h>
 #include <linux/kdebug.h>
+#include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
-#include <asm/uaccess.h>
 #include <linux/module.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -155,35 +155,8 @@ void __kprobes get_instruction_type(struct arch_specific_insn *ainsn)
 static int __kprobes swap_instruction(void *aref)
 {
 	struct ins_replace_args *args = aref;
-	u32 *addr;
-	u32 instr;
-	int err = -EFAULT;
 
-	/*
-	 * Text segment is read-only, hence we use stura to bypass dynamic
-	 * address translation to exchange the instruction. Since stura
-	 * always operates on four bytes, but we only want to exchange two
-	 * bytes do some calculations to get things right. In addition we
-	 * shall not cross any page boundaries (vmalloc area!) when writing
-	 * the new instruction.
-	 */
-	addr = (u32 *)((unsigned long)args->ptr & -4UL);
-	if ((unsigned long)args->ptr & 2)
-		instr = ((*addr) & 0xffff0000) | args->new;
-	else
-		instr = ((*addr) & 0x0000ffff) | args->new << 16;
-
-	asm volatile(
-		"	lra	%1,0(%1)\n"
-		"0:	stura	%2,%1\n"
-		"1:	la	%0,0\n"
-		"2:\n"
-		EX_TABLE(0b,2b)
-		: "+d" (err)
-		: "a" (addr), "d" (instr)
-		: "memory", "cc");
-
-	return err;
+	return probe_kernel_write(args->ptr, &args->new, sizeof(args->new));
 }
 
 void __kprobes arch_arm_kprobe(struct kprobe *p)
-- 
cgit v0.10.2


From dfd9f7abc0fb67b5781f340d982384cea53b2884 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:44 +0200
Subject: [S390] ftrace: add dynamic ftrace support

Dynamic ftrace support for s390.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1094787..b674e79 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -82,6 +82,8 @@ config S390
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_SYSCALL_WRAPPERS
 	select HAVE_FUNCTION_TRACER
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DEFAULT_NO_SPIN_MUTEXES
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 5a5bc75..ba23d8f 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -2,7 +2,26 @@
 #define _ASM_S390_FTRACE_H
 
 #ifndef __ASSEMBLY__
+
 extern void _mcount(void);
+extern unsigned long ftrace_dyn_func;
+
+struct dyn_arch_ftrace { };
+
+#define MCOUNT_ADDR ((long)_mcount)
+
+#ifdef CONFIG_64BIT
+#define MCOUNT_INSN_SIZE 24
+#define MCOUNT_OFFSET	 14
+#else
+#define MCOUNT_INSN_SIZE 30
+#define MCOUNT_OFFSET	  8
 #endif
 
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr - MCOUNT_OFFSET;
+}
+
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 713fe9f..5046ad6 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -68,6 +68,7 @@
 #define __LC_CPUID			0x02b0
 #define __LC_INT_CLOCK			0x02c8
 #define __LC_MACHINE_FLAGS		0x02d8
+#define __LC_FTRACE_FUNC		0x02dc
 #define __LC_IRB			0x0300
 #define __LC_PFAULT_INTPARM		0x0080
 #define __LC_CPU_TIMER_SAVE_AREA	0x00d8
@@ -113,6 +114,7 @@
 #define __LC_INT_CLOCK			0x0340
 #define __LC_VDSO_PER_CPU		0x0350
 #define __LC_MACHINE_FLAGS		0x0358
+#define __LC_FTRACE_FUNC		0x0360
 #define __LC_IRB			0x0380
 #define __LC_PASTE			0x03c0
 #define __LC_PFAULT_INTPARM		0x11b8
@@ -281,7 +283,8 @@ struct _lowcore
 	__u64	int_clock;			/* 0x02c8 */
 	__u64	clock_comparator;		/* 0x02d0 */
 	__u32	machine_flags;			/* 0x02d8 */
-	__u8	pad_0x02dc[0x0300-0x02dc];	/* 0x02dc */
+	__u32	ftrace_func;			/* 0x02dc */
+	__u8	pad_0x02f0[0x0300-0x02f0];	/* 0x02f0 */
 
 	/* Interrupt response block */
 	__u8	irb[64];			/* 0x0300 */
@@ -386,7 +389,8 @@ struct _lowcore
 	__u64	clock_comparator;		/* 0x0348 */
 	__u64	vdso_per_cpu_data;		/* 0x0350 */
 	__u64	machine_flags;			/* 0x0358 */
-	__u8	pad_0x0360[0x0380-0x0360];	/* 0x0360 */
+	__u64	ftrace_func;			/* 0x0360 */
+	__u8	pad_0x0368[0x0380-0x0368];	/* 0x0368 */
 
 	/* Interrupt response block. */
 	__u8	irb[64];			/* 0x0380 */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 0657de7..ce172bf 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_early.o = -pg
 endif
 
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
 #
 # Passing null pointers is ok for smp code, since we access the lowcore here.
 #
@@ -41,6 +45,7 @@ obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o \
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 
 # Kexec part
 S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index cf09948..fb26373 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -11,6 +11,7 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
+#include <linux/ftrace.h>
 #include <linux/lockdep.h>
 #include <linux/module.h>
 #include <linux/pfn.h>
@@ -410,5 +411,8 @@ void __init startup_init(void)
 	sclp_facilities_detect();
 	detect_memory_layout(memory_chunk);
 	S390_lowcore.machine_flags = machine_flags;
+#ifdef CONFIG_DYNAMIC_FTRACE
+	S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
+#endif
 	lockdep_on();
 }
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
new file mode 100644
index 0000000..0b81a78
--- /dev/null
+++ b/arch/s390/kernel/ftrace.c
@@ -0,0 +1,132 @@
+/*
+ * Dynamic function tracer architecture backend.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ *   Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <asm/lowcore.h>
+
+void ftrace_disable_code(void);
+void ftrace_call_code(void);
+void ftrace_nop_code(void);
+
+#define FTRACE_INSN_SIZE 4
+
+#ifdef CONFIG_64BIT
+
+asm(
+	"	.align	4\n"
+	"ftrace_disable_code:\n"
+	"	j	0f\n"
+	"	.word	0x0024\n"
+	"	lg	%r1,"__stringify(__LC_FTRACE_FUNC)"\n"
+	"	basr	%r14,%r1\n"
+	"	lg	%r14,8(15)\n"
+	"	lgr	%r0,%r0\n"
+	"0:\n");
+
+asm(
+	"	.align	4\n"
+	"ftrace_nop_code:\n"
+	"	j	.+"__stringify(MCOUNT_INSN_SIZE)"\n");
+
+asm(
+	"	.align	4\n"
+	"ftrace_call_code:\n"
+	"	stg	%r14,8(%r15)\n");
+
+#else /* CONFIG_64BIT */
+
+asm(
+	"	.align	4\n"
+	"ftrace_disable_code:\n"
+	"	j	0f\n"
+	"	l	%r1,"__stringify(__LC_FTRACE_FUNC)"\n"
+	"	basr	%r14,%r1\n"
+	"	l	%r14,4(%r15)\n"
+	"	j	0f\n"
+	"	bcr	0,%r7\n"
+	"	bcr	0,%r7\n"
+	"	bcr	0,%r7\n"
+	"	bcr	0,%r7\n"
+	"	bcr	0,%r7\n"
+	"	bcr	0,%r7\n"
+	"0:\n");
+
+asm(
+	"	.align	4\n"
+	"ftrace_nop_code:\n"
+	"	j	.+"__stringify(MCOUNT_INSN_SIZE)"\n");
+
+asm(
+	"	.align	4\n"
+	"ftrace_call_code:\n"
+	"	st	%r14,4(%r15)\n");
+
+#endif /* CONFIG_64BIT */
+
+static int ftrace_modify_code(unsigned long ip,
+			      void *old_code, int old_size,
+			      void *new_code, int new_size)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules code can disappear and change.
+	 *  We need to protect against faulting as well as code
+	 *  changing. We do this by using the probe_kernel_*
+	 *  functions.
+	 *  This however is just a simple sanity check.
+	 */
+	if (probe_kernel_read(replaced, (void *)ip, old_size))
+		return -EFAULT;
+	if (memcmp(replaced, old_code, old_size) != 0)
+		return -EINVAL;
+	if (probe_kernel_write((void *)ip, new_code, new_size))
+		return -EPERM;
+	return 0;
+}
+
+static int ftrace_make_initial_nop(struct module *mod, struct dyn_ftrace *rec,
+				   unsigned long addr)
+{
+	return ftrace_modify_code(rec->ip,
+				  ftrace_call_code, FTRACE_INSN_SIZE,
+				  ftrace_disable_code, MCOUNT_INSN_SIZE);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	if (addr == MCOUNT_ADDR)
+		return ftrace_make_initial_nop(mod, rec, addr);
+	return ftrace_modify_code(rec->ip,
+				  ftrace_call_code, FTRACE_INSN_SIZE,
+				  ftrace_nop_code, FTRACE_INSN_SIZE);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	return ftrace_modify_code(rec->ip,
+				  ftrace_nop_code, FTRACE_INSN_SIZE,
+				  ftrace_call_code, FTRACE_INSN_SIZE);
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	ftrace_dyn_func = (unsigned long)func;
+	return 0;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+	*(unsigned long *)data = 0;
+	return 0;
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 8064122..de27499 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -1,5 +1,5 @@
 /*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  *   Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
  *
@@ -7,36 +7,46 @@
 
 #include <asm/asm-offsets.h>
 
-#ifndef CONFIG_64BIT
-.globl _mcount
+	.globl ftrace_stub
+ftrace_stub:
+	br	%r14
+
+#ifdef CONFIG_64BIT
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+	.globl _mcount
 _mcount:
-	stm	%r0,%r5,8(%r15)
-	st	%r14,56(%r15)
-	lr	%r1,%r15
-	ahi	%r15,-96
-	l	%r3,100(%r15)
-	la	%r2,0(%r14)
-	st	%r1,__SF_BACKCHAIN(%r15)
-	la	%r3,0(%r3)
-	bras	%r14,0f
-	.long	ftrace_trace_function
-0:	l	%r14,0(%r14)
-	l	%r14,0(%r14)
-	basr	%r14,%r14
-	ahi	%r15,96
-	lm	%r0,%r5,8(%r15)
-	l	%r14,56(%r15)
 	br	%r14
 
-.globl ftrace_stub
-ftrace_stub:
+	.globl ftrace_caller
+ftrace_caller:
+	stmg	%r2,%r5,32(%r15)
+	stg	%r14,112(%r15)
+	lgr	%r1,%r15
+	aghi	%r15,-160
+	stg	%r1,__SF_BACKCHAIN(%r15)
+	lgr	%r2,%r14
+	lg	%r3,168(%r15)
+	larl	%r14,ftrace_dyn_func
+	lg	%r14,0(%r14)
+	basr	%r14,%r14
+	aghi	%r15,160
+	lmg	%r2,%r5,32(%r15)
+	lg	%r14,112(%r15)
 	br	%r14
 
-#else /* CONFIG_64BIT */
+	.data
+	.globl	ftrace_dyn_func
+ftrace_dyn_func:
+	.quad	ftrace_stub
+	.previous
+
+#else /* CONFIG_DYNAMIC_FTRACE */
 
-.globl _mcount
+	.globl _mcount
 _mcount:
-	stmg	%r0,%r5,16(%r15)
+	stmg	%r2,%r5,32(%r15)
 	stg	%r14,112(%r15)
 	lgr	%r1,%r15
 	aghi	%r15,-160
@@ -47,12 +57,67 @@ _mcount:
 	lg	%r14,0(%r14)
 	basr	%r14,%r14
 	aghi	%r15,160
-	lmg	%r0,%r5,16(%r15)
+	lmg	%r2,%r5,32(%r15)
 	lg	%r14,112(%r15)
 	br	%r14
 
-.globl ftrace_stub
-ftrace_stub:
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#else /* CONFIG_64BIT */
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+	.globl _mcount
+_mcount:
+	br	%r14
+
+	.globl ftrace_caller
+ftrace_caller:
+	stm	%r2,%r5,16(%r15)
+	st	%r14,56(%r15)
+	lr	%r1,%r15
+	ahi	%r15,-96
+	l	%r3,100(%r15)
+	la	%r2,0(%r14)
+	st	%r1,__SF_BACKCHAIN(%r15)
+	la	%r3,0(%r3)
+	bras	%r14,0f
+	.long	ftrace_dyn_func
+0:	l	%r14,0(%r14)
+	l	%r14,0(%r14)
+	basr	%r14,%r14
+	ahi	%r15,96
+	lm	%r2,%r5,16(%r15)
+	l	%r14,56(%r15)
+	br	%r14
+
+	.data
+	.globl	ftrace_dyn_func
+ftrace_dyn_func:
+	.long	ftrace_stub
+	.previous
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+
+	.globl _mcount
+_mcount:
+	stm	%r2,%r5,16(%r15)
+	st	%r14,56(%r15)
+	lr	%r1,%r15
+	ahi	%r15,-96
+	l	%r3,100(%r15)
+	la	%r2,0(%r14)
+	st	%r1,__SF_BACKCHAIN(%r15)
+	la	%r3,0(%r3)
+	bras	%r14,0f
+	.long	ftrace_trace_function
+0:	l	%r14,0(%r14)
+	l	%r14,0(%r14)
+	basr	%r14,%r14
+	ahi	%r15,96
+	lm	%r2,%r5,16(%r15)
+	l	%r14,56(%r15)
 	br	%r14
 
+#endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7402b6a..9717717 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -42,6 +42,7 @@
 #include <linux/ctype.h>
 #include <linux/reboot.h>
 #include <linux/topology.h>
+#include <linux/ftrace.h>
 
 #include <asm/ipl.h>
 #include <asm/uaccess.h>
@@ -442,6 +443,7 @@ setup_lowcore(void)
 	lc->steal_timer = S390_lowcore.steal_timer;
 	lc->last_update_timer = S390_lowcore.last_update_timer;
 	lc->last_update_clock = S390_lowcore.last_update_clock;
+	lc->ftrace_func = S390_lowcore.ftrace_func;
 	set_prefix((u32)(unsigned long) lc);
 	lowcore_ptr[0] = lc;
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0af302c..cc8c484 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -572,6 +572,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	cpu_lowcore->cpu_nr = cpu;
 	cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce;
 	cpu_lowcore->machine_flags = S390_lowcore.machine_flags;
+	cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func;
 	eieio();
 
 	while (signal_processor(cpu, sigp_restart) == sigp_busy)
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 0fae7da..91033e6 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -185,6 +185,19 @@ if ($arch eq "x86_64") {
     $objcopy .= " -O elf32-i386";
     $cc .= " -m32";
 
+} elsif ($arch eq "s390" && $bits == 32) {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_32\\s+_mcount\$";
+    $alignment = 4;
+    $ld .= " -m elf_s390";
+    $cc .= " -m31";
+
+} elsif ($arch eq "s390" && $bits == 64) {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
+    $alignment = 8;
+    $type = ".quad";
+    $ld .= " -m elf64_s390";
+    $cc .= " -m64";
+
 } elsif ($arch eq "sh") {
     $alignment = 2;
 
-- 
cgit v0.10.2


From 8b4488f85d619253c9e631ec723368f400106771 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:45 +0200
Subject: [S390] ftrace: add function trace mcount test support

Add support for early test if the function tracer is enabled or
disabled. Saves some extra function calls.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b674e79..480590f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -82,6 +82,7 @@ config S390
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_SYSCALL_WRAPPERS
 	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DEFAULT_NO_SPIN_MUTEXES
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index de27499..0aa85ec 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -21,6 +21,9 @@ _mcount:
 
 	.globl ftrace_caller
 ftrace_caller:
+	larl	%r1,function_trace_stop
+	icm	%r1,0xf,0(%r1)
+	bnzr	%r14
 	stmg	%r2,%r5,32(%r15)
 	stg	%r14,112(%r15)
 	lgr	%r1,%r15
@@ -46,6 +49,9 @@ ftrace_dyn_func:
 
 	.globl _mcount
 _mcount:
+	larl	%r1,function_trace_stop
+	icm	%r1,0xf,0(%r1)
+	bnzr	%r14
 	stmg	%r2,%r5,32(%r15)
 	stg	%r14,112(%r15)
 	lgr	%r1,%r15
@@ -74,21 +80,25 @@ _mcount:
 	.globl ftrace_caller
 ftrace_caller:
 	stm	%r2,%r5,16(%r15)
+	bras	%r1,2f
+0:	.long	ftrace_trace_function
+1:	.long	function_trace_stop
+2:	l	%r2,1b-0b(%r1)
+	icm	%r2,0xf,0(%r2)
+	jnz	3f
 	st	%r14,56(%r15)
-	lr	%r1,%r15
+	lr	%r0,%r15
 	ahi	%r15,-96
 	l	%r3,100(%r15)
 	la	%r2,0(%r14)
-	st	%r1,__SF_BACKCHAIN(%r15)
+	st	%r0,__SF_BACKCHAIN(%r15)
 	la	%r3,0(%r3)
-	bras	%r14,0f
-	.long	ftrace_dyn_func
-0:	l	%r14,0(%r14)
+	l	%r14,0b-0b(%r1)
 	l	%r14,0(%r14)
 	basr	%r14,%r14
 	ahi	%r15,96
-	lm	%r2,%r5,16(%r15)
 	l	%r14,56(%r15)
+3:	lm	%r2,%r5,16(%r15)
 	br	%r14
 
 	.data
@@ -102,21 +112,25 @@ ftrace_dyn_func:
 	.globl _mcount
 _mcount:
 	stm	%r2,%r5,16(%r15)
+	bras	%r1,2f
+0:	.long	ftrace_trace_function
+1:	.long	function_trace_stop
+2:	l	%r2,1b-0b(%r1)
+	icm	%r2,0xf,0(%r2)
+	jnz	3f
 	st	%r14,56(%r15)
-	lr	%r1,%r15
+	lr	%r0,%r15
 	ahi	%r15,-96
 	l	%r3,100(%r15)
 	la	%r2,0(%r14)
-	st	%r1,__SF_BACKCHAIN(%r15)
+	st	%r0,__SF_BACKCHAIN(%r15)
 	la	%r3,0(%r3)
-	bras	%r14,0f
-	.long	ftrace_trace_function
-0:	l	%r14,0(%r14)
+	l	%r14,0b-0b(%r1)
 	l	%r14,0(%r14)
 	basr	%r14,%r14
 	ahi	%r15,96
-	lm	%r2,%r5,16(%r15)
 	l	%r14,56(%r15)
+3:	lm	%r2,%r5,16(%r15)
 	br	%r14
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
-- 
cgit v0.10.2


From 88dbd2037229bd2ed7543ffd0d8f2d9dec9d31d2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:46 +0200
Subject: [S390] ftrace: add function graph tracer support

Function graph tracer support for s390.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 480590f..9023cc9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -85,6 +85,7 @@ config S390
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_DEFAULT_NO_SPIN_MUTEXES
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index ba23d8f..96c14a9 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -11,11 +11,13 @@ struct dyn_arch_ftrace { };
 #define MCOUNT_ADDR ((long)_mcount)
 
 #ifdef CONFIG_64BIT
-#define MCOUNT_INSN_SIZE 24
-#define MCOUNT_OFFSET	 14
+#define MCOUNT_OFFSET_RET 18
+#define MCOUNT_INSN_SIZE  24
+#define MCOUNT_OFFSET	  14
 #else
-#define MCOUNT_INSN_SIZE 30
-#define MCOUNT_OFFSET	  8
+#define MCOUNT_OFFSET_RET 26
+#define MCOUNT_INSN_SIZE  30
+#define MCOUNT_OFFSET	   8
 #endif
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index ce172bf..c75ed43 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -3,11 +3,8 @@
 #
 
 ifdef CONFIG_FUNCTION_TRACER
-# Do not trace early boot code
+# Don't trace early setup code and tracing code
 CFLAGS_REMOVE_early.o = -pg
-endif
-
-ifdef CONFIG_DYNAMIC_FTRACE
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
@@ -46,6 +43,7 @@ obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 
 # Kexec part
 S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 0b81a78..c92a109 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,17 @@
  *
  */
 
+#include <linux/hardirq.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <asm/lowcore.h>
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+
 void ftrace_disable_code(void);
+void ftrace_disable_return(void);
 void ftrace_call_code(void);
 void ftrace_nop_code(void);
 
@@ -28,6 +32,7 @@ asm(
 	"	.word	0x0024\n"
 	"	lg	%r1,"__stringify(__LC_FTRACE_FUNC)"\n"
 	"	basr	%r14,%r1\n"
+	"ftrace_disable_return:\n"
 	"	lg	%r14,8(15)\n"
 	"	lgr	%r0,%r0\n"
 	"0:\n");
@@ -50,6 +55,7 @@ asm(
 	"	j	0f\n"
 	"	l	%r1,"__stringify(__LC_FTRACE_FUNC)"\n"
 	"	basr	%r14,%r1\n"
+	"ftrace_disable_return:\n"
 	"	l	%r14,4(%r15)\n"
 	"	j	0f\n"
 	"	bcr	0,%r7\n"
@@ -130,3 +136,69 @@ int __init ftrace_dyn_arch_init(void *data)
 	*(unsigned long *)data = 0;
 	return 0;
 }
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Patch the kernel code at ftrace_graph_caller location:
+ * The instruction there is branch relative on condition. The condition mask
+ * is either all ones (always branch aka disable ftrace_graph_caller) or all
+ * zeroes (nop aka enable ftrace_graph_caller).
+ * Instruction format for brc is a7m4xxxx where m is the condition mask.
+ */
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	unsigned short opcode = 0xa704;
+
+	return probe_kernel_write(ftrace_graph_caller, &opcode, sizeof(opcode));
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	unsigned short opcode = 0xa7f4;
+
+	return probe_kernel_write(ftrace_graph_caller, &opcode, sizeof(opcode));
+}
+
+static inline unsigned long ftrace_mcount_call_adjust(unsigned long addr)
+{
+	return addr - (ftrace_disable_return - ftrace_disable_code);
+}
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+
+static inline unsigned long ftrace_mcount_call_adjust(unsigned long addr)
+{
+	return addr - MCOUNT_OFFSET_RET;
+}
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+/*
+ * Hook the return address and push it in the stack of return addresses
+ * in current thread info.
+ */
+unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent)
+{
+	struct ftrace_graph_ent trace;
+
+	/* Nmi's are currently unsupported. */
+	if (unlikely(in_nmi()))
+		goto out;
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		goto out;
+	if (ftrace_push_return_trace(parent, ip, &trace.depth) == -EBUSY)
+		goto out;
+	trace.func = ftrace_mcount_call_adjust(ip) & PSW_ADDR_INSN;
+	/* Only trace if the calling function expects to. */
+	if (!ftrace_graph_entry(&trace)) {
+		current->curr_ret_stack--;
+		goto out;
+	}
+	parent = (unsigned long)return_to_handler;
+out:
+	return parent;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 0aa85ec..2a0a5e9 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -34,6 +34,18 @@ ftrace_caller:
 	larl	%r14,ftrace_dyn_func
 	lg	%r14,0(%r14)
 	basr	%r14,%r14
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl	ftrace_graph_caller
+ftrace_graph_caller:
+	# This unconditional branch gets runtime patched. Change only if
+	# you know what you are doing. See ftrace_enable_graph_caller().
+	j	0f
+	lg	%r2,272(%r15)
+	lg	%r3,168(%r15)
+	brasl	%r14,prepare_ftrace_return
+	stg	%r2,168(%r15)
+0:
+#endif
 	aghi	%r15,160
 	lmg	%r2,%r5,32(%r15)
 	lg	%r14,112(%r15)
@@ -62,6 +74,12 @@ _mcount:
 	larl	%r14,ftrace_trace_function
 	lg	%r14,0(%r14)
 	basr	%r14,%r14
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	lg	%r2,272(%r15)
+	lg	%r3,168(%r15)
+	brasl	%r14,prepare_ftrace_return
+	stg	%r2,168(%r15)
+#endif
 	aghi	%r15,160
 	lmg	%r2,%r5,32(%r15)
 	lg	%r14,112(%r15)
@@ -69,6 +87,22 @@ _mcount:
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+	.globl	return_to_handler
+return_to_handler:
+	stmg	%r2,%r5,32(%r15)
+	lgr	%r1,%r15
+	aghi	%r15,-160
+	stg	%r1,__SF_BACKCHAIN(%r15)
+	brasl	%r14,ftrace_return_to_handler
+	aghi	%r15,160
+	lgr	%r14,%r2
+	lmg	%r2,%r5,32(%r15)
+	br	%r14
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 #else /* CONFIG_64BIT */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -96,6 +130,21 @@ ftrace_caller:
 	l	%r14,0b-0b(%r1)
 	l	%r14,0(%r14)
 	basr	%r14,%r14
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl	ftrace_graph_caller
+ftrace_graph_caller:
+	# This unconditional branch gets runtime patched. Change only if
+	# you know what you are doing. See ftrace_enable_graph_caller().
+	j	1f
+	bras	%r1,0f
+	.long	prepare_ftrace_return
+0:	l	%r2,152(%r15)
+	l	%r4,0(%r1)
+	l	%r3,100(%r15)
+	basr	%r14,%r4
+	st	%r2,100(%r15)
+1:
+#endif
 	ahi	%r15,96
 	l	%r14,56(%r15)
 3:	lm	%r2,%r5,16(%r15)
@@ -128,10 +177,40 @@ _mcount:
 	l	%r14,0b-0b(%r1)
 	l	%r14,0(%r14)
 	basr	%r14,%r14
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	bras	%r1,0f
+	.long	prepare_ftrace_return
+0:	l	%r2,152(%r15)
+	l	%r4,0(%r1)
+	l	%r3,100(%r15)
+	basr	%r14,%r4
+	st	%r2,100(%r15)
+#endif
 	ahi	%r15,96
 	l	%r14,56(%r15)
 3:	lm	%r2,%r5,16(%r15)
 	br	%r14
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+	.globl	return_to_handler
+return_to_handler:
+	stm	%r2,%r5,16(%r15)
+	st	%r14,56(%r15)
+	lr	%r0,%r15
+	ahi	%r15,-96
+	st	%r0,__SF_BACKCHAIN(%r15)
+	bras	%r1,0f
+	.long	ftrace_return_to_handler
+0:	l	%r2,0b-0b(%r1)
+	basr	%r14,%r2
+	lr	%r14,%r2
+	ahi	%r15,96
+	lm	%r2,%r5,16(%r15)
+	br	%r14
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 #endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index 6b0686d..0de305b 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/ftrace.h>
 #include <linux/errno.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
@@ -112,7 +113,7 @@ int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler,
 	return 0;
 }
 
-void do_extint(struct pt_regs *regs, unsigned short code)
+void __irq_entry do_extint(struct pt_regs *regs, unsigned short code)
 {
         ext_int_info_t *p;
         int index;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index ad9a999..215330a 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -70,7 +70,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
-unsigned long long sched_clock(void)
+unsigned long long notrace sched_clock(void)
 {
 	return ((get_clock_xt() - sched_clock_base_cc) * 125) >> 9;
 }
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 89399b8..a53db23 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -34,6 +34,7 @@ SECTIONS
 		SCHED_TEXT
 		LOCK_TEXT
 		KPROBES_TEXT
+		IRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
 	} :text = 0x0700
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 9889f18..5ec7789 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -12,6 +12,7 @@
 #define KMSG_COMPONENT "cio"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -626,8 +627,7 @@ out:
  *	    handlers).
  *
  */
-void
-do_IRQ (struct pt_regs *regs)
+void __irq_entry do_IRQ(struct pt_regs *regs)
 {
 	struct tpi_info *tpi_info;
 	struct subchannel *sch;
-- 
cgit v0.10.2


From 9bf1226b33dc2f94fc37ffd70ee161e2bda1ff5c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:47 +0200
Subject: [S390] ftrace: add system call tracer support

System call tracer support for s390.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9023cc9..99dc3de 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -84,6 +84,7 @@ config S390
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FTRACE_SYSCALLS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_DEFAULT_NO_SPIN_MUTEXES
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 2429b87..e0a73d3 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -12,6 +12,7 @@
 #ifndef _ASM_SYSCALL_H
 #define _ASM_SYSCALL_H	1
 
+#include <linux/sched.h>
 #include <asm/ptrace.h>
 
 static inline long syscall_get_nr(struct task_struct *task,
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 2f86653..925bcc6 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -92,6 +92,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	9	/* syscall auditing active */
 #define TIF_SECCOMP		10	/* secure computing */
+#define TIF_SYSCALL_FTRACE	11	/* ftrace syscall instrumentation */
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling 
 					   TIF_NEED_RESCHED */
@@ -110,6 +111,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_SYSCALL_FTRACE	(1<<TIF_SYSCALL_FTRACE)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index db25cdc..c4c80a2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -53,7 +53,8 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
 _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
-_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | _TIF_SECCOMP>>8)
+_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
+		_TIF_SECCOMP>>8 | _TIF_SYSCALL_FTRACE>>8)
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
@@ -1108,6 +1109,7 @@ cleanup_io_leave_insn:
 
 		.section .rodata, "a"
 #define SYSCALL(esa,esame,emu)	.long esa
+	.globl	sys_call_table
 sys_call_table:
 #include "syscalls.S"
 #undef SYSCALL
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 3cec9b5..f6618e9 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -56,7 +56,8 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
 _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
-_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | _TIF_SECCOMP>>8)
+_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
+		_TIF_SECCOMP>>8 | _TIF_SYSCALL_FTRACE>>8)
 
 #define BASED(name) name-system_call(%r13)
 
@@ -1059,6 +1060,7 @@ cleanup_io_leave_insn:
 
 		.section .rodata, "a"
 #define SYSCALL(esa,esame,emu)	.long esame
+	.globl	sys_call_table
 sys_call_table:
 #include "syscalls.S"
 #undef SYSCALL
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c92a109..82ddfd3 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -12,6 +12,7 @@
 #include <linux/ftrace.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <trace/syscall.h>
 #include <asm/lowcore.h>
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -202,3 +203,58 @@ out:
 	return parent;
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_FTRACE_SYSCALLS
+
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+extern unsigned int sys_call_table[];
+
+static struct syscall_metadata **syscalls_metadata;
+
+struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
+		return NULL;
+
+	return syscalls_metadata[nr];
+}
+
+static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
+{
+	struct syscall_metadata *start;
+	struct syscall_metadata *stop;
+	char str[KSYM_SYMBOL_LEN];
+
+	start = (struct syscall_metadata *)__start_syscalls_metadata;
+	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+	kallsyms_lookup(syscall, NULL, NULL, NULL, str);
+
+	for ( ; start < stop; start++) {
+		if (start->name && !strcmp(start->name + 3, str + 3))
+			return start;
+	}
+	return NULL;
+}
+
+void arch_init_ftrace_syscalls(void)
+{
+	struct syscall_metadata *meta;
+	int i;
+	static atomic_t refs;
+
+	if (atomic_inc_return(&refs) != 1)
+		goto out;
+	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls,
+				    GFP_KERNEL);
+	if (!syscalls_metadata)
+		goto out;
+	for (i = 0; i < NR_syscalls; i++) {
+		meta = find_syscall_meta((unsigned long)sys_call_table[i]);
+		syscalls_metadata[i] = meta;
+	}
+	return;
+out:
+	atomic_dec(&refs);
+}
+#endif
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index b6fc1ae..490b399 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -37,6 +37,7 @@
 #include <linux/regset.h>
 #include <linux/tracehook.h>
 #include <linux/seccomp.h>
+#include <trace/syscall.h>
 #include <asm/compat.h>
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -661,6 +662,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		ret = -1;
 	}
 
+	if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
+		ftrace_syscall_enter(regs);
+
 	if (unlikely(current->audit_context))
 		audit_syscall_entry(is_compat_task() ?
 					AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
@@ -676,6 +680,9 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
 		audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]),
 				   regs->gprs[2]);
 
+	if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
+		ftrace_syscall_exit(regs);
+
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, 0);
 }
-- 
cgit v0.10.2


From fc39453debac927a3ba477bafbc6c18c7166ae78 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:48 +0200
Subject: [S390] wire up sys_rt_tgsigqueueinfo

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index f0f19e6..9055723 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -267,7 +267,8 @@
 #define __NR_epoll_create1	327
 #define	__NR_preadv		328
 #define	__NR_pwritev		329
-#define NR_syscalls 330
+#define __NR_rt_tgsigqueueinfo	330
+#define NR_syscalls 331
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index fb38af6..d2e7fbf 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1823,3 +1823,11 @@ compat_sys_pwritev_wrapper:
 	llgfr	%r5,%r5			# u32
 	llgfr	%r6,%r6			# u32
 	jg	compat_sys_pwritev	# branch to system call
+
+	.globl	compat_sys_rt_tgsigqueueinfo_wrapper
+compat_sys_rt_tgsigqueueinfo_wrapper:
+	lgfr	%r2,%r2			# compat_pid_t
+	lgfr	%r3,%r3			# compat_pid_t
+	lgfr	%r4,%r4			# int
+	llgtr	%r5,%r5			# struct compat_siginfo *
+	jg	compat_sys_rt_tgsigqueueinfo_wrapper # branch to system call
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 2c7739f..82f89ef 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -338,3 +338,4 @@ SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper)
 SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper)
 SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper)
 SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper)
+SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */
-- 
cgit v0.10.2


From 310d6b671588dd7695cbc0d09d02e41d94a42bed Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 12 Jun 2009 10:26:49 +0200
Subject: [S390] wire up sys_perf_counter_open

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 9055723..c80602d 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -268,7 +268,8 @@
 #define	__NR_preadv		328
 #define	__NR_pwritev		329
 #define __NR_rt_tgsigqueueinfo	330
-#define NR_syscalls 331
+#define __NR_perf_counter_open	331
+#define NR_syscalls 332
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index d2e7fbf..88a83366 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1831,3 +1831,12 @@ compat_sys_rt_tgsigqueueinfo_wrapper:
 	lgfr	%r4,%r4			# int
 	llgtr	%r5,%r5			# struct compat_siginfo *
 	jg	compat_sys_rt_tgsigqueueinfo_wrapper # branch to system call
+
+	.globl	sys_perf_counter_open_wrapper
+sys_perf_counter_open_wrapper:
+	llgtr	%r2,%r2			# const struct perf_counter_attr *
+	lgfr	%r3,%r3			# pid_t
+	lgfr	%r4,%r4			# int
+	lgfr	%r5,%r5			# int
+	llgfr	%r6,%r6			# unsigned long
+	jg	sys_perf_counter_open	# branch to system call
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 82f89ef..ad1acd2 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -339,3 +339,4 @@ SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper)
 SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper)
 SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper)
 SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */
+SYSCALL(sys_perf_counter_open,sys_perf_counter_open,sys_perf_counter_open_wrapper)
-- 
cgit v0.10.2


From 6ab56315a36e42e90ad7173aa8b3bbd1467d1fea Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 12 Jun 2009 09:29:52 +0100
Subject: kmemleak: Remove the kmemleak.h include in drivers/char/vt.c

This file is no longer annotated for false positives but the kmemleak.h
include was still present.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Alan Cox <alan.cox@linux.intel.com>

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index de9ebee..c796a86 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -103,7 +103,6 @@
 #include <linux/io.h>
 #include <asm/system.h>
 #include <linux/uaccess.h>
-#include <linux/kmemleak.h>
 
 #define MAX_NR_CON_DRIVER 16
 
-- 
cgit v0.10.2


From 349ebbcc26258f40da5534a586233e7d6233071c Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Wed, 15 Apr 2009 08:48:08 +0000
Subject: Blackfin: add comment for anomaly 05000171 to init code

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index a7d7b2d..3498a90 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -1116,6 +1116,9 @@ int __init init_arch_irq(void)
 	    IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 |
 	    IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW;
 
+	/* This implicitly covers ANOMALY_05000171
+	 * Boot-ROM code modifies SICA_IWRx wakeup registers
+	 */
 #ifdef SIC_IWR0
 	bfin_write_SIC_IWR0(IWR_DISABLE_ALL);
 # ifdef SIC_IWR1
-- 
cgit v0.10.2


From c8342f872d7a3967fd09db54fece3f6c38d834df Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 31 Mar 2009 00:18:35 +0000
Subject: Blackfin: add some help text to the EBIU_AMBCTL settings

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 3640cdc..6b97aa8 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -1011,21 +1011,34 @@ endmenu
 
 menu "EBIU_AMBCTL Control"
 config BANK_0
-	hex "Bank 0"
+	hex "Bank 0 (AMBCTL0.L)"
 	default 0x7BB0
+	help
+	  These are the low 16 bits of the EBIU_AMBCTL0 MMR which are
+	  used to control the Asynchronous Memory Bank 0 settings.
 
 config BANK_1
-	hex "Bank 1"
+	hex "Bank 1 (AMBCTL0.H)"
 	default 0x7BB0
 	default 0x5558 if BF54x
+	help
+	  These are the high 16 bits of the EBIU_AMBCTL0 MMR which are
+	  used to control the Asynchronous Memory Bank 1 settings.
 
 config BANK_2
-	hex "Bank 2"
+	hex "Bank 2 (AMBCTL1.L)"
 	default 0x7BB0
+	help
+	  These are the low 16 bits of the EBIU_AMBCTL1 MMR which are
+	  used to control the Asynchronous Memory Bank 2 settings.
 
 config BANK_3
-	hex "Bank 3"
+	hex "Bank 3 (AMBCTL1.H)"
 	default 0x99B3
+	help
+	  These are the high 16 bits of the EBIU_AMBCTL1 MMR which are
+	  used to control the Asynchronous Memory Bank 3 settings.
+
 endmenu
 
 config EBIU_MBSCTLVAL
-- 
cgit v0.10.2


From ffdf3ec8060c1089f92468ade63981a217b5aace Mon Sep 17 00:00:00 2001
From: Yi Li <yi.li@analog.com>
Date: Tue, 31 Mar 2009 10:36:51 +0000
Subject: Blackfin: bf527-ezkit: add support for mmc-spi

Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index d086411..9ab8d8c 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -509,6 +509,13 @@ static struct bfin5xx_spi_chip ad9960_spi_chip_info = {
 };
 #endif
 
+#if defined(CONFIG_MMC_SPI) || defined(CONFIG_MMC_SPI_MODULE)
+static struct bfin5xx_spi_chip  mmc_spi_chip_info = {
+	.enable_dma = 0,
+	.bits_per_word = 8,
+};
+#endif
+
 #if defined(CONFIG_PBX)
 static struct bfin5xx_spi_chip spi_si3xxx_chip_info = {
 	.ctl_reg	= 0x4, /* send zero */
@@ -624,6 +631,17 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 		.controller_data = &ad9960_spi_chip_info,
 	},
 #endif
+#if defined(CONFIG_MMC_SPI) || defined(CONFIG_MMC_SPI_MODULE)
+	{
+		.modalias = "mmc_spi",
+		.max_speed_hz = 20000000,     /* max spi clock (SCK) speed in HZ */
+		.bus_num = 0,
+		.chip_select = 3,
+		.controller_data = &mmc_spi_chip_info,
+		.mode = SPI_MODE_0,
+	},
+#endif
+
 #if defined(CONFIG_PBX)
 	{
 		.modalias = "fxs-spi",
-- 
cgit v0.10.2


From 8d5c2f03bb6302a2aa5a4d0d28f59d01e2eea6c1 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Mon, 30 Mar 2009 09:07:26 +0000
Subject: Blackfin: Allow bf548 ATAPI to be routed to GPIO

By default, it is routed to async memory address. In GPIO case,
GPIO peripheral PINs should be requested in advance.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mach-bf548/Kconfig b/arch/blackfin/mach-bf548/Kconfig
index dcf6571..8d24eaa 100644
--- a/arch/blackfin/mach-bf548/Kconfig
+++ b/arch/blackfin/mach-bf548/Kconfig
@@ -11,6 +11,13 @@ config DEB_DMA_URGENT
 	help
 	  Treat any DEB1, DEB2 and DEB3 request as Urgent
 
+config BF548_ATAPI_ALTERNATIVE_PORT
+	bool "BF548 ATAPI alternative port via GPIO"
+	help
+	  BF548 ATAPI data and address PINs can be routed through
+	  async address or GPIO port F and G. Select y to route it
+	  to GPIO.
+
 comment "Interrupt Priority Assignment"
 menu "Priority"
 
diff --git a/arch/blackfin/mach-bf548/include/mach/portmux.h b/arch/blackfin/mach-bf548/include/mach/portmux.h
index ffb1d0a..ce372ba 100644
--- a/arch/blackfin/mach-bf548/include/mach/portmux.h
+++ b/arch/blackfin/mach-bf548/include/mach/portmux.h
@@ -167,22 +167,42 @@
 #define P_PPI0_D13	(P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(0))
 #define P_PPI0_D14	(P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(0))
 #define P_PPI0_D15	(P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(0))
-#define P_ATAPI_D0A	(P_DEFINED | P_IDENT(GPIO_PF0) | P_FUNCT(1))
-#define P_ATAPI_D1A	(P_DEFINED | P_IDENT(GPIO_PF1) | P_FUNCT(1))
-#define P_ATAPI_D2A	(P_DEFINED | P_IDENT(GPIO_PF2) | P_FUNCT(1))
-#define P_ATAPI_D3A	(P_DEFINED | P_IDENT(GPIO_PF3) | P_FUNCT(1))
-#define P_ATAPI_D4A	(P_DEFINED | P_IDENT(GPIO_PF4) | P_FUNCT(1))
-#define P_ATAPI_D5A	(P_DEFINED | P_IDENT(GPIO_PF5) | P_FUNCT(1))
-#define P_ATAPI_D6A	(P_DEFINED | P_IDENT(GPIO_PF6) | P_FUNCT(1))
-#define P_ATAPI_D7A	(P_DEFINED | P_IDENT(GPIO_PF7) | P_FUNCT(1))
-#define P_ATAPI_D8A	(P_DEFINED | P_IDENT(GPIO_PF8) | P_FUNCT(1))
-#define P_ATAPI_D9A	(P_DEFINED | P_IDENT(GPIO_PF9) | P_FUNCT(1))
-#define P_ATAPI_D10A	(P_DEFINED | P_IDENT(GPIO_PF10) | P_FUNCT(1))
-#define P_ATAPI_D11A	(P_DEFINED | P_IDENT(GPIO_PF11) | P_FUNCT(1))
-#define P_ATAPI_D12A	(P_DEFINED | P_IDENT(GPIO_PF12) | P_FUNCT(1))
-#define P_ATAPI_D13A	(P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(1))
-#define P_ATAPI_D14A	(P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(1))
-#define P_ATAPI_D15A	(P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(1))
+
+#ifdef CONFIG_BF548_ATAPI_ALTERNATIVE_PORT
+# define P_ATAPI_D0A	(P_DEFINED | P_IDENT(GPIO_PF0) | P_FUNCT(1))
+# define P_ATAPI_D1A	(P_DEFINED | P_IDENT(GPIO_PF1) | P_FUNCT(1))
+# define P_ATAPI_D2A	(P_DEFINED | P_IDENT(GPIO_PF2) | P_FUNCT(1))
+# define P_ATAPI_D3A	(P_DEFINED | P_IDENT(GPIO_PF3) | P_FUNCT(1))
+# define P_ATAPI_D4A	(P_DEFINED | P_IDENT(GPIO_PF4) | P_FUNCT(1))
+# define P_ATAPI_D5A	(P_DEFINED | P_IDENT(GPIO_PF5) | P_FUNCT(1))
+# define P_ATAPI_D6A	(P_DEFINED | P_IDENT(GPIO_PF6) | P_FUNCT(1))
+# define P_ATAPI_D7A	(P_DEFINED | P_IDENT(GPIO_PF7) | P_FUNCT(1))
+# define P_ATAPI_D8A	(P_DEFINED | P_IDENT(GPIO_PF8) | P_FUNCT(1))
+# define P_ATAPI_D9A	(P_DEFINED | P_IDENT(GPIO_PF9) | P_FUNCT(1))
+# define P_ATAPI_D10A	(P_DEFINED | P_IDENT(GPIO_PF10) | P_FUNCT(1))
+# define P_ATAPI_D11A	(P_DEFINED | P_IDENT(GPIO_PF11) | P_FUNCT(1))
+# define P_ATAPI_D12A	(P_DEFINED | P_IDENT(GPIO_PF12) | P_FUNCT(1))
+# define P_ATAPI_D13A	(P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(1))
+# define P_ATAPI_D14A	(P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(1))
+# define P_ATAPI_D15A	(P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(1))
+#else
+# define P_ATAPI_D0A	(P_DONTCARE)
+# define P_ATAPI_D1A	(P_DONTCARE)
+# define P_ATAPI_D2A	(P_DONTCARE)
+# define P_ATAPI_D3A	(P_DONTCARE)
+# define P_ATAPI_D4A	(P_DONTCARE)
+# define P_ATAPI_D5A	(P_DONTCARE)
+# define P_ATAPI_D6A	(P_DONTCARE)
+# define P_ATAPI_D7A	(P_DONTCARE)
+# define P_ATAPI_D8A	(P_DONTCARE)
+# define P_ATAPI_D9A	(P_DONTCARE)
+# define P_ATAPI_D10A	(P_DONTCARE)
+# define P_ATAPI_D11A	(P_DONTCARE)
+# define P_ATAPI_D12A	(P_DONTCARE)
+# define P_ATAPI_D13A	(P_DONTCARE)
+# define P_ATAPI_D14A	(P_DONTCARE)
+# define P_ATAPI_D15A	(P_DONTCARE)
+#endif
 
 #define P_PPI0_CLK	(P_DEFINED | P_IDENT(GPIO_PG0) | P_FUNCT(0))
 #define P_PPI0_FS1	(P_DEFINED | P_IDENT(GPIO_PG1) | P_FUNCT(0))
@@ -200,9 +220,15 @@
 #define P_CAN0_RX	(P_DEFINED | P_IDENT(GPIO_PG13) | P_FUNCT(0))
 #define P_CAN1_TX	(P_DEFINED | P_IDENT(GPIO_PG14) | P_FUNCT(0))
 #define P_CAN1_RX	(P_DEFINED | P_IDENT(GPIO_PG15) | P_FUNCT(0))
-#define P_ATAPI_A0A	(P_DEFINED | P_IDENT(GPIO_PG2) | P_FUNCT(1))
-#define P_ATAPI_A1A	(P_DEFINED | P_IDENT(GPIO_PG3) | P_FUNCT(1))
-#define P_ATAPI_A2A	(P_DEFINED | P_IDENT(GPIO_PG4) | P_FUNCT(1))
+#ifdef CONFIG_BF548_ATAPI_ALTERNATIVE_PORT
+# define P_ATAPI_A0A	(P_DEFINED | P_IDENT(GPIO_PG2) | P_FUNCT(1))
+# define P_ATAPI_A1A	(P_DEFINED | P_IDENT(GPIO_PG3) | P_FUNCT(1))
+# define P_ATAPI_A2A	(P_DEFINED | P_IDENT(GPIO_PG4) | P_FUNCT(1))
+#else
+# define P_ATAPI_A0A	(P_DONTCARE)
+# define P_ATAPI_A1A	(P_DONTCARE)
+# define P_ATAPI_A2A	(P_DONTCARE)
+#endif
 #define P_HOST_CE	(P_DEFINED | P_IDENT(GPIO_PG5) | P_FUNCT(1))
 #define P_HOST_RD	(P_DEFINED | P_IDENT(GPIO_PG6) | P_FUNCT(1))
 #define P_HOST_WR	(P_DEFINED | P_IDENT(GPIO_PG7) | P_FUNCT(1))
-- 
cgit v0.10.2


From b8d0c778e652d23750cb1af9848408d620cbc425 Mon Sep 17 00:00:00 2001
From: Robin Getz <rgetz@blackfin.uclinux.org>
Date: Tue, 31 Mar 2009 13:40:52 +0000
Subject: Blackfin: allow scheduler functions to be placed into L1

Signed-off-by: Robin Getz <rgetz@blackfin.uclinux.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index 27952ae..8b67167 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -50,7 +50,9 @@ SECTIONS
 		_text = .;
 		__stext = .;
 		TEXT_TEXT
+#ifndef CONFIG_SCHEDULE_L1
 		SCHED_TEXT
+#endif
 		LOCK_TEXT
 		KPROBES_TEXT
 		*(.text.*)
@@ -180,6 +182,9 @@ SECTIONS
 		. = ALIGN(4);
 		__stext_l1 = .;
 		*(.l1.text)
+#ifdef CONFIG_SCHEDULE_L1
+		SCHED_TEXT
+#endif
 		. = ALIGN(4);
 		__etext_l1 = .;
 	}
-- 
cgit v0.10.2


From f82e0a0c67621df83458753aef580a3508d5428e Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Wed, 8 Apr 2009 08:30:22 +0000
Subject: Blackfin: fix link failure due to CONFIG_EXCEPTION_L1_SCRATCH

Move exception stack mess from entry.S to init.c to fix link failure when
CONFIG_EXCEPTION_L1_SCRATCH is in use.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 6b97aa8..672b7b0 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -808,7 +808,7 @@ config APP_STACK_L1
 config EXCEPTION_L1_SCRATCH
 	bool "Locate exception stack in L1 Scratch Memory"
 	default n
-	depends on !APP_STACK_L1 && !SYSCALL_TAB_L1
+	depends on !APP_STACK_L1
 	help
 	  Whenever an exception occurs, use the L1 Scratch memory for
 	  stack storage.  You cannot place the stacks of FLAT binaries
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index a063a43..f0636fd 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -36,7 +36,6 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/unistd.h>
-#include <linux/threads.h>
 #include <asm/blackfin.h>
 #include <asm/errno.h>
 #include <asm/fixed_code.h>
@@ -1588,19 +1587,3 @@ ENTRY(_sys_call_table)
 	.long _sys_ni_syscall
 	.endr
 END(_sys_call_table)
-
-#ifdef CONFIG_EXCEPTION_L1_SCRATCH
-/* .section .l1.bss.scratch */
-.set _exception_stack_top, L1_SCRATCH_START + L1_SCRATCH_LENGTH
-#else
-#ifdef CONFIG_SYSCALL_TAB_L1
-.section .l1.bss
-#else
-.bss
-#endif
-ENTRY(_exception_stack)
-	.rept 1024 * NR_CPUS
-	.long 0
-	.endr
-_exception_stack_top:
-#endif
diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c
index 9c3629b..00543c8 100644
--- a/arch/blackfin/mm/init.c
+++ b/arch/blackfin/mm/init.c
@@ -52,9 +52,14 @@ static unsigned long empty_bad_page_table;
 
 static unsigned long empty_bad_page;
 
-unsigned long empty_zero_page;
+static unsigned long empty_zero_page;
 
-extern unsigned long exception_stack[NR_CPUS][1024];
+#ifndef CONFIG_EXCEPTION_L1_SCRATCH
+#if defined CONFIG_SYSCALL_TAB_L1
+__attribute__((l1_data))
+#endif
+static unsigned long exception_stack[NR_CPUS][1024];
+#endif
 
 struct blackfin_pda cpu_pda[NR_CPUS];
 EXPORT_SYMBOL(cpu_pda);
@@ -117,7 +122,12 @@ asmlinkage void __init init_pda(void)
 	cpu_pda[0].next = &cpu_pda[1];
 	cpu_pda[1].next = &cpu_pda[0];
 
+#ifdef CONFIG_EXCEPTION_L1_SCRATCH
+	cpu_pda[cpu].ex_stack = (unsigned long *)(L1_SCRATCH_START + \
+					L1_SCRATCH_LENGTH);
+#else
 	cpu_pda[cpu].ex_stack = exception_stack[cpu + 1];
+#endif
 
 #ifdef CONFIG_SMP
 	cpu_pda[cpu].imask = 0x1f;
-- 
cgit v0.10.2


From 5d89137a17ca804ee60077f5d4ad8d7ca60f0614 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 10 Apr 2009 20:52:08 +0000
Subject: Blackfin: fix data cache flushing when doing icache flushing

Make sure we flush all data caches and their write buffers before flushing
icache, otherwise random edge cases could crop up where stale data is read
into icache from external memory.  As fallout, punt the combined icache +
dcache flush function since we cannot safely do them back to back -- the
SSYNC is needed between the dcache flush and the icache flush.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/include/asm/cacheflush.h b/arch/blackfin/include/asm/cacheflush.h
index 1b040f5..d7726ab 100644
--- a/arch/blackfin/include/asm/cacheflush.h
+++ b/arch/blackfin/include/asm/cacheflush.h
@@ -30,7 +30,8 @@
 #ifndef _BLACKFIN_CACHEFLUSH_H
 #define _BLACKFIN_CACHEFLUSH_H
 
-extern void blackfin_icache_dcache_flush_range(unsigned long start_address, unsigned long end_address);
+#include <asm/blackfin.h>	/* for SSYNC() */
+
 extern void blackfin_icache_flush_range(unsigned long start_address, unsigned long end_address);
 extern void blackfin_dcache_flush_range(unsigned long start_address, unsigned long end_address);
 extern void blackfin_dcache_invalidate_range(unsigned long start_address, unsigned long end_address);
@@ -54,32 +55,28 @@ extern void blackfin_invalidate_entire_dcache(void);
 
 static inline void flush_icache_range(unsigned start, unsigned end)
 {
-#if defined(CONFIG_BFIN_DCACHE) && defined(CONFIG_BFIN_ICACHE)
-
-# if defined(CONFIG_BFIN_WT)
-	blackfin_icache_flush_range((start), (end));
-	flush_icache_range_others(start, end);
-# else
-	blackfin_icache_dcache_flush_range((start), (end));
-# endif
-
-#else
+#if defined(CONFIG_BFIN_WB)
+	blackfin_dcache_flush_range(start, end);
+#endif
 
-# if defined(CONFIG_BFIN_ICACHE)
-	blackfin_icache_flush_range((start), (end));
+	/* Make sure all write buffers in the data side of the core
+	 * are flushed before trying to invalidate the icache.  This
+	 * needs to be after the data flush and before the icache
+	 * flush so that the SSYNC does the right thing in preventing
+	 * the instruction prefetcher from hitting things in cached
+	 * memory at the wrong time -- it runs much further ahead than
+	 * the pipeline.
+	 */
+	SSYNC();
+#if defined(CONFIG_BFIN_ICACHE)
+	blackfin_icache_flush_range(start, end);
 	flush_icache_range_others(start, end);
-# endif
-# if defined(CONFIG_BFIN_DCACHE)
-	blackfin_dcache_flush_range((start), (end));
-# endif
-
 #endif
 }
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len)		\
 do { memcpy(dst, src, len);						\
      flush_icache_range((unsigned) (dst), (unsigned) (dst) + (len));	\
-     flush_icache_range_others((unsigned long) (dst), (unsigned long) (dst) + (len));\
 } while (0)
 
 #define copy_from_user_page(vma, page, vaddr, dst, src, len)	memcpy(dst, src, len)
diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c
index 01f917d..53e893f 100644
--- a/arch/blackfin/kernel/bfin_ksyms.c
+++ b/arch/blackfin/kernel/bfin_ksyms.c
@@ -16,7 +16,6 @@ EXPORT_SYMBOL(bfin_return_from_exception);
 
 /* All the Blackfin cache functions: mach-common/cache.S */
 EXPORT_SYMBOL(blackfin_dcache_invalidate_range);
-EXPORT_SYMBOL(blackfin_icache_dcache_flush_range);
 EXPORT_SYMBOL(blackfin_icache_flush_range);
 EXPORT_SYMBOL(blackfin_dcache_flush_range);
 EXPORT_SYMBOL(blackfin_dflush_page);
diff --git a/arch/blackfin/mach-common/cache.S b/arch/blackfin/mach-common/cache.S
index aa0648c..c295e8f 100644
--- a/arch/blackfin/mach-common/cache.S
+++ b/arch/blackfin/mach-common/cache.S
@@ -80,22 +80,6 @@ ENTRY(_blackfin_icache_flush_range)
 	do_flush IFLUSH, , nop
 ENDPROC(_blackfin_icache_flush_range)
 
-/* Flush all cache lines assocoiated with this area of memory. */
-ENTRY(_blackfin_icache_dcache_flush_range)
-/*
- * Walkaround to avoid loading wrong instruction after invalidating icache
- * and following sequence is met.
- *
- * 1) One instruction address is cached in the instruction cache.
- * 2) This instruction in SDRAM is changed.
- * 3) IFLASH[P0] is executed only once in blackfin_icache_flush_range().
- * 4) This instruction is executed again, but the old one is loaded.
- */
-	P0 = R0;
-	IFLUSH[P0];
-	do_flush FLUSH, IFLUSH
-ENDPROC(_blackfin_icache_dcache_flush_range)
-
 /* Throw away all D-cached data in specified region without any obligation to
  * write them back.  Since the Blackfin ISA does not have an "invalidate"
  * instruction, we use flush/invalidate.  Perhaps as a speed optimization we
-- 
cgit v0.10.2


From 4b402e3a54a46505692ec8215231e2ede46a4aeb Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Mon, 20 Apr 2009 09:20:58 +0000
Subject: Blackfin: fix bug found by traps test case 21

The traps test case 21 "exception 0x3f: l1_instruction_access" would make
the kernel panic on BF533's because we end up calling show_stack()
infinitely.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mm/isram-driver.c b/arch/blackfin/mm/isram-driver.c
index 22913e7..c080e70 100644
--- a/arch/blackfin/mm/isram-driver.c
+++ b/arch/blackfin/mm/isram-driver.c
@@ -125,7 +125,7 @@ static bool isram_check_addr(const void *addr, size_t n)
 {
 	if ((addr >= (void *)L1_CODE_START) &&
 	    (addr < (void *)(L1_CODE_START + L1_CODE_LENGTH))) {
-		if ((addr + n) >= (void *)(L1_CODE_START + L1_CODE_LENGTH)) {
+		if ((addr + n) > (void *)(L1_CODE_START + L1_CODE_LENGTH)) {
 			show_stack(NULL, NULL);
 			printk(KERN_ERR "isram_memcpy: copy involving %p length "
 					"(%zu) too long\n", addr, n);
-- 
cgit v0.10.2


From f4290e81f6035b49d4a03706081b2c5f58906386 Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Date: Sun, 12 Apr 2009 20:03:30 +0000
Subject: Blackfin: fix parentheses balance and convert some tab/space mixing

Signed-off-by: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mach-bf538/include/mach/cdefBF538.h b/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
index 241725b..7a5f74c 100644
--- a/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
+++ b/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
@@ -67,14 +67,14 @@
 #define bfin_write_SIC_ISR0(val)       bfin_write32(SIC_ISR0, val)
 #define bfin_read_SIC_ISR1()           bfin_read32(SIC_ISR1)
 #define bfin_write_SIC_ISR1(val)       bfin_write32(SIC_ISR1, val)
-#define bfin_read_SIC_ISR(x)	       bfin_read32(SIC_ISR0 + x * (SIC_ISR1 - SIC_ISR0))
+#define bfin_read_SIC_ISR(x)           bfin_read32(SIC_ISR0 + x * (SIC_ISR1 - SIC_ISR0))
 #define bfin_write_SIC_ISR(x, val)     bfin_write32(SIC_ISR0 + x * (SIC_ISR1 - SIC_ISR0), val)
 #define bfin_read_SIC_IWR0()           bfin_read32(SIC_IWR0)
 #define bfin_write_SIC_IWR0(val)       bfin_write32(SIC_IWR0, val)
 #define bfin_read_SIC_IWR1()           bfin_read32(SIC_IWR1)
 #define bfin_write_SIC_IWR1(val)       bfin_write32(SIC_IWR1, val)
-#define bfin_read_SIC_IWR(x)	       bfin_read32(SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0))
-#define bfin_write_SIC_IWR(x, val)     bfin_write32((SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0), val)
+#define bfin_read_SIC_IWR(x)           bfin_read32(SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0))
+#define bfin_write_SIC_IWR(x, val)     bfin_write32(SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0), val)
 #define bfin_read_SIC_IAR0()           bfin_read32(SIC_IAR0)
 #define bfin_write_SIC_IAR0(val)       bfin_write32(SIC_IAR0, val)
 #define bfin_read_SIC_IAR1()           bfin_read32(SIC_IAR1)
-- 
cgit v0.10.2


From f75196c404246b371182bb7554eab695619cb36f Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 1 Apr 2009 12:25:58 +0000
Subject: Blackfin: fix warnings with I/O port macros

The I/O port functions take ints, so we need to cast them up before
passing to our read/write funcs to avoid ugly messes of warnings.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h
index 63b2d8c..3022b5c 100644
--- a/arch/blackfin/include/asm/io.h
+++ b/arch/blackfin/include/asm/io.h
@@ -80,19 +80,22 @@ static inline unsigned int readl(const volatile void __iomem *addr)
 #define memcpy_fromio(a,b,c)	memcpy((a),(void *)(b),(c))
 #define memcpy_toio(a,b,c)	memcpy((void *)(a),(b),(c))
 
-#define inb(addr)    readb(addr)
-#define inw(addr)    readw(addr)
-#define inl(addr)    readl(addr)
-#define outb(x,addr) ((void) writeb(x,addr))
-#define outw(x,addr) ((void) writew(x,addr))
-#define outl(x,addr) ((void) writel(x,addr))
-
-#define inb_p(addr)    inb(addr)
-#define inw_p(addr)    inw(addr)
-#define inl_p(addr)    inl(addr)
-#define outb_p(x,addr) outb(x,addr)
-#define outw_p(x,addr) outw(x,addr)
-#define outl_p(x,addr) outl(x,addr)
+/* Convert "I/O port addresses" to actual addresses.  i.e. ugly casts. */
+#define __io(port) ((void *)(unsigned long)(port))
+
+#define inb(port)    readb(__io(port))
+#define inw(port)    readw(__io(port))
+#define inl(port)    readl(__io(port))
+#define outb(x,port) writeb(x,__io(port))
+#define outw(x,port) writew(x,__io(port))
+#define outl(x,port) writel(x,__io(port))
+
+#define inb_p(port)    inb(__io(port))
+#define inw_p(port)    inw(__io(port))
+#define inl_p(port)    inl(__io(port))
+#define outb_p(x,port) outb(x,__io(port))
+#define outw_p(x,port) outw(x,__io(port))
+#define outl_p(x,port) outl(x,__io(port))
 
 #define ioread8_rep(a,d,c)	readsb(a,d,c)
 #define ioread16_rep(a,d,c)	readsw(a,d,c)
-- 
cgit v0.10.2


From 51387009bd4d9fdbc9d4a91e45bd96cc8e3065de Mon Sep 17 00:00:00 2001
From: Philippe Gerum <rpm@xenomai.org>
Date: Wed, 8 Apr 2009 07:41:55 +0000
Subject: Blackfin: merge Philippe's recent ipipe patch

ipipe-2.6.28.9-blackfin-git95aafe6.patch

Singed-off-by: Philippe Gerum <rpm@xenomai.org>
Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/ipipe.h b/arch/blackfin/include/asm/ipipe.h
index 343b563..51d0bf5 100644
--- a/arch/blackfin/include/asm/ipipe.h
+++ b/arch/blackfin/include/asm/ipipe.h
@@ -35,10 +35,10 @@
 #include <asm/atomic.h>
 #include <asm/traps.h>
 
-#define IPIPE_ARCH_STRING     "1.9-00"
+#define IPIPE_ARCH_STRING     "1.9-01"
 #define IPIPE_MAJOR_NUMBER    1
 #define IPIPE_MINOR_NUMBER    9
-#define IPIPE_PATCH_NUMBER    0
+#define IPIPE_PATCH_NUMBER    1
 
 #ifdef CONFIG_SMP
 #error "I-pipe/blackfin: SMP not implemented"
diff --git a/arch/blackfin/kernel/ipipe.c b/arch/blackfin/kernel/ipipe.c
index a5de8d4..5fc4248 100644
--- a/arch/blackfin/kernel/ipipe.c
+++ b/arch/blackfin/kernel/ipipe.c
@@ -167,7 +167,7 @@ int __ipipe_check_root(void)
 void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	int prio = desc->ic_prio;
+	int prio = __ipipe_get_irq_priority(irq);
 
 	desc->depth = 0;
 	if (ipd != &ipipe_root &&
@@ -178,8 +178,7 @@ EXPORT_SYMBOL(__ipipe_enable_irqdesc);
 
 void __ipipe_disable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	int prio = desc->ic_prio;
+	int prio = __ipipe_get_irq_priority(irq);
 
 	if (ipd != &ipipe_root &&
 	    atomic_dec_and_test(&__ipipe_irq_lvdepth[prio]))
@@ -310,12 +309,16 @@ int ipipe_trigger_irq(unsigned irq)
 
 asmlinkage void __ipipe_sync_root(void)
 {
+	void (*irq_tail_hook)(void) = (void (*)(void))__ipipe_irq_tail_hook;
 	unsigned long flags;
 
 	BUG_ON(irqs_disabled());
 
 	local_irq_save_hw(flags);
 
+	if (irq_tail_hook)
+		irq_tail_hook();
+
 	clear_thread_flag(TIF_IRQ_SYNC);
 
 	if (ipipe_root_cpudom_var(irqpend_himask) != 0)
diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index 3498a90..389c5e8 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -1139,13 +1139,6 @@ int __init init_arch_irq(void)
 	bfin_write_SIC_IWR(IWR_DISABLE_ALL);
 #endif
 
-#ifdef CONFIG_IPIPE
-	for (irq = 0; irq < NR_IRQS; irq++) {
-		struct irq_desc *desc = irq_to_desc(irq);
-		desc->ic_prio = __ipipe_get_irq_priority(irq);
-	}
-#endif /* CONFIG_IPIPE */
-
 	return 0;
 }
 
-- 
cgit v0.10.2


From 648882d940a1f84cbf11418ae6e405ef42a66855 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Tue, 21 Apr 2009 12:05:50 +0000
Subject: Blackfin: fix up PATA resource handling in bf537-stamp

Make sure the addresses declared match reality, and make the PATA IRQ code
optional.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index 0572926..9bd0a80 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -1358,16 +1358,18 @@ static struct resource bfin_pata_resources[] = {
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 0,
 };
-
+/* CompactFlash Storage Card Memory Mapped Adressing
+ * /REG = A11 = 1
+ */
 static struct resource bfin_pata_resources[] = {
 	{
-		.start = 0x20211820,
-		.end = 0x2021183F,
+		.start = 0x20211800,
+		.end = 0x20211807,
 		.flags = IORESOURCE_MEM,
 	},
 	{
-		.start = 0x2021181C,
-		.end = 0x2021181F,
+		.start = 0x2021180E,	/* Device Ctl */
+		.end = 0x2021180E,
 		.flags = IORESOURCE_MEM,
 	},
 };
@@ -1527,7 +1529,8 @@ static int __init stamp_init(void)
 	platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices));
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
 
-#if defined(CONFIG_PATA_PLATFORM) || defined(CONFIG_PATA_PLATFORM_MODULE)
+#if (defined(CONFIG_PATA_PLATFORM) || defined(CONFIG_PATA_PLATFORM_MODULE)) \
+	 && defined(PATA_INT)
 	irq_desc[PATA_INT].status |= IRQ_NOAUTOEN;
 #endif
 
-- 
cgit v0.10.2


From a413647bb5bbe5414cd68332ff77588db09d10be Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 8 May 2009 07:40:25 +0000
Subject: Blackfin: pull updated anomaly lists from toolchain

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mach-bf518/include/mach/anomaly.h b/arch/blackfin/mach-bf518/include/mach/anomaly.h
index c847bb1..b69bd9a 100644
--- a/arch/blackfin/mach-bf518/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf518/include/mach/anomaly.h
@@ -6,14 +6,19 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision B, 02/03/2009; ADSP-BF512/BF514/BF516/BF518 Blackfin Processor Anomaly List
  */
 
+/* We plan on not supporting 0.0 silicon, but 0.1 isn't out yet - sorry */
+#if __SILICON_REVISION__ < 0
+# error will not work on BF518 silicon version
+#endif
+
 #ifndef _MACH_ANOMALY_H_
 #define _MACH_ANOMALY_H_
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
@@ -47,7 +52,7 @@
 #define ANOMALY_05000435 (1)
 /* PORTx_DRIVE and PORTx_HYSTERESIS Registers Read Back Incorrect Values */
 #define ANOMALY_05000438 (1)
-/* Preboot Cannot be Used to Program the PLL_DIV Register */
+/* Preboot Cannot be Used to Alter the PLL_DIV Register */
 #define ANOMALY_05000439 (1)
 /* bfrom_SysControl() Cannot be Used to Write the PLL_DIV Register */
 #define ANOMALY_05000440 (1)
@@ -61,32 +66,56 @@
 #define ANOMALY_05000453 (1)
 /* PPI_FS3 is Driven One Half Cycle Later Than PPI Data */
 #define ANOMALY_05000455 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000119 (0)
+#define ANOMALY_05000120 (0)
 #define ANOMALY_05000125 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000179 (0)
 #define ANOMALY_05000183 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000233 (0)
+#define ANOMALY_05000242 (0)
 #define ANOMALY_05000244 (0)
+#define ANOMALY_05000248 (0)
+#define ANOMALY_05000250 (0)
 #define ANOMALY_05000261 (0)
 #define ANOMALY_05000263 (0)
 #define ANOMALY_05000266 (0)
 #define ANOMALY_05000273 (0)
+#define ANOMALY_05000274 (0)
 #define ANOMALY_05000278 (0)
 #define ANOMALY_05000285 (0)
+#define ANOMALY_05000287 (0)
+#define ANOMALY_05000301 (0)
 #define ANOMALY_05000305 (0)
 #define ANOMALY_05000307 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000312 (0)
 #define ANOMALY_05000323 (0)
 #define ANOMALY_05000353 (0)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (0)
+#define ANOMALY_05000389 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf527/include/mach/anomaly.h b/arch/blackfin/mach-bf527/include/mach/anomaly.h
index df6808d..c84ddea 100644
--- a/arch/blackfin/mach-bf527/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf527/include/mach/anomaly.h
@@ -6,14 +6,19 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
- *  - Revision B, 08/12/2008; ADSP-BF526 Blackfin Processor Anomaly List
- *  - Revision E, 08/18/2008; ADSP-BF527 Blackfin Processor Anomaly List
+/* This file should be up to date with:
+ *  - Revision C, 03/13/2009; ADSP-BF526 Blackfin Processor Anomaly List
+ *  - Revision F, 03/03/2009; ADSP-BF527 Blackfin Processor Anomaly List
  */
 
 #ifndef _MACH_ANOMALY_H_
 #define _MACH_ANOMALY_H_
 
+/* We do not support old silicon - sorry */
+#if __SILICON_REVISION__ < 0
+# error will not work on BF526/BF527 silicon version
+#endif
+
 #if defined(__ADSPBF522__) || defined(__ADSPBF524__) || defined(__ADSPBF526__)
 # define ANOMALY_BF526 1
 #else
@@ -25,158 +30,203 @@
 # define ANOMALY_BF527 0
 #endif
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */
+#define _ANOMALY_BF526(rev526) (ANOMALY_BF526 && __SILICON_REVISION__ rev526)
+#define _ANOMALY_BF527(rev527) (ANOMALY_BF527 && __SILICON_REVISION__ rev527)
+#define _ANOMALY_BF526_BF527(rev526, rev527) (_ANOMALY_BF526(rev526) || _ANOMALY_BF527(rev527))
+
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)	/* note: brokenness is noted in documentation, not anomaly sheet */
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
+/* Incorrect Timer Pulse Width in Single-Shot PWM_OUT Mode with External Clock */
+#define ANOMALY_05000254 (1)
 /* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */
 #define ANOMALY_05000265 (1)
 /* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */
 #define ANOMALY_05000310 (1)
 /* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
-#define ANOMALY_05000313 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000313 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Incorrect Access of OTP_STATUS During otp_write() Function */
-#define ANOMALY_05000328 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000328 (_ANOMALY_BF527(< 2))
+/* Host DMA Boot Modes Are Not Functional */
+#define ANOMALY_05000330 (__SILICON_REVISION__ < 2)
 /* Disallowed Configuration Prevents Subsequent Allowed Configuration on Host DMA Port */
-#define ANOMALY_05000337 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000337 (_ANOMALY_BF527(< 2))
 /* Ethernet MAC MDIO Reads Do Not Meet IEEE Specification */
-#define ANOMALY_05000341 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000341 (_ANOMALY_BF527(< 2))
 /* TWI May Not Operate Correctly Under Certain Signal Termination Conditions */
-#define ANOMALY_05000342 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000342 (_ANOMALY_BF527(< 2))
 /* USB Calibration Value Is Not Initialized */
-#define ANOMALY_05000346 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000346 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* USB Calibration Value to use */
 #define ANOMALY_05000346_value 0xE510
 /* Preboot Routine Incorrectly Alters Reset Value of USB Register */
-#define ANOMALY_05000347 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000347 (_ANOMALY_BF527(< 2))
 /* Security Features Are Not Functional */
-#define ANOMALY_05000348 (ANOMALY_BF527 && __SILICON_REVISION__ < 1)
+#define ANOMALY_05000348 (_ANOMALY_BF527(< 1))
 /* bfrom_SysControl() Firmware Function Performs Improper System Reset */
-#define ANOMALY_05000353 (ANOMALY_BF526)
+#define ANOMALY_05000353 (_ANOMALY_BF526(< 1))
 /* Regulator Programming Blocked when Hibernate Wakeup Source Remains Active */
-#define ANOMALY_05000355 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000355 (_ANOMALY_BF527(< 2))
 /* Serial Port (SPORT) Multichannel Transmit Failure when Channel 0 Is Disabled */
-#define ANOMALY_05000357 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000357 (_ANOMALY_BF527(< 2))
 /* Incorrect Revision Number in DSPID Register */
-#define ANOMALY_05000364 (ANOMALY_BF527 && __SILICON_REVISION__ == 1)
+#define ANOMALY_05000364 (_ANOMALY_BF527(== 1))
 /* PPI Underflow Error Goes Undetected in ITU-R 656 Mode */
 #define ANOMALY_05000366 (1)
 /* Incorrect Default CSEL Value in PLL_DIV */
-#define ANOMALY_05000368 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000368 (_ANOMALY_BF527(< 2))
 /* Possible RETS Register Corruption when Subroutine Is under 5 Cycles in Duration */
-#define ANOMALY_05000371 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000371 (_ANOMALY_BF527(< 2))
 /* Authentication Fails To Initiate */
-#define ANOMALY_05000376 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000376 (_ANOMALY_BF527(< 2))
 /* Data Read From L3 Memory by USB DMA May be Corrupted */
-#define ANOMALY_05000380 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000380 (_ANOMALY_BF527(< 2))
 /* 8-Bit NAND Flash Boot Mode Not Functional */
-#define ANOMALY_05000382 (__SILICON_REVISION__ < 2)
-/* Host Must Not Read Back During Host DMA Boot */
-#define ANOMALY_05000384 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000382 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Boot from OTP Memory Not Functional */
-#define ANOMALY_05000385 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000385 (_ANOMALY_BF527(< 2))
 /* bfrom_SysControl() Firmware Routine Not Functional */
-#define ANOMALY_05000386 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000386 (_ANOMALY_BF527(< 2))
 /* Programmable Preboot Settings Not Functional */
-#define ANOMALY_05000387 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000387 (_ANOMALY_BF527(< 2))
 /* CRC32 Checksum Support Not Functional */
-#define ANOMALY_05000388 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000388 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Reset Vector Must Not Be in SDRAM Memory Space */
-#define ANOMALY_05000389 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000389 (_ANOMALY_BF527(< 2))
 /* pTempCurrent Not Present in ADI_BOOT_DATA Structure */
-#define ANOMALY_05000392 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000392 (_ANOMALY_BF527(< 2))
 /* Deprecated Value of dTempByteCount in ADI_BOOT_DATA Structure */
-#define ANOMALY_05000393 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000393 (_ANOMALY_BF527(< 2))
 /* Log Buffer Not Functional */
-#define ANOMALY_05000394 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000394 (_ANOMALY_BF527(< 2))
 /* Hook Routine Not Functional */
-#define ANOMALY_05000395 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000395 (_ANOMALY_BF527(< 2))
 /* Header Indirect Bit Not Functional */
-#define ANOMALY_05000396 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000396 (_ANOMALY_BF527(< 2))
 /* BK_ONES, BK_ZEROS, and BK_DATECODE Constants Not Functional */
-#define ANOMALY_05000397 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000397 (_ANOMALY_BF527(< 2))
 /* SWRESET, DFRESET and WDRESET Bits in the SYSCR Register Not Functional */
-#define ANOMALY_05000398 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000398 (_ANOMALY_BF527(< 2))
 /* BCODE_NOBOOT in BCODE Field of SYSCR Register Not Functional */
-#define ANOMALY_05000399 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000399 (_ANOMALY_BF527(< 2))
 /* PPI Data Signals D0 and D8 do not Tristate After Disabling PPI */
-#define ANOMALY_05000401 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000401 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Level-Sensitive External GPIO Wakeups May Cause Indefinite Stall */
-#define ANOMALY_05000403 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000403 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Lockbox SESR Disallows Certain User Interrupts */
-#define ANOMALY_05000404 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000404 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Lockbox SESR Firmware Does Not Save/Restore Full Context */
 #define ANOMALY_05000405 (1)
 /* Lockbox SESR Firmware Arguments Are Not Retained After First Initialization */
-#define ANOMALY_05000407 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000407 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Lockbox Firmware Memory Cleanup Routine Does not Clear Registers */
 #define ANOMALY_05000408 (1)
 /* Lockbox firmware leaves MDMA0 channel enabled */
-#define ANOMALY_05000409 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000409 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Incorrect Default Internal Voltage Regulator Setting */
-#define ANOMALY_05000410 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000410 (_ANOMALY_BF527(< 2))
 /* bfrom_SysControl() Firmware Function Cannot be Used to Enter Power Saving Modes */
-#define ANOMALY_05000411 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000411 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* OTP_CHECK_FOR_PREV_WRITE Bit is Not Functional in bfrom_OtpWrite() API */
-#define ANOMALY_05000414 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000414 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* DEB2_URGENT Bit Not Functional */
-#define ANOMALY_05000415 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000415 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Speculative Fetches Can Cause Undesired External FIFO Operations */
 #define ANOMALY_05000416 (1)
 /* SPORT0 Ignores External TSCLK0 on PG14 When TMR6 is an Output */
-#define ANOMALY_05000417 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
-/* tSFSPE and tHFSPE Do Not Meet Data Sheet Specifications */
-#define ANOMALY_05000418 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000417 (_ANOMALY_BF527(< 2))
+/* PPI Timing Requirements tSFSPE and tHFSPE Do Not Meet Data Sheet Specifications */
+#define ANOMALY_05000418 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* USB PLL_STABLE Bit May Not Accurately Reflect the USB PLL's Status */
-#define ANOMALY_05000420 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000420 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* TWI Fall Time (Tof) May Violate the Minimum I2C Specification */
 #define ANOMALY_05000421 (1)
 /* TWI Input Capacitance (Ci) May Violate the Maximum I2C Specification */
-#define ANOMALY_05000422 (ANOMALY_BF527 && __SILICON_REVISION__ > 1)
+#define ANOMALY_05000422 (_ANOMALY_BF526_BF527(> 0, > 1))
 /* Certain Ethernet Frames With Errors are Misclassified in RMII Mode */
-#define ANOMALY_05000423 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000423 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Internal Voltage Regulator Not Trimmed */
-#define ANOMALY_05000424 (ANOMALY_BF527 && __SILICON_REVISION__ < 2)
+#define ANOMALY_05000424 (_ANOMALY_BF527(< 2))
 /* Multichannel SPORT Channel Misalignment Under Specific Configuration */
-#define ANOMALY_05000425 (__SILICON_REVISION__ < 2)
-/* Speculative Fetches of Indirect-Pointer Instructions Can Cause Spurious Hardware Errors */
+#define ANOMALY_05000425 (_ANOMALY_BF526_BF527(< 1, < 2))
+/* Speculative Fetches of Indirect-Pointer Instructions Can Cause False Hardware Errors */
 #define ANOMALY_05000426 (1)
 /* WB_EDGE Bit in NFC_IRQSTAT Incorrectly Reflects Buffer Status Instead of IRQ Status */
-#define ANOMALY_05000429 (__SILICON_REVISION__ < 2)
+#define ANOMALY_05000429 (_ANOMALY_BF526_BF527(< 1, < 2))
 /* Software System Reset Corrupts PLL_LOCKCNT Register */
-#define ANOMALY_05000430 (ANOMALY_BF527 && __SILICON_REVISION__ > 1)
+#define ANOMALY_05000430 (_ANOMALY_BF527(> 1))
+/* Incorrect Use of Stack in Lockbox Firmware During Authentication */
+#define ANOMALY_05000431 (1)
 /* bfrom_SysControl() Does Not Clear SIC_IWR1 Before Executing PLL Programming Sequence */
-#define ANOMALY_05000432 (ANOMALY_BF526)
+#define ANOMALY_05000432 (_ANOMALY_BF526(< 1))
 /* Certain SIC Registers are not Reset After Soft or Core Double Fault Reset */
-#define ANOMALY_05000435 ((ANOMALY_BF526 && __SILICON_REVISION__ < 1) || ANOMALY_BF527)
+#define ANOMALY_05000435 (_ANOMALY_BF526_BF527(< 1, >= 0))
+/* Preboot Cannot be Used to Alter the PLL_DIV Register */
+#define ANOMALY_05000439 (_ANOMALY_BF526_BF527(< 1, >= 0))
+/* bfrom_SysControl() Cannot be Used to Write the PLL_DIV Register */
+#define ANOMALY_05000440 (_ANOMALY_BF526_BF527(< 1, >= 0))
+/* OTP Write Accesses Not Supported */
+#define ANOMALY_05000442 (_ANOMALY_BF527(< 1))
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* The WURESET Bit in the SYSCR Register is not Functional */
+#define ANOMALY_05000445 (1)
+/* BCODE_QUICKBOOT, BCODE_ALLBOOT, and BCODE_FULLBOOT Settings in SYSCR Register Not Functional */
+#define ANOMALY_05000451 (1)
+/* Incorrect Default Hysteresis Setting for RESET, NMI, and BMODE Signals */
+#define ANOMALY_05000452 (_ANOMALY_BF526_BF527(< 1, >= 0))
+/* USB Receive Interrupt Is Not Generated in DMA Mode 1 */
+#define ANOMALY_05000456 (1)
+/* Host DMA Port Responds to Certain Bus Activity Without HOST_CE Assertion */
+#define ANOMALY_05000457 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000120 (0)
 #define ANOMALY_05000125 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000179 (0)
 #define ANOMALY_05000183 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000233 (0)
+#define ANOMALY_05000242 (0)
 #define ANOMALY_05000244 (0)
+#define ANOMALY_05000248 (0)
+#define ANOMALY_05000250 (0)
 #define ANOMALY_05000261 (0)
 #define ANOMALY_05000263 (0)
 #define ANOMALY_05000266 (0)
 #define ANOMALY_05000273 (0)
+#define ANOMALY_05000274 (0)
 #define ANOMALY_05000278 (0)
 #define ANOMALY_05000285 (0)
+#define ANOMALY_05000287 (0)
+#define ANOMALY_05000301 (0)
 #define ANOMALY_05000305 (0)
 #define ANOMALY_05000307 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000312 (0)
 #define ANOMALY_05000323 (0)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf533/include/mach/anomaly.h b/arch/blackfin/mach-bf533/include/mach/anomaly.h
index 1cf893e..31145b5 100644
--- a/arch/blackfin/mach-bf533/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf533/include/mach/anomaly.h
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision E, 09/18/2008; ADSP-BF531/BF532/BF533 Blackfin Processor Anomaly List
  */
 
@@ -34,12 +34,12 @@
 # define ANOMALY_BF533 0
 #endif
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot 2 Not Supported */
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* UART Line Status Register (UART_LSR) Bits Are Not Updated at the Same Time */
 #define ANOMALY_05000099 (__SILICON_REVISION__ < 5)
 /* Watchpoint Status Register (WPSTAT) Bits Are Set on Every Corresponding Match */
-#define ANOMALY_05000105 (1)
+#define ANOMALY_05000105 (__SILICON_REVISION__ > 2)
 /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
@@ -48,7 +48,7 @@
 #define ANOMALY_05000158 (__SILICON_REVISION__ < 5)
 /* PPI Data Lengths Between 8 and 16 Do Not Zero Out Upper Bits */
 #define ANOMALY_05000166 (1)
-/* Turning Serial Ports on with External Frame Syncs */
+/* Turning SPORTs on while External Frame Sync Is Active May Corrupt Data */
 #define ANOMALY_05000167 (1)
 /* PPI_COUNT Cannot Be Programmed to 0 in General Purpose TX or RX Modes */
 #define ANOMALY_05000179 (__SILICON_REVISION__ < 5)
@@ -67,9 +67,9 @@
 /* Current DMA Address Shows Wrong Value During Carry Fix */
 #define ANOMALY_05000199 (__SILICON_REVISION__ < 4)
 /* SPORT TFS and DT Are Incorrectly Driven During Inactive Channels in Certain Conditions */
-#define ANOMALY_05000200 (__SILICON_REVISION__ < 5)
+#define ANOMALY_05000200 (__SILICON_REVISION__ == 3 || __SILICON_REVISION__ == 4)
 /* Receive Frame Sync Not Ignored During Active Frames in SPORT Multi-Channel Mode */
-#define ANOMALY_05000201 (__SILICON_REVISION__ < 4)
+#define ANOMALY_05000201 (__SILICON_REVISION__ == 3)
 /* Possible Infinite Stall with Specific Dual-DAG Situation */
 #define ANOMALY_05000202 (__SILICON_REVISION__ < 5)
 /* Specific Sequence That Can Cause DMA Error or DMA Stopping */
@@ -104,7 +104,7 @@
 #define ANOMALY_05000242 (__SILICON_REVISION__ < 5)
 /* If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control Causes Failures */
 #define ANOMALY_05000244 (__SILICON_REVISION__ < 5)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
 /* Data CPLBs Should Prevent Spurious Hardware Errors */
 #define ANOMALY_05000246 (__SILICON_REVISION__ < 5)
@@ -137,7 +137,7 @@
 /* High I/O Activity Causes Output Voltage of Internal Voltage Regulator (Vddint) to Decrease */
 #define ANOMALY_05000270 (__SILICON_REVISION__ < 5)
 /* Spontaneous Reset of Internal Voltage Regulator */
-#define ANOMALY_05000271 (__SILICON_REVISION__ < 4)
+#define ANOMALY_05000271 (__SILICON_REVISION__ == 3)
 /* Certain Data Cache Writethrough Modes Fail for Vddint <= 0.9V */
 #define ANOMALY_05000272 (1)
 /* Writes to Synchronous SDRAM Memory May Be Lost */
@@ -165,14 +165,14 @@
 /* New Feature: Additional PPI Frame Sync Sampling Options (Not Available On Older Silicon) */
 #define ANOMALY_05000306 (__SILICON_REVISION__ < 5)
 /* SCKELOW Bit Does Not Maintain State Through Hibernate */
-#define ANOMALY_05000307 (1)
+#define ANOMALY_05000307 (1)	/* note: brokenness is noted in documentation, not anomaly sheet */
 /* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */
 #define ANOMALY_05000310 (1)
 /* Erroneous Flag (GPIO) Pin Operations under Specific Sequences */
 #define ANOMALY_05000311 (__SILICON_REVISION__ < 6)
 /* Errors When SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
 #define ANOMALY_05000312 (__SILICON_REVISION__ < 6)
-/* PPI Is Level-Sensitive on First Transfer */
+/* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
 #define ANOMALY_05000313 (__SILICON_REVISION__ < 6)
 /* Killed System MMR Write Completes Erroneously On Next System MMR Access */
 #define ANOMALY_05000315 (__SILICON_REVISION__ < 6)
@@ -200,17 +200,63 @@
 #define ANOMALY_05000426 (1)
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* These anomalies have been "phased" out of analog.com anomaly sheets and are
  * here to show running on older silicon just isn't feasible.
  */
 
+/* Internal voltage regulator can't be modified via register writes */
+#define ANOMALY_05000066 (__SILICON_REVISION__ < 2)
 /* Watchpoints (Hardware Breakpoints) are not supported */
 #define ANOMALY_05000067 (__SILICON_REVISION__ < 3)
+/* SDRAM PSSE bit cannot be set again after SDRAM Powerup */
+#define ANOMALY_05000070 (__SILICON_REVISION__ < 2)
+/* Writing FIO_DIR can corrupt a programmable flag's data */
+#define ANOMALY_05000079 (__SILICON_REVISION__ < 2)
+/* Timer Auto-Baud Mode requires the UART clock to be enabled */
+#define ANOMALY_05000086 (__SILICON_REVISION__ < 2)
+/* Internal Clocking Modes on SPORT0 not supported */
+#define ANOMALY_05000088 (__SILICON_REVISION__ < 2)
+/* Internal voltage regulator does not wake up from an RTC wakeup */
+#define ANOMALY_05000092 (__SILICON_REVISION__ < 2)
+/* The IFLUSH instruction must be preceded by a CSYNC instruction */
+#define ANOMALY_05000093 (__SILICON_REVISION__ < 2)
+/* Vectoring to an instruction that is presently being filled into the instruction cache may cause erroneous behavior */
+#define ANOMALY_05000095 (__SILICON_REVISION__ < 2)
+/* PREFETCH, FLUSH, and FLUSHINV must be followed by a CSYNC */
+#define ANOMALY_05000096 (__SILICON_REVISION__ < 2)
+/* Performance Monitor 0 and 1 are swapped when monitoring memory events */
+#define ANOMALY_05000097 (__SILICON_REVISION__ < 2)
+/* 32-bit SPORT DMA will be word reversed */
+#define ANOMALY_05000098 (__SILICON_REVISION__ < 2)
+/* Incorrect status in the UART_IIR register */
+#define ANOMALY_05000100 (__SILICON_REVISION__ < 2)
+/* Reading X_MODIFY or Y_MODIFY while DMA channel is active */
+#define ANOMALY_05000101 (__SILICON_REVISION__ < 2)
+/* Descriptor-based MemDMA may lock up with 32-bit transfers or if transfers span 64KB buffers */
+#define ANOMALY_05000102 (__SILICON_REVISION__ < 2)
+/* Incorrect value written to the cycle counters */
+#define ANOMALY_05000103 (__SILICON_REVISION__ < 2)
+/* Stores to L1 Data memory incorrect when a specific sequence is followed */
+#define ANOMALY_05000104 (__SILICON_REVISION__ < 2)
+/* Programmable Flag (PF3) functionality not supported in all PPI modes */
+#define ANOMALY_05000106 (__SILICON_REVISION__ < 2)
+/* Data store can be lost when targeting a cache line fill */
+#define ANOMALY_05000107 (__SILICON_REVISION__ < 2)
 /* Reserved bits in SYSCFG register not set at power on */
 #define ANOMALY_05000109 (__SILICON_REVISION__ < 3)
+/* Infinite Core Stall */
+#define ANOMALY_05000114 (__SILICON_REVISION__ < 2)
+/* PPI_FSx may glitch when generated by the on chip Timers */
+#define ANOMALY_05000115 (__SILICON_REVISION__ < 2)
 /* Trace Buffers may record discontinuities into emulation mode and/or exception, NMI, reset handlers */
 #define ANOMALY_05000116 (__SILICON_REVISION__ < 3)
+/* DTEST registers allow access to Data Cache when DTEST_COMMAND< 14 >= 0 */
+#define ANOMALY_05000117 (__SILICON_REVISION__ < 2)
+/* Booting from an 8-bit or 24-bit Addressable SPI device is not supported */
+#define ANOMALY_05000118 (__SILICON_REVISION__ < 2)
 /* DTEST_COMMAND initiated memory access may be incorrect if data cache or DMA is active */
 #define ANOMALY_05000123 (__SILICON_REVISION__ < 3)
 /* DMA Lock-up at CCLK to SCLK ratios of 4:1, 2:1, or 1:1 */
@@ -222,7 +268,9 @@
 /* DMEM_CONTROL is not set on Reset */
 #define ANOMALY_05000137 (__SILICON_REVISION__ < 3)
 /* SPI boot will not complete if there is a zero fill block in the loader file */
-#define ANOMALY_05000138 (__SILICON_REVISION__ < 3)
+#define ANOMALY_05000138 (__SILICON_REVISION__ == 2)
+/* Timerx_Config must be set for using the PPI in GP output mode with internal Frame Syncs */
+#define ANOMALY_05000139 (__SILICON_REVISION__ < 2)
 /* Allowing the SPORT RX FIFO to fill will cause an overflow */
 #define ANOMALY_05000140 (__SILICON_REVISION__ < 3)
 /* An Infinite Stall occurs with a particular sequence of consecutive dual dag events */
@@ -237,17 +285,17 @@
 #define ANOMALY_05000145 (__SILICON_REVISION__ < 3)
 /* MDMA may lose the first few words of a descriptor chain */
 #define ANOMALY_05000146 (__SILICON_REVISION__ < 3)
-/* The source MDMA descriptor may stop with a DMA Error */
+/* Source MDMA descriptor may stop with a DMA Error near beginning of descriptor fetch */
 #define ANOMALY_05000147 (__SILICON_REVISION__ < 3)
 /* When booting from a 16-bit asynchronous memory device, the upper 8-bits of each word must be 0x00 */
 #define ANOMALY_05000148 (__SILICON_REVISION__ < 3)
 /* Frame Delay in SPORT Multichannel Mode */
 #define ANOMALY_05000153 (__SILICON_REVISION__ < 3)
-/* SPORT TFS signal is active in Multi-channel mode outside of valid channels */
+/* SPORT TFS signal stays active in multichannel mode outside of valid channels */
 #define ANOMALY_05000154 (__SILICON_REVISION__ < 3)
 /* Timer1 can not be used for PWMOUT mode when a certain PPI mode is in use */
 #define ANOMALY_05000155 (__SILICON_REVISION__ < 3)
-/* A killed 32-bit System MMR write will lead to the next system MMR access thinking it should be 32-bit. */
+/* Killed 32-bit MMR write leads to next system MMR access thinking it should be 32-bit */
 #define ANOMALY_05000157 (__SILICON_REVISION__ < 3)
 /* SPORT transmit data is not gated by external frame sync in certain conditions */
 #define ANOMALY_05000163 (__SILICON_REVISION__ < 3)
@@ -275,15 +323,27 @@
 #define ANOMALY_05000206 (__SILICON_REVISION__ < 3)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000120 (0)
+#define ANOMALY_05000149 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000248 (0)
 #define ANOMALY_05000266 (0)
+#define ANOMALY_05000274 (0)
+#define ANOMALY_05000287 (0)
 #define ANOMALY_05000323 (0)
 #define ANOMALY_05000353 (1)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (1)
+#define ANOMALY_05000389 (0)
 #define ANOMALY_05000412 (0)
+#define ANOMALY_05000430 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf537/include/mach/anomaly.h b/arch/blackfin/mach-bf537/include/mach/anomaly.h
index 1bfd80c..fc96634 100644
--- a/arch/blackfin/mach-bf537/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf537/include/mach/anomaly.h
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision D, 09/18/2008; ADSP-BF534/ADSP-BF536/ADSP-BF537 Blackfin Processor Anomaly List
  */
 
@@ -36,77 +36,75 @@
 
 /* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
-/* DMA_RUN bit is not valid after a Peripheral Receive Channel DMA stops */
+/* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)
-/* Rx.H cannot be used to access 16-bit System MMR registers */
+/* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
 /* Killed 32-bit MMR write leads to next system MMR access thinking it should be 32-bit */
 #define ANOMALY_05000157 (__SILICON_REVISION__ < 2)
-/* Turning SPORTs on while External Frame Sync Is Active May Corrupt Data */
-#define ANOMALY_05000167 (1)
-/* PPI_DELAY not functional in PPI modes with 0 frame syncs */
+/* PPI_DELAY Not Functional in PPI Modes with 0 Frame Syncs */
 #define ANOMALY_05000180 (1)
 /* Instruction Cache Is Not Functional */
 #define ANOMALY_05000237 (__SILICON_REVISION__ < 2)
-/* If i-cache is on, CSYNC/SSYNC/IDLE around Change of Control causes failures */
+/* If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control Causes Failures */
 #define ANOMALY_05000244 (__SILICON_REVISION__ < 3)
-/* Spurious Hardware Error from an access in the shadow of a conditional branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
 /* CLKIN Buffer Output Enable Reset Behavior Is Changed */
 #define ANOMALY_05000247 (1)
-/* Incorrect Bit-Shift of Data Word in Multichannel (TDM) mode in certain conditions */
+/* Incorrect Bit Shift of Data Word in Multichannel (TDM) Mode in Certain Conditions */
 #define ANOMALY_05000250 (__SILICON_REVISION__ < 3)
 /* EMAC Tx DMA error after an early frame abort */
 #define ANOMALY_05000252 (__SILICON_REVISION__ < 3)
-/* Maximum external clock speed for Timers */
+/* Maximum External Clock Speed for Timers */
 #define ANOMALY_05000253 (__SILICON_REVISION__ < 3)
-/* Incorrect Timer Pulse Width in Single-Shot PWM_OUT mode with external clock */
+/* Incorrect Timer Pulse Width in Single-Shot PWM_OUT Mode with External Clock */
 #define ANOMALY_05000254 (__SILICON_REVISION__ > 2)
-/* Entering Hibernate Mode with RTC Seconds event interrupt not functional */
+/* Entering Hibernate State with RTC Seconds Interrupt Not Functional */
 #define ANOMALY_05000255 (__SILICON_REVISION__ < 3)
 /* EMAC MDIO input latched on wrong MDC edge */
 #define ANOMALY_05000256 (__SILICON_REVISION__ < 3)
-/* Interrupt/Exception during short hardware loop may cause bad instruction fetches */
+/* Interrupt/Exception During Short Hardware Loop May Cause Bad Instruction Fetches */
 #define ANOMALY_05000257 (__SILICON_REVISION__ < 3)
-/* Instruction Cache is corrupted when bits 9 and 12 of the ICPLB Data registers differ */
+/* Instruction Cache Is Corrupted When Bits 9 and 12 of the ICPLB Data Registers Differ */
 #define ANOMALY_05000258 (((ANOMALY_BF536 || ANOMALY_BF537) && __SILICON_REVISION__ == 1) || __SILICON_REVISION__ == 2)
-/* ICPLB_STATUS MMR register may be corrupted */
+/* ICPLB_STATUS MMR Register May Be Corrupted */
 #define ANOMALY_05000260 (__SILICON_REVISION__ == 2)
-/* DCPLB_FAULT_ADDR MMR register may be corrupted */
+/* DCPLB_FAULT_ADDR MMR Register May Be Corrupted */
 #define ANOMALY_05000261 (__SILICON_REVISION__ < 3)
-/* Stores to data cache may be lost */
+/* Stores To Data Cache May Be Lost */
 #define ANOMALY_05000262 (__SILICON_REVISION__ < 3)
-/* Hardware loop corrupted when taking an ICPLB exception */
+/* Hardware Loop Corrupted When Taking an ICPLB Exception */
 #define ANOMALY_05000263 (__SILICON_REVISION__ == 2)
-/* CSYNC/SSYNC/IDLE causes infinite stall in second to last instruction in hardware loop */
+/* CSYNC/SSYNC/IDLE Causes Infinite Stall in Penultimate Instruction in Hardware Loop */
 #define ANOMALY_05000264 (__SILICON_REVISION__ < 3)
-/* Sensitivity to noise with slow input edge rates on external SPORT TX and RX clocks */
+/* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */
 #define ANOMALY_05000265 (1)
 /* Memory DMA error when peripheral DMA is running with non-zero DEB_TRAFFIC_PERIOD */
 #define ANOMALY_05000268 (__SILICON_REVISION__ < 3)
-/* High I/O activity causes output voltage of internal voltage regulator (VDDint) to decrease */
+/* High I/O Activity Causes Output Voltage of Internal Voltage Regulator (Vddint) to Decrease */
 #define ANOMALY_05000270 (__SILICON_REVISION__ < 3)
-/* Certain data cache write through modes fail for VDDint <=0.9V */
+/* Certain Data Cache Writethrough Modes Fail for Vddint <= 0.9V */
 #define ANOMALY_05000272 (1)
-/* Writes to Synchronous SDRAM memory may be lost */
+/* Writes to Synchronous SDRAM Memory May Be Lost */
 #define ANOMALY_05000273 (__SILICON_REVISION__ < 3)
-/* Writes to an I/O data register one SCLK cycle after an edge is detected may clear interrupt */
+/* Writes to an I/O Data Register One SCLK Cycle after an Edge Is Detected May Clear Interrupt */
 #define ANOMALY_05000277 (__SILICON_REVISION__ < 3)
-/* Disabling Peripherals with DMA running may cause DMA system instability */
+/* Disabling Peripherals with DMA Running May Cause DMA System Instability */
 #define ANOMALY_05000278 (((ANOMALY_BF536 || ANOMALY_BF537) && __SILICON_REVISION__ < 3) || (ANOMALY_BF534 && __SILICON_REVISION__ < 2))
 /* SPI Master boot mode does not work well with Atmel Data flash devices */
 #define ANOMALY_05000280 (1)
-/* False Hardware Error Exception when ISR context is not restored */
+/* False Hardware Error Exception When ISR Context Is Not Restored */
 #define ANOMALY_05000281 (__SILICON_REVISION__ < 3)
-/* Memory DMA corruption with 32-bit data and traffic control */
+/* Memory DMA Corruption with 32-Bit Data and Traffic Control */
 #define ANOMALY_05000282 (__SILICON_REVISION__ < 3)
 /* System MMR Write Is Stalled Indefinitely When Killed in a Particular Stage */
 #define ANOMALY_05000283 (__SILICON_REVISION__ < 3)
 /* New Feature: EMAC TX DMA Word Alignment (Not Available On Older Silicon) */
 #define ANOMALY_05000285 (__SILICON_REVISION__ < 3)
-/* SPORTs may receive bad data if FIFOs fill up */
+/* SPORTs May Receive Bad Data If FIFOs Fill Up */
 #define ANOMALY_05000288 (__SILICON_REVISION__ < 3)
-/* Memory to memory DMA source/destination descriptors must be in same memory space */
+/* Memory-To-Memory DMA Source/Destination Descriptors Must Be in Same Memory Space */
 #define ANOMALY_05000301 (1)
 /* SSYNCs After Writes To CAN/DMA MMR Registers Are Not Always Handled Correctly */
 #define ANOMALY_05000304 (__SILICON_REVISION__ < 3)
@@ -116,11 +114,11 @@
 #define ANOMALY_05000307 (__SILICON_REVISION__ < 3)
 /* Writing UART_THR while UART clock is disabled sends erroneous start bit */
 #define ANOMALY_05000309 (__SILICON_REVISION__ < 3)
-/* False hardware errors caused by fetches at the boundary of reserved memory */
+/* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */
 #define ANOMALY_05000310 (1)
-/* Errors when SSYNC, CSYNC, or loads to LT, LB and LC registers are interrupted */
+/* Errors When SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
 #define ANOMALY_05000312 (1)
-/* PPI is level sensitive on first transfer */
+/* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
 #define ANOMALY_05000313 (1)
 /* Killed System MMR Write Completes Erroneously On Next System MMR Access */
 #define ANOMALY_05000315 (__SILICON_REVISION__ < 3)
@@ -156,24 +154,46 @@
 #define ANOMALY_05000426 (1)
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000120 (0)
 #define ANOMALY_05000125 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000179 (0)
 #define ANOMALY_05000183 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000233 (0)
+#define ANOMALY_05000242 (0)
+#define ANOMALY_05000248 (0)
 #define ANOMALY_05000266 (0)
+#define ANOMALY_05000274 (0)
+#define ANOMALY_05000287 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000323 (0)
 #define ANOMALY_05000353 (1)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (1)
+#define ANOMALY_05000389 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
+#define ANOMALY_05000430 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf538/include/mach/anomaly.h b/arch/blackfin/mach-bf538/include/mach/anomaly.h
index 3a56998..175ca9e 100644
--- a/arch/blackfin/mach-bf538/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf538/include/mach/anomaly.h
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision G, 09/18/2008; ADSP-BF538/BF538F Blackfin Processor Anomaly List
  *  - Revision L, 09/18/2008; ADSP-BF539/BF539F Blackfin Processor Anomaly List
  */
@@ -14,17 +14,29 @@
 #ifndef _MACH_ANOMALY_H_
 #define _MACH_ANOMALY_H_
 
+/* We do not support old silicon - sorry */
 #if __SILICON_REVISION__ < 4
-# error will not work on BF538 silicon version 0.0, 0.1, 0.2, or 0.3
+# error will not work on BF538/BF539 silicon version 0.0, 0.1, 0.2, or 0.3
 #endif
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */
+#if defined(__ADSPBF538__)
+# define ANOMALY_BF538 1
+#else
+# define ANOMALY_BF538 0
+#endif
+#if defined(__ADSPBF539__)
+# define ANOMALY_BF539 1
+#else
+# define ANOMALY_BF539 0
+#endif
+
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
-/* PPI Data Lengths between 8 and 16 Do Not Zero Out Upper Bits */
+/* PPI Data Lengths Between 8 and 16 Do Not Zero Out Upper Bits */
 #define ANOMALY_05000166 (1)
 /* PPI_COUNT Cannot Be Programmed to 0 in General Purpose TX or RX Modes */
 #define ANOMALY_05000179 (1)
@@ -40,13 +52,13 @@
 #define ANOMALY_05000229 (1)
 /* PPI_FS3 Is Not Driven in 2 or 3 Internal Frame Sync Transmit Modes */
 #define ANOMALY_05000233 (1)
-/* If i-cache is on, CSYNC/SSYNC/IDLE around Change of Control causes failures */
+/* If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control Causes Failures */
 #define ANOMALY_05000244 (__SILICON_REVISION__ < 3)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
 /* Maximum External Clock Speed for Timers */
 #define ANOMALY_05000253 (1)
-/* DCPLB_FAULT_ADDR MMR register may be corrupted */
+/* DCPLB_FAULT_ADDR MMR Register May Be Corrupted */
 #define ANOMALY_05000261 (__SILICON_REVISION__ < 3)
 /* High I/O Activity Causes Output Voltage of Internal Voltage Regulator (Vddint) to Decrease */
 #define ANOMALY_05000270 (__SILICON_REVISION__ < 4)
@@ -58,11 +70,11 @@
 #define ANOMALY_05000277 (__SILICON_REVISION__ < 4)
 /* Disabling Peripherals with DMA Running May Cause DMA System Instability */
 #define ANOMALY_05000278 (__SILICON_REVISION__ < 4)
-/* False Hardware Error Exception when ISR Context Is Not Restored */
+/* False Hardware Error Exception When ISR Context Is Not Restored */
 #define ANOMALY_05000281 (__SILICON_REVISION__ < 4)
 /* Memory DMA Corruption with 32-Bit Data and Traffic Control */
 #define ANOMALY_05000282 (__SILICON_REVISION__ < 4)
-/* System MMR Write Is Stalled Indefinitely when Killed in a Particular Stage */
+/* System MMR Write Is Stalled Indefinitely When Killed in a Particular Stage */
 #define ANOMALY_05000283 (__SILICON_REVISION__ < 4)
 /* SPORTs May Receive Bad Data If FIFOs Fill Up */
 #define ANOMALY_05000288 (__SILICON_REVISION__ < 4)
@@ -80,14 +92,14 @@
 #define ANOMALY_05000307 (__SILICON_REVISION__ < 4)
 /* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */
 #define ANOMALY_05000310 (1)
-/* Errors when SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
+/* Errors When SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
 #define ANOMALY_05000312 (__SILICON_REVISION__ < 5)
-/* PPI Is Level-Sensitive on First Transfer */
+/* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
 #define ANOMALY_05000313 (__SILICON_REVISION__ < 4)
-/* Killed System MMR Write Completes Erroneously on Next System MMR Access */
+/* Killed System MMR Write Completes Erroneously On Next System MMR Access */
 #define ANOMALY_05000315 (__SILICON_REVISION__ < 4)
 /* PFx Glitch on Write to FIO_FLAG_D or FIO_FLAG_T */
-#define ANOMALY_05000318 (__SILICON_REVISION__ < 4)
+#define ANOMALY_05000318 (ANOMALY_BF539 && __SILICON_REVISION__ < 4)
 /* Regulator Programming Blocked when Hibernate Wakeup Source Remains Active */
 #define ANOMALY_05000355 (__SILICON_REVISION__ < 5)
 /* Serial Port (SPORT) Multichannel Transmit Failure when Channel 0 Is Disabled */
@@ -114,23 +126,45 @@
 #define ANOMALY_05000436 (__SILICON_REVISION__ > 3)
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000120 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000242 (0)
+#define ANOMALY_05000248 (0)
+#define ANOMALY_05000250 (0)
+#define ANOMALY_05000254 (0)
 #define ANOMALY_05000263 (0)
+#define ANOMALY_05000274 (0)
+#define ANOMALY_05000287 (0)
 #define ANOMALY_05000305 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000323 (0)
 #define ANOMALY_05000353 (1)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (1)
+#define ANOMALY_05000389 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
+#define ANOMALY_05000430 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
diff --git a/arch/blackfin/mach-bf548/include/mach/anomaly.h b/arch/blackfin/mach-bf548/include/mach/anomaly.h
index 882e40c..c510ae6 100644
--- a/arch/blackfin/mach-bf548/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf548/include/mach/anomaly.h
@@ -6,26 +6,31 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision H, 01/16/2009; ADSP-BF542/BF544/BF547/BF548/BF549 Blackfin Processor Anomaly List
  */
 
 #ifndef _MACH_ANOMALY_H_
 #define _MACH_ANOMALY_H_
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */
+/* We do not support 0.0 or 0.1 silicon - sorry */
+#if __SILICON_REVISION__ < 2
+# error will not work on BF548 silicon version 0.0, or 0.1
+#endif
+
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */
 #define ANOMALY_05000119 (1)
 /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */
 #define ANOMALY_05000122 (1)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (1)
 /* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */
 #define ANOMALY_05000265 (1)
 /* Certain Data Cache Writethrough Modes Fail for Vddint <= 0.9V */
 #define ANOMALY_05000272 (1)
-/* False Hardware Error Exception when ISR context is not restored */
+/* False Hardware Error Exception When ISR Context Is Not Restored */
 #define ANOMALY_05000281 (__SILICON_REVISION__ < 1)
 /* SSYNCs After Writes To CAN/DMA MMR Registers Are Not Always Handled Correctly */
 #define ANOMALY_05000304 (__SILICON_REVISION__ < 1)
@@ -59,7 +64,7 @@
 #define ANOMALY_05000340 (__SILICON_REVISION__ < 1)
 /* Boot Host Wait (HWAIT) and Boot Host Wait Alternate (HWAITA) Signals Are Swapped */
 #define ANOMALY_05000344 (__SILICON_REVISION__ < 1)
-/* USB Calibration Value Is Not Intialized */
+/* USB Calibration Value Is Not Initialized */
 #define ANOMALY_05000346 (__SILICON_REVISION__ < 1)
 /* USB Calibration Value to use */
 #define ANOMALY_05000346_value 0x5411
@@ -147,11 +152,11 @@
 #define ANOMALY_05000416 (1)
 /* Multichannel SPORT Channel Misalignment Under Specific Configuration */
 #define ANOMALY_05000425 (1)
-/* Speculative Fetches of Indirect-Pointer Instructions Can Cause Spurious Hardware Errors */
+/* Speculative Fetches of Indirect-Pointer Instructions Can Cause False Hardware Errors */
 #define ANOMALY_05000426 (1)
 /* CORE_EPPI_PRIO bit and SYS_EPPI_PRIO bit in the HMDMA1_CONTROL register are not functional */
 #define ANOMALY_05000427 (__SILICON_REVISION__ < 2)
-/* WB_EDGE Bit in NFC_IRQSTAT Incorrectly Behaves as a Buffer Status Bit Instead of an IRQ Status Bit */
+/* WB_EDGE Bit in NFC_IRQSTAT Incorrectly Reflects Buffer Status Instead of IRQ Status */
 #define ANOMALY_05000429 (__SILICON_REVISION__ < 2)
 /* Software System Reset Corrupts PLL_LOCKCNT Register */
 #define ANOMALY_05000430 (__SILICON_REVISION__ >= 2)
@@ -170,26 +175,49 @@
 /* Reduced Timing Margins on DDR Output Setup and Hold (tDS and tDH) */
 #define ANOMALY_05000449 (__SILICON_REVISION__ == 1)
 /* USB DMA Mode 1 Short Packet Data Corruption */
-#define ANOMALY_05000450 (1
+#define ANOMALY_05000450 (1)
+/* USB Receive Interrupt Is Not Generated in DMA Mode 1 */
+#define ANOMALY_05000456 (__SILICON_REVISION__ < 3)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000099 (0)
+#define ANOMALY_05000120 (0)
 #define ANOMALY_05000125 (0)
+#define ANOMALY_05000149 (0)
 #define ANOMALY_05000158 (0)
+#define ANOMALY_05000171 (0)
+#define ANOMALY_05000179 (0)
 #define ANOMALY_05000183 (0)
 #define ANOMALY_05000198 (0)
+#define ANOMALY_05000215 (0)
+#define ANOMALY_05000220 (0)
+#define ANOMALY_05000227 (0)
 #define ANOMALY_05000230 (0)
+#define ANOMALY_05000231 (0)
+#define ANOMALY_05000233 (0)
+#define ANOMALY_05000242 (0)
 #define ANOMALY_05000244 (0)
+#define ANOMALY_05000248 (0)
+#define ANOMALY_05000250 (0)
+#define ANOMALY_05000254 (0)
 #define ANOMALY_05000261 (0)
 #define ANOMALY_05000263 (0)
 #define ANOMALY_05000266 (0)
 #define ANOMALY_05000273 (0)
+#define ANOMALY_05000274 (0)
 #define ANOMALY_05000278 (0)
+#define ANOMALY_05000287 (0)
+#define ANOMALY_05000301 (0)
 #define ANOMALY_05000305 (0)
 #define ANOMALY_05000307 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000323 (0)
+#define ANOMALY_05000362 (1)
 #define ANOMALY_05000363 (0)
 #define ANOMALY_05000380 (0)
+#define ANOMALY_05000400 (0)
 #define ANOMALY_05000412 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
diff --git a/arch/blackfin/mach-bf561/include/mach/anomaly.h b/arch/blackfin/mach-bf561/include/mach/anomaly.h
index d0b0b35..dccd396c 100644
--- a/arch/blackfin/mach-bf561/include/mach/anomaly.h
+++ b/arch/blackfin/mach-bf561/include/mach/anomaly.h
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-/* This file shoule be up to date with:
+/* This file should be up to date with:
  *  - Revision Q, 11/07/2008; ADSP-BF561 Blackfin Processor Anomaly List
  */
 
@@ -18,11 +18,11 @@
 # error will not work on BF561 silicon version 0.0, 0.1, 0.2, or 0.4
 #endif
 
-/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot 2 Not Supported */
+/* Multi-issue instruction with dsp32shiftimm in slot1 and P-reg store in slot 2 not supported */
 #define ANOMALY_05000074 (1)
 /* UART Line Status Register (UART_LSR) Bits Are Not Updated at the Same Time */
 #define ANOMALY_05000099 (__SILICON_REVISION__ < 5)
-/* Trace Buffers may contain errors in emulation mode and/or exception, NMI, reset handlers */
+/* Trace Buffers may record discontinuities into emulation mode and/or exception, NMI, reset handlers */
 #define ANOMALY_05000116 (__SILICON_REVISION__ < 3)
 /* Testset instructions restricted to 32-bit aligned memory locations */
 #define ANOMALY_05000120 (1)
@@ -40,7 +40,7 @@
 #define ANOMALY_05000136 (__SILICON_REVISION__ < 3)
 /* Allowing the SPORT RX FIFO to fill will cause an overflow */
 #define ANOMALY_05000140 (__SILICON_REVISION__ < 3)
-/* Infinite Stall may occur with a particular sequence of consecutive dual dag events */
+/* An Infinite Stall occurs with a particular sequence of consecutive dual dag events */
 #define ANOMALY_05000141 (__SILICON_REVISION__ < 3)
 /* Interrupts may be lost when a programmable input flag is configured to be edge sensitive */
 #define ANOMALY_05000142 (__SILICON_REVISION__ < 3)
@@ -80,7 +80,7 @@
 #define ANOMALY_05000163 (__SILICON_REVISION__ < 3)
 /* PPI Data Lengths Between 8 and 16 Do Not Zero Out Upper Bits */
 #define ANOMALY_05000166 (1)
-/* Turning Serial Ports on with External Frame Syncs */
+/* Turning SPORTs on while External Frame Sync Is Active May Corrupt Data */
 #define ANOMALY_05000167 (1)
 /* SDRAM auto-refresh and subsequent Power Ups */
 #define ANOMALY_05000168 (__SILICON_REVISION__ < 5)
@@ -164,7 +164,7 @@
 #define ANOMALY_05000242 (__SILICON_REVISION__ < 5)
 /* If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control Causes Failures */
 #define ANOMALY_05000244 (__SILICON_REVISION__ < 5)
-/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */
+/* False Hardware Error from an Access in the Shadow of a Conditional Branch */
 #define ANOMALY_05000245 (__SILICON_REVISION__ < 5)
 /* TESTSET operation forces stall on the other core */
 #define ANOMALY_05000248 (__SILICON_REVISION__ < 5)
@@ -208,7 +208,7 @@
 #define ANOMALY_05000275 (__SILICON_REVISION__ > 2)
 /* Timing Requirements Change for External Frame Sync PPI Modes with Non-Zero PPI_DELAY */
 #define ANOMALY_05000276 (__SILICON_REVISION__ < 5)
-/* Writes to an I/O data register one SCLK cycle after an edge is detected may clear interrupt */
+/* Writes to an I/O Data Register One SCLK Cycle after an Edge Is Detected May Clear Interrupt */
 #define ANOMALY_05000277 (__SILICON_REVISION__ < 3)
 /* Disabling Peripherals with DMA Running May Cause DMA System Instability */
 #define ANOMALY_05000278 (__SILICON_REVISION__ < 5)
@@ -232,7 +232,7 @@
 #define ANOMALY_05000310 (1)
 /* Errors When SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */
 #define ANOMALY_05000312 (1)
-/* PPI Is Level-Sensitive on First Transfer */
+/* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */
 #define ANOMALY_05000313 (1)
 /* Killed System MMR Write Completes Erroneously On Next System MMR Access */
 #define ANOMALY_05000315 (1)
@@ -276,18 +276,27 @@
 #define ANOMALY_05000428 (__SILICON_REVISION__ > 3)
 /* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */
 #define ANOMALY_05000443 (1)
+/* False Hardware Error when RETI points to invalid memory */
+#define ANOMALY_05000461 (1)
 
 /* Anomalies that don't exist on this proc */
+#define ANOMALY_05000119 (0)
 #define ANOMALY_05000158 (0)
 #define ANOMALY_05000183 (0)
+#define ANOMALY_05000233 (0)
 #define ANOMALY_05000273 (0)
 #define ANOMALY_05000311 (0)
 #define ANOMALY_05000353 (1)
 #define ANOMALY_05000380 (0)
 #define ANOMALY_05000386 (1)
+#define ANOMALY_05000389 (0)
+#define ANOMALY_05000400 (0)
+#define ANOMALY_05000430 (0)
 #define ANOMALY_05000432 (0)
 #define ANOMALY_05000435 (0)
 #define ANOMALY_05000447 (0)
 #define ANOMALY_05000448 (0)
+#define ANOMALY_05000456 (0)
+#define ANOMALY_05000450 (0)
 
 #endif
-- 
cgit v0.10.2


From d4b890b0cd9e0ac75cbdf7011ef38ebc662930e6 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Thu, 16 Apr 2009 12:38:34 +0000
Subject: Blackfin: do not error if GPIO IRQ is requested already as GPIO

Some drivers expect to be able to request both as GPIO and GPIO IRQ, so
allow that use case.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c
index a0678da..32289b3 100644
--- a/arch/blackfin/kernel/bfin_gpio.c
+++ b/arch/blackfin/kernel/bfin_gpio.c
@@ -1021,15 +1021,6 @@ int bfin_gpio_irq_request(unsigned gpio, const char *label)
 
 	local_irq_save_hw(flags);
 
-	if (unlikely(reserved_gpio_irq_map[gpio_bank(gpio)] & gpio_bit(gpio))) {
-		if (system_state == SYSTEM_BOOTING)
-			dump_stack();
-		printk(KERN_ERR
-		       "bfin-gpio: GPIO %d is already reserved as gpio-irq !\n",
-		       gpio);
-		local_irq_restore_hw(flags);
-		return -EBUSY;
-	}
 	if (unlikely(reserved_peri_map[gpio_bank(gpio)] & gpio_bit(gpio))) {
 		if (system_state == SYSTEM_BOOTING)
 			dump_stack();
-- 
cgit v0.10.2


From f8b556514c4481a500ace4745731f04a77c08d54 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 13 Apr 2009 21:58:34 +0000
Subject: Blackfin: set bf51x/bf52x to 0.0 rev by default and bf54x to 0.2

Update the default revs based on what we actually support (bf54x-0.[01]
is too broken to use).

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 672b7b0..cea2bfd 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -263,8 +263,8 @@ config BF_REV_MAX
 
 choice
 	prompt "Silicon Rev"
-	default BF_REV_0_1 if (BF51x || BF52x || (BF54x && !BF54xM))
-	default BF_REV_0_2 if (BF534 || BF536 || BF537)
+	default BF_REV_0_0 if (BF51x || BF52x)
+	default BF_REV_0_2 if (BF534 || BF536 || BF537 || (BF54x && !BF54xM))
 	default BF_REV_0_3 if (BF531 || BF532 || BF533 || BF54xM || BF561)
 
 config BF_REV_0_0
-- 
cgit v0.10.2


From 78f28a0a83a86dfe021f7d8169208b9af2a23201 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 10 Apr 2009 21:20:19 +0000
Subject: Blackfin: simplify the do_flush macro

Simplify the do_flush macro now that we don't need to take into account
a second instruction being used together.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mach-common/cache.S b/arch/blackfin/mach-common/cache.S
index c295e8f..d9666fe 100644
--- a/arch/blackfin/mach-common/cache.S
+++ b/arch/blackfin/mach-common/cache.S
@@ -15,6 +15,13 @@
 
 .text
 
+/* 05000443 - IFLUSH cannot be last instruction in hardware loop */
+#if ANOMALY_05000443
+# define BROK_FLUSH_INST "IFLUSH"
+#else
+# define BROK_FLUSH_INST "no anomaly! yeah!"
+#endif
+
 /* Since all L1 caches work the same way, we use the same method for flushing
  * them.  Only the actual flush instruction differs.  We write this in asm as
  * GCC can be hard to coax into writing nice hardware loops.
@@ -23,7 +30,7 @@
  * R0 = start address
  * R1 = end address
  */
-.macro do_flush flushins:req optflushins optnopins label
+.macro do_flush flushins:req label
 
 	R2 = -L1_CACHE_BYTES;
 
@@ -44,22 +51,15 @@
 \label :
 .endif
 	P0 = R0;
+
 	LSETUP (1f, 2f) LC1 = P1;
 1:
-.ifnb \optflushins
-	\optflushins [P0];
-.endif
-#if ANOMALY_05000443
-.ifb \optnopins
-2:
-.endif
+.ifeqs "\flushins", BROK_FLUSH_INST
 	\flushins [P0++];
-.ifnb \optnopins
-2:	\optnopins;
-.endif
-#else
+2:	nop;
+.else
 2:	\flushins [P0++];
-#endif
+.endif
 
 	RTS;
 .endm
@@ -77,7 +77,7 @@ ENTRY(_blackfin_icache_flush_range)
  */
 	P0 = R0;
 	IFLUSH[P0];
-	do_flush IFLUSH, , nop
+	do_flush IFLUSH
 ENDPROC(_blackfin_icache_flush_range)
 
 /* Throw away all D-cached data in specified region without any obligation to
@@ -91,7 +91,7 @@ ENDPROC(_blackfin_dcache_invalidate_range)
 
 /* Flush all data cache lines assocoiated with this memory area */
 ENTRY(_blackfin_dcache_flush_range)
-	do_flush FLUSH, , , .Ldfr
+	do_flush FLUSH, .Ldfr
 ENDPROC(_blackfin_dcache_flush_range)
 
 /* Our headers convert the page structure to an address, so just need to flush
-- 
cgit v0.10.2


From b9ccf14bc5352b86e7e254e6cf55d9b917b1b1cc Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 21 May 2009 06:39:46 +0000
Subject: Blackfin: update defconfigs

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig
index c121d6e..baec133 100644
--- a/arch/blackfin/configs/BF518F-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -259,7 +259,10 @@ CONFIG_HZ=250
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -404,7 +407,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -688,14 +691,14 @@ CONFIG_INPUT_MISC=y
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 # CONFIG_BFIN_SPORT is not set
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 CONFIG_VT=y
 CONFIG_CONSOLE_TRANSLATIONS=y
@@ -802,7 +805,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -831,6 +857,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -962,7 +989,8 @@ CONFIG_RTC_DRV_BFIN=y
 #
 # File systems
 #
-# CONFIG_EXT2_FS is not set
+CONFIG_EXT2_FS=m
+# CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT3_FS is not set
 # CONFIG_EXT4_FS is not set
 # CONFIG_REISERFS_FS is not set
@@ -988,8 +1016,11 @@ CONFIG_INOTIFY_USER=y
 #
 # DOS/FAT/NT Filesystems
 #
+CONFIG_FAT_FS=m
 # CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 # CONFIG_NTFS_FS is not set
 
 #
@@ -1012,8 +1043,8 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_YAFFS_FS is not set
 # CONFIG_JFFS2_FS is not set
+# CONFIG_YAFFS_FS is not set
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1048,9 +1079,9 @@ CONFIG_SMB_FS=m
 #
 # CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
-CONFIG_NLS=y
+CONFIG_NLS=m
 CONFIG_NLS_DEFAULT="iso8859-1"
-CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_437=m
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
 # CONFIG_NLS_CODEPAGE_850 is not set
@@ -1065,7 +1096,7 @@ CONFIG_NLS_CODEPAGE_437=y
 # CONFIG_NLS_CODEPAGE_865 is not set
 # CONFIG_NLS_CODEPAGE_866 is not set
 # CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
+CONFIG_NLS_CODEPAGE_936=m
 # CONFIG_NLS_CODEPAGE_950 is not set
 # CONFIG_NLS_CODEPAGE_932 is not set
 # CONFIG_NLS_CODEPAGE_949 is not set
@@ -1074,7 +1105,7 @@ CONFIG_NLS_CODEPAGE_437=y
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
 # CONFIG_NLS_ASCII is not set
-CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_4 is not set
@@ -1087,7 +1118,7 @@ CONFIG_NLS_ISO8859_1=y
 # CONFIG_NLS_ISO8859_15 is not set
 # CONFIG_NLS_KOI8_R is not set
 # CONFIG_NLS_KOI8_U is not set
-# CONFIG_NLS_UTF8 is not set
+CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
 
 #
@@ -1102,7 +1133,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1111,8 +1142,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1132,7 +1161,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 
 #
 # Tracers
@@ -1148,16 +1176,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1269,7 +1301,6 @@ CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig
index 3e562b2..c06262e 100644
--- a/arch/blackfin/configs/BF526-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF526-EZBRD_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -144,8 +144,8 @@ CONFIG_BF526=y
 # CONFIG_BF561 is not set
 CONFIG_BF_REV_MIN=0
 CONFIG_BF_REV_MAX=2
-# CONFIG_BF_REV_0_0 is not set
-CONFIG_BF_REV_0_1=y
+CONFIG_BF_REV_0_0=y
+# CONFIG_BF_REV_0_1 is not set
 # CONFIG_BF_REV_0_2 is not set
 # CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
@@ -264,7 +264,10 @@ CONFIG_HZ=250
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -409,7 +412,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -491,7 +494,7 @@ CONFIG_MTD_PARTITIONS=y
 #
 # User Modules And Translation Layers
 #
-CONFIG_MTD_CHAR=m
+CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLKDEVS=y
 CONFIG_MTD_BLOCK=y
 # CONFIG_FTL is not set
@@ -504,9 +507,9 @@ CONFIG_MTD_BLOCK=y
 #
 # RAM/ROM/Flash chip drivers
 #
-# CONFIG_MTD_CFI is not set
-CONFIG_MTD_JEDECPROBE=m
-CONFIG_MTD_GEN_PROBE=m
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
 # CONFIG_MTD_CFI_ADV_OPTIONS is not set
 CONFIG_MTD_MAP_BANK_WIDTH_1=y
 CONFIG_MTD_MAP_BANK_WIDTH_2=y
@@ -518,9 +521,10 @@ CONFIG_MTD_CFI_I1=y
 CONFIG_MTD_CFI_I2=y
 # CONFIG_MTD_CFI_I4 is not set
 # CONFIG_MTD_CFI_I8 is not set
-# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_INTELEXT=y
 # CONFIG_MTD_CFI_AMDSTD is not set
 # CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_ROM=m
 # CONFIG_MTD_ABSENT is not set
@@ -529,7 +533,8 @@ CONFIG_MTD_ROM=m
 # Mapping drivers for chip access
 #
 CONFIG_MTD_COMPLEX_MAPPINGS=y
-# CONFIG_MTD_PHYSMAP is not set
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_GPIO_ADDR is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
@@ -597,9 +602,42 @@ CONFIG_HAVE_IDE=y
 # SCSI device support
 #
 # CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
 # CONFIG_SCSI_NETLINK is not set
+# CONFIG_SCSI_PROC_FS is not set
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+# CONFIG_SCSI_LOWLEVEL is not set
+# CONFIG_SCSI_DH is not set
 # CONFIG_ATA is not set
 # CONFIG_MD is not set
 CONFIG_NETDEVICES=y
@@ -644,9 +682,8 @@ CONFIG_BFIN_MAC_RMII=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -715,14 +752,14 @@ CONFIG_INPUT_MISC=y
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 # CONFIG_BFIN_SPORT is not set
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 CONFIG_VT=y
 CONFIG_CONSOLE_TRANSLATIONS=y
@@ -832,11 +869,35 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_AD5252 is not set
 # CONFIG_SENSORS_AD7414 is not set
 # CONFIG_SENSORS_AD7418 is not set
 # CONFIG_SENSORS_ADCXX is not set
@@ -920,6 +981,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -1008,8 +1070,8 @@ CONFIG_USB=y
 #
 # Miscellaneous USB options
 #
-# CONFIG_USB_DEVICEFS is not set
-CONFIG_USB_DEVICE_CLASS=y
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_WHITELIST is not set
@@ -1037,10 +1099,10 @@ CONFIG_USB_MUSB_SOC=y
 CONFIG_USB_MUSB_HOST=y
 # CONFIG_USB_MUSB_PERIPHERAL is not set
 # CONFIG_USB_MUSB_OTG is not set
-# CONFIG_USB_GADGET_MUSB_HDRC is not set
 CONFIG_USB_MUSB_HDRC_HCD=y
-CONFIG_MUSB_PIO_ONLY=y
-CONFIG_MUSB_DMA_POLL=y
+# CONFIG_MUSB_PIO_ONLY is not set
+CONFIG_USB_INVENTRA_DMA=y
+# CONFIG_USB_TI_CPPI_DMA is not set
 # CONFIG_USB_MUSB_DEBUG is not set
 
 #
@@ -1058,7 +1120,7 @@ CONFIG_MUSB_DMA_POLL=y
 #
 # see USB_STORAGE Help for more information
 #
-CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
@@ -1107,33 +1169,10 @@ CONFIG_USB_STORAGE=m
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_TEST is not set
 # CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
-# CONFIG_USB_GADGET_AT91 is not set
-# CONFIG_USB_GADGET_ATMEL_USBA is not set
-# CONFIG_USB_GADGET_FSL_USB2 is not set
-# CONFIG_USB_GADGET_LH7A40X is not set
-# CONFIG_USB_GADGET_OMAP is not set
-# CONFIG_USB_GADGET_PXA25X is not set
-# CONFIG_USB_GADGET_PXA27X is not set
-# CONFIG_USB_GADGET_S3C2410 is not set
-# CONFIG_USB_GADGET_M66592 is not set
-# CONFIG_USB_GADGET_AMD5536UDC is not set
-# CONFIG_USB_GADGET_FSL_QE is not set
-# CONFIG_USB_GADGET_NET2272 is not set
-# CONFIG_USB_GADGET_NET2280 is not set
-# CONFIG_USB_GADGET_GOKU is not set
-# CONFIG_USB_GADGET_DUMMY_HCD is not set
-# CONFIG_USB_ZERO is not set
-# CONFIG_USB_AUDIO is not set
-# CONFIG_USB_ETH is not set
-# CONFIG_USB_GADGETFS is not set
-# CONFIG_USB_FILE_STORAGE is not set
-# CONFIG_USB_G_SERIAL is not set
-# CONFIG_USB_MIDI_GADGET is not set
-# CONFIG_USB_G_PRINTER is not set
-# CONFIG_USB_CDC_COMPOSITE is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
@@ -1206,7 +1245,8 @@ CONFIG_RTC_DRV_BFIN=y
 #
 # File systems
 #
-# CONFIG_EXT2_FS is not set
+CONFIG_EXT2_FS=m
+# CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT3_FS is not set
 # CONFIG_EXT4_FS is not set
 # CONFIG_REISERFS_FS is not set
@@ -1226,14 +1266,19 @@ CONFIG_INOTIFY_USER=y
 #
 # CD-ROM/DVD Filesystems
 #
-# CONFIG_ISO9660_FS is not set
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
 # CONFIG_UDF_FS is not set
 
 #
 # DOS/FAT/NT Filesystems
 #
+CONFIG_FAT_FS=m
 # CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 # CONFIG_NTFS_FS is not set
 
 #
@@ -1256,16 +1301,6 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1277,6 +1312,16 @@ CONFIG_JFFS2_ZLIB=y
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1313,7 +1358,7 @@ CONFIG_SMB_FS=m
 CONFIG_MSDOS_PARTITION=y
 CONFIG_NLS=m
 CONFIG_NLS_DEFAULT="iso8859-1"
-# CONFIG_NLS_CODEPAGE_437 is not set
+CONFIG_NLS_CODEPAGE_437=m
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
 # CONFIG_NLS_CODEPAGE_850 is not set
@@ -1328,7 +1373,7 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 # CONFIG_NLS_CODEPAGE_865 is not set
 # CONFIG_NLS_CODEPAGE_866 is not set
 # CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
+CONFIG_NLS_CODEPAGE_936=m
 # CONFIG_NLS_CODEPAGE_950 is not set
 # CONFIG_NLS_CODEPAGE_932 is not set
 # CONFIG_NLS_CODEPAGE_949 is not set
@@ -1337,7 +1382,7 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
 # CONFIG_NLS_ASCII is not set
-# CONFIG_NLS_ISO8859_1 is not set
+CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_4 is not set
@@ -1350,7 +1395,7 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 # CONFIG_NLS_ISO8859_15 is not set
 # CONFIG_NLS_KOI8_R is not set
 # CONFIG_NLS_KOI8_U is not set
-# CONFIG_NLS_UTF8 is not set
+CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
 
 #
@@ -1365,7 +1410,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1374,8 +1419,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1395,7 +1438,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 
 #
 # Tracers
@@ -1411,16 +1453,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1534,7 +1580,6 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig
index 911b5db..e9175c6 100644
--- a/arch/blackfin/configs/BF527-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF527-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -145,8 +145,8 @@ CONFIG_BF527=y
 CONFIG_BF_REV_MIN=0
 CONFIG_BF_REV_MAX=2
 # CONFIG_BF_REV_0_0 is not set
-CONFIG_BF_REV_0_1=y
-# CONFIG_BF_REV_0_2 is not set
+# CONFIG_BF_REV_0_1 is not set
+CONFIG_BF_REV_0_2=y
 # CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
 # CONFIG_BF_REV_0_5 is not set
@@ -264,7 +264,10 @@ CONFIG_HZ=250
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -318,7 +321,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=1
 CONFIG_VIRT_TO_BUS=y
-CONFIG_BFIN_GPTIMERS=m
+CONFIG_BFIN_GPTIMERS=y
 # CONFIG_DMA_UNCACHED_4M is not set
 # CONFIG_DMA_UNCACHED_2M is not set
 CONFIG_DMA_UNCACHED_1M=y
@@ -409,7 +412,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -639,9 +642,42 @@ CONFIG_HAVE_IDE=y
 # SCSI device support
 #
 # CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_TGT is not set
 # CONFIG_SCSI_NETLINK is not set
+# CONFIG_SCSI_PROC_FS is not set
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+# CONFIG_SCSI_SCAN_ASYNC is not set
+CONFIG_SCSI_WAIT_SCAN=m
+
+#
+# SCSI Transports
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+# CONFIG_SCSI_LOWLEVEL is not set
+# CONFIG_SCSI_DH is not set
 # CONFIG_ATA is not set
 # CONFIG_MD is not set
 CONFIG_NETDEVICES=y
@@ -687,9 +723,8 @@ CONFIG_BFIN_MAC_RMII=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -758,14 +793,14 @@ CONFIG_INPUT_MISC=y
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 CONFIG_VT=y
 CONFIG_CONSOLE_TRANSLATIONS=y
@@ -875,7 +910,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -909,6 +967,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -1091,8 +1150,8 @@ CONFIG_USB=y
 #
 # Miscellaneous USB options
 #
-# CONFIG_USB_DEVICEFS is not set
-CONFIG_USB_DEVICE_CLASS=y
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_WHITELIST is not set
@@ -1120,10 +1179,10 @@ CONFIG_USB_MUSB_SOC=y
 CONFIG_USB_MUSB_HOST=y
 # CONFIG_USB_MUSB_PERIPHERAL is not set
 # CONFIG_USB_MUSB_OTG is not set
-# CONFIG_USB_GADGET_MUSB_HDRC is not set
 CONFIG_USB_MUSB_HDRC_HCD=y
-CONFIG_MUSB_PIO_ONLY=y
-CONFIG_MUSB_DMA_POLL=y
+# CONFIG_MUSB_PIO_ONLY is not set
+CONFIG_USB_INVENTRA_DMA=y
+# CONFIG_USB_TI_CPPI_DMA is not set
 # CONFIG_USB_MUSB_DEBUG is not set
 
 #
@@ -1141,7 +1200,7 @@ CONFIG_MUSB_DMA_POLL=y
 #
 # see USB_STORAGE Help for more information
 #
-CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
@@ -1190,33 +1249,10 @@ CONFIG_USB_STORAGE=m
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_TEST is not set
 # CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
-# CONFIG_USB_GADGET_AT91 is not set
-# CONFIG_USB_GADGET_ATMEL_USBA is not set
-# CONFIG_USB_GADGET_FSL_USB2 is not set
-# CONFIG_USB_GADGET_LH7A40X is not set
-# CONFIG_USB_GADGET_OMAP is not set
-# CONFIG_USB_GADGET_PXA25X is not set
-# CONFIG_USB_GADGET_PXA27X is not set
-# CONFIG_USB_GADGET_S3C2410 is not set
-# CONFIG_USB_GADGET_M66592 is not set
-# CONFIG_USB_GADGET_AMD5536UDC is not set
-# CONFIG_USB_GADGET_FSL_QE is not set
-# CONFIG_USB_GADGET_NET2272 is not set
-# CONFIG_USB_GADGET_NET2280 is not set
-# CONFIG_USB_GADGET_GOKU is not set
-# CONFIG_USB_GADGET_DUMMY_HCD is not set
-# CONFIG_USB_ZERO is not set
-# CONFIG_USB_AUDIO is not set
-# CONFIG_USB_ETH is not set
-# CONFIG_USB_GADGETFS is not set
-# CONFIG_USB_FILE_STORAGE is not set
-# CONFIG_USB_G_SERIAL is not set
-# CONFIG_USB_MIDI_GADGET is not set
-# CONFIG_USB_G_PRINTER is not set
-# CONFIG_USB_CDC_COMPOSITE is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
@@ -1289,7 +1325,8 @@ CONFIG_RTC_DRV_BFIN=y
 #
 # File systems
 #
-# CONFIG_EXT2_FS is not set
+CONFIG_EXT2_FS=m
+# CONFIG_EXT2_FS_XATTR is not set
 # CONFIG_EXT3_FS is not set
 # CONFIG_EXT4_FS is not set
 # CONFIG_REISERFS_FS is not set
@@ -1309,14 +1346,20 @@ CONFIG_INOTIFY_USER=y
 #
 # CD-ROM/DVD Filesystems
 #
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_ZISOFS is not set
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
 
 #
 # DOS/FAT/NT Filesystems
 #
+CONFIG_FAT_FS=m
 # CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 # CONFIG_NTFS_FS is not set
 
 #
@@ -1339,16 +1382,6 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1360,6 +1393,16 @@ CONFIG_JFFS2_ZLIB=y
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1396,7 +1439,7 @@ CONFIG_SMB_FS=m
 CONFIG_MSDOS_PARTITION=y
 CONFIG_NLS=m
 CONFIG_NLS_DEFAULT="iso8859-1"
-# CONFIG_NLS_CODEPAGE_437 is not set
+CONFIG_NLS_CODEPAGE_437=m
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
 # CONFIG_NLS_CODEPAGE_850 is not set
@@ -1411,7 +1454,7 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 # CONFIG_NLS_CODEPAGE_865 is not set
 # CONFIG_NLS_CODEPAGE_866 is not set
 # CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
+CONFIG_NLS_CODEPAGE_936=m
 # CONFIG_NLS_CODEPAGE_950 is not set
 # CONFIG_NLS_CODEPAGE_932 is not set
 # CONFIG_NLS_CODEPAGE_949 is not set
@@ -1420,7 +1463,7 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
 # CONFIG_NLS_ASCII is not set
-# CONFIG_NLS_ISO8859_1 is not set
+CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_4 is not set
@@ -1433,7 +1476,7 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 # CONFIG_NLS_ISO8859_15 is not set
 # CONFIG_NLS_KOI8_R is not set
 # CONFIG_NLS_KOI8_U is not set
-# CONFIG_NLS_UTF8 is not set
+CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
 
 #
@@ -1448,7 +1491,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1457,8 +1500,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1478,7 +1519,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1494,16 +1534,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1611,13 +1655,12 @@ CONFIG_BITREVERSE=y
 CONFIG_CRC_CCITT=m
 # CONFIG_CRC16 is not set
 # CONFIG_CRC_T10DIF is not set
-# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC_ITU_T=m
 CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig
index 4c41e03..5aa63ba 100644
--- a/arch/blackfin/configs/BF533-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF533-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -225,7 +225,10 @@ CONFIG_HZ=250
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -382,7 +385,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -613,9 +616,8 @@ CONFIG_SMC91X=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -667,13 +669,13 @@ CONFIG_INPUT_EVDEV=m
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=y
 # CONFIG_BFIN_TIMER_LATENCY is not set
-CONFIG_BFIN_DMA_INTERFACE=m
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -729,7 +731,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -904,16 +929,6 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -925,6 +940,16 @@ CONFIG_JFFS2_ZLIB=y
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1013,7 +1038,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1022,8 +1047,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1043,7 +1066,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1059,16 +1081,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1181,7 +1207,6 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig
index 9c482cd..fed2532 100644
--- a/arch/blackfin/configs/BF533-STAMP_defconfig
+++ b/arch/blackfin/configs/BF533-STAMP_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -225,7 +225,10 @@ CONFIG_HZ=250
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -382,7 +385,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -618,9 +621,8 @@ CONFIG_SMC91X=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -674,14 +676,14 @@ CONFIG_CONFIG_INPUT_PCF8574=m
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -781,7 +783,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -1068,16 +1093,6 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1089,6 +1104,16 @@ CONFIG_JFFS2_ZLIB=y
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1177,7 +1202,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1186,8 +1211,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1207,7 +1230,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1223,16 +1245,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1345,7 +1371,6 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig
index 591f6ed..f9ac20d 100644
--- a/arch/blackfin/configs/BF537-STAMP_defconfig
+++ b/arch/blackfin/configs/BF537-STAMP_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -232,7 +232,10 @@ CONFIG_HZ=250
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -390,7 +393,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -548,9 +551,7 @@ CONFIG_MTD_ROM=m
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 CONFIG_MTD_PHYSMAP=m
-CONFIG_MTD_PHYSMAP_START=0x20000000
-CONFIG_MTD_PHYSMAP_LEN=0x0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
 
@@ -649,9 +650,8 @@ CONFIG_BFIN_RX_DESC_NUM=20
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -708,14 +708,14 @@ CONFIG_SERIO_LIBPS2=y
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -823,7 +823,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -1123,16 +1146,6 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1144,6 +1157,16 @@ CONFIG_JFFS2_ZLIB=y
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1232,7 +1255,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1241,8 +1264,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1262,7 +1283,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1278,16 +1298,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1400,7 +1424,6 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig
index 1a8e8c3..ee98e22 100644
--- a/arch/blackfin/configs/BF538-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF538-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -243,7 +243,10 @@ CONFIG_HZ=250
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -389,7 +392,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -546,9 +549,7 @@ CONFIG_MTD_ROM=m
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 CONFIG_MTD_PHYSMAP=m
-CONFIG_MTD_PHYSMAP_START=0x20000000
-CONFIG_MTD_PHYSMAP_LEN=0x0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
 
@@ -691,11 +692,11 @@ CONFIG_INPUT_EVDEV=m
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
-# CONFIG_TOUCHSCREEN_ADS7846 is not set
 # CONFIG_TOUCHSCREEN_AD7877 is not set
 # CONFIG_TOUCHSCREEN_AD7879_I2C is not set
 CONFIG_TOUCHSCREEN_AD7879_SPI=y
 CONFIG_TOUCHSCREEN_AD7879=y
+# CONFIG_TOUCHSCREEN_ADS7846 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -720,14 +721,14 @@ CONFIG_INPUT_MISC=y
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -833,7 +834,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -1056,16 +1080,6 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1077,6 +1091,16 @@ CONFIG_JFFS2_ZLIB=y
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1165,7 +1189,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1174,8 +1198,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1195,7 +1217,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 
 #
 # Tracers
@@ -1211,16 +1232,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1333,7 +1358,6 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig
index 2cd1c2b..deeabef 100644
--- a/arch/blackfin/configs/BF548-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF548-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -196,6 +196,7 @@ CONFIG_BFIN548_EZKIT=y
 # BF548 Specific Configuration
 #
 # CONFIG_DEB_DMA_URGENT is not set
+# CONFIG_BF548_ATAPI_ALTERNATIVE_PORT is not set
 
 #
 # Interrupt Priority Assignment
@@ -298,7 +299,10 @@ CONFIG_HZ=250
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -367,7 +371,9 @@ CONFIG_BFIN_DCACHE=y
 # CONFIG_BFIN_ICACHE_LOCK is not set
 CONFIG_BFIN_WB=y
 # CONFIG_BFIN_WT is not set
-# CONFIG_BFIN_L2_CACHEABLE is not set
+# CONFIG_BFIN_L2_WB is not set
+CONFIG_BFIN_L2_WT=y
+# CONFIG_BFIN_L2_NOT_CACHED is not set
 # CONFIG_MPU is not set
 
 #
@@ -447,7 +453,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -616,9 +622,7 @@ CONFIG_MTD_RAM=y
 #
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_PHYSMAP_START=0x20000000
-CONFIG_MTD_PHYSMAP_LEN=0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_GPIO_ADDR is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
@@ -696,7 +700,7 @@ CONFIG_SCSI_DMA=y
 CONFIG_BLK_DEV_SD=y
 # CONFIG_CHR_DEV_ST is not set
 # CONFIG_CHR_DEV_OSST is not set
-CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR=m
 # CONFIG_BLK_DEV_SR_VENDOR is not set
 # CONFIG_CHR_DEV_SG is not set
 # CONFIG_CHR_DEV_SCH is not set
@@ -718,9 +722,7 @@ CONFIG_SCSI_WAIT_SCAN=m
 # CONFIG_SCSI_ISCSI_ATTRS is not set
 # CONFIG_SCSI_SAS_LIBSAS is not set
 # CONFIG_SCSI_SRP_ATTRS is not set
-CONFIG_SCSI_LOWLEVEL=y
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_DH is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
@@ -752,9 +754,8 @@ CONFIG_SMSC911X=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -821,11 +822,11 @@ CONFIG_KEYBOARD_BFIN=y
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
-# CONFIG_TOUCHSCREEN_ADS7846 is not set
 CONFIG_TOUCHSCREEN_AD7877=m
 # CONFIG_TOUCHSCREEN_AD7879_I2C is not set
 # CONFIG_TOUCHSCREEN_AD7879_SPI is not set
 # CONFIG_TOUCHSCREEN_AD7879 is not set
+# CONFIG_TOUCHSCREEN_ADS7846 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -858,14 +859,14 @@ CONFIG_INPUT_MISC=y
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-CONFIG_BF5xx_PPI=m
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=m
 # CONFIG_BFIN_TIMER_LATENCY is not set
-# CONFIG_TWI_LCD is not set
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 CONFIG_SIMPLE_GPIO=m
 CONFIG_VT=y
 CONFIG_CONSOLE_TRANSLATIONS=y
@@ -977,7 +978,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -1011,6 +1035,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -1193,8 +1218,8 @@ CONFIG_USB=y
 #
 # Miscellaneous USB options
 #
-# CONFIG_USB_DEVICEFS is not set
-CONFIG_USB_DEVICE_CLASS=y
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_WHITELIST is not set
@@ -1222,10 +1247,10 @@ CONFIG_USB_MUSB_SOC=y
 CONFIG_USB_MUSB_HOST=y
 # CONFIG_USB_MUSB_PERIPHERAL is not set
 # CONFIG_USB_MUSB_OTG is not set
-# CONFIG_USB_GADGET_MUSB_HDRC is not set
 CONFIG_USB_MUSB_HDRC_HCD=y
-CONFIG_MUSB_PIO_ONLY=y
-CONFIG_MUSB_DMA_POLL=y
+# CONFIG_MUSB_PIO_ONLY is not set
+CONFIG_USB_INVENTRA_DMA=y
+# CONFIG_USB_TI_CPPI_DMA is not set
 # CONFIG_USB_MUSB_DEBUG is not set
 
 #
@@ -1243,7 +1268,7 @@ CONFIG_MUSB_DMA_POLL=y
 #
 # see USB_STORAGE Help for more information
 #
-CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
@@ -1292,33 +1317,10 @@ CONFIG_USB_STORAGE=m
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_IOWARRIOR is not set
+# CONFIG_USB_TEST is not set
 # CONFIG_USB_ISIGHTFW is not set
 # CONFIG_USB_VST is not set
 # CONFIG_USB_GADGET is not set
-# CONFIG_USB_GADGET_AT91 is not set
-# CONFIG_USB_GADGET_ATMEL_USBA is not set
-# CONFIG_USB_GADGET_FSL_USB2 is not set
-# CONFIG_USB_GADGET_LH7A40X is not set
-# CONFIG_USB_GADGET_OMAP is not set
-# CONFIG_USB_GADGET_PXA25X is not set
-# CONFIG_USB_GADGET_PXA27X is not set
-# CONFIG_USB_GADGET_S3C2410 is not set
-# CONFIG_USB_GADGET_M66592 is not set
-# CONFIG_USB_GADGET_AMD5536UDC is not set
-# CONFIG_USB_GADGET_FSL_QE is not set
-# CONFIG_USB_GADGET_NET2272 is not set
-# CONFIG_USB_GADGET_NET2280 is not set
-# CONFIG_USB_GADGET_GOKU is not set
-# CONFIG_USB_GADGET_DUMMY_HCD is not set
-# CONFIG_USB_ZERO is not set
-# CONFIG_USB_AUDIO is not set
-# CONFIG_USB_ETH is not set
-# CONFIG_USB_GADGETFS is not set
-# CONFIG_USB_FILE_STORAGE is not set
-# CONFIG_USB_G_SERIAL is not set
-# CONFIG_USB_MIDI_GADGET is not set
-# CONFIG_USB_G_PRINTER is not set
-# CONFIG_USB_CDC_COMPOSITE is not set
 CONFIG_MMC=y
 # CONFIG_MMC_DEBUG is not set
 # CONFIG_MMC_UNSAFE_RESUME is not set
@@ -1414,13 +1416,8 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 # CONFIG_EXT2_FS_POSIX_ACL is not set
 # CONFIG_EXT2_FS_SECURITY is not set
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-# CONFIG_EXT3_FS_POSIX_ACL is not set
-# CONFIG_EXT3_FS_SECURITY is not set
+# CONFIG_EXT3_FS is not set
 # CONFIG_EXT4_FS is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
 CONFIG_FS_MBCACHE=y
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
@@ -1476,16 +1473,6 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -1497,6 +1484,16 @@ CONFIG_JFFS2_ZLIB=y
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1539,63 +1536,47 @@ CONFIG_CIFS=y
 #
 # Partition Types
 #
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
+# CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
-# CONFIG_SYSV68_PARTITION is not set
 CONFIG_NLS=y
 CONFIG_NLS_DEFAULT="iso8859-1"
 CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
 CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
 CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
 CONFIG_NLS_UTF8=m
 # CONFIG_DLM is not set
 
@@ -1611,7 +1592,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1620,8 +1601,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1641,7 +1620,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1657,16 +1635,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1780,7 +1762,6 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig
index 4a6ea8e..dcfbe2e 100644
--- a/arch/blackfin/configs/BF561-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF561-EZKIT_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
+# Thu May 21 05:50:01 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -42,10 +43,11 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,16 +55,15 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -71,7 +72,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -148,9 +148,9 @@ CONFIG_BF_REV_MAX=5
 # CONFIG_BF_REV_0_0 is not set
 # CONFIG_BF_REV_0_1 is not set
 # CONFIG_BF_REV_0_2 is not set
-CONFIG_BF_REV_0_3=y
+# CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
-# CONFIG_BF_REV_0_5 is not set
+CONFIG_BF_REV_0_5=y
 # CONFIG_BF_REV_0_6 is not set
 # CONFIG_BF_REV_ANY is not set
 # CONFIG_BF_REV_NONE is not set
@@ -179,7 +179,6 @@ CONFIG_BFIN561_EZKIT=y
 # Core B Support
 #
 CONFIG_BF561_COREB=y
-CONFIG_BF561_COREB_RESET=y
 
 #
 # Interrupt Priority Assignment
@@ -264,7 +263,10 @@ CONFIG_HZ=250
 CONFIG_SCHED_HRTICK=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 CONFIG_TICK_ONESHOT=y
 # CONFIG_NO_HZ is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -334,7 +336,9 @@ CONFIG_BFIN_DCACHE=y
 # CONFIG_BFIN_ICACHE_LOCK is not set
 CONFIG_BFIN_WB=y
 # CONFIG_BFIN_WT is not set
-# CONFIG_BFIN_L2_CACHEABLE is not set
+# CONFIG_BFIN_L2_WB is not set
+CONFIG_BFIN_L2_WT=y
+# CONFIG_BFIN_L2_NOT_CACHED is not set
 # CONFIG_MPU is not set
 
 #
@@ -415,7 +419,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -570,9 +574,7 @@ CONFIG_MTD_ROM=m
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 CONFIG_MTD_PHYSMAP=m
-CONFIG_MTD_PHYSMAP_START=0x20000000
-CONFIG_MTD_PHYSMAP_LEN=0x0
-CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
 # CONFIG_MTD_UCLINUX is not set
 # CONFIG_MTD_PLATRAM is not set
 
@@ -649,9 +651,8 @@ CONFIG_SMC91X=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -703,13 +704,13 @@ CONFIG_INPUT_EVDEV=m
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 # CONFIG_BFIN_SPORT is not set
 # CONFIG_BFIN_TIMER_LATENCY is not set
-CONFIG_BFIN_DMA_INTERFACE=m
 CONFIG_SIMPLE_GPIO=m
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
@@ -765,7 +766,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
@@ -897,16 +921,6 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-CONFIG_YAFFS_FS=m
-CONFIG_YAFFS_YAFFS1=y
-# CONFIG_YAFFS_9BYTE_TAGS is not set
-# CONFIG_YAFFS_DOES_ECC is not set
-CONFIG_YAFFS_YAFFS2=y
-CONFIG_YAFFS_AUTO_YAFFS2=y
-# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
-# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
-# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
-CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 CONFIG_JFFS2_FS=m
 CONFIG_JFFS2_FS_DEBUG=0
 CONFIG_JFFS2_FS_WRITEBUFFER=y
@@ -918,6 +932,16 @@ CONFIG_JFFS2_ZLIB=y
 # CONFIG_JFFS2_LZO is not set
 CONFIG_JFFS2_RTIME=y
 # CONFIG_JFFS2_RUBIN is not set
+CONFIG_YAFFS_FS=m
+CONFIG_YAFFS_YAFFS1=y
+# CONFIG_YAFFS_9BYTE_TAGS is not set
+# CONFIG_YAFFS_DOES_ECC is not set
+CONFIG_YAFFS_YAFFS2=y
+CONFIG_YAFFS_AUTO_YAFFS2=y
+# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set
+# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
+# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1006,7 +1030,7 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
@@ -1015,8 +1039,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1036,7 +1058,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_FAULT_INJECTION is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1052,16 +1073,20 @@ CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
-# CONFIG_DEBUG_HWERR is not set
-# CONFIG_DEBUG_DOUBLEFAULT is not set
+CONFIG_DEBUG_HWERR=y
+CONFIG_EXACT_HWERR=y
+CONFIG_DEBUG_DOUBLEFAULT=y
+CONFIG_DEBUG_DOUBLEFAULT_PRINT=y
+# CONFIG_DEBUG_DOUBLEFAULT_RESET is not set
+# CONFIG_DEBUG_ICACHE_CHECK is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
-# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set
+# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF is not set
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 # CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set
-CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0
+CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=1
 # CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set
-# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set
+CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_ACCESS_CHECK=y
@@ -1174,7 +1199,6 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig
index ef1a2c8..174c578 100644
--- a/arch/blackfin/configs/BlackStamp_defconfig
+++ b/arch/blackfin/configs/BlackStamp_defconfig
@@ -46,7 +46,7 @@ CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -381,7 +381,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig
index e2fc588..e17875e 100644
--- a/arch/blackfin/configs/CM-BF527_defconfig
+++ b/arch/blackfin/configs/CM-BF527_defconfig
@@ -46,7 +46,7 @@ CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -411,7 +411,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -783,7 +783,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
diff --git a/arch/blackfin/configs/CM-BF533_defconfig b/arch/blackfin/configs/CM-BF533_defconfig
index 65a8bbb..fafd95e 100644
--- a/arch/blackfin/configs/CM-BF533_defconfig
+++ b/arch/blackfin/configs/CM-BF533_defconfig
@@ -49,7 +49,7 @@ CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -347,7 +347,7 @@ CONFIG_IP_FIB_HASH=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -547,9 +547,9 @@ CONFIG_MII=y
 CONFIG_SMC91X=y
 # CONFIG_SMSC911X is not set
 # CONFIG_DM9000 is not set
-CONFIG_NETDEV_1000=y
+# CONFIG_NETDEV_1000 is not set
 # CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -641,6 +641,10 @@ CONFIG_UNIX98_PTYS=y
 # CONFIG_TCG_TPM is not set
 # CONFIG_I2C is not set
 
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+
 #
 # SPI support
 #
diff --git a/arch/blackfin/configs/CM-BF537E_defconfig b/arch/blackfin/configs/CM-BF537E_defconfig
index 9b7e9d78..e73aa5a 100644
--- a/arch/blackfin/configs/CM-BF537E_defconfig
+++ b/arch/blackfin/configs/CM-BF537E_defconfig
@@ -1,6 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.22.16
+# Linux kernel version: 2.6.28.10
+# Wed Jun  3 06:27:41 2009
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -8,48 +9,44 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y
 # CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
 CONFIG_BLACKFIN=y
 CONFIG_ZONE_DMA=y
-CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_GPIO=y
 CONFIG_FORCE_MAX_ZONEORDER=14
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
-# Code maturity level options
+# General setup
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SYSVIPC=y
-# CONFIG_IPC_NS is not set
 CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_POSIX_MQUEUE is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
-# CONFIG_UTS_NS is not set
 # CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSFS_DEPRECATED is not set
+# CONFIG_CGROUPS is not set
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
 # CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
 # CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -58,37 +55,36 @@ CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_EVENTFD=y
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_BIG_ORDER_ALLOC_NOFAIL_MAGIC=3
-# CONFIG_NP2 is not set
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
-CONFIG_RT_MUTEXES=y
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
-
-#
-# Block layer
-#
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
 
 #
 # IO Schedulers
@@ -102,9 +98,11 @@ CONFIG_IOSCHED_CFQ=y
 # CONFIG_DEFAULT_CFQ is not set
 CONFIG_DEFAULT_NOOP=y
 CONFIG_DEFAULT_IOSCHED="noop"
+CONFIG_CLASSIC_RCU=y
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
+# CONFIG_FREEZER is not set
 
 #
 # Blackfin Processor Options
@@ -113,6 +111,10 @@ CONFIG_PREEMPT_NONE=y
 #
 # Processor and Board Settings
 #
+# CONFIG_BF512 is not set
+# CONFIG_BF514 is not set
+# CONFIG_BF516 is not set
+# CONFIG_BF518 is not set
 # CONFIG_BF522 is not set
 # CONFIG_BF523 is not set
 # CONFIG_BF524 is not set
@@ -125,22 +127,31 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_BF534 is not set
 # CONFIG_BF536 is not set
 CONFIG_BF537=y
+# CONFIG_BF538 is not set
+# CONFIG_BF539 is not set
 # CONFIG_BF542 is not set
+# CONFIG_BF542M is not set
 # CONFIG_BF544 is not set
+# CONFIG_BF544M is not set
 # CONFIG_BF547 is not set
+# CONFIG_BF547M is not set
 # CONFIG_BF548 is not set
+# CONFIG_BF548M is not set
 # CONFIG_BF549 is not set
+# CONFIG_BF549M is not set
 # CONFIG_BF561 is not set
+CONFIG_BF_REV_MIN=2
+CONFIG_BF_REV_MAX=3
 # CONFIG_BF_REV_0_0 is not set
 # CONFIG_BF_REV_0_1 is not set
 CONFIG_BF_REV_0_2=y
 # CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
 # CONFIG_BF_REV_0_5 is not set
+# CONFIG_BF_REV_0_6 is not set
 # CONFIG_BF_REV_ANY is not set
 # CONFIG_BF_REV_NONE is not set
 CONFIG_BF53x=y
-CONFIG_BFIN_SINGLE_CORE=y
 CONFIG_MEM_MT48LC16M16A2TG_75=y
 CONFIG_IRQ_PLL_WAKEUP=7
 CONFIG_IRQ_RTC=8
@@ -150,7 +161,6 @@ CONFIG_IRQ_SPORT0_TX=9
 CONFIG_IRQ_SPORT1_RX=9
 CONFIG_IRQ_SPORT1_TX=9
 CONFIG_IRQ_TWI=10
-CONFIG_IRQ_SPI=10
 CONFIG_IRQ_UART0_RX=10
 CONFIG_IRQ_UART0_TX=10
 CONFIG_IRQ_UART1_RX=10
@@ -169,11 +179,12 @@ CONFIG_IRQ_PORTG_INTB=12
 CONFIG_IRQ_MEM_DMA0=13
 CONFIG_IRQ_MEM_DMA1=13
 CONFIG_IRQ_WATCH=13
+CONFIG_IRQ_SPI=10
 # CONFIG_BFIN537_STAMP is not set
 CONFIG_BFIN537_BLUETECHNIX_CM=y
+# CONFIG_BFIN537_BLUETECHNIX_TCM is not set
 # CONFIG_PNAV10 is not set
 # CONFIG_CAMSIG_MINOTAUR is not set
-# CONFIG_GENERIC_BF537_BOARD is not set
 
 #
 # BF537 Specific Configuration
@@ -196,6 +207,7 @@ CONFIG_IRQ_PROG_INTA=12
 # Board customizations
 #
 # CONFIG_CMDLINE_BOOL is not set
+CONFIG_BOOT_LOAD=0x1000
 
 #
 # Clock/PLL Setup
@@ -215,13 +227,20 @@ CONFIG_HZ_250=y
 # CONFIG_HZ_300 is not set
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=250
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
+# CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 
 #
-# Memory Setup
+# Misc
 #
-CONFIG_MAX_MEM_SIZE=32
-CONFIG_MEM_ADD_WIDTH=9
-CONFIG_BOOT_LOAD=0x1000
 CONFIG_BFIN_SCRATCH_REG_RETN=y
 # CONFIG_BFIN_SCRATCH_REG_RETE is not set
 # CONFIG_BFIN_SCRATCH_REG_CYCLES is not set
@@ -248,6 +267,12 @@ CONFIG_IP_CHECKSUM_L1=y
 CONFIG_CACHELINE_ALIGNED_L1=y
 CONFIG_SYSCALL_TAB_L1=y
 CONFIG_CPLB_SWITCH_TAB_L1=y
+CONFIG_APP_STACK_L1=y
+
+#
+# Speed Optimizations
+#
+CONFIG_BFIN_INS_LOWOVERHEAD=y
 CONFIG_RAMKERNEL=y
 # CONFIG_ROMKERNEL is not set
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -256,12 +281,14 @@ CONFIG_FLATMEM_MANUAL=y
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_RESOURCES_64BIT is not set
+# CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=1
-CONFIG_LARGE_ALLOCS=y
+CONFIG_VIRT_TO_BUS=y
 # CONFIG_BFIN_GPTIMERS is not set
+# CONFIG_DMA_UNCACHED_4M is not set
 # CONFIG_DMA_UNCACHED_2M is not set
 CONFIG_DMA_UNCACHED_1M=y
 # CONFIG_DMA_UNCACHED_NONE is not set
@@ -275,7 +302,6 @@ CONFIG_BFIN_DCACHE=y
 # CONFIG_BFIN_ICACHE_LOCK is not set
 CONFIG_BFIN_WB=y
 # CONFIG_BFIN_WT is not set
-CONFIG_L1_MAX_PIECE=16
 # CONFIG_MPU is not set
 
 #
@@ -304,36 +330,28 @@ CONFIG_BANK_3=0xFFC2
 #
 # Bus options (PCI, PCMCIA, EISA, MCA, ISA)
 #
-# CONFIG_PCI is not set
 # CONFIG_ARCH_SUPPORTS_MSI is not set
 
 #
-# PCCARD (PCMCIA/CardBus) support
-#
-
-#
 # Executable file formats
 #
 CONFIG_BINFMT_ELF_FDPIC=y
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
 CONFIG_BINFMT_SHARED_FLAT=y
+# CONFIG_HAVE_AOUT is not set
 # CONFIG_BINFMT_MISC is not set
 
 #
 # Power management options
 #
 # CONFIG_PM is not set
-# CONFIG_PM_WAKEUP_BY_GPIO is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
 
 #
 # CPU Frequency scaling
 #
 # CONFIG_CPU_FREQ is not set
-
-#
-# Networking
-#
 CONFIG_NET=y
 
 #
@@ -346,6 +364,7 @@ CONFIG_XFRM=y
 # CONFIG_XFRM_USER is not set
 # CONFIG_XFRM_SUB_POLICY is not set
 # CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
@@ -358,7 +377,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -367,6 +386,7 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_XFRM_MODE_TRANSPORT=y
 CONFIG_INET_XFRM_MODE_TUNNEL=y
 CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
@@ -374,8 +394,6 @@ CONFIG_TCP_CONG_CUBIC=y
 CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_TCP_MD5SIG is not set
 # CONFIG_IPV6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
 # CONFIG_NETLABEL is not set
 # CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
@@ -384,6 +402,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
 # CONFIG_VLAN_8021Q is not set
 # CONFIG_DECNET is not set
 # CONFIG_LLC2 is not set
@@ -393,10 +412,6 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
 # CONFIG_NET_SCHED is not set
 
 #
@@ -404,18 +419,14 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-
-#
-# Wireless
-#
-# CONFIG_CFG80211 is not set
-# CONFIG_WIRELESS_EXT is not set
-# CONFIG_MAC80211 is not set
-# CONFIG_IEEE80211 is not set
+# CONFIG_PHONET is not set
+# CONFIG_WIRELESS is not set
 # CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
 
 #
 # Device Drivers
@@ -427,10 +438,6 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 # CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
 # CONFIG_CONNECTOR is not set
 CONFIG_MTD=y
 # CONFIG_MTD_DEBUG is not set
@@ -438,6 +445,7 @@ CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
 # CONFIG_MTD_REDBOOT_PARTS is not set
 # CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
 
 #
 # User Modules And Translation Layers
@@ -450,12 +458,15 @@ CONFIG_MTD_BLOCK=y
 # CONFIG_INFTL is not set
 # CONFIG_RFD_FTL is not set
 # CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
 
 #
 # RAM/ROM/Flash chip drivers
 #
-# CONFIG_MTD_CFI is not set
+CONFIG_MTD_CFI=y
 # CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
 CONFIG_MTD_MAP_BANK_WIDTH_1=y
 CONFIG_MTD_MAP_BANK_WIDTH_2=y
 CONFIG_MTD_MAP_BANK_WIDTH_4=y
@@ -466,6 +477,10 @@ CONFIG_MTD_CFI_I1=y
 CONFIG_MTD_CFI_I2=y
 # CONFIG_MTD_CFI_I4 is not set
 # CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_CFI_INTELEXT=y
+# CONFIG_MTD_CFI_AMDSTD is not set
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
 CONFIG_MTD_RAM=y
 # CONFIG_MTD_ROM is not set
 # CONFIG_MTD_ABSENT is not set
@@ -473,7 +488,8 @@ CONFIG_MTD_RAM=y
 #
 # Mapping drivers for chip access
 #
-# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_GPIO_ADDR=y
 CONFIG_MTD_UCLINUX=y
 # CONFIG_MTD_PLATRAM is not set
 
@@ -498,33 +514,23 @@ CONFIG_MTD_UCLINUX=y
 # UBI - Unsorted block images
 #
 # CONFIG_MTD_UBI is not set
-
-#
-# Parallel port support
-#
 # CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-# CONFIG_PNPACPI is not set
-
-#
-# Block devices
-#
+CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 # CONFIG_BLK_DEV_LOOP is not set
 # CONFIG_BLK_DEV_NBD is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
+# CONFIG_BLK_DEV_XIP is not set
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
-
-#
-# Misc devices
-#
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_C2PORT is not set
+CONFIG_HAVE_IDE=y
 # CONFIG_IDE is not set
 
 #
@@ -532,22 +538,17 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
 #
 # CONFIG_RAID_ATTRS is not set
 # CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
 # CONFIG_SCSI_NETLINK is not set
 # CONFIG_ATA is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
 # CONFIG_MD is not set
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
+# CONFIG_VETH is not set
 CONFIG_PHYLIB=y
 
 #
@@ -561,46 +562,44 @@ CONFIG_PHYLIB=y
 # CONFIG_VITESSE_PHY is not set
 # CONFIG_SMSC_PHY is not set
 # CONFIG_BROADCOM_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_REALTEK_PHY is not set
 # CONFIG_FIXED_PHY is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
+# CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
-# CONFIG_SMC91X is not set
 CONFIG_BFIN_MAC=y
 CONFIG_BFIN_MAC_USE_L1=y
 CONFIG_BFIN_TX_DESC_NUM=10
 CONFIG_BFIN_RX_DESC_NUM=20
 # CONFIG_BFIN_MAC_RMII is not set
+# CONFIG_SMC91X is not set
 # CONFIG_SMSC911X is not set
 # CONFIG_DM9000 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
 #
 # CONFIG_WLAN_PRE80211 is not set
 # CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
 # CONFIG_WAN is not set
 # CONFIG_PPP is not set
 # CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
 # CONFIG_NETCONSOLE is not set
 # CONFIG_NETPOLL is not set
 # CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
 # CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
 # CONFIG_PHONE is not set
 
 #
@@ -618,15 +617,17 @@ CONFIG_NETDEV_10000=y
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PFLAGS is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=y
 # CONFIG_BFIN_TIMER_LATENCY is not set
+# CONFIG_SIMPLE_GPIO is not set
 # CONFIG_VT is not set
 # CONFIG_DEVKMEM is not set
+# CONFIG_BFIN_JTAG_COMM is not set
 # CONFIG_SERIAL_NONSTANDARD is not set
 
 #
@@ -655,138 +656,119 @@ CONFIG_UNIX98_PTYS=y
 # CAN, the car bus and industrial fieldbus
 #
 # CONFIG_CAN4LINUX is not set
-
-#
-# IPMI
-#
 # CONFIG_IPMI_HANDLER is not set
-# CONFIG_WATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
-# CONFIG_GEN_RTC is not set
 # CONFIG_R3964 is not set
 # CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
 
 #
-# TPM devices
+# Memory mapped GPIO expanders:
 #
-# CONFIG_TCG_TPM is not set
-# CONFIG_I2C is not set
 
 #
-# SPI support
+# I2C GPIO expanders:
+#
+
+#
+# PCI GPIO expanders:
 #
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
 
 #
-# Dallas's 1-wire bus
+# SPI GPIO expanders:
 #
 # CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ABITUGURU is not set
 # CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
 # CONFIG_SENSORS_SMSC47B397 is not set
 # CONFIG_SENSORS_VT1211 is not set
 # CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
 
 #
 # Multifunction device drivers
 #
+# CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_REGULATOR is not set
 
 #
 # Multimedia devices
 #
+
+#
+# Multimedia core support
+#
 # CONFIG_VIDEO_DEV is not set
 # CONFIG_DVB_CORE is not set
-# CONFIG_DAB is not set
+# CONFIG_VIDEO_MEDIA is not set
 
 #
-# Graphics support
+# Multimedia drivers
 #
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+# CONFIG_DAB is not set
 
 #
-# Display device support
+# Graphics support
 #
-# CONFIG_DISPLAY_SUPPORT is not set
 # CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
 # CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
-# Sound
+# Display device support
 #
+# CONFIG_DISPLAY_SUPPORT is not set
 # CONFIG_SOUND is not set
-
-#
-# USB support
-#
+CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
 # CONFIG_USB_ARCH_HAS_EHCI is not set
 # CONFIG_USB is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
 
 #
 # Enable Host or Gadget support to see Inventra options
 #
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# USB Gadget Support
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
 #
 # CONFIG_USB_GADGET is not set
 # CONFIG_MMC is not set
-
-#
-# LED devices
-#
+# CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
-
-#
-# LED drivers
-#
-
-#
-# LED Triggers
-#
-
-#
-# InfiniBand support
-#
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
-
-#
-# Real Time Clock
-#
+# CONFIG_ACCESSIBILITY is not set
 # CONFIG_RTC_CLASS is not set
-
-#
-# DMA Engine support
-#
-# CONFIG_DMA_ENGINE is not set
-
-#
-# DMA Clients
-#
-
-#
-# DMA Devices
-#
-
-#
-# PBX support
-#
-# CONFIG_PBX is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
 
 #
 # File systems
@@ -796,20 +778,18 @@ CONFIG_EXT2_FS_XATTR=y
 # CONFIG_EXT2_FS_POSIX_ACL is not set
 # CONFIG_EXT2_FS_SECURITY is not set
 # CONFIG_EXT3_FS is not set
-# CONFIG_EXT4DEV_FS is not set
+# CONFIG_EXT4_FS is not set
 CONFIG_FS_MBCACHE=y
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
 # CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
 # CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
+# CONFIG_DNOTIFY is not set
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
-# CONFIG_DNOTIFY is not set
 # CONFIG_AUTOFS_FS is not set
 # CONFIG_AUTOFS4_FS is not set
 # CONFIG_FUSE_FS is not set
@@ -835,7 +815,6 @@ CONFIG_PROC_SYSCTL=y
 CONFIG_SYSFS=y
 # CONFIG_TMPFS is not set
 # CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
 # CONFIG_CONFIGFS_FS is not set
 
 #
@@ -848,60 +827,53 @@ CONFIG_RAMFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
-# CONFIG_YAFFS_FS is not set
 # CONFIG_JFFS2_FS is not set
+# CONFIG_YAFFS_FS is not set
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-# CONFIG_NFS_FS is not set
-# CONFIG_NFSD is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
 
 #
 # Partition Types
 #
 # CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
 # CONFIG_NLS is not set
-
-#
-# Distributed Lock Manager
-#
 # CONFIG_DLM is not set
 
 #
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
-#
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
 CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
 # CONFIG_MAGIC_SYSRQ is not set
 # CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_SECTION_MISMATCH=y
 # CONFIG_DEBUG_KERNEL is not set
 # CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+
+#
+# Tracers
+#
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+CONFIG_DEBUG_VERBOSE=y
 CONFIG_DEBUG_MMRS=y
+# CONFIG_DEBUG_DOUBLEFAULT is not set
 CONFIG_DEBUG_HUNT_FOR_ZERO=y
 CONFIG_DEBUG_BFIN_HWTRACE_ON=y
 CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y
@@ -919,13 +891,95 @@ CONFIG_ACCESS_CHECK=y
 #
 # CONFIG_KEYS is not set
 CONFIG_SECURITY=y
+# CONFIG_SECURITYFS is not set
 # CONFIG_SECURITY_NETWORK is not set
-CONFIG_SECURITY_CAPABILITIES=y
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_MANAGER2 is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
 
 #
-# Cryptographic options
+# Random Number Generation
 #
-# CONFIG_CRYPTO is not set
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_HW=y
 
 #
 # Library routines
@@ -933,11 +987,12 @@ CONFIG_SECURITY_CAPABILITIES=y
 CONFIG_BITREVERSE=y
 CONFIG_CRC_CCITT=m
 # CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
 # CONFIG_CRC_ITU_T is not set
 CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/CM-BF537U_defconfig b/arch/blackfin/configs/CM-BF537U_defconfig
index 569523c..8021130 100644
--- a/arch/blackfin/configs/CM-BF537U_defconfig
+++ b/arch/blackfin/configs/CM-BF537U_defconfig
@@ -49,7 +49,7 @@ CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -355,7 +355,7 @@ CONFIG_IP_FIB_HASH=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -556,9 +556,9 @@ CONFIG_SMC91X=y
 # CONFIG_BFIN_MAC is not set
 # CONFIG_SMSC911X is not set
 # CONFIG_DM9000 is not set
-CONFIG_NETDEV_1000=y
+# CONFIG_NETDEV_1000 is not set
 # CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -652,6 +652,10 @@ CONFIG_UNIX98_PTYS=y
 # CONFIG_TCG_TPM is not set
 # CONFIG_I2C is not set
 
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+
 #
 # SPI support
 #
diff --git a/arch/blackfin/configs/CM-BF548_defconfig b/arch/blackfin/configs/CM-BF548_defconfig
index 035b635..dd815f0 100644
--- a/arch/blackfin/configs/CM-BF548_defconfig
+++ b/arch/blackfin/configs/CM-BF548_defconfig
@@ -49,7 +49,7 @@ CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
@@ -125,9 +125,9 @@ CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_BF548=y
 # CONFIG_BF549 is not set
 # CONFIG_BF561 is not set
-CONFIG_BF_REV_0_0=y
+# CONFIG_BF_REV_0_0 is not set
 # CONFIG_BF_REV_0_1 is not set
-# CONFIG_BF_REV_0_2 is not set
+CONFIG_BF_REV_0_2=y
 # CONFIG_BF_REV_0_3 is not set
 # CONFIG_BF_REV_0_4 is not set
 # CONFIG_BF_REV_0_5 is not set
@@ -422,7 +422,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -811,6 +811,10 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100
 # CONFIG_I2C_DEBUG_BUS is not set
 # CONFIG_I2C_DEBUG_CHIP is not set
 
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+
 #
 # SPI support
 #
diff --git a/arch/blackfin/configs/CM-BF561_defconfig b/arch/blackfin/configs/CM-BF561_defconfig
index 7015e42..16c198b 100644
--- a/arch/blackfin/configs/CM-BF561_defconfig
+++ b/arch/blackfin/configs/CM-BF561_defconfig
@@ -49,7 +49,7 @@ CONFIG_FAIR_USER_SCHED=y
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -389,7 +389,7 @@ CONFIG_IP_FIB_HASH=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -569,9 +569,9 @@ CONFIG_SMC91X=y
 # CONFIG_IBM_NEW_EMAC_TAH is not set
 # CONFIG_IBM_NEW_EMAC_EMAC4 is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
+# CONFIG_NETDEV_1000 is not set
 # CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -646,6 +646,10 @@ CONFIG_UNIX98_PTYS=y
 # CONFIG_TCG_TPM is not set
 # CONFIG_I2C is not set
 
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+
 #
 # SPI support
 #
diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig
index dfc8e1d..6b4c1a9 100644
--- a/arch/blackfin/configs/H8606_defconfig
+++ b/arch/blackfin/configs/H8606_defconfig
@@ -48,7 +48,7 @@ CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
@@ -347,7 +347,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -594,8 +594,8 @@ CONFIG_MII=y
 # CONFIG_SMC91X is not set
 # CONFIG_SMSC911X is not set
 CONFIG_DM9000=y
-CONFIG_NETDEV_1000=y
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 # CONFIG_AX88180 is not set
 
 #
diff --git a/arch/blackfin/configs/IP0X_defconfig b/arch/blackfin/configs/IP0X_defconfig
index 95a5f91..1ec9ae2 100644
--- a/arch/blackfin/configs/IP0X_defconfig
+++ b/arch/blackfin/configs/IP0X_defconfig
@@ -49,7 +49,7 @@ CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -355,7 +355,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -672,9 +672,9 @@ CONFIG_MII=y
 # CONFIG_SMC91X is not set
 # CONFIG_SMSC911X is not set
 CONFIG_DM9000=y
-CONFIG_NETDEV_1000=y
+# CONFIG_NETDEV_1000 is not set
 # CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
diff --git a/arch/blackfin/configs/PNAV-10_defconfig b/arch/blackfin/configs/PNAV-10_defconfig
index 78e2408..09701f9 100644
--- a/arch/blackfin/configs/PNAV-10_defconfig
+++ b/arch/blackfin/configs/PNAV-10_defconfig
@@ -1,6 +1,6 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28.7
+# Linux kernel version: 2.6.28.10
 #
 # CONFIG_MMU is not set
 # CONFIG_FPU is not set
@@ -40,26 +40,26 @@ CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_NAMESPACES is not set
 # CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 # CONFIG_ELF_CORE is not set
-CONFIG_COMPAT_BRK=y
 CONFIG_BASE_FULL=y
 # CONFIG_FUTEX is not set
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 # CONFIG_AIO is not set
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
@@ -68,7 +68,6 @@ CONFIG_SLAB=y
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
 CONFIG_TINY_SHMEM=y
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
@@ -229,7 +228,10 @@ CONFIG_HZ=250
 # CONFIG_SCHED_HRTICK is not set
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
+# CONFIG_TICKSOURCE_GPTMR0 is not set
+CONFIG_TICKSOURCE_CORETMR=y
 # CONFIG_CYCLES_CLOCKSOURCE is not set
+# CONFIG_GPTMR0_CLOCKSOURCE is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
@@ -374,7 +376,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
@@ -598,9 +600,8 @@ CONFIG_BFIN_MAC_RMII=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_B44 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_AX88180 is not set
-CONFIG_NETDEV_10000=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
 
 #
 # Wireless LAN
@@ -640,11 +641,11 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_TABLET is not set
 CONFIG_INPUT_TOUCHSCREEN=y
-# CONFIG_TOUCHSCREEN_ADS7846 is not set
 CONFIG_TOUCHSCREEN_AD7877=y
 # CONFIG_TOUCHSCREEN_AD7879_I2C is not set
 # CONFIG_TOUCHSCREEN_AD7879_SPI is not set
 # CONFIG_TOUCHSCREEN_AD7879 is not set
+# CONFIG_TOUCHSCREEN_ADS7846 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -676,14 +677,14 @@ CONFIG_INPUT_UINPUT=y
 # Character devices
 #
 # CONFIG_AD9960 is not set
-# CONFIG_SPI_ADC_BF533 is not set
-# CONFIG_BF5xx_PPIFCD is not set
+CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_PPI is not set
+# CONFIG_BFIN_PPIFCD is not set
 # CONFIG_BFIN_SIMPLE_TIMER is not set
-# CONFIG_BF5xx_PPI is not set
+# CONFIG_BFIN_SPI_ADC is not set
 CONFIG_BFIN_SPORT=y
 # CONFIG_BFIN_TIMER_LATENCY is not set
-CONFIG_TWI_LCD=m
-CONFIG_BFIN_DMA_INTERFACE=m
+# CONFIG_BFIN_TWI_LCD is not set
 # CONFIG_SIMPLE_GPIO is not set
 # CONFIG_VT is not set
 CONFIG_DEVKMEM=y
@@ -796,6 +797,7 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
 # CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_AD5252 is not set
 # CONFIG_SENSORS_AD7414 is not set
 # CONFIG_SENSORS_AD7418 is not set
 # CONFIG_SENSORS_ADCXX is not set
@@ -867,6 +869,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_REGULATOR is not set
@@ -1111,6 +1114,7 @@ CONFIG_SYSFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
+# CONFIG_JFFS2_FS is not set
 CONFIG_YAFFS_FS=y
 CONFIG_YAFFS_YAFFS1=y
 # CONFIG_YAFFS_9BYTE_TAGS is not set
@@ -1121,7 +1125,6 @@ CONFIG_YAFFS_AUTO_YAFFS2=y
 # CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set
 # CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set
 CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y
-# CONFIG_JFFS2_FS is not set
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_MINIX_FS is not set
@@ -1213,7 +1216,6 @@ CONFIG_FRAME_WARN=1024
 # CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_DEBUG_MEMORY_INIT is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_SYSCTL_SYSCALL_CHECK is not set
 
 #
 # Tracers
@@ -1343,7 +1345,6 @@ CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/blackfin/configs/SRV1_defconfig b/arch/blackfin/configs/SRV1_defconfig
index 2bc0779..ec84a53 100644
--- a/arch/blackfin/configs/SRV1_defconfig
+++ b/arch/blackfin/configs/SRV1_defconfig
@@ -52,7 +52,7 @@ CONFIG_INITRAMFS_SOURCE=""
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -373,7 +373,7 @@ CONFIG_IP_PNP=y
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
diff --git a/arch/blackfin/configs/TCM-BF537_defconfig b/arch/blackfin/configs/TCM-BF537_defconfig
index e65b3a4..6e27962 100644
--- a/arch/blackfin/configs/TCM-BF537_defconfig
+++ b/arch/blackfin/configs/TCM-BF537_defconfig
@@ -42,7 +42,7 @@ CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_SYSCTL is not set
 CONFIG_EMBEDDED=y
 # CONFIG_UID16 is not set
-CONFIG_SYSCTL_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_HOTPLUG is not set
@@ -537,7 +537,30 @@ CONFIG_SPI_BFIN=y
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
 CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-# CONFIG_GPIOLIB is not set
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+CONFIG_GPIO_SYSFS=y
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_GPIO_MAX7301 is not set
+# CONFIG_GPIO_MCP23S08 is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
-- 
cgit v0.10.2


From c8f36dc3c11c3e9e879ded82cdf5d748d4ab2fb2 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 14 May 2009 14:55:50 +0000
Subject: Blackfin: simplify BF561 coreb driver greatly

Since 90% of this driver can be handled in user space, move it to the
corebld user space application.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-bf561/Kconfig b/arch/blackfin/mach-bf561/Kconfig
index 638ec38..6965dd5 100644
--- a/arch/blackfin/mach-bf561/Kconfig
+++ b/arch/blackfin/mach-bf561/Kconfig
@@ -9,22 +9,9 @@ if (!SMP)
 comment "Core B Support"
 
 config BF561_COREB
-	bool "Enable Core B support"
+	bool "Enable Core B loader"
 	default y
 
-config BF561_COREB_RESET
-	bool "Enable Core B reset support"
-	default n
-	help
-	  This requires code in the application that is loaded
-	  into Core B. In order to reset, the application needs
-	  to install an interrupt handler for Supplemental
-	  Interrupt 0, that sets RETI to 0xff600000 and writes
-	  bit 11 of SICB_SYSCR when bit 5 of SICA_SYSCR is 0.
-	  This causes Core B to stall when Supplemental Interrupt
-	  0 is set, and will reset PC to 0xff600000 when
-	  COREB_SRAM_INIT is cleared.
-
 endif
 
 comment "Interrupt Priority Assignment"
diff --git a/arch/blackfin/mach-bf561/coreb.c b/arch/blackfin/mach-bf561/coreb.c
index 8598098..93635a7 100644
--- a/arch/blackfin/mach-bf561/coreb.c
+++ b/arch/blackfin/mach-bf561/coreb.c
@@ -1,406 +1,74 @@
-/*
- * File:         arch/blackfin/mach-bf561/coreb.c
- * Based on:
- * Author:
+/* Load firmware into Core B on a BF561
  *
- * Created:
- * Description:  Handle CoreB on a BF561
- *
- * Modified:
- *               Copyright 2004-2006 Analog Devices Inc.
- *
- * Bugs:         Enter bugs at http://blackfin.uclinux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see the file COPYING, or write
- * to the Free Software Foundation, Inc.,
- * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ * Copyright 2004-2009 Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+/* The Core B reset func requires code in the application that is loaded into
+ * Core B.  In order to reset, the application needs to install an interrupt
+ * handler for Supplemental Interrupt 0, that sets RETI to 0xff600000 and
+ * writes bit 11 of SICB_SYSCR when bit 5 of SICA_SYSCR is 0.  This causes Core
+ * B to stall when Supplemental Interrupt 0 is set, and will reset PC to
+ * 0xff600000 when COREB_SRAM_INIT is cleared.
  */
 
-#include <linux/mm.h>
-#include <linux/miscdevice.h>
 #include <linux/device.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
 #include <linux/fs.h>
-#include <asm/dma.h>
-#include <asm/cacheflush.h>
-
-#define MODULE_VER		"v0.1"
-
-static spinlock_t coreb_lock;
-static wait_queue_head_t coreb_dma_wait;
-
-#define COREB_IS_OPEN		0x00000001
-#define COREB_IS_RUNNING	0x00000010
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
 
-#define CMD_COREB_INDEX		1
 #define CMD_COREB_START		2
 #define CMD_COREB_STOP		3
 #define CMD_COREB_RESET		4
 
-#define COREB_MINOR		229
-
-static unsigned long coreb_status = 0;
-static unsigned long coreb_base = 0xff600000;
-static unsigned long coreb_size = 0x4000;
-int coreb_dma_done;
-
-static loff_t coreb_lseek(struct file *file, loff_t offset, int origin);
-static ssize_t coreb_read(struct file *file, char *buf, size_t count,
-			  loff_t * ppos);
-static ssize_t coreb_write(struct file *file, const char *buf, size_t count,
-			   loff_t * ppos);
-static int coreb_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-		       unsigned long arg);
-static int coreb_open(struct inode *inode, struct file *file);
-static int coreb_release(struct inode *inode, struct file *file);
-
-static irqreturn_t coreb_dma_interrupt(int irq, void *dev_id)
-{
-	clear_dma_irqstat(CH_MEM_STREAM2_DEST);
-	coreb_dma_done = 1;
-	wake_up_interruptible(&coreb_dma_wait);
-	return IRQ_HANDLED;
-}
-
-static ssize_t coreb_write(struct file *file, const char *buf, size_t count,
-			   loff_t * ppos)
-{
-	unsigned long p = *ppos;
-	ssize_t wrote = 0;
-
-	if (p + count > coreb_size)
-		return -EFAULT;
-
-	while (count > 0) {
-		int len = count;
-
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
-
-		coreb_dma_done = 0;
-
-		flush_dcache_range((unsigned long)buf, (unsigned long)(buf+len));
-		/* Source Channel */
-		set_dma_start_addr(CH_MEM_STREAM2_SRC, (unsigned long)buf);
-		set_dma_x_count(CH_MEM_STREAM2_SRC, len);
-		set_dma_x_modify(CH_MEM_STREAM2_SRC, sizeof(char));
-		set_dma_config(CH_MEM_STREAM2_SRC, 0);
-		/* Destination Channel */
-		set_dma_start_addr(CH_MEM_STREAM2_DEST, coreb_base + p);
-		set_dma_x_count(CH_MEM_STREAM2_DEST, len);
-		set_dma_x_modify(CH_MEM_STREAM2_DEST, sizeof(char));
-		set_dma_config(CH_MEM_STREAM2_DEST, WNR | RESTART | DI_EN);
-
-		enable_dma(CH_MEM_STREAM2_SRC);
-		enable_dma(CH_MEM_STREAM2_DEST);
-
-		wait_event_interruptible(coreb_dma_wait, coreb_dma_done);
-
-		disable_dma(CH_MEM_STREAM2_SRC);
-		disable_dma(CH_MEM_STREAM2_DEST);
-
-		count -= len;
-		wrote += len;
-		buf += len;
-		p += len;
-	}
-	*ppos = p;
-	return wrote;
-}
-
-static ssize_t coreb_read(struct file *file, char *buf, size_t count,
-			  loff_t * ppos)
-{
-	unsigned long p = *ppos;
-	ssize_t read = 0;
-
-	if ((p + count) > coreb_size)
-		return -EFAULT;
-
-	while (count > 0) {
-		int len = count;
-
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
-
-		coreb_dma_done = 0;
-
-		invalidate_dcache_range((unsigned long)buf, (unsigned long)(buf+len));
-		/* Source Channel */
-		set_dma_start_addr(CH_MEM_STREAM2_SRC, coreb_base + p);
-		set_dma_x_count(CH_MEM_STREAM2_SRC, len);
-		set_dma_x_modify(CH_MEM_STREAM2_SRC, sizeof(char));
-		set_dma_config(CH_MEM_STREAM2_SRC, 0);
-		/* Destination Channel */
-		set_dma_start_addr(CH_MEM_STREAM2_DEST, (unsigned long)buf);
-		set_dma_x_count(CH_MEM_STREAM2_DEST, len);
-		set_dma_x_modify(CH_MEM_STREAM2_DEST, sizeof(char));
-		set_dma_config(CH_MEM_STREAM2_DEST, WNR | RESTART | DI_EN);
-
-		enable_dma(CH_MEM_STREAM2_SRC);
-		enable_dma(CH_MEM_STREAM2_DEST);
-
-		wait_event_interruptible(coreb_dma_wait, coreb_dma_done);
-
-		disable_dma(CH_MEM_STREAM2_SRC);
-		disable_dma(CH_MEM_STREAM2_DEST);
-
-		count -= len;
-		read += len;
-		buf += len;
-		p += len;
-	}
-
-	return read;
-}
-
-static loff_t coreb_lseek(struct file *file, loff_t offset, int origin)
+static int
+coreb_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
 {
-	loff_t ret;
-
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
-
-	switch (origin) {
-	case 0 /* SEEK_SET */ :
-		if (offset < coreb_size) {
-			file->f_pos = offset;
-			ret = file->f_pos;
-		} else
-			ret = -EINVAL;
-		break;
-	case 1 /* SEEK_CUR */ :
-		if ((offset + file->f_pos) < coreb_size) {
-			file->f_pos += offset;
-			ret = file->f_pos;
-		} else
-			ret = -EINVAL;
-	default:
-		ret = -EINVAL;
-	}
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
-	return ret;
-}
-
-/* No BKL needed here */
-static int coreb_open(struct inode *inode, struct file *file)
-{
-	spin_lock_irq(&coreb_lock);
-
-	if (coreb_status & COREB_IS_OPEN)
-		goto out_busy;
-
-	coreb_status |= COREB_IS_OPEN;
-
-	spin_unlock_irq(&coreb_lock);
-	return 0;
-
- out_busy:
-	spin_unlock_irq(&coreb_lock);
-	return -EBUSY;
-}
-
-static int coreb_release(struct inode *inode, struct file *file)
-{
-	spin_lock_irq(&coreb_lock);
-	coreb_status &= ~COREB_IS_OPEN;
-	spin_unlock_irq(&coreb_lock);
-	return 0;
-}
-
-static int coreb_ioctl(struct inode *inode, struct file *file,
-		       unsigned int cmd, unsigned long arg)
-{
-	int retval = 0;
-	int coreb_index = 0;
+	int ret = 0;
 
 	switch (cmd) {
-	case CMD_COREB_INDEX:
-		if (copy_from_user(&coreb_index, (int *)arg, sizeof(int))) {
-			retval = -EFAULT;
-			break;
-		}
-
-		spin_lock_irq(&coreb_lock);
-		switch (coreb_index) {
-		case 0:
-			coreb_base = 0xff600000;
-			coreb_size = 0x4000;
-			break;
-		case 1:
-			coreb_base = 0xff610000;
-			coreb_size = 0x4000;
-			break;
-		case 2:
-			coreb_base = 0xff500000;
-			coreb_size = 0x8000;
-			break;
-		case 3:
-			coreb_base = 0xff400000;
-			coreb_size = 0x8000;
-			break;
-		default:
-			retval = -EINVAL;
-			break;
-		}
-		spin_unlock_irq(&coreb_lock);
-
-		mutex_lock(&file->f_dentry->d_inode->i_mutex);
-		file->f_pos = 0;
-		mutex_unlock(&file->f_dentry->d_inode->i_mutex);
-		break;
 	case CMD_COREB_START:
-		spin_lock_irq(&coreb_lock);
-		if (coreb_status & COREB_IS_RUNNING) {
-			retval = -EBUSY;
-			break;
-		}
-		printk(KERN_INFO "Starting Core B\n");
-		coreb_status |= COREB_IS_RUNNING;
 		bfin_write_SICA_SYSCR(bfin_read_SICA_SYSCR() & ~0x0020);
-		SSYNC();
-		spin_unlock_irq(&coreb_lock);
 		break;
-#if defined(CONFIG_BF561_COREB_RESET)
 	case CMD_COREB_STOP:
-		spin_lock_irq(&coreb_lock);
-		printk(KERN_INFO "Stopping Core B\n");
 		bfin_write_SICA_SYSCR(bfin_read_SICA_SYSCR() | 0x0020);
 		bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | 0x0080);
-		coreb_status &= ~COREB_IS_RUNNING;
-		spin_unlock_irq(&coreb_lock);
 		break;
 	case CMD_COREB_RESET:
-		printk(KERN_INFO "Resetting Core B\n");
 		bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | 0x0080);
 		break;
-#endif
+	default:
+		ret = -EINVAL;
+		break;
 	}
 
-	return retval;
+	CSYNC();
+
+	return ret;
 }
 
 static struct file_operations coreb_fops = {
-	.owner = THIS_MODULE,
-	.llseek = coreb_lseek,
-	.read = coreb_read,
-	.write = coreb_write,
-	.ioctl = coreb_ioctl,
-	.open = coreb_open,
-	.release = coreb_release
+	.owner   = THIS_MODULE,
+	.ioctl   = coreb_ioctl,
 };
 
 static struct miscdevice coreb_dev = {
-	COREB_MINOR,
-	"coreb",
-	&coreb_fops
+	.minor = MISC_DYNAMIC_MINOR,
+	.name  = "coreb",
+	.fops  = &coreb_fops,
 };
 
-static ssize_t coreb_show_status(struct device *dev, struct device_attribute *attr, char *buf)
+static int __init bf561_coreb_init(void)
 {
-	return sprintf(buf,
-		       "Base Address:\t0x%08lx\n"
-		       "Core B is %s\n"
-		       "SICA_SYSCR:\t%04x\n"
-		       "SICB_SYSCR:\t%04x\n"
-		       "\n"
-		       "IRQ Status:\tCore A\t\tCore B\n"
-		       "ISR0:\t\t%08x\t\t%08x\n"
-		       "ISR1:\t\t%08x\t\t%08x\n"
-		       "IMASK0:\t\t%08x\t\t%08x\n"
-		       "IMASK1:\t\t%08x\t\t%08x\n",
-		       coreb_base,
-		       coreb_status & COREB_IS_RUNNING ? "running" : "stalled",
-		       bfin_read_SICA_SYSCR(), bfin_read_SICB_SYSCR(),
-		       bfin_read_SICA_ISR0(), bfin_read_SICB_ISR0(),
-		       bfin_read_SICA_ISR1(), bfin_read_SICB_ISR0(),
-		       bfin_read_SICA_IMASK0(), bfin_read_SICB_IMASK0(),
-		       bfin_read_SICA_IMASK1(), bfin_read_SICB_IMASK1());
-}
-
-static DEVICE_ATTR(coreb_status, S_IRUGO, coreb_show_status, NULL);
-
-int __init bf561_coreb_init(void)
-{
-	init_waitqueue_head(&coreb_dma_wait);
-
-	spin_lock_init(&coreb_lock);
-	/* Request the core memory regions for Core B */
-	if (request_mem_region(0xff600000, 0x4000,
-			       "Core B - Instruction SRAM") == NULL)
-		goto exit;
-
-	if (request_mem_region(0xFF610000, 0x4000,
-			       "Core B - Instruction SRAM") == NULL)
-		goto release_instruction_a_sram;
-
-	if (request_mem_region(0xFF500000, 0x8000,
-			       "Core B - Data Bank B SRAM") == NULL)
-		goto release_instruction_b_sram;
-
-	if (request_mem_region(0xff400000, 0x8000,
-			       "Core B - Data Bank A SRAM") == NULL)
-		goto release_data_b_sram;
-
-	if (request_dma(CH_MEM_STREAM2_DEST, "Core B - DMA Destination") < 0)
-		goto release_data_a_sram;
-
-	if (request_dma(CH_MEM_STREAM2_SRC, "Core B - DMA Source") < 0)
-		goto release_dma_dest;
-
-	set_dma_callback(CH_MEM_STREAM2_DEST, coreb_dma_interrupt, NULL);
-
-	misc_register(&coreb_dev);
-
-	if (device_create_file(coreb_dev.this_device, &dev_attr_coreb_status))
-		goto release_dma_src;
-
-	printk(KERN_INFO "BF561 Core B driver %s initialized.\n", MODULE_VER);
-	return 0;
-
- release_dma_src:
-	free_dma(CH_MEM_STREAM2_SRC);
- release_dma_dest:
-	free_dma(CH_MEM_STREAM2_DEST);
- release_data_a_sram:
-	release_mem_region(0xff400000, 0x8000);
- release_data_b_sram:
-	release_mem_region(0xff500000, 0x8000);
- release_instruction_b_sram:
-	release_mem_region(0xff610000, 0x4000);
- release_instruction_a_sram:
-	release_mem_region(0xff600000, 0x4000);
- exit:
-	return -ENOMEM;
+	return misc_register(&coreb_dev);
 }
+module_init(bf561_coreb_init);
 
-void __exit bf561_coreb_exit(void)
+static void __exit bf561_coreb_exit(void)
 {
-	device_remove_file(coreb_dev.this_device, &dev_attr_coreb_status);
 	misc_deregister(&coreb_dev);
-
-	release_mem_region(0xff610000, 0x4000);
-	release_mem_region(0xff600000, 0x4000);
-	release_mem_region(0xff500000, 0x8000);
-	release_mem_region(0xff400000, 0x8000);
-
-	free_dma(CH_MEM_STREAM2_DEST);
-	free_dma(CH_MEM_STREAM2_SRC);
 }
-
-module_init(bf561_coreb_init);
 module_exit(bf561_coreb_exit);
 
 MODULE_AUTHOR("Bas Vermeulen <bvermeul@blackstar.xs4all.nl>");
-- 
cgit v0.10.2


From f339f46b05cfe289024b15a0525c8b61f1426a88 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 19 May 2009 12:58:13 +0000
Subject: Blackfin: fix detection of cached L2 SRAM

Make sure our bfin_addr_dcachable() function flags cached L2 SRAM properly
else memory easily goes unflushed when working with DMA.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/cacheflush.h b/arch/blackfin/include/asm/cacheflush.h
index d7726ab..94697f0 100644
--- a/arch/blackfin/include/asm/cacheflush.h
+++ b/arch/blackfin/include/asm/cacheflush.h
@@ -108,6 +108,11 @@ static inline int bfin_addr_dcachable(unsigned long addr)
 		addr >= _ramend && addr < physical_mem_end)
 		return 1;
 
+#ifndef CONFIG_BFIN_L2_NOT_CACHED
+	if (addr >= L2_START && addr < L2_START + L2_LENGTH)
+		return 1;
+#endif
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From f5879fda09ea98d7aa845a0e0fa7e508452e5f9f Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Wed, 6 May 2009 09:59:11 +0000
Subject: Blackfin: add MDMA defines to make cross-variant coding easier

Add some defines to make the BF538/BF561 look like most other Blackfin
parts in that it has a MDMA0 channel available for low level init.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-bf538/include/mach/blackfin.h b/arch/blackfin/mach-bf538/include/mach/blackfin.h
index ea25371a9..6f62835 100644
--- a/arch/blackfin/mach-bf538/include/mach/blackfin.h
+++ b/arch/blackfin/mach-bf538/include/mach/blackfin.h
@@ -68,25 +68,6 @@
 #define OFFSET_SCR              0x1C	/* SCR Scratch Register                 */
 #define OFFSET_GCTL             0x24	/* Global Control Register              */
 
-
-#define bfin_write_MDMA_D0_IRQ_STATUS	bfin_write_MDMA0_D0_IRQ_STATUS
-#define bfin_write_MDMA_D0_START_ADDR   bfin_write_MDMA0_D0_START_ADDR
-#define bfin_write_MDMA_S0_START_ADDR   bfin_write_MDMA0_S0_START_ADDR
-#define bfin_write_MDMA_D0_X_COUNT      bfin_write_MDMA0_D0_X_COUNT
-#define bfin_write_MDMA_S0_X_COUNT      bfin_write_MDMA0_S0_X_COUNT
-#define bfin_write_MDMA_D0_Y_COUNT      bfin_write_MDMA0_D0_Y_COUNT
-#define bfin_write_MDMA_S0_Y_COUNT      bfin_write_MDMA0_S0_Y_COUNT
-#define bfin_write_MDMA_D0_X_MODIFY     bfin_write_MDMA0_D0_X_MODIFY
-#define bfin_write_MDMA_S0_X_MODIFY     bfin_write_MDMA0_S0_X_MODIFY
-#define bfin_write_MDMA_D0_Y_MODIFY     bfin_write_MDMA0_D0_Y_MODIFY
-#define bfin_write_MDMA_S0_Y_MODIFY     bfin_write_MDMA0_S0_Y_MODIFY
-#define bfin_write_MDMA_S0_CONFIG       bfin_write_MDMA0_S0_CONFIG
-#define bfin_write_MDMA_D0_CONFIG       bfin_write_MDMA0_D0_CONFIG
-#define bfin_read_MDMA_S0_CONFIG        bfin_read_MDMA0_S0_CONFIG
-#define bfin_read_MDMA_D0_IRQ_STATUS    bfin_read_MDMA0_D0_IRQ_STATUS
-#define bfin_write_MDMA_S0_IRQ_STATUS   bfin_write_MDMA0_S0_IRQ_STATUS
-
-
 /* DPMC*/
 #define bfin_read_STOPCK_OFF() bfin_read_STOPCK()
 #define bfin_write_STOPCK_OFF(val) bfin_write_STOPCK(val)
diff --git a/arch/blackfin/mach-bf538/include/mach/cdefBF538.h b/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
index 7a5f74c..99ca3f4 100644
--- a/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
+++ b/arch/blackfin/mach-bf538/include/mach/cdefBF538.h
@@ -1247,6 +1247,65 @@
 #define bfin_write_MDMA1_S1_CURR_X_COUNT(val) bfin_write16(MDMA1_S1_CURR_X_COUNT, val)
 #define bfin_read_MDMA1_S1_CURR_Y_COUNT() bfin_read16(MDMA1_S1_CURR_Y_COUNT)
 #define bfin_write_MDMA1_S1_CURR_Y_COUNT(val) bfin_write16(MDMA1_S1_CURR_Y_COUNT, val)
+
+#define bfin_read_MDMA_S0_CONFIG()  bfin_read_MDMA0_S0_CONFIG()
+#define bfin_write_MDMA_S0_CONFIG(val) bfin_write_MDMA0_S0_CONFIG(val)
+#define bfin_read_MDMA_S0_IRQ_STATUS()  bfin_read_MDMA0_S0_IRQ_STATUS()
+#define bfin_write_MDMA_S0_IRQ_STATUS(val) bfin_write_MDMA0_S0_IRQ_STATUS(val)
+#define bfin_read_MDMA_S0_X_MODIFY()  bfin_read_MDMA0_S0_X_MODIFY()
+#define bfin_write_MDMA_S0_X_MODIFY(val) bfin_write_MDMA0_S0_X_MODIFY(val)
+#define bfin_read_MDMA_S0_Y_MODIFY()  bfin_read_MDMA0_S0_Y_MODIFY()
+#define bfin_write_MDMA_S0_Y_MODIFY(val) bfin_write_MDMA0_S0_Y_MODIFY(val)
+#define bfin_read_MDMA_S0_X_COUNT()  bfin_read_MDMA0_S0_X_COUNT()
+#define bfin_write_MDMA_S0_X_COUNT(val) bfin_write_MDMA0_S0_X_COUNT(val)
+#define bfin_read_MDMA_S0_Y_COUNT()  bfin_read_MDMA0_S0_Y_COUNT()
+#define bfin_write_MDMA_S0_Y_COUNT(val) bfin_write_MDMA0_S0_Y_COUNT(val)
+#define bfin_read_MDMA_S0_START_ADDR()  bfin_read_MDMA0_S0_START_ADDR()
+#define bfin_write_MDMA_S0_START_ADDR(val) bfin_write_MDMA0_S0_START_ADDR(val)
+#define bfin_read_MDMA_D0_CONFIG()  bfin_read_MDMA0_D0_CONFIG()
+#define bfin_write_MDMA_D0_CONFIG(val) bfin_write_MDMA0_D0_CONFIG(val)
+#define bfin_read_MDMA_D0_IRQ_STATUS()  bfin_read_MDMA0_D0_IRQ_STATUS()
+#define bfin_write_MDMA_D0_IRQ_STATUS(val) bfin_write_MDMA0_D0_IRQ_STATUS(val)
+#define bfin_read_MDMA_D0_X_MODIFY()  bfin_read_MDMA0_D0_X_MODIFY()
+#define bfin_write_MDMA_D0_X_MODIFY(val) bfin_write_MDMA0_D0_X_MODIFY(val)
+#define bfin_read_MDMA_D0_Y_MODIFY()  bfin_read_MDMA0_D0_Y_MODIFY()
+#define bfin_write_MDMA_D0_Y_MODIFY(val) bfin_write_MDMA0_D0_Y_MODIFY(val)
+#define bfin_read_MDMA_D0_X_COUNT()  bfin_read_MDMA0_D0_X_COUNT()
+#define bfin_write_MDMA_D0_X_COUNT(val) bfin_write_MDMA0_D0_X_COUNT(val)
+#define bfin_read_MDMA_D0_Y_COUNT()  bfin_read_MDMA0_D0_Y_COUNT()
+#define bfin_write_MDMA_D0_Y_COUNT(val) bfin_write_MDMA0_D0_Y_COUNT(val)
+#define bfin_read_MDMA_D0_START_ADDR()  bfin_read_MDMA0_D0_START_ADDR()
+#define bfin_write_MDMA_D0_START_ADDR(val) bfin_write_MDMA0_D0_START_ADDR(val)
+
+#define bfin_read_MDMA_S1_CONFIG()  bfin_read_MDMA0_S1_CONFIG()
+#define bfin_write_MDMA_S1_CONFIG(val) bfin_write_MDMA0_S1_CONFIG(val)
+#define bfin_read_MDMA_S1_IRQ_STATUS()  bfin_read_MDMA0_S1_IRQ_STATUS()
+#define bfin_write_MDMA_S1_IRQ_STATUS(val) bfin_write_MDMA0_S1_IRQ_STATUS(val)
+#define bfin_read_MDMA_S1_X_MODIFY()  bfin_read_MDMA0_S1_X_MODIFY()
+#define bfin_write_MDMA_S1_X_MODIFY(val) bfin_write_MDMA0_S1_X_MODIFY(val)
+#define bfin_read_MDMA_S1_Y_MODIFY()  bfin_read_MDMA0_S1_Y_MODIFY()
+#define bfin_write_MDMA_S1_Y_MODIFY(val) bfin_write_MDMA0_S1_Y_MODIFY(val)
+#define bfin_read_MDMA_S1_X_COUNT()  bfin_read_MDMA0_S1_X_COUNT()
+#define bfin_write_MDMA_S1_X_COUNT(val) bfin_write_MDMA0_S1_X_COUNT(val)
+#define bfin_read_MDMA_S1_Y_COUNT()  bfin_read_MDMA0_S1_Y_COUNT()
+#define bfin_write_MDMA_S1_Y_COUNT(val) bfin_write_MDMA0_S1_Y_COUNT(val)
+#define bfin_read_MDMA_S1_START_ADDR()  bfin_read_MDMA0_S1_START_ADDR()
+#define bfin_write_MDMA_S1_START_ADDR(val) bfin_write_MDMA0_S1_START_ADDR(val)
+#define bfin_read_MDMA_D1_CONFIG()  bfin_read_MDMA0_D1_CONFIG()
+#define bfin_write_MDMA_D1_CONFIG(val) bfin_write_MDMA0_D1_CONFIG(val)
+#define bfin_read_MDMA_D1_IRQ_STATUS()  bfin_read_MDMA0_D1_IRQ_STATUS()
+#define bfin_write_MDMA_D1_IRQ_STATUS(val) bfin_write_MDMA0_D1_IRQ_STATUS(val)
+#define bfin_read_MDMA_D1_X_MODIFY()  bfin_read_MDMA0_D1_X_MODIFY()
+#define bfin_write_MDMA_D1_X_MODIFY(val) bfin_write_MDMA0_D1_X_MODIFY(val)
+#define bfin_read_MDMA_D1_Y_MODIFY()  bfin_read_MDMA0_D1_Y_MODIFY()
+#define bfin_write_MDMA_D1_Y_MODIFY(val) bfin_write_MDMA0_D1_Y_MODIFY(val)
+#define bfin_read_MDMA_D1_X_COUNT()  bfin_read_MDMA0_D1_X_COUNT()
+#define bfin_write_MDMA_D1_X_COUNT(val) bfin_write_MDMA0_D1_X_COUNT(val)
+#define bfin_read_MDMA_D1_Y_COUNT()  bfin_read_MDMA0_D1_Y_COUNT()
+#define bfin_write_MDMA_D1_Y_COUNT(val) bfin_write_MDMA0_D1_Y_COUNT(val)
+#define bfin_read_MDMA_D1_START_ADDR()  bfin_read_MDMA0_D1_START_ADDR()
+#define bfin_write_MDMA_D1_START_ADDR(val) bfin_write_MDMA0_D1_START_ADDR(val)
+
 #define bfin_read_PPI_CONTROL()        bfin_read16(PPI_CONTROL)
 #define bfin_write_PPI_CONTROL(val)    bfin_write16(PPI_CONTROL, val)
 #define bfin_read_PPI_STATUS()         bfin_read16(PPI_STATUS)
diff --git a/arch/blackfin/mach-bf538/include/mach/defBF539.h b/arch/blackfin/mach-bf538/include/mach/defBF539.h
index 6adbfcc..bdc330c 100644
--- a/arch/blackfin/mach-bf538/include/mach/defBF539.h
+++ b/arch/blackfin/mach-bf538/include/mach/defBF539.h
@@ -412,6 +412,62 @@
 #define	MDMA0_S1_CURR_X_COUNT	0xFFC00EF0	/* MemDMA0 Stream 1 Source Current X Count Register */
 #define	MDMA0_S1_CURR_Y_COUNT	0xFFC00EF8	/* MemDMA0 Stream 1 Source Current Y Count Register */
 
+#define MDMA_D0_NEXT_DESC_PTR MDMA0_D0_NEXT_DESC_PTR
+#define MDMA_D0_START_ADDR MDMA0_D0_START_ADDR
+#define MDMA_D0_CONFIG MDMA0_D0_CONFIG
+#define MDMA_D0_X_COUNT MDMA0_D0_X_COUNT
+#define MDMA_D0_X_MODIFY MDMA0_D0_X_MODIFY
+#define MDMA_D0_Y_COUNT MDMA0_D0_Y_COUNT
+#define MDMA_D0_Y_MODIFY MDMA0_D0_Y_MODIFY
+#define MDMA_D0_CURR_DESC_PTR MDMA0_D0_CURR_DESC_PTR
+#define MDMA_D0_CURR_ADDR MDMA0_D0_CURR_ADDR
+#define MDMA_D0_IRQ_STATUS MDMA0_D0_IRQ_STATUS
+#define MDMA_D0_PERIPHERAL_MAP MDMA0_D0_PERIPHERAL_MAP
+#define MDMA_D0_CURR_X_COUNT MDMA0_D0_CURR_X_COUNT
+#define MDMA_D0_CURR_Y_COUNT MDMA0_D0_CURR_Y_COUNT
+
+#define MDMA_S0_NEXT_DESC_PTR MDMA0_S0_NEXT_DESC_PTR
+#define MDMA_S0_START_ADDR MDMA0_S0_START_ADDR
+#define MDMA_S0_CONFIG MDMA0_S0_CONFIG
+#define MDMA_S0_X_COUNT MDMA0_S0_X_COUNT
+#define MDMA_S0_X_MODIFY MDMA0_S0_X_MODIFY
+#define MDMA_S0_Y_COUNT MDMA0_S0_Y_COUNT
+#define MDMA_S0_Y_MODIFY MDMA0_S0_Y_MODIFY
+#define MDMA_S0_CURR_DESC_PTR MDMA0_S0_CURR_DESC_PTR
+#define MDMA_S0_CURR_ADDR MDMA0_S0_CURR_ADDR
+#define MDMA_S0_IRQ_STATUS MDMA0_S0_IRQ_STATUS
+#define MDMA_S0_PERIPHERAL_MAP MDMA0_S0_PERIPHERAL_MAP
+#define MDMA_S0_CURR_X_COUNT MDMA0_S0_CURR_X_COUNT
+#define MDMA_S0_CURR_Y_COUNT MDMA0_S0_CURR_Y_COUNT
+
+#define MDMA_D1_NEXT_DESC_PTR MDMA0_D1_NEXT_DESC_PTR
+#define MDMA_D1_START_ADDR MDMA0_D1_START_ADDR
+#define MDMA_D1_CONFIG MDMA0_D1_CONFIG
+#define MDMA_D1_X_COUNT MDMA0_D1_X_COUNT
+#define MDMA_D1_X_MODIFY MDMA0_D1_X_MODIFY
+#define MDMA_D1_Y_COUNT MDMA0_D1_Y_COUNT
+#define MDMA_D1_Y_MODIFY MDMA0_D1_Y_MODIFY
+#define MDMA_D1_CURR_DESC_PTR MDMA0_D1_CURR_DESC_PTR
+#define MDMA_D1_CURR_ADDR MDMA0_D1_CURR_ADDR
+#define MDMA_D1_IRQ_STATUS MDMA0_D1_IRQ_STATUS
+#define MDMA_D1_PERIPHERAL_MAP MDMA0_D1_PERIPHERAL_MAP
+#define MDMA_D1_CURR_X_COUNT MDMA0_D1_CURR_X_COUNT
+#define MDMA_D1_CURR_Y_COUNT MDMA0_D1_CURR_Y_COUNT
+
+#define MDMA_S1_NEXT_DESC_PTR MDMA0_S1_NEXT_DESC_PTR
+#define MDMA_S1_START_ADDR MDMA0_S1_START_ADDR
+#define MDMA_S1_CONFIG MDMA0_S1_CONFIG
+#define MDMA_S1_X_COUNT MDMA0_S1_X_COUNT
+#define MDMA_S1_X_MODIFY MDMA0_S1_X_MODIFY
+#define MDMA_S1_Y_COUNT MDMA0_S1_Y_COUNT
+#define MDMA_S1_Y_MODIFY MDMA0_S1_Y_MODIFY
+#define MDMA_S1_CURR_DESC_PTR MDMA0_S1_CURR_DESC_PTR
+#define MDMA_S1_CURR_ADDR MDMA0_S1_CURR_ADDR
+#define MDMA_S1_IRQ_STATUS MDMA0_S1_IRQ_STATUS
+#define MDMA_S1_PERIPHERAL_MAP MDMA0_S1_PERIPHERAL_MAP
+#define MDMA_S1_CURR_X_COUNT MDMA0_S1_CURR_X_COUNT
+#define MDMA_S1_CURR_Y_COUNT MDMA0_S1_CURR_Y_COUNT
+
 
 /* Parallel Peripheral Interface (PPI) (0xFFC01000 - 0xFFC010FF) */
 #define	PPI_CONTROL			0xFFC01000	/* PPI Control Register */
diff --git a/arch/blackfin/mach-bf561/include/mach/cdefBF561.h b/arch/blackfin/mach-bf561/include/mach/cdefBF561.h
index 95d609f..9d9858c 100644
--- a/arch/blackfin/mach-bf561/include/mach/cdefBF561.h
+++ b/arch/blackfin/mach-bf561/include/mach/cdefBF561.h
@@ -1526,6 +1526,35 @@
 #define bfin_read_MDMA_D0_START_ADDR()  bfin_read_MDMA1_D0_START_ADDR()
 #define bfin_write_MDMA_D0_START_ADDR(val) bfin_write_MDMA1_D0_START_ADDR(val)
 
+#define bfin_read_MDMA_S1_CONFIG()  bfin_read_MDMA1_S1_CONFIG()
+#define bfin_write_MDMA_S1_CONFIG(val) bfin_write_MDMA1_S1_CONFIG(val)
+#define bfin_read_MDMA_S1_IRQ_STATUS()  bfin_read_MDMA1_S1_IRQ_STATUS()
+#define bfin_write_MDMA_S1_IRQ_STATUS(val) bfin_write_MDMA1_S1_IRQ_STATUS(val)
+#define bfin_read_MDMA_S1_X_MODIFY()  bfin_read_MDMA1_S1_X_MODIFY()
+#define bfin_write_MDMA_S1_X_MODIFY(val) bfin_write_MDMA1_S1_X_MODIFY(val)
+#define bfin_read_MDMA_S1_Y_MODIFY()  bfin_read_MDMA1_S1_Y_MODIFY()
+#define bfin_write_MDMA_S1_Y_MODIFY(val) bfin_write_MDMA1_S1_Y_MODIFY(val)
+#define bfin_read_MDMA_S1_X_COUNT()  bfin_read_MDMA1_S1_X_COUNT()
+#define bfin_write_MDMA_S1_X_COUNT(val) bfin_write_MDMA1_S1_X_COUNT(val)
+#define bfin_read_MDMA_S1_Y_COUNT()  bfin_read_MDMA1_S1_Y_COUNT()
+#define bfin_write_MDMA_S1_Y_COUNT(val) bfin_write_MDMA1_S1_Y_COUNT(val)
+#define bfin_read_MDMA_S1_START_ADDR()  bfin_read_MDMA1_S1_START_ADDR()
+#define bfin_write_MDMA_S1_START_ADDR(val) bfin_write_MDMA1_S1_START_ADDR(val)
+#define bfin_read_MDMA_D1_CONFIG()  bfin_read_MDMA1_D1_CONFIG()
+#define bfin_write_MDMA_D1_CONFIG(val) bfin_write_MDMA1_D1_CONFIG(val)
+#define bfin_read_MDMA_D1_IRQ_STATUS()  bfin_read_MDMA1_D1_IRQ_STATUS()
+#define bfin_write_MDMA_D1_IRQ_STATUS(val) bfin_write_MDMA1_D1_IRQ_STATUS(val)
+#define bfin_read_MDMA_D1_X_MODIFY()  bfin_read_MDMA1_D1_X_MODIFY()
+#define bfin_write_MDMA_D1_X_MODIFY(val) bfin_write_MDMA1_D1_X_MODIFY(val)
+#define bfin_read_MDMA_D1_Y_MODIFY()  bfin_read_MDMA1_D1_Y_MODIFY()
+#define bfin_write_MDMA_D1_Y_MODIFY(val) bfin_write_MDMA1_D1_Y_MODIFY(val)
+#define bfin_read_MDMA_D1_X_COUNT()  bfin_read_MDMA1_D1_X_COUNT()
+#define bfin_write_MDMA_D1_X_COUNT(val) bfin_write_MDMA1_D1_X_COUNT(val)
+#define bfin_read_MDMA_D1_Y_COUNT()  bfin_read_MDMA1_D1_Y_COUNT()
+#define bfin_write_MDMA_D1_Y_COUNT(val) bfin_write_MDMA1_D1_Y_COUNT(val)
+#define bfin_read_MDMA_D1_START_ADDR()  bfin_read_MDMA1_D1_START_ADDR()
+#define bfin_write_MDMA_D1_START_ADDR(val) bfin_write_MDMA1_D1_START_ADDR(val)
+
 /* These need to be last due to the cdef/linux inter-dependencies */
 #include <asm/irq.h>
 
diff --git a/arch/blackfin/mach-bf561/include/mach/defBF561.h b/arch/blackfin/mach-bf561/include/mach/defBF561.h
index cf92229..5fc0f05 100644
--- a/arch/blackfin/mach-bf561/include/mach/defBF561.h
+++ b/arch/blackfin/mach-bf561/include/mach/defBF561.h
@@ -796,6 +796,62 @@
 #define MDMA2_S1_IRQ_STATUS 0xFFC00FE8	/*MemDMA2 Stream 1 Source Interrupt/Status Register */
 #define MDMA2_S1_PERIPHERAL_MAP 0xFFC00FEC	/*MemDMA2 Stream 1 Source Peripheral Map register */
 
+#define MDMA_D0_NEXT_DESC_PTR MDMA1_D0_NEXT_DESC_PTR
+#define MDMA_D0_START_ADDR MDMA1_D0_START_ADDR
+#define MDMA_D0_CONFIG MDMA1_D0_CONFIG
+#define MDMA_D0_X_COUNT MDMA1_D0_X_COUNT
+#define MDMA_D0_X_MODIFY MDMA1_D0_X_MODIFY
+#define MDMA_D0_Y_COUNT MDMA1_D0_Y_COUNT
+#define MDMA_D0_Y_MODIFY MDMA1_D0_Y_MODIFY
+#define MDMA_D0_CURR_DESC_PTR MDMA1_D0_CURR_DESC_PTR
+#define MDMA_D0_CURR_ADDR MDMA1_D0_CURR_ADDR
+#define MDMA_D0_IRQ_STATUS MDMA1_D0_IRQ_STATUS
+#define MDMA_D0_PERIPHERAL_MAP MDMA1_D0_PERIPHERAL_MAP
+#define MDMA_D0_CURR_X_COUNT MDMA1_D0_CURR_X_COUNT
+#define MDMA_D0_CURR_Y_COUNT MDMA1_D0_CURR_Y_COUNT
+
+#define MDMA_S0_NEXT_DESC_PTR MDMA1_S0_NEXT_DESC_PTR
+#define MDMA_S0_START_ADDR MDMA1_S0_START_ADDR
+#define MDMA_S0_CONFIG MDMA1_S0_CONFIG
+#define MDMA_S0_X_COUNT MDMA1_S0_X_COUNT
+#define MDMA_S0_X_MODIFY MDMA1_S0_X_MODIFY
+#define MDMA_S0_Y_COUNT MDMA1_S0_Y_COUNT
+#define MDMA_S0_Y_MODIFY MDMA1_S0_Y_MODIFY
+#define MDMA_S0_CURR_DESC_PTR MDMA1_S0_CURR_DESC_PTR
+#define MDMA_S0_CURR_ADDR MDMA1_S0_CURR_ADDR
+#define MDMA_S0_IRQ_STATUS MDMA1_S0_IRQ_STATUS
+#define MDMA_S0_PERIPHERAL_MAP MDMA1_S0_PERIPHERAL_MAP
+#define MDMA_S0_CURR_X_COUNT MDMA1_S0_CURR_X_COUNT
+#define MDMA_S0_CURR_Y_COUNT MDMA1_S0_CURR_Y_COUNT
+
+#define MDMA_D1_NEXT_DESC_PTR MDMA1_D1_NEXT_DESC_PTR
+#define MDMA_D1_START_ADDR MDMA1_D1_START_ADDR
+#define MDMA_D1_CONFIG MDMA1_D1_CONFIG
+#define MDMA_D1_X_COUNT MDMA1_D1_X_COUNT
+#define MDMA_D1_X_MODIFY MDMA1_D1_X_MODIFY
+#define MDMA_D1_Y_COUNT MDMA1_D1_Y_COUNT
+#define MDMA_D1_Y_MODIFY MDMA1_D1_Y_MODIFY
+#define MDMA_D1_CURR_DESC_PTR MDMA1_D1_CURR_DESC_PTR
+#define MDMA_D1_CURR_ADDR MDMA1_D1_CURR_ADDR
+#define MDMA_D1_IRQ_STATUS MDMA1_D1_IRQ_STATUS
+#define MDMA_D1_PERIPHERAL_MAP MDMA1_D1_PERIPHERAL_MAP
+#define MDMA_D1_CURR_X_COUNT MDMA1_D1_CURR_X_COUNT
+#define MDMA_D1_CURR_Y_COUNT MDMA1_D1_CURR_Y_COUNT
+
+#define MDMA_S1_NEXT_DESC_PTR MDMA1_S1_NEXT_DESC_PTR
+#define MDMA_S1_START_ADDR MDMA1_S1_START_ADDR
+#define MDMA_S1_CONFIG MDMA1_S1_CONFIG
+#define MDMA_S1_X_COUNT MDMA1_S1_X_COUNT
+#define MDMA_S1_X_MODIFY MDMA1_S1_X_MODIFY
+#define MDMA_S1_Y_COUNT MDMA1_S1_Y_COUNT
+#define MDMA_S1_Y_MODIFY MDMA1_S1_Y_MODIFY
+#define MDMA_S1_CURR_DESC_PTR MDMA1_S1_CURR_DESC_PTR
+#define MDMA_S1_CURR_ADDR MDMA1_S1_CURR_ADDR
+#define MDMA_S1_IRQ_STATUS MDMA1_S1_IRQ_STATUS
+#define MDMA_S1_PERIPHERAL_MAP MDMA1_S1_PERIPHERAL_MAP
+#define MDMA_S1_CURR_X_COUNT MDMA1_S1_CURR_X_COUNT
+#define MDMA_S1_CURR_Y_COUNT MDMA1_S1_CURR_Y_COUNT
+
 /* Internal Memory DMA Registers (0xFFC0_1800 - 0xFFC0_19FF) */
 #define IMDMA_D0_CONFIG 0xFFC01808	/*IMDMA Stream 0 Destination Configuration */
 #define IMDMA_D0_NEXT_DESC_PTR 0xFFC01800	/*IMDMA Stream 0 Destination Next Descriptor Ptr Reg */
-- 
cgit v0.10.2


From fecbd7366bf5a39eaae2c03541f0b412f319534f Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Thu, 23 Apr 2009 20:49:43 +0000
Subject: Blackfin: fix early L1 relocation crash

Our early L1 relocate code may implicitly call code which lives in L1
memory.  This is due to the dma_memcpy() rewrite that made the DMA code
lockless and safe to be used by multiple processes.  If we start the
early DMA memcpy to relocate things into L1 instruction but then our
DMA memcpy code calls a function that lives in L1, things fall apart.
As such, create a small dedicated DMA memcpy routine that we can assume
sanity at boot time.

Reported-by: Filip Van Rillaer <filip.vanrillaer@oneaccess-net.com>
Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h
index e4f7b80..46c5618 100644
--- a/arch/blackfin/include/asm/dma.h
+++ b/arch/blackfin/include/asm/dma.h
@@ -253,5 +253,7 @@ static inline void clear_dma_irqstat(unsigned int channel)
 void *dma_memcpy(void *dest, const void *src, size_t count);
 void *safe_dma_memcpy(void *dest, const void *src, size_t count);
 void blackfin_dma_early_init(void);
+void early_dma_memcpy(void *dest, const void *src, size_t count);
+void early_dma_memcpy_done(void);
 
 #endif
diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c
index 8531693..704419e 100644
--- a/arch/blackfin/kernel/bfin_dma_5xx.c
+++ b/arch/blackfin/kernel/bfin_dma_5xx.c
@@ -232,6 +232,87 @@ void blackfin_dma_resume(void)
 void __init blackfin_dma_early_init(void)
 {
 	bfin_write_MDMA_S0_CONFIG(0);
+	bfin_write_MDMA_S1_CONFIG(0);
+}
+
+void __init early_dma_memcpy(void *pdst, const void *psrc, size_t size)
+{
+	unsigned long dst = (unsigned long)pdst;
+	unsigned long src = (unsigned long)psrc;
+	struct dma_register *dst_ch, *src_ch;
+
+	/* We assume that everything is 4 byte aligned, so include
+	 * a basic sanity check
+	 */
+	BUG_ON(dst % 4);
+	BUG_ON(src % 4);
+	BUG_ON(size % 4);
+
+	/* Force a sync in case a previous config reset on this channel
+	 * occurred.  This is needed so subsequent writes to DMA registers
+	 * are not spuriously lost/corrupted.
+	 */
+	__builtin_bfin_ssync();
+
+	src_ch = 0;
+	/* Find an avalible memDMA channel */
+	while (1) {
+		if (!src_ch || src_ch == (struct dma_register *)MDMA_S1_NEXT_DESC_PTR) {
+			dst_ch = (struct dma_register *)MDMA_D0_NEXT_DESC_PTR;
+			src_ch = (struct dma_register *)MDMA_S0_NEXT_DESC_PTR;
+		} else {
+			dst_ch = (struct dma_register *)MDMA_D1_NEXT_DESC_PTR;
+			src_ch = (struct dma_register *)MDMA_S1_NEXT_DESC_PTR;
+		}
+
+		if (!bfin_read16(&src_ch->cfg)) {
+			break;
+		} else {
+			if (bfin_read16(&src_ch->irq_status) & DMA_DONE)
+				bfin_write16(&src_ch->cfg, 0);
+		}
+
+	}
+
+	/* Destination */
+	bfin_write32(&dst_ch->start_addr, dst);
+	bfin_write16(&dst_ch->x_count, size >> 2);
+	bfin_write16(&dst_ch->x_modify, 1 << 2);
+	bfin_write16(&dst_ch->irq_status, DMA_DONE | DMA_ERR);
+
+	/* Source */
+	bfin_write32(&src_ch->start_addr, src);
+	bfin_write16(&src_ch->x_count, size >> 2);
+	bfin_write16(&src_ch->x_modify, 1 << 2);
+	bfin_write16(&src_ch->irq_status, DMA_DONE | DMA_ERR);
+
+	/* Enable */
+	bfin_write16(&src_ch->cfg, DMAEN | WDSIZE_32);
+	bfin_write16(&dst_ch->cfg, WNR | DI_EN | DMAEN | WDSIZE_32);
+
+	/* Since we are atomic now, don't use the workaround ssync */
+	__builtin_bfin_ssync();
+}
+
+void __init early_dma_memcpy_done(void)
+{
+	while ((bfin_read_MDMA_S0_CONFIG() && !(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE)) ||
+	       (bfin_read_MDMA_S1_CONFIG() && !(bfin_read_MDMA_D1_IRQ_STATUS() & DMA_DONE)))
+		continue;
+
+	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
+	bfin_write_MDMA_D1_IRQ_STATUS(DMA_DONE | DMA_ERR);
+	/*
+	 * Now that DMA is done, we would normally flush cache, but
+	 * i/d cache isn't running this early, so we don't bother,
+	 * and just clear out the DMA channel for next time
+	 */
+	bfin_write_MDMA_S0_CONFIG(0);
+	bfin_write_MDMA_S1_CONFIG(0);
+	bfin_write_MDMA_D0_CONFIG(0);
+	bfin_write_MDMA_D1_CONFIG(0);
+
+	__builtin_bfin_ssync();
 }
 
 /**
diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index a58687b..0838eaf 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -150,40 +150,45 @@ void __init bfin_relocate_l1_mem(void)
 	unsigned long l1_data_b_length;
 	unsigned long l2_length;
 
+	/*
+	 * due to the ALIGN(4) in the arch/blackfin/kernel/vmlinux.lds.S
+	 * we know that everything about l1 text/data is nice and aligned,
+	 * so copy by 4 byte chunks, and don't worry about overlapping
+	 * src/dest.
+	 *
+	 * We can't use the dma_memcpy functions, since they can call
+	 * scheduler functions which might be in L1 :( and core writes
+	 * into L1 instruction cause bad access errors, so we are stuck,
+	 * we are required to use DMA, but can't use the common dma
+	 * functions. We can't use memcpy either - since that might be
+	 * going to be in the relocated L1
+	 */
+
 	blackfin_dma_early_init();
 
+	/* if necessary, copy _stext_l1 to _etext_l1 to L1 instruction SRAM */
 	l1_code_length = _etext_l1 - _stext_l1;
-	if (l1_code_length > L1_CODE_LENGTH)
-		panic("L1 Instruction SRAM Overflow\n");
-	/* cannot complain as printk is not available as yet.
-	 * But we can continue booting and complain later!
-	 */
-
-	/* Copy _stext_l1 to _etext_l1 to L1 instruction SRAM */
-	dma_memcpy(_stext_l1, _l1_lma_start, l1_code_length);
+	if (l1_code_length)
+		early_dma_memcpy(_stext_l1, _l1_lma_start, l1_code_length);
 
+	/* if necessary, copy _sdata_l1 to _sbss_l1 to L1 data bank A SRAM */
 	l1_data_a_length = _sbss_l1 - _sdata_l1;
-	if (l1_data_a_length > L1_DATA_A_LENGTH)
-		panic("L1 Data SRAM Bank A Overflow\n");
-
-	/* Copy _sdata_l1 to _sbss_l1 to L1 data bank A SRAM */
-	dma_memcpy(_sdata_l1, _l1_lma_start + l1_code_length, l1_data_a_length);
+	if (l1_data_a_length)
+		early_dma_memcpy(_sdata_l1, _l1_lma_start + l1_code_length, l1_data_a_length);
 
+	/* if necessary, copy _sdata_b_l1 to _sbss_b_l1 to L1 data bank B SRAM */
 	l1_data_b_length = _sbss_b_l1 - _sdata_b_l1;
-	if (l1_data_b_length > L1_DATA_B_LENGTH)
-		panic("L1 Data SRAM Bank B Overflow\n");
-
-	/* Copy _sdata_b_l1 to _sbss_b_l1 to L1 data bank B SRAM */
-	dma_memcpy(_sdata_b_l1, _l1_lma_start + l1_code_length +
+	if (l1_data_b_length)
+		early_dma_memcpy(_sdata_b_l1, _l1_lma_start + l1_code_length +
 			l1_data_a_length, l1_data_b_length);
 
+	early_dma_memcpy_done();
+
+	/* if necessary, copy _stext_l2 to _edata_l2 to L2 SRAM */
 	if (L2_LENGTH != 0) {
 		l2_length = _sbss_l2 - _stext_l2;
-		if (l2_length > L2_LENGTH)
-			panic("L2 SRAM Overflow\n");
-
-		/* Copy _stext_l2 to _edata_l2 to L2 SRAM */
-		dma_memcpy(_stext_l2, _l2_lma_start, l2_length);
+		if (l2_length)
+			memcpy(_stext_l2, _l2_lma_start, l2_length);
 	}
 }
 
-- 
cgit v0.10.2


From bc4d6f36db4b24bf328a5fd70038a2e609b9f028 Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Thu, 23 Apr 2009 14:15:04 +0000
Subject: Blackfin: document anomaly 05000234 workaround

Note the reason for using CHIPD over DSPID.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 0eece23..3040415 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -131,8 +131,8 @@ unsigned long get_wchan(struct task_struct *p);
 /* Get the Silicon Revision of the chip */
 static inline uint32_t __pure bfin_revid(void)
 {
-	/* stored in the upper 4 bits */
-	uint32_t revid = bfin_read_CHIPID() >> 28;
+	/* Always use CHIPID, to work around ANOMALY_05000234 */
+	uint32_t revid = (bfin_read_CHIPID() & CHIPID_VERSION) >> 28;
 
 #ifdef CONFIG_BF52x
 	/* ANOMALY_05000357
-- 
cgit v0.10.2


From 76068c3c5dee4332f532573b6423d75c3e14f660 Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Thu, 23 Apr 2009 20:56:42 +0000
Subject: Blackfin: annotate anomaly 05000119 in core DMA code

Add a reminder note to avoid the DMA_DONE bit in our DMA core code.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c
index 704419e..763ed84 100644
--- a/arch/blackfin/kernel/bfin_dma_5xx.c
+++ b/arch/blackfin/kernel/bfin_dma_5xx.c
@@ -20,6 +20,11 @@
 #include <asm/dma.h>
 #include <asm/uaccess.h>
 
+/*
+ * To make sure we work around 05000119 - we always check DMA_DONE bit,
+ * never the DMA_RUN bit
+ */
+
 struct dma_channel dma_ch[MAX_DMA_CHANNELS];
 EXPORT_SYMBOL(dma_ch);
 
-- 
cgit v0.10.2


From a9a59e3096443ea4d6f50db978d7d3bbb47708b4 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 24 Apr 2009 01:42:54 +0000
Subject: Blackfin: punt useless GPIO init call

This init code existed only to dump a printk(), and not even a useful one.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c
index 32289b3..beffa00 100644
--- a/arch/blackfin/kernel/bfin_gpio.c
+++ b/arch/blackfin/kernel/bfin_gpio.c
@@ -313,15 +313,6 @@ inline void portmux_setup(unsigned short per)
 # define portmux_setup(...)  do { } while (0)
 #endif
 
-static int __init bfin_gpio_init(void)
-{
-	printk(KERN_INFO "Blackfin GPIO Controller\n");
-
-	return 0;
-}
-arch_initcall(bfin_gpio_init);
-
-
 #ifndef CONFIG_BF54x
 /***********************************************************
 *
-- 
cgit v0.10.2


From e522c8466d6f8437cf02a34287c8707ef53081ed Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Fri, 8 May 2009 07:42:12 +0000
Subject: Blackfin: work around anomaly 05000287

Make sure we work around anomaly 05000287 by configuring different port
preferences for the data cache.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/cplb-mpu/cacheinit.c b/arch/blackfin/kernel/cplb-mpu/cacheinit.c
index c6ff947..d5a86c3 100644
--- a/arch/blackfin/kernel/cplb-mpu/cacheinit.c
+++ b/arch/blackfin/kernel/cplb-mpu/cacheinit.c
@@ -55,7 +55,14 @@ void __cpuinit bfin_dcache_init(struct cplb_entry *dcplb_tbl)
 	}
 
 	ctrl = bfin_read_DMEM_CONTROL();
-	ctrl |= DMEM_CNTR;
+
+	/*
+	 *  Anomaly notes:
+	 *  05000287 - We implement workaround #2 - Change the DMEM_CONTROL
+	 *  register, so that the port preferences for DAG0 and DAG1 are set
+	 *  to port B
+	 */
+	ctrl |= DMEM_CNTR | PORT_PREF0 | (ANOMALY_05000287 ? PORT_PREF1 : 0);
 	bfin_write_DMEM_CONTROL(ctrl);
 	SSYNC();
 }
-- 
cgit v0.10.2


From a9031028ef70f658daa2151e361aee7b082965ee Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 23 Apr 2009 17:04:10 +0000
Subject: Blackfin: delete unused sys_getpagesize() function

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c
index fce49d7..a8f1329 100644
--- a/arch/blackfin/kernel/sys_bfin.c
+++ b/arch/blackfin/kernel/sys_bfin.c
@@ -78,11 +78,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 	return do_mmap2(addr, len, prot, flags, fd, pgoff);
 }
 
-asmlinkage int sys_getpagesize(void)
-{
-	return PAGE_SIZE;
-}
-
 asmlinkage void *sys_sram_alloc(size_t size, unsigned long flags)
 {
 	return sram_alloc_with_lsl(size, flags);
-- 
cgit v0.10.2


From 729a3fa7333af58d57eecb0aacf7ca3d58237f81 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 24 Apr 2009 03:55:41 +0000
Subject: Blackfin: workaround anomaly 05000227

Workaround anomaly 05000227 by only using the scratch pad for stack when
absolutely necessary.  The core code which reprograms clocks really only
touches MMRs directly with constants.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-common/head.S b/arch/blackfin/mach-common/head.S
index 698d4c0..01d62ff 100644
--- a/arch/blackfin/mach-common/head.S
+++ b/arch/blackfin/mach-common/head.S
@@ -30,8 +30,6 @@ ENTRY(__init_clear_bss)
 	rts;
 ENDPROC(__init_clear_bss)
 
-#define INITIAL_STACK	(L1_SCRATCH_START + L1_SCRATCH_LENGTH - 12)
-
 ENTRY(__start)
 	/* R0: argument of command line string, passed from uboot, save it */
 	R7 = R0;
@@ -148,8 +146,8 @@ ENTRY(__start)
 #endif
 
 	/* Initialize stack pointer */
-	sp.l = lo(INITIAL_STACK);
-	sp.h = hi(INITIAL_STACK);
+	sp.l = _init_thread_union;
+	sp.h = _init_thread_union;
 	fp = sp;
 	usp = sp;
 
@@ -189,7 +187,15 @@ ENTRY(__start)
 	/* Put The Code for PLL Programming and SDRAM Programming in L1 ISRAM */
 	call _bfin_relocate_l1_mem;
 #ifdef CONFIG_BFIN_KERNEL_CLOCK
+	/* Only use on-chip scratch space for stack when absolutely required
+	 * to avoid Anomaly 05000227 ... we know the init_clocks() func only
+	 * uses L1 text and stack space and no other memory region.
+	 */
+# define KERNEL_CLOCK_STACK (L1_SCRATCH_START + L1_SCRATCH_LENGTH - 12)
+	sp.l = lo(KERNEL_CLOCK_STACK);
+	sp.h = hi(KERNEL_CLOCK_STACK);
 	call _init_clocks;
+	sp = usp;	/* usp hasnt been touched, so restore from there */
 #endif
 
 	/* This section keeps the processor in supervisor mode
@@ -243,9 +249,7 @@ ENTRY(_real_start)
 	call _cmdline_init;
 
 	/* Load the current thread pointer and stack */
-	sp.l = _init_thread_union;
-	sp.h = _init_thread_union;
-	p1 = THREAD_SIZE (z);
+	p1 = THREAD_SIZE + 4 (z);	/* +4 is for reti loading */
 	sp = sp + p1;
 	usp = sp;
 	fp = sp;
-- 
cgit v0.10.2


From d8804adf52f5991388fa9af77428e4cc7768059d Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 29 Apr 2009 06:26:46 +0000
Subject: Blackfin: do not append newlines to panic() messages

The panic() function already handles newlines for us.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index 0838eaf..5a4a0367 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -477,7 +477,7 @@ static __init void memory_setup(void)
 
 	if (DMA_UNCACHED_REGION > (_ramend - _ramstart)) {
 		console_init();
-		panic("DMA region exceeds memory limit: %lu.\n",
+		panic("DMA region exceeds memory limit: %lu.",
 			_ramend - _ramstart);
 	}
 	memory_end = _ramend - DMA_UNCACHED_REGION;
@@ -531,7 +531,7 @@ static __init void memory_setup(void)
 
 	if (mtd_size == 0) {
 		console_init();
-		panic("Don't boot kernel without rootfs attached.\n");
+		panic("Don't boot kernel without rootfs attached.");
 	}
 
 	/* Relocate MTD image to the top of memory after the uncached memory area */
@@ -886,7 +886,7 @@ void __init setup_arch(char **cmdline_p)
 				printk(KERN_ERR "Warning: Compiled for Rev %d, but running on Rev %d\n",
 				       bfin_compiled_revid(), bfin_revid());
 				if (bfin_compiled_revid() > bfin_revid())
-					panic("Error: you are missing anomaly workarounds for this rev\n");
+					panic("Error: you are missing anomaly workarounds for this rev");
 			}
 		}
 		if (bfin_revid() < CONFIG_BF_REV_MIN || bfin_revid() > CONFIG_BF_REV_MAX)
@@ -896,7 +896,7 @@ void __init setup_arch(char **cmdline_p)
 
 	/* We can't run on BF548-0.1 due to ANOMALY 05000448 */
 	if (bfin_cpuid() == 0x27de && bfin_revid() == 1)
-		panic("You can't run on this processor due to 05000448\n");
+		panic("You can't run on this processor due to 05000448");
 
 	printk(KERN_INFO "Blackfin Linux support by http://blackfin.uclinux.org/\n");
 
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index ffe7fb5..fb57742 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -222,7 +222,7 @@ asmlinkage void double_fault_c(struct pt_regs *fp)
 		show_regs(fp);
 	}
 #endif
-	panic("Double Fault - unrecoverable event\n");
+	panic("Double Fault - unrecoverable event");
 
 }
 
@@ -1246,5 +1246,5 @@ void panic_cplb_error(int cplb_panic, struct pt_regs *fp)
 	dump_bfin_mem(fp);
 	show_regs(fp);
 	dump_stack();
-	panic("Unrecoverable event\n");
+	panic("Unrecoverable event");
 }
diff --git a/arch/blackfin/mach-bf561/smp.c b/arch/blackfin/mach-bf561/smp.c
index 9b27e69..4ea13d2 100644
--- a/arch/blackfin/mach-bf561/smp.c
+++ b/arch/blackfin/mach-bf561/smp.c
@@ -135,7 +135,7 @@ void __init platform_request_ipi(irq_handler_t handler)
 	ret = request_irq(IRQ_SUPPLE_0, handler, IRQF_DISABLED,
 			  "SMP interrupt", handler);
 	if (ret)
-		panic("Cannot request supplemental interrupt 0 for IPI service\n");
+		panic("Cannot request supplemental interrupt 0 for IPI service");
 }
 
 void platform_send_ipi(cpumask_t callmap)
-- 
cgit v0.10.2


From 7f3aee3c187641ec7c7e260d9cabb71ac4ac9f7c Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Thu, 7 May 2009 10:04:19 +0000
Subject: Blackfin: detect anomaly 05000274

Detect and reject operating conditions for anomaly 05000274 since the
problem cannot be worked around in software.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index 5a4a0367..dc30e0a 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -801,10 +801,8 @@ void __init setup_arch(char **cmdline_p)
 	cclk = get_cclk();
 	sclk = get_sclk();
 
-#if !defined(CONFIG_BFIN_KERNEL_CLOCK)
-	if (ANOMALY_05000273 && cclk == sclk)
-		panic("ANOMALY 05000273, SCLK can not be same as CCLK");
-#endif
+	if ((ANOMALY_05000273 || ANOMALY_05000274) && (cclk >> 1) < sclk)
+		panic("ANOMALY 05000273 or 05000274: CCLK must be >= 2*SCLK");
 
 #ifdef BF561_FAMILY
 	if (ANOMALY_05000266) {
@@ -903,9 +901,6 @@ void __init setup_arch(char **cmdline_p)
 	printk(KERN_INFO "Processor Speed: %lu MHz core clock and %lu MHz System Clock\n",
 	       cclk / 1000000, sclk / 1000000);
 
-	if (ANOMALY_05000273 && (cclk >> 1) <= sclk)
-		printk("\n\n\nANOMALY_05000273: CCLK must be >= 2*SCLK !!!\n\n\n");
-
 	setup_bootmem_allocator();
 
 	paging_init();
diff --git a/arch/blackfin/mach-common/cpufreq.c b/arch/blackfin/mach-common/cpufreq.c
index 72e1660..70e3411 100644
--- a/arch/blackfin/mach-common/cpufreq.c
+++ b/arch/blackfin/mach-common/cpufreq.c
@@ -140,7 +140,8 @@ static int __init __bfin_cpu_init(struct cpufreq_policy *policy)
 	cclk = get_cclk() / 1000;
 	sclk = get_sclk() / 1000;
 
-#if ANOMALY_05000273 || (!defined(CONFIG_BF54x) && defined(CONFIG_BFIN_DCACHE))
+#if ANOMALY_05000273 || ANOMALY_05000274 || \
+	(!defined(CONFIG_BF54x) && defined(CONFIG_BFIN_DCACHE))
 	min_cclk = sclk * 2;
 #else
 	min_cclk = sclk;
-- 
cgit v0.10.2


From ee0263cc2e7d774655bba6a6750a06099a3cebf0 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Wed, 20 May 2009 06:06:15 +0000
Subject: Blackfin: BF518F-EZBRD: handle required portmuxing of async pins

The two high address lines on the BF51x are not dedicated which means we
need to handle them like any other peripheral pin if we want to access the
upper 2MB of parallel flash.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c
index 41f2eac..602dce0 100644
--- a/arch/blackfin/mach-bf518/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf518/boards/ezbrd.c
@@ -82,7 +82,11 @@ static struct physmap_flash_data ezbrd_flash_data = {
 
 static struct resource ezbrd_flash_resource = {
 	.start = 0x20000000,
+#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
+	.end   = 0x202fffff,
+#else
 	.end   = 0x203fffff,
+#endif
 	.flags = IORESOURCE_MEM,
 };
 
@@ -678,6 +682,11 @@ static int __init ezbrd_init(void)
 				ARRAY_SIZE(bfin_i2c_board_info));
 	platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices));
 	spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info));
+	/* setup BF518-EZBRD GPIO pin PG11 to AMS2, PG15 to AMS3. */
+	peripheral_request(P_AMS2, "ParaFlash");
+#if !defined(CONFIG_SPI_BFIN) && !defined(CONFIG_SPI_BFIN_MODULE)
+	peripheral_request(P_AMS3, "ParaFlash");
+#endif
 	return 0;
 }
 
diff --git a/arch/blackfin/mach-bf518/include/mach/portmux.h b/arch/blackfin/mach-bf518/include/mach/portmux.h
index f618b48..a0fc77f 100644
--- a/arch/blackfin/mach-bf518/include/mach/portmux.h
+++ b/arch/blackfin/mach-bf518/include/mach/portmux.h
@@ -185,6 +185,10 @@
 #define P_PTP_PPS		(P_DEFINED | P_IDENT(GPIO_PG12) | P_FUNCT(2))
 #define P_PTP_CLKOUT		(P_DEFINED | P_IDENT(GPIO_PG13) | P_FUNCT(2))
 
-#define P_HWAIT		(P_DEFINED | P_IDENT(GPIO_PG000000000) | P_FUNCT(1))
+/* AMS */
+#define P_AMS2			(P_DEFINED | P_IDENT(GPIO_PG11) | P_FUNCT(1))
+#define P_AMS3			(P_DEFINED | P_IDENT(GPIO_PG15) | P_FUNCT(2))
+
+#define P_HWAIT			(P_DEFINED | P_IDENT(GPIO_PG000000000) | P_FUNCT(1))
 
 #endif				/* _MACH_PORTMUX_H_ */
-- 
cgit v0.10.2


From 555487bbb63f527e63fecbff48c86e2c07ce5024 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Wed, 6 May 2009 10:38:07 +0000
Subject: Blackfin: annotate anomaly 05000120

Add some notes for anomaly 05000120 to make sure we work around it.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 93eab61..187c79e 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -45,6 +45,10 @@
 #include <asm/cpu.h>
 #include <linux/err.h>
 
+/*
+ * Anomaly notes:
+ * 05000120 - we always define corelock as 32-bit integer in L2
+ */
 struct corelock_slot corelock __attribute__ ((__section__(".l2.bss")));
 
 void __cpuinitdata *init_retx_coreb, *init_saved_retx_coreb,
-- 
cgit v0.10.2


From 1fa9be72b558c39459f98835eb86dbb4ef4da30b Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Fri, 15 May 2009 11:01:59 +0000
Subject: Blackfin: add support for gptimer0 as a tick source

For systems where the core cycles are not a usable tick source (like SMP
or cycles gets updated), enable gptimer0 as an alternative.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index cea2bfd..c04e7a4 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -241,12 +241,6 @@ config IRQ_PER_CPU
 	depends on SMP
 	default y
 
-config TICK_SOURCE_SYSTMR0
-	bool
-	select BFIN_GPTIMERS
-	depends on SMP
-	default y
-
 config BF_REV_MIN
 	int
 	default 0 if (BF51x || BF52x || (BF54x && !BF54xM))
@@ -607,7 +601,6 @@ source kernel/Kconfig.hz
 
 config GENERIC_TIME
 	bool "Generic time"
-	depends on !SMP
 	default y
 
 config GENERIC_CLOCKEVENTS
@@ -615,12 +608,26 @@ config GENERIC_CLOCKEVENTS
 	depends on GENERIC_TIME
 	default y
 
+choice
+	prompt "Kernel Tick Source"
+	depends on GENERIC_CLOCKEVENTS
+	default TICKSOURCE_CORETMR
+
+config TICKSOURCE_GPTMR0
+	bool "Gptimer0 (SCLK domain)"
+	select BFIN_GPTIMERS
+	depends on !IPIPE
+
+config TICKSOURCE_CORETMR
+	bool "Core timer (CCLK domain)"
+
+endchoice
+
 config CYCLES_CLOCKSOURCE
-	bool "Use 'CYCLES' as a clocksource (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "Use 'CYCLES' as a clocksource"
 	depends on GENERIC_CLOCKEVENTS
 	depends on !BFIN_SCRATCH_REG_CYCLES
-	default n
+	depends on !SMP
 	help
 	  If you say Y here, you will enable support for using the 'cycles'
 	  registers as a clock source.  Doing so means you will be unable to
@@ -628,6 +635,11 @@ config CYCLES_CLOCKSOURCE
 	  still be able to read it (such as for performance monitoring), but
 	  writing the registers will most likely crash the kernel.
 
+config GPTMR0_CLOCKSOURCE
+	bool "Use GPTimer0 as a clocksource (higher rating)"
+	depends on GENERIC_CLOCKEVENTS
+	depends on !TICKSOURCE_GPTMR0
+
 source kernel/time/Kconfig
 
 comment "Misc"
diff --git a/arch/blackfin/include/asm/time.h b/arch/blackfin/include/asm/time.h
index ddc43ce..589e937 100644
--- a/arch/blackfin/include/asm/time.h
+++ b/arch/blackfin/include/asm/time.h
@@ -37,4 +37,5 @@ extern unsigned long long __bfin_cycles_off;
 extern unsigned int __bfin_cycles_mod;
 #endif
 
+extern void __init setup_core_timer(void);
 #endif
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index 2764612..0791eba 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -20,8 +20,9 @@
 
 #include <asm/blackfin.h>
 #include <asm/time.h>
+#include <asm/gptimers.h>
 
-#ifdef CONFIG_CYCLES_CLOCKSOURCE
+#if defined(CONFIG_CYCLES_CLOCKSOURCE)
 
 /* Accelerators for sched_clock()
  * convert from cycles(64bits) => nanoseconds (64bits)
@@ -58,15 +59,15 @@ static inline unsigned long long cycles_2_ns(cycle_t cyc)
 	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
 }
 
-static cycle_t read_cycles(struct clocksource *cs)
+static cycle_t bfin_read_cycles(struct clocksource *cs)
 {
 	return __bfin_cycles_off + (get_cycles() << __bfin_cycles_mod);
 }
 
-static struct clocksource clocksource_bfin = {
-	.name		= "bfin_cycles",
+static struct clocksource bfin_cs_cycles = {
+	.name		= "bfin_cs_cycles",
 	.rating		= 350,
-	.read		= read_cycles,
+	.read		= bfin_read_cycles,
 	.mask		= CLOCKSOURCE_MASK(64),
 	.shift		= 22,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
@@ -74,53 +75,198 @@ static struct clocksource clocksource_bfin = {
 
 unsigned long long sched_clock(void)
 {
-	return cycles_2_ns(read_cycles(&clocksource_bfin));
+	return cycles_2_ns(bfin_read_cycles(&bfin_cs_cycles));
 }
 
-static int __init bfin_clocksource_init(void)
+static int __init bfin_cs_cycles_init(void)
 {
 	set_cyc2ns_scale(get_cclk() / 1000);
 
-	clocksource_bfin.mult = clocksource_hz2mult(get_cclk(), clocksource_bfin.shift);
+	bfin_cs_cycles.mult = \
+		clocksource_hz2mult(get_cclk(), bfin_cs_cycles.shift);
 
-	if (clocksource_register(&clocksource_bfin))
+	if (clocksource_register(&bfin_cs_cycles))
 		panic("failed to register clocksource");
 
 	return 0;
 }
+#else
+# define bfin_cs_cycles_init()
+#endif
+
+#ifdef CONFIG_GPTMR0_CLOCKSOURCE
+
+void __init setup_gptimer0(void)
+{
+	disable_gptimers(TIMER0bit);
+
+	set_gptimer_config(TIMER0_id, \
+		TIMER_OUT_DIS | TIMER_PERIOD_CNT | TIMER_MODE_PWM);
+	set_gptimer_period(TIMER0_id, -1);
+	set_gptimer_pwidth(TIMER0_id, -2);
+	SSYNC();
+	enable_gptimers(TIMER0bit);
+}
+
+static cycle_t bfin_read_gptimer0(void)
+{
+	return bfin_read_TIMER0_COUNTER();
+}
+
+static struct clocksource bfin_cs_gptimer0 = {
+	.name		= "bfin_cs_gptimer0",
+	.rating		= 400,
+	.read		= bfin_read_gptimer0,
+	.mask		= CLOCKSOURCE_MASK(32),
+	.shift		= 22,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int __init bfin_cs_gptimer0_init(void)
+{
+	setup_gptimer0();
 
+	bfin_cs_gptimer0.mult = \
+		clocksource_hz2mult(get_sclk(), bfin_cs_gptimer0.shift);
+
+	if (clocksource_register(&bfin_cs_gptimer0))
+		panic("failed to register clocksource");
+
+	return 0;
+}
 #else
-# define bfin_clocksource_init()
+# define bfin_cs_gptimer0_init()
 #endif
 
+#ifdef CONFIG_CORE_TIMER_IRQ_L1
+__attribute__((l1_text))
+#endif
+irqreturn_t timer_interrupt(int irq, void *dev_id);
+
+static int bfin_timer_set_next_event(unsigned long, \
+		struct clock_event_device *);
+
+static void bfin_timer_set_mode(enum clock_event_mode, \
+		struct clock_event_device *);
+
+static struct clock_event_device clockevent_bfin = {
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
+	.name		= "bfin_gptimer0",
+	.rating		= 300,
+	.irq		= IRQ_TIMER0,
+#else
+	.name		= "bfin_core_timer",
+	.rating		= 350,
+	.irq		= IRQ_CORETMR,
+#endif
+	.shift		= 32,
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	.set_next_event = bfin_timer_set_next_event,
+	.set_mode	= bfin_timer_set_mode,
+};
+
+static struct irqaction bfin_timer_irq = {
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
+	.name		= "Blackfin GPTimer0",
+#else
+	.name		= "Blackfin CoreTimer",
+#endif
+	.flags		= IRQF_DISABLED | IRQF_TIMER | \
+			  IRQF_IRQPOLL | IRQF_PERCPU,
+	.handler	= timer_interrupt,
+	.dev_id		= &clockevent_bfin,
+};
+
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
 static int bfin_timer_set_next_event(unsigned long cycles,
                                      struct clock_event_device *evt)
 {
+	disable_gptimers(TIMER0bit);
+
+	/* it starts counting three SCLK cycles after the TIMENx bit is set */
+	set_gptimer_pwidth(TIMER0_id, cycles - 3);
+	enable_gptimers(TIMER0bit);
+	return 0;
+}
+
+static void bfin_timer_set_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC: {
+		set_gptimer_config(TIMER0_id, \
+			TIMER_OUT_DIS | TIMER_IRQ_ENA | \
+			TIMER_PERIOD_CNT | TIMER_MODE_PWM);
+		set_gptimer_period(TIMER0_id, get_sclk() / HZ);
+		set_gptimer_pwidth(TIMER0_id, get_sclk() / HZ - 1);
+		enable_gptimers(TIMER0bit);
+		break;
+	}
+	case CLOCK_EVT_MODE_ONESHOT:
+		disable_gptimers(TIMER0bit);
+		set_gptimer_config(TIMER0_id, \
+			TIMER_OUT_DIS | TIMER_IRQ_ENA | TIMER_MODE_PWM);
+		set_gptimer_period(TIMER0_id, 0);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		disable_gptimers(TIMER0bit);
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		break;
+	}
+}
+
+static void bfin_timer_ack(void)
+{
+	set_gptimer_status(TIMER_GROUP1, TIMER_STATUS_TIMIL0);
+}
+
+static void __init bfin_timer_init(void)
+{
+	disable_gptimers(TIMER0bit);
+}
+
+static unsigned long  __init bfin_clockevent_check(void)
+{
+	setup_irq(IRQ_TIMER0, &bfin_timer_irq);
+	return get_sclk();
+}
+
+#else /* CONFIG_TICKSOURCE_CORETMR */
+
+static int bfin_timer_set_next_event(unsigned long cycles,
+				struct clock_event_device *evt)
+{
+	bfin_write_TCNTL(TMPWR);
+	CSYNC();
 	bfin_write_TCOUNT(cycles);
 	CSYNC();
+	bfin_write_TCNTL(TMPWR | TMREN);
 	return 0;
 }
 
 static void bfin_timer_set_mode(enum clock_event_mode mode,
-                                struct clock_event_device *evt)
+				struct clock_event_device *evt)
 {
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC: {
 		unsigned long tcount = ((get_cclk() / (HZ * TIME_SCALE)) - 1);
 		bfin_write_TCNTL(TMPWR);
-		bfin_write_TSCALE(TIME_SCALE - 1);
 		CSYNC();
+		bfin_write_TSCALE(TIME_SCALE - 1);
 		bfin_write_TPERIOD(tcount);
 		bfin_write_TCOUNT(tcount);
-		bfin_write_TCNTL(TMPWR | TMREN | TAUTORLD);
 		CSYNC();
+		bfin_write_TCNTL(TMPWR | TMREN | TAUTORLD);
 		break;
 	}
 	case CLOCK_EVT_MODE_ONESHOT:
+		bfin_write_TCNTL(TMPWR);
+		CSYNC();
 		bfin_write_TSCALE(TIME_SCALE - 1);
+		bfin_write_TPERIOD(0);
 		bfin_write_TCOUNT(0);
-		bfin_write_TCNTL(TMPWR | TMREN);
-		CSYNC();
 		break;
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
@@ -132,6 +278,10 @@ static void bfin_timer_set_mode(enum clock_event_mode mode,
 	}
 }
 
+static void bfin_timer_ack(void)
+{
+}
+
 static void __init bfin_timer_init(void)
 {
 	/* power up the timer, but don't enable it just yet */
@@ -145,38 +295,32 @@ static void __init bfin_timer_init(void)
 	bfin_write_TPERIOD(0);
 	bfin_write_TCOUNT(0);
 
-	/* now enable the timer */
 	CSYNC();
 }
 
+static unsigned long  __init bfin_clockevent_check(void)
+{
+	setup_irq(IRQ_CORETMR, &bfin_timer_irq);
+	return get_cclk() / TIME_SCALE;
+}
+
+void __init setup_core_timer(void)
+{
+	bfin_timer_init();
+	bfin_timer_set_mode(CLOCK_EVT_MODE_PERIODIC, NULL);
+}
+#endif /* CONFIG_TICKSOURCE_GPTMR0 */
+
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
  */
-#ifdef CONFIG_CORE_TIMER_IRQ_L1
-__attribute__((l1_text))
-#endif
-irqreturn_t timer_interrupt(int irq, void *dev_id);
-
-static struct clock_event_device clockevent_bfin = {
-	.name		= "bfin_core_timer",
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.shift		= 32,
-	.set_next_event = bfin_timer_set_next_event,
-	.set_mode	= bfin_timer_set_mode,
-};
-
-static struct irqaction bfin_timer_irq = {
-	.name		= "Blackfin Core Timer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-	.handler	= timer_interrupt,
-	.dev_id		= &clockevent_bfin,
-};
-
 irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
 	struct clock_event_device *evt = dev_id;
+	smp_mb();
 	evt->event_handler(evt);
+	bfin_timer_ack();
 	return IRQ_HANDLED;
 }
 
@@ -184,9 +328,8 @@ static int __init bfin_clockevent_init(void)
 {
 	unsigned long timer_clk;
 
-	timer_clk = get_cclk() / TIME_SCALE;
+	timer_clk = bfin_clockevent_check();
 
-	setup_irq(IRQ_CORETMR, &bfin_timer_irq);
 	bfin_timer_init();
 
 	clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
@@ -218,6 +361,7 @@ void __init time_init(void)
 	xtime.tv_nsec = 0;
 	set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
 
-	bfin_clocksource_init();
+	bfin_cs_cycles_init();
+	bfin_cs_gptimer0_init();
 	bfin_clockevent_init();
 }
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index 1bbacfb..daa02d4 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -31,7 +31,7 @@ static struct irqaction bfin_timer_irq = {
 #endif
 };
 
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0) || defined(CONFIG_IPIPE)
 void __init setup_system_timer0(void)
 {
 	/* Power down the core timer, just to play safe. */
@@ -74,7 +74,7 @@ void __init setup_core_timer(void)
 static void __init
 time_sched_init(irqreturn_t(*timer_routine) (int, void *))
 {
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0) || defined(CONFIG_IPIPE)
 	setup_system_timer0();
 	bfin_timer_irq.handler = timer_routine;
 	setup_irq(IRQ_TIMER0, &bfin_timer_irq);
@@ -94,7 +94,7 @@ static unsigned long gettimeoffset(void)
 	unsigned long offset;
 	unsigned long clocks_per_jiffy;
 
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0) || defined(CONFIG_IPIPE)
 	clocks_per_jiffy = bfin_read_TIMER0_PERIOD();
 	offset = bfin_read_TIMER0_COUNTER() / \
 		(((clocks_per_jiffy + 1) * HZ) / USEC_PER_SEC);
@@ -133,7 +133,7 @@ irqreturn_t timer_interrupt(int irq, void *dummy)
 	static long last_rtc_update;
 
 	write_seqlock(&xtime_lock);
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) && !defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0) && !defined(CONFIG_IPIPE)
 	/*
 	 * TIMIL0 is latched in __ipipe_grab_irq() when the I-Pipe is
 	 * enabled.
@@ -159,7 +159,7 @@ irqreturn_t timer_interrupt(int irq, void *dummy)
 				/* Do it again in 60s. */
 				last_rtc_update = xtime.tv_sec - 600;
 		}
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) && !defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0) && !defined(CONFIG_IPIPE)
 		set_gptimer_status(0, TIMER_STATUS_TIMIL0);
 	}
 #endif
diff --git a/arch/blackfin/mach-bf518/Kconfig b/arch/blackfin/mach-bf518/Kconfig
index f397ede..4c76fef 100644
--- a/arch/blackfin/mach-bf518/Kconfig
+++ b/arch/blackfin/mach-bf518/Kconfig
@@ -156,6 +156,7 @@ config IRQ_PORTH_INTB
 	default 11
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf527/Kconfig b/arch/blackfin/mach-bf527/Kconfig
index 8438ec6..848ac6f 100644
--- a/arch/blackfin/mach-bf527/Kconfig
+++ b/arch/blackfin/mach-bf527/Kconfig
@@ -170,6 +170,7 @@ config IRQ_PORTH_INTB
 	default 11
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf533/Kconfig b/arch/blackfin/mach-bf533/Kconfig
index 14427de..4c57244 100644
--- a/arch/blackfin/mach-bf533/Kconfig
+++ b/arch/blackfin/mach-bf533/Kconfig
@@ -59,6 +59,7 @@ config DMA7_UARTTX
 	default 10
 config TIMER0
 	int "TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config TIMER1
 	int "TIMER1"
diff --git a/arch/blackfin/mach-bf537/Kconfig b/arch/blackfin/mach-bf537/Kconfig
index bbc08fd..d81224f 100644
--- a/arch/blackfin/mach-bf537/Kconfig
+++ b/arch/blackfin/mach-bf537/Kconfig
@@ -66,6 +66,7 @@ config IRQ_MAC_TX
 	default 11
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf538/Kconfig b/arch/blackfin/mach-bf538/Kconfig
index f068c35..2d280f5 100644
--- a/arch/blackfin/mach-bf538/Kconfig
+++ b/arch/blackfin/mach-bf538/Kconfig
@@ -57,6 +57,7 @@ config IRQ_UART0_TX
 	default 10
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf548/Kconfig b/arch/blackfin/mach-bf548/Kconfig
index 8d24eaa..a09623d 100644
--- a/arch/blackfin/mach-bf548/Kconfig
+++ b/arch/blackfin/mach-bf548/Kconfig
@@ -257,6 +257,7 @@ config IRQ_OTPSEC
 	default 11
 config IRQ_TIMER0
 	int "IRQ_TIMER0"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "IRQ_TIMER1"
diff --git a/arch/blackfin/mach-bf561/Kconfig b/arch/blackfin/mach-bf561/Kconfig
index 6965dd5..cb97436 100644
--- a/arch/blackfin/mach-bf561/Kconfig
+++ b/arch/blackfin/mach-bf561/Kconfig
@@ -125,6 +125,7 @@ config IRQ_DMA2_11
 	default 9
 config IRQ_TIMER0
 	int "TIMER 0  Interrupt"
+	default 7 if TICKSOURCE_GPTMR0
 	default 8
 config IRQ_TIMER1
 	int "TIMER 1  Interrupt"
diff --git a/arch/blackfin/mach-bf561/smp.c b/arch/blackfin/mach-bf561/smp.c
index 4ea13d2..8c10701 100644
--- a/arch/blackfin/mach-bf561/smp.c
+++ b/arch/blackfin/mach-bf561/smp.c
@@ -133,7 +133,7 @@ void __init platform_request_ipi(irq_handler_t handler)
 	int ret;
 
 	ret = request_irq(IRQ_SUPPLE_0, handler, IRQF_DISABLED,
-			  "SMP interrupt", handler);
+			  "Supplemental Interrupt0", handler);
 	if (ret)
 		panic("Cannot request supplemental interrupt 0 for IPI service");
 }
diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index 389c5e8..6e80861 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -1052,7 +1052,7 @@ int __init init_arch_irq(void)
 			set_irq_chained_handler(irq, bfin_demux_error_irq);
 			break;
 #endif
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
 		case IRQ_TIMER0:
 			set_irq_handler(irq, handle_percpu_irq);
 			break;
@@ -1232,13 +1232,9 @@ asmlinkage int __ipipe_grab_irq(int vec, struct pt_regs *regs)
 
 	if (likely(vec == EVT_IVTMR_P)) {
 		irq = IRQ_CORETMR;
-		goto core_tick;
-	}
-
-	SSYNC();
 
+	} else {
 #if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561)
-	{
 		unsigned long sic_status[3];
 
 		sic_status[0] = bfin_read_SIC_ISR0() & bfin_read_SIC_IMASK0();
@@ -1254,9 +1250,7 @@ asmlinkage int __ipipe_grab_irq(int vec, struct pt_regs *regs)
 			if (sic_status[(ivg->irqno - IVG7) / 32] & ivg->isrflag)
 				break;
 		}
-	}
 #else
-	{
 		unsigned long sic_status;
 
 		sic_status = bfin_read_SIC_IMASK() & bfin_read_SIC_ISR();
@@ -1268,15 +1262,13 @@ asmlinkage int __ipipe_grab_irq(int vec, struct pt_regs *regs)
 			} else if (sic_status & ivg->isrflag)
 				break;
 		}
-	}
 #endif
 
-	irq = ivg->irqno;
+		irq = ivg->irqno;
+	}
 
 	if (irq == IRQ_SYSTMR) {
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
-core_tick:
-#else
+#ifndef CONFIG_GENERIC_CLOCKEVENTS
 		bfin_write_TIMER_STATUS(1); /* Latch TIMIL0 */
 #endif
 		/* This is basically what we need from the register frame. */
@@ -1288,9 +1280,6 @@ core_tick:
 			__raw_get_cpu_var(__ipipe_tick_regs).ipend |= 0x10;
 	}
 
-#ifndef CONFIG_GENERIC_CLOCKEVENTS
-core_tick:
-#endif
 	if (this_domain == ipipe_root_domain) {
 		s = __test_and_set_bit(IPIPE_SYNCDEFER_FLAG, &p->status);
 		barrier();
@@ -1308,7 +1297,7 @@ core_tick:
 		}
 	}
 
-       return 0;
+	return 0;
 }
 
 #endif /* CONFIG_IPIPE */
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 187c79e..b669595 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -43,6 +43,7 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/cpu.h>
+#include <asm/time.h>
 #include <linux/err.h>
 
 /*
@@ -356,7 +357,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
 
 static void __cpuinit setup_secondary(unsigned int cpu)
 {
-#if !(defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE))
+#if !defined(CONFIG_TICKSOURCE_GPTMR0)
 	struct irq_desc *timer_desc;
 #endif
 	unsigned long ilat;
@@ -377,7 +378,7 @@ static void __cpuinit setup_secondary(unsigned int cpu)
 	    IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 |
 	    IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW;
 
-#if defined(CONFIG_TICK_SOURCE_SYSTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_TICKSOURCE_GPTMR0)
 	/* Power down the core timer, just to play safe. */
 	bfin_write_TCNTL(0);
 
-- 
cgit v0.10.2


From 8af7ffa0d5460586e0f06b2f045a6a2631224b61 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Fri, 8 May 2009 07:42:12 +0000
Subject: Blackfin: add workaround for anomaly 05000287

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/cplb-nompu/cacheinit.c b/arch/blackfin/kernel/cplb-nompu/cacheinit.c
index c6ff947..d5a86c3 100644
--- a/arch/blackfin/kernel/cplb-nompu/cacheinit.c
+++ b/arch/blackfin/kernel/cplb-nompu/cacheinit.c
@@ -55,7 +55,14 @@ void __cpuinit bfin_dcache_init(struct cplb_entry *dcplb_tbl)
 	}
 
 	ctrl = bfin_read_DMEM_CONTROL();
-	ctrl |= DMEM_CNTR;
+
+	/*
+	 *  Anomaly notes:
+	 *  05000287 - We implement workaround #2 - Change the DMEM_CONTROL
+	 *  register, so that the port preferences for DAG0 and DAG1 are set
+	 *  to port B
+	 */
+	ctrl |= DMEM_CNTR | PORT_PREF0 | (ANOMALY_05000287 ? PORT_PREF1 : 0);
 	bfin_write_DMEM_CONTROL(ctrl);
 	SSYNC();
 }
-- 
cgit v0.10.2


From a17c7f6f5b3b3f91ff7121c33bea8748c415ab15 Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Tue, 5 May 2009 17:14:39 +0000
Subject: Blackfin: make sure MPU CPLB for first 1k is marked as valid

This way we properly catch and kill applications that jump to a NULL ptr.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/cplb-mpu/cplbinit.c b/arch/blackfin/kernel/cplb-mpu/cplbinit.c
index 3e329a6..c006a44 100644
--- a/arch/blackfin/kernel/cplb-mpu/cplbinit.c
+++ b/arch/blackfin/kernel/cplb-mpu/cplbinit.c
@@ -64,7 +64,7 @@ void __init generate_cplb_tables_cpu(unsigned int cpu)
 	dcplb_tbl[cpu][i_d++].data = SDRAM_OOPS | PAGE_SIZE_1KB;
 
 	icplb_tbl[cpu][i_i].addr = 0;
-	icplb_tbl[cpu][i_i++].data = i_cache | CPLB_USER_RD | PAGE_SIZE_1KB;
+	icplb_tbl[cpu][i_i++].data = CPLB_VALID | i_cache | CPLB_USER_RD | PAGE_SIZE_1KB;
 
 	/* Cover kernel memory with 4M pages.  */
 	addr = 0;
-- 
cgit v0.10.2


From a261eec0009b6093727fb7a59b12a10c6c981714 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 20 May 2009 14:05:36 +0000
Subject: Blackfin: rename some Blackfin drivers

Try to keep the naming conventions consistent, so:
	SPI_ADC_BF533 -> BFIN_SPI_ADC
	TWI_LCD       -> BFIN_TWI_LCD

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c
index 602dce0..62bba09 100644
--- a/arch/blackfin/mach-bf518/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf518/boards/ezbrd.c
@@ -166,8 +166,8 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -253,8 +253,8 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -518,7 +518,7 @@ static struct platform_device i2c_bfin_twi_device = {
 #endif
 
 static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf527/boards/cm_bf527.c b/arch/blackfin/mach-bf527/boards/cm_bf527.c
index 48e69ee..6d6f9ef 100644
--- a/arch/blackfin/mach-bf527/boards/cm_bf527.c
+++ b/arch/blackfin/mach-bf527/boards/cm_bf527.c
@@ -463,8 +463,8 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -554,8 +554,8 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -789,7 +789,7 @@ static struct platform_device i2c_bfin_twi_device = {
 #endif
 
 static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 		.type = "pcf8574_lcd",
diff --git a/arch/blackfin/mach-bf527/boards/ezbrd.c b/arch/blackfin/mach-bf527/boards/ezbrd.c
index 7fe480e..1435c5d 100644
--- a/arch/blackfin/mach-bf527/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf527/boards/ezbrd.c
@@ -247,8 +247,8 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -354,8 +354,8 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -586,7 +586,7 @@ static struct platform_device i2c_bfin_twi_device = {
 #endif
 
 static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index 9ab8d8c..147edd1 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -485,8 +485,8 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -600,8 +600,8 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -854,7 +854,7 @@ static struct platform_device i2c_bfin_twi_device = {
 #endif
 
 static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf533/boards/H8606.c b/arch/blackfin/mach-bf533/boards/H8606.c
index 0c66bf4..895f213 100644
--- a/arch/blackfin/mach-bf533/boards/H8606.c
+++ b/arch/blackfin/mach-bf533/boards/H8606.c
@@ -173,7 +173,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.ctl_reg = 0x1000,
@@ -216,7 +216,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 4,     /* actual baudrate is SCLK/(2xspeed_hz) */
diff --git a/arch/blackfin/mach-bf533/boards/cm_bf533.c b/arch/blackfin/mach-bf533/boards/cm_bf533.c
index e897487..a727e53 100644
--- a/arch/blackfin/mach-bf533/boards/cm_bf533.c
+++ b/arch/blackfin/mach-bf533/boards/cm_bf533.c
@@ -82,7 +82,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 #endif
 
 /* SPI ADC chip */
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
 	.bits_per_word = 16,
@@ -117,7 +117,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf533/boards/ezkit.c b/arch/blackfin/mach-bf533/boards/ezkit.c
index 08cd096..842f1c9 100644
--- a/arch/blackfin/mach-bf533/boards/ezkit.c
+++ b/arch/blackfin/mach-bf533/boards/ezkit.c
@@ -118,7 +118,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -154,7 +154,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index db96f33..e19c565 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c
@@ -192,7 +192,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -237,7 +237,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -448,7 +448,7 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 		.irq = 39,
 	},
 #endif
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537.c b/arch/blackfin/mach-bf537/boards/cm_bf537.c
index 41c75b9..4fee196 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537.c
@@ -86,7 +86,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -129,7 +129,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf537/boards/pnav10.c b/arch/blackfin/mach-bf537/boards/pnav10.c
index 4e1de1e..26707ce 100644
--- a/arch/blackfin/mach-bf537/boards/pnav10.c
+++ b/arch/blackfin/mach-bf537/boards/pnav10.c
@@ -265,8 +265,8 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -333,8 +333,8 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index 9bd0a80..f56289a 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -508,8 +508,8 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -695,8 +695,8 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 		.mode = SPI_MODE_3,
 	},
 #endif
-#if defined(CONFIG_SPI_ADC_BF533) \
-	|| defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) \
+	|| defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
@@ -1280,7 +1280,7 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 		.irq = IRQ_PF5,
 	},
 #endif
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
index 53ad10f..2805745 100644
--- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c
+++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
@@ -86,7 +86,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -129,7 +129,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index 096e661..2780e41 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -786,7 +786,7 @@ static struct i2c_board_info __initdata bfin_i2c_board_info0[] = {
 
 #if !defined(CONFIG_BF542)	/* The BF542 only has 1 TWI */
 static struct i2c_board_info __initdata bfin_i2c_board_info1[] = {
-#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
+#if defined(CONFIG_BFIN_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE)
 	{
 		I2C_BOARD_INFO("pcf8574_lcd", 0x22),
 	},
diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
index f623c6b..0dd9685 100644
--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
+++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
@@ -83,7 +83,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = {
 };
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 /* SPI ADC chip */
 static struct bfin5xx_spi_chip spi_adc_chip_info = {
 	.enable_dma = 1,         /* use dma transfer with this chip*/
@@ -126,7 +126,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SPI_ADC_BF533) || defined(CONFIG_SPI_ADC_BF533_MODULE)
+#if defined(CONFIG_BFIN_SPI_ADC) || defined(CONFIG_BFIN_SPI_ADC_MODULE)
 	{
 		.modalias = "bfin_spi_adc", /* Name of spi_driver for this device */
 		.max_speed_hz = 6250000,     /* max spi clock (SCK) speed in HZ */
-- 
cgit v0.10.2


From 5ba766752d14a741aa2d7a3c321917a310b34afb Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Thu, 7 May 2009 04:09:15 +0000
Subject: Blackfin: work around anomaly 05000220

When possible, work around anomaly 05000220 (external memory is write
back cached, but L2 is not cached).  If not possible, detect the
conditions at build time and reject any qualifying configurations.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index c04e7a4..f1a7969 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -913,7 +913,7 @@ config BFIN_ICACHE_LOCK
 	bool "Enable Instruction Cache Locking"
 
 choice
-	prompt "Policy"
+	prompt "External memory cache policy"
 	depends on BFIN_DCACHE
 	default BFIN_WB if !SMP
 	default BFIN_WT if SMP
@@ -954,12 +954,22 @@ config BFIN_WT
 
 endchoice
 
-config BFIN_L2_CACHEABLE
-	bool "Cache L2 SRAM"
-	depends on (BFIN_DCACHE || BFIN_ICACHE) && (BF54x || (BF561 && !SMP))
-	default n
-	help
-	  Select to make L2 SRAM cacheable in L1 data and instruction cache.
+choice
+	prompt "L2 SRAM cache policy"
+	depends on (BF54x || BF561)
+	default BFIN_L2_WT
+config BFIN_L2_WB
+	bool "Write back"
+	depends on !SMP
+
+config BFIN_L2_WT
+	bool "Write through"
+	depends on !SMP
+
+config BFIN_L2_NOT_CACHED
+	bool "Not cached"
+
+endchoice
 
 config MPU
 	bool "Enable the memory protection unit (EXPERIMENTAL)"
diff --git a/arch/blackfin/include/asm/cplb.h b/arch/blackfin/include/asm/cplb.h
index ad566ff..a75a6a9 100644
--- a/arch/blackfin/include/asm/cplb.h
+++ b/arch/blackfin/include/asm/cplb.h
@@ -53,29 +53,32 @@
 #define SDRAM_DGENERIC   (CPLB_L1_CHBL | CPLB_WT | CPLB_L1_AOW  | CPLB_COMMON)
 #endif
 
+#define SDRAM_DNON_CHBL  (CPLB_COMMON)
+#define SDRAM_EBIU       (CPLB_COMMON)
+#define SDRAM_OOPS       (CPLB_VALID | ANOMALY_05000158_WORKAROUND | CPLB_LOCK | CPLB_DIRTY)
+
 #define L1_DMEMORY       (CPLB_LOCK | CPLB_COMMON)
 
 #ifdef CONFIG_SMP
-#define L2_ATTR           (INITIAL_T | I_CPLB | D_CPLB)
-#define L2_IMEMORY         (CPLB_COMMON | CPLB_LOCK)
-#define L2_DMEMORY         (CPLB_COMMON | CPLB_LOCK)
+#define L2_ATTR          (INITIAL_T | I_CPLB | D_CPLB)
+#define L2_IMEMORY       (CPLB_COMMON)
+#define L2_DMEMORY       (CPLB_LOCK | CPLB_COMMON)
 
 #else
-#ifdef CONFIG_BFIN_L2_CACHEABLE
-#define L2_IMEMORY        (SDRAM_IGENERIC)
-#define L2_DMEMORY        (SDRAM_DGENERIC)
-#else
-#define L2_IMEMORY        (CPLB_COMMON)
-#define L2_DMEMORY        (CPLB_COMMON)
-#endif /* CONFIG_BFIN_L2_CACHEABLE */
-
-#define L2_ATTR           (INITIAL_T | SWITCH_T | I_CPLB | D_CPLB)
+#define L2_ATTR          (INITIAL_T | SWITCH_T | I_CPLB | D_CPLB)
+#define L2_IMEMORY       (SDRAM_IGENERIC)
+
+# if defined(CONFIG_BFIN_L2_WB)
+# define L2_DMEMORY      (CPLB_L1_CHBL | CPLB_COMMON)
+# elif defined(CONFIG_BFIN_L2_WT)
+# define L2_DMEMORY      (CPLB_L1_CHBL | CPLB_WT | CPLB_L1_AOW  | CPLB_COMMON)
+# elif defined(CONFIG_BFIN_L2_NOT_CACHED)
+# define L2_DMEMORY      (CPLB_COMMON)
+# else
+# define L2_DMEMORY      (0)
+# endif
 #endif /* CONFIG_SMP */
 
-#define SDRAM_DNON_CHBL  (CPLB_COMMON)
-#define SDRAM_EBIU       (CPLB_COMMON)
-#define SDRAM_OOPS       (CPLB_VALID | ANOMALY_05000158_WORKAROUND | CPLB_LOCK | CPLB_DIRTY)
-
 #define SIZE_1K 0x00000400      /* 1K */
 #define SIZE_4K 0x00001000      /* 4K */
 #define SIZE_1M 0x00100000      /* 1M */
diff --git a/arch/blackfin/mach-common/arch_checks.c b/arch/blackfin/mach-common/arch_checks.c
index 80d39b2..da93d92 100644
--- a/arch/blackfin/mach-common/arch_checks.c
+++ b/arch/blackfin/mach-common/arch_checks.c
@@ -71,3 +71,10 @@
 #if ANOMALY_05000448
 # error You are using a part with anomaly 05000448, this issue causes random memory read/write failures - that means random crashes.
 #endif
+
+/* if 220 exists, can not set External Memory WB and L2 not_cached, either External Memory not_cached and L2 WB */
+#if ANOMALY_05000220 && \
+	((defined(CONFIG_BFIN_WB) && defined(CONFIG_BFIN_L2_NOT_CACHED)) || \
+	 (!defined(CONFIG_BFIN_DCACHE) && defined(CONFIG_BFIN_L2_WB)))
+# error You are exposing Anomaly 220 in this config, either config L2 as Write Through, or make External Memory WB.
+#endif
-- 
cgit v0.10.2


From 97b070c8e7e82be30c8a3bf19e69b8c0c71f1fac Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 24 Apr 2009 03:17:07 +0000
Subject: Blackfin: add note about anomaly 05000242 being worked around

Document anomaly 05000242 workaround in source code.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-common/clocks-init.c b/arch/blackfin/mach-common/clocks-init.c
index 3539365..ef6870e 100644
--- a/arch/blackfin/mach-common/clocks-init.c
+++ b/arch/blackfin/mach-common/clocks-init.c
@@ -72,6 +72,7 @@ void init_clocks(void)
 #endif
 	bfin_write_PLL_LOCKCNT(0x300);
 	do_sync();
+	/* We always write PLL_CTL thus avoiding Anomaly 05000242 */
 	bfin_write16(PLL_CTL, PLL_CTL_VAL);
 	__asm__ __volatile__("IDLE;");
 	bfin_write_PLL_DIV(CONFIG_CCLK_ACT_DIV | CONFIG_SCLK_DIV);
-- 
cgit v0.10.2


From b9a3899d59c3f0fc074573f0eba2419b1e4c0bca Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Mon, 18 May 2009 18:33:26 +0000
Subject: Blackfin: make deferred hardware errors more exact

Hardware errors on the Blackfin architecture are queued by nature of the
hardware design.  Things that could generate a hardware level queue up at
the system interface and might not process until much later, at which
point the system would send a notification back to the core.

As such, it is possible for user space code to do something that would
trigger a hardware error, but have it delay long enough for the process
context to switch.  So when the hardware error does signal, we mistakenly
evaluate it as a different process or as kernel context and panic (erp!).
This makes it pretty difficult to find the offending context.  But wait,
there is good news somewhere.

By forcing a SSYNC in the interrupt entry, we force all pending queues at
the system level to be processed and all hardware errors to be signaled.
Then we check the current interrupt state to see if the hardware error is
now signaled.  If so, we re-queue the current interrupt and return thus
allowing the higher priority hardware error interrupt to process properly.
Since we haven't done any other context processing yet, the right context
will be selected and killed.  There is still the possibility that the
exact offending instruction will be unknown, but at least we'll have a
much better idea of where to look.

The downside of course is that this causes system-wide syncs at every
interrupt point which results in significant performance degradation.
Since this situation should not occur in any properly configured system
(as hardware errors are triggered by things like bad pointers), make it a
debug configuration option and disable it by default.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
index 79e7e63..1fc4981 100644
--- a/arch/blackfin/Kconfig.debug
+++ b/arch/blackfin/Kconfig.debug
@@ -54,6 +54,19 @@ config DEBUG_HWERR
 	  hardware error interrupts and need to know where they are coming
 	  from.
 
+config EXACT_HWERR
+	bool "Try to make Hardware errors exact"
+	depends on DEBUG_HWERR
+	help
+	  By default, the Blackfin hardware errors are not exact - the error
+          be reported multiple cycles after the error happens. This delay
+	  can cause the wrong application, or even the kernel to receive a
+	  signal to be killed. If you are getting HW errors in your system,
+	  try turning this on to ensure they are at least comming from the
+	  proper thread.
+
+	  On production systems, it is safe (and a small optimization) to say N.
+
 config DEBUG_DOUBLEFAULT
 	bool "Debug Double Faults"
 	default n
diff --git a/arch/blackfin/include/asm/entry.h b/arch/blackfin/include/asm/entry.h
index b30a296..ec58efc 100644
--- a/arch/blackfin/include/asm/entry.h
+++ b/arch/blackfin/include/asm/entry.h
@@ -35,21 +35,39 @@
 #else
 # define LOAD_IPIPE_IPEND
 #endif
+
+#ifndef CONFIG_EXACT_HWERR
+/* As a debugging aid - we save IPEND when DEBUG_KERNEL is on,
+ * otherwise it is a waste of cycles.
+ */
+# ifndef CONFIG_DEBUG_KERNEL
+#define INTERRUPT_ENTRY(N)						\
+    [--sp] = SYSCFG;							\
+    [--sp] = P0;	/*orig_p0*/					\
+    [--sp] = R0;	/*orig_r0*/					\
+    [--sp] = (R7:0,P5:0);						\
+    R0 = (N);								\
+    LOAD_IPIPE_IPEND							\
+    jump __common_int_entry;
+# else /* CONFIG_DEBUG_KERNEL */
 #define INTERRUPT_ENTRY(N)						\
     [--sp] = SYSCFG;							\
-									\
     [--sp] = P0;	/*orig_p0*/					\
     [--sp] = R0;	/*orig_r0*/					\
     [--sp] = (R7:0,P5:0);						\
+    p0.l = lo(IPEND);							\
+    p0.h = hi(IPEND);							\
+    r1 = [p0];								\
     R0 = (N);								\
     LOAD_IPIPE_IPEND							\
     jump __common_int_entry;
+# endif /* CONFIG_DEBUG_KERNEL */
 
 /* For timer interrupts, we need to save IPEND, since the user_mode
-	   macro accesses it to determine where to account time.  */
+ *macro accesses it to determine where to account time.
+ */
 #define TIMER_INTERRUPT_ENTRY(N)					\
     [--sp] = SYSCFG;							\
-									\
     [--sp] = P0;	/*orig_p0*/					\
     [--sp] = R0;	/*orig_r0*/					\
     [--sp] = (R7:0,P5:0);						\
@@ -58,6 +76,74 @@
     r1 = [p0];								\
     R0 = (N);								\
     jump __common_int_entry;
+#else /* CONFIG_EXACT_HWERR is defined */
+
+/* if we want hardware error to be exact, we need to do a SSYNC (which forces
+ * read/writes to complete to the memory controllers), and check to see that
+ * caused a pending HW error condition. If so, we assume it was caused by user
+ * space, by setting the same interrupt that we are in (so it goes off again)
+ * and context restore, and a RTI (without servicing anything). This should
+ * cause the pending HWERR to fire, and when that is done, this interrupt will
+ * be re-serviced properly.
+ * As you can see by the code - we actually need to do two SSYNCS - one to
+ * make sure the read/writes complete, and another to make sure the hardware
+ * error is recognized by the core.
+ */
+#define INTERRUPT_ENTRY(N)						\
+    SSYNC;								\
+    SSYNC;								\
+    [--sp] = SYSCFG;							\
+    [--sp] = P0;	/*orig_p0*/					\
+    [--sp] = R0;	/*orig_r0*/					\
+    [--sp] = (R7:0,P5:0);						\
+    R1 = ASTAT;								\
+    P0.L = LO(ILAT);							\
+    P0.H = HI(ILAT);							\
+    R0 = [P0];								\
+    CC = BITTST(R0, EVT_IVHW_P);					\
+    IF CC JUMP 1f;							\
+    ASTAT = R1;								\
+    p0.l = lo(IPEND);							\
+    p0.h = hi(IPEND);							\
+    r1 = [p0];								\
+    R0 = (N);								\
+    LOAD_IPIPE_IPEND							\
+    jump __common_int_entry;						\
+1:  ASTAT = R1;								\
+    RAISE N;								\
+    (R7:0, P5:0) = [SP++];						\
+    SP += 0x8;								\
+    SYSCFG = [SP++];							\
+    CSYNC;								\
+    RTI;
+
+#define TIMER_INTERRUPT_ENTRY(N)					\
+    SSYNC;								\
+    SSYNC;								\
+    [--sp] = SYSCFG;							\
+    [--sp] = P0;	/*orig_p0*/					\
+    [--sp] = R0;	/*orig_r0*/					\
+    [--sp] = (R7:0,P5:0);						\
+    R1 = ASTAT;								\
+    P0.L = LO(ILAT);							\
+    P0.H = HI(ILAT);							\
+    R0 = [P0];								\
+    CC = BITTST(R0, EVT_IVHW_P);					\
+    IF CC JUMP 1f;							\
+    ASTAT = R1;								\
+    p0.l = lo(IPEND);							\
+    p0.h = hi(IPEND);							\
+    r1 = [p0];								\
+    R0 = (N);								\
+    jump __common_int_entry;						\
+1:  ASTAT = R1;								\
+    RAISE N;								\
+    (R7:0, P5:0) = [SP++];						\
+    SP += 0x8;								\
+    SYSCFG = [SP++];							\
+    CSYNC;								\
+    RTI;
+#endif	/* CONFIG_EXACT_HWERR */
 
 /* This one pushes RETI without using CLI.  Interrupts are enabled.  */
 #define SAVE_CONTEXT_SYSCALL	save_context_syscall
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index f0636fd..da0558a 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -200,7 +200,18 @@ ENTRY(_ex_single_step)
 	cc = r7 == 0;
 	if !cc jump 1f;
 #endif
-
+#ifdef CONFIG_EXACT_HWERR
+	/* Read the ILAT, and to check to see if the process we are
+	 * single stepping caused a previous hardware error
+	 * If so, do not single step, (which lowers to IRQ5, and makes
+	 * us miss the error).
+	 */
+	p5.l = lo(ILAT);
+	p5.h = hi(ILAT);
+	r7 = [p5];
+	cc = bittst(r7, EVT_IVHW_P);
+	if cc jump 1f;
+#endif
 	/* Single stepping only a single instruction, so clear the trace
 	 * bit here.  */
 	r7 = syscfg;
@@ -262,15 +273,6 @@ ENTRY(_bfin_return_from_exception)
 	r6 = 0x25;
 	CC = R7 == R6;
 	if CC JUMP _double_fault;
-
-	/* Did we cause a HW error? */
-	p5.l = lo(ILAT);
-	p5.h = hi(ILAT);
-	r6 = [p5];
-	r7 = 0x20;		/* Did I just cause anther HW error? */
-	r6 = r7 & r6;
-	CC = R7 == R6;
-	if CC JUMP _double_fault;
 #endif
 
 	(R7:6,P5:4) = [sp++];
@@ -472,6 +474,16 @@ ENTRY(_trap) /* Exception: 4th entry into system event table(supervisor mode)*/
 	[--sp] = ASTAT;
 	[--sp] = (R7:6,P5:4);
 
+#ifdef CONFIG_EXACT_HWERR
+	/* Make sure all pending read/writes complete. This will ensure any
+	 * accesses which could cause hardware errors completes, and signal
+	 * the the hardware before we do something silly, like crash the
+	 * kernel. We don't need to work around anomaly 05000312, since
+	 * we are already atomic
+	 */
+	ssync;
+#endif
+
 #if ANOMALY_05000283 || ANOMALY_05000315
 	cc = r7 == r7;
 	p5.h = HI(CHIPID);
@@ -854,7 +866,7 @@ ENTRY(_ret_from_exception)
 	p1.h = _schedule_and_signal;
 	[p0] = p1;
 	csync;
-	raise 15;		/* raise evt14 to do signal or reschedule */
+	raise 15;		/* raise evt15 to do signal or reschedule */
 4:
 	r0 = syscfg;
 	bitclr(r0, 0);
@@ -915,7 +927,7 @@ ENTRY(_return_from_int)
 	p1.h = _schedule_and_signal_from_int;
 	[p0] = p1;
 	csync;
-#if ANOMALY_05000281
+#if ANOMALY_05000281 || ANOMALY_05000461
 	r0.l = lo(SAFE_USER_INSTRUCTION);
 	r0.h = hi(SAFE_USER_INSTRUCTION);
 	reti = r0;
@@ -929,18 +941,27 @@ ENTRY(_return_from_int)
 ENDPROC(_return_from_int)
 
 ENTRY(_lower_to_irq14)
-#if ANOMALY_05000281
+#if ANOMALY_05000281 || ANOMALY_05000461
 	r0.l = lo(SAFE_USER_INSTRUCTION);
 	r0.h = hi(SAFE_USER_INSTRUCTION);
 	reti = r0;
 #endif
-	r0 = 0x401f;
+
+#ifdef CONFIG_DEBUG_HWERR
+	/* enable irq14 & hwerr interrupt, until we transition to _evt14_softirq */
+	r0 = (EVT_IVG14 | EVT_IVHW | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
+#else
+	/* Only enable irq14 interrupt, until we transition to _evt14_softirq */
+	r0 = (EVT_IVG14 | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
+#endif
 	sti r0;
 	raise 14;
 	rti;
+ENDPROC(_lower_to_irq14)
+
 ENTRY(_evt14_softirq)
 #ifdef CONFIG_DEBUG_HWERR
-	r0 = 0x3f;
+	r0 = (EVT_IVHW | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
 	sti r0;
 #else
 	cli r0;
@@ -948,8 +969,9 @@ ENTRY(_evt14_softirq)
 	[--sp] = RETI;
 	SP += 4;
 	rts;
+ENDPROC(_evt14_softirq)
 
-_schedule_and_signal_from_int:
+ENTRY(_schedule_and_signal_from_int)
 	/* To end up here, vector 15 was changed - so we have to change it
 	 * back.
 	 */
@@ -982,8 +1004,9 @@ _schedule_and_signal_from_int:
 	call _finish_atomic_sections;
 	sp += 12;
 	jump.s .Lresume_userspace;
+ENDPROC(_schedule_and_signal_from_int)
 
-_schedule_and_signal:
+ENTRY(_schedule_and_signal)
 	SAVE_CONTEXT_SYSCALL
 	/* To end up here, vector 15 was changed - so we have to change it
 	 * back.
@@ -1001,7 +1024,7 @@ _schedule_and_signal:
 1:
 	RESTORE_CONTEXT
 	rti;
-ENDPROC(_lower_to_irq14)
+ENDPROC(_schedule_and_signal)
 
 /* We handle this 100% in exception space - to reduce overhead
  * Only potiential problem is if the software buffer gets swapped out of the
diff --git a/arch/blackfin/mach-common/interrupt.S b/arch/blackfin/mach-common/interrupt.S
index 0069c2d..9c46680 100644
--- a/arch/blackfin/mach-common/interrupt.S
+++ b/arch/blackfin/mach-common/interrupt.S
@@ -145,6 +145,14 @@ __common_int_entry:
 
 /* interrupt routine for ivhw - 5 */
 ENTRY(_evt_ivhw)
+	/* In case a single action kicks off multiple memory transactions, (like
+	 * a cache line fetch, - this can cause multiple hardware errors, let's
+	 * catch them all. First - make sure all the actions are complete, and
+	 * the core sees the hardware errors.
+	 */
+	SSYNC;
+	SSYNC;
+
 	SAVE_ALL_SYS
 #ifdef CONFIG_FRAME_POINTER
 	fp = 0;
@@ -159,6 +167,25 @@ ENTRY(_evt_ivhw)
 1:
 #endif
 
+	/* Handle all stacked hardware errors
+	 * To make sure we don't hang forever, only do it 10 times
+	 */
+	R0 = 0;
+	R2 = 10;
+1:
+	P0.L = LO(ILAT);
+	P0.H = HI(ILAT);
+	R1 = [P0];
+	CC = BITTST(R1, EVT_IVHW_P);
+	IF ! CC JUMP 2f;
+	/* OK a hardware error is pending - clear it */
+	R1 = EVT_IVHW_P;
+	[P0] = R1;
+	R0 += 1;
+	CC = R1 == R2;
+	if CC JUMP 2f;
+	JUMP 1b;
+2:
 	# We are going to dump something out, so make sure we print IPEND properly
 	p2.l = lo(IPEND);
 	p2.h = hi(IPEND);
-- 
cgit v0.10.2


From a0cab65642813b7990e1b4b2ab6ad92e171571f4 Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Mon, 11 May 2009 18:34:41 +0000
Subject: Blackfin: make sure stack is accessible before dumping it

When displaying a crash dump, make sure accessing the stack is safe so
we don't crash at the same time.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index fb57742..0d04a47 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -196,6 +196,11 @@ done:
 
 asmlinkage void double_fault_c(struct pt_regs *fp)
 {
+#ifdef CONFIG_DEBUG_BFIN_HWTRACE_ON
+	int j;
+	trace_buffer_save(j);
+#endif
+
 	console_verbose();
 	oops_in_progress = 1;
 #ifdef CONFIG_DEBUG_VERBOSE
@@ -220,6 +225,7 @@ asmlinkage void double_fault_c(struct pt_regs *fp)
 		dump_bfin_process(fp);
 		dump_bfin_mem(fp);
 		show_regs(fp);
+		dump_bfin_trace_buffer();
 	}
 #endif
 	panic("Double Fault - unrecoverable event");
@@ -832,6 +838,11 @@ void show_stack(struct task_struct *task, unsigned long *stack)
 	decode_address(buf, (unsigned int)stack);
 	printk(KERN_NOTICE " SP: [0x%p] %s\n", stack, buf);
 
+	if (!access_ok(VERIFY_READ, stack, (unsigned int)endstack - (unsigned int)stack)) {
+		printk(KERN_NOTICE "Invalid stack pointer\n");
+		return;
+	}
+
 	/* First thing is to look for a frame pointer */
 	for (addr = (unsigned int *)((unsigned int)stack & ~0xF); addr < endstack; addr++) {
 		if (*addr & 0x1)
-- 
cgit v0.10.2


From 0acad8dfee6bde7e246a95a52f864a8eee777ed8 Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Mon, 11 May 2009 18:55:16 +0000
Subject: Blackfin: add workaround for anomaly 05000461

Returning too fast with a bad RETI can trigger false errors.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 0d04a47..778a756 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -594,6 +594,9 @@ asmlinkage void trap_c(struct pt_regs *fp)
 		force_sig_info(sig, &info, current);
 	}
 
+	if (ANOMALY_05000461 && trapnr == VEC_HWERR && !access_ok(VERIFY_READ, fp->pc, 8))
+		fp->pc = SAFE_USER_INSTRUCTION;
+
 	trace_buffer_restore(j);
 	return;
 }
-- 
cgit v0.10.2


From 9ba3c24f10c948dadac2ca91ed714dfbcedd61ca Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Wed, 20 May 2009 21:19:21 +0000
Subject: Blackfin: include system/processor info in dump messages

People often copy & paste crash messages without surrounding context, so
include common useful information like system/processor stats in the crash
summary.  This should smooth over the report/test cycle a bit more.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 778a756..35f675f 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -1080,6 +1080,29 @@ void show_regs(struct pt_regs *fp)
 	unsigned int cpu = smp_processor_id();
 	unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic();
 
+	verbose_printk(KERN_NOTICE "\n");
+	if (CPUID != bfin_cpuid())
+		verbose_printk(KERN_NOTICE "Compiled for cpu family 0x%04x (Rev %d), "
+			"but running on:0x%04x (Rev %d)\n",
+			CPUID, bfin_compiled_revid(), bfin_cpuid(), bfin_revid());
+
+	verbose_printk(KERN_NOTICE "ADSP-%s-0.%d",
+		CPU, bfin_compiled_revid());
+
+	if (bfin_compiled_revid() !=  bfin_revid())
+		verbose_printk("(Detected 0.%d)", bfin_revid());
+
+	verbose_printk(" %lu(MHz CCLK) %lu(MHz SCLK) (%s)\n",
+		get_cclk()/1000000, get_sclk()/1000000,
+#ifdef CONFIG_MPU
+		"mpu on"
+#else
+		"mpu off"
+#endif
+		);
+
+	verbose_printk(KERN_NOTICE "%s", linux_banner);
+
 	verbose_printk(KERN_NOTICE "\n" KERN_NOTICE "SEQUENCER STATUS:\t\t%s\n", print_tainted());
 	verbose_printk(KERN_NOTICE " SEQSTAT: %08lx  IPEND: %04lx  SYSCFG: %04lx\n",
 		(long)fp->seqstat, fp->ipend, fp->syscfg);
-- 
cgit v0.10.2


From c8d5ea8ccb1ce586131c6c549899cb5073222da0 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 4 May 2009 09:11:37 +0000
Subject: Blackfin: update gptimers API

First we fix the prototypes for functions that return boolean values by
using "int" rather than "uint16_t".  Then we introduce a get_gptimer_run()
function for checking the current run status of a timer, and then we add a
disable_gptimers_sync() function which parallels disable_gptimers() with
corresponding normal "_sync" behavior.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/gptimers.h b/arch/blackfin/include/asm/gptimers.h
index b0f847a..89f08de 100644
--- a/arch/blackfin/include/asm/gptimers.h
+++ b/arch/blackfin/include/asm/gptimers.h
@@ -30,6 +30,7 @@
 # else
 #  define MAX_BLACKFIN_GPTIMERS 11
 #  define TIMER8_GROUP_REG      TIMER_ENABLE1
+#  define TIMER_GROUP2          1
 # endif
 # define TIMER0_GROUP_REG       TIMER_ENABLE0
 #endif
@@ -40,10 +41,12 @@
 # define MAX_BLACKFIN_GPTIMERS 12
 # define TIMER0_GROUP_REG      TMRS8_ENABLE
 # define TIMER8_GROUP_REG      TMRS4_ENABLE
+# define TIMER_GROUP2          1
 #endif
 /*
  * All others: 3 timers:
  */
+#define TIMER_GROUP1           0
 #if !defined(MAX_BLACKFIN_GPTIMERS)
 # define MAX_BLACKFIN_GPTIMERS 3
 # define TIMER0_GROUP_REG      TIMER_ENABLE
@@ -109,8 +112,8 @@
 #define TIMER_ERR_PROG_PER  0x8000
 #define TIMER_ERR_PROG_PW   0xC000
 #define TIMER_EMU_RUN       0x0200
-#define	TIMER_TOGGLE_HI     0x0100
-#define	TIMER_CLK_SEL       0x0080
+#define TIMER_TOGGLE_HI     0x0100
+#define TIMER_CLK_SEL       0x0080
 #define TIMER_OUT_DIS       0x0040
 #define TIMER_TIN_SEL       0x0020
 #define TIMER_IRQ_ENA       0x0010
@@ -169,23 +172,25 @@
 
 /* The actual gptimer API */
 
-void     set_gptimer_pwidth    (int timer_id, uint32_t width);
-uint32_t get_gptimer_pwidth    (int timer_id);
-void     set_gptimer_period    (int timer_id, uint32_t period);
-uint32_t get_gptimer_period    (int timer_id);
-uint32_t get_gptimer_count     (int timer_id);
-uint16_t get_gptimer_intr      (int timer_id);
-void     clear_gptimer_intr    (int timer_id);
-uint16_t get_gptimer_over      (int timer_id);
-void     clear_gptimer_over    (int timer_id);
-void     set_gptimer_config    (int timer_id, uint16_t config);
-uint16_t get_gptimer_config    (int timer_id);
-void     set_gptimer_pulse_hi  (int timer_id);
+void     set_gptimer_pwidth(int timer_id, uint32_t width);
+uint32_t get_gptimer_pwidth(int timer_id);
+void     set_gptimer_period(int timer_id, uint32_t period);
+uint32_t get_gptimer_period(int timer_id);
+uint32_t get_gptimer_count(int timer_id);
+int      get_gptimer_intr(int timer_id);
+void     clear_gptimer_intr(int timer_id);
+int      get_gptimer_over(int timer_id);
+void     clear_gptimer_over(int timer_id);
+void     set_gptimer_config(int timer_id, uint16_t config);
+uint16_t get_gptimer_config(int timer_id);
+int      get_gptimer_run(int timer_id);
+void     set_gptimer_pulse_hi(int timer_id);
 void     clear_gptimer_pulse_hi(int timer_id);
-void     enable_gptimers       (uint16_t mask);
-void     disable_gptimers      (uint16_t mask);
-uint16_t get_enabled_gptimers  (void);
-uint32_t get_gptimer_status    (int group);
-void     set_gptimer_status    (int group, uint32_t value);
+void     enable_gptimers(uint16_t mask);
+void     disable_gptimers(uint16_t mask);
+void     disable_gptimers_sync(uint16_t mask);
+uint16_t get_enabled_gptimers(void);
+uint32_t get_gptimer_status(int group);
+void     set_gptimer_status(int group, uint32_t value);
 
 #endif
diff --git a/arch/blackfin/kernel/gptimers.c b/arch/blackfin/kernel/gptimers.c
index 3a3e961..7281a91 100644
--- a/arch/blackfin/kernel/gptimers.c
+++ b/arch/blackfin/kernel/gptimers.c
@@ -189,10 +189,10 @@ void set_gptimer_status(int group, uint32_t value)
 }
 EXPORT_SYMBOL(set_gptimer_status);
 
-uint16_t get_gptimer_intr(int timer_id)
+int get_gptimer_intr(int timer_id)
 {
 	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
-	return (group_regs[BFIN_TIMER_OCTET(timer_id)]->status & timil_mask[timer_id]) ? 1 : 0;
+	return !!(group_regs[BFIN_TIMER_OCTET(timer_id)]->status & timil_mask[timer_id]);
 }
 EXPORT_SYMBOL(get_gptimer_intr);
 
@@ -203,10 +203,10 @@ void clear_gptimer_intr(int timer_id)
 }
 EXPORT_SYMBOL(clear_gptimer_intr);
 
-uint16_t get_gptimer_over(int timer_id)
+int get_gptimer_over(int timer_id)
 {
 	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
-	return (group_regs[BFIN_TIMER_OCTET(timer_id)]->status & tovf_mask[timer_id]) ? 1 : 0;
+	return !!(group_regs[BFIN_TIMER_OCTET(timer_id)]->status & tovf_mask[timer_id]);
 }
 EXPORT_SYMBOL(get_gptimer_over);
 
@@ -217,6 +217,13 @@ void clear_gptimer_over(int timer_id)
 }
 EXPORT_SYMBOL(clear_gptimer_over);
 
+int get_gptimer_run(int timer_id)
+{
+	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
+	return !!(group_regs[BFIN_TIMER_OCTET(timer_id)]->status & trun_mask[timer_id]);
+}
+EXPORT_SYMBOL(get_gptimer_run);
+
 void set_gptimer_config(int timer_id, uint16_t config)
 {
 	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
@@ -244,7 +251,7 @@ void enable_gptimers(uint16_t mask)
 }
 EXPORT_SYMBOL(enable_gptimers);
 
-void disable_gptimers(uint16_t mask)
+static void _disable_gptimers(uint16_t mask)
 {
 	int i;
 	uint16_t m = mask;
@@ -253,6 +260,12 @@ void disable_gptimers(uint16_t mask)
 		group_regs[i]->disable = m & 0xFF;
 		m >>= 8;
 	}
+}
+
+void disable_gptimers(uint16_t mask)
+{
+	int i;
+	_disable_gptimers(mask);
 	for (i = 0; i < MAX_BLACKFIN_GPTIMERS; ++i)
 		if (mask & (1 << i))
 			group_regs[BFIN_TIMER_OCTET(i)]->status |= trun_mask[i];
@@ -260,6 +273,13 @@ void disable_gptimers(uint16_t mask)
 }
 EXPORT_SYMBOL(disable_gptimers);
 
+void disable_gptimers_sync(uint16_t mask)
+{
+	_disable_gptimers(mask);
+	SSYNC();
+}
+EXPORT_SYMBOL(disable_gptimers_sync);
+
 void set_gptimer_pulse_hi(int timer_id)
 {
 	tassert(timer_id < MAX_BLACKFIN_GPTIMERS);
-- 
cgit v0.10.2


From d1800fe0e50ed22673cd895f2b0a7c2b79d3010a Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Mon, 25 May 2009 04:27:22 +0000
Subject: Blackfin: drop unused reserve_pda() function

The Per-processor Data Area isn't actually reserved by this function, and
all it ended up doing was issuing a printk(), so punt it.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/pda.h b/arch/blackfin/include/asm/pda.h
index a671427..b42555c 100644
--- a/arch/blackfin/include/asm/pda.h
+++ b/arch/blackfin/include/asm/pda.h
@@ -64,8 +64,6 @@ struct blackfin_pda {			/* Per-processor Data Area */
 
 extern struct blackfin_pda cpu_pda[];
 
-void reserve_pda(void);
-
 #endif	/* __ASSEMBLY__ */
 
 #endif /* _ASM_BLACKFIN_PDA_H */
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index b669595..3b8ebae 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -369,9 +369,6 @@ static void __cpuinit setup_secondary(unsigned int cpu)
 	bfin_write_ILAT(ilat);
 	CSYNC();
 
-	/* Reserve the PDA space for the secondary CPU. */
-	reserve_pda();
-
 	/* Enable interrupt levels IVG7-15. IARs have been already
 	 * programmed by the boot CPU.  */
 	bfin_irq_flags |= IMASK_IVG15 |
diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c
index 00543c8..9a62b30 100644
--- a/arch/blackfin/mm/init.c
+++ b/arch/blackfin/mm/init.c
@@ -134,12 +134,6 @@ asmlinkage void __init init_pda(void)
 #endif
 }
 
-void __cpuinit reserve_pda(void)
-{
-	printk(KERN_INFO "PDA for CPU%u reserved at %p\n", smp_processor_id(),
-					&cpu_pda[smp_processor_id()]);
-}
-
 void __init mem_init(void)
 {
 	unsigned int codek = 0, datak = 0, initk = 0;
@@ -186,10 +180,6 @@ static int __init sram_init(void)
 	/* Initialize the blackfin L1 Memory. */
 	bfin_sram_init();
 
-	/* Reserve the PDA space for the boot CPU right after we
-	 * initialized the scratch memory allocator.
-	 */
-	reserve_pda();
 	return 0;
 }
 pure_initcall(sram_init);
-- 
cgit v0.10.2


From c72aa0794a0ecc0b87ba9d5546215c26c8c80668 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Mon, 25 May 2009 04:44:00 +0000
Subject: Blackfin: merge sram init functions

Now that the sram_init() function exists only to call the bfin_sram_init()
after the punting of the reserve_pda() function, simply merge the two to
avoid pointless overhead.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mm/blackfin_sram.h b/arch/blackfin/mm/blackfin_sram.h
index 8cb0945..bc00628 100644
--- a/arch/blackfin/mm/blackfin_sram.h
+++ b/arch/blackfin/mm/blackfin_sram.h
@@ -30,7 +30,6 @@
 #ifndef __BLACKFIN_SRAM_H__
 #define __BLACKFIN_SRAM_H__
 
-extern void bfin_sram_init(void);
 extern void *l1sram_alloc(size_t);
 
 #endif
diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c
index 9a62b30..014a55a 100644
--- a/arch/blackfin/mm/init.c
+++ b/arch/blackfin/mm/init.c
@@ -175,15 +175,6 @@ void __init mem_init(void)
 		initk, codek, datak, DMA_UNCACHED_REGION >> 10, (reservedpages << (PAGE_SHIFT-10)));
 }
 
-static int __init sram_init(void)
-{
-	/* Initialize the blackfin L1 Memory. */
-	bfin_sram_init();
-
-	return 0;
-}
-pure_initcall(sram_init);
-
 static void __init free_init_pages(const char *what, unsigned long begin, unsigned long end)
 {
 	unsigned long addr;
diff --git a/arch/blackfin/mm/sram-alloc.c b/arch/blackfin/mm/sram-alloc.c
index 530d139..36376d8 100644
--- a/arch/blackfin/mm/sram-alloc.c
+++ b/arch/blackfin/mm/sram-alloc.c
@@ -223,7 +223,7 @@ static void __init l2_sram_init(void)
 	spin_lock_init(&l2_sram_lock);
 }
 
-void __init bfin_sram_init(void)
+static int __init bfin_sram_init(void)
 {
 	sram_piece_cache = kmem_cache_create("sram_piece_cache",
 				sizeof(struct sram_piece),
@@ -233,7 +233,10 @@ void __init bfin_sram_init(void)
 	l1_data_sram_init();
 	l1_inst_sram_init();
 	l2_sram_init();
+
+	return 0;
 }
+pure_initcall(bfin_sram_init);
 
 /* SRAM allocate function */
 static void *_sram_alloc(size_t size, struct sram_piece *pfree_head,
-- 
cgit v0.10.2


From 89ecd506917ba6e86ede072efbef6c69d01db4dd Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Mon, 25 May 2009 06:40:42 +0000
Subject: Blackfin: fix handling of initial L1 reservation

This restores some L1 reservation logic that was lost during the Blackfin
SMP merge.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mm/sram-alloc.c b/arch/blackfin/mm/sram-alloc.c
index 36376d8..fa54a4e 100644
--- a/arch/blackfin/mm/sram-alloc.c
+++ b/arch/blackfin/mm/sram-alloc.c
@@ -83,6 +83,14 @@ static struct kmem_cache *sram_piece_cache;
 static void __init l1sram_init(void)
 {
 	unsigned int cpu;
+	unsigned long reserve;
+
+#ifdef CONFIG_SMP
+	reserve = 0;
+#else
+	reserve = sizeof(struct l1_scratch_task_info);
+#endif
+
 	for (cpu = 0; cpu < num_possible_cpus(); ++cpu) {
 		per_cpu(free_l1_ssram_head, cpu).next =
 			kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
@@ -91,8 +99,8 @@ static void __init l1sram_init(void)
 			return;
 		}
 
-		per_cpu(free_l1_ssram_head, cpu).next->paddr = (void *)get_l1_scratch_start_cpu(cpu);
-		per_cpu(free_l1_ssram_head, cpu).next->size = L1_SCRATCH_LENGTH;
+		per_cpu(free_l1_ssram_head, cpu).next->paddr = (void *)get_l1_scratch_start_cpu(cpu) + reserve;
+		per_cpu(free_l1_ssram_head, cpu).next->size = L1_SCRATCH_LENGTH - reserve;
 		per_cpu(free_l1_ssram_head, cpu).next->pid = 0;
 		per_cpu(free_l1_ssram_head, cpu).next->next = NULL;
 
-- 
cgit v0.10.2


From 37082511f06108129bd5f96d625a6fae2d5a4ab4 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 26 May 2009 21:48:38 +0000
Subject: Blackfin: fix command line corruption with DEBUG_DOUBLEFAULT

Commit 6b3087c6 (which introduced Blackfin SMP) broke command line passing
when the DEBUG_DOUBLEFAULT config option was enabled.  Switch the code to
using a scratch register and not R7 which holds the command line.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-common/head.S b/arch/blackfin/mach-common/head.S
index 01d62ff..f826f6b 100644
--- a/arch/blackfin/mach-common/head.S
+++ b/arch/blackfin/mach-common/head.S
@@ -124,25 +124,25 @@ ENTRY(__start)
 	 * below
 	 */
 	GET_PDA(p0, r0);
-	r7 = [p0 + PDA_RETX];
+	r6 = [p0 + PDA_RETX];
 	p1.l = _init_saved_retx;
 	p1.h = _init_saved_retx;
-	[p1] = r7;
+	[p1] = r6;
 
-	r7 = [p0 + PDA_DCPLB];
+	r6 = [p0 + PDA_DCPLB];
 	p1.l = _init_saved_dcplb_fault_addr;
 	p1.h = _init_saved_dcplb_fault_addr;
-	[p1] = r7;
+	[p1] = r6;
 
-	r7 = [p0 + PDA_ICPLB];
+	r6 = [p0 + PDA_ICPLB];
 	p1.l = _init_saved_icplb_fault_addr;
 	p1.h = _init_saved_icplb_fault_addr;
-	[p1] = r7;
+	[p1] = r6;
 
-	r7 = [p0 + PDA_SEQSTAT];
+	r6 = [p0 + PDA_SEQSTAT];
 	p1.l = _init_saved_seqstat;
 	p1.h = _init_saved_seqstat;
-	[p1] = r7;
+	[p1] = r6;
 #endif
 
 	/* Initialize stack pointer */
-- 
cgit v0.10.2


From 79df1b69471b5ea4be8285c9dc6ab05cdf5a85d7 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 26 May 2009 23:34:51 +0000
Subject: Blackfin: override default uClinux MTD addr/size

Due to a processor anomaly (05000263 to be exact), most Blackfin parts
cannot keep the embedded filesystem image directly after the kernel in
RAM.  Instead, the filesystem needs to be relocated to the end of memory.
As such, we need to tweak the map addr/size during boot for Blackfin
systems.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index dc30e0a..80447f9 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -18,9 +18,12 @@
 #include <linux/tty.h>
 #include <linux/pfn.h>
 
+#ifdef CONFIG_MTD_UCLINUX
+#include <linux/mtd/map.h>
 #include <linux/ext2_fs.h>
 #include <linux/cramfs_fs.h>
 #include <linux/romfs_fs.h>
+#endif
 
 #include <asm/cplb.h>
 #include <asm/cacheflush.h>
@@ -45,6 +48,7 @@ EXPORT_SYMBOL(_ramend);
 EXPORT_SYMBOL(reserved_mem_dcache_on);
 
 #ifdef CONFIG_MTD_UCLINUX
+extern struct map_info uclinux_ram_map;
 unsigned long memory_mtd_end, memory_mtd_start, mtd_size;
 unsigned long _ebss;
 EXPORT_SYMBOL(memory_mtd_end);
@@ -535,10 +539,9 @@ static __init void memory_setup(void)
 	}
 
 	/* Relocate MTD image to the top of memory after the uncached memory area */
-	dma_memcpy((char *)memory_end, _end, mtd_size);
-
-	memory_mtd_start = memory_end;
-	_ebss = memory_mtd_start;	/* define _ebss for compatible */
+	uclinux_ram_map.phys = memory_mtd_start = memory_end;
+	uclinux_ram_map.size = mtd_size;
+	dma_memcpy((void *)uclinux_ram_map.phys, _end, uclinux_ram_map.size);
 #endif				/* CONFIG_MTD_UCLINUX */
 
 #if (defined(CONFIG_BFIN_ICACHE) && ANOMALY_05000263)
-- 
cgit v0.10.2


From ffc4d8bc4435918d2fd1e840b0bf985e7b56f0d3 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Fri, 29 May 2009 15:41:18 +0000
Subject: Blackfin: bf548-ezkit/bf537-stamp: add resources for ADXL345/346

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index f56289a..dfb5036 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -607,6 +607,43 @@ static const struct ad7879_platform_data bfin_ad7879_ts_info = {
 };
 #endif
 
+#if defined(CONFIG_INPUT_ADXL34X) || defined(CONFIG_INPUT_ADXL34X_MODULE)
+#include <linux/input.h>
+#include <linux/spi/adxl34x.h>
+static const struct adxl34x_platform_data adxl34x_info = {
+	.x_axis_offset = 0,
+	.y_axis_offset = 0,
+	.z_axis_offset = 0,
+	.tap_threshold = 0x31,
+	.tap_duration = 0x10,
+	.tap_latency = 0x60,
+	.tap_window = 0xF0,
+	.tap_axis_control = ADXL_TAP_X_EN | ADXL_TAP_Y_EN | ADXL_TAP_Z_EN,
+	.act_axis_control = 0xFF,
+	.activity_threshold = 5,
+	.inactivity_threshold = 3,
+	.inactivity_time = 4,
+	.free_fall_threshold = 0x7,
+	.free_fall_time = 0x20,
+	.data_rate = 0x8,
+	.data_range = ADXL_FULL_RES,
+
+	.ev_type = EV_ABS,
+	.ev_code_x = ABS_X,		/* EV_REL */
+	.ev_code_y = ABS_Y,		/* EV_REL */
+	.ev_code_z = ABS_Z,		/* EV_REL */
+
+	.ev_code_tap_x = BTN_TOUCH,		/* EV_KEY */
+	.ev_code_tap_y = BTN_TOUCH,		/* EV_KEY */
+	.ev_code_tap_z = BTN_TOUCH,		/* EV_KEY */
+
+/*	.ev_code_ff = KEY_F,*/		/* EV_KEY */
+/*	.ev_code_act_inactivity = KEY_A,*/	/* EV_KEY */
+	.power_mode = ADXL_AUTO_SLEEP | ADXL_LINK,
+	.fifo_mode = ADXL_FIFO_STREAM,
+};
+#endif
+
 #if defined(CONFIG_TOUCHSCREEN_AD7879_SPI) || defined(CONFIG_TOUCHSCREEN_AD7879_SPI_MODULE)
 static struct bfin5xx_spi_chip spi_ad7879_chip_info = {
 	.enable_dma = 0,
@@ -1312,6 +1349,13 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 		.platform_data = (void *)&adp5520_pdev_data,
 	},
 #endif
+#if defined(CONFIG_INPUT_ADXL34X_I2C) || defined(CONFIG_INPUT_ADXL34X_I2C_MODULE)
+	{
+		I2C_BOARD_INFO("adxl34x", 0x53),
+		.irq = IRQ_PG3,
+		.platform_data = (void *)&adxl34x_info,
+	},
+#endif
 };
 
 #if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE)
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index 2780e41..add5a17 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -208,6 +208,43 @@ static struct platform_device bfin_rotary_device = {
 };
 #endif
 
+#if defined(CONFIG_INPUT_ADXL34X) || defined(CONFIG_INPUT_ADXL34X_MODULE)
+#include <linux/input.h>
+#include <linux/spi/adxl34x.h>
+static const struct adxl34x_platform_data adxl34x_info = {
+	.x_axis_offset = 0,
+	.y_axis_offset = 0,
+	.z_axis_offset = 0,
+	.tap_threshold = 0x31,
+	.tap_duration = 0x10,
+	.tap_latency = 0x60,
+	.tap_window = 0xF0,
+	.tap_axis_control = ADXL_TAP_X_EN | ADXL_TAP_Y_EN | ADXL_TAP_Z_EN,
+	.act_axis_control = 0xFF,
+	.activity_threshold = 5,
+	.inactivity_threshold = 3,
+	.inactivity_time = 4,
+	.free_fall_threshold = 0x7,
+	.free_fall_time = 0x20,
+	.data_rate = 0x8,
+	.data_range = ADXL_FULL_RES,
+
+	.ev_type = EV_ABS,
+	.ev_code_x = ABS_X,		/* EV_REL */
+	.ev_code_y = ABS_Y,		/* EV_REL */
+	.ev_code_z = ABS_Z,		/* EV_REL */
+
+	.ev_code_tap_x = BTN_TOUCH,		/* EV_KEY */
+	.ev_code_tap_y = BTN_TOUCH,		/* EV_KEY */
+	.ev_code_tap_z = BTN_TOUCH,		/* EV_KEY */
+
+/*	.ev_code_ff = KEY_F,*/		/* EV_KEY */
+/*	.ev_code_act_inactivity = KEY_A,*/	/* EV_KEY */
+	.power_mode = ADXL_AUTO_SLEEP | ADXL_LINK,
+	.fifo_mode = ADXL_FIFO_STREAM,
+};
+#endif
+
 #if defined(CONFIG_RTC_DRV_BFIN) || defined(CONFIG_RTC_DRV_BFIN_MODULE)
 static struct platform_device rtc_device = {
 	.name = "rtc-bfin",
@@ -628,6 +665,14 @@ static struct bfin5xx_spi_chip spidev_chip_info = {
 };
 #endif
 
+#if defined(CONFIG_INPUT_ADXL34X_SPI) || defined(CONFIG_INPUT_ADXL34X_SPI_MODULE)
+static struct bfin5xx_spi_chip spi_adxl34x_chip_info = {
+	.enable_dma = 0,         /* use dma transfer with this chip*/
+	.bits_per_word = 8,
+	.cs_change_per_word = 0,
+};
+#endif
+
 static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #if defined(CONFIG_MTD_M25P80) \
 	|| defined(CONFIG_MTD_M25P80_MODULE)
@@ -653,15 +698,15 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 #if defined(CONFIG_TOUCHSCREEN_AD7877) || defined(CONFIG_TOUCHSCREEN_AD7877_MODULE)
-{
-	.modalias		= "ad7877",
-	.platform_data		= &bfin_ad7877_ts_info,
-	.irq			= IRQ_PB4,	/* old boards (<=Rev 1.3) use IRQ_PJ11 */
-	.max_speed_hz		= 12500000,     /* max spi clock (SCK) speed in HZ */
-	.bus_num		= 0,
-	.chip_select  		= 2,
-	.controller_data = &spi_ad7877_chip_info,
-},
+	{
+		.modalias		= "ad7877",
+		.platform_data		= &bfin_ad7877_ts_info,
+		.irq			= IRQ_PB4,	/* old boards (<=Rev 1.3) use IRQ_PJ11 */
+		.max_speed_hz		= 12500000,     /* max spi clock (SCK) speed in HZ */
+		.bus_num		= 0,
+		.chip_select  		= 2,
+		.controller_data = &spi_ad7877_chip_info,
+	},
 #endif
 #if defined(CONFIG_SPI_SPIDEV) || defined(CONFIG_SPI_SPIDEV_MODULE)
 	{
@@ -672,8 +717,19 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 		.controller_data = &spidev_chip_info,
 	},
 #endif
+#if defined(CONFIG_INPUT_ADXL34X_SPI) || defined(CONFIG_INPUT_ADXL34X_SPI_MODULE)
+	{
+		.modalias		= "adxl34x",
+		.platform_data		= &adxl34x_info,
+		.irq			= IRQ_PC5,
+		.max_speed_hz		= 5000000,     /* max spi clock (SCK) speed in HZ */
+		.bus_num		= 1,
+		.chip_select  		= 2,
+		.controller_data = &spi_adxl34x_chip_info,
+		.mode = SPI_MODE_3,
+	},
+#endif
 };
-
 #if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
@@ -797,6 +853,13 @@ static struct i2c_board_info __initdata bfin_i2c_board_info1[] = {
 		.irq = 212,
 	},
 #endif
+#if defined(CONFIG_INPUT_ADXL34X_I2C) || defined(CONFIG_INPUT_ADXL34X_I2C_MODULE)
+	{
+		I2C_BOARD_INFO("adxl34x", 0x53),
+		.irq = IRQ_PC5,
+		.platform_data = (void *)&adxl34x_info,
+	},
+#endif
 };
 #endif
 
-- 
cgit v0.10.2


From 9b9bfded623cffb4259b95e5419404015dba361f Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Wed, 27 May 2009 09:58:35 +0000
Subject: Blackfin: convert SMP to only use generic time framework

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index f1a7969..c56fd3e 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -223,6 +223,7 @@ endchoice
 
 config SMP
 	depends on BF561
+	select GENERIC_TIME
 	bool "Symmetric multi-processing support"
 	---help---
 	  This enables support for systems with more than one CPU,
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index daa02d4..adb54aa 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -24,14 +24,10 @@
 
 static struct irqaction bfin_timer_irq = {
 	.name = "Blackfin Timer Tick",
-#ifdef CONFIG_IRQ_PER_CPU
-	.flags = IRQF_DISABLED | IRQF_PERCPU,
-#else
 	.flags = IRQF_DISABLED
-#endif
 };
 
-#if defined(CONFIG_TICKSOURCE_GPTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_IPIPE)
 void __init setup_system_timer0(void)
 {
 	/* Power down the core timer, just to play safe. */
@@ -74,7 +70,7 @@ void __init setup_core_timer(void)
 static void __init
 time_sched_init(irqreturn_t(*timer_routine) (int, void *))
 {
-#if defined(CONFIG_TICKSOURCE_GPTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_IPIPE)
 	setup_system_timer0();
 	bfin_timer_irq.handler = timer_routine;
 	setup_irq(IRQ_TIMER0, &bfin_timer_irq);
@@ -94,7 +90,7 @@ static unsigned long gettimeoffset(void)
 	unsigned long offset;
 	unsigned long clocks_per_jiffy;
 
-#if defined(CONFIG_TICKSOURCE_GPTMR0) || defined(CONFIG_IPIPE)
+#if defined(CONFIG_IPIPE)
 	clocks_per_jiffy = bfin_read_TIMER0_PERIOD();
 	offset = bfin_read_TIMER0_COUNTER() / \
 		(((clocks_per_jiffy + 1) * HZ) / USEC_PER_SEC);
@@ -133,36 +129,25 @@ irqreturn_t timer_interrupt(int irq, void *dummy)
 	static long last_rtc_update;
 
 	write_seqlock(&xtime_lock);
-#if defined(CONFIG_TICKSOURCE_GPTMR0) && !defined(CONFIG_IPIPE)
+	do_timer(1);
+
 	/*
-	 * TIMIL0 is latched in __ipipe_grab_irq() when the I-Pipe is
-	 * enabled.
+	 * If we have an externally synchronized Linux clock, then update
+	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+	 * called as close as possible to 500 ms before the new second starts.
 	 */
-	if (get_gptimer_status(0) & TIMER_STATUS_TIMIL0) {
-#endif
-		do_timer(1);
-
-		/*
-		 * If we have an externally synchronized Linux clock, then update
-		 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-		 * called as close as possible to 500 ms before the new second starts.
-		 */
-		if (ntp_synced() &&
-		    xtime.tv_sec > last_rtc_update + 660 &&
-		    (xtime.tv_nsec / NSEC_PER_USEC) >=
-		    500000 - ((unsigned)TICK_SIZE) / 2
-		    && (xtime.tv_nsec / NSEC_PER_USEC) <=
-		    500000 + ((unsigned)TICK_SIZE) / 2) {
-			if (set_rtc_mmss(xtime.tv_sec) == 0)
-				last_rtc_update = xtime.tv_sec;
-			else
-				/* Do it again in 60s. */
-				last_rtc_update = xtime.tv_sec - 600;
-		}
-#if defined(CONFIG_TICKSOURCE_GPTMR0) && !defined(CONFIG_IPIPE)
-		set_gptimer_status(0, TIMER_STATUS_TIMIL0);
+	if (ntp_synced() &&
+	    xtime.tv_sec > last_rtc_update + 660 &&
+	    (xtime.tv_nsec / NSEC_PER_USEC) >=
+	    500000 - ((unsigned)TICK_SIZE) / 2
+	    && (xtime.tv_nsec / NSEC_PER_USEC) <=
+	    500000 + ((unsigned)TICK_SIZE) / 2) {
+		if (set_rtc_mmss(xtime.tv_sec) == 0)
+			last_rtc_update = xtime.tv_sec;
+		else
+			/* Do it again in 60s. */
+			last_rtc_update = xtime.tv_sec - 600;
 	}
-#endif
 	write_sequnlock(&xtime_lock);
 
 #ifdef CONFIG_IPIPE
-- 
cgit v0.10.2


From ea8538a039607cd959d28ed10086484e051ce7db Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Mon, 1 Jun 2009 00:49:32 -0400
Subject: Blackfin: add SSYNC to set_dma_sg() for descriptor fetching

Make sure the internal core buffers are flushed before telling the DMA
engine to fetch the descriptor structure so that it gets the right values.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h
index 46c5618..c9a5962 100644
--- a/arch/blackfin/include/asm/dma.h
+++ b/arch/blackfin/include/asm/dma.h
@@ -206,10 +206,16 @@ static inline unsigned long get_dma_curr_addr(unsigned int channel)
 
 static inline void set_dma_sg(unsigned int channel, struct dmasg *sg, int ndsize)
 {
+	/* Make sure the internal data buffers in the core are drained
+	 * so that the DMA descriptors are completely written when the
+	 * DMA engine goes to fetch them below.
+	 */
+	SSYNC();
+
+	dma_ch[channel].regs->next_desc_ptr = sg;
 	dma_ch[channel].regs->cfg =
 		(dma_ch[channel].regs->cfg & ~(0xf << 8)) |
 		((ndsize & 0xf) << 8);
-	dma_ch[channel].regs->next_desc_ptr = sg;
 }
 
 static inline int dma_channel_active(unsigned int channel)
-- 
cgit v0.10.2


From 780172bf875298133fe5627552a632d1fda3775a Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 1 Jun 2009 19:43:02 -0400
Subject: Blackfin: check SIC defines rather than variant names

Rather than having to maintain a hard coded list of Blackfin variants, use
the SIC defines themselves.  This fixes build problems on BF51x/BF538 under
some configurations as they were missing from one of the lists.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index 6e80861..351afd0 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -1152,23 +1152,22 @@ void do_irq(int vec, struct pt_regs *fp)
 	} else {
 		struct ivgx *ivg = ivg7_13[vec - IVG7].ifirst;
 		struct ivgx *ivg_stop = ivg7_13[vec - IVG7].istop;
-#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) \
-	|| defined(BF538_FAMILY) || defined(CONFIG_BF51x)
+#if defined(SIC_ISR0) || defined(SICA_ISR0)
 		unsigned long sic_status[3];
 
 		if (smp_processor_id()) {
-#ifdef CONFIG_SMP
+# ifdef SICB_ISR0
 			/* This will be optimized out in UP mode. */
 			sic_status[0] = bfin_read_SICB_ISR0() & bfin_read_SICB_IMASK0();
 			sic_status[1] = bfin_read_SICB_ISR1() & bfin_read_SICB_IMASK1();
-#endif
+# endif
 		} else {
 			sic_status[0] = bfin_read_SIC_ISR0() & bfin_read_SIC_IMASK0();
 			sic_status[1] = bfin_read_SIC_ISR1() & bfin_read_SIC_IMASK1();
 		}
-#ifdef CONFIG_BF54x
+# ifdef SIC_ISR2
 		sic_status[2] = bfin_read_SIC_ISR2() & bfin_read_SIC_IMASK2();
-#endif
+# endif
 		for (;; ivg++) {
 			if (ivg >= ivg_stop) {
 				atomic_inc(&num_spurious);
@@ -1234,14 +1233,14 @@ asmlinkage int __ipipe_grab_irq(int vec, struct pt_regs *regs)
 		irq = IRQ_CORETMR;
 
 	} else {
-#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561)
+#if defined(SIC_ISR0) || defined(SICA_ISR0)
 		unsigned long sic_status[3];
 
 		sic_status[0] = bfin_read_SIC_ISR0() & bfin_read_SIC_IMASK0();
 		sic_status[1] = bfin_read_SIC_ISR1() & bfin_read_SIC_IMASK1();
-#ifdef CONFIG_BF54x
+# ifdef SIC_ISR2
 		sic_status[2] = bfin_read_SIC_ISR2() & bfin_read_SIC_IMASK2();
-#endif
+# endif
 		for (;; ivg++) {
 			if (ivg >= ivg_stop) {
 				atomic_inc(&num_spurious);
-- 
cgit v0.10.2


From 595d681f2c0610223bbe79189b0896f1b8c24f8a Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 2 Jun 2009 00:35:33 +0000
Subject: Blackfin: rename Blackfin relocs according to the toolchain

The latest Blackfin toolchain has fixed its relocation scheme to match
other ports: always use R_BFIN_ prefix and capitalize everything.  This
brings the kernel in line with those fixes.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/elf.h b/arch/blackfin/include/asm/elf.h
index cdbfcfc..230e160 100644
--- a/arch/blackfin/include/asm/elf.h
+++ b/arch/blackfin/include/asm/elf.h
@@ -55,50 +55,50 @@ do {											\
 #define ELF_FDPIC_CORE_EFLAGS	EF_BFIN_FDPIC
 #define ELF_EXEC_PAGESIZE	4096
 
-#define	R_unused0	0	/* relocation type 0 is not defined */
-#define R_pcrel5m2	1	/*LSETUP part a */
-#define R_unused1	2	/* relocation type 2 is not defined */
-#define R_pcrel10	3	/* type 3, if cc jump <target>  */
-#define R_pcrel12_jump	4	/* type 4, jump <target> */
-#define R_rimm16	5	/* type 0x5, rN = <target> */
-#define R_luimm16	6	/* # 0x6, preg.l=<target> Load imm 16 to lower half */
-#define R_huimm16  	7	/* # 0x7, preg.h=<target> Load imm 16 to upper half */
-#define R_pcrel12_jump_s 8	/* # 0x8 jump.s <target> */
-#define R_pcrel24_jump_x 9	/* # 0x9 jump.x <target> */
-#define R_pcrel24       10	/* # 0xa call <target> , not expandable */
-#define R_unusedb       11	/* # 0xb not generated */
-#define R_unusedc       12	/* # 0xc  not used */
-#define R_pcrel24_jump_l 13	/*0xd jump.l <target> */
-#define R_pcrel24_call_x 14	/* 0xE, call.x <target> if <target> is above 24 bit limit call through P1 */
-#define R_var_eq_symb    15	/* 0xf, linker should treat it same as 0x12 */
-#define R_byte_data      16	/* 0x10, .byte var = symbol */
-#define R_byte2_data     17	/* 0x11, .byte2 var = symbol */
-#define R_byte4_data     18	/* 0x12, .byte4 var = symbol and .var var=symbol */
-#define R_pcrel11        19	/* 0x13, lsetup part b */
-#define R_unused14      20	/* 0x14, undefined */
-#define R_unused15       21	/* not generated by VDSP 3.5 */
+#define R_BFIN_UNUSED0         0   /* relocation type 0 is not defined */
+#define R_BFIN_PCREL5M2        1   /* LSETUP part a */
+#define R_BFIN_UNUSED1         2   /* relocation type 2 is not defined */
+#define R_BFIN_PCREL10         3   /* type 3, if cc jump <target> */
+#define R_BFIN_PCREL12_JUMP    4   /* type 4, jump <target> */
+#define R_BFIN_RIMM16          5   /* type 0x5, rN = <target> */
+#define R_BFIN_LUIMM16         6   /* # 0x6, preg.l=<target> Load imm 16 to lower half */
+#define R_BFIN_HUIMM16         7   /* # 0x7, preg.h=<target> Load imm 16 to upper half */
+#define R_BFIN_PCREL12_JUMP_S  8   /* # 0x8 jump.s <target> */
+#define R_BFIN_PCREL24_JUMP_X  9   /* # 0x9 jump.x <target> */
+#define R_BFIN_PCREL24         10  /* # 0xa call <target> , not expandable */
+#define R_BFIN_UNUSEDB         11  /* # 0xb not generated */
+#define R_BFIN_UNUSEDC         12  /* # 0xc  not used */
+#define R_BFIN_PCREL24_JUMP_L  13  /* 0xd jump.l <target> */
+#define R_BFIN_PCREL24_CALL_X  14  /* 0xE, call.x <target> if <target> is above 24 bit limit call through P1 */
+#define R_BFIN_VAR_EQ_SYMB     15  /* 0xf, linker should treat it same as 0x12 */
+#define R_BFIN_BYTE_DATA       16  /* 0x10, .byte var = symbol */
+#define R_BFIN_BYTE2_DATA      17  /* 0x11, .byte2 var = symbol */
+#define R_BFIN_BYTE4_DATA      18  /* 0x12, .byte4 var = symbol and .var var=symbol */
+#define R_BFIN_PCREL11         19  /* 0x13, lsetup part b */
+#define R_BFIN_UNUSED14        20  /* 0x14, undefined */
+#define R_BFIN_UNUSED15        21  /* not generated by VDSP 3.5 */
 
 /* arithmetic relocations */
-#define R_push		 0xE0
-#define R_const		 0xE1
-#define R_add		 0xE2
-#define R_sub		 0xE3
-#define R_mult		 0xE4
-#define R_div		 0xE5
-#define R_mod		 0xE6
-#define R_lshift	 0xE7
-#define R_rshift	 0xE8
-#define R_and		 0xE9
-#define R_or		 0xEA
-#define R_xor		 0xEB
-#define R_land		 0xEC
-#define R_lor		 0xED
-#define R_len		 0xEE
-#define R_neg		 0xEF
-#define R_comp		 0xF0
-#define R_page		 0xF1
-#define R_hwpage	 0xF2
-#define R_addr		 0xF3
+#define R_BFIN_PUSH            0xE0
+#define R_BFIN_CONST           0xE1
+#define R_BFIN_ADD             0xE2
+#define R_BFIN_SUB             0xE3
+#define R_BFIN_MULT            0xE4
+#define R_BFIN_DIV             0xE5
+#define R_BFIN_MOD             0xE6
+#define R_BFIN_LSHIFT          0xE7
+#define R_BFIN_RSHIFT          0xE8
+#define R_BFIN_AND             0xE9
+#define R_BFIN_OR              0xEA
+#define R_BFIN_XOR             0xEB
+#define R_BFIN_LAND            0xEC
+#define R_BFIN_LOR             0xED
+#define R_BFIN_LEN             0xEE
+#define R_BFIN_NEG             0xEF
+#define R_BFIN_COMP            0xF0
+#define R_BFIN_PAGE            0xF1
+#define R_BFIN_HWPAGE          0xF2
+#define R_BFIN_ADDR            0xF3
 
 /* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
    use of this is to invoke "./ld.so someprog" to test out a new version of
diff --git a/arch/blackfin/kernel/module.c b/arch/blackfin/kernel/module.c
index 1bd7f2d..d5aee36 100644
--- a/arch/blackfin/kernel/module.c
+++ b/arch/blackfin/kernel/module.c
@@ -201,8 +201,8 @@ apply_relocate(Elf_Shdr * sechdrs, const char *strtab,
 /*            Arithmetic relocations are handled.                        */
 /*            We do not expect LSETUP to be split and hence is not       */
 /*            handled.                                                   */
-/*            R_byte and R_byte2 are also not handled as the gas         */
-/*            does not generate it.                                      */
+/*            R_BFIN_BYTE and R_BFIN_BYTE2 are also not handled as the   */
+/*            gas does not generate it.                                  */
 /*************************************************************************/
 int
 apply_relocate_add(Elf_Shdr * sechdrs, const char *strtab,
@@ -243,8 +243,8 @@ apply_relocate_add(Elf_Shdr * sechdrs, const char *strtab,
 #endif
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 
-		case R_pcrel24:
-		case R_pcrel24_jump_l:
+		case R_BFIN_PCREL24:
+		case R_BFIN_PCREL24_JUMP_L:
 			/* Add the value, subtract its postition */
 			location16 =
 			    (uint16_t *) (sechdrs[sechdrs[relsec].sh_info].
@@ -266,18 +266,18 @@ apply_relocate_add(Elf_Shdr * sechdrs, const char *strtab,
 			    (*location16 & 0xff00) | (value >> 16 & 0x00ff);
 			*(location16 + 1) = value & 0xffff;
 			break;
-		case R_pcrel12_jump:
-		case R_pcrel12_jump_s:
+		case R_BFIN_PCREL12_JUMP:
+		case R_BFIN_PCREL12_JUMP_S:
 			value -= (uint32_t) location32;
 			value >>= 1;
 			*location16 = (value & 0xfff);
 			break;
-		case R_pcrel10:
+		case R_BFIN_PCREL10:
 			value -= (uint32_t) location32;
 			value >>= 1;
 			*location16 = (value & 0x3ff);
 			break;
-		case R_luimm16:
+		case R_BFIN_LUIMM16:
 			pr_debug("before %x after %x\n", *location16,
 				       (value & 0xffff));
 			tmp = (value & 0xffff);
@@ -286,7 +286,7 @@ apply_relocate_add(Elf_Shdr * sechdrs, const char *strtab,
 			} else
 				*location16 = tmp;
 			break;
-		case R_huimm16:
+		case R_BFIN_HUIMM16:
 			pr_debug("before %x after %x\n", *location16,
 				       ((value >> 16) & 0xffff));
 			tmp = ((value >> 16) & 0xffff);
@@ -295,10 +295,10 @@ apply_relocate_add(Elf_Shdr * sechdrs, const char *strtab,
 			} else
 				*location16 = tmp;
 			break;
-		case R_rimm16:
+		case R_BFIN_RIMM16:
 			*location16 = (value & 0xffff);
 			break;
-		case R_byte4_data:
+		case R_BFIN_BYTE4_DATA:
 			pr_debug("before %x after %x\n", *location32, value);
 			*location32 = value;
 			break;
-- 
cgit v0.10.2


From f1db88d2a7f1c92284e64f5dbb5c7a316a22576d Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 2 Jun 2009 08:46:35 +0000
Subject: Blackfin: document the lsl variants of the L1 allocator

Make sure the meaning of "lsl" is covered somewhere and it is clear why we
somewhat duplicate the sram alloc/free functions.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/mm/sram-alloc.c b/arch/blackfin/mm/sram-alloc.c
index fa54a4e..0bc3c4e 100644
--- a/arch/blackfin/mm/sram-alloc.c
+++ b/arch/blackfin/mm/sram-alloc.c
@@ -743,6 +743,10 @@ found:
 }
 EXPORT_SYMBOL(sram_free_with_lsl);
 
+/* Allocate memory and keep in L1 SRAM List (lsl) so that the resources are
+ * tracked.  These are designed for userspace so that when a process exits,
+ * we can safely reap their resources.
+ */
 void *sram_alloc_with_lsl(size_t size, unsigned long flags)
 {
 	void *addr = NULL;
-- 
cgit v0.10.2


From 685a694f0653b7db49f663b2cd6953695214fb30 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 3 Jun 2009 00:33:31 +0000
Subject: Blackfin: convert early_printk EVT init to a loop

The EVT registers are all contiguous in the memory map, so using a loop to
initialize them all rather than hardcoding the list results in much better
generated code (a hardware loop rather than a whole bunch of individual
loads).

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/early_printk.c b/arch/blackfin/kernel/early_printk.c
index c8ad051..3302719 100644
--- a/arch/blackfin/kernel/early_printk.c
+++ b/arch/blackfin/kernel/early_printk.c
@@ -178,25 +178,15 @@ int __init setup_early_printk(char *buf)
 
 asmlinkage void __init init_early_exception_vectors(void)
 {
+	u32 evt;
 	SSYNC();
 
 	/* cannot program in software:
 	 * evt0 - emulation (jtag)
 	 * evt1 - reset
 	 */
-	bfin_write_EVT2(early_trap);
-	bfin_write_EVT3(early_trap);
-	bfin_write_EVT5(early_trap);
-	bfin_write_EVT6(early_trap);
-	bfin_write_EVT7(early_trap);
-	bfin_write_EVT8(early_trap);
-	bfin_write_EVT9(early_trap);
-	bfin_write_EVT10(early_trap);
-	bfin_write_EVT11(early_trap);
-	bfin_write_EVT12(early_trap);
-	bfin_write_EVT13(early_trap);
-	bfin_write_EVT14(early_trap);
-	bfin_write_EVT15(early_trap);
+	for (evt = EVT2; evt <= EVT15; evt += 4)
+		bfin_write32(evt, early_trap);
 	CSYNC();
 
 	/* Set all the return from interrupt, exception, NMI to a known place
-- 
cgit v0.10.2


From a8372b5ca618d4ea41b3c7ac3cef8dd6efa68bc6 Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Thu, 4 Jun 2009 00:32:59 +0000
Subject: Blackfin: add missing access_ok() checks to user functions

The core string/clear user functions weren't checking the user pointers
which caused kernel crashes with some bad programs and tests (like LTP).

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h
index 3248033..97729be 100644
--- a/arch/blackfin/include/asm/uaccess.h
+++ b/arch/blackfin/include/asm/uaccess.h
@@ -233,16 +233,29 @@ strncpy_from_user(char *dst, const char *src, long count)
 }
 
 /*
- * Return the size of a string (including the ending 0)
+ * Get the size of a string in user space.
+ *   src: The string to measure
+ *     n: The maximum valid length
  *
- * Return 0 on exception, a value greater than N if too long
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ * If the string is too long, returns a value greater than n.
  */
-static inline long strnlen_user(const char *src, long n)
+static inline long __must_check strnlen_user(const char *src, long n)
 {
-	return (strlen(src) + 1);
+	if (!access_ok(VERIFY_READ, src, 1))
+		return 0;
+	return strnlen(src, n) + 1;
 }
 
-#define strlen_user(str) strnlen_user(str, 32767)
+static inline long __must_check strlen_user(const char *src)
+{
+	if (!access_ok(VERIFY_READ, src, 1))
+		return 0;
+	return strlen(src) + 1;
+}
 
 /*
  * Zero Userspace
@@ -251,6 +264,8 @@ static inline long strnlen_user(const char *src, long n)
 static inline unsigned long __must_check
 __clear_user(void *to, unsigned long n)
 {
+	if (!access_ok(VERIFY_WRITE, to, n))
+		return n;
 	memset(to, 0, n);
 	return 0;
 }
-- 
cgit v0.10.2


From 8d0d8f2a3a57479b695a59ed2f67236f1488b90d Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 4 Jun 2009 19:11:31 +0000
Subject: Blackfin: punt duplicated search_exception_table() prototype

The common code already has a prototype for this function and we don't use
it anywhere in the Blackfin code, so punt it from the Blackfin headers.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h
index 97729be..01f42b0 100644
--- a/arch/blackfin/include/asm/uaccess.h
+++ b/arch/blackfin/include/asm/uaccess.h
@@ -83,9 +83,6 @@ struct exception_table_entry {
 	unsigned long insn, fixup;
 };
 
-/* Returns 0 if exception not found and fixup otherwise.  */
-extern unsigned long search_exception_table(unsigned long);
-
 /*
  * These are the main single-value transfer routines.  They automatically
  * use the right size if we just have the right pointer type.
-- 
cgit v0.10.2


From a43b739f257fd2569e11c6c93fbb86df382848e9 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 4 Jun 2009 19:24:31 +0000
Subject: Blackfin: push access_ok() L1 attribute down

There is no need for the L1 attribute to be on the prototype of the
access_ok() function as all consumers of the function do not care where it
lives -- they'll always use pcrel calls to get to it.  This prevents
pointless recompiles of most of the system when this config option changes.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h
index 01f42b0..8894e9f 100644
--- a/arch/blackfin/include/asm/uaccess.h
+++ b/arch/blackfin/include/asm/uaccess.h
@@ -59,12 +59,8 @@ static inline int is_in_rom(unsigned long addr)
 #ifndef CONFIG_ACCESS_CHECK
 static inline int _access_ok(unsigned long addr, unsigned long size) { return 1; }
 #else
-#ifdef CONFIG_ACCESS_OK_L1
-extern int _access_ok(unsigned long addr, unsigned long size)__attribute__((l1_text));
-#else
 extern int _access_ok(unsigned long addr, unsigned long size);
 #endif
-#endif
 
 /*
  * The exception table consists of pairs of addresses: the first is the
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index e040e03..30d0843 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -322,6 +322,9 @@ void finish_atomic_sections (struct pt_regs *regs)
 }
 
 #if defined(CONFIG_ACCESS_CHECK)
+#ifdef CONFIG_ACCESS_OK_L1
+__attribute__((l1_text))
+#endif
 /* Return 1 if access to memory range is OK, 0 otherwise */
 int _access_ok(unsigned long addr, unsigned long size)
 {
-- 
cgit v0.10.2


From 11aca0e7352b6a28193221cd2dd85f758fb71b23 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 4 Jun 2009 17:28:16 +0000
Subject: Blackfin: kgdb: fix up error return values

The Blackfin kgdb code was all passing back positive errno values when it
really should have been using negative errno values.

Reported-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c
index b163f6d..c5362b2 100644
--- a/arch/blackfin/kernel/kgdb.c
+++ b/arch/blackfin/kernel/kgdb.c
@@ -466,7 +466,7 @@ static int validate_memory_access_address(unsigned long addr, int size)
 	int cpu = raw_smp_processor_id();
 
 	if (size < 0)
-		return EFAULT;
+		return -EFAULT;
 	if (addr >= 0x1000 && (addr + size) <= physical_mem_end)
 		return 0;
 	if (addr >= SYSMMR_BASE)
@@ -498,7 +498,7 @@ static int validate_memory_access_address(unsigned long addr, int size)
 	if (IN_MEM(addr, size, L2_START, L2_LENGTH))
 		return 0;
 
-	return EFAULT;
+	return -EFAULT;
 }
 
 /*
@@ -508,14 +508,15 @@ static int validate_memory_access_address(unsigned long addr, int size)
 int kgdb_mem2hex(char *mem, char *buf, int count)
 {
 	char *tmp;
-	int err = 0;
+	int err;
 	unsigned char *pch;
 	unsigned short mmr16;
 	unsigned long mmr32;
 	int cpu = raw_smp_processor_id();
 
-	if (validate_memory_access_address((unsigned long)mem, count))
-		return EFAULT;
+	err = validate_memory_access_address((unsigned long)mem, count);
+	if (err)
+		return err;
 
 	/*
 	 * We use the upper half of buf as an intermediate buffer for the
@@ -533,7 +534,7 @@ int kgdb_mem2hex(char *mem, char *buf, int count)
 				*tmp++ = *pch++;
 				tmp -= 2;
 			} else
-				err = EFAULT;
+				err = -EFAULT;
 			break;
 		case 4:
 			if ((unsigned int)mem % 4 == 0) {
@@ -545,10 +546,10 @@ int kgdb_mem2hex(char *mem, char *buf, int count)
 				*tmp++ = *pch++;
 				tmp -= 4;
 			} else
-				err = EFAULT;
+				err = -EFAULT;
 			break;
 		default:
-			err = EFAULT;
+			err = -EFAULT;
 		}
 	} else if ((cpu == 0 && IN_MEM(mem, count, L1_CODE_START, L1_CODE_LENGTH))
 #ifdef CONFIG_SMP
@@ -557,7 +558,7 @@ int kgdb_mem2hex(char *mem, char *buf, int count)
 		) {
 		/* access L1 instruction SRAM*/
 		if (dma_memcpy(tmp, mem, count) == NULL)
-			err = EFAULT;
+			err = -EFAULT;
 	} else
 		err = probe_kernel_read(tmp, mem, count);
 
@@ -585,7 +586,7 @@ int kgdb_ebin2mem(char *buf, char *mem, int count)
 	char *tmp_new;
 	unsigned short *mmr16;
 	unsigned long *mmr32;
-	int err = 0;
+	int err;
 	int size = 0;
 	int cpu = raw_smp_processor_id();
 
@@ -601,8 +602,9 @@ int kgdb_ebin2mem(char *buf, char *mem, int count)
 		size++;
 	}
 
-	if (validate_memory_access_address((unsigned long)mem, size))
-		return EFAULT;
+	err = validate_memory_access_address((unsigned long)mem, size);
+	if (err)
+		return err;
 
 	if ((unsigned int)mem >= SYSMMR_BASE) { /*access MMR registers*/
 		switch (size) {
@@ -611,17 +613,17 @@ int kgdb_ebin2mem(char *buf, char *mem, int count)
 				mmr16 = (unsigned short *)buf;
 				*(unsigned short *)mem = *mmr16;
 			} else
-				return EFAULT;
+				err = -EFAULT;
 			break;
 		case 4:
 			if ((unsigned int)mem % 4 == 0) {
 				mmr32 = (unsigned long *)buf;
 				*(unsigned long *)mem = *mmr32;
 			} else
-				return EFAULT;
+				err = -EFAULT;
 			break;
 		default:
-			return EFAULT;
+			err = -EFAULT;
 		}
 	} else if ((cpu == 0 && IN_MEM(mem, count, L1_CODE_START, L1_CODE_LENGTH))
 #ifdef CONFIG_SMP
@@ -630,7 +632,7 @@ int kgdb_ebin2mem(char *buf, char *mem, int count)
 		) {
 		/* access L1 instruction SRAM */
 		if (dma_memcpy(mem, buf, size) == NULL)
-			err = EFAULT;
+			err = -EFAULT;
 	} else
 		err = probe_kernel_write(mem, buf, size);
 
@@ -648,10 +650,12 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
 	char *tmp_hex;
 	unsigned short *mmr16;
 	unsigned long *mmr32;
+	int err;
 	int cpu = raw_smp_processor_id();
 
-	if (validate_memory_access_address((unsigned long)mem, count))
-		return EFAULT;
+	err = validate_memory_access_address((unsigned long)mem, count);
+	if (err)
+		return err;
 
 	/*
 	 * We use the upper half of buf as an intermediate buffer for the
@@ -673,17 +677,17 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
 				mmr16 = (unsigned short *)tmp_raw;
 				*(unsigned short *)mem = *mmr16;
 			} else
-				return EFAULT;
+				err = -EFAULT;
 			break;
 		case 4:
 			if ((unsigned int)mem % 4 == 0) {
 				mmr32 = (unsigned long *)tmp_raw;
 				*(unsigned long *)mem = *mmr32;
 			} else
-				return EFAULT;
+				err = -EFAULT;
 			break;
 		default:
-			return EFAULT;
+			err = -EFAULT;
 		}
 	} else if ((cpu == 0 && IN_MEM(mem, count, L1_CODE_START, L1_CODE_LENGTH))
 #ifdef CONFIG_SMP
@@ -692,10 +696,11 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
 		) {
 		/* access L1 instruction SRAM */
 		if (dma_memcpy(mem, tmp_raw, count) == NULL)
-			return EFAULT;
+			err = -EFAULT;
 	} else
-		return probe_kernel_write(mem, tmp_raw, count);
-	return 0;
+		err = probe_kernel_write(mem, tmp_raw, count);
+
+	return err;
 }
 
 int kgdb_validate_break_address(unsigned long addr)
@@ -715,7 +720,7 @@ int kgdb_validate_break_address(unsigned long addr)
 	if (IN_MEM(addr, BREAK_INSTR_SIZE, L2_START, L2_LENGTH))
 		return 0;
 
-	return EFAULT;
+	return -EFAULT;
 }
 
 int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
-- 
cgit v0.10.2


From 0a990614264f04879d9608da1cfcf9a6596efe08 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Thu, 4 Jun 2009 19:57:24 -0400
Subject: Blackfin: fix length checking in kgdb_ebin2mem

The kgdb_ebin2mem() was decrementing the count variable to do parsing, but
then later still tries to use it based on its original meaning.  So leave
it untouched and use a different variable to walk the memory.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c
index c5362b2..da28f79 100644
--- a/arch/blackfin/kernel/kgdb.c
+++ b/arch/blackfin/kernel/kgdb.c
@@ -587,19 +587,18 @@ int kgdb_ebin2mem(char *buf, char *mem, int count)
 	unsigned short *mmr16;
 	unsigned long *mmr32;
 	int err;
-	int size = 0;
+	int size;
 	int cpu = raw_smp_processor_id();
 
 	tmp_old = tmp_new = buf;
 
-	while (count-- > 0) {
+	for (size = 0; size < count; ++size) {
 		if (*tmp_old == 0x7d)
 			*tmp_new = *(++tmp_old) ^ 0x20;
 		else
 			*tmp_new = *tmp_old;
 		tmp_new++;
 		tmp_old++;
-		size++;
 	}
 
 	err = validate_memory_access_address((unsigned long)mem, size);
-- 
cgit v0.10.2


From 81f7f45606812f1d15d618c2646d0f33ca111f87 Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Wed, 3 Jun 2009 18:58:26 +0000
Subject: Blackfin: export the last exception cause via debugfs

We have some test code that runs in userspace that exercises the exception
handling of the Blackfin pretty thoroughly.  Part of the validation process
is checking the exact exception triggered, so export the last one seen to
userspace via debugfs when debugging is enabled for the test code to check.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 35f675f..63ccb28 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -68,6 +68,13 @@
 	({ if (0) printk(fmt, ##arg); 0; })
 #endif
 
+#if defined(CONFIG_DEBUG_MMRS) || defined(CONFIG_DEBUG_MMRS_MODULE)
+u32 last_seqstat;
+#ifdef CONFIG_DEBUG_MMRS_MODULE
+EXPORT_SYMBOL(last_seqstat);
+#endif
+#endif
+
 /* Initiate the event table handler */
 void __init trap_init(void)
 {
@@ -245,6 +252,9 @@ asmlinkage void trap_c(struct pt_regs *fp)
 	unsigned long trapnr = fp->seqstat & SEQSTAT_EXCAUSE;
 
 	trace_buffer_save(j);
+#if defined(CONFIG_DEBUG_MMRS) || defined(CONFIG_DEBUG_MMRS_MODULE)
+	last_seqstat = (u32)fp->seqstat;
+#endif
 
 	/* Important - be very careful dereferncing pointers - will lead to
 	 * double faults if the stack has become corrupt
-- 
cgit v0.10.2


From ce0bf52dd3f9d980889190bc6097f1756b71eb8c Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 4 Jun 2009 23:51:27 -0400
Subject: Blackfin: fix unused warnings after nommu update

The massive nommu update (8feae131) left the local variable "vml" unused,
so punt it.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 63ccb28..aa76dfb 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -86,7 +86,6 @@ void __init trap_init(void)
 static void decode_address(char *buf, unsigned long address)
 {
 #ifdef CONFIG_DEBUG_VERBOSE
-	struct vm_list_struct *vml;
 	struct task_struct *p;
 	struct mm_struct *mm;
 	unsigned long flags, offset;
-- 
cgit v0.10.2


From bf664c0a3a42683b78d74aca2d7cfb6ccc2aa2c3 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 5 Jun 2009 03:13:11 -0400
Subject: Blackfin: fix sparseirq/kstat_irqs fallout

The sparseirq changes (d7e51e66) played poorly with the Blackfin irqchip
implementation as we're still using the old hardirq method.  Our bad irq
structure had a NULL kstat_irqs field so when all the common code tries
to increment this field, everything goes big bada boom.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c
index 401bd32..6e31e93 100644
--- a/arch/blackfin/kernel/irqchip.c
+++ b/arch/blackfin/kernel/irqchip.c
@@ -59,12 +59,14 @@ static struct irq_chip bad_chip = {
 	.unmask = dummy_mask_unmask_irq,
 };
 
+static int bad_stats;
 static struct irq_desc bad_irq_desc = {
 	.status = IRQ_DISABLED,
 	.chip = &bad_chip,
 	.handle_irq = handle_bad_irq,
 	.depth = 1,
 	.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
+	.kstat_irqs = &bad_stats,
 #ifdef CONFIG_SMP
 	.affinity = CPU_MASK_ALL
 #endif
-- 
cgit v0.10.2


From 28be225b23b115573e0ecc8ef9996f42a1652f74 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 12 Jun 2009 11:33:02 +0300
Subject: irq: slab alloc for default irq_affinity

Ingo had

[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at mm/bootmem.c:537 alloc_arch_preferred_bootmem+0x2b/0x71()
[    0.000000] Hardware name: System Product Name
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Tainted: G        W  2.6.30-tip-03087-g0bb2618-dirty #52506
[    0.000000] Call Trace:
[    0.000000]  [<81032588>] warn_slowpath_common+0x60/0x90
[    0.000000]  [<810325c5>] warn_slowpath_null+0xd/0x10
[    0.000000]  [<819d1bc0>] alloc_arch_preferred_bootmem+0x2b/0x71
[    0.000000]  [<819d1c31>] ___alloc_bootmem_nopanic+0x2b/0x9a
[    0.000000]  [<81050a0a>] ? lock_release+0xac/0xb2
[    0.000000]  [<819d1d4c>] ___alloc_bootmem+0xe/0x2d
[    0.000000]  [<819d1e9f>] __alloc_bootmem+0xa/0xc
[    0.000000]  [<819d7c63>] alloc_bootmem_cpumask_var+0x21/0x26
[    0.000000]  [<819d0cc8>] early_irq_init+0x15/0x10d
[    0.000000]  [<819bb75a>] start_kernel+0x167/0x326
[    0.000000]  [<819bb06b>] __init_begin+0x6b/0x70
[    0.000000] ---[ end trace 4eaa2a86a8e2da23 ]---
[    0.000000] NR_IRQS:2304 nr_irqs:424
[    0.000000] CPU 0 irqstacks, hard=821e6000 soft=821e7000

we need to update init_irq_default_affinity

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 1045785..065205b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -45,7 +45,7 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 static void __init init_irq_default_affinity(void)
 {
-	alloc_bootmem_cpumask_var(&irq_default_affinity);
+	alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
 	cpumask_setall(irq_default_affinity);
 }
 #else
-- 
cgit v0.10.2


From 55cd63676e0c5710fbe1ea86dfd9f8ea9aaa90f2 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 12 Jun 2009 11:36:52 +0300
Subject: x86: make zap_low_mapping could be used early

Only one cpu is there, just call __flush_tlb for it. Fixes the following boot
warning on x86:

  [    0.000000] Memory: 885032k/915540k available (5993k kernel code, 29844k reserved, 3842k data, 428k init, 0k highmem)
  [    0.000000] virtual kernel memory layout:
  [    0.000000]     fixmap  : 0xffe17000 - 0xfffff000   (1952 kB)
  [    0.000000]     vmalloc : 0xf8615000 - 0xffe15000   ( 120 MB)
  [    0.000000]     lowmem  : 0xc0000000 - 0xf7e15000   ( 894 MB)
  [    0.000000]       .init : 0xc19a5000 - 0xc1a10000   ( 428 kB)
  [    0.000000]       .data : 0xc15da4bb - 0xc199af6c   (3842 kB)
  [    0.000000]       .text : 0xc1000000 - 0xc15da4bb   (5993 kB)
  [    0.000000] Checking if this processor honours the WP bit even in supervisor mode...Ok.
  [    0.000000] ------------[ cut here ]------------
  [    0.000000] WARNING: at kernel/smp.c:369 smp_call_function_many+0x50/0x1b0()
  [    0.000000] Hardware name: System Product Name
  [    0.000000] Modules linked in:
  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.30-tip #52504
  [    0.000000] Call Trace:
  [    0.000000]  [<c104aa16>] warn_slowpath_common+0x65/0x95
  [    0.000000]  [<c104aa58>] warn_slowpath_null+0x12/0x15
  [    0.000000]  [<c1073bbe>] smp_call_function_many+0x50/0x1b0
  [    0.000000]  [<c1037615>] ? do_flush_tlb_all+0x0/0x41
  [    0.000000]  [<c1037615>] ? do_flush_tlb_all+0x0/0x41
  [    0.000000]  [<c1073d4f>] smp_call_function+0x31/0x58
  [    0.000000]  [<c1037615>] ? do_flush_tlb_all+0x0/0x41
  [    0.000000]  [<c104f635>] on_each_cpu+0x26/0x65
  [    0.000000]  [<c10374b5>] flush_tlb_all+0x19/0x1b
  [    0.000000]  [<c1032ab3>] zap_low_mappings+0x4d/0x56
  [    0.000000]  [<c15d64b5>] ? printk+0x14/0x17
  [    0.000000]  [<c19b42a8>] mem_init+0x23d/0x245
  [    0.000000]  [<c19a56a1>] start_kernel+0x17a/0x2d5
  [    0.000000]  [<c19a5347>] ? unknown_bootoption+0x0/0x19a
  [    0.000000]  [<c19a5039>] __init_begin+0x39/0x41
  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index a5ecc9c..7f3eba0 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -172,6 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 	flush_tlb_all();
 }
 
-extern void zap_low_mappings(void);
+extern void zap_low_mappings(bool early);
 
 #endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7c80007..2fecda6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -873,7 +873,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 	err = do_boot_cpu(apicid, cpu);
 
-	zap_low_mappings();
+	zap_low_mappings(false);
 	low_mappings = 0;
 #else
 	err = do_boot_cpu(apicid, cpu);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 949708d..9ff3c08 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -564,7 +564,7 @@ static inline void save_pg_dir(void)
 }
 #endif /* !CONFIG_ACPI_SLEEP */
 
-void zap_low_mappings(void)
+void zap_low_mappings(bool early)
 {
 	int i;
 
@@ -581,7 +581,11 @@ void zap_low_mappings(void)
 		set_pgd(swapper_pg_dir+i, __pgd(0));
 #endif
 	}
-	flush_tlb_all();
+
+	if (early)
+		__flush_tlb();
+	else
+		flush_tlb_all();
 }
 
 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
@@ -956,7 +960,7 @@ void __init mem_init(void)
 		test_wp_bit();
 
 	save_pg_dir();
-	zap_low_mappings();
+	zap_low_mappings(true);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-- 
cgit v0.10.2


From faafec1e61e61d350248af2a7e5f047606adab6e Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Fri, 12 Jun 2009 11:17:06 +0800
Subject: perf_counter tools: Remove one L1-data alias

Otherwise all L1-instruction aliases will be recognized as
L1-data by strcasestr() when calling function parse_aliases.

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090612031706.GA22126@ywang-moblin2.bj.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9d5f1ca..5a72586 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -75,7 +75,7 @@ static char *sw_event_names[] = {
 #define MAX_ALIASES 8
 
 static char *hw_cache [][MAX_ALIASES] = {
-	{ "L1-data"		, "l1-d", "l1d", "l1"				},
+	{ "L1-data"		, "l1-d", "l1d"					},
 	{ "L1-instruction"	, "l1-i", "l1i"					},
 	{ "L2"			, "l2"						},
 	{ "Data-TLB"		, "dtlb", "d-tlb"				},
-- 
cgit v0.10.2


From dff5da6d09daaab40a8741dce0ed3c2e94079de2 Mon Sep 17 00:00:00 2001
From: Yong Wang <yong.y.wang@linux.intel.com>
Date: Fri, 12 Jun 2009 16:08:55 +0800
Subject: perf_counter/x86: Add a quirk for Atom processors

The fixed-function performance counters do not work on current Atom
processors. Use the general-purpose ones instead.

Signed-off-by: Yong Wang <yong.y.wang@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090612080855.GA2286@ywang-moblin2.bj.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 895c82e..275bc14 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -968,6 +968,13 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 	if (!x86_pmu.num_counters_fixed)
 		return -1;
 
+	/*
+	 * Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+					boot_cpu_data.x86_model == 28)
+		return -1;
+
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
 	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
-- 
cgit v0.10.2


From c3bb4d24ab4b74e11992ccb9828569583166a87d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 12 Jun 2009 09:35:22 +0100
Subject: kmemleak: Add more info to the MAINTAINERS entry

The patch adds the "F:" fields to the KMEMLEAK MAINTAINERS entry and
also moves it before KMEMTRACE to preserve the alphabetical order.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index c944d61..f7d64fa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3363,6 +3363,16 @@ F:	drivers/serial/kgdboc.c
 F:	include/linux/kgdb.h
 F:	kernel/kgdb.c
 
+KMEMLEAK
+P:	Catalin Marinas
+M:	catalin.marinas@arm.com
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	Documentation/kmemleak.txt
+F:	include/linux/kmemleak.h
+F:	mm/kmemleak.c
+F:	mm/kmemleak-test.c
+
 KMEMTRACE
 P:	Eduard - Gabriel Munteanu
 M:	eduard.munteanu@linux360.ro
@@ -3372,12 +3382,6 @@ F:	Documentation/trace/kmemtrace.txt
 F:	include/trace/kmemtrace.h
 F:	kernel/trace/kmemtrace.c
 
-KMEMLEAK
-P:	Catalin Marinas
-M:	catalin.marinas@arm.com
-L:	linux-kernel@vger.kernel.org
-S:	Maintained
-
 KPROBES
 P:	Ananth N Mavinakayanahalli
 M:	ananth@in.ibm.com
-- 
cgit v0.10.2


From ab8e2eb722f1e5fcbd8181e3e9ef4e95c52124df Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:50 -0600
Subject: cyber2000fb.c: use proper method for stopping unload if
 CONFIG_ARCH_SHARK

Russell explains the __module_get():
> cyber2000fb.c does it in its module initialization function
> to prevent the module (when built for Shark) from being unloaded.  It
> does this because it's from the days of 2.2 kernels and no one bothered
> writing the module unload support for Shark.

Since 2.4, the correct answer has been to not define an unload fn.

Cc: Russell King <rmk+lkml@arm.linux.org.uk>
Cc: alex@shark-linux.de
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 83c5cef..da7c01b 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -1736,10 +1736,8 @@ static int __init cyber2000fb_init(void)
 
 #ifdef CONFIG_ARCH_SHARK
 	err = cyberpro_vl_probe();
-	if (!err) {
+	if (!err)
 		ret = 0;
-		__module_get(THIS_MODULE);
-	}
 #endif
 #ifdef CONFIG_PCI
 	err = pci_register_driver(&cyberpro_driver);
@@ -1749,14 +1747,15 @@ static int __init cyber2000fb_init(void)
 
 	return ret ? err : 0;
 }
+module_init(cyber2000fb_init);
 
+#ifndef CONFIG_ARCH_SHARK
 static void __exit cyberpro_exit(void)
 {
 	pci_unregister_driver(&cyberpro_driver);
 }
-
-module_init(cyber2000fb_init);
 module_exit(cyberpro_exit);
+#endif
 
 MODULE_AUTHOR("Russell King");
 MODULE_DESCRIPTION("CyberPro 2000, 2010 and 5000 framebuffer driver");
-- 
cgit v0.10.2


From 9a71af2c3627b379b7c31917a7f6ee0d29bc559b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:53 -0600
Subject: module_param: invbool should take a 'bool', not an 'int'

It takes an 'int' for historical reasons, and there are only two
users: simply switch it over to bool.

The other user (uvesafb.c) will get a (harmless-on-x86) warning until
the next patch is applied.

Cc: Brad Douglas <brad@neruo.com>
Cc: Michal Januszewski <spock@gentoo.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 35e8eb0..e4e4d43 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -354,7 +354,7 @@ static int default_crt_on __devinitdata = 0;
 static int default_lcd_on __devinitdata = 1;
 
 #ifdef CONFIG_MTRR
-static int mtrr = 1;
+static bool mtrr = true;
 #endif
 
 #ifdef CONFIG_PMAC_BACKLIGHT
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index a4f0b93..9bbca8e 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -192,7 +192,7 @@ extern int param_get_bool(char *buffer, struct kernel_param *kp);
 
 extern int param_set_invbool(const char *val, struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, struct kernel_param *kp);
-#define param_check_invbool(name, p) __param_check(name, p, int)
+#define param_check_invbool(name, p) __param_check(name, p, bool)
 
 /* Comma-separated array: *nump is set to number they actually specified. */
 #define module_param_array_named(name, array, type, nump, perm)		\
diff --git a/kernel/params.c b/kernel/params.c
index de273ec..023abbf 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -272,13 +272,13 @@ int param_set_invbool(const char *val, struct kernel_param *kp)
 	dummy.arg = &boolval;
 	ret = param_set_bool(val, &dummy);
 	if (ret == 0)
-		*(int *)kp->arg = !boolval;
+		*(bool *)kp->arg = !boolval;
 	return ret;
 }
 
 int param_get_invbool(char *buffer, struct kernel_param *kp)
 {
-	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y');
+	return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y');
 }
 
 /* We break the rule and mangle the string. */
-- 
cgit v0.10.2


From 45fcc70c0b6ee0c508e1fdb5fef735c3546803f4 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:56 -0600
Subject: module_param: split perm field into flags and perm

Impact: cleanup

Rather than hack KPARAM_KMALLOCED into the perm field, separate it out.
Since the perm field was 32 bits and only needs 16, we don't add bloat.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 9bbca8e..009a5f7 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -36,9 +36,13 @@ typedef int (*param_set_fn)(const char *val, struct kernel_param *kp);
 /* Returns length written or -errno.  Buffer is 4k (ie. be short!) */
 typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp);
 
+/* Flag bits for kernel_param.flags */
+#define KPARAM_KMALLOCED	1
+
 struct kernel_param {
 	const char *name;
-	unsigned int perm;
+	u16 perm;
+	u16 flags;
 	param_set_fn set;
 	param_get_fn get;
 	union {
@@ -88,7 +92,7 @@ struct kparam_array
 	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
-	= { __param_str_##name, perm, set, get, { arg } }
+	= { __param_str_##name, perm, 0, set, get, { arg } }
 
 #define module_param_call(name, set, get, arg, perm)			      \
 	__module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm)
diff --git a/kernel/params.c b/kernel/params.c
index 023abbf..b4660dc 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,9 +24,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
-/* We abuse the high bits of "perm" to record whether we kmalloc'ed. */
-#define KPARAM_KMALLOCED	0x80000000
-
 #if 0
 #define DEBUGP printk
 #else
@@ -220,13 +217,13 @@ int param_set_charp(const char *val, struct kernel_param *kp)
 		return -ENOSPC;
 	}
 
-	if (kp->perm & KPARAM_KMALLOCED)
+	if (kp->flags & KPARAM_KMALLOCED)
 		kfree(*(char **)kp->arg);
 
 	/* This is a hack.  We can't need to strdup in early boot, and we
 	 * don't need to; this mangled commandline is preserved. */
 	if (slab_is_available()) {
-		kp->perm |= KPARAM_KMALLOCED;
+		kp->flags |= KPARAM_KMALLOCED;
 		*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
 		if (!kp->arg)
 			return -ENOMEM;
@@ -591,7 +588,7 @@ void destroy_params(const struct kernel_param *params, unsigned num)
 	unsigned int i;
 
 	for (i = 0; i < num; i++)
-		if (params[i].perm & KPARAM_KMALLOCED)
+		if (params[i].flags & KPARAM_KMALLOCED)
 			kfree(*(char **)params[i].arg);
 }
 
-- 
cgit v0.10.2


From d2c123c27db841c6c11a63de9c144823d2b1ba76 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:56 -0600
Subject: module_param: add __same_type convenience wrapper for
 __builtin_types_compatible_p

Impact: new API

__builtin_types_compatible_p() is a little awkward to use: it takes two
types rather than types or variables, and it's just damn long.

(typeof(type) == type, so this works on types as well as vars).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 37bcb50..04fb513 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -261,6 +261,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __section(S) __attribute__ ((__section__(#S)))
 #endif
 
+/* Are two types/vars the same type (ignoring qualifiers)? */
+#ifndef __same_type
+# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#endif
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v0.10.2


From fddd520122953550ec2c8b60e7ca0d0f0d115d97 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:57 -0600
Subject: module_param: allow 'bool' module_params to be bool, not just int.

Impact: API cleanup

For historical reasons, 'bool' parameters must be an int, not a bool.
But there are around 600 users, so a conversion seems like useless churn.

So we use __same_type() to distinguish, and handle both cases.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 009a5f7..6547c3c 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -38,6 +38,7 @@ typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp);
 
 /* Flag bits for kernel_param.flags */
 #define KPARAM_KMALLOCED	1
+#define KPARAM_ISBOOL		2
 
 struct kernel_param {
 	const char *name;
@@ -83,7 +84,7 @@ struct kparam_array
    parameters.  perm sets the visibility in sysfs: 000 means it's
    not there, read bits mean it's readable, write bits mean it's
    writable. */
-#define __module_param_call(prefix, name, set, get, arg, perm)		\
+#define __module_param_call(prefix, name, set, get, arg, isbool, perm)	\
 	/* Default value instead of permissions? */			\
 	static int __param_perm_check_##name __attribute__((unused)) =	\
 	BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2))	\
@@ -92,10 +93,13 @@ struct kparam_array
 	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
-	= { __param_str_##name, perm, 0, set, get, { arg } }
+	= { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0,	\
+	    set, get, { arg } }
 
 #define module_param_call(name, set, get, arg, perm)			      \
-	__module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm)
+	__module_param_call(MODULE_PARAM_PREFIX,			      \
+			    name, set, get, arg,			      \
+			    __same_type(*(arg), bool), perm)
 
 /* Helper functions: type is byte, short, ushort, int, uint, long,
    ulong, charp, bool or invbool, or XXX if you define param_get_XXX,
@@ -124,15 +128,16 @@ struct kparam_array
 #define core_param(name, var, type, perm)				\
 	param_check_##type(name, &(var));				\
 	__module_param_call("", name, param_set_##type, param_get_##type, \
-			    &var, perm)
+			    &var, __same_type(var, bool), perm)
 #endif /* !MODULE */
 
 /* Actually copy string: maxlen param is usually sizeof(string). */
 #define module_param_string(name, string, len, perm)			\
 	static const struct kparam_string __param_string_##name		\
 		= { len, string };					\
-	module_param_call(name, param_set_copystring, param_get_string,	\
-			  .str = &__param_string_##name, perm);		\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    param_set_copystring, param_get_string,	\
+			    .str = &__param_string_##name, 0, perm);	\
 	__MODULE_PARM_TYPE(name, "string")
 
 /* Called on module insert or kernel boot */
@@ -190,9 +195,16 @@ extern int param_set_charp(const char *val, struct kernel_param *kp);
 extern int param_get_charp(char *buffer, struct kernel_param *kp);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
+/* For historical reasons "bool" parameters can be (unsigned) "int". */
 extern int param_set_bool(const char *val, struct kernel_param *kp);
 extern int param_get_bool(char *buffer, struct kernel_param *kp);
-#define param_check_bool(name, p) __param_check(name, p, int)
+#define param_check_bool(name, p)					\
+	static inline void __check_##name(void)				\
+	{								\
+		BUILD_BUG_ON(!__same_type(*(p), bool) &&		\
+			     !__same_type(*(p), unsigned int) &&	\
+			     !__same_type(*(p), int));			\
+	}
 
 extern int param_set_invbool(const char *val, struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, struct kernel_param *kp);
@@ -203,8 +215,10 @@ extern int param_get_invbool(char *buffer, struct kernel_param *kp);
 	static const struct kparam_array __param_arr_##name		\
 	= { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\
 	    sizeof(array[0]), array };					\
-	module_param_call(name, param_array_set, param_array_get, 	\
-			  .arr = &__param_arr_##name, perm);		\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    param_array_set, param_array_get,		\
+			    .arr = &__param_arr_##name,			\
+			    __same_type(array[0], bool), perm);		\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
 #define module_param_array(name, type, nump, perm)		\
diff --git a/kernel/params.c b/kernel/params.c
index b4660dc..7f6912c 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -238,35 +238,54 @@ int param_get_charp(char *buffer, struct kernel_param *kp)
 	return sprintf(buffer, "%s", *((char **)kp->arg));
 }
 
+/* Actually could be a bool or an int, for historical reasons. */
 int param_set_bool(const char *val, struct kernel_param *kp)
 {
+	bool v;
+
 	/* No equals means "set"... */
 	if (!val) val = "1";
 
 	/* One of =[yYnN01] */
 	switch (val[0]) {
 	case 'y': case 'Y': case '1':
-		*(int *)kp->arg = 1;
-		return 0;
+		v = true;
+		break;
 	case 'n': case 'N': case '0':
-		*(int *)kp->arg = 0;
-		return 0;
+		v = false;
+		break;
+	default:
+		return -EINVAL;
 	}
-	return -EINVAL;
+
+	if (kp->flags & KPARAM_ISBOOL)
+		*(bool *)kp->arg = v;
+	else
+		*(int *)kp->arg = v;
+	return 0;
 }
 
 int param_get_bool(char *buffer, struct kernel_param *kp)
 {
+	bool val;
+	if (kp->flags & KPARAM_ISBOOL)
+		val = *(bool *)kp->arg;
+	else
+		val = *(int *)kp->arg;
+
 	/* Y and N chosen as being relatively non-coder friendly */
-	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N');
+	return sprintf(buffer, "%c", val ? 'Y' : 'N');
 }
 
+/* This one must be bool. */
 int param_set_invbool(const char *val, struct kernel_param *kp)
 {
-	int boolval, ret;
+	int ret;
+	bool boolval;
 	struct kernel_param dummy;
 
 	dummy.arg = &boolval;
+	dummy.flags = KPARAM_ISBOOL;
 	ret = param_set_bool(val, &dummy);
 	if (ret == 0)
 		*(bool *)kp->arg = !boolval;
-- 
cgit v0.10.2


From 2ead9439f0c6ed03faafe27abe8bc1dd256d117b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:58 -0600
Subject: uvesafb: improve parameter handling.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1) Now module_param(..., invbool, ...) requires a bool, and similarly
   module_param(..., bool, ...) allows it, change pmi_setpal to a bool.
2) #define param_get_scroll to NULL, since it can never be called (perm
   argument to module_param_named is 0).
3) Return -EINVAL from param_set_scroll if the value is bad, so it's
   reported.

Note that I don't think the old fb_get_options() is required for new
drivers: the parameters automatically work as uvesafb.XXX=... anyway.

Acked-by: Michał Januszewski <spock@gentoo.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 421770b..ca5b464 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -45,7 +45,7 @@ static struct fb_fix_screeninfo uvesafb_fix __devinitdata = {
 static int mtrr		__devinitdata = 3; /* enable mtrr by default */
 static int blank	= 1;		   /* enable blanking by default */
 static int ypan		= 1; 		 /* 0: scroll, 1: ypan, 2: ywrap */
-static int pmi_setpal	__devinitdata = 1; /* use PMI for palette changes */
+static bool pmi_setpal	__devinitdata = true; /* use PMI for palette changes */
 static int nocrtc	__devinitdata; /* ignore CRTC settings */
 static int noedid	__devinitdata; /* don't try DDC transfers */
 static int vram_remap	__devinitdata; /* set amt. of memory to be used */
@@ -2002,11 +2002,7 @@ static void __devexit uvesafb_exit(void)
 
 module_exit(uvesafb_exit);
 
-static int param_get_scroll(char *buffer, struct kernel_param *kp)
-{
-	return 0;
-}
-
+#define param_get_scroll NULL
 static int param_set_scroll(const char *val, struct kernel_param *kp)
 {
 	ypan = 0;
@@ -2017,6 +2013,8 @@ static int param_set_scroll(const char *val, struct kernel_param *kp)
 		ypan = 1;
 	else if (!strcmp(val, "ywrap"))
 		ypan = 2;
+	else
+		return -EINVAL;
 
 	return 0;
 }
-- 
cgit v0.10.2


From 2d5bf28fb9e3c178db4f5536e2fe38d3a5ed7f40 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Wed, 3 Jun 2009 21:46:09 -0400
Subject: x86 module: merge the same functions in module_32.c and module_64.c

Merge the same functions both in module_32.c and module_64.c into
module.c.

This is the first step to merge both of them finally.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4f78bd6..1cd55c7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_MODULES)		+= module_$(BITS).o
+obj-$(CONFIG_MODULES)		+= module.o module_$(BITS).o
 obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
new file mode 100644
index 0000000..1ea42d0
--- /dev/null
+++ b/arch/x86/kernel/module.c
@@ -0,0 +1,98 @@
+/*  Kernel module help for x86.
+    Copyright (C) 2001 Rusty Russell.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+	   table entries. */
+}
+
+/* We don't need anything special. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	return 0;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+		*para = NULL;
+	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+		if (!strcmp(".text", secstrings + s->sh_name))
+			text = s;
+		if (!strcmp(".altinstructions", secstrings + s->sh_name))
+			alt = s;
+		if (!strcmp(".smp_locks", secstrings + s->sh_name))
+			locks = s;
+		if (!strcmp(".parainstructions", secstrings + s->sh_name))
+			para = s;
+	}
+
+	if (alt) {
+		/* patch .altinstructions */
+		void *aseg = (void *)alt->sh_addr;
+		apply_alternatives(aseg, aseg + alt->sh_size);
+	}
+	if (locks && text) {
+		void *lseg = (void *)locks->sh_addr;
+		void *tseg = (void *)text->sh_addr;
+		alternatives_smp_module_add(me, me->name,
+					    lseg, lseg + locks->sh_size,
+					    tseg, tseg + text->sh_size);
+	}
+
+	if (para) {
+		void *pseg = (void *)para->sh_addr;
+		apply_paravirt(pseg, pseg + para->sh_size);
+	}
+
+	return module_bug_finalize(hdr, sechdrs, me);
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+	alternatives_smp_module_del(mod);
+	module_bug_cleanup(mod);
+}
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
index 0edd819..6145e68 100644
--- a/arch/x86/kernel/module_32.c
+++ b/arch/x86/kernel/module_32.c
@@ -37,23 +37,6 @@ void *module_alloc(unsigned long size)
 }
 
 
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
-}
-
-/* We don't need anything special. */
-int module_frob_arch_sections(Elf_Ehdr *hdr,
-			      Elf_Shdr *sechdrs,
-			      char *secstrings,
-			      struct module *mod)
-{
-	return 0;
-}
-
 int apply_relocate(Elf32_Shdr *sechdrs,
 		   const char *strtab,
 		   unsigned int symindex,
@@ -105,48 +88,3 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 	return -ENOEXEC;
 }
 
-int module_finalize(const Elf_Ehdr *hdr,
-		    const Elf_Shdr *sechdrs,
-		    struct module *me)
-{
-	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
-		*para = NULL;
-	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
-	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
-		if (!strcmp(".text", secstrings + s->sh_name))
-			text = s;
-		if (!strcmp(".altinstructions", secstrings + s->sh_name))
-			alt = s;
-		if (!strcmp(".smp_locks", secstrings + s->sh_name))
-			locks = s;
-		if (!strcmp(".parainstructions", secstrings + s->sh_name))
-			para = s;
-	}
-
-	if (alt) {
-		/* patch .altinstructions */
-		void *aseg = (void *)alt->sh_addr;
-		apply_alternatives(aseg, aseg + alt->sh_size);
-	}
-	if (locks && text) {
-		void *lseg = (void *)locks->sh_addr;
-		void *tseg = (void *)text->sh_addr;
-		alternatives_smp_module_add(me, me->name,
-					    lseg, lseg + locks->sh_size,
-					    tseg, tseg + text->sh_size);
-	}
-
-	if (para) {
-		void *pseg = (void *)para->sh_addr;
-		apply_paravirt(pseg, pseg + para->sh_size);
-	}
-
-	return module_bug_finalize(hdr, sechdrs, me);
-}
-
-void module_arch_cleanup(struct module *mod)
-{
-	alternatives_smp_module_del(mod);
-	module_bug_cleanup(mod);
-}
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index c23880b..68ec7d8 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -33,13 +33,6 @@
 #define DEBUGP(fmt...)
 
 #ifndef CONFIG_UML
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
-}
-
 void *module_alloc(unsigned long size)
 {
 	struct vm_struct *area;
@@ -58,15 +51,6 @@ void *module_alloc(unsigned long size)
 }
 #endif
 
-/* We don't need anything special. */
-int module_frob_arch_sections(Elf_Ehdr *hdr,
-			      Elf_Shdr *sechdrs,
-			      char *secstrings,
-			      struct module *mod)
-{
-	return 0;
-}
-
 int apply_relocate_add(Elf64_Shdr *sechdrs,
 		   const char *strtab,
 		   unsigned int symindex,
@@ -147,48 +131,3 @@ int apply_relocate(Elf_Shdr *sechdrs,
 	return -ENOSYS;
 }
 
-int module_finalize(const Elf_Ehdr *hdr,
-		    const Elf_Shdr *sechdrs,
-		    struct module *me)
-{
-	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
-		*para = NULL;
-	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
-	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
-		if (!strcmp(".text", secstrings + s->sh_name))
-			text = s;
-		if (!strcmp(".altinstructions", secstrings + s->sh_name))
-			alt = s;
-		if (!strcmp(".smp_locks", secstrings + s->sh_name))
-			locks = s;
-		if (!strcmp(".parainstructions", secstrings + s->sh_name))
-			para = s;
-	}
-
-	if (alt) {
-		/* patch .altinstructions */
-		void *aseg = (void *)alt->sh_addr;
-		apply_alternatives(aseg, aseg + alt->sh_size);
-	}
-	if (locks && text) {
-		void *lseg = (void *)locks->sh_addr;
-		void *tseg = (void *)text->sh_addr;
-		alternatives_smp_module_add(me, me->name,
-					    lseg, lseg + locks->sh_size,
-					    tseg, tseg + text->sh_size);
-	}
-
-	if (para) {
-		void *pseg = (void *)para->sh_addr;
-		apply_paravirt(pseg, pseg + para->sh_size);
-	}
-
-	return module_bug_finalize(hdr, sechdrs, me);
-}
-
-void module_arch_cleanup(struct module *mod)
-{
-	alternatives_smp_module_del(mod);
-	module_bug_cleanup(mod);
-}
-- 
cgit v0.10.2


From 0fdc83b950df9e2eb45db6fca9c3d92c66fd5028 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Wed, 3 Jun 2009 21:46:19 -0400
Subject: x86 module: merge the rest functions with macros

Merge the rest functions together, with proper preprocessing directives.
Finally remove module_{32|64}.c.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1cd55c7..f3477bb 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_MODULES)		+= module.o module_$(BITS).o
+obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 1ea42d0..b92fbcf 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -34,6 +34,32 @@
 #define DEBUGP(fmt...)
 #endif
 
+#if defined(CONFIG_UML) || defined(CONFIG_X86_32)
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc_exec(size);
+}
+#else /*X86_64*/
+void *module_alloc(unsigned long size)
+{
+	struct vm_struct *area;
+
+	if (!size)
+		return NULL;
+	size = PAGE_ALIGN(size);
+	if (size > MODULES_LEN)
+		return NULL;
+
+	area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END);
+	if (!area)
+		return NULL;
+
+	return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC);
+}
+#endif
+
 /* Free memory returned from module_alloc */
 void module_free(struct module *mod, void *module_region)
 {
@@ -51,6 +77,140 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
 	return 0;
 }
 
+#ifdef CONFIG_X86_32
+int apply_relocate(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	uint32_t *location;
+
+	DEBUGP("Applying relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_386_32:
+			/* We add the value into the location given */
+			*location += sym->st_value;
+			break;
+		case R_386_PC32:
+			/* Add the value, subtract its postition */
+			*location += sym->st_value - (uint32_t)location;
+			break;
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
+	       me->name);
+	return -ENOEXEC;
+}
+#else /*X86_64*/
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf64_Sym *sym;
+	void *loc;
+	u64 val;
+
+	DEBUGP("Applying relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+			+ ELF64_R_SYM(rel[i].r_info);
+
+		DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
+			(int)ELF64_R_TYPE(rel[i].r_info),
+			sym->st_value, rel[i].r_addend, (u64)loc);
+
+		val = sym->st_value + rel[i].r_addend;
+
+		switch (ELF64_R_TYPE(rel[i].r_info)) {
+		case R_X86_64_NONE:
+			break;
+		case R_X86_64_64:
+			*(u64 *)loc = val;
+			break;
+		case R_X86_64_32:
+			*(u32 *)loc = val;
+			if (val != *(u32 *)loc)
+				goto overflow;
+			break;
+		case R_X86_64_32S:
+			*(s32 *)loc = val;
+			if ((s64)val != *(s32 *)loc)
+				goto overflow;
+			break;
+		case R_X86_64_PC32:
+			val -= (u64)loc;
+			*(u32 *)loc = val;
+#if 0
+			if ((s64)val != *(s32 *)loc)
+				goto overflow;
+#endif
+			break;
+		default:
+			printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
+			       me->name, ELF64_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+
+overflow:
+	printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
+	       (int)ELF64_R_TYPE(rel[i].r_info), val);
+	printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
+	       me->name);
+	return -ENOEXEC;
+}
+
+int apply_relocate(Elf_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	printk(KERN_ERR "non add relocation not supported\n");
+	return -ENOSYS;
+}
+
+#endif
+
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
deleted file mode 100644
index 6145e68..0000000
--- a/arch/x86/kernel/module_32.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*  Kernel module help for i386.
-    Copyright (C) 2001 Rusty Russell.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/bug.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt...)
-#endif
-
-void *module_alloc(unsigned long size)
-{
-	if (size == 0)
-		return NULL;
-	return vmalloc_exec(size);
-}
-
-
-int apply_relocate(Elf32_Shdr *sechdrs,
-		   const char *strtab,
-		   unsigned int symindex,
-		   unsigned int relsec,
-		   struct module *me)
-{
-	unsigned int i;
-	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
-	Elf32_Sym *sym;
-	uint32_t *location;
-
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
-		/* This is where to make the change */
-		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
-			+ ELF32_R_SYM(rel[i].r_info);
-
-		switch (ELF32_R_TYPE(rel[i].r_info)) {
-		case R_386_32:
-			/* We add the value into the location given */
-			*location += sym->st_value;
-			break;
-		case R_386_PC32:
-			/* Add the value, subtract its postition */
-			*location += sym->st_value - (uint32_t)location;
-			break;
-		default:
-			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
-			       me->name, ELF32_R_TYPE(rel[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-	return 0;
-}
-
-int apply_relocate_add(Elf32_Shdr *sechdrs,
-		       const char *strtab,
-		       unsigned int symindex,
-		       unsigned int relsec,
-		       struct module *me)
-{
-	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
-	       me->name);
-	return -ENOEXEC;
-}
-
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
deleted file mode 100644
index 68ec7d8..0000000
--- a/arch/x86/kernel/module_64.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*  Kernel module help for x86-64
-    Copyright (C) 2001 Rusty Russell.
-    Copyright (C) 2002,2003 Andi Kleen, SuSE Labs.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/bug.h>
-
-#include <asm/system.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-
-#define DEBUGP(fmt...)
-
-#ifndef CONFIG_UML
-void *module_alloc(unsigned long size)
-{
-	struct vm_struct *area;
-
-	if (!size)
-		return NULL;
-	size = PAGE_ALIGN(size);
-	if (size > MODULES_LEN)
-		return NULL;
-
-	area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END);
-	if (!area)
-		return NULL;
-
-	return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC);
-}
-#endif
-
-int apply_relocate_add(Elf64_Shdr *sechdrs,
-		   const char *strtab,
-		   unsigned int symindex,
-		   unsigned int relsec,
-		   struct module *me)
-{
-	unsigned int i;
-	Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
-	Elf64_Sym *sym;
-	void *loc;
-	u64 val;
-
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
-		/* This is where to make the change */
-		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
-
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
-			+ ELF64_R_SYM(rel[i].r_info);
-
-		DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
-			(int)ELF64_R_TYPE(rel[i].r_info),
-			sym->st_value, rel[i].r_addend, (u64)loc);
-
-		val = sym->st_value + rel[i].r_addend;
-
-		switch (ELF64_R_TYPE(rel[i].r_info)) {
-		case R_X86_64_NONE:
-			break;
-		case R_X86_64_64:
-			*(u64 *)loc = val;
-			break;
-		case R_X86_64_32:
-			*(u32 *)loc = val;
-			if (val != *(u32 *)loc)
-				goto overflow;
-			break;
-		case R_X86_64_32S:
-			*(s32 *)loc = val;
-			if ((s64)val != *(s32 *)loc)
-				goto overflow;
-			break;
-		case R_X86_64_PC32:
-			val -= (u64)loc;
-			*(u32 *)loc = val;
-#if 0
-			if ((s64)val != *(s32 *)loc)
-				goto overflow;
-#endif
-			break;
-		default:
-			printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
-			       me->name, ELF64_R_TYPE(rel[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-	return 0;
-
-overflow:
-	printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
-	       (int)ELF64_R_TYPE(rel[i].r_info), val);
-	printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
-	       me->name);
-	return -ENOEXEC;
-}
-
-int apply_relocate(Elf_Shdr *sechdrs,
-		   const char *strtab,
-		   unsigned int symindex,
-		   unsigned int relsec,
-		   struct module *me)
-{
-	printk(KERN_ERR "non add relocation not supported\n");
-	return -ENOSYS;
-}
-
-- 
cgit v0.10.2


From c0e5e10bf380c7a7c4e8c8b7d42e0aba623e76a6 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Wed, 3 Jun 2009 21:46:28 -0400
Subject: uml module: fix uml build process due to this merge

Due to the previous merge, uml needs to be fixed.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 598b5c1..1b549bc 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -8,7 +8,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
 
 subarch-obj-y = lib/semaphore_32.o lib/string_32.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
+subarch-obj-$(CONFIG_MODULES) += kernel/module.o
 
 USER_OBJS := bugs.o ptrace_user.o fault.o
 
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index c8b4cce..2201e9c 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -8,10 +8,8 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
 	setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \
 	sysrq.o ksyms.o tls.o
 
-obj-$(CONFIG_MODULES) += um_module.o
-
 subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
-subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
+subarch-obj-$(CONFIG_MODULES) += kernel/module.o
 
 ldt-y = ../sys-i386/ldt.o
 
diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c
deleted file mode 100644
index 3dead39..0000000
--- a/arch/um/sys-x86_64/um_module.c
+++ /dev/null
@@ -1,21 +0,0 @@
-#include <linux/vmalloc.h>
-#include <linux/moduleloader.h>
-
-/* Copied from i386 arch/i386/kernel/module.c */
-void *module_alloc(unsigned long size)
-{
-	if (size == 0)
-		return NULL;
-	return vmalloc_exec(size);
-}
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
-	/*
-	 * FIXME: If module_region == mod->init_region, trim exception
-	 * table entries.
-	 */
-}
-
-- 
cgit v0.10.2


From c398df30d5caad626ac72bfab0361a7b0f67a661 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Wed, 3 Jun 2009 21:46:46 -0400
Subject: module: merge module_alloc() finally

As Christoph Hellwig suggested, module_alloc() actually can be
unified for i386 and x86_64 (of course, also UML).

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: 'Ingo Molnar' <mingo@elte.hu>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 58da248..9ce3f16 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -53,16 +53,21 @@ extern unsigned long end_iomem;
 #else
 # define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #endif
+#define MODULES_VADDR	VMALLOC_START
+#define MODULES_END	VMALLOC_END
+#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-
+#define __PAGE_KERNEL_EXEC                                              \
+	 (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
 #define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC)
 
 /*
  * The i386 can't do page protection for execute, and considers that the same
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 2733fad..5e67c15 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -46,6 +46,10 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 # define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
 #endif
 
+#define MODULES_VADDR	VMALLOC_START
+#define MODULES_END	VMALLOC_END
+#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
+
 #define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
 
 #endif /* _ASM_X86_PGTABLE_32_DEFS_H */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b92fbcf..894bb71 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -34,14 +34,6 @@
 #define DEBUGP(fmt...)
 #endif
 
-#if defined(CONFIG_UML) || defined(CONFIG_X86_32)
-void *module_alloc(unsigned long size)
-{
-	if (size == 0)
-		return NULL;
-	return vmalloc_exec(size);
-}
-#else /*X86_64*/
 void *module_alloc(unsigned long size)
 {
 	struct vm_struct *area;
@@ -56,9 +48,9 @@ void *module_alloc(unsigned long size)
 	if (!area)
 		return NULL;
 
-	return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC);
+	return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM,
+					PAGE_KERNEL_EXEC);
 }
-#endif
 
 /* Free memory returned from module_alloc */
 void module_free(struct module *mod, void *module_region)
-- 
cgit v0.10.2


From ad6561dffa17f17bb68d7207d422c26c381c4313 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:47:03 -0600
Subject: module: trim exception table on init free.

It's theoretically possible that there are exception table entries
which point into the (freed) init text of modules.  These could cause
future problems if other modules get loaded into that memory and cause
an exception as we'd see the wrong fixup.  The only case I know of is
kvm-intel.ko (when CONFIG_CC_OPTIMIZE_FOR_SIZE=n).

Amerigo fixed this long-standing FIXME in the x86 version, but this
patch is more general.

This implements trim_init_extable(); most archs are simple since they
use the standard lib/extable.c sort code.  Alpha and IA64 use relative
addresses in their fixups, so thier trimming is a slight variation.

Sparc32 is unique; it doesn't seem to define ARCH_HAS_SORT_EXTABLE,
yet it defines its own sort_extable() which overrides the one in lib.
It doesn't sort, so we have to mark deleted entries instead of
actually trimming them.

Inspired-by: Amerigo Wang <amwang@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-alpha@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: linux-ia64@vger.kernel.org

diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c
index 62dc379..813c9b6 100644
--- a/arch/alpha/mm/extable.c
+++ b/arch/alpha/mm/extable.c
@@ -48,6 +48,27 @@ void sort_extable(struct exception_table_entry *start,
 	     cmp_ex, swap_ex);
 }
 
+#ifdef CONFIG_MODULES
+/*
+ * Any entry referring to the module init will be at the beginning or
+ * the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
+				  m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+
 const struct exception_table_entry *
 search_extable(const struct exception_table_entry *first,
 	       const struct exception_table_entry *last,
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
index 71c50dd..e95d5ad 100644
--- a/arch/ia64/mm/extable.c
+++ b/arch/ia64/mm/extable.c
@@ -53,6 +53,32 @@ void sort_extable (struct exception_table_entry *start,
 	     cmp_ex, swap_ex);
 }
 
+static inline unsigned long ex_to_addr(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->insn + x->insn;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Any entry referring to the module init will be at the beginning or
+ * the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
+				  m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+
 const struct exception_table_entry *
 search_extable (const struct exception_table_entry *first,
 		const struct exception_table_entry *last,
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 47d5619..8303ac4 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -17,6 +17,9 @@
 
 #ifndef __ASSEMBLY__
 
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
 /* Sparc is not segmented, however we need to be able to fool access_ok()
  * when doing system calls from kernel mode legitimately.
  *
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
index 16cc289..a61c349 100644
--- a/arch/sparc/mm/extable.c
+++ b/arch/sparc/mm/extable.c
@@ -28,6 +28,10 @@ search_extable(const struct exception_table_entry *start,
 	 *	word 3: last insn address + 4 bytes
 	 *	word 4: fixup code address
 	 *
+	 * Deleted entries are encoded as:
+	 *	word 1: unused
+	 *	word 2: -1
+	 *
 	 * See asm/uaccess.h for more details.
 	 */
 
@@ -39,6 +43,10 @@ search_extable(const struct exception_table_entry *start,
 			continue;
 		}
 
+		/* A deleted entry; see trim_init_extable */
+		if (walk->fixup == -1)
+			continue;
+
 		if (walk->insn == value)
 			return walk;
 	}
@@ -57,6 +65,27 @@ search_extable(const struct exception_table_entry *start,
         return NULL;
 }
 
+#ifdef CONFIG_MODULES
+/* We could memmove them around; easier to mark the trimmed ones. */
+void trim_init_extable(struct module *m)
+{
+	unsigned int i;
+	bool range;
+
+	for (i = 0; i < m->num_exentries; i += range ? 2 : 1) {
+		range = m->extable[i].fixup == 0;
+
+		if (within_module_init(m->extable[i].insn, m)) {
+			m->extable[i].fixup = -1;
+			if (range)
+				m->extable[i+1].fixup = -1;
+		}
+		if (range)
+			i++;
+	}
+}
+#endif /* CONFIG_MODULES */
+
 /* Special extable search, which handles ranges.  Returns fixup */
 unsigned long search_extables_range(unsigned long addr, unsigned long *g2)
 {
diff --git a/include/linux/module.h b/include/linux/module.h
index a8f2c0a..a7bc6e7 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -77,6 +77,7 @@ search_extable(const struct exception_table_entry *first,
 void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish);
 void sort_main_extable(void);
+void trim_init_extable(struct module *m);
 
 #ifdef MODULE
 #define MODULE_GENERIC_TABLE(gtype,name)			\
diff --git a/kernel/module.c b/kernel/module.c
index 35f7de0..e4ab36c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2455,6 +2455,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
 	module_put(mod);
+	trim_init_extable(mod);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
diff --git a/lib/extable.c b/lib/extable.c
index 179c087..4cac81e 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -39,7 +39,26 @@ void sort_extable(struct exception_table_entry *start,
 	sort(start, finish - start, sizeof(struct exception_table_entry),
 	     cmp_ex, NULL);
 }
-#endif
+
+#ifdef CONFIG_MODULES
+/*
+ * If the exception table is sorted, any referring to the module init
+ * will be at the beginning or the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries && within_module_init(m->extable[0].insn, m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+		within_module_init(m->extable[m->num_exentries-1].insn, m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+#endif /* !ARCH_HAS_SORT_EXTABLE */
 
 #ifndef ARCH_HAS_SEARCH_EXTABLE
 /*
-- 
cgit v0.10.2


From 5933048c69edb546f1e93c26dc93816f0be9f754 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:47:04 -0600
Subject: module: cleanup FIXME comments about trimming exception table
 entries.

Everyone cut and paste this comment from my original one.  We now do
it generically, so cut the comments.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Amerigo Wang <amwang@redhat.com>

diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c
index 1167fe9c..98f94d0 100644
--- a/arch/avr32/kernel/module.c
+++ b/arch/avr32/kernel/module.c
@@ -32,8 +32,6 @@ void module_free(struct module *mod, void *module_region)
 	mod->arch.syminfo = NULL;
 
 	vfree(module_region);
-	/* FIXME: if module_region == mod->init_region, trim exception
-	 * table entries. */
 }
 
 static inline int check_rela(Elf32_Rela *rela, struct module *module,
diff --git a/arch/cris/kernel/module.c b/arch/cris/kernel/module.c
index a187833..abc13e3 100644
--- a/arch/cris/kernel/module.c
+++ b/arch/cris/kernel/module.c
@@ -48,8 +48,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	FREE_MODULE(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/frv/kernel/module.c b/arch/frv/kernel/module.c
index 850d168..711763c 100644
--- a/arch/frv/kernel/module.c
+++ b/arch/frv/kernel/module.c
@@ -35,8 +35,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c
index cfc9127..0865e29 100644
--- a/arch/h8300/kernel/module.c
+++ b/arch/h8300/kernel/module.c
@@ -23,8 +23,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/m32r/kernel/module.c b/arch/m32r/kernel/module.c
index 8d42057..cb5f37d 100644
--- a/arch/m32r/kernel/module.c
+++ b/arch/m32r/kernel/module.c
@@ -44,8 +44,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/m68k/kernel/module.c b/arch/m68k/kernel/module.c
index 774862b..cd6bcb1c9 100644
--- a/arch/m68k/kernel/module.c
+++ b/arch/m68k/kernel/module.c
@@ -31,8 +31,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/m68knommu/kernel/module.c b/arch/m68knommu/kernel/module.c
index 3b1a2ff..d11ffae 100644
--- a/arch/m68knommu/kernel/module.c
+++ b/arch/m68knommu/kernel/module.c
@@ -23,8 +23,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 1f60e27..3e9100d 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -68,8 +68,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c
index 6b287f2..4fa0e36 100644
--- a/arch/mn10300/kernel/module.c
+++ b/arch/mn10300/kernel/module.c
@@ -48,8 +48,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	 * table entries. */
 }
 
 /*
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index ecd1c50..ef5caf2 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -267,8 +267,6 @@ void module_free(struct module *mod, void *module_region)
 	mod->arch.section = NULL;
 
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* Additional bytes needed in front of individual sections */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 43e7e3a..477c663 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -43,8 +43,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index eed4a00..ab2e3ed 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -56,8 +56,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 static void
diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c
index c19b0f7..c2efdcd 100644
--- a/arch/sh/kernel/module.c
+++ b/arch/sh/kernel/module.c
@@ -46,8 +46,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 9027376..0ee642f 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -75,8 +75,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
 }
 
 /* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 894bb71..89f386f 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -56,8 +56,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
 }
 
 /* We don't need anything special. */
diff --git a/arch/xtensa/kernel/module.c b/arch/xtensa/kernel/module.c
index 3981a46..c1accea 100644
--- a/arch/xtensa/kernel/module.c
+++ b/arch/xtensa/kernel/module.c
@@ -34,8 +34,6 @@ void *module_alloc(unsigned long size)
 void module_free(struct module *mod, void *module_region)
 {
 	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-	   table entries. */
 }
 
 int module_frob_arch_sections(Elf32_Ehdr *hdr,
-- 
cgit v0.10.2


From 4c921126fe553440261f56691c5f60fbaaa486d6 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Fri, 12 Jun 2009 12:04:54 +0530
Subject: powerpc, perf_counter: Fix performance counter event types

Sachin Sant reported these compiler errors:

 CC      arch/powerpc/kernel/power7-pmu.o
arch/powerpc/kernel/power7-pmu.c:297: error: PERF_COUNT_CPU_CYCLES undeclared here (not in a function)

Which happened because a last-minute rename of symbols crossed with
the Power7 support patch.

Fix this by using the new symbol names.

Reported-by: Sachin Sant <sachinp@in.ibm.com>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@ozlabs.org
LKML-Reference: <1244788494.5554.1.camel@ht.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index b3f7d12..b72e7a1 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -294,12 +294,12 @@ static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int power7_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 0x1e,
-	[PERF_COUNT_INSTRUCTIONS] = 2,
-	[PERF_COUNT_CACHE_REFERENCES] = 0xc880,		/* LD_REF_L1_LSU */
-	[PERF_COUNT_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN */
-	[PERF_COUNT_BRANCH_MISSES] = 0x400f6,		/* BR_MPRED */
+	[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
+	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
+	[PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880,	/* LD_REF_L1_LSU*/
+	[PERF_COUNT_HW_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1	*/
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN	*/
+	[PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6,	/* BR_MPRED	*/
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
-- 
cgit v0.10.2


From f1a3c979059b2033d0b1cc4f9ee5c90bf92b5f94 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 17:56:09 +0200
Subject: perf_counter: PERF_TYPE_HW_CACHE is a hardware counter too

is_software_counter() was missing the new HW_CACHE category.

( This could have caused some counter scheduling artifacts
  with mixed sw and hw counters and counter groups. )

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6e13395..7c4f32f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -621,7 +621,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
 static inline int is_software_counter(struct perf_counter *counter)
 {
 	return (counter->attr.type != PERF_TYPE_RAW) &&
-		(counter->attr.type != PERF_TYPE_HARDWARE);
+		(counter->attr.type != PERF_TYPE_HARDWARE) &&
+		(counter->attr.type != PERF_TYPE_HW_CACHE);
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
-- 
cgit v0.10.2


From 081fad86178ec0f64f32f1bd04cf4aad22714fb9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 17:57:21 +0200
Subject: perf_counter: Remove PERF_TYPE_RAW special casing

The PERF_TYPE_RAW special case seems superfluous these days. Remove
it and add it to the switch() stmt like the others.

[ Impact: cleanup ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ef5d8a5..663bbe0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3570,12 +3570,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
 		goto done;
 
-	if (attr->type == PERF_TYPE_RAW) {
-		pmu = hw_perf_counter_init(counter);
-		goto done;
-	}
-
 	switch (attr->type) {
+	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
 		pmu = hw_perf_counter_init(counter);
-- 
cgit v0.10.2


From bbd36e5e6aa6f1757c84cdb406b6eb81686d14af Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 23:11:50 +0200
Subject: perf record: Explicity program a default counter

Up until now record has worked on the assumption that type=0, config=0
was a suitable configuration - which it is. Lets make this a little more
explicit and more readable via the use of proper symbols.

[ Impact: cleanup ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 29259e7..0f5771f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -568,8 +568,11 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 	if (!argc && target_pid == -1 && !system_wide)
 		usage_with_options(record_usage, options);
 
-	if (!nr_counters)
-		nr_counters = 1;
+	if (!nr_counters) {
+		nr_counters	= 1;
+		attrs[0].type	= PERF_TYPE_HARDWARE;
+		attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
+	}
 
 	for (counter = 0; counter < nr_counters; counter++) {
 		if (attrs[counter].sample_period)
-- 
cgit v0.10.2


From 974802eaa1afdc87e00821df7020a2b3c6fee623 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 12 Jun 2009 12:46:55 +0200
Subject: perf_counter: Add forward/backward attribute ABI compatibility

Provide for means of extending the perf_counter_attr in a 'natural' way.

We allow growing the structure by appending fields at the end by specifying
the full structure size inside it.

When a new kernel sees a smaller (old) structure, it will 0 pad the tail.
When an old kernel sees a larger (new) structure, it will verify the tail
consists of 0s, otherwise fail.

If we fail due to a size-mismatch, we return -E2BIG and write the kernel's
native attribe size back into the provided structure.

Furthermore, add some attribute verification, so that we'll fail counter
creation when unknown bits are present (PERF_SAMPLE, PERF_FORMAT, or in
the __reserved fields).

(This ABI detail is introduced while keeping the existing syscall ABI.)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7c4f32f..1b3118a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -120,6 +120,8 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_ID				= 1U << 6,
 	PERF_SAMPLE_CPU				= 1U << 7,
 	PERF_SAMPLE_PERIOD			= 1U << 8,
+
+	PERF_SAMPLE_MAX = 1U << 9,		/* non-ABI */
 };
 
 /*
@@ -131,17 +133,26 @@ enum perf_counter_read_format {
 	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
 	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
 	PERF_FORMAT_ID				= 1U << 2,
+
+	PERF_FORMAT_MAX = 1U << 3, 		/* non-ABI */
 };
 
+#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
+
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_attr {
+
 	/*
 	 * Major type: hardware/software/tracepoint/etc.
 	 */
 	__u32			type;
-	__u32			__reserved_1;
+
+	/*
+	 * Size of the attr structure, for fwd/bwd compat.
+	 */
+	__u32			size;
 
 	/*
 	 * Type specific configuration information.
@@ -168,12 +179,12 @@ struct perf_counter_attr {
 				comm	       :  1, /* include comm data     */
 				freq           :  1, /* use freq, not period  */
 
-				__reserved_2   : 53;
+				__reserved_1   : 53;
 
 	__u32			wakeup_events;	/* wakeup every n events */
-	__u32			__reserved_3;
+	__u32			__reserved_2;
 
-	__u64			__reserved_4;
+	__u64			__reserved_3;
 };
 
 /*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c6c84ad..418d90f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 
 asmlinkage long sys_perf_counter_open(
-		const struct perf_counter_attr __user *attr_uptr,
+		struct perf_counter_attr __user *attr_uptr,
 		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 663bbe0..29b685f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3584,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_TRACEPOINT:
 		pmu = tp_perf_counter_init(counter);
 		break;
+
+	default:
+		break;
 	}
 done:
 	err = 0;
@@ -3610,6 +3613,85 @@ done:
 	return counter;
 }
 
+static int perf_copy_attr(struct perf_counter_attr __user *uattr,
+			  struct perf_counter_attr *attr)
+{
+	int ret;
+	u32 size;
+
+	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
+		return -EFAULT;
+
+	/*
+	 * zero the full structure, so that a short copy will be nice.
+	 */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		goto err_size;
+
+	if (!size)		/* abi compat */
+		size = PERF_ATTR_SIZE_VER0;
+
+	if (size < PERF_ATTR_SIZE_VER0)
+		goto err_size;
+
+	/*
+	 * If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0.
+	 */
+	if (size > sizeof(*attr)) {
+		unsigned long val;
+		unsigned long __user *addr;
+		unsigned long __user *end;
+
+		addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
+				sizeof(unsigned long));
+		end  = PTR_ALIGN((void __user *)uattr + size,
+				sizeof(unsigned long));
+
+		for (; addr < end; addr += sizeof(unsigned long)) {
+			ret = get_user(val, addr);
+			if (ret)
+				return ret;
+			if (val)
+				goto err_size;
+		}
+	}
+
+	ret = copy_from_user(attr, uattr, size);
+	if (ret)
+		return -EFAULT;
+
+	/*
+	 * If the type exists, the corresponding creation will verify
+	 * the attr->config.
+	 */
+	if (attr->type >= PERF_TYPE_MAX)
+		return -EINVAL;
+
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+		return -EINVAL;
+
+	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
+		return -EINVAL;
+
+	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
+		return -EINVAL;
+
+out:
+	return ret;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	ret = -E2BIG;
+	goto out;
+}
+
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
@@ -3619,7 +3701,7 @@ done:
  * @group_fd:		group leader counter fd
  */
 SYSCALL_DEFINE5(perf_counter_open,
-		const struct perf_counter_attr __user *, attr_uptr,
+		struct perf_counter_attr __user *, attr_uptr,
 		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
 	struct perf_counter *counter, *group_leader;
@@ -3635,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open,
 	if (flags)
 		return -EINVAL;
 
-	if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
-		return -EFAULT;
+	ret = perf_copy_attr(attr_uptr, &attr);
+	if (ret)
+		return ret;
 
 	if (!attr.exclude_kernel) {
 		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index af0a504..87a1aca 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -53,11 +53,12 @@ static inline unsigned long long rdclock(void)
 	_min1 < _min2 ? _min1 : _min2; })
 
 static inline int
-sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
+sys_perf_counter_open(struct perf_counter_attr *attr,
 		      pid_t pid, int cpu, int group_fd,
 		      unsigned long flags)
 {
-	return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_counter_open, attr, pid, cpu,
 		       group_fd, flags);
 }
 
-- 
cgit v0.10.2


From 3ea400581f2b595afd91207bbd79c11cb38598e0 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 12 Jun 2009 13:40:47 +0100
Subject: GFS2: Remove lock_kernel from gfs2_put_super()

It is not required here.

Signed-off-by: Steven Whitehouse <swhiteho@redhat,com>
Cc: Christoph Hellwig <hch@infradead.org>

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c8930b3..0a68013 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -719,8 +719,6 @@ static void gfs2_put_super(struct super_block *sb)
 	int error;
 	struct gfs2_jdesc *jd;
 
-	lock_kernel();
-
 	/*  Unfreeze the filesystem, if we need to  */
 
 	mutex_lock(&sdp->sd_freeze_lock);
@@ -787,8 +785,6 @@ restart:
 
 	/*  At this point, we're through participating in the lockspace  */
 	gfs2_sys_fs_del(sdp);
-
-	unlock_kernel();
 }
 
 /**
-- 
cgit v0.10.2


From 20f77f5654042cf484d8964b618faf9d620f639b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:16:33 -0600
Subject: virtio: fix obsolete documentation on probe function

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 06005fa..9410394 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -99,8 +99,7 @@ void unregister_virtio_device(struct virtio_device *dev);
  * @id_table: the ids serviced by this driver.
  * @feature_table: an array of feature numbers supported by this device.
  * @feature_table_size: number of entries in the feature table array.
- * @probe: the function to call when a device is found.  Returns a token for
- *    remove, or PTR_ERR().
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @remove: the function when a device is removed.
  * @config_changed: optional function to call when the device configuration
  *    changes; may be called in interrupt context.
-- 
cgit v0.10.2


From ef688e151c00e5d529703be9a04fd506df8bc54e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:16:35 -0600
Subject: virtio: meet virtio spec by finalizing features before using device

Virtio devices are supposed to negotiate features before they start using
the device, but the current code doesn't do this.  This is because the
driver's probe() function invariably has to add buffers to a virtqueue,
or probe the disk (virtio_blk).

This currently doesn't matter since no existing backend is strict about
the feature negotiation.  But it's possible to imagine a future feature
which completely changes how a device operates: in this case, we'd need
to acknowledge it before using the device.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 018c070..6b68103 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -118,13 +118,14 @@ static int virtio_dev_probe(struct device *_d)
 		if (device_features & (1 << i))
 			set_bit(i, dev->features);
 
+	dev->config->finalize_features(dev);
+
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	else {
-		dev->config->finalize_features(dev);
+	else
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-	}
+
 	return err;
 }
 
-- 
cgit v0.10.2


From 9499f5e7ed5224c40706f0cec6542a9916bc7606 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:16:35 -0600
Subject: virtio: add names to virtqueue struct, mapping from devices to
 queues.

Add a linked list of all virtqueues for a virtio device: this helps for
debugging and is also needed for upcoming interface change.

Also, add a "name" field for clearer debug messages.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c0facaa..db55a50 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -288,7 +288,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	sg_init_table(vblk->sg, vblk->sg_elems);
 
 	/* We expect one virtqueue, for output. */
-	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done);
+	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done, "requests");
 	if (IS_ERR(vblk->vq)) {
 		err = PTR_ERR(vblk->vq);
 		goto out_free_vblk;
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 86e83f8..2aeafce 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -94,7 +94,7 @@ static int virtrng_probe(struct virtio_device *vdev)
 	int err;
 
 	/* We expect a single virtqueue. */
-	vq = vdev->config->find_vq(vdev, 0, random_recv_done);
+	vq = vdev->config->find_vq(vdev, 0, random_recv_done, "input");
 	if (IS_ERR(vq))
 		return PTR_ERR(vq);
 
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index ff6f5a4..58684e4 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -202,13 +202,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	/* Find the input queue. */
 	/* FIXME: This is why we want to wean off hvc: we do nothing
 	 * when input comes in. */
-	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
+	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input, "input");
 	if (IS_ERR(in_vq)) {
 		err = PTR_ERR(in_vq);
 		goto free;
 	}
 
-	out_vq = vdev->config->find_vq(vdev, 1, NULL);
+	out_vq = vdev->config->find_vq(vdev, 1, NULL, "output");
 	if (IS_ERR(out_vq)) {
 		err = PTR_ERR(out_vq);
 		goto free_in_vq;
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index df44d96..4babed8 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -228,7 +228,8 @@ extern void lguest_setup_irq(unsigned int irq);
  * function. */
 static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 				    unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				    void (*callback)(struct virtqueue *vq),
+				    const char *name)
 {
 	struct lguest_device *ldev = to_lgdev(vdev);
 	struct lguest_vq_info *lvq;
@@ -263,7 +264,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	/* OK, tell virtio_ring.c to set up a virtqueue now we know its size
 	 * and we've got a pointer to its pages. */
 	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback);
+				 vdev, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4d1d479..be3b734 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -906,20 +906,20 @@ static int virtnet_probe(struct virtio_device *vdev)
 		vi->mergeable_rx_bufs = true;
 
 	/* We expect two virtqueues, receive then send. */
-	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
+	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done, "input");
 	if (IS_ERR(vi->rvq)) {
 		err = PTR_ERR(vi->rvq);
 		goto free;
 	}
 
-	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
+	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done, "output");
 	if (IS_ERR(vi->svq)) {
 		err = PTR_ERR(vi->svq);
 		goto free_recv;
 	}
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vdev->config->find_vq(vdev, 2, NULL);
+		vi->cvq = vdev->config->find_vq(vdev, 2, NULL, "control");
 		if (IS_ERR(vi->cvq)) {
 			err = PTR_ERR(vi->svq);
 			goto free_send;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index cbc8566..ba8995f 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -173,8 +173,9 @@ static void kvm_notify(struct virtqueue *vq)
  * this device and sets it up.
  */
 static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
-				    unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				     unsigned index,
+				     void (*callback)(struct virtqueue *vq),
+				     const char *name)
 {
 	struct kvm_device *kdev = to_kvmdev(vdev);
 	struct kvm_vqconfig *config;
@@ -194,7 +195,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
 				 vdev, (void *) config->address,
-				 kvm_notify, callback);
+				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 6b68103..3f52c76 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -186,6 +186,8 @@ int register_virtio_device(struct virtio_device *dev)
 	/* Acknowledge that we've seen the device. */
 	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
+	INIT_LIST_HEAD(&dev->vqs);
+
 	/* device_register() causes the bus infrastructure to look for a
 	 * matching driver. */
 	err = device_register(&dev->dev);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9c76a06..0fa73b4 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -218,13 +218,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	vb->vdev = vdev;
 
 	/* We expect two virtqueues. */
-	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack);
+	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack, "inflate");
 	if (IS_ERR(vb->inflate_vq)) {
 		err = PTR_ERR(vb->inflate_vq);
 		goto out_free_vb;
 	}
 
-	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack);
+	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack, "deflate");
 	if (IS_ERR(vb->deflate_vq)) {
 		err = PTR_ERR(vb->deflate_vq);
 		goto out_del_inflate_vq;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 330aacb..be4047a 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -208,7 +208,8 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 
 /* the config->find_vq() implementation */
 static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				    void (*callback)(struct virtqueue *vq),
+				    const char *name)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
@@ -247,7 +248,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback);
+				 vdev, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5c52369..579fa69 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -23,21 +23,30 @@
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
-#define BAD_RING(_vq, fmt...)			\
-	do { dev_err(&(_vq)->vq.vdev->dev, fmt); BUG(); } while(0)
+#define BAD_RING(_vq, fmt, args...)				\
+	do {							\
+		dev_err(&(_vq)->vq.vdev->dev,			\
+			"%s:"fmt, (_vq)->vq.name, ##args);	\
+		BUG();						\
+	} while (0)
 /* Caller is supposed to guarantee no reentry. */
 #define START_USE(_vq)						\
 	do {							\
 		if ((_vq)->in_use)				\
-			panic("in_use = %i\n", (_vq)->in_use);	\
+			panic("%s:in_use = %i\n",		\
+			      (_vq)->vq.name, (_vq)->in_use);	\
 		(_vq)->in_use = __LINE__;			\
 		mb();						\
-	} while(0)
+	} while (0)
 #define END_USE(_vq) \
 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0)
 #else
-#define BAD_RING(_vq, fmt...)			\
-	do { dev_err(&_vq->vq.vdev->dev, fmt); (_vq)->broken = true; } while(0)
+#define BAD_RING(_vq, fmt, args...)				\
+	do {							\
+		dev_err(&_vq->vq.vdev->dev,			\
+			"%s:"fmt, (_vq)->vq.name, ##args);	\
+		(_vq)->broken = true;				\
+	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
 #endif
@@ -284,7 +293,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
-				      void (*callback)(struct virtqueue *))
+				      void (*callback)(struct virtqueue *),
+				      const char *name)
 {
 	struct vring_virtqueue *vq;
 	unsigned int i;
@@ -303,10 +313,12 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.vq_ops = &vring_vq_ops;
+	vq->vq.name = name;
 	vq->notify = notify;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
+	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
 #endif
@@ -327,6 +339,7 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue);
 
 void vring_del_virtqueue(struct virtqueue *vq)
 {
+	list_del(&vq->list);
 	kfree(to_vvq(vq));
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 9410394..4fca4f5 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -10,14 +10,17 @@
 
 /**
  * virtqueue - a queue to register buffers for sending or receiving.
+ * @list: the chain of virtqueues for this device
  * @callback: the function to call when buffers are consumed (can be NULL).
+ * @name: the name of this virtqueue (mainly for debugging)
  * @vdev: the virtio device this queue was created for.
  * @vq_ops: the operations for this virtqueue (see below).
  * @priv: a pointer for the virtqueue implementation to use.
  */
-struct virtqueue
-{
+struct virtqueue {
+	struct list_head list;
 	void (*callback)(struct virtqueue *vq);
+	const char *name;
 	struct virtio_device *vdev;
 	struct virtqueue_ops *vq_ops;
 	void *priv;
@@ -76,15 +79,16 @@ struct virtqueue_ops {
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
+ * @vqs: the list of virtqueues for this device.
  * @features: the features supported by both driver and device.
  * @priv: private pointer for the driver's use.
  */
-struct virtio_device
-{
+struct virtio_device {
 	int index;
 	struct device dev;
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
+	struct list_head vqs;
 	/* Note that this is a Linux set_bit-style bitmap. */
 	unsigned long features[1];
 	void *priv;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index bf8ec28..9fae274 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -55,7 +55,8 @@
  * @find_vq: find a virtqueue and instantiate it.
  *	vdev: the virtio_device
  *	index: the 0-based virtqueue number in case there's more than one.
- *	callback: the virqtueue callback
+ *	callback: the virtqueue callback
+ *	name: the virtqueue name (mainly for debugging)
  *	Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
  * @del_vq: free a virtqueue found by find_vq().
  * @get_features: get the array of feature bits for this device.
@@ -77,7 +78,8 @@ struct virtio_config_ops
 	void (*reset)(struct virtio_device *vdev);
 	struct virtqueue *(*find_vq)(struct virtio_device *vdev,
 				     unsigned index,
-				     void (*callback)(struct virtqueue *));
+				     void (*callback)(struct virtqueue *),
+				     const char *name);
 	void (*del_vq)(struct virtqueue *vq);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 71e0372..166c519 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -119,7 +119,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
-				      void (*callback)(struct virtqueue *vq));
+				      void (*callback)(struct virtqueue *vq),
+				      const char *name);
 void vring_del_virtqueue(struct virtqueue *vq);
 /* Filter out transport-specific feature bits. */
 void vring_transport_features(struct virtio_device *vdev);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index bb8579a..ab8791f 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -246,7 +246,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	chan->vdev = vdev;
 
 	/* We expect one virtqueue, for requests. */
-	chan->vq = vdev->config->find_vq(vdev, 0, req_done);
+	chan->vq = vdev->config->find_vq(vdev, 0, req_done, "requests");
 	if (IS_ERR(chan->vq)) {
 		err = PTR_ERR(chan->vq);
 		goto out_free_vq;
-- 
cgit v0.10.2


From d2a7ddda9ffb1c8961abff6714b0f1eb925c120f Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 12 Jun 2009 22:16:36 -0600
Subject: virtio: find_vqs/del_vqs virtio operations

This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations,
and updates all drivers. This is needed for MSI support, because MSI
needs to know the total number of vectors upfront.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (+ lguest/9p compile fixes)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index db55a50..07d8e59 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -288,7 +288,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	sg_init_table(vblk->sg, vblk->sg_elems);
 
 	/* We expect one virtqueue, for output. */
-	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done, "requests");
+	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
 	if (IS_ERR(vblk->vq)) {
 		err = PTR_ERR(vblk->vq);
 		goto out_free_vblk;
@@ -388,7 +388,7 @@ out_put_disk:
 out_mempool:
 	mempool_destroy(vblk->pool);
 out_free_vq:
-	vdev->config->del_vq(vblk->vq);
+	vdev->config->del_vqs(vdev);
 out_free_vblk:
 	kfree(vblk);
 out:
@@ -409,7 +409,7 @@ static void virtblk_remove(struct virtio_device *vdev)
 	blk_cleanup_queue(vblk->disk->queue);
 	put_disk(vblk->disk);
 	mempool_destroy(vblk->pool);
-	vdev->config->del_vq(vblk->vq);
+	vdev->config->del_vqs(vdev);
 	kfree(vblk);
 }
 
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 2aeafce..f2041fe 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -94,13 +94,13 @@ static int virtrng_probe(struct virtio_device *vdev)
 	int err;
 
 	/* We expect a single virtqueue. */
-	vq = vdev->config->find_vq(vdev, 0, random_recv_done, "input");
+	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
 	if (IS_ERR(vq))
 		return PTR_ERR(vq);
 
 	err = hwrng_register(&virtio_hwrng);
 	if (err) {
-		vdev->config->del_vq(vq);
+		vdev->config->del_vqs(vdev);
 		return err;
 	}
 
@@ -112,7 +112,7 @@ static void virtrng_remove(struct virtio_device *vdev)
 {
 	vdev->config->reset(vdev);
 	hwrng_unregister(&virtio_hwrng);
-	vdev->config->del_vq(vq);
+	vdev->config->del_vqs(vdev);
 }
 
 static struct virtio_device_id id_table[] = {
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 58684e4..c74dacf 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -188,6 +188,9 @@ static void hvc_handle_input(struct virtqueue *vq)
  * Finally we put our input buffer in the input queue, ready to receive. */
 static int __devinit virtcons_probe(struct virtio_device *dev)
 {
+	vq_callback_t *callbacks[] = { hvc_handle_input, NULL};
+	const char *names[] = { "input", "output" };
+	struct virtqueue *vqs[2];
 	int err;
 
 	vdev = dev;
@@ -199,20 +202,15 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 		goto fail;
 	}
 
-	/* Find the input queue. */
+	/* Find the queues. */
 	/* FIXME: This is why we want to wean off hvc: we do nothing
 	 * when input comes in. */
-	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input, "input");
-	if (IS_ERR(in_vq)) {
-		err = PTR_ERR(in_vq);
+	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	if (err)
 		goto free;
-	}
 
-	out_vq = vdev->config->find_vq(vdev, 1, NULL, "output");
-	if (IS_ERR(out_vq)) {
-		err = PTR_ERR(out_vq);
-		goto free_in_vq;
-	}
+	in_vq = vqs[0];
+	out_vq = vqs[1];
 
 	/* Start using the new console output. */
 	virtio_cons.get_chars = get_chars;
@@ -233,17 +231,15 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
 	if (IS_ERR(hvc)) {
 		err = PTR_ERR(hvc);
-		goto free_out_vq;
+		goto free_vqs;
 	}
 
 	/* Register the input buffer the first time. */
 	add_inbuf();
 	return 0;
 
-free_out_vq:
-	vdev->config->del_vq(out_vq);
-free_in_vq:
-	vdev->config->del_vq(in_vq);
+free_vqs:
+	vdev->config->del_vqs(vdev);
 free:
 	kfree(inbuf);
 fail:
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 4babed8..e082cda 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -313,6 +313,38 @@ static void lg_del_vq(struct virtqueue *vq)
 	kfree(lvq);
 }
 
+static void lg_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		lg_del_vq(vq);
+}
+
+static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       vq_callback_t *callbacks[],
+		       const char *names[])
+{
+	struct lguest_device *ldev = to_lgdev(vdev);
+	int i;
+
+	/* We must have this many virtqueues. */
+	if (nvqs > ldev->desc->num_vq)
+		return -ENOENT;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	lg_del_vqs(vdev);
+	return PTR_ERR(vqs[i]);
+}
+
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
@@ -322,8 +354,8 @@ static struct virtio_config_ops lguest_config_ops = {
 	.get_status = lg_get_status,
 	.set_status = lg_set_status,
 	.reset = lg_reset,
-	.find_vq = lg_find_vq,
-	.del_vq = lg_del_vq,
+	.find_vqs = lg_find_vqs,
+	.del_vqs = lg_del_vqs,
 };
 
 /* The root device for the lguest virtio devices.  This makes them appear as
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index be3b734..7fa620d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -845,6 +845,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	int err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
+	const char *names[] = { "input", "output", "control" };
+	int nvqs;
 
 	/* Allocate ourselves a network device with room for our info */
 	dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -905,25 +909,19 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
-	/* We expect two virtqueues, receive then send. */
-	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done, "input");
-	if (IS_ERR(vi->rvq)) {
-		err = PTR_ERR(vi->rvq);
+	/* We expect two virtqueues, receive then send,
+	 * and optionally control. */
+	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
+
+	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+	if (err)
 		goto free;
-	}
 
-	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done, "output");
-	if (IS_ERR(vi->svq)) {
-		err = PTR_ERR(vi->svq);
-		goto free_recv;
-	}
+	vi->rvq = vqs[0];
+	vi->svq = vqs[1];
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vdev->config->find_vq(vdev, 2, NULL, "control");
-		if (IS_ERR(vi->cvq)) {
-			err = PTR_ERR(vi->svq);
-			goto free_send;
-		}
+		vi->cvq = vqs[2];
 
 		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
 			dev->features |= NETIF_F_HW_VLAN_FILTER;
@@ -941,7 +939,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
-		goto free_ctrl;
+		goto free_vqs;
 	}
 
 	/* Last of all, set up some receive buffers. */
@@ -962,13 +960,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 unregister:
 	unregister_netdev(dev);
-free_ctrl:
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
-		vdev->config->del_vq(vi->cvq);
-free_send:
-	vdev->config->del_vq(vi->svq);
-free_recv:
-	vdev->config->del_vq(vi->rvq);
+free_vqs:
+	vdev->config->del_vqs(vdev);
 free:
 	free_netdev(dev);
 	return err;
@@ -994,12 +987,10 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	BUG_ON(vi->num != 0);
 
-	vdev->config->del_vq(vi->svq);
-	vdev->config->del_vq(vi->rvq);
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
-		vdev->config->del_vq(vi->cvq);
 	unregister_netdev(vi->dev);
 
+	vdev->config->del_vqs(vi->vdev);
+
 	while (vi->pages)
 		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
 
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index ba8995f..e38e5d3 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -227,6 +227,38 @@ static void kvm_del_vq(struct virtqueue *vq)
 				       KVM_S390_VIRTIO_RING_ALIGN));
 }
 
+static void kvm_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		kvm_del_vq(vq);
+}
+
+static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[])
+{
+	struct kvm_device *kdev = to_kvmdev(vdev);
+	int i;
+
+	/* We must have this many virtqueues. */
+	if (nvqs > kdev->desc->num_vq)
+		return -ENOENT;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	kvm_del_vqs(vdev);
+	return PTR_ERR(vqs[i]);
+}
+
 /*
  * The config ops structure as defined by virtio config
  */
@@ -238,8 +270,8 @@ static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.get_status = kvm_get_status,
 	.set_status = kvm_set_status,
 	.reset = kvm_reset,
-	.find_vq = kvm_find_vq,
-	.del_vq = kvm_del_vq,
+	.find_vqs = kvm_find_vqs,
+	.del_vqs = kvm_del_vqs,
 };
 
 /*
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 0fa73b4..26b2782 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -204,6 +204,9 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
+	struct virtqueue *vqs[2];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
+	const char *names[] = { "inflate", "deflate" };
 	int err;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -218,22 +221,17 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	vb->vdev = vdev;
 
 	/* We expect two virtqueues. */
-	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack, "inflate");
-	if (IS_ERR(vb->inflate_vq)) {
-		err = PTR_ERR(vb->inflate_vq);
+	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	if (err)
 		goto out_free_vb;
-	}
 
-	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack, "deflate");
-	if (IS_ERR(vb->deflate_vq)) {
-		err = PTR_ERR(vb->deflate_vq);
-		goto out_del_inflate_vq;
-	}
+	vb->inflate_vq = vqs[0];
+	vb->deflate_vq = vqs[1];
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
 		err = PTR_ERR(vb->thread);
-		goto out_del_deflate_vq;
+		goto out_del_vqs;
 	}
 
 	vb->tell_host_first
@@ -241,10 +239,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
 	return 0;
 
-out_del_deflate_vq:
-	vdev->config->del_vq(vb->deflate_vq);
-out_del_inflate_vq:
-	vdev->config->del_vq(vb->inflate_vq);
+out_del_vqs:
+	vdev->config->del_vqs(vdev);
 out_free_vb:
 	kfree(vb);
 out:
@@ -264,8 +260,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	/* Now we reset the device so we can clean up the queues. */
 	vdev->config->reset(vdev);
 
-	vdev->config->del_vq(vb->deflate_vq);
-	vdev->config->del_vq(vb->inflate_vq);
+	vdev->config->del_vqs(vdev);
 	kfree(vb);
 }
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index be4047a..027f13f 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -276,11 +276,7 @@ static void vp_del_vq(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 	struct virtio_pci_vq_info *info = vq->priv;
-	unsigned long flags, size;
-
-	spin_lock_irqsave(&vp_dev->lock, flags);
-	list_del(&info->node);
-	spin_unlock_irqrestore(&vp_dev->lock, flags);
+	unsigned long size;
 
 	vring_del_virtqueue(vq);
 
@@ -293,14 +289,41 @@ static void vp_del_vq(struct virtqueue *vq)
 	kfree(info);
 }
 
+static void vp_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		vp_del_vq(vq);
+}
+
+static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       vq_callback_t *callbacks[],
+		       const char *names[])
+{
+	int i;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	vp_del_vqs(vdev);
+	return PTR_ERR(vqs[i]);
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
 	.get_status	= vp_get_status,
 	.set_status	= vp_set_status,
 	.reset		= vp_reset,
-	.find_vq	= vp_find_vq,
-	.del_vq		= vp_del_vq,
+	.find_vqs	= vp_find_vqs,
+	.del_vqs	= vp_del_vqs,
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
 };
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 9fae274..4cd290c 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -29,6 +29,7 @@
 #define VIRTIO_F_NOTIFY_ON_EMPTY	24
 
 #ifdef __KERNEL__
+#include <linux/err.h>
 #include <linux/virtio.h>
 
 /**
@@ -49,16 +50,26 @@
  * @set_status: write the status byte
  *	vdev: the virtio_device
  *	status: the new status byte
+ * @request_vqs: request the specified number of virtqueues
+ *	vdev: the virtio_device
+ *	max_vqs: the max number of virtqueues we want
+ *      If supplied, must call before any virtqueues are instantiated.
+ *      To modify the max number of virtqueues after request_vqs has been
+ *      called, call free_vqs and then request_vqs with a new value.
+ * @free_vqs: cleanup resources allocated by request_vqs
+ *	vdev: the virtio_device
+ *      If supplied, must call after all virtqueues have been deleted.
  * @reset: reset the device
  *	vdev: the virtio device
  *	After this, status and feature negotiation must be done again
- * @find_vq: find a virtqueue and instantiate it.
+ * @find_vqs: find virtqueues and instantiate them.
  *	vdev: the virtio_device
- *	index: the 0-based virtqueue number in case there's more than one.
- *	callback: the virtqueue callback
- *	name: the virtqueue name (mainly for debugging)
- *	Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
- * @del_vq: free a virtqueue found by find_vq().
+ *	nvqs: the number of virtqueues to find
+ *	vqs: on success, includes new virtqueues
+ *	callbacks: array of callbacks, for each virtqueue
+ *	names: array of virtqueue names (mainly for debugging)
+ *	Returns 0 on success or error status
+ * @del_vqs: free virtqueues found by find_vqs().
  * @get_features: get the array of feature bits for this device.
  *	vdev: the virtio_device
  *	Returns the first 32 feature bits (all we currently need).
@@ -67,6 +78,7 @@
  *	This gives the final feature bits for the device: it can change
  *	the dev->feature bits if it wants.
  */
+typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops
 {
 	void (*get)(struct virtio_device *vdev, unsigned offset,
@@ -76,11 +88,11 @@ struct virtio_config_ops
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
-	struct virtqueue *(*find_vq)(struct virtio_device *vdev,
-				     unsigned index,
-				     void (*callback)(struct virtqueue *),
-				     const char *name);
-	void (*del_vq)(struct virtqueue *vq);
+	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[]);
+	void (*del_vqs)(struct virtio_device *);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 };
@@ -128,5 +140,18 @@ static inline int virtio_config_buf(struct virtio_device *vdev,
 	vdev->config->get(vdev, offset, buf, len);
 	return 0;
 }
+
+static inline
+struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
+					vq_callback_t *c, const char *n)
+{
+	vq_callback_t *callbacks[] = { c };
+	const char *names[] = { n };
+	struct virtqueue *vq;
+	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names);
+	if (err < 0)
+		return ERR_PTR(err);
+	return vq;
+}
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index ab8791f..a49484e 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -246,7 +246,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	chan->vdev = vdev;
 
 	/* We expect one virtqueue, for requests. */
-	chan->vq = vdev->config->find_vq(vdev, 0, req_done, "requests");
+	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
 	if (IS_ERR(chan->vq)) {
 		err = PTR_ERR(chan->vq);
 		goto out_free_vq;
@@ -261,7 +261,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	return 0;
 
 out_free_vq:
-	vdev->config->del_vq(chan->vq);
+	vdev->config->del_vqs(vdev);
 fail:
 	mutex_lock(&virtio_9p_lock);
 	chan_index--;
@@ -332,7 +332,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	BUG_ON(chan->inuse);
 
 	if (chan->initialized) {
-		vdev->config->del_vq(chan->vq);
+		vdev->config->del_vqs(vdev);
 		chan->initialized = false;
 	}
 }
-- 
cgit v0.10.2


From 77cf524654a886e0fbbf03b16b44f048deef7b0c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 14 May 2009 13:55:31 +0300
Subject: virtio_pci: split up vp_interrupt

This reorganizes virtio-pci code in vp_interrupt slightly, so that
it's easier to add per-vq MSI support on top.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 027f13f..951e673 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -164,6 +164,37 @@ static void vp_notify(struct virtqueue *vq)
 	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
+/* Handle a configuration change: Tell driver if it wants to know. */
+static irqreturn_t vp_config_changed(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_driver *drv;
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv && drv->config_changed)
+		drv->config_changed(&vp_dev->vdev);
+	return IRQ_HANDLED;
+}
+
+/* Notify all virtqueues on an interrupt. */
+static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_pci_vq_info *info;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vp_dev->lock, flags);
+	list_for_each_entry(info, &vp_dev->virtqueues, node) {
+		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
+			ret = IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+	return ret;
+}
+
 /* A small wrapper to also acknowledge the interrupt when it's handled.
  * I really need an EIO hook for the vring so I can ack the interrupt once we
  * know that we'll be handling the IRQ but before we invoke the callback since
@@ -173,9 +204,6 @@ static void vp_notify(struct virtqueue *vq)
 static irqreturn_t vp_interrupt(int irq, void *opaque)
 {
 	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_pci_vq_info *info;
-	irqreturn_t ret = IRQ_NONE;
-	unsigned long flags;
 	u8 isr;
 
 	/* reading the ISR has the effect of also clearing it so it's very
@@ -187,23 +215,10 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 		return IRQ_NONE;
 
 	/* Configuration change?  Tell driver if it wants to know. */
-	if (isr & VIRTIO_PCI_ISR_CONFIG) {
-		struct virtio_driver *drv;
-		drv = container_of(vp_dev->vdev.dev.driver,
-				   struct virtio_driver, driver);
-
-		if (drv && drv->config_changed)
-			drv->config_changed(&vp_dev->vdev);
-	}
+	if (isr & VIRTIO_PCI_ISR_CONFIG)
+		vp_config_changed(irq, opaque);
 
-	spin_lock_irqsave(&vp_dev->lock, flags);
-	list_for_each_entry(info, &vp_dev->virtqueues, node) {
-		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
-			ret = IRQ_HANDLED;
-	}
-	spin_unlock_irqrestore(&vp_dev->lock, flags);
-
-	return ret;
+	return vp_vring_interrupt(irq, opaque);
 }
 
 /* the config->find_vq() implementation */
-- 
cgit v0.10.2


From 82af8ce84ed65d2fb6d8c017d3f2bbbf161061fb Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 14 May 2009 13:55:41 +0300
Subject: virtio_pci: optional MSI-X support

This implements optional MSI-X support in virtio_pci.
MSI-X is used whenever the host supports at least 2 MSI-X
vectors: 1 for configuration changes and 1 for virtqueues.
Per-virtqueue vectors are allocated if enough vectors
available.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (+ whitespace, style)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 951e673..193c8f0 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -42,6 +42,26 @@ struct virtio_pci_device
 	/* a list of queues so we can dispatch IRQs */
 	spinlock_t lock;
 	struct list_head virtqueues;
+
+	/* MSI-X support */
+	int msix_enabled;
+	int intx_enabled;
+	struct msix_entry *msix_entries;
+	/* Name strings for interrupts. This size should be enough,
+	 * and I'm too lazy to allocate each name separately. */
+	char (*msix_names)[256];
+	/* Number of available vectors */
+	unsigned msix_vectors;
+	/* Vectors allocated */
+	unsigned msix_used_vectors;
+};
+
+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues Thus, we need at least 2 vectors for MSI. */
+enum {
+	VP_MSIX_CONFIG_VECTOR = 0,
+	VP_MSIX_VQ_VECTOR = 1,
 };
 
 struct virtio_pci_vq_info
@@ -60,6 +80,9 @@ struct virtio_pci_vq_info
 
 	/* the list node for the virtqueues list */
 	struct list_head node;
+
+	/* MSI-X vector (or none) */
+	unsigned vector;
 };
 
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
@@ -109,7 +132,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
 		   void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
+	void __iomem *ioaddr = vp_dev->ioaddr +
+				VIRTIO_PCI_CONFIG(vp_dev) + offset;
 	u8 *ptr = buf;
 	int i;
 
@@ -123,7 +147,8 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
 		   const void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
+	void __iomem *ioaddr = vp_dev->ioaddr +
+				VIRTIO_PCI_CONFIG(vp_dev) + offset;
 	const u8 *ptr = buf;
 	int i;
 
@@ -221,7 +246,122 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 	return vp_vring_interrupt(irq, opaque);
 }
 
-/* the config->find_vq() implementation */
+static void vp_free_vectors(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled) {
+		free_irq(vp_dev->pci_dev->irq, vp_dev);
+		vp_dev->intx_enabled = 0;
+	}
+
+	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
+		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
+	vp_dev->msix_used_vectors = 0;
+
+	if (vp_dev->msix_enabled) {
+		/* Disable the vector used for configuration */
+		iowrite16(VIRTIO_MSI_NO_VECTOR,
+			  vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		/* Flush the write out to device */
+		ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+
+		vp_dev->msix_enabled = 0;
+		pci_disable_msix(vp_dev->pci_dev);
+	}
+}
+
+static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+			  int *options, int noptions)
+{
+	int i;
+	for (i = 0; i < noptions; ++i)
+		if (!pci_enable_msix(dev, entries, options[i]))
+			return options[i];
+	return -EBUSY;
+}
+
+static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	const char *name = dev_name(&vp_dev->vdev.dev);
+	unsigned i, v;
+	int err = -ENOMEM;
+	/* We want at most one vector per queue and one for config changes.
+	 * Fallback to separate vectors for config and a shared for queues.
+	 * Finally fall back to regular interrupts. */
+	int options[] = { max_vqs + 1, 2 };
+	int nvectors = max(options[0], options[1]);
+
+	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
+				       GFP_KERNEL);
+	if (!vp_dev->msix_entries)
+		goto error_entries;
+	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
+				     GFP_KERNEL);
+	if (!vp_dev->msix_names)
+		goto error_names;
+
+	for (i = 0; i < nvectors; ++i)
+		vp_dev->msix_entries[i].entry = i;
+
+	err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries,
+			     options, ARRAY_SIZE(options));
+	if (err < 0) {
+		/* Can't allocate enough MSI-X vectors, use regular interrupt */
+		vp_dev->msix_vectors = 0;
+		err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
+				  IRQF_SHARED, name, vp_dev);
+		if (err)
+			goto error_irq;
+		vp_dev->intx_enabled = 1;
+	} else {
+		vp_dev->msix_vectors = err;
+		vp_dev->msix_enabled = 1;
+
+		/* Set the vector used for configuration */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-config", name);
+		err = request_irq(vp_dev->msix_entries[v].vector,
+				  vp_config_changed, 0, vp_dev->msix_names[v],
+				  vp_dev);
+		if (err)
+			goto error_irq;
+		++vp_dev->msix_used_vectors;
+
+		iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		/* Verify we had enough resources to assign the vector */
+		v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		if (v == VIRTIO_MSI_NO_VECTOR) {
+			err = -EBUSY;
+			goto error_irq;
+		}
+	}
+
+	if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) {
+		/* Shared vector for all VQs */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-virtqueues", name);
+		err = request_irq(vp_dev->msix_entries[v].vector,
+				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
+				  vp_dev);
+		if (err)
+			goto error_irq;
+		++vp_dev->msix_used_vectors;
+	}
+	return 0;
+error_irq:
+	vp_free_vectors(vdev);
+	kfree(vp_dev->msix_names);
+error_names:
+	kfree(vp_dev->msix_entries);
+error_entries:
+	return err;
+}
+
 static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 				    void (*callback)(struct virtqueue *vq),
 				    const char *name)
@@ -230,7 +370,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	struct virtio_pci_vq_info *info;
 	struct virtqueue *vq;
 	unsigned long flags, size;
-	u16 num;
+	u16 num, vector;
 	int err;
 
 	/* Select the queue we're interested in */
@@ -249,6 +389,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 
 	info->queue_index = index;
 	info->num = num;
+	info->vector = VIRTIO_MSI_NO_VECTOR;
 
 	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
@@ -272,12 +413,43 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	vq->priv = info;
 	info->vq = vq;
 
+	/* allocate per-vq vector if available and necessary */
+	if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) {
+		vector = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names,
+			 "%s-%s", dev_name(&vp_dev->vdev.dev), name);
+		err = request_irq(vp_dev->msix_entries[vector].vector,
+				  vring_interrupt, 0,
+				  vp_dev->msix_names[vector], vq);
+		if (err)
+			goto out_request_irq;
+		info->vector = vector;
+		++vp_dev->msix_used_vectors;
+	} else
+		vector = VP_MSIX_VQ_VECTOR;
+
+	 if (callback && vp_dev->msix_enabled) {
+		iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		if (vector == VIRTIO_MSI_NO_VECTOR) {
+			err = -EBUSY;
+			goto out_assign;
+		}
+	}
+
 	spin_lock_irqsave(&vp_dev->lock, flags);
 	list_add(&info->node, &vp_dev->virtqueues);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	return vq;
 
+out_assign:
+	if (info->vector != VIRTIO_MSI_NO_VECTOR) {
+		free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+		--vp_dev->msix_used_vectors;
+	}
+out_request_irq:
+	vring_del_virtqueue(vq);
 out_activate_queue:
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 	free_pages_exact(info->queue, size);
@@ -286,17 +458,27 @@ out_info:
 	return ERR_PTR(err);
 }
 
-/* the config->del_vq() implementation */
 static void vp_del_vq(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 	struct virtio_pci_vq_info *info = vq->priv;
 	unsigned long size;
 
+	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+
+	if (info->vector != VIRTIO_MSI_NO_VECTOR)
+		free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+
+	if (vp_dev->msix_enabled) {
+		iowrite16(VIRTIO_MSI_NO_VECTOR,
+			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		/* Flush the write out to device */
+		ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+	}
+
 	vring_del_virtqueue(vq);
 
 	/* Select and deactivate the queue */
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
@@ -304,30 +486,46 @@ static void vp_del_vq(struct virtqueue *vq)
 	kfree(info);
 }
 
+/* the config->del_vqs() implementation */
 static void vp_del_vqs(struct virtio_device *vdev)
 {
 	struct virtqueue *vq, *n;
 
 	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
 		vp_del_vq(vq);
+
+	vp_free_vectors(vdev);
 }
 
+/* the config->find_vqs() implementation */
 static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		       struct virtqueue *vqs[],
 		       vq_callback_t *callbacks[],
 		       const char *names[])
 {
-	int i;
+	int vectors = 0;
+	int i, err;
+
+	/* How many vectors would we like? */
+	for (i = 0; i < nvqs; ++i)
+		if (callbacks[i])
+			++vectors;
+
+	err = vp_request_vectors(vdev, vectors);
+	if (err)
+		goto error_request;
 
 	for (i = 0; i < nvqs; ++i) {
 		vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]);
 		if (IS_ERR(vqs[i]))
-			goto error;
+			goto error_find;
 	}
 	return 0;
 
-error:
+error_find:
 	vp_del_vqs(vdev);
+
+error_request:
 	return PTR_ERR(vqs[i]);
 }
 
@@ -349,7 +547,7 @@ static void virtio_pci_release_dev(struct device *_d)
 	struct virtio_pci_device *vp_dev = to_vp_device(dev);
 	struct pci_dev *pci_dev = vp_dev->pci_dev;
 
-	free_irq(pci_dev->irq, vp_dev);
+	vp_del_vqs(dev);
 	pci_set_drvdata(pci_dev, NULL);
 	pci_iounmap(pci_dev, vp_dev->ioaddr);
 	pci_release_regions(pci_dev);
@@ -408,21 +606,13 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
 	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
 	vp_dev->vdev.id.device = pci_dev->subsystem_device;
 
-	/* register a handler for the queue with the PCI device's interrupt */
-	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
-			  dev_name(&vp_dev->vdev.dev), vp_dev);
-	if (err)
-		goto out_set_drvdata;
-
 	/* finally register the virtio device */
 	err = register_virtio_device(&vp_dev->vdev);
 	if (err)
-		goto out_req_irq;
+		goto out_set_drvdata;
 
 	return 0;
 
-out_req_irq:
-	free_irq(pci_dev->irq, vp_dev);
 out_set_drvdata:
 	pci_set_drvdata(pci_dev, NULL);
 	pci_iounmap(pci_dev, vp_dev->ioaddr);
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index cd0fd5d..9a3d7c4 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -47,9 +47,17 @@
 /* The bit of the ISR which indicates a device configuration change. */
 #define VIRTIO_PCI_ISR_CONFIG		0x2
 
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+/* Vector value used to disable MSI for queue */
+#define VIRTIO_MSI_NO_VECTOR            0xffff
+
 /* The remaining space is defined by each driver as the per-driver
  * configuration space */
-#define VIRTIO_PCI_CONFIG		20
+#define VIRTIO_PCI_CONFIG(dev)		((dev)->msix_enabled ? 24 : 20)
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0
-- 
cgit v0.10.2


From a92892825a122a74ddad1d408fa27132e28b05ae Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:16:37 -0600
Subject: virtio: expose features in sysfs

Each device negotiates feature bits; expose these in sysfs to help
diagnostics and debugging.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3f52c76..bd074525 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -31,11 +31,27 @@ static ssize_t modalias_show(struct device *_d,
 	return sprintf(buf, "virtio:d%08Xv%08X\n",
 		       dev->id.device, dev->id.vendor);
 }
+static ssize_t features_show(struct device *_d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
+	unsigned int i;
+	ssize_t len = 0;
+
+	/* We actually represent this as a bitstring, as it could be
+	 * arbitrary length in future. */
+	for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++)
+		len += sprintf(buf+len, "%c",
+			       test_bit(i, dev->features) ? '1' : '0');
+	len += sprintf(buf+len, "\n");
+	return len;
+}
 static struct device_attribute virtio_dev_attrs[] = {
 	__ATTR_RO(device),
 	__ATTR_RO(vendor),
 	__ATTR_RO(status),
 	__ATTR_RO(modalias),
+	__ATTR_RO(features),
 	__ATTR_NULL
 };
 
-- 
cgit v0.10.2


From ee006b353f1ca8c9a8470b72b462beb011d62e32 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 11 May 2009 18:11:44 +0100
Subject: virtio: teach virtio_has_feature() about transport features

Drivers don't add transport features to their table, so we
shouldn't check these with virtio_check_driver_offered_feature().

We could perhaps add an ->offered_feature() virtio_config_op,
but that perhaps that would be overkill for a consitency check
like this.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 4cd290c..99f5145 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -113,7 +113,9 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
 	if (__builtin_constant_p(fbit))
 		BUILD_BUG_ON(fbit >= 32);
 
-	virtio_check_driver_offered_feature(vdev, fbit);
+	if (fbit < VIRTIO_TRANSPORT_F_START)
+		virtio_check_driver_offered_feature(vdev, fbit);
+
 	return test_bit(fbit, vdev->features);
 }
 
-- 
cgit v0.10.2


From 9fa29b9df32ba4db055f3977933cd0c1b8fe67cd Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 11 May 2009 18:11:45 +0100
Subject: virtio: indirect ring entries (VIRTIO_RING_F_INDIRECT_DESC)

Add a new feature flag for indirect ring entries. These are ring
entries which point to a table of buffer descriptors.

The idea here is to increase the ring capacity by allowing a larger
effective ring size whereby the ring size dictates the number of
requests that may be outstanding, rather than the size of those
requests.

This should be most effective in the case of block I/O where we can
potentially benefit by concurrently dispatching a large number of
large requests. Even in the simple case of single segment block
requests, this results in a threefold increase in ring capacity.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 579fa69..a882f26 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -61,6 +61,9 @@ struct vring_virtqueue
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
+	/* Host supports indirect buffers */
+	bool indirect;
+
 	/* Number of free buffers */
 	unsigned int num_free;
 	/* Head of free buffer list. */
@@ -85,6 +88,55 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+/* Set up an indirect table of descriptors and add it to the queue. */
+static int vring_add_indirect(struct vring_virtqueue *vq,
+			      struct scatterlist sg[],
+			      unsigned int out,
+			      unsigned int in)
+{
+	struct vring_desc *desc;
+	unsigned head;
+	int i;
+
+	desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC);
+	if (!desc)
+		return vq->vring.num;
+
+	/* Transfer entries from the sg list into the indirect page */
+	for (i = 0; i < out; i++) {
+		desc[i].flags = VRING_DESC_F_NEXT;
+		desc[i].addr = sg_phys(sg);
+		desc[i].len = sg->length;
+		desc[i].next = i+1;
+		sg++;
+	}
+	for (; i < (out + in); i++) {
+		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+		desc[i].addr = sg_phys(sg);
+		desc[i].len = sg->length;
+		desc[i].next = i+1;
+		sg++;
+	}
+
+	/* Last one doesn't continue. */
+	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
+	desc[i-1].next = 0;
+
+	/* We're about to use a buffer */
+	vq->num_free--;
+
+	/* Use a single buffer which doesn't continue */
+	head = vq->free_head;
+	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+	vq->vring.desc[head].addr = virt_to_phys(desc);
+	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
+
+	/* Update free pointer */
+	vq->free_head = vq->vring.desc[head].next;
+
+	return head;
+}
+
 static int vring_add_buf(struct virtqueue *_vq,
 			 struct scatterlist sg[],
 			 unsigned int out,
@@ -94,12 +146,21 @@ static int vring_add_buf(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i, avail, head, uninitialized_var(prev);
 
+	START_USE(vq);
+
 	BUG_ON(data == NULL);
+
+	/* If the host supports indirect descriptor tables, and we have multiple
+	 * buffers, then go indirect. FIXME: tune this threshold */
+	if (vq->indirect && (out + in) > 1 && vq->num_free) {
+		head = vring_add_indirect(vq, sg, out, in);
+		if (head != vq->vring.num)
+			goto add_head;
+	}
+
 	BUG_ON(out + in > vq->vring.num);
 	BUG_ON(out + in == 0);
 
-	START_USE(vq);
-
 	if (vq->num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 out + in, vq->num_free);
@@ -136,6 +197,7 @@ static int vring_add_buf(struct virtqueue *_vq,
 	/* Update free pointer */
 	vq->free_head = i;
 
+add_head:
 	/* Set token. */
 	vq->data[head] = data;
 
@@ -179,6 +241,11 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 
 	/* Put back on free list: find end */
 	i = head;
+
+	/* Free the indirect table */
+	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
+		kfree(phys_to_virt(vq->vring.desc[i].addr));
+
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
 		vq->num_free++;
@@ -323,6 +390,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->in_use = false;
 #endif
 
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback)
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
@@ -351,6 +420,8 @@ void vring_transport_features(struct virtio_device *vdev)
 
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
 		switch (i) {
+		case VIRTIO_RING_F_INDIRECT_DESC:
+			break;
 		default:
 			/* We don't understand this bit. */
 			clear_bit(i, vdev->features);
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 166c519..693e0ec 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -14,6 +14,8 @@
 #define VRING_DESC_F_NEXT	1
 /* This marks a buffer as write-only (otherwise read-only). */
 #define VRING_DESC_F_WRITE	2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT	4
 
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
@@ -24,6 +26,9 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT	1
 
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC	28
+
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc
 {
-- 
cgit v0.10.2


From 98e94444748e9af93423d1fab90543e75569a58c Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 18 May 2009 03:39:09 -0400
Subject: virtio_blk: add missing __dev{init,exit} markings

The remove member of the virtio_driver structure uses __devexit_p(), so
the remove function itself should be marked with __devexit.  And where
there be __devexit on the remove, so is there __devinit on the probe.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 07d8e59..43db3ea 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -254,7 +254,7 @@ static int index_to_minor(int index)
 	return index << PART_BITS;
 }
 
-static int virtblk_probe(struct virtio_device *vdev)
+static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
 	int err;
@@ -395,7 +395,7 @@ out:
 	return err;
 }
 
-static void virtblk_remove(struct virtio_device *vdev)
+static void __devexit virtblk_remove(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk = vdev->priv;
 
-- 
cgit v0.10.2


From 594de1dd6449f79c99e1ba4577ea0e4e06e2b405 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:16:39 -0600
Subject: virtio: handle short buffers in virtio_rng.

If the device fills less than 4 bytes of our random buffer, we'll
BUG_ON.  It's nicer to handle the case where it partially fills the
buffer (the protocol doesn't explicitly bad that).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index f2041fe..32216b6 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -35,13 +35,13 @@ static DECLARE_COMPLETION(have_data);
 
 static void random_recv_done(struct virtqueue *vq)
 {
-	int len;
+	unsigned int len;
 
 	/* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
 	if (!vq->vq_ops->get_buf(vq, &len))
 		return;
 
-	data_left = len / sizeof(random_data[0]);
+	data_left += len;
 	complete(&have_data);
 }
 
@@ -49,7 +49,7 @@ static void register_buffer(void)
 {
 	struct scatterlist sg;
 
-	sg_init_one(&sg, random_data, RANDOM_DATA_SIZE);
+	sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left);
 	/* There should always be room for one buffer. */
 	if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0)
 		BUG();
@@ -59,24 +59,32 @@ static void register_buffer(void)
 /* At least we don't udelay() in a loop like some other drivers. */
 static int virtio_data_present(struct hwrng *rng, int wait)
 {
-	if (data_left)
+	if (data_left >= sizeof(u32))
 		return 1;
 
+again:
 	if (!wait)
 		return 0;
 
 	wait_for_completion(&have_data);
+
+	/* Not enough?  Re-register. */
+	if (unlikely(data_left < sizeof(u32))) {
+		register_buffer();
+		goto again;
+	}
+
 	return 1;
 }
 
 /* virtio_data_present() must have succeeded before this is called. */
 static int virtio_data_read(struct hwrng *rng, u32 *data)
 {
-	BUG_ON(!data_left);
-
-	*data = random_data[--data_left];
+	BUG_ON(data_left < sizeof(u32));
+	data_left -= sizeof(u32);
+	*data = random_data[data_left / 4];
 
-	if (!data_left) {
+	if (data_left < sizeof(u32)) {
 		init_completion(&have_data);
 		register_buffer();
 	}
-- 
cgit v0.10.2


From c89e80168ba1ed37627fe03116b0cf8474dcb7e0 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 26 May 2009 15:46:09 +0200
Subject: virtio: fix id_matching for virtio drivers

This bug never appeared, since all current virtio drivers use
VIRTIO_DEV_ANY_ID for the vendor field. If a real vendor would be used,
the check in virtio_id_match is wrong - it returns 0 if
id->vendor == dev->id.vendor.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index bd074525..22642a2 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -61,7 +61,7 @@ static inline int virtio_id_match(const struct virtio_device *dev,
 	if (id->device != dev->id.device)
 		return 0;
 
-	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor != dev->id.vendor;
+	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
 }
 
 /* This looks through all the IDs a driver claims to support.  If any of them
-- 
cgit v0.10.2


From e3353853730eb99c56b7b0aed1667d51c0e3699a Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 26 May 2009 15:46:10 +0200
Subject: virtio: enhance id_matching for virtio drivers

This patch allows a virtio driver to use VIRTIO_DEV_ANY_ID for the
device id. This will be used by a test module that can be bound to
any virtio device.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 22642a2..3a43ebf 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -58,7 +58,7 @@ static struct device_attribute virtio_dev_attrs[] = {
 static inline int virtio_id_match(const struct virtio_device *dev,
 				  const struct virtio_device_id *id)
 {
-	if (id->device != dev->id.device)
+	if (id->device != dev->id.device && id->device != VIRTIO_DEV_ANY_ID)
 		return 0;
 
 	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index a334428..40e0045 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -641,7 +641,7 @@ static int do_virtio_entry(const char *filename, struct virtio_device_id *id,
 	id->vendor = TO_NATIVE(id->vendor);
 
 	strcpy(alias, "virtio:");
-	ADD(alias, "d", 1, id->device);
+	ADD(alias, "d", id->device != VIRTIO_DEV_ANY_ID, id->device);
 	ADD(alias, "v", id->vendor != VIRTIO_DEV_ANY_ID, id->vendor);
 
 	add_wildcard(alias);
-- 
cgit v0.10.2


From 713b15b3781240653d2b38414da3f4567dcbcf91 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:26:58 -0600
Subject: lguest: be paranoid about guest playing with device descriptors.

We can't trust the values in the device descriptor table once the
guest has booted, so keep local copies.  They could set them to
strange values then cause us to segv (they're 8 bit values, so they
can't make our pointers go too wild).

This becomes more important with the following patches which read them.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index d36fcc0..e65d6cb 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -126,9 +126,13 @@ struct device
 	/* The linked-list pointer. */
 	struct device *next;
 
-	/* The this device's descriptor, as mapped into the Guest. */
+	/* The device's descriptor, as mapped into the Guest. */
 	struct lguest_device_desc *desc;
 
+	/* We can't trust desc values once Guest has booted: we use these. */
+	unsigned int feature_len;
+	unsigned int num_vq;
+
 	/* The name of this device, for --verbose. */
 	const char *name;
 
@@ -245,7 +249,7 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len)
 static u8 *get_feature_bits(struct device *dev)
 {
 	return (u8 *)(dev->desc + 1)
-		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig);
+		+ dev->num_vq * sizeof(struct lguest_vqconfig);
 }
 
 /*L:100 The Launcher code itself takes us out into userspace, that scary place
@@ -979,8 +983,8 @@ static void update_device_status(struct device *dev)
 		verbose("Resetting device %s\n", dev->name);
 
 		/* Clear any features they've acked. */
-		memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
-		       dev->desc->feature_len);
+		memset(get_feature_bits(dev) + dev->feature_len, 0,
+		       dev->feature_len);
 
 		/* Zero out the virtqueues. */
 		for (vq = dev->vq; vq; vq = vq->next) {
@@ -994,12 +998,12 @@ static void update_device_status(struct device *dev)
 		unsigned int i;
 
 		verbose("Device %s OK: offered", dev->name);
-		for (i = 0; i < dev->desc->feature_len; i++)
+		for (i = 0; i < dev->feature_len; i++)
 			verbose(" %02x", get_feature_bits(dev)[i]);
 		verbose(", accepted");
-		for (i = 0; i < dev->desc->feature_len; i++)
+		for (i = 0; i < dev->feature_len; i++)
 			verbose(" %02x", get_feature_bits(dev)
-				[dev->desc->feature_len+i]);
+				[dev->feature_len+i]);
 
 		if (dev->ready)
 			dev->ready(dev);
@@ -1129,8 +1133,8 @@ static void handle_input(int fd)
 static u8 *device_config(const struct device *dev)
 {
 	return (void *)(dev->desc + 1)
-		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig)
-		+ dev->desc->feature_len * 2;
+		+ dev->num_vq * sizeof(struct lguest_vqconfig)
+		+ dev->feature_len * 2;
 }
 
 /* This routine allocates a new "struct lguest_device_desc" from descriptor
@@ -1191,6 +1195,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	 * yet, otherwise we'd be overwriting them. */
 	assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
 	memcpy(device_config(dev), &vq->config, sizeof(vq->config));
+	dev->num_vq++;
 	dev->desc->num_vq++;
 
 	verbose("Virtqueue page %#lx\n", to_guest_phys(p));
@@ -1219,7 +1224,7 @@ static void add_feature(struct device *dev, unsigned bit)
 	/* We can't extend the feature bits once we've added config bytes */
 	if (dev->desc->feature_len <= bit / CHAR_BIT) {
 		assert(dev->desc->config_len == 0);
-		dev->desc->feature_len = (bit / CHAR_BIT) + 1;
+		dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1;
 	}
 
 	features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
@@ -1259,6 +1264,8 @@ static struct device *new_device(const char *name, u16 type, int fd,
 	dev->name = name;
 	dev->vq = NULL;
 	dev->ready = NULL;
+	dev->feature_len = 0;
+	dev->num_vq = 0;
 
 	/* Append to device list.  Prepending to a single-linked list is
 	 * easier, but the user expects the devices to be arranged on the bus
-- 
cgit v0.10.2


From 56739c802ca845435f60e909104637880e14c769 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:26:59 -0600
Subject: lguest: cleanup passing of /dev/lguest fd around example launcher.

We hand the /dev/lguest fd everywhere; it's far neater to just make it
a global (it already is, in fact, hidden in the waker_fds struct).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index e65d6cb..1a2b906 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -79,7 +79,6 @@ static bool verbose;
 /* File descriptors for the Waker. */
 struct {
 	int pipe[2];
-	int lguest_fd;
 } waker_fds;
 
 /* The pointer to the start of guest memory. */
@@ -89,6 +88,8 @@ static unsigned long guest_limit, guest_max;
 /* The pipe for signal hander to write to. */
 static int timeoutpipe[2];
 static unsigned int timeout_usec = 500;
+/* The /dev/lguest file descriptor. */
+static int lguest_fd;
 
 /* a per-cpu variable indicating whose vcpu is currently running */
 static unsigned int __thread cpu_id;
@@ -139,7 +140,7 @@ struct device
 	/* If handle_input is set, it wants to be called when this file
 	 * descriptor is ready. */
 	int fd;
-	bool (*handle_input)(int fd, struct device *me);
+	bool (*handle_input)(struct device *me);
 
 	/* Any queues attached to this device */
 	struct virtqueue *vq;
@@ -169,7 +170,7 @@ struct virtqueue
 	u16 last_avail_idx;
 
 	/* The routine to call when the Guest pings us, or timeout. */
-	void (*handle_output)(int fd, struct virtqueue *me, bool timeout);
+	void (*handle_output)(struct virtqueue *me, bool timeout);
 
 	/* Outstanding buffers */
 	unsigned int inflight;
@@ -509,21 +510,16 @@ static void concat(char *dst, char *args[])
  * saw the arguments it expects when we looked at initialize() in lguest_user.c:
  * the base of Guest "physical" memory, the top physical page to allow and the
  * entry point for the Guest. */
-static int tell_kernel(unsigned long start)
+static void tell_kernel(unsigned long start)
 {
 	unsigned long args[] = { LHREQ_INITIALIZE,
 				 (unsigned long)guest_base,
 				 guest_limit / getpagesize(), start };
-	int fd;
-
 	verbose("Guest: %p - %p (%#lx)\n",
 		guest_base, guest_base + guest_limit, guest_limit);
-	fd = open_or_die("/dev/lguest", O_RDWR);
-	if (write(fd, args, sizeof(args)) < 0)
+	lguest_fd = open_or_die("/dev/lguest", O_RDWR);
+	if (write(lguest_fd, args, sizeof(args)) < 0)
 		err(1, "Writing to /dev/lguest");
-
-	/* We return the /dev/lguest file descriptor to control this Guest */
-	return fd;
 }
 /*:*/
 
@@ -583,21 +579,18 @@ static int waker(void *unused)
 		}
 
 		/* Send LHREQ_BREAK command to snap the Launcher out of it. */
-		pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id);
+		pwrite(lguest_fd, args, sizeof(args), cpu_id);
 	}
 	return 0;
 }
 
 /* This routine just sets up a pipe to the Waker process. */
-static void setup_waker(int lguest_fd)
+static void setup_waker(void)
 {
 	/* This pipe is closed when Launcher dies, telling Waker. */
 	if (pipe(waker_fds.pipe) != 0)
 		err(1, "Creating pipe for Waker");
 
-	/* Waker also needs to know the lguest fd */
-	waker_fds.lguest_fd = lguest_fd;
-
 	if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1)
 		err(1, "Creating Waker");
 }
@@ -727,7 +720,7 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len)
 }
 
 /* This actually sends the interrupt for this virtqueue */
-static void trigger_irq(int fd, struct virtqueue *vq)
+static void trigger_irq(struct virtqueue *vq)
 {
 	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
 
@@ -737,16 +730,15 @@ static void trigger_irq(int fd, struct virtqueue *vq)
 		return;
 
 	/* Send the Guest an interrupt tell them we used something up. */
-	if (write(fd, buf, sizeof(buf)) != 0)
+	if (write(lguest_fd, buf, sizeof(buf)) != 0)
 		err(1, "Triggering irq %i", vq->config.irq);
 }
 
 /* And here's the combo meal deal.  Supersize me! */
-static void add_used_and_trigger(int fd, struct virtqueue *vq,
-				 unsigned int head, int len)
+static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
 {
 	add_used(vq, head, len);
-	trigger_irq(fd, vq);
+	trigger_irq(vq);
 }
 
 /*
@@ -770,7 +762,7 @@ struct console_abort
 };
 
 /* This is the routine which handles console input (ie. stdin). */
-static bool handle_console_input(int fd, struct device *dev)
+static bool handle_console_input(struct device *dev)
 {
 	int len;
 	unsigned int head, in_num, out_num;
@@ -804,7 +796,7 @@ static bool handle_console_input(int fd, struct device *dev)
 	}
 
 	/* Tell the Guest about the new input. */
-	add_used_and_trigger(fd, dev->vq, head, len);
+	add_used_and_trigger(dev->vq, head, len);
 
 	/* Three ^C within one second?  Exit.
 	 *
@@ -824,7 +816,7 @@ static bool handle_console_input(int fd, struct device *dev)
 				close(waker_fds.pipe[1]);
 				/* Just in case Waker is blocked in BREAK, send
 				 * unbreak now. */
-				write(fd, args, sizeof(args));
+				write(lguest_fd, args, sizeof(args));
 				exit(2);
 			}
 			abort->count = 0;
@@ -839,7 +831,7 @@ static bool handle_console_input(int fd, struct device *dev)
 
 /* Handling output for console is simple: we just get all the output buffers
  * and write them to stdout. */
-static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
+static void handle_console_output(struct virtqueue *vq, bool timeout)
 {
 	unsigned int head, out, in;
 	int len;
@@ -850,7 +842,7 @@ static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
 		if (in)
 			errx(1, "Input buffers in output queue?");
 		len = writev(STDOUT_FILENO, iov, out);
-		add_used_and_trigger(fd, vq, head, len);
+		add_used_and_trigger(vq, head, len);
 	}
 }
 
@@ -879,7 +871,7 @@ static void block_vq(struct virtqueue *vq)
  * and write them (ignoring the first element) to this device's file descriptor
  * (/dev/net/tun).
  */
-static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
+static void handle_net_output(struct virtqueue *vq, bool timeout)
 {
 	unsigned int head, out, in, num = 0;
 	int len;
@@ -893,7 +885,7 @@ static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
 		len = writev(vq->dev->fd, iov, out);
 		if (len < 0)
 			err(1, "Writing network packet to tun");
-		add_used_and_trigger(fd, vq, head, len);
+		add_used_and_trigger(vq, head, len);
 		num++;
 	}
 
@@ -917,7 +909,7 @@ static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
 
 /* This is where we handle a packet coming in from the tun device to our
  * Guest. */
-static bool handle_tun_input(int fd, struct device *dev)
+static bool handle_tun_input(struct device *dev)
 {
 	unsigned int head, in_num, out_num;
 	int len;
@@ -946,7 +938,7 @@ static bool handle_tun_input(int fd, struct device *dev)
 		err(1, "reading network");
 
 	/* Tell the Guest about the new packet. */
-	add_used_and_trigger(fd, dev->vq, head, len);
+	add_used_and_trigger(dev->vq, head, len);
 
 	verbose("tun input packet len %i [%02x %02x] (%s)\n", len,
 		((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1],
@@ -959,18 +951,18 @@ static bool handle_tun_input(int fd, struct device *dev)
 /*L:215 This is the callback attached to the network and console input
  * virtqueues: it ensures we try again, in case we stopped console or net
  * delivery because Guest didn't have any buffers. */
-static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
+static void enable_fd(struct virtqueue *vq, bool timeout)
 {
 	add_device_fd(vq->dev->fd);
 	/* Snap the Waker out of its select loop. */
 	write(waker_fds.pipe[1], "", 1);
 }
 
-static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
+static void net_enable_fd(struct virtqueue *vq, bool timeout)
 {
 	/* We don't need to know again when Guest refills receive buffer. */
 	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
-	enable_fd(fd, vq, timeout);
+	enable_fd(vq, timeout);
 }
 
 /* When the Guest tells us they updated the status field, we handle it. */
@@ -1011,7 +1003,7 @@ static void update_device_status(struct device *dev)
 }
 
 /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
-static void handle_output(int fd, unsigned long addr)
+static void handle_output(unsigned long addr)
 {
 	struct device *i;
 	struct virtqueue *vq;
@@ -1039,7 +1031,7 @@ static void handle_output(int fd, unsigned long addr)
 			if (strcmp(vq->dev->name, "console") != 0)
 				verbose("Output to %s\n", vq->dev->name);
 			if (vq->handle_output)
-				vq->handle_output(fd, vq, false);
+				vq->handle_output(vq, false);
 			return;
 		}
 	}
@@ -1053,7 +1045,7 @@ static void handle_output(int fd, unsigned long addr)
 	      strnlen(from_guest_phys(addr), guest_limit - addr));
 }
 
-static void handle_timeout(int fd)
+static void handle_timeout(void)
 {
 	char buf[32];
 	struct device *i;
@@ -1071,14 +1063,14 @@ static void handle_timeout(int fd)
 			vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
 			vq->blocked = false;
 			if (vq->handle_output)
-				vq->handle_output(fd, vq, true);
+				vq->handle_output(vq, true);
 		}
 	}
 }
 
 /* This is called when the Waker wakes us up: check for incoming file
  * descriptors. */
-static void handle_input(int fd)
+static void handle_input(void)
 {
 	/* select() wants a zeroed timeval to mean "don't wait". */
 	struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
@@ -1100,7 +1092,7 @@ static void handle_input(int fd)
 		 * descriptors and a method of handling them.  */
 		for (i = devices.dev; i; i = i->next) {
 			if (i->handle_input && FD_ISSET(i->fd, &fds)) {
-				if (i->handle_input(fd, i))
+				if (i->handle_input(i))
 					continue;
 
 				/* If handle_input() returns false, it means we
@@ -1114,7 +1106,7 @@ static void handle_input(int fd)
 
 		/* Is this the timeout fd? */
 		if (FD_ISSET(timeoutpipe[0], &fds))
-			handle_timeout(fd);
+			handle_timeout();
 	}
 }
 
@@ -1163,7 +1155,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
 /* Each device descriptor is followed by the description of its virtqueues.  We
  * specify how many descriptors the virtqueue is to have. */
 static void add_virtqueue(struct device *dev, unsigned int num_descs,
-			  void (*handle_output)(int, struct virtqueue *, bool))
+			  void (*handle_output)(struct virtqueue *, bool))
 {
 	unsigned int pages;
 	struct virtqueue **i, *vq = malloc(sizeof(*vq));
@@ -1249,7 +1241,7 @@ static void set_config(struct device *dev, unsigned len, const void *conf)
  *
  * See what I mean about userspace being boring? */
 static struct device *new_device(const char *name, u16 type, int fd,
-				 bool (*handle_input)(int, struct device *))
+				 bool (*handle_input)(struct device *))
 {
 	struct device *dev = malloc(sizeof(*dev));
 
@@ -1678,7 +1670,7 @@ static int io_thread(void *_dev)
 
 /* Now we've seen the I/O thread, we return to the Launcher to see what happens
  * when that thread tells us it's completed some I/O. */
-static bool handle_io_finish(int fd, struct device *dev)
+static bool handle_io_finish(struct device *dev)
 {
 	char c;
 
@@ -1688,12 +1680,12 @@ static bool handle_io_finish(int fd, struct device *dev)
 		exit(1);
 
 	/* It did some work, so trigger the irq. */
-	trigger_irq(fd, dev->vq);
+	trigger_irq(dev->vq);
 	return true;
 }
 
 /* When the Guest submits some I/O, we just need to wake the I/O thread. */
-static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
+static void handle_virtblk_output(struct virtqueue *vq, bool timeout)
 {
 	struct vblk_info *vblk = vq->dev->priv;
 	char c = 0;
@@ -1771,7 +1763,7 @@ static void setup_block_file(const char *filename)
  * console is the reverse.
  *
  * The same logic applies, however. */
-static bool handle_rng_input(int fd, struct device *dev)
+static bool handle_rng_input(struct device *dev)
 {
 	int len;
 	unsigned int head, in_num, out_num, totlen = 0;
@@ -1800,7 +1792,7 @@ static bool handle_rng_input(int fd, struct device *dev)
 	}
 
 	/* Tell the Guest about the new input. */
-	add_used_and_trigger(fd, dev->vq, head, totlen);
+	add_used_and_trigger(dev->vq, head, totlen);
 
 	/* Everything went OK! */
 	return true;
@@ -1841,7 +1833,7 @@ static void __attribute__((noreturn)) restart_guest(void)
 
 /*L:220 Finally we reach the core of the Launcher which runs the Guest, serves
  * its input and output, and finally, lays it to rest. */
-static void __attribute__((noreturn)) run_guest(int lguest_fd)
+static void __attribute__((noreturn)) run_guest(void)
 {
 	for (;;) {
 		unsigned long args[] = { LHREQ_BREAK, 0 };
@@ -1855,7 +1847,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
 		/* One unsigned long means the Guest did HCALL_NOTIFY */
 		if (readval == sizeof(notify_addr)) {
 			verbose("Notify on address %#lx\n", notify_addr);
-			handle_output(lguest_fd, notify_addr);
+			handle_output(notify_addr);
 			continue;
 		/* ENOENT means the Guest died.  Reading tells us why. */
 		} else if (errno == ENOENT) {
@@ -1875,7 +1867,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
 			continue;
 
 		/* Service input, then unset the BREAK to release the Waker. */
-		handle_input(lguest_fd);
+		handle_input();
 		if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0)
 			err(1, "Resetting break");
 	}
@@ -1911,8 +1903,8 @@ int main(int argc, char *argv[])
 	/* Memory, top-level pagetable, code startpoint and size of the
 	 * (optional) initrd. */
 	unsigned long mem = 0, start, initrd_size = 0;
-	/* Two temporaries and the /dev/lguest file descriptor. */
-	int i, c, lguest_fd;
+	/* Two temporaries. */
+	int i, c;
 	/* The boot information for the Guest. */
 	struct boot_params *boot;
 	/* If they specify an initrd file to load. */
@@ -2030,15 +2022,15 @@ int main(int argc, char *argv[])
 
 	/* We tell the kernel to initialize the Guest: this returns the open
 	 * /dev/lguest file descriptor. */
-	lguest_fd = tell_kernel(start);
+	tell_kernel(start);
 
 	/* We clone off a thread, which wakes the Launcher whenever one of the
 	 * input file descriptors needs attention.  We call this the Waker, and
 	 * we'll cover it in a moment. */
-	setup_waker(lguest_fd);
+	setup_waker();
 
 	/* Finally, run the Guest.  This doesn't return. */
-	run_guest(lguest_fd);
+	run_guest();
 }
 /*:*/
 
-- 
cgit v0.10.2


From 1028375e93a7aa4dbe466947d1c65f368b1f61c1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:26:59 -0600
Subject: lguest: clean up lguest_init_IRQ

Copy from arch/x86/kernel/irqinit_32.c: we don't use the vectors beyond
LGUEST_IRQS (if any), but we might as well set them all.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4e0c265..2392a7a1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -628,13 +628,12 @@ static void __init lguest_init_IRQ(void)
 {
 	unsigned int i;
 
-	for (i = 0; i < LGUEST_IRQS; i++) {
-		int vector = FIRST_EXTERNAL_VECTOR + i;
+	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
 		/* Some systems map "vectors" to interrupts weirdly.  Lguest has
 		 * a straightforward 1 to 1 mapping, so force that here. */
-		__get_cpu_var(vector_irq)[vector] = i;
-		if (vector != SYSCALL_VECTOR)
-			set_intr_gate(vector, interrupt[i]);
+		__get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR;
+		if (i != SYSCALL_VECTOR)
+			set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
 	}
 	/* This call is required to set up for 4k stacks, where we have
 	 * separate stacks for hard and soft interrupts. */
-- 
cgit v0.10.2


From f7027c6387d0c3acf569845165ec7947e2083c82 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:00 -0600
Subject: lguest: get more serious about wmb() in example Launcher code

Since the Launcher process runs the Guest, it doesn't have to be very
serious about its barriers: the Guest isn't running while we are (Guest
is UP).

Before we change to use threads to service devices, we need to fix this.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 1a2b906..1e31d1e 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -182,9 +182,10 @@ struct virtqueue
 /* Remember the arguments to the program so we can "reboot" */
 static char **main_args;
 
-/* Since guest is UP and we don't run at the same time, we don't need barriers.
- * But I include them in the code in case others copy it. */
-#define wmb()
+/* We have to be careful with barriers: our devices are all run in separate
+ * threads and so we need to make sure that changes visible to the Guest happen
+ * in precise order. */
+#define wmb() __asm__ __volatile__("" : : : "memory")
 
 /* Convert an iovec element to the given type.
  *
-- 
cgit v0.10.2


From b43e352139f51216a8c56b0bd5fc3d4e05c65619 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:00 -0600
Subject: sched: export kick_process

lguest needs kick_process: wake_up_process() does nothing if a process
is running, which isn't sufficient (we need it in the kernel).

And lguest support is usually modular.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index f04aa96..8ec9d13 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2192,6 +2192,7 @@ void kick_process(struct task_struct *p)
 		smp_send_reschedule(cpu);
 	preempt_enable();
 }
+EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * Return a low guess at the load of a migration-source cpu weighted
-- 
cgit v0.10.2


From a6c372de6e4b9a8188b66badcee3e3792eccdd26 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:01 -0600
Subject: lguest: fix lguest wake on guest clock tick, or fd activity

The Launcher could be inside the Guest on another CPU; wake_up_process
will do nothing because it is "running".  kick_process will knock it
back into our kernel in this case, otherwise we'll miss it until the
next guest exit.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 6e99adb..9ea26ad 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -511,9 +511,9 @@ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer)
 
 	/* Remember the first interrupt is the timer interrupt. */
 	set_bit(0, cpu->irqs_pending);
-	/* If the Guest is actually stopped, we need to wake it up. */
-	if (cpu->halted)
-		wake_up_process(cpu->tsk);
+	/* Guest may be stopped or running on another CPU. */
+	if (!wake_up_process(cpu->tsk))
+		kick_process(cpu->tsk);
 	return HRTIMER_NORESTART;
 }
 
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index b8ee103..bcdcf34 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -24,8 +24,8 @@ static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
 
 	if (on) {
 		cpu->break_out = 1;
-		/* Pop it out of the Guest (may be running on different CPU) */
-		wake_up_process(cpu->tsk);
+		if (!wake_up_process(cpu->tsk))
+			kick_process(cpu->tsk);
 		/* Wait for them to reset it */
 		return wait_event_interruptible(cpu->break_wq, !cpu->break_out);
 	} else {
-- 
cgit v0.10.2


From ebf9a5a99c1a464afe0b4dfa64416fc8b273bc5c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:01 -0600
Subject: lguest: remove invalid interrupt forcing logic.

20887611523e749d99cc7d64ff6c97d27529fbae (lguest: notify on empty) introduced
lguest support for the VIRTIO_F_NOTIFY_ON_EMPTY flag, but in fact it turned on
interrupts all the time.

Because we always process one buffer at a time, the inflight count is always 0
when call trigger_irq and so we always ignore VRING_AVAIL_F_NO_INTERRUPT from
the Guest.

It should be looking to see if there are more buffers in the Guest's queue:
if it's empty, then we force an interrupt.

This makes little difference, since we usually have an empty queue; but
that's the subject of another patch.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 1e31d1e..9f3240c 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -172,9 +172,6 @@ struct virtqueue
 	/* The routine to call when the Guest pings us, or timeout. */
 	void (*handle_output)(struct virtqueue *me, bool timeout);
 
-	/* Outstanding buffers */
-	unsigned int inflight;
-
 	/* Is this blocked awaiting a timer? */
 	bool blocked;
 };
@@ -699,7 +696,6 @@ static unsigned get_vq_desc(struct virtqueue *vq,
 			errx(1, "Looped descriptor");
 	} while ((i = next_desc(vq, i)) != vq->vring.num);
 
-	vq->inflight++;
 	return head;
 }
 
@@ -717,7 +713,6 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len)
 	/* Make sure buffer is written before we update index. */
 	wmb();
 	vq->vring.used->idx++;
-	vq->inflight--;
 }
 
 /* This actually sends the interrupt for this virtqueue */
@@ -727,7 +722,7 @@ static void trigger_irq(struct virtqueue *vq)
 
 	/* If they don't want an interrupt, don't send one, unless empty. */
 	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-	    && vq->inflight)
+	    && lg_last_avail(vq) != vq->vring.avail->idx)
 		return;
 
 	/* Send the Guest an interrupt tell them we used something up. */
@@ -1171,7 +1166,6 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	vq->next = NULL;
 	vq->last_avail_idx = 0;
 	vq->dev = dev;
-	vq->inflight = 0;
 	vq->blocked = false;
 
 	/* Initialize the configuration. */
-- 
cgit v0.10.2


From abd41f037e1a64543000ed73b42f616d04d92700 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:02 -0600
Subject: lguest: fix race in halt code

When the Guest does the LHCALL_HALT hypercall, we go to sleep, expecting
that a timer or the Waker will wake_up_process() us.

But we do it in a stupid way, leaving a classic missing wakeup race.

So split maybe_do_interrupt() into interrupt_pending() and
try_deliver_interrupt(), and check maybe_do_interrupt() and the
"break_out" flag before calling schedule.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 4845fb3..8ca1def 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -188,6 +188,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 {
 	/* We stop running once the Guest is dead. */
 	while (!cpu->lg->dead) {
+		unsigned int irq;
+
 		/* First we run any hypercalls the Guest wants done. */
 		if (cpu->hcall)
 			do_hypercalls(cpu);
@@ -211,7 +213,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		/* Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
 		 * run the Guest. */
-		maybe_do_interrupt(cpu);
+		irq = interrupt_pending(cpu);
+		if (irq < LGUEST_IRQS)
+			try_deliver_interrupt(cpu, irq);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -227,7 +231,13 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		 * clock timer or LHREQ_BREAK from the Waker will wake us. */
 		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			schedule();
+			/* Just before we sleep, make sure nothing snuck in
+			 * which we should be doing. */
+			if (interrupt_pending(cpu) < LGUEST_IRQS
+			    || cpu->break_out)
+				set_current_state(TASK_RUNNING);
+			else
+				schedule();
 			continue;
 		}
 
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 9ea26ad..a8c966f 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -128,30 +128,38 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
 /*H:205
  * Virtual Interrupts.
  *
- * maybe_do_interrupt() gets called before every entry to the Guest, to see if
- * we should divert the Guest to running an interrupt handler. */
-void maybe_do_interrupt(struct lg_cpu *cpu)
+ * interrupt_pending() returns the first pending interrupt which isn't blocked
+ * by the Guest.  It is called before every entry to the Guest, and just before
+ * we go to sleep when the Guest has halted itself. */
+unsigned int interrupt_pending(struct lg_cpu *cpu)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
-	struct desc_struct *idt;
 
 	/* If the Guest hasn't even initialized yet, we can do nothing. */
 	if (!cpu->lg->lguest_data)
-		return;
+		return LGUEST_IRQS;
 
 	/* Take our "irqs_pending" array and remove any interrupts the Guest
 	 * wants blocked: the result ends up in "blk". */
 	if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
 			   sizeof(blk)))
-		return;
+		return LGUEST_IRQS;
 	bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
-	/* None?  Nothing to do */
-	if (irq >= LGUEST_IRQS)
-		return;
+
+	return irq;
+}
+
+/* This actually diverts the Guest to running an interrupt handler, once an
+ * interrupt has been identified by interrupt_pending(). */
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
+{
+	struct desc_struct *idt;
+
+	BUG_ON(irq >= LGUEST_IRQS);
 
 	/* They may be in the middle of an iret, where they asked us never to
 	 * deliver interrupts. */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index af92a17..6743cf1 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -139,7 +139,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
-void maybe_do_interrupt(struct lg_cpu *cpu);
+unsigned int interrupt_pending(struct lg_cpu *cpu);
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq);
 bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
 void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
 			  u32 low, u32 hi);
-- 
cgit v0.10.2


From a32a8813d0173163ba44d8f9556e0d89fdc4fb46 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:02 -0600
Subject: lguest: improve interrupt handling, speed up stream networking

lguest never checked for pending interrupts when enabling interrupts, and
things still worked.  However, it makes a significant difference to TCP
performance, so it's time we fixed it by introducing a pending_irq flag
and checking it on irq_restore and irq_enable.

These two routines are now too big to patch into the 8/10 bytes
patch space, so we drop that code.

Note: The high latency on interrupt delivery had a very curious
effect: once everything else was optimized, networking without GSO was
faster than networking with GSO, since more interrupts were sent and
hence a greater chance of one getting through to the Guest!

Note2: (Almost) Closing the same loophole for iret doesn't have any
measurable effect, so I'm leaving that patch for the moment.

Before:
	1GB tcpblast Guest->Host:		30.7 seconds
	1GB tcpblast Guest->Host (no GSO):	76.0 seconds

After:
	1GB tcpblast Guest->Host:		6.8 seconds
	1GB tcpblast Guest->Host (no GSO):	27.8 seconds

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index faae199..f9a9f78 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -17,6 +17,7 @@
 #define LHCALL_LOAD_TLS		16
 #define LHCALL_NOTIFY		17
 #define LHCALL_LOAD_GDT_ENTRY	18
+#define LHCALL_SEND_INTERRUPTS	19
 
 #define LGUEST_TRAP_ENTRY 0x1F
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 2392a7a1..37b8c1d 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -205,6 +205,12 @@ PV_CALLEE_SAVE_REGS_THUNK(save_fl);
 static void restore_fl(unsigned long flags)
 {
 	lguest_data.irq_enabled = flags;
+	mb();
+	/* Null hcall forces interrupt delivery now, if irq_pending is
+	 * set to X86_EFLAGS_IF (ie. an interrupt is pending, and flags
+	 * enables interrupts. */
+	if (flags & lguest_data.irq_pending)
+		kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
 }
 PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
 
@@ -219,6 +225,11 @@ PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
 static void irq_enable(void)
 {
 	lguest_data.irq_enabled = X86_EFLAGS_IF;
+	mb();
+	/* Null hcall forces interrupt delivery now. */
+	if (lguest_data.irq_pending)
+		kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
+
 }
 PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
 
@@ -972,10 +983,10 @@ static void lguest_restart(char *reason)
  *
  * Our current solution is to allow the paravirt back end to optionally patch
  * over the indirect calls to replace them with something more efficient.  We
- * patch the four most commonly called functions: disable interrupts, enable
- * interrupts, restore interrupts and save interrupts.  We usually have 6 or 10
- * bytes to patch into: the Guest versions of these operations are small enough
- * that we can fit comfortably.
+ * patch two of the simplest of the most commonly called functions: disable
+ * interrupts and save interrupts.  We usually have 6 or 10 bytes to patch
+ * into: the Guest versions of these operations are small enough that we can
+ * fit comfortably.
  *
  * First we need assembly templates of each of the patchable Guest operations,
  * and these are in i386_head.S. */
@@ -986,8 +997,6 @@ static const struct lguest_insns
 	const char *start, *end;
 } lguest_insns[] = {
 	[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
-	[PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
-	[PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
 	[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
 };
 
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index f795419..3e0c554 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -46,8 +46,6 @@ ENTRY(lguest_entry)
 	.globl lgstart_##name; .globl lgend_##name
 
 LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
 LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
 /*:*/
 
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 8ca1def..03fbc88 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -189,6 +189,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 	/* We stop running once the Guest is dead. */
 	while (!cpu->lg->dead) {
 		unsigned int irq;
+		bool more;
 
 		/* First we run any hypercalls the Guest wants done. */
 		if (cpu->hcall)
@@ -213,9 +214,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		/* Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
 		 * run the Guest. */
-		irq = interrupt_pending(cpu);
+		irq = interrupt_pending(cpu, &more);
 		if (irq < LGUEST_IRQS)
-			try_deliver_interrupt(cpu, irq);
+			try_deliver_interrupt(cpu, irq, more);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -233,7 +234,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 			set_current_state(TASK_INTERRUPTIBLE);
 			/* Just before we sleep, make sure nothing snuck in
 			 * which we should be doing. */
-			if (interrupt_pending(cpu) < LGUEST_IRQS
+			if (interrupt_pending(cpu, &more) < LGUEST_IRQS
 			    || cpu->break_out)
 				set_current_state(TASK_RUNNING);
 			else
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 54d66f0..f252b71 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -37,6 +37,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 		/* This call does nothing, except by breaking out of the Guest
 		 * it makes us process all the asynchronous hypercalls. */
 		break;
+	case LHCALL_SEND_INTERRUPTS:
+		/* This call does nothing too, but by breaking out of the Guest
+		 * it makes us process any pending interrupts. */
+		break;
 	case LHCALL_LGUEST_INIT:
 		/* You can't get here unless you're already initialized.  Don't
 		 * do that. */
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a8c966f..5a10754 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -131,7 +131,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
  * interrupt_pending() returns the first pending interrupt which isn't blocked
  * by the Guest.  It is called before every entry to the Guest, and just before
  * we go to sleep when the Guest has halted itself. */
-unsigned int interrupt_pending(struct lg_cpu *cpu)
+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
@@ -149,13 +149,14 @@ unsigned int interrupt_pending(struct lg_cpu *cpu)
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
+	*more = find_next_bit(blk, LGUEST_IRQS, irq+1);
 
 	return irq;
 }
 
 /* This actually diverts the Guest to running an interrupt handler, once an
  * interrupt has been identified by interrupt_pending(). */
-void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
 {
 	struct desc_struct *idt;
 
@@ -178,8 +179,12 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
 		u32 irq_enabled;
 		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
 			irq_enabled = 0;
-		if (!irq_enabled)
+		if (!irq_enabled) {
+			/* Make sure they know an IRQ is pending. */
+			put_user(X86_EFLAGS_IF,
+				 &cpu->lg->lguest_data->irq_pending);
 			return;
+		}
 	}
 
 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
@@ -202,6 +207,11 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
 	 * here is a compromise which means at least it gets updated every
 	 * timer interrupt. */
 	write_timestamp(cpu);
+
+	/* If there are no other interrupts we want to deliver, clear
+	 * the pending flag. */
+	if (!more)
+		put_user(0, &cpu->lg->lguest_data->irq_pending);
 }
 /*:*/
 
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 6743cf1..573896533 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -139,8 +139,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
-unsigned int interrupt_pending(struct lg_cpu *cpu);
-void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq);
+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
 bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
 void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
 			  u32 low, u32 hi);
diff --git a/include/linux/lguest.h b/include/linux/lguest.h
index 175e63f..7bc1440 100644
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -30,6 +30,10 @@ struct lguest_data
 	/* Wallclock time set by the Host. */
 	struct timespec time;
 
+	/* Interrupt pending set by the Host.  The Guest should do a hypercall
+	 * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */
+	int irq_pending;
+
 	/* Async hypercall ring.  Instead of directly making hypercalls, we can
 	 * place them in here for processing the next time the Host wants.
 	 * This batching can be quite efficient. */
-- 
cgit v0.10.2


From 61f4bc83fea248a3092beb7ba43daa5629615513 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:03 -0600
Subject: lguest: optimize by coding restore_flags and irq_enable in assembler.

The downside of the last patch which made restore_flags and irq_enable
check interrupts is that they are now too big to be patched directly
into the callsites, so the C versions are always used.

But the C versions go via PV_CALLEE_SAVE_REGS_THUNK which saves all
the registers.  In fact, we don't need any registers in the fast path,
so we can do better than this if we actually code them in assembler.

The results are in the noise, but since it's about the same amount of
code, it's worth applying.

1GB Guest->Host: input(suppressed),output(suppressed)
Before:
	Seconds: 0:16.53
	Packets: 377268,753673
	Interrupts: 22461,24297
	Notifications: 1(5245),21303(732370)
	Net IRQs triggered: 377023(245),42578(711095)

After:
	Seconds: 0:16.48
	Packets: 377289,753673
	Interrupts: 22281,24465
	Notifications: 1(5245),21296(732377)
	Net IRQs triggered: 377060(229),42564(711109)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a830cb..dfdbf64 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -126,6 +126,7 @@ void foo(void)
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
+	OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
 	OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
 
 	BLANK();
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 37b8c1d..514f4d0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -179,7 +179,7 @@ static void lguest_end_context_switch(struct task_struct *next)
 	paravirt_end_context_switch(next);
 }
 
-/*G:033
+/*G:032
  * After that diversion we return to our first native-instruction
  * replacements: four functions for interrupt control.
  *
@@ -199,41 +199,28 @@ static unsigned long save_fl(void)
 {
 	return lguest_data.irq_enabled;
 }
-PV_CALLEE_SAVE_REGS_THUNK(save_fl);
-
-/* restore_flags() just sets the flags back to the value given. */
-static void restore_fl(unsigned long flags)
-{
-	lguest_data.irq_enabled = flags;
-	mb();
-	/* Null hcall forces interrupt delivery now, if irq_pending is
-	 * set to X86_EFLAGS_IF (ie. an interrupt is pending, and flags
-	 * enables interrupts. */
-	if (flags & lguest_data.irq_pending)
-		kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
-}
-PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
 
 /* Interrupts go off... */
 static void irq_disable(void)
 {
 	lguest_data.irq_enabled = 0;
 }
-PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
 
-/* Interrupts go on... */
-static void irq_enable(void)
-{
-	lguest_data.irq_enabled = X86_EFLAGS_IF;
-	mb();
-	/* Null hcall forces interrupt delivery now. */
-	if (lguest_data.irq_pending)
-		kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
+/* Let's pause a moment.  Remember how I said these are called so often?
+ * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to
+ * break some rules.  In particular, these functions are assumed to save their
+ * own registers if they need to: normal C functions assume they can trash the
+ * eax register.  To use normal C functions, we use
+ * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
+ * C function, then restores it. */
+PV_CALLEE_SAVE_REGS_THUNK(save_fl);
+PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
+/*:*/
 
-}
-PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
+/* These are in i386_head.S */
+extern void lg_irq_enable(void);
+extern void lg_restore_fl(unsigned long flags);
 
-/*:*/
 /*M:003 Note that we don't check for outstanding interrupts when we re-enable
  * them (or when we unmask an interrupt).  This seems to work for the moment,
  * since interrupts are rare and we'll just get the interrupt on the next timer
@@ -1041,9 +1028,9 @@ __init void lguest_init(void)
 	/* interrupt-related operations */
 	pv_irq_ops.init_IRQ = lguest_init_IRQ;
 	pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
-	pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl);
+	pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
 	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
-	pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable);
+	pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
 	pv_irq_ops.safe_halt = lguest_safe_halt;
 
 	/* init-time operations */
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 3e0c554..a9c8cfe 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -47,7 +47,63 @@ ENTRY(lguest_entry)
 
 LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
 LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
-/*:*/
+
+/*G:033 But using those wrappers is inefficient (we'll see why that doesn't
+ * matter for save_fl and irq_disable later).  If we write our routines
+ * carefully in assembler, we can avoid clobbering any registers and avoid
+ * jumping through the wrapper functions.
+ *
+ * I skipped over our first piece of assembler, but this one is worth studying
+ * in a bit more detail so I'll describe in easy stages.  First, the routine
+ * to enable interrupts: */
+ENTRY(lg_irq_enable)
+	/* The reverse of irq_disable, this sets lguest_data.irq_enabled to
+	 * X86_EFLAGS_IF (ie. "Interrupts enabled"). */
+	movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
+	/* But now we need to check if the Host wants to know: there might have
+	 * been interrupts waiting to be delivered, in which case it will have
+	 * set lguest_data.irq_pending to X86_EFLAGS_IF.  If it's not zero, we
+	 * jump to send_interrupts, otherwise we're done. */
+	testl $0, lguest_data+LGUEST_DATA_irq_pending
+	jnz send_interrupts
+	/* One cool thing about x86 is that you can do many things without using
+	 * a register.  In this case, the normal path hasn't needed to save or
+	 * restore any registers at all! */
+	ret
+send_interrupts:
+	/* OK, now we need a register: eax is used for the hypercall number,
+	 * which is LHCALL_SEND_INTERRUPTS.
+	 *
+	 * We used not to bother with this pending detection at all, which was
+	 * much simpler.  Sooner or later the Host would realize it had to
+	 * send us an interrupt.  But that turns out to make performance 7
+	 * times worse on a simple tcp benchmark.  So now we do this the hard
+	 * way. */
+	pushl %eax
+	movl $LHCALL_SEND_INTERRUPTS, %eax
+	/* This is a vmcall instruction (same thing that KVM uses).  Older
+	 * assembler versions might not know the "vmcall" instruction, so we
+	 * create one manually here. */
+	.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
+	popl %eax
+	ret
+
+/* Finally, the "popf" or "restore flags" routine.  The %eax register holds the
+ * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're
+ * enabling interrupts again, if it's 0 we're leaving them off. */
+ENTRY(lg_restore_fl)
+	/* This is just "lguest_data.irq_enabled = flags;" */
+	movl %eax, lguest_data+LGUEST_DATA_irq_enabled
+	/* Now, if the %eax value has enabled interrupts and
+	 * lguest_data.irq_pending is set, we want to tell the Host so it can
+	 * deliver any outstanding interrupts.  Fortunately, both values will
+	 * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl"
+	 * instruction will AND them together for us.  If both are set, we
+	 * jump to send_interrupts. */
+	testl lguest_data+LGUEST_DATA_irq_pending, %eax
+	jnz send_interrupts
+	/* Again, the normal path has used no extra registers.  Clever, huh? */
+	ret
 
 /* These demark the EIP range where host should never deliver interrupts. */
 .global lguest_noirq_start
-- 
cgit v0.10.2


From 2644f17d6c932929fd68cfec95691490947e0fd1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:03 -0600
Subject: lguest: clean up example launcher compile flags.

18 months ago 5bbf89fc260830f3f58b331d946a16b39ad1ca2d changed to loading
bzImages directly, and no longer manually ungzipping them, so we no longer
need libz.

Also, -m32 is useful for those on 64-bit platforms (and harmless on
32-bit).

Reported-by: Ron Minnich <rminnich@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index 1f4f9e8..28c8cdf 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -1,6 +1,5 @@
 # This creates the demonstration utility "lguest" which runs a Linux guest.
-CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
-LDLIBS:=-lz
+CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
 
 all: lguest
 
-- 
cgit v0.10.2


From 81b79b01d057f8c5a378c38d2f738775b972934a Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Wed, 20 May 2009 01:45:45 +0200
Subject: lguest: beyond ARRAY_SIZE of cpu->arch.gdt

Do not go beyond ARRAY_SIZE of cpu->arch.gdt

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 7ede64f..482ed5a 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -150,7 +150,7 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi)
 {
 	/* We assume the Guest has the same number of GDT entries as the
 	 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
-	if (num > ARRAY_SIZE(cpu->arch.gdt))
+	if (num >= ARRAY_SIZE(cpu->arch.gdt))
 		kill_guest(cpu, "too many gdt entries %i", num);
 
 	/* Set it up, then fix it. */
-- 
cgit v0.10.2


From f086122bb6e885f926f935b1418fca3b293375f0 Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Fri, 12 Jun 2009 22:27:04 -0600
Subject: lguest: Segment selectors are 16-bit long. Fix lg_cpu.ss1 definition.

If GDT_ENTRIES were every > 256, this could become a problem.

Signed-off-by: Matias Zabaljauregui <zabaljauregui at gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 573896533..74af503 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -49,7 +49,7 @@ struct lg_cpu {
 	u32 cr2;
 	int ts;
 	u32 esp1;
-	u8 ss1;
+	u16 ss1;
 
 	/* Bitmap of what has changed: see CHANGED_* above. */
 	int changed;
-- 
cgit v0.10.2


From e606490c440900e50ccf73a54f6fc6150ff40815 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:04 -0600
Subject: lguest: clean up length-used value in example launcher

The "len" field in the used ring for virtio indicates the number of
bytes *written* to the buffer.  This means the guest doesn't have to
zero the buffers in advance as it always knows the used length.

Erroneously, the console and network example code puts the length
*read* into that field.  The guest ignores it, but it's wrong.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 9f3240c..8704600 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -830,15 +830,14 @@ static bool handle_console_input(struct device *dev)
 static void handle_console_output(struct virtqueue *vq, bool timeout)
 {
 	unsigned int head, out, in;
-	int len;
 	struct iovec iov[vq->vring.num];
 
 	/* Keep getting output buffers from the Guest until we run out. */
 	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
 		if (in)
 			errx(1, "Input buffers in output queue?");
-		len = writev(STDOUT_FILENO, iov, out);
-		add_used_and_trigger(vq, head, len);
+		writev(STDOUT_FILENO, iov, out);
+		add_used_and_trigger(vq, head, 0);
 	}
 }
 
@@ -870,7 +869,6 @@ static void block_vq(struct virtqueue *vq)
 static void handle_net_output(struct virtqueue *vq, bool timeout)
 {
 	unsigned int head, out, in, num = 0;
-	int len;
 	struct iovec iov[vq->vring.num];
 	static int last_timeout_num;
 
@@ -878,10 +876,9 @@ static void handle_net_output(struct virtqueue *vq, bool timeout)
 	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
 		if (in)
 			errx(1, "Input buffers in output queue?");
-		len = writev(vq->dev->fd, iov, out);
-		if (len < 0)
+		if (writev(vq->dev->fd, iov, out) < 0)
 			err(1, "Writing network packet to tun");
-		add_used_and_trigger(vq, head, len);
+		add_used_and_trigger(vq, head, 0);
 		num++;
 	}
 
-- 
cgit v0.10.2


From 7b5c806c35f6ff76b2e36a8b5b1513c8a83fcff7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:05 -0600
Subject: lguest: fix writev returning short on console output

I've never seen it here, but I can't find anywhere that says writev
will write everything.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 8704600..02fa524 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -836,7 +836,12 @@ static void handle_console_output(struct virtqueue *vq, bool timeout)
 	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
 		if (in)
 			errx(1, "Input buffers in output queue?");
-		writev(STDOUT_FILENO, iov, out);
+		while (!iov_empty(iov, out)) {
+			int len = writev(STDOUT_FILENO, iov, out);
+			if (len <= 0)
+				err(1, "Write to stdout gave %i", len);
+			iov_consume(iov, out, len);
+		}
 		add_used_and_trigger(vq, head, 0);
 	}
 }
-- 
cgit v0.10.2


From ed1dc77810159a733240ba6751c1b31023bf8dd7 Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Sat, 30 May 2009 15:35:49 -0300
Subject: lguest: map switcher with executable page table entries

Map switcher with executable page table entries.
(This bug didn't matter before PAE and hence NX support -- RR)

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 03fbc88..d0298dc 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -95,7 +95,7 @@ static __init int map_switcher(void)
 	 * array of struct pages.  It increments that pointer, but we don't
 	 * care. */
 	pagep = switcher_page;
-	err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep);
+	err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
 	if (err) {
 		printk("lguest: map_vm_area failed: %i\n", err);
 		goto free_vma;
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a059cf9..4969953 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -714,7 +714,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 
 	/* Make the last PGD entry for this Guest point to the Switcher's PTE
 	 * page for this CPU (with appropriate flags). */
-	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL);
+	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
 
 	cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
 
-- 
cgit v0.10.2


From 90603d15fa95605d1d08235b73e220d766f04bb0 Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Fri, 12 Jun 2009 22:27:06 -0600
Subject: lguest: use native_set_* macros, which properly handle 64-bit entries
 when PAE is activated

Some cleanups and replace direct assignment with native_set_* macros which properly handle 64-bit entries when PAE is activated

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 514f4d0..4f311e4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -525,7 +525,7 @@ static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
 static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pteval)
 {
-	*ptep = pteval;
+	native_set_pte(ptep, pteval);
 	lguest_pte_update(mm, addr, ptep);
 }
 
@@ -534,9 +534,9 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
  * changed. */
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
-	*pmdp = pmdval;
+	native_set_pmd(pmdp, pmdval);
 	lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
-		   (__pa(pmdp) & (PAGE_SIZE - 1)) / 4);
+		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
 }
 
 /* There are a couple of legacy places where the kernel sets a PTE, but we
@@ -550,7 +550,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
  * which brings boot back to 0.25 seconds. */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
-	*ptep = pteval;
+	native_set_pte(ptep, pteval);
 	if (cr3_changed)
 		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 4969953..ffba723 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -90,7 +90,7 @@ static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr)
 	pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
 	/* You should never call this if the PGD entry wasn't valid */
 	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
-	return &page[(vaddr >> PAGE_SHIFT) % PTRS_PER_PTE];
+	return &page[pte_index(vaddr)];
 }
 
 /* These two functions just like the above two, except they access the Guest
@@ -105,7 +105,7 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
 {
 	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
 	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
-	return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t);
+	return gpage + pte_index(vaddr) * sizeof(pte_t);
 }
 /*:*/
 
@@ -171,7 +171,7 @@ static void release_pte(pte_t pte)
 	/* Remember that get_user_pages_fast() took a reference to the page, in
 	 * get_pfn()?  We have to put it back now. */
 	if (pte_flags(pte) & _PAGE_PRESENT)
-		put_page(pfn_to_page(pte_pfn(pte)));
+		put_page(pte_page(pte));
 }
 /*:*/
 
@@ -273,7 +273,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		 * table entry, even if the Guest says it's writable.  That way
 		 * we will come back here when a write does actually occur, so
 		 * we can update the Guest's _PAGE_DIRTY flag. */
-		*spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0);
+		native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
 
 	/* Finally, we write the Guest PTE entry back: we've set the
 	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
@@ -323,7 +323,7 @@ void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
 }
 
 /*H:450 If we chase down the release_pgd() code, it looks like this: */
-static void release_pgd(struct lguest *lg, pgd_t *spgd)
+static void release_pgd(pgd_t *spgd)
 {
 	/* If the entry's not present, there's nothing to release. */
 	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
@@ -350,7 +350,7 @@ static void flush_user_mappings(struct lguest *lg, int idx)
 	unsigned int i;
 	/* Release every pgd entry up to the kernel's address. */
 	for (i = 0; i < pgd_index(lg->kernel_address); i++)
-		release_pgd(lg, lg->pgdirs[idx].pgdir + i);
+		release_pgd(lg->pgdirs[idx].pgdir + i);
 }
 
 /*H:440 (v) Flushing (throwing away) page tables,
@@ -431,7 +431,7 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 
 /*H:430 (iv) Switching page tables
  *
- * Now we've seen all the page table setting and manipulation, let's see what
+ * Now we've seen all the page table setting and manipulation, let's see
  * what happens when the Guest changes page tables (ie. changes the top-level
  * pgdir).  This occurs on almost every context switch. */
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
@@ -463,7 +463,7 @@ static void release_all_pagetables(struct lguest *lg)
 		if (lg->pgdirs[i].pgdir)
 			/* Every PGD entry except the Switcher at the top */
 			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
-				release_pgd(lg, lg->pgdirs[i].pgdir + j);
+				release_pgd(lg->pgdirs[i].pgdir + j);
 }
 
 /* We also throw away everything when a Guest tells us it's changed a kernel
@@ -581,7 +581,7 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 	pgdir = find_pgdir(lg, gpgdir);
 	if (pgdir < ARRAY_SIZE(lg->pgdirs))
 		/* ... throw it away. */
-		release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
+		release_pgd(lg->pgdirs[pgdir].pgdir + idx);
 }
 
 /* Once we know how much memory we have we can construct simple identity
@@ -726,8 +726,9 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 	 * page is already mapped there, we don't have to copy them out
 	 * again. */
 	pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
-	regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL));
-	switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
+	native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL));
+	native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)],
+			regs_pte);
 }
 /*:*/
 
@@ -752,21 +753,21 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
 
 	/* The first entries are easy: they map the Switcher code. */
 	for (i = 0; i < pages; i++) {
-		pte[i] = mk_pte(switcher_page[i],
-				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
+		native_set_pte(&pte[i], mk_pte(switcher_page[i],
+				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
 	}
 
 	/* The only other thing we map is this CPU's pair of pages. */
 	i = pages + cpu*2;
 
 	/* First page (Guest registers) is writable from the Guest */
-	pte[i] = pfn_pte(page_to_pfn(switcher_page[i]),
-			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW));
+	native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
+			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
 
 	/* The second page contains the "struct lguest_ro_state", and is
 	 * read-only. */
-	pte[i+1] = pfn_pte(page_to_pfn(switcher_page[i+1]),
-			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
+	native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
+			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
 }
 
 /* We've made it through the page table code.  Perhaps our tired brains are
-- 
cgit v0.10.2


From ebe0ba84f55950a89cb7af94c7ffc35ee3992f9e Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Sat, 30 May 2009 15:48:08 -0300
Subject: lguest: replace hypercall name LHCALL_SET_PMD with LHCALL_SET_PGD

replace LHCALL_SET_PMD with LHCALL_SET_PGD hypercall name
(That's really what it is, and the confusion gets worse with PAE support)

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reported-by: Jeremy Fitzhardinge <jeremy@goop.org>

diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index f9a9f78..05b9c19 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -13,7 +13,7 @@
 #define LHCALL_SET_CLOCKEVENT	9
 #define LHCALL_HALT		10
 #define LHCALL_SET_PTE		14
-#define LHCALL_SET_PMD		15
+#define LHCALL_SET_PGD		15
 #define LHCALL_LOAD_TLS		16
 #define LHCALL_NOTIFY		17
 #define LHCALL_LOAD_GDT_ENTRY	18
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4f311e4..943a75e 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -535,7 +535,7 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 	native_set_pmd(pmdp, pmdval);
-	lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
+	lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
 		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
 }
 
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index f252b71..51149ca 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -79,8 +79,8 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 	case LHCALL_SET_PTE:
 		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
 		break;
-	case LHCALL_SET_PMD:
-		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
+	case LHCALL_SET_PGD:
+		guest_set_pgd(cpu->lg, args->arg1, args->arg2);
 		break;
 	case LHCALL_SET_CLOCKEVENT:
 		guest_set_clockevent(cpu, args->arg1);
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 74af503..cacc2da 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -169,7 +169,7 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt);
 int init_guest_pagetable(struct lguest *lg);
 void free_guest_pagetable(struct lguest *lg);
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
-void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
+void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 i);
 void guest_pagetable_clear_all(struct lg_cpu *cpu);
 void guest_pagetable_flush_user(struct lg_cpu *cpu);
 void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index ffba723..6a54d76 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -568,7 +568,7 @@ void guest_set_pte(struct lg_cpu *cpu,
  *
  * So with that in mind here's our code to to update a (top-level) PGD entry:
  */
-void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
+void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 {
 	int pgdir;
 
-- 
cgit v0.10.2


From cefcad1773197523e11e18b669f245e6a8d32058 Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Fri, 12 Jun 2009 22:27:07 -0600
Subject: lguest: Add support for kvm_hypercall4()

Add support for kvm_hypercall4(); PAE wants it.

Signed-off-by: Matias Zabaljauregui <zabaljauregui at gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index 05b9c19..b14b355 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -35,8 +35,8 @@
  *
  * We use the KVM hypercall mechanism. Eighteen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
- * arguments (when required) are placed in %ebx, %ecx and %edx.  If a return
- * value makes sense, it's returned in %eax.
+ * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
+ * If a return value makes sense, it's returned in %eax.
  *
  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
  * Host, rather than returning failure.  This reflects Winston Churchill's
@@ -48,8 +48,9 @@
 
 #define LHCALL_RING_SIZE 64
 struct hcall_args {
-	/* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */
-	unsigned long arg0, arg1, arg2, arg3;
+	/* These map directly onto eax, ebx, ecx, edx and esi
+	 * in struct lguest_regs */
+	unsigned long arg0, arg1, arg2, arg3, arg4;
 };
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 943a75e..d12f554 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -87,7 +87,7 @@ struct lguest_data lguest_data = {
 
 /*G:037 async_hcall() is pretty simple: I'm quite proud of it really.  We have a
  * ring buffer of stored hypercalls which the Host will run though next time we
- * do a normal hypercall.  Each entry in the ring has 4 slots for the hypercall
+ * do a normal hypercall.  Each entry in the ring has 5 slots for the hypercall
  * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
  * and 255 once the Host has finished with it.
  *
@@ -96,7 +96,8 @@ struct lguest_data lguest_data = {
  * effect of causing the Host to run all the stored calls in the ring buffer
  * which empties it for next time! */
 static void async_hcall(unsigned long call, unsigned long arg1,
-			unsigned long arg2, unsigned long arg3)
+			unsigned long arg2, unsigned long arg3,
+			unsigned long arg4)
 {
 	/* Note: This code assumes we're uniprocessor. */
 	static unsigned int next_call;
@@ -108,12 +109,13 @@ static void async_hcall(unsigned long call, unsigned long arg1,
 	local_irq_save(flags);
 	if (lguest_data.hcall_status[next_call] != 0xFF) {
 		/* Table full, so do normal hcall which will flush table. */
-		kvm_hypercall3(call, arg1, arg2, arg3);
+		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
 	} else {
 		lguest_data.hcalls[next_call].arg0 = call;
 		lguest_data.hcalls[next_call].arg1 = arg1;
 		lguest_data.hcalls[next_call].arg2 = arg2;
 		lguest_data.hcalls[next_call].arg3 = arg3;
+		lguest_data.hcalls[next_call].arg4 = arg4;
 		/* Arguments must all be written before we mark it to go */
 		wmb();
 		lguest_data.hcall_status[next_call] = 0;
@@ -141,7 +143,7 @@ static void lazy_hcall1(unsigned long call,
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall1(call, arg1);
 	else
-		async_hcall(call, arg1, 0, 0);
+		async_hcall(call, arg1, 0, 0, 0);
 }
 
 static void lazy_hcall2(unsigned long call,
@@ -151,7 +153,7 @@ static void lazy_hcall2(unsigned long call,
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall2(call, arg1, arg2);
 	else
-		async_hcall(call, arg1, arg2, 0);
+		async_hcall(call, arg1, arg2, 0, 0);
 }
 
 static void lazy_hcall3(unsigned long call,
@@ -162,7 +164,19 @@ static void lazy_hcall3(unsigned long call,
 	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
 		kvm_hypercall3(call, arg1, arg2, arg3);
 	else
-		async_hcall(call, arg1, arg2, arg3);
+		async_hcall(call, arg1, arg2, arg3, 0);
+}
+
+static void lazy_hcall4(unsigned long call,
+		       unsigned long arg1,
+		       unsigned long arg2,
+		       unsigned long arg3,
+		       unsigned long arg4)
+{
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
+		kvm_hypercall4(call, arg1, arg2, arg3, arg4);
+	else
+		async_hcall(call, arg1, arg2, arg3, arg4);
 }
 
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
-- 
cgit v0.10.2


From acdd0b6292b282c4511897ac2691a47befbf1c6a Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Fri, 12 Jun 2009 22:27:07 -0600
Subject: lguest: PAE support

This version requires that host and guest have the same PAE status.
NX cap is not offered to the guest, yet.

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt
index 28c7473..efb3a6a 100644
--- a/Documentation/lguest/lguest.txt
+++ b/Documentation/lguest/lguest.txt
@@ -37,7 +37,6 @@ Running Lguest:
      "Paravirtualized guest support" = Y
         "Lguest guest support" = Y
      "High Memory Support" = off/4GB
-     "PAE (Physical Address Extension) Support" = N
      "Alignment value to which kernel should be aligned" = 0x100000
         (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
          CONFIG_PHYSICAL_ALIGN=0x100000)
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index 1caf576..313389c 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -17,8 +17,13 @@
 /* Pages for switcher itself, then two pages per cpu */
 #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
 
-/* We map at -4M for ease of mapping into the guest (one PTE page). */
+/* We map at -4M (-2M when PAE is activated) for ease of mapping
+ * into the guest (one PTE page). */
+#ifdef CONFIG_X86_PAE
+#define SWITCHER_ADDR 0xFFE00000
+#else
 #define SWITCHER_ADDR 0xFFC00000
+#endif
 
 /* Found in switcher.S */
 extern unsigned long default_idt_entries[];
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index b14b355..d31c4a6 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -12,6 +12,7 @@
 #define LHCALL_TS		8
 #define LHCALL_SET_CLOCKEVENT	9
 #define LHCALL_HALT		10
+#define LHCALL_SET_PMD		13
 #define LHCALL_SET_PTE		14
 #define LHCALL_SET_PGD		15
 #define LHCALL_LOAD_TLS		16
@@ -33,7 +34,7 @@
  * operations?  There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * We use the KVM hypercall mechanism. Eighteen hypercalls are
+ * We use the KVM hypercall mechanism. Seventeen hypercalls are
  * available: the hypercall number is put in the %eax register, and the
  * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
  * If a return value makes sense, it's returned in %eax.
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 8dab8f7..3871804 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -2,7 +2,6 @@ config LGUEST_GUEST
 	bool "Lguest guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on !X86_PAE
 	select VIRTIO
 	select VIRTIO_RING
 	select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d12f554..7bc65f0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -167,6 +167,7 @@ static void lazy_hcall3(unsigned long call,
 		async_hcall(call, arg1, arg2, arg3, 0);
 }
 
+#ifdef CONFIG_X86_PAE
 static void lazy_hcall4(unsigned long call,
 		       unsigned long arg1,
 		       unsigned long arg2,
@@ -178,6 +179,7 @@ static void lazy_hcall4(unsigned long call,
 	else
 		async_hcall(call, arg1, arg2, arg3, arg4);
 }
+#endif
 
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
  * issue the do-nothing hypercall to flush any stored calls. */
@@ -380,8 +382,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 	case 1:	/* Basic feature request. */
 		/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
 		*cx &= 0x00002201;
-		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
-		*dx &= 0x07808111;
+		/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */
+		*dx &= 0x07808151;
 		/* The Host can do a nice optimization if it knows that the
 		 * kernel mappings (addresses above 0xC0000000 or whatever
 		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
@@ -400,6 +402,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 		if (*ax > 0x80000008)
 			*ax = 0x80000008;
 		break;
+	case 0x80000001:
+		/* Here we should fix nx cap depending on host. */
+		/* For this version of PAE, we just clear NX bit. */
+		*dx &= ~(1 << 20);
+		break;
 	}
 }
 
@@ -533,7 +540,12 @@ static void lguest_write_cr4(unsigned long val)
 static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
 			       pte_t *ptep)
 {
+#ifdef CONFIG_X86_PAE
+	lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr,
+		    ptep->pte_low, ptep->pte_high);
+#else
 	lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
+#endif
 }
 
 static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -543,15 +555,37 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	lguest_pte_update(mm, addr, ptep);
 }
 
-/* The Guest calls this to set a top-level entry.  Again, we set the entry then
- * tell the Host which top-level page we changed, and the index of the entry we
- * changed. */
+/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd
+ * to set a middle-level entry when PAE is activated.
+ * Again, we set the entry then tell the Host which page we changed,
+ * and the index of the entry we changed. */
+#ifdef CONFIG_X86_PAE
+static void lguest_set_pud(pud_t *pudp, pud_t pudval)
+{
+	native_set_pud(pudp, pudval);
+
+	/* 32 bytes aligned pdpt address and the index. */
+	lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0,
+		   (__pa(pudp) & 0x1F) / sizeof(pud_t));
+}
+
+static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	native_set_pmd(pmdp, pmdval);
+	lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
+		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
+}
+#else
+
+/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not
+ * activated. */
 static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
 	native_set_pmd(pmdp, pmdval);
 	lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
 		   (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
 }
+#endif
 
 /* There are a couple of legacy places where the kernel sets a PTE, but we
  * don't know the top level any more.  This is useless for us, since we don't
@@ -569,6 +603,26 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
 }
 
+#ifdef CONFIG_X86_PAE
+static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	native_set_pte_atomic(ptep, pte);
+	if (cr3_changed)
+		lazy_hcall1(LHCALL_FLUSH_TLB, 1);
+}
+
+void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	native_pte_clear(mm, addr, ptep);
+	lguest_pte_update(mm, addr, ptep);
+}
+
+void lguest_pmd_clear(pmd_t *pmdp)
+{
+	lguest_set_pmd(pmdp, __pmd(0));
+}
+#endif
+
 /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
  * native page table operations.  On native hardware you can set a new page
  * table entry whenever you want, but if you want to remove one you have to do
@@ -1035,6 +1089,7 @@ __init void lguest_init(void)
 	pv_info.name = "lguest";
 	pv_info.paravirt_enabled = 1;
 	pv_info.kernel_rpl = 1;
+	pv_info.shared_kernel_pmd = 1;
 
 	/* We set up all the lguest overrides for sensitive operations.  These
 	 * are detailed with the operations themselves. */
@@ -1080,6 +1135,12 @@ __init void lguest_init(void)
 	pv_mmu_ops.set_pte = lguest_set_pte;
 	pv_mmu_ops.set_pte_at = lguest_set_pte_at;
 	pv_mmu_ops.set_pmd = lguest_set_pmd;
+#ifdef CONFIG_X86_PAE
+	pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
+	pv_mmu_ops.pte_clear = lguest_pte_clear;
+	pv_mmu_ops.pmd_clear = lguest_pmd_clear;
+	pv_mmu_ops.set_pud = lguest_set_pud;
+#endif
 	pv_mmu_ops.read_cr2 = lguest_read_cr2;
 	pv_mmu_ops.read_cr3 = lguest_read_cr3;
 	pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index a3d3cba..8f63845 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
 config LGUEST
 	tristate "Linux hypervisor example code"
-	depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX
+	depends on X86_32 && EXPERIMENTAL && FUTEX
 	select HVC_DRIVER
 	---help---
 	  This is a very simple module which allows you to run
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 51149ca..c29ffa1 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -77,11 +77,21 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 		guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
 		break;
 	case LHCALL_SET_PTE:
+#ifdef CONFIG_X86_PAE
+		guest_set_pte(cpu, args->arg1, args->arg2,
+				__pte(args->arg3 | (u64)args->arg4 << 32));
+#else
 		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
+#endif
 		break;
 	case LHCALL_SET_PGD:
 		guest_set_pgd(cpu->lg, args->arg1, args->arg2);
 		break;
+#ifdef CONFIG_X86_PAE
+	case LHCALL_SET_PMD:
+		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
+		break;
+#endif
 	case LHCALL_SET_CLOCKEVENT:
 		guest_set_clockevent(cpu, args->arg1);
 		break;
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index cacc2da..6201ce5 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -137,6 +137,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
  * in the kernel. */
 #define pgd_flags(x)	(pgd_val(x) & ~PAGE_MASK)
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
+#define pmd_flags(x)    (pmd_val(x) & ~PAGE_MASK)
+#define pmd_pfn(x)	(pmd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
 unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
@@ -170,6 +172,9 @@ int init_guest_pagetable(struct lguest *lg);
 void free_guest_pagetable(struct lguest *lg);
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
 void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 i);
+#ifdef CONFIG_X86_PAE
+void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
+#endif
 void guest_pagetable_clear_all(struct lg_cpu *cpu);
 void guest_pagetable_flush_user(struct lg_cpu *cpu);
 void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 6a54d76..5e2c26a 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -53,6 +53,17 @@
  * page.  */
 #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)
 
+/* For PAE we need the PMD index as well. We use the last 2MB, so we
+ * will need the last pmd entry of the last pmd page.  */
+#ifdef CONFIG_X86_PAE
+#define SWITCHER_PMD_INDEX 	(PTRS_PER_PMD - 1)
+#define RESERVE_MEM 		2U
+#define CHECK_GPGD_MASK		_PAGE_PRESENT
+#else
+#define RESERVE_MEM 		4U
+#define CHECK_GPGD_MASK		_PAGE_TABLE
+#endif
+
 /* We actually need a separate PTE page for each CPU.  Remember that after the
  * Switcher code itself comes two pages for each CPU, and we don't want this
  * CPU's guest to see the pages of any other CPU. */
@@ -73,23 +84,58 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
 {
 	unsigned int index = pgd_index(vaddr);
 
+#ifndef CONFIG_X86_PAE
 	/* We kill any Guest trying to touch the Switcher addresses. */
 	if (index >= SWITCHER_PGD_INDEX) {
 		kill_guest(cpu, "attempt to access switcher pages");
 		index = 0;
 	}
+#endif
 	/* Return a pointer index'th pgd entry for the i'th page table. */
 	return &cpu->lg->pgdirs[i].pgdir[index];
 }
 
+#ifdef CONFIG_X86_PAE
+/* This routine then takes the PGD entry given above, which contains the
+ * address of the PMD page.  It then returns a pointer to the PMD entry for the
+ * given address. */
+static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
+{
+	unsigned int index = pmd_index(vaddr);
+	pmd_t *page;
+
+	/* We kill any Guest trying to touch the Switcher addresses. */
+	if (pgd_index(vaddr) == SWITCHER_PGD_INDEX &&
+					index >= SWITCHER_PMD_INDEX) {
+		kill_guest(cpu, "attempt to access switcher pages");
+		index = 0;
+	}
+
+	/* You should never call this if the PGD entry wasn't valid */
+	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
+	page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
+
+	return &page[index];
+}
+#endif
+
 /* This routine then takes the page directory entry returned above, which
  * contains the address of the page table entry (PTE) page.  It then returns a
  * pointer to the PTE entry for the given address. */
-static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr)
+static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
 {
+#ifdef CONFIG_X86_PAE
+	pmd_t *pmd = spmd_addr(cpu, spgd, vaddr);
+	pte_t *page = __va(pmd_pfn(*pmd) << PAGE_SHIFT);
+
+	/* You should never call this if the PMD entry wasn't valid */
+	BUG_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT));
+#else
 	pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
 	/* You should never call this if the PGD entry wasn't valid */
 	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
+#endif
+
 	return &page[pte_index(vaddr)];
 }
 
@@ -101,10 +147,31 @@ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
 	return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
 }
 
-static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr)
+#ifdef CONFIG_X86_PAE
+static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr)
 {
 	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
 	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
+	return gpage + pmd_index(vaddr) * sizeof(pmd_t);
+}
+#endif
+
+static unsigned long gpte_addr(struct lg_cpu *cpu,
+				pgd_t gpgd, unsigned long vaddr)
+{
+#ifdef CONFIG_X86_PAE
+	pmd_t gpmd;
+#endif
+	unsigned long gpage;
+
+	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
+#ifdef CONFIG_X86_PAE
+	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+	gpage = pmd_pfn(gpmd) << PAGE_SHIFT;
+	BUG_ON(!(pmd_flags(gpmd) & _PAGE_PRESENT));
+#else
+	gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
+#endif
 	return gpage + pte_index(vaddr) * sizeof(pte_t);
 }
 /*:*/
@@ -184,11 +251,20 @@ static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
 
 static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
 {
-	if ((pgd_flags(gpgd) & ~_PAGE_TABLE) ||
+	if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) ||
 	   (pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
 		kill_guest(cpu, "bad page directory entry");
 }
 
+#ifdef CONFIG_X86_PAE
+static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
+{
+	if ((pmd_flags(gpmd) & ~_PAGE_TABLE) ||
+	   (pmd_pfn(gpmd) >= cpu->lg->pfn_limit))
+		kill_guest(cpu, "bad page middle directory entry");
+}
+#endif
+
 /*H:330
  * (i) Looking up a page table entry when the Guest faults.
  *
@@ -207,6 +283,11 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	pte_t gpte;
 	pte_t *spte;
 
+#ifdef CONFIG_X86_PAE
+	pmd_t *spmd;
+	pmd_t gpmd;
+#endif
+
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present?  We can't map it in. */
@@ -228,12 +309,40 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		check_gpgd(cpu, gpgd);
 		/* And we copy the flags to the shadow PGD entry.  The page
 		 * number in the shadow PGD is the page we just allocated. */
-		*spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
+		set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
 	}
 
+#ifdef CONFIG_X86_PAE
+	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+	/* middle level not present?  We can't map it in. */
+	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+		return false;
+
+	/* Now look at the matching shadow entry. */
+	spmd = spmd_addr(cpu, *spgd, vaddr);
+
+	if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
+		/* No shadow entry: allocate a new shadow PTE page. */
+		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
+
+		/* This is not really the Guest's fault, but killing it is
+		* simple for this corner case. */
+		if (!ptepage) {
+			kill_guest(cpu, "out of memory allocating pte page");
+			return false;
+		}
+
+		/* We check that the Guest pmd is OK. */
+		check_gpmd(cpu, gpmd);
+
+		/* And we copy the flags to the shadow PMD entry.  The page
+		 * number in the shadow PMD is the page we just allocated. */
+		native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
+	}
+#endif
 	/* OK, now we look at the lower level in the Guest page table: keep its
 	 * address, because we might update it later. */
-	gpte_ptr = gpte_addr(gpgd, vaddr);
+	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
 	gpte = lgread(cpu, gpte_ptr, pte_t);
 
 	/* If this page isn't in the Guest page tables, we can't page it in. */
@@ -259,7 +368,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		gpte = pte_mkdirty(gpte);
 
 	/* Get the pointer to the shadow PTE entry we're going to set. */
-	spte = spte_addr(*spgd, vaddr);
+	spte = spte_addr(cpu, *spgd, vaddr);
 	/* If there was a valid shadow PTE entry here before, we release it.
 	 * This can happen with a write to a previously read-only entry. */
 	release_pte(*spte);
@@ -301,14 +410,23 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
 	pgd_t *spgd;
 	unsigned long flags;
 
+#ifdef CONFIG_X86_PAE
+	pmd_t *spmd;
+#endif
 	/* Look at the current top level entry: is it present? */
 	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
 	if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
 		return false;
 
+#ifdef CONFIG_X86_PAE
+	spmd = spmd_addr(cpu, *spgd, vaddr);
+	if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
+		return false;
+#endif
+
 	/* Check the flags on the pte entry itself: it must be present and
 	 * writable. */
-	flags = pte_flags(*(spte_addr(*spgd, vaddr)));
+	flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr)));
 
 	return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
 }
@@ -322,6 +440,41 @@ void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
 		kill_guest(cpu, "bad stack page %#lx", vaddr);
 }
 
+#ifdef CONFIG_X86_PAE
+static void release_pmd(pmd_t *spmd)
+{
+	/* If the entry's not present, there's nothing to release. */
+	if (pmd_flags(*spmd) & _PAGE_PRESENT) {
+		unsigned int i;
+		pte_t *ptepage = __va(pmd_pfn(*spmd) << PAGE_SHIFT);
+		/* For each entry in the page, we might need to release it. */
+		for (i = 0; i < PTRS_PER_PTE; i++)
+			release_pte(ptepage[i]);
+		/* Now we can free the page of PTEs */
+		free_page((long)ptepage);
+		/* And zero out the PMD entry so we never release it twice. */
+		native_set_pmd(spmd, __pmd(0));
+	}
+}
+
+static void release_pgd(pgd_t *spgd)
+{
+	/* If the entry's not present, there's nothing to release. */
+	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
+		unsigned int i;
+		pmd_t *pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
+
+		for (i = 0; i < PTRS_PER_PMD; i++)
+			release_pmd(&pmdpage[i]);
+
+		/* Now we can free the page of PMDs */
+		free_page((long)pmdpage);
+		/* And zero out the PGD entry so we never release it twice. */
+		set_pgd(spgd, __pgd(0));
+	}
+}
+
+#else /* !CONFIG_X86_PAE */
 /*H:450 If we chase down the release_pgd() code, it looks like this: */
 static void release_pgd(pgd_t *spgd)
 {
@@ -341,7 +494,7 @@ static void release_pgd(pgd_t *spgd)
 		*spgd = __pgd(0);
 	}
 }
-
+#endif
 /*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings()
  * hypercall and once in new_pgdir() when we re-used a top-level pgdir page.
  * It simply releases every PTE page from 0 up to the Guest's kernel address. */
@@ -370,6 +523,9 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 	pgd_t gpgd;
 	pte_t gpte;
 
+#ifdef CONFIG_X86_PAE
+	pmd_t gpmd;
+#endif
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present?  We can't map it in. */
@@ -378,7 +534,13 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 		return -1UL;
 	}
 
-	gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t);
+	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
+#ifdef CONFIG_X86_PAE
+	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+		kill_guest(cpu, "Bad address %#lx", vaddr);
+#endif
+	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
 		kill_guest(cpu, "Bad address %#lx", vaddr);
 
@@ -405,6 +567,9 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 			      int *blank_pgdir)
 {
 	unsigned int next;
+#ifdef CONFIG_X86_PAE
+	pmd_t *pmd_table;
+#endif
 
 	/* We pick one entry at random to throw out.  Choosing the Least
 	 * Recently Used might be better, but this is easy. */
@@ -416,10 +581,27 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 		/* If the allocation fails, just keep using the one we have */
 		if (!cpu->lg->pgdirs[next].pgdir)
 			next = cpu->cpu_pgd;
-		else
-			/* This is a blank page, so there are no kernel
-			 * mappings: caller must map the stack! */
+		else {
+#ifdef CONFIG_X86_PAE
+			/* In PAE mode, allocate a pmd page and populate the
+			 * last pgd entry. */
+			pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+			if (!pmd_table) {
+				free_page((long)cpu->lg->pgdirs[next].pgdir);
+				set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0));
+				next = cpu->cpu_pgd;
+			} else {
+				set_pgd(cpu->lg->pgdirs[next].pgdir +
+					SWITCHER_PGD_INDEX,
+					__pgd(__pa(pmd_table) | _PAGE_PRESENT));
+				/* This is a blank page, so there are no kernel
+				 * mappings: caller must map the stack! */
+				*blank_pgdir = 1;
+			}
+#else
 			*blank_pgdir = 1;
+#endif
+		}
 	}
 	/* Record which Guest toplevel this shadows. */
 	cpu->lg->pgdirs[next].gpgdir = gpgdir;
@@ -460,10 +642,25 @@ static void release_all_pagetables(struct lguest *lg)
 
 	/* Every shadow pagetable this Guest has */
 	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-		if (lg->pgdirs[i].pgdir)
+		if (lg->pgdirs[i].pgdir) {
+#ifdef CONFIG_X86_PAE
+			pgd_t *spgd;
+			pmd_t *pmdpage;
+			unsigned int k;
+
+			/* Get the last pmd page. */
+			spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
+			pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
+
+			/* And release the pmd entries of that pmd page,
+			 * except for the switcher pmd. */
+			for (k = 0; k < SWITCHER_PMD_INDEX; k++)
+				release_pmd(&pmdpage[k]);
+#endif
 			/* Every PGD entry except the Switcher at the top */
 			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
 				release_pgd(lg->pgdirs[i].pgdir + j);
+		}
 }
 
 /* We also throw away everything when a Guest tells us it's changed a kernel
@@ -504,24 +701,37 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
 {
 	/* Look up the matching shadow page directory entry. */
 	pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
+#ifdef CONFIG_X86_PAE
+	pmd_t *spmd;
+#endif
 
 	/* If the top level isn't present, there's no entry to update. */
 	if (pgd_flags(*spgd) & _PAGE_PRESENT) {
-		/* Otherwise, we start by releasing the existing entry. */
-		pte_t *spte = spte_addr(*spgd, vaddr);
-		release_pte(*spte);
-
-		/* If they're setting this entry as dirty or accessed, we might
-		 * as well put that entry they've given us in now.  This shaves
-		 * 10% off a copy-on-write micro-benchmark. */
-		if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
-			check_gpte(cpu, gpte);
-			*spte = gpte_to_spte(cpu, gpte,
-					     pte_flags(gpte) & _PAGE_DIRTY);
-		} else
-			/* Otherwise kill it and we can demand_page() it in
-			 * later. */
-			*spte = __pte(0);
+#ifdef CONFIG_X86_PAE
+		spmd = spmd_addr(cpu, *spgd, vaddr);
+		if (pmd_flags(*spmd) & _PAGE_PRESENT) {
+#endif
+			/* Otherwise, we start by releasing
+			 * the existing entry. */
+			pte_t *spte = spte_addr(cpu, *spgd, vaddr);
+			release_pte(*spte);
+
+			/* If they're setting this entry as dirty or accessed,
+			 * we might as well put that entry they've given us
+			 * in now.  This shaves 10% off a
+			 * copy-on-write micro-benchmark. */
+			if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
+				check_gpte(cpu, gpte);
+				native_set_pte(spte,
+						gpte_to_spte(cpu, gpte,
+						pte_flags(gpte) & _PAGE_DIRTY));
+			} else
+				/* Otherwise kill it and we can demand_page()
+				 * it in later. */
+				native_set_pte(spte, __pte(0));
+#ifdef CONFIG_X86_PAE
+		}
+#endif
 	}
 }
 
@@ -572,8 +782,6 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 {
 	int pgdir;
 
-	/* The kernel seems to try to initialize this early on: we ignore its
-	 * attempts to map over the Switcher. */
 	if (idx >= SWITCHER_PGD_INDEX)
 		return;
 
@@ -583,6 +791,12 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 		/* ... throw it away. */
 		release_pgd(lg->pgdirs[pgdir].pgdir + idx);
 }
+#ifdef CONFIG_X86_PAE
+void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
+{
+	guest_pagetable_clear_all(&lg->cpus[0]);
+}
+#endif
 
 /* Once we know how much memory we have we can construct simple identity
  * (which set virtual == physical) and linear mappings
@@ -596,8 +810,16 @@ static unsigned long setup_pagetables(struct lguest *lg,
 {
 	pgd_t __user *pgdir;
 	pte_t __user *linear;
-	unsigned int mapped_pages, i, linear_pages, phys_linear;
 	unsigned long mem_base = (unsigned long)lg->mem_base;
+	unsigned int mapped_pages, i, linear_pages;
+#ifdef CONFIG_X86_PAE
+	pmd_t __user *pmds;
+	unsigned int j;
+	pgd_t pgd;
+	pmd_t pmd;
+#else
+	unsigned int phys_linear;
+#endif
 
 	/* We have mapped_pages frames to map, so we need
 	 * linear_pages page tables to map them. */
@@ -610,6 +832,9 @@ static unsigned long setup_pagetables(struct lguest *lg,
 	/* Now we use the next linear_pages pages as pte pages */
 	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
 
+#ifdef CONFIG_X86_PAE
+	pmds = (void *)linear - PAGE_SIZE;
+#endif
 	/* Linear mapping is easy: put every page's address into the
 	 * mapping in order. */
 	for (i = 0; i < mapped_pages; i++) {
@@ -621,6 +846,22 @@ static unsigned long setup_pagetables(struct lguest *lg,
 
 	/* The top level points to the linear page table pages above.
 	 * We setup the identity and linear mappings here. */
+#ifdef CONFIG_X86_PAE
+	for (i = 0, j; i < mapped_pages && j < PTRS_PER_PMD;
+	     i += PTRS_PER_PTE, j++) {
+		native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
+		- mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
+
+		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
+			return -EFAULT;
+	}
+
+	set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT));
+	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
+		return -EFAULT;
+	if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0)
+		return -EFAULT;
+#else
 	phys_linear = (unsigned long)linear - mem_base;
 	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
 		pgd_t pgd;
@@ -633,6 +874,7 @@ static unsigned long setup_pagetables(struct lguest *lg,
 				    &pgd, sizeof(pgd)))
 			return -EFAULT;
 	}
+#endif
 
 	/* We return the top level (guest-physical) address: remember where
 	 * this is. */
@@ -648,7 +890,10 @@ int init_guest_pagetable(struct lguest *lg)
 	u64 mem;
 	u32 initrd_size;
 	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
-
+#ifdef CONFIG_X86_PAE
+	pgd_t *pgd;
+	pmd_t *pmd_table;
+#endif
 	/* Get the Guest memory size and the ramdisk size from the boot header
 	 * located at lg->mem_base (Guest address 0). */
 	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
@@ -663,6 +908,15 @@ int init_guest_pagetable(struct lguest *lg)
 	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
 	if (!lg->pgdirs[0].pgdir)
 		return -ENOMEM;
+#ifdef CONFIG_X86_PAE
+	pgd = lg->pgdirs[0].pgdir;
+	pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
+	if (!pmd_table)
+		return -ENOMEM;
+
+	set_pgd(pgd + SWITCHER_PGD_INDEX,
+		__pgd(__pa(pmd_table) | _PAGE_PRESENT));
+#endif
 	lg->cpus[0].cpu_pgd = 0;
 	return 0;
 }
@@ -672,17 +926,24 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
 {
 	/* We get the kernel address: above this is all kernel memory. */
 	if (get_user(cpu->lg->kernel_address,
-		     &cpu->lg->lguest_data->kernel_address)
-	    /* We tell the Guest that it can't use the top 4MB of virtual
-	     * addresses used by the Switcher. */
-	    || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem)
-	    || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir))
+		&cpu->lg->lguest_data->kernel_address)
+		/* We tell the Guest that it can't use the top 2 or 4 MB
+		 * of virtual addresses used by the Switcher. */
+		|| put_user(RESERVE_MEM * 1024 * 1024,
+			&cpu->lg->lguest_data->reserve_mem)
+		|| put_user(cpu->lg->pgdirs[0].gpgdir,
+			&cpu->lg->lguest_data->pgdir))
 		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* In flush_user_mappings() we loop from 0 to
 	 * "pgd_index(lg->kernel_address)".  This assumes it won't hit the
 	 * Switcher mappings, so check that now. */
+#ifdef CONFIG_X86_PAE
+	if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX &&
+		pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX)
+#else
 	if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
+#endif
 		kill_guest(cpu, "bad kernel address %#lx",
 				 cpu->lg->kernel_address);
 }
@@ -708,16 +969,30 @@ void free_guest_pagetable(struct lguest *lg)
 void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
 	pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
-	pgd_t switcher_pgd;
 	pte_t regs_pte;
 	unsigned long pfn;
 
+#ifdef CONFIG_X86_PAE
+	pmd_t switcher_pmd;
+	pmd_t *pmd_table;
+
+	native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >>
+		       PAGE_SHIFT, PAGE_KERNEL_EXEC));
+
+	pmd_table = __va(pgd_pfn(cpu->lg->
+			pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
+								<< PAGE_SHIFT);
+	native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
+#else
+	pgd_t switcher_pgd;
+
 	/* Make the last PGD entry for this Guest point to the Switcher's PTE
 	 * page for this CPU (with appropriate flags). */
 	switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
 
 	cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
 
+#endif
 	/* We also change the Switcher PTE page.  When we're running the Guest,
 	 * we want the Guest's "regs" page to appear where the first Switcher
 	 * page for this CPU is.  This is an optimization: when the Switcher
-- 
cgit v0.10.2


From 92b4d8df8436cdd74d22a2a5b6b23b9abc737a3e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:08 -0600
Subject: lguest: PAE fixes

1) j wasn't initialized in setup_pagetables, so they weren't set up for me
   causing immediate guest crashes.

2) gpte_addr should not re-read the pmd from the Guest.  Especially
   not BUG_ON() based on the value.  If we ever supported SMP guests,
   they could trigger that.  And the Launcher could also trigger it
   (tho currently root-only).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 5e2c26a..a6fe1ab 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -154,26 +154,25 @@ static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr)
 	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
 	return gpage + pmd_index(vaddr) * sizeof(pmd_t);
 }
-#endif
 
 static unsigned long gpte_addr(struct lg_cpu *cpu,
-				pgd_t gpgd, unsigned long vaddr)
+			       pmd_t gpmd, unsigned long vaddr)
 {
-#ifdef CONFIG_X86_PAE
-	pmd_t gpmd;
-#endif
-	unsigned long gpage;
+	unsigned long gpage = pmd_pfn(gpmd) << PAGE_SHIFT;
 
-	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
-#ifdef CONFIG_X86_PAE
-	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
-	gpage = pmd_pfn(gpmd) << PAGE_SHIFT;
 	BUG_ON(!(pmd_flags(gpmd) & _PAGE_PRESENT));
+	return gpage + pte_index(vaddr) * sizeof(pte_t);
+}
 #else
-	gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
-#endif
+static unsigned long gpte_addr(struct lg_cpu *cpu,
+				pgd_t gpgd, unsigned long vaddr)
+{
+	unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
+
+	BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
 	return gpage + pte_index(vaddr) * sizeof(pte_t);
 }
+#endif
 /*:*/
 
 /*M:014 get_pfn is slow: we could probably try to grab batches of pages here as
@@ -339,10 +338,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		 * number in the shadow PMD is the page we just allocated. */
 		native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
 	}
-#endif
+
+	/* OK, now we look at the lower level in the Guest page table: keep its
+	 * address, because we might update it later. */
+	gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
+#else
 	/* OK, now we look at the lower level in the Guest page table: keep its
 	 * address, because we might update it later. */
 	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
+#endif
 	gpte = lgread(cpu, gpte_ptr, pte_t);
 
 	/* If this page isn't in the Guest page tables, we can't page it in. */
@@ -522,7 +526,6 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 {
 	pgd_t gpgd;
 	pte_t gpte;
-
 #ifdef CONFIG_X86_PAE
 	pmd_t gpmd;
 #endif
@@ -534,13 +537,14 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 		return -1UL;
 	}
 
-	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
 #ifdef CONFIG_X86_PAE
 	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
 	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
 		kill_guest(cpu, "Bad address %#lx", vaddr);
-#endif
+	gpte = lgread(cpu, gpte_addr(cpu, gpmd, vaddr), pte_t);
+#else
 	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
+#endif
 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
 		kill_guest(cpu, "Bad address %#lx", vaddr);
 
@@ -847,7 +851,7 @@ static unsigned long setup_pagetables(struct lguest *lg,
 	/* The top level points to the linear page table pages above.
 	 * We setup the identity and linear mappings here. */
 #ifdef CONFIG_X86_PAE
-	for (i = 0, j; i < mapped_pages && j < PTRS_PER_PMD;
+	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
 	     i += PTRS_PER_PTE, j++) {
 		native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
 		- mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-- 
cgit v0.10.2


From 9f155a9b3d5a5444bcc5e049ec2547bb5107150e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:08 -0600
Subject: lguest: allow any process to send interrupts

We currently only allow the Launcher process to send interrupts, but it
as we already send interrupts from the hrtimer, it's a simple matter of
extracting that code into a common set_interrupt routine.

As we switch to a thread per virtqueue, this avoids a bottleneck through the
main Launcher process.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 5a10754..0e9067b 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -213,6 +213,20 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
 	if (!more)
 		put_user(0, &cpu->lg->lguest_data->irq_pending);
 }
+
+/* And this is the routine when we want to set an interrupt for the Guest. */
+void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
+{
+	/* Next time the Guest runs, the core code will see if it can deliver
+	 * this interrupt. */
+	set_bit(irq, cpu->irqs_pending);
+
+	/* Make sure it sees it; it might be asleep (eg. halted), or
+	 * running the Guest right now, in which case kick_process()
+	 * will knock it out. */
+	if (!wake_up_process(cpu->tsk))
+		kick_process(cpu->tsk);
+}
 /*:*/
 
 /* Linux uses trap 128 for system calls.  Plan9 uses 64, and Ron Minnich sent
@@ -528,10 +542,7 @@ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer)
 	struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt);
 
 	/* Remember the first interrupt is the timer interrupt. */
-	set_bit(0, cpu->irqs_pending);
-	/* Guest may be stopped or running on another CPU. */
-	if (!wake_up_process(cpu->tsk))
-		kick_process(cpu->tsk);
+	set_interrupt(cpu, 0);
 	return HRTIMER_NORESTART;
 }
 
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 6201ce5..040cb70 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -143,6 +143,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
 /* interrupts_and_traps.c: */
 unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
 void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
+void set_interrupt(struct lg_cpu *cpu, unsigned int irq);
 bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
 void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
 			  u32 low, u32 hi);
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index bcdcf34..1982b45 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -45,9 +45,8 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input)
 		return -EFAULT;
 	if (irq >= LGUEST_IRQS)
 		return -EINVAL;
-	/* Next time the Guest runs, the core code will see if it can deliver
-	 * this interrupt. */
-	set_bit(irq, cpu->irqs_pending);
+
+	set_interrupt(cpu, irq);
 	return 0;
 }
 
@@ -252,11 +251,6 @@ static ssize_t write(struct file *file, const char __user *in,
 		/* Once the Guest is dead, you can only read() why it died. */
 		if (lg->dead)
 			return -ENOENT;
-
-		/* If you're not the task which owns the Guest, all you can do
-		 * is break the Launcher out of running the Guest. */
-		if (current != cpu->tsk && req != LHREQ_BREAK)
-			return -EPERM;
 	}
 
 	switch (req) {
-- 
cgit v0.10.2


From 5718607bb670c721f45f0dbb1cc7d6c64969aab1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:09 -0600
Subject: eventfd: export eventfd_signal and eventfd_fget for lguest

lguest wants to attach eventfds to guest notifications, and lguest is
usually a module.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
To: Davide Libenzi <davidel@xmailserver.org>

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 2a701d5..3f0e197 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/eventfd.h>
 #include <linux/syscalls.h>
+#include <linux/module.h>
 
 struct eventfd_ctx {
 	wait_queue_head_t wqh;
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
 
 	return n;
 }
+EXPORT_SYMBOL_GPL(eventfd_signal);
 
 static int eventfd_release(struct inode *inode, struct file *file)
 {
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
 
 	return file;
 }
+EXPORT_SYMBOL_GPL(eventfd_fget);
 
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {
-- 
cgit v0.10.2


From df60aeef4f4fe0645d9a195a7689005520422de5 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:09 -0600
Subject: lguest: use eventfds for device notification

Currently, when a Guest wants to perform I/O it calls LHCALL_NOTIFY with
an address: the main Launcher process returns with this address, and figures
out what device to run.

A far nicer model is to let processes bind an eventfd to an address: if we
find one, we simply signal the eventfd.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Davide Libenzi <davidel@xmailserver.org>

diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 8f63845..0aaa059 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
 config LGUEST
 	tristate "Linux hypervisor example code"
-	depends on X86_32 && EXPERIMENTAL && FUTEX
+	depends on X86_32 && EXPERIMENTAL && EVENTFD
 	select HVC_DRIVER
 	---help---
 	  This is a very simple module which allows you to run
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index d0298dc..508569c 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -198,9 +198,11 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		/* It's possible the Guest did a NOTIFY hypercall to the
 		 * Launcher, in which case we return from the read() now. */
 		if (cpu->pending_notify) {
-			if (put_user(cpu->pending_notify, user))
-				return -EFAULT;
-			return sizeof(cpu->pending_notify);
+			if (!send_notify_to_eventfd(cpu)) {
+				if (put_user(cpu->pending_notify, user))
+					return -EFAULT;
+				return sizeof(cpu->pending_notify);
+			}
 		}
 
 		/* Check for signals */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 040cb70..32fefdc 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -82,6 +82,16 @@ struct lg_cpu {
 	struct lg_cpu_arch arch;
 };
 
+struct lg_eventfd {
+	unsigned long addr;
+	struct file *event;
+};
+
+struct lg_eventfd_map {
+	unsigned int num;
+	struct lg_eventfd map[];
+};
+
 /* The private info the thread maintains about the guest. */
 struct lguest
 {
@@ -102,6 +112,8 @@ struct lguest
 	unsigned int stack_pages;
 	u32 tsc_khz;
 
+	struct lg_eventfd_map *eventfds;
+
 	/* Dead? */
 	const char *dead;
 };
@@ -154,6 +166,7 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
 void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
 		const unsigned long *def);
 void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
+bool send_notify_to_eventfd(struct lg_cpu *cpu);
 void init_clockdev(struct lg_cpu *cpu);
 bool check_syscall_vector(struct lguest *lg);
 int init_interrupts(void);
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 1982b45..f6bf255 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -7,6 +7,8 @@
 #include <linux/miscdevice.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
 #include "lg.h"
 
 /*L:055 When something happens, the Waker process needs a way to stop the
@@ -35,6 +37,81 @@ static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
 	}
 }
 
+bool send_notify_to_eventfd(struct lg_cpu *cpu)
+{
+	unsigned int i;
+	struct lg_eventfd_map *map;
+
+	/* lg->eventfds is RCU-protected */
+	rcu_read_lock();
+	map = rcu_dereference(cpu->lg->eventfds);
+	for (i = 0; i < map->num; i++) {
+		if (map->map[i].addr == cpu->pending_notify) {
+			eventfd_signal(map->map[i].event, 1);
+			cpu->pending_notify = 0;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return cpu->pending_notify == 0;
+}
+
+static int add_eventfd(struct lguest *lg, unsigned long addr, int fd)
+{
+	struct lg_eventfd_map *new, *old = lg->eventfds;
+
+	if (!addr)
+		return -EINVAL;
+
+	/* Replace the old array with the new one, carefully: others can
+	 * be accessing it at the same time */
+	new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1),
+		      GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	/* First make identical copy. */
+	memcpy(new->map, old->map, sizeof(old->map[0]) * old->num);
+	new->num = old->num;
+
+	/* Now append new entry. */
+	new->map[new->num].addr = addr;
+	new->map[new->num].event = eventfd_fget(fd);
+	if (IS_ERR(new->map[new->num].event)) {
+		kfree(new);
+		return PTR_ERR(new->map[new->num].event);
+	}
+	new->num++;
+
+	/* Now put new one in place. */
+	rcu_assign_pointer(lg->eventfds, new);
+
+	/* We're not in a big hurry.  Wait until noone's looking at old
+	 * version, then delete it. */
+	synchronize_rcu();
+	kfree(old);
+
+	return 0;
+}
+
+static int attach_eventfd(struct lguest *lg, const unsigned long __user *input)
+{
+	unsigned long addr, fd;
+	int err;
+
+	if (get_user(addr, input) != 0)
+		return -EFAULT;
+	input++;
+	if (get_user(fd, input) != 0)
+		return -EFAULT;
+
+	mutex_lock(&lguest_lock);
+	err = add_eventfd(lg, addr, fd);
+	mutex_unlock(&lguest_lock);
+
+	return 0;
+}
+
 /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
  * number to /dev/lguest. */
 static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input)
@@ -184,6 +261,13 @@ static int initialize(struct file *file, const unsigned long __user *input)
 		goto unlock;
 	}
 
+	lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL);
+	if (!lg->eventfds) {
+		err = -ENOMEM;
+		goto free_lg;
+	}
+	lg->eventfds->num = 0;
+
 	/* Populate the easy fields of our "struct lguest" */
 	lg->mem_base = (void __user *)args[0];
 	lg->pfn_limit = args[1];
@@ -191,7 +275,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
 	/* This is the first cpu (cpu 0) and it will start booting at args[2] */
 	err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
 	if (err)
-		goto release_guest;
+		goto free_eventfds;
 
 	/* Initialize the Guest's shadow page tables, using the toplevel
 	 * address the Launcher gave us.  This allocates memory, so can fail. */
@@ -210,7 +294,9 @@ static int initialize(struct file *file, const unsigned long __user *input)
 free_regs:
 	/* FIXME: This should be in free_vcpu */
 	free_page(lg->cpus[0].regs_page);
-release_guest:
+free_eventfds:
+	kfree(lg->eventfds);
+free_lg:
 	kfree(lg);
 unlock:
 	mutex_unlock(&lguest_lock);
@@ -260,6 +346,8 @@ static ssize_t write(struct file *file, const char __user *in,
 		return user_send_irq(cpu, input);
 	case LHREQ_BREAK:
 		return break_guest_out(cpu, input);
+	case LHREQ_EVENTFD:
+		return attach_eventfd(lg, input);
 	default:
 		return -EINVAL;
 	}
@@ -297,6 +385,12 @@ static int close(struct inode *inode, struct file *file)
 		 * the Launcher's memory management structure. */
 		mmput(lg->cpus[i].mm);
 	}
+
+	/* Release any eventfds they registered. */
+	for (i = 0; i < lg->eventfds->num; i++)
+		fput(lg->eventfds->map[i].event);
+	kfree(lg->eventfds);
+
 	/* If lg->dead doesn't contain an error code it will be NULL or a
 	 * kmalloc()ed string, either of which is ok to hand to kfree(). */
 	if (!IS_ERR(lg->dead))
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index a53407a..9de964b 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -58,6 +58,7 @@ enum lguest_req
 	LHREQ_GETDMA, /* No longer used */
 	LHREQ_IRQ, /* + irq */
 	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
+	LHREQ_EVENTFD, /* + address, fd. */
 };
 
 /* The alignment to use between consumer and producer parts of vring.
-- 
cgit v0.10.2


From 659a0e6633567246edcb7bd400c7e2bece9237d9 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:10 -0600
Subject: lguest: have example Launcher service all devices in separate threads

Currently lguest has three threads: the main Launcher thread, a Waker
thread, and a thread for the block device (because synchronous block
was simply too painful to bear).

The Waker selects() on all the input file descriptors (eg. stdin, net
devices, pipe to the block thread) and when one becomes readable it calls
into the kernel to kick the Launcher thread out into userspace, which
repeats the poll, services the device(s), and then tells the kernel to
release the Waker before re-entering the kernel to run the Guest.

Also, to make a slightly-decent network transmit routine, the Launcher
would suppress further network interrupts while it set a timer: that
signal handler would write to a pipe, which would rouse the Waker
which would prod the Launcher out of the kernel to check the network
device again.

Now we can convert all our virtqueues to separate threads: each one has
a separate eventfd for when the Guest pokes the device, and can trigger
interrupts in the Guest directly.

The linecount shows how much this simplifies, but to really bring it
home, here's an strace analysis of single Guest->Host ping before:

* Guest sends packet, notifies xmit vq, return control to Launcher
* Launcher clears notification flag on xmit ring
* Launcher writes packet to TUN device
	writev(4, [{"\0\0\0\0\0\0\0\0\0\0", 10}, {"\366\r\224`\2058\272m\224vf\274\10\0E\0\0T\0\0@\0@\1\265"..., 98}], 2) = 108
* Launcher sets up interrupt for Guest (xmit ring is empty)
	write(10, "\2\0\0\0\3\0\0\0", 8) = 0
* Launcher sets up timer for interrupt mitigation
	setitimer(ITIMER_REAL, {it_interval={0, 0}, it_value={0, 505}}, NULL) = 0
* Launcher re-runs guest
	pread64(10, 0xbfa5f4d4, 4, 0) ...
* Waker notices reply packet in tun device (it was in select)
	select(12, [0 3 4 6 11], NULL, NULL, NULL) = 1 (in [4])
* Waker kicks Launcher out of guest:
	pwrite64(10, "\3\0\0\0\1\0\0\0", 8, 0) = 0
* Launcher returns from running guest:
	... = -1 EAGAIN (Resource temporarily unavailable)
* Launcher looks at input fds:
	select(7, [0 3 4 6], NULL, NULL, {0, 0}) = 1 (in [4], left {0, 0})
* Launcher reads pong from tun device:
	readv(4, [{"\0\0\0\0\0\0\0\0\0\0", 10}, {"\272m\224vf\274\366\r\224`\2058\10\0E\0\0T\364\26\0\0@"..., 1518}], 2) = 108
* Launcher injects guest notification:
	write(10, "\2\0\0\0\2\0\0\0", 8) = 0
* Launcher rechecks fds:
	select(7, [0 3 4 6], NULL, NULL, {0, 0}) = 0 (Timeout)
* Launcher clears Waker:
	pwrite64(10, "\3\0\0\0\0\0\0\0", 8, 0) = 0
* Launcher reruns Guest:
	pread64(10, 0xbfa5f4d4, 4, 0) = ? ERESTARTSYS (To be restarted)
* Signal comes in, uses pipe to wake up Launcher:
	--- SIGALRM (Alarm clock) @ 0 (0) ---
	write(8, "\0", 1)       = 1
	sigreturn()             = ? (mask now [])
* Waker sees write on pipe:
	select(12, [0 3 4 6 11], NULL, NULL, NULL) = 1 (in [6])
* Waker kicks Launcher out of Guest:
	pwrite64(10, "\3\0\0\0\1\0\0\0", 8, 0) = 0
* Launcher exits from kernel:
	pread64(10, 0xbfa5f4d4, 4, 0) = -1 EAGAIN (Resource temporarily unavailable)
* Launcher looks to see what fd woke it:
	select(7, [0 3 4 6], NULL, NULL, {0, 0}) = 1 (in [6], left {0, 0})
* Launcher reads timeout fd, sets notification flag on xmit ring
	read(6, "\0", 32)       = 1
* Launcher rechecks fds:
	select(7, [0 3 4 6], NULL, NULL, {0, 0}) = 0 (Timeout)
* Launcher clears Waker:
	pwrite64(10, "\3\0\0\0\0\0\0\0", 8, 0) = 0
* Launcher resumes Guest:
	pread64(10, "\0p\0\4", 4, 0) ....

strace analysis of single Guest->Host ping after:

* Guest sends packet, notifies xmit vq, creates event on eventfd.
* Network xmit thread wakes from read on eventfd:
	read(7, "\1\0\0\0\0\0\0\0", 8)          = 8
* Network xmit thread writes packet to TUN device
	writev(4, [{"\0\0\0\0\0\0\0\0\0\0", 10}, {"J\217\232FI\37j\27\375\276\0\304\10\0E\0\0T\0\0@\0@\1\265"..., 98}], 2) = 108
* Network recv thread wakes up from read on tunfd:
	readv(4, [{"\0\0\0\0\0\0\0\0\0\0", 10}, {"j\27\375\276\0\304J\217\232FI\37\10\0E\0\0TiO\0\0@\1\214"..., 1518}], 2) = 108
* Network recv thread sets up interrupt for the Guest
	write(6, "\2\0\0\0\2\0\0\0", 8) = 0
* Network recv thread goes back to reading tunfd
	13:39:42.460285 readv(4,  <unfinished ...>
* Network xmit thread sets up interrupt for Guest (xmit ring is empty)
	write(6, "\2\0\0\0\3\0\0\0", 8) = 0
* Network xmit thread goes back to reading from eventfd
	read(7, <unfinished ...>

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 02fa524..5470b8e 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -16,6 +16,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
+#include <sys/eventfd.h>
 #include <fcntl.h>
 #include <stdbool.h>
 #include <errno.h>
@@ -59,7 +60,6 @@ typedef uint8_t u8;
 /*:*/
 
 #define PAGE_PRESENT 0x7 	/* Present, RW, Execute */
-#define NET_PEERNUM 1
 #define BRIDGE_PFX "bridge:"
 #ifndef SIOCBRADDIF
 #define SIOCBRADDIF	0x89a2		/* add interface to bridge      */
@@ -76,18 +76,10 @@ static bool verbose;
 	do { if (verbose) printf(args); } while(0)
 /*:*/
 
-/* File descriptors for the Waker. */
-struct {
-	int pipe[2];
-} waker_fds;
-
 /* The pointer to the start of guest memory. */
 static void *guest_base;
 /* The maximum guest physical address allowed, and maximum possible. */
 static unsigned long guest_limit, guest_max;
-/* The pipe for signal hander to write to. */
-static int timeoutpipe[2];
-static unsigned int timeout_usec = 500;
 /* The /dev/lguest file descriptor. */
 static int lguest_fd;
 
@@ -97,11 +89,6 @@ static unsigned int __thread cpu_id;
 /* This is our list of devices. */
 struct device_list
 {
-	/* Summary information about the devices in our list: ready to pass to
-	 * select() to ask which need servicing.*/
-	fd_set infds;
-	int max_infd;
-
 	/* Counter to assign interrupt numbers. */
 	unsigned int next_irq;
 
@@ -137,16 +124,11 @@ struct device
 	/* The name of this device, for --verbose. */
 	const char *name;
 
-	/* If handle_input is set, it wants to be called when this file
-	 * descriptor is ready. */
-	int fd;
-	bool (*handle_input)(struct device *me);
-
 	/* Any queues attached to this device */
 	struct virtqueue *vq;
 
-	/* Handle status being finalized (ie. feature bits stable). */
-	void (*ready)(struct device *me);
+	/* Is it operational */
+	bool running;
 
 	/* Device-specific data. */
 	void *priv;
@@ -169,16 +151,20 @@ struct virtqueue
 	/* Last available index we saw. */
 	u16 last_avail_idx;
 
-	/* The routine to call when the Guest pings us, or timeout. */
-	void (*handle_output)(struct virtqueue *me, bool timeout);
+	/* Eventfd where Guest notifications arrive. */
+	int eventfd;
 
-	/* Is this blocked awaiting a timer? */
-	bool blocked;
+	/* Function for the thread which is servicing this virtqueue. */
+	void (*service)(struct virtqueue *vq);
+	pid_t thread;
 };
 
 /* Remember the arguments to the program so we can "reboot" */
 static char **main_args;
 
+/* The original tty settings to restore on exit. */
+static struct termios orig_term;
+
 /* We have to be careful with barriers: our devices are all run in separate
  * threads and so we need to make sure that changes visible to the Guest happen
  * in precise order. */
@@ -521,78 +507,6 @@ static void tell_kernel(unsigned long start)
 }
 /*:*/
 
-static void add_device_fd(int fd)
-{
-	FD_SET(fd, &devices.infds);
-	if (fd > devices.max_infd)
-		devices.max_infd = fd;
-}
-
-/*L:200
- * The Waker.
- *
- * With console, block and network devices, we can have lots of input which we
- * need to process.  We could try to tell the kernel what file descriptors to
- * watch, but handing a file descriptor mask through to the kernel is fairly
- * icky.
- *
- * Instead, we clone off a thread which watches the file descriptors and writes
- * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host
- * stop running the Guest.  This causes the Launcher to return from the
- * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
- * the LHREQ_BREAK and wake us up again.
- *
- * This, of course, is merely a different *kind* of icky.
- *
- * Given my well-known antipathy to threads, I'd prefer to use processes.  But
- * it's easier to share Guest memory with threads, and trivial to share the
- * devices.infds as the Launcher changes it.
- */
-static int waker(void *unused)
-{
-	/* Close the write end of the pipe: only the Launcher has it open. */
-	close(waker_fds.pipe[1]);
-
-	for (;;) {
-		fd_set rfds = devices.infds;
-		unsigned long args[] = { LHREQ_BREAK, 1 };
-		unsigned int maxfd = devices.max_infd;
-
-		/* We also listen to the pipe from the Launcher. */
-		FD_SET(waker_fds.pipe[0], &rfds);
-		if (waker_fds.pipe[0] > maxfd)
-			maxfd = waker_fds.pipe[0];
-
-		/* Wait until input is ready from one of the devices. */
-		select(maxfd+1, &rfds, NULL, NULL, NULL);
-
-		/* Message from Launcher? */
-		if (FD_ISSET(waker_fds.pipe[0], &rfds)) {
-			char c;
-			/* If this fails, then assume Launcher has exited.
-			 * Don't do anything on exit: we're just a thread! */
-			if (read(waker_fds.pipe[0], &c, 1) != 1)
-				_exit(0);
-			continue;
-		}
-
-		/* Send LHREQ_BREAK command to snap the Launcher out of it. */
-		pwrite(lguest_fd, args, sizeof(args), cpu_id);
-	}
-	return 0;
-}
-
-/* This routine just sets up a pipe to the Waker process. */
-static void setup_waker(void)
-{
-	/* This pipe is closed when Launcher dies, telling Waker. */
-	if (pipe(waker_fds.pipe) != 0)
-		err(1, "Creating pipe for Waker");
-
-	if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1)
-		err(1, "Creating Waker");
-}
-
 /*
  * Device Handling.
  *
@@ -642,25 +556,27 @@ static unsigned next_desc(struct virtqueue *vq, unsigned int i)
  * number of output then some number of input descriptors, it's actually two
  * iovecs, but we pack them into one and note how many of each there were.
  *
- * This function returns the descriptor number found, or vq->vring.num (which
- * is never a valid descriptor number) if none was found. */
-static unsigned get_vq_desc(struct virtqueue *vq,
-			    struct iovec iov[],
-			    unsigned int *out_num, unsigned int *in_num)
+ * This function returns the descriptor number found. */
+static unsigned wait_for_vq_desc(struct virtqueue *vq,
+				 struct iovec iov[],
+				 unsigned int *out_num, unsigned int *in_num)
 {
 	unsigned int i, head;
-	u16 last_avail;
+	u16 last_avail = lg_last_avail(vq);
+
+	while (last_avail == vq->vring.avail->idx) {
+		u64 event;
+
+		/* Nothing new?  Wait for eventfd to tell us they refilled. */
+		if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
+			errx(1, "Event read failed?");
+	}
 
 	/* Check it isn't doing very strange things with descriptor numbers. */
-	last_avail = lg_last_avail(vq);
 	if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
 		errx(1, "Guest moved used index from %u to %u",
 		     last_avail, vq->vring.avail->idx);
 
-	/* If there's nothing new since last we looked, return invalid. */
-	if (vq->vring.avail->idx == last_avail)
-		return vq->vring.num;
-
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
 	head = vq->vring.avail->ring[last_avail % vq->vring.num];
@@ -740,15 +656,7 @@ static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
 /*
  * The Console
  *
- * Here is the input terminal setting we save, and the routine to restore them
- * on exit so the user gets their terminal back. */
-static struct termios orig_term;
-static void restore_term(void)
-{
-	tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
-}
-
-/* We associate some data with the console for our exit hack. */
+ * We associate some data with the console for our exit hack. */
 struct console_abort
 {
 	/* How many times have they hit ^C? */
@@ -758,245 +666,235 @@ struct console_abort
 };
 
 /* This is the routine which handles console input (ie. stdin). */
-static bool handle_console_input(struct device *dev)
+static void console_input(struct virtqueue *vq)
 {
 	int len;
 	unsigned int head, in_num, out_num;
-	struct iovec iov[dev->vq->vring.num];
-	struct console_abort *abort = dev->priv;
-
-	/* First we need a console buffer from the Guests's input virtqueue. */
-	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
-
-	/* If they're not ready for input, stop listening to this file
-	 * descriptor.  We'll start again once they add an input buffer. */
-	if (head == dev->vq->vring.num)
-		return false;
+	struct console_abort *abort = vq->dev->priv;
+	struct iovec iov[vq->vring.num];
 
+	/* Make sure there's a descriptor waiting. */
+	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
 	if (out_num)
 		errx(1, "Output buffers in console in queue?");
 
-	/* This is why we convert to iovecs: the readv() call uses them, and so
-	 * it reads straight into the Guest's buffer. */
-	len = readv(dev->fd, iov, in_num);
+	/* Read it in. */
+	len = readv(STDIN_FILENO, iov, in_num);
 	if (len <= 0) {
-		/* This implies that the console is closed, is /dev/null, or
-		 * something went terribly wrong. */
+		/* Ran out of input? */
 		warnx("Failed to get console input, ignoring console.");
-		/* Put the input terminal back. */
-		restore_term();
-		/* Remove callback from input vq, so it doesn't restart us. */
-		dev->vq->handle_output = NULL;
-		/* Stop listening to this fd: don't call us again. */
-		return false;
+		/* For simplicity, dying threads kill the whole Launcher.  So
+		 * just nap here. */
+		for (;;)
+			pause();
 	}
 
-	/* Tell the Guest about the new input. */
-	add_used_and_trigger(dev->vq, head, len);
+	add_used_and_trigger(vq, head, len);
 
 	/* Three ^C within one second?  Exit.
 	 *
-	 * This is such a hack, but works surprisingly well.  Each ^C has to be
-	 * in a buffer by itself, so they can't be too fast.  But we check that
-	 * we get three within about a second, so they can't be too slow. */
-	if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) {
-		if (!abort->count++)
-			gettimeofday(&abort->start, NULL);
-		else if (abort->count == 3) {
-			struct timeval now;
-			gettimeofday(&now, NULL);
-			if (now.tv_sec <= abort->start.tv_sec+1) {
-				unsigned long args[] = { LHREQ_BREAK, 0 };
-				/* Close the fd so Waker will know it has to
-				 * exit. */
-				close(waker_fds.pipe[1]);
-				/* Just in case Waker is blocked in BREAK, send
-				 * unbreak now. */
-				write(lguest_fd, args, sizeof(args));
-				exit(2);
-			}
-			abort->count = 0;
-		}
-	} else
-		/* Any other key resets the abort counter. */
+	 * This is such a hack, but works surprisingly well.  Each ^C has to
+	 * be in a buffer by itself, so they can't be too fast.  But we check
+	 * that we get three within about a second, so they can't be too
+	 * slow. */
+	if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
 		abort->count = 0;
+		return;
+	}
 
-	/* Everything went OK! */
-	return true;
+	abort->count++;
+	if (abort->count == 1)
+		gettimeofday(&abort->start, NULL);
+	else if (abort->count == 3) {
+		struct timeval now;
+		gettimeofday(&now, NULL);
+		/* Kill all Launcher processes with SIGINT, like normal ^C */
+		if (now.tv_sec <= abort->start.tv_sec+1)
+			kill(0, SIGINT);
+		abort->count = 0;
+	}
 }
 
-/* Handling output for console is simple: we just get all the output buffers
- * and write them to stdout. */
-static void handle_console_output(struct virtqueue *vq, bool timeout)
+/* This is the routine which handles console output (ie. stdout). */
+static void console_output(struct virtqueue *vq)
 {
 	unsigned int head, out, in;
 	struct iovec iov[vq->vring.num];
 
-	/* Keep getting output buffers from the Guest until we run out. */
-	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
-		if (in)
-			errx(1, "Input buffers in output queue?");
-		while (!iov_empty(iov, out)) {
-			int len = writev(STDOUT_FILENO, iov, out);
-			if (len <= 0)
-				err(1, "Write to stdout gave %i", len);
-			iov_consume(iov, out, len);
-		}
-		add_used_and_trigger(vq, head, 0);
+	head = wait_for_vq_desc(vq, iov, &out, &in);
+	if (in)
+		errx(1, "Input buffers in console output queue?");
+	while (!iov_empty(iov, out)) {
+		int len = writev(STDOUT_FILENO, iov, out);
+		if (len <= 0)
+			err(1, "Write to stdout gave %i", len);
+		iov_consume(iov, out, len);
 	}
-}
-
-/* This is called when we no longer want to hear about Guest changes to a
- * virtqueue.  This is more efficient in high-traffic cases, but it means we
- * have to set a timer to check if any more changes have occurred. */
-static void block_vq(struct virtqueue *vq)
-{
-	struct itimerval itm;
-
-	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
-	vq->blocked = true;
-
-	itm.it_interval.tv_sec = 0;
-	itm.it_interval.tv_usec = 0;
-	itm.it_value.tv_sec = 0;
-	itm.it_value.tv_usec = timeout_usec;
-
-	setitimer(ITIMER_REAL, &itm, NULL);
+	add_used_and_trigger(vq, head, 0);
 }
 
 /*
  * The Network
  *
  * Handling output for network is also simple: we get all the output buffers
- * and write them (ignoring the first element) to this device's file descriptor
- * (/dev/net/tun).
+ * and write them to /dev/net/tun.
  */
-static void handle_net_output(struct virtqueue *vq, bool timeout)
+struct net_info {
+	int tunfd;
+};
+
+static void net_output(struct virtqueue *vq)
 {
-	unsigned int head, out, in, num = 0;
+	struct net_info *net_info = vq->dev->priv;
+	unsigned int head, out, in;
 	struct iovec iov[vq->vring.num];
-	static int last_timeout_num;
-
-	/* Keep getting output buffers from the Guest until we run out. */
-	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
-		if (in)
-			errx(1, "Input buffers in output queue?");
-		if (writev(vq->dev->fd, iov, out) < 0)
-			err(1, "Writing network packet to tun");
-		add_used_and_trigger(vq, head, 0);
-		num++;
-	}
 
-	/* Block further kicks and set up a timer if we saw anything. */
-	if (!timeout && num)
-		block_vq(vq);
-
-	/* We never quite know how long should we wait before we check the
-	 * queue again for more packets.  We start at 500 microseconds, and if
-	 * we get fewer packets than last time, we assume we made the timeout
-	 * too small and increase it by 10 microseconds.  Otherwise, we drop it
-	 * by one microsecond every time.  It seems to work well enough. */
-	if (timeout) {
-		if (num < last_timeout_num)
-			timeout_usec += 10;
-		else if (timeout_usec > 1)
-			timeout_usec--;
-		last_timeout_num = num;
-	}
+	head = wait_for_vq_desc(vq, iov, &out, &in);
+	if (in)
+		errx(1, "Input buffers in net output queue?");
+	if (writev(net_info->tunfd, iov, out) < 0)
+		errx(1, "Write to tun failed?");
+	add_used_and_trigger(vq, head, 0);
 }
 
-/* This is where we handle a packet coming in from the tun device to our
+/* This is where we handle packets coming in from the tun device to our
  * Guest. */
-static bool handle_tun_input(struct device *dev)
+static void net_input(struct virtqueue *vq)
 {
-	unsigned int head, in_num, out_num;
 	int len;
-	struct iovec iov[dev->vq->vring.num];
-
-	/* First we need a network buffer from the Guests's recv virtqueue. */
-	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
-	if (head == dev->vq->vring.num) {
-		/* Now, it's expected that if we try to send a packet too
-		 * early, the Guest won't be ready yet.  Wait until the device
-		 * status says it's ready. */
-		/* FIXME: Actually want DRIVER_ACTIVE here. */
-
-		/* Now tell it we want to know if new things appear. */
-		dev->vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
-		wmb();
-
-		/* We'll turn this back on if input buffers are registered. */
-		return false;
-	} else if (out_num)
-		errx(1, "Output buffers in network recv queue?");
-
-	/* Read the packet from the device directly into the Guest's buffer. */
-	len = readv(dev->fd, iov, in_num);
+	unsigned int head, out, in;
+	struct iovec iov[vq->vring.num];
+	struct net_info *net_info = vq->dev->priv;
+
+	head = wait_for_vq_desc(vq, iov, &out, &in);
+	if (out)
+		errx(1, "Output buffers in net input queue?");
+	len = readv(net_info->tunfd, iov, in);
 	if (len <= 0)
-		err(1, "reading network");
+		err(1, "Failed to read from tun.");
+	add_used_and_trigger(vq, head, len);
+}
 
-	/* Tell the Guest about the new packet. */
-	add_used_and_trigger(dev->vq, head, len);
+/* This is the helper to create threads. */
+static int do_thread(void *_vq)
+{
+	struct virtqueue *vq = _vq;
 
-	verbose("tun input packet len %i [%02x %02x] (%s)\n", len,
-		((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1],
-		head != dev->vq->vring.num ? "sent" : "discarded");
+	for (;;)
+		vq->service(vq);
+	return 0;
+}
 
-	/* All good. */
-	return true;
+/* When a child dies, we kill our entire process group with SIGTERM.  This
+ * also has the side effect that the shell restores the console for us! */
+static void kill_launcher(int signal)
+{
+	kill(0, SIGTERM);
 }
 
-/*L:215 This is the callback attached to the network and console input
- * virtqueues: it ensures we try again, in case we stopped console or net
- * delivery because Guest didn't have any buffers. */
-static void enable_fd(struct virtqueue *vq, bool timeout)
+static void reset_device(struct device *dev)
 {
-	add_device_fd(vq->dev->fd);
-	/* Snap the Waker out of its select loop. */
-	write(waker_fds.pipe[1], "", 1);
+	struct virtqueue *vq;
+
+	verbose("Resetting device %s\n", dev->name);
+
+	/* Clear any features they've acked. */
+	memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len);
+
+	/* We're going to be explicitly killing threads, so ignore them. */
+	signal(SIGCHLD, SIG_IGN);
+
+	/* Zero out the virtqueues, get rid of their threads */
+	for (vq = dev->vq; vq; vq = vq->next) {
+		if (vq->thread != (pid_t)-1) {
+			kill(vq->thread, SIGTERM);
+			waitpid(vq->thread, NULL, 0);
+			vq->thread = (pid_t)-1;
+		}
+		memset(vq->vring.desc, 0,
+		       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
+		lg_last_avail(vq) = 0;
+	}
+	dev->running = false;
+
+	/* Now we care if threads die. */
+	signal(SIGCHLD, (void *)kill_launcher);
 }
 
-static void net_enable_fd(struct virtqueue *vq, bool timeout)
+static void create_thread(struct virtqueue *vq)
 {
-	/* We don't need to know again when Guest refills receive buffer. */
-	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
-	enable_fd(vq, timeout);
+	/* Create stack for thread and run it.  Since stack grows
+	 * upwards, we point the stack pointer to the end of this
+	 * region. */
+	char *stack = malloc(32768);
+	unsigned long args[] = { LHREQ_EVENTFD,
+				 vq->config.pfn*getpagesize(), 0 };
+
+	/* Create a zero-initialized eventfd. */
+	vq->eventfd = eventfd(0, 0);
+	if (vq->eventfd < 0)
+		err(1, "Creating eventfd");
+	args[2] = vq->eventfd;
+
+	/* Attach an eventfd to this virtqueue: it will go off
+	 * when the Guest does an LHCALL_NOTIFY for this vq. */
+	if (write(lguest_fd, &args, sizeof(args)) != 0)
+		err(1, "Attaching eventfd");
+
+	/* CLONE_VM: because it has to access the Guest memory, and
+	 * SIGCHLD so we get a signal if it dies. */
+	vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
+	if (vq->thread == (pid_t)-1)
+		err(1, "Creating clone");
+	/* We close our local copy, now the child has it. */
+	close(vq->eventfd);
 }
 
-/* When the Guest tells us they updated the status field, we handle it. */
-static void update_device_status(struct device *dev)
+static void start_device(struct device *dev)
 {
+	unsigned int i;
 	struct virtqueue *vq;
 
-	/* This is a reset. */
-	if (dev->desc->status == 0) {
-		verbose("Resetting device %s\n", dev->name);
+	verbose("Device %s OK: offered", dev->name);
+	for (i = 0; i < dev->feature_len; i++)
+		verbose(" %02x", get_feature_bits(dev)[i]);
+	verbose(", accepted");
+	for (i = 0; i < dev->feature_len; i++)
+		verbose(" %02x", get_feature_bits(dev)
+			[dev->feature_len+i]);
+
+	for (vq = dev->vq; vq; vq = vq->next) {
+		if (vq->service)
+			create_thread(vq);
+	}
+	dev->running = true;
+}
+
+static void cleanup_devices(void)
+{
+	struct device *dev;
+
+	for (dev = devices.dev; dev; dev = dev->next)
+		reset_device(dev);
 
-		/* Clear any features they've acked. */
-		memset(get_feature_bits(dev) + dev->feature_len, 0,
-		       dev->feature_len);
+	/* If we saved off the original terminal settings, restore them now. */
+	if (orig_term.c_lflag & (ISIG|ICANON|ECHO))
+		tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
+}
 
-		/* Zero out the virtqueues. */
-		for (vq = dev->vq; vq; vq = vq->next) {
-			memset(vq->vring.desc, 0,
-			       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
-			lg_last_avail(vq) = 0;
-		}
-	} else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
+/* When the Guest tells us they updated the status field, we handle it. */
+static void update_device_status(struct device *dev)
+{
+	/* A zero status is a reset, otherwise it's a set of flags. */
+	if (dev->desc->status == 0)
+		reset_device(dev);
+	else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
 		warnx("Device %s configuration FAILED", dev->name);
+		if (dev->running)
+			reset_device(dev);
 	} else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
-		unsigned int i;
-
-		verbose("Device %s OK: offered", dev->name);
-		for (i = 0; i < dev->feature_len; i++)
-			verbose(" %02x", get_feature_bits(dev)[i]);
-		verbose(", accepted");
-		for (i = 0; i < dev->feature_len; i++)
-			verbose(" %02x", get_feature_bits(dev)
-				[dev->feature_len+i]);
-
-		if (dev->ready)
-			dev->ready(dev);
+		if (!dev->running)
+			start_device(dev);
 	}
 }
 
@@ -1004,32 +902,24 @@ static void update_device_status(struct device *dev)
 static void handle_output(unsigned long addr)
 {
 	struct device *i;
-	struct virtqueue *vq;
 
-	/* Check each device and virtqueue. */
+	/* Check each device. */
 	for (i = devices.dev; i; i = i->next) {
+		struct virtqueue *vq;
+
 		/* Notifications to device descriptors update device status. */
 		if (from_guest_phys(addr) == i->desc) {
 			update_device_status(i);
 			return;
 		}
 
-		/* Notifications to virtqueues mean output has occurred. */
+		/* Devices *can* be used before status is set to DRIVER_OK. */
 		for (vq = i->vq; vq; vq = vq->next) {
-			if (vq->config.pfn != addr/getpagesize())
+			if (addr != vq->config.pfn*getpagesize())
 				continue;
-
-			/* Guest should acknowledge (and set features!)  before
-			 * using the device. */
-			if (i->desc->status == 0) {
-				warnx("%s gave early output", i->name);
-				return;
-			}
-
-			if (strcmp(vq->dev->name, "console") != 0)
-				verbose("Output to %s\n", vq->dev->name);
-			if (vq->handle_output)
-				vq->handle_output(vq, false);
+			if (i->running)
+				errx(1, "Notification on running %s", i->name);
+			start_device(i);
 			return;
 		}
 	}
@@ -1043,71 +933,6 @@ static void handle_output(unsigned long addr)
 	      strnlen(from_guest_phys(addr), guest_limit - addr));
 }
 
-static void handle_timeout(void)
-{
-	char buf[32];
-	struct device *i;
-	struct virtqueue *vq;
-
-	/* Clear the pipe */
-	read(timeoutpipe[0], buf, sizeof(buf));
-
-	/* Check each device and virtqueue: flush blocked ones. */
-	for (i = devices.dev; i; i = i->next) {
-		for (vq = i->vq; vq; vq = vq->next) {
-			if (!vq->blocked)
-				continue;
-
-			vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
-			vq->blocked = false;
-			if (vq->handle_output)
-				vq->handle_output(vq, true);
-		}
-	}
-}
-
-/* This is called when the Waker wakes us up: check for incoming file
- * descriptors. */
-static void handle_input(void)
-{
-	/* select() wants a zeroed timeval to mean "don't wait". */
-	struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
-
-	for (;;) {
-		struct device *i;
-		fd_set fds = devices.infds;
-		int num;
-
-		num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
-		/* Could get interrupted */
-		if (num < 0)
-			continue;
-		/* If nothing is ready, we're done. */
-		if (num == 0)
-			break;
-
-		/* Otherwise, call the device(s) which have readable file
-		 * descriptors and a method of handling them.  */
-		for (i = devices.dev; i; i = i->next) {
-			if (i->handle_input && FD_ISSET(i->fd, &fds)) {
-				if (i->handle_input(i))
-					continue;
-
-				/* If handle_input() returns false, it means we
-				 * should no longer service it.  Networking and
-				 * console do this when there's no input
-				 * buffers to deliver into.  Console also uses
-				 * it when it discovers that stdin is closed. */
-				FD_CLR(i->fd, &devices.infds);
-			}
-		}
-
-		/* Is this the timeout fd? */
-		if (FD_ISSET(timeoutpipe[0], &fds))
-			handle_timeout();
-	}
-}
-
 /*L:190
  * Device Setup
  *
@@ -1153,7 +978,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
 /* Each device descriptor is followed by the description of its virtqueues.  We
  * specify how many descriptors the virtqueue is to have. */
 static void add_virtqueue(struct device *dev, unsigned int num_descs,
-			  void (*handle_output)(struct virtqueue *, bool))
+			  void (*service)(struct virtqueue *))
 {
 	unsigned int pages;
 	struct virtqueue **i, *vq = malloc(sizeof(*vq));
@@ -1168,7 +993,8 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	vq->next = NULL;
 	vq->last_avail_idx = 0;
 	vq->dev = dev;
-	vq->blocked = false;
+	vq->service = service;
+	vq->thread = (pid_t)-1;
 
 	/* Initialize the configuration. */
 	vq->config.num = num_descs;
@@ -1193,15 +1019,6 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	 * second.  */
 	for (i = &dev->vq; *i; i = &(*i)->next);
 	*i = vq;
-
-	/* Set the routine to call when the Guest does something to this
-	 * virtqueue. */
-	vq->handle_output = handle_output;
-
-	/* As an optimization, set the advisory "Don't Notify Me" flag if we
-	 * don't have a handler */
-	if (!handle_output)
-		vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
 }
 
 /* The first half of the feature bitmask is for us to advertise features.  The
@@ -1237,24 +1054,17 @@ static void set_config(struct device *dev, unsigned len, const void *conf)
  * calling new_dev_desc() to allocate the descriptor and device memory.
  *
  * See what I mean about userspace being boring? */
-static struct device *new_device(const char *name, u16 type, int fd,
-				 bool (*handle_input)(struct device *))
+static struct device *new_device(const char *name, u16 type)
 {
 	struct device *dev = malloc(sizeof(*dev));
 
 	/* Now we populate the fields one at a time. */
-	dev->fd = fd;
-	/* If we have an input handler for this file descriptor, then we add it
-	 * to the device_list's fdset and maxfd. */
-	if (handle_input)
-		add_device_fd(dev->fd);
 	dev->desc = new_dev_desc(type);
-	dev->handle_input = handle_input;
 	dev->name = name;
 	dev->vq = NULL;
-	dev->ready = NULL;
 	dev->feature_len = 0;
 	dev->num_vq = 0;
+	dev->running = false;
 
 	/* Append to device list.  Prepending to a single-linked list is
 	 * easier, but the user expects the devices to be arranged on the bus
@@ -1282,13 +1092,10 @@ static void setup_console(void)
 		 * raw input stream to the Guest. */
 		term.c_lflag &= ~(ISIG|ICANON|ECHO);
 		tcsetattr(STDIN_FILENO, TCSANOW, &term);
-		/* If we exit gracefully, the original settings will be
-		 * restored so the user can see what they're typing. */
-		atexit(restore_term);
 	}
 
-	dev = new_device("console", VIRTIO_ID_CONSOLE,
-			 STDIN_FILENO, handle_console_input);
+	dev = new_device("console", VIRTIO_ID_CONSOLE);
+
 	/* We store the console state in dev->priv, and initialize it. */
 	dev->priv = malloc(sizeof(struct console_abort));
 	((struct console_abort *)dev->priv)->count = 0;
@@ -1297,31 +1104,13 @@ static void setup_console(void)
 	 * they put something the input queue, we make sure we're listening to
 	 * stdin.  When they put something in the output queue, we write it to
 	 * stdout. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd);
-	add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output);
+	add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
+	add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
 
-	verbose("device %u: console\n", devices.device_num++);
+	verbose("device %u: console\n", ++devices.device_num);
 }
 /*:*/
 
-static void timeout_alarm(int sig)
-{
-	write(timeoutpipe[1], "", 1);
-}
-
-static void setup_timeout(void)
-{
-	if (pipe(timeoutpipe) != 0)
-		err(1, "Creating timeout pipe");
-
-	if (fcntl(timeoutpipe[1], F_SETFL,
-		  fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
-		err(1, "Making timeout pipe nonblocking");
-
-	add_device_fd(timeoutpipe[0]);
-	signal(SIGALRM, timeout_alarm);
-}
-
 /*M:010 Inter-guest networking is an interesting area.  Simplest is to have a
  * --sharenet=<name> option which opens or creates a named pipe.  This can be
  * used to send packets to another guest in a 1:1 manner.
@@ -1443,21 +1232,23 @@ static int get_tun_device(char tapif[IFNAMSIZ])
 static void setup_tun_net(char *arg)
 {
 	struct device *dev;
-	int netfd, ipfd;
+	struct net_info *net_info = malloc(sizeof(*net_info));
+	int ipfd;
 	u32 ip = INADDR_ANY;
 	bool bridging = false;
 	char tapif[IFNAMSIZ], *p;
 	struct virtio_net_config conf;
 
-	netfd = get_tun_device(tapif);
+	net_info->tunfd = get_tun_device(tapif);
 
 	/* First we create a new network device. */
-	dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input);
+	dev = new_device("net", VIRTIO_ID_NET);
+	dev->priv = net_info;
 
 	/* Network devices need a receive and a send queue, just like
 	 * console. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, net_enable_fd);
-	add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output);
+	add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
+	add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
 
 	/* We need a socket to perform the magic network ioctls to bring up the
 	 * tap interface, connect to the bridge etc.  Any socket will do! */
@@ -1546,20 +1337,18 @@ struct vblk_info
  * Remember that the block device is handled by a separate I/O thread.  We head
  * straight into the core of that thread here:
  */
-static bool service_io(struct device *dev)
+static void blk_request(struct virtqueue *vq)
 {
-	struct vblk_info *vblk = dev->priv;
+	struct vblk_info *vblk = vq->dev->priv;
 	unsigned int head, out_num, in_num, wlen;
 	int ret;
 	u8 *in;
 	struct virtio_blk_outhdr *out;
-	struct iovec iov[dev->vq->vring.num];
+	struct iovec iov[vq->vring.num];
 	off64_t off;
 
-	/* See if there's a request waiting.  If not, nothing to do. */
-	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
-	if (head == dev->vq->vring.num)
-		return false;
+	/* Get the next request. */
+	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
 
 	/* Every block request should contain at least one output buffer
 	 * (detailing the location on disk and the type of request) and one
@@ -1633,83 +1422,21 @@ static bool service_io(struct device *dev)
 	if (out->type & VIRTIO_BLK_T_BARRIER)
 		fdatasync(vblk->fd);
 
-	/* We can't trigger an IRQ, because we're not the Launcher.  It does
-	 * that when we tell it we're done. */
-	add_used(dev->vq, head, wlen);
-	return true;
-}
-
-/* This is the thread which actually services the I/O. */
-static int io_thread(void *_dev)
-{
-	struct device *dev = _dev;
-	struct vblk_info *vblk = dev->priv;
-	char c;
-
-	/* Close other side of workpipe so we get 0 read when main dies. */
-	close(vblk->workpipe[1]);
-	/* Close the other side of the done_fd pipe. */
-	close(dev->fd);
-
-	/* When this read fails, it means Launcher died, so we follow. */
-	while (read(vblk->workpipe[0], &c, 1) == 1) {
-		/* We acknowledge each request immediately to reduce latency,
-		 * rather than waiting until we've done them all.  I haven't
-		 * measured to see if it makes any difference.
-		 *
-		 * That would be an interesting test, wouldn't it?  You could
-		 * also try having more than one I/O thread. */
-		while (service_io(dev))
-			write(vblk->done_fd, &c, 1);
-	}
-	return 0;
-}
-
-/* Now we've seen the I/O thread, we return to the Launcher to see what happens
- * when that thread tells us it's completed some I/O. */
-static bool handle_io_finish(struct device *dev)
-{
-	char c;
-
-	/* If the I/O thread died, presumably it printed the error, so we
-	 * simply exit. */
-	if (read(dev->fd, &c, 1) != 1)
-		exit(1);
-
-	/* It did some work, so trigger the irq. */
-	trigger_irq(dev->vq);
-	return true;
-}
-
-/* When the Guest submits some I/O, we just need to wake the I/O thread. */
-static void handle_virtblk_output(struct virtqueue *vq, bool timeout)
-{
-	struct vblk_info *vblk = vq->dev->priv;
-	char c = 0;
-
-	/* Wake up I/O thread and tell it to go to work! */
-	if (write(vblk->workpipe[1], &c, 1) != 1)
-		/* Presumably it indicated why it died. */
-		exit(1);
+	add_used_and_trigger(vq, head, wlen);
 }
 
 /*L:198 This actually sets up a virtual block device. */
 static void setup_block_file(const char *filename)
 {
-	int p[2];
 	struct device *dev;
 	struct vblk_info *vblk;
-	void *stack;
 	struct virtio_blk_config conf;
 
-	/* This is the pipe the I/O thread will use to tell us I/O is done. */
-	pipe(p);
-
 	/* The device responds to return from I/O thread. */
-	dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish);
+	dev = new_device("block", VIRTIO_ID_BLOCK);
 
 	/* The device has one virtqueue, where the Guest places requests. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output);
+	add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
 
 	/* Allocate the room for our own bookkeeping */
 	vblk = dev->priv = malloc(sizeof(*vblk));
@@ -1731,49 +1458,29 @@ static void setup_block_file(const char *filename)
 
 	set_config(dev, sizeof(conf), &conf);
 
-	/* The I/O thread writes to this end of the pipe when done. */
-	vblk->done_fd = p[1];
-
-	/* This is the second pipe, which is how we tell the I/O thread about
-	 * more work. */
-	pipe(vblk->workpipe);
-
-	/* Create stack for thread and run it.  Since stack grows upwards, we
-	 * point the stack pointer to the end of this region. */
-	stack = malloc(32768);
-	/* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from
-	 * becoming a zombie. */
-	if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1)
-		err(1, "Creating clone");
-
-	/* We don't need to keep the I/O thread's end of the pipes open. */
-	close(vblk->done_fd);
-	close(vblk->workpipe[0]);
-
 	verbose("device %u: virtblock %llu sectors\n",
-		devices.device_num, le64_to_cpu(conf.capacity));
+		++devices.device_num, le64_to_cpu(conf.capacity));
 }
 
+struct rng_info {
+	int rfd;
+};
+
 /* Our random number generator device reads from /dev/random into the Guest's
  * input buffers.  The usual case is that the Guest doesn't want random numbers
  * and so has no buffers although /dev/random is still readable, whereas
  * console is the reverse.
  *
  * The same logic applies, however. */
-static bool handle_rng_input(struct device *dev)
+static void rng_input(struct virtqueue *vq)
 {
 	int len;
 	unsigned int head, in_num, out_num, totlen = 0;
-	struct iovec iov[dev->vq->vring.num];
+	struct rng_info *rng_info = vq->dev->priv;
+	struct iovec iov[vq->vring.num];
 
 	/* First we need a buffer from the Guests's virtqueue. */
-	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
-
-	/* If they're not ready for input, stop listening to this file
-	 * descriptor.  We'll start again once they add an input buffer. */
-	if (head == dev->vq->vring.num)
-		return false;
-
+	head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
 	if (out_num)
 		errx(1, "Output buffers in rng?");
 
@@ -1781,7 +1488,7 @@ static bool handle_rng_input(struct device *dev)
 	 * it reads straight into the Guest's buffer.  We loop to make sure we
 	 * fill it. */
 	while (!iov_empty(iov, in_num)) {
-		len = readv(dev->fd, iov, in_num);
+		len = readv(rng_info->rfd, iov, in_num);
 		if (len <= 0)
 			err(1, "Read from /dev/random gave %i", len);
 		iov_consume(iov, in_num, len);
@@ -1789,25 +1496,23 @@ static bool handle_rng_input(struct device *dev)
 	}
 
 	/* Tell the Guest about the new input. */
-	add_used_and_trigger(dev->vq, head, totlen);
-
-	/* Everything went OK! */
-	return true;
+	add_used_and_trigger(vq, head, totlen);
 }
 
 /* And this creates a "hardware" random number device for the Guest. */
 static void setup_rng(void)
 {
 	struct device *dev;
-	int fd;
+	struct rng_info *rng_info = malloc(sizeof(*rng_info));
 
-	fd = open_or_die("/dev/random", O_RDONLY);
+	rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
 
 	/* The device responds to return from I/O thread. */
-	dev = new_device("rng", VIRTIO_ID_RNG, fd, handle_rng_input);
+	dev = new_device("rng", VIRTIO_ID_RNG);
+	dev->priv = rng_info;
 
 	/* The device has one virtqueue, where the Guest places inbufs. */
-	add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd);
+	add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
 
 	verbose("device %u: rng\n", devices.device_num++);
 }
@@ -1823,7 +1528,9 @@ static void __attribute__((noreturn)) restart_guest(void)
 	for (i = 3; i < FD_SETSIZE; i++)
 		close(i);
 
-	/* The exec automatically gets rid of the I/O and Waker threads. */
+	/* Reset all the devices (kills all threads). */
+	cleanup_devices();
+
 	execv(main_args[0], main_args);
 	err(1, "Could not exec %s", main_args[0]);
 }
@@ -1833,7 +1540,6 @@ static void __attribute__((noreturn)) restart_guest(void)
 static void __attribute__((noreturn)) run_guest(void)
 {
 	for (;;) {
-		unsigned long args[] = { LHREQ_BREAK, 0 };
 		unsigned long notify_addr;
 		int readval;
 
@@ -1845,7 +1551,6 @@ static void __attribute__((noreturn)) run_guest(void)
 		if (readval == sizeof(notify_addr)) {
 			verbose("Notify on address %#lx\n", notify_addr);
 			handle_output(notify_addr);
-			continue;
 		/* ENOENT means the Guest died.  Reading tells us why. */
 		} else if (errno == ENOENT) {
 			char reason[1024] = { 0 };
@@ -1854,19 +1559,9 @@ static void __attribute__((noreturn)) run_guest(void)
 		/* ERESTART means that we need to reboot the guest */
 		} else if (errno == ERESTART) {
 			restart_guest();
-		/* EAGAIN means a signal (timeout).
-		 * Anything else means a bug or incompatible change. */
-		} else if (errno != EAGAIN)
+		/* Anything else means a bug or incompatible change. */
+		} else
 			err(1, "Running guest failed");
-
-		/* Only service input on thread for CPU 0. */
-		if (cpu_id != 0)
-			continue;
-
-		/* Service input, then unset the BREAK to release the Waker. */
-		handle_input();
-		if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0)
-			err(1, "Resetting break");
 	}
 }
 /*L:240
@@ -1909,18 +1604,10 @@ int main(int argc, char *argv[])
 
 	/* Save the args: we "reboot" by execing ourselves again. */
 	main_args = argv;
-	/* We don't "wait" for the children, so prevent them from becoming
-	 * zombies. */
-	signal(SIGCHLD, SIG_IGN);
 
-	/* First we initialize the device list.  Since console and network
-	 * device receive input from a file descriptor, we keep an fdset
-	 * (infds) and the maximum fd number (max_infd) with the head of the
-	 * list.  We also keep a pointer to the last device.  Finally, we keep
-	 * the next interrupt number to use for devices (1: remember that 0 is
-	 * used by the timer). */
-	FD_ZERO(&devices.infds);
-	devices.max_infd = -1;
+	/* First we initialize the device list.  We keep a pointer to the last
+	 * device, and the next interrupt number to use for devices (1:
+	 * remember that 0 is used by the timer). */
 	devices.lastdev = NULL;
 	devices.next_irq = 1;
 
@@ -1978,9 +1665,6 @@ int main(int argc, char *argv[])
 	/* We always have a console device */
 	setup_console();
 
-	/* We can timeout waiting for Guest network transmit. */
-	setup_timeout();
-
 	/* Now we load the kernel */
 	start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
 
@@ -2021,10 +1705,11 @@ int main(int argc, char *argv[])
 	 * /dev/lguest file descriptor. */
 	tell_kernel(start);
 
-	/* We clone off a thread, which wakes the Launcher whenever one of the
-	 * input file descriptors needs attention.  We call this the Waker, and
-	 * we'll cover it in a moment. */
-	setup_waker();
+	/* Ensure that we terminate if a child dies. */
+	signal(SIGCHLD, kill_launcher);
+
+	/* If we exit via err(), this kills all the threads, restores tty. */
+	atexit(cleanup_devices);
 
 	/* Finally, run the Guest.  This doesn't return. */
 	run_guest();
-- 
cgit v0.10.2


From 5dac051bc6030963181b69faddd9e0ad04f85fa8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:10 -0600
Subject: lguest: remove obsolete LHREQ_BREAK call

We no longer need an efficient mechanism to force the Guest back into
host userspace, as each device is serviced without bothering the main
Guest process (aka. the Launcher).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 508569c..a6974e9 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -209,10 +209,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		if (signal_pending(current))
 			return -ERESTARTSYS;
 
-		/* If Waker set break_out, return to Launcher. */
-		if (cpu->break_out)
-			return -EAGAIN;
-
 		/* Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
 		 * run the Guest. */
@@ -231,13 +227,12 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 			break;
 
 		/* If the Guest asked to be stopped, we sleep.  The Guest's
-		 * clock timer or LHREQ_BREAK from the Waker will wake us. */
+		 * clock timer will wake us. */
 		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			/* Just before we sleep, make sure nothing snuck in
+			/* Just before we sleep, make sure no interrupt snuck in
 			 * which we should be doing. */
-			if (interrupt_pending(cpu, &more) < LGUEST_IRQS
-			    || cpu->break_out)
+			if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
 				set_current_state(TASK_RUNNING);
 			else
 				schedule();
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 32fefdc..d4e8979 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -71,9 +71,7 @@ struct lg_cpu {
 	/* Virtual clock device */
 	struct hrtimer hrt;
 
-	/* Do we need to stop what we're doing and return to userspace? */
-	int break_out;
-	wait_queue_head_t break_wq;
+	/* Did the Guest tell us to halt? */
 	int halted;
 
 	/* Pending virtual interrupts */
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index f6bf255..32e2971 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -11,32 +11,6 @@
 #include <linux/file.h>
 #include "lg.h"
 
-/*L:055 When something happens, the Waker process needs a way to stop the
- * kernel running the Guest and return to the Launcher.  So the Waker writes
- * LHREQ_BREAK and the value "1" to /dev/lguest to do this.  Once the Launcher
- * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release
- * the Waker. */
-static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
-{
-	unsigned long on;
-
-	/* Fetch whether they're turning break on or off. */
-	if (get_user(on, input) != 0)
-		return -EFAULT;
-
-	if (on) {
-		cpu->break_out = 1;
-		if (!wake_up_process(cpu->tsk))
-			kick_process(cpu->tsk);
-		/* Wait for them to reset it */
-		return wait_event_interruptible(cpu->break_wq, !cpu->break_out);
-	} else {
-		cpu->break_out = 0;
-		wake_up(&cpu->break_wq);
-		return 0;
-	}
-}
-
 bool send_notify_to_eventfd(struct lg_cpu *cpu)
 {
 	unsigned int i;
@@ -202,9 +176,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
 	 * address. */
 	lguest_arch_setup_regs(cpu, start_ip);
 
-	/* Initialize the queue for the Waker to wait on */
-	init_waitqueue_head(&cpu->break_wq);
-
 	/* We keep a pointer to the Launcher task (ie. current task) for when
 	 * other Guests want to wake this one (eg. console input). */
 	cpu->tsk = current;
@@ -344,8 +315,6 @@ static ssize_t write(struct file *file, const char __user *in,
 		return initialize(file, input);
 	case LHREQ_IRQ:
 		return user_send_irq(cpu, input);
-	case LHREQ_BREAK:
-		return break_guest_out(cpu, input);
 	case LHREQ_EVENTFD:
 		return attach_eventfd(lg, input);
 	default:
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index 9de964b..bfefbdf 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -57,7 +57,7 @@ enum lguest_req
 	LHREQ_INITIALIZE, /* + base, pfnlimit, start */
 	LHREQ_GETDMA, /* No longer used */
 	LHREQ_IRQ, /* + irq */
-	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
+	LHREQ_BREAK, /* No longer used */
 	LHREQ_EVENTFD, /* + address, fd. */
 };
 
-- 
cgit v0.10.2


From 38bc2b8c56a2e212bbd19de7cf9976dcc7bf9953 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:11 -0600
Subject: lguest: implement deferred interrupts in example Launcher

Rather than sending an interrupt on every buffer, we only send an interrupt
when we're about to wait for the Guest to send us a new one.  The console
input and network input still send interrupts manually, but the block device,
network and console output queues can simply rely on this logic to send
interrupts to the Guest at the right time.

The patch is cluttered by moving trigger_irq() higher in the code.

In practice, two factors make this optimization less interesting:
(1) we often only get one input at a time, even for networking,
(2) triggering an interrupt rapidly tends to get coalesced anyway.

Before:				Secs	RxIRQS	TxIRQs
 1G TCP Guest->Host:		3.72	32784	32771
 1M normal pings:		99	1000004	995541
 100,000 1k pings (-l 120):	5	49510	49058

After:
 1G TCP Guest->Host:		3.69	32809	32769
 1M normal pings:		99	1000004	996196
 100,000 1k pings (-l 120):	5	52435	52361

(Note the interrupt count on 100k pings goes *up*: see next patch).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 5470b8e..84c471b 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -551,6 +551,21 @@ static unsigned next_desc(struct virtqueue *vq, unsigned int i)
 	return next;
 }
 
+/* This actually sends the interrupt for this virtqueue */
+static void trigger_irq(struct virtqueue *vq)
+{
+	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
+
+	/* If they don't want an interrupt, don't send one, unless empty. */
+	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+	    && lg_last_avail(vq) != vq->vring.avail->idx)
+		return;
+
+	/* Send the Guest an interrupt tell them we used something up. */
+	if (write(lguest_fd, buf, sizeof(buf)) != 0)
+		err(1, "Triggering irq %i", vq->config.irq);
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
@@ -567,6 +582,9 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
 	while (last_avail == vq->vring.avail->idx) {
 		u64 event;
 
+		/* OK, tell Guest about progress up to now. */
+		trigger_irq(vq);
+
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
 			errx(1, "Event read failed?");
@@ -631,21 +649,6 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len)
 	vq->vring.used->idx++;
 }
 
-/* This actually sends the interrupt for this virtqueue */
-static void trigger_irq(struct virtqueue *vq)
-{
-	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
-
-	/* If they don't want an interrupt, don't send one, unless empty. */
-	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-	    && lg_last_avail(vq) != vq->vring.avail->idx)
-		return;
-
-	/* Send the Guest an interrupt tell them we used something up. */
-	if (write(lguest_fd, buf, sizeof(buf)) != 0)
-		err(1, "Triggering irq %i", vq->config.irq);
-}
-
 /* And here's the combo meal deal.  Supersize me! */
 static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
 {
@@ -730,7 +733,7 @@ static void console_output(struct virtqueue *vq)
 			err(1, "Write to stdout gave %i", len);
 		iov_consume(iov, out, len);
 	}
-	add_used_and_trigger(vq, head, 0);
+	add_used(vq, head, 0);
 }
 
 /*
@@ -754,7 +757,7 @@ static void net_output(struct virtqueue *vq)
 		errx(1, "Input buffers in net output queue?");
 	if (writev(net_info->tunfd, iov, out) < 0)
 		errx(1, "Write to tun failed?");
-	add_used_and_trigger(vq, head, 0);
+	add_used(vq, head, 0);
 }
 
 /* This is where we handle packets coming in from the tun device to our
@@ -1422,7 +1425,7 @@ static void blk_request(struct virtqueue *vq)
 	if (out->type & VIRTIO_BLK_T_BARRIER)
 		fdatasync(vblk->fd);
 
-	add_used_and_trigger(vq, head, wlen);
+	add_used(vq, head, wlen);
 }
 
 /*L:198 This actually sets up a virtual block device. */
@@ -1496,7 +1499,7 @@ static void rng_input(struct virtqueue *vq)
 	}
 
 	/* Tell the Guest about the new input. */
-	add_used_and_trigger(vq, head, totlen);
+	add_used(vq, head, totlen);
 }
 
 /* And this creates a "hardware" random number device for the Guest. */
-- 
cgit v0.10.2


From 95c517c09bad31a03e22f2fdb5f0aa26a490a92d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:11 -0600
Subject: lguest: avoid sending interrupts to Guest when no activity occurs.

If we track how many buffers we've used, we can tell whether we really
need to interrupt the Guest.  This happens as a side effect of
spurious notifications.

Spurious notifications happen because it can take a while before the
Host thread wakes up and sets the VRING_USED_F_NO_NOTIFY flag, and
meanwhile the Guest can more notifications.

A real fix would be to use wake counts, rather than a suppression
flag, but the practical difference is generally in the noise: the
interrupt is usually coalesced into a pending one anyway so we just
save a system call which isn't clearly measurable.

				Secs	Spurious IRQS
1G TCP Guest->Host:		3.93	58
1M normal pings:		100	72
1M 1k pings (-l 120):		57	492904

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 84c471b..20f8253 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -151,6 +151,9 @@ struct virtqueue
 	/* Last available index we saw. */
 	u16 last_avail_idx;
 
+	/* How many are used since we sent last irq? */
+	unsigned int pending_used;
+
 	/* Eventfd where Guest notifications arrive. */
 	int eventfd;
 
@@ -556,6 +559,11 @@ static void trigger_irq(struct virtqueue *vq)
 {
 	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
 
+	/* Don't inform them if nothing used. */
+	if (!vq->pending_used)
+		return;
+	vq->pending_used = 0;
+
 	/* If they don't want an interrupt, don't send one, unless empty. */
 	if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
 	    && lg_last_avail(vq) != vq->vring.avail->idx)
@@ -647,6 +655,7 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len)
 	/* Make sure buffer is written before we update index. */
 	wmb();
 	vq->vring.used->idx++;
+	vq->pending_used++;
 }
 
 /* And here's the combo meal deal.  Supersize me! */
-- 
cgit v0.10.2


From 4a8962e21bc505c714fc2508494d4c7dd3fe2d29 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:12 -0600
Subject: lguest: try to batch interrupts on network receive

Rather than triggering an interrupt every time, we only trigger an
interrupt when there are no more incoming packets (or the recv queue
is full).

However, the overhead of doing the select to figure this out is
measurable: 1M pings goes from 98 to 104 seconds, and 1G Guest->Host
TCP goes from 3.69 to 3.94 seconds.  It's close to the noise though.

I tested various timeouts, including reducing it as the number of
pending packets increased, timing a 1 gigabyte TCP send from Guest ->
Host and Host -> Guest (GSO disabled, to increase packet rate).

// time tcpblast -o -s 65536 -c 16k 192.168.2.1:9999 > /dev/null

Timeout		Guest->Host	Pkts/irq	Host->Guest	Pkts/irq
Before		11.3s		1.0		6.3s		1.0
0		11.7s		1.0		6.6s		23.5
1		17.1s		8.8		8.6s		26.0
1/pending	13.4s		1.9		6.6s		23.8
2/pending	13.6s		2.8		6.6s		24.1
5/pending	14.1s		5.0		6.6s		24.4

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 20f8253..6411079 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -769,6 +769,16 @@ static void net_output(struct virtqueue *vq)
 	add_used(vq, head, 0);
 }
 
+/* Will reading from this file descriptor block? */
+static bool will_block(int fd)
+{
+	fd_set fdset;
+	struct timeval zero = { 0, 0 };
+	FD_ZERO(&fdset);
+	FD_SET(fd, &fdset);
+	return select(fd+1, &fdset, NULL, NULL, &zero) != 1;
+}
+
 /* This is where we handle packets coming in from the tun device to our
  * Guest. */
 static void net_input(struct virtqueue *vq)
@@ -781,10 +791,15 @@ static void net_input(struct virtqueue *vq)
 	head = wait_for_vq_desc(vq, iov, &out, &in);
 	if (out)
 		errx(1, "Output buffers in net input queue?");
+
+	/* Deliver interrupt now, since we're about to sleep. */
+	if (vq->pending_used && will_block(net_info->tunfd))
+		trigger_irq(vq);
+
 	len = readv(net_info->tunfd, iov, in);
 	if (len <= 0)
 		err(1, "Failed to read from tun.");
-	add_used_and_trigger(vq, head, len);
+	add_used(vq, head, len);
 }
 
 /* This is the helper to create threads. */
-- 
cgit v0.10.2


From b60da13fc7bbf99d3c68578bd3fbcf66e1cb5f41 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:12 -0600
Subject: lguest: suppress notifications in example Launcher

The Guest only really needs to tell us about activity when we're going
to listen to the eventfd: normally, we don't want to know.

So if there are no available buffers, turn on notifications, re-check,
then wait for the Guest to notify us via the eventfd, then turn
notifications off again.

There's enough else going on that the differences are in the noise.

Before:				Secs	RxKicks	TxKicks
 1G TCP Guest->Host:		3.94	  4686	  32815
 1M normal pings:		104	142862	1000010
 1M 1k pings (-l 120):		57	142026	1000007

After:
 1G TCP Guest->Host:		3.76	  4691	  32811
 1M normal pings:		111	142859	 997467
 1M 1k pings (-l 120):		55	 19648	 501549

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 6411079..bb5e3c2 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -172,6 +172,7 @@ static struct termios orig_term;
  * threads and so we need to make sure that changes visible to the Guest happen
  * in precise order. */
 #define wmb() __asm__ __volatile__("" : : : "memory")
+#define mb() __asm__ __volatile__("" : : : "memory")
 
 /* Convert an iovec element to the given type.
  *
@@ -593,9 +594,23 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
 		/* OK, tell Guest about progress up to now. */
 		trigger_irq(vq);
 
+		/* OK, now we need to know about added descriptors. */
+		vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+
+		/* They could have slipped one in as we were doing that: make
+		 * sure it's written, then check again. */
+		mb();
+		if (last_avail != vq->vring.avail->idx) {
+			vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+			break;
+		}
+
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
 			errx(1, "Event read failed?");
+
+		/* We don't need to be notified again. */
+		vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
 	}
 
 	/* Check it isn't doing very strange things with descriptor numbers. */
-- 
cgit v0.10.2


From d1f0132e76a11b05167313c606a853953f416081 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 11 May 2009 18:11:46 +0100
Subject: lguest: add support for indirect ring entries

Support the VIRTIO_RING_F_INDIRECT_DESC feature.

This is a simple matter of changing the descriptor walking
code to operate on a struct vring_desc* and supplying it
with an indirect table if detected.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index bb5e3c2..9ebcd6e 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -536,20 +536,21 @@ static void *_check_pointer(unsigned long addr, unsigned int size,
 /* Each buffer in the virtqueues is actually a chain of descriptors.  This
  * function returns the next descriptor in the chain, or vq->vring.num if we're
  * at the end. */
-static unsigned next_desc(struct virtqueue *vq, unsigned int i)
+static unsigned next_desc(struct vring_desc *desc,
+			  unsigned int i, unsigned int max)
 {
 	unsigned int next;
 
 	/* If this descriptor says it doesn't chain, we're done. */
-	if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
-		return vq->vring.num;
+	if (!(desc[i].flags & VRING_DESC_F_NEXT))
+		return max;
 
 	/* Check they're not leading us off end of descriptors. */
-	next = vq->vring.desc[i].next;
+	next = desc[i].next;
 	/* Make sure compiler knows to grab that: we don't want it changing! */
 	wmb();
 
-	if (next >= vq->vring.num)
+	if (next >= max)
 		errx(1, "Desc next is %u", next);
 
 	return next;
@@ -585,7 +586,8 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
 				 struct iovec iov[],
 				 unsigned int *out_num, unsigned int *in_num)
 {
-	unsigned int i, head;
+	unsigned int i, head, max;
+	struct vring_desc *desc;
 	u16 last_avail = lg_last_avail(vq);
 
 	while (last_avail == vq->vring.avail->idx) {
@@ -630,15 +632,28 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
 	/* When we start there are none of either input nor output. */
 	*out_num = *in_num = 0;
 
+	max = vq->vring.num;
+	desc = vq->vring.desc;
 	i = head;
+
+	/* If this is an indirect entry, then this buffer contains a descriptor
+	 * table which we handle as if it's any normal descriptor chain. */
+	if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+		if (desc[i].len % sizeof(struct vring_desc))
+			errx(1, "Invalid size for indirect buffer table");
+
+		max = desc[i].len / sizeof(struct vring_desc);
+		desc = check_pointer(desc[i].addr, desc[i].len);
+		i = 0;
+	}
+
 	do {
 		/* Grab the first descriptor, and check it's OK. */
-		iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len;
+		iov[*out_num + *in_num].iov_len = desc[i].len;
 		iov[*out_num + *in_num].iov_base
-			= check_pointer(vq->vring.desc[i].addr,
-					vq->vring.desc[i].len);
+			= check_pointer(desc[i].addr, desc[i].len);
 		/* If this is an input descriptor, increment that count. */
-		if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
+		if (desc[i].flags & VRING_DESC_F_WRITE)
 			(*in_num)++;
 		else {
 			/* If it's an output descriptor, they're all supposed
@@ -649,9 +664,9 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
 		}
 
 		/* If we've got too many, that implies a descriptor loop. */
-		if (*out_num + *in_num > vq->vring.num)
+		if (*out_num + *in_num > max)
 			errx(1, "Looped descriptor");
-	} while ((i = next_desc(vq, i)) != vq->vring.num);
+	} while ((i = next_desc(desc, i, max)) != max);
 
 	return head;
 }
@@ -1331,6 +1346,8 @@ static void setup_tun_net(char *arg)
 	add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
 	add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
 	add_feature(dev, VIRTIO_NET_F_HOST_ECN);
+	/* We handle indirect ring entries */
+	add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
 	set_config(dev, sizeof(conf), &conf);
 
 	/* We don't need the socket any more; setup is done. */
-- 
cgit v0.10.2


From 7747a0b0af5976ba3828796b4f7a7adc3bb76dbd Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Thu, 11 Jun 2009 17:07:28 -0500
Subject: xfs: fix freeing memory in xfs_getbmap()

Regression from commit 28e211700a81b0a934b6c7a4b8e7dda843634d2f.
Need to free temporary buffer allocated in xfs_getbmap().

Signed-off-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Hedi Berriche <hedi@sgi.com>
Reported-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 4b0f6ef..7928b99 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6086,6 +6086,7 @@ xfs_getbmap(
 			break;
 	}
 
+	kmem_free(out);
 	return error;
 }
 
-- 
cgit v0.10.2


From eb91f1d0a531289e18f5587dc197d12a251c66a3 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Fri, 12 Jun 2009 14:56:09 +0300
Subject: slab: fix gfp flag in setup_cpu_cache()

Fixes the following warning during bootup when compiling with CONFIG_SLAB:

  [    0.000000] ------------[ cut here ]------------
  [    0.000000] WARNING: at kernel/lockdep.c:2282 lockdep_trace_alloc+0x91/0xb9()
  [    0.000000] Hardware name:
  [    0.000000] Modules linked in:
  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.30 #491
  [    0.000000] Call Trace:
  [    0.000000]  [<ffffffff81087d84>] ? lockdep_trace_alloc+0x91/0xb9
  [    0.000000]  [<ffffffff81061764>] warn_slowpath_common+0x7c/0xa9
  [    0.000000]  [<ffffffff810617a5>] warn_slowpath_null+0x14/0x16
  [    0.000000]  [<ffffffff81087d84>] lockdep_trace_alloc+0x91/0xb9
  [    0.000000]  [<ffffffff810f5b03>] kmem_cache_alloc_node_notrace+0x26/0xdf
  [    0.000000]  [<ffffffff81487f4e>] ? setup_cpu_cache+0x7e/0x210
  [    0.000000]  [<ffffffff81487fe3>] setup_cpu_cache+0x113/0x210
  [    0.000000]  [<ffffffff810f73ff>] kmem_cache_create+0x409/0x486
  [    0.000000]  [<ffffffff818131c1>] kmem_cache_init+0x232/0x593
  [    0.000000]  [<ffffffff8180987c>] ? mem_init+0x156/0x161
  [    0.000000]  [<ffffffff817f8aae>] start_kernel+0x1cc/0x3b9
  [    0.000000]  [<ffffffff817f829a>] x86_64_start_reservations+0xaa/0xae
  [    0.000000]  [<ffffffff817f837f>] x86_64_start_kernel+0xe1/0xe8
  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/slab.c b/mm/slab.c
index f46b65d..cd76964 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2102,7 +2102,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 			for_each_online_node(node) {
 				cachep->nodelists[node] =
 				    kmalloc_node(sizeof(struct kmem_list3),
-						GFP_KERNEL, node);
+						gfp, node);
 				BUG_ON(!cachep->nodelists[node]);
 				kmem_list3_init(cachep->nodelists[node]);
 			}
-- 
cgit v0.10.2


From 7e85ee0c1d15ca5f8bff0f514f158eba1742dd87 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Fri, 12 Jun 2009 14:03:06 +0300
Subject: slab,slub: don't enable interrupts during early boot

As explained by Benjamin Herrenschmidt:

  Oh and btw, your patch alone doesn't fix powerpc, because it's missing
  a whole bunch of GFP_KERNEL's in the arch code... You would have to
  grep the entire kernel for things that check slab_is_available() and
  even then you'll be missing some.

  For example, slab_is_available() didn't always exist, and so in the
  early days on powerpc, we used a mem_init_done global that is set form
  mem_init() (not perfect but works in practice). And we still have code
  using that to do the test.

Therefore, mask out __GFP_WAIT, __GFP_IO, and __GFP_FS in the slab allocators
in early boot code to avoid enabling interrupts.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0bbc15f..3760e7c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -85,6 +85,9 @@ struct vm_area_struct;
 			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
 			__GFP_NORETRY|__GFP_NOMEMALLOC)
 
+/* Control slab gfp mask during early boot */
+#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)
+
 /* Control allocation constraints */
 #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4880306..219b8fb 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -319,4 +319,6 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+void __init kmem_cache_init_late(void);
+
 #endif	/* _LINUX_SLAB_H */
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 0ec00b3..bb5368d 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -34,4 +34,9 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags)
 	return kmalloc(size, flags);
 }
 
+static inline void kmem_cache_init_late(void)
+{
+	/* Nothing to do */
+}
+
 #endif /* __LINUX_SLOB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index be5d40c..4dcbc2c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -302,4 +302,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 }
 #endif
 
+void __init kmem_cache_init_late(void);
+
 #endif /* _LINUX_SLUB_DEF_H */
diff --git a/init/main.c b/init/main.c
index b3e8f14..f6204f7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -640,6 +640,7 @@ asmlinkage void __init start_kernel(void)
 				 "enabled early\n");
 	early_boot_irqs_on();
 	local_irq_enable();
+	kmem_cache_init_late();
 
 	/*
 	 * HACK ALERT! This is early. We're enabling the console before
diff --git a/mm/slab.c b/mm/slab.c
index cd76964..453efcb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -304,6 +304,12 @@ struct kmem_list3 {
 };
 
 /*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
+/*
  * Need this for bootstrapping a per node allocator.
  */
 #define NUM_INIT_LISTS (3 * MAX_NUMNODES)
@@ -1654,6 +1660,14 @@ void __init kmem_cache_init(void)
 	 */
 }
 
+void __init kmem_cache_init_late(void)
+{
+	/*
+	 * Interrupts are enabled now so all GFP allocations are safe.
+	 */
+	slab_gfp_mask = __GFP_BITS_MASK;
+}
+
 static int __init cpucache_init(void)
 {
 	int cpu;
@@ -3354,6 +3368,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	unsigned long save_flags;
 	void *ptr;
 
+	flags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(flags);
 
 	if (slab_should_failslab(cachep, flags))
@@ -3434,6 +3450,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	unsigned long save_flags;
 	void *objp;
 
+	flags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(flags);
 
 	if (slab_should_failslab(cachep, flags))
diff --git a/mm/slub.c b/mm/slub.c
index 3964d3c..30354bf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -178,6 +178,12 @@ static enum {
 	SYSFS		/* Sysfs up */
 } slab_state = DOWN;
 
+/*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
 /* A list of all slab caches on the system */
 static DECLARE_RWSEM(slub_lock);
 static LIST_HEAD(slab_caches);
@@ -1595,6 +1601,8 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	unsigned long flags;
 	unsigned int objsize;
 
+	gfpflags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(gfpflags);
 	might_sleep_if(gfpflags & __GFP_WAIT);
 
@@ -3104,6 +3112,14 @@ void __init kmem_cache_init(void)
 		nr_cpu_ids, nr_node_ids);
 }
 
+void __init kmem_cache_init_late(void)
+{
+	/*
+	 * Interrupts are enabled now so all GFP allocations are safe.
+	 */
+	slab_gfp_mask = __GFP_BITS_MASK;
+}
+
 /*
  * Find a mergeable slab cache
  */
-- 
cgit v0.10.2


From 8429db5c6336083594036c30f49401405d536911 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Fri, 12 Jun 2009 15:58:59 +0300
Subject: slab: setup cpu caches later on when interrupts are enabled

Fixes the following boot-time warning:

  [    0.000000] ------------[ cut here ]------------
  [    0.000000] WARNING: at kernel/smp.c:369 smp_call_function_many+0x56/0x1bc()
  [    0.000000] Hardware name:
  [    0.000000] Modules linked in:
  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.30 #492
  [    0.000000] Call Trace:
  [    0.000000]  [<ffffffff8149e021>] ? _spin_unlock+0x4f/0x5c
  [    0.000000]  [<ffffffff8108f11b>] ? smp_call_function_many+0x56/0x1bc
  [    0.000000]  [<ffffffff81061764>] warn_slowpath_common+0x7c/0xa9
  [    0.000000]  [<ffffffff810617a5>] warn_slowpath_null+0x14/0x16
  [    0.000000]  [<ffffffff8108f11b>] smp_call_function_many+0x56/0x1bc
  [    0.000000]  [<ffffffff810f3e00>] ? do_ccupdate_local+0x0/0x54
  [    0.000000]  [<ffffffff810f3e00>] ? do_ccupdate_local+0x0/0x54
  [    0.000000]  [<ffffffff8108f2be>] smp_call_function+0x3d/0x68
  [    0.000000]  [<ffffffff810f3e00>] ? do_ccupdate_local+0x0/0x54
  [    0.000000]  [<ffffffff81066fd8>] on_each_cpu+0x31/0x7c
  [    0.000000]  [<ffffffff810f64f5>] do_tune_cpucache+0x119/0x454
  [    0.000000]  [<ffffffff81087080>] ? lockdep_init_map+0x94/0x10b
  [    0.000000]  [<ffffffff818133b0>] ? kmem_cache_init+0x421/0x593
  [    0.000000]  [<ffffffff810f69cf>] enable_cpucache+0x68/0xad
  [    0.000000]  [<ffffffff818133c3>] kmem_cache_init+0x434/0x593
  [    0.000000]  [<ffffffff8180987c>] ? mem_init+0x156/0x161
  [    0.000000]  [<ffffffff817f8aae>] start_kernel+0x1cc/0x3b9
  [    0.000000]  [<ffffffff817f829a>] x86_64_start_reservations+0xaa/0xae
  [    0.000000]  [<ffffffff817f837f>] x86_64_start_kernel+0xe1/0xe8
  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---

Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/slab.c b/mm/slab.c
index 453efcb..18e3164 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -759,6 +759,7 @@ static enum {
 	NONE,
 	PARTIAL_AC,
 	PARTIAL_L3,
+	EARLY,
 	FULL
 } g_cpucache_up;
 
@@ -767,7 +768,7 @@ static enum {
  */
 int slab_is_available(void)
 {
-	return g_cpucache_up == FULL;
+	return g_cpucache_up >= EARLY;
 }
 
 static DEFINE_PER_CPU(struct delayed_work, reap_work);
@@ -1631,19 +1632,27 @@ void __init kmem_cache_init(void)
 		}
 	}
 
-	/* 6) resize the head arrays to their final sizes */
-	{
-		struct kmem_cache *cachep;
-		mutex_lock(&cache_chain_mutex);
-		list_for_each_entry(cachep, &cache_chain, next)
-			if (enable_cpucache(cachep, GFP_NOWAIT))
-				BUG();
-		mutex_unlock(&cache_chain_mutex);
-	}
+	g_cpucache_up = EARLY;
 
 	/* Annotate slab for lockdep -- annotate the malloc caches */
 	init_lock_keys();
+}
+
+void __init kmem_cache_init_late(void)
+{
+	struct kmem_cache *cachep;
+
+	/*
+	 * Interrupts are enabled now so all GFP allocations are safe.
+	 */
+	slab_gfp_mask = __GFP_BITS_MASK;
 
+	/* 6) resize the head arrays to their final sizes */
+	mutex_lock(&cache_chain_mutex);
+	list_for_each_entry(cachep, &cache_chain, next)
+		if (enable_cpucache(cachep, GFP_NOWAIT))
+			BUG();
+	mutex_unlock(&cache_chain_mutex);
 
 	/* Done! */
 	g_cpucache_up = FULL;
@@ -1660,14 +1669,6 @@ void __init kmem_cache_init(void)
 	 */
 }
 
-void __init kmem_cache_init_late(void)
-{
-	/*
-	 * Interrupts are enabled now so all GFP allocations are safe.
-	 */
-	slab_gfp_mask = __GFP_BITS_MASK;
-}
-
 static int __init cpucache_init(void)
 {
 	int cpu;
-- 
cgit v0.10.2


From 88164ff4fca75051315d73729ea5a014e8986234 Mon Sep 17 00:00:00 2001
From: Ali Gholami Rudi <ali@rudi.ir>
Date: Mon, 30 Mar 2009 16:49:27 +0430
Subject: trivial: ext2: fix a typo in comment in ext2.h

Signed-off-by: Ali Gholami Rudi <ali@rudi.ir>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index b2bbf45..f2e5811 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -27,7 +27,7 @@ struct ext2_inode_info {
 	/*
 	 * i_block_group is the number of the block group which contains
 	 * this file's inode.  Constant across the lifetime of the inode,
-	 * it is ued for making block allocation decisions - we try to
+	 * it is used for making block allocation decisions - we try to
 	 * place a file's data blocks near its inode block, and new inodes
 	 * near to their parent directory's inode.
 	 */
-- 
cgit v0.10.2


From ff677f8d10a7b7dea6fbfc48d5ceeb3018cabb23 Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Wed, 1 Apr 2009 14:40:51 +0530
Subject: trivial: fix comment typo in fs/compat.c

Fix a typo in fs/compat.c

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/fs/compat.c b/fs/compat.c
index 6aefb77..cdd51a3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -471,7 +471,7 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 		ret = sys_fcntl(fd, cmd, (unsigned long)&f);
 		set_fs(old_fs);
 		if (cmd == F_GETLK && ret == 0) {
-			/* GETLK was successfule and we need to return the data...
+			/* GETLK was successful and we need to return the data...
 			 * but it needs to fit in the compat structure.
 			 * l_start shouldn't be too big, unless the original
 			 * start + end is greater than COMPAT_OFF_T_MAX, in which
-- 
cgit v0.10.2


From 2eadfc0ed68690075dcff08b30d87831388a1663 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Thu, 2 Apr 2009 15:23:37 +0200
Subject: trivial: fs/inode: Fix typo in file_update_time nanodoc

The advertised flag for not updating the time was wrong.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/fs/inode.c b/fs/inode.c
index a88baeb..f643be5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(touch_atime);
  *	for writeback.  Note that this function is meant exclusively for
  *	usage in the file write path of filesystems, and filesystems may
  *	choose to explicitly ignore update via this function with the
- *	S_NOCTIME inode flag, e.g. for network filesystem where these
+ *	S_NOCMTIME inode flag, e.g. for network filesystem where these
  *	timestamps are handled by the server.
  */
 
-- 
cgit v0.10.2


From cf3f9130f48ed04f32a31cfad21f576d45b8788b Mon Sep 17 00:00:00 2001
From: Viral Mehta <viral.mehta@einfochips.com>
Date: Fri, 3 Apr 2009 13:08:14 +0530
Subject: trivial: remove extra space

Just for the sake of readability, removing extra space

Signed-off-by: Viral Mehta <viral.mehta@einfochips.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c
index ecb58e7..4bd3a7a 100644
--- a/sound/usb/usbmixer.c
+++ b/sound/usb/usbmixer.c
@@ -423,7 +423,7 @@ static int set_ctl_value(struct usb_mixer_elem_info *cval, int request, int vali
 	value_set = convert_bytes_value(cval, value_set);
 	buf[0] = value_set & 0xff;
 	buf[1] = (value_set >> 8) & 0xff;
-	while (timeout -- > 0)
+	while (timeout-- > 0)
 		if (snd_usb_ctl_msg(cval->mixer->chip->dev,
 				    usb_sndctrlpipe(cval->mixer->chip->dev, 0),
 				    request,
-- 
cgit v0.10.2


From 0b1b51f50ed7b4225d0631140de8873fb235a6c0 Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Date: Mon, 6 Apr 2009 16:10:54 +0300
Subject: trivial: mtd: fix Kconfig comment about 'armflash'

The real 'armflash' map driver is selected by CONFIG_MTD_ARM_INTEGRATOR

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 7d04fb9..b8e35a0 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -154,7 +154,8 @@ config MTD_AFS_PARTS
 
 	  You will still need the parsing functions to be called by the driver
 	  for your particular device. It won't happen automatically. The
-	  'armflash' map driver (CONFIG_MTD_ARMFLASH) does this, for example.
+	  'armflash' map driver (CONFIG_MTD_ARM_INTEGRATOR) does this, for
+	  example.
 
 config MTD_OF_PARTS
 	tristate "Flash partition map based on OF description"
-- 
cgit v0.10.2


From 4b512d26f425be1c779c8319249b42ce3c3424d2 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Date: Tue, 14 Apr 2009 23:14:10 -0300
Subject: trivial: typo (en|dis|avail|remove)bale -> (en|dis|avail|remove)able

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c
index fd3ebd1..72429b6 100644
--- a/drivers/char/amiserial.c
+++ b/drivers/char/amiserial.c
@@ -779,7 +779,7 @@ static void change_speed(struct async_struct *info,
 		info->IER |= UART_IER_MSI;
 	}
 	/* TBD:
-	 * Does clearing IER_MSI imply that we should disbale the VBL interrupt ?
+	 * Does clearing IER_MSI imply that we should disable the VBL interrupt ?
 	 */
 
 	/*
diff --git a/drivers/media/video/hdpvr/hdpvr-video.c b/drivers/media/video/hdpvr/hdpvr-video.c
index 3e6ffee..ccd47f5 100644
--- a/drivers/media/video/hdpvr/hdpvr-video.c
+++ b/drivers/media/video/hdpvr/hdpvr-video.c
@@ -181,7 +181,7 @@ static int hdpvr_submit_buffers(struct hdpvr_device *dev)
 				 buff_list);
 		if (buf->status != BUFSTAT_AVAILABLE) {
 			v4l2_err(&dev->v4l2_dev,
-				 "buffer not marked as availbale\n");
+				 "buffer not marked as available\n");
 			ret = -EFAULT;
 			goto err;
 		}
diff --git a/drivers/net/b44.h b/drivers/net/b44.h
index e678498..d24158e 100644
--- a/drivers/net/b44.h
+++ b/drivers/net/b44.h
@@ -97,7 +97,7 @@
 #define B44_DMARX_STAT	0x021CUL /* DMA RX Current Active Desc. + Status */
 #define  DMARX_STAT_CDMASK	0x00000fff /* Current Descriptor Mask */
 #define  DMARX_STAT_SMASK	0x0000f000 /* State Mask */
-#define  DMARX_STAT_SDISABLED	0x00000000 /* State Disbaled */
+#define  DMARX_STAT_SDISABLED	0x00000000 /* State Disabled */
 #define  DMARX_STAT_SACTIVE	0x00001000 /* State Active */
 #define  DMARX_STAT_SIDLE	0x00002000 /* State Idle Wait */
 #define  DMARX_STAT_SSTOPPED	0x00003000 /* State Stopped */
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 0f9ee13..af5364f 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -2785,7 +2785,7 @@ static int e100_resume(struct pci_dev *pdev)
 	/* ack any pending wake events, disable PME */
 	pci_enable_wake(pdev, 0, 0);
 
-	/* disbale reverse auto-negotiation */
+	/* disable reverse auto-negotiation */
 	if (nic->phy == phy_82552_v) {
 		u16 smartspeed = mdio_read(netdev, nic->mii.phy_id,
 		                           E100_82552_SMARTSPEED);
diff --git a/drivers/net/niu.h b/drivers/net/niu.h
index 8754e44..3bd0b59 100644
--- a/drivers/net/niu.h
+++ b/drivers/net/niu.h
@@ -3242,8 +3242,8 @@ struct niu {
 	struct niu_parent		*parent;
 
 	u32				flags;
-#define NIU_FLAGS_HOTPLUG_PHY_PRESENT	0x02000000 /* Removebale PHY detected*/
-#define NIU_FLAGS_HOTPLUG_PHY		0x01000000 /* Removebale PHY */
+#define NIU_FLAGS_HOTPLUG_PHY_PRESENT	0x02000000 /* Removeable PHY detected*/
+#define NIU_FLAGS_HOTPLUG_PHY		0x01000000 /* Removeable PHY */
 #define NIU_FLAGS_VPD_VALID		0x00800000 /* VPD has valid version */
 #define NIU_FLAGS_MSIX			0x00400000 /* MSI-X in use */
 #define NIU_FLAGS_MCAST			0x00200000 /* multicast filter enabled */
diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h
index 795201f..512c2cc 100644
--- a/drivers/scsi/megaraid.h
+++ b/drivers/scsi/megaraid.h
@@ -469,7 +469,7 @@ typedef struct {
 	u8	type;		/* Type of the device */
 	u8	cur_status;	/* current status of the device */
 	u8	tag_depth;	/* Level of tagging */
-	u8	sync_neg;	/* sync negotiation - ENABLE or DISBALE */
+	u8	sync_neg;	/* sync negotiation - ENABLE or DISABLE */
 	u32	size;		/* configurable size in terms of 512 byte
 				   blocks */
 }__attribute__ ((packed)) phys_drv;
diff --git a/drivers/scsi/megaraid/mbox_defs.h b/drivers/scsi/megaraid/mbox_defs.h
index 170399e..b25b747 100644
--- a/drivers/scsi/megaraid/mbox_defs.h
+++ b/drivers/scsi/megaraid/mbox_defs.h
@@ -686,7 +686,7 @@ typedef struct {
  * @type	: Type of the device
  * @cur_status	: current status of the device
  * @tag_depth	: Level of tagging
- * @sync_neg	: sync negotiation - ENABLE or DISBALE
+ * @sync_neg	: sync negotiation - ENABLE or DISABLE
  * @size	: configurable size in terms of 512 byte
  */
 typedef struct {
diff --git a/drivers/staging/rt2860/common/mlme.c b/drivers/staging/rt2860/common/mlme.c
index c00f9ab..2edf299 100644
--- a/drivers/staging/rt2860/common/mlme.c
+++ b/drivers/staging/rt2860/common/mlme.c
@@ -5664,7 +5664,7 @@ VOID 	AsicUpdateProtect(
 #if 0
 	MacReg |= (pAd->CommonCfg.RtsThreshold << 8);
 #else
-	// If the user want disable RtsThreshold and enbale Amsdu/Ralink-Aggregation, set the RtsThreshold as 4096
+	// If the user want disable RtsThreshold and enable Amsdu/Ralink-Aggregation, set the RtsThreshold as 4096
         if ((
 #ifdef DOT11_N_SUPPORT
 			(pAd->CommonCfg.BACapability.field.AmsduEnable) ||
diff --git a/drivers/staging/rt2870/common/mlme.c b/drivers/staging/rt2870/common/mlme.c
index 8a82cee..a26bc03 100644
--- a/drivers/staging/rt2870/common/mlme.c
+++ b/drivers/staging/rt2870/common/mlme.c
@@ -5561,7 +5561,7 @@ VOID 	AsicUpdateProtect(
 #if 0
 	MacReg |= (pAd->CommonCfg.RtsThreshold << 8);
 #else
-	// If the user want disable RtsThreshold and enbale Amsdu/Ralink-Aggregation, set the RtsThreshold as 4096
+	// If the user want disable RtsThreshold and enable Amsdu/Ralink-Aggregation, set the RtsThreshold as 4096
         if ((
 #ifdef DOT11_N_SUPPORT
 			(pAd->CommonCfg.BACapability.field.AmsduEnable) ||
diff --git a/drivers/staging/rt3070/common/mlme.c b/drivers/staging/rt3070/common/mlme.c
index 0ffbfa3..0189bab 100644
--- a/drivers/staging/rt3070/common/mlme.c
+++ b/drivers/staging/rt3070/common/mlme.c
@@ -5575,7 +5575,7 @@ VOID 	AsicUpdateProtect(
 	RTMP_IO_READ32(pAd, TX_RTS_CFG, &MacReg);
 	MacReg &= 0xFF0000FF;
 
-	// If the user want disable RtsThreshold and enbale Amsdu/Ralink-Aggregation, set the RtsThreshold as 4096
+	// If the user want disable RtsThreshold and enable Amsdu/Ralink-Aggregation, set the RtsThreshold as 4096
     if ((
 #ifdef DOT11_N_SUPPORT
 		(pAd->CommonCfg.BACapability.field.AmsduEnable) ||
diff --git a/drivers/watchdog/iop_wdt.c b/drivers/watchdog/iop_wdt.c
index 96eb2cb..0c90596 100644
--- a/drivers/watchdog/iop_wdt.c
+++ b/drivers/watchdog/iop_wdt.c
@@ -192,7 +192,7 @@ static int iop_wdt_release(struct inode *inode, struct file *file)
 		if (test_bit(WDT_ENABLED, &wdt_status))
 			state = wdt_disable();
 
-	/* if the timer is not disbaled reload and notify that we are still
+	/* if the timer is not disabled reload and notify that we are still
 	 * going down
 	 */
 	if (state != 0) {
diff --git a/sound/pci/aw2/aw2-saa7146.c b/sound/pci/aw2/aw2-saa7146.c
index 6a3891a..296123a 100644
--- a/sound/pci/aw2/aw2-saa7146.c
+++ b/sound/pci/aw2/aw2-saa7146.c
@@ -108,7 +108,7 @@ void snd_aw2_saa7146_setup(struct snd_aw2_saa7146 *chip,
 #endif
 	/* WS0_CTRL, WS0_SYNC: input TSL1, I2S */
 
-	/* At initialization WS1 and WS2 are disbaled (configured as input */
+	/* At initialization WS1 and WS2 are disabled (configured as input) */
 	acon1 |= 0 * WS1_CTRL;
 	acon1 |= 0 * WS2_CTRL;
 
-- 
cgit v0.10.2


From 7ea2ac9b6632038377cb488c7d1cb60b88164d4d Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Date: Tue, 14 Apr 2009 23:14:17 -0300
Subject: Trivial: fix typo s/balence/balance/

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/include/linux/ultrasound.h b/include/linux/ultrasound.h
index 6b7703e..71339dc 100644
--- a/include/linux/ultrasound.h
+++ b/include/linux/ultrasound.h
@@ -34,7 +34,7 @@
  *		_GUS_VOICEOFF	- Stops voice (no parameters)
  *		_GUS_VOICEFADE	- Stops the voice smoothly.
  *		_GUS_VOICEMODE	- Alters the voice mode, don't start or stop voice (P1=voice mode)
- *		_GUS_VOICEBALA	- Sets voice balence (P1, 0=left, 7=middle and 15=right, default 7)
+ *		_GUS_VOICEBALA	- Sets voice balance (P1, 0=left, 7=middle and 15=right, default 7)
  *		_GUS_VOICEFREQ	- Sets voice (sample) playback frequency (P1=Hz)
  *		_GUS_VOICEVOL	- Sets voice volume (P1=volume, 0xfff=max, 0xeff=half, 0x000=off)
  *		_GUS_VOICEVOL2	- Sets voice volume (P1=volume, 0xfff=max, 0xeff=half, 0x000=off)
-- 
cgit v0.10.2


From 6d60f9dfc8d437e914d46fa355c50c695cad24e7 Mon Sep 17 00:00:00 2001
From: Martin Olsson <mnemo@minimum.se>
Date: Tue, 7 Apr 2009 10:30:24 +0200
Subject: trivial: Fix paramater/parameter typo in dmesg and source comments

Signed-off-by: Martin Olsson <martin@minimum.se>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index bebf735..ff0042f 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -584,7 +584,7 @@ static int rndis_set_config_parameter(struct usbnet *dev, char *param,
 	ret = rndis_set_oid(dev, OID_GEN_RNDIS_CONFIG_PARAMETER,
 							infobuf, info_len);
 	if (ret != 0)
-		devdbg(dev, "setting rndis config paramater failed, %d.", ret);
+		devdbg(dev, "setting rndis config parameter failed, %d.", ret);
 
 	kfree(infobuf);
 	return ret;
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 9fe8982..4a1d94a 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -116,7 +116,7 @@ int nf_conntrack_acct_init(struct net *net)
 	if (net_eq(net, &init_net)) {
 #ifdef CONFIG_NF_CT_ACCT
 	printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
-		printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n");
+		printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n");
 		printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
 #endif
 
-- 
cgit v0.10.2


From 1dc492a0a4470852cb451db1e00d580ce9fd7a28 Mon Sep 17 00:00:00 2001
From: Manish Katiyar <mkatiyar@gmail.com>
Date: Sun, 22 Feb 2009 10:24:27 +0530
Subject: trivial: kernel/power/poweroff.c: whitespace fix

Fix coding style whitespace fixes. Patch compile tested
Before :-
total: 1 errors, 0 warnings, 46 lines checked
After
total: 0 errors, 0 warnings, 46 lines checked

Before :-
  text	   data	    bss	    dec	    hex	filename
    107	     48	      0	    155	     9b	kernel/power/poweroff.o
After
   text	   data	    bss	    dec	    hex	filename
    107	     48	      0	    155	     9b	kernel/power/poweroff.o

Signed-off-by: Manish Katiyar <mkatiyar@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 9789083..e8b3370 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -34,7 +34,7 @@ static struct sysrq_key_op	sysrq_poweroff_op = {
 	.handler        = handle_poweroff,
 	.help_msg       = "powerOff",
 	.action_msg     = "Power Off",
- 	.enable_mask	= SYSRQ_ENABLE_BOOT,
+	.enable_mask	= SYSRQ_ENABLE_BOOT,
 };
 
 static int pm_sysrq_init(void)
-- 
cgit v0.10.2


From ab2274af0569a43c6da390e969759d1138799839 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Date: Fri, 17 Apr 2009 08:14:23 -0300
Subject: trivial: fix typo compatiable/compatiability has extra 'a'.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index b83f6bc..0aac371 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1765,7 +1765,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
 
 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
  * for some operations; this forces use of the newer bridge-utils that
- * use compatiable ioctls
+ * use compatible ioctls
  */
 static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
-- 
cgit v0.10.2


From 98a1708de1bfa5fe1c490febba850d6043d3c7fa Mon Sep 17 00:00:00 2001
From: Martin Olsson <martin@minimum.se>
Date: Wed, 22 Apr 2009 18:21:29 +0200
Subject: trivial: fix typos s/paramter/parameter/ and s/excute/execute/ in
 documentation and source comments.

Signed-off-by: Martin Olsson <martin@minimum.se>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt
index 6c974d2..e8b5bc2 100644
--- a/Documentation/powerpc/dts-bindings/fsl/board.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/board.txt
@@ -38,7 +38,7 @@ Required properities:
 - reg : Should contain the address and the length of the GPIO bank
   register.
 - #gpio-cells : Should be two. The first cell is the pin number and the
-  second cell is used to specify optional paramters (currently unused).
+  second cell is used to specify optional parameters (currently unused).
 - gpio-controller : Marks the port as GPIO controller.
 
 Example:
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
index 1815dfe..349f79f 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
@@ -11,7 +11,7 @@ Required properties:
   "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d",
   "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank"
 - #gpio-cells : Should be two. The first cell is the pin number and the
-  second cell is used to specify optional paramters (currently unused).
+  second cell is used to specify optional parameters (currently unused).
 - gpio-controller : Marks the port as GPIO controller.
 
 Example of three SOC GPIO banks defined as gpio-controller nodes:
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 22596d7..2c54e970 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -64,7 +64,7 @@ __HEAD
 	.org   0x100
 #
 # subroutine for loading from tape
-# Paramters:
+# Parameters:
 #  R1 = device number
 #  R2 = load address
 .Lloader:
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 6273d98..ac176da 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -748,9 +748,9 @@ static int ata_acpi_run_tf(struct ata_device *dev,
 /**
  * ata_acpi_exec_tfs - get then write drive taskfile settings
  * @dev: target ATA device
- * @nr_executed: out paramter for the number of executed commands
+ * @nr_executed: out parameter for the number of executed commands
  *
- * Evaluate _GTF and excute returned taskfiles.
+ * Evaluate _GTF and execute returned taskfiles.
  *
  * LOCKING:
  * EH context.
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 94919ad..fa22f94 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2864,7 +2864,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
 /**
  *	ata_set_mode - Program timings and issue SET FEATURES - XFER
  *	@link: link on which timings will be programmed
- *	@r_failed_dev: out paramter for failed device
+ *	@r_failed_dev: out parameter for failed device
  *
  *	Set ATA device disk transfer mode (PIO3, UDMA6, etc.).  If
  *	ata_set_mode() fails, pointer to the failing device is
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index facfdb1..d205d49 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -1084,7 +1084,7 @@ static void e752x_init_sysbus_parity_mask(struct e752x_pvt *pvt)
 	struct pci_dev *dev = pvt->dev_d0f1;
 	int enable = 1;
 
-	/* Allow module paramter override, else see if CPU supports parity */
+	/* Allow module parameter override, else see if CPU supports parity */
 	if (sysbus_parity != -1) {
 		enable = sysbus_parity;
 	} else if (cpu_id[0] &&
diff --git a/drivers/isdn/divert/isdn_divert.c b/drivers/isdn/divert/isdn_divert.c
index 7d97d54..77e9fdd 100644
--- a/drivers/isdn/divert/isdn_divert.c
+++ b/drivers/isdn/divert/isdn_divert.c
@@ -183,7 +183,7 @@ int cf_command(int drvid, int mode,
           (mode != 1) ? "" : " 0 ",
           (mode != 1) ? "" : fwd_nr);
  
-  retval = divert_if.ll_cmd(&cs->ics); /* excute command */
+  retval = divert_if.ll_cmd(&cs->ics); /* execute command */
 
   if (!retval)
    { cs->prev = NULL;
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 78cc714..b642647 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -1220,7 +1220,7 @@ static int __init ltpc_setup(char *str)
 		if (ints[0] > 2) {
 			dma = ints[3];
 		}
-		/* ignore any other paramters */
+		/* ignore any other parameters */
 	}
 	return 1;
 }
diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index f37360a..44f0bf2 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -62,7 +62,7 @@ struct e1000_info;
 	e_printk(KERN_NOTICE, adapter, format, ## arg)
 
 
-/* Interrupt modes, as used by the IntMode paramter */
+/* Interrupt modes, as used by the IntMode parameter */
 #define E1000E_INT_MODE_LEGACY		0
 #define E1000E_INT_MODE_MSI		1
 #define E1000E_INT_MODE_MSIX		2
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index 16a4138..78952f8 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -268,7 +268,7 @@ struct ehea_qp_init_attr {
 };
 
 /*
- * Event Queue attributes, passed as paramter
+ * Event Queue attributes, passed as parameter
  */
 struct ehea_eq_attr {
 	u32 type;
diff --git a/drivers/net/igbvf/igbvf.h b/drivers/net/igbvf/igbvf.h
index 4bff35e..d488733 100644
--- a/drivers/net/igbvf/igbvf.h
+++ b/drivers/net/igbvf/igbvf.h
@@ -45,7 +45,7 @@ struct igbvf_adapter;
 /* Interrupt defines */
 #define IGBVF_START_ITR                 648 /* ~6000 ints/sec */
 
-/* Interrupt modes, as used by the IntMode paramter */
+/* Interrupt modes, as used by the IntMode parameter */
 #define IGBVF_INT_MODE_LEGACY           0
 #define IGBVF_INT_MODE_MSI              1
 #define IGBVF_INT_MODE_MSIX             2
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index 7bcc49d..e8eeef0 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -371,7 +371,7 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
 	int i;
 
 	/* If we haven't received a specific coalescing setting
-	 * (module param), we set the moderation paramters as follows:
+	 * (module param), we set the moderation parameters as follows:
 	 * - moder_cnt is set to the number of mtu sized packets to
 	 *   satisfy our coelsing target.
 	 * - moder_time is set to a fixed value.
diff --git a/drivers/net/qlge/qlge_mpi.c b/drivers/net/qlge/qlge_mpi.c
index 9f81b79..ac9493f 100644
--- a/drivers/net/qlge/qlge_mpi.c
+++ b/drivers/net/qlge/qlge_mpi.c
@@ -141,7 +141,7 @@ end:
 /* We are being asked by firmware to accept
  * a change to the port.  This is only
  * a change to max frame sizes (Tx/Rx), pause
- * paramters, or loopback mode. We wake up a worker
+ * parameters, or loopback mode. We wake up a worker
  * to handler processing this since a mailbox command
  * will need to be sent to ACK the request.
  */
@@ -371,7 +371,7 @@ static int ql_mpi_handler(struct ql_adapter *qdev, struct mbox_params *mbcp)
 	/* We are being asked by firmware to accept
 	 * a change to the port.  This is only
 	 * a change to max frame sizes (Tx/Rx), pause
-	 * paramters, or loopback mode.
+	 * parameters, or loopback mode.
 	 */
 	case AEN_IDC_REQ:
 		status = ql_idc_req_aen(qdev);
@@ -380,7 +380,7 @@ static int ql_mpi_handler(struct ql_adapter *qdev, struct mbox_params *mbcp)
 	/* Process and inbound IDC event.
 	 * This will happen when we're trying to
 	 * change tx/rx max frame size, change pause
-	 * paramters or loopback mode.
+	 * parameters or loopback mode.
 	 */
 	case AEN_IDC_CMPLT:
 	case AEN_IDC_EXT:
diff --git a/drivers/net/skfp/h/smt.h b/drivers/net/skfp/h/smt.h
index 1ff5899..2976757 100644
--- a/drivers/net/skfp/h/smt.h
+++ b/drivers/net/skfp/h/smt.h
@@ -413,7 +413,7 @@ struct smt_p_reason {
 #define SMT_RDF_SUCCESS	0x00000003	/* success (PMF) */
 #define SMT_RDF_BADSET	0x00000004	/* bad set count (PMF) */
 #define SMT_RDF_ILLEGAL 0x00000005	/* read only (PMF) */
-#define SMT_RDF_NOPARAM	0x6		/* paramter not supported (PMF) */
+#define SMT_RDF_NOPARAM	0x6		/* parameter not supported (PMF) */
 #define SMT_RDF_RANGE	0x8		/* out of range */
 #define SMT_RDF_AUTHOR	0x9		/* not autohorized */
 #define SMT_RDF_LENGTH	0x0a		/* length error */
diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index 534c0f3..0337b9d 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -79,7 +79,7 @@ MODULE_AUTHOR("Mike Phillips <mikep@linuxtr.net>") ;
 MODULE_DESCRIPTION("3Com 3C359 Velocity XL Token Ring Adapter Driver \n") ;
 MODULE_FIRMWARE(FW_NAME);
 
-/* Module paramters */
+/* Module parameters */
 
 /* Ring Speed 0,4,16 
  * 0 = Autosense   
diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index 2e70ee8..46a2cc9 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -169,7 +169,7 @@ static char *open_min_error[] = {
 	"Monitor Contention failer for RPL", "FDX Protocol Error"
 };
 
-/* Module paramters */
+/* Module parameters */
 
 /* Ring Speed 0,4,16
  * 0 = Autosense         
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index d068a9d..2d819fc 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -132,7 +132,7 @@ static char *open_min_error[] = {"No error", "Function Failure", "Signal Lost",
 				   "Reserved", "Reserved", "No Monitor Detected for RPL", 
 				   "Monitor Contention failer for RPL", "FDX Protocol Error"};
 
-/* Module paramters */
+/* Module parameters */
 
 MODULE_AUTHOR("Mike Phillips <mikep@linuxtr.net>") ; 
 MODULE_DESCRIPTION("Olympic PCI/Cardbus Chipset Driver") ; 
diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c
index 6fcb500..61fe80d 100644
--- a/drivers/net/ucc_geth_ethtool.c
+++ b/drivers/net/ucc_geth_ethtool.c
@@ -7,7 +7,7 @@
  *
  * Limitation:
  * Can only get/set setttings of the first queue.
- * Need to re-open the interface manually after changing some paramters.
+ * Need to re-open the interface manually after changing some parameters.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h
index a631613..d83e379 100644
--- a/drivers/net/wireless/rt2x00/rt2x00lib.h
+++ b/drivers/net/wireless/rt2x00/rt2x00lib.h
@@ -235,7 +235,7 @@ void rt2x00link_reset_tuner(struct rt2x00_dev *rt2x00dev, bool antenna);
  * @rt2x00dev: Pointer to &struct rt2x00_dev.
  *
  * Initialize work structure and all link tuning related
- * paramters. This will not start the link tuning process itself.
+ * parameters. This will not start the link tuning process itself.
  */
 void rt2x00link_register(struct rt2x00_dev *rt2x00dev);
 
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index e55b339..fa2821b 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -138,7 +138,7 @@ psa_read(struct net_device *	dev,
 
 /*------------------------------------------------------------------*/
 /*
- * Write the Paramter Storage Area to the WaveLAN card's memory
+ * Write the Parameter Storage Area to the WaveLAN card's memory
  */
 static void
 psa_write(struct net_device *	dev,
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 8032c5a..679adff 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -827,8 +827,8 @@ lpfc_cmd_blksize(struct scsi_cmnd *sc)
  * @reftag:         out: ref tag (reference tag)
  *
  * Description:
- *   Extract DIF paramters from the command if possible.  Otherwise,
- *   use default paratmers.
+ *   Extract DIF parameters from the command if possible.  Otherwise,
+ *   use default parameters.
  *
  **/
 static inline void
diff --git a/sound/pci/sis7019.h b/sound/pci/sis7019.h
index 013b673..bc8c768 100644
--- a/sound/pci/sis7019.h
+++ b/sound/pci/sis7019.h
@@ -203,7 +203,7 @@
 #define SIS_WEISR_B	0xac
 
 
-/* Playback DMA parameters (paramter RAM) */
+/* Playback DMA parameters (parameter RAM) */
 #define SIS_PLAY_DMA_OFFSET	0x0000
 #define SIS_PLAY_DMA_SIZE	0x10
 #define SIS_PLAY_DMA_ADDR(addr, num) \
@@ -228,7 +228,7 @@
 #define		SIS_PLAY_DMA_SSO_MASK		0xffff0000
 #define		SIS_PLAY_DMA_ESO_MASK		0x0000ffff
 
-/* Capture DMA parameters (paramter RAM) */
+/* Capture DMA parameters (parameter RAM) */
 #define SIS_CAPTURE_DMA_OFFSET	0x0800
 #define SIS_CAPTURE_DMA_SIZE	0x10
 #define SIS_CAPTURE_DMA_ADDR(addr, num) \
-- 
cgit v0.10.2


From 19af5cdb7c79ff5ec96a99893ffb7f894f4a3dc1 Mon Sep 17 00:00:00 2001
From: Martin Olsson <martin@minimum.se>
Date: Thu, 23 Apr 2009 11:37:37 +0200
Subject: trivial: fix typo milisecond/millisecond for documentation and source
 comments.

Signed-off-by: Martin Olsson <martin@minimum.se>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 72968cd..8bb3723 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -698,8 +698,8 @@ very often is not. Abundant use of the inline keyword leads to a much bigger
 kernel, which in turn slows the system as a whole down, due to a bigger
 icache footprint for the CPU and simply because there is less memory
 available for the pagecache. Just think about it; a pagecache miss causes a
-disk seek, which easily takes 5 miliseconds. There are a LOT of cpu cycles
-that can go into these 5 miliseconds.
+disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles
+that can go into these 5 milliseconds.
 
 A reasonable rule of thumb is to not put inline at functions that have more
 than 3 lines of code in them. An exception to this rule are the cases where
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 757e595..ae1cae3 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -577,7 +577,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 
 		/*
 		 * If necessary schedule the packet transfer to occur 'timeout'
-		 * miliseconds later in ide_delayed_transfer_pc() after the
+		 * milliseconds later in ide_delayed_transfer_pc() after the
 		 * device says it's ready for a packet.
 		 */
 		if (drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) {
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 3083338..47dbfe2 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -502,7 +502,7 @@ tone_off:
 			break;
 		}
 		dsp->cmx_delay = (*((int *)data)) << 3;
-			/* miliseconds to samples */
+			/* milliseconds to samples */
 		if (dsp->cmx_delay >= (CMX_BUFF_HALF>>1))
 			/* clip to half of maximum usable buffer
 			(half of half buffer) */
diff --git a/drivers/net/ipg.h b/drivers/net/ipg.h
index dd9318f..dfc2541 100644
--- a/drivers/net/ipg.h
+++ b/drivers/net/ipg.h
@@ -514,7 +514,7 @@ enum ipg_regs {
 #define		IPG_DMALIST_ALIGN_PAD	0x07
 #define		IPG_MULTICAST_HASHTABLE_SIZE	0x40
 
-/* Number of miliseconds to wait after issuing a software reset.
+/* Number of milliseconds to wait after issuing a software reset.
  * 0x05 <= IPG_AC_RESETWAIT to account for proper 10Mbps operation.
  */
 #define         IPG_AC_RESETWAIT             0x05
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 19ae084..18fd975 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -116,7 +116,7 @@ static void zfcp_wka_port_put(struct zfcp_wka_port *wka_port)
 {
 	if (atomic_dec_return(&wka_port->refcount) != 0)
 		return;
-	/* wait 10 miliseconds, other reqs might pop in */
+	/* wait 10 milliseconds, other reqs might pop in */
 	schedule_delayed_work(&wka_port->work, HZ / 100);
 }
 
diff --git a/drivers/scsi/dpt/osd_util.h b/drivers/scsi/dpt/osd_util.h
index 4b56c04..b2613c2 100644
--- a/drivers/scsi/dpt/osd_util.h
+++ b/drivers/scsi/dpt/osd_util.h
@@ -342,7 +342,7 @@ uLONG osdGetThreadID(void);
 /* wakes up the specifed thread */
 void osdWakeThread(uLONG);
 
-/* osd sleep for x miliseconds */
+/* osd sleep for x milliseconds */
 void osdSleep(uLONG);
 
 #define DPT_THREAD_PRIORITY_LOWEST 0x00
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index eabf20e..db964db 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -102,7 +102,7 @@ struct edgeport_port {
 	__u8 shadow_mcr;
 	__u8 shadow_lsr;
 	__u8 lsr_mask;
-	__u32 ump_read_timeout;		/* Number of miliseconds the UMP will
+	__u32 ump_read_timeout;		/* Number of milliseconds the UMP will
 					   wait without data before completing
 					   a read short */
 	int baud_rate;
diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c
index c0efe44..6416d3f 100644
--- a/sound/pci/vx222/vx222_ops.c
+++ b/sound/pci/vx222/vx222_ops.c
@@ -367,7 +367,7 @@ static int vx2_load_xilinx_binary(struct vx_core *chip, const struct firmware *x
 	unsigned int port;
 	const unsigned char *image;
 
-	/* XILINX reset (wait at least 1 milisecond between reset on and off). */
+	/* XILINX reset (wait at least 1 millisecond between reset on and off). */
 	vx_outl(chip, CNTRL, VX_CNTRL_REGISTER_VALUE | VX_XILINX_RESET_MASK);
 	vx_inl(chip, CNTRL);
 	msleep(10);
-- 
cgit v0.10.2


From 19f594600110377ec4037fdf7fb93a25ec516212 Mon Sep 17 00:00:00 2001
From: Matt LaPlante <kernel1@cyberdogtech.com>
Date: Mon, 27 Apr 2009 15:06:31 +0200
Subject: trivial: Miscellaneous documentation typo fixes

Fix various typos in documentation txts.

Signed-off-by: Matt LaPlante <kernel1@cyberdogtech.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 25fb8bc..5aceb88 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -676,8 +676,8 @@ this directory the following files can currently be found:
 	dma-api/all_errors	This file contains a numeric value. If this
 				value is not equal to zero the debugging code
 				will print a warning for every error it finds
-				into the kernel log. Be carefull with this
-				option. It can easily flood your logs.
+				into the kernel log. Be careful with this
+				option, as it can easily flood your logs.
 
 	dma-api/disabled	This read-only file contains the character 'Y'
 				if the debugging code is disabled. This can
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 6389dec..93cb28d 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -118,7 +118,7 @@ to another chain) checking the final 'nulls' value if
 the lookup met the end of chain. If final 'nulls' value
 is not the slot number, then we must restart the lookup at
 the beginning. If the object was moved to the same chain,
-then the reader doesnt care : It might eventually
+then the reader doesn't care : It might eventually
 scan the list again without harm.
 
 
diff --git a/Documentation/SM501.txt b/Documentation/SM501.txt
index 6fc6560..561826f 100644
--- a/Documentation/SM501.txt
+++ b/Documentation/SM501.txt
@@ -5,7 +5,7 @@ Copyright 2006, 2007 Simtec Electronics
 
 The Silicon Motion SM501 multimedia companion chip is a multifunction device
 which may provide numerous interfaces including USB host controller USB gadget,
-Asyncronous Serial ports, Audio functions and a dual display video interface.
+asynchronous serial ports, audio functions, and a dual display video interface.
 The device may be connected by PCI or local bus with varying functions enabled.
 
 Core
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
index 7257676..2d82c80 100644
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.txt
@@ -58,7 +58,7 @@ same criteria as reads.
 front_merges	(bool)
 ------------
 
-Sometimes it happens that a request enters the io scheduler that is contigious
+Sometimes it happens that a request enters the io scheduler that is contiguous
 with a request that is already on the queue. Either it fits in the back of that
 request, or it fits at the front. That is called either a back merge candidate
 or a front merge candidate. Due to the way files are typically laid out,
diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt
index 000b0fb..d0d042c 100644
--- a/Documentation/braille-console.txt
+++ b/Documentation/braille-console.txt
@@ -27,7 +27,7 @@ parameter.
 
 For simplicity, only one braille console can be enabled, other uses of
 console=brl,... will be discarded.  Also note that it does not interfere with
-the console selection mecanism described in serial-console.txt
+the console selection mechanism described in serial-console.txt
 
 For now, only the VisioBraille device is supported.
 
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 387b8a7..d79aead 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -188,7 +188,7 @@ For example, you can do something like the following.
 
   void my_midlayer_destroy_something()
   {
-	devres_release_group(dev, my_midlayer_create_soemthing);
+	devres_release_group(dev, my_midlayer_create_something);
   }
 
 
diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index 8eda3fb..06f8f46 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -23,8 +23,8 @@ first time, it was renamed to 'EDAC'.
 The bluesmoke project at sourceforge.net is now utilized as a 'staging area'
 for EDAC development, before it is sent upstream to kernel.org
 
-At the bluesmoke/EDAC project site, is a series of quilt patches against
-recent kernels, stored in a SVN respository. For easier downloading, there
+At the bluesmoke/EDAC project site is a series of quilt patches against
+recent kernels, stored in a SVN repository. For easier downloading, there
 is also a tarball snapshot available.
 
 ============================================================================
@@ -73,9 +73,9 @@ the vendor should tie the parity status bits to 0 if they do not intend
 to generate parity.  Some vendors do not do this, and thus the parity bit
 can "float" giving false positives.
 
-In the kernel there is a pci device attribute located in sysfs that is
+In the kernel there is a PCI device attribute located in sysfs that is
 checked by the EDAC PCI scanning code. If that attribute is set,
-PCI parity/error scannining is skipped for that device. The attribute
+PCI parity/error scanning is skipped for that device. The attribute
 is:
 
 	broken_parity_status
diff --git a/Documentation/fb/sh7760fb.txt b/Documentation/fb/sh7760fb.txt
index c87bfe5..b994c3b 100644
--- a/Documentation/fb/sh7760fb.txt
+++ b/Documentation/fb/sh7760fb.txt
@@ -1,7 +1,7 @@
 SH7760/SH7763 integrated LCDC Framebuffer driver
 ================================================
 
-0. Overwiew
+0. Overview
 -----------
 The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which
 supports (in theory) resolutions ranging from 1x1 to 1024x1024,
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt
index c634174..8f78ded 100644
--- a/Documentation/filesystems/autofs4-mount-control.txt
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -369,7 +369,7 @@ The call requires an initialized struct autofs_dev_ioctl. There are two
 possible variations. Both use the path field set to the path of the mount
 point to check and the size field adjusted appropriately. One uses the
 ioctlfd field to identify a specific mount point to check while the other
-variation uses the path and optionaly arg1 set to an autofs mount type.
+variation uses the path and optionally arg1 set to an autofs mount type.
 The call returns 1 if this is a mount point and sets arg1 to the device
 number of the mount and field arg2 to the relevant super block magic
 number (described below) or 0 if it isn't a mountpoint. In both cases
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 4db125b..2666b1e 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -184,7 +184,7 @@ This has the following fields:
      have index children.
 
      If this function is not supplied or if it returns NULL then the first
-     cache in the parent's list will be chosed, or failing that, the first
+     cache in the parent's list will be chosen, or failing that, the first
      cache in the master list.
 
  (4) A function to retrieve an object's key from the netfs [mandatory].
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 97882df..608fdba 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -294,7 +294,7 @@ max_batch_time=usec	Maximum amount of time ext4 should wait for
 			amount of time (on average) that it takes to
 			finish committing a transaction.  Call this time
 			the "commit time".  If the time that the
-			transactoin has been running is less than the
+			transaction has been running is less than the
 			commit time, ext4 will try sleeping for the
 			commit time to see if other operations will join
 			the transaction.   The commit time is capped by
@@ -328,7 +328,7 @@ noauto_da_alloc		replacing existing files via patterns such as
 			journal commit, in the default data=ordered
 			mode, the data blocks of the new file are forced
 			to disk before the rename() operation is
-			commited.  This provides roughly the same level
+			committed.  This provides roughly the same level
 			of guarantees as ext3, and avoids the
 			"zero-length" problem that can happen when a
 			system crashes before the delayed allocation
@@ -358,7 +358,7 @@ written to the journal first, and then to its final location.
 In the event of a crash, the journal can be replayed, bringing both data and
 metadata into a consistent state.  This mode is the slowest except when data
 needs to be read from and written to disk at the same time where it
-outperforms all others modes.  Curently ext4 does not have delayed
+outperforms all others modes.  Currently ext4 does not have delayed
 allocation support if this data journalling mode is selected.
 
 References
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt
index 1e3defc..606233c 100644
--- a/Documentation/filesystems/fiemap.txt
+++ b/Documentation/filesystems/fiemap.txt
@@ -204,7 +204,7 @@ fiemap_check_flags() helper:
 
 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
 
-The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The
+The struct fieinfo should be passed in as received from ioctl_fiemap(). The
 set of fiemap flags which the fs understands should be passed via fs_flags. If
 fiemap_check_flags finds invalid user flags, it will place the bad values in
 fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
index 85eaead..e386f7e 100644
--- a/Documentation/filesystems/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs-rdma.txt
@@ -100,7 +100,7 @@ Installation
     $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
 
     In this location, mount.nfs will be invoked automatically for NFS mounts
-    by the system mount commmand.
+    by the system mount command.
 
     NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
     on the NFS client machine. You do not need this specific version of
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ce84cfc..cd8717a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -366,7 +366,7 @@ just those considered 'most important'.  The new vectors are:
   RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are
   sent from one CPU to another per the needs of the OS.  Typically,
   their statistics are used by kernel developers and interested users to
-  determine the occurance of interrupt of the given type.
+  determine the occurrence of interrupts of the given type.
 
 The above IRQ vectors are displayed only when relevent.  For example,
 the threshold vector does not exist on x86_64 platforms.  Others are
@@ -551,7 +551,7 @@ Committed_AS: The amount of memory presently allocated on the system.
               memory once that memory has been successfully allocated.
 VmallocTotal: total size of vmalloc memory area
  VmallocUsed: amount of vmalloc area which is used
-VmallocChunk: largest contigious block of vmalloc area which is free
+VmallocChunk: largest contiguous block of vmalloc area which is free
 
 ..............................................................................
 
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt
index 26e4b8b..85354b3 100644
--- a/Documentation/filesystems/sysfs-pci.txt
+++ b/Documentation/filesystems/sysfs-pci.txt
@@ -72,7 +72,7 @@ The 'rom' file is special in that it provides read-only access to the device's
 ROM file, if available.  It's disabled by default, however, so applications
 should write the string "1" to the file to enable it before attempting a read
 call, and disable it following the access by writing "0" to the file.  Note
-that the device must be enabled for a rom read to return data succesfully.
+that the device must be enabled for a rom read to return data successfully.
 In the event a driver is not bound to the device, it can be enabled using the
 'enable' file, documented above.
 
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 3a5ddc9..5147be5 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -124,10 +124,10 @@ sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as
 flush         -- If set, the filesystem will try to flush to disk more
 		 early than normal. Not set by default.
 
-rodir	      -- FAT has the ATTR_RO (read-only) attribute. But on Windows,
-		 the ATTR_RO of the directory will be just ignored actually,
-		 and is used by only applications as flag. E.g. it's setted
-		 for the customized folder.
+rodir	      -- FAT has the ATTR_RO (read-only) attribute. On Windows,
+		 the ATTR_RO of the directory will just be ignored,
+		 and is used only by applications as a flag (e.g. it's set
+		 for the customized folder).
 
 		 If you want to use ATTR_RO as read-only flag even for
 		 the directory, set this option.
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 145c25a..e4b6985 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -458,7 +458,7 @@ debugfs interface, since it provides control over GPIO direction and
 value instead of just showing a gpio state summary.  Plus, it could be
 present on production systems without debugging support.
 
-Given approprate hardware documentation for the system, userspace could
+Given appropriate hardware documentation for the system, userspace could
 know for example that GPIO #23 controls the write protect line used to
 protect boot loader segments in flash memory.  System upgrade procedures
 may need to temporarily remove that protection, first importing a GPIO,
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 3f4bc84..cab61d8 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -108,7 +108,7 @@ There are two possible methods of using Kdump.
 
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
-   only with the architecutres which support a relocatable kernel. As
+   only with the architectures which support a relocatable kernel. As
    of today, i386, x86_64, ppc64 and ia64 architectures support relocatable
    kernel.
 
@@ -222,7 +222,7 @@ Dump-capture kernel config options (Arch Dependent, ia64)
 ----------------------------------------------------------
 
 - No specific options are required to create a dump-capture kernel
-  for ia64, other than those specified in the arch idependent section
+  for ia64, other than those specified in the arch independent section
   above. This means that it is possible to use the system kernel
   as a dump-capture kernel if desired.
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7bcdebf..24d726f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1076,7 +1076,7 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	kgdboc=		[HW] kgdb over consoles.
 			Requires a tty driver that supports console polling.
-			(only serial suported for now)
+			(only serial supported for now)
 			Format: <serial_device>[,baud]
 
 	kmac=		[MIPS] korina ethernet MAC address.
@@ -1405,7 +1405,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			('y', default) or cooked coordinates ('n')
 
 	mtrr_chunk_size=nn[KMG] [X86]
-			used for mtrr cleanup. It is largest continous chunk
+			used for mtrr cleanup. It is largest continuous chunk
 			that could hold holes aka. UC entries.
 
 	mtrr_gran_size=nn[KMG] [X86]
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index b2e3745..c79ab99 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -132,7 +132,7 @@ kobject_name():
     const char *kobject_name(const struct kobject * kobj);
 
 There is a helper function to both initialize and add the kobject to the
-kernel at the same time, called supprisingly enough kobject_init_and_add():
+kernel at the same time, called surprisingly enough kobject_init_and_add():
 
     int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
                              struct kobject *parent, const char *fmt, ...);
diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt
index 5ee2a02..0768fcc 100644
--- a/Documentation/laptops/acer-wmi.txt
+++ b/Documentation/laptops/acer-wmi.txt
@@ -40,7 +40,7 @@ NOTE: The Acer Aspire One is not supported hardware. It cannot work with
 acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been
 blacklisted until that happens.
 
-Please see the website for the current list of known working hardare:
+Please see the website for the current list of known working hardware:
 
 http://code.google.com/p/aceracpi/wiki/SupportedHardware
 
diff --git a/Documentation/laptops/sony-laptop.txt b/Documentation/laptops/sony-laptop.txt
index 8b2bc15..23ce7d3 100644
--- a/Documentation/laptops/sony-laptop.txt
+++ b/Documentation/laptops/sony-laptop.txt
@@ -22,7 +22,7 @@ If your laptop model supports it, you will find sysfs files in the
 /sys/class/backlight/sony/
 directory. You will be able to query and set the current screen
 brightness:
-	brightness		get/set screen brightness (an iteger
+	brightness		get/set screen brightness (an integer
 				between 0 and 7)
 	actual_brightness	reading from this file will query the HW
 				to get real brightness value
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index e7e9a690..78e354b 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -506,7 +506,7 @@ generate input device EV_KEY events.
 In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
 events for switches:
 
-SW_RFKILL_ALL	T60 and later hardare rfkill rocker switch
+SW_RFKILL_ALL	T60 and later hardware rfkill rocker switch
 SW_TABLET_MODE	Tablet ThinkPads HKEY events 0x5009 and 0x500A
 
 Non hot-key ACPI HKEY event map:
diff --git a/Documentation/local_ops.txt b/Documentation/local_ops.txt
index 23045b8..300da4b 100644
--- a/Documentation/local_ops.txt
+++ b/Documentation/local_ops.txt
@@ -34,7 +34,7 @@ out of order wrt other memory writes by the owner CPU.
 
 It can be done by slightly modifying the standard atomic operations : only
 their UP variant must be kept. It typically means removing LOCK prefix (on
-i386 and x86_64) and any SMP sychronization barrier. If the architecture does
+i386 and x86_64) and any SMP synchronization barrier. If the architecture does
 not have a different behavior between SMP and UP, including asm-generic/local.h
 in your architecture's local.h is sufficient.
 
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 4c2ecf5..bbc8a6a 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -73,13 +73,13 @@ this phase is triggered automatically. ACPI can notify this event. If not,
 (see Section 4.).
 
 Logical Memory Hotplug phase is to change memory state into
-avaiable/unavailable for users. Amount of memory from user's view is
+available/unavailable for users. Amount of memory from user's view is
 changed by this phase. The kernel makes all memory in it as free pages
 when a memory range is available.
 
 In this document, this phase is described as online/offline.
 
-Logical Memory Hotplug phase is triggred by write of sysfs file by system
+Logical Memory Hotplug phase is triggered by write of sysfs file by system
 administrator. For the hot-add case, it must be executed after Physical Hotplug
 phase by hand.
 (However, if you writes udev's hotplug scripts for memory hotplug, these
@@ -334,7 +334,7 @@ MEMORY_CANCEL_ONLINE
   Generated if MEMORY_GOING_ONLINE fails.
 
 MEMORY_ONLINE
-  Generated when memory has succesfully brought online. The callback may
+  Generated when memory has successfully brought online. The callback may
   allocate pages from the new memory.
 
 MEMORY_GOING_OFFLINE
@@ -359,7 +359,7 @@ The third argument is passed by pointer of struct memory_notify.
 struct memory_notify {
        unsigned long start_pfn;
        unsigned long nr_pages;
-       int status_cahnge_nid;
+       int status_change_nid;
 }
 
 start_pfn is start_pfn of online/offline memory.
diff --git a/Documentation/mn10300/ABI.txt b/Documentation/mn10300/ABI.txt
index 1fef1f0..d3507ba 100644
--- a/Documentation/mn10300/ABI.txt
+++ b/Documentation/mn10300/ABI.txt
@@ -26,7 +26,7 @@ registers and the stack. If the first argument is a 64-bit value, it will be
 passed in D0:D1. If the first argument is not a 64-bit value, but the second
 is, the second will be passed entirely on the stack and D1 will be unused.
 
-Arguments smaller than 32-bits are not coelesced within a register or a stack
+Arguments smaller than 32-bits are not coalesced within a register or a stack
 word. For example, two byte-sized arguments will always be passed in separate
 registers or word-sized stack slots.
 
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
index bdf93b7..274821b 100644
--- a/Documentation/mtd/nand_ecc.txt
+++ b/Documentation/mtd/nand_ecc.txt
@@ -50,7 +50,7 @@ byte 255:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp5 ... rp15
            cp5  cp5  cp5  cp5  cp4  cp4  cp4  cp4
 
 This figure represents a sector of 256 bytes.
-cp is my abbreviaton for column parity, rp for row parity.
+cp is my abbreviation for column parity, rp for row parity.
 
 Let's start to explain column parity.
 cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
@@ -560,7 +560,7 @@ Measuring this code again showed big gain. When executing the original
 linux code 1 million times, this took about 1 second on my system.
 (using time to measure the performance). After this iteration I was back
 to 0.075 sec. Actually I had to decide to start measuring over 10
-million interations in order not to loose too much accuracy. This one
+million iterations in order not to lose too much accuracy. This one
 definitely seemed to be the jackpot!
 
 There is a little bit more room for improvement though. There are three
@@ -571,8 +571,8 @@ loop; This eliminates 3 statements per loop. Of course after the loop we
 need to correct by adding:
     rp4 ^= rp4_6;
     rp6 ^= rp4_6
-Furthermore there are 4 sequential assingments to rp8. This can be
-encoded slightly more efficient by saving tmppar before those 4 lines
+Furthermore there are 4 sequential assignments to rp8. This can be
+encoded slightly more efficiently by saving tmppar before those 4 lines
 and later do rp8 = rp8 ^ tmppar ^ notrp8;
 (where notrp8 is the value of rp8 before those 4 lines).
 Again a use of the commutative property of xor.
@@ -622,7 +622,7 @@ Not a big change, but every penny counts :-)
 Analysis 7
 ==========
 
-Acutally this made things worse. Not very much, but I don't want to move
+Actually this made things worse. Not very much, but I don't want to move
 into the wrong direction. Maybe something to investigate later. Could
 have to do with caching again.
 
@@ -642,7 +642,7 @@ Analysis 8
 This makes things worse. Let's stick with attempt 6 and continue from there.
 Although it seems that the code within the loop cannot be optimised
 further there is still room to optimize the generation of the ecc codes.
-We can simply calcualate the total parity. If this is 0 then rp4 = rp5
+We can simply calculate the total parity. If this is 0 then rp4 = rp5
 etc. If the parity is 1, then rp4 = !rp5;
 But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
 in the result byte and then do something like
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 0876275..d5181ce 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -221,7 +221,7 @@ ad_select
 
 		- Any slave's 802.3ad association state changes
 
-		- The bond's adminstrative state changes to up
+		- The bond's administrative state changes to up
 
 	count or 2
 
@@ -369,7 +369,7 @@ fail_over_mac
 		When this policy is used in conjuction with the mii
 		monitor, devices which assert link up prior to being
 		able to actually transmit and receive are particularly
-		susecptible to loss of the gratuitous ARP, and an
+		susceptible to loss of the gratuitous ARP, and an
 		appropriate updelay setting may be required.
 
 	follow or 2
@@ -1794,7 +1794,7 @@ target to query.
 generally referred to as "trunk failover."  This is a feature of the
 switch that causes the link state of a particular switch port to be set
 down (or up) when the state of another switch port goes down (or up).
-It's purpose is to propogate link failures from logically "exterior" ports
+Its purpose is to propagate link failures from logically "exterior" ports
 to the logically "interior" ports that bonding is able to monitor via
 miimon.  Availability and configuration for trunk failover varies by
 switch, but this can be a viable alternative to the ARP monitor when using
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 2035bc4..463d9e0 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -327,7 +327,7 @@ solution for a couple of reasons:
             return 1;
     }
 
-    /* paraniod check ... */
+    /* paranoid check ... */
     if (nbytes < sizeof(struct can_frame)) {
             fprintf(stderr, "read: incomplete CAN frame\n");
             return 1;
diff --git a/Documentation/networking/dm9000.txt b/Documentation/networking/dm9000.txt
index 65df3de..5552e2e 100644
--- a/Documentation/networking/dm9000.txt
+++ b/Documentation/networking/dm9000.txt
@@ -129,7 +129,7 @@ PHY Link state polling
 ----------------------
 
 The driver keeps track of the link state and informs the network core
-about link (carrier) availablilty. This is managed by several methods
+about link (carrier) availability. This is managed by several methods
 depending on the version of the chip and on which PHY is being used.
 
 For the internal PHY, the original (and currently default) method is
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
index 2451f55..63214b2 100644
--- a/Documentation/networking/l2tp.txt
+++ b/Documentation/networking/l2tp.txt
@@ -158,7 +158,7 @@ Sample Userspace Code
         }
         return 0;
 
-Miscellanous
+Miscellaneous
 ============
 
 The PPPoL2TP driver was developed as part of the OpenL2TP project by
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index a2ab6a0..87b3d15 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -74,7 +74,7 @@ dev->hard_start_xmit:
 	for this and return NETDEV_TX_LOCKED when the spin lock fails.
 	The locking there should also properly protect against 
 	set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated.
-	Dont use it for new drivers.
+	Don't use it for new drivers.
 
 	Context: Process with BHs disabled or BH (timer),
 	         will be called with interrupts disabled by netconsole.
diff --git a/Documentation/networking/phonet.txt b/Documentation/networking/phonet.txt
index 6a07e45..6e8ce09 100644
--- a/Documentation/networking/phonet.txt
+++ b/Documentation/networking/phonet.txt
@@ -36,7 +36,7 @@ Phonet packets have a common header as follows:
 On Linux, the link-layer header includes the pn_media byte (see below).
 The next 7 bytes are part of the network-layer header.
 
-The device ID is split: the 6 higher-order bits consitute the device
+The device ID is split: the 6 higher-order bits constitute the device
 address, while the 2 lower-order bits are used for multiplexing, as are
 the 8-bit object identifiers. As such, Phonet can be considered as a
 network layer with 6 bits of address space and 10 bits for transport
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index dcf3164..eaa1a25 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -89,7 +89,7 @@ added to this document when its support is enabled.
 Device drivers who provide their own built regulatory domain
 do not need a callback as the channels registered by them are
 the only ones that will be allowed and therefore *additional*
-cannels cannot be enabled.
+channels cannot be enabled.
 
 Example code - drivers hinting an alpha2:
 ------------------------------------------
diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt
index 82b7a43..5f83fd2 100644
--- a/Documentation/power/regulator/consumer.txt
+++ b/Documentation/power/regulator/consumer.txt
@@ -178,5 +178,5 @@ Consumers can uregister interest by calling :-
 int regulator_unregister_notifier(struct regulator *regulator,
 				struct notifier_block *nb);
 
-Regulators use the kernel notifier framework to send event to thier interested
+Regulators use the kernel notifier framework to send event to their interested
 consumers.
diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt
index bdcb332..0cded69 100644
--- a/Documentation/power/regulator/overview.txt
+++ b/Documentation/power/regulator/overview.txt
@@ -119,7 +119,7 @@ Some terms used in this document:-
                    battery power, USB power)
 
                    Regulator Domains: is the new current limit within the
-                   regulator operating parameters for input/ouput voltage.
+                   regulator operating parameters for input/output voltage.
 
                    If the regulator request passes all the constraint tests
                    then the new regulator value is applied.
diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt
index 2ebdc60..514b94f 100644
--- a/Documentation/power/s2ram.txt
+++ b/Documentation/power/s2ram.txt
@@ -63,7 +63,7 @@ hardware during resume operations where a value can be set that will
 survive a reboot.
 
 Consequence is that after a resume (even if it is successful) your system
-clock will have a value corresponding to the magic mumber instead of the
+clock will have a value corresponding to the magic number instead of the
 correct date/time! It is therefore advisable to use a program like ntp-date
 or rdate to reset the correct date/time from an external time source when
 using this trace option.
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
index 7b99636..b967cd9 100644
--- a/Documentation/power/userland-swsusp.txt
+++ b/Documentation/power/userland-swsusp.txt
@@ -109,7 +109,7 @@ unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are
 still frozen when the device is being closed).
 
 Currently it is assumed that the userland utilities reading/writing the
-snapshot image from/to the kernel will use a swap parition, called the resume
+snapshot image from/to the kernel will use a swap partition, called the resume
 partition, or a swap file as storage space (if a swap file is used, the resume
 partition is the partition that holds this file).  However, this is not really
 required, as they can use, for example, a special (blank) suspend partition or
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index d16b7a1..8d999d8 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -1356,7 +1356,7 @@ platforms are moved over to use the flattened-device-tree model.
     - phy-map           : 1 cell, optional, bitmap of addresses to probe the PHY
 			  for, used if phy-address is absent. bit 0x00000001 is
 			  MDIO address 0.
-			  For Axon it can be absent, thouugh my current driver
+			  For Axon it can be absent, though my current driver
 			  doesn't handle phy-address yet so for now, keep
 			  0x00ffffff in it.
     - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
@@ -1438,7 +1438,7 @@ platforms are moved over to use the flattened-device-tree model.
 
    The Xilinx EDK toolchain ships with a set of IP cores (devices) for use
    in Xilinx Spartan and Virtex FPGAs.  The devices cover the whole range
-   of standard device types (network, serial, etc.) and miscellanious
+   of standard device types (network, serial, etc.) and miscellaneous
    devices (gpio, LCD, spi, etc).  Also, since these devices are
    implemented within the fpga fabric every instance of the device can be
    synthesised with different options that change the behaviour.
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
index 088fc47..160c752 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
@@ -19,7 +19,7 @@ Example:
 	reg = <119c0 30>;
      }
 
-* Properties common to mulitple CPM/QE devices
+* Properties common to multiple CPM/QE devices
 
 - fsl,cpm-command : This value is ORed with the opcode and command flag
                     to specify the device on which a CPM command operates.
diff --git a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
index b26b919..bcc30ba 100644
--- a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
@@ -1,6 +1,6 @@
 * Freescale MSI interrupt controller
 
-Reguired properities:
+Required properties:
 - compatible : compatible list, contains 2 entries,
   first is "fsl,CHIP-msi", where CHIP is the processor(mpc8610, mpc8572,
   etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on
diff --git a/Documentation/powerpc/dts-bindings/fsl/pmc.txt b/Documentation/powerpc/dts-bindings/fsl/pmc.txt
index 02f6f43..07256b7 100644
--- a/Documentation/powerpc/dts-bindings/fsl/pmc.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/pmc.txt
@@ -15,8 +15,8 @@ Properties:
   compatible; all statements below that apply to "fsl,mpc8548-pmc" also
   apply to "fsl,mpc8641d-pmc".
 
-  Compatibility does not include bit assigments in SCCR/PMCDR/DEVDISR; these
-  bit assigments are indicated via the sleep specifier in each device's
+  Compatibility does not include bit assignments in SCCR/PMCDR/DEVDISR; these
+  bit assignments are indicated via the sleep specifier in each device's
   sleep property.
 
 - reg: For devices compatible with "fsl,mpc8349-pmc", the first resource
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt
index 06da4d4..2031ddb 100644
--- a/Documentation/powerpc/qe_firmware.txt
+++ b/Documentation/powerpc/qe_firmware.txt
@@ -225,7 +225,7 @@ For example, to match the 8323, revision 1.0:
      soc.major = 1
      soc.minor = 0
 
-'padding' is neccessary for structure alignment.  This field ensures that the
+'padding' is necessary for structure alignment.  This field ensures that the
 'extended_modes' field is aligned on a 64-bit boundary.
 
 'extended_modes' is a bitfield that defines special functionality which has an
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index 10711d9..1eb576a 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -1984,7 +1984,7 @@ break *$pc
 
 break *0x400618
 
-heres a really useful one for large programs
+Here's a really useful one for large programs
 rbr
 Set a breakpoint for all functions matching REGEXP
 e.g.
@@ -2211,7 +2211,7 @@ Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
 #5  0x51692c in readline_internal () at readline.c:521
 #6  0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ")
     at readline.c:349
-#7  0x4d7a8a in command_line_input (prrompt=0x564420 "(gdb) ", repeat=1,
+#7  0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
     annotation_suffix=0x4d6b44 "prompt") at top.c:2091
 #8  0x4d6cf0 in command_loop () at top.c:1345
 #9  0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
diff --git a/Documentation/scheduler/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt
index e2bae5a..3ac1e46 100644
--- a/Documentation/scheduler/sched-nice-design.txt
+++ b/Documentation/scheduler/sched-nice-design.txt
@@ -55,7 +55,7 @@ To sum it up: we always wanted to make nice levels more consistent, but
 within the constraints of HZ and jiffies and their nasty design level
 coupling to timeslices and granularity it was not really viable.
 
-The second (less frequent but still periodically occuring) complaint
+The second (less frequent but still periodically occurring) complaint
 about Linux's nice level support was its assymetry around the origo
 (which you can see demonstrated in the picture above), or more
 accurately: the fact that nice level behavior depended on the _absolute_
diff --git a/Documentation/scsi/aic79xx.txt b/Documentation/scsi/aic79xx.txt
index 683ccae..c014ecc 100644
--- a/Documentation/scsi/aic79xx.txt
+++ b/Documentation/scsi/aic79xx.txt
@@ -194,7 +194,7 @@ The following information is available in this file:
           - Packetized SCSI Protocol at 160MB/s and 320MB/s
           - Quick Arbitration Selection (QAS)
           - Retained Training Information (Rev B. ASIC only)
-        - Interrupt Coalessing
+        - Interrupt Coalescing
         - Initiator Mode (target mode not currently 
           supported)
         - Support for the PCI-X standard up to 133MHz
diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt
index 230e308..08e2b4d 100644
--- a/Documentation/scsi/ncr53c8xx.txt
+++ b/Documentation/scsi/ncr53c8xx.txt
@@ -206,7 +206,7 @@ of MOVE MEMORY instructions.
 The 896 and the 895A allows handling of the phase mismatch context from 
 SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor 
 until the C code has saved the context of the transfer).
-Implementing this without using LOAD/STORE instructions would be painfull 
+Implementing this without using LOAD/STORE instructions would be painful 
 and I didn't even want to try it.
 
 The 896 chip supports 64 bit PCI transactions and addressing, while the 
@@ -240,7 +240,7 @@ characteristics. This feature may also reduce average command latency.
 In order to really gain advantage of this feature, devices must have 
 a reasonable cache size (No miracle is to be expected for a low-end 
 hard disk with 128 KB or less).
-Some kown SCSI devices do not properly support tagged command queuing.
+Some known SCSI devices do not properly support tagged command queuing.
 Generally, firmware revisions that fix this kind of problems are available 
 at respective vendor web/ftp sites.
 All I can say is that the hard disks I use on my machines behave well with 
diff --git a/Documentation/scsi/sym53c8xx_2.txt b/Documentation/scsi/sym53c8xx_2.txt
index 49ea5c5..eb9a7b9 100644
--- a/Documentation/scsi/sym53c8xx_2.txt
+++ b/Documentation/scsi/sym53c8xx_2.txt
@@ -206,7 +206,7 @@ characteristics. This feature may also reduce average command latency.
 In order to really gain advantage of this feature, devices must have 
 a reasonable cache size (No miracle is to be expected for a low-end 
 hard disk with 128 KB or less).
-Some kown old SCSI devices do not properly support tagged command queuing.
+Some known old SCSI devices do not properly support tagged command queuing.
 Generally, firmware revisions that fix this kind of problems are available 
 at respective vendor web/ftp sites.
 All I can say is that I never have had problem with tagged queuing using 
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 012858d..ecb969b 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -754,7 +754,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     single_cmd  - Use single immediate commands to communicate with
 		codecs (for debugging only)
     enable_msi	- Enable Message Signaled Interrupt (MSI) (default = off)
-    power_save	- Automatic power-saving timtout (in second, 0 =
+    power_save	- Automatic power-saving timeout (in second, 0 =
 		disable)
     power_save_controller - Reset HD-audio controller in power-saving mode
 		(default = on)
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt
index 88b7433..71ac995 100644
--- a/Documentation/sound/alsa/HD-Audio.txt
+++ b/Documentation/sound/alsa/HD-Audio.txt
@@ -16,7 +16,7 @@ methods for the	HD-audio hardware.
 The HD-audio component consists of two parts: the controller chip and 
 the codec chips on the HD-audio bus.  Linux provides a single driver
 for all controllers, snd-hda-intel.  Although the driver name contains
-a word of a well-known harware vendor, it's not specific to it but for
+a word of a well-known hardware vendor, it's not specific to it but for
 all controller chips by other companies.  Since the HD-audio
 controllers are supposed to be compatible, the single snd-hda-driver
 should work in most cases.  But, not surprisingly, there are known
diff --git a/Documentation/sound/alsa/hda_codec.txt b/Documentation/sound/alsa/hda_codec.txt
index 34e87ec..de8efbc 100644
--- a/Documentation/sound/alsa/hda_codec.txt
+++ b/Documentation/sound/alsa/hda_codec.txt
@@ -114,7 +114,7 @@ For writing a sequence of verbs, use snd_hda_sequence_write().
 
 There are variants of cached read/write, snd_hda_codec_write_cache(),
 snd_hda_sequence_write_cache().  These are used for recording the
-register states for the power-mangement resume.  When no PM is needed,
+register states for the power-management resume.  When no PM is needed,
 these are equivalent with non-cached version.
 
 To retrieve the number of sub nodes connected to the given node, use
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index c302ddf..6fab2dc 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -358,7 +358,7 @@ nr_pdflush_threads
 The current number of pdflush threads.  This value is read-only.
 The value changes according to the number of dirty pages in the system.
 
-When neccessary, additional pdflush threads are created, one per second, up to
+When necessary, additional pdflush threads are created, one per second, up to
 nr_pdflush_threads_max.
 
 ==============================================================
@@ -565,7 +565,7 @@ swappiness
 
 This control is used to define how aggressive the kernel will swap
 memory pages.  Higher values will increase agressiveness, lower values
-descrease the amount of swap.
+decrease the amount of swap.
 
 The default value is 60.
 
diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt
index e7c09ab..04763a3 100644
--- a/Documentation/timers/hpet.txt
+++ b/Documentation/timers/hpet.txt
@@ -7,7 +7,7 @@ by Intel and Microsoft which can be found at
 
 Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
 and up to 32 comparators.  Normally three or more comparators are provided,
-each of which can generate oneshot interupts and at least one of which has
+each of which can generate oneshot interrupts and at least one of which has
 additional hardware to support periodic interrupts.  The comparators are
 also called "timers", which can be misleading since usually timers are
 independent of each other ... these share a counter, complicating resets.
diff --git a/Documentation/timers/timer_stats.txt b/Documentation/timers/timer_stats.txt
index 20d368c..9bd00fc 100644
--- a/Documentation/timers/timer_stats.txt
+++ b/Documentation/timers/timer_stats.txt
@@ -62,7 +62,7 @@ Timerstats sample period: 3.888770 s
 
 The first column is the number of events, the second column the pid, the third
 column is the name of the process. The forth column shows the function which
-initialized the timer and in parantheses the callback function which was
+initialized the timer and in parenthesis the callback function which was
 executed on expiry.
 
     Thomas, Ingo
diff --git a/Documentation/usb/WUSB-Design-overview.txt b/Documentation/usb/WUSB-Design-overview.txt
index 4c3d62c..c480e9c 100644
--- a/Documentation/usb/WUSB-Design-overview.txt
+++ b/Documentation/usb/WUSB-Design-overview.txt
@@ -84,7 +84,7 @@ The different logical parts of this driver are:
 
       *UWB*: the Ultra-Wide-Band stack -- manages the radio and
       associated spectrum to allow for devices sharing it. Allows to
-      control bandwidth assingment, beaconing, scanning, etc
+      control bandwidth assignment, beaconing, scanning, etc
 
     *
 
@@ -184,7 +184,7 @@ and sends the replies and notifications back to the API
 [/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that
 is chartered, among other things, to keep the tab of how the UWB radio
 neighborhood looks, creating and destroying devices as they show up or
-dissapear.
+disappear.
 
 Command execution is very simple: a command block is sent and a event
 block or reply is expected back. For sending/receiving command/events, a
@@ -333,7 +333,7 @@ read descriptors and move our data.
 
 *Device life cycle and keep alives*
 
-Everytime there is a succesful transfer to/from a device, we update a
+Every time there is a successful transfer to/from a device, we update a
 per-device activity timestamp. If not, every now and then we check and
 if the activity timestamp gets old, we ping the device by sending it a
 Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this
@@ -411,7 +411,7 @@ context (wa_xfer) and submit it. When the xfer is done, our callback is
 called and we assign the status bits and release the xfer resources.
 
 In dequeue() we are basically cancelling/aborting the transfer. We issue
-a xfer abort request to the HC, cancell all the URBs we had submitted
+a xfer abort request to the HC, cancel all the URBs we had submitted
 and not yet done and when all that is done, the xfer callback will be
 called--this will call the URB callback.
 
diff --git a/Documentation/usb/anchors.txt b/Documentation/usb/anchors.txt
index 6f24f56..fe6a99a 100644
--- a/Documentation/usb/anchors.txt
+++ b/Documentation/usb/anchors.txt
@@ -27,7 +27,7 @@ Association and disassociation of URBs with anchors
 
 An association of URBs to an anchor is made by an explicit
 call to usb_anchor_urb(). The association is maintained until
-an URB is finished by (successfull) completion. Thus disassociation
+an URB is finished by (successful) completion. Thus disassociation
 is automatic. A function is provided to forcibly finish (kill)
 all URBs associated with an anchor.
 Furthermore, disassociation can be made with usb_unanchor_urb()
@@ -76,4 +76,4 @@ usb_get_from_anchor()
 Returns the oldest anchored URB of an anchor. The URB is unanchored
 and returned with a reference. As you may mix URBs to several
 destinations in one anchor you have no guarantee the chronologically
-first submitted URB is returned.
\ No newline at end of file
+first submitted URB is returned.
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt
index 914cb7e..4652c0f 100644
--- a/Documentation/video4linux/cx18.txt
+++ b/Documentation/video4linux/cx18.txt
@@ -11,7 +11,7 @@ encoder chip:
 2) Some people have problems getting the i2c bus to work.
    The symptom is that the eeprom cannot be read and the card is
    unusable. This is probably fixed, but if you have problems
-   then post to the video4linux or ivtv-users mailinglist.
+   then post to the video4linux or ivtv-users mailing list.
 
 3) VBI (raw or sliced) has not yet been implemented.
 
diff --git a/drivers/message/fusion/lsi/mpi_history.txt b/drivers/message/fusion/lsi/mpi_history.txt
index 693e4b5..fa9249b 100644
--- a/drivers/message/fusion/lsi/mpi_history.txt
+++ b/drivers/message/fusion/lsi/mpi_history.txt
@@ -130,7 +130,7 @@ mpi_ioc.h
  *  08-08-01  01.02.01  Original release for v1.2 work.
  *                      New format for FWVersion and ProductId in
  *                      MSG_IOC_FACTS_REPLY and MPI_FW_HEADER.
- *  08-31-01  01.02.02  Addded event MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE and
+ *  08-31-01  01.02.02  Added event MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE and
  *                      related structure and defines.
  *                      Added event MPI_EVENT_ON_BUS_TIMER_EXPIRED.
  *                      Added MPI_IOCINIT_FLAGS_DISCARD_FW_IMAGE.
@@ -190,7 +190,7 @@ mpi_ioc.h
  *  10-11-06  01.05.12  Added MPI_IOCFACTS_EXCEPT_METADATA_UNSUPPORTED.
  *                      Added MaxInitiators field to PortFacts reply.
  *                      Added SAS Device Status Change ReasonCode for
- *                      asynchronous notificaiton.
+ *                      asynchronous notification.
  *                      Added MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE and event
  *                      data structure.
  *                      Added new ImageType values for FWDownload and FWUpload
@@ -623,7 +623,7 @@ mpi_fc.h
  *  11-02-00  01.01.01  Original release for post 1.0 work
  *  12-04-00  01.01.02  Added messages for Common Transport Send and
  *                      Primitive Send.
- *  01-09-01  01.01.03  Modifed some of the new flags to have an MPI prefix
+ *  01-09-01  01.01.03  Modified some of the new flags to have an MPI prefix
  *                      and modified the FcPrimitiveSend flags.
  *  01-25-01  01.01.04  Move InitiatorIndex in LinkServiceRsp reply to a larger
  *                      field.
diff --git a/drivers/staging/go7007/go7007.txt b/drivers/staging/go7007/go7007.txt
index 9f6772b..1c2907c 100644
--- a/drivers/staging/go7007/go7007.txt
+++ b/drivers/staging/go7007/go7007.txt
@@ -2,7 +2,7 @@ This is a driver for the WIS GO7007SB multi-format video encoder.
 
 Pete Eberlein <pete@sensoray.com>
 
-The driver was orignally released under the GPL and is currently hosted at:
+The driver was originally released under the GPL and is currently hosted at:
 http://nikosapi.org/wiki/index.php/WIS_Go7007_Linux_driver
 The go7007 firmware can be acquired from the package on the site above.
 
@@ -24,7 +24,7 @@ These should be used instead of the non-standard GO7007 ioctls described
 below.
 
 
-The README files from the orignal package appear below:
+The README files from the original package appears below:
 
 ---------------------------------------------------------------------------
                      WIS GO7007SB Public Linux Driver
diff --git a/drivers/staging/panel/lcd-panel-cgram.txt b/drivers/staging/panel/lcd-panel-cgram.txt
index f9ceef4..7f82c90 100644
--- a/drivers/staging/panel/lcd-panel-cgram.txt
+++ b/drivers/staging/panel/lcd-panel-cgram.txt
@@ -3,7 +3,7 @@ characters 0 to 7. The escape code to define a new character is
 '\e[LG' followed by one digit from 0 to 7, representing the character
 number, and up to 8 couples of hex digits terminated by a semi-colon
 (';'). Each couple of digits represents a line, with 1-bits for each
-illuminated pixel with LSB on the right. Lines are numberred from the
+illuminated pixel with LSB on the right. Lines are numbered from the
 top of the character to the bottom. On a 5x7 matrix, only the 5 lower
 bits of the 7 first bytes are used for each character. If the string
 is incomplete, only complete lines will be redefined. Here are some
-- 
cgit v0.10.2


From 27af1da4b58675d5c6bacf9b7de9c2746687d272 Mon Sep 17 00:00:00 2001
From: "figo.zhang" <figo.zhang@kolorific.com>
Date: Fri, 17 Apr 2009 10:58:48 +0800
Subject: trivial: Documentation/rbtree.txt: cleanup kerneldoc of rbtree.txt

 The first formal parameter of the rb_link_node() is a pointer, and the
 "node" is define a data struct (pls see line 67 and line 73 in the
 doc), so the actual parameter should use "&data->node".

Signed-off-by: Figo.zhang <figo.zhang@kolorific.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index 7224459b..7710214 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -131,8 +131,8 @@ Example:
   	}
 
   	/* Add new node and rebalance tree. */
-  	rb_link_node(data->node, parent, new);
-  	rb_insert_color(data->node, root);
+  	rb_link_node(&data->node, parent, new);
+  	rb_insert_color(&data->node, root);
 
 	return TRUE;
   }
@@ -146,10 +146,10 @@ To remove an existing node from a tree, call:
 
 Example:
 
-  struct mytype *data = mysearch(mytree, "walrus");
+  struct mytype *data = mysearch(&mytree, "walrus");
 
   if (data) {
-  	rb_erase(data->node, mytree);
+  	rb_erase(&data->node, &mytree);
   	myfree(data);
   }
 
-- 
cgit v0.10.2


From 76d93ff344f547c633fd7b2ee6511bad82c4616f Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Wed, 22 Apr 2009 13:38:58 +0530
Subject: trivial: fix typo in bio_alloc kernel doc

Fix typo in bio_alloc kernel doc.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/fs/bio.c b/fs/bio.c
index 5900021..5f80848 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -358,9 +358,9 @@ static void bio_kmalloc_destructor(struct bio *bio)
  *
  *   If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
  *   a bio. This is due to the mempool guarantees. To make this work, callers
- *   must never allocate more than 1 bio at the time from this pool. Callers
+ *   must never allocate more than 1 bio at a time from this pool. Callers
  *   that need to allocate more than 1 bio must always submit the previously
- *   allocate bio for IO before attempting to allocate a new one. Failure to
+ *   allocated bio for IO before attempting to allocate a new one. Failure to
  *   do so can cause livelocks under memory pressure.
  *
  **/
-- 
cgit v0.10.2


From 5cdcd9d691a4810ec3f5ed6b49e2bb24871c6907 Mon Sep 17 00:00:00 2001
From: Sankar P <sankar.curiosity@gmail.com>
Date: Tue, 12 May 2009 12:41:13 +0530
Subject: trivial: spelling fix in ppc code comments

Fixes a trivial spelling error in powerpc code comments.

Signed-off-by: Sankar P <sankar.curiosity@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 89497fb..3b52c80 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -2,7 +2,7 @@
  * PowerPC64 SLB support.
  *
  * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
- * Based on earlier code writteh by:
+ * Based on earlier code written by:
  * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
  *    Copyright (c) 2001 Dave Engebretsen
  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
-- 
cgit v0.10.2


From 190342335c2a7939407d7391e5bb6c9ee39244eb Mon Sep 17 00:00:00 2001
From: Wang Tinggong <wangtinggong@gmail.com>
Date: Thu, 14 May 2009 11:00:20 +0200
Subject: trivial: rbtree.txt: fix rb_entry() parameters in sample code

Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index 7710214..aae8355 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -188,5 +188,5 @@ Example:
 
   struct rb_node *node;
   for (node = rb_first(&mytree); node; node = rb_next(node))
-  	printk("key=%s\n", rb_entry(node, int, keystring));
+	printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
 
-- 
cgit v0.10.2


From 0fa1b0a144ee3e57f63ae25a7c5402f57232853d Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Thu, 14 May 2009 23:15:22 +0200
Subject: trivial: fix grammo in bus_for_each_dev() kerneldoc

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index c659961..4b04a15 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -279,7 +279,7 @@ static struct device *next_device(struct klist_iter *i)
  *
  * NOTE: The device that returns a non-zero value is not retained
  * in any way, nor is its refcount incremented. If the caller needs
- * to retain this data, it should do, and increment the reference
+ * to retain this data, it should do so, and increment the reference
  * count in the supplied callback.
  */
 int bus_for_each_dev(struct bus_type *bus, struct device *start,
-- 
cgit v0.10.2


From 492d0f95e6927d60be6234c4b0dd500216e87e18 Mon Sep 17 00:00:00 2001
From: Alessio Igor Bogani <abogani@texware.it>
Date: Thu, 21 May 2009 19:54:33 +0200
Subject: trivial: input/misc: Fix typo in Kconfig

Signed-off-by: Alessio Igor Bogani <abogani@texware.it>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 5c0a631..06f46fc 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -232,7 +232,7 @@ config INPUT_GPIO_ROTARY_ENCODER
 	depends on GPIOLIB && GENERIC_GPIO
 	help
 	  Say Y here to add support for rotary encoders connected to GPIO lines.
-	  Check file:Documentation/incput/rotary_encoder.txt for more
+	  Check file:Documentation/input/rotary-encoder.txt for more
 	  information.
 
 	  To compile this driver as a module, choose M here: the
-- 
cgit v0.10.2


From 1944df6bff414c769b55b00cb0547b6f285884a1 Mon Sep 17 00:00:00 2001
From: Chris Sanford <crsanford@gmail.com>
Date: Wed, 27 May 2009 17:23:50 +0200
Subject: trivial: unnecessary (void*) cast removal in sound/oss/msnd.c

This is a trivial patch that removes an unnecessary void pointer cast.

Signed-off-by: Chris Sanford <crsanford@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/sound/oss/msnd.c b/sound/oss/msnd.c
index e4282d9..21eb6dc 100644
--- a/sound/oss/msnd.c
+++ b/sound/oss/msnd.c
@@ -100,7 +100,7 @@ void msnd_fifo_free(msnd_fifo *f)
 int msnd_fifo_alloc(msnd_fifo *f, size_t n)
 {
 	msnd_fifo_free(f);
-	f->data = (char *)vmalloc(n);
+	f->data = vmalloc(n);
 	f->n = n;
 	f->tail = 0;
 	f->head = 0;
-- 
cgit v0.10.2


From baf20b3e51913e8a5003d4e7a143934be8fe52b5 Mon Sep 17 00:00:00 2001
From: GeunSik Lim <leemgs1@gmail.com>
Date: Mon, 1 Jun 2009 10:49:41 +0200
Subject: trivial: ftrace:fix description of trace directory

Fix trace source directory from kernel/tracing/ to kernel/trace/.

Signed-off-by: GeunSik Lim <geunsik.lim@samsung.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 2a82d86..7bd27f0 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1834,4 +1834,4 @@ an error.
 -----------
 
 More details can be found in the source code, in the
-kernel/tracing/*.c files.
+kernel/trace/*.c files.
-- 
cgit v0.10.2


From b6e731d8774810033823e78e04dfe67aae13b61f Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Sat, 30 May 2009 00:58:41 +0200
Subject: trivial: Fix Pavel's address in MAINTAINERS

Fix my address in MAINTAINERS.

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/MAINTAINERS b/MAINTAINERS
index c944d61..8f6b38d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2351,7 +2351,7 @@ F:	fs/freevxfs/
 
 FREEZER
 P:	Pavel Machek
-M:	pavel@suse.cz
+M:	pavel@ucw.cz
 P:	Rafael J. Wysocki
 M:	rjw@sisk.pl
 L:	linux-pm@lists.linux-foundation.org
-- 
cgit v0.10.2


From 1b68bfc18b258f5a0f285f9101a84da502254768 Mon Sep 17 00:00:00 2001
From: Masanori Kobayasi <zap03216@nifty.ne.jp>
Date: Thu, 4 Jun 2009 21:12:29 +0900
Subject: trivial: Documentation/dell_rbu.txt: fix typos

Remove a period from end of command-line and fix misplaced comma.

Signed-off-by: Masanori Kobayasi <zap03216@nifty.ne.jp>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
index c11b931..1517498 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/dell_rbu.txt
@@ -76,9 +76,9 @@ Do the steps below to download the BIOS image.
 
 The /sys/class/firmware/dell_rbu/ entries will remain till the following is
 done.
-echo -1 > /sys/class/firmware/dell_rbu/loading.
+echo -1 > /sys/class/firmware/dell_rbu/loading
 Until this step is completed the driver cannot be unloaded.
-Also echoing either mono ,packet or init in to image_type will free up the
+Also echoing either mono, packet or init in to image_type will free up the
 memory allocated by the driver.
 
 If a user by accident executes steps 1 and 3 above without executing step 2;
-- 
cgit v0.10.2


From 3226224039c8f8cb840d236b5f27d2a1104789e2 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 4 Jun 2009 16:26:50 +0200
Subject: trivial: SubmittingPatches: fix typo

Fix typo.

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index f309d3c..c282380 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -444,7 +444,7 @@ offer a Reviewed-by tag for a patch.  This tag serves to give credit to
 reviewers and to inform maintainers of the degree of review which has been
 done on the patch.  Reviewed-by: tags, when supplied by reviewers known to
 understand the subject area and to perform thorough reviews, will normally
-increase the liklihood of your patch getting into the kernel.
+increase the likelihood of your patch getting into the kernel.
 
 
 15) The canonical patch format
-- 
cgit v0.10.2


From 4737f0978d6e64eae468e01fa181abf6499e6b84 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Fri, 5 Jun 2009 00:44:53 +0200
Subject: trivial: Kconfig: .ko is normally not included in module names

.ko is normally not included in Kconfig help, make it consistent.

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 3640cdc..bc9ce89 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -872,7 +872,7 @@ config BFIN_GPTIMERS
 	  are unsure, say N.
 
 	  To compile this driver as a module, choose M here: the module
-	  will be called gptimers.ko.
+	  will be called gptimers.
 
 choice
 	prompt "Uncached DMA region"
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index f42fa50..1b98cc5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -112,7 +112,7 @@ config GDROM
 	  with up to 1 GB of data. This drive will also read standard CD ROM
 	  disks. Select this option to access any disks in your GD ROM drive.
 	  Most users will want to say "Y" here.
-	  You can also build this as a module which will be called gdrom.ko
+	  You can also build this as a module which will be called gdrom.
 
 source "drivers/block/paride/Kconfig"
 
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 02ecfd5..067e9dc 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -692,7 +692,7 @@ config HVCS
 	  this driver.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called hvcs.ko.  Additionally, this module
+	  module will be called hvcs.  Additionally, this module
 	  will depend on arch specific APIs exported from hvcserver.ko
 	  which will also be compiled when this driver is built as a
 	  module.
diff --git a/drivers/connector/Kconfig b/drivers/connector/Kconfig
index 100bfd4..6e6730f 100644
--- a/drivers/connector/Kconfig
+++ b/drivers/connector/Kconfig
@@ -7,7 +7,7 @@ menuconfig CONNECTOR
 	  of the netlink socket protocol.
 
 	  Connector support can also be built as a module.  If so, the module
-	  will be called cn.ko.
+	  will be called cn.
 
 if CONNECTOR
 
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 01afd75..3e72a6a 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -34,7 +34,7 @@ config CRYPTO_DEV_PADLOCK_AES
 	  Available in VIA C3 and newer CPUs.
 
 	  If unsure say M. The compiled module will be
-	  called padlock-aes.ko
+	  called padlock-aes.
 
 config CRYPTO_DEV_PADLOCK_SHA
 	tristate "PadLock driver for SHA1 and SHA256 algorithms"
@@ -47,7 +47,7 @@ config CRYPTO_DEV_PADLOCK_SHA
 	  Available in VIA C7 and newer processors.
 
 	  If unsure say M. The compiled module will be
-	  called padlock-sha.ko
+	  called padlock-sha.
 
 config CRYPTO_DEV_GEODE
 	tristate "Support for the Geode LX AES engine"
@@ -79,7 +79,7 @@ config ZCRYPT_MONOLITHIC
 	bool "Monolithic zcrypt module"
 	depends on ZCRYPT="m"
 	help
-	  Select this option if you want to have a single module z90crypt.ko
+	  Select this option if you want to have a single module z90crypt,
 	  that contains all parts of the crypto device driver (ap bus,
 	  request router and all the card drivers).
 
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index cf06494..9a5d0aa 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -46,7 +46,7 @@ menuconfig IDE
 	  SMART parameters from disk drives.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called ide-core.ko.
+	  module will be called ide-core.
 
 	  For further information, please read <file:Documentation/ide/ide.txt>.
 
diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig
index 60955a7..1bb66e1 100644
--- a/drivers/media/dvb/dvb-usb/Kconfig
+++ b/drivers/media/dvb/dvb-usb/Kconfig
@@ -216,7 +216,7 @@ config DVB_USB_TTUSB2
 	help
 	  Say Y here to support the Pinnacle 400e DVB-S USB2.0 receiver. The
 	  firmware protocol used by this module is similar to the one used by the
-	  old ttusb-driver - that's why the module is called dvb-usb-ttusb2.ko.
+	  old ttusb-driver - that's why the module is called dvb-usb-ttusb2.
 
 config DVB_USB_DTT200U
 	tristate "WideView WT-200U and WT-220U (pen) DVB-T USB2.0 support (Yakumo/Hama/Typhoon/Yuan)"
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 6fde0a2..325fab92 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -49,7 +49,7 @@ config MTD_MS02NV
 	  If you want to compile this driver as a module ( = code which can be
 	  inserted in and removed from the running kernel whenever you want),
 	  say M here and read <file:Documentation/kbuild/modules.txt>.
-	  The module will be called ms02-nv.ko.
+	  The module will be called ms02-nv.
 
 config MTD_DATAFLASH
 	tristate "Support for AT45xxx DataFlash"
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 890936d..f327689 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -260,7 +260,7 @@ config MTD_NAND_BASLER_EXCITE
 	help
           This enables the driver for the NAND flash device found on the
           Basler eXcite Smart Camera. If built as a module, the driver
-          will be named "excite_nandflash.ko".
+          will be named excite_nandflash.
 
 config MTD_NAND_CAFE
 	tristate "NAND support for OLPC CAFÉ chip"
@@ -282,7 +282,7 @@ config MTD_NAND_CS553X
 	  controller is enabled for NAND, and currently requires that
 	  the controller be in MMIO mode.
 
-	  If you say "m", the module will be called "cs553x_nand.ko".
+	  If you say "m", the module will be called cs553x_nand.
 
 config MTD_NAND_ATMEL
 	tristate "Support for NAND Flash / SmartMedia on AT91 and AVR32"
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 214a92d..b4683ce 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -1880,7 +1880,7 @@ config FEC_MPC52xx
 	---help---
 	  This option enables support for the MPC5200's on-chip
 	  Fast Ethernet Controller
-	  If compiled as module, it will be called 'fec_mpc52xx.ko'.
+	  If compiled as module, it will be called fec_mpc52xx.
 
 config FEC_MPC52xx_MDIO
 	bool "MPC52xx FEC MDIO bus driver"
@@ -1892,7 +1892,7 @@ config FEC_MPC52xx_MDIO
 	  (Motorola? industry standard).
 	  If your board uses an external PHY connected to FEC, enable this.
 	  If not sure, enable.
-	  If compiled as module, it will be called 'fec_mpc52xx_phy.ko'.
+	  If compiled as module, it will be called fec_mpc52xx_phy.
 
 config NE_H8300
 	tristate "NE2000 compatible support for H8/300"
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 3d94e7d..3359497 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -310,7 +310,7 @@ config PRISM54
 	  If you want to compile the driver as a module ( = code which can be
 	  inserted in and removed from the running kernel whenever you want),
 	  say M here and read <file:Documentation/kbuild/modules.txt>.
-	  The module will be called prism54.ko.
+	  The module will be called prism54.
 
 config USB_ZD1201
 	tristate "USB ZD1201 based Wireless device support"
diff --git a/drivers/net/wireless/hostap/Kconfig b/drivers/net/wireless/hostap/Kconfig
index 932d207..c15db22 100644
--- a/drivers/net/wireless/hostap/Kconfig
+++ b/drivers/net/wireless/hostap/Kconfig
@@ -29,7 +29,7 @@ config HOSTAP
 	PLX/PCI/CS version of the driver to actually use the driver.
 
 	The driver can be compiled as a module and it will be called
-	"hostap.ko".
+	hostap.
 
 config HOSTAP_FIRMWARE
 	bool "Support downloading firmware images with Host AP driver"
@@ -68,7 +68,7 @@ config HOSTAP_PLX
 	driver.
 
 	The driver can be compiled as a module and will be named
-	"hostap_plx.ko".
+	hostap_plx.
 
 config HOSTAP_PCI
 	tristate "Host AP driver for Prism2.5 PCI adaptors"
@@ -81,7 +81,7 @@ config HOSTAP_PCI
 	driver.
 
 	The driver can be compiled as a module and will be named
-	"hostap_pci.ko".
+	hostap_pci.
 
 config HOSTAP_CS
 	tristate "Host AP driver for Prism2/2.5/3 PC Cards"
@@ -94,4 +94,4 @@ config HOSTAP_CS
 	driver.
 
 	The driver can be compiled as a module and will be named
-	"hostap_cs.ko".
+	hostap_cs.
diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig
index 8304f64..7361623 100644
--- a/drivers/net/wireless/iwlwifi/Kconfig
+++ b/drivers/net/wireless/iwlwifi/Kconfig
@@ -75,7 +75,7 @@ config IWLAGN
 	  If you want to compile the driver as a module ( = code which can be
 	  inserted in and removed from the running kernel whenever you want),
 	  say M here and read <file:Documentation/kbuild/modules.txt>.  The
-	  module will be called iwlagn.ko.
+	  module will be called iwlagn.
 
 
 config IWL4965
@@ -113,7 +113,7 @@ config IWL3945
 	  If you want to compile the driver as a module ( = code which can be
 	  inserted in and removed from the running kernel whenever you want),
 	  say M here and read <file:Documentation/kbuild/modules.txt>.  The
-	  module will be called iwl3945.ko.
+	  module will be called iwl3945.
 
 config IWL3945_SPECTRUM_MEASUREMENT
 	bool "Enable Spectrum Measurement in iwl3945 driver"
diff --git a/drivers/net/wireless/rt2x00/Kconfig b/drivers/net/wireless/rt2x00/Kconfig
index bfc5d9c..1ae11c7 100644
--- a/drivers/net/wireless/rt2x00/Kconfig
+++ b/drivers/net/wireless/rt2x00/Kconfig
@@ -9,11 +9,11 @@ menuconfig RT2X00
 
 	  When building one of the individual drivers, the rt2x00 library
 	  will also be created. That library (when the driver is built as
-	  a module) will be called "rt2x00lib.ko".
+	  a module) will be called rt2x00lib.
 
 	  Additionally PCI and USB libraries will also be build depending
 	  on the types of drivers being selected, these libraries will be
-	  called "rt2x00pci.ko" and "rt2x00usb.ko".
+	  called rt2x00pci and rt2x00usb.
 
 if RT2X00
 
@@ -26,7 +26,7 @@ config RT2400PCI
 	  This adds support for rt2400 wireless chipset family.
 	  Supported chips: RT2460.
 
-	  When compiled as a module, this driver will be called "rt2400pci.ko".
+	  When compiled as a module, this driver will be called rt2400pci.
 
 config RT2500PCI
 	tristate "Ralink rt2500 (PCI/PCMCIA) support"
@@ -37,7 +37,7 @@ config RT2500PCI
 	  This adds support for rt2500 wireless chipset family.
 	  Supported chips: RT2560.
 
-	  When compiled as a module, this driver will be called "rt2500pci.ko".
+	  When compiled as a module, this driver will be called rt2500pci.
 
 config RT61PCI
 	tristate "Ralink rt2501/rt61 (PCI/PCMCIA) support"
@@ -51,7 +51,7 @@ config RT61PCI
 	  This adds support for rt2501 wireless chipset family.
 	  Supported chips: RT2561, RT2561S & RT2661.
 
-	  When compiled as a module, this driver will be called "rt61pci.ko".
+	  When compiled as a module, this driver will be called rt61pci.
 
 config RT2500USB
 	tristate "Ralink rt2500 (USB) support"
@@ -62,7 +62,7 @@ config RT2500USB
 	  This adds support for rt2500 wireless chipset family.
 	  Supported chips: RT2571 & RT2572.
 
-	  When compiled as a module, this driver will be called "rt2500usb.ko".
+	  When compiled as a module, this driver will be called rt2500usb.
 
 config RT73USB
 	tristate "Ralink rt2501/rt73 (USB) support"
@@ -75,7 +75,7 @@ config RT73USB
 	  This adds support for rt2501 wireless chipset family.
 	  Supported chips: RT2571W, RT2573 & RT2671.
 
-	  When compiled as a module, this driver will be called "rt73usb.ko".
+	  When compiled as a module, this driver will be called rt73usb.
 
 config RT2X00_LIB_PCI
 	tristate
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index a7745c8..cb909a5 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -8,7 +8,7 @@ config LCS
 	   Select this option if you want to use LCS networking on IBM System z.
 	   This device driver supports Token Ring (IEEE 802.5),
 	   FDDI (IEEE 802.7) and Ethernet.
-	   To compile as a module, choose M. The module name is lcs.ko.
+	   To compile as a module, choose M. The module name is lcs.
 	   If you do not know what it is, it's safe to choose Y.
 
 config CTCM
@@ -21,7 +21,7 @@ config CTCM
 	  It also supports virtual CTCs when running under VM.
 	  This driver also supports channel-to-channel MPC SNA devices.
 	  MPC is an SNA protocol device used by Communication Server for Linux.
-	  To compile as a module, choose M. The module name is ctcm.ko.
+	  To compile as a module, choose M. The module name is ctcm.
 	  To compile into the kernel, choose Y.
 	  If you do not need any channel-to-channel connection, choose N.
 
@@ -34,7 +34,7 @@ config NETIUCV
 	  link between VM guests. Using ifconfig a point-to-point connection
 	  can be established to the Linux on IBM System z
 	  running on the other VM guest. To compile as a module, choose M.
-	  The module name is netiucv.ko. If unsure, choose Y.
+	  The module name is netiucv. If unsure, choose Y.
 
 config SMSGIUCV
 	tristate "IUCV special message support (VM only)"
@@ -50,7 +50,7 @@ config CLAW
 	  This driver supports channel attached CLAW devices.
 	  CLAW is Common Link Access for Workstation.  Common devices
           that use CLAW are RS/6000s, Cisco Routers (CIP) and 3172 devices.
-	  To compile as a module, choose M. The module name is claw.ko.
+	  To compile as a module, choose M. The module name is claw.
 	  To compile into the kernel, choose Y.
 
 config QETH
@@ -65,14 +65,14 @@ config QETH
 	  <http://www.ibm.com/developerworks/linux/linux390>
 
 	  To compile this driver as a module, choose M.
-	  The module name is qeth.ko.
+	  The module name is qeth.
 
 config QETH_L2
         tristate "qeth layer 2 device support"
         depends on QETH
         help
           Select this option to be able to run qeth devices in layer 2 mode.
-          To compile as a module, choose M. The module name is qeth_l2.ko.
+          To compile as a module, choose M. The module name is qeth_l2.
           If unsure, choose y.
 
 config QETH_L3
@@ -80,7 +80,7 @@ config QETH_L3
         depends on QETH
         help
           Select this option to be able to run qeth devices in layer 3 mode.
-          To compile as a module choose M. The module name is qeth_l3.ko.
+          To compile as a module choose M. The module name is qeth_l3.
           If unsure, choose Y.
 
 config QETH_IPV6
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 641e800..1132c5c 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -861,7 +861,7 @@ config SERIAL_UARTLITE
 	  Say Y here if you want to use the Xilinx uartlite serial controller.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called uartlite.ko.
+	  module will be called uartlite.
 
 config SERIAL_UARTLITE_CONSOLE
 	bool "Support for console on Xilinx uartlite serial port"
diff --git a/drivers/w1/Kconfig b/drivers/w1/Kconfig
index 9adbb4f..fd2c7bd 100644
--- a/drivers/w1/Kconfig
+++ b/drivers/w1/Kconfig
@@ -8,7 +8,7 @@ menuconfig W1
 	  If you want W1 support, you should say Y here.
 
 	  This W1 support can also be built as a module.  If so, the module
-	  will be called wire.ko.
+	  will be called wire.
 
 if W1
 
diff --git a/drivers/w1/masters/Kconfig b/drivers/w1/masters/Kconfig
index 96d2f8e..3195fb8 100644
--- a/drivers/w1/masters/Kconfig
+++ b/drivers/w1/masters/Kconfig
@@ -12,7 +12,7 @@ config W1_MASTER_MATROX
 	  using Matrox's G400 GPIO pins.
 
 	  This support is also available as a module.  If so, the module
-	  will be called matrox_w1.ko.
+	  will be called matrox_w1.
 
 config W1_MASTER_DS2490
 	tristate "DS2490 USB <-> W1 transport layer for 1-wire"
@@ -22,7 +22,7 @@ config W1_MASTER_DS2490
 	  for example DS9490*.
 
   	  This support is also available as a module.  If so, the module
-	  will be called ds2490.ko.
+	  will be called ds2490.
 
 config W1_MASTER_DS2482
 	tristate "Maxim DS2482 I2C to 1-Wire bridge"
@@ -56,7 +56,7 @@ config W1_MASTER_GPIO
 	  GPIO pins. This driver uses the GPIO API to control the wire.
 
 	  This support is also available as a module.  If so, the module
-	  will be called w1-gpio.ko.
+	  will be called w1-gpio.
 
 config HDQ_MASTER_OMAP
 	tristate "OMAP HDQ driver"
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 5eb8f21..452082f 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -531,7 +531,7 @@ config SBC8360_WDT
 	  Board Computer produced by Axiomtek Co., Ltd. (www.axiomtek.com).
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called sbc8360.ko.
+	  module will be called sbc8360.
 
 	  Most people will say N.
 
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ca8cb326..ead6c7a 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -168,7 +168,7 @@ config IPV6_SIT
 	  into IPv4 packets. This is useful if you want to connect two IPv6
 	  networks over an IPv4-only path.
 
-	  Saying M here will produce a module called sit.ko. If unsure, say Y.
+	  Saying M here will produce a module called sit. If unsure, say Y.
 
 config IPV6_NDISC_NODETYPE
 	bool
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index cb3ad74..c26a20c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -327,7 +327,7 @@ config NETFILTER_XT_TARGET_CONNMARK
 
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  The module will be called
-	  ipt_CONNMARK.ko.  If unsure, say `N'.
+	  ipt_CONNMARK.  If unsure, say `N'.
 
 config NETFILTER_XT_TARGET_CONNSECMARK
 	tristate '"CONNSECMARK" target support'
@@ -584,7 +584,7 @@ config NETFILTER_XT_MATCH_CONNMARK
 
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  The module will be called
-	  ipt_connmark.ko.  If unsure, say `N'.
+	  ipt_connmark.  If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
-- 
cgit v0.10.2


From 3ac49a1c9928b4a242b3cb1d83bc1d5c9b8fcb50 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Thu, 4 Jun 2009 16:20:28 +0200
Subject: trivial: fix ETIMEOUT -> ETIMEDOUT typos

fix ETIMEOUT -> ETIMEDOUT typos

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c
index 28cf369..506a5e5 100644
--- a/arch/arm/mach-sa1100/jornada720_ssp.c
+++ b/arch/arm/mach-sa1100/jornada720_ssp.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(jornada_ssp_reverse);
  * timeout after <timeout> rounds. Needs mcu running before its called.
  *
  * returns : %mcu output on success
- *	   : %-ETIMEOUT on timeout
+ *	   : %-ETIMEDOUT on timeout
  */
 int jornada_ssp_byte(u8 byte)
 {
@@ -82,7 +82,7 @@ EXPORT_SYMBOL(jornada_ssp_byte);
  * jornada_ssp_inout - decide if input is command or trading byte
  *
  * returns : (jornada_ssp_byte(byte)) on success
- *         : %-ETIMEOUT on timeout failure
+ *         : %-ETIMEDOUT on timeout failure
  */
 int jornada_ssp_inout(u8 byte)
 {
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index c92ced2..1fd5ecb 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -3174,7 +3174,7 @@ static int ql_adapter_reset(struct ql_adapter *qdev)
 
 	if (value & RST_FO_FR) {
 		QPRINTK(qdev, IFDOWN, ERR,
-			"ETIMEOUT!!! errored out of resetting the chip!\n");
+			"ETIMEDOUT!!! errored out of resetting the chip!\n");
 		status = -ETIMEDOUT;
 	}
 
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index f3a2fce..47f68cf 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -398,7 +398,7 @@ static void rx_complete (struct urb *urb)
 
 	/* stalls need manual reset. this is rare ... except that
 	 * when going through USB 2.0 TTs, unplug appears this way.
-	 * we avoid the highspeed version of the ETIMEOUT/EILSEQ
+	 * we avoid the highspeed version of the ETIMEDOUT/EILSEQ
 	 * storm, recovering as needed.
 	 */
 	case -EPIPE:
diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c
index 888198c..824e65b 100644
--- a/drivers/staging/wlan-ng/hfa384x_usb.c
+++ b/drivers/staging/wlan-ng/hfa384x_usb.c
@@ -2424,7 +2424,7 @@ int hfa384x_drvr_ramdl_write(hfa384x_t *hw, u32 daddr, void *buf, u32 len)
 *	0		success
 *	>0		f/w reported error - f/w status code
 *	<0		driver reported error
-*	-ETIMEOUT	timout waiting for the cmd regs to become
+*	-ETIMEDOUT	timout waiting for the cmd regs to become
 *			available, or waiting for the control reg
 *			to indicate the Aux port is enabled.
 *	-ENODATA	the buffer does NOT contain a valid PDA.
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 820c5af..fcd107a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -902,7 +902,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  * Returns:
  *  0 		on success
  * -EINTR 	when interrupted by a signal
- * -ETIMEOUT	when the timeout expired
+ * -ETIMEDOUT	when the timeout expired
  * -EDEADLK	when the lock would deadlock (when deadlock detection is on)
  */
 int
-- 
cgit v0.10.2


From ff2f5ff0cf224780c3bd035d3e6ff4a30fcacae7 Mon Sep 17 00:00:00 2001
From: Matt Kraai <kraai@ftbfs.org>
Date: Thu, 4 Jun 2009 21:43:10 -0700
Subject: trivial: Remove the hyphen from git commands

Signed-off-by: Matt Kraai <kraai@ftbfs.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/.gitignore b/.gitignore
index 51bd99d..caa8395 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,7 @@
 # subdirectories here. Add them in the ".gitignore" file
 # in that subdirectory instead.
 #
-# NOTE! Please use 'git-ls-files -i --exclude-standard'
+# NOTE! Please use 'git ls-files -i --exclude-standard'
 # command after changing this file, to see if there are
 # any tracked files which get ignored after the change.
 #
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
index a956d9b..6308735 100644
--- a/Documentation/trace/kmemtrace.txt
+++ b/Documentation/trace/kmemtrace.txt
@@ -64,7 +64,7 @@ III. Quick usage guide
 CONFIG_KMEMTRACE).
 
 2) Get the userspace tool and build it:
-$ git-clone git://repo.or.cz/kmemtrace-user.git		# current repository
+$ git clone git://repo.or.cz/kmemtrace-user.git		# current repository
 $ cd kmemtrace-user/
 $ ./autogen.sh
 $ ./configure
-- 
cgit v0.10.2


From db5ed9beabc0a2084ab0e0cc46bf911b8bf16fa8 Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Sat, 6 Jun 2009 14:58:56 +0200
Subject: trivial: pci hotplug: adding __init/__exit macros to sgi_hotplug

Trivial patch which adds the __init and __exit macros to the module_init /
module_exit functions from drivers/pci/hotplug/sgi_hotplug.c
linux version 2.6.30-rc8

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index 3eee709..2d6da78 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -679,7 +679,7 @@ alloc_err:
 	return rc;
 }
 
-static int sn_pci_hotplug_init(void)
+static int __init sn_pci_hotplug_init(void)
 {
 	struct pci_bus *pci_bus = NULL;
 	int rc;
@@ -716,7 +716,7 @@ static int sn_pci_hotplug_init(void)
 	return registered == 1 ? 0 : -ENODEV;
 }
 
-static void sn_pci_hotplug_exit(void)
+static void __exit sn_pci_hotplug_exit(void)
 {
 	struct hotplug_slot *bss_hotplug_slot;
 
-- 
cgit v0.10.2


From 6e2216895421b4f83d2ebac15c9d9506dc105cff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?N=C3=A9meth=20M=C3=A1rton?= <nm127@freemail.hu>
Date: Sat, 6 Jun 2009 19:06:36 +0200
Subject: trivial: usb: fix missing space typo in doc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Márton Németh <nm127@freemail.hu>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt
index 7c81241..bfb36b3 100644
--- a/Documentation/usb/callbacks.txt
+++ b/Documentation/usb/callbacks.txt
@@ -65,7 +65,7 @@ Accept or decline an interface. If you accept the device return 0,
 otherwise -ENODEV or -ENXIO. Other error codes should be used only if a
 genuine error occurred during initialisation which prevented a driver
 from accepting a device that would else have been accepted.
-You are strongly encouraged to use usbcore'sfacility,
+You are strongly encouraged to use usbcore's facility,
 usb_set_intfdata(), to associate a data structure with an interface, so
 that you know which internal state and identity you associate with a
 particular interface. The device will not be suspended and you may do IO
-- 
cgit v0.10.2


From 590a9887a23b60d9c6ff5a82da757371037edbd4 Mon Sep 17 00:00:00 2001
From: Masatake YAMATO <yamato@redhat.com>
Date: Tue, 9 Jun 2009 10:41:12 +0900
Subject: trivial: Fix a typo in comment of addrconf_dad_start()

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a8218bc..597487a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2765,7 +2765,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		spin_unlock_bh(&ifp->lock);
 		read_unlock_bh(&idev->lock);
 		/*
-		 * If the defice is not ready:
+		 * If the device is not ready:
 		 * - keep it tentative if it is a permanent address.
 		 * - otherwise, kill it.
 		 */
-- 
cgit v0.10.2


From 82d27b2b2f3a80ffa7759a49b9cba39e47df476e Mon Sep 17 00:00:00 2001
From: Markus Heidelberg <markus.heidelberg@web.de>
Date: Fri, 12 Jun 2009 01:02:34 +0200
Subject: trivial: remove the trivial patch monkey's name from
 SubmittingPatches

It is outdated here and can be found in the MAINTAINERS file. Also
remove the URL of the previous maintainer, similar content can be found
in the SubmittingPatches file.

Signed-off-by: Markus Heidelberg <markus.heidelberg@web.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index c282380..ddc903a 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -183,8 +183,9 @@ Even if the maintainer did not respond in step #4, make sure to ALWAYS
 copy the maintainer when you change their code.
 
 For small patches you may want to CC the Trivial Patch Monkey
-trivial@kernel.org managed by Jesper Juhl; which collects "trivial"
-patches. Trivial patches must qualify for one of the following rules:
+trivial@kernel.org which collects "trivial" patches. Have a look
+into the MAINTAINERS file for its current manager.
+Trivial patches must qualify for one of the following rules:
  Spelling fixes in documentation
  Spelling fixes which could break grep(1)
  Warning fixes (cluttering with useless warnings is bad)
@@ -196,7 +197,6 @@ patches. Trivial patches must qualify for one of the following rules:
  since people copy, as long as it's trivial)
  Any fix by the author/maintainer of the file (ie. patch monkey
  in re-transmission mode)
-URL: <http://www.kernel.org/pub/linux/kernel/people/juhl/trivial/>
 
 
-- 
cgit v0.10.2


From 638772c7553f6893f7b346bfee4d46851af59afc Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@marvell.com>
Date: Wed, 11 Feb 2009 17:25:24 +0800
Subject: fb: add support of LCD display controller on pxa168/910 (base layer)

This driver is originally written by Lennert, modified by Green to be
feature complete,  and ported by Jun Nie and Kevin Liu for pxa168/910
processors.

The patch adds support for the on-chip LCD display controller, it
currently supports the base (graphics) layer only.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: Green Wan <gwan@marvell.com>
Cc: Peter Liao <pliao@marvell.com>
Signed-off-by: Jun Nie <njun@marvell.com>
Signed-off-by: Kevin Liu <kliu5@marvell.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 0048f11..13fd66a 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1759,6 +1759,16 @@ config FB_68328
 	  Say Y here if you want to support the built-in frame buffer of
 	  the Motorola 68328 CPU family.
 
+config FB_PXA168
+	tristate "PXA168/910 LCD framebuffer support"
+	depends on FB && (CPU_PXA168 || CPU_PXA910)
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	---help---
+	  Frame buffer driver for the built-in LCD controller in the Marvell
+	  MMP processor.
+
 config FB_PXA
 	tristate "PXA LCD framebuffer support"
 	depends on FB && ARCH_PXA
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index d8d0be5..01a819f 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_FB_GBE)              += gbefb.o
 obj-$(CONFIG_FB_CIRRUS)		  += cirrusfb.o
 obj-$(CONFIG_FB_ASILIANT)	  += asiliantfb.o
 obj-$(CONFIG_FB_PXA)		  += pxafb.o
+obj-$(CONFIG_FB_PXA168)		  += pxa168fb.o
 obj-$(CONFIG_FB_W100)		  += w100fb.o
 obj-$(CONFIG_FB_TMIO)		  += tmiofb.o
 obj-$(CONFIG_FB_AU1100)		  += au1100fb.o
diff --git a/drivers/video/pxa168fb.c b/drivers/video/pxa168fb.c
new file mode 100644
index 0000000..84d8327
--- /dev/null
+++ b/drivers/video/pxa168fb.c
@@ -0,0 +1,803 @@
+/*
+ * linux/drivers/video/pxa168fb.c -- Marvell PXA168 LCD Controller
+ *
+ *  Copyright (C) 2008 Marvell International Ltd.
+ *  All rights reserved.
+ *
+ *  2009-02-16  adapted from original version for PXA168/910
+ *              Jun Nie <njun@marvell.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/fb.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <video/pxa168fb.h>
+
+#include "pxa168fb.h"
+
+#define DEFAULT_REFRESH		60	/* Hz */
+
+static int determine_best_pix_fmt(struct fb_var_screeninfo *var)
+{
+	/*
+	 * Pseudocolor mode?
+	 */
+	if (var->bits_per_pixel == 8)
+		return PIX_FMT_PSEUDOCOLOR;
+
+	/*
+	 * Check for 565/1555.
+	 */
+	if (var->bits_per_pixel == 16 && var->red.length <= 5 &&
+	    var->green.length <= 6 && var->blue.length <= 5) {
+		if (var->transp.length == 0) {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGB565;
+			else
+				return PIX_FMT_BGR565;
+		}
+
+		if (var->transp.length == 1 && var->green.length <= 5) {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGB1555;
+			else
+				return PIX_FMT_BGR1555;
+		}
+
+		/* fall through */
+	}
+
+	/*
+	 * Check for 888/A888.
+	 */
+	if (var->bits_per_pixel <= 32 && var->red.length <= 8 &&
+	    var->green.length <= 8 && var->blue.length <= 8) {
+		if (var->bits_per_pixel == 24 && var->transp.length == 0) {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGB888PACK;
+			else
+				return PIX_FMT_BGR888PACK;
+		}
+
+		if (var->bits_per_pixel == 32 && var->transp.length == 8) {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGBA888;
+			else
+				return PIX_FMT_BGRA888;
+		} else {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGB888UNPACK;
+			else
+				return PIX_FMT_BGR888UNPACK;
+		}
+
+		/* fall through */
+	}
+
+	return -EINVAL;
+}
+
+static void set_pix_fmt(struct fb_var_screeninfo *var, int pix_fmt)
+{
+	switch (pix_fmt) {
+	case PIX_FMT_RGB565:
+		var->bits_per_pixel = 16;
+		var->red.offset = 11;    var->red.length = 5;
+		var->green.offset = 5;   var->green.length = 6;
+		var->blue.offset = 0;    var->blue.length = 5;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	case PIX_FMT_BGR565:
+		var->bits_per_pixel = 16;
+		var->red.offset = 0;     var->red.length = 5;
+		var->green.offset = 5;   var->green.length = 6;
+		var->blue.offset = 11;   var->blue.length = 5;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	case PIX_FMT_RGB1555:
+		var->bits_per_pixel = 16;
+		var->red.offset = 10;    var->red.length = 5;
+		var->green.offset = 5;   var->green.length = 5;
+		var->blue.offset = 0;    var->blue.length = 5;
+		var->transp.offset = 15; var->transp.length = 1;
+		break;
+	case PIX_FMT_BGR1555:
+		var->bits_per_pixel = 16;
+		var->red.offset = 0;     var->red.length = 5;
+		var->green.offset = 5;   var->green.length = 5;
+		var->blue.offset = 10;   var->blue.length = 5;
+		var->transp.offset = 15; var->transp.length = 1;
+		break;
+	case PIX_FMT_RGB888PACK:
+		var->bits_per_pixel = 24;
+		var->red.offset = 16;    var->red.length = 8;
+		var->green.offset = 8;   var->green.length = 8;
+		var->blue.offset = 0;    var->blue.length = 8;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	case PIX_FMT_BGR888PACK:
+		var->bits_per_pixel = 24;
+		var->red.offset = 0;     var->red.length = 8;
+		var->green.offset = 8;   var->green.length = 8;
+		var->blue.offset = 16;   var->blue.length = 8;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	case PIX_FMT_RGBA888:
+		var->bits_per_pixel = 32;
+		var->red.offset = 16;    var->red.length = 8;
+		var->green.offset = 8;   var->green.length = 8;
+		var->blue.offset = 0;    var->blue.length = 8;
+		var->transp.offset = 24; var->transp.length = 8;
+		break;
+	case PIX_FMT_BGRA888:
+		var->bits_per_pixel = 32;
+		var->red.offset = 0;     var->red.length = 8;
+		var->green.offset = 8;   var->green.length = 8;
+		var->blue.offset = 16;   var->blue.length = 8;
+		var->transp.offset = 24; var->transp.length = 8;
+		break;
+	case PIX_FMT_PSEUDOCOLOR:
+		var->bits_per_pixel = 8;
+		var->red.offset = 0;     var->red.length = 8;
+		var->green.offset = 0;   var->green.length = 8;
+		var->blue.offset = 0;    var->blue.length = 8;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	}
+}
+
+static void set_mode(struct pxa168fb_info *fbi, struct fb_var_screeninfo *var,
+		     struct fb_videomode *mode, int pix_fmt, int ystretch)
+{
+	struct fb_info *info = fbi->info;
+
+	set_pix_fmt(var, pix_fmt);
+
+	var->xres = mode->xres;
+	var->yres = mode->yres;
+	var->xres_virtual = max(var->xres, var->xres_virtual);
+	if (ystretch)
+		var->yres_virtual = info->fix.smem_len /
+			(var->xres_virtual * (var->bits_per_pixel >> 3));
+	else
+		var->yres_virtual = max(var->yres, var->yres_virtual);
+	var->grayscale = 0;
+	var->accel_flags = FB_ACCEL_NONE;
+	var->pixclock = mode->pixclock;
+	var->left_margin = mode->left_margin;
+	var->right_margin = mode->right_margin;
+	var->upper_margin = mode->upper_margin;
+	var->lower_margin = mode->lower_margin;
+	var->hsync_len = mode->hsync_len;
+	var->vsync_len = mode->vsync_len;
+	var->sync = mode->sync;
+	var->vmode = FB_VMODE_NONINTERLACED;
+	var->rotate = FB_ROTATE_UR;
+}
+
+static int pxa168fb_check_var(struct fb_var_screeninfo *var,
+			      struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	int pix_fmt;
+
+	/*
+	 * Determine which pixel format we're going to use.
+	 */
+	pix_fmt = determine_best_pix_fmt(var);
+	if (pix_fmt < 0)
+		return pix_fmt;
+	set_pix_fmt(var, pix_fmt);
+	fbi->pix_fmt = pix_fmt;
+
+	/*
+	 * Basic geometry sanity checks.
+	 */
+	if (var->xoffset + var->xres > var->xres_virtual)
+		return -EINVAL;
+	if (var->yoffset + var->yres > var->yres_virtual)
+		return -EINVAL;
+	if (var->xres + var->right_margin +
+	    var->hsync_len + var->left_margin > 2048)
+		return -EINVAL;
+	if (var->yres + var->lower_margin +
+	    var->vsync_len + var->upper_margin > 2048)
+		return -EINVAL;
+
+	/*
+	 * Check size of framebuffer.
+	 */
+	if (var->xres_virtual * var->yres_virtual *
+	    (var->bits_per_pixel >> 3) > info->fix.smem_len)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * The hardware clock divider has an integer and a fractional
+ * stage:
+ *
+ *	clk2 = clk_in / integer_divider
+ *	clk_out = clk2 * (1 - (fractional_divider >> 12))
+ *
+ * Calculate integer and fractional divider for given clk_in
+ * and clk_out.
+ */
+static void set_clock_divider(struct pxa168fb_info *fbi,
+			      const struct fb_videomode *m)
+{
+	int divider_int;
+	int needed_pixclk;
+	u64 div_result;
+	u32 x = 0;
+
+	/*
+	 * Notice: The field pixclock is used by linux fb
+	 * is in pixel second. E.g. struct fb_videomode &
+	 * struct fb_var_screeninfo
+	 */
+
+	/*
+	 * Check input values.
+	 */
+	if (!m || !m->pixclock || !m->refresh) {
+		dev_err(fbi->dev, "Input refresh or pixclock is wrong.\n");
+		return;
+	}
+
+	/*
+	 * Using PLL/AXI clock.
+	 */
+	x = 0x80000000;
+
+	/*
+	 * Calc divider according to refresh rate.
+	 */
+	div_result = 1000000000000ll;
+	do_div(div_result, m->pixclock);
+	needed_pixclk = (u32)div_result;
+
+	divider_int = clk_get_rate(fbi->clk) / needed_pixclk;
+
+	/* check whether divisor is too small. */
+	if (divider_int < 2) {
+		dev_warn(fbi->dev, "Warning: clock source is too slow."
+				"Try smaller resolution\n");
+		divider_int = 2;
+	}
+
+	/*
+	 * Set setting to reg.
+	 */
+	x |= divider_int;
+	writel(x, fbi->reg_base + LCD_CFG_SCLK_DIV);
+}
+
+static void set_dma_control0(struct pxa168fb_info *fbi)
+{
+	u32 x;
+
+	/*
+	 * Set bit to enable graphics DMA.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DMA_CTRL0);
+	x |= fbi->active ? 0x00000100 : 0;
+	fbi->active = 0;
+
+	/*
+	 * If we are in a pseudo-color mode, we need to enable
+	 * palette lookup.
+	 */
+	if (fbi->pix_fmt == PIX_FMT_PSEUDOCOLOR)
+		x |= 0x10000000;
+
+	/*
+	 * Configure hardware pixel format.
+	 */
+	x &= ~(0xF << 16);
+	x |= (fbi->pix_fmt >> 1) << 16;
+
+	/*
+	 * Check red and blue pixel swap.
+	 * 1. source data swap
+	 * 2. panel output data swap
+	 */
+	x &= ~(1 << 12);
+	x |= ((fbi->pix_fmt & 1) ^ (fbi->panel_rbswap)) << 12;
+
+	writel(x, fbi->reg_base + LCD_SPU_DMA_CTRL0);
+}
+
+static void set_dma_control1(struct pxa168fb_info *fbi, int sync)
+{
+	u32 x;
+
+	/*
+	 * Configure default bits: vsync triggers DMA, gated clock
+	 * enable, power save enable, configure alpha registers to
+	 * display 100% graphics, and set pixel command.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DMA_CTRL1);
+	x |= 0x2032ff81;
+
+	/*
+	 * We trigger DMA on the falling edge of vsync if vsync is
+	 * active low, or on the rising edge if vsync is active high.
+	 */
+	if (!(sync & FB_SYNC_VERT_HIGH_ACT))
+		x |= 0x08000000;
+
+	writel(x, fbi->reg_base + LCD_SPU_DMA_CTRL1);
+}
+
+static void set_graphics_start(struct fb_info *info, int xoffset, int yoffset)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct fb_var_screeninfo *var = &info->var;
+	int pixel_offset;
+	unsigned long addr;
+
+	pixel_offset = (yoffset * var->xres_virtual) + xoffset;
+
+	addr = fbi->fb_start_dma + (pixel_offset * (var->bits_per_pixel >> 3));
+	writel(addr, fbi->reg_base + LCD_CFG_GRA_START_ADDR0);
+}
+
+static void set_dumb_panel_control(struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct pxa168fb_mach_info *mi = fbi->dev->platform_data;
+	u32 x;
+
+	/*
+	 * Preserve enable flag.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DUMB_CTRL) & 0x00000001;
+
+	x |= (fbi->is_blanked ? 0x7 : mi->dumb_mode) << 28;
+	x |= mi->gpio_output_data << 20;
+	x |= mi->gpio_output_mask << 12;
+	x |= mi->panel_rgb_reverse_lanes ? 0x00000080 : 0;
+	x |= mi->invert_composite_blank ? 0x00000040 : 0;
+	x |= (info->var.sync & FB_SYNC_COMP_HIGH_ACT) ? 0x00000020 : 0;
+	x |= mi->invert_pix_val_ena ? 0x00000010 : 0;
+	x |= (info->var.sync & FB_SYNC_VERT_HIGH_ACT) ? 0 : 0x00000008;
+	x |= (info->var.sync & FB_SYNC_HOR_HIGH_ACT) ? 0 : 0x00000004;
+	x |= mi->invert_pixclock ? 0x00000002 : 0;
+
+	writel(x, fbi->reg_base + LCD_SPU_DUMB_CTRL);
+}
+
+static void set_dumb_screen_dimensions(struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct fb_var_screeninfo *v = &info->var;
+	int x;
+	int y;
+
+	x = v->xres + v->right_margin + v->hsync_len + v->left_margin;
+	y = v->yres + v->lower_margin + v->vsync_len + v->upper_margin;
+
+	writel((y << 16) | x, fbi->reg_base + LCD_SPUT_V_H_TOTAL);
+}
+
+static int pxa168fb_set_par(struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct fb_var_screeninfo *var = &info->var;
+	struct fb_videomode mode;
+	u32 x;
+	struct pxa168fb_mach_info *mi;
+
+	mi = fbi->dev->platform_data;
+
+	/*
+	 * Set additional mode info.
+	 */
+	if (fbi->pix_fmt == PIX_FMT_PSEUDOCOLOR)
+		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+	else
+		info->fix.visual = FB_VISUAL_TRUECOLOR;
+	info->fix.line_length = var->xres_virtual * var->bits_per_pixel / 8;
+	info->fix.ypanstep = var->yres;
+
+	/*
+	 * Disable panel output while we setup the display.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DUMB_CTRL);
+	writel(x & ~1, fbi->reg_base + LCD_SPU_DUMB_CTRL);
+
+	/*
+	 * Configure global panel parameters.
+	 */
+	writel((var->yres << 16) | var->xres,
+		fbi->reg_base + LCD_SPU_V_H_ACTIVE);
+
+	/*
+	 * convet var to video mode
+	 */
+	fb_var_to_videomode(&mode, &info->var);
+
+	/* Calculate clock divisor. */
+	set_clock_divider(fbi, &mode);
+
+	/* Configure dma ctrl regs. */
+	set_dma_control0(fbi);
+	set_dma_control1(fbi, info->var.sync);
+
+	/*
+	 * Configure graphics DMA parameters.
+	 */
+	x = readl(fbi->reg_base + LCD_CFG_GRA_PITCH);
+	x = (x & ~0xFFFF) | ((var->xres_virtual * var->bits_per_pixel) >> 3);
+	writel(x, fbi->reg_base + LCD_CFG_GRA_PITCH);
+	writel((var->yres << 16) | var->xres,
+		fbi->reg_base + LCD_SPU_GRA_HPXL_VLN);
+	writel((var->yres << 16) | var->xres,
+		fbi->reg_base + LCD_SPU_GZM_HPXL_VLN);
+
+	/*
+	 * Configure dumb panel ctrl regs & timings.
+	 */
+	set_dumb_panel_control(info);
+	set_dumb_screen_dimensions(info);
+
+	writel((var->left_margin << 16) | var->right_margin,
+			fbi->reg_base + LCD_SPU_H_PORCH);
+	writel((var->upper_margin << 16) | var->lower_margin,
+			fbi->reg_base + LCD_SPU_V_PORCH);
+
+	/*
+	 * Re-enable panel output.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DUMB_CTRL);
+	writel(x | 1, fbi->reg_base + LCD_SPU_DUMB_CTRL);
+
+	return 0;
+}
+
+static unsigned int chan_to_field(unsigned int chan, struct fb_bitfield *bf)
+{
+	return ((chan & 0xffff) >> (16 - bf->length)) << bf->offset;
+}
+
+static u32 to_rgb(u16 red, u16 green, u16 blue)
+{
+	red >>= 8;
+	green >>= 8;
+	blue >>= 8;
+
+	return (red << 16) | (green << 8) | blue;
+}
+
+static int
+pxa168fb_setcolreg(unsigned int regno, unsigned int red, unsigned int green,
+		 unsigned int blue, unsigned int trans, struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	u32 val;
+
+	if (info->var.grayscale)
+		red = green = blue = (19595 * red + 38470 * green +
+					7471 * blue) >> 16;
+
+	if (info->fix.visual == FB_VISUAL_TRUECOLOR && regno < 16) {
+		val =  chan_to_field(red,   &info->var.red);
+		val |= chan_to_field(green, &info->var.green);
+		val |= chan_to_field(blue , &info->var.blue);
+		fbi->pseudo_palette[regno] = val;
+	}
+
+	if (info->fix.visual == FB_VISUAL_PSEUDOCOLOR && regno < 256) {
+		val = to_rgb(red, green, blue);
+		writel(val, fbi->reg_base + LCD_SPU_SRAM_WRDAT);
+		writel(0x8300 | regno, fbi->reg_base + LCD_SPU_SRAM_CTRL);
+	}
+
+	return 0;
+}
+
+static int pxa168fb_blank(int blank, struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+
+	fbi->is_blanked = (blank == FB_BLANK_UNBLANK) ? 0 : 1;
+	set_dumb_panel_control(info);
+
+	return 0;
+}
+
+static int pxa168fb_pan_display(struct fb_var_screeninfo *var,
+				struct fb_info *info)
+{
+	set_graphics_start(info, var->xoffset, var->yoffset);
+
+	return 0;
+}
+
+static irqreturn_t pxa168fb_handle_irq(int irq, void *dev_id)
+{
+	struct pxa168fb_info *fbi = dev_id;
+	u32 isr = readl(fbi->reg_base + SPU_IRQ_ISR);
+
+	if ((isr & GRA_FRAME_IRQ0_ENA_MASK)) {
+
+		writel(isr & (~GRA_FRAME_IRQ0_ENA_MASK),
+			fbi->reg_base + SPU_IRQ_ISR);
+
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+static struct fb_ops pxa168fb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_check_var	= pxa168fb_check_var,
+	.fb_set_par	= pxa168fb_set_par,
+	.fb_setcolreg	= pxa168fb_setcolreg,
+	.fb_blank	= pxa168fb_blank,
+	.fb_pan_display	= pxa168fb_pan_display,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+};
+
+static int __init pxa168fb_init_mode(struct fb_info *info,
+			      struct pxa168fb_mach_info *mi)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct fb_var_screeninfo *var = &info->var;
+	int ret = 0;
+	u32 total_w, total_h, refresh;
+	u64 div_result;
+	const struct fb_videomode *m;
+
+	/*
+	 * Set default value
+	 */
+	refresh = DEFAULT_REFRESH;
+
+	/* try to find best video mode. */
+	m = fb_find_best_mode(&info->var, &info->modelist);
+	if (m)
+		fb_videomode_to_var(&info->var, m);
+
+	/* Init settings. */
+	var->xres_virtual = var->xres;
+	var->yres_virtual = info->fix.smem_len /
+		(var->xres_virtual * (var->bits_per_pixel >> 3));
+	dev_dbg(fbi->dev, "pxa168fb: find best mode: res = %dx%d\n",
+				var->xres, var->yres);
+
+	/* correct pixclock. */
+	total_w = var->xres + var->left_margin + var->right_margin +
+		  var->hsync_len;
+	total_h = var->yres + var->upper_margin + var->lower_margin +
+		  var->vsync_len;
+
+	div_result = 1000000000000ll;
+	do_div(div_result, total_w * total_h * refresh);
+	var->pixclock = (u32)div_result;
+
+	return ret;
+}
+
+static int __init pxa168fb_probe(struct platform_device *pdev)
+{
+	struct pxa168fb_mach_info *mi;
+	struct fb_info *info = 0;
+	struct pxa168fb_info *fbi = 0;
+	struct resource *res;
+	struct clk *clk;
+	int irq, ret;
+
+	mi = pdev->dev.platform_data;
+	if (mi == NULL) {
+		dev_err(&pdev->dev, "no platform data defined\n");
+		return -EINVAL;
+	}
+
+	clk = clk_get(&pdev->dev, "LCDCLK");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "unable to get LCDCLK");
+		return PTR_ERR(clk);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		dev_err(&pdev->dev, "no IO memory defined\n");
+		return -ENOENT;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no IRQ defined\n");
+		return -ENOENT;
+	}
+
+	info = framebuffer_alloc(sizeof(struct pxa168fb_info), &pdev->dev);
+	if (info == NULL) {
+		clk_put(clk);
+		return -ENOMEM;
+	}
+
+	/* Initialize private data */
+	fbi = info->par;
+	fbi->info = info;
+	fbi->clk = clk;
+	fbi->dev = info->dev = &pdev->dev;
+	fbi->panel_rbswap = mi->panel_rbswap;
+	fbi->is_blanked = 0;
+	fbi->active = mi->active;
+
+	/*
+	 * Initialise static fb parameters.
+	 */
+	info->flags = FBINFO_DEFAULT | FBINFO_PARTIAL_PAN_OK |
+		      FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN;
+	info->node = -1;
+	strlcpy(info->fix.id, mi->id, 16);
+	info->fix.type = FB_TYPE_PACKED_PIXELS;
+	info->fix.type_aux = 0;
+	info->fix.xpanstep = 0;
+	info->fix.ypanstep = 0;
+	info->fix.ywrapstep = 0;
+	info->fix.mmio_start = res->start;
+	info->fix.mmio_len = res->end - res->start + 1;
+	info->fix.accel = FB_ACCEL_NONE;
+	info->fbops = &pxa168fb_ops;
+	info->pseudo_palette = fbi->pseudo_palette;
+
+	/*
+	 * Map LCD controller registers.
+	 */
+	fbi->reg_base = ioremap_nocache(res->start, res->end - res->start);
+	if (fbi->reg_base == NULL) {
+		ret = -ENOMEM;
+		goto failed;
+	}
+
+	/*
+	 * Allocate framebuffer memory.
+	 */
+	info->fix.smem_len = PAGE_ALIGN(DEFAULT_FB_SIZE);
+
+	info->screen_base = dma_alloc_writecombine(fbi->dev, info->fix.smem_len,
+						&fbi->fb_start_dma, GFP_KERNEL);
+	if (info->screen_base == NULL) {
+		ret = -ENOMEM;
+		goto failed;
+	}
+
+	info->fix.smem_start = (unsigned long)fbi->fb_start_dma;
+
+	/*
+	 * Set video mode according to platform data.
+	 */
+	set_mode(fbi, &info->var, mi->modes, mi->pix_fmt, 1);
+
+	fb_videomode_to_modelist(mi->modes, mi->num_modes, &info->modelist);
+
+	/*
+	 * init video mode data.
+	 */
+	pxa168fb_init_mode(info, mi);
+
+	ret = pxa168fb_check_var(&info->var, info);
+	if (ret)
+		goto failed_free_fbmem;
+
+	/*
+	 * Fill in sane defaults.
+	 */
+	ret = pxa168fb_check_var(&info->var, info);
+	if (ret)
+		goto failed;
+
+	/*
+	 * enable controller clock
+	 */
+	clk_enable(fbi->clk);
+
+	pxa168fb_set_par(info);
+
+	/*
+	 * Configure default register values.
+	 */
+	writel(0, fbi->reg_base + LCD_SPU_BLANKCOLOR);
+	writel(mi->io_pin_allocation_mode, fbi->reg_base + SPU_IOPAD_CONTROL);
+	writel(0, fbi->reg_base + LCD_CFG_GRA_START_ADDR1);
+	writel(0, fbi->reg_base + LCD_SPU_GRA_OVSA_HPXL_VLN);
+	writel(0, fbi->reg_base + LCD_SPU_SRAM_PARA0);
+	writel(CFG_CSB_256x32(0x1)|CFG_CSB_256x24(0x1)|CFG_CSB_256x8(0x1),
+		fbi->reg_base + LCD_SPU_SRAM_PARA1);
+
+	/*
+	 * Allocate color map.
+	 */
+	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
+		ret = -ENOMEM;
+		goto failed_free_clk;
+	}
+
+	/*
+	 * Register irq handler.
+	 */
+	ret = request_irq(irq, pxa168fb_handle_irq, IRQF_SHARED,
+					info->fix.id, fbi);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "unable to request IRQ\n");
+		ret = -ENXIO;
+		goto failed_free_cmap;
+	}
+
+	/*
+	 * Enable GFX interrupt
+	 */
+	writel(GRA_FRAME_IRQ0_ENA(0x1), fbi->reg_base + SPU_IRQ_ENA);
+
+	/*
+	 * Register framebuffer.
+	 */
+	ret = register_framebuffer(info);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register pxa168-fb: %d\n", ret);
+		ret = -ENXIO;
+		goto failed_free_irq;
+	}
+
+	platform_set_drvdata(pdev, fbi);
+	return 0;
+
+failed_free_irq:
+	free_irq(irq, fbi);
+failed_free_cmap:
+	fb_dealloc_cmap(&info->cmap);
+failed_free_clk:
+	clk_disable(fbi->clk);
+failed_free_fbmem:
+	dma_free_coherent(fbi->dev, info->fix.smem_len,
+			info->screen_base, fbi->fb_start_dma);
+failed:
+	kfree(info);
+	clk_put(clk);
+
+	dev_err(&pdev->dev, "frame buffer device init failed with %d\n", ret);
+	return ret;
+}
+
+static struct platform_driver pxa168fb_driver = {
+	.driver		= {
+		.name	= "pxa168-fb",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pxa168fb_probe,
+};
+
+static int __devinit pxa168fb_init(void)
+{
+	return platform_driver_register(&pxa168fb_driver);
+}
+module_init(pxa168fb_init);
+
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@marvell.com> "
+	      "Green Wan <gwan@marvell.com>");
+MODULE_DESCRIPTION("Framebuffer driver for PXA168/910");
+MODULE_LICENSE("GPL");
diff --git a/drivers/video/pxa168fb.h b/drivers/video/pxa168fb.h
new file mode 100644
index 0000000..eee0927
--- /dev/null
+++ b/drivers/video/pxa168fb.h
@@ -0,0 +1,558 @@
+#ifndef __PXA168FB_H__
+#define __PXA168FB_H__
+
+/* ------------< LCD register >------------ */
+/* Video Frame 0&1 start address registers */
+#define	LCD_SPU_DMA_START_ADDR_Y0		0x00C0
+#define	LCD_SPU_DMA_START_ADDR_U0		0x00C4
+#define	LCD_SPU_DMA_START_ADDR_V0		0x00C8
+#define LCD_CFG_DMA_START_ADDR_0		0x00CC /* Cmd address */
+#define	LCD_SPU_DMA_START_ADDR_Y1		0x00D0
+#define	LCD_SPU_DMA_START_ADDR_U1		0x00D4
+#define	LCD_SPU_DMA_START_ADDR_V1		0x00D8
+#define LCD_CFG_DMA_START_ADDR_1		0x00DC /* Cmd address */
+
+/* YC & UV Pitch */
+#define LCD_SPU_DMA_PITCH_YC			0x00E0
+#define     SPU_DMA_PITCH_C(c)			((c) << 16)
+#define     SPU_DMA_PITCH_Y(y)			(y)
+#define LCD_SPU_DMA_PITCH_UV			0x00E4
+#define     SPU_DMA_PITCH_V(v)			((v) << 16)
+#define     SPU_DMA_PITCH_U(u)			(u)
+
+/* Video Starting Point on Screen Register */
+#define LCD_SPUT_DMA_OVSA_HPXL_VLN		0x00E8
+#define     CFG_DMA_OVSA_VLN(y)			((y) << 16) /* 0~0xfff */
+#define     CFG_DMA_OVSA_HPXL(x)		(x)     /* 0~0xfff */
+
+/* Video Size Register */
+#define LCD_SPU_DMA_HPXL_VLN			0x00EC
+#define     CFG_DMA_VLN(y)			((y) << 16)
+#define     CFG_DMA_HPXL(x)			(x)
+
+/* Video Size After zooming Register */
+#define LCD_SPU_DZM_HPXL_VLN			0x00F0
+#define     CFG_DZM_VLN(y)			((y) << 16)
+#define     CFG_DZM_HPXL(x)			(x)
+
+/* Graphic Frame 0&1 Starting Address Register */
+#define LCD_CFG_GRA_START_ADDR0			0x00F4
+#define LCD_CFG_GRA_START_ADDR1			0x00F8
+
+/* Graphic Frame Pitch */
+#define LCD_CFG_GRA_PITCH			0x00FC
+
+/* Graphic Starting Point on Screen Register */
+#define LCD_SPU_GRA_OVSA_HPXL_VLN		0x0100
+#define     CFG_GRA_OVSA_VLN(y)			((y) << 16)
+#define     CFG_GRA_OVSA_HPXL(x)		(x)
+
+/* Graphic Size Register */
+#define LCD_SPU_GRA_HPXL_VLN			0x0104
+#define     CFG_GRA_VLN(y)			((y) << 16)
+#define     CFG_GRA_HPXL(x)			(x)
+
+/* Graphic Size after Zooming Register */
+#define LCD_SPU_GZM_HPXL_VLN			0x0108
+#define     CFG_GZM_VLN(y)			((y) << 16)
+#define     CFG_GZM_HPXL(x)			(x)
+
+/* HW Cursor Starting Point on Screen Register */
+#define LCD_SPU_HWC_OVSA_HPXL_VLN		0x010C
+#define     CFG_HWC_OVSA_VLN(y)			((y) << 16)
+#define     CFG_HWC_OVSA_HPXL(x)		(x)
+
+/* HW Cursor Size */
+#define LCD_SPU_HWC_HPXL_VLN			0x0110
+#define     CFG_HWC_VLN(y)			((y) << 16)
+#define     CFG_HWC_HPXL(x)			(x)
+
+/* Total Screen Size Register */
+#define LCD_SPUT_V_H_TOTAL			0x0114
+#define     CFG_V_TOTAL(y)			((y) << 16)
+#define     CFG_H_TOTAL(x)			(x)
+
+/* Total Screen Active Size Register */
+#define LCD_SPU_V_H_ACTIVE			0x0118
+#define     CFG_V_ACTIVE(y)			((y) << 16)
+#define     CFG_H_ACTIVE(x)			(x)
+
+/* Screen H&V Porch Register */
+#define LCD_SPU_H_PORCH				0x011C
+#define     CFG_H_BACK_PORCH(b)			((b) << 16)
+#define     CFG_H_FRONT_PORCH(f)		(f)
+#define LCD_SPU_V_PORCH				0x0120
+#define     CFG_V_BACK_PORCH(b)			((b) << 16)
+#define     CFG_V_FRONT_PORCH(f)		(f)
+
+/* Screen Blank Color Register */
+#define LCD_SPU_BLANKCOLOR			0x0124
+#define     CFG_BLANKCOLOR_MASK			0x00FFFFFF
+#define     CFG_BLANKCOLOR_R_MASK		0x000000FF
+#define     CFG_BLANKCOLOR_G_MASK		0x0000FF00
+#define     CFG_BLANKCOLOR_B_MASK		0x00FF0000
+
+/* HW Cursor Color 1&2 Register */
+#define LCD_SPU_ALPHA_COLOR1			0x0128
+#define     CFG_HWC_COLOR1			0x00FFFFFF
+#define     CFG_HWC_COLOR1_R(red)		((red) << 16)
+#define     CFG_HWC_COLOR1_G(green)		((green) << 8)
+#define     CFG_HWC_COLOR1_B(blue)		(blue)
+#define     CFG_HWC_COLOR1_R_MASK		0x000000FF
+#define     CFG_HWC_COLOR1_G_MASK		0x0000FF00
+#define     CFG_HWC_COLOR1_B_MASK		0x00FF0000
+#define LCD_SPU_ALPHA_COLOR2			0x012C
+#define     CFG_HWC_COLOR2			0x00FFFFFF
+#define     CFG_HWC_COLOR2_R_MASK		0x000000FF
+#define     CFG_HWC_COLOR2_G_MASK		0x0000FF00
+#define     CFG_HWC_COLOR2_B_MASK		0x00FF0000
+
+/* Video YUV Color Key Control */
+#define LCD_SPU_COLORKEY_Y			0x0130
+#define     CFG_CKEY_Y2(y2)			((y2) << 24)
+#define     CFG_CKEY_Y2_MASK			0xFF000000
+#define     CFG_CKEY_Y1(y1)			((y1) << 16)
+#define     CFG_CKEY_Y1_MASK			0x00FF0000
+#define     CFG_CKEY_Y(y)			((y) << 8)
+#define     CFG_CKEY_Y_MASK			0x0000FF00
+#define     CFG_ALPHA_Y(y)			(y)
+#define     CFG_ALPHA_Y_MASK			0x000000FF
+#define LCD_SPU_COLORKEY_U			0x0134
+#define     CFG_CKEY_U2(u2)			((u2) << 24)
+#define     CFG_CKEY_U2_MASK			0xFF000000
+#define     CFG_CKEY_U1(u1)			((u1) << 16)
+#define     CFG_CKEY_U1_MASK			0x00FF0000
+#define     CFG_CKEY_U(u)			((u) << 8)
+#define     CFG_CKEY_U_MASK			0x0000FF00
+#define     CFG_ALPHA_U(u)			(u)
+#define     CFG_ALPHA_U_MASK			0x000000FF
+#define LCD_SPU_COLORKEY_V			0x0138
+#define     CFG_CKEY_V2(v2)			((v2) << 24)
+#define     CFG_CKEY_V2_MASK			0xFF000000
+#define     CFG_CKEY_V1(v1)			((v1) << 16)
+#define     CFG_CKEY_V1_MASK			0x00FF0000
+#define     CFG_CKEY_V(v)			((v) << 8)
+#define     CFG_CKEY_V_MASK			0x0000FF00
+#define     CFG_ALPHA_V(v)			(v)
+#define     CFG_ALPHA_V_MASK			0x000000FF
+
+/* SPI Read Data Register */
+#define LCD_SPU_SPI_RXDATA			0x0140
+
+/* Smart Panel Read Data Register */
+#define LCD_SPU_ISA_RSDATA			0x0144
+#define     ISA_RXDATA_16BIT_1_DATA_MASK	0x000000FF
+#define     ISA_RXDATA_16BIT_2_DATA_MASK	0x0000FF00
+#define     ISA_RXDATA_16BIT_3_DATA_MASK	0x00FF0000
+#define     ISA_RXDATA_16BIT_4_DATA_MASK	0xFF000000
+#define     ISA_RXDATA_32BIT_1_DATA_MASK	0x00FFFFFF
+
+/* HWC SRAM Read Data Register */
+#define LCD_SPU_HWC_RDDAT			0x0158
+
+/* Gamma Table SRAM Read Data Register */
+#define LCD_SPU_GAMMA_RDDAT			0x015c
+#define     CFG_GAMMA_RDDAT_MASK		0x000000FF
+
+/* Palette Table SRAM Read Data Register */
+#define LCD_SPU_PALETTE_RDDAT			0x0160
+#define     CFG_PALETTE_RDDAT_MASK		0x00FFFFFF
+
+/* I/O Pads Input Read Only Register */
+#define LCD_SPU_IOPAD_IN			0x0178
+#define     CFG_IOPAD_IN_MASK			0x0FFFFFFF
+
+/* Reserved Read Only Registers */
+#define LCD_CFG_RDREG5F				0x017C
+#define     IRE_FRAME_CNT_MASK			0x000000C0
+#define     IPE_FRAME_CNT_MASK			0x00000030
+#define     GRA_FRAME_CNT_MASK			0x0000000C  /* Graphic */
+#define     DMA_FRAME_CNT_MASK			0x00000003  /* Video */
+
+/* SPI Control Register. */
+#define LCD_SPU_SPI_CTRL			0x0180
+#define     CFG_SCLKCNT(div)			((div) << 24)  /* 0xFF~0x2 */
+#define     CFG_SCLKCNT_MASK			0xFF000000
+#define     CFG_RXBITS(rx)			((rx) << 16)   /* 0x1F~0x1 */
+#define     CFG_RXBITS_MASK			0x00FF0000
+#define     CFG_TXBITS(tx)			((tx) << 8)    /* 0x1F~0x1 */
+#define     CFG_TXBITS_MASK			0x0000FF00
+#define     CFG_CLKINV(clk)			((clk) << 7)
+#define     CFG_CLKINV_MASK			0x00000080
+#define     CFG_KEEPXFER(transfer)		((transfer) << 6)
+#define     CFG_KEEPXFER_MASK			0x00000040
+#define     CFG_RXBITSTO0(rx)			((rx) << 5)
+#define     CFG_RXBITSTO0_MASK			0x00000020
+#define     CFG_TXBITSTO0(tx)			((tx) << 4)
+#define     CFG_TXBITSTO0_MASK			0x00000010
+#define     CFG_SPI_ENA(spi)			((spi) << 3)
+#define     CFG_SPI_ENA_MASK			0x00000008
+#define     CFG_SPI_SEL(spi)			((spi) << 2)
+#define     CFG_SPI_SEL_MASK			0x00000004
+#define     CFG_SPI_3W4WB(wire)			((wire) << 1)
+#define     CFG_SPI_3W4WB_MASK			0x00000002
+#define     CFG_SPI_START(start)		(start)
+#define     CFG_SPI_START_MASK			0x00000001
+
+/* SPI Tx Data Register */
+#define LCD_SPU_SPI_TXDATA			0x0184
+
+/*
+   1. Smart Pannel 8-bit Bus Control Register.
+   2. AHB Slave Path Data Port Register
+*/
+#define LCD_SPU_SMPN_CTRL			0x0188
+
+/* DMA Control 0 Register */
+#define LCD_SPU_DMA_CTRL0			0x0190
+#define     CFG_NOBLENDING(nb)			((nb) << 31)
+#define     CFG_NOBLENDING_MASK			0x80000000
+#define     CFG_GAMMA_ENA(gn)			((gn) << 30)
+#define     CFG_GAMMA_ENA_MASK			0x40000000
+#define     CFG_CBSH_ENA(cn)			((cn) << 29)
+#define     CFG_CBSH_ENA_MASK			0x20000000
+#define     CFG_PALETTE_ENA(pn)			((pn) << 28)
+#define     CFG_PALETTE_ENA_MASK		0x10000000
+#define     CFG_ARBFAST_ENA(an)			((an) << 27)
+#define     CFG_ARBFAST_ENA_MASK		0x08000000
+#define     CFG_HWC_1BITMOD(mode)		((mode) << 26)
+#define     CFG_HWC_1BITMOD_MASK		0x04000000
+#define     CFG_HWC_1BITENA(mn)			((mn) << 25)
+#define     CFG_HWC_1BITENA_MASK		0x02000000
+#define     CFG_HWC_ENA(cn)		        ((cn) << 24)
+#define     CFG_HWC_ENA_MASK			0x01000000
+#define     CFG_DMAFORMAT(dmaformat)		((dmaformat) << 20)
+#define     CFG_DMAFORMAT_MASK			0x00F00000
+#define     CFG_GRAFORMAT(graformat)		((graformat) << 16)
+#define     CFG_GRAFORMAT_MASK			0x000F0000
+/* for graphic part */
+#define     CFG_GRA_FTOGGLE(toggle)		((toggle) << 15)
+#define     CFG_GRA_FTOGGLE_MASK		0x00008000
+#define     CFG_GRA_HSMOOTH(smooth)		((smooth) << 14)
+#define     CFG_GRA_HSMOOTH_MASK		0x00004000
+#define     CFG_GRA_TSTMODE(test)		((test) << 13)
+#define     CFG_GRA_TSTMODE_MASK		0x00002000
+#define     CFG_GRA_SWAPRB(swap)		((swap) << 12)
+#define     CFG_GRA_SWAPRB_MASK			0x00001000
+#define     CFG_GRA_SWAPUV(swap)		((swap) << 11)
+#define     CFG_GRA_SWAPUV_MASK			0x00000800
+#define     CFG_GRA_SWAPYU(swap)		((swap) << 10)
+#define     CFG_GRA_SWAPYU_MASK			0x00000400
+#define     CFG_YUV2RGB_GRA(cvrt)		((cvrt) << 9)
+#define     CFG_YUV2RGB_GRA_MASK		0x00000200
+#define     CFG_GRA_ENA(gra)			((gra) << 8)
+#define     CFG_GRA_ENA_MASK			0x00000100
+/* for video part */
+#define     CFG_DMA_FTOGGLE(toggle)		((toggle) << 7)
+#define     CFG_DMA_FTOGGLE_MASK		0x00000080
+#define     CFG_DMA_HSMOOTH(smooth)		((smooth) << 6)
+#define     CFG_DMA_HSMOOTH_MASK		0x00000040
+#define     CFG_DMA_TSTMODE(test)		((test) << 5)
+#define     CFG_DMA_TSTMODE_MASK		0x00000020
+#define     CFG_DMA_SWAPRB(swap)		((swap) << 4)
+#define     CFG_DMA_SWAPRB_MASK			0x00000010
+#define     CFG_DMA_SWAPUV(swap)		((swap) << 3)
+#define     CFG_DMA_SWAPUV_MASK			0x00000008
+#define     CFG_DMA_SWAPYU(swap)		((swap) << 2)
+#define     CFG_DMA_SWAPYU_MASK			0x00000004
+#define     CFG_DMA_SWAP_MASK			0x0000001C
+#define     CFG_YUV2RGB_DMA(cvrt)		((cvrt) << 1)
+#define     CFG_YUV2RGB_DMA_MASK		0x00000002
+#define     CFG_DMA_ENA(video)			(video)
+#define     CFG_DMA_ENA_MASK			0x00000001
+
+/* DMA Control 1 Register */
+#define LCD_SPU_DMA_CTRL1			0x0194
+#define     CFG_FRAME_TRIG(trig)		((trig) << 31)
+#define     CFG_FRAME_TRIG_MASK			0x80000000
+#define     CFG_VSYNC_TRIG(trig)		((trig) << 28)
+#define     CFG_VSYNC_TRIG_MASK			0x70000000
+#define     CFG_VSYNC_INV(inv)			((inv) << 27)
+#define     CFG_VSYNC_INV_MASK			0x08000000
+#define     CFG_COLOR_KEY_MODE(cmode)		((cmode) << 24)
+#define     CFG_COLOR_KEY_MASK			0x07000000
+#define     CFG_CARRY(carry)			((carry) << 23)
+#define     CFG_CARRY_MASK			0x00800000
+#define     CFG_LNBUF_ENA(lnbuf)		((lnbuf) << 22)
+#define     CFG_LNBUF_ENA_MASK			0x00400000
+#define     CFG_GATED_ENA(gated)		((gated) << 21)
+#define     CFG_GATED_ENA_MASK			0x00200000
+#define     CFG_PWRDN_ENA(power)		((power) << 20)
+#define     CFG_PWRDN_ENA_MASK			0x00100000
+#define     CFG_DSCALE(dscale)			((dscale) << 18)
+#define     CFG_DSCALE_MASK			0x000C0000
+#define     CFG_ALPHA_MODE(amode)		((amode) << 16)
+#define     CFG_ALPHA_MODE_MASK			0x00030000
+#define     CFG_ALPHA(alpha)			((alpha) << 8)
+#define     CFG_ALPHA_MASK			0x0000FF00
+#define     CFG_PXLCMD(pxlcmd)			(pxlcmd)
+#define     CFG_PXLCMD_MASK			0x000000FF
+
+/* SRAM Control Register */
+#define LCD_SPU_SRAM_CTRL			0x0198
+#define     CFG_SRAM_INIT_WR_RD(mode)		((mode) << 14)
+#define     CFG_SRAM_INIT_WR_RD_MASK		0x0000C000
+#define     CFG_SRAM_ADDR_LCDID(id)		((id) << 8)
+#define     CFG_SRAM_ADDR_LCDID_MASK		0x00000F00
+#define     CFG_SRAM_ADDR(addr)			(addr)
+#define     CFG_SRAM_ADDR_MASK			0x000000FF
+
+/* SRAM Write Data Register */
+#define LCD_SPU_SRAM_WRDAT			0x019C
+
+/* SRAM RTC/WTC Control Register */
+#define LCD_SPU_SRAM_PARA0			0x01A0
+
+/* SRAM Power Down Control Register */
+#define LCD_SPU_SRAM_PARA1			0x01A4
+#define     CFG_CSB_256x32(hwc)			((hwc) << 15)	/* HWC */
+#define     CFG_CSB_256x32_MASK			0x00008000
+#define     CFG_CSB_256x24(palette)		((palette) << 14)	/* Palette */
+#define     CFG_CSB_256x24_MASK			0x00004000
+#define     CFG_CSB_256x8(gamma)		((gamma) << 13)	/* Gamma */
+#define     CFG_CSB_256x8_MASK			0x00002000
+#define     CFG_PDWN256x32(pdwn)		((pdwn) << 7)	/* HWC */
+#define     CFG_PDWN256x32_MASK			0x00000080
+#define     CFG_PDWN256x24(pdwn)		((pdwn) << 6)	/* Palette */
+#define     CFG_PDWN256x24_MASK			0x00000040
+#define     CFG_PDWN256x8(pdwn)			((pdwn) << 5)	/* Gamma */
+#define     CFG_PDWN256x8_MASK			0x00000020
+#define     CFG_PDWN32x32(pdwn)			((pdwn) << 3)
+#define     CFG_PDWN32x32_MASK			0x00000008
+#define     CFG_PDWN16x66(pdwn)			((pdwn) << 2)
+#define     CFG_PDWN16x66_MASK			0x00000004
+#define     CFG_PDWN32x66(pdwn)			((pdwn) << 1)
+#define     CFG_PDWN32x66_MASK			0x00000002
+#define     CFG_PDWN64x66(pdwn)			(pdwn)
+#define     CFG_PDWN64x66_MASK			0x00000001
+
+/* Smart or Dumb Panel Clock Divider */
+#define LCD_CFG_SCLK_DIV			0x01A8
+#define     SCLK_SOURCE_SELECT(src)		((src) << 31)
+#define     SCLK_SOURCE_SELECT_MASK		0x80000000
+#define     CLK_FRACDIV(frac)			((frac) << 16)
+#define     CLK_FRACDIV_MASK			0x0FFF0000
+#define     CLK_INT_DIV(div)			(div)
+#define     CLK_INT_DIV_MASK			0x0000FFFF
+
+/* Video Contrast Register */
+#define LCD_SPU_CONTRAST			0x01AC
+#define     CFG_BRIGHTNESS(bright)		((bright) << 16)
+#define     CFG_BRIGHTNESS_MASK			0xFFFF0000
+#define     CFG_CONTRAST(contrast)		(contrast)
+#define     CFG_CONTRAST_MASK			0x0000FFFF
+
+/* Video Saturation Register */
+#define LCD_SPU_SATURATION			0x01B0
+#define     CFG_C_MULTS(mult)			((mult) << 16)
+#define     CFG_C_MULTS_MASK			0xFFFF0000
+#define     CFG_SATURATION(sat)			(sat)
+#define     CFG_SATURATION_MASK			0x0000FFFF
+
+/* Video Hue Adjust Register */
+#define LCD_SPU_CBSH_HUE			0x01B4
+#define     CFG_SIN0(sin0)			((sin0) << 16)
+#define     CFG_SIN0_MASK			0xFFFF0000
+#define     CFG_COS0(con0)			(con0)
+#define     CFG_COS0_MASK			0x0000FFFF
+
+/* Dump LCD Panel Control Register */
+#define LCD_SPU_DUMB_CTRL			0x01B8
+#define     CFG_DUMBMODE(mode)			((mode) << 28)
+#define     CFG_DUMBMODE_MASK			0xF0000000
+#define     CFG_LCDGPIO_O(data)			((data) << 20)
+#define     CFG_LCDGPIO_O_MASK			0x0FF00000
+#define     CFG_LCDGPIO_ENA(gpio)		((gpio) << 12)
+#define     CFG_LCDGPIO_ENA_MASK		0x000FF000
+#define     CFG_BIAS_OUT(bias)			((bias) << 8)
+#define     CFG_BIAS_OUT_MASK			0x00000100
+#define     CFG_REVERSE_RGB(rRGB)		((rRGB) << 7)
+#define     CFG_REVERSE_RGB_MASK		0x00000080
+#define     CFG_INV_COMPBLANK(blank)		((blank) << 6)
+#define     CFG_INV_COMPBLANK_MASK		0x00000040
+#define     CFG_INV_COMPSYNC(sync)		((sync) << 5)
+#define     CFG_INV_COMPSYNC_MASK		0x00000020
+#define     CFG_INV_HENA(hena)			((hena) << 4)
+#define     CFG_INV_HENA_MASK			0x00000010
+#define     CFG_INV_VSYNC(vsync)		((vsync) << 3)
+#define     CFG_INV_VSYNC_MASK			0x00000008
+#define     CFG_INV_HSYNC(hsync)		((hsync) << 2)
+#define     CFG_INV_HSYNC_MASK			0x00000004
+#define     CFG_INV_PCLK(pclk)			((pclk) << 1)
+#define     CFG_INV_PCLK_MASK			0x00000002
+#define     CFG_DUMB_ENA(dumb)			(dumb)
+#define     CFG_DUMB_ENA_MASK			0x00000001
+
+/* LCD I/O Pads Control Register */
+#define SPU_IOPAD_CONTROL			0x01BC
+#define     CFG_GRA_VM_ENA(vm)			((vm) << 15)        /* gfx */
+#define     CFG_GRA_VM_ENA_MASK			0x00008000
+#define     CFG_DMA_VM_ENA(vm)			((vm) << 13)	/* video */
+#define     CFG_DMA_VM_ENA_MASK			0x00002000
+#define     CFG_CMD_VM_ENA(vm)			((vm) << 13)
+#define     CFG_CMD_VM_ENA_MASK			0x00000800
+#define     CFG_CSC(csc)			((csc) << 8)	/* csc */
+#define     CFG_CSC_MASK			0x00000300
+#define     CFG_AXICTRL(axi)			((axi) << 4)
+#define     CFG_AXICTRL_MASK			0x000000F0
+#define     CFG_IOPADMODE(iopad)		(iopad)
+#define     CFG_IOPADMODE_MASK			0x0000000F
+
+/* LCD Interrupt Control Register */
+#define SPU_IRQ_ENA				0x01C0
+#define     DMA_FRAME_IRQ0_ENA(irq)		((irq) << 31)
+#define     DMA_FRAME_IRQ0_ENA_MASK		0x80000000
+#define     DMA_FRAME_IRQ1_ENA(irq)		((irq) << 30)
+#define     DMA_FRAME_IRQ1_ENA_MASK		0x40000000
+#define     DMA_FF_UNDERFLOW_ENA(ff)		((ff) << 29)
+#define     DMA_FF_UNDERFLOW_ENA_MASK		0x20000000
+#define     GRA_FRAME_IRQ0_ENA(irq)		((irq) << 27)
+#define     GRA_FRAME_IRQ0_ENA_MASK		0x08000000
+#define     GRA_FRAME_IRQ1_ENA(irq)		((irq) << 26)
+#define     GRA_FRAME_IRQ1_ENA_MASK		0x04000000
+#define     GRA_FF_UNDERFLOW_ENA(ff)		((ff) << 25)
+#define     GRA_FF_UNDERFLOW_ENA_MASK		0x02000000
+#define     VSYNC_IRQ_ENA(vsync_irq)		((vsync_irq) << 23)
+#define     VSYNC_IRQ_ENA_MASK			0x00800000
+#define     DUMB_FRAMEDONE_ENA(fdone)		((fdone) << 22)
+#define     DUMB_FRAMEDONE_ENA_MASK		0x00400000
+#define     TWC_FRAMEDONE_ENA(fdone)		((fdone) << 21)
+#define     TWC_FRAMEDONE_ENA_MASK		0x00200000
+#define     HWC_FRAMEDONE_ENA(fdone)		((fdone) << 20)
+#define     HWC_FRAMEDONE_ENA_MASK		0x00100000
+#define     SLV_IRQ_ENA(irq)			((irq) << 19)
+#define     SLV_IRQ_ENA_MASK			0x00080000
+#define     SPI_IRQ_ENA(irq)			((irq) << 18)
+#define     SPI_IRQ_ENA_MASK			0x00040000
+#define     PWRDN_IRQ_ENA(irq)			((irq) << 17)
+#define     PWRDN_IRQ_ENA_MASK			0x00020000
+#define     ERR_IRQ_ENA(irq)			((irq) << 16)
+#define     ERR_IRQ_ENA_MASK			0x00010000
+#define     CLEAN_SPU_IRQ_ISR(irq)		(irq)
+#define     CLEAN_SPU_IRQ_ISR_MASK		0x0000FFFF
+
+/* LCD Interrupt Status Register */
+#define SPU_IRQ_ISR				0x01C4
+#define     DMA_FRAME_IRQ0(irq)			((irq) << 31)
+#define     DMA_FRAME_IRQ0_MASK			0x80000000
+#define     DMA_FRAME_IRQ1(irq)			((irq) << 30)
+#define     DMA_FRAME_IRQ1_MASK			0x40000000
+#define     DMA_FF_UNDERFLOW(ff)		((ff) << 29)
+#define     DMA_FF_UNDERFLOW_MASK		0x20000000
+#define     GRA_FRAME_IRQ0(irq)			((irq) << 27)
+#define     GRA_FRAME_IRQ0_MASK			0x08000000
+#define     GRA_FRAME_IRQ1(irq)			((irq) << 26)
+#define     GRA_FRAME_IRQ1_MASK			0x04000000
+#define     GRA_FF_UNDERFLOW(ff)		((ff) << 25)
+#define     GRA_FF_UNDERFLOW_MASK		0x02000000
+#define     VSYNC_IRQ(vsync_irq)		((vsync_irq) << 23)
+#define     VSYNC_IRQ_MASK			0x00800000
+#define     DUMB_FRAMEDONE(fdone)		((fdone) << 22)
+#define     DUMB_FRAMEDONE_MASK			0x00400000
+#define     TWC_FRAMEDONE(fdone)		((fdone) << 21)
+#define     TWC_FRAMEDONE_MASK			0x00200000
+#define     HWC_FRAMEDONE(fdone)		((fdone) << 20)
+#define     HWC_FRAMEDONE_MASK			0x00100000
+#define     SLV_IRQ(irq)			((irq) << 19)
+#define     SLV_IRQ_MASK			0x00080000
+#define     SPI_IRQ(irq)			((irq) << 18)
+#define     SPI_IRQ_MASK			0x00040000
+#define     PWRDN_IRQ(irq)			((irq) << 17)
+#define     PWRDN_IRQ_MASK			0x00020000
+#define     ERR_IRQ(irq)			((irq) << 16)
+#define     ERR_IRQ_MASK			0x00010000
+/* read-only */
+#define     DMA_FRAME_IRQ0_LEVEL_MASK		0x00008000
+#define     DMA_FRAME_IRQ1_LEVEL_MASK		0x00004000
+#define     DMA_FRAME_CNT_ISR_MASK		0x00003000
+#define     GRA_FRAME_IRQ0_LEVEL_MASK		0x00000800
+#define     GRA_FRAME_IRQ1_LEVEL_MASK		0x00000400
+#define     GRA_FRAME_CNT_ISR_MASK		0x00000300
+#define     VSYNC_IRQ_LEVEL_MASK		0x00000080
+#define     DUMB_FRAMEDONE_LEVEL_MASK		0x00000040
+#define     TWC_FRAMEDONE_LEVEL_MASK		0x00000020
+#define     HWC_FRAMEDONE_LEVEL_MASK		0x00000010
+#define     SLV_FF_EMPTY_MASK			0x00000008
+#define     DMA_FF_ALLEMPTY_MASK		0x00000004
+#define     GRA_FF_ALLEMPTY_MASK		0x00000002
+#define     PWRDN_IRQ_LEVEL_MASK		0x00000001
+
+
+/*
+ * defined Video Memory Color format for DMA control 0 register
+ * DMA0 bit[23:20]
+ */
+#define VMODE_RGB565		0x0
+#define VMODE_RGB1555		0x1
+#define VMODE_RGB888PACKED	0x2
+#define VMODE_RGB888UNPACKED	0x3
+#define VMODE_RGBA888		0x4
+#define VMODE_YUV422PACKED	0x5
+#define VMODE_YUV422PLANAR	0x6
+#define VMODE_YUV420PLANAR	0x7
+#define VMODE_SMPNCMD		0x8
+#define VMODE_PALETTE4BIT	0x9
+#define VMODE_PALETTE8BIT	0xa
+#define VMODE_RESERVED		0xb
+
+/*
+ * defined Graphic Memory Color format for DMA control 0 register
+ * DMA0 bit[19:16]
+ */
+#define GMODE_RGB565		0x0
+#define GMODE_RGB1555		0x1
+#define GMODE_RGB888PACKED	0x2
+#define GMODE_RGB888UNPACKED	0x3
+#define GMODE_RGBA888		0x4
+#define GMODE_YUV422PACKED	0x5
+#define GMODE_YUV422PLANAR	0x6
+#define GMODE_YUV420PLANAR	0x7
+#define GMODE_SMPNCMD		0x8
+#define GMODE_PALETTE4BIT	0x9
+#define GMODE_PALETTE8BIT	0xa
+#define GMODE_RESERVED		0xb
+
+/*
+ * define for DMA control 1 register
+ */
+#define DMA1_FRAME_TRIG		31 /* bit location */
+#define DMA1_VSYNC_MODE		28
+#define DMA1_VSYNC_INV		27
+#define DMA1_CKEY		24
+#define DMA1_CARRY		23
+#define DMA1_LNBUF_ENA		22
+#define DMA1_GATED_ENA		21
+#define DMA1_PWRDN_ENA		20
+#define DMA1_DSCALE		18
+#define DMA1_ALPHA_MODE		16
+#define DMA1_ALPHA		08
+#define DMA1_PXLCMD		00
+
+/*
+ * defined for Configure Dumb Mode
+ * DUMB LCD Panel bit[31:28]
+ */
+#define DUMB16_RGB565_0		0x0
+#define DUMB16_RGB565_1		0x1
+#define DUMB18_RGB666_0		0x2
+#define DUMB18_RGB666_1		0x3
+#define DUMB12_RGB444_0		0x4
+#define DUMB12_RGB444_1		0x5
+#define DUMB24_RGB888_0		0x6
+#define DUMB_BLANK		0x7
+
+/*
+ * defined for Configure I/O Pin Allocation Mode
+ * LCD LCD I/O Pads control register bit[3:0]
+ */
+#define IOPAD_DUMB24		0x0
+#define IOPAD_DUMB18SPI		0x1
+#define IOPAD_DUMB18GPIO	0x2
+#define IOPAD_DUMB16SPI		0x3
+#define IOPAD_DUMB16GPIO	0x4
+#define IOPAD_DUMB12		0x5
+#define IOPAD_SMART18SPI	0x6
+#define IOPAD_SMART16SPI	0x7
+#define IOPAD_SMART8BOTH	0x8
+
+#endif /* __PXA168FB_H__ */
diff --git a/include/video/pxa168fb.h b/include/video/pxa168fb.h
new file mode 100644
index 0000000..b5cc72f
--- /dev/null
+++ b/include/video/pxa168fb.h
@@ -0,0 +1,127 @@
+/*
+ * linux/arch/arm/mach-mmp/include/mach/pxa168fb.h
+ *
+ *  Copyright (C) 2009 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MACH_PXA168FB_H
+#define __ASM_MACH_PXA168FB_H
+
+#include <linux/fb.h>
+#include <linux/interrupt.h>
+
+/* Dumb interface */
+#define PIN_MODE_DUMB_24		0
+#define PIN_MODE_DUMB_18_SPI		1
+#define PIN_MODE_DUMB_18_GPIO		2
+#define PIN_MODE_DUMB_16_SPI		3
+#define PIN_MODE_DUMB_16_GPIO		4
+#define PIN_MODE_DUMB_12_SPI_GPIO	5
+#define PIN_MODE_SMART_18_SPI		6
+#define PIN_MODE_SMART_16_SPI		7
+#define PIN_MODE_SMART_8_SPI_GPIO	8
+
+/* Dumb interface pin allocation */
+#define DUMB_MODE_RGB565		0
+#define DUMB_MODE_RGB565_UPPER		1
+#define DUMB_MODE_RGB666		2
+#define DUMB_MODE_RGB666_UPPER		3
+#define DUMB_MODE_RGB444		4
+#define DUMB_MODE_RGB444_UPPER		5
+#define DUMB_MODE_RGB888		6
+
+/* default fb buffer size WVGA-32bits */
+#define DEFAULT_FB_SIZE	(800 * 480 * 4)
+
+/*
+ * Buffer pixel format
+ * bit0 is for rb swap.
+ * bit12 is for Y UorV swap
+ */
+#define PIX_FMT_RGB565		0
+#define PIX_FMT_BGR565		1
+#define PIX_FMT_RGB1555		2
+#define PIX_FMT_BGR1555		3
+#define PIX_FMT_RGB888PACK	4
+#define PIX_FMT_BGR888PACK	5
+#define PIX_FMT_RGB888UNPACK	6
+#define PIX_FMT_BGR888UNPACK	7
+#define PIX_FMT_RGBA888		8
+#define PIX_FMT_BGRA888		9
+#define PIX_FMT_YUV422PACK	10
+#define PIX_FMT_YVU422PACK	11
+#define PIX_FMT_YUV422PLANAR	12
+#define PIX_FMT_YVU422PLANAR	13
+#define PIX_FMT_YUV420PLANAR	14
+#define PIX_FMT_YVU420PLANAR	15
+#define PIX_FMT_PSEUDOCOLOR	20
+#define PIX_FMT_UYVY422PACK	(0x1000|PIX_FMT_YUV422PACK)
+
+/*
+ * PXA LCD controller private state.
+ */
+struct pxa168fb_info {
+	struct device		*dev;
+	struct clk		*clk;
+	struct fb_info		*info;
+
+	void __iomem		*reg_base;
+	dma_addr_t		fb_start_dma;
+	u32			pseudo_palette[16];
+
+	int			pix_fmt;
+	unsigned		is_blanked:1;
+	unsigned		panel_rbswap:1;
+	unsigned		active:1;
+};
+
+/*
+ * PXA fb machine information
+ */
+struct pxa168fb_mach_info {
+	char	id[16];
+
+	int		num_modes;
+	struct fb_videomode *modes;
+
+	/*
+	 * Pix_fmt
+	 */
+	unsigned	pix_fmt;
+
+	/*
+	 * I/O pin allocation.
+	 */
+	unsigned	io_pin_allocation_mode:4;
+
+	/*
+	 * Dumb panel -- assignment of R/G/B component info to the 24
+	 * available external data lanes.
+	 */
+	unsigned	dumb_mode:4;
+	unsigned	panel_rgb_reverse_lanes:1;
+
+	/*
+	 * Dumb panel -- GPIO output data.
+	 */
+	unsigned	gpio_output_mask:8;
+	unsigned	gpio_output_data:8;
+
+	/*
+	 * Dumb panel -- configurable output signal polarity.
+	 */
+	unsigned	invert_composite_blank:1;
+	unsigned	invert_pix_val_ena:1;
+	unsigned	invert_pixclock:1;
+	unsigned	invert_vsync:1;
+	unsigned	invert_hsync:1;
+	unsigned	panel_rbswap:1;
+	unsigned	active:1;
+	unsigned	enable_lcd:1;
+};
+
+#endif /* __ASM_MACH_PXA168FB_H */
-- 
cgit v0.10.2


From 7517b3fbe40c231d79d36f31c1e9930cbb8c4be2 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.y.miao@gmail.com>
Date: Sat, 13 Jun 2009 00:10:17 +0800
Subject: MAINTAINERS: update Eric Miao's email address and status

Signed-off-by: Eric Miao <eric.y.miao@gmail.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index b1c449f..9146a1c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4571,7 +4571,7 @@ F:	drivers/media/video/pvrusb2/
 
 PXA2xx/PXA3xx SUPPORT
 P:	Eric Miao
-M:	eric.miao@marvell.com
+M:	eric.y.miao@gmail.com
 P:	Russell King
 M:	linux@arm.linux.org.uk
 L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
@@ -4585,19 +4585,19 @@ F:	sound/soc/pxa/pxa2xx*
 
 PXA168 SUPPORT
 P:	Eric Miao
-M:	eric.miao@marvell.com
+M:	eric.y.miao@gmail.com
 P:	Jason Chagas
 M:	jason.chagas@marvell.com
 L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
-S:	Supported
+S:	Maintained
 
 PXA910 SUPPORT
 P:	Eric Miao
-M:	eric.miao@marvell.com
+M:	eric.y.miao@gmail.com
 L:	linux-arm-kernel@lists.arm.linux.org.uk	(subscribers-only)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
-S:	Supported
+S:	Maintained
 
 PXA MMCI DRIVER
 S:	Orphan
-- 
cgit v0.10.2


From 018df72dd01576ab199c6129233cdeaf1409958b Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 12 Jun 2009 13:17:43 -0400
Subject: perf_counter: Start documenting HAVE_PERF_COUNTERS requirements

Help out arch porters who want to support perf counters by listing some
basic requirements.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1244827063-24046-1-git-send-email-vapier@gentoo.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/init/Kconfig b/init/Kconfig
index c649657..d3a5096 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -936,6 +936,8 @@ config AIO
 
 config HAVE_PERF_COUNTERS
 	bool
+	help
+	  See tools/perf/design.txt for details.
 
 menu "Performance Counters"
 
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 860e116..f71e0d2 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -440,3 +440,18 @@ by this process or by another, and doesn't affect any counters that
 this process has created on other processes.  It only enables or
 disables the group leaders, not any other members in the groups.
 
+
+Arch requirements
+-----------------
+
+If your architecture does not have hardware performance metrics, you can
+still use the generic software counters based on hrtimers for sampling.
+
+So to start with, in order to add HAVE_PERF_COUNTERS to your Kconfig, you
+will need at least this:
+	- asm/perf_counter.h - a basic stub will suffice at first
+	- support for atomic64 types (and associated helper functions)
+	- set_perf_counter_pending() implemented
+
+If your architecture does have hardware capabilities, you can override the
+weak stub hw_perf_counter_init() to register hardware counters.
-- 
cgit v0.10.2


From ce4b3c55475e451cb489e857640396c37ca88974 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sat, 18 Apr 2009 13:44:57 +0200
Subject: PM/ACPI/x86: Fix sparse warning in arch/x86/kernel/acpi/sleep.c

One of the numbers in arch/x86/kernel/acpi/sleep.c is long, but it is
not annotated appropriately, so sparese warns about it.  Fix that.

[rjw: added the changelog.]

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 7c243a2..ca93638 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -104,7 +104,7 @@ int acpi_save_state_mem(void)
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
-	saved_magic = 0x123456789abcdef0;
+       saved_magic = 0x123456789abcdef0L;
 #endif /* CONFIG_64BIT */
 
 	return 0;
-- 
cgit v0.10.2


From 62b0124761b9c2e304ee07dcf4db46c4a3dfec11 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 18 Apr 2009 13:45:13 +0200
Subject: PM: Warn if interrupts are enabled during suspend-resume of sysdevs

Sysdevs have to be suspended and resumed with interrupts disabled and
things usually break in a way that's difficult to debug if one of
sysdev drivers enables interrupts by mistake during suspend or
resume.  Add extra checks that will generate warnings in such cases.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index 3236b43..9742a78 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -343,11 +343,15 @@ static void __sysdev_resume(struct sys_device *dev)
 	/* First, call the class-specific one */
 	if (cls->resume)
 		cls->resume(dev);
+	WARN_ONCE(!irqs_disabled(),
+		"Interrupts enabled after %pF\n", cls->resume);
 
 	/* Call auxillary drivers next. */
 	list_for_each_entry(drv, &cls->drivers, entry) {
 		if (drv->resume)
 			drv->resume(dev);
+		WARN_ONCE(!irqs_disabled(),
+			"Interrupts enabled after %pF\n", drv->resume);
 	}
 }
 
@@ -377,6 +381,9 @@ int sysdev_suspend(pm_message_t state)
 	if (ret)
 		return ret;
 
+	WARN_ONCE(!irqs_disabled(),
+		"Interrupts enabled while suspending system devices\n");
+
 	pr_debug("Suspending System Devices\n");
 
 	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
@@ -393,6 +400,9 @@ int sysdev_suspend(pm_message_t state)
 					if (ret)
 						goto aux_driver;
 				}
+				WARN_ONCE(!irqs_disabled(),
+					"Interrupts enabled after %pF\n",
+					drv->suspend);
 			}
 
 			/* Now call the generic one */
@@ -400,6 +410,9 @@ int sysdev_suspend(pm_message_t state)
 				ret = cls->suspend(sysdev, state);
 				if (ret)
 					goto cls_driver;
+				WARN_ONCE(!irqs_disabled(),
+					"Interrupts enabled after %pF\n",
+					cls->suspend);
 			}
 		}
 	}
@@ -452,6 +465,9 @@ int sysdev_resume(void)
 {
 	struct sysdev_class *cls;
 
+	WARN_ONCE(!irqs_disabled(),
+		"Interrupts enabled while resuming system devices\n");
+
 	pr_debug("Resuming System Devices\n");
 
 	list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) {
-- 
cgit v0.10.2


From f6783d20d4b85b360b4a6f86bdbd9282a4a7004c Mon Sep 17 00:00:00 2001
From: Sergio Luis <sergio@larces.uece.br>
Date: Tue, 28 Apr 2009 00:26:22 +0200
Subject: x86: unify power/cpu_(32|64) headers

First step towards the unification of cpu_32.c and cpu_64.c.
This commit unifies the headers of such files, making both
of them use the same header files. It also remove the uneeded
<module.h>.

Signed-off-by: Sergio Luis <sergio@larces.uece.br>
Signed-off-by: Lauro Salmito <laurosalmito@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index ce702c5..12a9c87 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -7,9 +7,13 @@
  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
  */
 
-#include <linux/module.h>
 #include <linux/suspend.h>
+#include <linux/smp.h>
+
+#include <asm/pgtable.h>
+#include <asm/proto.h>
 #include <asm/mtrr.h>
+#include <asm/page.h>
 #include <asm/mce.h>
 #include <asm/xcr.h>
 #include <asm/suspend.h>
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 5343540..39b27b7 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -8,12 +8,14 @@
  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
  */
 
-#include <linux/smp.h>
 #include <linux/suspend.h>
-#include <asm/proto.h>
-#include <asm/page.h>
+#include <linux/smp.h>
+
 #include <asm/pgtable.h>
+#include <asm/proto.h>
 #include <asm/mtrr.h>
+#include <asm/page.h>
+#include <asm/mce.h>
 #include <asm/xcr.h>
 #include <asm/suspend.h>
 
-- 
cgit v0.10.2


From 833b2ca0795526898a66c7b6770273bb16567e19 Mon Sep 17 00:00:00 2001
From: Sergio Luis <sergio@larces.uece.br>
Date: Tue, 28 Apr 2009 00:26:50 +0200
Subject: x86: unify power/cpu_(32|64) global variables

Aiming total unification of cpu_32.c and cpu_64.c, in this step
we do unify the global variables and existing forward declarations
for such files.

Signed-off-by: Sergio Luis <sergio@larces.uece.br>
Signed-off-by: Lauro Salmito <laurosalmito@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index 12a9c87..de1a86b 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -18,12 +18,19 @@
 #include <asm/xcr.h>
 #include <asm/suspend.h>
 
+#ifdef CONFIG_X86_32
 static struct saved_context saved_context;
 
 unsigned long saved_context_ebx;
 unsigned long saved_context_esp, saved_context_ebp;
 unsigned long saved_context_esi, saved_context_edi;
 unsigned long saved_context_eflags;
+#else
+/* CONFIG_X86_64 */
+static void fix_processor_context(void);
+
+struct saved_context saved_context;
+#endif
 
 static void __save_processor_state(struct saved_context *ctxt)
 {
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 39b27b7..6ce0eca 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -19,9 +19,19 @@
 #include <asm/xcr.h>
 #include <asm/suspend.h>
 
+#ifdef CONFIG_X86_32
+static struct saved_context saved_context;
+
+unsigned long saved_context_ebx;
+unsigned long saved_context_esp, saved_context_ebp;
+unsigned long saved_context_esi, saved_context_edi;
+unsigned long saved_context_eflags;
+#else
+/* CONFIG_X86_64 */
 static void fix_processor_context(void);
 
 struct saved_context saved_context;
+#endif
 
 /**
  *	__save_processor_state - save CPU registers before creating a
-- 
cgit v0.10.2


From f9ebbe53e79c5978d0e8ead0843a3717b41ad3d5 Mon Sep 17 00:00:00 2001
From: Sergio Luis <sergio@larces.uece.br>
Date: Tue, 28 Apr 2009 00:27:00 +0200
Subject: x86: unify power/cpu_(32|64) regarding saving processor state

In this step we do unify cpu_32.c and cpu_64.c functions that
work on saving the processor state.

Signed-off-by: Sergio Luis <sergio@larces.uece.br>
Signed-off-by: Lauro Salmito <laurosalmito@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index de1a86b..294e78b 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -32,25 +32,65 @@ static void fix_processor_context(void);
 struct saved_context saved_context;
 #endif
 
+/**
+ *	__save_processor_state - save CPU registers before creating a
+ *		hibernation image and before restoring the memory state from it
+ *	@ctxt - structure to store the registers contents in
+ *
+ *	NOTE: If there is a CPU register the modification of which by the
+ *	boot kernel (ie. the kernel used for loading the hibernation image)
+ *	might affect the operations of the restored target kernel (ie. the one
+ *	saved in the hibernation image), then its contents must be saved by this
+ *	function.  In other words, if kernel A is hibernated and different
+ *	kernel B is used for loading the hibernation image into memory, the
+ *	kernel A's __save_processor_state() function must save all registers
+ *	needed by kernel A, so that it can operate correctly after the resume
+ *	regardless of what kernel B does in the meantime.
+ */
 static void __save_processor_state(struct saved_context *ctxt)
 {
+#ifdef CONFIG_X86_32
 	mtrr_save_fixed_ranges(NULL);
+#endif
 	kernel_fpu_begin();
 
 	/*
 	 * descriptor tables
 	 */
+#ifdef CONFIG_X86_32
 	store_gdt(&ctxt->gdt);
 	store_idt(&ctxt->idt);
+#else
+/* CONFIG_X86_64 */
+	store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
+	store_idt((struct desc_ptr *)&ctxt->idt_limit);
+#endif
 	store_tr(ctxt->tr);
 
+	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
 	/*
 	 * segment registers
 	 */
+#ifdef CONFIG_X86_32
 	savesegment(es, ctxt->es);
 	savesegment(fs, ctxt->fs);
 	savesegment(gs, ctxt->gs);
 	savesegment(ss, ctxt->ss);
+#else
+/* CONFIG_X86_64 */
+	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
+	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
+	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
+	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
+	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+
+	rdmsrl(MSR_FS_BASE, ctxt->fs_base);
+	rdmsrl(MSR_GS_BASE, ctxt->gs_base);
+	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	mtrr_save_fixed_ranges(NULL);
+
+	rdmsrl(MSR_EFER, ctxt->efer);
+#endif
 
 	/*
 	 * control registers
@@ -58,7 +98,13 @@ static void __save_processor_state(struct saved_context *ctxt)
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();
 	ctxt->cr3 = read_cr3();
+#ifdef CONFIG_X86_32
 	ctxt->cr4 = read_cr4_safe();
+#else
+/* CONFIG_X86_64 */
+	ctxt->cr4 = read_cr4();
+	ctxt->cr8 = read_cr8();
+#endif
 }
 
 /* Needed by apm.c */
@@ -66,7 +112,9 @@ void save_processor_state(void)
 {
 	__save_processor_state(&saved_context);
 }
+#ifdef CONFIG_X86_32
 EXPORT_SYMBOL(save_processor_state);
+#endif
 
 static void do_fpu_end(void)
 {
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 6ce0eca..11ea7d0 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -50,19 +50,35 @@ struct saved_context saved_context;
  */
 static void __save_processor_state(struct saved_context *ctxt)
 {
+#ifdef CONFIG_X86_32
+	mtrr_save_fixed_ranges(NULL);
+#endif
 	kernel_fpu_begin();
 
 	/*
 	 * descriptor tables
 	 */
+#ifdef CONFIG_X86_32
+	store_gdt(&ctxt->gdt);
+	store_idt(&ctxt->idt);
+#else
+/* CONFIG_X86_64 */
 	store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
 	store_idt((struct desc_ptr *)&ctxt->idt_limit);
+#endif
 	store_tr(ctxt->tr);
 
 	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
 	/*
 	 * segment registers
 	 */
+#ifdef CONFIG_X86_32
+	savesegment(es, ctxt->es);
+	savesegment(fs, ctxt->fs);
+	savesegment(gs, ctxt->gs);
+	savesegment(ss, ctxt->ss);
+#else
+/* CONFIG_X86_64 */
 	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
 	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
 	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
@@ -74,21 +90,32 @@ static void __save_processor_state(struct saved_context *ctxt)
 	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
 	mtrr_save_fixed_ranges(NULL);
 
+	rdmsrl(MSR_EFER, ctxt->efer);
+#endif
+
 	/*
 	 * control registers
 	 */
-	rdmsrl(MSR_EFER, ctxt->efer);
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();
 	ctxt->cr3 = read_cr3();
+#ifdef CONFIG_X86_32
+	ctxt->cr4 = read_cr4_safe();
+#else
+/* CONFIG_X86_64 */
 	ctxt->cr4 = read_cr4();
 	ctxt->cr8 = read_cr8();
+#endif
 }
 
+/* Needed by apm.c */
 void save_processor_state(void)
 {
 	__save_processor_state(&saved_context);
 }
+#ifdef CONFIG_X86_32
+EXPORT_SYMBOL(save_processor_state);
+#endif
 
 static void do_fpu_end(void)
 {
-- 
cgit v0.10.2


From 3134d04b7790f7239b221f16c2d97db4d96ac3c0 Mon Sep 17 00:00:00 2001
From: Sergio Luis <sergio@larces.uece.br>
Date: Tue, 28 Apr 2009 00:27:05 +0200
Subject: x86: unify power/cpu_(32|64) regarding restoring processor state

In this step we do unify cpu_32.c and cpu_64.c functions that
work on restoring the saved processor state. Also, we do
eliminate the forward declaration of fix_processor_context()
for X86_64, as it's not needed anymore.

Signed-off-by: Sergio Luis <sergio@larces.uece.br>
Signed-off-by: Lauro Salmito <laurosalmito@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index 294e78b..29b9c0a 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -27,8 +27,6 @@ unsigned long saved_context_esi, saved_context_edi;
 unsigned long saved_context_eflags;
 #else
 /* CONFIG_X86_64 */
-static void fix_processor_context(void);
-
 struct saved_context saved_context;
 #endif
 
@@ -136,6 +134,11 @@ static void fix_processor_context(void)
 				 * similar stupidity.
 				 */
 
+#ifdef CONFIG_X86_64
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
+
+	syscall_init();				/* This sets MSR_*STAR and related */
+#endif
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->active_mm->context);	/* This does lldt */
 
@@ -143,6 +146,7 @@ static void fix_processor_context(void)
 	 * Now maybe reload the debug registers
 	 */
 	if (current->thread.debugreg7) {
+#ifdef CONFIG_X86_32
 		set_debugreg(current->thread.debugreg0, 0);
 		set_debugreg(current->thread.debugreg1, 1);
 		set_debugreg(current->thread.debugreg2, 2);
@@ -150,18 +154,40 @@ static void fix_processor_context(void)
 		/* no 4 and 5 */
 		set_debugreg(current->thread.debugreg6, 6);
 		set_debugreg(current->thread.debugreg7, 7);
+#else
+		/* CONFIG_X86_64 */
+		loaddebug(&current->thread, 0);
+		loaddebug(&current->thread, 1);
+		loaddebug(&current->thread, 2);
+		loaddebug(&current->thread, 3);
+		/* no 4 and 5 */
+		loaddebug(&current->thread, 6);
+		loaddebug(&current->thread, 7);
+#endif
 	}
 
 }
 
+/**
+ *	__restore_processor_state - restore the contents of CPU registers saved
+ *		by __save_processor_state()
+ *	@ctxt - structure to load the registers contents from
+ */
 static void __restore_processor_state(struct saved_context *ctxt)
 {
 	/*
 	 * control registers
 	 */
 	/* cr4 was introduced in the Pentium CPU */
+#ifdef CONFIG_X86_32
 	if (ctxt->cr4)
 		write_cr4(ctxt->cr4);
+#else
+/* CONFIG X86_64 */
+	wrmsrl(MSR_EFER, ctxt->efer);
+	write_cr8(ctxt->cr8);
+	write_cr4(ctxt->cr4);
+#endif
 	write_cr3(ctxt->cr3);
 	write_cr2(ctxt->cr2);
 	write_cr0(ctxt->cr0);
@@ -170,12 +196,19 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	 * now restore the descriptor tables to their proper values
 	 * ltr is done i fix_processor_context().
 	 */
+#ifdef CONFIG_X86_32
 	load_gdt(&ctxt->gdt);
 	load_idt(&ctxt->idt);
+#else
+/* CONFIG_X86_64 */
+	load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
+	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+#endif
 
 	/*
 	 * segment registers
 	 */
+#ifdef CONFIG_X86_32
 	loadsegment(es, ctxt->es);
 	loadsegment(fs, ctxt->fs);
 	loadsegment(gs, ctxt->gs);
@@ -186,6 +219,18 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	 */
 	if (boot_cpu_has(X86_FEATURE_SEP))
 		enable_sep_cpu();
+#else
+/* CONFIG_X86_64 */
+	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
+	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
+	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
+	load_gs_index(ctxt->gs);
+	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
+
+	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
+	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+#endif
 
 	/*
 	 * restore XCR0 for xsave capable cpu's.
@@ -194,9 +239,13 @@ static void __restore_processor_state(struct saved_context *ctxt)
 		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
 
 	fix_processor_context();
+
 	do_fpu_end();
 	mtrr_ap_init();
+
+#ifdef CONFIG_X86_32
 	mcheck_init(&boot_cpu_data);
+#endif
 }
 
 /* Needed by apm.c */
@@ -204,4 +253,6 @@ void restore_processor_state(void)
 {
 	__restore_processor_state(&saved_context);
 }
+#ifdef CONFIG_X86_32
 EXPORT_SYMBOL(restore_processor_state);
+#endif
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 11ea7d0..5c8bdc0 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -28,8 +28,6 @@ unsigned long saved_context_esi, saved_context_edi;
 unsigned long saved_context_eflags;
 #else
 /* CONFIG_X86_64 */
-static void fix_processor_context(void);
-
 struct saved_context saved_context;
 #endif
 
@@ -120,11 +118,57 @@ EXPORT_SYMBOL(save_processor_state);
 static void do_fpu_end(void)
 {
 	/*
-	 * Restore FPU regs if necessary
+	 * Restore FPU regs if necessary.
 	 */
 	kernel_fpu_end();
 }
 
+static void fix_processor_context(void)
+{
+	int cpu = smp_processor_id();
+	struct tss_struct *t = &per_cpu(init_tss, cpu);
+
+	set_tss_desc(cpu, t);	/*
+				 * This just modifies memory; should not be
+				 * necessary. But... This is necessary, because
+				 * 386 hardware has concept of busy TSS or some
+				 * similar stupidity.
+				 */
+
+#ifdef CONFIG_X86_64
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
+
+	syscall_init();				/* This sets MSR_*STAR and related */
+#endif
+	load_TR_desc();				/* This does ltr */
+	load_LDT(&current->active_mm->context);	/* This does lldt */
+
+	/*
+	 * Now maybe reload the debug registers
+	 */
+	if (current->thread.debugreg7) {
+#ifdef CONFIG_X86_32
+		set_debugreg(current->thread.debugreg0, 0);
+		set_debugreg(current->thread.debugreg1, 1);
+		set_debugreg(current->thread.debugreg2, 2);
+		set_debugreg(current->thread.debugreg3, 3);
+		/* no 4 and 5 */
+		set_debugreg(current->thread.debugreg6, 6);
+		set_debugreg(current->thread.debugreg7, 7);
+#else
+		/* CONFIG_X86_64 */
+		loaddebug(&current->thread, 0);
+		loaddebug(&current->thread, 1);
+		loaddebug(&current->thread, 2);
+		loaddebug(&current->thread, 3);
+		/* no 4 and 5 */
+		loaddebug(&current->thread, 6);
+		loaddebug(&current->thread, 7);
+#endif
+	}
+
+}
+
 /**
  *	__restore_processor_state - restore the contents of CPU registers saved
  *		by __save_processor_state()
@@ -135,9 +179,16 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	/*
 	 * control registers
 	 */
+	/* cr4 was introduced in the Pentium CPU */
+#ifdef CONFIG_X86_32
+	if (ctxt->cr4)
+		write_cr4(ctxt->cr4);
+#else
+/* CONFIG X86_64 */
 	wrmsrl(MSR_EFER, ctxt->efer);
 	write_cr8(ctxt->cr8);
 	write_cr4(ctxt->cr4);
+#endif
 	write_cr3(ctxt->cr3);
 	write_cr2(ctxt->cr2);
 	write_cr0(ctxt->cr0);
@@ -146,13 +197,31 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	 * now restore the descriptor tables to their proper values
 	 * ltr is done i fix_processor_context().
 	 */
+#ifdef CONFIG_X86_32
+	load_gdt(&ctxt->gdt);
+	load_idt(&ctxt->idt);
+#else
+/* CONFIG_X86_64 */
 	load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
 	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-
+#endif
 
 	/*
 	 * segment registers
 	 */
+#ifdef CONFIG_X86_32
+	loadsegment(es, ctxt->es);
+	loadsegment(fs, ctxt->fs);
+	loadsegment(gs, ctxt->gs);
+	loadsegment(ss, ctxt->ss);
+
+	/*
+	 * sysenter MSRs
+	 */
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		enable_sep_cpu();
+#else
+/* CONFIG_X86_64 */
 	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
 	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
 	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
@@ -162,6 +231,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
 	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
 	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+#endif
 
 	/*
 	 * restore XCR0 for xsave capable cpu's.
@@ -173,41 +243,17 @@ static void __restore_processor_state(struct saved_context *ctxt)
 
 	do_fpu_end();
 	mtrr_ap_init();
+
+#ifdef CONFIG_X86_32
+	mcheck_init(&boot_cpu_data);
+#endif
 }
 
+/* Needed by apm.c */
 void restore_processor_state(void)
 {
 	__restore_processor_state(&saved_context);
 }
-
-static void fix_processor_context(void)
-{
-	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
-
-	/*
-	 * This just modifies memory; should not be necessary. But... This
-	 * is necessary, because 386 hardware has concept of busy TSS or some
-	 * similar stupidity.
-	 */
-	set_tss_desc(cpu, t);
-
-	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
-
-	syscall_init();                         /* This sets MSR_*STAR and related */
-	load_TR_desc();				/* This does ltr */
-	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	if (current->thread.debugreg7){
-                loaddebug(&current->thread, 0);
-                loaddebug(&current->thread, 1);
-                loaddebug(&current->thread, 2);
-                loaddebug(&current->thread, 3);
-                /* no 4 and 5 */
-                loaddebug(&current->thread, 6);
-                loaddebug(&current->thread, 7);
-	}
-}
+#ifdef CONFIG_X86_32
+EXPORT_SYMBOL(restore_processor_state);
+#endif
-- 
cgit v0.10.2


From 6d48becd33a7921694ba1955ba91604d648020f1 Mon Sep 17 00:00:00 2001
From: Sergio Luis <sergio@larces.uece.br>
Date: Tue, 28 Apr 2009 00:27:18 +0200
Subject: x86: unify power/cpu_(32|64) copyright notes

In this step, we do unify the copyright notes for both files
cpu_32.c and cpu_64.c, making such files exactly the same.
It's the last step before the actual unification, that will
rename one of them to cpu.c and remove the other one.

Signed-off-by: Sergio Luis <sergio@larces.uece.br>
Signed-off-by: Lauro Salmito <laurosalmito@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index 29b9c0a..ca0ecae 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -1,8 +1,9 @@
 /*
- * Suspend support specific for i386.
+ * Suspend and hibernation support for i386/x86-64.
  *
  * Distribute under GPLv2
  *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
  * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
  */
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 5c8bdc0..d277ef1 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -1,5 +1,5 @@
 /*
- * Suspend and hibernation support for x86-64
+ * Suspend support specific for i386/x86-64.
  *
  * Distribute under GPLv2
  *
-- 
cgit v0.10.2


From 08687aec71bc9134fe336e561f6877bacf74fc0a Mon Sep 17 00:00:00 2001
From: Sergio Luis <sergio@larces.uece.br>
Date: Tue, 28 Apr 2009 00:27:22 +0200
Subject: x86: unify power/cpu_(32|64).c

This is the last unification step. Here we do remove one of the files
and rename the left one as cpu.c, as both are now the same.
Also update power/Makefile, telling it to build cpu.o, instead of
cpu_(32|64).o

Signed-off-by: Sergio Luis <sergio@larces.uece.br>
Signed-off-by: Lauro Salmito <laurosalmito@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index 58b32db..de2abbd 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -3,5 +3,5 @@
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_cpu_$(BITS).o	:= $(nostackp)
 
-obj-$(CONFIG_PM_SLEEP)		+= cpu_$(BITS).o
+obj-$(CONFIG_PM_SLEEP)		+= cpu.o
 obj-$(CONFIG_HIBERNATION)	+= hibernate_$(BITS).o hibernate_asm_$(BITS).o
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
new file mode 100644
index 0000000..d277ef1
--- /dev/null
+++ b/arch/x86/power/cpu.c
@@ -0,0 +1,259 @@
+/*
+ * Suspend support specific for i386/x86-64.
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+
+#include <linux/suspend.h>
+#include <linux/smp.h>
+
+#include <asm/pgtable.h>
+#include <asm/proto.h>
+#include <asm/mtrr.h>
+#include <asm/page.h>
+#include <asm/mce.h>
+#include <asm/xcr.h>
+#include <asm/suspend.h>
+
+#ifdef CONFIG_X86_32
+static struct saved_context saved_context;
+
+unsigned long saved_context_ebx;
+unsigned long saved_context_esp, saved_context_ebp;
+unsigned long saved_context_esi, saved_context_edi;
+unsigned long saved_context_eflags;
+#else
+/* CONFIG_X86_64 */
+struct saved_context saved_context;
+#endif
+
+/**
+ *	__save_processor_state - save CPU registers before creating a
+ *		hibernation image and before restoring the memory state from it
+ *	@ctxt - structure to store the registers contents in
+ *
+ *	NOTE: If there is a CPU register the modification of which by the
+ *	boot kernel (ie. the kernel used for loading the hibernation image)
+ *	might affect the operations of the restored target kernel (ie. the one
+ *	saved in the hibernation image), then its contents must be saved by this
+ *	function.  In other words, if kernel A is hibernated and different
+ *	kernel B is used for loading the hibernation image into memory, the
+ *	kernel A's __save_processor_state() function must save all registers
+ *	needed by kernel A, so that it can operate correctly after the resume
+ *	regardless of what kernel B does in the meantime.
+ */
+static void __save_processor_state(struct saved_context *ctxt)
+{
+#ifdef CONFIG_X86_32
+	mtrr_save_fixed_ranges(NULL);
+#endif
+	kernel_fpu_begin();
+
+	/*
+	 * descriptor tables
+	 */
+#ifdef CONFIG_X86_32
+	store_gdt(&ctxt->gdt);
+	store_idt(&ctxt->idt);
+#else
+/* CONFIG_X86_64 */
+	store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
+	store_idt((struct desc_ptr *)&ctxt->idt_limit);
+#endif
+	store_tr(ctxt->tr);
+
+	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
+	/*
+	 * segment registers
+	 */
+#ifdef CONFIG_X86_32
+	savesegment(es, ctxt->es);
+	savesegment(fs, ctxt->fs);
+	savesegment(gs, ctxt->gs);
+	savesegment(ss, ctxt->ss);
+#else
+/* CONFIG_X86_64 */
+	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
+	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
+	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
+	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
+	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+
+	rdmsrl(MSR_FS_BASE, ctxt->fs_base);
+	rdmsrl(MSR_GS_BASE, ctxt->gs_base);
+	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+	mtrr_save_fixed_ranges(NULL);
+
+	rdmsrl(MSR_EFER, ctxt->efer);
+#endif
+
+	/*
+	 * control registers
+	 */
+	ctxt->cr0 = read_cr0();
+	ctxt->cr2 = read_cr2();
+	ctxt->cr3 = read_cr3();
+#ifdef CONFIG_X86_32
+	ctxt->cr4 = read_cr4_safe();
+#else
+/* CONFIG_X86_64 */
+	ctxt->cr4 = read_cr4();
+	ctxt->cr8 = read_cr8();
+#endif
+}
+
+/* Needed by apm.c */
+void save_processor_state(void)
+{
+	__save_processor_state(&saved_context);
+}
+#ifdef CONFIG_X86_32
+EXPORT_SYMBOL(save_processor_state);
+#endif
+
+static void do_fpu_end(void)
+{
+	/*
+	 * Restore FPU regs if necessary.
+	 */
+	kernel_fpu_end();
+}
+
+static void fix_processor_context(void)
+{
+	int cpu = smp_processor_id();
+	struct tss_struct *t = &per_cpu(init_tss, cpu);
+
+	set_tss_desc(cpu, t);	/*
+				 * This just modifies memory; should not be
+				 * necessary. But... This is necessary, because
+				 * 386 hardware has concept of busy TSS or some
+				 * similar stupidity.
+				 */
+
+#ifdef CONFIG_X86_64
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
+
+	syscall_init();				/* This sets MSR_*STAR and related */
+#endif
+	load_TR_desc();				/* This does ltr */
+	load_LDT(&current->active_mm->context);	/* This does lldt */
+
+	/*
+	 * Now maybe reload the debug registers
+	 */
+	if (current->thread.debugreg7) {
+#ifdef CONFIG_X86_32
+		set_debugreg(current->thread.debugreg0, 0);
+		set_debugreg(current->thread.debugreg1, 1);
+		set_debugreg(current->thread.debugreg2, 2);
+		set_debugreg(current->thread.debugreg3, 3);
+		/* no 4 and 5 */
+		set_debugreg(current->thread.debugreg6, 6);
+		set_debugreg(current->thread.debugreg7, 7);
+#else
+		/* CONFIG_X86_64 */
+		loaddebug(&current->thread, 0);
+		loaddebug(&current->thread, 1);
+		loaddebug(&current->thread, 2);
+		loaddebug(&current->thread, 3);
+		/* no 4 and 5 */
+		loaddebug(&current->thread, 6);
+		loaddebug(&current->thread, 7);
+#endif
+	}
+
+}
+
+/**
+ *	__restore_processor_state - restore the contents of CPU registers saved
+ *		by __save_processor_state()
+ *	@ctxt - structure to load the registers contents from
+ */
+static void __restore_processor_state(struct saved_context *ctxt)
+{
+	/*
+	 * control registers
+	 */
+	/* cr4 was introduced in the Pentium CPU */
+#ifdef CONFIG_X86_32
+	if (ctxt->cr4)
+		write_cr4(ctxt->cr4);
+#else
+/* CONFIG X86_64 */
+	wrmsrl(MSR_EFER, ctxt->efer);
+	write_cr8(ctxt->cr8);
+	write_cr4(ctxt->cr4);
+#endif
+	write_cr3(ctxt->cr3);
+	write_cr2(ctxt->cr2);
+	write_cr0(ctxt->cr0);
+
+	/*
+	 * now restore the descriptor tables to their proper values
+	 * ltr is done i fix_processor_context().
+	 */
+#ifdef CONFIG_X86_32
+	load_gdt(&ctxt->gdt);
+	load_idt(&ctxt->idt);
+#else
+/* CONFIG_X86_64 */
+	load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
+	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+#endif
+
+	/*
+	 * segment registers
+	 */
+#ifdef CONFIG_X86_32
+	loadsegment(es, ctxt->es);
+	loadsegment(fs, ctxt->fs);
+	loadsegment(gs, ctxt->gs);
+	loadsegment(ss, ctxt->ss);
+
+	/*
+	 * sysenter MSRs
+	 */
+	if (boot_cpu_has(X86_FEATURE_SEP))
+		enable_sep_cpu();
+#else
+/* CONFIG_X86_64 */
+	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
+	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
+	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
+	load_gs_index(ctxt->gs);
+	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
+
+	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
+	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+#endif
+
+	/*
+	 * restore XCR0 for xsave capable cpu's.
+	 */
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+
+	fix_processor_context();
+
+	do_fpu_end();
+	mtrr_ap_init();
+
+#ifdef CONFIG_X86_32
+	mcheck_init(&boot_cpu_data);
+#endif
+}
+
+/* Needed by apm.c */
+void restore_processor_state(void)
+{
+	__restore_processor_state(&saved_context);
+}
+#ifdef CONFIG_X86_32
+EXPORT_SYMBOL(restore_processor_state);
+#endif
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
deleted file mode 100644
index ca0ecae..0000000
--- a/arch/x86/power/cpu_32.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Suspend and hibernation support for i386/x86-64.
- *
- * Distribute under GPLv2
- *
- * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
- * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
- * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
- */
-
-#include <linux/suspend.h>
-#include <linux/smp.h>
-
-#include <asm/pgtable.h>
-#include <asm/proto.h>
-#include <asm/mtrr.h>
-#include <asm/page.h>
-#include <asm/mce.h>
-#include <asm/xcr.h>
-#include <asm/suspend.h>
-
-#ifdef CONFIG_X86_32
-static struct saved_context saved_context;
-
-unsigned long saved_context_ebx;
-unsigned long saved_context_esp, saved_context_ebp;
-unsigned long saved_context_esi, saved_context_edi;
-unsigned long saved_context_eflags;
-#else
-/* CONFIG_X86_64 */
-struct saved_context saved_context;
-#endif
-
-/**
- *	__save_processor_state - save CPU registers before creating a
- *		hibernation image and before restoring the memory state from it
- *	@ctxt - structure to store the registers contents in
- *
- *	NOTE: If there is a CPU register the modification of which by the
- *	boot kernel (ie. the kernel used for loading the hibernation image)
- *	might affect the operations of the restored target kernel (ie. the one
- *	saved in the hibernation image), then its contents must be saved by this
- *	function.  In other words, if kernel A is hibernated and different
- *	kernel B is used for loading the hibernation image into memory, the
- *	kernel A's __save_processor_state() function must save all registers
- *	needed by kernel A, so that it can operate correctly after the resume
- *	regardless of what kernel B does in the meantime.
- */
-static void __save_processor_state(struct saved_context *ctxt)
-{
-#ifdef CONFIG_X86_32
-	mtrr_save_fixed_ranges(NULL);
-#endif
-	kernel_fpu_begin();
-
-	/*
-	 * descriptor tables
-	 */
-#ifdef CONFIG_X86_32
-	store_gdt(&ctxt->gdt);
-	store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
-	store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
-	store_tr(ctxt->tr);
-
-	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
-	/*
-	 * segment registers
-	 */
-#ifdef CONFIG_X86_32
-	savesegment(es, ctxt->es);
-	savesegment(fs, ctxt->fs);
-	savesegment(gs, ctxt->gs);
-	savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
-	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
-	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
-	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
-	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
-
-	rdmsrl(MSR_FS_BASE, ctxt->fs_base);
-	rdmsrl(MSR_GS_BASE, ctxt->gs_base);
-	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
-	mtrr_save_fixed_ranges(NULL);
-
-	rdmsrl(MSR_EFER, ctxt->efer);
-#endif
-
-	/*
-	 * control registers
-	 */
-	ctxt->cr0 = read_cr0();
-	ctxt->cr2 = read_cr2();
-	ctxt->cr3 = read_cr3();
-#ifdef CONFIG_X86_32
-	ctxt->cr4 = read_cr4_safe();
-#else
-/* CONFIG_X86_64 */
-	ctxt->cr4 = read_cr4();
-	ctxt->cr8 = read_cr8();
-#endif
-}
-
-/* Needed by apm.c */
-void save_processor_state(void)
-{
-	__save_processor_state(&saved_context);
-}
-#ifdef CONFIG_X86_32
-EXPORT_SYMBOL(save_processor_state);
-#endif
-
-static void do_fpu_end(void)
-{
-	/*
-	 * Restore FPU regs if necessary.
-	 */
-	kernel_fpu_end();
-}
-
-static void fix_processor_context(void)
-{
-	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
-
-	set_tss_desc(cpu, t);	/*
-				 * This just modifies memory; should not be
-				 * necessary. But... This is necessary, because
-				 * 386 hardware has concept of busy TSS or some
-				 * similar stupidity.
-				 */
-
-#ifdef CONFIG_X86_64
-	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
-
-	syscall_init();				/* This sets MSR_*STAR and related */
-#endif
-	load_TR_desc();				/* This does ltr */
-	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-#else
-		/* CONFIG_X86_64 */
-		loaddebug(&current->thread, 0);
-		loaddebug(&current->thread, 1);
-		loaddebug(&current->thread, 2);
-		loaddebug(&current->thread, 3);
-		/* no 4 and 5 */
-		loaddebug(&current->thread, 6);
-		loaddebug(&current->thread, 7);
-#endif
-	}
-
-}
-
-/**
- *	__restore_processor_state - restore the contents of CPU registers saved
- *		by __save_processor_state()
- *	@ctxt - structure to load the registers contents from
- */
-static void __restore_processor_state(struct saved_context *ctxt)
-{
-	/*
-	 * control registers
-	 */
-	/* cr4 was introduced in the Pentium CPU */
-#ifdef CONFIG_X86_32
-	if (ctxt->cr4)
-		write_cr4(ctxt->cr4);
-#else
-/* CONFIG X86_64 */
-	wrmsrl(MSR_EFER, ctxt->efer);
-	write_cr8(ctxt->cr8);
-	write_cr4(ctxt->cr4);
-#endif
-	write_cr3(ctxt->cr3);
-	write_cr2(ctxt->cr2);
-	write_cr0(ctxt->cr0);
-
-	/*
-	 * now restore the descriptor tables to their proper values
-	 * ltr is done i fix_processor_context().
-	 */
-#ifdef CONFIG_X86_32
-	load_gdt(&ctxt->gdt);
-	load_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
-	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-#endif
-
-	/*
-	 * segment registers
-	 */
-#ifdef CONFIG_X86_32
-	loadsegment(es, ctxt->es);
-	loadsegment(fs, ctxt->fs);
-	loadsegment(gs, ctxt->gs);
-	loadsegment(ss, ctxt->ss);
-
-	/*
-	 * sysenter MSRs
-	 */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
-	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
-	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
-	load_gs_index(ctxt->gs);
-	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
-
-	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
-	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
-#endif
-
-	/*
-	 * restore XCR0 for xsave capable cpu's.
-	 */
-	if (cpu_has_xsave)
-		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
-
-	fix_processor_context();
-
-	do_fpu_end();
-	mtrr_ap_init();
-
-#ifdef CONFIG_X86_32
-	mcheck_init(&boot_cpu_data);
-#endif
-}
-
-/* Needed by apm.c */
-void restore_processor_state(void)
-{
-	__restore_processor_state(&saved_context);
-}
-#ifdef CONFIG_X86_32
-EXPORT_SYMBOL(restore_processor_state);
-#endif
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
deleted file mode 100644
index d277ef1..0000000
--- a/arch/x86/power/cpu_64.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Suspend support specific for i386/x86-64.
- *
- * Distribute under GPLv2
- *
- * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
- * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
- * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
- */
-
-#include <linux/suspend.h>
-#include <linux/smp.h>
-
-#include <asm/pgtable.h>
-#include <asm/proto.h>
-#include <asm/mtrr.h>
-#include <asm/page.h>
-#include <asm/mce.h>
-#include <asm/xcr.h>
-#include <asm/suspend.h>
-
-#ifdef CONFIG_X86_32
-static struct saved_context saved_context;
-
-unsigned long saved_context_ebx;
-unsigned long saved_context_esp, saved_context_ebp;
-unsigned long saved_context_esi, saved_context_edi;
-unsigned long saved_context_eflags;
-#else
-/* CONFIG_X86_64 */
-struct saved_context saved_context;
-#endif
-
-/**
- *	__save_processor_state - save CPU registers before creating a
- *		hibernation image and before restoring the memory state from it
- *	@ctxt - structure to store the registers contents in
- *
- *	NOTE: If there is a CPU register the modification of which by the
- *	boot kernel (ie. the kernel used for loading the hibernation image)
- *	might affect the operations of the restored target kernel (ie. the one
- *	saved in the hibernation image), then its contents must be saved by this
- *	function.  In other words, if kernel A is hibernated and different
- *	kernel B is used for loading the hibernation image into memory, the
- *	kernel A's __save_processor_state() function must save all registers
- *	needed by kernel A, so that it can operate correctly after the resume
- *	regardless of what kernel B does in the meantime.
- */
-static void __save_processor_state(struct saved_context *ctxt)
-{
-#ifdef CONFIG_X86_32
-	mtrr_save_fixed_ranges(NULL);
-#endif
-	kernel_fpu_begin();
-
-	/*
-	 * descriptor tables
-	 */
-#ifdef CONFIG_X86_32
-	store_gdt(&ctxt->gdt);
-	store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
-	store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
-	store_tr(ctxt->tr);
-
-	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
-	/*
-	 * segment registers
-	 */
-#ifdef CONFIG_X86_32
-	savesegment(es, ctxt->es);
-	savesegment(fs, ctxt->fs);
-	savesegment(gs, ctxt->gs);
-	savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
-	asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
-	asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
-	asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
-	asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
-
-	rdmsrl(MSR_FS_BASE, ctxt->fs_base);
-	rdmsrl(MSR_GS_BASE, ctxt->gs_base);
-	rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
-	mtrr_save_fixed_ranges(NULL);
-
-	rdmsrl(MSR_EFER, ctxt->efer);
-#endif
-
-	/*
-	 * control registers
-	 */
-	ctxt->cr0 = read_cr0();
-	ctxt->cr2 = read_cr2();
-	ctxt->cr3 = read_cr3();
-#ifdef CONFIG_X86_32
-	ctxt->cr4 = read_cr4_safe();
-#else
-/* CONFIG_X86_64 */
-	ctxt->cr4 = read_cr4();
-	ctxt->cr8 = read_cr8();
-#endif
-}
-
-/* Needed by apm.c */
-void save_processor_state(void)
-{
-	__save_processor_state(&saved_context);
-}
-#ifdef CONFIG_X86_32
-EXPORT_SYMBOL(save_processor_state);
-#endif
-
-static void do_fpu_end(void)
-{
-	/*
-	 * Restore FPU regs if necessary.
-	 */
-	kernel_fpu_end();
-}
-
-static void fix_processor_context(void)
-{
-	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
-
-	set_tss_desc(cpu, t);	/*
-				 * This just modifies memory; should not be
-				 * necessary. But... This is necessary, because
-				 * 386 hardware has concept of busy TSS or some
-				 * similar stupidity.
-				 */
-
-#ifdef CONFIG_X86_64
-	get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
-
-	syscall_init();				/* This sets MSR_*STAR and related */
-#endif
-	load_TR_desc();				/* This does ltr */
-	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-#else
-		/* CONFIG_X86_64 */
-		loaddebug(&current->thread, 0);
-		loaddebug(&current->thread, 1);
-		loaddebug(&current->thread, 2);
-		loaddebug(&current->thread, 3);
-		/* no 4 and 5 */
-		loaddebug(&current->thread, 6);
-		loaddebug(&current->thread, 7);
-#endif
-	}
-
-}
-
-/**
- *	__restore_processor_state - restore the contents of CPU registers saved
- *		by __save_processor_state()
- *	@ctxt - structure to load the registers contents from
- */
-static void __restore_processor_state(struct saved_context *ctxt)
-{
-	/*
-	 * control registers
-	 */
-	/* cr4 was introduced in the Pentium CPU */
-#ifdef CONFIG_X86_32
-	if (ctxt->cr4)
-		write_cr4(ctxt->cr4);
-#else
-/* CONFIG X86_64 */
-	wrmsrl(MSR_EFER, ctxt->efer);
-	write_cr8(ctxt->cr8);
-	write_cr4(ctxt->cr4);
-#endif
-	write_cr3(ctxt->cr3);
-	write_cr2(ctxt->cr2);
-	write_cr0(ctxt->cr0);
-
-	/*
-	 * now restore the descriptor tables to their proper values
-	 * ltr is done i fix_processor_context().
-	 */
-#ifdef CONFIG_X86_32
-	load_gdt(&ctxt->gdt);
-	load_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-	load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
-	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-#endif
-
-	/*
-	 * segment registers
-	 */
-#ifdef CONFIG_X86_32
-	loadsegment(es, ctxt->es);
-	loadsegment(fs, ctxt->fs);
-	loadsegment(gs, ctxt->gs);
-	loadsegment(ss, ctxt->ss);
-
-	/*
-	 * sysenter MSRs
-	 */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
-	asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
-	asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
-	asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
-	load_gs_index(ctxt->gs);
-	asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
-
-	wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
-	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
-#endif
-
-	/*
-	 * restore XCR0 for xsave capable cpu's.
-	 */
-	if (cpu_has_xsave)
-		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
-
-	fix_processor_context();
-
-	do_fpu_end();
-	mtrr_ap_init();
-
-#ifdef CONFIG_X86_32
-	mcheck_init(&boot_cpu_data);
-#endif
-}
-
-/* Needed by apm.c */
-void restore_processor_state(void)
-{
-	__restore_processor_state(&saved_context);
-}
-#ifdef CONFIG_X86_32
-EXPORT_SYMBOL(restore_processor_state);
-#endif
-- 
cgit v0.10.2


From 1380a37e3da5d9e14ea5c2a4c6ab2b307a2798ea Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 15 May 2009 00:52:00 +0200
Subject: PM: Remove unused asm/suspend.h

This patch removes unused asm/suspend.h files for
the following architectures:

 alpha, arm, ia64, m68k, mips, s390, um

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/alpha/include/asm/suspend.h b/arch/alpha/include/asm/suspend.h
deleted file mode 100644
index c7042d5..0000000
--- a/arch/alpha/include/asm/suspend.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ALPHA_SUSPEND_H
-#define __ALPHA_SUSPEND_H
-
-/* Dummy include. */
-
-#endif  /* __ALPHA_SUSPEND_H */
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
deleted file mode 100644
index cf0d0bd..0000000
--- a/arch/arm/include/asm/suspend.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef _ASMARM_SUSPEND_H
-#define _ASMARM_SUSPEND_H
-
-#endif
diff --git a/arch/ia64/include/asm/suspend.h b/arch/ia64/include/asm/suspend.h
deleted file mode 100644
index b05bbb6..0000000
--- a/arch/ia64/include/asm/suspend.h
+++ /dev/null
@@ -1 +0,0 @@
-/* dummy (must be non-empty to prevent prejudicial removal...) */
diff --git a/arch/m68k/include/asm/suspend.h b/arch/m68k/include/asm/suspend.h
deleted file mode 100644
index 57b3ddb..0000000
--- a/arch/m68k/include/asm/suspend.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _M68K_SUSPEND_H
-#define _M68K_SUSPEND_H
-
-/* Dummy include. */
-
-#endif  /* _M68K_SUSPEND_H */
diff --git a/arch/mips/include/asm/suspend.h b/arch/mips/include/asm/suspend.h
deleted file mode 100644
index 2562f8f..0000000
--- a/arch/mips/include/asm/suspend.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_SUSPEND_H
-#define __ASM_SUSPEND_H
-
-/* Somewhen...  Maybe :-)  */
-
-#endif /* __ASM_SUSPEND_H */
diff --git a/arch/s390/include/asm/suspend.h b/arch/s390/include/asm/suspend.h
deleted file mode 100644
index 1f34580..0000000
--- a/arch/s390/include/asm/suspend.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef __ASM_S390_SUSPEND_H
-#define __ASM_S390_SUSPEND_H
-
-#endif
-
diff --git a/arch/um/include/asm/suspend.h b/arch/um/include/asm/suspend.h
deleted file mode 100644
index f4e8e00..0000000
--- a/arch/um/include/asm/suspend.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __UM_SUSPEND_H
-#define __UM_SUSPEND_H
-
-#endif
-- 
cgit v0.10.2


From e39a71ef80877f4e30d808af9acceec80f4d2f7c Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 15 May 2009 00:53:26 +0200
Subject: PM: Rename device_power_down/up()

Rename the functions performing "_noirq" dev_pm_ops
operations from device_power_down() and device_power_up()
to device_suspend_noirq() and device_resume_noirq().

The new function names are chosen to show that the functions
are responsible for calling the _noirq() versions to finalize
the suspend/resume operation. The current function names do
not perform power down/up anymore so the names may be misleading.

Global function renames:
- device_power_down() -> device_suspend_noirq()
- device_power_up() -> device_resume_noirq()

Static function renames:
- suspend_device_noirq() -> __device_suspend_noirq()
- resume_device_noirq() -> __device_resume_noirq()

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Len Brown <lenb@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 49e0939..31ae547 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1235,7 +1235,7 @@ static int suspend(int vetoable)
 
 	device_suspend(PMSG_SUSPEND);
 
-	device_power_down(PMSG_SUSPEND);
+	device_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1259,7 +1259,7 @@ static int suspend(int vetoable)
 	sysdev_resume();
 	local_irq_enable();
 
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 
 	device_resume(PMSG_RESUME);
 	queue_event(APM_NORMAL_RESUME, NULL);
@@ -1277,7 +1277,7 @@ static void standby(void)
 {
 	int err;
 
-	device_power_down(PMSG_SUSPEND);
+	device_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1291,7 +1291,7 @@ static void standby(void)
 	sysdev_resume();
 	local_irq_enable();
 
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 }
 
 static apm_event_t get_event(void)
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 3e4bc69..c5a35bc 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -315,13 +315,13 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device_noirq - Power on one device (early resume).
+ *	__device_resume_noirq - Power on one device (early resume).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	Must be called with interrupts disabled.
  */
-static int resume_device_noirq(struct device *dev, pm_message_t state)
+static int __device_resume_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -363,7 +363,7 @@ static void dpm_power_up(pm_message_t state)
 			int error;
 
 			dev->power.status = DPM_OFF;
-			error = resume_device_noirq(dev, state);
+			error = __device_resume_noirq(dev, state);
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
@@ -371,18 +371,18 @@ static void dpm_power_up(pm_message_t state)
 }
 
 /**
- *	device_power_up - Turn on all devices that need special attention.
+ *	device_resume_noirq - Turn on all devices that need special attention.
  *	@state: PM transition of the system being carried out.
  *
  *	Call the "early" resume handlers and enable device drivers to receive
  *	interrupts.
  */
-void device_power_up(pm_message_t state)
+void device_resume_noirq(pm_message_t state)
 {
 	dpm_power_up(state);
 	resume_device_irqs();
 }
-EXPORT_SYMBOL_GPL(device_power_up);
+EXPORT_SYMBOL_GPL(device_resume_noirq);
 
 /**
  *	resume_device - Restore state for one device.
@@ -577,13 +577,13 @@ static pm_message_t resume_event(pm_message_t sleep_state)
 }
 
 /**
- *	suspend_device_noirq - Shut down one device (late suspend).
+ *	__device_suspend_noirq - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	This is called with interrupts off and only a single CPU running.
  */
-static int suspend_device_noirq(struct device *dev, pm_message_t state)
+static int __device_suspend_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -602,7 +602,7 @@ static int suspend_device_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- *	device_power_down - Shut down special devices.
+ *	device_suspend_noirq - Shut down special devices.
  *	@state: PM transition of the system being carried out.
  *
  *	Prevent device drivers from receiving interrupts and call the "late"
@@ -610,7 +610,7 @@ static int suspend_device_noirq(struct device *dev, pm_message_t state)
  *
  *	Must be called under dpm_list_mtx.
  */
-int device_power_down(pm_message_t state)
+int device_suspend_noirq(pm_message_t state)
 {
 	struct device *dev;
 	int error = 0;
@@ -618,7 +618,7 @@ int device_power_down(pm_message_t state)
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
-		error = suspend_device_noirq(dev, state);
+		error = __device_suspend_noirq(dev, state);
 		if (error) {
 			pm_dev_err(dev, state, " late", error);
 			break;
@@ -627,10 +627,10 @@ int device_power_down(pm_message_t state)
 	}
 	mutex_unlock(&dpm_list_mtx);
 	if (error)
-		device_power_up(resume_event(state));
+		device_resume_noirq(resume_event(state));
 	return error;
 }
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(device_suspend_noirq);
 
 /**
  *	suspend_device - Save state of one device.
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index fddc202..d5b327a 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -43,7 +43,7 @@ static int xen_suspend(void *data)
 	if (err) {
 		printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
 			err);
-		device_power_up(PMSG_RESUME);
+		device_resume_noirq(PMSG_RESUME);
 		return err;
 	}
 
@@ -69,7 +69,7 @@ static int xen_suspend(void *data)
 	}
 
 	sysdev_resume();
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 
 	return 0;
 }
@@ -101,9 +101,9 @@ static void do_suspend(void)
 	printk(KERN_DEBUG "suspending xenstore...\n");
 	xs_suspend();
 
-	err = device_power_down(PMSG_SUSPEND);
+	err = device_suspend_noirq(PMSG_SUSPEND);
 	if (err) {
-		printk(KERN_ERR "device_power_down failed: %d\n", err);
+		printk(KERN_ERR "device_suspend_noirq failed: %d\n", err);
 		goto resume_devices;
 	}
 
@@ -119,7 +119,7 @@ static void do_suspend(void)
 	} else
 		xs_suspend_cancel();
 
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 
 resume_devices:
 	device_resume(PMSG_RESUME);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 1d4e2d2..2170252 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -382,12 +382,12 @@ struct dev_pm_info {
 #ifdef CONFIG_PM_SLEEP
 extern void device_pm_lock(void);
 extern int sysdev_resume(void);
-extern void device_power_up(pm_message_t state);
+extern void device_resume_noirq(pm_message_t state);
 extern void device_resume(pm_message_t state);
 
 extern void device_pm_unlock(void);
 extern int sysdev_suspend(pm_message_t state);
-extern int device_power_down(pm_message_t state);
+extern int device_suspend_noirq(pm_message_t state);
 extern int device_suspend(pm_message_t state);
 extern int device_prepare_suspend(pm_message_t state);
 
diff --git a/kernel/kexec.c b/kernel/kexec.c
index e498377..5a3da87 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1452,13 +1452,13 @@ int kernel_kexec(void)
 		if (error)
 			goto Resume_console;
 		/* At this point, device_suspend() has been called,
-		 * but *not* device_power_down(). We *must*
-		 * device_power_down() now.  Otherwise, drivers for
+		 * but *not* device_suspend_noirq(). We *must* call
+		 * device_suspend_noirq() now.  Otherwise, drivers for
 		 * some devices (e.g. interrupt controllers) become
 		 * desynchronized with the actual state of the
 		 * hardware at resume time, and evil weirdness ensues.
 		 */
-		error = device_power_down(PMSG_FREEZE);
+		error = device_suspend_noirq(PMSG_FREEZE);
 		if (error)
 			goto Resume_devices;
 		error = disable_nonboot_cpus();
@@ -1486,7 +1486,7 @@ int kernel_kexec(void)
 		local_irq_enable();
  Enable_cpus:
 		enable_nonboot_cpus();
-		device_power_up(PMSG_RESTORE);
+		device_resume_noirq(PMSG_RESTORE);
  Resume_devices:
 		device_resume(PMSG_RESTORE);
  Resume_console:
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 5cb080e..1c18bc8 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -216,12 +216,12 @@ static int create_image(int platform_mode)
 		return error;
 
 	/* At this point, device_suspend() has been called, but *not*
-	 * device_power_down(). We *must* call device_power_down() now.
+	 * device_suspend_noirq(). We *must* call device_suspend_noirq() now.
 	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
 	 * become desynchronized with the actual state of the hardware
 	 * at resume time, and evil weirdness ensues.
 	 */
-	error = device_power_down(PMSG_FREEZE);
+	error = device_suspend_noirq(PMSG_FREEZE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting hibernation\n");
@@ -262,7 +262,7 @@ static int create_image(int platform_mode)
 
  Power_up:
 	sysdev_resume();
-	/* NOTE:  device_power_up() is just a resume() for devices
+	/* NOTE:  device_resume_noirq() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
 
@@ -275,7 +275,7 @@ static int create_image(int platform_mode)
  Platform_finish:
 	platform_finish(platform_mode);
 
-	device_power_up(in_suspend ?
+	device_resume_noirq(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 
 	return error;
@@ -339,7 +339,7 @@ static int resume_target_kernel(bool platform_mode)
 {
 	int error;
 
-	error = device_power_down(PMSG_QUIESCE);
+	error = device_suspend_noirq(PMSG_QUIESCE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting resume\n");
@@ -394,7 +394,7 @@ static int resume_target_kernel(bool platform_mode)
  Cleanup:
 	platform_restore_cleanup(platform_mode);
 
-	device_power_up(PMSG_RECOVER);
+	device_resume_noirq(PMSG_RECOVER);
 
 	return error;
 }
@@ -454,7 +454,7 @@ int hibernation_platform_enter(void)
 		goto Resume_devices;
 	}
 
-	error = device_power_down(PMSG_HIBERNATE);
+	error = device_suspend_noirq(PMSG_HIBERNATE);
 	if (error)
 		goto Resume_devices;
 
@@ -479,7 +479,7 @@ int hibernation_platform_enter(void)
  Platofrm_finish:
 	hibernation_ops->finish();
 
-	device_power_up(PMSG_RESTORE);
+	device_suspend_noirq(PMSG_RESTORE);
 
  Resume_devices:
 	entering_platform_hibernation = false;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 8680282..2f6638e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -295,7 +295,7 @@ static int suspend_enter(suspend_state_t state)
 			return error;
 	}
 
-	error = device_power_down(PMSG_SUSPEND);
+	error = device_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
 		goto Platfrom_finish;
@@ -335,7 +335,7 @@ static int suspend_enter(suspend_state_t state)
 		suspend_ops->wake();
 
  Power_up_devices:
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 
  Platfrom_finish:
 	if (suspend_ops->finish)
-- 
cgit v0.10.2


From d161630297a20802d01c55847bfcba85d2118a9f Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sun, 24 May 2009 22:05:42 +0200
Subject: PM core: rename suspend and resume functions

This patch (as1241) renames a bunch of functions in the PM core.
Rather than go through a boring list of name changes, suffice it to
say that in the end we have a bunch of pairs of functions:

	device_resume_noirq	dpm_resume_noirq
	device_resume		dpm_resume
	device_complete		dpm_complete
	device_suspend_noirq	dpm_suspend_noirq
	device_suspend		dpm_suspend
	device_prepare		dpm_prepare

in which device_X does the X operation on a single device and dpm_X
invokes device_X for all devices in the dpm_list.

In addition, the old dpm_power_up and device_resume_noirq have been
combined into a single function (dpm_resume_noirq).

Lastly, dpm_suspend_start and dpm_resume_end are the renamed versions
of the former top-level device_suspend and device_resume routines.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 31ae547..79302e9 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1233,9 +1233,9 @@ static int suspend(int vetoable)
 	int err;
 	struct apm_user	*as;
 
-	device_suspend(PMSG_SUSPEND);
+	dpm_suspend_start(PMSG_SUSPEND);
 
-	device_suspend_noirq(PMSG_SUSPEND);
+	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1259,9 +1259,9 @@ static int suspend(int vetoable)
 	sysdev_resume();
 	local_irq_enable();
 
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
-	device_resume(PMSG_RESUME);
+	dpm_resume_end(PMSG_RESUME);
 	queue_event(APM_NORMAL_RESUME, NULL);
 	spin_lock(&user_list_lock);
 	for (as = user_list; as != NULL; as = as->next) {
@@ -1277,7 +1277,7 @@ static void standby(void)
 {
 	int err;
 
-	device_suspend_noirq(PMSG_SUSPEND);
+	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1291,7 +1291,7 @@ static void standby(void)
 	sysdev_resume();
 	local_irq_enable();
 
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 }
 
 static apm_event_t get_event(void)
@@ -1376,7 +1376,7 @@ static void check_events(void)
 			ignore_bounce = 1;
 			if ((event != APM_NORMAL_RESUME)
 			    || (ignore_normal_resume == 0)) {
-				device_resume(PMSG_RESUME);
+				dpm_resume_end(PMSG_RESUME);
 				queue_event(event, NULL);
 			}
 			ignore_normal_resume = 0;
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index c5a35bc..1f3d822 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -315,13 +315,13 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	__device_resume_noirq - Power on one device (early resume).
+ *	device_resume_noirq - Power on one device (early resume).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	Must be called with interrupts disabled.
  */
-static int __device_resume_noirq(struct device *dev, pm_message_t state)
+static int device_resume_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -344,16 +344,16 @@ static int __device_resume_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *	dpm_resume_noirq - Power on all regular (non-sysdev) devices.
  *	@state: PM transition of the system being carried out.
  *
- *	Execute the appropriate "noirq resume" callback for all devices marked
- *	as DPM_OFF_IRQ.
+ *	Call the "noirq" resume handlers for all devices marked as
+ *	DPM_OFF_IRQ and enable device drivers to receive interrupts.
  *
  *	Must be called under dpm_list_mtx.  Device drivers should not receive
  *	interrupts while it's being executed.
  */
-static void dpm_power_up(pm_message_t state)
+void dpm_resume_noirq(pm_message_t state)
 {
 	struct device *dev;
 
@@ -363,33 +363,21 @@ static void dpm_power_up(pm_message_t state)
 			int error;
 
 			dev->power.status = DPM_OFF;
-			error = __device_resume_noirq(dev, state);
+			error = device_resume_noirq(dev, state);
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
 	mutex_unlock(&dpm_list_mtx);
-}
-
-/**
- *	device_resume_noirq - Turn on all devices that need special attention.
- *	@state: PM transition of the system being carried out.
- *
- *	Call the "early" resume handlers and enable device drivers to receive
- *	interrupts.
- */
-void device_resume_noirq(pm_message_t state)
-{
-	dpm_power_up(state);
 	resume_device_irqs();
 }
-EXPORT_SYMBOL_GPL(device_resume_noirq);
+EXPORT_SYMBOL_GPL(dpm_resume_noirq);
 
 /**
- *	resume_device - Restore state for one device.
+ *	device_resume - Restore state for one device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int resume_device(struct device *dev, pm_message_t state)
+static int device_resume(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -462,7 +450,7 @@ static void dpm_resume(pm_message_t state)
 			dev->power.status = DPM_RESUMING;
 			mutex_unlock(&dpm_list_mtx);
 
-			error = resume_device(dev, state);
+			error = device_resume(dev, state);
 
 			mutex_lock(&dpm_list_mtx);
 			if (error)
@@ -480,11 +468,11 @@ static void dpm_resume(pm_message_t state)
 }
 
 /**
- *	complete_device - Complete a PM transition for given device
+ *	device_complete - Complete a PM transition for given device
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static void complete_device(struct device *dev, pm_message_t state)
+static void device_complete(struct device *dev, pm_message_t state)
 {
 	down(&dev->sem);
 
@@ -527,7 +515,7 @@ static void dpm_complete(pm_message_t state)
 			dev->power.status = DPM_ON;
 			mutex_unlock(&dpm_list_mtx);
 
-			complete_device(dev, state);
+			device_complete(dev, state);
 
 			mutex_lock(&dpm_list_mtx);
 		}
@@ -540,19 +528,19 @@ static void dpm_complete(pm_message_t state)
 }
 
 /**
- *	device_resume - Restore state of each device in system.
+ *	dpm_resume_end - Restore state of each device in system.
  *	@state: PM transition of the system being carried out.
  *
  *	Resume all the devices, unlock them all, and allow new
  *	devices to be registered once again.
  */
-void device_resume(pm_message_t state)
+void dpm_resume_end(pm_message_t state)
 {
 	might_sleep();
 	dpm_resume(state);
 	dpm_complete(state);
 }
-EXPORT_SYMBOL_GPL(device_resume);
+EXPORT_SYMBOL_GPL(dpm_resume_end);
 
 
 /*------------------------- Suspend routines -------------------------*/
@@ -577,13 +565,13 @@ static pm_message_t resume_event(pm_message_t sleep_state)
 }
 
 /**
- *	__device_suspend_noirq - Shut down one device (late suspend).
+ *	device_suspend_noirq - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	This is called with interrupts off and only a single CPU running.
  */
-static int __device_suspend_noirq(struct device *dev, pm_message_t state)
+static int device_suspend_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -602,15 +590,15 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- *	device_suspend_noirq - Shut down special devices.
+ *	dpm_suspend_noirq - Power down all regular (non-sysdev) devices.
  *	@state: PM transition of the system being carried out.
  *
- *	Prevent device drivers from receiving interrupts and call the "late"
+ *	Prevent device drivers from receiving interrupts and call the "noirq"
  *	suspend handlers.
  *
  *	Must be called under dpm_list_mtx.
  */
-int device_suspend_noirq(pm_message_t state)
+int dpm_suspend_noirq(pm_message_t state)
 {
 	struct device *dev;
 	int error = 0;
@@ -618,7 +606,7 @@ int device_suspend_noirq(pm_message_t state)
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
-		error = __device_suspend_noirq(dev, state);
+		error = device_suspend_noirq(dev, state);
 		if (error) {
 			pm_dev_err(dev, state, " late", error);
 			break;
@@ -627,17 +615,17 @@ int device_suspend_noirq(pm_message_t state)
 	}
 	mutex_unlock(&dpm_list_mtx);
 	if (error)
-		device_resume_noirq(resume_event(state));
+		dpm_resume_noirq(resume_event(state));
 	return error;
 }
-EXPORT_SYMBOL_GPL(device_suspend_noirq);
+EXPORT_SYMBOL_GPL(dpm_suspend_noirq);
 
 /**
- *	suspend_device - Save state of one device.
+ *	device_suspend - Save state of one device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int suspend_device(struct device *dev, pm_message_t state)
+static int device_suspend(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -704,7 +692,7 @@ static int dpm_suspend(pm_message_t state)
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
 
-		error = suspend_device(dev, state);
+		error = device_suspend(dev, state);
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
@@ -723,11 +711,11 @@ static int dpm_suspend(pm_message_t state)
 }
 
 /**
- *	prepare_device - Execute the ->prepare() callback(s) for given device.
+ *	device_prepare - Execute the ->prepare() callback(s) for given device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int prepare_device(struct device *dev, pm_message_t state)
+static int device_prepare(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -781,7 +769,7 @@ static int dpm_prepare(pm_message_t state)
 		dev->power.status = DPM_PREPARING;
 		mutex_unlock(&dpm_list_mtx);
 
-		error = prepare_device(dev, state);
+		error = device_prepare(dev, state);
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
@@ -807,12 +795,12 @@ static int dpm_prepare(pm_message_t state)
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
+ *	dpm_suspend_start - Save state and stop all devices in system.
  *	@state: PM transition of the system being carried out.
  *
  *	Prepare and suspend all devices.
  */
-int device_suspend(pm_message_t state)
+int dpm_suspend_start(pm_message_t state)
 {
 	int error;
 
@@ -822,7 +810,7 @@ int device_suspend(pm_message_t state)
 		error = dpm_suspend(state);
 	return error;
 }
-EXPORT_SYMBOL_GPL(device_suspend);
+EXPORT_SYMBOL_GPL(dpm_suspend_start);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index d5b327a..10d03d7 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -43,7 +43,7 @@ static int xen_suspend(void *data)
 	if (err) {
 		printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
 			err);
-		device_resume_noirq(PMSG_RESUME);
+		dpm_resume_noirq(PMSG_RESUME);
 		return err;
 	}
 
@@ -69,7 +69,7 @@ static int xen_suspend(void *data)
 	}
 
 	sysdev_resume();
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
 	return 0;
 }
@@ -92,18 +92,18 @@ static void do_suspend(void)
 	}
 #endif
 
-	err = device_suspend(PMSG_SUSPEND);
+	err = dpm_suspend_start(PMSG_SUSPEND);
 	if (err) {
-		printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
+		printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err);
 		goto out;
 	}
 
 	printk(KERN_DEBUG "suspending xenstore...\n");
 	xs_suspend();
 
-	err = device_suspend_noirq(PMSG_SUSPEND);
+	err = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (err) {
-		printk(KERN_ERR "device_suspend_noirq failed: %d\n", err);
+		printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
 		goto resume_devices;
 	}
 
@@ -119,10 +119,10 @@ static void do_suspend(void)
 	} else
 		xs_suspend_cancel();
 
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
 resume_devices:
-	device_resume(PMSG_RESUME);
+	dpm_resume_end(PMSG_RESUME);
 
 	/* Make sure timer events get retriggered on all CPUs */
 	clock_was_set();
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 2170252..b3f7476 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -382,14 +382,13 @@ struct dev_pm_info {
 #ifdef CONFIG_PM_SLEEP
 extern void device_pm_lock(void);
 extern int sysdev_resume(void);
-extern void device_resume_noirq(pm_message_t state);
-extern void device_resume(pm_message_t state);
+extern void dpm_resume_noirq(pm_message_t state);
+extern void dpm_resume_end(pm_message_t state);
 
 extern void device_pm_unlock(void);
 extern int sysdev_suspend(pm_message_t state);
-extern int device_suspend_noirq(pm_message_t state);
-extern int device_suspend(pm_message_t state);
-extern int device_prepare_suspend(pm_message_t state);
+extern int dpm_suspend_noirq(pm_message_t state);
+extern int dpm_suspend_start(pm_message_t state);
 
 extern void __suspend_report_result(const char *function, void *fn, int ret);
 
@@ -403,7 +402,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
 #define device_pm_lock() do {} while (0)
 #define device_pm_unlock() do {} while (0)
 
-static inline int device_suspend(pm_message_t state)
+static inline int dpm_suspend_start(pm_message_t state)
 {
 	return 0;
 }
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 5a3da87..ae1c352 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1448,17 +1448,17 @@ int kernel_kexec(void)
 			goto Restore_console;
 		}
 		suspend_console();
-		error = device_suspend(PMSG_FREEZE);
+		error = dpm_suspend_start(PMSG_FREEZE);
 		if (error)
 			goto Resume_console;
-		/* At this point, device_suspend() has been called,
-		 * but *not* device_suspend_noirq(). We *must* call
-		 * device_suspend_noirq() now.  Otherwise, drivers for
+		/* At this point, dpm_suspend_start() has been called,
+		 * but *not* dpm_suspend_noirq(). We *must* call
+		 * dpm_suspend_noirq() now.  Otherwise, drivers for
 		 * some devices (e.g. interrupt controllers) become
 		 * desynchronized with the actual state of the
 		 * hardware at resume time, and evil weirdness ensues.
 		 */
-		error = device_suspend_noirq(PMSG_FREEZE);
+		error = dpm_suspend_noirq(PMSG_FREEZE);
 		if (error)
 			goto Resume_devices;
 		error = disable_nonboot_cpus();
@@ -1486,9 +1486,9 @@ int kernel_kexec(void)
 		local_irq_enable();
  Enable_cpus:
 		enable_nonboot_cpus();
-		device_resume_noirq(PMSG_RESTORE);
+		dpm_resume_noirq(PMSG_RESTORE);
  Resume_devices:
-		device_resume(PMSG_RESTORE);
+		dpm_resume_end(PMSG_RESTORE);
  Resume_console:
 		resume_console();
 		thaw_processes();
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 1c18bc8..a9beba6 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -215,13 +215,13 @@ static int create_image(int platform_mode)
 	if (error)
 		return error;
 
-	/* At this point, device_suspend() has been called, but *not*
-	 * device_suspend_noirq(). We *must* call device_suspend_noirq() now.
+	/* At this point, dpm_suspend_start() has been called, but *not*
+	 * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now.
 	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
 	 * become desynchronized with the actual state of the hardware
 	 * at resume time, and evil weirdness ensues.
 	 */
-	error = device_suspend_noirq(PMSG_FREEZE);
+	error = dpm_suspend_noirq(PMSG_FREEZE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting hibernation\n");
@@ -262,7 +262,7 @@ static int create_image(int platform_mode)
 
  Power_up:
 	sysdev_resume();
-	/* NOTE:  device_resume_noirq() is just a resume() for devices
+	/* NOTE:  dpm_resume_noirq() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
 
@@ -275,7 +275,7 @@ static int create_image(int platform_mode)
  Platform_finish:
 	platform_finish(platform_mode);
 
-	device_resume_noirq(in_suspend ?
+	dpm_resume_noirq(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 
 	return error;
@@ -304,7 +304,7 @@ int hibernation_snapshot(int platform_mode)
 		goto Close;
 
 	suspend_console();
-	error = device_suspend(PMSG_FREEZE);
+	error = dpm_suspend_start(PMSG_FREEZE);
 	if (error)
 		goto Recover_platform;
 
@@ -315,7 +315,7 @@ int hibernation_snapshot(int platform_mode)
 	/* Control returns here after successful restore */
 
  Resume_devices:
-	device_resume(in_suspend ?
+	dpm_resume_end(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 	resume_console();
  Close:
@@ -339,7 +339,7 @@ static int resume_target_kernel(bool platform_mode)
 {
 	int error;
 
-	error = device_suspend_noirq(PMSG_QUIESCE);
+	error = dpm_suspend_noirq(PMSG_QUIESCE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting resume\n");
@@ -394,7 +394,7 @@ static int resume_target_kernel(bool platform_mode)
  Cleanup:
 	platform_restore_cleanup(platform_mode);
 
-	device_resume_noirq(PMSG_RECOVER);
+	dpm_resume_noirq(PMSG_RECOVER);
 
 	return error;
 }
@@ -414,10 +414,10 @@ int hibernation_restore(int platform_mode)
 
 	pm_prepare_console();
 	suspend_console();
-	error = device_suspend(PMSG_QUIESCE);
+	error = dpm_suspend_start(PMSG_QUIESCE);
 	if (!error) {
 		error = resume_target_kernel(platform_mode);
-		device_resume(PMSG_RECOVER);
+		dpm_resume_end(PMSG_RECOVER);
 	}
 	resume_console();
 	pm_restore_console();
@@ -447,14 +447,14 @@ int hibernation_platform_enter(void)
 
 	entering_platform_hibernation = true;
 	suspend_console();
-	error = device_suspend(PMSG_HIBERNATE);
+	error = dpm_suspend_start(PMSG_HIBERNATE);
 	if (error) {
 		if (hibernation_ops->recover)
 			hibernation_ops->recover();
 		goto Resume_devices;
 	}
 
-	error = device_suspend_noirq(PMSG_HIBERNATE);
+	error = dpm_suspend_noirq(PMSG_HIBERNATE);
 	if (error)
 		goto Resume_devices;
 
@@ -479,11 +479,11 @@ int hibernation_platform_enter(void)
  Platofrm_finish:
 	hibernation_ops->finish();
 
-	device_suspend_noirq(PMSG_RESTORE);
+	dpm_suspend_noirq(PMSG_RESTORE);
 
  Resume_devices:
 	entering_platform_hibernation = false;
-	device_resume(PMSG_RESTORE);
+	dpm_resume_end(PMSG_RESTORE);
 	resume_console();
 
  Close:
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 2f6638e..46386b9 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -295,7 +295,7 @@ static int suspend_enter(suspend_state_t state)
 			return error;
 	}
 
-	error = device_suspend_noirq(PMSG_SUSPEND);
+	error = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
 		goto Platfrom_finish;
@@ -335,7 +335,7 @@ static int suspend_enter(suspend_state_t state)
 		suspend_ops->wake();
 
  Power_up_devices:
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
  Platfrom_finish:
 	if (suspend_ops->finish)
@@ -363,7 +363,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 	}
 	suspend_console();
 	suspend_test_start();
-	error = device_suspend(PMSG_SUSPEND);
+	error = dpm_suspend_start(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to suspend\n");
 		goto Recover_platform;
@@ -376,7 +376,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 
  Resume_devices:
 	suspend_test_start();
-	device_resume(PMSG_RESUME);
+	dpm_resume_end(PMSG_RESUME);
 	suspend_test_finish("resume devices");
 	resume_console();
  Close:
-- 
cgit v0.10.2


From e240b58c79144708530138e05f17c6d0d8d744a8 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Sun, 24 May 2009 22:05:54 +0200
Subject: PM: Remove bus_type suspend_late()/resume_early() V2

Remove the ->suspend_late() and ->resume_early() callbacks
from struct bus_type V2. These callbacks are legacy stuff
at this point and since there seem to be no in-tree users
we may as well remove them. New users should use dev_pm_ops.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 421e7d0..c9abbd8 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -75,9 +75,6 @@ may need to apply in domain-specific ways to their devices:
 struct bus_type {
 	...
 	int  (*suspend)(struct device *dev, pm_message_t state);
-	int  (*suspend_late)(struct device *dev, pm_message_t state);
-
-	int  (*resume_early)(struct device *dev);
 	int  (*resume)(struct device *dev);
 };
 
@@ -226,20 +223,7 @@ The phases are seen by driver notifications issued in this order:
 
 	This call should handle parts of device suspend logic that require
 	sleeping.  It probably does work to quiesce the device which hasn't
-	been abstracted into class.suspend() or bus.suspend_late().
-
-   3	bus.suspend_late(dev, message) is called with IRQs disabled, and
-	with only one CPU active.  Until the bus.resume_early() phase
-	completes (see later), IRQs are not enabled again.  This method
-	won't be exposed by all busses; for message based busses like USB,
-	I2C, or SPI, device interactions normally require IRQs.  This bus
-	call may be morphed into a driver call with bus-specific parameters.
-
-	This call might save low level hardware state that might otherwise
-	be lost in the upcoming low power state, and actually put the
-	device into a low power state ... so that in some cases the device
-	may stay partly usable until this late.  This "late" call may also
-	help when coping with hardware that behaves badly.
+	been abstracted into class.suspend().
 
 The pm_message_t parameter is currently used to refine those semantics
 (described later).
@@ -351,19 +335,11 @@ devices processing each phase's calls before the next phase begins.
 
 The phases are seen by driver notifications issued in this order:
 
-   1	bus.resume_early(dev) is called with IRQs disabled, and with
-   	only one CPU active.  As with bus.suspend_late(), this method
-	won't be supported on busses that require IRQs in order to
-	interact with devices.
-
-	This reverses the effects of bus.suspend_late().
-
-   2	bus.resume(dev) is called next.  This may be morphed into a device
-   	driver call with bus-specific parameters; implementations may sleep.
-
-	This reverses the effects of bus.suspend().
+   1	bus.resume(dev) reverses the effects of bus.suspend().  This may
+	be morphed into a device driver call with bus-specific parameters;
+	implementations may sleep.
 
-   3	class.resume(dev) is called for devices associated with a class
+   2	class.resume(dev) is called for devices associated with a class
 	that has such a method.  Implementations may sleep.
 
 	This reverses the effects of class.suspend(), and would usually
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 1f3d822..68f9f3c 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -334,9 +334,6 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
 	if (dev->bus->pm) {
 		pm_dev_dbg(dev, state, "EARLY ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
-	} else if (dev->bus->resume_early) {
-		pm_dev_dbg(dev, state, "legacy EARLY ");
-		error = dev->bus->resume_early(dev);
 	}
  End:
 	TRACE_RESUME(error);
@@ -581,10 +578,6 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 	if (dev->bus->pm) {
 		pm_dev_dbg(dev, state, "LATE ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
-	} else if (dev->bus->suspend_late) {
-		pm_dev_dbg(dev, state, "legacy LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
 	}
 	return error;
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 5d5c197..84d79cd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -62,8 +62,6 @@ struct bus_type {
 	void (*shutdown)(struct device *dev);
 
 	int (*suspend)(struct device *dev, pm_message_t state);
-	int (*suspend_late)(struct device *dev, pm_message_t state);
-	int (*resume_early)(struct device *dev);
 	int (*resume)(struct device *dev);
 
 	struct dev_pm_ops *pm;
-- 
cgit v0.10.2


From c6f37f12197ac3bd2e5a35f2f0e195ae63d437de Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 24 May 2009 22:16:31 +0200
Subject: PM/Suspend: Do not shrink memory before suspend

Remove the shrinking of memory from the suspend-to-RAM code, where
it is not really necessary.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Nigel Cunningham <nigel@tuxonice.net>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>

diff --git a/kernel/power/main.c b/kernel/power/main.c
index 46386b9..2a19f34 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -188,9 +188,6 @@ static void suspend_test_finish(const char *label)
 
 #endif
 
-/* This is just an arbitrary number */
-#define FREE_PAGE_NUMBER (100)
-
 static struct platform_suspend_ops *suspend_ops;
 
 /**
@@ -226,7 +223,6 @@ int suspend_valid_only_mem(suspend_state_t state)
 static int suspend_prepare(void)
 {
 	int error;
-	unsigned int free_pages;
 
 	if (!suspend_ops || !suspend_ops->enter)
 		return -EPERM;
@@ -241,24 +237,10 @@ static int suspend_prepare(void)
 	if (error)
 		goto Finish;
 
-	if (suspend_freeze_processes()) {
-		error = -EAGAIN;
-		goto Thaw;
-	}
-
-	free_pages = global_page_state(NR_FREE_PAGES);
-	if (free_pages < FREE_PAGE_NUMBER) {
-		pr_debug("PM: free some memory\n");
-		shrink_all_memory(FREE_PAGE_NUMBER - free_pages);
-		if (nr_free_pages() < FREE_PAGE_NUMBER) {
-			error = -ENOMEM;
-			printk(KERN_ERR "PM: No enough memory\n");
-		}
-	}
+	error = suspend_freeze_processes();
 	if (!error)
 		return 0;
 
- Thaw:
 	suspend_thaw_processes();
 	usermodehelper_enable();
  Finish:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d254306..95c08a8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2056,7 +2056,7 @@ unsigned long global_lru_pages(void)
 		+ global_page_state(NR_INACTIVE_FILE);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_HIBERNATION
 /*
  * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
  * from LRU lists system-wide, for given pass and priority.
@@ -2196,7 +2196,7 @@ out:
 
 	return sc.nr_reclaimed;
 }
-#endif
+#endif /* CONFIG_HIBERNATION */
 
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
    not required for correctness.  So if the last cpu in a node goes
-- 
cgit v0.10.2


From fe419535d82724314bbf1244a0e740e4ea1bd3ae Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 11 Jun 2009 23:11:17 +0200
Subject: PM/Hibernate: Move memory shrinking to snapshot.c (rev. 2)

A future patch is going to modify the memory shrinking code so that
it will make memory allocations to free memory instead of using an
artificial memory shrinking mechanism for that.  For this purpose it
is convenient to move swsusp_shrink_memory() from
kernel/power/swsusp.c to kernel/power/snapshot.c, because the new
memory-shrinking code is going to use things that are local to
kernel/power/snapshot.c .

[rev. 2: Make some functions static and remove their headers from
 kernel/power/power.h]

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>

diff --git a/kernel/power/power.h b/kernel/power/power.h
index 46b5ec7..ec4dbdf 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void);
 
 extern int create_basic_memory_bitmaps(void);
 extern void free_basic_memory_bitmaps(void);
-extern unsigned int count_data_pages(void);
+extern int swsusp_shrink_memory(void);
 
 /**
  *	Auxiliary structure used for reading the snapshot image data and
@@ -149,7 +149,6 @@ extern int swsusp_swap_in_use(void);
 
 /* kernel/power/disk.c */
 extern int swsusp_check(void);
-extern int swsusp_shrink_memory(void);
 extern void swsusp_free(void);
 extern int swsusp_read(unsigned int *flags_p);
 extern int swsusp_write(unsigned int flags);
@@ -176,7 +175,6 @@ extern int pm_notifier_call_chain(unsigned long val);
 #endif
 
 #ifdef CONFIG_HIGHMEM
-unsigned int count_highmem_pages(void);
 int restore_highmem(void);
 #else
 static inline unsigned int count_highmem_pages(void) { return 0; }
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 33e2e4a..523a451 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -39,6 +39,14 @@ static int swsusp_page_is_free(struct page *);
 static void swsusp_set_page_forbidden(struct page *);
 static void swsusp_unset_page_forbidden(struct page *);
 
+/*
+ * Preferred image size in bytes (tunable via /sys/power/image_size).
+ * When it is set to N, swsusp will do its best to ensure the image
+ * size will not exceed N bytes, but if that is impossible, it will
+ * try to create the smallest image possible.
+ */
+unsigned long image_size = 500 * 1024 * 1024;
+
 /* List of PBEs needed for restoring the pages that were allocated before
  * the suspend and included in the suspend image, but have also been
  * allocated by the "resume" kernel, so their contents cannot be written
@@ -840,7 +848,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
  *	pages.
  */
 
-unsigned int count_highmem_pages(void)
+static unsigned int count_highmem_pages(void)
 {
 	struct zone *zone;
 	unsigned int n = 0;
@@ -902,7 +910,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn)
  *	pages.
  */
 
-unsigned int count_data_pages(void)
+static unsigned int count_data_pages(void)
 {
 	struct zone *zone;
 	unsigned long pfn, max_zone_pfn;
@@ -1058,6 +1066,74 @@ void swsusp_free(void)
 	buffer = NULL;
 }
 
+/**
+ *	swsusp_shrink_memory -  Try to free as much memory as needed
+ *
+ *	... but do not OOM-kill anyone
+ *
+ *	Notice: all userland should be stopped before it is called, or
+ *	livelock is possible.
+ */
+
+#define SHRINK_BITE	10000
+static inline unsigned long __shrink_memory(long tmp)
+{
+	if (tmp > SHRINK_BITE)
+		tmp = SHRINK_BITE;
+	return shrink_all_memory(tmp);
+}
+
+int swsusp_shrink_memory(void)
+{
+	long tmp;
+	struct zone *zone;
+	unsigned long pages = 0;
+	unsigned int i = 0;
+	char *p = "-\\|/";
+	struct timeval start, stop;
+
+	printk(KERN_INFO "PM: Shrinking memory...  ");
+	do_gettimeofday(&start);
+	do {
+		long size, highmem_size;
+
+		highmem_size = count_highmem_pages();
+		size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
+		tmp = size;
+		size += highmem_size;
+		for_each_populated_zone(zone) {
+			tmp += snapshot_additional_pages(zone);
+			if (is_highmem(zone)) {
+				highmem_size -=
+					zone_page_state(zone, NR_FREE_PAGES);
+			} else {
+				tmp -= zone_page_state(zone, NR_FREE_PAGES);
+				tmp += zone->lowmem_reserve[ZONE_NORMAL];
+			}
+		}
+
+		if (highmem_size < 0)
+			highmem_size = 0;
+
+		tmp += highmem_size;
+		if (tmp > 0) {
+			tmp = __shrink_memory(tmp);
+			if (!tmp)
+				return -ENOMEM;
+			pages += tmp;
+		} else if (size > image_size / PAGE_SIZE) {
+			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
+			pages += tmp;
+		}
+		printk("\b%c", p[i++%4]);
+	} while (tmp > 0);
+	do_gettimeofday(&stop);
+	printk("\bdone (%lu pages freed)\n", pages);
+	swsusp_show_speed(&start, &stop, pages, "Freed");
+
+	return 0;
+}
+
 #ifdef CONFIG_HIGHMEM
 /**
   *	count_pages_for_highmem - compute the number of non-highmem pages
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 78c3504..87b901c 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -55,14 +55,6 @@
 
 #include "power.h"
 
-/*
- * Preferred image size in bytes (tunable via /sys/power/image_size).
- * When it is set to N, swsusp will do its best to ensure the image
- * size will not exceed N bytes, but if that is impossible, it will
- * try to create the smallest image possible.
- */
-unsigned long image_size = 500 * 1024 * 1024;
-
 int in_suspend __nosavedata = 0;
 
 /**
@@ -195,74 +187,6 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
 			kps / 1000, (kps % 1000) / 10);
 }
 
-/**
- *	swsusp_shrink_memory -  Try to free as much memory as needed
- *
- *	... but do not OOM-kill anyone
- *
- *	Notice: all userland should be stopped before it is called, or
- *	livelock is possible.
- */
-
-#define SHRINK_BITE	10000
-static inline unsigned long __shrink_memory(long tmp)
-{
-	if (tmp > SHRINK_BITE)
-		tmp = SHRINK_BITE;
-	return shrink_all_memory(tmp);
-}
-
-int swsusp_shrink_memory(void)
-{
-	long tmp;
-	struct zone *zone;
-	unsigned long pages = 0;
-	unsigned int i = 0;
-	char *p = "-\\|/";
-	struct timeval start, stop;
-
-	printk(KERN_INFO "PM: Shrinking memory...  ");
-	do_gettimeofday(&start);
-	do {
-		long size, highmem_size;
-
-		highmem_size = count_highmem_pages();
-		size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
-		tmp = size;
-		size += highmem_size;
-		for_each_populated_zone(zone) {
-			tmp += snapshot_additional_pages(zone);
-			if (is_highmem(zone)) {
-				highmem_size -=
-					zone_page_state(zone, NR_FREE_PAGES);
-			} else {
-				tmp -= zone_page_state(zone, NR_FREE_PAGES);
-				tmp += zone->lowmem_reserve[ZONE_NORMAL];
-			}
-		}
-
-		if (highmem_size < 0)
-			highmem_size = 0;
-
-		tmp += highmem_size;
-		if (tmp > 0) {
-			tmp = __shrink_memory(tmp);
-			if (!tmp)
-				return -ENOMEM;
-			pages += tmp;
-		} else if (size > image_size / PAGE_SIZE) {
-			tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
-			pages += tmp;
-		}
-		printk("\b%c", p[i++%4]);
-	} while (tmp > 0);
-	do_gettimeofday(&stop);
-	printk("\bdone (%lu pages freed)\n", pages);
-	swsusp_show_speed(&start, &stop, pages, "Freed");
-
-	return 0;
-}
-
 /*
  * Platforms, like ACPI, may want us to save some memory used by them during
  * hibernation and to restore the contents of this memory during the subsequent
-- 
cgit v0.10.2


From 00725787511e20dbd1fdc1fb233606120ae5c8cf Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 4 Jun 2009 22:13:25 +0200
Subject: PM: Remove device_type suspend()/resume()

This patch removes the legacy callbacks ->suspend() and
->resume() from struct device_type. These callbacks seem
unused, and new code should instead make use of struct
dev_pm_ops.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 68f9f3c..fae7254 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -399,9 +399,6 @@ static int device_resume(struct device *dev, pm_message_t state)
 		if (dev->type->pm) {
 			pm_dev_dbg(dev, state, "type ");
 			error = pm_op(dev, dev->type->pm, state);
-		} else if (dev->type->resume) {
-			pm_dev_dbg(dev, state, "legacy type ");
-			error = dev->type->resume(dev);
 		}
 		if (error)
 			goto End;
@@ -641,10 +638,6 @@ static int device_suspend(struct device *dev, pm_message_t state)
 		if (dev->type->pm) {
 			pm_dev_dbg(dev, state, "type ");
 			error = pm_op(dev, dev->type->pm, state);
-		} else if (dev->type->suspend) {
-			pm_dev_dbg(dev, state, "legacy type ");
-			error = dev->type->suspend(dev, state);
-			suspend_report_result(dev->type->suspend, error);
 		}
 		if (error)
 			goto End;
diff --git a/include/linux/device.h b/include/linux/device.h
index 84d79cd..a4a7b10 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -289,9 +289,6 @@ struct device_type {
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
 	void (*release)(struct device *dev);
 
-	int (*suspend)(struct device *dev, pm_message_t state);
-	int (*resume)(struct device *dev);
-
 	struct dev_pm_ops *pm;
 };
 
-- 
cgit v0.10.2


From 783ea7d4eeefe895f2731fe73ac951e94418927b Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 4 Jun 2009 22:13:33 +0200
Subject: Driver Core: Rework platform suspend/resume, print warning

This patch reworks the platform driver code for legacy
suspend and resume to avoid installing callbacks in
struct device_driver. A warning is also added telling
users to update the platform driver to use dev_pm_ops.

The functions platform_legacy_suspend()/resume() directly
call suspend and resume callbacks in struct platform_driver
instead of wrapping things in platform_drv_suspend()/resume().

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 8b4708e..ead3f64 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -469,22 +469,6 @@ static void platform_drv_shutdown(struct device *_dev)
 	drv->shutdown(dev);
 }
 
-static int platform_drv_suspend(struct device *_dev, pm_message_t state)
-{
-	struct platform_driver *drv = to_platform_driver(_dev->driver);
-	struct platform_device *dev = to_platform_device(_dev);
-
-	return drv->suspend(dev, state);
-}
-
-static int platform_drv_resume(struct device *_dev)
-{
-	struct platform_driver *drv = to_platform_driver(_dev->driver);
-	struct platform_device *dev = to_platform_device(_dev);
-
-	return drv->resume(dev);
-}
-
 /**
  * platform_driver_register
  * @drv: platform driver structure
@@ -498,10 +482,10 @@ int platform_driver_register(struct platform_driver *drv)
 		drv->driver.remove = platform_drv_remove;
 	if (drv->shutdown)
 		drv->driver.shutdown = platform_drv_shutdown;
-	if (drv->suspend)
-		drv->driver.suspend = platform_drv_suspend;
-	if (drv->resume)
-		drv->driver.resume = platform_drv_resume;
+	if (drv->suspend || drv->resume)
+		pr_warning("Platform driver '%s' needs updating - please use "
+			"dev_pm_ops\n", drv->driver.name);
+
 	return driver_register(&drv->driver);
 }
 EXPORT_SYMBOL_GPL(platform_driver_register);
@@ -633,10 +617,12 @@ static int platform_match(struct device *dev, struct device_driver *drv)
 
 static int platform_legacy_suspend(struct device *dev, pm_message_t mesg)
 {
+	struct platform_driver *pdrv = to_platform_driver(dev->driver);
+	struct platform_device *pdev = to_platform_device(dev);
 	int ret = 0;
 
-	if (dev->driver && dev->driver->suspend)
-		ret = dev->driver->suspend(dev, mesg);
+	if (dev->driver && pdrv->suspend)
+		ret = pdrv->suspend(pdev, mesg);
 
 	return ret;
 }
@@ -667,10 +653,12 @@ static int platform_legacy_resume_early(struct device *dev)
 
 static int platform_legacy_resume(struct device *dev)
 {
+	struct platform_driver *pdrv = to_platform_driver(dev->driver);
+	struct platform_device *pdev = to_platform_device(dev);
 	int ret = 0;
 
-	if (dev->driver && dev->driver->resume)
-		ret = dev->driver->resume(dev);
+	if (dev->driver && pdrv->resume)
+		ret = pdrv->resume(pdev);
 
 	return ret;
 }
-- 
cgit v0.10.2


From a9d7052363a6e06bb623ed1876c56c7ca5b2c6d8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 10 Jun 2009 01:27:12 +0200
Subject: PM: Separate suspend to RAM functionality from core

Move the suspend to RAM and standby code from kernel/power/main.c
to two separate files, kernel/power/suspend.c containing the basic
functions and kernel/power/suspend_test.c containing the automatic
suspend test facility based on the RTC clock alarm.

There are no changes in functionality related to these modifications.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>

diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 720ea4f..c4baf1b 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -6,6 +6,8 @@ endif
 obj-$(CONFIG_PM)		+= main.o
 obj-$(CONFIG_PM_SLEEP)		+= console.o
 obj-$(CONFIG_FREEZER)		+= process.o
+obj-$(CONFIG_SUSPEND)		+= suspend.o
+obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o disk.o snapshot.o swap.o user.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 2a19f34..f710e36 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -8,20 +8,9 @@
  *
  */
 
-#include <linux/module.h>
-#include <linux/suspend.h>
 #include <linux/kobject.h>
 #include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/kmod.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/cpu.h>
 #include <linux/resume-trace.h>
-#include <linux/freezer.h>
-#include <linux/vmstat.h>
-#include <linux/syscalls.h>
 
 #include "power.h"
 
@@ -119,355 +108,6 @@ power_attr(pm_test);
 
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_SUSPEND
-
-static int suspend_test(int level)
-{
-#ifdef CONFIG_PM_DEBUG
-	if (pm_test_level == level) {
-		printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
-		mdelay(5000);
-		return 1;
-	}
-#endif /* !CONFIG_PM_DEBUG */
-	return 0;
-}
-
-#ifdef CONFIG_PM_TEST_SUSPEND
-
-/*
- * We test the system suspend code by setting an RTC wakealarm a short
- * time in the future, then suspending.  Suspending the devices won't
- * normally take long ... some systems only need a few milliseconds.
- *
- * The time it takes is system-specific though, so when we test this
- * during system bootup we allow a LOT of time.
- */
-#define TEST_SUSPEND_SECONDS	5
-
-static unsigned long suspend_test_start_time;
-
-static void suspend_test_start(void)
-{
-	/* FIXME Use better timebase than "jiffies", ideally a clocksource.
-	 * What we want is a hardware counter that will work correctly even
-	 * during the irqs-are-off stages of the suspend/resume cycle...
-	 */
-	suspend_test_start_time = jiffies;
-}
-
-static void suspend_test_finish(const char *label)
-{
-	long nj = jiffies - suspend_test_start_time;
-	unsigned msec;
-
-	msec = jiffies_to_msecs(abs(nj));
-	pr_info("PM: %s took %d.%03d seconds\n", label,
-			msec / 1000, msec % 1000);
-
-	/* Warning on suspend means the RTC alarm period needs to be
-	 * larger -- the system was sooo slooowwww to suspend that the
-	 * alarm (should have) fired before the system went to sleep!
-	 *
-	 * Warning on either suspend or resume also means the system
-	 * has some performance issues.  The stack dump of a WARN_ON
-	 * is more likely to get the right attention than a printk...
-	 */
-	WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
-}
-
-#else
-
-static void suspend_test_start(void)
-{
-}
-
-static void suspend_test_finish(const char *label)
-{
-}
-
-#endif
-
-static struct platform_suspend_ops *suspend_ops;
-
-/**
- *	suspend_set_ops - Set the global suspend method table.
- *	@ops:	Pointer to ops structure.
- */
-
-void suspend_set_ops(struct platform_suspend_ops *ops)
-{
-	mutex_lock(&pm_mutex);
-	suspend_ops = ops;
-	mutex_unlock(&pm_mutex);
-}
-
-/**
- * suspend_valid_only_mem - generic memory-only valid callback
- *
- * Platform drivers that implement mem suspend only and only need
- * to check for that in their .valid callback can use this instead
- * of rolling their own .valid callback.
- */
-int suspend_valid_only_mem(suspend_state_t state)
-{
-	return state == PM_SUSPEND_MEM;
-}
-
-/**
- *	suspend_prepare - Do prep work before entering low-power state.
- *
- *	This is common code that is called for each state that we're entering.
- *	Run suspend notifiers, allocate a console and stop all processes.
- */
-static int suspend_prepare(void)
-{
-	int error;
-
-	if (!suspend_ops || !suspend_ops->enter)
-		return -EPERM;
-
-	pm_prepare_console();
-
-	error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
-	if (error)
-		goto Finish;
-
-	error = usermodehelper_disable();
-	if (error)
-		goto Finish;
-
-	error = suspend_freeze_processes();
-	if (!error)
-		return 0;
-
-	suspend_thaw_processes();
-	usermodehelper_enable();
- Finish:
-	pm_notifier_call_chain(PM_POST_SUSPEND);
-	pm_restore_console();
-	return error;
-}
-
-/* default implementation */
-void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
-{
-	local_irq_disable();
-}
-
-/* default implementation */
-void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
-{
-	local_irq_enable();
-}
-
-/**
- *	suspend_enter - enter the desired system sleep state.
- *	@state:		state to enter
- *
- *	This function should be called after devices have been suspended.
- */
-static int suspend_enter(suspend_state_t state)
-{
-	int error;
-
-	if (suspend_ops->prepare) {
-		error = suspend_ops->prepare();
-		if (error)
-			return error;
-	}
-
-	error = dpm_suspend_noirq(PMSG_SUSPEND);
-	if (error) {
-		printk(KERN_ERR "PM: Some devices failed to power down\n");
-		goto Platfrom_finish;
-	}
-
-	if (suspend_ops->prepare_late) {
-		error = suspend_ops->prepare_late();
-		if (error)
-			goto Power_up_devices;
-	}
-
-	if (suspend_test(TEST_PLATFORM))
-		goto Platform_wake;
-
-	error = disable_nonboot_cpus();
-	if (error || suspend_test(TEST_CPUS))
-		goto Enable_cpus;
-
-	arch_suspend_disable_irqs();
-	BUG_ON(!irqs_disabled());
-
-	error = sysdev_suspend(PMSG_SUSPEND);
-	if (!error) {
-		if (!suspend_test(TEST_CORE))
-			error = suspend_ops->enter(state);
-		sysdev_resume();
-	}
-
-	arch_suspend_enable_irqs();
-	BUG_ON(irqs_disabled());
-
- Enable_cpus:
-	enable_nonboot_cpus();
-
- Platform_wake:
-	if (suspend_ops->wake)
-		suspend_ops->wake();
-
- Power_up_devices:
-	dpm_resume_noirq(PMSG_RESUME);
-
- Platfrom_finish:
-	if (suspend_ops->finish)
-		suspend_ops->finish();
-
-	return error;
-}
-
-/**
- *	suspend_devices_and_enter - suspend devices and enter the desired system
- *				    sleep state.
- *	@state:		  state to enter
- */
-int suspend_devices_and_enter(suspend_state_t state)
-{
-	int error;
-
-	if (!suspend_ops)
-		return -ENOSYS;
-
-	if (suspend_ops->begin) {
-		error = suspend_ops->begin(state);
-		if (error)
-			goto Close;
-	}
-	suspend_console();
-	suspend_test_start();
-	error = dpm_suspend_start(PMSG_SUSPEND);
-	if (error) {
-		printk(KERN_ERR "PM: Some devices failed to suspend\n");
-		goto Recover_platform;
-	}
-	suspend_test_finish("suspend devices");
-	if (suspend_test(TEST_DEVICES))
-		goto Recover_platform;
-
-	suspend_enter(state);
-
- Resume_devices:
-	suspend_test_start();
-	dpm_resume_end(PMSG_RESUME);
-	suspend_test_finish("resume devices");
-	resume_console();
- Close:
-	if (suspend_ops->end)
-		suspend_ops->end();
-	return error;
-
- Recover_platform:
-	if (suspend_ops->recover)
-		suspend_ops->recover();
-	goto Resume_devices;
-}
-
-/**
- *	suspend_finish - Do final work before exiting suspend sequence.
- *
- *	Call platform code to clean up, restart processes, and free the 
- *	console that we've allocated. This is not called for suspend-to-disk.
- */
-static void suspend_finish(void)
-{
-	suspend_thaw_processes();
-	usermodehelper_enable();
-	pm_notifier_call_chain(PM_POST_SUSPEND);
-	pm_restore_console();
-}
-
-
-
-
-static const char * const pm_states[PM_SUSPEND_MAX] = {
-	[PM_SUSPEND_STANDBY]	= "standby",
-	[PM_SUSPEND_MEM]	= "mem",
-};
-
-static inline int valid_state(suspend_state_t state)
-{
-	/* All states need lowlevel support and need to be valid
-	 * to the lowlevel implementation, no valid callback
-	 * implies that none are valid. */
-	if (!suspend_ops || !suspend_ops->valid || !suspend_ops->valid(state))
-		return 0;
-	return 1;
-}
-
-
-/**
- *	enter_state - Do common work of entering low-power state.
- *	@state:		pm_state structure for state we're entering.
- *
- *	Make sure we're the only ones trying to enter a sleep state. Fail
- *	if someone has beat us to it, since we don't want anything weird to
- *	happen when we wake up.
- *	Then, do the setup for suspend, enter the state, and cleaup (after
- *	we've woken up).
- */
-static int enter_state(suspend_state_t state)
-{
-	int error;
-
-	if (!valid_state(state))
-		return -ENODEV;
-
-	if (!mutex_trylock(&pm_mutex))
-		return -EBUSY;
-
-	printk(KERN_INFO "PM: Syncing filesystems ... ");
-	sys_sync();
-	printk("done.\n");
-
-	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
-	error = suspend_prepare();
-	if (error)
-		goto Unlock;
-
-	if (suspend_test(TEST_FREEZER))
-		goto Finish;
-
-	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
-	error = suspend_devices_and_enter(state);
-
- Finish:
-	pr_debug("PM: Finishing wakeup.\n");
-	suspend_finish();
- Unlock:
-	mutex_unlock(&pm_mutex);
-	return error;
-}
-
-
-/**
- *	pm_suspend - Externally visible function for suspending system.
- *	@state:		Enumerated value of state to enter.
- *
- *	Determine whether or not value is within range, get state 
- *	structure, and enter (above).
- */
-
-int pm_suspend(suspend_state_t state)
-{
-	if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
-		return enter_state(state);
-	return -EINVAL;
-}
-
-EXPORT_SYMBOL(pm_suspend);
-
-#endif /* CONFIG_SUSPEND */
-
 struct kobject *power_kobj;
 
 /**
@@ -480,7 +120,6 @@ struct kobject *power_kobj;
  *	store() accepts one of those strings, translates it into the 
  *	proper enumerated value, and initiates a suspend transition.
  */
-
 static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 			  char *buf)
 {
@@ -578,7 +217,6 @@ static struct attribute_group attr_group = {
 	.attrs = g,
 };
 
-
 static int __init pm_init(void)
 {
 	power_kobj = kobject_create_and_add("power", NULL);
@@ -588,144 +226,3 @@ static int __init pm_init(void)
 }
 
 core_initcall(pm_init);
-
-
-#ifdef CONFIG_PM_TEST_SUSPEND
-
-#include <linux/rtc.h>
-
-/*
- * To test system suspend, we need a hands-off mechanism to resume the
- * system.  RTCs wake alarms are a common self-contained mechanism.
- */
-
-static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
-{
-	static char err_readtime[] __initdata =
-		KERN_ERR "PM: can't read %s time, err %d\n";
-	static char err_wakealarm [] __initdata =
-		KERN_ERR "PM: can't set %s wakealarm, err %d\n";
-	static char err_suspend[] __initdata =
-		KERN_ERR "PM: suspend test failed, error %d\n";
-	static char info_test[] __initdata =
-		KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
-
-	unsigned long		now;
-	struct rtc_wkalrm	alm;
-	int			status;
-
-	/* this may fail if the RTC hasn't been initialized */
-	status = rtc_read_time(rtc, &alm.time);
-	if (status < 0) {
-		printk(err_readtime, dev_name(&rtc->dev), status);
-		return;
-	}
-	rtc_tm_to_time(&alm.time, &now);
-
-	memset(&alm, 0, sizeof alm);
-	rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
-	alm.enabled = true;
-
-	status = rtc_set_alarm(rtc, &alm);
-	if (status < 0) {
-		printk(err_wakealarm, dev_name(&rtc->dev), status);
-		return;
-	}
-
-	if (state == PM_SUSPEND_MEM) {
-		printk(info_test, pm_states[state]);
-		status = pm_suspend(state);
-		if (status == -ENODEV)
-			state = PM_SUSPEND_STANDBY;
-	}
-	if (state == PM_SUSPEND_STANDBY) {
-		printk(info_test, pm_states[state]);
-		status = pm_suspend(state);
-	}
-	if (status < 0)
-		printk(err_suspend, status);
-
-	/* Some platforms can't detect that the alarm triggered the
-	 * wakeup, or (accordingly) disable it after it afterwards.
-	 * It's supposed to give oneshot behavior; cope.
-	 */
-	alm.enabled = false;
-	rtc_set_alarm(rtc, &alm);
-}
-
-static int __init has_wakealarm(struct device *dev, void *name_ptr)
-{
-	struct rtc_device *candidate = to_rtc_device(dev);
-
-	if (!candidate->ops->set_alarm)
-		return 0;
-	if (!device_may_wakeup(candidate->dev.parent))
-		return 0;
-
-	*(const char **)name_ptr = dev_name(dev);
-	return 1;
-}
-
-/*
- * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
- * at startup time.  They're normally disabled, for faster boot and because
- * we can't know which states really work on this particular system.
- */
-static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
-
-static char warn_bad_state[] __initdata =
-	KERN_WARNING "PM: can't test '%s' suspend state\n";
-
-static int __init setup_test_suspend(char *value)
-{
-	unsigned i;
-
-	/* "=mem" ==> "mem" */
-	value++;
-	for (i = 0; i < PM_SUSPEND_MAX; i++) {
-		if (!pm_states[i])
-			continue;
-		if (strcmp(pm_states[i], value) != 0)
-			continue;
-		test_state = (__force suspend_state_t) i;
-		return 0;
-	}
-	printk(warn_bad_state, value);
-	return 0;
-}
-__setup("test_suspend", setup_test_suspend);
-
-static int __init test_suspend(void)
-{
-	static char		warn_no_rtc[] __initdata =
-		KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
-
-	char			*pony = NULL;
-	struct rtc_device	*rtc = NULL;
-
-	/* PM is initialized by now; is that state testable? */
-	if (test_state == PM_SUSPEND_ON)
-		goto done;
-	if (!valid_state(test_state)) {
-		printk(warn_bad_state, pm_states[test_state]);
-		goto done;
-	}
-
-	/* RTCs have initialized by now too ... can we use one? */
-	class_find_device(rtc_class, NULL, &pony, has_wakealarm);
-	if (pony)
-		rtc = rtc_class_open(pony);
-	if (!rtc) {
-		printk(warn_no_rtc);
-		goto done;
-	}
-
-	/* go for it */
-	test_wakealarm(rtc, test_state);
-	rtc_class_close(rtc);
-done:
-	return 0;
-}
-late_initcall(test_suspend);
-
-#endif /* CONFIG_PM_TEST_SUSPEND */
diff --git a/kernel/power/power.h b/kernel/power/power.h
index ec4dbdf..2bd98d9 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -160,15 +160,30 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *,
 				unsigned int, char *);
 
 #ifdef CONFIG_SUSPEND
-/* kernel/power/main.c */
+/* kernel/power/suspend.c */
+extern const char *const pm_states[];
+
+extern bool valid_state(suspend_state_t state);
 extern int suspend_devices_and_enter(suspend_state_t state);
+extern int enter_state(suspend_state_t state);
 #else /* !CONFIG_SUSPEND */
 static inline int suspend_devices_and_enter(suspend_state_t state)
 {
 	return -ENOSYS;
 }
+static inline int enter_state(suspend_state_t state) { return -ENOSYS; }
+static inline bool valid_state(suspend_state_t state) { return false; }
 #endif /* !CONFIG_SUSPEND */
 
+#ifdef CONFIG_PM_TEST_SUSPEND
+/* kernel/power/suspend_test.c */
+extern void suspend_test_start(void);
+extern void suspend_test_finish(const char *label);
+#else /* !CONFIG_PM_TEST_SUSPEND */
+static inline void suspend_test_start(void) {}
+static inline void suspend_test_finish(const char *label) {}
+#endif /* !CONFIG_PM_TEST_SUSPEND */
+
 #ifdef CONFIG_PM_SLEEP
 /* kernel/power/main.c */
 extern int pm_notifier_call_chain(unsigned long val);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
new file mode 100644
index 0000000..6f10dfc
--- /dev/null
+++ b/kernel/power/suspend.c
@@ -0,0 +1,300 @@
+/*
+ * kernel/power/suspend.c - Suspend to RAM and standby functionality.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/syscalls.h>
+
+#include "power.h"
+
+const char *const pm_states[PM_SUSPEND_MAX] = {
+	[PM_SUSPEND_STANDBY]	= "standby",
+	[PM_SUSPEND_MEM]	= "mem",
+};
+
+static struct platform_suspend_ops *suspend_ops;
+
+/**
+ *	suspend_set_ops - Set the global suspend method table.
+ *	@ops:	Pointer to ops structure.
+ */
+void suspend_set_ops(struct platform_suspend_ops *ops)
+{
+	mutex_lock(&pm_mutex);
+	suspend_ops = ops;
+	mutex_unlock(&pm_mutex);
+}
+
+bool valid_state(suspend_state_t state)
+{
+	/*
+	 * All states need lowlevel support and need to be valid to the lowlevel
+	 * implementation, no valid callback implies that none are valid.
+	 */
+	return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
+}
+
+/**
+ * suspend_valid_only_mem - generic memory-only valid callback
+ *
+ * Platform drivers that implement mem suspend only and only need
+ * to check for that in their .valid callback can use this instead
+ * of rolling their own .valid callback.
+ */
+int suspend_valid_only_mem(suspend_state_t state)
+{
+	return state == PM_SUSPEND_MEM;
+}
+
+static int suspend_test(int level)
+{
+#ifdef CONFIG_PM_DEBUG
+	if (pm_test_level == level) {
+		printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
+		mdelay(5000);
+		return 1;
+	}
+#endif /* !CONFIG_PM_DEBUG */
+	return 0;
+}
+
+/**
+ *	suspend_prepare - Do prep work before entering low-power state.
+ *
+ *	This is common code that is called for each state that we're entering.
+ *	Run suspend notifiers, allocate a console and stop all processes.
+ */
+static int suspend_prepare(void)
+{
+	int error;
+
+	if (!suspend_ops || !suspend_ops->enter)
+		return -EPERM;
+
+	pm_prepare_console();
+
+	error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+	if (error)
+		goto Finish;
+
+	error = usermodehelper_disable();
+	if (error)
+		goto Finish;
+
+	error = suspend_freeze_processes();
+	if (!error)
+		return 0;
+
+	suspend_thaw_processes();
+	usermodehelper_enable();
+ Finish:
+	pm_notifier_call_chain(PM_POST_SUSPEND);
+	pm_restore_console();
+	return error;
+}
+
+/* default implementation */
+void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
+{
+	local_irq_disable();
+}
+
+/* default implementation */
+void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
+{
+	local_irq_enable();
+}
+
+/**
+ *	suspend_enter - enter the desired system sleep state.
+ *	@state:		state to enter
+ *
+ *	This function should be called after devices have been suspended.
+ */
+static int suspend_enter(suspend_state_t state)
+{
+	int error;
+
+	if (suspend_ops->prepare) {
+		error = suspend_ops->prepare();
+		if (error)
+			return error;
+	}
+
+	error = dpm_suspend_noirq(PMSG_SUSPEND);
+	if (error) {
+		printk(KERN_ERR "PM: Some devices failed to power down\n");
+		goto Platfrom_finish;
+	}
+
+	if (suspend_ops->prepare_late) {
+		error = suspend_ops->prepare_late();
+		if (error)
+			goto Power_up_devices;
+	}
+
+	if (suspend_test(TEST_PLATFORM))
+		goto Platform_wake;
+
+	error = disable_nonboot_cpus();
+	if (error || suspend_test(TEST_CPUS))
+		goto Enable_cpus;
+
+	arch_suspend_disable_irqs();
+	BUG_ON(!irqs_disabled());
+
+	error = sysdev_suspend(PMSG_SUSPEND);
+	if (!error) {
+		if (!suspend_test(TEST_CORE))
+			error = suspend_ops->enter(state);
+		sysdev_resume();
+	}
+
+	arch_suspend_enable_irqs();
+	BUG_ON(irqs_disabled());
+
+ Enable_cpus:
+	enable_nonboot_cpus();
+
+ Platform_wake:
+	if (suspend_ops->wake)
+		suspend_ops->wake();
+
+ Power_up_devices:
+	dpm_resume_noirq(PMSG_RESUME);
+
+ Platfrom_finish:
+	if (suspend_ops->finish)
+		suspend_ops->finish();
+
+	return error;
+}
+
+/**
+ *	suspend_devices_and_enter - suspend devices and enter the desired system
+ *				    sleep state.
+ *	@state:		  state to enter
+ */
+int suspend_devices_and_enter(suspend_state_t state)
+{
+	int error;
+
+	if (!suspend_ops)
+		return -ENOSYS;
+
+	if (suspend_ops->begin) {
+		error = suspend_ops->begin(state);
+		if (error)
+			goto Close;
+	}
+	suspend_console();
+	suspend_test_start();
+	error = dpm_suspend_start(PMSG_SUSPEND);
+	if (error) {
+		printk(KERN_ERR "PM: Some devices failed to suspend\n");
+		goto Recover_platform;
+	}
+	suspend_test_finish("suspend devices");
+	if (suspend_test(TEST_DEVICES))
+		goto Recover_platform;
+
+	suspend_enter(state);
+
+ Resume_devices:
+	suspend_test_start();
+	dpm_resume_end(PMSG_RESUME);
+	suspend_test_finish("resume devices");
+	resume_console();
+ Close:
+	if (suspend_ops->end)
+		suspend_ops->end();
+	return error;
+
+ Recover_platform:
+	if (suspend_ops->recover)
+		suspend_ops->recover();
+	goto Resume_devices;
+}
+
+/**
+ *	suspend_finish - Do final work before exiting suspend sequence.
+ *
+ *	Call platform code to clean up, restart processes, and free the
+ *	console that we've allocated. This is not called for suspend-to-disk.
+ */
+static void suspend_finish(void)
+{
+	suspend_thaw_processes();
+	usermodehelper_enable();
+	pm_notifier_call_chain(PM_POST_SUSPEND);
+	pm_restore_console();
+}
+
+/**
+ *	enter_state - Do common work of entering low-power state.
+ *	@state:		pm_state structure for state we're entering.
+ *
+ *	Make sure we're the only ones trying to enter a sleep state. Fail
+ *	if someone has beat us to it, since we don't want anything weird to
+ *	happen when we wake up.
+ *	Then, do the setup for suspend, enter the state, and cleaup (after
+ *	we've woken up).
+ */
+int enter_state(suspend_state_t state)
+{
+	int error;
+
+	if (!valid_state(state))
+		return -ENODEV;
+
+	if (!mutex_trylock(&pm_mutex))
+		return -EBUSY;
+
+	printk(KERN_INFO "PM: Syncing filesystems ... ");
+	sys_sync();
+	printk("done.\n");
+
+	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
+	error = suspend_prepare();
+	if (error)
+		goto Unlock;
+
+	if (suspend_test(TEST_FREEZER))
+		goto Finish;
+
+	pr_debug("PM: Entering %s sleep\n", pm_states[state]);
+	error = suspend_devices_and_enter(state);
+
+ Finish:
+	pr_debug("PM: Finishing wakeup.\n");
+	suspend_finish();
+ Unlock:
+	mutex_unlock(&pm_mutex);
+	return error;
+}
+
+/**
+ *	pm_suspend - Externally visible function for suspending system.
+ *	@state:		Enumerated value of state to enter.
+ *
+ *	Determine whether or not value is within range, get state
+ *	structure, and enter (above).
+ */
+int pm_suspend(suspend_state_t state)
+{
+	if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
+		return enter_state(state);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
new file mode 100644
index 0000000..17d8bb1
--- /dev/null
+++ b/kernel/power/suspend_test.c
@@ -0,0 +1,187 @@
+/*
+ * kernel/power/suspend_test.c - Suspend to RAM and standby test facility.
+ *
+ * Copyright (c) 2009 Pavel Machek <pavel@ucw.cz>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/init.h>
+#include <linux/rtc.h>
+
+#include "power.h"
+
+/*
+ * We test the system suspend code by setting an RTC wakealarm a short
+ * time in the future, then suspending.  Suspending the devices won't
+ * normally take long ... some systems only need a few milliseconds.
+ *
+ * The time it takes is system-specific though, so when we test this
+ * during system bootup we allow a LOT of time.
+ */
+#define TEST_SUSPEND_SECONDS	5
+
+static unsigned long suspend_test_start_time;
+
+void suspend_test_start(void)
+{
+	/* FIXME Use better timebase than "jiffies", ideally a clocksource.
+	 * What we want is a hardware counter that will work correctly even
+	 * during the irqs-are-off stages of the suspend/resume cycle...
+	 */
+	suspend_test_start_time = jiffies;
+}
+
+void suspend_test_finish(const char *label)
+{
+	long nj = jiffies - suspend_test_start_time;
+	unsigned msec;
+
+	msec = jiffies_to_msecs(abs(nj));
+	pr_info("PM: %s took %d.%03d seconds\n", label,
+			msec / 1000, msec % 1000);
+
+	/* Warning on suspend means the RTC alarm period needs to be
+	 * larger -- the system was sooo slooowwww to suspend that the
+	 * alarm (should have) fired before the system went to sleep!
+	 *
+	 * Warning on either suspend or resume also means the system
+	 * has some performance issues.  The stack dump of a WARN_ON
+	 * is more likely to get the right attention than a printk...
+	 */
+	WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
+}
+
+/*
+ * To test system suspend, we need a hands-off mechanism to resume the
+ * system.  RTCs wake alarms are a common self-contained mechanism.
+ */
+
+static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
+{
+	static char err_readtime[] __initdata =
+		KERN_ERR "PM: can't read %s time, err %d\n";
+	static char err_wakealarm [] __initdata =
+		KERN_ERR "PM: can't set %s wakealarm, err %d\n";
+	static char err_suspend[] __initdata =
+		KERN_ERR "PM: suspend test failed, error %d\n";
+	static char info_test[] __initdata =
+		KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
+
+	unsigned long		now;
+	struct rtc_wkalrm	alm;
+	int			status;
+
+	/* this may fail if the RTC hasn't been initialized */
+	status = rtc_read_time(rtc, &alm.time);
+	if (status < 0) {
+		printk(err_readtime, dev_name(&rtc->dev), status);
+		return;
+	}
+	rtc_tm_to_time(&alm.time, &now);
+
+	memset(&alm, 0, sizeof alm);
+	rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
+	alm.enabled = true;
+
+	status = rtc_set_alarm(rtc, &alm);
+	if (status < 0) {
+		printk(err_wakealarm, dev_name(&rtc->dev), status);
+		return;
+	}
+
+	if (state == PM_SUSPEND_MEM) {
+		printk(info_test, pm_states[state]);
+		status = pm_suspend(state);
+		if (status == -ENODEV)
+			state = PM_SUSPEND_STANDBY;
+	}
+	if (state == PM_SUSPEND_STANDBY) {
+		printk(info_test, pm_states[state]);
+		status = pm_suspend(state);
+	}
+	if (status < 0)
+		printk(err_suspend, status);
+
+	/* Some platforms can't detect that the alarm triggered the
+	 * wakeup, or (accordingly) disable it after it afterwards.
+	 * It's supposed to give oneshot behavior; cope.
+	 */
+	alm.enabled = false;
+	rtc_set_alarm(rtc, &alm);
+}
+
+static int __init has_wakealarm(struct device *dev, void *name_ptr)
+{
+	struct rtc_device *candidate = to_rtc_device(dev);
+
+	if (!candidate->ops->set_alarm)
+		return 0;
+	if (!device_may_wakeup(candidate->dev.parent))
+		return 0;
+
+	*(const char **)name_ptr = dev_name(dev);
+	return 1;
+}
+
+/*
+ * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
+ * at startup time.  They're normally disabled, for faster boot and because
+ * we can't know which states really work on this particular system.
+ */
+static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
+
+static char warn_bad_state[] __initdata =
+	KERN_WARNING "PM: can't test '%s' suspend state\n";
+
+static int __init setup_test_suspend(char *value)
+{
+	unsigned i;
+
+	/* "=mem" ==> "mem" */
+	value++;
+	for (i = 0; i < PM_SUSPEND_MAX; i++) {
+		if (!pm_states[i])
+			continue;
+		if (strcmp(pm_states[i], value) != 0)
+			continue;
+		test_state = (__force suspend_state_t) i;
+		return 0;
+	}
+	printk(warn_bad_state, value);
+	return 0;
+}
+__setup("test_suspend", setup_test_suspend);
+
+static int __init test_suspend(void)
+{
+	static char		warn_no_rtc[] __initdata =
+		KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
+
+	char			*pony = NULL;
+	struct rtc_device	*rtc = NULL;
+
+	/* PM is initialized by now; is that state testable? */
+	if (test_state == PM_SUSPEND_ON)
+		goto done;
+	if (!valid_state(test_state)) {
+		printk(warn_bad_state, pm_states[test_state]);
+		goto done;
+	}
+
+	/* RTCs have initialized by now too ... can we use one? */
+	class_find_device(rtc_class, NULL, &pony, has_wakealarm);
+	if (pony)
+		rtc = rtc_class_open(pony);
+	if (!rtc) {
+		printk(warn_no_rtc);
+		goto done;
+	}
+
+	/* go for it */
+	test_wakealarm(rtc, test_state);
+	rtc_class_close(rtc);
+done:
+	return 0;
+}
+late_initcall(test_suspend);
-- 
cgit v0.10.2


From 8b759b84c8b3c27ccc8dd787294636297b3ebb40 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 10 Jun 2009 01:27:49 +0200
Subject: PM/Hibernate: Rename disk.c to hibernate.c

Change the name of kernel/power/disk.c to kernel/power/hibernate.c
in analogy with the file names introduced by the changes that
separated the suspend to RAM and standby funtionality from the
common PM functions.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>

diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index c4baf1b..eadb17f 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -8,6 +8,6 @@ obj-$(CONFIG_PM_SLEEP)		+= console.o
 obj-$(CONFIG_FREEZER)		+= process.o
 obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
-obj-$(CONFIG_HIBERNATION)	+= swsusp.o disk.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION)	+= swsusp.o hibernate.o snapshot.o swap.o user.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
deleted file mode 100644
index a9beba6..0000000
--- a/kernel/power/disk.c
+++ /dev/null
@@ -1,955 +0,0 @@
-/*
- * kernel/power/disk.c - Suspend-to-disk support.
- *
- * Copyright (c) 2003 Patrick Mochel
- * Copyright (c) 2003 Open Source Development Lab
- * Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
- *
- * This file is released under the GPLv2.
- *
- */
-
-#include <linux/suspend.h>
-#include <linux/syscalls.h>
-#include <linux/reboot.h>
-#include <linux/string.h>
-#include <linux/device.h>
-#include <linux/kmod.h>
-#include <linux/delay.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pm.h>
-#include <linux/console.h>
-#include <linux/cpu.h>
-#include <linux/freezer.h>
-#include <scsi/scsi_scan.h>
-#include <asm/suspend.h>
-
-#include "power.h"
-
-
-static int noresume = 0;
-static char resume_file[256] = CONFIG_PM_STD_PARTITION;
-dev_t swsusp_resume_device;
-sector_t swsusp_resume_block;
-
-enum {
-	HIBERNATION_INVALID,
-	HIBERNATION_PLATFORM,
-	HIBERNATION_TEST,
-	HIBERNATION_TESTPROC,
-	HIBERNATION_SHUTDOWN,
-	HIBERNATION_REBOOT,
-	/* keep last */
-	__HIBERNATION_AFTER_LAST
-};
-#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1)
-#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1)
-
-static int hibernation_mode = HIBERNATION_SHUTDOWN;
-
-static struct platform_hibernation_ops *hibernation_ops;
-
-/**
- * hibernation_set_ops - set the global hibernate operations
- * @ops: the hibernation operations to use in subsequent hibernation transitions
- */
-
-void hibernation_set_ops(struct platform_hibernation_ops *ops)
-{
-	if (ops && !(ops->begin && ops->end &&  ops->pre_snapshot
-	    && ops->prepare && ops->finish && ops->enter && ops->pre_restore
-	    && ops->restore_cleanup)) {
-		WARN_ON(1);
-		return;
-	}
-	mutex_lock(&pm_mutex);
-	hibernation_ops = ops;
-	if (ops)
-		hibernation_mode = HIBERNATION_PLATFORM;
-	else if (hibernation_mode == HIBERNATION_PLATFORM)
-		hibernation_mode = HIBERNATION_SHUTDOWN;
-
-	mutex_unlock(&pm_mutex);
-}
-
-static bool entering_platform_hibernation;
-
-bool system_entering_hibernation(void)
-{
-	return entering_platform_hibernation;
-}
-EXPORT_SYMBOL(system_entering_hibernation);
-
-#ifdef CONFIG_PM_DEBUG
-static void hibernation_debug_sleep(void)
-{
-	printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n");
-	mdelay(5000);
-}
-
-static int hibernation_testmode(int mode)
-{
-	if (hibernation_mode == mode) {
-		hibernation_debug_sleep();
-		return 1;
-	}
-	return 0;
-}
-
-static int hibernation_test(int level)
-{
-	if (pm_test_level == level) {
-		hibernation_debug_sleep();
-		return 1;
-	}
-	return 0;
-}
-#else /* !CONFIG_PM_DEBUG */
-static int hibernation_testmode(int mode) { return 0; }
-static int hibernation_test(int level) { return 0; }
-#endif /* !CONFIG_PM_DEBUG */
-
-/**
- *	platform_begin - tell the platform driver that we're starting
- *	hibernation
- */
-
-static int platform_begin(int platform_mode)
-{
-	return (platform_mode && hibernation_ops) ?
-		hibernation_ops->begin() : 0;
-}
-
-/**
- *	platform_end - tell the platform driver that we've entered the
- *	working state
- */
-
-static void platform_end(int platform_mode)
-{
-	if (platform_mode && hibernation_ops)
-		hibernation_ops->end();
-}
-
-/**
- *	platform_pre_snapshot - prepare the machine for hibernation using the
- *	platform driver if so configured and return an error code if it fails
- */
-
-static int platform_pre_snapshot(int platform_mode)
-{
-	return (platform_mode && hibernation_ops) ?
-		hibernation_ops->pre_snapshot() : 0;
-}
-
-/**
- *	platform_leave - prepare the machine for switching to the normal mode
- *	of operation using the platform driver (called with interrupts disabled)
- */
-
-static void platform_leave(int platform_mode)
-{
-	if (platform_mode && hibernation_ops)
-		hibernation_ops->leave();
-}
-
-/**
- *	platform_finish - switch the machine to the normal mode of operation
- *	using the platform driver (must be called after platform_prepare())
- */
-
-static void platform_finish(int platform_mode)
-{
-	if (platform_mode && hibernation_ops)
-		hibernation_ops->finish();
-}
-
-/**
- *	platform_pre_restore - prepare the platform for the restoration from a
- *	hibernation image.  If the restore fails after this function has been
- *	called, platform_restore_cleanup() must be called.
- */
-
-static int platform_pre_restore(int platform_mode)
-{
-	return (platform_mode && hibernation_ops) ?
-		hibernation_ops->pre_restore() : 0;
-}
-
-/**
- *	platform_restore_cleanup - switch the platform to the normal mode of
- *	operation after a failing restore.  If platform_pre_restore() has been
- *	called before the failing restore, this function must be called too,
- *	regardless of the result of platform_pre_restore().
- */
-
-static void platform_restore_cleanup(int platform_mode)
-{
-	if (platform_mode && hibernation_ops)
-		hibernation_ops->restore_cleanup();
-}
-
-/**
- *	platform_recover - recover the platform from a failure to suspend
- *	devices.
- */
-
-static void platform_recover(int platform_mode)
-{
-	if (platform_mode && hibernation_ops && hibernation_ops->recover)
-		hibernation_ops->recover();
-}
-
-/**
- *	create_image - freeze devices that need to be frozen with interrupts
- *	off, create the hibernation image and thaw those devices.  Control
- *	reappears in this routine after a restore.
- */
-
-static int create_image(int platform_mode)
-{
-	int error;
-
-	error = arch_prepare_suspend();
-	if (error)
-		return error;
-
-	/* At this point, dpm_suspend_start() has been called, but *not*
-	 * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now.
-	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
-	 * become desynchronized with the actual state of the hardware
-	 * at resume time, and evil weirdness ensues.
-	 */
-	error = dpm_suspend_noirq(PMSG_FREEZE);
-	if (error) {
-		printk(KERN_ERR "PM: Some devices failed to power down, "
-			"aborting hibernation\n");
-		return error;
-	}
-
-	error = platform_pre_snapshot(platform_mode);
-	if (error || hibernation_test(TEST_PLATFORM))
-		goto Platform_finish;
-
-	error = disable_nonboot_cpus();
-	if (error || hibernation_test(TEST_CPUS)
-	    || hibernation_testmode(HIBERNATION_TEST))
-		goto Enable_cpus;
-
-	local_irq_disable();
-
-	error = sysdev_suspend(PMSG_FREEZE);
-	if (error) {
-		printk(KERN_ERR "PM: Some system devices failed to power down, "
-			"aborting hibernation\n");
-		goto Enable_irqs;
-	}
-
-	if (hibernation_test(TEST_CORE))
-		goto Power_up;
-
-	in_suspend = 1;
-	save_processor_state();
-	error = swsusp_arch_suspend();
-	if (error)
-		printk(KERN_ERR "PM: Error %d creating hibernation image\n",
-			error);
-	/* Restore control flow magically appears here */
-	restore_processor_state();
-	if (!in_suspend)
-		platform_leave(platform_mode);
-
- Power_up:
-	sysdev_resume();
-	/* NOTE:  dpm_resume_noirq() is just a resume() for devices
-	 * that suspended with irqs off ... no overall powerup.
-	 */
-
- Enable_irqs:
-	local_irq_enable();
-
- Enable_cpus:
-	enable_nonboot_cpus();
-
- Platform_finish:
-	platform_finish(platform_mode);
-
-	dpm_resume_noirq(in_suspend ?
-		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
-
-	return error;
-}
-
-/**
- *	hibernation_snapshot - quiesce devices and create the hibernation
- *	snapshot image.
- *	@platform_mode - if set, use the platform driver, if available, to
- *			 prepare the platform firmware for the power transition.
- *
- *	Must be called with pm_mutex held
- */
-
-int hibernation_snapshot(int platform_mode)
-{
-	int error;
-
-	error = platform_begin(platform_mode);
-	if (error)
-		return error;
-
-	/* Free memory before shutting down devices. */
-	error = swsusp_shrink_memory();
-	if (error)
-		goto Close;
-
-	suspend_console();
-	error = dpm_suspend_start(PMSG_FREEZE);
-	if (error)
-		goto Recover_platform;
-
-	if (hibernation_test(TEST_DEVICES))
-		goto Recover_platform;
-
-	error = create_image(platform_mode);
-	/* Control returns here after successful restore */
-
- Resume_devices:
-	dpm_resume_end(in_suspend ?
-		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
-	resume_console();
- Close:
-	platform_end(platform_mode);
-	return error;
-
- Recover_platform:
-	platform_recover(platform_mode);
-	goto Resume_devices;
-}
-
-/**
- *	resume_target_kernel - prepare devices that need to be suspended with
- *	interrupts off, restore the contents of highmem that have not been
- *	restored yet from the image and run the low level code that will restore
- *	the remaining contents of memory and switch to the just restored target
- *	kernel.
- */
-
-static int resume_target_kernel(bool platform_mode)
-{
-	int error;
-
-	error = dpm_suspend_noirq(PMSG_QUIESCE);
-	if (error) {
-		printk(KERN_ERR "PM: Some devices failed to power down, "
-			"aborting resume\n");
-		return error;
-	}
-
-	error = platform_pre_restore(platform_mode);
-	if (error)
-		goto Cleanup;
-
-	error = disable_nonboot_cpus();
-	if (error)
-		goto Enable_cpus;
-
-	local_irq_disable();
-
-	error = sysdev_suspend(PMSG_QUIESCE);
-	if (error)
-		goto Enable_irqs;
-
-	/* We'll ignore saved state, but this gets preempt count (etc) right */
-	save_processor_state();
-	error = restore_highmem();
-	if (!error) {
-		error = swsusp_arch_resume();
-		/*
-		 * The code below is only ever reached in case of a failure.
-		 * Otherwise execution continues at place where
-		 * swsusp_arch_suspend() was called
-		 */
-		BUG_ON(!error);
-		/* This call to restore_highmem() undos the previous one */
-		restore_highmem();
-	}
-	/*
-	 * The only reason why swsusp_arch_resume() can fail is memory being
-	 * very tight, so we have to free it as soon as we can to avoid
-	 * subsequent failures
-	 */
-	swsusp_free();
-	restore_processor_state();
-	touch_softlockup_watchdog();
-
-	sysdev_resume();
-
- Enable_irqs:
-	local_irq_enable();
-
- Enable_cpus:
-	enable_nonboot_cpus();
-
- Cleanup:
-	platform_restore_cleanup(platform_mode);
-
-	dpm_resume_noirq(PMSG_RECOVER);
-
-	return error;
-}
-
-/**
- *	hibernation_restore - quiesce devices and restore the hibernation
- *	snapshot image.  If successful, control returns in hibernation_snaphot()
- *	@platform_mode - if set, use the platform driver, if available, to
- *			 prepare the platform firmware for the transition.
- *
- *	Must be called with pm_mutex held
- */
-
-int hibernation_restore(int platform_mode)
-{
-	int error;
-
-	pm_prepare_console();
-	suspend_console();
-	error = dpm_suspend_start(PMSG_QUIESCE);
-	if (!error) {
-		error = resume_target_kernel(platform_mode);
-		dpm_resume_end(PMSG_RECOVER);
-	}
-	resume_console();
-	pm_restore_console();
-	return error;
-}
-
-/**
- *	hibernation_platform_enter - enter the hibernation state using the
- *	platform driver (if available)
- */
-
-int hibernation_platform_enter(void)
-{
-	int error;
-
-	if (!hibernation_ops)
-		return -ENOSYS;
-
-	/*
-	 * We have cancelled the power transition by running
-	 * hibernation_ops->finish() before saving the image, so we should let
-	 * the firmware know that we're going to enter the sleep state after all
-	 */
-	error = hibernation_ops->begin();
-	if (error)
-		goto Close;
-
-	entering_platform_hibernation = true;
-	suspend_console();
-	error = dpm_suspend_start(PMSG_HIBERNATE);
-	if (error) {
-		if (hibernation_ops->recover)
-			hibernation_ops->recover();
-		goto Resume_devices;
-	}
-
-	error = dpm_suspend_noirq(PMSG_HIBERNATE);
-	if (error)
-		goto Resume_devices;
-
-	error = hibernation_ops->prepare();
-	if (error)
-		goto Platofrm_finish;
-
-	error = disable_nonboot_cpus();
-	if (error)
-		goto Platofrm_finish;
-
-	local_irq_disable();
-	sysdev_suspend(PMSG_HIBERNATE);
-	hibernation_ops->enter();
-	/* We should never get here */
-	while (1);
-
-	/*
-	 * We don't need to reenable the nonboot CPUs or resume consoles, since
-	 * the system is going to be halted anyway.
-	 */
- Platofrm_finish:
-	hibernation_ops->finish();
-
-	dpm_suspend_noirq(PMSG_RESTORE);
-
- Resume_devices:
-	entering_platform_hibernation = false;
-	dpm_resume_end(PMSG_RESTORE);
-	resume_console();
-
- Close:
-	hibernation_ops->end();
-
-	return error;
-}
-
-/**
- *	power_down - Shut the machine down for hibernation.
- *
- *	Use the platform driver, if configured so; otherwise try
- *	to power off or reboot.
- */
-
-static void power_down(void)
-{
-	switch (hibernation_mode) {
-	case HIBERNATION_TEST:
-	case HIBERNATION_TESTPROC:
-		break;
-	case HIBERNATION_REBOOT:
-		kernel_restart(NULL);
-		break;
-	case HIBERNATION_PLATFORM:
-		hibernation_platform_enter();
-	case HIBERNATION_SHUTDOWN:
-		kernel_power_off();
-		break;
-	}
-	kernel_halt();
-	/*
-	 * Valid image is on the disk, if we continue we risk serious data
-	 * corruption after resume.
-	 */
-	printk(KERN_CRIT "PM: Please power down manually\n");
-	while(1);
-}
-
-static int prepare_processes(void)
-{
-	int error = 0;
-
-	if (freeze_processes()) {
-		error = -EBUSY;
-		thaw_processes();
-	}
-	return error;
-}
-
-/**
- *	hibernate - The granpappy of the built-in hibernation management
- */
-
-int hibernate(void)
-{
-	int error;
-
-	mutex_lock(&pm_mutex);
-	/* The snapshot device should not be opened while we're running */
-	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
-		error = -EBUSY;
-		goto Unlock;
-	}
-
-	pm_prepare_console();
-	error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
-	if (error)
-		goto Exit;
-
-	error = usermodehelper_disable();
-	if (error)
-		goto Exit;
-
-	/* Allocate memory management structures */
-	error = create_basic_memory_bitmaps();
-	if (error)
-		goto Exit;
-
-	printk(KERN_INFO "PM: Syncing filesystems ... ");
-	sys_sync();
-	printk("done.\n");
-
-	error = prepare_processes();
-	if (error)
-		goto Finish;
-
-	if (hibernation_test(TEST_FREEZER))
-		goto Thaw;
-
-	if (hibernation_testmode(HIBERNATION_TESTPROC))
-		goto Thaw;
-
-	error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
-	if (in_suspend && !error) {
-		unsigned int flags = 0;
-
-		if (hibernation_mode == HIBERNATION_PLATFORM)
-			flags |= SF_PLATFORM_MODE;
-		pr_debug("PM: writing image.\n");
-		error = swsusp_write(flags);
-		swsusp_free();
-		if (!error)
-			power_down();
-	} else {
-		pr_debug("PM: Image restored successfully.\n");
-		swsusp_free();
-	}
- Thaw:
-	thaw_processes();
- Finish:
-	free_basic_memory_bitmaps();
-	usermodehelper_enable();
- Exit:
-	pm_notifier_call_chain(PM_POST_HIBERNATION);
-	pm_restore_console();
-	atomic_inc(&snapshot_device_available);
- Unlock:
-	mutex_unlock(&pm_mutex);
-	return error;
-}
-
-
-/**
- *	software_resume - Resume from a saved image.
- *
- *	Called as a late_initcall (so all devices are discovered and
- *	initialized), we call swsusp to see if we have a saved image or not.
- *	If so, we quiesce devices, the restore the saved image. We will
- *	return above (in hibernate() ) if everything goes well.
- *	Otherwise, we fail gracefully and return to the normally
- *	scheduled program.
- *
- */
-
-static int software_resume(void)
-{
-	int error;
-	unsigned int flags;
-
-	/*
-	 * If the user said "noresume".. bail out early.
-	 */
-	if (noresume)
-		return 0;
-
-	/*
-	 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
-	 * is configured into the kernel. Since the regular hibernate
-	 * trigger path is via sysfs which takes a buffer mutex before
-	 * calling hibernate functions (which take pm_mutex) this can
-	 * cause lockdep to complain about a possible ABBA deadlock
-	 * which cannot happen since we're in the boot code here and
-	 * sysfs can't be invoked yet. Therefore, we use a subclass
-	 * here to avoid lockdep complaining.
-	 */
-	mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
-
-	if (swsusp_resume_device)
-		goto Check_image;
-
-	if (!strlen(resume_file)) {
-		error = -ENOENT;
-		goto Unlock;
-	}
-
-	pr_debug("PM: Checking image partition %s\n", resume_file);
-
-	/* Check if the device is there */
-	swsusp_resume_device = name_to_dev_t(resume_file);
-	if (!swsusp_resume_device) {
-		/*
-		 * Some device discovery might still be in progress; we need
-		 * to wait for this to finish.
-		 */
-		wait_for_device_probe();
-		/*
-		 * We can't depend on SCSI devices being available after loading
-		 * one of their modules until scsi_complete_async_scans() is
-		 * called and the resume device usually is a SCSI one.
-		 */
-		scsi_complete_async_scans();
-
-		swsusp_resume_device = name_to_dev_t(resume_file);
-		if (!swsusp_resume_device) {
-			error = -ENODEV;
-			goto Unlock;
-		}
-	}
-
- Check_image:
-	pr_debug("PM: Resume from partition %d:%d\n",
-		MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
-
-	pr_debug("PM: Checking hibernation image.\n");
-	error = swsusp_check();
-	if (error)
-		goto Unlock;
-
-	/* The snapshot device should not be opened while we're running */
-	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
-		error = -EBUSY;
-		goto Unlock;
-	}
-
-	pm_prepare_console();
-	error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
-	if (error)
-		goto Finish;
-
-	error = usermodehelper_disable();
-	if (error)
-		goto Finish;
-
-	error = create_basic_memory_bitmaps();
-	if (error)
-		goto Finish;
-
-	pr_debug("PM: Preparing processes for restore.\n");
-	error = prepare_processes();
-	if (error) {
-		swsusp_close(FMODE_READ);
-		goto Done;
-	}
-
-	pr_debug("PM: Reading hibernation image.\n");
-
-	error = swsusp_read(&flags);
-	if (!error)
-		hibernation_restore(flags & SF_PLATFORM_MODE);
-
-	printk(KERN_ERR "PM: Restore failed, recovering.\n");
-	swsusp_free();
-	thaw_processes();
- Done:
-	free_basic_memory_bitmaps();
-	usermodehelper_enable();
- Finish:
-	pm_notifier_call_chain(PM_POST_RESTORE);
-	pm_restore_console();
-	atomic_inc(&snapshot_device_available);
-	/* For success case, the suspend path will release the lock */
- Unlock:
-	mutex_unlock(&pm_mutex);
-	pr_debug("PM: Resume from disk failed.\n");
-	return error;
-}
-
-late_initcall(software_resume);
-
-
-static const char * const hibernation_modes[] = {
-	[HIBERNATION_PLATFORM]	= "platform",
-	[HIBERNATION_SHUTDOWN]	= "shutdown",
-	[HIBERNATION_REBOOT]	= "reboot",
-	[HIBERNATION_TEST]	= "test",
-	[HIBERNATION_TESTPROC]	= "testproc",
-};
-
-/**
- *	disk - Control hibernation mode
- *
- *	Suspend-to-disk can be handled in several ways. We have a few options
- *	for putting the system to sleep - using the platform driver (e.g. ACPI
- *	or other hibernation_ops), powering off the system or rebooting the
- *	system (for testing) as well as the two test modes.
- *
- *	The system can support 'platform', and that is known a priori (and
- *	encoded by the presence of hibernation_ops). However, the user may
- *	choose 'shutdown' or 'reboot' as alternatives, as well as one fo the
- *	test modes, 'test' or 'testproc'.
- *
- *	show() will display what the mode is currently set to.
- *	store() will accept one of
- *
- *	'platform'
- *	'shutdown'
- *	'reboot'
- *	'test'
- *	'testproc'
- *
- *	It will only change to 'platform' if the system
- *	supports it (as determined by having hibernation_ops).
- */
-
-static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
-			 char *buf)
-{
-	int i;
-	char *start = buf;
-
-	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
-		if (!hibernation_modes[i])
-			continue;
-		switch (i) {
-		case HIBERNATION_SHUTDOWN:
-		case HIBERNATION_REBOOT:
-		case HIBERNATION_TEST:
-		case HIBERNATION_TESTPROC:
-			break;
-		case HIBERNATION_PLATFORM:
-			if (hibernation_ops)
-				break;
-			/* not a valid mode, continue with loop */
-			continue;
-		}
-		if (i == hibernation_mode)
-			buf += sprintf(buf, "[%s] ", hibernation_modes[i]);
-		else
-			buf += sprintf(buf, "%s ", hibernation_modes[i]);
-	}
-	buf += sprintf(buf, "\n");
-	return buf-start;
-}
-
-
-static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
-			  const char *buf, size_t n)
-{
-	int error = 0;
-	int i;
-	int len;
-	char *p;
-	int mode = HIBERNATION_INVALID;
-
-	p = memchr(buf, '\n', n);
-	len = p ? p - buf : n;
-
-	mutex_lock(&pm_mutex);
-	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
-		if (len == strlen(hibernation_modes[i])
-		    && !strncmp(buf, hibernation_modes[i], len)) {
-			mode = i;
-			break;
-		}
-	}
-	if (mode != HIBERNATION_INVALID) {
-		switch (mode) {
-		case HIBERNATION_SHUTDOWN:
-		case HIBERNATION_REBOOT:
-		case HIBERNATION_TEST:
-		case HIBERNATION_TESTPROC:
-			hibernation_mode = mode;
-			break;
-		case HIBERNATION_PLATFORM:
-			if (hibernation_ops)
-				hibernation_mode = mode;
-			else
-				error = -EINVAL;
-		}
-	} else
-		error = -EINVAL;
-
-	if (!error)
-		pr_debug("PM: Hibernation mode set to '%s'\n",
-			 hibernation_modes[mode]);
-	mutex_unlock(&pm_mutex);
-	return error ? error : n;
-}
-
-power_attr(disk);
-
-static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
-			   char *buf)
-{
-	return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
-		       MINOR(swsusp_resume_device));
-}
-
-static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
-			    const char *buf, size_t n)
-{
-	unsigned int maj, min;
-	dev_t res;
-	int ret = -EINVAL;
-
-	if (sscanf(buf, "%u:%u", &maj, &min) != 2)
-		goto out;
-
-	res = MKDEV(maj,min);
-	if (maj != MAJOR(res) || min != MINOR(res))
-		goto out;
-
-	mutex_lock(&pm_mutex);
-	swsusp_resume_device = res;
-	mutex_unlock(&pm_mutex);
-	printk(KERN_INFO "PM: Starting manual resume from disk\n");
-	noresume = 0;
-	software_resume();
-	ret = n;
- out:
-	return ret;
-}
-
-power_attr(resume);
-
-static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr,
-			       char *buf)
-{
-	return sprintf(buf, "%lu\n", image_size);
-}
-
-static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr,
-				const char *buf, size_t n)
-{
-	unsigned long size;
-
-	if (sscanf(buf, "%lu", &size) == 1) {
-		image_size = size;
-		return n;
-	}
-
-	return -EINVAL;
-}
-
-power_attr(image_size);
-
-static struct attribute * g[] = {
-	&disk_attr.attr,
-	&resume_attr.attr,
-	&image_size_attr.attr,
-	NULL,
-};
-
-
-static struct attribute_group attr_group = {
-	.attrs = g,
-};
-
-
-static int __init pm_disk_init(void)
-{
-	return sysfs_create_group(power_kobj, &attr_group);
-}
-
-core_initcall(pm_disk_init);
-
-
-static int __init resume_setup(char *str)
-{
-	if (noresume)
-		return 1;
-
-	strncpy( resume_file, str, 255 );
-	return 1;
-}
-
-static int __init resume_offset_setup(char *str)
-{
-	unsigned long long offset;
-
-	if (noresume)
-		return 1;
-
-	if (sscanf(str, "%llu", &offset) == 1)
-		swsusp_resume_block = offset;
-
-	return 1;
-}
-
-static int __init noresume_setup(char *str)
-{
-	noresume = 1;
-	return 1;
-}
-
-__setup("noresume", noresume_setup);
-__setup("resume_offset=", resume_offset_setup);
-__setup("resume=", resume_setup);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
new file mode 100644
index 0000000..81d2e74
--- /dev/null
+++ b/kernel/power/hibernate.c
@@ -0,0 +1,955 @@
+/*
+ * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/suspend.h>
+#include <linux/syscalls.h>
+#include <linux/reboot.h>
+#include <linux/string.h>
+#include <linux/device.h>
+#include <linux/kmod.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pm.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+#include <scsi/scsi_scan.h>
+#include <asm/suspend.h>
+
+#include "power.h"
+
+
+static int noresume = 0;
+static char resume_file[256] = CONFIG_PM_STD_PARTITION;
+dev_t swsusp_resume_device;
+sector_t swsusp_resume_block;
+
+enum {
+	HIBERNATION_INVALID,
+	HIBERNATION_PLATFORM,
+	HIBERNATION_TEST,
+	HIBERNATION_TESTPROC,
+	HIBERNATION_SHUTDOWN,
+	HIBERNATION_REBOOT,
+	/* keep last */
+	__HIBERNATION_AFTER_LAST
+};
+#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1)
+#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1)
+
+static int hibernation_mode = HIBERNATION_SHUTDOWN;
+
+static struct platform_hibernation_ops *hibernation_ops;
+
+/**
+ * hibernation_set_ops - set the global hibernate operations
+ * @ops: the hibernation operations to use in subsequent hibernation transitions
+ */
+
+void hibernation_set_ops(struct platform_hibernation_ops *ops)
+{
+	if (ops && !(ops->begin && ops->end &&  ops->pre_snapshot
+	    && ops->prepare && ops->finish && ops->enter && ops->pre_restore
+	    && ops->restore_cleanup)) {
+		WARN_ON(1);
+		return;
+	}
+	mutex_lock(&pm_mutex);
+	hibernation_ops = ops;
+	if (ops)
+		hibernation_mode = HIBERNATION_PLATFORM;
+	else if (hibernation_mode == HIBERNATION_PLATFORM)
+		hibernation_mode = HIBERNATION_SHUTDOWN;
+
+	mutex_unlock(&pm_mutex);
+}
+
+static bool entering_platform_hibernation;
+
+bool system_entering_hibernation(void)
+{
+	return entering_platform_hibernation;
+}
+EXPORT_SYMBOL(system_entering_hibernation);
+
+#ifdef CONFIG_PM_DEBUG
+static void hibernation_debug_sleep(void)
+{
+	printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n");
+	mdelay(5000);
+}
+
+static int hibernation_testmode(int mode)
+{
+	if (hibernation_mode == mode) {
+		hibernation_debug_sleep();
+		return 1;
+	}
+	return 0;
+}
+
+static int hibernation_test(int level)
+{
+	if (pm_test_level == level) {
+		hibernation_debug_sleep();
+		return 1;
+	}
+	return 0;
+}
+#else /* !CONFIG_PM_DEBUG */
+static int hibernation_testmode(int mode) { return 0; }
+static int hibernation_test(int level) { return 0; }
+#endif /* !CONFIG_PM_DEBUG */
+
+/**
+ *	platform_begin - tell the platform driver that we're starting
+ *	hibernation
+ */
+
+static int platform_begin(int platform_mode)
+{
+	return (platform_mode && hibernation_ops) ?
+		hibernation_ops->begin() : 0;
+}
+
+/**
+ *	platform_end - tell the platform driver that we've entered the
+ *	working state
+ */
+
+static void platform_end(int platform_mode)
+{
+	if (platform_mode && hibernation_ops)
+		hibernation_ops->end();
+}
+
+/**
+ *	platform_pre_snapshot - prepare the machine for hibernation using the
+ *	platform driver if so configured and return an error code if it fails
+ */
+
+static int platform_pre_snapshot(int platform_mode)
+{
+	return (platform_mode && hibernation_ops) ?
+		hibernation_ops->pre_snapshot() : 0;
+}
+
+/**
+ *	platform_leave - prepare the machine for switching to the normal mode
+ *	of operation using the platform driver (called with interrupts disabled)
+ */
+
+static void platform_leave(int platform_mode)
+{
+	if (platform_mode && hibernation_ops)
+		hibernation_ops->leave();
+}
+
+/**
+ *	platform_finish - switch the machine to the normal mode of operation
+ *	using the platform driver (must be called after platform_prepare())
+ */
+
+static void platform_finish(int platform_mode)
+{
+	if (platform_mode && hibernation_ops)
+		hibernation_ops->finish();
+}
+
+/**
+ *	platform_pre_restore - prepare the platform for the restoration from a
+ *	hibernation image.  If the restore fails after this function has been
+ *	called, platform_restore_cleanup() must be called.
+ */
+
+static int platform_pre_restore(int platform_mode)
+{
+	return (platform_mode && hibernation_ops) ?
+		hibernation_ops->pre_restore() : 0;
+}
+
+/**
+ *	platform_restore_cleanup - switch the platform to the normal mode of
+ *	operation after a failing restore.  If platform_pre_restore() has been
+ *	called before the failing restore, this function must be called too,
+ *	regardless of the result of platform_pre_restore().
+ */
+
+static void platform_restore_cleanup(int platform_mode)
+{
+	if (platform_mode && hibernation_ops)
+		hibernation_ops->restore_cleanup();
+}
+
+/**
+ *	platform_recover - recover the platform from a failure to suspend
+ *	devices.
+ */
+
+static void platform_recover(int platform_mode)
+{
+	if (platform_mode && hibernation_ops && hibernation_ops->recover)
+		hibernation_ops->recover();
+}
+
+/**
+ *	create_image - freeze devices that need to be frozen with interrupts
+ *	off, create the hibernation image and thaw those devices.  Control
+ *	reappears in this routine after a restore.
+ */
+
+static int create_image(int platform_mode)
+{
+	int error;
+
+	error = arch_prepare_suspend();
+	if (error)
+		return error;
+
+	/* At this point, dpm_suspend_start() has been called, but *not*
+	 * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now.
+	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
+	 * become desynchronized with the actual state of the hardware
+	 * at resume time, and evil weirdness ensues.
+	 */
+	error = dpm_suspend_noirq(PMSG_FREEZE);
+	if (error) {
+		printk(KERN_ERR "PM: Some devices failed to power down, "
+			"aborting hibernation\n");
+		return error;
+	}
+
+	error = platform_pre_snapshot(platform_mode);
+	if (error || hibernation_test(TEST_PLATFORM))
+		goto Platform_finish;
+
+	error = disable_nonboot_cpus();
+	if (error || hibernation_test(TEST_CPUS)
+	    || hibernation_testmode(HIBERNATION_TEST))
+		goto Enable_cpus;
+
+	local_irq_disable();
+
+	error = sysdev_suspend(PMSG_FREEZE);
+	if (error) {
+		printk(KERN_ERR "PM: Some system devices failed to power down, "
+			"aborting hibernation\n");
+		goto Enable_irqs;
+	}
+
+	if (hibernation_test(TEST_CORE))
+		goto Power_up;
+
+	in_suspend = 1;
+	save_processor_state();
+	error = swsusp_arch_suspend();
+	if (error)
+		printk(KERN_ERR "PM: Error %d creating hibernation image\n",
+			error);
+	/* Restore control flow magically appears here */
+	restore_processor_state();
+	if (!in_suspend)
+		platform_leave(platform_mode);
+
+ Power_up:
+	sysdev_resume();
+	/* NOTE:  dpm_resume_noirq() is just a resume() for devices
+	 * that suspended with irqs off ... no overall powerup.
+	 */
+
+ Enable_irqs:
+	local_irq_enable();
+
+ Enable_cpus:
+	enable_nonboot_cpus();
+
+ Platform_finish:
+	platform_finish(platform_mode);
+
+	dpm_resume_noirq(in_suspend ?
+		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+
+	return error;
+}
+
+/**
+ *	hibernation_snapshot - quiesce devices and create the hibernation
+ *	snapshot image.
+ *	@platform_mode - if set, use the platform driver, if available, to
+ *			 prepare the platform firmware for the power transition.
+ *
+ *	Must be called with pm_mutex held
+ */
+
+int hibernation_snapshot(int platform_mode)
+{
+	int error;
+
+	error = platform_begin(platform_mode);
+	if (error)
+		return error;
+
+	/* Free memory before shutting down devices. */
+	error = swsusp_shrink_memory();
+	if (error)
+		goto Close;
+
+	suspend_console();
+	error = dpm_suspend_start(PMSG_FREEZE);
+	if (error)
+		goto Recover_platform;
+
+	if (hibernation_test(TEST_DEVICES))
+		goto Recover_platform;
+
+	error = create_image(platform_mode);
+	/* Control returns here after successful restore */
+
+ Resume_devices:
+	dpm_resume_end(in_suspend ?
+		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+	resume_console();
+ Close:
+	platform_end(platform_mode);
+	return error;
+
+ Recover_platform:
+	platform_recover(platform_mode);
+	goto Resume_devices;
+}
+
+/**
+ *	resume_target_kernel - prepare devices that need to be suspended with
+ *	interrupts off, restore the contents of highmem that have not been
+ *	restored yet from the image and run the low level code that will restore
+ *	the remaining contents of memory and switch to the just restored target
+ *	kernel.
+ */
+
+static int resume_target_kernel(bool platform_mode)
+{
+	int error;
+
+	error = dpm_suspend_noirq(PMSG_QUIESCE);
+	if (error) {
+		printk(KERN_ERR "PM: Some devices failed to power down, "
+			"aborting resume\n");
+		return error;
+	}
+
+	error = platform_pre_restore(platform_mode);
+	if (error)
+		goto Cleanup;
+
+	error = disable_nonboot_cpus();
+	if (error)
+		goto Enable_cpus;
+
+	local_irq_disable();
+
+	error = sysdev_suspend(PMSG_QUIESCE);
+	if (error)
+		goto Enable_irqs;
+
+	/* We'll ignore saved state, but this gets preempt count (etc) right */
+	save_processor_state();
+	error = restore_highmem();
+	if (!error) {
+		error = swsusp_arch_resume();
+		/*
+		 * The code below is only ever reached in case of a failure.
+		 * Otherwise execution continues at place where
+		 * swsusp_arch_suspend() was called
+		 */
+		BUG_ON(!error);
+		/* This call to restore_highmem() undos the previous one */
+		restore_highmem();
+	}
+	/*
+	 * The only reason why swsusp_arch_resume() can fail is memory being
+	 * very tight, so we have to free it as soon as we can to avoid
+	 * subsequent failures
+	 */
+	swsusp_free();
+	restore_processor_state();
+	touch_softlockup_watchdog();
+
+	sysdev_resume();
+
+ Enable_irqs:
+	local_irq_enable();
+
+ Enable_cpus:
+	enable_nonboot_cpus();
+
+ Cleanup:
+	platform_restore_cleanup(platform_mode);
+
+	dpm_resume_noirq(PMSG_RECOVER);
+
+	return error;
+}
+
+/**
+ *	hibernation_restore - quiesce devices and restore the hibernation
+ *	snapshot image.  If successful, control returns in hibernation_snaphot()
+ *	@platform_mode - if set, use the platform driver, if available, to
+ *			 prepare the platform firmware for the transition.
+ *
+ *	Must be called with pm_mutex held
+ */
+
+int hibernation_restore(int platform_mode)
+{
+	int error;
+
+	pm_prepare_console();
+	suspend_console();
+	error = dpm_suspend_start(PMSG_QUIESCE);
+	if (!error) {
+		error = resume_target_kernel(platform_mode);
+		dpm_resume_end(PMSG_RECOVER);
+	}
+	resume_console();
+	pm_restore_console();
+	return error;
+}
+
+/**
+ *	hibernation_platform_enter - enter the hibernation state using the
+ *	platform driver (if available)
+ */
+
+int hibernation_platform_enter(void)
+{
+	int error;
+
+	if (!hibernation_ops)
+		return -ENOSYS;
+
+	/*
+	 * We have cancelled the power transition by running
+	 * hibernation_ops->finish() before saving the image, so we should let
+	 * the firmware know that we're going to enter the sleep state after all
+	 */
+	error = hibernation_ops->begin();
+	if (error)
+		goto Close;
+
+	entering_platform_hibernation = true;
+	suspend_console();
+	error = dpm_suspend_start(PMSG_HIBERNATE);
+	if (error) {
+		if (hibernation_ops->recover)
+			hibernation_ops->recover();
+		goto Resume_devices;
+	}
+
+	error = dpm_suspend_noirq(PMSG_HIBERNATE);
+	if (error)
+		goto Resume_devices;
+
+	error = hibernation_ops->prepare();
+	if (error)
+		goto Platofrm_finish;
+
+	error = disable_nonboot_cpus();
+	if (error)
+		goto Platofrm_finish;
+
+	local_irq_disable();
+	sysdev_suspend(PMSG_HIBERNATE);
+	hibernation_ops->enter();
+	/* We should never get here */
+	while (1);
+
+	/*
+	 * We don't need to reenable the nonboot CPUs or resume consoles, since
+	 * the system is going to be halted anyway.
+	 */
+ Platofrm_finish:
+	hibernation_ops->finish();
+
+	dpm_suspend_noirq(PMSG_RESTORE);
+
+ Resume_devices:
+	entering_platform_hibernation = false;
+	dpm_resume_end(PMSG_RESTORE);
+	resume_console();
+
+ Close:
+	hibernation_ops->end();
+
+	return error;
+}
+
+/**
+ *	power_down - Shut the machine down for hibernation.
+ *
+ *	Use the platform driver, if configured so; otherwise try
+ *	to power off or reboot.
+ */
+
+static void power_down(void)
+{
+	switch (hibernation_mode) {
+	case HIBERNATION_TEST:
+	case HIBERNATION_TESTPROC:
+		break;
+	case HIBERNATION_REBOOT:
+		kernel_restart(NULL);
+		break;
+	case HIBERNATION_PLATFORM:
+		hibernation_platform_enter();
+	case HIBERNATION_SHUTDOWN:
+		kernel_power_off();
+		break;
+	}
+	kernel_halt();
+	/*
+	 * Valid image is on the disk, if we continue we risk serious data
+	 * corruption after resume.
+	 */
+	printk(KERN_CRIT "PM: Please power down manually\n");
+	while(1);
+}
+
+static int prepare_processes(void)
+{
+	int error = 0;
+
+	if (freeze_processes()) {
+		error = -EBUSY;
+		thaw_processes();
+	}
+	return error;
+}
+
+/**
+ *	hibernate - The granpappy of the built-in hibernation management
+ */
+
+int hibernate(void)
+{
+	int error;
+
+	mutex_lock(&pm_mutex);
+	/* The snapshot device should not be opened while we're running */
+	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+		error = -EBUSY;
+		goto Unlock;
+	}
+
+	pm_prepare_console();
+	error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+	if (error)
+		goto Exit;
+
+	error = usermodehelper_disable();
+	if (error)
+		goto Exit;
+
+	/* Allocate memory management structures */
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Exit;
+
+	printk(KERN_INFO "PM: Syncing filesystems ... ");
+	sys_sync();
+	printk("done.\n");
+
+	error = prepare_processes();
+	if (error)
+		goto Finish;
+
+	if (hibernation_test(TEST_FREEZER))
+		goto Thaw;
+
+	if (hibernation_testmode(HIBERNATION_TESTPROC))
+		goto Thaw;
+
+	error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
+	if (in_suspend && !error) {
+		unsigned int flags = 0;
+
+		if (hibernation_mode == HIBERNATION_PLATFORM)
+			flags |= SF_PLATFORM_MODE;
+		pr_debug("PM: writing image.\n");
+		error = swsusp_write(flags);
+		swsusp_free();
+		if (!error)
+			power_down();
+	} else {
+		pr_debug("PM: Image restored successfully.\n");
+		swsusp_free();
+	}
+ Thaw:
+	thaw_processes();
+ Finish:
+	free_basic_memory_bitmaps();
+	usermodehelper_enable();
+ Exit:
+	pm_notifier_call_chain(PM_POST_HIBERNATION);
+	pm_restore_console();
+	atomic_inc(&snapshot_device_available);
+ Unlock:
+	mutex_unlock(&pm_mutex);
+	return error;
+}
+
+
+/**
+ *	software_resume - Resume from a saved image.
+ *
+ *	Called as a late_initcall (so all devices are discovered and
+ *	initialized), we call swsusp to see if we have a saved image or not.
+ *	If so, we quiesce devices, the restore the saved image. We will
+ *	return above (in hibernate() ) if everything goes well.
+ *	Otherwise, we fail gracefully and return to the normally
+ *	scheduled program.
+ *
+ */
+
+static int software_resume(void)
+{
+	int error;
+	unsigned int flags;
+
+	/*
+	 * If the user said "noresume".. bail out early.
+	 */
+	if (noresume)
+		return 0;
+
+	/*
+	 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
+	 * is configured into the kernel. Since the regular hibernate
+	 * trigger path is via sysfs which takes a buffer mutex before
+	 * calling hibernate functions (which take pm_mutex) this can
+	 * cause lockdep to complain about a possible ABBA deadlock
+	 * which cannot happen since we're in the boot code here and
+	 * sysfs can't be invoked yet. Therefore, we use a subclass
+	 * here to avoid lockdep complaining.
+	 */
+	mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+
+	if (swsusp_resume_device)
+		goto Check_image;
+
+	if (!strlen(resume_file)) {
+		error = -ENOENT;
+		goto Unlock;
+	}
+
+	pr_debug("PM: Checking image partition %s\n", resume_file);
+
+	/* Check if the device is there */
+	swsusp_resume_device = name_to_dev_t(resume_file);
+	if (!swsusp_resume_device) {
+		/*
+		 * Some device discovery might still be in progress; we need
+		 * to wait for this to finish.
+		 */
+		wait_for_device_probe();
+		/*
+		 * We can't depend on SCSI devices being available after loading
+		 * one of their modules until scsi_complete_async_scans() is
+		 * called and the resume device usually is a SCSI one.
+		 */
+		scsi_complete_async_scans();
+
+		swsusp_resume_device = name_to_dev_t(resume_file);
+		if (!swsusp_resume_device) {
+			error = -ENODEV;
+			goto Unlock;
+		}
+	}
+
+ Check_image:
+	pr_debug("PM: Resume from partition %d:%d\n",
+		MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
+
+	pr_debug("PM: Checking hibernation image.\n");
+	error = swsusp_check();
+	if (error)
+		goto Unlock;
+
+	/* The snapshot device should not be opened while we're running */
+	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+		error = -EBUSY;
+		goto Unlock;
+	}
+
+	pm_prepare_console();
+	error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+	if (error)
+		goto Finish;
+
+	error = usermodehelper_disable();
+	if (error)
+		goto Finish;
+
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Finish;
+
+	pr_debug("PM: Preparing processes for restore.\n");
+	error = prepare_processes();
+	if (error) {
+		swsusp_close(FMODE_READ);
+		goto Done;
+	}
+
+	pr_debug("PM: Reading hibernation image.\n");
+
+	error = swsusp_read(&flags);
+	if (!error)
+		hibernation_restore(flags & SF_PLATFORM_MODE);
+
+	printk(KERN_ERR "PM: Restore failed, recovering.\n");
+	swsusp_free();
+	thaw_processes();
+ Done:
+	free_basic_memory_bitmaps();
+	usermodehelper_enable();
+ Finish:
+	pm_notifier_call_chain(PM_POST_RESTORE);
+	pm_restore_console();
+	atomic_inc(&snapshot_device_available);
+	/* For success case, the suspend path will release the lock */
+ Unlock:
+	mutex_unlock(&pm_mutex);
+	pr_debug("PM: Resume from disk failed.\n");
+	return error;
+}
+
+late_initcall(software_resume);
+
+
+static const char * const hibernation_modes[] = {
+	[HIBERNATION_PLATFORM]	= "platform",
+	[HIBERNATION_SHUTDOWN]	= "shutdown",
+	[HIBERNATION_REBOOT]	= "reboot",
+	[HIBERNATION_TEST]	= "test",
+	[HIBERNATION_TESTPROC]	= "testproc",
+};
+
+/**
+ *	disk - Control hibernation mode
+ *
+ *	Suspend-to-disk can be handled in several ways. We have a few options
+ *	for putting the system to sleep - using the platform driver (e.g. ACPI
+ *	or other hibernation_ops), powering off the system or rebooting the
+ *	system (for testing) as well as the two test modes.
+ *
+ *	The system can support 'platform', and that is known a priori (and
+ *	encoded by the presence of hibernation_ops). However, the user may
+ *	choose 'shutdown' or 'reboot' as alternatives, as well as one fo the
+ *	test modes, 'test' or 'testproc'.
+ *
+ *	show() will display what the mode is currently set to.
+ *	store() will accept one of
+ *
+ *	'platform'
+ *	'shutdown'
+ *	'reboot'
+ *	'test'
+ *	'testproc'
+ *
+ *	It will only change to 'platform' if the system
+ *	supports it (as determined by having hibernation_ops).
+ */
+
+static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	int i;
+	char *start = buf;
+
+	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
+		if (!hibernation_modes[i])
+			continue;
+		switch (i) {
+		case HIBERNATION_SHUTDOWN:
+		case HIBERNATION_REBOOT:
+		case HIBERNATION_TEST:
+		case HIBERNATION_TESTPROC:
+			break;
+		case HIBERNATION_PLATFORM:
+			if (hibernation_ops)
+				break;
+			/* not a valid mode, continue with loop */
+			continue;
+		}
+		if (i == hibernation_mode)
+			buf += sprintf(buf, "[%s] ", hibernation_modes[i]);
+		else
+			buf += sprintf(buf, "%s ", hibernation_modes[i]);
+	}
+	buf += sprintf(buf, "\n");
+	return buf-start;
+}
+
+
+static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
+			  const char *buf, size_t n)
+{
+	int error = 0;
+	int i;
+	int len;
+	char *p;
+	int mode = HIBERNATION_INVALID;
+
+	p = memchr(buf, '\n', n);
+	len = p ? p - buf : n;
+
+	mutex_lock(&pm_mutex);
+	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
+		if (len == strlen(hibernation_modes[i])
+		    && !strncmp(buf, hibernation_modes[i], len)) {
+			mode = i;
+			break;
+		}
+	}
+	if (mode != HIBERNATION_INVALID) {
+		switch (mode) {
+		case HIBERNATION_SHUTDOWN:
+		case HIBERNATION_REBOOT:
+		case HIBERNATION_TEST:
+		case HIBERNATION_TESTPROC:
+			hibernation_mode = mode;
+			break;
+		case HIBERNATION_PLATFORM:
+			if (hibernation_ops)
+				hibernation_mode = mode;
+			else
+				error = -EINVAL;
+		}
+	} else
+		error = -EINVAL;
+
+	if (!error)
+		pr_debug("PM: Hibernation mode set to '%s'\n",
+			 hibernation_modes[mode]);
+	mutex_unlock(&pm_mutex);
+	return error ? error : n;
+}
+
+power_attr(disk);
+
+static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
+			   char *buf)
+{
+	return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
+		       MINOR(swsusp_resume_device));
+}
+
+static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
+			    const char *buf, size_t n)
+{
+	unsigned int maj, min;
+	dev_t res;
+	int ret = -EINVAL;
+
+	if (sscanf(buf, "%u:%u", &maj, &min) != 2)
+		goto out;
+
+	res = MKDEV(maj,min);
+	if (maj != MAJOR(res) || min != MINOR(res))
+		goto out;
+
+	mutex_lock(&pm_mutex);
+	swsusp_resume_device = res;
+	mutex_unlock(&pm_mutex);
+	printk(KERN_INFO "PM: Starting manual resume from disk\n");
+	noresume = 0;
+	software_resume();
+	ret = n;
+ out:
+	return ret;
+}
+
+power_attr(resume);
+
+static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr,
+			       char *buf)
+{
+	return sprintf(buf, "%lu\n", image_size);
+}
+
+static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr,
+				const char *buf, size_t n)
+{
+	unsigned long size;
+
+	if (sscanf(buf, "%lu", &size) == 1) {
+		image_size = size;
+		return n;
+	}
+
+	return -EINVAL;
+}
+
+power_attr(image_size);
+
+static struct attribute * g[] = {
+	&disk_attr.attr,
+	&resume_attr.attr,
+	&image_size_attr.attr,
+	NULL,
+};
+
+
+static struct attribute_group attr_group = {
+	.attrs = g,
+};
+
+
+static int __init pm_disk_init(void)
+{
+	return sysfs_create_group(power_kobj, &attr_group);
+}
+
+core_initcall(pm_disk_init);
+
+
+static int __init resume_setup(char *str)
+{
+	if (noresume)
+		return 1;
+
+	strncpy( resume_file, str, 255 );
+	return 1;
+}
+
+static int __init resume_offset_setup(char *str)
+{
+	unsigned long long offset;
+
+	if (noresume)
+		return 1;
+
+	if (sscanf(str, "%llu", &offset) == 1)
+		swsusp_resume_block = offset;
+
+	return 1;
+}
+
+static int __init noresume_setup(char *str)
+{
+	noresume = 1;
+	return 1;
+}
+
+__setup("noresume", noresume_setup);
+__setup("resume_offset=", resume_offset_setup);
+__setup("resume=", resume_setup);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 2bd98d9..26d5a26 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -45,7 +45,7 @@ static inline char *check_image_kernel(struct swsusp_info *info)
  */
 #define SPARE_PAGES	((1024 * 1024) >> PAGE_SHIFT)
 
-/* kernel/power/disk.c */
+/* kernel/power/hibernate.c */
 extern int hibernation_snapshot(int platform_mode);
 extern int hibernation_restore(int platform_mode);
 extern int hibernation_platform_enter(void);
@@ -147,7 +147,7 @@ extern int swsusp_swap_in_use(void);
  */
 #define SF_PLATFORM_MODE	1
 
-/* kernel/power/disk.c */
+/* kernel/power/hibernate.c */
 extern int swsusp_check(void);
 extern void swsusp_free(void);
 extern int swsusp_read(unsigned int *flags_p);
-- 
cgit v0.10.2


From fce2b111fae9151a53dabb36513b398d03337a19 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Wed, 10 Jun 2009 01:28:19 +0200
Subject: PM/Hibernate: Move NVS routines into a seperate file (v2).

The *_nvs_* routines in swsusp.c make use of the io*map()
functions, which are only provided for HAS_IOMEM, thus
breaking compilation if HAS_IOMEM is not set. Fix this
by moving the *_nvs_* routines into hibernate_nvs.c, which
is only compiled if HAS_IOMEM is set.

[rjw: Change the name of the new file to hibernate_nvs.c, add the
 license line to the header comment.]

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 795032e..cd15df6 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -245,11 +245,6 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
 
 extern void hibernation_set_ops(struct platform_hibernation_ops *ops);
 extern int hibernate(void);
-extern int hibernate_nvs_register(unsigned long start, unsigned long size);
-extern int hibernate_nvs_alloc(void);
-extern void hibernate_nvs_free(void);
-extern void hibernate_nvs_save(void);
-extern void hibernate_nvs_restore(void);
 extern bool system_entering_hibernation(void);
 #else /* CONFIG_HIBERNATION */
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
@@ -258,6 +253,16 @@ static inline void swsusp_unset_page_free(struct page *p) {}
 
 static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {}
 static inline int hibernate(void) { return -ENOSYS; }
+static inline bool system_entering_hibernation(void) { return false; }
+#endif /* CONFIG_HIBERNATION */
+
+#ifdef CONFIG_HIBERNATION_NVS
+extern int hibernate_nvs_register(unsigned long start, unsigned long size);
+extern int hibernate_nvs_alloc(void);
+extern void hibernate_nvs_free(void);
+extern void hibernate_nvs_save(void);
+extern void hibernate_nvs_restore(void);
+#else /* CONFIG_HIBERNATION_NVS */
 static inline int hibernate_nvs_register(unsigned long a, unsigned long b)
 {
 	return 0;
@@ -266,8 +271,7 @@ static inline int hibernate_nvs_alloc(void) { return 0; }
 static inline void hibernate_nvs_free(void) {}
 static inline void hibernate_nvs_save(void) {}
 static inline void hibernate_nvs_restore(void) {}
-static inline bool system_entering_hibernation(void) { return false; }
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATION_NVS */
 
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 23bd4da..72067cb 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -116,9 +116,13 @@ config SUSPEND_FREEZER
 
 	  Turning OFF this setting is NOT recommended! If in doubt, say Y.
 
+config HIBERNATION_NVS
+	bool
+
 config HIBERNATION
 	bool "Hibernation (aka 'suspend to disk')"
 	depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
+	select HIBERNATION_NVS if HAS_IOMEM
 	---help---
 	  Enable the suspend to disk (STD) functionality, which is usually
 	  called "hibernation" in user interfaces.  STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index eadb17f..c3b81c3 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -9,5 +9,6 @@ obj-$(CONFIG_FREEZER)		+= process.o
 obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION_NVS)	+= hibernate_nvs.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c
new file mode 100644
index 0000000..39ac698
--- /dev/null
+++ b/kernel/power/hibernate_nvs.c
@@ -0,0 +1,135 @@
+/*
+ * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
+ *
+ * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+
+/*
+ * Platforms, like ACPI, may want us to save some memory used by them during
+ * hibernation and to restore the contents of this memory during the subsequent
+ * resume.  The code below implements a mechanism allowing us to do that.
+ */
+
+struct nvs_page {
+	unsigned long phys_start;
+	unsigned int size;
+	void *kaddr;
+	void *data;
+	struct list_head node;
+};
+
+static LIST_HEAD(nvs_list);
+
+/**
+ *	hibernate_nvs_register - register platform NVS memory region to save
+ *	@start - physical address of the region
+ *	@size - size of the region
+ *
+ *	The NVS region need not be page-aligned (both ends) and we arrange
+ *	things so that the data from page-aligned addresses in this region will
+ *	be copied into separate RAM pages.
+ */
+int hibernate_nvs_register(unsigned long start, unsigned long size)
+{
+	struct nvs_page *entry, *next;
+
+	while (size > 0) {
+		unsigned int nr_bytes;
+
+		entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
+		if (!entry)
+			goto Error;
+
+		list_add_tail(&entry->node, &nvs_list);
+		entry->phys_start = start;
+		nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
+		entry->size = (size < nr_bytes) ? size : nr_bytes;
+
+		start += entry->size;
+		size -= entry->size;
+	}
+	return 0;
+
+ Error:
+	list_for_each_entry_safe(entry, next, &nvs_list, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
+	return -ENOMEM;
+}
+
+/**
+ *	hibernate_nvs_free - free data pages allocated for saving NVS regions
+ */
+void hibernate_nvs_free(void)
+{
+	struct nvs_page *entry;
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data) {
+			free_page((unsigned long)entry->data);
+			entry->data = NULL;
+			if (entry->kaddr) {
+				iounmap(entry->kaddr);
+				entry->kaddr = NULL;
+			}
+		}
+}
+
+/**
+ *	hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ */
+int hibernate_nvs_alloc(void)
+{
+	struct nvs_page *entry;
+
+	list_for_each_entry(entry, &nvs_list, node) {
+		entry->data = (void *)__get_free_page(GFP_KERNEL);
+		if (!entry->data) {
+			hibernate_nvs_free();
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+/**
+ *	hibernate_nvs_save - save NVS memory regions
+ */
+void hibernate_nvs_save(void)
+{
+	struct nvs_page *entry;
+
+	printk(KERN_INFO "PM: Saving platform NVS memory\n");
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data) {
+			entry->kaddr = ioremap(entry->phys_start, entry->size);
+			memcpy(entry->data, entry->kaddr, entry->size);
+		}
+}
+
+/**
+ *	hibernate_nvs_restore - restore NVS memory regions
+ *
+ *	This function is going to be called with interrupts disabled, so it
+ *	cannot iounmap the virtual addresses used to access the NVS region.
+ */
+void hibernate_nvs_restore(void)
+{
+	struct nvs_page *entry;
+
+	printk(KERN_INFO "PM: Restoring platform NVS memory\n");
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data)
+			memcpy(entry->kaddr, entry->data, entry->size);
+}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 87b901c..6a07f4d 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -186,125 +186,3 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
 			centisecs / 100, centisecs % 100,
 			kps / 1000, (kps % 1000) / 10);
 }
-
-/*
- * Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
- * resume.  The code below implements a mechanism allowing us to do that.
- */
-
-struct nvs_page {
-	unsigned long phys_start;
-	unsigned int size;
-	void *kaddr;
-	void *data;
-	struct list_head node;
-};
-
-static LIST_HEAD(nvs_list);
-
-/**
- *	hibernate_nvs_register - register platform NVS memory region to save
- *	@start - physical address of the region
- *	@size - size of the region
- *
- *	The NVS region need not be page-aligned (both ends) and we arrange
- *	things so that the data from page-aligned addresses in this region will
- *	be copied into separate RAM pages.
- */
-int hibernate_nvs_register(unsigned long start, unsigned long size)
-{
-	struct nvs_page *entry, *next;
-
-	while (size > 0) {
-		unsigned int nr_bytes;
-
-		entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
-		if (!entry)
-			goto Error;
-
-		list_add_tail(&entry->node, &nvs_list);
-		entry->phys_start = start;
-		nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
-		entry->size = (size < nr_bytes) ? size : nr_bytes;
-
-		start += entry->size;
-		size -= entry->size;
-	}
-	return 0;
-
- Error:
-	list_for_each_entry_safe(entry, next, &nvs_list, node) {
-		list_del(&entry->node);
-		kfree(entry);
-	}
-	return -ENOMEM;
-}
-
-/**
- *	hibernate_nvs_free - free data pages allocated for saving NVS regions
- */
-void hibernate_nvs_free(void)
-{
-	struct nvs_page *entry;
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data) {
-			free_page((unsigned long)entry->data);
-			entry->data = NULL;
-			if (entry->kaddr) {
-				iounmap(entry->kaddr);
-				entry->kaddr = NULL;
-			}
-		}
-}
-
-/**
- *	hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
- */
-int hibernate_nvs_alloc(void)
-{
-	struct nvs_page *entry;
-
-	list_for_each_entry(entry, &nvs_list, node) {
-		entry->data = (void *)__get_free_page(GFP_KERNEL);
-		if (!entry->data) {
-			hibernate_nvs_free();
-			return -ENOMEM;
-		}
-	}
-	return 0;
-}
-
-/**
- *	hibernate_nvs_save - save NVS memory regions
- */
-void hibernate_nvs_save(void)
-{
-	struct nvs_page *entry;
-
-	printk(KERN_INFO "PM: Saving platform NVS memory\n");
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data) {
-			entry->kaddr = ioremap(entry->phys_start, entry->size);
-			memcpy(entry->data, entry->kaddr, entry->size);
-		}
-}
-
-/**
- *	hibernate_nvs_restore - restore NVS memory regions
- *
- *	This function is going to be called with interrupts disabled, so it
- *	cannot iounmap the virtual addresses used to access the NVS region.
- */
-void hibernate_nvs_restore(void)
-{
-	struct nvs_page *entry;
-
-	printk(KERN_INFO "PM: Restoring platform NVS memory\n");
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data)
-			memcpy(entry->kaddr, entry->data, entry->size);
-}
-- 
cgit v0.10.2


From 5818a6e2519b34cd6d0220d89f5729ab2725e1bf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 11 Jun 2009 21:59:21 +0200
Subject: PM: Add empty suspend/resume device irq functions

git commit 0a0c5168 "PM: Introduce functions for suspending and resuming
device interrupts" introduced some helper functions. However these
functions are only available for architectures which support
GENERIC_HARDIRQS.

Other architectures will see this build error:

drivers/built-in.o: In function `sysdev_suspend':
(.text+0x15138): undefined reference to `check_wakeup_irqs'
drivers/built-in.o: In function `device_power_up':
(.text+0x1cb66): undefined reference to `resume_device_irqs'
drivers/built-in.o: In function `device_power_down':
(.text+0x1cb92): undefined reference to `suspend_device_irqs'

To fix this add some empty inline functions for !GENERIC_HARDIRQS.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ff374ce..c41e812 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -183,6 +183,7 @@ extern void disable_irq(unsigned int irq);
 extern void enable_irq(unsigned int irq);
 
 /* The following three functions are for the core kernel use only. */
+#ifdef CONFIG_GENERIC_HARDIRQS
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
 #ifdef CONFIG_PM_SLEEP
@@ -190,6 +191,11 @@ extern int check_wakeup_irqs(void);
 #else
 static inline int check_wakeup_irqs(void) { return 0; }
 #endif
+#else
+static inline void suspend_device_irqs(void) { };
+static inline void resume_device_irqs(void) { };
+static inline int check_wakeup_irqs(void) { return 0; }
+#endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 
-- 
cgit v0.10.2


From 493b87e5ed352cf548e6456ddfc36576e28278ea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 12 Jun 2009 11:34:55 -0400
Subject: xfs: fix warnings with CONFIG_XFS_QUOTA disabled

Fix warnings about unitialized dquot variables by making sure
xfs_qm_vop_dqalloc touches it even when quotas are disabled.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 079d2e6..3ec91ac 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -335,6 +335,14 @@ extern void xfs_qm_unmount(struct xfs_mount *);
 extern void xfs_qm_unmount_quotas(struct xfs_mount *);
 
 #else
+static inline int
+xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
+		uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp)
+{
+	*udqp = NULL;
+	*gdqp = NULL;
+	return 0;
+}
 #define xfs_trans_dup_dqinfo(tp, tp2)
 #define xfs_trans_free_dqinfo(tp)
 #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
@@ -342,7 +350,6 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *);
 #define xfs_trans_unreserve_and_mod_dquots(tp)
 #define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags)	(0)
 #define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl)	(0)
-#define xfs_qm_vop_dqalloc(ip, uid, gid, prid, fl, ou, og)		(0)
 #define xfs_qm_vop_create_dqattach(tp, ip, u, g)
 #define xfs_qm_vop_rename_dqattach(it)					(0)
 #define xfs_qm_vop_chown(tp, ip, old, new)				(NULL)
-- 
cgit v0.10.2


From e83f1eb6bfc4004c19a99ee5f5aa65bd3fbecec3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 12 Jun 2009 11:19:11 -0400
Subject: xfs: fix small mismerge in xfs_vn_mknod

Identation got messed up when merging the current_umask changes with
the generic ACL support.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 800dd4f..58973bb 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -225,8 +225,8 @@ xfs_vn_mknod(
 		if (IS_ERR(default_acl))
 			return -PTR_ERR(default_acl);
 
-	if (!default_acl)
-		mode &= ~current_umask();
+		if (!default_acl)
+			mode &= ~current_umask();
 	}
 
 	xfs_dentry_to_name(&name, dentry);
-- 
cgit v0.10.2


From 9528d1c7a541b481a0e80301dc8d545848104023 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 18 May 2009 08:14:41 -0400
Subject: i2c: Blackfin TWI: make sure we don't end up with a CLKDIV=0

Make sure we don't end up with an invalid CLKDIV=0 in case someone
specifies 20kHz SCL or less (5 * 1024 / 20 = 0x100).

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
[ben-linux@fluff.org: shortened subject line]
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index f1c6ca7..c8460fa 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -298,7 +298,7 @@ config I2C_BLACKFIN_TWI
 config I2C_BLACKFIN_TWI_CLK_KHZ
 	int "Blackfin TWI I2C clock (kHz)"
 	depends on I2C_BLACKFIN_TWI
-	range 10 400
+	range 21 400
 	default 50
 	help
 	  The unit of the TWI clock is kHz.
diff --git a/drivers/i2c/busses/i2c-bfin-twi.c b/drivers/i2c/busses/i2c-bfin-twi.c
index fc548b3..77cafb6 100644
--- a/drivers/i2c/busses/i2c-bfin-twi.c
+++ b/drivers/i2c/busses/i2c-bfin-twi.c
@@ -614,6 +614,7 @@ static int i2c_bfin_twi_probe(struct platform_device *pdev)
 	struct i2c_adapter *p_adap;
 	struct resource *res;
 	int rc;
+	unsigned int clkhilow;
 
 	iface = kzalloc(sizeof(struct bfin_twi_iface), GFP_KERNEL);
 	if (!iface) {
@@ -675,10 +676,14 @@ static int i2c_bfin_twi_probe(struct platform_device *pdev)
 	/* Set TWI internal clock as 10MHz */
 	write_CONTROL(iface, ((get_sclk() / 1024 / 1024 + 5) / 10) & 0x7F);
 
+	/*
+	 * We will not end up with a CLKDIV=0 because no one will specify
+	 * 20kHz SCL or less in Kconfig now. (5 * 1024 / 20 = 0x100)
+	 */
+	clkhilow = 5 * 1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ;
+
 	/* Set Twi interface clock as specified */
-	write_CLKDIV(iface, ((5*1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ)
-			<< 8) | ((5*1024 / CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ)
-			& 0xFF));
+	write_CLKDIV(iface, (clkhilow << 8) | clkhilow);
 
 	/* Enable TWI */
 	write_CONTROL(iface, read_CONTROL(iface) | TWI_ENA);
-- 
cgit v0.10.2


From 57a8f32eafa6f36ea3a128e8b13f353c5a3ca9b2 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Tue, 19 May 2009 07:21:58 -0400
Subject: i2c: Blackfin TWI: fix REPEAT START mode doesn't repeat

Avoid rewrite TWI MASTER_CTL reg when issue next message
In i2c repeat transfer mode, byte count of next message should be filled
into part of the TWI MASTER_CTL reg when interrupt MCOMP of last
message transfer is triggered. But, other bits in this reg should
not be touched.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
[ben-linux@fluff.org: shorted subject]
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-bfin-twi.c b/drivers/i2c/busses/i2c-bfin-twi.c
index 77cafb6..4d73ad7 100644
--- a/drivers/i2c/busses/i2c-bfin-twi.c
+++ b/drivers/i2c/busses/i2c-bfin-twi.c
@@ -196,8 +196,6 @@ static void bfin_twi_handle_interrupt(struct bfin_twi_iface *iface)
 			/* remove restart bit and enable master receive */
 			write_MASTER_CTL(iface,
 				read_MASTER_CTL(iface) & ~RSTART);
-			write_MASTER_CTL(iface,
-				read_MASTER_CTL(iface) | MEN | MDIR);
 			SSYNC();
 		} else if (iface->cur_mode == TWI_I2C_MODE_REPEAT &&
 				iface->cur_msg+1 < iface->msg_num) {
@@ -222,18 +220,19 @@ static void bfin_twi_handle_interrupt(struct bfin_twi_iface *iface)
 			}
 
 			if (iface->pmsg[iface->cur_msg].len <= 255)
-				write_MASTER_CTL(iface,
-				iface->pmsg[iface->cur_msg].len << 6);
+					write_MASTER_CTL(iface,
+					(read_MASTER_CTL(iface) &
+					(~(0xff << 6))) |
+				(iface->pmsg[iface->cur_msg].len << 6));
 			else {
-				write_MASTER_CTL(iface, 0xff << 6);
+				write_MASTER_CTL(iface,
+					(read_MASTER_CTL(iface) |
+					(0xff << 6)));
 				iface->manual_stop = 1;
 			}
 			/* remove restart bit and enable master receive */
 			write_MASTER_CTL(iface,
 				read_MASTER_CTL(iface) & ~RSTART);
-			write_MASTER_CTL(iface, read_MASTER_CTL(iface) |
-				MEN | ((iface->read_write == I2C_SMBUS_READ) ?
-				MDIR : 0));
 			SSYNC();
 		} else {
 			iface->result = 1;
-- 
cgit v0.10.2


From 94327d009e3aa20214e9dfa486a1fd14445fe736 Mon Sep 17 00:00:00 2001
From: Frank Shew <fshew@geometrics.com>
Date: Tue, 19 May 2009 07:23:49 -0400
Subject: i2c: Blackfin TWI: fix transfer errors with repeat start

We have a custom BF537 board with an I2C RTC (MAX DS3231) running
uclinux 2007R1 for some time. Recently during migration to 2008R1.5-RC3
we losted access to the RTC. The RTC driver calls 'i2c_transfer()' which
in turns calls 'bfin_twi_master_xfer()' in i2c-bfin-twi.c.

Compared with 2007R1, it looks like the 2008R1.5 version of i2c-bin-twi.c
has a new mode 'TWI_I2C-MODE_REPEAT' which corresponds to the Repeat Start
Condition described in the HRM. However, according to the HRM, at XMIT or
RECV interrupt and when the data count is 0, not only is the RESTART bit
supposed to be set, but MDIR must also be set if the next operation is a
receive sequence, and cleared if not. Currently there is no code that looks
at the I2C_M_RD bit in the flag from the next cur_msg and set/clear the MDIR
flag accordingly at the same time that the RSTART bit is set. Instead, MDIR
is set or cleared (by OR'ing with 0?) after the RESTART bit has been cleared
during handling of MCOMP interrupt.

It appears that this is causing our failure with reading the RTC, as a
quick patch to set/clear MDIR when RESTART is set seem to solve our problem.

Signed-off-by: Frank Shew <fshew@geometrics.com>
Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
[ben-linux@fluff.org: shorted subject]
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-bfin-twi.c b/drivers/i2c/busses/i2c-bfin-twi.c
index 4d73ad7..40136ea 100644
--- a/drivers/i2c/busses/i2c-bfin-twi.c
+++ b/drivers/i2c/busses/i2c-bfin-twi.c
@@ -104,9 +104,14 @@ static void bfin_twi_handle_interrupt(struct bfin_twi_iface *iface)
 			write_MASTER_CTL(iface,
 				read_MASTER_CTL(iface) | STOP);
 		else if (iface->cur_mode == TWI_I2C_MODE_REPEAT &&
-				iface->cur_msg+1 < iface->msg_num)
-			write_MASTER_CTL(iface,
-				read_MASTER_CTL(iface) | RSTART);
+		         iface->cur_msg + 1 < iface->msg_num) {
+			if (iface->pmsg[iface->cur_msg + 1].flags & I2C_M_RD)
+				write_MASTER_CTL(iface,
+					read_MASTER_CTL(iface) | RSTART | MDIR);
+			else
+				write_MASTER_CTL(iface,
+					(read_MASTER_CTL(iface) | RSTART) & ~MDIR);
+		}
 		SSYNC();
 		/* Clear status */
 		write_INT_STAT(iface, XMTSERV);
@@ -134,9 +139,13 @@ static void bfin_twi_handle_interrupt(struct bfin_twi_iface *iface)
 				read_MASTER_CTL(iface) | STOP);
 			SSYNC();
 		} else if (iface->cur_mode == TWI_I2C_MODE_REPEAT &&
-				iface->cur_msg+1 < iface->msg_num) {
-			write_MASTER_CTL(iface,
-				read_MASTER_CTL(iface) | RSTART);
+		           iface->cur_msg + 1 < iface->msg_num) {
+			if (iface->pmsg[iface->cur_msg + 1].flags & I2C_M_RD)
+				write_MASTER_CTL(iface,
+					read_MASTER_CTL(iface) | RSTART | MDIR);
+			else
+				write_MASTER_CTL(iface,
+					(read_MASTER_CTL(iface) | RSTART) & ~MDIR);
 			SSYNC();
 		}
 		/* Clear interrupt source */
-- 
cgit v0.10.2


From e0cd2dd5dd2b7c6512e46ce0b4f119cd7b0c74a4 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Wed, 27 May 2009 09:24:10 +0000
Subject: i2c: Blackfin TWI: implement I2C_FUNC_SMBUS_I2C_BLOCK functionality

Some drivers need i2c_smbus_read_i2c_block_data() functionality, so add
support for it to the Blackfin I2C bus driver.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
[ben-linux@fluff.org: shortened subject]
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-bfin-twi.c b/drivers/i2c/busses/i2c-bfin-twi.c
index 40136ea..26d8987 100644
--- a/drivers/i2c/busses/i2c-bfin-twi.c
+++ b/drivers/i2c/busses/i2c-bfin-twi.c
@@ -449,6 +449,16 @@ int bfin_twi_smbus_xfer(struct i2c_adapter *adap, u16 addr,
 		}
 		iface->transPtr = data->block;
 		break;
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		if (read_write == I2C_SMBUS_READ) {
+			iface->readNum = data->block[0];
+			iface->cur_mode = TWI_I2C_MODE_COMBINED;
+		} else {
+			iface->writeNum = data->block[0];
+			iface->cur_mode = TWI_I2C_MODE_STANDARDSUB;
+		}
+		iface->transPtr = (u8 *)&data->block[1];
+		break;
 	default:
 		return -1;
 	}
@@ -572,7 +582,7 @@ static u32 bfin_twi_functionality(struct i2c_adapter *adap)
 	return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
 	       I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
 	       I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_PROC_CALL |
-	       I2C_FUNC_I2C;
+	       I2C_FUNC_I2C | I2C_FUNC_SMBUS_I2C_BLOCK;
 }
 
 static struct i2c_algorithm bfin_twi_algorithm = {
-- 
cgit v0.10.2


From baf46b4e378d7950dff7ba30cfd50ff585987cb4 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@nokia.com>
Date: Wed, 27 May 2009 17:54:45 +0300
Subject: i2c: OMAP2/3: Fix scll/sclh calculations

Fix scll/sclh calculations for HS and fast modes. Currently the driver
uses equal (roughly) low/high times which will result in too short
low time.

OMAP3430 TRM gives the following equations:

	F/S: tLow  = (scll + 7) * internal_clk
	     tHigh = (sclh + 5) * internal_clk
	HS:  tLow  = (scll + 7) * fclk
	     tHigh = (sclh + 5) * fclk

Furthermore, the I2C specification sets the following minimum values
for HS tLow/tHigh for capacitive bus loads 100 pF (maximum speed 3400)
and 400 pF (maximum speed 1700):

	speed	tLow		tHigh
	3400	160 ns		60 ns
	1700	320 ns		120 ns

and for F/S:

	speed	tLow		tHigh
	400	1300 ns		600 ns
	100	4700 ns		4000 ns

By using duty cycles 33/66 (HS, F) and 50/50 (S) we stay above these
minimum values.

Signed-off-by: Aaro Koskinen <aaro.koskinen@nokia.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index ece0125..a879e4b 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -343,17 +343,28 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 
 		/* If configured for High Speed */
 		if (dev->speed > 400) {
+			unsigned long scl;
+
 			/* For first phase of HS mode */
-			fsscll = internal_clk / (400 * 2) - 6;
-			fssclh = internal_clk / (400 * 2) - 6;
+			scl = internal_clk / 400;
+			fsscll = scl - (scl / 3) - 7;
+			fssclh = (scl / 3) - 5;
 
 			/* For second phase of HS mode */
-			hsscll = fclk_rate / (dev->speed * 2) - 6;
-			hssclh = fclk_rate / (dev->speed * 2) - 6;
+			scl = fclk_rate / dev->speed;
+			hsscll = scl - (scl / 3) - 7;
+			hssclh = (scl / 3) - 5;
+		} else if (dev->speed > 100) {
+			unsigned long scl;
+
+			/* Fast mode */
+			scl = internal_clk / dev->speed;
+			fsscll = scl - (scl / 3) - 7;
+			fssclh = (scl / 3) - 5;
 		} else {
-			/* To handle F/S modes */
-			fsscll = internal_clk / (dev->speed * 2) - 6;
-			fssclh = internal_clk / (dev->speed * 2) - 6;
+			/* Standard mode */
+			fsscll = internal_clk / (dev->speed * 2) - 7;
+			fssclh = internal_clk / (dev->speed * 2) - 5;
 		}
 		scll = (hsscll << OMAP_I2C_SCLL_HSSCLL) | fsscll;
 		sclh = (hssclh << OMAP_I2C_SCLH_HSSCLH) | fssclh;
-- 
cgit v0.10.2


From 84bf2c868f3ca996e5bbd3beb2ef502f457140f3 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@nokia.com>
Date: Wed, 27 May 2009 17:54:46 +0300
Subject: i2c: OMAP3: Better noise suppression for fast/standard modes

Use longer noise filter period for fast and standard mode. Based on an
earlier patch by Eero Nurkkala.

Signed-off-by: Aaro Koskinen <aaro.koskinen@nokia.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index a879e4b..c73475d 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -333,8 +333,18 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
 
 	if (cpu_is_omap2430() || cpu_is_omap34xx()) {
 
-		/* HSI2C controller internal clk rate should be 19.2 Mhz */
-		internal_clk = 19200;
+		/*
+		 * HSI2C controller internal clk rate should be 19.2 Mhz for
+		 * HS and for all modes on 2430. On 34xx we can use lower rate
+		 * to get longer filter period for better noise suppression.
+		 * The filter is iclk (fclk for HS) period.
+		 */
+		if (dev->speed > 400 || cpu_is_omap_2430())
+			internal_clk = 19200;
+		else if (dev->speed > 100)
+			internal_clk = 9600;
+		else
+			internal_clk = 4000;
 		fclk_rate = clk_get_rate(dev->fclk) / 1000;
 
 		/* Compute prescaler divisor */
-- 
cgit v0.10.2


From 7d85ccd816535f56880f7dfdb4de056794376b2c Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 12 Jun 2009 10:45:29 +0100
Subject: i2c-s3c2410: move to using platform idtable to match devices

Change to using platform id table to match either of the two supported
platform device names in the driver. This simplifies the driver init and
exit code

Note, log messages will now be prefixed with 's3c-i2c' instead of the
driver name, so output will be of the form of:

s3c-i2c s3c2440-i2c.0: slave address 0x10

Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 1691ef0..079a312 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -51,6 +51,11 @@ enum s3c24xx_i2c_state {
 	STATE_STOP
 };
 
+enum s3c24xx_i2c_type {
+	TYPE_S3C2410,
+	TYPE_S3C2440,
+};
+
 struct s3c24xx_i2c {
 	spinlock_t		lock;
 	wait_queue_head_t	wait;
@@ -88,8 +93,10 @@ struct s3c24xx_i2c {
 static inline int s3c24xx_i2c_is2440(struct s3c24xx_i2c *i2c)
 {
 	struct platform_device *pdev = to_platform_device(i2c->dev);
+	enum s3c24xx_i2c_type type;
 
-	return !strcmp(pdev->name, "s3c2440-i2c");
+	type = platform_get_device_id(pdev)->driver_data;
+	return type == TYPE_S3C2440;
 }
 
 /* s3c24xx_i2c_master_complete
@@ -969,52 +976,41 @@ static int s3c24xx_i2c_resume(struct platform_device *dev)
 
 /* device driver for platform bus bits */
 
-static struct platform_driver s3c2410_i2c_driver = {
-	.probe		= s3c24xx_i2c_probe,
-	.remove		= s3c24xx_i2c_remove,
-	.suspend_late	= s3c24xx_i2c_suspend_late,
-	.resume		= s3c24xx_i2c_resume,
-	.driver		= {
-		.owner	= THIS_MODULE,
-		.name	= "s3c2410-i2c",
-	},
+static struct platform_device_id s3c24xx_driver_ids[] = {
+	{
+		.name		= "s3c2410-i2c",
+		.driver_data	= TYPE_S3C2410,
+	}, {
+		.name		= "s3c2440-i2c",
+		.driver_data	= TYPE_S3C2440,
+	}, { },
 };
+MODULE_DEVICE_TABLE(platform, s3c24xx_driver_ids);
 
-static struct platform_driver s3c2440_i2c_driver = {
+static struct platform_driver s3c24xx_i2c_driver = {
 	.probe		= s3c24xx_i2c_probe,
 	.remove		= s3c24xx_i2c_remove,
 	.suspend_late	= s3c24xx_i2c_suspend_late,
 	.resume		= s3c24xx_i2c_resume,
+	.id_table	= s3c24xx_driver_ids,
 	.driver		= {
 		.owner	= THIS_MODULE,
-		.name	= "s3c2440-i2c",
+		.name	= "s3c-i2c",
 	},
 };
 
 static int __init i2c_adap_s3c_init(void)
 {
-	int ret;
-
-	ret = platform_driver_register(&s3c2410_i2c_driver);
-	if (ret == 0) {
-		ret = platform_driver_register(&s3c2440_i2c_driver);
-		if (ret)
-			platform_driver_unregister(&s3c2410_i2c_driver);
-	}
-
-	return ret;
+	return platform_driver_register(&s3c24xx_i2c_driver);
 }
 subsys_initcall(i2c_adap_s3c_init);
 
 static void __exit i2c_adap_s3c_exit(void)
 {
-	platform_driver_unregister(&s3c2410_i2c_driver);
-	platform_driver_unregister(&s3c2440_i2c_driver);
+	platform_driver_unregister(&s3c24xx_i2c_driver);
 }
 module_exit(i2c_adap_s3c_exit);
 
 MODULE_DESCRIPTION("S3C24XX I2C Bus driver");
 MODULE_AUTHOR("Ben Dooks, <ben@simtec.co.uk>");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:s3c2410-i2c");
-MODULE_ALIAS("platform:s3c2440-i2c");
-- 
cgit v0.10.2


From dd14be4c274fc484eccace03ae9726e516630331 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20R=C3=B6jfors?=
 <richard.rojfors.ext@mocean-labs.com>
Date: Fri, 5 Jun 2009 15:40:32 +0200
Subject: i2c-ocores: Can add I2C devices to the bus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is sometimes a need for the ocores driver to add devices to the
bus when installed.

i2c_register_board_info can not always be used, because the I2C devices
 are not known at an early state, they could for instance be connected
 on a I2C bus on a PCI device which has the Open Cores IP.

i2c_new_device can not be used in all cases either since the resulting
bus nummer might be unknown.

The solution is the pass a list of I2C devices in the platform data to
the Open Cores driver. This is useful for MFD drivers.

Signed-off-by: Richard Röjfors <richard.rojfors.ext@mocean-labs.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>

diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
index cfcebb1..c269aaa 100644
--- a/Documentation/i2c/busses/i2c-ocores
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -20,6 +20,8 @@ platform_device with the base address and interrupt number. The
 dev.platform_data of the device should also point to a struct
 ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the
 distance between registers and the input clock speed.
+There is also a possibility to attach a list of i2c_board_info which
+the i2c-ocores driver will add to the bus upon creation.
 
 E.G. something like:
 
@@ -36,9 +38,24 @@ static struct resource ocores_resources[] = {
 	},
 };
 
+/* optional board info */
+struct i2c_board_info ocores_i2c_board_info[] = {
+	{
+		I2C_BOARD_INFO("tsc2003", 0x48),
+		.platform_data = &tsc2003_platform_data,
+		.irq = TSC_IRQ
+	},
+	{
+		I2C_BOARD_INFO("adv7180", 0x42 >> 1),
+		.irq = ADV_IRQ
+	}
+};
+
 static struct ocores_i2c_platform_data myi2c_data = {
 	.regstep	= 2,		/* two bytes between registers */
 	.clock_khz	= 50000,	/* input clock of 50MHz */
+	.devices	= ocores_i2c_board_info, /* optional table of devices */
+	.num_devices	= ARRAY_SIZE(ocores_i2c_board_info), /* table size */
 };
 
 static struct platform_device myi2c = {
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index e5193bf..3542c6b 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -216,6 +216,7 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
 	struct ocores_i2c_platform_data *pdata;
 	struct resource *res, *res2;
 	int ret;
+	int i;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
@@ -271,6 +272,10 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
 		goto add_adapter_failed;
 	}
 
+	/* add in known devices to the bus */
+	for (i = 0; i < pdata->num_devices; i++)
+		i2c_new_device(&i2c->adap, pdata->devices + i);
+
 	return 0;
 
 add_adapter_failed:
diff --git a/include/linux/i2c-ocores.h b/include/linux/i2c-ocores.h
index 8ed591b..4d5e57f 100644
--- a/include/linux/i2c-ocores.h
+++ b/include/linux/i2c-ocores.h
@@ -14,6 +14,8 @@
 struct ocores_i2c_platform_data {
 	u32 regstep;   /* distance between registers */
 	u32 clock_khz; /* input clock in kHz */
+	u8 num_devices; /* number of devices in the devices list */
+	struct i2c_board_info const *devices; /* devices connected to the bus */
 };
 
 #endif /* _LINUX_I2C_OCORES_H */
-- 
cgit v0.10.2


From bb6e647051a59dca5a72b3deef1e061d7c1c34da Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Date: Wed, 3 Jun 2009 14:29:16 +0200
Subject: avr32: Fix oops on unaligned user access

The unaligned address exception handler (and others) does not scan the
fixup tables before oopsing. This is bad because it means passing a
badly aligned pointer from user space might crash the kernel.

Fix this by scanning the fixup tables in _exception(). This should
resolve the issue for unaligned addresses as well as other less common
exceptions that might be happening during a userspace access. The page
fault handler already does fixup processing.

Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>

diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index d547c8d..6e3d491 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -75,8 +75,17 @@ void _exception(long signr, struct pt_regs *regs, int code,
 {
 	siginfo_t info;
 
-	if (!user_mode(regs))
+	if (!user_mode(regs)) {
+		const struct exception_table_entry *fixup;
+
+		/* Are we prepared to handle this kernel fault? */
+		fixup = search_exception_tables(regs->pc);
+		if (fixup) {
+			regs->pc = fixup->fixup;
+			return;
+		}
 		die("Unhandled exception in kernel mode", regs, signr);
+	}
 
 	memset(&info, 0, sizeof(info));
 	info.si_signo = signr;
-- 
cgit v0.10.2


From faea56c9bb44f539da1ae0194184873fc2720b20 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 12 Jun 2009 11:43:48 -0700
Subject: [SCSI] cnic: fix undefined reference to `ip6_route_output'

Fix cnic build for case of CONFIG_INET=n.
Fix cnic build for case of CONFIG_IPV6=m and CONFIG_CNIC=y.

Fixes these build errors:

cnic.c:(.text+0x236a1d): undefined reference to `ip_route_output_key'
cnic.c:(.text+0x15a8e8): undefined reference to `ip6_route_output'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index 8d74037..a9e2fd3 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -1454,6 +1454,7 @@ static inline u16 cnic_get_vlan(struct net_device *dev,
 static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
 			     struct dst_entry **dst)
 {
+#if defined(CONFIG_INET)
 	struct flowi fl;
 	int err;
 	struct rtable *rt;
@@ -1465,12 +1466,15 @@ static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
 	if (!err)
 		*dst = &rt->u.dst;
 	return err;
+#else
+	return -ENETUNREACH;
+#endif
 }
 
 static int cnic_get_v6_route(struct sockaddr_in6 *dst_addr,
 			     struct dst_entry **dst)
 {
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if defined(CONFIG_IPV6) || (defined(CONFIG_IPV6_MODULE) && defined(MODULE))
 	struct flowi fl;
 
 	memset(&fl, 0, sizeof(fl));
@@ -1550,7 +1554,7 @@ static int cnic_get_route(struct cnic_sock *csk, struct cnic_sockaddr *saddr)
 	clear_bit(SK_F_IPV6, &csk->flags);
 
 	if (is_v6) {
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if defined(CONFIG_IPV6) || (defined(CONFIG_IPV6_MODULE) && defined(MODULE))
 		set_bit(SK_F_IPV6, &csk->flags);
 		err = cnic_get_v6_route(&saddr->remote.v6, &dst);
 		if (err)
-- 
cgit v0.10.2


From bc3bf8fd330ce981ce632a1a4a283eee46838f32 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 13 Jun 2009 08:29:33 +0200
Subject: =?UTF-8?q?[SCSI]=20cnic:=20fix=20error:=20implicit=20declaration?=
 =?UTF-8?q?=20of=20function=20=E2=80=98=5F=5Fsymbol=5Fget=E2=80=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

 drivers/net/cnic.c: In function ‘init_bnx2_cnic’:
 drivers/net/cnic.c:2520: error: implicit declaration of function ‘__symbol_get’
 drivers/net/cnic.c:2520: warning: assignment makes pointer from integer without a cast
 make[1]: *** [drivers/net/cnic.o] Error 1
 make: *** [drivers/net/cnic.o] Error 2

Caused by not including linux/module.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index a9e2fd3..44f77eb 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -25,6 +25,8 @@
 #include <linux/delay.h>
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
+#include <linux/module.h>
+
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 #define BCM_VLAN 1
 #endif
-- 
cgit v0.10.2


From 5c55b40b27bc3249358dcfc86c0845be409ab7a6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 13 Jun 2009 13:23:54 +0100
Subject: FRV: Fix interaction with new generic header stuff

Fix interaction with new generic header stuff as added by:

	commit 6103ec56c65c33774c7c38652c8204120c3c7519
	Author: Arnd Bergmann <arnd@arndb.de>
	Date:   Wed May 13 22:56:27 2009 +0000

	    asm-generic: add generic ABI headers

The problem is that asm/signal.h has been made to include asm-generic/signal.h,
but the redundant stuff from asm/signal.h has not been discarded, leading to
multiple redefinitions.

Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/frv/include/asm/signal.h b/arch/frv/include/asm/signal.h
index 2079197..f071e81 100644
--- a/arch/frv/include/asm/signal.h
+++ b/arch/frv/include/asm/signal.h
@@ -3,107 +3,15 @@
 
 #include <linux/types.h>
 
-/* Avoid too many header ordering problems.  */
-struct siginfo;
-
-#ifdef __KERNEL__
-/* Most things should be clean enough to redefine this at will, if care
-   is taken to make libc match.  */
-
-#define _NSIG		64
-#define _NSIG_BPW	32
-#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
-
-typedef unsigned long old_sigset_t;		/* at least 32 bits */
-
-typedef struct {
-	unsigned long sig[_NSIG_WORDS];
-} sigset_t;
-
-#else
+#ifndef __KERNEL__
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
 #define NSIG		32
 typedef unsigned long sigset_t;
 
-#endif /* __KERNEL__ */
-
-#define SIGHUP		 1
-#define SIGINT		 2
-#define SIGQUIT		 3
-#define SIGILL		 4
-#define SIGTRAP		 5
-#define SIGABRT		 6
-#define SIGIOT		 6
-#define SIGBUS		 7
-#define SIGFPE		 8
-#define SIGKILL		 9
-#define SIGUSR1		10
-#define SIGSEGV		11
-#define SIGUSR2		12
-#define SIGPIPE		13
-#define SIGALRM		14
-#define SIGTERM		15
-#define SIGSTKFLT	16
-#define SIGCHLD		17
-#define SIGCONT		18
-#define SIGSTOP		19
-#define SIGTSTP		20
-#define SIGTTIN		21
-#define SIGTTOU		22
-#define SIGURG		23
-#define SIGXCPU		24
-#define SIGXFSZ		25
-#define SIGVTALRM	26
-#define SIGPROF		27
-#define SIGWINCH	28
-#define SIGIO		29
-#define SIGPOLL		SIGIO
-/*
-#define SIGLOST		29
-*/
-#define SIGPWR		30
-#define SIGSYS		31
-#define	SIGUNUSED	31
-
-/* These should not be considered constants from userland.  */
-#define SIGRTMIN	32
-#define SIGRTMAX	(_NSIG-1)
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP	0x00000001
-#define SA_NOCLDWAIT	0x00000002 /* not supported yet */
-#define SA_SIGINFO	0x00000004
-#define SA_ONSTACK	0x08000000
-#define SA_RESTART	0x10000000
-#define SA_NODEFER	0x40000000
-#define SA_RESETHAND	0x80000000
-
-#define SA_NOMASK	SA_NODEFER
-#define SA_ONESHOT	SA_RESETHAND
-
-#define SA_RESTORER	0x04000000
-
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK	1
-#define SS_DISABLE	2
+#endif /* !__KERNEL__ */
 
-#define MINSIGSTKSZ	2048
-#define SIGSTKSZ	8192
+#define SA_RESTORER	0x04000000 /* to get struct sigaction correct */
 
 #include <asm-generic/signal.h>
 
@@ -115,16 +23,6 @@ struct old_sigaction {
 	__sigrestore_t sa_restorer;
 };
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	__sigrestore_t sa_restorer;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
-
-struct k_sigaction {
-	struct sigaction sa;
-};
 #else
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
@@ -143,19 +41,4 @@ struct sigaction {
 
 #endif /* __KERNEL__ */
 
-typedef struct sigaltstack {
-	void __user *ss_sp;
-	int ss_flags;
-	size_t ss_size;
-} stack_t;
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#ifdef __KERNEL__
-
-#include <asm/sigcontext.h>
-#undef __HAVE_ARCH_SIG_BITOPS
-
-#endif /* __KERNEL__ */
-
 #endif /* _ASM_SIGNAL_H */
-- 
cgit v0.10.2


From 94d89efb2c347a82a08a61dbac8565b1087c3259 Mon Sep 17 00:00:00 2001
From: Jorg Schummer <ext-jorg.2.schummer@nokia.com>
Date: Tue, 31 Mar 2009 17:51:21 +0300
Subject: mmc: mmc_rescan detects card change in one run

With this patch, mmc_rescan can detect the removal of an mmc card and
the insertion of (possibly another) card in the same run. This means
that a card change can be detected without having to call
mmc_detect_change multiple times.

This change generalises the core such that it can be easily used by
hosts which provide a mechanism to detect only the presence of a card
reader cover, which has to be taken off in order to insert a card. Other
hosts ("card detect" or "MMC_CAP_NEEDS_POLL") each receive an event when
a card is removed and when a card is inserted, so it is sufficient for
them if mmc_rescan handles only one event at a time. "Cover detect"
hosts, however, only receive events about the cover status. This means
that between 2 subsequent events, both a card removal and a card
insertion can occur. In this case, the pre-patch version of mmc_rescan
would only detect the removal of the previous card but not the insertion
of the new card.

Signed-off-by: Jorg Schummer <ext-jorg.2.schummer@nokia.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 2649117..23fb2c3 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -855,61 +855,72 @@ void mmc_rescan(struct work_struct *work)
 
 	mmc_bus_get(host);
 
-	if (host->bus_ops == NULL) {
-		/*
-		 * Only we can add a new handler, so it's safe to
-		 * release the lock here.
-		 */
+	/* if there is a card registered, check whether it is still present */
+	if ((host->bus_ops != NULL) && host->bus_ops->detect && !host->bus_dead)
+		host->bus_ops->detect(host);
+
+	mmc_bus_put(host);
+
+
+	mmc_bus_get(host);
+
+	/* if there still is a card present, stop here */
+	if (host->bus_ops != NULL) {
 		mmc_bus_put(host);
+		goto out;
+	}
 
-		if (host->ops->get_cd && host->ops->get_cd(host) == 0)
-			goto out;
+	/* detect a newly inserted card */
 
-		mmc_claim_host(host);
+	/*
+	 * Only we can add a new handler, so it's safe to
+	 * release the lock here.
+	 */
+	mmc_bus_put(host);
 
-		mmc_power_up(host);
-		mmc_go_idle(host);
+	if (host->ops->get_cd && host->ops->get_cd(host) == 0)
+		goto out;
 
-		mmc_send_if_cond(host, host->ocr_avail);
+	mmc_claim_host(host);
 
-		/*
-		 * First we search for SDIO...
-		 */
-		err = mmc_send_io_op_cond(host, 0, &ocr);
-		if (!err) {
-			if (mmc_attach_sdio(host, ocr))
-				mmc_power_off(host);
-			goto out;
-		}
+	mmc_power_up(host);
+	mmc_go_idle(host);
 
-		/*
-		 * ...then normal SD...
-		 */
-		err = mmc_send_app_op_cond(host, 0, &ocr);
-		if (!err) {
-			if (mmc_attach_sd(host, ocr))
-				mmc_power_off(host);
-			goto out;
-		}
+	mmc_send_if_cond(host, host->ocr_avail);
 
-		/*
-		 * ...and finally MMC.
-		 */
-		err = mmc_send_op_cond(host, 0, &ocr);
-		if (!err) {
-			if (mmc_attach_mmc(host, ocr))
-				mmc_power_off(host);
-			goto out;
-		}
+	/*
+	 * First we search for SDIO...
+	 */
+	err = mmc_send_io_op_cond(host, 0, &ocr);
+	if (!err) {
+		if (mmc_attach_sdio(host, ocr))
+			mmc_power_off(host);
+		goto out;
+	}
 
-		mmc_release_host(host);
-		mmc_power_off(host);
-	} else {
-		if (host->bus_ops->detect && !host->bus_dead)
-			host->bus_ops->detect(host);
+	/*
+	 * ...then normal SD...
+	 */
+	err = mmc_send_app_op_cond(host, 0, &ocr);
+	if (!err) {
+		if (mmc_attach_sd(host, ocr))
+			mmc_power_off(host);
+		goto out;
+	}
 
-		mmc_bus_put(host);
+	/*
+	 * ...and finally MMC.
+	 */
+	err = mmc_send_op_cond(host, 0, &ocr);
+	if (!err) {
+		if (mmc_attach_mmc(host, ocr))
+			mmc_power_off(host);
+		goto out;
 	}
+
+	mmc_release_host(host);
+	mmc_power_off(host);
+
 out:
 	if (host->caps & MMC_CAP_NEEDS_POLL)
 		mmc_schedule_delayed_work(&host->detect, HZ);
-- 
cgit v0.10.2


From 5cf20aa557e8f9dd5af302b8f33972082479753a Mon Sep 17 00:00:00 2001
From: Wolfgang Muees <wolfgang.mues@auerswald.de>
Date: Wed, 8 Apr 2009 10:14:07 +0100
Subject: mmc_spi: speedup for slow cards, less wear-out

Speedup for slow cards by transfering more data at once.
This patch also reduces the amount of wear-out of the flash
blocks because fewer partial blocks are written.

Signed-off-by: Wolfgang Muees <wolfgang.mues@auerswald.de>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index f48349d..a789db8 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -97,6 +97,14 @@
  */
 #define r1b_timeout		(HZ * 3)
 
+/* One of the critical speed parameters is the amount of data which may
+ * be transfered in one command. If this value is too low, the SD card
+ * controller has to do multiple partial block writes (argggh!). With
+ * today (2008) SD cards there is little speed gain if we transfer more
+ * than 64 KBytes at a time. So use this value until there is any indication
+ * that we should do more here.
+ */
+#define MMC_SPI_BLOCKSATONCE	128
 
 /****************************************************************************/
 
@@ -1366,6 +1374,10 @@ static int mmc_spi_probe(struct spi_device *spi)
 
 	mmc->ops = &mmc_spi_ops;
 	mmc->max_blk_size = MMC_SPI_BLOCKSIZE;
+	mmc->max_hw_segs = MMC_SPI_BLOCKSATONCE;
+	mmc->max_phys_segs = MMC_SPI_BLOCKSATONCE;
+	mmc->max_req_size = MMC_SPI_BLOCKSATONCE * MMC_SPI_BLOCKSIZE;
+	mmc->max_blk_count = MMC_SPI_BLOCKSATONCE;
 
 	mmc->caps = MMC_CAP_SPI;
 
-- 
cgit v0.10.2


From 7ceeb6a40a4dcc9b9cded6127ad5cdddb79b40ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 2 Apr 2009 19:47:41 +0200
Subject: mmc/omap: make mmci-omap using platform_driver_probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A pointer to mmc_omap_probe which lives in .init.text is passed to the
core via platform_driver_register and so the kernel might oops if probe
is called after the init code is discarded.

As requested by David Brownell platform_driver_probe is used instead of
moving the probe function to .devinit.text.  This saves some memory, but
might have the downside that a device being registered after the call to
mmc_omap_init but before the init sections are discarded will not be
bound anymore to the driver.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index dceb5ee..e7a331d 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1593,7 +1593,6 @@ static int mmc_omap_resume(struct platform_device *pdev)
 #endif
 
 static struct platform_driver mmc_omap_driver = {
-	.probe		= mmc_omap_probe,
 	.remove		= mmc_omap_remove,
 	.suspend	= mmc_omap_suspend,
 	.resume		= mmc_omap_resume,
@@ -1605,7 +1604,7 @@ static struct platform_driver mmc_omap_driver = {
 
 static int __init mmc_omap_init(void)
 {
-	return platform_driver_register(&mmc_omap_driver);
+	return platform_driver_probe(&mmc_omap_driver, mmc_omap_probe);
 }
 
 static void __exit mmc_omap_exit(void)
-- 
cgit v0.10.2


From ae628903ab6cc9a04fdf8d4ff3f0c5b2ffa6f939 Mon Sep 17 00:00:00 2001
From: Pierre Ossman <pierre@ossman.eu>
Date: Sun, 3 May 2009 20:45:03 +0200
Subject: sdhci: avoid changing voltage needlessly

Because of granularity issues, sometimes we told the hardware to change
to the voltage we were already at. Rework the logic so this doesn't
happen.

Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 9234be2..1432a35 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1005,12 +1005,34 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power)
 {
 	u8 pwr;
 
-	if (host->power == power)
+	if (power == (unsigned short)-1)
+		pwr = 0;
+	else {
+		switch (1 << power) {
+		case MMC_VDD_165_195:
+			pwr = SDHCI_POWER_180;
+			break;
+		case MMC_VDD_29_30:
+		case MMC_VDD_30_31:
+			pwr = SDHCI_POWER_300;
+			break;
+		case MMC_VDD_32_33:
+		case MMC_VDD_33_34:
+			pwr = SDHCI_POWER_330;
+			break;
+		default:
+			BUG();
+		}
+	}
+
+	if (host->pwr == pwr)
 		return;
 
-	if (power == (unsigned short)-1) {
+	host->pwr = pwr;
+
+	if (pwr == 0) {
 		sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
-		goto out;
+		return;
 	}
 
 	/*
@@ -1020,35 +1042,16 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned short power)
 	if (!(host->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE))
 		sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
 
-	pwr = SDHCI_POWER_ON;
-
-	switch (1 << power) {
-	case MMC_VDD_165_195:
-		pwr |= SDHCI_POWER_180;
-		break;
-	case MMC_VDD_29_30:
-	case MMC_VDD_30_31:
-		pwr |= SDHCI_POWER_300;
-		break;
-	case MMC_VDD_32_33:
-	case MMC_VDD_33_34:
-		pwr |= SDHCI_POWER_330;
-		break;
-	default:
-		BUG();
-	}
-
 	/*
 	 * At least the Marvell CaFe chip gets confused if we set the voltage
 	 * and set turn on power at the same time, so set the voltage first.
 	 */
 	if ((host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER))
-		sdhci_writeb(host, pwr & ~SDHCI_POWER_ON, SDHCI_POWER_CONTROL);
+		sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
 
-	sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
+	pwr |= SDHCI_POWER_ON;
 
-out:
-	host->power = power;
+	sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
 }
 
 /*****************************************************************************\
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 65c6f99..2de0834 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -255,7 +255,7 @@ struct sdhci_host {
 	unsigned int		timeout_clk;	/* Timeout freq (KHz) */
 
 	unsigned int		clock;		/* Current clock (MHz) */
-	unsigned short		power;		/* Current voltage */
+	u8			pwr;		/* Current voltage */
 
 	struct mmc_request	*mrq;		/* Current request */
 	struct mmc_command	*cmd;		/* Current command */
-- 
cgit v0.10.2


From 8dfd0374be84793360db7fff2e635d2cd3bbcb21 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 9 Apr 2009 08:32:02 +0200
Subject: MMC core: limit minimum initialization frequency to 400kHz

Some controllers allow a much lower frequency than 400kHz.
Keep the minimum frequency within sensible limits.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 23fb2c3..d84c880 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -708,7 +708,13 @@ static void mmc_power_up(struct mmc_host *host)
 	 */
 	mmc_delay(10);
 
-	host->ios.clock = host->f_min;
+	if (host->f_min > 400000) {
+		pr_warning("%s: Minimum clock frequency too high for "
+				"identification mode\n", mmc_hostname(host));
+		host->ios.clock = host->f_min;
+	} else
+		host->ios.clock = 400000;
+
 	host->ios.power_mode = MMC_POWER_ON;
 	mmc_set_ios(host);
 
-- 
cgit v0.10.2


From 8385f9cb7f12ef6a5261fa76f1a1b612280c94f7 Mon Sep 17 00:00:00 2001
From: Daniel Ribeiro <drwyrm@gmail.com>
Date: Thu, 21 May 2009 08:54:18 -0300
Subject: pxamci: add regulator support.

Changes pxamci.c to use the regulator subsystem. Uses the regulator case
CONFIG_REGULATOR is defined and a matching is regulator is provided, or
falls back to pdata->setpower otherwise. A warning is displayed case
both a valid regulator and pdata is set, and the regulator is used.

Signed-off-by: Daniel Ribeiro <drwyrm@gmail.com>
Acked-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 4300957..d7d7109 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -27,6 +27,7 @@
 #include <linux/err.h>
 #include <linux/mmc/host.h>
 #include <linux/io.h>
+#include <linux/regulator/consumer.h>
 
 #include <asm/sizes.h>
 
@@ -67,8 +68,42 @@ struct pxamci_host {
 	unsigned int		dma_dir;
 	unsigned int		dma_drcmrrx;
 	unsigned int		dma_drcmrtx;
+
+	struct regulator	*vcc;
 };
 
+static inline void pxamci_init_ocr(struct pxamci_host *host)
+{
+#ifdef CONFIG_REGULATOR
+	host->vcc = regulator_get(mmc_dev(host->mmc), "vmmc");
+
+	if (IS_ERR(host->vcc))
+		host->vcc = NULL;
+	else {
+		host->mmc->ocr_avail = mmc_regulator_get_ocrmask(host->vcc);
+		if (host->pdata && host->pdata->ocr_mask)
+			dev_warn(mmc_dev(host->mmc),
+				"ocr_mask/setpower will not be used\n");
+	}
+#endif
+	if (host->vcc == NULL) {
+		/* fall-back to platform data */
+		host->mmc->ocr_avail = host->pdata ?
+			host->pdata->ocr_mask :
+			MMC_VDD_32_33 | MMC_VDD_33_34;
+	}
+}
+
+static inline void pxamci_set_power(struct pxamci_host *host, unsigned int vdd)
+{
+#ifdef CONFIG_REGULATOR
+	if (host->vcc)
+		mmc_regulator_set_ocr(host->vcc, vdd);
+#endif
+	if (!host->vcc && host->pdata && host->pdata->setpower)
+		host->pdata->setpower(mmc_dev(host->mmc), vdd);
+}
+
 static void pxamci_stop_clock(struct pxamci_host *host)
 {
 	if (readl(host->base + MMC_STAT) & STAT_CLK_EN) {
@@ -438,8 +473,7 @@ static void pxamci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (host->power_mode != ios->power_mode) {
 		host->power_mode = ios->power_mode;
 
-		if (host->pdata && host->pdata->setpower)
-			host->pdata->setpower(mmc_dev(mmc), ios->vdd);
+		pxamci_set_power(host, ios->vdd);
 
 		if (ios->power_mode == MMC_POWER_ON)
 			host->cmdat |= CMDAT_INIT;
@@ -562,9 +596,8 @@ static int pxamci_probe(struct platform_device *pdev)
 	mmc->f_max = (cpu_is_pxa300() || cpu_is_pxa310()) ? 26000000
 							  : host->clkrate;
 
-	mmc->ocr_avail = host->pdata ?
-			 host->pdata->ocr_mask :
-			 MMC_VDD_32_33|MMC_VDD_33_34;
+	pxamci_init_ocr(host);
+
 	mmc->caps = 0;
 	host->cmdat = 0;
 	if (!cpu_is_pxa25x()) {
@@ -661,6 +694,9 @@ static int pxamci_remove(struct platform_device *pdev)
 	if (mmc) {
 		struct pxamci_host *host = mmc_priv(mmc);
 
+		if (host->vcc)
+			regulator_put(host->vcc);
+
 		if (host->pdata && host->pdata->exit)
 			host->pdata->exit(&pdev->dev, mmc);
 
-- 
cgit v0.10.2


From 5f5bac8272be791b67c7b7b411e7c8c5847e598a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Fri, 22 May 2009 20:33:59 +0200
Subject: mmc: Driver for CB710/720 memory card reader (MMC part)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code is divided in two parts. There is a virtual 'bus' driver
that handles PCI device and registers three new devices one per card
reader type. The other driver handles SD/MMC part of the reader.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/MAINTAINERS b/MAINTAINERS
index 90f8128..b5cebdd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2105,6 +2105,15 @@ W:	http://sourceforge.net/projects/lpfcxxxx
 S:	Supported
 F:	drivers/scsi/lpfc/
 
+ENE CB710 FLASH CARD READER DRIVER
+P:	Michał Mirosław
+M:	mirq-linux@rere.qmqm.pl
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/misc/cb710/
+F:	drivers/mmc/host/cb710-mmc.*
+F:	include/linux/cb710.h
+
 EPSON 1355 FRAMEBUFFER DRIVER
 P:	Christopher Hoover
 M:	ch@murgatroid.com
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 6d1ac18..68ab39d 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -235,5 +235,6 @@ config ISL29003
 
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
+source "drivers/misc/cb710/Kconfig"
 
 endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 7871f05..36f733c 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_HP_ILO)		+= hpilo.o
 obj-$(CONFIG_ISL29003)		+= isl29003.o
 obj-$(CONFIG_C2PORT)		+= c2port/
 obj-y				+= eeprom/
+obj-y				+= cb710/
diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
new file mode 100644
index 0000000..0b55079
--- /dev/null
+++ b/drivers/misc/cb710/Kconfig
@@ -0,0 +1,21 @@
+config CB710_CORE
+	tristate "ENE CB710/720 Flash memory card reader support"
+	depends on PCI
+	help
+	  This option enables support for PCI ENE CB710/720 Flash memory card
+	  reader found in some laptops (ie. some versions of HP Compaq nx9500).
+
+	  You will also have to select some flash card format drivers (MMC/SD,
+	  MemoryStick).
+
+	  This driver can also be built as a module. If so, the module
+	  will be called cb710.
+
+config CB710_DEBUG
+	bool "Enable driver debugging"
+	depends on CB710_CORE != n
+	default n
+	help
+	  This is an option for use by developers; most people should
+	  say N here.  This adds a lot of debugging output to dmesg.
+
diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile
new file mode 100644
index 0000000..62d51ac
--- /dev/null
+++ b/drivers/misc/cb710/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_CB710_CORE)	+= cb710.o
+
+cb710-y				:= core.o sgbuf2.o
+cb710-$(CONFIG_CB710_DEBUG)	+= debug.o
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c
new file mode 100644
index 0000000..b14eab0
--- /dev/null
+++ b/drivers/misc/cb710/core.c
@@ -0,0 +1,357 @@
+/*
+ *  cb710/core.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/cb710.h>
+
+static DEFINE_IDA(cb710_ida);
+static DEFINE_SPINLOCK(cb710_ida_lock);
+
+void cb710_pci_update_config_reg(struct pci_dev *pdev,
+	int reg, uint32_t mask, uint32_t xor)
+{
+	u32 rval;
+
+	pci_read_config_dword(pdev, reg, &rval);
+	rval = (rval & mask) ^ xor;
+	pci_write_config_dword(pdev, reg, rval);
+}
+EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg);
+
+/* Some magic writes based on Windows driver init code */
+static int __devinit cb710_pci_configure(struct pci_dev *pdev)
+{
+	unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+	struct pci_dev *pdev0 = pci_get_slot(pdev->bus, devfn);
+	u32 val;
+
+	cb710_pci_update_config_reg(pdev, 0x48,
+		~0x000000FF, 0x0000003F);
+
+	pci_read_config_dword(pdev, 0x48, &val);
+	if (val & 0x80000000)
+		return 0;
+
+	if (!pdev0)
+		return -ENODEV;
+
+	if (pdev0->vendor == PCI_VENDOR_ID_ENE
+	    && pdev0->device == PCI_DEVICE_ID_ENE_720) {
+		cb710_pci_update_config_reg(pdev0, 0x8C,
+			~0x00F00000, 0x00100000);
+		cb710_pci_update_config_reg(pdev0, 0xB0,
+			~0x08000000, 0x08000000);
+	}
+
+	cb710_pci_update_config_reg(pdev0, 0x8C,
+		~0x00000F00, 0x00000200);
+	cb710_pci_update_config_reg(pdev0, 0x90,
+		~0x00060000, 0x00040000);
+
+	pci_dev_put(pdev0);
+
+	return 0;
+}
+
+static irqreturn_t cb710_irq_handler(int irq, void *data)
+{
+	struct cb710_chip *chip = data;
+	struct cb710_slot *slot = &chip->slot[0];
+	irqreturn_t handled = IRQ_NONE;
+	unsigned nr;
+
+	spin_lock(&chip->irq_lock); /* incl. smp_rmb() */
+
+	for (nr = chip->slots; nr; ++slot, --nr) {
+		cb710_irq_handler_t handler_func = slot->irq_handler;
+		if (handler_func && handler_func(slot))
+			handled = IRQ_HANDLED;
+	}
+
+	spin_unlock(&chip->irq_lock);
+
+	return handled;
+}
+
+static void cb710_release_slot(struct device *dev)
+{
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev));
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+
+	/* slot struct can be freed now */
+	atomic_dec(&chip->slot_refs_count);
+#endif
+}
+
+static int __devinit cb710_register_slot(struct cb710_chip *chip,
+	unsigned slot_mask, unsigned io_offset, const char *name)
+{
+	int nr = chip->slots;
+	struct cb710_slot *slot = &chip->slot[nr];
+	int err;
+
+	dev_dbg(cb710_chip_dev(chip),
+		"register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n",
+		name, chip->platform_id, nr, slot_mask, io_offset);
+
+	/* slot->irq_handler == NULL here; this needs to be
+	 * seen before platform_device_register() */
+	++chip->slots;
+	smp_wmb();
+
+	slot->iobase = chip->iobase + io_offset;
+	slot->pdev.name = name;
+	slot->pdev.id = chip->platform_id;
+	slot->pdev.dev.parent = &chip->pdev->dev;
+	slot->pdev.dev.release = cb710_release_slot;
+
+	err = platform_device_register(&slot->pdev);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	atomic_inc(&chip->slot_refs_count);
+#endif
+
+	if (err) {
+		/* device_initialize() called from platform_device_register()
+		 * wants this on error path */
+		platform_device_put(&slot->pdev);
+
+		/* slot->irq_handler == NULL here anyway, so no lock needed */
+		--chip->slots;
+		return err;
+	}
+
+	chip->slot_mask |= slot_mask;
+
+	return 0;
+}
+
+static void cb710_unregister_slot(struct cb710_chip *chip,
+	unsigned slot_mask)
+{
+	int nr = chip->slots - 1;
+
+	if (!(chip->slot_mask & slot_mask))
+		return;
+
+	platform_device_unregister(&chip->slot[nr].pdev);
+
+	/* complementary to spin_unlock() in cb710_set_irq_handler() */
+	smp_rmb();
+	BUG_ON(chip->slot[nr].irq_handler != NULL);
+
+	/* slot->irq_handler == NULL here, so no lock needed */
+	--chip->slots;
+	chip->slot_mask &= ~slot_mask;
+}
+
+void cb710_set_irq_handler(struct cb710_slot *slot,
+	cb710_irq_handler_t handler)
+{
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->irq_lock, flags);
+	slot->irq_handler = handler;
+	spin_unlock_irqrestore(&chip->irq_lock, flags);
+}
+EXPORT_SYMBOL_GPL(cb710_set_irq_handler);
+
+#ifdef CONFIG_PM
+
+static int cb710_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+
+	free_irq(pdev->irq, chip);
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	if (state.event & PM_EVENT_SLEEP)
+		pci_set_power_state(pdev, PCI_D3cold);
+	return 0;
+}
+
+static int cb710_resume(struct pci_dev *pdev)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+	int err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	return devm_request_irq(&pdev->dev, pdev->irq,
+		cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+}
+
+#endif /* CONFIG_PM */
+
+static int __devinit cb710_probe(struct pci_dev *pdev,
+	const struct pci_device_id *ent)
+{
+	struct cb710_chip *chip;
+	unsigned long flags;
+	u32 val;
+	int err;
+	int n = 0;
+
+	err = cb710_pci_configure(pdev);
+	if (err)
+		return err;
+
+	/* this is actually magic... */
+	pci_read_config_dword(pdev, 0x48, &val);
+	if (!(val & 0x80000000)) {
+		pci_write_config_dword(pdev, 0x48, val|0x71000000);
+		pci_read_config_dword(pdev, 0x48, &val);
+	}
+
+	dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val);
+	if (!(val & 0x70000000))
+		return -ENODEV;
+	val = (val >> 28) & 7;
+	if (val & CB710_SLOT_MMC)
+		++n;
+	if (val & CB710_SLOT_MS)
+		++n;
+	if (val & CB710_SLOT_SM)
+		++n;
+
+	chip = devm_kzalloc(&pdev->dev,
+		sizeof(*chip) + n * sizeof(*chip->slot), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME);
+	if (err)
+		return err;
+
+	chip->pdev = pdev;
+	chip->iobase = pcim_iomap_table(pdev)[0];
+
+	pci_set_drvdata(pdev, chip);
+
+	err = devm_request_irq(&pdev->dev, pdev->irq,
+		cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+	if (err)
+		return err;
+
+	do {
+		if (!ida_pre_get(&cb710_ida, GFP_KERNEL))
+			return -ENOMEM;
+
+		spin_lock_irqsave(&cb710_ida_lock, flags);
+		err = ida_get_new(&cb710_ida, &chip->platform_id);
+		spin_unlock_irqrestore(&cb710_ida_lock, flags);
+
+		if (err && err != -EAGAIN)
+			return err;
+	} while (err);
+
+
+	dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n",
+		chip->platform_id, chip->iobase, pdev->irq);
+
+	if (val & CB710_SLOT_MMC) {	/* MMC/SD slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_MMC, 0x00, "cb710-mmc");
+		if (err)
+			return err;
+	}
+
+	if (val & CB710_SLOT_MS) {	/* MemoryStick slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_MS, 0x40, "cb710-ms");
+		if (err)
+			goto unreg_mmc;
+	}
+
+	if (val & CB710_SLOT_SM) {	/* SmartMedia slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_SM, 0x60, "cb710-sm");
+		if (err)
+			goto unreg_ms;
+	}
+
+	return 0;
+unreg_ms:
+	cb710_unregister_slot(chip, CB710_SLOT_MS);
+unreg_mmc:
+	cb710_unregister_slot(chip, CB710_SLOT_MMC);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+	return err;
+}
+
+static void __devexit cb710_remove_one(struct pci_dev *pdev)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+	unsigned long flags;
+
+	cb710_unregister_slot(chip, CB710_SLOT_SM);
+	cb710_unregister_slot(chip, CB710_SLOT_MS);
+	cb710_unregister_slot(chip, CB710_SLOT_MMC);
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+
+	spin_lock_irqsave(&cb710_ida_lock, flags);
+	ida_remove(&cb710_ida, chip->platform_id);
+	spin_unlock_irqrestore(&cb710_ida_lock, flags);
+}
+
+static const struct pci_device_id cb710_pci_tbl[] = {
+	{ PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH,
+		PCI_ANY_ID, PCI_ANY_ID, },
+	{ 0, }
+};
+
+static struct pci_driver cb710_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = cb710_pci_tbl,
+	.probe = cb710_probe,
+	.remove = __devexit_p(cb710_remove_one),
+#ifdef CONFIG_PM
+	.suspend = cb710_suspend,
+	.resume = cb710_resume,
+#endif
+};
+
+static int __init cb710_init_module(void)
+{
+	return pci_register_driver(&cb710_driver);
+}
+
+static void __exit cb710_cleanup_module(void)
+{
+	pci_unregister_driver(&cb710_driver);
+	ida_destroy(&cb710_ida);
+}
+
+module_init(cb710_init_module);
+module_exit(cb710_cleanup_module);
+
+MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>");
+MODULE_DESCRIPTION("ENE CB710 memory card reader driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, cb710_pci_tbl);
diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c
new file mode 100644
index 0000000..9da11bc
--- /dev/null
+++ b/drivers/misc/cb710/debug.c
@@ -0,0 +1,119 @@
+/*
+ *  cb710/debug.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cb710.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#define CB710_REG_COUNT		0x80
+
+static const u16 allow[CB710_REG_COUNT/16] = {
+	0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+};
+static const char *const prefix[ARRAY_SIZE(allow)] = {
+	"MMC", "MMC", "MMC", "MMC",
+	"MS?", "MS?", "SM?", "SM?"
+};
+
+static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits)
+{
+	unsigned mask = (1 << bits/8) - 1;
+	offset *= bits/8;
+	return ((allow[block] >> offset) & mask) == mask;
+}
+
+#define CB710_READ_REGS_TEMPLATE(t)					\
+static void cb710_read_regs_##t(void __iomem *iobase,			\
+	u##t *reg, unsigned select)					\
+{									\
+	unsigned i, j;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
+		if (!select & (1 << i))					\
+			continue;					\
+									\
+		for (j = 0; j < 0x10/(t/8); ++j) {			\
+			if (!allow_reg_read(i, j, t))			\
+				continue;				\
+			reg[j] = ioread##t(iobase			\
+				+ (i << 4) + (j * (t/8)));		\
+		}							\
+	}								\
+}
+
+static const char cb710_regf_8[] = "%02X";
+static const char cb710_regf_16[] = "%04X";
+static const char cb710_regf_32[] = "%08X";
+static const char cb710_xes[] = "xxxxxxxx";
+
+#define CB710_DUMP_REGS_TEMPLATE(t)					\
+static void cb710_dump_regs_##t(struct device *dev,			\
+	const u##t *reg, unsigned select)				\
+{									\
+	const char *const xp = &cb710_xes[8 - t/4];			\
+	const char *const format = cb710_regf_##t;			\
+									\
+	char msg[100], *p;						\
+	unsigned i, j;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
+		if (!(select & (1 << i)))				\
+			continue;					\
+		p = msg;						\
+		for (j = 0; j < 0x10/(t/8); ++j) {			\
+			*p++ = ' ';					\
+			if (j == 8/(t/8))				\
+				*p++ = ' ';				\
+			if (allow_reg_read(i, j, t))			\
+				p += sprintf(p, format, reg[j]);	\
+			else						\
+				p += sprintf(p, "%s", xp);		\
+		}							\
+		dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg);	\
+	}								\
+}
+
+#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t)				\
+static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip,	\
+	unsigned select)						\
+{									\
+	u##t regs[CB710_REG_COUNT/sizeof(u##t)];			\
+									\
+	memset(&regs, 0, sizeof(regs));					\
+	cb710_read_regs_##t(chip->iobase, regs, select);		\
+	cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select);	\
+}
+
+#define CB710_REG_ACCESS_TEMPLATES(t)		\
+  CB710_READ_REGS_TEMPLATE(t)			\
+  CB710_DUMP_REGS_TEMPLATE(t)			\
+  CB710_READ_AND_DUMP_REGS_TEMPLATE(t)
+
+CB710_REG_ACCESS_TEMPLATES(8)
+CB710_REG_ACCESS_TEMPLATES(16)
+CB710_REG_ACCESS_TEMPLATES(32)
+
+void cb710_dump_regs(struct cb710_chip *chip, unsigned select)
+{
+	if (!(select & CB710_DUMP_REGS_MASK))
+		select = CB710_DUMP_REGS_ALL;
+	if (!(select & CB710_DUMP_ACCESS_MASK))
+		select |= CB710_DUMP_ACCESS_8;
+
+	if (select & CB710_DUMP_ACCESS_32)
+		cb710_read_and_dump_regs_32(chip, select);
+	if (select & CB710_DUMP_ACCESS_16)
+		cb710_read_and_dump_regs_16(chip, select);
+	if (select & CB710_DUMP_ACCESS_8)
+		cb710_read_and_dump_regs_8(chip, select);
+}
+EXPORT_SYMBOL_GPL(cb710_dump_regs);
+
diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c
new file mode 100644
index 0000000..d38a7ac
--- /dev/null
+++ b/drivers/misc/cb710/sgbuf2.c
@@ -0,0 +1,150 @@
+/*
+ *  cb710/sgbuf2.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cb710.h>
+
+static bool sg_dwiter_next(struct sg_mapping_iter *miter)
+{
+	if (sg_miter_next(miter)) {
+		miter->consumed = 0;
+		return true;
+	} else
+		return false;
+}
+
+static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter)
+{
+	return miter->length == miter->consumed && !sg_dwiter_next(miter);
+}
+
+static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter)
+{
+	size_t len, left = 4;
+	uint32_t data;
+	void *addr = &data;
+
+	do {
+		len = min(miter->length - miter->consumed, left);
+		memcpy(addr, miter->addr + miter->consumed, len);
+		miter->consumed += len;
+		left -= len;
+		if (!left)
+			return data;
+		addr += len;
+	} while (sg_dwiter_next(miter));
+
+	memset(addr, 0, left);
+	return data;
+}
+
+static inline bool needs_unaligned_copy(const void *ptr)
+{
+#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS
+	return false;
+#else
+	return ((ptr - NULL) & 3) != 0;
+#endif
+}
+
+static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr)
+{
+	size_t len;
+
+	if (sg_dwiter_is_at_end(miter))
+		return true;
+
+	len = miter->length - miter->consumed;
+
+	if (likely(len >= 4 && !needs_unaligned_copy(
+			miter->addr + miter->consumed))) {
+		*ptr = miter->addr + miter->consumed;
+		miter->consumed += 4;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer
+ * @miter: sg mapping iterator used for reading
+ *
+ * Description:
+ *   Returns 32-bit word starting at byte pointed to by @miter@
+ *   handling any alignment issues.  Bytes past the buffer's end
+ *   are not accessed (read) but are returned as zeroes.  @miter@
+ *   is advanced by 4 bytes or to the end of buffer whichever is
+ *   closer.
+ *
+ * Context:
+ *   Same requirements as in sg_miter_next().
+ *
+ * Returns:
+ *   32-bit word just read.
+ */
+uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter)
+{
+	uint32_t *ptr = NULL;
+
+	if (likely(sg_dwiter_get_next_block(miter, &ptr)))
+		return ptr ? *ptr : 0;
+
+	return sg_dwiter_read_buffer(miter);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block);
+
+static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data)
+{
+	size_t len, left = 4;
+	void *addr = &data;
+
+	do {
+		len = min(miter->length - miter->consumed, left);
+		memcpy(miter->addr, addr, len);
+		miter->consumed += len;
+		left -= len;
+		if (!left)
+			return;
+		addr += len;
+		flush_kernel_dcache_page(miter->page);
+	} while (sg_dwiter_next(miter));
+}
+
+/**
+ * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer
+ * @miter: sg mapping iterator used for writing
+ *
+ * Description:
+ *   Writes 32-bit word starting at byte pointed to by @miter@
+ *   handling any alignment issues.  Bytes which would be written
+ *   past the buffer's end are silently discarded. @miter@ is
+ *   advanced by 4 bytes or to the end of buffer whichever is closer.
+ *
+ * Context:
+ *   Same requirements as in sg_miter_next().
+ */
+void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data)
+{
+	uint32_t *ptr = NULL;
+
+	if (likely(sg_dwiter_get_next_block(miter, &ptr))) {
+		if (ptr)
+			*ptr = data;
+		else
+			return;
+	} else
+		sg_dwiter_write_slow(miter, data);
+
+	if (miter->length == miter->consumed)
+		flush_kernel_dcache_page(miter->page);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block);
+
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index b4cf691..75337e4 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -241,3 +241,16 @@ config MMC_TMIO
 	help
 	  This provides support for the SD/MMC cell found in TC6393XB,
 	  T7L66XB and also ipaq ASIC3
+
+config MMC_CB710
+	tristate "ENE CB710 MMC/SD Interface support"
+	depends on PCI
+	select CB710_CORE
+	help
+	  This option enables support for MMC/SD part of ENE CB710/720 Flash
+	  memory card reader found in some laptops (ie. some versions of
+	  HP Compaq nx9500).
+
+	  This driver can also be built as a module. If so, the module
+	  will be called cb710-mmc.
+
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 970a997..ce9c8b6 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -29,4 +29,5 @@ endif
 obj-$(CONFIG_MMC_S3C)   	+= s3cmci.o
 obj-$(CONFIG_MMC_SDRICOH_CS)	+= sdricoh_cs.o
 obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
+obj-$(CONFIG_MMC_CB710)	+= cb710-mmc.o
 
diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c
new file mode 100644
index 0000000..bc048aa
--- /dev/null
+++ b/drivers/mmc/host/cb710-mmc.c
@@ -0,0 +1,804 @@
+/*
+ *  cb710/mmc.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include "cb710-mmc.h"
+
+static const u8 cb710_clock_divider_log2[8] = {
+/*	1, 2, 4, 8, 16, 32, 128, 512 */
+	0, 1, 2, 3,  4,  5,   7,   9
+};
+#define CB710_MAX_DIVIDER_IDX	\
+	(ARRAY_SIZE(cb710_clock_divider_log2) - 1)
+
+static const u8 cb710_src_freq_mhz[16] = {
+	33, 10, 20, 25, 30, 35, 40, 45,
+	50, 55, 60, 65, 70, 75, 80, 85
+};
+
+static void cb710_mmc_set_clock(struct mmc_host *mmc, int hz)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+	struct pci_dev *pdev = cb710_slot_to_chip(slot)->pdev;
+	u32 src_freq_idx;
+	u32 divider_idx;
+	int src_hz;
+
+	/* this is magic, unverifiable for me, unless I get
+	 * MMC card with cables connected to bus signals */
+	pci_read_config_dword(pdev, 0x48, &src_freq_idx);
+	src_freq_idx = (src_freq_idx >> 16) & 0xF;
+	src_hz = cb710_src_freq_mhz[src_freq_idx] * 1000000;
+
+	for (divider_idx = 0; divider_idx < CB710_MAX_DIVIDER_IDX; ++divider_idx) {
+		if (hz >= src_hz >> cb710_clock_divider_log2[divider_idx])
+			break;
+	}
+
+	if (src_freq_idx)
+		divider_idx |= 0x8;
+
+	cb710_pci_update_config_reg(pdev, 0x40, ~0xF0000000, divider_idx << 28);
+
+	dev_dbg(cb710_slot_dev(slot),
+		"clock set to %d Hz, wanted %d Hz; flag = %d\n",
+		src_hz >> cb710_clock_divider_log2[divider_idx & 7],
+		hz, (divider_idx & 8) != 0);
+}
+
+static void __cb710_mmc_enable_irq(struct cb710_slot *slot,
+	unsigned short enable, unsigned short mask)
+{
+	/* clear global IE
+	 * - it gets set later if any interrupt sources are enabled */
+	mask |= CB710_MMC_IE_IRQ_ENABLE;
+
+	/* look like interrupt is fired whenever
+	 * WORD[0x0C] & WORD[0x10] != 0;
+	 * -> bit 15 port 0x0C seems to be global interrupt enable
+	 */
+
+	enable = (cb710_read_port_16(slot, CB710_MMC_IRQ_ENABLE_PORT)
+		& ~mask) | enable;
+
+	if (enable)
+		enable |= CB710_MMC_IE_IRQ_ENABLE;
+
+	cb710_write_port_16(slot, CB710_MMC_IRQ_ENABLE_PORT, enable);
+}
+
+static void cb710_mmc_enable_irq(struct cb710_slot *slot,
+	unsigned short enable, unsigned short mask)
+{
+	struct cb710_mmc_reader *reader = mmc_priv(cb710_slot_to_mmc(slot));
+	unsigned long flags;
+
+	spin_lock_irqsave(&reader->irq_lock, flags);
+	/* this is the only thing irq_lock protects */
+	__cb710_mmc_enable_irq(slot, enable, mask);
+	spin_unlock_irqrestore(&reader->irq_lock, flags);
+}
+
+static void cb710_mmc_reset_events(struct cb710_slot *slot)
+{
+	cb710_write_port_8(slot, CB710_MMC_STATUS0_PORT, 0xFF);
+	cb710_write_port_8(slot, CB710_MMC_STATUS1_PORT, 0xFF);
+	cb710_write_port_8(slot, CB710_MMC_STATUS2_PORT, 0xFF);
+}
+
+static int cb710_mmc_is_card_inserted(struct cb710_slot *slot)
+{
+	return cb710_read_port_8(slot, CB710_MMC_STATUS3_PORT)
+		& CB710_MMC_S3_CARD_DETECTED;
+}
+
+static void cb710_mmc_enable_4bit_data(struct cb710_slot *slot, int enable)
+{
+	dev_dbg(cb710_slot_dev(slot), "configuring %d-data-line%s mode\n",
+		enable ? 4 : 1, enable ? "s" : "");
+	if (enable)
+		cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT,
+			CB710_MMC_C1_4BIT_DATA_BUS, 0);
+	else
+		cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT,
+			0, CB710_MMC_C1_4BIT_DATA_BUS);
+}
+
+static int cb710_check_event(struct cb710_slot *slot, u8 what)
+{
+	u16 status;
+
+	status = cb710_read_port_16(slot, CB710_MMC_STATUS_PORT);
+
+	if (status & CB710_MMC_S0_FIFO_UNDERFLOW) {
+		/* it is just a guess, so log it */
+		dev_dbg(cb710_slot_dev(slot),
+			"CHECK : ignoring bit 6 in status %04X\n", status);
+		cb710_write_port_8(slot, CB710_MMC_STATUS0_PORT,
+			CB710_MMC_S0_FIFO_UNDERFLOW);
+		status &= ~CB710_MMC_S0_FIFO_UNDERFLOW;
+	}
+
+	if (status & CB710_MMC_STATUS_ERROR_EVENTS) {
+		dev_dbg(cb710_slot_dev(slot),
+			"CHECK : returning EIO on status %04X\n", status);
+		cb710_write_port_8(slot, CB710_MMC_STATUS0_PORT, status & 0xFF);
+		cb710_write_port_8(slot, CB710_MMC_STATUS1_PORT,
+			CB710_MMC_S1_RESET);
+		return -EIO;
+	}
+
+	/* 'what' is a bit in MMC_STATUS1 */
+	if ((status >> 8) & what) {
+		cb710_write_port_8(slot, CB710_MMC_STATUS1_PORT, what);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int cb710_wait_for_event(struct cb710_slot *slot, u8 what)
+{
+	int err = 0;
+	unsigned limit = 2000000;	/* FIXME: real timeout */
+
+#ifdef CONFIG_CB710_DEBUG
+	u32 e, x;
+	e = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+#endif
+
+	while (!(err = cb710_check_event(slot, what))) {
+		if (!--limit) {
+			cb710_dump_regs(cb710_slot_to_chip(slot),
+				CB710_DUMP_REGS_MMC);
+			err = -ETIMEDOUT;
+			break;
+		}
+		udelay(1);
+	}
+
+#ifdef CONFIG_CB710_DEBUG
+	x = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+
+	limit = 2000000 - limit;
+	if (limit > 100)
+		dev_dbg(cb710_slot_dev(slot),
+			"WAIT10: waited %d loops, what %d, entry val %08X, exit val %08X\n",
+			limit, what, e, x);
+#endif
+	return err < 0 ? err : 0;
+}
+
+
+static int cb710_wait_while_busy(struct cb710_slot *slot, uint8_t mask)
+{
+	unsigned limit = 500000;	/* FIXME: real timeout */
+	int err = 0;
+
+#ifdef CONFIG_CB710_DEBUG
+	u32 e, x;
+	e = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+#endif
+
+	while (cb710_read_port_8(slot, CB710_MMC_STATUS2_PORT) & mask) {
+		if (!--limit) {
+			cb710_dump_regs(cb710_slot_to_chip(slot),
+				CB710_DUMP_REGS_MMC);
+			err = -ETIMEDOUT;
+			break;
+		}
+		udelay(1);
+	}
+
+#ifdef CONFIG_CB710_DEBUG
+	x = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+
+	limit = 500000 - limit;
+	if (limit > 100)
+		dev_dbg(cb710_slot_dev(slot),
+			"WAIT12: waited %d loops, mask %02X, entry val %08X, exit val %08X\n",
+			limit, mask, e, x);
+#endif
+	return 0;
+}
+
+static void cb710_mmc_set_transfer_size(struct cb710_slot *slot,
+	size_t count, size_t blocksize)
+{
+	cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	cb710_write_port_32(slot, CB710_MMC_TRANSFER_SIZE_PORT,
+		((count - 1) << 16)|(blocksize - 1));
+
+	dev_vdbg(cb710_slot_dev(slot), "set up for %d block%s of %d bytes\n",
+		count, count == 1 ? "" : "s", blocksize);
+}
+
+static void cb710_mmc_fifo_hack(struct cb710_slot *slot)
+{
+	/* without this, received data is prepended with 8-bytes of zeroes */
+	u32 r1, r2;
+	int ok = 0;
+
+	r1 = cb710_read_port_32(slot, CB710_MMC_DATA_PORT);
+	r2 = cb710_read_port_32(slot, CB710_MMC_DATA_PORT);
+	if (cb710_read_port_8(slot, CB710_MMC_STATUS0_PORT)
+	    & CB710_MMC_S0_FIFO_UNDERFLOW) {
+		cb710_write_port_8(slot, CB710_MMC_STATUS0_PORT,
+			CB710_MMC_S0_FIFO_UNDERFLOW);
+		ok = 1;
+	}
+
+	dev_dbg(cb710_slot_dev(slot),
+		"FIFO-read-hack: expected STATUS0 bit was %s\n",
+		ok ? "set." : "NOT SET!");
+	dev_dbg(cb710_slot_dev(slot),
+		"FIFO-read-hack: dwords ignored: %08X %08X - %s\n",
+		r1, r2, (r1|r2) ? "BAD (NOT ZERO)!" : "ok");
+}
+
+static int cb710_mmc_receive_pio(struct cb710_slot *slot,
+	struct sg_mapping_iter *miter, size_t dw_count)
+{
+	if (!(cb710_read_port_8(slot, CB710_MMC_STATUS2_PORT) & CB710_MMC_S2_FIFO_READY)) {
+		int err = cb710_wait_for_event(slot,
+			CB710_MMC_S1_PIO_TRANSFER_DONE);
+		if (err)
+			return err;
+	}
+
+	cb710_sg_dwiter_write_from_io(miter,
+		slot->iobase + CB710_MMC_DATA_PORT, dw_count);
+
+	return 0;
+}
+
+static bool cb710_is_transfer_size_supported(struct mmc_data *data)
+{
+	return !(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8));
+}
+
+static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data)
+{
+	struct sg_mapping_iter miter;
+	size_t len, blocks = data->blocks;
+	int err = 0;
+
+	/* TODO: I don't know how/if the hardware handles non-16B-boundary blocks
+	 * except single 8B block */
+	if (unlikely(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8)))
+		return -EINVAL;
+
+	sg_miter_start(&miter, data->sg, data->sg_len, 0);
+
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT,
+		15, CB710_MMC_C2_READ_PIO_SIZE_MASK);
+
+	cb710_mmc_fifo_hack(slot);
+
+	while (blocks-- > 0) {
+		len = data->blksz;
+
+		while (len >= 16) {
+			err = cb710_mmc_receive_pio(slot, &miter, 4);
+			if (err)
+				goto out;
+			len -= 16;
+		}
+
+		if (!len)
+			continue;
+
+		cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT,
+			len - 1, CB710_MMC_C2_READ_PIO_SIZE_MASK);
+
+		len = (len >= 8) ? 4 : 2;
+		err = cb710_mmc_receive_pio(slot, &miter, len);
+		if (err)
+			goto out;
+	}
+out:
+	cb710_sg_miter_stop_writing(&miter);
+	return err;
+}
+
+static int cb710_mmc_send(struct cb710_slot *slot, struct mmc_data *data)
+{
+	struct sg_mapping_iter miter;
+	size_t len, blocks = data->blocks;
+	int err = 0;
+
+	/* TODO: I don't know how/if the hardware handles multiple
+	 * non-16B-boundary blocks */
+	if (unlikely(data->blocks > 1 && data->blksz & 15))
+		return -EINVAL;
+
+	sg_miter_start(&miter, data->sg, data->sg_len, 0);
+
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT,
+		0, CB710_MMC_C2_READ_PIO_SIZE_MASK);
+
+	while (blocks-- > 0) {
+		len = (data->blksz + 15) >> 4;
+		do {
+			if (!(cb710_read_port_8(slot, CB710_MMC_STATUS2_PORT)
+			    & CB710_MMC_S2_FIFO_EMPTY)) {
+				err = cb710_wait_for_event(slot,
+					CB710_MMC_S1_PIO_TRANSFER_DONE);
+				if (err)
+					goto out;
+			}
+			cb710_sg_dwiter_read_to_io(&miter,
+				slot->iobase + CB710_MMC_DATA_PORT, 4);
+		} while (--len);
+	}
+out:
+	sg_miter_stop(&miter);
+	return err;
+}
+
+static u16 cb710_encode_cmd_flags(struct cb710_mmc_reader *reader,
+	struct mmc_command *cmd)
+{
+	unsigned int flags = cmd->flags;
+	u16 cb_flags = 0;
+
+	/* Windows driver returned 0 for commands for which no response
+	 * is expected. It happened that there were only two such commands
+	 * used: MMC_GO_IDLE_STATE and MMC_GO_INACTIVE_STATE so it might
+	 * as well be a bug in that driver.
+	 *
+	 * Original driver set bit 14 for MMC/SD application
+	 * commands. There's no difference 'on the wire' and
+	 * it apparently works without it anyway.
+	 */
+
+	switch (flags & MMC_CMD_MASK) {
+	case MMC_CMD_AC:	cb_flags = CB710_MMC_CMD_AC;	break;
+	case MMC_CMD_ADTC:	cb_flags = CB710_MMC_CMD_ADTC;	break;
+	case MMC_CMD_BC:	cb_flags = CB710_MMC_CMD_BC;	break;
+	case MMC_CMD_BCR:	cb_flags = CB710_MMC_CMD_BCR;	break;
+	}
+
+	if (flags & MMC_RSP_BUSY)
+		cb_flags |= CB710_MMC_RSP_BUSY;
+
+	cb_flags |= cmd->opcode << CB710_MMC_CMD_CODE_SHIFT;
+
+	if (cmd->data && (cmd->data->flags & MMC_DATA_READ))
+		cb_flags |= CB710_MMC_DATA_READ;
+
+	if (flags & MMC_RSP_PRESENT) {
+		/* Windows driver set 01 at bits 4,3 except for
+		 * MMC_SET_BLOCKLEN where it set 10. Maybe the
+		 * hardware can do something special about this
+		 * command? The original driver looks buggy/incomplete
+		 * anyway so we ignore this for now.
+		 *
+		 * I assume that 00 here means no response is expected.
+		 */
+		cb_flags |= CB710_MMC_RSP_PRESENT;
+
+		if (flags & MMC_RSP_136)
+			cb_flags |= CB710_MMC_RSP_136;
+		if (!(flags & MMC_RSP_CRC))
+			cb_flags |= CB710_MMC_RSP_NO_CRC;
+	}
+
+	return cb_flags;
+}
+
+static void cb710_receive_response(struct cb710_slot *slot,
+	struct mmc_command *cmd)
+{
+	unsigned rsp_opcode, wanted_opcode;
+
+	/* Looks like final byte with CRC is always stripped (same as SDHCI) */
+	if (cmd->flags & MMC_RSP_136) {
+		u32 resp[4];
+
+		resp[0] = cb710_read_port_32(slot, CB710_MMC_RESPONSE3_PORT);
+		resp[1] = cb710_read_port_32(slot, CB710_MMC_RESPONSE2_PORT);
+		resp[2] = cb710_read_port_32(slot, CB710_MMC_RESPONSE1_PORT);
+		resp[3] = cb710_read_port_32(slot, CB710_MMC_RESPONSE0_PORT);
+		rsp_opcode = resp[0] >> 24;
+
+		cmd->resp[0] = (resp[0] << 8)|(resp[1] >> 24);
+		cmd->resp[1] = (resp[1] << 8)|(resp[2] >> 24);
+		cmd->resp[2] = (resp[2] << 8)|(resp[3] >> 24);
+		cmd->resp[3] = (resp[3] << 8);
+	} else {
+		rsp_opcode = cb710_read_port_32(slot, CB710_MMC_RESPONSE1_PORT) & 0x3F;
+		cmd->resp[0] = cb710_read_port_32(slot, CB710_MMC_RESPONSE0_PORT);
+	}
+
+	wanted_opcode = (cmd->flags & MMC_RSP_OPCODE) ? cmd->opcode : 0x3F;
+	if (rsp_opcode != wanted_opcode)
+		cmd->error = -EILSEQ;
+}
+
+static int cb710_mmc_transfer_data(struct cb710_slot *slot,
+	struct mmc_data *data)
+{
+	int error, to;
+
+	if (data->flags & MMC_DATA_READ)
+		error = cb710_mmc_receive(slot, data);
+	else
+		error = cb710_mmc_send(slot, data);
+
+	to = cb710_wait_for_event(slot, CB710_MMC_S1_DATA_TRANSFER_DONE);
+	if (!error)
+		error = to;
+
+	if (!error)
+		data->bytes_xfered = data->blksz * data->blocks;
+	return error;
+}
+
+static int cb710_mmc_command(struct mmc_host *mmc, struct mmc_command *cmd)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+	struct mmc_data *data = cmd->data;
+
+	u16 cb_cmd = cb710_encode_cmd_flags(reader, cmd);
+	dev_dbg(cb710_slot_dev(slot), "cmd request: 0x%04X\n", cb_cmd);
+
+	if (data) {
+		if (!cb710_is_transfer_size_supported(data)) {
+			data->error = -EINVAL;
+			return -1;
+		}
+		cb710_mmc_set_transfer_size(slot, data->blocks, data->blksz);
+	}
+
+	cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20|CB710_MMC_S2_BUSY_10);
+	cb710_write_port_16(slot, CB710_MMC_CMD_TYPE_PORT, cb_cmd);
+	cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	cb710_write_port_32(slot, CB710_MMC_CMD_PARAM_PORT, cmd->arg);
+	cb710_mmc_reset_events(slot);
+	cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG0_PORT, 0x01, 0);
+
+	cmd->error = cb710_wait_for_event(slot, CB710_MMC_S1_COMMAND_SENT);
+	if (cmd->error)
+		return -1;
+
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		cb710_receive_response(slot, cmd);
+		if (cmd->error)
+			return -1;
+	}
+
+	if (data)
+		data->error = cb710_mmc_transfer_data(slot, data);
+	return 0;
+}
+
+static void cb710_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+
+	WARN_ON(reader->mrq != NULL);
+
+	reader->mrq = mrq;
+	cb710_mmc_enable_irq(slot, CB710_MMC_IE_TEST_MASK, 0);
+
+	if (cb710_mmc_is_card_inserted(slot)) {
+		if (!cb710_mmc_command(mmc, mrq->cmd) && mrq->stop)
+			cb710_mmc_command(mmc, mrq->stop);
+		mdelay(1);
+	} else {
+		mrq->cmd->error = -ENOMEDIUM;
+	}
+
+	tasklet_schedule(&reader->finish_req_tasklet);
+}
+
+static int cb710_mmc_powerup(struct cb710_slot *slot)
+{
+#ifdef CONFIG_CB710_DEBUG
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+#endif
+	int err;
+
+	/* a lot of magic; see comment in cb710_mmc_set_clock() */
+	dev_dbg(cb710_slot_dev(slot), "bus powerup\n");
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	err = cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	if (unlikely(err))
+		return err;
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0x80, 0);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG3_PORT, 0x80, 0);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	mdelay(1);
+	dev_dbg(cb710_slot_dev(slot), "after delay 1\n");
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	err = cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	if (unlikely(err))
+		return err;
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0x09, 0);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	mdelay(1);
+	dev_dbg(cb710_slot_dev(slot), "after delay 2\n");
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	err = cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	if (unlikely(err))
+		return err;
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0, 0x08);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	mdelay(2);
+	dev_dbg(cb710_slot_dev(slot), "after delay 3\n");
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG0_PORT, 0x06, 0);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0x70, 0);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, 0x80, 0);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG3_PORT, 0x03, 0);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	err = cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	if (unlikely(err))
+		return err;
+	/* This port behaves weird: quick byte reads of 0x08,0x09 return
+	 * 0xFF,0x00 after writing 0xFFFF to 0x08; it works correctly when
+	 * read/written from userspace...  What am I missing here?
+	 * (it doesn't depend on write-to-read delay) */
+	cb710_write_port_16(slot, CB710_MMC_CONFIGB_PORT, 0xFFFF);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG0_PORT, 0x06, 0);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	dev_dbg(cb710_slot_dev(slot), "bus powerup finished\n");
+
+	return cb710_check_event(slot, 0);
+}
+
+static void cb710_mmc_powerdown(struct cb710_slot *slot)
+{
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0, 0x81);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG3_PORT, 0, 0x80);
+}
+
+static void cb710_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+	int err;
+
+	cb710_mmc_set_clock(mmc, ios->clock);
+
+	if (!cb710_mmc_is_card_inserted(slot)) {
+		dev_dbg(cb710_slot_dev(slot),
+			"no card inserted - ignoring bus powerup request\n");
+		ios->power_mode = MMC_POWER_OFF;
+	}
+
+	if (ios->power_mode != reader->last_power_mode)
+	switch (ios->power_mode) {
+	case MMC_POWER_ON:
+		err = cb710_mmc_powerup(slot);
+		if (err) {
+			dev_warn(cb710_slot_dev(slot),
+				"powerup failed (%d)- retrying\n", err);
+			cb710_mmc_powerdown(slot);
+			udelay(1);
+			err = cb710_mmc_powerup(slot);
+			if (err)
+				dev_warn(cb710_slot_dev(slot),
+					"powerup retry failed (%d) - expect errors\n",
+					err);
+		}
+		reader->last_power_mode = MMC_POWER_ON;
+		break;
+	case MMC_POWER_OFF:
+		cb710_mmc_powerdown(slot);
+		reader->last_power_mode = MMC_POWER_OFF;
+		break;
+	case MMC_POWER_UP:
+	default:
+		/* ignore */;
+	}
+
+	cb710_mmc_enable_4bit_data(slot, ios->bus_width != MMC_BUS_WIDTH_1);
+
+	cb710_mmc_enable_irq(slot, CB710_MMC_IE_TEST_MASK, 0);
+}
+
+static int cb710_mmc_get_ro(struct mmc_host *mmc)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+
+	return cb710_read_port_8(slot, CB710_MMC_STATUS3_PORT)
+		& CB710_MMC_S3_WRITE_PROTECTED;
+}
+
+static int cb710_mmc_irq_handler(struct cb710_slot *slot)
+{
+	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+	u32 status, config1, config2, irqen;
+
+	status = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+	irqen = cb710_read_port_32(slot, CB710_MMC_IRQ_ENABLE_PORT);
+	config2 = cb710_read_port_32(slot, CB710_MMC_CONFIGB_PORT);
+	config1 = cb710_read_port_32(slot, CB710_MMC_CONFIG_PORT);
+
+	dev_dbg(cb710_slot_dev(slot), "interrupt; status: %08X, "
+		"ie: %08X, c2: %08X, c1: %08X\n",
+		status, irqen, config2, config1);
+
+	if (status & (CB710_MMC_S1_CARD_CHANGED << 8)) {
+		/* ack the event */
+		cb710_write_port_8(slot, CB710_MMC_STATUS1_PORT,
+			CB710_MMC_S1_CARD_CHANGED);
+		if ((irqen & CB710_MMC_IE_CISTATUS_MASK)
+		    == CB710_MMC_IE_CISTATUS_MASK)
+			mmc_detect_change(mmc, HZ/5);
+	} else {
+		dev_dbg(cb710_slot_dev(slot), "unknown interrupt (test)\n");
+		spin_lock(&reader->irq_lock);
+		__cb710_mmc_enable_irq(slot, 0, CB710_MMC_IE_TEST_MASK);
+		spin_unlock(&reader->irq_lock);
+	}
+
+	return 1;
+}
+
+static void cb710_mmc_finish_request_tasklet(unsigned long data)
+{
+	struct mmc_host *mmc = (void *)data;
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+	struct mmc_request *mrq = reader->mrq;
+
+	reader->mrq = NULL;
+	mmc_request_done(mmc, mrq);
+}
+
+static const struct mmc_host_ops cb710_mmc_host = {
+	.request = cb710_mmc_request,
+	.set_ios = cb710_mmc_set_ios,
+	.get_ro = cb710_mmc_get_ro
+};
+
+#ifdef CONFIG_PM
+
+static int cb710_mmc_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
+	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
+	int err;
+
+	err = mmc_suspend_host(mmc, state);
+	if (err)
+		return err;
+
+	cb710_mmc_enable_irq(slot, 0, ~0);
+	return 0;
+}
+
+static int cb710_mmc_resume(struct platform_device *pdev)
+{
+	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
+	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
+
+	cb710_mmc_enable_irq(slot, 0, ~0);
+
+	return mmc_resume_host(mmc);
+}
+
+#endif /* CONFIG_PM */
+
+static int __devinit cb710_mmc_init(struct platform_device *pdev)
+{
+	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+	struct mmc_host *mmc;
+	struct cb710_mmc_reader *reader;
+	int err;
+	u32 val;
+
+	mmc = mmc_alloc_host(sizeof(*reader), cb710_slot_dev(slot));
+	if (!mmc)
+		return -ENOMEM;
+
+	dev_set_drvdata(&pdev->dev, mmc);
+
+	/* harmless (maybe) magic */
+	pci_read_config_dword(chip->pdev, 0x48, &val);
+	val = cb710_src_freq_mhz[(val >> 16) & 0xF];
+	dev_dbg(cb710_slot_dev(slot), "source frequency: %dMHz\n", val);
+	val *= 1000000;
+
+	mmc->ops = &cb710_mmc_host;
+	mmc->f_max = val;
+	mmc->f_min = val >> cb710_clock_divider_log2[CB710_MAX_DIVIDER_IDX];
+	mmc->ocr_avail = MMC_VDD_32_33|MMC_VDD_33_34;
+	mmc->caps = MMC_CAP_4_BIT_DATA;
+
+	reader = mmc_priv(mmc);
+
+	tasklet_init(&reader->finish_req_tasklet,
+		cb710_mmc_finish_request_tasklet, (unsigned long)mmc);
+	spin_lock_init(&reader->irq_lock);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+
+	cb710_mmc_enable_irq(slot, 0, ~0);
+	cb710_set_irq_handler(slot, cb710_mmc_irq_handler);
+
+	err = mmc_add_host(mmc);
+	if (unlikely(err))
+		goto err_free_mmc;
+
+	dev_dbg(cb710_slot_dev(slot), "mmc_hostname is %s\n",
+		mmc_hostname(mmc));
+
+	cb710_mmc_enable_irq(slot, CB710_MMC_IE_CARD_INSERTION_STATUS, 0);
+
+	return 0;
+
+err_free_mmc:
+	dev_dbg(cb710_slot_dev(slot), "mmc_add_host() failed: %d\n", err);
+
+	mmc_free_host(mmc);
+	return err;
+}
+
+static int __devexit cb710_mmc_exit(struct platform_device *pdev)
+{
+	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
+	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+
+	cb710_mmc_enable_irq(slot, 0, CB710_MMC_IE_CARD_INSERTION_STATUS);
+
+	mmc_remove_host(mmc);
+
+	/* IRQs should be disabled now, but let's stay on the safe side */
+	cb710_mmc_enable_irq(slot, 0, ~0);
+	cb710_set_irq_handler(slot, NULL);
+
+	/* clear config ports - just in case */
+	cb710_write_port_32(slot, CB710_MMC_CONFIG_PORT, 0);
+	cb710_write_port_16(slot, CB710_MMC_CONFIGB_PORT, 0);
+
+	tasklet_kill(&reader->finish_req_tasklet);
+
+	mmc_free_host(mmc);
+	return 0;
+}
+
+static struct platform_driver cb710_mmc_driver = {
+	.driver.name = "cb710-mmc",
+	.probe = cb710_mmc_init,
+	.remove = __devexit_p(cb710_mmc_exit),
+#ifdef CONFIG_PM
+	.suspend = cb710_mmc_suspend,
+	.resume = cb710_mmc_resume,
+#endif
+};
+
+static int __init cb710_mmc_init_module(void)
+{
+	return platform_driver_register(&cb710_mmc_driver);
+}
+
+static void __exit cb710_mmc_cleanup_module(void)
+{
+	platform_driver_unregister(&cb710_mmc_driver);
+}
+
+module_init(cb710_mmc_init_module);
+module_exit(cb710_mmc_cleanup_module);
+
+MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>");
+MODULE_DESCRIPTION("ENE CB710 memory card reader driver - MMC/SD part");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:cb710-mmc");
diff --git a/drivers/mmc/host/cb710-mmc.h b/drivers/mmc/host/cb710-mmc.h
new file mode 100644
index 0000000..e845c77
--- /dev/null
+++ b/drivers/mmc/host/cb710-mmc.h
@@ -0,0 +1,104 @@
+/*
+ *  cb710/cb710-mmc.h
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef LINUX_CB710_MMC_H
+#define LINUX_CB710_MMC_H
+
+#include <linux/cb710.h>
+
+/* per-MMC-reader structure */
+struct cb710_mmc_reader {
+	struct tasklet_struct finish_req_tasklet;
+	struct mmc_request *mrq;
+	spinlock_t irq_lock;
+	unsigned char last_power_mode;
+};
+
+/* some device struct walking */
+
+static inline struct mmc_host *cb710_slot_to_mmc(struct cb710_slot *slot)
+{
+	return dev_get_drvdata(&slot->pdev.dev);
+}
+
+static inline struct cb710_slot *cb710_mmc_to_slot(struct mmc_host *mmc)
+{
+	struct platform_device *pdev = container_of(mmc_dev(mmc),
+		struct platform_device, dev);
+	return cb710_pdev_to_slot(pdev);
+}
+
+/* registers (this might be all wrong ;) */
+
+#define CB710_MMC_DATA_PORT		0x00
+
+#define CB710_MMC_CONFIG_PORT		0x04
+#define CB710_MMC_CONFIG0_PORT		0x04
+#define CB710_MMC_CONFIG1_PORT		0x05
+#define   CB710_MMC_C1_4BIT_DATA_BUS		0x40
+#define CB710_MMC_CONFIG2_PORT		0x06
+#define   CB710_MMC_C2_READ_PIO_SIZE_MASK	0x0F	/* N-1 */
+#define CB710_MMC_CONFIG3_PORT		0x07
+
+#define CB710_MMC_CONFIGB_PORT		0x08
+
+#define CB710_MMC_IRQ_ENABLE_PORT	0x0C
+#define   CB710_MMC_IE_TEST_MASK		0x00BF
+#define   CB710_MMC_IE_CARD_INSERTION_STATUS	0x1000
+#define   CB710_MMC_IE_IRQ_ENABLE		0x8000
+#define   CB710_MMC_IE_CISTATUS_MASK		\
+		(CB710_MMC_IE_CARD_INSERTION_STATUS|CB710_MMC_IE_IRQ_ENABLE)
+
+#define CB710_MMC_STATUS_PORT		0x10
+#define   CB710_MMC_STATUS_ERROR_EVENTS		0x60FF
+#define CB710_MMC_STATUS0_PORT		0x10
+#define   CB710_MMC_S0_FIFO_UNDERFLOW		0x40
+#define CB710_MMC_STATUS1_PORT		0x11
+#define   CB710_MMC_S1_COMMAND_SENT		0x01
+#define   CB710_MMC_S1_DATA_TRANSFER_DONE	0x02
+#define   CB710_MMC_S1_PIO_TRANSFER_DONE	0x04
+#define   CB710_MMC_S1_CARD_CHANGED		0x10
+#define   CB710_MMC_S1_RESET			0x20
+#define CB710_MMC_STATUS2_PORT		0x12
+#define   CB710_MMC_S2_FIFO_READY		0x01
+#define   CB710_MMC_S2_FIFO_EMPTY		0x02
+#define   CB710_MMC_S2_BUSY_10			0x10
+#define   CB710_MMC_S2_BUSY_20			0x20
+#define CB710_MMC_STATUS3_PORT		0x13
+#define   CB710_MMC_S3_CARD_DETECTED		0x02
+#define   CB710_MMC_S3_WRITE_PROTECTED		0x04
+
+#define CB710_MMC_CMD_TYPE_PORT		0x14
+#define   CB710_MMC_RSP_TYPE_MASK		0x0007
+#define     CB710_MMC_RSP_R1			(0)
+#define     CB710_MMC_RSP_136			(5)
+#define     CB710_MMC_RSP_NO_CRC		(2)
+#define   CB710_MMC_RSP_PRESENT_MASK		0x0018
+#define     CB710_MMC_RSP_NONE			(0 << 3)
+#define     CB710_MMC_RSP_PRESENT		(1 << 3)
+#define     CB710_MMC_RSP_PRESENT_X		(2 << 3)
+#define   CB710_MMC_CMD_TYPE_MASK		0x0060
+#define     CB710_MMC_CMD_BC			(0 << 5)
+#define     CB710_MMC_CMD_BCR			(1 << 5)
+#define     CB710_MMC_CMD_AC			(2 << 5)
+#define     CB710_MMC_CMD_ADTC			(3 << 5)
+#define   CB710_MMC_DATA_READ			0x0080
+#define   CB710_MMC_CMD_CODE_MASK		0x3F00
+#define   CB710_MMC_CMD_CODE_SHIFT		8
+#define   CB710_MMC_IS_APP_CMD			0x4000
+#define   CB710_MMC_RSP_BUSY			0x8000
+
+#define CB710_MMC_CMD_PARAM_PORT	0x18
+#define CB710_MMC_TRANSFER_SIZE_PORT	0x1C
+#define CB710_MMC_RESPONSE0_PORT	0x20
+#define CB710_MMC_RESPONSE1_PORT	0x24
+#define CB710_MMC_RESPONSE2_PORT	0x28
+#define CB710_MMC_RESPONSE3_PORT	0x2C
+
+#endif /* LINUX_CB710_MMC_H */
diff --git a/include/linux/cb710.h b/include/linux/cb710.h
new file mode 100644
index 0000000..c3819e1
--- /dev/null
+++ b/include/linux/cb710.h
@@ -0,0 +1,238 @@
+/*
+ *  cb710/cb710.h
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef LINUX_CB710_DRIVER_H
+#define LINUX_CB710_DRIVER_H
+
+#ifdef CONFIG_CB710_DEBUG
+#define DEBUG
+#endif
+
+/* verify assumptions on platform_device framework */
+#define CONFIG_CB710_DEBUG_ASSUMPTIONS
+
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/mmc/host.h>
+
+struct cb710_slot;
+
+typedef int (*cb710_irq_handler_t)(struct cb710_slot *);
+
+/* per-virtual-slot structure */
+struct cb710_slot {
+	struct platform_device	pdev;
+	void __iomem		*iobase;
+	cb710_irq_handler_t	irq_handler;
+};
+
+/* per-device structure */
+struct cb710_chip {
+	struct pci_dev		*pdev;
+	void __iomem		*iobase;
+	unsigned		platform_id;
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	atomic_t		slot_refs_count;
+#endif
+	unsigned		slot_mask;
+	unsigned		slots;
+	spinlock_t		irq_lock;
+	struct cb710_slot	slot[0];
+};
+
+/* NOTE: cb710_chip.slots is modified only during device init/exit and
+ * they are all serialized wrt themselves */
+
+/* cb710_chip.slot_mask values */
+#define CB710_SLOT_MMC		1
+#define CB710_SLOT_MS		2
+#define CB710_SLOT_SM		4
+
+/* slot port accessors - so the logic is more clear in the code */
+#define CB710_PORT_ACCESSORS(t) \
+static inline void cb710_write_port_##t(struct cb710_slot *slot,	\
+	unsigned port, u##t value)					\
+{									\
+	iowrite##t(value, slot->iobase + port);				\
+}									\
+									\
+static inline u##t cb710_read_port_##t(struct cb710_slot *slot,		\
+	unsigned port)							\
+{									\
+	return ioread##t(slot->iobase + port);				\
+}									\
+									\
+static inline void cb710_modify_port_##t(struct cb710_slot *slot,	\
+	unsigned port, u##t set, u##t clear)				\
+{									\
+	iowrite##t(							\
+		(ioread##t(slot->iobase + port) & ~clear)|set,		\
+		slot->iobase + port);					\
+}
+
+CB710_PORT_ACCESSORS(8)
+CB710_PORT_ACCESSORS(16)
+CB710_PORT_ACCESSORS(32)
+
+void cb710_pci_update_config_reg(struct pci_dev *pdev,
+	int reg, uint32_t and, uint32_t xor);
+void cb710_set_irq_handler(struct cb710_slot *slot,
+	cb710_irq_handler_t handler);
+
+/* some device struct walking */
+
+static inline struct cb710_slot *cb710_pdev_to_slot(
+	struct platform_device *pdev)
+{
+	return container_of(pdev, struct cb710_slot, pdev);
+}
+
+static inline struct cb710_chip *cb710_slot_to_chip(struct cb710_slot *slot)
+{
+	return dev_get_drvdata(slot->pdev.dev.parent);
+}
+
+static inline struct device *cb710_slot_dev(struct cb710_slot *slot)
+{
+	return &slot->pdev.dev;
+}
+
+static inline struct device *cb710_chip_dev(struct cb710_chip *chip)
+{
+	return &chip->pdev->dev;
+}
+
+/* debugging aids */
+
+#ifdef CONFIG_CB710_DEBUG
+void cb710_dump_regs(struct cb710_chip *chip, unsigned dump);
+#else
+#define cb710_dump_regs(c, d) do {} while (0)
+#endif
+
+#define CB710_DUMP_REGS_MMC	0x0F
+#define CB710_DUMP_REGS_MS	0x30
+#define CB710_DUMP_REGS_SM	0xC0
+#define CB710_DUMP_REGS_ALL	0xFF
+#define CB710_DUMP_REGS_MASK	0xFF
+
+#define CB710_DUMP_ACCESS_8	0x100
+#define CB710_DUMP_ACCESS_16	0x200
+#define CB710_DUMP_ACCESS_32	0x400
+#define CB710_DUMP_ACCESS_ALL	0x700
+#define CB710_DUMP_ACCESS_MASK	0x700
+
+#endif /* LINUX_CB710_DRIVER_H */
+/*
+ *  cb710/sgbuf2.h
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef LINUX_CB710_SG_H
+#define LINUX_CB710_SG_H
+
+#include <linux/highmem.h>
+#include <linux/scatterlist.h>
+
+/**
+ * cb710_sg_miter_stop_writing - stop mapping iteration after writing
+ * @miter: sg mapping iter to be stopped
+ *
+ * Description:
+ *   Stops mapping iterator @miter.  @miter should have been started
+ *   started using sg_miter_start().  A stopped iteration can be
+ *   resumed by calling sg_miter_next() on it.  This is useful when
+ *   resources (kmap) need to be released during iteration.
+ *
+ *   This is a convenience wrapper that will be optimized out for arches
+ *   that don't need flush_kernel_dcache_page().
+ *
+ * Context:
+ *   IRQ disabled if the SG_MITER_ATOMIC is set.  Don't care otherwise.
+ */
+static inline void cb710_sg_miter_stop_writing(struct sg_mapping_iter *miter)
+{
+	if (miter->page)
+		flush_kernel_dcache_page(miter->page);
+	sg_miter_stop(miter);
+}
+
+/*
+ * 32-bit PIO mapping sg iterator
+ *
+ * Hides scatterlist access issues - fragment boundaries, alignment, page
+ * mapping - for drivers using 32-bit-word-at-a-time-PIO (ie. PCI devices
+ * without DMA support).
+ *
+ * Best-case reading (transfer from device):
+ *   sg_miter_start();
+ *   cb710_sg_dwiter_write_from_io();
+ *   cb710_sg_miter_stop_writing();
+ *
+ * Best-case writing (transfer to device):
+ *   sg_miter_start();
+ *   cb710_sg_dwiter_read_to_io();
+ *   sg_miter_stop();
+ */
+
+uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter);
+void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data);
+
+/**
+ * cb710_sg_dwiter_write_from_io - transfer data to mapped buffer from 32-bit IO port
+ * @miter: sg mapping iter
+ * @port: PIO port - IO or MMIO address
+ * @count: number of 32-bit words to transfer
+ *
+ * Description:
+ *   Reads @count 32-bit words from register @port and stores it in
+ *   buffer iterated by @miter.  Data that would overflow the buffer
+ *   is silently ignored.  Iterator is advanced by 4*@count bytes
+ *   or to the buffer's end whichever is closer.
+ *
+ * Context:
+ *   IRQ disabled if the SG_MITER_ATOMIC is set.  Don't care otherwise.
+ */
+static inline void cb710_sg_dwiter_write_from_io(struct sg_mapping_iter *miter,
+	void __iomem *port, size_t count)
+{
+	while (count-- > 0)
+		cb710_sg_dwiter_write_next_block(miter, ioread32(port));
+}
+
+/**
+ * cb710_sg_dwiter_read_to_io - transfer data to 32-bit IO port from mapped buffer
+ * @miter: sg mapping iter
+ * @port: PIO port - IO or MMIO address
+ * @count: number of 32-bit words to transfer
+ *
+ * Description:
+ *   Writes @count 32-bit words to register @port from buffer iterated
+ *   through @miter.  If buffer ends before @count words are written
+ *   missing data is replaced by zeroes. @miter is advanced by 4*@count
+ *   bytes or to the buffer's end whichever is closer.
+ *
+ * Context:
+ *   IRQ disabled if the SG_MITER_ATOMIC is set.  Don't care otherwise.
+ */
+static inline void cb710_sg_dwiter_read_to_io(struct sg_mapping_iter *miter,
+	void __iomem *port, size_t count)
+{
+	while (count-- > 0)
+		iowrite32(cb710_sg_dwiter_read_next_block(miter), port);
+}
+
+#endif /* LINUX_CB710_SG_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 19f8e6d..9f36e1c 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2127,6 +2127,7 @@
 #define PCI_VENDOR_ID_MAINPINE		0x1522
 #define PCI_DEVICE_ID_MAINPINE_PBRIDGE	0x0100
 #define PCI_VENDOR_ID_ENE		0x1524
+#define PCI_DEVICE_ID_ENE_CB710_FLASH	0x0510
 #define PCI_DEVICE_ID_ENE_CB712_SD	0x0550
 #define PCI_DEVICE_ID_ENE_CB712_SD_2	0x0551
 #define PCI_DEVICE_ID_ENE_CB714_SD	0x0750
-- 
cgit v0.10.2


From 09adfe454c87e3ec5dffbc42567cd7b4b4948522 Mon Sep 17 00:00:00 2001
From: Pierre Ossman <pierre@ossman.eu>
Date: Thu, 4 Jun 2009 07:53:38 +0200
Subject: cb710: fix printk format string

Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c
index bc048aa..11efefb 100644
--- a/drivers/mmc/host/cb710-mmc.c
+++ b/drivers/mmc/host/cb710-mmc.c
@@ -219,7 +219,7 @@ static void cb710_mmc_set_transfer_size(struct cb710_slot *slot,
 	cb710_write_port_32(slot, CB710_MMC_TRANSFER_SIZE_PORT,
 		((count - 1) << 16)|(blocksize - 1));
 
-	dev_vdbg(cb710_slot_dev(slot), "set up for %d block%s of %d bytes\n",
+	dev_vdbg(cb710_slot_dev(slot), "set up for %zu block%s of %zu bytes\n",
 		count, count == 1 ? "" : "s", blocksize);
 }
 
-- 
cgit v0.10.2


From 10eb4f901cacf7da87145330f3ca77b723783497 Mon Sep 17 00:00:00 2001
From: Pierre Ossman <pierre@ossman.eu>
Date: Thu, 4 Jun 2009 07:58:57 +0200
Subject: cb710: add missing parenthesis

Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c
index 9da11bc..02358d0 100644
--- a/drivers/misc/cb710/debug.c
+++ b/drivers/misc/cb710/debug.c
@@ -37,7 +37,7 @@ static void cb710_read_regs_##t(void __iomem *iobase,			\
 	unsigned i, j;							\
 									\
 	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
-		if (!select & (1 << i))					\
+		if (!(select & (1 << i)))					\
 			continue;					\
 									\
 		for (j = 0; j < 0x10/(t/8); ++j) {			\
-- 
cgit v0.10.2


From 9bf69a26ad9ccdc49469402275204271b3336ab6 Mon Sep 17 00:00:00 2001
From: Pierre Ossman <pierre@ossman.eu>
Date: Thu, 4 Jun 2009 08:00:40 +0200
Subject: cb710: handle DEBUG define in Makefile

Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile
index 62d51ac..7b80cbf 100644
--- a/drivers/misc/cb710/Makefile
+++ b/drivers/misc/cb710/Makefile
@@ -1,3 +1,7 @@
+ifeq ($(CONFIG_CB710_DEBUG),y)
+	EXTRA_CFLAGS		+= -DDEBUG
+endif
+
 obj-$(CONFIG_CB710_CORE)	+= cb710.o
 
 cb710-y				:= core.o sgbuf2.o
diff --git a/include/linux/cb710.h b/include/linux/cb710.h
index c3819e1..a09213b 100644
--- a/include/linux/cb710.h
+++ b/include/linux/cb710.h
@@ -10,10 +10,6 @@
 #ifndef LINUX_CB710_DRIVER_H
 #define LINUX_CB710_DRIVER_H
 
-#ifdef CONFIG_CB710_DEBUG
-#define DEBUG
-#endif
-
 /* verify assumptions on platform_device framework */
 #define CONFIG_CB710_DEBUG_ASSUMPTIONS
 
-- 
cgit v0.10.2


From 99d9260c30f0705c95a5839d9184f2e949d2051e Mon Sep 17 00:00:00 2001
From: Pierre Ossman <pierre@ossman.eu>
Date: Tue, 9 Jun 2009 20:17:25 +0200
Subject: mxcmmc: remove frequency workaround

The MMC core has now been fixed to not send silly frequencies to the
drivers which means we can remove this workaround.

Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index f4cbe47..bc14bb1 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -746,8 +746,6 @@ static int mxcmci_probe(struct platform_device *pdev)
 	}
 
 	mmc->f_min = clk_get_rate(host->clk) >> 16;
-	if (mmc->f_min < 400000)
-		mmc->f_min = 400000;
 	mmc->f_max = clk_get_rate(host->clk) >> 1;
 
 	/* recommended in data sheet */
-- 
cgit v0.10.2


From a3456a2da14fb1d8246df63bb229623d58d09ce1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20R=C3=B6jfors?=
 <richard.rojfors.ext@mocean-labs.com>
Date: Thu, 4 Jun 2009 13:57:29 +0200
Subject: sdhci: platform driver for SDHCI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added a platform driver which uses the SDHCI core.

Signed-off-by: Richard Röjfors <richard.rojfors.ext@mocean-labs.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 75337e4..b1cf790 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -83,6 +83,17 @@ config MMC_SDHCI_OF
 
 	  If unsure, say N.
 
+config MMC_SDHCI_PLTFM
+	tristate "SDHCI support on the platform specific bus"
+	depends on MMC_SDHCI
+	help
+	  This selects the platform specific bus support for Secure Digital Host
+	  Controller Interface.
+
+	  If you have a controller with this interface, say Y or M here.
+
+	  If unsure, say N.
+
 config MMC_OMAP
 	tristate "TI OMAP Multimedia Card Interface support"
 	depends on ARCH_OMAP
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index ce9c8b6..254e441 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_MMC_SDHCI)		+= sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)	+= sdhci-pci.o
 obj-$(CONFIG_MMC_RICOH_MMC)	+= ricoh_mmc.o
 obj-$(CONFIG_MMC_SDHCI_OF)	+= sdhci-of.o
+obj-$(CONFIG_MMC_SDHCI_PLTFM)	+= sdhci-pltfm.o
 obj-$(CONFIG_MMC_WBSD)		+= wbsd.o
 obj-$(CONFIG_MMC_AU1X)		+= au1xmmc.o
 obj-$(CONFIG_MMC_OMAP)		+= omap.o
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
new file mode 100644
index 0000000..297f40a
--- /dev/null
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -0,0 +1,168 @@
+/*
+ * sdhci-pltfm.c Support for SDHCI platform devices
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * SDHCI platform devices
+ *
+ * Inspired by sdhci-pci.c, by Pierre Ossman
+ */
+
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/platform_device.h>
+
+#include <linux/mmc/host.h>
+
+#include <linux/io.h>
+
+#include "sdhci.h"
+
+/*****************************************************************************\
+ *                                                                           *
+ * SDHCI core callbacks                                                      *
+ *                                                                           *
+\*****************************************************************************/
+
+static struct sdhci_ops sdhci_pltfm_ops = {
+};
+
+/*****************************************************************************\
+ *                                                                           *
+ * Device probing/removal                                                    *
+ *                                                                           *
+\*****************************************************************************/
+
+static int __devinit sdhci_pltfm_probe(struct platform_device *pdev)
+{
+	struct sdhci_host *host;
+	struct resource *iomem;
+	int ret;
+
+	BUG_ON(pdev == NULL);
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iomem) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	if (resource_size(iomem) != 0x100)
+		dev_err(&pdev->dev, "Invalid iomem size. You may "
+			"experience problems.\n");
+
+	if (pdev->dev.parent)
+		host = sdhci_alloc_host(pdev->dev.parent, 0);
+	else
+		host = sdhci_alloc_host(&pdev->dev, 0);
+
+	if (IS_ERR(host)) {
+		ret = PTR_ERR(host);
+		goto err;
+	}
+
+	host->hw_name = "platform";
+	host->ops = &sdhci_pltfm_ops;
+	host->irq = platform_get_irq(pdev, 0);
+
+	if (!request_mem_region(iomem->start, resource_size(iomem),
+		mmc_hostname(host->mmc))) {
+		dev_err(&pdev->dev, "cannot request region\n");
+		ret = -EBUSY;
+		goto err_request;
+	}
+
+	host->ioaddr = ioremap(iomem->start, resource_size(iomem));
+	if (!host->ioaddr) {
+		dev_err(&pdev->dev, "failed to remap registers\n");
+		ret = -ENOMEM;
+		goto err_remap;
+	}
+
+	ret = sdhci_add_host(host);
+	if (ret)
+		goto err_add_host;
+
+	platform_set_drvdata(pdev, host);
+
+	return 0;
+
+err_add_host:
+	iounmap(host->ioaddr);
+err_remap:
+	release_mem_region(iomem->start, resource_size(iomem));
+err_request:
+	sdhci_free_host(host);
+err:
+	printk(KERN_ERR"Probing of sdhci-pltfm failed: %d\n", ret);
+	return ret;
+}
+
+static int __devexit sdhci_pltfm_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int dead;
+	u32 scratch;
+
+	dead = 0;
+	scratch = readl(host->ioaddr + SDHCI_INT_STATUS);
+	if (scratch == (u32)-1)
+		dead = 1;
+
+	sdhci_remove_host(host, dead);
+	iounmap(host->ioaddr);
+	release_mem_region(iomem->start, resource_size(iomem));
+	sdhci_free_host(host);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver sdhci_pltfm_driver = {
+	.driver = {
+		.name	= "sdhci",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sdhci_pltfm_probe,
+	.remove		= __devexit_p(sdhci_pltfm_remove),
+};
+
+/*****************************************************************************\
+ *                                                                           *
+ * Driver init/exit                                                          *
+ *                                                                           *
+\*****************************************************************************/
+
+static int __init sdhci_drv_init(void)
+{
+	return platform_driver_register(&sdhci_pltfm_driver);
+}
+
+static void __exit sdhci_drv_exit(void)
+{
+	platform_driver_unregister(&sdhci_pltfm_driver);
+}
+
+module_init(sdhci_drv_init);
+module_exit(sdhci_drv_exit);
+
+MODULE_DESCRIPTION("Secure Digital Host Controller Interface platform driver");
+MODULE_AUTHOR("Mocean Laboratories <info@mocean-labs.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:sdhci");
+
-- 
cgit v0.10.2


From c54f6bc67a4398243682f7438a2129906e127d21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Sat, 13 Jun 2009 12:37:59 +0200
Subject: cb710: more cleanup for the DEBUG case.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
index 0b55079..22429b8 100644
--- a/drivers/misc/cb710/Kconfig
+++ b/drivers/misc/cb710/Kconfig
@@ -19,3 +19,7 @@ config CB710_DEBUG
 	  This is an option for use by developers; most people should
 	  say N here.  This adds a lot of debugging output to dmesg.
 
+config CB710_DEBUG_ASSUMPTIONS
+	bool
+	depends on CB710_CORE != n
+	default y
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 254e441..79da397 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -32,3 +32,6 @@ obj-$(CONFIG_MMC_SDRICOH_CS)	+= sdricoh_cs.o
 obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
 obj-$(CONFIG_MMC_CB710)	+= cb710-mmc.o
 
+ifeq ($(CONFIG_CB710_DEBUG),y)
+	CFLAGS-cb710-mmc	+= -DDEBUG
+endif
diff --git a/include/linux/cb710.h b/include/linux/cb710.h
index a09213b..63bc9a4 100644
--- a/include/linux/cb710.h
+++ b/include/linux/cb710.h
@@ -10,9 +10,6 @@
 #ifndef LINUX_CB710_DRIVER_H
 #define LINUX_CB710_DRIVER_H
 
-/* verify assumptions on platform_device framework */
-#define CONFIG_CB710_DEBUG_ASSUMPTIONS
-
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
-- 
cgit v0.10.2


From fdd858db7113ca64132de390188d7ca00701013d Mon Sep 17 00:00:00 2001
From: Wolfgang Muees <wolfgang.mues@auerswald.de>
Date: Tue, 26 May 2009 08:56:19 +0100
Subject: mmc_spi: don't use EINVAL for possible transmission errors

This patch changes the reported error code for the responses
to a command from EINVAL to EFAULT/ENOSYS, as EINVAL is reserved
for non-recoverable host errors, and the responses from
the SD/MMC card may be because of recoverable transmission
errors in the command or in the response. Response codes
in SPI mode are NOT protected by a checksum, so don't trust them.

Signed-off-by: Wolfgang Muees <wolfgang.mues@auerswald.de>
Acked-by: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index a789db8..240608c 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -335,15 +335,16 @@ checkstatus:
 
 	/* Status byte: the entire seven-bit R1 response.  */
 	if (cmd->resp[0] != 0) {
-		if ((R1_SPI_PARAMETER | R1_SPI_ADDRESS
-				      | R1_SPI_ILLEGAL_COMMAND)
+		if ((R1_SPI_PARAMETER | R1_SPI_ADDRESS)
 				& cmd->resp[0])
-			value = -EINVAL;
+			value = -EFAULT; /* Bad address */
+		else if (R1_SPI_ILLEGAL_COMMAND & cmd->resp[0])
+			value = -ENOSYS; /* Function not implemented */
 		else if (R1_SPI_COM_CRC & cmd->resp[0])
-			value = -EILSEQ;
+			value = -EILSEQ; /* Illegal byte sequence */
 		else if ((R1_SPI_ERASE_SEQ | R1_SPI_ERASE_RESET)
 				& cmd->resp[0])
-			value = -EIO;
+			value = -EIO;    /* I/O error */
 		/* else R1_SPI_IDLE, "it's resetting" */
 	}
 
-- 
cgit v0.10.2


From f0e46cc4971f6be96010d9248e0fc076b229d989 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 4 Jun 2009 20:12:31 +0200
Subject: MFD,mmc: tmio_mmc: make HCLK configurable

The Toshiba parts all have a 24 MHz HCLK, but HTC ASIC3 has a 24.576 MHz HCLK
and AMD Imageon w228x's HCLK is 80 MHz. With this patch, the MFD driver
provides the HCLK frequency to tmio_mmc via mfd_cell->driver_data.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Ian Molton <ian@mnementh.co.uk>
Acked-by: Samuel Ortiz <sameo@openedhand.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index e9f4323..875f7a8 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -108,6 +108,10 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc)
 
 /*--------------------------------------------------------------------------*/
 
+static const struct tmio_mmc_data t7166xb_mmc_data = {
+	.hclk = 24000000,
+};
+
 static const struct resource t7l66xb_mmc_resources[] = {
 	{
 		.start = 0x800,
@@ -149,6 +153,7 @@ static struct mfd_cell t7l66xb_cells[] = {
 		.name = "tmio-mmc",
 		.enable = t7l66xb_mmc_enable,
 		.disable = t7l66xb_mmc_disable,
+		.driver_data = &t7166xb_mmc_data,
 		.num_resources = ARRAY_SIZE(t7l66xb_mmc_resources),
 		.resources = t7l66xb_mmc_resources,
 	},
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index 43222c1..c3993ac 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -75,6 +75,10 @@ static int tc6387xb_mmc_disable(struct platform_device *mmc)
 
 /*--------------------------------------------------------------------------*/
 
+const static struct tmio_mmc_data tc6387xb_mmc_data = {
+	.hclk = 24000000,
+};
+
 static struct resource tc6387xb_mmc_resources[] = {
 	{
 		.start = 0x800,
@@ -98,6 +102,7 @@ static struct mfd_cell tc6387xb_cells[] = {
 		.name = "tmio-mmc",
 		.enable = tc6387xb_mmc_enable,
 		.disable = tc6387xb_mmc_disable,
+		.driver_data = &tc6387xb_mmc_data,
 		.num_resources = ARRAY_SIZE(tc6387xb_mmc_resources),
 		.resources = tc6387xb_mmc_resources,
 	},
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 77a12fc..9d2abb5 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -136,6 +136,10 @@ static int tc6393xb_nand_enable(struct platform_device *nand)
 	return 0;
 }
 
+const static struct tmio_mmc_data tc6393xb_mmc_data = {
+	.hclk = 24000000,
+};
+
 static struct resource __devinitdata tc6393xb_nand_resources[] = {
 	{
 		.start	= 0x1000,
@@ -351,6 +355,7 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	},
 	[TC6393XB_CELL_MMC] = {
 		.name = "tmio-mmc",
+		.driver_data = &tc6393xb_mmc_data,
 		.num_resources = ARRAY_SIZE(tc6393xb_mmc_resources),
 		.resources = tc6393xb_mmc_resources,
 	},
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 63fbd5b..49df71e 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -35,23 +35,14 @@
 
 #include "tmio_mmc.h"
 
-/*
- * Fixme - documentation conflicts on what the clock values are for the
- * various dividers.
- * One document I have says that its a divisor of a 24MHz clock, another 33.
- * This probably depends on HCLK for a given platform, so we may need to
- * require HCLK be passed to us from the MFD core.
- *
- */
-
 static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 {
 	void __iomem *cnf = host->cnf;
 	void __iomem *ctl = host->ctl;
-	u32 clk = 0, clock;
+	u32 clk = 0, clock, f_min = host->mmc->f_min;
 
 	if (new_clock) {
-		for (clock = 46875, clk = 0x100; new_clock >= (clock<<1); ) {
+		for (clock = f_min, clk = 0x100; new_clock >= (clock<<1); ) {
 			clock <<= 1;
 			clk >>= 1;
 		}
@@ -545,6 +536,7 @@ out:
 static int __devinit tmio_mmc_probe(struct platform_device *dev)
 {
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
+	struct tmio_mmc_data *pdata;
 	struct resource *res_ctl, *res_cnf;
 	struct tmio_mmc_host *host;
 	struct mmc_host *mmc;
@@ -560,6 +552,12 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 		goto out;
 	}
 
+	pdata = cell->driver_data;
+	if (!pdata || !pdata->hclk) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	mmc = mmc_alloc_host(sizeof(struct tmio_mmc_host), &dev->dev);
 	if (!mmc)
 		goto out;
@@ -578,8 +576,8 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	mmc->ops = &tmio_mmc_ops;
 	mmc->caps = MMC_CAP_4_BIT_DATA;
-	mmc->f_min = 46875; /* 24000000 / 512 */
-	mmc->f_max = 24000000;
+	mmc->f_max = pdata->hclk;
+	mmc->f_min = mmc->f_max / 512;
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
 	/* Enable the MMC/SD Control registers */
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 516d955..c377118 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -19,6 +19,13 @@
 	} while (0)
 
 /*
+ * data for the MMC controller
+ */
+struct tmio_mmc_data {
+	unsigned int		hclk;
+};
+
+/*
  * data for the NAND controller
  */
 struct tmio_nand_data {
-- 
cgit v0.10.2


From 5e74672c0925335bb00772530634ac70179e8a19 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 4 Jun 2009 20:12:32 +0200
Subject: tmio_mmc: add bus_shift support

Some ASIC3 devices in the wild are connected with the address bus shifted
by one line, so that its 16-bit registers appear 32-bit aligned in host
memory space.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Ian Molton <ian@mnementh.co.uk>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 49df71e..01add9b 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -37,8 +37,6 @@
 
 static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 {
-	void __iomem *cnf = host->cnf;
-	void __iomem *ctl = host->ctl;
 	u32 clk = 0, clock, f_min = host->mmc->f_min;
 
 	if (new_clock) {
@@ -50,45 +48,39 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 			clk = 0x20000;
 
 		clk >>= 2;
-		tmio_iowrite8((clk & 0x8000) ? 0 : 1, cnf + CNF_SD_CLK_MODE);
+		sd_config_write8(host, CNF_SD_CLK_MODE, (clk & 0x8000) ? 0 : 1);
 		clk |= 0x100;
 	}
 
-	tmio_iowrite16(clk, ctl + CTL_SD_CARD_CLK_CTL);
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk);
 }
 
 static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
 {
-	void __iomem *ctl = host->ctl;
-
-	tmio_iowrite16(0x0000, ctl + CTL_CLK_AND_WAIT_CTL);
+	sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0000);
 	msleep(10);
-	tmio_iowrite16(tmio_ioread16(ctl + CTL_SD_CARD_CLK_CTL) & ~0x0100,
-	       ctl + CTL_SD_CARD_CLK_CTL);
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~0x0100 &
+		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
 	msleep(10);
 }
 
 static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
 {
-	void __iomem *ctl = host->ctl;
-
-	tmio_iowrite16(tmio_ioread16(ctl + CTL_SD_CARD_CLK_CTL) | 0x0100,
-	       ctl + CTL_SD_CARD_CLK_CTL);
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, 0x0100 |
+		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
 	msleep(10);
-	tmio_iowrite16(0x0100, ctl + CTL_CLK_AND_WAIT_CTL);
+	sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
 	msleep(10);
 }
 
 static void reset(struct tmio_mmc_host *host)
 {
-	void __iomem *ctl = host->ctl;
-
 	/* FIXME - should we set stop clock reg here */
-	tmio_iowrite16(0x0000, ctl + CTL_RESET_SD);
-	tmio_iowrite16(0x0000, ctl + CTL_RESET_SDIO);
+	sd_ctrl_write16(host, CTL_RESET_SD, 0x0000);
+	sd_ctrl_write16(host, CTL_RESET_SDIO, 0x0000);
 	msleep(10);
-	tmio_iowrite16(0x0001, ctl + CTL_RESET_SD);
-	tmio_iowrite16(0x0001, ctl + CTL_RESET_SDIO);
+	sd_ctrl_write16(host, CTL_RESET_SD, 0x0001);
+	sd_ctrl_write16(host, CTL_RESET_SDIO, 0x0001);
 	msleep(10);
 }
 
@@ -120,13 +112,12 @@ tmio_mmc_finish_request(struct tmio_mmc_host *host)
 static int
 tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd)
 {
-	void __iomem *ctl = host->ctl;
 	struct mmc_data *data = host->data;
 	int c = cmd->opcode;
 
 	/* Command 12 is handled by hardware */
 	if (cmd->opcode == 12 && !cmd->arg) {
-		tmio_iowrite16(0x001, ctl + CTL_STOP_INTERNAL_ACTION);
+		sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x001);
 		return 0;
 	}
 
@@ -151,18 +142,18 @@ tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd)
 	if (data) {
 		c |= DATA_PRESENT;
 		if (data->blocks > 1) {
-			tmio_iowrite16(0x100, ctl + CTL_STOP_INTERNAL_ACTION);
+			sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x100);
 			c |= TRANSFER_MULTI;
 		}
 		if (data->flags & MMC_DATA_READ)
 			c |= TRANSFER_READ;
 	}
 
-	enable_mmc_irqs(ctl, TMIO_MASK_CMD);
+	enable_mmc_irqs(host, TMIO_MASK_CMD);
 
 	/* Fire off the command */
-	tmio_iowrite32(cmd->arg, ctl + CTL_ARG_REG);
-	tmio_iowrite16(c, ctl + CTL_SD_CMD);
+	sd_ctrl_write32(host, CTL_ARG_REG, cmd->arg);
+	sd_ctrl_write16(host, CTL_SD_CMD, c);
 
 	return 0;
 }
@@ -174,7 +165,6 @@ tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd)
  */
 static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
 {
-	void __iomem *ctl = host->ctl;
 	struct mmc_data *data = host->data;
 	unsigned short *buf;
 	unsigned int count;
@@ -197,9 +187,9 @@ static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
 
 	/* Transfer the data */
 	if (data->flags & MMC_DATA_READ)
-		tmio_ioread16_rep(ctl + CTL_SD_DATA_PORT, buf, count >> 1);
+		sd_ctrl_read16_rep(host, CTL_SD_DATA_PORT, buf, count >> 1);
 	else
-		tmio_iowrite16_rep(ctl + CTL_SD_DATA_PORT, buf, count >> 1);
+		sd_ctrl_write16_rep(host, CTL_SD_DATA_PORT, buf, count >> 1);
 
 	host->sg_off += count;
 
@@ -213,7 +203,6 @@ static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
 
 static inline void tmio_mmc_data_irq(struct tmio_mmc_host *host)
 {
-	void __iomem *ctl = host->ctl;
 	struct mmc_data *data = host->data;
 	struct mmc_command *stop;
 
@@ -242,13 +231,13 @@ static inline void tmio_mmc_data_irq(struct tmio_mmc_host *host)
 	 */
 
 	if (data->flags & MMC_DATA_READ)
-		disable_mmc_irqs(ctl, TMIO_MASK_READOP);
+		disable_mmc_irqs(host, TMIO_MASK_READOP);
 	else
-		disable_mmc_irqs(ctl, TMIO_MASK_WRITEOP);
+		disable_mmc_irqs(host, TMIO_MASK_WRITEOP);
 
 	if (stop) {
 		if (stop->opcode == 12 && !stop->arg)
-			tmio_iowrite16(0x000, ctl + CTL_STOP_INTERNAL_ACTION);
+			sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x000);
 		else
 			BUG();
 	}
@@ -259,9 +248,8 @@ static inline void tmio_mmc_data_irq(struct tmio_mmc_host *host)
 static inline void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 	unsigned int stat)
 {
-	void __iomem *ctl = host->ctl, *addr;
 	struct mmc_command *cmd = host->cmd;
-	int i;
+	int i, addr;
 
 	if (!host->cmd) {
 		pr_debug("Spurious CMD irq\n");
@@ -275,8 +263,8 @@ static inline void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 	 * modify the order of the response for short response command types.
 	 */
 
-	for (i = 3, addr = ctl + CTL_RESPONSE ; i >= 0 ; i--, addr += 4)
-		cmd->resp[i] = tmio_ioread32(addr);
+	for (i = 3, addr = CTL_RESPONSE ; i >= 0 ; i--, addr += 4)
+		cmd->resp[i] = sd_ctrl_read32(host, addr);
 
 	if (cmd->flags &  MMC_RSP_136) {
 		cmd->resp[0] = (cmd->resp[0] << 8) | (cmd->resp[1] >> 24);
@@ -298,9 +286,9 @@ static inline void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 	 */
 	if (host->data && !cmd->error) {
 		if (host->data->flags & MMC_DATA_READ)
-			enable_mmc_irqs(ctl, TMIO_MASK_READOP);
+			enable_mmc_irqs(host, TMIO_MASK_READOP);
 		else
-			enable_mmc_irqs(ctl, TMIO_MASK_WRITEOP);
+			enable_mmc_irqs(host, TMIO_MASK_WRITEOP);
 	} else {
 		tmio_mmc_finish_request(host);
 	}
@@ -312,20 +300,19 @@ static inline void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 static irqreturn_t tmio_mmc_irq(int irq, void *devid)
 {
 	struct tmio_mmc_host *host = devid;
-	void __iomem *ctl = host->ctl;
 	unsigned int ireg, irq_mask, status;
 
 	pr_debug("MMC IRQ begin\n");
 
-	status = tmio_ioread32(ctl + CTL_STATUS);
-	irq_mask = tmio_ioread32(ctl + CTL_IRQ_MASK);
+	status = sd_ctrl_read32(host, CTL_STATUS);
+	irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK);
 	ireg = status & TMIO_MASK_IRQ & ~irq_mask;
 
 	pr_debug_status(status);
 	pr_debug_status(ireg);
 
 	if (!ireg) {
-		disable_mmc_irqs(ctl, status & ~irq_mask);
+		disable_mmc_irqs(host, status & ~irq_mask);
 
 		pr_debug("tmio_mmc: Spurious irq, disabling! "
 			"0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg);
@@ -337,7 +324,7 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid)
 	while (ireg) {
 		/* Card insert / remove attempts */
 		if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) {
-			ack_mmc_irqs(ctl, TMIO_STAT_CARD_INSERT |
+			ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT |
 				TMIO_STAT_CARD_REMOVE);
 			mmc_detect_change(host->mmc, 0);
 		}
@@ -349,25 +336,25 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid)
 
 		/* Command completion */
 		if (ireg & TMIO_MASK_CMD) {
-			ack_mmc_irqs(ctl, TMIO_MASK_CMD);
+			ack_mmc_irqs(host, TMIO_MASK_CMD);
 			tmio_mmc_cmd_irq(host, status);
 		}
 
 		/* Data transfer */
 		if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) {
-			ack_mmc_irqs(ctl, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ);
+			ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ);
 			tmio_mmc_pio_irq(host);
 		}
 
 		/* Data transfer completion */
 		if (ireg & TMIO_STAT_DATAEND) {
-			ack_mmc_irqs(ctl, TMIO_STAT_DATAEND);
+			ack_mmc_irqs(host, TMIO_STAT_DATAEND);
 			tmio_mmc_data_irq(host);
 		}
 
 		/* Check status - keep going until we've handled it all */
-		status = tmio_ioread32(ctl + CTL_STATUS);
-		irq_mask = tmio_ioread32(ctl + CTL_IRQ_MASK);
+		status = sd_ctrl_read32(host, CTL_STATUS);
+		irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK);
 		ireg = status & TMIO_MASK_IRQ & ~irq_mask;
 
 		pr_debug("Status at end of loop: %08x\n", status);
@@ -382,8 +369,6 @@ out:
 static int tmio_mmc_start_data(struct tmio_mmc_host *host,
 	struct mmc_data *data)
 {
-	void __iomem *ctl = host->ctl;
-
 	pr_debug("setup data transfer: blocksize %08x  nr_blocks %d\n",
 	    data->blksz, data->blocks);
 
@@ -398,8 +383,8 @@ static int tmio_mmc_start_data(struct tmio_mmc_host *host,
 	host->data = data;
 
 	/* Set transfer length / blocksize */
-	tmio_iowrite16(data->blksz,  ctl + CTL_SD_XFER_LEN);
-	tmio_iowrite16(data->blocks, ctl + CTL_XFER_BLK_COUNT);
+	sd_ctrl_write16(host, CTL_SD_XFER_LEN, data->blksz);
+	sd_ctrl_write16(host, CTL_XFER_BLK_COUNT, data->blocks);
 
 	return 0;
 }
@@ -440,8 +425,6 @@ fail:
 static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct tmio_mmc_host *host = mmc_priv(mmc);
-	void __iomem *cnf = host->cnf;
-	void __iomem *ctl = host->ctl;
 
 	if (ios->clock)
 		tmio_mmc_set_clock(host, ios->clock);
@@ -449,12 +432,12 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	/* Power sequence - OFF -> ON -> UP */
 	switch (ios->power_mode) {
 	case MMC_POWER_OFF: /* power down SD bus */
-		tmio_iowrite8(0x00, cnf + CNF_PWR_CTL_2);
+		sd_config_write8(host, CNF_PWR_CTL_2, 0x00);
 		tmio_mmc_clk_stop(host);
 		break;
 	case MMC_POWER_ON: /* power up SD bus */
 
-		tmio_iowrite8(0x02, cnf + CNF_PWR_CTL_2);
+		sd_config_write8(host, CNF_PWR_CTL_2, 0x02);
 		break;
 	case MMC_POWER_UP: /* start bus clock */
 		tmio_mmc_clk_start(host);
@@ -463,10 +446,10 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	switch (ios->bus_width) {
 	case MMC_BUS_WIDTH_1:
-		tmio_iowrite16(0x80e0, ctl + CTL_SD_MEM_CARD_OPT);
+		sd_ctrl_write16(host, CTL_SD_MEM_CARD_OPT, 0x80e0);
 	break;
 	case MMC_BUS_WIDTH_4:
-		tmio_iowrite16(0x00e0, ctl + CTL_SD_MEM_CARD_OPT);
+		sd_ctrl_write16(host, CTL_SD_MEM_CARD_OPT, 0x00e0);
 	break;
 	}
 
@@ -477,9 +460,8 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 static int tmio_mmc_get_ro(struct mmc_host *mmc)
 {
 	struct tmio_mmc_host *host = mmc_priv(mmc);
-	void __iomem *ctl = host->ctl;
 
-	return (tmio_ioread16(ctl + CTL_STATUS) & TMIO_STAT_WRPROTECT) ? 0 : 1;
+	return (sd_ctrl_read16(host, CTL_STATUS) & TMIO_STAT_WRPROTECT) ? 0 : 1;
 }
 
 static struct mmc_host_ops tmio_mmc_ops = {
@@ -509,12 +491,12 @@ static int tmio_mmc_resume(struct platform_device *dev)
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
 	struct mmc_host *mmc = platform_get_drvdata(dev);
 	struct tmio_mmc_host *host = mmc_priv(mmc);
-	void __iomem *cnf = host->cnf;
 	int ret = 0;
 
 	/* Enable the MMC/SD Control registers */
-	tmio_iowrite16(SDCREN, cnf + CNF_CMD);
-	tmio_iowrite32(dev->resource[0].start & 0xfffe, cnf + CNF_CTL_BASE);
+	sd_config_write16(host, CNF_CMD, SDCREN);
+	sd_config_write32(host, CNF_CTL_BASE,
+		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
 
 	/* Tell the MFD core we are ready to be enabled */
 	if (cell->enable) {
@@ -566,6 +548,9 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	host->mmc = mmc;
 	platform_set_drvdata(dev, mmc);
 
+	/* SD control register space size is 0x200, 0x400 for bus_shift=1 */
+	host->bus_shift = resource_size(res_ctl) >> 10;
+
 	host->ctl = ioremap(res_ctl->start, resource_size(res_ctl));
 	if (!host->ctl)
 		goto host_free;
@@ -581,9 +566,9 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
 	/* Enable the MMC/SD Control registers */
-	tmio_iowrite16(SDCREN, host->cnf + CNF_CMD);
-	tmio_iowrite32(dev->resource[0].start & 0xfffe,
-		host->cnf + CNF_CTL_BASE);
+	sd_config_write16(host, CNF_CMD, SDCREN);
+	sd_config_write32(host, CNF_CTL_BASE,
+		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
 
 	/* Tell the MFD core we are ready to be enabled */
 	if (cell->enable) {
@@ -593,13 +578,13 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	}
 
 	/* Disable SD power during suspend */
-	tmio_iowrite8(0x01, host->cnf + CNF_PWR_CTL_3);
+	sd_config_write8(host, CNF_PWR_CTL_3, 0x01);
 
 	/* The below is required but why? FIXME */
-	tmio_iowrite8(0x1f, host->cnf + CNF_STOP_CLK_CTL);
+	sd_config_write8(host, CNF_STOP_CLK_CTL, 0x1f);
 
 	/* Power down SD bus*/
-	tmio_iowrite8(0x0,  host->cnf + CNF_PWR_CTL_2);
+	sd_config_write8(host, CNF_PWR_CTL_2, 0x00);
 
 	tmio_mmc_clk_stop(host);
 	reset(host);
@@ -610,7 +595,7 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	else
 		goto unmap_cnf;
 
-	disable_mmc_irqs(host->ctl, TMIO_MASK_ALL);
+	disable_mmc_irqs(host, TMIO_MASK_ALL);
 
 	ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED, "tmio-mmc",
 		host);
@@ -625,7 +610,7 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	       (unsigned long)host->ctl, host->irq);
 
 	/* Unmask the IRQs we want to know about */
-	enable_mmc_irqs(host->ctl,  TMIO_MASK_IRQ);
+	enable_mmc_irqs(host, TMIO_MASK_IRQ);
 
 	return 0;
 
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 9c831ab..9fa9985 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -83,34 +83,36 @@
 		TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT)
 #define TMIO_MASK_IRQ     (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD)
 
-#define enable_mmc_irqs(ctl, i) \
+
+#define enable_mmc_irqs(host, i) \
 	do { \
 		u32 mask;\
-		mask  = tmio_ioread32((ctl) + CTL_IRQ_MASK); \
+		mask  = sd_ctrl_read32((host), CTL_IRQ_MASK); \
 		mask &= ~((i) & TMIO_MASK_IRQ); \
-		tmio_iowrite32(mask, (ctl) + CTL_IRQ_MASK); \
+		sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
 	} while (0)
 
-#define disable_mmc_irqs(ctl, i) \
+#define disable_mmc_irqs(host, i) \
 	do { \
 		u32 mask;\
-		mask  = tmio_ioread32((ctl) + CTL_IRQ_MASK); \
+		mask  = sd_ctrl_read32((host), CTL_IRQ_MASK); \
 		mask |= ((i) & TMIO_MASK_IRQ); \
-		tmio_iowrite32(mask, (ctl) + CTL_IRQ_MASK); \
+		sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
 	} while (0)
 
-#define ack_mmc_irqs(ctl, i) \
+#define ack_mmc_irqs(host, i) \
 	do { \
 		u32 mask;\
-		mask  = tmio_ioread32((ctl) + CTL_STATUS); \
+		mask  = sd_ctrl_read32((host), CTL_STATUS); \
 		mask &= ~((i) & TMIO_MASK_IRQ); \
-		tmio_iowrite32(mask, (ctl) + CTL_STATUS); \
+		sd_ctrl_write32((host), CTL_STATUS, mask); \
 	} while (0)
 
 
 struct tmio_mmc_host {
 	void __iomem *cnf;
 	void __iomem *ctl;
+	unsigned long bus_shift;
 	struct mmc_command      *cmd;
 	struct mmc_request      *mrq;
 	struct mmc_data         *data;
@@ -123,6 +125,63 @@ struct tmio_mmc_host {
 	unsigned int            sg_off;
 };
 
+#include <linux/io.h>
+
+static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
+{
+	return readw(host->ctl + (addr << host->bus_shift));
+}
+
+static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
+		u16 *buf, int count)
+{
+	readsw(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
+static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
+{
+	return readw(host->ctl + (addr << host->bus_shift)) |
+	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
+}
+
+static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr,
+		u16 val)
+{
+	writew(val, host->ctl + (addr << host->bus_shift));
+}
+
+static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
+		u16 *buf, int count)
+{
+	writesw(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
+static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr,
+		u32 val)
+{
+	writew(val, host->ctl + (addr << host->bus_shift));
+	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
+}
+
+static inline void sd_config_write8(struct tmio_mmc_host *host, int addr,
+		u8 val)
+{
+	writeb(val, host->cnf + (addr << host->bus_shift));
+}
+
+static inline void sd_config_write16(struct tmio_mmc_host *host, int addr,
+		u16 val)
+{
+	writew(val, host->cnf + (addr << host->bus_shift));
+}
+
+static inline void sd_config_write32(struct tmio_mmc_host *host, int addr,
+		u32 val)
+{
+	writew(val, host->cnf + (addr << host->bus_shift));
+	writew(val >> 16, host->cnf + ((addr + 2) << host->bus_shift));
+}
+
 #include <linux/scatterlist.h>
 #include <linux/blkdev.h>
 
-- 
cgit v0.10.2


From 6c413cc76b893310b3b258b7de47fb74dcc50203 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 4 Jun 2009 20:12:33 +0200
Subject: tmio_mmc: don't use set_irq_type

Use an IRQF_TRIGGER_ flag in request_irq instead.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Ian Molton <ian@mnementh.co.uk>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 01add9b..bbcbd72 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -597,13 +597,11 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	disable_mmc_irqs(host, TMIO_MASK_ALL);
 
-	ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED, "tmio-mmc",
-		host);
+	ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED |
+		IRQF_TRIGGER_FALLING, "tmio-mmc", host);
 	if (ret)
 		goto unmap_cnf;
 
-	set_irq_type(host->irq, IRQ_TYPE_EDGE_FALLING);
-
 	mmc_add_host(mmc);
 
 	printk(KERN_INFO "%s at 0x%08lx irq %d\n", mmc_hostname(host->mmc),
-- 
cgit v0.10.2


From d6c9b5ed37c26503795d241474a17db1d306e7ea Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 4 Jun 2009 20:12:34 +0200
Subject: tmio_mmc: correct probe return value for num_resources != 3

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Ian Molton <ian@mnementh.co.uk>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index bbcbd72..b576640 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -522,23 +522,21 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	struct resource *res_ctl, *res_cnf;
 	struct tmio_mmc_host *host;
 	struct mmc_host *mmc;
-	int ret = -ENOMEM;
+	int ret = -EINVAL;
 
 	if (dev->num_resources != 3)
 		goto out;
 
 	res_ctl = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	res_cnf = platform_get_resource(dev, IORESOURCE_MEM, 1);
-	if (!res_ctl || !res_cnf) {
-		ret = -EINVAL;
+	if (!res_ctl || !res_cnf)
 		goto out;
-	}
 
 	pdata = cell->driver_data;
-	if (!pdata || !pdata->hclk) {
-		ret = -EINVAL;
+	if (!pdata || !pdata->hclk)
 		goto out;
-	}
+
+	ret = -ENOMEM;
 
 	mmc = mmc_alloc_host(sizeof(struct tmio_mmc_host), &dev->dev);
 	if (!mmc)
-- 
cgit v0.10.2


From 544f277bb849da0ba86cfc4203a4c9139e2cd927 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 4 Jun 2009 20:12:35 +0200
Subject: tmio_mmc: map SD control registers after enabling the MFD cell

ASIC3 can disable the memory, so we need to wait for mfd_cell->enable
to enable the memory before we can map the SD control registers.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Ian Molton <ian@mnementh.co.uk>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index b576640..fe6d2b6 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -493,11 +493,6 @@ static int tmio_mmc_resume(struct platform_device *dev)
 	struct tmio_mmc_host *host = mmc_priv(mmc);
 	int ret = 0;
 
-	/* Enable the MMC/SD Control registers */
-	sd_config_write16(host, CNF_CMD, SDCREN);
-	sd_config_write32(host, CNF_CTL_BASE,
-		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
-
 	/* Tell the MFD core we are ready to be enabled */
 	if (cell->enable) {
 		ret = cell->enable(dev);
@@ -505,6 +500,11 @@ static int tmio_mmc_resume(struct platform_device *dev)
 			goto out;
 	}
 
+	/* Enable the MMC/SD Control registers */
+	sd_config_write16(host, CNF_CMD, SDCREN);
+	sd_config_write32(host, CNF_CTL_BASE,
+		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
+
 	mmc_resume_host(mmc);
 
 out:
@@ -563,11 +563,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	mmc->f_min = mmc->f_max / 512;
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
-	/* Enable the MMC/SD Control registers */
-	sd_config_write16(host, CNF_CMD, SDCREN);
-	sd_config_write32(host, CNF_CTL_BASE,
-		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
-
 	/* Tell the MFD core we are ready to be enabled */
 	if (cell->enable) {
 		ret = cell->enable(dev);
@@ -575,6 +570,11 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 			goto unmap_cnf;
 	}
 
+	/* Enable the MMC/SD Control registers */
+	sd_config_write16(host, CNF_CMD, SDCREN);
+	sd_config_write32(host, CNF_CTL_BASE,
+		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
+
 	/* Disable SD power during suspend */
 	sd_config_write8(host, CNF_PWR_CTL_3, 0x01);
 
-- 
cgit v0.10.2


From da46a0bd42c81a473618e94871500fb792c98727 Mon Sep 17 00:00:00 2001
From: Ian Molton <ian@mnementh.co.uk>
Date: Fri, 12 Jun 2009 21:53:05 +0100
Subject: tmio_mmc: fix clock setup

This patch fixes the clock setup in tmio_mmc.

  * Incorrect divider setting
  * Cruft written to the clock registers (seemingly harmless but Not
Good (tm))

It also eliminates some unnecessary ifs and tidies the loop syntax.

Thanks to Philipp Zabel who discovered the divider issue, commenting

   "Except for the SDCLK = HCLK (divider bypassed) case, the clock
    setting resulted in double the requested frequency.
    The smallest possible frequency (f_max/512) is configured with
    a divider setting 0x80, not 0x40."

Signed-off-by: Ian Molton <ian@mnementh.co.uk>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index fe6d2b6..10951b7 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -37,22 +37,17 @@
 
 static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 {
-	u32 clk = 0, clock, f_min = host->mmc->f_min;
+	u32 clk = 0, clock;
 
 	if (new_clock) {
-		for (clock = f_min, clk = 0x100; new_clock >= (clock<<1); ) {
+		for (clock = host->mmc->f_min, clk = 0x80000080;
+			new_clock >= (clock<<1); clk >>= 1)
 			clock <<= 1;
-			clk >>= 1;
-		}
-		if (clk & 0x1)
-			clk = 0x20000;
-
-		clk >>= 2;
-		sd_config_write8(host, CNF_SD_CLK_MODE, (clk & 0x8000) ? 0 : 1);
 		clk |= 0x100;
 	}
 
-	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk);
+	sd_config_write8(host, CNF_SD_CLK_MODE, clk >> 22);
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff);
 }
 
 static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
-- 
cgit v0.10.2


From be3f4ae0c0c56aab903aceaceed4b9d8418e180e Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 8 Jun 2009 23:33:52 +0100
Subject: sdhci: Print ADMA status and pointer on debug

If using ADMA, then we should print the ADMA error
and current pointer in sdhci_dumpregs() when any
debug is requested.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 1432a35..35789c6 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -78,6 +78,11 @@ static void sdhci_dumpregs(struct sdhci_host *host)
 		sdhci_readl(host, SDHCI_CAPABILITIES),
 		sdhci_readl(host, SDHCI_MAX_CURRENT));
 
+	if (host->flags & SDHCI_USE_ADMA)
+		printk(KERN_DEBUG DRIVER_NAME ": ADMA Err: 0x%08x | ADMA Ptr: 0x%08x\n",
+		       readl(host->ioaddr + SDHCI_ADMA_ERROR),
+		       readl(host->ioaddr + SDHCI_ADMA_ADDRESS));
+
 	printk(KERN_DEBUG DRIVER_NAME ": ===========================================\n");
 }
 
-- 
cgit v0.10.2


From 051913dada046ac948eb6f48c0717fc25de2a917 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 8 Jun 2009 23:33:57 +0100
Subject: mmc_block: do not DMA to stack

In the write recovery routine, the data to get from the card
is allocated from the stack. The DMA mapping documentation says
explicitly stack memory is not mappable by any of the DMA calls.

Change to using kmalloc() to allocate the memory for the result
from the card and then free it once we've finished with the
transaction.

[ Changed to GFP_KERNEL allocation - Pierre Ossman ]

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 98ffc41e..adc205c 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -147,7 +147,8 @@ struct mmc_blk_request {
 static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
 {
 	int err;
-	__be32 blocks;
+	u32 result;
+	__be32 *blocks;
 
 	struct mmc_request mrq;
 	struct mmc_command cmd;
@@ -199,14 +200,21 @@ static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
 	mrq.cmd = &cmd;
 	mrq.data = &data;
 
-	sg_init_one(&sg, &blocks, 4);
+	blocks = kmalloc(4, GFP_KERNEL);
+	if (!blocks)
+		return (u32)-1;
+
+	sg_init_one(&sg, blocks, 4);
 
 	mmc_wait_for_req(card->host, &mrq);
 
+	result = ntohl(*blocks);
+	kfree(blocks);
+
 	if (cmd.error || data.error)
-		return (u32)-1;
+		result = (u32)-1;
 
-	return ntohl(blocks);
+	return result;
 }
 
 static u32 get_card_status(struct mmc_card *card, struct request *req)
-- 
cgit v0.10.2


From e6f2c7adc1318e233d31d113e6896607c54073a4 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 4 Jun 2009 20:12:37 +0200
Subject: tmio_mmc: allow compilation for ASIC3

Now tmio_mmc is able to drive the MMC/SD cell in ASIC3.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Ian Molton <spyro@f2s.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index b1cf790..660bda0 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -248,10 +248,10 @@ config MMC_SDRICOH_CS
 
 config MMC_TMIO
 	tristate "Toshiba Mobile IO Controller (TMIO) MMC/SD function support"
-	depends on MFD_TMIO
+	depends on MFD_TMIO || MFD_ASIC3
 	help
 	  This provides support for the SD/MMC cell found in TC6393XB,
-	  T7L66XB and also ipaq ASIC3
+	  T7L66XB and also HTC ASIC3
 
 config MMC_CB710
 	tristate "ENE CB710 MMC/SD Interface support"
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 10951b7..91991b4 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -10,7 +10,7 @@
  *
  * Driver for the MMC / SD / SDIO cell found in:
  *
- * TC6393XB TC6391XB TC6387XB T7L66XB
+ * TC6393XB TC6391XB TC6387XB T7L66XB ASIC3
  *
  * This driver draws mainly on scattered spec sheets, Reverse engineering
  * of the toshiba e800  SD driver and some parts of the 2.4 ASIC3 driver (4 bit
-- 
cgit v0.10.2


From 199118959e9b31cd6cd6492a323669966061033c Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Fri, 12 Jun 2009 17:58:29 +0200
Subject: atmel-mci: Integrate AT91 specific definition in header file

The MCI IP is shared among AVR32 and AT91 SOCs.
AT91 has specific bit definitions in the user interface of MCI SD/MMC IP.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h
index b58364e..adfb885 100644
--- a/drivers/mmc/host/atmel-mci-regs.h
+++ b/drivers/mmc/host/atmel-mci-regs.h
@@ -7,6 +7,11 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+/*
+ * Superset of MCI IP registers integrated in Atmel AVR32 and AT91 Processors
+ */
+
 #ifndef __DRIVERS_MMC_ATMEL_MCI_H__
 #define __DRIVERS_MMC_ATMEL_MCI_H__
 
@@ -14,11 +19,17 @@
 #define MCI_CR			0x0000	/* Control */
 # define MCI_CR_MCIEN		(  1 <<  0)	/* MCI Enable */
 # define MCI_CR_MCIDIS		(  1 <<  1)	/* MCI Disable */
+# define MCI_CR_PWSEN		(  1 <<  2)	/* Power Save Enable */
+# define MCI_CR_PWSDIS		(  1 <<  3)	/* Power Save Disable */
 # define MCI_CR_SWRST		(  1 <<  7)	/* Software Reset */
 #define MCI_MR			0x0004	/* Mode */
 # define MCI_MR_CLKDIV(x)	((x) <<  0)	/* Clock Divider */
+# define MCI_MR_PWSDIV(x)	((x) <<  8)	/* Power Saving Divider */
 # define MCI_MR_RDPROOF		(  1 << 11)	/* Read Proof */
 # define MCI_MR_WRPROOF		(  1 << 12)	/* Write Proof */
+# define MCI_MR_PDCFBYTE	(  1 << 13)	/* Force Byte Transfer */
+# define MCI_MR_PDCPADV		(  1 << 14)	/* Padding Value */
+# define MCI_MR_PDCMODE		(  1 << 15)	/* PDC-oriented Mode */
 #define MCI_DTOR		0x0008	/* Data Timeout */
 # define MCI_DTOCYC(x)		((x) <<  0)	/* Data Timeout Cycles */
 # define MCI_DTOMUL(x)		((x) <<  4)	/* Data Timeout Multiplier */
-- 
cgit v0.10.2


From 7f72134c32eb64c77d1fb35123ba8bf815bf797c Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Fri, 12 Jun 2009 17:58:30 +0200
Subject: atmel-mci: add MCI2 register definitions

New revision of Atmel MCI interface adds new features. This is a update of
register definition in header file. This new MCI IP is called MCI2.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>

diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h
index adfb885..fc8a0fe 100644
--- a/drivers/mmc/host/atmel-mci-regs.h
+++ b/drivers/mmc/host/atmel-mci-regs.h
@@ -10,6 +10,7 @@
 
 /*
  * Superset of MCI IP registers integrated in Atmel AVR32 and AT91 Processors
+ * Registers and bitfields marked with [2] are only available in MCI2
  */
 
 #ifndef __DRIVERS_MMC_ATMEL_MCI_H__
@@ -39,6 +40,7 @@
 # define MCI_SDCSEL_MASK	(  3 <<  0)
 # define MCI_SDCBUS_1BIT	(  0 <<  6)	/* 1-bit data bus */
 # define MCI_SDCBUS_4BIT	(  2 <<  6)	/* 4-bit data bus */
+# define MCI_SDCBUS_8BIT	(  3 <<  6)	/* 8-bit data bus[2] */
 # define MCI_SDCBUS_MASK	(  3 <<  6)
 #define MCI_ARGR		0x0010	/* Command Argument */
 #define MCI_CMDR		0x0014	/* Command */
@@ -67,6 +69,9 @@
 #define MCI_BLKR		0x0018	/* Block */
 # define MCI_BCNT(x)		((x) <<  0)	/* Data Block Count */
 # define MCI_BLKLEN(x)		((x) << 16)	/* Data Block Length */
+#define MCI_CSTOR		0x001c	/* Completion Signal Timeout[2] */
+# define MCI_CSTOCYC(x)		((x) <<  0)	/* CST cycles */
+# define MCI_CSTOMUL(x)		((x) <<  4)	/* CST multiplier */
 #define MCI_RSPR		0x0020	/* Response 0 */
 #define MCI_RSPR1		0x0024	/* Response 1 */
 #define MCI_RSPR2		0x0028	/* Response 2 */
@@ -94,7 +99,24 @@
 # define MCI_DTOE		(  1 <<  22)	/* Data Time-Out Error */
 # define MCI_OVRE		(  1 <<  30)	/* RX Overrun Error */
 # define MCI_UNRE		(  1 <<  31)	/* TX Underrun Error */
+#define MCI_DMA			0x0050	/* DMA Configuration[2] */
+# define MCI_DMA_OFFSET(x)	((x) <<  0)	/* DMA Write Buffer Offset */
+# define MCI_DMA_CHKSIZE(x)	((x) <<  4)	/* DMA Channel Read and Write Chunk Size */
+# define MCI_DMAEN		(  1 <<  8)	/* DMA Hardware Handshaking Enable */
+#define MCI_CFG			0x0054	/* Configuration[2] */
+# define MCI_CFG_FIFOMODE_1DATA	(  1 <<  0)	/* MCI Internal FIFO control mode */
+# define MCI_CFG_FERRCTRL_COR	(  1 <<  4)	/* Flow Error flag reset control mode */
+# define MCI_CFG_HSMODE		(  1 <<  8)	/* High Speed Mode */
+# define MCI_CFG_LSYNC		(  1 << 12)	/* Synchronize on the last block */
+#define MCI_WPMR		0x00e4	/* Write Protection Mode[2] */
+# define MCI_WP_EN		(  1 <<  0)	/* WP Enable */
+# define MCI_WP_KEY		(0x4d4349 << 8)	/* WP Key */
+#define MCI_WPSR		0x00e8	/* Write Protection Status[2] */
+# define MCI_GET_WP_VS(x)	((x) & 0x0f)
+# define MCI_GET_WP_VSRC(x)	(((x) >> 8) & 0xffff)
+#define MCI_FIFO_APERTURE	0x0200	/* FIFO Aperture[2] */
 
+/* This is not including the FIFO Aperture on MCI2 */
 #define MCI_REGS_SIZE		0x100
 
 /* Register access macros */
-- 
cgit v0.10.2


From 9aa0a489d909af0cc36c41d3061ef956c7442ce2 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Sat, 13 Jun 2009 15:14:09 -0700
Subject: IB/mthca: Don't double-free IRQs when falling back from MSI-X to INTx

When both MSI-X and legacy INTx fail to generate an interrupt, the
driver frees the MSI-X interrupts twice.  Fix this by clearing the
have_irq flag for the MSI-X interrupts when they are freed the first
time.

Reported-by: Yinghai Lu <yhlu.kernel@gmail.com>
Tested-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 28f0e0c..90e4e45 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -641,9 +641,11 @@ static void mthca_free_irqs(struct mthca_dev *dev)
 	if (dev->eq_table.have_irq)
 		free_irq(dev->pdev->irq, dev);
 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
-		if (dev->eq_table.eq[i].have_irq)
+		if (dev->eq_table.eq[i].have_irq) {
 			free_irq(dev->eq_table.eq[i].msi_x_vector,
 				 dev->eq_table.eq + i);
+			dev->eq_table.eq[i].have_irq = 0;
+		}
 }
 
 static int mthca_map_reg(struct mthca_dev *dev,
-- 
cgit v0.10.2


From c14e1a13b77e0c9cbae7c356ee13af4fc3064428 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sun, 14 Jun 2009 11:49:41 +0530
Subject: headers_check fix: mn10300, ptrace.h

fix the following 'make headers_check' warning:

  usr/include/asm-mn10300/ptrace.h:80: extern's make no sense in userspace

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>

diff --git a/arch/mn10300/include/asm/ptrace.h b/arch/mn10300/include/asm/ptrace.h
index 921942e..1b0ba5e 100644
--- a/arch/mn10300/include/asm/ptrace.h
+++ b/arch/mn10300/include/asm/ptrace.h
@@ -77,8 +77,6 @@ struct pt_regs {
 };
 #endif
 
-extern struct pt_regs *__frame; /* current frame pointer */
-
 /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
 #define PTRACE_GETREGS            12
 #define PTRACE_SETREGS            13
@@ -90,6 +88,8 @@ extern struct pt_regs *__frame; /* current frame pointer */
 
 #if defined(__KERNEL__)
 
+extern struct pt_regs *__frame;		/* current frame pointer */
+
 #if !defined(__ASSEMBLY__)
 struct task_struct;
 
@@ -107,5 +107,4 @@ extern void user_disable_single_step(struct task_struct *);
 #define profile_pc(regs) ((regs)->pc)
 
 #endif  /*  __KERNEL__  */
-
 #endif /* _ASM_PTRACE_H */
-- 
cgit v0.10.2


From 0419bb466f5059e40e141b1e3ab258a862ae11f0 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sun, 14 Jun 2009 11:51:12 +0530
Subject: headers_check fix: mn10300, setup.h

fix the following 'make headers_check' warnings:

  usr/include/asm-mn10300/setup.h:14: extern's make no sense in userspace
  usr/include/asm-mn10300/setup.h:15: extern's make no sense in userspace

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>

diff --git a/arch/mn10300/include/asm/setup.h b/arch/mn10300/include/asm/setup.h
index 08356c8..c229d1e 100644
--- a/arch/mn10300/include/asm/setup.h
+++ b/arch/mn10300/include/asm/setup.h
@@ -11,7 +11,8 @@
 #ifndef _ASM_SETUP_H
 #define _ASM_SETUP_H
 
+#ifdef __KERNEL__
 extern void __init unit_setup(void);
 extern void __init unit_init_IRQ(void);
-
+#endif
 #endif /* _ASM_SETUP_H */
-- 
cgit v0.10.2


From ce53895a5d24e0ee19fb92f56c17323fb4c9ab27 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 14 Jun 2009 11:09:29 +0100
Subject: MAINTAINERS: EB110ATX is not ebsa110

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/MAINTAINERS b/MAINTAINERS
index 1ab1b7f..aed47f2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5181,7 +5181,6 @@ P:	Vincent Sanders
 M:	support@simtec.co.uk
 W:	http://www.simtec.co.uk/products/EB110ATX/
 S:	Supported
-F:	arch/arm/mach-ebsa110/
 
 SIMTEC EB2410ITX (BAST)
 P:	Ben Dooks
-- 
cgit v0.10.2


From 73be1591579084a8103a7005dd3172f3e9dd7362 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 12 Jun 2009 03:09:29 +0100
Subject: [ARM] 5545/2: add flush_kernel_dcache_page() for ARM

Without this, the default implementation is a no op which is completely
wrong with a VIVT cache, and usage of sg_copy_buffer() produces
unpredictable results.

Tested-by: Sebastian Andrzej Siewior <bigeasy@breakpoint.cc>

CC: stable@kernel.org
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index bb7d695..1a711ea 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -429,6 +429,14 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 		__flush_anon_page(vma, page, vmaddr);
 }
 
+#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+static inline void flush_kernel_dcache_page(struct page *page)
+{
+	/* highmem pages are always flushed upon kunmap already */
+	if ((cache_is_vivt() || cache_is_vipt_aliasing()) && !PageHighMem(page))
+		__cpuc_flush_dcache_page(page_address(page));
+}
+
 #define flush_dcache_mmap_lock(mapping) \
 	spin_lock_irq(&(mapping)->tree_lock)
 #define flush_dcache_mmap_unlock(mapping) \
-- 
cgit v0.10.2


From 7923f90fffa8746f6457d4eea2109fd3d6414189 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 14 Jun 2009 22:10:41 +0200
Subject: vmlinux.lds.h update

Updated after review by Tim Abbott.
- Use HEAD_TEXT_SECTION
- Drop use of section-names.h and delete file
- Introduce EXIT_CALL

Deleting section-names.h required a few simple
updates of init.h

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@ksplice.com>

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fba4223..7381f70 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -12,7 +12,7 @@
  * {
  *	. = START;
  *	__init_begin = .;
- *	HEAD_SECTION
+ *	HEAD_TEXT_SECTION
  *	INIT_TEXT_SECTION(PAGE_SIZE)
  *	INIT_DATA_SECTION(...)
  *	PERCPU(PAGE_SIZE)
@@ -38,7 +38,7 @@
  *	/DISCARD/ : {
  *		EXIT_TEXT
  *		EXIT_DATA
- *		*(.exitcall.exit)
+ *		EXIT_CALL
  *	}
  *	STABS_DEBUG
  *	DWARF_DEBUG
@@ -52,7 +52,6 @@
  * Examples are: [__initramfs_start, __initramfs_end] for initramfs and
  *               [__nosave_begin, __nosave_end] for the nosave data
  */
- #include <linux/section-names.h>
 
 #ifndef LOAD_OFFSET
 #define LOAD_OFFSET 0
@@ -414,9 +413,9 @@
 #endif
 
 /* Section used for early init (in .S files) */
-#define HEAD_TEXT  *(HEAD_TEXT_SECTION)
+#define HEAD_TEXT  *(.head.text)
 
-#define HEAD_SECTION							\
+#define HEAD_TEXT_SECTION							\
 	.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {		\
 		HEAD_TEXT						\
 	}
@@ -473,6 +472,9 @@
 	CPU_DISCARD(exit.text)						\
 	MEM_DISCARD(exit.text)
 
+#define EXIT_CALL							\
+	*(.exitcall.exit)
+
 /*
  * bss (Block Started by Symbol) - uninitialized data
  * zeroed during startup
@@ -692,7 +694,7 @@
  * NOSAVE_DATA starts and ends with a PAGE_SIZE alignment which
  * matches the requirment of PAGE_ALIGNED_DATA.
  *
-/* use 0 as page_align if page_aligned data is not used */
+ * use 0 as page_align if page_aligned data is not used */
 #define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask)	\
 	. = ALIGN(PAGE_SIZE);						\
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {				\
@@ -726,4 +728,5 @@
 #define BSS_SECTION(sbss_align, bss_align)				\
 	SBSS								\
 	BSS(bss_align)							\
-	. = ALIGN(4);							\
+	. = ALIGN(4);
+
diff --git a/include/linux/init.h b/include/linux/init.h
index 9f70c9f..b218980 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -2,8 +2,6 @@
 #define _LINUX_INIT_H
 
 #include <linux/compiler.h>
-#include <linux/section-names.h>
-#include <linux/stringify.h>
 
 /* These macros are used to mark some functions or 
  * initialized data (doesn't apply to uninitialized data)
@@ -101,7 +99,7 @@
 #define __memexitconst   __section(.memexit.rodata)
 
 /* For assembly routines */
-#define __HEAD		.section	__stringify(HEAD_TEXT_SECTION),"ax"
+#define __HEAD		.section	".head.text","ax"
 #define __INIT		.section	".init.text","ax"
 #define __FINIT		.previous
 
diff --git a/include/linux/section-names.h b/include/linux/section-names.h
deleted file mode 100644
index c956f4e..0000000
--- a/include/linux/section-names.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_SECTION_NAMES_H
-#define __LINUX_SECTION_NAMES_H
-
-#define HEAD_TEXT_SECTION .head.text
-
-#endif /* !__LINUX_SECTION_NAMES_H */
-- 
cgit v0.10.2


From b2d8993026a26f4ece8c8b3f2e138d2ba8f18eaa Mon Sep 17 00:00:00 2001
From: Patrick Ringl <patrick_@freenet.de>
Date: Fri, 12 Jun 2009 13:58:36 +0200
Subject: README: fix misleading pointer to the defconf directory

Signed-off-by: Patrick Ringl <patrick_@freenet.de>
Cc: Arnd Bergmann <arnd@arndb.de>

diff --git a/README b/README
index 7a078ff..737838f 100644
--- a/README
+++ b/README
@@ -176,7 +176,15 @@ CONFIGURING the kernel:
 			   with questions already answered.
 			   Additionally updates the dependencies.
 	"make defconfig"   Create a ./.config file by using the default
-			   symbol values from arch/$ARCH/defconfig.
+			   symbol values from either arch/$ARCH/defconfig
+			   or arch/$ARCH/configs/${PLATFORM}_defconfig,
+			   depending on the architecture.
+	"make ${PLATFORM}_defconfig"
+			  Create a ./.config file by using the default
+			  symbol values from
+			  arch/$ARCH/configs/${PLATFORM}_defconfig.
+			  Use "make help" to get a list of all available
+			  platforms of your architecture.
 	"make allyesconfig"
 			   Create a ./.config file by setting symbol
 			   values to 'y' as much as possible.
-- 
cgit v0.10.2


From a2bb90a08cb3b64dd815d762ffde2312582a6ec9 Mon Sep 17 00:00:00 2001
From: Nico Schottelius <nico-linuxsetlocalversion@schottelius.org>
Date: Fri, 12 Jun 2009 09:59:52 +0200
Subject: kbuild: fix delay in setlocalversion on readonly source

Do not update index on read only media.
Idea published by Christian Kujau <lists@nerdbynature.de>.

Cc: Nico Schottelius <nico@ikn.schottelius.org>
Cc: Christian Kujau <lists@nerdbynature.de>

diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 0079047..46989b8 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -39,8 +39,10 @@ if head=`git rev-parse --verify --short HEAD 2>/dev/null`; then
 	        printf -- '-svn%s' "`git svn find-rev $head`"
 	fi
 
-	# Are there uncommitted changes?
-	git update-index --refresh --unmerged > /dev/null
+	# Update index only on r/w media
+	[ -w . ] && git update-index --refresh --unmerged > /dev/null
+
+	# Check for uncommitted changes
 	if git diff-index --name-only HEAD | grep -v "^scripts/package" \
 	    | read dummy; then
 		printf '%s' -dirty
-- 
cgit v0.10.2


From f2ac5e78928bf94118b4d4911166a02e8a87d3ea Mon Sep 17 00:00:00 2001
From: Jani Nikula <ext-jani.1.nikula@nokia.com>
Date: Thu, 11 Jun 2009 12:21:47 +0300
Subject: gitignore: Add GNU GLOBAL files to top .gitignore

Ignore GPATH, GRTAGS, GSYMS, and GTAGS generated by GNU GLOBAL.

Signed-off-by: Jani Nikula <ext-jani.1.nikula@nokia.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/.gitignore b/.gitignore
index 3433d7c..f3b09fe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -63,6 +63,12 @@ series
 cscope.*
 ncscope.*
 
+# gnu global files
+GPATH
+GRTAGS
+GSYMS
+GTAGS
+
 *.orig
 *~
 \#*#
-- 
cgit v0.10.2


From d1fdf24b4074a8d962f9a28519c99dcdd66bdee3 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Sun, 14 Jun 2009 13:30:45 -0700
Subject: mlx4_core: Don't double-free IRQs when falling back from MSI-X to
 INTx

When both MSI-X and legacy INTx fail to generate an interrupt, the
driver frees the MSI-X interrupts twice.  Fix this by clearing the
have_irq flag for the MSI-X interrupts when they are freed the first
time.  This is the same bug that was reported in ib_mthca by Yinghai
Lu <yhlu.kernel@gmail.com>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 8830dcb..ce064e3 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -497,8 +497,10 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
 	if (eq_table->have_irq)
 		free_irq(dev->pdev->irq, dev);
 	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
-		if (eq_table->eq[i].have_irq)
+		if (eq_table->eq[i].have_irq) {
 			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
+			eq_table->eq[i].have_irq = 0;
+		}
 
 	kfree(eq_table->irq_names);
 }
-- 
cgit v0.10.2


From d067aa741589a9783cc43315119e0f431b4e382c Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Wed, 10 Jun 2009 12:48:23 -0700
Subject: kbuild: fix a compile warning

gcc-4.4.1:

 HOSTCC  scripts/basic/fixdep
scripts/basic/fixdep.c: In function 'traps':
scripts/basic/fixdep.c:377: warning: dereferencing type-punned pointer will break strict-aliasing rules
scripts/basic/fixdep.c:379: warning: dereferencing type-punned pointer will break strict-aliasing rules

(Apparently -fno-strict-aliasing will fix this too)

Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 8912c0f..72c1520 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -373,10 +373,11 @@ void print_deps(void)
 void traps(void)
 {
 	static char test[] __attribute__((aligned(sizeof(int)))) = "CONF";
+	int *p = (int *)test;
 
-	if (*(int *)test != INT_CONF) {
+	if (*p != INT_CONF) {
 		fprintf(stderr, "fixdep: sizeof(int) != 4 or wrong endianess? %#x\n",
-			*(int *)test);
+			*p);
 		exit(2);
 	}
 }
-- 
cgit v0.10.2


From 2185a5ecd98d2cebc6a29b07b1ea4f7334c2ccc3 Mon Sep 17 00:00:00 2001
From: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Date: Sun, 14 Jun 2009 22:38:59 +0200
Subject: documentation: make version fix

The Makefiles in the build directories use the internal make variable
MAKEFILE_LIST which is available from make 3.80 only.  (The patch would be
valid back to 2.6.25)

Signed-off-by: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Documentation/Changes b/Documentation/Changes
index b95082b..112e765 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -29,7 +29,7 @@ hardware, for example, you probably needn't concern yourself with
 isdn4k-utils.
 
 o  Gnu C                  3.2                     # gcc --version
-o  Gnu make               3.79.1                  # make --version
+o  Gnu make               3.80                    # make --version
 o  binutils               2.12                    # ld -v
 o  util-linux             2.10o                   # fdformat --version
 o  module-init-tools      0.9.10                  # depmod -V
@@ -61,7 +61,7 @@ computer.
 Make
 ----
 
-You will need Gnu make 3.79.1 or later to build the kernel.
+You will need Gnu make 3.80 or later to build the kernel.
 
 Binutils
 --------
-- 
cgit v0.10.2


From 028f042613c3c99db20dd7f4e4069fbbcea92dd7 Mon Sep 17 00:00:00 2001
From: Robin Getz <robin.getz@analog.com>
Date: Mon, 10 Jul 2006 06:25:40 +0000
Subject: kallsyms: support kernel symbols in Blackfin on-chip memory

The Blackfin arch has a discontiguous .text layout due to having on-chip
instruction memory and no virtual memory support.  As such, we need to
add explicit checks for these additional .text regions.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 6654cbed..fb82a5b 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -35,6 +35,7 @@ struct sym_entry {
 static struct sym_entry *table;
 static unsigned int table_size, table_cnt;
 static unsigned long long _text, _stext, _etext, _sinittext, _einittext;
+static unsigned long long _stext_l1, _etext_l1, _stext_l2, _etext_l2;
 static int all_symbols = 0;
 static char symbol_prefix_char = '\0';
 
@@ -92,6 +93,14 @@ static int read_symbol(FILE *in, struct sym_entry *s)
 		_sinittext = s->addr;
 	else if (strcmp(sym, "_einittext") == 0)
 		_einittext = s->addr;
+	else if (strcmp(sym, "_stext_l1") == 0)
+		_stext_l1 = s->addr;
+	else if (strcmp(sym, "_etext_l1") == 0)
+		_etext_l1 = s->addr;
+	else if (strcmp(sym, "_stext_l2") == 0)
+		_stext_l2 = s->addr;
+	else if (strcmp(sym, "_etext_l2") == 0)
+		_etext_l2 = s->addr;
 	else if (toupper(stype) == 'A')
 	{
 		/* Keep these useful absolute symbols */
@@ -157,7 +166,9 @@ static int symbol_valid(struct sym_entry *s)
 	 * and inittext sections are discarded */
 	if (!all_symbols) {
 		if ((s->addr < _stext || s->addr > _etext)
-		    && (s->addr < _sinittext || s->addr > _einittext))
+		    && (s->addr < _sinittext || s->addr > _einittext)
+		    && (s->addr < _stext_l1 || s->addr > _etext_l1)
+		    && (s->addr < _stext_l2 || s->addr > _etext_l2))
 			return 0;
 		/* Corner case.  Discard any symbols with the same value as
 		 * _etext _einittext; they can move between pass 1 and 2 when
-- 
cgit v0.10.2


From 17b1f0de79dbdf5cfb2686b63a7fb9ecc440da7c Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 8 Jun 2009 19:12:13 -0400
Subject: kallsyms: generalize text region handling

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index fb82a5b..3cb5789 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -23,6 +23,10 @@
 #include <string.h>
 #include <ctype.h>
 
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+#endif
+
 #define KSYM_NAME_LEN		128
 
 struct sym_entry {
@@ -32,10 +36,23 @@ struct sym_entry {
 	unsigned char *sym;
 };
 
+struct text_range {
+	const char *stext, *etext;
+	unsigned long long start, end;
+};
+
+static unsigned long long _text;
+static struct text_range text_ranges[] = {
+	{ "_stext",     "_etext"     },
+	{ "_sinittext", "_einittext" },
+	{ "_stext_l1",  "_etext_l1"  },	/* Blackfin on-chip L1 inst SRAM */
+	{ "_stext_l2",  "_etext_l2"  },	/* Blackfin on-chip L2 SRAM */
+};
+#define text_range_text     (&text_ranges[0])
+#define text_range_inittext (&text_ranges[1])
+
 static struct sym_entry *table;
 static unsigned int table_size, table_cnt;
-static unsigned long long _text, _stext, _etext, _sinittext, _einittext;
-static unsigned long long _stext_l1, _etext_l1, _stext_l2, _etext_l2;
 static int all_symbols = 0;
 static char symbol_prefix_char = '\0';
 
@@ -62,6 +79,26 @@ static inline int is_arm_mapping_symbol(const char *str)
 	       && (str[2] == '\0' || str[2] == '.');
 }
 
+static int read_symbol_tr(const char *sym, unsigned long long addr)
+{
+	size_t i;
+	struct text_range *tr;
+
+	for (i = 0; i < ARRAY_SIZE(text_ranges); ++i) {
+		tr = &text_ranges[i];
+
+		if (strcmp(sym, tr->stext) == 0) {
+			tr->start = addr;
+			return 0;
+		} else if (strcmp(sym, tr->etext) == 0) {
+			tr->end = addr;
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
 static int read_symbol(FILE *in, struct sym_entry *s)
 {
 	char str[500];
@@ -85,22 +122,8 @@ static int read_symbol(FILE *in, struct sym_entry *s)
 	/* Ignore most absolute/undefined (?) symbols. */
 	if (strcmp(sym, "_text") == 0)
 		_text = s->addr;
-	else if (strcmp(sym, "_stext") == 0)
-		_stext = s->addr;
-	else if (strcmp(sym, "_etext") == 0)
-		_etext = s->addr;
-	else if (strcmp(sym, "_sinittext") == 0)
-		_sinittext = s->addr;
-	else if (strcmp(sym, "_einittext") == 0)
-		_einittext = s->addr;
-	else if (strcmp(sym, "_stext_l1") == 0)
-		_stext_l1 = s->addr;
-	else if (strcmp(sym, "_etext_l1") == 0)
-		_etext_l1 = s->addr;
-	else if (strcmp(sym, "_stext_l2") == 0)
-		_stext_l2 = s->addr;
-	else if (strcmp(sym, "_etext_l2") == 0)
-		_etext_l2 = s->addr;
+	else if (read_symbol_tr(sym, s->addr) == 0)
+		/* nothing to do */;
 	else if (toupper(stype) == 'A')
 	{
 		/* Keep these useful absolute symbols */
@@ -136,6 +159,21 @@ static int read_symbol(FILE *in, struct sym_entry *s)
 	return 0;
 }
 
+static int symbol_valid_tr(struct sym_entry *s)
+{
+	size_t i;
+	struct text_range *tr;
+
+	for (i = 0; i < ARRAY_SIZE(text_ranges); ++i) {
+		tr = &text_ranges[i];
+
+		if (s->addr >= tr->start && s->addr < tr->end)
+			return 0;
+	}
+
+	return 1;
+}
+
 static int symbol_valid(struct sym_entry *s)
 {
 	/* Symbols which vary between passes.  Passes 1 and 2 must have
@@ -165,10 +203,7 @@ static int symbol_valid(struct sym_entry *s)
 	/* if --all-symbols is not specified, then symbols outside the text
 	 * and inittext sections are discarded */
 	if (!all_symbols) {
-		if ((s->addr < _stext || s->addr > _etext)
-		    && (s->addr < _sinittext || s->addr > _einittext)
-		    && (s->addr < _stext_l1 || s->addr > _etext_l1)
-		    && (s->addr < _stext_l2 || s->addr > _etext_l2))
+		if (symbol_valid_tr(s) == 0)
 			return 0;
 		/* Corner case.  Discard any symbols with the same value as
 		 * _etext _einittext; they can move between pass 1 and 2 when
@@ -176,10 +211,10 @@ static int symbol_valid(struct sym_entry *s)
 		 * they may get dropped in pass 2, which breaks the kallsyms
 		 * rules.
 		 */
-		if ((s->addr == _etext &&
-				strcmp((char *)s->sym + offset, "_etext")) ||
-		    (s->addr == _einittext &&
-				strcmp((char *)s->sym + offset, "_einittext")))
+		if ((s->addr == text_range_text->end &&
+				strcmp((char *)s->sym + offset, text_range_text->etext)) ||
+		    (s->addr == text_range_inittext->end &&
+				strcmp((char *)s->sym + offset, text_range_inittext->etext)))
 			return 0;
 	}
 
-- 
cgit v0.10.2


From 566432224731c3d8fa7925ce07953701f536a666 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Sun, 14 Jun 2009 22:48:07 +0200
Subject: kbuild: handle non-existing options in scripts/config

If an option does not exist in .config, set it at the end of the file.

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/config b/scripts/config
index db6084b..30825a5 100755
--- a/scripts/config
+++ b/scripts/config
@@ -26,8 +26,6 @@ options:
 
 config doesn't check the validity of the .config file. This is done at next
  make time.
-The options need to be already in the file before they can be changed,
-but sometimes you can cheat with the --*-after options.
 EOL
 	exit 1
 }
@@ -45,8 +43,18 @@ checkarg() {
 	ARG="`echo $ARG | tr a-z A-Z`"
 }
 
-replace() {
-	sed -i -e "$@" $FN
+set_var() {
+	local name=$1 new=$2 before=$3
+
+	name_re="^($name=|# $name is not set)"
+	before_re="^($before=|# $before is not set)"
+	if test -n "$before" && grep -Eq "$before_re" "$FN"; then
+		sed -ri "/$before_re/a $new" "$FN"
+	elif grep -Eq "$name_re" "$FN"; then
+		sed -ri "s:$name_re.*:$new:" "$FN"
+	else
+		echo "$new" >>"$FN"
+	fi
 }
 
 if [ "$1" = "--file" ]; then
@@ -70,20 +78,19 @@ while [ "$1" != "" ] ; do
 	case "$CMD" in
 	--enable|-e)
 		checkarg "$1"
-		replace "s/# CONFIG_$ARG is not set/CONFIG_$ARG=y/"
+		set_var "CONFIG_$ARG" "CONFIG_$ARG=y"
 		shift
 		;;
 
 	--disable|-d)
 		checkarg "$1"
-		replace "s/CONFIG_$ARG=[my]/# CONFIG_$ARG is not set/"
+		set_var "CONFIG_$ARG" "# CONFIG_$ARG is not set"
 		shift
 		;;
 
 	--module|-m)
 		checkarg "$1"
-		replace "s/CONFIG_$ARG=y/CONFIG_$ARG=m/" \
-			-e "s/# CONFIG_$ARG is not set/CONFIG_$ARG=m/"
+		set_var "CONFIG_$ARG" "CONFIG_$ARG=m"
 		shift
 		;;
 
@@ -109,9 +116,7 @@ while [ "$1" != "" ] ; do
 		A=$ARG
 		checkarg "$2"
 		B=$ARG
-		replace "/CONFIG_$A=[my]/aCONFIG_$B=y" \
-			-e "/# CONFIG_$ARG is not set/a/CONFIG_$ARG=y" \
-			-e "s/# CONFIG_$ARG is not set/CONFIG_$ARG=y/"
+		set_var "CONFIG_$B" "CONFIG_$B=y" "CONFIG_$A"
 		shift
 		shift
 		;;
@@ -121,9 +126,7 @@ while [ "$1" != "" ] ; do
 		A=$ARG
 		checkarg "$2"
 		B=$ARG
-		replace "/CONFIG_$A=[my]/a# CONFIG_$B is not set" \
-		-e "/# CONFIG_$ARG is not set/a/# CONFIG_$ARG is not set" \
-		-e "s/CONFIG_$ARG=[my]/# CONFIG_$ARG is not set/"
+		set_var "CONFIG_$B" "# CONFIG_$B is not set" "CONFIG_$A"
 		shift
 		shift
 		;;
@@ -133,10 +136,7 @@ while [ "$1" != "" ] ; do
 		A=$ARG
 		checkarg "$2"
 		B=$ARG
-		replace "/CONFIG_$A=[my]/aCONFIG_$B=m" \
-			-e "/# CONFIG_$ARG is not set/a/CONFIG_$ARG=m" \
-			-e "s/CONFIG_$ARG=y/CONFIG_$ARG=m/" \
-			-e "s/# CONFIG_$ARG is not set/CONFIG_$ARG=m/"
+		set_var "CONFIG_$B" "CONFIG_$B=m" "CONFIG_$A"
 		shift
 		shift
 		;;
-- 
cgit v0.10.2


From 47312d2cfd9b769c1739738602c163c4c9814c7b Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Mon, 25 May 2009 16:43:25 +0200
Subject: kbuild: simplify argument loop in scripts/config

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/config b/scripts/config
index 30825a5..640c6fe 100755
--- a/scripts/config
+++ b/scripts/config
@@ -62,8 +62,7 @@ if [ "$1" = "--file" ]; then
 	if [ "$FN" = "" ] ; then
 		usage
 	fi
-	shift
-	shift
+	shift 2
 else
 	FN=.config
 fi
@@ -76,26 +75,34 @@ while [ "$1" != "" ] ; do
 	CMD="$1"
 	shift
 	case "$CMD" in
-	--enable|-e)
+	--refresh)
+		;;
+	--*-after)
+		checkarg "$1"
+		A=$ARG
+		checkarg "$2"
+		B=$ARG
+		shift 2
+		;;
+	--*)
 		checkarg "$1"
-		set_var "CONFIG_$ARG" "CONFIG_$ARG=y"
 		shift
 		;;
+	esac
+	case "$CMD" in
+	--enable|-e)
+		set_var "CONFIG_$ARG" "CONFIG_$ARG=y"
+		;;
 
 	--disable|-d)
-		checkarg "$1"
 		set_var "CONFIG_$ARG" "# CONFIG_$ARG is not set"
-		shift
 		;;
 
 	--module|-m)
-		checkarg "$1"
 		set_var "CONFIG_$ARG" "CONFIG_$ARG=m"
-		shift
 		;;
 
 	--state|-s)
-		checkarg "$1"
 		if grep -q "# CONFIG_$ARG is not set" $FN ; then
 			echo n
 		else
@@ -108,37 +115,18 @@ while [ "$1" != "" ] ; do
 				echo "$V"
 			fi
 		fi
-		shift
 		;;
 
 	--enable-after|-E)
-		checkarg "$1"
-		A=$ARG
-		checkarg "$2"
-		B=$ARG
 		set_var "CONFIG_$B" "CONFIG_$B=y" "CONFIG_$A"
-		shift
-		shift
 		;;
 
 	--disable-after|-D)
-		checkarg "$1"
-		A=$ARG
-		checkarg "$2"
-		B=$ARG
 		set_var "CONFIG_$B" "# CONFIG_$B is not set" "CONFIG_$A"
-		shift
-		shift
 		;;
 
 	--module-after|-M)
-		checkarg "$1"
-		A=$ARG
-		checkarg "$2"
-		B=$ARG
 		set_var "CONFIG_$B" "CONFIG_$B=m" "CONFIG_$A"
-		shift
-		shift
 		;;
 
 	# undocumented because it ignores --file (fixme)
-- 
cgit v0.10.2


From 1f990cf94559e0a7363d56aade1d5dc6c515b60b Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Mon, 25 May 2009 16:43:27 +0200
Subject: kbuild: add generic --set-str option to scripts/config

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/scripts/config b/scripts/config
index 640c6fe..608d7fd 100755
--- a/scripts/config
+++ b/scripts/config
@@ -9,8 +9,10 @@ config options command ...
 commands:
 	--enable|-e option   Enable option
 	--disable|-d option  Disable option
-	--module|-m option      Turn option into a module
-	--state|-s option       Print state of option (n,y,m,undef)
+	--module|-m option   Turn option into a module
+	--set-str option value
+	                     Set option to "value"
+	--state|-s option    Print state of option (n,y,m,undef)
 
 	--enable-after|-E beforeopt option
                              Enable option directly after other option
@@ -102,6 +104,11 @@ while [ "$1" != "" ] ; do
 		set_var "CONFIG_$ARG" "CONFIG_$ARG=m"
 		;;
 
+	--set-str)
+		set_var "CONFIG_$ARG" "CONFIG_$ARG=\"$1\""
+		shift
+		;;
+
 	--state|-s)
 		if grep -q "# CONFIG_$ARG is not set" $FN ; then
 			echo n
-- 
cgit v0.10.2


From 5a7e3d1281bbc4404b250b4a18d3ecb07c77640c Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Sat, 13 Jun 2009 14:52:33 +0200
Subject: keyboard: advertise KT_DEAD2 extended diacriticals

In addition to KT_DEAD which has limited support for diacriticals,
there is KT_DEAD2 that can support 256 criticals, so let's advertise
it in <linux/keyboard.h>.

This lets userland know abut the drivers/char/keyboard.c function
k_dead2, which supports more than the few trivial ones that k_dead
supports.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h
index a3c984d..33a63f6 100644
--- a/include/linux/keyboard.h
+++ b/include/linux/keyboard.h
@@ -56,6 +56,7 @@ extern int unregister_keyboard_notifier(struct notifier_block *nb);
 #define KT_ASCII	9
 #define KT_LOCK		10
 #define KT_SLOCK	12
+#define KT_DEAD2	13
 #define KT_BRL		14
 
 #define K(t,v)		(((t)<<8)|(v))
-- 
cgit v0.10.2


From 9cf46a35d25debfc314dd6f090b8075bd0b7f74c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 13 Jun 2009 19:37:18 -0700
Subject: fusion: fix recent kernel-doc problems

Fix recent fusion driver kernel-doc fatal error and warnings.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Eric.Moore@lsi.com
Cc: support@lsi.com
Cc: DL-MPTFusionLinux@lsi.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 44b9315..0df0652 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -1023,8 +1023,7 @@ mpt_add_sge_64bit(void *pAddr, u32 flagslength, dma_addr_t dma_addr)
 }
 
 /**
- *	mpt_add_sge_64bit_1078 - Place a simple 64 bit SGE at address pAddr
- *	(1078 workaround).
+ *	mpt_add_sge_64bit_1078 - Place a simple 64 bit SGE at address pAddr (1078 workaround).
  *	@pAddr: virtual address for SGE
  *	@flagslength: SGE flags and data transfer length
  *	@dma_addr: Physical address
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 024e830..8440f78f 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -2675,8 +2675,8 @@ mptscsih_timer_expired(unsigned long data)
 /**
  *	mptscsih_get_completion_code -
  *	@ioc: Pointer to MPT_ADAPTER structure
- *	@reply:
- *	@cmd:
+ *	@req: Pointer to original MPT request frame
+ *	@reply: Pointer to MPT reply frame (NULL if TurboReply)
  *
  **/
 static int
-- 
cgit v0.10.2


From 3f8d9ced7746f3f329ccca0bb3f3c7a2c15c47bb Mon Sep 17 00:00:00 2001
From: Arne Janbu <arnej@ampheus.de>
Date: Wed, 10 Jun 2009 18:25:10 +0200
Subject: .gitignore: ignore *.lzma files

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/.gitignore b/.gitignore
index f3b09fe..e9950be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@
 *.elf
 *.bin
 *.gz
+*.lzma
 *.patch
 
 #
-- 
cgit v0.10.2